{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9999873081950983, "eval_steps": 500, "global_step": 78790, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.5383609803150105e-05, "grad_norm": 272.0, "learning_rate": 3.807589795659347e-08, "loss": 5.3357, "step": 1 }, { "epoch": 0.00012691804901575053, "grad_norm": 205.0, "learning_rate": 1.9037948978296736e-07, "loss": 4.8448, "step": 5 }, { "epoch": 0.00025383609803150106, "grad_norm": 184.0, "learning_rate": 3.807589795659347e-07, "loss": 4.5743, "step": 10 }, { "epoch": 0.00038075414704725157, "grad_norm": 195.0, "learning_rate": 5.711384693489021e-07, "loss": 4.8461, "step": 15 }, { "epoch": 0.0005076721960630021, "grad_norm": 192.0, "learning_rate": 7.615179591318694e-07, "loss": 4.9251, "step": 20 }, { "epoch": 0.0006345902450787526, "grad_norm": 181.0, "learning_rate": 9.518974489148368e-07, "loss": 4.5278, "step": 25 }, { "epoch": 0.0007615082940945031, "grad_norm": 160.0, "learning_rate": 1.1422769386978042e-06, "loss": 4.4324, "step": 30 }, { "epoch": 0.0008884263431102537, "grad_norm": 236.0, "learning_rate": 1.3326564284807715e-06, "loss": 4.6558, "step": 35 }, { "epoch": 0.0010153443921260042, "grad_norm": 212.0, "learning_rate": 1.5230359182637389e-06, "loss": 4.4854, "step": 40 }, { "epoch": 0.0011422624411417549, "grad_norm": 201.0, "learning_rate": 1.7134154080467062e-06, "loss": 4.5418, "step": 45 }, { "epoch": 0.0012691804901575053, "grad_norm": 162.0, "learning_rate": 1.9037948978296735e-06, "loss": 4.6377, "step": 50 }, { "epoch": 0.0013960985391732559, "grad_norm": 221.0, "learning_rate": 2.094174387612641e-06, "loss": 4.5738, "step": 55 }, { "epoch": 0.0015230165881890063, "grad_norm": 184.0, "learning_rate": 2.2845538773956084e-06, "loss": 4.5584, "step": 60 }, { "epoch": 0.0016499346372047569, "grad_norm": 149.0, "learning_rate": 2.474933367178576e-06, "loss": 4.3779, "step": 65 }, { "epoch": 0.0017768526862205075, "grad_norm": 171.0, "learning_rate": 2.665312856961543e-06, "loss": 4.2784, "step": 70 }, { "epoch": 0.0019037707352362579, "grad_norm": 189.0, "learning_rate": 2.85569234674451e-06, "loss": 4.4795, "step": 75 }, { "epoch": 0.0020306887842520085, "grad_norm": 186.0, "learning_rate": 3.0460718365274778e-06, "loss": 4.2467, "step": 80 }, { "epoch": 0.002157606833267759, "grad_norm": 163.0, "learning_rate": 3.236451326310445e-06, "loss": 4.3212, "step": 85 }, { "epoch": 0.0022845248822835097, "grad_norm": 166.0, "learning_rate": 3.4268308160934124e-06, "loss": 3.9971, "step": 90 }, { "epoch": 0.00241144293129926, "grad_norm": 143.0, "learning_rate": 3.61721030587638e-06, "loss": 4.1489, "step": 95 }, { "epoch": 0.0025383609803150105, "grad_norm": 176.0, "learning_rate": 3.807589795659347e-06, "loss": 4.2167, "step": 100 }, { "epoch": 0.0026652790293307613, "grad_norm": 153.0, "learning_rate": 3.997969285442315e-06, "loss": 4.0921, "step": 105 }, { "epoch": 0.0027921970783465117, "grad_norm": 149.0, "learning_rate": 4.188348775225282e-06, "loss": 3.9412, "step": 110 }, { "epoch": 0.002919115127362262, "grad_norm": 162.0, "learning_rate": 4.37872826500825e-06, "loss": 4.0691, "step": 115 }, { "epoch": 0.0030460331763780125, "grad_norm": 118.5, "learning_rate": 4.569107754791217e-06, "loss": 3.9134, "step": 120 }, { "epoch": 0.0031729512253937634, "grad_norm": 103.0, "learning_rate": 4.759487244574184e-06, "loss": 3.6624, "step": 125 }, { "epoch": 0.0032998692744095138, "grad_norm": 50.75, "learning_rate": 4.949866734357152e-06, "loss": 3.4292, "step": 130 }, { "epoch": 0.003426787323425264, "grad_norm": 104.0, "learning_rate": 5.140246224140119e-06, "loss": 3.6117, "step": 135 }, { "epoch": 0.003553705372441015, "grad_norm": 117.5, "learning_rate": 5.330625713923086e-06, "loss": 3.4234, "step": 140 }, { "epoch": 0.0036806234214567654, "grad_norm": 89.5, "learning_rate": 5.521005203706053e-06, "loss": 3.2125, "step": 145 }, { "epoch": 0.0038075414704725158, "grad_norm": 111.5, "learning_rate": 5.71138469348902e-06, "loss": 3.6397, "step": 150 }, { "epoch": 0.003934459519488266, "grad_norm": 74.0, "learning_rate": 5.901764183271988e-06, "loss": 3.3793, "step": 155 }, { "epoch": 0.004061377568504017, "grad_norm": 72.0, "learning_rate": 6.0921436730549555e-06, "loss": 3.3413, "step": 160 }, { "epoch": 0.004188295617519768, "grad_norm": 80.5, "learning_rate": 6.2825231628379235e-06, "loss": 3.2639, "step": 165 }, { "epoch": 0.004315213666535518, "grad_norm": 58.25, "learning_rate": 6.47290265262089e-06, "loss": 3.336, "step": 170 }, { "epoch": 0.004442131715551269, "grad_norm": 67.5, "learning_rate": 6.663282142403858e-06, "loss": 3.0847, "step": 175 }, { "epoch": 0.0045690497645670194, "grad_norm": 59.75, "learning_rate": 6.853661632186825e-06, "loss": 3.0869, "step": 180 }, { "epoch": 0.004695967813582769, "grad_norm": 58.5, "learning_rate": 7.044041121969793e-06, "loss": 2.914, "step": 185 }, { "epoch": 0.00482288586259852, "grad_norm": 49.5, "learning_rate": 7.23442061175276e-06, "loss": 2.9086, "step": 190 }, { "epoch": 0.004949803911614271, "grad_norm": 54.75, "learning_rate": 7.424800101535728e-06, "loss": 2.8756, "step": 195 }, { "epoch": 0.005076721960630021, "grad_norm": 38.75, "learning_rate": 7.615179591318694e-06, "loss": 2.5932, "step": 200 }, { "epoch": 0.005203640009645772, "grad_norm": 59.5, "learning_rate": 7.805559081101661e-06, "loss": 2.7407, "step": 205 }, { "epoch": 0.005330558058661523, "grad_norm": 51.5, "learning_rate": 7.99593857088463e-06, "loss": 2.8418, "step": 210 }, { "epoch": 0.005457476107677273, "grad_norm": 39.25, "learning_rate": 8.186318060667597e-06, "loss": 2.5818, "step": 215 }, { "epoch": 0.0055843941566930235, "grad_norm": 46.75, "learning_rate": 8.376697550450563e-06, "loss": 2.6925, "step": 220 }, { "epoch": 0.0057113122057087734, "grad_norm": 52.0, "learning_rate": 8.567077040233531e-06, "loss": 2.635, "step": 225 }, { "epoch": 0.005838230254724524, "grad_norm": 36.75, "learning_rate": 8.7574565300165e-06, "loss": 2.5519, "step": 230 }, { "epoch": 0.005965148303740275, "grad_norm": 33.75, "learning_rate": 8.947836019799466e-06, "loss": 2.4245, "step": 235 }, { "epoch": 0.006092066352756025, "grad_norm": 38.75, "learning_rate": 9.138215509582434e-06, "loss": 2.3632, "step": 240 }, { "epoch": 0.006218984401771776, "grad_norm": 28.125, "learning_rate": 9.328594999365402e-06, "loss": 2.3792, "step": 245 }, { "epoch": 0.006345902450787527, "grad_norm": 27.5, "learning_rate": 9.518974489148368e-06, "loss": 2.3519, "step": 250 }, { "epoch": 0.006472820499803277, "grad_norm": 30.0, "learning_rate": 9.709353978931334e-06, "loss": 2.287, "step": 255 }, { "epoch": 0.0065997385488190275, "grad_norm": 18.875, "learning_rate": 9.899733468714304e-06, "loss": 2.3668, "step": 260 }, { "epoch": 0.006726656597834778, "grad_norm": 17.875, "learning_rate": 1.009011295849727e-05, "loss": 2.2229, "step": 265 }, { "epoch": 0.006853574646850528, "grad_norm": 17.25, "learning_rate": 1.0280492448280238e-05, "loss": 2.0801, "step": 270 }, { "epoch": 0.006980492695866279, "grad_norm": 784.0, "learning_rate": 1.0470871938063204e-05, "loss": 2.4075, "step": 275 }, { "epoch": 0.00710741074488203, "grad_norm": 28.625, "learning_rate": 1.0661251427846172e-05, "loss": 2.1454, "step": 280 }, { "epoch": 0.00723432879389778, "grad_norm": 18.25, "learning_rate": 1.085163091762914e-05, "loss": 2.205, "step": 285 }, { "epoch": 0.007361246842913531, "grad_norm": 17.25, "learning_rate": 1.1042010407412107e-05, "loss": 2.0846, "step": 290 }, { "epoch": 0.007488164891929282, "grad_norm": 16.75, "learning_rate": 1.1232389897195075e-05, "loss": 2.0992, "step": 295 }, { "epoch": 0.0076150829409450315, "grad_norm": 16.875, "learning_rate": 1.142276938697804e-05, "loss": 1.9657, "step": 300 }, { "epoch": 0.007742000989960782, "grad_norm": 18.875, "learning_rate": 1.161314887676101e-05, "loss": 1.8515, "step": 305 }, { "epoch": 0.007868919038976532, "grad_norm": 16.125, "learning_rate": 1.1803528366543977e-05, "loss": 2.0928, "step": 310 }, { "epoch": 0.007995837087992284, "grad_norm": 17.125, "learning_rate": 1.1993907856326943e-05, "loss": 1.9805, "step": 315 }, { "epoch": 0.008122755137008034, "grad_norm": 13.625, "learning_rate": 1.2184287346109911e-05, "loss": 1.9078, "step": 320 }, { "epoch": 0.008249673186023784, "grad_norm": 12.4375, "learning_rate": 1.2374666835892879e-05, "loss": 1.8346, "step": 325 }, { "epoch": 0.008376591235039536, "grad_norm": 20.5, "learning_rate": 1.2565046325675847e-05, "loss": 1.9188, "step": 330 }, { "epoch": 0.008503509284055286, "grad_norm": 12.6875, "learning_rate": 1.2755425815458813e-05, "loss": 1.8707, "step": 335 }, { "epoch": 0.008630427333071036, "grad_norm": 12.0, "learning_rate": 1.294580530524178e-05, "loss": 1.7527, "step": 340 }, { "epoch": 0.008757345382086787, "grad_norm": 13.6875, "learning_rate": 1.313618479502475e-05, "loss": 1.743, "step": 345 }, { "epoch": 0.008884263431102537, "grad_norm": 14.0, "learning_rate": 1.3326564284807715e-05, "loss": 1.6637, "step": 350 }, { "epoch": 0.009011181480118287, "grad_norm": 13.1875, "learning_rate": 1.3516943774590683e-05, "loss": 1.7138, "step": 355 }, { "epoch": 0.009138099529134039, "grad_norm": 12.5625, "learning_rate": 1.370732326437365e-05, "loss": 1.6835, "step": 360 }, { "epoch": 0.009265017578149789, "grad_norm": 13.375, "learning_rate": 1.389770275415662e-05, "loss": 1.6557, "step": 365 }, { "epoch": 0.009391935627165539, "grad_norm": 12.0625, "learning_rate": 1.4088082243939586e-05, "loss": 1.5855, "step": 370 }, { "epoch": 0.00951885367618129, "grad_norm": 11.125, "learning_rate": 1.4278461733722552e-05, "loss": 1.4324, "step": 375 }, { "epoch": 0.00964577172519704, "grad_norm": 18.5, "learning_rate": 1.446884122350552e-05, "loss": 1.6738, "step": 380 }, { "epoch": 0.00977268977421279, "grad_norm": 12.375, "learning_rate": 1.4659220713288486e-05, "loss": 1.369, "step": 385 }, { "epoch": 0.009899607823228542, "grad_norm": 10.875, "learning_rate": 1.4849600203071456e-05, "loss": 1.4851, "step": 390 }, { "epoch": 0.010026525872244292, "grad_norm": 11.3125, "learning_rate": 1.5039979692854422e-05, "loss": 1.3972, "step": 395 }, { "epoch": 0.010153443921260042, "grad_norm": 11.375, "learning_rate": 1.5230359182637388e-05, "loss": 1.3869, "step": 400 }, { "epoch": 0.010280361970275794, "grad_norm": 11.125, "learning_rate": 1.5420738672420358e-05, "loss": 1.3477, "step": 405 }, { "epoch": 0.010407280019291544, "grad_norm": 10.25, "learning_rate": 1.5611118162203323e-05, "loss": 1.2925, "step": 410 }, { "epoch": 0.010534198068307294, "grad_norm": 10.5625, "learning_rate": 1.580149765198629e-05, "loss": 1.244, "step": 415 }, { "epoch": 0.010661116117323045, "grad_norm": 8.75, "learning_rate": 1.599187714176926e-05, "loss": 1.7445, "step": 420 }, { "epoch": 0.010788034166338795, "grad_norm": 8.1875, "learning_rate": 1.6182256631552226e-05, "loss": 1.1078, "step": 425 }, { "epoch": 0.010914952215354545, "grad_norm": 8.375, "learning_rate": 1.6372636121335194e-05, "loss": 1.2133, "step": 430 }, { "epoch": 0.011041870264370295, "grad_norm": 6.125, "learning_rate": 1.656301561111816e-05, "loss": 1.1496, "step": 435 }, { "epoch": 0.011168788313386047, "grad_norm": 5.0625, "learning_rate": 1.6753395100901127e-05, "loss": 1.1155, "step": 440 }, { "epoch": 0.011295706362401797, "grad_norm": 8.625, "learning_rate": 1.6943774590684095e-05, "loss": 1.0698, "step": 445 }, { "epoch": 0.011422624411417547, "grad_norm": 5.78125, "learning_rate": 1.7134154080467063e-05, "loss": 1.0804, "step": 450 }, { "epoch": 0.011549542460433299, "grad_norm": 7.125, "learning_rate": 1.732453357025003e-05, "loss": 1.0711, "step": 455 }, { "epoch": 0.011676460509449049, "grad_norm": 5.59375, "learning_rate": 1.7514913060033e-05, "loss": 1.0761, "step": 460 }, { "epoch": 0.011803378558464798, "grad_norm": 5.34375, "learning_rate": 1.7705292549815963e-05, "loss": 1.041, "step": 465 }, { "epoch": 0.01193029660748055, "grad_norm": 4.8125, "learning_rate": 1.789567203959893e-05, "loss": 1.0256, "step": 470 }, { "epoch": 0.0120572146564963, "grad_norm": 3.625, "learning_rate": 1.80860515293819e-05, "loss": 0.9748, "step": 475 }, { "epoch": 0.01218413270551205, "grad_norm": 238.0, "learning_rate": 1.8276431019164867e-05, "loss": 1.5886, "step": 480 }, { "epoch": 0.012311050754527802, "grad_norm": 4.375, "learning_rate": 1.8466810508947835e-05, "loss": 0.9838, "step": 485 }, { "epoch": 0.012437968803543552, "grad_norm": 2.921875, "learning_rate": 1.8657189998730803e-05, "loss": 0.9764, "step": 490 }, { "epoch": 0.012564886852559302, "grad_norm": 2.859375, "learning_rate": 1.8847569488513768e-05, "loss": 1.4237, "step": 495 }, { "epoch": 0.012691804901575053, "grad_norm": 3.859375, "learning_rate": 1.9037948978296736e-05, "loss": 0.9789, "step": 500 }, { "epoch": 0.012818722950590803, "grad_norm": 3.0625, "learning_rate": 1.9228328468079704e-05, "loss": 1.2897, "step": 505 }, { "epoch": 0.012945640999606553, "grad_norm": 2.703125, "learning_rate": 1.941870795786267e-05, "loss": 0.9062, "step": 510 }, { "epoch": 0.013072559048622305, "grad_norm": 3.734375, "learning_rate": 1.960908744764564e-05, "loss": 1.3845, "step": 515 }, { "epoch": 0.013199477097638055, "grad_norm": 3.71875, "learning_rate": 1.9799466937428608e-05, "loss": 0.9043, "step": 520 }, { "epoch": 0.013326395146653805, "grad_norm": 2.890625, "learning_rate": 1.9989846427211572e-05, "loss": 1.0817, "step": 525 }, { "epoch": 0.013453313195669557, "grad_norm": 3.140625, "learning_rate": 2.018022591699454e-05, "loss": 0.8974, "step": 530 }, { "epoch": 0.013580231244685307, "grad_norm": 2.078125, "learning_rate": 2.0370605406777508e-05, "loss": 0.8958, "step": 535 }, { "epoch": 0.013707149293701057, "grad_norm": 2.46875, "learning_rate": 2.0560984896560476e-05, "loss": 0.8978, "step": 540 }, { "epoch": 0.013834067342716808, "grad_norm": 2.1875, "learning_rate": 2.075136438634344e-05, "loss": 0.8757, "step": 545 }, { "epoch": 0.013960985391732558, "grad_norm": 3.03125, "learning_rate": 2.094174387612641e-05, "loss": 0.8985, "step": 550 }, { "epoch": 0.014087903440748308, "grad_norm": 3.671875, "learning_rate": 2.113212336590938e-05, "loss": 0.8483, "step": 555 }, { "epoch": 0.01421482148976406, "grad_norm": 2.796875, "learning_rate": 2.1322502855692345e-05, "loss": 0.9025, "step": 560 }, { "epoch": 0.01434173953877981, "grad_norm": 2.546875, "learning_rate": 2.1512882345475313e-05, "loss": 0.876, "step": 565 }, { "epoch": 0.01446865758779556, "grad_norm": 1.9296875, "learning_rate": 2.170326183525828e-05, "loss": 0.8757, "step": 570 }, { "epoch": 0.014595575636811312, "grad_norm": 1.9765625, "learning_rate": 2.189364132504125e-05, "loss": 1.4453, "step": 575 }, { "epoch": 0.014722493685827061, "grad_norm": 2.28125, "learning_rate": 2.2084020814824213e-05, "loss": 0.8123, "step": 580 }, { "epoch": 0.014849411734842811, "grad_norm": 1.4765625, "learning_rate": 2.227440030460718e-05, "loss": 0.8358, "step": 585 }, { "epoch": 0.014976329783858563, "grad_norm": 1.8359375, "learning_rate": 2.246477979439015e-05, "loss": 0.8641, "step": 590 }, { "epoch": 0.015103247832874313, "grad_norm": 2.46875, "learning_rate": 2.2655159284173114e-05, "loss": 0.8246, "step": 595 }, { "epoch": 0.015230165881890063, "grad_norm": 1.828125, "learning_rate": 2.284553877395608e-05, "loss": 0.8271, "step": 600 }, { "epoch": 0.015357083930905815, "grad_norm": 1.28125, "learning_rate": 2.3035918263739053e-05, "loss": 0.8374, "step": 605 }, { "epoch": 0.015484001979921565, "grad_norm": 13.1875, "learning_rate": 2.322629775352202e-05, "loss": 0.8652, "step": 610 }, { "epoch": 0.015610920028937315, "grad_norm": 2.65625, "learning_rate": 2.3416677243304986e-05, "loss": 0.7961, "step": 615 }, { "epoch": 0.015737838077953065, "grad_norm": 1.84375, "learning_rate": 2.3607056733087954e-05, "loss": 0.8301, "step": 620 }, { "epoch": 0.015864756126968815, "grad_norm": 1.640625, "learning_rate": 2.379743622287092e-05, "loss": 0.8079, "step": 625 }, { "epoch": 0.015991674175984568, "grad_norm": 1.15625, "learning_rate": 2.3987815712653886e-05, "loss": 0.7914, "step": 630 }, { "epoch": 0.016118592225000318, "grad_norm": 1.671875, "learning_rate": 2.4178195202436854e-05, "loss": 0.8292, "step": 635 }, { "epoch": 0.016245510274016068, "grad_norm": 1.7890625, "learning_rate": 2.4368574692219822e-05, "loss": 0.7855, "step": 640 }, { "epoch": 0.016372428323031818, "grad_norm": 1.484375, "learning_rate": 2.4558954182002793e-05, "loss": 0.776, "step": 645 }, { "epoch": 0.016499346372047568, "grad_norm": 1.90625, "learning_rate": 2.4749333671785758e-05, "loss": 0.8049, "step": 650 }, { "epoch": 0.016626264421063318, "grad_norm": 1.4375, "learning_rate": 2.4939713161568726e-05, "loss": 0.8198, "step": 655 }, { "epoch": 0.01675318247007907, "grad_norm": 1.6875, "learning_rate": 2.5130092651351694e-05, "loss": 0.7706, "step": 660 }, { "epoch": 0.01688010051909482, "grad_norm": 1.3828125, "learning_rate": 2.532047214113466e-05, "loss": 0.7917, "step": 665 }, { "epoch": 0.01700701856811057, "grad_norm": 1.328125, "learning_rate": 2.5510851630917626e-05, "loss": 0.7711, "step": 670 }, { "epoch": 0.01713393661712632, "grad_norm": 1.4921875, "learning_rate": 2.5701231120700594e-05, "loss": 0.799, "step": 675 }, { "epoch": 0.01726085466614207, "grad_norm": 1.34375, "learning_rate": 2.589161061048356e-05, "loss": 0.8004, "step": 680 }, { "epoch": 0.01738777271515782, "grad_norm": 1.40625, "learning_rate": 2.6081990100266527e-05, "loss": 0.7804, "step": 685 }, { "epoch": 0.017514690764173575, "grad_norm": 1.9453125, "learning_rate": 2.62723695900495e-05, "loss": 0.8277, "step": 690 }, { "epoch": 0.017641608813189325, "grad_norm": 1.515625, "learning_rate": 2.6462749079832466e-05, "loss": 0.8378, "step": 695 }, { "epoch": 0.017768526862205074, "grad_norm": 1.65625, "learning_rate": 2.665312856961543e-05, "loss": 0.7667, "step": 700 }, { "epoch": 0.017895444911220824, "grad_norm": 1.7265625, "learning_rate": 2.68435080593984e-05, "loss": 0.8154, "step": 705 }, { "epoch": 0.018022362960236574, "grad_norm": 1.546875, "learning_rate": 2.7033887549181367e-05, "loss": 0.762, "step": 710 }, { "epoch": 0.018149281009252324, "grad_norm": 1.3984375, "learning_rate": 2.722426703896433e-05, "loss": 0.7604, "step": 715 }, { "epoch": 0.018276199058268078, "grad_norm": 2.03125, "learning_rate": 2.74146465287473e-05, "loss": 0.7916, "step": 720 }, { "epoch": 0.018403117107283828, "grad_norm": 1.3046875, "learning_rate": 2.7605026018530267e-05, "loss": 0.7644, "step": 725 }, { "epoch": 0.018530035156299578, "grad_norm": 1.1484375, "learning_rate": 2.779540550831324e-05, "loss": 0.7819, "step": 730 }, { "epoch": 0.018656953205315328, "grad_norm": 1.3125, "learning_rate": 2.79857849980962e-05, "loss": 0.7561, "step": 735 }, { "epoch": 0.018783871254331078, "grad_norm": 1.328125, "learning_rate": 2.817616448787917e-05, "loss": 0.7791, "step": 740 }, { "epoch": 0.018910789303346828, "grad_norm": 1.125, "learning_rate": 2.836654397766214e-05, "loss": 0.7501, "step": 745 }, { "epoch": 0.01903770735236258, "grad_norm": 4.0, "learning_rate": 2.8556923467445104e-05, "loss": 0.7769, "step": 750 }, { "epoch": 0.01916462540137833, "grad_norm": 1.515625, "learning_rate": 2.8747302957228072e-05, "loss": 0.8059, "step": 755 }, { "epoch": 0.01929154345039408, "grad_norm": 1.28125, "learning_rate": 2.893768244701104e-05, "loss": 0.7381, "step": 760 }, { "epoch": 0.01941846149940983, "grad_norm": 1.3984375, "learning_rate": 2.9128061936794008e-05, "loss": 0.7719, "step": 765 }, { "epoch": 0.01954537954842558, "grad_norm": 1.3125, "learning_rate": 2.9318441426576972e-05, "loss": 0.7458, "step": 770 }, { "epoch": 0.01967229759744133, "grad_norm": 1.5234375, "learning_rate": 2.950882091635994e-05, "loss": 0.7818, "step": 775 }, { "epoch": 0.019799215646457084, "grad_norm": 1.3515625, "learning_rate": 2.969920040614291e-05, "loss": 0.7656, "step": 780 }, { "epoch": 0.019926133695472834, "grad_norm": 1.328125, "learning_rate": 2.9889579895925876e-05, "loss": 0.788, "step": 785 }, { "epoch": 0.020053051744488584, "grad_norm": 1.21875, "learning_rate": 3.0079959385708844e-05, "loss": 0.7759, "step": 790 }, { "epoch": 0.020179969793504334, "grad_norm": 1.265625, "learning_rate": 3.0270338875491812e-05, "loss": 0.785, "step": 795 }, { "epoch": 0.020306887842520084, "grad_norm": 1.203125, "learning_rate": 3.0460718365274777e-05, "loss": 0.7527, "step": 800 }, { "epoch": 0.020433805891535834, "grad_norm": 1.1875, "learning_rate": 3.0651097855057745e-05, "loss": 0.7369, "step": 805 }, { "epoch": 0.020560723940551588, "grad_norm": 1.328125, "learning_rate": 3.0841477344840716e-05, "loss": 0.7809, "step": 810 }, { "epoch": 0.020687641989567337, "grad_norm": 1.1796875, "learning_rate": 3.103185683462368e-05, "loss": 0.7537, "step": 815 }, { "epoch": 0.020814560038583087, "grad_norm": 1.46875, "learning_rate": 3.1222236324406645e-05, "loss": 0.7496, "step": 820 }, { "epoch": 0.020941478087598837, "grad_norm": 1.1640625, "learning_rate": 3.1412615814189617e-05, "loss": 0.7835, "step": 825 }, { "epoch": 0.021068396136614587, "grad_norm": 1.09375, "learning_rate": 3.160299530397258e-05, "loss": 0.8078, "step": 830 }, { "epoch": 0.021195314185630337, "grad_norm": 1.3671875, "learning_rate": 3.1793374793755546e-05, "loss": 0.792, "step": 835 }, { "epoch": 0.02132223223464609, "grad_norm": 1.3671875, "learning_rate": 3.198375428353852e-05, "loss": 0.7801, "step": 840 }, { "epoch": 0.02144915028366184, "grad_norm": 1.34375, "learning_rate": 3.217413377332149e-05, "loss": 0.7767, "step": 845 }, { "epoch": 0.02157606833267759, "grad_norm": 1.2265625, "learning_rate": 3.236451326310445e-05, "loss": 0.7361, "step": 850 }, { "epoch": 0.02170298638169334, "grad_norm": 1.21875, "learning_rate": 3.255489275288742e-05, "loss": 0.7332, "step": 855 }, { "epoch": 0.02182990443070909, "grad_norm": 1.6015625, "learning_rate": 3.274527224267039e-05, "loss": 0.7609, "step": 860 }, { "epoch": 0.02195682247972484, "grad_norm": 1.0546875, "learning_rate": 3.2935651732453353e-05, "loss": 0.7424, "step": 865 }, { "epoch": 0.02208374052874059, "grad_norm": 1.203125, "learning_rate": 3.312603122223632e-05, "loss": 0.7594, "step": 870 }, { "epoch": 0.022210658577756344, "grad_norm": 1.4296875, "learning_rate": 3.331641071201929e-05, "loss": 0.7754, "step": 875 }, { "epoch": 0.022337576626772094, "grad_norm": 1.1796875, "learning_rate": 3.3506790201802254e-05, "loss": 0.7339, "step": 880 }, { "epoch": 0.022464494675787844, "grad_norm": 1.2109375, "learning_rate": 3.3697169691585225e-05, "loss": 0.7469, "step": 885 }, { "epoch": 0.022591412724803594, "grad_norm": 1.3515625, "learning_rate": 3.388754918136819e-05, "loss": 0.7497, "step": 890 }, { "epoch": 0.022718330773819344, "grad_norm": 1.484375, "learning_rate": 3.407792867115116e-05, "loss": 0.7736, "step": 895 }, { "epoch": 0.022845248822835094, "grad_norm": 1.203125, "learning_rate": 3.4268308160934126e-05, "loss": 0.7338, "step": 900 }, { "epoch": 0.022972166871850847, "grad_norm": 1.4921875, "learning_rate": 3.445868765071709e-05, "loss": 0.7458, "step": 905 }, { "epoch": 0.023099084920866597, "grad_norm": 1.0234375, "learning_rate": 3.464906714050006e-05, "loss": 0.7402, "step": 910 }, { "epoch": 0.023226002969882347, "grad_norm": 1.640625, "learning_rate": 3.4839446630283026e-05, "loss": 0.7303, "step": 915 }, { "epoch": 0.023352921018898097, "grad_norm": 1.296875, "learning_rate": 3.5029826120066e-05, "loss": 0.7157, "step": 920 }, { "epoch": 0.023479839067913847, "grad_norm": 1.265625, "learning_rate": 3.522020560984896e-05, "loss": 0.7267, "step": 925 }, { "epoch": 0.023606757116929597, "grad_norm": 0.99609375, "learning_rate": 3.541058509963193e-05, "loss": 0.7436, "step": 930 }, { "epoch": 0.02373367516594535, "grad_norm": 1.140625, "learning_rate": 3.56009645894149e-05, "loss": 0.7357, "step": 935 }, { "epoch": 0.0238605932149611, "grad_norm": 1.1640625, "learning_rate": 3.579134407919786e-05, "loss": 0.7327, "step": 940 }, { "epoch": 0.02398751126397685, "grad_norm": 1.8125, "learning_rate": 3.5981723568980834e-05, "loss": 0.7235, "step": 945 }, { "epoch": 0.0241144293129926, "grad_norm": 1.3359375, "learning_rate": 3.61721030587638e-05, "loss": 0.7015, "step": 950 }, { "epoch": 0.02424134736200835, "grad_norm": 1.53125, "learning_rate": 3.636248254854676e-05, "loss": 0.7442, "step": 955 }, { "epoch": 0.0243682654110241, "grad_norm": 1.53125, "learning_rate": 3.6552862038329735e-05, "loss": 0.7534, "step": 960 }, { "epoch": 0.024495183460039854, "grad_norm": 1.34375, "learning_rate": 3.67432415281127e-05, "loss": 0.7566, "step": 965 }, { "epoch": 0.024622101509055604, "grad_norm": 1.5234375, "learning_rate": 3.693362101789567e-05, "loss": 0.7376, "step": 970 }, { "epoch": 0.024749019558071354, "grad_norm": 1.2890625, "learning_rate": 3.7124000507678635e-05, "loss": 0.7191, "step": 975 }, { "epoch": 0.024875937607087104, "grad_norm": 1.3671875, "learning_rate": 3.7314379997461607e-05, "loss": 0.7084, "step": 980 }, { "epoch": 0.025002855656102854, "grad_norm": 1.625, "learning_rate": 3.750475948724457e-05, "loss": 0.7412, "step": 985 }, { "epoch": 0.025129773705118603, "grad_norm": 1.2734375, "learning_rate": 3.7695138977027536e-05, "loss": 0.7371, "step": 990 }, { "epoch": 0.025256691754134357, "grad_norm": 1.1875, "learning_rate": 3.788551846681051e-05, "loss": 0.6818, "step": 995 }, { "epoch": 0.025383609803150107, "grad_norm": 1.3671875, "learning_rate": 3.807589795659347e-05, "loss": 0.7427, "step": 1000 }, { "epoch": 0.025510527852165857, "grad_norm": 1.1640625, "learning_rate": 3.826627744637644e-05, "loss": 0.7281, "step": 1005 }, { "epoch": 0.025637445901181607, "grad_norm": 1.6015625, "learning_rate": 3.845665693615941e-05, "loss": 0.7514, "step": 1010 }, { "epoch": 0.025764363950197357, "grad_norm": 1.03125, "learning_rate": 3.864703642594237e-05, "loss": 0.7102, "step": 1015 }, { "epoch": 0.025891281999213107, "grad_norm": 1.2421875, "learning_rate": 3.883741591572534e-05, "loss": 0.7106, "step": 1020 }, { "epoch": 0.02601820004822886, "grad_norm": 1.59375, "learning_rate": 3.902779540550831e-05, "loss": 0.7762, "step": 1025 }, { "epoch": 0.02614511809724461, "grad_norm": 1.546875, "learning_rate": 3.921817489529128e-05, "loss": 0.7326, "step": 1030 }, { "epoch": 0.02627203614626036, "grad_norm": 1.375, "learning_rate": 3.9408554385074244e-05, "loss": 0.7161, "step": 1035 }, { "epoch": 0.02639895419527611, "grad_norm": 1.359375, "learning_rate": 3.9598933874857215e-05, "loss": 0.7344, "step": 1040 }, { "epoch": 0.02652587224429186, "grad_norm": 1.734375, "learning_rate": 3.978931336464018e-05, "loss": 0.7506, "step": 1045 }, { "epoch": 0.02665279029330761, "grad_norm": 1.234375, "learning_rate": 3.9979692854423145e-05, "loss": 0.7141, "step": 1050 }, { "epoch": 0.026779708342323363, "grad_norm": 1.5859375, "learning_rate": 4.017007234420611e-05, "loss": 0.7139, "step": 1055 }, { "epoch": 0.026906626391339113, "grad_norm": 1.2578125, "learning_rate": 4.036045183398908e-05, "loss": 0.7, "step": 1060 }, { "epoch": 0.027033544440354863, "grad_norm": 0.8359375, "learning_rate": 4.0550831323772045e-05, "loss": 0.699, "step": 1065 }, { "epoch": 0.027160462489370613, "grad_norm": 1.59375, "learning_rate": 4.0741210813555016e-05, "loss": 0.7395, "step": 1070 }, { "epoch": 0.027287380538386363, "grad_norm": 1.296875, "learning_rate": 4.093159030333799e-05, "loss": 0.7023, "step": 1075 }, { "epoch": 0.027414298587402113, "grad_norm": 1.4609375, "learning_rate": 4.112196979312095e-05, "loss": 0.7191, "step": 1080 }, { "epoch": 0.027541216636417867, "grad_norm": 1.3984375, "learning_rate": 4.131234928290392e-05, "loss": 0.6993, "step": 1085 }, { "epoch": 0.027668134685433617, "grad_norm": 1.484375, "learning_rate": 4.150272877268688e-05, "loss": 0.7054, "step": 1090 }, { "epoch": 0.027795052734449367, "grad_norm": 1.53125, "learning_rate": 4.169310826246985e-05, "loss": 0.7155, "step": 1095 }, { "epoch": 0.027921970783465117, "grad_norm": 1.0546875, "learning_rate": 4.188348775225282e-05, "loss": 0.7181, "step": 1100 }, { "epoch": 0.028048888832480866, "grad_norm": 1.609375, "learning_rate": 4.207386724203579e-05, "loss": 0.699, "step": 1105 }, { "epoch": 0.028175806881496616, "grad_norm": 1.515625, "learning_rate": 4.226424673181876e-05, "loss": 0.741, "step": 1110 }, { "epoch": 0.02830272493051237, "grad_norm": 1.65625, "learning_rate": 4.2454626221601725e-05, "loss": 0.6791, "step": 1115 }, { "epoch": 0.02842964297952812, "grad_norm": 1.1484375, "learning_rate": 4.264500571138469e-05, "loss": 0.7146, "step": 1120 }, { "epoch": 0.02855656102854387, "grad_norm": 1.6015625, "learning_rate": 4.2835385201167654e-05, "loss": 0.7209, "step": 1125 }, { "epoch": 0.02868347907755962, "grad_norm": 1.8125, "learning_rate": 4.3025764690950625e-05, "loss": 0.7073, "step": 1130 }, { "epoch": 0.02881039712657537, "grad_norm": 1.4140625, "learning_rate": 4.321614418073359e-05, "loss": 0.7497, "step": 1135 }, { "epoch": 0.02893731517559112, "grad_norm": 3.140625, "learning_rate": 4.340652367051656e-05, "loss": 0.7213, "step": 1140 }, { "epoch": 0.02906423322460687, "grad_norm": 1.2734375, "learning_rate": 4.3596903160299526e-05, "loss": 0.6982, "step": 1145 }, { "epoch": 0.029191151273622623, "grad_norm": 1.3359375, "learning_rate": 4.37872826500825e-05, "loss": 0.6729, "step": 1150 }, { "epoch": 0.029318069322638373, "grad_norm": 1.2578125, "learning_rate": 4.3977662139865455e-05, "loss": 0.7018, "step": 1155 }, { "epoch": 0.029444987371654123, "grad_norm": 1.546875, "learning_rate": 4.4168041629648426e-05, "loss": 0.6933, "step": 1160 }, { "epoch": 0.029571905420669873, "grad_norm": 1.40625, "learning_rate": 4.43584211194314e-05, "loss": 0.7109, "step": 1165 }, { "epoch": 0.029698823469685623, "grad_norm": 1.140625, "learning_rate": 4.454880060921436e-05, "loss": 0.6615, "step": 1170 }, { "epoch": 0.029825741518701373, "grad_norm": 1.515625, "learning_rate": 4.4739180098997334e-05, "loss": 0.7078, "step": 1175 }, { "epoch": 0.029952659567717126, "grad_norm": 1.1171875, "learning_rate": 4.49295595887803e-05, "loss": 0.7374, "step": 1180 }, { "epoch": 0.030079577616732876, "grad_norm": 1.3671875, "learning_rate": 4.511993907856327e-05, "loss": 0.6978, "step": 1185 }, { "epoch": 0.030206495665748626, "grad_norm": 1.2734375, "learning_rate": 4.531031856834623e-05, "loss": 0.697, "step": 1190 }, { "epoch": 0.030333413714764376, "grad_norm": 1.8046875, "learning_rate": 4.55006980581292e-05, "loss": 0.7155, "step": 1195 }, { "epoch": 0.030460331763780126, "grad_norm": 1.5078125, "learning_rate": 4.569107754791216e-05, "loss": 0.7299, "step": 1200 }, { "epoch": 0.030587249812795876, "grad_norm": 1.078125, "learning_rate": 4.5881457037695135e-05, "loss": 0.6845, "step": 1205 }, { "epoch": 0.03071416786181163, "grad_norm": 1.2734375, "learning_rate": 4.6071836527478106e-05, "loss": 0.7057, "step": 1210 }, { "epoch": 0.03084108591082738, "grad_norm": 1.34375, "learning_rate": 4.626221601726107e-05, "loss": 0.6979, "step": 1215 }, { "epoch": 0.03096800395984313, "grad_norm": 1.0625, "learning_rate": 4.645259550704404e-05, "loss": 0.6719, "step": 1220 }, { "epoch": 0.03109492200885888, "grad_norm": 1.296875, "learning_rate": 4.6642974996827e-05, "loss": 0.732, "step": 1225 }, { "epoch": 0.03122184005787463, "grad_norm": 1.078125, "learning_rate": 4.683335448660997e-05, "loss": 0.6862, "step": 1230 }, { "epoch": 0.03134875810689038, "grad_norm": 1.28125, "learning_rate": 4.7023733976392936e-05, "loss": 0.7366, "step": 1235 }, { "epoch": 0.03147567615590613, "grad_norm": 1.234375, "learning_rate": 4.721411346617591e-05, "loss": 0.6893, "step": 1240 }, { "epoch": 0.03160259420492188, "grad_norm": 1.1328125, "learning_rate": 4.740449295595888e-05, "loss": 0.6801, "step": 1245 }, { "epoch": 0.03172951225393763, "grad_norm": 1.3046875, "learning_rate": 4.759487244574184e-05, "loss": 0.7213, "step": 1250 }, { "epoch": 0.03185643030295338, "grad_norm": 1.3359375, "learning_rate": 4.7785251935524814e-05, "loss": 0.6802, "step": 1255 }, { "epoch": 0.031983348351969136, "grad_norm": 1.09375, "learning_rate": 4.797563142530777e-05, "loss": 0.6857, "step": 1260 }, { "epoch": 0.03211026640098488, "grad_norm": 1.28125, "learning_rate": 4.8166010915090744e-05, "loss": 0.7129, "step": 1265 }, { "epoch": 0.032237184450000636, "grad_norm": 1.0859375, "learning_rate": 4.835639040487371e-05, "loss": 0.6858, "step": 1270 }, { "epoch": 0.03236410249901638, "grad_norm": 1.1171875, "learning_rate": 4.854676989465668e-05, "loss": 0.6728, "step": 1275 }, { "epoch": 0.032491020548032136, "grad_norm": 1.078125, "learning_rate": 4.8737149384439644e-05, "loss": 0.6887, "step": 1280 }, { "epoch": 0.03261793859704789, "grad_norm": 1.2265625, "learning_rate": 4.8927528874222615e-05, "loss": 0.6996, "step": 1285 }, { "epoch": 0.032744856646063636, "grad_norm": 1.2890625, "learning_rate": 4.911790836400559e-05, "loss": 0.7047, "step": 1290 }, { "epoch": 0.03287177469507939, "grad_norm": 0.69921875, "learning_rate": 4.9308287853788545e-05, "loss": 0.6854, "step": 1295 }, { "epoch": 0.032998692744095136, "grad_norm": 1.0625, "learning_rate": 4.9498667343571516e-05, "loss": 0.6856, "step": 1300 }, { "epoch": 0.03312561079311089, "grad_norm": 0.67578125, "learning_rate": 4.968904683335448e-05, "loss": 0.6567, "step": 1305 }, { "epoch": 0.033252528842126636, "grad_norm": 1.4140625, "learning_rate": 4.987942632313745e-05, "loss": 0.7169, "step": 1310 }, { "epoch": 0.03337944689114239, "grad_norm": 1.0, "learning_rate": 5.0069805812920416e-05, "loss": 0.6801, "step": 1315 }, { "epoch": 0.03350636494015814, "grad_norm": 0.9140625, "learning_rate": 5.026018530270339e-05, "loss": 0.691, "step": 1320 }, { "epoch": 0.03363328298917389, "grad_norm": 1.125, "learning_rate": 5.0450564792486346e-05, "loss": 0.6955, "step": 1325 }, { "epoch": 0.03376020103818964, "grad_norm": 0.88671875, "learning_rate": 5.064094428226932e-05, "loss": 0.6679, "step": 1330 }, { "epoch": 0.03388711908720539, "grad_norm": 0.93359375, "learning_rate": 5.083132377205228e-05, "loss": 0.6718, "step": 1335 }, { "epoch": 0.03401403713622114, "grad_norm": 0.80859375, "learning_rate": 5.102170326183525e-05, "loss": 0.6725, "step": 1340 }, { "epoch": 0.034140955185236896, "grad_norm": 0.8828125, "learning_rate": 5.1212082751618224e-05, "loss": 0.6271, "step": 1345 }, { "epoch": 0.03426787323425264, "grad_norm": 0.984375, "learning_rate": 5.140246224140119e-05, "loss": 0.6749, "step": 1350 }, { "epoch": 0.034394791283268396, "grad_norm": 0.9296875, "learning_rate": 5.159284173118416e-05, "loss": 0.6698, "step": 1355 }, { "epoch": 0.03452170933228414, "grad_norm": 0.86328125, "learning_rate": 5.178322122096712e-05, "loss": 0.6728, "step": 1360 }, { "epoch": 0.034648627381299896, "grad_norm": 1.0390625, "learning_rate": 5.197360071075009e-05, "loss": 0.7094, "step": 1365 }, { "epoch": 0.03477554543031564, "grad_norm": 0.73828125, "learning_rate": 5.2163980200533054e-05, "loss": 0.6787, "step": 1370 }, { "epoch": 0.034902463479331396, "grad_norm": 0.87109375, "learning_rate": 5.2354359690316025e-05, "loss": 0.6592, "step": 1375 }, { "epoch": 0.03502938152834715, "grad_norm": 0.86328125, "learning_rate": 5.2544739180099e-05, "loss": 0.6585, "step": 1380 }, { "epoch": 0.035156299577362896, "grad_norm": 0.90625, "learning_rate": 5.273511866988196e-05, "loss": 0.6725, "step": 1385 }, { "epoch": 0.03528321762637865, "grad_norm": 0.984375, "learning_rate": 5.292549815966493e-05, "loss": 0.6723, "step": 1390 }, { "epoch": 0.035410135675394395, "grad_norm": 0.81640625, "learning_rate": 5.311587764944789e-05, "loss": 0.6691, "step": 1395 }, { "epoch": 0.03553705372441015, "grad_norm": 1.0390625, "learning_rate": 5.330625713923086e-05, "loss": 0.6773, "step": 1400 }, { "epoch": 0.0356639717734259, "grad_norm": 0.8359375, "learning_rate": 5.3496636629013826e-05, "loss": 0.6568, "step": 1405 }, { "epoch": 0.03579088982244165, "grad_norm": 0.90625, "learning_rate": 5.36870161187968e-05, "loss": 0.6847, "step": 1410 }, { "epoch": 0.0359178078714574, "grad_norm": 0.953125, "learning_rate": 5.387739560857976e-05, "loss": 0.6464, "step": 1415 }, { "epoch": 0.03604472592047315, "grad_norm": 1.1015625, "learning_rate": 5.4067775098362734e-05, "loss": 0.6348, "step": 1420 }, { "epoch": 0.0361716439694889, "grad_norm": 1.0859375, "learning_rate": 5.4258154588145705e-05, "loss": 0.6737, "step": 1425 }, { "epoch": 0.03629856201850465, "grad_norm": 0.95703125, "learning_rate": 5.444853407792866e-05, "loss": 0.7024, "step": 1430 }, { "epoch": 0.0364254800675204, "grad_norm": 0.828125, "learning_rate": 5.4638913567711634e-05, "loss": 0.6834, "step": 1435 }, { "epoch": 0.036552398116536156, "grad_norm": 0.72265625, "learning_rate": 5.48292930574946e-05, "loss": 0.691, "step": 1440 }, { "epoch": 0.0366793161655519, "grad_norm": 0.94140625, "learning_rate": 5.501967254727757e-05, "loss": 0.6826, "step": 1445 }, { "epoch": 0.036806234214567655, "grad_norm": 1.15625, "learning_rate": 5.5210052037060535e-05, "loss": 0.6588, "step": 1450 }, { "epoch": 0.0369331522635834, "grad_norm": 0.68359375, "learning_rate": 5.5400431526843506e-05, "loss": 0.6584, "step": 1455 }, { "epoch": 0.037060070312599155, "grad_norm": 0.7578125, "learning_rate": 5.559081101662648e-05, "loss": 0.6509, "step": 1460 }, { "epoch": 0.0371869883616149, "grad_norm": 0.5078125, "learning_rate": 5.5781190506409435e-05, "loss": 0.6281, "step": 1465 }, { "epoch": 0.037313906410630655, "grad_norm": 0.80859375, "learning_rate": 5.59715699961924e-05, "loss": 0.6634, "step": 1470 }, { "epoch": 0.03744082445964641, "grad_norm": 0.875, "learning_rate": 5.616194948597537e-05, "loss": 0.652, "step": 1475 }, { "epoch": 0.037567742508662155, "grad_norm": 0.83984375, "learning_rate": 5.635232897575834e-05, "loss": 0.6838, "step": 1480 }, { "epoch": 0.03769466055767791, "grad_norm": 0.73828125, "learning_rate": 5.654270846554131e-05, "loss": 0.7079, "step": 1485 }, { "epoch": 0.037821578606693655, "grad_norm": 0.9140625, "learning_rate": 5.673308795532428e-05, "loss": 0.6795, "step": 1490 }, { "epoch": 0.03794849665570941, "grad_norm": 0.6171875, "learning_rate": 5.692346744510724e-05, "loss": 0.6233, "step": 1495 }, { "epoch": 0.03807541470472516, "grad_norm": 0.68359375, "learning_rate": 5.711384693489021e-05, "loss": 0.6671, "step": 1500 }, { "epoch": 0.03820233275374091, "grad_norm": 0.96875, "learning_rate": 5.730422642467317e-05, "loss": 0.6537, "step": 1505 }, { "epoch": 0.03832925080275666, "grad_norm": 0.72265625, "learning_rate": 5.7494605914456143e-05, "loss": 0.6516, "step": 1510 }, { "epoch": 0.03845616885177241, "grad_norm": 0.81640625, "learning_rate": 5.7684985404239115e-05, "loss": 0.6746, "step": 1515 }, { "epoch": 0.03858308690078816, "grad_norm": 0.88671875, "learning_rate": 5.787536489402208e-05, "loss": 0.6706, "step": 1520 }, { "epoch": 0.03871000494980391, "grad_norm": 0.80859375, "learning_rate": 5.806574438380505e-05, "loss": 0.687, "step": 1525 }, { "epoch": 0.03883692299881966, "grad_norm": 0.80078125, "learning_rate": 5.8256123873588015e-05, "loss": 0.6922, "step": 1530 }, { "epoch": 0.038963841047835415, "grad_norm": 0.69921875, "learning_rate": 5.844650336337098e-05, "loss": 0.6822, "step": 1535 }, { "epoch": 0.03909075909685116, "grad_norm": 0.70703125, "learning_rate": 5.8636882853153944e-05, "loss": 0.6735, "step": 1540 }, { "epoch": 0.039217677145866915, "grad_norm": 0.83203125, "learning_rate": 5.8827262342936916e-05, "loss": 0.6716, "step": 1545 }, { "epoch": 0.03934459519488266, "grad_norm": 0.7578125, "learning_rate": 5.901764183271988e-05, "loss": 0.6528, "step": 1550 }, { "epoch": 0.039471513243898415, "grad_norm": 1.953125, "learning_rate": 5.920802132250285e-05, "loss": 0.6813, "step": 1555 }, { "epoch": 0.03959843129291417, "grad_norm": 0.60546875, "learning_rate": 5.939840081228582e-05, "loss": 0.6623, "step": 1560 }, { "epoch": 0.039725349341929915, "grad_norm": 0.58984375, "learning_rate": 5.958878030206879e-05, "loss": 0.6935, "step": 1565 }, { "epoch": 0.03985226739094567, "grad_norm": 0.703125, "learning_rate": 5.977915979185175e-05, "loss": 0.645, "step": 1570 }, { "epoch": 0.039979185439961415, "grad_norm": 0.828125, "learning_rate": 5.996953928163472e-05, "loss": 0.6075, "step": 1575 }, { "epoch": 0.04010610348897717, "grad_norm": 0.7109375, "learning_rate": 6.015991877141769e-05, "loss": 0.6185, "step": 1580 }, { "epoch": 0.040233021537992915, "grad_norm": 1.03125, "learning_rate": 6.035029826120065e-05, "loss": 0.6657, "step": 1585 }, { "epoch": 0.04035993958700867, "grad_norm": 0.62109375, "learning_rate": 6.0540677750983624e-05, "loss": 0.6555, "step": 1590 }, { "epoch": 0.04048685763602442, "grad_norm": 0.66796875, "learning_rate": 6.0731057240766596e-05, "loss": 0.6466, "step": 1595 }, { "epoch": 0.04061377568504017, "grad_norm": 0.68359375, "learning_rate": 6.092143673054955e-05, "loss": 0.6787, "step": 1600 }, { "epoch": 0.04074069373405592, "grad_norm": 0.94140625, "learning_rate": 6.111181622033252e-05, "loss": 0.6447, "step": 1605 }, { "epoch": 0.04086761178307167, "grad_norm": 0.80078125, "learning_rate": 6.130219571011549e-05, "loss": 0.6502, "step": 1610 }, { "epoch": 0.04099452983208742, "grad_norm": 0.75390625, "learning_rate": 6.149257519989845e-05, "loss": 0.6198, "step": 1615 }, { "epoch": 0.041121447881103175, "grad_norm": 0.609375, "learning_rate": 6.168295468968143e-05, "loss": 0.6673, "step": 1620 }, { "epoch": 0.04124836593011892, "grad_norm": 0.703125, "learning_rate": 6.18733341794644e-05, "loss": 0.685, "step": 1625 }, { "epoch": 0.041375283979134675, "grad_norm": 0.5859375, "learning_rate": 6.206371366924736e-05, "loss": 0.6181, "step": 1630 }, { "epoch": 0.04150220202815042, "grad_norm": 0.72265625, "learning_rate": 6.225409315903033e-05, "loss": 0.6791, "step": 1635 }, { "epoch": 0.041629120077166175, "grad_norm": 0.7109375, "learning_rate": 6.244447264881329e-05, "loss": 0.667, "step": 1640 }, { "epoch": 0.04175603812618192, "grad_norm": 0.80078125, "learning_rate": 6.263485213859625e-05, "loss": 0.6519, "step": 1645 }, { "epoch": 0.041882956175197675, "grad_norm": 0.640625, "learning_rate": 6.282523162837923e-05, "loss": 0.659, "step": 1650 }, { "epoch": 0.04200987422421343, "grad_norm": 0.625, "learning_rate": 6.30156111181622e-05, "loss": 0.6423, "step": 1655 }, { "epoch": 0.042136792273229175, "grad_norm": 0.7578125, "learning_rate": 6.320599060794516e-05, "loss": 0.653, "step": 1660 }, { "epoch": 0.04226371032224493, "grad_norm": 0.69921875, "learning_rate": 6.339637009772814e-05, "loss": 0.6731, "step": 1665 }, { "epoch": 0.042390628371260675, "grad_norm": 0.53515625, "learning_rate": 6.358674958751109e-05, "loss": 0.6201, "step": 1670 }, { "epoch": 0.04251754642027643, "grad_norm": 0.79296875, "learning_rate": 6.377712907729407e-05, "loss": 0.6541, "step": 1675 }, { "epoch": 0.04264446446929218, "grad_norm": 0.8515625, "learning_rate": 6.396750856707703e-05, "loss": 0.681, "step": 1680 }, { "epoch": 0.04277138251830793, "grad_norm": 0.6015625, "learning_rate": 6.415788805686e-05, "loss": 0.6555, "step": 1685 }, { "epoch": 0.04289830056732368, "grad_norm": 0.9140625, "learning_rate": 6.434826754664298e-05, "loss": 0.6523, "step": 1690 }, { "epoch": 0.04302521861633943, "grad_norm": 0.76953125, "learning_rate": 6.453864703642594e-05, "loss": 0.6443, "step": 1695 }, { "epoch": 0.04315213666535518, "grad_norm": 0.671875, "learning_rate": 6.47290265262089e-05, "loss": 0.6453, "step": 1700 }, { "epoch": 0.04327905471437093, "grad_norm": 0.56640625, "learning_rate": 6.491940601599187e-05, "loss": 0.6404, "step": 1705 }, { "epoch": 0.04340597276338668, "grad_norm": 0.5234375, "learning_rate": 6.510978550577484e-05, "loss": 0.6793, "step": 1710 }, { "epoch": 0.043532890812402435, "grad_norm": 0.8125, "learning_rate": 6.53001649955578e-05, "loss": 0.6599, "step": 1715 }, { "epoch": 0.04365980886141818, "grad_norm": 0.98046875, "learning_rate": 6.549054448534078e-05, "loss": 0.6774, "step": 1720 }, { "epoch": 0.043786726910433935, "grad_norm": 0.76171875, "learning_rate": 6.568092397512374e-05, "loss": 0.6553, "step": 1725 }, { "epoch": 0.04391364495944968, "grad_norm": 0.65234375, "learning_rate": 6.587130346490671e-05, "loss": 0.6735, "step": 1730 }, { "epoch": 0.044040563008465435, "grad_norm": 0.57421875, "learning_rate": 6.606168295468969e-05, "loss": 0.6245, "step": 1735 }, { "epoch": 0.04416748105748118, "grad_norm": 0.546875, "learning_rate": 6.625206244447264e-05, "loss": 0.6334, "step": 1740 }, { "epoch": 0.044294399106496934, "grad_norm": 0.734375, "learning_rate": 6.644244193425561e-05, "loss": 0.661, "step": 1745 }, { "epoch": 0.04442131715551269, "grad_norm": 0.5234375, "learning_rate": 6.663282142403858e-05, "loss": 0.6352, "step": 1750 }, { "epoch": 0.044548235204528434, "grad_norm": 0.578125, "learning_rate": 6.682320091382154e-05, "loss": 0.6618, "step": 1755 }, { "epoch": 0.04467515325354419, "grad_norm": 0.69140625, "learning_rate": 6.701358040360451e-05, "loss": 0.6618, "step": 1760 }, { "epoch": 0.044802071302559934, "grad_norm": 0.5859375, "learning_rate": 6.720395989338749e-05, "loss": 0.6546, "step": 1765 }, { "epoch": 0.04492898935157569, "grad_norm": 0.57421875, "learning_rate": 6.739433938317045e-05, "loss": 0.6602, "step": 1770 }, { "epoch": 0.04505590740059144, "grad_norm": 0.90234375, "learning_rate": 6.758471887295342e-05, "loss": 0.6874, "step": 1775 }, { "epoch": 0.04518282544960719, "grad_norm": 0.5390625, "learning_rate": 6.777509836273638e-05, "loss": 0.5836, "step": 1780 }, { "epoch": 0.04530974349862294, "grad_norm": 0.70703125, "learning_rate": 6.796547785251934e-05, "loss": 0.6631, "step": 1785 }, { "epoch": 0.04543666154763869, "grad_norm": 0.5234375, "learning_rate": 6.815585734230232e-05, "loss": 0.6002, "step": 1790 }, { "epoch": 0.04556357959665444, "grad_norm": 0.57421875, "learning_rate": 6.834623683208529e-05, "loss": 0.6386, "step": 1795 }, { "epoch": 0.04569049764567019, "grad_norm": 0.640625, "learning_rate": 6.853661632186825e-05, "loss": 0.6911, "step": 1800 }, { "epoch": 0.04581741569468594, "grad_norm": 0.671875, "learning_rate": 6.872699581165122e-05, "loss": 0.6634, "step": 1805 }, { "epoch": 0.045944333743701694, "grad_norm": 0.984375, "learning_rate": 6.891737530143418e-05, "loss": 0.6662, "step": 1810 }, { "epoch": 0.04607125179271744, "grad_norm": 0.52734375, "learning_rate": 6.910775479121715e-05, "loss": 0.6487, "step": 1815 }, { "epoch": 0.046198169841733194, "grad_norm": 0.73046875, "learning_rate": 6.929813428100012e-05, "loss": 0.6393, "step": 1820 }, { "epoch": 0.04632508789074894, "grad_norm": 0.60546875, "learning_rate": 6.948851377078309e-05, "loss": 0.6467, "step": 1825 }, { "epoch": 0.046452005939764694, "grad_norm": 0.5703125, "learning_rate": 6.967889326056605e-05, "loss": 0.6095, "step": 1830 }, { "epoch": 0.04657892398878045, "grad_norm": 0.65234375, "learning_rate": 6.986927275034903e-05, "loss": 0.6743, "step": 1835 }, { "epoch": 0.046705842037796194, "grad_norm": 0.76953125, "learning_rate": 7.0059652240132e-05, "loss": 0.6454, "step": 1840 }, { "epoch": 0.04683276008681195, "grad_norm": 0.546875, "learning_rate": 7.025003172991496e-05, "loss": 0.6315, "step": 1845 }, { "epoch": 0.046959678135827694, "grad_norm": 0.78125, "learning_rate": 7.044041121969792e-05, "loss": 0.6169, "step": 1850 }, { "epoch": 0.04708659618484345, "grad_norm": 0.67578125, "learning_rate": 7.063079070948089e-05, "loss": 0.6548, "step": 1855 }, { "epoch": 0.047213514233859194, "grad_norm": 0.78125, "learning_rate": 7.082117019926385e-05, "loss": 0.671, "step": 1860 }, { "epoch": 0.04734043228287495, "grad_norm": 0.6640625, "learning_rate": 7.101154968904683e-05, "loss": 0.6811, "step": 1865 }, { "epoch": 0.0474673503318907, "grad_norm": 0.6796875, "learning_rate": 7.12019291788298e-05, "loss": 0.6282, "step": 1870 }, { "epoch": 0.04759426838090645, "grad_norm": 0.8203125, "learning_rate": 7.139230866861276e-05, "loss": 0.6645, "step": 1875 }, { "epoch": 0.0477211864299222, "grad_norm": 0.5703125, "learning_rate": 7.158268815839573e-05, "loss": 0.6365, "step": 1880 }, { "epoch": 0.04784810447893795, "grad_norm": 0.515625, "learning_rate": 7.177306764817869e-05, "loss": 0.6158, "step": 1885 }, { "epoch": 0.0479750225279537, "grad_norm": 0.5390625, "learning_rate": 7.196344713796167e-05, "loss": 0.6389, "step": 1890 }, { "epoch": 0.048101940576969454, "grad_norm": 0.609375, "learning_rate": 7.215382662774463e-05, "loss": 0.6471, "step": 1895 }, { "epoch": 0.0482288586259852, "grad_norm": 0.56640625, "learning_rate": 7.23442061175276e-05, "loss": 0.6379, "step": 1900 }, { "epoch": 0.048355776675000954, "grad_norm": 0.75, "learning_rate": 7.253458560731058e-05, "loss": 0.6735, "step": 1905 }, { "epoch": 0.0484826947240167, "grad_norm": 0.6953125, "learning_rate": 7.272496509709353e-05, "loss": 0.6646, "step": 1910 }, { "epoch": 0.048609612773032454, "grad_norm": 0.546875, "learning_rate": 7.291534458687649e-05, "loss": 0.6406, "step": 1915 }, { "epoch": 0.0487365308220482, "grad_norm": 0.54296875, "learning_rate": 7.310572407665947e-05, "loss": 0.6613, "step": 1920 }, { "epoch": 0.048863448871063954, "grad_norm": 0.6015625, "learning_rate": 7.329610356644243e-05, "loss": 0.6043, "step": 1925 }, { "epoch": 0.04899036692007971, "grad_norm": 0.65234375, "learning_rate": 7.34864830562254e-05, "loss": 0.6611, "step": 1930 }, { "epoch": 0.049117284969095454, "grad_norm": 0.7421875, "learning_rate": 7.367686254600838e-05, "loss": 0.627, "step": 1935 }, { "epoch": 0.04924420301811121, "grad_norm": 0.59765625, "learning_rate": 7.386724203579134e-05, "loss": 0.662, "step": 1940 }, { "epoch": 0.049371121067126954, "grad_norm": 0.53515625, "learning_rate": 7.40576215255743e-05, "loss": 0.6376, "step": 1945 }, { "epoch": 0.04949803911614271, "grad_norm": 0.76171875, "learning_rate": 7.424800101535727e-05, "loss": 0.638, "step": 1950 }, { "epoch": 0.04962495716515846, "grad_norm": 0.81640625, "learning_rate": 7.443838050514024e-05, "loss": 0.6303, "step": 1955 }, { "epoch": 0.04975187521417421, "grad_norm": 0.6015625, "learning_rate": 7.462875999492321e-05, "loss": 0.6455, "step": 1960 }, { "epoch": 0.04987879326318996, "grad_norm": 0.62109375, "learning_rate": 7.481913948470618e-05, "loss": 0.6121, "step": 1965 }, { "epoch": 0.05000571131220571, "grad_norm": 0.75, "learning_rate": 7.500951897448914e-05, "loss": 0.6497, "step": 1970 }, { "epoch": 0.05013262936122146, "grad_norm": 0.52734375, "learning_rate": 7.519989846427211e-05, "loss": 0.6101, "step": 1975 }, { "epoch": 0.05025954741023721, "grad_norm": 0.5859375, "learning_rate": 7.539027795405507e-05, "loss": 0.6481, "step": 1980 }, { "epoch": 0.05038646545925296, "grad_norm": 0.451171875, "learning_rate": 7.558065744383805e-05, "loss": 0.6089, "step": 1985 }, { "epoch": 0.050513383508268714, "grad_norm": 0.609375, "learning_rate": 7.577103693362101e-05, "loss": 0.651, "step": 1990 }, { "epoch": 0.05064030155728446, "grad_norm": 0.671875, "learning_rate": 7.596141642340399e-05, "loss": 0.6594, "step": 1995 }, { "epoch": 0.050767219606300214, "grad_norm": 0.703125, "learning_rate": 7.615179591318694e-05, "loss": 0.6233, "step": 2000 }, { "epoch": 0.05089413765531596, "grad_norm": 0.54296875, "learning_rate": 7.634217540296991e-05, "loss": 0.6106, "step": 2005 }, { "epoch": 0.051021055704331714, "grad_norm": 0.75390625, "learning_rate": 7.653255489275289e-05, "loss": 0.6657, "step": 2010 }, { "epoch": 0.05114797375334746, "grad_norm": 0.65234375, "learning_rate": 7.672293438253585e-05, "loss": 0.6543, "step": 2015 }, { "epoch": 0.051274891802363214, "grad_norm": 0.7265625, "learning_rate": 7.691331387231882e-05, "loss": 0.6445, "step": 2020 }, { "epoch": 0.05140180985137897, "grad_norm": 0.60546875, "learning_rate": 7.710369336210178e-05, "loss": 0.6189, "step": 2025 }, { "epoch": 0.051528727900394714, "grad_norm": 0.482421875, "learning_rate": 7.729407285188474e-05, "loss": 0.5912, "step": 2030 }, { "epoch": 0.05165564594941047, "grad_norm": 0.66796875, "learning_rate": 7.748445234166772e-05, "loss": 0.6321, "step": 2035 }, { "epoch": 0.05178256399842621, "grad_norm": 0.494140625, "learning_rate": 7.767483183145067e-05, "loss": 0.6215, "step": 2040 }, { "epoch": 0.05190948204744197, "grad_norm": 0.70703125, "learning_rate": 7.786521132123365e-05, "loss": 0.62, "step": 2045 }, { "epoch": 0.05203640009645772, "grad_norm": 0.5390625, "learning_rate": 7.805559081101662e-05, "loss": 0.6119, "step": 2050 }, { "epoch": 0.05216331814547347, "grad_norm": 0.53515625, "learning_rate": 7.82459703007996e-05, "loss": 0.5962, "step": 2055 }, { "epoch": 0.05229023619448922, "grad_norm": 0.6171875, "learning_rate": 7.843634979058256e-05, "loss": 0.6369, "step": 2060 }, { "epoch": 0.05241715424350497, "grad_norm": 0.6484375, "learning_rate": 7.862672928036551e-05, "loss": 0.644, "step": 2065 }, { "epoch": 0.05254407229252072, "grad_norm": 0.58203125, "learning_rate": 7.881710877014849e-05, "loss": 0.6246, "step": 2070 }, { "epoch": 0.05267099034153647, "grad_norm": 0.494140625, "learning_rate": 7.900748825993145e-05, "loss": 0.6002, "step": 2075 }, { "epoch": 0.05279790839055222, "grad_norm": 0.65625, "learning_rate": 7.919786774971443e-05, "loss": 0.6182, "step": 2080 }, { "epoch": 0.052924826439567974, "grad_norm": 0.62890625, "learning_rate": 7.938824723949738e-05, "loss": 0.646, "step": 2085 }, { "epoch": 0.05305174448858372, "grad_norm": 0.5546875, "learning_rate": 7.957862672928036e-05, "loss": 0.622, "step": 2090 }, { "epoch": 0.05317866253759947, "grad_norm": 0.65625, "learning_rate": 7.976900621906332e-05, "loss": 0.6439, "step": 2095 }, { "epoch": 0.05330558058661522, "grad_norm": 0.54296875, "learning_rate": 7.995938570884629e-05, "loss": 0.587, "step": 2100 }, { "epoch": 0.05343249863563097, "grad_norm": 0.5859375, "learning_rate": 8.014976519862927e-05, "loss": 0.654, "step": 2105 }, { "epoch": 0.05355941668464673, "grad_norm": 0.5, "learning_rate": 8.034014468841222e-05, "loss": 0.6263, "step": 2110 }, { "epoch": 0.05368633473366247, "grad_norm": 0.59375, "learning_rate": 8.05305241781952e-05, "loss": 0.6465, "step": 2115 }, { "epoch": 0.05381325278267823, "grad_norm": 0.578125, "learning_rate": 8.072090366797816e-05, "loss": 0.643, "step": 2120 }, { "epoch": 0.05394017083169397, "grad_norm": 0.70703125, "learning_rate": 8.091128315776114e-05, "loss": 0.6197, "step": 2125 }, { "epoch": 0.05406708888070973, "grad_norm": 0.6015625, "learning_rate": 8.110166264754409e-05, "loss": 0.653, "step": 2130 }, { "epoch": 0.05419400692972547, "grad_norm": 0.515625, "learning_rate": 8.129204213732705e-05, "loss": 0.6484, "step": 2135 }, { "epoch": 0.05432092497874123, "grad_norm": 0.61328125, "learning_rate": 8.148242162711003e-05, "loss": 0.6467, "step": 2140 }, { "epoch": 0.05444784302775698, "grad_norm": 0.5859375, "learning_rate": 8.1672801116893e-05, "loss": 0.6501, "step": 2145 }, { "epoch": 0.054574761076772726, "grad_norm": 0.5078125, "learning_rate": 8.186318060667598e-05, "loss": 0.6246, "step": 2150 }, { "epoch": 0.05470167912578848, "grad_norm": 0.6640625, "learning_rate": 8.205356009645893e-05, "loss": 0.623, "step": 2155 }, { "epoch": 0.054828597174804226, "grad_norm": 0.76171875, "learning_rate": 8.22439395862419e-05, "loss": 0.649, "step": 2160 }, { "epoch": 0.05495551522381998, "grad_norm": 0.5703125, "learning_rate": 8.243431907602487e-05, "loss": 0.6043, "step": 2165 }, { "epoch": 0.05508243327283573, "grad_norm": 0.55859375, "learning_rate": 8.262469856580783e-05, "loss": 0.6557, "step": 2170 }, { "epoch": 0.05520935132185148, "grad_norm": 0.640625, "learning_rate": 8.281507805559081e-05, "loss": 0.6299, "step": 2175 }, { "epoch": 0.05533626937086723, "grad_norm": 0.6015625, "learning_rate": 8.300545754537376e-05, "loss": 0.6491, "step": 2180 }, { "epoch": 0.05546318741988298, "grad_norm": 0.546875, "learning_rate": 8.319583703515674e-05, "loss": 0.6459, "step": 2185 }, { "epoch": 0.05559010546889873, "grad_norm": 0.68359375, "learning_rate": 8.33862165249397e-05, "loss": 0.6279, "step": 2190 }, { "epoch": 0.05571702351791448, "grad_norm": 0.5078125, "learning_rate": 8.357659601472268e-05, "loss": 0.6086, "step": 2195 }, { "epoch": 0.05584394156693023, "grad_norm": 0.57421875, "learning_rate": 8.376697550450563e-05, "loss": 0.6297, "step": 2200 }, { "epoch": 0.055970859615945986, "grad_norm": 0.58984375, "learning_rate": 8.39573549942886e-05, "loss": 0.6362, "step": 2205 }, { "epoch": 0.05609777766496173, "grad_norm": 0.56640625, "learning_rate": 8.414773448407158e-05, "loss": 0.6122, "step": 2210 }, { "epoch": 0.056224695713977486, "grad_norm": 0.51171875, "learning_rate": 8.433811397385454e-05, "loss": 0.6127, "step": 2215 }, { "epoch": 0.05635161376299323, "grad_norm": 0.53515625, "learning_rate": 8.452849346363752e-05, "loss": 0.6421, "step": 2220 }, { "epoch": 0.056478531812008986, "grad_norm": 0.5703125, "learning_rate": 8.471887295342047e-05, "loss": 0.6287, "step": 2225 }, { "epoch": 0.05660544986102474, "grad_norm": 0.55078125, "learning_rate": 8.490925244320345e-05, "loss": 0.6354, "step": 2230 }, { "epoch": 0.056732367910040486, "grad_norm": 0.56640625, "learning_rate": 8.509963193298641e-05, "loss": 0.6613, "step": 2235 }, { "epoch": 0.05685928595905624, "grad_norm": 0.6171875, "learning_rate": 8.529001142276938e-05, "loss": 0.6648, "step": 2240 }, { "epoch": 0.056986204008071986, "grad_norm": 0.66796875, "learning_rate": 8.548039091255234e-05, "loss": 0.662, "step": 2245 }, { "epoch": 0.05711312205708774, "grad_norm": 0.462890625, "learning_rate": 8.567077040233531e-05, "loss": 0.6343, "step": 2250 }, { "epoch": 0.057240040106103486, "grad_norm": 0.53125, "learning_rate": 8.586114989211829e-05, "loss": 0.57, "step": 2255 }, { "epoch": 0.05736695815511924, "grad_norm": 0.55078125, "learning_rate": 8.605152938190125e-05, "loss": 0.6289, "step": 2260 }, { "epoch": 0.05749387620413499, "grad_norm": 0.64453125, "learning_rate": 8.624190887168423e-05, "loss": 0.6142, "step": 2265 }, { "epoch": 0.05762079425315074, "grad_norm": 0.59765625, "learning_rate": 8.643228836146718e-05, "loss": 0.6098, "step": 2270 }, { "epoch": 0.05774771230216649, "grad_norm": 0.67578125, "learning_rate": 8.662266785125014e-05, "loss": 0.6281, "step": 2275 }, { "epoch": 0.05787463035118224, "grad_norm": 0.51171875, "learning_rate": 8.681304734103312e-05, "loss": 0.6073, "step": 2280 }, { "epoch": 0.05800154840019799, "grad_norm": 0.55859375, "learning_rate": 8.700342683081609e-05, "loss": 0.6362, "step": 2285 }, { "epoch": 0.05812846644921374, "grad_norm": 0.5859375, "learning_rate": 8.719380632059905e-05, "loss": 0.6416, "step": 2290 }, { "epoch": 0.05825538449822949, "grad_norm": 0.61328125, "learning_rate": 8.738418581038202e-05, "loss": 0.5979, "step": 2295 }, { "epoch": 0.058382302547245246, "grad_norm": 0.55078125, "learning_rate": 8.7574565300165e-05, "loss": 0.6077, "step": 2300 }, { "epoch": 0.05850922059626099, "grad_norm": 0.5703125, "learning_rate": 8.776494478994796e-05, "loss": 0.5921, "step": 2305 }, { "epoch": 0.058636138645276746, "grad_norm": 0.53515625, "learning_rate": 8.795532427973091e-05, "loss": 0.6277, "step": 2310 }, { "epoch": 0.05876305669429249, "grad_norm": 0.52734375, "learning_rate": 8.814570376951389e-05, "loss": 0.5909, "step": 2315 }, { "epoch": 0.058889974743308246, "grad_norm": 0.71484375, "learning_rate": 8.833608325929685e-05, "loss": 0.657, "step": 2320 }, { "epoch": 0.059016892792324, "grad_norm": 0.6015625, "learning_rate": 8.852646274907983e-05, "loss": 0.6059, "step": 2325 }, { "epoch": 0.059143810841339746, "grad_norm": 0.52734375, "learning_rate": 8.87168422388628e-05, "loss": 0.6323, "step": 2330 }, { "epoch": 0.0592707288903555, "grad_norm": 0.51953125, "learning_rate": 8.890722172864576e-05, "loss": 0.5956, "step": 2335 }, { "epoch": 0.059397646939371246, "grad_norm": 0.5234375, "learning_rate": 8.909760121842872e-05, "loss": 0.6334, "step": 2340 }, { "epoch": 0.059524564988387, "grad_norm": 0.59765625, "learning_rate": 8.928798070821169e-05, "loss": 0.6395, "step": 2345 }, { "epoch": 0.059651483037402746, "grad_norm": 0.68359375, "learning_rate": 8.947836019799467e-05, "loss": 0.6459, "step": 2350 }, { "epoch": 0.0597784010864185, "grad_norm": 0.546875, "learning_rate": 8.966873968777762e-05, "loss": 0.5945, "step": 2355 }, { "epoch": 0.05990531913543425, "grad_norm": 0.56640625, "learning_rate": 8.98591191775606e-05, "loss": 0.6539, "step": 2360 }, { "epoch": 0.06003223718445, "grad_norm": 0.68359375, "learning_rate": 9.004949866734356e-05, "loss": 0.6224, "step": 2365 }, { "epoch": 0.06015915523346575, "grad_norm": 0.55859375, "learning_rate": 9.023987815712654e-05, "loss": 0.6155, "step": 2370 }, { "epoch": 0.0602860732824815, "grad_norm": 0.49609375, "learning_rate": 9.04302576469095e-05, "loss": 0.6172, "step": 2375 }, { "epoch": 0.06041299133149725, "grad_norm": 0.546875, "learning_rate": 9.062063713669245e-05, "loss": 0.6434, "step": 2380 }, { "epoch": 0.060539909380513006, "grad_norm": 0.62109375, "learning_rate": 9.081101662647543e-05, "loss": 0.6201, "step": 2385 }, { "epoch": 0.06066682742952875, "grad_norm": 0.4921875, "learning_rate": 9.10013961162584e-05, "loss": 0.6161, "step": 2390 }, { "epoch": 0.060793745478544506, "grad_norm": 0.53515625, "learning_rate": 9.119177560604138e-05, "loss": 0.6489, "step": 2395 }, { "epoch": 0.06092066352756025, "grad_norm": 0.578125, "learning_rate": 9.138215509582433e-05, "loss": 0.6683, "step": 2400 }, { "epoch": 0.061047581576576006, "grad_norm": 0.453125, "learning_rate": 9.15725345856073e-05, "loss": 0.6205, "step": 2405 }, { "epoch": 0.06117449962559175, "grad_norm": 0.48046875, "learning_rate": 9.176291407539027e-05, "loss": 0.619, "step": 2410 }, { "epoch": 0.061301417674607506, "grad_norm": 0.53125, "learning_rate": 9.195329356517323e-05, "loss": 0.6084, "step": 2415 }, { "epoch": 0.06142833572362326, "grad_norm": 0.53125, "learning_rate": 9.214367305495621e-05, "loss": 0.6375, "step": 2420 }, { "epoch": 0.061555253772639006, "grad_norm": 0.55859375, "learning_rate": 9.233405254473916e-05, "loss": 0.6214, "step": 2425 }, { "epoch": 0.06168217182165476, "grad_norm": 0.54296875, "learning_rate": 9.252443203452214e-05, "loss": 0.6299, "step": 2430 }, { "epoch": 0.061809089870670506, "grad_norm": 0.59375, "learning_rate": 9.27148115243051e-05, "loss": 0.6488, "step": 2435 }, { "epoch": 0.06193600791968626, "grad_norm": 0.466796875, "learning_rate": 9.290519101408808e-05, "loss": 0.6261, "step": 2440 }, { "epoch": 0.06206292596870201, "grad_norm": 0.5390625, "learning_rate": 9.309557050387105e-05, "loss": 0.6356, "step": 2445 }, { "epoch": 0.06218984401771776, "grad_norm": 0.5625, "learning_rate": 9.3285949993654e-05, "loss": 0.6417, "step": 2450 }, { "epoch": 0.06231676206673351, "grad_norm": 0.5234375, "learning_rate": 9.347632948343698e-05, "loss": 0.6392, "step": 2455 }, { "epoch": 0.06244368011574926, "grad_norm": 0.57421875, "learning_rate": 9.366670897321994e-05, "loss": 0.6104, "step": 2460 }, { "epoch": 0.06257059816476501, "grad_norm": 0.46484375, "learning_rate": 9.385708846300292e-05, "loss": 0.6127, "step": 2465 }, { "epoch": 0.06269751621378077, "grad_norm": 0.7265625, "learning_rate": 9.404746795278587e-05, "loss": 0.5829, "step": 2470 }, { "epoch": 0.06282443426279652, "grad_norm": 0.5234375, "learning_rate": 9.423784744256885e-05, "loss": 0.6239, "step": 2475 }, { "epoch": 0.06295135231181226, "grad_norm": 0.515625, "learning_rate": 9.442822693235181e-05, "loss": 0.6399, "step": 2480 }, { "epoch": 0.06307827036082801, "grad_norm": 0.56640625, "learning_rate": 9.461860642213478e-05, "loss": 0.6455, "step": 2485 }, { "epoch": 0.06320518840984377, "grad_norm": 0.5703125, "learning_rate": 9.480898591191776e-05, "loss": 0.6102, "step": 2490 }, { "epoch": 0.06333210645885952, "grad_norm": 0.470703125, "learning_rate": 9.499936540170071e-05, "loss": 0.5839, "step": 2495 }, { "epoch": 0.06345902450787526, "grad_norm": 0.56640625, "learning_rate": 9.518974489148369e-05, "loss": 0.6502, "step": 2500 }, { "epoch": 0.06358594255689101, "grad_norm": 0.55078125, "learning_rate": 9.538012438126665e-05, "loss": 0.6053, "step": 2505 }, { "epoch": 0.06371286060590677, "grad_norm": 0.51171875, "learning_rate": 9.557050387104963e-05, "loss": 0.6156, "step": 2510 }, { "epoch": 0.06383977865492252, "grad_norm": 0.6015625, "learning_rate": 9.576088336083258e-05, "loss": 0.6092, "step": 2515 }, { "epoch": 0.06396669670393827, "grad_norm": 0.53515625, "learning_rate": 9.595126285061554e-05, "loss": 0.6292, "step": 2520 }, { "epoch": 0.06409361475295401, "grad_norm": 0.515625, "learning_rate": 9.614164234039852e-05, "loss": 0.6206, "step": 2525 }, { "epoch": 0.06422053280196977, "grad_norm": 0.6328125, "learning_rate": 9.633202183018149e-05, "loss": 0.6429, "step": 2530 }, { "epoch": 0.06434745085098552, "grad_norm": 0.57421875, "learning_rate": 9.652240131996447e-05, "loss": 0.595, "step": 2535 }, { "epoch": 0.06447436890000127, "grad_norm": 0.65625, "learning_rate": 9.671278080974742e-05, "loss": 0.6042, "step": 2540 }, { "epoch": 0.06460128694901703, "grad_norm": 0.59375, "learning_rate": 9.69031602995304e-05, "loss": 0.6354, "step": 2545 }, { "epoch": 0.06472820499803277, "grad_norm": 0.6015625, "learning_rate": 9.709353978931336e-05, "loss": 0.6187, "step": 2550 }, { "epoch": 0.06485512304704852, "grad_norm": 0.60546875, "learning_rate": 9.728391927909632e-05, "loss": 0.6665, "step": 2555 }, { "epoch": 0.06498204109606427, "grad_norm": 0.490234375, "learning_rate": 9.747429876887929e-05, "loss": 0.6293, "step": 2560 }, { "epoch": 0.06510895914508003, "grad_norm": 0.57421875, "learning_rate": 9.766467825866225e-05, "loss": 0.6023, "step": 2565 }, { "epoch": 0.06523587719409578, "grad_norm": 0.484375, "learning_rate": 9.785505774844523e-05, "loss": 0.6092, "step": 2570 }, { "epoch": 0.06536279524311152, "grad_norm": 0.5546875, "learning_rate": 9.80454372382282e-05, "loss": 0.6174, "step": 2575 }, { "epoch": 0.06548971329212727, "grad_norm": 0.5, "learning_rate": 9.823581672801117e-05, "loss": 0.5959, "step": 2580 }, { "epoch": 0.06561663134114303, "grad_norm": 0.61328125, "learning_rate": 9.842619621779412e-05, "loss": 0.6428, "step": 2585 }, { "epoch": 0.06574354939015878, "grad_norm": 0.55859375, "learning_rate": 9.861657570757709e-05, "loss": 0.6309, "step": 2590 }, { "epoch": 0.06587046743917452, "grad_norm": 0.55859375, "learning_rate": 9.880695519736007e-05, "loss": 0.6325, "step": 2595 }, { "epoch": 0.06599738548819027, "grad_norm": 0.4921875, "learning_rate": 9.899733468714303e-05, "loss": 0.57, "step": 2600 }, { "epoch": 0.06612430353720603, "grad_norm": 0.46875, "learning_rate": 9.9187714176926e-05, "loss": 0.6094, "step": 2605 }, { "epoch": 0.06625122158622178, "grad_norm": 0.59765625, "learning_rate": 9.937809366670896e-05, "loss": 0.6386, "step": 2610 }, { "epoch": 0.06637813963523753, "grad_norm": 0.5703125, "learning_rate": 9.956847315649193e-05, "loss": 0.6176, "step": 2615 }, { "epoch": 0.06650505768425327, "grad_norm": 0.55859375, "learning_rate": 9.97588526462749e-05, "loss": 0.6248, "step": 2620 }, { "epoch": 0.06663197573326902, "grad_norm": 0.490234375, "learning_rate": 9.994923213605785e-05, "loss": 0.5997, "step": 2625 }, { "epoch": 0.06675889378228478, "grad_norm": 0.5234375, "learning_rate": 0.00010013961162584083, "loss": 0.672, "step": 2630 }, { "epoch": 0.06688581183130053, "grad_norm": 0.5, "learning_rate": 0.0001003299911156238, "loss": 0.5985, "step": 2635 }, { "epoch": 0.06701272988031629, "grad_norm": 0.64453125, "learning_rate": 0.00010052037060540678, "loss": 0.6486, "step": 2640 }, { "epoch": 0.06713964792933202, "grad_norm": 0.53515625, "learning_rate": 0.00010071075009518974, "loss": 0.6036, "step": 2645 }, { "epoch": 0.06726656597834778, "grad_norm": 0.5078125, "learning_rate": 0.00010090112958497269, "loss": 0.6068, "step": 2650 }, { "epoch": 0.06739348402736353, "grad_norm": 0.4609375, "learning_rate": 0.00010109150907475567, "loss": 0.5895, "step": 2655 }, { "epoch": 0.06752040207637929, "grad_norm": 0.5, "learning_rate": 0.00010128188856453863, "loss": 0.6453, "step": 2660 }, { "epoch": 0.06764732012539504, "grad_norm": 0.5390625, "learning_rate": 0.00010147226805432161, "loss": 0.632, "step": 2665 }, { "epoch": 0.06777423817441078, "grad_norm": 0.6171875, "learning_rate": 0.00010166264754410456, "loss": 0.6195, "step": 2670 }, { "epoch": 0.06790115622342653, "grad_norm": 0.5625, "learning_rate": 0.00010185302703388754, "loss": 0.6386, "step": 2675 }, { "epoch": 0.06802807427244228, "grad_norm": 0.59375, "learning_rate": 0.0001020434065236705, "loss": 0.6385, "step": 2680 }, { "epoch": 0.06815499232145804, "grad_norm": 0.45703125, "learning_rate": 0.00010223378601345347, "loss": 0.6061, "step": 2685 }, { "epoch": 0.06828191037047379, "grad_norm": 0.6328125, "learning_rate": 0.00010242416550323645, "loss": 0.6157, "step": 2690 }, { "epoch": 0.06840882841948953, "grad_norm": 0.57421875, "learning_rate": 0.0001026145449930194, "loss": 0.6134, "step": 2695 }, { "epoch": 0.06853574646850528, "grad_norm": 0.46875, "learning_rate": 0.00010280492448280238, "loss": 0.5608, "step": 2700 }, { "epoch": 0.06866266451752104, "grad_norm": 0.50390625, "learning_rate": 0.00010299530397258534, "loss": 0.6291, "step": 2705 }, { "epoch": 0.06878958256653679, "grad_norm": 0.5546875, "learning_rate": 0.00010318568346236832, "loss": 0.6008, "step": 2710 }, { "epoch": 0.06891650061555253, "grad_norm": 0.47265625, "learning_rate": 0.00010337606295215128, "loss": 0.6229, "step": 2715 }, { "epoch": 0.06904341866456828, "grad_norm": 0.60546875, "learning_rate": 0.00010356644244193424, "loss": 0.6362, "step": 2720 }, { "epoch": 0.06917033671358404, "grad_norm": 0.5625, "learning_rate": 0.00010375682193171721, "loss": 0.6163, "step": 2725 }, { "epoch": 0.06929725476259979, "grad_norm": 0.50390625, "learning_rate": 0.00010394720142150018, "loss": 0.5971, "step": 2730 }, { "epoch": 0.06942417281161554, "grad_norm": 0.5390625, "learning_rate": 0.00010413758091128316, "loss": 0.5724, "step": 2735 }, { "epoch": 0.06955109086063128, "grad_norm": 0.455078125, "learning_rate": 0.00010432796040106611, "loss": 0.6201, "step": 2740 }, { "epoch": 0.06967800890964704, "grad_norm": 0.578125, "learning_rate": 0.00010451833989084909, "loss": 0.615, "step": 2745 }, { "epoch": 0.06980492695866279, "grad_norm": 0.546875, "learning_rate": 0.00010470871938063205, "loss": 0.6191, "step": 2750 }, { "epoch": 0.06993184500767854, "grad_norm": 0.494140625, "learning_rate": 0.00010489909887041502, "loss": 0.6005, "step": 2755 }, { "epoch": 0.0700587630566943, "grad_norm": 0.478515625, "learning_rate": 0.000105089478360198, "loss": 0.6181, "step": 2760 }, { "epoch": 0.07018568110571004, "grad_norm": 0.400390625, "learning_rate": 0.00010527985784998094, "loss": 0.6085, "step": 2765 }, { "epoch": 0.07031259915472579, "grad_norm": 0.494140625, "learning_rate": 0.00010547023733976392, "loss": 0.587, "step": 2770 }, { "epoch": 0.07043951720374154, "grad_norm": 0.46875, "learning_rate": 0.00010566061682954689, "loss": 0.5961, "step": 2775 }, { "epoch": 0.0705664352527573, "grad_norm": 0.59765625, "learning_rate": 0.00010585099631932987, "loss": 0.6308, "step": 2780 }, { "epoch": 0.07069335330177305, "grad_norm": 0.5, "learning_rate": 0.00010604137580911282, "loss": 0.6073, "step": 2785 }, { "epoch": 0.07082027135078879, "grad_norm": 0.49609375, "learning_rate": 0.00010623175529889578, "loss": 0.5748, "step": 2790 }, { "epoch": 0.07094718939980454, "grad_norm": 0.5234375, "learning_rate": 0.00010642213478867876, "loss": 0.6517, "step": 2795 }, { "epoch": 0.0710741074488203, "grad_norm": 0.55859375, "learning_rate": 0.00010661251427846172, "loss": 0.601, "step": 2800 }, { "epoch": 0.07120102549783605, "grad_norm": 0.54296875, "learning_rate": 0.0001068028937682447, "loss": 0.6218, "step": 2805 }, { "epoch": 0.0713279435468518, "grad_norm": 0.5390625, "learning_rate": 0.00010699327325802765, "loss": 0.638, "step": 2810 }, { "epoch": 0.07145486159586754, "grad_norm": 0.62890625, "learning_rate": 0.00010718365274781063, "loss": 0.6301, "step": 2815 }, { "epoch": 0.0715817796448833, "grad_norm": 0.53515625, "learning_rate": 0.0001073740322375936, "loss": 0.6586, "step": 2820 }, { "epoch": 0.07170869769389905, "grad_norm": 0.53125, "learning_rate": 0.00010756441172737656, "loss": 0.6058, "step": 2825 }, { "epoch": 0.0718356157429148, "grad_norm": 0.56640625, "learning_rate": 0.00010775479121715952, "loss": 0.6134, "step": 2830 }, { "epoch": 0.07196253379193054, "grad_norm": 0.5234375, "learning_rate": 0.00010794517070694249, "loss": 0.6275, "step": 2835 }, { "epoch": 0.0720894518409463, "grad_norm": 0.5390625, "learning_rate": 0.00010813555019672547, "loss": 0.6114, "step": 2840 }, { "epoch": 0.07221636988996205, "grad_norm": 0.490234375, "learning_rate": 0.00010832592968650843, "loss": 0.6514, "step": 2845 }, { "epoch": 0.0723432879389778, "grad_norm": 0.5, "learning_rate": 0.00010851630917629141, "loss": 0.5789, "step": 2850 }, { "epoch": 0.07247020598799356, "grad_norm": 0.5, "learning_rate": 0.00010870668866607436, "loss": 0.6001, "step": 2855 }, { "epoch": 0.0725971240370093, "grad_norm": 0.46484375, "learning_rate": 0.00010889706815585733, "loss": 0.616, "step": 2860 }, { "epoch": 0.07272404208602505, "grad_norm": 0.53125, "learning_rate": 0.0001090874476456403, "loss": 0.5978, "step": 2865 }, { "epoch": 0.0728509601350408, "grad_norm": 0.5, "learning_rate": 0.00010927782713542327, "loss": 0.6331, "step": 2870 }, { "epoch": 0.07297787818405656, "grad_norm": 0.4765625, "learning_rate": 0.00010946820662520623, "loss": 0.5873, "step": 2875 }, { "epoch": 0.07310479623307231, "grad_norm": 0.50390625, "learning_rate": 0.0001096585861149892, "loss": 0.629, "step": 2880 }, { "epoch": 0.07323171428208805, "grad_norm": 0.427734375, "learning_rate": 0.00010984896560477218, "loss": 0.597, "step": 2885 }, { "epoch": 0.0733586323311038, "grad_norm": 0.490234375, "learning_rate": 0.00011003934509455514, "loss": 0.5983, "step": 2890 }, { "epoch": 0.07348555038011956, "grad_norm": 0.5234375, "learning_rate": 0.00011022972458433809, "loss": 0.6068, "step": 2895 }, { "epoch": 0.07361246842913531, "grad_norm": 0.5859375, "learning_rate": 0.00011042010407412107, "loss": 0.6355, "step": 2900 }, { "epoch": 0.07373938647815106, "grad_norm": 0.56640625, "learning_rate": 0.00011061048356390403, "loss": 0.6329, "step": 2905 }, { "epoch": 0.0738663045271668, "grad_norm": 0.53515625, "learning_rate": 0.00011080086305368701, "loss": 0.6179, "step": 2910 }, { "epoch": 0.07399322257618256, "grad_norm": 0.48828125, "learning_rate": 0.00011099124254346998, "loss": 0.6116, "step": 2915 }, { "epoch": 0.07412014062519831, "grad_norm": 0.546875, "learning_rate": 0.00011118162203325295, "loss": 0.6153, "step": 2920 }, { "epoch": 0.07424705867421406, "grad_norm": 0.56640625, "learning_rate": 0.0001113720015230359, "loss": 0.6367, "step": 2925 }, { "epoch": 0.0743739767232298, "grad_norm": 0.5078125, "learning_rate": 0.00011156238101281887, "loss": 0.5976, "step": 2930 }, { "epoch": 0.07450089477224556, "grad_norm": 0.50390625, "learning_rate": 0.00011175276050260185, "loss": 0.6221, "step": 2935 }, { "epoch": 0.07462781282126131, "grad_norm": 0.54296875, "learning_rate": 0.0001119431399923848, "loss": 0.6076, "step": 2940 }, { "epoch": 0.07475473087027706, "grad_norm": 0.47265625, "learning_rate": 0.00011213351948216778, "loss": 0.6082, "step": 2945 }, { "epoch": 0.07488164891929282, "grad_norm": 0.5, "learning_rate": 0.00011232389897195074, "loss": 0.593, "step": 2950 }, { "epoch": 0.07500856696830856, "grad_norm": 0.5390625, "learning_rate": 0.00011251427846173372, "loss": 0.6291, "step": 2955 }, { "epoch": 0.07513548501732431, "grad_norm": 0.455078125, "learning_rate": 0.00011270465795151668, "loss": 0.5949, "step": 2960 }, { "epoch": 0.07526240306634006, "grad_norm": 0.490234375, "learning_rate": 0.00011289503744129964, "loss": 0.5966, "step": 2965 }, { "epoch": 0.07538932111535582, "grad_norm": 0.546875, "learning_rate": 0.00011308541693108261, "loss": 0.6153, "step": 2970 }, { "epoch": 0.07551623916437157, "grad_norm": 0.50390625, "learning_rate": 0.00011327579642086558, "loss": 0.5814, "step": 2975 }, { "epoch": 0.07564315721338731, "grad_norm": 0.53125, "learning_rate": 0.00011346617591064856, "loss": 0.6303, "step": 2980 }, { "epoch": 0.07577007526240306, "grad_norm": 0.490234375, "learning_rate": 0.00011365655540043152, "loss": 0.6125, "step": 2985 }, { "epoch": 0.07589699331141882, "grad_norm": 0.484375, "learning_rate": 0.00011384693489021449, "loss": 0.6275, "step": 2990 }, { "epoch": 0.07602391136043457, "grad_norm": 0.462890625, "learning_rate": 0.00011403731437999745, "loss": 0.6015, "step": 2995 }, { "epoch": 0.07615082940945032, "grad_norm": 0.490234375, "learning_rate": 0.00011422769386978042, "loss": 0.5961, "step": 3000 }, { "epoch": 0.07627774745846606, "grad_norm": 0.5, "learning_rate": 0.00011441807335956339, "loss": 0.6127, "step": 3005 }, { "epoch": 0.07640466550748182, "grad_norm": 0.5, "learning_rate": 0.00011460845284934634, "loss": 0.6312, "step": 3010 }, { "epoch": 0.07653158355649757, "grad_norm": 0.494140625, "learning_rate": 0.00011479883233912932, "loss": 0.6124, "step": 3015 }, { "epoch": 0.07665850160551332, "grad_norm": 0.4765625, "learning_rate": 0.00011498921182891229, "loss": 0.5996, "step": 3020 }, { "epoch": 0.07678541965452908, "grad_norm": 0.51171875, "learning_rate": 0.00011517959131869527, "loss": 0.6102, "step": 3025 }, { "epoch": 0.07691233770354482, "grad_norm": 0.482421875, "learning_rate": 0.00011536997080847823, "loss": 0.5938, "step": 3030 }, { "epoch": 0.07703925575256057, "grad_norm": 0.470703125, "learning_rate": 0.00011556035029826118, "loss": 0.6038, "step": 3035 }, { "epoch": 0.07716617380157632, "grad_norm": 0.45703125, "learning_rate": 0.00011575072978804416, "loss": 0.5888, "step": 3040 }, { "epoch": 0.07729309185059208, "grad_norm": 0.482421875, "learning_rate": 0.00011594110927782712, "loss": 0.618, "step": 3045 }, { "epoch": 0.07742000989960782, "grad_norm": 0.546875, "learning_rate": 0.0001161314887676101, "loss": 0.6157, "step": 3050 }, { "epoch": 0.07754692794862357, "grad_norm": 0.5234375, "learning_rate": 0.00011632186825739305, "loss": 0.6504, "step": 3055 }, { "epoch": 0.07767384599763932, "grad_norm": 0.470703125, "learning_rate": 0.00011651224774717603, "loss": 0.5815, "step": 3060 }, { "epoch": 0.07780076404665508, "grad_norm": 0.466796875, "learning_rate": 0.000116702627236959, "loss": 0.6404, "step": 3065 }, { "epoch": 0.07792768209567083, "grad_norm": 0.54296875, "learning_rate": 0.00011689300672674196, "loss": 0.6381, "step": 3070 }, { "epoch": 0.07805460014468657, "grad_norm": 0.412109375, "learning_rate": 0.00011708338621652494, "loss": 0.5685, "step": 3075 }, { "epoch": 0.07818151819370232, "grad_norm": 0.447265625, "learning_rate": 0.00011727376570630789, "loss": 0.6064, "step": 3080 }, { "epoch": 0.07830843624271808, "grad_norm": 0.5234375, "learning_rate": 0.00011746414519609087, "loss": 0.6148, "step": 3085 }, { "epoch": 0.07843535429173383, "grad_norm": 0.48828125, "learning_rate": 0.00011765452468587383, "loss": 0.6073, "step": 3090 }, { "epoch": 0.07856227234074958, "grad_norm": 0.490234375, "learning_rate": 0.00011784490417565681, "loss": 0.6105, "step": 3095 }, { "epoch": 0.07868919038976532, "grad_norm": 0.453125, "learning_rate": 0.00011803528366543976, "loss": 0.5797, "step": 3100 }, { "epoch": 0.07881610843878108, "grad_norm": 0.53125, "learning_rate": 0.00011822566315522273, "loss": 0.6018, "step": 3105 }, { "epoch": 0.07894302648779683, "grad_norm": 0.470703125, "learning_rate": 0.0001184160426450057, "loss": 0.5867, "step": 3110 }, { "epoch": 0.07906994453681258, "grad_norm": 0.498046875, "learning_rate": 0.00011860642213478867, "loss": 0.61, "step": 3115 }, { "epoch": 0.07919686258582834, "grad_norm": 0.462890625, "learning_rate": 0.00011879680162457165, "loss": 0.6183, "step": 3120 }, { "epoch": 0.07932378063484408, "grad_norm": 0.45703125, "learning_rate": 0.0001189871811143546, "loss": 0.6028, "step": 3125 }, { "epoch": 0.07945069868385983, "grad_norm": 0.51171875, "learning_rate": 0.00011917756060413758, "loss": 0.6174, "step": 3130 }, { "epoch": 0.07957761673287558, "grad_norm": 0.482421875, "learning_rate": 0.00011936794009392054, "loss": 0.627, "step": 3135 }, { "epoch": 0.07970453478189134, "grad_norm": 0.46484375, "learning_rate": 0.0001195583195837035, "loss": 0.5751, "step": 3140 }, { "epoch": 0.07983145283090708, "grad_norm": 0.482421875, "learning_rate": 0.00011974869907348647, "loss": 0.5854, "step": 3145 }, { "epoch": 0.07995837087992283, "grad_norm": 0.4296875, "learning_rate": 0.00011993907856326943, "loss": 0.6023, "step": 3150 }, { "epoch": 0.08008528892893858, "grad_norm": 0.50390625, "learning_rate": 0.00012012945805305241, "loss": 0.6016, "step": 3155 }, { "epoch": 0.08021220697795434, "grad_norm": 0.60546875, "learning_rate": 0.00012031983754283538, "loss": 0.6212, "step": 3160 }, { "epoch": 0.08033912502697009, "grad_norm": 0.458984375, "learning_rate": 0.00012051021703261835, "loss": 0.6033, "step": 3165 }, { "epoch": 0.08046604307598583, "grad_norm": 0.55078125, "learning_rate": 0.0001207005965224013, "loss": 0.6028, "step": 3170 }, { "epoch": 0.08059296112500158, "grad_norm": 0.5078125, "learning_rate": 0.00012089097601218427, "loss": 0.5698, "step": 3175 }, { "epoch": 0.08071987917401734, "grad_norm": 0.478515625, "learning_rate": 0.00012108135550196725, "loss": 0.6084, "step": 3180 }, { "epoch": 0.08084679722303309, "grad_norm": 0.46875, "learning_rate": 0.00012127173499175021, "loss": 0.6183, "step": 3185 }, { "epoch": 0.08097371527204884, "grad_norm": 0.46484375, "learning_rate": 0.00012146211448153319, "loss": 0.6064, "step": 3190 }, { "epoch": 0.08110063332106458, "grad_norm": 0.455078125, "learning_rate": 0.00012165249397131614, "loss": 0.5829, "step": 3195 }, { "epoch": 0.08122755137008034, "grad_norm": 0.416015625, "learning_rate": 0.0001218428734610991, "loss": 0.6033, "step": 3200 }, { "epoch": 0.08135446941909609, "grad_norm": 0.470703125, "learning_rate": 0.00012203325295088208, "loss": 0.5905, "step": 3205 }, { "epoch": 0.08148138746811184, "grad_norm": 0.5, "learning_rate": 0.00012222363244066505, "loss": 0.6228, "step": 3210 }, { "epoch": 0.0816083055171276, "grad_norm": 0.423828125, "learning_rate": 0.00012241401193044803, "loss": 0.5903, "step": 3215 }, { "epoch": 0.08173522356614334, "grad_norm": 0.4765625, "learning_rate": 0.00012260439142023098, "loss": 0.633, "step": 3220 }, { "epoch": 0.08186214161515909, "grad_norm": 0.515625, "learning_rate": 0.00012279477091001396, "loss": 0.6106, "step": 3225 }, { "epoch": 0.08198905966417484, "grad_norm": 0.4921875, "learning_rate": 0.0001229851503997969, "loss": 0.6077, "step": 3230 }, { "epoch": 0.0821159777131906, "grad_norm": 0.546875, "learning_rate": 0.00012317552988957989, "loss": 0.5916, "step": 3235 }, { "epoch": 0.08224289576220635, "grad_norm": 0.52734375, "learning_rate": 0.00012336590937936286, "loss": 0.6004, "step": 3240 }, { "epoch": 0.08236981381122209, "grad_norm": 0.470703125, "learning_rate": 0.00012355628886914582, "loss": 0.5865, "step": 3245 }, { "epoch": 0.08249673186023784, "grad_norm": 0.4765625, "learning_rate": 0.0001237466683589288, "loss": 0.6099, "step": 3250 }, { "epoch": 0.0826236499092536, "grad_norm": 0.48828125, "learning_rate": 0.00012393704784871174, "loss": 0.6343, "step": 3255 }, { "epoch": 0.08275056795826935, "grad_norm": 0.4609375, "learning_rate": 0.00012412742733849472, "loss": 0.6, "step": 3260 }, { "epoch": 0.08287748600728509, "grad_norm": 0.435546875, "learning_rate": 0.0001243178068282777, "loss": 0.6127, "step": 3265 }, { "epoch": 0.08300440405630084, "grad_norm": 0.453125, "learning_rate": 0.00012450818631806065, "loss": 0.6184, "step": 3270 }, { "epoch": 0.0831313221053166, "grad_norm": 0.5078125, "learning_rate": 0.00012469856580784363, "loss": 0.6158, "step": 3275 }, { "epoch": 0.08325824015433235, "grad_norm": 0.53515625, "learning_rate": 0.00012488894529762658, "loss": 0.6081, "step": 3280 }, { "epoch": 0.0833851582033481, "grad_norm": 0.49609375, "learning_rate": 0.00012507932478740956, "loss": 0.5813, "step": 3285 }, { "epoch": 0.08351207625236384, "grad_norm": 0.4921875, "learning_rate": 0.0001252697042771925, "loss": 0.6234, "step": 3290 }, { "epoch": 0.0836389943013796, "grad_norm": 0.470703125, "learning_rate": 0.0001254600837669755, "loss": 0.6064, "step": 3295 }, { "epoch": 0.08376591235039535, "grad_norm": 0.458984375, "learning_rate": 0.00012565046325675847, "loss": 0.6004, "step": 3300 }, { "epoch": 0.0838928303994111, "grad_norm": 0.490234375, "learning_rate": 0.00012584084274654142, "loss": 0.6135, "step": 3305 }, { "epoch": 0.08401974844842686, "grad_norm": 0.46484375, "learning_rate": 0.0001260312222363244, "loss": 0.5976, "step": 3310 }, { "epoch": 0.0841466664974426, "grad_norm": 0.46875, "learning_rate": 0.00012622160172610735, "loss": 0.5942, "step": 3315 }, { "epoch": 0.08427358454645835, "grad_norm": 0.458984375, "learning_rate": 0.00012641198121589032, "loss": 0.623, "step": 3320 }, { "epoch": 0.0844005025954741, "grad_norm": 0.5078125, "learning_rate": 0.0001266023607056733, "loss": 0.605, "step": 3325 }, { "epoch": 0.08452742064448986, "grad_norm": 0.494140625, "learning_rate": 0.00012679274019545628, "loss": 0.6056, "step": 3330 }, { "epoch": 0.08465433869350561, "grad_norm": 0.45703125, "learning_rate": 0.00012698311968523923, "loss": 0.5948, "step": 3335 }, { "epoch": 0.08478125674252135, "grad_norm": 0.4765625, "learning_rate": 0.00012717349917502218, "loss": 0.6013, "step": 3340 }, { "epoch": 0.0849081747915371, "grad_norm": 0.470703125, "learning_rate": 0.00012736387866480516, "loss": 0.5813, "step": 3345 }, { "epoch": 0.08503509284055286, "grad_norm": 0.4921875, "learning_rate": 0.00012755425815458814, "loss": 0.5874, "step": 3350 }, { "epoch": 0.08516201088956861, "grad_norm": 0.5078125, "learning_rate": 0.00012774463764437112, "loss": 0.6148, "step": 3355 }, { "epoch": 0.08528892893858436, "grad_norm": 0.482421875, "learning_rate": 0.00012793501713415407, "loss": 0.6076, "step": 3360 }, { "epoch": 0.0854158469876001, "grad_norm": 0.4453125, "learning_rate": 0.00012812539662393705, "loss": 0.5665, "step": 3365 }, { "epoch": 0.08554276503661586, "grad_norm": 0.515625, "learning_rate": 0.00012831577611372, "loss": 0.5892, "step": 3370 }, { "epoch": 0.08566968308563161, "grad_norm": 0.4765625, "learning_rate": 0.00012850615560350298, "loss": 0.5863, "step": 3375 }, { "epoch": 0.08579660113464736, "grad_norm": 0.515625, "learning_rate": 0.00012869653509328595, "loss": 0.6006, "step": 3380 }, { "epoch": 0.0859235191836631, "grad_norm": 0.51171875, "learning_rate": 0.0001288869145830689, "loss": 0.5661, "step": 3385 }, { "epoch": 0.08605043723267886, "grad_norm": 0.5625, "learning_rate": 0.00012907729407285188, "loss": 0.6046, "step": 3390 }, { "epoch": 0.08617735528169461, "grad_norm": 0.474609375, "learning_rate": 0.00012926767356263483, "loss": 0.6138, "step": 3395 }, { "epoch": 0.08630427333071036, "grad_norm": 0.4453125, "learning_rate": 0.0001294580530524178, "loss": 0.6072, "step": 3400 }, { "epoch": 0.08643119137972612, "grad_norm": 0.47265625, "learning_rate": 0.00012964843254220076, "loss": 0.6207, "step": 3405 }, { "epoch": 0.08655810942874186, "grad_norm": 0.439453125, "learning_rate": 0.00012983881203198374, "loss": 0.5913, "step": 3410 }, { "epoch": 0.08668502747775761, "grad_norm": 0.51171875, "learning_rate": 0.00013002919152176672, "loss": 0.608, "step": 3415 }, { "epoch": 0.08681194552677336, "grad_norm": 0.484375, "learning_rate": 0.00013021957101154967, "loss": 0.6274, "step": 3420 }, { "epoch": 0.08693886357578912, "grad_norm": 0.4375, "learning_rate": 0.00013040995050133265, "loss": 0.5928, "step": 3425 }, { "epoch": 0.08706578162480487, "grad_norm": 0.443359375, "learning_rate": 0.0001306003299911156, "loss": 0.602, "step": 3430 }, { "epoch": 0.08719269967382061, "grad_norm": 0.52734375, "learning_rate": 0.00013079070948089858, "loss": 0.5963, "step": 3435 }, { "epoch": 0.08731961772283636, "grad_norm": 0.5078125, "learning_rate": 0.00013098108897068156, "loss": 0.587, "step": 3440 }, { "epoch": 0.08744653577185212, "grad_norm": 0.5078125, "learning_rate": 0.0001311714684604645, "loss": 0.5914, "step": 3445 }, { "epoch": 0.08757345382086787, "grad_norm": 0.48046875, "learning_rate": 0.00013136184795024748, "loss": 0.6168, "step": 3450 }, { "epoch": 0.08770037186988362, "grad_norm": 0.490234375, "learning_rate": 0.00013155222744003044, "loss": 0.5906, "step": 3455 }, { "epoch": 0.08782728991889936, "grad_norm": 0.4609375, "learning_rate": 0.00013174260692981341, "loss": 0.6356, "step": 3460 }, { "epoch": 0.08795420796791512, "grad_norm": 0.48046875, "learning_rate": 0.0001319329864195964, "loss": 0.6028, "step": 3465 }, { "epoch": 0.08808112601693087, "grad_norm": 0.46875, "learning_rate": 0.00013212336590937937, "loss": 0.5946, "step": 3470 }, { "epoch": 0.08820804406594662, "grad_norm": 0.435546875, "learning_rate": 0.00013231374539916232, "loss": 0.5921, "step": 3475 }, { "epoch": 0.08833496211496236, "grad_norm": 0.46875, "learning_rate": 0.00013250412488894527, "loss": 0.5667, "step": 3480 }, { "epoch": 0.08846188016397812, "grad_norm": 0.44921875, "learning_rate": 0.00013269450437872825, "loss": 0.5663, "step": 3485 }, { "epoch": 0.08858879821299387, "grad_norm": 0.486328125, "learning_rate": 0.00013288488386851123, "loss": 0.6144, "step": 3490 }, { "epoch": 0.08871571626200962, "grad_norm": 0.427734375, "learning_rate": 0.00013307526335829418, "loss": 0.5728, "step": 3495 }, { "epoch": 0.08884263431102538, "grad_norm": 0.51953125, "learning_rate": 0.00013326564284807716, "loss": 0.6221, "step": 3500 }, { "epoch": 0.08896955236004112, "grad_norm": 0.46875, "learning_rate": 0.00013345602233786014, "loss": 0.6027, "step": 3505 }, { "epoch": 0.08909647040905687, "grad_norm": 0.5078125, "learning_rate": 0.0001336464018276431, "loss": 0.6468, "step": 3510 }, { "epoch": 0.08922338845807262, "grad_norm": 0.404296875, "learning_rate": 0.00013383678131742604, "loss": 0.6003, "step": 3515 }, { "epoch": 0.08935030650708838, "grad_norm": 0.453125, "learning_rate": 0.00013402716080720902, "loss": 0.5981, "step": 3520 }, { "epoch": 0.08947722455610413, "grad_norm": 0.47265625, "learning_rate": 0.000134217540296992, "loss": 0.6179, "step": 3525 }, { "epoch": 0.08960414260511987, "grad_norm": 0.50390625, "learning_rate": 0.00013440791978677497, "loss": 0.5857, "step": 3530 }, { "epoch": 0.08973106065413562, "grad_norm": 0.466796875, "learning_rate": 0.00013459829927655792, "loss": 0.5821, "step": 3535 }, { "epoch": 0.08985797870315138, "grad_norm": 0.50390625, "learning_rate": 0.0001347886787663409, "loss": 0.6067, "step": 3540 }, { "epoch": 0.08998489675216713, "grad_norm": 0.470703125, "learning_rate": 0.00013497905825612385, "loss": 0.5518, "step": 3545 }, { "epoch": 0.09011181480118288, "grad_norm": 0.451171875, "learning_rate": 0.00013516943774590683, "loss": 0.6094, "step": 3550 }, { "epoch": 0.09023873285019862, "grad_norm": 0.4453125, "learning_rate": 0.0001353598172356898, "loss": 0.5796, "step": 3555 }, { "epoch": 0.09036565089921438, "grad_norm": 0.52734375, "learning_rate": 0.00013555019672547276, "loss": 0.5904, "step": 3560 }, { "epoch": 0.09049256894823013, "grad_norm": 0.494140625, "learning_rate": 0.00013574057621525574, "loss": 0.6085, "step": 3565 }, { "epoch": 0.09061948699724588, "grad_norm": 0.484375, "learning_rate": 0.0001359309557050387, "loss": 0.6197, "step": 3570 }, { "epoch": 0.09074640504626164, "grad_norm": 0.51171875, "learning_rate": 0.00013612133519482167, "loss": 0.6009, "step": 3575 }, { "epoch": 0.09087332309527738, "grad_norm": 0.515625, "learning_rate": 0.00013631171468460465, "loss": 0.6048, "step": 3580 }, { "epoch": 0.09100024114429313, "grad_norm": 0.53125, "learning_rate": 0.0001365020941743876, "loss": 0.6298, "step": 3585 }, { "epoch": 0.09112715919330888, "grad_norm": 0.4765625, "learning_rate": 0.00013669247366417057, "loss": 0.5879, "step": 3590 }, { "epoch": 0.09125407724232464, "grad_norm": 0.486328125, "learning_rate": 0.00013688285315395353, "loss": 0.6052, "step": 3595 }, { "epoch": 0.09138099529134038, "grad_norm": 0.484375, "learning_rate": 0.0001370732326437365, "loss": 0.6237, "step": 3600 }, { "epoch": 0.09150791334035613, "grad_norm": 0.498046875, "learning_rate": 0.00013726361213351948, "loss": 0.6253, "step": 3605 }, { "epoch": 0.09163483138937188, "grad_norm": 0.470703125, "learning_rate": 0.00013745399162330243, "loss": 0.5667, "step": 3610 }, { "epoch": 0.09176174943838764, "grad_norm": 0.5078125, "learning_rate": 0.0001376443711130854, "loss": 0.5872, "step": 3615 }, { "epoch": 0.09188866748740339, "grad_norm": 0.4921875, "learning_rate": 0.00013783475060286836, "loss": 0.5736, "step": 3620 }, { "epoch": 0.09201558553641913, "grad_norm": 0.462890625, "learning_rate": 0.00013802513009265134, "loss": 0.5735, "step": 3625 }, { "epoch": 0.09214250358543488, "grad_norm": 0.4765625, "learning_rate": 0.0001382155095824343, "loss": 0.5812, "step": 3630 }, { "epoch": 0.09226942163445064, "grad_norm": 0.5078125, "learning_rate": 0.00013840588907221727, "loss": 0.6121, "step": 3635 }, { "epoch": 0.09239633968346639, "grad_norm": 0.498046875, "learning_rate": 0.00013859626856200025, "loss": 0.6091, "step": 3640 }, { "epoch": 0.09252325773248214, "grad_norm": 0.51171875, "learning_rate": 0.00013878664805178323, "loss": 0.6104, "step": 3645 }, { "epoch": 0.09265017578149788, "grad_norm": 0.48828125, "learning_rate": 0.00013897702754156618, "loss": 0.5663, "step": 3650 }, { "epoch": 0.09277709383051364, "grad_norm": 0.4921875, "learning_rate": 0.00013916740703134913, "loss": 0.6469, "step": 3655 }, { "epoch": 0.09290401187952939, "grad_norm": 0.431640625, "learning_rate": 0.0001393577865211321, "loss": 0.6131, "step": 3660 }, { "epoch": 0.09303092992854514, "grad_norm": 0.470703125, "learning_rate": 0.00013954816601091508, "loss": 0.6047, "step": 3665 }, { "epoch": 0.0931578479775609, "grad_norm": 0.4453125, "learning_rate": 0.00013973854550069806, "loss": 0.5655, "step": 3670 }, { "epoch": 0.09328476602657663, "grad_norm": 0.46484375, "learning_rate": 0.000139928924990481, "loss": 0.5723, "step": 3675 }, { "epoch": 0.09341168407559239, "grad_norm": 0.51953125, "learning_rate": 0.000140119304480264, "loss": 0.6003, "step": 3680 }, { "epoch": 0.09353860212460814, "grad_norm": 0.482421875, "learning_rate": 0.00014030968397004694, "loss": 0.5767, "step": 3685 }, { "epoch": 0.0936655201736239, "grad_norm": 0.462890625, "learning_rate": 0.00014050006345982992, "loss": 0.5726, "step": 3690 }, { "epoch": 0.09379243822263963, "grad_norm": 0.48828125, "learning_rate": 0.0001406904429496129, "loss": 0.5969, "step": 3695 }, { "epoch": 0.09391935627165539, "grad_norm": 0.46484375, "learning_rate": 0.00014088082243939585, "loss": 0.6232, "step": 3700 }, { "epoch": 0.09404627432067114, "grad_norm": 0.4375, "learning_rate": 0.00014107120192917883, "loss": 0.5967, "step": 3705 }, { "epoch": 0.0941731923696869, "grad_norm": 0.484375, "learning_rate": 0.00014126158141896178, "loss": 0.616, "step": 3710 }, { "epoch": 0.09430011041870265, "grad_norm": 0.46484375, "learning_rate": 0.00014145196090874476, "loss": 0.6114, "step": 3715 }, { "epoch": 0.09442702846771839, "grad_norm": 0.5, "learning_rate": 0.0001416423403985277, "loss": 0.6007, "step": 3720 }, { "epoch": 0.09455394651673414, "grad_norm": 0.44140625, "learning_rate": 0.00014183271988831069, "loss": 0.5997, "step": 3725 }, { "epoch": 0.0946808645657499, "grad_norm": 0.453125, "learning_rate": 0.00014202309937809366, "loss": 0.6093, "step": 3730 }, { "epoch": 0.09480778261476565, "grad_norm": 0.482421875, "learning_rate": 0.00014221347886787661, "loss": 0.5747, "step": 3735 }, { "epoch": 0.0949347006637814, "grad_norm": 0.455078125, "learning_rate": 0.0001424038583576596, "loss": 0.601, "step": 3740 }, { "epoch": 0.09506161871279714, "grad_norm": 0.46875, "learning_rate": 0.00014259423784744254, "loss": 0.5947, "step": 3745 }, { "epoch": 0.0951885367618129, "grad_norm": 0.48046875, "learning_rate": 0.00014278461733722552, "loss": 0.6021, "step": 3750 }, { "epoch": 0.09531545481082865, "grad_norm": 0.4375, "learning_rate": 0.0001429749968270085, "loss": 0.5895, "step": 3755 }, { "epoch": 0.0954423728598444, "grad_norm": 0.5234375, "learning_rate": 0.00014316537631679145, "loss": 0.6209, "step": 3760 }, { "epoch": 0.09556929090886015, "grad_norm": 0.4453125, "learning_rate": 0.00014335575580657443, "loss": 0.5899, "step": 3765 }, { "epoch": 0.0956962089578759, "grad_norm": 0.431640625, "learning_rate": 0.00014354613529635738, "loss": 0.5999, "step": 3770 }, { "epoch": 0.09582312700689165, "grad_norm": 0.46484375, "learning_rate": 0.00014373651478614036, "loss": 0.5876, "step": 3775 }, { "epoch": 0.0959500450559074, "grad_norm": 0.50390625, "learning_rate": 0.00014392689427592334, "loss": 0.6257, "step": 3780 }, { "epoch": 0.09607696310492315, "grad_norm": 0.474609375, "learning_rate": 0.0001441172737657063, "loss": 0.6, "step": 3785 }, { "epoch": 0.09620388115393891, "grad_norm": 0.47265625, "learning_rate": 0.00014430765325548927, "loss": 0.6053, "step": 3790 }, { "epoch": 0.09633079920295465, "grad_norm": 0.42578125, "learning_rate": 0.00014449803274527222, "loss": 0.5847, "step": 3795 }, { "epoch": 0.0964577172519704, "grad_norm": 0.466796875, "learning_rate": 0.0001446884122350552, "loss": 0.598, "step": 3800 }, { "epoch": 0.09658463530098615, "grad_norm": 0.44921875, "learning_rate": 0.00014487879172483817, "loss": 0.6139, "step": 3805 }, { "epoch": 0.09671155335000191, "grad_norm": 0.453125, "learning_rate": 0.00014506917121462115, "loss": 0.5886, "step": 3810 }, { "epoch": 0.09683847139901765, "grad_norm": 0.470703125, "learning_rate": 0.0001452595507044041, "loss": 0.6051, "step": 3815 }, { "epoch": 0.0969653894480334, "grad_norm": 0.43359375, "learning_rate": 0.00014544993019418705, "loss": 0.5975, "step": 3820 }, { "epoch": 0.09709230749704915, "grad_norm": 0.455078125, "learning_rate": 0.00014564030968397003, "loss": 0.6003, "step": 3825 }, { "epoch": 0.09721922554606491, "grad_norm": 0.48828125, "learning_rate": 0.00014583068917375298, "loss": 0.604, "step": 3830 }, { "epoch": 0.09734614359508066, "grad_norm": 0.48828125, "learning_rate": 0.00014602106866353596, "loss": 0.6183, "step": 3835 }, { "epoch": 0.0974730616440964, "grad_norm": 0.5, "learning_rate": 0.00014621144815331894, "loss": 0.6002, "step": 3840 }, { "epoch": 0.09759997969311215, "grad_norm": 0.486328125, "learning_rate": 0.00014640182764310192, "loss": 0.6152, "step": 3845 }, { "epoch": 0.09772689774212791, "grad_norm": 0.52734375, "learning_rate": 0.00014659220713288487, "loss": 0.5627, "step": 3850 }, { "epoch": 0.09785381579114366, "grad_norm": 0.4765625, "learning_rate": 0.00014678258662266782, "loss": 0.614, "step": 3855 }, { "epoch": 0.09798073384015941, "grad_norm": 0.4921875, "learning_rate": 0.0001469729661124508, "loss": 0.5839, "step": 3860 }, { "epoch": 0.09810765188917515, "grad_norm": 0.44921875, "learning_rate": 0.00014716334560223378, "loss": 0.6067, "step": 3865 }, { "epoch": 0.09823456993819091, "grad_norm": 0.421875, "learning_rate": 0.00014735372509201675, "loss": 0.5754, "step": 3870 }, { "epoch": 0.09836148798720666, "grad_norm": 0.4453125, "learning_rate": 0.0001475441045817997, "loss": 0.5853, "step": 3875 }, { "epoch": 0.09848840603622241, "grad_norm": 0.490234375, "learning_rate": 0.00014773448407158268, "loss": 0.5955, "step": 3880 }, { "epoch": 0.09861532408523817, "grad_norm": 0.4453125, "learning_rate": 0.00014792486356136563, "loss": 0.5959, "step": 3885 }, { "epoch": 0.09874224213425391, "grad_norm": 0.52734375, "learning_rate": 0.0001481152430511486, "loss": 0.5687, "step": 3890 }, { "epoch": 0.09886916018326966, "grad_norm": 0.462890625, "learning_rate": 0.0001483056225409316, "loss": 0.5999, "step": 3895 }, { "epoch": 0.09899607823228541, "grad_norm": 0.435546875, "learning_rate": 0.00014849600203071454, "loss": 0.5872, "step": 3900 }, { "epoch": 0.09912299628130117, "grad_norm": 0.462890625, "learning_rate": 0.00014868638152049752, "loss": 0.5726, "step": 3905 }, { "epoch": 0.09924991433031692, "grad_norm": 0.484375, "learning_rate": 0.00014887676101028047, "loss": 0.6089, "step": 3910 }, { "epoch": 0.09937683237933266, "grad_norm": 0.474609375, "learning_rate": 0.00014906714050006345, "loss": 0.5923, "step": 3915 }, { "epoch": 0.09950375042834841, "grad_norm": 0.439453125, "learning_rate": 0.00014925751998984643, "loss": 0.583, "step": 3920 }, { "epoch": 0.09963066847736417, "grad_norm": 0.44140625, "learning_rate": 0.00014944789947962938, "loss": 0.5778, "step": 3925 }, { "epoch": 0.09975758652637992, "grad_norm": 0.486328125, "learning_rate": 0.00014963827896941236, "loss": 0.5801, "step": 3930 }, { "epoch": 0.09988450457539566, "grad_norm": 0.44140625, "learning_rate": 0.0001498286584591953, "loss": 0.5654, "step": 3935 }, { "epoch": 0.10001142262441141, "grad_norm": 0.486328125, "learning_rate": 0.00015001903794897828, "loss": 0.598, "step": 3940 }, { "epoch": 0.10013834067342717, "grad_norm": 0.423828125, "learning_rate": 0.00015020941743876124, "loss": 0.5858, "step": 3945 }, { "epoch": 0.10026525872244292, "grad_norm": 0.4609375, "learning_rate": 0.00015039979692854421, "loss": 0.5972, "step": 3950 }, { "epoch": 0.10039217677145867, "grad_norm": 0.5, "learning_rate": 0.0001505901764183272, "loss": 0.6028, "step": 3955 }, { "epoch": 0.10051909482047441, "grad_norm": 0.44921875, "learning_rate": 0.00015078055590811014, "loss": 0.5678, "step": 3960 }, { "epoch": 0.10064601286949017, "grad_norm": 0.470703125, "learning_rate": 0.00015097093539789312, "loss": 0.606, "step": 3965 }, { "epoch": 0.10077293091850592, "grad_norm": 0.458984375, "learning_rate": 0.0001511613148876761, "loss": 0.5587, "step": 3970 }, { "epoch": 0.10089984896752167, "grad_norm": 0.4296875, "learning_rate": 0.00015135169437745905, "loss": 0.5899, "step": 3975 }, { "epoch": 0.10102676701653743, "grad_norm": 0.453125, "learning_rate": 0.00015154207386724203, "loss": 0.5636, "step": 3980 }, { "epoch": 0.10115368506555317, "grad_norm": 0.4375, "learning_rate": 0.000151732453357025, "loss": 0.5687, "step": 3985 }, { "epoch": 0.10128060311456892, "grad_norm": 0.4765625, "learning_rate": 0.00015192283284680798, "loss": 0.5805, "step": 3990 }, { "epoch": 0.10140752116358467, "grad_norm": 0.4765625, "learning_rate": 0.0001521132123365909, "loss": 0.6074, "step": 3995 }, { "epoch": 0.10153443921260043, "grad_norm": 0.447265625, "learning_rate": 0.0001523035918263739, "loss": 0.5884, "step": 4000 }, { "epoch": 0.10166135726161618, "grad_norm": 0.439453125, "learning_rate": 0.00015249397131615686, "loss": 0.5822, "step": 4005 }, { "epoch": 0.10178827531063192, "grad_norm": 0.44921875, "learning_rate": 0.00015268435080593982, "loss": 0.5755, "step": 4010 }, { "epoch": 0.10191519335964767, "grad_norm": 0.46875, "learning_rate": 0.0001528747302957228, "loss": 0.5784, "step": 4015 }, { "epoch": 0.10204211140866343, "grad_norm": 0.435546875, "learning_rate": 0.00015306510978550577, "loss": 0.5927, "step": 4020 }, { "epoch": 0.10216902945767918, "grad_norm": 0.46484375, "learning_rate": 0.00015325548927528875, "loss": 0.5706, "step": 4025 }, { "epoch": 0.10229594750669492, "grad_norm": 0.46484375, "learning_rate": 0.0001534458687650717, "loss": 0.5926, "step": 4030 }, { "epoch": 0.10242286555571067, "grad_norm": 0.455078125, "learning_rate": 0.00015363624825485465, "loss": 0.602, "step": 4035 }, { "epoch": 0.10254978360472643, "grad_norm": 0.453125, "learning_rate": 0.00015382662774463763, "loss": 0.5912, "step": 4040 }, { "epoch": 0.10267670165374218, "grad_norm": 0.50390625, "learning_rate": 0.00015401700723442058, "loss": 0.6236, "step": 4045 }, { "epoch": 0.10280361970275793, "grad_norm": 0.453125, "learning_rate": 0.00015420738672420356, "loss": 0.5814, "step": 4050 }, { "epoch": 0.10293053775177367, "grad_norm": 0.494140625, "learning_rate": 0.00015439776621398654, "loss": 0.5971, "step": 4055 }, { "epoch": 0.10305745580078943, "grad_norm": 0.4296875, "learning_rate": 0.0001545881457037695, "loss": 0.575, "step": 4060 }, { "epoch": 0.10318437384980518, "grad_norm": 0.515625, "learning_rate": 0.00015477852519355247, "loss": 0.6267, "step": 4065 }, { "epoch": 0.10331129189882093, "grad_norm": 0.48828125, "learning_rate": 0.00015496890468333545, "loss": 0.5936, "step": 4070 }, { "epoch": 0.10343820994783669, "grad_norm": 0.4296875, "learning_rate": 0.00015515928417311842, "loss": 0.5733, "step": 4075 }, { "epoch": 0.10356512799685243, "grad_norm": 0.50390625, "learning_rate": 0.00015534966366290135, "loss": 0.5775, "step": 4080 }, { "epoch": 0.10369204604586818, "grad_norm": 0.470703125, "learning_rate": 0.00015554004315268433, "loss": 0.5802, "step": 4085 }, { "epoch": 0.10381896409488393, "grad_norm": 0.4765625, "learning_rate": 0.0001557304226424673, "loss": 0.5865, "step": 4090 }, { "epoch": 0.10394588214389969, "grad_norm": 0.451171875, "learning_rate": 0.00015592080213225025, "loss": 0.6109, "step": 4095 }, { "epoch": 0.10407280019291544, "grad_norm": 0.453125, "learning_rate": 0.00015611118162203323, "loss": 0.612, "step": 4100 }, { "epoch": 0.10419971824193118, "grad_norm": 0.474609375, "learning_rate": 0.0001563015611118162, "loss": 0.5828, "step": 4105 }, { "epoch": 0.10432663629094693, "grad_norm": 0.451171875, "learning_rate": 0.0001564919406015992, "loss": 0.5499, "step": 4110 }, { "epoch": 0.10445355433996269, "grad_norm": 0.462890625, "learning_rate": 0.00015668232009138214, "loss": 0.6043, "step": 4115 }, { "epoch": 0.10458047238897844, "grad_norm": 0.4375, "learning_rate": 0.00015687269958116512, "loss": 0.5702, "step": 4120 }, { "epoch": 0.1047073904379942, "grad_norm": 0.435546875, "learning_rate": 0.0001570630790709481, "loss": 0.6009, "step": 4125 }, { "epoch": 0.10483430848700993, "grad_norm": 0.4921875, "learning_rate": 0.00015725345856073102, "loss": 0.6101, "step": 4130 }, { "epoch": 0.10496122653602569, "grad_norm": 0.455078125, "learning_rate": 0.000157443838050514, "loss": 0.5827, "step": 4135 }, { "epoch": 0.10508814458504144, "grad_norm": 0.46875, "learning_rate": 0.00015763421754029698, "loss": 0.576, "step": 4140 }, { "epoch": 0.1052150626340572, "grad_norm": 0.458984375, "learning_rate": 0.00015782459703007995, "loss": 0.5681, "step": 4145 }, { "epoch": 0.10534198068307293, "grad_norm": 0.455078125, "learning_rate": 0.0001580149765198629, "loss": 0.6152, "step": 4150 }, { "epoch": 0.10546889873208869, "grad_norm": 0.439453125, "learning_rate": 0.00015820535600964588, "loss": 0.5663, "step": 4155 }, { "epoch": 0.10559581678110444, "grad_norm": 0.458984375, "learning_rate": 0.00015839573549942886, "loss": 0.5881, "step": 4160 }, { "epoch": 0.1057227348301202, "grad_norm": 0.453125, "learning_rate": 0.00015858611498921179, "loss": 0.5666, "step": 4165 }, { "epoch": 0.10584965287913595, "grad_norm": 0.42578125, "learning_rate": 0.00015877649447899476, "loss": 0.5744, "step": 4170 }, { "epoch": 0.10597657092815169, "grad_norm": 0.466796875, "learning_rate": 0.00015896687396877774, "loss": 0.6017, "step": 4175 }, { "epoch": 0.10610348897716744, "grad_norm": 0.431640625, "learning_rate": 0.00015915725345856072, "loss": 0.6209, "step": 4180 }, { "epoch": 0.1062304070261832, "grad_norm": 0.4609375, "learning_rate": 0.00015934763294834367, "loss": 0.5779, "step": 4185 }, { "epoch": 0.10635732507519895, "grad_norm": 0.46484375, "learning_rate": 0.00015953801243812665, "loss": 0.585, "step": 4190 }, { "epoch": 0.1064842431242147, "grad_norm": 0.46484375, "learning_rate": 0.00015972839192790963, "loss": 0.5975, "step": 4195 }, { "epoch": 0.10661116117323044, "grad_norm": 0.46484375, "learning_rate": 0.00015991877141769258, "loss": 0.5659, "step": 4200 }, { "epoch": 0.1067380792222462, "grad_norm": 0.48046875, "learning_rate": 0.00016010915090747556, "loss": 0.5765, "step": 4205 }, { "epoch": 0.10686499727126195, "grad_norm": 0.4375, "learning_rate": 0.00016029953039725853, "loss": 0.6042, "step": 4210 }, { "epoch": 0.1069919153202777, "grad_norm": 0.48828125, "learning_rate": 0.0001604899098870415, "loss": 0.5748, "step": 4215 }, { "epoch": 0.10711883336929345, "grad_norm": 0.515625, "learning_rate": 0.00016068028937682444, "loss": 0.5847, "step": 4220 }, { "epoch": 0.1072457514183092, "grad_norm": 0.474609375, "learning_rate": 0.00016087066886660741, "loss": 0.6111, "step": 4225 }, { "epoch": 0.10737266946732495, "grad_norm": 0.494140625, "learning_rate": 0.0001610610483563904, "loss": 0.5809, "step": 4230 }, { "epoch": 0.1074995875163407, "grad_norm": 0.41015625, "learning_rate": 0.00016125142784617334, "loss": 0.548, "step": 4235 }, { "epoch": 0.10762650556535645, "grad_norm": 0.47265625, "learning_rate": 0.00016144180733595632, "loss": 0.6141, "step": 4240 }, { "epoch": 0.10775342361437219, "grad_norm": 0.443359375, "learning_rate": 0.0001616321868257393, "loss": 0.6037, "step": 4245 }, { "epoch": 0.10788034166338795, "grad_norm": 0.48828125, "learning_rate": 0.00016182256631552228, "loss": 0.5795, "step": 4250 }, { "epoch": 0.1080072597124037, "grad_norm": 0.431640625, "learning_rate": 0.00016201294580530523, "loss": 0.5524, "step": 4255 }, { "epoch": 0.10813417776141945, "grad_norm": 0.4375, "learning_rate": 0.00016220332529508818, "loss": 0.5733, "step": 4260 }, { "epoch": 0.1082610958104352, "grad_norm": 0.4609375, "learning_rate": 0.00016239370478487116, "loss": 0.5876, "step": 4265 }, { "epoch": 0.10838801385945095, "grad_norm": 0.46484375, "learning_rate": 0.0001625840842746541, "loss": 0.5595, "step": 4270 }, { "epoch": 0.1085149319084667, "grad_norm": 0.44140625, "learning_rate": 0.0001627744637644371, "loss": 0.5675, "step": 4275 }, { "epoch": 0.10864184995748245, "grad_norm": 0.486328125, "learning_rate": 0.00016296484325422007, "loss": 0.6043, "step": 4280 }, { "epoch": 0.1087687680064982, "grad_norm": 0.439453125, "learning_rate": 0.00016315522274400304, "loss": 0.593, "step": 4285 }, { "epoch": 0.10889568605551396, "grad_norm": 0.44921875, "learning_rate": 0.000163345602233786, "loss": 0.5452, "step": 4290 }, { "epoch": 0.1090226041045297, "grad_norm": 0.4296875, "learning_rate": 0.00016353598172356897, "loss": 0.5794, "step": 4295 }, { "epoch": 0.10914952215354545, "grad_norm": 0.44921875, "learning_rate": 0.00016372636121335195, "loss": 0.5685, "step": 4300 }, { "epoch": 0.1092764402025612, "grad_norm": 0.447265625, "learning_rate": 0.00016391674070313488, "loss": 0.5698, "step": 4305 }, { "epoch": 0.10940335825157696, "grad_norm": 0.45703125, "learning_rate": 0.00016410712019291785, "loss": 0.5964, "step": 4310 }, { "epoch": 0.10953027630059271, "grad_norm": 0.458984375, "learning_rate": 0.00016429749968270083, "loss": 0.5813, "step": 4315 }, { "epoch": 0.10965719434960845, "grad_norm": 0.451171875, "learning_rate": 0.0001644878791724838, "loss": 0.6387, "step": 4320 }, { "epoch": 0.1097841123986242, "grad_norm": 0.49609375, "learning_rate": 0.00016467825866226676, "loss": 0.6144, "step": 4325 }, { "epoch": 0.10991103044763996, "grad_norm": 0.462890625, "learning_rate": 0.00016486863815204974, "loss": 0.5826, "step": 4330 }, { "epoch": 0.11003794849665571, "grad_norm": 0.43359375, "learning_rate": 0.00016505901764183272, "loss": 0.5858, "step": 4335 }, { "epoch": 0.11016486654567147, "grad_norm": 0.4296875, "learning_rate": 0.00016524939713161567, "loss": 0.5761, "step": 4340 }, { "epoch": 0.1102917845946872, "grad_norm": 0.4375, "learning_rate": 0.00016543977662139865, "loss": 0.5703, "step": 4345 }, { "epoch": 0.11041870264370296, "grad_norm": 0.44140625, "learning_rate": 0.00016563015611118162, "loss": 0.5854, "step": 4350 }, { "epoch": 0.11054562069271871, "grad_norm": 0.46484375, "learning_rate": 0.00016582053560096458, "loss": 0.5769, "step": 4355 }, { "epoch": 0.11067253874173447, "grad_norm": 0.4375, "learning_rate": 0.00016601091509074753, "loss": 0.5745, "step": 4360 }, { "epoch": 0.1107994567907502, "grad_norm": 0.490234375, "learning_rate": 0.0001662012945805305, "loss": 0.5952, "step": 4365 }, { "epoch": 0.11092637483976596, "grad_norm": 0.4140625, "learning_rate": 0.00016639167407031348, "loss": 0.5631, "step": 4370 }, { "epoch": 0.11105329288878171, "grad_norm": 0.45703125, "learning_rate": 0.00016658205356009643, "loss": 0.612, "step": 4375 }, { "epoch": 0.11118021093779747, "grad_norm": 0.484375, "learning_rate": 0.0001667724330498794, "loss": 0.562, "step": 4380 }, { "epoch": 0.11130712898681322, "grad_norm": 0.466796875, "learning_rate": 0.0001669628125396624, "loss": 0.5602, "step": 4385 }, { "epoch": 0.11143404703582896, "grad_norm": 0.4609375, "learning_rate": 0.00016715319202944537, "loss": 0.5825, "step": 4390 }, { "epoch": 0.11156096508484471, "grad_norm": 0.462890625, "learning_rate": 0.0001673435715192283, "loss": 0.5776, "step": 4395 }, { "epoch": 0.11168788313386047, "grad_norm": 0.453125, "learning_rate": 0.00016753395100901127, "loss": 0.5721, "step": 4400 }, { "epoch": 0.11181480118287622, "grad_norm": 0.451171875, "learning_rate": 0.00016772433049879425, "loss": 0.5848, "step": 4405 }, { "epoch": 0.11194171923189197, "grad_norm": 0.49609375, "learning_rate": 0.0001679147099885772, "loss": 0.5761, "step": 4410 }, { "epoch": 0.11206863728090771, "grad_norm": 0.458984375, "learning_rate": 0.00016810508947836018, "loss": 0.5821, "step": 4415 }, { "epoch": 0.11219555532992347, "grad_norm": 0.431640625, "learning_rate": 0.00016829546896814316, "loss": 0.5397, "step": 4420 }, { "epoch": 0.11232247337893922, "grad_norm": 0.38671875, "learning_rate": 0.00016848584845792613, "loss": 0.5458, "step": 4425 }, { "epoch": 0.11244939142795497, "grad_norm": 0.4453125, "learning_rate": 0.00016867622794770908, "loss": 0.5583, "step": 4430 }, { "epoch": 0.11257630947697073, "grad_norm": 0.5078125, "learning_rate": 0.00016886660743749206, "loss": 0.5946, "step": 4435 }, { "epoch": 0.11270322752598647, "grad_norm": 0.44140625, "learning_rate": 0.00016905698692727504, "loss": 0.6134, "step": 4440 }, { "epoch": 0.11283014557500222, "grad_norm": 0.4609375, "learning_rate": 0.00016924736641705796, "loss": 0.5852, "step": 4445 }, { "epoch": 0.11295706362401797, "grad_norm": 0.470703125, "learning_rate": 0.00016943774590684094, "loss": 0.5855, "step": 4450 }, { "epoch": 0.11308398167303373, "grad_norm": 0.46484375, "learning_rate": 0.00016962812539662392, "loss": 0.5778, "step": 4455 }, { "epoch": 0.11321089972204948, "grad_norm": 0.455078125, "learning_rate": 0.0001698185048864069, "loss": 0.5951, "step": 4460 }, { "epoch": 0.11333781777106522, "grad_norm": 0.50390625, "learning_rate": 0.00017000888437618985, "loss": 0.5931, "step": 4465 }, { "epoch": 0.11346473582008097, "grad_norm": 0.4375, "learning_rate": 0.00017019926386597283, "loss": 0.5477, "step": 4470 }, { "epoch": 0.11359165386909673, "grad_norm": 0.4375, "learning_rate": 0.0001703896433557558, "loss": 0.5842, "step": 4475 }, { "epoch": 0.11371857191811248, "grad_norm": 0.470703125, "learning_rate": 0.00017058002284553876, "loss": 0.5757, "step": 4480 }, { "epoch": 0.11384548996712822, "grad_norm": 0.431640625, "learning_rate": 0.0001707704023353217, "loss": 0.5479, "step": 4485 }, { "epoch": 0.11397240801614397, "grad_norm": 0.44921875, "learning_rate": 0.0001709607818251047, "loss": 0.5556, "step": 4490 }, { "epoch": 0.11409932606515973, "grad_norm": 0.46875, "learning_rate": 0.00017115116131488766, "loss": 0.5815, "step": 4495 }, { "epoch": 0.11422624411417548, "grad_norm": 0.3984375, "learning_rate": 0.00017134154080467062, "loss": 0.5352, "step": 4500 }, { "epoch": 0.11435316216319123, "grad_norm": 0.43359375, "learning_rate": 0.0001715319202944536, "loss": 0.569, "step": 4505 }, { "epoch": 0.11448008021220697, "grad_norm": 0.474609375, "learning_rate": 0.00017172229978423657, "loss": 0.5901, "step": 4510 }, { "epoch": 0.11460699826122273, "grad_norm": 0.470703125, "learning_rate": 0.00017191267927401952, "loss": 0.5373, "step": 4515 }, { "epoch": 0.11473391631023848, "grad_norm": 0.474609375, "learning_rate": 0.0001721030587638025, "loss": 0.5958, "step": 4520 }, { "epoch": 0.11486083435925423, "grad_norm": 0.466796875, "learning_rate": 0.00017229343825358548, "loss": 0.6273, "step": 4525 }, { "epoch": 0.11498775240826999, "grad_norm": 0.466796875, "learning_rate": 0.00017248381774336846, "loss": 0.5857, "step": 4530 }, { "epoch": 0.11511467045728573, "grad_norm": 0.41796875, "learning_rate": 0.00017267419723315138, "loss": 0.5903, "step": 4535 }, { "epoch": 0.11524158850630148, "grad_norm": 0.431640625, "learning_rate": 0.00017286457672293436, "loss": 0.5702, "step": 4540 }, { "epoch": 0.11536850655531723, "grad_norm": 0.439453125, "learning_rate": 0.00017305495621271734, "loss": 0.5986, "step": 4545 }, { "epoch": 0.11549542460433299, "grad_norm": 0.451171875, "learning_rate": 0.0001732453357025003, "loss": 0.5707, "step": 4550 }, { "epoch": 0.11562234265334874, "grad_norm": 0.431640625, "learning_rate": 0.00017343571519228327, "loss": 0.563, "step": 4555 }, { "epoch": 0.11574926070236448, "grad_norm": 0.4453125, "learning_rate": 0.00017362609468206624, "loss": 0.5661, "step": 4560 }, { "epoch": 0.11587617875138023, "grad_norm": 0.419921875, "learning_rate": 0.00017381647417184922, "loss": 0.5834, "step": 4565 }, { "epoch": 0.11600309680039599, "grad_norm": 0.451171875, "learning_rate": 0.00017400685366163217, "loss": 0.5908, "step": 4570 }, { "epoch": 0.11613001484941174, "grad_norm": 0.451171875, "learning_rate": 0.00017419723315141513, "loss": 0.5177, "step": 4575 }, { "epoch": 0.11625693289842748, "grad_norm": 0.462890625, "learning_rate": 0.0001743876126411981, "loss": 0.5889, "step": 4580 }, { "epoch": 0.11638385094744323, "grad_norm": 0.4375, "learning_rate": 0.00017457799213098105, "loss": 0.5329, "step": 4585 }, { "epoch": 0.11651076899645899, "grad_norm": 0.4609375, "learning_rate": 0.00017476837162076403, "loss": 0.6233, "step": 4590 }, { "epoch": 0.11663768704547474, "grad_norm": 0.43359375, "learning_rate": 0.000174958751110547, "loss": 0.5811, "step": 4595 }, { "epoch": 0.11676460509449049, "grad_norm": 0.4609375, "learning_rate": 0.00017514913060033, "loss": 0.5651, "step": 4600 }, { "epoch": 0.11689152314350623, "grad_norm": 0.453125, "learning_rate": 0.00017533951009011294, "loss": 0.5975, "step": 4605 }, { "epoch": 0.11701844119252199, "grad_norm": 0.439453125, "learning_rate": 0.00017552988957989592, "loss": 0.5914, "step": 4610 }, { "epoch": 0.11714535924153774, "grad_norm": 0.421875, "learning_rate": 0.0001757202690696789, "loss": 0.5493, "step": 4615 }, { "epoch": 0.11727227729055349, "grad_norm": 0.451171875, "learning_rate": 0.00017591064855946182, "loss": 0.5849, "step": 4620 }, { "epoch": 0.11739919533956925, "grad_norm": 0.478515625, "learning_rate": 0.0001761010280492448, "loss": 0.5974, "step": 4625 }, { "epoch": 0.11752611338858499, "grad_norm": 0.455078125, "learning_rate": 0.00017629140753902778, "loss": 0.5545, "step": 4630 }, { "epoch": 0.11765303143760074, "grad_norm": 0.435546875, "learning_rate": 0.00017648178702881075, "loss": 0.5739, "step": 4635 }, { "epoch": 0.11777994948661649, "grad_norm": 0.453125, "learning_rate": 0.0001766721665185937, "loss": 0.5731, "step": 4640 }, { "epoch": 0.11790686753563225, "grad_norm": 0.466796875, "learning_rate": 0.00017686254600837668, "loss": 0.5722, "step": 4645 }, { "epoch": 0.118033785584648, "grad_norm": 0.498046875, "learning_rate": 0.00017705292549815966, "loss": 0.5954, "step": 4650 }, { "epoch": 0.11816070363366374, "grad_norm": 0.447265625, "learning_rate": 0.0001772433049879426, "loss": 0.5728, "step": 4655 }, { "epoch": 0.11828762168267949, "grad_norm": 0.421875, "learning_rate": 0.0001774336844777256, "loss": 0.5641, "step": 4660 }, { "epoch": 0.11841453973169525, "grad_norm": 0.4375, "learning_rate": 0.00017762406396750857, "loss": 0.5798, "step": 4665 }, { "epoch": 0.118541457780711, "grad_norm": 0.474609375, "learning_rate": 0.00017781444345729152, "loss": 0.5934, "step": 4670 }, { "epoch": 0.11866837582972675, "grad_norm": 0.46484375, "learning_rate": 0.00017800482294707447, "loss": 0.5718, "step": 4675 }, { "epoch": 0.11879529387874249, "grad_norm": 0.42578125, "learning_rate": 0.00017819520243685745, "loss": 0.5831, "step": 4680 }, { "epoch": 0.11892221192775825, "grad_norm": 0.3984375, "learning_rate": 0.00017838558192664043, "loss": 0.5331, "step": 4685 }, { "epoch": 0.119049129976774, "grad_norm": 0.458984375, "learning_rate": 0.00017857596141642338, "loss": 0.5879, "step": 4690 }, { "epoch": 0.11917604802578975, "grad_norm": 0.51953125, "learning_rate": 0.00017876634090620636, "loss": 0.5603, "step": 4695 }, { "epoch": 0.11930296607480549, "grad_norm": 0.470703125, "learning_rate": 0.00017895672039598933, "loss": 0.5922, "step": 4700 }, { "epoch": 0.11942988412382124, "grad_norm": 0.4375, "learning_rate": 0.0001791470998857723, "loss": 0.5766, "step": 4705 }, { "epoch": 0.119556802172837, "grad_norm": 0.431640625, "learning_rate": 0.00017933747937555524, "loss": 0.565, "step": 4710 }, { "epoch": 0.11968372022185275, "grad_norm": 0.4609375, "learning_rate": 0.00017952785886533821, "loss": 0.5748, "step": 4715 }, { "epoch": 0.1198106382708685, "grad_norm": 0.453125, "learning_rate": 0.0001797182383551212, "loss": 0.5881, "step": 4720 }, { "epoch": 0.11993755631988424, "grad_norm": 0.484375, "learning_rate": 0.00017990861784490414, "loss": 0.5911, "step": 4725 }, { "epoch": 0.1200644743689, "grad_norm": 0.447265625, "learning_rate": 0.00018009899733468712, "loss": 0.6032, "step": 4730 }, { "epoch": 0.12019139241791575, "grad_norm": 0.451171875, "learning_rate": 0.0001802893768244701, "loss": 0.5904, "step": 4735 }, { "epoch": 0.1203183104669315, "grad_norm": 0.447265625, "learning_rate": 0.00018047975631425308, "loss": 0.603, "step": 4740 }, { "epoch": 0.12044522851594726, "grad_norm": 0.453125, "learning_rate": 0.00018067013580403603, "loss": 0.5817, "step": 4745 }, { "epoch": 0.120572146564963, "grad_norm": 0.43359375, "learning_rate": 0.000180860515293819, "loss": 0.572, "step": 4750 }, { "epoch": 0.12069906461397875, "grad_norm": 0.46875, "learning_rate": 0.00018105089478360199, "loss": 0.5799, "step": 4755 }, { "epoch": 0.1208259826629945, "grad_norm": 0.45703125, "learning_rate": 0.0001812412742733849, "loss": 0.5722, "step": 4760 }, { "epoch": 0.12095290071201026, "grad_norm": 0.4609375, "learning_rate": 0.0001814316537631679, "loss": 0.5608, "step": 4765 }, { "epoch": 0.12107981876102601, "grad_norm": 0.458984375, "learning_rate": 0.00018162203325295087, "loss": 0.5586, "step": 4770 }, { "epoch": 0.12120673681004175, "grad_norm": 0.451171875, "learning_rate": 0.00018181241274273384, "loss": 0.5647, "step": 4775 }, { "epoch": 0.1213336548590575, "grad_norm": 0.45703125, "learning_rate": 0.0001820027922325168, "loss": 0.5623, "step": 4780 }, { "epoch": 0.12146057290807326, "grad_norm": 0.439453125, "learning_rate": 0.00018219317172229977, "loss": 0.5715, "step": 4785 }, { "epoch": 0.12158749095708901, "grad_norm": 0.46875, "learning_rate": 0.00018238355121208275, "loss": 0.555, "step": 4790 }, { "epoch": 0.12171440900610475, "grad_norm": 0.4296875, "learning_rate": 0.0001825739307018657, "loss": 0.5835, "step": 4795 }, { "epoch": 0.1218413270551205, "grad_norm": 0.4296875, "learning_rate": 0.00018276431019164865, "loss": 0.5827, "step": 4800 }, { "epoch": 0.12196824510413626, "grad_norm": 0.486328125, "learning_rate": 0.00018295468968143163, "loss": 0.5936, "step": 4805 }, { "epoch": 0.12209516315315201, "grad_norm": 0.443359375, "learning_rate": 0.0001831450691712146, "loss": 0.5458, "step": 4810 }, { "epoch": 0.12222208120216776, "grad_norm": 0.451171875, "learning_rate": 0.00018333544866099756, "loss": 0.5709, "step": 4815 }, { "epoch": 0.1223489992511835, "grad_norm": 0.451171875, "learning_rate": 0.00018352582815078054, "loss": 0.594, "step": 4820 }, { "epoch": 0.12247591730019926, "grad_norm": 0.4921875, "learning_rate": 0.00018371620764056352, "loss": 0.5877, "step": 4825 }, { "epoch": 0.12260283534921501, "grad_norm": 0.4375, "learning_rate": 0.00018390658713034647, "loss": 0.562, "step": 4830 }, { "epoch": 0.12272975339823076, "grad_norm": 0.484375, "learning_rate": 0.00018409696662012945, "loss": 0.5536, "step": 4835 }, { "epoch": 0.12285667144724652, "grad_norm": 0.453125, "learning_rate": 0.00018428734610991242, "loss": 0.5804, "step": 4840 }, { "epoch": 0.12298358949626226, "grad_norm": 0.4453125, "learning_rate": 0.0001844777255996954, "loss": 0.5773, "step": 4845 }, { "epoch": 0.12311050754527801, "grad_norm": 0.447265625, "learning_rate": 0.00018466810508947833, "loss": 0.5404, "step": 4850 }, { "epoch": 0.12323742559429376, "grad_norm": 0.427734375, "learning_rate": 0.0001848584845792613, "loss": 0.5663, "step": 4855 }, { "epoch": 0.12336434364330952, "grad_norm": 0.421875, "learning_rate": 0.00018504886406904428, "loss": 0.5712, "step": 4860 }, { "epoch": 0.12349126169232527, "grad_norm": 0.47265625, "learning_rate": 0.00018523924355882723, "loss": 0.5885, "step": 4865 }, { "epoch": 0.12361817974134101, "grad_norm": 0.435546875, "learning_rate": 0.0001854296230486102, "loss": 0.5768, "step": 4870 }, { "epoch": 0.12374509779035676, "grad_norm": 0.447265625, "learning_rate": 0.0001856200025383932, "loss": 0.5736, "step": 4875 }, { "epoch": 0.12387201583937252, "grad_norm": 0.447265625, "learning_rate": 0.00018581038202817617, "loss": 0.5818, "step": 4880 }, { "epoch": 0.12399893388838827, "grad_norm": 0.404296875, "learning_rate": 0.00018600076151795912, "loss": 0.5536, "step": 4885 }, { "epoch": 0.12412585193740402, "grad_norm": 0.427734375, "learning_rate": 0.0001861911410077421, "loss": 0.5668, "step": 4890 }, { "epoch": 0.12425276998641976, "grad_norm": 0.451171875, "learning_rate": 0.00018638152049752505, "loss": 0.5557, "step": 4895 }, { "epoch": 0.12437968803543552, "grad_norm": 0.46875, "learning_rate": 0.000186571899987308, "loss": 0.5914, "step": 4900 }, { "epoch": 0.12450660608445127, "grad_norm": 0.412109375, "learning_rate": 0.00018676227947709098, "loss": 0.5816, "step": 4905 }, { "epoch": 0.12463352413346702, "grad_norm": 0.453125, "learning_rate": 0.00018695265896687396, "loss": 0.5916, "step": 4910 }, { "epoch": 0.12476044218248276, "grad_norm": 0.44921875, "learning_rate": 0.00018714303845665693, "loss": 0.5542, "step": 4915 }, { "epoch": 0.12488736023149852, "grad_norm": 0.439453125, "learning_rate": 0.00018733341794643988, "loss": 0.5371, "step": 4920 }, { "epoch": 0.12501427828051428, "grad_norm": 0.443359375, "learning_rate": 0.00018752379743622286, "loss": 0.5647, "step": 4925 }, { "epoch": 0.12514119632953002, "grad_norm": 0.4765625, "learning_rate": 0.00018771417692600584, "loss": 0.5689, "step": 4930 }, { "epoch": 0.12526811437854576, "grad_norm": 0.4296875, "learning_rate": 0.00018790455641578876, "loss": 0.5545, "step": 4935 }, { "epoch": 0.12539503242756153, "grad_norm": 0.431640625, "learning_rate": 0.00018809493590557174, "loss": 0.5878, "step": 4940 }, { "epoch": 0.12552195047657727, "grad_norm": 0.384765625, "learning_rate": 0.00018828531539535472, "loss": 0.5271, "step": 4945 }, { "epoch": 0.12564886852559304, "grad_norm": 0.470703125, "learning_rate": 0.0001884756948851377, "loss": 0.5903, "step": 4950 }, { "epoch": 0.12577578657460878, "grad_norm": 0.439453125, "learning_rate": 0.00018866607437492065, "loss": 0.5717, "step": 4955 }, { "epoch": 0.12590270462362452, "grad_norm": 0.482421875, "learning_rate": 0.00018885645386470363, "loss": 0.5612, "step": 4960 }, { "epoch": 0.12602962267264028, "grad_norm": 0.427734375, "learning_rate": 0.0001890468333544866, "loss": 0.5539, "step": 4965 }, { "epoch": 0.12615654072165602, "grad_norm": 0.4296875, "learning_rate": 0.00018923721284426956, "loss": 0.5791, "step": 4970 }, { "epoch": 0.12628345877067176, "grad_norm": 0.439453125, "learning_rate": 0.00018942759233405254, "loss": 0.5554, "step": 4975 }, { "epoch": 0.12641037681968753, "grad_norm": 0.47265625, "learning_rate": 0.00018961797182383551, "loss": 0.5416, "step": 4980 }, { "epoch": 0.12653729486870327, "grad_norm": 0.447265625, "learning_rate": 0.0001898083513136185, "loss": 0.5752, "step": 4985 }, { "epoch": 0.12666421291771904, "grad_norm": 0.42578125, "learning_rate": 0.00018999873080340142, "loss": 0.5922, "step": 4990 }, { "epoch": 0.12679113096673478, "grad_norm": 0.404296875, "learning_rate": 0.0001901891102931844, "loss": 0.5641, "step": 4995 }, { "epoch": 0.12691804901575052, "grad_norm": 0.451171875, "learning_rate": 0.00019037948978296737, "loss": 0.5628, "step": 5000 }, { "epoch": 0.12704496706476628, "grad_norm": 0.375, "learning_rate": 0.00019056986927275032, "loss": 0.5349, "step": 5005 }, { "epoch": 0.12717188511378202, "grad_norm": 0.421875, "learning_rate": 0.0001907602487625333, "loss": 0.5641, "step": 5010 }, { "epoch": 0.1272988031627978, "grad_norm": 0.5078125, "learning_rate": 0.00019095062825231628, "loss": 0.611, "step": 5015 }, { "epoch": 0.12742572121181353, "grad_norm": 0.416015625, "learning_rate": 0.00019114100774209926, "loss": 0.5749, "step": 5020 }, { "epoch": 0.12755263926082927, "grad_norm": 0.451171875, "learning_rate": 0.00019133138723188218, "loss": 0.5609, "step": 5025 }, { "epoch": 0.12767955730984504, "grad_norm": 0.474609375, "learning_rate": 0.00019152176672166516, "loss": 0.5834, "step": 5030 }, { "epoch": 0.12780647535886078, "grad_norm": 0.458984375, "learning_rate": 0.00019171214621144814, "loss": 0.5463, "step": 5035 }, { "epoch": 0.12793339340787654, "grad_norm": 0.455078125, "learning_rate": 0.0001919025257012311, "loss": 0.5531, "step": 5040 }, { "epoch": 0.12806031145689228, "grad_norm": 0.4140625, "learning_rate": 0.00019209290519101407, "loss": 0.5376, "step": 5045 }, { "epoch": 0.12818722950590802, "grad_norm": 0.470703125, "learning_rate": 0.00019228328468079704, "loss": 0.5485, "step": 5050 }, { "epoch": 0.1283141475549238, "grad_norm": 0.48046875, "learning_rate": 0.00019247366417058002, "loss": 0.5837, "step": 5055 }, { "epoch": 0.12844106560393953, "grad_norm": 0.43359375, "learning_rate": 0.00019266404366036297, "loss": 0.5703, "step": 5060 }, { "epoch": 0.1285679836529553, "grad_norm": 0.455078125, "learning_rate": 0.00019285442315014595, "loss": 0.5831, "step": 5065 }, { "epoch": 0.12869490170197104, "grad_norm": 0.470703125, "learning_rate": 0.00019304480263992893, "loss": 0.594, "step": 5070 }, { "epoch": 0.12882181975098678, "grad_norm": 0.4765625, "learning_rate": 0.00019323518212971185, "loss": 0.5678, "step": 5075 }, { "epoch": 0.12894873780000254, "grad_norm": 0.42578125, "learning_rate": 0.00019342556161949483, "loss": 0.5721, "step": 5080 }, { "epoch": 0.12907565584901828, "grad_norm": 0.4453125, "learning_rate": 0.0001936159411092778, "loss": 0.5788, "step": 5085 }, { "epoch": 0.12920257389803405, "grad_norm": 0.439453125, "learning_rate": 0.0001938063205990608, "loss": 0.5342, "step": 5090 }, { "epoch": 0.1293294919470498, "grad_norm": 0.431640625, "learning_rate": 0.00019399670008884374, "loss": 0.5716, "step": 5095 }, { "epoch": 0.12945640999606553, "grad_norm": 0.46875, "learning_rate": 0.00019418707957862672, "loss": 0.5803, "step": 5100 }, { "epoch": 0.1295833280450813, "grad_norm": 0.44140625, "learning_rate": 0.0001943774590684097, "loss": 0.5884, "step": 5105 }, { "epoch": 0.12971024609409704, "grad_norm": 0.4375, "learning_rate": 0.00019456783855819265, "loss": 0.5672, "step": 5110 }, { "epoch": 0.1298371641431128, "grad_norm": 0.453125, "learning_rate": 0.00019475821804797563, "loss": 0.573, "step": 5115 }, { "epoch": 0.12996408219212854, "grad_norm": 0.43359375, "learning_rate": 0.00019494859753775858, "loss": 0.5476, "step": 5120 }, { "epoch": 0.13009100024114428, "grad_norm": 0.45703125, "learning_rate": 0.00019513897702754155, "loss": 0.5953, "step": 5125 }, { "epoch": 0.13021791829016005, "grad_norm": 0.455078125, "learning_rate": 0.0001953293565173245, "loss": 0.6139, "step": 5130 }, { "epoch": 0.1303448363391758, "grad_norm": 0.447265625, "learning_rate": 0.00019551973600710748, "loss": 0.5803, "step": 5135 }, { "epoch": 0.13047175438819156, "grad_norm": 0.4296875, "learning_rate": 0.00019571011549689046, "loss": 0.5766, "step": 5140 }, { "epoch": 0.1305986724372073, "grad_norm": 0.427734375, "learning_rate": 0.0001959004949866734, "loss": 0.5658, "step": 5145 }, { "epoch": 0.13072559048622304, "grad_norm": 0.43359375, "learning_rate": 0.0001960908744764564, "loss": 0.5548, "step": 5150 }, { "epoch": 0.1308525085352388, "grad_norm": 0.50390625, "learning_rate": 0.00019628125396623937, "loss": 0.5725, "step": 5155 }, { "epoch": 0.13097942658425454, "grad_norm": 0.412109375, "learning_rate": 0.00019647163345602235, "loss": 0.5625, "step": 5160 }, { "epoch": 0.1311063446332703, "grad_norm": 0.439453125, "learning_rate": 0.00019666201294580527, "loss": 0.5485, "step": 5165 }, { "epoch": 0.13123326268228605, "grad_norm": 0.439453125, "learning_rate": 0.00019685239243558825, "loss": 0.5666, "step": 5170 }, { "epoch": 0.1313601807313018, "grad_norm": 0.453125, "learning_rate": 0.00019704277192537123, "loss": 0.5653, "step": 5175 }, { "epoch": 0.13148709878031756, "grad_norm": 0.458984375, "learning_rate": 0.00019723315141515418, "loss": 0.5842, "step": 5180 }, { "epoch": 0.1316140168293333, "grad_norm": 0.412109375, "learning_rate": 0.00019742353090493716, "loss": 0.5558, "step": 5185 }, { "epoch": 0.13174093487834904, "grad_norm": 0.38671875, "learning_rate": 0.00019761391039472013, "loss": 0.5551, "step": 5190 }, { "epoch": 0.1318678529273648, "grad_norm": 0.423828125, "learning_rate": 0.0001978042898845031, "loss": 0.6015, "step": 5195 }, { "epoch": 0.13199477097638054, "grad_norm": 0.447265625, "learning_rate": 0.00019799466937428606, "loss": 0.5766, "step": 5200 }, { "epoch": 0.1321216890253963, "grad_norm": 0.458984375, "learning_rate": 0.00019818504886406904, "loss": 0.5768, "step": 5205 }, { "epoch": 0.13224860707441205, "grad_norm": 0.4140625, "learning_rate": 0.000198375428353852, "loss": 0.5297, "step": 5210 }, { "epoch": 0.1323755251234278, "grad_norm": 0.439453125, "learning_rate": 0.00019856580784363494, "loss": 0.5682, "step": 5215 }, { "epoch": 0.13250244317244356, "grad_norm": 0.453125, "learning_rate": 0.00019875618733341792, "loss": 0.55, "step": 5220 }, { "epoch": 0.1326293612214593, "grad_norm": 0.40234375, "learning_rate": 0.0001989465668232009, "loss": 0.5761, "step": 5225 }, { "epoch": 0.13275627927047506, "grad_norm": 0.421875, "learning_rate": 0.00019913694631298385, "loss": 0.5557, "step": 5230 }, { "epoch": 0.1328831973194908, "grad_norm": 0.44140625, "learning_rate": 0.00019932732580276683, "loss": 0.546, "step": 5235 }, { "epoch": 0.13301011536850654, "grad_norm": 0.421875, "learning_rate": 0.0001995177052925498, "loss": 0.5473, "step": 5240 }, { "epoch": 0.1331370334175223, "grad_norm": 0.435546875, "learning_rate": 0.00019970808478233279, "loss": 0.5709, "step": 5245 }, { "epoch": 0.13326395146653805, "grad_norm": 0.455078125, "learning_rate": 0.0001998984642721157, "loss": 0.5756, "step": 5250 }, { "epoch": 0.13339086951555382, "grad_norm": 0.478515625, "learning_rate": 0.0002000888437618987, "loss": 0.5652, "step": 5255 }, { "epoch": 0.13351778756456956, "grad_norm": 0.431640625, "learning_rate": 0.00020027922325168167, "loss": 0.5698, "step": 5260 }, { "epoch": 0.1336447056135853, "grad_norm": 0.42578125, "learning_rate": 0.00020046960274146462, "loss": 0.5595, "step": 5265 }, { "epoch": 0.13377162366260106, "grad_norm": 0.453125, "learning_rate": 0.0002006599822312476, "loss": 0.5684, "step": 5270 }, { "epoch": 0.1338985417116168, "grad_norm": 0.44140625, "learning_rate": 0.00020085036172103057, "loss": 0.5701, "step": 5275 }, { "epoch": 0.13402545976063257, "grad_norm": 0.447265625, "learning_rate": 0.00020104074121081355, "loss": 0.5541, "step": 5280 }, { "epoch": 0.1341523778096483, "grad_norm": 0.44140625, "learning_rate": 0.0002012311207005965, "loss": 0.5635, "step": 5285 }, { "epoch": 0.13427929585866405, "grad_norm": 0.400390625, "learning_rate": 0.00020142150019037948, "loss": 0.5801, "step": 5290 }, { "epoch": 0.13440621390767982, "grad_norm": 0.439453125, "learning_rate": 0.00020161187968016246, "loss": 0.5634, "step": 5295 }, { "epoch": 0.13453313195669556, "grad_norm": 0.4453125, "learning_rate": 0.00020180225916994538, "loss": 0.553, "step": 5300 }, { "epoch": 0.13466005000571132, "grad_norm": 0.419921875, "learning_rate": 0.00020199263865972836, "loss": 0.5657, "step": 5305 }, { "epoch": 0.13478696805472706, "grad_norm": 0.44140625, "learning_rate": 0.00020218301814951134, "loss": 0.5837, "step": 5310 }, { "epoch": 0.1349138861037428, "grad_norm": 0.427734375, "learning_rate": 0.00020237339763929432, "loss": 0.5763, "step": 5315 }, { "epoch": 0.13504080415275857, "grad_norm": 0.439453125, "learning_rate": 0.00020256377712907727, "loss": 0.5803, "step": 5320 }, { "epoch": 0.1351677222017743, "grad_norm": 0.439453125, "learning_rate": 0.00020275415661886025, "loss": 0.5547, "step": 5325 }, { "epoch": 0.13529464025079008, "grad_norm": 0.451171875, "learning_rate": 0.00020294453610864322, "loss": 0.5714, "step": 5330 }, { "epoch": 0.13542155829980582, "grad_norm": 0.45703125, "learning_rate": 0.00020313491559842618, "loss": 0.5604, "step": 5335 }, { "epoch": 0.13554847634882156, "grad_norm": 0.423828125, "learning_rate": 0.00020332529508820913, "loss": 0.5864, "step": 5340 }, { "epoch": 0.13567539439783732, "grad_norm": 0.470703125, "learning_rate": 0.0002035156745779921, "loss": 0.5728, "step": 5345 }, { "epoch": 0.13580231244685306, "grad_norm": 0.4296875, "learning_rate": 0.00020370605406777508, "loss": 0.5578, "step": 5350 }, { "epoch": 0.13592923049586883, "grad_norm": 0.3984375, "learning_rate": 0.00020389643355755803, "loss": 0.5569, "step": 5355 }, { "epoch": 0.13605614854488457, "grad_norm": 0.439453125, "learning_rate": 0.000204086813047341, "loss": 0.557, "step": 5360 }, { "epoch": 0.1361830665939003, "grad_norm": 0.4375, "learning_rate": 0.000204277192537124, "loss": 0.5227, "step": 5365 }, { "epoch": 0.13630998464291608, "grad_norm": 0.423828125, "learning_rate": 0.00020446757202690694, "loss": 0.5652, "step": 5370 }, { "epoch": 0.13643690269193182, "grad_norm": 0.40234375, "learning_rate": 0.00020465795151668992, "loss": 0.5442, "step": 5375 }, { "epoch": 0.13656382074094758, "grad_norm": 0.421875, "learning_rate": 0.0002048483310064729, "loss": 0.5726, "step": 5380 }, { "epoch": 0.13669073878996332, "grad_norm": 0.4296875, "learning_rate": 0.00020503871049625588, "loss": 0.5831, "step": 5385 }, { "epoch": 0.13681765683897906, "grad_norm": 0.443359375, "learning_rate": 0.0002052290899860388, "loss": 0.5657, "step": 5390 }, { "epoch": 0.13694457488799483, "grad_norm": 0.4140625, "learning_rate": 0.00020541946947582178, "loss": 0.5765, "step": 5395 }, { "epoch": 0.13707149293701057, "grad_norm": 0.47265625, "learning_rate": 0.00020560984896560476, "loss": 0.5561, "step": 5400 }, { "epoch": 0.1371984109860263, "grad_norm": 0.447265625, "learning_rate": 0.0002058002284553877, "loss": 0.5751, "step": 5405 }, { "epoch": 0.13732532903504208, "grad_norm": 0.443359375, "learning_rate": 0.00020599060794517068, "loss": 0.5739, "step": 5410 }, { "epoch": 0.13745224708405782, "grad_norm": 0.443359375, "learning_rate": 0.00020618098743495366, "loss": 0.5697, "step": 5415 }, { "epoch": 0.13757916513307358, "grad_norm": 0.43359375, "learning_rate": 0.00020637136692473664, "loss": 0.5514, "step": 5420 }, { "epoch": 0.13770608318208932, "grad_norm": 0.42578125, "learning_rate": 0.0002065617464145196, "loss": 0.5447, "step": 5425 }, { "epoch": 0.13783300123110506, "grad_norm": 0.4140625, "learning_rate": 0.00020675212590430257, "loss": 0.5222, "step": 5430 }, { "epoch": 0.13795991928012083, "grad_norm": 0.419921875, "learning_rate": 0.00020694250539408552, "loss": 0.5875, "step": 5435 }, { "epoch": 0.13808683732913657, "grad_norm": 0.447265625, "learning_rate": 0.00020713288488386847, "loss": 0.5527, "step": 5440 }, { "epoch": 0.13821375537815234, "grad_norm": 0.447265625, "learning_rate": 0.00020732326437365145, "loss": 0.5624, "step": 5445 }, { "epoch": 0.13834067342716808, "grad_norm": 0.416015625, "learning_rate": 0.00020751364386343443, "loss": 0.513, "step": 5450 }, { "epoch": 0.13846759147618382, "grad_norm": 0.447265625, "learning_rate": 0.0002077040233532174, "loss": 0.5724, "step": 5455 }, { "epoch": 0.13859450952519958, "grad_norm": 0.408203125, "learning_rate": 0.00020789440284300036, "loss": 0.5411, "step": 5460 }, { "epoch": 0.13872142757421532, "grad_norm": 0.4296875, "learning_rate": 0.00020808478233278334, "loss": 0.5461, "step": 5465 }, { "epoch": 0.1388483456232311, "grad_norm": 0.435546875, "learning_rate": 0.00020827516182256631, "loss": 0.5679, "step": 5470 }, { "epoch": 0.13897526367224683, "grad_norm": 0.431640625, "learning_rate": 0.00020846554131234924, "loss": 0.5804, "step": 5475 }, { "epoch": 0.13910218172126257, "grad_norm": 0.41015625, "learning_rate": 0.00020865592080213222, "loss": 0.5826, "step": 5480 }, { "epoch": 0.13922909977027834, "grad_norm": 0.466796875, "learning_rate": 0.0002088463002919152, "loss": 0.5785, "step": 5485 }, { "epoch": 0.13935601781929408, "grad_norm": 0.455078125, "learning_rate": 0.00020903667978169817, "loss": 0.6157, "step": 5490 }, { "epoch": 0.13948293586830984, "grad_norm": 0.455078125, "learning_rate": 0.00020922705927148112, "loss": 0.5399, "step": 5495 }, { "epoch": 0.13960985391732558, "grad_norm": 0.447265625, "learning_rate": 0.0002094174387612641, "loss": 0.5945, "step": 5500 }, { "epoch": 0.13973677196634132, "grad_norm": 0.439453125, "learning_rate": 0.00020960781825104708, "loss": 0.5879, "step": 5505 }, { "epoch": 0.1398636900153571, "grad_norm": 0.4375, "learning_rate": 0.00020979819774083003, "loss": 0.5649, "step": 5510 }, { "epoch": 0.13999060806437283, "grad_norm": 0.423828125, "learning_rate": 0.000209988577230613, "loss": 0.5909, "step": 5515 }, { "epoch": 0.1401175261133886, "grad_norm": 0.431640625, "learning_rate": 0.000210178956720396, "loss": 0.5688, "step": 5520 }, { "epoch": 0.14024444416240434, "grad_norm": 0.408203125, "learning_rate": 0.00021036933621017896, "loss": 0.5225, "step": 5525 }, { "epoch": 0.14037136221142008, "grad_norm": 0.4140625, "learning_rate": 0.0002105597156999619, "loss": 0.5806, "step": 5530 }, { "epoch": 0.14049828026043584, "grad_norm": 0.4296875, "learning_rate": 0.00021075009518974487, "loss": 0.5564, "step": 5535 }, { "epoch": 0.14062519830945158, "grad_norm": 0.4375, "learning_rate": 0.00021094047467952784, "loss": 0.5323, "step": 5540 }, { "epoch": 0.14075211635846735, "grad_norm": 0.41015625, "learning_rate": 0.0002111308541693108, "loss": 0.5317, "step": 5545 }, { "epoch": 0.1408790344074831, "grad_norm": 0.43359375, "learning_rate": 0.00021132123365909377, "loss": 0.5842, "step": 5550 }, { "epoch": 0.14100595245649883, "grad_norm": 0.423828125, "learning_rate": 0.00021151161314887675, "loss": 0.5883, "step": 5555 }, { "epoch": 0.1411328705055146, "grad_norm": 0.4296875, "learning_rate": 0.00021170199263865973, "loss": 0.5654, "step": 5560 }, { "epoch": 0.14125978855453034, "grad_norm": 0.3671875, "learning_rate": 0.00021189237212844265, "loss": 0.5383, "step": 5565 }, { "epoch": 0.1413867066035461, "grad_norm": 0.46875, "learning_rate": 0.00021208275161822563, "loss": 0.5687, "step": 5570 }, { "epoch": 0.14151362465256184, "grad_norm": 0.447265625, "learning_rate": 0.0002122731311080086, "loss": 0.5829, "step": 5575 }, { "epoch": 0.14164054270157758, "grad_norm": 0.419921875, "learning_rate": 0.00021246351059779156, "loss": 0.5707, "step": 5580 }, { "epoch": 0.14176746075059335, "grad_norm": 0.40234375, "learning_rate": 0.00021265389008757454, "loss": 0.5371, "step": 5585 }, { "epoch": 0.1418943787996091, "grad_norm": 0.455078125, "learning_rate": 0.00021284426957735752, "loss": 0.5528, "step": 5590 }, { "epoch": 0.14202129684862486, "grad_norm": 0.318359375, "learning_rate": 0.0002130346490671405, "loss": 0.4959, "step": 5595 }, { "epoch": 0.1421482148976406, "grad_norm": 0.431640625, "learning_rate": 0.00021322502855692345, "loss": 0.5784, "step": 5600 }, { "epoch": 0.14227513294665634, "grad_norm": 0.439453125, "learning_rate": 0.00021341540804670642, "loss": 0.577, "step": 5605 }, { "epoch": 0.1424020509956721, "grad_norm": 0.427734375, "learning_rate": 0.0002136057875364894, "loss": 0.5659, "step": 5610 }, { "epoch": 0.14252896904468784, "grad_norm": 0.42578125, "learning_rate": 0.00021379616702627233, "loss": 0.5649, "step": 5615 }, { "epoch": 0.1426558870937036, "grad_norm": 0.4453125, "learning_rate": 0.0002139865465160553, "loss": 0.5727, "step": 5620 }, { "epoch": 0.14278280514271935, "grad_norm": 0.42578125, "learning_rate": 0.00021417692600583828, "loss": 0.5545, "step": 5625 }, { "epoch": 0.1429097231917351, "grad_norm": 0.38671875, "learning_rate": 0.00021436730549562126, "loss": 0.5448, "step": 5630 }, { "epoch": 0.14303664124075086, "grad_norm": 0.421875, "learning_rate": 0.0002145576849854042, "loss": 0.5412, "step": 5635 }, { "epoch": 0.1431635592897666, "grad_norm": 0.41015625, "learning_rate": 0.0002147480644751872, "loss": 0.5292, "step": 5640 }, { "epoch": 0.14329047733878233, "grad_norm": 0.4296875, "learning_rate": 0.00021493844396497017, "loss": 0.551, "step": 5645 }, { "epoch": 0.1434173953877981, "grad_norm": 0.435546875, "learning_rate": 0.00021512882345475312, "loss": 0.5554, "step": 5650 }, { "epoch": 0.14354431343681384, "grad_norm": 0.435546875, "learning_rate": 0.0002153192029445361, "loss": 0.5752, "step": 5655 }, { "epoch": 0.1436712314858296, "grad_norm": 0.443359375, "learning_rate": 0.00021550958243431905, "loss": 0.5524, "step": 5660 }, { "epoch": 0.14379814953484535, "grad_norm": 0.3984375, "learning_rate": 0.00021569996192410203, "loss": 0.5404, "step": 5665 }, { "epoch": 0.1439250675838611, "grad_norm": 0.435546875, "learning_rate": 0.00021589034141388498, "loss": 0.5425, "step": 5670 }, { "epoch": 0.14405198563287686, "grad_norm": 0.44921875, "learning_rate": 0.00021608072090366796, "loss": 0.5708, "step": 5675 }, { "epoch": 0.1441789036818926, "grad_norm": 0.4453125, "learning_rate": 0.00021627110039345093, "loss": 0.5561, "step": 5680 }, { "epoch": 0.14430582173090836, "grad_norm": 0.451171875, "learning_rate": 0.00021646147988323389, "loss": 0.5917, "step": 5685 }, { "epoch": 0.1444327397799241, "grad_norm": 0.41796875, "learning_rate": 0.00021665185937301686, "loss": 0.5681, "step": 5690 }, { "epoch": 0.14455965782893984, "grad_norm": 0.431640625, "learning_rate": 0.00021684223886279984, "loss": 0.5618, "step": 5695 }, { "epoch": 0.1446865758779556, "grad_norm": 0.390625, "learning_rate": 0.00021703261835258282, "loss": 0.5202, "step": 5700 }, { "epoch": 0.14481349392697135, "grad_norm": 0.41796875, "learning_rate": 0.00021722299784236574, "loss": 0.5444, "step": 5705 }, { "epoch": 0.14494041197598712, "grad_norm": 0.423828125, "learning_rate": 0.00021741337733214872, "loss": 0.5659, "step": 5710 }, { "epoch": 0.14506733002500286, "grad_norm": 0.421875, "learning_rate": 0.0002176037568219317, "loss": 0.5389, "step": 5715 }, { "epoch": 0.1451942480740186, "grad_norm": 0.390625, "learning_rate": 0.00021779413631171465, "loss": 0.5588, "step": 5720 }, { "epoch": 0.14532116612303436, "grad_norm": 0.439453125, "learning_rate": 0.00021798451580149763, "loss": 0.5763, "step": 5725 }, { "epoch": 0.1454480841720501, "grad_norm": 0.451171875, "learning_rate": 0.0002181748952912806, "loss": 0.5402, "step": 5730 }, { "epoch": 0.14557500222106587, "grad_norm": 0.419921875, "learning_rate": 0.00021836527478106359, "loss": 0.5254, "step": 5735 }, { "epoch": 0.1457019202700816, "grad_norm": 0.40625, "learning_rate": 0.00021855565427084654, "loss": 0.5539, "step": 5740 }, { "epoch": 0.14582883831909735, "grad_norm": 0.443359375, "learning_rate": 0.00021874603376062951, "loss": 0.5714, "step": 5745 }, { "epoch": 0.14595575636811312, "grad_norm": 0.4296875, "learning_rate": 0.00021893641325041247, "loss": 0.5491, "step": 5750 }, { "epoch": 0.14608267441712885, "grad_norm": 0.431640625, "learning_rate": 0.00021912679274019542, "loss": 0.5454, "step": 5755 }, { "epoch": 0.14620959246614462, "grad_norm": 0.43359375, "learning_rate": 0.0002193171722299784, "loss": 0.5599, "step": 5760 }, { "epoch": 0.14633651051516036, "grad_norm": 0.439453125, "learning_rate": 0.00021950755171976137, "loss": 0.5502, "step": 5765 }, { "epoch": 0.1464634285641761, "grad_norm": 0.431640625, "learning_rate": 0.00021969793120954435, "loss": 0.5649, "step": 5770 }, { "epoch": 0.14659034661319187, "grad_norm": 0.4609375, "learning_rate": 0.0002198883106993273, "loss": 0.5672, "step": 5775 }, { "epoch": 0.1467172646622076, "grad_norm": 0.43359375, "learning_rate": 0.00022007869018911028, "loss": 0.5537, "step": 5780 }, { "epoch": 0.14684418271122338, "grad_norm": 0.427734375, "learning_rate": 0.00022026906967889326, "loss": 0.5744, "step": 5785 }, { "epoch": 0.14697110076023912, "grad_norm": 0.6171875, "learning_rate": 0.00022045944916867618, "loss": 0.5336, "step": 5790 }, { "epoch": 0.14709801880925485, "grad_norm": 0.41015625, "learning_rate": 0.00022064982865845916, "loss": 0.568, "step": 5795 }, { "epoch": 0.14722493685827062, "grad_norm": 0.443359375, "learning_rate": 0.00022084020814824214, "loss": 0.5601, "step": 5800 }, { "epoch": 0.14735185490728636, "grad_norm": 0.41796875, "learning_rate": 0.00022103058763802512, "loss": 0.5572, "step": 5805 }, { "epoch": 0.14747877295630213, "grad_norm": 0.41015625, "learning_rate": 0.00022122096712780807, "loss": 0.5666, "step": 5810 }, { "epoch": 0.14760569100531787, "grad_norm": 0.40625, "learning_rate": 0.00022141134661759105, "loss": 0.527, "step": 5815 }, { "epoch": 0.1477326090543336, "grad_norm": 0.404296875, "learning_rate": 0.00022160172610737402, "loss": 0.5738, "step": 5820 }, { "epoch": 0.14785952710334938, "grad_norm": 0.443359375, "learning_rate": 0.00022179210559715697, "loss": 0.5611, "step": 5825 }, { "epoch": 0.14798644515236511, "grad_norm": 0.400390625, "learning_rate": 0.00022198248508693995, "loss": 0.5424, "step": 5830 }, { "epoch": 0.14811336320138088, "grad_norm": 0.396484375, "learning_rate": 0.00022217286457672293, "loss": 0.5533, "step": 5835 }, { "epoch": 0.14824028125039662, "grad_norm": 0.4453125, "learning_rate": 0.0002223632440665059, "loss": 0.549, "step": 5840 }, { "epoch": 0.14836719929941236, "grad_norm": 0.416015625, "learning_rate": 0.00022255362355628883, "loss": 0.5416, "step": 5845 }, { "epoch": 0.14849411734842813, "grad_norm": 0.44140625, "learning_rate": 0.0002227440030460718, "loss": 0.5594, "step": 5850 }, { "epoch": 0.14862103539744387, "grad_norm": 0.48046875, "learning_rate": 0.0002229343825358548, "loss": 0.5409, "step": 5855 }, { "epoch": 0.1487479534464596, "grad_norm": 0.43359375, "learning_rate": 0.00022312476202563774, "loss": 0.5981, "step": 5860 }, { "epoch": 0.14887487149547537, "grad_norm": 0.453125, "learning_rate": 0.00022331514151542072, "loss": 0.5813, "step": 5865 }, { "epoch": 0.14900178954449111, "grad_norm": 0.43359375, "learning_rate": 0.0002235055210052037, "loss": 0.5622, "step": 5870 }, { "epoch": 0.14912870759350688, "grad_norm": 0.41796875, "learning_rate": 0.00022369590049498667, "loss": 0.5028, "step": 5875 }, { "epoch": 0.14925562564252262, "grad_norm": 0.42578125, "learning_rate": 0.0002238862799847696, "loss": 0.5761, "step": 5880 }, { "epoch": 0.14938254369153836, "grad_norm": 0.419921875, "learning_rate": 0.00022407665947455258, "loss": 0.5664, "step": 5885 }, { "epoch": 0.14950946174055413, "grad_norm": 0.45703125, "learning_rate": 0.00022426703896433556, "loss": 0.5539, "step": 5890 }, { "epoch": 0.14963637978956987, "grad_norm": 0.4765625, "learning_rate": 0.0002244574184541185, "loss": 0.5648, "step": 5895 }, { "epoch": 0.14976329783858564, "grad_norm": 0.396484375, "learning_rate": 0.00022464779794390148, "loss": 0.5334, "step": 5900 }, { "epoch": 0.14989021588760137, "grad_norm": 0.392578125, "learning_rate": 0.00022483817743368446, "loss": 0.5504, "step": 5905 }, { "epoch": 0.15001713393661711, "grad_norm": 0.4296875, "learning_rate": 0.00022502855692346744, "loss": 0.5737, "step": 5910 }, { "epoch": 0.15014405198563288, "grad_norm": 0.431640625, "learning_rate": 0.0002252189364132504, "loss": 0.5751, "step": 5915 }, { "epoch": 0.15027097003464862, "grad_norm": 0.396484375, "learning_rate": 0.00022540931590303337, "loss": 0.5749, "step": 5920 }, { "epoch": 0.1503978880836644, "grad_norm": 0.4296875, "learning_rate": 0.00022559969539281635, "loss": 0.5616, "step": 5925 }, { "epoch": 0.15052480613268013, "grad_norm": 0.4375, "learning_rate": 0.00022579007488259927, "loss": 0.5847, "step": 5930 }, { "epoch": 0.15065172418169587, "grad_norm": 0.419921875, "learning_rate": 0.00022598045437238225, "loss": 0.5894, "step": 5935 }, { "epoch": 0.15077864223071163, "grad_norm": 0.416015625, "learning_rate": 0.00022617083386216523, "loss": 0.561, "step": 5940 }, { "epoch": 0.15090556027972737, "grad_norm": 0.416015625, "learning_rate": 0.0002263612133519482, "loss": 0.5374, "step": 5945 }, { "epoch": 0.15103247832874314, "grad_norm": 0.404296875, "learning_rate": 0.00022655159284173116, "loss": 0.5516, "step": 5950 }, { "epoch": 0.15115939637775888, "grad_norm": 0.427734375, "learning_rate": 0.00022674197233151414, "loss": 0.5833, "step": 5955 }, { "epoch": 0.15128631442677462, "grad_norm": 0.435546875, "learning_rate": 0.0002269323518212971, "loss": 0.585, "step": 5960 }, { "epoch": 0.1514132324757904, "grad_norm": 0.41796875, "learning_rate": 0.00022712273131108006, "loss": 0.5602, "step": 5965 }, { "epoch": 0.15154015052480613, "grad_norm": 0.4609375, "learning_rate": 0.00022731311080086304, "loss": 0.5826, "step": 5970 }, { "epoch": 0.1516670685738219, "grad_norm": 0.416015625, "learning_rate": 0.000227503490290646, "loss": 0.6, "step": 5975 }, { "epoch": 0.15179398662283763, "grad_norm": 0.431640625, "learning_rate": 0.00022769386978042897, "loss": 0.5661, "step": 5980 }, { "epoch": 0.15192090467185337, "grad_norm": 0.435546875, "learning_rate": 0.00022788424927021192, "loss": 0.584, "step": 5985 }, { "epoch": 0.15204782272086914, "grad_norm": 0.43359375, "learning_rate": 0.0002280746287599949, "loss": 0.5422, "step": 5990 }, { "epoch": 0.15217474076988488, "grad_norm": 0.443359375, "learning_rate": 0.00022826500824977788, "loss": 0.6043, "step": 5995 }, { "epoch": 0.15230165881890065, "grad_norm": 0.41796875, "learning_rate": 0.00022845538773956083, "loss": 0.5451, "step": 6000 }, { "epoch": 0.1524285768679164, "grad_norm": 0.423828125, "learning_rate": 0.0002286457672293438, "loss": 0.5444, "step": 6005 }, { "epoch": 0.15255549491693213, "grad_norm": 0.4296875, "learning_rate": 0.00022883614671912679, "loss": 0.556, "step": 6010 }, { "epoch": 0.1526824129659479, "grad_norm": 0.431640625, "learning_rate": 0.00022902652620890976, "loss": 0.5723, "step": 6015 }, { "epoch": 0.15280933101496363, "grad_norm": 0.427734375, "learning_rate": 0.0002292169056986927, "loss": 0.5679, "step": 6020 }, { "epoch": 0.1529362490639794, "grad_norm": 0.423828125, "learning_rate": 0.00022940728518847567, "loss": 0.5442, "step": 6025 }, { "epoch": 0.15306316711299514, "grad_norm": 0.43359375, "learning_rate": 0.00022959766467825864, "loss": 0.5715, "step": 6030 }, { "epoch": 0.15319008516201088, "grad_norm": 0.4296875, "learning_rate": 0.0002297880441680416, "loss": 0.5492, "step": 6035 }, { "epoch": 0.15331700321102665, "grad_norm": 0.37890625, "learning_rate": 0.00022997842365782457, "loss": 0.5618, "step": 6040 }, { "epoch": 0.1534439212600424, "grad_norm": 0.37890625, "learning_rate": 0.00023016880314760755, "loss": 0.5436, "step": 6045 }, { "epoch": 0.15357083930905815, "grad_norm": 0.416015625, "learning_rate": 0.00023035918263739053, "loss": 0.5651, "step": 6050 }, { "epoch": 0.1536977573580739, "grad_norm": 0.41796875, "learning_rate": 0.00023054956212717348, "loss": 0.5667, "step": 6055 }, { "epoch": 0.15382467540708963, "grad_norm": 0.427734375, "learning_rate": 0.00023073994161695646, "loss": 0.516, "step": 6060 }, { "epoch": 0.1539515934561054, "grad_norm": 0.451171875, "learning_rate": 0.00023093032110673944, "loss": 0.5572, "step": 6065 }, { "epoch": 0.15407851150512114, "grad_norm": 0.48046875, "learning_rate": 0.00023112070059652236, "loss": 0.5427, "step": 6070 }, { "epoch": 0.15420542955413688, "grad_norm": 0.419921875, "learning_rate": 0.00023131108008630534, "loss": 0.5831, "step": 6075 }, { "epoch": 0.15433234760315265, "grad_norm": 0.416015625, "learning_rate": 0.00023150145957608832, "loss": 0.5557, "step": 6080 }, { "epoch": 0.1544592656521684, "grad_norm": 0.421875, "learning_rate": 0.0002316918390658713, "loss": 0.567, "step": 6085 }, { "epoch": 0.15458618370118415, "grad_norm": 0.43359375, "learning_rate": 0.00023188221855565425, "loss": 0.5598, "step": 6090 }, { "epoch": 0.1547131017501999, "grad_norm": 0.44921875, "learning_rate": 0.00023207259804543722, "loss": 0.5466, "step": 6095 }, { "epoch": 0.15484001979921563, "grad_norm": 0.38671875, "learning_rate": 0.0002322629775352202, "loss": 0.5373, "step": 6100 }, { "epoch": 0.1549669378482314, "grad_norm": 0.4296875, "learning_rate": 0.00023245335702500313, "loss": 0.5394, "step": 6105 }, { "epoch": 0.15509385589724714, "grad_norm": 0.38671875, "learning_rate": 0.0002326437365147861, "loss": 0.5371, "step": 6110 }, { "epoch": 0.1552207739462629, "grad_norm": 0.365234375, "learning_rate": 0.00023283411600456908, "loss": 0.4905, "step": 6115 }, { "epoch": 0.15534769199527865, "grad_norm": 0.427734375, "learning_rate": 0.00023302449549435206, "loss": 0.5958, "step": 6120 }, { "epoch": 0.1554746100442944, "grad_norm": 0.4140625, "learning_rate": 0.000233214874984135, "loss": 0.5248, "step": 6125 }, { "epoch": 0.15560152809331015, "grad_norm": 0.44921875, "learning_rate": 0.000233405254473918, "loss": 0.5556, "step": 6130 }, { "epoch": 0.1557284461423259, "grad_norm": 0.416015625, "learning_rate": 0.00023359563396370097, "loss": 0.5335, "step": 6135 }, { "epoch": 0.15585536419134166, "grad_norm": 0.388671875, "learning_rate": 0.00023378601345348392, "loss": 0.5267, "step": 6140 }, { "epoch": 0.1559822822403574, "grad_norm": 0.39453125, "learning_rate": 0.0002339763929432669, "loss": 0.5536, "step": 6145 }, { "epoch": 0.15610920028937314, "grad_norm": 0.4375, "learning_rate": 0.00023416677243304988, "loss": 0.5459, "step": 6150 }, { "epoch": 0.1562361183383889, "grad_norm": 0.431640625, "learning_rate": 0.00023435715192283285, "loss": 0.5891, "step": 6155 }, { "epoch": 0.15636303638740465, "grad_norm": 0.43359375, "learning_rate": 0.00023454753141261578, "loss": 0.5552, "step": 6160 }, { "epoch": 0.15648995443642041, "grad_norm": 0.439453125, "learning_rate": 0.00023473791090239876, "loss": 0.5469, "step": 6165 }, { "epoch": 0.15661687248543615, "grad_norm": 0.396484375, "learning_rate": 0.00023492829039218173, "loss": 0.548, "step": 6170 }, { "epoch": 0.1567437905344519, "grad_norm": 0.423828125, "learning_rate": 0.00023511866988196469, "loss": 0.5552, "step": 6175 }, { "epoch": 0.15687070858346766, "grad_norm": 0.43359375, "learning_rate": 0.00023530904937174766, "loss": 0.5344, "step": 6180 }, { "epoch": 0.1569976266324834, "grad_norm": 0.404296875, "learning_rate": 0.00023549942886153064, "loss": 0.5848, "step": 6185 }, { "epoch": 0.15712454468149917, "grad_norm": 0.439453125, "learning_rate": 0.00023568980835131362, "loss": 0.5765, "step": 6190 }, { "epoch": 0.1572514627305149, "grad_norm": 0.4453125, "learning_rate": 0.00023588018784109657, "loss": 0.559, "step": 6195 }, { "epoch": 0.15737838077953065, "grad_norm": 0.41796875, "learning_rate": 0.00023607056733087952, "loss": 0.5327, "step": 6200 }, { "epoch": 0.15750529882854641, "grad_norm": 0.421875, "learning_rate": 0.0002362609468206625, "loss": 0.5298, "step": 6205 }, { "epoch": 0.15763221687756215, "grad_norm": 0.4296875, "learning_rate": 0.00023645132631044545, "loss": 0.5674, "step": 6210 }, { "epoch": 0.15775913492657792, "grad_norm": 0.4140625, "learning_rate": 0.00023664170580022843, "loss": 0.5396, "step": 6215 }, { "epoch": 0.15788605297559366, "grad_norm": 0.419921875, "learning_rate": 0.0002368320852900114, "loss": 0.5569, "step": 6220 }, { "epoch": 0.1580129710246094, "grad_norm": 0.423828125, "learning_rate": 0.00023702246477979439, "loss": 0.5421, "step": 6225 }, { "epoch": 0.15813988907362517, "grad_norm": 0.416015625, "learning_rate": 0.00023721284426957734, "loss": 0.5461, "step": 6230 }, { "epoch": 0.1582668071226409, "grad_norm": 0.4140625, "learning_rate": 0.00023740322375936031, "loss": 0.5754, "step": 6235 }, { "epoch": 0.15839372517165667, "grad_norm": 0.439453125, "learning_rate": 0.0002375936032491433, "loss": 0.5523, "step": 6240 }, { "epoch": 0.1585206432206724, "grad_norm": 0.42578125, "learning_rate": 0.00023778398273892622, "loss": 0.5532, "step": 6245 }, { "epoch": 0.15864756126968815, "grad_norm": 0.390625, "learning_rate": 0.0002379743622287092, "loss": 0.5434, "step": 6250 }, { "epoch": 0.15877447931870392, "grad_norm": 0.40625, "learning_rate": 0.00023816474171849217, "loss": 0.5811, "step": 6255 }, { "epoch": 0.15890139736771966, "grad_norm": 0.392578125, "learning_rate": 0.00023835512120827515, "loss": 0.5525, "step": 6260 }, { "epoch": 0.15902831541673543, "grad_norm": 0.46875, "learning_rate": 0.0002385455006980581, "loss": 0.4924, "step": 6265 }, { "epoch": 0.15915523346575117, "grad_norm": 0.4375, "learning_rate": 0.00023873588018784108, "loss": 0.5131, "step": 6270 }, { "epoch": 0.1592821515147669, "grad_norm": 0.41796875, "learning_rate": 0.00023892625967762406, "loss": 0.5203, "step": 6275 }, { "epoch": 0.15940906956378267, "grad_norm": 0.41015625, "learning_rate": 0.000239116639167407, "loss": 0.5468, "step": 6280 }, { "epoch": 0.1595359876127984, "grad_norm": 0.400390625, "learning_rate": 0.00023930701865719, "loss": 0.5413, "step": 6285 }, { "epoch": 0.15966290566181415, "grad_norm": 0.412109375, "learning_rate": 0.00023949739814697294, "loss": 0.5557, "step": 6290 }, { "epoch": 0.15978982371082992, "grad_norm": 0.404296875, "learning_rate": 0.00023968777763675592, "loss": 0.5551, "step": 6295 }, { "epoch": 0.15991674175984566, "grad_norm": 0.40625, "learning_rate": 0.00023987815712653887, "loss": 0.553, "step": 6300 }, { "epoch": 0.16004365980886143, "grad_norm": 0.431640625, "learning_rate": 0.00024006853661632185, "loss": 0.5431, "step": 6305 }, { "epoch": 0.16017057785787717, "grad_norm": 0.36328125, "learning_rate": 0.00024025891610610482, "loss": 0.5071, "step": 6310 }, { "epoch": 0.1602974959068929, "grad_norm": 0.421875, "learning_rate": 0.00024044929559588777, "loss": 0.5497, "step": 6315 }, { "epoch": 0.16042441395590867, "grad_norm": 0.41796875, "learning_rate": 0.00024063967508567075, "loss": 0.5606, "step": 6320 }, { "epoch": 0.1605513320049244, "grad_norm": 0.41796875, "learning_rate": 0.00024083005457545373, "loss": 0.5376, "step": 6325 }, { "epoch": 0.16067825005394018, "grad_norm": 0.421875, "learning_rate": 0.0002410204340652367, "loss": 0.5637, "step": 6330 }, { "epoch": 0.16080516810295592, "grad_norm": 0.470703125, "learning_rate": 0.00024121081355501963, "loss": 0.5601, "step": 6335 }, { "epoch": 0.16093208615197166, "grad_norm": 0.3984375, "learning_rate": 0.0002414011930448026, "loss": 0.5747, "step": 6340 }, { "epoch": 0.16105900420098743, "grad_norm": 0.45703125, "learning_rate": 0.0002415915725345856, "loss": 0.5664, "step": 6345 }, { "epoch": 0.16118592225000317, "grad_norm": 0.390625, "learning_rate": 0.00024178195202436854, "loss": 0.5663, "step": 6350 }, { "epoch": 0.16131284029901893, "grad_norm": 0.40625, "learning_rate": 0.00024197233151415152, "loss": 0.5348, "step": 6355 }, { "epoch": 0.16143975834803467, "grad_norm": 0.396484375, "learning_rate": 0.0002421627110039345, "loss": 0.538, "step": 6360 }, { "epoch": 0.1615666763970504, "grad_norm": 0.416015625, "learning_rate": 0.00024235309049371747, "loss": 0.572, "step": 6365 }, { "epoch": 0.16169359444606618, "grad_norm": 0.41015625, "learning_rate": 0.00024254346998350043, "loss": 0.555, "step": 6370 }, { "epoch": 0.16182051249508192, "grad_norm": 0.4140625, "learning_rate": 0.0002427338494732834, "loss": 0.5525, "step": 6375 }, { "epoch": 0.1619474305440977, "grad_norm": 0.421875, "learning_rate": 0.00024292422896306638, "loss": 0.5671, "step": 6380 }, { "epoch": 0.16207434859311343, "grad_norm": 0.376953125, "learning_rate": 0.0002431146084528493, "loss": 0.5264, "step": 6385 }, { "epoch": 0.16220126664212917, "grad_norm": 0.447265625, "learning_rate": 0.00024330498794263228, "loss": 0.5533, "step": 6390 }, { "epoch": 0.16232818469114493, "grad_norm": 0.396484375, "learning_rate": 0.00024349536743241526, "loss": 0.5671, "step": 6395 }, { "epoch": 0.16245510274016067, "grad_norm": 0.404296875, "learning_rate": 0.0002436857469221982, "loss": 0.5285, "step": 6400 }, { "epoch": 0.16258202078917644, "grad_norm": 0.423828125, "learning_rate": 0.0002438761264119812, "loss": 0.5407, "step": 6405 }, { "epoch": 0.16270893883819218, "grad_norm": 0.427734375, "learning_rate": 0.00024406650590176417, "loss": 0.5823, "step": 6410 }, { "epoch": 0.16283585688720792, "grad_norm": 0.392578125, "learning_rate": 0.0002442568853915471, "loss": 0.5368, "step": 6415 }, { "epoch": 0.1629627749362237, "grad_norm": 0.423828125, "learning_rate": 0.0002444472648813301, "loss": 0.5707, "step": 6420 }, { "epoch": 0.16308969298523943, "grad_norm": 0.44140625, "learning_rate": 0.0002446376443711131, "loss": 0.5874, "step": 6425 }, { "epoch": 0.1632166110342552, "grad_norm": 0.443359375, "learning_rate": 0.00024482802386089606, "loss": 0.5781, "step": 6430 }, { "epoch": 0.16334352908327093, "grad_norm": 0.4140625, "learning_rate": 0.000245018403350679, "loss": 0.5554, "step": 6435 }, { "epoch": 0.16347044713228667, "grad_norm": 0.427734375, "learning_rate": 0.00024520878284046196, "loss": 0.5782, "step": 6440 }, { "epoch": 0.16359736518130244, "grad_norm": 0.416015625, "learning_rate": 0.00024539916233024494, "loss": 0.517, "step": 6445 }, { "epoch": 0.16372428323031818, "grad_norm": 0.392578125, "learning_rate": 0.0002455895418200279, "loss": 0.5374, "step": 6450 }, { "epoch": 0.16385120127933395, "grad_norm": 0.4140625, "learning_rate": 0.00024577992130981084, "loss": 0.5592, "step": 6455 }, { "epoch": 0.1639781193283497, "grad_norm": 0.435546875, "learning_rate": 0.0002459703007995938, "loss": 0.5194, "step": 6460 }, { "epoch": 0.16410503737736543, "grad_norm": 0.40625, "learning_rate": 0.0002461606802893768, "loss": 0.5459, "step": 6465 }, { "epoch": 0.1642319554263812, "grad_norm": 0.404296875, "learning_rate": 0.00024635105977915977, "loss": 0.5607, "step": 6470 }, { "epoch": 0.16435887347539693, "grad_norm": 0.458984375, "learning_rate": 0.00024654143926894275, "loss": 0.5759, "step": 6475 }, { "epoch": 0.1644857915244127, "grad_norm": 0.408203125, "learning_rate": 0.00024673181875872573, "loss": 0.537, "step": 6480 }, { "epoch": 0.16461270957342844, "grad_norm": 0.416015625, "learning_rate": 0.0002469221982485087, "loss": 0.53, "step": 6485 }, { "epoch": 0.16473962762244418, "grad_norm": 0.40625, "learning_rate": 0.00024711257773829163, "loss": 0.5559, "step": 6490 }, { "epoch": 0.16486654567145995, "grad_norm": 0.45703125, "learning_rate": 0.0002473029572280746, "loss": 0.5818, "step": 6495 }, { "epoch": 0.16499346372047569, "grad_norm": 0.396484375, "learning_rate": 0.0002474933367178576, "loss": 0.5602, "step": 6500 }, { "epoch": 0.16512038176949143, "grad_norm": 0.431640625, "learning_rate": 0.0002476837162076405, "loss": 0.5531, "step": 6505 }, { "epoch": 0.1652472998185072, "grad_norm": 0.396484375, "learning_rate": 0.0002478740956974235, "loss": 0.5271, "step": 6510 }, { "epoch": 0.16537421786752293, "grad_norm": 0.392578125, "learning_rate": 0.00024806447518720647, "loss": 0.5437, "step": 6515 }, { "epoch": 0.1655011359165387, "grad_norm": 0.427734375, "learning_rate": 0.00024825485467698944, "loss": 0.5787, "step": 6520 }, { "epoch": 0.16562805396555444, "grad_norm": 0.400390625, "learning_rate": 0.0002484452341667724, "loss": 0.5271, "step": 6525 }, { "epoch": 0.16575497201457018, "grad_norm": 0.451171875, "learning_rate": 0.0002486356136565554, "loss": 0.5477, "step": 6530 }, { "epoch": 0.16588189006358595, "grad_norm": 0.431640625, "learning_rate": 0.0002488259931463384, "loss": 0.5375, "step": 6535 }, { "epoch": 0.16600880811260169, "grad_norm": 0.392578125, "learning_rate": 0.0002490163726361213, "loss": 0.5404, "step": 6540 }, { "epoch": 0.16613572616161745, "grad_norm": 0.40625, "learning_rate": 0.0002492067521259043, "loss": 0.5795, "step": 6545 }, { "epoch": 0.1662626442106332, "grad_norm": 0.396484375, "learning_rate": 0.00024939713161568726, "loss": 0.5459, "step": 6550 }, { "epoch": 0.16638956225964893, "grad_norm": 0.451171875, "learning_rate": 0.00024958751110547024, "loss": 0.561, "step": 6555 }, { "epoch": 0.1665164803086647, "grad_norm": 0.4140625, "learning_rate": 0.00024977789059525316, "loss": 0.5626, "step": 6560 }, { "epoch": 0.16664339835768044, "grad_norm": 0.4375, "learning_rate": 0.00024996827008503614, "loss": 0.5451, "step": 6565 }, { "epoch": 0.1667703164066962, "grad_norm": 0.39453125, "learning_rate": 0.0002501586495748191, "loss": 0.555, "step": 6570 }, { "epoch": 0.16689723445571195, "grad_norm": 0.396484375, "learning_rate": 0.0002503490290646021, "loss": 0.5337, "step": 6575 }, { "epoch": 0.16702415250472769, "grad_norm": 0.408203125, "learning_rate": 0.000250539408554385, "loss": 0.6116, "step": 6580 }, { "epoch": 0.16715107055374345, "grad_norm": 0.40234375, "learning_rate": 0.000250729788044168, "loss": 0.5399, "step": 6585 }, { "epoch": 0.1672779886027592, "grad_norm": 0.400390625, "learning_rate": 0.000250920167533951, "loss": 0.551, "step": 6590 }, { "epoch": 0.16740490665177496, "grad_norm": 0.423828125, "learning_rate": 0.00025111054702373395, "loss": 0.5378, "step": 6595 }, { "epoch": 0.1675318247007907, "grad_norm": 0.43359375, "learning_rate": 0.00025130092651351693, "loss": 0.5898, "step": 6600 }, { "epoch": 0.16765874274980644, "grad_norm": 0.41015625, "learning_rate": 0.0002514913060032999, "loss": 0.5819, "step": 6605 }, { "epoch": 0.1677856607988222, "grad_norm": 0.4375, "learning_rate": 0.00025168168549308283, "loss": 0.5507, "step": 6610 }, { "epoch": 0.16791257884783795, "grad_norm": 0.37890625, "learning_rate": 0.0002518720649828658, "loss": 0.5346, "step": 6615 }, { "epoch": 0.1680394968968537, "grad_norm": 0.392578125, "learning_rate": 0.0002520624444726488, "loss": 0.549, "step": 6620 }, { "epoch": 0.16816641494586945, "grad_norm": 0.435546875, "learning_rate": 0.00025225282396243177, "loss": 0.5312, "step": 6625 }, { "epoch": 0.1682933329948852, "grad_norm": 0.427734375, "learning_rate": 0.0002524432034522147, "loss": 0.541, "step": 6630 }, { "epoch": 0.16842025104390096, "grad_norm": 0.408203125, "learning_rate": 0.00025263358294199767, "loss": 0.5663, "step": 6635 }, { "epoch": 0.1685471690929167, "grad_norm": 0.400390625, "learning_rate": 0.00025282396243178065, "loss": 0.5445, "step": 6640 }, { "epoch": 0.16867408714193247, "grad_norm": 0.4140625, "learning_rate": 0.0002530143419215636, "loss": 0.5921, "step": 6645 }, { "epoch": 0.1688010051909482, "grad_norm": 0.4140625, "learning_rate": 0.0002532047214113466, "loss": 0.5209, "step": 6650 }, { "epoch": 0.16892792323996395, "grad_norm": 0.390625, "learning_rate": 0.0002533951009011296, "loss": 0.5136, "step": 6655 }, { "epoch": 0.1690548412889797, "grad_norm": 0.396484375, "learning_rate": 0.00025358548039091256, "loss": 0.5266, "step": 6660 }, { "epoch": 0.16918175933799545, "grad_norm": 0.400390625, "learning_rate": 0.0002537758598806955, "loss": 0.5188, "step": 6665 }, { "epoch": 0.16930867738701122, "grad_norm": 0.458984375, "learning_rate": 0.00025396623937047846, "loss": 0.5919, "step": 6670 }, { "epoch": 0.16943559543602696, "grad_norm": 0.423828125, "learning_rate": 0.00025415661886026144, "loss": 0.5584, "step": 6675 }, { "epoch": 0.1695625134850427, "grad_norm": 0.41796875, "learning_rate": 0.00025434699835004437, "loss": 0.5707, "step": 6680 }, { "epoch": 0.16968943153405847, "grad_norm": 0.421875, "learning_rate": 0.00025453737783982734, "loss": 0.576, "step": 6685 }, { "epoch": 0.1698163495830742, "grad_norm": 0.375, "learning_rate": 0.0002547277573296103, "loss": 0.5492, "step": 6690 }, { "epoch": 0.16994326763208997, "grad_norm": 0.42578125, "learning_rate": 0.0002549181368193933, "loss": 0.5418, "step": 6695 }, { "epoch": 0.1700701856811057, "grad_norm": 0.41796875, "learning_rate": 0.0002551085163091763, "loss": 0.561, "step": 6700 }, { "epoch": 0.17019710373012145, "grad_norm": 0.4296875, "learning_rate": 0.00025529889579895926, "loss": 0.5693, "step": 6705 }, { "epoch": 0.17032402177913722, "grad_norm": 0.4296875, "learning_rate": 0.00025548927528874223, "loss": 0.565, "step": 6710 }, { "epoch": 0.17045093982815296, "grad_norm": 0.40625, "learning_rate": 0.00025567965477852516, "loss": 0.5392, "step": 6715 }, { "epoch": 0.17057785787716873, "grad_norm": 0.412109375, "learning_rate": 0.00025587003426830814, "loss": 0.5531, "step": 6720 }, { "epoch": 0.17070477592618447, "grad_norm": 0.408203125, "learning_rate": 0.0002560604137580911, "loss": 0.5566, "step": 6725 }, { "epoch": 0.1708316939752002, "grad_norm": 0.41796875, "learning_rate": 0.0002562507932478741, "loss": 0.5465, "step": 6730 }, { "epoch": 0.17095861202421597, "grad_norm": 0.46875, "learning_rate": 0.000256441172737657, "loss": 0.5674, "step": 6735 }, { "epoch": 0.1710855300732317, "grad_norm": 0.41796875, "learning_rate": 0.00025663155222744, "loss": 0.5648, "step": 6740 }, { "epoch": 0.17121244812224745, "grad_norm": 0.41796875, "learning_rate": 0.00025682193171722297, "loss": 0.5293, "step": 6745 }, { "epoch": 0.17133936617126322, "grad_norm": 0.828125, "learning_rate": 0.00025701231120700595, "loss": 0.5612, "step": 6750 }, { "epoch": 0.17146628422027896, "grad_norm": 0.4609375, "learning_rate": 0.00025720269069678893, "loss": 0.5096, "step": 6755 }, { "epoch": 0.17159320226929473, "grad_norm": 0.453125, "learning_rate": 0.0002573930701865719, "loss": 0.559, "step": 6760 }, { "epoch": 0.17172012031831047, "grad_norm": 0.41796875, "learning_rate": 0.0002575834496763549, "loss": 0.5382, "step": 6765 }, { "epoch": 0.1718470383673262, "grad_norm": 0.42578125, "learning_rate": 0.0002577738291661378, "loss": 0.5784, "step": 6770 }, { "epoch": 0.17197395641634197, "grad_norm": 0.384765625, "learning_rate": 0.0002579642086559208, "loss": 0.5481, "step": 6775 }, { "epoch": 0.1721008744653577, "grad_norm": 0.37890625, "learning_rate": 0.00025815458814570377, "loss": 0.5355, "step": 6780 }, { "epoch": 0.17222779251437348, "grad_norm": 0.376953125, "learning_rate": 0.0002583449676354867, "loss": 0.5577, "step": 6785 }, { "epoch": 0.17235471056338922, "grad_norm": 0.42578125, "learning_rate": 0.00025853534712526967, "loss": 0.5232, "step": 6790 }, { "epoch": 0.17248162861240496, "grad_norm": 0.4296875, "learning_rate": 0.00025872572661505265, "loss": 0.5516, "step": 6795 }, { "epoch": 0.17260854666142073, "grad_norm": 0.39453125, "learning_rate": 0.0002589161061048356, "loss": 0.5222, "step": 6800 }, { "epoch": 0.17273546471043646, "grad_norm": 0.3828125, "learning_rate": 0.00025910648559461855, "loss": 0.5176, "step": 6805 }, { "epoch": 0.17286238275945223, "grad_norm": 0.419921875, "learning_rate": 0.0002592968650844015, "loss": 0.532, "step": 6810 }, { "epoch": 0.17298930080846797, "grad_norm": 0.39453125, "learning_rate": 0.0002594872445741845, "loss": 0.5756, "step": 6815 }, { "epoch": 0.1731162188574837, "grad_norm": 0.435546875, "learning_rate": 0.0002596776240639675, "loss": 0.5668, "step": 6820 }, { "epoch": 0.17324313690649948, "grad_norm": 0.447265625, "learning_rate": 0.00025986800355375046, "loss": 0.5656, "step": 6825 }, { "epoch": 0.17337005495551522, "grad_norm": 0.40234375, "learning_rate": 0.00026005838304353344, "loss": 0.5777, "step": 6830 }, { "epoch": 0.17349697300453099, "grad_norm": 0.396484375, "learning_rate": 0.0002602487625333164, "loss": 0.5545, "step": 6835 }, { "epoch": 0.17362389105354673, "grad_norm": 0.330078125, "learning_rate": 0.00026043914202309934, "loss": 0.5438, "step": 6840 }, { "epoch": 0.17375080910256246, "grad_norm": 0.40234375, "learning_rate": 0.0002606295215128823, "loss": 0.5591, "step": 6845 }, { "epoch": 0.17387772715157823, "grad_norm": 0.412109375, "learning_rate": 0.0002608199010026653, "loss": 0.5311, "step": 6850 }, { "epoch": 0.17400464520059397, "grad_norm": 0.39453125, "learning_rate": 0.0002610102804924482, "loss": 0.5635, "step": 6855 }, { "epoch": 0.17413156324960974, "grad_norm": 0.41796875, "learning_rate": 0.0002612006599822312, "loss": 0.5633, "step": 6860 }, { "epoch": 0.17425848129862548, "grad_norm": 0.412109375, "learning_rate": 0.0002613910394720142, "loss": 0.5442, "step": 6865 }, { "epoch": 0.17438539934764122, "grad_norm": 0.3984375, "learning_rate": 0.00026158141896179715, "loss": 0.5405, "step": 6870 }, { "epoch": 0.17451231739665699, "grad_norm": 0.39453125, "learning_rate": 0.00026177179845158013, "loss": 0.5077, "step": 6875 }, { "epoch": 0.17463923544567272, "grad_norm": 0.40234375, "learning_rate": 0.0002619621779413631, "loss": 0.5341, "step": 6880 }, { "epoch": 0.1747661534946885, "grad_norm": 0.388671875, "learning_rate": 0.0002621525574311461, "loss": 0.5268, "step": 6885 }, { "epoch": 0.17489307154370423, "grad_norm": 0.3984375, "learning_rate": 0.000262342936920929, "loss": 0.5766, "step": 6890 }, { "epoch": 0.17501998959271997, "grad_norm": 0.423828125, "learning_rate": 0.000262533316410712, "loss": 0.5319, "step": 6895 }, { "epoch": 0.17514690764173574, "grad_norm": 0.4296875, "learning_rate": 0.00026272369590049497, "loss": 0.5401, "step": 6900 }, { "epoch": 0.17527382569075148, "grad_norm": 0.375, "learning_rate": 0.00026291407539027795, "loss": 0.5646, "step": 6905 }, { "epoch": 0.17540074373976725, "grad_norm": 0.453125, "learning_rate": 0.00026310445488006087, "loss": 0.5647, "step": 6910 }, { "epoch": 0.17552766178878298, "grad_norm": 0.396484375, "learning_rate": 0.00026329483436984385, "loss": 0.5273, "step": 6915 }, { "epoch": 0.17565457983779872, "grad_norm": 0.376953125, "learning_rate": 0.00026348521385962683, "loss": 0.5129, "step": 6920 }, { "epoch": 0.1757814978868145, "grad_norm": 0.400390625, "learning_rate": 0.0002636755933494098, "loss": 0.5647, "step": 6925 }, { "epoch": 0.17590841593583023, "grad_norm": 0.41015625, "learning_rate": 0.0002638659728391928, "loss": 0.5296, "step": 6930 }, { "epoch": 0.176035333984846, "grad_norm": 0.37890625, "learning_rate": 0.00026405635232897576, "loss": 0.5346, "step": 6935 }, { "epoch": 0.17616225203386174, "grad_norm": 0.404296875, "learning_rate": 0.00026424673181875874, "loss": 0.5616, "step": 6940 }, { "epoch": 0.17628917008287748, "grad_norm": 0.392578125, "learning_rate": 0.00026443711130854166, "loss": 0.5301, "step": 6945 }, { "epoch": 0.17641608813189325, "grad_norm": 0.38671875, "learning_rate": 0.00026462749079832464, "loss": 0.5385, "step": 6950 }, { "epoch": 0.17654300618090898, "grad_norm": 0.41796875, "learning_rate": 0.0002648178702881076, "loss": 0.5477, "step": 6955 }, { "epoch": 0.17666992422992472, "grad_norm": 0.380859375, "learning_rate": 0.00026500824977789054, "loss": 0.5338, "step": 6960 }, { "epoch": 0.1767968422789405, "grad_norm": 0.400390625, "learning_rate": 0.0002651986292676735, "loss": 0.5438, "step": 6965 }, { "epoch": 0.17692376032795623, "grad_norm": 0.435546875, "learning_rate": 0.0002653890087574565, "loss": 0.5328, "step": 6970 }, { "epoch": 0.177050678376972, "grad_norm": 0.39453125, "learning_rate": 0.0002655793882472395, "loss": 0.5288, "step": 6975 }, { "epoch": 0.17717759642598774, "grad_norm": 0.40625, "learning_rate": 0.00026576976773702246, "loss": 0.5493, "step": 6980 }, { "epoch": 0.17730451447500348, "grad_norm": 0.431640625, "learning_rate": 0.00026596014722680544, "loss": 0.5871, "step": 6985 }, { "epoch": 0.17743143252401924, "grad_norm": 0.42578125, "learning_rate": 0.00026615052671658836, "loss": 0.5691, "step": 6990 }, { "epoch": 0.17755835057303498, "grad_norm": 0.40625, "learning_rate": 0.00026634090620637134, "loss": 0.5604, "step": 6995 }, { "epoch": 0.17768526862205075, "grad_norm": 0.404296875, "learning_rate": 0.0002665312856961543, "loss": 0.5443, "step": 7000 }, { "epoch": 0.1778121866710665, "grad_norm": 0.40234375, "learning_rate": 0.0002667216651859373, "loss": 0.5759, "step": 7005 }, { "epoch": 0.17793910472008223, "grad_norm": 0.4453125, "learning_rate": 0.00026691204467572027, "loss": 0.5729, "step": 7010 }, { "epoch": 0.178066022769098, "grad_norm": 0.390625, "learning_rate": 0.0002671024241655032, "loss": 0.5315, "step": 7015 }, { "epoch": 0.17819294081811374, "grad_norm": 0.41796875, "learning_rate": 0.0002672928036552862, "loss": 0.5569, "step": 7020 }, { "epoch": 0.1783198588671295, "grad_norm": 0.4453125, "learning_rate": 0.00026748318314506915, "loss": 0.5309, "step": 7025 }, { "epoch": 0.17844677691614524, "grad_norm": 0.40625, "learning_rate": 0.0002676735626348521, "loss": 0.5449, "step": 7030 }, { "epoch": 0.17857369496516098, "grad_norm": 0.43359375, "learning_rate": 0.00026786394212463505, "loss": 0.5802, "step": 7035 }, { "epoch": 0.17870061301417675, "grad_norm": 0.412109375, "learning_rate": 0.00026805432161441803, "loss": 0.5516, "step": 7040 }, { "epoch": 0.1788275310631925, "grad_norm": 0.453125, "learning_rate": 0.000268244701104201, "loss": 0.5875, "step": 7045 }, { "epoch": 0.17895444911220826, "grad_norm": 0.427734375, "learning_rate": 0.000268435080593984, "loss": 0.5163, "step": 7050 }, { "epoch": 0.179081367161224, "grad_norm": 0.419921875, "learning_rate": 0.00026862546008376697, "loss": 0.5217, "step": 7055 }, { "epoch": 0.17920828521023974, "grad_norm": 0.396484375, "learning_rate": 0.00026881583957354994, "loss": 0.543, "step": 7060 }, { "epoch": 0.1793352032592555, "grad_norm": 0.40625, "learning_rate": 0.00026900621906333287, "loss": 0.5588, "step": 7065 }, { "epoch": 0.17946212130827124, "grad_norm": 0.41015625, "learning_rate": 0.00026919659855311585, "loss": 0.5708, "step": 7070 }, { "epoch": 0.179589039357287, "grad_norm": 0.39453125, "learning_rate": 0.0002693869780428988, "loss": 0.5483, "step": 7075 }, { "epoch": 0.17971595740630275, "grad_norm": 0.431640625, "learning_rate": 0.0002695773575326818, "loss": 0.5389, "step": 7080 }, { "epoch": 0.1798428754553185, "grad_norm": 0.40625, "learning_rate": 0.0002697677370224647, "loss": 0.5435, "step": 7085 }, { "epoch": 0.17996979350433426, "grad_norm": 0.404296875, "learning_rate": 0.0002699581165122477, "loss": 0.5622, "step": 7090 }, { "epoch": 0.18009671155335, "grad_norm": 0.38671875, "learning_rate": 0.0002701484960020307, "loss": 0.5643, "step": 7095 }, { "epoch": 0.18022362960236576, "grad_norm": 0.41015625, "learning_rate": 0.00027033887549181366, "loss": 0.5802, "step": 7100 }, { "epoch": 0.1803505476513815, "grad_norm": 0.40625, "learning_rate": 0.00027052925498159664, "loss": 0.54, "step": 7105 }, { "epoch": 0.18047746570039724, "grad_norm": 0.412109375, "learning_rate": 0.0002707196344713796, "loss": 0.531, "step": 7110 }, { "epoch": 0.180604383749413, "grad_norm": 0.38671875, "learning_rate": 0.0002709100139611626, "loss": 0.5337, "step": 7115 }, { "epoch": 0.18073130179842875, "grad_norm": 0.404296875, "learning_rate": 0.0002711003934509455, "loss": 0.5435, "step": 7120 }, { "epoch": 0.18085821984744452, "grad_norm": 0.408203125, "learning_rate": 0.0002712907729407285, "loss": 0.557, "step": 7125 }, { "epoch": 0.18098513789646026, "grad_norm": 0.43359375, "learning_rate": 0.0002714811524305115, "loss": 0.5483, "step": 7130 }, { "epoch": 0.181112055945476, "grad_norm": 0.384765625, "learning_rate": 0.0002716715319202944, "loss": 0.5415, "step": 7135 }, { "epoch": 0.18123897399449176, "grad_norm": 0.408203125, "learning_rate": 0.0002718619114100774, "loss": 0.5405, "step": 7140 }, { "epoch": 0.1813658920435075, "grad_norm": 0.380859375, "learning_rate": 0.00027205229089986036, "loss": 0.5357, "step": 7145 }, { "epoch": 0.18149281009252327, "grad_norm": 0.380859375, "learning_rate": 0.00027224267038964333, "loss": 0.5387, "step": 7150 }, { "epoch": 0.181619728141539, "grad_norm": 0.38671875, "learning_rate": 0.0002724330498794263, "loss": 0.5301, "step": 7155 }, { "epoch": 0.18174664619055475, "grad_norm": 0.359375, "learning_rate": 0.0002726234293692093, "loss": 0.5229, "step": 7160 }, { "epoch": 0.18187356423957052, "grad_norm": 0.42578125, "learning_rate": 0.00027281380885899227, "loss": 0.5791, "step": 7165 }, { "epoch": 0.18200048228858626, "grad_norm": 0.4375, "learning_rate": 0.0002730041883487752, "loss": 0.551, "step": 7170 }, { "epoch": 0.182127400337602, "grad_norm": 0.38671875, "learning_rate": 0.00027319456783855817, "loss": 0.5698, "step": 7175 }, { "epoch": 0.18225431838661776, "grad_norm": 0.396484375, "learning_rate": 0.00027338494732834115, "loss": 0.5743, "step": 7180 }, { "epoch": 0.1823812364356335, "grad_norm": 0.408203125, "learning_rate": 0.0002735753268181241, "loss": 0.5782, "step": 7185 }, { "epoch": 0.18250815448464927, "grad_norm": 0.353515625, "learning_rate": 0.00027376570630790705, "loss": 0.5333, "step": 7190 }, { "epoch": 0.182635072533665, "grad_norm": 0.392578125, "learning_rate": 0.00027395608579769003, "loss": 0.5718, "step": 7195 }, { "epoch": 0.18276199058268075, "grad_norm": 0.42578125, "learning_rate": 0.000274146465287473, "loss": 0.5504, "step": 7200 }, { "epoch": 0.18288890863169652, "grad_norm": 0.38671875, "learning_rate": 0.000274336844777256, "loss": 0.535, "step": 7205 }, { "epoch": 0.18301582668071226, "grad_norm": 0.38671875, "learning_rate": 0.00027452722426703896, "loss": 0.5563, "step": 7210 }, { "epoch": 0.18314274472972802, "grad_norm": 0.390625, "learning_rate": 0.0002747176037568219, "loss": 0.5561, "step": 7215 }, { "epoch": 0.18326966277874376, "grad_norm": 0.408203125, "learning_rate": 0.00027490798324660487, "loss": 0.5184, "step": 7220 }, { "epoch": 0.1833965808277595, "grad_norm": 0.390625, "learning_rate": 0.00027509836273638784, "loss": 0.5496, "step": 7225 }, { "epoch": 0.18352349887677527, "grad_norm": 0.40625, "learning_rate": 0.0002752887422261708, "loss": 0.5395, "step": 7230 }, { "epoch": 0.183650416925791, "grad_norm": 0.40234375, "learning_rate": 0.0002754791217159538, "loss": 0.5546, "step": 7235 }, { "epoch": 0.18377733497480678, "grad_norm": 0.3671875, "learning_rate": 0.0002756695012057367, "loss": 0.5446, "step": 7240 }, { "epoch": 0.18390425302382252, "grad_norm": 0.41015625, "learning_rate": 0.0002758598806955197, "loss": 0.5324, "step": 7245 }, { "epoch": 0.18403117107283826, "grad_norm": 0.37890625, "learning_rate": 0.0002760502601853027, "loss": 0.5318, "step": 7250 }, { "epoch": 0.18415808912185402, "grad_norm": 0.41796875, "learning_rate": 0.00027624063967508566, "loss": 0.5762, "step": 7255 }, { "epoch": 0.18428500717086976, "grad_norm": 0.41796875, "learning_rate": 0.0002764310191648686, "loss": 0.5519, "step": 7260 }, { "epoch": 0.18441192521988553, "grad_norm": 0.40625, "learning_rate": 0.00027662139865465156, "loss": 0.5607, "step": 7265 }, { "epoch": 0.18453884326890127, "grad_norm": 0.40625, "learning_rate": 0.00027681177814443454, "loss": 0.4999, "step": 7270 }, { "epoch": 0.184665761317917, "grad_norm": 0.41796875, "learning_rate": 0.0002770021576342175, "loss": 0.516, "step": 7275 }, { "epoch": 0.18479267936693278, "grad_norm": 0.3828125, "learning_rate": 0.0002771925371240005, "loss": 0.5373, "step": 7280 }, { "epoch": 0.18491959741594852, "grad_norm": 0.3828125, "learning_rate": 0.00027738291661378347, "loss": 0.5316, "step": 7285 }, { "epoch": 0.18504651546496428, "grad_norm": 0.376953125, "learning_rate": 0.00027757329610356645, "loss": 0.5227, "step": 7290 }, { "epoch": 0.18517343351398002, "grad_norm": 0.4140625, "learning_rate": 0.0002777636755933494, "loss": 0.5232, "step": 7295 }, { "epoch": 0.18530035156299576, "grad_norm": 0.3984375, "learning_rate": 0.00027795405508313235, "loss": 0.5211, "step": 7300 }, { "epoch": 0.18542726961201153, "grad_norm": 0.3984375, "learning_rate": 0.00027814443457291533, "loss": 0.5121, "step": 7305 }, { "epoch": 0.18555418766102727, "grad_norm": 0.396484375, "learning_rate": 0.00027833481406269825, "loss": 0.5504, "step": 7310 }, { "epoch": 0.18568110571004304, "grad_norm": 0.392578125, "learning_rate": 0.00027852519355248123, "loss": 0.5364, "step": 7315 }, { "epoch": 0.18580802375905878, "grad_norm": 0.404296875, "learning_rate": 0.0002787155730422642, "loss": 0.518, "step": 7320 }, { "epoch": 0.18593494180807452, "grad_norm": 0.419921875, "learning_rate": 0.0002789059525320472, "loss": 0.5439, "step": 7325 }, { "epoch": 0.18606185985709028, "grad_norm": 0.41015625, "learning_rate": 0.00027909633202183017, "loss": 0.5728, "step": 7330 }, { "epoch": 0.18618877790610602, "grad_norm": 0.3984375, "learning_rate": 0.00027928671151161315, "loss": 0.5491, "step": 7335 }, { "epoch": 0.1863156959551218, "grad_norm": 0.392578125, "learning_rate": 0.0002794770910013961, "loss": 0.5359, "step": 7340 }, { "epoch": 0.18644261400413753, "grad_norm": 0.419921875, "learning_rate": 0.00027966747049117905, "loss": 0.5434, "step": 7345 }, { "epoch": 0.18656953205315327, "grad_norm": 0.4140625, "learning_rate": 0.000279857849980962, "loss": 0.5644, "step": 7350 }, { "epoch": 0.18669645010216904, "grad_norm": 0.388671875, "learning_rate": 0.000280048229470745, "loss": 0.5519, "step": 7355 }, { "epoch": 0.18682336815118478, "grad_norm": 0.412109375, "learning_rate": 0.000280238608960528, "loss": 0.5522, "step": 7360 }, { "epoch": 0.18695028620020054, "grad_norm": 0.400390625, "learning_rate": 0.0002804289884503109, "loss": 0.5228, "step": 7365 }, { "epoch": 0.18707720424921628, "grad_norm": 0.423828125, "learning_rate": 0.0002806193679400939, "loss": 0.5731, "step": 7370 }, { "epoch": 0.18720412229823202, "grad_norm": 0.4140625, "learning_rate": 0.00028080974742987686, "loss": 0.5332, "step": 7375 }, { "epoch": 0.1873310403472478, "grad_norm": 0.419921875, "learning_rate": 0.00028100012691965984, "loss": 0.5476, "step": 7380 }, { "epoch": 0.18745795839626353, "grad_norm": 0.40234375, "learning_rate": 0.0002811905064094428, "loss": 0.5372, "step": 7385 }, { "epoch": 0.18758487644527927, "grad_norm": 0.392578125, "learning_rate": 0.0002813808858992258, "loss": 0.5308, "step": 7390 }, { "epoch": 0.18771179449429504, "grad_norm": 0.384765625, "learning_rate": 0.0002815712653890088, "loss": 0.5185, "step": 7395 }, { "epoch": 0.18783871254331078, "grad_norm": 0.396484375, "learning_rate": 0.0002817616448787917, "loss": 0.5506, "step": 7400 }, { "epoch": 0.18796563059232654, "grad_norm": 0.423828125, "learning_rate": 0.0002819520243685747, "loss": 0.5284, "step": 7405 }, { "epoch": 0.18809254864134228, "grad_norm": 0.40234375, "learning_rate": 0.00028214240385835765, "loss": 0.5801, "step": 7410 }, { "epoch": 0.18821946669035802, "grad_norm": 0.41796875, "learning_rate": 0.0002823327833481406, "loss": 0.5713, "step": 7415 }, { "epoch": 0.1883463847393738, "grad_norm": 0.3984375, "learning_rate": 0.00028252316283792356, "loss": 0.5451, "step": 7420 }, { "epoch": 0.18847330278838953, "grad_norm": 0.40625, "learning_rate": 0.00028271354232770654, "loss": 0.5494, "step": 7425 }, { "epoch": 0.1886002208374053, "grad_norm": 0.404296875, "learning_rate": 0.0002829039218174895, "loss": 0.5516, "step": 7430 }, { "epoch": 0.18872713888642104, "grad_norm": 0.43359375, "learning_rate": 0.00028309430130727244, "loss": 0.5477, "step": 7435 }, { "epoch": 0.18885405693543678, "grad_norm": 0.408203125, "learning_rate": 0.0002832846807970554, "loss": 0.5799, "step": 7440 }, { "epoch": 0.18898097498445254, "grad_norm": 0.384765625, "learning_rate": 0.0002834750602868384, "loss": 0.5443, "step": 7445 }, { "epoch": 0.18910789303346828, "grad_norm": 0.44921875, "learning_rate": 0.00028366543977662137, "loss": 0.5506, "step": 7450 }, { "epoch": 0.18923481108248405, "grad_norm": 0.392578125, "learning_rate": 0.00028385581926640435, "loss": 0.5417, "step": 7455 }, { "epoch": 0.1893617291314998, "grad_norm": 0.40625, "learning_rate": 0.00028404619875618733, "loss": 0.5206, "step": 7460 }, { "epoch": 0.18948864718051553, "grad_norm": 0.39453125, "learning_rate": 0.0002842365782459703, "loss": 0.5696, "step": 7465 }, { "epoch": 0.1896155652295313, "grad_norm": 0.4140625, "learning_rate": 0.00028442695773575323, "loss": 0.5809, "step": 7470 }, { "epoch": 0.18974248327854704, "grad_norm": 0.3984375, "learning_rate": 0.0002846173372255362, "loss": 0.5542, "step": 7475 }, { "epoch": 0.1898694013275628, "grad_norm": 0.40234375, "learning_rate": 0.0002848077167153192, "loss": 0.5131, "step": 7480 }, { "epoch": 0.18999631937657854, "grad_norm": 0.408203125, "learning_rate": 0.0002849980962051021, "loss": 0.5388, "step": 7485 }, { "epoch": 0.19012323742559428, "grad_norm": 0.390625, "learning_rate": 0.0002851884756948851, "loss": 0.5559, "step": 7490 }, { "epoch": 0.19025015547461005, "grad_norm": 0.384765625, "learning_rate": 0.00028537885518466807, "loss": 0.5695, "step": 7495 }, { "epoch": 0.1903770735236258, "grad_norm": 0.388671875, "learning_rate": 0.00028556923467445104, "loss": 0.508, "step": 7500 }, { "epoch": 0.19050399157264156, "grad_norm": 0.3984375, "learning_rate": 0.000285759614164234, "loss": 0.5414, "step": 7505 }, { "epoch": 0.1906309096216573, "grad_norm": 0.427734375, "learning_rate": 0.000285949993654017, "loss": 0.5416, "step": 7510 }, { "epoch": 0.19075782767067304, "grad_norm": 0.373046875, "learning_rate": 0.0002861403731438, "loss": 0.5209, "step": 7515 }, { "epoch": 0.1908847457196888, "grad_norm": 0.40625, "learning_rate": 0.0002863307526335829, "loss": 0.557, "step": 7520 }, { "epoch": 0.19101166376870454, "grad_norm": 0.39453125, "learning_rate": 0.0002865211321233659, "loss": 0.541, "step": 7525 }, { "epoch": 0.1911385818177203, "grad_norm": 0.412109375, "learning_rate": 0.00028671151161314886, "loss": 0.5559, "step": 7530 }, { "epoch": 0.19126549986673605, "grad_norm": 0.39453125, "learning_rate": 0.0002869018911029318, "loss": 0.5286, "step": 7535 }, { "epoch": 0.1913924179157518, "grad_norm": 0.37890625, "learning_rate": 0.00028709227059271476, "loss": 0.4846, "step": 7540 }, { "epoch": 0.19151933596476756, "grad_norm": 0.388671875, "learning_rate": 0.00028728265008249774, "loss": 0.5334, "step": 7545 }, { "epoch": 0.1916462540137833, "grad_norm": 0.396484375, "learning_rate": 0.0002874730295722807, "loss": 0.5379, "step": 7550 }, { "epoch": 0.19177317206279906, "grad_norm": 0.37890625, "learning_rate": 0.0002876634090620637, "loss": 0.5096, "step": 7555 }, { "epoch": 0.1919000901118148, "grad_norm": 0.388671875, "learning_rate": 0.0002878537885518467, "loss": 0.5091, "step": 7560 }, { "epoch": 0.19202700816083054, "grad_norm": 0.412109375, "learning_rate": 0.00028804416804162965, "loss": 0.5459, "step": 7565 }, { "epoch": 0.1921539262098463, "grad_norm": 0.404296875, "learning_rate": 0.0002882345475314126, "loss": 0.5372, "step": 7570 }, { "epoch": 0.19228084425886205, "grad_norm": 0.396484375, "learning_rate": 0.00028842492702119555, "loss": 0.5451, "step": 7575 }, { "epoch": 0.19240776230787782, "grad_norm": 0.435546875, "learning_rate": 0.00028861530651097853, "loss": 0.5555, "step": 7580 }, { "epoch": 0.19253468035689356, "grad_norm": 0.388671875, "learning_rate": 0.0002888056860007615, "loss": 0.5243, "step": 7585 }, { "epoch": 0.1926615984059093, "grad_norm": 0.37109375, "learning_rate": 0.00028899606549054443, "loss": 0.5301, "step": 7590 }, { "epoch": 0.19278851645492506, "grad_norm": 0.3984375, "learning_rate": 0.0002891864449803274, "loss": 0.5274, "step": 7595 }, { "epoch": 0.1929154345039408, "grad_norm": 0.41015625, "learning_rate": 0.0002893768244701104, "loss": 0.5414, "step": 7600 }, { "epoch": 0.19304235255295657, "grad_norm": 0.423828125, "learning_rate": 0.00028956720395989337, "loss": 0.5283, "step": 7605 }, { "epoch": 0.1931692706019723, "grad_norm": 0.392578125, "learning_rate": 0.00028975758344967635, "loss": 0.5335, "step": 7610 }, { "epoch": 0.19329618865098805, "grad_norm": 0.39453125, "learning_rate": 0.0002899479629394593, "loss": 0.4989, "step": 7615 }, { "epoch": 0.19342310670000382, "grad_norm": 0.3671875, "learning_rate": 0.0002901383424292423, "loss": 0.5213, "step": 7620 }, { "epoch": 0.19355002474901956, "grad_norm": 0.375, "learning_rate": 0.0002903287219190252, "loss": 0.5217, "step": 7625 }, { "epoch": 0.1936769427980353, "grad_norm": 0.404296875, "learning_rate": 0.0002905191014088082, "loss": 0.5712, "step": 7630 }, { "epoch": 0.19380386084705106, "grad_norm": 0.41015625, "learning_rate": 0.0002907094808985912, "loss": 0.4914, "step": 7635 }, { "epoch": 0.1939307788960668, "grad_norm": 0.4140625, "learning_rate": 0.0002908998603883741, "loss": 0.5577, "step": 7640 }, { "epoch": 0.19405769694508257, "grad_norm": 0.392578125, "learning_rate": 0.0002910902398781571, "loss": 0.5626, "step": 7645 }, { "epoch": 0.1941846149940983, "grad_norm": 0.376953125, "learning_rate": 0.00029128061936794006, "loss": 0.5459, "step": 7650 }, { "epoch": 0.19431153304311405, "grad_norm": 0.380859375, "learning_rate": 0.00029147099885772304, "loss": 0.5418, "step": 7655 }, { "epoch": 0.19443845109212982, "grad_norm": 0.380859375, "learning_rate": 0.00029166137834750597, "loss": 0.512, "step": 7660 }, { "epoch": 0.19456536914114556, "grad_norm": 0.419921875, "learning_rate": 0.00029185175783728894, "loss": 0.5531, "step": 7665 }, { "epoch": 0.19469228719016132, "grad_norm": 0.4140625, "learning_rate": 0.0002920421373270719, "loss": 0.5552, "step": 7670 }, { "epoch": 0.19481920523917706, "grad_norm": 0.4453125, "learning_rate": 0.0002922325168168549, "loss": 0.5624, "step": 7675 }, { "epoch": 0.1949461232881928, "grad_norm": 0.41796875, "learning_rate": 0.0002924228963066379, "loss": 0.5409, "step": 7680 }, { "epoch": 0.19507304133720857, "grad_norm": 0.41015625, "learning_rate": 0.00029261327579642086, "loss": 0.5389, "step": 7685 }, { "epoch": 0.1951999593862243, "grad_norm": 0.439453125, "learning_rate": 0.00029280365528620383, "loss": 0.5965, "step": 7690 }, { "epoch": 0.19532687743524008, "grad_norm": 0.3984375, "learning_rate": 0.00029299403477598676, "loss": 0.5278, "step": 7695 }, { "epoch": 0.19545379548425582, "grad_norm": 0.396484375, "learning_rate": 0.00029318441426576974, "loss": 0.5238, "step": 7700 }, { "epoch": 0.19558071353327156, "grad_norm": 0.41796875, "learning_rate": 0.0002933747937555527, "loss": 0.5239, "step": 7705 }, { "epoch": 0.19570763158228732, "grad_norm": 0.38671875, "learning_rate": 0.00029356517324533564, "loss": 0.536, "step": 7710 }, { "epoch": 0.19583454963130306, "grad_norm": 0.369140625, "learning_rate": 0.0002937555527351186, "loss": 0.5289, "step": 7715 }, { "epoch": 0.19596146768031883, "grad_norm": 0.4296875, "learning_rate": 0.0002939459322249016, "loss": 0.561, "step": 7720 }, { "epoch": 0.19608838572933457, "grad_norm": 0.3984375, "learning_rate": 0.00029413631171468457, "loss": 0.5326, "step": 7725 }, { "epoch": 0.1962153037783503, "grad_norm": 0.380859375, "learning_rate": 0.00029432669120446755, "loss": 0.5336, "step": 7730 }, { "epoch": 0.19634222182736608, "grad_norm": 0.400390625, "learning_rate": 0.00029451707069425053, "loss": 0.5486, "step": 7735 }, { "epoch": 0.19646913987638182, "grad_norm": 0.361328125, "learning_rate": 0.0002947074501840335, "loss": 0.5545, "step": 7740 }, { "epoch": 0.19659605792539758, "grad_norm": 0.365234375, "learning_rate": 0.00029489782967381643, "loss": 0.518, "step": 7745 }, { "epoch": 0.19672297597441332, "grad_norm": 0.380859375, "learning_rate": 0.0002950882091635994, "loss": 0.4927, "step": 7750 }, { "epoch": 0.19684989402342906, "grad_norm": 0.369140625, "learning_rate": 0.0002952785886533824, "loss": 0.5427, "step": 7755 }, { "epoch": 0.19697681207244483, "grad_norm": 0.416015625, "learning_rate": 0.00029546896814316537, "loss": 0.5947, "step": 7760 }, { "epoch": 0.19710373012146057, "grad_norm": 0.4375, "learning_rate": 0.0002956593476329483, "loss": 0.5497, "step": 7765 }, { "epoch": 0.19723064817047634, "grad_norm": 0.3203125, "learning_rate": 0.00029584972712273127, "loss": 0.472, "step": 7770 }, { "epoch": 0.19735756621949208, "grad_norm": 0.4140625, "learning_rate": 0.00029604010661251425, "loss": 0.5405, "step": 7775 }, { "epoch": 0.19748448426850782, "grad_norm": 0.39453125, "learning_rate": 0.0002962304861022972, "loss": 0.5205, "step": 7780 }, { "epoch": 0.19761140231752358, "grad_norm": 0.3984375, "learning_rate": 0.0002964208655920802, "loss": 0.5354, "step": 7785 }, { "epoch": 0.19773832036653932, "grad_norm": 0.408203125, "learning_rate": 0.0002966112450818632, "loss": 0.5206, "step": 7790 }, { "epoch": 0.1978652384155551, "grad_norm": 0.3828125, "learning_rate": 0.00029680162457164616, "loss": 0.5143, "step": 7795 }, { "epoch": 0.19799215646457083, "grad_norm": 0.404296875, "learning_rate": 0.0002969920040614291, "loss": 0.5477, "step": 7800 }, { "epoch": 0.19811907451358657, "grad_norm": 0.404296875, "learning_rate": 0.00029718238355121206, "loss": 0.5149, "step": 7805 }, { "epoch": 0.19824599256260234, "grad_norm": 0.4296875, "learning_rate": 0.00029737276304099504, "loss": 0.5508, "step": 7810 }, { "epoch": 0.19837291061161808, "grad_norm": 0.39453125, "learning_rate": 0.00029756314253077796, "loss": 0.527, "step": 7815 }, { "epoch": 0.19849982866063384, "grad_norm": 0.38671875, "learning_rate": 0.00029775352202056094, "loss": 0.4974, "step": 7820 }, { "epoch": 0.19862674670964958, "grad_norm": 0.390625, "learning_rate": 0.0002979439015103439, "loss": 0.5297, "step": 7825 }, { "epoch": 0.19875366475866532, "grad_norm": 0.392578125, "learning_rate": 0.0002981342810001269, "loss": 0.5495, "step": 7830 }, { "epoch": 0.1988805828076811, "grad_norm": 0.4140625, "learning_rate": 0.0002983246604899099, "loss": 0.5419, "step": 7835 }, { "epoch": 0.19900750085669683, "grad_norm": 0.39453125, "learning_rate": 0.00029851503997969285, "loss": 0.5299, "step": 7840 }, { "epoch": 0.19913441890571257, "grad_norm": 0.3828125, "learning_rate": 0.00029870541946947583, "loss": 0.5529, "step": 7845 }, { "epoch": 0.19926133695472834, "grad_norm": 0.419921875, "learning_rate": 0.00029889579895925875, "loss": 0.5312, "step": 7850 }, { "epoch": 0.19938825500374407, "grad_norm": 0.42578125, "learning_rate": 0.00029908617844904173, "loss": 0.5408, "step": 7855 }, { "epoch": 0.19951517305275984, "grad_norm": 0.365234375, "learning_rate": 0.0002992765579388247, "loss": 0.5391, "step": 7860 }, { "epoch": 0.19964209110177558, "grad_norm": 0.38671875, "learning_rate": 0.0002994669374286077, "loss": 0.5494, "step": 7865 }, { "epoch": 0.19976900915079132, "grad_norm": 0.38671875, "learning_rate": 0.0002996573169183906, "loss": 0.5742, "step": 7870 }, { "epoch": 0.1998959271998071, "grad_norm": 0.416015625, "learning_rate": 0.0002998476964081736, "loss": 0.5557, "step": 7875 }, { "epoch": 0.20002284524882283, "grad_norm": 0.388671875, "learning_rate": 0.0002999999998527912, "loss": 0.5426, "step": 7880 }, { "epoch": 0.2001497632978386, "grad_norm": 0.419921875, "learning_rate": 0.000299999994700483, "loss": 0.563, "step": 7885 }, { "epoch": 0.20027668134685433, "grad_norm": 0.36328125, "learning_rate": 0.00029999998218773486, "loss": 0.4823, "step": 7890 }, { "epoch": 0.20040359939587007, "grad_norm": 0.375, "learning_rate": 0.00029999996231454734, "loss": 0.5303, "step": 7895 }, { "epoch": 0.20053051744488584, "grad_norm": 0.39453125, "learning_rate": 0.0002999999350809214, "loss": 0.5596, "step": 7900 }, { "epoch": 0.20065743549390158, "grad_norm": 0.376953125, "learning_rate": 0.00029999990048685843, "loss": 0.5111, "step": 7905 }, { "epoch": 0.20078435354291735, "grad_norm": 0.41015625, "learning_rate": 0.00029999985853236, "loss": 0.5752, "step": 7910 }, { "epoch": 0.2009112715919331, "grad_norm": 0.38671875, "learning_rate": 0.0002999998092174284, "loss": 0.5336, "step": 7915 }, { "epoch": 0.20103818964094883, "grad_norm": 0.380859375, "learning_rate": 0.00029999975254206584, "loss": 0.5234, "step": 7920 }, { "epoch": 0.2011651076899646, "grad_norm": 0.546875, "learning_rate": 0.00029999968850627517, "loss": 0.5397, "step": 7925 }, { "epoch": 0.20129202573898033, "grad_norm": 0.400390625, "learning_rate": 0.00029999961711005957, "loss": 0.4941, "step": 7930 }, { "epoch": 0.2014189437879961, "grad_norm": 0.408203125, "learning_rate": 0.00029999953835342245, "loss": 0.5421, "step": 7935 }, { "epoch": 0.20154586183701184, "grad_norm": 0.390625, "learning_rate": 0.0002999994522363678, "loss": 0.5694, "step": 7940 }, { "epoch": 0.20167277988602758, "grad_norm": 0.37890625, "learning_rate": 0.00029999935875889977, "loss": 0.5152, "step": 7945 }, { "epoch": 0.20179969793504335, "grad_norm": 0.376953125, "learning_rate": 0.00029999925792102293, "loss": 0.5129, "step": 7950 }, { "epoch": 0.2019266159840591, "grad_norm": 0.390625, "learning_rate": 0.00029999914972274225, "loss": 0.5436, "step": 7955 }, { "epoch": 0.20205353403307486, "grad_norm": 0.376953125, "learning_rate": 0.0002999990341640631, "loss": 0.5334, "step": 7960 }, { "epoch": 0.2021804520820906, "grad_norm": 0.3984375, "learning_rate": 0.00029999891124499103, "loss": 0.5396, "step": 7965 }, { "epoch": 0.20230737013110633, "grad_norm": 0.39453125, "learning_rate": 0.00029999878096553215, "loss": 0.5402, "step": 7970 }, { "epoch": 0.2024342881801221, "grad_norm": 0.431640625, "learning_rate": 0.0002999986433256928, "loss": 0.5644, "step": 7975 }, { "epoch": 0.20256120622913784, "grad_norm": 0.458984375, "learning_rate": 0.00029999849832547984, "loss": 0.5623, "step": 7980 }, { "epoch": 0.2026881242781536, "grad_norm": 0.40625, "learning_rate": 0.0002999983459649003, "loss": 0.5525, "step": 7985 }, { "epoch": 0.20281504232716935, "grad_norm": 0.412109375, "learning_rate": 0.00029999818624396166, "loss": 0.5324, "step": 7990 }, { "epoch": 0.2029419603761851, "grad_norm": 0.43359375, "learning_rate": 0.0002999980191626718, "loss": 0.538, "step": 7995 }, { "epoch": 0.20306887842520085, "grad_norm": 0.39453125, "learning_rate": 0.00029999784472103887, "loss": 0.5242, "step": 8000 }, { "epoch": 0.2031957964742166, "grad_norm": 0.384765625, "learning_rate": 0.00029999766291907145, "loss": 0.5465, "step": 8005 }, { "epoch": 0.20332271452323236, "grad_norm": 0.38671875, "learning_rate": 0.00029999747375677847, "loss": 0.5309, "step": 8010 }, { "epoch": 0.2034496325722481, "grad_norm": 0.41796875, "learning_rate": 0.0002999972772341692, "loss": 0.5647, "step": 8015 }, { "epoch": 0.20357655062126384, "grad_norm": 0.400390625, "learning_rate": 0.0002999970733512533, "loss": 0.5342, "step": 8020 }, { "epoch": 0.2037034686702796, "grad_norm": 0.37890625, "learning_rate": 0.0002999968621080407, "loss": 0.5325, "step": 8025 }, { "epoch": 0.20383038671929535, "grad_norm": 0.388671875, "learning_rate": 0.0002999966435045419, "loss": 0.5593, "step": 8030 }, { "epoch": 0.20395730476831112, "grad_norm": 0.43359375, "learning_rate": 0.0002999964175407675, "loss": 0.5431, "step": 8035 }, { "epoch": 0.20408422281732685, "grad_norm": 0.4140625, "learning_rate": 0.0002999961842167287, "loss": 0.5376, "step": 8040 }, { "epoch": 0.2042111408663426, "grad_norm": 0.392578125, "learning_rate": 0.0002999959435324368, "loss": 0.5274, "step": 8045 }, { "epoch": 0.20433805891535836, "grad_norm": 0.376953125, "learning_rate": 0.00029999569548790376, "loss": 0.4754, "step": 8050 }, { "epoch": 0.2044649769643741, "grad_norm": 0.4296875, "learning_rate": 0.00029999544008314164, "loss": 0.5832, "step": 8055 }, { "epoch": 0.20459189501338984, "grad_norm": 0.423828125, "learning_rate": 0.00029999517731816314, "loss": 0.5635, "step": 8060 }, { "epoch": 0.2047188130624056, "grad_norm": 0.84765625, "learning_rate": 0.00029999490719298093, "loss": 0.5032, "step": 8065 }, { "epoch": 0.20484573111142135, "grad_norm": 0.40234375, "learning_rate": 0.0002999946297076084, "loss": 0.5716, "step": 8070 }, { "epoch": 0.20497264916043711, "grad_norm": 0.38671875, "learning_rate": 0.0002999943448620592, "loss": 0.5385, "step": 8075 }, { "epoch": 0.20509956720945285, "grad_norm": 0.38671875, "learning_rate": 0.00029999405265634715, "loss": 0.5137, "step": 8080 }, { "epoch": 0.2052264852584686, "grad_norm": 0.36328125, "learning_rate": 0.00029999375309048675, "loss": 0.5142, "step": 8085 }, { "epoch": 0.20535340330748436, "grad_norm": 0.39453125, "learning_rate": 0.00029999344616449264, "loss": 0.5652, "step": 8090 }, { "epoch": 0.2054803213565001, "grad_norm": 0.396484375, "learning_rate": 0.0002999931318783798, "loss": 0.5413, "step": 8095 }, { "epoch": 0.20560723940551587, "grad_norm": 0.41015625, "learning_rate": 0.0002999928102321638, "loss": 0.5477, "step": 8100 }, { "epoch": 0.2057341574545316, "grad_norm": 0.392578125, "learning_rate": 0.00029999248122586033, "loss": 0.5216, "step": 8105 }, { "epoch": 0.20586107550354735, "grad_norm": 0.396484375, "learning_rate": 0.0002999921448594856, "loss": 0.5383, "step": 8110 }, { "epoch": 0.20598799355256311, "grad_norm": 0.3671875, "learning_rate": 0.000299991801133056, "loss": 0.5247, "step": 8115 }, { "epoch": 0.20611491160157885, "grad_norm": 0.443359375, "learning_rate": 0.0002999914500465884, "loss": 0.5174, "step": 8120 }, { "epoch": 0.20624182965059462, "grad_norm": 0.462890625, "learning_rate": 0.0002999910916001003, "loss": 0.5448, "step": 8125 }, { "epoch": 0.20636874769961036, "grad_norm": 0.41796875, "learning_rate": 0.0002999907257936089, "loss": 0.5187, "step": 8130 }, { "epoch": 0.2064956657486261, "grad_norm": 0.44921875, "learning_rate": 0.0002999903526271324, "loss": 0.5534, "step": 8135 }, { "epoch": 0.20662258379764187, "grad_norm": 0.37109375, "learning_rate": 0.000299989972100689, "loss": 0.5165, "step": 8140 }, { "epoch": 0.2067495018466576, "grad_norm": 0.416015625, "learning_rate": 0.0002999895842142975, "loss": 0.4994, "step": 8145 }, { "epoch": 0.20687641989567337, "grad_norm": 0.37890625, "learning_rate": 0.0002999891889679768, "loss": 0.5102, "step": 8150 }, { "epoch": 0.20700333794468911, "grad_norm": 0.37109375, "learning_rate": 0.00029998878636174634, "loss": 0.5318, "step": 8155 }, { "epoch": 0.20713025599370485, "grad_norm": 0.37890625, "learning_rate": 0.00029998837639562593, "loss": 0.5414, "step": 8160 }, { "epoch": 0.20725717404272062, "grad_norm": 0.40234375, "learning_rate": 0.0002999879590696356, "loss": 0.5184, "step": 8165 }, { "epoch": 0.20738409209173636, "grad_norm": 0.388671875, "learning_rate": 0.0002999875343837958, "loss": 0.5479, "step": 8170 }, { "epoch": 0.20751101014075213, "grad_norm": 0.40234375, "learning_rate": 0.00029998710233812754, "loss": 0.5239, "step": 8175 }, { "epoch": 0.20763792818976787, "grad_norm": 0.390625, "learning_rate": 0.00029998666293265183, "loss": 0.5656, "step": 8180 }, { "epoch": 0.2077648462387836, "grad_norm": 0.3984375, "learning_rate": 0.0002999862161673904, "loss": 0.5176, "step": 8185 }, { "epoch": 0.20789176428779937, "grad_norm": 0.3984375, "learning_rate": 0.00029998576204236504, "loss": 0.5355, "step": 8190 }, { "epoch": 0.20801868233681511, "grad_norm": 0.353515625, "learning_rate": 0.0002999853005575981, "loss": 0.5214, "step": 8195 }, { "epoch": 0.20814560038583088, "grad_norm": 0.380859375, "learning_rate": 0.00029998483171311217, "loss": 0.5512, "step": 8200 }, { "epoch": 0.20827251843484662, "grad_norm": 0.36328125, "learning_rate": 0.00029998435550893034, "loss": 0.522, "step": 8205 }, { "epoch": 0.20839943648386236, "grad_norm": 0.408203125, "learning_rate": 0.0002999838719450759, "loss": 0.5099, "step": 8210 }, { "epoch": 0.20852635453287813, "grad_norm": 0.408203125, "learning_rate": 0.0002999833810215726, "loss": 0.5183, "step": 8215 }, { "epoch": 0.20865327258189387, "grad_norm": 0.40234375, "learning_rate": 0.0002999828827384446, "loss": 0.5181, "step": 8220 }, { "epoch": 0.20878019063090963, "grad_norm": 0.453125, "learning_rate": 0.0002999823770957162, "loss": 0.5451, "step": 8225 }, { "epoch": 0.20890710867992537, "grad_norm": 0.41015625, "learning_rate": 0.00029998186409341233, "loss": 0.5218, "step": 8230 }, { "epoch": 0.2090340267289411, "grad_norm": 0.365234375, "learning_rate": 0.0002999813437315582, "loss": 0.5243, "step": 8235 }, { "epoch": 0.20916094477795688, "grad_norm": 0.400390625, "learning_rate": 0.0002999808160101792, "loss": 0.5766, "step": 8240 }, { "epoch": 0.20928786282697262, "grad_norm": 0.416015625, "learning_rate": 0.00029998028092930134, "loss": 0.5284, "step": 8245 }, { "epoch": 0.2094147808759884, "grad_norm": 0.39453125, "learning_rate": 0.0002999797384889508, "loss": 0.5193, "step": 8250 }, { "epoch": 0.20954169892500413, "grad_norm": 0.408203125, "learning_rate": 0.0002999791886891543, "loss": 0.5234, "step": 8255 }, { "epoch": 0.20966861697401987, "grad_norm": 0.373046875, "learning_rate": 0.00029997863152993865, "loss": 0.5461, "step": 8260 }, { "epoch": 0.20979553502303563, "grad_norm": 0.38671875, "learning_rate": 0.0002999780670113314, "loss": 0.5315, "step": 8265 }, { "epoch": 0.20992245307205137, "grad_norm": 0.357421875, "learning_rate": 0.0002999774951333601, "loss": 0.5057, "step": 8270 }, { "epoch": 0.2100493711210671, "grad_norm": 0.373046875, "learning_rate": 0.0002999769158960528, "loss": 0.5013, "step": 8275 }, { "epoch": 0.21017628917008288, "grad_norm": 0.404296875, "learning_rate": 0.00029997632929943806, "loss": 0.536, "step": 8280 }, { "epoch": 0.21030320721909862, "grad_norm": 0.373046875, "learning_rate": 0.00029997573534354453, "loss": 0.5653, "step": 8285 }, { "epoch": 0.2104301252681144, "grad_norm": 0.3828125, "learning_rate": 0.00029997513402840146, "loss": 0.4881, "step": 8290 }, { "epoch": 0.21055704331713013, "grad_norm": 0.3671875, "learning_rate": 0.00029997452535403826, "loss": 0.5076, "step": 8295 }, { "epoch": 0.21068396136614587, "grad_norm": 0.392578125, "learning_rate": 0.0002999739093204849, "loss": 0.5232, "step": 8300 }, { "epoch": 0.21081087941516163, "grad_norm": 0.380859375, "learning_rate": 0.0002999732859277715, "loss": 0.5329, "step": 8305 }, { "epoch": 0.21093779746417737, "grad_norm": 0.375, "learning_rate": 0.0002999726551759287, "loss": 0.5385, "step": 8310 }, { "epoch": 0.21106471551319314, "grad_norm": 0.40234375, "learning_rate": 0.00029997201706498746, "loss": 0.5353, "step": 8315 }, { "epoch": 0.21119163356220888, "grad_norm": 0.40625, "learning_rate": 0.0002999713715949791, "loss": 0.5199, "step": 8320 }, { "epoch": 0.21131855161122462, "grad_norm": 0.3828125, "learning_rate": 0.0002999707187659352, "loss": 0.5169, "step": 8325 }, { "epoch": 0.2114454696602404, "grad_norm": 0.37890625, "learning_rate": 0.00029997005857788797, "loss": 0.552, "step": 8330 }, { "epoch": 0.21157238770925613, "grad_norm": 0.373046875, "learning_rate": 0.00029996939103086966, "loss": 0.5394, "step": 8335 }, { "epoch": 0.2116993057582719, "grad_norm": 0.3828125, "learning_rate": 0.00029996871612491307, "loss": 0.5444, "step": 8340 }, { "epoch": 0.21182622380728763, "grad_norm": 0.35546875, "learning_rate": 0.00029996803386005133, "loss": 0.4925, "step": 8345 }, { "epoch": 0.21195314185630337, "grad_norm": 0.37109375, "learning_rate": 0.0002999673442363179, "loss": 0.5164, "step": 8350 }, { "epoch": 0.21208005990531914, "grad_norm": 0.375, "learning_rate": 0.0002999666472537466, "loss": 0.5237, "step": 8355 }, { "epoch": 0.21220697795433488, "grad_norm": 0.365234375, "learning_rate": 0.00029996594291237174, "loss": 0.5038, "step": 8360 }, { "epoch": 0.21233389600335065, "grad_norm": 0.380859375, "learning_rate": 0.0002999652312122277, "loss": 0.5511, "step": 8365 }, { "epoch": 0.2124608140523664, "grad_norm": 0.384765625, "learning_rate": 0.0002999645121533495, "loss": 0.5381, "step": 8370 }, { "epoch": 0.21258773210138213, "grad_norm": 0.345703125, "learning_rate": 0.0002999637857357725, "loss": 0.5334, "step": 8375 }, { "epoch": 0.2127146501503979, "grad_norm": 0.369140625, "learning_rate": 0.00029996305195953226, "loss": 0.5096, "step": 8380 }, { "epoch": 0.21284156819941363, "grad_norm": 0.376953125, "learning_rate": 0.0002999623108246648, "loss": 0.537, "step": 8385 }, { "epoch": 0.2129684862484294, "grad_norm": 0.40625, "learning_rate": 0.00029996156233120646, "loss": 0.5481, "step": 8390 }, { "epoch": 0.21309540429744514, "grad_norm": 0.42578125, "learning_rate": 0.000299960806479194, "loss": 0.5425, "step": 8395 }, { "epoch": 0.21322232234646088, "grad_norm": 0.384765625, "learning_rate": 0.00029996004326866447, "loss": 0.4891, "step": 8400 }, { "epoch": 0.21334924039547665, "grad_norm": 0.375, "learning_rate": 0.00029995927269965537, "loss": 0.5391, "step": 8405 }, { "epoch": 0.2134761584444924, "grad_norm": 0.41796875, "learning_rate": 0.0002999584947722045, "loss": 0.5741, "step": 8410 }, { "epoch": 0.21360307649350815, "grad_norm": 0.427734375, "learning_rate": 0.00029995770948635004, "loss": 0.5737, "step": 8415 }, { "epoch": 0.2137299945425239, "grad_norm": 0.365234375, "learning_rate": 0.0002999569168421305, "loss": 0.5147, "step": 8420 }, { "epoch": 0.21385691259153963, "grad_norm": 0.38671875, "learning_rate": 0.00029995611683958476, "loss": 0.5571, "step": 8425 }, { "epoch": 0.2139838306405554, "grad_norm": 0.357421875, "learning_rate": 0.0002999553094787521, "loss": 0.5411, "step": 8430 }, { "epoch": 0.21411074868957114, "grad_norm": 0.39453125, "learning_rate": 0.0002999544947596722, "loss": 0.5473, "step": 8435 }, { "epoch": 0.2142376667385869, "grad_norm": 0.404296875, "learning_rate": 0.00029995367268238494, "loss": 0.5352, "step": 8440 }, { "epoch": 0.21436458478760265, "grad_norm": 0.416015625, "learning_rate": 0.0002999528432469307, "loss": 0.522, "step": 8445 }, { "epoch": 0.2144915028366184, "grad_norm": 0.3828125, "learning_rate": 0.00029995200645335015, "loss": 0.5302, "step": 8450 }, { "epoch": 0.21461842088563415, "grad_norm": 0.40234375, "learning_rate": 0.00029995116230168437, "loss": 0.5202, "step": 8455 }, { "epoch": 0.2147453389346499, "grad_norm": 0.359375, "learning_rate": 0.0002999503107919748, "loss": 0.5216, "step": 8460 }, { "epoch": 0.21487225698366566, "grad_norm": 0.40625, "learning_rate": 0.00029994945192426324, "loss": 0.5203, "step": 8465 }, { "epoch": 0.2149991750326814, "grad_norm": 0.408203125, "learning_rate": 0.0002999485856985918, "loss": 0.5518, "step": 8470 }, { "epoch": 0.21512609308169714, "grad_norm": 0.390625, "learning_rate": 0.00029994771211500293, "loss": 0.5553, "step": 8475 }, { "epoch": 0.2152530111307129, "grad_norm": 0.3828125, "learning_rate": 0.0002999468311735396, "loss": 0.5566, "step": 8480 }, { "epoch": 0.21537992917972865, "grad_norm": 0.431640625, "learning_rate": 0.000299945942874245, "loss": 0.5274, "step": 8485 }, { "epoch": 0.21550684722874439, "grad_norm": 0.37890625, "learning_rate": 0.0002999450472171627, "loss": 0.5288, "step": 8490 }, { "epoch": 0.21563376527776015, "grad_norm": 0.392578125, "learning_rate": 0.00029994414420233666, "loss": 0.5514, "step": 8495 }, { "epoch": 0.2157606833267759, "grad_norm": 0.37890625, "learning_rate": 0.0002999432338298112, "loss": 0.5095, "step": 8500 }, { "epoch": 0.21588760137579166, "grad_norm": 0.388671875, "learning_rate": 0.000299942316099631, "loss": 0.5092, "step": 8505 }, { "epoch": 0.2160145194248074, "grad_norm": 0.40234375, "learning_rate": 0.00029994139101184104, "loss": 0.5161, "step": 8510 }, { "epoch": 0.21614143747382314, "grad_norm": 0.361328125, "learning_rate": 0.0002999404585664868, "loss": 0.5127, "step": 8515 }, { "epoch": 0.2162683555228389, "grad_norm": 0.466796875, "learning_rate": 0.000299939518763614, "loss": 0.5099, "step": 8520 }, { "epoch": 0.21639527357185465, "grad_norm": 0.361328125, "learning_rate": 0.00029993857160326865, "loss": 0.5126, "step": 8525 }, { "epoch": 0.2165221916208704, "grad_norm": 0.34375, "learning_rate": 0.0002999376170854974, "loss": 0.5209, "step": 8530 }, { "epoch": 0.21664910966988615, "grad_norm": 0.3828125, "learning_rate": 0.0002999366552103469, "loss": 0.563, "step": 8535 }, { "epoch": 0.2167760277189019, "grad_norm": 0.392578125, "learning_rate": 0.0002999356859778646, "loss": 0.5401, "step": 8540 }, { "epoch": 0.21690294576791766, "grad_norm": 0.380859375, "learning_rate": 0.0002999347093880978, "loss": 0.5092, "step": 8545 }, { "epoch": 0.2170298638169334, "grad_norm": 0.396484375, "learning_rate": 0.0002999337254410946, "loss": 0.5378, "step": 8550 }, { "epoch": 0.21715678186594917, "grad_norm": 0.40625, "learning_rate": 0.0002999327341369032, "loss": 0.5422, "step": 8555 }, { "epoch": 0.2172836999149649, "grad_norm": 0.38671875, "learning_rate": 0.00029993173547557223, "loss": 0.5026, "step": 8560 }, { "epoch": 0.21741061796398065, "grad_norm": 0.37890625, "learning_rate": 0.0002999307294571508, "loss": 0.5233, "step": 8565 }, { "epoch": 0.2175375360129964, "grad_norm": 0.384765625, "learning_rate": 0.00029992971608168813, "loss": 0.5329, "step": 8570 }, { "epoch": 0.21766445406201215, "grad_norm": 0.388671875, "learning_rate": 0.000299928695349234, "loss": 0.5233, "step": 8575 }, { "epoch": 0.21779137211102792, "grad_norm": 0.361328125, "learning_rate": 0.00029992766725983855, "loss": 0.496, "step": 8580 }, { "epoch": 0.21791829016004366, "grad_norm": 0.390625, "learning_rate": 0.0002999266318135522, "loss": 0.5396, "step": 8585 }, { "epoch": 0.2180452082090594, "grad_norm": 0.404296875, "learning_rate": 0.0002999255890104257, "loss": 0.5647, "step": 8590 }, { "epoch": 0.21817212625807517, "grad_norm": 0.392578125, "learning_rate": 0.00029992453885051035, "loss": 0.5391, "step": 8595 }, { "epoch": 0.2182990443070909, "grad_norm": 0.390625, "learning_rate": 0.00029992348133385755, "loss": 0.5718, "step": 8600 }, { "epoch": 0.21842596235610667, "grad_norm": 0.390625, "learning_rate": 0.0002999224164605193, "loss": 0.5459, "step": 8605 }, { "epoch": 0.2185528804051224, "grad_norm": 0.30078125, "learning_rate": 0.00029992134423054763, "loss": 0.5001, "step": 8610 }, { "epoch": 0.21867979845413815, "grad_norm": 0.515625, "learning_rate": 0.0002999202646439955, "loss": 0.537, "step": 8615 }, { "epoch": 0.21880671650315392, "grad_norm": 0.37890625, "learning_rate": 0.0002999191777009156, "loss": 0.5236, "step": 8620 }, { "epoch": 0.21893363455216966, "grad_norm": 0.37109375, "learning_rate": 0.0002999180834013614, "loss": 0.5407, "step": 8625 }, { "epoch": 0.21906055260118543, "grad_norm": 0.35546875, "learning_rate": 0.0002999169817453866, "loss": 0.5104, "step": 8630 }, { "epoch": 0.21918747065020117, "grad_norm": 0.40625, "learning_rate": 0.00029991587273304516, "loss": 0.5232, "step": 8635 }, { "epoch": 0.2193143886992169, "grad_norm": 0.4140625, "learning_rate": 0.0002999147563643916, "loss": 0.5406, "step": 8640 }, { "epoch": 0.21944130674823267, "grad_norm": 0.3828125, "learning_rate": 0.0002999136326394807, "loss": 0.528, "step": 8645 }, { "epoch": 0.2195682247972484, "grad_norm": 0.375, "learning_rate": 0.0002999125015583675, "loss": 0.5419, "step": 8650 }, { "epoch": 0.21969514284626418, "grad_norm": 0.361328125, "learning_rate": 0.00029991136312110755, "loss": 0.4887, "step": 8655 }, { "epoch": 0.21982206089527992, "grad_norm": 0.412109375, "learning_rate": 0.0002999102173277568, "loss": 0.546, "step": 8660 }, { "epoch": 0.21994897894429566, "grad_norm": 0.357421875, "learning_rate": 0.00029990906417837134, "loss": 0.5413, "step": 8665 }, { "epoch": 0.22007589699331143, "grad_norm": 0.3984375, "learning_rate": 0.0002999079036730078, "loss": 0.5362, "step": 8670 }, { "epoch": 0.22020281504232717, "grad_norm": 0.39453125, "learning_rate": 0.0002999067358117232, "loss": 0.5279, "step": 8675 }, { "epoch": 0.22032973309134293, "grad_norm": 0.412109375, "learning_rate": 0.00029990556059457477, "loss": 0.5228, "step": 8680 }, { "epoch": 0.22045665114035867, "grad_norm": 0.416015625, "learning_rate": 0.00029990437802162017, "loss": 0.5407, "step": 8685 }, { "epoch": 0.2205835691893744, "grad_norm": 0.400390625, "learning_rate": 0.0002999031880929175, "loss": 0.5855, "step": 8690 }, { "epoch": 0.22071048723839018, "grad_norm": 0.40625, "learning_rate": 0.0002999019908085251, "loss": 0.5197, "step": 8695 }, { "epoch": 0.22083740528740592, "grad_norm": 0.392578125, "learning_rate": 0.0002999007861685017, "loss": 0.5226, "step": 8700 }, { "epoch": 0.2209643233364217, "grad_norm": 0.34375, "learning_rate": 0.0002998995741729064, "loss": 0.5121, "step": 8705 }, { "epoch": 0.22109124138543743, "grad_norm": 0.384765625, "learning_rate": 0.0002998983548217987, "loss": 0.5215, "step": 8710 }, { "epoch": 0.22121815943445317, "grad_norm": 0.36328125, "learning_rate": 0.0002998971281152385, "loss": 0.5405, "step": 8715 }, { "epoch": 0.22134507748346893, "grad_norm": 0.349609375, "learning_rate": 0.00029989589405328594, "loss": 0.4938, "step": 8720 }, { "epoch": 0.22147199553248467, "grad_norm": 0.37109375, "learning_rate": 0.00029989465263600154, "loss": 0.5444, "step": 8725 }, { "epoch": 0.2215989135815004, "grad_norm": 0.373046875, "learning_rate": 0.00029989340386344624, "loss": 0.5212, "step": 8730 }, { "epoch": 0.22172583163051618, "grad_norm": 0.375, "learning_rate": 0.00029989214773568134, "loss": 0.5304, "step": 8735 }, { "epoch": 0.22185274967953192, "grad_norm": 0.375, "learning_rate": 0.0002998908842527685, "loss": 0.5335, "step": 8740 }, { "epoch": 0.22197966772854769, "grad_norm": 0.326171875, "learning_rate": 0.00029988961341476957, "loss": 0.508, "step": 8745 }, { "epoch": 0.22210658577756343, "grad_norm": 0.384765625, "learning_rate": 0.00029988833522174706, "loss": 0.5513, "step": 8750 }, { "epoch": 0.22223350382657917, "grad_norm": 0.384765625, "learning_rate": 0.0002998870496737637, "loss": 0.5052, "step": 8755 }, { "epoch": 0.22236042187559493, "grad_norm": 0.38671875, "learning_rate": 0.00029988575677088246, "loss": 0.5125, "step": 8760 }, { "epoch": 0.22248733992461067, "grad_norm": 0.38671875, "learning_rate": 0.00029988445651316683, "loss": 0.5269, "step": 8765 }, { "epoch": 0.22261425797362644, "grad_norm": 0.40625, "learning_rate": 0.00029988314890068066, "loss": 0.5474, "step": 8770 }, { "epoch": 0.22274117602264218, "grad_norm": 0.357421875, "learning_rate": 0.00029988183393348807, "loss": 0.5251, "step": 8775 }, { "epoch": 0.22286809407165792, "grad_norm": 0.326171875, "learning_rate": 0.00029988051161165363, "loss": 0.4843, "step": 8780 }, { "epoch": 0.22299501212067369, "grad_norm": 0.3828125, "learning_rate": 0.0002998791819352421, "loss": 0.5318, "step": 8785 }, { "epoch": 0.22312193016968943, "grad_norm": 1.8671875, "learning_rate": 0.00029987784490431886, "loss": 0.5361, "step": 8790 }, { "epoch": 0.2232488482187052, "grad_norm": 0.37890625, "learning_rate": 0.0002998765005189495, "loss": 0.5243, "step": 8795 }, { "epoch": 0.22337576626772093, "grad_norm": 0.376953125, "learning_rate": 0.00029987514877919996, "loss": 0.5472, "step": 8800 }, { "epoch": 0.22350268431673667, "grad_norm": 0.375, "learning_rate": 0.00029987378968513654, "loss": 0.5255, "step": 8805 }, { "epoch": 0.22362960236575244, "grad_norm": 0.4140625, "learning_rate": 0.000299872423236826, "loss": 0.5815, "step": 8810 }, { "epoch": 0.22375652041476818, "grad_norm": 0.3984375, "learning_rate": 0.00029987104943433526, "loss": 0.532, "step": 8815 }, { "epoch": 0.22388343846378395, "grad_norm": 0.384765625, "learning_rate": 0.00029986966827773195, "loss": 0.5521, "step": 8820 }, { "epoch": 0.22401035651279969, "grad_norm": 0.380859375, "learning_rate": 0.00029986827976708364, "loss": 0.512, "step": 8825 }, { "epoch": 0.22413727456181542, "grad_norm": 0.392578125, "learning_rate": 0.0002998668839024586, "loss": 0.4953, "step": 8830 }, { "epoch": 0.2242641926108312, "grad_norm": 0.392578125, "learning_rate": 0.0002998654806839252, "loss": 0.5349, "step": 8835 }, { "epoch": 0.22439111065984693, "grad_norm": 0.35546875, "learning_rate": 0.00029986407011155236, "loss": 0.5247, "step": 8840 }, { "epoch": 0.2245180287088627, "grad_norm": 0.38671875, "learning_rate": 0.0002998626521854093, "loss": 0.5342, "step": 8845 }, { "epoch": 0.22464494675787844, "grad_norm": 0.3828125, "learning_rate": 0.00029986122690556563, "loss": 0.5363, "step": 8850 }, { "epoch": 0.22477186480689418, "grad_norm": 0.375, "learning_rate": 0.00029985979427209127, "loss": 0.5242, "step": 8855 }, { "epoch": 0.22489878285590995, "grad_norm": 0.3671875, "learning_rate": 0.00029985835428505644, "loss": 0.5259, "step": 8860 }, { "epoch": 0.22502570090492569, "grad_norm": 0.474609375, "learning_rate": 0.0002998569069445319, "loss": 0.5457, "step": 8865 }, { "epoch": 0.22515261895394145, "grad_norm": 0.361328125, "learning_rate": 0.0002998554522505886, "loss": 0.5303, "step": 8870 }, { "epoch": 0.2252795370029572, "grad_norm": 0.38671875, "learning_rate": 0.000299853990203298, "loss": 0.5359, "step": 8875 }, { "epoch": 0.22540645505197293, "grad_norm": 0.384765625, "learning_rate": 0.00029985252080273175, "loss": 0.5182, "step": 8880 }, { "epoch": 0.2255333731009887, "grad_norm": 0.392578125, "learning_rate": 0.000299851044048962, "loss": 0.5177, "step": 8885 }, { "epoch": 0.22566029115000444, "grad_norm": 0.392578125, "learning_rate": 0.0002998495599420612, "loss": 0.5372, "step": 8890 }, { "epoch": 0.2257872091990202, "grad_norm": 0.396484375, "learning_rate": 0.0002998480684821022, "loss": 0.5373, "step": 8895 }, { "epoch": 0.22591412724803595, "grad_norm": 0.3984375, "learning_rate": 0.0002998465696691582, "loss": 0.5522, "step": 8900 }, { "epoch": 0.22604104529705168, "grad_norm": 0.39453125, "learning_rate": 0.00029984506350330263, "loss": 0.5538, "step": 8905 }, { "epoch": 0.22616796334606745, "grad_norm": 0.375, "learning_rate": 0.0002998435499846096, "loss": 0.5403, "step": 8910 }, { "epoch": 0.2262948813950832, "grad_norm": 0.375, "learning_rate": 0.00029984202911315317, "loss": 0.5325, "step": 8915 }, { "epoch": 0.22642179944409896, "grad_norm": 0.373046875, "learning_rate": 0.0002998405008890081, "loss": 0.5472, "step": 8920 }, { "epoch": 0.2265487174931147, "grad_norm": 0.396484375, "learning_rate": 0.0002998389653122493, "loss": 0.5321, "step": 8925 }, { "epoch": 0.22667563554213044, "grad_norm": 0.357421875, "learning_rate": 0.00029983742238295225, "loss": 0.5493, "step": 8930 }, { "epoch": 0.2268025535911462, "grad_norm": 0.376953125, "learning_rate": 0.0002998358721011925, "loss": 0.5224, "step": 8935 }, { "epoch": 0.22692947164016194, "grad_norm": 0.380859375, "learning_rate": 0.00029983431446704617, "loss": 0.5276, "step": 8940 }, { "epoch": 0.22705638968917768, "grad_norm": 0.361328125, "learning_rate": 0.00029983274948058977, "loss": 0.5037, "step": 8945 }, { "epoch": 0.22718330773819345, "grad_norm": 0.36328125, "learning_rate": 0.0002998311771419, "loss": 0.5299, "step": 8950 }, { "epoch": 0.2273102257872092, "grad_norm": 0.34765625, "learning_rate": 0.00029982959745105406, "loss": 0.5259, "step": 8955 }, { "epoch": 0.22743714383622496, "grad_norm": 0.359375, "learning_rate": 0.00029982801040812947, "loss": 0.5202, "step": 8960 }, { "epoch": 0.2275640618852407, "grad_norm": 0.384765625, "learning_rate": 0.0002998264160132041, "loss": 0.5193, "step": 8965 }, { "epoch": 0.22769097993425644, "grad_norm": 0.384765625, "learning_rate": 0.0002998248142663562, "loss": 0.5039, "step": 8970 }, { "epoch": 0.2278178979832722, "grad_norm": 0.380859375, "learning_rate": 0.0002998232051676643, "loss": 0.4962, "step": 8975 }, { "epoch": 0.22794481603228794, "grad_norm": 0.38671875, "learning_rate": 0.0002998215887172074, "loss": 0.4986, "step": 8980 }, { "epoch": 0.2280717340813037, "grad_norm": 0.365234375, "learning_rate": 0.0002998199649150648, "loss": 0.5095, "step": 8985 }, { "epoch": 0.22819865213031945, "grad_norm": 0.365234375, "learning_rate": 0.00029981833376131625, "loss": 0.5299, "step": 8990 }, { "epoch": 0.2283255701793352, "grad_norm": 0.375, "learning_rate": 0.0002998166952560417, "loss": 0.5185, "step": 8995 }, { "epoch": 0.22845248822835096, "grad_norm": 0.375, "learning_rate": 0.00029981504939932164, "loss": 0.5336, "step": 9000 }, { "epoch": 0.2285794062773667, "grad_norm": 0.38671875, "learning_rate": 0.0002998133961912368, "loss": 0.5166, "step": 9005 }, { "epoch": 0.22870632432638247, "grad_norm": 0.392578125, "learning_rate": 0.00029981173563186817, "loss": 0.5294, "step": 9010 }, { "epoch": 0.2288332423753982, "grad_norm": 0.353515625, "learning_rate": 0.00029981006772129745, "loss": 0.5491, "step": 9015 }, { "epoch": 0.22896016042441394, "grad_norm": 0.400390625, "learning_rate": 0.0002998083924596063, "loss": 0.491, "step": 9020 }, { "epoch": 0.2290870784734297, "grad_norm": 0.369140625, "learning_rate": 0.0002998067098468771, "loss": 0.534, "step": 9025 }, { "epoch": 0.22921399652244545, "grad_norm": 0.380859375, "learning_rate": 0.0002998050198831923, "loss": 0.5004, "step": 9030 }, { "epoch": 0.22934091457146122, "grad_norm": 0.376953125, "learning_rate": 0.0002998033225686348, "loss": 0.5221, "step": 9035 }, { "epoch": 0.22946783262047696, "grad_norm": 0.375, "learning_rate": 0.000299801617903288, "loss": 0.5105, "step": 9040 }, { "epoch": 0.2295947506694927, "grad_norm": 0.3671875, "learning_rate": 0.00029979990588723546, "loss": 0.5231, "step": 9045 }, { "epoch": 0.22972166871850846, "grad_norm": 0.39453125, "learning_rate": 0.0002997981865205612, "loss": 0.5229, "step": 9050 }, { "epoch": 0.2298485867675242, "grad_norm": 0.373046875, "learning_rate": 0.00029979645980334954, "loss": 0.5466, "step": 9055 }, { "epoch": 0.22997550481653997, "grad_norm": 0.39453125, "learning_rate": 0.0002997947257356854, "loss": 0.5266, "step": 9060 }, { "epoch": 0.2301024228655557, "grad_norm": 0.365234375, "learning_rate": 0.00029979298431765365, "loss": 0.5304, "step": 9065 }, { "epoch": 0.23022934091457145, "grad_norm": 0.380859375, "learning_rate": 0.00029979123554933984, "loss": 0.5064, "step": 9070 }, { "epoch": 0.23035625896358722, "grad_norm": 0.291015625, "learning_rate": 0.00029978947943082974, "loss": 0.486, "step": 9075 }, { "epoch": 0.23048317701260296, "grad_norm": 0.375, "learning_rate": 0.0002997877159622096, "loss": 0.519, "step": 9080 }, { "epoch": 0.23061009506161873, "grad_norm": 0.375, "learning_rate": 0.0002997859451435659, "loss": 0.5149, "step": 9085 }, { "epoch": 0.23073701311063446, "grad_norm": 0.43359375, "learning_rate": 0.0002997841669749855, "loss": 0.5315, "step": 9090 }, { "epoch": 0.2308639311596502, "grad_norm": 0.421875, "learning_rate": 0.0002997823814565557, "loss": 0.5331, "step": 9095 }, { "epoch": 0.23099084920866597, "grad_norm": 0.4453125, "learning_rate": 0.0002997805885883641, "loss": 0.5336, "step": 9100 }, { "epoch": 0.2311177672576817, "grad_norm": 0.388671875, "learning_rate": 0.0002997787883704987, "loss": 0.5269, "step": 9105 }, { "epoch": 0.23124468530669748, "grad_norm": 0.37109375, "learning_rate": 0.00029977698080304785, "loss": 0.5335, "step": 9110 }, { "epoch": 0.23137160335571322, "grad_norm": 0.388671875, "learning_rate": 0.00029977516588610017, "loss": 0.5311, "step": 9115 }, { "epoch": 0.23149852140472896, "grad_norm": 0.37109375, "learning_rate": 0.00029977334361974483, "loss": 0.5307, "step": 9120 }, { "epoch": 0.23162543945374472, "grad_norm": 0.39453125, "learning_rate": 0.00029977151400407113, "loss": 0.5566, "step": 9125 }, { "epoch": 0.23175235750276046, "grad_norm": 0.400390625, "learning_rate": 0.00029976967703916884, "loss": 0.5153, "step": 9130 }, { "epoch": 0.23187927555177623, "grad_norm": 0.35546875, "learning_rate": 0.0002997678327251282, "loss": 0.5216, "step": 9135 }, { "epoch": 0.23200619360079197, "grad_norm": 0.416015625, "learning_rate": 0.0002997659810620397, "loss": 0.5056, "step": 9140 }, { "epoch": 0.2321331116498077, "grad_norm": 0.3671875, "learning_rate": 0.00029976412204999413, "loss": 0.5215, "step": 9145 }, { "epoch": 0.23226002969882348, "grad_norm": 0.35546875, "learning_rate": 0.0002997622556890828, "loss": 0.5231, "step": 9150 }, { "epoch": 0.23238694774783922, "grad_norm": 0.365234375, "learning_rate": 0.00029976038197939716, "loss": 0.5197, "step": 9155 }, { "epoch": 0.23251386579685496, "grad_norm": 0.37890625, "learning_rate": 0.00029975850092102933, "loss": 0.5037, "step": 9160 }, { "epoch": 0.23264078384587072, "grad_norm": 0.357421875, "learning_rate": 0.00029975661251407145, "loss": 0.5307, "step": 9165 }, { "epoch": 0.23276770189488646, "grad_norm": 0.37109375, "learning_rate": 0.0002997547167586162, "loss": 0.5206, "step": 9170 }, { "epoch": 0.23289461994390223, "grad_norm": 0.357421875, "learning_rate": 0.00029975281365475677, "loss": 0.5167, "step": 9175 }, { "epoch": 0.23302153799291797, "grad_norm": 0.369140625, "learning_rate": 0.00029975090320258634, "loss": 0.4707, "step": 9180 }, { "epoch": 0.2331484560419337, "grad_norm": 0.380859375, "learning_rate": 0.00029974898540219877, "loss": 0.5369, "step": 9185 }, { "epoch": 0.23327537409094948, "grad_norm": 0.388671875, "learning_rate": 0.00029974706025368816, "loss": 0.5352, "step": 9190 }, { "epoch": 0.23340229213996522, "grad_norm": 0.400390625, "learning_rate": 0.0002997451277571489, "loss": 0.5487, "step": 9195 }, { "epoch": 0.23352921018898098, "grad_norm": 0.392578125, "learning_rate": 0.0002997431879126759, "loss": 0.5084, "step": 9200 }, { "epoch": 0.23365612823799672, "grad_norm": 0.388671875, "learning_rate": 0.00029974124072036434, "loss": 0.51, "step": 9205 }, { "epoch": 0.23378304628701246, "grad_norm": 0.416015625, "learning_rate": 0.0002997392861803097, "loss": 0.5559, "step": 9210 }, { "epoch": 0.23390996433602823, "grad_norm": 0.365234375, "learning_rate": 0.00029973732429260797, "loss": 0.5414, "step": 9215 }, { "epoch": 0.23403688238504397, "grad_norm": 0.357421875, "learning_rate": 0.00029973535505735535, "loss": 0.517, "step": 9220 }, { "epoch": 0.23416380043405974, "grad_norm": 0.361328125, "learning_rate": 0.00029973337847464854, "loss": 0.499, "step": 9225 }, { "epoch": 0.23429071848307548, "grad_norm": 0.376953125, "learning_rate": 0.00029973139454458447, "loss": 0.5294, "step": 9230 }, { "epoch": 0.23441763653209122, "grad_norm": 0.36328125, "learning_rate": 0.0002997294032672605, "loss": 0.5537, "step": 9235 }, { "epoch": 0.23454455458110698, "grad_norm": 0.3671875, "learning_rate": 0.0002997274046427744, "loss": 0.4964, "step": 9240 }, { "epoch": 0.23467147263012272, "grad_norm": 0.361328125, "learning_rate": 0.0002997253986712242, "loss": 0.5132, "step": 9245 }, { "epoch": 0.2347983906791385, "grad_norm": 0.390625, "learning_rate": 0.0002997233853527083, "loss": 0.538, "step": 9250 }, { "epoch": 0.23492530872815423, "grad_norm": 0.361328125, "learning_rate": 0.0002997213646873255, "loss": 0.5387, "step": 9255 }, { "epoch": 0.23505222677716997, "grad_norm": 0.39453125, "learning_rate": 0.000299719336675175, "loss": 0.5522, "step": 9260 }, { "epoch": 0.23517914482618574, "grad_norm": 0.359375, "learning_rate": 0.00029971730131635627, "loss": 0.5177, "step": 9265 }, { "epoch": 0.23530606287520148, "grad_norm": 0.39453125, "learning_rate": 0.00029971525861096927, "loss": 0.5139, "step": 9270 }, { "epoch": 0.23543298092421724, "grad_norm": 0.396484375, "learning_rate": 0.00029971320855911405, "loss": 0.5366, "step": 9275 }, { "epoch": 0.23555989897323298, "grad_norm": 0.39453125, "learning_rate": 0.0002997111511608914, "loss": 0.5219, "step": 9280 }, { "epoch": 0.23568681702224872, "grad_norm": 0.390625, "learning_rate": 0.00029970908641640225, "loss": 0.5645, "step": 9285 }, { "epoch": 0.2358137350712645, "grad_norm": 0.453125, "learning_rate": 0.00029970701432574775, "loss": 0.5063, "step": 9290 }, { "epoch": 0.23594065312028023, "grad_norm": 0.546875, "learning_rate": 0.0002997049348890298, "loss": 0.5154, "step": 9295 }, { "epoch": 0.236067571169296, "grad_norm": 0.390625, "learning_rate": 0.00029970284810635023, "loss": 0.5327, "step": 9300 }, { "epoch": 0.23619448921831174, "grad_norm": 0.375, "learning_rate": 0.0002997007539778116, "loss": 0.5196, "step": 9305 }, { "epoch": 0.23632140726732748, "grad_norm": 0.39453125, "learning_rate": 0.0002996986525035166, "loss": 0.5221, "step": 9310 }, { "epoch": 0.23644832531634324, "grad_norm": 0.41796875, "learning_rate": 0.00029969654368356834, "loss": 0.5238, "step": 9315 }, { "epoch": 0.23657524336535898, "grad_norm": 0.369140625, "learning_rate": 0.0002996944275180703, "loss": 0.527, "step": 9320 }, { "epoch": 0.23670216141437475, "grad_norm": 0.38671875, "learning_rate": 0.00029969230400712633, "loss": 0.5432, "step": 9325 }, { "epoch": 0.2368290794633905, "grad_norm": 0.462890625, "learning_rate": 0.00029969017315084064, "loss": 0.5076, "step": 9330 }, { "epoch": 0.23695599751240623, "grad_norm": 0.392578125, "learning_rate": 0.0002996880349493178, "loss": 0.5122, "step": 9335 }, { "epoch": 0.237082915561422, "grad_norm": 0.37109375, "learning_rate": 0.0002996858894026627, "loss": 0.5312, "step": 9340 }, { "epoch": 0.23720983361043774, "grad_norm": 0.376953125, "learning_rate": 0.0002996837365109806, "loss": 0.5304, "step": 9345 }, { "epoch": 0.2373367516594535, "grad_norm": 0.37109375, "learning_rate": 0.0002996815762743772, "loss": 0.5449, "step": 9350 }, { "epoch": 0.23746366970846924, "grad_norm": 0.359375, "learning_rate": 0.00029967940869295845, "loss": 0.5149, "step": 9355 }, { "epoch": 0.23759058775748498, "grad_norm": 0.359375, "learning_rate": 0.0002996772337668308, "loss": 0.5049, "step": 9360 }, { "epoch": 0.23771750580650075, "grad_norm": 0.3515625, "learning_rate": 0.0002996750514961009, "loss": 0.5435, "step": 9365 }, { "epoch": 0.2378444238555165, "grad_norm": 0.37109375, "learning_rate": 0.0002996728618808758, "loss": 0.5304, "step": 9370 }, { "epoch": 0.23797134190453223, "grad_norm": 0.35546875, "learning_rate": 0.000299670664921263, "loss": 0.5273, "step": 9375 }, { "epoch": 0.238098259953548, "grad_norm": 0.35546875, "learning_rate": 0.0002996684606173703, "loss": 0.4952, "step": 9380 }, { "epoch": 0.23822517800256374, "grad_norm": 0.361328125, "learning_rate": 0.00029966624896930587, "loss": 0.5222, "step": 9385 }, { "epoch": 0.2383520960515795, "grad_norm": 0.375, "learning_rate": 0.00029966402997717823, "loss": 0.5166, "step": 9390 }, { "epoch": 0.23847901410059524, "grad_norm": 0.369140625, "learning_rate": 0.0002996618036410962, "loss": 0.5173, "step": 9395 }, { "epoch": 0.23860593214961098, "grad_norm": 0.345703125, "learning_rate": 0.0002996595699611691, "loss": 0.4855, "step": 9400 }, { "epoch": 0.23873285019862675, "grad_norm": 0.341796875, "learning_rate": 0.00029965732893750657, "loss": 0.5257, "step": 9405 }, { "epoch": 0.2388597682476425, "grad_norm": 0.38671875, "learning_rate": 0.0002996550805702185, "loss": 0.5292, "step": 9410 }, { "epoch": 0.23898668629665826, "grad_norm": 0.3515625, "learning_rate": 0.0002996528248594153, "loss": 0.5256, "step": 9415 }, { "epoch": 0.239113604345674, "grad_norm": 0.353515625, "learning_rate": 0.0002996505618052075, "loss": 0.5117, "step": 9420 }, { "epoch": 0.23924052239468974, "grad_norm": 0.369140625, "learning_rate": 0.00029964829140770625, "loss": 0.5275, "step": 9425 }, { "epoch": 0.2393674404437055, "grad_norm": 0.396484375, "learning_rate": 0.00029964601366702295, "loss": 0.5222, "step": 9430 }, { "epoch": 0.23949435849272124, "grad_norm": 0.365234375, "learning_rate": 0.0002996437285832694, "loss": 0.5115, "step": 9435 }, { "epoch": 0.239621276541737, "grad_norm": 0.357421875, "learning_rate": 0.0002996414361565577, "loss": 0.5168, "step": 9440 }, { "epoch": 0.23974819459075275, "grad_norm": 0.38671875, "learning_rate": 0.0002996391363870003, "loss": 0.5348, "step": 9445 }, { "epoch": 0.2398751126397685, "grad_norm": 0.353515625, "learning_rate": 0.00029963682927471014, "loss": 0.494, "step": 9450 }, { "epoch": 0.24000203068878426, "grad_norm": 0.38671875, "learning_rate": 0.00029963451481980035, "loss": 0.5117, "step": 9455 }, { "epoch": 0.2401289487378, "grad_norm": 0.37109375, "learning_rate": 0.0002996321930223845, "loss": 0.499, "step": 9460 }, { "epoch": 0.24025586678681576, "grad_norm": 0.376953125, "learning_rate": 0.00029962986388257657, "loss": 0.5281, "step": 9465 }, { "epoch": 0.2403827848358315, "grad_norm": 0.357421875, "learning_rate": 0.0002996275274004908, "loss": 0.5406, "step": 9470 }, { "epoch": 0.24050970288484724, "grad_norm": 0.375, "learning_rate": 0.0002996251835762419, "loss": 0.5119, "step": 9475 }, { "epoch": 0.240636620933863, "grad_norm": 0.365234375, "learning_rate": 0.0002996228324099448, "loss": 0.5427, "step": 9480 }, { "epoch": 0.24076353898287875, "grad_norm": 0.34765625, "learning_rate": 0.00029962047390171497, "loss": 0.5177, "step": 9485 }, { "epoch": 0.24089045703189452, "grad_norm": 0.3828125, "learning_rate": 0.00029961810805166807, "loss": 0.5145, "step": 9490 }, { "epoch": 0.24101737508091026, "grad_norm": 0.384765625, "learning_rate": 0.0002996157348599202, "loss": 0.5337, "step": 9495 }, { "epoch": 0.241144293129926, "grad_norm": 0.337890625, "learning_rate": 0.00029961335432658777, "loss": 0.5158, "step": 9500 }, { "epoch": 0.24127121117894176, "grad_norm": 0.34765625, "learning_rate": 0.0002996109664517877, "loss": 0.5221, "step": 9505 }, { "epoch": 0.2413981292279575, "grad_norm": 0.36328125, "learning_rate": 0.00029960857123563706, "loss": 0.5581, "step": 9510 }, { "epoch": 0.24152504727697327, "grad_norm": 0.34375, "learning_rate": 0.00029960616867825344, "loss": 0.519, "step": 9515 }, { "epoch": 0.241651965325989, "grad_norm": 0.40234375, "learning_rate": 0.00029960375877975475, "loss": 0.5478, "step": 9520 }, { "epoch": 0.24177888337500475, "grad_norm": 0.373046875, "learning_rate": 0.00029960134154025916, "loss": 0.4954, "step": 9525 }, { "epoch": 0.24190580142402052, "grad_norm": 0.365234375, "learning_rate": 0.00029959891695988534, "loss": 0.5511, "step": 9530 }, { "epoch": 0.24203271947303626, "grad_norm": 0.369140625, "learning_rate": 0.0002995964850387523, "loss": 0.5283, "step": 9535 }, { "epoch": 0.24215963752205202, "grad_norm": 0.359375, "learning_rate": 0.0002995940457769793, "loss": 0.51, "step": 9540 }, { "epoch": 0.24228655557106776, "grad_norm": 0.361328125, "learning_rate": 0.00029959159917468603, "loss": 0.5425, "step": 9545 }, { "epoch": 0.2424134736200835, "grad_norm": 0.36328125, "learning_rate": 0.0002995891452319926, "loss": 0.5408, "step": 9550 }, { "epoch": 0.24254039166909927, "grad_norm": 0.38671875, "learning_rate": 0.00029958668394901945, "loss": 0.5611, "step": 9555 }, { "epoch": 0.242667309718115, "grad_norm": 0.365234375, "learning_rate": 0.00029958421532588726, "loss": 0.4897, "step": 9560 }, { "epoch": 0.24279422776713078, "grad_norm": 0.33984375, "learning_rate": 0.0002995817393627172, "loss": 0.5186, "step": 9565 }, { "epoch": 0.24292114581614652, "grad_norm": 0.388671875, "learning_rate": 0.0002995792560596308, "loss": 0.5172, "step": 9570 }, { "epoch": 0.24304806386516226, "grad_norm": 0.345703125, "learning_rate": 0.0002995767654167499, "loss": 0.5196, "step": 9575 }, { "epoch": 0.24317498191417802, "grad_norm": 0.38671875, "learning_rate": 0.0002995742674341967, "loss": 0.5282, "step": 9580 }, { "epoch": 0.24330189996319376, "grad_norm": 0.375, "learning_rate": 0.00029957176211209375, "loss": 0.526, "step": 9585 }, { "epoch": 0.2434288180122095, "grad_norm": 0.359375, "learning_rate": 0.000299569249450564, "loss": 0.5154, "step": 9590 }, { "epoch": 0.24355573606122527, "grad_norm": 0.3828125, "learning_rate": 0.0002995667294497308, "loss": 0.5263, "step": 9595 }, { "epoch": 0.243682654110241, "grad_norm": 0.37109375, "learning_rate": 0.00029956420210971774, "loss": 0.5226, "step": 9600 }, { "epoch": 0.24380957215925678, "grad_norm": 0.3671875, "learning_rate": 0.0002995616674306489, "loss": 0.5295, "step": 9605 }, { "epoch": 0.24393649020827252, "grad_norm": 0.37890625, "learning_rate": 0.00029955912541264855, "loss": 0.5534, "step": 9610 }, { "epoch": 0.24406340825728826, "grad_norm": 0.396484375, "learning_rate": 0.00029955657605584157, "loss": 0.5192, "step": 9615 }, { "epoch": 0.24419032630630402, "grad_norm": 0.390625, "learning_rate": 0.0002995540193603529, "loss": 0.5236, "step": 9620 }, { "epoch": 0.24431724435531976, "grad_norm": 0.3671875, "learning_rate": 0.0002995514553263081, "loss": 0.5046, "step": 9625 }, { "epoch": 0.24444416240433553, "grad_norm": 0.37109375, "learning_rate": 0.000299548883953833, "loss": 0.5343, "step": 9630 }, { "epoch": 0.24457108045335127, "grad_norm": 0.353515625, "learning_rate": 0.0002995463052430537, "loss": 0.486, "step": 9635 }, { "epoch": 0.244697998502367, "grad_norm": 0.380859375, "learning_rate": 0.00029954371919409673, "loss": 0.5133, "step": 9640 }, { "epoch": 0.24482491655138278, "grad_norm": 0.376953125, "learning_rate": 0.0002995411258070891, "loss": 0.5293, "step": 9645 }, { "epoch": 0.24495183460039852, "grad_norm": 0.388671875, "learning_rate": 0.00029953852508215795, "loss": 0.5553, "step": 9650 }, { "epoch": 0.24507875264941428, "grad_norm": 0.396484375, "learning_rate": 0.00029953591701943096, "loss": 0.5153, "step": 9655 }, { "epoch": 0.24520567069843002, "grad_norm": 0.34765625, "learning_rate": 0.0002995333016190361, "loss": 0.5228, "step": 9660 }, { "epoch": 0.24533258874744576, "grad_norm": 0.369140625, "learning_rate": 0.00029953067888110167, "loss": 0.5245, "step": 9665 }, { "epoch": 0.24545950679646153, "grad_norm": 0.359375, "learning_rate": 0.0002995280488057564, "loss": 0.5209, "step": 9670 }, { "epoch": 0.24558642484547727, "grad_norm": 0.400390625, "learning_rate": 0.00029952541139312934, "loss": 0.5316, "step": 9675 }, { "epoch": 0.24571334289449304, "grad_norm": 0.39453125, "learning_rate": 0.0002995227666433499, "loss": 0.546, "step": 9680 }, { "epoch": 0.24584026094350878, "grad_norm": 0.400390625, "learning_rate": 0.0002995201145565479, "loss": 0.5034, "step": 9685 }, { "epoch": 0.24596717899252452, "grad_norm": 0.37890625, "learning_rate": 0.0002995174551328534, "loss": 0.5189, "step": 9690 }, { "epoch": 0.24609409704154028, "grad_norm": 0.388671875, "learning_rate": 0.00029951478837239695, "loss": 0.4971, "step": 9695 }, { "epoch": 0.24622101509055602, "grad_norm": 0.36328125, "learning_rate": 0.00029951211427530936, "loss": 0.5168, "step": 9700 }, { "epoch": 0.2463479331395718, "grad_norm": 0.39453125, "learning_rate": 0.00029950943284172193, "loss": 0.5353, "step": 9705 }, { "epoch": 0.24647485118858753, "grad_norm": 0.376953125, "learning_rate": 0.0002995067440717661, "loss": 0.5447, "step": 9710 }, { "epoch": 0.24660176923760327, "grad_norm": 0.369140625, "learning_rate": 0.000299504047965574, "loss": 0.5288, "step": 9715 }, { "epoch": 0.24672868728661904, "grad_norm": 0.37890625, "learning_rate": 0.0002995013445232778, "loss": 0.5252, "step": 9720 }, { "epoch": 0.24685560533563478, "grad_norm": 0.345703125, "learning_rate": 0.0002994986337450102, "loss": 0.5322, "step": 9725 }, { "epoch": 0.24698252338465054, "grad_norm": 0.361328125, "learning_rate": 0.0002994959156309041, "loss": 0.5321, "step": 9730 }, { "epoch": 0.24710944143366628, "grad_norm": 0.361328125, "learning_rate": 0.000299493190181093, "loss": 0.5195, "step": 9735 }, { "epoch": 0.24723635948268202, "grad_norm": 0.369140625, "learning_rate": 0.00029949045739571063, "loss": 0.5444, "step": 9740 }, { "epoch": 0.2473632775316978, "grad_norm": 0.361328125, "learning_rate": 0.00029948771727489104, "loss": 0.5257, "step": 9745 }, { "epoch": 0.24749019558071353, "grad_norm": 0.3671875, "learning_rate": 0.00029948496981876873, "loss": 0.4611, "step": 9750 }, { "epoch": 0.2476171136297293, "grad_norm": 0.357421875, "learning_rate": 0.00029948221502747847, "loss": 0.5104, "step": 9755 }, { "epoch": 0.24774403167874504, "grad_norm": 0.365234375, "learning_rate": 0.0002994794529011555, "loss": 0.4973, "step": 9760 }, { "epoch": 0.24787094972776078, "grad_norm": 0.34765625, "learning_rate": 0.0002994766834399353, "loss": 0.5258, "step": 9765 }, { "epoch": 0.24799786777677654, "grad_norm": 0.37890625, "learning_rate": 0.0002994739066439538, "loss": 0.5434, "step": 9770 }, { "epoch": 0.24812478582579228, "grad_norm": 0.310546875, "learning_rate": 0.0002994711225133472, "loss": 0.4886, "step": 9775 }, { "epoch": 0.24825170387480805, "grad_norm": 0.37109375, "learning_rate": 0.00029946833104825223, "loss": 0.5346, "step": 9780 }, { "epoch": 0.2483786219238238, "grad_norm": 0.37890625, "learning_rate": 0.00029946553224880575, "loss": 0.5304, "step": 9785 }, { "epoch": 0.24850553997283953, "grad_norm": 0.37890625, "learning_rate": 0.0002994627261151452, "loss": 0.5273, "step": 9790 }, { "epoch": 0.2486324580218553, "grad_norm": 0.328125, "learning_rate": 0.00029945991264740813, "loss": 0.4962, "step": 9795 }, { "epoch": 0.24875937607087104, "grad_norm": 0.380859375, "learning_rate": 0.0002994570918457327, "loss": 0.5367, "step": 9800 }, { "epoch": 0.2488862941198868, "grad_norm": 0.359375, "learning_rate": 0.00029945426371025735, "loss": 0.5027, "step": 9805 }, { "epoch": 0.24901321216890254, "grad_norm": 0.3671875, "learning_rate": 0.0002994514282411208, "loss": 0.5259, "step": 9810 }, { "epoch": 0.24914013021791828, "grad_norm": 0.369140625, "learning_rate": 0.00029944858543846215, "loss": 0.5251, "step": 9815 }, { "epoch": 0.24926704826693405, "grad_norm": 0.359375, "learning_rate": 0.000299445735302421, "loss": 0.504, "step": 9820 }, { "epoch": 0.2493939663159498, "grad_norm": 0.396484375, "learning_rate": 0.00029944287783313715, "loss": 0.5217, "step": 9825 }, { "epoch": 0.24952088436496553, "grad_norm": 0.349609375, "learning_rate": 0.0002994400130307508, "loss": 0.5232, "step": 9830 }, { "epoch": 0.2496478024139813, "grad_norm": 0.39453125, "learning_rate": 0.0002994371408954025, "loss": 0.524, "step": 9835 }, { "epoch": 0.24977472046299704, "grad_norm": 0.34765625, "learning_rate": 0.0002994342614272333, "loss": 0.5079, "step": 9840 }, { "epoch": 0.2499016385120128, "grad_norm": 0.384765625, "learning_rate": 0.00029943137462638434, "loss": 0.5152, "step": 9845 }, { "epoch": 0.25002855656102857, "grad_norm": 0.353515625, "learning_rate": 0.0002994284804929974, "loss": 0.4897, "step": 9850 }, { "epoch": 0.2501554746100443, "grad_norm": 0.369140625, "learning_rate": 0.0002994255790272144, "loss": 0.5011, "step": 9855 }, { "epoch": 0.25028239265906005, "grad_norm": 0.365234375, "learning_rate": 0.0002994226702291778, "loss": 0.5161, "step": 9860 }, { "epoch": 0.2504093107080758, "grad_norm": 0.369140625, "learning_rate": 0.00029941975409903033, "loss": 0.5071, "step": 9865 }, { "epoch": 0.25053622875709153, "grad_norm": 0.375, "learning_rate": 0.000299416830636915, "loss": 0.538, "step": 9870 }, { "epoch": 0.2506631468061073, "grad_norm": 0.353515625, "learning_rate": 0.00029941389984297525, "loss": 0.528, "step": 9875 }, { "epoch": 0.25079006485512306, "grad_norm": 0.375, "learning_rate": 0.00029941096171735507, "loss": 0.5169, "step": 9880 }, { "epoch": 0.2509169829041388, "grad_norm": 0.365234375, "learning_rate": 0.0002994080162601984, "loss": 0.5043, "step": 9885 }, { "epoch": 0.25104390095315454, "grad_norm": 0.36328125, "learning_rate": 0.00029940506347164996, "loss": 0.5108, "step": 9890 }, { "epoch": 0.2511708190021703, "grad_norm": 0.345703125, "learning_rate": 0.00029940210335185457, "loss": 0.5115, "step": 9895 }, { "epoch": 0.2512977370511861, "grad_norm": 0.37109375, "learning_rate": 0.0002993991359009575, "loss": 0.5379, "step": 9900 }, { "epoch": 0.2514246551002018, "grad_norm": 0.36328125, "learning_rate": 0.0002993961611191043, "loss": 0.5189, "step": 9905 }, { "epoch": 0.25155157314921756, "grad_norm": 0.38671875, "learning_rate": 0.000299393179006441, "loss": 0.5005, "step": 9910 }, { "epoch": 0.2516784911982333, "grad_norm": 0.373046875, "learning_rate": 0.00029939018956311387, "loss": 0.5049, "step": 9915 }, { "epoch": 0.25180540924724903, "grad_norm": 0.39453125, "learning_rate": 0.0002993871927892697, "loss": 0.5439, "step": 9920 }, { "epoch": 0.2519323272962648, "grad_norm": 0.384765625, "learning_rate": 0.00029938418868505546, "loss": 0.5567, "step": 9925 }, { "epoch": 0.25205924534528057, "grad_norm": 0.408203125, "learning_rate": 0.0002993811772506186, "loss": 0.5219, "step": 9930 }, { "epoch": 0.2521861633942963, "grad_norm": 0.36328125, "learning_rate": 0.0002993781584861069, "loss": 0.4877, "step": 9935 }, { "epoch": 0.25231308144331205, "grad_norm": 0.37109375, "learning_rate": 0.00029937513239166843, "loss": 0.5046, "step": 9940 }, { "epoch": 0.2524399994923278, "grad_norm": 0.375, "learning_rate": 0.0002993720989674517, "loss": 0.5169, "step": 9945 }, { "epoch": 0.2525669175413435, "grad_norm": 0.341796875, "learning_rate": 0.00029936905821360566, "loss": 0.4873, "step": 9950 }, { "epoch": 0.2526938355903593, "grad_norm": 0.357421875, "learning_rate": 0.00029936601013027934, "loss": 0.5158, "step": 9955 }, { "epoch": 0.25282075363937506, "grad_norm": 0.361328125, "learning_rate": 0.00029936295471762246, "loss": 0.5008, "step": 9960 }, { "epoch": 0.25294767168839083, "grad_norm": 0.359375, "learning_rate": 0.00029935989197578486, "loss": 0.5201, "step": 9965 }, { "epoch": 0.25307458973740654, "grad_norm": 0.36328125, "learning_rate": 0.00029935682190491687, "loss": 0.5192, "step": 9970 }, { "epoch": 0.2532015077864223, "grad_norm": 0.390625, "learning_rate": 0.00029935374450516914, "loss": 0.543, "step": 9975 }, { "epoch": 0.2533284258354381, "grad_norm": 0.328125, "learning_rate": 0.0002993506597766926, "loss": 0.4409, "step": 9980 }, { "epoch": 0.2534553438844538, "grad_norm": 0.361328125, "learning_rate": 0.0002993475677196387, "loss": 0.5186, "step": 9985 }, { "epoch": 0.25358226193346955, "grad_norm": 0.361328125, "learning_rate": 0.0002993444683341592, "loss": 0.4951, "step": 9990 }, { "epoch": 0.2537091799824853, "grad_norm": 0.3828125, "learning_rate": 0.0002993413616204061, "loss": 0.5063, "step": 9995 }, { "epoch": 0.25383609803150103, "grad_norm": 0.302734375, "learning_rate": 0.00029933824757853185, "loss": 0.4975, "step": 10000 }, { "epoch": 0.2539630160805168, "grad_norm": 0.375, "learning_rate": 0.0002993351262086893, "loss": 0.5347, "step": 10005 }, { "epoch": 0.25408993412953257, "grad_norm": 0.365234375, "learning_rate": 0.0002993319975110316, "loss": 0.5373, "step": 10010 }, { "epoch": 0.25421685217854834, "grad_norm": 0.359375, "learning_rate": 0.0002993288614857122, "loss": 0.5229, "step": 10015 }, { "epoch": 0.25434377022756405, "grad_norm": 0.369140625, "learning_rate": 0.0002993257181328851, "loss": 0.4872, "step": 10020 }, { "epoch": 0.2544706882765798, "grad_norm": 0.365234375, "learning_rate": 0.0002993225674527045, "loss": 0.4657, "step": 10025 }, { "epoch": 0.2545976063255956, "grad_norm": 0.416015625, "learning_rate": 0.000299319409445325, "loss": 0.5215, "step": 10030 }, { "epoch": 0.2547245243746113, "grad_norm": 0.359375, "learning_rate": 0.00029931624411090157, "loss": 0.4802, "step": 10035 }, { "epoch": 0.25485144242362706, "grad_norm": 0.353515625, "learning_rate": 0.00029931307144958946, "loss": 0.4943, "step": 10040 }, { "epoch": 0.25497836047264283, "grad_norm": 0.3671875, "learning_rate": 0.0002993098914615445, "loss": 0.5143, "step": 10045 }, { "epoch": 0.25510527852165854, "grad_norm": 0.359375, "learning_rate": 0.00029930670414692257, "loss": 0.4956, "step": 10050 }, { "epoch": 0.2552321965706743, "grad_norm": 0.36328125, "learning_rate": 0.00029930350950588014, "loss": 0.4776, "step": 10055 }, { "epoch": 0.2553591146196901, "grad_norm": 0.349609375, "learning_rate": 0.00029930030753857404, "loss": 0.5101, "step": 10060 }, { "epoch": 0.25548603266870584, "grad_norm": 0.361328125, "learning_rate": 0.00029929709824516126, "loss": 0.5106, "step": 10065 }, { "epoch": 0.25561295071772155, "grad_norm": 0.408203125, "learning_rate": 0.0002992938816257994, "loss": 0.5048, "step": 10070 }, { "epoch": 0.2557398687667373, "grad_norm": 0.36328125, "learning_rate": 0.0002992906576806462, "loss": 0.5032, "step": 10075 }, { "epoch": 0.2558667868157531, "grad_norm": 0.361328125, "learning_rate": 0.00029928742640985986, "loss": 0.5161, "step": 10080 }, { "epoch": 0.2559937048647688, "grad_norm": 0.361328125, "learning_rate": 0.000299284187813599, "loss": 0.5049, "step": 10085 }, { "epoch": 0.25612062291378457, "grad_norm": 0.384765625, "learning_rate": 0.00029928094189202257, "loss": 0.5451, "step": 10090 }, { "epoch": 0.25624754096280034, "grad_norm": 0.341796875, "learning_rate": 0.0002992776886452897, "loss": 0.5175, "step": 10095 }, { "epoch": 0.25637445901181605, "grad_norm": 0.380859375, "learning_rate": 0.00029927442807356014, "loss": 0.5099, "step": 10100 }, { "epoch": 0.2565013770608318, "grad_norm": 0.384765625, "learning_rate": 0.0002992711601769939, "loss": 0.5218, "step": 10105 }, { "epoch": 0.2566282951098476, "grad_norm": 0.34765625, "learning_rate": 0.00029926788495575124, "loss": 0.5244, "step": 10110 }, { "epoch": 0.25675521315886335, "grad_norm": 0.365234375, "learning_rate": 0.0002992646024099929, "loss": 0.5243, "step": 10115 }, { "epoch": 0.25688213120787906, "grad_norm": 0.3671875, "learning_rate": 0.00029926131253988004, "loss": 0.5213, "step": 10120 }, { "epoch": 0.25700904925689483, "grad_norm": 0.38671875, "learning_rate": 0.00029925801534557404, "loss": 0.5108, "step": 10125 }, { "epoch": 0.2571359673059106, "grad_norm": 0.322265625, "learning_rate": 0.00029925471082723664, "loss": 0.4882, "step": 10130 }, { "epoch": 0.2572628853549263, "grad_norm": 0.365234375, "learning_rate": 0.00029925139898503, "loss": 0.5017, "step": 10135 }, { "epoch": 0.2573898034039421, "grad_norm": 0.37109375, "learning_rate": 0.00029924807981911676, "loss": 0.5361, "step": 10140 }, { "epoch": 0.25751672145295784, "grad_norm": 0.361328125, "learning_rate": 0.00029924475332965966, "loss": 0.4912, "step": 10145 }, { "epoch": 0.25764363950197355, "grad_norm": 0.3671875, "learning_rate": 0.00029924141951682195, "loss": 0.522, "step": 10150 }, { "epoch": 0.2577705575509893, "grad_norm": 0.359375, "learning_rate": 0.00029923807838076716, "loss": 0.519, "step": 10155 }, { "epoch": 0.2578974756000051, "grad_norm": 0.34375, "learning_rate": 0.0002992347299216594, "loss": 0.4557, "step": 10160 }, { "epoch": 0.2580243936490208, "grad_norm": 0.333984375, "learning_rate": 0.0002992313741396629, "loss": 0.5084, "step": 10165 }, { "epoch": 0.25815131169803657, "grad_norm": 0.36328125, "learning_rate": 0.0002992280110349423, "loss": 0.5153, "step": 10170 }, { "epoch": 0.25827822974705233, "grad_norm": 0.34765625, "learning_rate": 0.0002992246406076626, "loss": 0.5038, "step": 10175 }, { "epoch": 0.2584051477960681, "grad_norm": 0.3828125, "learning_rate": 0.0002992212628579893, "loss": 0.5105, "step": 10180 }, { "epoch": 0.2585320658450838, "grad_norm": 0.365234375, "learning_rate": 0.000299217877786088, "loss": 0.511, "step": 10185 }, { "epoch": 0.2586589838940996, "grad_norm": 0.345703125, "learning_rate": 0.0002992144853921249, "loss": 0.5153, "step": 10190 }, { "epoch": 0.25878590194311535, "grad_norm": 0.376953125, "learning_rate": 0.0002992110856762665, "loss": 0.4976, "step": 10195 }, { "epoch": 0.25891281999213106, "grad_norm": 0.349609375, "learning_rate": 0.0002992076786386795, "loss": 0.4756, "step": 10200 }, { "epoch": 0.2590397380411468, "grad_norm": 0.357421875, "learning_rate": 0.00029920426427953116, "loss": 0.5009, "step": 10205 }, { "epoch": 0.2591666560901626, "grad_norm": 0.37890625, "learning_rate": 0.00029920084259898896, "loss": 0.4828, "step": 10210 }, { "epoch": 0.2592935741391783, "grad_norm": 0.36328125, "learning_rate": 0.00029919741359722094, "loss": 0.4988, "step": 10215 }, { "epoch": 0.2594204921881941, "grad_norm": 0.392578125, "learning_rate": 0.00029919397727439515, "loss": 0.5377, "step": 10220 }, { "epoch": 0.25954741023720984, "grad_norm": 0.3984375, "learning_rate": 0.0002991905336306804, "loss": 0.502, "step": 10225 }, { "epoch": 0.2596743282862256, "grad_norm": 0.35546875, "learning_rate": 0.0002991870826662456, "loss": 0.5511, "step": 10230 }, { "epoch": 0.2598012463352413, "grad_norm": 0.37890625, "learning_rate": 0.00029918362438126003, "loss": 0.5486, "step": 10235 }, { "epoch": 0.2599281643842571, "grad_norm": 0.361328125, "learning_rate": 0.0002991801587758935, "loss": 0.4867, "step": 10240 }, { "epoch": 0.26005508243327286, "grad_norm": 0.34765625, "learning_rate": 0.000299176685850316, "loss": 0.4925, "step": 10245 }, { "epoch": 0.26018200048228857, "grad_norm": 0.353515625, "learning_rate": 0.0002991732056046979, "loss": 0.4951, "step": 10250 }, { "epoch": 0.26030891853130433, "grad_norm": 0.3984375, "learning_rate": 0.00029916971803921, "loss": 0.5259, "step": 10255 }, { "epoch": 0.2604358365803201, "grad_norm": 0.37890625, "learning_rate": 0.00029916622315402354, "loss": 0.5078, "step": 10260 }, { "epoch": 0.2605627546293358, "grad_norm": 0.369140625, "learning_rate": 0.00029916272094930986, "loss": 0.4919, "step": 10265 }, { "epoch": 0.2606896726783516, "grad_norm": 0.408203125, "learning_rate": 0.00029915921142524093, "loss": 0.5159, "step": 10270 }, { "epoch": 0.26081659072736735, "grad_norm": 0.390625, "learning_rate": 0.0002991556945819889, "loss": 0.5388, "step": 10275 }, { "epoch": 0.2609435087763831, "grad_norm": 0.375, "learning_rate": 0.00029915217041972636, "loss": 0.48, "step": 10280 }, { "epoch": 0.2610704268253988, "grad_norm": 0.392578125, "learning_rate": 0.0002991486389386262, "loss": 0.5001, "step": 10285 }, { "epoch": 0.2611973448744146, "grad_norm": 0.373046875, "learning_rate": 0.0002991451001388618, "loss": 0.4952, "step": 10290 }, { "epoch": 0.26132426292343036, "grad_norm": 0.369140625, "learning_rate": 0.0002991415540206067, "loss": 0.5075, "step": 10295 }, { "epoch": 0.2614511809724461, "grad_norm": 0.361328125, "learning_rate": 0.0002991380005840349, "loss": 0.5116, "step": 10300 }, { "epoch": 0.26157809902146184, "grad_norm": 0.365234375, "learning_rate": 0.00029913443982932086, "loss": 0.4966, "step": 10305 }, { "epoch": 0.2617050170704776, "grad_norm": 0.353515625, "learning_rate": 0.0002991308717566393, "loss": 0.5007, "step": 10310 }, { "epoch": 0.2618319351194933, "grad_norm": 0.404296875, "learning_rate": 0.0002991272963661653, "loss": 0.5234, "step": 10315 }, { "epoch": 0.2619588531685091, "grad_norm": 0.34765625, "learning_rate": 0.00029912371365807414, "loss": 0.4787, "step": 10320 }, { "epoch": 0.26208577121752485, "grad_norm": 0.373046875, "learning_rate": 0.00029912012363254186, "loss": 0.5255, "step": 10325 }, { "epoch": 0.2622126892665406, "grad_norm": 0.392578125, "learning_rate": 0.0002991165262897445, "loss": 0.5149, "step": 10330 }, { "epoch": 0.26233960731555633, "grad_norm": 0.380859375, "learning_rate": 0.00029911292162985856, "loss": 0.5029, "step": 10335 }, { "epoch": 0.2624665253645721, "grad_norm": 0.373046875, "learning_rate": 0.00029910930965306094, "loss": 0.522, "step": 10340 }, { "epoch": 0.26259344341358787, "grad_norm": 0.3515625, "learning_rate": 0.00029910569035952896, "loss": 0.5032, "step": 10345 }, { "epoch": 0.2627203614626036, "grad_norm": 0.3671875, "learning_rate": 0.00029910206374944006, "loss": 0.5004, "step": 10350 }, { "epoch": 0.26284727951161935, "grad_norm": 0.380859375, "learning_rate": 0.0002990984298229724, "loss": 0.5218, "step": 10355 }, { "epoch": 0.2629741975606351, "grad_norm": 0.3984375, "learning_rate": 0.0002990947885803041, "loss": 0.5266, "step": 10360 }, { "epoch": 0.2631011156096508, "grad_norm": 0.33984375, "learning_rate": 0.0002990911400216139, "loss": 0.4976, "step": 10365 }, { "epoch": 0.2632280336586666, "grad_norm": 0.35546875, "learning_rate": 0.0002990874841470809, "loss": 0.4989, "step": 10370 }, { "epoch": 0.26335495170768236, "grad_norm": 0.35546875, "learning_rate": 0.00029908382095688446, "loss": 0.519, "step": 10375 }, { "epoch": 0.2634818697566981, "grad_norm": 0.365234375, "learning_rate": 0.00029908015045120425, "loss": 0.4887, "step": 10380 }, { "epoch": 0.26360878780571384, "grad_norm": 0.384765625, "learning_rate": 0.00029907647263022045, "loss": 0.5082, "step": 10385 }, { "epoch": 0.2637357058547296, "grad_norm": 0.357421875, "learning_rate": 0.00029907278749411354, "loss": 0.5127, "step": 10390 }, { "epoch": 0.2638626239037454, "grad_norm": 0.390625, "learning_rate": 0.0002990690950430643, "loss": 0.5379, "step": 10395 }, { "epoch": 0.2639895419527611, "grad_norm": 0.37890625, "learning_rate": 0.00029906539527725396, "loss": 0.5032, "step": 10400 }, { "epoch": 0.26411646000177685, "grad_norm": 0.34765625, "learning_rate": 0.0002990616881968641, "loss": 0.5321, "step": 10405 }, { "epoch": 0.2642433780507926, "grad_norm": 0.359375, "learning_rate": 0.00029905797380207655, "loss": 0.5126, "step": 10410 }, { "epoch": 0.26437029609980833, "grad_norm": 0.38671875, "learning_rate": 0.00029905425209307356, "loss": 0.5204, "step": 10415 }, { "epoch": 0.2644972141488241, "grad_norm": 0.34375, "learning_rate": 0.00029905052307003784, "loss": 0.4993, "step": 10420 }, { "epoch": 0.26462413219783987, "grad_norm": 0.47265625, "learning_rate": 0.00029904678673315234, "loss": 0.5137, "step": 10425 }, { "epoch": 0.2647510502468556, "grad_norm": 0.376953125, "learning_rate": 0.00029904304308260034, "loss": 0.5094, "step": 10430 }, { "epoch": 0.26487796829587135, "grad_norm": 0.333984375, "learning_rate": 0.00029903929211856563, "loss": 0.5333, "step": 10435 }, { "epoch": 0.2650048863448871, "grad_norm": 0.353515625, "learning_rate": 0.0002990355338412322, "loss": 0.4975, "step": 10440 }, { "epoch": 0.2651318043939029, "grad_norm": 0.384765625, "learning_rate": 0.0002990317682507845, "loss": 0.4935, "step": 10445 }, { "epoch": 0.2652587224429186, "grad_norm": 0.375, "learning_rate": 0.00029902799534740725, "loss": 0.51, "step": 10450 }, { "epoch": 0.26538564049193436, "grad_norm": 0.37890625, "learning_rate": 0.00029902421513128567, "loss": 0.5082, "step": 10455 }, { "epoch": 0.2655125585409501, "grad_norm": 0.341796875, "learning_rate": 0.0002990204276026052, "loss": 0.4981, "step": 10460 }, { "epoch": 0.26563947658996584, "grad_norm": 0.35546875, "learning_rate": 0.00029901663276155174, "loss": 0.5081, "step": 10465 }, { "epoch": 0.2657663946389816, "grad_norm": 0.341796875, "learning_rate": 0.00029901283060831144, "loss": 0.4943, "step": 10470 }, { "epoch": 0.2658933126879974, "grad_norm": 0.5625, "learning_rate": 0.00029900902114307095, "loss": 0.5207, "step": 10475 }, { "epoch": 0.2660202307370131, "grad_norm": 0.3359375, "learning_rate": 0.0002990052043660171, "loss": 0.4686, "step": 10480 }, { "epoch": 0.26614714878602885, "grad_norm": 0.3515625, "learning_rate": 0.00029900138027733725, "loss": 0.5174, "step": 10485 }, { "epoch": 0.2662740668350446, "grad_norm": 0.36328125, "learning_rate": 0.000298997548877219, "loss": 0.5033, "step": 10490 }, { "epoch": 0.2664009848840604, "grad_norm": 0.353515625, "learning_rate": 0.0002989937101658504, "loss": 0.4918, "step": 10495 }, { "epoch": 0.2665279029330761, "grad_norm": 0.37890625, "learning_rate": 0.0002989898641434198, "loss": 0.5, "step": 10500 }, { "epoch": 0.26665482098209187, "grad_norm": 0.369140625, "learning_rate": 0.0002989860108101159, "loss": 0.5341, "step": 10505 }, { "epoch": 0.26678173903110763, "grad_norm": 0.357421875, "learning_rate": 0.0002989821501661278, "loss": 0.5021, "step": 10510 }, { "epoch": 0.26690865708012335, "grad_norm": 0.37890625, "learning_rate": 0.00029897828221164493, "loss": 0.5117, "step": 10515 }, { "epoch": 0.2670355751291391, "grad_norm": 0.37890625, "learning_rate": 0.00029897440694685705, "loss": 0.5474, "step": 10520 }, { "epoch": 0.2671624931781549, "grad_norm": 0.376953125, "learning_rate": 0.00029897052437195446, "loss": 0.513, "step": 10525 }, { "epoch": 0.2672894112271706, "grad_norm": 0.373046875, "learning_rate": 0.0002989666344871275, "loss": 0.5494, "step": 10530 }, { "epoch": 0.26741632927618636, "grad_norm": 0.357421875, "learning_rate": 0.00029896273729256717, "loss": 0.5111, "step": 10535 }, { "epoch": 0.2675432473252021, "grad_norm": 0.357421875, "learning_rate": 0.00029895883278846465, "loss": 0.5007, "step": 10540 }, { "epoch": 0.2676701653742179, "grad_norm": 0.35546875, "learning_rate": 0.0002989549209750115, "loss": 0.5044, "step": 10545 }, { "epoch": 0.2677970834232336, "grad_norm": 0.384765625, "learning_rate": 0.0002989510018523998, "loss": 0.5098, "step": 10550 }, { "epoch": 0.2679240014722494, "grad_norm": 0.3515625, "learning_rate": 0.00029894707542082175, "loss": 0.5232, "step": 10555 }, { "epoch": 0.26805091952126514, "grad_norm": 0.369140625, "learning_rate": 0.00029894314168047, "loss": 0.5146, "step": 10560 }, { "epoch": 0.26817783757028085, "grad_norm": 0.333984375, "learning_rate": 0.0002989392006315376, "loss": 0.4927, "step": 10565 }, { "epoch": 0.2683047556192966, "grad_norm": 0.380859375, "learning_rate": 0.000298935252274218, "loss": 0.529, "step": 10570 }, { "epoch": 0.2684316736683124, "grad_norm": 0.37890625, "learning_rate": 0.0002989312966087049, "loss": 0.5402, "step": 10575 }, { "epoch": 0.2685585917173281, "grad_norm": 0.390625, "learning_rate": 0.00029892733363519233, "loss": 0.5455, "step": 10580 }, { "epoch": 0.26868550976634387, "grad_norm": 0.349609375, "learning_rate": 0.0002989233633538749, "loss": 0.5264, "step": 10585 }, { "epoch": 0.26881242781535963, "grad_norm": 0.357421875, "learning_rate": 0.00029891938576494736, "loss": 0.5286, "step": 10590 }, { "epoch": 0.26893934586437535, "grad_norm": 0.3828125, "learning_rate": 0.0002989154008686048, "loss": 0.4978, "step": 10595 }, { "epoch": 0.2690662639133911, "grad_norm": 0.349609375, "learning_rate": 0.0002989114086650429, "loss": 0.4862, "step": 10600 }, { "epoch": 0.2691931819624069, "grad_norm": 0.345703125, "learning_rate": 0.0002989074091544575, "loss": 0.4824, "step": 10605 }, { "epoch": 0.26932010001142265, "grad_norm": 0.34765625, "learning_rate": 0.00029890340233704486, "loss": 0.5215, "step": 10610 }, { "epoch": 0.26944701806043836, "grad_norm": 0.37109375, "learning_rate": 0.00029889938821300155, "loss": 0.5036, "step": 10615 }, { "epoch": 0.2695739361094541, "grad_norm": 0.3671875, "learning_rate": 0.0002988953667825245, "loss": 0.502, "step": 10620 }, { "epoch": 0.2697008541584699, "grad_norm": 0.3828125, "learning_rate": 0.00029889133804581125, "loss": 0.4951, "step": 10625 }, { "epoch": 0.2698277722074856, "grad_norm": 0.37890625, "learning_rate": 0.0002988873020030593, "loss": 0.5126, "step": 10630 }, { "epoch": 0.2699546902565014, "grad_norm": 0.4140625, "learning_rate": 0.0002988832586544668, "loss": 0.5526, "step": 10635 }, { "epoch": 0.27008160830551714, "grad_norm": 0.357421875, "learning_rate": 0.000298879208000232, "loss": 0.4925, "step": 10640 }, { "epoch": 0.27020852635453285, "grad_norm": 0.34765625, "learning_rate": 0.00029887515004055375, "loss": 0.5104, "step": 10645 }, { "epoch": 0.2703354444035486, "grad_norm": 0.3671875, "learning_rate": 0.00029887108477563126, "loss": 0.4955, "step": 10650 }, { "epoch": 0.2704623624525644, "grad_norm": 0.3515625, "learning_rate": 0.00029886701220566397, "loss": 0.5336, "step": 10655 }, { "epoch": 0.27058928050158015, "grad_norm": 0.345703125, "learning_rate": 0.0002988629323308516, "loss": 0.4603, "step": 10660 }, { "epoch": 0.27071619855059587, "grad_norm": 0.34375, "learning_rate": 0.00029885884515139455, "loss": 0.4998, "step": 10665 }, { "epoch": 0.27084311659961163, "grad_norm": 0.328125, "learning_rate": 0.0002988547506674932, "loss": 0.4615, "step": 10670 }, { "epoch": 0.2709700346486274, "grad_norm": 0.328125, "learning_rate": 0.00029885064887934855, "loss": 0.4881, "step": 10675 }, { "epoch": 0.2710969526976431, "grad_norm": 0.341796875, "learning_rate": 0.0002988465397871618, "loss": 0.5008, "step": 10680 }, { "epoch": 0.2712238707466589, "grad_norm": 0.380859375, "learning_rate": 0.0002988424233911347, "loss": 0.5078, "step": 10685 }, { "epoch": 0.27135078879567465, "grad_norm": 0.36328125, "learning_rate": 0.0002988382996914692, "loss": 0.4788, "step": 10690 }, { "epoch": 0.27147770684469036, "grad_norm": 0.353515625, "learning_rate": 0.0002988341686883675, "loss": 0.5031, "step": 10695 }, { "epoch": 0.2716046248937061, "grad_norm": 0.359375, "learning_rate": 0.00029883003038203254, "loss": 0.5015, "step": 10700 }, { "epoch": 0.2717315429427219, "grad_norm": 0.3671875, "learning_rate": 0.00029882588477266725, "loss": 0.4914, "step": 10705 }, { "epoch": 0.27185846099173766, "grad_norm": 0.3515625, "learning_rate": 0.00029882173186047506, "loss": 0.5231, "step": 10710 }, { "epoch": 0.27198537904075337, "grad_norm": 0.373046875, "learning_rate": 0.0002988175716456598, "loss": 0.5143, "step": 10715 }, { "epoch": 0.27211229708976914, "grad_norm": 0.37890625, "learning_rate": 0.00029881340412842553, "loss": 0.4906, "step": 10720 }, { "epoch": 0.2722392151387849, "grad_norm": 0.36328125, "learning_rate": 0.0002988092293089768, "loss": 0.4825, "step": 10725 }, { "epoch": 0.2723661331878006, "grad_norm": 0.337890625, "learning_rate": 0.0002988050471875185, "loss": 0.5012, "step": 10730 }, { "epoch": 0.2724930512368164, "grad_norm": 0.384765625, "learning_rate": 0.0002988008577642558, "loss": 0.4996, "step": 10735 }, { "epoch": 0.27261996928583215, "grad_norm": 0.365234375, "learning_rate": 0.0002987966610393943, "loss": 0.4753, "step": 10740 }, { "epoch": 0.27274688733484787, "grad_norm": 0.375, "learning_rate": 0.0002987924570131399, "loss": 0.5273, "step": 10745 }, { "epoch": 0.27287380538386363, "grad_norm": 0.345703125, "learning_rate": 0.00029878824568569886, "loss": 0.4869, "step": 10750 }, { "epoch": 0.2730007234328794, "grad_norm": 0.359375, "learning_rate": 0.0002987840270572779, "loss": 0.5178, "step": 10755 }, { "epoch": 0.27312764148189517, "grad_norm": 0.37109375, "learning_rate": 0.00029877980112808406, "loss": 0.4841, "step": 10760 }, { "epoch": 0.2732545595309109, "grad_norm": 0.375, "learning_rate": 0.00029877556789832454, "loss": 0.5152, "step": 10765 }, { "epoch": 0.27338147757992665, "grad_norm": 0.36328125, "learning_rate": 0.0002987713273682072, "loss": 0.5345, "step": 10770 }, { "epoch": 0.2735083956289424, "grad_norm": 0.359375, "learning_rate": 0.0002987670795379401, "loss": 0.5419, "step": 10775 }, { "epoch": 0.2736353136779581, "grad_norm": 0.369140625, "learning_rate": 0.0002987628244077317, "loss": 0.5146, "step": 10780 }, { "epoch": 0.2737622317269739, "grad_norm": 0.349609375, "learning_rate": 0.00029875856197779073, "loss": 0.4744, "step": 10785 }, { "epoch": 0.27388914977598966, "grad_norm": 0.357421875, "learning_rate": 0.00029875429224832643, "loss": 0.4939, "step": 10790 }, { "epoch": 0.27401606782500537, "grad_norm": 0.3984375, "learning_rate": 0.0002987500152195482, "loss": 0.4924, "step": 10795 }, { "epoch": 0.27414298587402114, "grad_norm": 0.3515625, "learning_rate": 0.000298745730891666, "loss": 0.4695, "step": 10800 }, { "epoch": 0.2742699039230369, "grad_norm": 0.345703125, "learning_rate": 0.00029874143926489, "loss": 0.4947, "step": 10805 }, { "epoch": 0.2743968219720526, "grad_norm": 0.365234375, "learning_rate": 0.0002987371403394309, "loss": 0.5163, "step": 10810 }, { "epoch": 0.2745237400210684, "grad_norm": 0.34375, "learning_rate": 0.00029873283411549956, "loss": 0.5451, "step": 10815 }, { "epoch": 0.27465065807008415, "grad_norm": 0.380859375, "learning_rate": 0.00029872852059330724, "loss": 0.5234, "step": 10820 }, { "epoch": 0.2747775761190999, "grad_norm": 0.373046875, "learning_rate": 0.0002987241997730657, "loss": 0.5135, "step": 10825 }, { "epoch": 0.27490449416811563, "grad_norm": 0.33203125, "learning_rate": 0.0002987198716549869, "loss": 0.4722, "step": 10830 }, { "epoch": 0.2750314122171314, "grad_norm": 0.369140625, "learning_rate": 0.00029871553623928324, "loss": 0.496, "step": 10835 }, { "epoch": 0.27515833026614717, "grad_norm": 0.34375, "learning_rate": 0.0002987111935261675, "loss": 0.4985, "step": 10840 }, { "epoch": 0.2752852483151629, "grad_norm": 0.41015625, "learning_rate": 0.00029870684351585264, "loss": 0.5252, "step": 10845 }, { "epoch": 0.27541216636417865, "grad_norm": 0.3671875, "learning_rate": 0.0002987024862085523, "loss": 0.4969, "step": 10850 }, { "epoch": 0.2755390844131944, "grad_norm": 0.37109375, "learning_rate": 0.00029869812160448014, "loss": 0.482, "step": 10855 }, { "epoch": 0.2756660024622101, "grad_norm": 0.361328125, "learning_rate": 0.0002986937497038504, "loss": 0.4832, "step": 10860 }, { "epoch": 0.2757929205112259, "grad_norm": 0.3359375, "learning_rate": 0.0002986893705068776, "loss": 0.5069, "step": 10865 }, { "epoch": 0.27591983856024166, "grad_norm": 0.349609375, "learning_rate": 0.0002986849840137766, "loss": 0.4841, "step": 10870 }, { "epoch": 0.2760467566092574, "grad_norm": 0.39453125, "learning_rate": 0.0002986805902247626, "loss": 0.4813, "step": 10875 }, { "epoch": 0.27617367465827314, "grad_norm": 0.361328125, "learning_rate": 0.00029867618914005144, "loss": 0.498, "step": 10880 }, { "epoch": 0.2763005927072889, "grad_norm": 0.4296875, "learning_rate": 0.00029867178075985876, "loss": 0.5158, "step": 10885 }, { "epoch": 0.2764275107563047, "grad_norm": 0.4140625, "learning_rate": 0.0002986673650844011, "loss": 0.56, "step": 10890 }, { "epoch": 0.2765544288053204, "grad_norm": 0.337890625, "learning_rate": 0.000298662942113895, "loss": 0.4925, "step": 10895 }, { "epoch": 0.27668134685433615, "grad_norm": 0.38671875, "learning_rate": 0.00029865851184855756, "loss": 0.4824, "step": 10900 }, { "epoch": 0.2768082649033519, "grad_norm": 0.39453125, "learning_rate": 0.00029865407428860617, "loss": 0.5533, "step": 10905 }, { "epoch": 0.27693518295236763, "grad_norm": 0.34765625, "learning_rate": 0.0002986496294342586, "loss": 0.5086, "step": 10910 }, { "epoch": 0.2770621010013834, "grad_norm": 0.36328125, "learning_rate": 0.00029864517728573286, "loss": 0.523, "step": 10915 }, { "epoch": 0.27718901905039917, "grad_norm": 0.359375, "learning_rate": 0.0002986407178432475, "loss": 0.4688, "step": 10920 }, { "epoch": 0.27731593709941493, "grad_norm": 0.333984375, "learning_rate": 0.0002986362511070213, "loss": 0.5125, "step": 10925 }, { "epoch": 0.27744285514843064, "grad_norm": 0.37890625, "learning_rate": 0.0002986317770772735, "loss": 0.4978, "step": 10930 }, { "epoch": 0.2775697731974464, "grad_norm": 0.357421875, "learning_rate": 0.00029862729575422367, "loss": 0.5053, "step": 10935 }, { "epoch": 0.2776966912464622, "grad_norm": 0.3828125, "learning_rate": 0.00029862280713809157, "loss": 0.5334, "step": 10940 }, { "epoch": 0.2778236092954779, "grad_norm": 0.36328125, "learning_rate": 0.0002986183112290975, "loss": 0.5275, "step": 10945 }, { "epoch": 0.27795052734449366, "grad_norm": 0.349609375, "learning_rate": 0.00029861380802746214, "loss": 0.5073, "step": 10950 }, { "epoch": 0.2780774453935094, "grad_norm": 0.361328125, "learning_rate": 0.0002986092975334064, "loss": 0.5198, "step": 10955 }, { "epoch": 0.27820436344252514, "grad_norm": 0.36328125, "learning_rate": 0.00029860477974715167, "loss": 0.5148, "step": 10960 }, { "epoch": 0.2783312814915409, "grad_norm": 0.36328125, "learning_rate": 0.00029860025466891957, "loss": 0.5469, "step": 10965 }, { "epoch": 0.2784581995405567, "grad_norm": 0.3359375, "learning_rate": 0.00029859572229893217, "loss": 0.4754, "step": 10970 }, { "epoch": 0.27858511758957244, "grad_norm": 0.333984375, "learning_rate": 0.0002985911826374119, "loss": 0.5106, "step": 10975 }, { "epoch": 0.27871203563858815, "grad_norm": 0.361328125, "learning_rate": 0.0002985866356845814, "loss": 0.5116, "step": 10980 }, { "epoch": 0.2788389536876039, "grad_norm": 0.337890625, "learning_rate": 0.000298582081440664, "loss": 0.5026, "step": 10985 }, { "epoch": 0.2789658717366197, "grad_norm": 0.34765625, "learning_rate": 0.0002985775199058829, "loss": 0.5177, "step": 10990 }, { "epoch": 0.2790927897856354, "grad_norm": 0.392578125, "learning_rate": 0.00029857295108046215, "loss": 0.498, "step": 10995 }, { "epoch": 0.27921970783465117, "grad_norm": 0.375, "learning_rate": 0.0002985683749646259, "loss": 0.5242, "step": 11000 }, { "epoch": 0.27934662588366693, "grad_norm": 0.318359375, "learning_rate": 0.0002985637915585987, "loss": 0.5013, "step": 11005 }, { "epoch": 0.27947354393268264, "grad_norm": 0.326171875, "learning_rate": 0.00029855920086260536, "loss": 0.5127, "step": 11010 }, { "epoch": 0.2796004619816984, "grad_norm": 0.36328125, "learning_rate": 0.0002985546028768712, "loss": 0.5297, "step": 11015 }, { "epoch": 0.2797273800307142, "grad_norm": 0.365234375, "learning_rate": 0.00029854999760162187, "loss": 0.5184, "step": 11020 }, { "epoch": 0.2798542980797299, "grad_norm": 0.330078125, "learning_rate": 0.0002985453850370834, "loss": 0.4979, "step": 11025 }, { "epoch": 0.27998121612874566, "grad_norm": 0.34375, "learning_rate": 0.0002985407651834819, "loss": 0.5001, "step": 11030 }, { "epoch": 0.2801081341777614, "grad_norm": 0.3359375, "learning_rate": 0.0002985361380410444, "loss": 0.4877, "step": 11035 }, { "epoch": 0.2802350522267772, "grad_norm": 0.357421875, "learning_rate": 0.00029853150360999766, "loss": 0.5121, "step": 11040 }, { "epoch": 0.2803619702757929, "grad_norm": 0.3515625, "learning_rate": 0.00029852686189056924, "loss": 0.4738, "step": 11045 }, { "epoch": 0.28048888832480867, "grad_norm": 0.369140625, "learning_rate": 0.0002985222128829869, "loss": 0.5235, "step": 11050 }, { "epoch": 0.28061580637382444, "grad_norm": 0.349609375, "learning_rate": 0.0002985175565874787, "loss": 0.4924, "step": 11055 }, { "epoch": 0.28074272442284015, "grad_norm": 0.359375, "learning_rate": 0.00029851289300427314, "loss": 0.4989, "step": 11060 }, { "epoch": 0.2808696424718559, "grad_norm": 0.333984375, "learning_rate": 0.0002985082221335991, "loss": 0.5002, "step": 11065 }, { "epoch": 0.2809965605208717, "grad_norm": 0.36328125, "learning_rate": 0.00029850354397568577, "loss": 0.5407, "step": 11070 }, { "epoch": 0.2811234785698874, "grad_norm": 0.365234375, "learning_rate": 0.0002984988585307627, "loss": 0.4835, "step": 11075 }, { "epoch": 0.28125039661890316, "grad_norm": 0.3671875, "learning_rate": 0.00029849416579905976, "loss": 0.509, "step": 11080 }, { "epoch": 0.28137731466791893, "grad_norm": 0.38671875, "learning_rate": 0.00029848946578080735, "loss": 0.5034, "step": 11085 }, { "epoch": 0.2815042327169347, "grad_norm": 0.390625, "learning_rate": 0.0002984847584762359, "loss": 0.542, "step": 11090 }, { "epoch": 0.2816311507659504, "grad_norm": 0.365234375, "learning_rate": 0.0002984800438855765, "loss": 0.5122, "step": 11095 }, { "epoch": 0.2817580688149662, "grad_norm": 0.357421875, "learning_rate": 0.0002984753220090605, "loss": 0.5018, "step": 11100 }, { "epoch": 0.28188498686398195, "grad_norm": 0.375, "learning_rate": 0.0002984705928469196, "loss": 0.4755, "step": 11105 }, { "epoch": 0.28201190491299766, "grad_norm": 0.35546875, "learning_rate": 0.0002984658563993859, "loss": 0.5488, "step": 11110 }, { "epoch": 0.2821388229620134, "grad_norm": 0.353515625, "learning_rate": 0.00029846111266669176, "loss": 0.509, "step": 11115 }, { "epoch": 0.2822657410110292, "grad_norm": 0.365234375, "learning_rate": 0.00029845636164906996, "loss": 0.5146, "step": 11120 }, { "epoch": 0.2823926590600449, "grad_norm": 0.376953125, "learning_rate": 0.00029845160334675356, "loss": 0.5076, "step": 11125 }, { "epoch": 0.28251957710906067, "grad_norm": 0.37890625, "learning_rate": 0.0002984468377599762, "loss": 0.6023, "step": 11130 }, { "epoch": 0.28264649515807644, "grad_norm": 0.396484375, "learning_rate": 0.00029844206488897153, "loss": 0.5313, "step": 11135 }, { "epoch": 0.2827734132070922, "grad_norm": 0.37109375, "learning_rate": 0.000298437284733974, "loss": 0.5227, "step": 11140 }, { "epoch": 0.2829003312561079, "grad_norm": 0.3515625, "learning_rate": 0.000298432497295218, "loss": 0.487, "step": 11145 }, { "epoch": 0.2830272493051237, "grad_norm": 0.3515625, "learning_rate": 0.00029842770257293845, "loss": 0.5167, "step": 11150 }, { "epoch": 0.28315416735413945, "grad_norm": 0.376953125, "learning_rate": 0.00029842290056737063, "loss": 0.5182, "step": 11155 }, { "epoch": 0.28328108540315516, "grad_norm": 0.41015625, "learning_rate": 0.00029841809127875024, "loss": 0.5445, "step": 11160 }, { "epoch": 0.28340800345217093, "grad_norm": 0.3984375, "learning_rate": 0.00029841327470731324, "loss": 0.4976, "step": 11165 }, { "epoch": 0.2835349215011867, "grad_norm": 0.322265625, "learning_rate": 0.00029840845085329595, "loss": 0.4895, "step": 11170 }, { "epoch": 0.2836618395502024, "grad_norm": 0.34375, "learning_rate": 0.00029840361971693514, "loss": 0.5135, "step": 11175 }, { "epoch": 0.2837887575992182, "grad_norm": 0.35546875, "learning_rate": 0.0002983987812984678, "loss": 0.4913, "step": 11180 }, { "epoch": 0.28391567564823394, "grad_norm": 0.361328125, "learning_rate": 0.00029839393559813135, "loss": 0.509, "step": 11185 }, { "epoch": 0.2840425936972497, "grad_norm": 0.37109375, "learning_rate": 0.00029838908261616356, "loss": 0.4945, "step": 11190 }, { "epoch": 0.2841695117462654, "grad_norm": 0.3984375, "learning_rate": 0.0002983842223528027, "loss": 0.5062, "step": 11195 }, { "epoch": 0.2842964297952812, "grad_norm": 0.34765625, "learning_rate": 0.0002983793548082871, "loss": 0.4761, "step": 11200 }, { "epoch": 0.28442334784429696, "grad_norm": 0.361328125, "learning_rate": 0.00029837447998285567, "loss": 0.5029, "step": 11205 }, { "epoch": 0.28455026589331267, "grad_norm": 0.353515625, "learning_rate": 0.0002983695978767476, "loss": 0.5087, "step": 11210 }, { "epoch": 0.28467718394232844, "grad_norm": 0.37890625, "learning_rate": 0.0002983647084902025, "loss": 0.5192, "step": 11215 }, { "epoch": 0.2848041019913442, "grad_norm": 0.359375, "learning_rate": 0.00029835981182346023, "loss": 0.5059, "step": 11220 }, { "epoch": 0.2849310200403599, "grad_norm": 0.345703125, "learning_rate": 0.00029835490787676104, "loss": 0.5185, "step": 11225 }, { "epoch": 0.2850579380893757, "grad_norm": 0.380859375, "learning_rate": 0.0002983499966503457, "loss": 0.4997, "step": 11230 }, { "epoch": 0.28518485613839145, "grad_norm": 0.369140625, "learning_rate": 0.00029834507814445504, "loss": 0.4947, "step": 11235 }, { "epoch": 0.2853117741874072, "grad_norm": 0.388671875, "learning_rate": 0.0002983401523593305, "loss": 0.555, "step": 11240 }, { "epoch": 0.28543869223642293, "grad_norm": 0.390625, "learning_rate": 0.0002983352192952138, "loss": 0.5043, "step": 11245 }, { "epoch": 0.2855656102854387, "grad_norm": 0.357421875, "learning_rate": 0.00029833027895234703, "loss": 0.5007, "step": 11250 }, { "epoch": 0.28569252833445447, "grad_norm": 0.361328125, "learning_rate": 0.00029832533133097253, "loss": 0.5291, "step": 11255 }, { "epoch": 0.2858194463834702, "grad_norm": 0.369140625, "learning_rate": 0.0002983203764313331, "loss": 0.5002, "step": 11260 }, { "epoch": 0.28594636443248594, "grad_norm": 0.328125, "learning_rate": 0.00029831541425367184, "loss": 0.4916, "step": 11265 }, { "epoch": 0.2860732824815017, "grad_norm": 0.36328125, "learning_rate": 0.0002983104447982323, "loss": 0.4913, "step": 11270 }, { "epoch": 0.2862002005305174, "grad_norm": 0.365234375, "learning_rate": 0.0002983054680652583, "loss": 0.5227, "step": 11275 }, { "epoch": 0.2863271185795332, "grad_norm": 0.361328125, "learning_rate": 0.0002983004840549941, "loss": 0.5017, "step": 11280 }, { "epoch": 0.28645403662854896, "grad_norm": 0.33203125, "learning_rate": 0.0002982954927676842, "loss": 0.4637, "step": 11285 }, { "epoch": 0.28658095467756467, "grad_norm": 0.359375, "learning_rate": 0.0002982904942035735, "loss": 0.5225, "step": 11290 }, { "epoch": 0.28670787272658044, "grad_norm": 0.345703125, "learning_rate": 0.00029828548836290737, "loss": 0.5035, "step": 11295 }, { "epoch": 0.2868347907755962, "grad_norm": 0.365234375, "learning_rate": 0.00029828047524593136, "loss": 0.5232, "step": 11300 }, { "epoch": 0.28696170882461197, "grad_norm": 0.369140625, "learning_rate": 0.00029827545485289154, "loss": 0.4885, "step": 11305 }, { "epoch": 0.2870886268736277, "grad_norm": 0.359375, "learning_rate": 0.0002982704271840342, "loss": 0.4884, "step": 11310 }, { "epoch": 0.28721554492264345, "grad_norm": 0.34375, "learning_rate": 0.00029826539223960606, "loss": 0.4642, "step": 11315 }, { "epoch": 0.2873424629716592, "grad_norm": 0.365234375, "learning_rate": 0.0002982603500198542, "loss": 0.506, "step": 11320 }, { "epoch": 0.28746938102067493, "grad_norm": 0.3671875, "learning_rate": 0.000298255300525026, "loss": 0.5084, "step": 11325 }, { "epoch": 0.2875962990696907, "grad_norm": 0.353515625, "learning_rate": 0.00029825024375536925, "loss": 0.526, "step": 11330 }, { "epoch": 0.28772321711870646, "grad_norm": 0.34765625, "learning_rate": 0.0002982451797111321, "loss": 0.4938, "step": 11335 }, { "epoch": 0.2878501351677222, "grad_norm": 0.375, "learning_rate": 0.000298240108392563, "loss": 0.5384, "step": 11340 }, { "epoch": 0.28797705321673794, "grad_norm": 0.3515625, "learning_rate": 0.00029823502979991085, "loss": 0.5132, "step": 11345 }, { "epoch": 0.2881039712657537, "grad_norm": 0.306640625, "learning_rate": 0.0002982299439334249, "loss": 0.4601, "step": 11350 }, { "epoch": 0.2882308893147695, "grad_norm": 0.341796875, "learning_rate": 0.0002982248507933546, "loss": 0.5228, "step": 11355 }, { "epoch": 0.2883578073637852, "grad_norm": 0.34765625, "learning_rate": 0.0002982197503799499, "loss": 0.5364, "step": 11360 }, { "epoch": 0.28848472541280096, "grad_norm": 0.349609375, "learning_rate": 0.0002982146426934611, "loss": 0.5013, "step": 11365 }, { "epoch": 0.2886116434618167, "grad_norm": 0.34765625, "learning_rate": 0.00029820952773413885, "loss": 0.4716, "step": 11370 }, { "epoch": 0.28873856151083244, "grad_norm": 0.345703125, "learning_rate": 0.0002982044055022341, "loss": 0.506, "step": 11375 }, { "epoch": 0.2888654795598482, "grad_norm": 0.35546875, "learning_rate": 0.0002981992759979982, "loss": 0.5236, "step": 11380 }, { "epoch": 0.28899239760886397, "grad_norm": 0.333984375, "learning_rate": 0.00029819413922168285, "loss": 0.4889, "step": 11385 }, { "epoch": 0.2891193156578797, "grad_norm": 0.369140625, "learning_rate": 0.00029818899517354016, "loss": 0.4963, "step": 11390 }, { "epoch": 0.28924623370689545, "grad_norm": 0.34375, "learning_rate": 0.00029818384385382247, "loss": 0.4911, "step": 11395 }, { "epoch": 0.2893731517559112, "grad_norm": 0.33984375, "learning_rate": 0.00029817868526278257, "loss": 0.5038, "step": 11400 }, { "epoch": 0.289500069804927, "grad_norm": 0.3671875, "learning_rate": 0.00029817351940067367, "loss": 0.5118, "step": 11405 }, { "epoch": 0.2896269878539427, "grad_norm": 0.376953125, "learning_rate": 0.00029816834626774914, "loss": 0.4838, "step": 11410 }, { "epoch": 0.28975390590295846, "grad_norm": 0.306640625, "learning_rate": 0.0002981631658642629, "loss": 0.4887, "step": 11415 }, { "epoch": 0.28988082395197423, "grad_norm": 0.337890625, "learning_rate": 0.0002981579781904691, "loss": 0.5189, "step": 11420 }, { "epoch": 0.29000774200098994, "grad_norm": 0.345703125, "learning_rate": 0.00029815278324662244, "loss": 0.5324, "step": 11425 }, { "epoch": 0.2901346600500057, "grad_norm": 0.36328125, "learning_rate": 0.0002981475810329776, "loss": 0.5048, "step": 11430 }, { "epoch": 0.2902615780990215, "grad_norm": 0.328125, "learning_rate": 0.00029814237154979, "loss": 0.5166, "step": 11435 }, { "epoch": 0.2903884961480372, "grad_norm": 0.361328125, "learning_rate": 0.00029813715479731523, "loss": 0.4735, "step": 11440 }, { "epoch": 0.29051541419705296, "grad_norm": 0.353515625, "learning_rate": 0.00029813193077580933, "loss": 0.4996, "step": 11445 }, { "epoch": 0.2906423322460687, "grad_norm": 0.37890625, "learning_rate": 0.0002981266994855286, "loss": 0.4807, "step": 11450 }, { "epoch": 0.2907692502950845, "grad_norm": 0.34375, "learning_rate": 0.0002981214609267297, "loss": 0.4813, "step": 11455 }, { "epoch": 0.2908961683441002, "grad_norm": 0.373046875, "learning_rate": 0.0002981162150996697, "loss": 0.5426, "step": 11460 }, { "epoch": 0.29102308639311597, "grad_norm": 0.353515625, "learning_rate": 0.00029811096200460605, "loss": 0.5286, "step": 11465 }, { "epoch": 0.29115000444213174, "grad_norm": 0.359375, "learning_rate": 0.0002981057016417965, "loss": 0.4936, "step": 11470 }, { "epoch": 0.29127692249114745, "grad_norm": 0.35546875, "learning_rate": 0.00029810043401149914, "loss": 0.5519, "step": 11475 }, { "epoch": 0.2914038405401632, "grad_norm": 0.365234375, "learning_rate": 0.0002980951591139725, "loss": 0.4776, "step": 11480 }, { "epoch": 0.291530758589179, "grad_norm": 0.357421875, "learning_rate": 0.00029808987694947536, "loss": 0.4949, "step": 11485 }, { "epoch": 0.2916576766381947, "grad_norm": 0.37109375, "learning_rate": 0.000298084587518267, "loss": 0.5128, "step": 11490 }, { "epoch": 0.29178459468721046, "grad_norm": 0.35546875, "learning_rate": 0.0002980792908206069, "loss": 0.5036, "step": 11495 }, { "epoch": 0.29191151273622623, "grad_norm": 0.33203125, "learning_rate": 0.000298073986856755, "loss": 0.5148, "step": 11500 }, { "epoch": 0.29203843078524194, "grad_norm": 0.3515625, "learning_rate": 0.0002980686756269715, "loss": 0.5051, "step": 11505 }, { "epoch": 0.2921653488342577, "grad_norm": 0.34765625, "learning_rate": 0.0002980633571315171, "loss": 0.4712, "step": 11510 }, { "epoch": 0.2922922668832735, "grad_norm": 0.31640625, "learning_rate": 0.00029805803137065274, "loss": 0.4581, "step": 11515 }, { "epoch": 0.29241918493228924, "grad_norm": 0.392578125, "learning_rate": 0.00029805269834463975, "loss": 0.5323, "step": 11520 }, { "epoch": 0.29254610298130496, "grad_norm": 0.384765625, "learning_rate": 0.00029804735805373983, "loss": 0.5135, "step": 11525 }, { "epoch": 0.2926730210303207, "grad_norm": 0.359375, "learning_rate": 0.000298042010498215, "loss": 0.4899, "step": 11530 }, { "epoch": 0.2927999390793365, "grad_norm": 0.337890625, "learning_rate": 0.0002980366556783277, "loss": 0.5069, "step": 11535 }, { "epoch": 0.2929268571283522, "grad_norm": 0.341796875, "learning_rate": 0.0002980312935943407, "loss": 0.4689, "step": 11540 }, { "epoch": 0.29305377517736797, "grad_norm": 0.34765625, "learning_rate": 0.00029802592424651715, "loss": 0.4808, "step": 11545 }, { "epoch": 0.29318069322638374, "grad_norm": 0.328125, "learning_rate": 0.0002980205476351204, "loss": 0.4793, "step": 11550 }, { "epoch": 0.29330761127539945, "grad_norm": 0.337890625, "learning_rate": 0.0002980151637604143, "loss": 0.4952, "step": 11555 }, { "epoch": 0.2934345293244152, "grad_norm": 0.36328125, "learning_rate": 0.0002980097726226631, "loss": 0.5145, "step": 11560 }, { "epoch": 0.293561447373431, "grad_norm": 0.35546875, "learning_rate": 0.00029800437422213135, "loss": 0.4901, "step": 11565 }, { "epoch": 0.29368836542244675, "grad_norm": 0.361328125, "learning_rate": 0.00029799896855908385, "loss": 0.5311, "step": 11570 }, { "epoch": 0.29381528347146246, "grad_norm": 0.361328125, "learning_rate": 0.000297993555633786, "loss": 0.4995, "step": 11575 }, { "epoch": 0.29394220152047823, "grad_norm": 0.318359375, "learning_rate": 0.00029798813544650325, "loss": 0.4872, "step": 11580 }, { "epoch": 0.294069119569494, "grad_norm": 0.345703125, "learning_rate": 0.0002979827079975017, "loss": 0.4858, "step": 11585 }, { "epoch": 0.2941960376185097, "grad_norm": 0.359375, "learning_rate": 0.00029797727328704756, "loss": 0.5031, "step": 11590 }, { "epoch": 0.2943229556675255, "grad_norm": 0.361328125, "learning_rate": 0.0002979718313154076, "loss": 0.498, "step": 11595 }, { "epoch": 0.29444987371654124, "grad_norm": 0.373046875, "learning_rate": 0.0002979663820828488, "loss": 0.5119, "step": 11600 }, { "epoch": 0.29457679176555696, "grad_norm": 0.353515625, "learning_rate": 0.0002979609255896386, "loss": 0.5126, "step": 11605 }, { "epoch": 0.2947037098145727, "grad_norm": 0.412109375, "learning_rate": 0.0002979554618360447, "loss": 0.523, "step": 11610 }, { "epoch": 0.2948306278635885, "grad_norm": 0.322265625, "learning_rate": 0.00029794999082233525, "loss": 0.4808, "step": 11615 }, { "epoch": 0.29495754591260426, "grad_norm": 0.359375, "learning_rate": 0.00029794451254877867, "loss": 0.4976, "step": 11620 }, { "epoch": 0.29508446396161997, "grad_norm": 0.341796875, "learning_rate": 0.0002979390270156438, "loss": 0.4853, "step": 11625 }, { "epoch": 0.29521138201063574, "grad_norm": 0.34765625, "learning_rate": 0.00029793353422319976, "loss": 0.5066, "step": 11630 }, { "epoch": 0.2953383000596515, "grad_norm": 0.376953125, "learning_rate": 0.00029792803417171617, "loss": 0.4966, "step": 11635 }, { "epoch": 0.2954652181086672, "grad_norm": 0.341796875, "learning_rate": 0.0002979225268614629, "loss": 0.5051, "step": 11640 }, { "epoch": 0.295592136157683, "grad_norm": 0.3671875, "learning_rate": 0.0002979170122927101, "loss": 0.5066, "step": 11645 }, { "epoch": 0.29571905420669875, "grad_norm": 0.33203125, "learning_rate": 0.00029791149046572846, "loss": 0.4875, "step": 11650 }, { "epoch": 0.29584597225571446, "grad_norm": 0.349609375, "learning_rate": 0.0002979059613807889, "loss": 0.5294, "step": 11655 }, { "epoch": 0.29597289030473023, "grad_norm": 0.3515625, "learning_rate": 0.0002979004250381627, "loss": 0.5103, "step": 11660 }, { "epoch": 0.296099808353746, "grad_norm": 0.380859375, "learning_rate": 0.00029789488143812154, "loss": 0.4894, "step": 11665 }, { "epoch": 0.29622672640276176, "grad_norm": 0.396484375, "learning_rate": 0.0002978893305809375, "loss": 0.5238, "step": 11670 }, { "epoch": 0.2963536444517775, "grad_norm": 0.34765625, "learning_rate": 0.000297883772466883, "loss": 0.4988, "step": 11675 }, { "epoch": 0.29648056250079324, "grad_norm": 0.357421875, "learning_rate": 0.0002978782070962306, "loss": 0.4962, "step": 11680 }, { "epoch": 0.296607480549809, "grad_norm": 0.337890625, "learning_rate": 0.0002978726344692534, "loss": 0.4825, "step": 11685 }, { "epoch": 0.2967343985988247, "grad_norm": 0.3671875, "learning_rate": 0.0002978670545862251, "loss": 0.453, "step": 11690 }, { "epoch": 0.2968613166478405, "grad_norm": 0.34765625, "learning_rate": 0.0002978614674474192, "loss": 0.4804, "step": 11695 }, { "epoch": 0.29698823469685626, "grad_norm": 0.3828125, "learning_rate": 0.00029785587305311003, "loss": 0.5229, "step": 11700 }, { "epoch": 0.29711515274587197, "grad_norm": 0.58984375, "learning_rate": 0.00029785027140357215, "loss": 0.4689, "step": 11705 }, { "epoch": 0.29724207079488774, "grad_norm": 0.353515625, "learning_rate": 0.00029784466249908024, "loss": 0.49, "step": 11710 }, { "epoch": 0.2973689888439035, "grad_norm": 0.35546875, "learning_rate": 0.00029783904633990966, "loss": 0.4484, "step": 11715 }, { "epoch": 0.2974959068929192, "grad_norm": 0.390625, "learning_rate": 0.000297833422926336, "loss": 0.5563, "step": 11720 }, { "epoch": 0.297622824941935, "grad_norm": 0.388671875, "learning_rate": 0.0002978277922586352, "loss": 0.518, "step": 11725 }, { "epoch": 0.29774974299095075, "grad_norm": 0.3671875, "learning_rate": 0.0002978221543370834, "loss": 0.4802, "step": 11730 }, { "epoch": 0.2978766610399665, "grad_norm": 0.3828125, "learning_rate": 0.00029781650916195745, "loss": 0.5065, "step": 11735 }, { "epoch": 0.29800357908898223, "grad_norm": 0.287109375, "learning_rate": 0.0002978108567335343, "loss": 0.5022, "step": 11740 }, { "epoch": 0.298130497137998, "grad_norm": 0.3515625, "learning_rate": 0.0002978051970520912, "loss": 0.4964, "step": 11745 }, { "epoch": 0.29825741518701376, "grad_norm": 0.34375, "learning_rate": 0.0002977995301179061, "loss": 0.486, "step": 11750 }, { "epoch": 0.2983843332360295, "grad_norm": 0.376953125, "learning_rate": 0.0002977938559312568, "loss": 0.511, "step": 11755 }, { "epoch": 0.29851125128504524, "grad_norm": 0.37109375, "learning_rate": 0.00029778817449242187, "loss": 0.5012, "step": 11760 }, { "epoch": 0.298638169334061, "grad_norm": 0.353515625, "learning_rate": 0.0002977824858016801, "loss": 0.5093, "step": 11765 }, { "epoch": 0.2987650873830767, "grad_norm": 0.35546875, "learning_rate": 0.0002977767898593107, "loss": 0.483, "step": 11770 }, { "epoch": 0.2988920054320925, "grad_norm": 0.35546875, "learning_rate": 0.000297771086665593, "loss": 0.5031, "step": 11775 }, { "epoch": 0.29901892348110826, "grad_norm": 0.37109375, "learning_rate": 0.00029776537622080697, "loss": 0.5223, "step": 11780 }, { "epoch": 0.299145841530124, "grad_norm": 0.36328125, "learning_rate": 0.0002977596585252328, "loss": 0.5014, "step": 11785 }, { "epoch": 0.29927275957913974, "grad_norm": 0.35546875, "learning_rate": 0.00029775393357915104, "loss": 0.4924, "step": 11790 }, { "epoch": 0.2993996776281555, "grad_norm": 0.359375, "learning_rate": 0.0002977482013828426, "loss": 0.5025, "step": 11795 }, { "epoch": 0.29952659567717127, "grad_norm": 0.34375, "learning_rate": 0.0002977424619365888, "loss": 0.4975, "step": 11800 }, { "epoch": 0.299653513726187, "grad_norm": 0.341796875, "learning_rate": 0.0002977367152406712, "loss": 0.4995, "step": 11805 }, { "epoch": 0.29978043177520275, "grad_norm": 0.365234375, "learning_rate": 0.0002977309612953719, "loss": 0.4955, "step": 11810 }, { "epoch": 0.2999073498242185, "grad_norm": 0.353515625, "learning_rate": 0.0002977252001009731, "loss": 0.5179, "step": 11815 }, { "epoch": 0.30003426787323423, "grad_norm": 0.375, "learning_rate": 0.0002977194316577576, "loss": 0.5231, "step": 11820 }, { "epoch": 0.30016118592225, "grad_norm": 0.359375, "learning_rate": 0.00029771365596600847, "loss": 0.5201, "step": 11825 }, { "epoch": 0.30028810397126576, "grad_norm": 0.365234375, "learning_rate": 0.00029770787302600904, "loss": 0.514, "step": 11830 }, { "epoch": 0.30041502202028153, "grad_norm": 0.333984375, "learning_rate": 0.0002977020828380431, "loss": 0.5221, "step": 11835 }, { "epoch": 0.30054194006929724, "grad_norm": 0.34765625, "learning_rate": 0.0002976962854023949, "loss": 0.5073, "step": 11840 }, { "epoch": 0.300668858118313, "grad_norm": 0.375, "learning_rate": 0.0002976904807193487, "loss": 0.4856, "step": 11845 }, { "epoch": 0.3007957761673288, "grad_norm": 0.3515625, "learning_rate": 0.0002976846687891895, "loss": 0.5026, "step": 11850 }, { "epoch": 0.3009226942163445, "grad_norm": 0.369140625, "learning_rate": 0.0002976788496122024, "loss": 0.5506, "step": 11855 }, { "epoch": 0.30104961226536026, "grad_norm": 0.35546875, "learning_rate": 0.00029767302318867297, "loss": 0.4953, "step": 11860 }, { "epoch": 0.301176530314376, "grad_norm": 0.345703125, "learning_rate": 0.0002976671895188871, "loss": 0.4973, "step": 11865 }, { "epoch": 0.30130344836339173, "grad_norm": 0.34375, "learning_rate": 0.0002976613486031311, "loss": 0.4752, "step": 11870 }, { "epoch": 0.3014303664124075, "grad_norm": 0.34765625, "learning_rate": 0.0002976555004416915, "loss": 0.491, "step": 11875 }, { "epoch": 0.30155728446142327, "grad_norm": 0.328125, "learning_rate": 0.00029764964503485537, "loss": 0.5013, "step": 11880 }, { "epoch": 0.30168420251043904, "grad_norm": 0.359375, "learning_rate": 0.00029764378238291, "loss": 0.514, "step": 11885 }, { "epoch": 0.30181112055945475, "grad_norm": 0.34765625, "learning_rate": 0.00029763791248614297, "loss": 0.5094, "step": 11890 }, { "epoch": 0.3019380386084705, "grad_norm": 0.337890625, "learning_rate": 0.0002976320353448424, "loss": 0.4945, "step": 11895 }, { "epoch": 0.3020649566574863, "grad_norm": 0.357421875, "learning_rate": 0.00029762615095929666, "loss": 0.5041, "step": 11900 }, { "epoch": 0.302191874706502, "grad_norm": 0.359375, "learning_rate": 0.0002976202593297945, "loss": 0.5173, "step": 11905 }, { "epoch": 0.30231879275551776, "grad_norm": 0.3984375, "learning_rate": 0.000297614360456625, "loss": 0.5232, "step": 11910 }, { "epoch": 0.30244571080453353, "grad_norm": 0.3671875, "learning_rate": 0.0002976084543400777, "loss": 0.5052, "step": 11915 }, { "epoch": 0.30257262885354924, "grad_norm": 0.392578125, "learning_rate": 0.0002976025409804423, "loss": 0.5259, "step": 11920 }, { "epoch": 0.302699546902565, "grad_norm": 0.353515625, "learning_rate": 0.000297596620378009, "loss": 0.5151, "step": 11925 }, { "epoch": 0.3028264649515808, "grad_norm": 0.3203125, "learning_rate": 0.00029759069253306836, "loss": 0.4972, "step": 11930 }, { "epoch": 0.3029533830005965, "grad_norm": 0.34765625, "learning_rate": 0.0002975847574459112, "loss": 0.5169, "step": 11935 }, { "epoch": 0.30308030104961226, "grad_norm": 0.337890625, "learning_rate": 0.0002975788151168288, "loss": 0.4673, "step": 11940 }, { "epoch": 0.303207219098628, "grad_norm": 0.359375, "learning_rate": 0.0002975728655461127, "loss": 0.516, "step": 11945 }, { "epoch": 0.3033341371476438, "grad_norm": 0.365234375, "learning_rate": 0.0002975669087340549, "loss": 0.5245, "step": 11950 }, { "epoch": 0.3034610551966595, "grad_norm": 0.359375, "learning_rate": 0.00029756094468094766, "loss": 0.4958, "step": 11955 }, { "epoch": 0.30358797324567527, "grad_norm": 0.3515625, "learning_rate": 0.00029755497338708367, "loss": 0.5216, "step": 11960 }, { "epoch": 0.30371489129469104, "grad_norm": 0.369140625, "learning_rate": 0.00029754899485275586, "loss": 0.4731, "step": 11965 }, { "epoch": 0.30384180934370675, "grad_norm": 0.373046875, "learning_rate": 0.00029754300907825767, "loss": 0.5075, "step": 11970 }, { "epoch": 0.3039687273927225, "grad_norm": 0.33984375, "learning_rate": 0.00029753701606388287, "loss": 0.5027, "step": 11975 }, { "epoch": 0.3040956454417383, "grad_norm": 0.345703125, "learning_rate": 0.00029753101580992533, "loss": 0.5132, "step": 11980 }, { "epoch": 0.304222563490754, "grad_norm": 0.37109375, "learning_rate": 0.0002975250083166797, "loss": 0.5247, "step": 11985 }, { "epoch": 0.30434948153976976, "grad_norm": 0.359375, "learning_rate": 0.00029751899358444067, "loss": 0.4781, "step": 11990 }, { "epoch": 0.30447639958878553, "grad_norm": 0.388671875, "learning_rate": 0.0002975129716135034, "loss": 0.515, "step": 11995 }, { "epoch": 0.3046033176378013, "grad_norm": 0.35546875, "learning_rate": 0.00029750694240416334, "loss": 0.5145, "step": 12000 }, { "epoch": 0.304730235686817, "grad_norm": 0.35546875, "learning_rate": 0.00029750090595671635, "loss": 0.4921, "step": 12005 }, { "epoch": 0.3048571537358328, "grad_norm": 0.36328125, "learning_rate": 0.0002974948622714587, "loss": 0.4806, "step": 12010 }, { "epoch": 0.30498407178484854, "grad_norm": 0.2431640625, "learning_rate": 0.0002974888113486869, "loss": 0.4252, "step": 12015 }, { "epoch": 0.30511098983386425, "grad_norm": 0.365234375, "learning_rate": 0.0002974827531886979, "loss": 0.5204, "step": 12020 }, { "epoch": 0.30523790788288, "grad_norm": 0.353515625, "learning_rate": 0.00029747668779178893, "loss": 0.5364, "step": 12025 }, { "epoch": 0.3053648259318958, "grad_norm": 0.357421875, "learning_rate": 0.0002974706151582576, "loss": 0.5035, "step": 12030 }, { "epoch": 0.3054917439809115, "grad_norm": 0.365234375, "learning_rate": 0.000297464535288402, "loss": 0.5004, "step": 12035 }, { "epoch": 0.30561866202992727, "grad_norm": 0.322265625, "learning_rate": 0.00029745844818252035, "loss": 0.4965, "step": 12040 }, { "epoch": 0.30574558007894304, "grad_norm": 0.376953125, "learning_rate": 0.00029745235384091135, "loss": 0.5399, "step": 12045 }, { "epoch": 0.3058724981279588, "grad_norm": 0.359375, "learning_rate": 0.00029744625226387416, "loss": 0.5078, "step": 12050 }, { "epoch": 0.3059994161769745, "grad_norm": 0.37109375, "learning_rate": 0.0002974401434517081, "loss": 0.4856, "step": 12055 }, { "epoch": 0.3061263342259903, "grad_norm": 0.375, "learning_rate": 0.0002974340274047129, "loss": 0.4648, "step": 12060 }, { "epoch": 0.30625325227500605, "grad_norm": 0.33984375, "learning_rate": 0.0002974279041231887, "loss": 0.4882, "step": 12065 }, { "epoch": 0.30638017032402176, "grad_norm": 0.3671875, "learning_rate": 0.000297421773607436, "loss": 0.4745, "step": 12070 }, { "epoch": 0.30650708837303753, "grad_norm": 0.375, "learning_rate": 0.00029741563585775554, "loss": 0.5057, "step": 12075 }, { "epoch": 0.3066340064220533, "grad_norm": 0.357421875, "learning_rate": 0.0002974094908744486, "loss": 0.5215, "step": 12080 }, { "epoch": 0.306760924471069, "grad_norm": 0.369140625, "learning_rate": 0.00029740333865781665, "loss": 0.5089, "step": 12085 }, { "epoch": 0.3068878425200848, "grad_norm": 0.37890625, "learning_rate": 0.0002973971792081616, "loss": 0.4947, "step": 12090 }, { "epoch": 0.30701476056910054, "grad_norm": 0.32421875, "learning_rate": 0.0002973910125257857, "loss": 0.4829, "step": 12095 }, { "epoch": 0.3071416786181163, "grad_norm": 0.34375, "learning_rate": 0.00029738483861099145, "loss": 0.5006, "step": 12100 }, { "epoch": 0.307268596667132, "grad_norm": 0.359375, "learning_rate": 0.0002973786574640819, "loss": 0.4932, "step": 12105 }, { "epoch": 0.3073955147161478, "grad_norm": 0.341796875, "learning_rate": 0.00029737246908536036, "loss": 0.4986, "step": 12110 }, { "epoch": 0.30752243276516356, "grad_norm": 0.357421875, "learning_rate": 0.0002973662734751305, "loss": 0.5069, "step": 12115 }, { "epoch": 0.30764935081417927, "grad_norm": 0.361328125, "learning_rate": 0.00029736007063369625, "loss": 0.4833, "step": 12120 }, { "epoch": 0.30777626886319503, "grad_norm": 0.345703125, "learning_rate": 0.00029735386056136206, "loss": 0.4801, "step": 12125 }, { "epoch": 0.3079031869122108, "grad_norm": 0.333984375, "learning_rate": 0.00029734764325843263, "loss": 0.4962, "step": 12130 }, { "epoch": 0.3080301049612265, "grad_norm": 0.359375, "learning_rate": 0.000297341418725213, "loss": 0.48, "step": 12135 }, { "epoch": 0.3081570230102423, "grad_norm": 0.357421875, "learning_rate": 0.0002973351869620087, "loss": 0.5091, "step": 12140 }, { "epoch": 0.30828394105925805, "grad_norm": 0.333984375, "learning_rate": 0.0002973289479691255, "loss": 0.5061, "step": 12145 }, { "epoch": 0.30841085910827376, "grad_norm": 0.365234375, "learning_rate": 0.00029732270174686936, "loss": 0.4775, "step": 12150 }, { "epoch": 0.30853777715728953, "grad_norm": 0.361328125, "learning_rate": 0.00029731644829554706, "loss": 0.4846, "step": 12155 }, { "epoch": 0.3086646952063053, "grad_norm": 0.337890625, "learning_rate": 0.0002973101876154653, "loss": 0.4974, "step": 12160 }, { "epoch": 0.30879161325532106, "grad_norm": 0.36328125, "learning_rate": 0.00029730391970693123, "loss": 0.4757, "step": 12165 }, { "epoch": 0.3089185313043368, "grad_norm": 0.369140625, "learning_rate": 0.0002972976445702526, "loss": 0.5122, "step": 12170 }, { "epoch": 0.30904544935335254, "grad_norm": 0.341796875, "learning_rate": 0.0002972913622057372, "loss": 0.5068, "step": 12175 }, { "epoch": 0.3091723674023683, "grad_norm": 0.359375, "learning_rate": 0.0002972850726136933, "loss": 0.4748, "step": 12180 }, { "epoch": 0.309299285451384, "grad_norm": 0.341796875, "learning_rate": 0.0002972787757944296, "loss": 0.4738, "step": 12185 }, { "epoch": 0.3094262035003998, "grad_norm": 0.365234375, "learning_rate": 0.000297272471748255, "loss": 0.4796, "step": 12190 }, { "epoch": 0.30955312154941556, "grad_norm": 0.33984375, "learning_rate": 0.0002972661604754789, "loss": 0.5013, "step": 12195 }, { "epoch": 0.30968003959843127, "grad_norm": 0.357421875, "learning_rate": 0.0002972598419764109, "loss": 0.502, "step": 12200 }, { "epoch": 0.30980695764744703, "grad_norm": 0.361328125, "learning_rate": 0.00029725351625136125, "loss": 0.466, "step": 12205 }, { "epoch": 0.3099338756964628, "grad_norm": 0.361328125, "learning_rate": 0.0002972471833006401, "loss": 0.5255, "step": 12210 }, { "epoch": 0.31006079374547857, "grad_norm": 0.349609375, "learning_rate": 0.0002972408431245584, "loss": 0.5123, "step": 12215 }, { "epoch": 0.3101877117944943, "grad_norm": 0.3671875, "learning_rate": 0.0002972344957234271, "loss": 0.506, "step": 12220 }, { "epoch": 0.31031462984351005, "grad_norm": 0.357421875, "learning_rate": 0.00029722814109755776, "loss": 0.4973, "step": 12225 }, { "epoch": 0.3104415478925258, "grad_norm": 0.384765625, "learning_rate": 0.0002972217792472622, "loss": 0.5298, "step": 12230 }, { "epoch": 0.3105684659415415, "grad_norm": 0.353515625, "learning_rate": 0.0002972154101728526, "loss": 0.5148, "step": 12235 }, { "epoch": 0.3106953839905573, "grad_norm": 0.365234375, "learning_rate": 0.0002972090338746414, "loss": 0.4982, "step": 12240 }, { "epoch": 0.31082230203957306, "grad_norm": 0.37109375, "learning_rate": 0.0002972026503529417, "loss": 0.4833, "step": 12245 }, { "epoch": 0.3109492200885888, "grad_norm": 0.3359375, "learning_rate": 0.00029719625960806644, "loss": 0.4875, "step": 12250 }, { "epoch": 0.31107613813760454, "grad_norm": 0.33984375, "learning_rate": 0.00029718986164032935, "loss": 0.5052, "step": 12255 }, { "epoch": 0.3112030561866203, "grad_norm": 0.3359375, "learning_rate": 0.00029718345645004445, "loss": 0.5096, "step": 12260 }, { "epoch": 0.3113299742356361, "grad_norm": 0.375, "learning_rate": 0.00029717704403752594, "loss": 0.5048, "step": 12265 }, { "epoch": 0.3114568922846518, "grad_norm": 0.341796875, "learning_rate": 0.0002971706244030885, "loss": 0.4953, "step": 12270 }, { "epoch": 0.31158381033366755, "grad_norm": 0.3671875, "learning_rate": 0.0002971641975470472, "loss": 0.4755, "step": 12275 }, { "epoch": 0.3117107283826833, "grad_norm": 0.3515625, "learning_rate": 0.0002971577634697173, "loss": 0.5175, "step": 12280 }, { "epoch": 0.31183764643169903, "grad_norm": 0.3828125, "learning_rate": 0.00029715132217141455, "loss": 0.4691, "step": 12285 }, { "epoch": 0.3119645644807148, "grad_norm": 0.365234375, "learning_rate": 0.00029714487365245507, "loss": 0.5043, "step": 12290 }, { "epoch": 0.31209148252973057, "grad_norm": 0.33984375, "learning_rate": 0.0002971384179131552, "loss": 0.4961, "step": 12295 }, { "epoch": 0.3122184005787463, "grad_norm": 0.359375, "learning_rate": 0.00029713195495383184, "loss": 0.4997, "step": 12300 }, { "epoch": 0.31234531862776205, "grad_norm": 0.3984375, "learning_rate": 0.00029712548477480203, "loss": 0.5399, "step": 12305 }, { "epoch": 0.3124722366767778, "grad_norm": 0.34765625, "learning_rate": 0.0002971190073763833, "loss": 0.5198, "step": 12310 }, { "epoch": 0.3125991547257936, "grad_norm": 0.337890625, "learning_rate": 0.0002971125227588935, "loss": 0.4957, "step": 12315 }, { "epoch": 0.3127260727748093, "grad_norm": 0.3671875, "learning_rate": 0.0002971060309226507, "loss": 0.5552, "step": 12320 }, { "epoch": 0.31285299082382506, "grad_norm": 0.3203125, "learning_rate": 0.0002970995318679737, "loss": 0.4744, "step": 12325 }, { "epoch": 0.31297990887284083, "grad_norm": 0.365234375, "learning_rate": 0.0002970930255951812, "loss": 0.5034, "step": 12330 }, { "epoch": 0.31310682692185654, "grad_norm": 0.359375, "learning_rate": 0.0002970865121045925, "loss": 0.4826, "step": 12335 }, { "epoch": 0.3132337449708723, "grad_norm": 0.3671875, "learning_rate": 0.00029707999139652727, "loss": 0.5102, "step": 12340 }, { "epoch": 0.3133606630198881, "grad_norm": 0.337890625, "learning_rate": 0.0002970734634713055, "loss": 0.5118, "step": 12345 }, { "epoch": 0.3134875810689038, "grad_norm": 0.369140625, "learning_rate": 0.0002970669283292474, "loss": 0.5163, "step": 12350 }, { "epoch": 0.31361449911791955, "grad_norm": 0.38671875, "learning_rate": 0.00029706038597067375, "loss": 0.5227, "step": 12355 }, { "epoch": 0.3137414171669353, "grad_norm": 0.376953125, "learning_rate": 0.00029705383639590553, "loss": 0.5191, "step": 12360 }, { "epoch": 0.31386833521595103, "grad_norm": 0.36328125, "learning_rate": 0.0002970472796052641, "loss": 0.4876, "step": 12365 }, { "epoch": 0.3139952532649668, "grad_norm": 0.380859375, "learning_rate": 0.0002970407155990713, "loss": 0.4767, "step": 12370 }, { "epoch": 0.31412217131398257, "grad_norm": 0.3515625, "learning_rate": 0.0002970341443776491, "loss": 0.4647, "step": 12375 }, { "epoch": 0.31424908936299834, "grad_norm": 0.31640625, "learning_rate": 0.00029702756594132, "loss": 0.4736, "step": 12380 }, { "epoch": 0.31437600741201405, "grad_norm": 0.345703125, "learning_rate": 0.0002970209802904069, "loss": 0.5138, "step": 12385 }, { "epoch": 0.3145029254610298, "grad_norm": 0.345703125, "learning_rate": 0.00029701438742523275, "loss": 0.5111, "step": 12390 }, { "epoch": 0.3146298435100456, "grad_norm": 0.357421875, "learning_rate": 0.0002970077873461212, "loss": 0.5016, "step": 12395 }, { "epoch": 0.3147567615590613, "grad_norm": 0.33984375, "learning_rate": 0.0002970011800533961, "loss": 0.5005, "step": 12400 }, { "epoch": 0.31488367960807706, "grad_norm": 0.384765625, "learning_rate": 0.0002969945655473817, "loss": 0.5057, "step": 12405 }, { "epoch": 0.31501059765709283, "grad_norm": 0.376953125, "learning_rate": 0.00029698794382840244, "loss": 0.4775, "step": 12410 }, { "epoch": 0.31513751570610854, "grad_norm": 0.36328125, "learning_rate": 0.0002969813148967834, "loss": 0.4913, "step": 12415 }, { "epoch": 0.3152644337551243, "grad_norm": 0.365234375, "learning_rate": 0.00029697467875284975, "loss": 0.51, "step": 12420 }, { "epoch": 0.3153913518041401, "grad_norm": 0.341796875, "learning_rate": 0.00029696803539692714, "loss": 0.4891, "step": 12425 }, { "epoch": 0.31551826985315584, "grad_norm": 0.357421875, "learning_rate": 0.00029696138482934156, "loss": 0.5109, "step": 12430 }, { "epoch": 0.31564518790217155, "grad_norm": 0.3359375, "learning_rate": 0.0002969547270504194, "loss": 0.482, "step": 12435 }, { "epoch": 0.3157721059511873, "grad_norm": 0.3671875, "learning_rate": 0.0002969480620604873, "loss": 0.4968, "step": 12440 }, { "epoch": 0.3158990240002031, "grad_norm": 0.3515625, "learning_rate": 0.00029694138985987236, "loss": 0.4963, "step": 12445 }, { "epoch": 0.3160259420492188, "grad_norm": 0.361328125, "learning_rate": 0.00029693471044890197, "loss": 0.5086, "step": 12450 }, { "epoch": 0.31615286009823457, "grad_norm": 0.3671875, "learning_rate": 0.0002969280238279038, "loss": 0.4972, "step": 12455 }, { "epoch": 0.31627977814725033, "grad_norm": 0.35546875, "learning_rate": 0.0002969213299972061, "loss": 0.5134, "step": 12460 }, { "epoch": 0.31640669619626605, "grad_norm": 0.341796875, "learning_rate": 0.00029691462895713725, "loss": 0.4935, "step": 12465 }, { "epoch": 0.3165336142452818, "grad_norm": 0.3828125, "learning_rate": 0.00029690792070802605, "loss": 0.5169, "step": 12470 }, { "epoch": 0.3166605322942976, "grad_norm": 0.34375, "learning_rate": 0.00029690120525020173, "loss": 0.4889, "step": 12475 }, { "epoch": 0.31678745034331335, "grad_norm": 0.341796875, "learning_rate": 0.0002968944825839938, "loss": 0.5016, "step": 12480 }, { "epoch": 0.31691436839232906, "grad_norm": 0.36328125, "learning_rate": 0.000296887752709732, "loss": 0.5067, "step": 12485 }, { "epoch": 0.3170412864413448, "grad_norm": 0.3671875, "learning_rate": 0.0002968810156277468, "loss": 0.5431, "step": 12490 }, { "epoch": 0.3171682044903606, "grad_norm": 0.35546875, "learning_rate": 0.00029687427133836863, "loss": 0.485, "step": 12495 }, { "epoch": 0.3172951225393763, "grad_norm": 0.33203125, "learning_rate": 0.00029686751984192854, "loss": 0.4706, "step": 12500 }, { "epoch": 0.3174220405883921, "grad_norm": 0.345703125, "learning_rate": 0.0002968607611387577, "loss": 0.4927, "step": 12505 }, { "epoch": 0.31754895863740784, "grad_norm": 0.359375, "learning_rate": 0.0002968539952291878, "loss": 0.5266, "step": 12510 }, { "epoch": 0.31767587668642355, "grad_norm": 0.400390625, "learning_rate": 0.00029684722211355084, "loss": 0.5216, "step": 12515 }, { "epoch": 0.3178027947354393, "grad_norm": 0.345703125, "learning_rate": 0.0002968404417921792, "loss": 0.4408, "step": 12520 }, { "epoch": 0.3179297127844551, "grad_norm": 0.349609375, "learning_rate": 0.0002968336542654056, "loss": 0.4826, "step": 12525 }, { "epoch": 0.31805663083347085, "grad_norm": 0.369140625, "learning_rate": 0.00029682685953356307, "loss": 0.5117, "step": 12530 }, { "epoch": 0.31818354888248657, "grad_norm": 0.375, "learning_rate": 0.00029682005759698506, "loss": 0.5121, "step": 12535 }, { "epoch": 0.31831046693150233, "grad_norm": 0.357421875, "learning_rate": 0.0002968132484560053, "loss": 0.5193, "step": 12540 }, { "epoch": 0.3184373849805181, "grad_norm": 0.353515625, "learning_rate": 0.00029680643211095785, "loss": 0.502, "step": 12545 }, { "epoch": 0.3185643030295338, "grad_norm": 0.353515625, "learning_rate": 0.0002967996085621773, "loss": 0.543, "step": 12550 }, { "epoch": 0.3186912210785496, "grad_norm": 0.34375, "learning_rate": 0.0002967927778099984, "loss": 0.5139, "step": 12555 }, { "epoch": 0.31881813912756535, "grad_norm": 0.341796875, "learning_rate": 0.00029678593985475645, "loss": 0.4617, "step": 12560 }, { "epoch": 0.31894505717658106, "grad_norm": 0.33984375, "learning_rate": 0.0002967790946967868, "loss": 0.4809, "step": 12565 }, { "epoch": 0.3190719752255968, "grad_norm": 0.357421875, "learning_rate": 0.00029677224233642555, "loss": 0.488, "step": 12570 }, { "epoch": 0.3191988932746126, "grad_norm": 0.349609375, "learning_rate": 0.0002967653827740087, "loss": 0.4931, "step": 12575 }, { "epoch": 0.3193258113236283, "grad_norm": 0.3828125, "learning_rate": 0.00029675851600987306, "loss": 0.5057, "step": 12580 }, { "epoch": 0.3194527293726441, "grad_norm": 0.34375, "learning_rate": 0.00029675164204435555, "loss": 0.4926, "step": 12585 }, { "epoch": 0.31957964742165984, "grad_norm": 0.375, "learning_rate": 0.0002967447608777933, "loss": 0.484, "step": 12590 }, { "epoch": 0.3197065654706756, "grad_norm": 0.34765625, "learning_rate": 0.0002967378725105242, "loss": 0.4879, "step": 12595 }, { "epoch": 0.3198334835196913, "grad_norm": 0.353515625, "learning_rate": 0.0002967309769428861, "loss": 0.5218, "step": 12600 }, { "epoch": 0.3199604015687071, "grad_norm": 0.361328125, "learning_rate": 0.0002967240741752174, "loss": 0.4823, "step": 12605 }, { "epoch": 0.32008731961772285, "grad_norm": 0.353515625, "learning_rate": 0.00029671716420785684, "loss": 0.5039, "step": 12610 }, { "epoch": 0.32021423766673857, "grad_norm": 0.359375, "learning_rate": 0.00029671024704114345, "loss": 0.5073, "step": 12615 }, { "epoch": 0.32034115571575433, "grad_norm": 0.3515625, "learning_rate": 0.00029670332267541676, "loss": 0.4858, "step": 12620 }, { "epoch": 0.3204680737647701, "grad_norm": 0.33984375, "learning_rate": 0.0002966963911110164, "loss": 0.4876, "step": 12625 }, { "epoch": 0.3205949918137858, "grad_norm": 0.3671875, "learning_rate": 0.0002966894523482826, "loss": 0.5196, "step": 12630 }, { "epoch": 0.3207219098628016, "grad_norm": 0.44921875, "learning_rate": 0.0002966825063875557, "loss": 0.5417, "step": 12635 }, { "epoch": 0.32084882791181735, "grad_norm": 0.373046875, "learning_rate": 0.00029667555322917676, "loss": 0.5021, "step": 12640 }, { "epoch": 0.3209757459608331, "grad_norm": 0.3984375, "learning_rate": 0.0002966685928734868, "loss": 0.5047, "step": 12645 }, { "epoch": 0.3211026640098488, "grad_norm": 0.375, "learning_rate": 0.0002966616253208274, "loss": 0.5223, "step": 12650 }, { "epoch": 0.3212295820588646, "grad_norm": 0.37890625, "learning_rate": 0.00029665465057154047, "loss": 0.5135, "step": 12655 }, { "epoch": 0.32135650010788036, "grad_norm": 0.353515625, "learning_rate": 0.0002966476686259683, "loss": 0.4895, "step": 12660 }, { "epoch": 0.3214834181568961, "grad_norm": 0.345703125, "learning_rate": 0.00029664067948445344, "loss": 0.447, "step": 12665 }, { "epoch": 0.32161033620591184, "grad_norm": 0.359375, "learning_rate": 0.0002966336831473388, "loss": 0.4912, "step": 12670 }, { "epoch": 0.3217372542549276, "grad_norm": 0.375, "learning_rate": 0.00029662667961496774, "loss": 0.505, "step": 12675 }, { "epoch": 0.3218641723039433, "grad_norm": 0.388671875, "learning_rate": 0.000296619668887684, "loss": 0.4564, "step": 12680 }, { "epoch": 0.3219910903529591, "grad_norm": 0.359375, "learning_rate": 0.0002966126509658314, "loss": 0.4933, "step": 12685 }, { "epoch": 0.32211800840197485, "grad_norm": 0.353515625, "learning_rate": 0.0002966056258497545, "loss": 0.4945, "step": 12690 }, { "epoch": 0.3222449264509906, "grad_norm": 0.333984375, "learning_rate": 0.0002965985935397979, "loss": 0.5018, "step": 12695 }, { "epoch": 0.32237184450000633, "grad_norm": 0.38671875, "learning_rate": 0.0002965915540363067, "loss": 0.5028, "step": 12700 }, { "epoch": 0.3224987625490221, "grad_norm": 0.35546875, "learning_rate": 0.00029658450733962644, "loss": 0.5076, "step": 12705 }, { "epoch": 0.32262568059803787, "grad_norm": 0.33203125, "learning_rate": 0.0002965774534501027, "loss": 0.4732, "step": 12710 }, { "epoch": 0.3227525986470536, "grad_norm": 0.36328125, "learning_rate": 0.0002965703923680817, "loss": 0.5444, "step": 12715 }, { "epoch": 0.32287951669606935, "grad_norm": 0.353515625, "learning_rate": 0.0002965633240939099, "loss": 0.4983, "step": 12720 }, { "epoch": 0.3230064347450851, "grad_norm": 0.357421875, "learning_rate": 0.0002965562486279343, "loss": 0.478, "step": 12725 }, { "epoch": 0.3231333527941008, "grad_norm": 0.3515625, "learning_rate": 0.0002965491659705019, "loss": 0.4918, "step": 12730 }, { "epoch": 0.3232602708431166, "grad_norm": 0.359375, "learning_rate": 0.00029654207612196025, "loss": 0.4994, "step": 12735 }, { "epoch": 0.32338718889213236, "grad_norm": 0.35546875, "learning_rate": 0.00029653497908265733, "loss": 0.5073, "step": 12740 }, { "epoch": 0.3235141069411481, "grad_norm": 0.357421875, "learning_rate": 0.0002965278748529414, "loss": 0.5088, "step": 12745 }, { "epoch": 0.32364102499016384, "grad_norm": 0.369140625, "learning_rate": 0.00029652076343316097, "loss": 0.5202, "step": 12750 }, { "epoch": 0.3237679430391796, "grad_norm": 0.3515625, "learning_rate": 0.00029651364482366504, "loss": 0.4972, "step": 12755 }, { "epoch": 0.3238948610881954, "grad_norm": 0.359375, "learning_rate": 0.00029650651902480294, "loss": 0.4757, "step": 12760 }, { "epoch": 0.3240217791372111, "grad_norm": 0.34765625, "learning_rate": 0.0002964993860369243, "loss": 0.4881, "step": 12765 }, { "epoch": 0.32414869718622685, "grad_norm": 0.326171875, "learning_rate": 0.0002964922458603792, "loss": 0.4846, "step": 12770 }, { "epoch": 0.3242756152352426, "grad_norm": 0.33203125, "learning_rate": 0.00029648509849551787, "loss": 0.4755, "step": 12775 }, { "epoch": 0.32440253328425833, "grad_norm": 0.3515625, "learning_rate": 0.00029647794394269106, "loss": 0.5223, "step": 12780 }, { "epoch": 0.3245294513332741, "grad_norm": 0.353515625, "learning_rate": 0.00029647078220225, "loss": 0.4828, "step": 12785 }, { "epoch": 0.32465636938228987, "grad_norm": 0.3203125, "learning_rate": 0.000296463613274546, "loss": 0.4761, "step": 12790 }, { "epoch": 0.3247832874313056, "grad_norm": 0.35546875, "learning_rate": 0.00029645643715993077, "loss": 0.4895, "step": 12795 }, { "epoch": 0.32491020548032135, "grad_norm": 0.3515625, "learning_rate": 0.00029644925385875656, "loss": 0.4892, "step": 12800 }, { "epoch": 0.3250371235293371, "grad_norm": 0.380859375, "learning_rate": 0.0002964420633713758, "loss": 0.5179, "step": 12805 }, { "epoch": 0.3251640415783529, "grad_norm": 0.341796875, "learning_rate": 0.0002964348656981413, "loss": 0.4937, "step": 12810 }, { "epoch": 0.3252909596273686, "grad_norm": 0.337890625, "learning_rate": 0.0002964276608394063, "loss": 0.4878, "step": 12815 }, { "epoch": 0.32541787767638436, "grad_norm": 0.330078125, "learning_rate": 0.0002964204487955243, "loss": 0.4902, "step": 12820 }, { "epoch": 0.3255447957254001, "grad_norm": 0.3515625, "learning_rate": 0.0002964132295668492, "loss": 0.4761, "step": 12825 }, { "epoch": 0.32567171377441584, "grad_norm": 0.328125, "learning_rate": 0.00029640600315373523, "loss": 0.4711, "step": 12830 }, { "epoch": 0.3257986318234316, "grad_norm": 0.357421875, "learning_rate": 0.000296398769556537, "loss": 0.4976, "step": 12835 }, { "epoch": 0.3259255498724474, "grad_norm": 0.34765625, "learning_rate": 0.00029639152877560947, "loss": 0.5096, "step": 12840 }, { "epoch": 0.3260524679214631, "grad_norm": 0.341796875, "learning_rate": 0.000296384280811308, "loss": 0.4727, "step": 12845 }, { "epoch": 0.32617938597047885, "grad_norm": 0.359375, "learning_rate": 0.0002963770256639881, "loss": 0.5233, "step": 12850 }, { "epoch": 0.3263063040194946, "grad_norm": 0.33984375, "learning_rate": 0.0002963697633340059, "loss": 0.5089, "step": 12855 }, { "epoch": 0.3264332220685104, "grad_norm": 0.380859375, "learning_rate": 0.0002963624938217177, "loss": 0.5008, "step": 12860 }, { "epoch": 0.3265601401175261, "grad_norm": 0.3671875, "learning_rate": 0.0002963552171274802, "loss": 0.4934, "step": 12865 }, { "epoch": 0.32668705816654187, "grad_norm": 0.357421875, "learning_rate": 0.0002963479332516506, "loss": 0.4908, "step": 12870 }, { "epoch": 0.32681397621555763, "grad_norm": 0.37109375, "learning_rate": 0.0002963406421945861, "loss": 0.5201, "step": 12875 }, { "epoch": 0.32694089426457335, "grad_norm": 0.36328125, "learning_rate": 0.00029633334395664466, "loss": 0.4899, "step": 12880 }, { "epoch": 0.3270678123135891, "grad_norm": 0.36328125, "learning_rate": 0.0002963260385381843, "loss": 0.5032, "step": 12885 }, { "epoch": 0.3271947303626049, "grad_norm": 0.39453125, "learning_rate": 0.00029631872593956346, "loss": 0.5169, "step": 12890 }, { "epoch": 0.3273216484116206, "grad_norm": 0.3515625, "learning_rate": 0.000296311406161141, "loss": 0.4779, "step": 12895 }, { "epoch": 0.32744856646063636, "grad_norm": 0.3828125, "learning_rate": 0.00029630407920327626, "loss": 0.51, "step": 12900 }, { "epoch": 0.3275754845096521, "grad_norm": 0.33203125, "learning_rate": 0.00029629674506632855, "loss": 0.4935, "step": 12905 }, { "epoch": 0.3277024025586679, "grad_norm": 0.33203125, "learning_rate": 0.00029628940375065783, "loss": 0.4787, "step": 12910 }, { "epoch": 0.3278293206076836, "grad_norm": 0.318359375, "learning_rate": 0.0002962820552566244, "loss": 0.4949, "step": 12915 }, { "epoch": 0.3279562386566994, "grad_norm": 0.34375, "learning_rate": 0.00029627469958458873, "loss": 0.4717, "step": 12920 }, { "epoch": 0.32808315670571514, "grad_norm": 0.357421875, "learning_rate": 0.00029626733673491183, "loss": 0.5002, "step": 12925 }, { "epoch": 0.32821007475473085, "grad_norm": 0.35546875, "learning_rate": 0.000296259966707955, "loss": 0.4832, "step": 12930 }, { "epoch": 0.3283369928037466, "grad_norm": 0.353515625, "learning_rate": 0.0002962525895040798, "loss": 0.4901, "step": 12935 }, { "epoch": 0.3284639108527624, "grad_norm": 0.388671875, "learning_rate": 0.00029624520512364837, "loss": 0.5031, "step": 12940 }, { "epoch": 0.3285908289017781, "grad_norm": 0.359375, "learning_rate": 0.00029623781356702297, "loss": 0.5304, "step": 12945 }, { "epoch": 0.32871774695079387, "grad_norm": 0.357421875, "learning_rate": 0.0002962304148345663, "loss": 0.4931, "step": 12950 }, { "epoch": 0.32884466499980963, "grad_norm": 0.34765625, "learning_rate": 0.00029622300892664135, "loss": 0.5128, "step": 12955 }, { "epoch": 0.3289715830488254, "grad_norm": 0.365234375, "learning_rate": 0.0002962155958436117, "loss": 0.5137, "step": 12960 }, { "epoch": 0.3290985010978411, "grad_norm": 0.33984375, "learning_rate": 0.00029620817558584096, "loss": 0.5196, "step": 12965 }, { "epoch": 0.3292254191468569, "grad_norm": 0.302734375, "learning_rate": 0.00029620074815369334, "loss": 0.4583, "step": 12970 }, { "epoch": 0.32935233719587265, "grad_norm": 0.373046875, "learning_rate": 0.00029619331354753314, "loss": 0.4638, "step": 12975 }, { "epoch": 0.32947925524488836, "grad_norm": 0.359375, "learning_rate": 0.00029618587176772536, "loss": 0.5062, "step": 12980 }, { "epoch": 0.3296061732939041, "grad_norm": 0.333984375, "learning_rate": 0.0002961784228146351, "loss": 0.511, "step": 12985 }, { "epoch": 0.3297330913429199, "grad_norm": 0.3515625, "learning_rate": 0.0002961709666886278, "loss": 0.5026, "step": 12990 }, { "epoch": 0.3298600093919356, "grad_norm": 0.361328125, "learning_rate": 0.00029616350339006943, "loss": 0.5096, "step": 12995 }, { "epoch": 0.32998692744095137, "grad_norm": 0.326171875, "learning_rate": 0.0002961560329193261, "loss": 0.5188, "step": 13000 }, { "epoch": 0.33011384548996714, "grad_norm": 0.359375, "learning_rate": 0.00029614855527676455, "loss": 0.4944, "step": 13005 }, { "epoch": 0.33024076353898285, "grad_norm": 0.361328125, "learning_rate": 0.00029614107046275156, "loss": 0.5093, "step": 13010 }, { "epoch": 0.3303676815879986, "grad_norm": 0.345703125, "learning_rate": 0.00029613357847765447, "loss": 0.4852, "step": 13015 }, { "epoch": 0.3304945996370144, "grad_norm": 0.365234375, "learning_rate": 0.0002961260793218409, "loss": 0.492, "step": 13020 }, { "epoch": 0.33062151768603015, "grad_norm": 0.349609375, "learning_rate": 0.00029611857299567884, "loss": 0.4787, "step": 13025 }, { "epoch": 0.33074843573504586, "grad_norm": 0.3671875, "learning_rate": 0.0002961110594995366, "loss": 0.5009, "step": 13030 }, { "epoch": 0.33087535378406163, "grad_norm": 0.337890625, "learning_rate": 0.0002961035388337828, "loss": 0.504, "step": 13035 }, { "epoch": 0.3310022718330774, "grad_norm": 0.357421875, "learning_rate": 0.0002960960109987866, "loss": 0.4817, "step": 13040 }, { "epoch": 0.3311291898820931, "grad_norm": 0.390625, "learning_rate": 0.0002960884759949174, "loss": 0.5092, "step": 13045 }, { "epoch": 0.3312561079311089, "grad_norm": 0.384765625, "learning_rate": 0.0002960809338225448, "loss": 0.5452, "step": 13050 }, { "epoch": 0.33138302598012465, "grad_norm": 0.35546875, "learning_rate": 0.000296073384482039, "loss": 0.4584, "step": 13055 }, { "epoch": 0.33150994402914036, "grad_norm": 0.33984375, "learning_rate": 0.00029606582797377036, "loss": 0.4993, "step": 13060 }, { "epoch": 0.3316368620781561, "grad_norm": 0.345703125, "learning_rate": 0.0002960582642981098, "loss": 0.4896, "step": 13065 }, { "epoch": 0.3317637801271719, "grad_norm": 0.34765625, "learning_rate": 0.00029605069345542835, "loss": 0.502, "step": 13070 }, { "epoch": 0.33189069817618766, "grad_norm": 0.357421875, "learning_rate": 0.00029604311544609757, "loss": 0.4945, "step": 13075 }, { "epoch": 0.33201761622520337, "grad_norm": 0.3515625, "learning_rate": 0.00029603553027048924, "loss": 0.5065, "step": 13080 }, { "epoch": 0.33214453427421914, "grad_norm": 0.359375, "learning_rate": 0.0002960279379289756, "loss": 0.5402, "step": 13085 }, { "epoch": 0.3322714523232349, "grad_norm": 0.353515625, "learning_rate": 0.0002960203384219293, "loss": 0.5037, "step": 13090 }, { "epoch": 0.3323983703722506, "grad_norm": 0.34375, "learning_rate": 0.0002960127317497231, "loss": 0.471, "step": 13095 }, { "epoch": 0.3325252884212664, "grad_norm": 0.33984375, "learning_rate": 0.00029600511791273037, "loss": 0.4857, "step": 13100 }, { "epoch": 0.33265220647028215, "grad_norm": 0.36328125, "learning_rate": 0.00029599749691132463, "loss": 0.5072, "step": 13105 }, { "epoch": 0.33277912451929786, "grad_norm": 0.369140625, "learning_rate": 0.0002959898687458799, "loss": 0.5097, "step": 13110 }, { "epoch": 0.33290604256831363, "grad_norm": 0.359375, "learning_rate": 0.0002959822334167704, "loss": 0.4884, "step": 13115 }, { "epoch": 0.3330329606173294, "grad_norm": 0.361328125, "learning_rate": 0.00029597459092437086, "loss": 0.5239, "step": 13120 }, { "epoch": 0.33315987866634517, "grad_norm": 0.375, "learning_rate": 0.00029596694126905634, "loss": 0.4829, "step": 13125 }, { "epoch": 0.3332867967153609, "grad_norm": 0.35546875, "learning_rate": 0.00029595928445120217, "loss": 0.4868, "step": 13130 }, { "epoch": 0.33341371476437665, "grad_norm": 0.455078125, "learning_rate": 0.000295951620471184, "loss": 0.5012, "step": 13135 }, { "epoch": 0.3335406328133924, "grad_norm": 0.34765625, "learning_rate": 0.00029594394932937803, "loss": 0.4909, "step": 13140 }, { "epoch": 0.3336675508624081, "grad_norm": 0.369140625, "learning_rate": 0.0002959362710261605, "loss": 0.4922, "step": 13145 }, { "epoch": 0.3337944689114239, "grad_norm": 0.37109375, "learning_rate": 0.00029592858556190836, "loss": 0.5111, "step": 13150 }, { "epoch": 0.33392138696043966, "grad_norm": 0.333984375, "learning_rate": 0.0002959208929369986, "loss": 0.5079, "step": 13155 }, { "epoch": 0.33404830500945537, "grad_norm": 0.38671875, "learning_rate": 0.0002959131931518088, "loss": 0.5021, "step": 13160 }, { "epoch": 0.33417522305847114, "grad_norm": 0.353515625, "learning_rate": 0.0002959054862067167, "loss": 0.5054, "step": 13165 }, { "epoch": 0.3343021411074869, "grad_norm": 0.34375, "learning_rate": 0.0002958977721021005, "loss": 0.5204, "step": 13170 }, { "epoch": 0.3344290591565027, "grad_norm": 0.353515625, "learning_rate": 0.0002958900508383388, "loss": 0.5092, "step": 13175 }, { "epoch": 0.3345559772055184, "grad_norm": 0.369140625, "learning_rate": 0.0002958823224158104, "loss": 0.5195, "step": 13180 }, { "epoch": 0.33468289525453415, "grad_norm": 0.31640625, "learning_rate": 0.00029587458683489454, "loss": 0.4789, "step": 13185 }, { "epoch": 0.3348098133035499, "grad_norm": 0.3125, "learning_rate": 0.0002958668440959708, "loss": 0.5025, "step": 13190 }, { "epoch": 0.33493673135256563, "grad_norm": 0.345703125, "learning_rate": 0.00029585909419941914, "loss": 0.5023, "step": 13195 }, { "epoch": 0.3350636494015814, "grad_norm": 0.341796875, "learning_rate": 0.00029585133714561977, "loss": 0.4998, "step": 13200 }, { "epoch": 0.33519056745059717, "grad_norm": 0.353515625, "learning_rate": 0.00029584357293495345, "loss": 0.5091, "step": 13205 }, { "epoch": 0.3353174854996129, "grad_norm": 0.376953125, "learning_rate": 0.0002958358015678011, "loss": 0.48, "step": 13210 }, { "epoch": 0.33544440354862864, "grad_norm": 0.330078125, "learning_rate": 0.000295828023044544, "loss": 0.5031, "step": 13215 }, { "epoch": 0.3355713215976444, "grad_norm": 0.345703125, "learning_rate": 0.00029582023736556397, "loss": 0.5018, "step": 13220 }, { "epoch": 0.3356982396466602, "grad_norm": 0.35546875, "learning_rate": 0.000295812444531243, "loss": 0.479, "step": 13225 }, { "epoch": 0.3358251576956759, "grad_norm": 0.361328125, "learning_rate": 0.0002958046445419634, "loss": 0.4803, "step": 13230 }, { "epoch": 0.33595207574469166, "grad_norm": 0.353515625, "learning_rate": 0.00029579683739810797, "loss": 0.4919, "step": 13235 }, { "epoch": 0.3360789937937074, "grad_norm": 0.337890625, "learning_rate": 0.0002957890231000598, "loss": 0.4905, "step": 13240 }, { "epoch": 0.33620591184272314, "grad_norm": 0.330078125, "learning_rate": 0.0002957812016482024, "loss": 0.479, "step": 13245 }, { "epoch": 0.3363328298917389, "grad_norm": 0.38671875, "learning_rate": 0.00029577337304291946, "loss": 0.5191, "step": 13250 }, { "epoch": 0.33645974794075467, "grad_norm": 0.35546875, "learning_rate": 0.00029576553728459515, "loss": 0.465, "step": 13255 }, { "epoch": 0.3365866659897704, "grad_norm": 0.341796875, "learning_rate": 0.000295757694373614, "loss": 0.4782, "step": 13260 }, { "epoch": 0.33671358403878615, "grad_norm": 0.35546875, "learning_rate": 0.00029574984431036093, "loss": 0.468, "step": 13265 }, { "epoch": 0.3368405020878019, "grad_norm": 0.380859375, "learning_rate": 0.00029574198709522096, "loss": 0.5027, "step": 13270 }, { "epoch": 0.33696742013681763, "grad_norm": 0.3359375, "learning_rate": 0.00029573412272857977, "loss": 0.4921, "step": 13275 }, { "epoch": 0.3370943381858334, "grad_norm": 0.3359375, "learning_rate": 0.00029572625121082326, "loss": 0.5271, "step": 13280 }, { "epoch": 0.33722125623484916, "grad_norm": 0.34765625, "learning_rate": 0.0002957183725423376, "loss": 0.4832, "step": 13285 }, { "epoch": 0.33734817428386493, "grad_norm": 0.337890625, "learning_rate": 0.00029571048672350945, "loss": 0.495, "step": 13290 }, { "epoch": 0.33747509233288064, "grad_norm": 0.361328125, "learning_rate": 0.0002957025937547258, "loss": 0.5143, "step": 13295 }, { "epoch": 0.3376020103818964, "grad_norm": 0.326171875, "learning_rate": 0.0002956946936363739, "loss": 0.4628, "step": 13300 }, { "epoch": 0.3377289284309122, "grad_norm": 0.369140625, "learning_rate": 0.0002956867863688414, "loss": 0.513, "step": 13305 }, { "epoch": 0.3378558464799279, "grad_norm": 0.380859375, "learning_rate": 0.0002956788719525164, "loss": 0.4988, "step": 13310 }, { "epoch": 0.33798276452894366, "grad_norm": 0.33984375, "learning_rate": 0.00029567095038778707, "loss": 0.4839, "step": 13315 }, { "epoch": 0.3381096825779594, "grad_norm": 0.373046875, "learning_rate": 0.0002956630216750423, "loss": 0.503, "step": 13320 }, { "epoch": 0.33823660062697514, "grad_norm": 0.359375, "learning_rate": 0.00029565508581467104, "loss": 0.5086, "step": 13325 }, { "epoch": 0.3383635186759909, "grad_norm": 0.341796875, "learning_rate": 0.0002956471428070628, "loss": 0.4798, "step": 13330 }, { "epoch": 0.33849043672500667, "grad_norm": 0.333984375, "learning_rate": 0.00029563919265260724, "loss": 0.4876, "step": 13335 }, { "epoch": 0.33861735477402244, "grad_norm": 0.3671875, "learning_rate": 0.0002956312353516945, "loss": 0.5131, "step": 13340 }, { "epoch": 0.33874427282303815, "grad_norm": 0.33984375, "learning_rate": 0.00029562327090471507, "loss": 0.5024, "step": 13345 }, { "epoch": 0.3388711908720539, "grad_norm": 0.373046875, "learning_rate": 0.0002956152993120597, "loss": 0.5043, "step": 13350 }, { "epoch": 0.3389981089210697, "grad_norm": 0.3515625, "learning_rate": 0.00029560732057411964, "loss": 0.5021, "step": 13355 }, { "epoch": 0.3391250269700854, "grad_norm": 0.359375, "learning_rate": 0.00029559933469128636, "loss": 0.5073, "step": 13360 }, { "epoch": 0.33925194501910116, "grad_norm": 0.37109375, "learning_rate": 0.00029559134166395174, "loss": 0.4918, "step": 13365 }, { "epoch": 0.33937886306811693, "grad_norm": 0.3515625, "learning_rate": 0.00029558334149250797, "loss": 0.5083, "step": 13370 }, { "epoch": 0.33950578111713264, "grad_norm": 0.416015625, "learning_rate": 0.00029557533417734765, "loss": 0.4946, "step": 13375 }, { "epoch": 0.3396326991661484, "grad_norm": 0.3515625, "learning_rate": 0.0002955673197188636, "loss": 0.5132, "step": 13380 }, { "epoch": 0.3397596172151642, "grad_norm": 0.376953125, "learning_rate": 0.00029555929811744916, "loss": 0.4873, "step": 13385 }, { "epoch": 0.33988653526417995, "grad_norm": 0.3515625, "learning_rate": 0.00029555126937349797, "loss": 0.5152, "step": 13390 }, { "epoch": 0.34001345331319566, "grad_norm": 0.39453125, "learning_rate": 0.000295543233487404, "loss": 0.5181, "step": 13395 }, { "epoch": 0.3401403713622114, "grad_norm": 0.3984375, "learning_rate": 0.0002955351904595615, "loss": 0.4625, "step": 13400 }, { "epoch": 0.3402672894112272, "grad_norm": 0.341796875, "learning_rate": 0.0002955271402903652, "loss": 0.5301, "step": 13405 }, { "epoch": 0.3403942074602429, "grad_norm": 0.345703125, "learning_rate": 0.0002955190829802101, "loss": 0.5086, "step": 13410 }, { "epoch": 0.34052112550925867, "grad_norm": 0.369140625, "learning_rate": 0.00029551101852949156, "loss": 0.4753, "step": 13415 }, { "epoch": 0.34064804355827444, "grad_norm": 0.369140625, "learning_rate": 0.0002955029469386053, "loss": 0.4974, "step": 13420 }, { "epoch": 0.34077496160729015, "grad_norm": 0.357421875, "learning_rate": 0.00029549486820794736, "loss": 0.5053, "step": 13425 }, { "epoch": 0.3409018796563059, "grad_norm": 0.384765625, "learning_rate": 0.00029548678233791426, "loss": 0.5203, "step": 13430 }, { "epoch": 0.3410287977053217, "grad_norm": 0.341796875, "learning_rate": 0.00029547868932890267, "loss": 0.5023, "step": 13435 }, { "epoch": 0.34115571575433745, "grad_norm": 0.3671875, "learning_rate": 0.00029547058918130974, "loss": 0.4926, "step": 13440 }, { "epoch": 0.34128263380335316, "grad_norm": 0.359375, "learning_rate": 0.00029546248189553294, "loss": 0.513, "step": 13445 }, { "epoch": 0.34140955185236893, "grad_norm": 0.3515625, "learning_rate": 0.00029545436747197007, "loss": 0.4924, "step": 13450 }, { "epoch": 0.3415364699013847, "grad_norm": 0.33984375, "learning_rate": 0.0002954462459110194, "loss": 0.4784, "step": 13455 }, { "epoch": 0.3416633879504004, "grad_norm": 0.326171875, "learning_rate": 0.00029543811721307933, "loss": 0.4781, "step": 13460 }, { "epoch": 0.3417903059994162, "grad_norm": 0.3515625, "learning_rate": 0.0002954299813785488, "loss": 0.5038, "step": 13465 }, { "epoch": 0.34191722404843194, "grad_norm": 0.376953125, "learning_rate": 0.00029542183840782697, "loss": 0.5151, "step": 13470 }, { "epoch": 0.34204414209744766, "grad_norm": 0.357421875, "learning_rate": 0.0002954136883013135, "loss": 0.5064, "step": 13475 }, { "epoch": 0.3421710601464634, "grad_norm": 0.341796875, "learning_rate": 0.0002954055310594083, "loss": 0.4631, "step": 13480 }, { "epoch": 0.3422979781954792, "grad_norm": 0.345703125, "learning_rate": 0.0002953973666825116, "loss": 0.4388, "step": 13485 }, { "epoch": 0.3424248962444949, "grad_norm": 0.34375, "learning_rate": 0.000295389195171024, "loss": 0.4996, "step": 13490 }, { "epoch": 0.34255181429351067, "grad_norm": 0.3203125, "learning_rate": 0.0002953810165253465, "loss": 0.4884, "step": 13495 }, { "epoch": 0.34267873234252644, "grad_norm": 0.326171875, "learning_rate": 0.00029537283074588047, "loss": 0.4763, "step": 13500 }, { "epoch": 0.3428056503915422, "grad_norm": 0.337890625, "learning_rate": 0.0002953646378330275, "loss": 0.4773, "step": 13505 }, { "epoch": 0.3429325684405579, "grad_norm": 0.361328125, "learning_rate": 0.0002953564377871897, "loss": 0.5199, "step": 13510 }, { "epoch": 0.3430594864895737, "grad_norm": 0.34375, "learning_rate": 0.00029534823060876935, "loss": 0.5268, "step": 13515 }, { "epoch": 0.34318640453858945, "grad_norm": 0.365234375, "learning_rate": 0.0002953400162981693, "loss": 0.4962, "step": 13520 }, { "epoch": 0.34331332258760516, "grad_norm": 0.359375, "learning_rate": 0.0002953317948557925, "loss": 0.4802, "step": 13525 }, { "epoch": 0.34344024063662093, "grad_norm": 0.333984375, "learning_rate": 0.00029532356628204237, "loss": 0.4943, "step": 13530 }, { "epoch": 0.3435671586856367, "grad_norm": 0.361328125, "learning_rate": 0.0002953153305773228, "loss": 0.5106, "step": 13535 }, { "epoch": 0.3436940767346524, "grad_norm": 0.373046875, "learning_rate": 0.0002953070877420378, "loss": 0.5019, "step": 13540 }, { "epoch": 0.3438209947836682, "grad_norm": 0.361328125, "learning_rate": 0.0002952988377765919, "loss": 0.5108, "step": 13545 }, { "epoch": 0.34394791283268394, "grad_norm": 0.32421875, "learning_rate": 0.00029529058068138994, "loss": 0.458, "step": 13550 }, { "epoch": 0.3440748308816997, "grad_norm": 0.349609375, "learning_rate": 0.00029528231645683704, "loss": 0.5039, "step": 13555 }, { "epoch": 0.3442017489307154, "grad_norm": 0.31640625, "learning_rate": 0.0002952740451033387, "loss": 0.5023, "step": 13560 }, { "epoch": 0.3443286669797312, "grad_norm": 0.37109375, "learning_rate": 0.0002952657666213009, "loss": 0.4839, "step": 13565 }, { "epoch": 0.34445558502874696, "grad_norm": 0.337890625, "learning_rate": 0.0002952574810111298, "loss": 0.483, "step": 13570 }, { "epoch": 0.34458250307776267, "grad_norm": 0.330078125, "learning_rate": 0.0002952491882732319, "loss": 0.4954, "step": 13575 }, { "epoch": 0.34470942112677844, "grad_norm": 0.37890625, "learning_rate": 0.00029524088840801425, "loss": 0.4944, "step": 13580 }, { "epoch": 0.3448363391757942, "grad_norm": 0.361328125, "learning_rate": 0.000295232581415884, "loss": 0.5284, "step": 13585 }, { "epoch": 0.3449632572248099, "grad_norm": 0.375, "learning_rate": 0.0002952242672972489, "loss": 0.5125, "step": 13590 }, { "epoch": 0.3450901752738257, "grad_norm": 0.34765625, "learning_rate": 0.0002952159460525168, "loss": 0.5235, "step": 13595 }, { "epoch": 0.34521709332284145, "grad_norm": 0.337890625, "learning_rate": 0.00029520761768209614, "loss": 0.5136, "step": 13600 }, { "epoch": 0.3453440113718572, "grad_norm": 0.369140625, "learning_rate": 0.00029519928218639546, "loss": 0.4882, "step": 13605 }, { "epoch": 0.34547092942087293, "grad_norm": 0.345703125, "learning_rate": 0.0002951909395658239, "loss": 0.5092, "step": 13610 }, { "epoch": 0.3455978474698887, "grad_norm": 0.353515625, "learning_rate": 0.00029518258982079074, "loss": 0.4716, "step": 13615 }, { "epoch": 0.34572476551890446, "grad_norm": 0.34765625, "learning_rate": 0.0002951742329517057, "loss": 0.4894, "step": 13620 }, { "epoch": 0.3458516835679202, "grad_norm": 0.33984375, "learning_rate": 0.0002951658689589789, "loss": 0.4818, "step": 13625 }, { "epoch": 0.34597860161693594, "grad_norm": 0.369140625, "learning_rate": 0.0002951574978430207, "loss": 0.5322, "step": 13630 }, { "epoch": 0.3461055196659517, "grad_norm": 0.3203125, "learning_rate": 0.00029514911960424196, "loss": 0.4862, "step": 13635 }, { "epoch": 0.3462324377149674, "grad_norm": 0.318359375, "learning_rate": 0.0002951407342430537, "loss": 0.4868, "step": 13640 }, { "epoch": 0.3463593557639832, "grad_norm": 0.349609375, "learning_rate": 0.00029513234175986746, "loss": 0.5087, "step": 13645 }, { "epoch": 0.34648627381299896, "grad_norm": 0.359375, "learning_rate": 0.000295123942155095, "loss": 0.4795, "step": 13650 }, { "epoch": 0.3466131918620147, "grad_norm": 0.359375, "learning_rate": 0.00029511553542914854, "loss": 0.4891, "step": 13655 }, { "epoch": 0.34674010991103044, "grad_norm": 0.34765625, "learning_rate": 0.00029510712158244053, "loss": 0.495, "step": 13660 }, { "epoch": 0.3468670279600462, "grad_norm": 0.361328125, "learning_rate": 0.0002950987006153839, "loss": 0.4882, "step": 13665 }, { "epoch": 0.34699394600906197, "grad_norm": 0.3359375, "learning_rate": 0.00029509027252839175, "loss": 0.4542, "step": 13670 }, { "epoch": 0.3471208640580777, "grad_norm": 0.37109375, "learning_rate": 0.00029508183732187784, "loss": 0.5271, "step": 13675 }, { "epoch": 0.34724778210709345, "grad_norm": 0.345703125, "learning_rate": 0.00029507339499625584, "loss": 0.509, "step": 13680 }, { "epoch": 0.3473747001561092, "grad_norm": 0.349609375, "learning_rate": 0.00029506494555194023, "loss": 0.4793, "step": 13685 }, { "epoch": 0.34750161820512493, "grad_norm": 30.875, "learning_rate": 0.0002950564889893455, "loss": 0.7118, "step": 13690 }, { "epoch": 0.3476285362541407, "grad_norm": 0.3359375, "learning_rate": 0.0002950480253088866, "loss": 0.4866, "step": 13695 }, { "epoch": 0.34775545430315646, "grad_norm": 0.357421875, "learning_rate": 0.00029503955451097894, "loss": 0.4864, "step": 13700 }, { "epoch": 0.3478823723521722, "grad_norm": 0.34375, "learning_rate": 0.00029503107659603807, "loss": 0.4875, "step": 13705 }, { "epoch": 0.34800929040118794, "grad_norm": 0.35546875, "learning_rate": 0.00029502259156448005, "loss": 0.4845, "step": 13710 }, { "epoch": 0.3481362084502037, "grad_norm": 0.375, "learning_rate": 0.00029501409941672124, "loss": 0.4688, "step": 13715 }, { "epoch": 0.3482631264992195, "grad_norm": 0.38671875, "learning_rate": 0.00029500560015317833, "loss": 0.5007, "step": 13720 }, { "epoch": 0.3483900445482352, "grad_norm": 0.369140625, "learning_rate": 0.0002949970937742684, "loss": 0.5057, "step": 13725 }, { "epoch": 0.34851696259725096, "grad_norm": 0.376953125, "learning_rate": 0.0002949885802804088, "loss": 0.4758, "step": 13730 }, { "epoch": 0.3486438806462667, "grad_norm": 0.34765625, "learning_rate": 0.0002949800596720173, "loss": 0.5107, "step": 13735 }, { "epoch": 0.34877079869528244, "grad_norm": 0.349609375, "learning_rate": 0.0002949715319495121, "loss": 0.5136, "step": 13740 }, { "epoch": 0.3488977167442982, "grad_norm": 0.357421875, "learning_rate": 0.00029496299711331153, "loss": 0.4679, "step": 13745 }, { "epoch": 0.34902463479331397, "grad_norm": 0.3359375, "learning_rate": 0.00029495445516383446, "loss": 0.486, "step": 13750 }, { "epoch": 0.3491515528423297, "grad_norm": 0.3671875, "learning_rate": 0.0002949459061015, "loss": 0.4805, "step": 13755 }, { "epoch": 0.34927847089134545, "grad_norm": 0.384765625, "learning_rate": 0.0002949373499267277, "loss": 0.4904, "step": 13760 }, { "epoch": 0.3494053889403612, "grad_norm": 0.34375, "learning_rate": 0.0002949287866399373, "loss": 0.4786, "step": 13765 }, { "epoch": 0.349532306989377, "grad_norm": 0.3203125, "learning_rate": 0.00029492021624154914, "loss": 0.4579, "step": 13770 }, { "epoch": 0.3496592250383927, "grad_norm": 0.326171875, "learning_rate": 0.00029491163873198367, "loss": 0.4934, "step": 13775 }, { "epoch": 0.34978614308740846, "grad_norm": 0.357421875, "learning_rate": 0.0002949030541116618, "loss": 0.4695, "step": 13780 }, { "epoch": 0.34991306113642423, "grad_norm": 0.3671875, "learning_rate": 0.00029489446238100477, "loss": 0.466, "step": 13785 }, { "epoch": 0.35003997918543994, "grad_norm": 0.341796875, "learning_rate": 0.00029488586354043425, "loss": 0.5107, "step": 13790 }, { "epoch": 0.3501668972344557, "grad_norm": 0.3359375, "learning_rate": 0.0002948772575903721, "loss": 0.4734, "step": 13795 }, { "epoch": 0.3502938152834715, "grad_norm": 0.37890625, "learning_rate": 0.0002948686445312406, "loss": 0.5256, "step": 13800 }, { "epoch": 0.3504207333324872, "grad_norm": 0.376953125, "learning_rate": 0.00029486002436346245, "loss": 0.5188, "step": 13805 }, { "epoch": 0.35054765138150296, "grad_norm": 0.42578125, "learning_rate": 0.00029485139708746057, "loss": 0.4928, "step": 13810 }, { "epoch": 0.3506745694305187, "grad_norm": 0.390625, "learning_rate": 0.0002948427627036584, "loss": 0.4836, "step": 13815 }, { "epoch": 0.3508014874795345, "grad_norm": 0.326171875, "learning_rate": 0.0002948341212124795, "loss": 0.5103, "step": 13820 }, { "epoch": 0.3509284055285502, "grad_norm": 0.37109375, "learning_rate": 0.00029482547261434796, "loss": 0.5068, "step": 13825 }, { "epoch": 0.35105532357756597, "grad_norm": 0.359375, "learning_rate": 0.0002948168169096882, "loss": 0.5219, "step": 13830 }, { "epoch": 0.35118224162658174, "grad_norm": 0.345703125, "learning_rate": 0.00029480815409892495, "loss": 0.4867, "step": 13835 }, { "epoch": 0.35130915967559745, "grad_norm": 0.3984375, "learning_rate": 0.00029479948418248316, "loss": 0.485, "step": 13840 }, { "epoch": 0.3514360777246132, "grad_norm": 0.3515625, "learning_rate": 0.00029479080716078847, "loss": 0.4808, "step": 13845 }, { "epoch": 0.351562995773629, "grad_norm": 0.333984375, "learning_rate": 0.00029478212303426653, "loss": 0.502, "step": 13850 }, { "epoch": 0.3516899138226447, "grad_norm": 0.357421875, "learning_rate": 0.0002947734318033434, "loss": 0.4973, "step": 13855 }, { "epoch": 0.35181683187166046, "grad_norm": 0.365234375, "learning_rate": 0.00029476473346844574, "loss": 0.5015, "step": 13860 }, { "epoch": 0.35194374992067623, "grad_norm": 0.341796875, "learning_rate": 0.00029475602803000027, "loss": 0.487, "step": 13865 }, { "epoch": 0.352070667969692, "grad_norm": 0.322265625, "learning_rate": 0.0002947473154884341, "loss": 0.4954, "step": 13870 }, { "epoch": 0.3521975860187077, "grad_norm": 0.34765625, "learning_rate": 0.00029473859584417484, "loss": 0.4881, "step": 13875 }, { "epoch": 0.3523245040677235, "grad_norm": 0.326171875, "learning_rate": 0.0002947298690976503, "loss": 0.4794, "step": 13880 }, { "epoch": 0.35245142211673924, "grad_norm": 0.326171875, "learning_rate": 0.00029472113524928883, "loss": 0.4829, "step": 13885 }, { "epoch": 0.35257834016575496, "grad_norm": 0.34765625, "learning_rate": 0.00029471239429951885, "loss": 0.4969, "step": 13890 }, { "epoch": 0.3527052582147707, "grad_norm": 0.361328125, "learning_rate": 0.00029470364624876934, "loss": 0.5375, "step": 13895 }, { "epoch": 0.3528321762637865, "grad_norm": 0.3671875, "learning_rate": 0.00029469489109746954, "loss": 0.4942, "step": 13900 }, { "epoch": 0.3529590943128022, "grad_norm": 0.36328125, "learning_rate": 0.00029468612884604907, "loss": 0.5211, "step": 13905 }, { "epoch": 0.35308601236181797, "grad_norm": 0.3515625, "learning_rate": 0.0002946773594949379, "loss": 0.5047, "step": 13910 }, { "epoch": 0.35321293041083374, "grad_norm": 0.353515625, "learning_rate": 0.0002946685830445663, "loss": 0.5068, "step": 13915 }, { "epoch": 0.35333984845984945, "grad_norm": 0.3203125, "learning_rate": 0.000294659799495365, "loss": 0.4863, "step": 13920 }, { "epoch": 0.3534667665088652, "grad_norm": 0.341796875, "learning_rate": 0.00029465100884776496, "loss": 0.501, "step": 13925 }, { "epoch": 0.353593684557881, "grad_norm": 0.33984375, "learning_rate": 0.0002946422111021975, "loss": 0.5128, "step": 13930 }, { "epoch": 0.35372060260689675, "grad_norm": 0.369140625, "learning_rate": 0.0002946334062590944, "loss": 0.5207, "step": 13935 }, { "epoch": 0.35384752065591246, "grad_norm": 0.34765625, "learning_rate": 0.0002946245943188876, "loss": 0.4861, "step": 13940 }, { "epoch": 0.35397443870492823, "grad_norm": 0.330078125, "learning_rate": 0.0002946157752820097, "loss": 0.4951, "step": 13945 }, { "epoch": 0.354101356753944, "grad_norm": 0.392578125, "learning_rate": 0.0002946069491488932, "loss": 0.5144, "step": 13950 }, { "epoch": 0.3542282748029597, "grad_norm": 0.361328125, "learning_rate": 0.0002945981159199713, "loss": 0.4909, "step": 13955 }, { "epoch": 0.3543551928519755, "grad_norm": 0.33984375, "learning_rate": 0.0002945892755956775, "loss": 0.4856, "step": 13960 }, { "epoch": 0.35448211090099124, "grad_norm": 0.36328125, "learning_rate": 0.00029458042817644554, "loss": 0.5187, "step": 13965 }, { "epoch": 0.35460902895000695, "grad_norm": 0.35546875, "learning_rate": 0.00029457157366270955, "loss": 0.4938, "step": 13970 }, { "epoch": 0.3547359469990227, "grad_norm": 0.38671875, "learning_rate": 0.00029456271205490403, "loss": 0.5143, "step": 13975 }, { "epoch": 0.3548628650480385, "grad_norm": 0.361328125, "learning_rate": 0.0002945538433534638, "loss": 0.4895, "step": 13980 }, { "epoch": 0.35498978309705426, "grad_norm": 0.3359375, "learning_rate": 0.00029454496755882407, "loss": 0.5255, "step": 13985 }, { "epoch": 0.35511670114606997, "grad_norm": 0.35546875, "learning_rate": 0.00029453608467142035, "loss": 0.4925, "step": 13990 }, { "epoch": 0.35524361919508574, "grad_norm": 0.37109375, "learning_rate": 0.0002945271946916886, "loss": 0.5059, "step": 13995 }, { "epoch": 0.3553705372441015, "grad_norm": 0.328125, "learning_rate": 0.0002945182976200649, "loss": 0.5046, "step": 14000 }, { "epoch": 0.3554974552931172, "grad_norm": 0.361328125, "learning_rate": 0.0002945093934569859, "loss": 0.5232, "step": 14005 }, { "epoch": 0.355624373342133, "grad_norm": 0.3359375, "learning_rate": 0.00029450048220288857, "loss": 0.4881, "step": 14010 }, { "epoch": 0.35575129139114875, "grad_norm": 0.349609375, "learning_rate": 0.0002944915638582101, "loss": 0.5275, "step": 14015 }, { "epoch": 0.35587820944016446, "grad_norm": 0.36328125, "learning_rate": 0.0002944826384233882, "loss": 0.5018, "step": 14020 }, { "epoch": 0.35600512748918023, "grad_norm": 0.32421875, "learning_rate": 0.00029447370589886075, "loss": 0.4797, "step": 14025 }, { "epoch": 0.356132045538196, "grad_norm": 0.361328125, "learning_rate": 0.0002944647662850661, "loss": 0.4968, "step": 14030 }, { "epoch": 0.35625896358721176, "grad_norm": 0.345703125, "learning_rate": 0.0002944558195824429, "loss": 0.4575, "step": 14035 }, { "epoch": 0.3563858816362275, "grad_norm": 0.357421875, "learning_rate": 0.0002944468657914302, "loss": 0.5088, "step": 14040 }, { "epoch": 0.35651279968524324, "grad_norm": 0.3515625, "learning_rate": 0.0002944379049124673, "loss": 0.5159, "step": 14045 }, { "epoch": 0.356639717734259, "grad_norm": 0.34765625, "learning_rate": 0.00029442893694599405, "loss": 0.4919, "step": 14050 }, { "epoch": 0.3567666357832747, "grad_norm": 0.35546875, "learning_rate": 0.00029441996189245027, "loss": 0.5066, "step": 14055 }, { "epoch": 0.3568935538322905, "grad_norm": 0.380859375, "learning_rate": 0.0002944109797522766, "loss": 0.5033, "step": 14060 }, { "epoch": 0.35702047188130626, "grad_norm": 0.375, "learning_rate": 0.00029440199052591353, "loss": 0.5049, "step": 14065 }, { "epoch": 0.35714738993032197, "grad_norm": 0.337890625, "learning_rate": 0.0002943929942138024, "loss": 0.4891, "step": 14070 }, { "epoch": 0.35727430797933774, "grad_norm": 0.337890625, "learning_rate": 0.00029438399081638456, "loss": 0.469, "step": 14075 }, { "epoch": 0.3574012260283535, "grad_norm": 0.359375, "learning_rate": 0.00029437498033410184, "loss": 0.4948, "step": 14080 }, { "epoch": 0.35752814407736927, "grad_norm": 0.34765625, "learning_rate": 0.0002943659627673963, "loss": 0.5157, "step": 14085 }, { "epoch": 0.357655062126385, "grad_norm": 0.35546875, "learning_rate": 0.00029435693811671047, "loss": 0.5199, "step": 14090 }, { "epoch": 0.35778198017540075, "grad_norm": 0.349609375, "learning_rate": 0.00029434790638248716, "loss": 0.5077, "step": 14095 }, { "epoch": 0.3579088982244165, "grad_norm": 0.408203125, "learning_rate": 0.0002943388675651696, "loss": 0.5103, "step": 14100 }, { "epoch": 0.35803581627343223, "grad_norm": 0.365234375, "learning_rate": 0.0002943298216652014, "loss": 0.4871, "step": 14105 }, { "epoch": 0.358162734322448, "grad_norm": 0.3515625, "learning_rate": 0.00029432076868302624, "loss": 0.5034, "step": 14110 }, { "epoch": 0.35828965237146376, "grad_norm": 0.3671875, "learning_rate": 0.0002943117086190885, "loss": 0.5103, "step": 14115 }, { "epoch": 0.3584165704204795, "grad_norm": 0.3515625, "learning_rate": 0.0002943026414738327, "loss": 0.507, "step": 14120 }, { "epoch": 0.35854348846949524, "grad_norm": 0.373046875, "learning_rate": 0.0002942935672477037, "loss": 0.5023, "step": 14125 }, { "epoch": 0.358670406518511, "grad_norm": 0.37890625, "learning_rate": 0.0002942844859411469, "loss": 0.5107, "step": 14130 }, { "epoch": 0.3587973245675267, "grad_norm": 0.373046875, "learning_rate": 0.0002942753975546078, "loss": 0.53, "step": 14135 }, { "epoch": 0.3589242426165425, "grad_norm": 0.3515625, "learning_rate": 0.0002942663020885324, "loss": 0.4866, "step": 14140 }, { "epoch": 0.35905116066555826, "grad_norm": 0.34375, "learning_rate": 0.0002942571995433671, "loss": 0.4805, "step": 14145 }, { "epoch": 0.359178078714574, "grad_norm": 0.40234375, "learning_rate": 0.00029424808991955845, "loss": 0.5057, "step": 14150 }, { "epoch": 0.35930499676358973, "grad_norm": 0.37109375, "learning_rate": 0.00029423897321755347, "loss": 0.4818, "step": 14155 }, { "epoch": 0.3594319148126055, "grad_norm": 0.359375, "learning_rate": 0.0002942298494377996, "loss": 0.5053, "step": 14160 }, { "epoch": 0.35955883286162127, "grad_norm": 0.365234375, "learning_rate": 0.00029422071858074437, "loss": 0.5011, "step": 14165 }, { "epoch": 0.359685750910637, "grad_norm": 0.33203125, "learning_rate": 0.00029421158064683596, "loss": 0.5049, "step": 14170 }, { "epoch": 0.35981266895965275, "grad_norm": 0.35546875, "learning_rate": 0.00029420243563652276, "loss": 0.4813, "step": 14175 }, { "epoch": 0.3599395870086685, "grad_norm": 0.34375, "learning_rate": 0.0002941932835502535, "loss": 0.4783, "step": 14180 }, { "epoch": 0.3600665050576842, "grad_norm": 0.3515625, "learning_rate": 0.0002941841243884772, "loss": 0.4785, "step": 14185 }, { "epoch": 0.3601934231067, "grad_norm": 0.328125, "learning_rate": 0.0002941749581516434, "loss": 0.489, "step": 14190 }, { "epoch": 0.36032034115571576, "grad_norm": 0.33984375, "learning_rate": 0.00029416578484020177, "loss": 0.5325, "step": 14195 }, { "epoch": 0.36044725920473153, "grad_norm": 0.3671875, "learning_rate": 0.0002941566044546026, "loss": 0.5131, "step": 14200 }, { "epoch": 0.36057417725374724, "grad_norm": 0.326171875, "learning_rate": 0.0002941474169952962, "loss": 0.484, "step": 14205 }, { "epoch": 0.360701095302763, "grad_norm": 0.32421875, "learning_rate": 0.00029413822246273345, "loss": 0.4828, "step": 14210 }, { "epoch": 0.3608280133517788, "grad_norm": 0.345703125, "learning_rate": 0.0002941290208573656, "loss": 0.5087, "step": 14215 }, { "epoch": 0.3609549314007945, "grad_norm": 0.365234375, "learning_rate": 0.0002941198121796441, "loss": 0.514, "step": 14220 }, { "epoch": 0.36108184944981025, "grad_norm": 0.337890625, "learning_rate": 0.0002941105964300208, "loss": 0.4628, "step": 14225 }, { "epoch": 0.361208767498826, "grad_norm": 0.357421875, "learning_rate": 0.00029410137360894793, "loss": 0.5068, "step": 14230 }, { "epoch": 0.36133568554784173, "grad_norm": 0.369140625, "learning_rate": 0.0002940921437168781, "loss": 0.5025, "step": 14235 }, { "epoch": 0.3614626035968575, "grad_norm": 0.390625, "learning_rate": 0.0002940829067542641, "loss": 0.5056, "step": 14240 }, { "epoch": 0.36158952164587327, "grad_norm": 0.34375, "learning_rate": 0.0002940736627215593, "loss": 0.4703, "step": 14245 }, { "epoch": 0.36171643969488904, "grad_norm": 0.357421875, "learning_rate": 0.0002940644116192173, "loss": 0.4922, "step": 14250 }, { "epoch": 0.36184335774390475, "grad_norm": 0.361328125, "learning_rate": 0.00029405515344769204, "loss": 0.4961, "step": 14255 }, { "epoch": 0.3619702757929205, "grad_norm": 0.373046875, "learning_rate": 0.00029404588820743766, "loss": 0.4752, "step": 14260 }, { "epoch": 0.3620971938419363, "grad_norm": 0.361328125, "learning_rate": 0.00029403661589890904, "loss": 0.5216, "step": 14265 }, { "epoch": 0.362224111890952, "grad_norm": 0.40234375, "learning_rate": 0.00029402733652256094, "loss": 0.52, "step": 14270 }, { "epoch": 0.36235102993996776, "grad_norm": 0.3515625, "learning_rate": 0.0002940180500788489, "loss": 0.4896, "step": 14275 }, { "epoch": 0.36247794798898353, "grad_norm": 0.34765625, "learning_rate": 0.0002940087565682285, "loss": 0.465, "step": 14280 }, { "epoch": 0.36260486603799924, "grad_norm": 0.3671875, "learning_rate": 0.0002939994559911558, "loss": 0.499, "step": 14285 }, { "epoch": 0.362731784087015, "grad_norm": 0.38671875, "learning_rate": 0.00029399014834808707, "loss": 0.531, "step": 14290 }, { "epoch": 0.3628587021360308, "grad_norm": 0.34375, "learning_rate": 0.00029398083363947917, "loss": 0.5068, "step": 14295 }, { "epoch": 0.36298562018504654, "grad_norm": 0.365234375, "learning_rate": 0.00029397151186578914, "loss": 0.5051, "step": 14300 }, { "epoch": 0.36311253823406225, "grad_norm": 0.353515625, "learning_rate": 0.0002939621830274743, "loss": 0.4846, "step": 14305 }, { "epoch": 0.363239456283078, "grad_norm": 0.34765625, "learning_rate": 0.00029395284712499254, "loss": 0.4754, "step": 14310 }, { "epoch": 0.3633663743320938, "grad_norm": 0.361328125, "learning_rate": 0.0002939435041588019, "loss": 0.4996, "step": 14315 }, { "epoch": 0.3634932923811095, "grad_norm": 0.296875, "learning_rate": 0.0002939341541293609, "loss": 0.4492, "step": 14320 }, { "epoch": 0.36362021043012527, "grad_norm": 0.365234375, "learning_rate": 0.00029392479703712823, "loss": 0.4985, "step": 14325 }, { "epoch": 0.36374712847914104, "grad_norm": 0.333984375, "learning_rate": 0.0002939154328825631, "loss": 0.4701, "step": 14330 }, { "epoch": 0.36387404652815675, "grad_norm": 0.337890625, "learning_rate": 0.00029390606166612503, "loss": 0.4565, "step": 14335 }, { "epoch": 0.3640009645771725, "grad_norm": 0.32421875, "learning_rate": 0.00029389668338827385, "loss": 0.4692, "step": 14340 }, { "epoch": 0.3641278826261883, "grad_norm": 0.353515625, "learning_rate": 0.00029388729804946965, "loss": 0.4646, "step": 14345 }, { "epoch": 0.364254800675204, "grad_norm": 0.33984375, "learning_rate": 0.00029387790565017313, "loss": 0.4757, "step": 14350 }, { "epoch": 0.36438171872421976, "grad_norm": 0.375, "learning_rate": 0.00029386850619084507, "loss": 0.4955, "step": 14355 }, { "epoch": 0.36450863677323553, "grad_norm": 0.3828125, "learning_rate": 0.00029385909967194674, "loss": 0.4743, "step": 14360 }, { "epoch": 0.3646355548222513, "grad_norm": 0.37890625, "learning_rate": 0.0002938496860939397, "loss": 0.5192, "step": 14365 }, { "epoch": 0.364762472871267, "grad_norm": 0.337890625, "learning_rate": 0.00029384026545728584, "loss": 0.4862, "step": 14370 }, { "epoch": 0.3648893909202828, "grad_norm": 0.349609375, "learning_rate": 0.0002938308377624475, "loss": 0.4686, "step": 14375 }, { "epoch": 0.36501630896929854, "grad_norm": 0.341796875, "learning_rate": 0.00029382140300988717, "loss": 0.4927, "step": 14380 }, { "epoch": 0.36514322701831425, "grad_norm": 0.326171875, "learning_rate": 0.00029381196120006794, "loss": 0.4805, "step": 14385 }, { "epoch": 0.36527014506733, "grad_norm": 0.3515625, "learning_rate": 0.000293802512333453, "loss": 0.55, "step": 14390 }, { "epoch": 0.3653970631163458, "grad_norm": 0.3125, "learning_rate": 0.00029379305641050613, "loss": 0.4881, "step": 14395 }, { "epoch": 0.3655239811653615, "grad_norm": 0.341796875, "learning_rate": 0.0002937835934316912, "loss": 0.4776, "step": 14400 }, { "epoch": 0.36565089921437727, "grad_norm": 0.349609375, "learning_rate": 0.00029377412339747264, "loss": 0.5169, "step": 14405 }, { "epoch": 0.36577781726339303, "grad_norm": 0.365234375, "learning_rate": 0.00029376464630831515, "loss": 0.5033, "step": 14410 }, { "epoch": 0.3659047353124088, "grad_norm": 0.36328125, "learning_rate": 0.00029375516216468374, "loss": 0.497, "step": 14415 }, { "epoch": 0.3660316533614245, "grad_norm": 0.37109375, "learning_rate": 0.0002937456709670437, "loss": 0.4902, "step": 14420 }, { "epoch": 0.3661585714104403, "grad_norm": 0.380859375, "learning_rate": 0.00029373617271586094, "loss": 0.4934, "step": 14425 }, { "epoch": 0.36628548945945605, "grad_norm": 0.345703125, "learning_rate": 0.00029372666741160137, "loss": 0.4814, "step": 14430 }, { "epoch": 0.36641240750847176, "grad_norm": 0.345703125, "learning_rate": 0.0002937171550547315, "loss": 0.487, "step": 14435 }, { "epoch": 0.3665393255574875, "grad_norm": 0.33984375, "learning_rate": 0.0002937076356457181, "loss": 0.5047, "step": 14440 }, { "epoch": 0.3666662436065033, "grad_norm": 0.330078125, "learning_rate": 0.00029369810918502827, "loss": 0.4787, "step": 14445 }, { "epoch": 0.366793161655519, "grad_norm": 0.38671875, "learning_rate": 0.0002936885756731294, "loss": 0.4865, "step": 14450 }, { "epoch": 0.3669200797045348, "grad_norm": 0.34765625, "learning_rate": 0.0002936790351104894, "loss": 0.5127, "step": 14455 }, { "epoch": 0.36704699775355054, "grad_norm": 0.326171875, "learning_rate": 0.0002936694874975764, "loss": 0.4963, "step": 14460 }, { "epoch": 0.3671739158025663, "grad_norm": 0.349609375, "learning_rate": 0.0002936599328348589, "loss": 0.51, "step": 14465 }, { "epoch": 0.367300833851582, "grad_norm": 0.341796875, "learning_rate": 0.0002936503711228056, "loss": 0.5076, "step": 14470 }, { "epoch": 0.3674277519005978, "grad_norm": 0.349609375, "learning_rate": 0.00029364080236188593, "loss": 0.4835, "step": 14475 }, { "epoch": 0.36755466994961355, "grad_norm": 0.359375, "learning_rate": 0.0002936312265525693, "loss": 0.4609, "step": 14480 }, { "epoch": 0.36768158799862927, "grad_norm": 0.349609375, "learning_rate": 0.00029362164369532554, "loss": 0.4922, "step": 14485 }, { "epoch": 0.36780850604764503, "grad_norm": 0.34765625, "learning_rate": 0.00029361205379062496, "loss": 0.4456, "step": 14490 }, { "epoch": 0.3679354240966608, "grad_norm": 0.326171875, "learning_rate": 0.0002936024568389381, "loss": 0.486, "step": 14495 }, { "epoch": 0.3680623421456765, "grad_norm": 0.380859375, "learning_rate": 0.00029359285284073593, "loss": 0.5166, "step": 14500 }, { "epoch": 0.3681892601946923, "grad_norm": 0.365234375, "learning_rate": 0.0002935832417964896, "loss": 0.4788, "step": 14505 }, { "epoch": 0.36831617824370805, "grad_norm": 0.330078125, "learning_rate": 0.0002935736237066708, "loss": 0.4922, "step": 14510 }, { "epoch": 0.3684430962927238, "grad_norm": 0.380859375, "learning_rate": 0.00029356399857175154, "loss": 0.511, "step": 14515 }, { "epoch": 0.3685700143417395, "grad_norm": 0.373046875, "learning_rate": 0.000293554366392204, "loss": 0.5127, "step": 14520 }, { "epoch": 0.3686969323907553, "grad_norm": 0.361328125, "learning_rate": 0.00029354472716850094, "loss": 0.4651, "step": 14525 }, { "epoch": 0.36882385043977106, "grad_norm": 0.359375, "learning_rate": 0.0002935350809011153, "loss": 0.479, "step": 14530 }, { "epoch": 0.3689507684887868, "grad_norm": 0.34375, "learning_rate": 0.00029352542759052033, "loss": 0.475, "step": 14535 }, { "epoch": 0.36907768653780254, "grad_norm": 0.36328125, "learning_rate": 0.00029351576723718985, "loss": 0.5066, "step": 14540 }, { "epoch": 0.3692046045868183, "grad_norm": 0.373046875, "learning_rate": 0.00029350609984159784, "loss": 0.4987, "step": 14545 }, { "epoch": 0.369331522635834, "grad_norm": 0.337890625, "learning_rate": 0.00029349642540421866, "loss": 0.5073, "step": 14550 }, { "epoch": 0.3694584406848498, "grad_norm": 0.349609375, "learning_rate": 0.00029348674392552706, "loss": 0.4866, "step": 14555 }, { "epoch": 0.36958535873386555, "grad_norm": 0.349609375, "learning_rate": 0.0002934770554059981, "loss": 0.49, "step": 14560 }, { "epoch": 0.36971227678288127, "grad_norm": 0.349609375, "learning_rate": 0.0002934673598461072, "loss": 0.4707, "step": 14565 }, { "epoch": 0.36983919483189703, "grad_norm": 0.3359375, "learning_rate": 0.00029345765724633, "loss": 0.4737, "step": 14570 }, { "epoch": 0.3699661128809128, "grad_norm": 0.37109375, "learning_rate": 0.00029344794760714283, "loss": 0.4935, "step": 14575 }, { "epoch": 0.37009303092992857, "grad_norm": 0.388671875, "learning_rate": 0.000293438230929022, "loss": 0.5204, "step": 14580 }, { "epoch": 0.3702199489789443, "grad_norm": 0.3359375, "learning_rate": 0.00029342850721244423, "loss": 0.4499, "step": 14585 }, { "epoch": 0.37034686702796005, "grad_norm": 0.37109375, "learning_rate": 0.0002934187764578868, "loss": 0.4941, "step": 14590 }, { "epoch": 0.3704737850769758, "grad_norm": 0.35546875, "learning_rate": 0.0002934090386658271, "loss": 0.4823, "step": 14595 }, { "epoch": 0.3706007031259915, "grad_norm": 0.353515625, "learning_rate": 0.0002933992938367431, "loss": 0.5295, "step": 14600 }, { "epoch": 0.3707276211750073, "grad_norm": 0.345703125, "learning_rate": 0.00029338954197111276, "loss": 0.4639, "step": 14605 }, { "epoch": 0.37085453922402306, "grad_norm": 1.4609375, "learning_rate": 0.00029337978306941473, "loss": 0.5056, "step": 14610 }, { "epoch": 0.3709814572730388, "grad_norm": 0.369140625, "learning_rate": 0.0002933700171321279, "loss": 0.518, "step": 14615 }, { "epoch": 0.37110837532205454, "grad_norm": 0.36328125, "learning_rate": 0.00029336024415973144, "loss": 0.4836, "step": 14620 }, { "epoch": 0.3712352933710703, "grad_norm": 0.35546875, "learning_rate": 0.0002933504641527049, "loss": 0.4769, "step": 14625 }, { "epoch": 0.3713622114200861, "grad_norm": 0.35546875, "learning_rate": 0.00029334067711152815, "loss": 0.4778, "step": 14630 }, { "epoch": 0.3714891294691018, "grad_norm": 0.349609375, "learning_rate": 0.0002933308830366815, "loss": 0.5229, "step": 14635 }, { "epoch": 0.37161604751811755, "grad_norm": 0.353515625, "learning_rate": 0.00029332108192864554, "loss": 0.4883, "step": 14640 }, { "epoch": 0.3717429655671333, "grad_norm": 0.373046875, "learning_rate": 0.00029331127378790113, "loss": 0.4908, "step": 14645 }, { "epoch": 0.37186988361614903, "grad_norm": 0.333984375, "learning_rate": 0.0002933014586149296, "loss": 0.4964, "step": 14650 }, { "epoch": 0.3719968016651648, "grad_norm": 0.330078125, "learning_rate": 0.00029329163641021266, "loss": 0.4912, "step": 14655 }, { "epoch": 0.37212371971418057, "grad_norm": 0.361328125, "learning_rate": 0.00029328180717423213, "loss": 0.4786, "step": 14660 }, { "epoch": 0.3722506377631963, "grad_norm": 0.330078125, "learning_rate": 0.0002932719709074704, "loss": 0.456, "step": 14665 }, { "epoch": 0.37237755581221205, "grad_norm": 0.34375, "learning_rate": 0.0002932621276104101, "loss": 0.4449, "step": 14670 }, { "epoch": 0.3725044738612278, "grad_norm": 0.3515625, "learning_rate": 0.0002932522772835343, "loss": 0.5021, "step": 14675 }, { "epoch": 0.3726313919102436, "grad_norm": 0.34375, "learning_rate": 0.0002932424199273264, "loss": 0.4683, "step": 14680 }, { "epoch": 0.3727583099592593, "grad_norm": 0.353515625, "learning_rate": 0.0002932325555422699, "loss": 0.504, "step": 14685 }, { "epoch": 0.37288522800827506, "grad_norm": 0.349609375, "learning_rate": 0.000293222684128849, "loss": 0.4748, "step": 14690 }, { "epoch": 0.3730121460572908, "grad_norm": 0.357421875, "learning_rate": 0.00029321280568754803, "loss": 0.4921, "step": 14695 }, { "epoch": 0.37313906410630654, "grad_norm": 0.34765625, "learning_rate": 0.0002932029202188517, "loss": 0.4711, "step": 14700 }, { "epoch": 0.3732659821553223, "grad_norm": 0.361328125, "learning_rate": 0.00029319302772324516, "loss": 0.4767, "step": 14705 }, { "epoch": 0.3733929002043381, "grad_norm": 0.345703125, "learning_rate": 0.0002931831282012138, "loss": 0.5045, "step": 14710 }, { "epoch": 0.3735198182533538, "grad_norm": 0.37890625, "learning_rate": 0.0002931732216532433, "loss": 0.4876, "step": 14715 }, { "epoch": 0.37364673630236955, "grad_norm": 0.337890625, "learning_rate": 0.00029316330807981994, "loss": 0.4962, "step": 14720 }, { "epoch": 0.3737736543513853, "grad_norm": 0.337890625, "learning_rate": 0.00029315338748143, "loss": 0.4581, "step": 14725 }, { "epoch": 0.3739005724004011, "grad_norm": 0.384765625, "learning_rate": 0.00029314345985856046, "loss": 0.5107, "step": 14730 }, { "epoch": 0.3740274904494168, "grad_norm": 0.33984375, "learning_rate": 0.0002931335252116983, "loss": 0.4835, "step": 14735 }, { "epoch": 0.37415440849843257, "grad_norm": 0.361328125, "learning_rate": 0.0002931235835413311, "loss": 0.4928, "step": 14740 }, { "epoch": 0.37428132654744833, "grad_norm": 0.33984375, "learning_rate": 0.00029311363484794663, "loss": 0.4977, "step": 14745 }, { "epoch": 0.37440824459646405, "grad_norm": 0.326171875, "learning_rate": 0.0002931036791320331, "loss": 0.4718, "step": 14750 }, { "epoch": 0.3745351626454798, "grad_norm": 0.34375, "learning_rate": 0.00029309371639407906, "loss": 0.4862, "step": 14755 }, { "epoch": 0.3746620806944956, "grad_norm": 0.333984375, "learning_rate": 0.00029308374663457337, "loss": 0.466, "step": 14760 }, { "epoch": 0.3747889987435113, "grad_norm": 0.345703125, "learning_rate": 0.00029307376985400523, "loss": 0.5204, "step": 14765 }, { "epoch": 0.37491591679252706, "grad_norm": 0.34765625, "learning_rate": 0.00029306378605286417, "loss": 0.4814, "step": 14770 }, { "epoch": 0.3750428348415428, "grad_norm": 0.3359375, "learning_rate": 0.00029305379523164013, "loss": 0.4629, "step": 14775 }, { "epoch": 0.37516975289055854, "grad_norm": 0.33203125, "learning_rate": 0.00029304379739082334, "loss": 0.4671, "step": 14780 }, { "epoch": 0.3752966709395743, "grad_norm": 0.349609375, "learning_rate": 0.0002930337925309044, "loss": 0.4908, "step": 14785 }, { "epoch": 0.3754235889885901, "grad_norm": 0.359375, "learning_rate": 0.00029302378065237424, "loss": 0.4903, "step": 14790 }, { "epoch": 0.37555050703760584, "grad_norm": 0.36328125, "learning_rate": 0.00029301376175572415, "loss": 0.4778, "step": 14795 }, { "epoch": 0.37567742508662155, "grad_norm": 0.3515625, "learning_rate": 0.0002930037358414457, "loss": 0.4883, "step": 14800 }, { "epoch": 0.3758043431356373, "grad_norm": 0.337890625, "learning_rate": 0.00029299370291003087, "loss": 0.5036, "step": 14805 }, { "epoch": 0.3759312611846531, "grad_norm": 0.375, "learning_rate": 0.0002929836629619721, "loss": 0.4962, "step": 14810 }, { "epoch": 0.3760581792336688, "grad_norm": 0.392578125, "learning_rate": 0.00029297361599776183, "loss": 0.4975, "step": 14815 }, { "epoch": 0.37618509728268457, "grad_norm": 0.341796875, "learning_rate": 0.00029296356201789325, "loss": 0.487, "step": 14820 }, { "epoch": 0.37631201533170033, "grad_norm": 0.34765625, "learning_rate": 0.0002929535010228596, "loss": 0.4976, "step": 14825 }, { "epoch": 0.37643893338071605, "grad_norm": 0.337890625, "learning_rate": 0.0002929434330131546, "loss": 0.4864, "step": 14830 }, { "epoch": 0.3765658514297318, "grad_norm": 0.34375, "learning_rate": 0.00029293335798927234, "loss": 0.4858, "step": 14835 }, { "epoch": 0.3766927694787476, "grad_norm": 0.373046875, "learning_rate": 0.00029292327595170707, "loss": 0.5054, "step": 14840 }, { "epoch": 0.37681968752776335, "grad_norm": 0.361328125, "learning_rate": 0.0002929131869009536, "loss": 0.4937, "step": 14845 }, { "epoch": 0.37694660557677906, "grad_norm": 0.330078125, "learning_rate": 0.00029290309083750696, "loss": 0.4646, "step": 14850 }, { "epoch": 0.3770735236257948, "grad_norm": 0.33984375, "learning_rate": 0.0002928929877618626, "loss": 0.5052, "step": 14855 }, { "epoch": 0.3772004416748106, "grad_norm": 0.341796875, "learning_rate": 0.00029288287767451624, "loss": 0.4885, "step": 14860 }, { "epoch": 0.3773273597238263, "grad_norm": 0.361328125, "learning_rate": 0.00029287276057596404, "loss": 0.5011, "step": 14865 }, { "epoch": 0.3774542777728421, "grad_norm": 0.349609375, "learning_rate": 0.00029286263646670233, "loss": 0.4941, "step": 14870 }, { "epoch": 0.37758119582185784, "grad_norm": 0.34375, "learning_rate": 0.00029285250534722795, "loss": 0.4732, "step": 14875 }, { "epoch": 0.37770811387087355, "grad_norm": 0.365234375, "learning_rate": 0.00029284236721803807, "loss": 0.5163, "step": 14880 }, { "epoch": 0.3778350319198893, "grad_norm": 0.33203125, "learning_rate": 0.0002928322220796301, "loss": 0.4812, "step": 14885 }, { "epoch": 0.3779619499689051, "grad_norm": 0.34375, "learning_rate": 0.0002928220699325019, "loss": 0.4753, "step": 14890 }, { "epoch": 0.37808886801792085, "grad_norm": 0.369140625, "learning_rate": 0.0002928119107771517, "loss": 0.5092, "step": 14895 }, { "epoch": 0.37821578606693657, "grad_norm": 0.36328125, "learning_rate": 0.0002928017446140778, "loss": 0.5015, "step": 14900 }, { "epoch": 0.37834270411595233, "grad_norm": 0.341796875, "learning_rate": 0.0002927915714437792, "loss": 0.4776, "step": 14905 }, { "epoch": 0.3784696221649681, "grad_norm": 0.3359375, "learning_rate": 0.00029278139126675515, "loss": 0.4713, "step": 14910 }, { "epoch": 0.3785965402139838, "grad_norm": 0.345703125, "learning_rate": 0.000292771204083505, "loss": 0.4876, "step": 14915 }, { "epoch": 0.3787234582629996, "grad_norm": 0.35546875, "learning_rate": 0.0002927610098945288, "loss": 0.4594, "step": 14920 }, { "epoch": 0.37885037631201535, "grad_norm": 0.345703125, "learning_rate": 0.00029275080870032674, "loss": 0.4839, "step": 14925 }, { "epoch": 0.37897729436103106, "grad_norm": 0.34765625, "learning_rate": 0.0002927406005013993, "loss": 0.4929, "step": 14930 }, { "epoch": 0.3791042124100468, "grad_norm": 0.40625, "learning_rate": 0.00029273038529824746, "loss": 0.5078, "step": 14935 }, { "epoch": 0.3792311304590626, "grad_norm": 0.390625, "learning_rate": 0.0002927201630913725, "loss": 0.5146, "step": 14940 }, { "epoch": 0.37935804850807836, "grad_norm": 0.357421875, "learning_rate": 0.000292709933881276, "loss": 0.5136, "step": 14945 }, { "epoch": 0.37948496655709407, "grad_norm": 0.3359375, "learning_rate": 0.0002926996976684599, "loss": 0.4743, "step": 14950 }, { "epoch": 0.37961188460610984, "grad_norm": 0.333984375, "learning_rate": 0.0002926894544534264, "loss": 0.4904, "step": 14955 }, { "epoch": 0.3797388026551256, "grad_norm": 0.328125, "learning_rate": 0.0002926792042366783, "loss": 0.4685, "step": 14960 }, { "epoch": 0.3798657207041413, "grad_norm": 0.3515625, "learning_rate": 0.00029266894701871843, "loss": 0.4978, "step": 14965 }, { "epoch": 0.3799926387531571, "grad_norm": 0.34765625, "learning_rate": 0.0002926586828000501, "loss": 0.4789, "step": 14970 }, { "epoch": 0.38011955680217285, "grad_norm": 0.38671875, "learning_rate": 0.00029264841158117713, "loss": 0.5195, "step": 14975 }, { "epoch": 0.38024647485118857, "grad_norm": 0.3515625, "learning_rate": 0.0002926381333626034, "loss": 0.5146, "step": 14980 }, { "epoch": 0.38037339290020433, "grad_norm": 0.345703125, "learning_rate": 0.0002926278481448333, "loss": 0.4844, "step": 14985 }, { "epoch": 0.3805003109492201, "grad_norm": 0.34765625, "learning_rate": 0.0002926175559283715, "loss": 0.4473, "step": 14990 }, { "epoch": 0.3806272289982358, "grad_norm": 0.36328125, "learning_rate": 0.000292607256713723, "loss": 0.4684, "step": 14995 }, { "epoch": 0.3807541470472516, "grad_norm": 0.337890625, "learning_rate": 0.0002925969505013932, "loss": 0.4887, "step": 15000 }, { "epoch": 0.38088106509626735, "grad_norm": 0.359375, "learning_rate": 0.00029258663729188794, "loss": 0.5076, "step": 15005 }, { "epoch": 0.3810079831452831, "grad_norm": 0.361328125, "learning_rate": 0.0002925763170857131, "loss": 0.4805, "step": 15010 }, { "epoch": 0.3811349011942988, "grad_norm": 0.369140625, "learning_rate": 0.00029256598988337525, "loss": 0.4924, "step": 15015 }, { "epoch": 0.3812618192433146, "grad_norm": 0.345703125, "learning_rate": 0.000292555655685381, "loss": 0.5029, "step": 15020 }, { "epoch": 0.38138873729233036, "grad_norm": 0.36328125, "learning_rate": 0.00029254531449223755, "loss": 0.4974, "step": 15025 }, { "epoch": 0.38151565534134607, "grad_norm": 0.271484375, "learning_rate": 0.00029253496630445225, "loss": 0.4444, "step": 15030 }, { "epoch": 0.38164257339036184, "grad_norm": 0.337890625, "learning_rate": 0.000292524611122533, "loss": 0.4886, "step": 15035 }, { "epoch": 0.3817694914393776, "grad_norm": 0.34375, "learning_rate": 0.0002925142489469878, "loss": 0.5103, "step": 15040 }, { "epoch": 0.3818964094883933, "grad_norm": 0.341796875, "learning_rate": 0.0002925038797783252, "loss": 0.4627, "step": 15045 }, { "epoch": 0.3820233275374091, "grad_norm": 0.357421875, "learning_rate": 0.000292493503617054, "loss": 0.5112, "step": 15050 }, { "epoch": 0.38215024558642485, "grad_norm": 0.359375, "learning_rate": 0.0002924831204636833, "loss": 0.4742, "step": 15055 }, { "epoch": 0.3822771636354406, "grad_norm": 0.388671875, "learning_rate": 0.0002924727303187227, "loss": 0.4968, "step": 15060 }, { "epoch": 0.38240408168445633, "grad_norm": 0.37109375, "learning_rate": 0.0002924623331826819, "loss": 0.4894, "step": 15065 }, { "epoch": 0.3825309997334721, "grad_norm": 0.345703125, "learning_rate": 0.00029245192905607117, "loss": 0.4943, "step": 15070 }, { "epoch": 0.38265791778248787, "grad_norm": 0.359375, "learning_rate": 0.00029244151793940106, "loss": 0.4833, "step": 15075 }, { "epoch": 0.3827848358315036, "grad_norm": 0.3671875, "learning_rate": 0.0002924310998331824, "loss": 0.4952, "step": 15080 }, { "epoch": 0.38291175388051935, "grad_norm": 0.36328125, "learning_rate": 0.0002924206747379264, "loss": 0.5237, "step": 15085 }, { "epoch": 0.3830386719295351, "grad_norm": 0.373046875, "learning_rate": 0.0002924102426541446, "loss": 0.532, "step": 15090 }, { "epoch": 0.3831655899785508, "grad_norm": 0.345703125, "learning_rate": 0.00029239980358234893, "loss": 0.4753, "step": 15095 }, { "epoch": 0.3832925080275666, "grad_norm": 0.365234375, "learning_rate": 0.00029238935752305163, "loss": 0.4885, "step": 15100 }, { "epoch": 0.38341942607658236, "grad_norm": 0.3359375, "learning_rate": 0.0002923789044767653, "loss": 0.4804, "step": 15105 }, { "epoch": 0.3835463441255981, "grad_norm": 0.359375, "learning_rate": 0.00029236844444400286, "loss": 0.4801, "step": 15110 }, { "epoch": 0.38367326217461384, "grad_norm": 0.345703125, "learning_rate": 0.0002923579774252775, "loss": 0.5004, "step": 15115 }, { "epoch": 0.3838001802236296, "grad_norm": 0.34765625, "learning_rate": 0.0002923475034211029, "loss": 0.4775, "step": 15120 }, { "epoch": 0.3839270982726454, "grad_norm": 0.357421875, "learning_rate": 0.00029233702243199305, "loss": 0.4857, "step": 15125 }, { "epoch": 0.3840540163216611, "grad_norm": 0.34375, "learning_rate": 0.00029232653445846217, "loss": 0.4843, "step": 15130 }, { "epoch": 0.38418093437067685, "grad_norm": 0.298828125, "learning_rate": 0.000292316039501025, "loss": 0.4814, "step": 15135 }, { "epoch": 0.3843078524196926, "grad_norm": 0.3203125, "learning_rate": 0.00029230553756019636, "loss": 0.4645, "step": 15140 }, { "epoch": 0.38443477046870833, "grad_norm": 0.333984375, "learning_rate": 0.00029229502863649176, "loss": 0.46, "step": 15145 }, { "epoch": 0.3845616885177241, "grad_norm": 0.36328125, "learning_rate": 0.00029228451273042677, "loss": 0.5061, "step": 15150 }, { "epoch": 0.38468860656673987, "grad_norm": 0.3671875, "learning_rate": 0.00029227398984251737, "loss": 0.5061, "step": 15155 }, { "epoch": 0.38481552461575563, "grad_norm": 0.349609375, "learning_rate": 0.00029226345997328, "loss": 0.513, "step": 15160 }, { "epoch": 0.38494244266477134, "grad_norm": 0.3671875, "learning_rate": 0.00029225292312323135, "loss": 0.4711, "step": 15165 }, { "epoch": 0.3850693607137871, "grad_norm": 0.3359375, "learning_rate": 0.0002922423792928884, "loss": 0.4721, "step": 15170 }, { "epoch": 0.3851962787628029, "grad_norm": 0.345703125, "learning_rate": 0.00029223182848276856, "loss": 0.4648, "step": 15175 }, { "epoch": 0.3853231968118186, "grad_norm": 0.3515625, "learning_rate": 0.00029222127069338953, "loss": 0.5137, "step": 15180 }, { "epoch": 0.38545011486083436, "grad_norm": 0.35546875, "learning_rate": 0.0002922107059252694, "loss": 0.5139, "step": 15185 }, { "epoch": 0.3855770329098501, "grad_norm": 0.373046875, "learning_rate": 0.0002922001341789266, "loss": 0.4704, "step": 15190 }, { "epoch": 0.38570395095886584, "grad_norm": 0.361328125, "learning_rate": 0.0002921895554548799, "loss": 0.5107, "step": 15195 }, { "epoch": 0.3858308690078816, "grad_norm": 0.30859375, "learning_rate": 0.0002921789697536483, "loss": 0.4837, "step": 15200 }, { "epoch": 0.3859577870568974, "grad_norm": 0.361328125, "learning_rate": 0.0002921683770757513, "loss": 0.4933, "step": 15205 }, { "epoch": 0.38608470510591314, "grad_norm": 0.345703125, "learning_rate": 0.00029215777742170863, "loss": 0.477, "step": 15210 }, { "epoch": 0.38621162315492885, "grad_norm": 0.34765625, "learning_rate": 0.00029214717079204054, "loss": 0.4747, "step": 15215 }, { "epoch": 0.3863385412039446, "grad_norm": 0.376953125, "learning_rate": 0.0002921365571872673, "loss": 0.5018, "step": 15220 }, { "epoch": 0.3864654592529604, "grad_norm": 0.32421875, "learning_rate": 0.0002921259366079099, "loss": 0.4654, "step": 15225 }, { "epoch": 0.3865923773019761, "grad_norm": 0.33203125, "learning_rate": 0.0002921153090544894, "loss": 0.465, "step": 15230 }, { "epoch": 0.38671929535099187, "grad_norm": 0.326171875, "learning_rate": 0.00029210467452752727, "loss": 0.4695, "step": 15235 }, { "epoch": 0.38684621340000763, "grad_norm": 0.361328125, "learning_rate": 0.00029209403302754534, "loss": 0.4942, "step": 15240 }, { "epoch": 0.38697313144902334, "grad_norm": 0.359375, "learning_rate": 0.00029208338455506585, "loss": 0.4731, "step": 15245 }, { "epoch": 0.3871000494980391, "grad_norm": 0.328125, "learning_rate": 0.00029207272911061123, "loss": 0.4974, "step": 15250 }, { "epoch": 0.3872269675470549, "grad_norm": 0.357421875, "learning_rate": 0.00029206206669470444, "loss": 0.4922, "step": 15255 }, { "epoch": 0.3873538855960706, "grad_norm": 0.357421875, "learning_rate": 0.0002920513973078686, "loss": 0.4598, "step": 15260 }, { "epoch": 0.38748080364508636, "grad_norm": 0.36328125, "learning_rate": 0.0002920407209506273, "loss": 0.4985, "step": 15265 }, { "epoch": 0.3876077216941021, "grad_norm": 0.341796875, "learning_rate": 0.0002920300376235044, "loss": 0.4742, "step": 15270 }, { "epoch": 0.3877346397431179, "grad_norm": 0.33984375, "learning_rate": 0.0002920193473270241, "loss": 0.4494, "step": 15275 }, { "epoch": 0.3878615577921336, "grad_norm": 0.349609375, "learning_rate": 0.000292008650061711, "loss": 0.4765, "step": 15280 }, { "epoch": 0.38798847584114937, "grad_norm": 0.3359375, "learning_rate": 0.00029199794582809006, "loss": 0.4607, "step": 15285 }, { "epoch": 0.38811539389016514, "grad_norm": 0.359375, "learning_rate": 0.00029198723462668647, "loss": 0.4859, "step": 15290 }, { "epoch": 0.38824231193918085, "grad_norm": 0.32421875, "learning_rate": 0.00029197651645802576, "loss": 0.4512, "step": 15295 }, { "epoch": 0.3883692299881966, "grad_norm": 0.337890625, "learning_rate": 0.00029196579132263403, "loss": 0.4997, "step": 15300 }, { "epoch": 0.3884961480372124, "grad_norm": 0.3515625, "learning_rate": 0.0002919550592210374, "loss": 0.5075, "step": 15305 }, { "epoch": 0.3886230660862281, "grad_norm": 0.3828125, "learning_rate": 0.00029194432015376264, "loss": 0.5008, "step": 15310 }, { "epoch": 0.38874998413524386, "grad_norm": 0.33984375, "learning_rate": 0.0002919335741213366, "loss": 0.4923, "step": 15315 }, { "epoch": 0.38887690218425963, "grad_norm": 0.380859375, "learning_rate": 0.0002919228211242866, "loss": 0.5215, "step": 15320 }, { "epoch": 0.3890038202332754, "grad_norm": 0.32421875, "learning_rate": 0.0002919120611631403, "loss": 0.4885, "step": 15325 }, { "epoch": 0.3891307382822911, "grad_norm": 0.35546875, "learning_rate": 0.0002919012942384257, "loss": 0.5209, "step": 15330 }, { "epoch": 0.3892576563313069, "grad_norm": 0.3671875, "learning_rate": 0.0002918905203506711, "loss": 0.4818, "step": 15335 }, { "epoch": 0.38938457438032265, "grad_norm": 0.3203125, "learning_rate": 0.0002918797395004052, "loss": 0.4741, "step": 15340 }, { "epoch": 0.38951149242933836, "grad_norm": 0.353515625, "learning_rate": 0.000291868951688157, "loss": 0.4809, "step": 15345 }, { "epoch": 0.3896384104783541, "grad_norm": 0.357421875, "learning_rate": 0.0002918581569144559, "loss": 0.4865, "step": 15350 }, { "epoch": 0.3897653285273699, "grad_norm": 0.353515625, "learning_rate": 0.0002918473551798315, "loss": 0.4917, "step": 15355 }, { "epoch": 0.3898922465763856, "grad_norm": 0.345703125, "learning_rate": 0.00029183654648481385, "loss": 0.5214, "step": 15360 }, { "epoch": 0.39001916462540137, "grad_norm": 0.328125, "learning_rate": 0.0002918257308299334, "loss": 0.4511, "step": 15365 }, { "epoch": 0.39014608267441714, "grad_norm": 0.373046875, "learning_rate": 0.00029181490821572083, "loss": 0.4805, "step": 15370 }, { "epoch": 0.3902730007234329, "grad_norm": 0.35546875, "learning_rate": 0.0002918040786427072, "loss": 0.4878, "step": 15375 }, { "epoch": 0.3903999187724486, "grad_norm": 0.359375, "learning_rate": 0.00029179324211142394, "loss": 0.4958, "step": 15380 }, { "epoch": 0.3905268368214644, "grad_norm": 0.3671875, "learning_rate": 0.00029178239862240275, "loss": 0.5324, "step": 15385 }, { "epoch": 0.39065375487048015, "grad_norm": 0.33203125, "learning_rate": 0.00029177154817617575, "loss": 0.4928, "step": 15390 }, { "epoch": 0.39078067291949586, "grad_norm": 0.3515625, "learning_rate": 0.00029176069077327534, "loss": 0.4895, "step": 15395 }, { "epoch": 0.39090759096851163, "grad_norm": 0.373046875, "learning_rate": 0.0002917498264142343, "loss": 0.5065, "step": 15400 }, { "epoch": 0.3910345090175274, "grad_norm": 0.365234375, "learning_rate": 0.0002917389550995858, "loss": 0.4972, "step": 15405 }, { "epoch": 0.3911614270665431, "grad_norm": 0.345703125, "learning_rate": 0.00029172807682986316, "loss": 0.4907, "step": 15410 }, { "epoch": 0.3912883451155589, "grad_norm": 0.333984375, "learning_rate": 0.00029171719160560024, "loss": 0.4547, "step": 15415 }, { "epoch": 0.39141526316457464, "grad_norm": 0.33984375, "learning_rate": 0.00029170629942733127, "loss": 0.4524, "step": 15420 }, { "epoch": 0.3915421812135904, "grad_norm": 0.34765625, "learning_rate": 0.00029169540029559053, "loss": 0.4931, "step": 15425 }, { "epoch": 0.3916690992626061, "grad_norm": 0.3203125, "learning_rate": 0.000291684494210913, "loss": 0.4643, "step": 15430 }, { "epoch": 0.3917960173116219, "grad_norm": 0.328125, "learning_rate": 0.0002916735811738338, "loss": 0.484, "step": 15435 }, { "epoch": 0.39192293536063766, "grad_norm": 0.3046875, "learning_rate": 0.00029166266118488835, "loss": 0.4505, "step": 15440 }, { "epoch": 0.39204985340965337, "grad_norm": 0.357421875, "learning_rate": 0.00029165173424461257, "loss": 0.4901, "step": 15445 }, { "epoch": 0.39217677145866914, "grad_norm": 0.337890625, "learning_rate": 0.00029164080035354263, "loss": 0.4785, "step": 15450 }, { "epoch": 0.3923036895076849, "grad_norm": 0.353515625, "learning_rate": 0.000291629859512215, "loss": 0.4753, "step": 15455 }, { "epoch": 0.3924306075567006, "grad_norm": 0.365234375, "learning_rate": 0.0002916189117211666, "loss": 0.4999, "step": 15460 }, { "epoch": 0.3925575256057164, "grad_norm": 0.3359375, "learning_rate": 0.0002916079569809346, "loss": 0.4836, "step": 15465 }, { "epoch": 0.39268444365473215, "grad_norm": 0.359375, "learning_rate": 0.0002915969952920566, "loss": 0.4789, "step": 15470 }, { "epoch": 0.39281136170374786, "grad_norm": 0.35546875, "learning_rate": 0.0002915860266550704, "loss": 0.5178, "step": 15475 }, { "epoch": 0.39293827975276363, "grad_norm": 0.345703125, "learning_rate": 0.00029157505107051427, "loss": 0.4975, "step": 15480 }, { "epoch": 0.3930651978017794, "grad_norm": 0.33984375, "learning_rate": 0.0002915640685389268, "loss": 0.46, "step": 15485 }, { "epoch": 0.39319211585079517, "grad_norm": 0.375, "learning_rate": 0.0002915530790608469, "loss": 0.5114, "step": 15490 }, { "epoch": 0.3933190338998109, "grad_norm": 0.380859375, "learning_rate": 0.0002915420826368138, "loss": 0.4913, "step": 15495 }, { "epoch": 0.39344595194882664, "grad_norm": 0.3515625, "learning_rate": 0.00029153107926736707, "loss": 0.4727, "step": 15500 }, { "epoch": 0.3935728699978424, "grad_norm": 0.33984375, "learning_rate": 0.0002915200689530466, "loss": 0.4919, "step": 15505 }, { "epoch": 0.3936997880468581, "grad_norm": 0.357421875, "learning_rate": 0.0002915090516943928, "loss": 0.4929, "step": 15510 }, { "epoch": 0.3938267060958739, "grad_norm": 0.345703125, "learning_rate": 0.00029149802749194617, "loss": 0.5026, "step": 15515 }, { "epoch": 0.39395362414488966, "grad_norm": 0.34375, "learning_rate": 0.0002914869963462477, "loss": 0.4614, "step": 15520 }, { "epoch": 0.39408054219390537, "grad_norm": 0.33984375, "learning_rate": 0.00029147595825783866, "loss": 0.4519, "step": 15525 }, { "epoch": 0.39420746024292114, "grad_norm": 0.3515625, "learning_rate": 0.0002914649132272607, "loss": 0.4728, "step": 15530 }, { "epoch": 0.3943343782919369, "grad_norm": 0.322265625, "learning_rate": 0.00029145386125505583, "loss": 0.4711, "step": 15535 }, { "epoch": 0.39446129634095267, "grad_norm": 0.3515625, "learning_rate": 0.00029144280234176637, "loss": 0.495, "step": 15540 }, { "epoch": 0.3945882143899684, "grad_norm": 0.3515625, "learning_rate": 0.00029143173648793486, "loss": 0.5137, "step": 15545 }, { "epoch": 0.39471513243898415, "grad_norm": 0.33203125, "learning_rate": 0.0002914206636941044, "loss": 0.4869, "step": 15550 }, { "epoch": 0.3948420504879999, "grad_norm": 0.333984375, "learning_rate": 0.00029140958396081836, "loss": 0.4455, "step": 15555 }, { "epoch": 0.39496896853701563, "grad_norm": 0.357421875, "learning_rate": 0.00029139849728862033, "loss": 0.505, "step": 15560 }, { "epoch": 0.3950958865860314, "grad_norm": 0.359375, "learning_rate": 0.00029138740367805436, "loss": 0.5049, "step": 15565 }, { "epoch": 0.39522280463504716, "grad_norm": 0.341796875, "learning_rate": 0.0002913763031296648, "loss": 0.471, "step": 15570 }, { "epoch": 0.3953497226840629, "grad_norm": 0.326171875, "learning_rate": 0.00029136519564399636, "loss": 0.5096, "step": 15575 }, { "epoch": 0.39547664073307864, "grad_norm": 0.341796875, "learning_rate": 0.0002913540812215941, "loss": 0.4921, "step": 15580 }, { "epoch": 0.3956035587820944, "grad_norm": 0.337890625, "learning_rate": 0.00029134295986300336, "loss": 0.5031, "step": 15585 }, { "epoch": 0.3957304768311102, "grad_norm": 0.359375, "learning_rate": 0.00029133183156876993, "loss": 0.5116, "step": 15590 }, { "epoch": 0.3958573948801259, "grad_norm": 0.341796875, "learning_rate": 0.0002913206963394398, "loss": 0.4795, "step": 15595 }, { "epoch": 0.39598431292914166, "grad_norm": 0.361328125, "learning_rate": 0.00029130955417555935, "loss": 0.4815, "step": 15600 }, { "epoch": 0.3961112309781574, "grad_norm": 0.353515625, "learning_rate": 0.0002912984050776754, "loss": 0.5034, "step": 15605 }, { "epoch": 0.39623814902717314, "grad_norm": 0.328125, "learning_rate": 0.000291287249046335, "loss": 0.4766, "step": 15610 }, { "epoch": 0.3963650670761889, "grad_norm": 0.40625, "learning_rate": 0.00029127608608208556, "loss": 0.4903, "step": 15615 }, { "epoch": 0.39649198512520467, "grad_norm": 0.37109375, "learning_rate": 0.0002912649161854748, "loss": 0.4679, "step": 15620 }, { "epoch": 0.3966189031742204, "grad_norm": 0.34765625, "learning_rate": 0.000291253739357051, "loss": 0.5015, "step": 15625 }, { "epoch": 0.39674582122323615, "grad_norm": 0.34765625, "learning_rate": 0.00029124255559736243, "loss": 0.4935, "step": 15630 }, { "epoch": 0.3968727392722519, "grad_norm": 0.35546875, "learning_rate": 0.00029123136490695787, "loss": 0.4886, "step": 15635 }, { "epoch": 0.3969996573212677, "grad_norm": 0.392578125, "learning_rate": 0.00029122016728638653, "loss": 0.5161, "step": 15640 }, { "epoch": 0.3971265753702834, "grad_norm": 0.37109375, "learning_rate": 0.00029120896273619783, "loss": 0.5147, "step": 15645 }, { "epoch": 0.39725349341929916, "grad_norm": 0.36328125, "learning_rate": 0.00029119775125694157, "loss": 0.4941, "step": 15650 }, { "epoch": 0.39738041146831493, "grad_norm": 0.34765625, "learning_rate": 0.0002911865328491679, "loss": 0.4703, "step": 15655 }, { "epoch": 0.39750732951733064, "grad_norm": 0.33203125, "learning_rate": 0.00029117530751342734, "loss": 0.445, "step": 15660 }, { "epoch": 0.3976342475663464, "grad_norm": 0.3515625, "learning_rate": 0.00029116407525027065, "loss": 0.5152, "step": 15665 }, { "epoch": 0.3977611656153622, "grad_norm": 0.357421875, "learning_rate": 0.000291152836060249, "loss": 0.5115, "step": 15670 }, { "epoch": 0.3978880836643779, "grad_norm": 0.39453125, "learning_rate": 0.00029114158994391395, "loss": 0.5255, "step": 15675 }, { "epoch": 0.39801500171339366, "grad_norm": 0.34765625, "learning_rate": 0.0002911303369018173, "loss": 0.4916, "step": 15680 }, { "epoch": 0.3981419197624094, "grad_norm": 0.328125, "learning_rate": 0.0002911190769345112, "loss": 0.4829, "step": 15685 }, { "epoch": 0.39826883781142514, "grad_norm": 0.361328125, "learning_rate": 0.0002911078100425482, "loss": 0.5097, "step": 15690 }, { "epoch": 0.3983957558604409, "grad_norm": 0.349609375, "learning_rate": 0.0002910965362264812, "loss": 0.4166, "step": 15695 }, { "epoch": 0.39852267390945667, "grad_norm": 0.330078125, "learning_rate": 0.0002910852554868634, "loss": 0.4617, "step": 15700 }, { "epoch": 0.39864959195847244, "grad_norm": 0.34765625, "learning_rate": 0.00029107396782424824, "loss": 0.5118, "step": 15705 }, { "epoch": 0.39877651000748815, "grad_norm": 0.32421875, "learning_rate": 0.0002910626732391897, "loss": 0.4631, "step": 15710 }, { "epoch": 0.3989034280565039, "grad_norm": 0.365234375, "learning_rate": 0.000291051371732242, "loss": 0.5033, "step": 15715 }, { "epoch": 0.3990303461055197, "grad_norm": 0.337890625, "learning_rate": 0.0002910400633039597, "loss": 0.4881, "step": 15720 }, { "epoch": 0.3991572641545354, "grad_norm": 0.35546875, "learning_rate": 0.00029102874795489765, "loss": 0.5108, "step": 15725 }, { "epoch": 0.39928418220355116, "grad_norm": 0.345703125, "learning_rate": 0.0002910174256856111, "loss": 0.4857, "step": 15730 }, { "epoch": 0.39941110025256693, "grad_norm": 0.341796875, "learning_rate": 0.0002910060964966556, "loss": 0.4963, "step": 15735 }, { "epoch": 0.39953801830158264, "grad_norm": 0.330078125, "learning_rate": 0.0002909947603885872, "loss": 0.4733, "step": 15740 }, { "epoch": 0.3996649363505984, "grad_norm": 0.34375, "learning_rate": 0.000290983417361962, "loss": 0.4965, "step": 15745 }, { "epoch": 0.3997918543996142, "grad_norm": 0.35546875, "learning_rate": 0.0002909720674173367, "loss": 0.4972, "step": 15750 }, { "epoch": 0.39991877244862994, "grad_norm": 0.357421875, "learning_rate": 0.00029096071055526826, "loss": 0.4813, "step": 15755 }, { "epoch": 0.40004569049764566, "grad_norm": 0.33984375, "learning_rate": 0.0002909493467763138, "loss": 0.4731, "step": 15760 }, { "epoch": 0.4001726085466614, "grad_norm": 0.3515625, "learning_rate": 0.00029093797608103117, "loss": 0.5003, "step": 15765 }, { "epoch": 0.4002995265956772, "grad_norm": 0.3359375, "learning_rate": 0.0002909265984699781, "loss": 0.4636, "step": 15770 }, { "epoch": 0.4004264446446929, "grad_norm": 0.3515625, "learning_rate": 0.000290915213943713, "loss": 0.4907, "step": 15775 }, { "epoch": 0.40055336269370867, "grad_norm": 0.3515625, "learning_rate": 0.00029090382250279446, "loss": 0.486, "step": 15780 }, { "epoch": 0.40068028074272444, "grad_norm": 0.3203125, "learning_rate": 0.0002908924241477815, "loss": 0.4807, "step": 15785 }, { "epoch": 0.40080719879174015, "grad_norm": 0.326171875, "learning_rate": 0.0002908810188792334, "loss": 0.4652, "step": 15790 }, { "epoch": 0.4009341168407559, "grad_norm": 0.34765625, "learning_rate": 0.00029086960669770983, "loss": 0.4649, "step": 15795 }, { "epoch": 0.4010610348897717, "grad_norm": 0.337890625, "learning_rate": 0.0002908581876037708, "loss": 0.4999, "step": 15800 }, { "epoch": 0.40118795293878745, "grad_norm": 0.357421875, "learning_rate": 0.0002908467615979766, "loss": 0.4867, "step": 15805 }, { "epoch": 0.40131487098780316, "grad_norm": 0.33984375, "learning_rate": 0.00029083532868088793, "loss": 0.4847, "step": 15810 }, { "epoch": 0.40144178903681893, "grad_norm": 0.33203125, "learning_rate": 0.00029082388885306575, "loss": 0.4689, "step": 15815 }, { "epoch": 0.4015687070858347, "grad_norm": 0.314453125, "learning_rate": 0.00029081244211507143, "loss": 0.4575, "step": 15820 }, { "epoch": 0.4016956251348504, "grad_norm": 0.3359375, "learning_rate": 0.00029080098846746665, "loss": 0.4939, "step": 15825 }, { "epoch": 0.4018225431838662, "grad_norm": 0.310546875, "learning_rate": 0.0002907895279108135, "loss": 0.4464, "step": 15830 }, { "epoch": 0.40194946123288194, "grad_norm": 0.3515625, "learning_rate": 0.0002907780604456743, "loss": 0.5042, "step": 15835 }, { "epoch": 0.40207637928189766, "grad_norm": 0.333984375, "learning_rate": 0.00029076658607261175, "loss": 0.4753, "step": 15840 }, { "epoch": 0.4022032973309134, "grad_norm": 0.365234375, "learning_rate": 0.0002907551047921889, "loss": 0.4775, "step": 15845 }, { "epoch": 0.4023302153799292, "grad_norm": 0.345703125, "learning_rate": 0.0002907436166049691, "loss": 0.4687, "step": 15850 }, { "epoch": 0.40245713342894496, "grad_norm": 0.33984375, "learning_rate": 0.0002907321215115161, "loss": 0.5007, "step": 15855 }, { "epoch": 0.40258405147796067, "grad_norm": 0.361328125, "learning_rate": 0.000290720619512394, "loss": 0.5184, "step": 15860 }, { "epoch": 0.40271096952697644, "grad_norm": 0.36328125, "learning_rate": 0.0002907091106081671, "loss": 0.5148, "step": 15865 }, { "epoch": 0.4028378875759922, "grad_norm": 0.310546875, "learning_rate": 0.0002906975947994002, "loss": 0.4673, "step": 15870 }, { "epoch": 0.4029648056250079, "grad_norm": 0.380859375, "learning_rate": 0.0002906860720866583, "loss": 0.4974, "step": 15875 }, { "epoch": 0.4030917236740237, "grad_norm": 0.419921875, "learning_rate": 0.00029067454247050703, "loss": 0.5014, "step": 15880 }, { "epoch": 0.40321864172303945, "grad_norm": 0.341796875, "learning_rate": 0.0002906630059515119, "loss": 0.5145, "step": 15885 }, { "epoch": 0.40334555977205516, "grad_norm": 0.3671875, "learning_rate": 0.0002906514625302391, "loss": 0.5129, "step": 15890 }, { "epoch": 0.40347247782107093, "grad_norm": 0.3671875, "learning_rate": 0.00029063991220725513, "loss": 0.4909, "step": 15895 }, { "epoch": 0.4035993958700867, "grad_norm": 0.345703125, "learning_rate": 0.00029062835498312663, "loss": 0.4923, "step": 15900 }, { "epoch": 0.4037263139191024, "grad_norm": 0.333984375, "learning_rate": 0.0002906167908584208, "loss": 0.4971, "step": 15905 }, { "epoch": 0.4038532319681182, "grad_norm": 0.3359375, "learning_rate": 0.00029060521983370505, "loss": 0.4916, "step": 15910 }, { "epoch": 0.40398015001713394, "grad_norm": 0.3203125, "learning_rate": 0.0002905936419095471, "loss": 0.4686, "step": 15915 }, { "epoch": 0.4041070680661497, "grad_norm": 0.322265625, "learning_rate": 0.00029058205708651526, "loss": 0.4696, "step": 15920 }, { "epoch": 0.4042339861151654, "grad_norm": 0.353515625, "learning_rate": 0.00029057046536517785, "loss": 0.4934, "step": 15925 }, { "epoch": 0.4043609041641812, "grad_norm": 0.337890625, "learning_rate": 0.0002905588667461037, "loss": 0.4668, "step": 15930 }, { "epoch": 0.40448782221319696, "grad_norm": 0.37109375, "learning_rate": 0.00029054726122986196, "loss": 0.5056, "step": 15935 }, { "epoch": 0.40461474026221267, "grad_norm": 0.333984375, "learning_rate": 0.00029053564881702205, "loss": 0.5055, "step": 15940 }, { "epoch": 0.40474165831122844, "grad_norm": 0.337890625, "learning_rate": 0.00029052402950815385, "loss": 0.4954, "step": 15945 }, { "epoch": 0.4048685763602442, "grad_norm": 0.33984375, "learning_rate": 0.0002905124033038275, "loss": 0.474, "step": 15950 }, { "epoch": 0.4049954944092599, "grad_norm": 0.37109375, "learning_rate": 0.0002905007702046134, "loss": 0.4763, "step": 15955 }, { "epoch": 0.4051224124582757, "grad_norm": 0.3515625, "learning_rate": 0.00029048913021108264, "loss": 0.5117, "step": 15960 }, { "epoch": 0.40524933050729145, "grad_norm": 0.408203125, "learning_rate": 0.00029047748332380614, "loss": 0.4702, "step": 15965 }, { "epoch": 0.4053762485563072, "grad_norm": 0.33984375, "learning_rate": 0.00029046582954335545, "loss": 0.4703, "step": 15970 }, { "epoch": 0.40550316660532293, "grad_norm": 0.33984375, "learning_rate": 0.0002904541688703025, "loss": 0.4722, "step": 15975 }, { "epoch": 0.4056300846543387, "grad_norm": 0.326171875, "learning_rate": 0.00029044250130521947, "loss": 0.4933, "step": 15980 }, { "epoch": 0.40575700270335446, "grad_norm": 0.34765625, "learning_rate": 0.0002904308268486788, "loss": 0.4919, "step": 15985 }, { "epoch": 0.4058839207523702, "grad_norm": 0.375, "learning_rate": 0.00029041914550125345, "loss": 0.4591, "step": 15990 }, { "epoch": 0.40601083880138594, "grad_norm": 0.33984375, "learning_rate": 0.0002904074572635165, "loss": 0.4944, "step": 15995 }, { "epoch": 0.4061377568504017, "grad_norm": 0.333984375, "learning_rate": 0.0002903957621360416, "loss": 0.469, "step": 16000 }, { "epoch": 0.4062646748994174, "grad_norm": 0.35546875, "learning_rate": 0.00029038406011940254, "loss": 0.4572, "step": 16005 }, { "epoch": 0.4063915929484332, "grad_norm": 0.3359375, "learning_rate": 0.0002903723512141736, "loss": 0.4942, "step": 16010 }, { "epoch": 0.40651851099744896, "grad_norm": 0.357421875, "learning_rate": 0.00029036063542092923, "loss": 0.5168, "step": 16015 }, { "epoch": 0.4066454290464647, "grad_norm": 0.345703125, "learning_rate": 0.0002903489127402445, "loss": 0.484, "step": 16020 }, { "epoch": 0.40677234709548044, "grad_norm": 0.359375, "learning_rate": 0.00029033718317269446, "loss": 0.489, "step": 16025 }, { "epoch": 0.4068992651444962, "grad_norm": 0.34765625, "learning_rate": 0.00029032544671885475, "loss": 0.4726, "step": 16030 }, { "epoch": 0.40702618319351197, "grad_norm": 0.330078125, "learning_rate": 0.00029031370337930125, "loss": 0.4115, "step": 16035 }, { "epoch": 0.4071531012425277, "grad_norm": 0.34375, "learning_rate": 0.0002903019531546103, "loss": 0.4805, "step": 16040 }, { "epoch": 0.40728001929154345, "grad_norm": 0.359375, "learning_rate": 0.00029029019604535835, "loss": 0.484, "step": 16045 }, { "epoch": 0.4074069373405592, "grad_norm": 0.3515625, "learning_rate": 0.0002902784320521223, "loss": 0.4861, "step": 16050 }, { "epoch": 0.40753385538957493, "grad_norm": 0.330078125, "learning_rate": 0.0002902666611754795, "loss": 0.4851, "step": 16055 }, { "epoch": 0.4076607734385907, "grad_norm": 0.349609375, "learning_rate": 0.00029025488341600755, "loss": 0.4842, "step": 16060 }, { "epoch": 0.40778769148760646, "grad_norm": 0.361328125, "learning_rate": 0.00029024309877428436, "loss": 0.458, "step": 16065 }, { "epoch": 0.40791460953662223, "grad_norm": 0.375, "learning_rate": 0.00029023130725088815, "loss": 0.4895, "step": 16070 }, { "epoch": 0.40804152758563794, "grad_norm": 0.365234375, "learning_rate": 0.0002902195088463975, "loss": 0.5034, "step": 16075 }, { "epoch": 0.4081684456346537, "grad_norm": 0.345703125, "learning_rate": 0.00029020770356139145, "loss": 0.492, "step": 16080 }, { "epoch": 0.4082953636836695, "grad_norm": 0.35546875, "learning_rate": 0.00029019589139644926, "loss": 0.4733, "step": 16085 }, { "epoch": 0.4084222817326852, "grad_norm": 0.3671875, "learning_rate": 0.0002901840723521505, "loss": 0.4909, "step": 16090 }, { "epoch": 0.40854919978170096, "grad_norm": 0.373046875, "learning_rate": 0.00029017224642907517, "loss": 0.5219, "step": 16095 }, { "epoch": 0.4086761178307167, "grad_norm": 0.314453125, "learning_rate": 0.0002901604136278035, "loss": 0.4828, "step": 16100 }, { "epoch": 0.40880303587973243, "grad_norm": 0.37109375, "learning_rate": 0.00029014857394891617, "loss": 0.4989, "step": 16105 }, { "epoch": 0.4089299539287482, "grad_norm": 0.33984375, "learning_rate": 0.0002901367273929942, "loss": 0.4979, "step": 16110 }, { "epoch": 0.40905687197776397, "grad_norm": 0.326171875, "learning_rate": 0.0002901248739606188, "loss": 0.4789, "step": 16115 }, { "epoch": 0.4091837900267797, "grad_norm": 0.337890625, "learning_rate": 0.00029011301365237165, "loss": 0.481, "step": 16120 }, { "epoch": 0.40931070807579545, "grad_norm": 0.349609375, "learning_rate": 0.00029010114646883475, "loss": 0.4732, "step": 16125 }, { "epoch": 0.4094376261248112, "grad_norm": 0.359375, "learning_rate": 0.00029008927241059034, "loss": 0.488, "step": 16130 }, { "epoch": 0.409564544173827, "grad_norm": 1.546875, "learning_rate": 0.0002900773914782212, "loss": 0.4951, "step": 16135 }, { "epoch": 0.4096914622228427, "grad_norm": 0.353515625, "learning_rate": 0.00029006550367231023, "loss": 0.5161, "step": 16140 }, { "epoch": 0.40981838027185846, "grad_norm": 0.37890625, "learning_rate": 0.0002900536089934408, "loss": 0.485, "step": 16145 }, { "epoch": 0.40994529832087423, "grad_norm": 0.373046875, "learning_rate": 0.00029004170744219656, "loss": 0.5124, "step": 16150 }, { "epoch": 0.41007221636988994, "grad_norm": 0.357421875, "learning_rate": 0.00029002979901916157, "loss": 0.4757, "step": 16155 }, { "epoch": 0.4101991344189057, "grad_norm": 0.34375, "learning_rate": 0.00029001788372492006, "loss": 0.434, "step": 16160 }, { "epoch": 0.4103260524679215, "grad_norm": 0.34375, "learning_rate": 0.0002900059615600568, "loss": 0.4698, "step": 16165 }, { "epoch": 0.4104529705169372, "grad_norm": 0.380859375, "learning_rate": 0.00028999403252515674, "loss": 0.4964, "step": 16170 }, { "epoch": 0.41057988856595296, "grad_norm": 0.318359375, "learning_rate": 0.0002899820966208053, "loss": 0.4646, "step": 16175 }, { "epoch": 0.4107068066149687, "grad_norm": 0.345703125, "learning_rate": 0.00028997015384758817, "loss": 0.4597, "step": 16180 }, { "epoch": 0.4108337246639845, "grad_norm": 0.349609375, "learning_rate": 0.0002899582042060913, "loss": 0.4786, "step": 16185 }, { "epoch": 0.4109606427130002, "grad_norm": 0.392578125, "learning_rate": 0.0002899462476969011, "loss": 0.4635, "step": 16190 }, { "epoch": 0.41108756076201597, "grad_norm": 0.3515625, "learning_rate": 0.0002899342843206043, "loss": 0.4788, "step": 16195 }, { "epoch": 0.41121447881103174, "grad_norm": 0.36328125, "learning_rate": 0.0002899223140777879, "loss": 0.4926, "step": 16200 }, { "epoch": 0.41134139686004745, "grad_norm": 0.33984375, "learning_rate": 0.00028991033696903925, "loss": 0.494, "step": 16205 }, { "epoch": 0.4114683149090632, "grad_norm": 0.373046875, "learning_rate": 0.0002898983529949462, "loss": 0.4711, "step": 16210 }, { "epoch": 0.411595232958079, "grad_norm": 0.34375, "learning_rate": 0.00028988636215609656, "loss": 0.4594, "step": 16215 }, { "epoch": 0.4117221510070947, "grad_norm": 0.361328125, "learning_rate": 0.0002898743644530789, "loss": 0.4937, "step": 16220 }, { "epoch": 0.41184906905611046, "grad_norm": 0.3515625, "learning_rate": 0.00028986235988648194, "loss": 0.4679, "step": 16225 }, { "epoch": 0.41197598710512623, "grad_norm": 0.328125, "learning_rate": 0.00028985034845689466, "loss": 0.4575, "step": 16230 }, { "epoch": 0.412102905154142, "grad_norm": 0.3515625, "learning_rate": 0.0002898383301649065, "loss": 0.4827, "step": 16235 }, { "epoch": 0.4122298232031577, "grad_norm": 0.359375, "learning_rate": 0.0002898263050111071, "loss": 0.4912, "step": 16240 }, { "epoch": 0.4123567412521735, "grad_norm": 0.361328125, "learning_rate": 0.00028981427299608673, "loss": 0.4906, "step": 16245 }, { "epoch": 0.41248365930118924, "grad_norm": 0.361328125, "learning_rate": 0.00028980223412043563, "loss": 0.4935, "step": 16250 }, { "epoch": 0.41261057735020495, "grad_norm": 0.35546875, "learning_rate": 0.00028979018838474455, "loss": 0.4582, "step": 16255 }, { "epoch": 0.4127374953992207, "grad_norm": 0.3046875, "learning_rate": 0.00028977813578960463, "loss": 0.4693, "step": 16260 }, { "epoch": 0.4128644134482365, "grad_norm": 0.34375, "learning_rate": 0.0002897660763356073, "loss": 0.497, "step": 16265 }, { "epoch": 0.4129913314972522, "grad_norm": 0.3359375, "learning_rate": 0.0002897540100233443, "loss": 0.4823, "step": 16270 }, { "epoch": 0.41311824954626797, "grad_norm": 0.35546875, "learning_rate": 0.00028974193685340767, "loss": 0.5149, "step": 16275 }, { "epoch": 0.41324516759528374, "grad_norm": 0.341796875, "learning_rate": 0.00028972985682638986, "loss": 0.4602, "step": 16280 }, { "epoch": 0.4133720856442995, "grad_norm": 0.359375, "learning_rate": 0.0002897177699428836, "loss": 0.4834, "step": 16285 }, { "epoch": 0.4134990036933152, "grad_norm": 0.380859375, "learning_rate": 0.00028970567620348206, "loss": 0.4725, "step": 16290 }, { "epoch": 0.413625921742331, "grad_norm": 0.375, "learning_rate": 0.0002896935756087786, "loss": 0.4938, "step": 16295 }, { "epoch": 0.41375283979134675, "grad_norm": 0.333984375, "learning_rate": 0.00028968146815936714, "loss": 0.448, "step": 16300 }, { "epoch": 0.41387975784036246, "grad_norm": 0.3515625, "learning_rate": 0.0002896693538558416, "loss": 0.4798, "step": 16305 }, { "epoch": 0.41400667588937823, "grad_norm": 0.353515625, "learning_rate": 0.0002896572326987965, "loss": 0.4685, "step": 16310 }, { "epoch": 0.414133593938394, "grad_norm": 0.34375, "learning_rate": 0.0002896451046888266, "loss": 0.4724, "step": 16315 }, { "epoch": 0.4142605119874097, "grad_norm": 0.349609375, "learning_rate": 0.0002896329698265271, "loss": 0.4636, "step": 16320 }, { "epoch": 0.4143874300364255, "grad_norm": 0.341796875, "learning_rate": 0.0002896208281124934, "loss": 0.4911, "step": 16325 }, { "epoch": 0.41451434808544124, "grad_norm": 0.359375, "learning_rate": 0.0002896086795473212, "loss": 0.4669, "step": 16330 }, { "epoch": 0.41464126613445695, "grad_norm": 0.365234375, "learning_rate": 0.00028959652413160675, "loss": 0.4741, "step": 16335 }, { "epoch": 0.4147681841834727, "grad_norm": 0.34375, "learning_rate": 0.0002895843618659465, "loss": 0.4824, "step": 16340 }, { "epoch": 0.4148951022324885, "grad_norm": 0.359375, "learning_rate": 0.00028957219275093716, "loss": 0.4947, "step": 16345 }, { "epoch": 0.41502202028150426, "grad_norm": 0.349609375, "learning_rate": 0.00028956001678717594, "loss": 0.4872, "step": 16350 }, { "epoch": 0.41514893833051997, "grad_norm": 0.357421875, "learning_rate": 0.0002895478339752603, "loss": 0.4888, "step": 16355 }, { "epoch": 0.41527585637953573, "grad_norm": 0.33203125, "learning_rate": 0.000289535644315788, "loss": 0.4548, "step": 16360 }, { "epoch": 0.4154027744285515, "grad_norm": 0.357421875, "learning_rate": 0.0002895234478093573, "loss": 0.4881, "step": 16365 }, { "epoch": 0.4155296924775672, "grad_norm": 0.33984375, "learning_rate": 0.0002895112444565665, "loss": 0.5096, "step": 16370 }, { "epoch": 0.415656610526583, "grad_norm": 0.341796875, "learning_rate": 0.00028949903425801453, "loss": 0.4723, "step": 16375 }, { "epoch": 0.41578352857559875, "grad_norm": 0.345703125, "learning_rate": 0.00028948681721430056, "loss": 0.5271, "step": 16380 }, { "epoch": 0.41591044662461446, "grad_norm": 0.30859375, "learning_rate": 0.00028947459332602396, "loss": 0.4422, "step": 16385 }, { "epoch": 0.41603736467363023, "grad_norm": 0.37109375, "learning_rate": 0.0002894623625937847, "loss": 0.5119, "step": 16390 }, { "epoch": 0.416164282722646, "grad_norm": 0.36328125, "learning_rate": 0.00028945012501818283, "loss": 0.5011, "step": 16395 }, { "epoch": 0.41629120077166176, "grad_norm": 0.345703125, "learning_rate": 0.00028943788059981884, "loss": 0.4684, "step": 16400 }, { "epoch": 0.4164181188206775, "grad_norm": 0.349609375, "learning_rate": 0.0002894256293392937, "loss": 0.4842, "step": 16405 }, { "epoch": 0.41654503686969324, "grad_norm": 0.35546875, "learning_rate": 0.0002894133712372083, "loss": 0.4653, "step": 16410 }, { "epoch": 0.416671954918709, "grad_norm": 0.306640625, "learning_rate": 0.00028940110629416445, "loss": 0.4409, "step": 16415 }, { "epoch": 0.4167988729677247, "grad_norm": 0.33984375, "learning_rate": 0.00028938883451076373, "loss": 0.4725, "step": 16420 }, { "epoch": 0.4169257910167405, "grad_norm": 0.375, "learning_rate": 0.00028937655588760856, "loss": 0.453, "step": 16425 }, { "epoch": 0.41705270906575626, "grad_norm": 0.333984375, "learning_rate": 0.0002893642704253012, "loss": 0.4873, "step": 16430 }, { "epoch": 0.41717962711477197, "grad_norm": 0.349609375, "learning_rate": 0.00028935197812444464, "loss": 0.4863, "step": 16435 }, { "epoch": 0.41730654516378773, "grad_norm": 0.337890625, "learning_rate": 0.0002893396789856421, "loss": 0.4816, "step": 16440 }, { "epoch": 0.4174334632128035, "grad_norm": 0.33203125, "learning_rate": 0.0002893273730094969, "loss": 0.4831, "step": 16445 }, { "epoch": 0.41756038126181927, "grad_norm": 0.328125, "learning_rate": 0.000289315060196613, "loss": 0.4671, "step": 16450 }, { "epoch": 0.417687299310835, "grad_norm": 0.36328125, "learning_rate": 0.00028930274054759467, "loss": 0.493, "step": 16455 }, { "epoch": 0.41781421735985075, "grad_norm": 0.349609375, "learning_rate": 0.0002892904140630463, "loss": 0.4916, "step": 16460 }, { "epoch": 0.4179411354088665, "grad_norm": 0.341796875, "learning_rate": 0.0002892780807435728, "loss": 0.511, "step": 16465 }, { "epoch": 0.4180680534578822, "grad_norm": 0.375, "learning_rate": 0.0002892657405897794, "loss": 0.5122, "step": 16470 }, { "epoch": 0.418194971506898, "grad_norm": 0.330078125, "learning_rate": 0.00028925339360227155, "loss": 0.4894, "step": 16475 }, { "epoch": 0.41832188955591376, "grad_norm": 0.353515625, "learning_rate": 0.0002892410397816552, "loss": 0.4999, "step": 16480 }, { "epoch": 0.4184488076049295, "grad_norm": 0.337890625, "learning_rate": 0.00028922867912853644, "loss": 0.477, "step": 16485 }, { "epoch": 0.41857572565394524, "grad_norm": 0.34375, "learning_rate": 0.0002892163116435219, "loss": 0.4708, "step": 16490 }, { "epoch": 0.418702643702961, "grad_norm": 0.3515625, "learning_rate": 0.0002892039373272183, "loss": 0.5014, "step": 16495 }, { "epoch": 0.4188295617519768, "grad_norm": 0.33203125, "learning_rate": 0.00028919155618023306, "loss": 0.5026, "step": 16500 }, { "epoch": 0.4189564798009925, "grad_norm": 0.349609375, "learning_rate": 0.00028917916820317353, "loss": 0.4814, "step": 16505 }, { "epoch": 0.41908339785000825, "grad_norm": 0.353515625, "learning_rate": 0.0002891667733966477, "loss": 0.4731, "step": 16510 }, { "epoch": 0.419210315899024, "grad_norm": 0.34765625, "learning_rate": 0.0002891543717612637, "loss": 0.5033, "step": 16515 }, { "epoch": 0.41933723394803973, "grad_norm": 0.3671875, "learning_rate": 0.00028914196329763017, "loss": 0.4962, "step": 16520 }, { "epoch": 0.4194641519970555, "grad_norm": 0.3515625, "learning_rate": 0.00028912954800635585, "loss": 0.4443, "step": 16525 }, { "epoch": 0.41959107004607127, "grad_norm": 0.3203125, "learning_rate": 0.00028911712588805007, "loss": 0.4696, "step": 16530 }, { "epoch": 0.419717988095087, "grad_norm": 0.34375, "learning_rate": 0.0002891046969433224, "loss": 0.4746, "step": 16535 }, { "epoch": 0.41984490614410275, "grad_norm": 0.369140625, "learning_rate": 0.0002890922611727826, "loss": 0.5207, "step": 16540 }, { "epoch": 0.4199718241931185, "grad_norm": 0.361328125, "learning_rate": 0.0002890798185770409, "loss": 0.4664, "step": 16545 }, { "epoch": 0.4200987422421342, "grad_norm": 0.33984375, "learning_rate": 0.00028906736915670797, "loss": 0.5013, "step": 16550 }, { "epoch": 0.42022566029115, "grad_norm": 0.357421875, "learning_rate": 0.0002890549129123946, "loss": 0.4906, "step": 16555 }, { "epoch": 0.42035257834016576, "grad_norm": 0.3359375, "learning_rate": 0.0002890424498447121, "loss": 0.4987, "step": 16560 }, { "epoch": 0.42047949638918153, "grad_norm": 0.337890625, "learning_rate": 0.0002890299799542719, "loss": 0.4824, "step": 16565 }, { "epoch": 0.42060641443819724, "grad_norm": 0.345703125, "learning_rate": 0.000289017503241686, "loss": 0.4579, "step": 16570 }, { "epoch": 0.420733332487213, "grad_norm": 0.3359375, "learning_rate": 0.0002890050197075666, "loss": 0.484, "step": 16575 }, { "epoch": 0.4208602505362288, "grad_norm": 0.35546875, "learning_rate": 0.00028899252935252624, "loss": 0.471, "step": 16580 }, { "epoch": 0.4209871685852445, "grad_norm": 0.345703125, "learning_rate": 0.0002889800321771778, "loss": 0.4815, "step": 16585 }, { "epoch": 0.42111408663426025, "grad_norm": 0.357421875, "learning_rate": 0.0002889675281821346, "loss": 0.4789, "step": 16590 }, { "epoch": 0.421241004683276, "grad_norm": 0.35546875, "learning_rate": 0.0002889550173680101, "loss": 0.5083, "step": 16595 }, { "epoch": 0.42136792273229173, "grad_norm": 0.35546875, "learning_rate": 0.0002889424997354183, "loss": 0.4842, "step": 16600 }, { "epoch": 0.4214948407813075, "grad_norm": 0.3515625, "learning_rate": 0.0002889299752849734, "loss": 0.477, "step": 16605 }, { "epoch": 0.42162175883032327, "grad_norm": 0.3359375, "learning_rate": 0.00028891744401728987, "loss": 0.4923, "step": 16610 }, { "epoch": 0.42174867687933904, "grad_norm": 0.330078125, "learning_rate": 0.00028890490593298275, "loss": 0.5036, "step": 16615 }, { "epoch": 0.42187559492835475, "grad_norm": 0.36328125, "learning_rate": 0.0002888923610326672, "loss": 0.4499, "step": 16620 }, { "epoch": 0.4220025129773705, "grad_norm": 0.365234375, "learning_rate": 0.00028887980931695885, "loss": 0.5096, "step": 16625 }, { "epoch": 0.4221294310263863, "grad_norm": 0.345703125, "learning_rate": 0.00028886725078647356, "loss": 0.4909, "step": 16630 }, { "epoch": 0.422256349075402, "grad_norm": 0.341796875, "learning_rate": 0.0002888546854418276, "loss": 0.4688, "step": 16635 }, { "epoch": 0.42238326712441776, "grad_norm": 0.36328125, "learning_rate": 0.00028884211328363755, "loss": 0.489, "step": 16640 }, { "epoch": 0.42251018517343353, "grad_norm": 0.359375, "learning_rate": 0.0002888295343125203, "loss": 0.4908, "step": 16645 }, { "epoch": 0.42263710322244924, "grad_norm": 0.36328125, "learning_rate": 0.00028881694852909306, "loss": 0.488, "step": 16650 }, { "epoch": 0.422764021271465, "grad_norm": 0.35546875, "learning_rate": 0.0002888043559339735, "loss": 0.4865, "step": 16655 }, { "epoch": 0.4228909393204808, "grad_norm": 0.35546875, "learning_rate": 0.0002887917565277795, "loss": 0.5887, "step": 16660 }, { "epoch": 0.42301785736949654, "grad_norm": 0.341796875, "learning_rate": 0.00028877915031112923, "loss": 0.4837, "step": 16665 }, { "epoch": 0.42314477541851225, "grad_norm": 0.357421875, "learning_rate": 0.00028876653728464134, "loss": 0.4697, "step": 16670 }, { "epoch": 0.423271693467528, "grad_norm": 0.357421875, "learning_rate": 0.00028875391744893485, "loss": 0.4885, "step": 16675 }, { "epoch": 0.4233986115165438, "grad_norm": 0.33203125, "learning_rate": 0.0002887412908046288, "loss": 0.4595, "step": 16680 }, { "epoch": 0.4235255295655595, "grad_norm": 0.349609375, "learning_rate": 0.0002887286573523429, "loss": 0.523, "step": 16685 }, { "epoch": 0.42365244761457527, "grad_norm": 0.365234375, "learning_rate": 0.00028871601709269705, "loss": 0.4992, "step": 16690 }, { "epoch": 0.42377936566359103, "grad_norm": 0.365234375, "learning_rate": 0.0002887033700263115, "loss": 0.5052, "step": 16695 }, { "epoch": 0.42390628371260675, "grad_norm": 0.33984375, "learning_rate": 0.0002886907161538068, "loss": 0.4978, "step": 16700 }, { "epoch": 0.4240332017616225, "grad_norm": 0.369140625, "learning_rate": 0.00028867805547580397, "loss": 0.4545, "step": 16705 }, { "epoch": 0.4241601198106383, "grad_norm": 0.349609375, "learning_rate": 0.00028866538799292415, "loss": 0.4546, "step": 16710 }, { "epoch": 0.42428703785965405, "grad_norm": 0.33984375, "learning_rate": 0.000288652713705789, "loss": 0.4482, "step": 16715 }, { "epoch": 0.42441395590866976, "grad_norm": 0.33984375, "learning_rate": 0.0002886400326150204, "loss": 0.5131, "step": 16720 }, { "epoch": 0.4245408739576855, "grad_norm": 0.37890625, "learning_rate": 0.0002886273447212406, "loss": 0.4941, "step": 16725 }, { "epoch": 0.4246677920067013, "grad_norm": 0.330078125, "learning_rate": 0.0002886146500250723, "loss": 0.4794, "step": 16730 }, { "epoch": 0.424794710055717, "grad_norm": 0.34765625, "learning_rate": 0.0002886019485271383, "loss": 0.4749, "step": 16735 }, { "epoch": 0.4249216281047328, "grad_norm": 0.3515625, "learning_rate": 0.00028858924022806185, "loss": 0.4917, "step": 16740 }, { "epoch": 0.42504854615374854, "grad_norm": 0.3515625, "learning_rate": 0.00028857652512846667, "loss": 0.461, "step": 16745 }, { "epoch": 0.42517546420276425, "grad_norm": 0.318359375, "learning_rate": 0.00028856380322897653, "loss": 0.4861, "step": 16750 }, { "epoch": 0.42530238225178, "grad_norm": 0.349609375, "learning_rate": 0.0002885510745302158, "loss": 0.4623, "step": 16755 }, { "epoch": 0.4254293003007958, "grad_norm": 0.34765625, "learning_rate": 0.000288538339032809, "loss": 0.4644, "step": 16760 }, { "epoch": 0.4255562183498115, "grad_norm": 0.36328125, "learning_rate": 0.0002885255967373811, "loss": 0.496, "step": 16765 }, { "epoch": 0.42568313639882727, "grad_norm": 0.36328125, "learning_rate": 0.0002885128476445574, "loss": 0.4912, "step": 16770 }, { "epoch": 0.42581005444784303, "grad_norm": 0.3671875, "learning_rate": 0.0002885000917549634, "loss": 0.498, "step": 16775 }, { "epoch": 0.4259369724968588, "grad_norm": 0.337890625, "learning_rate": 0.0002884873290692251, "loss": 0.4565, "step": 16780 }, { "epoch": 0.4260638905458745, "grad_norm": 0.3359375, "learning_rate": 0.0002884745595879686, "loss": 0.4542, "step": 16785 }, { "epoch": 0.4261908085948903, "grad_norm": 0.369140625, "learning_rate": 0.00028846178331182074, "loss": 0.5024, "step": 16790 }, { "epoch": 0.42631772664390605, "grad_norm": 0.3671875, "learning_rate": 0.0002884490002414083, "loss": 0.5121, "step": 16795 }, { "epoch": 0.42644464469292176, "grad_norm": 0.328125, "learning_rate": 0.00028843621037735864, "loss": 0.4687, "step": 16800 }, { "epoch": 0.4265715627419375, "grad_norm": 0.326171875, "learning_rate": 0.0002884234137202992, "loss": 0.4698, "step": 16805 }, { "epoch": 0.4266984807909533, "grad_norm": 0.322265625, "learning_rate": 0.00028841061027085795, "loss": 0.4472, "step": 16810 }, { "epoch": 0.426825398839969, "grad_norm": 0.337890625, "learning_rate": 0.0002883978000296633, "loss": 0.472, "step": 16815 }, { "epoch": 0.4269523168889848, "grad_norm": 0.3359375, "learning_rate": 0.00028838498299734366, "loss": 0.503, "step": 16820 }, { "epoch": 0.42707923493800054, "grad_norm": 0.337890625, "learning_rate": 0.000288372159174528, "loss": 0.4702, "step": 16825 }, { "epoch": 0.4272061529870163, "grad_norm": 0.36328125, "learning_rate": 0.00028835932856184567, "loss": 0.4735, "step": 16830 }, { "epoch": 0.427333071036032, "grad_norm": 0.330078125, "learning_rate": 0.0002883464911599262, "loss": 0.4759, "step": 16835 }, { "epoch": 0.4274599890850478, "grad_norm": 0.361328125, "learning_rate": 0.0002883336469693995, "loss": 0.494, "step": 16840 }, { "epoch": 0.42758690713406355, "grad_norm": 0.34765625, "learning_rate": 0.00028832079599089587, "loss": 0.4783, "step": 16845 }, { "epoch": 0.42771382518307927, "grad_norm": 0.328125, "learning_rate": 0.00028830793822504585, "loss": 0.4777, "step": 16850 }, { "epoch": 0.42784074323209503, "grad_norm": 0.3515625, "learning_rate": 0.0002882950736724804, "loss": 0.4588, "step": 16855 }, { "epoch": 0.4279676612811108, "grad_norm": 0.35546875, "learning_rate": 0.0002882822023338308, "loss": 0.5019, "step": 16860 }, { "epoch": 0.4280945793301265, "grad_norm": 0.3359375, "learning_rate": 0.0002882693242097286, "loss": 0.4883, "step": 16865 }, { "epoch": 0.4282214973791423, "grad_norm": 0.3828125, "learning_rate": 0.00028825643930080567, "loss": 0.5136, "step": 16870 }, { "epoch": 0.42834841542815805, "grad_norm": 0.333984375, "learning_rate": 0.0002882435476076944, "loss": 0.4676, "step": 16875 }, { "epoch": 0.4284753334771738, "grad_norm": 0.337890625, "learning_rate": 0.00028823064913102736, "loss": 0.49, "step": 16880 }, { "epoch": 0.4286022515261895, "grad_norm": 0.369140625, "learning_rate": 0.0002882177438714374, "loss": 0.45, "step": 16885 }, { "epoch": 0.4287291695752053, "grad_norm": 0.365234375, "learning_rate": 0.00028820483182955773, "loss": 0.4746, "step": 16890 }, { "epoch": 0.42885608762422106, "grad_norm": 0.279296875, "learning_rate": 0.00028819191300602206, "loss": 0.4421, "step": 16895 }, { "epoch": 0.4289830056732368, "grad_norm": 0.345703125, "learning_rate": 0.00028817898740146435, "loss": 0.4602, "step": 16900 }, { "epoch": 0.42910992372225254, "grad_norm": 0.337890625, "learning_rate": 0.00028816605501651865, "loss": 0.4712, "step": 16905 }, { "epoch": 0.4292368417712683, "grad_norm": 0.369140625, "learning_rate": 0.0002881531158518197, "loss": 0.4886, "step": 16910 }, { "epoch": 0.429363759820284, "grad_norm": 0.3359375, "learning_rate": 0.0002881401699080024, "loss": 0.4756, "step": 16915 }, { "epoch": 0.4294906778692998, "grad_norm": 0.333984375, "learning_rate": 0.00028812721718570204, "loss": 0.457, "step": 16920 }, { "epoch": 0.42961759591831555, "grad_norm": 0.46875, "learning_rate": 0.00028811425768555406, "loss": 0.5032, "step": 16925 }, { "epoch": 0.4297445139673313, "grad_norm": 0.375, "learning_rate": 0.00028810129140819453, "loss": 0.4415, "step": 16930 }, { "epoch": 0.42987143201634703, "grad_norm": 0.359375, "learning_rate": 0.00028808831835425966, "loss": 0.4865, "step": 16935 }, { "epoch": 0.4299983500653628, "grad_norm": 0.345703125, "learning_rate": 0.000288075338524386, "loss": 0.5032, "step": 16940 }, { "epoch": 0.43012526811437857, "grad_norm": 0.3671875, "learning_rate": 0.00028806235191921047, "loss": 0.451, "step": 16945 }, { "epoch": 0.4302521861633943, "grad_norm": 0.35546875, "learning_rate": 0.0002880493585393703, "loss": 0.4801, "step": 16950 }, { "epoch": 0.43037910421241005, "grad_norm": 0.3515625, "learning_rate": 0.00028803635838550316, "loss": 0.4711, "step": 16955 }, { "epoch": 0.4305060222614258, "grad_norm": 0.3671875, "learning_rate": 0.00028802335145824683, "loss": 0.4601, "step": 16960 }, { "epoch": 0.4306329403104415, "grad_norm": 0.345703125, "learning_rate": 0.0002880103377582397, "loss": 0.4613, "step": 16965 }, { "epoch": 0.4307598583594573, "grad_norm": 0.359375, "learning_rate": 0.0002879973172861202, "loss": 0.5122, "step": 16970 }, { "epoch": 0.43088677640847306, "grad_norm": 0.341796875, "learning_rate": 0.00028798429004252735, "loss": 0.455, "step": 16975 }, { "epoch": 0.43101369445748877, "grad_norm": 0.33203125, "learning_rate": 0.00028797125602810034, "loss": 0.488, "step": 16980 }, { "epoch": 0.43114061250650454, "grad_norm": 0.330078125, "learning_rate": 0.00028795821524347874, "loss": 0.4869, "step": 16985 }, { "epoch": 0.4312675305555203, "grad_norm": 0.341796875, "learning_rate": 0.0002879451676893025, "loss": 0.4827, "step": 16990 }, { "epoch": 0.4313944486045361, "grad_norm": 0.32421875, "learning_rate": 0.0002879321133662118, "loss": 0.4865, "step": 16995 }, { "epoch": 0.4315213666535518, "grad_norm": 0.326171875, "learning_rate": 0.0002879190522748473, "loss": 0.4733, "step": 17000 }, { "epoch": 0.43164828470256755, "grad_norm": 0.333984375, "learning_rate": 0.00028790598441584976, "loss": 0.4777, "step": 17005 }, { "epoch": 0.4317752027515833, "grad_norm": 0.345703125, "learning_rate": 0.00028789290978986057, "loss": 0.468, "step": 17010 }, { "epoch": 0.43190212080059903, "grad_norm": 0.306640625, "learning_rate": 0.0002878798283975212, "loss": 0.4687, "step": 17015 }, { "epoch": 0.4320290388496148, "grad_norm": 0.345703125, "learning_rate": 0.00028786674023947357, "loss": 0.4804, "step": 17020 }, { "epoch": 0.43215595689863057, "grad_norm": 0.361328125, "learning_rate": 0.0002878536453163599, "loss": 0.4676, "step": 17025 }, { "epoch": 0.4322828749476463, "grad_norm": 0.357421875, "learning_rate": 0.0002878405436288228, "loss": 0.4775, "step": 17030 }, { "epoch": 0.43240979299666205, "grad_norm": 0.3671875, "learning_rate": 0.00028782743517750514, "loss": 0.4855, "step": 17035 }, { "epoch": 0.4325367110456778, "grad_norm": 0.3203125, "learning_rate": 0.0002878143199630501, "loss": 0.4776, "step": 17040 }, { "epoch": 0.4326636290946936, "grad_norm": 0.3515625, "learning_rate": 0.0002878011979861013, "loss": 0.4682, "step": 17045 }, { "epoch": 0.4327905471437093, "grad_norm": 0.3359375, "learning_rate": 0.0002877880692473026, "loss": 0.46, "step": 17050 }, { "epoch": 0.43291746519272506, "grad_norm": 0.3203125, "learning_rate": 0.0002877749337472982, "loss": 0.4726, "step": 17055 }, { "epoch": 0.4330443832417408, "grad_norm": 0.326171875, "learning_rate": 0.0002877617914867327, "loss": 0.4865, "step": 17060 }, { "epoch": 0.43317130129075654, "grad_norm": 0.326171875, "learning_rate": 0.000287748642466251, "loss": 0.4552, "step": 17065 }, { "epoch": 0.4332982193397723, "grad_norm": 0.361328125, "learning_rate": 0.00028773548668649824, "loss": 0.5101, "step": 17070 }, { "epoch": 0.4334251373887881, "grad_norm": 0.345703125, "learning_rate": 0.00028772232414812004, "loss": 0.4998, "step": 17075 }, { "epoch": 0.4335520554378038, "grad_norm": 0.3671875, "learning_rate": 0.00028770915485176225, "loss": 0.483, "step": 17080 }, { "epoch": 0.43367897348681955, "grad_norm": 0.375, "learning_rate": 0.0002876959787980711, "loss": 0.4826, "step": 17085 }, { "epoch": 0.4338058915358353, "grad_norm": 0.341796875, "learning_rate": 0.0002876827959876931, "loss": 0.4775, "step": 17090 }, { "epoch": 0.4339328095848511, "grad_norm": 0.3359375, "learning_rate": 0.0002876696064212751, "loss": 0.4853, "step": 17095 }, { "epoch": 0.4340597276338668, "grad_norm": 0.36328125, "learning_rate": 0.00028765641009946444, "loss": 0.491, "step": 17100 }, { "epoch": 0.43418664568288257, "grad_norm": 0.35546875, "learning_rate": 0.00028764320702290855, "loss": 0.4868, "step": 17105 }, { "epoch": 0.43431356373189833, "grad_norm": 0.3671875, "learning_rate": 0.0002876299971922553, "loss": 0.4851, "step": 17110 }, { "epoch": 0.43444048178091405, "grad_norm": 0.35546875, "learning_rate": 0.0002876167806081529, "loss": 0.5012, "step": 17115 }, { "epoch": 0.4345673998299298, "grad_norm": 0.376953125, "learning_rate": 0.0002876035572712499, "loss": 0.4904, "step": 17120 }, { "epoch": 0.4346943178789456, "grad_norm": 0.34765625, "learning_rate": 0.0002875903271821952, "loss": 0.4938, "step": 17125 }, { "epoch": 0.4348212359279613, "grad_norm": 0.330078125, "learning_rate": 0.0002875770903416379, "loss": 0.4566, "step": 17130 }, { "epoch": 0.43494815397697706, "grad_norm": 0.33203125, "learning_rate": 0.0002875638467502276, "loss": 0.4656, "step": 17135 }, { "epoch": 0.4350750720259928, "grad_norm": 0.357421875, "learning_rate": 0.00028755059640861414, "loss": 0.5173, "step": 17140 }, { "epoch": 0.4352019900750086, "grad_norm": 0.359375, "learning_rate": 0.0002875373393174477, "loss": 0.5024, "step": 17145 }, { "epoch": 0.4353289081240243, "grad_norm": 0.326171875, "learning_rate": 0.0002875240754773788, "loss": 0.4707, "step": 17150 }, { "epoch": 0.4354558261730401, "grad_norm": 0.353515625, "learning_rate": 0.00028751080488905833, "loss": 0.4947, "step": 17155 }, { "epoch": 0.43558274422205584, "grad_norm": 0.384765625, "learning_rate": 0.0002874975275531374, "loss": 0.4799, "step": 17160 }, { "epoch": 0.43570966227107155, "grad_norm": 0.353515625, "learning_rate": 0.0002874842434702676, "loss": 0.4866, "step": 17165 }, { "epoch": 0.4358365803200873, "grad_norm": 0.33984375, "learning_rate": 0.00028747095264110067, "loss": 0.5142, "step": 17170 }, { "epoch": 0.4359634983691031, "grad_norm": 0.369140625, "learning_rate": 0.00028745765506628894, "loss": 0.4851, "step": 17175 }, { "epoch": 0.4360904164181188, "grad_norm": 0.35546875, "learning_rate": 0.0002874443507464848, "loss": 0.461, "step": 17180 }, { "epoch": 0.43621733446713457, "grad_norm": 0.345703125, "learning_rate": 0.0002874310396823411, "loss": 0.4897, "step": 17185 }, { "epoch": 0.43634425251615033, "grad_norm": 0.36328125, "learning_rate": 0.00028741772187451106, "loss": 0.448, "step": 17190 }, { "epoch": 0.4364711705651661, "grad_norm": 0.3671875, "learning_rate": 0.00028740439732364816, "loss": 0.5015, "step": 17195 }, { "epoch": 0.4365980886141818, "grad_norm": 0.333984375, "learning_rate": 0.0002873910660304062, "loss": 0.4961, "step": 17200 }, { "epoch": 0.4367250066631976, "grad_norm": 0.41015625, "learning_rate": 0.00028737772799543933, "loss": 0.5095, "step": 17205 }, { "epoch": 0.43685192471221335, "grad_norm": 0.333984375, "learning_rate": 0.0002873643832194021, "loss": 0.4936, "step": 17210 }, { "epoch": 0.43697884276122906, "grad_norm": 0.337890625, "learning_rate": 0.0002873510317029492, "loss": 0.4884, "step": 17215 }, { "epoch": 0.4371057608102448, "grad_norm": 0.404296875, "learning_rate": 0.000287337673446736, "loss": 0.4585, "step": 17220 }, { "epoch": 0.4372326788592606, "grad_norm": 0.333984375, "learning_rate": 0.0002873243084514179, "loss": 0.4872, "step": 17225 }, { "epoch": 0.4373595969082763, "grad_norm": 0.38671875, "learning_rate": 0.0002873109367176506, "loss": 0.5128, "step": 17230 }, { "epoch": 0.43748651495729207, "grad_norm": 0.35546875, "learning_rate": 0.00028729755824609036, "loss": 0.4842, "step": 17235 }, { "epoch": 0.43761343300630784, "grad_norm": 0.3671875, "learning_rate": 0.0002872841730373936, "loss": 0.4672, "step": 17240 }, { "epoch": 0.43774035105532355, "grad_norm": 0.34765625, "learning_rate": 0.0002872707810922172, "loss": 0.489, "step": 17245 }, { "epoch": 0.4378672691043393, "grad_norm": 0.353515625, "learning_rate": 0.00028725738241121827, "loss": 0.4764, "step": 17250 }, { "epoch": 0.4379941871533551, "grad_norm": 0.328125, "learning_rate": 0.0002872439769950542, "loss": 0.4845, "step": 17255 }, { "epoch": 0.43812110520237085, "grad_norm": 0.36328125, "learning_rate": 0.00028723056484438294, "loss": 0.5125, "step": 17260 }, { "epoch": 0.43824802325138656, "grad_norm": 0.34375, "learning_rate": 0.00028721714595986245, "loss": 0.4738, "step": 17265 }, { "epoch": 0.43837494130040233, "grad_norm": 0.337890625, "learning_rate": 0.00028720372034215125, "loss": 0.4632, "step": 17270 }, { "epoch": 0.4385018593494181, "grad_norm": 0.365234375, "learning_rate": 0.0002871902879919082, "loss": 0.4902, "step": 17275 }, { "epoch": 0.4386287773984338, "grad_norm": 0.3359375, "learning_rate": 0.00028717684890979233, "loss": 0.5, "step": 17280 }, { "epoch": 0.4387556954474496, "grad_norm": 0.341796875, "learning_rate": 0.00028716340309646317, "loss": 0.4607, "step": 17285 }, { "epoch": 0.43888261349646535, "grad_norm": 0.373046875, "learning_rate": 0.0002871499505525804, "loss": 0.4761, "step": 17290 }, { "epoch": 0.43900953154548106, "grad_norm": 0.322265625, "learning_rate": 0.0002871364912788042, "loss": 0.48, "step": 17295 }, { "epoch": 0.4391364495944968, "grad_norm": 0.333984375, "learning_rate": 0.00028712302527579504, "loss": 0.4856, "step": 17300 }, { "epoch": 0.4392633676435126, "grad_norm": 0.345703125, "learning_rate": 0.00028710955254421364, "loss": 0.4732, "step": 17305 }, { "epoch": 0.43939028569252836, "grad_norm": 0.34375, "learning_rate": 0.0002870960730847211, "loss": 0.4882, "step": 17310 }, { "epoch": 0.43951720374154407, "grad_norm": 0.302734375, "learning_rate": 0.0002870825868979789, "loss": 0.4683, "step": 17315 }, { "epoch": 0.43964412179055984, "grad_norm": 0.3515625, "learning_rate": 0.0002870690939846487, "loss": 0.4885, "step": 17320 }, { "epoch": 0.4397710398395756, "grad_norm": 0.375, "learning_rate": 0.0002870555943453927, "loss": 0.5009, "step": 17325 }, { "epoch": 0.4398979578885913, "grad_norm": 0.3125, "learning_rate": 0.0002870420879808733, "loss": 0.4592, "step": 17330 }, { "epoch": 0.4400248759376071, "grad_norm": 0.345703125, "learning_rate": 0.00028702857489175317, "loss": 0.4963, "step": 17335 }, { "epoch": 0.44015179398662285, "grad_norm": 0.3515625, "learning_rate": 0.0002870150550786955, "loss": 0.4823, "step": 17340 }, { "epoch": 0.44027871203563856, "grad_norm": 0.375, "learning_rate": 0.00028700152854236363, "loss": 0.4816, "step": 17345 }, { "epoch": 0.44040563008465433, "grad_norm": 0.3359375, "learning_rate": 0.0002869879952834214, "loss": 0.48, "step": 17350 }, { "epoch": 0.4405325481336701, "grad_norm": 0.330078125, "learning_rate": 0.0002869744553025327, "loss": 0.4863, "step": 17355 }, { "epoch": 0.44065946618268587, "grad_norm": 0.369140625, "learning_rate": 0.0002869609086003621, "loss": 0.4799, "step": 17360 }, { "epoch": 0.4407863842317016, "grad_norm": 0.37109375, "learning_rate": 0.00028694735517757427, "loss": 0.5052, "step": 17365 }, { "epoch": 0.44091330228071735, "grad_norm": 0.373046875, "learning_rate": 0.00028693379503483426, "loss": 0.529, "step": 17370 }, { "epoch": 0.4410402203297331, "grad_norm": 0.375, "learning_rate": 0.0002869202281728075, "loss": 0.5225, "step": 17375 }, { "epoch": 0.4411671383787488, "grad_norm": 0.341796875, "learning_rate": 0.0002869066545921597, "loss": 0.4841, "step": 17380 }, { "epoch": 0.4412940564277646, "grad_norm": 0.349609375, "learning_rate": 0.0002868930742935568, "loss": 0.4933, "step": 17385 }, { "epoch": 0.44142097447678036, "grad_norm": 0.33203125, "learning_rate": 0.00028687948727766535, "loss": 0.4649, "step": 17390 }, { "epoch": 0.44154789252579607, "grad_norm": 0.34375, "learning_rate": 0.000286865893545152, "loss": 0.4892, "step": 17395 }, { "epoch": 0.44167481057481184, "grad_norm": 0.361328125, "learning_rate": 0.00028685229309668373, "loss": 0.48, "step": 17400 }, { "epoch": 0.4418017286238276, "grad_norm": 0.34765625, "learning_rate": 0.00028683868593292795, "loss": 0.4741, "step": 17405 }, { "epoch": 0.4419286466728434, "grad_norm": 0.35546875, "learning_rate": 0.00028682507205455237, "loss": 0.4812, "step": 17410 }, { "epoch": 0.4420555647218591, "grad_norm": 0.33203125, "learning_rate": 0.000286811451462225, "loss": 0.4679, "step": 17415 }, { "epoch": 0.44218248277087485, "grad_norm": 0.341796875, "learning_rate": 0.00028679782415661425, "loss": 0.4665, "step": 17420 }, { "epoch": 0.4423094008198906, "grad_norm": 0.345703125, "learning_rate": 0.00028678419013838875, "loss": 0.4837, "step": 17425 }, { "epoch": 0.44243631886890633, "grad_norm": 0.37109375, "learning_rate": 0.0002867705494082175, "loss": 0.4743, "step": 17430 }, { "epoch": 0.4425632369179221, "grad_norm": 0.32421875, "learning_rate": 0.00028675690196676987, "loss": 0.4315, "step": 17435 }, { "epoch": 0.44269015496693787, "grad_norm": 0.365234375, "learning_rate": 0.00028674324781471553, "loss": 0.4817, "step": 17440 }, { "epoch": 0.4428170730159536, "grad_norm": 0.36328125, "learning_rate": 0.00028672958695272454, "loss": 0.4821, "step": 17445 }, { "epoch": 0.44294399106496934, "grad_norm": 0.330078125, "learning_rate": 0.0002867159193814671, "loss": 0.4788, "step": 17450 }, { "epoch": 0.4430709091139851, "grad_norm": 0.328125, "learning_rate": 0.00028670224510161403, "loss": 0.4764, "step": 17455 }, { "epoch": 0.4431978271630008, "grad_norm": 0.34375, "learning_rate": 0.0002866885641138362, "loss": 0.4902, "step": 17460 }, { "epoch": 0.4433247452120166, "grad_norm": 0.36328125, "learning_rate": 0.000286674876418805, "loss": 0.4753, "step": 17465 }, { "epoch": 0.44345166326103236, "grad_norm": 0.353515625, "learning_rate": 0.000286661182017192, "loss": 0.4495, "step": 17470 }, { "epoch": 0.4435785813100481, "grad_norm": 0.333984375, "learning_rate": 0.00028664748090966935, "loss": 0.4929, "step": 17475 }, { "epoch": 0.44370549935906384, "grad_norm": 0.33984375, "learning_rate": 0.00028663377309690914, "loss": 0.4819, "step": 17480 }, { "epoch": 0.4438324174080796, "grad_norm": 0.330078125, "learning_rate": 0.0002866200585795842, "loss": 0.4743, "step": 17485 }, { "epoch": 0.44395933545709537, "grad_norm": 0.341796875, "learning_rate": 0.0002866063373583674, "loss": 0.4999, "step": 17490 }, { "epoch": 0.4440862535061111, "grad_norm": 0.32421875, "learning_rate": 0.00028659260943393193, "loss": 0.481, "step": 17495 }, { "epoch": 0.44421317155512685, "grad_norm": 0.34765625, "learning_rate": 0.00028657887480695166, "loss": 0.464, "step": 17500 }, { "epoch": 0.4443400896041426, "grad_norm": 0.337890625, "learning_rate": 0.00028656513347810034, "loss": 0.4797, "step": 17505 }, { "epoch": 0.44446700765315833, "grad_norm": 0.365234375, "learning_rate": 0.0002865513854480523, "loss": 0.5066, "step": 17510 }, { "epoch": 0.4445939257021741, "grad_norm": 0.359375, "learning_rate": 0.0002865376307174822, "loss": 0.471, "step": 17515 }, { "epoch": 0.44472084375118986, "grad_norm": 0.361328125, "learning_rate": 0.0002865238692870649, "loss": 0.5068, "step": 17520 }, { "epoch": 0.44484776180020563, "grad_norm": 0.3359375, "learning_rate": 0.0002865101011574758, "loss": 0.4661, "step": 17525 }, { "epoch": 0.44497467984922134, "grad_norm": 0.34375, "learning_rate": 0.0002864963263293904, "loss": 0.4829, "step": 17530 }, { "epoch": 0.4451015978982371, "grad_norm": 0.353515625, "learning_rate": 0.0002864825448034846, "loss": 0.4839, "step": 17535 }, { "epoch": 0.4452285159472529, "grad_norm": 0.35546875, "learning_rate": 0.0002864687565804347, "loss": 0.5195, "step": 17540 }, { "epoch": 0.4453554339962686, "grad_norm": 0.345703125, "learning_rate": 0.0002864549616609173, "loss": 0.482, "step": 17545 }, { "epoch": 0.44548235204528436, "grad_norm": 0.3046875, "learning_rate": 0.00028644116004560927, "loss": 0.4362, "step": 17550 }, { "epoch": 0.4456092700943001, "grad_norm": 0.359375, "learning_rate": 0.0002864273517351879, "loss": 0.4695, "step": 17555 }, { "epoch": 0.44573618814331584, "grad_norm": 0.357421875, "learning_rate": 0.00028641353673033066, "loss": 0.4944, "step": 17560 }, { "epoch": 0.4458631061923316, "grad_norm": 0.3515625, "learning_rate": 0.0002863997150317156, "loss": 0.4608, "step": 17565 }, { "epoch": 0.44599002424134737, "grad_norm": 0.357421875, "learning_rate": 0.00028638588664002074, "loss": 0.5008, "step": 17570 }, { "epoch": 0.44611694229036314, "grad_norm": 0.34765625, "learning_rate": 0.0002863720515559248, "loss": 0.4721, "step": 17575 }, { "epoch": 0.44624386033937885, "grad_norm": 0.35546875, "learning_rate": 0.00028635820978010665, "loss": 0.4969, "step": 17580 }, { "epoch": 0.4463707783883946, "grad_norm": 0.35546875, "learning_rate": 0.00028634436131324544, "loss": 0.4786, "step": 17585 }, { "epoch": 0.4464976964374104, "grad_norm": 0.34375, "learning_rate": 0.0002863305061560207, "loss": 0.4819, "step": 17590 }, { "epoch": 0.4466246144864261, "grad_norm": 0.36328125, "learning_rate": 0.0002863166443091124, "loss": 0.4697, "step": 17595 }, { "epoch": 0.44675153253544186, "grad_norm": 0.345703125, "learning_rate": 0.0002863027757732006, "loss": 0.4704, "step": 17600 }, { "epoch": 0.44687845058445763, "grad_norm": 0.36328125, "learning_rate": 0.00028628890054896596, "loss": 0.4683, "step": 17605 }, { "epoch": 0.44700536863347334, "grad_norm": 0.3203125, "learning_rate": 0.0002862750186370892, "loss": 0.4811, "step": 17610 }, { "epoch": 0.4471322866824891, "grad_norm": 0.36328125, "learning_rate": 0.00028626113003825155, "loss": 0.4994, "step": 17615 }, { "epoch": 0.4472592047315049, "grad_norm": 0.337890625, "learning_rate": 0.0002862472347531345, "loss": 0.4595, "step": 17620 }, { "epoch": 0.44738612278052065, "grad_norm": 0.33984375, "learning_rate": 0.00028623333278242, "loss": 0.467, "step": 17625 }, { "epoch": 0.44751304082953636, "grad_norm": 0.345703125, "learning_rate": 0.0002862194241267901, "loss": 0.4592, "step": 17630 }, { "epoch": 0.4476399588785521, "grad_norm": 0.3671875, "learning_rate": 0.00028620550878692734, "loss": 0.4998, "step": 17635 }, { "epoch": 0.4477668769275679, "grad_norm": 0.34765625, "learning_rate": 0.00028619158676351444, "loss": 0.4912, "step": 17640 }, { "epoch": 0.4478937949765836, "grad_norm": 0.35546875, "learning_rate": 0.00028617765805723466, "loss": 0.4961, "step": 17645 }, { "epoch": 0.44802071302559937, "grad_norm": 0.353515625, "learning_rate": 0.0002861637226687715, "loss": 0.4728, "step": 17650 }, { "epoch": 0.44814763107461514, "grad_norm": 0.353515625, "learning_rate": 0.00028614978059880866, "loss": 0.4977, "step": 17655 }, { "epoch": 0.44827454912363085, "grad_norm": 0.34765625, "learning_rate": 0.0002861358318480303, "loss": 0.4756, "step": 17660 }, { "epoch": 0.4484014671726466, "grad_norm": 0.37890625, "learning_rate": 0.00028612187641712097, "loss": 0.4911, "step": 17665 }, { "epoch": 0.4485283852216624, "grad_norm": 0.349609375, "learning_rate": 0.0002861079143067653, "loss": 0.512, "step": 17670 }, { "epoch": 0.4486553032706781, "grad_norm": 0.353515625, "learning_rate": 0.00028609394551764856, "loss": 0.456, "step": 17675 }, { "epoch": 0.44878222131969386, "grad_norm": 0.345703125, "learning_rate": 0.00028607997005045615, "loss": 0.4749, "step": 17680 }, { "epoch": 0.44890913936870963, "grad_norm": 0.369140625, "learning_rate": 0.0002860659879058737, "loss": 0.4978, "step": 17685 }, { "epoch": 0.4490360574177254, "grad_norm": 0.34765625, "learning_rate": 0.0002860519990845875, "loss": 0.483, "step": 17690 }, { "epoch": 0.4491629754667411, "grad_norm": 0.345703125, "learning_rate": 0.0002860380035872839, "loss": 0.4804, "step": 17695 }, { "epoch": 0.4492898935157569, "grad_norm": 0.345703125, "learning_rate": 0.00028602400141464966, "loss": 0.4915, "step": 17700 }, { "epoch": 0.44941681156477264, "grad_norm": 18.75, "learning_rate": 0.00028600999256737187, "loss": 0.5135, "step": 17705 }, { "epoch": 0.44954372961378836, "grad_norm": 0.37890625, "learning_rate": 0.0002859959770461379, "loss": 0.5004, "step": 17710 }, { "epoch": 0.4496706476628041, "grad_norm": 0.3515625, "learning_rate": 0.00028598195485163546, "loss": 0.4661, "step": 17715 }, { "epoch": 0.4497975657118199, "grad_norm": 0.345703125, "learning_rate": 0.00028596792598455274, "loss": 0.4812, "step": 17720 }, { "epoch": 0.4499244837608356, "grad_norm": 0.357421875, "learning_rate": 0.00028595389044557804, "loss": 0.4837, "step": 17725 }, { "epoch": 0.45005140180985137, "grad_norm": 0.326171875, "learning_rate": 0.0002859398482354, "loss": 0.4591, "step": 17730 }, { "epoch": 0.45017831985886714, "grad_norm": 0.3359375, "learning_rate": 0.00028592579935470783, "loss": 0.482, "step": 17735 }, { "epoch": 0.4503052379078829, "grad_norm": 0.369140625, "learning_rate": 0.00028591174380419084, "loss": 0.4701, "step": 17740 }, { "epoch": 0.4504321559568986, "grad_norm": 0.3515625, "learning_rate": 0.0002858976815845387, "loss": 0.4782, "step": 17745 }, { "epoch": 0.4505590740059144, "grad_norm": 0.37890625, "learning_rate": 0.0002858836126964415, "loss": 0.5078, "step": 17750 }, { "epoch": 0.45068599205493015, "grad_norm": 0.3671875, "learning_rate": 0.00028586953714058956, "loss": 0.4791, "step": 17755 }, { "epoch": 0.45081291010394586, "grad_norm": 0.349609375, "learning_rate": 0.0002858554549176735, "loss": 0.498, "step": 17760 }, { "epoch": 0.45093982815296163, "grad_norm": 0.365234375, "learning_rate": 0.00028584136602838436, "loss": 0.481, "step": 17765 }, { "epoch": 0.4510667462019774, "grad_norm": 0.33203125, "learning_rate": 0.0002858272704734136, "loss": 0.4674, "step": 17770 }, { "epoch": 0.4511936642509931, "grad_norm": 0.330078125, "learning_rate": 0.00028581316825345267, "loss": 0.4558, "step": 17775 }, { "epoch": 0.4513205823000089, "grad_norm": 0.35546875, "learning_rate": 0.00028579905936919374, "loss": 0.4801, "step": 17780 }, { "epoch": 0.45144750034902464, "grad_norm": 0.34375, "learning_rate": 0.00028578494382132906, "loss": 0.4451, "step": 17785 }, { "epoch": 0.4515744183980404, "grad_norm": 0.3359375, "learning_rate": 0.00028577082161055126, "loss": 0.4873, "step": 17790 }, { "epoch": 0.4517013364470561, "grad_norm": 0.33984375, "learning_rate": 0.0002857566927375533, "loss": 0.4534, "step": 17795 }, { "epoch": 0.4518282544960719, "grad_norm": 0.333984375, "learning_rate": 0.0002857425572030285, "loss": 0.4851, "step": 17800 }, { "epoch": 0.45195517254508766, "grad_norm": 0.345703125, "learning_rate": 0.00028572841500767055, "loss": 0.4721, "step": 17805 }, { "epoch": 0.45208209059410337, "grad_norm": 0.34375, "learning_rate": 0.00028571426615217333, "loss": 0.4762, "step": 17810 }, { "epoch": 0.45220900864311914, "grad_norm": 0.33203125, "learning_rate": 0.00028570011063723115, "loss": 0.4469, "step": 17815 }, { "epoch": 0.4523359266921349, "grad_norm": 0.328125, "learning_rate": 0.00028568594846353854, "loss": 0.4966, "step": 17820 }, { "epoch": 0.4524628447411506, "grad_norm": 0.35546875, "learning_rate": 0.00028567177963179053, "loss": 0.4968, "step": 17825 }, { "epoch": 0.4525897627901664, "grad_norm": 0.33203125, "learning_rate": 0.0002856576041426823, "loss": 0.4724, "step": 17830 }, { "epoch": 0.45271668083918215, "grad_norm": 0.337890625, "learning_rate": 0.0002856434219969095, "loss": 0.4828, "step": 17835 }, { "epoch": 0.4528435988881979, "grad_norm": 0.349609375, "learning_rate": 0.000285629233195168, "loss": 0.4756, "step": 17840 }, { "epoch": 0.45297051693721363, "grad_norm": 0.353515625, "learning_rate": 0.00028561503773815404, "loss": 0.4579, "step": 17845 }, { "epoch": 0.4530974349862294, "grad_norm": 0.357421875, "learning_rate": 0.00028560083562656426, "loss": 0.4767, "step": 17850 }, { "epoch": 0.45322435303524516, "grad_norm": 0.349609375, "learning_rate": 0.0002855866268610954, "loss": 0.4508, "step": 17855 }, { "epoch": 0.4533512710842609, "grad_norm": 0.345703125, "learning_rate": 0.00028557241144244483, "loss": 0.4916, "step": 17860 }, { "epoch": 0.45347818913327664, "grad_norm": 0.333984375, "learning_rate": 0.0002855581893713101, "loss": 0.48, "step": 17865 }, { "epoch": 0.4536051071822924, "grad_norm": 0.33203125, "learning_rate": 0.0002855439606483889, "loss": 0.4792, "step": 17870 }, { "epoch": 0.4537320252313081, "grad_norm": 0.37109375, "learning_rate": 0.0002855297252743796, "loss": 0.4658, "step": 17875 }, { "epoch": 0.4538589432803239, "grad_norm": 0.34375, "learning_rate": 0.00028551548324998063, "loss": 0.4815, "step": 17880 }, { "epoch": 0.45398586132933966, "grad_norm": 0.36328125, "learning_rate": 0.0002855012345758909, "loss": 0.4823, "step": 17885 }, { "epoch": 0.45411277937835537, "grad_norm": 0.365234375, "learning_rate": 0.0002854869792528096, "loss": 0.5005, "step": 17890 }, { "epoch": 0.45423969742737114, "grad_norm": 0.349609375, "learning_rate": 0.0002854727172814361, "loss": 0.4932, "step": 17895 }, { "epoch": 0.4543666154763869, "grad_norm": 0.33984375, "learning_rate": 0.0002854584486624704, "loss": 0.4788, "step": 17900 }, { "epoch": 0.45449353352540267, "grad_norm": 0.34375, "learning_rate": 0.0002854441733966126, "loss": 0.4555, "step": 17905 }, { "epoch": 0.4546204515744184, "grad_norm": 0.318359375, "learning_rate": 0.00028542989148456317, "loss": 0.442, "step": 17910 }, { "epoch": 0.45474736962343415, "grad_norm": 0.375, "learning_rate": 0.00028541560292702283, "loss": 0.4726, "step": 17915 }, { "epoch": 0.4548742876724499, "grad_norm": 0.361328125, "learning_rate": 0.00028540130772469286, "loss": 0.4572, "step": 17920 }, { "epoch": 0.45500120572146563, "grad_norm": 0.357421875, "learning_rate": 0.00028538700587827466, "loss": 0.4925, "step": 17925 }, { "epoch": 0.4551281237704814, "grad_norm": 0.36328125, "learning_rate": 0.00028537269738847, "loss": 0.5228, "step": 17930 }, { "epoch": 0.45525504181949716, "grad_norm": 0.31640625, "learning_rate": 0.000285358382255981, "loss": 0.4845, "step": 17935 }, { "epoch": 0.4553819598685129, "grad_norm": 0.341796875, "learning_rate": 0.0002853440604815101, "loss": 0.4638, "step": 17940 }, { "epoch": 0.45550887791752864, "grad_norm": 0.361328125, "learning_rate": 0.0002853297320657601, "loss": 0.4859, "step": 17945 }, { "epoch": 0.4556357959665444, "grad_norm": 0.35546875, "learning_rate": 0.00028531539700943397, "loss": 0.4655, "step": 17950 }, { "epoch": 0.4557627140155602, "grad_norm": 0.34375, "learning_rate": 0.0002853010553132353, "loss": 0.483, "step": 17955 }, { "epoch": 0.4558896320645759, "grad_norm": 0.341796875, "learning_rate": 0.0002852867069778676, "loss": 0.4469, "step": 17960 }, { "epoch": 0.45601655011359166, "grad_norm": 0.349609375, "learning_rate": 0.00028527235200403523, "loss": 0.4786, "step": 17965 }, { "epoch": 0.4561434681626074, "grad_norm": 0.349609375, "learning_rate": 0.0002852579903924424, "loss": 0.5151, "step": 17970 }, { "epoch": 0.45627038621162314, "grad_norm": 0.3359375, "learning_rate": 0.00028524362214379386, "loss": 0.4929, "step": 17975 }, { "epoch": 0.4563973042606389, "grad_norm": 0.35546875, "learning_rate": 0.0002852292472587946, "loss": 0.4964, "step": 17980 }, { "epoch": 0.45652422230965467, "grad_norm": 0.33203125, "learning_rate": 0.00028521486573815006, "loss": 0.4872, "step": 17985 }, { "epoch": 0.4566511403586704, "grad_norm": 0.34765625, "learning_rate": 0.000285200477582566, "loss": 0.4635, "step": 17990 }, { "epoch": 0.45677805840768615, "grad_norm": 0.365234375, "learning_rate": 0.00028518608279274823, "loss": 0.4736, "step": 17995 }, { "epoch": 0.4569049764567019, "grad_norm": 0.349609375, "learning_rate": 0.0002851716813694033, "loss": 0.4756, "step": 18000 }, { "epoch": 0.4570318945057177, "grad_norm": 0.361328125, "learning_rate": 0.00028515727331323784, "loss": 0.4863, "step": 18005 }, { "epoch": 0.4571588125547334, "grad_norm": 0.333984375, "learning_rate": 0.00028514285862495873, "loss": 0.468, "step": 18010 }, { "epoch": 0.45728573060374916, "grad_norm": 0.35546875, "learning_rate": 0.0002851284373052735, "loss": 0.449, "step": 18015 }, { "epoch": 0.45741264865276493, "grad_norm": 0.341796875, "learning_rate": 0.0002851140093548896, "loss": 0.47, "step": 18020 }, { "epoch": 0.45753956670178064, "grad_norm": 0.32421875, "learning_rate": 0.00028509957477451507, "loss": 0.4599, "step": 18025 }, { "epoch": 0.4576664847507964, "grad_norm": 0.36328125, "learning_rate": 0.0002850851335648583, "loss": 0.4822, "step": 18030 }, { "epoch": 0.4577934027998122, "grad_norm": 0.357421875, "learning_rate": 0.0002850706857266278, "loss": 0.4769, "step": 18035 }, { "epoch": 0.4579203208488279, "grad_norm": 0.345703125, "learning_rate": 0.00028505623126053254, "loss": 0.4786, "step": 18040 }, { "epoch": 0.45804723889784366, "grad_norm": 0.353515625, "learning_rate": 0.00028504177016728177, "loss": 0.4393, "step": 18045 }, { "epoch": 0.4581741569468594, "grad_norm": 0.361328125, "learning_rate": 0.0002850273024475852, "loss": 0.4734, "step": 18050 }, { "epoch": 0.4583010749958752, "grad_norm": 0.337890625, "learning_rate": 0.0002850128281021526, "loss": 0.4804, "step": 18055 }, { "epoch": 0.4584279930448909, "grad_norm": 0.3515625, "learning_rate": 0.0002849983471316944, "loss": 0.4515, "step": 18060 }, { "epoch": 0.45855491109390667, "grad_norm": 0.333984375, "learning_rate": 0.000284983859536921, "loss": 0.4538, "step": 18065 }, { "epoch": 0.45868182914292244, "grad_norm": 0.341796875, "learning_rate": 0.0002849693653185434, "loss": 0.4849, "step": 18070 }, { "epoch": 0.45880874719193815, "grad_norm": 0.34765625, "learning_rate": 0.0002849548644772729, "loss": 0.4786, "step": 18075 }, { "epoch": 0.4589356652409539, "grad_norm": 0.326171875, "learning_rate": 0.00028494035701382087, "loss": 0.4709, "step": 18080 }, { "epoch": 0.4590625832899697, "grad_norm": 0.34375, "learning_rate": 0.0002849258429288993, "loss": 0.4466, "step": 18085 }, { "epoch": 0.4591895013389854, "grad_norm": 0.375, "learning_rate": 0.0002849113222232203, "loss": 0.4845, "step": 18090 }, { "epoch": 0.45931641938800116, "grad_norm": 0.314453125, "learning_rate": 0.0002848967948974966, "loss": 0.4662, "step": 18095 }, { "epoch": 0.45944333743701693, "grad_norm": 0.328125, "learning_rate": 0.00028488226095244084, "loss": 0.4611, "step": 18100 }, { "epoch": 0.45957025548603264, "grad_norm": 0.353515625, "learning_rate": 0.0002848677203887662, "loss": 0.4915, "step": 18105 }, { "epoch": 0.4596971735350484, "grad_norm": 0.345703125, "learning_rate": 0.00028485317320718634, "loss": 0.4456, "step": 18110 }, { "epoch": 0.4598240915840642, "grad_norm": 0.328125, "learning_rate": 0.0002848386194084149, "loss": 0.4458, "step": 18115 }, { "epoch": 0.45995100963307994, "grad_norm": 0.328125, "learning_rate": 0.0002848240589931662, "loss": 0.4704, "step": 18120 }, { "epoch": 0.46007792768209566, "grad_norm": 0.341796875, "learning_rate": 0.00028480949196215457, "loss": 0.4744, "step": 18125 }, { "epoch": 0.4602048457311114, "grad_norm": 0.380859375, "learning_rate": 0.00028479491831609493, "loss": 0.4974, "step": 18130 }, { "epoch": 0.4603317637801272, "grad_norm": 0.36328125, "learning_rate": 0.0002847803380557023, "loss": 0.4937, "step": 18135 }, { "epoch": 0.4604586818291429, "grad_norm": 0.326171875, "learning_rate": 0.00028476575118169213, "loss": 0.4689, "step": 18140 }, { "epoch": 0.46058559987815867, "grad_norm": 0.353515625, "learning_rate": 0.0002847511576947803, "loss": 0.4811, "step": 18145 }, { "epoch": 0.46071251792717444, "grad_norm": 0.3359375, "learning_rate": 0.00028473655759568285, "loss": 0.4942, "step": 18150 }, { "epoch": 0.46083943597619015, "grad_norm": 0.3515625, "learning_rate": 0.00028472195088511613, "loss": 0.4812, "step": 18155 }, { "epoch": 0.4609663540252059, "grad_norm": 1.203125, "learning_rate": 0.00028470733756379694, "loss": 0.4915, "step": 18160 }, { "epoch": 0.4610932720742217, "grad_norm": 0.33984375, "learning_rate": 0.0002846927176324424, "loss": 0.4563, "step": 18165 }, { "epoch": 0.46122019012323745, "grad_norm": 0.31640625, "learning_rate": 0.00028467809109176983, "loss": 0.4309, "step": 18170 }, { "epoch": 0.46134710817225316, "grad_norm": 0.3515625, "learning_rate": 0.000284663457942497, "loss": 0.4651, "step": 18175 }, { "epoch": 0.46147402622126893, "grad_norm": 0.69140625, "learning_rate": 0.00028464881818534194, "loss": 0.4551, "step": 18180 }, { "epoch": 0.4616009442702847, "grad_norm": 0.33203125, "learning_rate": 0.000284634171821023, "loss": 0.4414, "step": 18185 }, { "epoch": 0.4617278623193004, "grad_norm": 0.337890625, "learning_rate": 0.0002846195188502589, "loss": 0.4938, "step": 18190 }, { "epoch": 0.4618547803683162, "grad_norm": 0.3515625, "learning_rate": 0.00028460485927376855, "loss": 0.4783, "step": 18195 }, { "epoch": 0.46198169841733194, "grad_norm": 0.337890625, "learning_rate": 0.00028459019309227143, "loss": 0.4837, "step": 18200 }, { "epoch": 0.46210861646634765, "grad_norm": 0.337890625, "learning_rate": 0.00028457552030648715, "loss": 0.4792, "step": 18205 }, { "epoch": 0.4622355345153634, "grad_norm": 0.365234375, "learning_rate": 0.00028456084091713573, "loss": 0.4634, "step": 18210 }, { "epoch": 0.4623624525643792, "grad_norm": 0.322265625, "learning_rate": 0.0002845461549249374, "loss": 0.4739, "step": 18215 }, { "epoch": 0.46248937061339496, "grad_norm": 0.37890625, "learning_rate": 0.00028453146233061284, "loss": 0.5071, "step": 18220 }, { "epoch": 0.46261628866241067, "grad_norm": 0.365234375, "learning_rate": 0.00028451676313488304, "loss": 0.5039, "step": 18225 }, { "epoch": 0.46274320671142644, "grad_norm": 0.3671875, "learning_rate": 0.00028450205733846927, "loss": 0.4917, "step": 18230 }, { "epoch": 0.4628701247604422, "grad_norm": 0.359375, "learning_rate": 0.0002844873449420931, "loss": 0.4985, "step": 18235 }, { "epoch": 0.4629970428094579, "grad_norm": 0.330078125, "learning_rate": 0.0002844726259464765, "loss": 0.48, "step": 18240 }, { "epoch": 0.4631239608584737, "grad_norm": 0.337890625, "learning_rate": 0.0002844579003523417, "loss": 0.467, "step": 18245 }, { "epoch": 0.46325087890748945, "grad_norm": 0.357421875, "learning_rate": 0.0002844431681604112, "loss": 0.4909, "step": 18250 }, { "epoch": 0.46337779695650516, "grad_norm": 0.35546875, "learning_rate": 0.0002844284293714081, "loss": 0.505, "step": 18255 }, { "epoch": 0.46350471500552093, "grad_norm": 0.349609375, "learning_rate": 0.00028441368398605553, "loss": 0.4633, "step": 18260 }, { "epoch": 0.4636316330545367, "grad_norm": 0.357421875, "learning_rate": 0.00028439893200507695, "loss": 0.4894, "step": 18265 }, { "epoch": 0.46375855110355246, "grad_norm": 0.345703125, "learning_rate": 0.00028438417342919634, "loss": 0.469, "step": 18270 }, { "epoch": 0.4638854691525682, "grad_norm": 0.3125, "learning_rate": 0.0002843694082591379, "loss": 0.4628, "step": 18275 }, { "epoch": 0.46401238720158394, "grad_norm": 0.35546875, "learning_rate": 0.0002843546364956261, "loss": 0.5008, "step": 18280 }, { "epoch": 0.4641393052505997, "grad_norm": 0.314453125, "learning_rate": 0.0002843398581393858, "loss": 0.4631, "step": 18285 }, { "epoch": 0.4642662232996154, "grad_norm": 0.349609375, "learning_rate": 0.0002843250731911422, "loss": 0.4743, "step": 18290 }, { "epoch": 0.4643931413486312, "grad_norm": 0.361328125, "learning_rate": 0.0002843102816516208, "loss": 0.4755, "step": 18295 }, { "epoch": 0.46452005939764696, "grad_norm": 0.349609375, "learning_rate": 0.00028429548352154735, "loss": 0.5061, "step": 18300 }, { "epoch": 0.46464697744666267, "grad_norm": 0.36328125, "learning_rate": 0.000284280678801648, "loss": 0.5078, "step": 18305 }, { "epoch": 0.46477389549567844, "grad_norm": 0.32421875, "learning_rate": 0.0002842658674926492, "loss": 0.4751, "step": 18310 }, { "epoch": 0.4649008135446942, "grad_norm": 0.353515625, "learning_rate": 0.0002842510495952778, "loss": 0.4861, "step": 18315 }, { "epoch": 0.4650277315937099, "grad_norm": 0.345703125, "learning_rate": 0.0002842362251102609, "loss": 0.443, "step": 18320 }, { "epoch": 0.4651546496427257, "grad_norm": 0.33203125, "learning_rate": 0.00028422139403832587, "loss": 0.4959, "step": 18325 }, { "epoch": 0.46528156769174145, "grad_norm": 0.3359375, "learning_rate": 0.0002842065563802005, "loss": 0.4711, "step": 18330 }, { "epoch": 0.4654084857407572, "grad_norm": 0.3515625, "learning_rate": 0.00028419171213661285, "loss": 0.5041, "step": 18335 }, { "epoch": 0.46553540378977293, "grad_norm": 0.3359375, "learning_rate": 0.0002841768613082914, "loss": 0.487, "step": 18340 }, { "epoch": 0.4656623218387887, "grad_norm": 0.349609375, "learning_rate": 0.0002841620038959648, "loss": 0.4902, "step": 18345 }, { "epoch": 0.46578923988780446, "grad_norm": 0.326171875, "learning_rate": 0.0002841471399003621, "loss": 0.4534, "step": 18350 }, { "epoch": 0.4659161579368202, "grad_norm": 0.3515625, "learning_rate": 0.0002841322693222127, "loss": 0.4538, "step": 18355 }, { "epoch": 0.46604307598583594, "grad_norm": 0.3359375, "learning_rate": 0.0002841173921622462, "loss": 0.4579, "step": 18360 }, { "epoch": 0.4661699940348517, "grad_norm": 0.349609375, "learning_rate": 0.00028410250842119273, "loss": 0.4936, "step": 18365 }, { "epoch": 0.4662969120838674, "grad_norm": 0.341796875, "learning_rate": 0.0002840876180997826, "loss": 0.4878, "step": 18370 }, { "epoch": 0.4664238301328832, "grad_norm": 0.357421875, "learning_rate": 0.00028407272119874646, "loss": 0.4752, "step": 18375 }, { "epoch": 0.46655074818189896, "grad_norm": 0.333984375, "learning_rate": 0.0002840578177188153, "loss": 0.4713, "step": 18380 }, { "epoch": 0.4666776662309147, "grad_norm": 0.33984375, "learning_rate": 0.0002840429076607205, "loss": 0.4859, "step": 18385 }, { "epoch": 0.46680458427993043, "grad_norm": 0.357421875, "learning_rate": 0.0002840279910251935, "loss": 0.4692, "step": 18390 }, { "epoch": 0.4669315023289462, "grad_norm": 0.32421875, "learning_rate": 0.0002840130678129664, "loss": 0.4764, "step": 18395 }, { "epoch": 0.46705842037796197, "grad_norm": 0.359375, "learning_rate": 0.00028399813802477145, "loss": 0.4898, "step": 18400 }, { "epoch": 0.4671853384269777, "grad_norm": 0.373046875, "learning_rate": 0.00028398320166134124, "loss": 0.472, "step": 18405 }, { "epoch": 0.46731225647599345, "grad_norm": 0.326171875, "learning_rate": 0.00028396825872340875, "loss": 0.4758, "step": 18410 }, { "epoch": 0.4674391745250092, "grad_norm": 0.328125, "learning_rate": 0.0002839533092117071, "loss": 0.4765, "step": 18415 }, { "epoch": 0.4675660925740249, "grad_norm": 0.357421875, "learning_rate": 0.00028393835312696996, "loss": 0.4731, "step": 18420 }, { "epoch": 0.4676930106230407, "grad_norm": 0.359375, "learning_rate": 0.0002839233904699312, "loss": 0.4981, "step": 18425 }, { "epoch": 0.46781992867205646, "grad_norm": 0.3515625, "learning_rate": 0.00028390842124132495, "loss": 0.4764, "step": 18430 }, { "epoch": 0.46794684672107223, "grad_norm": 0.333984375, "learning_rate": 0.00028389344544188587, "loss": 0.4663, "step": 18435 }, { "epoch": 0.46807376477008794, "grad_norm": 0.318359375, "learning_rate": 0.00028387846307234874, "loss": 0.4929, "step": 18440 }, { "epoch": 0.4682006828191037, "grad_norm": 0.3359375, "learning_rate": 0.00028386347413344873, "loss": 0.4597, "step": 18445 }, { "epoch": 0.4683276008681195, "grad_norm": 0.34375, "learning_rate": 0.00028384847862592146, "loss": 0.499, "step": 18450 }, { "epoch": 0.4684545189171352, "grad_norm": 0.359375, "learning_rate": 0.00028383347655050256, "loss": 0.4769, "step": 18455 }, { "epoch": 0.46858143696615095, "grad_norm": 0.330078125, "learning_rate": 0.0002838184679079283, "loss": 0.4797, "step": 18460 }, { "epoch": 0.4687083550151667, "grad_norm": 0.34375, "learning_rate": 0.00028380345269893515, "loss": 0.4865, "step": 18465 }, { "epoch": 0.46883527306418243, "grad_norm": 0.39453125, "learning_rate": 0.0002837884309242599, "loss": 0.4919, "step": 18470 }, { "epoch": 0.4689621911131982, "grad_norm": 0.376953125, "learning_rate": 0.0002837734025846396, "loss": 0.4653, "step": 18475 }, { "epoch": 0.46908910916221397, "grad_norm": 0.34765625, "learning_rate": 0.0002837583676808117, "loss": 0.4645, "step": 18480 }, { "epoch": 0.46921602721122974, "grad_norm": 0.3359375, "learning_rate": 0.000283743326213514, "loss": 0.4822, "step": 18485 }, { "epoch": 0.46934294526024545, "grad_norm": 0.318359375, "learning_rate": 0.0002837282781834845, "loss": 0.4862, "step": 18490 }, { "epoch": 0.4694698633092612, "grad_norm": 0.33984375, "learning_rate": 0.0002837132235914618, "loss": 0.4775, "step": 18495 }, { "epoch": 0.469596781358277, "grad_norm": 0.361328125, "learning_rate": 0.0002836981624381844, "loss": 0.4712, "step": 18500 }, { "epoch": 0.4697236994072927, "grad_norm": 0.373046875, "learning_rate": 0.0002836830947243914, "loss": 0.4953, "step": 18505 }, { "epoch": 0.46985061745630846, "grad_norm": 0.359375, "learning_rate": 0.00028366802045082224, "loss": 0.4494, "step": 18510 }, { "epoch": 0.46997753550532423, "grad_norm": 0.33203125, "learning_rate": 0.0002836529396182166, "loss": 0.4637, "step": 18515 }, { "epoch": 0.47010445355433994, "grad_norm": 0.34375, "learning_rate": 0.0002836378522273143, "loss": 0.476, "step": 18520 }, { "epoch": 0.4702313716033557, "grad_norm": 0.30859375, "learning_rate": 0.00028362275827885597, "loss": 0.4572, "step": 18525 }, { "epoch": 0.4703582896523715, "grad_norm": 0.3671875, "learning_rate": 0.0002836076577735821, "loss": 0.491, "step": 18530 }, { "epoch": 0.4704852077013872, "grad_norm": 0.3515625, "learning_rate": 0.0002835925507122336, "loss": 0.5015, "step": 18535 }, { "epoch": 0.47061212575040295, "grad_norm": 0.3515625, "learning_rate": 0.00028357743709555194, "loss": 0.4771, "step": 18540 }, { "epoch": 0.4707390437994187, "grad_norm": 0.337890625, "learning_rate": 0.0002835623169242786, "loss": 0.4555, "step": 18545 }, { "epoch": 0.4708659618484345, "grad_norm": 0.35546875, "learning_rate": 0.0002835471901991556, "loss": 0.4759, "step": 18550 }, { "epoch": 0.4709928798974502, "grad_norm": 0.345703125, "learning_rate": 0.0002835320569209252, "loss": 0.4922, "step": 18555 }, { "epoch": 0.47111979794646597, "grad_norm": 0.3828125, "learning_rate": 0.0002835169170903299, "loss": 0.4793, "step": 18560 }, { "epoch": 0.47124671599548174, "grad_norm": 0.34375, "learning_rate": 0.00028350177070811264, "loss": 0.4681, "step": 18565 }, { "epoch": 0.47137363404449745, "grad_norm": 0.328125, "learning_rate": 0.00028348661777501675, "loss": 0.4628, "step": 18570 }, { "epoch": 0.4715005520935132, "grad_norm": 0.3828125, "learning_rate": 0.00028347145829178564, "loss": 0.4903, "step": 18575 }, { "epoch": 0.471627470142529, "grad_norm": 0.349609375, "learning_rate": 0.0002834562922591633, "loss": 0.4943, "step": 18580 }, { "epoch": 0.4717543881915447, "grad_norm": 0.3203125, "learning_rate": 0.0002834411196778938, "loss": 0.4647, "step": 18585 }, { "epoch": 0.47188130624056046, "grad_norm": 0.419921875, "learning_rate": 0.0002834259405487217, "loss": 0.4896, "step": 18590 }, { "epoch": 0.47200822428957623, "grad_norm": 0.3828125, "learning_rate": 0.0002834107548723919, "loss": 0.4991, "step": 18595 }, { "epoch": 0.472135142338592, "grad_norm": 0.37109375, "learning_rate": 0.0002833955626496495, "loss": 0.4793, "step": 18600 }, { "epoch": 0.4722620603876077, "grad_norm": 0.349609375, "learning_rate": 0.00028338036388123993, "loss": 0.4817, "step": 18605 }, { "epoch": 0.4723889784366235, "grad_norm": 0.33984375, "learning_rate": 0.0002833651585679091, "loss": 0.4618, "step": 18610 }, { "epoch": 0.47251589648563924, "grad_norm": 0.34765625, "learning_rate": 0.00028334994671040307, "loss": 0.4704, "step": 18615 }, { "epoch": 0.47264281453465495, "grad_norm": 0.357421875, "learning_rate": 0.00028333472830946823, "loss": 0.4492, "step": 18620 }, { "epoch": 0.4727697325836707, "grad_norm": 0.3359375, "learning_rate": 0.00028331950336585136, "loss": 0.4671, "step": 18625 }, { "epoch": 0.4728966506326865, "grad_norm": 0.37890625, "learning_rate": 0.0002833042718802996, "loss": 0.4862, "step": 18630 }, { "epoch": 0.4730235686817022, "grad_norm": 0.318359375, "learning_rate": 0.0002832890338535603, "loss": 0.4511, "step": 18635 }, { "epoch": 0.47315048673071797, "grad_norm": 0.33984375, "learning_rate": 0.0002832737892863812, "loss": 0.4802, "step": 18640 }, { "epoch": 0.47327740477973373, "grad_norm": 0.337890625, "learning_rate": 0.0002832585381795104, "loss": 0.4743, "step": 18645 }, { "epoch": 0.4734043228287495, "grad_norm": 0.34375, "learning_rate": 0.0002832432805336962, "loss": 0.4768, "step": 18650 }, { "epoch": 0.4735312408777652, "grad_norm": 0.337890625, "learning_rate": 0.0002832280163496873, "loss": 0.4843, "step": 18655 }, { "epoch": 0.473658158926781, "grad_norm": 0.349609375, "learning_rate": 0.0002832127456282327, "loss": 0.4844, "step": 18660 }, { "epoch": 0.47378507697579675, "grad_norm": 0.333984375, "learning_rate": 0.00028319746837008166, "loss": 0.4566, "step": 18665 }, { "epoch": 0.47391199502481246, "grad_norm": 0.3359375, "learning_rate": 0.000283182184575984, "loss": 0.481, "step": 18670 }, { "epoch": 0.4740389130738282, "grad_norm": 0.435546875, "learning_rate": 0.00028316689424668956, "loss": 0.5087, "step": 18675 }, { "epoch": 0.474165831122844, "grad_norm": 0.345703125, "learning_rate": 0.00028315159738294865, "loss": 0.4678, "step": 18680 }, { "epoch": 0.4742927491718597, "grad_norm": 0.376953125, "learning_rate": 0.00028313629398551195, "loss": 0.481, "step": 18685 }, { "epoch": 0.4744196672208755, "grad_norm": 0.34765625, "learning_rate": 0.0002831209840551303, "loss": 0.466, "step": 18690 }, { "epoch": 0.47454658526989124, "grad_norm": 0.333984375, "learning_rate": 0.000283105667592555, "loss": 0.468, "step": 18695 }, { "epoch": 0.474673503318907, "grad_norm": 0.33203125, "learning_rate": 0.0002830903445985376, "loss": 0.4597, "step": 18700 }, { "epoch": 0.4748004213679227, "grad_norm": 0.349609375, "learning_rate": 0.00028307501507382997, "loss": 0.4596, "step": 18705 }, { "epoch": 0.4749273394169385, "grad_norm": 0.384765625, "learning_rate": 0.0002830596790191844, "loss": 0.5043, "step": 18710 }, { "epoch": 0.47505425746595425, "grad_norm": 0.322265625, "learning_rate": 0.00028304433643535335, "loss": 0.4755, "step": 18715 }, { "epoch": 0.47518117551496997, "grad_norm": 0.345703125, "learning_rate": 0.00028302898732308977, "loss": 0.4616, "step": 18720 }, { "epoch": 0.47530809356398573, "grad_norm": 0.35546875, "learning_rate": 0.0002830136316831468, "loss": 0.4561, "step": 18725 }, { "epoch": 0.4754350116130015, "grad_norm": 0.337890625, "learning_rate": 0.0002829982695162778, "loss": 0.4338, "step": 18730 }, { "epoch": 0.4755619296620172, "grad_norm": 0.361328125, "learning_rate": 0.0002829829008232368, "loss": 0.4662, "step": 18735 }, { "epoch": 0.475688847711033, "grad_norm": 0.345703125, "learning_rate": 0.0002829675256047777, "loss": 0.4829, "step": 18740 }, { "epoch": 0.47581576576004875, "grad_norm": 0.328125, "learning_rate": 0.00028295214386165517, "loss": 0.4496, "step": 18745 }, { "epoch": 0.47594268380906446, "grad_norm": 0.361328125, "learning_rate": 0.00028293675559462394, "loss": 0.4803, "step": 18750 }, { "epoch": 0.4760696018580802, "grad_norm": 0.330078125, "learning_rate": 0.000282921360804439, "loss": 0.4712, "step": 18755 }, { "epoch": 0.476196519907096, "grad_norm": 0.341796875, "learning_rate": 0.0002829059594918559, "loss": 0.4826, "step": 18760 }, { "epoch": 0.47632343795611176, "grad_norm": 0.345703125, "learning_rate": 0.0002828905516576302, "loss": 0.4503, "step": 18765 }, { "epoch": 0.4764503560051275, "grad_norm": 0.33203125, "learning_rate": 0.00028287513730251813, "loss": 0.484, "step": 18770 }, { "epoch": 0.47657727405414324, "grad_norm": 0.2890625, "learning_rate": 0.000282859716427276, "loss": 0.458, "step": 18775 }, { "epoch": 0.476704192103159, "grad_norm": 0.353515625, "learning_rate": 0.0002828442890326605, "loss": 0.4634, "step": 18780 }, { "epoch": 0.4768311101521747, "grad_norm": 0.32421875, "learning_rate": 0.0002828288551194287, "loss": 0.5033, "step": 18785 }, { "epoch": 0.4769580282011905, "grad_norm": 0.322265625, "learning_rate": 0.00028281341468833785, "loss": 0.4843, "step": 18790 }, { "epoch": 0.47708494625020625, "grad_norm": 0.337890625, "learning_rate": 0.00028279796774014566, "loss": 0.4933, "step": 18795 }, { "epoch": 0.47721186429922197, "grad_norm": 0.328125, "learning_rate": 0.0002827825142756101, "loss": 0.4217, "step": 18800 }, { "epoch": 0.47733878234823773, "grad_norm": 0.34765625, "learning_rate": 0.00028276705429548945, "loss": 0.457, "step": 18805 }, { "epoch": 0.4774657003972535, "grad_norm": 0.33984375, "learning_rate": 0.00028275158780054227, "loss": 0.4682, "step": 18810 }, { "epoch": 0.47759261844626927, "grad_norm": 0.349609375, "learning_rate": 0.00028273611479152764, "loss": 0.4671, "step": 18815 }, { "epoch": 0.477719536495285, "grad_norm": 0.302734375, "learning_rate": 0.0002827206352692047, "loss": 0.4616, "step": 18820 }, { "epoch": 0.47784645454430075, "grad_norm": 0.32421875, "learning_rate": 0.000282705149234333, "loss": 0.4493, "step": 18825 }, { "epoch": 0.4779733725933165, "grad_norm": 0.3515625, "learning_rate": 0.0002826896566876726, "loss": 0.4599, "step": 18830 }, { "epoch": 0.4781002906423322, "grad_norm": 0.37109375, "learning_rate": 0.0002826741576299835, "loss": 0.4865, "step": 18835 }, { "epoch": 0.478227208691348, "grad_norm": 0.380859375, "learning_rate": 0.0002826586520620264, "loss": 0.5202, "step": 18840 }, { "epoch": 0.47835412674036376, "grad_norm": 0.359375, "learning_rate": 0.0002826431399845621, "loss": 0.4731, "step": 18845 }, { "epoch": 0.4784810447893795, "grad_norm": 0.345703125, "learning_rate": 0.0002826276213983517, "loss": 0.4883, "step": 18850 }, { "epoch": 0.47860796283839524, "grad_norm": 0.341796875, "learning_rate": 0.00028261209630415677, "loss": 0.4848, "step": 18855 }, { "epoch": 0.478734880887411, "grad_norm": 0.365234375, "learning_rate": 0.0002825965647027391, "loss": 0.4779, "step": 18860 }, { "epoch": 0.4788617989364268, "grad_norm": 0.35546875, "learning_rate": 0.0002825810265948608, "loss": 0.4913, "step": 18865 }, { "epoch": 0.4789887169854425, "grad_norm": 0.32421875, "learning_rate": 0.0002825654819812844, "loss": 0.469, "step": 18870 }, { "epoch": 0.47911563503445825, "grad_norm": 0.33984375, "learning_rate": 0.00028254993086277255, "loss": 0.4553, "step": 18875 }, { "epoch": 0.479242553083474, "grad_norm": 0.349609375, "learning_rate": 0.00028253437324008844, "loss": 0.4713, "step": 18880 }, { "epoch": 0.47936947113248973, "grad_norm": 0.345703125, "learning_rate": 0.00028251880911399536, "loss": 0.4805, "step": 18885 }, { "epoch": 0.4794963891815055, "grad_norm": 0.328125, "learning_rate": 0.0002825032384852572, "loss": 0.4679, "step": 18890 }, { "epoch": 0.47962330723052127, "grad_norm": 0.337890625, "learning_rate": 0.0002824876613546378, "loss": 0.4517, "step": 18895 }, { "epoch": 0.479750225279537, "grad_norm": 0.3359375, "learning_rate": 0.0002824720777229017, "loss": 0.4864, "step": 18900 }, { "epoch": 0.47987714332855275, "grad_norm": 0.3359375, "learning_rate": 0.0002824564875908135, "loss": 0.5186, "step": 18905 }, { "epoch": 0.4800040613775685, "grad_norm": 0.369140625, "learning_rate": 0.0002824408909591382, "loss": 0.488, "step": 18910 }, { "epoch": 0.4801309794265843, "grad_norm": 0.33203125, "learning_rate": 0.0002824252878286412, "loss": 0.4867, "step": 18915 }, { "epoch": 0.4802578974756, "grad_norm": 0.341796875, "learning_rate": 0.000282409678200088, "loss": 0.455, "step": 18920 }, { "epoch": 0.48038481552461576, "grad_norm": 0.3203125, "learning_rate": 0.00028239406207424466, "loss": 0.4667, "step": 18925 }, { "epoch": 0.4805117335736315, "grad_norm": 0.33984375, "learning_rate": 0.0002823784394518774, "loss": 0.4722, "step": 18930 }, { "epoch": 0.48063865162264724, "grad_norm": 0.33984375, "learning_rate": 0.0002823628103337529, "loss": 0.4656, "step": 18935 }, { "epoch": 0.480765569671663, "grad_norm": 0.359375, "learning_rate": 0.00028234717472063804, "loss": 0.4802, "step": 18940 }, { "epoch": 0.4808924877206788, "grad_norm": 0.35546875, "learning_rate": 0.0002823315326133, "loss": 0.4819, "step": 18945 }, { "epoch": 0.4810194057696945, "grad_norm": 0.337890625, "learning_rate": 0.00028231588401250637, "loss": 0.4631, "step": 18950 }, { "epoch": 0.48114632381871025, "grad_norm": 0.3359375, "learning_rate": 0.000282300228919025, "loss": 0.4378, "step": 18955 }, { "epoch": 0.481273241867726, "grad_norm": 0.34375, "learning_rate": 0.0002822845673336241, "loss": 0.4798, "step": 18960 }, { "epoch": 0.48140015991674173, "grad_norm": 0.3515625, "learning_rate": 0.0002822688992570722, "loss": 0.4843, "step": 18965 }, { "epoch": 0.4815270779657575, "grad_norm": 0.337890625, "learning_rate": 0.00028225322469013813, "loss": 0.4835, "step": 18970 }, { "epoch": 0.48165399601477327, "grad_norm": 0.330078125, "learning_rate": 0.0002822375436335909, "loss": 0.4677, "step": 18975 }, { "epoch": 0.48178091406378903, "grad_norm": 0.365234375, "learning_rate": 0.0002822218560882002, "loss": 0.5053, "step": 18980 }, { "epoch": 0.48190783211280475, "grad_norm": 0.365234375, "learning_rate": 0.0002822061620547356, "loss": 0.48, "step": 18985 }, { "epoch": 0.4820347501618205, "grad_norm": 0.328125, "learning_rate": 0.0002821904615339674, "loss": 0.4476, "step": 18990 }, { "epoch": 0.4821616682108363, "grad_norm": 0.33203125, "learning_rate": 0.00028217475452666576, "loss": 0.4765, "step": 18995 }, { "epoch": 0.482288586259852, "grad_norm": 0.3984375, "learning_rate": 0.0002821590410336017, "loss": 0.489, "step": 19000 }, { "epoch": 0.48241550430886776, "grad_norm": 0.373046875, "learning_rate": 0.0002821433210555461, "loss": 0.5108, "step": 19005 }, { "epoch": 0.4825424223578835, "grad_norm": 0.337890625, "learning_rate": 0.00028212759459327036, "loss": 0.4608, "step": 19010 }, { "epoch": 0.48266934040689924, "grad_norm": 0.322265625, "learning_rate": 0.00028211186164754616, "loss": 0.4669, "step": 19015 }, { "epoch": 0.482796258455915, "grad_norm": 0.333984375, "learning_rate": 0.0002820961222191456, "loss": 0.4745, "step": 19020 }, { "epoch": 0.4829231765049308, "grad_norm": 0.31640625, "learning_rate": 0.00028208037630884084, "loss": 0.4471, "step": 19025 }, { "epoch": 0.48305009455394654, "grad_norm": 0.3359375, "learning_rate": 0.0002820646239174047, "loss": 0.4716, "step": 19030 }, { "epoch": 0.48317701260296225, "grad_norm": 0.3125, "learning_rate": 0.0002820488650456101, "loss": 0.4728, "step": 19035 }, { "epoch": 0.483303930651978, "grad_norm": 0.3515625, "learning_rate": 0.00028203309969423025, "loss": 0.4856, "step": 19040 }, { "epoch": 0.4834308487009938, "grad_norm": 0.34765625, "learning_rate": 0.0002820173278640388, "loss": 0.477, "step": 19045 }, { "epoch": 0.4835577667500095, "grad_norm": 0.34375, "learning_rate": 0.00028200154955580964, "loss": 0.4721, "step": 19050 }, { "epoch": 0.48368468479902527, "grad_norm": 0.34375, "learning_rate": 0.000281985764770317, "loss": 0.4832, "step": 19055 }, { "epoch": 0.48381160284804103, "grad_norm": 0.3359375, "learning_rate": 0.0002819699735083355, "loss": 0.4668, "step": 19060 }, { "epoch": 0.48393852089705675, "grad_norm": 0.3515625, "learning_rate": 0.00028195417577064, "loss": 0.4545, "step": 19065 }, { "epoch": 0.4840654389460725, "grad_norm": 0.357421875, "learning_rate": 0.00028193837155800563, "loss": 0.4762, "step": 19070 }, { "epoch": 0.4841923569950883, "grad_norm": 0.373046875, "learning_rate": 0.0002819225608712079, "loss": 0.501, "step": 19075 }, { "epoch": 0.48431927504410405, "grad_norm": 0.34765625, "learning_rate": 0.00028190674371102273, "loss": 0.4407, "step": 19080 }, { "epoch": 0.48444619309311976, "grad_norm": 0.32421875, "learning_rate": 0.0002818909200782261, "loss": 0.4523, "step": 19085 }, { "epoch": 0.4845731111421355, "grad_norm": 0.361328125, "learning_rate": 0.00028187508997359455, "loss": 0.4956, "step": 19090 }, { "epoch": 0.4847000291911513, "grad_norm": 0.34765625, "learning_rate": 0.00028185925339790495, "loss": 0.4718, "step": 19095 }, { "epoch": 0.484826947240167, "grad_norm": 0.359375, "learning_rate": 0.0002818434103519343, "loss": 0.4652, "step": 19100 }, { "epoch": 0.4849538652891828, "grad_norm": 0.349609375, "learning_rate": 0.00028182756083645997, "loss": 0.4802, "step": 19105 }, { "epoch": 0.48508078333819854, "grad_norm": 0.33203125, "learning_rate": 0.00028181170485225976, "loss": 0.4977, "step": 19110 }, { "epoch": 0.48520770138721425, "grad_norm": 0.357421875, "learning_rate": 0.00028179584240011174, "loss": 0.4958, "step": 19115 }, { "epoch": 0.48533461943623, "grad_norm": 0.330078125, "learning_rate": 0.00028177997348079414, "loss": 0.4492, "step": 19120 }, { "epoch": 0.4854615374852458, "grad_norm": 0.326171875, "learning_rate": 0.0002817640980950858, "loss": 0.4811, "step": 19125 }, { "epoch": 0.48558845553426155, "grad_norm": 0.328125, "learning_rate": 0.0002817482162437657, "loss": 0.5007, "step": 19130 }, { "epoch": 0.48571537358327727, "grad_norm": 0.353515625, "learning_rate": 0.00028173232792761305, "loss": 0.4858, "step": 19135 }, { "epoch": 0.48584229163229303, "grad_norm": 0.34765625, "learning_rate": 0.00028171643314740753, "loss": 0.4942, "step": 19140 }, { "epoch": 0.4859692096813088, "grad_norm": 0.78515625, "learning_rate": 0.0002817005319039291, "loss": 0.4617, "step": 19145 }, { "epoch": 0.4860961277303245, "grad_norm": 0.34375, "learning_rate": 0.00028168462419795806, "loss": 0.469, "step": 19150 }, { "epoch": 0.4862230457793403, "grad_norm": 0.333984375, "learning_rate": 0.000281668710030275, "loss": 0.4828, "step": 19155 }, { "epoch": 0.48634996382835605, "grad_norm": 0.3046875, "learning_rate": 0.0002816527894016607, "loss": 0.4437, "step": 19160 }, { "epoch": 0.48647688187737176, "grad_norm": 0.310546875, "learning_rate": 0.00028163686231289653, "loss": 0.4379, "step": 19165 }, { "epoch": 0.4866037999263875, "grad_norm": 0.3125, "learning_rate": 0.00028162092876476394, "loss": 0.472, "step": 19170 }, { "epoch": 0.4867307179754033, "grad_norm": 0.345703125, "learning_rate": 0.00028160498875804486, "loss": 0.4813, "step": 19175 }, { "epoch": 0.486857636024419, "grad_norm": 0.34375, "learning_rate": 0.00028158904229352134, "loss": 0.5014, "step": 19180 }, { "epoch": 0.48698455407343477, "grad_norm": 0.341796875, "learning_rate": 0.00028157308937197593, "loss": 0.4838, "step": 19185 }, { "epoch": 0.48711147212245054, "grad_norm": 0.357421875, "learning_rate": 0.00028155712999419145, "loss": 0.4942, "step": 19190 }, { "epoch": 0.4872383901714663, "grad_norm": 0.349609375, "learning_rate": 0.00028154116416095106, "loss": 0.4593, "step": 19195 }, { "epoch": 0.487365308220482, "grad_norm": 0.333984375, "learning_rate": 0.0002815251918730381, "loss": 0.4875, "step": 19200 }, { "epoch": 0.4874922262694978, "grad_norm": 0.34375, "learning_rate": 0.00028150921313123634, "loss": 0.4661, "step": 19205 }, { "epoch": 0.48761914431851355, "grad_norm": 0.345703125, "learning_rate": 0.0002814932279363299, "loss": 0.4874, "step": 19210 }, { "epoch": 0.48774606236752927, "grad_norm": 0.3359375, "learning_rate": 0.00028147723628910317, "loss": 0.4877, "step": 19215 }, { "epoch": 0.48787298041654503, "grad_norm": 0.32421875, "learning_rate": 0.00028146123819034076, "loss": 0.5011, "step": 19220 }, { "epoch": 0.4879998984655608, "grad_norm": 0.330078125, "learning_rate": 0.0002814452336408278, "loss": 0.4788, "step": 19225 }, { "epoch": 0.4881268165145765, "grad_norm": 0.328125, "learning_rate": 0.0002814292226413496, "loss": 0.4454, "step": 19230 }, { "epoch": 0.4882537345635923, "grad_norm": 0.36328125, "learning_rate": 0.0002814132051926918, "loss": 0.4795, "step": 19235 }, { "epoch": 0.48838065261260805, "grad_norm": 0.341796875, "learning_rate": 0.00028139718129564035, "loss": 0.4992, "step": 19240 }, { "epoch": 0.4885075706616238, "grad_norm": 0.33203125, "learning_rate": 0.00028138115095098157, "loss": 0.4857, "step": 19245 }, { "epoch": 0.4886344887106395, "grad_norm": 0.337890625, "learning_rate": 0.000281365114159502, "loss": 0.5043, "step": 19250 }, { "epoch": 0.4887614067596553, "grad_norm": 0.365234375, "learning_rate": 0.0002813490709219886, "loss": 0.4817, "step": 19255 }, { "epoch": 0.48888832480867106, "grad_norm": 0.3359375, "learning_rate": 0.0002813330212392287, "loss": 0.4982, "step": 19260 }, { "epoch": 0.48901524285768677, "grad_norm": 0.43359375, "learning_rate": 0.0002813169651120097, "loss": 0.4549, "step": 19265 }, { "epoch": 0.48914216090670254, "grad_norm": 0.345703125, "learning_rate": 0.00028130090254111954, "loss": 0.4684, "step": 19270 }, { "epoch": 0.4892690789557183, "grad_norm": 0.3359375, "learning_rate": 0.0002812848335273464, "loss": 0.4647, "step": 19275 }, { "epoch": 0.489395997004734, "grad_norm": 0.32421875, "learning_rate": 0.0002812687580714788, "loss": 0.4882, "step": 19280 }, { "epoch": 0.4895229150537498, "grad_norm": 0.33203125, "learning_rate": 0.0002812526761743055, "loss": 0.4664, "step": 19285 }, { "epoch": 0.48964983310276555, "grad_norm": 0.322265625, "learning_rate": 0.0002812365878366157, "loss": 0.4742, "step": 19290 }, { "epoch": 0.4897767511517813, "grad_norm": 0.3515625, "learning_rate": 0.00028122049305919877, "loss": 0.4818, "step": 19295 }, { "epoch": 0.48990366920079703, "grad_norm": 0.30859375, "learning_rate": 0.00028120439184284453, "loss": 0.4585, "step": 19300 }, { "epoch": 0.4900305872498128, "grad_norm": 0.314453125, "learning_rate": 0.00028118828418834303, "loss": 0.4605, "step": 19305 }, { "epoch": 0.49015750529882857, "grad_norm": 0.36328125, "learning_rate": 0.0002811721700964847, "loss": 0.498, "step": 19310 }, { "epoch": 0.4902844233478443, "grad_norm": 0.349609375, "learning_rate": 0.00028115604956806023, "loss": 0.4715, "step": 19315 }, { "epoch": 0.49041134139686005, "grad_norm": 0.36328125, "learning_rate": 0.0002811399226038607, "loss": 0.4985, "step": 19320 }, { "epoch": 0.4905382594458758, "grad_norm": 0.35546875, "learning_rate": 0.00028112378920467734, "loss": 0.5192, "step": 19325 }, { "epoch": 0.4906651774948915, "grad_norm": 0.337890625, "learning_rate": 0.00028110764937130185, "loss": 0.4736, "step": 19330 }, { "epoch": 0.4907920955439073, "grad_norm": 0.34375, "learning_rate": 0.0002810915031045263, "loss": 0.4594, "step": 19335 }, { "epoch": 0.49091901359292306, "grad_norm": 0.333984375, "learning_rate": 0.0002810753504051429, "loss": 0.4499, "step": 19340 }, { "epoch": 0.4910459316419388, "grad_norm": 0.337890625, "learning_rate": 0.00028105919127394423, "loss": 0.4793, "step": 19345 }, { "epoch": 0.49117284969095454, "grad_norm": 0.345703125, "learning_rate": 0.0002810430257117233, "loss": 0.4831, "step": 19350 }, { "epoch": 0.4912997677399703, "grad_norm": 0.353515625, "learning_rate": 0.00028102685371927326, "loss": 0.4671, "step": 19355 }, { "epoch": 0.4914266857889861, "grad_norm": 0.326171875, "learning_rate": 0.0002810106752973877, "loss": 0.4567, "step": 19360 }, { "epoch": 0.4915536038380018, "grad_norm": 0.380859375, "learning_rate": 0.00028099449044686055, "loss": 0.4909, "step": 19365 }, { "epoch": 0.49168052188701755, "grad_norm": 0.326171875, "learning_rate": 0.00028097829916848586, "loss": 0.4416, "step": 19370 }, { "epoch": 0.4918074399360333, "grad_norm": 0.390625, "learning_rate": 0.0002809621014630582, "loss": 0.4863, "step": 19375 }, { "epoch": 0.49193435798504903, "grad_norm": 0.34765625, "learning_rate": 0.00028094589733137246, "loss": 0.4421, "step": 19380 }, { "epoch": 0.4920612760340648, "grad_norm": 0.33203125, "learning_rate": 0.0002809296867742237, "loss": 0.474, "step": 19385 }, { "epoch": 0.49218819408308057, "grad_norm": 0.3203125, "learning_rate": 0.00028091346979240727, "loss": 0.4662, "step": 19390 }, { "epoch": 0.49231511213209633, "grad_norm": 0.359375, "learning_rate": 0.00028089724638671914, "loss": 0.4826, "step": 19395 }, { "epoch": 0.49244203018111204, "grad_norm": 0.35546875, "learning_rate": 0.00028088101655795517, "loss": 0.4659, "step": 19400 }, { "epoch": 0.4925689482301278, "grad_norm": 0.353515625, "learning_rate": 0.00028086478030691194, "loss": 0.46, "step": 19405 }, { "epoch": 0.4926958662791436, "grad_norm": 0.359375, "learning_rate": 0.000280848537634386, "loss": 0.4405, "step": 19410 }, { "epoch": 0.4928227843281593, "grad_norm": 0.32421875, "learning_rate": 0.00028083228854117447, "loss": 0.451, "step": 19415 }, { "epoch": 0.49294970237717506, "grad_norm": 0.361328125, "learning_rate": 0.00028081603302807466, "loss": 0.4504, "step": 19420 }, { "epoch": 0.4930766204261908, "grad_norm": 0.33984375, "learning_rate": 0.0002807997710958842, "loss": 0.4646, "step": 19425 }, { "epoch": 0.49320353847520654, "grad_norm": 0.287109375, "learning_rate": 0.0002807835027454011, "loss": 0.4523, "step": 19430 }, { "epoch": 0.4933304565242223, "grad_norm": 0.34765625, "learning_rate": 0.00028076722797742363, "loss": 0.482, "step": 19435 }, { "epoch": 0.4934573745732381, "grad_norm": 0.3515625, "learning_rate": 0.0002807509467927503, "loss": 0.4828, "step": 19440 }, { "epoch": 0.4935842926222538, "grad_norm": 0.392578125, "learning_rate": 0.00028073465919218016, "loss": 0.4948, "step": 19445 }, { "epoch": 0.49371121067126955, "grad_norm": 0.3671875, "learning_rate": 0.00028071836517651235, "loss": 0.452, "step": 19450 }, { "epoch": 0.4938381287202853, "grad_norm": 0.345703125, "learning_rate": 0.0002807020647465464, "loss": 0.4891, "step": 19455 }, { "epoch": 0.4939650467693011, "grad_norm": 0.3515625, "learning_rate": 0.0002806857579030823, "loss": 0.4799, "step": 19460 }, { "epoch": 0.4940919648183168, "grad_norm": 0.333984375, "learning_rate": 0.00028066944464692, "loss": 0.4665, "step": 19465 }, { "epoch": 0.49421888286733257, "grad_norm": 0.345703125, "learning_rate": 0.00028065312497886017, "loss": 0.444, "step": 19470 }, { "epoch": 0.49434580091634833, "grad_norm": 0.373046875, "learning_rate": 0.00028063679889970357, "loss": 0.4517, "step": 19475 }, { "epoch": 0.49447271896536404, "grad_norm": 0.341796875, "learning_rate": 0.0002806204664102512, "loss": 0.4971, "step": 19480 }, { "epoch": 0.4945996370143798, "grad_norm": 0.349609375, "learning_rate": 0.0002806041275113046, "loss": 0.438, "step": 19485 }, { "epoch": 0.4947265550633956, "grad_norm": 0.35546875, "learning_rate": 0.0002805877822036655, "loss": 0.4777, "step": 19490 }, { "epoch": 0.4948534731124113, "grad_norm": 0.353515625, "learning_rate": 0.00028057143048813594, "loss": 0.5009, "step": 19495 }, { "epoch": 0.49498039116142706, "grad_norm": 0.345703125, "learning_rate": 0.00028055507236551835, "loss": 0.457, "step": 19500 }, { "epoch": 0.4951073092104428, "grad_norm": 0.35546875, "learning_rate": 0.0002805387078366153, "loss": 0.4985, "step": 19505 }, { "epoch": 0.4952342272594586, "grad_norm": 0.34375, "learning_rate": 0.00028052233690222986, "loss": 0.4593, "step": 19510 }, { "epoch": 0.4953611453084743, "grad_norm": 0.33203125, "learning_rate": 0.0002805059595631654, "loss": 0.4983, "step": 19515 }, { "epoch": 0.49548806335749007, "grad_norm": 0.3359375, "learning_rate": 0.0002804895758202254, "loss": 0.4314, "step": 19520 }, { "epoch": 0.49561498140650584, "grad_norm": 0.3359375, "learning_rate": 0.0002804731856742139, "loss": 0.459, "step": 19525 }, { "epoch": 0.49574189945552155, "grad_norm": 0.337890625, "learning_rate": 0.0002804567891259352, "loss": 0.4457, "step": 19530 }, { "epoch": 0.4958688175045373, "grad_norm": 0.330078125, "learning_rate": 0.0002804403861761938, "loss": 0.4514, "step": 19535 }, { "epoch": 0.4959957355535531, "grad_norm": 0.37109375, "learning_rate": 0.0002804239768257947, "loss": 0.4955, "step": 19540 }, { "epoch": 0.4961226536025688, "grad_norm": 0.353515625, "learning_rate": 0.0002804075610755429, "loss": 0.458, "step": 19545 }, { "epoch": 0.49624957165158456, "grad_norm": 0.34765625, "learning_rate": 0.00028039113892624405, "loss": 0.4758, "step": 19550 }, { "epoch": 0.49637648970060033, "grad_norm": 0.361328125, "learning_rate": 0.00028037471037870395, "loss": 0.4836, "step": 19555 }, { "epoch": 0.4965034077496161, "grad_norm": 0.337890625, "learning_rate": 0.00028035827543372875, "loss": 0.473, "step": 19560 }, { "epoch": 0.4966303257986318, "grad_norm": 0.3203125, "learning_rate": 0.00028034183409212494, "loss": 0.4741, "step": 19565 }, { "epoch": 0.4967572438476476, "grad_norm": 0.353515625, "learning_rate": 0.0002803253863546992, "loss": 0.4585, "step": 19570 }, { "epoch": 0.49688416189666335, "grad_norm": 0.326171875, "learning_rate": 0.0002803089322222587, "loss": 0.4932, "step": 19575 }, { "epoch": 0.49701107994567906, "grad_norm": 0.326171875, "learning_rate": 0.0002802924716956108, "loss": 0.4659, "step": 19580 }, { "epoch": 0.4971379979946948, "grad_norm": 0.3046875, "learning_rate": 0.0002802760047755632, "loss": 0.4484, "step": 19585 }, { "epoch": 0.4972649160437106, "grad_norm": 0.341796875, "learning_rate": 0.0002802595314629239, "loss": 0.4704, "step": 19590 }, { "epoch": 0.4973918340927263, "grad_norm": 0.3046875, "learning_rate": 0.00028024305175850135, "loss": 0.4849, "step": 19595 }, { "epoch": 0.49751875214174207, "grad_norm": 0.37890625, "learning_rate": 0.0002802265656631041, "loss": 0.4827, "step": 19600 }, { "epoch": 0.49764567019075784, "grad_norm": 0.341796875, "learning_rate": 0.00028021007317754115, "loss": 0.4738, "step": 19605 }, { "epoch": 0.4977725882397736, "grad_norm": 0.34375, "learning_rate": 0.00028019357430262177, "loss": 0.4474, "step": 19610 }, { "epoch": 0.4978995062887893, "grad_norm": 0.337890625, "learning_rate": 0.0002801770690391556, "loss": 0.4904, "step": 19615 }, { "epoch": 0.4980264243378051, "grad_norm": 0.345703125, "learning_rate": 0.00028016055738795247, "loss": 0.485, "step": 19620 }, { "epoch": 0.49815334238682085, "grad_norm": 0.375, "learning_rate": 0.0002801440393498227, "loss": 0.4662, "step": 19625 }, { "epoch": 0.49828026043583656, "grad_norm": 0.32421875, "learning_rate": 0.00028012751492557665, "loss": 0.4586, "step": 19630 }, { "epoch": 0.49840717848485233, "grad_norm": 0.322265625, "learning_rate": 0.0002801109841160253, "loss": 0.4674, "step": 19635 }, { "epoch": 0.4985340965338681, "grad_norm": 0.345703125, "learning_rate": 0.00028009444692197983, "loss": 0.4756, "step": 19640 }, { "epoch": 0.4986610145828838, "grad_norm": 0.345703125, "learning_rate": 0.00028007790334425166, "loss": 0.4584, "step": 19645 }, { "epoch": 0.4987879326318996, "grad_norm": 0.314453125, "learning_rate": 0.0002800613533836526, "loss": 0.453, "step": 19650 }, { "epoch": 0.49891485068091534, "grad_norm": 0.328125, "learning_rate": 0.00028004479704099475, "loss": 0.4614, "step": 19655 }, { "epoch": 0.49904176872993106, "grad_norm": 0.3671875, "learning_rate": 0.00028002823431709045, "loss": 0.4921, "step": 19660 }, { "epoch": 0.4991686867789468, "grad_norm": 0.33203125, "learning_rate": 0.00028001166521275254, "loss": 0.4785, "step": 19665 }, { "epoch": 0.4992956048279626, "grad_norm": 0.373046875, "learning_rate": 0.000279995089728794, "loss": 0.4563, "step": 19670 }, { "epoch": 0.49942252287697836, "grad_norm": 0.341796875, "learning_rate": 0.0002799785078660282, "loss": 0.4687, "step": 19675 }, { "epoch": 0.49954944092599407, "grad_norm": 0.35546875, "learning_rate": 0.00027996191962526874, "loss": 0.4741, "step": 19680 }, { "epoch": 0.49967635897500984, "grad_norm": 0.359375, "learning_rate": 0.00027994532500732973, "loss": 0.4824, "step": 19685 }, { "epoch": 0.4998032770240256, "grad_norm": 0.341796875, "learning_rate": 0.0002799287240130253, "loss": 0.4782, "step": 19690 }, { "epoch": 0.4999301950730413, "grad_norm": 0.404296875, "learning_rate": 0.00027991211664317017, "loss": 0.4464, "step": 19695 }, { "epoch": 0.5000571131220571, "grad_norm": 0.359375, "learning_rate": 0.0002798955028985793, "loss": 0.4606, "step": 19700 }, { "epoch": 0.5001840311710728, "grad_norm": 0.35546875, "learning_rate": 0.00027987888278006775, "loss": 0.4549, "step": 19705 }, { "epoch": 0.5003109492200886, "grad_norm": 0.365234375, "learning_rate": 0.0002798622562884512, "loss": 0.4681, "step": 19710 }, { "epoch": 0.5004378672691043, "grad_norm": 0.333984375, "learning_rate": 0.00027984562342454547, "loss": 0.4702, "step": 19715 }, { "epoch": 0.5005647853181201, "grad_norm": 0.359375, "learning_rate": 0.00027982898418916667, "loss": 0.5077, "step": 19720 }, { "epoch": 0.5006917033671359, "grad_norm": 0.333984375, "learning_rate": 0.0002798123385831314, "loss": 0.4637, "step": 19725 }, { "epoch": 0.5008186214161516, "grad_norm": 0.349609375, "learning_rate": 0.00027979568660725635, "loss": 0.4687, "step": 19730 }, { "epoch": 0.5009455394651673, "grad_norm": 0.349609375, "learning_rate": 0.0002797790282623587, "loss": 0.4706, "step": 19735 }, { "epoch": 0.5010724575141831, "grad_norm": 0.375, "learning_rate": 0.00027976236354925577, "loss": 0.4708, "step": 19740 }, { "epoch": 0.5011993755631988, "grad_norm": 0.3515625, "learning_rate": 0.00027974569246876535, "loss": 0.4785, "step": 19745 }, { "epoch": 0.5013262936122146, "grad_norm": 0.326171875, "learning_rate": 0.0002797290150217055, "loss": 0.4471, "step": 19750 }, { "epoch": 0.5014532116612304, "grad_norm": 0.35546875, "learning_rate": 0.0002797123312088946, "loss": 0.4691, "step": 19755 }, { "epoch": 0.5015801297102461, "grad_norm": 0.32421875, "learning_rate": 0.0002796956410311512, "loss": 0.4596, "step": 19760 }, { "epoch": 0.5017070477592619, "grad_norm": 0.349609375, "learning_rate": 0.00027967894448929445, "loss": 0.5016, "step": 19765 }, { "epoch": 0.5018339658082775, "grad_norm": 0.349609375, "learning_rate": 0.0002796622415841435, "loss": 0.4919, "step": 19770 }, { "epoch": 0.5019608838572933, "grad_norm": 0.34375, "learning_rate": 0.00027964553231651804, "loss": 0.4744, "step": 19775 }, { "epoch": 0.5020878019063091, "grad_norm": 0.37109375, "learning_rate": 0.00027962881668723794, "loss": 0.4742, "step": 19780 }, { "epoch": 0.5022147199553249, "grad_norm": 0.32421875, "learning_rate": 0.0002796120946971234, "loss": 0.4697, "step": 19785 }, { "epoch": 0.5023416380043406, "grad_norm": 0.345703125, "learning_rate": 0.000279595366346995, "loss": 0.5061, "step": 19790 }, { "epoch": 0.5024685560533564, "grad_norm": 0.33203125, "learning_rate": 0.00027957863163767364, "loss": 0.4584, "step": 19795 }, { "epoch": 0.5025954741023722, "grad_norm": 0.318359375, "learning_rate": 0.0002795618905699804, "loss": 0.4674, "step": 19800 }, { "epoch": 0.5027223921513878, "grad_norm": 0.41015625, "learning_rate": 0.00027954514314473686, "loss": 0.4951, "step": 19805 }, { "epoch": 0.5028493102004036, "grad_norm": 0.345703125, "learning_rate": 0.00027952838936276466, "loss": 0.4676, "step": 19810 }, { "epoch": 0.5029762282494193, "grad_norm": 0.337890625, "learning_rate": 0.0002795116292248861, "loss": 0.467, "step": 19815 }, { "epoch": 0.5031031462984351, "grad_norm": 0.322265625, "learning_rate": 0.0002794948627319234, "loss": 0.4532, "step": 19820 }, { "epoch": 0.5032300643474509, "grad_norm": 0.357421875, "learning_rate": 0.0002794780898846994, "loss": 0.5256, "step": 19825 }, { "epoch": 0.5033569823964666, "grad_norm": 0.353515625, "learning_rate": 0.0002794613106840371, "loss": 0.4534, "step": 19830 }, { "epoch": 0.5034839004454823, "grad_norm": 0.359375, "learning_rate": 0.00027944452513075984, "loss": 0.472, "step": 19835 }, { "epoch": 0.5036108184944981, "grad_norm": 0.435546875, "learning_rate": 0.0002794277332256913, "loss": 0.4754, "step": 19840 }, { "epoch": 0.5037377365435138, "grad_norm": 0.32421875, "learning_rate": 0.00027941093496965547, "loss": 0.4389, "step": 19845 }, { "epoch": 0.5038646545925296, "grad_norm": 0.34765625, "learning_rate": 0.0002793941303634766, "loss": 0.4737, "step": 19850 }, { "epoch": 0.5039915726415454, "grad_norm": 0.3515625, "learning_rate": 0.0002793773194079793, "loss": 0.4919, "step": 19855 }, { "epoch": 0.5041184906905611, "grad_norm": 0.3515625, "learning_rate": 0.00027936050210398847, "loss": 0.4787, "step": 19860 }, { "epoch": 0.5042454087395769, "grad_norm": 0.3359375, "learning_rate": 0.00027934367845232935, "loss": 0.4785, "step": 19865 }, { "epoch": 0.5043723267885926, "grad_norm": 0.3359375, "learning_rate": 0.0002793268484538274, "loss": 0.4951, "step": 19870 }, { "epoch": 0.5044992448376083, "grad_norm": 0.37109375, "learning_rate": 0.00027931001210930857, "loss": 0.4809, "step": 19875 }, { "epoch": 0.5046261628866241, "grad_norm": 0.36328125, "learning_rate": 0.0002792931694195989, "loss": 0.4816, "step": 19880 }, { "epoch": 0.5047530809356399, "grad_norm": 0.34765625, "learning_rate": 0.00027927632038552493, "loss": 0.4898, "step": 19885 }, { "epoch": 0.5048799989846556, "grad_norm": 0.369140625, "learning_rate": 0.00027925946500791343, "loss": 0.4757, "step": 19890 }, { "epoch": 0.5050069170336714, "grad_norm": 0.34765625, "learning_rate": 0.0002792426032875915, "loss": 0.4622, "step": 19895 }, { "epoch": 0.505133835082687, "grad_norm": 0.337890625, "learning_rate": 0.00027922573522538646, "loss": 0.4668, "step": 19900 }, { "epoch": 0.5052607531317028, "grad_norm": 0.326171875, "learning_rate": 0.0002792088608221261, "loss": 0.473, "step": 19905 }, { "epoch": 0.5053876711807186, "grad_norm": 0.3671875, "learning_rate": 0.0002791919800786384, "loss": 0.479, "step": 19910 }, { "epoch": 0.5055145892297344, "grad_norm": 0.33984375, "learning_rate": 0.00027917509299575167, "loss": 0.4592, "step": 19915 }, { "epoch": 0.5056415072787501, "grad_norm": 0.369140625, "learning_rate": 0.0002791581995742946, "loss": 0.492, "step": 19920 }, { "epoch": 0.5057684253277659, "grad_norm": 0.3359375, "learning_rate": 0.0002791412998150961, "loss": 0.484, "step": 19925 }, { "epoch": 0.5058953433767817, "grad_norm": 0.34765625, "learning_rate": 0.00027912439371898553, "loss": 0.4838, "step": 19930 }, { "epoch": 0.5060222614257973, "grad_norm": 0.369140625, "learning_rate": 0.00027910748128679233, "loss": 0.4524, "step": 19935 }, { "epoch": 0.5061491794748131, "grad_norm": 0.33984375, "learning_rate": 0.00027909056251934644, "loss": 0.4903, "step": 19940 }, { "epoch": 0.5062760975238288, "grad_norm": 0.361328125, "learning_rate": 0.00027907363741747814, "loss": 0.4887, "step": 19945 }, { "epoch": 0.5064030155728446, "grad_norm": 0.318359375, "learning_rate": 0.0002790567059820178, "loss": 0.4887, "step": 19950 }, { "epoch": 0.5065299336218604, "grad_norm": 0.359375, "learning_rate": 0.00027903976821379636, "loss": 0.4949, "step": 19955 }, { "epoch": 0.5066568516708762, "grad_norm": 0.330078125, "learning_rate": 0.0002790228241136449, "loss": 0.4415, "step": 19960 }, { "epoch": 0.5067837697198919, "grad_norm": 0.353515625, "learning_rate": 0.0002790058736823948, "loss": 0.468, "step": 19965 }, { "epoch": 0.5069106877689076, "grad_norm": 0.333984375, "learning_rate": 0.0002789889169208779, "loss": 0.454, "step": 19970 }, { "epoch": 0.5070376058179233, "grad_norm": 0.32421875, "learning_rate": 0.00027897195382992627, "loss": 0.4682, "step": 19975 }, { "epoch": 0.5071645238669391, "grad_norm": 0.3203125, "learning_rate": 0.00027895498441037216, "loss": 0.4759, "step": 19980 }, { "epoch": 0.5072914419159549, "grad_norm": 0.345703125, "learning_rate": 0.00027893800866304837, "loss": 0.4895, "step": 19985 }, { "epoch": 0.5074183599649706, "grad_norm": 0.3203125, "learning_rate": 0.0002789210265887879, "loss": 0.4323, "step": 19990 }, { "epoch": 0.5075452780139864, "grad_norm": 0.34375, "learning_rate": 0.0002789040381884239, "loss": 0.473, "step": 19995 }, { "epoch": 0.5076721960630021, "grad_norm": 0.3359375, "learning_rate": 0.0002788870434627902, "loss": 0.518, "step": 20000 }, { "epoch": 0.5077991141120178, "grad_norm": 0.328125, "learning_rate": 0.0002788700424127206, "loss": 0.4702, "step": 20005 }, { "epoch": 0.5079260321610336, "grad_norm": 0.33984375, "learning_rate": 0.00027885303503904927, "loss": 0.4528, "step": 20010 }, { "epoch": 0.5080529502100494, "grad_norm": 0.361328125, "learning_rate": 0.0002788360213426109, "loss": 0.4584, "step": 20015 }, { "epoch": 0.5081798682590651, "grad_norm": 0.31640625, "learning_rate": 0.0002788190013242403, "loss": 0.4891, "step": 20020 }, { "epoch": 0.5083067863080809, "grad_norm": 0.326171875, "learning_rate": 0.0002788019749847726, "loss": 0.485, "step": 20025 }, { "epoch": 0.5084337043570967, "grad_norm": 0.33203125, "learning_rate": 0.0002787849423250433, "loss": 0.4423, "step": 20030 }, { "epoch": 0.5085606224061123, "grad_norm": 0.34375, "learning_rate": 0.00027876790334588813, "loss": 0.4788, "step": 20035 }, { "epoch": 0.5086875404551281, "grad_norm": 0.359375, "learning_rate": 0.0002787508580481433, "loss": 0.4728, "step": 20040 }, { "epoch": 0.5088144585041439, "grad_norm": 0.361328125, "learning_rate": 0.00027873380643264514, "loss": 0.4692, "step": 20045 }, { "epoch": 0.5089413765531596, "grad_norm": 0.3671875, "learning_rate": 0.00027871674850023036, "loss": 0.4848, "step": 20050 }, { "epoch": 0.5090682946021754, "grad_norm": 0.34375, "learning_rate": 0.00027869968425173604, "loss": 0.4463, "step": 20055 }, { "epoch": 0.5091952126511912, "grad_norm": 0.326171875, "learning_rate": 0.00027868261368799944, "loss": 0.4846, "step": 20060 }, { "epoch": 0.5093221307002068, "grad_norm": 0.32421875, "learning_rate": 0.00027866553680985827, "loss": 0.4799, "step": 20065 }, { "epoch": 0.5094490487492226, "grad_norm": 0.35546875, "learning_rate": 0.00027864845361815047, "loss": 0.4556, "step": 20070 }, { "epoch": 0.5095759667982384, "grad_norm": 0.3671875, "learning_rate": 0.0002786313641137143, "loss": 0.4863, "step": 20075 }, { "epoch": 0.5097028848472541, "grad_norm": 0.333984375, "learning_rate": 0.00027861426829738827, "loss": 0.4611, "step": 20080 }, { "epoch": 0.5098298028962699, "grad_norm": 0.3359375, "learning_rate": 0.0002785971661700114, "loss": 0.4824, "step": 20085 }, { "epoch": 0.5099567209452857, "grad_norm": 0.33203125, "learning_rate": 0.0002785800577324228, "loss": 0.4778, "step": 20090 }, { "epoch": 0.5100836389943014, "grad_norm": 0.3359375, "learning_rate": 0.00027856294298546195, "loss": 0.4483, "step": 20095 }, { "epoch": 0.5102105570433171, "grad_norm": 0.44140625, "learning_rate": 0.0002785458219299687, "loss": 0.4846, "step": 20100 }, { "epoch": 0.5103374750923328, "grad_norm": 0.51953125, "learning_rate": 0.00027852869456678316, "loss": 0.4635, "step": 20105 }, { "epoch": 0.5104643931413486, "grad_norm": 0.384765625, "learning_rate": 0.00027851156089674583, "loss": 0.5003, "step": 20110 }, { "epoch": 0.5105913111903644, "grad_norm": 0.34765625, "learning_rate": 0.0002784944209206974, "loss": 0.4892, "step": 20115 }, { "epoch": 0.5107182292393802, "grad_norm": 0.3515625, "learning_rate": 0.00027847727463947887, "loss": 0.4575, "step": 20120 }, { "epoch": 0.5108451472883959, "grad_norm": 0.3828125, "learning_rate": 0.0002784601220539317, "loss": 0.4954, "step": 20125 }, { "epoch": 0.5109720653374117, "grad_norm": 0.3828125, "learning_rate": 0.00027844296316489745, "loss": 0.4762, "step": 20130 }, { "epoch": 0.5110989833864273, "grad_norm": 0.333984375, "learning_rate": 0.0002784257979732182, "loss": 0.466, "step": 20135 }, { "epoch": 0.5112259014354431, "grad_norm": 0.361328125, "learning_rate": 0.0002784086264797362, "loss": 0.4866, "step": 20140 }, { "epoch": 0.5113528194844589, "grad_norm": 0.34765625, "learning_rate": 0.00027839144868529406, "loss": 0.4641, "step": 20145 }, { "epoch": 0.5114797375334746, "grad_norm": 0.40234375, "learning_rate": 0.00027837426459073465, "loss": 0.4783, "step": 20150 }, { "epoch": 0.5116066555824904, "grad_norm": 0.330078125, "learning_rate": 0.00027835707419690125, "loss": 0.4883, "step": 20155 }, { "epoch": 0.5117335736315062, "grad_norm": 0.34375, "learning_rate": 0.0002783398775046373, "loss": 0.4876, "step": 20160 }, { "epoch": 0.5118604916805218, "grad_norm": 0.345703125, "learning_rate": 0.00027832267451478675, "loss": 0.473, "step": 20165 }, { "epoch": 0.5119874097295376, "grad_norm": 0.359375, "learning_rate": 0.00027830546522819366, "loss": 0.4654, "step": 20170 }, { "epoch": 0.5121143277785534, "grad_norm": 0.3203125, "learning_rate": 0.00027828824964570246, "loss": 0.4669, "step": 20175 }, { "epoch": 0.5122412458275691, "grad_norm": 0.35546875, "learning_rate": 0.00027827102776815796, "loss": 0.4882, "step": 20180 }, { "epoch": 0.5123681638765849, "grad_norm": 0.33984375, "learning_rate": 0.0002782537995964052, "loss": 0.4687, "step": 20185 }, { "epoch": 0.5124950819256007, "grad_norm": 0.38671875, "learning_rate": 0.00027823656513128965, "loss": 0.4811, "step": 20190 }, { "epoch": 0.5126219999746164, "grad_norm": 0.35546875, "learning_rate": 0.00027821932437365686, "loss": 0.4918, "step": 20195 }, { "epoch": 0.5127489180236321, "grad_norm": 0.36328125, "learning_rate": 0.00027820207732435294, "loss": 0.4621, "step": 20200 }, { "epoch": 0.5128758360726479, "grad_norm": 0.361328125, "learning_rate": 0.0002781848239842242, "loss": 0.4644, "step": 20205 }, { "epoch": 0.5130027541216636, "grad_norm": 0.296875, "learning_rate": 0.00027816756435411716, "loss": 0.4597, "step": 20210 }, { "epoch": 0.5131296721706794, "grad_norm": 0.306640625, "learning_rate": 0.0002781502984348787, "loss": 0.4709, "step": 20215 }, { "epoch": 0.5132565902196952, "grad_norm": 0.365234375, "learning_rate": 0.0002781330262273563, "loss": 0.474, "step": 20220 }, { "epoch": 0.5133835082687109, "grad_norm": 0.345703125, "learning_rate": 0.00027811574773239727, "loss": 0.4765, "step": 20225 }, { "epoch": 0.5135104263177267, "grad_norm": 0.375, "learning_rate": 0.00027809846295084954, "loss": 0.4678, "step": 20230 }, { "epoch": 0.5136373443667424, "grad_norm": 0.376953125, "learning_rate": 0.00027808117188356127, "loss": 0.4953, "step": 20235 }, { "epoch": 0.5137642624157581, "grad_norm": 0.318359375, "learning_rate": 0.00027806387453138085, "loss": 0.4824, "step": 20240 }, { "epoch": 0.5138911804647739, "grad_norm": 0.36328125, "learning_rate": 0.00027804657089515716, "loss": 0.5065, "step": 20245 }, { "epoch": 0.5140180985137897, "grad_norm": 0.337890625, "learning_rate": 0.0002780292609757393, "loss": 0.4836, "step": 20250 }, { "epoch": 0.5141450165628054, "grad_norm": 0.34765625, "learning_rate": 0.00027801194477397655, "loss": 0.4416, "step": 20255 }, { "epoch": 0.5142719346118212, "grad_norm": 0.60546875, "learning_rate": 0.0002779946222907187, "loss": 0.4508, "step": 20260 }, { "epoch": 0.5143988526608368, "grad_norm": 0.376953125, "learning_rate": 0.00027797729352681564, "loss": 0.4758, "step": 20265 }, { "epoch": 0.5145257707098526, "grad_norm": 0.34765625, "learning_rate": 0.00027795995848311784, "loss": 0.48, "step": 20270 }, { "epoch": 0.5146526887588684, "grad_norm": 0.4296875, "learning_rate": 0.00027794261716047587, "loss": 0.4646, "step": 20275 }, { "epoch": 0.5147796068078841, "grad_norm": 0.34375, "learning_rate": 0.00027792526955974057, "loss": 0.4682, "step": 20280 }, { "epoch": 0.5149065248568999, "grad_norm": 0.359375, "learning_rate": 0.00027790791568176325, "loss": 0.4767, "step": 20285 }, { "epoch": 0.5150334429059157, "grad_norm": 0.318359375, "learning_rate": 0.0002778905555273955, "loss": 0.4787, "step": 20290 }, { "epoch": 0.5151603609549315, "grad_norm": 0.373046875, "learning_rate": 0.0002778731890974891, "loss": 0.4994, "step": 20295 }, { "epoch": 0.5152872790039471, "grad_norm": 0.36328125, "learning_rate": 0.0002778558163928963, "loss": 0.4978, "step": 20300 }, { "epoch": 0.5154141970529629, "grad_norm": 0.3515625, "learning_rate": 0.0002778384374144694, "loss": 0.5086, "step": 20305 }, { "epoch": 0.5155411151019786, "grad_norm": 0.376953125, "learning_rate": 0.00027782105216306145, "loss": 0.4923, "step": 20310 }, { "epoch": 0.5156680331509944, "grad_norm": 0.35546875, "learning_rate": 0.0002778036606395253, "loss": 0.4713, "step": 20315 }, { "epoch": 0.5157949512000102, "grad_norm": 0.357421875, "learning_rate": 0.0002777862628447145, "loss": 0.4898, "step": 20320 }, { "epoch": 0.5159218692490259, "grad_norm": 0.349609375, "learning_rate": 0.00027776885877948265, "loss": 0.5042, "step": 20325 }, { "epoch": 0.5160487872980416, "grad_norm": 0.333984375, "learning_rate": 0.00027775144844468375, "loss": 0.4704, "step": 20330 }, { "epoch": 0.5161757053470574, "grad_norm": 0.353515625, "learning_rate": 0.00027773403184117224, "loss": 0.4774, "step": 20335 }, { "epoch": 0.5163026233960731, "grad_norm": 0.326171875, "learning_rate": 0.0002777166089698026, "loss": 0.4458, "step": 20340 }, { "epoch": 0.5164295414450889, "grad_norm": 0.359375, "learning_rate": 0.00027769917983142987, "loss": 0.5095, "step": 20345 }, { "epoch": 0.5165564594941047, "grad_norm": 0.361328125, "learning_rate": 0.0002776817444269092, "loss": 0.4935, "step": 20350 }, { "epoch": 0.5166833775431204, "grad_norm": 0.357421875, "learning_rate": 0.00027766430275709625, "loss": 0.49, "step": 20355 }, { "epoch": 0.5168102955921362, "grad_norm": 0.337890625, "learning_rate": 0.00027764685482284683, "loss": 0.4726, "step": 20360 }, { "epoch": 0.5169372136411519, "grad_norm": 0.345703125, "learning_rate": 0.00027762940062501704, "loss": 0.4865, "step": 20365 }, { "epoch": 0.5170641316901676, "grad_norm": 0.34375, "learning_rate": 0.00027761194016446346, "loss": 0.4533, "step": 20370 }, { "epoch": 0.5171910497391834, "grad_norm": 0.345703125, "learning_rate": 0.00027759447344204277, "loss": 0.4924, "step": 20375 }, { "epoch": 0.5173179677881992, "grad_norm": 0.38671875, "learning_rate": 0.0002775770004586121, "loss": 0.4982, "step": 20380 }, { "epoch": 0.5174448858372149, "grad_norm": 0.34375, "learning_rate": 0.00027755952121502885, "loss": 0.4619, "step": 20385 }, { "epoch": 0.5175718038862307, "grad_norm": 0.357421875, "learning_rate": 0.00027754203571215066, "loss": 0.4596, "step": 20390 }, { "epoch": 0.5176987219352465, "grad_norm": 0.3359375, "learning_rate": 0.0002775245439508356, "loss": 0.467, "step": 20395 }, { "epoch": 0.5178256399842621, "grad_norm": 0.357421875, "learning_rate": 0.00027750704593194203, "loss": 0.4964, "step": 20400 }, { "epoch": 0.5179525580332779, "grad_norm": 0.3515625, "learning_rate": 0.0002774895416563284, "loss": 0.4801, "step": 20405 }, { "epoch": 0.5180794760822937, "grad_norm": 0.3515625, "learning_rate": 0.00027747203112485385, "loss": 0.4874, "step": 20410 }, { "epoch": 0.5182063941313094, "grad_norm": 0.33984375, "learning_rate": 0.00027745451433837745, "loss": 0.4784, "step": 20415 }, { "epoch": 0.5183333121803252, "grad_norm": 0.3359375, "learning_rate": 0.0002774369912977588, "loss": 0.4949, "step": 20420 }, { "epoch": 0.518460230229341, "grad_norm": 0.33203125, "learning_rate": 0.00027741946200385775, "loss": 0.4711, "step": 20425 }, { "epoch": 0.5185871482783566, "grad_norm": 0.337890625, "learning_rate": 0.0002774019264575345, "loss": 0.4865, "step": 20430 }, { "epoch": 0.5187140663273724, "grad_norm": 0.361328125, "learning_rate": 0.00027738438465964944, "loss": 0.4573, "step": 20435 }, { "epoch": 0.5188409843763881, "grad_norm": 0.345703125, "learning_rate": 0.0002773668366110634, "loss": 0.4786, "step": 20440 }, { "epoch": 0.5189679024254039, "grad_norm": 0.330078125, "learning_rate": 0.0002773492823126374, "loss": 0.4875, "step": 20445 }, { "epoch": 0.5190948204744197, "grad_norm": 0.345703125, "learning_rate": 0.0002773317217652328, "loss": 0.4941, "step": 20450 }, { "epoch": 0.5192217385234354, "grad_norm": 0.3515625, "learning_rate": 0.00027731415496971135, "loss": 0.461, "step": 20455 }, { "epoch": 0.5193486565724512, "grad_norm": 0.3203125, "learning_rate": 0.00027729658192693503, "loss": 0.4557, "step": 20460 }, { "epoch": 0.5194755746214669, "grad_norm": 0.333984375, "learning_rate": 0.0002772790026377662, "loss": 0.4901, "step": 20465 }, { "epoch": 0.5196024926704826, "grad_norm": 0.369140625, "learning_rate": 0.00027726141710306736, "loss": 0.4909, "step": 20470 }, { "epoch": 0.5197294107194984, "grad_norm": 0.3515625, "learning_rate": 0.00027724382532370146, "loss": 0.5092, "step": 20475 }, { "epoch": 0.5198563287685142, "grad_norm": 0.34375, "learning_rate": 0.0002772262273005318, "loss": 0.4918, "step": 20480 }, { "epoch": 0.5199832468175299, "grad_norm": 0.337890625, "learning_rate": 0.00027720862303442183, "loss": 0.4667, "step": 20485 }, { "epoch": 0.5201101648665457, "grad_norm": 0.359375, "learning_rate": 0.0002771910125262354, "loss": 0.5141, "step": 20490 }, { "epoch": 0.5202370829155615, "grad_norm": 0.33984375, "learning_rate": 0.0002771733957768366, "loss": 0.4564, "step": 20495 }, { "epoch": 0.5203640009645771, "grad_norm": 0.302734375, "learning_rate": 0.00027715577278709, "loss": 0.4532, "step": 20500 }, { "epoch": 0.5204909190135929, "grad_norm": 0.390625, "learning_rate": 0.0002771381435578602, "loss": 0.4902, "step": 20505 }, { "epoch": 0.5206178370626087, "grad_norm": 0.375, "learning_rate": 0.0002771205080900124, "loss": 0.4348, "step": 20510 }, { "epoch": 0.5207447551116244, "grad_norm": 0.369140625, "learning_rate": 0.0002771028663844119, "loss": 0.5054, "step": 20515 }, { "epoch": 0.5208716731606402, "grad_norm": 0.330078125, "learning_rate": 0.0002770852184419244, "loss": 0.4739, "step": 20520 }, { "epoch": 0.520998591209656, "grad_norm": 0.33984375, "learning_rate": 0.00027706756426341576, "loss": 0.4562, "step": 20525 }, { "epoch": 0.5211255092586716, "grad_norm": 0.333984375, "learning_rate": 0.0002770499038497524, "loss": 0.4777, "step": 20530 }, { "epoch": 0.5212524273076874, "grad_norm": 0.35546875, "learning_rate": 0.0002770322372018009, "loss": 0.4746, "step": 20535 }, { "epoch": 0.5213793453567032, "grad_norm": 0.34765625, "learning_rate": 0.00027701456432042813, "loss": 0.5115, "step": 20540 }, { "epoch": 0.5215062634057189, "grad_norm": 0.353515625, "learning_rate": 0.00027699688520650126, "loss": 0.4602, "step": 20545 }, { "epoch": 0.5216331814547347, "grad_norm": 0.349609375, "learning_rate": 0.00027697919986088783, "loss": 0.4781, "step": 20550 }, { "epoch": 0.5217600995037505, "grad_norm": 0.40625, "learning_rate": 0.0002769615082844556, "loss": 0.489, "step": 20555 }, { "epoch": 0.5218870175527662, "grad_norm": 0.365234375, "learning_rate": 0.00027694381047807276, "loss": 0.5083, "step": 20560 }, { "epoch": 0.5220139356017819, "grad_norm": 0.33984375, "learning_rate": 0.0002769261064426077, "loss": 0.4513, "step": 20565 }, { "epoch": 0.5221408536507977, "grad_norm": 0.37109375, "learning_rate": 0.00027690839617892915, "loss": 0.4933, "step": 20570 }, { "epoch": 0.5222677716998134, "grad_norm": 4.90625, "learning_rate": 0.00027689067968790623, "loss": 0.4491, "step": 20575 }, { "epoch": 0.5223946897488292, "grad_norm": 0.33984375, "learning_rate": 0.00027687295697040814, "loss": 0.5057, "step": 20580 }, { "epoch": 0.522521607797845, "grad_norm": 0.34375, "learning_rate": 0.0002768552280273046, "loss": 0.4811, "step": 20585 }, { "epoch": 0.5226485258468607, "grad_norm": 0.345703125, "learning_rate": 0.0002768374928594655, "loss": 0.4714, "step": 20590 }, { "epoch": 0.5227754438958764, "grad_norm": 0.384765625, "learning_rate": 0.00027681975146776126, "loss": 0.4644, "step": 20595 }, { "epoch": 0.5229023619448921, "grad_norm": 0.359375, "learning_rate": 0.0002768020038530623, "loss": 0.4627, "step": 20600 }, { "epoch": 0.5230292799939079, "grad_norm": 0.330078125, "learning_rate": 0.00027678425001623945, "loss": 0.4665, "step": 20605 }, { "epoch": 0.5231561980429237, "grad_norm": 0.30078125, "learning_rate": 0.000276766489958164, "loss": 0.4604, "step": 20610 }, { "epoch": 0.5232831160919394, "grad_norm": 0.31640625, "learning_rate": 0.00027674872367970736, "loss": 0.4445, "step": 20615 }, { "epoch": 0.5234100341409552, "grad_norm": 0.33203125, "learning_rate": 0.0002767309511817414, "loss": 0.488, "step": 20620 }, { "epoch": 0.523536952189971, "grad_norm": 0.341796875, "learning_rate": 0.0002767131724651381, "loss": 0.4793, "step": 20625 }, { "epoch": 0.5236638702389866, "grad_norm": 0.353515625, "learning_rate": 0.00027669538753076993, "loss": 0.4657, "step": 20630 }, { "epoch": 0.5237907882880024, "grad_norm": 0.37890625, "learning_rate": 0.0002766775963795095, "loss": 0.4554, "step": 20635 }, { "epoch": 0.5239177063370182, "grad_norm": 0.349609375, "learning_rate": 0.0002766597990122299, "loss": 0.4479, "step": 20640 }, { "epoch": 0.5240446243860339, "grad_norm": 0.357421875, "learning_rate": 0.0002766419954298045, "loss": 0.48, "step": 20645 }, { "epoch": 0.5241715424350497, "grad_norm": 0.365234375, "learning_rate": 0.0002766241856331067, "loss": 0.4635, "step": 20650 }, { "epoch": 0.5242984604840655, "grad_norm": 0.333984375, "learning_rate": 0.0002766063696230107, "loss": 0.4682, "step": 20655 }, { "epoch": 0.5244253785330812, "grad_norm": 0.35546875, "learning_rate": 0.0002765885474003904, "loss": 0.4404, "step": 20660 }, { "epoch": 0.5245522965820969, "grad_norm": 0.365234375, "learning_rate": 0.0002765707189661206, "loss": 0.4663, "step": 20665 }, { "epoch": 0.5246792146311127, "grad_norm": 0.33984375, "learning_rate": 0.00027655288432107606, "loss": 0.4624, "step": 20670 }, { "epoch": 0.5248061326801284, "grad_norm": 0.349609375, "learning_rate": 0.0002765350434661319, "loss": 0.4713, "step": 20675 }, { "epoch": 0.5249330507291442, "grad_norm": 0.396484375, "learning_rate": 0.0002765171964021635, "loss": 0.4963, "step": 20680 }, { "epoch": 0.52505996877816, "grad_norm": 0.35546875, "learning_rate": 0.0002764993431300467, "loss": 0.4935, "step": 20685 }, { "epoch": 0.5251868868271757, "grad_norm": 0.322265625, "learning_rate": 0.0002764814836506575, "loss": 0.4625, "step": 20690 }, { "epoch": 0.5253138048761914, "grad_norm": 0.357421875, "learning_rate": 0.0002764636179648723, "loss": 0.5003, "step": 20695 }, { "epoch": 0.5254407229252072, "grad_norm": 0.3515625, "learning_rate": 0.0002764457460735678, "loss": 0.4383, "step": 20700 }, { "epoch": 0.5255676409742229, "grad_norm": 0.375, "learning_rate": 0.0002764278679776208, "loss": 0.4844, "step": 20705 }, { "epoch": 0.5256945590232387, "grad_norm": 0.33984375, "learning_rate": 0.00027640998367790874, "loss": 0.4886, "step": 20710 }, { "epoch": 0.5258214770722545, "grad_norm": 0.349609375, "learning_rate": 0.00027639209317530914, "loss": 0.4727, "step": 20715 }, { "epoch": 0.5259483951212702, "grad_norm": 0.380859375, "learning_rate": 0.0002763741964706999, "loss": 0.4918, "step": 20720 }, { "epoch": 0.526075313170286, "grad_norm": 0.34375, "learning_rate": 0.00027635629356495913, "loss": 0.4401, "step": 20725 }, { "epoch": 0.5262022312193017, "grad_norm": 0.373046875, "learning_rate": 0.00027633838445896536, "loss": 0.4716, "step": 20730 }, { "epoch": 0.5263291492683174, "grad_norm": 0.322265625, "learning_rate": 0.0002763204691535974, "loss": 0.4999, "step": 20735 }, { "epoch": 0.5264560673173332, "grad_norm": 0.33984375, "learning_rate": 0.0002763025476497344, "loss": 0.4695, "step": 20740 }, { "epoch": 0.526582985366349, "grad_norm": 0.353515625, "learning_rate": 0.0002762846199482556, "loss": 0.4581, "step": 20745 }, { "epoch": 0.5267099034153647, "grad_norm": 0.361328125, "learning_rate": 0.0002762666860500409, "loss": 0.4731, "step": 20750 }, { "epoch": 0.5268368214643805, "grad_norm": 0.357421875, "learning_rate": 0.00027624874595597016, "loss": 0.495, "step": 20755 }, { "epoch": 0.5269637395133961, "grad_norm": 0.37109375, "learning_rate": 0.00027623079966692375, "loss": 0.4685, "step": 20760 }, { "epoch": 0.5270906575624119, "grad_norm": 0.390625, "learning_rate": 0.0002762128471837823, "loss": 0.465, "step": 20765 }, { "epoch": 0.5272175756114277, "grad_norm": 0.34375, "learning_rate": 0.00027619488850742666, "loss": 0.4627, "step": 20770 }, { "epoch": 0.5273444936604434, "grad_norm": 0.72265625, "learning_rate": 0.0002761769236387382, "loss": 0.4675, "step": 20775 }, { "epoch": 0.5274714117094592, "grad_norm": 0.37109375, "learning_rate": 0.0002761589525785983, "loss": 0.4786, "step": 20780 }, { "epoch": 0.527598329758475, "grad_norm": 0.357421875, "learning_rate": 0.00027614097532788884, "loss": 0.4907, "step": 20785 }, { "epoch": 0.5277252478074907, "grad_norm": 0.35546875, "learning_rate": 0.00027612299188749196, "loss": 0.4708, "step": 20790 }, { "epoch": 0.5278521658565064, "grad_norm": 0.341796875, "learning_rate": 0.00027610500225829014, "loss": 0.442, "step": 20795 }, { "epoch": 0.5279790839055222, "grad_norm": 0.349609375, "learning_rate": 0.0002760870064411661, "loss": 0.4781, "step": 20800 }, { "epoch": 0.5281060019545379, "grad_norm": 0.3359375, "learning_rate": 0.00027606900443700284, "loss": 0.4796, "step": 20805 }, { "epoch": 0.5282329200035537, "grad_norm": 0.33203125, "learning_rate": 0.0002760509962466838, "loss": 0.4743, "step": 20810 }, { "epoch": 0.5283598380525695, "grad_norm": 0.357421875, "learning_rate": 0.00027603298187109257, "loss": 0.4804, "step": 20815 }, { "epoch": 0.5284867561015852, "grad_norm": 0.34765625, "learning_rate": 0.0002760149613111131, "loss": 0.4827, "step": 20820 }, { "epoch": 0.528613674150601, "grad_norm": 0.361328125, "learning_rate": 0.0002759969345676297, "loss": 0.4697, "step": 20825 }, { "epoch": 0.5287405921996167, "grad_norm": 0.328125, "learning_rate": 0.00027597890164152687, "loss": 0.4589, "step": 20830 }, { "epoch": 0.5288675102486324, "grad_norm": 0.31640625, "learning_rate": 0.00027596086253368955, "loss": 0.4481, "step": 20835 }, { "epoch": 0.5289944282976482, "grad_norm": 0.333984375, "learning_rate": 0.0002759428172450029, "loss": 0.4605, "step": 20840 }, { "epoch": 0.529121346346664, "grad_norm": 0.36328125, "learning_rate": 0.00027592476577635236, "loss": 0.4567, "step": 20845 }, { "epoch": 0.5292482643956797, "grad_norm": 0.32421875, "learning_rate": 0.00027590670812862374, "loss": 0.4659, "step": 20850 }, { "epoch": 0.5293751824446955, "grad_norm": 0.33984375, "learning_rate": 0.00027588864430270305, "loss": 0.4534, "step": 20855 }, { "epoch": 0.5295021004937112, "grad_norm": 0.396484375, "learning_rate": 0.0002758705742994768, "loss": 0.4717, "step": 20860 }, { "epoch": 0.5296290185427269, "grad_norm": 0.375, "learning_rate": 0.00027585249811983156, "loss": 0.4545, "step": 20865 }, { "epoch": 0.5297559365917427, "grad_norm": 0.357421875, "learning_rate": 0.0002758344157646544, "loss": 0.5126, "step": 20870 }, { "epoch": 0.5298828546407585, "grad_norm": 0.3203125, "learning_rate": 0.00027581632723483256, "loss": 0.4624, "step": 20875 }, { "epoch": 0.5300097726897742, "grad_norm": 0.373046875, "learning_rate": 0.0002757982325312537, "loss": 0.4619, "step": 20880 }, { "epoch": 0.53013669073879, "grad_norm": 0.298828125, "learning_rate": 0.0002757801316548056, "loss": 0.4512, "step": 20885 }, { "epoch": 0.5302636087878058, "grad_norm": 0.330078125, "learning_rate": 0.0002757620246063766, "loss": 0.4642, "step": 20890 }, { "epoch": 0.5303905268368214, "grad_norm": 0.34765625, "learning_rate": 0.0002757439113868551, "loss": 0.4755, "step": 20895 }, { "epoch": 0.5305174448858372, "grad_norm": 0.373046875, "learning_rate": 0.00027572579199713004, "loss": 0.4694, "step": 20900 }, { "epoch": 0.530644362934853, "grad_norm": 0.369140625, "learning_rate": 0.0002757076664380904, "loss": 0.4565, "step": 20905 }, { "epoch": 0.5307712809838687, "grad_norm": 0.353515625, "learning_rate": 0.0002756895347106257, "loss": 0.4676, "step": 20910 }, { "epoch": 0.5308981990328845, "grad_norm": 0.337890625, "learning_rate": 0.0002756713968156255, "loss": 0.4839, "step": 20915 }, { "epoch": 0.5310251170819003, "grad_norm": 0.33203125, "learning_rate": 0.00027565325275397996, "loss": 0.473, "step": 20920 }, { "epoch": 0.531152035130916, "grad_norm": 0.291015625, "learning_rate": 0.0002756351025265794, "loss": 0.4487, "step": 20925 }, { "epoch": 0.5312789531799317, "grad_norm": 0.341796875, "learning_rate": 0.00027561694613431435, "loss": 0.4753, "step": 20930 }, { "epoch": 0.5314058712289474, "grad_norm": 0.341796875, "learning_rate": 0.0002755987835780758, "loss": 0.4719, "step": 20935 }, { "epoch": 0.5315327892779632, "grad_norm": 0.345703125, "learning_rate": 0.000275580614858755, "loss": 0.4436, "step": 20940 }, { "epoch": 0.531659707326979, "grad_norm": 0.34765625, "learning_rate": 0.00027556243997724347, "loss": 0.4859, "step": 20945 }, { "epoch": 0.5317866253759947, "grad_norm": 0.353515625, "learning_rate": 0.00027554425893443294, "loss": 0.4538, "step": 20950 }, { "epoch": 0.5319135434250105, "grad_norm": 0.37109375, "learning_rate": 0.0002755260717312157, "loss": 0.4719, "step": 20955 }, { "epoch": 0.5320404614740262, "grad_norm": 0.33984375, "learning_rate": 0.00027550787836848415, "loss": 0.4827, "step": 20960 }, { "epoch": 0.5321673795230419, "grad_norm": 0.341796875, "learning_rate": 0.0002754896788471309, "loss": 0.4765, "step": 20965 }, { "epoch": 0.5322942975720577, "grad_norm": 0.302734375, "learning_rate": 0.0002754714731680492, "loss": 0.4469, "step": 20970 }, { "epoch": 0.5324212156210735, "grad_norm": 0.3515625, "learning_rate": 0.00027545326133213225, "loss": 0.4708, "step": 20975 }, { "epoch": 0.5325481336700892, "grad_norm": 0.337890625, "learning_rate": 0.00027543504334027373, "loss": 0.4472, "step": 20980 }, { "epoch": 0.532675051719105, "grad_norm": 0.326171875, "learning_rate": 0.0002754168191933676, "loss": 0.4619, "step": 20985 }, { "epoch": 0.5328019697681208, "grad_norm": 0.35546875, "learning_rate": 0.0002753985888923081, "loss": 0.4558, "step": 20990 }, { "epoch": 0.5329288878171364, "grad_norm": 0.33203125, "learning_rate": 0.0002753803524379898, "loss": 0.4813, "step": 20995 }, { "epoch": 0.5330558058661522, "grad_norm": 0.294921875, "learning_rate": 0.00027536210983130765, "loss": 0.431, "step": 21000 }, { "epoch": 0.533182723915168, "grad_norm": 0.34375, "learning_rate": 0.00027534386107315657, "loss": 0.4787, "step": 21005 }, { "epoch": 0.5333096419641837, "grad_norm": 0.349609375, "learning_rate": 0.0002753256061644322, "loss": 0.5275, "step": 21010 }, { "epoch": 0.5334365600131995, "grad_norm": 0.349609375, "learning_rate": 0.0002753073451060303, "loss": 0.4814, "step": 21015 }, { "epoch": 0.5335634780622153, "grad_norm": 0.376953125, "learning_rate": 0.0002752890778988469, "loss": 0.4692, "step": 21020 }, { "epoch": 0.5336903961112309, "grad_norm": 0.357421875, "learning_rate": 0.0002752708045437782, "loss": 0.4654, "step": 21025 }, { "epoch": 0.5338173141602467, "grad_norm": 0.3125, "learning_rate": 0.0002752525250417212, "loss": 0.445, "step": 21030 }, { "epoch": 0.5339442322092625, "grad_norm": 0.33203125, "learning_rate": 0.00027523423939357263, "loss": 0.4438, "step": 21035 }, { "epoch": 0.5340711502582782, "grad_norm": 0.384765625, "learning_rate": 0.0002752159476002298, "loss": 0.4443, "step": 21040 }, { "epoch": 0.534198068307294, "grad_norm": 0.3515625, "learning_rate": 0.0002751976496625903, "loss": 0.4641, "step": 21045 }, { "epoch": 0.5343249863563098, "grad_norm": 0.3515625, "learning_rate": 0.000275179345581552, "loss": 0.4862, "step": 21050 }, { "epoch": 0.5344519044053255, "grad_norm": 0.3671875, "learning_rate": 0.0002751610353580131, "loss": 0.4597, "step": 21055 }, { "epoch": 0.5345788224543412, "grad_norm": 0.3359375, "learning_rate": 0.00027514271899287205, "loss": 0.4926, "step": 21060 }, { "epoch": 0.534705740503357, "grad_norm": 0.33203125, "learning_rate": 0.00027512439648702763, "loss": 0.4746, "step": 21065 }, { "epoch": 0.5348326585523727, "grad_norm": 0.3984375, "learning_rate": 0.0002751060678413789, "loss": 0.4366, "step": 21070 }, { "epoch": 0.5349595766013885, "grad_norm": 0.34765625, "learning_rate": 0.0002750877330568253, "loss": 0.4656, "step": 21075 }, { "epoch": 0.5350864946504043, "grad_norm": 0.36328125, "learning_rate": 0.00027506939213426643, "loss": 0.4683, "step": 21080 }, { "epoch": 0.53521341269942, "grad_norm": 0.333984375, "learning_rate": 0.00027505104507460226, "loss": 0.4829, "step": 21085 }, { "epoch": 0.5353403307484358, "grad_norm": 0.365234375, "learning_rate": 0.00027503269187873316, "loss": 0.4919, "step": 21090 }, { "epoch": 0.5354672487974514, "grad_norm": 0.3359375, "learning_rate": 0.0002750143325475597, "loss": 0.467, "step": 21095 }, { "epoch": 0.5355941668464672, "grad_norm": 0.27734375, "learning_rate": 0.00027499596708198274, "loss": 0.4462, "step": 21100 }, { "epoch": 0.535721084895483, "grad_norm": 0.357421875, "learning_rate": 0.0002749775954829034, "loss": 0.4577, "step": 21105 }, { "epoch": 0.5358480029444987, "grad_norm": 0.345703125, "learning_rate": 0.0002749592177512233, "loss": 0.4778, "step": 21110 }, { "epoch": 0.5359749209935145, "grad_norm": 0.34765625, "learning_rate": 0.0002749408338878442, "loss": 0.4641, "step": 21115 }, { "epoch": 0.5361018390425303, "grad_norm": 0.34375, "learning_rate": 0.00027492244389366806, "loss": 0.4684, "step": 21120 }, { "epoch": 0.5362287570915459, "grad_norm": 0.345703125, "learning_rate": 0.00027490404776959744, "loss": 0.4548, "step": 21125 }, { "epoch": 0.5363556751405617, "grad_norm": 0.330078125, "learning_rate": 0.00027488564551653486, "loss": 0.4592, "step": 21130 }, { "epoch": 0.5364825931895775, "grad_norm": 0.314453125, "learning_rate": 0.00027486723713538347, "loss": 0.4244, "step": 21135 }, { "epoch": 0.5366095112385932, "grad_norm": 0.33203125, "learning_rate": 0.00027484882262704646, "loss": 0.4632, "step": 21140 }, { "epoch": 0.536736429287609, "grad_norm": 0.341796875, "learning_rate": 0.0002748304019924275, "loss": 0.492, "step": 21145 }, { "epoch": 0.5368633473366248, "grad_norm": 0.349609375, "learning_rate": 0.00027481197523243037, "loss": 0.4665, "step": 21150 }, { "epoch": 0.5369902653856405, "grad_norm": 0.3359375, "learning_rate": 0.00027479354234795936, "loss": 0.4762, "step": 21155 }, { "epoch": 0.5371171834346562, "grad_norm": 0.35546875, "learning_rate": 0.000274775103339919, "loss": 0.6609, "step": 21160 }, { "epoch": 0.537244101483672, "grad_norm": 0.326171875, "learning_rate": 0.00027475665820921394, "loss": 0.4855, "step": 21165 }, { "epoch": 0.5373710195326877, "grad_norm": 0.333984375, "learning_rate": 0.0002747382069567494, "loss": 0.4957, "step": 21170 }, { "epoch": 0.5374979375817035, "grad_norm": 0.359375, "learning_rate": 0.00027471974958343074, "loss": 0.4773, "step": 21175 }, { "epoch": 0.5376248556307193, "grad_norm": 0.318359375, "learning_rate": 0.0002747012860901636, "loss": 0.4594, "step": 21180 }, { "epoch": 0.537751773679735, "grad_norm": 0.3203125, "learning_rate": 0.000274682816477854, "loss": 0.4609, "step": 21185 }, { "epoch": 0.5378786917287507, "grad_norm": 0.359375, "learning_rate": 0.00027466434074740836, "loss": 0.4811, "step": 21190 }, { "epoch": 0.5380056097777665, "grad_norm": 0.357421875, "learning_rate": 0.00027464585889973314, "loss": 0.4849, "step": 21195 }, { "epoch": 0.5381325278267822, "grad_norm": 0.33984375, "learning_rate": 0.0002746273709357353, "loss": 0.4684, "step": 21200 }, { "epoch": 0.538259445875798, "grad_norm": 0.375, "learning_rate": 0.000274608876856322, "loss": 0.4731, "step": 21205 }, { "epoch": 0.5383863639248138, "grad_norm": 0.3515625, "learning_rate": 0.0002745903766624007, "loss": 0.4979, "step": 21210 }, { "epoch": 0.5385132819738295, "grad_norm": 0.365234375, "learning_rate": 0.0002745718703548793, "loss": 0.4989, "step": 21215 }, { "epoch": 0.5386402000228453, "grad_norm": 0.361328125, "learning_rate": 0.00027455335793466576, "loss": 0.4663, "step": 21220 }, { "epoch": 0.538767118071861, "grad_norm": 0.365234375, "learning_rate": 0.00027453483940266863, "loss": 0.4727, "step": 21225 }, { "epoch": 0.5388940361208767, "grad_norm": 0.376953125, "learning_rate": 0.00027451631475979655, "loss": 0.539, "step": 21230 }, { "epoch": 0.5390209541698925, "grad_norm": 0.345703125, "learning_rate": 0.0002744977840069585, "loss": 0.4392, "step": 21235 }, { "epoch": 0.5391478722189083, "grad_norm": 0.388671875, "learning_rate": 0.00027447924714506374, "loss": 0.4691, "step": 21240 }, { "epoch": 0.539274790267924, "grad_norm": 0.361328125, "learning_rate": 0.000274460704175022, "loss": 0.4508, "step": 21245 }, { "epoch": 0.5394017083169398, "grad_norm": 0.349609375, "learning_rate": 0.00027444215509774293, "loss": 0.4605, "step": 21250 }, { "epoch": 0.5395286263659556, "grad_norm": 0.33203125, "learning_rate": 0.00027442359991413696, "loss": 0.4739, "step": 21255 }, { "epoch": 0.5396555444149712, "grad_norm": 0.427734375, "learning_rate": 0.0002744050386251145, "loss": 0.4597, "step": 21260 }, { "epoch": 0.539782462463987, "grad_norm": 0.353515625, "learning_rate": 0.0002743864712315864, "loss": 0.4433, "step": 21265 }, { "epoch": 0.5399093805130027, "grad_norm": 0.36328125, "learning_rate": 0.00027436789773446367, "loss": 0.496, "step": 21270 }, { "epoch": 0.5400362985620185, "grad_norm": 0.361328125, "learning_rate": 0.00027434931813465774, "loss": 0.4794, "step": 21275 }, { "epoch": 0.5401632166110343, "grad_norm": 0.31640625, "learning_rate": 0.0002743307324330803, "loss": 0.4424, "step": 21280 }, { "epoch": 0.54029013466005, "grad_norm": 0.3359375, "learning_rate": 0.00027431214063064333, "loss": 0.5031, "step": 21285 }, { "epoch": 0.5404170527090657, "grad_norm": 0.330078125, "learning_rate": 0.00027429354272825914, "loss": 0.4395, "step": 21290 }, { "epoch": 0.5405439707580815, "grad_norm": 0.3359375, "learning_rate": 0.00027427493872684037, "loss": 0.4734, "step": 21295 }, { "epoch": 0.5406708888070972, "grad_norm": 0.349609375, "learning_rate": 0.0002742563286272998, "loss": 0.4852, "step": 21300 }, { "epoch": 0.540797806856113, "grad_norm": 0.35546875, "learning_rate": 0.00027423771243055073, "loss": 0.5005, "step": 21305 }, { "epoch": 0.5409247249051288, "grad_norm": 0.353515625, "learning_rate": 0.0002742190901375066, "loss": 0.4687, "step": 21310 }, { "epoch": 0.5410516429541445, "grad_norm": 0.353515625, "learning_rate": 0.00027420046174908113, "loss": 0.4807, "step": 21315 }, { "epoch": 0.5411785610031603, "grad_norm": 0.3359375, "learning_rate": 0.0002741818272661885, "loss": 0.4308, "step": 21320 }, { "epoch": 0.541305479052176, "grad_norm": 0.376953125, "learning_rate": 0.0002741631866897431, "loss": 0.4724, "step": 21325 }, { "epoch": 0.5414323971011917, "grad_norm": 0.369140625, "learning_rate": 0.00027414454002065963, "loss": 0.438, "step": 21330 }, { "epoch": 0.5415593151502075, "grad_norm": 0.29296875, "learning_rate": 0.00027412588725985295, "loss": 0.4586, "step": 21335 }, { "epoch": 0.5416862331992233, "grad_norm": 0.314453125, "learning_rate": 0.00027410722840823845, "loss": 0.473, "step": 21340 }, { "epoch": 0.541813151248239, "grad_norm": 0.7109375, "learning_rate": 0.0002740885634667317, "loss": 0.4715, "step": 21345 }, { "epoch": 0.5419400692972548, "grad_norm": 0.33203125, "learning_rate": 0.0002740698924362486, "loss": 0.446, "step": 21350 }, { "epoch": 0.5420669873462706, "grad_norm": 0.341796875, "learning_rate": 0.0002740512153177052, "loss": 0.4587, "step": 21355 }, { "epoch": 0.5421939053952862, "grad_norm": 0.3828125, "learning_rate": 0.0002740325321120182, "loss": 0.4977, "step": 21360 }, { "epoch": 0.542320823444302, "grad_norm": 0.337890625, "learning_rate": 0.0002740138428201042, "loss": 0.4771, "step": 21365 }, { "epoch": 0.5424477414933178, "grad_norm": 0.328125, "learning_rate": 0.0002739951474428803, "loss": 0.4716, "step": 21370 }, { "epoch": 0.5425746595423335, "grad_norm": 0.337890625, "learning_rate": 0.00027397644598126396, "loss": 0.4875, "step": 21375 }, { "epoch": 0.5427015775913493, "grad_norm": 0.349609375, "learning_rate": 0.0002739577384361728, "loss": 0.4949, "step": 21380 }, { "epoch": 0.5428284956403651, "grad_norm": 0.36328125, "learning_rate": 0.0002739390248085248, "loss": 0.4741, "step": 21385 }, { "epoch": 0.5429554136893807, "grad_norm": 0.330078125, "learning_rate": 0.00027392030509923817, "loss": 0.4527, "step": 21390 }, { "epoch": 0.5430823317383965, "grad_norm": 0.35546875, "learning_rate": 0.00027390157930923156, "loss": 0.4939, "step": 21395 }, { "epoch": 0.5432092497874123, "grad_norm": 0.33984375, "learning_rate": 0.00027388284743942384, "loss": 0.4997, "step": 21400 }, { "epoch": 0.543336167836428, "grad_norm": 0.373046875, "learning_rate": 0.0002738641094907341, "loss": 0.4557, "step": 21405 }, { "epoch": 0.5434630858854438, "grad_norm": 0.349609375, "learning_rate": 0.00027384536546408184, "loss": 0.4899, "step": 21410 }, { "epoch": 0.5435900039344596, "grad_norm": 0.330078125, "learning_rate": 0.0002738266153603869, "loss": 0.4789, "step": 21415 }, { "epoch": 0.5437169219834753, "grad_norm": 0.33984375, "learning_rate": 0.0002738078591805692, "loss": 0.4666, "step": 21420 }, { "epoch": 0.543843840032491, "grad_norm": 0.33203125, "learning_rate": 0.00027378909692554916, "loss": 0.4837, "step": 21425 }, { "epoch": 0.5439707580815067, "grad_norm": 0.3359375, "learning_rate": 0.0002737703285962475, "loss": 0.4126, "step": 21430 }, { "epoch": 0.5440976761305225, "grad_norm": 0.34765625, "learning_rate": 0.00027375155419358504, "loss": 0.4734, "step": 21435 }, { "epoch": 0.5442245941795383, "grad_norm": 0.373046875, "learning_rate": 0.00027373277371848316, "loss": 0.4714, "step": 21440 }, { "epoch": 0.544351512228554, "grad_norm": 0.361328125, "learning_rate": 0.00027371398717186343, "loss": 0.4267, "step": 21445 }, { "epoch": 0.5444784302775698, "grad_norm": 0.373046875, "learning_rate": 0.0002736951945546475, "loss": 0.4737, "step": 21450 }, { "epoch": 0.5446053483265855, "grad_norm": 0.3671875, "learning_rate": 0.0002736763958677577, "loss": 0.4801, "step": 21455 }, { "epoch": 0.5447322663756012, "grad_norm": 0.3515625, "learning_rate": 0.00027365759111211646, "loss": 0.4791, "step": 21460 }, { "epoch": 0.544859184424617, "grad_norm": 0.33984375, "learning_rate": 0.00027363878028864645, "loss": 0.455, "step": 21465 }, { "epoch": 0.5449861024736328, "grad_norm": 0.349609375, "learning_rate": 0.0002736199633982708, "loss": 0.4708, "step": 21470 }, { "epoch": 0.5451130205226485, "grad_norm": 0.32421875, "learning_rate": 0.00027360114044191265, "loss": 0.4629, "step": 21475 }, { "epoch": 0.5452399385716643, "grad_norm": 0.359375, "learning_rate": 0.0002735823114204959, "loss": 0.459, "step": 21480 }, { "epoch": 0.5453668566206801, "grad_norm": 0.314453125, "learning_rate": 0.00027356347633494425, "loss": 0.4449, "step": 21485 }, { "epoch": 0.5454937746696957, "grad_norm": 0.34765625, "learning_rate": 0.00027354463518618216, "loss": 0.4472, "step": 21490 }, { "epoch": 0.5456206927187115, "grad_norm": 0.302734375, "learning_rate": 0.0002735257879751339, "loss": 0.4388, "step": 21495 }, { "epoch": 0.5457476107677273, "grad_norm": 0.349609375, "learning_rate": 0.0002735069347027246, "loss": 0.4997, "step": 21500 }, { "epoch": 0.545874528816743, "grad_norm": 0.359375, "learning_rate": 0.0002734880753698791, "loss": 0.4813, "step": 21505 }, { "epoch": 0.5460014468657588, "grad_norm": 0.333984375, "learning_rate": 0.0002734692099775229, "loss": 0.4462, "step": 21510 }, { "epoch": 0.5461283649147746, "grad_norm": 0.3515625, "learning_rate": 0.0002734503385265818, "loss": 0.4618, "step": 21515 }, { "epoch": 0.5462552829637903, "grad_norm": 0.337890625, "learning_rate": 0.00027343146101798174, "loss": 0.441, "step": 21520 }, { "epoch": 0.546382201012806, "grad_norm": 0.36328125, "learning_rate": 0.00027341257745264907, "loss": 0.4747, "step": 21525 }, { "epoch": 0.5465091190618218, "grad_norm": 0.36328125, "learning_rate": 0.0002733936878315104, "loss": 0.4939, "step": 21530 }, { "epoch": 0.5466360371108375, "grad_norm": 0.291015625, "learning_rate": 0.0002733747921554926, "loss": 0.4505, "step": 21535 }, { "epoch": 0.5467629551598533, "grad_norm": 0.3828125, "learning_rate": 0.00027335589042552295, "loss": 0.4925, "step": 21540 }, { "epoch": 0.5468898732088691, "grad_norm": 0.341796875, "learning_rate": 0.0002733369826425288, "loss": 0.4621, "step": 21545 }, { "epoch": 0.5470167912578848, "grad_norm": 0.36328125, "learning_rate": 0.00027331806880743814, "loss": 0.4923, "step": 21550 }, { "epoch": 0.5471437093069005, "grad_norm": 0.310546875, "learning_rate": 0.0002732991489211789, "loss": 0.4347, "step": 21555 }, { "epoch": 0.5472706273559163, "grad_norm": 1.390625, "learning_rate": 0.00027328022298467953, "loss": 0.4787, "step": 21560 }, { "epoch": 0.547397545404932, "grad_norm": 0.345703125, "learning_rate": 0.0002732612909988688, "loss": 0.4661, "step": 21565 }, { "epoch": 0.5475244634539478, "grad_norm": 0.34765625, "learning_rate": 0.00027324235296467554, "loss": 0.4708, "step": 21570 }, { "epoch": 0.5476513815029636, "grad_norm": 0.341796875, "learning_rate": 0.0002732234088830291, "loss": 0.4803, "step": 21575 }, { "epoch": 0.5477782995519793, "grad_norm": 0.357421875, "learning_rate": 0.0002732044587548591, "loss": 0.4911, "step": 21580 }, { "epoch": 0.5479052176009951, "grad_norm": 0.337890625, "learning_rate": 0.00027318550258109545, "loss": 0.451, "step": 21585 }, { "epoch": 0.5480321356500107, "grad_norm": 0.36328125, "learning_rate": 0.0002731665403626682, "loss": 0.5079, "step": 21590 }, { "epoch": 0.5481590536990265, "grad_norm": 0.357421875, "learning_rate": 0.00027314757210050787, "loss": 0.4609, "step": 21595 }, { "epoch": 0.5482859717480423, "grad_norm": 0.3515625, "learning_rate": 0.00027312859779554525, "loss": 0.4649, "step": 21600 }, { "epoch": 0.548412889797058, "grad_norm": 0.37890625, "learning_rate": 0.0002731096174487114, "loss": 0.4975, "step": 21605 }, { "epoch": 0.5485398078460738, "grad_norm": 0.33203125, "learning_rate": 0.00027309063106093764, "loss": 0.4576, "step": 21610 }, { "epoch": 0.5486667258950896, "grad_norm": 0.328125, "learning_rate": 0.00027307163863315564, "loss": 0.4645, "step": 21615 }, { "epoch": 0.5487936439441052, "grad_norm": 0.337890625, "learning_rate": 0.0002730526401662974, "loss": 0.4562, "step": 21620 }, { "epoch": 0.548920561993121, "grad_norm": 0.59765625, "learning_rate": 0.00027303363566129503, "loss": 0.4734, "step": 21625 }, { "epoch": 0.5490474800421368, "grad_norm": 0.375, "learning_rate": 0.00027301462511908125, "loss": 0.4612, "step": 21630 }, { "epoch": 0.5491743980911525, "grad_norm": 0.31640625, "learning_rate": 0.0002729956085405888, "loss": 0.47, "step": 21635 }, { "epoch": 0.5493013161401683, "grad_norm": 0.380859375, "learning_rate": 0.0002729765859267508, "loss": 0.4822, "step": 21640 }, { "epoch": 0.5494282341891841, "grad_norm": 0.333984375, "learning_rate": 0.00027295755727850074, "loss": 0.4364, "step": 21645 }, { "epoch": 0.5495551522381998, "grad_norm": 0.33203125, "learning_rate": 0.00027293852259677236, "loss": 0.4813, "step": 21650 }, { "epoch": 0.5496820702872155, "grad_norm": 0.34375, "learning_rate": 0.0002729194818824996, "loss": 0.4477, "step": 21655 }, { "epoch": 0.5498089883362313, "grad_norm": 0.322265625, "learning_rate": 0.0002729004351366168, "loss": 0.4739, "step": 21660 }, { "epoch": 0.549935906385247, "grad_norm": 0.33203125, "learning_rate": 0.0002728813823600587, "loss": 0.4676, "step": 21665 }, { "epoch": 0.5500628244342628, "grad_norm": 0.333984375, "learning_rate": 0.00027286232355376003, "loss": 0.4749, "step": 21670 }, { "epoch": 0.5501897424832786, "grad_norm": 0.349609375, "learning_rate": 0.00027284325871865613, "loss": 0.4705, "step": 21675 }, { "epoch": 0.5503166605322943, "grad_norm": 0.328125, "learning_rate": 0.0002728241878556824, "loss": 0.4563, "step": 21680 }, { "epoch": 0.5504435785813101, "grad_norm": 0.361328125, "learning_rate": 0.00027280511096577475, "loss": 0.4414, "step": 21685 }, { "epoch": 0.5505704966303258, "grad_norm": 0.333984375, "learning_rate": 0.00027278602804986924, "loss": 0.4588, "step": 21690 }, { "epoch": 0.5506974146793415, "grad_norm": 0.34375, "learning_rate": 0.0002727669391089022, "loss": 0.446, "step": 21695 }, { "epoch": 0.5508243327283573, "grad_norm": 0.31640625, "learning_rate": 0.0002727478441438104, "loss": 0.4736, "step": 21700 }, { "epoch": 0.5509512507773731, "grad_norm": 0.3515625, "learning_rate": 0.00027272874315553073, "loss": 0.4441, "step": 21705 }, { "epoch": 0.5510781688263888, "grad_norm": 0.337890625, "learning_rate": 0.0002727096361450006, "loss": 0.4943, "step": 21710 }, { "epoch": 0.5512050868754046, "grad_norm": 0.36328125, "learning_rate": 0.00027269052311315747, "loss": 0.4834, "step": 21715 }, { "epoch": 0.5513320049244202, "grad_norm": 0.341796875, "learning_rate": 0.00027267140406093916, "loss": 0.477, "step": 21720 }, { "epoch": 0.551458922973436, "grad_norm": 0.361328125, "learning_rate": 0.000272652278989284, "loss": 0.4598, "step": 21725 }, { "epoch": 0.5515858410224518, "grad_norm": 0.33203125, "learning_rate": 0.0002726331478991304, "loss": 0.4959, "step": 21730 }, { "epoch": 0.5517127590714676, "grad_norm": 0.345703125, "learning_rate": 0.00027261401079141704, "loss": 0.4735, "step": 21735 }, { "epoch": 0.5518396771204833, "grad_norm": 0.328125, "learning_rate": 0.000272594867667083, "loss": 0.4981, "step": 21740 }, { "epoch": 0.5519665951694991, "grad_norm": 0.345703125, "learning_rate": 0.00027257571852706765, "loss": 0.4836, "step": 21745 }, { "epoch": 0.5520935132185149, "grad_norm": 0.306640625, "learning_rate": 0.00027255656337231063, "loss": 0.4494, "step": 21750 }, { "epoch": 0.5522204312675305, "grad_norm": 0.333984375, "learning_rate": 0.0002725374022037518, "loss": 0.4583, "step": 21755 }, { "epoch": 0.5523473493165463, "grad_norm": 0.62109375, "learning_rate": 0.00027251823502233153, "loss": 0.4412, "step": 21760 }, { "epoch": 0.552474267365562, "grad_norm": 1.3515625, "learning_rate": 0.00027249906182899027, "loss": 0.4579, "step": 21765 }, { "epoch": 0.5526011854145778, "grad_norm": 0.33203125, "learning_rate": 0.00027247988262466883, "loss": 0.444, "step": 21770 }, { "epoch": 0.5527281034635936, "grad_norm": 0.33203125, "learning_rate": 0.0002724606974103083, "loss": 0.4967, "step": 21775 }, { "epoch": 0.5528550215126093, "grad_norm": 0.3515625, "learning_rate": 0.0002724415061868502, "loss": 0.4743, "step": 21780 }, { "epoch": 0.5529819395616251, "grad_norm": 0.333984375, "learning_rate": 0.0002724223089552361, "loss": 0.8284, "step": 21785 }, { "epoch": 0.5531088576106408, "grad_norm": 0.35546875, "learning_rate": 0.0002724031057164081, "loss": 0.4807, "step": 21790 }, { "epoch": 0.5532357756596565, "grad_norm": 0.37890625, "learning_rate": 0.00027238389647130844, "loss": 0.4849, "step": 21795 }, { "epoch": 0.5533626937086723, "grad_norm": 0.3515625, "learning_rate": 0.00027236468122087976, "loss": 0.4653, "step": 21800 }, { "epoch": 0.5534896117576881, "grad_norm": 0.36328125, "learning_rate": 0.0002723454599660649, "loss": 0.4515, "step": 21805 }, { "epoch": 0.5536165298067038, "grad_norm": 0.357421875, "learning_rate": 0.000272326232707807, "loss": 0.4639, "step": 21810 }, { "epoch": 0.5537434478557196, "grad_norm": 0.3515625, "learning_rate": 0.00027230699944704963, "loss": 0.4886, "step": 21815 }, { "epoch": 0.5538703659047353, "grad_norm": 0.328125, "learning_rate": 0.0002722877601847366, "loss": 0.4934, "step": 21820 }, { "epoch": 0.553997283953751, "grad_norm": 0.3515625, "learning_rate": 0.00027226851492181183, "loss": 0.461, "step": 21825 }, { "epoch": 0.5541242020027668, "grad_norm": 0.376953125, "learning_rate": 0.0002722492636592197, "loss": 0.473, "step": 21830 }, { "epoch": 0.5542511200517826, "grad_norm": 0.3515625, "learning_rate": 0.0002722300063979049, "loss": 0.4786, "step": 21835 }, { "epoch": 0.5543780381007983, "grad_norm": 0.35546875, "learning_rate": 0.00027221074313881237, "loss": 0.4623, "step": 21840 }, { "epoch": 0.5545049561498141, "grad_norm": 0.408203125, "learning_rate": 0.00027219147388288744, "loss": 0.5028, "step": 21845 }, { "epoch": 0.5546318741988299, "grad_norm": 0.32421875, "learning_rate": 0.0002721721986310755, "loss": 0.5116, "step": 21850 }, { "epoch": 0.5547587922478455, "grad_norm": 0.32421875, "learning_rate": 0.00027215291738432243, "loss": 0.4262, "step": 21855 }, { "epoch": 0.5548857102968613, "grad_norm": 0.357421875, "learning_rate": 0.0002721336301435744, "loss": 0.4648, "step": 21860 }, { "epoch": 0.5550126283458771, "grad_norm": 0.35546875, "learning_rate": 0.00027211433690977775, "loss": 0.4719, "step": 21865 }, { "epoch": 0.5551395463948928, "grad_norm": 0.349609375, "learning_rate": 0.0002720950376838792, "loss": 0.4759, "step": 21870 }, { "epoch": 0.5552664644439086, "grad_norm": 0.359375, "learning_rate": 0.0002720757324668259, "loss": 0.4661, "step": 21875 }, { "epoch": 0.5553933824929244, "grad_norm": 0.3671875, "learning_rate": 0.00027205642125956495, "loss": 0.4612, "step": 21880 }, { "epoch": 0.55552030054194, "grad_norm": 0.359375, "learning_rate": 0.00027203710406304403, "loss": 0.4825, "step": 21885 }, { "epoch": 0.5556472185909558, "grad_norm": 0.328125, "learning_rate": 0.00027201778087821105, "loss": 0.4676, "step": 21890 }, { "epoch": 0.5557741366399715, "grad_norm": 0.33203125, "learning_rate": 0.0002719984517060142, "loss": 0.477, "step": 21895 }, { "epoch": 0.5559010546889873, "grad_norm": 0.341796875, "learning_rate": 0.0002719791165474019, "loss": 0.4521, "step": 21900 }, { "epoch": 0.5560279727380031, "grad_norm": 0.3359375, "learning_rate": 0.0002719597754033229, "loss": 0.4457, "step": 21905 }, { "epoch": 0.5561548907870189, "grad_norm": 0.322265625, "learning_rate": 0.00027194042827472626, "loss": 0.4543, "step": 21910 }, { "epoch": 0.5562818088360346, "grad_norm": 0.341796875, "learning_rate": 0.0002719210751625615, "loss": 0.4758, "step": 21915 }, { "epoch": 0.5564087268850503, "grad_norm": 0.34375, "learning_rate": 0.0002719017160677781, "loss": 0.4811, "step": 21920 }, { "epoch": 0.556535644934066, "grad_norm": 0.33984375, "learning_rate": 0.00027188235099132605, "loss": 0.4532, "step": 21925 }, { "epoch": 0.5566625629830818, "grad_norm": 0.33984375, "learning_rate": 0.00027186297993415557, "loss": 0.4493, "step": 21930 }, { "epoch": 0.5567894810320976, "grad_norm": 0.369140625, "learning_rate": 0.0002718436028972172, "loss": 0.4877, "step": 21935 }, { "epoch": 0.5569163990811133, "grad_norm": 0.357421875, "learning_rate": 0.00027182421988146177, "loss": 0.4651, "step": 21940 }, { "epoch": 0.5570433171301291, "grad_norm": 0.33203125, "learning_rate": 0.00027180483088784047, "loss": 0.473, "step": 21945 }, { "epoch": 0.5571702351791449, "grad_norm": 0.365234375, "learning_rate": 0.00027178543591730456, "loss": 0.4704, "step": 21950 }, { "epoch": 0.5572971532281605, "grad_norm": 0.365234375, "learning_rate": 0.0002717660349708058, "loss": 0.5059, "step": 21955 }, { "epoch": 0.5574240712771763, "grad_norm": 0.326171875, "learning_rate": 0.0002717466280492963, "loss": 0.4632, "step": 21960 }, { "epoch": 0.5575509893261921, "grad_norm": 0.369140625, "learning_rate": 0.0002717272151537282, "loss": 0.4893, "step": 21965 }, { "epoch": 0.5576779073752078, "grad_norm": 0.34765625, "learning_rate": 0.00027170779628505416, "loss": 0.4889, "step": 21970 }, { "epoch": 0.5578048254242236, "grad_norm": 0.34375, "learning_rate": 0.000271688371444227, "loss": 0.4994, "step": 21975 }, { "epoch": 0.5579317434732394, "grad_norm": 0.322265625, "learning_rate": 0.0002716689406322, "loss": 0.4645, "step": 21980 }, { "epoch": 0.558058661522255, "grad_norm": 0.35546875, "learning_rate": 0.00027164950384992646, "loss": 0.445, "step": 21985 }, { "epoch": 0.5581855795712708, "grad_norm": 0.3515625, "learning_rate": 0.0002716300610983603, "loss": 0.4815, "step": 21990 }, { "epoch": 0.5583124976202866, "grad_norm": 0.34765625, "learning_rate": 0.00027161061237845544, "loss": 0.4615, "step": 21995 }, { "epoch": 0.5584394156693023, "grad_norm": 0.31640625, "learning_rate": 0.0002715911576911663, "loss": 0.4563, "step": 22000 }, { "epoch": 0.5585663337183181, "grad_norm": 0.3359375, "learning_rate": 0.00027157169703744753, "loss": 0.4756, "step": 22005 }, { "epoch": 0.5586932517673339, "grad_norm": 0.328125, "learning_rate": 0.0002715522304182539, "loss": 0.4401, "step": 22010 }, { "epoch": 0.5588201698163496, "grad_norm": 0.369140625, "learning_rate": 0.00027153275783454084, "loss": 0.4855, "step": 22015 }, { "epoch": 0.5589470878653653, "grad_norm": 0.365234375, "learning_rate": 0.00027151327928726375, "loss": 0.4764, "step": 22020 }, { "epoch": 0.5590740059143811, "grad_norm": 0.34375, "learning_rate": 0.00027149379477737844, "loss": 0.4785, "step": 22025 }, { "epoch": 0.5592009239633968, "grad_norm": 0.337890625, "learning_rate": 0.000271474304305841, "loss": 0.4677, "step": 22030 }, { "epoch": 0.5593278420124126, "grad_norm": 0.318359375, "learning_rate": 0.0002714548078736078, "loss": 0.4588, "step": 22035 }, { "epoch": 0.5594547600614284, "grad_norm": 0.322265625, "learning_rate": 0.00027143530548163564, "loss": 0.4778, "step": 22040 }, { "epoch": 0.5595816781104441, "grad_norm": 0.33984375, "learning_rate": 0.0002714157971308814, "loss": 0.4455, "step": 22045 }, { "epoch": 0.5597085961594598, "grad_norm": 0.33984375, "learning_rate": 0.00027139628282230234, "loss": 0.4679, "step": 22050 }, { "epoch": 0.5598355142084755, "grad_norm": 0.326171875, "learning_rate": 0.000271376762556856, "loss": 0.427, "step": 22055 }, { "epoch": 0.5599624322574913, "grad_norm": 0.357421875, "learning_rate": 0.00027135723633550033, "loss": 0.4675, "step": 22060 }, { "epoch": 0.5600893503065071, "grad_norm": 0.376953125, "learning_rate": 0.0002713377041591934, "loss": 0.4925, "step": 22065 }, { "epoch": 0.5602162683555229, "grad_norm": 0.35546875, "learning_rate": 0.00027131816602889364, "loss": 0.4518, "step": 22070 }, { "epoch": 0.5603431864045386, "grad_norm": 0.33984375, "learning_rate": 0.00027129862194555984, "loss": 0.4813, "step": 22075 }, { "epoch": 0.5604701044535544, "grad_norm": 0.33203125, "learning_rate": 0.0002712790719101509, "loss": 0.4787, "step": 22080 }, { "epoch": 0.56059702250257, "grad_norm": 0.341796875, "learning_rate": 0.0002712595159236263, "loss": 0.4486, "step": 22085 }, { "epoch": 0.5607239405515858, "grad_norm": 0.337890625, "learning_rate": 0.00027123995398694553, "loss": 0.4616, "step": 22090 }, { "epoch": 0.5608508586006016, "grad_norm": 0.318359375, "learning_rate": 0.00027122038610106846, "loss": 0.4574, "step": 22095 }, { "epoch": 0.5609777766496173, "grad_norm": 0.361328125, "learning_rate": 0.0002712008122669554, "loss": 0.4502, "step": 22100 }, { "epoch": 0.5611046946986331, "grad_norm": 0.3515625, "learning_rate": 0.00027118123248556666, "loss": 0.4551, "step": 22105 }, { "epoch": 0.5612316127476489, "grad_norm": 0.32421875, "learning_rate": 0.0002711616467578632, "loss": 0.4405, "step": 22110 }, { "epoch": 0.5613585307966646, "grad_norm": 0.326171875, "learning_rate": 0.000271142055084806, "loss": 0.4428, "step": 22115 }, { "epoch": 0.5614854488456803, "grad_norm": 0.35546875, "learning_rate": 0.0002711224574673564, "loss": 0.4797, "step": 22120 }, { "epoch": 0.5616123668946961, "grad_norm": 0.35546875, "learning_rate": 0.0002711028539064761, "loss": 0.4759, "step": 22125 }, { "epoch": 0.5617392849437118, "grad_norm": 0.33203125, "learning_rate": 0.0002710832444031269, "loss": 0.4353, "step": 22130 }, { "epoch": 0.5618662029927276, "grad_norm": 0.337890625, "learning_rate": 0.0002710636289582712, "loss": 0.4784, "step": 22135 }, { "epoch": 0.5619931210417434, "grad_norm": 0.333984375, "learning_rate": 0.00027104400757287144, "loss": 0.4872, "step": 22140 }, { "epoch": 0.5621200390907591, "grad_norm": 0.380859375, "learning_rate": 0.00027102438024789044, "loss": 0.474, "step": 22145 }, { "epoch": 0.5622469571397748, "grad_norm": 0.3515625, "learning_rate": 0.0002710047469842913, "loss": 0.4649, "step": 22150 }, { "epoch": 0.5623738751887906, "grad_norm": 0.310546875, "learning_rate": 0.00027098510778303744, "loss": 0.4615, "step": 22155 }, { "epoch": 0.5625007932378063, "grad_norm": 0.337890625, "learning_rate": 0.00027096546264509256, "loss": 0.4497, "step": 22160 }, { "epoch": 0.5626277112868221, "grad_norm": 0.34765625, "learning_rate": 0.00027094581157142056, "loss": 0.4716, "step": 22165 }, { "epoch": 0.5627546293358379, "grad_norm": 0.3203125, "learning_rate": 0.0002709261545629858, "loss": 0.4623, "step": 22170 }, { "epoch": 0.5628815473848536, "grad_norm": 0.328125, "learning_rate": 0.0002709064916207528, "loss": 0.4664, "step": 22175 }, { "epoch": 0.5630084654338694, "grad_norm": 0.330078125, "learning_rate": 0.0002708868227456864, "loss": 0.4522, "step": 22180 }, { "epoch": 0.563135383482885, "grad_norm": 0.369140625, "learning_rate": 0.0002708671479387518, "loss": 0.4876, "step": 22185 }, { "epoch": 0.5632623015319008, "grad_norm": 0.34375, "learning_rate": 0.00027084746720091435, "loss": 0.4194, "step": 22190 }, { "epoch": 0.5633892195809166, "grad_norm": 0.41796875, "learning_rate": 0.00027082778053313987, "loss": 0.4805, "step": 22195 }, { "epoch": 0.5635161376299324, "grad_norm": 0.32421875, "learning_rate": 0.00027080808793639426, "loss": 0.4744, "step": 22200 }, { "epoch": 0.5636430556789481, "grad_norm": 0.35546875, "learning_rate": 0.000270788389411644, "loss": 0.4662, "step": 22205 }, { "epoch": 0.5637699737279639, "grad_norm": 0.345703125, "learning_rate": 0.0002707686849598555, "loss": 0.4909, "step": 22210 }, { "epoch": 0.5638968917769797, "grad_norm": 0.73828125, "learning_rate": 0.00027074897458199574, "loss": 0.4522, "step": 22215 }, { "epoch": 0.5640238098259953, "grad_norm": 0.345703125, "learning_rate": 0.00027072925827903194, "loss": 0.4968, "step": 22220 }, { "epoch": 0.5641507278750111, "grad_norm": 0.328125, "learning_rate": 0.0002707095360519315, "loss": 0.4606, "step": 22225 }, { "epoch": 0.5642776459240268, "grad_norm": 0.296875, "learning_rate": 0.00027068980790166225, "loss": 0.4604, "step": 22230 }, { "epoch": 0.5644045639730426, "grad_norm": 0.337890625, "learning_rate": 0.00027067007382919214, "loss": 0.4171, "step": 22235 }, { "epoch": 0.5645314820220584, "grad_norm": 0.330078125, "learning_rate": 0.0002706503338354896, "loss": 0.502, "step": 22240 }, { "epoch": 0.5646584000710742, "grad_norm": 0.353515625, "learning_rate": 0.00027063058792152326, "loss": 0.4505, "step": 22245 }, { "epoch": 0.5647853181200898, "grad_norm": 0.37109375, "learning_rate": 0.00027061083608826206, "loss": 0.4785, "step": 22250 }, { "epoch": 0.5649122361691056, "grad_norm": 0.349609375, "learning_rate": 0.00027059107833667513, "loss": 0.4582, "step": 22255 }, { "epoch": 0.5650391542181213, "grad_norm": 0.3359375, "learning_rate": 0.00027057131466773197, "loss": 0.4632, "step": 22260 }, { "epoch": 0.5651660722671371, "grad_norm": 0.345703125, "learning_rate": 0.00027055154508240245, "loss": 0.4532, "step": 22265 }, { "epoch": 0.5652929903161529, "grad_norm": 0.341796875, "learning_rate": 0.0002705317695816567, "loss": 0.5051, "step": 22270 }, { "epoch": 0.5654199083651686, "grad_norm": 0.37109375, "learning_rate": 0.00027051198816646497, "loss": 0.5003, "step": 22275 }, { "epoch": 0.5655468264141844, "grad_norm": 0.341796875, "learning_rate": 0.00027049220083779805, "loss": 0.472, "step": 22280 }, { "epoch": 0.5656737444632001, "grad_norm": 0.33984375, "learning_rate": 0.00027047240759662676, "loss": 0.434, "step": 22285 }, { "epoch": 0.5658006625122158, "grad_norm": 0.361328125, "learning_rate": 0.00027045260844392243, "loss": 0.4982, "step": 22290 }, { "epoch": 0.5659275805612316, "grad_norm": 0.3203125, "learning_rate": 0.00027043280338065657, "loss": 0.4797, "step": 22295 }, { "epoch": 0.5660544986102474, "grad_norm": 0.333984375, "learning_rate": 0.0002704129924078011, "loss": 0.4784, "step": 22300 }, { "epoch": 0.5661814166592631, "grad_norm": 0.33203125, "learning_rate": 0.000270393175526328, "loss": 0.4683, "step": 22305 }, { "epoch": 0.5663083347082789, "grad_norm": 0.3671875, "learning_rate": 0.0002703733527372097, "loss": 0.4724, "step": 22310 }, { "epoch": 0.5664352527572946, "grad_norm": 0.3515625, "learning_rate": 0.000270353524041419, "loss": 0.4535, "step": 22315 }, { "epoch": 0.5665621708063103, "grad_norm": 0.333984375, "learning_rate": 0.0002703336894399288, "loss": 0.4457, "step": 22320 }, { "epoch": 0.5666890888553261, "grad_norm": 0.341796875, "learning_rate": 0.0002703138489337124, "loss": 0.4601, "step": 22325 }, { "epoch": 0.5668160069043419, "grad_norm": 0.333984375, "learning_rate": 0.0002702940025237433, "loss": 0.4589, "step": 22330 }, { "epoch": 0.5669429249533576, "grad_norm": 0.341796875, "learning_rate": 0.0002702741502109955, "loss": 0.4646, "step": 22335 }, { "epoch": 0.5670698430023734, "grad_norm": 0.349609375, "learning_rate": 0.00027025429199644297, "loss": 0.4417, "step": 22340 }, { "epoch": 0.5671967610513892, "grad_norm": 0.35546875, "learning_rate": 0.0002702344278810603, "loss": 0.4727, "step": 22345 }, { "epoch": 0.5673236791004048, "grad_norm": 0.341796875, "learning_rate": 0.00027021455786582207, "loss": 0.4624, "step": 22350 }, { "epoch": 0.5674505971494206, "grad_norm": 0.337890625, "learning_rate": 0.00027019468195170346, "loss": 0.4362, "step": 22355 }, { "epoch": 0.5675775151984364, "grad_norm": 0.3515625, "learning_rate": 0.00027017480013967964, "loss": 0.4798, "step": 22360 }, { "epoch": 0.5677044332474521, "grad_norm": 0.369140625, "learning_rate": 0.0002701549124307263, "loss": 0.491, "step": 22365 }, { "epoch": 0.5678313512964679, "grad_norm": 0.34375, "learning_rate": 0.00027013501882581923, "loss": 0.4511, "step": 22370 }, { "epoch": 0.5679582693454837, "grad_norm": 0.34375, "learning_rate": 0.00027011511932593463, "loss": 0.4683, "step": 22375 }, { "epoch": 0.5680851873944994, "grad_norm": 0.35546875, "learning_rate": 0.000270095213932049, "loss": 0.4641, "step": 22380 }, { "epoch": 0.5682121054435151, "grad_norm": 0.349609375, "learning_rate": 0.0002700753026451391, "loss": 0.4835, "step": 22385 }, { "epoch": 0.5683390234925308, "grad_norm": 0.373046875, "learning_rate": 0.0002700553854661818, "loss": 0.4668, "step": 22390 }, { "epoch": 0.5684659415415466, "grad_norm": 0.365234375, "learning_rate": 0.00027003546239615464, "loss": 0.4857, "step": 22395 }, { "epoch": 0.5685928595905624, "grad_norm": 0.306640625, "learning_rate": 0.00027001553343603517, "loss": 0.4839, "step": 22400 }, { "epoch": 0.5687197776395782, "grad_norm": 0.369140625, "learning_rate": 0.00026999559858680125, "loss": 0.4623, "step": 22405 }, { "epoch": 0.5688466956885939, "grad_norm": 0.326171875, "learning_rate": 0.0002699756578494311, "loss": 0.4835, "step": 22410 }, { "epoch": 0.5689736137376096, "grad_norm": 0.34765625, "learning_rate": 0.00026995571122490324, "loss": 0.463, "step": 22415 }, { "epoch": 0.5691005317866253, "grad_norm": 0.36328125, "learning_rate": 0.0002699357587141964, "loss": 0.4719, "step": 22420 }, { "epoch": 0.5692274498356411, "grad_norm": 0.333984375, "learning_rate": 0.00026991580031828963, "loss": 0.4697, "step": 22425 }, { "epoch": 0.5693543678846569, "grad_norm": 0.33203125, "learning_rate": 0.00026989583603816234, "loss": 0.4601, "step": 22430 }, { "epoch": 0.5694812859336726, "grad_norm": 0.3203125, "learning_rate": 0.0002698758658747941, "loss": 0.462, "step": 22435 }, { "epoch": 0.5696082039826884, "grad_norm": 0.35546875, "learning_rate": 0.00026985588982916494, "loss": 0.4861, "step": 22440 }, { "epoch": 0.5697351220317042, "grad_norm": 0.361328125, "learning_rate": 0.00026983590790225496, "loss": 0.4815, "step": 22445 }, { "epoch": 0.5698620400807198, "grad_norm": 0.36328125, "learning_rate": 0.0002698159200950447, "loss": 0.4707, "step": 22450 }, { "epoch": 0.5699889581297356, "grad_norm": 0.35546875, "learning_rate": 0.00026979592640851503, "loss": 0.4494, "step": 22455 }, { "epoch": 0.5701158761787514, "grad_norm": 0.34765625, "learning_rate": 0.0002697759268436469, "loss": 0.4813, "step": 22460 }, { "epoch": 0.5702427942277671, "grad_norm": 0.3046875, "learning_rate": 0.00026975592140142184, "loss": 0.4238, "step": 22465 }, { "epoch": 0.5703697122767829, "grad_norm": 0.34765625, "learning_rate": 0.00026973591008282134, "loss": 0.4617, "step": 22470 }, { "epoch": 0.5704966303257987, "grad_norm": 0.337890625, "learning_rate": 0.0002697158928888274, "loss": 0.4711, "step": 22475 }, { "epoch": 0.5706235483748144, "grad_norm": 0.322265625, "learning_rate": 0.00026969586982042237, "loss": 0.4432, "step": 22480 }, { "epoch": 0.5707504664238301, "grad_norm": 0.3359375, "learning_rate": 0.0002696758408785887, "loss": 0.4586, "step": 22485 }, { "epoch": 0.5708773844728459, "grad_norm": 0.345703125, "learning_rate": 0.0002696558060643091, "loss": 0.4733, "step": 22490 }, { "epoch": 0.5710043025218616, "grad_norm": 0.3515625, "learning_rate": 0.0002696357653785668, "loss": 0.4781, "step": 22495 }, { "epoch": 0.5711312205708774, "grad_norm": 0.365234375, "learning_rate": 0.0002696157188223452, "loss": 0.4648, "step": 22500 }, { "epoch": 0.5712581386198932, "grad_norm": 0.330078125, "learning_rate": 0.00026959566639662784, "loss": 0.4554, "step": 22505 }, { "epoch": 0.5713850566689089, "grad_norm": 0.345703125, "learning_rate": 0.0002695756081023988, "loss": 0.4826, "step": 22510 }, { "epoch": 0.5715119747179246, "grad_norm": 0.333984375, "learning_rate": 0.0002695555439406423, "loss": 0.455, "step": 22515 }, { "epoch": 0.5716388927669404, "grad_norm": 0.34375, "learning_rate": 0.00026953547391234296, "loss": 0.44, "step": 22520 }, { "epoch": 0.5717658108159561, "grad_norm": 0.33984375, "learning_rate": 0.0002695153980184854, "loss": 0.5054, "step": 22525 }, { "epoch": 0.5718927288649719, "grad_norm": 0.322265625, "learning_rate": 0.00026949531626005495, "loss": 0.4605, "step": 22530 }, { "epoch": 0.5720196469139877, "grad_norm": 0.3359375, "learning_rate": 0.0002694752286380369, "loss": 0.4295, "step": 22535 }, { "epoch": 0.5721465649630034, "grad_norm": 0.37890625, "learning_rate": 0.000269455135153417, "loss": 0.4747, "step": 22540 }, { "epoch": 0.5722734830120192, "grad_norm": 0.33203125, "learning_rate": 0.0002694350358071812, "loss": 0.4445, "step": 22545 }, { "epoch": 0.5724004010610348, "grad_norm": 0.361328125, "learning_rate": 0.00026941493060031577, "loss": 0.4813, "step": 22550 }, { "epoch": 0.5725273191100506, "grad_norm": 0.369140625, "learning_rate": 0.00026939481953380725, "loss": 0.484, "step": 22555 }, { "epoch": 0.5726542371590664, "grad_norm": 0.337890625, "learning_rate": 0.0002693747026086425, "loss": 0.4749, "step": 22560 }, { "epoch": 0.5727811552080821, "grad_norm": 0.33984375, "learning_rate": 0.0002693545798258086, "loss": 0.4593, "step": 22565 }, { "epoch": 0.5729080732570979, "grad_norm": 0.337890625, "learning_rate": 0.00026933445118629306, "loss": 0.4422, "step": 22570 }, { "epoch": 0.5730349913061137, "grad_norm": 0.361328125, "learning_rate": 0.00026931431669108356, "loss": 0.4766, "step": 22575 }, { "epoch": 0.5731619093551293, "grad_norm": 0.34375, "learning_rate": 0.000269294176341168, "loss": 0.4493, "step": 22580 }, { "epoch": 0.5732888274041451, "grad_norm": 0.3671875, "learning_rate": 0.00026927403013753473, "loss": 0.4846, "step": 22585 }, { "epoch": 0.5734157454531609, "grad_norm": 0.32421875, "learning_rate": 0.00026925387808117234, "loss": 0.4372, "step": 22590 }, { "epoch": 0.5735426635021766, "grad_norm": 0.345703125, "learning_rate": 0.0002692337201730697, "loss": 0.4513, "step": 22595 }, { "epoch": 0.5736695815511924, "grad_norm": 0.326171875, "learning_rate": 0.0002692135564142159, "loss": 0.4733, "step": 22600 }, { "epoch": 0.5737964996002082, "grad_norm": 0.349609375, "learning_rate": 0.0002691933868056003, "loss": 0.4361, "step": 22605 }, { "epoch": 0.5739234176492239, "grad_norm": 0.34375, "learning_rate": 0.00026917321134821274, "loss": 0.4498, "step": 22610 }, { "epoch": 0.5740503356982396, "grad_norm": 0.33984375, "learning_rate": 0.00026915303004304314, "loss": 0.4296, "step": 22615 }, { "epoch": 0.5741772537472554, "grad_norm": 0.345703125, "learning_rate": 0.00026913284289108185, "loss": 0.426, "step": 22620 }, { "epoch": 0.5743041717962711, "grad_norm": 0.34375, "learning_rate": 0.0002691126498933194, "loss": 0.4998, "step": 22625 }, { "epoch": 0.5744310898452869, "grad_norm": 0.3125, "learning_rate": 0.0002690924510507467, "loss": 0.4709, "step": 22630 }, { "epoch": 0.5745580078943027, "grad_norm": 0.345703125, "learning_rate": 0.00026907224636435484, "loss": 0.457, "step": 22635 }, { "epoch": 0.5746849259433184, "grad_norm": 0.3515625, "learning_rate": 0.00026905203583513525, "loss": 0.462, "step": 22640 }, { "epoch": 0.5748118439923342, "grad_norm": 0.341796875, "learning_rate": 0.00026903181946407974, "loss": 0.4807, "step": 22645 }, { "epoch": 0.5749387620413499, "grad_norm": 0.33984375, "learning_rate": 0.0002690115972521802, "loss": 0.4754, "step": 22650 }, { "epoch": 0.5750656800903656, "grad_norm": 0.328125, "learning_rate": 0.000268991369200429, "loss": 0.4819, "step": 22655 }, { "epoch": 0.5751925981393814, "grad_norm": 0.30859375, "learning_rate": 0.0002689711353098188, "loss": 0.4791, "step": 22660 }, { "epoch": 0.5753195161883972, "grad_norm": 0.37109375, "learning_rate": 0.0002689508955813423, "loss": 0.4611, "step": 22665 }, { "epoch": 0.5754464342374129, "grad_norm": 0.32421875, "learning_rate": 0.00026893065001599276, "loss": 0.4513, "step": 22670 }, { "epoch": 0.5755733522864287, "grad_norm": 0.34375, "learning_rate": 0.0002689103986147636, "loss": 0.4621, "step": 22675 }, { "epoch": 0.5757002703354444, "grad_norm": 0.322265625, "learning_rate": 0.00026889014137864857, "loss": 0.4606, "step": 22680 }, { "epoch": 0.5758271883844601, "grad_norm": 0.361328125, "learning_rate": 0.0002688698783086416, "loss": 0.4658, "step": 22685 }, { "epoch": 0.5759541064334759, "grad_norm": 0.314453125, "learning_rate": 0.0002688496094057371, "loss": 0.4439, "step": 22690 }, { "epoch": 0.5760810244824917, "grad_norm": 0.341796875, "learning_rate": 0.00026882933467092964, "loss": 0.4527, "step": 22695 }, { "epoch": 0.5762079425315074, "grad_norm": 0.3515625, "learning_rate": 0.00026880905410521397, "loss": 0.4546, "step": 22700 }, { "epoch": 0.5763348605805232, "grad_norm": 0.33984375, "learning_rate": 0.00026878876770958547, "loss": 0.4692, "step": 22705 }, { "epoch": 0.576461778629539, "grad_norm": 0.359375, "learning_rate": 0.0002687684754850393, "loss": 0.4691, "step": 22710 }, { "epoch": 0.5765886966785546, "grad_norm": 0.3359375, "learning_rate": 0.0002687481774325714, "loss": 0.4969, "step": 22715 }, { "epoch": 0.5767156147275704, "grad_norm": 0.359375, "learning_rate": 0.00026872787355317784, "loss": 0.4575, "step": 22720 }, { "epoch": 0.5768425327765861, "grad_norm": 0.31640625, "learning_rate": 0.0002687075638478547, "loss": 0.4746, "step": 22725 }, { "epoch": 0.5769694508256019, "grad_norm": 0.32421875, "learning_rate": 0.0002686872483175988, "loss": 0.4736, "step": 22730 }, { "epoch": 0.5770963688746177, "grad_norm": 0.314453125, "learning_rate": 0.0002686669269634068, "loss": 0.4392, "step": 22735 }, { "epoch": 0.5772232869236334, "grad_norm": 0.306640625, "learning_rate": 0.000268646599786276, "loss": 0.4568, "step": 22740 }, { "epoch": 0.5773502049726491, "grad_norm": 0.34375, "learning_rate": 0.0002686262667872038, "loss": 0.4547, "step": 22745 }, { "epoch": 0.5774771230216649, "grad_norm": 0.359375, "learning_rate": 0.00026860592796718795, "loss": 0.4722, "step": 22750 }, { "epoch": 0.5776040410706806, "grad_norm": 0.359375, "learning_rate": 0.0002685855833272265, "loss": 0.5001, "step": 22755 }, { "epoch": 0.5777309591196964, "grad_norm": 0.35546875, "learning_rate": 0.00026856523286831763, "loss": 0.5049, "step": 22760 }, { "epoch": 0.5778578771687122, "grad_norm": 0.32421875, "learning_rate": 0.0002685448765914601, "loss": 0.4524, "step": 22765 }, { "epoch": 0.5779847952177279, "grad_norm": 0.359375, "learning_rate": 0.0002685245144976526, "loss": 0.4586, "step": 22770 }, { "epoch": 0.5781117132667437, "grad_norm": 0.380859375, "learning_rate": 0.0002685041465878944, "loss": 0.483, "step": 22775 }, { "epoch": 0.5782386313157594, "grad_norm": 0.3359375, "learning_rate": 0.00026848377286318504, "loss": 0.4786, "step": 22780 }, { "epoch": 0.5783655493647751, "grad_norm": 0.35546875, "learning_rate": 0.0002684633933245241, "loss": 0.4624, "step": 22785 }, { "epoch": 0.5784924674137909, "grad_norm": 0.341796875, "learning_rate": 0.00026844300797291155, "loss": 0.4818, "step": 22790 }, { "epoch": 0.5786193854628067, "grad_norm": 0.318359375, "learning_rate": 0.0002684226168093479, "loss": 0.4471, "step": 22795 }, { "epoch": 0.5787463035118224, "grad_norm": 0.34375, "learning_rate": 0.00026840221983483355, "loss": 0.4602, "step": 22800 }, { "epoch": 0.5788732215608382, "grad_norm": 0.330078125, "learning_rate": 0.0002683818170503694, "loss": 0.4835, "step": 22805 }, { "epoch": 0.579000139609854, "grad_norm": 0.33984375, "learning_rate": 0.00026836140845695674, "loss": 0.4792, "step": 22810 }, { "epoch": 0.5791270576588696, "grad_norm": 0.322265625, "learning_rate": 0.00026834099405559684, "loss": 0.4348, "step": 22815 }, { "epoch": 0.5792539757078854, "grad_norm": 0.34375, "learning_rate": 0.0002683205738472915, "loss": 0.431, "step": 22820 }, { "epoch": 0.5793808937569012, "grad_norm": 0.34765625, "learning_rate": 0.00026830014783304275, "loss": 0.4862, "step": 22825 }, { "epoch": 0.5795078118059169, "grad_norm": 0.353515625, "learning_rate": 0.00026827971601385293, "loss": 0.4841, "step": 22830 }, { "epoch": 0.5796347298549327, "grad_norm": 0.328125, "learning_rate": 0.00026825927839072445, "loss": 0.4618, "step": 22835 }, { "epoch": 0.5797616479039485, "grad_norm": 0.333984375, "learning_rate": 0.00026823883496466036, "loss": 0.475, "step": 22840 }, { "epoch": 0.5798885659529641, "grad_norm": 0.322265625, "learning_rate": 0.0002682183857366637, "loss": 0.4805, "step": 22845 }, { "epoch": 0.5800154840019799, "grad_norm": 0.322265625, "learning_rate": 0.000268197930707738, "loss": 0.4695, "step": 22850 }, { "epoch": 0.5801424020509957, "grad_norm": 0.349609375, "learning_rate": 0.0002681774698788869, "loss": 0.4654, "step": 22855 }, { "epoch": 0.5802693201000114, "grad_norm": 0.3515625, "learning_rate": 0.0002681570032511144, "loss": 0.4322, "step": 22860 }, { "epoch": 0.5803962381490272, "grad_norm": 0.34375, "learning_rate": 0.0002681365308254249, "loss": 0.4577, "step": 22865 }, { "epoch": 0.580523156198043, "grad_norm": 0.326171875, "learning_rate": 0.00026811605260282275, "loss": 0.468, "step": 22870 }, { "epoch": 0.5806500742470587, "grad_norm": 0.341796875, "learning_rate": 0.00026809556858431303, "loss": 0.437, "step": 22875 }, { "epoch": 0.5807769922960744, "grad_norm": 0.353515625, "learning_rate": 0.0002680750787709008, "loss": 0.4738, "step": 22880 }, { "epoch": 0.5809039103450901, "grad_norm": 0.36328125, "learning_rate": 0.0002680545831635915, "loss": 0.4679, "step": 22885 }, { "epoch": 0.5810308283941059, "grad_norm": 0.337890625, "learning_rate": 0.0002680340817633909, "loss": 0.4721, "step": 22890 }, { "epoch": 0.5811577464431217, "grad_norm": 0.34765625, "learning_rate": 0.0002680135745713048, "loss": 0.4393, "step": 22895 }, { "epoch": 0.5812846644921374, "grad_norm": 0.333984375, "learning_rate": 0.00026799306158833963, "loss": 0.4424, "step": 22900 }, { "epoch": 0.5814115825411532, "grad_norm": 0.43359375, "learning_rate": 0.0002679725428155019, "loss": 0.469, "step": 22905 }, { "epoch": 0.581538500590169, "grad_norm": 0.345703125, "learning_rate": 0.0002679520182537986, "loss": 0.4685, "step": 22910 }, { "epoch": 0.5816654186391846, "grad_norm": 0.333984375, "learning_rate": 0.00026793148790423666, "loss": 0.4438, "step": 22915 }, { "epoch": 0.5817923366882004, "grad_norm": 0.34765625, "learning_rate": 0.0002679109517678236, "loss": 0.4625, "step": 22920 }, { "epoch": 0.5819192547372162, "grad_norm": 0.345703125, "learning_rate": 0.0002678904098455671, "loss": 0.4453, "step": 22925 }, { "epoch": 0.5820461727862319, "grad_norm": 0.3359375, "learning_rate": 0.0002678698621384752, "loss": 0.4838, "step": 22930 }, { "epoch": 0.5821730908352477, "grad_norm": 0.341796875, "learning_rate": 0.00026784930864755604, "loss": 0.4678, "step": 22935 }, { "epoch": 0.5823000088842635, "grad_norm": 0.380859375, "learning_rate": 0.0002678287493738183, "loss": 0.4693, "step": 22940 }, { "epoch": 0.5824269269332791, "grad_norm": 0.376953125, "learning_rate": 0.00026780818431827074, "loss": 0.4827, "step": 22945 }, { "epoch": 0.5825538449822949, "grad_norm": 0.33984375, "learning_rate": 0.00026778761348192256, "loss": 0.4591, "step": 22950 }, { "epoch": 0.5826807630313107, "grad_norm": 0.369140625, "learning_rate": 0.000267767036865783, "loss": 0.4636, "step": 22955 }, { "epoch": 0.5828076810803264, "grad_norm": 0.349609375, "learning_rate": 0.000267746454470862, "loss": 0.4792, "step": 22960 }, { "epoch": 0.5829345991293422, "grad_norm": 0.37890625, "learning_rate": 0.00026772586629816923, "loss": 0.4621, "step": 22965 }, { "epoch": 0.583061517178358, "grad_norm": 0.3359375, "learning_rate": 0.0002677052723487152, "loss": 0.4741, "step": 22970 }, { "epoch": 0.5831884352273737, "grad_norm": 0.32421875, "learning_rate": 0.0002676846726235103, "loss": 0.4519, "step": 22975 }, { "epoch": 0.5833153532763894, "grad_norm": 0.357421875, "learning_rate": 0.0002676640671235654, "loss": 0.4721, "step": 22980 }, { "epoch": 0.5834422713254052, "grad_norm": 0.341796875, "learning_rate": 0.0002676434558498916, "loss": 0.4779, "step": 22985 }, { "epoch": 0.5835691893744209, "grad_norm": 0.341796875, "learning_rate": 0.00026762283880350026, "loss": 0.4588, "step": 22990 }, { "epoch": 0.5836961074234367, "grad_norm": 0.33203125, "learning_rate": 0.00026760221598540315, "loss": 0.4814, "step": 22995 }, { "epoch": 0.5838230254724525, "grad_norm": 0.341796875, "learning_rate": 0.00026758158739661206, "loss": 0.4443, "step": 23000 }, { "epoch": 0.5839499435214682, "grad_norm": 0.330078125, "learning_rate": 0.00026756095303813937, "loss": 0.4652, "step": 23005 }, { "epoch": 0.5840768615704839, "grad_norm": 0.365234375, "learning_rate": 0.0002675403129109975, "loss": 0.4878, "step": 23010 }, { "epoch": 0.5842037796194997, "grad_norm": 0.341796875, "learning_rate": 0.0002675196670161993, "loss": 0.4681, "step": 23015 }, { "epoch": 0.5843306976685154, "grad_norm": 0.353515625, "learning_rate": 0.00026749901535475787, "loss": 0.4866, "step": 23020 }, { "epoch": 0.5844576157175312, "grad_norm": 0.353515625, "learning_rate": 0.0002674783579276866, "loss": 0.4698, "step": 23025 }, { "epoch": 0.584584533766547, "grad_norm": 0.322265625, "learning_rate": 0.000267457694735999, "loss": 0.4689, "step": 23030 }, { "epoch": 0.5847114518155627, "grad_norm": 0.34375, "learning_rate": 0.0002674370257807092, "loss": 0.4901, "step": 23035 }, { "epoch": 0.5848383698645785, "grad_norm": 0.33984375, "learning_rate": 0.0002674163510628313, "loss": 0.4345, "step": 23040 }, { "epoch": 0.5849652879135941, "grad_norm": 0.365234375, "learning_rate": 0.0002673956705833798, "loss": 0.4933, "step": 23045 }, { "epoch": 0.5850922059626099, "grad_norm": 0.337890625, "learning_rate": 0.00026737498434336953, "loss": 0.4302, "step": 23050 }, { "epoch": 0.5852191240116257, "grad_norm": 0.33203125, "learning_rate": 0.0002673542923438156, "loss": 0.4447, "step": 23055 }, { "epoch": 0.5853460420606414, "grad_norm": 0.330078125, "learning_rate": 0.00026733359458573316, "loss": 0.4652, "step": 23060 }, { "epoch": 0.5854729601096572, "grad_norm": 0.33203125, "learning_rate": 0.00026731289107013805, "loss": 0.5026, "step": 23065 }, { "epoch": 0.585599878158673, "grad_norm": 0.33203125, "learning_rate": 0.0002672921817980461, "loss": 0.4757, "step": 23070 }, { "epoch": 0.5857267962076887, "grad_norm": 0.5234375, "learning_rate": 0.0002672714667704735, "loss": 0.4956, "step": 23075 }, { "epoch": 0.5858537142567044, "grad_norm": 0.3203125, "learning_rate": 0.00026725074598843673, "loss": 0.4651, "step": 23080 }, { "epoch": 0.5859806323057202, "grad_norm": 0.375, "learning_rate": 0.00026723001945295257, "loss": 0.4863, "step": 23085 }, { "epoch": 0.5861075503547359, "grad_norm": 0.3515625, "learning_rate": 0.0002672092871650381, "loss": 0.4754, "step": 23090 }, { "epoch": 0.5862344684037517, "grad_norm": 0.36328125, "learning_rate": 0.00026718854912571047, "loss": 0.4901, "step": 23095 }, { "epoch": 0.5863613864527675, "grad_norm": 0.296875, "learning_rate": 0.0002671678053359875, "loss": 0.42, "step": 23100 }, { "epoch": 0.5864883045017832, "grad_norm": 0.34375, "learning_rate": 0.00026714705579688704, "loss": 0.5146, "step": 23105 }, { "epoch": 0.5866152225507989, "grad_norm": 0.365234375, "learning_rate": 0.00026712630050942715, "loss": 0.4537, "step": 23110 }, { "epoch": 0.5867421405998147, "grad_norm": 0.341796875, "learning_rate": 0.0002671055394746264, "loss": 0.4749, "step": 23115 }, { "epoch": 0.5868690586488304, "grad_norm": 0.337890625, "learning_rate": 0.0002670847726935034, "loss": 0.4656, "step": 23120 }, { "epoch": 0.5869959766978462, "grad_norm": 0.333984375, "learning_rate": 0.0002670640001670773, "loss": 0.4855, "step": 23125 }, { "epoch": 0.587122894746862, "grad_norm": 0.3515625, "learning_rate": 0.00026704322189636734, "loss": 0.4391, "step": 23130 }, { "epoch": 0.5872498127958777, "grad_norm": 0.34765625, "learning_rate": 0.0002670224378823931, "loss": 0.4772, "step": 23135 }, { "epoch": 0.5873767308448935, "grad_norm": 0.3359375, "learning_rate": 0.0002670016481261744, "loss": 0.4741, "step": 23140 }, { "epoch": 0.5875036488939092, "grad_norm": 0.333984375, "learning_rate": 0.0002669808526287315, "loss": 0.4543, "step": 23145 }, { "epoch": 0.5876305669429249, "grad_norm": 0.30078125, "learning_rate": 0.00026696005139108475, "loss": 0.4439, "step": 23150 }, { "epoch": 0.5877574849919407, "grad_norm": 0.318359375, "learning_rate": 0.00026693924441425484, "loss": 0.4874, "step": 23155 }, { "epoch": 0.5878844030409565, "grad_norm": 0.32421875, "learning_rate": 0.0002669184316992628, "loss": 0.4848, "step": 23160 }, { "epoch": 0.5880113210899722, "grad_norm": 0.3671875, "learning_rate": 0.00026689761324712993, "loss": 0.4841, "step": 23165 }, { "epoch": 0.588138239138988, "grad_norm": 0.341796875, "learning_rate": 0.0002668767890588777, "loss": 0.4595, "step": 23170 }, { "epoch": 0.5882651571880037, "grad_norm": 0.35546875, "learning_rate": 0.000266855959135528, "loss": 0.4651, "step": 23175 }, { "epoch": 0.5883920752370194, "grad_norm": 0.341796875, "learning_rate": 0.00026683512347810295, "loss": 0.5086, "step": 23180 }, { "epoch": 0.5885189932860352, "grad_norm": 0.330078125, "learning_rate": 0.0002668142820876249, "loss": 0.5021, "step": 23185 }, { "epoch": 0.588645911335051, "grad_norm": 0.326171875, "learning_rate": 0.0002667934349651166, "loss": 0.4703, "step": 23190 }, { "epoch": 0.5887728293840667, "grad_norm": 0.3203125, "learning_rate": 0.0002667725821116009, "loss": 0.4502, "step": 23195 }, { "epoch": 0.5888997474330825, "grad_norm": 0.35546875, "learning_rate": 0.00026675172352810117, "loss": 0.4797, "step": 23200 }, { "epoch": 0.5890266654820983, "grad_norm": 0.357421875, "learning_rate": 0.0002667308592156408, "loss": 0.4798, "step": 23205 }, { "epoch": 0.5891535835311139, "grad_norm": 0.345703125, "learning_rate": 0.00026670998917524373, "loss": 0.4643, "step": 23210 }, { "epoch": 0.5892805015801297, "grad_norm": 0.357421875, "learning_rate": 0.000266689113407934, "loss": 0.467, "step": 23215 }, { "epoch": 0.5894074196291454, "grad_norm": 0.34375, "learning_rate": 0.0002666682319147359, "loss": 0.4416, "step": 23220 }, { "epoch": 0.5895343376781612, "grad_norm": 0.3359375, "learning_rate": 0.00026664734469667415, "loss": 0.4655, "step": 23225 }, { "epoch": 0.589661255727177, "grad_norm": 0.33203125, "learning_rate": 0.00026662645175477373, "loss": 0.4736, "step": 23230 }, { "epoch": 0.5897881737761927, "grad_norm": 0.3515625, "learning_rate": 0.0002666055530900597, "loss": 0.4852, "step": 23235 }, { "epoch": 0.5899150918252085, "grad_norm": 0.33203125, "learning_rate": 0.00026658464870355766, "loss": 0.4581, "step": 23240 }, { "epoch": 0.5900420098742242, "grad_norm": 0.296875, "learning_rate": 0.0002665637385962933, "loss": 0.4547, "step": 23245 }, { "epoch": 0.5901689279232399, "grad_norm": 0.333984375, "learning_rate": 0.0002665428227692928, "loss": 0.4296, "step": 23250 }, { "epoch": 0.5902958459722557, "grad_norm": 0.333984375, "learning_rate": 0.0002665219012235824, "loss": 0.4756, "step": 23255 }, { "epoch": 0.5904227640212715, "grad_norm": 0.349609375, "learning_rate": 0.0002665009739601887, "loss": 0.4758, "step": 23260 }, { "epoch": 0.5905496820702872, "grad_norm": 0.345703125, "learning_rate": 0.00026648004098013854, "loss": 0.4467, "step": 23265 }, { "epoch": 0.590676600119303, "grad_norm": 0.3359375, "learning_rate": 0.0002664591022844593, "loss": 0.476, "step": 23270 }, { "epoch": 0.5908035181683187, "grad_norm": 0.337890625, "learning_rate": 0.0002664381578741782, "loss": 0.4787, "step": 23275 }, { "epoch": 0.5909304362173344, "grad_norm": 0.345703125, "learning_rate": 0.00026641720775032315, "loss": 0.4612, "step": 23280 }, { "epoch": 0.5910573542663502, "grad_norm": 0.32421875, "learning_rate": 0.00026639625191392203, "loss": 0.5083, "step": 23285 }, { "epoch": 0.591184272315366, "grad_norm": 0.359375, "learning_rate": 0.0002663752903660032, "loss": 0.4879, "step": 23290 }, { "epoch": 0.5913111903643817, "grad_norm": 0.359375, "learning_rate": 0.00026635432310759526, "loss": 0.4998, "step": 23295 }, { "epoch": 0.5914381084133975, "grad_norm": 0.33984375, "learning_rate": 0.000266333350139727, "loss": 0.4669, "step": 23300 }, { "epoch": 0.5915650264624133, "grad_norm": 0.359375, "learning_rate": 0.00026631237146342767, "loss": 0.4796, "step": 23305 }, { "epoch": 0.5916919445114289, "grad_norm": 0.283203125, "learning_rate": 0.0002662913870797265, "loss": 0.3954, "step": 23310 }, { "epoch": 0.5918188625604447, "grad_norm": 0.337890625, "learning_rate": 0.0002662703969896533, "loss": 0.4808, "step": 23315 }, { "epoch": 0.5919457806094605, "grad_norm": 0.341796875, "learning_rate": 0.00026624940119423813, "loss": 0.4392, "step": 23320 }, { "epoch": 0.5920726986584762, "grad_norm": 0.357421875, "learning_rate": 0.000266228399694511, "loss": 0.4848, "step": 23325 }, { "epoch": 0.592199616707492, "grad_norm": 0.349609375, "learning_rate": 0.0002662073924915027, "loss": 0.4634, "step": 23330 }, { "epoch": 0.5923265347565078, "grad_norm": 0.36328125, "learning_rate": 0.00026618637958624396, "loss": 0.4837, "step": 23335 }, { "epoch": 0.5924534528055235, "grad_norm": 0.3671875, "learning_rate": 0.00026616536097976584, "loss": 0.4414, "step": 23340 }, { "epoch": 0.5925803708545392, "grad_norm": 0.3359375, "learning_rate": 0.00026614433667309967, "loss": 0.468, "step": 23345 }, { "epoch": 0.592707288903555, "grad_norm": 0.333984375, "learning_rate": 0.00026612330666727724, "loss": 0.4502, "step": 23350 }, { "epoch": 0.5928342069525707, "grad_norm": 0.349609375, "learning_rate": 0.0002661022709633304, "loss": 0.4573, "step": 23355 }, { "epoch": 0.5929611250015865, "grad_norm": 0.341796875, "learning_rate": 0.0002660812295622913, "loss": 0.4486, "step": 23360 }, { "epoch": 0.5930880430506023, "grad_norm": 0.333984375, "learning_rate": 0.0002660601824651925, "loss": 0.4868, "step": 23365 }, { "epoch": 0.593214961099618, "grad_norm": 0.35546875, "learning_rate": 0.00026603912967306686, "loss": 0.4971, "step": 23370 }, { "epoch": 0.5933418791486337, "grad_norm": 0.337890625, "learning_rate": 0.0002660180711869473, "loss": 0.4442, "step": 23375 }, { "epoch": 0.5934687971976494, "grad_norm": 0.34375, "learning_rate": 0.00026599700700786724, "loss": 0.4421, "step": 23380 }, { "epoch": 0.5935957152466652, "grad_norm": 0.3359375, "learning_rate": 0.00026597593713686025, "loss": 0.4745, "step": 23385 }, { "epoch": 0.593722633295681, "grad_norm": 0.3359375, "learning_rate": 0.0002659548615749602, "loss": 0.4828, "step": 23390 }, { "epoch": 0.5938495513446967, "grad_norm": 0.380859375, "learning_rate": 0.0002659337803232013, "loss": 0.4783, "step": 23395 }, { "epoch": 0.5939764693937125, "grad_norm": 0.3515625, "learning_rate": 0.00026591269338261796, "loss": 0.4374, "step": 23400 }, { "epoch": 0.5941033874427283, "grad_norm": 0.359375, "learning_rate": 0.0002658916007542449, "loss": 0.4481, "step": 23405 }, { "epoch": 0.5942303054917439, "grad_norm": 0.345703125, "learning_rate": 0.0002658705024391172, "loss": 0.5039, "step": 23410 }, { "epoch": 0.5943572235407597, "grad_norm": 0.3515625, "learning_rate": 0.00026584939843827014, "loss": 0.4799, "step": 23415 }, { "epoch": 0.5944841415897755, "grad_norm": 0.3359375, "learning_rate": 0.0002658282887527392, "loss": 0.4722, "step": 23420 }, { "epoch": 0.5946110596387912, "grad_norm": 0.3125, "learning_rate": 0.00026580717338356023, "loss": 0.4558, "step": 23425 }, { "epoch": 0.594737977687807, "grad_norm": 0.3515625, "learning_rate": 0.00026578605233176946, "loss": 0.5116, "step": 23430 }, { "epoch": 0.5948648957368228, "grad_norm": 0.337890625, "learning_rate": 0.0002657649255984032, "loss": 0.4565, "step": 23435 }, { "epoch": 0.5949918137858384, "grad_norm": 0.34765625, "learning_rate": 0.0002657437931844981, "loss": 0.469, "step": 23440 }, { "epoch": 0.5951187318348542, "grad_norm": 0.328125, "learning_rate": 0.00026572265509109126, "loss": 0.4768, "step": 23445 }, { "epoch": 0.59524564988387, "grad_norm": 0.4296875, "learning_rate": 0.0002657015113192198, "loss": 0.4638, "step": 23450 }, { "epoch": 0.5953725679328857, "grad_norm": 0.326171875, "learning_rate": 0.00026568036186992125, "loss": 0.4468, "step": 23455 }, { "epoch": 0.5954994859819015, "grad_norm": 0.33984375, "learning_rate": 0.0002656592067442334, "loss": 0.4483, "step": 23460 }, { "epoch": 0.5956264040309173, "grad_norm": 0.322265625, "learning_rate": 0.0002656380459431944, "loss": 0.4711, "step": 23465 }, { "epoch": 0.595753322079933, "grad_norm": 0.33984375, "learning_rate": 0.00026561687946784256, "loss": 0.4559, "step": 23470 }, { "epoch": 0.5958802401289487, "grad_norm": 0.361328125, "learning_rate": 0.00026559570731921647, "loss": 0.4913, "step": 23475 }, { "epoch": 0.5960071581779645, "grad_norm": 0.34765625, "learning_rate": 0.000265574529498355, "loss": 0.4753, "step": 23480 }, { "epoch": 0.5961340762269802, "grad_norm": 0.349609375, "learning_rate": 0.00026555334600629753, "loss": 0.4738, "step": 23485 }, { "epoch": 0.596260994275996, "grad_norm": 0.37890625, "learning_rate": 0.00026553215684408337, "loss": 0.4946, "step": 23490 }, { "epoch": 0.5963879123250118, "grad_norm": 0.357421875, "learning_rate": 0.0002655109620127522, "loss": 0.4782, "step": 23495 }, { "epoch": 0.5965148303740275, "grad_norm": 0.376953125, "learning_rate": 0.00026548976151334424, "loss": 0.5069, "step": 23500 }, { "epoch": 0.5966417484230433, "grad_norm": 0.34765625, "learning_rate": 0.0002654685553468997, "loss": 0.4823, "step": 23505 }, { "epoch": 0.596768666472059, "grad_norm": 0.333984375, "learning_rate": 0.0002654473435144591, "loss": 0.4905, "step": 23510 }, { "epoch": 0.5968955845210747, "grad_norm": 0.345703125, "learning_rate": 0.0002654261260170633, "loss": 0.4504, "step": 23515 }, { "epoch": 0.5970225025700905, "grad_norm": 0.326171875, "learning_rate": 0.0002654049028557535, "loss": 0.4562, "step": 23520 }, { "epoch": 0.5971494206191063, "grad_norm": 0.33984375, "learning_rate": 0.0002653836740315711, "loss": 0.4461, "step": 23525 }, { "epoch": 0.597276338668122, "grad_norm": 0.36328125, "learning_rate": 0.0002653624395455578, "loss": 0.476, "step": 23530 }, { "epoch": 0.5974032567171378, "grad_norm": 0.349609375, "learning_rate": 0.00026534119939875553, "loss": 0.4789, "step": 23535 }, { "epoch": 0.5975301747661534, "grad_norm": 0.349609375, "learning_rate": 0.0002653199535922066, "loss": 0.456, "step": 23540 }, { "epoch": 0.5976570928151692, "grad_norm": 0.3203125, "learning_rate": 0.0002652987021269534, "loss": 0.453, "step": 23545 }, { "epoch": 0.597784010864185, "grad_norm": 0.369140625, "learning_rate": 0.00026527744500403884, "loss": 0.4565, "step": 23550 }, { "epoch": 0.5979109289132007, "grad_norm": 0.34375, "learning_rate": 0.000265256182224506, "loss": 0.4806, "step": 23555 }, { "epoch": 0.5980378469622165, "grad_norm": 0.34375, "learning_rate": 0.0002652349137893982, "loss": 0.4726, "step": 23560 }, { "epoch": 0.5981647650112323, "grad_norm": 0.3515625, "learning_rate": 0.000265213639699759, "loss": 0.4966, "step": 23565 }, { "epoch": 0.598291683060248, "grad_norm": 0.322265625, "learning_rate": 0.0002651923599566325, "loss": 0.4515, "step": 23570 }, { "epoch": 0.5984186011092637, "grad_norm": 0.328125, "learning_rate": 0.00026517107456106275, "loss": 0.4648, "step": 23575 }, { "epoch": 0.5985455191582795, "grad_norm": 0.330078125, "learning_rate": 0.00026514978351409427, "loss": 0.4861, "step": 23580 }, { "epoch": 0.5986724372072952, "grad_norm": 0.345703125, "learning_rate": 0.00026512848681677174, "loss": 0.4594, "step": 23585 }, { "epoch": 0.598799355256311, "grad_norm": 0.318359375, "learning_rate": 0.0002651071844701402, "loss": 0.4656, "step": 23590 }, { "epoch": 0.5989262733053268, "grad_norm": 0.341796875, "learning_rate": 0.00026508587647524506, "loss": 0.4746, "step": 23595 }, { "epoch": 0.5990531913543425, "grad_norm": 0.32421875, "learning_rate": 0.00026506456283313174, "loss": 0.4447, "step": 23600 }, { "epoch": 0.5991801094033582, "grad_norm": 0.3359375, "learning_rate": 0.0002650432435448462, "loss": 0.4637, "step": 23605 }, { "epoch": 0.599307027452374, "grad_norm": 0.349609375, "learning_rate": 0.0002650219186114345, "loss": 0.4895, "step": 23610 }, { "epoch": 0.5994339455013897, "grad_norm": 0.3203125, "learning_rate": 0.00026500058803394304, "loss": 0.4486, "step": 23615 }, { "epoch": 0.5995608635504055, "grad_norm": 0.373046875, "learning_rate": 0.00026497925181341857, "loss": 0.5045, "step": 23620 }, { "epoch": 0.5996877815994213, "grad_norm": 0.34765625, "learning_rate": 0.000264957909950908, "loss": 0.4901, "step": 23625 }, { "epoch": 0.599814699648437, "grad_norm": 0.341796875, "learning_rate": 0.00026493656244745863, "loss": 0.4801, "step": 23630 }, { "epoch": 0.5999416176974528, "grad_norm": 0.333984375, "learning_rate": 0.00026491520930411783, "loss": 0.4634, "step": 23635 }, { "epoch": 0.6000685357464685, "grad_norm": 0.34375, "learning_rate": 0.0002648938505219335, "loss": 0.4765, "step": 23640 }, { "epoch": 0.6001954537954842, "grad_norm": 0.337890625, "learning_rate": 0.0002648724861019538, "loss": 0.4503, "step": 23645 }, { "epoch": 0.6003223718445, "grad_norm": 0.333984375, "learning_rate": 0.0002648511160452269, "loss": 0.4674, "step": 23650 }, { "epoch": 0.6004492898935158, "grad_norm": 0.349609375, "learning_rate": 0.00026482974035280153, "loss": 0.4598, "step": 23655 }, { "epoch": 0.6005762079425315, "grad_norm": 0.32421875, "learning_rate": 0.00026480835902572647, "loss": 0.4297, "step": 23660 }, { "epoch": 0.6007031259915473, "grad_norm": 0.322265625, "learning_rate": 0.00026478697206505094, "loss": 0.4429, "step": 23665 }, { "epoch": 0.6008300440405631, "grad_norm": 0.328125, "learning_rate": 0.0002647655794718245, "loss": 0.4635, "step": 23670 }, { "epoch": 0.6009569620895787, "grad_norm": 0.359375, "learning_rate": 0.00026474418124709673, "loss": 0.4663, "step": 23675 }, { "epoch": 0.6010838801385945, "grad_norm": 0.333984375, "learning_rate": 0.00026472277739191777, "loss": 0.4451, "step": 23680 }, { "epoch": 0.6012107981876103, "grad_norm": 0.353515625, "learning_rate": 0.00026470136790733773, "loss": 0.4719, "step": 23685 }, { "epoch": 0.601337716236626, "grad_norm": 0.35546875, "learning_rate": 0.0002646799527944073, "loss": 0.455, "step": 23690 }, { "epoch": 0.6014646342856418, "grad_norm": 0.30078125, "learning_rate": 0.00026465853205417727, "loss": 0.4644, "step": 23695 }, { "epoch": 0.6015915523346576, "grad_norm": 0.330078125, "learning_rate": 0.0002646371056876988, "loss": 0.4622, "step": 23700 }, { "epoch": 0.6017184703836732, "grad_norm": 0.330078125, "learning_rate": 0.0002646156736960231, "loss": 0.4582, "step": 23705 }, { "epoch": 0.601845388432689, "grad_norm": 0.37890625, "learning_rate": 0.00026459423608020204, "loss": 0.4521, "step": 23710 }, { "epoch": 0.6019723064817047, "grad_norm": 0.376953125, "learning_rate": 0.00026457279284128744, "loss": 0.4569, "step": 23715 }, { "epoch": 0.6020992245307205, "grad_norm": 0.361328125, "learning_rate": 0.0002645513439803315, "loss": 0.4705, "step": 23720 }, { "epoch": 0.6022261425797363, "grad_norm": 0.369140625, "learning_rate": 0.0002645298894983868, "loss": 0.4771, "step": 23725 }, { "epoch": 0.602353060628752, "grad_norm": 0.34375, "learning_rate": 0.000264508429396506, "loss": 0.4282, "step": 23730 }, { "epoch": 0.6024799786777678, "grad_norm": 0.349609375, "learning_rate": 0.0002644869636757422, "loss": 0.4733, "step": 23735 }, { "epoch": 0.6026068967267835, "grad_norm": 0.341796875, "learning_rate": 0.00026446549233714867, "loss": 0.442, "step": 23740 }, { "epoch": 0.6027338147757992, "grad_norm": 0.349609375, "learning_rate": 0.00026444401538177904, "loss": 0.4434, "step": 23745 }, { "epoch": 0.602860732824815, "grad_norm": 0.34375, "learning_rate": 0.00026442253281068717, "loss": 0.4632, "step": 23750 }, { "epoch": 0.6029876508738308, "grad_norm": 0.41796875, "learning_rate": 0.00026440104462492717, "loss": 0.4766, "step": 23755 }, { "epoch": 0.6031145689228465, "grad_norm": 0.380859375, "learning_rate": 0.00026437955082555354, "loss": 0.47, "step": 23760 }, { "epoch": 0.6032414869718623, "grad_norm": 0.349609375, "learning_rate": 0.0002643580514136209, "loss": 0.4432, "step": 23765 }, { "epoch": 0.6033684050208781, "grad_norm": 0.36328125, "learning_rate": 0.0002643365463901842, "loss": 0.4451, "step": 23770 }, { "epoch": 0.6034953230698937, "grad_norm": 0.337890625, "learning_rate": 0.00026431503575629874, "loss": 0.4614, "step": 23775 }, { "epoch": 0.6036222411189095, "grad_norm": 0.33203125, "learning_rate": 0.00026429351951302, "loss": 0.474, "step": 23780 }, { "epoch": 0.6037491591679253, "grad_norm": 0.328125, "learning_rate": 0.00026427199766140374, "loss": 0.4667, "step": 23785 }, { "epoch": 0.603876077216941, "grad_norm": 0.349609375, "learning_rate": 0.0002642504702025061, "loss": 0.4599, "step": 23790 }, { "epoch": 0.6040029952659568, "grad_norm": 0.330078125, "learning_rate": 0.00026422893713738346, "loss": 0.4471, "step": 23795 }, { "epoch": 0.6041299133149726, "grad_norm": 0.36328125, "learning_rate": 0.0002642073984670923, "loss": 0.4749, "step": 23800 }, { "epoch": 0.6042568313639882, "grad_norm": 0.333984375, "learning_rate": 0.0002641858541926896, "loss": 0.4301, "step": 23805 }, { "epoch": 0.604383749413004, "grad_norm": 0.357421875, "learning_rate": 0.0002641643043152326, "loss": 0.4883, "step": 23810 }, { "epoch": 0.6045106674620198, "grad_norm": 0.3671875, "learning_rate": 0.0002641427488357786, "loss": 0.4792, "step": 23815 }, { "epoch": 0.6046375855110355, "grad_norm": 0.330078125, "learning_rate": 0.0002641211877553854, "loss": 0.4456, "step": 23820 }, { "epoch": 0.6047645035600513, "grad_norm": 0.326171875, "learning_rate": 0.00026409962107511094, "loss": 0.451, "step": 23825 }, { "epoch": 0.6048914216090671, "grad_norm": 0.333984375, "learning_rate": 0.00026407804879601356, "loss": 0.4705, "step": 23830 }, { "epoch": 0.6050183396580828, "grad_norm": 0.34765625, "learning_rate": 0.00026405647091915174, "loss": 0.4997, "step": 23835 }, { "epoch": 0.6051452577070985, "grad_norm": 0.330078125, "learning_rate": 0.00026403488744558437, "loss": 0.485, "step": 23840 }, { "epoch": 0.6052721757561143, "grad_norm": 0.34765625, "learning_rate": 0.00026401329837637044, "loss": 0.4398, "step": 23845 }, { "epoch": 0.60539909380513, "grad_norm": 0.34375, "learning_rate": 0.0002639917037125694, "loss": 0.405, "step": 23850 }, { "epoch": 0.6055260118541458, "grad_norm": 0.32421875, "learning_rate": 0.00026397010345524083, "loss": 0.4637, "step": 23855 }, { "epoch": 0.6056529299031616, "grad_norm": 0.35546875, "learning_rate": 0.00026394849760544477, "loss": 0.4593, "step": 23860 }, { "epoch": 0.6057798479521773, "grad_norm": 0.357421875, "learning_rate": 0.00026392688616424126, "loss": 0.4565, "step": 23865 }, { "epoch": 0.605906766001193, "grad_norm": 0.322265625, "learning_rate": 0.00026390526913269084, "loss": 0.468, "step": 23870 }, { "epoch": 0.6060336840502087, "grad_norm": 0.341796875, "learning_rate": 0.0002638836465118542, "loss": 0.4699, "step": 23875 }, { "epoch": 0.6061606020992245, "grad_norm": 0.333984375, "learning_rate": 0.00026386201830279246, "loss": 0.4689, "step": 23880 }, { "epoch": 0.6062875201482403, "grad_norm": 0.357421875, "learning_rate": 0.0002638403845065668, "loss": 0.4457, "step": 23885 }, { "epoch": 0.606414438197256, "grad_norm": 0.36328125, "learning_rate": 0.00026381874512423887, "loss": 0.4709, "step": 23890 }, { "epoch": 0.6065413562462718, "grad_norm": 0.341796875, "learning_rate": 0.0002637971001568704, "loss": 0.4546, "step": 23895 }, { "epoch": 0.6066682742952876, "grad_norm": 0.32421875, "learning_rate": 0.00026377544960552355, "loss": 0.4414, "step": 23900 }, { "epoch": 0.6067951923443032, "grad_norm": 0.34765625, "learning_rate": 0.00026375379347126074, "loss": 0.462, "step": 23905 }, { "epoch": 0.606922110393319, "grad_norm": 0.376953125, "learning_rate": 0.0002637321317551446, "loss": 0.4734, "step": 23910 }, { "epoch": 0.6070490284423348, "grad_norm": 0.33203125, "learning_rate": 0.000263710464458238, "loss": 0.4141, "step": 23915 }, { "epoch": 0.6071759464913505, "grad_norm": 0.349609375, "learning_rate": 0.0002636887915816043, "loss": 0.4538, "step": 23920 }, { "epoch": 0.6073028645403663, "grad_norm": 0.37109375, "learning_rate": 0.0002636671131263068, "loss": 0.496, "step": 23925 }, { "epoch": 0.6074297825893821, "grad_norm": 0.359375, "learning_rate": 0.00026364542909340936, "loss": 0.475, "step": 23930 }, { "epoch": 0.6075567006383978, "grad_norm": 0.337890625, "learning_rate": 0.000263623739483976, "loss": 0.4777, "step": 23935 }, { "epoch": 0.6076836186874135, "grad_norm": 0.373046875, "learning_rate": 0.000263602044299071, "loss": 0.4716, "step": 23940 }, { "epoch": 0.6078105367364293, "grad_norm": 0.462890625, "learning_rate": 0.00026358034353975894, "loss": 0.4647, "step": 23945 }, { "epoch": 0.607937454785445, "grad_norm": 0.396484375, "learning_rate": 0.0002635586372071047, "loss": 0.4635, "step": 23950 }, { "epoch": 0.6080643728344608, "grad_norm": 0.34375, "learning_rate": 0.0002635369253021733, "loss": 0.443, "step": 23955 }, { "epoch": 0.6081912908834766, "grad_norm": 0.345703125, "learning_rate": 0.00026351520782603023, "loss": 0.4273, "step": 23960 }, { "epoch": 0.6083182089324923, "grad_norm": 0.359375, "learning_rate": 0.0002634934847797411, "loss": 0.4791, "step": 23965 }, { "epoch": 0.608445126981508, "grad_norm": 0.353515625, "learning_rate": 0.00026347175616437193, "loss": 0.474, "step": 23970 }, { "epoch": 0.6085720450305238, "grad_norm": 0.337890625, "learning_rate": 0.00026345002198098884, "loss": 0.4373, "step": 23975 }, { "epoch": 0.6086989630795395, "grad_norm": 0.337890625, "learning_rate": 0.0002634282822306584, "loss": 0.4509, "step": 23980 }, { "epoch": 0.6088258811285553, "grad_norm": 0.345703125, "learning_rate": 0.00026340653691444736, "loss": 0.4503, "step": 23985 }, { "epoch": 0.6089527991775711, "grad_norm": 0.341796875, "learning_rate": 0.0002633847860334227, "loss": 0.4587, "step": 23990 }, { "epoch": 0.6090797172265868, "grad_norm": 0.33984375, "learning_rate": 0.0002633630295886517, "loss": 0.4573, "step": 23995 }, { "epoch": 0.6092066352756026, "grad_norm": 0.3515625, "learning_rate": 0.00026334126758120204, "loss": 0.4821, "step": 24000 }, { "epoch": 0.6093335533246182, "grad_norm": 0.349609375, "learning_rate": 0.00026331950001214153, "loss": 0.4676, "step": 24005 }, { "epoch": 0.609460471373634, "grad_norm": 0.337890625, "learning_rate": 0.0002632977268825383, "loss": 0.4648, "step": 24010 }, { "epoch": 0.6095873894226498, "grad_norm": 0.345703125, "learning_rate": 0.00026327594819346075, "loss": 0.413, "step": 24015 }, { "epoch": 0.6097143074716656, "grad_norm": 0.33984375, "learning_rate": 0.0002632541639459775, "loss": 0.4506, "step": 24020 }, { "epoch": 0.6098412255206813, "grad_norm": 0.328125, "learning_rate": 0.0002632323741411576, "loss": 0.4678, "step": 24025 }, { "epoch": 0.6099681435696971, "grad_norm": 0.341796875, "learning_rate": 0.00026321057878007014, "loss": 0.4233, "step": 24030 }, { "epoch": 0.6100950616187127, "grad_norm": 0.345703125, "learning_rate": 0.00026318877786378475, "loss": 0.4782, "step": 24035 }, { "epoch": 0.6102219796677285, "grad_norm": 0.341796875, "learning_rate": 0.0002631669713933711, "loss": 0.4529, "step": 24040 }, { "epoch": 0.6103488977167443, "grad_norm": 0.345703125, "learning_rate": 0.0002631451593698992, "loss": 0.4548, "step": 24045 }, { "epoch": 0.61047581576576, "grad_norm": 0.359375, "learning_rate": 0.0002631233417944394, "loss": 0.4413, "step": 24050 }, { "epoch": 0.6106027338147758, "grad_norm": 0.32421875, "learning_rate": 0.00026310151866806234, "loss": 0.4288, "step": 24055 }, { "epoch": 0.6107296518637916, "grad_norm": 0.357421875, "learning_rate": 0.0002630796899918388, "loss": 0.4759, "step": 24060 }, { "epoch": 0.6108565699128073, "grad_norm": 0.337890625, "learning_rate": 0.00026305785576683985, "loss": 0.4502, "step": 24065 }, { "epoch": 0.610983487961823, "grad_norm": 0.330078125, "learning_rate": 0.00026303601599413697, "loss": 0.4414, "step": 24070 }, { "epoch": 0.6111104060108388, "grad_norm": 0.357421875, "learning_rate": 0.0002630141706748018, "loss": 0.4677, "step": 24075 }, { "epoch": 0.6112373240598545, "grad_norm": 0.376953125, "learning_rate": 0.0002629923198099064, "loss": 0.4896, "step": 24080 }, { "epoch": 0.6113642421088703, "grad_norm": 0.31640625, "learning_rate": 0.0002629704634005228, "loss": 0.4601, "step": 24085 }, { "epoch": 0.6114911601578861, "grad_norm": 0.318359375, "learning_rate": 0.00026294860144772354, "loss": 0.4654, "step": 24090 }, { "epoch": 0.6116180782069018, "grad_norm": 0.333984375, "learning_rate": 0.00026292673395258146, "loss": 0.4469, "step": 24095 }, { "epoch": 0.6117449962559176, "grad_norm": 0.341796875, "learning_rate": 0.00026290486091616947, "loss": 0.4678, "step": 24100 }, { "epoch": 0.6118719143049333, "grad_norm": 0.37109375, "learning_rate": 0.00026288298233956097, "loss": 0.47, "step": 24105 }, { "epoch": 0.611998832353949, "grad_norm": 0.341796875, "learning_rate": 0.0002628610982238295, "loss": 0.4526, "step": 24110 }, { "epoch": 0.6121257504029648, "grad_norm": 0.36328125, "learning_rate": 0.00026283920857004883, "loss": 0.4802, "step": 24115 }, { "epoch": 0.6122526684519806, "grad_norm": 0.34765625, "learning_rate": 0.00026281731337929326, "loss": 0.4704, "step": 24120 }, { "epoch": 0.6123795865009963, "grad_norm": 0.3359375, "learning_rate": 0.00026279541265263696, "loss": 0.467, "step": 24125 }, { "epoch": 0.6125065045500121, "grad_norm": 0.341796875, "learning_rate": 0.0002627735063911547, "loss": 0.4691, "step": 24130 }, { "epoch": 0.6126334225990278, "grad_norm": 0.345703125, "learning_rate": 0.00026275159459592147, "loss": 0.4368, "step": 24135 }, { "epoch": 0.6127603406480435, "grad_norm": 0.40625, "learning_rate": 0.00026272967726801235, "loss": 0.4918, "step": 24140 }, { "epoch": 0.6128872586970593, "grad_norm": 0.345703125, "learning_rate": 0.0002627077544085029, "loss": 0.4626, "step": 24145 }, { "epoch": 0.6130141767460751, "grad_norm": 0.32421875, "learning_rate": 0.00026268582601846876, "loss": 0.4538, "step": 24150 }, { "epoch": 0.6131410947950908, "grad_norm": 0.3046875, "learning_rate": 0.0002626638920989861, "loss": 0.4498, "step": 24155 }, { "epoch": 0.6132680128441066, "grad_norm": 0.2890625, "learning_rate": 0.00026264195265113114, "loss": 0.4131, "step": 24160 }, { "epoch": 0.6133949308931224, "grad_norm": 0.337890625, "learning_rate": 0.00026262000767598043, "loss": 0.4521, "step": 24165 }, { "epoch": 0.613521848942138, "grad_norm": 0.341796875, "learning_rate": 0.00026259805717461076, "loss": 0.4942, "step": 24170 }, { "epoch": 0.6136487669911538, "grad_norm": 0.361328125, "learning_rate": 0.0002625761011480993, "loss": 0.444, "step": 24175 }, { "epoch": 0.6137756850401695, "grad_norm": 0.341796875, "learning_rate": 0.0002625541395975234, "loss": 0.4747, "step": 24180 }, { "epoch": 0.6139026030891853, "grad_norm": 0.349609375, "learning_rate": 0.0002625321725239607, "loss": 0.4424, "step": 24185 }, { "epoch": 0.6140295211382011, "grad_norm": 0.333984375, "learning_rate": 0.00026251019992848916, "loss": 0.4565, "step": 24190 }, { "epoch": 0.6141564391872169, "grad_norm": 0.322265625, "learning_rate": 0.0002624882218121869, "loss": 0.4434, "step": 24195 }, { "epoch": 0.6142833572362326, "grad_norm": 0.34375, "learning_rate": 0.00026246623817613234, "loss": 0.4541, "step": 24200 }, { "epoch": 0.6144102752852483, "grad_norm": 1.5546875, "learning_rate": 0.0002624442490214044, "loss": 0.4885, "step": 24205 }, { "epoch": 0.614537193334264, "grad_norm": 0.33984375, "learning_rate": 0.00026242225434908184, "loss": 0.457, "step": 24210 }, { "epoch": 0.6146641113832798, "grad_norm": 0.333984375, "learning_rate": 0.0002624002541602441, "loss": 0.4522, "step": 24215 }, { "epoch": 0.6147910294322956, "grad_norm": 0.357421875, "learning_rate": 0.00026237824845597066, "loss": 0.4631, "step": 24220 }, { "epoch": 0.6149179474813113, "grad_norm": 0.345703125, "learning_rate": 0.00026235623723734136, "loss": 0.4617, "step": 24225 }, { "epoch": 0.6150448655303271, "grad_norm": 0.322265625, "learning_rate": 0.0002623342205054362, "loss": 0.4553, "step": 24230 }, { "epoch": 0.6151717835793428, "grad_norm": 0.37890625, "learning_rate": 0.0002623121982613356, "loss": 0.4731, "step": 24235 }, { "epoch": 0.6152987016283585, "grad_norm": 0.328125, "learning_rate": 0.00026229017050612017, "loss": 0.4538, "step": 24240 }, { "epoch": 0.6154256196773743, "grad_norm": 0.345703125, "learning_rate": 0.0002622681372408708, "loss": 0.4496, "step": 24245 }, { "epoch": 0.6155525377263901, "grad_norm": 0.3359375, "learning_rate": 0.0002622460984666687, "loss": 0.4559, "step": 24250 }, { "epoch": 0.6156794557754058, "grad_norm": 0.33984375, "learning_rate": 0.00026222405418459515, "loss": 0.4525, "step": 24255 }, { "epoch": 0.6158063738244216, "grad_norm": 0.322265625, "learning_rate": 0.00026220200439573205, "loss": 0.4616, "step": 24260 }, { "epoch": 0.6159332918734374, "grad_norm": 0.3203125, "learning_rate": 0.0002621799491011613, "loss": 0.4726, "step": 24265 }, { "epoch": 0.616060209922453, "grad_norm": 0.365234375, "learning_rate": 0.00026215788830196514, "loss": 0.4486, "step": 24270 }, { "epoch": 0.6161871279714688, "grad_norm": 0.318359375, "learning_rate": 0.000262135821999226, "loss": 0.4414, "step": 24275 }, { "epoch": 0.6163140460204846, "grad_norm": 0.3671875, "learning_rate": 0.0002621137501940268, "loss": 0.4795, "step": 24280 }, { "epoch": 0.6164409640695003, "grad_norm": 0.330078125, "learning_rate": 0.0002620916728874505, "loss": 0.4795, "step": 24285 }, { "epoch": 0.6165678821185161, "grad_norm": 0.341796875, "learning_rate": 0.00026206959008058056, "loss": 0.4559, "step": 24290 }, { "epoch": 0.6166948001675319, "grad_norm": 0.3671875, "learning_rate": 0.0002620475017745004, "loss": 0.4512, "step": 24295 }, { "epoch": 0.6168217182165475, "grad_norm": 0.33984375, "learning_rate": 0.0002620254079702939, "loss": 0.4478, "step": 24300 }, { "epoch": 0.6169486362655633, "grad_norm": 0.296875, "learning_rate": 0.00026200330866904537, "loss": 0.4705, "step": 24305 }, { "epoch": 0.6170755543145791, "grad_norm": 0.326171875, "learning_rate": 0.00026198120387183906, "loss": 0.4749, "step": 24310 }, { "epoch": 0.6172024723635948, "grad_norm": 0.314453125, "learning_rate": 0.0002619590935797597, "loss": 0.4427, "step": 24315 }, { "epoch": 0.6173293904126106, "grad_norm": 0.326171875, "learning_rate": 0.0002619369777938921, "loss": 0.4512, "step": 24320 }, { "epoch": 0.6174563084616264, "grad_norm": 0.33984375, "learning_rate": 0.00026191485651532175, "loss": 0.454, "step": 24325 }, { "epoch": 0.6175832265106421, "grad_norm": 0.34765625, "learning_rate": 0.0002618927297451339, "loss": 0.4461, "step": 24330 }, { "epoch": 0.6177101445596578, "grad_norm": 0.361328125, "learning_rate": 0.00026187059748441436, "loss": 0.4679, "step": 24335 }, { "epoch": 0.6178370626086735, "grad_norm": 0.34375, "learning_rate": 0.00026184845973424917, "loss": 0.4579, "step": 24340 }, { "epoch": 0.6179639806576893, "grad_norm": 0.34765625, "learning_rate": 0.00026182631649572464, "loss": 0.4783, "step": 24345 }, { "epoch": 0.6180908987067051, "grad_norm": 0.337890625, "learning_rate": 0.0002618041677699273, "loss": 0.469, "step": 24350 }, { "epoch": 0.6182178167557209, "grad_norm": 0.3359375, "learning_rate": 0.00026178201355794395, "loss": 0.4675, "step": 24355 }, { "epoch": 0.6183447348047366, "grad_norm": 0.31640625, "learning_rate": 0.0002617598538608617, "loss": 0.4495, "step": 24360 }, { "epoch": 0.6184716528537524, "grad_norm": 0.3515625, "learning_rate": 0.00026173768867976806, "loss": 0.4715, "step": 24365 }, { "epoch": 0.618598570902768, "grad_norm": 0.326171875, "learning_rate": 0.00026171551801575045, "loss": 0.4326, "step": 24370 }, { "epoch": 0.6187254889517838, "grad_norm": 0.337890625, "learning_rate": 0.0002616933418698969, "loss": 0.4453, "step": 24375 }, { "epoch": 0.6188524070007996, "grad_norm": 0.337890625, "learning_rate": 0.0002616711602432955, "loss": 0.4706, "step": 24380 }, { "epoch": 0.6189793250498153, "grad_norm": 0.32421875, "learning_rate": 0.00026164897313703483, "loss": 0.442, "step": 24385 }, { "epoch": 0.6191062430988311, "grad_norm": 0.3359375, "learning_rate": 0.00026162678055220354, "loss": 0.4656, "step": 24390 }, { "epoch": 0.6192331611478469, "grad_norm": 0.328125, "learning_rate": 0.0002616045824898905, "loss": 0.4805, "step": 24395 }, { "epoch": 0.6193600791968625, "grad_norm": 0.34765625, "learning_rate": 0.0002615823789511851, "loss": 0.4639, "step": 24400 }, { "epoch": 0.6194869972458783, "grad_norm": 0.333984375, "learning_rate": 0.00026156016993717683, "loss": 0.446, "step": 24405 }, { "epoch": 0.6196139152948941, "grad_norm": 0.251953125, "learning_rate": 0.0002615379554489554, "loss": 0.4413, "step": 24410 }, { "epoch": 0.6197408333439098, "grad_norm": 0.337890625, "learning_rate": 0.00026151573548761095, "loss": 0.4477, "step": 24415 }, { "epoch": 0.6198677513929256, "grad_norm": 0.31640625, "learning_rate": 0.00026149351005423383, "loss": 0.4664, "step": 24420 }, { "epoch": 0.6199946694419414, "grad_norm": 0.361328125, "learning_rate": 0.00026147127914991455, "loss": 0.4755, "step": 24425 }, { "epoch": 0.6201215874909571, "grad_norm": 0.36328125, "learning_rate": 0.000261449042775744, "loss": 0.4563, "step": 24430 }, { "epoch": 0.6202485055399728, "grad_norm": 0.34375, "learning_rate": 0.00026142680093281333, "loss": 0.4994, "step": 24435 }, { "epoch": 0.6203754235889886, "grad_norm": 0.31640625, "learning_rate": 0.00026140455362221396, "loss": 0.4692, "step": 24440 }, { "epoch": 0.6205023416380043, "grad_norm": 0.33984375, "learning_rate": 0.0002613823008450374, "loss": 0.458, "step": 24445 }, { "epoch": 0.6206292596870201, "grad_norm": 0.32421875, "learning_rate": 0.0002613600426023758, "loss": 0.4224, "step": 24450 }, { "epoch": 0.6207561777360359, "grad_norm": 0.322265625, "learning_rate": 0.0002613377788953213, "loss": 0.4863, "step": 24455 }, { "epoch": 0.6208830957850516, "grad_norm": 0.38671875, "learning_rate": 0.00026131550972496626, "loss": 0.4904, "step": 24460 }, { "epoch": 0.6210100138340674, "grad_norm": 0.322265625, "learning_rate": 0.00026129323509240356, "loss": 0.4503, "step": 24465 }, { "epoch": 0.621136931883083, "grad_norm": 0.34765625, "learning_rate": 0.00026127095499872607, "loss": 0.43, "step": 24470 }, { "epoch": 0.6212638499320988, "grad_norm": 0.35546875, "learning_rate": 0.0002612486694450272, "loss": 0.496, "step": 24475 }, { "epoch": 0.6213907679811146, "grad_norm": 0.330078125, "learning_rate": 0.0002612263784324004, "loss": 0.4346, "step": 24480 }, { "epoch": 0.6215176860301304, "grad_norm": 0.34765625, "learning_rate": 0.0002612040819619396, "loss": 0.4908, "step": 24485 }, { "epoch": 0.6216446040791461, "grad_norm": 36.25, "learning_rate": 0.0002611817800347387, "loss": 0.8336, "step": 24490 }, { "epoch": 0.6217715221281619, "grad_norm": 0.33984375, "learning_rate": 0.0002611594726518922, "loss": 0.4871, "step": 24495 }, { "epoch": 0.6218984401771775, "grad_norm": 0.34765625, "learning_rate": 0.00026113715981449464, "loss": 0.4892, "step": 24500 }, { "epoch": 0.6220253582261933, "grad_norm": 0.34765625, "learning_rate": 0.0002611148415236409, "loss": 0.4479, "step": 24505 }, { "epoch": 0.6221522762752091, "grad_norm": 0.310546875, "learning_rate": 0.00026109251778042616, "loss": 0.4675, "step": 24510 }, { "epoch": 0.6222791943242248, "grad_norm": 0.330078125, "learning_rate": 0.0002610701885859458, "loss": 0.4724, "step": 24515 }, { "epoch": 0.6224061123732406, "grad_norm": 0.359375, "learning_rate": 0.0002610478539412956, "loss": 0.4506, "step": 24520 }, { "epoch": 0.6225330304222564, "grad_norm": 0.357421875, "learning_rate": 0.0002610255138475714, "loss": 0.4732, "step": 24525 }, { "epoch": 0.6226599484712722, "grad_norm": 0.345703125, "learning_rate": 0.00026100316830586945, "loss": 0.473, "step": 24530 }, { "epoch": 0.6227868665202878, "grad_norm": 0.322265625, "learning_rate": 0.00026098081731728624, "loss": 0.4473, "step": 24535 }, { "epoch": 0.6229137845693036, "grad_norm": 0.328125, "learning_rate": 0.00026095846088291857, "loss": 0.4751, "step": 24540 }, { "epoch": 0.6230407026183193, "grad_norm": 0.32421875, "learning_rate": 0.00026093609900386345, "loss": 0.4973, "step": 24545 }, { "epoch": 0.6231676206673351, "grad_norm": 0.341796875, "learning_rate": 0.00026091373168121804, "loss": 0.4343, "step": 24550 }, { "epoch": 0.6232945387163509, "grad_norm": 0.341796875, "learning_rate": 0.0002608913589160801, "loss": 0.4486, "step": 24555 }, { "epoch": 0.6234214567653666, "grad_norm": 0.32421875, "learning_rate": 0.0002608689807095473, "loss": 0.4887, "step": 24560 }, { "epoch": 0.6235483748143823, "grad_norm": 0.310546875, "learning_rate": 0.00026084659706271775, "loss": 0.4383, "step": 24565 }, { "epoch": 0.6236752928633981, "grad_norm": 0.384765625, "learning_rate": 0.0002608242079766899, "loss": 0.495, "step": 24570 }, { "epoch": 0.6238022109124138, "grad_norm": 0.353515625, "learning_rate": 0.00026080181345256227, "loss": 0.4821, "step": 24575 }, { "epoch": 0.6239291289614296, "grad_norm": 0.34375, "learning_rate": 0.0002607794134914338, "loss": 0.4569, "step": 24580 }, { "epoch": 0.6240560470104454, "grad_norm": 0.3359375, "learning_rate": 0.00026075700809440364, "loss": 0.4581, "step": 24585 }, { "epoch": 0.6241829650594611, "grad_norm": 0.34375, "learning_rate": 0.0002607345972625712, "loss": 0.4799, "step": 24590 }, { "epoch": 0.6243098831084769, "grad_norm": 0.3359375, "learning_rate": 0.0002607121809970362, "loss": 0.4539, "step": 24595 }, { "epoch": 0.6244368011574926, "grad_norm": 0.326171875, "learning_rate": 0.0002606897592988986, "loss": 0.457, "step": 24600 }, { "epoch": 0.6245637192065083, "grad_norm": 0.373046875, "learning_rate": 0.0002606673321692586, "loss": 0.4872, "step": 24605 }, { "epoch": 0.6246906372555241, "grad_norm": 0.353515625, "learning_rate": 0.00026064489960921664, "loss": 0.5024, "step": 24610 }, { "epoch": 0.6248175553045399, "grad_norm": 0.31640625, "learning_rate": 0.0002606224616198736, "loss": 0.4549, "step": 24615 }, { "epoch": 0.6249444733535556, "grad_norm": 0.3359375, "learning_rate": 0.00026060001820233037, "loss": 0.4503, "step": 24620 }, { "epoch": 0.6250713914025714, "grad_norm": 0.3203125, "learning_rate": 0.00026057756935768834, "loss": 0.4468, "step": 24625 }, { "epoch": 0.6251983094515872, "grad_norm": 0.345703125, "learning_rate": 0.00026055511508704904, "loss": 0.4534, "step": 24630 }, { "epoch": 0.6253252275006028, "grad_norm": 0.333984375, "learning_rate": 0.00026053265539151423, "loss": 0.4499, "step": 24635 }, { "epoch": 0.6254521455496186, "grad_norm": 0.3359375, "learning_rate": 0.00026051019027218607, "loss": 0.4545, "step": 24640 }, { "epoch": 0.6255790635986344, "grad_norm": 0.33984375, "learning_rate": 0.0002604877197301669, "loss": 0.4655, "step": 24645 }, { "epoch": 0.6257059816476501, "grad_norm": 0.37109375, "learning_rate": 0.0002604652437665594, "loss": 0.4716, "step": 24650 }, { "epoch": 0.6258328996966659, "grad_norm": 0.341796875, "learning_rate": 0.0002604427623824663, "loss": 0.4532, "step": 24655 }, { "epoch": 0.6259598177456817, "grad_norm": 0.326171875, "learning_rate": 0.0002604202755789908, "loss": 0.4658, "step": 24660 }, { "epoch": 0.6260867357946973, "grad_norm": 0.333984375, "learning_rate": 0.0002603977833572365, "loss": 0.4563, "step": 24665 }, { "epoch": 0.6262136538437131, "grad_norm": 0.32421875, "learning_rate": 0.00026037528571830684, "loss": 0.447, "step": 24670 }, { "epoch": 0.6263405718927288, "grad_norm": 0.341796875, "learning_rate": 0.00026035278266330595, "loss": 0.4958, "step": 24675 }, { "epoch": 0.6264674899417446, "grad_norm": 0.36328125, "learning_rate": 0.0002603302741933379, "loss": 0.4605, "step": 24680 }, { "epoch": 0.6265944079907604, "grad_norm": 0.33984375, "learning_rate": 0.00026030776030950734, "loss": 0.4861, "step": 24685 }, { "epoch": 0.6267213260397761, "grad_norm": 0.326171875, "learning_rate": 0.0002602852410129189, "loss": 0.4755, "step": 24690 }, { "epoch": 0.6268482440887919, "grad_norm": 0.341796875, "learning_rate": 0.00026026271630467756, "loss": 0.4664, "step": 24695 }, { "epoch": 0.6269751621378076, "grad_norm": 0.328125, "learning_rate": 0.00026024018618588866, "loss": 0.4268, "step": 24700 }, { "epoch": 0.6271020801868233, "grad_norm": 0.3203125, "learning_rate": 0.0002602176506576578, "loss": 0.4653, "step": 24705 }, { "epoch": 0.6272289982358391, "grad_norm": 0.314453125, "learning_rate": 0.0002601951097210906, "loss": 0.4497, "step": 24710 }, { "epoch": 0.6273559162848549, "grad_norm": 0.322265625, "learning_rate": 0.0002601725633772934, "loss": 0.4368, "step": 24715 }, { "epoch": 0.6274828343338706, "grad_norm": 0.337890625, "learning_rate": 0.0002601500116273723, "loss": 0.4467, "step": 24720 }, { "epoch": 0.6276097523828864, "grad_norm": 0.345703125, "learning_rate": 0.00026012745447243405, "loss": 0.4847, "step": 24725 }, { "epoch": 0.6277366704319021, "grad_norm": 0.33984375, "learning_rate": 0.0002601048919135855, "loss": 0.4609, "step": 24730 }, { "epoch": 0.6278635884809178, "grad_norm": 0.359375, "learning_rate": 0.0002600823239519337, "loss": 0.4932, "step": 24735 }, { "epoch": 0.6279905065299336, "grad_norm": 0.3203125, "learning_rate": 0.00026005975058858617, "loss": 0.464, "step": 24740 }, { "epoch": 0.6281174245789494, "grad_norm": 0.326171875, "learning_rate": 0.00026003717182465055, "loss": 0.4816, "step": 24745 }, { "epoch": 0.6282443426279651, "grad_norm": 0.361328125, "learning_rate": 0.0002600145876612347, "loss": 0.4691, "step": 24750 }, { "epoch": 0.6283712606769809, "grad_norm": 0.349609375, "learning_rate": 0.00025999199809944684, "loss": 0.5121, "step": 24755 }, { "epoch": 0.6284981787259967, "grad_norm": 0.37890625, "learning_rate": 0.00025996940314039545, "loss": 0.4868, "step": 24760 }, { "epoch": 0.6286250967750123, "grad_norm": 0.35546875, "learning_rate": 0.00025994680278518923, "loss": 0.4493, "step": 24765 }, { "epoch": 0.6287520148240281, "grad_norm": 0.384765625, "learning_rate": 0.0002599241970349373, "loss": 0.4482, "step": 24770 }, { "epoch": 0.6288789328730439, "grad_norm": 0.291015625, "learning_rate": 0.00025990158589074867, "loss": 0.4141, "step": 24775 }, { "epoch": 0.6290058509220596, "grad_norm": 0.337890625, "learning_rate": 0.0002598789693537331, "loss": 0.4587, "step": 24780 }, { "epoch": 0.6291327689710754, "grad_norm": 0.3671875, "learning_rate": 0.0002598563474250002, "loss": 0.4702, "step": 24785 }, { "epoch": 0.6292596870200912, "grad_norm": 0.3359375, "learning_rate": 0.0002598337201056601, "loss": 0.478, "step": 24790 }, { "epoch": 0.6293866050691069, "grad_norm": 0.494140625, "learning_rate": 0.0002598110873968232, "loss": 0.4776, "step": 24795 }, { "epoch": 0.6295135231181226, "grad_norm": 0.359375, "learning_rate": 0.0002597884492995999, "loss": 0.4743, "step": 24800 }, { "epoch": 0.6296404411671384, "grad_norm": 0.353515625, "learning_rate": 0.00025976580581510113, "loss": 0.4663, "step": 24805 }, { "epoch": 0.6297673592161541, "grad_norm": 0.365234375, "learning_rate": 0.000259743156944438, "loss": 0.4882, "step": 24810 }, { "epoch": 0.6298942772651699, "grad_norm": 0.36328125, "learning_rate": 0.0002597205026887219, "loss": 0.4687, "step": 24815 }, { "epoch": 0.6300211953141857, "grad_norm": 0.32421875, "learning_rate": 0.0002596978430490644, "loss": 0.4983, "step": 24820 }, { "epoch": 0.6301481133632014, "grad_norm": 0.33203125, "learning_rate": 0.0002596751780265775, "loss": 0.4726, "step": 24825 }, { "epoch": 0.6302750314122171, "grad_norm": 0.322265625, "learning_rate": 0.00025965250762237325, "loss": 0.4731, "step": 24830 }, { "epoch": 0.6304019494612328, "grad_norm": 0.322265625, "learning_rate": 0.0002596298318375641, "loss": 0.4719, "step": 24835 }, { "epoch": 0.6305288675102486, "grad_norm": 0.3515625, "learning_rate": 0.00025960715067326284, "loss": 0.4338, "step": 24840 }, { "epoch": 0.6306557855592644, "grad_norm": 0.345703125, "learning_rate": 0.0002595844641305823, "loss": 0.4692, "step": 24845 }, { "epoch": 0.6307827036082801, "grad_norm": 0.337890625, "learning_rate": 0.0002595617722106358, "loss": 0.4385, "step": 24850 }, { "epoch": 0.6309096216572959, "grad_norm": 0.328125, "learning_rate": 0.0002595390749145368, "loss": 0.421, "step": 24855 }, { "epoch": 0.6310365397063117, "grad_norm": 0.3359375, "learning_rate": 0.000259516372243399, "loss": 0.4671, "step": 24860 }, { "epoch": 0.6311634577553273, "grad_norm": 0.330078125, "learning_rate": 0.0002594936641983364, "loss": 0.4692, "step": 24865 }, { "epoch": 0.6312903758043431, "grad_norm": 0.322265625, "learning_rate": 0.00025947095078046335, "loss": 0.4347, "step": 24870 }, { "epoch": 0.6314172938533589, "grad_norm": 0.3828125, "learning_rate": 0.00025944823199089434, "loss": 0.4899, "step": 24875 }, { "epoch": 0.6315442119023746, "grad_norm": 0.3515625, "learning_rate": 0.00025942550783074423, "loss": 0.4783, "step": 24880 }, { "epoch": 0.6316711299513904, "grad_norm": 0.3359375, "learning_rate": 0.00025940277830112797, "loss": 0.4588, "step": 24885 }, { "epoch": 0.6317980480004062, "grad_norm": 0.34765625, "learning_rate": 0.000259380043403161, "loss": 0.48, "step": 24890 }, { "epoch": 0.6319249660494219, "grad_norm": 0.353515625, "learning_rate": 0.0002593573031379589, "loss": 0.4761, "step": 24895 }, { "epoch": 0.6320518840984376, "grad_norm": 0.330078125, "learning_rate": 0.00025933455750663743, "loss": 0.4584, "step": 24900 }, { "epoch": 0.6321788021474534, "grad_norm": 0.359375, "learning_rate": 0.0002593118065103128, "loss": 0.4943, "step": 24905 }, { "epoch": 0.6323057201964691, "grad_norm": 0.3203125, "learning_rate": 0.0002592890501501014, "loss": 0.4362, "step": 24910 }, { "epoch": 0.6324326382454849, "grad_norm": 0.412109375, "learning_rate": 0.0002592662884271198, "loss": 0.4488, "step": 24915 }, { "epoch": 0.6325595562945007, "grad_norm": 0.34765625, "learning_rate": 0.00025924352134248495, "loss": 0.4673, "step": 24920 }, { "epoch": 0.6326864743435164, "grad_norm": 0.328125, "learning_rate": 0.000259220748897314, "loss": 0.4458, "step": 24925 }, { "epoch": 0.6328133923925321, "grad_norm": 0.365234375, "learning_rate": 0.0002591979710927245, "loss": 0.477, "step": 24930 }, { "epoch": 0.6329403104415479, "grad_norm": 0.35546875, "learning_rate": 0.000259175187929834, "loss": 0.4641, "step": 24935 }, { "epoch": 0.6330672284905636, "grad_norm": 0.341796875, "learning_rate": 0.00025915239940976054, "loss": 0.4577, "step": 24940 }, { "epoch": 0.6331941465395794, "grad_norm": 0.359375, "learning_rate": 0.0002591296055336223, "loss": 0.4944, "step": 24945 }, { "epoch": 0.6333210645885952, "grad_norm": 0.36328125, "learning_rate": 0.0002591068063025378, "loss": 0.4478, "step": 24950 }, { "epoch": 0.6334479826376109, "grad_norm": 0.341796875, "learning_rate": 0.00025908400171762574, "loss": 0.4501, "step": 24955 }, { "epoch": 0.6335749006866267, "grad_norm": 0.328125, "learning_rate": 0.0002590611917800052, "loss": 0.4586, "step": 24960 }, { "epoch": 0.6337018187356424, "grad_norm": 0.376953125, "learning_rate": 0.0002590383764907954, "loss": 0.4698, "step": 24965 }, { "epoch": 0.6338287367846581, "grad_norm": 0.361328125, "learning_rate": 0.0002590155558511159, "loss": 0.4808, "step": 24970 }, { "epoch": 0.6339556548336739, "grad_norm": 0.353515625, "learning_rate": 0.00025899272986208656, "loss": 0.4592, "step": 24975 }, { "epoch": 0.6340825728826897, "grad_norm": 0.310546875, "learning_rate": 0.00025896989852482736, "loss": 0.4538, "step": 24980 }, { "epoch": 0.6342094909317054, "grad_norm": 0.30859375, "learning_rate": 0.00025894706184045863, "loss": 0.4419, "step": 24985 }, { "epoch": 0.6343364089807212, "grad_norm": 0.3359375, "learning_rate": 0.000258924219810101, "loss": 0.4527, "step": 24990 }, { "epoch": 0.6344633270297368, "grad_norm": 0.365234375, "learning_rate": 0.0002589013724348753, "loss": 0.4764, "step": 24995 }, { "epoch": 0.6345902450787526, "grad_norm": 0.349609375, "learning_rate": 0.00025887851971590257, "loss": 0.4535, "step": 25000 }, { "epoch": 0.6347171631277684, "grad_norm": 0.328125, "learning_rate": 0.0002588556616543042, "loss": 0.456, "step": 25005 }, { "epoch": 0.6348440811767841, "grad_norm": 0.345703125, "learning_rate": 0.000258832798251202, "loss": 0.4706, "step": 25010 }, { "epoch": 0.6349709992257999, "grad_norm": 0.33984375, "learning_rate": 0.0002588099295077177, "loss": 0.4424, "step": 25015 }, { "epoch": 0.6350979172748157, "grad_norm": 0.3515625, "learning_rate": 0.0002587870554249735, "loss": 0.4793, "step": 25020 }, { "epoch": 0.6352248353238314, "grad_norm": 0.380859375, "learning_rate": 0.00025876417600409176, "loss": 0.4475, "step": 25025 }, { "epoch": 0.6353517533728471, "grad_norm": 0.376953125, "learning_rate": 0.00025874129124619534, "loss": 0.4647, "step": 25030 }, { "epoch": 0.6354786714218629, "grad_norm": 0.330078125, "learning_rate": 0.000258718401152407, "loss": 0.4583, "step": 25035 }, { "epoch": 0.6356055894708786, "grad_norm": 0.359375, "learning_rate": 0.00025869550572385003, "loss": 0.4474, "step": 25040 }, { "epoch": 0.6357325075198944, "grad_norm": 0.345703125, "learning_rate": 0.00025867260496164793, "loss": 0.4935, "step": 25045 }, { "epoch": 0.6358594255689102, "grad_norm": 0.328125, "learning_rate": 0.0002586496988669244, "loss": 0.4604, "step": 25050 }, { "epoch": 0.6359863436179259, "grad_norm": 0.349609375, "learning_rate": 0.0002586267874408034, "loss": 0.4771, "step": 25055 }, { "epoch": 0.6361132616669417, "grad_norm": 0.345703125, "learning_rate": 0.0002586038706844092, "loss": 0.4464, "step": 25060 }, { "epoch": 0.6362401797159574, "grad_norm": 0.3515625, "learning_rate": 0.00025858094859886635, "loss": 0.4505, "step": 25065 }, { "epoch": 0.6363670977649731, "grad_norm": 0.3359375, "learning_rate": 0.0002585580211852996, "loss": 0.4302, "step": 25070 }, { "epoch": 0.6364940158139889, "grad_norm": 0.369140625, "learning_rate": 0.000258535088444834, "loss": 0.4662, "step": 25075 }, { "epoch": 0.6366209338630047, "grad_norm": 0.359375, "learning_rate": 0.0002585121503785948, "loss": 0.4623, "step": 25080 }, { "epoch": 0.6367478519120204, "grad_norm": 0.333984375, "learning_rate": 0.0002584892069877076, "loss": 0.4784, "step": 25085 }, { "epoch": 0.6368747699610362, "grad_norm": 0.341796875, "learning_rate": 0.00025846625827329826, "loss": 0.4538, "step": 25090 }, { "epoch": 0.6370016880100519, "grad_norm": 0.3359375, "learning_rate": 0.00025844330423649284, "loss": 0.4693, "step": 25095 }, { "epoch": 0.6371286060590676, "grad_norm": 0.337890625, "learning_rate": 0.0002584203448784177, "loss": 0.4582, "step": 25100 }, { "epoch": 0.6372555241080834, "grad_norm": 0.357421875, "learning_rate": 0.0002583973802001994, "loss": 0.4826, "step": 25105 }, { "epoch": 0.6373824421570992, "grad_norm": 0.3125, "learning_rate": 0.00025837441020296477, "loss": 0.4568, "step": 25110 }, { "epoch": 0.6375093602061149, "grad_norm": 0.34765625, "learning_rate": 0.0002583514348878411, "loss": 0.4503, "step": 25115 }, { "epoch": 0.6376362782551307, "grad_norm": 0.306640625, "learning_rate": 0.00025832845425595566, "loss": 0.4683, "step": 25120 }, { "epoch": 0.6377631963041465, "grad_norm": 0.32421875, "learning_rate": 0.0002583054683084361, "loss": 0.4613, "step": 25125 }, { "epoch": 0.6378901143531621, "grad_norm": 0.36328125, "learning_rate": 0.0002582824770464104, "loss": 0.495, "step": 25130 }, { "epoch": 0.6380170324021779, "grad_norm": 0.337890625, "learning_rate": 0.0002582594804710066, "loss": 0.4688, "step": 25135 }, { "epoch": 0.6381439504511937, "grad_norm": 0.328125, "learning_rate": 0.00025823647858335324, "loss": 0.4338, "step": 25140 }, { "epoch": 0.6382708685002094, "grad_norm": 0.34375, "learning_rate": 0.000258213471384579, "loss": 0.5016, "step": 25145 }, { "epoch": 0.6383977865492252, "grad_norm": 0.330078125, "learning_rate": 0.0002581904588758129, "loss": 0.447, "step": 25150 }, { "epoch": 0.638524704598241, "grad_norm": 0.3359375, "learning_rate": 0.00025816744105818396, "loss": 0.4403, "step": 25155 }, { "epoch": 0.6386516226472566, "grad_norm": 0.333984375, "learning_rate": 0.00025814441793282184, "loss": 0.4778, "step": 25160 }, { "epoch": 0.6387785406962724, "grad_norm": 0.337890625, "learning_rate": 0.0002581213895008562, "loss": 0.471, "step": 25165 }, { "epoch": 0.6389054587452881, "grad_norm": 0.318359375, "learning_rate": 0.00025809835576341705, "loss": 0.465, "step": 25170 }, { "epoch": 0.6390323767943039, "grad_norm": 0.3125, "learning_rate": 0.0002580753167216346, "loss": 0.4451, "step": 25175 }, { "epoch": 0.6391592948433197, "grad_norm": 0.3671875, "learning_rate": 0.0002580522723766395, "loss": 0.4626, "step": 25180 }, { "epoch": 0.6392862128923354, "grad_norm": 0.35546875, "learning_rate": 0.0002580292227295624, "loss": 0.4605, "step": 25185 }, { "epoch": 0.6394131309413512, "grad_norm": 0.330078125, "learning_rate": 0.0002580061677815343, "loss": 0.4343, "step": 25190 }, { "epoch": 0.6395400489903669, "grad_norm": 0.3359375, "learning_rate": 0.00025798310753368664, "loss": 0.4502, "step": 25195 }, { "epoch": 0.6396669670393826, "grad_norm": 0.32421875, "learning_rate": 0.00025796004198715096, "loss": 0.4596, "step": 25200 }, { "epoch": 0.6397938850883984, "grad_norm": 0.361328125, "learning_rate": 0.0002579369711430589, "loss": 0.4389, "step": 25205 }, { "epoch": 0.6399208031374142, "grad_norm": 0.298828125, "learning_rate": 0.0002579138950025427, "loss": 0.4614, "step": 25210 }, { "epoch": 0.6400477211864299, "grad_norm": 0.353515625, "learning_rate": 0.00025789081356673475, "loss": 0.4681, "step": 25215 }, { "epoch": 0.6401746392354457, "grad_norm": 0.353515625, "learning_rate": 0.0002578677268367674, "loss": 0.4765, "step": 25220 }, { "epoch": 0.6403015572844615, "grad_norm": 0.33984375, "learning_rate": 0.0002578446348137738, "loss": 0.4326, "step": 25225 }, { "epoch": 0.6404284753334771, "grad_norm": 0.349609375, "learning_rate": 0.0002578215374988869, "loss": 0.4633, "step": 25230 }, { "epoch": 0.6405553933824929, "grad_norm": 0.33984375, "learning_rate": 0.0002577984348932401, "loss": 0.4717, "step": 25235 }, { "epoch": 0.6406823114315087, "grad_norm": 0.333984375, "learning_rate": 0.00025777532699796697, "loss": 0.4716, "step": 25240 }, { "epoch": 0.6408092294805244, "grad_norm": 0.349609375, "learning_rate": 0.0002577522138142015, "loss": 0.4669, "step": 25245 }, { "epoch": 0.6409361475295402, "grad_norm": 0.3671875, "learning_rate": 0.0002577290953430779, "loss": 0.4788, "step": 25250 }, { "epoch": 0.641063065578556, "grad_norm": 0.36328125, "learning_rate": 0.00025770597158573043, "loss": 0.4817, "step": 25255 }, { "epoch": 0.6411899836275716, "grad_norm": 0.34375, "learning_rate": 0.0002576828425432939, "loss": 0.453, "step": 25260 }, { "epoch": 0.6413169016765874, "grad_norm": 0.353515625, "learning_rate": 0.0002576597082169031, "loss": 0.4633, "step": 25265 }, { "epoch": 0.6414438197256032, "grad_norm": 0.322265625, "learning_rate": 0.00025763656860769337, "loss": 0.456, "step": 25270 }, { "epoch": 0.6415707377746189, "grad_norm": 0.341796875, "learning_rate": 0.00025761342371680005, "loss": 0.473, "step": 25275 }, { "epoch": 0.6416976558236347, "grad_norm": 0.3359375, "learning_rate": 0.0002575902735453589, "loss": 0.4699, "step": 25280 }, { "epoch": 0.6418245738726505, "grad_norm": 0.359375, "learning_rate": 0.00025756711809450596, "loss": 0.4783, "step": 25285 }, { "epoch": 0.6419514919216662, "grad_norm": 0.33984375, "learning_rate": 0.0002575439573653773, "loss": 0.4727, "step": 25290 }, { "epoch": 0.6420784099706819, "grad_norm": 0.330078125, "learning_rate": 0.0002575207913591095, "loss": 0.4631, "step": 25295 }, { "epoch": 0.6422053280196977, "grad_norm": 0.32421875, "learning_rate": 0.0002574976200768393, "loss": 0.4757, "step": 25300 }, { "epoch": 0.6423322460687134, "grad_norm": 0.330078125, "learning_rate": 0.0002574744435197037, "loss": 0.4543, "step": 25305 }, { "epoch": 0.6424591641177292, "grad_norm": 0.341796875, "learning_rate": 0.00025745126168884, "loss": 0.4667, "step": 25310 }, { "epoch": 0.642586082166745, "grad_norm": 0.349609375, "learning_rate": 0.00025742807458538564, "loss": 0.4507, "step": 25315 }, { "epoch": 0.6427130002157607, "grad_norm": 0.318359375, "learning_rate": 0.00025740488221047853, "loss": 0.4443, "step": 25320 }, { "epoch": 0.6428399182647765, "grad_norm": 0.361328125, "learning_rate": 0.0002573816845652566, "loss": 0.4618, "step": 25325 }, { "epoch": 0.6429668363137921, "grad_norm": 0.314453125, "learning_rate": 0.00025735848165085815, "loss": 0.4517, "step": 25330 }, { "epoch": 0.6430937543628079, "grad_norm": 0.33984375, "learning_rate": 0.0002573352734684218, "loss": 0.4708, "step": 25335 }, { "epoch": 0.6432206724118237, "grad_norm": 0.30078125, "learning_rate": 0.0002573120600190863, "loss": 0.4282, "step": 25340 }, { "epoch": 0.6433475904608394, "grad_norm": 0.302734375, "learning_rate": 0.0002572888413039908, "loss": 0.4508, "step": 25345 }, { "epoch": 0.6434745085098552, "grad_norm": 0.337890625, "learning_rate": 0.0002572656173242746, "loss": 0.4626, "step": 25350 }, { "epoch": 0.643601426558871, "grad_norm": 0.34765625, "learning_rate": 0.0002572423880810773, "loss": 0.4647, "step": 25355 }, { "epoch": 0.6437283446078866, "grad_norm": 0.3515625, "learning_rate": 0.0002572191535755387, "loss": 0.4254, "step": 25360 }, { "epoch": 0.6438552626569024, "grad_norm": 0.369140625, "learning_rate": 0.00025719591380879897, "loss": 0.4651, "step": 25365 }, { "epoch": 0.6439821807059182, "grad_norm": 6.75, "learning_rate": 0.00025717266878199843, "loss": 0.4677, "step": 25370 }, { "epoch": 0.6441090987549339, "grad_norm": 0.35546875, "learning_rate": 0.0002571494184962777, "loss": 0.4586, "step": 25375 }, { "epoch": 0.6442360168039497, "grad_norm": 0.36328125, "learning_rate": 0.0002571261629527777, "loss": 0.4421, "step": 25380 }, { "epoch": 0.6443629348529655, "grad_norm": 0.33984375, "learning_rate": 0.0002571029021526396, "loss": 0.4593, "step": 25385 }, { "epoch": 0.6444898529019812, "grad_norm": 0.357421875, "learning_rate": 0.0002570796360970047, "loss": 0.4715, "step": 25390 }, { "epoch": 0.6446167709509969, "grad_norm": 0.32421875, "learning_rate": 0.00025705636478701475, "loss": 0.4703, "step": 25395 }, { "epoch": 0.6447436890000127, "grad_norm": 0.361328125, "learning_rate": 0.0002570330882238116, "loss": 0.428, "step": 25400 }, { "epoch": 0.6448706070490284, "grad_norm": 0.341796875, "learning_rate": 0.0002570098064085375, "loss": 0.4685, "step": 25405 }, { "epoch": 0.6449975250980442, "grad_norm": 0.345703125, "learning_rate": 0.00025698651934233477, "loss": 0.4365, "step": 25410 }, { "epoch": 0.64512444314706, "grad_norm": 0.33203125, "learning_rate": 0.00025696322702634615, "loss": 0.4458, "step": 25415 }, { "epoch": 0.6452513611960757, "grad_norm": 0.333984375, "learning_rate": 0.00025693992946171465, "loss": 0.4602, "step": 25420 }, { "epoch": 0.6453782792450914, "grad_norm": 0.3671875, "learning_rate": 0.00025691662664958334, "loss": 0.4481, "step": 25425 }, { "epoch": 0.6455051972941072, "grad_norm": 0.34375, "learning_rate": 0.00025689331859109573, "loss": 0.4764, "step": 25430 }, { "epoch": 0.6456321153431229, "grad_norm": 0.328125, "learning_rate": 0.00025687000528739566, "loss": 0.4571, "step": 25435 }, { "epoch": 0.6457590333921387, "grad_norm": 0.345703125, "learning_rate": 0.00025684668673962694, "loss": 0.4348, "step": 25440 }, { "epoch": 0.6458859514411545, "grad_norm": 0.33203125, "learning_rate": 0.00025682336294893386, "loss": 0.4559, "step": 25445 }, { "epoch": 0.6460128694901702, "grad_norm": 0.34765625, "learning_rate": 0.0002568000339164609, "loss": 0.4746, "step": 25450 }, { "epoch": 0.646139787539186, "grad_norm": 0.365234375, "learning_rate": 0.0002567766996433528, "loss": 0.453, "step": 25455 }, { "epoch": 0.6462667055882017, "grad_norm": 0.3671875, "learning_rate": 0.00025675336013075467, "loss": 0.4927, "step": 25460 }, { "epoch": 0.6463936236372174, "grad_norm": 0.326171875, "learning_rate": 0.0002567300153798116, "loss": 0.4577, "step": 25465 }, { "epoch": 0.6465205416862332, "grad_norm": 0.34765625, "learning_rate": 0.0002567066653916692, "loss": 0.4716, "step": 25470 }, { "epoch": 0.646647459735249, "grad_norm": 0.326171875, "learning_rate": 0.00025668331016747326, "loss": 0.4595, "step": 25475 }, { "epoch": 0.6467743777842647, "grad_norm": 0.3515625, "learning_rate": 0.00025665994970836985, "loss": 0.4481, "step": 25480 }, { "epoch": 0.6469012958332805, "grad_norm": 0.330078125, "learning_rate": 0.0002566365840155051, "loss": 0.4896, "step": 25485 }, { "epoch": 0.6470282138822963, "grad_norm": 0.341796875, "learning_rate": 0.00025661321309002565, "loss": 0.4622, "step": 25490 }, { "epoch": 0.6471551319313119, "grad_norm": 0.349609375, "learning_rate": 0.00025658983693307833, "loss": 0.4591, "step": 25495 }, { "epoch": 0.6472820499803277, "grad_norm": 0.326171875, "learning_rate": 0.0002565664555458102, "loss": 0.4498, "step": 25500 }, { "epoch": 0.6474089680293434, "grad_norm": 0.328125, "learning_rate": 0.0002565430689293685, "loss": 0.4744, "step": 25505 }, { "epoch": 0.6475358860783592, "grad_norm": 0.328125, "learning_rate": 0.0002565196770849008, "loss": 0.4734, "step": 25510 }, { "epoch": 0.647662804127375, "grad_norm": 0.32421875, "learning_rate": 0.0002564962800135551, "loss": 0.4448, "step": 25515 }, { "epoch": 0.6477897221763907, "grad_norm": 0.345703125, "learning_rate": 0.0002564728777164793, "loss": 0.5029, "step": 25520 }, { "epoch": 0.6479166402254064, "grad_norm": 0.333984375, "learning_rate": 0.00025644947019482184, "loss": 0.4667, "step": 25525 }, { "epoch": 0.6480435582744222, "grad_norm": 0.3359375, "learning_rate": 0.0002564260574497312, "loss": 0.4647, "step": 25530 }, { "epoch": 0.6481704763234379, "grad_norm": 0.333984375, "learning_rate": 0.00025640263948235637, "loss": 0.4283, "step": 25535 }, { "epoch": 0.6482973943724537, "grad_norm": 0.3515625, "learning_rate": 0.00025637921629384643, "loss": 0.4918, "step": 25540 }, { "epoch": 0.6484243124214695, "grad_norm": 0.341796875, "learning_rate": 0.00025635578788535065, "loss": 0.4465, "step": 25545 }, { "epoch": 0.6485512304704852, "grad_norm": 0.369140625, "learning_rate": 0.00025633235425801876, "loss": 0.447, "step": 25550 }, { "epoch": 0.648678148519501, "grad_norm": 3.453125, "learning_rate": 0.00025630891541300057, "loss": 0.4453, "step": 25555 }, { "epoch": 0.6488050665685167, "grad_norm": 0.4609375, "learning_rate": 0.0002562854713514463, "loss": 0.4565, "step": 25560 }, { "epoch": 0.6489319846175324, "grad_norm": 0.337890625, "learning_rate": 0.0002562620220745063, "loss": 0.4516, "step": 25565 }, { "epoch": 0.6490589026665482, "grad_norm": 0.341796875, "learning_rate": 0.00025623856758333113, "loss": 0.4391, "step": 25570 }, { "epoch": 0.649185820715564, "grad_norm": 0.341796875, "learning_rate": 0.00025621510787907184, "loss": 0.4446, "step": 25575 }, { "epoch": 0.6493127387645797, "grad_norm": 0.3515625, "learning_rate": 0.00025619164296287946, "loss": 0.4444, "step": 25580 }, { "epoch": 0.6494396568135955, "grad_norm": 0.341796875, "learning_rate": 0.0002561681728359055, "loss": 0.4661, "step": 25585 }, { "epoch": 0.6495665748626112, "grad_norm": 0.333984375, "learning_rate": 0.0002561446974993016, "loss": 0.4617, "step": 25590 }, { "epoch": 0.6496934929116269, "grad_norm": 0.361328125, "learning_rate": 0.00025612121695421967, "loss": 0.4495, "step": 25595 }, { "epoch": 0.6498204109606427, "grad_norm": 0.359375, "learning_rate": 0.0002560977312018119, "loss": 0.4669, "step": 25600 }, { "epoch": 0.6499473290096585, "grad_norm": 0.34765625, "learning_rate": 0.0002560742402432307, "loss": 0.465, "step": 25605 }, { "epoch": 0.6500742470586742, "grad_norm": 0.333984375, "learning_rate": 0.00025605074407962877, "loss": 0.4626, "step": 25610 }, { "epoch": 0.65020116510769, "grad_norm": 0.369140625, "learning_rate": 0.0002560272427121591, "loss": 0.4602, "step": 25615 }, { "epoch": 0.6503280831567058, "grad_norm": 0.34375, "learning_rate": 0.00025600373614197483, "loss": 0.456, "step": 25620 }, { "epoch": 0.6504550012057214, "grad_norm": 0.34765625, "learning_rate": 0.0002559802243702295, "loss": 0.4619, "step": 25625 }, { "epoch": 0.6505819192547372, "grad_norm": 0.376953125, "learning_rate": 0.0002559567073980768, "loss": 0.4824, "step": 25630 }, { "epoch": 0.650708837303753, "grad_norm": 0.328125, "learning_rate": 0.0002559331852266706, "loss": 0.4513, "step": 25635 }, { "epoch": 0.6508357553527687, "grad_norm": 0.330078125, "learning_rate": 0.00025590965785716525, "loss": 0.4504, "step": 25640 }, { "epoch": 0.6509626734017845, "grad_norm": 0.314453125, "learning_rate": 0.0002558861252907151, "loss": 0.4531, "step": 25645 }, { "epoch": 0.6510895914508003, "grad_norm": 0.353515625, "learning_rate": 0.0002558625875284751, "loss": 0.4467, "step": 25650 }, { "epoch": 0.651216509499816, "grad_norm": 0.34375, "learning_rate": 0.00025583904457159994, "loss": 0.4949, "step": 25655 }, { "epoch": 0.6513434275488317, "grad_norm": 0.34375, "learning_rate": 0.00025581549642124515, "loss": 0.4757, "step": 25660 }, { "epoch": 0.6514703455978474, "grad_norm": 0.349609375, "learning_rate": 0.00025579194307856604, "loss": 0.4647, "step": 25665 }, { "epoch": 0.6515972636468632, "grad_norm": 0.341796875, "learning_rate": 0.00025576838454471846, "loss": 0.4197, "step": 25670 }, { "epoch": 0.651724181695879, "grad_norm": 0.37109375, "learning_rate": 0.00025574482082085835, "loss": 0.4811, "step": 25675 }, { "epoch": 0.6518510997448947, "grad_norm": 0.34375, "learning_rate": 0.000255721251908142, "loss": 0.4786, "step": 25680 }, { "epoch": 0.6519780177939105, "grad_norm": 0.322265625, "learning_rate": 0.0002556976778077259, "loss": 0.456, "step": 25685 }, { "epoch": 0.6521049358429262, "grad_norm": 0.3359375, "learning_rate": 0.0002556740985207669, "loss": 0.444, "step": 25690 }, { "epoch": 0.6522318538919419, "grad_norm": 0.314453125, "learning_rate": 0.00025565051404842205, "loss": 0.4563, "step": 25695 }, { "epoch": 0.6523587719409577, "grad_norm": 0.337890625, "learning_rate": 0.00025562692439184845, "loss": 0.4571, "step": 25700 }, { "epoch": 0.6524856899899735, "grad_norm": 0.42578125, "learning_rate": 0.0002556033295522038, "loss": 0.4618, "step": 25705 }, { "epoch": 0.6526126080389892, "grad_norm": 0.330078125, "learning_rate": 0.00025557972953064583, "loss": 0.4483, "step": 25710 }, { "epoch": 0.652739526088005, "grad_norm": 0.36328125, "learning_rate": 0.00025555612432833256, "loss": 0.4847, "step": 25715 }, { "epoch": 0.6528664441370208, "grad_norm": 0.357421875, "learning_rate": 0.0002555325139464223, "loss": 0.4769, "step": 25720 }, { "epoch": 0.6529933621860364, "grad_norm": 0.330078125, "learning_rate": 0.00025550889838607364, "loss": 0.4523, "step": 25725 }, { "epoch": 0.6531202802350522, "grad_norm": 0.341796875, "learning_rate": 0.0002554852776484454, "loss": 0.4401, "step": 25730 }, { "epoch": 0.653247198284068, "grad_norm": 0.31640625, "learning_rate": 0.0002554616517346966, "loss": 0.4543, "step": 25735 }, { "epoch": 0.6533741163330837, "grad_norm": 0.314453125, "learning_rate": 0.0002554380206459866, "loss": 0.4506, "step": 25740 }, { "epoch": 0.6535010343820995, "grad_norm": 0.33984375, "learning_rate": 0.00025541438438347484, "loss": 0.4526, "step": 25745 }, { "epoch": 0.6536279524311153, "grad_norm": 0.34375, "learning_rate": 0.0002553907429483212, "loss": 0.4615, "step": 25750 }, { "epoch": 0.653754870480131, "grad_norm": 0.34375, "learning_rate": 0.0002553670963416859, "loss": 0.4726, "step": 25755 }, { "epoch": 0.6538817885291467, "grad_norm": 0.3671875, "learning_rate": 0.0002553434445647291, "loss": 0.4848, "step": 25760 }, { "epoch": 0.6540087065781625, "grad_norm": 0.3359375, "learning_rate": 0.00025531978761861144, "loss": 0.4685, "step": 25765 }, { "epoch": 0.6541356246271782, "grad_norm": 0.341796875, "learning_rate": 0.00025529612550449375, "loss": 0.4688, "step": 25770 }, { "epoch": 0.654262542676194, "grad_norm": 0.35546875, "learning_rate": 0.0002552724582235371, "loss": 0.4489, "step": 25775 }, { "epoch": 0.6543894607252098, "grad_norm": 0.337890625, "learning_rate": 0.0002552487857769029, "loss": 0.4667, "step": 25780 }, { "epoch": 0.6545163787742255, "grad_norm": 0.3515625, "learning_rate": 0.0002552251081657527, "loss": 0.4512, "step": 25785 }, { "epoch": 0.6546432968232412, "grad_norm": 0.33203125, "learning_rate": 0.00025520142539124836, "loss": 0.4643, "step": 25790 }, { "epoch": 0.654770214872257, "grad_norm": 0.333984375, "learning_rate": 0.00025517773745455195, "loss": 0.4522, "step": 25795 }, { "epoch": 0.6548971329212727, "grad_norm": 0.333984375, "learning_rate": 0.0002551540443568259, "loss": 0.4663, "step": 25800 }, { "epoch": 0.6550240509702885, "grad_norm": 0.30859375, "learning_rate": 0.0002551303460992327, "loss": 0.4534, "step": 25805 }, { "epoch": 0.6551509690193043, "grad_norm": 0.33984375, "learning_rate": 0.00025510664268293536, "loss": 0.4758, "step": 25810 }, { "epoch": 0.65527788706832, "grad_norm": 0.345703125, "learning_rate": 0.00025508293410909694, "loss": 0.4744, "step": 25815 }, { "epoch": 0.6554048051173358, "grad_norm": 0.35546875, "learning_rate": 0.0002550592203788808, "loss": 0.4557, "step": 25820 }, { "epoch": 0.6555317231663514, "grad_norm": 0.34765625, "learning_rate": 0.0002550355014934506, "loss": 0.4616, "step": 25825 }, { "epoch": 0.6556586412153672, "grad_norm": 0.337890625, "learning_rate": 0.0002550117774539701, "loss": 0.4547, "step": 25830 }, { "epoch": 0.655785559264383, "grad_norm": 0.35546875, "learning_rate": 0.0002549880482616036, "loss": 0.5105, "step": 25835 }, { "epoch": 0.6559124773133987, "grad_norm": 0.33203125, "learning_rate": 0.0002549643139175153, "loss": 0.4574, "step": 25840 }, { "epoch": 0.6560393953624145, "grad_norm": 0.310546875, "learning_rate": 0.00025494057442286994, "loss": 0.4315, "step": 25845 }, { "epoch": 0.6561663134114303, "grad_norm": 0.318359375, "learning_rate": 0.00025491682977883245, "loss": 0.4557, "step": 25850 }, { "epoch": 0.6562932314604459, "grad_norm": 0.326171875, "learning_rate": 0.0002548930799865679, "loss": 0.4481, "step": 25855 }, { "epoch": 0.6564201495094617, "grad_norm": 0.37109375, "learning_rate": 0.0002548693250472417, "loss": 0.4569, "step": 25860 }, { "epoch": 0.6565470675584775, "grad_norm": 0.318359375, "learning_rate": 0.0002548455649620195, "loss": 0.4633, "step": 25865 }, { "epoch": 0.6566739856074932, "grad_norm": 0.3125, "learning_rate": 0.00025482179973206716, "loss": 0.4493, "step": 25870 }, { "epoch": 0.656800903656509, "grad_norm": 0.37890625, "learning_rate": 0.0002547980293585509, "loss": 0.4704, "step": 25875 }, { "epoch": 0.6569278217055248, "grad_norm": 0.35546875, "learning_rate": 0.000254774253842637, "loss": 0.4529, "step": 25880 }, { "epoch": 0.6570547397545405, "grad_norm": 0.333984375, "learning_rate": 0.0002547504731854923, "loss": 0.4341, "step": 25885 }, { "epoch": 0.6571816578035562, "grad_norm": 0.33984375, "learning_rate": 0.00025472668738828354, "loss": 0.4789, "step": 25890 }, { "epoch": 0.657308575852572, "grad_norm": 0.361328125, "learning_rate": 0.000254702896452178, "loss": 0.482, "step": 25895 }, { "epoch": 0.6574354939015877, "grad_norm": 0.35546875, "learning_rate": 0.000254679100378343, "loss": 0.5136, "step": 25900 }, { "epoch": 0.6575624119506035, "grad_norm": 0.353515625, "learning_rate": 0.00025465529916794623, "loss": 0.4619, "step": 25905 }, { "epoch": 0.6576893299996193, "grad_norm": 0.33203125, "learning_rate": 0.00025463149282215565, "loss": 0.4601, "step": 25910 }, { "epoch": 0.657816248048635, "grad_norm": 0.357421875, "learning_rate": 0.0002546076813421394, "loss": 0.4469, "step": 25915 }, { "epoch": 0.6579431660976508, "grad_norm": 0.33984375, "learning_rate": 0.00025458386472906587, "loss": 0.4533, "step": 25920 }, { "epoch": 0.6580700841466665, "grad_norm": 0.31640625, "learning_rate": 0.0002545600429841038, "loss": 0.4545, "step": 25925 }, { "epoch": 0.6581970021956822, "grad_norm": 0.33984375, "learning_rate": 0.00025453621610842204, "loss": 0.4995, "step": 25930 }, { "epoch": 0.658323920244698, "grad_norm": 0.328125, "learning_rate": 0.00025451238410318977, "loss": 0.4331, "step": 25935 }, { "epoch": 0.6584508382937138, "grad_norm": 0.33203125, "learning_rate": 0.0002544885469695765, "loss": 0.4932, "step": 25940 }, { "epoch": 0.6585777563427295, "grad_norm": 0.34765625, "learning_rate": 0.0002544647047087518, "loss": 0.4643, "step": 25945 }, { "epoch": 0.6587046743917453, "grad_norm": 0.337890625, "learning_rate": 0.00025444085732188567, "loss": 0.4857, "step": 25950 }, { "epoch": 0.658831592440761, "grad_norm": 0.390625, "learning_rate": 0.0002544170048101483, "loss": 0.4924, "step": 25955 }, { "epoch": 0.6589585104897767, "grad_norm": 0.32421875, "learning_rate": 0.0002543931471747101, "loss": 0.4774, "step": 25960 }, { "epoch": 0.6590854285387925, "grad_norm": 0.330078125, "learning_rate": 0.00025436928441674174, "loss": 0.4569, "step": 25965 }, { "epoch": 0.6592123465878083, "grad_norm": 0.3515625, "learning_rate": 0.00025434541653741416, "loss": 0.4541, "step": 25970 }, { "epoch": 0.659339264636824, "grad_norm": 0.333984375, "learning_rate": 0.00025432154353789856, "loss": 0.4687, "step": 25975 }, { "epoch": 0.6594661826858398, "grad_norm": 0.33984375, "learning_rate": 0.0002542976654193663, "loss": 0.4599, "step": 25980 }, { "epoch": 0.6595931007348556, "grad_norm": 0.359375, "learning_rate": 0.0002542737821829893, "loss": 0.4764, "step": 25985 }, { "epoch": 0.6597200187838712, "grad_norm": 0.333984375, "learning_rate": 0.0002542498938299392, "loss": 0.4809, "step": 25990 }, { "epoch": 0.659846936832887, "grad_norm": 0.330078125, "learning_rate": 0.00025422600036138847, "loss": 0.4426, "step": 25995 }, { "epoch": 0.6599738548819027, "grad_norm": 0.333984375, "learning_rate": 0.0002542021017785093, "loss": 0.4836, "step": 26000 }, { "epoch": 0.6601007729309185, "grad_norm": 0.361328125, "learning_rate": 0.00025417819808247455, "loss": 0.4473, "step": 26005 }, { "epoch": 0.6602276909799343, "grad_norm": 0.349609375, "learning_rate": 0.0002541542892744571, "loss": 0.4131, "step": 26010 }, { "epoch": 0.66035460902895, "grad_norm": 0.326171875, "learning_rate": 0.0002541303753556302, "loss": 0.4405, "step": 26015 }, { "epoch": 0.6604815270779657, "grad_norm": 0.326171875, "learning_rate": 0.0002541064563271673, "loss": 0.4696, "step": 26020 }, { "epoch": 0.6606084451269815, "grad_norm": 0.361328125, "learning_rate": 0.00025408253219024195, "loss": 0.4593, "step": 26025 }, { "epoch": 0.6607353631759972, "grad_norm": 0.328125, "learning_rate": 0.00025405860294602827, "loss": 0.4113, "step": 26030 }, { "epoch": 0.660862281225013, "grad_norm": 0.328125, "learning_rate": 0.00025403466859570033, "loss": 0.4605, "step": 26035 }, { "epoch": 0.6609891992740288, "grad_norm": 0.341796875, "learning_rate": 0.00025401072914043275, "loss": 0.4608, "step": 26040 }, { "epoch": 0.6611161173230445, "grad_norm": 0.322265625, "learning_rate": 0.00025398678458140006, "loss": 0.4747, "step": 26045 }, { "epoch": 0.6612430353720603, "grad_norm": 0.333984375, "learning_rate": 0.0002539628349197773, "loss": 0.4381, "step": 26050 }, { "epoch": 0.661369953421076, "grad_norm": 0.39453125, "learning_rate": 0.0002539388801567396, "loss": 0.4829, "step": 26055 }, { "epoch": 0.6614968714700917, "grad_norm": 0.376953125, "learning_rate": 0.00025391492029346247, "loss": 0.5172, "step": 26060 }, { "epoch": 0.6616237895191075, "grad_norm": 0.38671875, "learning_rate": 0.0002538909553311216, "loss": 0.4619, "step": 26065 }, { "epoch": 0.6617507075681233, "grad_norm": 0.37109375, "learning_rate": 0.0002538669852708929, "loss": 0.484, "step": 26070 }, { "epoch": 0.661877625617139, "grad_norm": 0.3515625, "learning_rate": 0.0002538430101139527, "loss": 0.4687, "step": 26075 }, { "epoch": 0.6620045436661548, "grad_norm": 0.984375, "learning_rate": 0.0002538190298614773, "loss": 0.4885, "step": 26080 }, { "epoch": 0.6621314617151706, "grad_norm": 0.353515625, "learning_rate": 0.00025379504451464346, "loss": 0.4984, "step": 26085 }, { "epoch": 0.6622583797641862, "grad_norm": 0.3359375, "learning_rate": 0.0002537710540746281, "loss": 0.4522, "step": 26090 }, { "epoch": 0.662385297813202, "grad_norm": 0.326171875, "learning_rate": 0.0002537470585426085, "loss": 0.4472, "step": 26095 }, { "epoch": 0.6625122158622178, "grad_norm": 0.30078125, "learning_rate": 0.00025372305791976205, "loss": 0.4437, "step": 26100 }, { "epoch": 0.6626391339112335, "grad_norm": 0.361328125, "learning_rate": 0.0002536990522072665, "loss": 0.4533, "step": 26105 }, { "epoch": 0.6627660519602493, "grad_norm": 0.3515625, "learning_rate": 0.0002536750414062997, "loss": 0.454, "step": 26110 }, { "epoch": 0.6628929700092651, "grad_norm": 0.337890625, "learning_rate": 0.00025365102551803997, "loss": 0.4491, "step": 26115 }, { "epoch": 0.6630198880582807, "grad_norm": 0.341796875, "learning_rate": 0.00025362700454366566, "loss": 0.4277, "step": 26120 }, { "epoch": 0.6631468061072965, "grad_norm": 0.32421875, "learning_rate": 0.00025360297848435557, "loss": 0.4604, "step": 26125 }, { "epoch": 0.6632737241563122, "grad_norm": 0.369140625, "learning_rate": 0.00025357894734128855, "loss": 0.4593, "step": 26130 }, { "epoch": 0.663400642205328, "grad_norm": 0.31640625, "learning_rate": 0.00025355491111564383, "loss": 0.4401, "step": 26135 }, { "epoch": 0.6635275602543438, "grad_norm": 0.359375, "learning_rate": 0.0002535308698086009, "loss": 0.4602, "step": 26140 }, { "epoch": 0.6636544783033596, "grad_norm": 0.3515625, "learning_rate": 0.00025350682342133944, "loss": 0.4425, "step": 26145 }, { "epoch": 0.6637813963523753, "grad_norm": 0.3515625, "learning_rate": 0.0002534827719550394, "loss": 0.4786, "step": 26150 }, { "epoch": 0.663908314401391, "grad_norm": 0.3515625, "learning_rate": 0.00025345871541088095, "loss": 0.4394, "step": 26155 }, { "epoch": 0.6640352324504067, "grad_norm": 0.3359375, "learning_rate": 0.0002534346537900445, "loss": 0.4635, "step": 26160 }, { "epoch": 0.6641621504994225, "grad_norm": 0.3671875, "learning_rate": 0.0002534105870937108, "loss": 0.4592, "step": 26165 }, { "epoch": 0.6642890685484383, "grad_norm": 0.451171875, "learning_rate": 0.00025338651532306084, "loss": 0.4494, "step": 26170 }, { "epoch": 0.664415986597454, "grad_norm": 0.337890625, "learning_rate": 0.00025336243847927576, "loss": 0.4604, "step": 26175 }, { "epoch": 0.6645429046464698, "grad_norm": 0.337890625, "learning_rate": 0.0002533383565635369, "loss": 0.4715, "step": 26180 }, { "epoch": 0.6646698226954856, "grad_norm": 0.328125, "learning_rate": 0.0002533142695770262, "loss": 0.4612, "step": 26185 }, { "epoch": 0.6647967407445012, "grad_norm": 0.361328125, "learning_rate": 0.0002532901775209253, "loss": 0.4581, "step": 26190 }, { "epoch": 0.664923658793517, "grad_norm": 0.357421875, "learning_rate": 0.0002532660803964166, "loss": 0.4614, "step": 26195 }, { "epoch": 0.6650505768425328, "grad_norm": 0.314453125, "learning_rate": 0.0002532419782046825, "loss": 0.4612, "step": 26200 }, { "epoch": 0.6651774948915485, "grad_norm": 0.341796875, "learning_rate": 0.00025321787094690555, "loss": 0.4579, "step": 26205 }, { "epoch": 0.6653044129405643, "grad_norm": 0.388671875, "learning_rate": 0.0002531937586242689, "loss": 0.4648, "step": 26210 }, { "epoch": 0.6654313309895801, "grad_norm": 0.349609375, "learning_rate": 0.00025316964123795556, "loss": 0.4408, "step": 26215 }, { "epoch": 0.6655582490385957, "grad_norm": 0.328125, "learning_rate": 0.00025314551878914904, "loss": 0.4662, "step": 26220 }, { "epoch": 0.6656851670876115, "grad_norm": 0.337890625, "learning_rate": 0.00025312139127903297, "loss": 0.4515, "step": 26225 }, { "epoch": 0.6658120851366273, "grad_norm": 0.3125, "learning_rate": 0.00025309725870879135, "loss": 0.4436, "step": 26230 }, { "epoch": 0.665939003185643, "grad_norm": 0.38671875, "learning_rate": 0.0002530731210796083, "loss": 0.4631, "step": 26235 }, { "epoch": 0.6660659212346588, "grad_norm": 0.345703125, "learning_rate": 0.0002530489783926683, "loss": 0.4619, "step": 26240 }, { "epoch": 0.6661928392836746, "grad_norm": 0.36328125, "learning_rate": 0.0002530248306491559, "loss": 0.4956, "step": 26245 }, { "epoch": 0.6663197573326903, "grad_norm": 0.30078125, "learning_rate": 0.0002530006778502561, "loss": 0.4616, "step": 26250 }, { "epoch": 0.666446675381706, "grad_norm": 0.357421875, "learning_rate": 0.0002529765199971541, "loss": 0.4575, "step": 26255 }, { "epoch": 0.6665735934307218, "grad_norm": 0.34765625, "learning_rate": 0.00025295235709103526, "loss": 0.4711, "step": 26260 }, { "epoch": 0.6667005114797375, "grad_norm": 0.34375, "learning_rate": 0.0002529281891330853, "loss": 0.4489, "step": 26265 }, { "epoch": 0.6668274295287533, "grad_norm": 0.353515625, "learning_rate": 0.0002529040161244901, "loss": 0.4931, "step": 26270 }, { "epoch": 0.6669543475777691, "grad_norm": 0.328125, "learning_rate": 0.0002528798380664358, "loss": 0.4687, "step": 26275 }, { "epoch": 0.6670812656267848, "grad_norm": 0.3515625, "learning_rate": 0.00025285565496010884, "loss": 0.4676, "step": 26280 }, { "epoch": 0.6672081836758005, "grad_norm": 0.333984375, "learning_rate": 0.00025283146680669586, "loss": 0.4529, "step": 26285 }, { "epoch": 0.6673351017248162, "grad_norm": 0.333984375, "learning_rate": 0.0002528072736073838, "loss": 0.4652, "step": 26290 }, { "epoch": 0.667462019773832, "grad_norm": 0.322265625, "learning_rate": 0.0002527830753633597, "loss": 0.4338, "step": 26295 }, { "epoch": 0.6675889378228478, "grad_norm": 0.32421875, "learning_rate": 0.0002527588720758111, "loss": 0.4254, "step": 26300 }, { "epoch": 0.6677158558718636, "grad_norm": 0.35546875, "learning_rate": 0.0002527346637459256, "loss": 0.4421, "step": 26305 }, { "epoch": 0.6678427739208793, "grad_norm": 0.3515625, "learning_rate": 0.000252710450374891, "loss": 0.46, "step": 26310 }, { "epoch": 0.6679696919698951, "grad_norm": 0.328125, "learning_rate": 0.00025268623196389553, "loss": 0.4863, "step": 26315 }, { "epoch": 0.6680966100189107, "grad_norm": 0.314453125, "learning_rate": 0.00025266200851412757, "loss": 0.4464, "step": 26320 }, { "epoch": 0.6682235280679265, "grad_norm": 0.32421875, "learning_rate": 0.00025263778002677576, "loss": 0.4495, "step": 26325 }, { "epoch": 0.6683504461169423, "grad_norm": 0.37109375, "learning_rate": 0.000252613546503029, "loss": 0.4668, "step": 26330 }, { "epoch": 0.668477364165958, "grad_norm": 0.330078125, "learning_rate": 0.0002525893079440763, "loss": 0.4527, "step": 26335 }, { "epoch": 0.6686042822149738, "grad_norm": 0.3046875, "learning_rate": 0.00025256506435110713, "loss": 0.4832, "step": 26340 }, { "epoch": 0.6687312002639896, "grad_norm": 0.33203125, "learning_rate": 0.0002525408157253112, "loss": 0.4698, "step": 26345 }, { "epoch": 0.6688581183130053, "grad_norm": 0.3515625, "learning_rate": 0.00025251656206787824, "loss": 0.4599, "step": 26350 }, { "epoch": 0.668985036362021, "grad_norm": 0.341796875, "learning_rate": 0.0002524923033799984, "loss": 0.4584, "step": 26355 }, { "epoch": 0.6691119544110368, "grad_norm": 0.34375, "learning_rate": 0.0002524680396628621, "loss": 0.4908, "step": 26360 }, { "epoch": 0.6692388724600525, "grad_norm": 0.318359375, "learning_rate": 0.0002524437709176599, "loss": 0.4805, "step": 26365 }, { "epoch": 0.6693657905090683, "grad_norm": 0.322265625, "learning_rate": 0.00025241949714558266, "loss": 0.4421, "step": 26370 }, { "epoch": 0.6694927085580841, "grad_norm": 0.3671875, "learning_rate": 0.00025239521834782147, "loss": 0.461, "step": 26375 }, { "epoch": 0.6696196266070998, "grad_norm": 0.3125, "learning_rate": 0.0002523709345255677, "loss": 0.4275, "step": 26380 }, { "epoch": 0.6697465446561155, "grad_norm": 0.3203125, "learning_rate": 0.000252346645680013, "loss": 0.4794, "step": 26385 }, { "epoch": 0.6698734627051313, "grad_norm": 0.365234375, "learning_rate": 0.0002523223518123491, "loss": 0.4491, "step": 26390 }, { "epoch": 0.670000380754147, "grad_norm": 0.310546875, "learning_rate": 0.0002522980529237682, "loss": 0.4353, "step": 26395 }, { "epoch": 0.6701272988031628, "grad_norm": 0.333984375, "learning_rate": 0.00025227374901546257, "loss": 0.4556, "step": 26400 }, { "epoch": 0.6702542168521786, "grad_norm": 0.34375, "learning_rate": 0.00025224944008862485, "loss": 0.4304, "step": 26405 }, { "epoch": 0.6703811349011943, "grad_norm": 0.337890625, "learning_rate": 0.0002522251261444478, "loss": 0.4571, "step": 26410 }, { "epoch": 0.6705080529502101, "grad_norm": 0.3125, "learning_rate": 0.0002522008071841246, "loss": 0.4582, "step": 26415 }, { "epoch": 0.6706349709992258, "grad_norm": 0.390625, "learning_rate": 0.00025217648320884843, "loss": 0.4919, "step": 26420 }, { "epoch": 0.6707618890482415, "grad_norm": 0.32421875, "learning_rate": 0.0002521521542198129, "loss": 0.4435, "step": 26425 }, { "epoch": 0.6708888070972573, "grad_norm": 0.314453125, "learning_rate": 0.000252127820218212, "loss": 0.4599, "step": 26430 }, { "epoch": 0.6710157251462731, "grad_norm": 0.3515625, "learning_rate": 0.00025210348120523955, "loss": 0.4496, "step": 26435 }, { "epoch": 0.6711426431952888, "grad_norm": 0.326171875, "learning_rate": 0.00025207913718208985, "loss": 0.4379, "step": 26440 }, { "epoch": 0.6712695612443046, "grad_norm": 0.30859375, "learning_rate": 0.00025205478814995766, "loss": 0.4475, "step": 26445 }, { "epoch": 0.6713964792933204, "grad_norm": 0.3515625, "learning_rate": 0.00025203043411003767, "loss": 0.4504, "step": 26450 }, { "epoch": 0.671523397342336, "grad_norm": 0.3515625, "learning_rate": 0.0002520060750635249, "loss": 0.4627, "step": 26455 }, { "epoch": 0.6716503153913518, "grad_norm": 0.35546875, "learning_rate": 0.0002519817110116147, "loss": 0.4588, "step": 26460 }, { "epoch": 0.6717772334403675, "grad_norm": 0.345703125, "learning_rate": 0.0002519573419555025, "loss": 0.4417, "step": 26465 }, { "epoch": 0.6719041514893833, "grad_norm": 0.279296875, "learning_rate": 0.0002519329678963842, "loss": 0.4238, "step": 26470 }, { "epoch": 0.6720310695383991, "grad_norm": 0.36328125, "learning_rate": 0.0002519085888354557, "loss": 0.4301, "step": 26475 }, { "epoch": 0.6721579875874149, "grad_norm": 0.33203125, "learning_rate": 0.00025188420477391345, "loss": 0.4884, "step": 26480 }, { "epoch": 0.6722849056364305, "grad_norm": 0.34765625, "learning_rate": 0.0002518598157129538, "loss": 0.4481, "step": 26485 }, { "epoch": 0.6724118236854463, "grad_norm": 0.3203125, "learning_rate": 0.00025183542165377353, "loss": 0.4527, "step": 26490 }, { "epoch": 0.672538741734462, "grad_norm": 0.36328125, "learning_rate": 0.0002518110225975697, "loss": 0.4665, "step": 26495 }, { "epoch": 0.6726656597834778, "grad_norm": 0.3515625, "learning_rate": 0.0002517866185455396, "loss": 0.4613, "step": 26500 }, { "epoch": 0.6727925778324936, "grad_norm": 0.33984375, "learning_rate": 0.0002517622094988806, "loss": 0.4634, "step": 26505 }, { "epoch": 0.6729194958815093, "grad_norm": 0.337890625, "learning_rate": 0.00025173779545879056, "loss": 0.4338, "step": 26510 }, { "epoch": 0.6730464139305251, "grad_norm": 0.345703125, "learning_rate": 0.0002517133764264674, "loss": 0.4808, "step": 26515 }, { "epoch": 0.6731733319795408, "grad_norm": 0.330078125, "learning_rate": 0.0002516889524031094, "loss": 0.4585, "step": 26520 }, { "epoch": 0.6733002500285565, "grad_norm": 0.36328125, "learning_rate": 0.000251664523389915, "loss": 0.4834, "step": 26525 }, { "epoch": 0.6734271680775723, "grad_norm": 0.33203125, "learning_rate": 0.00025164008938808295, "loss": 0.448, "step": 26530 }, { "epoch": 0.6735540861265881, "grad_norm": 0.365234375, "learning_rate": 0.00025161565039881213, "loss": 0.467, "step": 26535 }, { "epoch": 0.6736810041756038, "grad_norm": 0.35546875, "learning_rate": 0.0002515912064233019, "loss": 0.4525, "step": 26540 }, { "epoch": 0.6738079222246196, "grad_norm": 0.349609375, "learning_rate": 0.0002515667574627516, "loss": 0.4573, "step": 26545 }, { "epoch": 0.6739348402736353, "grad_norm": 0.333984375, "learning_rate": 0.00025154230351836105, "loss": 0.4721, "step": 26550 }, { "epoch": 0.674061758322651, "grad_norm": 0.3125, "learning_rate": 0.00025151784459133, "loss": 0.4479, "step": 26555 }, { "epoch": 0.6741886763716668, "grad_norm": 0.3515625, "learning_rate": 0.00025149338068285887, "loss": 0.4811, "step": 26560 }, { "epoch": 0.6743155944206826, "grad_norm": 0.33984375, "learning_rate": 0.0002514689117941479, "loss": 0.4575, "step": 26565 }, { "epoch": 0.6744425124696983, "grad_norm": 0.333984375, "learning_rate": 0.0002514444379263978, "loss": 0.4423, "step": 26570 }, { "epoch": 0.6745694305187141, "grad_norm": 0.3671875, "learning_rate": 0.00025141995908080963, "loss": 0.4623, "step": 26575 }, { "epoch": 0.6746963485677299, "grad_norm": 0.37109375, "learning_rate": 0.00025139547525858445, "loss": 0.4628, "step": 26580 }, { "epoch": 0.6748232666167455, "grad_norm": 0.345703125, "learning_rate": 0.0002513709864609237, "loss": 0.4267, "step": 26585 }, { "epoch": 0.6749501846657613, "grad_norm": 0.3359375, "learning_rate": 0.000251346492689029, "loss": 0.4705, "step": 26590 }, { "epoch": 0.675077102714777, "grad_norm": 0.34375, "learning_rate": 0.00025132199394410225, "loss": 0.4964, "step": 26595 }, { "epoch": 0.6752040207637928, "grad_norm": 0.361328125, "learning_rate": 0.00025129749022734563, "loss": 0.4687, "step": 26600 }, { "epoch": 0.6753309388128086, "grad_norm": 0.333984375, "learning_rate": 0.00025127298153996153, "loss": 0.4636, "step": 26605 }, { "epoch": 0.6754578568618244, "grad_norm": 0.3515625, "learning_rate": 0.0002512484678831526, "loss": 0.5044, "step": 26610 }, { "epoch": 0.6755847749108401, "grad_norm": 0.36328125, "learning_rate": 0.00025122394925812167, "loss": 0.4391, "step": 26615 }, { "epoch": 0.6757116929598558, "grad_norm": 0.36328125, "learning_rate": 0.00025119942566607185, "loss": 0.4897, "step": 26620 }, { "epoch": 0.6758386110088715, "grad_norm": 0.353515625, "learning_rate": 0.0002511748971082065, "loss": 0.4663, "step": 26625 }, { "epoch": 0.6759655290578873, "grad_norm": 0.35546875, "learning_rate": 0.0002511503635857293, "loss": 0.473, "step": 26630 }, { "epoch": 0.6760924471069031, "grad_norm": 0.322265625, "learning_rate": 0.000251125825099844, "loss": 0.4311, "step": 26635 }, { "epoch": 0.6762193651559189, "grad_norm": 0.32421875, "learning_rate": 0.0002511012816517548, "loss": 0.4932, "step": 26640 }, { "epoch": 0.6763462832049346, "grad_norm": 0.322265625, "learning_rate": 0.00025107673324266595, "loss": 0.4549, "step": 26645 }, { "epoch": 0.6764732012539503, "grad_norm": 0.35546875, "learning_rate": 0.0002510521798737821, "loss": 0.456, "step": 26650 }, { "epoch": 0.676600119302966, "grad_norm": 0.376953125, "learning_rate": 0.000251027621546308, "loss": 0.4669, "step": 26655 }, { "epoch": 0.6767270373519818, "grad_norm": 0.34765625, "learning_rate": 0.00025100305826144875, "loss": 0.4431, "step": 26660 }, { "epoch": 0.6768539554009976, "grad_norm": 0.31640625, "learning_rate": 0.0002509784900204097, "loss": 0.4173, "step": 26665 }, { "epoch": 0.6769808734500133, "grad_norm": 0.3125, "learning_rate": 0.0002509539168243963, "loss": 0.4668, "step": 26670 }, { "epoch": 0.6771077914990291, "grad_norm": 0.345703125, "learning_rate": 0.00025092933867461444, "loss": 0.4575, "step": 26675 }, { "epoch": 0.6772347095480449, "grad_norm": 0.349609375, "learning_rate": 0.00025090475557227015, "loss": 0.4708, "step": 26680 }, { "epoch": 0.6773616275970605, "grad_norm": 0.345703125, "learning_rate": 0.00025088016751856975, "loss": 0.4544, "step": 26685 }, { "epoch": 0.6774885456460763, "grad_norm": 0.33203125, "learning_rate": 0.0002508555745147197, "loss": 0.4356, "step": 26690 }, { "epoch": 0.6776154636950921, "grad_norm": 0.341796875, "learning_rate": 0.0002508309765619267, "loss": 0.4749, "step": 26695 }, { "epoch": 0.6777423817441078, "grad_norm": 0.357421875, "learning_rate": 0.0002508063736613979, "loss": 0.477, "step": 26700 }, { "epoch": 0.6778692997931236, "grad_norm": 0.330078125, "learning_rate": 0.0002507817658143405, "loss": 0.4591, "step": 26705 }, { "epoch": 0.6779962178421394, "grad_norm": 0.33203125, "learning_rate": 0.00025075715302196196, "loss": 0.4694, "step": 26710 }, { "epoch": 0.678123135891155, "grad_norm": 0.333984375, "learning_rate": 0.0002507325352854701, "loss": 0.4493, "step": 26715 }, { "epoch": 0.6782500539401708, "grad_norm": 0.32421875, "learning_rate": 0.00025070791260607293, "loss": 0.4434, "step": 26720 }, { "epoch": 0.6783769719891866, "grad_norm": 0.353515625, "learning_rate": 0.00025068328498497846, "loss": 0.4772, "step": 26725 }, { "epoch": 0.6785038900382023, "grad_norm": 0.35546875, "learning_rate": 0.0002506586524233954, "loss": 0.451, "step": 26730 }, { "epoch": 0.6786308080872181, "grad_norm": 0.345703125, "learning_rate": 0.0002506340149225323, "loss": 0.4344, "step": 26735 }, { "epoch": 0.6787577261362339, "grad_norm": 0.349609375, "learning_rate": 0.00025060937248359816, "loss": 0.4526, "step": 26740 }, { "epoch": 0.6788846441852496, "grad_norm": 0.33984375, "learning_rate": 0.00025058472510780224, "loss": 0.4377, "step": 26745 }, { "epoch": 0.6790115622342653, "grad_norm": 0.326171875, "learning_rate": 0.00025056007279635395, "loss": 0.46, "step": 26750 }, { "epoch": 0.679138480283281, "grad_norm": 0.34375, "learning_rate": 0.0002505354155504629, "loss": 0.4879, "step": 26755 }, { "epoch": 0.6792653983322968, "grad_norm": 0.32421875, "learning_rate": 0.00025051075337133907, "loss": 0.4739, "step": 26760 }, { "epoch": 0.6793923163813126, "grad_norm": 0.328125, "learning_rate": 0.0002504860862601926, "loss": 0.4408, "step": 26765 }, { "epoch": 0.6795192344303284, "grad_norm": 0.34765625, "learning_rate": 0.00025046141421823393, "loss": 0.4801, "step": 26770 }, { "epoch": 0.6796461524793441, "grad_norm": 0.34765625, "learning_rate": 0.00025043673724667363, "loss": 0.4435, "step": 26775 }, { "epoch": 0.6797730705283599, "grad_norm": 0.333984375, "learning_rate": 0.00025041205534672274, "loss": 0.4509, "step": 26780 }, { "epoch": 0.6798999885773755, "grad_norm": 0.32421875, "learning_rate": 0.0002503873685195922, "loss": 0.4451, "step": 26785 }, { "epoch": 0.6800269066263913, "grad_norm": 0.322265625, "learning_rate": 0.0002503626767664935, "loss": 0.4388, "step": 26790 }, { "epoch": 0.6801538246754071, "grad_norm": 0.361328125, "learning_rate": 0.00025033798008863823, "loss": 0.4527, "step": 26795 }, { "epoch": 0.6802807427244228, "grad_norm": 0.333984375, "learning_rate": 0.00025031327848723826, "loss": 0.4571, "step": 26800 }, { "epoch": 0.6804076607734386, "grad_norm": 0.35546875, "learning_rate": 0.0002502885719635057, "loss": 0.4538, "step": 26805 }, { "epoch": 0.6805345788224544, "grad_norm": 0.35546875, "learning_rate": 0.0002502638605186528, "loss": 0.4442, "step": 26810 }, { "epoch": 0.68066149687147, "grad_norm": 0.33984375, "learning_rate": 0.0002502391441538922, "loss": 0.4629, "step": 26815 }, { "epoch": 0.6807884149204858, "grad_norm": 0.341796875, "learning_rate": 0.00025021442287043676, "loss": 0.4573, "step": 26820 }, { "epoch": 0.6809153329695016, "grad_norm": 0.330078125, "learning_rate": 0.00025018969666949954, "loss": 0.4548, "step": 26825 }, { "epoch": 0.6810422510185173, "grad_norm": 0.462890625, "learning_rate": 0.00025016496555229376, "loss": 0.4488, "step": 26830 }, { "epoch": 0.6811691690675331, "grad_norm": 0.380859375, "learning_rate": 0.00025014022952003307, "loss": 0.4712, "step": 26835 }, { "epoch": 0.6812960871165489, "grad_norm": 0.345703125, "learning_rate": 0.0002501154885739312, "loss": 0.4708, "step": 26840 }, { "epoch": 0.6814230051655646, "grad_norm": 0.3359375, "learning_rate": 0.00025009074271520207, "loss": 0.4782, "step": 26845 }, { "epoch": 0.6815499232145803, "grad_norm": 0.322265625, "learning_rate": 0.00025006599194506016, "loss": 0.438, "step": 26850 }, { "epoch": 0.6816768412635961, "grad_norm": 0.34375, "learning_rate": 0.00025004123626471987, "loss": 0.4558, "step": 26855 }, { "epoch": 0.6818037593126118, "grad_norm": 0.31640625, "learning_rate": 0.000250016475675396, "loss": 0.4352, "step": 26860 }, { "epoch": 0.6819306773616276, "grad_norm": 0.37890625, "learning_rate": 0.0002499917101783035, "loss": 0.4846, "step": 26865 }, { "epoch": 0.6820575954106434, "grad_norm": 0.326171875, "learning_rate": 0.00024996693977465763, "loss": 0.4673, "step": 26870 }, { "epoch": 0.6821845134596591, "grad_norm": 0.34375, "learning_rate": 0.0002499421644656738, "loss": 0.4637, "step": 26875 }, { "epoch": 0.6823114315086749, "grad_norm": 0.3671875, "learning_rate": 0.00024991738425256775, "loss": 0.4948, "step": 26880 }, { "epoch": 0.6824383495576906, "grad_norm": 0.330078125, "learning_rate": 0.00024989259913655557, "loss": 0.446, "step": 26885 }, { "epoch": 0.6825652676067063, "grad_norm": 0.326171875, "learning_rate": 0.0002498678091188532, "loss": 0.4507, "step": 26890 }, { "epoch": 0.6826921856557221, "grad_norm": 0.35546875, "learning_rate": 0.0002498430142006773, "loss": 0.4602, "step": 26895 }, { "epoch": 0.6828191037047379, "grad_norm": 0.345703125, "learning_rate": 0.00024981821438324446, "loss": 0.4489, "step": 26900 }, { "epoch": 0.6829460217537536, "grad_norm": 0.337890625, "learning_rate": 0.00024979340966777155, "loss": 0.4765, "step": 26905 }, { "epoch": 0.6830729398027694, "grad_norm": 0.326171875, "learning_rate": 0.0002497686000554759, "loss": 0.4592, "step": 26910 }, { "epoch": 0.683199857851785, "grad_norm": 0.36328125, "learning_rate": 0.00024974378554757466, "loss": 0.4489, "step": 26915 }, { "epoch": 0.6833267759008008, "grad_norm": 0.330078125, "learning_rate": 0.00024971896614528564, "loss": 0.4481, "step": 26920 }, { "epoch": 0.6834536939498166, "grad_norm": 0.322265625, "learning_rate": 0.0002496941418498267, "loss": 0.4699, "step": 26925 }, { "epoch": 0.6835806119988324, "grad_norm": 0.298828125, "learning_rate": 0.00024966931266241587, "loss": 0.4264, "step": 26930 }, { "epoch": 0.6837075300478481, "grad_norm": 0.33984375, "learning_rate": 0.00024964447858427164, "loss": 0.4369, "step": 26935 }, { "epoch": 0.6838344480968639, "grad_norm": 0.330078125, "learning_rate": 0.0002496196396166125, "loss": 0.4559, "step": 26940 }, { "epoch": 0.6839613661458797, "grad_norm": 0.37109375, "learning_rate": 0.00024959479576065736, "loss": 0.4734, "step": 26945 }, { "epoch": 0.6840882841948953, "grad_norm": 0.3359375, "learning_rate": 0.0002495699470176252, "loss": 0.4253, "step": 26950 }, { "epoch": 0.6842152022439111, "grad_norm": 0.349609375, "learning_rate": 0.00024954509338873544, "loss": 0.4294, "step": 26955 }, { "epoch": 0.6843421202929268, "grad_norm": 0.345703125, "learning_rate": 0.00024952023487520764, "loss": 0.4595, "step": 26960 }, { "epoch": 0.6844690383419426, "grad_norm": 0.37109375, "learning_rate": 0.0002494953714782615, "loss": 0.4475, "step": 26965 }, { "epoch": 0.6845959563909584, "grad_norm": 0.3203125, "learning_rate": 0.00024947050319911715, "loss": 0.4515, "step": 26970 }, { "epoch": 0.6847228744399741, "grad_norm": 0.37109375, "learning_rate": 0.00024944563003899477, "loss": 0.4434, "step": 26975 }, { "epoch": 0.6848497924889898, "grad_norm": 0.333984375, "learning_rate": 0.000249420751999115, "loss": 0.485, "step": 26980 }, { "epoch": 0.6849767105380056, "grad_norm": 0.35546875, "learning_rate": 0.0002493958690806985, "loss": 0.4605, "step": 26985 }, { "epoch": 0.6851036285870213, "grad_norm": 0.37109375, "learning_rate": 0.0002493709812849663, "loss": 0.4706, "step": 26990 }, { "epoch": 0.6852305466360371, "grad_norm": 0.34765625, "learning_rate": 0.0002493460886131397, "loss": 0.472, "step": 26995 }, { "epoch": 0.6853574646850529, "grad_norm": 0.318359375, "learning_rate": 0.00024932119106644, "loss": 0.4742, "step": 27000 }, { "epoch": 0.6854843827340686, "grad_norm": 0.33984375, "learning_rate": 0.00024929628864608905, "loss": 0.4596, "step": 27005 }, { "epoch": 0.6856113007830844, "grad_norm": 0.341796875, "learning_rate": 0.00024927138135330876, "loss": 0.4651, "step": 27010 }, { "epoch": 0.6857382188321001, "grad_norm": 0.361328125, "learning_rate": 0.0002492464691893214, "loss": 0.4679, "step": 27015 }, { "epoch": 0.6858651368811158, "grad_norm": 0.37109375, "learning_rate": 0.00024922155215534927, "loss": 0.4605, "step": 27020 }, { "epoch": 0.6859920549301316, "grad_norm": 0.306640625, "learning_rate": 0.00024919663025261516, "loss": 0.4549, "step": 27025 }, { "epoch": 0.6861189729791474, "grad_norm": 0.3203125, "learning_rate": 0.00024917170348234184, "loss": 0.449, "step": 27030 }, { "epoch": 0.6862458910281631, "grad_norm": 0.3515625, "learning_rate": 0.0002491467718457526, "loss": 0.4655, "step": 27035 }, { "epoch": 0.6863728090771789, "grad_norm": 0.32421875, "learning_rate": 0.0002491218353440707, "loss": 0.4633, "step": 27040 }, { "epoch": 0.6864997271261947, "grad_norm": 0.32421875, "learning_rate": 0.0002490968939785198, "loss": 0.4605, "step": 27045 }, { "epoch": 0.6866266451752103, "grad_norm": 0.322265625, "learning_rate": 0.0002490719477503239, "loss": 0.4204, "step": 27050 }, { "epoch": 0.6867535632242261, "grad_norm": 0.333984375, "learning_rate": 0.00024904699666070693, "loss": 0.4428, "step": 27055 }, { "epoch": 0.6868804812732419, "grad_norm": 0.373046875, "learning_rate": 0.00024902204071089326, "loss": 0.4371, "step": 27060 }, { "epoch": 0.6870073993222576, "grad_norm": 0.33203125, "learning_rate": 0.0002489970799021076, "loss": 0.4601, "step": 27065 }, { "epoch": 0.6871343173712734, "grad_norm": 0.318359375, "learning_rate": 0.00024897211423557453, "loss": 0.4698, "step": 27070 }, { "epoch": 0.6872612354202892, "grad_norm": 0.337890625, "learning_rate": 0.0002489471437125193, "loss": 0.4594, "step": 27075 }, { "epoch": 0.6873881534693048, "grad_norm": 0.326171875, "learning_rate": 0.00024892216833416713, "loss": 0.4516, "step": 27080 }, { "epoch": 0.6875150715183206, "grad_norm": 0.345703125, "learning_rate": 0.0002488971881017436, "loss": 0.4581, "step": 27085 }, { "epoch": 0.6876419895673364, "grad_norm": 0.365234375, "learning_rate": 0.00024887220301647437, "loss": 0.4394, "step": 27090 }, { "epoch": 0.6877689076163521, "grad_norm": 0.330078125, "learning_rate": 0.0002488472130795856, "loss": 0.429, "step": 27095 }, { "epoch": 0.6878958256653679, "grad_norm": 0.35546875, "learning_rate": 0.00024882221829230345, "loss": 0.4684, "step": 27100 }, { "epoch": 0.6880227437143837, "grad_norm": 0.37890625, "learning_rate": 0.00024879721865585436, "loss": 0.5074, "step": 27105 }, { "epoch": 0.6881496617633994, "grad_norm": 0.34765625, "learning_rate": 0.00024877221417146515, "loss": 0.4451, "step": 27110 }, { "epoch": 0.6882765798124151, "grad_norm": 0.318359375, "learning_rate": 0.0002487472048403627, "loss": 0.4564, "step": 27115 }, { "epoch": 0.6884034978614308, "grad_norm": 0.361328125, "learning_rate": 0.00024872219066377426, "loss": 0.4737, "step": 27120 }, { "epoch": 0.6885304159104466, "grad_norm": 0.318359375, "learning_rate": 0.0002486971716429273, "loss": 0.448, "step": 27125 }, { "epoch": 0.6886573339594624, "grad_norm": 0.34375, "learning_rate": 0.0002486721477790494, "loss": 0.4217, "step": 27130 }, { "epoch": 0.6887842520084781, "grad_norm": 0.353515625, "learning_rate": 0.0002486471190733685, "loss": 0.4408, "step": 27135 }, { "epoch": 0.6889111700574939, "grad_norm": 0.30078125, "learning_rate": 0.0002486220855271128, "loss": 0.4514, "step": 27140 }, { "epoch": 0.6890380881065096, "grad_norm": 0.36328125, "learning_rate": 0.00024859704714151056, "loss": 0.4704, "step": 27145 }, { "epoch": 0.6891650061555253, "grad_norm": 0.314453125, "learning_rate": 0.00024857200391779064, "loss": 0.4221, "step": 27150 }, { "epoch": 0.6892919242045411, "grad_norm": 0.353515625, "learning_rate": 0.00024854695585718164, "loss": 0.4084, "step": 27155 }, { "epoch": 0.6894188422535569, "grad_norm": 0.3515625, "learning_rate": 0.0002485219029609128, "loss": 0.4739, "step": 27160 }, { "epoch": 0.6895457603025726, "grad_norm": 0.330078125, "learning_rate": 0.0002484968452302134, "loss": 0.4573, "step": 27165 }, { "epoch": 0.6896726783515884, "grad_norm": 0.361328125, "learning_rate": 0.0002484717826663131, "loss": 0.4672, "step": 27170 }, { "epoch": 0.6897995964006042, "grad_norm": 0.333984375, "learning_rate": 0.0002484467152704416, "loss": 0.4337, "step": 27175 }, { "epoch": 0.6899265144496198, "grad_norm": 0.333984375, "learning_rate": 0.00024842164304382904, "loss": 0.4532, "step": 27180 }, { "epoch": 0.6900534324986356, "grad_norm": 0.330078125, "learning_rate": 0.0002483965659877056, "loss": 0.4487, "step": 27185 }, { "epoch": 0.6901803505476514, "grad_norm": 0.341796875, "learning_rate": 0.00024837148410330193, "loss": 0.4574, "step": 27190 }, { "epoch": 0.6903072685966671, "grad_norm": 0.349609375, "learning_rate": 0.0002483463973918487, "loss": 0.4584, "step": 27195 }, { "epoch": 0.6904341866456829, "grad_norm": 0.70703125, "learning_rate": 0.0002483213058545769, "loss": 0.4741, "step": 27200 }, { "epoch": 0.6905611046946987, "grad_norm": 0.375, "learning_rate": 0.0002482962094927178, "loss": 0.4634, "step": 27205 }, { "epoch": 0.6906880227437144, "grad_norm": 0.333984375, "learning_rate": 0.0002482711083075028, "loss": 0.4496, "step": 27210 }, { "epoch": 0.6908149407927301, "grad_norm": 0.328125, "learning_rate": 0.00024824600230016373, "loss": 0.4617, "step": 27215 }, { "epoch": 0.6909418588417459, "grad_norm": 0.326171875, "learning_rate": 0.00024822089147193247, "loss": 0.4129, "step": 27220 }, { "epoch": 0.6910687768907616, "grad_norm": 0.33984375, "learning_rate": 0.0002481957758240412, "loss": 0.5042, "step": 27225 }, { "epoch": 0.6911956949397774, "grad_norm": 0.3671875, "learning_rate": 0.0002481706553577222, "loss": 0.4716, "step": 27230 }, { "epoch": 0.6913226129887932, "grad_norm": 0.345703125, "learning_rate": 0.0002481455300742084, "loss": 0.4609, "step": 27235 }, { "epoch": 0.6914495310378089, "grad_norm": 0.380859375, "learning_rate": 0.0002481203999747325, "loss": 0.4738, "step": 27240 }, { "epoch": 0.6915764490868246, "grad_norm": 0.341796875, "learning_rate": 0.0002480952650605276, "loss": 0.4669, "step": 27245 }, { "epoch": 0.6917033671358404, "grad_norm": 0.330078125, "learning_rate": 0.0002480701253328272, "loss": 0.4613, "step": 27250 }, { "epoch": 0.6918302851848561, "grad_norm": 0.328125, "learning_rate": 0.00024804498079286473, "loss": 0.4633, "step": 27255 }, { "epoch": 0.6919572032338719, "grad_norm": 0.33984375, "learning_rate": 0.0002480198314418742, "loss": 0.4296, "step": 27260 }, { "epoch": 0.6920841212828877, "grad_norm": 0.34375, "learning_rate": 0.00024799467728108947, "loss": 0.473, "step": 27265 }, { "epoch": 0.6922110393319034, "grad_norm": 0.3671875, "learning_rate": 0.00024796951831174504, "loss": 0.4687, "step": 27270 }, { "epoch": 0.6923379573809192, "grad_norm": 0.333984375, "learning_rate": 0.0002479443545350754, "loss": 0.4437, "step": 27275 }, { "epoch": 0.6924648754299348, "grad_norm": 0.32421875, "learning_rate": 0.00024791918595231527, "loss": 0.4308, "step": 27280 }, { "epoch": 0.6925917934789506, "grad_norm": 0.3203125, "learning_rate": 0.0002478940125646997, "loss": 0.4391, "step": 27285 }, { "epoch": 0.6927187115279664, "grad_norm": 0.3828125, "learning_rate": 0.0002478688343734638, "loss": 0.4818, "step": 27290 }, { "epoch": 0.6928456295769821, "grad_norm": 0.359375, "learning_rate": 0.0002478436513798433, "loss": 0.4743, "step": 27295 }, { "epoch": 0.6929725476259979, "grad_norm": 0.33203125, "learning_rate": 0.00024781846358507383, "loss": 0.4785, "step": 27300 }, { "epoch": 0.6930994656750137, "grad_norm": 0.337890625, "learning_rate": 0.00024779327099039126, "loss": 0.4691, "step": 27305 }, { "epoch": 0.6932263837240294, "grad_norm": 0.34375, "learning_rate": 0.00024776807359703183, "loss": 0.4508, "step": 27310 }, { "epoch": 0.6933533017730451, "grad_norm": 0.326171875, "learning_rate": 0.00024774287140623207, "loss": 0.4385, "step": 27315 }, { "epoch": 0.6934802198220609, "grad_norm": 0.345703125, "learning_rate": 0.0002477176644192284, "loss": 0.4423, "step": 27320 }, { "epoch": 0.6936071378710766, "grad_norm": 0.3359375, "learning_rate": 0.00024769245263725796, "loss": 0.4668, "step": 27325 }, { "epoch": 0.6937340559200924, "grad_norm": 0.330078125, "learning_rate": 0.00024766723606155775, "loss": 0.4236, "step": 27330 }, { "epoch": 0.6938609739691082, "grad_norm": 0.3359375, "learning_rate": 0.0002476420146933652, "loss": 0.4492, "step": 27335 }, { "epoch": 0.6939878920181239, "grad_norm": 0.326171875, "learning_rate": 0.00024761678853391783, "loss": 0.4296, "step": 27340 }, { "epoch": 0.6941148100671396, "grad_norm": 0.33984375, "learning_rate": 0.0002475915575844536, "loss": 0.4605, "step": 27345 }, { "epoch": 0.6942417281161554, "grad_norm": 0.30859375, "learning_rate": 0.0002475663218462105, "loss": 0.4456, "step": 27350 }, { "epoch": 0.6943686461651711, "grad_norm": 0.37109375, "learning_rate": 0.00024754108132042684, "loss": 0.4847, "step": 27355 }, { "epoch": 0.6944955642141869, "grad_norm": 0.337890625, "learning_rate": 0.00024751583600834113, "loss": 0.4585, "step": 27360 }, { "epoch": 0.6946224822632027, "grad_norm": 0.33984375, "learning_rate": 0.00024749058591119223, "loss": 0.4384, "step": 27365 }, { "epoch": 0.6947494003122184, "grad_norm": 0.345703125, "learning_rate": 0.0002474653310302191, "loss": 0.4577, "step": 27370 }, { "epoch": 0.6948763183612342, "grad_norm": 0.3359375, "learning_rate": 0.00024744007136666104, "loss": 0.4585, "step": 27375 }, { "epoch": 0.6950032364102499, "grad_norm": 0.349609375, "learning_rate": 0.00024741480692175744, "loss": 0.4662, "step": 27380 }, { "epoch": 0.6951301544592656, "grad_norm": 0.3203125, "learning_rate": 0.0002473895376967481, "loss": 0.4333, "step": 27385 }, { "epoch": 0.6952570725082814, "grad_norm": 0.33984375, "learning_rate": 0.00024736426369287296, "loss": 0.4676, "step": 27390 }, { "epoch": 0.6953839905572972, "grad_norm": 0.345703125, "learning_rate": 0.0002473389849113721, "loss": 0.427, "step": 27395 }, { "epoch": 0.6955109086063129, "grad_norm": 0.318359375, "learning_rate": 0.0002473137013534861, "loss": 0.4183, "step": 27400 }, { "epoch": 0.6956378266553287, "grad_norm": 0.5234375, "learning_rate": 0.00024728841302045553, "loss": 0.4542, "step": 27405 }, { "epoch": 0.6957647447043444, "grad_norm": 0.3203125, "learning_rate": 0.00024726311991352123, "loss": 0.4306, "step": 27410 }, { "epoch": 0.6958916627533601, "grad_norm": 0.37890625, "learning_rate": 0.00024723782203392447, "loss": 0.4544, "step": 27415 }, { "epoch": 0.6960185808023759, "grad_norm": 0.35546875, "learning_rate": 0.00024721251938290644, "loss": 0.4416, "step": 27420 }, { "epoch": 0.6961454988513917, "grad_norm": 0.3359375, "learning_rate": 0.0002471872119617089, "loss": 0.4595, "step": 27425 }, { "epoch": 0.6962724169004074, "grad_norm": 0.376953125, "learning_rate": 0.00024716189977157347, "loss": 0.47, "step": 27430 }, { "epoch": 0.6963993349494232, "grad_norm": 0.404296875, "learning_rate": 0.0002471365828137424, "loss": 0.4712, "step": 27435 }, { "epoch": 0.696526252998439, "grad_norm": 0.34375, "learning_rate": 0.0002471112610894579, "loss": 0.4543, "step": 27440 }, { "epoch": 0.6966531710474546, "grad_norm": 0.365234375, "learning_rate": 0.00024708593459996244, "loss": 0.4606, "step": 27445 }, { "epoch": 0.6967800890964704, "grad_norm": 0.330078125, "learning_rate": 0.0002470606033464989, "loss": 0.4561, "step": 27450 }, { "epoch": 0.6969070071454861, "grad_norm": 0.328125, "learning_rate": 0.00024703526733031023, "loss": 0.4408, "step": 27455 }, { "epoch": 0.6970339251945019, "grad_norm": 0.328125, "learning_rate": 0.00024700992655263954, "loss": 0.4392, "step": 27460 }, { "epoch": 0.6971608432435177, "grad_norm": 0.337890625, "learning_rate": 0.00024698458101473053, "loss": 0.4597, "step": 27465 }, { "epoch": 0.6972877612925334, "grad_norm": 0.341796875, "learning_rate": 0.00024695923071782667, "loss": 0.4573, "step": 27470 }, { "epoch": 0.6974146793415492, "grad_norm": 0.353515625, "learning_rate": 0.00024693387566317203, "loss": 0.4686, "step": 27475 }, { "epoch": 0.6975415973905649, "grad_norm": 0.33203125, "learning_rate": 0.00024690851585201074, "loss": 0.4239, "step": 27480 }, { "epoch": 0.6976685154395806, "grad_norm": 0.318359375, "learning_rate": 0.0002468831512855871, "loss": 0.4511, "step": 27485 }, { "epoch": 0.6977954334885964, "grad_norm": 0.3359375, "learning_rate": 0.0002468577819651459, "loss": 0.4298, "step": 27490 }, { "epoch": 0.6979223515376122, "grad_norm": 0.359375, "learning_rate": 0.00024683240789193187, "loss": 0.4806, "step": 27495 }, { "epoch": 0.6980492695866279, "grad_norm": 0.3203125, "learning_rate": 0.0002468070290671902, "loss": 0.4434, "step": 27500 }, { "epoch": 0.6981761876356437, "grad_norm": 0.32421875, "learning_rate": 0.0002467816454921662, "loss": 0.4277, "step": 27505 }, { "epoch": 0.6983031056846594, "grad_norm": 0.3359375, "learning_rate": 0.0002467562571681053, "loss": 0.4542, "step": 27510 }, { "epoch": 0.6984300237336751, "grad_norm": 0.34375, "learning_rate": 0.00024673086409625344, "loss": 0.4832, "step": 27515 }, { "epoch": 0.6985569417826909, "grad_norm": 0.337890625, "learning_rate": 0.00024670546627785666, "loss": 0.4507, "step": 27520 }, { "epoch": 0.6986838598317067, "grad_norm": 0.337890625, "learning_rate": 0.00024668006371416116, "loss": 0.4613, "step": 27525 }, { "epoch": 0.6988107778807224, "grad_norm": 0.345703125, "learning_rate": 0.00024665465640641345, "loss": 0.4582, "step": 27530 }, { "epoch": 0.6989376959297382, "grad_norm": 0.34375, "learning_rate": 0.0002466292443558602, "loss": 0.4683, "step": 27535 }, { "epoch": 0.699064613978754, "grad_norm": 0.3203125, "learning_rate": 0.00024660382756374846, "loss": 0.4333, "step": 27540 }, { "epoch": 0.6991915320277696, "grad_norm": 0.3359375, "learning_rate": 0.0002465784060313254, "loss": 0.4426, "step": 27545 }, { "epoch": 0.6993184500767854, "grad_norm": 0.365234375, "learning_rate": 0.0002465529797598384, "loss": 0.4711, "step": 27550 }, { "epoch": 0.6994453681258012, "grad_norm": 0.337890625, "learning_rate": 0.00024652754875053516, "loss": 0.4673, "step": 27555 }, { "epoch": 0.6995722861748169, "grad_norm": 0.349609375, "learning_rate": 0.0002465021130046635, "loss": 0.4507, "step": 27560 }, { "epoch": 0.6996992042238327, "grad_norm": 0.326171875, "learning_rate": 0.0002464766725234717, "loss": 0.4698, "step": 27565 }, { "epoch": 0.6998261222728485, "grad_norm": 0.3125, "learning_rate": 0.0002464512273082079, "loss": 0.4388, "step": 27570 }, { "epoch": 0.6999530403218641, "grad_norm": 0.349609375, "learning_rate": 0.00024642577736012087, "loss": 0.4576, "step": 27575 }, { "epoch": 0.7000799583708799, "grad_norm": 0.32421875, "learning_rate": 0.0002464003226804593, "loss": 0.4307, "step": 27580 }, { "epoch": 0.7002068764198957, "grad_norm": 0.328125, "learning_rate": 0.0002463748632704723, "loss": 0.4395, "step": 27585 }, { "epoch": 0.7003337944689114, "grad_norm": 0.33203125, "learning_rate": 0.0002463493991314092, "loss": 0.4716, "step": 27590 }, { "epoch": 0.7004607125179272, "grad_norm": 0.3125, "learning_rate": 0.00024632393026451945, "loss": 0.4468, "step": 27595 }, { "epoch": 0.700587630566943, "grad_norm": 0.34375, "learning_rate": 0.0002462984566710528, "loss": 0.4541, "step": 27600 }, { "epoch": 0.7007145486159587, "grad_norm": 0.33984375, "learning_rate": 0.00024627297835225925, "loss": 0.4414, "step": 27605 }, { "epoch": 0.7008414666649744, "grad_norm": 0.349609375, "learning_rate": 0.000246247495309389, "loss": 0.466, "step": 27610 }, { "epoch": 0.7009683847139901, "grad_norm": 0.34375, "learning_rate": 0.00024622200754369244, "loss": 0.4536, "step": 27615 }, { "epoch": 0.7010953027630059, "grad_norm": 0.345703125, "learning_rate": 0.0002461965150564204, "loss": 0.4572, "step": 27620 }, { "epoch": 0.7012222208120217, "grad_norm": 0.31640625, "learning_rate": 0.0002461710178488236, "loss": 0.4605, "step": 27625 }, { "epoch": 0.7013491388610374, "grad_norm": 0.341796875, "learning_rate": 0.00024614551592215333, "loss": 0.4638, "step": 27630 }, { "epoch": 0.7014760569100532, "grad_norm": 0.3359375, "learning_rate": 0.00024612000927766084, "loss": 0.453, "step": 27635 }, { "epoch": 0.701602974959069, "grad_norm": 0.353515625, "learning_rate": 0.0002460944979165978, "loss": 0.4758, "step": 27640 }, { "epoch": 0.7017298930080846, "grad_norm": 0.318359375, "learning_rate": 0.000246068981840216, "loss": 0.4216, "step": 27645 }, { "epoch": 0.7018568110571004, "grad_norm": 0.369140625, "learning_rate": 0.0002460434610497676, "loss": 0.4625, "step": 27650 }, { "epoch": 0.7019837291061162, "grad_norm": 0.341796875, "learning_rate": 0.00024601793554650475, "loss": 0.434, "step": 27655 }, { "epoch": 0.7021106471551319, "grad_norm": 0.361328125, "learning_rate": 0.0002459924053316801, "loss": 0.4468, "step": 27660 }, { "epoch": 0.7022375652041477, "grad_norm": 0.30078125, "learning_rate": 0.00024596687040654634, "loss": 0.4227, "step": 27665 }, { "epoch": 0.7023644832531635, "grad_norm": 0.33984375, "learning_rate": 0.00024594133077235645, "loss": 0.4582, "step": 27670 }, { "epoch": 0.7024914013021791, "grad_norm": 0.35546875, "learning_rate": 0.0002459157864303637, "loss": 0.4645, "step": 27675 }, { "epoch": 0.7026183193511949, "grad_norm": 0.337890625, "learning_rate": 0.0002458902373818215, "loss": 0.4495, "step": 27680 }, { "epoch": 0.7027452374002107, "grad_norm": 0.341796875, "learning_rate": 0.0002458646836279836, "loss": 0.4322, "step": 27685 }, { "epoch": 0.7028721554492264, "grad_norm": 0.337890625, "learning_rate": 0.0002458391251701038, "loss": 0.4525, "step": 27690 }, { "epoch": 0.7029990734982422, "grad_norm": 0.333984375, "learning_rate": 0.0002458135620094364, "loss": 0.4499, "step": 27695 }, { "epoch": 0.703125991547258, "grad_norm": 0.310546875, "learning_rate": 0.0002457879941472356, "loss": 0.4717, "step": 27700 }, { "epoch": 0.7032529095962737, "grad_norm": 0.36328125, "learning_rate": 0.0002457624215847561, "loss": 0.4336, "step": 27705 }, { "epoch": 0.7033798276452894, "grad_norm": 0.330078125, "learning_rate": 0.0002457368443232527, "loss": 0.4389, "step": 27710 }, { "epoch": 0.7035067456943052, "grad_norm": 0.28515625, "learning_rate": 0.0002457112623639805, "loss": 0.4303, "step": 27715 }, { "epoch": 0.7036336637433209, "grad_norm": 0.328125, "learning_rate": 0.00024568567570819474, "loss": 0.4538, "step": 27720 }, { "epoch": 0.7037605817923367, "grad_norm": 0.3359375, "learning_rate": 0.000245660084357151, "loss": 0.431, "step": 27725 }, { "epoch": 0.7038874998413525, "grad_norm": 0.359375, "learning_rate": 0.0002456344883121051, "loss": 0.4985, "step": 27730 }, { "epoch": 0.7040144178903682, "grad_norm": 0.341796875, "learning_rate": 0.0002456088875743129, "loss": 0.4505, "step": 27735 }, { "epoch": 0.704141335939384, "grad_norm": 0.345703125, "learning_rate": 0.0002455832821450307, "loss": 0.4648, "step": 27740 }, { "epoch": 0.7042682539883997, "grad_norm": 0.33203125, "learning_rate": 0.0002455576720255149, "loss": 0.4761, "step": 27745 }, { "epoch": 0.7043951720374154, "grad_norm": 0.359375, "learning_rate": 0.00024553205721702216, "loss": 0.4797, "step": 27750 }, { "epoch": 0.7045220900864312, "grad_norm": 0.376953125, "learning_rate": 0.0002455064377208095, "loss": 0.469, "step": 27755 }, { "epoch": 0.704649008135447, "grad_norm": 0.34375, "learning_rate": 0.00024548081353813395, "loss": 0.4639, "step": 27760 }, { "epoch": 0.7047759261844627, "grad_norm": 0.341796875, "learning_rate": 0.00024545518467025294, "loss": 0.4725, "step": 27765 }, { "epoch": 0.7049028442334785, "grad_norm": 0.341796875, "learning_rate": 0.000245429551118424, "loss": 0.4735, "step": 27770 }, { "epoch": 0.7050297622824941, "grad_norm": 0.3359375, "learning_rate": 0.000245403912883905, "loss": 0.4293, "step": 27775 }, { "epoch": 0.7051566803315099, "grad_norm": 0.322265625, "learning_rate": 0.000245378269967954, "loss": 0.4596, "step": 27780 }, { "epoch": 0.7052835983805257, "grad_norm": 0.3359375, "learning_rate": 0.0002453526223718293, "loss": 0.4547, "step": 27785 }, { "epoch": 0.7054105164295414, "grad_norm": 0.345703125, "learning_rate": 0.0002453269700967894, "loss": 0.4341, "step": 27790 }, { "epoch": 0.7055374344785572, "grad_norm": 0.32421875, "learning_rate": 0.0002453013131440931, "loss": 0.4599, "step": 27795 }, { "epoch": 0.705664352527573, "grad_norm": 0.3359375, "learning_rate": 0.00024527565151499927, "loss": 0.4452, "step": 27800 }, { "epoch": 0.7057912705765887, "grad_norm": 0.3359375, "learning_rate": 0.0002452499852107672, "loss": 0.4694, "step": 27805 }, { "epoch": 0.7059181886256044, "grad_norm": 0.369140625, "learning_rate": 0.0002452243142326562, "loss": 0.4657, "step": 27810 }, { "epoch": 0.7060451066746202, "grad_norm": 0.326171875, "learning_rate": 0.0002451986385819261, "loss": 0.4119, "step": 27815 }, { "epoch": 0.7061720247236359, "grad_norm": 0.330078125, "learning_rate": 0.00024517295825983676, "loss": 0.4585, "step": 27820 }, { "epoch": 0.7062989427726517, "grad_norm": 0.3828125, "learning_rate": 0.0002451472732676483, "loss": 0.4913, "step": 27825 }, { "epoch": 0.7064258608216675, "grad_norm": 0.298828125, "learning_rate": 0.000245121583606621, "loss": 0.4272, "step": 27830 }, { "epoch": 0.7065527788706832, "grad_norm": 0.34765625, "learning_rate": 0.0002450958892780154, "loss": 0.4736, "step": 27835 }, { "epoch": 0.7066796969196989, "grad_norm": 0.330078125, "learning_rate": 0.0002450701902830925, "loss": 0.4942, "step": 27840 }, { "epoch": 0.7068066149687147, "grad_norm": 0.7265625, "learning_rate": 0.0002450444866231132, "loss": 0.4672, "step": 27845 }, { "epoch": 0.7069335330177304, "grad_norm": 0.341796875, "learning_rate": 0.00024501877829933883, "loss": 0.473, "step": 27850 }, { "epoch": 0.7070604510667462, "grad_norm": 0.365234375, "learning_rate": 0.0002449930653130308, "loss": 0.453, "step": 27855 }, { "epoch": 0.707187369115762, "grad_norm": 0.357421875, "learning_rate": 0.0002449673476654509, "loss": 0.4607, "step": 27860 }, { "epoch": 0.7073142871647777, "grad_norm": 0.349609375, "learning_rate": 0.00024494162535786113, "loss": 0.4575, "step": 27865 }, { "epoch": 0.7074412052137935, "grad_norm": 0.322265625, "learning_rate": 0.0002449158983915236, "loss": 0.4233, "step": 27870 }, { "epoch": 0.7075681232628092, "grad_norm": 0.34765625, "learning_rate": 0.00024489016676770077, "loss": 0.46, "step": 27875 }, { "epoch": 0.7076950413118249, "grad_norm": 0.3125, "learning_rate": 0.0002448644304876552, "loss": 0.4167, "step": 27880 }, { "epoch": 0.7078219593608407, "grad_norm": 0.330078125, "learning_rate": 0.00024483868955264983, "loss": 0.4254, "step": 27885 }, { "epoch": 0.7079488774098565, "grad_norm": 0.349609375, "learning_rate": 0.00024481294396394777, "loss": 0.4665, "step": 27890 }, { "epoch": 0.7080757954588722, "grad_norm": 0.33984375, "learning_rate": 0.0002447871937228123, "loss": 0.4459, "step": 27895 }, { "epoch": 0.708202713507888, "grad_norm": 0.337890625, "learning_rate": 0.000244761438830507, "loss": 0.4583, "step": 27900 }, { "epoch": 0.7083296315569038, "grad_norm": 0.3359375, "learning_rate": 0.00024473567928829564, "loss": 0.4707, "step": 27905 }, { "epoch": 0.7084565496059194, "grad_norm": 0.376953125, "learning_rate": 0.00024470991509744223, "loss": 0.4593, "step": 27910 }, { "epoch": 0.7085834676549352, "grad_norm": 0.310546875, "learning_rate": 0.00024468414625921096, "loss": 0.4417, "step": 27915 }, { "epoch": 0.708710385703951, "grad_norm": 0.36328125, "learning_rate": 0.00024465837277486645, "loss": 0.4823, "step": 27920 }, { "epoch": 0.7088373037529667, "grad_norm": 0.33984375, "learning_rate": 0.0002446325946456732, "loss": 0.4349, "step": 27925 }, { "epoch": 0.7089642218019825, "grad_norm": 0.359375, "learning_rate": 0.0002446068118728963, "loss": 0.4184, "step": 27930 }, { "epoch": 0.7090911398509983, "grad_norm": 0.328125, "learning_rate": 0.0002445810244578008, "loss": 0.4499, "step": 27935 }, { "epoch": 0.7092180579000139, "grad_norm": 0.3203125, "learning_rate": 0.0002445552324016521, "loss": 0.4297, "step": 27940 }, { "epoch": 0.7093449759490297, "grad_norm": 0.341796875, "learning_rate": 0.0002445294357057158, "loss": 0.4835, "step": 27945 }, { "epoch": 0.7094718939980454, "grad_norm": 0.34375, "learning_rate": 0.0002445036343712578, "loss": 0.4628, "step": 27950 }, { "epoch": 0.7095988120470612, "grad_norm": 0.328125, "learning_rate": 0.00024447782839954405, "loss": 0.4697, "step": 27955 }, { "epoch": 0.709725730096077, "grad_norm": 0.35546875, "learning_rate": 0.0002444520177918409, "loss": 0.4593, "step": 27960 }, { "epoch": 0.7098526481450927, "grad_norm": 0.376953125, "learning_rate": 0.0002444262025494149, "loss": 0.4498, "step": 27965 }, { "epoch": 0.7099795661941085, "grad_norm": 0.333984375, "learning_rate": 0.00024440038267353276, "loss": 0.4947, "step": 27970 }, { "epoch": 0.7101064842431242, "grad_norm": 0.3515625, "learning_rate": 0.0002443745581654614, "loss": 0.4763, "step": 27975 }, { "epoch": 0.7102334022921399, "grad_norm": 0.3515625, "learning_rate": 0.00024434872902646807, "loss": 0.4697, "step": 27980 }, { "epoch": 0.7103603203411557, "grad_norm": 0.296875, "learning_rate": 0.0002443228952578202, "loss": 0.4418, "step": 27985 }, { "epoch": 0.7104872383901715, "grad_norm": 0.34765625, "learning_rate": 0.00024429705686078543, "loss": 0.4314, "step": 27990 }, { "epoch": 0.7106141564391872, "grad_norm": 0.310546875, "learning_rate": 0.0002442712138366316, "loss": 0.4506, "step": 27995 }, { "epoch": 0.710741074488203, "grad_norm": 0.3359375, "learning_rate": 0.00024424536618662695, "loss": 0.4355, "step": 28000 }, { "epoch": 0.7108679925372187, "grad_norm": 0.337890625, "learning_rate": 0.00024421951391203966, "loss": 0.4521, "step": 28005 }, { "epoch": 0.7109949105862344, "grad_norm": 0.318359375, "learning_rate": 0.0002441936570141384, "loss": 0.4606, "step": 28010 }, { "epoch": 0.7111218286352502, "grad_norm": 0.365234375, "learning_rate": 0.0002441677954941919, "loss": 0.4709, "step": 28015 }, { "epoch": 0.711248746684266, "grad_norm": 0.298828125, "learning_rate": 0.00024414192935346915, "loss": 0.4209, "step": 28020 }, { "epoch": 0.7113756647332817, "grad_norm": 0.359375, "learning_rate": 0.0002441160585932394, "loss": 0.4576, "step": 28025 }, { "epoch": 0.7115025827822975, "grad_norm": 0.33203125, "learning_rate": 0.00024409018321477219, "loss": 0.4099, "step": 28030 }, { "epoch": 0.7116295008313133, "grad_norm": 0.357421875, "learning_rate": 0.00024406430321933717, "loss": 0.4214, "step": 28035 }, { "epoch": 0.7117564188803289, "grad_norm": 0.33203125, "learning_rate": 0.00024403841860820428, "loss": 0.4659, "step": 28040 }, { "epoch": 0.7118833369293447, "grad_norm": 0.31640625, "learning_rate": 0.00024401252938264362, "loss": 0.4307, "step": 28045 }, { "epoch": 0.7120102549783605, "grad_norm": 0.3359375, "learning_rate": 0.00024398663554392558, "loss": 0.4489, "step": 28050 }, { "epoch": 0.7121371730273762, "grad_norm": 0.361328125, "learning_rate": 0.0002439607370933208, "loss": 0.4517, "step": 28055 }, { "epoch": 0.712264091076392, "grad_norm": 0.32421875, "learning_rate": 0.0002439348340321001, "loss": 0.4499, "step": 28060 }, { "epoch": 0.7123910091254078, "grad_norm": 0.353515625, "learning_rate": 0.00024390892636153447, "loss": 0.4525, "step": 28065 }, { "epoch": 0.7125179271744235, "grad_norm": 0.31640625, "learning_rate": 0.00024388301408289525, "loss": 0.4351, "step": 28070 }, { "epoch": 0.7126448452234392, "grad_norm": 0.33984375, "learning_rate": 0.0002438570971974539, "loss": 0.4585, "step": 28075 }, { "epoch": 0.712771763272455, "grad_norm": 0.384765625, "learning_rate": 0.0002438311757064822, "loss": 0.4811, "step": 28080 }, { "epoch": 0.7128986813214707, "grad_norm": 0.3203125, "learning_rate": 0.00024380524961125207, "loss": 0.487, "step": 28085 }, { "epoch": 0.7130255993704865, "grad_norm": 0.36328125, "learning_rate": 0.00024377931891303575, "loss": 0.4734, "step": 28090 }, { "epoch": 0.7131525174195023, "grad_norm": 0.3359375, "learning_rate": 0.00024375338361310554, "loss": 0.469, "step": 28095 }, { "epoch": 0.713279435468518, "grad_norm": 0.369140625, "learning_rate": 0.0002437274437127342, "loss": 0.4793, "step": 28100 }, { "epoch": 0.7134063535175337, "grad_norm": 0.35546875, "learning_rate": 0.0002437014992131945, "loss": 0.4924, "step": 28105 }, { "epoch": 0.7135332715665494, "grad_norm": 0.36328125, "learning_rate": 0.0002436755501157596, "loss": 0.45, "step": 28110 }, { "epoch": 0.7136601896155652, "grad_norm": 0.328125, "learning_rate": 0.00024364959642170274, "loss": 0.4411, "step": 28115 }, { "epoch": 0.713787107664581, "grad_norm": 0.361328125, "learning_rate": 0.0002436236381322975, "loss": 0.4811, "step": 28120 }, { "epoch": 0.7139140257135967, "grad_norm": 0.359375, "learning_rate": 0.0002435976752488176, "loss": 0.4807, "step": 28125 }, { "epoch": 0.7140409437626125, "grad_norm": 0.369140625, "learning_rate": 0.00024357170777253707, "loss": 0.4629, "step": 28130 }, { "epoch": 0.7141678618116283, "grad_norm": 0.3671875, "learning_rate": 0.00024354573570473017, "loss": 0.4761, "step": 28135 }, { "epoch": 0.7142947798606439, "grad_norm": 0.328125, "learning_rate": 0.0002435197590466712, "loss": 0.4407, "step": 28140 }, { "epoch": 0.7144216979096597, "grad_norm": 0.34375, "learning_rate": 0.00024349377779963493, "loss": 0.4628, "step": 28145 }, { "epoch": 0.7145486159586755, "grad_norm": 0.34375, "learning_rate": 0.00024346779196489626, "loss": 0.4604, "step": 28150 }, { "epoch": 0.7146755340076912, "grad_norm": 0.35546875, "learning_rate": 0.00024344180154373024, "loss": 0.4655, "step": 28155 }, { "epoch": 0.714802452056707, "grad_norm": 0.37109375, "learning_rate": 0.00024341580653741224, "loss": 0.4903, "step": 28160 }, { "epoch": 0.7149293701057228, "grad_norm": 0.34765625, "learning_rate": 0.00024338980694721786, "loss": 0.4652, "step": 28165 }, { "epoch": 0.7150562881547385, "grad_norm": 0.3359375, "learning_rate": 0.00024336380277442283, "loss": 0.461, "step": 28170 }, { "epoch": 0.7151832062037542, "grad_norm": 0.318359375, "learning_rate": 0.00024333779402030317, "loss": 0.4752, "step": 28175 }, { "epoch": 0.71531012425277, "grad_norm": 0.359375, "learning_rate": 0.00024331178068613517, "loss": 0.452, "step": 28180 }, { "epoch": 0.7154370423017857, "grad_norm": 0.34375, "learning_rate": 0.00024328576277319523, "loss": 0.4767, "step": 28185 }, { "epoch": 0.7155639603508015, "grad_norm": 0.330078125, "learning_rate": 0.0002432597402827601, "loss": 0.4326, "step": 28190 }, { "epoch": 0.7156908783998173, "grad_norm": 0.35546875, "learning_rate": 0.00024323371321610662, "loss": 0.4396, "step": 28195 }, { "epoch": 0.715817796448833, "grad_norm": 0.333984375, "learning_rate": 0.00024320768157451206, "loss": 0.4405, "step": 28200 }, { "epoch": 0.7159447144978487, "grad_norm": 0.369140625, "learning_rate": 0.0002431816453592536, "loss": 0.465, "step": 28205 }, { "epoch": 0.7160716325468645, "grad_norm": 0.365234375, "learning_rate": 0.00024315560457160892, "loss": 0.4846, "step": 28210 }, { "epoch": 0.7161985505958802, "grad_norm": 0.330078125, "learning_rate": 0.00024312955921285587, "loss": 0.4622, "step": 28215 }, { "epoch": 0.716325468644896, "grad_norm": 0.34375, "learning_rate": 0.00024310350928427246, "loss": 0.4636, "step": 28220 }, { "epoch": 0.7164523866939118, "grad_norm": 0.349609375, "learning_rate": 0.00024307745478713692, "loss": 0.4549, "step": 28225 }, { "epoch": 0.7165793047429275, "grad_norm": 0.326171875, "learning_rate": 0.00024305139572272774, "loss": 0.4265, "step": 28230 }, { "epoch": 0.7167062227919433, "grad_norm": 0.390625, "learning_rate": 0.00024302533209232368, "loss": 0.4771, "step": 28235 }, { "epoch": 0.716833140840959, "grad_norm": 0.349609375, "learning_rate": 0.00024299926389720355, "loss": 0.4634, "step": 28240 }, { "epoch": 0.7169600588899747, "grad_norm": 0.369140625, "learning_rate": 0.00024297319113864666, "loss": 0.4642, "step": 28245 }, { "epoch": 0.7170869769389905, "grad_norm": 0.34765625, "learning_rate": 0.00024294711381793227, "loss": 0.4633, "step": 28250 }, { "epoch": 0.7172138949880063, "grad_norm": 0.341796875, "learning_rate": 0.00024292103193634005, "loss": 0.4523, "step": 28255 }, { "epoch": 0.717340813037022, "grad_norm": 0.34765625, "learning_rate": 0.0002428949454951498, "loss": 0.477, "step": 28260 }, { "epoch": 0.7174677310860378, "grad_norm": 0.330078125, "learning_rate": 0.00024286885449564157, "loss": 0.468, "step": 28265 }, { "epoch": 0.7175946491350534, "grad_norm": 0.33984375, "learning_rate": 0.00024284275893909565, "loss": 0.4628, "step": 28270 }, { "epoch": 0.7177215671840692, "grad_norm": 0.34375, "learning_rate": 0.00024281665882679257, "loss": 0.4324, "step": 28275 }, { "epoch": 0.717848485233085, "grad_norm": 0.330078125, "learning_rate": 0.00024279055416001296, "loss": 0.4538, "step": 28280 }, { "epoch": 0.7179754032821007, "grad_norm": 0.361328125, "learning_rate": 0.00024276444494003785, "loss": 0.487, "step": 28285 }, { "epoch": 0.7181023213311165, "grad_norm": 0.36328125, "learning_rate": 0.00024273833116814837, "loss": 0.4598, "step": 28290 }, { "epoch": 0.7182292393801323, "grad_norm": 0.373046875, "learning_rate": 0.00024271221284562593, "loss": 0.4606, "step": 28295 }, { "epoch": 0.718356157429148, "grad_norm": 0.326171875, "learning_rate": 0.00024268608997375212, "loss": 0.463, "step": 28300 }, { "epoch": 0.7184830754781637, "grad_norm": 0.34765625, "learning_rate": 0.00024265996255380882, "loss": 0.4389, "step": 28305 }, { "epoch": 0.7186099935271795, "grad_norm": 0.349609375, "learning_rate": 0.00024263383058707803, "loss": 0.4711, "step": 28310 }, { "epoch": 0.7187369115761952, "grad_norm": 0.34765625, "learning_rate": 0.00024260769407484208, "loss": 0.4681, "step": 28315 }, { "epoch": 0.718863829625211, "grad_norm": 0.373046875, "learning_rate": 0.00024258155301838354, "loss": 0.453, "step": 28320 }, { "epoch": 0.7189907476742268, "grad_norm": 0.31640625, "learning_rate": 0.00024255540741898498, "loss": 0.4446, "step": 28325 }, { "epoch": 0.7191176657232425, "grad_norm": 0.330078125, "learning_rate": 0.0002425292572779295, "loss": 0.4339, "step": 28330 }, { "epoch": 0.7192445837722583, "grad_norm": 0.326171875, "learning_rate": 0.0002425031025965002, "loss": 0.4832, "step": 28335 }, { "epoch": 0.719371501821274, "grad_norm": 0.35546875, "learning_rate": 0.00024247694337598048, "loss": 0.4993, "step": 28340 }, { "epoch": 0.7194984198702897, "grad_norm": 0.33203125, "learning_rate": 0.00024245077961765406, "loss": 0.4559, "step": 28345 }, { "epoch": 0.7196253379193055, "grad_norm": 0.3515625, "learning_rate": 0.00024242461132280467, "loss": 0.4509, "step": 28350 }, { "epoch": 0.7197522559683213, "grad_norm": 0.291015625, "learning_rate": 0.00024239843849271643, "loss": 0.4172, "step": 28355 }, { "epoch": 0.719879174017337, "grad_norm": 0.322265625, "learning_rate": 0.0002423722611286736, "loss": 0.457, "step": 28360 }, { "epoch": 0.7200060920663528, "grad_norm": 0.34375, "learning_rate": 0.0002423460792319607, "loss": 0.4512, "step": 28365 }, { "epoch": 0.7201330101153685, "grad_norm": 0.34375, "learning_rate": 0.0002423198928038625, "loss": 0.4422, "step": 28370 }, { "epoch": 0.7202599281643842, "grad_norm": 0.345703125, "learning_rate": 0.00024229370184566395, "loss": 0.4657, "step": 28375 }, { "epoch": 0.7203868462134, "grad_norm": 0.341796875, "learning_rate": 0.0002422675063586502, "loss": 0.4317, "step": 28380 }, { "epoch": 0.7205137642624158, "grad_norm": 0.27734375, "learning_rate": 0.00024224130634410663, "loss": 0.4193, "step": 28385 }, { "epoch": 0.7206406823114315, "grad_norm": 0.328125, "learning_rate": 0.00024221510180331895, "loss": 0.4828, "step": 28390 }, { "epoch": 0.7207676003604473, "grad_norm": 0.353515625, "learning_rate": 0.00024218889273757295, "loss": 0.482, "step": 28395 }, { "epoch": 0.7208945184094631, "grad_norm": 0.353515625, "learning_rate": 0.00024216267914815468, "loss": 0.4491, "step": 28400 }, { "epoch": 0.7210214364584787, "grad_norm": 0.36328125, "learning_rate": 0.00024213646103635048, "loss": 0.4531, "step": 28405 }, { "epoch": 0.7211483545074945, "grad_norm": 0.3203125, "learning_rate": 0.0002421102384034468, "loss": 0.4498, "step": 28410 }, { "epoch": 0.7212752725565102, "grad_norm": 0.341796875, "learning_rate": 0.00024208401125073046, "loss": 0.4474, "step": 28415 }, { "epoch": 0.721402190605526, "grad_norm": 0.365234375, "learning_rate": 0.00024205777957948837, "loss": 0.4647, "step": 28420 }, { "epoch": 0.7215291086545418, "grad_norm": 0.3515625, "learning_rate": 0.0002420315433910077, "loss": 0.4629, "step": 28425 }, { "epoch": 0.7216560267035576, "grad_norm": 0.330078125, "learning_rate": 0.00024200530268657583, "loss": 0.4402, "step": 28430 }, { "epoch": 0.7217829447525733, "grad_norm": 0.30078125, "learning_rate": 0.0002419790574674804, "loss": 0.4372, "step": 28435 }, { "epoch": 0.721909862801589, "grad_norm": 0.35546875, "learning_rate": 0.00024195280773500933, "loss": 0.4754, "step": 28440 }, { "epoch": 0.7220367808506047, "grad_norm": 0.328125, "learning_rate": 0.00024192655349045055, "loss": 0.4598, "step": 28445 }, { "epoch": 0.7221636988996205, "grad_norm": 0.34375, "learning_rate": 0.00024190029473509245, "loss": 0.4711, "step": 28450 }, { "epoch": 0.7222906169486363, "grad_norm": 0.283203125, "learning_rate": 0.00024187403147022345, "loss": 0.4736, "step": 28455 }, { "epoch": 0.722417534997652, "grad_norm": 0.361328125, "learning_rate": 0.0002418477636971323, "loss": 0.4686, "step": 28460 }, { "epoch": 0.7225444530466678, "grad_norm": 0.31640625, "learning_rate": 0.00024182149141710804, "loss": 0.4557, "step": 28465 }, { "epoch": 0.7226713710956835, "grad_norm": 0.328125, "learning_rate": 0.00024179521463143974, "loss": 0.4292, "step": 28470 }, { "epoch": 0.7227982891446992, "grad_norm": 0.34375, "learning_rate": 0.00024176893334141683, "loss": 0.4971, "step": 28475 }, { "epoch": 0.722925207193715, "grad_norm": 0.34375, "learning_rate": 0.00024174264754832893, "loss": 0.4939, "step": 28480 }, { "epoch": 0.7230521252427308, "grad_norm": 0.341796875, "learning_rate": 0.00024171635725346585, "loss": 0.512, "step": 28485 }, { "epoch": 0.7231790432917465, "grad_norm": 0.349609375, "learning_rate": 0.00024169006245811762, "loss": 0.4722, "step": 28490 }, { "epoch": 0.7233059613407623, "grad_norm": 0.330078125, "learning_rate": 0.0002416637631635746, "loss": 0.4406, "step": 28495 }, { "epoch": 0.7234328793897781, "grad_norm": 0.34765625, "learning_rate": 0.0002416374593711272, "loss": 0.4308, "step": 28500 }, { "epoch": 0.7235597974387937, "grad_norm": 0.345703125, "learning_rate": 0.0002416111510820662, "loss": 0.4469, "step": 28505 }, { "epoch": 0.7236867154878095, "grad_norm": 0.349609375, "learning_rate": 0.00024158483829768253, "loss": 0.4477, "step": 28510 }, { "epoch": 0.7238136335368253, "grad_norm": 0.357421875, "learning_rate": 0.00024155852101926726, "loss": 0.43, "step": 28515 }, { "epoch": 0.723940551585841, "grad_norm": 0.33203125, "learning_rate": 0.00024153219924811186, "loss": 0.4458, "step": 28520 }, { "epoch": 0.7240674696348568, "grad_norm": 0.369140625, "learning_rate": 0.0002415058729855079, "loss": 0.4621, "step": 28525 }, { "epoch": 0.7241943876838726, "grad_norm": 0.3359375, "learning_rate": 0.00024147954223274724, "loss": 0.4559, "step": 28530 }, { "epoch": 0.7243213057328882, "grad_norm": 0.365234375, "learning_rate": 0.00024145320699112184, "loss": 0.4959, "step": 28535 }, { "epoch": 0.724448223781904, "grad_norm": 0.3515625, "learning_rate": 0.00024142686726192402, "loss": 0.4658, "step": 28540 }, { "epoch": 0.7245751418309198, "grad_norm": 0.380859375, "learning_rate": 0.00024140052304644626, "loss": 0.4705, "step": 28545 }, { "epoch": 0.7247020598799355, "grad_norm": 0.3515625, "learning_rate": 0.00024137417434598118, "loss": 0.433, "step": 28550 }, { "epoch": 0.7248289779289513, "grad_norm": 0.375, "learning_rate": 0.00024134782116182183, "loss": 0.4525, "step": 28555 }, { "epoch": 0.7249558959779671, "grad_norm": 0.3359375, "learning_rate": 0.00024132146349526123, "loss": 0.4435, "step": 28560 }, { "epoch": 0.7250828140269828, "grad_norm": 0.365234375, "learning_rate": 0.00024129510134759287, "loss": 0.4834, "step": 28565 }, { "epoch": 0.7252097320759985, "grad_norm": 0.353515625, "learning_rate": 0.00024126873472011019, "loss": 0.4899, "step": 28570 }, { "epoch": 0.7253366501250142, "grad_norm": 0.32421875, "learning_rate": 0.00024124236361410705, "loss": 0.4387, "step": 28575 }, { "epoch": 0.72546356817403, "grad_norm": 0.35546875, "learning_rate": 0.00024121598803087755, "loss": 0.4151, "step": 28580 }, { "epoch": 0.7255904862230458, "grad_norm": 0.349609375, "learning_rate": 0.00024118960797171575, "loss": 0.4918, "step": 28585 }, { "epoch": 0.7257174042720616, "grad_norm": 0.341796875, "learning_rate": 0.00024116322343791625, "loss": 0.4525, "step": 28590 }, { "epoch": 0.7258443223210773, "grad_norm": 0.359375, "learning_rate": 0.00024113683443077372, "loss": 0.4836, "step": 28595 }, { "epoch": 0.7259712403700931, "grad_norm": 0.34375, "learning_rate": 0.00024111044095158297, "loss": 0.4492, "step": 28600 }, { "epoch": 0.7260981584191087, "grad_norm": 0.3671875, "learning_rate": 0.00024108404300163922, "loss": 0.4684, "step": 28605 }, { "epoch": 0.7262250764681245, "grad_norm": 0.337890625, "learning_rate": 0.0002410576405822378, "loss": 0.476, "step": 28610 }, { "epoch": 0.7263519945171403, "grad_norm": 0.34375, "learning_rate": 0.00024103123369467413, "loss": 0.4591, "step": 28615 }, { "epoch": 0.726478912566156, "grad_norm": 0.36328125, "learning_rate": 0.00024100482234024414, "loss": 0.4428, "step": 28620 }, { "epoch": 0.7266058306151718, "grad_norm": 0.75390625, "learning_rate": 0.00024097840652024377, "loss": 0.4607, "step": 28625 }, { "epoch": 0.7267327486641876, "grad_norm": 0.34765625, "learning_rate": 0.00024095198623596927, "loss": 0.4293, "step": 28630 }, { "epoch": 0.7268596667132032, "grad_norm": 0.326171875, "learning_rate": 0.00024092556148871695, "loss": 0.4778, "step": 28635 }, { "epoch": 0.726986584762219, "grad_norm": 0.349609375, "learning_rate": 0.0002408991322797836, "loss": 0.4582, "step": 28640 }, { "epoch": 0.7271135028112348, "grad_norm": 0.349609375, "learning_rate": 0.00024087269861046602, "loss": 0.4577, "step": 28645 }, { "epoch": 0.7272404208602505, "grad_norm": 0.353515625, "learning_rate": 0.0002408462604820613, "loss": 0.4411, "step": 28650 }, { "epoch": 0.7273673389092663, "grad_norm": 0.341796875, "learning_rate": 0.00024081981789586678, "loss": 0.4591, "step": 28655 }, { "epoch": 0.7274942569582821, "grad_norm": 0.34375, "learning_rate": 0.00024079337085318003, "loss": 0.4471, "step": 28660 }, { "epoch": 0.7276211750072978, "grad_norm": 0.32421875, "learning_rate": 0.00024076691935529868, "loss": 0.4626, "step": 28665 }, { "epoch": 0.7277480930563135, "grad_norm": 0.369140625, "learning_rate": 0.00024074046340352075, "loss": 0.4698, "step": 28670 }, { "epoch": 0.7278750111053293, "grad_norm": 0.3359375, "learning_rate": 0.00024071400299914447, "loss": 0.4503, "step": 28675 }, { "epoch": 0.728001929154345, "grad_norm": 0.35546875, "learning_rate": 0.00024068753814346816, "loss": 0.4779, "step": 28680 }, { "epoch": 0.7281288472033608, "grad_norm": 0.34765625, "learning_rate": 0.00024066106883779045, "loss": 0.4534, "step": 28685 }, { "epoch": 0.7282557652523766, "grad_norm": 0.333984375, "learning_rate": 0.00024063459508341027, "loss": 0.456, "step": 28690 }, { "epoch": 0.7283826833013923, "grad_norm": 0.333984375, "learning_rate": 0.00024060811688162657, "loss": 0.5946, "step": 28695 }, { "epoch": 0.728509601350408, "grad_norm": 0.328125, "learning_rate": 0.0002405816342337387, "loss": 0.4424, "step": 28700 }, { "epoch": 0.7286365193994238, "grad_norm": 0.333984375, "learning_rate": 0.0002405551471410461, "loss": 0.4801, "step": 28705 }, { "epoch": 0.7287634374484395, "grad_norm": 0.375, "learning_rate": 0.00024052865560484848, "loss": 0.4635, "step": 28710 }, { "epoch": 0.7288903554974553, "grad_norm": 0.32421875, "learning_rate": 0.00024050215962644582, "loss": 0.45, "step": 28715 }, { "epoch": 0.7290172735464711, "grad_norm": 0.34375, "learning_rate": 0.00024047565920713826, "loss": 0.4624, "step": 28720 }, { "epoch": 0.7291441915954868, "grad_norm": 0.349609375, "learning_rate": 0.00024044915434822612, "loss": 0.4929, "step": 28725 }, { "epoch": 0.7292711096445026, "grad_norm": 0.34375, "learning_rate": 0.00024042264505101, "loss": 0.4425, "step": 28730 }, { "epoch": 0.7293980276935182, "grad_norm": 0.341796875, "learning_rate": 0.0002403961313167907, "loss": 0.4865, "step": 28735 }, { "epoch": 0.729524945742534, "grad_norm": 0.328125, "learning_rate": 0.00024036961314686928, "loss": 0.4327, "step": 28740 }, { "epoch": 0.7296518637915498, "grad_norm": 0.333984375, "learning_rate": 0.0002403430905425469, "loss": 0.4664, "step": 28745 }, { "epoch": 0.7297787818405655, "grad_norm": 0.341796875, "learning_rate": 0.00024031656350512507, "loss": 0.459, "step": 28750 }, { "epoch": 0.7299056998895813, "grad_norm": 0.3515625, "learning_rate": 0.00024029003203590541, "loss": 0.4354, "step": 28755 }, { "epoch": 0.7300326179385971, "grad_norm": 0.349609375, "learning_rate": 0.0002402634961361899, "loss": 0.453, "step": 28760 }, { "epoch": 0.7301595359876129, "grad_norm": 0.3359375, "learning_rate": 0.0002402369558072806, "loss": 0.4699, "step": 28765 }, { "epoch": 0.7302864540366285, "grad_norm": 0.33984375, "learning_rate": 0.00024021041105047977, "loss": 0.4277, "step": 28770 }, { "epoch": 0.7304133720856443, "grad_norm": 0.32421875, "learning_rate": 0.00024018386186709003, "loss": 0.4333, "step": 28775 }, { "epoch": 0.73054029013466, "grad_norm": 0.3671875, "learning_rate": 0.00024015730825841409, "loss": 0.4738, "step": 28780 }, { "epoch": 0.7306672081836758, "grad_norm": 0.33984375, "learning_rate": 0.00024013075022575497, "loss": 0.4293, "step": 28785 }, { "epoch": 0.7307941262326916, "grad_norm": 0.333984375, "learning_rate": 0.00024010418777041585, "loss": 0.4531, "step": 28790 }, { "epoch": 0.7309210442817073, "grad_norm": 0.34765625, "learning_rate": 0.0002400776208937001, "loss": 0.4653, "step": 28795 }, { "epoch": 0.731047962330723, "grad_norm": 0.35546875, "learning_rate": 0.0002400510495969114, "loss": 0.456, "step": 28800 }, { "epoch": 0.7311748803797388, "grad_norm": 0.341796875, "learning_rate": 0.00024002447388135354, "loss": 0.465, "step": 28805 }, { "epoch": 0.7313017984287545, "grad_norm": 0.326171875, "learning_rate": 0.00023999789374833065, "loss": 0.4553, "step": 28810 }, { "epoch": 0.7314287164777703, "grad_norm": 0.353515625, "learning_rate": 0.0002399713091991469, "loss": 0.4723, "step": 28815 }, { "epoch": 0.7315556345267861, "grad_norm": 0.333984375, "learning_rate": 0.00023994472023510693, "loss": 0.4568, "step": 28820 }, { "epoch": 0.7316825525758018, "grad_norm": 0.349609375, "learning_rate": 0.00023991812685751529, "loss": 0.4451, "step": 28825 }, { "epoch": 0.7318094706248176, "grad_norm": 0.34765625, "learning_rate": 0.000239891529067677, "loss": 0.4805, "step": 28830 }, { "epoch": 0.7319363886738333, "grad_norm": 0.337890625, "learning_rate": 0.0002398649268668972, "loss": 0.4601, "step": 28835 }, { "epoch": 0.732063306722849, "grad_norm": 0.33984375, "learning_rate": 0.00023983832025648122, "loss": 0.4786, "step": 28840 }, { "epoch": 0.7321902247718648, "grad_norm": 0.359375, "learning_rate": 0.0002398117092377347, "loss": 0.4384, "step": 28845 }, { "epoch": 0.7323171428208806, "grad_norm": 0.353515625, "learning_rate": 0.00023978509381196333, "loss": 0.4575, "step": 28850 }, { "epoch": 0.7324440608698963, "grad_norm": 0.32421875, "learning_rate": 0.00023975847398047321, "loss": 0.4501, "step": 28855 }, { "epoch": 0.7325709789189121, "grad_norm": 0.32421875, "learning_rate": 0.0002397318497445705, "loss": 0.4356, "step": 28860 }, { "epoch": 0.7326978969679279, "grad_norm": 0.341796875, "learning_rate": 0.00023970522110556167, "loss": 0.4369, "step": 28865 }, { "epoch": 0.7328248150169435, "grad_norm": 0.34375, "learning_rate": 0.00023967858806475336, "loss": 0.4876, "step": 28870 }, { "epoch": 0.7329517330659593, "grad_norm": 0.32421875, "learning_rate": 0.0002396519506234525, "loss": 0.4613, "step": 28875 }, { "epoch": 0.733078651114975, "grad_norm": 0.33984375, "learning_rate": 0.00023962530878296609, "loss": 0.4404, "step": 28880 }, { "epoch": 0.7332055691639908, "grad_norm": 0.341796875, "learning_rate": 0.00023959866254460153, "loss": 0.4569, "step": 28885 }, { "epoch": 0.7333324872130066, "grad_norm": 0.3203125, "learning_rate": 0.00023957201190966625, "loss": 0.4472, "step": 28890 }, { "epoch": 0.7334594052620224, "grad_norm": 0.373046875, "learning_rate": 0.00023954535687946806, "loss": 0.435, "step": 28895 }, { "epoch": 0.733586323311038, "grad_norm": 0.31640625, "learning_rate": 0.0002395186974553148, "loss": 0.4289, "step": 28900 }, { "epoch": 0.7337132413600538, "grad_norm": 0.33203125, "learning_rate": 0.00023949203363851477, "loss": 0.4729, "step": 28905 }, { "epoch": 0.7338401594090695, "grad_norm": 0.345703125, "learning_rate": 0.00023946536543037632, "loss": 0.4474, "step": 28910 }, { "epoch": 0.7339670774580853, "grad_norm": 0.373046875, "learning_rate": 0.00023943869283220797, "loss": 0.4906, "step": 28915 }, { "epoch": 0.7340939955071011, "grad_norm": 0.333984375, "learning_rate": 0.00023941201584531867, "loss": 0.4745, "step": 28920 }, { "epoch": 0.7342209135561168, "grad_norm": 0.34375, "learning_rate": 0.00023938533447101732, "loss": 0.4529, "step": 28925 }, { "epoch": 0.7343478316051326, "grad_norm": 0.357421875, "learning_rate": 0.00023935864871061315, "loss": 0.4404, "step": 28930 }, { "epoch": 0.7344747496541483, "grad_norm": 0.37109375, "learning_rate": 0.00023933195856541578, "loss": 0.4379, "step": 28935 }, { "epoch": 0.734601667703164, "grad_norm": 0.3515625, "learning_rate": 0.00023930526403673474, "loss": 0.4641, "step": 28940 }, { "epoch": 0.7347285857521798, "grad_norm": 0.3359375, "learning_rate": 0.00023927856512587998, "loss": 0.4842, "step": 28945 }, { "epoch": 0.7348555038011956, "grad_norm": 0.28125, "learning_rate": 0.00023925186183416158, "loss": 0.45, "step": 28950 }, { "epoch": 0.7349824218502113, "grad_norm": 0.349609375, "learning_rate": 0.0002392251541628899, "loss": 0.4775, "step": 28955 }, { "epoch": 0.7351093398992271, "grad_norm": 0.326171875, "learning_rate": 0.00023919844211337543, "loss": 0.4318, "step": 28960 }, { "epoch": 0.7352362579482428, "grad_norm": 0.337890625, "learning_rate": 0.00023917172568692894, "loss": 0.4371, "step": 28965 }, { "epoch": 0.7353631759972585, "grad_norm": 0.34765625, "learning_rate": 0.0002391450048848614, "loss": 0.4546, "step": 28970 }, { "epoch": 0.7354900940462743, "grad_norm": 0.3359375, "learning_rate": 0.000239118279708484, "loss": 0.4647, "step": 28975 }, { "epoch": 0.7356170120952901, "grad_norm": 0.35546875, "learning_rate": 0.00023909155015910803, "loss": 0.4482, "step": 28980 }, { "epoch": 0.7357439301443058, "grad_norm": 0.333984375, "learning_rate": 0.00023906481623804527, "loss": 0.4822, "step": 28985 }, { "epoch": 0.7358708481933216, "grad_norm": 0.37109375, "learning_rate": 0.00023903807794660743, "loss": 0.4712, "step": 28990 }, { "epoch": 0.7359977662423374, "grad_norm": 0.330078125, "learning_rate": 0.00023901133528610654, "loss": 0.4526, "step": 28995 }, { "epoch": 0.736124684291353, "grad_norm": 0.400390625, "learning_rate": 0.0002389845882578549, "loss": 0.4597, "step": 29000 }, { "epoch": 0.7362516023403688, "grad_norm": 0.32421875, "learning_rate": 0.00023895783686316498, "loss": 0.4683, "step": 29005 }, { "epoch": 0.7363785203893846, "grad_norm": 0.333984375, "learning_rate": 0.00023893108110334947, "loss": 0.4353, "step": 29010 }, { "epoch": 0.7365054384384003, "grad_norm": 0.322265625, "learning_rate": 0.00023890432097972114, "loss": 0.4342, "step": 29015 }, { "epoch": 0.7366323564874161, "grad_norm": 0.353515625, "learning_rate": 0.0002388775564935933, "loss": 0.4839, "step": 29020 }, { "epoch": 0.7367592745364319, "grad_norm": 0.361328125, "learning_rate": 0.0002388507876462791, "loss": 0.4405, "step": 29025 }, { "epoch": 0.7368861925854476, "grad_norm": 0.3671875, "learning_rate": 0.00023882401443909215, "loss": 0.4605, "step": 29030 }, { "epoch": 0.7370131106344633, "grad_norm": 0.34765625, "learning_rate": 0.0002387972368733462, "loss": 0.4553, "step": 29035 }, { "epoch": 0.737140028683479, "grad_norm": 0.37890625, "learning_rate": 0.0002387704549503552, "loss": 0.4535, "step": 29040 }, { "epoch": 0.7372669467324948, "grad_norm": 0.34375, "learning_rate": 0.00023874366867143338, "loss": 0.4537, "step": 29045 }, { "epoch": 0.7373938647815106, "grad_norm": 0.322265625, "learning_rate": 0.00023871687803789503, "loss": 0.4248, "step": 29050 }, { "epoch": 0.7375207828305264, "grad_norm": 0.34765625, "learning_rate": 0.00023869008305105484, "loss": 0.4641, "step": 29055 }, { "epoch": 0.7376477008795421, "grad_norm": 0.3359375, "learning_rate": 0.0002386632837122276, "loss": 0.4492, "step": 29060 }, { "epoch": 0.7377746189285578, "grad_norm": 0.3125, "learning_rate": 0.00023863648002272835, "loss": 0.44, "step": 29065 }, { "epoch": 0.7379015369775735, "grad_norm": 0.326171875, "learning_rate": 0.00023860967198387236, "loss": 0.4759, "step": 29070 }, { "epoch": 0.7380284550265893, "grad_norm": 0.35546875, "learning_rate": 0.00023858285959697502, "loss": 0.4672, "step": 29075 }, { "epoch": 0.7381553730756051, "grad_norm": 0.310546875, "learning_rate": 0.00023855604286335207, "loss": 0.4389, "step": 29080 }, { "epoch": 0.7382822911246208, "grad_norm": 0.35546875, "learning_rate": 0.0002385292217843194, "loss": 0.543, "step": 29085 }, { "epoch": 0.7384092091736366, "grad_norm": 0.34765625, "learning_rate": 0.000238502396361193, "loss": 0.4681, "step": 29090 }, { "epoch": 0.7385361272226524, "grad_norm": 0.3359375, "learning_rate": 0.00023847556659528936, "loss": 0.4634, "step": 29095 }, { "epoch": 0.738663045271668, "grad_norm": 0.341796875, "learning_rate": 0.0002384487324879248, "loss": 0.4493, "step": 29100 }, { "epoch": 0.7387899633206838, "grad_norm": 0.353515625, "learning_rate": 0.00023842189404041627, "loss": 0.4536, "step": 29105 }, { "epoch": 0.7389168813696996, "grad_norm": 0.337890625, "learning_rate": 0.0002383950512540806, "loss": 0.4785, "step": 29110 }, { "epoch": 0.7390437994187153, "grad_norm": 0.365234375, "learning_rate": 0.00023836820413023494, "loss": 0.4446, "step": 29115 }, { "epoch": 0.7391707174677311, "grad_norm": 0.35546875, "learning_rate": 0.0002383413526701967, "loss": 0.4474, "step": 29120 }, { "epoch": 0.7392976355167469, "grad_norm": 0.318359375, "learning_rate": 0.00023831449687528348, "loss": 0.431, "step": 29125 }, { "epoch": 0.7394245535657625, "grad_norm": 0.32421875, "learning_rate": 0.0002382876367468131, "loss": 0.4636, "step": 29130 }, { "epoch": 0.7395514716147783, "grad_norm": 0.341796875, "learning_rate": 0.0002382607722861036, "loss": 0.4617, "step": 29135 }, { "epoch": 0.7396783896637941, "grad_norm": 0.3828125, "learning_rate": 0.00023823390349447308, "loss": 0.4949, "step": 29140 }, { "epoch": 0.7398053077128098, "grad_norm": 0.376953125, "learning_rate": 0.0002382070303732401, "loss": 0.4855, "step": 29145 }, { "epoch": 0.7399322257618256, "grad_norm": 0.35546875, "learning_rate": 0.00023818015292372327, "loss": 0.4638, "step": 29150 }, { "epoch": 0.7400591438108414, "grad_norm": 0.349609375, "learning_rate": 0.00023815327114724144, "loss": 0.4304, "step": 29155 }, { "epoch": 0.7401860618598571, "grad_norm": 0.353515625, "learning_rate": 0.00023812638504511373, "loss": 0.4375, "step": 29160 }, { "epoch": 0.7403129799088728, "grad_norm": 0.32421875, "learning_rate": 0.00023809949461865942, "loss": 0.4473, "step": 29165 }, { "epoch": 0.7404398979578886, "grad_norm": 0.306640625, "learning_rate": 0.000238072599869198, "loss": 0.4546, "step": 29170 }, { "epoch": 0.7405668160069043, "grad_norm": 0.33203125, "learning_rate": 0.00023804570079804916, "loss": 0.4534, "step": 29175 }, { "epoch": 0.7406937340559201, "grad_norm": 0.328125, "learning_rate": 0.0002380187974065329, "loss": 0.4494, "step": 29180 }, { "epoch": 0.7408206521049359, "grad_norm": 0.328125, "learning_rate": 0.00023799188969596925, "loss": 0.4339, "step": 29185 }, { "epoch": 0.7409475701539516, "grad_norm": 0.345703125, "learning_rate": 0.00023796497766767865, "loss": 0.4539, "step": 29190 }, { "epoch": 0.7410744882029674, "grad_norm": 0.357421875, "learning_rate": 0.0002379380613229817, "loss": 0.4489, "step": 29195 }, { "epoch": 0.741201406251983, "grad_norm": 0.349609375, "learning_rate": 0.00023791114066319904, "loss": 0.4666, "step": 29200 }, { "epoch": 0.7413283243009988, "grad_norm": 0.322265625, "learning_rate": 0.00023788421568965171, "loss": 0.438, "step": 29205 }, { "epoch": 0.7414552423500146, "grad_norm": 0.326171875, "learning_rate": 0.000237857286403661, "loss": 0.4463, "step": 29210 }, { "epoch": 0.7415821603990304, "grad_norm": 0.361328125, "learning_rate": 0.0002378303528065482, "loss": 0.4532, "step": 29215 }, { "epoch": 0.7417090784480461, "grad_norm": 0.361328125, "learning_rate": 0.00023780341489963497, "loss": 0.4654, "step": 29220 }, { "epoch": 0.7418359964970619, "grad_norm": 0.357421875, "learning_rate": 0.00023777647268424315, "loss": 0.4726, "step": 29225 }, { "epoch": 0.7419629145460775, "grad_norm": 0.36328125, "learning_rate": 0.00023774952616169477, "loss": 0.4643, "step": 29230 }, { "epoch": 0.7420898325950933, "grad_norm": 0.322265625, "learning_rate": 0.0002377225753333121, "loss": 0.4646, "step": 29235 }, { "epoch": 0.7422167506441091, "grad_norm": 0.33203125, "learning_rate": 0.00023769562020041764, "loss": 0.4433, "step": 29240 }, { "epoch": 0.7423436686931248, "grad_norm": 0.337890625, "learning_rate": 0.00023766866076433394, "loss": 0.4439, "step": 29245 }, { "epoch": 0.7424705867421406, "grad_norm": 0.34375, "learning_rate": 0.00023764169702638406, "loss": 0.4584, "step": 29250 }, { "epoch": 0.7425975047911564, "grad_norm": 0.369140625, "learning_rate": 0.000237614728987891, "loss": 0.4773, "step": 29255 }, { "epoch": 0.7427244228401721, "grad_norm": 0.330078125, "learning_rate": 0.00023758775665017807, "loss": 0.4598, "step": 29260 }, { "epoch": 0.7428513408891878, "grad_norm": 0.3359375, "learning_rate": 0.00023756078001456885, "loss": 0.467, "step": 29265 }, { "epoch": 0.7429782589382036, "grad_norm": 0.35546875, "learning_rate": 0.00023753379908238702, "loss": 0.4539, "step": 29270 }, { "epoch": 0.7431051769872193, "grad_norm": 0.337890625, "learning_rate": 0.00023750681385495653, "loss": 0.4462, "step": 29275 }, { "epoch": 0.7432320950362351, "grad_norm": 0.341796875, "learning_rate": 0.0002374798243336015, "loss": 0.4593, "step": 29280 }, { "epoch": 0.7433590130852509, "grad_norm": 0.337890625, "learning_rate": 0.0002374528305196464, "loss": 0.4768, "step": 29285 }, { "epoch": 0.7434859311342666, "grad_norm": 0.322265625, "learning_rate": 0.0002374258324144157, "loss": 0.4514, "step": 29290 }, { "epoch": 0.7436128491832824, "grad_norm": 0.330078125, "learning_rate": 0.00023739883001923427, "loss": 0.4638, "step": 29295 }, { "epoch": 0.7437397672322981, "grad_norm": 0.5859375, "learning_rate": 0.00023737182333542702, "loss": 0.4819, "step": 29300 }, { "epoch": 0.7438666852813138, "grad_norm": 0.35546875, "learning_rate": 0.00023734481236431927, "loss": 0.4312, "step": 29305 }, { "epoch": 0.7439936033303296, "grad_norm": 0.37109375, "learning_rate": 0.00023731779710723627, "loss": 0.4442, "step": 29310 }, { "epoch": 0.7441205213793454, "grad_norm": 0.353515625, "learning_rate": 0.00023729077756550382, "loss": 0.4622, "step": 29315 }, { "epoch": 0.7442474394283611, "grad_norm": 0.34375, "learning_rate": 0.00023726375374044767, "loss": 0.4606, "step": 29320 }, { "epoch": 0.7443743574773769, "grad_norm": 0.388671875, "learning_rate": 0.00023723672563339388, "loss": 0.4613, "step": 29325 }, { "epoch": 0.7445012755263926, "grad_norm": 0.3359375, "learning_rate": 0.00023720969324566874, "loss": 0.4642, "step": 29330 }, { "epoch": 0.7446281935754083, "grad_norm": 0.349609375, "learning_rate": 0.00023718265657859868, "loss": 0.4605, "step": 29335 }, { "epoch": 0.7447551116244241, "grad_norm": 0.33984375, "learning_rate": 0.00023715561563351033, "loss": 0.4774, "step": 29340 }, { "epoch": 0.7448820296734399, "grad_norm": 0.33984375, "learning_rate": 0.00023712857041173068, "loss": 0.4614, "step": 29345 }, { "epoch": 0.7450089477224556, "grad_norm": 0.373046875, "learning_rate": 0.0002371015209145868, "loss": 0.4399, "step": 29350 }, { "epoch": 0.7451358657714714, "grad_norm": 0.34375, "learning_rate": 0.000237074467143406, "loss": 0.4602, "step": 29355 }, { "epoch": 0.7452627838204872, "grad_norm": 0.337890625, "learning_rate": 0.00023704740909951576, "loss": 0.4688, "step": 29360 }, { "epoch": 0.7453897018695028, "grad_norm": 0.34375, "learning_rate": 0.00023702034678424385, "loss": 0.445, "step": 29365 }, { "epoch": 0.7455166199185186, "grad_norm": 0.345703125, "learning_rate": 0.00023699328019891814, "loss": 0.4656, "step": 29370 }, { "epoch": 0.7456435379675344, "grad_norm": 0.35546875, "learning_rate": 0.00023696620934486685, "loss": 0.4545, "step": 29375 }, { "epoch": 0.7457704560165501, "grad_norm": 0.330078125, "learning_rate": 0.0002369391342234183, "loss": 0.4404, "step": 29380 }, { "epoch": 0.7458973740655659, "grad_norm": 0.390625, "learning_rate": 0.0002369120548359011, "loss": 0.4763, "step": 29385 }, { "epoch": 0.7460242921145817, "grad_norm": 0.34765625, "learning_rate": 0.00023688497118364393, "loss": 0.4553, "step": 29390 }, { "epoch": 0.7461512101635973, "grad_norm": 0.333984375, "learning_rate": 0.00023685788326797592, "loss": 0.45, "step": 29395 }, { "epoch": 0.7462781282126131, "grad_norm": 0.337890625, "learning_rate": 0.0002368307910902261, "loss": 0.4372, "step": 29400 }, { "epoch": 0.7464050462616288, "grad_norm": 0.330078125, "learning_rate": 0.00023680369465172398, "loss": 0.4495, "step": 29405 }, { "epoch": 0.7465319643106446, "grad_norm": 0.345703125, "learning_rate": 0.00023677659395379913, "loss": 0.4716, "step": 29410 }, { "epoch": 0.7466588823596604, "grad_norm": 0.32421875, "learning_rate": 0.00023674948899778141, "loss": 0.4334, "step": 29415 }, { "epoch": 0.7467858004086761, "grad_norm": 0.361328125, "learning_rate": 0.0002367223797850008, "loss": 0.4688, "step": 29420 }, { "epoch": 0.7469127184576919, "grad_norm": 0.296875, "learning_rate": 0.00023669526631678753, "loss": 0.4464, "step": 29425 }, { "epoch": 0.7470396365067076, "grad_norm": 0.353515625, "learning_rate": 0.00023666814859447214, "loss": 0.438, "step": 29430 }, { "epoch": 0.7471665545557233, "grad_norm": 0.330078125, "learning_rate": 0.00023664102661938514, "loss": 0.4414, "step": 29435 }, { "epoch": 0.7472934726047391, "grad_norm": 0.341796875, "learning_rate": 0.00023661390039285751, "loss": 0.4744, "step": 29440 }, { "epoch": 0.7474203906537549, "grad_norm": 42.75, "learning_rate": 0.00023658676991622032, "loss": 0.4222, "step": 29445 }, { "epoch": 0.7475473087027706, "grad_norm": 0.34375, "learning_rate": 0.00023655963519080483, "loss": 0.4499, "step": 29450 }, { "epoch": 0.7476742267517864, "grad_norm": 0.31640625, "learning_rate": 0.0002365324962179425, "loss": 0.4407, "step": 29455 }, { "epoch": 0.7478011448008022, "grad_norm": 0.3515625, "learning_rate": 0.00023650535299896505, "loss": 0.4785, "step": 29460 }, { "epoch": 0.7479280628498178, "grad_norm": 0.330078125, "learning_rate": 0.00023647820553520437, "loss": 0.453, "step": 29465 }, { "epoch": 0.7480549808988336, "grad_norm": 0.34765625, "learning_rate": 0.0002364510538279926, "loss": 0.4697, "step": 29470 }, { "epoch": 0.7481818989478494, "grad_norm": 0.298828125, "learning_rate": 0.00023642389787866206, "loss": 0.3988, "step": 29475 }, { "epoch": 0.7483088169968651, "grad_norm": 0.330078125, "learning_rate": 0.00023639673768854528, "loss": 0.4596, "step": 29480 }, { "epoch": 0.7484357350458809, "grad_norm": 0.33984375, "learning_rate": 0.000236369573258975, "loss": 0.4318, "step": 29485 }, { "epoch": 0.7485626530948967, "grad_norm": 0.3671875, "learning_rate": 0.00023634240459128415, "loss": 0.4559, "step": 29490 }, { "epoch": 0.7486895711439123, "grad_norm": 0.333984375, "learning_rate": 0.0002363152316868059, "loss": 0.4417, "step": 29495 }, { "epoch": 0.7488164891929281, "grad_norm": 0.33984375, "learning_rate": 0.0002362880545468736, "loss": 0.4705, "step": 29500 }, { "epoch": 0.7489434072419439, "grad_norm": 0.31640625, "learning_rate": 0.00023626087317282085, "loss": 0.4262, "step": 29505 }, { "epoch": 0.7490703252909596, "grad_norm": 0.361328125, "learning_rate": 0.00023623368756598144, "loss": 0.4525, "step": 29510 }, { "epoch": 0.7491972433399754, "grad_norm": 0.361328125, "learning_rate": 0.00023620649772768934, "loss": 0.4315, "step": 29515 }, { "epoch": 0.7493241613889912, "grad_norm": 0.35546875, "learning_rate": 0.00023617930365927873, "loss": 0.4399, "step": 29520 }, { "epoch": 0.7494510794380069, "grad_norm": 0.318359375, "learning_rate": 0.000236152105362084, "loss": 0.4342, "step": 29525 }, { "epoch": 0.7495779974870226, "grad_norm": 0.3359375, "learning_rate": 0.00023612490283743973, "loss": 0.4177, "step": 29530 }, { "epoch": 0.7497049155360384, "grad_norm": 0.3203125, "learning_rate": 0.00023609769608668084, "loss": 0.4358, "step": 29535 }, { "epoch": 0.7498318335850541, "grad_norm": 0.30859375, "learning_rate": 0.00023607048511114232, "loss": 0.4413, "step": 29540 }, { "epoch": 0.7499587516340699, "grad_norm": 0.34375, "learning_rate": 0.00023604326991215934, "loss": 0.4629, "step": 29545 }, { "epoch": 0.7500856696830857, "grad_norm": 0.349609375, "learning_rate": 0.0002360160504910674, "loss": 0.4737, "step": 29550 }, { "epoch": 0.7502125877321014, "grad_norm": 0.328125, "learning_rate": 0.00023598882684920213, "loss": 0.451, "step": 29555 }, { "epoch": 0.7503395057811171, "grad_norm": 0.345703125, "learning_rate": 0.00023596159898789935, "loss": 0.4927, "step": 29560 }, { "epoch": 0.7504664238301328, "grad_norm": 0.337890625, "learning_rate": 0.00023593436690849516, "loss": 0.4925, "step": 29565 }, { "epoch": 0.7505933418791486, "grad_norm": 0.341796875, "learning_rate": 0.00023590713061232582, "loss": 0.4506, "step": 29570 }, { "epoch": 0.7507202599281644, "grad_norm": 0.35546875, "learning_rate": 0.00023587989010072783, "loss": 0.4428, "step": 29575 }, { "epoch": 0.7508471779771801, "grad_norm": 0.33203125, "learning_rate": 0.0002358526453750378, "loss": 0.4592, "step": 29580 }, { "epoch": 0.7509740960261959, "grad_norm": 0.375, "learning_rate": 0.00023582539643659267, "loss": 0.4716, "step": 29585 }, { "epoch": 0.7511010140752117, "grad_norm": 0.3359375, "learning_rate": 0.00023579814328672955, "loss": 0.4806, "step": 29590 }, { "epoch": 0.7512279321242273, "grad_norm": 0.34375, "learning_rate": 0.00023577088592678565, "loss": 0.4493, "step": 29595 }, { "epoch": 0.7513548501732431, "grad_norm": 0.349609375, "learning_rate": 0.00023574362435809862, "loss": 0.4696, "step": 29600 }, { "epoch": 0.7514817682222589, "grad_norm": 0.365234375, "learning_rate": 0.00023571635858200607, "loss": 0.4986, "step": 29605 }, { "epoch": 0.7516086862712746, "grad_norm": 0.349609375, "learning_rate": 0.00023568908859984597, "loss": 0.4722, "step": 29610 }, { "epoch": 0.7517356043202904, "grad_norm": 0.34765625, "learning_rate": 0.00023566181441295642, "loss": 0.4207, "step": 29615 }, { "epoch": 0.7518625223693062, "grad_norm": 0.328125, "learning_rate": 0.00023563453602267574, "loss": 0.4348, "step": 29620 }, { "epoch": 0.7519894404183219, "grad_norm": 0.341796875, "learning_rate": 0.00023560725343034248, "loss": 0.4808, "step": 29625 }, { "epoch": 0.7521163584673376, "grad_norm": 0.3359375, "learning_rate": 0.00023557996663729538, "loss": 0.4564, "step": 29630 }, { "epoch": 0.7522432765163534, "grad_norm": 0.326171875, "learning_rate": 0.00023555267564487345, "loss": 0.4244, "step": 29635 }, { "epoch": 0.7523701945653691, "grad_norm": 0.33984375, "learning_rate": 0.0002355253804544158, "loss": 0.4654, "step": 29640 }, { "epoch": 0.7524971126143849, "grad_norm": 0.353515625, "learning_rate": 0.0002354980810672618, "loss": 0.4302, "step": 29645 }, { "epoch": 0.7526240306634007, "grad_norm": 0.34375, "learning_rate": 0.000235470777484751, "loss": 0.4656, "step": 29650 }, { "epoch": 0.7527509487124164, "grad_norm": 0.3828125, "learning_rate": 0.00023544346970822316, "loss": 0.45, "step": 29655 }, { "epoch": 0.7528778667614321, "grad_norm": 0.337890625, "learning_rate": 0.0002354161577390184, "loss": 0.4491, "step": 29660 }, { "epoch": 0.7530047848104479, "grad_norm": 0.3359375, "learning_rate": 0.00023538884157847672, "loss": 0.4774, "step": 29665 }, { "epoch": 0.7531317028594636, "grad_norm": 0.36328125, "learning_rate": 0.00023536152122793862, "loss": 0.4993, "step": 29670 }, { "epoch": 0.7532586209084794, "grad_norm": 0.3359375, "learning_rate": 0.00023533419668874468, "loss": 0.4715, "step": 29675 }, { "epoch": 0.7533855389574952, "grad_norm": 0.34375, "learning_rate": 0.0002353068679622357, "loss": 0.4614, "step": 29680 }, { "epoch": 0.7535124570065109, "grad_norm": 0.349609375, "learning_rate": 0.00023527953504975268, "loss": 0.4474, "step": 29685 }, { "epoch": 0.7536393750555267, "grad_norm": 0.33984375, "learning_rate": 0.00023525219795263684, "loss": 0.4522, "step": 29690 }, { "epoch": 0.7537662931045424, "grad_norm": 0.33203125, "learning_rate": 0.00023522485667222962, "loss": 0.4618, "step": 29695 }, { "epoch": 0.7538932111535581, "grad_norm": 0.375, "learning_rate": 0.00023519751120987267, "loss": 0.4379, "step": 29700 }, { "epoch": 0.7540201292025739, "grad_norm": 0.341796875, "learning_rate": 0.0002351701615669077, "loss": 0.4466, "step": 29705 }, { "epoch": 0.7541470472515897, "grad_norm": 0.353515625, "learning_rate": 0.00023514280774467692, "loss": 0.4878, "step": 29710 }, { "epoch": 0.7542739653006054, "grad_norm": 0.330078125, "learning_rate": 0.00023511544974452245, "loss": 0.4517, "step": 29715 }, { "epoch": 0.7544008833496212, "grad_norm": 0.322265625, "learning_rate": 0.00023508808756778673, "loss": 0.447, "step": 29720 }, { "epoch": 0.754527801398637, "grad_norm": 0.353515625, "learning_rate": 0.00023506072121581248, "loss": 0.4425, "step": 29725 }, { "epoch": 0.7546547194476526, "grad_norm": 0.341796875, "learning_rate": 0.00023503335068994252, "loss": 0.4412, "step": 29730 }, { "epoch": 0.7547816374966684, "grad_norm": 0.298828125, "learning_rate": 0.00023500597599151988, "loss": 0.4513, "step": 29735 }, { "epoch": 0.7549085555456841, "grad_norm": 0.349609375, "learning_rate": 0.00023497859712188788, "loss": 0.4797, "step": 29740 }, { "epoch": 0.7550354735946999, "grad_norm": 0.306640625, "learning_rate": 0.00023495121408238998, "loss": 0.4566, "step": 29745 }, { "epoch": 0.7551623916437157, "grad_norm": 0.330078125, "learning_rate": 0.0002349238268743698, "loss": 0.4687, "step": 29750 }, { "epoch": 0.7552893096927314, "grad_norm": 0.333984375, "learning_rate": 0.00023489643549917132, "loss": 0.4843, "step": 29755 }, { "epoch": 0.7554162277417471, "grad_norm": 0.353515625, "learning_rate": 0.00023486903995813854, "loss": 0.477, "step": 29760 }, { "epoch": 0.7555431457907629, "grad_norm": 0.35546875, "learning_rate": 0.00023484164025261583, "loss": 0.4468, "step": 29765 }, { "epoch": 0.7556700638397786, "grad_norm": 0.375, "learning_rate": 0.00023481423638394756, "loss": 0.4702, "step": 29770 }, { "epoch": 0.7557969818887944, "grad_norm": 0.291015625, "learning_rate": 0.00023478682835347854, "loss": 0.4459, "step": 29775 }, { "epoch": 0.7559238999378102, "grad_norm": 0.34375, "learning_rate": 0.0002347594161625536, "loss": 0.4506, "step": 29780 }, { "epoch": 0.7560508179868259, "grad_norm": 0.34375, "learning_rate": 0.0002347319998125179, "loss": 0.4825, "step": 29785 }, { "epoch": 0.7561777360358417, "grad_norm": 0.328125, "learning_rate": 0.00023470457930471666, "loss": 0.4359, "step": 29790 }, { "epoch": 0.7563046540848574, "grad_norm": 0.337890625, "learning_rate": 0.0002346771546404955, "loss": 0.4271, "step": 29795 }, { "epoch": 0.7564315721338731, "grad_norm": 0.32421875, "learning_rate": 0.00023464972582120012, "loss": 0.4581, "step": 29800 }, { "epoch": 0.7565584901828889, "grad_norm": 0.337890625, "learning_rate": 0.00023462229284817638, "loss": 0.4692, "step": 29805 }, { "epoch": 0.7566854082319047, "grad_norm": 0.333984375, "learning_rate": 0.00023459485572277042, "loss": 0.4778, "step": 29810 }, { "epoch": 0.7568123262809204, "grad_norm": 0.35546875, "learning_rate": 0.0002345674144463286, "loss": 0.4955, "step": 29815 }, { "epoch": 0.7569392443299362, "grad_norm": 0.349609375, "learning_rate": 0.00023453996902019744, "loss": 0.4702, "step": 29820 }, { "epoch": 0.7570661623789519, "grad_norm": 0.349609375, "learning_rate": 0.00023451251944572372, "loss": 0.4677, "step": 29825 }, { "epoch": 0.7571930804279676, "grad_norm": 0.34375, "learning_rate": 0.00023448506572425426, "loss": 0.4597, "step": 29830 }, { "epoch": 0.7573199984769834, "grad_norm": 0.36328125, "learning_rate": 0.00023445760785713635, "loss": 0.4758, "step": 29835 }, { "epoch": 0.7574469165259992, "grad_norm": 0.349609375, "learning_rate": 0.00023443014584571725, "loss": 0.4689, "step": 29840 }, { "epoch": 0.7575738345750149, "grad_norm": 0.333984375, "learning_rate": 0.00023440267969134448, "loss": 0.5345, "step": 29845 }, { "epoch": 0.7577007526240307, "grad_norm": 0.341796875, "learning_rate": 0.00023437520939536584, "loss": 0.4478, "step": 29850 }, { "epoch": 0.7578276706730465, "grad_norm": 0.353515625, "learning_rate": 0.00023434773495912932, "loss": 0.4692, "step": 29855 }, { "epoch": 0.7579545887220621, "grad_norm": 0.41796875, "learning_rate": 0.00023432025638398307, "loss": 0.4583, "step": 29860 }, { "epoch": 0.7580815067710779, "grad_norm": 0.38671875, "learning_rate": 0.00023429277367127536, "loss": 0.4812, "step": 29865 }, { "epoch": 0.7582084248200937, "grad_norm": 0.353515625, "learning_rate": 0.00023426528682235487, "loss": 0.4746, "step": 29870 }, { "epoch": 0.7583353428691094, "grad_norm": 0.3125, "learning_rate": 0.00023423779583857032, "loss": 0.4419, "step": 29875 }, { "epoch": 0.7584622609181252, "grad_norm": 0.333984375, "learning_rate": 0.00023421030072127066, "loss": 0.4559, "step": 29880 }, { "epoch": 0.758589178967141, "grad_norm": 0.3515625, "learning_rate": 0.00023418280147180512, "loss": 0.4681, "step": 29885 }, { "epoch": 0.7587160970161567, "grad_norm": 0.345703125, "learning_rate": 0.000234155298091523, "loss": 0.4482, "step": 29890 }, { "epoch": 0.7588430150651724, "grad_norm": 0.36328125, "learning_rate": 0.00023412779058177395, "loss": 0.4555, "step": 29895 }, { "epoch": 0.7589699331141881, "grad_norm": 0.3359375, "learning_rate": 0.00023410027894390774, "loss": 0.4603, "step": 29900 }, { "epoch": 0.7590968511632039, "grad_norm": 0.34765625, "learning_rate": 0.0002340727631792743, "loss": 0.4911, "step": 29905 }, { "epoch": 0.7592237692122197, "grad_norm": 0.373046875, "learning_rate": 0.00023404524328922392, "loss": 0.4372, "step": 29910 }, { "epoch": 0.7593506872612354, "grad_norm": 0.33984375, "learning_rate": 0.00023401771927510686, "loss": 0.4399, "step": 29915 }, { "epoch": 0.7594776053102512, "grad_norm": 0.337890625, "learning_rate": 0.00023399019113827386, "loss": 0.4495, "step": 29920 }, { "epoch": 0.7596045233592669, "grad_norm": 0.330078125, "learning_rate": 0.00023396265888007557, "loss": 0.4164, "step": 29925 }, { "epoch": 0.7597314414082826, "grad_norm": 0.34765625, "learning_rate": 0.0002339351225018631, "loss": 0.4552, "step": 29930 }, { "epoch": 0.7598583594572984, "grad_norm": 0.333984375, "learning_rate": 0.00023390758200498757, "loss": 0.4616, "step": 29935 }, { "epoch": 0.7599852775063142, "grad_norm": 0.357421875, "learning_rate": 0.00023388003739080045, "loss": 0.4468, "step": 29940 }, { "epoch": 0.7601121955553299, "grad_norm": 0.3359375, "learning_rate": 0.00023385248866065328, "loss": 0.4376, "step": 29945 }, { "epoch": 0.7602391136043457, "grad_norm": 0.34765625, "learning_rate": 0.00023382493581589793, "loss": 0.465, "step": 29950 }, { "epoch": 0.7603660316533615, "grad_norm": 0.314453125, "learning_rate": 0.00023379737885788633, "loss": 0.4734, "step": 29955 }, { "epoch": 0.7604929497023771, "grad_norm": 0.376953125, "learning_rate": 0.00023376981778797074, "loss": 0.4642, "step": 29960 }, { "epoch": 0.7606198677513929, "grad_norm": 0.345703125, "learning_rate": 0.0002337422526075036, "loss": 0.4814, "step": 29965 }, { "epoch": 0.7607467858004087, "grad_norm": 0.333984375, "learning_rate": 0.00023371468331783742, "loss": 0.4392, "step": 29970 }, { "epoch": 0.7608737038494244, "grad_norm": 0.322265625, "learning_rate": 0.00023368710992032512, "loss": 0.4427, "step": 29975 }, { "epoch": 0.7610006218984402, "grad_norm": 0.296875, "learning_rate": 0.00023365953241631966, "loss": 0.4112, "step": 29980 }, { "epoch": 0.761127539947456, "grad_norm": 0.357421875, "learning_rate": 0.00023363195080717427, "loss": 0.4248, "step": 29985 }, { "epoch": 0.7612544579964716, "grad_norm": 0.33984375, "learning_rate": 0.00023360436509424235, "loss": 0.4563, "step": 29990 }, { "epoch": 0.7613813760454874, "grad_norm": 0.337890625, "learning_rate": 0.00023357677527887757, "loss": 0.4563, "step": 29995 }, { "epoch": 0.7615082940945032, "grad_norm": 0.337890625, "learning_rate": 0.0002335491813624337, "loss": 0.4819, "step": 30000 }, { "epoch": 0.7616352121435189, "grad_norm": 0.359375, "learning_rate": 0.00023352158334626478, "loss": 0.4829, "step": 30005 }, { "epoch": 0.7617621301925347, "grad_norm": 0.345703125, "learning_rate": 0.00023349398123172503, "loss": 0.4735, "step": 30010 }, { "epoch": 0.7618890482415505, "grad_norm": 0.314453125, "learning_rate": 0.00023346637502016888, "loss": 0.4254, "step": 30015 }, { "epoch": 0.7620159662905662, "grad_norm": 0.3515625, "learning_rate": 0.00023343876471295099, "loss": 0.4208, "step": 30020 }, { "epoch": 0.7621428843395819, "grad_norm": 0.359375, "learning_rate": 0.00023341115031142614, "loss": 0.4516, "step": 30025 }, { "epoch": 0.7622698023885977, "grad_norm": 0.34375, "learning_rate": 0.00023338353181694935, "loss": 0.4615, "step": 30030 }, { "epoch": 0.7623967204376134, "grad_norm": 0.349609375, "learning_rate": 0.00023335590923087584, "loss": 0.4757, "step": 30035 }, { "epoch": 0.7625236384866292, "grad_norm": 7.40625, "learning_rate": 0.00023332828255456114, "loss": 0.4416, "step": 30040 }, { "epoch": 0.762650556535645, "grad_norm": 0.328125, "learning_rate": 0.00023330065178936074, "loss": 0.4674, "step": 30045 }, { "epoch": 0.7627774745846607, "grad_norm": 0.4609375, "learning_rate": 0.00023327301693663058, "loss": 0.4883, "step": 30050 }, { "epoch": 0.7629043926336765, "grad_norm": 0.32421875, "learning_rate": 0.00023324537799772666, "loss": 0.4187, "step": 30055 }, { "epoch": 0.7630313106826921, "grad_norm": 0.337890625, "learning_rate": 0.00023321773497400519, "loss": 0.4437, "step": 30060 }, { "epoch": 0.7631582287317079, "grad_norm": 1.1796875, "learning_rate": 0.00023319008786682255, "loss": 0.4729, "step": 30065 }, { "epoch": 0.7632851467807237, "grad_norm": 0.31640625, "learning_rate": 0.00023316243667753552, "loss": 0.4566, "step": 30070 }, { "epoch": 0.7634120648297394, "grad_norm": 0.365234375, "learning_rate": 0.00023313478140750082, "loss": 0.4399, "step": 30075 }, { "epoch": 0.7635389828787552, "grad_norm": 0.330078125, "learning_rate": 0.0002331071220580755, "loss": 0.4614, "step": 30080 }, { "epoch": 0.763665900927771, "grad_norm": 0.361328125, "learning_rate": 0.00023307945863061682, "loss": 0.4655, "step": 30085 }, { "epoch": 0.7637928189767866, "grad_norm": 0.33984375, "learning_rate": 0.0002330517911264822, "loss": 0.4449, "step": 30090 }, { "epoch": 0.7639197370258024, "grad_norm": 0.3359375, "learning_rate": 0.0002330241195470293, "loss": 0.4487, "step": 30095 }, { "epoch": 0.7640466550748182, "grad_norm": 0.333984375, "learning_rate": 0.00023299644389361586, "loss": 0.4372, "step": 30100 }, { "epoch": 0.7641735731238339, "grad_norm": 0.310546875, "learning_rate": 0.00023296876416760006, "loss": 0.4422, "step": 30105 }, { "epoch": 0.7643004911728497, "grad_norm": 0.357421875, "learning_rate": 0.00023294108037033999, "loss": 0.4389, "step": 30110 }, { "epoch": 0.7644274092218655, "grad_norm": 0.337890625, "learning_rate": 0.00023291339250319418, "loss": 0.4489, "step": 30115 }, { "epoch": 0.7645543272708812, "grad_norm": 0.345703125, "learning_rate": 0.0002328857005675212, "loss": 0.4499, "step": 30120 }, { "epoch": 0.7646812453198969, "grad_norm": 0.328125, "learning_rate": 0.00023285800456467995, "loss": 0.4528, "step": 30125 }, { "epoch": 0.7648081633689127, "grad_norm": 0.36328125, "learning_rate": 0.00023283030449602936, "loss": 0.4771, "step": 30130 }, { "epoch": 0.7649350814179284, "grad_norm": 0.33984375, "learning_rate": 0.00023280260036292878, "loss": 0.4716, "step": 30135 }, { "epoch": 0.7650619994669442, "grad_norm": 0.3671875, "learning_rate": 0.00023277489216673753, "loss": 0.447, "step": 30140 }, { "epoch": 0.76518891751596, "grad_norm": 0.357421875, "learning_rate": 0.00023274717990881536, "loss": 0.4381, "step": 30145 }, { "epoch": 0.7653158355649757, "grad_norm": 0.330078125, "learning_rate": 0.00023271946359052195, "loss": 0.4627, "step": 30150 }, { "epoch": 0.7654427536139915, "grad_norm": 0.35546875, "learning_rate": 0.00023269174321321752, "loss": 0.4752, "step": 30155 }, { "epoch": 0.7655696716630072, "grad_norm": 0.333984375, "learning_rate": 0.00023266401877826206, "loss": 0.4786, "step": 30160 }, { "epoch": 0.7656965897120229, "grad_norm": 0.3359375, "learning_rate": 0.0002326362902870162, "loss": 0.4562, "step": 30165 }, { "epoch": 0.7658235077610387, "grad_norm": 0.357421875, "learning_rate": 0.0002326085577408405, "loss": 0.4646, "step": 30170 }, { "epoch": 0.7659504258100545, "grad_norm": 0.359375, "learning_rate": 0.0002325808211410958, "loss": 0.4586, "step": 30175 }, { "epoch": 0.7660773438590702, "grad_norm": 0.34375, "learning_rate": 0.00023255308048914305, "loss": 0.4536, "step": 30180 }, { "epoch": 0.766204261908086, "grad_norm": 0.375, "learning_rate": 0.00023252533578634356, "loss": 0.4836, "step": 30185 }, { "epoch": 0.7663311799571016, "grad_norm": 0.34765625, "learning_rate": 0.0002324975870340587, "loss": 0.451, "step": 30190 }, { "epoch": 0.7664580980061174, "grad_norm": 0.349609375, "learning_rate": 0.00023246983423365008, "loss": 0.4617, "step": 30195 }, { "epoch": 0.7665850160551332, "grad_norm": 0.318359375, "learning_rate": 0.0002324420773864796, "loss": 0.4449, "step": 30200 }, { "epoch": 0.766711934104149, "grad_norm": 0.298828125, "learning_rate": 0.00023241431649390917, "loss": 0.44, "step": 30205 }, { "epoch": 0.7668388521531647, "grad_norm": 0.337890625, "learning_rate": 0.00023238655155730111, "loss": 0.4293, "step": 30210 }, { "epoch": 0.7669657702021805, "grad_norm": 0.3359375, "learning_rate": 0.00023235878257801774, "loss": 0.4627, "step": 30215 }, { "epoch": 0.7670926882511963, "grad_norm": 0.353515625, "learning_rate": 0.00023233100955742173, "loss": 0.433, "step": 30220 }, { "epoch": 0.7672196063002119, "grad_norm": 0.333984375, "learning_rate": 0.0002323032324968759, "loss": 0.457, "step": 30225 }, { "epoch": 0.7673465243492277, "grad_norm": 0.34375, "learning_rate": 0.0002322754513977432, "loss": 0.4741, "step": 30230 }, { "epoch": 0.7674734423982434, "grad_norm": 0.328125, "learning_rate": 0.0002322476662613869, "loss": 0.4551, "step": 30235 }, { "epoch": 0.7676003604472592, "grad_norm": 0.357421875, "learning_rate": 0.00023221987708917034, "loss": 0.458, "step": 30240 }, { "epoch": 0.767727278496275, "grad_norm": 0.326171875, "learning_rate": 0.00023219208388245718, "loss": 0.4224, "step": 30245 }, { "epoch": 0.7678541965452907, "grad_norm": 0.33984375, "learning_rate": 0.00023216428664261124, "loss": 0.4505, "step": 30250 }, { "epoch": 0.7679811145943064, "grad_norm": 0.35546875, "learning_rate": 0.00023213648537099643, "loss": 0.47, "step": 30255 }, { "epoch": 0.7681080326433222, "grad_norm": 0.361328125, "learning_rate": 0.00023210868006897704, "loss": 0.4429, "step": 30260 }, { "epoch": 0.7682349506923379, "grad_norm": 0.34375, "learning_rate": 0.00023208087073791744, "loss": 0.457, "step": 30265 }, { "epoch": 0.7683618687413537, "grad_norm": 0.38671875, "learning_rate": 0.00023205305737918216, "loss": 0.5059, "step": 30270 }, { "epoch": 0.7684887867903695, "grad_norm": 0.37109375, "learning_rate": 0.00023202523999413607, "loss": 0.467, "step": 30275 }, { "epoch": 0.7686157048393852, "grad_norm": 0.3671875, "learning_rate": 0.00023199741858414413, "loss": 0.4506, "step": 30280 }, { "epoch": 0.768742622888401, "grad_norm": 0.349609375, "learning_rate": 0.00023196959315057152, "loss": 0.4638, "step": 30285 }, { "epoch": 0.7688695409374167, "grad_norm": 0.296875, "learning_rate": 0.0002319417636947836, "loss": 0.4261, "step": 30290 }, { "epoch": 0.7689964589864324, "grad_norm": 0.341796875, "learning_rate": 0.00023191393021814601, "loss": 0.469, "step": 30295 }, { "epoch": 0.7691233770354482, "grad_norm": 0.337890625, "learning_rate": 0.00023188609272202444, "loss": 0.4361, "step": 30300 }, { "epoch": 0.769250295084464, "grad_norm": 0.34765625, "learning_rate": 0.00023185825120778498, "loss": 0.4489, "step": 30305 }, { "epoch": 0.7693772131334797, "grad_norm": 0.33203125, "learning_rate": 0.00023183040567679376, "loss": 0.4253, "step": 30310 }, { "epoch": 0.7695041311824955, "grad_norm": 0.359375, "learning_rate": 0.00023180255613041708, "loss": 0.4997, "step": 30315 }, { "epoch": 0.7696310492315113, "grad_norm": 0.333984375, "learning_rate": 0.00023177470257002155, "loss": 0.4604, "step": 30320 }, { "epoch": 0.7697579672805269, "grad_norm": 0.30078125, "learning_rate": 0.00023174684499697394, "loss": 0.4238, "step": 30325 }, { "epoch": 0.7698848853295427, "grad_norm": 0.328125, "learning_rate": 0.00023171898341264126, "loss": 0.4327, "step": 30330 }, { "epoch": 0.7700118033785585, "grad_norm": 0.349609375, "learning_rate": 0.00023169111781839056, "loss": 0.4216, "step": 30335 }, { "epoch": 0.7701387214275742, "grad_norm": 0.337890625, "learning_rate": 0.00023166324821558928, "loss": 0.4387, "step": 30340 }, { "epoch": 0.77026563947659, "grad_norm": 0.34375, "learning_rate": 0.00023163537460560495, "loss": 0.4663, "step": 30345 }, { "epoch": 0.7703925575256058, "grad_norm": 0.34375, "learning_rate": 0.00023160749698980523, "loss": 0.4485, "step": 30350 }, { "epoch": 0.7705194755746214, "grad_norm": 0.326171875, "learning_rate": 0.00023157961536955822, "loss": 0.4691, "step": 30355 }, { "epoch": 0.7706463936236372, "grad_norm": 0.34765625, "learning_rate": 0.00023155172974623198, "loss": 0.4562, "step": 30360 }, { "epoch": 0.770773311672653, "grad_norm": 0.34375, "learning_rate": 0.00023152384012119483, "loss": 0.4548, "step": 30365 }, { "epoch": 0.7709002297216687, "grad_norm": 0.341796875, "learning_rate": 0.00023149594649581528, "loss": 0.4711, "step": 30370 }, { "epoch": 0.7710271477706845, "grad_norm": 0.31640625, "learning_rate": 0.00023146804887146213, "loss": 0.4499, "step": 30375 }, { "epoch": 0.7711540658197003, "grad_norm": 0.36328125, "learning_rate": 0.00023144014724950425, "loss": 0.471, "step": 30380 }, { "epoch": 0.771280983868716, "grad_norm": 0.365234375, "learning_rate": 0.00023141224163131084, "loss": 0.4677, "step": 30385 }, { "epoch": 0.7714079019177317, "grad_norm": 0.34765625, "learning_rate": 0.00023138433201825114, "loss": 0.4546, "step": 30390 }, { "epoch": 0.7715348199667474, "grad_norm": 0.349609375, "learning_rate": 0.00023135641841169467, "loss": 0.4311, "step": 30395 }, { "epoch": 0.7716617380157632, "grad_norm": 0.353515625, "learning_rate": 0.0002313285008130112, "loss": 0.4471, "step": 30400 }, { "epoch": 0.771788656064779, "grad_norm": 0.3359375, "learning_rate": 0.0002313005792235705, "loss": 0.417, "step": 30405 }, { "epoch": 0.7719155741137947, "grad_norm": 0.328125, "learning_rate": 0.0002312726536447429, "loss": 0.4321, "step": 30410 }, { "epoch": 0.7720424921628105, "grad_norm": 0.34765625, "learning_rate": 0.0002312447240778984, "loss": 0.476, "step": 30415 }, { "epoch": 0.7721694102118263, "grad_norm": 0.326171875, "learning_rate": 0.00023121679052440778, "loss": 0.4651, "step": 30420 }, { "epoch": 0.7722963282608419, "grad_norm": 0.35546875, "learning_rate": 0.00023118885298564155, "loss": 0.4708, "step": 30425 }, { "epoch": 0.7724232463098577, "grad_norm": 0.345703125, "learning_rate": 0.00023116091146297067, "loss": 0.4611, "step": 30430 }, { "epoch": 0.7725501643588735, "grad_norm": 0.341796875, "learning_rate": 0.00023113296595776616, "loss": 0.4445, "step": 30435 }, { "epoch": 0.7726770824078892, "grad_norm": 0.357421875, "learning_rate": 0.0002311050164713993, "loss": 0.4363, "step": 30440 }, { "epoch": 0.772804000456905, "grad_norm": 0.330078125, "learning_rate": 0.00023107706300524165, "loss": 0.4242, "step": 30445 }, { "epoch": 0.7729309185059208, "grad_norm": 0.39453125, "learning_rate": 0.00023104910556066483, "loss": 0.4866, "step": 30450 }, { "epoch": 0.7730578365549364, "grad_norm": 0.349609375, "learning_rate": 0.00023102114413904064, "loss": 0.4396, "step": 30455 }, { "epoch": 0.7731847546039522, "grad_norm": 0.328125, "learning_rate": 0.00023099317874174122, "loss": 0.4739, "step": 30460 }, { "epoch": 0.773311672652968, "grad_norm": 0.3203125, "learning_rate": 0.0002309652093701387, "loss": 0.4513, "step": 30465 }, { "epoch": 0.7734385907019837, "grad_norm": 0.328125, "learning_rate": 0.0002309372360256057, "loss": 0.4123, "step": 30470 }, { "epoch": 0.7735655087509995, "grad_norm": 0.34765625, "learning_rate": 0.0002309092587095147, "loss": 0.4371, "step": 30475 }, { "epoch": 0.7736924268000153, "grad_norm": 0.36328125, "learning_rate": 0.00023088127742323865, "loss": 0.4411, "step": 30480 }, { "epoch": 0.773819344849031, "grad_norm": 0.328125, "learning_rate": 0.00023085329216815053, "loss": 0.4743, "step": 30485 }, { "epoch": 0.7739462628980467, "grad_norm": 0.318359375, "learning_rate": 0.00023082530294562357, "loss": 0.4469, "step": 30490 }, { "epoch": 0.7740731809470625, "grad_norm": 0.337890625, "learning_rate": 0.00023079730975703117, "loss": 0.4472, "step": 30495 }, { "epoch": 0.7742000989960782, "grad_norm": 0.32421875, "learning_rate": 0.000230769312603747, "loss": 0.4344, "step": 30500 }, { "epoch": 0.774327017045094, "grad_norm": 0.3515625, "learning_rate": 0.0002307413114871448, "loss": 0.4758, "step": 30505 }, { "epoch": 0.7744539350941098, "grad_norm": 0.36328125, "learning_rate": 0.0002307133064085986, "loss": 0.4554, "step": 30510 }, { "epoch": 0.7745808531431255, "grad_norm": 0.32421875, "learning_rate": 0.0002306852973694826, "loss": 0.4522, "step": 30515 }, { "epoch": 0.7747077711921412, "grad_norm": 0.357421875, "learning_rate": 0.00023065728437117125, "loss": 0.4822, "step": 30520 }, { "epoch": 0.774834689241157, "grad_norm": 0.34765625, "learning_rate": 0.00023062926741503908, "loss": 0.5009, "step": 30525 }, { "epoch": 0.7749616072901727, "grad_norm": 0.341796875, "learning_rate": 0.00023060124650246087, "loss": 0.4941, "step": 30530 }, { "epoch": 0.7750885253391885, "grad_norm": 0.326171875, "learning_rate": 0.00023057322163481155, "loss": 0.4583, "step": 30535 }, { "epoch": 0.7752154433882043, "grad_norm": 0.318359375, "learning_rate": 0.0002305451928134664, "loss": 0.4336, "step": 30540 }, { "epoch": 0.77534236143722, "grad_norm": 0.333984375, "learning_rate": 0.0002305171600398007, "loss": 0.464, "step": 30545 }, { "epoch": 0.7754692794862358, "grad_norm": 0.353515625, "learning_rate": 0.00023048912331519, "loss": 0.4346, "step": 30550 }, { "epoch": 0.7755961975352514, "grad_norm": 0.34375, "learning_rate": 0.0002304610826410101, "loss": 0.4423, "step": 30555 }, { "epoch": 0.7757231155842672, "grad_norm": 0.32421875, "learning_rate": 0.00023043303801863698, "loss": 0.4525, "step": 30560 }, { "epoch": 0.775850033633283, "grad_norm": 0.330078125, "learning_rate": 0.00023040498944944666, "loss": 0.4396, "step": 30565 }, { "epoch": 0.7759769516822987, "grad_norm": 0.32421875, "learning_rate": 0.00023037693693481556, "loss": 0.7331, "step": 30570 }, { "epoch": 0.7761038697313145, "grad_norm": 0.30859375, "learning_rate": 0.0002303488804761202, "loss": 0.4415, "step": 30575 }, { "epoch": 0.7762307877803303, "grad_norm": 0.34765625, "learning_rate": 0.00023032082007473723, "loss": 0.4382, "step": 30580 }, { "epoch": 0.776357705829346, "grad_norm": 0.353515625, "learning_rate": 0.00023029275573204365, "loss": 0.4168, "step": 30585 }, { "epoch": 0.7764846238783617, "grad_norm": 0.3671875, "learning_rate": 0.00023026468744941653, "loss": 0.4779, "step": 30590 }, { "epoch": 0.7766115419273775, "grad_norm": 0.34765625, "learning_rate": 0.00023023661522823314, "loss": 0.4409, "step": 30595 }, { "epoch": 0.7767384599763932, "grad_norm": 0.353515625, "learning_rate": 0.00023020853906987107, "loss": 0.4603, "step": 30600 }, { "epoch": 0.776865378025409, "grad_norm": 0.33984375, "learning_rate": 0.00023018045897570786, "loss": 0.4863, "step": 30605 }, { "epoch": 0.7769922960744248, "grad_norm": 0.302734375, "learning_rate": 0.00023015237494712152, "loss": 0.4601, "step": 30610 }, { "epoch": 0.7771192141234405, "grad_norm": 0.353515625, "learning_rate": 0.00023012428698549004, "loss": 0.4274, "step": 30615 }, { "epoch": 0.7772461321724562, "grad_norm": 0.359375, "learning_rate": 0.00023009619509219172, "loss": 0.4418, "step": 30620 }, { "epoch": 0.777373050221472, "grad_norm": 0.314453125, "learning_rate": 0.00023006809926860505, "loss": 0.4411, "step": 30625 }, { "epoch": 0.7774999682704877, "grad_norm": 0.3203125, "learning_rate": 0.00023003999951610858, "loss": 0.4636, "step": 30630 }, { "epoch": 0.7776268863195035, "grad_norm": 0.31640625, "learning_rate": 0.00023001189583608122, "loss": 0.4331, "step": 30635 }, { "epoch": 0.7777538043685193, "grad_norm": 0.3671875, "learning_rate": 0.00022998378822990207, "loss": 0.4793, "step": 30640 }, { "epoch": 0.777880722417535, "grad_norm": 0.34375, "learning_rate": 0.00022995567669895024, "loss": 0.4684, "step": 30645 }, { "epoch": 0.7780076404665508, "grad_norm": 0.359375, "learning_rate": 0.0002299275612446052, "loss": 0.4768, "step": 30650 }, { "epoch": 0.7781345585155665, "grad_norm": 0.326171875, "learning_rate": 0.00022989944186824663, "loss": 0.4598, "step": 30655 }, { "epoch": 0.7782614765645822, "grad_norm": 0.34375, "learning_rate": 0.00022987131857125424, "loss": 0.4658, "step": 30660 }, { "epoch": 0.778388394613598, "grad_norm": 0.357421875, "learning_rate": 0.00022984319135500804, "loss": 0.4974, "step": 30665 }, { "epoch": 0.7785153126626138, "grad_norm": 0.337890625, "learning_rate": 0.00022981506022088822, "loss": 0.4446, "step": 30670 }, { "epoch": 0.7786422307116295, "grad_norm": 0.330078125, "learning_rate": 0.00022978692517027521, "loss": 0.4347, "step": 30675 }, { "epoch": 0.7787691487606453, "grad_norm": 0.337890625, "learning_rate": 0.00022975878620454962, "loss": 0.457, "step": 30680 }, { "epoch": 0.778896066809661, "grad_norm": 0.353515625, "learning_rate": 0.00022973064332509213, "loss": 0.4572, "step": 30685 }, { "epoch": 0.7790229848586767, "grad_norm": 0.361328125, "learning_rate": 0.00022970249653328373, "loss": 0.4553, "step": 30690 }, { "epoch": 0.7791499029076925, "grad_norm": 0.345703125, "learning_rate": 0.0002296743458305056, "loss": 0.4656, "step": 30695 }, { "epoch": 0.7792768209567082, "grad_norm": 0.326171875, "learning_rate": 0.00022964619121813897, "loss": 0.4298, "step": 30700 }, { "epoch": 0.779403739005724, "grad_norm": 0.34765625, "learning_rate": 0.0002296180326975655, "loss": 0.4533, "step": 30705 }, { "epoch": 0.7795306570547398, "grad_norm": 0.35546875, "learning_rate": 0.00022958987027016693, "loss": 0.4725, "step": 30710 }, { "epoch": 0.7796575751037556, "grad_norm": 0.333984375, "learning_rate": 0.0002295617039373251, "loss": 0.4492, "step": 30715 }, { "epoch": 0.7797844931527712, "grad_norm": 0.34765625, "learning_rate": 0.00022953353370042215, "loss": 0.4471, "step": 30720 }, { "epoch": 0.779911411201787, "grad_norm": 0.33984375, "learning_rate": 0.00022950535956084043, "loss": 0.4815, "step": 30725 }, { "epoch": 0.7800383292508027, "grad_norm": 0.337890625, "learning_rate": 0.0002294771815199623, "loss": 0.4538, "step": 30730 }, { "epoch": 0.7801652472998185, "grad_norm": 0.35546875, "learning_rate": 0.0002294489995791706, "loss": 0.445, "step": 30735 }, { "epoch": 0.7802921653488343, "grad_norm": 0.33203125, "learning_rate": 0.00022942081373984806, "loss": 0.4422, "step": 30740 }, { "epoch": 0.78041908339785, "grad_norm": 0.37890625, "learning_rate": 0.00022939262400337792, "loss": 0.4625, "step": 30745 }, { "epoch": 0.7805460014468658, "grad_norm": 0.3515625, "learning_rate": 0.00022936443037114329, "loss": 0.4645, "step": 30750 }, { "epoch": 0.7806729194958815, "grad_norm": 0.341796875, "learning_rate": 0.00022933623284452766, "loss": 0.4491, "step": 30755 }, { "epoch": 0.7807998375448972, "grad_norm": 0.337890625, "learning_rate": 0.0002293080314249147, "loss": 0.4289, "step": 30760 }, { "epoch": 0.780926755593913, "grad_norm": 0.33984375, "learning_rate": 0.0002292798261136883, "loss": 0.4492, "step": 30765 }, { "epoch": 0.7810536736429288, "grad_norm": 0.328125, "learning_rate": 0.00022925161691223235, "loss": 0.4549, "step": 30770 }, { "epoch": 0.7811805916919445, "grad_norm": 0.32421875, "learning_rate": 0.00022922340382193113, "loss": 0.4719, "step": 30775 }, { "epoch": 0.7813075097409603, "grad_norm": 0.34375, "learning_rate": 0.00022919518684416906, "loss": 0.4456, "step": 30780 }, { "epoch": 0.781434427789976, "grad_norm": 0.3203125, "learning_rate": 0.00022916696598033069, "loss": 0.44, "step": 30785 }, { "epoch": 0.7815613458389917, "grad_norm": 0.330078125, "learning_rate": 0.00022913874123180086, "loss": 0.462, "step": 30790 }, { "epoch": 0.7816882638880075, "grad_norm": 0.341796875, "learning_rate": 0.00022911051259996446, "loss": 0.4698, "step": 30795 }, { "epoch": 0.7818151819370233, "grad_norm": 0.3203125, "learning_rate": 0.0002290822800862068, "loss": 0.4234, "step": 30800 }, { "epoch": 0.781942099986039, "grad_norm": 0.337890625, "learning_rate": 0.00022905404369191314, "loss": 0.4665, "step": 30805 }, { "epoch": 0.7820690180350548, "grad_norm": 0.345703125, "learning_rate": 0.00022902580341846907, "loss": 0.4519, "step": 30810 }, { "epoch": 0.7821959360840706, "grad_norm": 0.341796875, "learning_rate": 0.00022899755926726026, "loss": 0.4409, "step": 30815 }, { "epoch": 0.7823228541330862, "grad_norm": 0.33984375, "learning_rate": 0.00022896931123967272, "loss": 0.4561, "step": 30820 }, { "epoch": 0.782449772182102, "grad_norm": 0.34375, "learning_rate": 0.00022894105933709245, "loss": 0.4136, "step": 30825 }, { "epoch": 0.7825766902311178, "grad_norm": 0.3515625, "learning_rate": 0.00022891280356090595, "loss": 0.436, "step": 30830 }, { "epoch": 0.7827036082801335, "grad_norm": 0.400390625, "learning_rate": 0.00022888454391249954, "loss": 0.4554, "step": 30835 }, { "epoch": 0.7828305263291493, "grad_norm": 0.345703125, "learning_rate": 0.00022885628039326, "loss": 0.455, "step": 30840 }, { "epoch": 0.7829574443781651, "grad_norm": 0.345703125, "learning_rate": 0.00022882801300457424, "loss": 0.422, "step": 30845 }, { "epoch": 0.7830843624271808, "grad_norm": 0.3203125, "learning_rate": 0.00022879974174782924, "loss": 0.4222, "step": 30850 }, { "epoch": 0.7832112804761965, "grad_norm": 0.330078125, "learning_rate": 0.00022877146662441227, "loss": 0.4388, "step": 30855 }, { "epoch": 0.7833381985252122, "grad_norm": 0.345703125, "learning_rate": 0.0002287431876357109, "loss": 0.4537, "step": 30860 }, { "epoch": 0.783465116574228, "grad_norm": 0.345703125, "learning_rate": 0.00022871490478311264, "loss": 0.4769, "step": 30865 }, { "epoch": 0.7835920346232438, "grad_norm": 0.30078125, "learning_rate": 0.00022868661806800533, "loss": 0.4417, "step": 30870 }, { "epoch": 0.7837189526722596, "grad_norm": 0.361328125, "learning_rate": 0.000228658327491777, "loss": 0.468, "step": 30875 }, { "epoch": 0.7838458707212753, "grad_norm": 0.353515625, "learning_rate": 0.00022863003305581594, "loss": 0.4829, "step": 30880 }, { "epoch": 0.783972788770291, "grad_norm": 0.3515625, "learning_rate": 0.0002286017347615104, "loss": 0.4747, "step": 30885 }, { "epoch": 0.7840997068193067, "grad_norm": 0.330078125, "learning_rate": 0.0002285734326102491, "loss": 0.4488, "step": 30890 }, { "epoch": 0.7842266248683225, "grad_norm": 0.345703125, "learning_rate": 0.00022854512660342078, "loss": 0.4334, "step": 30895 }, { "epoch": 0.7843535429173383, "grad_norm": 0.357421875, "learning_rate": 0.00022851681674241432, "loss": 0.4751, "step": 30900 }, { "epoch": 0.784480460966354, "grad_norm": 0.3203125, "learning_rate": 0.000228488503028619, "loss": 0.4607, "step": 30905 }, { "epoch": 0.7846073790153698, "grad_norm": 0.33984375, "learning_rate": 0.0002284601854634241, "loss": 0.4563, "step": 30910 }, { "epoch": 0.7847342970643856, "grad_norm": 0.337890625, "learning_rate": 0.00022843186404821912, "loss": 0.4568, "step": 30915 }, { "epoch": 0.7848612151134012, "grad_norm": 0.33984375, "learning_rate": 0.0002284035387843938, "loss": 0.4782, "step": 30920 }, { "epoch": 0.784988133162417, "grad_norm": 0.337890625, "learning_rate": 0.00022837520967333805, "loss": 0.4655, "step": 30925 }, { "epoch": 0.7851150512114328, "grad_norm": 0.337890625, "learning_rate": 0.00022834687671644202, "loss": 0.4513, "step": 30930 }, { "epoch": 0.7852419692604485, "grad_norm": 0.328125, "learning_rate": 0.000228318539915096, "loss": 0.4684, "step": 30935 }, { "epoch": 0.7853688873094643, "grad_norm": 0.36328125, "learning_rate": 0.00022829019927069036, "loss": 0.4693, "step": 30940 }, { "epoch": 0.7854958053584801, "grad_norm": 0.3671875, "learning_rate": 0.0002282618547846159, "loss": 0.4932, "step": 30945 }, { "epoch": 0.7856227234074957, "grad_norm": 0.3515625, "learning_rate": 0.00022823350645826332, "loss": 0.4409, "step": 30950 }, { "epoch": 0.7857496414565115, "grad_norm": 0.35546875, "learning_rate": 0.00022820515429302374, "loss": 0.4619, "step": 30955 }, { "epoch": 0.7858765595055273, "grad_norm": 0.357421875, "learning_rate": 0.0002281767982902884, "loss": 0.4597, "step": 30960 }, { "epoch": 0.786003477554543, "grad_norm": 0.31640625, "learning_rate": 0.00022814843845144877, "loss": 0.439, "step": 30965 }, { "epoch": 0.7861303956035588, "grad_norm": 0.328125, "learning_rate": 0.0002281200747778964, "loss": 0.4648, "step": 30970 }, { "epoch": 0.7862573136525746, "grad_norm": 0.328125, "learning_rate": 0.000228091707271023, "loss": 0.4668, "step": 30975 }, { "epoch": 0.7863842317015903, "grad_norm": 0.36328125, "learning_rate": 0.00022806333593222063, "loss": 0.4576, "step": 30980 }, { "epoch": 0.786511149750606, "grad_norm": 0.35546875, "learning_rate": 0.00022803496076288153, "loss": 0.4485, "step": 30985 }, { "epoch": 0.7866380677996218, "grad_norm": 0.373046875, "learning_rate": 0.00022800658176439794, "loss": 0.4888, "step": 30990 }, { "epoch": 0.7867649858486375, "grad_norm": 0.326171875, "learning_rate": 0.00022797819893816246, "loss": 0.4674, "step": 30995 }, { "epoch": 0.7868919038976533, "grad_norm": 0.333984375, "learning_rate": 0.00022794981228556785, "loss": 0.4356, "step": 31000 }, { "epoch": 0.7870188219466691, "grad_norm": 0.333984375, "learning_rate": 0.000227921421808007, "loss": 0.4465, "step": 31005 }, { "epoch": 0.7871457399956848, "grad_norm": 0.349609375, "learning_rate": 0.00022789302750687298, "loss": 0.4581, "step": 31010 }, { "epoch": 0.7872726580447006, "grad_norm": 0.32421875, "learning_rate": 0.00022786462938355917, "loss": 0.4473, "step": 31015 }, { "epoch": 0.7873995760937162, "grad_norm": 0.37109375, "learning_rate": 0.00022783622743945898, "loss": 0.4636, "step": 31020 }, { "epoch": 0.787526494142732, "grad_norm": 0.33984375, "learning_rate": 0.00022780782167596615, "loss": 0.473, "step": 31025 }, { "epoch": 0.7876534121917478, "grad_norm": 0.3359375, "learning_rate": 0.0002277794120944745, "loss": 0.4742, "step": 31030 }, { "epoch": 0.7877803302407635, "grad_norm": 0.365234375, "learning_rate": 0.00022775099869637804, "loss": 0.5002, "step": 31035 }, { "epoch": 0.7879072482897793, "grad_norm": 0.333984375, "learning_rate": 0.00022772258148307106, "loss": 0.4456, "step": 31040 }, { "epoch": 0.7880341663387951, "grad_norm": 0.36328125, "learning_rate": 0.00022769416045594797, "loss": 0.4496, "step": 31045 }, { "epoch": 0.7881610843878107, "grad_norm": 0.375, "learning_rate": 0.00022766573561640337, "loss": 0.4473, "step": 31050 }, { "epoch": 0.7882880024368265, "grad_norm": 0.337890625, "learning_rate": 0.00022763730696583208, "loss": 0.4421, "step": 31055 }, { "epoch": 0.7884149204858423, "grad_norm": 0.361328125, "learning_rate": 0.0002276088745056291, "loss": 0.4639, "step": 31060 }, { "epoch": 0.788541838534858, "grad_norm": 0.326171875, "learning_rate": 0.00022758043823718952, "loss": 0.4381, "step": 31065 }, { "epoch": 0.7886687565838738, "grad_norm": 0.341796875, "learning_rate": 0.00022755199816190878, "loss": 0.4539, "step": 31070 }, { "epoch": 0.7887956746328896, "grad_norm": 0.337890625, "learning_rate": 0.00022752355428118227, "loss": 0.4535, "step": 31075 }, { "epoch": 0.7889225926819053, "grad_norm": 0.357421875, "learning_rate": 0.0002274951065964059, "loss": 0.4459, "step": 31080 }, { "epoch": 0.789049510730921, "grad_norm": 0.392578125, "learning_rate": 0.00022746665510897554, "loss": 0.4742, "step": 31085 }, { "epoch": 0.7891764287799368, "grad_norm": 0.349609375, "learning_rate": 0.00022743819982028727, "loss": 0.43, "step": 31090 }, { "epoch": 0.7893033468289525, "grad_norm": 0.330078125, "learning_rate": 0.0002274097407317374, "loss": 0.3995, "step": 31095 }, { "epoch": 0.7894302648779683, "grad_norm": 0.328125, "learning_rate": 0.00022738127784472235, "loss": 0.4509, "step": 31100 }, { "epoch": 0.7895571829269841, "grad_norm": 0.375, "learning_rate": 0.00022735281116063882, "loss": 0.4367, "step": 31105 }, { "epoch": 0.7896841009759998, "grad_norm": 1.3203125, "learning_rate": 0.00022732434068088364, "loss": 0.508, "step": 31110 }, { "epoch": 0.7898110190250155, "grad_norm": 0.359375, "learning_rate": 0.0002272958664068539, "loss": 0.442, "step": 31115 }, { "epoch": 0.7899379370740313, "grad_norm": 2.890625, "learning_rate": 0.00022726738833994678, "loss": 0.497, "step": 31120 }, { "epoch": 0.790064855123047, "grad_norm": 0.373046875, "learning_rate": 0.00022723890648155968, "loss": 0.4348, "step": 31125 }, { "epoch": 0.7901917731720628, "grad_norm": 0.328125, "learning_rate": 0.0002272104208330902, "loss": 0.4522, "step": 31130 }, { "epoch": 0.7903186912210786, "grad_norm": 0.345703125, "learning_rate": 0.00022718193139593614, "loss": 0.4609, "step": 31135 }, { "epoch": 0.7904456092700943, "grad_norm": 0.35546875, "learning_rate": 0.00022715343817149544, "loss": 0.4693, "step": 31140 }, { "epoch": 0.7905725273191101, "grad_norm": 0.349609375, "learning_rate": 0.0002271249411611663, "loss": 0.4559, "step": 31145 }, { "epoch": 0.7906994453681258, "grad_norm": 0.310546875, "learning_rate": 0.00022709644036634697, "loss": 0.4704, "step": 31150 }, { "epoch": 0.7908263634171415, "grad_norm": 0.337890625, "learning_rate": 0.00022706793578843604, "loss": 0.4689, "step": 31155 }, { "epoch": 0.7909532814661573, "grad_norm": 0.3515625, "learning_rate": 0.00022703942742883222, "loss": 0.4995, "step": 31160 }, { "epoch": 0.791080199515173, "grad_norm": 0.333984375, "learning_rate": 0.00022701091528893436, "loss": 0.4873, "step": 31165 }, { "epoch": 0.7912071175641888, "grad_norm": 0.33203125, "learning_rate": 0.00022698239937014156, "loss": 0.4563, "step": 31170 }, { "epoch": 0.7913340356132046, "grad_norm": 0.353515625, "learning_rate": 0.0002269538796738531, "loss": 0.46, "step": 31175 }, { "epoch": 0.7914609536622204, "grad_norm": 0.326171875, "learning_rate": 0.00022692535620146847, "loss": 0.4377, "step": 31180 }, { "epoch": 0.791587871711236, "grad_norm": 0.337890625, "learning_rate": 0.00022689682895438718, "loss": 0.4518, "step": 31185 }, { "epoch": 0.7917147897602518, "grad_norm": 0.330078125, "learning_rate": 0.00022686829793400917, "loss": 0.4735, "step": 31190 }, { "epoch": 0.7918417078092675, "grad_norm": 0.341796875, "learning_rate": 0.00022683976314173435, "loss": 0.4505, "step": 31195 }, { "epoch": 0.7919686258582833, "grad_norm": 0.353515625, "learning_rate": 0.00022681122457896295, "loss": 0.4481, "step": 31200 }, { "epoch": 0.7920955439072991, "grad_norm": 0.357421875, "learning_rate": 0.0002267826822470954, "loss": 0.4601, "step": 31205 }, { "epoch": 0.7922224619563148, "grad_norm": 0.3359375, "learning_rate": 0.00022675413614753223, "loss": 0.4363, "step": 31210 }, { "epoch": 0.7923493800053305, "grad_norm": 0.3515625, "learning_rate": 0.00022672558628167416, "loss": 0.4385, "step": 31215 }, { "epoch": 0.7924762980543463, "grad_norm": 0.359375, "learning_rate": 0.00022669703265092213, "loss": 0.4831, "step": 31220 }, { "epoch": 0.792603216103362, "grad_norm": 0.3359375, "learning_rate": 0.00022666847525667727, "loss": 0.4682, "step": 31225 }, { "epoch": 0.7927301341523778, "grad_norm": 0.32421875, "learning_rate": 0.00022663991410034088, "loss": 0.4351, "step": 31230 }, { "epoch": 0.7928570522013936, "grad_norm": 0.3359375, "learning_rate": 0.00022661134918331436, "loss": 0.4476, "step": 31235 }, { "epoch": 0.7929839702504093, "grad_norm": 0.333984375, "learning_rate": 0.00022658278050699952, "loss": 0.4571, "step": 31240 }, { "epoch": 0.7931108882994251, "grad_norm": 0.361328125, "learning_rate": 0.00022655420807279813, "loss": 0.4607, "step": 31245 }, { "epoch": 0.7932378063484408, "grad_norm": 0.357421875, "learning_rate": 0.00022652563188211224, "loss": 0.4465, "step": 31250 }, { "epoch": 0.7933647243974565, "grad_norm": 0.33203125, "learning_rate": 0.0002264970519363441, "loss": 0.4595, "step": 31255 }, { "epoch": 0.7934916424464723, "grad_norm": 0.33203125, "learning_rate": 0.00022646846823689605, "loss": 0.4518, "step": 31260 }, { "epoch": 0.7936185604954881, "grad_norm": 0.328125, "learning_rate": 0.00022643988078517076, "loss": 0.4575, "step": 31265 }, { "epoch": 0.7937454785445038, "grad_norm": 0.32421875, "learning_rate": 0.00022641128958257096, "loss": 0.4441, "step": 31270 }, { "epoch": 0.7938723965935196, "grad_norm": 0.333984375, "learning_rate": 0.00022638269463049962, "loss": 0.4368, "step": 31275 }, { "epoch": 0.7939993146425354, "grad_norm": 0.345703125, "learning_rate": 0.00022635409593035988, "loss": 0.4362, "step": 31280 }, { "epoch": 0.794126232691551, "grad_norm": 0.3671875, "learning_rate": 0.00022632549348355508, "loss": 0.4383, "step": 31285 }, { "epoch": 0.7942531507405668, "grad_norm": 0.349609375, "learning_rate": 0.0002262968872914887, "loss": 0.4773, "step": 31290 }, { "epoch": 0.7943800687895826, "grad_norm": 0.34375, "learning_rate": 0.00022626827735556444, "loss": 0.4372, "step": 31295 }, { "epoch": 0.7945069868385983, "grad_norm": 0.359375, "learning_rate": 0.00022623966367718622, "loss": 0.4653, "step": 31300 }, { "epoch": 0.7946339048876141, "grad_norm": 0.36328125, "learning_rate": 0.00022621104625775807, "loss": 0.4862, "step": 31305 }, { "epoch": 0.7947608229366299, "grad_norm": 0.322265625, "learning_rate": 0.00022618242509868424, "loss": 0.4495, "step": 31310 }, { "epoch": 0.7948877409856455, "grad_norm": 0.326171875, "learning_rate": 0.00022615380020136913, "loss": 0.4333, "step": 31315 }, { "epoch": 0.7950146590346613, "grad_norm": 48.25, "learning_rate": 0.0002261251715672174, "loss": 0.5593, "step": 31320 }, { "epoch": 0.795141577083677, "grad_norm": 0.361328125, "learning_rate": 0.00022609653919763385, "loss": 0.4739, "step": 31325 }, { "epoch": 0.7952684951326928, "grad_norm": 0.349609375, "learning_rate": 0.00022606790309402333, "loss": 0.4447, "step": 31330 }, { "epoch": 0.7953954131817086, "grad_norm": 0.296875, "learning_rate": 0.00022603926325779123, "loss": 0.4152, "step": 31335 }, { "epoch": 0.7955223312307244, "grad_norm": 0.365234375, "learning_rate": 0.0002260106196903427, "loss": 0.4507, "step": 31340 }, { "epoch": 0.7956492492797401, "grad_norm": 0.34375, "learning_rate": 0.0002259819723930834, "loss": 0.4341, "step": 31345 }, { "epoch": 0.7957761673287558, "grad_norm": 0.34765625, "learning_rate": 0.00022595332136741893, "loss": 0.4797, "step": 31350 }, { "epoch": 0.7959030853777715, "grad_norm": 0.345703125, "learning_rate": 0.00022592466661475524, "loss": 0.4583, "step": 31355 }, { "epoch": 0.7960300034267873, "grad_norm": 0.33203125, "learning_rate": 0.0002258960081364984, "loss": 0.4465, "step": 31360 }, { "epoch": 0.7961569214758031, "grad_norm": 0.337890625, "learning_rate": 0.00022586734593405466, "loss": 0.4834, "step": 31365 }, { "epoch": 0.7962838395248188, "grad_norm": 0.357421875, "learning_rate": 0.0002258386800088305, "loss": 0.4391, "step": 31370 }, { "epoch": 0.7964107575738346, "grad_norm": 0.3359375, "learning_rate": 0.00022581001036223256, "loss": 0.4574, "step": 31375 }, { "epoch": 0.7965376756228503, "grad_norm": 0.330078125, "learning_rate": 0.00022578133699566758, "loss": 0.4525, "step": 31380 }, { "epoch": 0.796664593671866, "grad_norm": 0.37109375, "learning_rate": 0.0002257526599105426, "loss": 0.4366, "step": 31385 }, { "epoch": 0.7967915117208818, "grad_norm": 0.3671875, "learning_rate": 0.00022572397910826473, "loss": 0.4728, "step": 31390 }, { "epoch": 0.7969184297698976, "grad_norm": 0.34765625, "learning_rate": 0.00022569529459024137, "loss": 0.4845, "step": 31395 }, { "epoch": 0.7970453478189133, "grad_norm": 0.337890625, "learning_rate": 0.00022566660635788005, "loss": 0.478, "step": 31400 }, { "epoch": 0.7971722658679291, "grad_norm": 0.34375, "learning_rate": 0.00022563791441258853, "loss": 0.4527, "step": 31405 }, { "epoch": 0.7972991839169449, "grad_norm": 0.328125, "learning_rate": 0.00022560921875577466, "loss": 0.4387, "step": 31410 }, { "epoch": 0.7974261019659605, "grad_norm": 0.33203125, "learning_rate": 0.0002255805193888466, "loss": 0.4619, "step": 31415 }, { "epoch": 0.7975530200149763, "grad_norm": 0.3515625, "learning_rate": 0.0002255518163132125, "loss": 0.4407, "step": 31420 }, { "epoch": 0.7976799380639921, "grad_norm": 0.3671875, "learning_rate": 0.00022552310953028086, "loss": 0.4816, "step": 31425 }, { "epoch": 0.7978068561130078, "grad_norm": 0.328125, "learning_rate": 0.00022549439904146035, "loss": 0.4326, "step": 31430 }, { "epoch": 0.7979337741620236, "grad_norm": 0.328125, "learning_rate": 0.00022546568484815973, "loss": 0.4572, "step": 31435 }, { "epoch": 0.7980606922110394, "grad_norm": 0.3203125, "learning_rate": 0.000225436966951788, "loss": 0.4423, "step": 31440 }, { "epoch": 0.7981876102600551, "grad_norm": 0.341796875, "learning_rate": 0.00022540824535375442, "loss": 0.4686, "step": 31445 }, { "epoch": 0.7983145283090708, "grad_norm": 0.341796875, "learning_rate": 0.00022537952005546822, "loss": 0.4747, "step": 31450 }, { "epoch": 0.7984414463580866, "grad_norm": 0.337890625, "learning_rate": 0.00022535079105833903, "loss": 0.4247, "step": 31455 }, { "epoch": 0.7985683644071023, "grad_norm": 0.3203125, "learning_rate": 0.0002253220583637765, "loss": 0.4604, "step": 31460 }, { "epoch": 0.7986952824561181, "grad_norm": 0.341796875, "learning_rate": 0.00022529332197319062, "loss": 0.4407, "step": 31465 }, { "epoch": 0.7988222005051339, "grad_norm": 0.322265625, "learning_rate": 0.0002252645818879914, "loss": 0.4415, "step": 31470 }, { "epoch": 0.7989491185541496, "grad_norm": 0.359375, "learning_rate": 0.00022523583810958912, "loss": 0.4413, "step": 31475 }, { "epoch": 0.7990760366031653, "grad_norm": 0.330078125, "learning_rate": 0.00022520709063939427, "loss": 0.4658, "step": 31480 }, { "epoch": 0.799202954652181, "grad_norm": 0.31640625, "learning_rate": 0.00022517833947881735, "loss": 0.4348, "step": 31485 }, { "epoch": 0.7993298727011968, "grad_norm": 0.32421875, "learning_rate": 0.00022514958462926935, "loss": 0.4149, "step": 31490 }, { "epoch": 0.7994567907502126, "grad_norm": 0.33984375, "learning_rate": 0.0002251208260921611, "loss": 0.4387, "step": 31495 }, { "epoch": 0.7995837087992284, "grad_norm": 0.302734375, "learning_rate": 0.00022509206386890388, "loss": 0.4375, "step": 31500 }, { "epoch": 0.7997106268482441, "grad_norm": 0.31640625, "learning_rate": 0.000225063297960909, "loss": 0.4273, "step": 31505 }, { "epoch": 0.7998375448972599, "grad_norm": 0.34375, "learning_rate": 0.000225034528369588, "loss": 0.4221, "step": 31510 }, { "epoch": 0.7999644629462755, "grad_norm": 0.3671875, "learning_rate": 0.00022500575509635257, "loss": 0.4576, "step": 31515 }, { "epoch": 0.8000913809952913, "grad_norm": 0.341796875, "learning_rate": 0.0002249769781426146, "loss": 0.4463, "step": 31520 }, { "epoch": 0.8002182990443071, "grad_norm": 0.3359375, "learning_rate": 0.00022494819750978616, "loss": 0.457, "step": 31525 }, { "epoch": 0.8003452170933228, "grad_norm": 0.333984375, "learning_rate": 0.00022491941319927958, "loss": 0.4505, "step": 31530 }, { "epoch": 0.8004721351423386, "grad_norm": 0.314453125, "learning_rate": 0.0002248906252125072, "loss": 0.4408, "step": 31535 }, { "epoch": 0.8005990531913544, "grad_norm": 0.33203125, "learning_rate": 0.0002248618335508817, "loss": 0.4387, "step": 31540 }, { "epoch": 0.80072597124037, "grad_norm": 0.341796875, "learning_rate": 0.00022483303821581584, "loss": 0.4663, "step": 31545 }, { "epoch": 0.8008528892893858, "grad_norm": 0.333984375, "learning_rate": 0.00022480423920872257, "loss": 0.4556, "step": 31550 }, { "epoch": 0.8009798073384016, "grad_norm": 0.33203125, "learning_rate": 0.0002247754365310151, "loss": 0.4015, "step": 31555 }, { "epoch": 0.8011067253874173, "grad_norm": 0.35546875, "learning_rate": 0.00022474663018410675, "loss": 0.4489, "step": 31560 }, { "epoch": 0.8012336434364331, "grad_norm": 0.357421875, "learning_rate": 0.00022471782016941102, "loss": 0.4638, "step": 31565 }, { "epoch": 0.8013605614854489, "grad_norm": 0.353515625, "learning_rate": 0.00022468900648834163, "loss": 0.4099, "step": 31570 }, { "epoch": 0.8014874795344646, "grad_norm": 0.33203125, "learning_rate": 0.00022466018914231244, "loss": 0.4678, "step": 31575 }, { "epoch": 0.8016143975834803, "grad_norm": 0.330078125, "learning_rate": 0.0002246313681327375, "loss": 0.4572, "step": 31580 }, { "epoch": 0.8017413156324961, "grad_norm": 0.330078125, "learning_rate": 0.00022460254346103108, "loss": 0.4273, "step": 31585 }, { "epoch": 0.8018682336815118, "grad_norm": 0.369140625, "learning_rate": 0.00022457371512860756, "loss": 0.4482, "step": 31590 }, { "epoch": 0.8019951517305276, "grad_norm": 0.349609375, "learning_rate": 0.00022454488313688155, "loss": 0.4467, "step": 31595 }, { "epoch": 0.8021220697795434, "grad_norm": 0.328125, "learning_rate": 0.00022451604748726777, "loss": 0.4635, "step": 31600 }, { "epoch": 0.8022489878285591, "grad_norm": 0.337890625, "learning_rate": 0.00022448720818118124, "loss": 0.4136, "step": 31605 }, { "epoch": 0.8023759058775749, "grad_norm": 0.333984375, "learning_rate": 0.0002244583652200371, "loss": 0.4794, "step": 31610 }, { "epoch": 0.8025028239265906, "grad_norm": 0.333984375, "learning_rate": 0.00022442951860525065, "loss": 0.4535, "step": 31615 }, { "epoch": 0.8026297419756063, "grad_norm": 0.3671875, "learning_rate": 0.00022440066833823733, "loss": 0.5045, "step": 31620 }, { "epoch": 0.8027566600246221, "grad_norm": 0.33984375, "learning_rate": 0.0002243718144204129, "loss": 0.465, "step": 31625 }, { "epoch": 0.8028835780736379, "grad_norm": 0.32421875, "learning_rate": 0.00022434295685319316, "loss": 0.4403, "step": 31630 }, { "epoch": 0.8030104961226536, "grad_norm": 0.328125, "learning_rate": 0.00022431409563799411, "loss": 0.4533, "step": 31635 }, { "epoch": 0.8031374141716694, "grad_norm": 0.330078125, "learning_rate": 0.00022428523077623198, "loss": 0.4551, "step": 31640 }, { "epoch": 0.803264332220685, "grad_norm": 0.326171875, "learning_rate": 0.00022425636226932317, "loss": 0.4547, "step": 31645 }, { "epoch": 0.8033912502697008, "grad_norm": 0.349609375, "learning_rate": 0.00022422749011868424, "loss": 0.4184, "step": 31650 }, { "epoch": 0.8035181683187166, "grad_norm": 0.3203125, "learning_rate": 0.00022419861432573196, "loss": 0.4451, "step": 31655 }, { "epoch": 0.8036450863677324, "grad_norm": 0.33203125, "learning_rate": 0.00022416973489188324, "loss": 0.4425, "step": 31660 }, { "epoch": 0.8037720044167481, "grad_norm": 0.33203125, "learning_rate": 0.00022414085181855518, "loss": 0.4721, "step": 31665 }, { "epoch": 0.8038989224657639, "grad_norm": 0.357421875, "learning_rate": 0.00022411196510716504, "loss": 0.4561, "step": 31670 }, { "epoch": 0.8040258405147797, "grad_norm": 0.3515625, "learning_rate": 0.00022408307475913026, "loss": 0.4506, "step": 31675 }, { "epoch": 0.8041527585637953, "grad_norm": 0.27734375, "learning_rate": 0.00022405418077586857, "loss": 0.447, "step": 31680 }, { "epoch": 0.8042796766128111, "grad_norm": 0.330078125, "learning_rate": 0.00022402528315879766, "loss": 0.4638, "step": 31685 }, { "epoch": 0.8044065946618268, "grad_norm": 0.33203125, "learning_rate": 0.00022399638190933566, "loss": 0.4694, "step": 31690 }, { "epoch": 0.8045335127108426, "grad_norm": 0.337890625, "learning_rate": 0.00022396747702890067, "loss": 0.4409, "step": 31695 }, { "epoch": 0.8046604307598584, "grad_norm": 0.34765625, "learning_rate": 0.000223938568518911, "loss": 0.4454, "step": 31700 }, { "epoch": 0.8047873488088741, "grad_norm": 0.3515625, "learning_rate": 0.00022390965638078523, "loss": 0.4601, "step": 31705 }, { "epoch": 0.8049142668578899, "grad_norm": 0.333984375, "learning_rate": 0.0002238807406159421, "loss": 0.4192, "step": 31710 }, { "epoch": 0.8050411849069056, "grad_norm": 0.373046875, "learning_rate": 0.00022385182122580045, "loss": 0.4441, "step": 31715 }, { "epoch": 0.8051681029559213, "grad_norm": 0.392578125, "learning_rate": 0.00022382289821177935, "loss": 0.4798, "step": 31720 }, { "epoch": 0.8052950210049371, "grad_norm": 0.345703125, "learning_rate": 0.000223793971575298, "loss": 0.4282, "step": 31725 }, { "epoch": 0.8054219390539529, "grad_norm": 0.345703125, "learning_rate": 0.00022376504131777587, "loss": 0.4485, "step": 31730 }, { "epoch": 0.8055488571029686, "grad_norm": 0.328125, "learning_rate": 0.00022373610744063258, "loss": 0.4391, "step": 31735 }, { "epoch": 0.8056757751519844, "grad_norm": 0.322265625, "learning_rate": 0.00022370716994528784, "loss": 0.4471, "step": 31740 }, { "epoch": 0.8058026932010001, "grad_norm": 0.33203125, "learning_rate": 0.00022367822883316167, "loss": 0.4427, "step": 31745 }, { "epoch": 0.8059296112500158, "grad_norm": 0.365234375, "learning_rate": 0.00022364928410567415, "loss": 0.458, "step": 31750 }, { "epoch": 0.8060565292990316, "grad_norm": 0.34375, "learning_rate": 0.00022362033576424557, "loss": 0.4403, "step": 31755 }, { "epoch": 0.8061834473480474, "grad_norm": 0.298828125, "learning_rate": 0.0002235913838102965, "loss": 0.4337, "step": 31760 }, { "epoch": 0.8063103653970631, "grad_norm": 0.34375, "learning_rate": 0.00022356242824524746, "loss": 0.4612, "step": 31765 }, { "epoch": 0.8064372834460789, "grad_norm": 0.3125, "learning_rate": 0.0002235334690705194, "loss": 0.4427, "step": 31770 }, { "epoch": 0.8065642014950947, "grad_norm": 0.34375, "learning_rate": 0.00022350450628753327, "loss": 0.424, "step": 31775 }, { "epoch": 0.8066911195441103, "grad_norm": 0.326171875, "learning_rate": 0.00022347553989771036, "loss": 0.4435, "step": 31780 }, { "epoch": 0.8068180375931261, "grad_norm": 0.349609375, "learning_rate": 0.00022344656990247195, "loss": 0.4729, "step": 31785 }, { "epoch": 0.8069449556421419, "grad_norm": 0.36328125, "learning_rate": 0.00022341759630323962, "loss": 0.4333, "step": 31790 }, { "epoch": 0.8070718736911576, "grad_norm": 0.3515625, "learning_rate": 0.00022338861910143509, "loss": 0.461, "step": 31795 }, { "epoch": 0.8071987917401734, "grad_norm": 0.3203125, "learning_rate": 0.00022335963829848022, "loss": 0.4537, "step": 31800 }, { "epoch": 0.8073257097891892, "grad_norm": 0.333984375, "learning_rate": 0.00022333065389579715, "loss": 0.4525, "step": 31805 }, { "epoch": 0.8074526278382048, "grad_norm": 0.3359375, "learning_rate": 0.00022330166589480807, "loss": 0.4577, "step": 31810 }, { "epoch": 0.8075795458872206, "grad_norm": 0.35546875, "learning_rate": 0.00022327267429693548, "loss": 0.4732, "step": 31815 }, { "epoch": 0.8077064639362364, "grad_norm": 0.345703125, "learning_rate": 0.00022324367910360194, "loss": 0.4584, "step": 31820 }, { "epoch": 0.8078333819852521, "grad_norm": 0.306640625, "learning_rate": 0.00022321468031623025, "loss": 0.4378, "step": 31825 }, { "epoch": 0.8079603000342679, "grad_norm": 0.349609375, "learning_rate": 0.00022318567793624337, "loss": 0.4737, "step": 31830 }, { "epoch": 0.8080872180832837, "grad_norm": 0.357421875, "learning_rate": 0.0002231566719650644, "loss": 0.4483, "step": 31835 }, { "epoch": 0.8082141361322994, "grad_norm": 0.34765625, "learning_rate": 0.00022312766240411669, "loss": 0.4594, "step": 31840 }, { "epoch": 0.8083410541813151, "grad_norm": 0.314453125, "learning_rate": 0.00022309864925482367, "loss": 0.462, "step": 31845 }, { "epoch": 0.8084679722303308, "grad_norm": 0.302734375, "learning_rate": 0.0002230696325186091, "loss": 0.4316, "step": 31850 }, { "epoch": 0.8085948902793466, "grad_norm": 0.345703125, "learning_rate": 0.00022304061219689678, "loss": 0.4449, "step": 31855 }, { "epoch": 0.8087218083283624, "grad_norm": 0.337890625, "learning_rate": 0.0002230115882911107, "loss": 0.4735, "step": 31860 }, { "epoch": 0.8088487263773781, "grad_norm": 0.330078125, "learning_rate": 0.00022298256080267504, "loss": 0.4135, "step": 31865 }, { "epoch": 0.8089756444263939, "grad_norm": 0.33984375, "learning_rate": 0.00022295352973301425, "loss": 0.4588, "step": 31870 }, { "epoch": 0.8091025624754097, "grad_norm": 0.3359375, "learning_rate": 0.0002229244950835528, "loss": 0.4333, "step": 31875 }, { "epoch": 0.8092294805244253, "grad_norm": 0.3203125, "learning_rate": 0.00022289545685571537, "loss": 0.455, "step": 31880 }, { "epoch": 0.8093563985734411, "grad_norm": 0.34765625, "learning_rate": 0.00022286641505092695, "loss": 0.458, "step": 31885 }, { "epoch": 0.8094833166224569, "grad_norm": 0.333984375, "learning_rate": 0.0002228373696706126, "loss": 0.4529, "step": 31890 }, { "epoch": 0.8096102346714726, "grad_norm": 0.361328125, "learning_rate": 0.00022280832071619745, "loss": 0.4429, "step": 31895 }, { "epoch": 0.8097371527204884, "grad_norm": 0.330078125, "learning_rate": 0.0002227792681891071, "loss": 0.4396, "step": 31900 }, { "epoch": 0.8098640707695042, "grad_norm": 0.3671875, "learning_rate": 0.00022275021209076698, "loss": 0.4539, "step": 31905 }, { "epoch": 0.8099909888185198, "grad_norm": 0.333984375, "learning_rate": 0.00022272115242260297, "loss": 0.4666, "step": 31910 }, { "epoch": 0.8101179068675356, "grad_norm": 0.3828125, "learning_rate": 0.00022269208918604096, "loss": 0.4692, "step": 31915 }, { "epoch": 0.8102448249165514, "grad_norm": 0.34765625, "learning_rate": 0.0002226630223825071, "loss": 0.444, "step": 31920 }, { "epoch": 0.8103717429655671, "grad_norm": 0.3515625, "learning_rate": 0.00022263395201342764, "loss": 0.4307, "step": 31925 }, { "epoch": 0.8104986610145829, "grad_norm": 0.330078125, "learning_rate": 0.00022260487808022913, "loss": 0.4678, "step": 31930 }, { "epoch": 0.8106255790635987, "grad_norm": 0.353515625, "learning_rate": 0.00022257580058433816, "loss": 0.5014, "step": 31935 }, { "epoch": 0.8107524971126144, "grad_norm": 0.3203125, "learning_rate": 0.0002225467195271816, "loss": 0.4237, "step": 31940 }, { "epoch": 0.8108794151616301, "grad_norm": 0.353515625, "learning_rate": 0.00022251763491018637, "loss": 0.4432, "step": 31945 }, { "epoch": 0.8110063332106459, "grad_norm": 0.349609375, "learning_rate": 0.0002224885467347797, "loss": 0.4619, "step": 31950 }, { "epoch": 0.8111332512596616, "grad_norm": 0.37109375, "learning_rate": 0.00022245945500238892, "loss": 0.4809, "step": 31955 }, { "epoch": 0.8112601693086774, "grad_norm": 0.349609375, "learning_rate": 0.0002224303597144415, "loss": 0.4702, "step": 31960 }, { "epoch": 0.8113870873576932, "grad_norm": 0.328125, "learning_rate": 0.00022240126087236521, "loss": 0.464, "step": 31965 }, { "epoch": 0.8115140054067089, "grad_norm": 0.353515625, "learning_rate": 0.0002223721584775879, "loss": 0.4493, "step": 31970 }, { "epoch": 0.8116409234557246, "grad_norm": 0.322265625, "learning_rate": 0.00022234305253153761, "loss": 0.4181, "step": 31975 }, { "epoch": 0.8117678415047404, "grad_norm": 0.4140625, "learning_rate": 0.00022231394303564255, "loss": 0.4362, "step": 31980 }, { "epoch": 0.8118947595537561, "grad_norm": 0.34375, "learning_rate": 0.0002222848299913311, "loss": 0.4315, "step": 31985 }, { "epoch": 0.8120216776027719, "grad_norm": 0.318359375, "learning_rate": 0.0002222557134000318, "loss": 0.4737, "step": 31990 }, { "epoch": 0.8121485956517877, "grad_norm": 0.34375, "learning_rate": 0.0002222265932631735, "loss": 0.4532, "step": 31995 }, { "epoch": 0.8122755137008034, "grad_norm": 0.345703125, "learning_rate": 0.00022219746958218497, "loss": 0.4766, "step": 32000 }, { "epoch": 0.8124024317498192, "grad_norm": 0.361328125, "learning_rate": 0.0002221683423584954, "loss": 0.4447, "step": 32005 }, { "epoch": 0.8125293497988348, "grad_norm": 0.3515625, "learning_rate": 0.000222139211593534, "loss": 0.4493, "step": 32010 }, { "epoch": 0.8126562678478506, "grad_norm": 0.318359375, "learning_rate": 0.00022211007728873024, "loss": 0.4535, "step": 32015 }, { "epoch": 0.8127831858968664, "grad_norm": 0.357421875, "learning_rate": 0.0002220809394455137, "loss": 0.4566, "step": 32020 }, { "epoch": 0.8129101039458821, "grad_norm": 0.35546875, "learning_rate": 0.00022205179806531418, "loss": 0.4738, "step": 32025 }, { "epoch": 0.8130370219948979, "grad_norm": 0.322265625, "learning_rate": 0.00022202265314956167, "loss": 0.4159, "step": 32030 }, { "epoch": 0.8131639400439137, "grad_norm": 0.345703125, "learning_rate": 0.0002219935046996862, "loss": 0.47, "step": 32035 }, { "epoch": 0.8132908580929294, "grad_norm": 0.35546875, "learning_rate": 0.00022196435271711819, "loss": 0.4351, "step": 32040 }, { "epoch": 0.8134177761419451, "grad_norm": 0.3359375, "learning_rate": 0.000221935197203288, "loss": 0.434, "step": 32045 }, { "epoch": 0.8135446941909609, "grad_norm": 0.365234375, "learning_rate": 0.00022190603815962635, "loss": 0.4735, "step": 32050 }, { "epoch": 0.8136716122399766, "grad_norm": 0.34375, "learning_rate": 0.00022187687558756404, "loss": 0.4624, "step": 32055 }, { "epoch": 0.8137985302889924, "grad_norm": 0.376953125, "learning_rate": 0.0002218477094885321, "loss": 0.4609, "step": 32060 }, { "epoch": 0.8139254483380082, "grad_norm": 0.33203125, "learning_rate": 0.00022181853986396173, "loss": 0.462, "step": 32065 }, { "epoch": 0.8140523663870239, "grad_norm": 0.357421875, "learning_rate": 0.00022178936671528413, "loss": 0.4526, "step": 32070 }, { "epoch": 0.8141792844360396, "grad_norm": 0.34765625, "learning_rate": 0.00022176019004393097, "loss": 0.4563, "step": 32075 }, { "epoch": 0.8143062024850554, "grad_norm": 0.34375, "learning_rate": 0.00022173100985133383, "loss": 0.4426, "step": 32080 }, { "epoch": 0.8144331205340711, "grad_norm": 0.349609375, "learning_rate": 0.0002217018261389246, "loss": 0.4461, "step": 32085 }, { "epoch": 0.8145600385830869, "grad_norm": 0.341796875, "learning_rate": 0.00022167263890813532, "loss": 0.4187, "step": 32090 }, { "epoch": 0.8146869566321027, "grad_norm": 0.376953125, "learning_rate": 0.0002216434481603982, "loss": 0.428, "step": 32095 }, { "epoch": 0.8148138746811184, "grad_norm": 0.357421875, "learning_rate": 0.00022161425389714565, "loss": 0.4461, "step": 32100 }, { "epoch": 0.8149407927301342, "grad_norm": 0.357421875, "learning_rate": 0.0002215850561198102, "loss": 0.4639, "step": 32105 }, { "epoch": 0.8150677107791499, "grad_norm": 0.357421875, "learning_rate": 0.0002215558548298245, "loss": 0.4363, "step": 32110 }, { "epoch": 0.8151946288281656, "grad_norm": 0.33984375, "learning_rate": 0.00022152665002862153, "loss": 0.47, "step": 32115 }, { "epoch": 0.8153215468771814, "grad_norm": 0.361328125, "learning_rate": 0.00022149744171763434, "loss": 0.4524, "step": 32120 }, { "epoch": 0.8154484649261972, "grad_norm": 0.37109375, "learning_rate": 0.0002214682298982962, "loss": 0.4531, "step": 32125 }, { "epoch": 0.8155753829752129, "grad_norm": 0.361328125, "learning_rate": 0.0002214390145720404, "loss": 0.4657, "step": 32130 }, { "epoch": 0.8157023010242287, "grad_norm": 0.353515625, "learning_rate": 0.00022140979574030069, "loss": 0.465, "step": 32135 }, { "epoch": 0.8158292190732445, "grad_norm": 0.34375, "learning_rate": 0.00022138057340451075, "loss": 0.4348, "step": 32140 }, { "epoch": 0.8159561371222601, "grad_norm": 0.357421875, "learning_rate": 0.00022135134756610448, "loss": 0.449, "step": 32145 }, { "epoch": 0.8160830551712759, "grad_norm": 0.384765625, "learning_rate": 0.000221322118226516, "loss": 0.4447, "step": 32150 }, { "epoch": 0.8162099732202917, "grad_norm": 0.359375, "learning_rate": 0.0002212928853871796, "loss": 0.4524, "step": 32155 }, { "epoch": 0.8163368912693074, "grad_norm": 0.37109375, "learning_rate": 0.00022126364904952972, "loss": 0.499, "step": 32160 }, { "epoch": 0.8164638093183232, "grad_norm": 0.337890625, "learning_rate": 0.00022123440921500097, "loss": 0.4243, "step": 32165 }, { "epoch": 0.816590727367339, "grad_norm": 0.3359375, "learning_rate": 0.0002212051658850281, "loss": 0.4524, "step": 32170 }, { "epoch": 0.8167176454163546, "grad_norm": 0.33203125, "learning_rate": 0.00022117591906104618, "loss": 0.4677, "step": 32175 }, { "epoch": 0.8168445634653704, "grad_norm": 0.359375, "learning_rate": 0.00022114666874449015, "loss": 0.4451, "step": 32180 }, { "epoch": 0.8169714815143861, "grad_norm": 0.326171875, "learning_rate": 0.0002211174149367955, "loss": 0.4087, "step": 32185 }, { "epoch": 0.8170983995634019, "grad_norm": 0.34765625, "learning_rate": 0.00022108815763939764, "loss": 0.4282, "step": 32190 }, { "epoch": 0.8172253176124177, "grad_norm": 0.330078125, "learning_rate": 0.00022105889685373217, "loss": 0.4553, "step": 32195 }, { "epoch": 0.8173522356614334, "grad_norm": 0.3359375, "learning_rate": 0.00022102963258123498, "loss": 0.4521, "step": 32200 }, { "epoch": 0.8174791537104492, "grad_norm": 0.36328125, "learning_rate": 0.00022100036482334198, "loss": 0.4511, "step": 32205 }, { "epoch": 0.8176060717594649, "grad_norm": 0.333984375, "learning_rate": 0.00022097109358148936, "loss": 0.4397, "step": 32210 }, { "epoch": 0.8177329898084806, "grad_norm": 0.349609375, "learning_rate": 0.0002209418188571134, "loss": 0.4339, "step": 32215 }, { "epoch": 0.8178599078574964, "grad_norm": 0.341796875, "learning_rate": 0.00022091254065165073, "loss": 0.4534, "step": 32220 }, { "epoch": 0.8179868259065122, "grad_norm": 0.3359375, "learning_rate": 0.0002208832589665379, "loss": 0.4667, "step": 32225 }, { "epoch": 0.8181137439555279, "grad_norm": 0.3515625, "learning_rate": 0.00022085397380321178, "loss": 0.4682, "step": 32230 }, { "epoch": 0.8182406620045437, "grad_norm": 0.337890625, "learning_rate": 0.00022082468516310946, "loss": 0.4549, "step": 32235 }, { "epoch": 0.8183675800535594, "grad_norm": 0.349609375, "learning_rate": 0.00022079539304766798, "loss": 0.4412, "step": 32240 }, { "epoch": 0.8184944981025751, "grad_norm": 0.34765625, "learning_rate": 0.00022076609745832476, "loss": 0.4403, "step": 32245 }, { "epoch": 0.8186214161515909, "grad_norm": 0.318359375, "learning_rate": 0.00022073679839651735, "loss": 0.4484, "step": 32250 }, { "epoch": 0.8187483342006067, "grad_norm": 0.341796875, "learning_rate": 0.00022070749586368342, "loss": 0.4581, "step": 32255 }, { "epoch": 0.8188752522496224, "grad_norm": 0.34765625, "learning_rate": 0.00022067818986126083, "loss": 0.4505, "step": 32260 }, { "epoch": 0.8190021702986382, "grad_norm": 0.337890625, "learning_rate": 0.0002206488803906876, "loss": 0.4337, "step": 32265 }, { "epoch": 0.819129088347654, "grad_norm": 0.34375, "learning_rate": 0.000220619567453402, "loss": 0.4231, "step": 32270 }, { "epoch": 0.8192560063966696, "grad_norm": 0.349609375, "learning_rate": 0.00022059025105084228, "loss": 0.4308, "step": 32275 }, { "epoch": 0.8193829244456854, "grad_norm": 0.27734375, "learning_rate": 0.0002205609311844471, "loss": 0.4365, "step": 32280 }, { "epoch": 0.8195098424947012, "grad_norm": 0.341796875, "learning_rate": 0.00022053160785565515, "loss": 0.4183, "step": 32285 }, { "epoch": 0.8196367605437169, "grad_norm": 0.33984375, "learning_rate": 0.00022050228106590527, "loss": 0.4365, "step": 32290 }, { "epoch": 0.8197636785927327, "grad_norm": 0.314453125, "learning_rate": 0.0002204729508166366, "loss": 0.4455, "step": 32295 }, { "epoch": 0.8198905966417485, "grad_norm": 0.375, "learning_rate": 0.00022044361710928825, "loss": 0.4458, "step": 32300 }, { "epoch": 0.8200175146907642, "grad_norm": 0.361328125, "learning_rate": 0.00022041427994529966, "loss": 0.4437, "step": 32305 }, { "epoch": 0.8201444327397799, "grad_norm": 0.34765625, "learning_rate": 0.00022038493932611042, "loss": 0.4633, "step": 32310 }, { "epoch": 0.8202713507887957, "grad_norm": 0.310546875, "learning_rate": 0.00022035559525316025, "loss": 0.4598, "step": 32315 }, { "epoch": 0.8203982688378114, "grad_norm": 0.33984375, "learning_rate": 0.00022032624772788908, "loss": 0.4501, "step": 32320 }, { "epoch": 0.8205251868868272, "grad_norm": 0.330078125, "learning_rate": 0.00022029689675173692, "loss": 0.437, "step": 32325 }, { "epoch": 0.820652104935843, "grad_norm": 0.349609375, "learning_rate": 0.00022026754232614402, "loss": 0.4447, "step": 32330 }, { "epoch": 0.8207790229848587, "grad_norm": 0.330078125, "learning_rate": 0.0002202381844525508, "loss": 0.4398, "step": 32335 }, { "epoch": 0.8209059410338744, "grad_norm": 0.3671875, "learning_rate": 0.00022020882313239783, "loss": 0.4478, "step": 32340 }, { "epoch": 0.8210328590828901, "grad_norm": 0.345703125, "learning_rate": 0.00022017945836712597, "loss": 0.4479, "step": 32345 }, { "epoch": 0.8211597771319059, "grad_norm": 0.306640625, "learning_rate": 0.00022015009015817597, "loss": 0.4331, "step": 32350 }, { "epoch": 0.8212866951809217, "grad_norm": 0.361328125, "learning_rate": 0.000220120718506989, "loss": 0.4598, "step": 32355 }, { "epoch": 0.8214136132299374, "grad_norm": 0.337890625, "learning_rate": 0.00022009134341500632, "loss": 0.4487, "step": 32360 }, { "epoch": 0.8215405312789532, "grad_norm": 0.34375, "learning_rate": 0.00022006196488366937, "loss": 0.4853, "step": 32365 }, { "epoch": 0.821667449327969, "grad_norm": 0.34375, "learning_rate": 0.0002200325829144196, "loss": 0.4848, "step": 32370 }, { "epoch": 0.8217943673769846, "grad_norm": 0.34375, "learning_rate": 0.00022000319750869896, "loss": 0.449, "step": 32375 }, { "epoch": 0.8219212854260004, "grad_norm": 0.31640625, "learning_rate": 0.0002199738086679493, "loss": 0.4329, "step": 32380 }, { "epoch": 0.8220482034750162, "grad_norm": 0.333984375, "learning_rate": 0.00021994441639361275, "loss": 0.4393, "step": 32385 }, { "epoch": 0.8221751215240319, "grad_norm": 0.330078125, "learning_rate": 0.0002199150206871315, "loss": 0.4408, "step": 32390 }, { "epoch": 0.8223020395730477, "grad_norm": 0.333984375, "learning_rate": 0.00021988562154994808, "loss": 0.4288, "step": 32395 }, { "epoch": 0.8224289576220635, "grad_norm": 0.314453125, "learning_rate": 0.00021985621898350498, "loss": 0.4315, "step": 32400 }, { "epoch": 0.8225558756710792, "grad_norm": 0.35546875, "learning_rate": 0.0002198268129892451, "loss": 0.4463, "step": 32405 }, { "epoch": 0.8226827937200949, "grad_norm": 0.34765625, "learning_rate": 0.00021979740356861132, "loss": 0.4543, "step": 32410 }, { "epoch": 0.8228097117691107, "grad_norm": 0.3515625, "learning_rate": 0.00021976799072304673, "loss": 0.4415, "step": 32415 }, { "epoch": 0.8229366298181264, "grad_norm": 0.365234375, "learning_rate": 0.00021973857445399463, "loss": 0.4379, "step": 32420 }, { "epoch": 0.8230635478671422, "grad_norm": 0.357421875, "learning_rate": 0.00021970915476289847, "loss": 0.4615, "step": 32425 }, { "epoch": 0.823190465916158, "grad_norm": 0.337890625, "learning_rate": 0.0002196797316512018, "loss": 0.4488, "step": 32430 }, { "epoch": 0.8233173839651737, "grad_norm": 0.341796875, "learning_rate": 0.00021965030512034856, "loss": 0.4537, "step": 32435 }, { "epoch": 0.8234443020141894, "grad_norm": 0.375, "learning_rate": 0.00021962087517178253, "loss": 0.4771, "step": 32440 }, { "epoch": 0.8235712200632052, "grad_norm": 0.337890625, "learning_rate": 0.00021959144180694787, "loss": 0.4194, "step": 32445 }, { "epoch": 0.8236981381122209, "grad_norm": 0.34765625, "learning_rate": 0.00021956200502728894, "loss": 0.4586, "step": 32450 }, { "epoch": 0.8238250561612367, "grad_norm": 0.376953125, "learning_rate": 0.00021953256483425008, "loss": 0.4456, "step": 32455 }, { "epoch": 0.8239519742102525, "grad_norm": 0.3515625, "learning_rate": 0.00021950312122927596, "loss": 0.4383, "step": 32460 }, { "epoch": 0.8240788922592682, "grad_norm": 0.34765625, "learning_rate": 0.00021947367421381138, "loss": 0.4896, "step": 32465 }, { "epoch": 0.824205810308284, "grad_norm": 0.3359375, "learning_rate": 0.0002194442237893013, "loss": 0.4745, "step": 32470 }, { "epoch": 0.8243327283572996, "grad_norm": 0.341796875, "learning_rate": 0.00021941476995719083, "loss": 0.4429, "step": 32475 }, { "epoch": 0.8244596464063154, "grad_norm": 0.3359375, "learning_rate": 0.00021938531271892522, "loss": 0.4392, "step": 32480 }, { "epoch": 0.8245865644553312, "grad_norm": 0.341796875, "learning_rate": 0.00021935585207594996, "loss": 0.4332, "step": 32485 }, { "epoch": 0.824713482504347, "grad_norm": 0.328125, "learning_rate": 0.00021932638802971065, "loss": 0.4334, "step": 32490 }, { "epoch": 0.8248404005533627, "grad_norm": 0.35546875, "learning_rate": 0.00021929692058165313, "loss": 0.4468, "step": 32495 }, { "epoch": 0.8249673186023785, "grad_norm": 0.341796875, "learning_rate": 0.00021926744973322325, "loss": 0.4687, "step": 32500 }, { "epoch": 0.8250942366513941, "grad_norm": 0.359375, "learning_rate": 0.00021923797548586727, "loss": 0.4388, "step": 32505 }, { "epoch": 0.8252211547004099, "grad_norm": 0.349609375, "learning_rate": 0.0002192084978410314, "loss": 0.448, "step": 32510 }, { "epoch": 0.8253480727494257, "grad_norm": 0.345703125, "learning_rate": 0.00021917901680016216, "loss": 0.458, "step": 32515 }, { "epoch": 0.8254749907984414, "grad_norm": 0.34375, "learning_rate": 0.00021914953236470603, "loss": 0.4529, "step": 32520 }, { "epoch": 0.8256019088474572, "grad_norm": 0.32421875, "learning_rate": 0.00021912004453610994, "loss": 0.451, "step": 32525 }, { "epoch": 0.825728826896473, "grad_norm": 0.30859375, "learning_rate": 0.0002190905533158208, "loss": 0.4549, "step": 32530 }, { "epoch": 0.8258557449454887, "grad_norm": 0.361328125, "learning_rate": 0.00021906105870528572, "loss": 0.4739, "step": 32535 }, { "epoch": 0.8259826629945044, "grad_norm": 0.34765625, "learning_rate": 0.00021903156070595198, "loss": 0.4497, "step": 32540 }, { "epoch": 0.8261095810435202, "grad_norm": 0.35546875, "learning_rate": 0.0002190020593192671, "loss": 0.4397, "step": 32545 }, { "epoch": 0.8262364990925359, "grad_norm": 0.3203125, "learning_rate": 0.0002189725545466786, "loss": 0.4608, "step": 32550 }, { "epoch": 0.8263634171415517, "grad_norm": 0.373046875, "learning_rate": 0.00021894304638963436, "loss": 0.4715, "step": 32555 }, { "epoch": 0.8264903351905675, "grad_norm": 0.322265625, "learning_rate": 0.0002189135348495823, "loss": 0.4253, "step": 32560 }, { "epoch": 0.8266172532395832, "grad_norm": 0.326171875, "learning_rate": 0.00021888401992797052, "loss": 0.4367, "step": 32565 }, { "epoch": 0.826744171288599, "grad_norm": 0.34765625, "learning_rate": 0.00021885450162624734, "loss": 0.4435, "step": 32570 }, { "epoch": 0.8268710893376147, "grad_norm": 0.328125, "learning_rate": 0.00021882497994586114, "loss": 0.4525, "step": 32575 }, { "epoch": 0.8269980073866304, "grad_norm": 0.6015625, "learning_rate": 0.0002187954548882606, "loss": 0.4652, "step": 32580 }, { "epoch": 0.8271249254356462, "grad_norm": 0.3671875, "learning_rate": 0.00021876592645489457, "loss": 0.448, "step": 32585 }, { "epoch": 0.827251843484662, "grad_norm": 0.32421875, "learning_rate": 0.00021873639464721186, "loss": 0.4474, "step": 32590 }, { "epoch": 0.8273787615336777, "grad_norm": 0.35546875, "learning_rate": 0.00021870685946666165, "loss": 0.4203, "step": 32595 }, { "epoch": 0.8275056795826935, "grad_norm": 0.330078125, "learning_rate": 0.00021867732091469325, "loss": 0.4468, "step": 32600 }, { "epoch": 0.8276325976317092, "grad_norm": 0.359375, "learning_rate": 0.00021864777899275603, "loss": 0.4551, "step": 32605 }, { "epoch": 0.8277595156807249, "grad_norm": 0.33984375, "learning_rate": 0.00021861823370229969, "loss": 0.4604, "step": 32610 }, { "epoch": 0.8278864337297407, "grad_norm": 0.337890625, "learning_rate": 0.0002185886850447739, "loss": 0.4458, "step": 32615 }, { "epoch": 0.8280133517787565, "grad_norm": 0.326171875, "learning_rate": 0.00021855913302162863, "loss": 0.4455, "step": 32620 }, { "epoch": 0.8281402698277722, "grad_norm": 0.353515625, "learning_rate": 0.00021852957763431407, "loss": 0.4526, "step": 32625 }, { "epoch": 0.828267187876788, "grad_norm": 0.353515625, "learning_rate": 0.00021850001888428045, "loss": 0.4538, "step": 32630 }, { "epoch": 0.8283941059258038, "grad_norm": 0.359375, "learning_rate": 0.00021847045677297815, "loss": 0.4646, "step": 32635 }, { "epoch": 0.8285210239748194, "grad_norm": 0.3359375, "learning_rate": 0.00021844089130185782, "loss": 0.4592, "step": 32640 }, { "epoch": 0.8286479420238352, "grad_norm": 0.3359375, "learning_rate": 0.0002184113224723702, "loss": 0.4372, "step": 32645 }, { "epoch": 0.828774860072851, "grad_norm": 0.359375, "learning_rate": 0.00021838175028596627, "loss": 0.4616, "step": 32650 }, { "epoch": 0.8289017781218667, "grad_norm": 0.369140625, "learning_rate": 0.0002183521747440971, "loss": 0.4609, "step": 32655 }, { "epoch": 0.8290286961708825, "grad_norm": 0.3515625, "learning_rate": 0.0002183225958482139, "loss": 0.4511, "step": 32660 }, { "epoch": 0.8291556142198983, "grad_norm": 0.330078125, "learning_rate": 0.00021829301359976814, "loss": 0.4281, "step": 32665 }, { "epoch": 0.8292825322689139, "grad_norm": 0.349609375, "learning_rate": 0.00021826342800021145, "loss": 0.4745, "step": 32670 }, { "epoch": 0.8294094503179297, "grad_norm": 0.34375, "learning_rate": 0.0002182338390509955, "loss": 0.4483, "step": 32675 }, { "epoch": 0.8295363683669454, "grad_norm": 0.333984375, "learning_rate": 0.00021820424675357227, "loss": 0.408, "step": 32680 }, { "epoch": 0.8296632864159612, "grad_norm": 0.328125, "learning_rate": 0.0002181746511093938, "loss": 0.4785, "step": 32685 }, { "epoch": 0.829790204464977, "grad_norm": 0.388671875, "learning_rate": 0.00021814505211991238, "loss": 0.4543, "step": 32690 }, { "epoch": 0.8299171225139927, "grad_norm": 0.33984375, "learning_rate": 0.00021811544978658038, "loss": 0.4116, "step": 32695 }, { "epoch": 0.8300440405630085, "grad_norm": 0.33984375, "learning_rate": 0.0002180858441108504, "loss": 0.4419, "step": 32700 }, { "epoch": 0.8301709586120242, "grad_norm": 0.34765625, "learning_rate": 0.0002180562350941752, "loss": 0.4433, "step": 32705 }, { "epoch": 0.8302978766610399, "grad_norm": 0.341796875, "learning_rate": 0.0002180266227380076, "loss": 0.4532, "step": 32710 }, { "epoch": 0.8304247947100557, "grad_norm": 0.3515625, "learning_rate": 0.00021799700704380073, "loss": 0.4548, "step": 32715 }, { "epoch": 0.8305517127590715, "grad_norm": 0.34765625, "learning_rate": 0.00021796738801300783, "loss": 0.4457, "step": 32720 }, { "epoch": 0.8306786308080872, "grad_norm": 0.345703125, "learning_rate": 0.00021793776564708227, "loss": 0.4186, "step": 32725 }, { "epoch": 0.830805548857103, "grad_norm": 0.33984375, "learning_rate": 0.00021790813994747763, "loss": 0.4641, "step": 32730 }, { "epoch": 0.8309324669061188, "grad_norm": 0.3125, "learning_rate": 0.00021787851091564762, "loss": 0.4231, "step": 32735 }, { "epoch": 0.8310593849551344, "grad_norm": 0.3359375, "learning_rate": 0.00021784887855304607, "loss": 0.4577, "step": 32740 }, { "epoch": 0.8311863030041502, "grad_norm": 0.369140625, "learning_rate": 0.00021781924286112705, "loss": 0.4603, "step": 32745 }, { "epoch": 0.831313221053166, "grad_norm": 0.353515625, "learning_rate": 0.00021778960384134487, "loss": 0.4346, "step": 32750 }, { "epoch": 0.8314401391021817, "grad_norm": 0.3125, "learning_rate": 0.0002177599614951538, "loss": 0.4665, "step": 32755 }, { "epoch": 0.8315670571511975, "grad_norm": 0.333984375, "learning_rate": 0.00021773031582400846, "loss": 0.4503, "step": 32760 }, { "epoch": 0.8316939752002133, "grad_norm": 0.33984375, "learning_rate": 0.00021770066682936343, "loss": 0.4476, "step": 32765 }, { "epoch": 0.8318208932492289, "grad_norm": 0.33984375, "learning_rate": 0.00021767101451267373, "loss": 0.4654, "step": 32770 }, { "epoch": 0.8319478112982447, "grad_norm": 0.34375, "learning_rate": 0.0002176413588753942, "loss": 0.45, "step": 32775 }, { "epoch": 0.8320747293472605, "grad_norm": 0.33984375, "learning_rate": 0.0002176116999189802, "loss": 0.4744, "step": 32780 }, { "epoch": 0.8322016473962762, "grad_norm": 0.298828125, "learning_rate": 0.00021758203764488698, "loss": 0.4344, "step": 32785 }, { "epoch": 0.832328565445292, "grad_norm": 0.33984375, "learning_rate": 0.00021755237205457013, "loss": 0.4287, "step": 32790 }, { "epoch": 0.8324554834943078, "grad_norm": 0.330078125, "learning_rate": 0.0002175227031494853, "loss": 0.4498, "step": 32795 }, { "epoch": 0.8325824015433235, "grad_norm": 0.34765625, "learning_rate": 0.0002174930309310883, "loss": 0.4521, "step": 32800 }, { "epoch": 0.8327093195923392, "grad_norm": 0.326171875, "learning_rate": 0.0002174633554008352, "loss": 0.4453, "step": 32805 }, { "epoch": 0.832836237641355, "grad_norm": 0.419921875, "learning_rate": 0.00021743367656018204, "loss": 0.4393, "step": 32810 }, { "epoch": 0.8329631556903707, "grad_norm": 0.330078125, "learning_rate": 0.00021740399441058528, "loss": 0.4489, "step": 32815 }, { "epoch": 0.8330900737393865, "grad_norm": 0.369140625, "learning_rate": 0.00021737430895350137, "loss": 0.4473, "step": 32820 }, { "epoch": 0.8332169917884023, "grad_norm": 0.34765625, "learning_rate": 0.00021734462019038693, "loss": 0.4305, "step": 32825 }, { "epoch": 0.833343909837418, "grad_norm": 0.3515625, "learning_rate": 0.00021731492812269884, "loss": 0.4265, "step": 32830 }, { "epoch": 0.8334708278864338, "grad_norm": 0.33984375, "learning_rate": 0.00021728523275189402, "loss": 0.4846, "step": 32835 }, { "epoch": 0.8335977459354494, "grad_norm": 0.322265625, "learning_rate": 0.00021725553407942963, "loss": 0.4447, "step": 32840 }, { "epoch": 0.8337246639844652, "grad_norm": 0.330078125, "learning_rate": 0.000217225832106763, "loss": 0.4561, "step": 32845 }, { "epoch": 0.833851582033481, "grad_norm": 0.32421875, "learning_rate": 0.00021719612683535153, "loss": 0.4369, "step": 32850 }, { "epoch": 0.8339785000824967, "grad_norm": 0.337890625, "learning_rate": 0.00021716641826665292, "loss": 0.4343, "step": 32855 }, { "epoch": 0.8341054181315125, "grad_norm": 0.341796875, "learning_rate": 0.00021713670640212488, "loss": 0.4673, "step": 32860 }, { "epoch": 0.8342323361805283, "grad_norm": 0.36328125, "learning_rate": 0.00021710699124322542, "loss": 0.4739, "step": 32865 }, { "epoch": 0.8343592542295439, "grad_norm": 0.32421875, "learning_rate": 0.00021707727279141265, "loss": 0.4455, "step": 32870 }, { "epoch": 0.8344861722785597, "grad_norm": 0.369140625, "learning_rate": 0.00021704755104814477, "loss": 0.4398, "step": 32875 }, { "epoch": 0.8346130903275755, "grad_norm": 0.380859375, "learning_rate": 0.00021701782601488034, "loss": 0.4521, "step": 32880 }, { "epoch": 0.8347400083765912, "grad_norm": 0.341796875, "learning_rate": 0.00021698809769307785, "loss": 0.4557, "step": 32885 }, { "epoch": 0.834866926425607, "grad_norm": 0.35546875, "learning_rate": 0.00021695836608419608, "loss": 0.4689, "step": 32890 }, { "epoch": 0.8349938444746228, "grad_norm": 0.322265625, "learning_rate": 0.00021692863118969396, "loss": 0.4235, "step": 32895 }, { "epoch": 0.8351207625236385, "grad_norm": 0.365234375, "learning_rate": 0.00021689889301103058, "loss": 0.4704, "step": 32900 }, { "epoch": 0.8352476805726542, "grad_norm": 0.357421875, "learning_rate": 0.0002168691515496651, "loss": 0.4351, "step": 32905 }, { "epoch": 0.83537459862167, "grad_norm": 0.35546875, "learning_rate": 0.0002168394068070571, "loss": 0.4389, "step": 32910 }, { "epoch": 0.8355015166706857, "grad_norm": 0.349609375, "learning_rate": 0.000216809658784666, "loss": 0.4399, "step": 32915 }, { "epoch": 0.8356284347197015, "grad_norm": 0.34765625, "learning_rate": 0.00021677990748395154, "loss": 0.4522, "step": 32920 }, { "epoch": 0.8357553527687173, "grad_norm": 0.365234375, "learning_rate": 0.0002167501529063736, "loss": 0.4618, "step": 32925 }, { "epoch": 0.835882270817733, "grad_norm": 0.34375, "learning_rate": 0.0002167203950533923, "loss": 0.4495, "step": 32930 }, { "epoch": 0.8360091888667487, "grad_norm": 0.33203125, "learning_rate": 0.00021669063392646776, "loss": 0.4176, "step": 32935 }, { "epoch": 0.8361361069157645, "grad_norm": 0.333984375, "learning_rate": 0.00021666086952706038, "loss": 0.4326, "step": 32940 }, { "epoch": 0.8362630249647802, "grad_norm": 0.357421875, "learning_rate": 0.00021663110185663064, "loss": 0.437, "step": 32945 }, { "epoch": 0.836389943013796, "grad_norm": 0.34375, "learning_rate": 0.00021660133091663933, "loss": 0.4709, "step": 32950 }, { "epoch": 0.8365168610628118, "grad_norm": 0.34375, "learning_rate": 0.00021657155670854725, "loss": 0.4377, "step": 32955 }, { "epoch": 0.8366437791118275, "grad_norm": 0.328125, "learning_rate": 0.00021654177923381538, "loss": 0.4533, "step": 32960 }, { "epoch": 0.8367706971608433, "grad_norm": 0.3359375, "learning_rate": 0.00021651199849390495, "loss": 0.4526, "step": 32965 }, { "epoch": 0.836897615209859, "grad_norm": 0.34765625, "learning_rate": 0.00021648221449027718, "loss": 0.4582, "step": 32970 }, { "epoch": 0.8370245332588747, "grad_norm": 0.34375, "learning_rate": 0.00021645242722439367, "loss": 0.4237, "step": 32975 }, { "epoch": 0.8371514513078905, "grad_norm": 0.33203125, "learning_rate": 0.00021642263669771603, "loss": 0.421, "step": 32980 }, { "epoch": 0.8372783693569062, "grad_norm": 0.29296875, "learning_rate": 0.00021639284291170602, "loss": 0.3871, "step": 32985 }, { "epoch": 0.837405287405922, "grad_norm": 0.287109375, "learning_rate": 0.0002163630458678257, "loss": 0.4319, "step": 32990 }, { "epoch": 0.8375322054549378, "grad_norm": 0.322265625, "learning_rate": 0.00021633324556753712, "loss": 0.438, "step": 32995 }, { "epoch": 0.8376591235039536, "grad_norm": 0.345703125, "learning_rate": 0.00021630344201230262, "loss": 0.4369, "step": 33000 }, { "epoch": 0.8377860415529692, "grad_norm": 0.328125, "learning_rate": 0.00021627363520358462, "loss": 0.4319, "step": 33005 }, { "epoch": 0.837912959601985, "grad_norm": 0.40625, "learning_rate": 0.00021624382514284578, "loss": 0.4688, "step": 33010 }, { "epoch": 0.8380398776510007, "grad_norm": 0.3359375, "learning_rate": 0.00021621401183154882, "loss": 0.4475, "step": 33015 }, { "epoch": 0.8381667957000165, "grad_norm": 0.33984375, "learning_rate": 0.00021618419527115663, "loss": 0.4223, "step": 33020 }, { "epoch": 0.8382937137490323, "grad_norm": 0.333984375, "learning_rate": 0.00021615437546313238, "loss": 0.4598, "step": 33025 }, { "epoch": 0.838420631798048, "grad_norm": 0.4453125, "learning_rate": 0.00021612455240893925, "loss": 0.4584, "step": 33030 }, { "epoch": 0.8385475498470637, "grad_norm": 0.341796875, "learning_rate": 0.00021609472611004067, "loss": 0.4725, "step": 33035 }, { "epoch": 0.8386744678960795, "grad_norm": 0.326171875, "learning_rate": 0.00021606489656790024, "loss": 0.4307, "step": 33040 }, { "epoch": 0.8388013859450952, "grad_norm": 0.333984375, "learning_rate": 0.00021603506378398168, "loss": 0.4227, "step": 33045 }, { "epoch": 0.838928303994111, "grad_norm": 0.380859375, "learning_rate": 0.0002160052277597488, "loss": 0.4592, "step": 33050 }, { "epoch": 0.8390552220431268, "grad_norm": 0.37890625, "learning_rate": 0.00021597538849666572, "loss": 0.4403, "step": 33055 }, { "epoch": 0.8391821400921425, "grad_norm": 0.34765625, "learning_rate": 0.00021594554599619654, "loss": 0.4647, "step": 33060 }, { "epoch": 0.8393090581411583, "grad_norm": 0.3515625, "learning_rate": 0.0002159157002598057, "loss": 0.4674, "step": 33065 }, { "epoch": 0.839435976190174, "grad_norm": 0.361328125, "learning_rate": 0.00021588585128895775, "loss": 0.4663, "step": 33070 }, { "epoch": 0.8395628942391897, "grad_norm": 0.3515625, "learning_rate": 0.0002158559990851173, "loss": 0.4617, "step": 33075 }, { "epoch": 0.8396898122882055, "grad_norm": 0.341796875, "learning_rate": 0.00021582614364974923, "loss": 0.4819, "step": 33080 }, { "epoch": 0.8398167303372213, "grad_norm": 0.87890625, "learning_rate": 0.0002157962849843185, "loss": 0.395, "step": 33085 }, { "epoch": 0.839943648386237, "grad_norm": 0.310546875, "learning_rate": 0.00021576642309029028, "loss": 0.4003, "step": 33090 }, { "epoch": 0.8400705664352528, "grad_norm": 0.34765625, "learning_rate": 0.00021573655796912984, "loss": 0.4846, "step": 33095 }, { "epoch": 0.8401974844842685, "grad_norm": 0.3203125, "learning_rate": 0.0002157066896223027, "loss": 0.4505, "step": 33100 }, { "epoch": 0.8403244025332842, "grad_norm": 0.3359375, "learning_rate": 0.00021567681805127447, "loss": 0.4634, "step": 33105 }, { "epoch": 0.8404513205823, "grad_norm": 0.361328125, "learning_rate": 0.00021564694325751095, "loss": 0.4478, "step": 33110 }, { "epoch": 0.8405782386313158, "grad_norm": 0.326171875, "learning_rate": 0.00021561706524247812, "loss": 0.4531, "step": 33115 }, { "epoch": 0.8407051566803315, "grad_norm": 0.341796875, "learning_rate": 0.000215587184007642, "loss": 0.4585, "step": 33120 }, { "epoch": 0.8408320747293473, "grad_norm": 0.32421875, "learning_rate": 0.00021555729955446886, "loss": 0.7461, "step": 33125 }, { "epoch": 0.8409589927783631, "grad_norm": 0.330078125, "learning_rate": 0.00021552741188442517, "loss": 0.4183, "step": 33130 }, { "epoch": 0.8410859108273787, "grad_norm": 0.34375, "learning_rate": 0.0002154975209989775, "loss": 0.4429, "step": 33135 }, { "epoch": 0.8412128288763945, "grad_norm": 0.34765625, "learning_rate": 0.00021546762689959254, "loss": 0.457, "step": 33140 }, { "epoch": 0.8413397469254102, "grad_norm": 0.341796875, "learning_rate": 0.00021543772958773717, "loss": 0.4311, "step": 33145 }, { "epoch": 0.841466664974426, "grad_norm": 0.359375, "learning_rate": 0.00021540782906487852, "loss": 0.4537, "step": 33150 }, { "epoch": 0.8415935830234418, "grad_norm": 0.353515625, "learning_rate": 0.00021537792533248375, "loss": 0.4671, "step": 33155 }, { "epoch": 0.8417205010724575, "grad_norm": 0.36328125, "learning_rate": 0.00021534801839202025, "loss": 0.4894, "step": 33160 }, { "epoch": 0.8418474191214733, "grad_norm": 0.34375, "learning_rate": 0.0002153181082449555, "loss": 0.4547, "step": 33165 }, { "epoch": 0.841974337170489, "grad_norm": 0.310546875, "learning_rate": 0.00021528819489275718, "loss": 0.4257, "step": 33170 }, { "epoch": 0.8421012552195047, "grad_norm": 0.33984375, "learning_rate": 0.00021525827833689318, "loss": 0.4244, "step": 33175 }, { "epoch": 0.8422281732685205, "grad_norm": 0.3359375, "learning_rate": 0.0002152283585788314, "loss": 0.4392, "step": 33180 }, { "epoch": 0.8423550913175363, "grad_norm": 0.357421875, "learning_rate": 0.0002151984356200401, "loss": 0.4645, "step": 33185 }, { "epoch": 0.842482009366552, "grad_norm": 0.333984375, "learning_rate": 0.00021516850946198747, "loss": 0.437, "step": 33190 }, { "epoch": 0.8426089274155678, "grad_norm": 0.34375, "learning_rate": 0.0002151385801061421, "loss": 0.4259, "step": 33195 }, { "epoch": 0.8427358454645835, "grad_norm": 0.33984375, "learning_rate": 0.00021510864755397254, "loss": 0.4764, "step": 33200 }, { "epoch": 0.8428627635135992, "grad_norm": 0.3046875, "learning_rate": 0.0002150787118069476, "loss": 0.4321, "step": 33205 }, { "epoch": 0.842989681562615, "grad_norm": 0.33203125, "learning_rate": 0.00021504877286653617, "loss": 0.4466, "step": 33210 }, { "epoch": 0.8431165996116308, "grad_norm": 0.318359375, "learning_rate": 0.00021501883073420737, "loss": 0.4366, "step": 33215 }, { "epoch": 0.8432435176606465, "grad_norm": 0.35546875, "learning_rate": 0.00021498888541143042, "loss": 0.4542, "step": 33220 }, { "epoch": 0.8433704357096623, "grad_norm": 0.333984375, "learning_rate": 0.0002149589368996748, "loss": 0.4608, "step": 33225 }, { "epoch": 0.8434973537586781, "grad_norm": 0.361328125, "learning_rate": 0.00021492898520040997, "loss": 0.4785, "step": 33230 }, { "epoch": 0.8436242718076937, "grad_norm": 0.34765625, "learning_rate": 0.0002148990303151057, "loss": 0.4769, "step": 33235 }, { "epoch": 0.8437511898567095, "grad_norm": 0.330078125, "learning_rate": 0.00021486907224523192, "loss": 0.4279, "step": 33240 }, { "epoch": 0.8438781079057253, "grad_norm": 0.330078125, "learning_rate": 0.00021483911099225858, "loss": 0.4365, "step": 33245 }, { "epoch": 0.844005025954741, "grad_norm": 0.345703125, "learning_rate": 0.00021480914655765587, "loss": 0.4462, "step": 33250 }, { "epoch": 0.8441319440037568, "grad_norm": 0.359375, "learning_rate": 0.00021477917894289418, "loss": 0.4586, "step": 33255 }, { "epoch": 0.8442588620527726, "grad_norm": 0.37890625, "learning_rate": 0.000214749208149444, "loss": 0.4475, "step": 33260 }, { "epoch": 0.8443857801017883, "grad_norm": 0.3359375, "learning_rate": 0.00021471923417877591, "loss": 0.4541, "step": 33265 }, { "epoch": 0.844512698150804, "grad_norm": 0.337890625, "learning_rate": 0.0002146892570323608, "loss": 0.4175, "step": 33270 }, { "epoch": 0.8446396161998198, "grad_norm": 0.361328125, "learning_rate": 0.00021465927671166966, "loss": 0.4697, "step": 33275 }, { "epoch": 0.8447665342488355, "grad_norm": 0.30859375, "learning_rate": 0.00021462929321817352, "loss": 0.4557, "step": 33280 }, { "epoch": 0.8448934522978513, "grad_norm": 0.318359375, "learning_rate": 0.00021459930655334374, "loss": 0.4558, "step": 33285 }, { "epoch": 0.8450203703468671, "grad_norm": 0.33984375, "learning_rate": 0.0002145693167186517, "loss": 0.4305, "step": 33290 }, { "epoch": 0.8451472883958828, "grad_norm": 0.34765625, "learning_rate": 0.000214539323715569, "loss": 0.4544, "step": 33295 }, { "epoch": 0.8452742064448985, "grad_norm": 0.353515625, "learning_rate": 0.00021450932754556743, "loss": 0.4631, "step": 33300 }, { "epoch": 0.8454011244939142, "grad_norm": 0.318359375, "learning_rate": 0.0002144793282101188, "loss": 0.4585, "step": 33305 }, { "epoch": 0.84552804254293, "grad_norm": 0.32421875, "learning_rate": 0.00021444932571069525, "loss": 0.4407, "step": 33310 }, { "epoch": 0.8456549605919458, "grad_norm": 0.34765625, "learning_rate": 0.00021441932004876896, "loss": 0.4626, "step": 33315 }, { "epoch": 0.8457818786409615, "grad_norm": 0.3515625, "learning_rate": 0.0002143893112258123, "loss": 0.4499, "step": 33320 }, { "epoch": 0.8459087966899773, "grad_norm": 0.359375, "learning_rate": 0.0002143592992432978, "loss": 0.4769, "step": 33325 }, { "epoch": 0.8460357147389931, "grad_norm": 0.36328125, "learning_rate": 0.00021432928410269813, "loss": 0.4516, "step": 33330 }, { "epoch": 0.8461626327880087, "grad_norm": 0.306640625, "learning_rate": 0.00021429926580548606, "loss": 0.4517, "step": 33335 }, { "epoch": 0.8462895508370245, "grad_norm": 0.357421875, "learning_rate": 0.00021426924435313468, "loss": 0.4549, "step": 33340 }, { "epoch": 0.8464164688860403, "grad_norm": 0.34375, "learning_rate": 0.00021423921974711703, "loss": 0.4358, "step": 33345 }, { "epoch": 0.846543386935056, "grad_norm": 0.337890625, "learning_rate": 0.00021420919198890647, "loss": 0.4462, "step": 33350 }, { "epoch": 0.8466703049840718, "grad_norm": 0.35546875, "learning_rate": 0.00021417916107997645, "loss": 0.4338, "step": 33355 }, { "epoch": 0.8467972230330876, "grad_norm": 0.33203125, "learning_rate": 0.0002141491270218006, "loss": 0.4436, "step": 33360 }, { "epoch": 0.8469241410821032, "grad_norm": 0.34765625, "learning_rate": 0.0002141190898158526, "loss": 0.4313, "step": 33365 }, { "epoch": 0.847051059131119, "grad_norm": 0.345703125, "learning_rate": 0.00021408904946360643, "loss": 0.4307, "step": 33370 }, { "epoch": 0.8471779771801348, "grad_norm": 0.345703125, "learning_rate": 0.0002140590059665361, "loss": 0.4592, "step": 33375 }, { "epoch": 0.8473048952291505, "grad_norm": 0.3671875, "learning_rate": 0.00021402895932611586, "loss": 0.4322, "step": 33380 }, { "epoch": 0.8474318132781663, "grad_norm": 0.330078125, "learning_rate": 0.0002139989095438201, "loss": 0.4041, "step": 33385 }, { "epoch": 0.8475587313271821, "grad_norm": 0.3828125, "learning_rate": 0.00021396885662112335, "loss": 0.495, "step": 33390 }, { "epoch": 0.8476856493761978, "grad_norm": 0.33984375, "learning_rate": 0.00021393880055950028, "loss": 0.4564, "step": 33395 }, { "epoch": 0.8478125674252135, "grad_norm": 0.361328125, "learning_rate": 0.00021390874136042575, "loss": 0.4584, "step": 33400 }, { "epoch": 0.8479394854742293, "grad_norm": 0.328125, "learning_rate": 0.00021387867902537472, "loss": 0.4378, "step": 33405 }, { "epoch": 0.848066403523245, "grad_norm": 0.328125, "learning_rate": 0.00021384861355582235, "loss": 0.4311, "step": 33410 }, { "epoch": 0.8481933215722608, "grad_norm": 0.314453125, "learning_rate": 0.000213818544953244, "loss": 0.4213, "step": 33415 }, { "epoch": 0.8483202396212766, "grad_norm": 0.330078125, "learning_rate": 0.00021378847321911504, "loss": 0.4587, "step": 33420 }, { "epoch": 0.8484471576702923, "grad_norm": 0.31640625, "learning_rate": 0.0002137583983549111, "loss": 0.4321, "step": 33425 }, { "epoch": 0.8485740757193081, "grad_norm": 0.341796875, "learning_rate": 0.0002137283203621079, "loss": 0.4408, "step": 33430 }, { "epoch": 0.8487009937683238, "grad_norm": 0.33984375, "learning_rate": 0.00021369823924218147, "loss": 0.4342, "step": 33435 }, { "epoch": 0.8488279118173395, "grad_norm": 0.34375, "learning_rate": 0.0002136681549966078, "loss": 0.4456, "step": 33440 }, { "epoch": 0.8489548298663553, "grad_norm": 0.33984375, "learning_rate": 0.00021363806762686314, "loss": 0.4459, "step": 33445 }, { "epoch": 0.849081747915371, "grad_norm": 0.3359375, "learning_rate": 0.00021360797713442385, "loss": 0.4759, "step": 33450 }, { "epoch": 0.8492086659643868, "grad_norm": 0.349609375, "learning_rate": 0.00021357788352076643, "loss": 0.4328, "step": 33455 }, { "epoch": 0.8493355840134026, "grad_norm": 0.337890625, "learning_rate": 0.00021354778678736762, "loss": 0.4479, "step": 33460 }, { "epoch": 0.8494625020624182, "grad_norm": 0.36328125, "learning_rate": 0.0002135176869357042, "loss": 0.4628, "step": 33465 }, { "epoch": 0.849589420111434, "grad_norm": 0.328125, "learning_rate": 0.0002134875839672532, "loss": 0.4434, "step": 33470 }, { "epoch": 0.8497163381604498, "grad_norm": 0.32421875, "learning_rate": 0.00021345747788349174, "loss": 0.4452, "step": 33475 }, { "epoch": 0.8498432562094655, "grad_norm": 0.3671875, "learning_rate": 0.0002134273686858971, "loss": 0.4255, "step": 33480 }, { "epoch": 0.8499701742584813, "grad_norm": 0.318359375, "learning_rate": 0.00021339725637594678, "loss": 0.4318, "step": 33485 }, { "epoch": 0.8500970923074971, "grad_norm": 0.330078125, "learning_rate": 0.00021336714095511835, "loss": 0.4489, "step": 33490 }, { "epoch": 0.8502240103565128, "grad_norm": 0.337890625, "learning_rate": 0.00021333702242488955, "loss": 0.4889, "step": 33495 }, { "epoch": 0.8503509284055285, "grad_norm": 0.36328125, "learning_rate": 0.00021330690078673827, "loss": 0.4694, "step": 33500 }, { "epoch": 0.8504778464545443, "grad_norm": 0.375, "learning_rate": 0.00021327677604214258, "loss": 0.4677, "step": 33505 }, { "epoch": 0.85060476450356, "grad_norm": 0.337890625, "learning_rate": 0.0002132466481925807, "loss": 0.4489, "step": 33510 }, { "epoch": 0.8507316825525758, "grad_norm": 0.33984375, "learning_rate": 0.000213216517239531, "loss": 0.4461, "step": 33515 }, { "epoch": 0.8508586006015916, "grad_norm": 0.34375, "learning_rate": 0.00021318638318447195, "loss": 0.4577, "step": 33520 }, { "epoch": 0.8509855186506073, "grad_norm": 42.25, "learning_rate": 0.0002131562460288823, "loss": 0.7502, "step": 33525 }, { "epoch": 0.851112436699623, "grad_norm": 0.310546875, "learning_rate": 0.0002131261057742408, "loss": 0.4482, "step": 33530 }, { "epoch": 0.8512393547486388, "grad_norm": 0.3515625, "learning_rate": 0.0002130959624220264, "loss": 0.4338, "step": 33535 }, { "epoch": 0.8513662727976545, "grad_norm": 0.3359375, "learning_rate": 0.00021306581597371825, "loss": 0.4555, "step": 33540 }, { "epoch": 0.8514931908466703, "grad_norm": 0.337890625, "learning_rate": 0.00021303566643079568, "loss": 0.4492, "step": 33545 }, { "epoch": 0.8516201088956861, "grad_norm": 0.326171875, "learning_rate": 0.00021300551379473804, "loss": 0.4622, "step": 33550 }, { "epoch": 0.8517470269447018, "grad_norm": 0.34765625, "learning_rate": 0.00021297535806702494, "loss": 0.4538, "step": 33555 }, { "epoch": 0.8518739449937176, "grad_norm": 0.357421875, "learning_rate": 0.00021294519924913613, "loss": 0.4223, "step": 33560 }, { "epoch": 0.8520008630427333, "grad_norm": 0.37109375, "learning_rate": 0.0002129150373425514, "loss": 0.4483, "step": 33565 }, { "epoch": 0.852127781091749, "grad_norm": 0.314453125, "learning_rate": 0.00021288487234875092, "loss": 0.4405, "step": 33570 }, { "epoch": 0.8522546991407648, "grad_norm": 0.328125, "learning_rate": 0.00021285470426921478, "loss": 0.4277, "step": 33575 }, { "epoch": 0.8523816171897806, "grad_norm": 0.34375, "learning_rate": 0.00021282453310542335, "loss": 0.4334, "step": 33580 }, { "epoch": 0.8525085352387963, "grad_norm": 0.3671875, "learning_rate": 0.0002127943588588571, "loss": 0.4756, "step": 33585 }, { "epoch": 0.8526354532878121, "grad_norm": 0.333984375, "learning_rate": 0.00021276418153099667, "loss": 0.4689, "step": 33590 }, { "epoch": 0.8527623713368279, "grad_norm": 0.35546875, "learning_rate": 0.00021273400112332283, "loss": 0.4439, "step": 33595 }, { "epoch": 0.8528892893858435, "grad_norm": 0.37109375, "learning_rate": 0.00021270381763731654, "loss": 0.4722, "step": 33600 }, { "epoch": 0.8530162074348593, "grad_norm": 0.35546875, "learning_rate": 0.0002126736310744589, "loss": 0.4588, "step": 33605 }, { "epoch": 0.853143125483875, "grad_norm": 0.365234375, "learning_rate": 0.0002126434414362312, "loss": 0.4859, "step": 33610 }, { "epoch": 0.8532700435328908, "grad_norm": 0.298828125, "learning_rate": 0.00021261324872411477, "loss": 0.4292, "step": 33615 }, { "epoch": 0.8533969615819066, "grad_norm": 0.330078125, "learning_rate": 0.00021258305293959115, "loss": 0.4349, "step": 33620 }, { "epoch": 0.8535238796309224, "grad_norm": 0.359375, "learning_rate": 0.00021255285408414208, "loss": 0.4572, "step": 33625 }, { "epoch": 0.853650797679938, "grad_norm": 0.58984375, "learning_rate": 0.0002125226521592493, "loss": 0.4721, "step": 33630 }, { "epoch": 0.8537777157289538, "grad_norm": 0.330078125, "learning_rate": 0.00021249244716639492, "loss": 0.4293, "step": 33635 }, { "epoch": 0.8539046337779695, "grad_norm": 0.328125, "learning_rate": 0.00021246223910706107, "loss": 0.4367, "step": 33640 }, { "epoch": 0.8540315518269853, "grad_norm": 0.3359375, "learning_rate": 0.00021243202798273004, "loss": 0.4582, "step": 33645 }, { "epoch": 0.8541584698760011, "grad_norm": 0.341796875, "learning_rate": 0.00021240181379488426, "loss": 0.4362, "step": 33650 }, { "epoch": 0.8542853879250168, "grad_norm": 0.333984375, "learning_rate": 0.0002123715965450063, "loss": 0.4429, "step": 33655 }, { "epoch": 0.8544123059740326, "grad_norm": 0.333984375, "learning_rate": 0.00021234137623457895, "loss": 0.4452, "step": 33660 }, { "epoch": 0.8545392240230483, "grad_norm": 0.328125, "learning_rate": 0.00021231115286508514, "loss": 0.4649, "step": 33665 }, { "epoch": 0.854666142072064, "grad_norm": 0.345703125, "learning_rate": 0.00021228092643800785, "loss": 0.4715, "step": 33670 }, { "epoch": 0.8547930601210798, "grad_norm": 0.365234375, "learning_rate": 0.0002122506969548303, "loss": 0.4737, "step": 33675 }, { "epoch": 0.8549199781700956, "grad_norm": 0.341796875, "learning_rate": 0.00021222046441703589, "loss": 0.4401, "step": 33680 }, { "epoch": 0.8550468962191113, "grad_norm": 0.298828125, "learning_rate": 0.00021219022882610806, "loss": 0.4199, "step": 33685 }, { "epoch": 0.8551738142681271, "grad_norm": 0.33203125, "learning_rate": 0.00021215999018353042, "loss": 0.4228, "step": 33690 }, { "epoch": 0.8553007323171429, "grad_norm": 0.341796875, "learning_rate": 0.00021212974849078688, "loss": 0.4462, "step": 33695 }, { "epoch": 0.8554276503661585, "grad_norm": 0.3203125, "learning_rate": 0.00021209950374936133, "loss": 0.4402, "step": 33700 }, { "epoch": 0.8555545684151743, "grad_norm": 0.357421875, "learning_rate": 0.00021206925596073783, "loss": 0.4674, "step": 33705 }, { "epoch": 0.8556814864641901, "grad_norm": 0.33984375, "learning_rate": 0.00021203900512640068, "loss": 0.4272, "step": 33710 }, { "epoch": 0.8558084045132058, "grad_norm": 0.35546875, "learning_rate": 0.00021200875124783428, "loss": 0.4146, "step": 33715 }, { "epoch": 0.8559353225622216, "grad_norm": 0.330078125, "learning_rate": 0.00021197849432652316, "loss": 0.3957, "step": 33720 }, { "epoch": 0.8560622406112374, "grad_norm": 0.35546875, "learning_rate": 0.00021194823436395196, "loss": 0.4627, "step": 33725 }, { "epoch": 0.856189158660253, "grad_norm": 0.36328125, "learning_rate": 0.00021191797136160562, "loss": 0.4301, "step": 33730 }, { "epoch": 0.8563160767092688, "grad_norm": 0.357421875, "learning_rate": 0.0002118877053209691, "loss": 0.4555, "step": 33735 }, { "epoch": 0.8564429947582846, "grad_norm": 0.349609375, "learning_rate": 0.00021185743624352752, "loss": 0.4505, "step": 33740 }, { "epoch": 0.8565699128073003, "grad_norm": 0.34765625, "learning_rate": 0.00021182716413076618, "loss": 0.4336, "step": 33745 }, { "epoch": 0.8566968308563161, "grad_norm": 0.322265625, "learning_rate": 0.00021179688898417053, "loss": 0.464, "step": 33750 }, { "epoch": 0.8568237489053319, "grad_norm": 0.345703125, "learning_rate": 0.00021176661080522612, "loss": 0.424, "step": 33755 }, { "epoch": 0.8569506669543476, "grad_norm": 0.33984375, "learning_rate": 0.00021173632959541874, "loss": 0.4443, "step": 33760 }, { "epoch": 0.8570775850033633, "grad_norm": 0.36328125, "learning_rate": 0.0002117060453562343, "loss": 0.408, "step": 33765 }, { "epoch": 0.857204503052379, "grad_norm": 0.330078125, "learning_rate": 0.00021167575808915873, "loss": 0.4447, "step": 33770 }, { "epoch": 0.8573314211013948, "grad_norm": 0.328125, "learning_rate": 0.00021164546779567836, "loss": 0.4418, "step": 33775 }, { "epoch": 0.8574583391504106, "grad_norm": 0.328125, "learning_rate": 0.00021161517447727937, "loss": 0.4161, "step": 33780 }, { "epoch": 0.8575852571994264, "grad_norm": 0.357421875, "learning_rate": 0.00021158487813544832, "loss": 0.4562, "step": 33785 }, { "epoch": 0.8577121752484421, "grad_norm": 0.32421875, "learning_rate": 0.00021155457877167187, "loss": 0.42, "step": 33790 }, { "epoch": 0.8578390932974578, "grad_norm": 0.34375, "learning_rate": 0.0002115242763874367, "loss": 0.4592, "step": 33795 }, { "epoch": 0.8579660113464735, "grad_norm": 0.353515625, "learning_rate": 0.00021149397098422983, "loss": 0.4413, "step": 33800 }, { "epoch": 0.8580929293954893, "grad_norm": 0.34765625, "learning_rate": 0.00021146366256353833, "loss": 0.4619, "step": 33805 }, { "epoch": 0.8582198474445051, "grad_norm": 0.33984375, "learning_rate": 0.00021143335112684935, "loss": 0.4198, "step": 33810 }, { "epoch": 0.8583467654935208, "grad_norm": 0.384765625, "learning_rate": 0.00021140303667565035, "loss": 0.4742, "step": 33815 }, { "epoch": 0.8584736835425366, "grad_norm": 0.3203125, "learning_rate": 0.0002113727192114287, "loss": 0.4408, "step": 33820 }, { "epoch": 0.8586006015915524, "grad_norm": 0.34375, "learning_rate": 0.00021134239873567224, "loss": 0.4408, "step": 33825 }, { "epoch": 0.858727519640568, "grad_norm": 0.353515625, "learning_rate": 0.00021131207524986874, "loss": 0.4676, "step": 33830 }, { "epoch": 0.8588544376895838, "grad_norm": 0.3515625, "learning_rate": 0.00021128174875550607, "loss": 0.4632, "step": 33835 }, { "epoch": 0.8589813557385996, "grad_norm": 0.337890625, "learning_rate": 0.00021125141925407243, "loss": 0.4332, "step": 33840 }, { "epoch": 0.8591082737876153, "grad_norm": 0.310546875, "learning_rate": 0.000211221086747056, "loss": 0.4419, "step": 33845 }, { "epoch": 0.8592351918366311, "grad_norm": 0.3359375, "learning_rate": 0.00021119075123594527, "loss": 0.4345, "step": 33850 }, { "epoch": 0.8593621098856469, "grad_norm": 0.345703125, "learning_rate": 0.00021116041272222877, "loss": 0.4412, "step": 33855 }, { "epoch": 0.8594890279346626, "grad_norm": 0.3359375, "learning_rate": 0.00021113007120739517, "loss": 0.4085, "step": 33860 }, { "epoch": 0.8596159459836783, "grad_norm": 0.35546875, "learning_rate": 0.0002110997266929333, "loss": 0.4639, "step": 33865 }, { "epoch": 0.8597428640326941, "grad_norm": 0.33203125, "learning_rate": 0.00021106937918033221, "loss": 0.4053, "step": 33870 }, { "epoch": 0.8598697820817098, "grad_norm": 0.341796875, "learning_rate": 0.00021103902867108097, "loss": 0.4529, "step": 33875 }, { "epoch": 0.8599967001307256, "grad_norm": 0.32421875, "learning_rate": 0.00021100867516666888, "loss": 0.4436, "step": 33880 }, { "epoch": 0.8601236181797414, "grad_norm": 0.3671875, "learning_rate": 0.00021097831866858545, "loss": 0.4737, "step": 33885 }, { "epoch": 0.8602505362287571, "grad_norm": 0.349609375, "learning_rate": 0.00021094795917832025, "loss": 0.4374, "step": 33890 }, { "epoch": 0.8603774542777728, "grad_norm": 0.357421875, "learning_rate": 0.00021091759669736293, "loss": 0.4712, "step": 33895 }, { "epoch": 0.8605043723267886, "grad_norm": 0.333984375, "learning_rate": 0.00021088723122720344, "loss": 0.4521, "step": 33900 }, { "epoch": 0.8606312903758043, "grad_norm": 0.35546875, "learning_rate": 0.00021085686276933177, "loss": 0.4661, "step": 33905 }, { "epoch": 0.8607582084248201, "grad_norm": 0.34375, "learning_rate": 0.000210826491325238, "loss": 0.4554, "step": 33910 }, { "epoch": 0.8608851264738359, "grad_norm": 0.35546875, "learning_rate": 0.0002107961168964126, "loss": 0.4743, "step": 33915 }, { "epoch": 0.8610120445228516, "grad_norm": 0.337890625, "learning_rate": 0.00021076573948434593, "loss": 0.4268, "step": 33920 }, { "epoch": 0.8611389625718674, "grad_norm": 0.357421875, "learning_rate": 0.00021073535909052864, "loss": 0.4562, "step": 33925 }, { "epoch": 0.861265880620883, "grad_norm": 0.3515625, "learning_rate": 0.0002107049757164515, "loss": 0.4535, "step": 33930 }, { "epoch": 0.8613927986698988, "grad_norm": 0.34375, "learning_rate": 0.00021067458936360536, "loss": 0.4487, "step": 33935 }, { "epoch": 0.8615197167189146, "grad_norm": 0.34375, "learning_rate": 0.0002106442000334813, "loss": 0.4584, "step": 33940 }, { "epoch": 0.8616466347679304, "grad_norm": 0.318359375, "learning_rate": 0.0002106138077275705, "loss": 0.4453, "step": 33945 }, { "epoch": 0.8617735528169461, "grad_norm": 0.3515625, "learning_rate": 0.0002105834124473643, "loss": 0.4582, "step": 33950 }, { "epoch": 0.8619004708659619, "grad_norm": 0.35546875, "learning_rate": 0.00021055301419435415, "loss": 0.4386, "step": 33955 }, { "epoch": 0.8620273889149775, "grad_norm": 0.3203125, "learning_rate": 0.00021052261297003174, "loss": 0.4317, "step": 33960 }, { "epoch": 0.8621543069639933, "grad_norm": 0.3203125, "learning_rate": 0.00021049220877588883, "loss": 0.4261, "step": 33965 }, { "epoch": 0.8622812250130091, "grad_norm": 0.30078125, "learning_rate": 0.00021046180161341733, "loss": 0.4372, "step": 33970 }, { "epoch": 0.8624081430620248, "grad_norm": 0.357421875, "learning_rate": 0.00021043139148410925, "loss": 0.4576, "step": 33975 }, { "epoch": 0.8625350611110406, "grad_norm": 0.421875, "learning_rate": 0.00021040097838945695, "loss": 0.4253, "step": 33980 }, { "epoch": 0.8626619791600564, "grad_norm": 0.341796875, "learning_rate": 0.00021037056233095267, "loss": 0.4402, "step": 33985 }, { "epoch": 0.8627888972090721, "grad_norm": 0.322265625, "learning_rate": 0.00021034014331008897, "loss": 0.4334, "step": 33990 }, { "epoch": 0.8629158152580878, "grad_norm": 0.328125, "learning_rate": 0.0002103097213283584, "loss": 0.4306, "step": 33995 }, { "epoch": 0.8630427333071036, "grad_norm": 0.30078125, "learning_rate": 0.00021027929638725386, "loss": 0.4217, "step": 34000 }, { "epoch": 0.8631696513561193, "grad_norm": 0.3671875, "learning_rate": 0.0002102488684882683, "loss": 0.4775, "step": 34005 }, { "epoch": 0.8632965694051351, "grad_norm": 0.322265625, "learning_rate": 0.0002102184376328947, "loss": 0.4095, "step": 34010 }, { "epoch": 0.8634234874541509, "grad_norm": 0.328125, "learning_rate": 0.00021018800382262642, "loss": 0.4241, "step": 34015 }, { "epoch": 0.8635504055031666, "grad_norm": 0.33984375, "learning_rate": 0.00021015756705895672, "loss": 0.448, "step": 34020 }, { "epoch": 0.8636773235521824, "grad_norm": 0.341796875, "learning_rate": 0.00021012712734337917, "loss": 0.4187, "step": 34025 }, { "epoch": 0.8638042416011981, "grad_norm": 0.390625, "learning_rate": 0.00021009668467738746, "loss": 0.4861, "step": 34030 }, { "epoch": 0.8639311596502138, "grad_norm": 0.31640625, "learning_rate": 0.00021006623906247535, "loss": 0.4411, "step": 34035 }, { "epoch": 0.8640580776992296, "grad_norm": 0.37109375, "learning_rate": 0.00021003579050013678, "loss": 0.4115, "step": 34040 }, { "epoch": 0.8641849957482454, "grad_norm": 0.37109375, "learning_rate": 0.00021000533899186594, "loss": 0.4509, "step": 34045 }, { "epoch": 0.8643119137972611, "grad_norm": 0.33984375, "learning_rate": 0.00020997488453915701, "loss": 0.4597, "step": 34050 }, { "epoch": 0.8644388318462769, "grad_norm": 0.318359375, "learning_rate": 0.0002099444271435044, "loss": 0.4378, "step": 34055 }, { "epoch": 0.8645657498952926, "grad_norm": 0.349609375, "learning_rate": 0.0002099139668064026, "loss": 0.4414, "step": 34060 }, { "epoch": 0.8646926679443083, "grad_norm": 0.330078125, "learning_rate": 0.00020988350352934635, "loss": 0.4487, "step": 34065 }, { "epoch": 0.8648195859933241, "grad_norm": 0.3671875, "learning_rate": 0.00020985303731383035, "loss": 0.4595, "step": 34070 }, { "epoch": 0.8649465040423399, "grad_norm": 0.314453125, "learning_rate": 0.00020982256816134973, "loss": 0.4279, "step": 34075 }, { "epoch": 0.8650734220913556, "grad_norm": 0.34765625, "learning_rate": 0.00020979209607339952, "loss": 0.4692, "step": 34080 }, { "epoch": 0.8652003401403714, "grad_norm": 0.345703125, "learning_rate": 0.0002097616210514749, "loss": 0.4368, "step": 34085 }, { "epoch": 0.8653272581893872, "grad_norm": 0.33984375, "learning_rate": 0.00020973114309707144, "loss": 0.4701, "step": 34090 }, { "epoch": 0.8654541762384028, "grad_norm": 0.341796875, "learning_rate": 0.00020970066221168455, "loss": 0.4356, "step": 34095 }, { "epoch": 0.8655810942874186, "grad_norm": 0.318359375, "learning_rate": 0.00020967017839680993, "loss": 0.4397, "step": 34100 }, { "epoch": 0.8657080123364344, "grad_norm": 0.333984375, "learning_rate": 0.0002096396916539434, "loss": 0.4331, "step": 34105 }, { "epoch": 0.8658349303854501, "grad_norm": 0.34375, "learning_rate": 0.000209609201984581, "loss": 0.4384, "step": 34110 }, { "epoch": 0.8659618484344659, "grad_norm": 0.353515625, "learning_rate": 0.0002095787093902188, "loss": 0.4528, "step": 34115 }, { "epoch": 0.8660887664834817, "grad_norm": 0.34375, "learning_rate": 0.00020954821387235303, "loss": 0.464, "step": 34120 }, { "epoch": 0.8662156845324974, "grad_norm": 0.35546875, "learning_rate": 0.00020951771543248015, "loss": 0.4368, "step": 34125 }, { "epoch": 0.8663426025815131, "grad_norm": 0.341796875, "learning_rate": 0.0002094872140720967, "loss": 0.4328, "step": 34130 }, { "epoch": 0.8664695206305288, "grad_norm": 0.44921875, "learning_rate": 0.0002094567097926993, "loss": 0.4556, "step": 34135 }, { "epoch": 0.8665964386795446, "grad_norm": 0.341796875, "learning_rate": 0.0002094262025957849, "loss": 0.4429, "step": 34140 }, { "epoch": 0.8667233567285604, "grad_norm": 0.345703125, "learning_rate": 0.00020939569248285033, "loss": 0.4594, "step": 34145 }, { "epoch": 0.8668502747775761, "grad_norm": 0.34765625, "learning_rate": 0.00020936517945539286, "loss": 0.4308, "step": 34150 }, { "epoch": 0.8669771928265919, "grad_norm": 0.330078125, "learning_rate": 0.0002093346635149096, "loss": 0.4539, "step": 34155 }, { "epoch": 0.8671041108756076, "grad_norm": 0.318359375, "learning_rate": 0.00020930414466289814, "loss": 0.4594, "step": 34160 }, { "epoch": 0.8672310289246233, "grad_norm": 0.330078125, "learning_rate": 0.00020927362290085587, "loss": 0.4278, "step": 34165 }, { "epoch": 0.8673579469736391, "grad_norm": 0.349609375, "learning_rate": 0.00020924309823028056, "loss": 0.499, "step": 34170 }, { "epoch": 0.8674848650226549, "grad_norm": 0.35546875, "learning_rate": 0.00020921257065267, "loss": 0.4535, "step": 34175 }, { "epoch": 0.8676117830716706, "grad_norm": 0.37890625, "learning_rate": 0.00020918204016952217, "loss": 0.4374, "step": 34180 }, { "epoch": 0.8677387011206864, "grad_norm": 0.34765625, "learning_rate": 0.00020915150678233523, "loss": 0.4448, "step": 34185 }, { "epoch": 0.8678656191697022, "grad_norm": 0.341796875, "learning_rate": 0.0002091209704926074, "loss": 0.4394, "step": 34190 }, { "epoch": 0.8679925372187178, "grad_norm": 0.322265625, "learning_rate": 0.00020909043130183712, "loss": 0.4574, "step": 34195 }, { "epoch": 0.8681194552677336, "grad_norm": 0.318359375, "learning_rate": 0.00020905988921152287, "loss": 0.4299, "step": 34200 }, { "epoch": 0.8682463733167494, "grad_norm": 0.32421875, "learning_rate": 0.0002090293442231634, "loss": 0.4711, "step": 34205 }, { "epoch": 0.8683732913657651, "grad_norm": 0.337890625, "learning_rate": 0.00020899879633825752, "loss": 0.4686, "step": 34210 }, { "epoch": 0.8685002094147809, "grad_norm": 0.349609375, "learning_rate": 0.00020896824555830426, "loss": 0.4616, "step": 34215 }, { "epoch": 0.8686271274637967, "grad_norm": 0.337890625, "learning_rate": 0.00020893769188480265, "loss": 0.4083, "step": 34220 }, { "epoch": 0.8687540455128123, "grad_norm": 0.3515625, "learning_rate": 0.00020890713531925196, "loss": 0.4913, "step": 34225 }, { "epoch": 0.8688809635618281, "grad_norm": 0.3359375, "learning_rate": 0.0002088765758631516, "loss": 0.4361, "step": 34230 }, { "epoch": 0.8690078816108439, "grad_norm": 0.32421875, "learning_rate": 0.00020884601351800112, "loss": 0.4468, "step": 34235 }, { "epoch": 0.8691347996598596, "grad_norm": 0.33203125, "learning_rate": 0.00020881544828530018, "loss": 0.437, "step": 34240 }, { "epoch": 0.8692617177088754, "grad_norm": 0.33203125, "learning_rate": 0.00020878488016654864, "loss": 0.4451, "step": 34245 }, { "epoch": 0.8693886357578912, "grad_norm": 0.365234375, "learning_rate": 0.00020875430916324648, "loss": 0.4589, "step": 34250 }, { "epoch": 0.8695155538069069, "grad_norm": 0.33984375, "learning_rate": 0.00020872373527689377, "loss": 0.4213, "step": 34255 }, { "epoch": 0.8696424718559226, "grad_norm": 0.310546875, "learning_rate": 0.0002086931585089907, "loss": 0.4369, "step": 34260 }, { "epoch": 0.8697693899049384, "grad_norm": 0.3515625, "learning_rate": 0.00020866257886103777, "loss": 0.5006, "step": 34265 }, { "epoch": 0.8698963079539541, "grad_norm": 0.337890625, "learning_rate": 0.00020863199633453544, "loss": 0.4342, "step": 34270 }, { "epoch": 0.8700232260029699, "grad_norm": 0.333984375, "learning_rate": 0.00020860141093098443, "loss": 0.4415, "step": 34275 }, { "epoch": 0.8701501440519857, "grad_norm": 0.341796875, "learning_rate": 0.00020857082265188546, "loss": 0.4285, "step": 34280 }, { "epoch": 0.8702770621010014, "grad_norm": 0.337890625, "learning_rate": 0.00020854023149873964, "loss": 0.4294, "step": 34285 }, { "epoch": 0.8704039801500172, "grad_norm": 0.3671875, "learning_rate": 0.00020850963747304793, "loss": 0.4469, "step": 34290 }, { "epoch": 0.8705308981990328, "grad_norm": 0.34765625, "learning_rate": 0.00020847904057631165, "loss": 0.4577, "step": 34295 }, { "epoch": 0.8706578162480486, "grad_norm": 0.337890625, "learning_rate": 0.00020844844081003213, "loss": 0.4642, "step": 34300 }, { "epoch": 0.8707847342970644, "grad_norm": 0.36328125, "learning_rate": 0.0002084178381757109, "loss": 0.4632, "step": 34305 }, { "epoch": 0.8709116523460801, "grad_norm": 0.34375, "learning_rate": 0.00020838723267484964, "loss": 0.4622, "step": 34310 }, { "epoch": 0.8710385703950959, "grad_norm": 0.357421875, "learning_rate": 0.0002083566243089501, "loss": 0.4313, "step": 34315 }, { "epoch": 0.8711654884441117, "grad_norm": 0.361328125, "learning_rate": 0.00020832601307951428, "loss": 0.4789, "step": 34320 }, { "epoch": 0.8712924064931273, "grad_norm": 0.34765625, "learning_rate": 0.00020829539898804415, "loss": 0.4538, "step": 34325 }, { "epoch": 0.8714193245421431, "grad_norm": 0.34375, "learning_rate": 0.00020826478203604212, "loss": 0.4718, "step": 34330 }, { "epoch": 0.8715462425911589, "grad_norm": 0.369140625, "learning_rate": 0.00020823416222501037, "loss": 0.4524, "step": 34335 }, { "epoch": 0.8716731606401746, "grad_norm": 0.357421875, "learning_rate": 0.00020820353955645153, "loss": 0.4516, "step": 34340 }, { "epoch": 0.8718000786891904, "grad_norm": 0.359375, "learning_rate": 0.00020817291403186816, "loss": 0.4814, "step": 34345 }, { "epoch": 0.8719269967382062, "grad_norm": 0.326171875, "learning_rate": 0.0002081422856527631, "loss": 0.4554, "step": 34350 }, { "epoch": 0.8720539147872219, "grad_norm": 0.34765625, "learning_rate": 0.0002081116544206392, "loss": 0.4358, "step": 34355 }, { "epoch": 0.8721808328362376, "grad_norm": 0.333984375, "learning_rate": 0.0002080810203369996, "loss": 0.4475, "step": 34360 }, { "epoch": 0.8723077508852534, "grad_norm": 0.3515625, "learning_rate": 0.00020805038340334744, "loss": 0.4644, "step": 34365 }, { "epoch": 0.8724346689342691, "grad_norm": 0.3515625, "learning_rate": 0.0002080197436211861, "loss": 0.4291, "step": 34370 }, { "epoch": 0.8725615869832849, "grad_norm": 0.341796875, "learning_rate": 0.00020798910099201906, "loss": 0.4558, "step": 34375 }, { "epoch": 0.8726885050323007, "grad_norm": 0.345703125, "learning_rate": 0.00020795845551734994, "loss": 0.4572, "step": 34380 }, { "epoch": 0.8728154230813164, "grad_norm": 0.33203125, "learning_rate": 0.00020792780719868246, "loss": 0.4351, "step": 34385 }, { "epoch": 0.8729423411303322, "grad_norm": 0.328125, "learning_rate": 0.00020789715603752062, "loss": 0.4203, "step": 34390 }, { "epoch": 0.8730692591793479, "grad_norm": 0.31640625, "learning_rate": 0.00020786650203536835, "loss": 0.4455, "step": 34395 }, { "epoch": 0.8731961772283636, "grad_norm": 0.345703125, "learning_rate": 0.00020783584519372988, "loss": 0.4888, "step": 34400 }, { "epoch": 0.8733230952773794, "grad_norm": 0.345703125, "learning_rate": 0.0002078051855141095, "loss": 0.4457, "step": 34405 }, { "epoch": 0.8734500133263952, "grad_norm": 0.357421875, "learning_rate": 0.00020777452299801174, "loss": 0.4554, "step": 34410 }, { "epoch": 0.8735769313754109, "grad_norm": 0.349609375, "learning_rate": 0.00020774385764694116, "loss": 0.4731, "step": 34415 }, { "epoch": 0.8737038494244267, "grad_norm": 0.375, "learning_rate": 0.00020771318946240244, "loss": 0.4569, "step": 34420 }, { "epoch": 0.8738307674734423, "grad_norm": 0.33984375, "learning_rate": 0.00020768251844590052, "loss": 0.447, "step": 34425 }, { "epoch": 0.8739576855224581, "grad_norm": 0.337890625, "learning_rate": 0.00020765184459894042, "loss": 0.4706, "step": 34430 }, { "epoch": 0.8740846035714739, "grad_norm": 0.33203125, "learning_rate": 0.00020762116792302725, "loss": 0.392, "step": 34435 }, { "epoch": 0.8742115216204897, "grad_norm": 0.322265625, "learning_rate": 0.00020759048841966627, "loss": 0.4283, "step": 34440 }, { "epoch": 0.8743384396695054, "grad_norm": 0.34375, "learning_rate": 0.00020755980609036306, "loss": 0.4198, "step": 34445 }, { "epoch": 0.8744653577185212, "grad_norm": 0.34765625, "learning_rate": 0.00020752912093662302, "loss": 0.4329, "step": 34450 }, { "epoch": 0.874592275767537, "grad_norm": 0.36328125, "learning_rate": 0.00020749843295995199, "loss": 0.4911, "step": 34455 }, { "epoch": 0.8747191938165526, "grad_norm": 0.341796875, "learning_rate": 0.00020746774216185573, "loss": 0.428, "step": 34460 }, { "epoch": 0.8748461118655684, "grad_norm": 0.33984375, "learning_rate": 0.00020743704854384024, "loss": 0.459, "step": 34465 }, { "epoch": 0.8749730299145841, "grad_norm": 0.330078125, "learning_rate": 0.00020740635210741168, "loss": 0.47, "step": 34470 }, { "epoch": 0.8750999479635999, "grad_norm": 0.361328125, "learning_rate": 0.00020737565285407629, "loss": 0.5008, "step": 34475 }, { "epoch": 0.8752268660126157, "grad_norm": 0.353515625, "learning_rate": 0.00020734495078534045, "loss": 0.4589, "step": 34480 }, { "epoch": 0.8753537840616314, "grad_norm": 0.353515625, "learning_rate": 0.0002073142459027107, "loss": 0.4691, "step": 34485 }, { "epoch": 0.8754807021106471, "grad_norm": 0.33203125, "learning_rate": 0.00020728353820769377, "loss": 0.4718, "step": 34490 }, { "epoch": 0.8756076201596629, "grad_norm": 0.36328125, "learning_rate": 0.00020725282770179645, "loss": 0.4778, "step": 34495 }, { "epoch": 0.8757345382086786, "grad_norm": 0.3203125, "learning_rate": 0.00020722211438652569, "loss": 0.439, "step": 34500 }, { "epoch": 0.8758614562576944, "grad_norm": 0.3671875, "learning_rate": 0.0002071913982633885, "loss": 0.4399, "step": 34505 }, { "epoch": 0.8759883743067102, "grad_norm": 0.3359375, "learning_rate": 0.00020716067933389224, "loss": 0.419, "step": 34510 }, { "epoch": 0.8761152923557259, "grad_norm": 0.330078125, "learning_rate": 0.00020712995759954422, "loss": 0.3995, "step": 34515 }, { "epoch": 0.8762422104047417, "grad_norm": 0.341796875, "learning_rate": 0.0002070992330618519, "loss": 0.4504, "step": 34520 }, { "epoch": 0.8763691284537574, "grad_norm": 0.345703125, "learning_rate": 0.00020706850572232296, "loss": 0.4607, "step": 34525 }, { "epoch": 0.8764960465027731, "grad_norm": 0.294921875, "learning_rate": 0.0002070377755824652, "loss": 0.4271, "step": 34530 }, { "epoch": 0.8766229645517889, "grad_norm": 0.33203125, "learning_rate": 0.00020700704264378652, "loss": 0.434, "step": 34535 }, { "epoch": 0.8767498826008047, "grad_norm": 0.3515625, "learning_rate": 0.00020697630690779493, "loss": 0.456, "step": 34540 }, { "epoch": 0.8768768006498204, "grad_norm": 0.32421875, "learning_rate": 0.00020694556837599863, "loss": 0.4243, "step": 34545 }, { "epoch": 0.8770037186988362, "grad_norm": 0.34375, "learning_rate": 0.00020691482704990604, "loss": 0.4193, "step": 34550 }, { "epoch": 0.877130636747852, "grad_norm": 0.349609375, "learning_rate": 0.00020688408293102555, "loss": 0.4385, "step": 34555 }, { "epoch": 0.8772575547968676, "grad_norm": 0.359375, "learning_rate": 0.00020685333602086576, "loss": 0.4367, "step": 34560 }, { "epoch": 0.8773844728458834, "grad_norm": 0.369140625, "learning_rate": 0.0002068225863209354, "loss": 0.4353, "step": 34565 }, { "epoch": 0.8775113908948992, "grad_norm": 0.373046875, "learning_rate": 0.0002067918338327434, "loss": 0.47, "step": 34570 }, { "epoch": 0.8776383089439149, "grad_norm": 0.35546875, "learning_rate": 0.00020676107855779866, "loss": 0.4512, "step": 34575 }, { "epoch": 0.8777652269929307, "grad_norm": 0.33203125, "learning_rate": 0.0002067303204976105, "loss": 0.4228, "step": 34580 }, { "epoch": 0.8778921450419465, "grad_norm": 0.345703125, "learning_rate": 0.00020669955965368805, "loss": 0.449, "step": 34585 }, { "epoch": 0.8780190630909621, "grad_norm": 0.34375, "learning_rate": 0.00020666879602754083, "loss": 0.4692, "step": 34590 }, { "epoch": 0.8781459811399779, "grad_norm": 0.357421875, "learning_rate": 0.00020663802962067837, "loss": 0.4358, "step": 34595 }, { "epoch": 0.8782728991889936, "grad_norm": 0.33984375, "learning_rate": 0.0002066072604346103, "loss": 0.4479, "step": 34600 }, { "epoch": 0.8783998172380094, "grad_norm": 0.337890625, "learning_rate": 0.00020657648847084654, "loss": 0.4287, "step": 34605 }, { "epoch": 0.8785267352870252, "grad_norm": 0.349609375, "learning_rate": 0.00020654571373089704, "loss": 0.4427, "step": 34610 }, { "epoch": 0.878653653336041, "grad_norm": 0.380859375, "learning_rate": 0.0002065149362162719, "loss": 0.4318, "step": 34615 }, { "epoch": 0.8787805713850567, "grad_norm": 0.357421875, "learning_rate": 0.0002064841559284813, "loss": 0.4264, "step": 34620 }, { "epoch": 0.8789074894340724, "grad_norm": 0.345703125, "learning_rate": 0.00020645337286903572, "loss": 0.4614, "step": 34625 }, { "epoch": 0.8790344074830881, "grad_norm": 0.3515625, "learning_rate": 0.00020642258703944564, "loss": 0.4507, "step": 34630 }, { "epoch": 0.8791613255321039, "grad_norm": 0.333984375, "learning_rate": 0.00020639179844122164, "loss": 0.4358, "step": 34635 }, { "epoch": 0.8792882435811197, "grad_norm": 0.359375, "learning_rate": 0.00020636100707587457, "loss": 0.4152, "step": 34640 }, { "epoch": 0.8794151616301354, "grad_norm": 0.345703125, "learning_rate": 0.00020633021294491527, "loss": 0.438, "step": 34645 }, { "epoch": 0.8795420796791512, "grad_norm": 0.3671875, "learning_rate": 0.00020629941604985493, "loss": 0.459, "step": 34650 }, { "epoch": 0.8796689977281669, "grad_norm": 0.34765625, "learning_rate": 0.0002062686163922047, "loss": 0.4486, "step": 34655 }, { "epoch": 0.8797959157771826, "grad_norm": 0.3359375, "learning_rate": 0.00020623781397347585, "loss": 0.4565, "step": 34660 }, { "epoch": 0.8799228338261984, "grad_norm": 0.345703125, "learning_rate": 0.00020620700879517986, "loss": 0.4587, "step": 34665 }, { "epoch": 0.8800497518752142, "grad_norm": 0.330078125, "learning_rate": 0.0002061762008588283, "loss": 0.4503, "step": 34670 }, { "epoch": 0.8801766699242299, "grad_norm": 0.3359375, "learning_rate": 0.00020614539016593298, "loss": 0.4453, "step": 34675 }, { "epoch": 0.8803035879732457, "grad_norm": 0.34375, "learning_rate": 0.0002061145767180058, "loss": 0.4241, "step": 34680 }, { "epoch": 0.8804305060222615, "grad_norm": 0.359375, "learning_rate": 0.00020608376051655855, "loss": 0.4487, "step": 34685 }, { "epoch": 0.8805574240712771, "grad_norm": 0.341796875, "learning_rate": 0.0002060529415631036, "loss": 0.462, "step": 34690 }, { "epoch": 0.8806843421202929, "grad_norm": 0.345703125, "learning_rate": 0.00020602211985915313, "loss": 0.4392, "step": 34695 }, { "epoch": 0.8808112601693087, "grad_norm": 0.330078125, "learning_rate": 0.0002059912954062195, "loss": 0.4448, "step": 34700 }, { "epoch": 0.8809381782183244, "grad_norm": 0.326171875, "learning_rate": 0.00020596046820581536, "loss": 0.4401, "step": 34705 }, { "epoch": 0.8810650962673402, "grad_norm": 0.33984375, "learning_rate": 0.00020592963825945336, "loss": 0.48, "step": 34710 }, { "epoch": 0.881192014316356, "grad_norm": 0.337890625, "learning_rate": 0.00020589880556864625, "loss": 0.4477, "step": 34715 }, { "epoch": 0.8813189323653717, "grad_norm": 0.333984375, "learning_rate": 0.00020586797013490705, "loss": 0.4531, "step": 34720 }, { "epoch": 0.8814458504143874, "grad_norm": 0.32421875, "learning_rate": 0.00020583713195974874, "loss": 0.4321, "step": 34725 }, { "epoch": 0.8815727684634032, "grad_norm": 0.322265625, "learning_rate": 0.0002058062910446847, "loss": 0.4318, "step": 34730 }, { "epoch": 0.8816996865124189, "grad_norm": 0.34375, "learning_rate": 0.0002057754473912281, "loss": 0.4332, "step": 34735 }, { "epoch": 0.8818266045614347, "grad_norm": 0.345703125, "learning_rate": 0.00020574460100089255, "loss": 0.4635, "step": 34740 }, { "epoch": 0.8819535226104505, "grad_norm": 0.345703125, "learning_rate": 0.00020571375187519164, "loss": 0.4431, "step": 34745 }, { "epoch": 0.8820804406594662, "grad_norm": 0.333984375, "learning_rate": 0.00020568290001563914, "loss": 0.4632, "step": 34750 }, { "epoch": 0.8822073587084819, "grad_norm": 0.333984375, "learning_rate": 0.0002056520454237489, "loss": 0.416, "step": 34755 }, { "epoch": 0.8823342767574976, "grad_norm": 0.314453125, "learning_rate": 0.00020562118810103498, "loss": 0.4273, "step": 34760 }, { "epoch": 0.8824611948065134, "grad_norm": 0.35546875, "learning_rate": 0.00020559032804901145, "loss": 0.4491, "step": 34765 }, { "epoch": 0.8825881128555292, "grad_norm": 0.34765625, "learning_rate": 0.00020555946526919268, "loss": 0.4495, "step": 34770 }, { "epoch": 0.882715030904545, "grad_norm": 0.3203125, "learning_rate": 0.0002055285997630931, "loss": 0.4266, "step": 34775 }, { "epoch": 0.8828419489535607, "grad_norm": 0.365234375, "learning_rate": 0.00020549773153222728, "loss": 0.4481, "step": 34780 }, { "epoch": 0.8829688670025765, "grad_norm": 0.345703125, "learning_rate": 0.00020546686057810987, "loss": 0.413, "step": 34785 }, { "epoch": 0.8830957850515921, "grad_norm": 0.349609375, "learning_rate": 0.00020543598690225568, "loss": 0.4342, "step": 34790 }, { "epoch": 0.8832227031006079, "grad_norm": 0.349609375, "learning_rate": 0.0002054051105061797, "loss": 0.45, "step": 34795 }, { "epoch": 0.8833496211496237, "grad_norm": 0.3515625, "learning_rate": 0.00020537423139139704, "loss": 0.4244, "step": 34800 }, { "epoch": 0.8834765391986394, "grad_norm": 0.33203125, "learning_rate": 0.00020534334955942291, "loss": 0.4334, "step": 34805 }, { "epoch": 0.8836034572476552, "grad_norm": 0.380859375, "learning_rate": 0.0002053124650117726, "loss": 0.466, "step": 34810 }, { "epoch": 0.883730375296671, "grad_norm": 0.349609375, "learning_rate": 0.00020528157774996172, "loss": 0.4583, "step": 34815 }, { "epoch": 0.8838572933456867, "grad_norm": 0.3359375, "learning_rate": 0.00020525068777550585, "loss": 0.4483, "step": 34820 }, { "epoch": 0.8839842113947024, "grad_norm": 0.3515625, "learning_rate": 0.00020521979508992072, "loss": 0.4266, "step": 34825 }, { "epoch": 0.8841111294437182, "grad_norm": 0.3515625, "learning_rate": 0.0002051888996947222, "loss": 0.4289, "step": 34830 }, { "epoch": 0.8842380474927339, "grad_norm": 0.35546875, "learning_rate": 0.00020515800159142642, "loss": 0.4783, "step": 34835 }, { "epoch": 0.8843649655417497, "grad_norm": 0.361328125, "learning_rate": 0.00020512710078154946, "loss": 0.4493, "step": 34840 }, { "epoch": 0.8844918835907655, "grad_norm": 1.40625, "learning_rate": 0.0002050961972666076, "loss": 0.4143, "step": 34845 }, { "epoch": 0.8846188016397812, "grad_norm": 0.326171875, "learning_rate": 0.0002050652910481173, "loss": 0.4185, "step": 34850 }, { "epoch": 0.8847457196887969, "grad_norm": 0.322265625, "learning_rate": 0.0002050343821275951, "loss": 0.4261, "step": 34855 }, { "epoch": 0.8848726377378127, "grad_norm": 0.333984375, "learning_rate": 0.00020500347050655767, "loss": 0.4581, "step": 34860 }, { "epoch": 0.8849995557868284, "grad_norm": 0.353515625, "learning_rate": 0.00020497255618652188, "loss": 0.4422, "step": 34865 }, { "epoch": 0.8851264738358442, "grad_norm": 0.353515625, "learning_rate": 0.00020494163916900466, "loss": 0.4596, "step": 34870 }, { "epoch": 0.88525339188486, "grad_norm": 0.32421875, "learning_rate": 0.00020491071945552309, "loss": 0.4235, "step": 34875 }, { "epoch": 0.8853803099338757, "grad_norm": 0.39453125, "learning_rate": 0.0002048797970475944, "loss": 0.4387, "step": 34880 }, { "epoch": 0.8855072279828915, "grad_norm": 0.32421875, "learning_rate": 0.00020484887194673586, "loss": 0.4555, "step": 34885 }, { "epoch": 0.8856341460319072, "grad_norm": 0.365234375, "learning_rate": 0.00020481794415446513, "loss": 0.4367, "step": 34890 }, { "epoch": 0.8857610640809229, "grad_norm": 0.365234375, "learning_rate": 0.00020478701367229962, "loss": 0.4573, "step": 34895 }, { "epoch": 0.8858879821299387, "grad_norm": 0.314453125, "learning_rate": 0.00020475608050175723, "loss": 0.4161, "step": 34900 }, { "epoch": 0.8860149001789545, "grad_norm": 0.337890625, "learning_rate": 0.00020472514464435577, "loss": 0.4357, "step": 34905 }, { "epoch": 0.8861418182279702, "grad_norm": 0.330078125, "learning_rate": 0.00020469420610161326, "loss": 0.4382, "step": 34910 }, { "epoch": 0.886268736276986, "grad_norm": 0.373046875, "learning_rate": 0.00020466326487504784, "loss": 0.4678, "step": 34915 }, { "epoch": 0.8863956543260016, "grad_norm": 0.396484375, "learning_rate": 0.00020463232096617776, "loss": 0.4307, "step": 34920 }, { "epoch": 0.8865225723750174, "grad_norm": 0.34765625, "learning_rate": 0.00020460137437652148, "loss": 0.4275, "step": 34925 }, { "epoch": 0.8866494904240332, "grad_norm": 0.345703125, "learning_rate": 0.00020457042510759754, "loss": 0.4524, "step": 34930 }, { "epoch": 0.886776408473049, "grad_norm": 0.345703125, "learning_rate": 0.00020453947316092453, "loss": 0.4726, "step": 34935 }, { "epoch": 0.8869033265220647, "grad_norm": 0.30859375, "learning_rate": 0.0002045085185380213, "loss": 0.4215, "step": 34940 }, { "epoch": 0.8870302445710805, "grad_norm": 0.37109375, "learning_rate": 0.00020447756124040685, "loss": 0.4473, "step": 34945 }, { "epoch": 0.8871571626200963, "grad_norm": 0.328125, "learning_rate": 0.0002044466012696001, "loss": 0.444, "step": 34950 }, { "epoch": 0.8872840806691119, "grad_norm": 0.33984375, "learning_rate": 0.00020441563862712032, "loss": 0.446, "step": 34955 }, { "epoch": 0.8874109987181277, "grad_norm": 0.357421875, "learning_rate": 0.00020438467331448682, "loss": 0.4565, "step": 34960 }, { "epoch": 0.8875379167671434, "grad_norm": 0.34765625, "learning_rate": 0.0002043537053332191, "loss": 0.4111, "step": 34965 }, { "epoch": 0.8876648348161592, "grad_norm": 0.326171875, "learning_rate": 0.00020432273468483665, "loss": 0.4365, "step": 34970 }, { "epoch": 0.887791752865175, "grad_norm": 0.3359375, "learning_rate": 0.0002042917613708593, "loss": 0.4372, "step": 34975 }, { "epoch": 0.8879186709141907, "grad_norm": 0.3359375, "learning_rate": 0.00020426078539280686, "loss": 0.4764, "step": 34980 }, { "epoch": 0.8880455889632065, "grad_norm": 0.34765625, "learning_rate": 0.00020422980675219925, "loss": 0.4448, "step": 34985 }, { "epoch": 0.8881725070122222, "grad_norm": 0.349609375, "learning_rate": 0.00020419882545055665, "loss": 0.4614, "step": 34990 }, { "epoch": 0.8882994250612379, "grad_norm": 0.33984375, "learning_rate": 0.0002041678414893993, "loss": 0.4811, "step": 34995 }, { "epoch": 0.8884263431102537, "grad_norm": 0.357421875, "learning_rate": 0.0002041368548702475, "loss": 0.42, "step": 35000 }, { "epoch": 0.8885532611592695, "grad_norm": 0.333984375, "learning_rate": 0.00020410586559462183, "loss": 0.4172, "step": 35005 }, { "epoch": 0.8886801792082852, "grad_norm": 0.33203125, "learning_rate": 0.00020407487366404283, "loss": 0.4184, "step": 35010 }, { "epoch": 0.888807097257301, "grad_norm": 0.376953125, "learning_rate": 0.00020404387908003134, "loss": 0.4641, "step": 35015 }, { "epoch": 0.8889340153063167, "grad_norm": 0.34765625, "learning_rate": 0.00020401288184410823, "loss": 0.4456, "step": 35020 }, { "epoch": 0.8890609333553324, "grad_norm": 0.330078125, "learning_rate": 0.00020398188195779458, "loss": 0.4466, "step": 35025 }, { "epoch": 0.8891878514043482, "grad_norm": 0.30859375, "learning_rate": 0.00020395087942261144, "loss": 0.3938, "step": 35030 }, { "epoch": 0.889314769453364, "grad_norm": 0.34375, "learning_rate": 0.00020391987424008017, "loss": 0.4595, "step": 35035 }, { "epoch": 0.8894416875023797, "grad_norm": 0.3671875, "learning_rate": 0.00020388886641172212, "loss": 0.4513, "step": 35040 }, { "epoch": 0.8895686055513955, "grad_norm": 0.361328125, "learning_rate": 0.00020385785593905886, "loss": 0.4281, "step": 35045 }, { "epoch": 0.8896955236004113, "grad_norm": 0.33203125, "learning_rate": 0.00020382684282361206, "loss": 0.4527, "step": 35050 }, { "epoch": 0.8898224416494269, "grad_norm": 0.3359375, "learning_rate": 0.0002037958270669035, "loss": 0.4339, "step": 35055 }, { "epoch": 0.8899493596984427, "grad_norm": 0.373046875, "learning_rate": 0.00020376480867045522, "loss": 0.4358, "step": 35060 }, { "epoch": 0.8900762777474585, "grad_norm": 0.341796875, "learning_rate": 0.00020373378763578913, "loss": 0.4453, "step": 35065 }, { "epoch": 0.8902031957964742, "grad_norm": 0.341796875, "learning_rate": 0.00020370276396442752, "loss": 0.4148, "step": 35070 }, { "epoch": 0.89033011384549, "grad_norm": 0.3359375, "learning_rate": 0.00020367173765789264, "loss": 0.4645, "step": 35075 }, { "epoch": 0.8904570318945058, "grad_norm": 0.322265625, "learning_rate": 0.000203640708717707, "loss": 0.45, "step": 35080 }, { "epoch": 0.8905839499435214, "grad_norm": 0.322265625, "learning_rate": 0.0002036096771453931, "loss": 0.434, "step": 35085 }, { "epoch": 0.8907108679925372, "grad_norm": 0.337890625, "learning_rate": 0.00020357864294247375, "loss": 0.4419, "step": 35090 }, { "epoch": 0.890837786041553, "grad_norm": 0.337890625, "learning_rate": 0.0002035476061104717, "loss": 0.4396, "step": 35095 }, { "epoch": 0.8909647040905687, "grad_norm": 0.33984375, "learning_rate": 0.00020351656665090997, "loss": 0.4414, "step": 35100 }, { "epoch": 0.8910916221395845, "grad_norm": 0.337890625, "learning_rate": 0.00020348552456531165, "loss": 0.4556, "step": 35105 }, { "epoch": 0.8912185401886003, "grad_norm": 0.345703125, "learning_rate": 0.0002034544798551999, "loss": 0.467, "step": 35110 }, { "epoch": 0.891345458237616, "grad_norm": 0.353515625, "learning_rate": 0.00020342343252209815, "loss": 0.4487, "step": 35115 }, { "epoch": 0.8914723762866317, "grad_norm": 0.333984375, "learning_rate": 0.00020339238256752984, "loss": 0.419, "step": 35120 }, { "epoch": 0.8915992943356474, "grad_norm": 0.369140625, "learning_rate": 0.0002033613299930186, "loss": 0.4474, "step": 35125 }, { "epoch": 0.8917262123846632, "grad_norm": 0.34765625, "learning_rate": 0.00020333027480008811, "loss": 0.439, "step": 35130 }, { "epoch": 0.891853130433679, "grad_norm": 0.435546875, "learning_rate": 0.0002032992169902623, "loss": 0.4492, "step": 35135 }, { "epoch": 0.8919800484826947, "grad_norm": 0.34375, "learning_rate": 0.00020326815656506519, "loss": 0.4342, "step": 35140 }, { "epoch": 0.8921069665317105, "grad_norm": 0.333984375, "learning_rate": 0.0002032370935260208, "loss": 0.4602, "step": 35145 }, { "epoch": 0.8922338845807263, "grad_norm": 0.32421875, "learning_rate": 0.00020320602787465345, "loss": 0.4574, "step": 35150 }, { "epoch": 0.8923608026297419, "grad_norm": 0.33984375, "learning_rate": 0.00020317495961248748, "loss": 0.4621, "step": 35155 }, { "epoch": 0.8924877206787577, "grad_norm": 0.341796875, "learning_rate": 0.0002031438887410475, "loss": 0.4326, "step": 35160 }, { "epoch": 0.8926146387277735, "grad_norm": 0.30859375, "learning_rate": 0.00020311281526185803, "loss": 0.4113, "step": 35165 }, { "epoch": 0.8927415567767892, "grad_norm": 0.353515625, "learning_rate": 0.00020308173917644382, "loss": 0.4448, "step": 35170 }, { "epoch": 0.892868474825805, "grad_norm": 0.37109375, "learning_rate": 0.00020305066048632983, "loss": 0.4657, "step": 35175 }, { "epoch": 0.8929953928748208, "grad_norm": 0.3515625, "learning_rate": 0.0002030195791930411, "loss": 0.4583, "step": 35180 }, { "epoch": 0.8931223109238364, "grad_norm": 0.30859375, "learning_rate": 0.00020298849529810272, "loss": 0.4444, "step": 35185 }, { "epoch": 0.8932492289728522, "grad_norm": 0.330078125, "learning_rate": 0.00020295740880304, "loss": 0.4441, "step": 35190 }, { "epoch": 0.893376147021868, "grad_norm": 0.357421875, "learning_rate": 0.00020292631970937836, "loss": 0.4607, "step": 35195 }, { "epoch": 0.8935030650708837, "grad_norm": 0.330078125, "learning_rate": 0.00020289522801864325, "loss": 0.428, "step": 35200 }, { "epoch": 0.8936299831198995, "grad_norm": 0.36328125, "learning_rate": 0.00020286413373236034, "loss": 0.4348, "step": 35205 }, { "epoch": 0.8937569011689153, "grad_norm": 0.3125, "learning_rate": 0.00020283303685205548, "loss": 0.4393, "step": 35210 }, { "epoch": 0.893883819217931, "grad_norm": 0.330078125, "learning_rate": 0.00020280193737925452, "loss": 0.4683, "step": 35215 }, { "epoch": 0.8940107372669467, "grad_norm": 0.349609375, "learning_rate": 0.00020277083531548354, "loss": 0.4409, "step": 35220 }, { "epoch": 0.8941376553159625, "grad_norm": 0.34765625, "learning_rate": 0.00020273973066226876, "loss": 0.4678, "step": 35225 }, { "epoch": 0.8942645733649782, "grad_norm": 0.3515625, "learning_rate": 0.00020270862342113634, "loss": 0.4379, "step": 35230 }, { "epoch": 0.894391491413994, "grad_norm": 0.333984375, "learning_rate": 0.0002026775135936128, "loss": 0.4456, "step": 35235 }, { "epoch": 0.8945184094630098, "grad_norm": 0.34375, "learning_rate": 0.0002026464011812246, "loss": 0.4724, "step": 35240 }, { "epoch": 0.8946453275120255, "grad_norm": 0.3359375, "learning_rate": 0.0002026152861854985, "loss": 0.4381, "step": 35245 }, { "epoch": 0.8947722455610413, "grad_norm": 0.375, "learning_rate": 0.00020258416860796124, "loss": 0.4778, "step": 35250 }, { "epoch": 0.894899163610057, "grad_norm": 0.337890625, "learning_rate": 0.00020255304845013977, "loss": 0.4394, "step": 35255 }, { "epoch": 0.8950260816590727, "grad_norm": 0.3671875, "learning_rate": 0.00020252192571356117, "loss": 0.4606, "step": 35260 }, { "epoch": 0.8951529997080885, "grad_norm": 0.349609375, "learning_rate": 0.0002024908003997526, "loss": 0.4508, "step": 35265 }, { "epoch": 0.8952799177571042, "grad_norm": 0.34375, "learning_rate": 0.0002024596725102413, "loss": 0.429, "step": 35270 }, { "epoch": 0.89540683580612, "grad_norm": 0.376953125, "learning_rate": 0.00020242854204655485, "loss": 0.4925, "step": 35275 }, { "epoch": 0.8955337538551358, "grad_norm": 0.33984375, "learning_rate": 0.0002023974090102207, "loss": 0.4359, "step": 35280 }, { "epoch": 0.8956606719041514, "grad_norm": 0.34375, "learning_rate": 0.00020236627340276657, "loss": 0.4457, "step": 35285 }, { "epoch": 0.8957875899531672, "grad_norm": 0.357421875, "learning_rate": 0.00020233513522572024, "loss": 0.4473, "step": 35290 }, { "epoch": 0.895914508002183, "grad_norm": 0.34375, "learning_rate": 0.0002023039944806097, "loss": 0.4264, "step": 35295 }, { "epoch": 0.8960414260511987, "grad_norm": 0.328125, "learning_rate": 0.00020227285116896295, "loss": 0.436, "step": 35300 }, { "epoch": 0.8961683441002145, "grad_norm": 0.337890625, "learning_rate": 0.00020224170529230823, "loss": 0.4367, "step": 35305 }, { "epoch": 0.8962952621492303, "grad_norm": 0.37890625, "learning_rate": 0.00020221055685217386, "loss": 0.4584, "step": 35310 }, { "epoch": 0.896422180198246, "grad_norm": 0.3671875, "learning_rate": 0.00020217940585008827, "loss": 0.4529, "step": 35315 }, { "epoch": 0.8965490982472617, "grad_norm": 0.345703125, "learning_rate": 0.00020214825228758, "loss": 0.4161, "step": 35320 }, { "epoch": 0.8966760162962775, "grad_norm": 0.357421875, "learning_rate": 0.00020211709616617778, "loss": 0.4631, "step": 35325 }, { "epoch": 0.8968029343452932, "grad_norm": 0.337890625, "learning_rate": 0.0002020859374874104, "loss": 0.4549, "step": 35330 }, { "epoch": 0.896929852394309, "grad_norm": 0.330078125, "learning_rate": 0.00020205477625280678, "loss": 0.4535, "step": 35335 }, { "epoch": 0.8970567704433248, "grad_norm": 0.36328125, "learning_rate": 0.00020202361246389605, "loss": 0.4213, "step": 35340 }, { "epoch": 0.8971836884923405, "grad_norm": 0.33203125, "learning_rate": 0.00020199244612220739, "loss": 0.4408, "step": 35345 }, { "epoch": 0.8973106065413562, "grad_norm": 0.361328125, "learning_rate": 0.0002019612772292701, "loss": 0.4473, "step": 35350 }, { "epoch": 0.897437524590372, "grad_norm": 0.3359375, "learning_rate": 0.00020193010578661367, "loss": 0.4662, "step": 35355 }, { "epoch": 0.8975644426393877, "grad_norm": 0.349609375, "learning_rate": 0.0002018989317957676, "loss": 0.4366, "step": 35360 }, { "epoch": 0.8976913606884035, "grad_norm": 0.34375, "learning_rate": 0.00020186775525826158, "loss": 0.4314, "step": 35365 }, { "epoch": 0.8978182787374193, "grad_norm": 0.318359375, "learning_rate": 0.00020183657617562552, "loss": 0.4342, "step": 35370 }, { "epoch": 0.897945196786435, "grad_norm": 0.33203125, "learning_rate": 0.0002018053945493893, "loss": 0.4449, "step": 35375 }, { "epoch": 0.8980721148354508, "grad_norm": 0.326171875, "learning_rate": 0.00020177421038108302, "loss": 0.4413, "step": 35380 }, { "epoch": 0.8981990328844665, "grad_norm": 0.330078125, "learning_rate": 0.00020174302367223688, "loss": 0.4458, "step": 35385 }, { "epoch": 0.8983259509334822, "grad_norm": 0.34765625, "learning_rate": 0.00020171183442438113, "loss": 0.4699, "step": 35390 }, { "epoch": 0.898452868982498, "grad_norm": 0.341796875, "learning_rate": 0.00020168064263904626, "loss": 0.4661, "step": 35395 }, { "epoch": 0.8985797870315138, "grad_norm": 0.3203125, "learning_rate": 0.00020164944831776288, "loss": 0.4474, "step": 35400 }, { "epoch": 0.8987067050805295, "grad_norm": 0.33984375, "learning_rate": 0.00020161825146206166, "loss": 0.459, "step": 35405 }, { "epoch": 0.8988336231295453, "grad_norm": 0.337890625, "learning_rate": 0.00020158705207347342, "loss": 0.4559, "step": 35410 }, { "epoch": 0.8989605411785611, "grad_norm": 0.361328125, "learning_rate": 0.000201555850153529, "loss": 0.4631, "step": 35415 }, { "epoch": 0.8990874592275767, "grad_norm": 0.37109375, "learning_rate": 0.0002015246457037596, "loss": 0.4442, "step": 35420 }, { "epoch": 0.8992143772765925, "grad_norm": 0.33203125, "learning_rate": 0.00020149343872569633, "loss": 0.467, "step": 35425 }, { "epoch": 0.8993412953256082, "grad_norm": 0.32421875, "learning_rate": 0.00020146222922087057, "loss": 0.4185, "step": 35430 }, { "epoch": 0.899468213374624, "grad_norm": 0.333984375, "learning_rate": 0.0002014310171908137, "loss": 0.4113, "step": 35435 }, { "epoch": 0.8995951314236398, "grad_norm": 0.345703125, "learning_rate": 0.00020139980263705732, "loss": 0.4465, "step": 35440 }, { "epoch": 0.8997220494726555, "grad_norm": 0.341796875, "learning_rate": 0.0002013685855611331, "loss": 0.4286, "step": 35445 }, { "epoch": 0.8998489675216712, "grad_norm": 0.326171875, "learning_rate": 0.00020133736596457286, "loss": 0.4269, "step": 35450 }, { "epoch": 0.899975885570687, "grad_norm": 0.32421875, "learning_rate": 0.00020130614384890848, "loss": 0.4593, "step": 35455 }, { "epoch": 0.9001028036197027, "grad_norm": 0.328125, "learning_rate": 0.00020127491921567207, "loss": 0.4719, "step": 35460 }, { "epoch": 0.9002297216687185, "grad_norm": 0.3359375, "learning_rate": 0.00020124369206639582, "loss": 0.4337, "step": 35465 }, { "epoch": 0.9003566397177343, "grad_norm": 0.298828125, "learning_rate": 0.00020121246240261198, "loss": 0.4564, "step": 35470 }, { "epoch": 0.90048355776675, "grad_norm": 0.34765625, "learning_rate": 0.00020118123022585307, "loss": 0.4558, "step": 35475 }, { "epoch": 0.9006104758157658, "grad_norm": 0.3671875, "learning_rate": 0.00020114999553765152, "loss": 0.4728, "step": 35480 }, { "epoch": 0.9007373938647815, "grad_norm": 0.35546875, "learning_rate": 0.0002011187583395401, "loss": 0.4503, "step": 35485 }, { "epoch": 0.9008643119137972, "grad_norm": 0.337890625, "learning_rate": 0.00020108751863305154, "loss": 0.4339, "step": 35490 }, { "epoch": 0.900991229962813, "grad_norm": 0.359375, "learning_rate": 0.00020105627641971881, "loss": 0.4778, "step": 35495 }, { "epoch": 0.9011181480118288, "grad_norm": 0.33203125, "learning_rate": 0.00020102503170107492, "loss": 0.4394, "step": 35500 }, { "epoch": 0.9012450660608445, "grad_norm": 0.32421875, "learning_rate": 0.00020099378447865308, "loss": 0.4486, "step": 35505 }, { "epoch": 0.9013719841098603, "grad_norm": 0.328125, "learning_rate": 0.00020096253475398654, "loss": 0.4501, "step": 35510 }, { "epoch": 0.901498902158876, "grad_norm": 0.33203125, "learning_rate": 0.00020093128252860872, "loss": 0.4416, "step": 35515 }, { "epoch": 0.9016258202078917, "grad_norm": 0.3203125, "learning_rate": 0.0002009000278040531, "loss": 0.4519, "step": 35520 }, { "epoch": 0.9017527382569075, "grad_norm": 0.3359375, "learning_rate": 0.00020086877058185347, "loss": 0.4734, "step": 35525 }, { "epoch": 0.9018796563059233, "grad_norm": 0.328125, "learning_rate": 0.00020083751086354349, "loss": 0.4614, "step": 35530 }, { "epoch": 0.902006574354939, "grad_norm": 0.388671875, "learning_rate": 0.00020080624865065713, "loss": 0.4911, "step": 35535 }, { "epoch": 0.9021334924039548, "grad_norm": 0.33203125, "learning_rate": 0.00020077498394472835, "loss": 0.4383, "step": 35540 }, { "epoch": 0.9022604104529706, "grad_norm": 0.359375, "learning_rate": 0.00020074371674729132, "loss": 0.4724, "step": 35545 }, { "epoch": 0.9023873285019862, "grad_norm": 0.353515625, "learning_rate": 0.00020071244705988038, "loss": 0.4109, "step": 35550 }, { "epoch": 0.902514246551002, "grad_norm": 0.349609375, "learning_rate": 0.00020068117488402978, "loss": 0.4848, "step": 35555 }, { "epoch": 0.9026411646000178, "grad_norm": 0.326171875, "learning_rate": 0.00020064990022127422, "loss": 0.4208, "step": 35560 }, { "epoch": 0.9027680826490335, "grad_norm": 0.34765625, "learning_rate": 0.0002006186230731482, "loss": 0.4438, "step": 35565 }, { "epoch": 0.9028950006980493, "grad_norm": 0.35546875, "learning_rate": 0.00020058734344118647, "loss": 0.4651, "step": 35570 }, { "epoch": 0.903021918747065, "grad_norm": 0.333984375, "learning_rate": 0.00020055606132692396, "loss": 0.4256, "step": 35575 }, { "epoch": 0.9031488367960808, "grad_norm": 0.353515625, "learning_rate": 0.00020052477673189567, "loss": 0.448, "step": 35580 }, { "epoch": 0.9032757548450965, "grad_norm": 0.337890625, "learning_rate": 0.0002004934896576367, "loss": 0.444, "step": 35585 }, { "epoch": 0.9034026728941122, "grad_norm": 0.345703125, "learning_rate": 0.00020046220010568232, "loss": 0.4225, "step": 35590 }, { "epoch": 0.903529590943128, "grad_norm": 0.353515625, "learning_rate": 0.0002004309080775679, "loss": 0.4337, "step": 35595 }, { "epoch": 0.9036565089921438, "grad_norm": 0.328125, "learning_rate": 0.0002003996135748289, "loss": 0.4333, "step": 35600 }, { "epoch": 0.9037834270411595, "grad_norm": 0.34375, "learning_rate": 0.00020036831659900093, "loss": 0.4645, "step": 35605 }, { "epoch": 0.9039103450901753, "grad_norm": 0.341796875, "learning_rate": 0.00020033701715161973, "loss": 0.4488, "step": 35610 }, { "epoch": 0.904037263139191, "grad_norm": 0.333984375, "learning_rate": 0.00020030571523422115, "loss": 0.4457, "step": 35615 }, { "epoch": 0.9041641811882067, "grad_norm": 0.349609375, "learning_rate": 0.00020027441084834115, "loss": 0.4185, "step": 35620 }, { "epoch": 0.9042910992372225, "grad_norm": 0.322265625, "learning_rate": 0.00020024310399551583, "loss": 0.4551, "step": 35625 }, { "epoch": 0.9044180172862383, "grad_norm": 0.337890625, "learning_rate": 0.00020021179467728144, "loss": 0.4388, "step": 35630 }, { "epoch": 0.904544935335254, "grad_norm": 0.349609375, "learning_rate": 0.0002001804828951743, "loss": 0.4395, "step": 35635 }, { "epoch": 0.9046718533842698, "grad_norm": 0.361328125, "learning_rate": 0.00020014916865073083, "loss": 0.4473, "step": 35640 }, { "epoch": 0.9047987714332856, "grad_norm": 0.34375, "learning_rate": 0.00020011785194548764, "loss": 0.4302, "step": 35645 }, { "epoch": 0.9049256894823012, "grad_norm": 0.353515625, "learning_rate": 0.00020008653278098137, "loss": 0.4344, "step": 35650 }, { "epoch": 0.905052607531317, "grad_norm": 0.33984375, "learning_rate": 0.00020005521115874891, "loss": 0.4428, "step": 35655 }, { "epoch": 0.9051795255803328, "grad_norm": 0.341796875, "learning_rate": 0.00020002388708032718, "loss": 0.4528, "step": 35660 }, { "epoch": 0.9053064436293485, "grad_norm": 0.361328125, "learning_rate": 0.00019999256054725327, "loss": 0.4321, "step": 35665 }, { "epoch": 0.9054333616783643, "grad_norm": 0.34765625, "learning_rate": 0.00019996123156106431, "loss": 0.4527, "step": 35670 }, { "epoch": 0.9055602797273801, "grad_norm": 0.337890625, "learning_rate": 0.00019992990012329762, "loss": 0.4612, "step": 35675 }, { "epoch": 0.9056871977763958, "grad_norm": 0.357421875, "learning_rate": 0.0001998985662354906, "loss": 0.4548, "step": 35680 }, { "epoch": 0.9058141158254115, "grad_norm": 0.34375, "learning_rate": 0.00019986722989918083, "loss": 0.4134, "step": 35685 }, { "epoch": 0.9059410338744273, "grad_norm": 0.3359375, "learning_rate": 0.00019983589111590596, "loss": 0.4229, "step": 35690 }, { "epoch": 0.906067951923443, "grad_norm": 0.35546875, "learning_rate": 0.00019980454988720376, "loss": 0.4325, "step": 35695 }, { "epoch": 0.9061948699724588, "grad_norm": 0.357421875, "learning_rate": 0.0001997732062146121, "loss": 0.4544, "step": 35700 }, { "epoch": 0.9063217880214746, "grad_norm": 0.37890625, "learning_rate": 0.00019974186009966908, "loss": 0.4702, "step": 35705 }, { "epoch": 0.9064487060704903, "grad_norm": 0.318359375, "learning_rate": 0.00019971051154391278, "loss": 0.4729, "step": 35710 }, { "epoch": 0.906575624119506, "grad_norm": 0.34765625, "learning_rate": 0.00019967916054888148, "loss": 0.4444, "step": 35715 }, { "epoch": 0.9067025421685218, "grad_norm": 0.330078125, "learning_rate": 0.00019964780711611356, "loss": 0.4644, "step": 35720 }, { "epoch": 0.9068294602175375, "grad_norm": 0.380859375, "learning_rate": 0.00019961645124714757, "loss": 0.4448, "step": 35725 }, { "epoch": 0.9069563782665533, "grad_norm": 0.32421875, "learning_rate": 0.00019958509294352203, "loss": 0.4366, "step": 35730 }, { "epoch": 0.907083296315569, "grad_norm": 0.353515625, "learning_rate": 0.00019955373220677575, "loss": 0.4293, "step": 35735 }, { "epoch": 0.9072102143645848, "grad_norm": 0.361328125, "learning_rate": 0.00019952236903844755, "loss": 0.4274, "step": 35740 }, { "epoch": 0.9073371324136006, "grad_norm": 0.359375, "learning_rate": 0.00019949100344007642, "loss": 0.4307, "step": 35745 }, { "epoch": 0.9074640504626162, "grad_norm": 0.333984375, "learning_rate": 0.00019945963541320152, "loss": 0.4532, "step": 35750 }, { "epoch": 0.907590968511632, "grad_norm": 0.294921875, "learning_rate": 0.00019942826495936196, "loss": 0.4198, "step": 35755 }, { "epoch": 0.9077178865606478, "grad_norm": 0.3046875, "learning_rate": 0.00019939689208009717, "loss": 0.4192, "step": 35760 }, { "epoch": 0.9078448046096635, "grad_norm": 0.380859375, "learning_rate": 0.00019936551677694655, "loss": 0.5054, "step": 35765 }, { "epoch": 0.9079717226586793, "grad_norm": 0.345703125, "learning_rate": 0.00019933413905144965, "loss": 0.469, "step": 35770 }, { "epoch": 0.9080986407076951, "grad_norm": 0.345703125, "learning_rate": 0.00019930275890514621, "loss": 0.4483, "step": 35775 }, { "epoch": 0.9082255587567107, "grad_norm": 0.33203125, "learning_rate": 0.00019927137633957606, "loss": 0.4455, "step": 35780 }, { "epoch": 0.9083524768057265, "grad_norm": 0.33203125, "learning_rate": 0.00019923999135627908, "loss": 0.46, "step": 35785 }, { "epoch": 0.9084793948547423, "grad_norm": 0.3359375, "learning_rate": 0.0001992086039567953, "loss": 0.4402, "step": 35790 }, { "epoch": 0.908606312903758, "grad_norm": 0.3515625, "learning_rate": 0.000199177214142665, "loss": 0.4374, "step": 35795 }, { "epoch": 0.9087332309527738, "grad_norm": 0.376953125, "learning_rate": 0.0001991458219154284, "loss": 0.4525, "step": 35800 }, { "epoch": 0.9088601490017896, "grad_norm": 0.3359375, "learning_rate": 0.0001991144272766258, "loss": 0.4484, "step": 35805 }, { "epoch": 0.9089870670508053, "grad_norm": 0.361328125, "learning_rate": 0.0001990830302277979, "loss": 0.4747, "step": 35810 }, { "epoch": 0.909113985099821, "grad_norm": 0.34375, "learning_rate": 0.00019905163077048522, "loss": 0.4623, "step": 35815 }, { "epoch": 0.9092409031488368, "grad_norm": 0.330078125, "learning_rate": 0.00019902022890622856, "loss": 0.436, "step": 35820 }, { "epoch": 0.9093678211978525, "grad_norm": 0.36328125, "learning_rate": 0.0001989888246365688, "loss": 0.4325, "step": 35825 }, { "epoch": 0.9094947392468683, "grad_norm": 0.373046875, "learning_rate": 0.00019895741796304694, "loss": 0.4455, "step": 35830 }, { "epoch": 0.9096216572958841, "grad_norm": 0.326171875, "learning_rate": 0.00019892600888720408, "loss": 0.4457, "step": 35835 }, { "epoch": 0.9097485753448998, "grad_norm": 0.33203125, "learning_rate": 0.00019889459741058142, "loss": 0.4396, "step": 35840 }, { "epoch": 0.9098754933939156, "grad_norm": 0.345703125, "learning_rate": 0.00019886318353472038, "loss": 0.435, "step": 35845 }, { "epoch": 0.9100024114429313, "grad_norm": 0.34375, "learning_rate": 0.0001988317672611624, "loss": 0.4528, "step": 35850 }, { "epoch": 0.910129329491947, "grad_norm": 0.35546875, "learning_rate": 0.00019880034859144904, "loss": 0.4657, "step": 35855 }, { "epoch": 0.9102562475409628, "grad_norm": 0.318359375, "learning_rate": 0.00019876892752712197, "loss": 0.4293, "step": 35860 }, { "epoch": 0.9103831655899786, "grad_norm": 0.34375, "learning_rate": 0.00019873750406972307, "loss": 0.4811, "step": 35865 }, { "epoch": 0.9105100836389943, "grad_norm": 0.349609375, "learning_rate": 0.00019870607822079425, "loss": 0.4664, "step": 35870 }, { "epoch": 0.9106370016880101, "grad_norm": 0.322265625, "learning_rate": 0.00019867464998187762, "loss": 0.4341, "step": 35875 }, { "epoch": 0.9107639197370258, "grad_norm": 0.3671875, "learning_rate": 0.0001986432193545153, "loss": 0.4686, "step": 35880 }, { "epoch": 0.9108908377860415, "grad_norm": 0.365234375, "learning_rate": 0.00019861178634024957, "loss": 0.4466, "step": 35885 }, { "epoch": 0.9110177558350573, "grad_norm": 0.33203125, "learning_rate": 0.00019858035094062285, "loss": 0.439, "step": 35890 }, { "epoch": 0.911144673884073, "grad_norm": 0.33203125, "learning_rate": 0.0001985489131571777, "loss": 0.4452, "step": 35895 }, { "epoch": 0.9112715919330888, "grad_norm": 0.353515625, "learning_rate": 0.00019851747299145663, "loss": 0.4291, "step": 35900 }, { "epoch": 0.9113985099821046, "grad_norm": 0.35546875, "learning_rate": 0.0001984860304450025, "loss": 0.4619, "step": 35905 }, { "epoch": 0.9115254280311204, "grad_norm": 0.333984375, "learning_rate": 0.0001984545855193582, "loss": 0.4263, "step": 35910 }, { "epoch": 0.911652346080136, "grad_norm": 0.333984375, "learning_rate": 0.00019842313821606672, "loss": 0.4644, "step": 35915 }, { "epoch": 0.9117792641291518, "grad_norm": 0.353515625, "learning_rate": 0.00019839168853667114, "loss": 0.4445, "step": 35920 }, { "epoch": 0.9119061821781675, "grad_norm": 0.3359375, "learning_rate": 0.00019836023648271464, "loss": 0.4352, "step": 35925 }, { "epoch": 0.9120331002271833, "grad_norm": 0.365234375, "learning_rate": 0.00019832878205574056, "loss": 0.4586, "step": 35930 }, { "epoch": 0.9121600182761991, "grad_norm": 0.32421875, "learning_rate": 0.00019829732525729248, "loss": 0.4414, "step": 35935 }, { "epoch": 0.9122869363252148, "grad_norm": 0.330078125, "learning_rate": 0.00019826586608891384, "loss": 0.4365, "step": 35940 }, { "epoch": 0.9124138543742305, "grad_norm": 0.353515625, "learning_rate": 0.00019823440455214839, "loss": 0.432, "step": 35945 }, { "epoch": 0.9125407724232463, "grad_norm": 0.330078125, "learning_rate": 0.0001982029406485399, "loss": 0.4424, "step": 35950 }, { "epoch": 0.912667690472262, "grad_norm": 0.357421875, "learning_rate": 0.00019817147437963236, "loss": 0.4775, "step": 35955 }, { "epoch": 0.9127946085212778, "grad_norm": 0.318359375, "learning_rate": 0.00019814000574696974, "loss": 0.4508, "step": 35960 }, { "epoch": 0.9129215265702936, "grad_norm": 0.357421875, "learning_rate": 0.0001981085347520962, "loss": 0.453, "step": 35965 }, { "epoch": 0.9130484446193093, "grad_norm": 0.376953125, "learning_rate": 0.00019807706139655603, "loss": 0.4248, "step": 35970 }, { "epoch": 0.9131753626683251, "grad_norm": 0.330078125, "learning_rate": 0.00019804558568189363, "loss": 0.4421, "step": 35975 }, { "epoch": 0.9133022807173408, "grad_norm": 0.33203125, "learning_rate": 0.00019801410760965347, "loss": 0.4368, "step": 35980 }, { "epoch": 0.9134291987663565, "grad_norm": 0.37109375, "learning_rate": 0.00019798262718138013, "loss": 0.4822, "step": 35985 }, { "epoch": 0.9135561168153723, "grad_norm": 0.310546875, "learning_rate": 0.00019795114439861844, "loss": 0.4441, "step": 35990 }, { "epoch": 0.9136830348643881, "grad_norm": 0.328125, "learning_rate": 0.00019791965926291315, "loss": 0.4384, "step": 35995 }, { "epoch": 0.9138099529134038, "grad_norm": 0.3125, "learning_rate": 0.00019788817177580933, "loss": 0.425, "step": 36000 }, { "epoch": 0.9139368709624196, "grad_norm": 0.337890625, "learning_rate": 0.00019785668193885196, "loss": 0.4408, "step": 36005 }, { "epoch": 0.9140637890114354, "grad_norm": 0.34765625, "learning_rate": 0.0001978251897535863, "loss": 0.451, "step": 36010 }, { "epoch": 0.914190707060451, "grad_norm": 0.333984375, "learning_rate": 0.00019779369522155758, "loss": 0.4148, "step": 36015 }, { "epoch": 0.9143176251094668, "grad_norm": 0.341796875, "learning_rate": 0.0001977621983443113, "loss": 0.4557, "step": 36020 }, { "epoch": 0.9144445431584826, "grad_norm": 0.32421875, "learning_rate": 0.00019773069912339296, "loss": 0.4191, "step": 36025 }, { "epoch": 0.9145714612074983, "grad_norm": 0.32421875, "learning_rate": 0.00019769919756034823, "loss": 0.4731, "step": 36030 }, { "epoch": 0.9146983792565141, "grad_norm": 0.365234375, "learning_rate": 0.0001976676936567229, "loss": 0.4398, "step": 36035 }, { "epoch": 0.9148252973055299, "grad_norm": 0.349609375, "learning_rate": 0.00019763618741406284, "loss": 0.4499, "step": 36040 }, { "epoch": 0.9149522153545455, "grad_norm": 0.306640625, "learning_rate": 0.000197604678833914, "loss": 0.4288, "step": 36045 }, { "epoch": 0.9150791334035613, "grad_norm": 0.37109375, "learning_rate": 0.00019757316791782256, "loss": 0.4704, "step": 36050 }, { "epoch": 0.915206051452577, "grad_norm": 0.357421875, "learning_rate": 0.0001975416546673347, "loss": 0.4873, "step": 36055 }, { "epoch": 0.9153329695015928, "grad_norm": 0.345703125, "learning_rate": 0.00019751013908399679, "loss": 0.4371, "step": 36060 }, { "epoch": 0.9154598875506086, "grad_norm": 0.330078125, "learning_rate": 0.00019747862116935525, "loss": 0.4521, "step": 36065 }, { "epoch": 0.9155868055996244, "grad_norm": 0.359375, "learning_rate": 0.00019744710092495671, "loss": 0.4512, "step": 36070 }, { "epoch": 0.9157137236486401, "grad_norm": 0.330078125, "learning_rate": 0.00019741557835234786, "loss": 0.4387, "step": 36075 }, { "epoch": 0.9158406416976558, "grad_norm": 0.330078125, "learning_rate": 0.00019738405345307545, "loss": 0.4839, "step": 36080 }, { "epoch": 0.9159675597466715, "grad_norm": 0.333984375, "learning_rate": 0.0001973525262286864, "loss": 0.4522, "step": 36085 }, { "epoch": 0.9160944777956873, "grad_norm": 0.34765625, "learning_rate": 0.0001973209966807277, "loss": 0.4173, "step": 36090 }, { "epoch": 0.9162213958447031, "grad_norm": 0.33203125, "learning_rate": 0.00019728946481074663, "loss": 0.476, "step": 36095 }, { "epoch": 0.9163483138937188, "grad_norm": 0.3203125, "learning_rate": 0.00019725793062029033, "loss": 0.4233, "step": 36100 }, { "epoch": 0.9164752319427346, "grad_norm": 0.3515625, "learning_rate": 0.00019722639411090615, "loss": 0.4592, "step": 36105 }, { "epoch": 0.9166021499917504, "grad_norm": 0.34765625, "learning_rate": 0.0001971948552841417, "loss": 0.4411, "step": 36110 }, { "epoch": 0.916729068040766, "grad_norm": 0.341796875, "learning_rate": 0.0001971633141415445, "loss": 0.4644, "step": 36115 }, { "epoch": 0.9168559860897818, "grad_norm": 0.330078125, "learning_rate": 0.00019713177068466219, "loss": 0.4311, "step": 36120 }, { "epoch": 0.9169829041387976, "grad_norm": 0.353515625, "learning_rate": 0.00019710022491504272, "loss": 0.4445, "step": 36125 }, { "epoch": 0.9171098221878133, "grad_norm": 0.365234375, "learning_rate": 0.00019706867683423393, "loss": 0.456, "step": 36130 }, { "epoch": 0.9172367402368291, "grad_norm": 0.3515625, "learning_rate": 0.00019703712644378394, "loss": 0.4476, "step": 36135 }, { "epoch": 0.9173636582858449, "grad_norm": 0.3515625, "learning_rate": 0.00019700557374524095, "loss": 0.4715, "step": 36140 }, { "epoch": 0.9174905763348605, "grad_norm": 0.337890625, "learning_rate": 0.0001969740187401531, "loss": 0.4415, "step": 36145 }, { "epoch": 0.9176174943838763, "grad_norm": 0.341796875, "learning_rate": 0.00019694246143006888, "loss": 0.4424, "step": 36150 }, { "epoch": 0.9177444124328921, "grad_norm": 0.3203125, "learning_rate": 0.0001969109018165368, "loss": 0.4376, "step": 36155 }, { "epoch": 0.9178713304819078, "grad_norm": 0.34375, "learning_rate": 0.00019687933990110543, "loss": 0.4709, "step": 36160 }, { "epoch": 0.9179982485309236, "grad_norm": 0.333984375, "learning_rate": 0.00019684777568532358, "loss": 0.4723, "step": 36165 }, { "epoch": 0.9181251665799394, "grad_norm": 0.318359375, "learning_rate": 0.00019681620917074, "loss": 0.4067, "step": 36170 }, { "epoch": 0.9182520846289551, "grad_norm": 0.33984375, "learning_rate": 0.00019678464035890368, "loss": 0.4718, "step": 36175 }, { "epoch": 0.9183790026779708, "grad_norm": 0.359375, "learning_rate": 0.00019675306925136368, "loss": 0.4405, "step": 36180 }, { "epoch": 0.9185059207269866, "grad_norm": 0.341796875, "learning_rate": 0.0001967214958496692, "loss": 0.4463, "step": 36185 }, { "epoch": 0.9186328387760023, "grad_norm": 0.33984375, "learning_rate": 0.00019668992015536954, "loss": 0.4261, "step": 36190 }, { "epoch": 0.9187597568250181, "grad_norm": 0.345703125, "learning_rate": 0.00019665834217001408, "loss": 0.4266, "step": 36195 }, { "epoch": 0.9188866748740339, "grad_norm": 0.376953125, "learning_rate": 0.0001966267618951524, "loss": 0.4547, "step": 36200 }, { "epoch": 0.9190135929230496, "grad_norm": 0.34765625, "learning_rate": 0.00019659517933233407, "loss": 0.4736, "step": 36205 }, { "epoch": 0.9191405109720653, "grad_norm": 0.3359375, "learning_rate": 0.00019656359448310884, "loss": 0.4411, "step": 36210 }, { "epoch": 0.919267429021081, "grad_norm": 0.337890625, "learning_rate": 0.00019653200734902655, "loss": 0.4472, "step": 36215 }, { "epoch": 0.9193943470700968, "grad_norm": 0.345703125, "learning_rate": 0.00019650041793163722, "loss": 0.4598, "step": 36220 }, { "epoch": 0.9195212651191126, "grad_norm": 0.302734375, "learning_rate": 0.00019646882623249094, "loss": 0.4464, "step": 36225 }, { "epoch": 0.9196481831681284, "grad_norm": 0.359375, "learning_rate": 0.0001964372322531378, "loss": 0.4477, "step": 36230 }, { "epoch": 0.9197751012171441, "grad_norm": 0.333984375, "learning_rate": 0.00019640563599512823, "loss": 0.426, "step": 36235 }, { "epoch": 0.9199020192661599, "grad_norm": 0.330078125, "learning_rate": 0.00019637403746001255, "loss": 0.4252, "step": 36240 }, { "epoch": 0.9200289373151755, "grad_norm": 0.357421875, "learning_rate": 0.00019634243664934134, "loss": 0.4583, "step": 36245 }, { "epoch": 0.9201558553641913, "grad_norm": 0.33203125, "learning_rate": 0.00019631083356466523, "loss": 0.4204, "step": 36250 }, { "epoch": 0.9202827734132071, "grad_norm": 0.34765625, "learning_rate": 0.00019627922820753495, "loss": 0.431, "step": 36255 }, { "epoch": 0.9204096914622228, "grad_norm": 0.341796875, "learning_rate": 0.0001962476205795014, "loss": 0.4524, "step": 36260 }, { "epoch": 0.9205366095112386, "grad_norm": 0.361328125, "learning_rate": 0.0001962160106821155, "loss": 0.4549, "step": 36265 }, { "epoch": 0.9206635275602544, "grad_norm": 0.31640625, "learning_rate": 0.00019618439851692838, "loss": 0.4447, "step": 36270 }, { "epoch": 0.9207904456092701, "grad_norm": 0.3203125, "learning_rate": 0.00019615278408549125, "loss": 0.4049, "step": 36275 }, { "epoch": 0.9209173636582858, "grad_norm": 0.337890625, "learning_rate": 0.0001961211673893553, "loss": 0.4514, "step": 36280 }, { "epoch": 0.9210442817073016, "grad_norm": 0.3671875, "learning_rate": 0.00019608954843007212, "loss": 0.4628, "step": 36285 }, { "epoch": 0.9211711997563173, "grad_norm": 0.345703125, "learning_rate": 0.00019605792720919316, "loss": 0.4364, "step": 36290 }, { "epoch": 0.9212981178053331, "grad_norm": 0.326171875, "learning_rate": 0.00019602630372827002, "loss": 0.4575, "step": 36295 }, { "epoch": 0.9214250358543489, "grad_norm": 0.341796875, "learning_rate": 0.00019599467798885452, "loss": 0.4459, "step": 36300 }, { "epoch": 0.9215519539033646, "grad_norm": 0.365234375, "learning_rate": 0.00019596304999249848, "loss": 0.4562, "step": 36305 }, { "epoch": 0.9216788719523803, "grad_norm": 0.349609375, "learning_rate": 0.00019593141974075384, "loss": 0.4249, "step": 36310 }, { "epoch": 0.9218057900013961, "grad_norm": 0.34375, "learning_rate": 0.00019589978723517278, "loss": 0.4323, "step": 36315 }, { "epoch": 0.9219327080504118, "grad_norm": 0.330078125, "learning_rate": 0.00019586815247730745, "loss": 0.4502, "step": 36320 }, { "epoch": 0.9220596260994276, "grad_norm": 0.345703125, "learning_rate": 0.00019583651546871016, "loss": 0.4513, "step": 36325 }, { "epoch": 0.9221865441484434, "grad_norm": 0.32421875, "learning_rate": 0.00019580487621093332, "loss": 0.4387, "step": 36330 }, { "epoch": 0.9223134621974591, "grad_norm": 0.328125, "learning_rate": 0.0001957732347055294, "loss": 0.4346, "step": 36335 }, { "epoch": 0.9224403802464749, "grad_norm": 0.361328125, "learning_rate": 0.00019574159095405108, "loss": 0.4656, "step": 36340 }, { "epoch": 0.9225672982954906, "grad_norm": 0.341796875, "learning_rate": 0.00019570994495805114, "loss": 0.4214, "step": 36345 }, { "epoch": 0.9226942163445063, "grad_norm": 0.33203125, "learning_rate": 0.0001956782967190824, "loss": 0.4606, "step": 36350 }, { "epoch": 0.9228211343935221, "grad_norm": 0.36328125, "learning_rate": 0.00019564664623869784, "loss": 0.4238, "step": 36355 }, { "epoch": 0.9229480524425379, "grad_norm": 0.3515625, "learning_rate": 0.00019561499351845055, "loss": 0.4372, "step": 36360 }, { "epoch": 0.9230749704915536, "grad_norm": 0.353515625, "learning_rate": 0.0001955833385598937, "loss": 0.4531, "step": 36365 }, { "epoch": 0.9232018885405694, "grad_norm": 0.357421875, "learning_rate": 0.0001955516813645806, "loss": 0.4454, "step": 36370 }, { "epoch": 0.9233288065895852, "grad_norm": 0.37109375, "learning_rate": 0.00019552002193406456, "loss": 0.4732, "step": 36375 }, { "epoch": 0.9234557246386008, "grad_norm": 0.37109375, "learning_rate": 0.00019548836026989924, "loss": 0.4437, "step": 36380 }, { "epoch": 0.9235826426876166, "grad_norm": 0.3515625, "learning_rate": 0.0001954566963736382, "loss": 0.4495, "step": 36385 }, { "epoch": 0.9237095607366324, "grad_norm": 0.33203125, "learning_rate": 0.00019542503024683514, "loss": 0.4654, "step": 36390 }, { "epoch": 0.9238364787856481, "grad_norm": 0.33203125, "learning_rate": 0.00019539336189104396, "loss": 0.4476, "step": 36395 }, { "epoch": 0.9239633968346639, "grad_norm": 0.35546875, "learning_rate": 0.00019536169130781862, "loss": 0.4411, "step": 36400 }, { "epoch": 0.9240903148836797, "grad_norm": 0.3203125, "learning_rate": 0.0001953300184987131, "loss": 0.4093, "step": 36405 }, { "epoch": 0.9242172329326953, "grad_norm": 0.392578125, "learning_rate": 0.0001952983434652817, "loss": 0.4674, "step": 36410 }, { "epoch": 0.9243441509817111, "grad_norm": 0.38671875, "learning_rate": 0.0001952666662090786, "loss": 0.444, "step": 36415 }, { "epoch": 0.9244710690307268, "grad_norm": 0.345703125, "learning_rate": 0.0001952349867316582, "loss": 0.47, "step": 36420 }, { "epoch": 0.9245979870797426, "grad_norm": 0.345703125, "learning_rate": 0.00019520330503457504, "loss": 0.4463, "step": 36425 }, { "epoch": 0.9247249051287584, "grad_norm": 0.341796875, "learning_rate": 0.00019517162111938371, "loss": 0.4383, "step": 36430 }, { "epoch": 0.9248518231777741, "grad_norm": 0.34375, "learning_rate": 0.00019513993498763887, "loss": 0.4412, "step": 36435 }, { "epoch": 0.9249787412267899, "grad_norm": 0.359375, "learning_rate": 0.00019510824664089547, "loss": 0.4655, "step": 36440 }, { "epoch": 0.9251056592758056, "grad_norm": 0.3828125, "learning_rate": 0.00019507655608070836, "loss": 0.4474, "step": 36445 }, { "epoch": 0.9252325773248213, "grad_norm": 0.357421875, "learning_rate": 0.00019504486330863263, "loss": 0.4631, "step": 36450 }, { "epoch": 0.9253594953738371, "grad_norm": 0.333984375, "learning_rate": 0.00019501316832622336, "loss": 0.4204, "step": 36455 }, { "epoch": 0.9254864134228529, "grad_norm": 0.349609375, "learning_rate": 0.00019498147113503587, "loss": 0.4385, "step": 36460 }, { "epoch": 0.9256133314718686, "grad_norm": 0.341796875, "learning_rate": 0.00019494977173662546, "loss": 0.4515, "step": 36465 }, { "epoch": 0.9257402495208844, "grad_norm": 0.3359375, "learning_rate": 0.00019491807013254772, "loss": 0.4258, "step": 36470 }, { "epoch": 0.9258671675699001, "grad_norm": 0.333984375, "learning_rate": 0.00019488636632435813, "loss": 0.439, "step": 36475 }, { "epoch": 0.9259940856189158, "grad_norm": 0.33984375, "learning_rate": 0.0001948546603136124, "loss": 0.4298, "step": 36480 }, { "epoch": 0.9261210036679316, "grad_norm": 0.333984375, "learning_rate": 0.00019482295210186644, "loss": 0.4421, "step": 36485 }, { "epoch": 0.9262479217169474, "grad_norm": 0.37890625, "learning_rate": 0.00019479124169067603, "loss": 0.4518, "step": 36490 }, { "epoch": 0.9263748397659631, "grad_norm": 0.3125, "learning_rate": 0.00019475952908159724, "loss": 0.4556, "step": 36495 }, { "epoch": 0.9265017578149789, "grad_norm": 0.375, "learning_rate": 0.00019472781427618615, "loss": 0.4556, "step": 36500 }, { "epoch": 0.9266286758639947, "grad_norm": 0.3359375, "learning_rate": 0.00019469609727599909, "loss": 0.428, "step": 36505 }, { "epoch": 0.9267555939130103, "grad_norm": 0.337890625, "learning_rate": 0.00019466437808259224, "loss": 0.4496, "step": 36510 }, { "epoch": 0.9268825119620261, "grad_norm": 0.3203125, "learning_rate": 0.00019463265669752226, "loss": 0.3972, "step": 36515 }, { "epoch": 0.9270094300110419, "grad_norm": 0.353515625, "learning_rate": 0.00019460093312234555, "loss": 0.4576, "step": 36520 }, { "epoch": 0.9271363480600576, "grad_norm": 0.345703125, "learning_rate": 0.00019456920735861884, "loss": 0.4478, "step": 36525 }, { "epoch": 0.9272632661090734, "grad_norm": 0.306640625, "learning_rate": 0.00019453747940789882, "loss": 0.4391, "step": 36530 }, { "epoch": 0.9273901841580892, "grad_norm": 0.3359375, "learning_rate": 0.0001945057492717425, "loss": 0.4421, "step": 36535 }, { "epoch": 0.9275171022071049, "grad_norm": 0.328125, "learning_rate": 0.00019447401695170678, "loss": 0.4718, "step": 36540 }, { "epoch": 0.9276440202561206, "grad_norm": 0.349609375, "learning_rate": 0.00019444228244934873, "loss": 0.4513, "step": 36545 }, { "epoch": 0.9277709383051364, "grad_norm": 0.326171875, "learning_rate": 0.00019441054576622556, "loss": 0.4286, "step": 36550 }, { "epoch": 0.9278978563541521, "grad_norm": 0.35546875, "learning_rate": 0.00019437880690389468, "loss": 0.4366, "step": 36555 }, { "epoch": 0.9280247744031679, "grad_norm": 0.33984375, "learning_rate": 0.00019434706586391332, "loss": 0.472, "step": 36560 }, { "epoch": 0.9281516924521837, "grad_norm": 0.337890625, "learning_rate": 0.00019431532264783922, "loss": 0.4505, "step": 36565 }, { "epoch": 0.9282786105011994, "grad_norm": 0.337890625, "learning_rate": 0.0001942835772572298, "loss": 0.4338, "step": 36570 }, { "epoch": 0.9284055285502151, "grad_norm": 0.341796875, "learning_rate": 0.00019425182969364297, "loss": 0.4432, "step": 36575 }, { "epoch": 0.9285324465992308, "grad_norm": 0.34765625, "learning_rate": 0.0001942200799586364, "loss": 0.4474, "step": 36580 }, { "epoch": 0.9286593646482466, "grad_norm": 0.3515625, "learning_rate": 0.00019418832805376818, "loss": 0.4507, "step": 36585 }, { "epoch": 0.9287862826972624, "grad_norm": 0.345703125, "learning_rate": 0.00019415657398059628, "loss": 0.4481, "step": 36590 }, { "epoch": 0.9289132007462781, "grad_norm": 0.322265625, "learning_rate": 0.00019412481774067886, "loss": 0.4304, "step": 36595 }, { "epoch": 0.9290401187952939, "grad_norm": 0.330078125, "learning_rate": 0.0001940930593355742, "loss": 0.4498, "step": 36600 }, { "epoch": 0.9291670368443097, "grad_norm": 0.353515625, "learning_rate": 0.00019406129876684073, "loss": 0.4773, "step": 36605 }, { "epoch": 0.9292939548933253, "grad_norm": 0.353515625, "learning_rate": 0.00019402953603603687, "loss": 0.4365, "step": 36610 }, { "epoch": 0.9294208729423411, "grad_norm": 0.337890625, "learning_rate": 0.00019399777114472122, "loss": 0.4509, "step": 36615 }, { "epoch": 0.9295477909913569, "grad_norm": 0.345703125, "learning_rate": 0.00019396600409445248, "loss": 0.4722, "step": 36620 }, { "epoch": 0.9296747090403726, "grad_norm": 0.34375, "learning_rate": 0.0001939342348867894, "loss": 0.4535, "step": 36625 }, { "epoch": 0.9298016270893884, "grad_norm": 0.353515625, "learning_rate": 0.0001939024635232909, "loss": 0.4189, "step": 36630 }, { "epoch": 0.9299285451384042, "grad_norm": 0.345703125, "learning_rate": 0.000193870690005516, "loss": 0.4578, "step": 36635 }, { "epoch": 0.9300554631874198, "grad_norm": 0.349609375, "learning_rate": 0.00019383891433502387, "loss": 0.4586, "step": 36640 }, { "epoch": 0.9301823812364356, "grad_norm": 0.3359375, "learning_rate": 0.00019380713651337368, "loss": 0.4582, "step": 36645 }, { "epoch": 0.9303092992854514, "grad_norm": 0.333984375, "learning_rate": 0.00019377535654212472, "loss": 0.4186, "step": 36650 }, { "epoch": 0.9304362173344671, "grad_norm": 0.326171875, "learning_rate": 0.0001937435744228364, "loss": 0.4337, "step": 36655 }, { "epoch": 0.9305631353834829, "grad_norm": 0.333984375, "learning_rate": 0.00019371179015706839, "loss": 0.4244, "step": 36660 }, { "epoch": 0.9306900534324987, "grad_norm": 0.34765625, "learning_rate": 0.00019368000374638024, "loss": 0.4799, "step": 36665 }, { "epoch": 0.9308169714815144, "grad_norm": 0.357421875, "learning_rate": 0.0001936482151923317, "loss": 0.4658, "step": 36670 }, { "epoch": 0.9309438895305301, "grad_norm": 0.359375, "learning_rate": 0.00019361642449648259, "loss": 0.4583, "step": 36675 }, { "epoch": 0.9310708075795459, "grad_norm": 0.341796875, "learning_rate": 0.00019358463166039292, "loss": 0.4813, "step": 36680 }, { "epoch": 0.9311977256285616, "grad_norm": 0.380859375, "learning_rate": 0.00019355283668562274, "loss": 0.4523, "step": 36685 }, { "epoch": 0.9313246436775774, "grad_norm": 0.326171875, "learning_rate": 0.00019352103957373222, "loss": 0.4215, "step": 36690 }, { "epoch": 0.9314515617265932, "grad_norm": 0.33203125, "learning_rate": 0.00019348924032628164, "loss": 0.4388, "step": 36695 }, { "epoch": 0.9315784797756089, "grad_norm": 0.357421875, "learning_rate": 0.00019345743894483132, "loss": 0.4664, "step": 36700 }, { "epoch": 0.9317053978246247, "grad_norm": 0.32421875, "learning_rate": 0.00019342563543094183, "loss": 0.4365, "step": 36705 }, { "epoch": 0.9318323158736403, "grad_norm": 0.33984375, "learning_rate": 0.00019339382978617365, "loss": 0.4308, "step": 36710 }, { "epoch": 0.9319592339226561, "grad_norm": 0.34765625, "learning_rate": 0.00019336202201208756, "loss": 0.4363, "step": 36715 }, { "epoch": 0.9320861519716719, "grad_norm": 0.314453125, "learning_rate": 0.00019333021211024432, "loss": 0.4278, "step": 36720 }, { "epoch": 0.9322130700206877, "grad_norm": 0.37890625, "learning_rate": 0.00019329840008220484, "loss": 0.4524, "step": 36725 }, { "epoch": 0.9323399880697034, "grad_norm": 0.345703125, "learning_rate": 0.00019326658592953012, "loss": 0.4397, "step": 36730 }, { "epoch": 0.9324669061187192, "grad_norm": 0.380859375, "learning_rate": 0.00019323476965378125, "loss": 0.4645, "step": 36735 }, { "epoch": 0.9325938241677348, "grad_norm": 0.34375, "learning_rate": 0.00019320295125651945, "loss": 0.4243, "step": 36740 }, { "epoch": 0.9327207422167506, "grad_norm": 0.345703125, "learning_rate": 0.00019317113073930608, "loss": 0.4514, "step": 36745 }, { "epoch": 0.9328476602657664, "grad_norm": 0.361328125, "learning_rate": 0.00019313930810370243, "loss": 0.4506, "step": 36750 }, { "epoch": 0.9329745783147821, "grad_norm": 0.345703125, "learning_rate": 0.00019310748335127017, "loss": 0.4625, "step": 36755 }, { "epoch": 0.9331014963637979, "grad_norm": 0.34765625, "learning_rate": 0.00019307565648357092, "loss": 0.4404, "step": 36760 }, { "epoch": 0.9332284144128137, "grad_norm": 0.322265625, "learning_rate": 0.00019304382750216632, "loss": 0.4199, "step": 36765 }, { "epoch": 0.9333553324618294, "grad_norm": 0.341796875, "learning_rate": 0.00019301199640861825, "loss": 0.4692, "step": 36770 }, { "epoch": 0.9334822505108451, "grad_norm": 0.353515625, "learning_rate": 0.00019298016320448867, "loss": 0.4373, "step": 36775 }, { "epoch": 0.9336091685598609, "grad_norm": 0.345703125, "learning_rate": 0.0001929483278913396, "loss": 0.4332, "step": 36780 }, { "epoch": 0.9337360866088766, "grad_norm": 0.3515625, "learning_rate": 0.00019291649047073315, "loss": 0.4323, "step": 36785 }, { "epoch": 0.9338630046578924, "grad_norm": 0.330078125, "learning_rate": 0.00019288465094423164, "loss": 0.4439, "step": 36790 }, { "epoch": 0.9339899227069082, "grad_norm": 0.353515625, "learning_rate": 0.00019285280931339736, "loss": 0.4527, "step": 36795 }, { "epoch": 0.9341168407559239, "grad_norm": 0.36328125, "learning_rate": 0.00019282096557979284, "loss": 0.4602, "step": 36800 }, { "epoch": 0.9342437588049397, "grad_norm": 0.3515625, "learning_rate": 0.0001927891197449806, "loss": 0.4712, "step": 36805 }, { "epoch": 0.9343706768539554, "grad_norm": 0.361328125, "learning_rate": 0.00019275727181052328, "loss": 0.4512, "step": 36810 }, { "epoch": 0.9344975949029711, "grad_norm": 0.3515625, "learning_rate": 0.00019272542177798361, "loss": 0.4544, "step": 36815 }, { "epoch": 0.9346245129519869, "grad_norm": 0.328125, "learning_rate": 0.0001926935696489246, "loss": 0.4356, "step": 36820 }, { "epoch": 0.9347514310010027, "grad_norm": 0.33984375, "learning_rate": 0.0001926617154249091, "loss": 0.46, "step": 36825 }, { "epoch": 0.9348783490500184, "grad_norm": 0.33984375, "learning_rate": 0.00019262985910750022, "loss": 0.4449, "step": 36830 }, { "epoch": 0.9350052670990342, "grad_norm": 0.369140625, "learning_rate": 0.00019259800069826113, "loss": 0.455, "step": 36835 }, { "epoch": 0.9351321851480499, "grad_norm": 0.359375, "learning_rate": 0.0001925661401987551, "loss": 0.4569, "step": 36840 }, { "epoch": 0.9352591031970656, "grad_norm": 0.30859375, "learning_rate": 0.0001925342776105455, "loss": 0.4161, "step": 36845 }, { "epoch": 0.9353860212460814, "grad_norm": 0.330078125, "learning_rate": 0.0001925024129351959, "loss": 0.4265, "step": 36850 }, { "epoch": 0.9355129392950972, "grad_norm": 0.34765625, "learning_rate": 0.00019247054617426982, "loss": 0.4334, "step": 36855 }, { "epoch": 0.9356398573441129, "grad_norm": 0.341796875, "learning_rate": 0.00019243867732933096, "loss": 0.4672, "step": 36860 }, { "epoch": 0.9357667753931287, "grad_norm": 0.3203125, "learning_rate": 0.0001924068064019431, "loss": 0.3964, "step": 36865 }, { "epoch": 0.9358936934421445, "grad_norm": 0.369140625, "learning_rate": 0.00019237493339367017, "loss": 0.4763, "step": 36870 }, { "epoch": 0.9360206114911601, "grad_norm": 0.341796875, "learning_rate": 0.00019234305830607607, "loss": 0.4797, "step": 36875 }, { "epoch": 0.9361475295401759, "grad_norm": 0.330078125, "learning_rate": 0.00019231118114072497, "loss": 0.4413, "step": 36880 }, { "epoch": 0.9362744475891916, "grad_norm": 0.357421875, "learning_rate": 0.00019227930189918112, "loss": 0.4647, "step": 36885 }, { "epoch": 0.9364013656382074, "grad_norm": 0.37890625, "learning_rate": 0.00019224742058300873, "loss": 0.4531, "step": 36890 }, { "epoch": 0.9365282836872232, "grad_norm": 0.3515625, "learning_rate": 0.00019221553719377227, "loss": 0.484, "step": 36895 }, { "epoch": 0.936655201736239, "grad_norm": 0.365234375, "learning_rate": 0.00019218365173303623, "loss": 0.4543, "step": 36900 }, { "epoch": 0.9367821197852546, "grad_norm": 0.349609375, "learning_rate": 0.00019215176420236518, "loss": 0.4174, "step": 36905 }, { "epoch": 0.9369090378342704, "grad_norm": 0.326171875, "learning_rate": 0.0001921198746033238, "loss": 0.4199, "step": 36910 }, { "epoch": 0.9370359558832861, "grad_norm": 0.345703125, "learning_rate": 0.000192087982937477, "loss": 0.4155, "step": 36915 }, { "epoch": 0.9371628739323019, "grad_norm": 0.34375, "learning_rate": 0.00019205608920638964, "loss": 0.4453, "step": 36920 }, { "epoch": 0.9372897919813177, "grad_norm": 0.3515625, "learning_rate": 0.00019202419341162672, "loss": 0.4443, "step": 36925 }, { "epoch": 0.9374167100303334, "grad_norm": 0.349609375, "learning_rate": 0.0001919922955547534, "loss": 0.4326, "step": 36930 }, { "epoch": 0.9375436280793492, "grad_norm": 0.404296875, "learning_rate": 0.00019196039563733485, "loss": 0.4265, "step": 36935 }, { "epoch": 0.9376705461283649, "grad_norm": 0.3359375, "learning_rate": 0.00019192849366093638, "loss": 0.4579, "step": 36940 }, { "epoch": 0.9377974641773806, "grad_norm": 0.3515625, "learning_rate": 0.00019189658962712346, "loss": 0.437, "step": 36945 }, { "epoch": 0.9379243822263964, "grad_norm": 0.337890625, "learning_rate": 0.00019186468353746155, "loss": 0.4408, "step": 36950 }, { "epoch": 0.9380513002754122, "grad_norm": 0.353515625, "learning_rate": 0.0001918327753935163, "loss": 0.4473, "step": 36955 }, { "epoch": 0.9381782183244279, "grad_norm": 0.33203125, "learning_rate": 0.00019180086519685348, "loss": 0.4442, "step": 36960 }, { "epoch": 0.9383051363734437, "grad_norm": 0.33984375, "learning_rate": 0.00019176895294903878, "loss": 0.446, "step": 36965 }, { "epoch": 0.9384320544224595, "grad_norm": 0.353515625, "learning_rate": 0.00019173703865163822, "loss": 0.4466, "step": 36970 }, { "epoch": 0.9385589724714751, "grad_norm": 0.35546875, "learning_rate": 0.0001917051223062178, "loss": 0.4754, "step": 36975 }, { "epoch": 0.9386858905204909, "grad_norm": 0.33203125, "learning_rate": 0.00019167320391434364, "loss": 0.4329, "step": 36980 }, { "epoch": 0.9388128085695067, "grad_norm": 0.330078125, "learning_rate": 0.000191641283477582, "loss": 0.4262, "step": 36985 }, { "epoch": 0.9389397266185224, "grad_norm": 0.32421875, "learning_rate": 0.00019160936099749915, "loss": 0.4599, "step": 36990 }, { "epoch": 0.9390666446675382, "grad_norm": 0.3203125, "learning_rate": 0.0001915774364756615, "loss": 0.4456, "step": 36995 }, { "epoch": 0.939193562716554, "grad_norm": 0.333984375, "learning_rate": 0.00019154550991363562, "loss": 0.4237, "step": 37000 }, { "epoch": 0.9393204807655696, "grad_norm": 0.36328125, "learning_rate": 0.0001915135813129881, "loss": 0.4064, "step": 37005 }, { "epoch": 0.9394473988145854, "grad_norm": 0.341796875, "learning_rate": 0.0001914816506752857, "loss": 0.4218, "step": 37010 }, { "epoch": 0.9395743168636012, "grad_norm": 0.349609375, "learning_rate": 0.00019144971800209525, "loss": 0.4528, "step": 37015 }, { "epoch": 0.9397012349126169, "grad_norm": 0.34765625, "learning_rate": 0.0001914177832949836, "loss": 0.4324, "step": 37020 }, { "epoch": 0.9398281529616327, "grad_norm": 0.373046875, "learning_rate": 0.00019138584655551788, "loss": 0.4148, "step": 37025 }, { "epoch": 0.9399550710106485, "grad_norm": 0.353515625, "learning_rate": 0.00019135390778526513, "loss": 0.4604, "step": 37030 }, { "epoch": 0.9400819890596642, "grad_norm": 0.341796875, "learning_rate": 0.00019132196698579257, "loss": 0.4311, "step": 37035 }, { "epoch": 0.9402089071086799, "grad_norm": 0.326171875, "learning_rate": 0.00019129002415866757, "loss": 0.4331, "step": 37040 }, { "epoch": 0.9403358251576956, "grad_norm": 0.357421875, "learning_rate": 0.00019125807930545752, "loss": 0.4384, "step": 37045 }, { "epoch": 0.9404627432067114, "grad_norm": 0.33203125, "learning_rate": 0.00019122613242772998, "loss": 0.4703, "step": 37050 }, { "epoch": 0.9405896612557272, "grad_norm": 0.34765625, "learning_rate": 0.00019119418352705252, "loss": 0.4514, "step": 37055 }, { "epoch": 0.940716579304743, "grad_norm": 0.345703125, "learning_rate": 0.00019116223260499292, "loss": 0.4478, "step": 37060 }, { "epoch": 0.9408434973537587, "grad_norm": 0.36328125, "learning_rate": 0.0001911302796631189, "loss": 0.4467, "step": 37065 }, { "epoch": 0.9409704154027744, "grad_norm": 0.375, "learning_rate": 0.0001910983247029985, "loss": 0.4293, "step": 37070 }, { "epoch": 0.9410973334517901, "grad_norm": 0.330078125, "learning_rate": 0.00019106636772619966, "loss": 0.4475, "step": 37075 }, { "epoch": 0.9412242515008059, "grad_norm": 0.37890625, "learning_rate": 0.0001910344087342905, "loss": 0.4226, "step": 37080 }, { "epoch": 0.9413511695498217, "grad_norm": 0.341796875, "learning_rate": 0.00019100244772883926, "loss": 0.4295, "step": 37085 }, { "epoch": 0.9414780875988374, "grad_norm": 0.341796875, "learning_rate": 0.00019097048471141423, "loss": 0.4587, "step": 37090 }, { "epoch": 0.9416050056478532, "grad_norm": 0.341796875, "learning_rate": 0.00019093851968358388, "loss": 0.4378, "step": 37095 }, { "epoch": 0.941731923696869, "grad_norm": 0.32421875, "learning_rate": 0.0001909065526469166, "loss": 0.457, "step": 37100 }, { "epoch": 0.9418588417458846, "grad_norm": 0.359375, "learning_rate": 0.00019087458360298116, "loss": 0.4625, "step": 37105 }, { "epoch": 0.9419857597949004, "grad_norm": 0.376953125, "learning_rate": 0.00019084261255334614, "loss": 0.4391, "step": 37110 }, { "epoch": 0.9421126778439162, "grad_norm": 0.349609375, "learning_rate": 0.00019081063949958038, "loss": 0.4316, "step": 37115 }, { "epoch": 0.9422395958929319, "grad_norm": 0.376953125, "learning_rate": 0.0001907786644432528, "loss": 0.4394, "step": 37120 }, { "epoch": 0.9423665139419477, "grad_norm": 0.35546875, "learning_rate": 0.00019074668738593243, "loss": 0.4711, "step": 37125 }, { "epoch": 0.9424934319909635, "grad_norm": 0.35546875, "learning_rate": 0.0001907147083291883, "loss": 0.468, "step": 37130 }, { "epoch": 0.9426203500399792, "grad_norm": 0.361328125, "learning_rate": 0.00019068272727458966, "loss": 0.4432, "step": 37135 }, { "epoch": 0.9427472680889949, "grad_norm": 0.37890625, "learning_rate": 0.00019065074422370583, "loss": 0.4517, "step": 37140 }, { "epoch": 0.9428741861380107, "grad_norm": 0.328125, "learning_rate": 0.00019061875917810616, "loss": 0.4021, "step": 37145 }, { "epoch": 0.9430011041870264, "grad_norm": 0.35546875, "learning_rate": 0.00019058677213936018, "loss": 0.4312, "step": 37150 }, { "epoch": 0.9431280222360422, "grad_norm": 0.3515625, "learning_rate": 0.0001905547831090374, "loss": 0.4008, "step": 37155 }, { "epoch": 0.943254940285058, "grad_norm": 0.3359375, "learning_rate": 0.00019052279208870763, "loss": 0.431, "step": 37160 }, { "epoch": 0.9433818583340737, "grad_norm": 0.333984375, "learning_rate": 0.0001904907990799405, "loss": 0.44, "step": 37165 }, { "epoch": 0.9435087763830894, "grad_norm": 0.365234375, "learning_rate": 0.0001904588040843061, "loss": 0.4516, "step": 37170 }, { "epoch": 0.9436356944321052, "grad_norm": 0.34765625, "learning_rate": 0.00019042680710337423, "loss": 0.4443, "step": 37175 }, { "epoch": 0.9437626124811209, "grad_norm": 0.328125, "learning_rate": 0.00019039480813871508, "loss": 0.4206, "step": 37180 }, { "epoch": 0.9438895305301367, "grad_norm": 0.353515625, "learning_rate": 0.00019036280719189876, "loss": 0.447, "step": 37185 }, { "epoch": 0.9440164485791525, "grad_norm": 0.35546875, "learning_rate": 0.00019033080426449555, "loss": 0.4319, "step": 37190 }, { "epoch": 0.9441433666281682, "grad_norm": 0.33203125, "learning_rate": 0.0001902987993580759, "loss": 0.419, "step": 37195 }, { "epoch": 0.944270284677184, "grad_norm": 0.3515625, "learning_rate": 0.00019026679247421014, "loss": 0.46, "step": 37200 }, { "epoch": 0.9443972027261996, "grad_norm": 0.361328125, "learning_rate": 0.00019023478361446895, "loss": 0.4255, "step": 37205 }, { "epoch": 0.9445241207752154, "grad_norm": 0.36328125, "learning_rate": 0.00019020277278042298, "loss": 0.4251, "step": 37210 }, { "epoch": 0.9446510388242312, "grad_norm": 0.333984375, "learning_rate": 0.00019017075997364297, "loss": 0.4543, "step": 37215 }, { "epoch": 0.944777956873247, "grad_norm": 0.34375, "learning_rate": 0.00019013874519569976, "loss": 0.4254, "step": 37220 }, { "epoch": 0.9449048749222627, "grad_norm": 0.337890625, "learning_rate": 0.00019010672844816426, "loss": 0.456, "step": 37225 }, { "epoch": 0.9450317929712785, "grad_norm": 0.328125, "learning_rate": 0.0001900747097326076, "loss": 0.4054, "step": 37230 }, { "epoch": 0.9451587110202943, "grad_norm": 0.32421875, "learning_rate": 0.00019004268905060091, "loss": 0.4369, "step": 37235 }, { "epoch": 0.9452856290693099, "grad_norm": 0.31640625, "learning_rate": 0.00019001066640371542, "loss": 0.4324, "step": 37240 }, { "epoch": 0.9454125471183257, "grad_norm": 0.322265625, "learning_rate": 0.00018997864179352245, "loss": 0.4135, "step": 37245 }, { "epoch": 0.9455394651673414, "grad_norm": 0.357421875, "learning_rate": 0.00018994661522159348, "loss": 0.4406, "step": 37250 }, { "epoch": 0.9456663832163572, "grad_norm": 0.345703125, "learning_rate": 0.0001899145866895, "loss": 0.4615, "step": 37255 }, { "epoch": 0.945793301265373, "grad_norm": 0.333984375, "learning_rate": 0.00018988255619881365, "loss": 0.4401, "step": 37260 }, { "epoch": 0.9459202193143887, "grad_norm": 0.349609375, "learning_rate": 0.00018985052375110613, "loss": 0.4606, "step": 37265 }, { "epoch": 0.9460471373634044, "grad_norm": 0.322265625, "learning_rate": 0.00018981848934794933, "loss": 0.4479, "step": 37270 }, { "epoch": 0.9461740554124202, "grad_norm": 0.3515625, "learning_rate": 0.00018978645299091507, "loss": 0.4319, "step": 37275 }, { "epoch": 0.9463009734614359, "grad_norm": 0.31640625, "learning_rate": 0.00018975441468157544, "loss": 0.4393, "step": 37280 }, { "epoch": 0.9464278915104517, "grad_norm": 0.330078125, "learning_rate": 0.00018972237442150248, "loss": 0.4277, "step": 37285 }, { "epoch": 0.9465548095594675, "grad_norm": 0.369140625, "learning_rate": 0.0001896903322122685, "loss": 0.4639, "step": 37290 }, { "epoch": 0.9466817276084832, "grad_norm": 0.35546875, "learning_rate": 0.0001896582880554457, "loss": 0.455, "step": 37295 }, { "epoch": 0.946808645657499, "grad_norm": 0.33984375, "learning_rate": 0.00018962624195260647, "loss": 0.4586, "step": 37300 }, { "epoch": 0.9469355637065147, "grad_norm": 0.337890625, "learning_rate": 0.00018959419390532337, "loss": 0.4567, "step": 37305 }, { "epoch": 0.9470624817555304, "grad_norm": 0.341796875, "learning_rate": 0.00018956214391516895, "loss": 0.4267, "step": 37310 }, { "epoch": 0.9471893998045462, "grad_norm": 0.3828125, "learning_rate": 0.00018953009198371587, "loss": 0.4662, "step": 37315 }, { "epoch": 0.947316317853562, "grad_norm": 0.330078125, "learning_rate": 0.0001894980381125369, "loss": 0.4411, "step": 37320 }, { "epoch": 0.9474432359025777, "grad_norm": 0.333984375, "learning_rate": 0.00018946598230320494, "loss": 0.4337, "step": 37325 }, { "epoch": 0.9475701539515935, "grad_norm": 0.353515625, "learning_rate": 0.00018943392455729298, "loss": 0.459, "step": 37330 }, { "epoch": 0.9476970720006092, "grad_norm": 0.361328125, "learning_rate": 0.00018940186487637403, "loss": 0.4458, "step": 37335 }, { "epoch": 0.9478239900496249, "grad_norm": 0.361328125, "learning_rate": 0.0001893698032620213, "loss": 0.4561, "step": 37340 }, { "epoch": 0.9479509080986407, "grad_norm": 0.3125, "learning_rate": 0.00018933773971580796, "loss": 0.4371, "step": 37345 }, { "epoch": 0.9480778261476565, "grad_norm": 0.310546875, "learning_rate": 0.00018930567423930744, "loss": 0.4038, "step": 37350 }, { "epoch": 0.9482047441966722, "grad_norm": 0.333984375, "learning_rate": 0.00018927360683409312, "loss": 0.449, "step": 37355 }, { "epoch": 0.948331662245688, "grad_norm": 0.3203125, "learning_rate": 0.00018924153750173858, "loss": 0.4252, "step": 37360 }, { "epoch": 0.9484585802947038, "grad_norm": 0.345703125, "learning_rate": 0.00018920946624381737, "loss": 0.4366, "step": 37365 }, { "epoch": 0.9485854983437194, "grad_norm": 0.291015625, "learning_rate": 0.0001891773930619033, "loss": 0.3802, "step": 37370 }, { "epoch": 0.9487124163927352, "grad_norm": 0.337890625, "learning_rate": 0.00018914531795757018, "loss": 0.4212, "step": 37375 }, { "epoch": 0.948839334441751, "grad_norm": 0.361328125, "learning_rate": 0.00018911324093239185, "loss": 0.4407, "step": 37380 }, { "epoch": 0.9489662524907667, "grad_norm": 0.326171875, "learning_rate": 0.00018908116198794244, "loss": 0.4382, "step": 37385 }, { "epoch": 0.9490931705397825, "grad_norm": 0.330078125, "learning_rate": 0.00018904908112579597, "loss": 0.4378, "step": 37390 }, { "epoch": 0.9492200885887982, "grad_norm": 0.353515625, "learning_rate": 0.0001890169983475266, "loss": 0.4574, "step": 37395 }, { "epoch": 0.949347006637814, "grad_norm": 0.357421875, "learning_rate": 0.00018898491365470867, "loss": 0.4535, "step": 37400 }, { "epoch": 0.9494739246868297, "grad_norm": 0.3828125, "learning_rate": 0.00018895282704891656, "loss": 0.4728, "step": 37405 }, { "epoch": 0.9496008427358454, "grad_norm": 0.328125, "learning_rate": 0.00018892073853172474, "loss": 0.4395, "step": 37410 }, { "epoch": 0.9497277607848612, "grad_norm": 0.31640625, "learning_rate": 0.0001888886481047078, "loss": 0.4374, "step": 37415 }, { "epoch": 0.949854678833877, "grad_norm": 0.32421875, "learning_rate": 0.00018885655576944037, "loss": 0.4157, "step": 37420 }, { "epoch": 0.9499815968828927, "grad_norm": 0.314453125, "learning_rate": 0.00018882446152749724, "loss": 0.4268, "step": 37425 }, { "epoch": 0.9501085149319085, "grad_norm": 0.33984375, "learning_rate": 0.0001887923653804533, "loss": 0.4454, "step": 37430 }, { "epoch": 0.9502354329809242, "grad_norm": 0.32421875, "learning_rate": 0.00018876026732988338, "loss": 0.4074, "step": 37435 }, { "epoch": 0.9503623510299399, "grad_norm": 0.32421875, "learning_rate": 0.00018872816737736253, "loss": 0.4343, "step": 37440 }, { "epoch": 0.9504892690789557, "grad_norm": 0.322265625, "learning_rate": 0.000188696065524466, "loss": 0.4634, "step": 37445 }, { "epoch": 0.9506161871279715, "grad_norm": 0.34765625, "learning_rate": 0.00018866396177276898, "loss": 0.4292, "step": 37450 }, { "epoch": 0.9507431051769872, "grad_norm": 0.337890625, "learning_rate": 0.00018863185612384668, "loss": 0.4359, "step": 37455 }, { "epoch": 0.950870023226003, "grad_norm": 0.341796875, "learning_rate": 0.00018859974857927465, "loss": 0.445, "step": 37460 }, { "epoch": 0.9509969412750188, "grad_norm": 0.37109375, "learning_rate": 0.00018856763914062833, "loss": 0.4707, "step": 37465 }, { "epoch": 0.9511238593240344, "grad_norm": 0.3515625, "learning_rate": 0.00018853552780948333, "loss": 0.4676, "step": 37470 }, { "epoch": 0.9512507773730502, "grad_norm": 0.38671875, "learning_rate": 0.00018850341458741526, "loss": 0.4732, "step": 37475 }, { "epoch": 0.951377695422066, "grad_norm": 0.34375, "learning_rate": 0.00018847129947600002, "loss": 0.4173, "step": 37480 }, { "epoch": 0.9515046134710817, "grad_norm": 0.341796875, "learning_rate": 0.00018843918247681343, "loss": 0.4811, "step": 37485 }, { "epoch": 0.9516315315200975, "grad_norm": 0.3125, "learning_rate": 0.0001884070635914315, "loss": 0.4523, "step": 37490 }, { "epoch": 0.9517584495691133, "grad_norm": 0.359375, "learning_rate": 0.00018837494282143022, "loss": 0.4806, "step": 37495 }, { "epoch": 0.9518853676181289, "grad_norm": 0.359375, "learning_rate": 0.00018834282016838582, "loss": 0.4498, "step": 37500 }, { "epoch": 0.9520122856671447, "grad_norm": 0.37109375, "learning_rate": 0.00018831069563387447, "loss": 0.4626, "step": 37505 }, { "epoch": 0.9521392037161605, "grad_norm": 0.341796875, "learning_rate": 0.00018827856921947253, "loss": 0.4629, "step": 37510 }, { "epoch": 0.9522661217651762, "grad_norm": 0.3515625, "learning_rate": 0.00018824644092675652, "loss": 0.4746, "step": 37515 }, { "epoch": 0.952393039814192, "grad_norm": 0.3046875, "learning_rate": 0.0001882143107573028, "loss": 0.4402, "step": 37520 }, { "epoch": 0.9525199578632078, "grad_norm": 0.34375, "learning_rate": 0.00018818217871268813, "loss": 0.4495, "step": 37525 }, { "epoch": 0.9526468759122235, "grad_norm": 0.357421875, "learning_rate": 0.00018815004479448916, "loss": 0.437, "step": 37530 }, { "epoch": 0.9527737939612392, "grad_norm": 0.322265625, "learning_rate": 0.00018811790900428267, "loss": 0.4585, "step": 37535 }, { "epoch": 0.952900712010255, "grad_norm": 0.359375, "learning_rate": 0.00018808577134364558, "loss": 0.4631, "step": 37540 }, { "epoch": 0.9530276300592707, "grad_norm": 0.361328125, "learning_rate": 0.00018805363181415482, "loss": 0.4584, "step": 37545 }, { "epoch": 0.9531545481082865, "grad_norm": 0.328125, "learning_rate": 0.00018802149041738758, "loss": 0.4462, "step": 37550 }, { "epoch": 0.9532814661573022, "grad_norm": 0.34375, "learning_rate": 0.00018798934715492094, "loss": 0.4551, "step": 37555 }, { "epoch": 0.953408384206318, "grad_norm": 0.361328125, "learning_rate": 0.00018795720202833215, "loss": 0.4512, "step": 37560 }, { "epoch": 0.9535353022553338, "grad_norm": 0.353515625, "learning_rate": 0.00018792505503919854, "loss": 0.4473, "step": 37565 }, { "epoch": 0.9536622203043494, "grad_norm": 0.353515625, "learning_rate": 0.00018789290618909758, "loss": 0.4518, "step": 37570 }, { "epoch": 0.9537891383533652, "grad_norm": 0.3515625, "learning_rate": 0.00018786075547960686, "loss": 0.4267, "step": 37575 }, { "epoch": 0.953916056402381, "grad_norm": 0.345703125, "learning_rate": 0.00018782860291230392, "loss": 0.4551, "step": 37580 }, { "epoch": 0.9540429744513967, "grad_norm": 0.365234375, "learning_rate": 0.00018779644848876653, "loss": 0.4406, "step": 37585 }, { "epoch": 0.9541698925004125, "grad_norm": 0.33203125, "learning_rate": 0.00018776429221057246, "loss": 0.4344, "step": 37590 }, { "epoch": 0.9542968105494283, "grad_norm": 0.380859375, "learning_rate": 0.00018773213407929964, "loss": 0.4346, "step": 37595 }, { "epoch": 0.9544237285984439, "grad_norm": 0.353515625, "learning_rate": 0.00018769997409652595, "loss": 0.4527, "step": 37600 }, { "epoch": 0.9545506466474597, "grad_norm": 0.375, "learning_rate": 0.0001876678122638296, "loss": 0.4453, "step": 37605 }, { "epoch": 0.9546775646964755, "grad_norm": 0.337890625, "learning_rate": 0.00018763564858278867, "loss": 0.4615, "step": 37610 }, { "epoch": 0.9548044827454912, "grad_norm": 0.33203125, "learning_rate": 0.00018760348305498154, "loss": 0.464, "step": 37615 }, { "epoch": 0.954931400794507, "grad_norm": 0.3671875, "learning_rate": 0.0001875713156819864, "loss": 0.4806, "step": 37620 }, { "epoch": 0.9550583188435228, "grad_norm": 0.373046875, "learning_rate": 0.00018753914646538184, "loss": 0.4942, "step": 37625 }, { "epoch": 0.9551852368925385, "grad_norm": 0.32421875, "learning_rate": 0.00018750697540674628, "loss": 0.434, "step": 37630 }, { "epoch": 0.9553121549415542, "grad_norm": 0.326171875, "learning_rate": 0.0001874748025076583, "loss": 0.4333, "step": 37635 }, { "epoch": 0.95543907299057, "grad_norm": 0.3203125, "learning_rate": 0.00018744262776969675, "loss": 0.4243, "step": 37640 }, { "epoch": 0.9555659910395857, "grad_norm": 0.330078125, "learning_rate": 0.00018741045119444037, "loss": 0.4233, "step": 37645 }, { "epoch": 0.9556929090886015, "grad_norm": 0.326171875, "learning_rate": 0.00018737827278346808, "loss": 0.4342, "step": 37650 }, { "epoch": 0.9558198271376173, "grad_norm": 0.337890625, "learning_rate": 0.0001873460925383588, "loss": 0.4395, "step": 37655 }, { "epoch": 0.955946745186633, "grad_norm": 0.341796875, "learning_rate": 0.00018731391046069166, "loss": 0.4474, "step": 37660 }, { "epoch": 0.9560736632356488, "grad_norm": 0.36328125, "learning_rate": 0.00018728172655204577, "loss": 0.4376, "step": 37665 }, { "epoch": 0.9562005812846645, "grad_norm": 0.32421875, "learning_rate": 0.00018724954081400042, "loss": 0.4218, "step": 37670 }, { "epoch": 0.9563274993336802, "grad_norm": 0.3671875, "learning_rate": 0.00018721735324813497, "loss": 0.4415, "step": 37675 }, { "epoch": 0.956454417382696, "grad_norm": 0.3515625, "learning_rate": 0.0001871851638560288, "loss": 0.4746, "step": 37680 }, { "epoch": 0.9565813354317118, "grad_norm": 0.3359375, "learning_rate": 0.0001871529726392614, "loss": 0.4285, "step": 37685 }, { "epoch": 0.9567082534807275, "grad_norm": 0.326171875, "learning_rate": 0.0001871207795994125, "loss": 0.4377, "step": 37690 }, { "epoch": 0.9568351715297433, "grad_norm": 0.376953125, "learning_rate": 0.00018708858473806168, "loss": 0.4583, "step": 37695 }, { "epoch": 0.956962089578759, "grad_norm": 0.369140625, "learning_rate": 0.0001870563880567888, "loss": 0.4446, "step": 37700 }, { "epoch": 0.9570890076277747, "grad_norm": 0.3203125, "learning_rate": 0.00018702418955717377, "loss": 0.4165, "step": 37705 }, { "epoch": 0.9572159256767905, "grad_norm": 0.341796875, "learning_rate": 0.00018699198924079647, "loss": 0.4581, "step": 37710 }, { "epoch": 0.9573428437258062, "grad_norm": 0.337890625, "learning_rate": 0.000186959787109237, "loss": 0.4433, "step": 37715 }, { "epoch": 0.957469761774822, "grad_norm": 0.35546875, "learning_rate": 0.0001869275831640755, "loss": 0.4416, "step": 37720 }, { "epoch": 0.9575966798238378, "grad_norm": 0.322265625, "learning_rate": 0.00018689537740689216, "loss": 0.4335, "step": 37725 }, { "epoch": 0.9577235978728535, "grad_norm": 0.322265625, "learning_rate": 0.00018686316983926735, "loss": 0.4342, "step": 37730 }, { "epoch": 0.9578505159218692, "grad_norm": 0.32421875, "learning_rate": 0.00018683096046278154, "loss": 0.3909, "step": 37735 }, { "epoch": 0.957977433970885, "grad_norm": 0.33984375, "learning_rate": 0.00018679874927901514, "loss": 0.4549, "step": 37740 }, { "epoch": 0.9581043520199007, "grad_norm": 0.392578125, "learning_rate": 0.0001867665362895488, "loss": 0.4665, "step": 37745 }, { "epoch": 0.9582312700689165, "grad_norm": 0.3515625, "learning_rate": 0.00018673432149596316, "loss": 0.4472, "step": 37750 }, { "epoch": 0.9583581881179323, "grad_norm": 0.34765625, "learning_rate": 0.00018670210489983897, "loss": 0.4511, "step": 37755 }, { "epoch": 0.958485106166948, "grad_norm": 0.361328125, "learning_rate": 0.00018666988650275706, "loss": 0.4409, "step": 37760 }, { "epoch": 0.9586120242159637, "grad_norm": 0.353515625, "learning_rate": 0.00018663766630629852, "loss": 0.4529, "step": 37765 }, { "epoch": 0.9587389422649795, "grad_norm": 0.365234375, "learning_rate": 0.00018660544431204425, "loss": 0.43, "step": 37770 }, { "epoch": 0.9588658603139952, "grad_norm": 0.380859375, "learning_rate": 0.0001865732205215754, "loss": 0.4679, "step": 37775 }, { "epoch": 0.958992778363011, "grad_norm": 0.365234375, "learning_rate": 0.00018654099493647323, "loss": 0.4478, "step": 37780 }, { "epoch": 0.9591196964120268, "grad_norm": 0.345703125, "learning_rate": 0.00018650876755831898, "loss": 0.449, "step": 37785 }, { "epoch": 0.9592466144610425, "grad_norm": 0.337890625, "learning_rate": 0.000186476538388694, "loss": 0.4268, "step": 37790 }, { "epoch": 0.9593735325100583, "grad_norm": 0.3515625, "learning_rate": 0.00018644430742917987, "loss": 0.4472, "step": 37795 }, { "epoch": 0.959500450559074, "grad_norm": 0.3515625, "learning_rate": 0.00018641207468135808, "loss": 0.454, "step": 37800 }, { "epoch": 0.9596273686080897, "grad_norm": 0.3515625, "learning_rate": 0.0001863798401468103, "loss": 0.4727, "step": 37805 }, { "epoch": 0.9597542866571055, "grad_norm": 0.345703125, "learning_rate": 0.0001863476038271182, "loss": 0.4107, "step": 37810 }, { "epoch": 0.9598812047061213, "grad_norm": 0.353515625, "learning_rate": 0.00018631536572386368, "loss": 0.4645, "step": 37815 }, { "epoch": 0.960008122755137, "grad_norm": 0.294921875, "learning_rate": 0.00018628312583862867, "loss": 0.416, "step": 37820 }, { "epoch": 0.9601350408041528, "grad_norm": 0.330078125, "learning_rate": 0.00018625088417299507, "loss": 0.4257, "step": 37825 }, { "epoch": 0.9602619588531686, "grad_norm": 0.36328125, "learning_rate": 0.00018621864072854507, "loss": 0.484, "step": 37830 }, { "epoch": 0.9603888769021842, "grad_norm": 0.330078125, "learning_rate": 0.0001861863955068608, "loss": 0.4678, "step": 37835 }, { "epoch": 0.9605157949512, "grad_norm": 0.31640625, "learning_rate": 0.0001861541485095245, "loss": 0.4343, "step": 37840 }, { "epoch": 0.9606427130002158, "grad_norm": 0.353515625, "learning_rate": 0.0001861218997381185, "loss": 0.4269, "step": 37845 }, { "epoch": 0.9607696310492315, "grad_norm": 0.337890625, "learning_rate": 0.0001860896491942253, "loss": 0.4465, "step": 37850 }, { "epoch": 0.9608965490982473, "grad_norm": 0.34375, "learning_rate": 0.00018605739687942738, "loss": 0.4405, "step": 37855 }, { "epoch": 0.961023467147263, "grad_norm": 0.3671875, "learning_rate": 0.00018602514279530738, "loss": 0.4644, "step": 37860 }, { "epoch": 0.9611503851962787, "grad_norm": 0.326171875, "learning_rate": 0.00018599288694344797, "loss": 0.4542, "step": 37865 }, { "epoch": 0.9612773032452945, "grad_norm": 0.3125, "learning_rate": 0.00018596062932543195, "loss": 0.4585, "step": 37870 }, { "epoch": 0.9614042212943102, "grad_norm": 0.3359375, "learning_rate": 0.00018592836994284217, "loss": 0.4481, "step": 37875 }, { "epoch": 0.961531139343326, "grad_norm": 0.33984375, "learning_rate": 0.00018589610879726157, "loss": 0.436, "step": 37880 }, { "epoch": 0.9616580573923418, "grad_norm": 0.333984375, "learning_rate": 0.00018586384589027322, "loss": 0.4498, "step": 37885 }, { "epoch": 0.9617849754413575, "grad_norm": 0.345703125, "learning_rate": 0.0001858315812234602, "loss": 0.4319, "step": 37890 }, { "epoch": 0.9619118934903733, "grad_norm": 0.361328125, "learning_rate": 0.00018579931479840577, "loss": 0.438, "step": 37895 }, { "epoch": 0.962038811539389, "grad_norm": 0.3359375, "learning_rate": 0.00018576704661669327, "loss": 0.4379, "step": 37900 }, { "epoch": 0.9621657295884047, "grad_norm": 0.361328125, "learning_rate": 0.00018573477667990603, "loss": 0.4328, "step": 37905 }, { "epoch": 0.9622926476374205, "grad_norm": 0.34375, "learning_rate": 0.0001857025049896275, "loss": 0.433, "step": 37910 }, { "epoch": 0.9624195656864363, "grad_norm": 0.3359375, "learning_rate": 0.00018567023154744127, "loss": 0.4347, "step": 37915 }, { "epoch": 0.962546483735452, "grad_norm": 0.345703125, "learning_rate": 0.000185637956354931, "loss": 0.4115, "step": 37920 }, { "epoch": 0.9626734017844678, "grad_norm": 0.3203125, "learning_rate": 0.00018560567941368046, "loss": 0.4345, "step": 37925 }, { "epoch": 0.9628003198334835, "grad_norm": 0.328125, "learning_rate": 0.00018557340072527335, "loss": 0.4499, "step": 37930 }, { "epoch": 0.9629272378824992, "grad_norm": 0.388671875, "learning_rate": 0.00018554112029129367, "loss": 0.4762, "step": 37935 }, { "epoch": 0.963054155931515, "grad_norm": 0.353515625, "learning_rate": 0.00018550883811332538, "loss": 0.4883, "step": 37940 }, { "epoch": 0.9631810739805308, "grad_norm": 0.330078125, "learning_rate": 0.0001854765541929525, "loss": 0.4221, "step": 37945 }, { "epoch": 0.9633079920295465, "grad_norm": 0.33984375, "learning_rate": 0.00018544426853175927, "loss": 0.4339, "step": 37950 }, { "epoch": 0.9634349100785623, "grad_norm": 0.330078125, "learning_rate": 0.0001854119811313299, "loss": 0.4555, "step": 37955 }, { "epoch": 0.9635618281275781, "grad_norm": 0.306640625, "learning_rate": 0.0001853796919932487, "loss": 0.4148, "step": 37960 }, { "epoch": 0.9636887461765937, "grad_norm": 0.345703125, "learning_rate": 0.00018534740111910016, "loss": 0.4369, "step": 37965 }, { "epoch": 0.9638156642256095, "grad_norm": 0.3359375, "learning_rate": 0.00018531510851046867, "loss": 0.446, "step": 37970 }, { "epoch": 0.9639425822746253, "grad_norm": 0.314453125, "learning_rate": 0.0001852828141689389, "loss": 0.449, "step": 37975 }, { "epoch": 0.964069500323641, "grad_norm": 0.328125, "learning_rate": 0.0001852505180960955, "loss": 0.4649, "step": 37980 }, { "epoch": 0.9641964183726568, "grad_norm": 0.337890625, "learning_rate": 0.0001852182202935232, "loss": 0.4359, "step": 37985 }, { "epoch": 0.9643233364216726, "grad_norm": 0.361328125, "learning_rate": 0.00018518592076280688, "loss": 0.4701, "step": 37990 }, { "epoch": 0.9644502544706883, "grad_norm": 0.3515625, "learning_rate": 0.00018515361950553145, "loss": 0.4459, "step": 37995 }, { "epoch": 0.964577172519704, "grad_norm": 0.34375, "learning_rate": 0.0001851213165232819, "loss": 0.456, "step": 38000 }, { "epoch": 0.9647040905687198, "grad_norm": 0.35546875, "learning_rate": 0.00018508901181764334, "loss": 0.4579, "step": 38005 }, { "epoch": 0.9648310086177355, "grad_norm": 0.345703125, "learning_rate": 0.00018505670539020093, "loss": 0.4322, "step": 38010 }, { "epoch": 0.9649579266667513, "grad_norm": 0.33203125, "learning_rate": 0.00018502439724253994, "loss": 0.4409, "step": 38015 }, { "epoch": 0.965084844715767, "grad_norm": 0.353515625, "learning_rate": 0.00018499208737624576, "loss": 0.4577, "step": 38020 }, { "epoch": 0.9652117627647828, "grad_norm": 0.33984375, "learning_rate": 0.00018495977579290377, "loss": 0.4504, "step": 38025 }, { "epoch": 0.9653386808137985, "grad_norm": 0.33203125, "learning_rate": 0.00018492746249409956, "loss": 0.4432, "step": 38030 }, { "epoch": 0.9654655988628142, "grad_norm": 0.345703125, "learning_rate": 0.00018489514748141865, "loss": 0.4407, "step": 38035 }, { "epoch": 0.96559251691183, "grad_norm": 0.33984375, "learning_rate": 0.00018486283075644675, "loss": 0.4355, "step": 38040 }, { "epoch": 0.9657194349608458, "grad_norm": 0.3203125, "learning_rate": 0.00018483051232076958, "loss": 0.4464, "step": 38045 }, { "epoch": 0.9658463530098615, "grad_norm": 0.31640625, "learning_rate": 0.00018479819217597315, "loss": 0.4272, "step": 38050 }, { "epoch": 0.9659732710588773, "grad_norm": 0.341796875, "learning_rate": 0.00018476587032364316, "loss": 0.4464, "step": 38055 }, { "epoch": 0.9661001891078931, "grad_norm": 0.353515625, "learning_rate": 0.00018473354676536586, "loss": 0.4524, "step": 38060 }, { "epoch": 0.9662271071569087, "grad_norm": 0.333984375, "learning_rate": 0.00018470122150272727, "loss": 0.431, "step": 38065 }, { "epoch": 0.9663540252059245, "grad_norm": 0.333984375, "learning_rate": 0.0001846688945373135, "loss": 0.4041, "step": 38070 }, { "epoch": 0.9664809432549403, "grad_norm": 0.3515625, "learning_rate": 0.0001846365658707109, "loss": 0.4421, "step": 38075 }, { "epoch": 0.966607861303956, "grad_norm": 0.3515625, "learning_rate": 0.00018460423550450584, "loss": 0.4569, "step": 38080 }, { "epoch": 0.9667347793529718, "grad_norm": 0.369140625, "learning_rate": 0.0001845719034402847, "loss": 0.4448, "step": 38085 }, { "epoch": 0.9668616974019876, "grad_norm": 0.333984375, "learning_rate": 0.000184539569679634, "loss": 0.4206, "step": 38090 }, { "epoch": 0.9669886154510033, "grad_norm": 0.359375, "learning_rate": 0.00018450723422414045, "loss": 0.4714, "step": 38095 }, { "epoch": 0.967115533500019, "grad_norm": 0.353515625, "learning_rate": 0.00018447489707539062, "loss": 0.4433, "step": 38100 }, { "epoch": 0.9672424515490348, "grad_norm": 0.349609375, "learning_rate": 0.0001844425582349713, "loss": 0.4604, "step": 38105 }, { "epoch": 0.9673693695980505, "grad_norm": 0.345703125, "learning_rate": 0.00018441021770446942, "loss": 0.4613, "step": 38110 }, { "epoch": 0.9674962876470663, "grad_norm": 0.35546875, "learning_rate": 0.00018437787548547182, "loss": 0.4779, "step": 38115 }, { "epoch": 0.9676232056960821, "grad_norm": 0.34765625, "learning_rate": 0.0001843455315795656, "loss": 0.4396, "step": 38120 }, { "epoch": 0.9677501237450978, "grad_norm": 0.341796875, "learning_rate": 0.00018431318598833782, "loss": 0.4464, "step": 38125 }, { "epoch": 0.9678770417941135, "grad_norm": 0.412109375, "learning_rate": 0.00018428083871337562, "loss": 0.436, "step": 38130 }, { "epoch": 0.9680039598431293, "grad_norm": 0.3515625, "learning_rate": 0.00018424848975626636, "loss": 0.4461, "step": 38135 }, { "epoch": 0.968130877892145, "grad_norm": 0.3125, "learning_rate": 0.00018421613911859735, "loss": 0.4381, "step": 38140 }, { "epoch": 0.9682577959411608, "grad_norm": 0.349609375, "learning_rate": 0.00018418378680195603, "loss": 0.4641, "step": 38145 }, { "epoch": 0.9683847139901766, "grad_norm": 0.359375, "learning_rate": 0.00018415143280792992, "loss": 0.4666, "step": 38150 }, { "epoch": 0.9685116320391923, "grad_norm": 0.369140625, "learning_rate": 0.0001841190771381066, "loss": 0.4518, "step": 38155 }, { "epoch": 0.9686385500882081, "grad_norm": 0.373046875, "learning_rate": 0.00018408671979407374, "loss": 0.4447, "step": 38160 }, { "epoch": 0.9687654681372238, "grad_norm": 0.359375, "learning_rate": 0.00018405436077741913, "loss": 0.4532, "step": 38165 }, { "epoch": 0.9688923861862395, "grad_norm": 0.345703125, "learning_rate": 0.00018402200008973057, "loss": 0.4641, "step": 38170 }, { "epoch": 0.9690193042352553, "grad_norm": 0.3203125, "learning_rate": 0.00018398963773259602, "loss": 0.4215, "step": 38175 }, { "epoch": 0.969146222284271, "grad_norm": 0.3515625, "learning_rate": 0.00018395727370760354, "loss": 0.4426, "step": 38180 }, { "epoch": 0.9692731403332868, "grad_norm": 0.3515625, "learning_rate": 0.00018392490801634112, "loss": 0.4576, "step": 38185 }, { "epoch": 0.9694000583823026, "grad_norm": 0.357421875, "learning_rate": 0.00018389254066039702, "loss": 0.4556, "step": 38190 }, { "epoch": 0.9695269764313182, "grad_norm": 0.333984375, "learning_rate": 0.0001838601716413594, "loss": 0.4485, "step": 38195 }, { "epoch": 0.969653894480334, "grad_norm": 0.326171875, "learning_rate": 0.00018382780096081665, "loss": 0.4465, "step": 38200 }, { "epoch": 0.9697808125293498, "grad_norm": 0.359375, "learning_rate": 0.00018379542862035722, "loss": 0.4378, "step": 38205 }, { "epoch": 0.9699077305783655, "grad_norm": 0.3359375, "learning_rate": 0.00018376305462156954, "loss": 0.4495, "step": 38210 }, { "epoch": 0.9700346486273813, "grad_norm": 0.375, "learning_rate": 0.00018373067896604222, "loss": 0.4641, "step": 38215 }, { "epoch": 0.9701615666763971, "grad_norm": 0.34375, "learning_rate": 0.00018369830165536395, "loss": 0.4365, "step": 38220 }, { "epoch": 0.9702884847254128, "grad_norm": 0.357421875, "learning_rate": 0.00018366592269112341, "loss": 0.443, "step": 38225 }, { "epoch": 0.9704154027744285, "grad_norm": 0.369140625, "learning_rate": 0.00018363354207490945, "loss": 0.4546, "step": 38230 }, { "epoch": 0.9705423208234443, "grad_norm": 0.474609375, "learning_rate": 0.000183601159808311, "loss": 0.4291, "step": 38235 }, { "epoch": 0.97066923887246, "grad_norm": 0.333984375, "learning_rate": 0.00018356877589291703, "loss": 0.4341, "step": 38240 }, { "epoch": 0.9707961569214758, "grad_norm": 0.35546875, "learning_rate": 0.00018353639033031656, "loss": 0.4419, "step": 38245 }, { "epoch": 0.9709230749704916, "grad_norm": 0.33203125, "learning_rate": 0.00018350400312209882, "loss": 0.4569, "step": 38250 }, { "epoch": 0.9710499930195073, "grad_norm": 0.337890625, "learning_rate": 0.00018347161426985294, "loss": 0.4358, "step": 38255 }, { "epoch": 0.9711769110685231, "grad_norm": 0.326171875, "learning_rate": 0.00018343922377516832, "loss": 0.4436, "step": 38260 }, { "epoch": 0.9713038291175388, "grad_norm": 0.3359375, "learning_rate": 0.0001834068316396343, "loss": 0.4527, "step": 38265 }, { "epoch": 0.9714307471665545, "grad_norm": 0.330078125, "learning_rate": 0.00018337443786484034, "loss": 0.4275, "step": 38270 }, { "epoch": 0.9715576652155703, "grad_norm": 0.322265625, "learning_rate": 0.00018334204245237603, "loss": 0.443, "step": 38275 }, { "epoch": 0.9716845832645861, "grad_norm": 0.314453125, "learning_rate": 0.000183309645403831, "loss": 0.4703, "step": 38280 }, { "epoch": 0.9718115013136018, "grad_norm": 0.326171875, "learning_rate": 0.00018327724672079492, "loss": 0.4235, "step": 38285 }, { "epoch": 0.9719384193626176, "grad_norm": 0.361328125, "learning_rate": 0.00018324484640485757, "loss": 0.4614, "step": 38290 }, { "epoch": 0.9720653374116333, "grad_norm": 0.3515625, "learning_rate": 0.00018321244445760888, "loss": 0.4622, "step": 38295 }, { "epoch": 0.972192255460649, "grad_norm": 0.322265625, "learning_rate": 0.00018318004088063874, "loss": 0.4148, "step": 38300 }, { "epoch": 0.9723191735096648, "grad_norm": 0.345703125, "learning_rate": 0.00018314763567553726, "loss": 0.433, "step": 38305 }, { "epoch": 0.9724460915586806, "grad_norm": 0.33984375, "learning_rate": 0.00018311522884389446, "loss": 0.4358, "step": 38310 }, { "epoch": 0.9725730096076963, "grad_norm": 0.33984375, "learning_rate": 0.00018308282038730062, "loss": 0.454, "step": 38315 }, { "epoch": 0.9726999276567121, "grad_norm": 0.328125, "learning_rate": 0.00018305041030734594, "loss": 0.4488, "step": 38320 }, { "epoch": 0.9728268457057279, "grad_norm": 0.3671875, "learning_rate": 0.00018301799860562075, "loss": 0.4438, "step": 38325 }, { "epoch": 0.9729537637547435, "grad_norm": 0.353515625, "learning_rate": 0.00018298558528371557, "loss": 0.435, "step": 38330 }, { "epoch": 0.9730806818037593, "grad_norm": 0.353515625, "learning_rate": 0.00018295317034322083, "loss": 0.4622, "step": 38335 }, { "epoch": 0.973207599852775, "grad_norm": 0.35546875, "learning_rate": 0.00018292075378572716, "loss": 0.4209, "step": 38340 }, { "epoch": 0.9733345179017908, "grad_norm": 0.345703125, "learning_rate": 0.0001828883356128252, "loss": 0.4349, "step": 38345 }, { "epoch": 0.9734614359508066, "grad_norm": 0.34375, "learning_rate": 0.00018285591582610576, "loss": 0.486, "step": 38350 }, { "epoch": 0.9735883539998224, "grad_norm": 1.34375, "learning_rate": 0.0001828234944271596, "loss": 0.4408, "step": 38355 }, { "epoch": 0.973715272048838, "grad_norm": 0.34765625, "learning_rate": 0.0001827910714175776, "loss": 0.4212, "step": 38360 }, { "epoch": 0.9738421900978538, "grad_norm": 0.388671875, "learning_rate": 0.00018275864679895084, "loss": 0.4492, "step": 38365 }, { "epoch": 0.9739691081468695, "grad_norm": 0.345703125, "learning_rate": 0.0001827262205728703, "loss": 0.4207, "step": 38370 }, { "epoch": 0.9740960261958853, "grad_norm": 0.3359375, "learning_rate": 0.00018269379274092715, "loss": 0.4147, "step": 38375 }, { "epoch": 0.9742229442449011, "grad_norm": 0.3203125, "learning_rate": 0.0001826613633047126, "loss": 0.4075, "step": 38380 }, { "epoch": 0.9743498622939168, "grad_norm": 0.462890625, "learning_rate": 0.000182628932265818, "loss": 0.4385, "step": 38385 }, { "epoch": 0.9744767803429326, "grad_norm": 0.34765625, "learning_rate": 0.00018259649962583467, "loss": 0.4998, "step": 38390 }, { "epoch": 0.9746036983919483, "grad_norm": 0.330078125, "learning_rate": 0.0001825640653863541, "loss": 0.4589, "step": 38395 }, { "epoch": 0.974730616440964, "grad_norm": 0.369140625, "learning_rate": 0.00018253162954896782, "loss": 0.4554, "step": 38400 }, { "epoch": 0.9748575344899798, "grad_norm": 0.33984375, "learning_rate": 0.00018249919211526744, "loss": 0.4504, "step": 38405 }, { "epoch": 0.9749844525389956, "grad_norm": 0.31640625, "learning_rate": 0.00018246675308684462, "loss": 0.4461, "step": 38410 }, { "epoch": 0.9751113705880113, "grad_norm": 0.369140625, "learning_rate": 0.00018243431246529114, "loss": 0.4576, "step": 38415 }, { "epoch": 0.9752382886370271, "grad_norm": 0.330078125, "learning_rate": 0.0001824018702521989, "loss": 0.4333, "step": 38420 }, { "epoch": 0.9753652066860429, "grad_norm": 0.357421875, "learning_rate": 0.0001823694264491598, "loss": 0.4687, "step": 38425 }, { "epoch": 0.9754921247350585, "grad_norm": 0.318359375, "learning_rate": 0.00018233698105776582, "loss": 0.4349, "step": 38430 }, { "epoch": 0.9756190427840743, "grad_norm": 0.36328125, "learning_rate": 0.0001823045340796091, "loss": 0.4574, "step": 38435 }, { "epoch": 0.9757459608330901, "grad_norm": 0.345703125, "learning_rate": 0.00018227208551628172, "loss": 0.4369, "step": 38440 }, { "epoch": 0.9758728788821058, "grad_norm": 0.32421875, "learning_rate": 0.000182239635369376, "loss": 0.4375, "step": 38445 }, { "epoch": 0.9759997969311216, "grad_norm": 0.33984375, "learning_rate": 0.0001822071836404842, "loss": 0.4528, "step": 38450 }, { "epoch": 0.9761267149801374, "grad_norm": 0.345703125, "learning_rate": 0.00018217473033119868, "loss": 0.4335, "step": 38455 }, { "epoch": 0.976253633029153, "grad_norm": 0.34765625, "learning_rate": 0.00018214227544311197, "loss": 0.4768, "step": 38460 }, { "epoch": 0.9763805510781688, "grad_norm": 0.357421875, "learning_rate": 0.00018210981897781668, "loss": 0.4428, "step": 38465 }, { "epoch": 0.9765074691271846, "grad_norm": 0.326171875, "learning_rate": 0.00018207736093690528, "loss": 0.4666, "step": 38470 }, { "epoch": 0.9766343871762003, "grad_norm": 0.33203125, "learning_rate": 0.0001820449013219706, "loss": 0.4586, "step": 38475 }, { "epoch": 0.9767613052252161, "grad_norm": 0.35546875, "learning_rate": 0.0001820124401346054, "loss": 0.4472, "step": 38480 }, { "epoch": 0.9768882232742319, "grad_norm": 0.35546875, "learning_rate": 0.00018197997737640247, "loss": 0.4255, "step": 38485 }, { "epoch": 0.9770151413232476, "grad_norm": 0.330078125, "learning_rate": 0.00018194751304895479, "loss": 0.4413, "step": 38490 }, { "epoch": 0.9771420593722633, "grad_norm": 0.341796875, "learning_rate": 0.0001819150471538554, "loss": 0.4188, "step": 38495 }, { "epoch": 0.977268977421279, "grad_norm": 0.322265625, "learning_rate": 0.00018188257969269732, "loss": 0.4595, "step": 38500 }, { "epoch": 0.9773958954702948, "grad_norm": 0.361328125, "learning_rate": 0.0001818501106670738, "loss": 0.4405, "step": 38505 }, { "epoch": 0.9775228135193106, "grad_norm": 0.32421875, "learning_rate": 0.000181817640078578, "loss": 0.4223, "step": 38510 }, { "epoch": 0.9776497315683264, "grad_norm": 0.34375, "learning_rate": 0.00018178516792880328, "loss": 0.4169, "step": 38515 }, { "epoch": 0.9777766496173421, "grad_norm": 0.322265625, "learning_rate": 0.00018175269421934304, "loss": 0.4392, "step": 38520 }, { "epoch": 0.9779035676663579, "grad_norm": 0.341796875, "learning_rate": 0.00018172021895179074, "loss": 0.4528, "step": 38525 }, { "epoch": 0.9780304857153735, "grad_norm": 0.353515625, "learning_rate": 0.00018168774212773997, "loss": 0.4666, "step": 38530 }, { "epoch": 0.9781574037643893, "grad_norm": 0.328125, "learning_rate": 0.00018165526374878428, "loss": 0.4624, "step": 38535 }, { "epoch": 0.9782843218134051, "grad_norm": 0.337890625, "learning_rate": 0.0001816227838165174, "loss": 0.4236, "step": 38540 }, { "epoch": 0.9784112398624208, "grad_norm": 0.365234375, "learning_rate": 0.00018159030233253311, "loss": 0.441, "step": 38545 }, { "epoch": 0.9785381579114366, "grad_norm": 0.3515625, "learning_rate": 0.00018155781929842524, "loss": 0.4403, "step": 38550 }, { "epoch": 0.9786650759604524, "grad_norm": 0.3515625, "learning_rate": 0.00018152533471578784, "loss": 0.4587, "step": 38555 }, { "epoch": 0.978791994009468, "grad_norm": 0.328125, "learning_rate": 0.00018149284858621475, "loss": 0.4375, "step": 38560 }, { "epoch": 0.9789189120584838, "grad_norm": 0.337890625, "learning_rate": 0.00018146036091130015, "loss": 0.4331, "step": 38565 }, { "epoch": 0.9790458301074996, "grad_norm": 0.337890625, "learning_rate": 0.0001814278716926382, "loss": 0.4422, "step": 38570 }, { "epoch": 0.9791727481565153, "grad_norm": 0.3359375, "learning_rate": 0.00018139538093182305, "loss": 0.4488, "step": 38575 }, { "epoch": 0.9792996662055311, "grad_norm": 0.361328125, "learning_rate": 0.0001813628886304491, "loss": 0.4643, "step": 38580 }, { "epoch": 0.9794265842545469, "grad_norm": 0.3359375, "learning_rate": 0.00018133039479011068, "loss": 0.4635, "step": 38585 }, { "epoch": 0.9795535023035626, "grad_norm": 0.357421875, "learning_rate": 0.00018129789941240227, "loss": 0.4718, "step": 38590 }, { "epoch": 0.9796804203525783, "grad_norm": 0.337890625, "learning_rate": 0.00018126540249891842, "loss": 0.4424, "step": 38595 }, { "epoch": 0.9798073384015941, "grad_norm": 0.3359375, "learning_rate": 0.0001812329040512537, "loss": 0.4258, "step": 38600 }, { "epoch": 0.9799342564506098, "grad_norm": 0.369140625, "learning_rate": 0.00018120040407100288, "loss": 0.4749, "step": 38605 }, { "epoch": 0.9800611744996256, "grad_norm": 0.3203125, "learning_rate": 0.00018116790255976055, "loss": 0.4504, "step": 38610 }, { "epoch": 0.9801880925486414, "grad_norm": 0.349609375, "learning_rate": 0.00018113539951912174, "loss": 0.4248, "step": 38615 }, { "epoch": 0.9803150105976571, "grad_norm": 0.349609375, "learning_rate": 0.00018110289495068123, "loss": 0.4353, "step": 38620 }, { "epoch": 0.9804419286466728, "grad_norm": 0.337890625, "learning_rate": 0.0001810703888560341, "loss": 0.4297, "step": 38625 }, { "epoch": 0.9805688466956886, "grad_norm": 0.369140625, "learning_rate": 0.00018103788123677536, "loss": 0.4206, "step": 38630 }, { "epoch": 0.9806957647447043, "grad_norm": 0.357421875, "learning_rate": 0.00018100537209450016, "loss": 0.4653, "step": 38635 }, { "epoch": 0.9808226827937201, "grad_norm": 0.3359375, "learning_rate": 0.00018097286143080367, "loss": 0.4266, "step": 38640 }, { "epoch": 0.9809496008427359, "grad_norm": 0.345703125, "learning_rate": 0.0001809403492472812, "loss": 0.4528, "step": 38645 }, { "epoch": 0.9810765188917516, "grad_norm": 0.29296875, "learning_rate": 0.00018090783554552812, "loss": 0.4326, "step": 38650 }, { "epoch": 0.9812034369407674, "grad_norm": 0.3359375, "learning_rate": 0.0001808753203271399, "loss": 0.4435, "step": 38655 }, { "epoch": 0.981330354989783, "grad_norm": 0.33984375, "learning_rate": 0.00018084280359371196, "loss": 0.4366, "step": 38660 }, { "epoch": 0.9814572730387988, "grad_norm": 0.359375, "learning_rate": 0.00018081028534684, "loss": 0.451, "step": 38665 }, { "epoch": 0.9815841910878146, "grad_norm": 0.333984375, "learning_rate": 0.00018077776558811953, "loss": 0.4201, "step": 38670 }, { "epoch": 0.9817111091368304, "grad_norm": 0.33984375, "learning_rate": 0.0001807452443191464, "loss": 0.4856, "step": 38675 }, { "epoch": 0.9818380271858461, "grad_norm": 0.3359375, "learning_rate": 0.00018071272154151637, "loss": 0.4317, "step": 38680 }, { "epoch": 0.9819649452348619, "grad_norm": 0.326171875, "learning_rate": 0.0001806801972568254, "loss": 0.4165, "step": 38685 }, { "epoch": 0.9820918632838777, "grad_norm": 0.3359375, "learning_rate": 0.00018064767146666927, "loss": 0.4348, "step": 38690 }, { "epoch": 0.9822187813328933, "grad_norm": 0.341796875, "learning_rate": 0.00018061514417264416, "loss": 0.4397, "step": 38695 }, { "epoch": 0.9823456993819091, "grad_norm": 0.376953125, "learning_rate": 0.0001805826153763461, "loss": 0.4379, "step": 38700 }, { "epoch": 0.9824726174309248, "grad_norm": 0.3515625, "learning_rate": 0.00018055008507937125, "loss": 0.4205, "step": 38705 }, { "epoch": 0.9825995354799406, "grad_norm": 0.3359375, "learning_rate": 0.00018051755328331597, "loss": 0.4461, "step": 38710 }, { "epoch": 0.9827264535289564, "grad_norm": 0.34765625, "learning_rate": 0.0001804850199897765, "loss": 0.4516, "step": 38715 }, { "epoch": 0.9828533715779721, "grad_norm": 0.3828125, "learning_rate": 0.00018045248520034924, "loss": 0.414, "step": 38720 }, { "epoch": 0.9829802896269878, "grad_norm": 0.349609375, "learning_rate": 0.00018041994891663062, "loss": 0.4645, "step": 38725 }, { "epoch": 0.9831072076760036, "grad_norm": 0.337890625, "learning_rate": 0.00018038741114021728, "loss": 0.4333, "step": 38730 }, { "epoch": 0.9832341257250193, "grad_norm": 0.36328125, "learning_rate": 0.00018035487187270573, "loss": 0.4243, "step": 38735 }, { "epoch": 0.9833610437740351, "grad_norm": 0.341796875, "learning_rate": 0.0001803223311156927, "loss": 0.461, "step": 38740 }, { "epoch": 0.9834879618230509, "grad_norm": 0.34765625, "learning_rate": 0.00018028978887077497, "loss": 0.455, "step": 38745 }, { "epoch": 0.9836148798720666, "grad_norm": 0.349609375, "learning_rate": 0.0001802572451395494, "loss": 0.4309, "step": 38750 }, { "epoch": 0.9837417979210824, "grad_norm": 0.35546875, "learning_rate": 0.00018022469992361284, "loss": 0.4637, "step": 38755 }, { "epoch": 0.9838687159700981, "grad_norm": 0.349609375, "learning_rate": 0.00018019215322456235, "loss": 0.4593, "step": 38760 }, { "epoch": 0.9839956340191138, "grad_norm": 0.330078125, "learning_rate": 0.00018015960504399485, "loss": 0.4078, "step": 38765 }, { "epoch": 0.9841225520681296, "grad_norm": 0.365234375, "learning_rate": 0.00018012705538350755, "loss": 0.4463, "step": 38770 }, { "epoch": 0.9842494701171454, "grad_norm": 0.322265625, "learning_rate": 0.0001800945042446977, "loss": 0.4442, "step": 38775 }, { "epoch": 0.9843763881661611, "grad_norm": 0.333984375, "learning_rate": 0.00018006195162916248, "loss": 0.4117, "step": 38780 }, { "epoch": 0.9845033062151769, "grad_norm": 0.365234375, "learning_rate": 0.00018002939753849925, "loss": 0.4559, "step": 38785 }, { "epoch": 0.9846302242641927, "grad_norm": 0.349609375, "learning_rate": 0.00017999684197430546, "loss": 0.4508, "step": 38790 }, { "epoch": 0.9847571423132083, "grad_norm": 0.337890625, "learning_rate": 0.0001799642849381786, "loss": 0.4453, "step": 38795 }, { "epoch": 0.9848840603622241, "grad_norm": 0.37109375, "learning_rate": 0.00017993172643171617, "loss": 0.4577, "step": 38800 }, { "epoch": 0.9850109784112399, "grad_norm": 0.337890625, "learning_rate": 0.0001798991664565159, "loss": 0.4486, "step": 38805 }, { "epoch": 0.9851378964602556, "grad_norm": 0.349609375, "learning_rate": 0.0001798666050141754, "loss": 0.4355, "step": 38810 }, { "epoch": 0.9852648145092714, "grad_norm": 0.34765625, "learning_rate": 0.00017983404210629255, "loss": 0.4226, "step": 38815 }, { "epoch": 0.9853917325582872, "grad_norm": 0.369140625, "learning_rate": 0.00017980147773446505, "loss": 0.4463, "step": 38820 }, { "epoch": 0.9855186506073028, "grad_norm": 0.35546875, "learning_rate": 0.000179768911900291, "loss": 0.4497, "step": 38825 }, { "epoch": 0.9856455686563186, "grad_norm": 0.33203125, "learning_rate": 0.00017973634460536822, "loss": 0.4313, "step": 38830 }, { "epoch": 0.9857724867053343, "grad_norm": 0.3671875, "learning_rate": 0.0001797037758512949, "loss": 0.4575, "step": 38835 }, { "epoch": 0.9858994047543501, "grad_norm": 0.318359375, "learning_rate": 0.00017967120563966915, "loss": 0.4333, "step": 38840 }, { "epoch": 0.9860263228033659, "grad_norm": 0.365234375, "learning_rate": 0.00017963863397208913, "loss": 0.4806, "step": 38845 }, { "epoch": 0.9861532408523817, "grad_norm": 0.33984375, "learning_rate": 0.00017960606085015314, "loss": 0.4624, "step": 38850 }, { "epoch": 0.9862801589013974, "grad_norm": 0.3671875, "learning_rate": 0.00017957348627545958, "loss": 0.4752, "step": 38855 }, { "epoch": 0.9864070769504131, "grad_norm": 0.345703125, "learning_rate": 0.00017954091024960676, "loss": 0.4063, "step": 38860 }, { "epoch": 0.9865339949994288, "grad_norm": 0.341796875, "learning_rate": 0.00017950833277419325, "loss": 0.4345, "step": 38865 }, { "epoch": 0.9866609130484446, "grad_norm": 0.3359375, "learning_rate": 0.00017947575385081764, "loss": 0.4463, "step": 38870 }, { "epoch": 0.9867878310974604, "grad_norm": 0.34765625, "learning_rate": 0.00017944317348107854, "loss": 0.4528, "step": 38875 }, { "epoch": 0.9869147491464761, "grad_norm": 0.34375, "learning_rate": 0.00017941059166657464, "loss": 0.4761, "step": 38880 }, { "epoch": 0.9870416671954919, "grad_norm": 0.34375, "learning_rate": 0.0001793780084089047, "loss": 0.4526, "step": 38885 }, { "epoch": 0.9871685852445076, "grad_norm": 0.318359375, "learning_rate": 0.00017934542370966764, "loss": 0.4527, "step": 38890 }, { "epoch": 0.9872955032935233, "grad_norm": 0.345703125, "learning_rate": 0.00017931283757046225, "loss": 0.4605, "step": 38895 }, { "epoch": 0.9874224213425391, "grad_norm": 0.337890625, "learning_rate": 0.00017928024999288764, "loss": 0.4494, "step": 38900 }, { "epoch": 0.9875493393915549, "grad_norm": 0.322265625, "learning_rate": 0.00017924766097854281, "loss": 0.4229, "step": 38905 }, { "epoch": 0.9876762574405706, "grad_norm": 0.357421875, "learning_rate": 0.0001792150705290269, "loss": 0.4545, "step": 38910 }, { "epoch": 0.9878031754895864, "grad_norm": 0.34765625, "learning_rate": 0.00017918247864593916, "loss": 0.4511, "step": 38915 }, { "epoch": 0.9879300935386022, "grad_norm": 0.353515625, "learning_rate": 0.00017914988533087881, "loss": 0.4542, "step": 38920 }, { "epoch": 0.9880570115876178, "grad_norm": 0.34375, "learning_rate": 0.00017911729058544515, "loss": 0.4383, "step": 38925 }, { "epoch": 0.9881839296366336, "grad_norm": 0.333984375, "learning_rate": 0.0001790846944112377, "loss": 0.434, "step": 38930 }, { "epoch": 0.9883108476856494, "grad_norm": 0.369140625, "learning_rate": 0.00017905209680985583, "loss": 0.4559, "step": 38935 }, { "epoch": 0.9884377657346651, "grad_norm": 0.353515625, "learning_rate": 0.00017901949778289917, "loss": 0.4503, "step": 38940 }, { "epoch": 0.9885646837836809, "grad_norm": 0.34375, "learning_rate": 0.0001789868973319673, "loss": 0.4405, "step": 38945 }, { "epoch": 0.9886916018326967, "grad_norm": 0.35546875, "learning_rate": 0.00017895429545865992, "loss": 0.4269, "step": 38950 }, { "epoch": 0.9888185198817124, "grad_norm": 0.3359375, "learning_rate": 0.0001789216921645768, "loss": 0.4123, "step": 38955 }, { "epoch": 0.9889454379307281, "grad_norm": 0.353515625, "learning_rate": 0.00017888908745131778, "loss": 0.4405, "step": 38960 }, { "epoch": 0.9890723559797439, "grad_norm": 0.359375, "learning_rate": 0.00017885648132048273, "loss": 0.4297, "step": 38965 }, { "epoch": 0.9891992740287596, "grad_norm": 0.3515625, "learning_rate": 0.00017882387377367168, "loss": 0.4724, "step": 38970 }, { "epoch": 0.9893261920777754, "grad_norm": 0.349609375, "learning_rate": 0.00017879126481248458, "loss": 0.4404, "step": 38975 }, { "epoch": 0.9894531101267912, "grad_norm": 0.341796875, "learning_rate": 0.0001787586544385216, "loss": 0.4009, "step": 38980 }, { "epoch": 0.9895800281758069, "grad_norm": 0.33984375, "learning_rate": 0.0001787260426533829, "loss": 0.4165, "step": 38985 }, { "epoch": 0.9897069462248226, "grad_norm": 0.3359375, "learning_rate": 0.0001786934294586687, "loss": 0.4327, "step": 38990 }, { "epoch": 0.9898338642738383, "grad_norm": 0.326171875, "learning_rate": 0.00017866081485597938, "loss": 0.4556, "step": 38995 }, { "epoch": 0.9899607823228541, "grad_norm": 0.380859375, "learning_rate": 0.0001786281988469153, "loss": 0.4792, "step": 39000 }, { "epoch": 0.9900877003718699, "grad_norm": 0.330078125, "learning_rate": 0.00017859558143307688, "loss": 0.4487, "step": 39005 }, { "epoch": 0.9902146184208857, "grad_norm": 0.349609375, "learning_rate": 0.00017856296261606472, "loss": 0.4317, "step": 39010 }, { "epoch": 0.9903415364699014, "grad_norm": 0.32421875, "learning_rate": 0.0001785303423974793, "loss": 0.4255, "step": 39015 }, { "epoch": 0.9904684545189172, "grad_norm": 0.330078125, "learning_rate": 0.00017849772077892137, "loss": 0.4243, "step": 39020 }, { "epoch": 0.9905953725679328, "grad_norm": 0.326171875, "learning_rate": 0.00017846509776199164, "loss": 0.4424, "step": 39025 }, { "epoch": 0.9907222906169486, "grad_norm": 0.359375, "learning_rate": 0.00017843247334829086, "loss": 0.4637, "step": 39030 }, { "epoch": 0.9908492086659644, "grad_norm": 0.32421875, "learning_rate": 0.00017839984753942, "loss": 0.4497, "step": 39035 }, { "epoch": 0.9909761267149801, "grad_norm": 0.357421875, "learning_rate": 0.0001783672203369799, "loss": 0.4584, "step": 39040 }, { "epoch": 0.9911030447639959, "grad_norm": 0.328125, "learning_rate": 0.0001783345917425716, "loss": 0.4295, "step": 39045 }, { "epoch": 0.9912299628130117, "grad_norm": 0.3515625, "learning_rate": 0.00017830196175779614, "loss": 0.4891, "step": 39050 }, { "epoch": 0.9913568808620273, "grad_norm": 0.345703125, "learning_rate": 0.00017826933038425475, "loss": 0.4283, "step": 39055 }, { "epoch": 0.9914837989110431, "grad_norm": 0.3515625, "learning_rate": 0.00017823669762354858, "loss": 0.4623, "step": 39060 }, { "epoch": 0.9916107169600589, "grad_norm": 0.34765625, "learning_rate": 0.00017820406347727883, "loss": 0.4713, "step": 39065 }, { "epoch": 0.9917376350090746, "grad_norm": 0.34765625, "learning_rate": 0.000178171427947047, "loss": 0.4603, "step": 39070 }, { "epoch": 0.9918645530580904, "grad_norm": 0.359375, "learning_rate": 0.00017813879103445438, "loss": 0.4523, "step": 39075 }, { "epoch": 0.9919914711071062, "grad_norm": 0.36328125, "learning_rate": 0.00017810615274110249, "loss": 0.4741, "step": 39080 }, { "epoch": 0.9921183891561219, "grad_norm": 0.3359375, "learning_rate": 0.00017807351306859287, "loss": 0.422, "step": 39085 }, { "epoch": 0.9922453072051376, "grad_norm": 0.326171875, "learning_rate": 0.0001780408720185272, "loss": 0.4422, "step": 39090 }, { "epoch": 0.9923722252541534, "grad_norm": 0.373046875, "learning_rate": 0.0001780082295925071, "loss": 0.4486, "step": 39095 }, { "epoch": 0.9924991433031691, "grad_norm": 0.31640625, "learning_rate": 0.0001779755857921343, "loss": 0.4426, "step": 39100 }, { "epoch": 0.9926260613521849, "grad_norm": 0.32421875, "learning_rate": 0.00017794294061901063, "loss": 0.4452, "step": 39105 }, { "epoch": 0.9927529794012007, "grad_norm": 0.3203125, "learning_rate": 0.00017791029407473804, "loss": 0.4122, "step": 39110 }, { "epoch": 0.9928798974502164, "grad_norm": 0.35546875, "learning_rate": 0.0001778776461609184, "loss": 0.4713, "step": 39115 }, { "epoch": 0.9930068154992322, "grad_norm": 0.345703125, "learning_rate": 0.0001778449968791538, "loss": 0.4123, "step": 39120 }, { "epoch": 0.9931337335482479, "grad_norm": 0.369140625, "learning_rate": 0.00017781234623104629, "loss": 0.4587, "step": 39125 }, { "epoch": 0.9932606515972636, "grad_norm": 0.32421875, "learning_rate": 0.00017777969421819804, "loss": 0.4027, "step": 39130 }, { "epoch": 0.9933875696462794, "grad_norm": 0.337890625, "learning_rate": 0.00017774704084221125, "loss": 0.3925, "step": 39135 }, { "epoch": 0.9935144876952952, "grad_norm": 0.36328125, "learning_rate": 0.00017771438610468825, "loss": 0.452, "step": 39140 }, { "epoch": 0.9936414057443109, "grad_norm": 0.33203125, "learning_rate": 0.00017768173000723127, "loss": 0.4774, "step": 39145 }, { "epoch": 0.9937683237933267, "grad_norm": 0.328125, "learning_rate": 0.00017764907255144287, "loss": 0.4492, "step": 39150 }, { "epoch": 0.9938952418423423, "grad_norm": 0.333984375, "learning_rate": 0.00017761641373892551, "loss": 0.431, "step": 39155 }, { "epoch": 0.9940221598913581, "grad_norm": 0.34765625, "learning_rate": 0.00017758375357128175, "loss": 0.4355, "step": 39160 }, { "epoch": 0.9941490779403739, "grad_norm": 0.359375, "learning_rate": 0.00017755109205011416, "loss": 0.4428, "step": 39165 }, { "epoch": 0.9942759959893896, "grad_norm": 0.337890625, "learning_rate": 0.00017751842917702545, "loss": 0.4486, "step": 39170 }, { "epoch": 0.9944029140384054, "grad_norm": 0.359375, "learning_rate": 0.00017748576495361845, "loss": 0.4593, "step": 39175 }, { "epoch": 0.9945298320874212, "grad_norm": 0.33984375, "learning_rate": 0.00017745309938149581, "loss": 0.4159, "step": 39180 }, { "epoch": 0.994656750136437, "grad_norm": 0.3515625, "learning_rate": 0.00017742043246226057, "loss": 0.4537, "step": 39185 }, { "epoch": 0.9947836681854526, "grad_norm": 0.3515625, "learning_rate": 0.0001773877641975156, "loss": 0.4677, "step": 39190 }, { "epoch": 0.9949105862344684, "grad_norm": 0.34765625, "learning_rate": 0.000177355094588864, "loss": 0.4538, "step": 39195 }, { "epoch": 0.9950375042834841, "grad_norm": 0.369140625, "learning_rate": 0.00017732242363790876, "loss": 0.4511, "step": 39200 }, { "epoch": 0.9951644223324999, "grad_norm": 0.357421875, "learning_rate": 0.0001772897513462531, "loss": 0.4467, "step": 39205 }, { "epoch": 0.9952913403815157, "grad_norm": 0.3671875, "learning_rate": 0.00017725707771550024, "loss": 0.4607, "step": 39210 }, { "epoch": 0.9954182584305314, "grad_norm": 0.37890625, "learning_rate": 0.0001772244027472534, "loss": 0.4488, "step": 39215 }, { "epoch": 0.9955451764795472, "grad_norm": 0.35546875, "learning_rate": 0.00017719172644311597, "loss": 0.4378, "step": 39220 }, { "epoch": 0.9956720945285629, "grad_norm": 0.353515625, "learning_rate": 0.00017715904880469138, "loss": 0.3987, "step": 39225 }, { "epoch": 0.9957990125775786, "grad_norm": 0.341796875, "learning_rate": 0.000177126369833583, "loss": 0.4211, "step": 39230 }, { "epoch": 0.9959259306265944, "grad_norm": 0.375, "learning_rate": 0.00017709368953139454, "loss": 0.4701, "step": 39235 }, { "epoch": 0.9960528486756102, "grad_norm": 0.36328125, "learning_rate": 0.00017706100789972949, "loss": 0.4628, "step": 39240 }, { "epoch": 0.9961797667246259, "grad_norm": 0.3359375, "learning_rate": 0.00017702832494019153, "loss": 0.4545, "step": 39245 }, { "epoch": 0.9963066847736417, "grad_norm": 0.33203125, "learning_rate": 0.00017699564065438452, "loss": 0.4327, "step": 39250 }, { "epoch": 0.9964336028226574, "grad_norm": 0.359375, "learning_rate": 0.00017696295504391214, "loss": 0.4578, "step": 39255 }, { "epoch": 0.9965605208716731, "grad_norm": 0.3203125, "learning_rate": 0.00017693026811037827, "loss": 0.4528, "step": 39260 }, { "epoch": 0.9966874389206889, "grad_norm": 0.314453125, "learning_rate": 0.00017689757985538688, "loss": 0.4085, "step": 39265 }, { "epoch": 0.9968143569697047, "grad_norm": 0.337890625, "learning_rate": 0.00017686489028054196, "loss": 0.4833, "step": 39270 }, { "epoch": 0.9969412750187204, "grad_norm": 0.34765625, "learning_rate": 0.00017683219938744758, "loss": 0.4256, "step": 39275 }, { "epoch": 0.9970681930677362, "grad_norm": 0.345703125, "learning_rate": 0.0001767995071777079, "loss": 0.421, "step": 39280 }, { "epoch": 0.997195111116752, "grad_norm": 0.353515625, "learning_rate": 0.00017676681365292704, "loss": 0.467, "step": 39285 }, { "epoch": 0.9973220291657676, "grad_norm": 0.330078125, "learning_rate": 0.0001767341188147093, "loss": 0.4348, "step": 39290 }, { "epoch": 0.9974489472147834, "grad_norm": 0.33984375, "learning_rate": 0.00017670142266465902, "loss": 0.4351, "step": 39295 }, { "epoch": 0.9975758652637992, "grad_norm": 0.341796875, "learning_rate": 0.00017666872520438054, "loss": 0.456, "step": 39300 }, { "epoch": 0.9977027833128149, "grad_norm": 0.33203125, "learning_rate": 0.00017663602643547833, "loss": 0.4533, "step": 39305 }, { "epoch": 0.9978297013618307, "grad_norm": 0.34375, "learning_rate": 0.00017660332635955692, "loss": 0.4175, "step": 39310 }, { "epoch": 0.9979566194108465, "grad_norm": 0.349609375, "learning_rate": 0.0001765706249782209, "loss": 0.4441, "step": 39315 }, { "epoch": 0.9980835374598621, "grad_norm": 0.359375, "learning_rate": 0.0001765379222930749, "loss": 0.4373, "step": 39320 }, { "epoch": 0.9982104555088779, "grad_norm": 0.357421875, "learning_rate": 0.00017650521830572363, "loss": 0.4561, "step": 39325 }, { "epoch": 0.9983373735578936, "grad_norm": 0.31640625, "learning_rate": 0.00017647251301777185, "loss": 0.4181, "step": 39330 }, { "epoch": 0.9984642916069094, "grad_norm": 0.34375, "learning_rate": 0.00017643980643082435, "loss": 0.447, "step": 39335 }, { "epoch": 0.9985912096559252, "grad_norm": 0.326171875, "learning_rate": 0.00017640709854648614, "loss": 0.431, "step": 39340 }, { "epoch": 0.998718127704941, "grad_norm": 0.35546875, "learning_rate": 0.0001763743893663621, "loss": 0.4532, "step": 39345 }, { "epoch": 0.9988450457539567, "grad_norm": 0.33984375, "learning_rate": 0.00017634167889205726, "loss": 0.4541, "step": 39350 }, { "epoch": 0.9989719638029724, "grad_norm": 0.314453125, "learning_rate": 0.00017630896712517676, "loss": 0.4212, "step": 39355 }, { "epoch": 0.9990988818519881, "grad_norm": 0.33984375, "learning_rate": 0.00017627625406732575, "loss": 0.4134, "step": 39360 }, { "epoch": 0.9992257999010039, "grad_norm": 0.33203125, "learning_rate": 0.00017624353972010937, "loss": 0.4844, "step": 39365 }, { "epoch": 0.9993527179500197, "grad_norm": 0.349609375, "learning_rate": 0.00017621082408513292, "loss": 0.4317, "step": 39370 }, { "epoch": 0.9994796359990354, "grad_norm": 0.56640625, "learning_rate": 0.00017617810716400185, "loss": 0.4365, "step": 39375 }, { "epoch": 0.9996065540480512, "grad_norm": 19.125, "learning_rate": 0.00017614538895832144, "loss": 0.4796, "step": 39380 }, { "epoch": 0.999733472097067, "grad_norm": 0.31640625, "learning_rate": 0.00017611266946969726, "loss": 0.4528, "step": 39385 }, { "epoch": 0.9998603901460826, "grad_norm": 0.373046875, "learning_rate": 0.00017607994869973473, "loss": 0.4305, "step": 39390 }, { "epoch": 0.9999873081950984, "grad_norm": 0.30078125, "learning_rate": 0.00017604722665003956, "loss": 0.4177, "step": 39395 }, { "epoch": 0.9999873081950984, "eval_loss": 0.4502251446247101, "eval_runtime": 33.158, "eval_samples_per_second": 4.343, "eval_steps_per_second": 4.343, "step": 39395 }, { "epoch": 1.0001142262441143, "grad_norm": 0.33984375, "learning_rate": 0.0001760145033222173, "loss": 0.4473, "step": 39400 }, { "epoch": 1.0002411442931298, "grad_norm": 0.3359375, "learning_rate": 0.00017598177871787373, "loss": 0.4204, "step": 39405 }, { "epoch": 1.0003680623421456, "grad_norm": 0.314453125, "learning_rate": 0.00017594905283861467, "loss": 0.4103, "step": 39410 }, { "epoch": 1.0004949803911614, "grad_norm": 0.3125, "learning_rate": 0.00017591632568604592, "loss": 0.3791, "step": 39415 }, { "epoch": 1.0006218984401771, "grad_norm": 0.32421875, "learning_rate": 0.0001758835972617734, "loss": 0.423, "step": 39420 }, { "epoch": 1.000748816489193, "grad_norm": 0.341796875, "learning_rate": 0.00017585086756740305, "loss": 0.4404, "step": 39425 }, { "epoch": 1.0008757345382087, "grad_norm": 0.34375, "learning_rate": 0.00017581813660454091, "loss": 0.3983, "step": 39430 }, { "epoch": 1.0010026525872244, "grad_norm": 0.322265625, "learning_rate": 0.00017578540437479306, "loss": 0.4164, "step": 39435 }, { "epoch": 1.0011295706362402, "grad_norm": 0.33203125, "learning_rate": 0.00017575267087976577, "loss": 0.3856, "step": 39440 }, { "epoch": 1.001256488685256, "grad_norm": 0.34375, "learning_rate": 0.00017571993612106515, "loss": 0.4138, "step": 39445 }, { "epoch": 1.0013834067342717, "grad_norm": 0.3515625, "learning_rate": 0.0001756872001002975, "loss": 0.4455, "step": 39450 }, { "epoch": 1.0015103247832875, "grad_norm": 0.337890625, "learning_rate": 0.0001756544628190692, "loss": 0.4328, "step": 39455 }, { "epoch": 1.0016372428323033, "grad_norm": 0.341796875, "learning_rate": 0.00017562172427898655, "loss": 0.4449, "step": 39460 }, { "epoch": 1.001764160881319, "grad_norm": 0.337890625, "learning_rate": 0.00017558898448165618, "loss": 0.4409, "step": 39465 }, { "epoch": 1.0018910789303346, "grad_norm": 0.35546875, "learning_rate": 0.00017555624342868452, "loss": 0.4306, "step": 39470 }, { "epoch": 1.0020179969793503, "grad_norm": 0.3671875, "learning_rate": 0.0001755235011216781, "loss": 0.4253, "step": 39475 }, { "epoch": 1.0021449150283661, "grad_norm": 0.33984375, "learning_rate": 0.00017549075756224375, "loss": 0.3894, "step": 39480 }, { "epoch": 1.0022718330773819, "grad_norm": 0.32421875, "learning_rate": 0.00017545801275198802, "loss": 0.4183, "step": 39485 }, { "epoch": 1.0023987511263976, "grad_norm": 0.328125, "learning_rate": 0.00017542526669251778, "loss": 0.4046, "step": 39490 }, { "epoch": 1.0025256691754134, "grad_norm": 0.318359375, "learning_rate": 0.00017539251938543978, "loss": 0.398, "step": 39495 }, { "epoch": 1.0026525872244292, "grad_norm": 0.34765625, "learning_rate": 0.00017535977083236103, "loss": 0.4404, "step": 39500 }, { "epoch": 1.002779505273445, "grad_norm": 0.337890625, "learning_rate": 0.0001753270210348884, "loss": 0.421, "step": 39505 }, { "epoch": 1.0029064233224607, "grad_norm": 0.34375, "learning_rate": 0.00017529426999462891, "loss": 0.4112, "step": 39510 }, { "epoch": 1.0030333413714765, "grad_norm": 0.34375, "learning_rate": 0.00017526151771318972, "loss": 0.4033, "step": 39515 }, { "epoch": 1.0031602594204923, "grad_norm": 0.3515625, "learning_rate": 0.0001752287641921779, "loss": 0.4432, "step": 39520 }, { "epoch": 1.003287177469508, "grad_norm": 0.369140625, "learning_rate": 0.00017519600943320063, "loss": 0.4097, "step": 39525 }, { "epoch": 1.0034140955185238, "grad_norm": 0.337890625, "learning_rate": 0.00017516325343786525, "loss": 0.4156, "step": 39530 }, { "epoch": 1.0035410135675393, "grad_norm": 0.318359375, "learning_rate": 0.0001751304962077791, "loss": 0.4174, "step": 39535 }, { "epoch": 1.003667931616555, "grad_norm": 0.384765625, "learning_rate": 0.0001750977377445494, "loss": 0.412, "step": 39540 }, { "epoch": 1.0037948496655709, "grad_norm": 0.318359375, "learning_rate": 0.0001750649780497838, "loss": 0.4254, "step": 39545 }, { "epoch": 1.0039217677145866, "grad_norm": 0.35546875, "learning_rate": 0.00017503221712508965, "loss": 0.3983, "step": 39550 }, { "epoch": 1.0040486857636024, "grad_norm": 0.33984375, "learning_rate": 0.00017499945497207456, "loss": 0.3987, "step": 39555 }, { "epoch": 1.0041756038126182, "grad_norm": 0.34375, "learning_rate": 0.00017496669159234622, "loss": 0.411, "step": 39560 }, { "epoch": 1.004302521861634, "grad_norm": 0.345703125, "learning_rate": 0.00017493392698751228, "loss": 0.4374, "step": 39565 }, { "epoch": 1.0044294399106497, "grad_norm": 0.345703125, "learning_rate": 0.00017490116115918047, "loss": 0.3993, "step": 39570 }, { "epoch": 1.0045563579596655, "grad_norm": 0.3125, "learning_rate": 0.0001748683941089586, "loss": 0.4262, "step": 39575 }, { "epoch": 1.0046832760086812, "grad_norm": 0.365234375, "learning_rate": 0.00017483562583845455, "loss": 0.4032, "step": 39580 }, { "epoch": 1.004810194057697, "grad_norm": 0.330078125, "learning_rate": 0.00017480285634927618, "loss": 0.4168, "step": 39585 }, { "epoch": 1.0049371121067128, "grad_norm": 0.337890625, "learning_rate": 0.00017477008564303155, "loss": 0.381, "step": 39590 }, { "epoch": 1.0050640301557285, "grad_norm": 0.33203125, "learning_rate": 0.00017473731372132868, "loss": 0.4165, "step": 39595 }, { "epoch": 1.0051909482047443, "grad_norm": 0.337890625, "learning_rate": 0.0001747045405857757, "loss": 0.4506, "step": 39600 }, { "epoch": 1.0053178662537599, "grad_norm": 0.34375, "learning_rate": 0.00017467176623798075, "loss": 0.3994, "step": 39605 }, { "epoch": 1.0054447843027756, "grad_norm": 0.34375, "learning_rate": 0.00017463899067955206, "loss": 0.4235, "step": 39610 }, { "epoch": 1.0055717023517914, "grad_norm": 0.337890625, "learning_rate": 0.00017460621391209792, "loss": 0.4218, "step": 39615 }, { "epoch": 1.0056986204008072, "grad_norm": 0.3515625, "learning_rate": 0.0001745734359372266, "loss": 0.4336, "step": 39620 }, { "epoch": 1.005825538449823, "grad_norm": 0.365234375, "learning_rate": 0.00017454065675654665, "loss": 0.4397, "step": 39625 }, { "epoch": 1.0059524564988387, "grad_norm": 0.3359375, "learning_rate": 0.00017450787637166644, "loss": 0.3866, "step": 39630 }, { "epoch": 1.0060793745478545, "grad_norm": 0.349609375, "learning_rate": 0.00017447509478419444, "loss": 0.4317, "step": 39635 }, { "epoch": 1.0062062925968702, "grad_norm": 0.357421875, "learning_rate": 0.00017444231199573935, "loss": 0.4245, "step": 39640 }, { "epoch": 1.006333210645886, "grad_norm": 0.34765625, "learning_rate": 0.0001744095280079097, "loss": 0.4393, "step": 39645 }, { "epoch": 1.0064601286949018, "grad_norm": 0.349609375, "learning_rate": 0.00017437674282231422, "loss": 0.4257, "step": 39650 }, { "epoch": 1.0065870467439175, "grad_norm": 0.3515625, "learning_rate": 0.00017434395644056174, "loss": 0.442, "step": 39655 }, { "epoch": 1.0067139647929333, "grad_norm": 0.3671875, "learning_rate": 0.00017431116886426103, "loss": 0.4142, "step": 39660 }, { "epoch": 1.006840882841949, "grad_norm": 0.3359375, "learning_rate": 0.00017427838009502087, "loss": 0.44, "step": 39665 }, { "epoch": 1.0069678008909646, "grad_norm": 0.345703125, "learning_rate": 0.00017424559013445032, "loss": 0.4433, "step": 39670 }, { "epoch": 1.0070947189399804, "grad_norm": 0.337890625, "learning_rate": 0.0001742127989841583, "loss": 0.4249, "step": 39675 }, { "epoch": 1.0072216369889961, "grad_norm": 0.36328125, "learning_rate": 0.0001741800066457539, "loss": 0.4225, "step": 39680 }, { "epoch": 1.007348555038012, "grad_norm": 0.3359375, "learning_rate": 0.00017414721312084618, "loss": 0.4257, "step": 39685 }, { "epoch": 1.0074754730870277, "grad_norm": 0.359375, "learning_rate": 0.00017411441841104434, "loss": 0.4241, "step": 39690 }, { "epoch": 1.0076023911360434, "grad_norm": 0.34375, "learning_rate": 0.0001740816225179576, "loss": 0.434, "step": 39695 }, { "epoch": 1.0077293091850592, "grad_norm": 0.3203125, "learning_rate": 0.00017404882544319524, "loss": 0.4035, "step": 39700 }, { "epoch": 1.007856227234075, "grad_norm": 0.33203125, "learning_rate": 0.0001740160271883666, "loss": 0.4009, "step": 39705 }, { "epoch": 1.0079831452830907, "grad_norm": 0.341796875, "learning_rate": 0.00017398322775508102, "loss": 0.4396, "step": 39710 }, { "epoch": 1.0081100633321065, "grad_norm": 0.34375, "learning_rate": 0.0001739504271449481, "loss": 0.4405, "step": 39715 }, { "epoch": 1.0082369813811223, "grad_norm": 0.35546875, "learning_rate": 0.00017391762535957716, "loss": 0.4229, "step": 39720 }, { "epoch": 1.008363899430138, "grad_norm": 0.3515625, "learning_rate": 0.0001738848224005779, "loss": 0.4302, "step": 39725 }, { "epoch": 1.0084908174791538, "grad_norm": 0.318359375, "learning_rate": 0.00017385201826955997, "loss": 0.4171, "step": 39730 }, { "epoch": 1.0086177355281694, "grad_norm": 0.369140625, "learning_rate": 0.00017381921296813298, "loss": 0.4219, "step": 39735 }, { "epoch": 1.0087446535771851, "grad_norm": 0.341796875, "learning_rate": 0.0001737864064979067, "loss": 0.4101, "step": 39740 }, { "epoch": 1.008871571626201, "grad_norm": 0.35546875, "learning_rate": 0.00017375359886049092, "loss": 0.4188, "step": 39745 }, { "epoch": 1.0089984896752167, "grad_norm": 0.359375, "learning_rate": 0.00017372079005749548, "loss": 0.4102, "step": 39750 }, { "epoch": 1.0091254077242324, "grad_norm": 0.326171875, "learning_rate": 0.00017368798009053036, "loss": 0.4129, "step": 39755 }, { "epoch": 1.0092523257732482, "grad_norm": 0.28515625, "learning_rate": 0.00017365516896120548, "loss": 0.3942, "step": 39760 }, { "epoch": 1.009379243822264, "grad_norm": 0.322265625, "learning_rate": 0.0001736223566711309, "loss": 0.4308, "step": 39765 }, { "epoch": 1.0095061618712797, "grad_norm": 0.361328125, "learning_rate": 0.00017358954322191665, "loss": 0.4137, "step": 39770 }, { "epoch": 1.0096330799202955, "grad_norm": 0.33984375, "learning_rate": 0.00017355672861517295, "loss": 0.4276, "step": 39775 }, { "epoch": 1.0097599979693113, "grad_norm": 0.298828125, "learning_rate": 0.0001735239128525099, "loss": 0.3959, "step": 39780 }, { "epoch": 1.009886916018327, "grad_norm": 0.3515625, "learning_rate": 0.00017349109593553787, "loss": 0.408, "step": 39785 }, { "epoch": 1.0100138340673428, "grad_norm": 0.361328125, "learning_rate": 0.0001734582778658671, "loss": 0.4348, "step": 39790 }, { "epoch": 1.0101407521163586, "grad_norm": 0.337890625, "learning_rate": 0.000173425458645108, "loss": 0.4419, "step": 39795 }, { "epoch": 1.010267670165374, "grad_norm": 0.359375, "learning_rate": 0.00017339263827487094, "loss": 0.418, "step": 39800 }, { "epoch": 1.0103945882143899, "grad_norm": 0.361328125, "learning_rate": 0.00017335981675676649, "loss": 0.4021, "step": 39805 }, { "epoch": 1.0105215062634056, "grad_norm": 0.34765625, "learning_rate": 0.0001733269940924051, "loss": 0.4095, "step": 39810 }, { "epoch": 1.0106484243124214, "grad_norm": 0.33984375, "learning_rate": 0.0001732941702833974, "loss": 0.4146, "step": 39815 }, { "epoch": 1.0107753423614372, "grad_norm": 0.375, "learning_rate": 0.00017326134533135403, "loss": 0.4501, "step": 39820 }, { "epoch": 1.010902260410453, "grad_norm": 0.365234375, "learning_rate": 0.00017322851923788578, "loss": 0.4628, "step": 39825 }, { "epoch": 1.0110291784594687, "grad_norm": 0.341796875, "learning_rate": 0.0001731956920046033, "loss": 0.3981, "step": 39830 }, { "epoch": 1.0111560965084845, "grad_norm": 0.36328125, "learning_rate": 0.0001731628636331174, "loss": 0.4107, "step": 39835 }, { "epoch": 1.0112830145575002, "grad_norm": 0.337890625, "learning_rate": 0.00017313003412503908, "loss": 0.4095, "step": 39840 }, { "epoch": 1.011409932606516, "grad_norm": 0.3671875, "learning_rate": 0.00017309720348197916, "loss": 0.4372, "step": 39845 }, { "epoch": 1.0115368506555318, "grad_norm": 0.345703125, "learning_rate": 0.00017306437170554867, "loss": 0.4181, "step": 39850 }, { "epoch": 1.0116637687045476, "grad_norm": 0.34375, "learning_rate": 0.00017303153879735868, "loss": 0.4093, "step": 39855 }, { "epoch": 1.0117906867535633, "grad_norm": 0.353515625, "learning_rate": 0.00017299870475902024, "loss": 0.4134, "step": 39860 }, { "epoch": 1.011917604802579, "grad_norm": 0.33984375, "learning_rate": 0.00017296586959214454, "loss": 0.4246, "step": 39865 }, { "epoch": 1.0120445228515946, "grad_norm": 0.32421875, "learning_rate": 0.00017293303329834274, "loss": 0.4112, "step": 39870 }, { "epoch": 1.0121714409006104, "grad_norm": 0.33984375, "learning_rate": 0.0001729001958792261, "loss": 0.4131, "step": 39875 }, { "epoch": 1.0122983589496262, "grad_norm": 0.380859375, "learning_rate": 0.000172867357336406, "loss": 0.4479, "step": 39880 }, { "epoch": 1.012425276998642, "grad_norm": 0.330078125, "learning_rate": 0.0001728345176714938, "loss": 0.4259, "step": 39885 }, { "epoch": 1.0125521950476577, "grad_norm": 0.330078125, "learning_rate": 0.00017280167688610092, "loss": 0.4164, "step": 39890 }, { "epoch": 1.0126791130966735, "grad_norm": 0.3828125, "learning_rate": 0.0001727688349818388, "loss": 0.4146, "step": 39895 }, { "epoch": 1.0128060311456892, "grad_norm": 0.3515625, "learning_rate": 0.00017273599196031907, "loss": 0.4277, "step": 39900 }, { "epoch": 1.012932949194705, "grad_norm": 0.35546875, "learning_rate": 0.00017270314782315323, "loss": 0.4649, "step": 39905 }, { "epoch": 1.0130598672437208, "grad_norm": 0.345703125, "learning_rate": 0.000172670302571953, "loss": 0.4156, "step": 39910 }, { "epoch": 1.0131867852927365, "grad_norm": 0.3359375, "learning_rate": 0.00017263745620833002, "loss": 0.4108, "step": 39915 }, { "epoch": 1.0133137033417523, "grad_norm": 0.369140625, "learning_rate": 0.00017260460873389606, "loss": 0.4432, "step": 39920 }, { "epoch": 1.013440621390768, "grad_norm": 0.3671875, "learning_rate": 0.00017257176015026296, "loss": 0.4373, "step": 39925 }, { "epoch": 1.0135675394397838, "grad_norm": 0.34375, "learning_rate": 0.00017253891045904265, "loss": 0.4309, "step": 39930 }, { "epoch": 1.0136944574887994, "grad_norm": 0.375, "learning_rate": 0.00017250605966184692, "loss": 0.4154, "step": 39935 }, { "epoch": 1.0138213755378152, "grad_norm": 0.37109375, "learning_rate": 0.00017247320776028784, "loss": 0.4488, "step": 39940 }, { "epoch": 1.013948293586831, "grad_norm": 0.33984375, "learning_rate": 0.00017244035475597738, "loss": 0.3807, "step": 39945 }, { "epoch": 1.0140752116358467, "grad_norm": 0.337890625, "learning_rate": 0.0001724075006505277, "loss": 0.4145, "step": 39950 }, { "epoch": 1.0142021296848625, "grad_norm": 0.35546875, "learning_rate": 0.0001723746454455508, "loss": 0.4109, "step": 39955 }, { "epoch": 1.0143290477338782, "grad_norm": 0.34765625, "learning_rate": 0.00017234178914265905, "loss": 0.403, "step": 39960 }, { "epoch": 1.014455965782894, "grad_norm": 0.349609375, "learning_rate": 0.00017230893174346453, "loss": 0.4359, "step": 39965 }, { "epoch": 1.0145828838319098, "grad_norm": 0.3515625, "learning_rate": 0.00017227607324957968, "loss": 0.4037, "step": 39970 }, { "epoch": 1.0147098018809255, "grad_norm": 0.33984375, "learning_rate": 0.00017224321366261674, "loss": 0.3906, "step": 39975 }, { "epoch": 1.0148367199299413, "grad_norm": 0.341796875, "learning_rate": 0.00017221035298418822, "loss": 0.423, "step": 39980 }, { "epoch": 1.014963637978957, "grad_norm": 0.353515625, "learning_rate": 0.0001721774912159065, "loss": 0.4184, "step": 39985 }, { "epoch": 1.0150905560279728, "grad_norm": 0.3515625, "learning_rate": 0.00017214462835938413, "loss": 0.4358, "step": 39990 }, { "epoch": 1.0152174740769886, "grad_norm": 0.365234375, "learning_rate": 0.00017211176441623365, "loss": 0.4362, "step": 39995 }, { "epoch": 1.0153443921260041, "grad_norm": 0.3359375, "learning_rate": 0.0001720788993880677, "loss": 0.4133, "step": 40000 }, { "epoch": 1.01547131017502, "grad_norm": 0.333984375, "learning_rate": 0.00017204603327649896, "loss": 0.4142, "step": 40005 }, { "epoch": 1.0155982282240357, "grad_norm": 0.333984375, "learning_rate": 0.00017201316608314016, "loss": 0.4146, "step": 40010 }, { "epoch": 1.0157251462730514, "grad_norm": 0.353515625, "learning_rate": 0.00017198029780960403, "loss": 0.4031, "step": 40015 }, { "epoch": 1.0158520643220672, "grad_norm": 0.333984375, "learning_rate": 0.00017194742845750348, "loss": 0.3985, "step": 40020 }, { "epoch": 1.015978982371083, "grad_norm": 0.341796875, "learning_rate": 0.00017191455802845134, "loss": 0.4191, "step": 40025 }, { "epoch": 1.0161059004200987, "grad_norm": 0.357421875, "learning_rate": 0.00017188168652406059, "loss": 0.4331, "step": 40030 }, { "epoch": 1.0162328184691145, "grad_norm": 0.341796875, "learning_rate": 0.00017184881394594417, "loss": 0.4587, "step": 40035 }, { "epoch": 1.0163597365181303, "grad_norm": 0.3203125, "learning_rate": 0.00017181594029571515, "loss": 0.42, "step": 40040 }, { "epoch": 1.016486654567146, "grad_norm": 0.337890625, "learning_rate": 0.00017178306557498664, "loss": 0.4296, "step": 40045 }, { "epoch": 1.0166135726161618, "grad_norm": 0.357421875, "learning_rate": 0.00017175018978537177, "loss": 0.4307, "step": 40050 }, { "epoch": 1.0167404906651776, "grad_norm": 0.328125, "learning_rate": 0.0001717173129284838, "loss": 0.4002, "step": 40055 }, { "epoch": 1.0168674087141933, "grad_norm": 0.365234375, "learning_rate": 0.00017168443500593586, "loss": 0.4231, "step": 40060 }, { "epoch": 1.016994326763209, "grad_norm": 0.361328125, "learning_rate": 0.00017165155601934137, "loss": 0.4228, "step": 40065 }, { "epoch": 1.0171212448122247, "grad_norm": 0.3515625, "learning_rate": 0.00017161867597031367, "loss": 0.4305, "step": 40070 }, { "epoch": 1.0172481628612404, "grad_norm": 0.337890625, "learning_rate": 0.00017158579486046616, "loss": 0.4218, "step": 40075 }, { "epoch": 1.0173750809102562, "grad_norm": 0.34765625, "learning_rate": 0.00017155291269141225, "loss": 0.4058, "step": 40080 }, { "epoch": 1.017501998959272, "grad_norm": 0.353515625, "learning_rate": 0.00017152002946476552, "loss": 0.4183, "step": 40085 }, { "epoch": 1.0176289170082877, "grad_norm": 0.337890625, "learning_rate": 0.0001714871451821395, "loss": 0.4303, "step": 40090 }, { "epoch": 1.0177558350573035, "grad_norm": 0.349609375, "learning_rate": 0.00017145425984514784, "loss": 0.425, "step": 40095 }, { "epoch": 1.0178827531063193, "grad_norm": 0.32421875, "learning_rate": 0.00017142137345540419, "loss": 0.4046, "step": 40100 }, { "epoch": 1.018009671155335, "grad_norm": 0.36328125, "learning_rate": 0.00017138848601452232, "loss": 0.4485, "step": 40105 }, { "epoch": 1.0181365892043508, "grad_norm": 0.353515625, "learning_rate": 0.00017135559752411592, "loss": 0.4344, "step": 40110 }, { "epoch": 1.0182635072533666, "grad_norm": 0.318359375, "learning_rate": 0.00017132270798579888, "loss": 0.3968, "step": 40115 }, { "epoch": 1.0183904253023823, "grad_norm": 0.337890625, "learning_rate": 0.00017128981740118497, "loss": 0.4211, "step": 40120 }, { "epoch": 1.018517343351398, "grad_norm": 0.337890625, "learning_rate": 0.00017125692577188827, "loss": 0.4358, "step": 40125 }, { "epoch": 1.0186442614004139, "grad_norm": 0.337890625, "learning_rate": 0.00017122403309952267, "loss": 0.4173, "step": 40130 }, { "epoch": 1.0187711794494294, "grad_norm": 0.361328125, "learning_rate": 0.00017119113938570223, "loss": 0.4339, "step": 40135 }, { "epoch": 1.0188980974984452, "grad_norm": 0.322265625, "learning_rate": 0.00017115824463204105, "loss": 0.3971, "step": 40140 }, { "epoch": 1.019025015547461, "grad_norm": 0.3359375, "learning_rate": 0.00017112534884015321, "loss": 0.4343, "step": 40145 }, { "epoch": 1.0191519335964767, "grad_norm": 0.34765625, "learning_rate": 0.00017109245201165295, "loss": 0.4541, "step": 40150 }, { "epoch": 1.0192788516454925, "grad_norm": 0.337890625, "learning_rate": 0.0001710595541481544, "loss": 0.4346, "step": 40155 }, { "epoch": 1.0194057696945082, "grad_norm": 0.359375, "learning_rate": 0.00017102665525127198, "loss": 0.4293, "step": 40160 }, { "epoch": 1.019532687743524, "grad_norm": 0.322265625, "learning_rate": 0.00017099375532261991, "loss": 0.444, "step": 40165 }, { "epoch": 1.0196596057925398, "grad_norm": 0.37109375, "learning_rate": 0.00017096085436381266, "loss": 0.4188, "step": 40170 }, { "epoch": 1.0197865238415555, "grad_norm": 0.357421875, "learning_rate": 0.00017092795237646464, "loss": 0.4491, "step": 40175 }, { "epoch": 1.0199134418905713, "grad_norm": 0.353515625, "learning_rate": 0.00017089504936219034, "loss": 0.4366, "step": 40180 }, { "epoch": 1.020040359939587, "grad_norm": 0.3359375, "learning_rate": 0.00017086214532260424, "loss": 0.4205, "step": 40185 }, { "epoch": 1.0201672779886028, "grad_norm": 0.34375, "learning_rate": 0.00017082924025932106, "loss": 0.4128, "step": 40190 }, { "epoch": 1.0202941960376186, "grad_norm": 0.337890625, "learning_rate": 0.0001707963341739553, "loss": 0.4205, "step": 40195 }, { "epoch": 1.0204211140866342, "grad_norm": 0.333984375, "learning_rate": 0.00017076342706812172, "loss": 0.4257, "step": 40200 }, { "epoch": 1.02054803213565, "grad_norm": 0.34375, "learning_rate": 0.00017073051894343503, "loss": 0.4196, "step": 40205 }, { "epoch": 1.0206749501846657, "grad_norm": 0.357421875, "learning_rate": 0.00017069760980151008, "loss": 0.4129, "step": 40210 }, { "epoch": 1.0208018682336815, "grad_norm": 0.32421875, "learning_rate": 0.00017066469964396164, "loss": 0.3962, "step": 40215 }, { "epoch": 1.0209287862826972, "grad_norm": 0.34765625, "learning_rate": 0.00017063178847240455, "loss": 0.4318, "step": 40220 }, { "epoch": 1.021055704331713, "grad_norm": 0.34375, "learning_rate": 0.0001705988762884539, "loss": 0.3908, "step": 40225 }, { "epoch": 1.0211826223807288, "grad_norm": 0.310546875, "learning_rate": 0.00017056596309372458, "loss": 0.4207, "step": 40230 }, { "epoch": 1.0213095404297445, "grad_norm": 0.34765625, "learning_rate": 0.00017053304888983167, "loss": 0.3952, "step": 40235 }, { "epoch": 1.0214364584787603, "grad_norm": 0.33203125, "learning_rate": 0.00017050013367839016, "loss": 0.4373, "step": 40240 }, { "epoch": 1.021563376527776, "grad_norm": 0.353515625, "learning_rate": 0.0001704672174610153, "loss": 0.4089, "step": 40245 }, { "epoch": 1.0216902945767918, "grad_norm": 0.34765625, "learning_rate": 0.0001704343002393222, "loss": 0.4164, "step": 40250 }, { "epoch": 1.0218172126258076, "grad_norm": 0.373046875, "learning_rate": 0.00017040138201492616, "loss": 0.4304, "step": 40255 }, { "epoch": 1.0219441306748234, "grad_norm": 0.353515625, "learning_rate": 0.00017036846278944243, "loss": 0.3917, "step": 40260 }, { "epoch": 1.022071048723839, "grad_norm": 0.361328125, "learning_rate": 0.00017033554256448634, "loss": 0.4142, "step": 40265 }, { "epoch": 1.0221979667728547, "grad_norm": 0.349609375, "learning_rate": 0.0001703026213416733, "loss": 0.4101, "step": 40270 }, { "epoch": 1.0223248848218704, "grad_norm": 0.306640625, "learning_rate": 0.0001702696991226187, "loss": 0.3972, "step": 40275 }, { "epoch": 1.0224518028708862, "grad_norm": 0.328125, "learning_rate": 0.00017023677590893803, "loss": 0.4105, "step": 40280 }, { "epoch": 1.022578720919902, "grad_norm": 0.328125, "learning_rate": 0.00017020385170224677, "loss": 0.4339, "step": 40285 }, { "epoch": 1.0227056389689178, "grad_norm": 0.33203125, "learning_rate": 0.00017017092650416064, "loss": 0.4029, "step": 40290 }, { "epoch": 1.0228325570179335, "grad_norm": 0.35546875, "learning_rate": 0.00017013800031629518, "loss": 0.4358, "step": 40295 }, { "epoch": 1.0229594750669493, "grad_norm": 0.33984375, "learning_rate": 0.00017010507314026605, "loss": 0.4063, "step": 40300 }, { "epoch": 1.023086393115965, "grad_norm": 0.345703125, "learning_rate": 0.000170072144977689, "loss": 0.4317, "step": 40305 }, { "epoch": 1.0232133111649808, "grad_norm": 0.36328125, "learning_rate": 0.00017003921583017977, "loss": 0.4314, "step": 40310 }, { "epoch": 1.0233402292139966, "grad_norm": 0.33203125, "learning_rate": 0.0001700062856993542, "loss": 0.411, "step": 40315 }, { "epoch": 1.0234671472630124, "grad_norm": 0.34375, "learning_rate": 0.00016997335458682818, "loss": 0.4196, "step": 40320 }, { "epoch": 1.0235940653120281, "grad_norm": 0.34765625, "learning_rate": 0.00016994042249421752, "loss": 0.4249, "step": 40325 }, { "epoch": 1.0237209833610437, "grad_norm": 0.33984375, "learning_rate": 0.0001699074894231384, "loss": 0.3891, "step": 40330 }, { "epoch": 1.0238479014100594, "grad_norm": 0.455078125, "learning_rate": 0.00016987455537520663, "loss": 0.4299, "step": 40335 }, { "epoch": 1.0239748194590752, "grad_norm": 0.36328125, "learning_rate": 0.00016984162035203838, "loss": 0.4084, "step": 40340 }, { "epoch": 1.024101737508091, "grad_norm": 0.330078125, "learning_rate": 0.00016980868435524966, "loss": 0.4227, "step": 40345 }, { "epoch": 1.0242286555571067, "grad_norm": 0.349609375, "learning_rate": 0.00016977574738645675, "loss": 0.4139, "step": 40350 }, { "epoch": 1.0243555736061225, "grad_norm": 0.35546875, "learning_rate": 0.0001697428094472758, "loss": 0.4218, "step": 40355 }, { "epoch": 1.0244824916551383, "grad_norm": 0.359375, "learning_rate": 0.00016970987053932304, "loss": 0.4283, "step": 40360 }, { "epoch": 1.024609409704154, "grad_norm": 0.35546875, "learning_rate": 0.00016967693066421475, "loss": 0.4175, "step": 40365 }, { "epoch": 1.0247363277531698, "grad_norm": 0.34765625, "learning_rate": 0.00016964398982356735, "loss": 0.4186, "step": 40370 }, { "epoch": 1.0248632458021856, "grad_norm": 0.34375, "learning_rate": 0.00016961104801899713, "loss": 0.4398, "step": 40375 }, { "epoch": 1.0249901638512013, "grad_norm": 0.3515625, "learning_rate": 0.00016957810525212067, "loss": 0.4234, "step": 40380 }, { "epoch": 1.025117081900217, "grad_norm": 0.328125, "learning_rate": 0.00016954516152455437, "loss": 0.4341, "step": 40385 }, { "epoch": 1.0252439999492329, "grad_norm": 0.376953125, "learning_rate": 0.00016951221683791478, "loss": 0.4379, "step": 40390 }, { "epoch": 1.0253709179982486, "grad_norm": 0.34765625, "learning_rate": 0.00016947927119381845, "loss": 0.419, "step": 40395 }, { "epoch": 1.0254978360472642, "grad_norm": 0.35546875, "learning_rate": 0.00016944632459388206, "loss": 0.4405, "step": 40400 }, { "epoch": 1.02562475409628, "grad_norm": 0.33984375, "learning_rate": 0.00016941337703972232, "loss": 0.4142, "step": 40405 }, { "epoch": 1.0257516721452957, "grad_norm": 0.349609375, "learning_rate": 0.0001693804285329558, "loss": 0.402, "step": 40410 }, { "epoch": 1.0258785901943115, "grad_norm": 0.330078125, "learning_rate": 0.00016934747907519944, "loss": 0.4065, "step": 40415 }, { "epoch": 1.0260055082433273, "grad_norm": 0.3515625, "learning_rate": 0.00016931452866806996, "loss": 0.4171, "step": 40420 }, { "epoch": 1.026132426292343, "grad_norm": 0.337890625, "learning_rate": 0.00016928157731318429, "loss": 0.4325, "step": 40425 }, { "epoch": 1.0262593443413588, "grad_norm": 0.326171875, "learning_rate": 0.00016924862501215929, "loss": 0.4358, "step": 40430 }, { "epoch": 1.0263862623903746, "grad_norm": 0.341796875, "learning_rate": 0.00016921567176661194, "loss": 0.397, "step": 40435 }, { "epoch": 1.0265131804393903, "grad_norm": 0.361328125, "learning_rate": 0.00016918271757815917, "loss": 0.4362, "step": 40440 }, { "epoch": 1.026640098488406, "grad_norm": 0.330078125, "learning_rate": 0.0001691497624484181, "loss": 0.4249, "step": 40445 }, { "epoch": 1.0267670165374219, "grad_norm": 0.34765625, "learning_rate": 0.00016911680637900586, "loss": 0.4521, "step": 40450 }, { "epoch": 1.0268939345864376, "grad_norm": 0.36328125, "learning_rate": 0.00016908384937153954, "loss": 0.4238, "step": 40455 }, { "epoch": 1.0270208526354534, "grad_norm": 0.361328125, "learning_rate": 0.0001690508914276363, "loss": 0.4107, "step": 40460 }, { "epoch": 1.027147770684469, "grad_norm": 0.333984375, "learning_rate": 0.00016901793254891345, "loss": 0.4008, "step": 40465 }, { "epoch": 1.0272746887334847, "grad_norm": 0.34375, "learning_rate": 0.00016898497273698821, "loss": 0.3924, "step": 40470 }, { "epoch": 1.0274016067825005, "grad_norm": 0.34765625, "learning_rate": 0.00016895201199347792, "loss": 0.4324, "step": 40475 }, { "epoch": 1.0275285248315162, "grad_norm": 0.341796875, "learning_rate": 0.00016891905031999992, "loss": 0.4166, "step": 40480 }, { "epoch": 1.027655442880532, "grad_norm": 0.34765625, "learning_rate": 0.00016888608771817166, "loss": 0.4021, "step": 40485 }, { "epoch": 1.0277823609295478, "grad_norm": 0.33984375, "learning_rate": 0.00016885312418961067, "loss": 0.4265, "step": 40490 }, { "epoch": 1.0279092789785635, "grad_norm": 0.3515625, "learning_rate": 0.00016882015973593433, "loss": 0.4293, "step": 40495 }, { "epoch": 1.0280361970275793, "grad_norm": 0.333984375, "learning_rate": 0.00016878719435876026, "loss": 0.4364, "step": 40500 }, { "epoch": 1.028163115076595, "grad_norm": 0.37890625, "learning_rate": 0.00016875422805970598, "loss": 0.435, "step": 40505 }, { "epoch": 1.0282900331256108, "grad_norm": 0.357421875, "learning_rate": 0.0001687212608403893, "loss": 0.4208, "step": 40510 }, { "epoch": 1.0284169511746266, "grad_norm": 0.330078125, "learning_rate": 0.00016868829270242776, "loss": 0.4064, "step": 40515 }, { "epoch": 1.0285438692236424, "grad_norm": 0.345703125, "learning_rate": 0.00016865532364743917, "loss": 0.4172, "step": 40520 }, { "epoch": 1.0286707872726581, "grad_norm": 0.26953125, "learning_rate": 0.00016862235367704127, "loss": 0.3989, "step": 40525 }, { "epoch": 1.0287977053216737, "grad_norm": 0.37109375, "learning_rate": 0.00016858938279285188, "loss": 0.4236, "step": 40530 }, { "epoch": 1.0289246233706895, "grad_norm": 0.353515625, "learning_rate": 0.00016855641099648887, "loss": 0.4433, "step": 40535 }, { "epoch": 1.0290515414197052, "grad_norm": 0.37890625, "learning_rate": 0.00016852343828957018, "loss": 0.4643, "step": 40540 }, { "epoch": 1.029178459468721, "grad_norm": 0.36328125, "learning_rate": 0.00016849046467371377, "loss": 0.4193, "step": 40545 }, { "epoch": 1.0293053775177368, "grad_norm": 0.333984375, "learning_rate": 0.0001684574901505376, "loss": 0.4353, "step": 40550 }, { "epoch": 1.0294322955667525, "grad_norm": 0.3671875, "learning_rate": 0.00016842451472165974, "loss": 0.4501, "step": 40555 }, { "epoch": 1.0295592136157683, "grad_norm": 0.326171875, "learning_rate": 0.0001683915383886983, "loss": 0.4204, "step": 40560 }, { "epoch": 1.029686131664784, "grad_norm": 0.357421875, "learning_rate": 0.00016835856115327135, "loss": 0.4219, "step": 40565 }, { "epoch": 1.0298130497137998, "grad_norm": 0.3359375, "learning_rate": 0.00016832558301699713, "loss": 0.402, "step": 40570 }, { "epoch": 1.0299399677628156, "grad_norm": 0.337890625, "learning_rate": 0.0001682926039814939, "loss": 0.4307, "step": 40575 }, { "epoch": 1.0300668858118314, "grad_norm": 0.357421875, "learning_rate": 0.00016825962404837987, "loss": 0.4127, "step": 40580 }, { "epoch": 1.0301938038608471, "grad_norm": 0.376953125, "learning_rate": 0.00016822664321927333, "loss": 0.4253, "step": 40585 }, { "epoch": 1.030320721909863, "grad_norm": 0.353515625, "learning_rate": 0.0001681936614957927, "loss": 0.4238, "step": 40590 }, { "epoch": 1.0304476399588784, "grad_norm": 0.373046875, "learning_rate": 0.00016816067887955632, "loss": 0.4092, "step": 40595 }, { "epoch": 1.0305745580078942, "grad_norm": 0.3671875, "learning_rate": 0.00016812769537218267, "loss": 0.4217, "step": 40600 }, { "epoch": 1.03070147605691, "grad_norm": 0.345703125, "learning_rate": 0.00016809471097529023, "loss": 0.4269, "step": 40605 }, { "epoch": 1.0308283941059257, "grad_norm": 0.337890625, "learning_rate": 0.0001680617256904975, "loss": 0.4231, "step": 40610 }, { "epoch": 1.0309553121549415, "grad_norm": 0.376953125, "learning_rate": 0.00016802873951942315, "loss": 0.4396, "step": 40615 }, { "epoch": 1.0310822302039573, "grad_norm": 0.3515625, "learning_rate": 0.0001679957524636857, "loss": 0.3964, "step": 40620 }, { "epoch": 1.031209148252973, "grad_norm": 0.326171875, "learning_rate": 0.00016796276452490385, "loss": 0.3951, "step": 40625 }, { "epoch": 1.0313360663019888, "grad_norm": 0.3671875, "learning_rate": 0.0001679297757046963, "loss": 0.4234, "step": 40630 }, { "epoch": 1.0314629843510046, "grad_norm": 0.4375, "learning_rate": 0.00016789678600468176, "loss": 0.4209, "step": 40635 }, { "epoch": 1.0315899024000204, "grad_norm": 0.357421875, "learning_rate": 0.0001678637954264791, "loss": 0.4447, "step": 40640 }, { "epoch": 1.0317168204490361, "grad_norm": 0.33203125, "learning_rate": 0.0001678308039717071, "loss": 0.4271, "step": 40645 }, { "epoch": 1.0318437384980519, "grad_norm": 0.322265625, "learning_rate": 0.00016779781164198465, "loss": 0.4112, "step": 40650 }, { "epoch": 1.0319706565470677, "grad_norm": 0.30859375, "learning_rate": 0.00016776481843893068, "loss": 0.4064, "step": 40655 }, { "epoch": 1.0320975745960834, "grad_norm": 0.375, "learning_rate": 0.00016773182436416413, "loss": 0.4353, "step": 40660 }, { "epoch": 1.032224492645099, "grad_norm": 0.353515625, "learning_rate": 0.00016769882941930405, "loss": 0.4336, "step": 40665 }, { "epoch": 1.0323514106941147, "grad_norm": 0.333984375, "learning_rate": 0.00016766583360596946, "loss": 0.3592, "step": 40670 }, { "epoch": 1.0324783287431305, "grad_norm": 0.349609375, "learning_rate": 0.00016763283692577944, "loss": 0.4398, "step": 40675 }, { "epoch": 1.0326052467921463, "grad_norm": 0.37109375, "learning_rate": 0.00016759983938035317, "loss": 0.4441, "step": 40680 }, { "epoch": 1.032732164841162, "grad_norm": 0.361328125, "learning_rate": 0.00016756684097130972, "loss": 0.3996, "step": 40685 }, { "epoch": 1.0328590828901778, "grad_norm": 0.34765625, "learning_rate": 0.0001675338417002684, "loss": 0.4133, "step": 40690 }, { "epoch": 1.0329860009391936, "grad_norm": 0.671875, "learning_rate": 0.00016750084156884847, "loss": 0.4024, "step": 40695 }, { "epoch": 1.0331129189882093, "grad_norm": 0.341796875, "learning_rate": 0.00016746784057866925, "loss": 0.4151, "step": 40700 }, { "epoch": 1.033239837037225, "grad_norm": 0.345703125, "learning_rate": 0.00016743483873135, "loss": 0.4314, "step": 40705 }, { "epoch": 1.0333667550862409, "grad_norm": 0.353515625, "learning_rate": 0.0001674018360285102, "loss": 0.4428, "step": 40710 }, { "epoch": 1.0334936731352566, "grad_norm": 0.328125, "learning_rate": 0.0001673688324717692, "loss": 0.4379, "step": 40715 }, { "epoch": 1.0336205911842724, "grad_norm": 0.31640625, "learning_rate": 0.00016733582806274656, "loss": 0.4118, "step": 40720 }, { "epoch": 1.033747509233288, "grad_norm": 0.341796875, "learning_rate": 0.00016730282280306166, "loss": 0.4082, "step": 40725 }, { "epoch": 1.0338744272823037, "grad_norm": 0.37890625, "learning_rate": 0.00016726981669433418, "loss": 0.4389, "step": 40730 }, { "epoch": 1.0340013453313195, "grad_norm": 0.34765625, "learning_rate": 0.0001672368097381837, "loss": 0.4442, "step": 40735 }, { "epoch": 1.0341282633803353, "grad_norm": 0.375, "learning_rate": 0.0001672038019362298, "loss": 0.4805, "step": 40740 }, { "epoch": 1.034255181429351, "grad_norm": 0.359375, "learning_rate": 0.0001671707932900922, "loss": 0.4387, "step": 40745 }, { "epoch": 1.0343820994783668, "grad_norm": 0.365234375, "learning_rate": 0.00016713778380139064, "loss": 0.4426, "step": 40750 }, { "epoch": 1.0345090175273826, "grad_norm": 0.36328125, "learning_rate": 0.0001671047734717448, "loss": 0.4326, "step": 40755 }, { "epoch": 1.0346359355763983, "grad_norm": 0.32421875, "learning_rate": 0.0001670717623027746, "loss": 0.3847, "step": 40760 }, { "epoch": 1.034762853625414, "grad_norm": 0.345703125, "learning_rate": 0.00016703875029609977, "loss": 0.4124, "step": 40765 }, { "epoch": 1.0348897716744299, "grad_norm": 0.34375, "learning_rate": 0.00016700573745334027, "loss": 0.4297, "step": 40770 }, { "epoch": 1.0350166897234456, "grad_norm": 0.34765625, "learning_rate": 0.000166972723776116, "loss": 0.3917, "step": 40775 }, { "epoch": 1.0351436077724614, "grad_norm": 0.33203125, "learning_rate": 0.00016693970926604697, "loss": 0.4139, "step": 40780 }, { "epoch": 1.0352705258214772, "grad_norm": 0.365234375, "learning_rate": 0.0001669066939247531, "loss": 0.4011, "step": 40785 }, { "epoch": 1.035397443870493, "grad_norm": 0.31640625, "learning_rate": 0.00016687367775385456, "loss": 0.3927, "step": 40790 }, { "epoch": 1.0355243619195085, "grad_norm": 0.318359375, "learning_rate": 0.00016684066075497132, "loss": 0.4392, "step": 40795 }, { "epoch": 1.0356512799685242, "grad_norm": 0.349609375, "learning_rate": 0.0001668076429297236, "loss": 0.4493, "step": 40800 }, { "epoch": 1.03577819801754, "grad_norm": 0.3515625, "learning_rate": 0.00016677462427973155, "loss": 0.4352, "step": 40805 }, { "epoch": 1.0359051160665558, "grad_norm": 0.33984375, "learning_rate": 0.0001667416048066153, "loss": 0.3948, "step": 40810 }, { "epoch": 1.0360320341155715, "grad_norm": 0.3828125, "learning_rate": 0.0001667085845119952, "loss": 0.4196, "step": 40815 }, { "epoch": 1.0361589521645873, "grad_norm": 0.369140625, "learning_rate": 0.0001666755633974915, "loss": 0.3869, "step": 40820 }, { "epoch": 1.036285870213603, "grad_norm": 0.376953125, "learning_rate": 0.0001666425414647246, "loss": 0.4392, "step": 40825 }, { "epoch": 1.0364127882626188, "grad_norm": 0.34765625, "learning_rate": 0.00016660951871531478, "loss": 0.4057, "step": 40830 }, { "epoch": 1.0365397063116346, "grad_norm": 0.341796875, "learning_rate": 0.00016657649515088252, "loss": 0.4152, "step": 40835 }, { "epoch": 1.0366666243606504, "grad_norm": 0.37109375, "learning_rate": 0.00016654347077304827, "loss": 0.4046, "step": 40840 }, { "epoch": 1.0367935424096661, "grad_norm": 0.337890625, "learning_rate": 0.0001665104455834324, "loss": 0.4062, "step": 40845 }, { "epoch": 1.036920460458682, "grad_norm": 0.34375, "learning_rate": 0.00016647741958365563, "loss": 0.4109, "step": 40850 }, { "epoch": 1.0370473785076977, "grad_norm": 0.3671875, "learning_rate": 0.0001664443927753384, "loss": 0.4152, "step": 40855 }, { "epoch": 1.0371742965567132, "grad_norm": 0.36328125, "learning_rate": 0.00016641136516010136, "loss": 0.4266, "step": 40860 }, { "epoch": 1.037301214605729, "grad_norm": 0.34765625, "learning_rate": 0.0001663783367395652, "loss": 0.3921, "step": 40865 }, { "epoch": 1.0374281326547448, "grad_norm": 0.349609375, "learning_rate": 0.0001663453075153506, "loss": 0.4151, "step": 40870 }, { "epoch": 1.0375550507037605, "grad_norm": 0.3359375, "learning_rate": 0.00016631227748907824, "loss": 0.4191, "step": 40875 }, { "epoch": 1.0376819687527763, "grad_norm": 0.310546875, "learning_rate": 0.00016627924666236891, "loss": 0.4073, "step": 40880 }, { "epoch": 1.037808886801792, "grad_norm": 0.357421875, "learning_rate": 0.00016624621503684344, "loss": 0.4363, "step": 40885 }, { "epoch": 1.0379358048508078, "grad_norm": 0.34765625, "learning_rate": 0.0001662131826141227, "loss": 0.4136, "step": 40890 }, { "epoch": 1.0380627228998236, "grad_norm": 0.359375, "learning_rate": 0.00016618014939582748, "loss": 0.4322, "step": 40895 }, { "epoch": 1.0381896409488394, "grad_norm": 0.36328125, "learning_rate": 0.00016614711538357884, "loss": 0.4008, "step": 40900 }, { "epoch": 1.0383165589978551, "grad_norm": 0.3515625, "learning_rate": 0.0001661140805789977, "loss": 0.4255, "step": 40905 }, { "epoch": 1.038443477046871, "grad_norm": 0.361328125, "learning_rate": 0.000166081044983705, "loss": 0.4117, "step": 40910 }, { "epoch": 1.0385703950958867, "grad_norm": 0.32421875, "learning_rate": 0.0001660480085993218, "loss": 0.4038, "step": 40915 }, { "epoch": 1.0386973131449024, "grad_norm": 0.345703125, "learning_rate": 0.00016601497142746925, "loss": 0.4668, "step": 40920 }, { "epoch": 1.038824231193918, "grad_norm": 0.35546875, "learning_rate": 0.0001659819334697684, "loss": 0.4073, "step": 40925 }, { "epoch": 1.0389511492429337, "grad_norm": 0.349609375, "learning_rate": 0.00016594889472784047, "loss": 0.411, "step": 40930 }, { "epoch": 1.0390780672919495, "grad_norm": 0.349609375, "learning_rate": 0.0001659158552033066, "loss": 0.4491, "step": 40935 }, { "epoch": 1.0392049853409653, "grad_norm": 0.357421875, "learning_rate": 0.0001658828148977881, "loss": 0.4086, "step": 40940 }, { "epoch": 1.039331903389981, "grad_norm": 0.3515625, "learning_rate": 0.00016584977381290614, "loss": 0.4103, "step": 40945 }, { "epoch": 1.0394588214389968, "grad_norm": 0.35546875, "learning_rate": 0.00016581673195028212, "loss": 0.4148, "step": 40950 }, { "epoch": 1.0395857394880126, "grad_norm": 0.34765625, "learning_rate": 0.00016578368931153742, "loss": 0.4295, "step": 40955 }, { "epoch": 1.0397126575370284, "grad_norm": 0.365234375, "learning_rate": 0.00016575064589829334, "loss": 0.4266, "step": 40960 }, { "epoch": 1.0398395755860441, "grad_norm": 0.33984375, "learning_rate": 0.0001657176017121713, "loss": 0.4091, "step": 40965 }, { "epoch": 1.0399664936350599, "grad_norm": 0.345703125, "learning_rate": 0.00016568455675479283, "loss": 0.4074, "step": 40970 }, { "epoch": 1.0400934116840757, "grad_norm": 0.3515625, "learning_rate": 0.0001656515110277794, "loss": 0.4258, "step": 40975 }, { "epoch": 1.0402203297330914, "grad_norm": 0.353515625, "learning_rate": 0.00016561846453275254, "loss": 0.4466, "step": 40980 }, { "epoch": 1.0403472477821072, "grad_norm": 0.341796875, "learning_rate": 0.00016558541727133388, "loss": 0.4109, "step": 40985 }, { "epoch": 1.0404741658311227, "grad_norm": 0.3515625, "learning_rate": 0.000165552369245145, "loss": 0.4094, "step": 40990 }, { "epoch": 1.0406010838801385, "grad_norm": 0.3671875, "learning_rate": 0.0001655193204558076, "loss": 0.4184, "step": 40995 }, { "epoch": 1.0407280019291543, "grad_norm": 0.337890625, "learning_rate": 0.00016548627090494326, "loss": 0.4226, "step": 41000 }, { "epoch": 1.04085491997817, "grad_norm": 0.359375, "learning_rate": 0.0001654532205941738, "loss": 0.4387, "step": 41005 }, { "epoch": 1.0409818380271858, "grad_norm": 0.359375, "learning_rate": 0.00016542016952512096, "loss": 0.4408, "step": 41010 }, { "epoch": 1.0411087560762016, "grad_norm": 0.365234375, "learning_rate": 0.0001653871176994065, "loss": 0.4253, "step": 41015 }, { "epoch": 1.0412356741252173, "grad_norm": 0.357421875, "learning_rate": 0.00016535406511865235, "loss": 0.418, "step": 41020 }, { "epoch": 1.041362592174233, "grad_norm": 0.337890625, "learning_rate": 0.00016532101178448036, "loss": 0.4312, "step": 41025 }, { "epoch": 1.0414895102232489, "grad_norm": 0.34765625, "learning_rate": 0.00016528795769851242, "loss": 0.3968, "step": 41030 }, { "epoch": 1.0416164282722646, "grad_norm": 0.353515625, "learning_rate": 0.00016525490286237047, "loss": 0.4364, "step": 41035 }, { "epoch": 1.0417433463212804, "grad_norm": 0.365234375, "learning_rate": 0.00016522184727767647, "loss": 0.4376, "step": 41040 }, { "epoch": 1.0418702643702962, "grad_norm": 0.345703125, "learning_rate": 0.00016518879094605255, "loss": 0.4455, "step": 41045 }, { "epoch": 1.041997182419312, "grad_norm": 0.3359375, "learning_rate": 0.0001651557338691207, "loss": 0.4114, "step": 41050 }, { "epoch": 1.0421241004683277, "grad_norm": 0.337890625, "learning_rate": 0.00016512267604850298, "loss": 0.4414, "step": 41055 }, { "epoch": 1.0422510185173433, "grad_norm": 0.359375, "learning_rate": 0.00016508961748582158, "loss": 0.4039, "step": 41060 }, { "epoch": 1.042377936566359, "grad_norm": 0.341796875, "learning_rate": 0.0001650565581826987, "loss": 0.4472, "step": 41065 }, { "epoch": 1.0425048546153748, "grad_norm": 0.36328125, "learning_rate": 0.00016502349814075646, "loss": 0.4147, "step": 41070 }, { "epoch": 1.0426317726643906, "grad_norm": 0.365234375, "learning_rate": 0.0001649904373616172, "loss": 0.3909, "step": 41075 }, { "epoch": 1.0427586907134063, "grad_norm": 0.345703125, "learning_rate": 0.00016495737584690316, "loss": 0.4152, "step": 41080 }, { "epoch": 1.042885608762422, "grad_norm": 0.353515625, "learning_rate": 0.00016492431359823662, "loss": 0.4244, "step": 41085 }, { "epoch": 1.0430125268114379, "grad_norm": 0.34375, "learning_rate": 0.00016489125061723992, "loss": 0.4089, "step": 41090 }, { "epoch": 1.0431394448604536, "grad_norm": 0.365234375, "learning_rate": 0.00016485818690553552, "loss": 0.4365, "step": 41095 }, { "epoch": 1.0432663629094694, "grad_norm": 0.3359375, "learning_rate": 0.0001648251224647458, "loss": 0.4349, "step": 41100 }, { "epoch": 1.0433932809584852, "grad_norm": 0.34765625, "learning_rate": 0.0001647920572964932, "loss": 0.4301, "step": 41105 }, { "epoch": 1.043520199007501, "grad_norm": 0.302734375, "learning_rate": 0.0001647589914024003, "loss": 0.4112, "step": 41110 }, { "epoch": 1.0436471170565167, "grad_norm": 0.33203125, "learning_rate": 0.00016472592478408954, "loss": 0.4243, "step": 41115 }, { "epoch": 1.0437740351055325, "grad_norm": 0.34375, "learning_rate": 0.00016469285744318354, "loss": 0.4522, "step": 41120 }, { "epoch": 1.043900953154548, "grad_norm": 2.125, "learning_rate": 0.00016465978938130484, "loss": 0.4229, "step": 41125 }, { "epoch": 1.0440278712035638, "grad_norm": 0.33203125, "learning_rate": 0.00016462672060007613, "loss": 0.3975, "step": 41130 }, { "epoch": 1.0441547892525795, "grad_norm": 0.337890625, "learning_rate": 0.00016459365110112008, "loss": 0.4071, "step": 41135 }, { "epoch": 1.0442817073015953, "grad_norm": 0.361328125, "learning_rate": 0.00016456058088605934, "loss": 0.4402, "step": 41140 }, { "epoch": 1.044408625350611, "grad_norm": 0.328125, "learning_rate": 0.00016452750995651674, "loss": 0.4572, "step": 41145 }, { "epoch": 1.0445355433996268, "grad_norm": 0.373046875, "learning_rate": 0.000164494438314115, "loss": 0.4282, "step": 41150 }, { "epoch": 1.0446624614486426, "grad_norm": 0.328125, "learning_rate": 0.00016446136596047695, "loss": 0.3867, "step": 41155 }, { "epoch": 1.0447893794976584, "grad_norm": 0.380859375, "learning_rate": 0.00016442829289722545, "loss": 0.4203, "step": 41160 }, { "epoch": 1.0449162975466741, "grad_norm": 0.349609375, "learning_rate": 0.00016439521912598332, "loss": 0.426, "step": 41165 }, { "epoch": 1.04504321559569, "grad_norm": 0.33203125, "learning_rate": 0.00016436214464837356, "loss": 0.4169, "step": 41170 }, { "epoch": 1.0451701336447057, "grad_norm": 0.31640625, "learning_rate": 0.00016432906946601904, "loss": 0.3942, "step": 41175 }, { "epoch": 1.0452970516937214, "grad_norm": 0.3671875, "learning_rate": 0.00016429599358054283, "loss": 0.4479, "step": 41180 }, { "epoch": 1.0454239697427372, "grad_norm": 0.390625, "learning_rate": 0.00016426291699356792, "loss": 0.4156, "step": 41185 }, { "epoch": 1.0455508877917528, "grad_norm": 0.275390625, "learning_rate": 0.0001642298397067173, "loss": 0.3742, "step": 41190 }, { "epoch": 1.0456778058407685, "grad_norm": 0.353515625, "learning_rate": 0.00016419676172161414, "loss": 0.4279, "step": 41195 }, { "epoch": 1.0458047238897843, "grad_norm": 0.353515625, "learning_rate": 0.00016416368303988157, "loss": 0.4199, "step": 41200 }, { "epoch": 1.0459316419388, "grad_norm": 0.34375, "learning_rate": 0.00016413060366314266, "loss": 0.4267, "step": 41205 }, { "epoch": 1.0460585599878158, "grad_norm": 0.361328125, "learning_rate": 0.00016409752359302073, "loss": 0.3848, "step": 41210 }, { "epoch": 1.0461854780368316, "grad_norm": 0.373046875, "learning_rate": 0.00016406444283113884, "loss": 0.459, "step": 41215 }, { "epoch": 1.0463123960858474, "grad_norm": 0.34765625, "learning_rate": 0.0001640313613791204, "loss": 0.4099, "step": 41220 }, { "epoch": 1.0464393141348631, "grad_norm": 0.33203125, "learning_rate": 0.00016399827923858866, "loss": 0.409, "step": 41225 }, { "epoch": 1.046566232183879, "grad_norm": 0.3359375, "learning_rate": 0.00016396519641116686, "loss": 0.4154, "step": 41230 }, { "epoch": 1.0466931502328947, "grad_norm": 0.359375, "learning_rate": 0.00016393211289847853, "loss": 0.4182, "step": 41235 }, { "epoch": 1.0468200682819104, "grad_norm": 0.345703125, "learning_rate": 0.00016389902870214695, "loss": 0.423, "step": 41240 }, { "epoch": 1.0469469863309262, "grad_norm": 0.3359375, "learning_rate": 0.00016386594382379553, "loss": 0.4357, "step": 41245 }, { "epoch": 1.047073904379942, "grad_norm": 0.32421875, "learning_rate": 0.0001638328582650478, "loss": 0.4165, "step": 41250 }, { "epoch": 1.0472008224289575, "grad_norm": 0.345703125, "learning_rate": 0.00016379977202752717, "loss": 0.4165, "step": 41255 }, { "epoch": 1.0473277404779733, "grad_norm": 0.361328125, "learning_rate": 0.00016376668511285726, "loss": 0.4333, "step": 41260 }, { "epoch": 1.047454658526989, "grad_norm": 0.361328125, "learning_rate": 0.0001637335975226616, "loss": 0.4217, "step": 41265 }, { "epoch": 1.0475815765760048, "grad_norm": 0.345703125, "learning_rate": 0.0001637005092585638, "loss": 0.4353, "step": 41270 }, { "epoch": 1.0477084946250206, "grad_norm": 0.369140625, "learning_rate": 0.00016366742032218745, "loss": 0.4192, "step": 41275 }, { "epoch": 1.0478354126740363, "grad_norm": 0.345703125, "learning_rate": 0.00016363433071515623, "loss": 0.3989, "step": 41280 }, { "epoch": 1.0479623307230521, "grad_norm": 0.36328125, "learning_rate": 0.00016360124043909383, "loss": 0.4119, "step": 41285 }, { "epoch": 1.0480892487720679, "grad_norm": 0.349609375, "learning_rate": 0.00016356814949562395, "loss": 0.4203, "step": 41290 }, { "epoch": 1.0482161668210837, "grad_norm": 0.34375, "learning_rate": 0.00016353505788637042, "loss": 0.4451, "step": 41295 }, { "epoch": 1.0483430848700994, "grad_norm": 0.365234375, "learning_rate": 0.00016350196561295698, "loss": 0.4223, "step": 41300 }, { "epoch": 1.0484700029191152, "grad_norm": 0.357421875, "learning_rate": 0.00016346887267700745, "loss": 0.4202, "step": 41305 }, { "epoch": 1.048596920968131, "grad_norm": 0.373046875, "learning_rate": 0.00016343577908014574, "loss": 0.4254, "step": 41310 }, { "epoch": 1.0487238390171467, "grad_norm": 0.36328125, "learning_rate": 0.00016340268482399565, "loss": 0.3904, "step": 41315 }, { "epoch": 1.0488507570661625, "grad_norm": 0.3671875, "learning_rate": 0.00016336958991018118, "loss": 0.4123, "step": 41320 }, { "epoch": 1.048977675115178, "grad_norm": 0.341796875, "learning_rate": 0.0001633364943403263, "loss": 0.4161, "step": 41325 }, { "epoch": 1.0491045931641938, "grad_norm": 0.359375, "learning_rate": 0.00016330339811605488, "loss": 0.4059, "step": 41330 }, { "epoch": 1.0492315112132096, "grad_norm": 0.328125, "learning_rate": 0.00016327030123899108, "loss": 0.4143, "step": 41335 }, { "epoch": 1.0493584292622253, "grad_norm": 0.333984375, "learning_rate": 0.00016323720371075884, "loss": 0.3775, "step": 41340 }, { "epoch": 1.049485347311241, "grad_norm": 0.36328125, "learning_rate": 0.0001632041055329823, "loss": 0.4369, "step": 41345 }, { "epoch": 1.0496122653602569, "grad_norm": 0.353515625, "learning_rate": 0.0001631710067072856, "loss": 0.4284, "step": 41350 }, { "epoch": 1.0497391834092726, "grad_norm": 0.3359375, "learning_rate": 0.00016313790723529277, "loss": 0.4393, "step": 41355 }, { "epoch": 1.0498661014582884, "grad_norm": 0.3203125, "learning_rate": 0.00016310480711862813, "loss": 0.4133, "step": 41360 }, { "epoch": 1.0499930195073042, "grad_norm": 0.3671875, "learning_rate": 0.00016307170635891581, "loss": 0.4265, "step": 41365 }, { "epoch": 1.05011993755632, "grad_norm": 0.322265625, "learning_rate": 0.00016303860495778008, "loss": 0.4085, "step": 41370 }, { "epoch": 1.0502468556053357, "grad_norm": 0.32421875, "learning_rate": 0.00016300550291684513, "loss": 0.404, "step": 41375 }, { "epoch": 1.0503737736543515, "grad_norm": 0.33984375, "learning_rate": 0.0001629724002377354, "loss": 0.4199, "step": 41380 }, { "epoch": 1.0505006917033672, "grad_norm": 0.373046875, "learning_rate": 0.0001629392969220751, "loss": 0.4706, "step": 41385 }, { "epoch": 1.0506276097523828, "grad_norm": 0.33203125, "learning_rate": 0.00016290619297148868, "loss": 0.4074, "step": 41390 }, { "epoch": 1.0507545278013986, "grad_norm": 0.34765625, "learning_rate": 0.00016287308838760055, "loss": 0.4226, "step": 41395 }, { "epoch": 1.0508814458504143, "grad_norm": 0.341796875, "learning_rate": 0.0001628399831720351, "loss": 0.4117, "step": 41400 }, { "epoch": 1.05100836389943, "grad_norm": 0.34375, "learning_rate": 0.00016280687732641677, "loss": 0.4332, "step": 41405 }, { "epoch": 1.0511352819484459, "grad_norm": 0.3828125, "learning_rate": 0.00016277377085237006, "loss": 0.4511, "step": 41410 }, { "epoch": 1.0512621999974616, "grad_norm": 0.341796875, "learning_rate": 0.0001627406637515195, "loss": 0.3962, "step": 41415 }, { "epoch": 1.0513891180464774, "grad_norm": 0.353515625, "learning_rate": 0.00016270755602548962, "loss": 0.4489, "step": 41420 }, { "epoch": 1.0515160360954932, "grad_norm": 0.365234375, "learning_rate": 0.00016267444767590505, "loss": 0.4267, "step": 41425 }, { "epoch": 1.051642954144509, "grad_norm": 0.337890625, "learning_rate": 0.0001626413387043904, "loss": 0.3931, "step": 41430 }, { "epoch": 1.0517698721935247, "grad_norm": 0.33203125, "learning_rate": 0.0001626082291125703, "loss": 0.4042, "step": 41435 }, { "epoch": 1.0518967902425405, "grad_norm": 0.341796875, "learning_rate": 0.00016257511890206943, "loss": 0.4272, "step": 41440 }, { "epoch": 1.0520237082915562, "grad_norm": 0.36328125, "learning_rate": 0.00016254200807451247, "loss": 0.3943, "step": 41445 }, { "epoch": 1.052150626340572, "grad_norm": 0.326171875, "learning_rate": 0.00016250889663152416, "loss": 0.4025, "step": 41450 }, { "epoch": 1.0522775443895875, "grad_norm": 0.36328125, "learning_rate": 0.00016247578457472927, "loss": 0.4608, "step": 41455 }, { "epoch": 1.0524044624386033, "grad_norm": 0.349609375, "learning_rate": 0.00016244267190575255, "loss": 0.4148, "step": 41460 }, { "epoch": 1.052531380487619, "grad_norm": 0.357421875, "learning_rate": 0.00016240955862621896, "loss": 0.4292, "step": 41465 }, { "epoch": 1.0526582985366348, "grad_norm": 0.353515625, "learning_rate": 0.00016237644473775326, "loss": 0.404, "step": 41470 }, { "epoch": 1.0527852165856506, "grad_norm": 0.34375, "learning_rate": 0.00016234333024198034, "loss": 0.3903, "step": 41475 }, { "epoch": 1.0529121346346664, "grad_norm": 0.33984375, "learning_rate": 0.0001623102151405251, "loss": 0.4293, "step": 41480 }, { "epoch": 1.0530390526836821, "grad_norm": 0.37890625, "learning_rate": 0.0001622770994350125, "loss": 0.4448, "step": 41485 }, { "epoch": 1.053165970732698, "grad_norm": 0.35546875, "learning_rate": 0.0001622439831270676, "loss": 0.4296, "step": 41490 }, { "epoch": 1.0532928887817137, "grad_norm": 0.388671875, "learning_rate": 0.00016221086621831524, "loss": 0.4409, "step": 41495 }, { "epoch": 1.0534198068307294, "grad_norm": 0.341796875, "learning_rate": 0.00016217774871038056, "loss": 0.4255, "step": 41500 }, { "epoch": 1.0535467248797452, "grad_norm": 0.349609375, "learning_rate": 0.00016214463060488862, "loss": 0.436, "step": 41505 }, { "epoch": 1.053673642928761, "grad_norm": 0.3359375, "learning_rate": 0.00016211151190346444, "loss": 0.3976, "step": 41510 }, { "epoch": 1.0538005609777767, "grad_norm": 0.357421875, "learning_rate": 0.00016207839260773323, "loss": 0.4196, "step": 41515 }, { "epoch": 1.0539274790267923, "grad_norm": 0.349609375, "learning_rate": 0.00016204527271932012, "loss": 0.4165, "step": 41520 }, { "epoch": 1.054054397075808, "grad_norm": 0.359375, "learning_rate": 0.0001620121522398503, "loss": 0.4008, "step": 41525 }, { "epoch": 1.0541813151248238, "grad_norm": 0.333984375, "learning_rate": 0.0001619790311709489, "loss": 0.3949, "step": 41530 }, { "epoch": 1.0543082331738396, "grad_norm": 0.357421875, "learning_rate": 0.00016194590951424122, "loss": 0.4268, "step": 41535 }, { "epoch": 1.0544351512228554, "grad_norm": 0.349609375, "learning_rate": 0.00016191278727135255, "loss": 0.3895, "step": 41540 }, { "epoch": 1.0545620692718711, "grad_norm": 0.349609375, "learning_rate": 0.00016187966444390812, "loss": 0.3879, "step": 41545 }, { "epoch": 1.054688987320887, "grad_norm": 0.333984375, "learning_rate": 0.0001618465410335333, "loss": 0.4035, "step": 41550 }, { "epoch": 1.0548159053699027, "grad_norm": 0.33203125, "learning_rate": 0.00016181341704185346, "loss": 0.3923, "step": 41555 }, { "epoch": 1.0549428234189184, "grad_norm": 0.349609375, "learning_rate": 0.00016178029247049394, "loss": 0.4293, "step": 41560 }, { "epoch": 1.0550697414679342, "grad_norm": 0.365234375, "learning_rate": 0.00016174716732108015, "loss": 0.4134, "step": 41565 }, { "epoch": 1.05519665951695, "grad_norm": 0.357421875, "learning_rate": 0.00016171404159523754, "loss": 0.4231, "step": 41570 }, { "epoch": 1.0553235775659657, "grad_norm": 0.31640625, "learning_rate": 0.00016168091529459154, "loss": 0.4076, "step": 41575 }, { "epoch": 1.0554504956149815, "grad_norm": 0.359375, "learning_rate": 0.00016164778842076772, "loss": 0.4235, "step": 41580 }, { "epoch": 1.0555774136639973, "grad_norm": 0.33984375, "learning_rate": 0.00016161466097539155, "loss": 0.4199, "step": 41585 }, { "epoch": 1.0557043317130128, "grad_norm": 0.3515625, "learning_rate": 0.0001615815329600886, "loss": 0.4242, "step": 41590 }, { "epoch": 1.0558312497620286, "grad_norm": 0.337890625, "learning_rate": 0.00016154840437648444, "loss": 0.45, "step": 41595 }, { "epoch": 1.0559581678110443, "grad_norm": 0.353515625, "learning_rate": 0.0001615152752262047, "loss": 0.4089, "step": 41600 }, { "epoch": 1.0560850858600601, "grad_norm": 0.326171875, "learning_rate": 0.00016148214551087493, "loss": 0.4233, "step": 41605 }, { "epoch": 1.0562120039090759, "grad_norm": 0.357421875, "learning_rate": 0.00016144901523212088, "loss": 0.4245, "step": 41610 }, { "epoch": 1.0563389219580916, "grad_norm": 0.326171875, "learning_rate": 0.00016141588439156824, "loss": 0.3904, "step": 41615 }, { "epoch": 1.0564658400071074, "grad_norm": 0.30859375, "learning_rate": 0.00016138275299084265, "loss": 0.4206, "step": 41620 }, { "epoch": 1.0565927580561232, "grad_norm": 0.349609375, "learning_rate": 0.00016134962103156998, "loss": 0.3974, "step": 41625 }, { "epoch": 1.056719676105139, "grad_norm": 0.3515625, "learning_rate": 0.0001613164885153759, "loss": 0.4401, "step": 41630 }, { "epoch": 1.0568465941541547, "grad_norm": 0.36328125, "learning_rate": 0.00016128335544388623, "loss": 0.4245, "step": 41635 }, { "epoch": 1.0569735122031705, "grad_norm": 0.369140625, "learning_rate": 0.00016125022181872676, "loss": 0.4163, "step": 41640 }, { "epoch": 1.0571004302521863, "grad_norm": 0.353515625, "learning_rate": 0.00016121708764152344, "loss": 0.4067, "step": 41645 }, { "epoch": 1.057227348301202, "grad_norm": 0.33203125, "learning_rate": 0.00016118395291390205, "loss": 0.415, "step": 41650 }, { "epoch": 1.0573542663502176, "grad_norm": 0.353515625, "learning_rate": 0.0001611508176374886, "loss": 0.437, "step": 41655 }, { "epoch": 1.0574811843992333, "grad_norm": 0.35546875, "learning_rate": 0.00016111768181390894, "loss": 0.3903, "step": 41660 }, { "epoch": 1.057608102448249, "grad_norm": 0.357421875, "learning_rate": 0.00016108454544478906, "loss": 0.4066, "step": 41665 }, { "epoch": 1.0577350204972649, "grad_norm": 0.34375, "learning_rate": 0.00016105140853175492, "loss": 0.458, "step": 41670 }, { "epoch": 1.0578619385462806, "grad_norm": 0.34765625, "learning_rate": 0.00016101827107643262, "loss": 0.3874, "step": 41675 }, { "epoch": 1.0579888565952964, "grad_norm": 0.328125, "learning_rate": 0.00016098513308044813, "loss": 0.4239, "step": 41680 }, { "epoch": 1.0581157746443122, "grad_norm": 0.34375, "learning_rate": 0.00016095199454542756, "loss": 0.4016, "step": 41685 }, { "epoch": 1.058242692693328, "grad_norm": 0.34765625, "learning_rate": 0.00016091885547299699, "loss": 0.4294, "step": 41690 }, { "epoch": 1.0583696107423437, "grad_norm": 0.34765625, "learning_rate": 0.00016088571586478248, "loss": 0.3997, "step": 41695 }, { "epoch": 1.0584965287913595, "grad_norm": 0.3515625, "learning_rate": 0.0001608525757224102, "loss": 0.4284, "step": 41700 }, { "epoch": 1.0586234468403752, "grad_norm": 0.6015625, "learning_rate": 0.0001608194350475064, "loss": 0.4155, "step": 41705 }, { "epoch": 1.058750364889391, "grad_norm": 0.32421875, "learning_rate": 0.00016078629384169723, "loss": 0.4111, "step": 41710 }, { "epoch": 1.0588772829384068, "grad_norm": 0.359375, "learning_rate": 0.00016075315210660894, "loss": 0.4388, "step": 41715 }, { "epoch": 1.0590042009874223, "grad_norm": 0.3671875, "learning_rate": 0.00016072000984386777, "loss": 0.429, "step": 41720 }, { "epoch": 1.059131119036438, "grad_norm": 0.3359375, "learning_rate": 0.00016068686705509994, "loss": 0.3807, "step": 41725 }, { "epoch": 1.0592580370854539, "grad_norm": 0.341796875, "learning_rate": 0.00016065372374193178, "loss": 0.4187, "step": 41730 }, { "epoch": 1.0593849551344696, "grad_norm": 0.392578125, "learning_rate": 0.0001606205799059897, "loss": 0.4617, "step": 41735 }, { "epoch": 1.0595118731834854, "grad_norm": 0.345703125, "learning_rate": 0.0001605874355489, "loss": 0.3974, "step": 41740 }, { "epoch": 1.0596387912325012, "grad_norm": 0.34375, "learning_rate": 0.00016055429067228898, "loss": 0.4135, "step": 41745 }, { "epoch": 1.059765709281517, "grad_norm": 0.34765625, "learning_rate": 0.00016052114527778316, "loss": 0.4201, "step": 41750 }, { "epoch": 1.0598926273305327, "grad_norm": 0.33984375, "learning_rate": 0.00016048799936700894, "loss": 0.4032, "step": 41755 }, { "epoch": 1.0600195453795485, "grad_norm": 0.32421875, "learning_rate": 0.0001604548529415928, "loss": 0.4048, "step": 41760 }, { "epoch": 1.0601464634285642, "grad_norm": 0.35546875, "learning_rate": 0.00016042170600316115, "loss": 0.4249, "step": 41765 }, { "epoch": 1.06027338147758, "grad_norm": 0.32421875, "learning_rate": 0.00016038855855334054, "loss": 0.4061, "step": 41770 }, { "epoch": 1.0604002995265958, "grad_norm": 0.36328125, "learning_rate": 0.0001603554105937575, "loss": 0.4545, "step": 41775 }, { "epoch": 1.0605272175756115, "grad_norm": 0.359375, "learning_rate": 0.0001603222621260386, "loss": 0.4216, "step": 41780 }, { "epoch": 1.060654135624627, "grad_norm": 0.373046875, "learning_rate": 0.0001602891131518104, "loss": 0.4437, "step": 41785 }, { "epoch": 1.0607810536736428, "grad_norm": 0.392578125, "learning_rate": 0.00016025596367269952, "loss": 0.4425, "step": 41790 }, { "epoch": 1.0609079717226586, "grad_norm": 0.328125, "learning_rate": 0.00016022281369033255, "loss": 0.4025, "step": 41795 }, { "epoch": 1.0610348897716744, "grad_norm": 0.373046875, "learning_rate": 0.00016018966320633624, "loss": 0.4414, "step": 41800 }, { "epoch": 1.0611618078206901, "grad_norm": 0.328125, "learning_rate": 0.0001601565122223372, "loss": 0.4251, "step": 41805 }, { "epoch": 1.061288725869706, "grad_norm": 0.34765625, "learning_rate": 0.00016012336073996215, "loss": 0.4375, "step": 41810 }, { "epoch": 1.0614156439187217, "grad_norm": 0.3515625, "learning_rate": 0.00016009020876083782, "loss": 0.4312, "step": 41815 }, { "epoch": 1.0615425619677374, "grad_norm": 0.341796875, "learning_rate": 0.00016005705628659094, "loss": 0.4224, "step": 41820 }, { "epoch": 1.0616694800167532, "grad_norm": 0.359375, "learning_rate": 0.00016002390331884835, "loss": 0.4335, "step": 41825 }, { "epoch": 1.061796398065769, "grad_norm": 0.318359375, "learning_rate": 0.0001599907498592368, "loss": 0.4066, "step": 41830 }, { "epoch": 1.0619233161147847, "grad_norm": 0.349609375, "learning_rate": 0.00015995759590938313, "loss": 0.4338, "step": 41835 }, { "epoch": 1.0620502341638005, "grad_norm": 0.3359375, "learning_rate": 0.00015992444147091422, "loss": 0.4275, "step": 41840 }, { "epoch": 1.0621771522128163, "grad_norm": 0.33203125, "learning_rate": 0.00015989128654545694, "loss": 0.4103, "step": 41845 }, { "epoch": 1.062304070261832, "grad_norm": 0.29296875, "learning_rate": 0.00015985813113463814, "loss": 0.4262, "step": 41850 }, { "epoch": 1.0624309883108476, "grad_norm": 0.341796875, "learning_rate": 0.00015982497524008474, "loss": 0.4275, "step": 41855 }, { "epoch": 1.0625579063598634, "grad_norm": 0.3671875, "learning_rate": 0.00015979181886342374, "loss": 0.4143, "step": 41860 }, { "epoch": 1.0626848244088791, "grad_norm": 0.322265625, "learning_rate": 0.00015975866200628207, "loss": 0.3721, "step": 41865 }, { "epoch": 1.062811742457895, "grad_norm": 0.32421875, "learning_rate": 0.00015972550467028678, "loss": 0.4014, "step": 41870 }, { "epoch": 1.0629386605069107, "grad_norm": 0.341796875, "learning_rate": 0.00015969234685706484, "loss": 0.411, "step": 41875 }, { "epoch": 1.0630655785559264, "grad_norm": 0.330078125, "learning_rate": 0.0001596591885682433, "loss": 0.3978, "step": 41880 }, { "epoch": 1.0631924966049422, "grad_norm": 0.337890625, "learning_rate": 0.00015962602980544924, "loss": 0.4009, "step": 41885 }, { "epoch": 1.063319414653958, "grad_norm": 0.3515625, "learning_rate": 0.00015959287057030967, "loss": 0.4304, "step": 41890 }, { "epoch": 1.0634463327029737, "grad_norm": 0.33203125, "learning_rate": 0.00015955971086445184, "loss": 0.4177, "step": 41895 }, { "epoch": 1.0635732507519895, "grad_norm": 0.318359375, "learning_rate": 0.00015952655068950276, "loss": 0.396, "step": 41900 }, { "epoch": 1.0637001688010053, "grad_norm": 0.35546875, "learning_rate": 0.00015949339004708962, "loss": 0.4213, "step": 41905 }, { "epoch": 1.063827086850021, "grad_norm": 0.375, "learning_rate": 0.00015946022893883967, "loss": 0.4183, "step": 41910 }, { "epoch": 1.0639540048990366, "grad_norm": 0.373046875, "learning_rate": 0.00015942706736638, "loss": 0.4049, "step": 41915 }, { "epoch": 1.0640809229480523, "grad_norm": 0.349609375, "learning_rate": 0.0001593939053313379, "loss": 0.4338, "step": 41920 }, { "epoch": 1.064207840997068, "grad_norm": 0.361328125, "learning_rate": 0.00015936074283534062, "loss": 0.4152, "step": 41925 }, { "epoch": 1.0643347590460839, "grad_norm": 0.357421875, "learning_rate": 0.0001593275798800154, "loss": 0.4022, "step": 41930 }, { "epoch": 1.0644616770950996, "grad_norm": 0.373046875, "learning_rate": 0.00015929441646698962, "loss": 0.4404, "step": 41935 }, { "epoch": 1.0645885951441154, "grad_norm": 0.349609375, "learning_rate": 0.00015926125259789047, "loss": 0.4296, "step": 41940 }, { "epoch": 1.0647155131931312, "grad_norm": 0.361328125, "learning_rate": 0.00015922808827434533, "loss": 0.4375, "step": 41945 }, { "epoch": 1.064842431242147, "grad_norm": 0.359375, "learning_rate": 0.0001591949234979816, "loss": 0.4056, "step": 41950 }, { "epoch": 1.0649693492911627, "grad_norm": 0.359375, "learning_rate": 0.0001591617582704266, "loss": 0.431, "step": 41955 }, { "epoch": 1.0650962673401785, "grad_norm": 0.31640625, "learning_rate": 0.00015912859259330778, "loss": 0.3969, "step": 41960 }, { "epoch": 1.0652231853891942, "grad_norm": 0.34375, "learning_rate": 0.0001590954264682526, "loss": 0.4182, "step": 41965 }, { "epoch": 1.06535010343821, "grad_norm": 0.35546875, "learning_rate": 0.00015906225989688845, "loss": 0.3966, "step": 41970 }, { "epoch": 1.0654770214872258, "grad_norm": 0.306640625, "learning_rate": 0.00015902909288084284, "loss": 0.4157, "step": 41975 }, { "epoch": 1.0656039395362416, "grad_norm": 0.34765625, "learning_rate": 0.0001589959254217432, "loss": 0.4191, "step": 41980 }, { "epoch": 1.065730857585257, "grad_norm": 0.357421875, "learning_rate": 0.00015896275752121707, "loss": 0.4254, "step": 41985 }, { "epoch": 1.0658577756342729, "grad_norm": 0.318359375, "learning_rate": 0.000158929589180892, "loss": 0.4495, "step": 41990 }, { "epoch": 1.0659846936832886, "grad_norm": 0.35546875, "learning_rate": 0.00015889642040239557, "loss": 0.4264, "step": 41995 }, { "epoch": 1.0661116117323044, "grad_norm": 0.3671875, "learning_rate": 0.00015886325118735534, "loss": 0.4325, "step": 42000 }, { "epoch": 1.0662385297813202, "grad_norm": 0.33984375, "learning_rate": 0.00015883008153739893, "loss": 0.4097, "step": 42005 }, { "epoch": 1.066365447830336, "grad_norm": 0.349609375, "learning_rate": 0.00015879691145415388, "loss": 0.425, "step": 42010 }, { "epoch": 1.0664923658793517, "grad_norm": 0.34375, "learning_rate": 0.0001587637409392479, "loss": 0.4358, "step": 42015 }, { "epoch": 1.0666192839283675, "grad_norm": 0.3359375, "learning_rate": 0.00015873056999430864, "loss": 0.4244, "step": 42020 }, { "epoch": 1.0667462019773832, "grad_norm": 0.326171875, "learning_rate": 0.0001586973986209638, "loss": 0.4248, "step": 42025 }, { "epoch": 1.066873120026399, "grad_norm": 0.330078125, "learning_rate": 0.00015866422682084106, "loss": 0.4389, "step": 42030 }, { "epoch": 1.0670000380754148, "grad_norm": 0.39453125, "learning_rate": 0.0001586310545955682, "loss": 0.4612, "step": 42035 }, { "epoch": 1.0671269561244305, "grad_norm": 0.345703125, "learning_rate": 0.0001585978819467729, "loss": 0.4309, "step": 42040 }, { "epoch": 1.0672538741734463, "grad_norm": 0.328125, "learning_rate": 0.00015856470887608298, "loss": 0.4171, "step": 42045 }, { "epoch": 1.0673807922224618, "grad_norm": 0.314453125, "learning_rate": 0.00015853153538512618, "loss": 0.3888, "step": 42050 }, { "epoch": 1.0675077102714776, "grad_norm": 0.33203125, "learning_rate": 0.00015849836147553033, "loss": 0.4019, "step": 42055 }, { "epoch": 1.0676346283204934, "grad_norm": 0.330078125, "learning_rate": 0.0001584651871489233, "loss": 0.4098, "step": 42060 }, { "epoch": 1.0677615463695092, "grad_norm": 0.326171875, "learning_rate": 0.0001584320124069329, "loss": 0.4053, "step": 42065 }, { "epoch": 1.067888464418525, "grad_norm": 0.37109375, "learning_rate": 0.000158398837251187, "loss": 0.4272, "step": 42070 }, { "epoch": 1.0680153824675407, "grad_norm": 0.3359375, "learning_rate": 0.0001583656616833135, "loss": 0.4274, "step": 42075 }, { "epoch": 1.0681423005165565, "grad_norm": 0.392578125, "learning_rate": 0.0001583324857049403, "loss": 0.4295, "step": 42080 }, { "epoch": 1.0682692185655722, "grad_norm": 0.35546875, "learning_rate": 0.00015829930931769538, "loss": 0.3915, "step": 42085 }, { "epoch": 1.068396136614588, "grad_norm": 0.34375, "learning_rate": 0.00015826613252320666, "loss": 0.4033, "step": 42090 }, { "epoch": 1.0685230546636038, "grad_norm": 0.33984375, "learning_rate": 0.00015823295532310208, "loss": 0.4341, "step": 42095 }, { "epoch": 1.0686499727126195, "grad_norm": 0.353515625, "learning_rate": 0.0001581997777190097, "loss": 0.3986, "step": 42100 }, { "epoch": 1.0687768907616353, "grad_norm": 0.353515625, "learning_rate": 0.00015816659971255745, "loss": 0.4281, "step": 42105 }, { "epoch": 1.068903808810651, "grad_norm": 0.3515625, "learning_rate": 0.0001581334213053734, "loss": 0.3958, "step": 42110 }, { "epoch": 1.0690307268596668, "grad_norm": 0.357421875, "learning_rate": 0.00015810024249908564, "loss": 0.4524, "step": 42115 }, { "epoch": 1.0691576449086824, "grad_norm": 0.34375, "learning_rate": 0.0001580670632953222, "loss": 0.4642, "step": 42120 }, { "epoch": 1.0692845629576981, "grad_norm": 0.35546875, "learning_rate": 0.0001580338836957112, "loss": 0.4144, "step": 42125 }, { "epoch": 1.069411481006714, "grad_norm": 0.349609375, "learning_rate": 0.0001580007037018807, "loss": 0.4525, "step": 42130 }, { "epoch": 1.0695383990557297, "grad_norm": 0.337890625, "learning_rate": 0.00015796752331545885, "loss": 0.4269, "step": 42135 }, { "epoch": 1.0696653171047454, "grad_norm": 0.33984375, "learning_rate": 0.00015793434253807382, "loss": 0.4104, "step": 42140 }, { "epoch": 1.0697922351537612, "grad_norm": 0.328125, "learning_rate": 0.00015790116137135373, "loss": 0.4208, "step": 42145 }, { "epoch": 1.069919153202777, "grad_norm": 0.3359375, "learning_rate": 0.00015786797981692682, "loss": 0.4153, "step": 42150 }, { "epoch": 1.0700460712517927, "grad_norm": 0.36328125, "learning_rate": 0.00015783479787642127, "loss": 0.4037, "step": 42155 }, { "epoch": 1.0701729893008085, "grad_norm": 0.345703125, "learning_rate": 0.00015780161555146532, "loss": 0.4314, "step": 42160 }, { "epoch": 1.0702999073498243, "grad_norm": 0.361328125, "learning_rate": 0.00015776843284368724, "loss": 0.4461, "step": 42165 }, { "epoch": 1.07042682539884, "grad_norm": 0.337890625, "learning_rate": 0.00015773524975471522, "loss": 0.414, "step": 42170 }, { "epoch": 1.0705537434478558, "grad_norm": 0.33984375, "learning_rate": 0.00015770206628617752, "loss": 0.4275, "step": 42175 }, { "epoch": 1.0706806614968714, "grad_norm": 0.35546875, "learning_rate": 0.00015766888243970258, "loss": 0.415, "step": 42180 }, { "epoch": 1.0708075795458871, "grad_norm": 0.341796875, "learning_rate": 0.0001576356982169186, "loss": 0.413, "step": 42185 }, { "epoch": 1.070934497594903, "grad_norm": 0.34375, "learning_rate": 0.00015760251361945392, "loss": 0.4429, "step": 42190 }, { "epoch": 1.0710614156439187, "grad_norm": 0.3359375, "learning_rate": 0.00015756932864893699, "loss": 0.4387, "step": 42195 }, { "epoch": 1.0711883336929344, "grad_norm": 0.326171875, "learning_rate": 0.0001575361433069961, "loss": 0.4297, "step": 42200 }, { "epoch": 1.0713152517419502, "grad_norm": 0.345703125, "learning_rate": 0.00015750295759525958, "loss": 0.4199, "step": 42205 }, { "epoch": 1.071442169790966, "grad_norm": 0.3515625, "learning_rate": 0.00015746977151535603, "loss": 0.3944, "step": 42210 }, { "epoch": 1.0715690878399817, "grad_norm": 0.33203125, "learning_rate": 0.00015743658506891373, "loss": 0.4298, "step": 42215 }, { "epoch": 1.0716960058889975, "grad_norm": 0.34765625, "learning_rate": 0.00015740339825756115, "loss": 0.401, "step": 42220 }, { "epoch": 1.0718229239380133, "grad_norm": 0.34765625, "learning_rate": 0.00015737021108292676, "loss": 0.4286, "step": 42225 }, { "epoch": 1.071949841987029, "grad_norm": 0.36328125, "learning_rate": 0.00015733702354663907, "loss": 0.4008, "step": 42230 }, { "epoch": 1.0720767600360448, "grad_norm": 0.318359375, "learning_rate": 0.0001573038356503265, "loss": 0.4006, "step": 42235 }, { "epoch": 1.0722036780850606, "grad_norm": 0.330078125, "learning_rate": 0.00015727064739561765, "loss": 0.4423, "step": 42240 }, { "epoch": 1.0723305961340763, "grad_norm": 0.34375, "learning_rate": 0.0001572374587841411, "loss": 0.418, "step": 42245 }, { "epoch": 1.0724575141830919, "grad_norm": 0.3359375, "learning_rate": 0.00015720426981752526, "loss": 0.4136, "step": 42250 }, { "epoch": 1.0725844322321076, "grad_norm": 0.34375, "learning_rate": 0.00015717108049739882, "loss": 0.4544, "step": 42255 }, { "epoch": 1.0727113502811234, "grad_norm": 0.34375, "learning_rate": 0.00015713789082539027, "loss": 0.4179, "step": 42260 }, { "epoch": 1.0728382683301392, "grad_norm": 0.359375, "learning_rate": 0.00015710470080312826, "loss": 0.4193, "step": 42265 }, { "epoch": 1.072965186379155, "grad_norm": 0.328125, "learning_rate": 0.00015707151043224145, "loss": 0.4056, "step": 42270 }, { "epoch": 1.0730921044281707, "grad_norm": 0.31640625, "learning_rate": 0.0001570383197143584, "loss": 0.3814, "step": 42275 }, { "epoch": 1.0732190224771865, "grad_norm": 0.33203125, "learning_rate": 0.00015700512865110783, "loss": 0.4097, "step": 42280 }, { "epoch": 1.0733459405262022, "grad_norm": 0.3203125, "learning_rate": 0.00015697193724411836, "loss": 0.4186, "step": 42285 }, { "epoch": 1.073472858575218, "grad_norm": 0.35546875, "learning_rate": 0.00015693874549501877, "loss": 0.4357, "step": 42290 }, { "epoch": 1.0735997766242338, "grad_norm": 0.353515625, "learning_rate": 0.00015690555340543762, "loss": 0.4233, "step": 42295 }, { "epoch": 1.0737266946732495, "grad_norm": 0.35546875, "learning_rate": 0.00015687236097700375, "loss": 0.432, "step": 42300 }, { "epoch": 1.0738536127222653, "grad_norm": 0.369140625, "learning_rate": 0.00015683916821134587, "loss": 0.4487, "step": 42305 }, { "epoch": 1.073980530771281, "grad_norm": 0.345703125, "learning_rate": 0.0001568059751100927, "loss": 0.4276, "step": 42310 }, { "epoch": 1.0741074488202966, "grad_norm": 0.349609375, "learning_rate": 0.00015677278167487309, "loss": 0.4285, "step": 42315 }, { "epoch": 1.0742343668693124, "grad_norm": 0.384765625, "learning_rate": 0.00015673958790731577, "loss": 0.4507, "step": 42320 }, { "epoch": 1.0743612849183282, "grad_norm": 0.333984375, "learning_rate": 0.00015670639380904955, "loss": 0.4214, "step": 42325 }, { "epoch": 1.074488202967344, "grad_norm": 0.37890625, "learning_rate": 0.00015667319938170325, "loss": 0.4283, "step": 42330 }, { "epoch": 1.0746151210163597, "grad_norm": 0.36328125, "learning_rate": 0.0001566400046269057, "loss": 0.4099, "step": 42335 }, { "epoch": 1.0747420390653755, "grad_norm": 0.337890625, "learning_rate": 0.0001566068095462858, "loss": 0.4306, "step": 42340 }, { "epoch": 1.0748689571143912, "grad_norm": 0.341796875, "learning_rate": 0.0001565736141414724, "loss": 0.4238, "step": 42345 }, { "epoch": 1.074995875163407, "grad_norm": 0.333984375, "learning_rate": 0.00015654041841409435, "loss": 0.4226, "step": 42350 }, { "epoch": 1.0751227932124228, "grad_norm": 0.359375, "learning_rate": 0.00015650722236578056, "loss": 0.4115, "step": 42355 }, { "epoch": 1.0752497112614385, "grad_norm": 0.34765625, "learning_rate": 0.00015647402599815998, "loss": 0.3946, "step": 42360 }, { "epoch": 1.0753766293104543, "grad_norm": 0.35546875, "learning_rate": 0.0001564408293128615, "loss": 0.4201, "step": 42365 }, { "epoch": 1.07550354735947, "grad_norm": 0.333984375, "learning_rate": 0.0001564076323115141, "loss": 0.4096, "step": 42370 }, { "epoch": 1.0756304654084858, "grad_norm": 0.390625, "learning_rate": 0.00015637443499574676, "loss": 0.4099, "step": 42375 }, { "epoch": 1.0757573834575016, "grad_norm": 0.35546875, "learning_rate": 0.00015634123736718843, "loss": 0.4171, "step": 42380 }, { "epoch": 1.0758843015065171, "grad_norm": 0.3671875, "learning_rate": 0.0001563080394274681, "loss": 0.4258, "step": 42385 }, { "epoch": 1.076011219555533, "grad_norm": 0.36328125, "learning_rate": 0.00015627484117821473, "loss": 0.4728, "step": 42390 }, { "epoch": 1.0761381376045487, "grad_norm": 0.419921875, "learning_rate": 0.00015624164262105744, "loss": 0.4393, "step": 42395 }, { "epoch": 1.0762650556535645, "grad_norm": 0.326171875, "learning_rate": 0.00015620844375762526, "loss": 0.4336, "step": 42400 }, { "epoch": 1.0763919737025802, "grad_norm": 0.314453125, "learning_rate": 0.00015617524458954723, "loss": 0.413, "step": 42405 }, { "epoch": 1.076518891751596, "grad_norm": 0.375, "learning_rate": 0.0001561420451184524, "loss": 0.4459, "step": 42410 }, { "epoch": 1.0766458098006118, "grad_norm": 0.361328125, "learning_rate": 0.00015610884534596984, "loss": 0.439, "step": 42415 }, { "epoch": 1.0767727278496275, "grad_norm": 0.37109375, "learning_rate": 0.00015607564527372867, "loss": 0.4171, "step": 42420 }, { "epoch": 1.0768996458986433, "grad_norm": 0.322265625, "learning_rate": 0.00015604244490335798, "loss": 0.3936, "step": 42425 }, { "epoch": 1.077026563947659, "grad_norm": 0.359375, "learning_rate": 0.000156009244236487, "loss": 0.4179, "step": 42430 }, { "epoch": 1.0771534819966748, "grad_norm": 0.345703125, "learning_rate": 0.00015597604327474473, "loss": 0.4209, "step": 42435 }, { "epoch": 1.0772804000456906, "grad_norm": 0.333984375, "learning_rate": 0.00015594284201976043, "loss": 0.4156, "step": 42440 }, { "epoch": 1.0774073180947061, "grad_norm": 0.365234375, "learning_rate": 0.00015590964047316322, "loss": 0.4108, "step": 42445 }, { "epoch": 1.077534236143722, "grad_norm": 0.33203125, "learning_rate": 0.00015587643863658237, "loss": 0.4733, "step": 42450 }, { "epoch": 1.0776611541927377, "grad_norm": 0.34375, "learning_rate": 0.00015584323651164693, "loss": 0.4218, "step": 42455 }, { "epoch": 1.0777880722417534, "grad_norm": 0.357421875, "learning_rate": 0.00015581003409998625, "loss": 0.4165, "step": 42460 }, { "epoch": 1.0779149902907692, "grad_norm": 0.34375, "learning_rate": 0.0001557768314032295, "loss": 0.428, "step": 42465 }, { "epoch": 1.078041908339785, "grad_norm": 0.318359375, "learning_rate": 0.00015574362842300598, "loss": 0.4213, "step": 42470 }, { "epoch": 1.0781688263888007, "grad_norm": 0.349609375, "learning_rate": 0.00015571042516094482, "loss": 0.4518, "step": 42475 }, { "epoch": 1.0782957444378165, "grad_norm": 0.35546875, "learning_rate": 0.00015567722161867543, "loss": 0.4279, "step": 42480 }, { "epoch": 1.0784226624868323, "grad_norm": 0.33984375, "learning_rate": 0.000155644017797827, "loss": 0.3895, "step": 42485 }, { "epoch": 1.078549580535848, "grad_norm": 0.341796875, "learning_rate": 0.00015561081370002886, "loss": 0.423, "step": 42490 }, { "epoch": 1.0786764985848638, "grad_norm": 0.353515625, "learning_rate": 0.00015557760932691036, "loss": 0.4207, "step": 42495 }, { "epoch": 1.0788034166338796, "grad_norm": 0.345703125, "learning_rate": 0.0001555444046801008, "loss": 0.448, "step": 42500 }, { "epoch": 1.0789303346828953, "grad_norm": 0.3515625, "learning_rate": 0.00015551119976122947, "loss": 0.4171, "step": 42505 }, { "epoch": 1.079057252731911, "grad_norm": 0.357421875, "learning_rate": 0.00015547799457192577, "loss": 0.3929, "step": 42510 }, { "epoch": 1.0791841707809267, "grad_norm": 0.369140625, "learning_rate": 0.0001554447891138191, "loss": 0.4362, "step": 42515 }, { "epoch": 1.0793110888299424, "grad_norm": 0.330078125, "learning_rate": 0.00015541158338853875, "loss": 0.392, "step": 42520 }, { "epoch": 1.0794380068789582, "grad_norm": 0.345703125, "learning_rate": 0.00015537837739771417, "loss": 0.436, "step": 42525 }, { "epoch": 1.079564924927974, "grad_norm": 0.353515625, "learning_rate": 0.00015534517114297475, "loss": 0.3937, "step": 42530 }, { "epoch": 1.0796918429769897, "grad_norm": 0.3359375, "learning_rate": 0.00015531196462594996, "loss": 0.4338, "step": 42535 }, { "epoch": 1.0798187610260055, "grad_norm": 0.357421875, "learning_rate": 0.00015527875784826912, "loss": 0.4409, "step": 42540 }, { "epoch": 1.0799456790750213, "grad_norm": 0.36328125, "learning_rate": 0.00015524555081156182, "loss": 0.425, "step": 42545 }, { "epoch": 1.080072597124037, "grad_norm": 0.369140625, "learning_rate": 0.0001552123435174573, "loss": 0.3914, "step": 42550 }, { "epoch": 1.0801995151730528, "grad_norm": 0.330078125, "learning_rate": 0.00015517913596758523, "loss": 0.4284, "step": 42555 }, { "epoch": 1.0803264332220686, "grad_norm": 0.341796875, "learning_rate": 0.00015514592816357503, "loss": 0.4175, "step": 42560 }, { "epoch": 1.0804533512710843, "grad_norm": 0.333984375, "learning_rate": 0.00015511272010705618, "loss": 0.4072, "step": 42565 }, { "epoch": 1.0805802693201, "grad_norm": 0.33203125, "learning_rate": 0.0001550795117996582, "loss": 0.4143, "step": 42570 }, { "epoch": 1.0807071873691159, "grad_norm": 0.375, "learning_rate": 0.0001550463032430106, "loss": 0.4387, "step": 42575 }, { "epoch": 1.0808341054181314, "grad_norm": 0.37890625, "learning_rate": 0.00015501309443874292, "loss": 0.4728, "step": 42580 }, { "epoch": 1.0809610234671472, "grad_norm": 0.365234375, "learning_rate": 0.00015497988538848464, "loss": 0.4195, "step": 42585 }, { "epoch": 1.081087941516163, "grad_norm": 0.322265625, "learning_rate": 0.00015494667609386537, "loss": 0.3874, "step": 42590 }, { "epoch": 1.0812148595651787, "grad_norm": 0.37890625, "learning_rate": 0.00015491346655651468, "loss": 0.4353, "step": 42595 }, { "epoch": 1.0813417776141945, "grad_norm": 0.357421875, "learning_rate": 0.0001548802567780622, "loss": 0.4301, "step": 42600 }, { "epoch": 1.0814686956632102, "grad_norm": 0.361328125, "learning_rate": 0.00015484704676013743, "loss": 0.4513, "step": 42605 }, { "epoch": 1.081595613712226, "grad_norm": 0.36328125, "learning_rate": 0.00015481383650437001, "loss": 0.4369, "step": 42610 }, { "epoch": 1.0817225317612418, "grad_norm": 0.353515625, "learning_rate": 0.00015478062601238953, "loss": 0.4443, "step": 42615 }, { "epoch": 1.0818494498102575, "grad_norm": 0.353515625, "learning_rate": 0.00015474741528582567, "loss": 0.4166, "step": 42620 }, { "epoch": 1.0819763678592733, "grad_norm": 0.3828125, "learning_rate": 0.000154714204326308, "loss": 0.4431, "step": 42625 }, { "epoch": 1.082103285908289, "grad_norm": 0.3515625, "learning_rate": 0.00015468099313546623, "loss": 0.41, "step": 42630 }, { "epoch": 1.0822302039573048, "grad_norm": 0.353515625, "learning_rate": 0.00015464778171492995, "loss": 0.4081, "step": 42635 }, { "epoch": 1.0823571220063206, "grad_norm": 0.3671875, "learning_rate": 0.0001546145700663289, "loss": 0.4478, "step": 42640 }, { "epoch": 1.0824840400553364, "grad_norm": 0.3515625, "learning_rate": 0.00015458135819129272, "loss": 0.4168, "step": 42645 }, { "epoch": 1.082610958104352, "grad_norm": 0.359375, "learning_rate": 0.00015454814609145115, "loss": 0.408, "step": 42650 }, { "epoch": 1.0827378761533677, "grad_norm": 0.3515625, "learning_rate": 0.00015451493376843385, "loss": 0.4372, "step": 42655 }, { "epoch": 1.0828647942023835, "grad_norm": 0.353515625, "learning_rate": 0.00015448172122387057, "loss": 0.4461, "step": 42660 }, { "epoch": 1.0829917122513992, "grad_norm": 0.322265625, "learning_rate": 0.00015444850845939097, "loss": 0.3996, "step": 42665 }, { "epoch": 1.083118630300415, "grad_norm": 0.3359375, "learning_rate": 0.00015441529547662486, "loss": 0.4199, "step": 42670 }, { "epoch": 1.0832455483494308, "grad_norm": 0.33203125, "learning_rate": 0.00015438208227720192, "loss": 0.4191, "step": 42675 }, { "epoch": 1.0833724663984465, "grad_norm": 0.33984375, "learning_rate": 0.00015434886886275196, "loss": 0.4046, "step": 42680 }, { "epoch": 1.0834993844474623, "grad_norm": 0.37109375, "learning_rate": 0.00015431565523490477, "loss": 0.4396, "step": 42685 }, { "epoch": 1.083626302496478, "grad_norm": 0.359375, "learning_rate": 0.0001542824413952901, "loss": 0.4411, "step": 42690 }, { "epoch": 1.0837532205454938, "grad_norm": 0.36328125, "learning_rate": 0.00015424922734553773, "loss": 0.4393, "step": 42695 }, { "epoch": 1.0838801385945096, "grad_norm": 0.34375, "learning_rate": 0.00015421601308727748, "loss": 0.4422, "step": 42700 }, { "epoch": 1.0840070566435254, "grad_norm": 0.3359375, "learning_rate": 0.00015418279862213912, "loss": 0.3915, "step": 42705 }, { "epoch": 1.084133974692541, "grad_norm": 0.3359375, "learning_rate": 0.00015414958395175248, "loss": 0.4302, "step": 42710 }, { "epoch": 1.0842608927415567, "grad_norm": 0.361328125, "learning_rate": 0.00015411636907774744, "loss": 0.4345, "step": 42715 }, { "epoch": 1.0843878107905724, "grad_norm": 0.353515625, "learning_rate": 0.00015408315400175377, "loss": 0.4209, "step": 42720 }, { "epoch": 1.0845147288395882, "grad_norm": 0.337890625, "learning_rate": 0.0001540499387254014, "loss": 0.443, "step": 42725 }, { "epoch": 1.084641646888604, "grad_norm": 0.353515625, "learning_rate": 0.00015401672325032016, "loss": 0.4272, "step": 42730 }, { "epoch": 1.0847685649376198, "grad_norm": 0.37109375, "learning_rate": 0.0001539835075781399, "loss": 0.4133, "step": 42735 }, { "epoch": 1.0848954829866355, "grad_norm": 0.361328125, "learning_rate": 0.00015395029171049048, "loss": 0.4303, "step": 42740 }, { "epoch": 1.0850224010356513, "grad_norm": 0.35546875, "learning_rate": 0.00015391707564900184, "loss": 0.4495, "step": 42745 }, { "epoch": 1.085149319084667, "grad_norm": 0.33203125, "learning_rate": 0.00015388385939530387, "loss": 0.4138, "step": 42750 }, { "epoch": 1.0852762371336828, "grad_norm": 0.34765625, "learning_rate": 0.0001538506429510264, "loss": 0.444, "step": 42755 }, { "epoch": 1.0854031551826986, "grad_norm": 0.3359375, "learning_rate": 0.0001538174263177995, "loss": 0.3755, "step": 42760 }, { "epoch": 1.0855300732317144, "grad_norm": 0.349609375, "learning_rate": 0.000153784209497253, "loss": 0.4079, "step": 42765 }, { "epoch": 1.0856569912807301, "grad_norm": 0.392578125, "learning_rate": 0.00015375099249101683, "loss": 0.4555, "step": 42770 }, { "epoch": 1.0857839093297459, "grad_norm": 0.376953125, "learning_rate": 0.0001537177753007209, "loss": 0.4402, "step": 42775 }, { "epoch": 1.0859108273787614, "grad_norm": 0.388671875, "learning_rate": 0.00015368455792799532, "loss": 0.4348, "step": 42780 }, { "epoch": 1.0860377454277772, "grad_norm": 0.365234375, "learning_rate": 0.0001536513403744699, "loss": 0.4147, "step": 42785 }, { "epoch": 1.086164663476793, "grad_norm": 0.40234375, "learning_rate": 0.00015361812264177464, "loss": 0.4188, "step": 42790 }, { "epoch": 1.0862915815258087, "grad_norm": 0.373046875, "learning_rate": 0.00015358490473153954, "loss": 0.43, "step": 42795 }, { "epoch": 1.0864184995748245, "grad_norm": 0.365234375, "learning_rate": 0.00015355168664539466, "loss": 0.4223, "step": 42800 }, { "epoch": 1.0865454176238403, "grad_norm": 0.3515625, "learning_rate": 0.00015351846838496984, "loss": 0.4183, "step": 42805 }, { "epoch": 1.086672335672856, "grad_norm": 0.353515625, "learning_rate": 0.00015348524995189523, "loss": 0.3937, "step": 42810 }, { "epoch": 1.0867992537218718, "grad_norm": 0.357421875, "learning_rate": 0.00015345203134780081, "loss": 0.4369, "step": 42815 }, { "epoch": 1.0869261717708876, "grad_norm": 0.345703125, "learning_rate": 0.00015341881257431656, "loss": 0.4273, "step": 42820 }, { "epoch": 1.0870530898199033, "grad_norm": 0.326171875, "learning_rate": 0.00015338559363307254, "loss": 0.4222, "step": 42825 }, { "epoch": 1.087180007868919, "grad_norm": 0.337890625, "learning_rate": 0.00015335237452569883, "loss": 0.3882, "step": 42830 }, { "epoch": 1.0873069259179349, "grad_norm": 0.333984375, "learning_rate": 0.00015331915525382535, "loss": 0.3889, "step": 42835 }, { "epoch": 1.0874338439669506, "grad_norm": 0.373046875, "learning_rate": 0.0001532859358190823, "loss": 0.4308, "step": 42840 }, { "epoch": 1.0875607620159662, "grad_norm": 0.345703125, "learning_rate": 0.00015325271622309967, "loss": 0.4233, "step": 42845 }, { "epoch": 1.087687680064982, "grad_norm": 0.349609375, "learning_rate": 0.00015321949646750755, "loss": 0.422, "step": 42850 }, { "epoch": 1.0878145981139977, "grad_norm": 0.3359375, "learning_rate": 0.00015318627655393606, "loss": 0.4371, "step": 42855 }, { "epoch": 1.0879415161630135, "grad_norm": 0.333984375, "learning_rate": 0.00015315305648401522, "loss": 0.4115, "step": 42860 }, { "epoch": 1.0880684342120293, "grad_norm": 0.333984375, "learning_rate": 0.00015311983625937512, "loss": 0.3902, "step": 42865 }, { "epoch": 1.088195352261045, "grad_norm": 0.3671875, "learning_rate": 0.00015308661588164598, "loss": 0.4339, "step": 42870 }, { "epoch": 1.0883222703100608, "grad_norm": 0.3515625, "learning_rate": 0.00015305339535245778, "loss": 0.4127, "step": 42875 }, { "epoch": 1.0884491883590766, "grad_norm": 0.326171875, "learning_rate": 0.00015302017467344066, "loss": 0.3681, "step": 42880 }, { "epoch": 1.0885761064080923, "grad_norm": 0.3828125, "learning_rate": 0.0001529869538462248, "loss": 0.4182, "step": 42885 }, { "epoch": 1.088703024457108, "grad_norm": 0.34375, "learning_rate": 0.0001529537328724403, "loss": 0.4165, "step": 42890 }, { "epoch": 1.0888299425061239, "grad_norm": 0.35546875, "learning_rate": 0.00015292051175371733, "loss": 0.4078, "step": 42895 }, { "epoch": 1.0889568605551396, "grad_norm": 0.353515625, "learning_rate": 0.00015288729049168595, "loss": 0.4141, "step": 42900 }, { "epoch": 1.0890837786041554, "grad_norm": 0.3671875, "learning_rate": 0.00015285406908797643, "loss": 0.4506, "step": 42905 }, { "epoch": 1.0892106966531712, "grad_norm": 0.337890625, "learning_rate": 0.00015282084754421888, "loss": 0.4282, "step": 42910 }, { "epoch": 1.0893376147021867, "grad_norm": 0.369140625, "learning_rate": 0.00015278762586204344, "loss": 0.3904, "step": 42915 }, { "epoch": 1.0894645327512025, "grad_norm": 0.36328125, "learning_rate": 0.0001527544040430803, "loss": 0.4285, "step": 42920 }, { "epoch": 1.0895914508002182, "grad_norm": 0.365234375, "learning_rate": 0.00015272118208895966, "loss": 0.4034, "step": 42925 }, { "epoch": 1.089718368849234, "grad_norm": 0.361328125, "learning_rate": 0.00015268796000131172, "loss": 0.4166, "step": 42930 }, { "epoch": 1.0898452868982498, "grad_norm": 0.33984375, "learning_rate": 0.00015265473778176662, "loss": 0.4079, "step": 42935 }, { "epoch": 1.0899722049472655, "grad_norm": 0.3359375, "learning_rate": 0.00015262151543195466, "loss": 0.4016, "step": 42940 }, { "epoch": 1.0900991229962813, "grad_norm": 0.314453125, "learning_rate": 0.0001525882929535059, "loss": 0.3934, "step": 42945 }, { "epoch": 1.090226041045297, "grad_norm": 0.365234375, "learning_rate": 0.0001525550703480507, "loss": 0.4426, "step": 42950 }, { "epoch": 1.0903529590943128, "grad_norm": 0.357421875, "learning_rate": 0.00015252184761721917, "loss": 0.4352, "step": 42955 }, { "epoch": 1.0904798771433286, "grad_norm": 0.318359375, "learning_rate": 0.00015248862476264163, "loss": 0.4233, "step": 42960 }, { "epoch": 1.0906067951923444, "grad_norm": 0.35546875, "learning_rate": 0.0001524554017859482, "loss": 0.4184, "step": 42965 }, { "epoch": 1.0907337132413601, "grad_norm": 0.353515625, "learning_rate": 0.00015242217868876922, "loss": 0.4317, "step": 42970 }, { "epoch": 1.0908606312903757, "grad_norm": 0.34375, "learning_rate": 0.0001523889554727349, "loss": 0.4193, "step": 42975 }, { "epoch": 1.0909875493393915, "grad_norm": 0.345703125, "learning_rate": 0.00015235573213947547, "loss": 0.4054, "step": 42980 }, { "epoch": 1.0911144673884072, "grad_norm": 0.359375, "learning_rate": 0.00015232250869062124, "loss": 0.438, "step": 42985 }, { "epoch": 1.091241385437423, "grad_norm": 0.3515625, "learning_rate": 0.0001522892851278024, "loss": 0.4397, "step": 42990 }, { "epoch": 1.0913683034864388, "grad_norm": 0.3515625, "learning_rate": 0.00015225606145264926, "loss": 0.4144, "step": 42995 }, { "epoch": 1.0914952215354545, "grad_norm": 0.35546875, "learning_rate": 0.00015222283766679206, "loss": 0.4295, "step": 43000 }, { "epoch": 1.0916221395844703, "grad_norm": 0.310546875, "learning_rate": 0.0001521896137718611, "loss": 0.3842, "step": 43005 }, { "epoch": 1.091749057633486, "grad_norm": 0.3515625, "learning_rate": 0.0001521563897694867, "loss": 0.4213, "step": 43010 }, { "epoch": 1.0918759756825018, "grad_norm": 0.34375, "learning_rate": 0.00015212316566129913, "loss": 0.3878, "step": 43015 }, { "epoch": 1.0920028937315176, "grad_norm": 0.326171875, "learning_rate": 0.00015208994144892864, "loss": 0.4081, "step": 43020 }, { "epoch": 1.0921298117805334, "grad_norm": 0.34375, "learning_rate": 0.00015205671713400552, "loss": 0.4404, "step": 43025 }, { "epoch": 1.0922567298295491, "grad_norm": 0.33984375, "learning_rate": 0.00015202349271816013, "loss": 0.4129, "step": 43030 }, { "epoch": 1.092383647878565, "grad_norm": 0.3671875, "learning_rate": 0.0001519902682030228, "loss": 0.4413, "step": 43035 }, { "epoch": 1.0925105659275807, "grad_norm": 0.3515625, "learning_rate": 0.00015195704359022372, "loss": 0.4044, "step": 43040 }, { "epoch": 1.0926374839765962, "grad_norm": 0.341796875, "learning_rate": 0.00015192381888139335, "loss": 0.4199, "step": 43045 }, { "epoch": 1.092764402025612, "grad_norm": 0.341796875, "learning_rate": 0.00015189059407816197, "loss": 0.4332, "step": 43050 }, { "epoch": 1.0928913200746277, "grad_norm": 0.365234375, "learning_rate": 0.00015185736918215984, "loss": 0.4197, "step": 43055 }, { "epoch": 1.0930182381236435, "grad_norm": 0.365234375, "learning_rate": 0.00015182414419501736, "loss": 0.4032, "step": 43060 }, { "epoch": 1.0931451561726593, "grad_norm": 0.39453125, "learning_rate": 0.00015179091911836487, "loss": 0.3961, "step": 43065 }, { "epoch": 1.093272074221675, "grad_norm": 0.380859375, "learning_rate": 0.00015175769395383272, "loss": 0.4316, "step": 43070 }, { "epoch": 1.0933989922706908, "grad_norm": 0.35546875, "learning_rate": 0.0001517244687030512, "loss": 0.4423, "step": 43075 }, { "epoch": 1.0935259103197066, "grad_norm": 0.3359375, "learning_rate": 0.00015169124336765065, "loss": 0.4141, "step": 43080 }, { "epoch": 1.0936528283687224, "grad_norm": 0.31640625, "learning_rate": 0.0001516580179492615, "loss": 0.3776, "step": 43085 }, { "epoch": 1.0937797464177381, "grad_norm": 0.349609375, "learning_rate": 0.00015162479244951405, "loss": 0.4488, "step": 43090 }, { "epoch": 1.0939066644667539, "grad_norm": 0.359375, "learning_rate": 0.0001515915668700387, "loss": 0.4075, "step": 43095 }, { "epoch": 1.0940335825157697, "grad_norm": 0.326171875, "learning_rate": 0.0001515583412124658, "loss": 0.392, "step": 43100 }, { "epoch": 1.0941605005647854, "grad_norm": 0.37890625, "learning_rate": 0.00015152511547842571, "loss": 0.431, "step": 43105 }, { "epoch": 1.094287418613801, "grad_norm": 0.37109375, "learning_rate": 0.0001514918896695488, "loss": 0.3962, "step": 43110 }, { "epoch": 1.0944143366628167, "grad_norm": 0.337890625, "learning_rate": 0.00015145866378746546, "loss": 0.4133, "step": 43115 }, { "epoch": 1.0945412547118325, "grad_norm": 0.337890625, "learning_rate": 0.00015142543783380608, "loss": 0.4147, "step": 43120 }, { "epoch": 1.0946681727608483, "grad_norm": 0.365234375, "learning_rate": 0.000151392211810201, "loss": 0.4409, "step": 43125 }, { "epoch": 1.094795090809864, "grad_norm": 0.3515625, "learning_rate": 0.0001513589857182807, "loss": 0.3916, "step": 43130 }, { "epoch": 1.0949220088588798, "grad_norm": 0.32421875, "learning_rate": 0.00015132575955967547, "loss": 0.392, "step": 43135 }, { "epoch": 1.0950489269078956, "grad_norm": 0.341796875, "learning_rate": 0.0001512925333360158, "loss": 0.4105, "step": 43140 }, { "epoch": 1.0951758449569113, "grad_norm": 0.341796875, "learning_rate": 0.00015125930704893196, "loss": 0.4105, "step": 43145 }, { "epoch": 1.095302763005927, "grad_norm": 0.337890625, "learning_rate": 0.00015122608070005443, "loss": 0.4031, "step": 43150 }, { "epoch": 1.0954296810549429, "grad_norm": 0.359375, "learning_rate": 0.00015119285429101366, "loss": 0.4087, "step": 43155 }, { "epoch": 1.0955565991039586, "grad_norm": 0.271484375, "learning_rate": 0.00015115962782343999, "loss": 0.3915, "step": 43160 }, { "epoch": 1.0956835171529744, "grad_norm": 0.37109375, "learning_rate": 0.0001511264012989638, "loss": 0.4081, "step": 43165 }, { "epoch": 1.0958104352019902, "grad_norm": 0.326171875, "learning_rate": 0.00015109317471921562, "loss": 0.413, "step": 43170 }, { "epoch": 1.095937353251006, "grad_norm": 0.341796875, "learning_rate": 0.00015105994808582574, "loss": 0.4481, "step": 43175 }, { "epoch": 1.0960642713000215, "grad_norm": 0.326171875, "learning_rate": 0.00015102672140042465, "loss": 0.4021, "step": 43180 }, { "epoch": 1.0961911893490373, "grad_norm": 0.369140625, "learning_rate": 0.00015099349466464275, "loss": 0.4388, "step": 43185 }, { "epoch": 1.096318107398053, "grad_norm": 0.353515625, "learning_rate": 0.00015096026788011043, "loss": 0.4165, "step": 43190 }, { "epoch": 1.0964450254470688, "grad_norm": 0.359375, "learning_rate": 0.0001509270410484582, "loss": 0.4263, "step": 43195 }, { "epoch": 1.0965719434960846, "grad_norm": 0.33203125, "learning_rate": 0.00015089381417131636, "loss": 0.3847, "step": 43200 }, { "epoch": 1.0966988615451003, "grad_norm": 0.357421875, "learning_rate": 0.0001508605872503155, "loss": 0.3996, "step": 43205 }, { "epoch": 1.096825779594116, "grad_norm": 0.35546875, "learning_rate": 0.00015082736028708594, "loss": 0.4274, "step": 43210 }, { "epoch": 1.0969526976431319, "grad_norm": 0.330078125, "learning_rate": 0.0001507941332832581, "loss": 0.4085, "step": 43215 }, { "epoch": 1.0970796156921476, "grad_norm": 0.361328125, "learning_rate": 0.0001507609062404625, "loss": 0.4188, "step": 43220 }, { "epoch": 1.0972065337411634, "grad_norm": 0.33984375, "learning_rate": 0.00015072767916032953, "loss": 0.4273, "step": 43225 }, { "epoch": 1.0973334517901792, "grad_norm": 0.341796875, "learning_rate": 0.00015069445204448964, "loss": 0.402, "step": 43230 }, { "epoch": 1.097460369839195, "grad_norm": 0.3671875, "learning_rate": 0.00015066122489457325, "loss": 0.4002, "step": 43235 }, { "epoch": 1.0975872878882105, "grad_norm": 0.3515625, "learning_rate": 0.00015062799771221077, "loss": 0.4296, "step": 43240 }, { "epoch": 1.0977142059372262, "grad_norm": 0.357421875, "learning_rate": 0.00015059477049903276, "loss": 0.4238, "step": 43245 }, { "epoch": 1.097841123986242, "grad_norm": 0.35546875, "learning_rate": 0.00015056154325666956, "loss": 0.4165, "step": 43250 }, { "epoch": 1.0979680420352578, "grad_norm": 0.349609375, "learning_rate": 0.0001505283159867517, "loss": 0.4214, "step": 43255 }, { "epoch": 1.0980949600842735, "grad_norm": 0.349609375, "learning_rate": 0.00015049508869090954, "loss": 0.4237, "step": 43260 }, { "epoch": 1.0982218781332893, "grad_norm": 0.3203125, "learning_rate": 0.00015046186137077363, "loss": 0.4047, "step": 43265 }, { "epoch": 1.098348796182305, "grad_norm": 0.3359375, "learning_rate": 0.0001504286340279743, "loss": 0.4256, "step": 43270 }, { "epoch": 1.0984757142313208, "grad_norm": 0.34375, "learning_rate": 0.0001503954066641421, "loss": 0.4073, "step": 43275 }, { "epoch": 1.0986026322803366, "grad_norm": 0.359375, "learning_rate": 0.00015036217928090744, "loss": 0.3899, "step": 43280 }, { "epoch": 1.0987295503293524, "grad_norm": 0.341796875, "learning_rate": 0.0001503289518799008, "loss": 0.4323, "step": 43285 }, { "epoch": 1.0988564683783681, "grad_norm": 0.35546875, "learning_rate": 0.00015029572446275264, "loss": 0.4054, "step": 43290 }, { "epoch": 1.098983386427384, "grad_norm": 0.31640625, "learning_rate": 0.00015026249703109339, "loss": 0.4044, "step": 43295 }, { "epoch": 1.0991103044763997, "grad_norm": 0.326171875, "learning_rate": 0.0001502292695865535, "loss": 0.4324, "step": 43300 }, { "epoch": 1.0992372225254154, "grad_norm": 0.330078125, "learning_rate": 0.00015019604213076346, "loss": 0.4238, "step": 43305 }, { "epoch": 1.099364140574431, "grad_norm": 0.365234375, "learning_rate": 0.00015016281466535367, "loss": 0.4343, "step": 43310 }, { "epoch": 1.0994910586234468, "grad_norm": 0.349609375, "learning_rate": 0.00015012958719195466, "loss": 0.4275, "step": 43315 }, { "epoch": 1.0996179766724625, "grad_norm": 0.361328125, "learning_rate": 0.00015009635971219689, "loss": 0.4169, "step": 43320 }, { "epoch": 1.0997448947214783, "grad_norm": 0.33984375, "learning_rate": 0.00015006313222771072, "loss": 0.408, "step": 43325 }, { "epoch": 1.099871812770494, "grad_norm": 0.34375, "learning_rate": 0.00015002990474012669, "loss": 0.3991, "step": 43330 }, { "epoch": 1.0999987308195098, "grad_norm": 0.345703125, "learning_rate": 0.00014999667725107527, "loss": 0.3995, "step": 43335 }, { "epoch": 1.1001256488685256, "grad_norm": 0.328125, "learning_rate": 0.00014996344976218689, "loss": 0.4234, "step": 43340 }, { "epoch": 1.1002525669175414, "grad_norm": 0.359375, "learning_rate": 0.00014993022227509202, "loss": 0.4258, "step": 43345 }, { "epoch": 1.1003794849665571, "grad_norm": 0.34375, "learning_rate": 0.00014989699479142112, "loss": 0.4195, "step": 43350 }, { "epoch": 1.100506403015573, "grad_norm": 0.35546875, "learning_rate": 0.00014986376731280466, "loss": 0.4583, "step": 43355 }, { "epoch": 1.1006333210645887, "grad_norm": 0.3515625, "learning_rate": 0.0001498305398408731, "loss": 0.4251, "step": 43360 }, { "epoch": 1.1007602391136044, "grad_norm": 0.333984375, "learning_rate": 0.00014979731237725678, "loss": 0.4275, "step": 43365 }, { "epoch": 1.1008871571626202, "grad_norm": 0.333984375, "learning_rate": 0.00014976408492358635, "loss": 0.4166, "step": 43370 }, { "epoch": 1.1010140752116357, "grad_norm": 0.3671875, "learning_rate": 0.00014973085748149216, "loss": 0.4476, "step": 43375 }, { "epoch": 1.1011409932606515, "grad_norm": 0.36328125, "learning_rate": 0.0001496976300526047, "loss": 0.4408, "step": 43380 }, { "epoch": 1.1012679113096673, "grad_norm": 0.33984375, "learning_rate": 0.00014966440263855432, "loss": 0.4276, "step": 43385 }, { "epoch": 1.101394829358683, "grad_norm": 0.36328125, "learning_rate": 0.00014963117524097164, "loss": 0.4166, "step": 43390 }, { "epoch": 1.1015217474076988, "grad_norm": 0.333984375, "learning_rate": 0.00014959794786148706, "loss": 0.4411, "step": 43395 }, { "epoch": 1.1016486654567146, "grad_norm": 0.31640625, "learning_rate": 0.000149564720501731, "loss": 0.4157, "step": 43400 }, { "epoch": 1.1017755835057303, "grad_norm": 0.390625, "learning_rate": 0.00014953149316333396, "loss": 0.4151, "step": 43405 }, { "epoch": 1.1019025015547461, "grad_norm": 0.326171875, "learning_rate": 0.00014949826584792633, "loss": 0.4163, "step": 43410 }, { "epoch": 1.1020294196037619, "grad_norm": 0.359375, "learning_rate": 0.0001494650385571386, "loss": 0.4154, "step": 43415 }, { "epoch": 1.1021563376527777, "grad_norm": 0.357421875, "learning_rate": 0.00014943181129260121, "loss": 0.4652, "step": 43420 }, { "epoch": 1.1022832557017934, "grad_norm": 0.3515625, "learning_rate": 0.00014939858405594463, "loss": 0.403, "step": 43425 }, { "epoch": 1.1024101737508092, "grad_norm": 0.3359375, "learning_rate": 0.0001493653568487993, "loss": 0.4131, "step": 43430 }, { "epoch": 1.102537091799825, "grad_norm": 0.33984375, "learning_rate": 0.00014933212967279565, "loss": 0.4233, "step": 43435 }, { "epoch": 1.1026640098488407, "grad_norm": 0.33984375, "learning_rate": 0.0001492989025295641, "loss": 0.4302, "step": 43440 }, { "epoch": 1.1027909278978563, "grad_norm": 0.341796875, "learning_rate": 0.00014926567542073517, "loss": 0.4238, "step": 43445 }, { "epoch": 1.102917845946872, "grad_norm": 0.37109375, "learning_rate": 0.0001492324483479392, "loss": 0.4172, "step": 43450 }, { "epoch": 1.1030447639958878, "grad_norm": 0.359375, "learning_rate": 0.00014919922131280668, "loss": 0.4455, "step": 43455 }, { "epoch": 1.1031716820449036, "grad_norm": 0.322265625, "learning_rate": 0.0001491659943169681, "loss": 0.4391, "step": 43460 }, { "epoch": 1.1032986000939193, "grad_norm": 0.34765625, "learning_rate": 0.00014913276736205382, "loss": 0.4186, "step": 43465 }, { "epoch": 1.103425518142935, "grad_norm": 0.35546875, "learning_rate": 0.00014909954044969433, "loss": 0.3924, "step": 43470 }, { "epoch": 1.1035524361919509, "grad_norm": 0.32421875, "learning_rate": 0.00014906631358152004, "loss": 0.4105, "step": 43475 }, { "epoch": 1.1036793542409666, "grad_norm": 0.333984375, "learning_rate": 0.00014903308675916135, "loss": 0.4153, "step": 43480 }, { "epoch": 1.1038062722899824, "grad_norm": 0.326171875, "learning_rate": 0.00014899985998424866, "loss": 0.406, "step": 43485 }, { "epoch": 1.1039331903389982, "grad_norm": 0.341796875, "learning_rate": 0.0001489666332584125, "loss": 0.3992, "step": 43490 }, { "epoch": 1.104060108388014, "grad_norm": 0.349609375, "learning_rate": 0.00014893340658328323, "loss": 0.4332, "step": 43495 }, { "epoch": 1.1041870264370297, "grad_norm": 0.326171875, "learning_rate": 0.00014890017996049124, "loss": 0.4341, "step": 43500 }, { "epoch": 1.1043139444860453, "grad_norm": 0.353515625, "learning_rate": 0.00014886695339166704, "loss": 0.4323, "step": 43505 }, { "epoch": 1.104440862535061, "grad_norm": 0.359375, "learning_rate": 0.00014883372687844094, "loss": 0.429, "step": 43510 }, { "epoch": 1.1045677805840768, "grad_norm": 0.349609375, "learning_rate": 0.00014880050042244335, "loss": 0.4088, "step": 43515 }, { "epoch": 1.1046946986330926, "grad_norm": 0.36328125, "learning_rate": 0.0001487672740253048, "loss": 0.3919, "step": 43520 }, { "epoch": 1.1048216166821083, "grad_norm": 0.345703125, "learning_rate": 0.0001487340476886556, "loss": 0.4137, "step": 43525 }, { "epoch": 1.104948534731124, "grad_norm": 0.31640625, "learning_rate": 0.00014870082141412615, "loss": 0.3924, "step": 43530 }, { "epoch": 1.1050754527801399, "grad_norm": 0.3671875, "learning_rate": 0.00014866759520334693, "loss": 0.4395, "step": 43535 }, { "epoch": 1.1052023708291556, "grad_norm": 0.333984375, "learning_rate": 0.00014863436905794826, "loss": 0.4171, "step": 43540 }, { "epoch": 1.1053292888781714, "grad_norm": 0.359375, "learning_rate": 0.00014860114297956056, "loss": 0.4124, "step": 43545 }, { "epoch": 1.1054562069271872, "grad_norm": 0.375, "learning_rate": 0.00014856791696981424, "loss": 0.4429, "step": 43550 }, { "epoch": 1.105583124976203, "grad_norm": 0.326171875, "learning_rate": 0.00014853469103033964, "loss": 0.3993, "step": 43555 }, { "epoch": 1.1057100430252187, "grad_norm": 0.345703125, "learning_rate": 0.00014850146516276716, "loss": 0.445, "step": 43560 }, { "epoch": 1.1058369610742345, "grad_norm": 0.3359375, "learning_rate": 0.00014846823936872717, "loss": 0.4093, "step": 43565 }, { "epoch": 1.1059638791232502, "grad_norm": 0.359375, "learning_rate": 0.0001484350136498501, "loss": 0.4102, "step": 43570 }, { "epoch": 1.1060907971722658, "grad_norm": 0.337890625, "learning_rate": 0.0001484017880077662, "loss": 0.4171, "step": 43575 }, { "epoch": 1.1062177152212815, "grad_norm": 0.390625, "learning_rate": 0.000148368562444106, "loss": 0.4376, "step": 43580 }, { "epoch": 1.1063446332702973, "grad_norm": 0.349609375, "learning_rate": 0.0001483353369604998, "loss": 0.4244, "step": 43585 }, { "epoch": 1.106471551319313, "grad_norm": 0.349609375, "learning_rate": 0.00014830211155857797, "loss": 0.4055, "step": 43590 }, { "epoch": 1.1065984693683288, "grad_norm": 0.35546875, "learning_rate": 0.00014826888623997083, "loss": 0.3996, "step": 43595 }, { "epoch": 1.1067253874173446, "grad_norm": 0.36328125, "learning_rate": 0.00014823566100630875, "loss": 0.4193, "step": 43600 }, { "epoch": 1.1068523054663604, "grad_norm": 0.3515625, "learning_rate": 0.0001482024358592221, "loss": 0.4075, "step": 43605 }, { "epoch": 1.1069792235153761, "grad_norm": 0.357421875, "learning_rate": 0.00014816921080034113, "loss": 0.4274, "step": 43610 }, { "epoch": 1.107106141564392, "grad_norm": 0.318359375, "learning_rate": 0.00014813598583129632, "loss": 0.4087, "step": 43615 }, { "epoch": 1.1072330596134077, "grad_norm": 0.337890625, "learning_rate": 0.00014810276095371792, "loss": 0.4045, "step": 43620 }, { "epoch": 1.1073599776624234, "grad_norm": 0.3515625, "learning_rate": 0.0001480695361692363, "loss": 0.4293, "step": 43625 }, { "epoch": 1.1074868957114392, "grad_norm": 0.337890625, "learning_rate": 0.0001480363114794818, "loss": 0.3909, "step": 43630 }, { "epoch": 1.1076138137604548, "grad_norm": 0.359375, "learning_rate": 0.00014800308688608468, "loss": 0.3992, "step": 43635 }, { "epoch": 1.1077407318094705, "grad_norm": 0.375, "learning_rate": 0.00014796986239067522, "loss": 0.4374, "step": 43640 }, { "epoch": 1.1078676498584863, "grad_norm": 0.326171875, "learning_rate": 0.0001479366379948839, "loss": 0.4126, "step": 43645 }, { "epoch": 1.107994567907502, "grad_norm": 0.34375, "learning_rate": 0.0001479034137003409, "loss": 0.4109, "step": 43650 }, { "epoch": 1.1081214859565178, "grad_norm": 0.337890625, "learning_rate": 0.00014787018950867655, "loss": 0.4065, "step": 43655 }, { "epoch": 1.1082484040055336, "grad_norm": 0.3515625, "learning_rate": 0.00014783696542152117, "loss": 0.406, "step": 43660 }, { "epoch": 1.1083753220545494, "grad_norm": 0.34765625, "learning_rate": 0.00014780374144050503, "loss": 0.4411, "step": 43665 }, { "epoch": 1.1085022401035651, "grad_norm": 0.373046875, "learning_rate": 0.00014777051756725842, "loss": 0.4115, "step": 43670 }, { "epoch": 1.108629158152581, "grad_norm": 0.328125, "learning_rate": 0.00014773729380341164, "loss": 0.4232, "step": 43675 }, { "epoch": 1.1087560762015967, "grad_norm": 0.3359375, "learning_rate": 0.00014770407015059496, "loss": 0.3971, "step": 43680 }, { "epoch": 1.1088829942506124, "grad_norm": 0.349609375, "learning_rate": 0.00014767084661043857, "loss": 0.4101, "step": 43685 }, { "epoch": 1.1090099122996282, "grad_norm": 0.28125, "learning_rate": 0.0001476376231845729, "loss": 0.3817, "step": 43690 }, { "epoch": 1.109136830348644, "grad_norm": 0.3671875, "learning_rate": 0.00014760439987462808, "loss": 0.4409, "step": 43695 }, { "epoch": 1.1092637483976597, "grad_norm": 0.3515625, "learning_rate": 0.0001475711766822344, "loss": 0.4093, "step": 43700 }, { "epoch": 1.1093906664466755, "grad_norm": 0.365234375, "learning_rate": 0.00014753795360902206, "loss": 0.4238, "step": 43705 }, { "epoch": 1.109517584495691, "grad_norm": 0.330078125, "learning_rate": 0.0001475047306566214, "loss": 0.4177, "step": 43710 }, { "epoch": 1.1096445025447068, "grad_norm": 0.369140625, "learning_rate": 0.00014747150782666265, "loss": 0.4107, "step": 43715 }, { "epoch": 1.1097714205937226, "grad_norm": 0.337890625, "learning_rate": 0.00014743828512077598, "loss": 0.3988, "step": 43720 }, { "epoch": 1.1098983386427383, "grad_norm": 0.380859375, "learning_rate": 0.00014740506254059166, "loss": 0.4209, "step": 43725 }, { "epoch": 1.1100252566917541, "grad_norm": 0.38671875, "learning_rate": 0.00014737184008773983, "loss": 0.4126, "step": 43730 }, { "epoch": 1.1101521747407699, "grad_norm": 0.35546875, "learning_rate": 0.00014733861776385082, "loss": 0.4104, "step": 43735 }, { "epoch": 1.1102790927897856, "grad_norm": 0.35546875, "learning_rate": 0.00014730539557055475, "loss": 0.4265, "step": 43740 }, { "epoch": 1.1104060108388014, "grad_norm": 0.353515625, "learning_rate": 0.00014727217350948184, "loss": 0.4102, "step": 43745 }, { "epoch": 1.1105329288878172, "grad_norm": 0.3828125, "learning_rate": 0.0001472389515822623, "loss": 0.4193, "step": 43750 }, { "epoch": 1.110659846936833, "grad_norm": 0.359375, "learning_rate": 0.00014720572979052632, "loss": 0.4009, "step": 43755 }, { "epoch": 1.1107867649858487, "grad_norm": 0.33984375, "learning_rate": 0.00014717250813590406, "loss": 0.4245, "step": 43760 }, { "epoch": 1.1109136830348645, "grad_norm": 0.36328125, "learning_rate": 0.00014713928662002563, "loss": 0.4162, "step": 43765 }, { "epoch": 1.11104060108388, "grad_norm": 0.37890625, "learning_rate": 0.0001471060652445213, "loss": 0.4223, "step": 43770 }, { "epoch": 1.1111675191328958, "grad_norm": 0.345703125, "learning_rate": 0.00014707284401102122, "loss": 0.4276, "step": 43775 }, { "epoch": 1.1112944371819116, "grad_norm": 0.3359375, "learning_rate": 0.0001470396229211555, "loss": 0.4172, "step": 43780 }, { "epoch": 1.1114213552309273, "grad_norm": 0.33984375, "learning_rate": 0.00014700640197655435, "loss": 0.4303, "step": 43785 }, { "epoch": 1.111548273279943, "grad_norm": 0.322265625, "learning_rate": 0.00014697318117884786, "loss": 0.4468, "step": 43790 }, { "epoch": 1.1116751913289589, "grad_norm": 0.33984375, "learning_rate": 0.00014693996052966612, "loss": 0.4433, "step": 43795 }, { "epoch": 1.1118021093779746, "grad_norm": 0.326171875, "learning_rate": 0.00014690674003063934, "loss": 0.386, "step": 43800 }, { "epoch": 1.1119290274269904, "grad_norm": 0.35546875, "learning_rate": 0.00014687351968339755, "loss": 0.4211, "step": 43805 }, { "epoch": 1.1120559454760062, "grad_norm": 0.361328125, "learning_rate": 0.00014684029948957092, "loss": 0.434, "step": 43810 }, { "epoch": 1.112182863525022, "grad_norm": 0.369140625, "learning_rate": 0.00014680707945078955, "loss": 0.4226, "step": 43815 }, { "epoch": 1.1123097815740377, "grad_norm": 0.388671875, "learning_rate": 0.0001467738595686835, "loss": 0.4473, "step": 43820 }, { "epoch": 1.1124366996230535, "grad_norm": 0.341796875, "learning_rate": 0.0001467406398448829, "loss": 0.3901, "step": 43825 }, { "epoch": 1.1125636176720692, "grad_norm": 0.359375, "learning_rate": 0.00014670742028101774, "loss": 0.4147, "step": 43830 }, { "epoch": 1.112690535721085, "grad_norm": 0.33203125, "learning_rate": 0.0001466742008787182, "loss": 0.4133, "step": 43835 }, { "epoch": 1.1128174537701006, "grad_norm": 0.349609375, "learning_rate": 0.0001466409816396143, "loss": 0.4161, "step": 43840 }, { "epoch": 1.1129443718191163, "grad_norm": 0.341796875, "learning_rate": 0.0001466077625653361, "loss": 0.3869, "step": 43845 }, { "epoch": 1.113071289868132, "grad_norm": 0.365234375, "learning_rate": 0.00014657454365751366, "loss": 0.4342, "step": 43850 }, { "epoch": 1.1131982079171479, "grad_norm": 0.341796875, "learning_rate": 0.00014654132491777693, "loss": 0.4057, "step": 43855 }, { "epoch": 1.1133251259661636, "grad_norm": 0.33203125, "learning_rate": 0.00014650810634775607, "loss": 0.4204, "step": 43860 }, { "epoch": 1.1134520440151794, "grad_norm": 0.361328125, "learning_rate": 0.00014647488794908102, "loss": 0.4668, "step": 43865 }, { "epoch": 1.1135789620641952, "grad_norm": 0.359375, "learning_rate": 0.00014644166972338184, "loss": 0.4253, "step": 43870 }, { "epoch": 1.113705880113211, "grad_norm": 0.349609375, "learning_rate": 0.0001464084516722885, "loss": 0.3973, "step": 43875 }, { "epoch": 1.1138327981622267, "grad_norm": 0.328125, "learning_rate": 0.000146375233797431, "loss": 0.4076, "step": 43880 }, { "epoch": 1.1139597162112425, "grad_norm": 0.34765625, "learning_rate": 0.00014634201610043928, "loss": 0.4358, "step": 43885 }, { "epoch": 1.1140866342602582, "grad_norm": 0.353515625, "learning_rate": 0.0001463087985829434, "loss": 0.4238, "step": 43890 }, { "epoch": 1.114213552309274, "grad_norm": 0.357421875, "learning_rate": 0.00014627558124657332, "loss": 0.419, "step": 43895 }, { "epoch": 1.1143404703582895, "grad_norm": 0.337890625, "learning_rate": 0.00014624236409295898, "loss": 0.4231, "step": 43900 }, { "epoch": 1.1144673884073053, "grad_norm": 0.345703125, "learning_rate": 0.00014620914712373034, "loss": 0.4138, "step": 43905 }, { "epoch": 1.114594306456321, "grad_norm": 0.3671875, "learning_rate": 0.00014617593034051734, "loss": 0.4214, "step": 43910 }, { "epoch": 1.1147212245053368, "grad_norm": 0.333984375, "learning_rate": 0.00014614271374494993, "loss": 0.404, "step": 43915 }, { "epoch": 1.1148481425543526, "grad_norm": 0.341796875, "learning_rate": 0.00014610949733865803, "loss": 0.4324, "step": 43920 }, { "epoch": 1.1149750606033684, "grad_norm": 0.35546875, "learning_rate": 0.00014607628112327153, "loss": 0.4331, "step": 43925 }, { "epoch": 1.1151019786523841, "grad_norm": 0.359375, "learning_rate": 0.0001460430651004203, "loss": 0.4155, "step": 43930 }, { "epoch": 1.1152288967014, "grad_norm": 0.34765625, "learning_rate": 0.00014600984927173435, "loss": 0.44, "step": 43935 }, { "epoch": 1.1153558147504157, "grad_norm": 0.337890625, "learning_rate": 0.00014597663363884352, "loss": 0.414, "step": 43940 }, { "epoch": 1.1154827327994314, "grad_norm": 0.369140625, "learning_rate": 0.00014594341820337766, "loss": 0.4325, "step": 43945 }, { "epoch": 1.1156096508484472, "grad_norm": 0.302734375, "learning_rate": 0.00014591020296696663, "loss": 0.4057, "step": 43950 }, { "epoch": 1.115736568897463, "grad_norm": 0.37109375, "learning_rate": 0.00014587698793124028, "loss": 0.418, "step": 43955 }, { "epoch": 1.1158634869464787, "grad_norm": 0.34375, "learning_rate": 0.00014584377309782853, "loss": 0.3698, "step": 43960 }, { "epoch": 1.1159904049954945, "grad_norm": 0.357421875, "learning_rate": 0.0001458105584683612, "loss": 0.4436, "step": 43965 }, { "epoch": 1.11611732304451, "grad_norm": 0.3515625, "learning_rate": 0.00014577734404446808, "loss": 0.4514, "step": 43970 }, { "epoch": 1.1162442410935258, "grad_norm": 0.373046875, "learning_rate": 0.00014574412982777899, "loss": 0.4418, "step": 43975 }, { "epoch": 1.1163711591425416, "grad_norm": 0.345703125, "learning_rate": 0.00014571091581992376, "loss": 0.4261, "step": 43980 }, { "epoch": 1.1164980771915574, "grad_norm": 0.3359375, "learning_rate": 0.0001456777020225322, "loss": 0.4366, "step": 43985 }, { "epoch": 1.1166249952405731, "grad_norm": 0.349609375, "learning_rate": 0.00014564448843723408, "loss": 0.4347, "step": 43990 }, { "epoch": 1.116751913289589, "grad_norm": 0.333984375, "learning_rate": 0.00014561127506565916, "loss": 0.411, "step": 43995 }, { "epoch": 1.1168788313386047, "grad_norm": 0.357421875, "learning_rate": 0.00014557806190943723, "loss": 0.425, "step": 44000 }, { "epoch": 1.1170057493876204, "grad_norm": 0.37890625, "learning_rate": 0.00014554484897019807, "loss": 0.4547, "step": 44005 }, { "epoch": 1.1171326674366362, "grad_norm": 0.361328125, "learning_rate": 0.00014551163624957134, "loss": 0.4259, "step": 44010 }, { "epoch": 1.117259585485652, "grad_norm": 0.345703125, "learning_rate": 0.00014547842374918685, "loss": 0.4117, "step": 44015 }, { "epoch": 1.1173865035346677, "grad_norm": 0.3046875, "learning_rate": 0.00014544521147067427, "loss": 0.3795, "step": 44020 }, { "epoch": 1.1175134215836835, "grad_norm": 0.36328125, "learning_rate": 0.0001454119994156634, "loss": 0.4512, "step": 44025 }, { "epoch": 1.1176403396326993, "grad_norm": 0.3515625, "learning_rate": 0.00014537878758578388, "loss": 0.4264, "step": 44030 }, { "epoch": 1.1177672576817148, "grad_norm": 0.36328125, "learning_rate": 0.00014534557598266544, "loss": 0.4079, "step": 44035 }, { "epoch": 1.1178941757307306, "grad_norm": 0.357421875, "learning_rate": 0.0001453123646079377, "loss": 0.4211, "step": 44040 }, { "epoch": 1.1180210937797463, "grad_norm": 0.64453125, "learning_rate": 0.00014527915346323038, "loss": 0.3983, "step": 44045 }, { "epoch": 1.1181480118287621, "grad_norm": 0.34375, "learning_rate": 0.00014524594255017312, "loss": 0.4169, "step": 44050 }, { "epoch": 1.1182749298777779, "grad_norm": 0.3359375, "learning_rate": 0.00014521273187039554, "loss": 0.4107, "step": 44055 }, { "epoch": 1.1184018479267936, "grad_norm": 0.341796875, "learning_rate": 0.00014517952142552732, "loss": 0.4248, "step": 44060 }, { "epoch": 1.1185287659758094, "grad_norm": 0.333984375, "learning_rate": 0.00014514631121719806, "loss": 0.4082, "step": 44065 }, { "epoch": 1.1186556840248252, "grad_norm": 0.36328125, "learning_rate": 0.00014511310124703738, "loss": 0.3825, "step": 44070 }, { "epoch": 1.118782602073841, "grad_norm": 0.349609375, "learning_rate": 0.00014507989151667488, "loss": 0.4324, "step": 44075 }, { "epoch": 1.1189095201228567, "grad_norm": 0.31640625, "learning_rate": 0.00014504668202774008, "loss": 0.3838, "step": 44080 }, { "epoch": 1.1190364381718725, "grad_norm": 0.349609375, "learning_rate": 0.0001450134727818627, "loss": 0.4028, "step": 44085 }, { "epoch": 1.1191633562208883, "grad_norm": 0.3671875, "learning_rate": 0.00014498026378067218, "loss": 0.4302, "step": 44090 }, { "epoch": 1.119290274269904, "grad_norm": 0.333984375, "learning_rate": 0.00014494705502579813, "loss": 0.4339, "step": 44095 }, { "epoch": 1.1194171923189198, "grad_norm": 0.3671875, "learning_rate": 0.00014491384651887007, "loss": 0.4498, "step": 44100 }, { "epoch": 1.1195441103679353, "grad_norm": 0.34765625, "learning_rate": 0.00014488063826151757, "loss": 0.4228, "step": 44105 }, { "epoch": 1.119671028416951, "grad_norm": 0.357421875, "learning_rate": 0.00014484743025537007, "loss": 0.4309, "step": 44110 }, { "epoch": 1.1197979464659669, "grad_norm": 0.3359375, "learning_rate": 0.00014481422250205714, "loss": 0.3986, "step": 44115 }, { "epoch": 1.1199248645149826, "grad_norm": 0.337890625, "learning_rate": 0.00014478101500320825, "loss": 0.4144, "step": 44120 }, { "epoch": 1.1200517825639984, "grad_norm": 0.3359375, "learning_rate": 0.00014474780776045287, "loss": 0.4172, "step": 44125 }, { "epoch": 1.1201787006130142, "grad_norm": 0.3515625, "learning_rate": 0.00014471460077542043, "loss": 0.4305, "step": 44130 }, { "epoch": 1.12030561866203, "grad_norm": 0.3359375, "learning_rate": 0.00014468139404974048, "loss": 0.4059, "step": 44135 }, { "epoch": 1.1204325367110457, "grad_norm": 0.365234375, "learning_rate": 0.00014464818758504238, "loss": 0.4075, "step": 44140 }, { "epoch": 1.1205594547600615, "grad_norm": 0.333984375, "learning_rate": 0.00014461498138295553, "loss": 0.4246, "step": 44145 }, { "epoch": 1.1206863728090772, "grad_norm": 0.3515625, "learning_rate": 0.00014458177544510945, "loss": 0.4465, "step": 44150 }, { "epoch": 1.120813290858093, "grad_norm": 0.33984375, "learning_rate": 0.00014454856977313353, "loss": 0.4334, "step": 44155 }, { "epoch": 1.1209402089071088, "grad_norm": 0.365234375, "learning_rate": 0.00014451536436865708, "loss": 0.4298, "step": 44160 }, { "epoch": 1.1210671269561243, "grad_norm": 0.333984375, "learning_rate": 0.00014448215923330954, "loss": 0.4403, "step": 44165 }, { "epoch": 1.12119404500514, "grad_norm": 0.3359375, "learning_rate": 0.0001444489543687202, "loss": 0.4065, "step": 44170 }, { "epoch": 1.1213209630541559, "grad_norm": 0.359375, "learning_rate": 0.0001444157497765185, "loss": 0.4165, "step": 44175 }, { "epoch": 1.1214478811031716, "grad_norm": 0.361328125, "learning_rate": 0.00014438254545833372, "loss": 0.4013, "step": 44180 }, { "epoch": 1.1215747991521874, "grad_norm": 0.357421875, "learning_rate": 0.0001443493414157952, "loss": 0.4233, "step": 44185 }, { "epoch": 1.1217017172012032, "grad_norm": 0.361328125, "learning_rate": 0.00014431613765053227, "loss": 0.4143, "step": 44190 }, { "epoch": 1.121828635250219, "grad_norm": 0.341796875, "learning_rate": 0.00014428293416417418, "loss": 0.3933, "step": 44195 }, { "epoch": 1.1219555532992347, "grad_norm": 0.3515625, "learning_rate": 0.00014424973095835024, "loss": 0.4448, "step": 44200 }, { "epoch": 1.1220824713482505, "grad_norm": 0.357421875, "learning_rate": 0.00014421652803468964, "loss": 0.4184, "step": 44205 }, { "epoch": 1.1222093893972662, "grad_norm": 0.388671875, "learning_rate": 0.00014418332539482178, "loss": 0.4446, "step": 44210 }, { "epoch": 1.122336307446282, "grad_norm": 0.328125, "learning_rate": 0.0001441501230403758, "loss": 0.4284, "step": 44215 }, { "epoch": 1.1224632254952978, "grad_norm": 0.337890625, "learning_rate": 0.00014411692097298094, "loss": 0.409, "step": 44220 }, { "epoch": 1.1225901435443135, "grad_norm": 0.314453125, "learning_rate": 0.00014408371919426646, "loss": 0.3843, "step": 44225 }, { "epoch": 1.1227170615933293, "grad_norm": 0.359375, "learning_rate": 0.00014405051770586153, "loss": 0.4029, "step": 44230 }, { "epoch": 1.1228439796423448, "grad_norm": 0.369140625, "learning_rate": 0.0001440173165093953, "loss": 0.4256, "step": 44235 }, { "epoch": 1.1229708976913606, "grad_norm": 0.359375, "learning_rate": 0.00014398411560649697, "loss": 0.4162, "step": 44240 }, { "epoch": 1.1230978157403764, "grad_norm": 0.35546875, "learning_rate": 0.00014395091499879573, "loss": 0.4015, "step": 44245 }, { "epoch": 1.1232247337893921, "grad_norm": 0.369140625, "learning_rate": 0.00014391771468792063, "loss": 0.4184, "step": 44250 }, { "epoch": 1.123351651838408, "grad_norm": 0.32421875, "learning_rate": 0.00014388451467550083, "loss": 0.3963, "step": 44255 }, { "epoch": 1.1234785698874237, "grad_norm": 0.373046875, "learning_rate": 0.0001438513149631655, "loss": 0.4618, "step": 44260 }, { "epoch": 1.1236054879364394, "grad_norm": 0.35546875, "learning_rate": 0.0001438181155525437, "loss": 0.4193, "step": 44265 }, { "epoch": 1.1237324059854552, "grad_norm": 0.330078125, "learning_rate": 0.00014378491644526445, "loss": 0.4185, "step": 44270 }, { "epoch": 1.123859324034471, "grad_norm": 0.3359375, "learning_rate": 0.0001437517176429569, "loss": 0.4141, "step": 44275 }, { "epoch": 1.1239862420834867, "grad_norm": 0.34765625, "learning_rate": 0.00014371851914725013, "loss": 0.4061, "step": 44280 }, { "epoch": 1.1241131601325025, "grad_norm": 0.81640625, "learning_rate": 0.00014368532095977309, "loss": 0.3898, "step": 44285 }, { "epoch": 1.1242400781815183, "grad_norm": 0.390625, "learning_rate": 0.00014365212308215485, "loss": 0.443, "step": 44290 }, { "epoch": 1.124366996230534, "grad_norm": 0.361328125, "learning_rate": 0.00014361892551602437, "loss": 0.3958, "step": 44295 }, { "epoch": 1.1244939142795496, "grad_norm": 0.36328125, "learning_rate": 0.00014358572826301066, "loss": 0.4078, "step": 44300 }, { "epoch": 1.1246208323285654, "grad_norm": 0.341796875, "learning_rate": 0.00014355253132474274, "loss": 0.4259, "step": 44305 }, { "epoch": 1.1247477503775811, "grad_norm": 0.341796875, "learning_rate": 0.00014351933470284958, "loss": 0.3925, "step": 44310 }, { "epoch": 1.124874668426597, "grad_norm": 0.345703125, "learning_rate": 0.00014348613839896003, "loss": 0.4162, "step": 44315 }, { "epoch": 1.1250015864756127, "grad_norm": 0.345703125, "learning_rate": 0.00014345294241470307, "loss": 0.4103, "step": 44320 }, { "epoch": 1.1251285045246284, "grad_norm": 0.349609375, "learning_rate": 0.00014341974675170766, "loss": 0.4413, "step": 44325 }, { "epoch": 1.1252554225736442, "grad_norm": 0.341796875, "learning_rate": 0.0001433865514116026, "loss": 0.4215, "step": 44330 }, { "epoch": 1.12538234062266, "grad_norm": 0.34375, "learning_rate": 0.00014335335639601685, "loss": 0.4059, "step": 44335 }, { "epoch": 1.1255092586716757, "grad_norm": 0.337890625, "learning_rate": 0.00014332016170657926, "loss": 0.3934, "step": 44340 }, { "epoch": 1.1256361767206915, "grad_norm": 0.330078125, "learning_rate": 0.00014328696734491864, "loss": 0.4258, "step": 44345 }, { "epoch": 1.1257630947697073, "grad_norm": 0.3515625, "learning_rate": 0.0001432537733126639, "loss": 0.4297, "step": 44350 }, { "epoch": 1.125890012818723, "grad_norm": 0.361328125, "learning_rate": 0.0001432205796114438, "loss": 0.4007, "step": 44355 }, { "epoch": 1.1260169308677388, "grad_norm": 0.353515625, "learning_rate": 0.0001431873862428872, "loss": 0.4532, "step": 44360 }, { "epoch": 1.1261438489167546, "grad_norm": 0.337890625, "learning_rate": 0.0001431541932086228, "loss": 0.4144, "step": 44365 }, { "epoch": 1.12627076696577, "grad_norm": 0.3515625, "learning_rate": 0.00014312100051027944, "loss": 0.4252, "step": 44370 }, { "epoch": 1.1263976850147859, "grad_norm": 0.353515625, "learning_rate": 0.0001430878081494858, "loss": 0.4204, "step": 44375 }, { "epoch": 1.1265246030638016, "grad_norm": 0.3359375, "learning_rate": 0.0001430546161278707, "loss": 0.4246, "step": 44380 }, { "epoch": 1.1266515211128174, "grad_norm": 0.369140625, "learning_rate": 0.00014302142444706282, "loss": 0.4111, "step": 44385 }, { "epoch": 1.1267784391618332, "grad_norm": 0.328125, "learning_rate": 0.00014298823310869086, "loss": 0.3981, "step": 44390 }, { "epoch": 1.126905357210849, "grad_norm": 0.37890625, "learning_rate": 0.00014295504211438344, "loss": 0.4188, "step": 44395 }, { "epoch": 1.1270322752598647, "grad_norm": 0.349609375, "learning_rate": 0.00014292185146576935, "loss": 0.4227, "step": 44400 }, { "epoch": 1.1271591933088805, "grad_norm": 0.34765625, "learning_rate": 0.0001428886611644772, "loss": 0.4074, "step": 44405 }, { "epoch": 1.1272861113578962, "grad_norm": 0.369140625, "learning_rate": 0.0001428554712121356, "loss": 0.4308, "step": 44410 }, { "epoch": 1.127413029406912, "grad_norm": 0.359375, "learning_rate": 0.00014282228161037323, "loss": 0.3992, "step": 44415 }, { "epoch": 1.1275399474559278, "grad_norm": 0.349609375, "learning_rate": 0.00014278909236081856, "loss": 0.386, "step": 44420 }, { "epoch": 1.1276668655049435, "grad_norm": 0.32421875, "learning_rate": 0.0001427559034651003, "loss": 0.4129, "step": 44425 }, { "epoch": 1.127793783553959, "grad_norm": 0.359375, "learning_rate": 0.000142722714924847, "loss": 0.417, "step": 44430 }, { "epoch": 1.1279207016029749, "grad_norm": 0.3671875, "learning_rate": 0.00014268952674168715, "loss": 0.4204, "step": 44435 }, { "epoch": 1.1280476196519906, "grad_norm": 0.3125, "learning_rate": 0.0001426563389172493, "loss": 0.3786, "step": 44440 }, { "epoch": 1.1281745377010064, "grad_norm": 0.357421875, "learning_rate": 0.000142623151453162, "loss": 0.4295, "step": 44445 }, { "epoch": 1.1283014557500222, "grad_norm": 0.330078125, "learning_rate": 0.00014258996435105372, "loss": 0.4033, "step": 44450 }, { "epoch": 1.128428373799038, "grad_norm": 0.341796875, "learning_rate": 0.0001425567776125529, "loss": 0.4406, "step": 44455 }, { "epoch": 1.1285552918480537, "grad_norm": 0.365234375, "learning_rate": 0.00014252359123928803, "loss": 0.4151, "step": 44460 }, { "epoch": 1.1286822098970695, "grad_norm": 0.333984375, "learning_rate": 0.00014249040523288756, "loss": 0.4138, "step": 44465 }, { "epoch": 1.1288091279460852, "grad_norm": 0.3359375, "learning_rate": 0.00014245721959497994, "loss": 0.4077, "step": 44470 }, { "epoch": 1.128936045995101, "grad_norm": 0.380859375, "learning_rate": 0.00014242403432719356, "loss": 0.4006, "step": 44475 }, { "epoch": 1.1290629640441168, "grad_norm": 0.34765625, "learning_rate": 0.0001423908494311568, "loss": 0.4218, "step": 44480 }, { "epoch": 1.1291898820931325, "grad_norm": 0.349609375, "learning_rate": 0.00014235766490849804, "loss": 0.4038, "step": 44485 }, { "epoch": 1.1293168001421483, "grad_norm": 0.380859375, "learning_rate": 0.00014232448076084562, "loss": 0.4402, "step": 44490 }, { "epoch": 1.129443718191164, "grad_norm": 0.32421875, "learning_rate": 0.00014229129698982784, "loss": 0.3947, "step": 44495 }, { "epoch": 1.1295706362401798, "grad_norm": 0.361328125, "learning_rate": 0.00014225811359707302, "loss": 0.393, "step": 44500 }, { "epoch": 1.1296975542891954, "grad_norm": 0.310546875, "learning_rate": 0.00014222493058420953, "loss": 0.3389, "step": 44505 }, { "epoch": 1.1298244723382111, "grad_norm": 0.349609375, "learning_rate": 0.00014219174795286559, "loss": 0.4129, "step": 44510 }, { "epoch": 1.129951390387227, "grad_norm": 0.36328125, "learning_rate": 0.00014215856570466947, "loss": 0.4539, "step": 44515 }, { "epoch": 1.1300783084362427, "grad_norm": 0.353515625, "learning_rate": 0.00014212538384124934, "loss": 0.4313, "step": 44520 }, { "epoch": 1.1302052264852585, "grad_norm": 0.333984375, "learning_rate": 0.00014209220236423357, "loss": 0.4301, "step": 44525 }, { "epoch": 1.1303321445342742, "grad_norm": 0.33203125, "learning_rate": 0.00014205902127525024, "loss": 0.3912, "step": 44530 }, { "epoch": 1.13045906258329, "grad_norm": 0.353515625, "learning_rate": 0.00014202584057592763, "loss": 0.4181, "step": 44535 }, { "epoch": 1.1305859806323058, "grad_norm": 0.326171875, "learning_rate": 0.00014199266026789382, "loss": 0.407, "step": 44540 }, { "epoch": 1.1307128986813215, "grad_norm": 0.333984375, "learning_rate": 0.0001419594803527769, "loss": 0.3952, "step": 44545 }, { "epoch": 1.1308398167303373, "grad_norm": 0.345703125, "learning_rate": 0.00014192630083220518, "loss": 0.4466, "step": 44550 }, { "epoch": 1.130966734779353, "grad_norm": 0.3203125, "learning_rate": 0.00014189312170780663, "loss": 0.3901, "step": 44555 }, { "epoch": 1.1310936528283686, "grad_norm": 0.349609375, "learning_rate": 0.00014185994298120938, "loss": 0.4079, "step": 44560 }, { "epoch": 1.1312205708773844, "grad_norm": 0.33984375, "learning_rate": 0.00014182676465404148, "loss": 0.4261, "step": 44565 }, { "epoch": 1.1313474889264001, "grad_norm": 0.357421875, "learning_rate": 0.00014179358672793097, "loss": 0.4147, "step": 44570 }, { "epoch": 1.131474406975416, "grad_norm": 0.35546875, "learning_rate": 0.00014176040920450586, "loss": 0.4607, "step": 44575 }, { "epoch": 1.1316013250244317, "grad_norm": 0.373046875, "learning_rate": 0.00014172723208539424, "loss": 0.4285, "step": 44580 }, { "epoch": 1.1317282430734474, "grad_norm": 0.359375, "learning_rate": 0.000141694055372224, "loss": 0.4408, "step": 44585 }, { "epoch": 1.1318551611224632, "grad_norm": 0.349609375, "learning_rate": 0.00014166087906662317, "loss": 0.4563, "step": 44590 }, { "epoch": 1.131982079171479, "grad_norm": 0.333984375, "learning_rate": 0.0001416277031702197, "loss": 0.4069, "step": 44595 }, { "epoch": 1.1321089972204947, "grad_norm": 0.3671875, "learning_rate": 0.0001415945276846415, "loss": 0.4111, "step": 44600 }, { "epoch": 1.1322359152695105, "grad_norm": 0.33203125, "learning_rate": 0.00014156135261151646, "loss": 0.426, "step": 44605 }, { "epoch": 1.1323628333185263, "grad_norm": 0.31640625, "learning_rate": 0.00014152817795247253, "loss": 0.4075, "step": 44610 }, { "epoch": 1.132489751367542, "grad_norm": 0.3359375, "learning_rate": 0.00014149500370913752, "loss": 0.4239, "step": 44615 }, { "epoch": 1.1326166694165578, "grad_norm": 0.365234375, "learning_rate": 0.00014146182988313924, "loss": 0.4397, "step": 44620 }, { "epoch": 1.1327435874655736, "grad_norm": 0.365234375, "learning_rate": 0.0001414286564761056, "loss": 0.4066, "step": 44625 }, { "epoch": 1.1328705055145893, "grad_norm": 0.33984375, "learning_rate": 0.00014139548348966437, "loss": 0.3781, "step": 44630 }, { "epoch": 1.1329974235636049, "grad_norm": 0.384765625, "learning_rate": 0.00014136231092544334, "loss": 0.4573, "step": 44635 }, { "epoch": 1.1331243416126207, "grad_norm": 0.34375, "learning_rate": 0.00014132913878507028, "loss": 0.4087, "step": 44640 }, { "epoch": 1.1332512596616364, "grad_norm": 1.5, "learning_rate": 0.00014129596707017293, "loss": 0.3914, "step": 44645 }, { "epoch": 1.1333781777106522, "grad_norm": 0.337890625, "learning_rate": 0.00014126279578237895, "loss": 0.396, "step": 44650 }, { "epoch": 1.133505095759668, "grad_norm": 0.345703125, "learning_rate": 0.00014122962492331615, "loss": 0.374, "step": 44655 }, { "epoch": 1.1336320138086837, "grad_norm": 0.36328125, "learning_rate": 0.00014119645449461215, "loss": 0.391, "step": 44660 }, { "epoch": 1.1337589318576995, "grad_norm": 0.35546875, "learning_rate": 0.00014116328449789462, "loss": 0.4067, "step": 44665 }, { "epoch": 1.1338858499067153, "grad_norm": 0.361328125, "learning_rate": 0.00014113011493479123, "loss": 0.4466, "step": 44670 }, { "epoch": 1.134012767955731, "grad_norm": 0.34765625, "learning_rate": 0.00014109694580692953, "loss": 0.42, "step": 44675 }, { "epoch": 1.1341396860047468, "grad_norm": 0.35546875, "learning_rate": 0.00014106377711593717, "loss": 0.4336, "step": 44680 }, { "epoch": 1.1342666040537626, "grad_norm": 0.345703125, "learning_rate": 0.00014103060886344173, "loss": 0.4186, "step": 44685 }, { "epoch": 1.1343935221027783, "grad_norm": 0.37109375, "learning_rate": 0.00014099744105107074, "loss": 0.4087, "step": 44690 }, { "epoch": 1.1345204401517939, "grad_norm": 0.373046875, "learning_rate": 0.0001409642736804517, "loss": 0.4375, "step": 44695 }, { "epoch": 1.1346473582008096, "grad_norm": 0.3515625, "learning_rate": 0.00014093110675321213, "loss": 0.4352, "step": 44700 }, { "epoch": 1.1347742762498254, "grad_norm": 0.359375, "learning_rate": 0.00014089794027097956, "loss": 0.4024, "step": 44705 }, { "epoch": 1.1349011942988412, "grad_norm": 0.349609375, "learning_rate": 0.00014086477423538139, "loss": 0.4293, "step": 44710 }, { "epoch": 1.135028112347857, "grad_norm": 0.326171875, "learning_rate": 0.00014083160864804517, "loss": 0.4191, "step": 44715 }, { "epoch": 1.1351550303968727, "grad_norm": 0.31640625, "learning_rate": 0.00014079844351059826, "loss": 0.3857, "step": 44720 }, { "epoch": 1.1352819484458885, "grad_norm": 0.3046875, "learning_rate": 0.00014076527882466808, "loss": 0.4233, "step": 44725 }, { "epoch": 1.1354088664949042, "grad_norm": 0.328125, "learning_rate": 0.00014073211459188196, "loss": 0.4169, "step": 44730 }, { "epoch": 1.13553578454392, "grad_norm": 0.33203125, "learning_rate": 0.0001406989508138673, "loss": 0.4486, "step": 44735 }, { "epoch": 1.1356627025929358, "grad_norm": 0.361328125, "learning_rate": 0.00014066578749225142, "loss": 0.4372, "step": 44740 }, { "epoch": 1.1357896206419515, "grad_norm": 0.34765625, "learning_rate": 0.00014063262462866158, "loss": 0.4184, "step": 44745 }, { "epoch": 1.1359165386909673, "grad_norm": 0.357421875, "learning_rate": 0.00014059946222472517, "loss": 0.4461, "step": 44750 }, { "epoch": 1.136043456739983, "grad_norm": 0.34765625, "learning_rate": 0.0001405663002820694, "loss": 0.4175, "step": 44755 }, { "epoch": 1.1361703747889988, "grad_norm": 0.322265625, "learning_rate": 0.00014053313880232153, "loss": 0.3951, "step": 44760 }, { "epoch": 1.1362972928380146, "grad_norm": 0.365234375, "learning_rate": 0.00014049997778710875, "loss": 0.4163, "step": 44765 }, { "epoch": 1.1364242108870302, "grad_norm": 0.5234375, "learning_rate": 0.00014046681723805827, "loss": 0.486, "step": 44770 }, { "epoch": 1.136551128936046, "grad_norm": 0.3359375, "learning_rate": 0.00014043365715679722, "loss": 0.4103, "step": 44775 }, { "epoch": 1.1366780469850617, "grad_norm": 0.369140625, "learning_rate": 0.00014040049754495285, "loss": 0.4143, "step": 44780 }, { "epoch": 1.1368049650340775, "grad_norm": 0.35546875, "learning_rate": 0.00014036733840415227, "loss": 0.3979, "step": 44785 }, { "epoch": 1.1369318830830932, "grad_norm": 0.36328125, "learning_rate": 0.0001403341797360225, "loss": 0.4128, "step": 44790 }, { "epoch": 1.137058801132109, "grad_norm": 0.32421875, "learning_rate": 0.0001403010215421907, "loss": 0.3999, "step": 44795 }, { "epoch": 1.1371857191811248, "grad_norm": 0.359375, "learning_rate": 0.00014026786382428393, "loss": 0.4363, "step": 44800 }, { "epoch": 1.1373126372301405, "grad_norm": 0.314453125, "learning_rate": 0.00014023470658392924, "loss": 0.3976, "step": 44805 }, { "epoch": 1.1374395552791563, "grad_norm": 0.3515625, "learning_rate": 0.00014020154982275355, "loss": 0.413, "step": 44810 }, { "epoch": 1.137566473328172, "grad_norm": 0.361328125, "learning_rate": 0.00014016839354238393, "loss": 0.408, "step": 44815 }, { "epoch": 1.1376933913771878, "grad_norm": 0.34375, "learning_rate": 0.0001401352377444473, "loss": 0.4022, "step": 44820 }, { "epoch": 1.1378203094262034, "grad_norm": 0.341796875, "learning_rate": 0.00014010208243057064, "loss": 0.4111, "step": 44825 }, { "epoch": 1.1379472274752191, "grad_norm": 0.345703125, "learning_rate": 0.00014006892760238087, "loss": 0.4273, "step": 44830 }, { "epoch": 1.138074145524235, "grad_norm": 0.35546875, "learning_rate": 0.00014003577326150485, "loss": 0.4173, "step": 44835 }, { "epoch": 1.1382010635732507, "grad_norm": 0.3515625, "learning_rate": 0.00014000261940956942, "loss": 0.4344, "step": 44840 }, { "epoch": 1.1383279816222664, "grad_norm": 0.349609375, "learning_rate": 0.0001399694660482015, "loss": 0.4353, "step": 44845 }, { "epoch": 1.1384548996712822, "grad_norm": 0.3515625, "learning_rate": 0.00013993631317902794, "loss": 0.4261, "step": 44850 }, { "epoch": 1.138581817720298, "grad_norm": 0.333984375, "learning_rate": 0.00013990316080367545, "loss": 0.4211, "step": 44855 }, { "epoch": 1.1387087357693138, "grad_norm": 0.357421875, "learning_rate": 0.00013987000892377085, "loss": 0.4015, "step": 44860 }, { "epoch": 1.1388356538183295, "grad_norm": 0.341796875, "learning_rate": 0.00013983685754094083, "loss": 0.4131, "step": 44865 }, { "epoch": 1.1389625718673453, "grad_norm": 0.34765625, "learning_rate": 0.0001398037066568122, "loss": 0.4272, "step": 44870 }, { "epoch": 1.139089489916361, "grad_norm": 0.357421875, "learning_rate": 0.00013977055627301162, "loss": 0.4451, "step": 44875 }, { "epoch": 1.1392164079653768, "grad_norm": 0.349609375, "learning_rate": 0.00013973740639116578, "loss": 0.4309, "step": 44880 }, { "epoch": 1.1393433260143926, "grad_norm": 0.34765625, "learning_rate": 0.00013970425701290132, "loss": 0.4023, "step": 44885 }, { "epoch": 1.1394702440634084, "grad_norm": 0.3671875, "learning_rate": 0.00013967110813984486, "loss": 0.4497, "step": 44890 }, { "epoch": 1.1395971621124241, "grad_norm": 0.3125, "learning_rate": 0.000139637959773623, "loss": 0.4208, "step": 44895 }, { "epoch": 1.1397240801614397, "grad_norm": 0.357421875, "learning_rate": 0.00013960481191586227, "loss": 0.4302, "step": 44900 }, { "epoch": 1.1398509982104554, "grad_norm": 0.353515625, "learning_rate": 0.00013957166456818936, "loss": 0.4273, "step": 44905 }, { "epoch": 1.1399779162594712, "grad_norm": 0.349609375, "learning_rate": 0.00013953851773223065, "loss": 0.4381, "step": 44910 }, { "epoch": 1.140104834308487, "grad_norm": 0.3203125, "learning_rate": 0.00013950537140961278, "loss": 0.4133, "step": 44915 }, { "epoch": 1.1402317523575027, "grad_norm": 0.365234375, "learning_rate": 0.00013947222560196213, "loss": 0.4383, "step": 44920 }, { "epoch": 1.1403586704065185, "grad_norm": 0.365234375, "learning_rate": 0.00013943908031090516, "loss": 0.4031, "step": 44925 }, { "epoch": 1.1404855884555343, "grad_norm": 0.337890625, "learning_rate": 0.00013940593553806834, "loss": 0.3943, "step": 44930 }, { "epoch": 1.14061250650455, "grad_norm": 0.337890625, "learning_rate": 0.00013937279128507804, "loss": 0.4266, "step": 44935 }, { "epoch": 1.1407394245535658, "grad_norm": 0.33984375, "learning_rate": 0.00013933964755356062, "loss": 0.4214, "step": 44940 }, { "epoch": 1.1408663426025816, "grad_norm": 0.375, "learning_rate": 0.00013930650434514243, "loss": 0.428, "step": 44945 }, { "epoch": 1.1409932606515973, "grad_norm": 0.337890625, "learning_rate": 0.00013927336166144985, "loss": 0.3963, "step": 44950 }, { "epoch": 1.141120178700613, "grad_norm": 0.349609375, "learning_rate": 0.00013924021950410912, "loss": 0.457, "step": 44955 }, { "epoch": 1.1412470967496287, "grad_norm": 0.33203125, "learning_rate": 0.00013920707787474655, "loss": 0.4249, "step": 44960 }, { "epoch": 1.1413740147986444, "grad_norm": 0.35546875, "learning_rate": 0.00013917393677498831, "loss": 0.4542, "step": 44965 }, { "epoch": 1.1415009328476602, "grad_norm": 0.33203125, "learning_rate": 0.00013914079620646073, "loss": 0.4302, "step": 44970 }, { "epoch": 1.141627850896676, "grad_norm": 0.357421875, "learning_rate": 0.00013910765617078998, "loss": 0.3878, "step": 44975 }, { "epoch": 1.1417547689456917, "grad_norm": 0.32421875, "learning_rate": 0.00013907451666960217, "loss": 0.3812, "step": 44980 }, { "epoch": 1.1418816869947075, "grad_norm": 0.34375, "learning_rate": 0.00013904137770452349, "loss": 0.4249, "step": 44985 }, { "epoch": 1.1420086050437233, "grad_norm": 0.330078125, "learning_rate": 0.00013900823927718, "loss": 0.4222, "step": 44990 }, { "epoch": 1.142135523092739, "grad_norm": 0.357421875, "learning_rate": 0.00013897510138919788, "loss": 0.4198, "step": 44995 }, { "epoch": 1.1422624411417548, "grad_norm": 0.361328125, "learning_rate": 0.00013894196404220312, "loss": 0.4204, "step": 45000 }, { "epoch": 1.1423893591907706, "grad_norm": 0.33984375, "learning_rate": 0.00013890882723782181, "loss": 0.4049, "step": 45005 }, { "epoch": 1.1425162772397863, "grad_norm": 0.365234375, "learning_rate": 0.00013887569097767991, "loss": 0.4214, "step": 45010 }, { "epoch": 1.142643195288802, "grad_norm": 0.361328125, "learning_rate": 0.00013884255526340342, "loss": 0.4102, "step": 45015 }, { "epoch": 1.1427701133378179, "grad_norm": 0.37890625, "learning_rate": 0.00013880942009661826, "loss": 0.4505, "step": 45020 }, { "epoch": 1.1428970313868336, "grad_norm": 0.34765625, "learning_rate": 0.00013877628547895042, "loss": 0.4388, "step": 45025 }, { "epoch": 1.1430239494358494, "grad_norm": 0.33203125, "learning_rate": 0.0001387431514120258, "loss": 0.4105, "step": 45030 }, { "epoch": 1.143150867484865, "grad_norm": 0.34765625, "learning_rate": 0.0001387100178974702, "loss": 0.4222, "step": 45035 }, { "epoch": 1.1432777855338807, "grad_norm": 0.314453125, "learning_rate": 0.0001386768849369096, "loss": 0.3845, "step": 45040 }, { "epoch": 1.1434047035828965, "grad_norm": 0.337890625, "learning_rate": 0.00013864375253196974, "loss": 0.4143, "step": 45045 }, { "epoch": 1.1435316216319122, "grad_norm": 0.361328125, "learning_rate": 0.0001386106206842764, "loss": 0.4408, "step": 45050 }, { "epoch": 1.143658539680928, "grad_norm": 0.33203125, "learning_rate": 0.00013857748939545537, "loss": 0.4061, "step": 45055 }, { "epoch": 1.1437854577299438, "grad_norm": 0.357421875, "learning_rate": 0.0001385443586671324, "loss": 0.4047, "step": 45060 }, { "epoch": 1.1439123757789595, "grad_norm": 0.349609375, "learning_rate": 0.00013851122850093314, "loss": 0.42, "step": 45065 }, { "epoch": 1.1440392938279753, "grad_norm": 0.328125, "learning_rate": 0.00013847809889848335, "loss": 0.4032, "step": 45070 }, { "epoch": 1.144166211876991, "grad_norm": 0.353515625, "learning_rate": 0.00013844496986140865, "loss": 0.4356, "step": 45075 }, { "epoch": 1.1442931299260068, "grad_norm": 0.33203125, "learning_rate": 0.0001384118413913347, "loss": 0.4295, "step": 45080 }, { "epoch": 1.1444200479750226, "grad_norm": 0.373046875, "learning_rate": 0.00013837871348988705, "loss": 0.4202, "step": 45085 }, { "epoch": 1.1445469660240382, "grad_norm": 0.345703125, "learning_rate": 0.00013834558615869128, "loss": 0.4018, "step": 45090 }, { "epoch": 1.144673884073054, "grad_norm": 0.33984375, "learning_rate": 0.00013831245939937298, "loss": 0.4464, "step": 45095 }, { "epoch": 1.1448008021220697, "grad_norm": 0.341796875, "learning_rate": 0.00013827933321355765, "loss": 0.4121, "step": 45100 }, { "epoch": 1.1449277201710855, "grad_norm": 0.353515625, "learning_rate": 0.00013824620760287078, "loss": 0.4231, "step": 45105 }, { "epoch": 1.1450546382201012, "grad_norm": 0.328125, "learning_rate": 0.00013821308256893778, "loss": 0.4087, "step": 45110 }, { "epoch": 1.145181556269117, "grad_norm": 0.306640625, "learning_rate": 0.00013817995811338417, "loss": 0.41, "step": 45115 }, { "epoch": 1.1453084743181328, "grad_norm": 0.365234375, "learning_rate": 0.00013814683423783527, "loss": 0.4497, "step": 45120 }, { "epoch": 1.1454353923671485, "grad_norm": 0.353515625, "learning_rate": 0.00013811371094391651, "loss": 0.4305, "step": 45125 }, { "epoch": 1.1455623104161643, "grad_norm": 0.376953125, "learning_rate": 0.00013808058823325324, "loss": 0.4468, "step": 45130 }, { "epoch": 1.14568922846518, "grad_norm": 0.3359375, "learning_rate": 0.00013804746610747076, "loss": 0.401, "step": 45135 }, { "epoch": 1.1458161465141958, "grad_norm": 0.330078125, "learning_rate": 0.00013801434456819434, "loss": 0.3995, "step": 45140 }, { "epoch": 1.1459430645632116, "grad_norm": 0.328125, "learning_rate": 0.0001379812236170492, "loss": 0.4143, "step": 45145 }, { "epoch": 1.1460699826122274, "grad_norm": 0.33203125, "learning_rate": 0.0001379481032556607, "loss": 0.4013, "step": 45150 }, { "epoch": 1.1461969006612431, "grad_norm": 0.318359375, "learning_rate": 0.00013791498348565389, "loss": 0.3994, "step": 45155 }, { "epoch": 1.146323818710259, "grad_norm": 0.349609375, "learning_rate": 0.00013788186430865412, "loss": 0.4106, "step": 45160 }, { "epoch": 1.1464507367592744, "grad_norm": 0.365234375, "learning_rate": 0.0001378487457262864, "loss": 0.4298, "step": 45165 }, { "epoch": 1.1465776548082902, "grad_norm": 0.3984375, "learning_rate": 0.0001378156277401759, "loss": 0.4386, "step": 45170 }, { "epoch": 1.146704572857306, "grad_norm": 0.390625, "learning_rate": 0.0001377825103519477, "loss": 0.4249, "step": 45175 }, { "epoch": 1.1468314909063217, "grad_norm": 0.283203125, "learning_rate": 0.00013774939356322685, "loss": 0.3529, "step": 45180 }, { "epoch": 1.1469584089553375, "grad_norm": 0.34375, "learning_rate": 0.00013771627737563838, "loss": 0.4446, "step": 45185 }, { "epoch": 1.1470853270043533, "grad_norm": 0.359375, "learning_rate": 0.00013768316179080725, "loss": 0.4336, "step": 45190 }, { "epoch": 1.147212245053369, "grad_norm": 0.3515625, "learning_rate": 0.0001376500468103585, "loss": 0.4123, "step": 45195 }, { "epoch": 1.1473391631023848, "grad_norm": 0.345703125, "learning_rate": 0.00013761693243591704, "loss": 0.3974, "step": 45200 }, { "epoch": 1.1474660811514006, "grad_norm": 0.361328125, "learning_rate": 0.00013758381866910775, "loss": 0.4369, "step": 45205 }, { "epoch": 1.1475929992004164, "grad_norm": 0.34765625, "learning_rate": 0.00013755070551155554, "loss": 0.4182, "step": 45210 }, { "epoch": 1.1477199172494321, "grad_norm": 0.345703125, "learning_rate": 0.0001375175929648852, "loss": 0.4397, "step": 45215 }, { "epoch": 1.1478468352984479, "grad_norm": 0.337890625, "learning_rate": 0.00013748448103072166, "loss": 0.3948, "step": 45220 }, { "epoch": 1.1479737533474634, "grad_norm": 0.357421875, "learning_rate": 0.00013745136971068963, "loss": 0.4425, "step": 45225 }, { "epoch": 1.1481006713964792, "grad_norm": 0.353515625, "learning_rate": 0.00013741825900641394, "loss": 0.3896, "step": 45230 }, { "epoch": 1.148227589445495, "grad_norm": 0.34375, "learning_rate": 0.0001373851489195192, "loss": 0.3569, "step": 45235 }, { "epoch": 1.1483545074945107, "grad_norm": 0.35546875, "learning_rate": 0.0001373520394516302, "loss": 0.4199, "step": 45240 }, { "epoch": 1.1484814255435265, "grad_norm": 0.34375, "learning_rate": 0.0001373189306043716, "loss": 0.3862, "step": 45245 }, { "epoch": 1.1486083435925423, "grad_norm": 0.3515625, "learning_rate": 0.00013728582237936804, "loss": 0.4379, "step": 45250 }, { "epoch": 1.148735261641558, "grad_norm": 0.34375, "learning_rate": 0.0001372527147782441, "loss": 0.4166, "step": 45255 }, { "epoch": 1.1488621796905738, "grad_norm": 0.35546875, "learning_rate": 0.00013721960780262434, "loss": 0.3972, "step": 45260 }, { "epoch": 1.1489890977395896, "grad_norm": 0.3359375, "learning_rate": 0.00013718650145413331, "loss": 0.4206, "step": 45265 }, { "epoch": 1.1491160157886053, "grad_norm": 0.349609375, "learning_rate": 0.0001371533957343956, "loss": 0.4076, "step": 45270 }, { "epoch": 1.149242933837621, "grad_norm": 0.3515625, "learning_rate": 0.00013712029064503564, "loss": 0.4369, "step": 45275 }, { "epoch": 1.1493698518866369, "grad_norm": 0.322265625, "learning_rate": 0.00013708718618767783, "loss": 0.4328, "step": 45280 }, { "epoch": 1.1494967699356526, "grad_norm": 0.365234375, "learning_rate": 0.00013705408236394672, "loss": 0.4144, "step": 45285 }, { "epoch": 1.1496236879846684, "grad_norm": 0.345703125, "learning_rate": 0.0001370209791754666, "loss": 0.3863, "step": 45290 }, { "epoch": 1.1497506060336842, "grad_norm": 0.330078125, "learning_rate": 0.00013698787662386191, "loss": 0.4136, "step": 45295 }, { "epoch": 1.1498775240826997, "grad_norm": 0.361328125, "learning_rate": 0.0001369547747107569, "loss": 0.4056, "step": 45300 }, { "epoch": 1.1500044421317155, "grad_norm": 0.353515625, "learning_rate": 0.0001369216734377759, "loss": 0.3968, "step": 45305 }, { "epoch": 1.1501313601807313, "grad_norm": 0.34765625, "learning_rate": 0.00013688857280654313, "loss": 0.4092, "step": 45310 }, { "epoch": 1.150258278229747, "grad_norm": 0.353515625, "learning_rate": 0.0001368554728186829, "loss": 0.4236, "step": 45315 }, { "epoch": 1.1503851962787628, "grad_norm": 0.341796875, "learning_rate": 0.00013682237347581943, "loss": 0.4145, "step": 45320 }, { "epoch": 1.1505121143277786, "grad_norm": 0.369140625, "learning_rate": 0.00013678927477957679, "loss": 0.4129, "step": 45325 }, { "epoch": 1.1506390323767943, "grad_norm": 0.373046875, "learning_rate": 0.00013675617673157922, "loss": 0.4067, "step": 45330 }, { "epoch": 1.15076595042581, "grad_norm": 0.3515625, "learning_rate": 0.00013672307933345075, "loss": 0.4003, "step": 45335 }, { "epoch": 1.1508928684748259, "grad_norm": 0.326171875, "learning_rate": 0.00013668998258681544, "loss": 0.4304, "step": 45340 }, { "epoch": 1.1510197865238416, "grad_norm": 0.32421875, "learning_rate": 0.00013665688649329742, "loss": 0.4079, "step": 45345 }, { "epoch": 1.1511467045728574, "grad_norm": 0.357421875, "learning_rate": 0.00013662379105452069, "loss": 0.4341, "step": 45350 }, { "epoch": 1.151273622621873, "grad_norm": 0.3515625, "learning_rate": 0.00013659069627210916, "loss": 0.436, "step": 45355 }, { "epoch": 1.1514005406708887, "grad_norm": 0.330078125, "learning_rate": 0.00013655760214768687, "loss": 0.4275, "step": 45360 }, { "epoch": 1.1515274587199045, "grad_norm": 0.341796875, "learning_rate": 0.00013652450868287768, "loss": 0.4106, "step": 45365 }, { "epoch": 1.1516543767689202, "grad_norm": 0.3515625, "learning_rate": 0.00013649141587930547, "loss": 0.4137, "step": 45370 }, { "epoch": 1.151781294817936, "grad_norm": 0.35546875, "learning_rate": 0.00013645832373859412, "loss": 0.4078, "step": 45375 }, { "epoch": 1.1519082128669518, "grad_norm": 0.32421875, "learning_rate": 0.00013642523226236743, "loss": 0.4021, "step": 45380 }, { "epoch": 1.1520351309159675, "grad_norm": 0.349609375, "learning_rate": 0.00013639214145224916, "loss": 0.4221, "step": 45385 }, { "epoch": 1.1521620489649833, "grad_norm": 0.34765625, "learning_rate": 0.00013635905130986308, "loss": 0.41, "step": 45390 }, { "epoch": 1.152288967013999, "grad_norm": 0.36328125, "learning_rate": 0.00013632596183683294, "loss": 0.4398, "step": 45395 }, { "epoch": 1.1524158850630148, "grad_norm": 0.369140625, "learning_rate": 0.00013629287303478242, "loss": 0.4304, "step": 45400 }, { "epoch": 1.1525428031120306, "grad_norm": 0.365234375, "learning_rate": 0.0001362597849053351, "loss": 0.4163, "step": 45405 }, { "epoch": 1.1526697211610464, "grad_norm": 0.3515625, "learning_rate": 0.0001362266974501147, "loss": 0.4406, "step": 45410 }, { "epoch": 1.1527966392100621, "grad_norm": 0.34765625, "learning_rate": 0.00013619361067074477, "loss": 0.4288, "step": 45415 }, { "epoch": 1.152923557259078, "grad_norm": 0.349609375, "learning_rate": 0.0001361605245688489, "loss": 0.4047, "step": 45420 }, { "epoch": 1.1530504753080937, "grad_norm": 0.322265625, "learning_rate": 0.00013612743914605054, "loss": 0.4211, "step": 45425 }, { "epoch": 1.1531773933571092, "grad_norm": 0.34765625, "learning_rate": 0.00013609435440397322, "loss": 0.4106, "step": 45430 }, { "epoch": 1.153304311406125, "grad_norm": 0.357421875, "learning_rate": 0.00013606127034424039, "loss": 0.4407, "step": 45435 }, { "epoch": 1.1534312294551408, "grad_norm": 0.341796875, "learning_rate": 0.00013602818696847546, "loss": 0.4271, "step": 45440 }, { "epoch": 1.1535581475041565, "grad_norm": 0.34765625, "learning_rate": 0.00013599510427830187, "loss": 0.4229, "step": 45445 }, { "epoch": 1.1536850655531723, "grad_norm": 0.326171875, "learning_rate": 0.0001359620222753429, "loss": 0.4197, "step": 45450 }, { "epoch": 1.153811983602188, "grad_norm": 0.36328125, "learning_rate": 0.00013592894096122192, "loss": 0.4461, "step": 45455 }, { "epoch": 1.1539389016512038, "grad_norm": 0.333984375, "learning_rate": 0.0001358958603375622, "loss": 0.416, "step": 45460 }, { "epoch": 1.1540658197002196, "grad_norm": 0.3515625, "learning_rate": 0.00013586278040598695, "loss": 0.4315, "step": 45465 }, { "epoch": 1.1541927377492354, "grad_norm": 0.34375, "learning_rate": 0.00013582970116811946, "loss": 0.426, "step": 45470 }, { "epoch": 1.1543196557982511, "grad_norm": 0.34765625, "learning_rate": 0.0001357966226255829, "loss": 0.4408, "step": 45475 }, { "epoch": 1.154446573847267, "grad_norm": 0.3671875, "learning_rate": 0.0001357635447800004, "loss": 0.4301, "step": 45480 }, { "epoch": 1.1545734918962827, "grad_norm": 0.34375, "learning_rate": 0.0001357304676329951, "loss": 0.3906, "step": 45485 }, { "epoch": 1.1547004099452982, "grad_norm": 0.359375, "learning_rate": 0.00013569739118619007, "loss": 0.4275, "step": 45490 }, { "epoch": 1.154827327994314, "grad_norm": 0.3671875, "learning_rate": 0.00013566431544120836, "loss": 0.4134, "step": 45495 }, { "epoch": 1.1549542460433297, "grad_norm": 0.330078125, "learning_rate": 0.00013563124039967298, "loss": 0.424, "step": 45500 }, { "epoch": 1.1550811640923455, "grad_norm": 0.376953125, "learning_rate": 0.00013559816606320693, "loss": 0.439, "step": 45505 }, { "epoch": 1.1552080821413613, "grad_norm": 0.37890625, "learning_rate": 0.00013556509243343307, "loss": 0.4462, "step": 45510 }, { "epoch": 1.155335000190377, "grad_norm": 0.361328125, "learning_rate": 0.00013553201951197442, "loss": 0.4203, "step": 45515 }, { "epoch": 1.1554619182393928, "grad_norm": 0.328125, "learning_rate": 0.00013549894730045383, "loss": 0.3856, "step": 45520 }, { "epoch": 1.1555888362884086, "grad_norm": 0.357421875, "learning_rate": 0.0001354658758004941, "loss": 0.4039, "step": 45525 }, { "epoch": 1.1557157543374244, "grad_norm": 0.326171875, "learning_rate": 0.00013543280501371799, "loss": 0.4291, "step": 45530 }, { "epoch": 1.1558426723864401, "grad_norm": 0.365234375, "learning_rate": 0.00013539973494174838, "loss": 0.4482, "step": 45535 }, { "epoch": 1.1559695904354559, "grad_norm": 0.3671875, "learning_rate": 0.000135366665586208, "loss": 0.3902, "step": 45540 }, { "epoch": 1.1560965084844717, "grad_norm": 0.328125, "learning_rate": 0.00013533359694871947, "loss": 0.4024, "step": 45545 }, { "epoch": 1.1562234265334874, "grad_norm": 0.37109375, "learning_rate": 0.00013530052903090552, "loss": 0.4365, "step": 45550 }, { "epoch": 1.1563503445825032, "grad_norm": 0.361328125, "learning_rate": 0.00013526746183438872, "loss": 0.4065, "step": 45555 }, { "epoch": 1.1564772626315187, "grad_norm": 0.333984375, "learning_rate": 0.00013523439536079174, "loss": 0.4016, "step": 45560 }, { "epoch": 1.1566041806805345, "grad_norm": 0.302734375, "learning_rate": 0.0001352013296117371, "loss": 0.3941, "step": 45565 }, { "epoch": 1.1567310987295503, "grad_norm": 0.330078125, "learning_rate": 0.0001351682645888473, "loss": 0.4064, "step": 45570 }, { "epoch": 1.156858016778566, "grad_norm": 0.3671875, "learning_rate": 0.00013513520029374487, "loss": 0.4289, "step": 45575 }, { "epoch": 1.1569849348275818, "grad_norm": 0.373046875, "learning_rate": 0.00013510213672805223, "loss": 0.3921, "step": 45580 }, { "epoch": 1.1571118528765976, "grad_norm": 0.36328125, "learning_rate": 0.0001350690738933918, "loss": 0.4052, "step": 45585 }, { "epoch": 1.1572387709256133, "grad_norm": 0.341796875, "learning_rate": 0.00013503601179138596, "loss": 0.4253, "step": 45590 }, { "epoch": 1.157365688974629, "grad_norm": 0.357421875, "learning_rate": 0.00013500295042365702, "loss": 0.433, "step": 45595 }, { "epoch": 1.1574926070236449, "grad_norm": 0.36328125, "learning_rate": 0.00013496988979182738, "loss": 0.4218, "step": 45600 }, { "epoch": 1.1576195250726606, "grad_norm": 0.359375, "learning_rate": 0.00013493682989751922, "loss": 0.3961, "step": 45605 }, { "epoch": 1.1577464431216764, "grad_norm": 0.35546875, "learning_rate": 0.00013490377074235489, "loss": 0.4294, "step": 45610 }, { "epoch": 1.1578733611706922, "grad_norm": 0.36328125, "learning_rate": 0.00013487071232795645, "loss": 0.4164, "step": 45615 }, { "epoch": 1.1580002792197077, "grad_norm": 0.345703125, "learning_rate": 0.00013483765465594616, "loss": 0.4425, "step": 45620 }, { "epoch": 1.1581271972687235, "grad_norm": 0.353515625, "learning_rate": 0.00013480459772794614, "loss": 0.4389, "step": 45625 }, { "epoch": 1.1582541153177393, "grad_norm": 0.349609375, "learning_rate": 0.00013477154154557843, "loss": 0.4013, "step": 45630 }, { "epoch": 1.158381033366755, "grad_norm": 0.337890625, "learning_rate": 0.00013473848611046508, "loss": 0.4222, "step": 45635 }, { "epoch": 1.1585079514157708, "grad_norm": 0.3671875, "learning_rate": 0.00013470543142422818, "loss": 0.4561, "step": 45640 }, { "epoch": 1.1586348694647866, "grad_norm": 0.34375, "learning_rate": 0.00013467237748848967, "loss": 0.4225, "step": 45645 }, { "epoch": 1.1587617875138023, "grad_norm": 0.359375, "learning_rate": 0.0001346393243048715, "loss": 0.4015, "step": 45650 }, { "epoch": 1.158888705562818, "grad_norm": 0.35546875, "learning_rate": 0.00013460627187499548, "loss": 0.4063, "step": 45655 }, { "epoch": 1.1590156236118339, "grad_norm": 0.38671875, "learning_rate": 0.00013457322020048363, "loss": 0.4119, "step": 45660 }, { "epoch": 1.1591425416608496, "grad_norm": 0.353515625, "learning_rate": 0.00013454016928295774, "loss": 0.4249, "step": 45665 }, { "epoch": 1.1592694597098654, "grad_norm": 0.37109375, "learning_rate": 0.00013450711912403956, "loss": 0.4155, "step": 45670 }, { "epoch": 1.1593963777588812, "grad_norm": 0.3515625, "learning_rate": 0.0001344740697253509, "loss": 0.4106, "step": 45675 }, { "epoch": 1.159523295807897, "grad_norm": 0.345703125, "learning_rate": 0.0001344410210885134, "loss": 0.4266, "step": 45680 }, { "epoch": 1.1596502138569127, "grad_norm": 0.33984375, "learning_rate": 0.00013440797321514884, "loss": 0.4229, "step": 45685 }, { "epoch": 1.1597771319059285, "grad_norm": 0.380859375, "learning_rate": 0.00013437492610687883, "loss": 0.4373, "step": 45690 }, { "epoch": 1.159904049954944, "grad_norm": 0.349609375, "learning_rate": 0.00013434187976532494, "loss": 0.401, "step": 45695 }, { "epoch": 1.1600309680039598, "grad_norm": 0.365234375, "learning_rate": 0.0001343088341921088, "loss": 0.4346, "step": 45700 }, { "epoch": 1.1601578860529755, "grad_norm": 0.353515625, "learning_rate": 0.00013427578938885192, "loss": 0.4321, "step": 45705 }, { "epoch": 1.1602848041019913, "grad_norm": 0.3515625, "learning_rate": 0.00013424274535717574, "loss": 0.4412, "step": 45710 }, { "epoch": 1.160411722151007, "grad_norm": 0.36328125, "learning_rate": 0.0001342097020987018, "loss": 0.431, "step": 45715 }, { "epoch": 1.1605386402000228, "grad_norm": 0.326171875, "learning_rate": 0.00013417665961505147, "loss": 0.4459, "step": 45720 }, { "epoch": 1.1606655582490386, "grad_norm": 0.34765625, "learning_rate": 0.00013414361790784615, "loss": 0.4227, "step": 45725 }, { "epoch": 1.1607924762980544, "grad_norm": 0.376953125, "learning_rate": 0.00013411057697870722, "loss": 0.4215, "step": 45730 }, { "epoch": 1.1609193943470701, "grad_norm": 0.3359375, "learning_rate": 0.00013407753682925593, "loss": 0.4197, "step": 45735 }, { "epoch": 1.161046312396086, "grad_norm": 0.33984375, "learning_rate": 0.00013404449746111358, "loss": 0.4126, "step": 45740 }, { "epoch": 1.1611732304451017, "grad_norm": 0.318359375, "learning_rate": 0.00013401145887590138, "loss": 0.4035, "step": 45745 }, { "epoch": 1.1613001484941172, "grad_norm": 0.328125, "learning_rate": 0.00013397842107524052, "loss": 0.4268, "step": 45750 }, { "epoch": 1.161427066543133, "grad_norm": 0.37109375, "learning_rate": 0.00013394538406075212, "loss": 0.4401, "step": 45755 }, { "epoch": 1.1615539845921488, "grad_norm": 0.337890625, "learning_rate": 0.00013391234783405738, "loss": 0.4246, "step": 45760 }, { "epoch": 1.1616809026411645, "grad_norm": 0.345703125, "learning_rate": 0.0001338793123967773, "loss": 0.4002, "step": 45765 }, { "epoch": 1.1618078206901803, "grad_norm": 0.369140625, "learning_rate": 0.00013384627775053296, "loss": 0.428, "step": 45770 }, { "epoch": 1.161934738739196, "grad_norm": 0.369140625, "learning_rate": 0.00013381324389694532, "loss": 0.4239, "step": 45775 }, { "epoch": 1.1620616567882118, "grad_norm": 0.341796875, "learning_rate": 0.00013378021083763537, "loss": 0.3897, "step": 45780 }, { "epoch": 1.1621885748372276, "grad_norm": 0.35546875, "learning_rate": 0.00013374717857422394, "loss": 0.4275, "step": 45785 }, { "epoch": 1.1623154928862434, "grad_norm": 0.3203125, "learning_rate": 0.00013371414710833205, "loss": 0.4018, "step": 45790 }, { "epoch": 1.1624424109352591, "grad_norm": 0.349609375, "learning_rate": 0.00013368111644158047, "loss": 0.4156, "step": 45795 }, { "epoch": 1.162569328984275, "grad_norm": 0.365234375, "learning_rate": 0.00013364808657558998, "loss": 0.4263, "step": 45800 }, { "epoch": 1.1626962470332907, "grad_norm": 0.330078125, "learning_rate": 0.00013361505751198138, "loss": 0.4173, "step": 45805 }, { "epoch": 1.1628231650823064, "grad_norm": 0.353515625, "learning_rate": 0.0001335820292523754, "loss": 0.4408, "step": 45810 }, { "epoch": 1.1629500831313222, "grad_norm": 0.35546875, "learning_rate": 0.00013354900179839268, "loss": 0.4327, "step": 45815 }, { "epoch": 1.163077001180338, "grad_norm": 0.337890625, "learning_rate": 0.00013351597515165394, "loss": 0.4154, "step": 45820 }, { "epoch": 1.1632039192293535, "grad_norm": 0.3671875, "learning_rate": 0.0001334829493137797, "loss": 0.4077, "step": 45825 }, { "epoch": 1.1633308372783693, "grad_norm": 0.345703125, "learning_rate": 0.00013344992428639056, "loss": 0.4167, "step": 45830 }, { "epoch": 1.163457755327385, "grad_norm": 0.337890625, "learning_rate": 0.000133416900071107, "loss": 0.4451, "step": 45835 }, { "epoch": 1.1635846733764008, "grad_norm": 0.357421875, "learning_rate": 0.00013338387666954958, "loss": 0.4272, "step": 45840 }, { "epoch": 1.1637115914254166, "grad_norm": 0.31640625, "learning_rate": 0.0001333508540833387, "loss": 0.3961, "step": 45845 }, { "epoch": 1.1638385094744323, "grad_norm": 0.33984375, "learning_rate": 0.00013331783231409477, "loss": 0.4453, "step": 45850 }, { "epoch": 1.1639654275234481, "grad_norm": 0.361328125, "learning_rate": 0.0001332848113634382, "loss": 1.0871, "step": 45855 }, { "epoch": 1.1640923455724639, "grad_norm": 0.33203125, "learning_rate": 0.00013325179123298926, "loss": 0.4022, "step": 45860 }, { "epoch": 1.1642192636214796, "grad_norm": 0.353515625, "learning_rate": 0.00013321877192436828, "loss": 0.4126, "step": 45865 }, { "epoch": 1.1643461816704954, "grad_norm": 0.35546875, "learning_rate": 0.00013318575343919547, "loss": 0.4075, "step": 45870 }, { "epoch": 1.1644730997195112, "grad_norm": 0.34375, "learning_rate": 0.00013315273577909102, "loss": 0.4147, "step": 45875 }, { "epoch": 1.164600017768527, "grad_norm": 0.34375, "learning_rate": 0.0001331197189456751, "loss": 0.4129, "step": 45880 }, { "epoch": 1.1647269358175425, "grad_norm": 0.361328125, "learning_rate": 0.00013308670294056787, "loss": 0.4268, "step": 45885 }, { "epoch": 1.1648538538665583, "grad_norm": 0.353515625, "learning_rate": 0.0001330536877653894, "loss": 0.4429, "step": 45890 }, { "epoch": 1.164980771915574, "grad_norm": 0.3828125, "learning_rate": 0.0001330206734217597, "loss": 0.4173, "step": 45895 }, { "epoch": 1.1651076899645898, "grad_norm": 0.333984375, "learning_rate": 0.00013298765991129883, "loss": 0.3853, "step": 45900 }, { "epoch": 1.1652346080136056, "grad_norm": 0.361328125, "learning_rate": 0.00013295464723562668, "loss": 0.4361, "step": 45905 }, { "epoch": 1.1653615260626213, "grad_norm": 0.35546875, "learning_rate": 0.00013292163539636314, "loss": 0.425, "step": 45910 }, { "epoch": 1.165488444111637, "grad_norm": 0.36328125, "learning_rate": 0.00013288862439512822, "loss": 0.4331, "step": 45915 }, { "epoch": 1.1656153621606529, "grad_norm": 0.3359375, "learning_rate": 0.0001328556142335417, "loss": 0.4147, "step": 45920 }, { "epoch": 1.1657422802096686, "grad_norm": 0.3515625, "learning_rate": 0.0001328226049132233, "loss": 0.4277, "step": 45925 }, { "epoch": 1.1658691982586844, "grad_norm": 0.32421875, "learning_rate": 0.00013278959643579286, "loss": 0.4325, "step": 45930 }, { "epoch": 1.1659961163077002, "grad_norm": 0.318359375, "learning_rate": 0.00013275658880287008, "loss": 0.4044, "step": 45935 }, { "epoch": 1.166123034356716, "grad_norm": 0.353515625, "learning_rate": 0.00013272358201607464, "loss": 0.4091, "step": 45940 }, { "epoch": 1.1662499524057317, "grad_norm": 0.357421875, "learning_rate": 0.00013269057607702612, "loss": 0.3773, "step": 45945 }, { "epoch": 1.1663768704547475, "grad_norm": 0.35546875, "learning_rate": 0.00013265757098734416, "loss": 0.3772, "step": 45950 }, { "epoch": 1.1665037885037632, "grad_norm": 0.33203125, "learning_rate": 0.00013262456674864825, "loss": 0.3879, "step": 45955 }, { "epoch": 1.1666307065527788, "grad_norm": 0.3671875, "learning_rate": 0.00013259156336255793, "loss": 0.4156, "step": 45960 }, { "epoch": 1.1667576246017946, "grad_norm": 0.322265625, "learning_rate": 0.0001325585608306927, "loss": 0.4512, "step": 45965 }, { "epoch": 1.1668845426508103, "grad_norm": 0.341796875, "learning_rate": 0.0001325255591546719, "loss": 0.4253, "step": 45970 }, { "epoch": 1.167011460699826, "grad_norm": 0.373046875, "learning_rate": 0.0001324925583361149, "loss": 0.4284, "step": 45975 }, { "epoch": 1.1671383787488419, "grad_norm": 0.3359375, "learning_rate": 0.00013245955837664118, "loss": 0.4134, "step": 45980 }, { "epoch": 1.1672652967978576, "grad_norm": 0.310546875, "learning_rate": 0.00013242655927786994, "loss": 0.3952, "step": 45985 }, { "epoch": 1.1673922148468734, "grad_norm": 0.359375, "learning_rate": 0.0001323935610414204, "loss": 0.436, "step": 45990 }, { "epoch": 1.1675191328958892, "grad_norm": 0.33984375, "learning_rate": 0.00013236056366891185, "loss": 0.4127, "step": 45995 }, { "epoch": 1.167646050944905, "grad_norm": 0.3515625, "learning_rate": 0.00013232756716196334, "loss": 0.3964, "step": 46000 }, { "epoch": 1.1677729689939207, "grad_norm": 0.388671875, "learning_rate": 0.00013229457152219413, "loss": 0.4266, "step": 46005 }, { "epoch": 1.1678998870429365, "grad_norm": 0.341796875, "learning_rate": 0.00013226157675122322, "loss": 0.436, "step": 46010 }, { "epoch": 1.168026805091952, "grad_norm": 0.341796875, "learning_rate": 0.00013222858285066971, "loss": 0.4268, "step": 46015 }, { "epoch": 1.1681537231409678, "grad_norm": 0.345703125, "learning_rate": 0.00013219558982215254, "loss": 0.4252, "step": 46020 }, { "epoch": 1.1682806411899835, "grad_norm": 0.375, "learning_rate": 0.00013216259766729067, "loss": 0.4011, "step": 46025 }, { "epoch": 1.1684075592389993, "grad_norm": 0.3125, "learning_rate": 0.00013212960638770305, "loss": 0.4492, "step": 46030 }, { "epoch": 1.168534477288015, "grad_norm": 0.375, "learning_rate": 0.00013209661598500845, "loss": 0.3967, "step": 46035 }, { "epoch": 1.1686613953370308, "grad_norm": 0.333984375, "learning_rate": 0.00013206362646082584, "loss": 0.3906, "step": 46040 }, { "epoch": 1.1687883133860466, "grad_norm": 0.375, "learning_rate": 0.00013203063781677392, "loss": 0.4176, "step": 46045 }, { "epoch": 1.1689152314350624, "grad_norm": 0.349609375, "learning_rate": 0.00013199765005447145, "loss": 0.4259, "step": 46050 }, { "epoch": 1.1690421494840781, "grad_norm": 0.330078125, "learning_rate": 0.00013196466317553712, "loss": 0.3935, "step": 46055 }, { "epoch": 1.169169067533094, "grad_norm": 0.349609375, "learning_rate": 0.0001319316771815896, "loss": 0.3996, "step": 46060 }, { "epoch": 1.1692959855821097, "grad_norm": 0.36328125, "learning_rate": 0.00013189869207424747, "loss": 0.4214, "step": 46065 }, { "epoch": 1.1694229036311254, "grad_norm": 0.34375, "learning_rate": 0.00013186570785512933, "loss": 0.4218, "step": 46070 }, { "epoch": 1.1695498216801412, "grad_norm": 0.3359375, "learning_rate": 0.00013183272452585364, "loss": 0.4268, "step": 46075 }, { "epoch": 1.169676739729157, "grad_norm": 0.373046875, "learning_rate": 0.00013179974208803893, "loss": 0.4142, "step": 46080 }, { "epoch": 1.1698036577781727, "grad_norm": 0.3828125, "learning_rate": 0.00013176676054330364, "loss": 0.4128, "step": 46085 }, { "epoch": 1.1699305758271883, "grad_norm": 0.349609375, "learning_rate": 0.00013173377989326616, "loss": 0.4343, "step": 46090 }, { "epoch": 1.170057493876204, "grad_norm": 0.318359375, "learning_rate": 0.00013170080013954482, "loss": 0.4193, "step": 46095 }, { "epoch": 1.1701844119252198, "grad_norm": 0.33984375, "learning_rate": 0.00013166782128375785, "loss": 0.403, "step": 46100 }, { "epoch": 1.1703113299742356, "grad_norm": 0.33203125, "learning_rate": 0.00013163484332752367, "loss": 0.4216, "step": 46105 }, { "epoch": 1.1704382480232514, "grad_norm": 0.359375, "learning_rate": 0.00013160186627246038, "loss": 0.4128, "step": 46110 }, { "epoch": 1.1705651660722671, "grad_norm": 0.333984375, "learning_rate": 0.0001315688901201862, "loss": 0.4108, "step": 46115 }, { "epoch": 1.170692084121283, "grad_norm": 0.349609375, "learning_rate": 0.00013153591487231927, "loss": 0.4224, "step": 46120 }, { "epoch": 1.1708190021702987, "grad_norm": 0.375, "learning_rate": 0.00013150294053047759, "loss": 0.445, "step": 46125 }, { "epoch": 1.1709459202193144, "grad_norm": 0.359375, "learning_rate": 0.00013146996709627928, "loss": 0.421, "step": 46130 }, { "epoch": 1.1710728382683302, "grad_norm": 0.349609375, "learning_rate": 0.0001314369945713423, "loss": 0.4162, "step": 46135 }, { "epoch": 1.171199756317346, "grad_norm": 0.337890625, "learning_rate": 0.0001314040229572846, "loss": 0.4286, "step": 46140 }, { "epoch": 1.1713266743663617, "grad_norm": 0.33984375, "learning_rate": 0.00013137105225572408, "loss": 0.4568, "step": 46145 }, { "epoch": 1.1714535924153773, "grad_norm": 0.373046875, "learning_rate": 0.00013133808246827862, "loss": 0.4256, "step": 46150 }, { "epoch": 1.171580510464393, "grad_norm": 0.337890625, "learning_rate": 0.00013130511359656598, "loss": 0.4185, "step": 46155 }, { "epoch": 1.1717074285134088, "grad_norm": 0.33984375, "learning_rate": 0.00013127214564220396, "loss": 0.4198, "step": 46160 }, { "epoch": 1.1718343465624246, "grad_norm": 0.34765625, "learning_rate": 0.00013123917860681037, "loss": 0.4045, "step": 46165 }, { "epoch": 1.1719612646114403, "grad_norm": 0.359375, "learning_rate": 0.00013120621249200273, "loss": 0.4108, "step": 46170 }, { "epoch": 1.1720881826604561, "grad_norm": 0.36328125, "learning_rate": 0.0001311732472993988, "loss": 0.4273, "step": 46175 }, { "epoch": 1.1722151007094719, "grad_norm": 0.328125, "learning_rate": 0.00013114028303061614, "loss": 0.4065, "step": 46180 }, { "epoch": 1.1723420187584876, "grad_norm": 0.337890625, "learning_rate": 0.00013110731968727228, "loss": 0.4068, "step": 46185 }, { "epoch": 1.1724689368075034, "grad_norm": 0.34375, "learning_rate": 0.0001310743572709847, "loss": 0.4068, "step": 46190 }, { "epoch": 1.1725958548565192, "grad_norm": 0.345703125, "learning_rate": 0.00013104139578337087, "loss": 0.4198, "step": 46195 }, { "epoch": 1.172722772905535, "grad_norm": 0.361328125, "learning_rate": 0.00013100843522604816, "loss": 0.3972, "step": 46200 }, { "epoch": 1.1728496909545507, "grad_norm": 0.3515625, "learning_rate": 0.00013097547560063402, "loss": 0.4013, "step": 46205 }, { "epoch": 1.1729766090035665, "grad_norm": 0.3515625, "learning_rate": 0.0001309425169087457, "loss": 0.4021, "step": 46210 }, { "epoch": 1.1731035270525823, "grad_norm": 0.345703125, "learning_rate": 0.00013090955915200052, "loss": 0.4304, "step": 46215 }, { "epoch": 1.173230445101598, "grad_norm": 0.3359375, "learning_rate": 0.00013087660233201563, "loss": 0.4016, "step": 46220 }, { "epoch": 1.1733573631506136, "grad_norm": 0.349609375, "learning_rate": 0.00013084364645040819, "loss": 0.4517, "step": 46225 }, { "epoch": 1.1734842811996293, "grad_norm": 0.330078125, "learning_rate": 0.00013081069150879543, "loss": 0.3901, "step": 46230 }, { "epoch": 1.173611199248645, "grad_norm": 0.365234375, "learning_rate": 0.00013077773750879443, "loss": 0.415, "step": 46235 }, { "epoch": 1.1737381172976609, "grad_norm": 0.345703125, "learning_rate": 0.0001307447844520222, "loss": 0.4332, "step": 46240 }, { "epoch": 1.1738650353466766, "grad_norm": 0.35546875, "learning_rate": 0.00013071183234009565, "loss": 0.4364, "step": 46245 }, { "epoch": 1.1739919533956924, "grad_norm": 0.365234375, "learning_rate": 0.00013067888117463186, "loss": 0.4248, "step": 46250 }, { "epoch": 1.1741188714447082, "grad_norm": 0.34375, "learning_rate": 0.00013064593095724769, "loss": 0.4224, "step": 46255 }, { "epoch": 1.174245789493724, "grad_norm": 0.365234375, "learning_rate": 0.00013061298168955994, "loss": 0.3986, "step": 46260 }, { "epoch": 1.1743727075427397, "grad_norm": 0.326171875, "learning_rate": 0.00013058003337318547, "loss": 0.422, "step": 46265 }, { "epoch": 1.1744996255917555, "grad_norm": 0.357421875, "learning_rate": 0.00013054708600974106, "loss": 0.4482, "step": 46270 }, { "epoch": 1.1746265436407712, "grad_norm": 0.359375, "learning_rate": 0.00013051413960084337, "loss": 0.4371, "step": 46275 }, { "epoch": 1.1747534616897868, "grad_norm": 0.359375, "learning_rate": 0.00013048119414810905, "loss": 0.4453, "step": 46280 }, { "epoch": 1.1748803797388025, "grad_norm": 0.333984375, "learning_rate": 0.00013044824965315478, "loss": 0.4133, "step": 46285 }, { "epoch": 1.1750072977878183, "grad_norm": 0.361328125, "learning_rate": 0.00013041530611759708, "loss": 0.4035, "step": 46290 }, { "epoch": 1.175134215836834, "grad_norm": 0.3984375, "learning_rate": 0.00013038236354305253, "loss": 0.4166, "step": 46295 }, { "epoch": 1.1752611338858499, "grad_norm": 0.33984375, "learning_rate": 0.0001303494219311376, "loss": 0.3847, "step": 46300 }, { "epoch": 1.1753880519348656, "grad_norm": 0.3671875, "learning_rate": 0.00013031648128346873, "loss": 0.4275, "step": 46305 }, { "epoch": 1.1755149699838814, "grad_norm": 0.34375, "learning_rate": 0.00013028354160166225, "loss": 0.4335, "step": 46310 }, { "epoch": 1.1756418880328972, "grad_norm": 0.330078125, "learning_rate": 0.00013025060288733456, "loss": 0.4061, "step": 46315 }, { "epoch": 1.175768806081913, "grad_norm": 0.33203125, "learning_rate": 0.00013021766514210192, "loss": 0.4363, "step": 46320 }, { "epoch": 1.1758957241309287, "grad_norm": 0.392578125, "learning_rate": 0.0001301847283675805, "loss": 0.4223, "step": 46325 }, { "epoch": 1.1760226421799445, "grad_norm": 0.32421875, "learning_rate": 0.00013015179256538665, "loss": 0.4122, "step": 46330 }, { "epoch": 1.1761495602289602, "grad_norm": 0.359375, "learning_rate": 0.0001301188577371364, "loss": 0.4379, "step": 46335 }, { "epoch": 1.176276478277976, "grad_norm": 0.375, "learning_rate": 0.0001300859238844459, "loss": 0.4281, "step": 46340 }, { "epoch": 1.1764033963269918, "grad_norm": 0.3828125, "learning_rate": 0.00013005299100893116, "loss": 0.4437, "step": 46345 }, { "epoch": 1.1765303143760075, "grad_norm": 0.34765625, "learning_rate": 0.00013002005911220814, "loss": 0.4113, "step": 46350 }, { "epoch": 1.176657232425023, "grad_norm": 0.353515625, "learning_rate": 0.00012998712819589295, "loss": 0.4392, "step": 46355 }, { "epoch": 1.1767841504740388, "grad_norm": 0.3828125, "learning_rate": 0.0001299541982616014, "loss": 0.4201, "step": 46360 }, { "epoch": 1.1769110685230546, "grad_norm": 0.333984375, "learning_rate": 0.00012992126931094932, "loss": 0.4159, "step": 46365 }, { "epoch": 1.1770379865720704, "grad_norm": 0.359375, "learning_rate": 0.00012988834134555256, "loss": 0.4279, "step": 46370 }, { "epoch": 1.1771649046210861, "grad_norm": 0.357421875, "learning_rate": 0.00012985541436702692, "loss": 0.416, "step": 46375 }, { "epoch": 1.177291822670102, "grad_norm": 0.337890625, "learning_rate": 0.00012982248837698805, "loss": 0.4235, "step": 46380 }, { "epoch": 1.1774187407191177, "grad_norm": 0.3515625, "learning_rate": 0.00012978956337705166, "loss": 0.4296, "step": 46385 }, { "epoch": 1.1775456587681334, "grad_norm": 0.3515625, "learning_rate": 0.00012975663936883332, "loss": 0.4053, "step": 46390 }, { "epoch": 1.1776725768171492, "grad_norm": 0.353515625, "learning_rate": 0.00012972371635394864, "loss": 0.4457, "step": 46395 }, { "epoch": 1.177799494866165, "grad_norm": 0.34765625, "learning_rate": 0.00012969079433401308, "loss": 0.433, "step": 46400 }, { "epoch": 1.1779264129151807, "grad_norm": 0.3203125, "learning_rate": 0.00012965787331064218, "loss": 0.4287, "step": 46405 }, { "epoch": 1.1780533309641965, "grad_norm": 0.34765625, "learning_rate": 0.00012962495328545134, "loss": 0.4101, "step": 46410 }, { "epoch": 1.178180249013212, "grad_norm": 0.333984375, "learning_rate": 0.00012959203426005585, "loss": 0.3931, "step": 46415 }, { "epoch": 1.1783071670622278, "grad_norm": 0.353515625, "learning_rate": 0.0001295591162360712, "loss": 0.4059, "step": 46420 }, { "epoch": 1.1784340851112436, "grad_norm": 0.341796875, "learning_rate": 0.00012952619921511257, "loss": 0.4187, "step": 46425 }, { "epoch": 1.1785610031602594, "grad_norm": 0.341796875, "learning_rate": 0.00012949328319879517, "loss": 0.4103, "step": 46430 }, { "epoch": 1.1786879212092751, "grad_norm": 0.369140625, "learning_rate": 0.0001294603681887342, "loss": 0.4129, "step": 46435 }, { "epoch": 1.178814839258291, "grad_norm": 0.353515625, "learning_rate": 0.00012942745418654478, "loss": 0.4434, "step": 46440 }, { "epoch": 1.1789417573073067, "grad_norm": 0.361328125, "learning_rate": 0.00012939454119384195, "loss": 0.3972, "step": 46445 }, { "epoch": 1.1790686753563224, "grad_norm": 0.357421875, "learning_rate": 0.00012936162921224083, "loss": 0.4354, "step": 46450 }, { "epoch": 1.1791955934053382, "grad_norm": 0.341796875, "learning_rate": 0.00012932871824335632, "loss": 0.4611, "step": 46455 }, { "epoch": 1.179322511454354, "grad_norm": 0.326171875, "learning_rate": 0.0001292958082888034, "loss": 0.3802, "step": 46460 }, { "epoch": 1.1794494295033697, "grad_norm": 0.337890625, "learning_rate": 0.0001292628993501969, "loss": 0.3939, "step": 46465 }, { "epoch": 1.1795763475523855, "grad_norm": 0.345703125, "learning_rate": 0.00012922999142915169, "loss": 0.4179, "step": 46470 }, { "epoch": 1.1797032656014013, "grad_norm": 0.333984375, "learning_rate": 0.00012919708452728245, "loss": 0.4285, "step": 46475 }, { "epoch": 1.179830183650417, "grad_norm": 0.34765625, "learning_rate": 0.00012916417864620403, "loss": 0.4253, "step": 46480 }, { "epoch": 1.1799571016994328, "grad_norm": 0.341796875, "learning_rate": 0.00012913127378753108, "loss": 0.4196, "step": 46485 }, { "epoch": 1.1800840197484483, "grad_norm": 0.353515625, "learning_rate": 0.0001290983699528782, "loss": 0.4099, "step": 46490 }, { "epoch": 1.180210937797464, "grad_norm": 0.33984375, "learning_rate": 0.00012906546714385996, "loss": 0.3874, "step": 46495 }, { "epoch": 1.1803378558464799, "grad_norm": 0.349609375, "learning_rate": 0.00012903256536209094, "loss": 0.4273, "step": 46500 }, { "epoch": 1.1804647738954956, "grad_norm": 0.37109375, "learning_rate": 0.0001289996646091856, "loss": 0.4389, "step": 46505 }, { "epoch": 1.1805916919445114, "grad_norm": 0.341796875, "learning_rate": 0.0001289667648867583, "loss": 0.3985, "step": 46510 }, { "epoch": 1.1807186099935272, "grad_norm": 0.353515625, "learning_rate": 0.00012893386619642353, "loss": 0.392, "step": 46515 }, { "epoch": 1.180845528042543, "grad_norm": 0.34765625, "learning_rate": 0.00012890096853979553, "loss": 0.4388, "step": 46520 }, { "epoch": 1.1809724460915587, "grad_norm": 0.33984375, "learning_rate": 0.00012886807191848858, "loss": 0.3926, "step": 46525 }, { "epoch": 1.1810993641405745, "grad_norm": 0.353515625, "learning_rate": 0.00012883517633411693, "loss": 0.4374, "step": 46530 }, { "epoch": 1.1812262821895902, "grad_norm": 0.345703125, "learning_rate": 0.00012880228178829476, "loss": 0.4023, "step": 46535 }, { "epoch": 1.181353200238606, "grad_norm": 0.349609375, "learning_rate": 0.0001287693882826361, "loss": 0.4243, "step": 46540 }, { "epoch": 1.1814801182876216, "grad_norm": 0.396484375, "learning_rate": 0.0001287364958187552, "loss": 0.437, "step": 46545 }, { "epoch": 1.1816070363366373, "grad_norm": 0.36328125, "learning_rate": 0.00012870360439826595, "loss": 0.4259, "step": 46550 }, { "epoch": 1.181733954385653, "grad_norm": 0.34765625, "learning_rate": 0.00012867071402278233, "loss": 0.4201, "step": 46555 }, { "epoch": 1.1818608724346689, "grad_norm": 0.359375, "learning_rate": 0.00012863782469391832, "loss": 0.407, "step": 46560 }, { "epoch": 1.1819877904836846, "grad_norm": 0.33203125, "learning_rate": 0.00012860493641328772, "loss": 0.4185, "step": 46565 }, { "epoch": 1.1821147085327004, "grad_norm": 0.283203125, "learning_rate": 0.00012857204918250434, "loss": 0.3762, "step": 46570 }, { "epoch": 1.1822416265817162, "grad_norm": 0.369140625, "learning_rate": 0.00012853916300318202, "loss": 0.3969, "step": 46575 }, { "epoch": 1.182368544630732, "grad_norm": 0.36328125, "learning_rate": 0.0001285062778769344, "loss": 0.4349, "step": 46580 }, { "epoch": 1.1824954626797477, "grad_norm": 0.359375, "learning_rate": 0.00012847339380537518, "loss": 0.4119, "step": 46585 }, { "epoch": 1.1826223807287635, "grad_norm": 0.373046875, "learning_rate": 0.00012844051079011792, "loss": 0.4312, "step": 46590 }, { "epoch": 1.1827492987777792, "grad_norm": 0.34375, "learning_rate": 0.00012840762883277625, "loss": 0.4166, "step": 46595 }, { "epoch": 1.182876216826795, "grad_norm": 0.333984375, "learning_rate": 0.00012837474793496356, "loss": 0.412, "step": 46600 }, { "epoch": 1.1830031348758108, "grad_norm": 0.376953125, "learning_rate": 0.0001283418680982934, "loss": 0.4287, "step": 46605 }, { "epoch": 1.1831300529248265, "grad_norm": 0.35546875, "learning_rate": 0.0001283089893243792, "loss": 0.4188, "step": 46610 }, { "epoch": 1.1832569709738423, "grad_norm": 0.37890625, "learning_rate": 0.00012827611161483416, "loss": 0.4489, "step": 46615 }, { "epoch": 1.1833838890228578, "grad_norm": 0.35546875, "learning_rate": 0.00012824323497127174, "loss": 0.4142, "step": 46620 }, { "epoch": 1.1835108070718736, "grad_norm": 0.375, "learning_rate": 0.0001282103593953051, "loss": 0.4336, "step": 46625 }, { "epoch": 1.1836377251208894, "grad_norm": 0.279296875, "learning_rate": 0.00012817748488854746, "loss": 0.4216, "step": 46630 }, { "epoch": 1.1837646431699052, "grad_norm": 0.33984375, "learning_rate": 0.00012814461145261194, "loss": 0.3955, "step": 46635 }, { "epoch": 1.183891561218921, "grad_norm": 0.3359375, "learning_rate": 0.00012811173908911162, "loss": 0.3818, "step": 46640 }, { "epoch": 1.1840184792679367, "grad_norm": 0.33984375, "learning_rate": 0.00012807886779965953, "loss": 0.3986, "step": 46645 }, { "epoch": 1.1841453973169525, "grad_norm": 0.3359375, "learning_rate": 0.00012804599758586867, "loss": 0.4181, "step": 46650 }, { "epoch": 1.1842723153659682, "grad_norm": 0.361328125, "learning_rate": 0.000128013128449352, "loss": 0.446, "step": 46655 }, { "epoch": 1.184399233414984, "grad_norm": 0.369140625, "learning_rate": 0.00012798026039172232, "loss": 0.4032, "step": 46660 }, { "epoch": 1.1845261514639998, "grad_norm": 0.34765625, "learning_rate": 0.00012794739341459243, "loss": 0.4045, "step": 46665 }, { "epoch": 1.1846530695130155, "grad_norm": 0.34765625, "learning_rate": 0.00012791452751957524, "loss": 0.4214, "step": 46670 }, { "epoch": 1.1847799875620313, "grad_norm": 0.353515625, "learning_rate": 0.0001278816627082834, "loss": 0.405, "step": 46675 }, { "epoch": 1.1849069056110468, "grad_norm": 0.353515625, "learning_rate": 0.00012784879898232956, "loss": 0.4472, "step": 46680 }, { "epoch": 1.1850338236600626, "grad_norm": 0.328125, "learning_rate": 0.0001278159363433263, "loss": 0.4134, "step": 46685 }, { "epoch": 1.1851607417090784, "grad_norm": 0.35546875, "learning_rate": 0.0001277830747928862, "loss": 0.4325, "step": 46690 }, { "epoch": 1.1852876597580941, "grad_norm": 0.353515625, "learning_rate": 0.0001277502143326218, "loss": 0.4291, "step": 46695 }, { "epoch": 1.18541457780711, "grad_norm": 0.349609375, "learning_rate": 0.0001277173549641455, "loss": 0.4428, "step": 46700 }, { "epoch": 1.1855414958561257, "grad_norm": 0.36328125, "learning_rate": 0.0001276844966890697, "loss": 0.4314, "step": 46705 }, { "epoch": 1.1856684139051414, "grad_norm": 0.326171875, "learning_rate": 0.00012765163950900681, "loss": 0.4296, "step": 46710 }, { "epoch": 1.1857953319541572, "grad_norm": 0.28515625, "learning_rate": 0.00012761878342556904, "loss": 0.4159, "step": 46715 }, { "epoch": 1.185922250003173, "grad_norm": 0.318359375, "learning_rate": 0.00012758592844036863, "loss": 0.4044, "step": 46720 }, { "epoch": 1.1860491680521887, "grad_norm": 0.349609375, "learning_rate": 0.00012755307455501782, "loss": 0.4011, "step": 46725 }, { "epoch": 1.1861760861012045, "grad_norm": 0.34375, "learning_rate": 0.00012752022177112864, "loss": 0.4241, "step": 46730 }, { "epoch": 1.1863030041502203, "grad_norm": 0.37109375, "learning_rate": 0.00012748737009031326, "loss": 0.427, "step": 46735 }, { "epoch": 1.186429922199236, "grad_norm": 1.3515625, "learning_rate": 0.00012745451951418364, "loss": 0.4326, "step": 46740 }, { "epoch": 1.1865568402482518, "grad_norm": 0.345703125, "learning_rate": 0.0001274216700443518, "loss": 0.408, "step": 46745 }, { "epoch": 1.1866837582972676, "grad_norm": 0.33984375, "learning_rate": 0.00012738882168242962, "loss": 0.405, "step": 46750 }, { "epoch": 1.1868106763462831, "grad_norm": 0.34375, "learning_rate": 0.00012735597443002894, "loss": 0.3921, "step": 46755 }, { "epoch": 1.186937594395299, "grad_norm": 0.34765625, "learning_rate": 0.0001273231282887616, "loss": 0.4291, "step": 46760 }, { "epoch": 1.1870645124443147, "grad_norm": 0.3515625, "learning_rate": 0.0001272902832602393, "loss": 0.448, "step": 46765 }, { "epoch": 1.1871914304933304, "grad_norm": 0.359375, "learning_rate": 0.00012725743934607374, "loss": 0.3952, "step": 46770 }, { "epoch": 1.1873183485423462, "grad_norm": 0.34375, "learning_rate": 0.00012722459654787658, "loss": 0.4076, "step": 46775 }, { "epoch": 1.187445266591362, "grad_norm": 0.369140625, "learning_rate": 0.0001271917548672594, "loss": 0.4528, "step": 46780 }, { "epoch": 1.1875721846403777, "grad_norm": 0.33203125, "learning_rate": 0.00012715891430583375, "loss": 0.4103, "step": 46785 }, { "epoch": 1.1876991026893935, "grad_norm": 0.330078125, "learning_rate": 0.00012712607486521098, "loss": 0.4058, "step": 46790 }, { "epoch": 1.1878260207384093, "grad_norm": 0.306640625, "learning_rate": 0.00012709323654700269, "loss": 0.4067, "step": 46795 }, { "epoch": 1.187952938787425, "grad_norm": 0.333984375, "learning_rate": 0.00012706039935282017, "loss": 0.403, "step": 46800 }, { "epoch": 1.1880798568364408, "grad_norm": 0.3203125, "learning_rate": 0.00012702756328427468, "loss": 0.3998, "step": 46805 }, { "epoch": 1.1882067748854563, "grad_norm": 0.341796875, "learning_rate": 0.00012699472834297752, "loss": 0.416, "step": 46810 }, { "epoch": 1.188333692934472, "grad_norm": 0.341796875, "learning_rate": 0.00012696189453053987, "loss": 0.3867, "step": 46815 }, { "epoch": 1.1884606109834879, "grad_norm": 0.365234375, "learning_rate": 0.00012692906184857288, "loss": 0.419, "step": 46820 }, { "epoch": 1.1885875290325036, "grad_norm": 0.3515625, "learning_rate": 0.00012689623029868766, "loss": 0.4285, "step": 46825 }, { "epoch": 1.1887144470815194, "grad_norm": 0.373046875, "learning_rate": 0.00012686339988249518, "loss": 0.4205, "step": 46830 }, { "epoch": 1.1888413651305352, "grad_norm": 0.34765625, "learning_rate": 0.00012683057060160647, "loss": 0.4051, "step": 46835 }, { "epoch": 1.188968283179551, "grad_norm": 0.345703125, "learning_rate": 0.00012679774245763243, "loss": 0.4202, "step": 46840 }, { "epoch": 1.1890952012285667, "grad_norm": 0.380859375, "learning_rate": 0.00012676491545218392, "loss": 0.4307, "step": 46845 }, { "epoch": 1.1892221192775825, "grad_norm": 0.357421875, "learning_rate": 0.00012673208958687175, "loss": 0.4247, "step": 46850 }, { "epoch": 1.1893490373265982, "grad_norm": 0.3828125, "learning_rate": 0.00012669926486330666, "loss": 0.4262, "step": 46855 }, { "epoch": 1.189475955375614, "grad_norm": 0.35546875, "learning_rate": 0.00012666644128309936, "loss": 0.4283, "step": 46860 }, { "epoch": 1.1896028734246298, "grad_norm": 0.341796875, "learning_rate": 0.0001266336188478605, "loss": 0.4336, "step": 46865 }, { "epoch": 1.1897297914736455, "grad_norm": 0.392578125, "learning_rate": 0.0001266007975592007, "loss": 0.4225, "step": 46870 }, { "epoch": 1.1898567095226613, "grad_norm": 0.3359375, "learning_rate": 0.0001265679774187304, "loss": 0.4129, "step": 46875 }, { "epoch": 1.189983627571677, "grad_norm": 0.341796875, "learning_rate": 0.00012653515842806013, "loss": 0.3976, "step": 46880 }, { "epoch": 1.1901105456206926, "grad_norm": 0.3359375, "learning_rate": 0.0001265023405888003, "loss": 0.4302, "step": 46885 }, { "epoch": 1.1902374636697084, "grad_norm": 0.369140625, "learning_rate": 0.0001264695239025612, "loss": 0.4314, "step": 46890 }, { "epoch": 1.1903643817187242, "grad_norm": 0.357421875, "learning_rate": 0.00012643670837095327, "loss": 0.4418, "step": 46895 }, { "epoch": 1.19049129976774, "grad_norm": 0.35546875, "learning_rate": 0.00012640389399558663, "loss": 0.4054, "step": 46900 }, { "epoch": 1.1906182178167557, "grad_norm": 0.333984375, "learning_rate": 0.00012637108077807153, "loss": 0.4064, "step": 46905 }, { "epoch": 1.1907451358657715, "grad_norm": 0.359375, "learning_rate": 0.00012633826872001806, "loss": 0.4109, "step": 46910 }, { "epoch": 1.1908720539147872, "grad_norm": 0.361328125, "learning_rate": 0.00012630545782303635, "loss": 0.4023, "step": 46915 }, { "epoch": 1.190998971963803, "grad_norm": 0.3515625, "learning_rate": 0.0001262726480887363, "loss": 0.4133, "step": 46920 }, { "epoch": 1.1911258900128188, "grad_norm": 0.62109375, "learning_rate": 0.00012623983951872805, "loss": 0.4292, "step": 46925 }, { "epoch": 1.1912528080618345, "grad_norm": 0.3671875, "learning_rate": 0.00012620703211462138, "loss": 0.4256, "step": 46930 }, { "epoch": 1.1913797261108503, "grad_norm": 0.333984375, "learning_rate": 0.00012617422587802616, "loss": 0.4028, "step": 46935 }, { "epoch": 1.191506644159866, "grad_norm": 0.361328125, "learning_rate": 0.00012614142081055217, "loss": 0.4462, "step": 46940 }, { "epoch": 1.1916335622088816, "grad_norm": 0.337890625, "learning_rate": 0.0001261086169138092, "loss": 0.4298, "step": 46945 }, { "epoch": 1.1917604802578974, "grad_norm": 0.333984375, "learning_rate": 0.00012607581418940686, "loss": 0.4, "step": 46950 }, { "epoch": 1.1918873983069131, "grad_norm": 0.349609375, "learning_rate": 0.0001260430126389548, "loss": 0.4126, "step": 46955 }, { "epoch": 1.192014316355929, "grad_norm": 0.353515625, "learning_rate": 0.00012601021226406255, "loss": 0.4373, "step": 46960 }, { "epoch": 1.1921412344049447, "grad_norm": 0.361328125, "learning_rate": 0.00012597741306633964, "loss": 0.4175, "step": 46965 }, { "epoch": 1.1922681524539605, "grad_norm": 0.365234375, "learning_rate": 0.00012594461504739549, "loss": 0.4041, "step": 46970 }, { "epoch": 1.1923950705029762, "grad_norm": 0.35546875, "learning_rate": 0.0001259118182088395, "loss": 0.424, "step": 46975 }, { "epoch": 1.192521988551992, "grad_norm": 0.345703125, "learning_rate": 0.00012587902255228097, "loss": 0.3993, "step": 46980 }, { "epoch": 1.1926489066010078, "grad_norm": 0.35546875, "learning_rate": 0.00012584622807932923, "loss": 0.4407, "step": 46985 }, { "epoch": 1.1927758246500235, "grad_norm": 0.330078125, "learning_rate": 0.00012581343479159345, "loss": 0.4079, "step": 46990 }, { "epoch": 1.1929027426990393, "grad_norm": 0.341796875, "learning_rate": 0.0001257806426906828, "loss": 0.4278, "step": 46995 }, { "epoch": 1.193029660748055, "grad_norm": 0.353515625, "learning_rate": 0.00012574785177820637, "loss": 0.435, "step": 47000 }, { "epoch": 1.1931565787970708, "grad_norm": 0.349609375, "learning_rate": 0.00012571506205577316, "loss": 0.4057, "step": 47005 }, { "epoch": 1.1932834968460866, "grad_norm": 0.337890625, "learning_rate": 0.00012568227352499221, "loss": 0.3991, "step": 47010 }, { "epoch": 1.1934104148951024, "grad_norm": 0.333984375, "learning_rate": 0.0001256494861874724, "loss": 0.4017, "step": 47015 }, { "epoch": 1.193537332944118, "grad_norm": 0.3359375, "learning_rate": 0.0001256167000448226, "loss": 0.4358, "step": 47020 }, { "epoch": 1.1936642509931337, "grad_norm": 0.34765625, "learning_rate": 0.00012558391509865163, "loss": 0.4219, "step": 47025 }, { "epoch": 1.1937911690421494, "grad_norm": 0.345703125, "learning_rate": 0.0001255511313505682, "loss": 0.4244, "step": 47030 }, { "epoch": 1.1939180870911652, "grad_norm": 0.3671875, "learning_rate": 0.00012551834880218104, "loss": 0.3951, "step": 47035 }, { "epoch": 1.194045005140181, "grad_norm": 0.333984375, "learning_rate": 0.00012548556745509875, "loss": 0.4124, "step": 47040 }, { "epoch": 1.1941719231891967, "grad_norm": 0.353515625, "learning_rate": 0.00012545278731092985, "loss": 0.4295, "step": 47045 }, { "epoch": 1.1942988412382125, "grad_norm": 0.359375, "learning_rate": 0.00012542000837128294, "loss": 0.403, "step": 47050 }, { "epoch": 1.1944257592872283, "grad_norm": 0.33984375, "learning_rate": 0.00012538723063776642, "loss": 0.4108, "step": 47055 }, { "epoch": 1.194552677336244, "grad_norm": 0.341796875, "learning_rate": 0.00012535445411198867, "loss": 0.421, "step": 47060 }, { "epoch": 1.1946795953852598, "grad_norm": 0.330078125, "learning_rate": 0.00012532167879555808, "loss": 0.4228, "step": 47065 }, { "epoch": 1.1948065134342756, "grad_norm": 0.3515625, "learning_rate": 0.00012528890469008287, "loss": 0.3991, "step": 47070 }, { "epoch": 1.1949334314832911, "grad_norm": 0.330078125, "learning_rate": 0.00012525613179717126, "loss": 0.3949, "step": 47075 }, { "epoch": 1.1950603495323069, "grad_norm": 0.349609375, "learning_rate": 0.0001252233601184314, "loss": 0.4067, "step": 47080 }, { "epoch": 1.1951872675813227, "grad_norm": 0.345703125, "learning_rate": 0.0001251905896554714, "loss": 0.4045, "step": 47085 }, { "epoch": 1.1953141856303384, "grad_norm": 0.328125, "learning_rate": 0.00012515782040989926, "loss": 0.4042, "step": 47090 }, { "epoch": 1.1954411036793542, "grad_norm": 0.369140625, "learning_rate": 0.000125125052383323, "loss": 0.4528, "step": 47095 }, { "epoch": 1.19556802172837, "grad_norm": 0.318359375, "learning_rate": 0.00012509228557735052, "loss": 0.3923, "step": 47100 }, { "epoch": 1.1956949397773857, "grad_norm": 0.380859375, "learning_rate": 0.00012505951999358965, "loss": 0.4363, "step": 47105 }, { "epoch": 1.1958218578264015, "grad_norm": 0.380859375, "learning_rate": 0.00012502675563364813, "loss": 0.4212, "step": 47110 }, { "epoch": 1.1959487758754173, "grad_norm": 0.361328125, "learning_rate": 0.00012499399249913385, "loss": 0.3953, "step": 47115 }, { "epoch": 1.196075693924433, "grad_norm": 0.33984375, "learning_rate": 0.0001249612305916544, "loss": 0.377, "step": 47120 }, { "epoch": 1.1962026119734488, "grad_norm": 0.3359375, "learning_rate": 0.00012492846991281735, "loss": 0.4046, "step": 47125 }, { "epoch": 1.1963295300224646, "grad_norm": 0.330078125, "learning_rate": 0.00012489571046423034, "loss": 0.4081, "step": 47130 }, { "epoch": 1.1964564480714803, "grad_norm": 0.369140625, "learning_rate": 0.0001248629522475008, "loss": 0.4338, "step": 47135 }, { "epoch": 1.196583366120496, "grad_norm": 0.353515625, "learning_rate": 0.00012483019526423613, "loss": 0.4311, "step": 47140 }, { "epoch": 1.1967102841695119, "grad_norm": 0.361328125, "learning_rate": 0.00012479743951604382, "loss": 0.4338, "step": 47145 }, { "epoch": 1.1968372022185274, "grad_norm": 0.326171875, "learning_rate": 0.00012476468500453107, "loss": 0.4095, "step": 47150 }, { "epoch": 1.1969641202675432, "grad_norm": 0.337890625, "learning_rate": 0.0001247319317313052, "loss": 0.419, "step": 47155 }, { "epoch": 1.197091038316559, "grad_norm": 0.345703125, "learning_rate": 0.00012469917969797339, "loss": 0.4384, "step": 47160 }, { "epoch": 1.1972179563655747, "grad_norm": 0.3359375, "learning_rate": 0.00012466642890614273, "loss": 0.4159, "step": 47165 }, { "epoch": 1.1973448744145905, "grad_norm": 0.337890625, "learning_rate": 0.00012463367935742025, "loss": 0.4145, "step": 47170 }, { "epoch": 1.1974717924636062, "grad_norm": 0.384765625, "learning_rate": 0.0001246009310534131, "loss": 0.4326, "step": 47175 }, { "epoch": 1.197598710512622, "grad_norm": 0.326171875, "learning_rate": 0.00012456818399572814, "loss": 0.3967, "step": 47180 }, { "epoch": 1.1977256285616378, "grad_norm": 0.361328125, "learning_rate": 0.0001245354381859722, "loss": 0.4047, "step": 47185 }, { "epoch": 1.1978525466106535, "grad_norm": 0.337890625, "learning_rate": 0.00012450269362575223, "loss": 0.4036, "step": 47190 }, { "epoch": 1.1979794646596693, "grad_norm": 0.337890625, "learning_rate": 0.00012446995031667492, "loss": 0.4077, "step": 47195 }, { "epoch": 1.198106382708685, "grad_norm": 0.37890625, "learning_rate": 0.00012443720826034698, "loss": 0.4313, "step": 47200 }, { "epoch": 1.1982333007577008, "grad_norm": 0.357421875, "learning_rate": 0.00012440446745837504, "loss": 0.3944, "step": 47205 }, { "epoch": 1.1983602188067164, "grad_norm": 0.33203125, "learning_rate": 0.00012437172791236568, "loss": 0.3957, "step": 47210 }, { "epoch": 1.1984871368557322, "grad_norm": 0.3359375, "learning_rate": 0.0001243389896239254, "loss": 0.4095, "step": 47215 }, { "epoch": 1.198614054904748, "grad_norm": 0.3359375, "learning_rate": 0.0001243062525946607, "loss": 0.4228, "step": 47220 }, { "epoch": 1.1987409729537637, "grad_norm": 0.357421875, "learning_rate": 0.00012427351682617799, "loss": 0.409, "step": 47225 }, { "epoch": 1.1988678910027795, "grad_norm": 0.35546875, "learning_rate": 0.00012424078232008351, "loss": 0.4169, "step": 47230 }, { "epoch": 1.1989948090517952, "grad_norm": 0.369140625, "learning_rate": 0.00012420804907798354, "loss": 0.4117, "step": 47235 }, { "epoch": 1.199121727100811, "grad_norm": 0.333984375, "learning_rate": 0.00012417531710148442, "loss": 0.413, "step": 47240 }, { "epoch": 1.1992486451498268, "grad_norm": 0.361328125, "learning_rate": 0.00012414258639219215, "loss": 0.4013, "step": 47245 }, { "epoch": 1.1993755631988425, "grad_norm": 0.36328125, "learning_rate": 0.0001241098569517129, "loss": 0.4142, "step": 47250 }, { "epoch": 1.1995024812478583, "grad_norm": 0.359375, "learning_rate": 0.00012407712878165263, "loss": 0.4214, "step": 47255 }, { "epoch": 1.199629399296874, "grad_norm": 0.357421875, "learning_rate": 0.0001240444018836173, "loss": 0.4213, "step": 47260 }, { "epoch": 1.1997563173458898, "grad_norm": 0.349609375, "learning_rate": 0.0001240116762592129, "loss": 0.4253, "step": 47265 }, { "epoch": 1.1998832353949056, "grad_norm": 0.328125, "learning_rate": 0.00012397895191004516, "loss": 0.4275, "step": 47270 }, { "epoch": 1.2000101534439214, "grad_norm": 0.34375, "learning_rate": 0.00012394622883771987, "loss": 0.4201, "step": 47275 }, { "epoch": 1.2001370714929371, "grad_norm": 0.357421875, "learning_rate": 0.00012391350704384278, "loss": 0.4319, "step": 47280 }, { "epoch": 1.2002639895419527, "grad_norm": 0.35546875, "learning_rate": 0.00012388078653001949, "loss": 0.423, "step": 47285 }, { "epoch": 1.2003909075909684, "grad_norm": 0.322265625, "learning_rate": 0.00012384806729785557, "loss": 0.4091, "step": 47290 }, { "epoch": 1.2005178256399842, "grad_norm": 0.3671875, "learning_rate": 0.00012381534934895658, "loss": 0.435, "step": 47295 }, { "epoch": 1.200644743689, "grad_norm": 0.37109375, "learning_rate": 0.000123782632684928, "loss": 0.4272, "step": 47300 }, { "epoch": 1.2007716617380157, "grad_norm": 0.359375, "learning_rate": 0.00012374991730737514, "loss": 0.3815, "step": 47305 }, { "epoch": 1.2008985797870315, "grad_norm": 0.357421875, "learning_rate": 0.00012371720321790344, "loss": 0.4159, "step": 47310 }, { "epoch": 1.2010254978360473, "grad_norm": 0.35546875, "learning_rate": 0.0001236844904181181, "loss": 0.4111, "step": 47315 }, { "epoch": 1.201152415885063, "grad_norm": 0.373046875, "learning_rate": 0.00012365177890962434, "loss": 0.4291, "step": 47320 }, { "epoch": 1.2012793339340788, "grad_norm": 0.365234375, "learning_rate": 0.00012361906869402727, "loss": 0.4437, "step": 47325 }, { "epoch": 1.2014062519830946, "grad_norm": 0.35546875, "learning_rate": 0.00012358635977293203, "loss": 0.4068, "step": 47330 }, { "epoch": 1.2015331700321104, "grad_norm": 0.34765625, "learning_rate": 0.00012355365214794354, "loss": 0.4183, "step": 47335 }, { "epoch": 1.201660088081126, "grad_norm": 0.32421875, "learning_rate": 0.0001235209458206668, "loss": 0.4042, "step": 47340 }, { "epoch": 1.2017870061301417, "grad_norm": 0.36328125, "learning_rate": 0.00012348824079270673, "loss": 0.44, "step": 47345 }, { "epoch": 1.2019139241791574, "grad_norm": 0.3359375, "learning_rate": 0.00012345553706566813, "loss": 0.4228, "step": 47350 }, { "epoch": 1.2020408422281732, "grad_norm": 0.345703125, "learning_rate": 0.00012342283464115576, "loss": 0.4178, "step": 47355 }, { "epoch": 1.202167760277189, "grad_norm": 0.33984375, "learning_rate": 0.0001233901335207742, "loss": 0.4149, "step": 47360 }, { "epoch": 1.2022946783262047, "grad_norm": 0.380859375, "learning_rate": 0.00012335743370612832, "loss": 0.4225, "step": 47365 }, { "epoch": 1.2024215963752205, "grad_norm": 0.37109375, "learning_rate": 0.00012332473519882252, "loss": 0.4483, "step": 47370 }, { "epoch": 1.2025485144242363, "grad_norm": 0.333984375, "learning_rate": 0.00012329203800046134, "loss": 0.4164, "step": 47375 }, { "epoch": 1.202675432473252, "grad_norm": 0.365234375, "learning_rate": 0.0001232593421126492, "loss": 0.4214, "step": 47380 }, { "epoch": 1.2028023505222678, "grad_norm": 0.357421875, "learning_rate": 0.0001232266475369905, "loss": 0.4245, "step": 47385 }, { "epoch": 1.2029292685712836, "grad_norm": 0.337890625, "learning_rate": 0.00012319395427508954, "loss": 0.3877, "step": 47390 }, { "epoch": 1.2030561866202993, "grad_norm": 0.34375, "learning_rate": 0.00012316126232855056, "loss": 0.3994, "step": 47395 }, { "epoch": 1.203183104669315, "grad_norm": 0.357421875, "learning_rate": 0.00012312857169897775, "loss": 0.4236, "step": 47400 }, { "epoch": 1.2033100227183309, "grad_norm": 0.369140625, "learning_rate": 0.00012309588238797525, "loss": 0.4069, "step": 47405 }, { "epoch": 1.2034369407673466, "grad_norm": 0.349609375, "learning_rate": 0.00012306319439714706, "loss": 0.4213, "step": 47410 }, { "epoch": 1.2035638588163622, "grad_norm": 0.3203125, "learning_rate": 0.00012303050772809716, "loss": 0.3994, "step": 47415 }, { "epoch": 1.203690776865378, "grad_norm": 0.33203125, "learning_rate": 0.00012299782238242953, "loss": 0.3718, "step": 47420 }, { "epoch": 1.2038176949143937, "grad_norm": 0.37109375, "learning_rate": 0.00012296513836174797, "loss": 0.4116, "step": 47425 }, { "epoch": 1.2039446129634095, "grad_norm": 0.3515625, "learning_rate": 0.0001229324556676563, "loss": 0.4118, "step": 47430 }, { "epoch": 1.2040715310124253, "grad_norm": 0.34765625, "learning_rate": 0.0001228997743017583, "loss": 0.3992, "step": 47435 }, { "epoch": 1.204198449061441, "grad_norm": 0.326171875, "learning_rate": 0.00012286709426565756, "loss": 0.4137, "step": 47440 }, { "epoch": 1.2043253671104568, "grad_norm": 0.3671875, "learning_rate": 0.0001228344155609577, "loss": 0.4406, "step": 47445 }, { "epoch": 1.2044522851594726, "grad_norm": 0.359375, "learning_rate": 0.00012280173818926226, "loss": 0.4368, "step": 47450 }, { "epoch": 1.2045792032084883, "grad_norm": 0.33984375, "learning_rate": 0.00012276906215217469, "loss": 0.3966, "step": 47455 }, { "epoch": 1.204706121257504, "grad_norm": 0.373046875, "learning_rate": 0.00012273638745129834, "loss": 0.4096, "step": 47460 }, { "epoch": 1.2048330393065199, "grad_norm": 0.349609375, "learning_rate": 0.00012270371408823667, "loss": 0.4125, "step": 47465 }, { "epoch": 1.2049599573555356, "grad_norm": 0.33203125, "learning_rate": 0.00012267104206459286, "loss": 0.3988, "step": 47470 }, { "epoch": 1.2050868754045512, "grad_norm": 0.3515625, "learning_rate": 0.0001226383713819701, "loss": 0.4241, "step": 47475 }, { "epoch": 1.205213793453567, "grad_norm": 0.341796875, "learning_rate": 0.0001226057020419716, "loss": 0.4392, "step": 47480 }, { "epoch": 1.2053407115025827, "grad_norm": 0.365234375, "learning_rate": 0.0001225730340462003, "loss": 0.4098, "step": 47485 }, { "epoch": 1.2054676295515985, "grad_norm": 0.34765625, "learning_rate": 0.00012254036739625936, "loss": 0.416, "step": 47490 }, { "epoch": 1.2055945476006142, "grad_norm": 0.328125, "learning_rate": 0.00012250770209375162, "loss": 0.405, "step": 47495 }, { "epoch": 1.20572146564963, "grad_norm": 0.3359375, "learning_rate": 0.00012247503814028003, "loss": 0.4279, "step": 47500 }, { "epoch": 1.2058483836986458, "grad_norm": 0.326171875, "learning_rate": 0.0001224423755374473, "loss": 0.421, "step": 47505 }, { "epoch": 1.2059753017476615, "grad_norm": 0.36328125, "learning_rate": 0.00012240971428685628, "loss": 0.4347, "step": 47510 }, { "epoch": 1.2061022197966773, "grad_norm": 0.359375, "learning_rate": 0.0001223770543901096, "loss": 0.3946, "step": 47515 }, { "epoch": 1.206229137845693, "grad_norm": 0.34375, "learning_rate": 0.00012234439584880982, "loss": 0.4191, "step": 47520 }, { "epoch": 1.2063560558947088, "grad_norm": 0.333984375, "learning_rate": 0.00012231173866455956, "loss": 0.4336, "step": 47525 }, { "epoch": 1.2064829739437246, "grad_norm": 0.353515625, "learning_rate": 0.0001222790828389612, "loss": 0.4299, "step": 47530 }, { "epoch": 1.2066098919927404, "grad_norm": 0.34765625, "learning_rate": 0.0001222464283736172, "loss": 0.4304, "step": 47535 }, { "epoch": 1.2067368100417561, "grad_norm": 0.380859375, "learning_rate": 0.00012221377527012997, "loss": 0.4526, "step": 47540 }, { "epoch": 1.2068637280907717, "grad_norm": 0.322265625, "learning_rate": 0.0001221811235301017, "loss": 0.3958, "step": 47545 }, { "epoch": 1.2069906461397875, "grad_norm": 0.369140625, "learning_rate": 0.00012214847315513452, "loss": 0.4273, "step": 47550 }, { "epoch": 1.2071175641888032, "grad_norm": 0.345703125, "learning_rate": 0.00012211582414683076, "loss": 0.4343, "step": 47555 }, { "epoch": 1.207244482237819, "grad_norm": 0.330078125, "learning_rate": 0.00012208317650679238, "loss": 0.4306, "step": 47560 }, { "epoch": 1.2073714002868348, "grad_norm": 0.345703125, "learning_rate": 0.00012205053023662141, "loss": 0.4285, "step": 47565 }, { "epoch": 1.2074983183358505, "grad_norm": 0.349609375, "learning_rate": 0.00012201788533791982, "loss": 0.4247, "step": 47570 }, { "epoch": 1.2076252363848663, "grad_norm": 0.349609375, "learning_rate": 0.00012198524181228942, "loss": 0.4348, "step": 47575 }, { "epoch": 1.207752154433882, "grad_norm": 0.373046875, "learning_rate": 0.00012195259966133205, "loss": 0.4066, "step": 47580 }, { "epoch": 1.2078790724828978, "grad_norm": 0.357421875, "learning_rate": 0.00012191995888664943, "loss": 0.4119, "step": 47585 }, { "epoch": 1.2080059905319136, "grad_norm": 0.345703125, "learning_rate": 0.00012188731948984325, "loss": 0.4064, "step": 47590 }, { "epoch": 1.2081329085809294, "grad_norm": 0.384765625, "learning_rate": 0.00012185468147251513, "loss": 0.4427, "step": 47595 }, { "epoch": 1.2082598266299451, "grad_norm": 0.337890625, "learning_rate": 0.00012182204483626654, "loss": 0.396, "step": 47600 }, { "epoch": 1.2083867446789607, "grad_norm": 0.3671875, "learning_rate": 0.00012178940958269901, "loss": 0.4143, "step": 47605 }, { "epoch": 1.2085136627279764, "grad_norm": 0.34765625, "learning_rate": 0.00012175677571341385, "loss": 0.44, "step": 47610 }, { "epoch": 1.2086405807769922, "grad_norm": 0.380859375, "learning_rate": 0.00012172414323001252, "loss": 0.4244, "step": 47615 }, { "epoch": 1.208767498826008, "grad_norm": 0.37890625, "learning_rate": 0.00012169151213409621, "loss": 0.4022, "step": 47620 }, { "epoch": 1.2088944168750237, "grad_norm": 0.345703125, "learning_rate": 0.00012165888242726614, "loss": 0.4089, "step": 47625 }, { "epoch": 1.2090213349240395, "grad_norm": 0.33203125, "learning_rate": 0.00012162625411112337, "loss": 0.3963, "step": 47630 }, { "epoch": 1.2091482529730553, "grad_norm": 0.32421875, "learning_rate": 0.00012159362718726904, "loss": 0.4246, "step": 47635 }, { "epoch": 1.209275171022071, "grad_norm": 0.361328125, "learning_rate": 0.00012156100165730412, "loss": 0.4403, "step": 47640 }, { "epoch": 1.2094020890710868, "grad_norm": 0.3359375, "learning_rate": 0.00012152837752282952, "loss": 0.409, "step": 47645 }, { "epoch": 1.2095290071201026, "grad_norm": 0.341796875, "learning_rate": 0.00012149575478544609, "loss": 0.4126, "step": 47650 }, { "epoch": 1.2096559251691184, "grad_norm": 0.353515625, "learning_rate": 0.00012146313344675462, "loss": 0.4339, "step": 47655 }, { "epoch": 1.2097828432181341, "grad_norm": 0.349609375, "learning_rate": 0.00012143051350835579, "loss": 0.3962, "step": 47660 }, { "epoch": 1.2099097612671499, "grad_norm": 0.3359375, "learning_rate": 0.00012139789497185032, "loss": 0.4066, "step": 47665 }, { "epoch": 1.2100366793161657, "grad_norm": 0.318359375, "learning_rate": 0.00012136527783883873, "loss": 0.403, "step": 47670 }, { "epoch": 1.2101635973651814, "grad_norm": 0.376953125, "learning_rate": 0.0001213326621109215, "loss": 0.4429, "step": 47675 }, { "epoch": 1.210290515414197, "grad_norm": 0.34765625, "learning_rate": 0.00012130004778969918, "loss": 0.4373, "step": 47680 }, { "epoch": 1.2104174334632127, "grad_norm": 0.330078125, "learning_rate": 0.0001212674348767721, "loss": 0.4239, "step": 47685 }, { "epoch": 1.2105443515122285, "grad_norm": 0.361328125, "learning_rate": 0.00012123482337374052, "loss": 0.4114, "step": 47690 }, { "epoch": 1.2106712695612443, "grad_norm": 0.3515625, "learning_rate": 0.00012120221328220472, "loss": 0.4244, "step": 47695 }, { "epoch": 1.21079818761026, "grad_norm": 0.34375, "learning_rate": 0.00012116960460376483, "loss": 0.428, "step": 47700 }, { "epoch": 1.2109251056592758, "grad_norm": 0.341796875, "learning_rate": 0.00012113699734002093, "loss": 0.4446, "step": 47705 }, { "epoch": 1.2110520237082916, "grad_norm": 0.3671875, "learning_rate": 0.00012110439149257311, "loss": 0.4398, "step": 47710 }, { "epoch": 1.2111789417573073, "grad_norm": 0.341796875, "learning_rate": 0.00012107178706302129, "loss": 0.4262, "step": 47715 }, { "epoch": 1.211305859806323, "grad_norm": 0.369140625, "learning_rate": 0.00012103918405296534, "loss": 0.4236, "step": 47720 }, { "epoch": 1.2114327778553389, "grad_norm": 0.341796875, "learning_rate": 0.0001210065824640051, "loss": 0.4353, "step": 47725 }, { "epoch": 1.2115596959043546, "grad_norm": 0.33984375, "learning_rate": 0.00012097398229774032, "loss": 0.4273, "step": 47730 }, { "epoch": 1.2116866139533702, "grad_norm": 0.353515625, "learning_rate": 0.00012094138355577063, "loss": 0.4389, "step": 47735 }, { "epoch": 1.211813532002386, "grad_norm": 0.34375, "learning_rate": 0.00012090878623969567, "loss": 0.4342, "step": 47740 }, { "epoch": 1.2119404500514017, "grad_norm": 0.376953125, "learning_rate": 0.00012087619035111501, "loss": 0.4207, "step": 47745 }, { "epoch": 1.2120673681004175, "grad_norm": 0.330078125, "learning_rate": 0.00012084359589162807, "loss": 0.4392, "step": 47750 }, { "epoch": 1.2121942861494333, "grad_norm": 0.37109375, "learning_rate": 0.00012081100286283427, "loss": 0.4395, "step": 47755 }, { "epoch": 1.212321204198449, "grad_norm": 0.345703125, "learning_rate": 0.00012077841126633295, "loss": 0.4158, "step": 47760 }, { "epoch": 1.2124481222474648, "grad_norm": 0.33984375, "learning_rate": 0.00012074582110372334, "loss": 0.4104, "step": 47765 }, { "epoch": 1.2125750402964806, "grad_norm": 0.375, "learning_rate": 0.00012071323237660465, "loss": 0.437, "step": 47770 }, { "epoch": 1.2127019583454963, "grad_norm": 0.333984375, "learning_rate": 0.00012068064508657598, "loss": 0.4151, "step": 47775 }, { "epoch": 1.212828876394512, "grad_norm": 0.359375, "learning_rate": 0.00012064805923523636, "loss": 0.4265, "step": 47780 }, { "epoch": 1.2129557944435279, "grad_norm": 0.3828125, "learning_rate": 0.00012061547482418477, "loss": 0.4346, "step": 47785 }, { "epoch": 1.2130827124925436, "grad_norm": 0.34765625, "learning_rate": 0.00012058289185502015, "loss": 0.4188, "step": 47790 }, { "epoch": 1.2132096305415594, "grad_norm": 0.34765625, "learning_rate": 0.00012055031032934129, "loss": 0.4259, "step": 47795 }, { "epoch": 1.2133365485905752, "grad_norm": 0.337890625, "learning_rate": 0.00012051773024874693, "loss": 0.4191, "step": 47800 }, { "epoch": 1.213463466639591, "grad_norm": 0.373046875, "learning_rate": 0.00012048515161483586, "loss": 0.407, "step": 47805 }, { "epoch": 1.2135903846886065, "grad_norm": 0.36328125, "learning_rate": 0.00012045257442920664, "loss": 0.4099, "step": 47810 }, { "epoch": 1.2137173027376222, "grad_norm": 0.330078125, "learning_rate": 0.00012041999869345782, "loss": 0.4094, "step": 47815 }, { "epoch": 1.213844220786638, "grad_norm": 0.337890625, "learning_rate": 0.0001203874244091879, "loss": 0.4374, "step": 47820 }, { "epoch": 1.2139711388356538, "grad_norm": 0.37109375, "learning_rate": 0.00012035485157799525, "loss": 0.4278, "step": 47825 }, { "epoch": 1.2140980568846695, "grad_norm": 0.34375, "learning_rate": 0.0001203222802014782, "loss": 0.3956, "step": 47830 }, { "epoch": 1.2142249749336853, "grad_norm": 0.318359375, "learning_rate": 0.00012028971028123507, "loss": 0.4174, "step": 47835 }, { "epoch": 1.214351892982701, "grad_norm": 0.35546875, "learning_rate": 0.00012025714181886402, "loss": 0.4253, "step": 47840 }, { "epoch": 1.2144788110317168, "grad_norm": 0.32421875, "learning_rate": 0.00012022457481596317, "loss": 0.4195, "step": 47845 }, { "epoch": 1.2146057290807326, "grad_norm": 0.349609375, "learning_rate": 0.00012019200927413058, "loss": 0.4254, "step": 47850 }, { "epoch": 1.2147326471297484, "grad_norm": 0.349609375, "learning_rate": 0.00012015944519496421, "loss": 0.4315, "step": 47855 }, { "epoch": 1.2148595651787641, "grad_norm": 0.32421875, "learning_rate": 0.00012012688258006195, "loss": 0.379, "step": 47860 }, { "epoch": 1.21498648322778, "grad_norm": 0.30859375, "learning_rate": 0.00012009432143102168, "loss": 0.4089, "step": 47865 }, { "epoch": 1.2151134012767955, "grad_norm": 0.357421875, "learning_rate": 0.00012006176174944115, "loss": 0.4327, "step": 47870 }, { "epoch": 1.2152403193258112, "grad_norm": 0.33203125, "learning_rate": 0.00012002920353691802, "loss": 0.4111, "step": 47875 }, { "epoch": 1.215367237374827, "grad_norm": 0.369140625, "learning_rate": 0.00011999664679504997, "loss": 0.4338, "step": 47880 }, { "epoch": 1.2154941554238428, "grad_norm": 0.345703125, "learning_rate": 0.00011996409152543452, "loss": 0.4143, "step": 47885 }, { "epoch": 1.2156210734728585, "grad_norm": 0.353515625, "learning_rate": 0.00011993153772966912, "loss": 0.4391, "step": 47890 }, { "epoch": 1.2157479915218743, "grad_norm": 0.3671875, "learning_rate": 0.0001198989854093512, "loss": 0.4261, "step": 47895 }, { "epoch": 1.21587490957089, "grad_norm": 0.353515625, "learning_rate": 0.00011986643456607808, "loss": 0.4311, "step": 47900 }, { "epoch": 1.2160018276199058, "grad_norm": 0.345703125, "learning_rate": 0.00011983388520144695, "loss": 0.3979, "step": 47905 }, { "epoch": 1.2161287456689216, "grad_norm": 0.3046875, "learning_rate": 0.00011980133731705511, "loss": 0.4248, "step": 47910 }, { "epoch": 1.2162556637179374, "grad_norm": 0.369140625, "learning_rate": 0.0001197687909144996, "loss": 0.435, "step": 47915 }, { "epoch": 1.2163825817669531, "grad_norm": 0.353515625, "learning_rate": 0.0001197362459953775, "loss": 0.4298, "step": 47920 }, { "epoch": 1.216509499815969, "grad_norm": 0.373046875, "learning_rate": 0.00011970370256128568, "loss": 0.4229, "step": 47925 }, { "epoch": 1.2166364178649847, "grad_norm": 0.326171875, "learning_rate": 0.00011967116061382118, "loss": 0.406, "step": 47930 }, { "epoch": 1.2167633359140004, "grad_norm": 0.349609375, "learning_rate": 0.00011963862015458075, "loss": 0.4394, "step": 47935 }, { "epoch": 1.2168902539630162, "grad_norm": 0.357421875, "learning_rate": 0.00011960608118516111, "loss": 0.4222, "step": 47940 }, { "epoch": 1.2170171720120317, "grad_norm": 0.33984375, "learning_rate": 0.00011957354370715898, "loss": 0.3807, "step": 47945 }, { "epoch": 1.2171440900610475, "grad_norm": 0.33203125, "learning_rate": 0.00011954100772217088, "loss": 0.3905, "step": 47950 }, { "epoch": 1.2172710081100633, "grad_norm": 0.333984375, "learning_rate": 0.00011950847323179347, "loss": 0.4173, "step": 47955 }, { "epoch": 1.217397926159079, "grad_norm": 0.337890625, "learning_rate": 0.00011947594023762311, "loss": 0.4195, "step": 47960 }, { "epoch": 1.2175248442080948, "grad_norm": 0.353515625, "learning_rate": 0.00011944340874125622, "loss": 0.3936, "step": 47965 }, { "epoch": 1.2176517622571106, "grad_norm": 0.365234375, "learning_rate": 0.00011941087874428908, "loss": 0.4438, "step": 47970 }, { "epoch": 1.2177786803061263, "grad_norm": 0.357421875, "learning_rate": 0.00011937835024831793, "loss": 0.43, "step": 47975 }, { "epoch": 1.2179055983551421, "grad_norm": 0.34765625, "learning_rate": 0.0001193458232549389, "loss": 0.4349, "step": 47980 }, { "epoch": 1.2180325164041579, "grad_norm": 0.361328125, "learning_rate": 0.00011931329776574815, "loss": 0.4237, "step": 47985 }, { "epoch": 1.2181594344531737, "grad_norm": 0.3671875, "learning_rate": 0.00011928077378234164, "loss": 0.4259, "step": 47990 }, { "epoch": 1.2182863525021894, "grad_norm": 0.3359375, "learning_rate": 0.00011924825130631531, "loss": 0.4191, "step": 47995 }, { "epoch": 1.218413270551205, "grad_norm": 0.37890625, "learning_rate": 0.00011921573033926506, "loss": 0.4299, "step": 48000 }, { "epoch": 1.2185401886002207, "grad_norm": 0.376953125, "learning_rate": 0.00011918321088278667, "loss": 0.4312, "step": 48005 }, { "epoch": 1.2186671066492365, "grad_norm": 0.349609375, "learning_rate": 0.00011915069293847586, "loss": 0.3926, "step": 48010 }, { "epoch": 1.2187940246982523, "grad_norm": 0.35546875, "learning_rate": 0.00011911817650792823, "loss": 0.4394, "step": 48015 }, { "epoch": 1.218920942747268, "grad_norm": 0.34765625, "learning_rate": 0.00011908566159273939, "loss": 0.431, "step": 48020 }, { "epoch": 1.2190478607962838, "grad_norm": 0.322265625, "learning_rate": 0.00011905314819450482, "loss": 0.4197, "step": 48025 }, { "epoch": 1.2191747788452996, "grad_norm": 0.34375, "learning_rate": 0.00011902063631481992, "loss": 0.444, "step": 48030 }, { "epoch": 1.2193016968943153, "grad_norm": 0.345703125, "learning_rate": 0.00011898812595528008, "loss": 0.4027, "step": 48035 }, { "epoch": 1.219428614943331, "grad_norm": 0.357421875, "learning_rate": 0.00011895561711748055, "loss": 0.4137, "step": 48040 }, { "epoch": 1.2195555329923469, "grad_norm": 0.302734375, "learning_rate": 0.00011892310980301652, "loss": 0.3758, "step": 48045 }, { "epoch": 1.2196824510413626, "grad_norm": 0.376953125, "learning_rate": 0.0001188906040134831, "loss": 0.3999, "step": 48050 }, { "epoch": 1.2198093690903784, "grad_norm": 0.33984375, "learning_rate": 0.00011885809975047529, "loss": 0.4312, "step": 48055 }, { "epoch": 1.2199362871393942, "grad_norm": 0.333984375, "learning_rate": 0.00011882559701558819, "loss": 0.4237, "step": 48060 }, { "epoch": 1.22006320518841, "grad_norm": 0.345703125, "learning_rate": 0.00011879309581041663, "loss": 0.434, "step": 48065 }, { "epoch": 1.2201901232374257, "grad_norm": 0.357421875, "learning_rate": 0.00011876059613655542, "loss": 0.4366, "step": 48070 }, { "epoch": 1.2203170412864413, "grad_norm": 0.3671875, "learning_rate": 0.0001187280979955993, "loss": 0.4372, "step": 48075 }, { "epoch": 1.220443959335457, "grad_norm": 0.357421875, "learning_rate": 0.00011869560138914295, "loss": 0.4141, "step": 48080 }, { "epoch": 1.2205708773844728, "grad_norm": 0.34375, "learning_rate": 0.00011866310631878101, "loss": 0.4126, "step": 48085 }, { "epoch": 1.2206977954334886, "grad_norm": 0.357421875, "learning_rate": 0.00011863061278610794, "loss": 0.4288, "step": 48090 }, { "epoch": 1.2208247134825043, "grad_norm": 0.333984375, "learning_rate": 0.0001185981207927182, "loss": 0.4043, "step": 48095 }, { "epoch": 1.22095163153152, "grad_norm": 0.3515625, "learning_rate": 0.00011856563034020617, "loss": 0.4303, "step": 48100 }, { "epoch": 1.2210785495805359, "grad_norm": 0.287109375, "learning_rate": 0.0001185331414301661, "loss": 0.38, "step": 48105 }, { "epoch": 1.2212054676295516, "grad_norm": 0.349609375, "learning_rate": 0.00011850065406419229, "loss": 0.432, "step": 48110 }, { "epoch": 1.2213323856785674, "grad_norm": 0.3515625, "learning_rate": 0.0001184681682438788, "loss": 0.4348, "step": 48115 }, { "epoch": 1.2214593037275832, "grad_norm": 0.349609375, "learning_rate": 0.00011843568397081973, "loss": 0.4365, "step": 48120 }, { "epoch": 1.221586221776599, "grad_norm": 0.365234375, "learning_rate": 0.0001184032012466091, "loss": 0.4355, "step": 48125 }, { "epoch": 1.2217131398256147, "grad_norm": 0.34375, "learning_rate": 0.00011837072007284081, "loss": 0.4293, "step": 48130 }, { "epoch": 1.2218400578746302, "grad_norm": 0.376953125, "learning_rate": 0.0001183382404511087, "loss": 0.4389, "step": 48135 }, { "epoch": 1.221966975923646, "grad_norm": 0.353515625, "learning_rate": 0.0001183057623830065, "loss": 0.4431, "step": 48140 }, { "epoch": 1.2220938939726618, "grad_norm": 0.365234375, "learning_rate": 0.00011827328587012791, "loss": 0.3947, "step": 48145 }, { "epoch": 1.2222208120216775, "grad_norm": 0.33984375, "learning_rate": 0.00011824081091406652, "loss": 0.4359, "step": 48150 }, { "epoch": 1.2223477300706933, "grad_norm": 0.353515625, "learning_rate": 0.00011820833751641593, "loss": 0.4003, "step": 48155 }, { "epoch": 1.222474648119709, "grad_norm": 0.376953125, "learning_rate": 0.00011817586567876953, "loss": 0.4339, "step": 48160 }, { "epoch": 1.2226015661687248, "grad_norm": 0.35546875, "learning_rate": 0.00011814339540272075, "loss": 0.4204, "step": 48165 }, { "epoch": 1.2227284842177406, "grad_norm": 0.34375, "learning_rate": 0.00011811092668986285, "loss": 0.3905, "step": 48170 }, { "epoch": 1.2228554022667564, "grad_norm": 0.37109375, "learning_rate": 0.00011807845954178908, "loss": 0.4504, "step": 48175 }, { "epoch": 1.2229823203157721, "grad_norm": 0.384765625, "learning_rate": 0.00011804599396009252, "loss": 0.4056, "step": 48180 }, { "epoch": 1.223109238364788, "grad_norm": 0.341796875, "learning_rate": 0.00011801352994636638, "loss": 0.4265, "step": 48185 }, { "epoch": 1.2232361564138037, "grad_norm": 0.32421875, "learning_rate": 0.00011798106750220358, "loss": 0.4358, "step": 48190 }, { "epoch": 1.2233630744628194, "grad_norm": 0.32421875, "learning_rate": 0.000117948606629197, "loss": 0.4145, "step": 48195 }, { "epoch": 1.2234899925118352, "grad_norm": 0.33203125, "learning_rate": 0.00011791614732893958, "loss": 0.3959, "step": 48200 }, { "epoch": 1.223616910560851, "grad_norm": 0.345703125, "learning_rate": 0.000117883689603024, "loss": 0.4118, "step": 48205 }, { "epoch": 1.2237438286098665, "grad_norm": 0.357421875, "learning_rate": 0.00011785123345304301, "loss": 0.3991, "step": 48210 }, { "epoch": 1.2238707466588823, "grad_norm": 0.35546875, "learning_rate": 0.00011781877888058916, "loss": 0.4584, "step": 48215 }, { "epoch": 1.223997664707898, "grad_norm": 0.330078125, "learning_rate": 0.00011778632588725503, "loss": 0.4181, "step": 48220 }, { "epoch": 1.2241245827569138, "grad_norm": 0.3515625, "learning_rate": 0.00011775387447463304, "loss": 0.438, "step": 48225 }, { "epoch": 1.2242515008059296, "grad_norm": 0.369140625, "learning_rate": 0.00011772142464431557, "loss": 0.4025, "step": 48230 }, { "epoch": 1.2243784188549454, "grad_norm": 0.36328125, "learning_rate": 0.00011768897639789495, "loss": 0.4252, "step": 48235 }, { "epoch": 1.2245053369039611, "grad_norm": 0.36328125, "learning_rate": 0.0001176565297369634, "loss": 0.4106, "step": 48240 }, { "epoch": 1.224632254952977, "grad_norm": 0.34375, "learning_rate": 0.00011762408466311298, "loss": 0.4143, "step": 48245 }, { "epoch": 1.2247591730019927, "grad_norm": 0.349609375, "learning_rate": 0.00011759164117793592, "loss": 0.4048, "step": 48250 }, { "epoch": 1.2248860910510084, "grad_norm": 0.322265625, "learning_rate": 0.00011755919928302407, "loss": 0.4066, "step": 48255 }, { "epoch": 1.2250130091000242, "grad_norm": 0.349609375, "learning_rate": 0.00011752675897996943, "loss": 0.4121, "step": 48260 }, { "epoch": 1.2251399271490397, "grad_norm": 0.353515625, "learning_rate": 0.00011749432027036377, "loss": 0.4154, "step": 48265 }, { "epoch": 1.2252668451980555, "grad_norm": 0.361328125, "learning_rate": 0.00011746188315579888, "loss": 0.4184, "step": 48270 }, { "epoch": 1.2253937632470713, "grad_norm": 0.3515625, "learning_rate": 0.00011742944763786638, "loss": 0.4703, "step": 48275 }, { "epoch": 1.225520681296087, "grad_norm": 0.318359375, "learning_rate": 0.00011739701371815794, "loss": 0.4125, "step": 48280 }, { "epoch": 1.2256475993451028, "grad_norm": 0.34375, "learning_rate": 0.00011736458139826506, "loss": 0.41, "step": 48285 }, { "epoch": 1.2257745173941186, "grad_norm": 0.34765625, "learning_rate": 0.00011733215067977915, "loss": 0.4349, "step": 48290 }, { "epoch": 1.2259014354431343, "grad_norm": 0.33984375, "learning_rate": 0.00011729972156429162, "loss": 0.4205, "step": 48295 }, { "epoch": 1.2260283534921501, "grad_norm": 0.34765625, "learning_rate": 0.00011726729405339371, "loss": 0.4178, "step": 48300 }, { "epoch": 1.2261552715411659, "grad_norm": 0.345703125, "learning_rate": 0.00011723486814867659, "loss": 0.4044, "step": 48305 }, { "epoch": 1.2262821895901816, "grad_norm": 0.33984375, "learning_rate": 0.00011720244385173149, "loss": 0.4088, "step": 48310 }, { "epoch": 1.2264091076391974, "grad_norm": 0.34375, "learning_rate": 0.0001171700211641494, "loss": 0.4107, "step": 48315 }, { "epoch": 1.2265360256882132, "grad_norm": 0.330078125, "learning_rate": 0.00011713760008752129, "loss": 0.4074, "step": 48320 }, { "epoch": 1.226662943737229, "grad_norm": 0.357421875, "learning_rate": 0.00011710518062343805, "loss": 0.395, "step": 48325 }, { "epoch": 1.2267898617862447, "grad_norm": 0.361328125, "learning_rate": 0.00011707276277349054, "loss": 0.4223, "step": 48330 }, { "epoch": 1.2269167798352605, "grad_norm": 0.36328125, "learning_rate": 0.0001170403465392694, "loss": 0.4283, "step": 48335 }, { "epoch": 1.227043697884276, "grad_norm": 0.361328125, "learning_rate": 0.00011700793192236535, "loss": 0.4425, "step": 48340 }, { "epoch": 1.2271706159332918, "grad_norm": 0.34765625, "learning_rate": 0.00011697551892436893, "loss": 0.4115, "step": 48345 }, { "epoch": 1.2272975339823076, "grad_norm": 0.359375, "learning_rate": 0.0001169431075468706, "loss": 0.3903, "step": 48350 }, { "epoch": 1.2274244520313233, "grad_norm": 0.359375, "learning_rate": 0.00011691069779146086, "loss": 0.4387, "step": 48355 }, { "epoch": 1.227551370080339, "grad_norm": 0.412109375, "learning_rate": 0.00011687828965973, "loss": 0.4437, "step": 48360 }, { "epoch": 1.2276782881293549, "grad_norm": 0.34765625, "learning_rate": 0.00011684588315326826, "loss": 0.4073, "step": 48365 }, { "epoch": 1.2278052061783706, "grad_norm": 0.345703125, "learning_rate": 0.00011681347827366577, "loss": 0.4205, "step": 48370 }, { "epoch": 1.2279321242273864, "grad_norm": 0.34765625, "learning_rate": 0.00011678107502251276, "loss": 0.3852, "step": 48375 }, { "epoch": 1.2280590422764022, "grad_norm": 0.326171875, "learning_rate": 0.00011674867340139915, "loss": 0.3649, "step": 48380 }, { "epoch": 1.228185960325418, "grad_norm": 0.318359375, "learning_rate": 0.00011671627341191489, "loss": 0.4, "step": 48385 }, { "epoch": 1.2283128783744337, "grad_norm": 0.333984375, "learning_rate": 0.00011668387505564984, "loss": 0.3924, "step": 48390 }, { "epoch": 1.2284397964234495, "grad_norm": 0.353515625, "learning_rate": 0.00011665147833419372, "loss": 0.4095, "step": 48395 }, { "epoch": 1.228566714472465, "grad_norm": 0.357421875, "learning_rate": 0.00011661908324913635, "loss": 0.4127, "step": 48400 }, { "epoch": 1.2286936325214808, "grad_norm": 0.345703125, "learning_rate": 0.00011658668980206723, "loss": 0.3855, "step": 48405 }, { "epoch": 1.2288205505704966, "grad_norm": 0.33203125, "learning_rate": 0.00011655429799457595, "loss": 0.4274, "step": 48410 }, { "epoch": 1.2289474686195123, "grad_norm": 0.33203125, "learning_rate": 0.00011652190782825192, "loss": 0.4262, "step": 48415 }, { "epoch": 1.229074386668528, "grad_norm": 0.359375, "learning_rate": 0.00011648951930468457, "loss": 0.4264, "step": 48420 }, { "epoch": 1.2292013047175439, "grad_norm": 0.33984375, "learning_rate": 0.00011645713242546313, "loss": 0.3917, "step": 48425 }, { "epoch": 1.2293282227665596, "grad_norm": 0.345703125, "learning_rate": 0.00011642474719217679, "loss": 0.4223, "step": 48430 }, { "epoch": 1.2294551408155754, "grad_norm": 0.349609375, "learning_rate": 0.0001163923636064148, "loss": 0.4047, "step": 48435 }, { "epoch": 1.2295820588645912, "grad_norm": 0.3671875, "learning_rate": 0.0001163599816697661, "loss": 0.4223, "step": 48440 }, { "epoch": 1.229708976913607, "grad_norm": 0.35546875, "learning_rate": 0.00011632760138381974, "loss": 0.4144, "step": 48445 }, { "epoch": 1.2298358949626227, "grad_norm": 0.373046875, "learning_rate": 0.00011629522275016456, "loss": 0.4409, "step": 48450 }, { "epoch": 1.2299628130116385, "grad_norm": 0.359375, "learning_rate": 0.00011626284577038939, "loss": 0.4656, "step": 48455 }, { "epoch": 1.2300897310606542, "grad_norm": 0.373046875, "learning_rate": 0.0001162304704460829, "loss": 0.4682, "step": 48460 }, { "epoch": 1.23021664910967, "grad_norm": 0.328125, "learning_rate": 0.00011619809677883382, "loss": 0.4088, "step": 48465 }, { "epoch": 1.2303435671586858, "grad_norm": 0.34375, "learning_rate": 0.00011616572477023063, "loss": 0.4104, "step": 48470 }, { "epoch": 1.2304704852077013, "grad_norm": 2.9375, "learning_rate": 0.00011613335442186183, "loss": 0.375, "step": 48475 }, { "epoch": 1.230597403256717, "grad_norm": 0.34375, "learning_rate": 0.00011610098573531587, "loss": 0.404, "step": 48480 }, { "epoch": 1.2307243213057328, "grad_norm": 0.3515625, "learning_rate": 0.00011606861871218104, "loss": 0.4242, "step": 48485 }, { "epoch": 1.2308512393547486, "grad_norm": 0.33984375, "learning_rate": 0.00011603625335404557, "loss": 0.4152, "step": 48490 }, { "epoch": 1.2309781574037644, "grad_norm": 0.384765625, "learning_rate": 0.00011600388966249756, "loss": 0.4377, "step": 48495 }, { "epoch": 1.2311050754527801, "grad_norm": 0.34765625, "learning_rate": 0.0001159715276391252, "loss": 0.4002, "step": 48500 }, { "epoch": 1.231231993501796, "grad_norm": 0.37890625, "learning_rate": 0.00011593916728551643, "loss": 0.4431, "step": 48505 }, { "epoch": 1.2313589115508117, "grad_norm": 0.353515625, "learning_rate": 0.00011590680860325914, "loss": 0.4384, "step": 48510 }, { "epoch": 1.2314858295998274, "grad_norm": 0.322265625, "learning_rate": 0.00011587445159394119, "loss": 0.3998, "step": 48515 }, { "epoch": 1.2316127476488432, "grad_norm": 0.3515625, "learning_rate": 0.00011584209625915025, "loss": 0.4208, "step": 48520 }, { "epoch": 1.231739665697859, "grad_norm": 0.35546875, "learning_rate": 0.0001158097426004741, "loss": 0.4158, "step": 48525 }, { "epoch": 1.2318665837468745, "grad_norm": 0.357421875, "learning_rate": 0.00011577739061950023, "loss": 0.4358, "step": 48530 }, { "epoch": 1.2319935017958903, "grad_norm": 0.357421875, "learning_rate": 0.0001157450403178162, "loss": 0.4228, "step": 48535 }, { "epoch": 1.232120419844906, "grad_norm": 0.3515625, "learning_rate": 0.00011571269169700938, "loss": 0.4286, "step": 48540 }, { "epoch": 1.2322473378939218, "grad_norm": 0.34375, "learning_rate": 0.00011568034475866712, "loss": 0.4128, "step": 48545 }, { "epoch": 1.2323742559429376, "grad_norm": 0.388671875, "learning_rate": 0.00011564799950437666, "loss": 0.4513, "step": 48550 }, { "epoch": 1.2325011739919534, "grad_norm": 0.326171875, "learning_rate": 0.00011561565593572518, "loss": 0.402, "step": 48555 }, { "epoch": 1.2326280920409691, "grad_norm": 0.357421875, "learning_rate": 0.00011558331405429976, "loss": 0.456, "step": 48560 }, { "epoch": 1.232755010089985, "grad_norm": 0.33984375, "learning_rate": 0.0001155509738616874, "loss": 0.4321, "step": 48565 }, { "epoch": 1.2328819281390007, "grad_norm": 0.353515625, "learning_rate": 0.00011551863535947506, "loss": 0.4195, "step": 48570 }, { "epoch": 1.2330088461880164, "grad_norm": 0.33203125, "learning_rate": 0.00011548629854924956, "loss": 0.4009, "step": 48575 }, { "epoch": 1.2331357642370322, "grad_norm": 0.337890625, "learning_rate": 0.00011545396343259762, "loss": 0.4092, "step": 48580 }, { "epoch": 1.233262682286048, "grad_norm": 0.34765625, "learning_rate": 0.00011542163001110597, "loss": 0.4492, "step": 48585 }, { "epoch": 1.2333896003350637, "grad_norm": 0.373046875, "learning_rate": 0.00011538929828636111, "loss": 0.4088, "step": 48590 }, { "epoch": 1.2335165183840795, "grad_norm": 0.3203125, "learning_rate": 0.0001153569682599496, "loss": 0.3967, "step": 48595 }, { "epoch": 1.2336434364330953, "grad_norm": 0.365234375, "learning_rate": 0.00011532463993345788, "loss": 0.4091, "step": 48600 }, { "epoch": 1.2337703544821108, "grad_norm": 0.330078125, "learning_rate": 0.00011529231330847226, "loss": 0.427, "step": 48605 }, { "epoch": 1.2338972725311266, "grad_norm": 0.33203125, "learning_rate": 0.00011525998838657903, "loss": 0.3986, "step": 48610 }, { "epoch": 1.2340241905801423, "grad_norm": 0.353515625, "learning_rate": 0.00011522766516936429, "loss": 0.4271, "step": 48615 }, { "epoch": 1.2341511086291581, "grad_norm": 0.33203125, "learning_rate": 0.00011519534365841414, "loss": 0.4164, "step": 48620 }, { "epoch": 1.2342780266781739, "grad_norm": 0.34375, "learning_rate": 0.00011516302385531465, "loss": 0.4038, "step": 48625 }, { "epoch": 1.2344049447271896, "grad_norm": 0.349609375, "learning_rate": 0.00011513070576165172, "loss": 0.425, "step": 48630 }, { "epoch": 1.2345318627762054, "grad_norm": 0.34375, "learning_rate": 0.00011509838937901116, "loss": 0.4211, "step": 48635 }, { "epoch": 1.2346587808252212, "grad_norm": 0.330078125, "learning_rate": 0.0001150660747089787, "loss": 0.4136, "step": 48640 }, { "epoch": 1.234785698874237, "grad_norm": 0.359375, "learning_rate": 0.00011503376175314007, "loss": 0.4073, "step": 48645 }, { "epoch": 1.2349126169232527, "grad_norm": 0.328125, "learning_rate": 0.00011500145051308083, "loss": 0.4128, "step": 48650 }, { "epoch": 1.2350395349722685, "grad_norm": 0.369140625, "learning_rate": 0.00011496914099038646, "loss": 0.4153, "step": 48655 }, { "epoch": 1.2351664530212842, "grad_norm": 0.353515625, "learning_rate": 0.00011493683318664239, "loss": 0.4267, "step": 48660 }, { "epoch": 1.2352933710702998, "grad_norm": 0.365234375, "learning_rate": 0.00011490452710343395, "loss": 0.3878, "step": 48665 }, { "epoch": 1.2354202891193156, "grad_norm": 0.345703125, "learning_rate": 0.00011487222274234637, "loss": 0.4109, "step": 48670 }, { "epoch": 1.2355472071683313, "grad_norm": 0.353515625, "learning_rate": 0.00011483992010496482, "loss": 0.4131, "step": 48675 }, { "epoch": 1.235674125217347, "grad_norm": 0.34765625, "learning_rate": 0.00011480761919287441, "loss": 0.4449, "step": 48680 }, { "epoch": 1.2358010432663629, "grad_norm": 0.337890625, "learning_rate": 0.00011477532000766004, "loss": 0.4331, "step": 48685 }, { "epoch": 1.2359279613153786, "grad_norm": 0.328125, "learning_rate": 0.00011474302255090675, "loss": 0.4125, "step": 48690 }, { "epoch": 1.2360548793643944, "grad_norm": 0.349609375, "learning_rate": 0.0001147107268241993, "loss": 0.434, "step": 48695 }, { "epoch": 1.2361817974134102, "grad_norm": 0.326171875, "learning_rate": 0.00011467843282912243, "loss": 0.4396, "step": 48700 }, { "epoch": 1.236308715462426, "grad_norm": 0.330078125, "learning_rate": 0.0001146461405672608, "loss": 0.4334, "step": 48705 }, { "epoch": 1.2364356335114417, "grad_norm": 0.357421875, "learning_rate": 0.00011461385004019894, "loss": 0.4299, "step": 48710 }, { "epoch": 1.2365625515604575, "grad_norm": 0.3671875, "learning_rate": 0.00011458156124952139, "loss": 0.4187, "step": 48715 }, { "epoch": 1.2366894696094732, "grad_norm": 0.349609375, "learning_rate": 0.00011454927419681248, "loss": 0.4243, "step": 48720 }, { "epoch": 1.236816387658489, "grad_norm": 0.337890625, "learning_rate": 0.0001145169888836566, "loss": 0.4349, "step": 48725 }, { "epoch": 1.2369433057075048, "grad_norm": 0.35546875, "learning_rate": 0.00011448470531163792, "loss": 0.4219, "step": 48730 }, { "epoch": 1.2370702237565205, "grad_norm": 0.33984375, "learning_rate": 0.00011445242348234063, "loss": 0.4044, "step": 48735 }, { "epoch": 1.237197141805536, "grad_norm": 0.365234375, "learning_rate": 0.00011442014339734876, "loss": 0.4437, "step": 48740 }, { "epoch": 1.2373240598545518, "grad_norm": 0.32421875, "learning_rate": 0.0001143878650582462, "loss": 0.4033, "step": 48745 }, { "epoch": 1.2374509779035676, "grad_norm": 0.361328125, "learning_rate": 0.00011435558846661699, "loss": 0.4288, "step": 48750 }, { "epoch": 1.2375778959525834, "grad_norm": 0.341796875, "learning_rate": 0.00011432331362404485, "loss": 0.4193, "step": 48755 }, { "epoch": 1.2377048140015992, "grad_norm": 0.33984375, "learning_rate": 0.0001142910405321135, "loss": 0.4129, "step": 48760 }, { "epoch": 1.237831732050615, "grad_norm": 2.203125, "learning_rate": 0.00011425876919240654, "loss": 0.4111, "step": 48765 }, { "epoch": 1.2379586500996307, "grad_norm": 0.333984375, "learning_rate": 0.00011422649960650756, "loss": 0.4165, "step": 48770 }, { "epoch": 1.2380855681486465, "grad_norm": 0.365234375, "learning_rate": 0.00011419423177599999, "loss": 0.435, "step": 48775 }, { "epoch": 1.2382124861976622, "grad_norm": 0.32421875, "learning_rate": 0.00011416196570246722, "loss": 0.4048, "step": 48780 }, { "epoch": 1.238339404246678, "grad_norm": 0.333984375, "learning_rate": 0.00011412970138749251, "loss": 0.4003, "step": 48785 }, { "epoch": 1.2384663222956938, "grad_norm": 0.34765625, "learning_rate": 0.00011409743883265907, "loss": 0.4306, "step": 48790 }, { "epoch": 1.2385932403447093, "grad_norm": 0.33984375, "learning_rate": 0.00011406517803954995, "loss": 0.4205, "step": 48795 }, { "epoch": 1.238720158393725, "grad_norm": 0.33984375, "learning_rate": 0.0001140329190097483, "loss": 0.4276, "step": 48800 }, { "epoch": 1.2388470764427408, "grad_norm": 0.349609375, "learning_rate": 0.00011400066174483695, "loss": 0.4384, "step": 48805 }, { "epoch": 1.2389739944917566, "grad_norm": 0.345703125, "learning_rate": 0.00011396840624639875, "loss": 0.4252, "step": 48810 }, { "epoch": 1.2391009125407724, "grad_norm": 0.357421875, "learning_rate": 0.00011393615251601655, "loss": 0.4006, "step": 48815 }, { "epoch": 1.2392278305897881, "grad_norm": 0.3671875, "learning_rate": 0.000113903900555273, "loss": 0.4272, "step": 48820 }, { "epoch": 1.239354748638804, "grad_norm": 0.326171875, "learning_rate": 0.00011387165036575066, "loss": 0.4091, "step": 48825 }, { "epoch": 1.2394816666878197, "grad_norm": 0.35546875, "learning_rate": 0.00011383940194903203, "loss": 0.4257, "step": 48830 }, { "epoch": 1.2396085847368354, "grad_norm": 0.330078125, "learning_rate": 0.00011380715530669957, "loss": 0.4113, "step": 48835 }, { "epoch": 1.2397355027858512, "grad_norm": 0.33984375, "learning_rate": 0.00011377491044033553, "loss": 0.4138, "step": 48840 }, { "epoch": 1.239862420834867, "grad_norm": 0.3671875, "learning_rate": 0.00011374266735152224, "loss": 0.447, "step": 48845 }, { "epoch": 1.2399893388838827, "grad_norm": 0.3515625, "learning_rate": 0.00011371042604184183, "loss": 0.4168, "step": 48850 }, { "epoch": 1.2401162569328985, "grad_norm": 0.34765625, "learning_rate": 0.00011367818651287636, "loss": 0.4092, "step": 48855 }, { "epoch": 1.2402431749819143, "grad_norm": 0.353515625, "learning_rate": 0.0001136459487662078, "loss": 0.4159, "step": 48860 }, { "epoch": 1.24037009303093, "grad_norm": 0.3515625, "learning_rate": 0.00011361371280341804, "loss": 0.4235, "step": 48865 }, { "epoch": 1.2404970110799456, "grad_norm": 0.36328125, "learning_rate": 0.00011358147862608891, "loss": 0.4198, "step": 48870 }, { "epoch": 1.2406239291289614, "grad_norm": 0.41796875, "learning_rate": 0.00011354924623580206, "loss": 0.4506, "step": 48875 }, { "epoch": 1.2407508471779771, "grad_norm": 0.34765625, "learning_rate": 0.00011351701563413924, "loss": 0.4079, "step": 48880 }, { "epoch": 1.240877765226993, "grad_norm": 0.314453125, "learning_rate": 0.00011348478682268188, "loss": 0.419, "step": 48885 }, { "epoch": 1.2410046832760087, "grad_norm": 0.380859375, "learning_rate": 0.00011345255980301155, "loss": 0.455, "step": 48890 }, { "epoch": 1.2411316013250244, "grad_norm": 0.349609375, "learning_rate": 0.00011342033457670953, "loss": 0.411, "step": 48895 }, { "epoch": 1.2412585193740402, "grad_norm": 0.34375, "learning_rate": 0.00011338811114535712, "loss": 0.4018, "step": 48900 }, { "epoch": 1.241385437423056, "grad_norm": 0.37109375, "learning_rate": 0.00011335588951053553, "loss": 0.431, "step": 48905 }, { "epoch": 1.2415123554720717, "grad_norm": 0.365234375, "learning_rate": 0.00011332366967382583, "loss": 0.4456, "step": 48910 }, { "epoch": 1.2416392735210875, "grad_norm": 3.078125, "learning_rate": 0.00011329145163680904, "loss": 0.4658, "step": 48915 }, { "epoch": 1.2417661915701033, "grad_norm": 0.326171875, "learning_rate": 0.00011325923540106608, "loss": 0.396, "step": 48920 }, { "epoch": 1.241893109619119, "grad_norm": 0.365234375, "learning_rate": 0.00011322702096817783, "loss": 0.4079, "step": 48925 }, { "epoch": 1.2420200276681346, "grad_norm": 0.35546875, "learning_rate": 0.00011319480833972502, "loss": 0.4207, "step": 48930 }, { "epoch": 1.2421469457171503, "grad_norm": 0.357421875, "learning_rate": 0.00011316259751728822, "loss": 0.4116, "step": 48935 }, { "epoch": 1.242273863766166, "grad_norm": 0.359375, "learning_rate": 0.00011313038850244817, "loss": 0.4281, "step": 48940 }, { "epoch": 1.2424007818151819, "grad_norm": 0.3515625, "learning_rate": 0.00011309818129678526, "loss": 0.4328, "step": 48945 }, { "epoch": 1.2425276998641976, "grad_norm": 0.349609375, "learning_rate": 0.0001130659759018799, "loss": 0.4221, "step": 48950 }, { "epoch": 1.2426546179132134, "grad_norm": 0.333984375, "learning_rate": 0.00011303377231931239, "loss": 0.4102, "step": 48955 }, { "epoch": 1.2427815359622292, "grad_norm": 0.3515625, "learning_rate": 0.00011300157055066295, "loss": 0.4205, "step": 48960 }, { "epoch": 1.242908454011245, "grad_norm": 0.326171875, "learning_rate": 0.00011296937059751166, "loss": 0.3942, "step": 48965 }, { "epoch": 1.2430353720602607, "grad_norm": 0.3359375, "learning_rate": 0.00011293717246143865, "loss": 0.4271, "step": 48970 }, { "epoch": 1.2431622901092765, "grad_norm": 0.31640625, "learning_rate": 0.00011290497614402382, "loss": 0.4238, "step": 48975 }, { "epoch": 1.2432892081582922, "grad_norm": 0.361328125, "learning_rate": 0.00011287278164684703, "loss": 0.4113, "step": 48980 }, { "epoch": 1.243416126207308, "grad_norm": 0.380859375, "learning_rate": 0.00011284058897148805, "loss": 0.443, "step": 48985 }, { "epoch": 1.2435430442563238, "grad_norm": 0.333984375, "learning_rate": 0.00011280839811952657, "loss": 0.4009, "step": 48990 }, { "epoch": 1.2436699623053395, "grad_norm": 0.353515625, "learning_rate": 0.00011277620909254214, "loss": 0.4102, "step": 48995 }, { "epoch": 1.2437968803543553, "grad_norm": 0.361328125, "learning_rate": 0.00011274402189211433, "loss": 0.4036, "step": 49000 }, { "epoch": 1.2439237984033709, "grad_norm": 0.345703125, "learning_rate": 0.00011271183651982254, "loss": 0.416, "step": 49005 }, { "epoch": 1.2440507164523866, "grad_norm": 0.3671875, "learning_rate": 0.00011267965297724604, "loss": 0.4254, "step": 49010 }, { "epoch": 1.2441776345014024, "grad_norm": 0.353515625, "learning_rate": 0.00011264747126596414, "loss": 0.4107, "step": 49015 }, { "epoch": 1.2443045525504182, "grad_norm": 0.345703125, "learning_rate": 0.00011261529138755596, "loss": 0.4163, "step": 49020 }, { "epoch": 1.244431470599434, "grad_norm": 0.359375, "learning_rate": 0.00011258311334360053, "loss": 0.4233, "step": 49025 }, { "epoch": 1.2445583886484497, "grad_norm": 0.345703125, "learning_rate": 0.00011255093713567681, "loss": 0.4175, "step": 49030 }, { "epoch": 1.2446853066974655, "grad_norm": 0.29296875, "learning_rate": 0.0001125187627653637, "loss": 0.4023, "step": 49035 }, { "epoch": 1.2448122247464812, "grad_norm": 0.3515625, "learning_rate": 0.00011248659023423994, "loss": 0.4469, "step": 49040 }, { "epoch": 1.244939142795497, "grad_norm": 0.3359375, "learning_rate": 0.00011245441954388427, "loss": 0.3941, "step": 49045 }, { "epoch": 1.2450660608445128, "grad_norm": 0.341796875, "learning_rate": 0.0001124222506958753, "loss": 0.4288, "step": 49050 }, { "epoch": 1.2451929788935285, "grad_norm": 0.359375, "learning_rate": 0.00011239008369179149, "loss": 0.4347, "step": 49055 }, { "epoch": 1.245319896942544, "grad_norm": 0.3359375, "learning_rate": 0.00011235791853321127, "loss": 0.4299, "step": 49060 }, { "epoch": 1.2454468149915598, "grad_norm": 0.375, "learning_rate": 0.00011232575522171303, "loss": 0.3911, "step": 49065 }, { "epoch": 1.2455737330405756, "grad_norm": 0.337890625, "learning_rate": 0.00011229359375887497, "loss": 0.4188, "step": 49070 }, { "epoch": 1.2457006510895914, "grad_norm": 0.333984375, "learning_rate": 0.00011226143414627525, "loss": 0.4247, "step": 49075 }, { "epoch": 1.2458275691386071, "grad_norm": 0.3359375, "learning_rate": 0.00011222927638549194, "loss": 0.4299, "step": 49080 }, { "epoch": 1.245954487187623, "grad_norm": 0.3515625, "learning_rate": 0.00011219712047810293, "loss": 0.4222, "step": 49085 }, { "epoch": 1.2460814052366387, "grad_norm": 0.337890625, "learning_rate": 0.00011216496642568621, "loss": 0.4205, "step": 49090 }, { "epoch": 1.2462083232856545, "grad_norm": 0.3671875, "learning_rate": 0.00011213281422981952, "loss": 0.4409, "step": 49095 }, { "epoch": 1.2463352413346702, "grad_norm": 0.34375, "learning_rate": 0.00011210066389208054, "loss": 0.4128, "step": 49100 }, { "epoch": 1.246462159383686, "grad_norm": 0.337890625, "learning_rate": 0.0001120685154140469, "loss": 0.4347, "step": 49105 }, { "epoch": 1.2465890774327018, "grad_norm": 0.330078125, "learning_rate": 0.00011203636879729608, "loss": 0.4065, "step": 49110 }, { "epoch": 1.2467159954817175, "grad_norm": 0.302734375, "learning_rate": 0.00011200422404340554, "loss": 0.4074, "step": 49115 }, { "epoch": 1.2468429135307333, "grad_norm": 0.353515625, "learning_rate": 0.00011197208115395255, "loss": 0.4094, "step": 49120 }, { "epoch": 1.246969831579749, "grad_norm": 0.337890625, "learning_rate": 0.00011193994013051439, "loss": 0.417, "step": 49125 }, { "epoch": 1.2470967496287648, "grad_norm": 0.35546875, "learning_rate": 0.0001119078009746682, "loss": 0.4009, "step": 49130 }, { "epoch": 1.2472236676777804, "grad_norm": 0.3671875, "learning_rate": 0.00011187566368799109, "loss": 0.4085, "step": 49135 }, { "epoch": 1.2473505857267961, "grad_norm": 0.33984375, "learning_rate": 0.00011184352827205996, "loss": 0.4086, "step": 49140 }, { "epoch": 1.247477503775812, "grad_norm": 0.31640625, "learning_rate": 0.00011181139472845171, "loss": 0.3958, "step": 49145 }, { "epoch": 1.2476044218248277, "grad_norm": 0.33984375, "learning_rate": 0.00011177926305874311, "loss": 0.3932, "step": 49150 }, { "epoch": 1.2477313398738434, "grad_norm": 0.353515625, "learning_rate": 0.00011174713326451085, "loss": 0.4061, "step": 49155 }, { "epoch": 1.2478582579228592, "grad_norm": 0.302734375, "learning_rate": 0.0001117150053473315, "loss": 0.4141, "step": 49160 }, { "epoch": 1.247985175971875, "grad_norm": 0.365234375, "learning_rate": 0.00011168287930878158, "loss": 0.4192, "step": 49165 }, { "epoch": 1.2481120940208907, "grad_norm": 0.361328125, "learning_rate": 0.00011165075515043753, "loss": 0.3992, "step": 49170 }, { "epoch": 1.2482390120699065, "grad_norm": 0.359375, "learning_rate": 0.00011161863287387565, "loss": 0.408, "step": 49175 }, { "epoch": 1.2483659301189223, "grad_norm": 0.37109375, "learning_rate": 0.00011158651248067217, "loss": 0.428, "step": 49180 }, { "epoch": 1.248492848167938, "grad_norm": 0.369140625, "learning_rate": 0.0001115543939724032, "loss": 0.4121, "step": 49185 }, { "epoch": 1.2486197662169538, "grad_norm": 0.328125, "learning_rate": 0.00011152227735064476, "loss": 0.4212, "step": 49190 }, { "epoch": 1.2487466842659694, "grad_norm": 0.34765625, "learning_rate": 0.00011149016261697291, "loss": 0.4196, "step": 49195 }, { "epoch": 1.2488736023149851, "grad_norm": 0.353515625, "learning_rate": 0.00011145804977296342, "loss": 0.4188, "step": 49200 }, { "epoch": 1.2490005203640009, "grad_norm": 0.376953125, "learning_rate": 0.00011142593882019209, "loss": 0.4547, "step": 49205 }, { "epoch": 1.2491274384130167, "grad_norm": 0.34765625, "learning_rate": 0.00011139382976023456, "loss": 0.4112, "step": 49210 }, { "epoch": 1.2492543564620324, "grad_norm": 0.421875, "learning_rate": 0.00011136172259466643, "loss": 0.434, "step": 49215 }, { "epoch": 1.2493812745110482, "grad_norm": 0.37890625, "learning_rate": 0.00011132961732506318, "loss": 0.4464, "step": 49220 }, { "epoch": 1.249508192560064, "grad_norm": 0.33984375, "learning_rate": 0.00011129751395300022, "loss": 0.4126, "step": 49225 }, { "epoch": 1.2496351106090797, "grad_norm": 0.34375, "learning_rate": 0.00011126541248005284, "loss": 0.415, "step": 49230 }, { "epoch": 1.2497620286580955, "grad_norm": 0.353515625, "learning_rate": 0.00011123331290779623, "loss": 0.4273, "step": 49235 }, { "epoch": 1.2498889467071113, "grad_norm": 0.36328125, "learning_rate": 0.00011120121523780547, "loss": 0.398, "step": 49240 }, { "epoch": 1.250015864756127, "grad_norm": 0.3671875, "learning_rate": 0.00011116911947165566, "loss": 0.4262, "step": 49245 }, { "epoch": 1.2501427828051428, "grad_norm": 0.3359375, "learning_rate": 0.00011113702561092167, "loss": 0.4096, "step": 49250 }, { "epoch": 1.2502697008541586, "grad_norm": 0.34765625, "learning_rate": 0.00011110493365717834, "loss": 0.4341, "step": 49255 }, { "epoch": 1.2503966189031743, "grad_norm": 0.35546875, "learning_rate": 0.00011107284361200046, "loss": 0.4223, "step": 49260 }, { "epoch": 1.25052353695219, "grad_norm": 0.333984375, "learning_rate": 0.00011104075547696264, "loss": 0.4351, "step": 49265 }, { "epoch": 1.2506504550012056, "grad_norm": 0.322265625, "learning_rate": 0.00011100866925363941, "loss": 0.4214, "step": 49270 }, { "epoch": 1.2507773730502214, "grad_norm": 0.341796875, "learning_rate": 0.00011097658494360529, "loss": 0.411, "step": 49275 }, { "epoch": 1.2509042910992372, "grad_norm": 0.353515625, "learning_rate": 0.00011094450254843456, "loss": 0.4407, "step": 49280 }, { "epoch": 1.251031209148253, "grad_norm": 0.37109375, "learning_rate": 0.00011091242206970152, "loss": 0.4173, "step": 49285 }, { "epoch": 1.2511581271972687, "grad_norm": 0.35546875, "learning_rate": 0.0001108803435089804, "loss": 0.4069, "step": 49290 }, { "epoch": 1.2512850452462845, "grad_norm": 0.35546875, "learning_rate": 0.00011084826686784521, "loss": 0.4029, "step": 49295 }, { "epoch": 1.2514119632953002, "grad_norm": 0.349609375, "learning_rate": 0.00011081619214787, "loss": 0.4402, "step": 49300 }, { "epoch": 1.251538881344316, "grad_norm": 0.357421875, "learning_rate": 0.00011078411935062863, "loss": 0.4384, "step": 49305 }, { "epoch": 1.2516657993933318, "grad_norm": 0.33203125, "learning_rate": 0.00011075204847769487, "loss": 0.4312, "step": 49310 }, { "epoch": 1.2517927174423475, "grad_norm": 0.318359375, "learning_rate": 0.00011071997953064241, "loss": 0.4166, "step": 49315 }, { "epoch": 1.251919635491363, "grad_norm": 0.361328125, "learning_rate": 0.00011068791251104499, "loss": 0.4264, "step": 49320 }, { "epoch": 1.2520465535403789, "grad_norm": 0.357421875, "learning_rate": 0.00011065584742047601, "loss": 0.428, "step": 49325 }, { "epoch": 1.2521734715893946, "grad_norm": 0.341796875, "learning_rate": 0.00011062378426050894, "loss": 0.4367, "step": 49330 }, { "epoch": 1.2523003896384104, "grad_norm": 0.35546875, "learning_rate": 0.00011059172303271708, "loss": 0.4057, "step": 49335 }, { "epoch": 1.2524273076874262, "grad_norm": 0.35546875, "learning_rate": 0.00011055966373867369, "loss": 0.413, "step": 49340 }, { "epoch": 1.252554225736442, "grad_norm": 0.337890625, "learning_rate": 0.0001105276063799519, "loss": 0.3844, "step": 49345 }, { "epoch": 1.2526811437854577, "grad_norm": 0.380859375, "learning_rate": 0.00011049555095812474, "loss": 0.4333, "step": 49350 }, { "epoch": 1.2528080618344735, "grad_norm": 0.341796875, "learning_rate": 0.00011046349747476513, "loss": 0.4163, "step": 49355 }, { "epoch": 1.2529349798834892, "grad_norm": 0.3359375, "learning_rate": 0.00011043144593144597, "loss": 0.4225, "step": 49360 }, { "epoch": 1.253061897932505, "grad_norm": 0.34375, "learning_rate": 0.00011039939632973998, "loss": 0.4131, "step": 49365 }, { "epoch": 1.2531888159815208, "grad_norm": 0.34375, "learning_rate": 0.00011036734867121984, "loss": 0.4057, "step": 49370 }, { "epoch": 1.2533157340305365, "grad_norm": 0.306640625, "learning_rate": 0.00011033530295745812, "loss": 0.4113, "step": 49375 }, { "epoch": 1.2534426520795523, "grad_norm": 0.345703125, "learning_rate": 0.00011030325919002723, "loss": 0.3984, "step": 49380 }, { "epoch": 1.253569570128568, "grad_norm": 0.33984375, "learning_rate": 0.00011027121737049965, "loss": 0.4318, "step": 49385 }, { "epoch": 1.2536964881775838, "grad_norm": 0.322265625, "learning_rate": 0.00011023917750044763, "loss": 0.4239, "step": 49390 }, { "epoch": 1.2538234062265996, "grad_norm": 0.365234375, "learning_rate": 0.00011020713958144332, "loss": 0.4453, "step": 49395 }, { "epoch": 1.2539503242756154, "grad_norm": 0.34375, "learning_rate": 0.00011017510361505882, "loss": 0.4162, "step": 49400 }, { "epoch": 1.254077242324631, "grad_norm": 0.359375, "learning_rate": 0.0001101430696028661, "loss": 0.4173, "step": 49405 }, { "epoch": 1.2542041603736467, "grad_norm": 0.341796875, "learning_rate": 0.00011011103754643709, "loss": 0.3986, "step": 49410 }, { "epoch": 1.2543310784226624, "grad_norm": 0.345703125, "learning_rate": 0.00011007900744734358, "loss": 0.4108, "step": 49415 }, { "epoch": 1.2544579964716782, "grad_norm": 0.3359375, "learning_rate": 0.00011004697930715727, "loss": 0.4242, "step": 49420 }, { "epoch": 1.254584914520694, "grad_norm": 0.353515625, "learning_rate": 0.00011001495312744979, "loss": 0.4502, "step": 49425 }, { "epoch": 1.2547118325697098, "grad_norm": 0.34375, "learning_rate": 0.00010998292890979261, "loss": 0.4153, "step": 49430 }, { "epoch": 1.2548387506187255, "grad_norm": 0.36328125, "learning_rate": 0.00010995090665575717, "loss": 0.4174, "step": 49435 }, { "epoch": 1.2549656686677413, "grad_norm": 0.345703125, "learning_rate": 0.00010991888636691475, "loss": 0.4465, "step": 49440 }, { "epoch": 1.255092586716757, "grad_norm": 0.341796875, "learning_rate": 0.00010988686804483666, "loss": 0.4466, "step": 49445 }, { "epoch": 1.2552195047657728, "grad_norm": 0.353515625, "learning_rate": 0.00010985485169109396, "loss": 0.393, "step": 49450 }, { "epoch": 1.2553464228147884, "grad_norm": 0.318359375, "learning_rate": 0.00010982283730725769, "loss": 0.3947, "step": 49455 }, { "epoch": 1.2554733408638041, "grad_norm": 0.349609375, "learning_rate": 0.00010979082489489881, "loss": 0.4146, "step": 49460 }, { "epoch": 1.25560025891282, "grad_norm": 0.357421875, "learning_rate": 0.00010975881445558812, "loss": 0.4025, "step": 49465 }, { "epoch": 1.2557271769618357, "grad_norm": 0.3359375, "learning_rate": 0.00010972680599089639, "loss": 0.3933, "step": 49470 }, { "epoch": 1.2558540950108514, "grad_norm": 0.365234375, "learning_rate": 0.00010969479950239424, "loss": 0.4345, "step": 49475 }, { "epoch": 1.2559810130598672, "grad_norm": 0.3515625, "learning_rate": 0.00010966279499165224, "loss": 0.4169, "step": 49480 }, { "epoch": 1.256107931108883, "grad_norm": 0.33203125, "learning_rate": 0.00010963079246024077, "loss": 0.3823, "step": 49485 }, { "epoch": 1.2562348491578987, "grad_norm": 0.34765625, "learning_rate": 0.00010959879190973029, "loss": 0.4439, "step": 49490 }, { "epoch": 1.2563617672069145, "grad_norm": 0.341796875, "learning_rate": 0.00010956679334169097, "loss": 0.4071, "step": 49495 }, { "epoch": 1.2564886852559303, "grad_norm": 0.35546875, "learning_rate": 0.000109534796757693, "loss": 0.4338, "step": 49500 }, { "epoch": 1.256615603304946, "grad_norm": 0.3515625, "learning_rate": 0.00010950280215930638, "loss": 0.4213, "step": 49505 }, { "epoch": 1.2567425213539618, "grad_norm": 0.333984375, "learning_rate": 0.0001094708095481012, "loss": 0.4263, "step": 49510 }, { "epoch": 1.2568694394029776, "grad_norm": 0.33984375, "learning_rate": 0.00010943881892564723, "loss": 0.4187, "step": 49515 }, { "epoch": 1.2569963574519933, "grad_norm": 0.34375, "learning_rate": 0.00010940683029351427, "loss": 0.4176, "step": 49520 }, { "epoch": 1.257123275501009, "grad_norm": 0.400390625, "learning_rate": 0.00010937484365327197, "loss": 0.4251, "step": 49525 }, { "epoch": 1.2572501935500249, "grad_norm": 1.0390625, "learning_rate": 0.00010934285900648989, "loss": 0.4431, "step": 49530 }, { "epoch": 1.2573771115990404, "grad_norm": 0.36328125, "learning_rate": 0.00010931087635473754, "loss": 0.4579, "step": 49535 }, { "epoch": 1.2575040296480562, "grad_norm": 0.341796875, "learning_rate": 0.00010927889569958428, "loss": 0.4063, "step": 49540 }, { "epoch": 1.257630947697072, "grad_norm": 0.3671875, "learning_rate": 0.0001092469170425994, "loss": 0.4157, "step": 49545 }, { "epoch": 1.2577578657460877, "grad_norm": 0.3515625, "learning_rate": 0.00010921494038535206, "loss": 0.4246, "step": 49550 }, { "epoch": 1.2578847837951035, "grad_norm": 0.35546875, "learning_rate": 0.00010918296572941134, "loss": 0.4369, "step": 49555 }, { "epoch": 1.2580117018441193, "grad_norm": 0.359375, "learning_rate": 0.00010915099307634625, "loss": 0.4046, "step": 49560 }, { "epoch": 1.258138619893135, "grad_norm": 0.3671875, "learning_rate": 0.00010911902242772557, "loss": 0.4154, "step": 49565 }, { "epoch": 1.2582655379421508, "grad_norm": 0.376953125, "learning_rate": 0.00010908705378511827, "loss": 0.4294, "step": 49570 }, { "epoch": 1.2583924559911666, "grad_norm": 0.3515625, "learning_rate": 0.00010905508715009288, "loss": 0.4292, "step": 49575 }, { "epoch": 1.2585193740401823, "grad_norm": 0.33203125, "learning_rate": 0.0001090231225242181, "loss": 0.4101, "step": 49580 }, { "epoch": 1.2586462920891979, "grad_norm": 0.34765625, "learning_rate": 0.00010899115990906238, "loss": 0.4437, "step": 49585 }, { "epoch": 1.2587732101382136, "grad_norm": 0.369140625, "learning_rate": 0.0001089591993061941, "loss": 0.4203, "step": 49590 }, { "epoch": 1.2589001281872294, "grad_norm": 0.34375, "learning_rate": 0.00010892724071718156, "loss": 0.4178, "step": 49595 }, { "epoch": 1.2590270462362452, "grad_norm": 0.3515625, "learning_rate": 0.00010889528414359298, "loss": 0.4393, "step": 49600 }, { "epoch": 1.259153964285261, "grad_norm": 0.33203125, "learning_rate": 0.00010886332958699641, "loss": 0.3841, "step": 49605 }, { "epoch": 1.2592808823342767, "grad_norm": 0.34375, "learning_rate": 0.00010883137704895983, "loss": 0.4185, "step": 49610 }, { "epoch": 1.2594078003832925, "grad_norm": 0.357421875, "learning_rate": 0.00010879942653105124, "loss": 0.4096, "step": 49615 }, { "epoch": 1.2595347184323082, "grad_norm": 0.341796875, "learning_rate": 0.00010876747803483837, "loss": 0.4053, "step": 49620 }, { "epoch": 1.259661636481324, "grad_norm": 0.35546875, "learning_rate": 0.00010873553156188892, "loss": 0.4285, "step": 49625 }, { "epoch": 1.2597885545303398, "grad_norm": 0.333984375, "learning_rate": 0.00010870358711377043, "loss": 0.4233, "step": 49630 }, { "epoch": 1.2599154725793555, "grad_norm": 0.376953125, "learning_rate": 0.00010867164469205053, "loss": 0.4245, "step": 49635 }, { "epoch": 1.2600423906283713, "grad_norm": 0.361328125, "learning_rate": 0.00010863970429829659, "loss": 0.411, "step": 49640 }, { "epoch": 1.260169308677387, "grad_norm": 0.3515625, "learning_rate": 0.00010860776593407586, "loss": 0.4044, "step": 49645 }, { "epoch": 1.2602962267264028, "grad_norm": 0.3515625, "learning_rate": 0.00010857582960095558, "loss": 0.4312, "step": 49650 }, { "epoch": 1.2604231447754186, "grad_norm": 0.3359375, "learning_rate": 0.0001085438953005028, "loss": 0.3942, "step": 49655 }, { "epoch": 1.2605500628244344, "grad_norm": 0.3515625, "learning_rate": 0.0001085119630342846, "loss": 0.4334, "step": 49660 }, { "epoch": 1.2606769808734501, "grad_norm": 0.328125, "learning_rate": 0.00010848003280386785, "loss": 0.4282, "step": 49665 }, { "epoch": 1.2608038989224657, "grad_norm": 0.32421875, "learning_rate": 0.00010844810461081934, "loss": 0.4013, "step": 49670 }, { "epoch": 1.2609308169714815, "grad_norm": 0.33984375, "learning_rate": 0.00010841617845670579, "loss": 0.4036, "step": 49675 }, { "epoch": 1.2610577350204972, "grad_norm": 0.35546875, "learning_rate": 0.00010838425434309383, "loss": 0.4211, "step": 49680 }, { "epoch": 1.261184653069513, "grad_norm": 0.365234375, "learning_rate": 0.00010835233227154986, "loss": 0.4354, "step": 49685 }, { "epoch": 1.2613115711185288, "grad_norm": 0.357421875, "learning_rate": 0.0001083204122436404, "loss": 0.436, "step": 49690 }, { "epoch": 1.2614384891675445, "grad_norm": 0.3203125, "learning_rate": 0.00010828849426093168, "loss": 0.391, "step": 49695 }, { "epoch": 1.2615654072165603, "grad_norm": 0.357421875, "learning_rate": 0.00010825657832498992, "loss": 0.422, "step": 49700 }, { "epoch": 1.261692325265576, "grad_norm": 0.337890625, "learning_rate": 0.00010822466443738127, "loss": 0.4309, "step": 49705 }, { "epoch": 1.2618192433145918, "grad_norm": 0.361328125, "learning_rate": 0.0001081927525996717, "loss": 0.4244, "step": 49710 }, { "epoch": 1.2619461613636076, "grad_norm": 0.34375, "learning_rate": 0.0001081608428134271, "loss": 0.4049, "step": 49715 }, { "epoch": 1.2620730794126231, "grad_norm": 0.376953125, "learning_rate": 0.00010812893508021328, "loss": 0.458, "step": 49720 }, { "epoch": 1.262199997461639, "grad_norm": 0.359375, "learning_rate": 0.00010809702940159595, "loss": 0.4242, "step": 49725 }, { "epoch": 1.2623269155106547, "grad_norm": 0.3515625, "learning_rate": 0.00010806512577914065, "loss": 0.4319, "step": 49730 }, { "epoch": 1.2624538335596704, "grad_norm": 0.345703125, "learning_rate": 0.00010803322421441296, "loss": 0.3991, "step": 49735 }, { "epoch": 1.2625807516086862, "grad_norm": 0.3671875, "learning_rate": 0.00010800132470897824, "loss": 0.432, "step": 49740 }, { "epoch": 1.262707669657702, "grad_norm": 0.322265625, "learning_rate": 0.00010796942726440178, "loss": 0.3948, "step": 49745 }, { "epoch": 1.2628345877067177, "grad_norm": 0.3359375, "learning_rate": 0.00010793753188224878, "loss": 0.3951, "step": 49750 }, { "epoch": 1.2629615057557335, "grad_norm": 0.361328125, "learning_rate": 0.00010790563856408428, "loss": 0.4185, "step": 49755 }, { "epoch": 1.2630884238047493, "grad_norm": 0.35546875, "learning_rate": 0.0001078737473114734, "loss": 0.3845, "step": 49760 }, { "epoch": 1.263215341853765, "grad_norm": 0.328125, "learning_rate": 0.00010784185812598093, "loss": 0.4177, "step": 49765 }, { "epoch": 1.2633422599027808, "grad_norm": 0.328125, "learning_rate": 0.00010780997100917171, "loss": 0.4037, "step": 49770 }, { "epoch": 1.2634691779517966, "grad_norm": 0.341796875, "learning_rate": 0.00010777808596261037, "loss": 0.4084, "step": 49775 }, { "epoch": 1.2635960960008124, "grad_norm": 0.365234375, "learning_rate": 0.00010774620298786157, "loss": 0.4201, "step": 49780 }, { "epoch": 1.2637230140498281, "grad_norm": 0.37109375, "learning_rate": 0.00010771432208648975, "loss": 0.4338, "step": 49785 }, { "epoch": 1.2638499320988439, "grad_norm": 0.326171875, "learning_rate": 0.0001076824432600593, "loss": 0.404, "step": 49790 }, { "epoch": 1.2639768501478597, "grad_norm": 0.345703125, "learning_rate": 0.00010765056651013453, "loss": 0.4148, "step": 49795 }, { "epoch": 1.2641037681968752, "grad_norm": 0.33984375, "learning_rate": 0.00010761869183827955, "loss": 0.4042, "step": 49800 }, { "epoch": 1.264230686245891, "grad_norm": 0.3671875, "learning_rate": 0.0001075868192460585, "loss": 0.4036, "step": 49805 }, { "epoch": 1.2643576042949067, "grad_norm": 0.333984375, "learning_rate": 0.00010755494873503531, "loss": 0.413, "step": 49810 }, { "epoch": 1.2644845223439225, "grad_norm": 0.337890625, "learning_rate": 0.00010752308030677389, "loss": 0.4137, "step": 49815 }, { "epoch": 1.2646114403929383, "grad_norm": 0.345703125, "learning_rate": 0.00010749121396283796, "loss": 0.4238, "step": 49820 }, { "epoch": 1.264738358441954, "grad_norm": 0.34765625, "learning_rate": 0.00010745934970479126, "loss": 0.4332, "step": 49825 }, { "epoch": 1.2648652764909698, "grad_norm": 0.359375, "learning_rate": 0.00010742748753419735, "loss": 0.4369, "step": 49830 }, { "epoch": 1.2649921945399856, "grad_norm": 0.345703125, "learning_rate": 0.00010739562745261968, "loss": 0.4118, "step": 49835 }, { "epoch": 1.2651191125890013, "grad_norm": 0.359375, "learning_rate": 0.00010736376946162156, "loss": 0.4025, "step": 49840 }, { "epoch": 1.265246030638017, "grad_norm": 0.328125, "learning_rate": 0.00010733191356276632, "loss": 0.4206, "step": 49845 }, { "epoch": 1.2653729486870327, "grad_norm": 0.345703125, "learning_rate": 0.00010730005975761709, "loss": 0.4159, "step": 49850 }, { "epoch": 1.2654998667360484, "grad_norm": 0.333984375, "learning_rate": 0.00010726820804773687, "loss": 0.435, "step": 49855 }, { "epoch": 1.2656267847850642, "grad_norm": 0.72265625, "learning_rate": 0.00010723635843468867, "loss": 0.3902, "step": 49860 }, { "epoch": 1.26575370283408, "grad_norm": 0.3203125, "learning_rate": 0.00010720451092003536, "loss": 0.3846, "step": 49865 }, { "epoch": 1.2658806208830957, "grad_norm": 0.35546875, "learning_rate": 0.00010717266550533965, "loss": 0.4163, "step": 49870 }, { "epoch": 1.2660075389321115, "grad_norm": 0.341796875, "learning_rate": 0.00010714082219216417, "loss": 0.4299, "step": 49875 }, { "epoch": 1.2661344569811273, "grad_norm": 0.345703125, "learning_rate": 0.00010710898098207142, "loss": 0.4085, "step": 49880 }, { "epoch": 1.266261375030143, "grad_norm": 0.341796875, "learning_rate": 0.00010707714187662394, "loss": 0.4301, "step": 49885 }, { "epoch": 1.2663882930791588, "grad_norm": 0.375, "learning_rate": 0.00010704530487738402, "loss": 0.4179, "step": 49890 }, { "epoch": 1.2665152111281746, "grad_norm": 0.326171875, "learning_rate": 0.00010701346998591388, "loss": 0.427, "step": 49895 }, { "epoch": 1.2666421291771903, "grad_norm": 0.353515625, "learning_rate": 0.0001069816372037756, "loss": 0.3984, "step": 49900 }, { "epoch": 1.266769047226206, "grad_norm": 0.353515625, "learning_rate": 0.0001069498065325313, "loss": 0.4176, "step": 49905 }, { "epoch": 1.2668959652752219, "grad_norm": 0.37109375, "learning_rate": 0.00010691797797374281, "loss": 0.4068, "step": 49910 }, { "epoch": 1.2670228833242376, "grad_norm": 0.306640625, "learning_rate": 0.00010688615152897201, "loss": 0.39, "step": 49915 }, { "epoch": 1.2671498013732534, "grad_norm": 0.3515625, "learning_rate": 0.00010685432719978058, "loss": 0.4393, "step": 49920 }, { "epoch": 1.2672767194222692, "grad_norm": 0.376953125, "learning_rate": 0.0001068225049877301, "loss": 0.3985, "step": 49925 }, { "epoch": 1.267403637471285, "grad_norm": 0.34375, "learning_rate": 0.00010679068489438209, "loss": 0.4161, "step": 49930 }, { "epoch": 1.2675305555203005, "grad_norm": 0.333984375, "learning_rate": 0.000106758866921298, "loss": 0.4039, "step": 49935 }, { "epoch": 1.2676574735693162, "grad_norm": 0.328125, "learning_rate": 0.00010672705107003908, "loss": 0.4066, "step": 49940 }, { "epoch": 1.267784391618332, "grad_norm": 0.3046875, "learning_rate": 0.00010669523734216645, "loss": 0.4003, "step": 49945 }, { "epoch": 1.2679113096673478, "grad_norm": 0.34375, "learning_rate": 0.00010666342573924135, "loss": 0.4185, "step": 49950 }, { "epoch": 1.2680382277163635, "grad_norm": 0.357421875, "learning_rate": 0.00010663161626282468, "loss": 0.4169, "step": 49955 }, { "epoch": 1.2681651457653793, "grad_norm": 0.3515625, "learning_rate": 0.00010659980891447731, "loss": 0.4184, "step": 49960 }, { "epoch": 1.268292063814395, "grad_norm": 0.3359375, "learning_rate": 0.00010656800369576006, "loss": 0.435, "step": 49965 }, { "epoch": 1.2684189818634108, "grad_norm": 0.345703125, "learning_rate": 0.00010653620060823354, "loss": 0.4006, "step": 49970 }, { "epoch": 1.2685458999124266, "grad_norm": 0.333984375, "learning_rate": 0.00010650439965345834, "loss": 0.4225, "step": 49975 }, { "epoch": 1.2686728179614424, "grad_norm": 0.326171875, "learning_rate": 0.00010647260083299492, "loss": 0.3844, "step": 49980 }, { "epoch": 1.268799736010458, "grad_norm": 0.349609375, "learning_rate": 0.00010644080414840366, "loss": 0.4053, "step": 49985 }, { "epoch": 1.2689266540594737, "grad_norm": 0.33984375, "learning_rate": 0.00010640900960124478, "loss": 0.4171, "step": 49990 }, { "epoch": 1.2690535721084895, "grad_norm": 0.359375, "learning_rate": 0.00010637721719307845, "loss": 0.4042, "step": 49995 }, { "epoch": 1.2691804901575052, "grad_norm": 0.318359375, "learning_rate": 0.00010634542692546468, "loss": 0.3876, "step": 50000 }, { "epoch": 1.269307408206521, "grad_norm": 0.353515625, "learning_rate": 0.00010631363879996343, "loss": 0.4469, "step": 50005 }, { "epoch": 1.2694343262555368, "grad_norm": 0.35546875, "learning_rate": 0.00010628185281813447, "loss": 0.4597, "step": 50010 }, { "epoch": 1.2695612443045525, "grad_norm": 0.353515625, "learning_rate": 0.00010625006898153761, "loss": 0.4091, "step": 50015 }, { "epoch": 1.2696881623535683, "grad_norm": 0.357421875, "learning_rate": 0.00010621828729173243, "loss": 0.4306, "step": 50020 }, { "epoch": 1.269815080402584, "grad_norm": 0.345703125, "learning_rate": 0.00010618650775027848, "loss": 0.4261, "step": 50025 }, { "epoch": 1.2699419984515998, "grad_norm": 0.32421875, "learning_rate": 0.00010615473035873515, "loss": 0.3996, "step": 50030 }, { "epoch": 1.2700689165006156, "grad_norm": 0.34375, "learning_rate": 0.0001061229551186617, "loss": 0.4066, "step": 50035 }, { "epoch": 1.2701958345496314, "grad_norm": 0.34375, "learning_rate": 0.0001060911820316174, "loss": 0.4198, "step": 50040 }, { "epoch": 1.2703227525986471, "grad_norm": 0.3515625, "learning_rate": 0.00010605941109916129, "loss": 0.4524, "step": 50045 }, { "epoch": 1.270449670647663, "grad_norm": 0.36328125, "learning_rate": 0.0001060276423228524, "loss": 0.4154, "step": 50050 }, { "epoch": 1.2705765886966787, "grad_norm": 0.337890625, "learning_rate": 0.00010599587570424952, "loss": 0.397, "step": 50055 }, { "epoch": 1.2707035067456944, "grad_norm": 0.3515625, "learning_rate": 0.00010596411124491153, "loss": 0.4207, "step": 50060 }, { "epoch": 1.27083042479471, "grad_norm": 0.349609375, "learning_rate": 0.00010593234894639707, "loss": 0.4171, "step": 50065 }, { "epoch": 1.2709573428437257, "grad_norm": 0.35546875, "learning_rate": 0.00010590058881026464, "loss": 0.4231, "step": 50070 }, { "epoch": 1.2710842608927415, "grad_norm": 0.341796875, "learning_rate": 0.00010586883083807278, "loss": 0.4142, "step": 50075 }, { "epoch": 1.2712111789417573, "grad_norm": 0.37109375, "learning_rate": 0.00010583707503137984, "loss": 0.4131, "step": 50080 }, { "epoch": 1.271338096990773, "grad_norm": 0.3515625, "learning_rate": 0.00010580532139174401, "loss": 0.4485, "step": 50085 }, { "epoch": 1.2714650150397888, "grad_norm": 0.35546875, "learning_rate": 0.00010577356992072348, "loss": 0.4205, "step": 50090 }, { "epoch": 1.2715919330888046, "grad_norm": 0.7265625, "learning_rate": 0.00010574182061987627, "loss": 0.4248, "step": 50095 }, { "epoch": 1.2717188511378203, "grad_norm": 0.33203125, "learning_rate": 0.00010571007349076022, "loss": 0.413, "step": 50100 }, { "epoch": 1.2718457691868361, "grad_norm": 0.3515625, "learning_rate": 0.00010567832853493328, "loss": 0.4256, "step": 50105 }, { "epoch": 1.2719726872358519, "grad_norm": 0.33203125, "learning_rate": 0.0001056465857539531, "loss": 0.4188, "step": 50110 }, { "epoch": 1.2720996052848674, "grad_norm": 0.34375, "learning_rate": 0.00010561484514937729, "loss": 0.4227, "step": 50115 }, { "epoch": 1.2722265233338832, "grad_norm": 0.33984375, "learning_rate": 0.00010558310672276334, "loss": 0.4054, "step": 50120 }, { "epoch": 1.272353441382899, "grad_norm": 0.359375, "learning_rate": 0.00010555137047566864, "loss": 0.4, "step": 50125 }, { "epoch": 1.2724803594319147, "grad_norm": 0.357421875, "learning_rate": 0.00010551963640965046, "loss": 0.4326, "step": 50130 }, { "epoch": 1.2726072774809305, "grad_norm": 0.3515625, "learning_rate": 0.00010548790452626604, "loss": 0.4206, "step": 50135 }, { "epoch": 1.2727341955299463, "grad_norm": 0.349609375, "learning_rate": 0.00010545617482707239, "loss": 0.4134, "step": 50140 }, { "epoch": 1.272861113578962, "grad_norm": 0.361328125, "learning_rate": 0.0001054244473136265, "loss": 0.4077, "step": 50145 }, { "epoch": 1.2729880316279778, "grad_norm": 0.34765625, "learning_rate": 0.00010539272198748526, "loss": 0.4076, "step": 50150 }, { "epoch": 1.2731149496769936, "grad_norm": 0.369140625, "learning_rate": 0.00010536099885020535, "loss": 0.436, "step": 50155 }, { "epoch": 1.2732418677260093, "grad_norm": 0.40625, "learning_rate": 0.00010532927790334348, "loss": 0.4073, "step": 50160 }, { "epoch": 1.273368785775025, "grad_norm": 0.392578125, "learning_rate": 0.00010529755914845614, "loss": 0.4319, "step": 50165 }, { "epoch": 1.2734957038240409, "grad_norm": 0.34765625, "learning_rate": 0.00010526584258709974, "loss": 0.4072, "step": 50170 }, { "epoch": 1.2736226218730566, "grad_norm": 0.34765625, "learning_rate": 0.00010523412822083064, "loss": 0.4366, "step": 50175 }, { "epoch": 1.2737495399220724, "grad_norm": 0.3828125, "learning_rate": 0.00010520241605120503, "loss": 0.4605, "step": 50180 }, { "epoch": 1.2738764579710882, "grad_norm": 0.384765625, "learning_rate": 0.00010517070607977902, "loss": 0.4446, "step": 50185 }, { "epoch": 1.274003376020104, "grad_norm": 0.3203125, "learning_rate": 0.00010513899830810863, "loss": 0.4069, "step": 50190 }, { "epoch": 1.2741302940691197, "grad_norm": 0.26171875, "learning_rate": 0.00010510729273774964, "loss": 0.3879, "step": 50195 }, { "epoch": 1.2742572121181353, "grad_norm": 0.375, "learning_rate": 0.00010507558937025797, "loss": 0.4378, "step": 50200 }, { "epoch": 1.274384130167151, "grad_norm": 0.353515625, "learning_rate": 0.00010504388820718927, "loss": 0.4282, "step": 50205 }, { "epoch": 1.2745110482161668, "grad_norm": 0.380859375, "learning_rate": 0.00010501218925009903, "loss": 0.4383, "step": 50210 }, { "epoch": 1.2746379662651826, "grad_norm": 0.357421875, "learning_rate": 0.00010498049250054276, "loss": 0.4193, "step": 50215 }, { "epoch": 1.2747648843141983, "grad_norm": 0.314453125, "learning_rate": 0.00010494879796007575, "loss": 0.4315, "step": 50220 }, { "epoch": 1.274891802363214, "grad_norm": 0.34375, "learning_rate": 0.00010491710563025332, "loss": 0.3688, "step": 50225 }, { "epoch": 1.2750187204122299, "grad_norm": 0.37109375, "learning_rate": 0.00010488541551263057, "loss": 0.4314, "step": 50230 }, { "epoch": 1.2751456384612456, "grad_norm": 0.37109375, "learning_rate": 0.0001048537276087625, "loss": 0.4167, "step": 50235 }, { "epoch": 1.2752725565102614, "grad_norm": 0.341796875, "learning_rate": 0.00010482204192020401, "loss": 0.4246, "step": 50240 }, { "epoch": 1.2753994745592772, "grad_norm": 0.37109375, "learning_rate": 0.00010479035844850995, "loss": 0.4313, "step": 50245 }, { "epoch": 1.2755263926082927, "grad_norm": 0.322265625, "learning_rate": 0.00010475867719523499, "loss": 0.3961, "step": 50250 }, { "epoch": 1.2756533106573085, "grad_norm": 0.349609375, "learning_rate": 0.00010472699816193367, "loss": 0.4222, "step": 50255 }, { "epoch": 1.2757802287063242, "grad_norm": 0.3515625, "learning_rate": 0.00010469532135016054, "loss": 0.3994, "step": 50260 }, { "epoch": 1.27590714675534, "grad_norm": 0.35546875, "learning_rate": 0.00010466364676146993, "loss": 0.4299, "step": 50265 }, { "epoch": 1.2760340648043558, "grad_norm": 0.353515625, "learning_rate": 0.00010463197439741616, "loss": 0.422, "step": 50270 }, { "epoch": 1.2761609828533715, "grad_norm": 0.341796875, "learning_rate": 0.00010460030425955331, "loss": 0.3737, "step": 50275 }, { "epoch": 1.2762879009023873, "grad_norm": 0.34765625, "learning_rate": 0.00010456863634943546, "loss": 0.4115, "step": 50280 }, { "epoch": 1.276414818951403, "grad_norm": 0.33203125, "learning_rate": 0.00010453697066861653, "loss": 0.4093, "step": 50285 }, { "epoch": 1.2765417370004188, "grad_norm": 0.353515625, "learning_rate": 0.00010450530721865033, "loss": 0.4113, "step": 50290 }, { "epoch": 1.2766686550494346, "grad_norm": 0.37109375, "learning_rate": 0.00010447364600109058, "loss": 0.4209, "step": 50295 }, { "epoch": 1.2767955730984504, "grad_norm": 0.365234375, "learning_rate": 0.00010444198701749086, "loss": 0.4159, "step": 50300 }, { "epoch": 1.2769224911474661, "grad_norm": 0.33984375, "learning_rate": 0.00010441033026940472, "loss": 0.39, "step": 50305 }, { "epoch": 1.277049409196482, "grad_norm": 0.34765625, "learning_rate": 0.00010437867575838552, "loss": 0.4235, "step": 50310 }, { "epoch": 1.2771763272454977, "grad_norm": 0.34765625, "learning_rate": 0.00010434702348598652, "loss": 0.4084, "step": 50315 }, { "epoch": 1.2773032452945134, "grad_norm": 0.384765625, "learning_rate": 0.00010431537345376091, "loss": 0.4147, "step": 50320 }, { "epoch": 1.2774301633435292, "grad_norm": 0.349609375, "learning_rate": 0.00010428372566326164, "loss": 0.4346, "step": 50325 }, { "epoch": 1.2775570813925448, "grad_norm": 0.349609375, "learning_rate": 0.00010425208011604182, "loss": 0.4214, "step": 50330 }, { "epoch": 1.2776839994415605, "grad_norm": 0.341796875, "learning_rate": 0.00010422043681365418, "loss": 0.4028, "step": 50335 }, { "epoch": 1.2778109174905763, "grad_norm": 0.34375, "learning_rate": 0.0001041887957576515, "loss": 0.4183, "step": 50340 }, { "epoch": 1.277937835539592, "grad_norm": 0.359375, "learning_rate": 0.00010415715694958632, "loss": 0.4332, "step": 50345 }, { "epoch": 1.2780647535886078, "grad_norm": 0.37109375, "learning_rate": 0.00010412552039101123, "loss": 0.4294, "step": 50350 }, { "epoch": 1.2781916716376236, "grad_norm": 0.330078125, "learning_rate": 0.00010409388608347856, "loss": 0.4178, "step": 50355 }, { "epoch": 1.2783185896866394, "grad_norm": 0.3515625, "learning_rate": 0.00010406225402854064, "loss": 0.4182, "step": 50360 }, { "epoch": 1.2784455077356551, "grad_norm": 0.341796875, "learning_rate": 0.00010403062422774959, "loss": 0.4358, "step": 50365 }, { "epoch": 1.278572425784671, "grad_norm": 0.34765625, "learning_rate": 0.00010399899668265751, "loss": 0.4226, "step": 50370 }, { "epoch": 1.2786993438336867, "grad_norm": 0.35546875, "learning_rate": 0.00010396737139481631, "loss": 0.4523, "step": 50375 }, { "epoch": 1.2788262618827022, "grad_norm": 0.345703125, "learning_rate": 0.00010393574836577788, "loss": 0.4052, "step": 50380 }, { "epoch": 1.278953179931718, "grad_norm": 0.369140625, "learning_rate": 0.00010390412759709389, "loss": 0.4065, "step": 50385 }, { "epoch": 1.2790800979807337, "grad_norm": 0.353515625, "learning_rate": 0.00010387250909031603, "loss": 0.384, "step": 50390 }, { "epoch": 1.2792070160297495, "grad_norm": 0.40625, "learning_rate": 0.00010384089284699578, "loss": 0.3972, "step": 50395 }, { "epoch": 1.2793339340787653, "grad_norm": 0.36328125, "learning_rate": 0.00010380927886868451, "loss": 0.4026, "step": 50400 }, { "epoch": 1.279460852127781, "grad_norm": 0.333984375, "learning_rate": 0.00010377766715693356, "loss": 0.3794, "step": 50405 }, { "epoch": 1.2795877701767968, "grad_norm": 0.345703125, "learning_rate": 0.00010374605771329407, "loss": 0.4121, "step": 50410 }, { "epoch": 1.2797146882258126, "grad_norm": 0.34765625, "learning_rate": 0.00010371445053931711, "loss": 0.4054, "step": 50415 }, { "epoch": 1.2798416062748283, "grad_norm": 0.33984375, "learning_rate": 0.00010368284563655355, "loss": 0.4182, "step": 50420 }, { "epoch": 1.2799685243238441, "grad_norm": 0.3515625, "learning_rate": 0.00010365124300655439, "loss": 0.4196, "step": 50425 }, { "epoch": 1.2800954423728599, "grad_norm": 0.3515625, "learning_rate": 0.00010361964265087025, "loss": 0.4092, "step": 50430 }, { "epoch": 1.2802223604218756, "grad_norm": 0.345703125, "learning_rate": 0.00010358804457105176, "loss": 0.4001, "step": 50435 }, { "epoch": 1.2803492784708914, "grad_norm": 0.32421875, "learning_rate": 0.00010355644876864945, "loss": 0.4086, "step": 50440 }, { "epoch": 1.2804761965199072, "grad_norm": 0.3515625, "learning_rate": 0.00010352485524521369, "loss": 0.4387, "step": 50445 }, { "epoch": 1.280603114568923, "grad_norm": 0.3359375, "learning_rate": 0.0001034932640022947, "loss": 0.3987, "step": 50450 }, { "epoch": 1.2807300326179387, "grad_norm": 0.359375, "learning_rate": 0.0001034616750414428, "loss": 0.4383, "step": 50455 }, { "epoch": 1.2808569506669545, "grad_norm": 0.36328125, "learning_rate": 0.00010343008836420796, "loss": 0.4093, "step": 50460 }, { "epoch": 1.28098386871597, "grad_norm": 0.3203125, "learning_rate": 0.0001033985039721401, "loss": 0.3835, "step": 50465 }, { "epoch": 1.2811107867649858, "grad_norm": 0.357421875, "learning_rate": 0.00010336692186678911, "loss": 0.4171, "step": 50470 }, { "epoch": 1.2812377048140016, "grad_norm": 0.341796875, "learning_rate": 0.0001033353420497047, "loss": 0.3988, "step": 50475 }, { "epoch": 1.2813646228630173, "grad_norm": 0.345703125, "learning_rate": 0.00010330376452243648, "loss": 0.417, "step": 50480 }, { "epoch": 1.281491540912033, "grad_norm": 0.359375, "learning_rate": 0.0001032721892865339, "loss": 0.4094, "step": 50485 }, { "epoch": 1.2816184589610489, "grad_norm": 0.365234375, "learning_rate": 0.00010324061634354641, "loss": 0.3978, "step": 50490 }, { "epoch": 1.2817453770100646, "grad_norm": 0.337890625, "learning_rate": 0.00010320904569502324, "loss": 0.3932, "step": 50495 }, { "epoch": 1.2818722950590804, "grad_norm": 0.35546875, "learning_rate": 0.00010317747734251352, "loss": 0.4087, "step": 50500 }, { "epoch": 1.2819992131080962, "grad_norm": 0.34765625, "learning_rate": 0.00010314591128756637, "loss": 0.4109, "step": 50505 }, { "epoch": 1.2821261311571117, "grad_norm": 0.294921875, "learning_rate": 0.00010311434753173068, "loss": 0.3917, "step": 50510 }, { "epoch": 1.2822530492061275, "grad_norm": 0.349609375, "learning_rate": 0.00010308278607655523, "loss": 0.4268, "step": 50515 }, { "epoch": 1.2823799672551432, "grad_norm": 0.349609375, "learning_rate": 0.00010305122692358885, "loss": 0.3973, "step": 50520 }, { "epoch": 1.282506885304159, "grad_norm": 0.3515625, "learning_rate": 0.00010301967007438004, "loss": 0.4139, "step": 50525 }, { "epoch": 1.2826338033531748, "grad_norm": 0.345703125, "learning_rate": 0.00010298811553047733, "loss": 0.3918, "step": 50530 }, { "epoch": 1.2827607214021906, "grad_norm": 0.35546875, "learning_rate": 0.00010295656329342905, "loss": 0.4033, "step": 50535 }, { "epoch": 1.2828876394512063, "grad_norm": 0.35546875, "learning_rate": 0.00010292501336478346, "loss": 0.4203, "step": 50540 }, { "epoch": 1.283014557500222, "grad_norm": 0.359375, "learning_rate": 0.0001028934657460887, "loss": 0.4068, "step": 50545 }, { "epoch": 1.2831414755492379, "grad_norm": 0.353515625, "learning_rate": 0.00010286192043889283, "loss": 0.4203, "step": 50550 }, { "epoch": 1.2832683935982536, "grad_norm": 0.330078125, "learning_rate": 0.00010283037744474374, "loss": 0.4175, "step": 50555 }, { "epoch": 1.2833953116472694, "grad_norm": 0.3515625, "learning_rate": 0.00010279883676518927, "loss": 0.3996, "step": 50560 }, { "epoch": 1.2835222296962852, "grad_norm": 0.36328125, "learning_rate": 0.00010276729840177707, "loss": 0.4205, "step": 50565 }, { "epoch": 1.283649147745301, "grad_norm": 0.337890625, "learning_rate": 0.0001027357623560547, "loss": 0.4261, "step": 50570 }, { "epoch": 1.2837760657943167, "grad_norm": 0.35546875, "learning_rate": 0.00010270422862956959, "loss": 0.4325, "step": 50575 }, { "epoch": 1.2839029838433325, "grad_norm": 0.34375, "learning_rate": 0.00010267269722386921, "loss": 0.4303, "step": 50580 }, { "epoch": 1.2840299018923482, "grad_norm": 0.3671875, "learning_rate": 0.00010264116814050072, "loss": 0.4187, "step": 50585 }, { "epoch": 1.284156819941364, "grad_norm": 0.3359375, "learning_rate": 0.00010260964138101121, "loss": 0.396, "step": 50590 }, { "epoch": 1.2842837379903795, "grad_norm": 0.34765625, "learning_rate": 0.00010257811694694776, "loss": 0.4058, "step": 50595 }, { "epoch": 1.2844106560393953, "grad_norm": 0.3515625, "learning_rate": 0.00010254659483985721, "loss": 0.4246, "step": 50600 }, { "epoch": 1.284537574088411, "grad_norm": 0.359375, "learning_rate": 0.00010251507506128636, "loss": 0.4374, "step": 50605 }, { "epoch": 1.2846644921374268, "grad_norm": 0.341796875, "learning_rate": 0.00010248355761278186, "loss": 0.419, "step": 50610 }, { "epoch": 1.2847914101864426, "grad_norm": 0.3671875, "learning_rate": 0.00010245204249589026, "loss": 0.4145, "step": 50615 }, { "epoch": 1.2849183282354584, "grad_norm": 0.359375, "learning_rate": 0.00010242052971215798, "loss": 0.3989, "step": 50620 }, { "epoch": 1.2850452462844741, "grad_norm": 0.357421875, "learning_rate": 0.00010238901926313136, "loss": 0.4289, "step": 50625 }, { "epoch": 1.28517216433349, "grad_norm": 0.326171875, "learning_rate": 0.0001023575111503566, "loss": 1.1256, "step": 50630 }, { "epoch": 1.2852990823825057, "grad_norm": 0.37109375, "learning_rate": 0.0001023260053753798, "loss": 0.4222, "step": 50635 }, { "epoch": 1.2854260004315214, "grad_norm": 0.361328125, "learning_rate": 0.00010229450193974684, "loss": 0.3931, "step": 50640 }, { "epoch": 1.285552918480537, "grad_norm": 0.349609375, "learning_rate": 0.00010226300084500376, "loss": 0.4009, "step": 50645 }, { "epoch": 1.2856798365295528, "grad_norm": 0.302734375, "learning_rate": 0.0001022315020926962, "loss": 0.379, "step": 50650 }, { "epoch": 1.2858067545785685, "grad_norm": 0.35546875, "learning_rate": 0.0001022000056843698, "loss": 0.4206, "step": 50655 }, { "epoch": 1.2859336726275843, "grad_norm": 0.337890625, "learning_rate": 0.00010216851162157009, "loss": 0.4153, "step": 50660 }, { "epoch": 1.2860605906766, "grad_norm": 0.3515625, "learning_rate": 0.00010213701990584242, "loss": 0.4294, "step": 50665 }, { "epoch": 1.2861875087256158, "grad_norm": 0.359375, "learning_rate": 0.00010210553053873215, "loss": 0.4073, "step": 50670 }, { "epoch": 1.2863144267746316, "grad_norm": 0.33203125, "learning_rate": 0.00010207404352178443, "loss": 0.4061, "step": 50675 }, { "epoch": 1.2864413448236474, "grad_norm": 0.34375, "learning_rate": 0.0001020425588565443, "loss": 0.4019, "step": 50680 }, { "epoch": 1.2865682628726631, "grad_norm": 0.3046875, "learning_rate": 0.0001020110765445567, "loss": 0.4195, "step": 50685 }, { "epoch": 1.286695180921679, "grad_norm": 0.35546875, "learning_rate": 0.00010197959658736646, "loss": 0.4336, "step": 50690 }, { "epoch": 1.2868220989706947, "grad_norm": 0.361328125, "learning_rate": 0.00010194811898651828, "loss": 0.4222, "step": 50695 }, { "epoch": 1.2869490170197104, "grad_norm": 0.328125, "learning_rate": 0.00010191664374355669, "loss": 0.4279, "step": 50700 }, { "epoch": 1.2870759350687262, "grad_norm": 0.34765625, "learning_rate": 0.00010188517086002633, "loss": 0.4103, "step": 50705 }, { "epoch": 1.287202853117742, "grad_norm": 0.34375, "learning_rate": 0.00010185370033747142, "loss": 0.4198, "step": 50710 }, { "epoch": 1.2873297711667577, "grad_norm": 0.35546875, "learning_rate": 0.0001018222321774363, "loss": 0.4368, "step": 50715 }, { "epoch": 1.2874566892157735, "grad_norm": 0.345703125, "learning_rate": 0.00010179076638146503, "loss": 0.4428, "step": 50720 }, { "epoch": 1.287583607264789, "grad_norm": 0.35546875, "learning_rate": 0.00010175930295110168, "loss": 0.411, "step": 50725 }, { "epoch": 1.2877105253138048, "grad_norm": 1.265625, "learning_rate": 0.00010172784188789009, "loss": 0.4377, "step": 50730 }, { "epoch": 1.2878374433628206, "grad_norm": 0.345703125, "learning_rate": 0.00010169638319337408, "loss": 0.4084, "step": 50735 }, { "epoch": 1.2879643614118363, "grad_norm": 0.34765625, "learning_rate": 0.0001016649268690973, "loss": 0.4103, "step": 50740 }, { "epoch": 1.2880912794608521, "grad_norm": 0.34375, "learning_rate": 0.00010163347291660329, "loss": 0.4166, "step": 50745 }, { "epoch": 1.2882181975098679, "grad_norm": 0.359375, "learning_rate": 0.00010160202133743553, "loss": 0.4095, "step": 50750 }, { "epoch": 1.2883451155588836, "grad_norm": 0.357421875, "learning_rate": 0.00010157057213313729, "loss": 0.4395, "step": 50755 }, { "epoch": 1.2884720336078994, "grad_norm": 0.34765625, "learning_rate": 0.00010153912530525176, "loss": 0.4341, "step": 50760 }, { "epoch": 1.2885989516569152, "grad_norm": 0.3359375, "learning_rate": 0.00010150768085532203, "loss": 0.4039, "step": 50765 }, { "epoch": 1.288725869705931, "grad_norm": 0.318359375, "learning_rate": 0.00010147623878489114, "loss": 0.3947, "step": 50770 }, { "epoch": 1.2888527877549465, "grad_norm": 0.32421875, "learning_rate": 0.00010144479909550187, "loss": 0.3902, "step": 50775 }, { "epoch": 1.2889797058039623, "grad_norm": 0.341796875, "learning_rate": 0.00010141336178869697, "loss": 0.4362, "step": 50780 }, { "epoch": 1.289106623852978, "grad_norm": 0.34375, "learning_rate": 0.00010138192686601905, "loss": 0.4329, "step": 50785 }, { "epoch": 1.2892335419019938, "grad_norm": 0.333984375, "learning_rate": 0.00010135049432901058, "loss": 0.3818, "step": 50790 }, { "epoch": 1.2893604599510096, "grad_norm": 0.345703125, "learning_rate": 0.00010131906417921401, "loss": 0.417, "step": 50795 }, { "epoch": 1.2894873780000253, "grad_norm": 0.298828125, "learning_rate": 0.00010128763641817157, "loss": 0.4047, "step": 50800 }, { "epoch": 1.289614296049041, "grad_norm": 0.357421875, "learning_rate": 0.0001012562110474254, "loss": 0.4225, "step": 50805 }, { "epoch": 1.2897412140980569, "grad_norm": 0.361328125, "learning_rate": 0.00010122478806851754, "loss": 0.4547, "step": 50810 }, { "epoch": 1.2898681321470726, "grad_norm": 0.357421875, "learning_rate": 0.0001011933674829899, "loss": 0.4274, "step": 50815 }, { "epoch": 1.2899950501960884, "grad_norm": 0.365234375, "learning_rate": 0.00010116194929238425, "loss": 0.4219, "step": 50820 }, { "epoch": 1.2901219682451042, "grad_norm": 0.365234375, "learning_rate": 0.00010113053349824233, "loss": 0.4015, "step": 50825 }, { "epoch": 1.29024888629412, "grad_norm": 0.341796875, "learning_rate": 0.00010109912010210562, "loss": 0.4288, "step": 50830 }, { "epoch": 1.2903758043431357, "grad_norm": 0.33984375, "learning_rate": 0.0001010677091055156, "loss": 0.4237, "step": 50835 }, { "epoch": 1.2905027223921515, "grad_norm": 0.328125, "learning_rate": 0.00010103630051001367, "loss": 0.3884, "step": 50840 }, { "epoch": 1.2906296404411672, "grad_norm": 0.373046875, "learning_rate": 0.00010100489431714093, "loss": 0.4153, "step": 50845 }, { "epoch": 1.290756558490183, "grad_norm": 0.36328125, "learning_rate": 0.00010097349052843853, "loss": 0.4459, "step": 50850 }, { "epoch": 1.2908834765391988, "grad_norm": 0.36328125, "learning_rate": 0.00010094208914544742, "loss": 0.4425, "step": 50855 }, { "epoch": 1.2910103945882143, "grad_norm": 0.333984375, "learning_rate": 0.00010091069016970845, "loss": 0.413, "step": 50860 }, { "epoch": 1.29113731263723, "grad_norm": 0.337890625, "learning_rate": 0.00010087929360276234, "loss": 0.4404, "step": 50865 }, { "epoch": 1.2912642306862459, "grad_norm": 0.341796875, "learning_rate": 0.00010084789944614973, "loss": 0.4204, "step": 50870 }, { "epoch": 1.2913911487352616, "grad_norm": 0.330078125, "learning_rate": 0.00010081650770141114, "loss": 0.4114, "step": 50875 }, { "epoch": 1.2915180667842774, "grad_norm": 0.37109375, "learning_rate": 0.00010078511837008692, "loss": 0.389, "step": 50880 }, { "epoch": 1.2916449848332932, "grad_norm": 0.35546875, "learning_rate": 0.00010075373145371734, "loss": 0.406, "step": 50885 }, { "epoch": 1.291771902882309, "grad_norm": 0.32421875, "learning_rate": 0.00010072234695384246, "loss": 0.4023, "step": 50890 }, { "epoch": 1.2918988209313247, "grad_norm": 0.361328125, "learning_rate": 0.00010069096487200246, "loss": 0.4121, "step": 50895 }, { "epoch": 1.2920257389803405, "grad_norm": 0.318359375, "learning_rate": 0.00010065958520973716, "loss": 0.3999, "step": 50900 }, { "epoch": 1.2921526570293562, "grad_norm": 0.34765625, "learning_rate": 0.00010062820796858638, "loss": 0.4442, "step": 50905 }, { "epoch": 1.2922795750783718, "grad_norm": 0.34375, "learning_rate": 0.00010059683315008973, "loss": 0.4217, "step": 50910 }, { "epoch": 1.2924064931273875, "grad_norm": 0.3359375, "learning_rate": 0.00010056546075578683, "loss": 0.4234, "step": 50915 }, { "epoch": 1.2925334111764033, "grad_norm": 0.375, "learning_rate": 0.00010053409078721707, "loss": 0.4098, "step": 50920 }, { "epoch": 1.292660329225419, "grad_norm": 0.341796875, "learning_rate": 0.00010050272324591979, "loss": 0.389, "step": 50925 }, { "epoch": 1.2927872472744348, "grad_norm": 0.357421875, "learning_rate": 0.00010047135813343413, "loss": 0.431, "step": 50930 }, { "epoch": 1.2929141653234506, "grad_norm": 0.337890625, "learning_rate": 0.00010043999545129922, "loss": 0.422, "step": 50935 }, { "epoch": 1.2930410833724664, "grad_norm": 0.357421875, "learning_rate": 0.000100408635201054, "loss": 0.4431, "step": 50940 }, { "epoch": 1.2931680014214821, "grad_norm": 0.37109375, "learning_rate": 0.00010037727738423725, "loss": 0.4384, "step": 50945 }, { "epoch": 1.293294919470498, "grad_norm": 0.322265625, "learning_rate": 0.00010034592200238777, "loss": 0.402, "step": 50950 }, { "epoch": 1.2934218375195137, "grad_norm": 0.3671875, "learning_rate": 0.00010031456905704404, "loss": 0.4143, "step": 50955 }, { "epoch": 1.2935487555685294, "grad_norm": 0.388671875, "learning_rate": 0.0001002832185497447, "loss": 0.4128, "step": 50960 }, { "epoch": 1.2936756736175452, "grad_norm": 0.34765625, "learning_rate": 0.00010025187048202801, "loss": 0.3934, "step": 50965 }, { "epoch": 1.293802591666561, "grad_norm": 0.396484375, "learning_rate": 0.00010022052485543221, "loss": 0.4506, "step": 50970 }, { "epoch": 1.2939295097155767, "grad_norm": 0.33203125, "learning_rate": 0.00010018918167149543, "loss": 0.4151, "step": 50975 }, { "epoch": 1.2940564277645925, "grad_norm": 0.369140625, "learning_rate": 0.00010015784093175568, "loss": 0.4169, "step": 50980 }, { "epoch": 1.2941833458136083, "grad_norm": 0.3359375, "learning_rate": 0.00010012650263775082, "loss": 0.4426, "step": 50985 }, { "epoch": 1.2943102638626238, "grad_norm": 0.353515625, "learning_rate": 0.00010009516679101856, "loss": 0.4243, "step": 50990 }, { "epoch": 1.2944371819116396, "grad_norm": 0.35546875, "learning_rate": 0.00010006383339309662, "loss": 0.4117, "step": 50995 }, { "epoch": 1.2945640999606554, "grad_norm": 0.3359375, "learning_rate": 0.0001000325024455225, "loss": 0.4316, "step": 51000 }, { "epoch": 1.2946910180096711, "grad_norm": 0.37890625, "learning_rate": 0.00010000117394983356, "loss": 0.4503, "step": 51005 }, { "epoch": 1.294817936058687, "grad_norm": 0.337890625, "learning_rate": 9.996984790756712e-05, "loss": 0.442, "step": 51010 }, { "epoch": 1.2949448541077027, "grad_norm": 0.33984375, "learning_rate": 9.993852432026024e-05, "loss": 0.3996, "step": 51015 }, { "epoch": 1.2950717721567184, "grad_norm": 0.35546875, "learning_rate": 9.990720318945009e-05, "loss": 0.4114, "step": 51020 }, { "epoch": 1.2951986902057342, "grad_norm": 0.37109375, "learning_rate": 9.987588451667352e-05, "loss": 0.4167, "step": 51025 }, { "epoch": 1.29532560825475, "grad_norm": 0.353515625, "learning_rate": 9.984456830346734e-05, "loss": 0.4287, "step": 51030 }, { "epoch": 1.2954525263037657, "grad_norm": 0.373046875, "learning_rate": 9.98132545513682e-05, "loss": 0.4096, "step": 51035 }, { "epoch": 1.2955794443527813, "grad_norm": 0.361328125, "learning_rate": 9.978194326191268e-05, "loss": 0.4204, "step": 51040 }, { "epoch": 1.295706362401797, "grad_norm": 0.341796875, "learning_rate": 9.975063443663719e-05, "loss": 0.3981, "step": 51045 }, { "epoch": 1.2958332804508128, "grad_norm": 0.33203125, "learning_rate": 9.971932807707806e-05, "loss": 0.4091, "step": 51050 }, { "epoch": 1.2959601984998286, "grad_norm": 0.341796875, "learning_rate": 9.968802418477148e-05, "loss": 0.4235, "step": 51055 }, { "epoch": 1.2960871165488443, "grad_norm": 0.3515625, "learning_rate": 9.965672276125351e-05, "loss": 0.4278, "step": 51060 }, { "epoch": 1.29621403459786, "grad_norm": 0.3671875, "learning_rate": 9.962542380806005e-05, "loss": 0.4331, "step": 51065 }, { "epoch": 1.2963409526468759, "grad_norm": 0.302734375, "learning_rate": 9.959412732672704e-05, "loss": 0.4146, "step": 51070 }, { "epoch": 1.2964678706958916, "grad_norm": 0.3515625, "learning_rate": 9.95628333187901e-05, "loss": 0.4423, "step": 51075 }, { "epoch": 1.2965947887449074, "grad_norm": 0.345703125, "learning_rate": 9.95315417857848e-05, "loss": 0.4197, "step": 51080 }, { "epoch": 1.2967217067939232, "grad_norm": 0.375, "learning_rate": 9.950025272924669e-05, "loss": 0.4088, "step": 51085 }, { "epoch": 1.296848624842939, "grad_norm": 0.33203125, "learning_rate": 9.946896615071105e-05, "loss": 0.4131, "step": 51090 }, { "epoch": 1.2969755428919547, "grad_norm": 0.357421875, "learning_rate": 9.943768205171311e-05, "loss": 0.4001, "step": 51095 }, { "epoch": 1.2971024609409705, "grad_norm": 0.361328125, "learning_rate": 9.9406400433788e-05, "loss": 0.4113, "step": 51100 }, { "epoch": 1.2972293789899862, "grad_norm": 0.3359375, "learning_rate": 9.937512129847063e-05, "loss": 0.4009, "step": 51105 }, { "epoch": 1.297356297039002, "grad_norm": 0.345703125, "learning_rate": 9.93438446472959e-05, "loss": 0.4434, "step": 51110 }, { "epoch": 1.2974832150880178, "grad_norm": 0.36328125, "learning_rate": 9.931257048179855e-05, "loss": 0.4371, "step": 51115 }, { "epoch": 1.2976101331370336, "grad_norm": 0.34765625, "learning_rate": 9.928129880351316e-05, "loss": 0.4258, "step": 51120 }, { "epoch": 1.297737051186049, "grad_norm": 0.37109375, "learning_rate": 9.925002961397426e-05, "loss": 0.4407, "step": 51125 }, { "epoch": 1.2978639692350649, "grad_norm": 0.36328125, "learning_rate": 9.921876291471618e-05, "loss": 0.4057, "step": 51130 }, { "epoch": 1.2979908872840806, "grad_norm": 0.34375, "learning_rate": 9.918749870727316e-05, "loss": 0.4233, "step": 51135 }, { "epoch": 1.2981178053330964, "grad_norm": 0.35546875, "learning_rate": 9.915623699317935e-05, "loss": 0.3912, "step": 51140 }, { "epoch": 1.2982447233821122, "grad_norm": 0.37109375, "learning_rate": 9.912497777396868e-05, "loss": 0.4174, "step": 51145 }, { "epoch": 1.298371641431128, "grad_norm": 0.35546875, "learning_rate": 9.909372105117516e-05, "loss": 0.4294, "step": 51150 }, { "epoch": 1.2984985594801437, "grad_norm": 0.376953125, "learning_rate": 9.906246682633241e-05, "loss": 0.4268, "step": 51155 }, { "epoch": 1.2986254775291595, "grad_norm": 0.345703125, "learning_rate": 9.903121510097416e-05, "loss": 0.4367, "step": 51160 }, { "epoch": 1.2987523955781752, "grad_norm": 0.361328125, "learning_rate": 9.899996587663387e-05, "loss": 0.4428, "step": 51165 }, { "epoch": 1.298879313627191, "grad_norm": 0.3359375, "learning_rate": 9.896871915484497e-05, "loss": 0.8415, "step": 51170 }, { "epoch": 1.2990062316762065, "grad_norm": 0.36328125, "learning_rate": 9.893747493714069e-05, "loss": 0.4413, "step": 51175 }, { "epoch": 1.2991331497252223, "grad_norm": 0.33984375, "learning_rate": 9.890623322505417e-05, "loss": 0.4078, "step": 51180 }, { "epoch": 1.299260067774238, "grad_norm": 0.3359375, "learning_rate": 9.887499402011842e-05, "loss": 0.4393, "step": 51185 }, { "epoch": 1.2993869858232538, "grad_norm": 0.357421875, "learning_rate": 9.884375732386634e-05, "loss": 0.4337, "step": 51190 }, { "epoch": 1.2995139038722696, "grad_norm": 0.3671875, "learning_rate": 9.88125231378307e-05, "loss": 0.421, "step": 51195 }, { "epoch": 1.2996408219212854, "grad_norm": 0.30859375, "learning_rate": 9.87812914635442e-05, "loss": 0.3957, "step": 51200 }, { "epoch": 1.2997677399703012, "grad_norm": 0.328125, "learning_rate": 9.875006230253925e-05, "loss": 0.4122, "step": 51205 }, { "epoch": 1.299894658019317, "grad_norm": 0.357421875, "learning_rate": 9.871883565634839e-05, "loss": 0.4135, "step": 51210 }, { "epoch": 1.3000215760683327, "grad_norm": 0.33984375, "learning_rate": 9.868761152650383e-05, "loss": 0.4133, "step": 51215 }, { "epoch": 1.3001484941173485, "grad_norm": 0.337890625, "learning_rate": 9.865638991453775e-05, "loss": 0.4074, "step": 51220 }, { "epoch": 1.3002754121663642, "grad_norm": 0.328125, "learning_rate": 9.862517082198214e-05, "loss": 0.3977, "step": 51225 }, { "epoch": 1.30040233021538, "grad_norm": 0.341796875, "learning_rate": 9.859395425036895e-05, "loss": 0.417, "step": 51230 }, { "epoch": 1.3005292482643958, "grad_norm": 0.365234375, "learning_rate": 9.856274020122992e-05, "loss": 0.408, "step": 51235 }, { "epoch": 1.3006561663134115, "grad_norm": 0.34375, "learning_rate": 9.853152867609677e-05, "loss": 0.4132, "step": 51240 }, { "epoch": 1.3007830843624273, "grad_norm": 0.3515625, "learning_rate": 9.850031967650099e-05, "loss": 0.4298, "step": 51245 }, { "epoch": 1.300910002411443, "grad_norm": 0.34765625, "learning_rate": 9.846911320397402e-05, "loss": 0.4196, "step": 51250 }, { "epoch": 1.3010369204604586, "grad_norm": 0.373046875, "learning_rate": 9.843790926004714e-05, "loss": 0.4009, "step": 51255 }, { "epoch": 1.3011638385094744, "grad_norm": 0.369140625, "learning_rate": 9.84067078462515e-05, "loss": 0.4503, "step": 51260 }, { "epoch": 1.3012907565584901, "grad_norm": 0.31640625, "learning_rate": 9.837550896411813e-05, "loss": 0.4006, "step": 51265 }, { "epoch": 1.301417674607506, "grad_norm": 0.35546875, "learning_rate": 9.834431261517799e-05, "loss": 0.4142, "step": 51270 }, { "epoch": 1.3015445926565217, "grad_norm": 0.34375, "learning_rate": 9.831311880096186e-05, "loss": 0.4107, "step": 51275 }, { "epoch": 1.3016715107055374, "grad_norm": 0.33203125, "learning_rate": 9.828192752300037e-05, "loss": 0.4039, "step": 51280 }, { "epoch": 1.3017984287545532, "grad_norm": 0.353515625, "learning_rate": 9.825073878282414e-05, "loss": 0.4269, "step": 51285 }, { "epoch": 1.301925346803569, "grad_norm": 0.373046875, "learning_rate": 9.821955258196355e-05, "loss": 0.407, "step": 51290 }, { "epoch": 1.3020522648525847, "grad_norm": 0.3203125, "learning_rate": 9.818836892194887e-05, "loss": 0.4132, "step": 51295 }, { "epoch": 1.3021791829016005, "grad_norm": 0.310546875, "learning_rate": 9.815718780431029e-05, "loss": 0.3716, "step": 51300 }, { "epoch": 1.302306100950616, "grad_norm": 0.349609375, "learning_rate": 9.812600923057785e-05, "loss": 0.4341, "step": 51305 }, { "epoch": 1.3024330189996318, "grad_norm": 0.34375, "learning_rate": 9.809483320228147e-05, "loss": 0.4448, "step": 51310 }, { "epoch": 1.3025599370486476, "grad_norm": 0.376953125, "learning_rate": 9.806365972095095e-05, "loss": 0.4278, "step": 51315 }, { "epoch": 1.3026868550976634, "grad_norm": 0.38671875, "learning_rate": 9.803248878811598e-05, "loss": 0.4398, "step": 51320 }, { "epoch": 1.3028137731466791, "grad_norm": 0.37109375, "learning_rate": 9.800132040530609e-05, "loss": 0.417, "step": 51325 }, { "epoch": 1.3029406911956949, "grad_norm": 0.345703125, "learning_rate": 9.797015457405058e-05, "loss": 0.4147, "step": 51330 }, { "epoch": 1.3030676092447107, "grad_norm": 0.376953125, "learning_rate": 9.793899129587897e-05, "loss": 0.4198, "step": 51335 }, { "epoch": 1.3031945272937264, "grad_norm": 0.341796875, "learning_rate": 9.790783057232032e-05, "loss": 0.4235, "step": 51340 }, { "epoch": 1.3033214453427422, "grad_norm": 0.326171875, "learning_rate": 9.787667240490367e-05, "loss": 0.4105, "step": 51345 }, { "epoch": 1.303448363391758, "grad_norm": 0.326171875, "learning_rate": 9.78455167951579e-05, "loss": 0.4225, "step": 51350 }, { "epoch": 1.3035752814407737, "grad_norm": 0.3671875, "learning_rate": 9.781436374461187e-05, "loss": 0.4262, "step": 51355 }, { "epoch": 1.3037021994897895, "grad_norm": 0.37109375, "learning_rate": 9.778321325479422e-05, "loss": 0.432, "step": 51360 }, { "epoch": 1.3038291175388053, "grad_norm": 0.369140625, "learning_rate": 9.775206532723351e-05, "loss": 0.4528, "step": 51365 }, { "epoch": 1.303956035587821, "grad_norm": 0.33984375, "learning_rate": 9.772091996345815e-05, "loss": 0.4059, "step": 51370 }, { "epoch": 1.3040829536368368, "grad_norm": 0.322265625, "learning_rate": 9.768977716499641e-05, "loss": 0.4037, "step": 51375 }, { "epoch": 1.3042098716858526, "grad_norm": 0.337890625, "learning_rate": 9.765863693337648e-05, "loss": 0.4325, "step": 51380 }, { "epoch": 1.3043367897348683, "grad_norm": 0.33203125, "learning_rate": 9.762749927012637e-05, "loss": 0.4049, "step": 51385 }, { "epoch": 1.3044637077838839, "grad_norm": 0.26953125, "learning_rate": 9.759636417677397e-05, "loss": 0.3917, "step": 51390 }, { "epoch": 1.3045906258328996, "grad_norm": 0.330078125, "learning_rate": 9.756523165484712e-05, "loss": 0.426, "step": 51395 }, { "epoch": 1.3047175438819154, "grad_norm": 0.33984375, "learning_rate": 9.753410170587349e-05, "loss": 0.4107, "step": 51400 }, { "epoch": 1.3048444619309312, "grad_norm": 0.33984375, "learning_rate": 9.750297433138057e-05, "loss": 0.4181, "step": 51405 }, { "epoch": 1.304971379979947, "grad_norm": 0.3515625, "learning_rate": 9.747184953289581e-05, "loss": 0.4188, "step": 51410 }, { "epoch": 1.3050982980289627, "grad_norm": 0.3515625, "learning_rate": 9.744072731194648e-05, "loss": 0.4112, "step": 51415 }, { "epoch": 1.3052252160779785, "grad_norm": 0.369140625, "learning_rate": 9.740960767005971e-05, "loss": 0.4529, "step": 51420 }, { "epoch": 1.3053521341269942, "grad_norm": 0.36328125, "learning_rate": 9.737849060876256e-05, "loss": 0.4362, "step": 51425 }, { "epoch": 1.30547905217601, "grad_norm": 0.34765625, "learning_rate": 9.734737612958191e-05, "loss": 0.4204, "step": 51430 }, { "epoch": 1.3056059702250258, "grad_norm": 0.357421875, "learning_rate": 9.73162642340445e-05, "loss": 0.4214, "step": 51435 }, { "epoch": 1.3057328882740413, "grad_norm": 0.333984375, "learning_rate": 9.728515492367705e-05, "loss": 0.4093, "step": 51440 }, { "epoch": 1.305859806323057, "grad_norm": 0.35546875, "learning_rate": 9.725404820000605e-05, "loss": 0.4329, "step": 51445 }, { "epoch": 1.3059867243720729, "grad_norm": 0.357421875, "learning_rate": 9.722294406455788e-05, "loss": 0.4332, "step": 51450 }, { "epoch": 1.3061136424210886, "grad_norm": 0.353515625, "learning_rate": 9.719184251885885e-05, "loss": 0.4203, "step": 51455 }, { "epoch": 1.3062405604701044, "grad_norm": 0.361328125, "learning_rate": 9.716074356443501e-05, "loss": 0.4396, "step": 51460 }, { "epoch": 1.3063674785191202, "grad_norm": 0.376953125, "learning_rate": 9.712964720281247e-05, "loss": 0.4345, "step": 51465 }, { "epoch": 1.306494396568136, "grad_norm": 0.37109375, "learning_rate": 9.709855343551708e-05, "loss": 0.3964, "step": 51470 }, { "epoch": 1.3066213146171517, "grad_norm": 0.359375, "learning_rate": 9.706746226407462e-05, "loss": 0.3816, "step": 51475 }, { "epoch": 1.3067482326661675, "grad_norm": 0.322265625, "learning_rate": 9.703637369001067e-05, "loss": 0.416, "step": 51480 }, { "epoch": 1.3068751507151832, "grad_norm": 0.353515625, "learning_rate": 9.700528771485082e-05, "loss": 0.3987, "step": 51485 }, { "epoch": 1.307002068764199, "grad_norm": 0.37109375, "learning_rate": 9.697420434012036e-05, "loss": 0.4133, "step": 51490 }, { "epoch": 1.3071289868132148, "grad_norm": 0.357421875, "learning_rate": 9.694312356734454e-05, "loss": 0.4229, "step": 51495 }, { "epoch": 1.3072559048622305, "grad_norm": 0.376953125, "learning_rate": 9.691204539804857e-05, "loss": 0.4056, "step": 51500 }, { "epoch": 1.3073828229112463, "grad_norm": 0.359375, "learning_rate": 9.688096983375735e-05, "loss": 0.4223, "step": 51505 }, { "epoch": 1.307509740960262, "grad_norm": 0.375, "learning_rate": 9.684989687599579e-05, "loss": 0.4281, "step": 51510 }, { "epoch": 1.3076366590092778, "grad_norm": 0.330078125, "learning_rate": 9.681882652628861e-05, "loss": 0.3942, "step": 51515 }, { "epoch": 1.3077635770582934, "grad_norm": 0.365234375, "learning_rate": 9.67877587861604e-05, "loss": 0.4222, "step": 51520 }, { "epoch": 1.3078904951073091, "grad_norm": 0.34375, "learning_rate": 9.675669365713567e-05, "loss": 0.3869, "step": 51525 }, { "epoch": 1.308017413156325, "grad_norm": 0.33203125, "learning_rate": 9.672563114073881e-05, "loss": 0.3857, "step": 51530 }, { "epoch": 1.3081443312053407, "grad_norm": 0.341796875, "learning_rate": 9.669457123849401e-05, "loss": 0.4306, "step": 51535 }, { "epoch": 1.3082712492543564, "grad_norm": 0.333984375, "learning_rate": 9.666351395192538e-05, "loss": 0.3945, "step": 51540 }, { "epoch": 1.3083981673033722, "grad_norm": 0.345703125, "learning_rate": 9.663245928255686e-05, "loss": 0.4306, "step": 51545 }, { "epoch": 1.308525085352388, "grad_norm": 0.322265625, "learning_rate": 9.660140723191231e-05, "loss": 0.415, "step": 51550 }, { "epoch": 1.3086520034014038, "grad_norm": 0.33984375, "learning_rate": 9.657035780151539e-05, "loss": 0.3808, "step": 51555 }, { "epoch": 1.3087789214504195, "grad_norm": 0.291015625, "learning_rate": 9.653931099288977e-05, "loss": 0.3887, "step": 51560 }, { "epoch": 1.3089058394994353, "grad_norm": 0.34765625, "learning_rate": 9.650826680755885e-05, "loss": 0.4069, "step": 51565 }, { "epoch": 1.3090327575484508, "grad_norm": 0.326171875, "learning_rate": 9.647722524704601e-05, "loss": 0.3893, "step": 51570 }, { "epoch": 1.3091596755974666, "grad_norm": 0.38671875, "learning_rate": 9.644618631287437e-05, "loss": 0.431, "step": 51575 }, { "epoch": 1.3092865936464824, "grad_norm": 0.380859375, "learning_rate": 9.641515000656705e-05, "loss": 0.4443, "step": 51580 }, { "epoch": 1.3094135116954981, "grad_norm": 0.330078125, "learning_rate": 9.638411632964692e-05, "loss": 0.4108, "step": 51585 }, { "epoch": 1.309540429744514, "grad_norm": 0.349609375, "learning_rate": 9.635308528363687e-05, "loss": 0.428, "step": 51590 }, { "epoch": 1.3096673477935297, "grad_norm": 0.3515625, "learning_rate": 9.632205687005958e-05, "loss": 0.4201, "step": 51595 }, { "epoch": 1.3097942658425454, "grad_norm": 0.33984375, "learning_rate": 9.629103109043756e-05, "loss": 0.4271, "step": 51600 }, { "epoch": 1.3099211838915612, "grad_norm": 0.349609375, "learning_rate": 9.626000794629327e-05, "loss": 0.3969, "step": 51605 }, { "epoch": 1.310048101940577, "grad_norm": 0.33203125, "learning_rate": 9.622898743914899e-05, "loss": 0.4059, "step": 51610 }, { "epoch": 1.3101750199895927, "grad_norm": 0.365234375, "learning_rate": 9.619796957052687e-05, "loss": 0.4053, "step": 51615 }, { "epoch": 1.3103019380386085, "grad_norm": 0.35546875, "learning_rate": 9.616695434194893e-05, "loss": 0.4199, "step": 51620 }, { "epoch": 1.3104288560876243, "grad_norm": 0.330078125, "learning_rate": 9.613594175493712e-05, "loss": 0.4119, "step": 51625 }, { "epoch": 1.31055577413664, "grad_norm": 0.341796875, "learning_rate": 9.61049318110132e-05, "loss": 0.4035, "step": 51630 }, { "epoch": 1.3106826921856558, "grad_norm": 0.345703125, "learning_rate": 9.607392451169875e-05, "loss": 0.4232, "step": 51635 }, { "epoch": 1.3108096102346716, "grad_norm": 0.36328125, "learning_rate": 9.60429198585154e-05, "loss": 0.3973, "step": 51640 }, { "epoch": 1.3109365282836873, "grad_norm": 0.33203125, "learning_rate": 9.601191785298447e-05, "loss": 0.4045, "step": 51645 }, { "epoch": 1.311063446332703, "grad_norm": 0.35546875, "learning_rate": 9.598091849662716e-05, "loss": 0.415, "step": 51650 }, { "epoch": 1.3111903643817187, "grad_norm": 0.318359375, "learning_rate": 9.594992179096471e-05, "loss": 0.426, "step": 51655 }, { "epoch": 1.3113172824307344, "grad_norm": 0.31640625, "learning_rate": 9.591892773751809e-05, "loss": 0.3911, "step": 51660 }, { "epoch": 1.3114442004797502, "grad_norm": 0.341796875, "learning_rate": 9.588793633780812e-05, "loss": 0.397, "step": 51665 }, { "epoch": 1.311571118528766, "grad_norm": 0.365234375, "learning_rate": 9.585694759335559e-05, "loss": 0.4272, "step": 51670 }, { "epoch": 1.3116980365777817, "grad_norm": 0.345703125, "learning_rate": 9.582596150568105e-05, "loss": 0.424, "step": 51675 }, { "epoch": 1.3118249546267975, "grad_norm": 0.330078125, "learning_rate": 9.579497807630498e-05, "loss": 0.423, "step": 51680 }, { "epoch": 1.3119518726758133, "grad_norm": 0.35546875, "learning_rate": 9.576399730674778e-05, "loss": 0.4346, "step": 51685 }, { "epoch": 1.312078790724829, "grad_norm": 0.357421875, "learning_rate": 9.57330191985296e-05, "loss": 0.4229, "step": 51690 }, { "epoch": 1.3122057087738448, "grad_norm": 0.34765625, "learning_rate": 9.570204375317057e-05, "loss": 0.4103, "step": 51695 }, { "epoch": 1.3123326268228606, "grad_norm": 0.388671875, "learning_rate": 9.567107097219065e-05, "loss": 0.4407, "step": 51700 }, { "epoch": 1.312459544871876, "grad_norm": 0.330078125, "learning_rate": 9.56401008571096e-05, "loss": 0.3965, "step": 51705 }, { "epoch": 1.3125864629208919, "grad_norm": 0.361328125, "learning_rate": 9.560913340944709e-05, "loss": 0.4227, "step": 51710 }, { "epoch": 1.3127133809699076, "grad_norm": 0.32421875, "learning_rate": 9.55781686307228e-05, "loss": 0.4174, "step": 51715 }, { "epoch": 1.3128402990189234, "grad_norm": 0.359375, "learning_rate": 9.55472065224561e-05, "loss": 0.4192, "step": 51720 }, { "epoch": 1.3129672170679392, "grad_norm": 0.35546875, "learning_rate": 9.551624708616626e-05, "loss": 0.4007, "step": 51725 }, { "epoch": 1.313094135116955, "grad_norm": 0.357421875, "learning_rate": 9.54852903233725e-05, "loss": 0.4305, "step": 51730 }, { "epoch": 1.3132210531659707, "grad_norm": 0.34765625, "learning_rate": 9.545433623559384e-05, "loss": 0.3958, "step": 51735 }, { "epoch": 1.3133479712149865, "grad_norm": 0.337890625, "learning_rate": 9.542338482434914e-05, "loss": 0.4207, "step": 51740 }, { "epoch": 1.3134748892640022, "grad_norm": 0.34375, "learning_rate": 9.539243609115724e-05, "loss": 0.4318, "step": 51745 }, { "epoch": 1.313601807313018, "grad_norm": 0.333984375, "learning_rate": 9.536149003753674e-05, "loss": 0.3769, "step": 51750 }, { "epoch": 1.3137287253620338, "grad_norm": 0.353515625, "learning_rate": 9.533054666500612e-05, "loss": 0.4398, "step": 51755 }, { "epoch": 1.3138556434110495, "grad_norm": 0.361328125, "learning_rate": 9.529960597508383e-05, "loss": 0.3904, "step": 51760 }, { "epoch": 1.3139825614600653, "grad_norm": 0.3515625, "learning_rate": 9.526866796928809e-05, "loss": 0.4181, "step": 51765 }, { "epoch": 1.314109479509081, "grad_norm": 0.33984375, "learning_rate": 9.523773264913699e-05, "loss": 0.4284, "step": 51770 }, { "epoch": 1.3142363975580968, "grad_norm": 0.345703125, "learning_rate": 9.52068000161485e-05, "loss": 0.4016, "step": 51775 }, { "epoch": 1.3143633156071126, "grad_norm": 0.31640625, "learning_rate": 9.517587007184058e-05, "loss": 0.4291, "step": 51780 }, { "epoch": 1.3144902336561282, "grad_norm": 0.357421875, "learning_rate": 9.514494281773085e-05, "loss": 0.4206, "step": 51785 }, { "epoch": 1.314617151705144, "grad_norm": 0.353515625, "learning_rate": 9.511401825533692e-05, "loss": 0.423, "step": 51790 }, { "epoch": 1.3147440697541597, "grad_norm": 0.337890625, "learning_rate": 9.508309638617627e-05, "loss": 0.4193, "step": 51795 }, { "epoch": 1.3148709878031755, "grad_norm": 0.373046875, "learning_rate": 9.505217721176616e-05, "loss": 0.4273, "step": 51800 }, { "epoch": 1.3149979058521912, "grad_norm": 0.369140625, "learning_rate": 9.502126073362388e-05, "loss": 0.4303, "step": 51805 }, { "epoch": 1.315124823901207, "grad_norm": 0.34375, "learning_rate": 9.49903469532664e-05, "loss": 0.4114, "step": 51810 }, { "epoch": 1.3152517419502228, "grad_norm": 0.36328125, "learning_rate": 9.49594358722107e-05, "loss": 0.4233, "step": 51815 }, { "epoch": 1.3153786599992385, "grad_norm": 0.357421875, "learning_rate": 9.492852749197359e-05, "loss": 0.4488, "step": 51820 }, { "epoch": 1.3155055780482543, "grad_norm": 0.3515625, "learning_rate": 9.489762181407165e-05, "loss": 0.4162, "step": 51825 }, { "epoch": 1.31563249609727, "grad_norm": 0.365234375, "learning_rate": 9.486671884002148e-05, "loss": 0.4423, "step": 51830 }, { "epoch": 1.3157594141462856, "grad_norm": 0.36328125, "learning_rate": 9.483581857133938e-05, "loss": 0.4282, "step": 51835 }, { "epoch": 1.3158863321953014, "grad_norm": 0.357421875, "learning_rate": 9.480492100954177e-05, "loss": 0.411, "step": 51840 }, { "epoch": 1.3160132502443171, "grad_norm": 0.35546875, "learning_rate": 9.477402615614468e-05, "loss": 0.4593, "step": 51845 }, { "epoch": 1.316140168293333, "grad_norm": 0.341796875, "learning_rate": 9.47431340126641e-05, "loss": 0.4277, "step": 51850 }, { "epoch": 1.3162670863423487, "grad_norm": 0.3046875, "learning_rate": 9.471224458061596e-05, "loss": 0.3909, "step": 51855 }, { "epoch": 1.3163940043913644, "grad_norm": 0.3515625, "learning_rate": 9.468135786151597e-05, "loss": 0.4243, "step": 51860 }, { "epoch": 1.3165209224403802, "grad_norm": 0.333984375, "learning_rate": 9.465047385687969e-05, "loss": 0.4452, "step": 51865 }, { "epoch": 1.316647840489396, "grad_norm": 0.35546875, "learning_rate": 9.461959256822261e-05, "loss": 0.4214, "step": 51870 }, { "epoch": 1.3167747585384117, "grad_norm": 0.322265625, "learning_rate": 9.45887139970601e-05, "loss": 0.416, "step": 51875 }, { "epoch": 1.3169016765874275, "grad_norm": 0.3359375, "learning_rate": 9.455783814490725e-05, "loss": 0.3873, "step": 51880 }, { "epoch": 1.3170285946364433, "grad_norm": 0.37109375, "learning_rate": 9.452696501327923e-05, "loss": 0.4218, "step": 51885 }, { "epoch": 1.317155512685459, "grad_norm": 0.353515625, "learning_rate": 9.449609460369097e-05, "loss": 0.4116, "step": 51890 }, { "epoch": 1.3172824307344748, "grad_norm": 0.35546875, "learning_rate": 9.446522691765722e-05, "loss": 0.4207, "step": 51895 }, { "epoch": 1.3174093487834906, "grad_norm": 0.33203125, "learning_rate": 9.443436195669258e-05, "loss": 0.3836, "step": 51900 }, { "epoch": 1.3175362668325064, "grad_norm": 0.361328125, "learning_rate": 9.440349972231173e-05, "loss": 0.4295, "step": 51905 }, { "epoch": 1.3176631848815221, "grad_norm": 0.353515625, "learning_rate": 9.437264021602903e-05, "loss": 0.42, "step": 51910 }, { "epoch": 1.3177901029305379, "grad_norm": 0.337890625, "learning_rate": 9.434178343935873e-05, "loss": 0.4044, "step": 51915 }, { "epoch": 1.3179170209795534, "grad_norm": 0.34765625, "learning_rate": 9.43109293938149e-05, "loss": 0.4323, "step": 51920 }, { "epoch": 1.3180439390285692, "grad_norm": 0.333984375, "learning_rate": 9.428007808091157e-05, "loss": 0.408, "step": 51925 }, { "epoch": 1.318170857077585, "grad_norm": 0.8046875, "learning_rate": 9.424922950216263e-05, "loss": 0.3955, "step": 51930 }, { "epoch": 1.3182977751266007, "grad_norm": 0.357421875, "learning_rate": 9.421838365908182e-05, "loss": 0.4129, "step": 51935 }, { "epoch": 1.3184246931756165, "grad_norm": 0.337890625, "learning_rate": 9.418754055318268e-05, "loss": 0.4177, "step": 51940 }, { "epoch": 1.3185516112246323, "grad_norm": 0.345703125, "learning_rate": 9.415670018597872e-05, "loss": 0.4205, "step": 51945 }, { "epoch": 1.318678529273648, "grad_norm": 0.357421875, "learning_rate": 9.412586255898321e-05, "loss": 0.4318, "step": 51950 }, { "epoch": 1.3188054473226638, "grad_norm": 0.345703125, "learning_rate": 9.409502767370934e-05, "loss": 0.4336, "step": 51955 }, { "epoch": 1.3189323653716796, "grad_norm": 0.353515625, "learning_rate": 9.406419553167024e-05, "loss": 0.4113, "step": 51960 }, { "epoch": 1.3190592834206953, "grad_norm": 0.341796875, "learning_rate": 9.403336613437873e-05, "loss": 0.4214, "step": 51965 }, { "epoch": 1.3191862014697109, "grad_norm": 0.333984375, "learning_rate": 9.400253948334767e-05, "loss": 0.3983, "step": 51970 }, { "epoch": 1.3193131195187267, "grad_norm": 0.32421875, "learning_rate": 9.39717155800897e-05, "loss": 0.3917, "step": 51975 }, { "epoch": 1.3194400375677424, "grad_norm": 0.34765625, "learning_rate": 9.394089442611734e-05, "loss": 0.4193, "step": 51980 }, { "epoch": 1.3195669556167582, "grad_norm": 0.3515625, "learning_rate": 9.391007602294299e-05, "loss": 0.425, "step": 51985 }, { "epoch": 1.319693873665774, "grad_norm": 0.341796875, "learning_rate": 9.387926037207882e-05, "loss": 0.4382, "step": 51990 }, { "epoch": 1.3198207917147897, "grad_norm": 0.349609375, "learning_rate": 9.3848447475037e-05, "loss": 0.4224, "step": 51995 }, { "epoch": 1.3199477097638055, "grad_norm": 0.3359375, "learning_rate": 9.381763733332948e-05, "loss": 0.4013, "step": 52000 }, { "epoch": 1.3200746278128213, "grad_norm": 0.33984375, "learning_rate": 9.378682994846815e-05, "loss": 0.4184, "step": 52005 }, { "epoch": 1.320201545861837, "grad_norm": 0.33203125, "learning_rate": 9.375602532196466e-05, "loss": 0.4109, "step": 52010 }, { "epoch": 1.3203284639108528, "grad_norm": 0.357421875, "learning_rate": 9.37252234553306e-05, "loss": 0.4087, "step": 52015 }, { "epoch": 1.3204553819598686, "grad_norm": 0.361328125, "learning_rate": 9.369442435007743e-05, "loss": 0.4188, "step": 52020 }, { "epoch": 1.3205823000088843, "grad_norm": 0.373046875, "learning_rate": 9.366362800771635e-05, "loss": 0.4239, "step": 52025 }, { "epoch": 1.3207092180579, "grad_norm": 0.32421875, "learning_rate": 9.363283442975867e-05, "loss": 0.4018, "step": 52030 }, { "epoch": 1.3208361361069159, "grad_norm": 0.3515625, "learning_rate": 9.360204361771536e-05, "loss": 0.4063, "step": 52035 }, { "epoch": 1.3209630541559316, "grad_norm": 0.33984375, "learning_rate": 9.35712555730973e-05, "loss": 0.3981, "step": 52040 }, { "epoch": 1.3210899722049474, "grad_norm": 0.376953125, "learning_rate": 9.354047029741523e-05, "loss": 0.4345, "step": 52045 }, { "epoch": 1.321216890253963, "grad_norm": 0.36328125, "learning_rate": 9.35096877921798e-05, "loss": 0.4345, "step": 52050 }, { "epoch": 1.3213438083029787, "grad_norm": 0.365234375, "learning_rate": 9.347890805890151e-05, "loss": 0.4161, "step": 52055 }, { "epoch": 1.3214707263519945, "grad_norm": 0.31640625, "learning_rate": 9.344813109909067e-05, "loss": 0.4157, "step": 52060 }, { "epoch": 1.3215976444010102, "grad_norm": 0.361328125, "learning_rate": 9.341735691425752e-05, "loss": 0.4121, "step": 52065 }, { "epoch": 1.321724562450026, "grad_norm": 0.3203125, "learning_rate": 9.338658550591212e-05, "loss": 0.4, "step": 52070 }, { "epoch": 1.3218514804990418, "grad_norm": 0.345703125, "learning_rate": 9.335581687556443e-05, "loss": 0.435, "step": 52075 }, { "epoch": 1.3219783985480575, "grad_norm": 0.3515625, "learning_rate": 9.332505102472418e-05, "loss": 0.4497, "step": 52080 }, { "epoch": 1.3221053165970733, "grad_norm": 0.349609375, "learning_rate": 9.329428795490115e-05, "loss": 0.4331, "step": 52085 }, { "epoch": 1.322232234646089, "grad_norm": 0.337890625, "learning_rate": 9.32635276676048e-05, "loss": 0.4128, "step": 52090 }, { "epoch": 1.3223591526951048, "grad_norm": 0.36328125, "learning_rate": 9.323277016434456e-05, "loss": 0.3996, "step": 52095 }, { "epoch": 1.3224860707441204, "grad_norm": 0.36328125, "learning_rate": 9.320201544662967e-05, "loss": 0.4347, "step": 52100 }, { "epoch": 1.3226129887931362, "grad_norm": 0.337890625, "learning_rate": 9.317126351596929e-05, "loss": 0.4318, "step": 52105 }, { "epoch": 1.322739906842152, "grad_norm": 0.361328125, "learning_rate": 9.314051437387236e-05, "loss": 0.4409, "step": 52110 }, { "epoch": 1.3228668248911677, "grad_norm": 0.3515625, "learning_rate": 9.310976802184774e-05, "loss": 0.4104, "step": 52115 }, { "epoch": 1.3229937429401835, "grad_norm": 0.353515625, "learning_rate": 9.307902446140413e-05, "loss": 0.4081, "step": 52120 }, { "epoch": 1.3231206609891992, "grad_norm": 0.32421875, "learning_rate": 9.304828369405008e-05, "loss": 0.4365, "step": 52125 }, { "epoch": 1.323247579038215, "grad_norm": 0.36328125, "learning_rate": 9.301754572129413e-05, "loss": 0.4401, "step": 52130 }, { "epoch": 1.3233744970872308, "grad_norm": 0.34765625, "learning_rate": 9.298681054464449e-05, "loss": 0.4215, "step": 52135 }, { "epoch": 1.3235014151362465, "grad_norm": 0.349609375, "learning_rate": 9.295607816560934e-05, "loss": 0.3905, "step": 52140 }, { "epoch": 1.3236283331852623, "grad_norm": 0.35546875, "learning_rate": 9.292534858569672e-05, "loss": 0.4189, "step": 52145 }, { "epoch": 1.323755251234278, "grad_norm": 0.359375, "learning_rate": 9.289462180641443e-05, "loss": 0.3949, "step": 52150 }, { "epoch": 1.3238821692832938, "grad_norm": 0.345703125, "learning_rate": 9.286389782927038e-05, "loss": 0.4041, "step": 52155 }, { "epoch": 1.3240090873323096, "grad_norm": 0.349609375, "learning_rate": 9.283317665577212e-05, "loss": 0.4525, "step": 52160 }, { "epoch": 1.3241360053813254, "grad_norm": 0.3515625, "learning_rate": 9.280245828742709e-05, "loss": 0.409, "step": 52165 }, { "epoch": 1.3242629234303411, "grad_norm": 0.37890625, "learning_rate": 9.277174272574261e-05, "loss": 0.4269, "step": 52170 }, { "epoch": 1.324389841479357, "grad_norm": 0.34375, "learning_rate": 9.274102997222597e-05, "loss": 0.4104, "step": 52175 }, { "epoch": 1.3245167595283727, "grad_norm": 0.37890625, "learning_rate": 9.271032002838416e-05, "loss": 0.3956, "step": 52180 }, { "epoch": 1.3246436775773882, "grad_norm": 0.353515625, "learning_rate": 9.267961289572415e-05, "loss": 0.4435, "step": 52185 }, { "epoch": 1.324770595626404, "grad_norm": 0.35546875, "learning_rate": 9.264890857575266e-05, "loss": 0.4449, "step": 52190 }, { "epoch": 1.3248975136754197, "grad_norm": 0.328125, "learning_rate": 9.26182070699764e-05, "loss": 0.4199, "step": 52195 }, { "epoch": 1.3250244317244355, "grad_norm": 0.345703125, "learning_rate": 9.258750837990184e-05, "loss": 0.3955, "step": 52200 }, { "epoch": 1.3251513497734513, "grad_norm": 0.359375, "learning_rate": 9.255681250703542e-05, "loss": 0.3998, "step": 52205 }, { "epoch": 1.325278267822467, "grad_norm": 0.341796875, "learning_rate": 9.25261194528833e-05, "loss": 0.3901, "step": 52210 }, { "epoch": 1.3254051858714828, "grad_norm": 0.341796875, "learning_rate": 9.249542921895155e-05, "loss": 0.3884, "step": 52215 }, { "epoch": 1.3255321039204986, "grad_norm": 0.34765625, "learning_rate": 9.246474180674625e-05, "loss": 0.4315, "step": 52220 }, { "epoch": 1.3256590219695144, "grad_norm": 0.349609375, "learning_rate": 9.243405721777315e-05, "loss": 0.4184, "step": 52225 }, { "epoch": 1.3257859400185301, "grad_norm": 0.34765625, "learning_rate": 9.240337545353792e-05, "loss": 0.4147, "step": 52230 }, { "epoch": 1.3259128580675457, "grad_norm": 0.333984375, "learning_rate": 9.237269651554613e-05, "loss": 0.4176, "step": 52235 }, { "epoch": 1.3260397761165614, "grad_norm": 0.376953125, "learning_rate": 9.234202040530316e-05, "loss": 0.4457, "step": 52240 }, { "epoch": 1.3261666941655772, "grad_norm": 0.359375, "learning_rate": 9.231134712431426e-05, "loss": 0.4297, "step": 52245 }, { "epoch": 1.326293612214593, "grad_norm": 0.35546875, "learning_rate": 9.228067667408458e-05, "loss": 0.4182, "step": 52250 }, { "epoch": 1.3264205302636087, "grad_norm": 0.337890625, "learning_rate": 9.225000905611913e-05, "loss": 0.3968, "step": 52255 }, { "epoch": 1.3265474483126245, "grad_norm": 0.37109375, "learning_rate": 9.221934427192273e-05, "loss": 0.4332, "step": 52260 }, { "epoch": 1.3266743663616403, "grad_norm": 0.302734375, "learning_rate": 9.218868232300007e-05, "loss": 0.4249, "step": 52265 }, { "epoch": 1.326801284410656, "grad_norm": 0.373046875, "learning_rate": 9.215802321085576e-05, "loss": 0.4334, "step": 52270 }, { "epoch": 1.3269282024596718, "grad_norm": 0.357421875, "learning_rate": 9.212736693699418e-05, "loss": 0.4269, "step": 52275 }, { "epoch": 1.3270551205086876, "grad_norm": 0.34765625, "learning_rate": 9.209671350291962e-05, "loss": 0.4097, "step": 52280 }, { "epoch": 1.3271820385577033, "grad_norm": 0.33984375, "learning_rate": 9.206606291013632e-05, "loss": 0.3969, "step": 52285 }, { "epoch": 1.327308956606719, "grad_norm": 0.388671875, "learning_rate": 9.203541516014824e-05, "loss": 0.4035, "step": 52290 }, { "epoch": 1.3274358746557349, "grad_norm": 0.333984375, "learning_rate": 9.20047702544592e-05, "loss": 0.421, "step": 52295 }, { "epoch": 1.3275627927047506, "grad_norm": 0.35546875, "learning_rate": 9.197412819457304e-05, "loss": 0.4026, "step": 52300 }, { "epoch": 1.3276897107537664, "grad_norm": 0.357421875, "learning_rate": 9.194348898199328e-05, "loss": 0.41, "step": 52305 }, { "epoch": 1.3278166288027822, "grad_norm": 0.34765625, "learning_rate": 9.191285261822336e-05, "loss": 0.4277, "step": 52310 }, { "epoch": 1.3279435468517977, "grad_norm": 0.365234375, "learning_rate": 9.188221910476666e-05, "loss": 0.4126, "step": 52315 }, { "epoch": 1.3280704649008135, "grad_norm": 0.345703125, "learning_rate": 9.18515884431263e-05, "loss": 0.4078, "step": 52320 }, { "epoch": 1.3281973829498293, "grad_norm": 0.357421875, "learning_rate": 9.182096063480533e-05, "loss": 0.4378, "step": 52325 }, { "epoch": 1.328324300998845, "grad_norm": 0.330078125, "learning_rate": 9.179033568130666e-05, "loss": 0.387, "step": 52330 }, { "epoch": 1.3284512190478608, "grad_norm": 0.365234375, "learning_rate": 9.175971358413303e-05, "loss": 0.476, "step": 52335 }, { "epoch": 1.3285781370968766, "grad_norm": 0.341796875, "learning_rate": 9.172909434478698e-05, "loss": 0.4311, "step": 52340 }, { "epoch": 1.3287050551458923, "grad_norm": 0.345703125, "learning_rate": 9.169847796477113e-05, "loss": 0.3967, "step": 52345 }, { "epoch": 1.328831973194908, "grad_norm": 0.380859375, "learning_rate": 9.166786444558775e-05, "loss": 0.451, "step": 52350 }, { "epoch": 1.3289588912439239, "grad_norm": 0.35546875, "learning_rate": 9.163725378873901e-05, "loss": 0.4293, "step": 52355 }, { "epoch": 1.3290858092929396, "grad_norm": 0.3125, "learning_rate": 9.1606645995727e-05, "loss": 0.4097, "step": 52360 }, { "epoch": 1.3292127273419552, "grad_norm": 0.341796875, "learning_rate": 9.15760410680536e-05, "loss": 0.4157, "step": 52365 }, { "epoch": 1.329339645390971, "grad_norm": 0.33984375, "learning_rate": 9.154543900722054e-05, "loss": 0.4273, "step": 52370 }, { "epoch": 1.3294665634399867, "grad_norm": 0.326171875, "learning_rate": 9.151483981472956e-05, "loss": 0.3747, "step": 52375 }, { "epoch": 1.3295934814890025, "grad_norm": 0.369140625, "learning_rate": 9.14842434920821e-05, "loss": 0.4359, "step": 52380 }, { "epoch": 1.3297203995380182, "grad_norm": 0.365234375, "learning_rate": 9.145365004077948e-05, "loss": 0.4118, "step": 52385 }, { "epoch": 1.329847317587034, "grad_norm": 0.3515625, "learning_rate": 9.142305946232293e-05, "loss": 0.4318, "step": 52390 }, { "epoch": 1.3299742356360498, "grad_norm": 0.333984375, "learning_rate": 9.139247175821354e-05, "loss": 0.3969, "step": 52395 }, { "epoch": 1.3301011536850655, "grad_norm": 0.365234375, "learning_rate": 9.136188692995218e-05, "loss": 0.4406, "step": 52400 }, { "epoch": 1.3302280717340813, "grad_norm": 0.3515625, "learning_rate": 9.133130497903966e-05, "loss": 0.4002, "step": 52405 }, { "epoch": 1.330354989783097, "grad_norm": 0.49609375, "learning_rate": 9.130072590697667e-05, "loss": 0.4465, "step": 52410 }, { "epoch": 1.3304819078321128, "grad_norm": 0.341796875, "learning_rate": 9.127014971526365e-05, "loss": 0.4447, "step": 52415 }, { "epoch": 1.3306088258811286, "grad_norm": 0.37890625, "learning_rate": 9.1239576405401e-05, "loss": 0.4283, "step": 52420 }, { "epoch": 1.3307357439301444, "grad_norm": 0.345703125, "learning_rate": 9.120900597888895e-05, "loss": 0.4268, "step": 52425 }, { "epoch": 1.3308626619791601, "grad_norm": 0.34375, "learning_rate": 9.117843843722755e-05, "loss": 0.4164, "step": 52430 }, { "epoch": 1.330989580028176, "grad_norm": 0.34375, "learning_rate": 9.114787378191675e-05, "loss": 0.4117, "step": 52435 }, { "epoch": 1.3311164980771917, "grad_norm": 0.353515625, "learning_rate": 9.111731201445633e-05, "loss": 0.4163, "step": 52440 }, { "epoch": 1.3312434161262074, "grad_norm": 0.314453125, "learning_rate": 9.108675313634595e-05, "loss": 0.3897, "step": 52445 }, { "epoch": 1.331370334175223, "grad_norm": 0.333984375, "learning_rate": 9.105619714908513e-05, "loss": 0.405, "step": 52450 }, { "epoch": 1.3314972522242388, "grad_norm": 0.345703125, "learning_rate": 9.102564405417325e-05, "loss": 0.4023, "step": 52455 }, { "epoch": 1.3316241702732545, "grad_norm": 0.359375, "learning_rate": 9.099509385310951e-05, "loss": 0.4004, "step": 52460 }, { "epoch": 1.3317510883222703, "grad_norm": 0.345703125, "learning_rate": 9.096454654739297e-05, "loss": 0.4056, "step": 52465 }, { "epoch": 1.331878006371286, "grad_norm": 0.357421875, "learning_rate": 9.093400213852266e-05, "loss": 0.4091, "step": 52470 }, { "epoch": 1.3320049244203018, "grad_norm": 0.3515625, "learning_rate": 9.090346062799734e-05, "loss": 0.4217, "step": 52475 }, { "epoch": 1.3321318424693176, "grad_norm": 0.361328125, "learning_rate": 9.087292201731567e-05, "loss": 0.4163, "step": 52480 }, { "epoch": 1.3322587605183334, "grad_norm": 0.326171875, "learning_rate": 9.084238630797616e-05, "loss": 0.4337, "step": 52485 }, { "epoch": 1.3323856785673491, "grad_norm": 0.353515625, "learning_rate": 9.081185350147718e-05, "loss": 0.4135, "step": 52490 }, { "epoch": 1.3325125966163647, "grad_norm": 0.365234375, "learning_rate": 9.078132359931696e-05, "loss": 0.422, "step": 52495 }, { "epoch": 1.3326395146653804, "grad_norm": 0.38671875, "learning_rate": 9.075079660299362e-05, "loss": 0.4222, "step": 52500 }, { "epoch": 1.3327664327143962, "grad_norm": 0.359375, "learning_rate": 9.07202725140051e-05, "loss": 0.3999, "step": 52505 }, { "epoch": 1.332893350763412, "grad_norm": 0.34765625, "learning_rate": 9.068975133384919e-05, "loss": 0.4051, "step": 52510 }, { "epoch": 1.3330202688124277, "grad_norm": 0.357421875, "learning_rate": 9.065923306402354e-05, "loss": 0.4107, "step": 52515 }, { "epoch": 1.3331471868614435, "grad_norm": 0.3359375, "learning_rate": 9.06287177060257e-05, "loss": 0.3974, "step": 52520 }, { "epoch": 1.3332741049104593, "grad_norm": 0.35546875, "learning_rate": 9.0598205261353e-05, "loss": 0.3972, "step": 52525 }, { "epoch": 1.333401022959475, "grad_norm": 0.322265625, "learning_rate": 9.05676957315027e-05, "loss": 0.3984, "step": 52530 }, { "epoch": 1.3335279410084908, "grad_norm": 0.35546875, "learning_rate": 9.053718911797194e-05, "loss": 0.4175, "step": 52535 }, { "epoch": 1.3336548590575066, "grad_norm": 0.34765625, "learning_rate": 9.050668542225757e-05, "loss": 0.42, "step": 52540 }, { "epoch": 1.3337817771065223, "grad_norm": 0.34375, "learning_rate": 9.047618464585651e-05, "loss": 0.4422, "step": 52545 }, { "epoch": 1.3339086951555381, "grad_norm": 0.3046875, "learning_rate": 9.044568679026534e-05, "loss": 0.3782, "step": 52550 }, { "epoch": 1.3340356132045539, "grad_norm": 0.34765625, "learning_rate": 9.04151918569806e-05, "loss": 0.3973, "step": 52555 }, { "epoch": 1.3341625312535697, "grad_norm": 0.35546875, "learning_rate": 9.038469984749866e-05, "loss": 0.4071, "step": 52560 }, { "epoch": 1.3342894493025854, "grad_norm": 0.3671875, "learning_rate": 9.035421076331576e-05, "loss": 0.4468, "step": 52565 }, { "epoch": 1.3344163673516012, "grad_norm": 0.322265625, "learning_rate": 9.032372460592794e-05, "loss": 0.4056, "step": 52570 }, { "epoch": 1.334543285400617, "grad_norm": 0.333984375, "learning_rate": 9.02932413768312e-05, "loss": 0.4136, "step": 52575 }, { "epoch": 1.3346702034496325, "grad_norm": 0.3515625, "learning_rate": 9.026276107752133e-05, "loss": 0.4081, "step": 52580 }, { "epoch": 1.3347971214986483, "grad_norm": 0.34375, "learning_rate": 9.0232283709494e-05, "loss": 0.4437, "step": 52585 }, { "epoch": 1.334924039547664, "grad_norm": 0.3359375, "learning_rate": 9.020180927424468e-05, "loss": 0.408, "step": 52590 }, { "epoch": 1.3350509575966798, "grad_norm": 0.345703125, "learning_rate": 9.017133777326868e-05, "loss": 0.461, "step": 52595 }, { "epoch": 1.3351778756456956, "grad_norm": 0.337890625, "learning_rate": 9.014086920806139e-05, "loss": 0.4221, "step": 52600 }, { "epoch": 1.3353047936947113, "grad_norm": 0.35546875, "learning_rate": 9.01104035801178e-05, "loss": 0.4094, "step": 52605 }, { "epoch": 1.335431711743727, "grad_norm": 0.361328125, "learning_rate": 9.007994089093285e-05, "loss": 0.4167, "step": 52610 }, { "epoch": 1.3355586297927429, "grad_norm": 0.359375, "learning_rate": 9.00494811420013e-05, "loss": 0.4109, "step": 52615 }, { "epoch": 1.3356855478417586, "grad_norm": 0.33984375, "learning_rate": 9.001902433481786e-05, "loss": 0.4391, "step": 52620 }, { "epoch": 1.3358124658907744, "grad_norm": 0.337890625, "learning_rate": 8.998857047087699e-05, "loss": 0.4279, "step": 52625 }, { "epoch": 1.33593938393979, "grad_norm": 0.330078125, "learning_rate": 8.99581195516731e-05, "loss": 0.3872, "step": 52630 }, { "epoch": 1.3360663019888057, "grad_norm": 0.365234375, "learning_rate": 8.99276715787003e-05, "loss": 0.4128, "step": 52635 }, { "epoch": 1.3361932200378215, "grad_norm": 0.3671875, "learning_rate": 8.989722655345279e-05, "loss": 0.4155, "step": 52640 }, { "epoch": 1.3363201380868373, "grad_norm": 0.3203125, "learning_rate": 8.986678447742438e-05, "loss": 0.4368, "step": 52645 }, { "epoch": 1.336447056135853, "grad_norm": 0.341796875, "learning_rate": 8.983634535210893e-05, "loss": 0.411, "step": 52650 }, { "epoch": 1.3365739741848688, "grad_norm": 0.365234375, "learning_rate": 8.980590917900001e-05, "loss": 0.4299, "step": 52655 }, { "epoch": 1.3367008922338846, "grad_norm": 0.353515625, "learning_rate": 8.977547595959117e-05, "loss": 0.4098, "step": 52660 }, { "epoch": 1.3368278102829003, "grad_norm": 0.349609375, "learning_rate": 8.974504569537575e-05, "loss": 0.404, "step": 52665 }, { "epoch": 1.336954728331916, "grad_norm": 0.33984375, "learning_rate": 8.971461838784693e-05, "loss": 0.4143, "step": 52670 }, { "epoch": 1.3370816463809319, "grad_norm": 0.306640625, "learning_rate": 8.968419403849777e-05, "loss": 0.3759, "step": 52675 }, { "epoch": 1.3372085644299476, "grad_norm": 0.322265625, "learning_rate": 8.96537726488212e-05, "loss": 0.4021, "step": 52680 }, { "epoch": 1.3373354824789634, "grad_norm": 0.353515625, "learning_rate": 8.962335422030995e-05, "loss": 0.4136, "step": 52685 }, { "epoch": 1.3374624005279792, "grad_norm": 0.34375, "learning_rate": 8.959293875445665e-05, "loss": 0.4368, "step": 52690 }, { "epoch": 1.337589318576995, "grad_norm": 0.376953125, "learning_rate": 8.956252625275381e-05, "loss": 0.4259, "step": 52695 }, { "epoch": 1.3377162366260107, "grad_norm": 0.328125, "learning_rate": 8.953211671669372e-05, "loss": 0.4196, "step": 52700 }, { "epoch": 1.3378431546750265, "grad_norm": 0.353515625, "learning_rate": 8.950171014776856e-05, "loss": 0.4372, "step": 52705 }, { "epoch": 1.337970072724042, "grad_norm": 0.32421875, "learning_rate": 8.947130654747042e-05, "loss": 0.4073, "step": 52710 }, { "epoch": 1.3380969907730578, "grad_norm": 0.3515625, "learning_rate": 8.944090591729112e-05, "loss": 0.4203, "step": 52715 }, { "epoch": 1.3382239088220735, "grad_norm": 0.376953125, "learning_rate": 8.941050825872241e-05, "loss": 0.3969, "step": 52720 }, { "epoch": 1.3383508268710893, "grad_norm": 0.384765625, "learning_rate": 8.938011357325597e-05, "loss": 0.4233, "step": 52725 }, { "epoch": 1.338477744920105, "grad_norm": 0.3515625, "learning_rate": 8.934972186238322e-05, "loss": 0.4144, "step": 52730 }, { "epoch": 1.3386046629691208, "grad_norm": 0.365234375, "learning_rate": 8.931933312759544e-05, "loss": 0.4109, "step": 52735 }, { "epoch": 1.3387315810181366, "grad_norm": 0.357421875, "learning_rate": 8.928894737038382e-05, "loss": 0.4158, "step": 52740 }, { "epoch": 1.3388584990671524, "grad_norm": 0.357421875, "learning_rate": 8.925856459223937e-05, "loss": 0.4107, "step": 52745 }, { "epoch": 1.3389854171161681, "grad_norm": 0.39453125, "learning_rate": 8.922818479465296e-05, "loss": 0.4344, "step": 52750 }, { "epoch": 1.339112335165184, "grad_norm": 0.35546875, "learning_rate": 8.919780797911532e-05, "loss": 0.4156, "step": 52755 }, { "epoch": 1.3392392532141995, "grad_norm": 0.3359375, "learning_rate": 8.916743414711703e-05, "loss": 0.4261, "step": 52760 }, { "epoch": 1.3393661712632152, "grad_norm": 0.345703125, "learning_rate": 8.91370633001485e-05, "loss": 0.4322, "step": 52765 }, { "epoch": 1.339493089312231, "grad_norm": 0.35546875, "learning_rate": 8.91066954397e-05, "loss": 0.4111, "step": 52770 }, { "epoch": 1.3396200073612468, "grad_norm": 0.35546875, "learning_rate": 8.907633056726174e-05, "loss": 0.4089, "step": 52775 }, { "epoch": 1.3397469254102625, "grad_norm": 0.333984375, "learning_rate": 8.904596868432367e-05, "loss": 0.4071, "step": 52780 }, { "epoch": 1.3398738434592783, "grad_norm": 0.3125, "learning_rate": 8.901560979237557e-05, "loss": 0.429, "step": 52785 }, { "epoch": 1.340000761508294, "grad_norm": 0.349609375, "learning_rate": 8.898525389290727e-05, "loss": 0.395, "step": 52790 }, { "epoch": 1.3401276795573098, "grad_norm": 0.33984375, "learning_rate": 8.895490098740825e-05, "loss": 0.4175, "step": 52795 }, { "epoch": 1.3402545976063256, "grad_norm": 0.337890625, "learning_rate": 8.892455107736792e-05, "loss": 0.4135, "step": 52800 }, { "epoch": 1.3403815156553414, "grad_norm": 0.333984375, "learning_rate": 8.889420416427554e-05, "loss": 0.3994, "step": 52805 }, { "epoch": 1.3405084337043571, "grad_norm": 0.32421875, "learning_rate": 8.886386024962023e-05, "loss": 0.3889, "step": 52810 }, { "epoch": 1.340635351753373, "grad_norm": 0.3515625, "learning_rate": 8.883351933489091e-05, "loss": 0.4164, "step": 52815 }, { "epoch": 1.3407622698023887, "grad_norm": 0.330078125, "learning_rate": 8.880318142157645e-05, "loss": 0.3868, "step": 52820 }, { "epoch": 1.3408891878514044, "grad_norm": 0.330078125, "learning_rate": 8.877284651116552e-05, "loss": 0.4079, "step": 52825 }, { "epoch": 1.3410161059004202, "grad_norm": 0.349609375, "learning_rate": 8.874251460514662e-05, "loss": 0.4505, "step": 52830 }, { "epoch": 1.341143023949436, "grad_norm": 0.357421875, "learning_rate": 8.871218570500812e-05, "loss": 0.4531, "step": 52835 }, { "epoch": 1.3412699419984517, "grad_norm": 0.404296875, "learning_rate": 8.868185981223825e-05, "loss": 0.4328, "step": 52840 }, { "epoch": 1.3413968600474673, "grad_norm": 0.359375, "learning_rate": 8.865153692832503e-05, "loss": 0.4029, "step": 52845 }, { "epoch": 1.341523778096483, "grad_norm": 0.298828125, "learning_rate": 8.862121705475654e-05, "loss": 0.4087, "step": 52850 }, { "epoch": 1.3416506961454988, "grad_norm": 0.34765625, "learning_rate": 8.859090019302046e-05, "loss": 0.4242, "step": 52855 }, { "epoch": 1.3417776141945146, "grad_norm": 0.345703125, "learning_rate": 8.856058634460441e-05, "loss": 0.4301, "step": 52860 }, { "epoch": 1.3419045322435303, "grad_norm": 0.322265625, "learning_rate": 8.853027551099596e-05, "loss": 0.4107, "step": 52865 }, { "epoch": 1.3420314502925461, "grad_norm": 0.365234375, "learning_rate": 8.84999676936824e-05, "loss": 0.4066, "step": 52870 }, { "epoch": 1.3421583683415619, "grad_norm": 0.3359375, "learning_rate": 8.846966289415092e-05, "loss": 0.3982, "step": 52875 }, { "epoch": 1.3422852863905776, "grad_norm": 0.337890625, "learning_rate": 8.843936111388857e-05, "loss": 0.3952, "step": 52880 }, { "epoch": 1.3424122044395934, "grad_norm": 0.337890625, "learning_rate": 8.840906235438226e-05, "loss": 0.4321, "step": 52885 }, { "epoch": 1.3425391224886092, "grad_norm": 0.322265625, "learning_rate": 8.837876661711869e-05, "loss": 0.3955, "step": 52890 }, { "epoch": 1.3426660405376247, "grad_norm": 0.373046875, "learning_rate": 8.834847390358454e-05, "loss": 0.418, "step": 52895 }, { "epoch": 1.3427929585866405, "grad_norm": 0.357421875, "learning_rate": 8.831818421526619e-05, "loss": 0.4131, "step": 52900 }, { "epoch": 1.3429198766356563, "grad_norm": 0.361328125, "learning_rate": 8.828789755364998e-05, "loss": 0.4259, "step": 52905 }, { "epoch": 1.343046794684672, "grad_norm": 0.3515625, "learning_rate": 8.8257613920222e-05, "loss": 0.4172, "step": 52910 }, { "epoch": 1.3431737127336878, "grad_norm": 0.361328125, "learning_rate": 8.822733331646837e-05, "loss": 0.4162, "step": 52915 }, { "epoch": 1.3433006307827036, "grad_norm": 0.33984375, "learning_rate": 8.81970557438749e-05, "loss": 0.4131, "step": 52920 }, { "epoch": 1.3434275488317193, "grad_norm": 0.361328125, "learning_rate": 8.816678120392727e-05, "loss": 0.4413, "step": 52925 }, { "epoch": 1.343554466880735, "grad_norm": 0.36328125, "learning_rate": 8.813650969811105e-05, "loss": 0.4493, "step": 52930 }, { "epoch": 1.3436813849297509, "grad_norm": 0.35546875, "learning_rate": 8.810624122791167e-05, "loss": 0.4142, "step": 52935 }, { "epoch": 1.3438083029787666, "grad_norm": 0.3359375, "learning_rate": 8.807597579481434e-05, "loss": 0.4043, "step": 52940 }, { "epoch": 1.3439352210277824, "grad_norm": 0.353515625, "learning_rate": 8.804571340030425e-05, "loss": 0.4284, "step": 52945 }, { "epoch": 1.3440621390767982, "grad_norm": 0.34765625, "learning_rate": 8.801545404586631e-05, "loss": 0.4169, "step": 52950 }, { "epoch": 1.344189057125814, "grad_norm": 0.353515625, "learning_rate": 8.798519773298536e-05, "loss": 0.4304, "step": 52955 }, { "epoch": 1.3443159751748297, "grad_norm": 0.369140625, "learning_rate": 8.795494446314604e-05, "loss": 0.4398, "step": 52960 }, { "epoch": 1.3444428932238455, "grad_norm": 0.353515625, "learning_rate": 8.792469423783292e-05, "loss": 0.4515, "step": 52965 }, { "epoch": 1.3445698112728612, "grad_norm": 0.34765625, "learning_rate": 8.789444705853023e-05, "loss": 0.4224, "step": 52970 }, { "epoch": 1.3446967293218768, "grad_norm": 0.328125, "learning_rate": 8.786420292672234e-05, "loss": 0.3914, "step": 52975 }, { "epoch": 1.3448236473708925, "grad_norm": 0.34375, "learning_rate": 8.783396184389328e-05, "loss": 0.4311, "step": 52980 }, { "epoch": 1.3449505654199083, "grad_norm": 0.353515625, "learning_rate": 8.780372381152692e-05, "loss": 0.4316, "step": 52985 }, { "epoch": 1.345077483468924, "grad_norm": 0.36328125, "learning_rate": 8.777348883110706e-05, "loss": 0.411, "step": 52990 }, { "epoch": 1.3452044015179399, "grad_norm": 0.353515625, "learning_rate": 8.774325690411735e-05, "loss": 0.441, "step": 52995 }, { "epoch": 1.3453313195669556, "grad_norm": 0.33984375, "learning_rate": 8.77130280320412e-05, "loss": 0.398, "step": 53000 }, { "epoch": 1.3454582376159714, "grad_norm": 0.36328125, "learning_rate": 8.768280221636196e-05, "loss": 0.398, "step": 53005 }, { "epoch": 1.3455851556649872, "grad_norm": 0.330078125, "learning_rate": 8.76525794585628e-05, "loss": 0.3793, "step": 53010 }, { "epoch": 1.345712073714003, "grad_norm": 0.3203125, "learning_rate": 8.76223597601267e-05, "loss": 0.4109, "step": 53015 }, { "epoch": 1.3458389917630187, "grad_norm": 0.341796875, "learning_rate": 8.759214312253658e-05, "loss": 0.3955, "step": 53020 }, { "epoch": 1.3459659098120342, "grad_norm": 0.328125, "learning_rate": 8.756192954727517e-05, "loss": 0.4296, "step": 53025 }, { "epoch": 1.34609282786105, "grad_norm": 0.34765625, "learning_rate": 8.753171903582491e-05, "loss": 0.4222, "step": 53030 }, { "epoch": 1.3462197459100658, "grad_norm": 0.369140625, "learning_rate": 8.750151158966839e-05, "loss": 0.4296, "step": 53035 }, { "epoch": 1.3463466639590815, "grad_norm": 0.376953125, "learning_rate": 8.747130721028778e-05, "loss": 0.4404, "step": 53040 }, { "epoch": 1.3464735820080973, "grad_norm": 0.37890625, "learning_rate": 8.744110589916516e-05, "loss": 0.4525, "step": 53045 }, { "epoch": 1.346600500057113, "grad_norm": 0.369140625, "learning_rate": 8.741090765778262e-05, "loss": 0.4193, "step": 53050 }, { "epoch": 1.3467274181061288, "grad_norm": 0.337890625, "learning_rate": 8.738071248762193e-05, "loss": 0.4382, "step": 53055 }, { "epoch": 1.3468543361551446, "grad_norm": 0.333984375, "learning_rate": 8.735052039016471e-05, "loss": 0.4278, "step": 53060 }, { "epoch": 1.3469812542041604, "grad_norm": 0.359375, "learning_rate": 8.732033136689249e-05, "loss": 0.4153, "step": 53065 }, { "epoch": 1.3471081722531761, "grad_norm": 0.33984375, "learning_rate": 8.729014541928666e-05, "loss": 0.4112, "step": 53070 }, { "epoch": 1.347235090302192, "grad_norm": 0.341796875, "learning_rate": 8.725996254882842e-05, "loss": 0.4429, "step": 53075 }, { "epoch": 1.3473620083512077, "grad_norm": 0.361328125, "learning_rate": 8.722978275699883e-05, "loss": 0.4334, "step": 53080 }, { "epoch": 1.3474889264002234, "grad_norm": 0.34375, "learning_rate": 8.719960604527877e-05, "loss": 0.4409, "step": 53085 }, { "epoch": 1.3476158444492392, "grad_norm": 0.369140625, "learning_rate": 8.716943241514904e-05, "loss": 0.4542, "step": 53090 }, { "epoch": 1.347742762498255, "grad_norm": 0.349609375, "learning_rate": 8.713926186809022e-05, "loss": 0.4269, "step": 53095 }, { "epoch": 1.3478696805472707, "grad_norm": 0.30859375, "learning_rate": 8.710909440558274e-05, "loss": 0.4296, "step": 53100 }, { "epoch": 1.3479965985962865, "grad_norm": 0.361328125, "learning_rate": 8.707893002910701e-05, "loss": 0.4273, "step": 53105 }, { "epoch": 1.348123516645302, "grad_norm": 0.32421875, "learning_rate": 8.70487687401431e-05, "loss": 0.3961, "step": 53110 }, { "epoch": 1.3482504346943178, "grad_norm": 0.359375, "learning_rate": 8.701861054017105e-05, "loss": 0.4093, "step": 53115 }, { "epoch": 1.3483773527433336, "grad_norm": 0.365234375, "learning_rate": 8.69884554306706e-05, "loss": 0.4351, "step": 53120 }, { "epoch": 1.3485042707923494, "grad_norm": 0.322265625, "learning_rate": 8.695830341312162e-05, "loss": 0.3878, "step": 53125 }, { "epoch": 1.3486311888413651, "grad_norm": 0.361328125, "learning_rate": 8.692815448900359e-05, "loss": 0.4243, "step": 53130 }, { "epoch": 1.348758106890381, "grad_norm": 0.345703125, "learning_rate": 8.689800865979588e-05, "loss": 0.4226, "step": 53135 }, { "epoch": 1.3488850249393967, "grad_norm": 0.38671875, "learning_rate": 8.686786592697775e-05, "loss": 0.4307, "step": 53140 }, { "epoch": 1.3490119429884124, "grad_norm": 0.396484375, "learning_rate": 8.68377262920283e-05, "loss": 0.4107, "step": 53145 }, { "epoch": 1.3491388610374282, "grad_norm": 0.337890625, "learning_rate": 8.680758975642647e-05, "loss": 0.4125, "step": 53150 }, { "epoch": 1.349265779086444, "grad_norm": 0.322265625, "learning_rate": 8.677745632165102e-05, "loss": 0.4209, "step": 53155 }, { "epoch": 1.3493926971354595, "grad_norm": 0.345703125, "learning_rate": 8.674732598918056e-05, "loss": 0.4184, "step": 53160 }, { "epoch": 1.3495196151844753, "grad_norm": 0.353515625, "learning_rate": 8.671719876049366e-05, "loss": 0.4282, "step": 53165 }, { "epoch": 1.349646533233491, "grad_norm": 0.34765625, "learning_rate": 8.668707463706861e-05, "loss": 0.4158, "step": 53170 }, { "epoch": 1.3497734512825068, "grad_norm": 0.3359375, "learning_rate": 8.66569536203836e-05, "loss": 0.4184, "step": 53175 }, { "epoch": 1.3499003693315226, "grad_norm": 0.337890625, "learning_rate": 8.662683571191664e-05, "loss": 0.4134, "step": 53180 }, { "epoch": 1.3500272873805383, "grad_norm": 0.361328125, "learning_rate": 8.65967209131456e-05, "loss": 0.4001, "step": 53185 }, { "epoch": 1.350154205429554, "grad_norm": 0.365234375, "learning_rate": 8.656660922554822e-05, "loss": 0.418, "step": 53190 }, { "epoch": 1.3502811234785699, "grad_norm": 0.37109375, "learning_rate": 8.653650065060203e-05, "loss": 0.4708, "step": 53195 }, { "epoch": 1.3504080415275856, "grad_norm": 0.357421875, "learning_rate": 8.650639518978446e-05, "loss": 0.4438, "step": 53200 }, { "epoch": 1.3505349595766014, "grad_norm": 0.34765625, "learning_rate": 8.647629284457283e-05, "loss": 0.436, "step": 53205 }, { "epoch": 1.3506618776256172, "grad_norm": 0.353515625, "learning_rate": 8.644619361644419e-05, "loss": 0.4497, "step": 53210 }, { "epoch": 1.350788795674633, "grad_norm": 0.38671875, "learning_rate": 8.641609750687552e-05, "loss": 0.4315, "step": 53215 }, { "epoch": 1.3509157137236487, "grad_norm": 0.36328125, "learning_rate": 8.638600451734361e-05, "loss": 0.3909, "step": 53220 }, { "epoch": 1.3510426317726645, "grad_norm": 0.3515625, "learning_rate": 8.635591464932505e-05, "loss": 0.4141, "step": 53225 }, { "epoch": 1.3511695498216802, "grad_norm": 0.3515625, "learning_rate": 8.63258279042965e-05, "loss": 0.4301, "step": 53230 }, { "epoch": 1.351296467870696, "grad_norm": 0.375, "learning_rate": 8.629574428373419e-05, "loss": 0.444, "step": 53235 }, { "epoch": 1.3514233859197116, "grad_norm": 0.361328125, "learning_rate": 8.626566378911432e-05, "loss": 0.4407, "step": 53240 }, { "epoch": 1.3515503039687273, "grad_norm": 0.34765625, "learning_rate": 8.623558642191295e-05, "loss": 0.4393, "step": 53245 }, { "epoch": 1.351677222017743, "grad_norm": 0.341796875, "learning_rate": 8.620551218360597e-05, "loss": 0.4514, "step": 53250 }, { "epoch": 1.3518041400667589, "grad_norm": 0.3671875, "learning_rate": 8.617544107566908e-05, "loss": 0.4303, "step": 53255 }, { "epoch": 1.3519310581157746, "grad_norm": 0.3203125, "learning_rate": 8.614537309957788e-05, "loss": 0.4042, "step": 53260 }, { "epoch": 1.3520579761647904, "grad_norm": 0.3515625, "learning_rate": 8.611530825680779e-05, "loss": 0.4222, "step": 53265 }, { "epoch": 1.3521848942138062, "grad_norm": 0.33984375, "learning_rate": 8.608524654883406e-05, "loss": 0.4053, "step": 53270 }, { "epoch": 1.352311812262822, "grad_norm": 0.384765625, "learning_rate": 8.605518797713177e-05, "loss": 0.4341, "step": 53275 }, { "epoch": 1.3524387303118377, "grad_norm": 0.337890625, "learning_rate": 8.602513254317598e-05, "loss": 0.4498, "step": 53280 }, { "epoch": 1.3525656483608535, "grad_norm": 0.365234375, "learning_rate": 8.599508024844147e-05, "loss": 0.4399, "step": 53285 }, { "epoch": 1.352692566409869, "grad_norm": 0.314453125, "learning_rate": 8.596503109440282e-05, "loss": 0.3992, "step": 53290 }, { "epoch": 1.3528194844588848, "grad_norm": 0.369140625, "learning_rate": 8.593498508253463e-05, "loss": 0.4389, "step": 53295 }, { "epoch": 1.3529464025079005, "grad_norm": 0.310546875, "learning_rate": 8.590494221431122e-05, "loss": 0.3834, "step": 53300 }, { "epoch": 1.3530733205569163, "grad_norm": 0.373046875, "learning_rate": 8.587490249120675e-05, "loss": 0.461, "step": 53305 }, { "epoch": 1.353200238605932, "grad_norm": 0.3671875, "learning_rate": 8.584486591469527e-05, "loss": 0.4221, "step": 53310 }, { "epoch": 1.3533271566549478, "grad_norm": 0.349609375, "learning_rate": 8.581483248625067e-05, "loss": 0.4122, "step": 53315 }, { "epoch": 1.3534540747039636, "grad_norm": 0.33203125, "learning_rate": 8.578480220734668e-05, "loss": 0.4199, "step": 53320 }, { "epoch": 1.3535809927529794, "grad_norm": 0.361328125, "learning_rate": 8.575477507945687e-05, "loss": 0.3961, "step": 53325 }, { "epoch": 1.3537079108019952, "grad_norm": 0.36328125, "learning_rate": 8.572475110405464e-05, "loss": 0.4223, "step": 53330 }, { "epoch": 1.353834828851011, "grad_norm": 0.36328125, "learning_rate": 8.569473028261327e-05, "loss": 0.41, "step": 53335 }, { "epoch": 1.3539617469000267, "grad_norm": 0.35546875, "learning_rate": 8.566471261660587e-05, "loss": 0.3967, "step": 53340 }, { "epoch": 1.3540886649490425, "grad_norm": 0.345703125, "learning_rate": 8.56346981075054e-05, "loss": 0.4095, "step": 53345 }, { "epoch": 1.3542155829980582, "grad_norm": 0.35546875, "learning_rate": 8.56046867567846e-05, "loss": 0.4208, "step": 53350 }, { "epoch": 1.354342501047074, "grad_norm": 0.357421875, "learning_rate": 8.557467856591622e-05, "loss": 0.4095, "step": 53355 }, { "epoch": 1.3544694190960898, "grad_norm": 0.3359375, "learning_rate": 8.55446735363727e-05, "loss": 0.4237, "step": 53360 }, { "epoch": 1.3545963371451055, "grad_norm": 0.345703125, "learning_rate": 8.551467166962631e-05, "loss": 0.4355, "step": 53365 }, { "epoch": 1.3547232551941213, "grad_norm": 0.357421875, "learning_rate": 8.548467296714937e-05, "loss": 0.4474, "step": 53370 }, { "epoch": 1.3548501732431368, "grad_norm": 0.34375, "learning_rate": 8.545467743041382e-05, "loss": 0.4133, "step": 53375 }, { "epoch": 1.3549770912921526, "grad_norm": 0.37109375, "learning_rate": 8.542468506089152e-05, "loss": 0.4217, "step": 53380 }, { "epoch": 1.3551040093411684, "grad_norm": 0.365234375, "learning_rate": 8.539469586005422e-05, "loss": 0.4168, "step": 53385 }, { "epoch": 1.3552309273901841, "grad_norm": 0.38671875, "learning_rate": 8.536470982937345e-05, "loss": 0.4428, "step": 53390 }, { "epoch": 1.3553578454392, "grad_norm": 0.345703125, "learning_rate": 8.533472697032063e-05, "loss": 0.3853, "step": 53395 }, { "epoch": 1.3554847634882157, "grad_norm": 0.34375, "learning_rate": 8.5304747284367e-05, "loss": 0.4075, "step": 53400 }, { "epoch": 1.3556116815372314, "grad_norm": 0.37109375, "learning_rate": 8.527477077298366e-05, "loss": 0.4239, "step": 53405 }, { "epoch": 1.3557385995862472, "grad_norm": 0.380859375, "learning_rate": 8.524479743764151e-05, "loss": 0.4302, "step": 53410 }, { "epoch": 1.355865517635263, "grad_norm": 0.333984375, "learning_rate": 8.521482727981132e-05, "loss": 0.4238, "step": 53415 }, { "epoch": 1.3559924356842787, "grad_norm": 0.33203125, "learning_rate": 8.51848603009638e-05, "loss": 0.4426, "step": 53420 }, { "epoch": 1.3561193537332943, "grad_norm": 0.3359375, "learning_rate": 8.515489650256934e-05, "loss": 0.4192, "step": 53425 }, { "epoch": 1.35624627178231, "grad_norm": 0.36328125, "learning_rate": 8.512493588609832e-05, "loss": 0.4294, "step": 53430 }, { "epoch": 1.3563731898313258, "grad_norm": 0.3671875, "learning_rate": 8.509497845302083e-05, "loss": 0.4042, "step": 53435 }, { "epoch": 1.3565001078803416, "grad_norm": 0.357421875, "learning_rate": 8.506502420480689e-05, "loss": 0.4256, "step": 53440 }, { "epoch": 1.3566270259293574, "grad_norm": 0.376953125, "learning_rate": 8.50350731429263e-05, "loss": 0.413, "step": 53445 }, { "epoch": 1.3567539439783731, "grad_norm": 0.349609375, "learning_rate": 8.500512526884885e-05, "loss": 0.4279, "step": 53450 }, { "epoch": 1.356880862027389, "grad_norm": 0.36328125, "learning_rate": 8.497518058404401e-05, "loss": 0.4295, "step": 53455 }, { "epoch": 1.3570077800764047, "grad_norm": 0.35546875, "learning_rate": 8.494523908998114e-05, "loss": 0.4422, "step": 53460 }, { "epoch": 1.3571346981254204, "grad_norm": 0.337890625, "learning_rate": 8.491530078812951e-05, "loss": 0.4069, "step": 53465 }, { "epoch": 1.3572616161744362, "grad_norm": 0.349609375, "learning_rate": 8.488536567995811e-05, "loss": 0.4272, "step": 53470 }, { "epoch": 1.357388534223452, "grad_norm": 0.369140625, "learning_rate": 8.485543376693585e-05, "loss": 0.4041, "step": 53475 }, { "epoch": 1.3575154522724677, "grad_norm": 0.34375, "learning_rate": 8.482550505053154e-05, "loss": 0.4214, "step": 53480 }, { "epoch": 1.3576423703214835, "grad_norm": 0.3515625, "learning_rate": 8.479557953221373e-05, "loss": 0.4354, "step": 53485 }, { "epoch": 1.3577692883704993, "grad_norm": 0.34765625, "learning_rate": 8.476565721345088e-05, "loss": 0.4307, "step": 53490 }, { "epoch": 1.357896206419515, "grad_norm": 0.373046875, "learning_rate": 8.473573809571124e-05, "loss": 0.4224, "step": 53495 }, { "epoch": 1.3580231244685308, "grad_norm": 0.33203125, "learning_rate": 8.470582218046294e-05, "loss": 0.3919, "step": 53500 }, { "epoch": 1.3581500425175463, "grad_norm": 0.35546875, "learning_rate": 8.467590946917392e-05, "loss": 0.3967, "step": 53505 }, { "epoch": 1.358276960566562, "grad_norm": 0.361328125, "learning_rate": 8.464599996331201e-05, "loss": 0.422, "step": 53510 }, { "epoch": 1.3584038786155779, "grad_norm": 0.349609375, "learning_rate": 8.461609366434485e-05, "loss": 0.4137, "step": 53515 }, { "epoch": 1.3585307966645936, "grad_norm": 0.34765625, "learning_rate": 8.458619057373986e-05, "loss": 0.4401, "step": 53520 }, { "epoch": 1.3586577147136094, "grad_norm": 0.341796875, "learning_rate": 8.455629069296451e-05, "loss": 0.413, "step": 53525 }, { "epoch": 1.3587846327626252, "grad_norm": 0.388671875, "learning_rate": 8.452639402348589e-05, "loss": 0.4084, "step": 53530 }, { "epoch": 1.358911550811641, "grad_norm": 0.337890625, "learning_rate": 8.449650056677106e-05, "loss": 0.433, "step": 53535 }, { "epoch": 1.3590384688606567, "grad_norm": 0.353515625, "learning_rate": 8.446661032428678e-05, "loss": 0.4073, "step": 53540 }, { "epoch": 1.3591653869096725, "grad_norm": 0.33984375, "learning_rate": 8.443672329749989e-05, "loss": 0.4235, "step": 53545 }, { "epoch": 1.3592923049586882, "grad_norm": 0.34375, "learning_rate": 8.440683948787688e-05, "loss": 0.3944, "step": 53550 }, { "epoch": 1.3594192230077038, "grad_norm": 0.345703125, "learning_rate": 8.437695889688412e-05, "loss": 0.4044, "step": 53555 }, { "epoch": 1.3595461410567196, "grad_norm": 0.396484375, "learning_rate": 8.434708152598784e-05, "loss": 0.4421, "step": 53560 }, { "epoch": 1.3596730591057353, "grad_norm": 0.35546875, "learning_rate": 8.431720737665412e-05, "loss": 0.4253, "step": 53565 }, { "epoch": 1.359799977154751, "grad_norm": 0.37890625, "learning_rate": 8.428733645034886e-05, "loss": 0.4282, "step": 53570 }, { "epoch": 1.3599268952037669, "grad_norm": 0.345703125, "learning_rate": 8.425746874853786e-05, "loss": 0.4317, "step": 53575 }, { "epoch": 1.3600538132527826, "grad_norm": 0.353515625, "learning_rate": 8.422760427268665e-05, "loss": 0.4242, "step": 53580 }, { "epoch": 1.3601807313017984, "grad_norm": 0.376953125, "learning_rate": 8.419774302426071e-05, "loss": 0.3853, "step": 53585 }, { "epoch": 1.3603076493508142, "grad_norm": 0.349609375, "learning_rate": 8.41678850047253e-05, "loss": 0.4092, "step": 53590 }, { "epoch": 1.36043456739983, "grad_norm": 0.31640625, "learning_rate": 8.413803021554556e-05, "loss": 0.3921, "step": 53595 }, { "epoch": 1.3605614854488457, "grad_norm": 0.365234375, "learning_rate": 8.410817865818636e-05, "loss": 0.3942, "step": 53600 }, { "epoch": 1.3606884034978615, "grad_norm": 0.34765625, "learning_rate": 8.407833033411267e-05, "loss": 0.3998, "step": 53605 }, { "epoch": 1.3608153215468772, "grad_norm": 0.37109375, "learning_rate": 8.404848524478898e-05, "loss": 0.4388, "step": 53610 }, { "epoch": 1.360942239595893, "grad_norm": 0.296875, "learning_rate": 8.401864339167992e-05, "loss": 0.3893, "step": 53615 }, { "epoch": 1.3610691576449088, "grad_norm": 0.375, "learning_rate": 8.398880477624973e-05, "loss": 0.4164, "step": 53620 }, { "epoch": 1.3611960756939245, "grad_norm": 0.3671875, "learning_rate": 8.395896939996259e-05, "loss": 0.4192, "step": 53625 }, { "epoch": 1.3613229937429403, "grad_norm": 0.3515625, "learning_rate": 8.392913726428254e-05, "loss": 0.4109, "step": 53630 }, { "epoch": 1.361449911791956, "grad_norm": 0.353515625, "learning_rate": 8.389930837067339e-05, "loss": 0.439, "step": 53635 }, { "epoch": 1.3615768298409716, "grad_norm": 0.30078125, "learning_rate": 8.386948272059884e-05, "loss": 0.4049, "step": 53640 }, { "epoch": 1.3617037478899874, "grad_norm": 0.341796875, "learning_rate": 8.383966031552244e-05, "loss": 0.3957, "step": 53645 }, { "epoch": 1.3618306659390031, "grad_norm": 0.38671875, "learning_rate": 8.380984115690757e-05, "loss": 0.4048, "step": 53650 }, { "epoch": 1.361957583988019, "grad_norm": 0.353515625, "learning_rate": 8.37800252462174e-05, "loss": 0.4322, "step": 53655 }, { "epoch": 1.3620845020370347, "grad_norm": 0.328125, "learning_rate": 8.375021258491503e-05, "loss": 0.4045, "step": 53660 }, { "epoch": 1.3622114200860505, "grad_norm": 0.341796875, "learning_rate": 8.372040317446327e-05, "loss": 0.3983, "step": 53665 }, { "epoch": 1.3623383381350662, "grad_norm": 0.34765625, "learning_rate": 8.369059701632497e-05, "loss": 0.4196, "step": 53670 }, { "epoch": 1.362465256184082, "grad_norm": 0.345703125, "learning_rate": 8.366079411196269e-05, "loss": 0.4233, "step": 53675 }, { "epoch": 1.3625921742330978, "grad_norm": 0.341796875, "learning_rate": 8.363099446283882e-05, "loss": 0.4308, "step": 53680 }, { "epoch": 1.3627190922821135, "grad_norm": 0.3515625, "learning_rate": 8.36011980704156e-05, "loss": 0.4099, "step": 53685 }, { "epoch": 1.362846010331129, "grad_norm": 0.34375, "learning_rate": 8.357140493615506e-05, "loss": 0.4185, "step": 53690 }, { "epoch": 1.3629729283801448, "grad_norm": 0.3359375, "learning_rate": 8.354161506151931e-05, "loss": 0.4061, "step": 53695 }, { "epoch": 1.3630998464291606, "grad_norm": 0.328125, "learning_rate": 8.351182844797002e-05, "loss": 0.3933, "step": 53700 }, { "epoch": 1.3632267644781764, "grad_norm": 0.33984375, "learning_rate": 8.348204509696883e-05, "loss": 0.3999, "step": 53705 }, { "epoch": 1.3633536825271921, "grad_norm": 0.36328125, "learning_rate": 8.345226500997719e-05, "loss": 0.3914, "step": 53710 }, { "epoch": 1.363480600576208, "grad_norm": 0.37890625, "learning_rate": 8.34224881884564e-05, "loss": 0.4091, "step": 53715 }, { "epoch": 1.3636075186252237, "grad_norm": 0.3515625, "learning_rate": 8.33927146338676e-05, "loss": 0.392, "step": 53720 }, { "epoch": 1.3637344366742394, "grad_norm": 0.34375, "learning_rate": 8.336294434767175e-05, "loss": 0.4227, "step": 53725 }, { "epoch": 1.3638613547232552, "grad_norm": 0.357421875, "learning_rate": 8.333317733132962e-05, "loss": 0.4543, "step": 53730 }, { "epoch": 1.363988272772271, "grad_norm": 0.357421875, "learning_rate": 8.330341358630196e-05, "loss": 0.427, "step": 53735 }, { "epoch": 1.3641151908212867, "grad_norm": 0.3515625, "learning_rate": 8.327365311404924e-05, "loss": 0.4095, "step": 53740 }, { "epoch": 1.3642421088703025, "grad_norm": 0.357421875, "learning_rate": 8.32438959160318e-05, "loss": 0.3926, "step": 53745 }, { "epoch": 1.3643690269193183, "grad_norm": 0.349609375, "learning_rate": 8.321414199370977e-05, "loss": 0.4202, "step": 53750 }, { "epoch": 1.364495944968334, "grad_norm": 0.34375, "learning_rate": 8.318439134854322e-05, "loss": 0.3945, "step": 53755 }, { "epoch": 1.3646228630173498, "grad_norm": 0.328125, "learning_rate": 8.315464398199196e-05, "loss": 0.6423, "step": 53760 }, { "epoch": 1.3647497810663656, "grad_norm": 0.345703125, "learning_rate": 8.312489989551563e-05, "loss": 0.4422, "step": 53765 }, { "epoch": 1.3648766991153811, "grad_norm": 0.322265625, "learning_rate": 8.309515909057386e-05, "loss": 0.4083, "step": 53770 }, { "epoch": 1.3650036171643969, "grad_norm": 0.37109375, "learning_rate": 8.306542156862601e-05, "loss": 0.4164, "step": 53775 }, { "epoch": 1.3651305352134127, "grad_norm": 0.326171875, "learning_rate": 8.303568733113125e-05, "loss": 0.3881, "step": 53780 }, { "epoch": 1.3652574532624284, "grad_norm": 0.34765625, "learning_rate": 8.300595637954863e-05, "loss": 0.3767, "step": 53785 }, { "epoch": 1.3653843713114442, "grad_norm": 0.33203125, "learning_rate": 8.2976228715337e-05, "loss": 0.4085, "step": 53790 }, { "epoch": 1.36551128936046, "grad_norm": 0.361328125, "learning_rate": 8.294650433995519e-05, "loss": 0.4234, "step": 53795 }, { "epoch": 1.3656382074094757, "grad_norm": 0.326171875, "learning_rate": 8.29167832548617e-05, "loss": 0.4067, "step": 53800 }, { "epoch": 1.3657651254584915, "grad_norm": 0.361328125, "learning_rate": 8.288706546151493e-05, "loss": 0.4114, "step": 53805 }, { "epoch": 1.3658920435075073, "grad_norm": 0.306640625, "learning_rate": 8.285735096137312e-05, "loss": 0.4157, "step": 53810 }, { "epoch": 1.366018961556523, "grad_norm": 0.328125, "learning_rate": 8.282763975589435e-05, "loss": 0.4025, "step": 53815 }, { "epoch": 1.3661458796055386, "grad_norm": 0.361328125, "learning_rate": 8.279793184653655e-05, "loss": 0.4222, "step": 53820 }, { "epoch": 1.3662727976545543, "grad_norm": 0.33984375, "learning_rate": 8.276822723475746e-05, "loss": 0.4483, "step": 53825 }, { "epoch": 1.36639971570357, "grad_norm": 0.357421875, "learning_rate": 8.273852592201468e-05, "loss": 0.4298, "step": 53830 }, { "epoch": 1.3665266337525859, "grad_norm": 0.33984375, "learning_rate": 8.270882790976564e-05, "loss": 0.4235, "step": 53835 }, { "epoch": 1.3666535518016016, "grad_norm": 0.345703125, "learning_rate": 8.267913319946761e-05, "loss": 0.4291, "step": 53840 }, { "epoch": 1.3667804698506174, "grad_norm": 0.365234375, "learning_rate": 8.264944179257763e-05, "loss": 0.4104, "step": 53845 }, { "epoch": 1.3669073878996332, "grad_norm": 0.337890625, "learning_rate": 8.261975369055277e-05, "loss": 0.4473, "step": 53850 }, { "epoch": 1.367034305948649, "grad_norm": 0.318359375, "learning_rate": 8.259006889484971e-05, "loss": 0.4249, "step": 53855 }, { "epoch": 1.3671612239976647, "grad_norm": 0.34375, "learning_rate": 8.256038740692518e-05, "loss": 0.3937, "step": 53860 }, { "epoch": 1.3672881420466805, "grad_norm": 0.33984375, "learning_rate": 8.253070922823557e-05, "loss": 0.4276, "step": 53865 }, { "epoch": 1.3674150600956962, "grad_norm": 0.345703125, "learning_rate": 8.250103436023718e-05, "loss": 0.4133, "step": 53870 }, { "epoch": 1.367541978144712, "grad_norm": 0.359375, "learning_rate": 8.247136280438615e-05, "loss": 0.4244, "step": 53875 }, { "epoch": 1.3676688961937278, "grad_norm": 0.357421875, "learning_rate": 8.244169456213845e-05, "loss": 0.4259, "step": 53880 }, { "epoch": 1.3677958142427435, "grad_norm": 0.3671875, "learning_rate": 8.241202963494989e-05, "loss": 0.4137, "step": 53885 }, { "epoch": 1.3679227322917593, "grad_norm": 0.34375, "learning_rate": 8.238236802427608e-05, "loss": 0.4169, "step": 53890 }, { "epoch": 1.368049650340775, "grad_norm": 0.353515625, "learning_rate": 8.235270973157257e-05, "loss": 0.4312, "step": 53895 }, { "epoch": 1.3681765683897908, "grad_norm": 0.365234375, "learning_rate": 8.232305475829464e-05, "loss": 0.4173, "step": 53900 }, { "epoch": 1.3683034864388064, "grad_norm": 0.337890625, "learning_rate": 8.229340310589744e-05, "loss": 0.4237, "step": 53905 }, { "epoch": 1.3684304044878222, "grad_norm": 0.34375, "learning_rate": 8.2263754775836e-05, "loss": 0.4033, "step": 53910 }, { "epoch": 1.368557322536838, "grad_norm": 0.376953125, "learning_rate": 8.223410976956513e-05, "loss": 0.4245, "step": 53915 }, { "epoch": 1.3686842405858537, "grad_norm": 0.34765625, "learning_rate": 8.220446808853942e-05, "loss": 0.3963, "step": 53920 }, { "epoch": 1.3688111586348695, "grad_norm": 0.36328125, "learning_rate": 8.217482973421354e-05, "loss": 0.4485, "step": 53925 }, { "epoch": 1.3689380766838852, "grad_norm": 0.361328125, "learning_rate": 8.214519470804173e-05, "loss": 0.439, "step": 53930 }, { "epoch": 1.369064994732901, "grad_norm": 0.333984375, "learning_rate": 8.211556301147812e-05, "loss": 0.4117, "step": 53935 }, { "epoch": 1.3691919127819168, "grad_norm": 0.349609375, "learning_rate": 8.208593464597686e-05, "loss": 0.4129, "step": 53940 }, { "epoch": 1.3693188308309325, "grad_norm": 0.353515625, "learning_rate": 8.205630961299173e-05, "loss": 0.4465, "step": 53945 }, { "epoch": 1.3694457488799483, "grad_norm": 0.35546875, "learning_rate": 8.202668791397644e-05, "loss": 0.4186, "step": 53950 }, { "epoch": 1.3695726669289638, "grad_norm": 0.333984375, "learning_rate": 8.199706955038449e-05, "loss": 0.4395, "step": 53955 }, { "epoch": 1.3696995849779796, "grad_norm": 0.357421875, "learning_rate": 8.196745452366924e-05, "loss": 0.4457, "step": 53960 }, { "epoch": 1.3698265030269954, "grad_norm": 0.333984375, "learning_rate": 8.19378428352839e-05, "loss": 0.4335, "step": 53965 }, { "epoch": 1.3699534210760111, "grad_norm": 0.36328125, "learning_rate": 8.190823448668148e-05, "loss": 0.4456, "step": 53970 }, { "epoch": 1.370080339125027, "grad_norm": 0.376953125, "learning_rate": 8.187862947931491e-05, "loss": 0.4432, "step": 53975 }, { "epoch": 1.3702072571740427, "grad_norm": 0.3515625, "learning_rate": 8.184902781463679e-05, "loss": 0.4184, "step": 53980 }, { "epoch": 1.3703341752230584, "grad_norm": 0.375, "learning_rate": 8.181942949409975e-05, "loss": 0.4245, "step": 53985 }, { "epoch": 1.3704610932720742, "grad_norm": 0.3515625, "learning_rate": 8.178983451915618e-05, "loss": 0.4414, "step": 53990 }, { "epoch": 1.37058801132109, "grad_norm": 0.37109375, "learning_rate": 8.176024289125826e-05, "loss": 0.4077, "step": 53995 }, { "epoch": 1.3707149293701058, "grad_norm": 0.30859375, "learning_rate": 8.173065461185802e-05, "loss": 0.3889, "step": 54000 }, { "epoch": 1.3708418474191215, "grad_norm": 0.322265625, "learning_rate": 8.170106968240737e-05, "loss": 0.4174, "step": 54005 }, { "epoch": 1.3709687654681373, "grad_norm": 0.337890625, "learning_rate": 8.167148810435797e-05, "loss": 0.4021, "step": 54010 }, { "epoch": 1.371095683517153, "grad_norm": 0.322265625, "learning_rate": 8.164190987916148e-05, "loss": 0.3999, "step": 54015 }, { "epoch": 1.3712226015661688, "grad_norm": 0.345703125, "learning_rate": 8.161233500826927e-05, "loss": 0.4462, "step": 54020 }, { "epoch": 1.3713495196151846, "grad_norm": 0.375, "learning_rate": 8.158276349313252e-05, "loss": 0.4144, "step": 54025 }, { "epoch": 1.3714764376642004, "grad_norm": 0.341796875, "learning_rate": 8.15531953352023e-05, "loss": 0.4279, "step": 54030 }, { "epoch": 1.371603355713216, "grad_norm": 0.35546875, "learning_rate": 8.152363053592952e-05, "loss": 0.4223, "step": 54035 }, { "epoch": 1.3717302737622317, "grad_norm": 0.32421875, "learning_rate": 8.149406909676495e-05, "loss": 0.4281, "step": 54040 }, { "epoch": 1.3718571918112474, "grad_norm": 0.349609375, "learning_rate": 8.146451101915903e-05, "loss": 0.4406, "step": 54045 }, { "epoch": 1.3719841098602632, "grad_norm": 0.357421875, "learning_rate": 8.143495630456232e-05, "loss": 0.4329, "step": 54050 }, { "epoch": 1.372111027909279, "grad_norm": 0.345703125, "learning_rate": 8.140540495442498e-05, "loss": 0.4119, "step": 54055 }, { "epoch": 1.3722379459582947, "grad_norm": 0.333984375, "learning_rate": 8.13758569701971e-05, "loss": 0.3892, "step": 54060 }, { "epoch": 1.3723648640073105, "grad_norm": 0.353515625, "learning_rate": 8.13463123533286e-05, "loss": 0.4417, "step": 54065 }, { "epoch": 1.3724917820563263, "grad_norm": 0.326171875, "learning_rate": 8.131677110526919e-05, "loss": 0.3906, "step": 54070 }, { "epoch": 1.372618700105342, "grad_norm": 0.32421875, "learning_rate": 8.128723322746846e-05, "loss": 0.3828, "step": 54075 }, { "epoch": 1.3727456181543578, "grad_norm": 0.322265625, "learning_rate": 8.125769872137584e-05, "loss": 0.3985, "step": 54080 }, { "epoch": 1.3728725362033734, "grad_norm": 0.345703125, "learning_rate": 8.122816758844055e-05, "loss": 0.4121, "step": 54085 }, { "epoch": 1.3729994542523891, "grad_norm": 0.361328125, "learning_rate": 8.119863983011163e-05, "loss": 0.4046, "step": 54090 }, { "epoch": 1.3731263723014049, "grad_norm": 0.357421875, "learning_rate": 8.116911544783809e-05, "loss": 0.4086, "step": 54095 }, { "epoch": 1.3732532903504207, "grad_norm": 0.34375, "learning_rate": 8.113959444306865e-05, "loss": 0.4292, "step": 54100 }, { "epoch": 1.3733802083994364, "grad_norm": 0.375, "learning_rate": 8.11100768172518e-05, "loss": 0.4415, "step": 54105 }, { "epoch": 1.3735071264484522, "grad_norm": 0.31640625, "learning_rate": 8.108056257183614e-05, "loss": 0.3895, "step": 54110 }, { "epoch": 1.373634044497468, "grad_norm": 0.375, "learning_rate": 8.10510517082698e-05, "loss": 0.4339, "step": 54115 }, { "epoch": 1.3737609625464837, "grad_norm": 0.36328125, "learning_rate": 8.102154422800086e-05, "loss": 0.4298, "step": 54120 }, { "epoch": 1.3738878805954995, "grad_norm": 0.345703125, "learning_rate": 8.09920401324773e-05, "loss": 0.433, "step": 54125 }, { "epoch": 1.3740147986445153, "grad_norm": 0.3125, "learning_rate": 8.096253942314683e-05, "loss": 0.4092, "step": 54130 }, { "epoch": 1.374141716693531, "grad_norm": 0.3828125, "learning_rate": 8.093304210145707e-05, "loss": 0.424, "step": 54135 }, { "epoch": 1.3742686347425468, "grad_norm": 0.345703125, "learning_rate": 8.090354816885541e-05, "loss": 0.3838, "step": 54140 }, { "epoch": 1.3743955527915626, "grad_norm": 0.349609375, "learning_rate": 8.087405762678911e-05, "loss": 0.4278, "step": 54145 }, { "epoch": 1.3745224708405783, "grad_norm": 0.390625, "learning_rate": 8.084457047670526e-05, "loss": 0.4305, "step": 54150 }, { "epoch": 1.374649388889594, "grad_norm": 0.3515625, "learning_rate": 8.081508672005081e-05, "loss": 0.4205, "step": 54155 }, { "epoch": 1.3747763069386099, "grad_norm": 0.34765625, "learning_rate": 8.078560635827248e-05, "loss": 0.4182, "step": 54160 }, { "epoch": 1.3749032249876256, "grad_norm": 0.337890625, "learning_rate": 8.075612939281683e-05, "loss": 0.4208, "step": 54165 }, { "epoch": 1.3750301430366412, "grad_norm": 0.359375, "learning_rate": 8.072665582513037e-05, "loss": 0.4018, "step": 54170 }, { "epoch": 1.375157061085657, "grad_norm": 0.357421875, "learning_rate": 8.069718565665932e-05, "loss": 0.4115, "step": 54175 }, { "epoch": 1.3752839791346727, "grad_norm": 0.318359375, "learning_rate": 8.066771888884971e-05, "loss": 0.3985, "step": 54180 }, { "epoch": 1.3754108971836885, "grad_norm": 0.3671875, "learning_rate": 8.063825552314757e-05, "loss": 0.4225, "step": 54185 }, { "epoch": 1.3755378152327042, "grad_norm": 0.365234375, "learning_rate": 8.06087955609986e-05, "loss": 0.4512, "step": 54190 }, { "epoch": 1.37566473328172, "grad_norm": 0.328125, "learning_rate": 8.057933900384839e-05, "loss": 0.3993, "step": 54195 }, { "epoch": 1.3757916513307358, "grad_norm": 0.29296875, "learning_rate": 8.054988585314238e-05, "loss": 0.3895, "step": 54200 }, { "epoch": 1.3759185693797515, "grad_norm": 0.322265625, "learning_rate": 8.05204361103258e-05, "loss": 0.4357, "step": 54205 }, { "epoch": 1.3760454874287673, "grad_norm": 0.333984375, "learning_rate": 8.049098977684373e-05, "loss": 0.4089, "step": 54210 }, { "epoch": 1.376172405477783, "grad_norm": 0.32421875, "learning_rate": 8.04615468541411e-05, "loss": 0.4118, "step": 54215 }, { "epoch": 1.3762993235267986, "grad_norm": 0.3515625, "learning_rate": 8.04321073436627e-05, "loss": 0.3953, "step": 54220 }, { "epoch": 1.3764262415758144, "grad_norm": 0.369140625, "learning_rate": 8.040267124685304e-05, "loss": 0.4171, "step": 54225 }, { "epoch": 1.3765531596248302, "grad_norm": 0.310546875, "learning_rate": 8.03732385651566e-05, "loss": 0.4046, "step": 54230 }, { "epoch": 1.376680077673846, "grad_norm": 0.353515625, "learning_rate": 8.034380930001756e-05, "loss": 0.4462, "step": 54235 }, { "epoch": 1.3768069957228617, "grad_norm": 0.37109375, "learning_rate": 8.031438345288009e-05, "loss": 0.4047, "step": 54240 }, { "epoch": 1.3769339137718775, "grad_norm": 0.345703125, "learning_rate": 8.028496102518806e-05, "loss": 0.4113, "step": 54245 }, { "epoch": 1.3770608318208932, "grad_norm": 0.32421875, "learning_rate": 8.025554201838524e-05, "loss": 0.4054, "step": 54250 }, { "epoch": 1.377187749869909, "grad_norm": 0.349609375, "learning_rate": 8.022612643391512e-05, "loss": 0.4075, "step": 54255 }, { "epoch": 1.3773146679189248, "grad_norm": 0.365234375, "learning_rate": 8.019671427322125e-05, "loss": 0.417, "step": 54260 }, { "epoch": 1.3774415859679405, "grad_norm": 0.328125, "learning_rate": 8.01673055377468e-05, "loss": 0.3995, "step": 54265 }, { "epoch": 1.3775685040169563, "grad_norm": 0.365234375, "learning_rate": 8.013790022893484e-05, "loss": 0.4321, "step": 54270 }, { "epoch": 1.377695422065972, "grad_norm": 0.353515625, "learning_rate": 8.01084983482283e-05, "loss": 0.4212, "step": 54275 }, { "epoch": 1.3778223401149878, "grad_norm": 0.359375, "learning_rate": 8.007909989706991e-05, "loss": 0.4191, "step": 54280 }, { "epoch": 1.3779492581640036, "grad_norm": 0.35546875, "learning_rate": 8.004970487690223e-05, "loss": 0.4189, "step": 54285 }, { "epoch": 1.3780761762130194, "grad_norm": 0.357421875, "learning_rate": 8.002031328916765e-05, "loss": 0.4186, "step": 54290 }, { "epoch": 1.3782030942620351, "grad_norm": 0.357421875, "learning_rate": 7.999092513530839e-05, "loss": 0.3797, "step": 54295 }, { "epoch": 1.3783300123110507, "grad_norm": 0.380859375, "learning_rate": 7.99615404167666e-05, "loss": 0.3998, "step": 54300 }, { "epoch": 1.3784569303600664, "grad_norm": 0.310546875, "learning_rate": 7.993215913498411e-05, "loss": 0.3928, "step": 54305 }, { "epoch": 1.3785838484090822, "grad_norm": 0.34375, "learning_rate": 7.990278129140268e-05, "loss": 0.4057, "step": 54310 }, { "epoch": 1.378710766458098, "grad_norm": 0.330078125, "learning_rate": 7.987340688746383e-05, "loss": 0.4362, "step": 54315 }, { "epoch": 1.3788376845071137, "grad_norm": 0.337890625, "learning_rate": 7.984403592460897e-05, "loss": 0.4073, "step": 54320 }, { "epoch": 1.3789646025561295, "grad_norm": 0.35546875, "learning_rate": 7.981466840427932e-05, "loss": 0.4406, "step": 54325 }, { "epoch": 1.3790915206051453, "grad_norm": 0.35546875, "learning_rate": 7.978530432791593e-05, "loss": 0.4216, "step": 54330 }, { "epoch": 1.379218438654161, "grad_norm": 0.3515625, "learning_rate": 7.975594369695965e-05, "loss": 0.4125, "step": 54335 }, { "epoch": 1.3793453567031768, "grad_norm": 0.35546875, "learning_rate": 7.972658651285128e-05, "loss": 0.417, "step": 54340 }, { "epoch": 1.3794722747521926, "grad_norm": 0.33984375, "learning_rate": 7.96972327770313e-05, "loss": 0.4187, "step": 54345 }, { "epoch": 1.3795991928012081, "grad_norm": 0.369140625, "learning_rate": 7.966788249094015e-05, "loss": 0.4442, "step": 54350 }, { "epoch": 1.379726110850224, "grad_norm": 0.3515625, "learning_rate": 7.963853565601794e-05, "loss": 0.4097, "step": 54355 }, { "epoch": 1.3798530288992397, "grad_norm": 0.35546875, "learning_rate": 7.960919227370473e-05, "loss": 0.4121, "step": 54360 }, { "epoch": 1.3799799469482554, "grad_norm": 0.384765625, "learning_rate": 7.957985234544046e-05, "loss": 0.4221, "step": 54365 }, { "epoch": 1.3801068649972712, "grad_norm": 0.345703125, "learning_rate": 7.955051587266477e-05, "loss": 0.4238, "step": 54370 }, { "epoch": 1.380233783046287, "grad_norm": 0.345703125, "learning_rate": 7.952118285681723e-05, "loss": 0.4194, "step": 54375 }, { "epoch": 1.3803607010953027, "grad_norm": 0.318359375, "learning_rate": 7.949185329933716e-05, "loss": 0.4069, "step": 54380 }, { "epoch": 1.3804876191443185, "grad_norm": 0.353515625, "learning_rate": 7.946252720166377e-05, "loss": 0.4265, "step": 54385 }, { "epoch": 1.3806145371933343, "grad_norm": 0.35546875, "learning_rate": 7.943320456523607e-05, "loss": 0.4019, "step": 54390 }, { "epoch": 1.38074145524235, "grad_norm": 0.365234375, "learning_rate": 7.940388539149292e-05, "loss": 0.424, "step": 54395 }, { "epoch": 1.3808683732913658, "grad_norm": 0.359375, "learning_rate": 7.937456968187298e-05, "loss": 0.4101, "step": 54400 }, { "epoch": 1.3809952913403816, "grad_norm": 0.333984375, "learning_rate": 7.934525743781477e-05, "loss": 0.4299, "step": 54405 }, { "epoch": 1.3811222093893973, "grad_norm": 0.359375, "learning_rate": 7.931594866075657e-05, "loss": 0.4401, "step": 54410 }, { "epoch": 1.381249127438413, "grad_norm": 0.357421875, "learning_rate": 7.928664335213669e-05, "loss": 0.4174, "step": 54415 }, { "epoch": 1.3813760454874289, "grad_norm": 0.357421875, "learning_rate": 7.925734151339304e-05, "loss": 0.4262, "step": 54420 }, { "epoch": 1.3815029635364446, "grad_norm": 0.396484375, "learning_rate": 7.922804314596341e-05, "loss": 0.46, "step": 54425 }, { "epoch": 1.3816298815854604, "grad_norm": 0.3671875, "learning_rate": 7.919874825128557e-05, "loss": 0.4488, "step": 54430 }, { "epoch": 1.381756799634476, "grad_norm": 0.3515625, "learning_rate": 7.916945683079695e-05, "loss": 0.4109, "step": 54435 }, { "epoch": 1.3818837176834917, "grad_norm": 0.3515625, "learning_rate": 7.914016888593485e-05, "loss": 0.4284, "step": 54440 }, { "epoch": 1.3820106357325075, "grad_norm": 0.341796875, "learning_rate": 7.911088441813647e-05, "loss": 0.4485, "step": 54445 }, { "epoch": 1.3821375537815233, "grad_norm": 0.361328125, "learning_rate": 7.908160342883871e-05, "loss": 0.418, "step": 54450 }, { "epoch": 1.382264471830539, "grad_norm": 0.376953125, "learning_rate": 7.905232591947846e-05, "loss": 0.4165, "step": 54455 }, { "epoch": 1.3823913898795548, "grad_norm": 0.33203125, "learning_rate": 7.90230518914923e-05, "loss": 0.4089, "step": 54460 }, { "epoch": 1.3825183079285706, "grad_norm": 0.359375, "learning_rate": 7.899378134631672e-05, "loss": 0.4078, "step": 54465 }, { "epoch": 1.3826452259775863, "grad_norm": 0.330078125, "learning_rate": 7.8964514285388e-05, "loss": 0.3755, "step": 54470 }, { "epoch": 1.382772144026602, "grad_norm": 0.341796875, "learning_rate": 7.893525071014225e-05, "loss": 0.4117, "step": 54475 }, { "epoch": 1.3828990620756176, "grad_norm": 0.314453125, "learning_rate": 7.890599062201547e-05, "loss": 0.3889, "step": 54480 }, { "epoch": 1.3830259801246334, "grad_norm": 0.380859375, "learning_rate": 7.887673402244333e-05, "loss": 0.421, "step": 54485 }, { "epoch": 1.3831528981736492, "grad_norm": 0.33984375, "learning_rate": 7.884748091286159e-05, "loss": 0.4041, "step": 54490 }, { "epoch": 1.383279816222665, "grad_norm": 0.3203125, "learning_rate": 7.881823129470561e-05, "loss": 0.3851, "step": 54495 }, { "epoch": 1.3834067342716807, "grad_norm": 0.37890625, "learning_rate": 7.878898516941061e-05, "loss": 0.3994, "step": 54500 }, { "epoch": 1.3835336523206965, "grad_norm": 0.375, "learning_rate": 7.87597425384118e-05, "loss": 0.4224, "step": 54505 }, { "epoch": 1.3836605703697122, "grad_norm": 0.361328125, "learning_rate": 7.873050340314405e-05, "loss": 0.4355, "step": 54510 }, { "epoch": 1.383787488418728, "grad_norm": 0.35546875, "learning_rate": 7.87012677650421e-05, "loss": 0.3952, "step": 54515 }, { "epoch": 1.3839144064677438, "grad_norm": 0.3515625, "learning_rate": 7.867203562554056e-05, "loss": 0.4183, "step": 54520 }, { "epoch": 1.3840413245167595, "grad_norm": 0.341796875, "learning_rate": 7.864280698607379e-05, "loss": 0.4229, "step": 54525 }, { "epoch": 1.3841682425657753, "grad_norm": 0.361328125, "learning_rate": 7.861358184807607e-05, "loss": 0.4023, "step": 54530 }, { "epoch": 1.384295160614791, "grad_norm": 0.3359375, "learning_rate": 7.858436021298145e-05, "loss": 0.4133, "step": 54535 }, { "epoch": 1.3844220786638068, "grad_norm": 0.359375, "learning_rate": 7.855514208222382e-05, "loss": 0.4255, "step": 54540 }, { "epoch": 1.3845489967128226, "grad_norm": 0.345703125, "learning_rate": 7.852592745723693e-05, "loss": 0.4273, "step": 54545 }, { "epoch": 1.3846759147618384, "grad_norm": 0.369140625, "learning_rate": 7.849671633945424e-05, "loss": 0.4167, "step": 54550 }, { "epoch": 1.3848028328108541, "grad_norm": 0.34375, "learning_rate": 7.846750873030925e-05, "loss": 0.417, "step": 54555 }, { "epoch": 1.38492975085987, "grad_norm": 0.337890625, "learning_rate": 7.843830463123512e-05, "loss": 0.4277, "step": 54560 }, { "epoch": 1.3850566689088855, "grad_norm": 0.298828125, "learning_rate": 7.840910404366487e-05, "loss": 0.4085, "step": 54565 }, { "epoch": 1.3851835869579012, "grad_norm": 0.349609375, "learning_rate": 7.837990696903136e-05, "loss": 0.4091, "step": 54570 }, { "epoch": 1.385310505006917, "grad_norm": 0.3671875, "learning_rate": 7.83507134087673e-05, "loss": 0.4122, "step": 54575 }, { "epoch": 1.3854374230559328, "grad_norm": 0.359375, "learning_rate": 7.832152336430513e-05, "loss": 0.412, "step": 54580 }, { "epoch": 1.3855643411049485, "grad_norm": 0.341796875, "learning_rate": 7.82923368370773e-05, "loss": 0.4102, "step": 54585 }, { "epoch": 1.3856912591539643, "grad_norm": 0.326171875, "learning_rate": 7.826315382851596e-05, "loss": 0.3984, "step": 54590 }, { "epoch": 1.38581817720298, "grad_norm": 0.34375, "learning_rate": 7.823397434005308e-05, "loss": 0.4232, "step": 54595 }, { "epoch": 1.3859450952519958, "grad_norm": 0.341796875, "learning_rate": 7.820479837312047e-05, "loss": 0.4211, "step": 54600 }, { "epoch": 1.3860720133010116, "grad_norm": 0.34765625, "learning_rate": 7.817562592914984e-05, "loss": 0.4256, "step": 54605 }, { "epoch": 1.3861989313500274, "grad_norm": 0.359375, "learning_rate": 7.814645700957255e-05, "loss": 0.4183, "step": 54610 }, { "epoch": 1.386325849399043, "grad_norm": 0.36328125, "learning_rate": 7.811729161582007e-05, "loss": 0.4388, "step": 54615 }, { "epoch": 1.3864527674480587, "grad_norm": 0.3671875, "learning_rate": 7.808812974932344e-05, "loss": 0.4473, "step": 54620 }, { "epoch": 1.3865796854970744, "grad_norm": 0.3671875, "learning_rate": 7.805897141151364e-05, "loss": 0.4051, "step": 54625 }, { "epoch": 1.3867066035460902, "grad_norm": 0.341796875, "learning_rate": 7.802981660382146e-05, "loss": 0.4162, "step": 54630 }, { "epoch": 1.386833521595106, "grad_norm": 0.369140625, "learning_rate": 7.800066532767751e-05, "loss": 0.4039, "step": 54635 }, { "epoch": 1.3869604396441217, "grad_norm": 0.353515625, "learning_rate": 7.797151758451224e-05, "loss": 0.4148, "step": 54640 }, { "epoch": 1.3870873576931375, "grad_norm": 0.337890625, "learning_rate": 7.79423733757559e-05, "loss": 0.4166, "step": 54645 }, { "epoch": 1.3872142757421533, "grad_norm": 0.357421875, "learning_rate": 7.79132327028386e-05, "loss": 0.4145, "step": 54650 }, { "epoch": 1.387341193791169, "grad_norm": 0.380859375, "learning_rate": 7.788409556719019e-05, "loss": 0.4075, "step": 54655 }, { "epoch": 1.3874681118401848, "grad_norm": 0.337890625, "learning_rate": 7.785496197024056e-05, "loss": 0.4347, "step": 54660 }, { "epoch": 1.3875950298892006, "grad_norm": 0.32421875, "learning_rate": 7.78258319134192e-05, "loss": 0.3742, "step": 54665 }, { "epoch": 1.3877219479382163, "grad_norm": 0.357421875, "learning_rate": 7.779670539815551e-05, "loss": 0.4364, "step": 54670 }, { "epoch": 1.3878488659872321, "grad_norm": 0.32421875, "learning_rate": 7.776758242587866e-05, "loss": 0.4213, "step": 54675 }, { "epoch": 1.3879757840362479, "grad_norm": 0.369140625, "learning_rate": 7.773846299801783e-05, "loss": 0.4327, "step": 54680 }, { "epoch": 1.3881027020852637, "grad_norm": 0.6875, "learning_rate": 7.770934711600186e-05, "loss": 0.4189, "step": 54685 }, { "epoch": 1.3882296201342794, "grad_norm": 0.33203125, "learning_rate": 7.76802347812594e-05, "loss": 0.3945, "step": 54690 }, { "epoch": 1.388356538183295, "grad_norm": 0.37109375, "learning_rate": 7.765112599521904e-05, "loss": 0.4131, "step": 54695 }, { "epoch": 1.3884834562323107, "grad_norm": 0.353515625, "learning_rate": 7.762202075930909e-05, "loss": 0.3888, "step": 54700 }, { "epoch": 1.3886103742813265, "grad_norm": 0.3359375, "learning_rate": 7.759291907495774e-05, "loss": 0.4048, "step": 54705 }, { "epoch": 1.3887372923303423, "grad_norm": 0.349609375, "learning_rate": 7.756382094359301e-05, "loss": 0.4148, "step": 54710 }, { "epoch": 1.388864210379358, "grad_norm": 0.341796875, "learning_rate": 7.753472636664276e-05, "loss": 0.4053, "step": 54715 }, { "epoch": 1.3889911284283738, "grad_norm": 0.341796875, "learning_rate": 7.75056353455346e-05, "loss": 0.4051, "step": 54720 }, { "epoch": 1.3891180464773896, "grad_norm": 0.328125, "learning_rate": 7.747654788169604e-05, "loss": 0.4312, "step": 54725 }, { "epoch": 1.3892449645264053, "grad_norm": 0.326171875, "learning_rate": 7.744746397655438e-05, "loss": 0.4172, "step": 54730 }, { "epoch": 1.389371882575421, "grad_norm": 0.33984375, "learning_rate": 7.741838363153672e-05, "loss": 0.4273, "step": 54735 }, { "epoch": 1.3894988006244369, "grad_norm": 0.3671875, "learning_rate": 7.73893068480701e-05, "loss": 0.4226, "step": 54740 }, { "epoch": 1.3896257186734524, "grad_norm": 0.341796875, "learning_rate": 7.736023362758127e-05, "loss": 0.4416, "step": 54745 }, { "epoch": 1.3897526367224682, "grad_norm": 0.37109375, "learning_rate": 7.73311639714968e-05, "loss": 0.4367, "step": 54750 }, { "epoch": 1.389879554771484, "grad_norm": 0.38671875, "learning_rate": 7.730209788124323e-05, "loss": 0.4415, "step": 54755 }, { "epoch": 1.3900064728204997, "grad_norm": 0.310546875, "learning_rate": 7.727303535824674e-05, "loss": 0.4092, "step": 54760 }, { "epoch": 1.3901333908695155, "grad_norm": 0.333984375, "learning_rate": 7.724397640393343e-05, "loss": 0.4215, "step": 54765 }, { "epoch": 1.3902603089185313, "grad_norm": 0.359375, "learning_rate": 7.721492101972923e-05, "loss": 0.3903, "step": 54770 }, { "epoch": 1.390387226967547, "grad_norm": 0.34375, "learning_rate": 7.718586920705986e-05, "loss": 0.4273, "step": 54775 }, { "epoch": 1.3905141450165628, "grad_norm": 0.353515625, "learning_rate": 7.71568209673509e-05, "loss": 0.4192, "step": 54780 }, { "epoch": 1.3906410630655786, "grad_norm": 0.369140625, "learning_rate": 7.71277763020277e-05, "loss": 0.404, "step": 54785 }, { "epoch": 1.3907679811145943, "grad_norm": 0.37109375, "learning_rate": 7.709873521251549e-05, "loss": 0.4488, "step": 54790 }, { "epoch": 1.39089489916361, "grad_norm": 0.345703125, "learning_rate": 7.706969770023929e-05, "loss": 0.4168, "step": 54795 }, { "epoch": 1.3910218172126259, "grad_norm": 0.3515625, "learning_rate": 7.704066376662393e-05, "loss": 0.4127, "step": 54800 }, { "epoch": 1.3911487352616416, "grad_norm": 0.322265625, "learning_rate": 7.701163341309419e-05, "loss": 0.403, "step": 54805 }, { "epoch": 1.3912756533106574, "grad_norm": 0.3671875, "learning_rate": 7.698260664107454e-05, "loss": 0.4287, "step": 54810 }, { "epoch": 1.3914025713596732, "grad_norm": 0.333984375, "learning_rate": 7.695358345198928e-05, "loss": 0.4018, "step": 54815 }, { "epoch": 1.391529489408689, "grad_norm": 0.357421875, "learning_rate": 7.692456384726259e-05, "loss": 0.4128, "step": 54820 }, { "epoch": 1.3916564074577047, "grad_norm": 0.37109375, "learning_rate": 7.689554782831836e-05, "loss": 0.4245, "step": 54825 }, { "epoch": 1.3917833255067202, "grad_norm": 0.341796875, "learning_rate": 7.686653539658056e-05, "loss": 0.3843, "step": 54830 }, { "epoch": 1.391910243555736, "grad_norm": 0.36328125, "learning_rate": 7.683752655347272e-05, "loss": 0.4046, "step": 54835 }, { "epoch": 1.3920371616047518, "grad_norm": 0.37890625, "learning_rate": 7.680852130041835e-05, "loss": 0.4621, "step": 54840 }, { "epoch": 1.3921640796537675, "grad_norm": 0.326171875, "learning_rate": 7.677951963884064e-05, "loss": 0.4253, "step": 54845 }, { "epoch": 1.3922909977027833, "grad_norm": 0.34765625, "learning_rate": 7.675052157016276e-05, "loss": 0.4052, "step": 54850 }, { "epoch": 1.392417915751799, "grad_norm": 0.341796875, "learning_rate": 7.672152709580759e-05, "loss": 0.4052, "step": 54855 }, { "epoch": 1.3925448338008148, "grad_norm": 0.337890625, "learning_rate": 7.66925362171979e-05, "loss": 0.4057, "step": 54860 }, { "epoch": 1.3926717518498306, "grad_norm": 0.32421875, "learning_rate": 7.666354893575622e-05, "loss": 0.4112, "step": 54865 }, { "epoch": 1.3927986698988464, "grad_norm": 0.33984375, "learning_rate": 7.663456525290503e-05, "loss": 0.4382, "step": 54870 }, { "epoch": 1.3929255879478621, "grad_norm": 0.380859375, "learning_rate": 7.660558517006647e-05, "loss": 0.4357, "step": 54875 }, { "epoch": 1.3930525059968777, "grad_norm": 0.353515625, "learning_rate": 7.657660868866263e-05, "loss": 0.4165, "step": 54880 }, { "epoch": 1.3931794240458935, "grad_norm": 0.353515625, "learning_rate": 7.654763581011536e-05, "loss": 0.4161, "step": 54885 }, { "epoch": 1.3933063420949092, "grad_norm": 0.3828125, "learning_rate": 7.651866653584633e-05, "loss": 0.4338, "step": 54890 }, { "epoch": 1.393433260143925, "grad_norm": 0.341796875, "learning_rate": 7.648970086727708e-05, "loss": 0.4323, "step": 54895 }, { "epoch": 1.3935601781929408, "grad_norm": 0.361328125, "learning_rate": 7.646073880582885e-05, "loss": 0.4015, "step": 54900 }, { "epoch": 1.3936870962419565, "grad_norm": 0.330078125, "learning_rate": 7.643178035292292e-05, "loss": 0.4045, "step": 54905 }, { "epoch": 1.3938140142909723, "grad_norm": 0.35546875, "learning_rate": 7.640282550998026e-05, "loss": 0.4103, "step": 54910 }, { "epoch": 1.393940932339988, "grad_norm": 0.3359375, "learning_rate": 7.63738742784216e-05, "loss": 0.4067, "step": 54915 }, { "epoch": 1.3940678503890038, "grad_norm": 0.349609375, "learning_rate": 7.634492665966762e-05, "loss": 0.3829, "step": 54920 }, { "epoch": 1.3941947684380196, "grad_norm": 0.33984375, "learning_rate": 7.631598265513869e-05, "loss": 0.4182, "step": 54925 }, { "epoch": 1.3943216864870354, "grad_norm": 0.345703125, "learning_rate": 7.628704226625519e-05, "loss": 0.3866, "step": 54930 }, { "epoch": 1.3944486045360511, "grad_norm": 0.34375, "learning_rate": 7.625810549443717e-05, "loss": 0.3849, "step": 54935 }, { "epoch": 1.394575522585067, "grad_norm": 0.33203125, "learning_rate": 7.622917234110454e-05, "loss": 0.4098, "step": 54940 }, { "epoch": 1.3947024406340827, "grad_norm": 0.326171875, "learning_rate": 7.620024280767702e-05, "loss": 0.4146, "step": 54945 }, { "epoch": 1.3948293586830984, "grad_norm": 0.35546875, "learning_rate": 7.617131689557422e-05, "loss": 0.4176, "step": 54950 }, { "epoch": 1.3949562767321142, "grad_norm": 0.36328125, "learning_rate": 7.614239460621546e-05, "loss": 0.4236, "step": 54955 }, { "epoch": 1.3950831947811297, "grad_norm": 0.33203125, "learning_rate": 7.611347594102e-05, "loss": 0.4002, "step": 54960 }, { "epoch": 1.3952101128301455, "grad_norm": 0.3671875, "learning_rate": 7.608456090140684e-05, "loss": 0.4566, "step": 54965 }, { "epoch": 1.3953370308791613, "grad_norm": 0.333984375, "learning_rate": 7.605564948879482e-05, "loss": 0.4059, "step": 54970 }, { "epoch": 1.395463948928177, "grad_norm": 0.353515625, "learning_rate": 7.602674170460262e-05, "loss": 0.4433, "step": 54975 }, { "epoch": 1.3955908669771928, "grad_norm": 0.373046875, "learning_rate": 7.59978375502487e-05, "loss": 0.4199, "step": 54980 }, { "epoch": 1.3957177850262086, "grad_norm": 0.3515625, "learning_rate": 7.596893702715146e-05, "loss": 0.4135, "step": 54985 }, { "epoch": 1.3958447030752243, "grad_norm": 0.34375, "learning_rate": 7.594004013672899e-05, "loss": 0.3918, "step": 54990 }, { "epoch": 1.3959716211242401, "grad_norm": 0.318359375, "learning_rate": 7.591114688039921e-05, "loss": 0.3962, "step": 54995 }, { "epoch": 1.3960985391732559, "grad_norm": 0.283203125, "learning_rate": 7.588225725957999e-05, "loss": 0.3739, "step": 55000 }, { "epoch": 1.3962254572222716, "grad_norm": 0.34375, "learning_rate": 7.585337127568884e-05, "loss": 0.412, "step": 55005 }, { "epoch": 1.3963523752712872, "grad_norm": 0.333984375, "learning_rate": 7.582448893014326e-05, "loss": 0.4097, "step": 55010 }, { "epoch": 1.396479293320303, "grad_norm": 0.365234375, "learning_rate": 7.579561022436048e-05, "loss": 0.4474, "step": 55015 }, { "epoch": 1.3966062113693187, "grad_norm": 0.37890625, "learning_rate": 7.576673515975753e-05, "loss": 0.4057, "step": 55020 }, { "epoch": 1.3967331294183345, "grad_norm": 0.34765625, "learning_rate": 7.573786373775131e-05, "loss": 0.4231, "step": 55025 }, { "epoch": 1.3968600474673503, "grad_norm": 0.3359375, "learning_rate": 7.570899595975853e-05, "loss": 0.4131, "step": 55030 }, { "epoch": 1.396986965516366, "grad_norm": 0.361328125, "learning_rate": 7.568013182719575e-05, "loss": 0.414, "step": 55035 }, { "epoch": 1.3971138835653818, "grad_norm": 0.35546875, "learning_rate": 7.565127134147929e-05, "loss": 0.4296, "step": 55040 }, { "epoch": 1.3972408016143976, "grad_norm": 0.33984375, "learning_rate": 7.56224145040253e-05, "loss": 0.424, "step": 55045 }, { "epoch": 1.3973677196634133, "grad_norm": 0.33203125, "learning_rate": 7.559356131624983e-05, "loss": 0.3879, "step": 55050 }, { "epoch": 1.397494637712429, "grad_norm": 0.357421875, "learning_rate": 7.556471177956859e-05, "loss": 0.4438, "step": 55055 }, { "epoch": 1.3976215557614449, "grad_norm": 0.3203125, "learning_rate": 7.553586589539736e-05, "loss": 0.4187, "step": 55060 }, { "epoch": 1.3977484738104606, "grad_norm": 0.359375, "learning_rate": 7.550702366515154e-05, "loss": 0.3853, "step": 55065 }, { "epoch": 1.3978753918594764, "grad_norm": 0.357421875, "learning_rate": 7.547818509024632e-05, "loss": 0.4154, "step": 55070 }, { "epoch": 1.3980023099084922, "grad_norm": 0.33203125, "learning_rate": 7.544935017209696e-05, "loss": 0.4013, "step": 55075 }, { "epoch": 1.398129227957508, "grad_norm": 0.3671875, "learning_rate": 7.542051891211825e-05, "loss": 0.4278, "step": 55080 }, { "epoch": 1.3982561460065237, "grad_norm": 0.357421875, "learning_rate": 7.539169131172498e-05, "loss": 0.394, "step": 55085 }, { "epoch": 1.3983830640555395, "grad_norm": 0.361328125, "learning_rate": 7.536286737233173e-05, "loss": 0.4318, "step": 55090 }, { "epoch": 1.398509982104555, "grad_norm": 0.35546875, "learning_rate": 7.53340470953528e-05, "loss": 0.4033, "step": 55095 }, { "epoch": 1.3986369001535708, "grad_norm": 0.369140625, "learning_rate": 7.530523048220247e-05, "loss": 0.414, "step": 55100 }, { "epoch": 1.3987638182025866, "grad_norm": 0.353515625, "learning_rate": 7.527641753429471e-05, "loss": 0.415, "step": 55105 }, { "epoch": 1.3988907362516023, "grad_norm": 0.365234375, "learning_rate": 7.524760825304338e-05, "loss": 0.4073, "step": 55110 }, { "epoch": 1.399017654300618, "grad_norm": 0.359375, "learning_rate": 7.521880263986208e-05, "loss": 0.4198, "step": 55115 }, { "epoch": 1.3991445723496339, "grad_norm": 0.349609375, "learning_rate": 7.51900006961644e-05, "loss": 0.403, "step": 55120 }, { "epoch": 1.3992714903986496, "grad_norm": 0.322265625, "learning_rate": 7.516120242336359e-05, "loss": 0.381, "step": 55125 }, { "epoch": 1.3993984084476654, "grad_norm": 0.357421875, "learning_rate": 7.513240782287276e-05, "loss": 0.4251, "step": 55130 }, { "epoch": 1.3995253264966812, "grad_norm": 0.33984375, "learning_rate": 7.510361689610487e-05, "loss": 0.4362, "step": 55135 }, { "epoch": 1.399652244545697, "grad_norm": 0.333984375, "learning_rate": 7.507482964447265e-05, "loss": 0.4191, "step": 55140 }, { "epoch": 1.3997791625947125, "grad_norm": 0.37109375, "learning_rate": 7.504604606938863e-05, "loss": 0.4148, "step": 55145 }, { "epoch": 1.3999060806437282, "grad_norm": 0.35546875, "learning_rate": 7.501726617226537e-05, "loss": 0.3979, "step": 55150 }, { "epoch": 1.400032998692744, "grad_norm": 0.345703125, "learning_rate": 7.498848995451494e-05, "loss": 0.4142, "step": 55155 }, { "epoch": 1.4001599167417598, "grad_norm": 0.33984375, "learning_rate": 7.495971741754947e-05, "loss": 0.4177, "step": 55160 }, { "epoch": 1.4002868347907755, "grad_norm": 0.349609375, "learning_rate": 7.493094856278073e-05, "loss": 0.3987, "step": 55165 }, { "epoch": 1.4004137528397913, "grad_norm": 0.337890625, "learning_rate": 7.490218339162047e-05, "loss": 0.4197, "step": 55170 }, { "epoch": 1.400540670888807, "grad_norm": 0.337890625, "learning_rate": 7.487342190548014e-05, "loss": 0.4276, "step": 55175 }, { "epoch": 1.4006675889378228, "grad_norm": 0.35546875, "learning_rate": 7.484466410577103e-05, "loss": 0.3978, "step": 55180 }, { "epoch": 1.4007945069868386, "grad_norm": 0.333984375, "learning_rate": 7.481590999390438e-05, "loss": 0.4074, "step": 55185 }, { "epoch": 1.4009214250358544, "grad_norm": 0.341796875, "learning_rate": 7.478715957129106e-05, "loss": 0.4091, "step": 55190 }, { "epoch": 1.4010483430848701, "grad_norm": 0.328125, "learning_rate": 7.475841283934187e-05, "loss": 0.4054, "step": 55195 }, { "epoch": 1.401175261133886, "grad_norm": 0.357421875, "learning_rate": 7.472966979946738e-05, "loss": 0.4148, "step": 55200 }, { "epoch": 1.4013021791829017, "grad_norm": 0.353515625, "learning_rate": 7.470093045307802e-05, "loss": 0.4133, "step": 55205 }, { "epoch": 1.4014290972319174, "grad_norm": 0.35546875, "learning_rate": 7.4672194801584e-05, "loss": 0.4012, "step": 55210 }, { "epoch": 1.4015560152809332, "grad_norm": 0.35546875, "learning_rate": 7.464346284639536e-05, "loss": 0.4158, "step": 55215 }, { "epoch": 1.401682933329949, "grad_norm": 0.375, "learning_rate": 7.4614734588922e-05, "loss": 0.4109, "step": 55220 }, { "epoch": 1.4018098513789645, "grad_norm": 0.349609375, "learning_rate": 7.458601003057353e-05, "loss": 0.436, "step": 55225 }, { "epoch": 1.4019367694279803, "grad_norm": 0.353515625, "learning_rate": 7.455728917275956e-05, "loss": 0.4272, "step": 55230 }, { "epoch": 1.402063687476996, "grad_norm": 0.34765625, "learning_rate": 7.452857201688936e-05, "loss": 0.3893, "step": 55235 }, { "epoch": 1.4021906055260118, "grad_norm": 0.357421875, "learning_rate": 7.4499858564372e-05, "loss": 0.4409, "step": 55240 }, { "epoch": 1.4023175235750276, "grad_norm": 0.345703125, "learning_rate": 7.447114881661657e-05, "loss": 0.3828, "step": 55245 }, { "epoch": 1.4024444416240434, "grad_norm": 0.341796875, "learning_rate": 7.444244277503178e-05, "loss": 0.4124, "step": 55250 }, { "epoch": 1.4025713596730591, "grad_norm": 0.333984375, "learning_rate": 7.441374044102624e-05, "loss": 0.4104, "step": 55255 }, { "epoch": 1.402698277722075, "grad_norm": 0.37109375, "learning_rate": 7.438504181600834e-05, "loss": 0.428, "step": 55260 }, { "epoch": 1.4028251957710907, "grad_norm": 0.341796875, "learning_rate": 7.435634690138631e-05, "loss": 0.4196, "step": 55265 }, { "epoch": 1.4029521138201064, "grad_norm": 0.357421875, "learning_rate": 7.43276556985682e-05, "loss": 0.4083, "step": 55270 }, { "epoch": 1.403079031869122, "grad_norm": 0.322265625, "learning_rate": 7.42989682089619e-05, "loss": 0.4068, "step": 55275 }, { "epoch": 1.4032059499181377, "grad_norm": 0.34375, "learning_rate": 7.427028443397505e-05, "loss": 0.4062, "step": 55280 }, { "epoch": 1.4033328679671535, "grad_norm": 0.3515625, "learning_rate": 7.42416043750152e-05, "loss": 0.4189, "step": 55285 }, { "epoch": 1.4034597860161693, "grad_norm": 0.345703125, "learning_rate": 7.421292803348963e-05, "loss": 0.4446, "step": 55290 }, { "epoch": 1.403586704065185, "grad_norm": 0.35546875, "learning_rate": 7.41842554108055e-05, "loss": 0.4369, "step": 55295 }, { "epoch": 1.4037136221142008, "grad_norm": 0.337890625, "learning_rate": 7.415558650836971e-05, "loss": 0.4139, "step": 55300 }, { "epoch": 1.4038405401632166, "grad_norm": 0.33984375, "learning_rate": 7.412692132758912e-05, "loss": 0.4137, "step": 55305 }, { "epoch": 1.4039674582122323, "grad_norm": 0.345703125, "learning_rate": 7.409825986987027e-05, "loss": 0.4278, "step": 55310 }, { "epoch": 1.4040943762612481, "grad_norm": 0.33984375, "learning_rate": 7.406960213661954e-05, "loss": 0.4057, "step": 55315 }, { "epoch": 1.4042212943102639, "grad_norm": 0.349609375, "learning_rate": 7.404094812924321e-05, "loss": 0.4284, "step": 55320 }, { "epoch": 1.4043482123592796, "grad_norm": 0.365234375, "learning_rate": 7.401229784914733e-05, "loss": 0.4227, "step": 55325 }, { "epoch": 1.4044751304082954, "grad_norm": 0.384765625, "learning_rate": 7.398365129773772e-05, "loss": 0.3894, "step": 55330 }, { "epoch": 1.4046020484573112, "grad_norm": 0.3359375, "learning_rate": 7.395500847642005e-05, "loss": 0.4042, "step": 55335 }, { "epoch": 1.404728966506327, "grad_norm": 0.33203125, "learning_rate": 7.392636938659982e-05, "loss": 0.3977, "step": 55340 }, { "epoch": 1.4048558845553427, "grad_norm": 0.333984375, "learning_rate": 7.389773402968236e-05, "loss": 0.4142, "step": 55345 }, { "epoch": 1.4049828026043585, "grad_norm": 0.35546875, "learning_rate": 7.386910240707276e-05, "loss": 0.4263, "step": 55350 }, { "epoch": 1.4051097206533743, "grad_norm": 0.318359375, "learning_rate": 7.384047452017599e-05, "loss": 0.3888, "step": 55355 }, { "epoch": 1.4052366387023898, "grad_norm": 0.3203125, "learning_rate": 7.381185037039678e-05, "loss": 0.4252, "step": 55360 }, { "epoch": 1.4053635567514056, "grad_norm": 0.359375, "learning_rate": 7.378322995913975e-05, "loss": 0.4049, "step": 55365 }, { "epoch": 1.4054904748004213, "grad_norm": 0.421875, "learning_rate": 7.375461328780918e-05, "loss": 0.416, "step": 55370 }, { "epoch": 1.405617392849437, "grad_norm": 0.333984375, "learning_rate": 7.372600035780944e-05, "loss": 0.4173, "step": 55375 }, { "epoch": 1.4057443108984529, "grad_norm": 0.357421875, "learning_rate": 7.369739117054448e-05, "loss": 0.4363, "step": 55380 }, { "epoch": 1.4058712289474686, "grad_norm": 0.36328125, "learning_rate": 7.366878572741812e-05, "loss": 0.3922, "step": 55385 }, { "epoch": 1.4059981469964844, "grad_norm": 0.353515625, "learning_rate": 7.364018402983406e-05, "loss": 0.41, "step": 55390 }, { "epoch": 1.4061250650455002, "grad_norm": 0.359375, "learning_rate": 7.36115860791957e-05, "loss": 0.4143, "step": 55395 }, { "epoch": 1.406251983094516, "grad_norm": 0.337890625, "learning_rate": 7.358299187690642e-05, "loss": 0.4196, "step": 55400 }, { "epoch": 1.4063789011435317, "grad_norm": 0.37109375, "learning_rate": 7.35544014243693e-05, "loss": 0.4171, "step": 55405 }, { "epoch": 1.4065058191925472, "grad_norm": 0.33203125, "learning_rate": 7.352581472298725e-05, "loss": 0.4368, "step": 55410 }, { "epoch": 1.406632737241563, "grad_norm": 0.3515625, "learning_rate": 7.3497231774163e-05, "loss": 0.4087, "step": 55415 }, { "epoch": 1.4067596552905788, "grad_norm": 0.33984375, "learning_rate": 7.346865257929912e-05, "loss": 0.3774, "step": 55420 }, { "epoch": 1.4068865733395945, "grad_norm": 0.359375, "learning_rate": 7.344007713979797e-05, "loss": 0.4321, "step": 55425 }, { "epoch": 1.4070134913886103, "grad_norm": 0.345703125, "learning_rate": 7.341150545706168e-05, "loss": 0.3858, "step": 55430 }, { "epoch": 1.407140409437626, "grad_norm": 0.3671875, "learning_rate": 7.338293753249238e-05, "loss": 0.4097, "step": 55435 }, { "epoch": 1.4072673274866419, "grad_norm": 0.35546875, "learning_rate": 7.335437336749179e-05, "loss": 0.4118, "step": 55440 }, { "epoch": 1.4073942455356576, "grad_norm": 0.33984375, "learning_rate": 7.33258129634616e-05, "loss": 0.4297, "step": 55445 }, { "epoch": 1.4075211635846734, "grad_norm": 0.345703125, "learning_rate": 7.329725632180319e-05, "loss": 0.3882, "step": 55450 }, { "epoch": 1.4076480816336892, "grad_norm": 0.3671875, "learning_rate": 7.326870344391789e-05, "loss": 0.4117, "step": 55455 }, { "epoch": 1.407774999682705, "grad_norm": 0.37109375, "learning_rate": 7.324015433120672e-05, "loss": 0.4221, "step": 55460 }, { "epoch": 1.4079019177317207, "grad_norm": 0.32421875, "learning_rate": 7.321160898507062e-05, "loss": 0.3943, "step": 55465 }, { "epoch": 1.4080288357807365, "grad_norm": 0.34765625, "learning_rate": 7.318306740691022e-05, "loss": 0.4369, "step": 55470 }, { "epoch": 1.4081557538297522, "grad_norm": 0.34375, "learning_rate": 7.315452959812615e-05, "loss": 0.4087, "step": 55475 }, { "epoch": 1.408282671878768, "grad_norm": 0.3515625, "learning_rate": 7.31259955601187e-05, "loss": 0.3875, "step": 55480 }, { "epoch": 1.4084095899277838, "grad_norm": 0.345703125, "learning_rate": 7.309746529428804e-05, "loss": 0.4381, "step": 55485 }, { "epoch": 1.4085365079767993, "grad_norm": 0.34765625, "learning_rate": 7.30689388020341e-05, "loss": 0.4078, "step": 55490 }, { "epoch": 1.408663426025815, "grad_norm": 0.357421875, "learning_rate": 7.304041608475666e-05, "loss": 0.4113, "step": 55495 }, { "epoch": 1.4087903440748308, "grad_norm": 0.369140625, "learning_rate": 7.301189714385539e-05, "loss": 0.4183, "step": 55500 }, { "epoch": 1.4089172621238466, "grad_norm": 0.34765625, "learning_rate": 7.298338198072967e-05, "loss": 0.4072, "step": 55505 }, { "epoch": 1.4090441801728624, "grad_norm": 0.359375, "learning_rate": 7.29548705967787e-05, "loss": 0.4222, "step": 55510 }, { "epoch": 1.4091710982218781, "grad_norm": 0.357421875, "learning_rate": 7.292636299340154e-05, "loss": 0.4103, "step": 55515 }, { "epoch": 1.409298016270894, "grad_norm": 0.33984375, "learning_rate": 7.289785917199707e-05, "loss": 0.3903, "step": 55520 }, { "epoch": 1.4094249343199097, "grad_norm": 0.306640625, "learning_rate": 7.286935913396392e-05, "loss": 0.4099, "step": 55525 }, { "epoch": 1.4095518523689254, "grad_norm": 0.328125, "learning_rate": 7.28408628807006e-05, "loss": 0.4325, "step": 55530 }, { "epoch": 1.4096787704179412, "grad_norm": 0.3515625, "learning_rate": 7.281237041360538e-05, "loss": 0.4396, "step": 55535 }, { "epoch": 1.4098056884669568, "grad_norm": 0.337890625, "learning_rate": 7.278388173407643e-05, "loss": 0.3997, "step": 55540 }, { "epoch": 1.4099326065159725, "grad_norm": 0.373046875, "learning_rate": 7.275539684351157e-05, "loss": 0.422, "step": 55545 }, { "epoch": 1.4100595245649883, "grad_norm": 0.33203125, "learning_rate": 7.272691574330868e-05, "loss": 0.4166, "step": 55550 }, { "epoch": 1.410186442614004, "grad_norm": 0.3828125, "learning_rate": 7.269843843486525e-05, "loss": 0.4374, "step": 55555 }, { "epoch": 1.4103133606630198, "grad_norm": 0.361328125, "learning_rate": 7.26699649195786e-05, "loss": 0.4086, "step": 55560 }, { "epoch": 1.4104402787120356, "grad_norm": 0.365234375, "learning_rate": 7.264149519884602e-05, "loss": 0.4383, "step": 55565 }, { "epoch": 1.4105671967610514, "grad_norm": 0.34375, "learning_rate": 7.261302927406445e-05, "loss": 0.4164, "step": 55570 }, { "epoch": 1.4106941148100671, "grad_norm": 0.35546875, "learning_rate": 7.25845671466307e-05, "loss": 0.4232, "step": 55575 }, { "epoch": 1.410821032859083, "grad_norm": 0.361328125, "learning_rate": 7.255610881794142e-05, "loss": 0.4365, "step": 55580 }, { "epoch": 1.4109479509080987, "grad_norm": 0.376953125, "learning_rate": 7.252765428939304e-05, "loss": 0.4471, "step": 55585 }, { "epoch": 1.4110748689571144, "grad_norm": 0.36328125, "learning_rate": 7.249920356238179e-05, "loss": 0.4224, "step": 55590 }, { "epoch": 1.4112017870061302, "grad_norm": 0.34765625, "learning_rate": 7.247075663830374e-05, "loss": 0.4426, "step": 55595 }, { "epoch": 1.411328705055146, "grad_norm": 0.373046875, "learning_rate": 7.244231351855481e-05, "loss": 0.4376, "step": 55600 }, { "epoch": 1.4114556231041617, "grad_norm": 0.34375, "learning_rate": 7.241387420453064e-05, "loss": 0.4305, "step": 55605 }, { "epoch": 1.4115825411531775, "grad_norm": 0.337890625, "learning_rate": 7.238543869762676e-05, "loss": 0.4143, "step": 55610 }, { "epoch": 1.4117094592021933, "grad_norm": 0.349609375, "learning_rate": 7.235700699923848e-05, "loss": 0.4324, "step": 55615 }, { "epoch": 1.411836377251209, "grad_norm": 0.3359375, "learning_rate": 7.232857911076088e-05, "loss": 0.4016, "step": 55620 }, { "epoch": 1.4119632953002246, "grad_norm": 0.34765625, "learning_rate": 7.230015503358903e-05, "loss": 0.3963, "step": 55625 }, { "epoch": 1.4120902133492403, "grad_norm": 0.369140625, "learning_rate": 7.227173476911762e-05, "loss": 0.436, "step": 55630 }, { "epoch": 1.412217131398256, "grad_norm": 0.33203125, "learning_rate": 7.224331831874122e-05, "loss": 0.4056, "step": 55635 }, { "epoch": 1.4123440494472719, "grad_norm": 0.359375, "learning_rate": 7.221490568385416e-05, "loss": 0.4118, "step": 55640 }, { "epoch": 1.4124709674962876, "grad_norm": 0.3359375, "learning_rate": 7.218649686585074e-05, "loss": 0.4194, "step": 55645 }, { "epoch": 1.4125978855453034, "grad_norm": 0.349609375, "learning_rate": 7.215809186612493e-05, "loss": 0.4147, "step": 55650 }, { "epoch": 1.4127248035943192, "grad_norm": 0.3515625, "learning_rate": 7.212969068607055e-05, "loss": 0.4547, "step": 55655 }, { "epoch": 1.412851721643335, "grad_norm": 0.373046875, "learning_rate": 7.210129332708123e-05, "loss": 0.455, "step": 55660 }, { "epoch": 1.4129786396923507, "grad_norm": 0.353515625, "learning_rate": 7.207289979055041e-05, "loss": 0.443, "step": 55665 }, { "epoch": 1.4131055577413665, "grad_norm": 0.359375, "learning_rate": 7.204451007787136e-05, "loss": 0.4205, "step": 55670 }, { "epoch": 1.413232475790382, "grad_norm": 0.34765625, "learning_rate": 7.201612419043715e-05, "loss": 0.4173, "step": 55675 }, { "epoch": 1.4133593938393978, "grad_norm": 0.373046875, "learning_rate": 7.198774212964065e-05, "loss": 0.4206, "step": 55680 }, { "epoch": 1.4134863118884136, "grad_norm": 0.34765625, "learning_rate": 7.195936389687453e-05, "loss": 0.4062, "step": 55685 }, { "epoch": 1.4136132299374293, "grad_norm": 0.33203125, "learning_rate": 7.193098949353138e-05, "loss": 0.4183, "step": 55690 }, { "epoch": 1.413740147986445, "grad_norm": 0.337890625, "learning_rate": 7.190261892100347e-05, "loss": 0.3904, "step": 55695 }, { "epoch": 1.4138670660354609, "grad_norm": 0.33203125, "learning_rate": 7.187425218068295e-05, "loss": 0.3968, "step": 55700 }, { "epoch": 1.4139939840844766, "grad_norm": 0.365234375, "learning_rate": 7.184588927396176e-05, "loss": 0.4312, "step": 55705 }, { "epoch": 1.4141209021334924, "grad_norm": 0.35546875, "learning_rate": 7.181753020223164e-05, "loss": 0.4053, "step": 55710 }, { "epoch": 1.4142478201825082, "grad_norm": 0.375, "learning_rate": 7.178917496688412e-05, "loss": 0.442, "step": 55715 }, { "epoch": 1.414374738231524, "grad_norm": 0.357421875, "learning_rate": 7.176082356931068e-05, "loss": 0.4115, "step": 55720 }, { "epoch": 1.4145016562805397, "grad_norm": 0.3359375, "learning_rate": 7.173247601090249e-05, "loss": 0.424, "step": 55725 }, { "epoch": 1.4146285743295555, "grad_norm": 0.31640625, "learning_rate": 7.170413229305049e-05, "loss": 0.3886, "step": 55730 }, { "epoch": 1.4147554923785712, "grad_norm": 0.365234375, "learning_rate": 7.167579241714553e-05, "loss": 0.4168, "step": 55735 }, { "epoch": 1.414882410427587, "grad_norm": 0.357421875, "learning_rate": 7.164745638457825e-05, "loss": 0.4152, "step": 55740 }, { "epoch": 1.4150093284766028, "grad_norm": 0.35546875, "learning_rate": 7.161912419673901e-05, "loss": 0.4185, "step": 55745 }, { "epoch": 1.4151362465256185, "grad_norm": 0.361328125, "learning_rate": 7.15907958550182e-05, "loss": 0.4543, "step": 55750 }, { "epoch": 1.415263164574634, "grad_norm": 0.349609375, "learning_rate": 7.156247136080579e-05, "loss": 0.4104, "step": 55755 }, { "epoch": 1.4153900826236498, "grad_norm": 0.3515625, "learning_rate": 7.153415071549165e-05, "loss": 0.4188, "step": 55760 }, { "epoch": 1.4155170006726656, "grad_norm": 0.341796875, "learning_rate": 7.150583392046549e-05, "loss": 0.3989, "step": 55765 }, { "epoch": 1.4156439187216814, "grad_norm": 0.345703125, "learning_rate": 7.147752097711681e-05, "loss": 0.4088, "step": 55770 }, { "epoch": 1.4157708367706971, "grad_norm": 0.3671875, "learning_rate": 7.144921188683487e-05, "loss": 0.414, "step": 55775 }, { "epoch": 1.415897754819713, "grad_norm": 0.392578125, "learning_rate": 7.142090665100883e-05, "loss": 0.4563, "step": 55780 }, { "epoch": 1.4160246728687287, "grad_norm": 0.357421875, "learning_rate": 7.13926052710276e-05, "loss": 0.4225, "step": 55785 }, { "epoch": 1.4161515909177445, "grad_norm": 0.400390625, "learning_rate": 7.136430774827985e-05, "loss": 0.4426, "step": 55790 }, { "epoch": 1.4162785089667602, "grad_norm": 0.3671875, "learning_rate": 7.133601408415426e-05, "loss": 0.4404, "step": 55795 }, { "epoch": 1.416405427015776, "grad_norm": 0.326171875, "learning_rate": 7.130772428003913e-05, "loss": 0.406, "step": 55800 }, { "epoch": 1.4165323450647915, "grad_norm": 0.3359375, "learning_rate": 7.127943833732261e-05, "loss": 0.4067, "step": 55805 }, { "epoch": 1.4166592631138073, "grad_norm": 0.35546875, "learning_rate": 7.125115625739262e-05, "loss": 0.3998, "step": 55810 }, { "epoch": 1.416786181162823, "grad_norm": 0.33984375, "learning_rate": 7.122287804163711e-05, "loss": 0.3919, "step": 55815 }, { "epoch": 1.4169130992118388, "grad_norm": 0.349609375, "learning_rate": 7.119460369144359e-05, "loss": 0.4157, "step": 55820 }, { "epoch": 1.4170400172608546, "grad_norm": 0.314453125, "learning_rate": 7.116633320819945e-05, "loss": 0.4199, "step": 55825 }, { "epoch": 1.4171669353098704, "grad_norm": 0.33203125, "learning_rate": 7.113806659329197e-05, "loss": 0.419, "step": 55830 }, { "epoch": 1.4172938533588861, "grad_norm": 0.353515625, "learning_rate": 7.110980384810813e-05, "loss": 0.4371, "step": 55835 }, { "epoch": 1.417420771407902, "grad_norm": 0.328125, "learning_rate": 7.10815449740348e-05, "loss": 0.3991, "step": 55840 }, { "epoch": 1.4175476894569177, "grad_norm": 0.3359375, "learning_rate": 7.105328997245861e-05, "loss": 0.4539, "step": 55845 }, { "epoch": 1.4176746075059334, "grad_norm": 0.3359375, "learning_rate": 7.102503884476605e-05, "loss": 0.4059, "step": 55850 }, { "epoch": 1.4178015255549492, "grad_norm": 0.34375, "learning_rate": 7.099679159234336e-05, "loss": 0.4263, "step": 55855 }, { "epoch": 1.417928443603965, "grad_norm": 0.396484375, "learning_rate": 7.096854821657664e-05, "loss": 0.4607, "step": 55860 }, { "epoch": 1.4180553616529807, "grad_norm": 0.353515625, "learning_rate": 7.094030871885177e-05, "loss": 0.4234, "step": 55865 }, { "epoch": 1.4181822797019965, "grad_norm": 0.36328125, "learning_rate": 7.091207310055442e-05, "loss": 0.4648, "step": 55870 }, { "epoch": 1.4183091977510123, "grad_norm": 0.37109375, "learning_rate": 7.088384136307018e-05, "loss": 0.424, "step": 55875 }, { "epoch": 1.418436115800028, "grad_norm": 0.3515625, "learning_rate": 7.085561350778434e-05, "loss": 0.4229, "step": 55880 }, { "epoch": 1.4185630338490438, "grad_norm": 0.373046875, "learning_rate": 7.082738953608196e-05, "loss": 0.4181, "step": 55885 }, { "epoch": 1.4186899518980594, "grad_norm": 0.333984375, "learning_rate": 7.079916944934809e-05, "loss": 0.4062, "step": 55890 }, { "epoch": 1.4188168699470751, "grad_norm": 0.34765625, "learning_rate": 7.077095324896742e-05, "loss": 0.4394, "step": 55895 }, { "epoch": 1.4189437879960909, "grad_norm": 0.357421875, "learning_rate": 7.074274093632454e-05, "loss": 0.4326, "step": 55900 }, { "epoch": 1.4190707060451067, "grad_norm": 0.3671875, "learning_rate": 7.071453251280376e-05, "loss": 0.4295, "step": 55905 }, { "epoch": 1.4191976240941224, "grad_norm": 0.359375, "learning_rate": 7.06863279797893e-05, "loss": 0.418, "step": 55910 }, { "epoch": 1.4193245421431382, "grad_norm": 0.375, "learning_rate": 7.065812733866514e-05, "loss": 0.4157, "step": 55915 }, { "epoch": 1.419451460192154, "grad_norm": 0.361328125, "learning_rate": 7.062993059081506e-05, "loss": 0.4067, "step": 55920 }, { "epoch": 1.4195783782411697, "grad_norm": 0.3515625, "learning_rate": 7.060173773762266e-05, "loss": 0.4064, "step": 55925 }, { "epoch": 1.4197052962901855, "grad_norm": 0.318359375, "learning_rate": 7.057354878047137e-05, "loss": 0.4261, "step": 55930 }, { "epoch": 1.4198322143392013, "grad_norm": 0.361328125, "learning_rate": 7.054536372074434e-05, "loss": 0.4128, "step": 55935 }, { "epoch": 1.4199591323882168, "grad_norm": 0.36328125, "learning_rate": 7.051718255982472e-05, "loss": 0.411, "step": 55940 }, { "epoch": 1.4200860504372326, "grad_norm": 0.34765625, "learning_rate": 7.048900529909529e-05, "loss": 0.4089, "step": 55945 }, { "epoch": 1.4202129684862483, "grad_norm": 0.337890625, "learning_rate": 7.046083193993872e-05, "loss": 0.3906, "step": 55950 }, { "epoch": 1.420339886535264, "grad_norm": 0.375, "learning_rate": 7.043266248373742e-05, "loss": 0.4071, "step": 55955 }, { "epoch": 1.4204668045842799, "grad_norm": 0.3515625, "learning_rate": 7.040449693187363e-05, "loss": 0.4147, "step": 55960 }, { "epoch": 1.4205937226332956, "grad_norm": 0.333984375, "learning_rate": 7.037633528572952e-05, "loss": 0.4299, "step": 55965 }, { "epoch": 1.4207206406823114, "grad_norm": 0.349609375, "learning_rate": 7.034817754668692e-05, "loss": 0.4203, "step": 55970 }, { "epoch": 1.4208475587313272, "grad_norm": 0.353515625, "learning_rate": 7.032002371612752e-05, "loss": 0.4369, "step": 55975 }, { "epoch": 1.420974476780343, "grad_norm": 0.349609375, "learning_rate": 7.029187379543281e-05, "loss": 0.4243, "step": 55980 }, { "epoch": 1.4211013948293587, "grad_norm": 0.349609375, "learning_rate": 7.026372778598411e-05, "loss": 0.3985, "step": 55985 }, { "epoch": 1.4212283128783745, "grad_norm": 0.349609375, "learning_rate": 7.023558568916254e-05, "loss": 0.4235, "step": 55990 }, { "epoch": 1.4213552309273902, "grad_norm": 0.3515625, "learning_rate": 7.020744750634898e-05, "loss": 0.4283, "step": 55995 }, { "epoch": 1.421482148976406, "grad_norm": 0.3828125, "learning_rate": 7.017931323892415e-05, "loss": 0.4315, "step": 56000 }, { "epoch": 1.4216090670254218, "grad_norm": 0.34765625, "learning_rate": 7.015118288826867e-05, "loss": 0.4073, "step": 56005 }, { "epoch": 1.4217359850744375, "grad_norm": 0.392578125, "learning_rate": 7.012305645576283e-05, "loss": 0.3971, "step": 56010 }, { "epoch": 1.4218629031234533, "grad_norm": 0.3828125, "learning_rate": 7.00949339427868e-05, "loss": 0.4336, "step": 56015 }, { "epoch": 1.4219898211724689, "grad_norm": 0.35546875, "learning_rate": 7.006681535072054e-05, "loss": 0.4399, "step": 56020 }, { "epoch": 1.4221167392214846, "grad_norm": 0.353515625, "learning_rate": 7.00387006809438e-05, "loss": 0.3962, "step": 56025 }, { "epoch": 1.4222436572705004, "grad_norm": 0.337890625, "learning_rate": 7.001058993483617e-05, "loss": 0.3923, "step": 56030 }, { "epoch": 1.4223705753195162, "grad_norm": 0.349609375, "learning_rate": 6.998248311377703e-05, "loss": 0.4161, "step": 56035 }, { "epoch": 1.422497493368532, "grad_norm": 0.34765625, "learning_rate": 6.995438021914552e-05, "loss": 0.4204, "step": 56040 }, { "epoch": 1.4226244114175477, "grad_norm": 0.3203125, "learning_rate": 6.992628125232073e-05, "loss": 0.443, "step": 56045 }, { "epoch": 1.4227513294665635, "grad_norm": 0.33984375, "learning_rate": 6.989818621468141e-05, "loss": 0.3902, "step": 56050 }, { "epoch": 1.4228782475155792, "grad_norm": 0.3515625, "learning_rate": 6.987009510760622e-05, "loss": 0.4309, "step": 56055 }, { "epoch": 1.423005165564595, "grad_norm": 0.34765625, "learning_rate": 6.984200793247346e-05, "loss": 0.4099, "step": 56060 }, { "epoch": 1.4231320836136108, "grad_norm": 0.32421875, "learning_rate": 6.981392469066152e-05, "loss": 0.4152, "step": 56065 }, { "epoch": 1.4232590016626263, "grad_norm": 0.326171875, "learning_rate": 6.978584538354833e-05, "loss": 0.414, "step": 56070 }, { "epoch": 1.423385919711642, "grad_norm": 0.36328125, "learning_rate": 6.975777001251175e-05, "loss": 0.4346, "step": 56075 }, { "epoch": 1.4235128377606578, "grad_norm": 0.3359375, "learning_rate": 6.972969857892945e-05, "loss": 0.426, "step": 56080 }, { "epoch": 1.4236397558096736, "grad_norm": 0.34375, "learning_rate": 6.970163108417887e-05, "loss": 0.3809, "step": 56085 }, { "epoch": 1.4237666738586894, "grad_norm": 0.34765625, "learning_rate": 6.967356752963725e-05, "loss": 0.4298, "step": 56090 }, { "epoch": 1.4238935919077051, "grad_norm": 0.32421875, "learning_rate": 6.96455079166817e-05, "loss": 0.4238, "step": 56095 }, { "epoch": 1.424020509956721, "grad_norm": 0.3515625, "learning_rate": 6.961745224668905e-05, "loss": 0.4135, "step": 56100 }, { "epoch": 1.4241474280057367, "grad_norm": 0.33203125, "learning_rate": 6.958940052103599e-05, "loss": 0.3891, "step": 56105 }, { "epoch": 1.4242743460547524, "grad_norm": 0.361328125, "learning_rate": 6.956135274109903e-05, "loss": 0.4265, "step": 56110 }, { "epoch": 1.4244012641037682, "grad_norm": 0.353515625, "learning_rate": 6.953330890825438e-05, "loss": 0.4186, "step": 56115 }, { "epoch": 1.424528182152784, "grad_norm": 0.359375, "learning_rate": 6.950526902387827e-05, "loss": 0.4164, "step": 56120 }, { "epoch": 1.4246551002017998, "grad_norm": 0.375, "learning_rate": 6.947723308934656e-05, "loss": 0.4433, "step": 56125 }, { "epoch": 1.4247820182508155, "grad_norm": 0.34765625, "learning_rate": 6.944920110603488e-05, "loss": 0.3978, "step": 56130 }, { "epoch": 1.4249089362998313, "grad_norm": 0.3359375, "learning_rate": 6.942117307531886e-05, "loss": 0.4182, "step": 56135 }, { "epoch": 1.425035854348847, "grad_norm": 0.3515625, "learning_rate": 6.939314899857379e-05, "loss": 0.4012, "step": 56140 }, { "epoch": 1.4251627723978628, "grad_norm": 0.359375, "learning_rate": 6.936512887717479e-05, "loss": 0.4235, "step": 56145 }, { "epoch": 1.4252896904468786, "grad_norm": 0.34765625, "learning_rate": 6.933711271249678e-05, "loss": 0.4233, "step": 56150 }, { "epoch": 1.4254166084958941, "grad_norm": 0.357421875, "learning_rate": 6.930910050591454e-05, "loss": 0.4307, "step": 56155 }, { "epoch": 1.42554352654491, "grad_norm": 0.33984375, "learning_rate": 6.92810922588026e-05, "loss": 0.3941, "step": 56160 }, { "epoch": 1.4256704445939257, "grad_norm": 0.333984375, "learning_rate": 6.925308797253529e-05, "loss": 0.3844, "step": 56165 }, { "epoch": 1.4257973626429414, "grad_norm": 0.333984375, "learning_rate": 6.922508764848678e-05, "loss": 0.3827, "step": 56170 }, { "epoch": 1.4259242806919572, "grad_norm": 0.3515625, "learning_rate": 6.919709128803107e-05, "loss": 0.4203, "step": 56175 }, { "epoch": 1.426051198740973, "grad_norm": 0.32421875, "learning_rate": 6.916909889254189e-05, "loss": 0.3886, "step": 56180 }, { "epoch": 1.4261781167899887, "grad_norm": 0.359375, "learning_rate": 6.914111046339278e-05, "loss": 0.4224, "step": 56185 }, { "epoch": 1.4263050348390045, "grad_norm": 0.333984375, "learning_rate": 6.911312600195721e-05, "loss": 0.429, "step": 56190 }, { "epoch": 1.4264319528880203, "grad_norm": 0.37109375, "learning_rate": 6.908514550960834e-05, "loss": 0.4011, "step": 56195 }, { "epoch": 1.426558870937036, "grad_norm": 0.36328125, "learning_rate": 6.905716898771915e-05, "loss": 0.4447, "step": 56200 }, { "epoch": 1.4266857889860516, "grad_norm": 0.361328125, "learning_rate": 6.902919643766237e-05, "loss": 0.4503, "step": 56205 }, { "epoch": 1.4268127070350674, "grad_norm": 0.37109375, "learning_rate": 6.900122786081073e-05, "loss": 0.4495, "step": 56210 }, { "epoch": 1.4269396250840831, "grad_norm": 0.373046875, "learning_rate": 6.897326325853657e-05, "loss": 0.4428, "step": 56215 }, { "epoch": 1.4270665431330989, "grad_norm": 0.365234375, "learning_rate": 6.894530263221211e-05, "loss": 0.3886, "step": 56220 }, { "epoch": 1.4271934611821147, "grad_norm": 0.357421875, "learning_rate": 6.891734598320936e-05, "loss": 0.4206, "step": 56225 }, { "epoch": 1.4273203792311304, "grad_norm": 0.353515625, "learning_rate": 6.888939331290015e-05, "loss": 0.4381, "step": 56230 }, { "epoch": 1.4274472972801462, "grad_norm": 0.3515625, "learning_rate": 6.886144462265609e-05, "loss": 0.4165, "step": 56235 }, { "epoch": 1.427574215329162, "grad_norm": 0.34375, "learning_rate": 6.883349991384861e-05, "loss": 0.4143, "step": 56240 }, { "epoch": 1.4277011333781777, "grad_norm": 0.353515625, "learning_rate": 6.880555918784898e-05, "loss": 0.4605, "step": 56245 }, { "epoch": 1.4278280514271935, "grad_norm": 0.37109375, "learning_rate": 6.877762244602815e-05, "loss": 0.4035, "step": 56250 }, { "epoch": 1.4279549694762093, "grad_norm": 0.373046875, "learning_rate": 6.87496896897571e-05, "loss": 0.4353, "step": 56255 }, { "epoch": 1.428081887525225, "grad_norm": 0.34375, "learning_rate": 6.872176092040639e-05, "loss": 0.4254, "step": 56260 }, { "epoch": 1.4282088055742408, "grad_norm": 0.3359375, "learning_rate": 6.869383613934652e-05, "loss": 0.4026, "step": 56265 }, { "epoch": 1.4283357236232566, "grad_norm": 0.349609375, "learning_rate": 6.866591534794771e-05, "loss": 0.4342, "step": 56270 }, { "epoch": 1.4284626416722723, "grad_norm": 0.3671875, "learning_rate": 6.863799854758003e-05, "loss": 0.4441, "step": 56275 }, { "epoch": 1.428589559721288, "grad_norm": 0.328125, "learning_rate": 6.861008573961338e-05, "loss": 0.4123, "step": 56280 }, { "epoch": 1.4287164777703036, "grad_norm": 0.287109375, "learning_rate": 6.858217692541731e-05, "loss": 0.3934, "step": 56285 }, { "epoch": 1.4288433958193194, "grad_norm": 0.333984375, "learning_rate": 6.855427210636146e-05, "loss": 0.4321, "step": 56290 }, { "epoch": 1.4289703138683352, "grad_norm": 0.3671875, "learning_rate": 6.852637128381503e-05, "loss": 0.4334, "step": 56295 }, { "epoch": 1.429097231917351, "grad_norm": 0.3671875, "learning_rate": 6.84984744591471e-05, "loss": 0.409, "step": 56300 }, { "epoch": 1.4292241499663667, "grad_norm": 0.345703125, "learning_rate": 6.847058163372657e-05, "loss": 0.4025, "step": 56305 }, { "epoch": 1.4293510680153825, "grad_norm": 0.33203125, "learning_rate": 6.844269280892212e-05, "loss": 0.38, "step": 56310 }, { "epoch": 1.4294779860643982, "grad_norm": 0.361328125, "learning_rate": 6.84148079861022e-05, "loss": 0.4276, "step": 56315 }, { "epoch": 1.429604904113414, "grad_norm": 0.33984375, "learning_rate": 6.838692716663521e-05, "loss": 0.4121, "step": 56320 }, { "epoch": 1.4297318221624298, "grad_norm": 0.3359375, "learning_rate": 6.835905035188919e-05, "loss": 0.4007, "step": 56325 }, { "epoch": 1.4298587402114455, "grad_norm": 0.36328125, "learning_rate": 6.833117754323205e-05, "loss": 0.4185, "step": 56330 }, { "epoch": 1.429985658260461, "grad_norm": 0.3359375, "learning_rate": 6.830330874203149e-05, "loss": 0.4455, "step": 56335 }, { "epoch": 1.4301125763094769, "grad_norm": 0.369140625, "learning_rate": 6.827544394965503e-05, "loss": 0.4058, "step": 56340 }, { "epoch": 1.4302394943584926, "grad_norm": 0.341796875, "learning_rate": 6.824758316746999e-05, "loss": 0.4172, "step": 56345 }, { "epoch": 1.4303664124075084, "grad_norm": 0.349609375, "learning_rate": 6.821972639684346e-05, "loss": 0.403, "step": 56350 }, { "epoch": 1.4304933304565242, "grad_norm": 0.341796875, "learning_rate": 6.819187363914238e-05, "loss": 0.4119, "step": 56355 }, { "epoch": 1.43062024850554, "grad_norm": 0.330078125, "learning_rate": 6.816402489573343e-05, "loss": 0.4, "step": 56360 }, { "epoch": 1.4307471665545557, "grad_norm": 0.369140625, "learning_rate": 6.813618016798322e-05, "loss": 0.3995, "step": 56365 }, { "epoch": 1.4308740846035715, "grad_norm": 0.345703125, "learning_rate": 6.810833945725806e-05, "loss": 0.3944, "step": 56370 }, { "epoch": 1.4310010026525872, "grad_norm": 0.34375, "learning_rate": 6.8080502764924e-05, "loss": 0.4255, "step": 56375 }, { "epoch": 1.431127920701603, "grad_norm": 0.388671875, "learning_rate": 6.805267009234708e-05, "loss": 0.432, "step": 56380 }, { "epoch": 1.4312548387506188, "grad_norm": 0.341796875, "learning_rate": 6.802484144089302e-05, "loss": 0.4324, "step": 56385 }, { "epoch": 1.4313817567996345, "grad_norm": 0.328125, "learning_rate": 6.799701681192732e-05, "loss": 0.4273, "step": 56390 }, { "epoch": 1.4315086748486503, "grad_norm": 0.34765625, "learning_rate": 6.796919620681533e-05, "loss": 0.4197, "step": 56395 }, { "epoch": 1.431635592897666, "grad_norm": 0.36328125, "learning_rate": 6.794137962692223e-05, "loss": 0.4479, "step": 56400 }, { "epoch": 1.4317625109466818, "grad_norm": 0.353515625, "learning_rate": 6.791356707361293e-05, "loss": 0.4356, "step": 56405 }, { "epoch": 1.4318894289956976, "grad_norm": 0.375, "learning_rate": 6.788575854825218e-05, "loss": 0.4227, "step": 56410 }, { "epoch": 1.4320163470447134, "grad_norm": 0.337890625, "learning_rate": 6.785795405220459e-05, "loss": 0.4056, "step": 56415 }, { "epoch": 1.432143265093729, "grad_norm": 0.380859375, "learning_rate": 6.783015358683443e-05, "loss": 0.4313, "step": 56420 }, { "epoch": 1.4322701831427447, "grad_norm": 0.353515625, "learning_rate": 6.780235715350592e-05, "loss": 0.4099, "step": 56425 }, { "epoch": 1.4323971011917604, "grad_norm": 0.33203125, "learning_rate": 6.777456475358302e-05, "loss": 0.3949, "step": 56430 }, { "epoch": 1.4325240192407762, "grad_norm": 0.3515625, "learning_rate": 6.77467763884294e-05, "loss": 0.9604, "step": 56435 }, { "epoch": 1.432650937289792, "grad_norm": 0.3671875, "learning_rate": 6.771899205940876e-05, "loss": 0.3893, "step": 56440 }, { "epoch": 1.4327778553388077, "grad_norm": 0.353515625, "learning_rate": 6.769121176788438e-05, "loss": 0.3953, "step": 56445 }, { "epoch": 1.4329047733878235, "grad_norm": 0.345703125, "learning_rate": 6.766343551521942e-05, "loss": 0.4077, "step": 56450 }, { "epoch": 1.4330316914368393, "grad_norm": 0.3359375, "learning_rate": 6.763566330277692e-05, "loss": 0.4139, "step": 56455 }, { "epoch": 1.433158609485855, "grad_norm": 0.3515625, "learning_rate": 6.760789513191964e-05, "loss": 0.4104, "step": 56460 }, { "epoch": 1.4332855275348706, "grad_norm": 0.349609375, "learning_rate": 6.75801310040101e-05, "loss": 0.4187, "step": 56465 }, { "epoch": 1.4334124455838864, "grad_norm": 0.330078125, "learning_rate": 6.755237092041071e-05, "loss": 0.4179, "step": 56470 }, { "epoch": 1.4335393636329021, "grad_norm": 0.3984375, "learning_rate": 6.752461488248365e-05, "loss": 0.4383, "step": 56475 }, { "epoch": 1.433666281681918, "grad_norm": 0.357421875, "learning_rate": 6.749686289159087e-05, "loss": 0.4085, "step": 56480 }, { "epoch": 1.4337931997309337, "grad_norm": 0.376953125, "learning_rate": 6.746911494909416e-05, "loss": 0.4513, "step": 56485 }, { "epoch": 1.4339201177799494, "grad_norm": 0.373046875, "learning_rate": 6.744137105635511e-05, "loss": 0.4644, "step": 56490 }, { "epoch": 1.4340470358289652, "grad_norm": 0.32421875, "learning_rate": 6.74136312147351e-05, "loss": 0.4403, "step": 56495 }, { "epoch": 1.434173953877981, "grad_norm": 0.349609375, "learning_rate": 6.73858954255953e-05, "loss": 0.4183, "step": 56500 }, { "epoch": 1.4343008719269967, "grad_norm": 0.357421875, "learning_rate": 6.735816369029666e-05, "loss": 0.4161, "step": 56505 }, { "epoch": 1.4344277899760125, "grad_norm": 0.361328125, "learning_rate": 6.733043601020005e-05, "loss": 0.4166, "step": 56510 }, { "epoch": 1.4345547080250283, "grad_norm": 0.3515625, "learning_rate": 6.730271238666602e-05, "loss": 0.4263, "step": 56515 }, { "epoch": 1.434681626074044, "grad_norm": 0.373046875, "learning_rate": 6.727499282105493e-05, "loss": 0.4319, "step": 56520 }, { "epoch": 1.4348085441230598, "grad_norm": 0.341796875, "learning_rate": 6.724727731472702e-05, "loss": 0.4063, "step": 56525 }, { "epoch": 1.4349354621720756, "grad_norm": 0.341796875, "learning_rate": 6.721956586904218e-05, "loss": 0.4023, "step": 56530 }, { "epoch": 1.4350623802210913, "grad_norm": 0.35546875, "learning_rate": 6.719185848536034e-05, "loss": 0.4299, "step": 56535 }, { "epoch": 1.435189298270107, "grad_norm": 0.333984375, "learning_rate": 6.7164155165041e-05, "loss": 0.3971, "step": 56540 }, { "epoch": 1.4353162163191229, "grad_norm": 0.337890625, "learning_rate": 6.713645590944358e-05, "loss": 0.4051, "step": 56545 }, { "epoch": 1.4354431343681384, "grad_norm": 0.3515625, "learning_rate": 6.710876071992725e-05, "loss": 0.4115, "step": 56550 }, { "epoch": 1.4355700524171542, "grad_norm": 0.36328125, "learning_rate": 6.708106959785102e-05, "loss": 0.4353, "step": 56555 }, { "epoch": 1.43569697046617, "grad_norm": 0.361328125, "learning_rate": 6.705338254457368e-05, "loss": 0.4074, "step": 56560 }, { "epoch": 1.4358238885151857, "grad_norm": 0.349609375, "learning_rate": 6.702569956145374e-05, "loss": 0.4264, "step": 56565 }, { "epoch": 1.4359508065642015, "grad_norm": 0.365234375, "learning_rate": 6.699802064984975e-05, "loss": 0.4326, "step": 56570 }, { "epoch": 1.4360777246132173, "grad_norm": 0.3359375, "learning_rate": 6.69703458111198e-05, "loss": 0.4114, "step": 56575 }, { "epoch": 1.436204642662233, "grad_norm": 0.33984375, "learning_rate": 6.694267504662192e-05, "loss": 0.3879, "step": 56580 }, { "epoch": 1.4363315607112488, "grad_norm": 0.353515625, "learning_rate": 6.691500835771389e-05, "loss": 0.4071, "step": 56585 }, { "epoch": 1.4364584787602646, "grad_norm": 0.349609375, "learning_rate": 6.688734574575331e-05, "loss": 0.4341, "step": 56590 }, { "epoch": 1.4365853968092803, "grad_norm": 0.365234375, "learning_rate": 6.685968721209756e-05, "loss": 0.425, "step": 56595 }, { "epoch": 1.4367123148582959, "grad_norm": 0.390625, "learning_rate": 6.683203275810383e-05, "loss": 0.4668, "step": 56600 }, { "epoch": 1.4368392329073116, "grad_norm": 0.33203125, "learning_rate": 6.68043823851291e-05, "loss": 0.4162, "step": 56605 }, { "epoch": 1.4369661509563274, "grad_norm": 0.3203125, "learning_rate": 6.677673609453022e-05, "loss": 0.4319, "step": 56610 }, { "epoch": 1.4370930690053432, "grad_norm": 0.359375, "learning_rate": 6.674909388766374e-05, "loss": 0.4054, "step": 56615 }, { "epoch": 1.437219987054359, "grad_norm": 0.330078125, "learning_rate": 6.672145576588608e-05, "loss": 0.4282, "step": 56620 }, { "epoch": 1.4373469051033747, "grad_norm": 0.359375, "learning_rate": 6.66938217305534e-05, "loss": 0.4258, "step": 56625 }, { "epoch": 1.4374738231523905, "grad_norm": 0.333984375, "learning_rate": 6.666619178302165e-05, "loss": 0.4008, "step": 56630 }, { "epoch": 1.4376007412014062, "grad_norm": 0.33203125, "learning_rate": 6.663856592464673e-05, "loss": 0.4017, "step": 56635 }, { "epoch": 1.437727659250422, "grad_norm": 0.349609375, "learning_rate": 6.661094415678417e-05, "loss": 0.4194, "step": 56640 }, { "epoch": 1.4378545772994378, "grad_norm": 0.3359375, "learning_rate": 6.658332648078938e-05, "loss": 0.4104, "step": 56645 }, { "epoch": 1.4379814953484535, "grad_norm": 0.361328125, "learning_rate": 6.65557128980175e-05, "loss": 0.4058, "step": 56650 }, { "epoch": 1.4381084133974693, "grad_norm": 0.357421875, "learning_rate": 6.652810340982359e-05, "loss": 0.4365, "step": 56655 }, { "epoch": 1.438235331446485, "grad_norm": 0.33984375, "learning_rate": 6.650049801756236e-05, "loss": 0.4132, "step": 56660 }, { "epoch": 1.4383622494955008, "grad_norm": 0.337890625, "learning_rate": 6.647289672258845e-05, "loss": 0.4162, "step": 56665 }, { "epoch": 1.4384891675445166, "grad_norm": 0.373046875, "learning_rate": 6.644529952625622e-05, "loss": 0.4166, "step": 56670 }, { "epoch": 1.4386160855935324, "grad_norm": 0.353515625, "learning_rate": 6.641770642991985e-05, "loss": 0.449, "step": 56675 }, { "epoch": 1.438743003642548, "grad_norm": 0.33984375, "learning_rate": 6.639011743493328e-05, "loss": 0.4276, "step": 56680 }, { "epoch": 1.4388699216915637, "grad_norm": 0.359375, "learning_rate": 6.63625325426504e-05, "loss": 0.4302, "step": 56685 }, { "epoch": 1.4389968397405795, "grad_norm": 0.375, "learning_rate": 6.633495175442475e-05, "loss": 0.4418, "step": 56690 }, { "epoch": 1.4391237577895952, "grad_norm": 0.365234375, "learning_rate": 6.630737507160961e-05, "loss": 0.412, "step": 56695 }, { "epoch": 1.439250675838611, "grad_norm": 0.3671875, "learning_rate": 6.627980249555831e-05, "loss": 0.4254, "step": 56700 }, { "epoch": 1.4393775938876268, "grad_norm": 0.369140625, "learning_rate": 6.625223402762374e-05, "loss": 0.4197, "step": 56705 }, { "epoch": 1.4395045119366425, "grad_norm": 0.33984375, "learning_rate": 6.622466966915868e-05, "loss": 0.4312, "step": 56710 }, { "epoch": 1.4396314299856583, "grad_norm": 0.349609375, "learning_rate": 6.619710942151573e-05, "loss": 0.4222, "step": 56715 }, { "epoch": 1.439758348034674, "grad_norm": 0.33203125, "learning_rate": 6.616955328604722e-05, "loss": 0.4108, "step": 56720 }, { "epoch": 1.4398852660836898, "grad_norm": 0.3359375, "learning_rate": 6.614200126410533e-05, "loss": 0.4449, "step": 56725 }, { "epoch": 1.4400121841327054, "grad_norm": 0.33203125, "learning_rate": 6.611445335704206e-05, "loss": 0.4359, "step": 56730 }, { "epoch": 1.4401391021817211, "grad_norm": 0.34375, "learning_rate": 6.608690956620914e-05, "loss": 0.4298, "step": 56735 }, { "epoch": 1.440266020230737, "grad_norm": 0.34765625, "learning_rate": 6.605936989295814e-05, "loss": 0.4025, "step": 56740 }, { "epoch": 1.4403929382797527, "grad_norm": 0.365234375, "learning_rate": 6.603183433864042e-05, "loss": 0.4224, "step": 56745 }, { "epoch": 1.4405198563287684, "grad_norm": 0.37109375, "learning_rate": 6.600430290460713e-05, "loss": 0.4603, "step": 56750 }, { "epoch": 1.4406467743777842, "grad_norm": 0.365234375, "learning_rate": 6.597677559220918e-05, "loss": 0.4235, "step": 56755 }, { "epoch": 1.4407736924268, "grad_norm": 0.349609375, "learning_rate": 6.594925240279746e-05, "loss": 0.4316, "step": 56760 }, { "epoch": 1.4409006104758157, "grad_norm": 0.32421875, "learning_rate": 6.592173333772244e-05, "loss": 0.3949, "step": 56765 }, { "epoch": 1.4410275285248315, "grad_norm": 0.349609375, "learning_rate": 6.589421839833445e-05, "loss": 0.3984, "step": 56770 }, { "epoch": 1.4411544465738473, "grad_norm": 0.353515625, "learning_rate": 6.586670758598363e-05, "loss": 0.3975, "step": 56775 }, { "epoch": 1.441281364622863, "grad_norm": 0.39453125, "learning_rate": 6.583920090202001e-05, "loss": 0.443, "step": 56780 }, { "epoch": 1.4414082826718788, "grad_norm": 0.384765625, "learning_rate": 6.581169834779327e-05, "loss": 0.4579, "step": 56785 }, { "epoch": 1.4415352007208946, "grad_norm": 0.359375, "learning_rate": 6.578419992465296e-05, "loss": 0.4081, "step": 56790 }, { "epoch": 1.4416621187699104, "grad_norm": 0.349609375, "learning_rate": 6.57567056339484e-05, "loss": 0.4286, "step": 56795 }, { "epoch": 1.4417890368189261, "grad_norm": 0.349609375, "learning_rate": 6.572921547702875e-05, "loss": 0.4219, "step": 56800 }, { "epoch": 1.4419159548679419, "grad_norm": 0.3515625, "learning_rate": 6.570172945524291e-05, "loss": 0.4114, "step": 56805 }, { "epoch": 1.4420428729169577, "grad_norm": 0.39453125, "learning_rate": 6.567424756993967e-05, "loss": 0.448, "step": 56810 }, { "epoch": 1.4421697909659732, "grad_norm": 0.333984375, "learning_rate": 6.564676982246747e-05, "loss": 0.4228, "step": 56815 }, { "epoch": 1.442296709014989, "grad_norm": 0.337890625, "learning_rate": 6.561929621417463e-05, "loss": 0.4015, "step": 56820 }, { "epoch": 1.4424236270640047, "grad_norm": 0.341796875, "learning_rate": 6.559182674640937e-05, "loss": 0.3962, "step": 56825 }, { "epoch": 1.4425505451130205, "grad_norm": 0.34765625, "learning_rate": 6.556436142051954e-05, "loss": 0.4038, "step": 56830 }, { "epoch": 1.4426774631620363, "grad_norm": 0.353515625, "learning_rate": 6.553690023785287e-05, "loss": 0.4261, "step": 56835 }, { "epoch": 1.442804381211052, "grad_norm": 0.353515625, "learning_rate": 6.550944319975685e-05, "loss": 0.4388, "step": 56840 }, { "epoch": 1.4429312992600678, "grad_norm": 0.33984375, "learning_rate": 6.548199030757881e-05, "loss": 0.3999, "step": 56845 }, { "epoch": 1.4430582173090836, "grad_norm": 0.37109375, "learning_rate": 6.545454156266578e-05, "loss": 0.4451, "step": 56850 }, { "epoch": 1.4431851353580993, "grad_norm": 0.373046875, "learning_rate": 6.542709696636477e-05, "loss": 0.4205, "step": 56855 }, { "epoch": 1.443312053407115, "grad_norm": 0.373046875, "learning_rate": 6.539965652002243e-05, "loss": 0.4156, "step": 56860 }, { "epoch": 1.4434389714561306, "grad_norm": 0.369140625, "learning_rate": 6.537222022498523e-05, "loss": 0.4295, "step": 56865 }, { "epoch": 1.4435658895051464, "grad_norm": 0.365234375, "learning_rate": 6.534478808259948e-05, "loss": 0.4335, "step": 56870 }, { "epoch": 1.4436928075541622, "grad_norm": 0.369140625, "learning_rate": 6.531736009421126e-05, "loss": 0.3911, "step": 56875 }, { "epoch": 1.443819725603178, "grad_norm": 0.34765625, "learning_rate": 6.528993626116639e-05, "loss": 0.4063, "step": 56880 }, { "epoch": 1.4439466436521937, "grad_norm": 0.35546875, "learning_rate": 6.526251658481065e-05, "loss": 0.4292, "step": 56885 }, { "epoch": 1.4440735617012095, "grad_norm": 0.34765625, "learning_rate": 6.523510106648948e-05, "loss": 0.424, "step": 56890 }, { "epoch": 1.4442004797502253, "grad_norm": 0.369140625, "learning_rate": 6.520768970754813e-05, "loss": 0.416, "step": 56895 }, { "epoch": 1.444327397799241, "grad_norm": 0.349609375, "learning_rate": 6.518028250933167e-05, "loss": 0.4233, "step": 56900 }, { "epoch": 1.4444543158482568, "grad_norm": 0.375, "learning_rate": 6.515287947318497e-05, "loss": 0.4389, "step": 56905 }, { "epoch": 1.4445812338972726, "grad_norm": 0.375, "learning_rate": 6.512548060045266e-05, "loss": 0.4239, "step": 56910 }, { "epoch": 1.4447081519462883, "grad_norm": 0.35546875, "learning_rate": 6.509808589247922e-05, "loss": 0.4043, "step": 56915 }, { "epoch": 1.444835069995304, "grad_norm": 0.33984375, "learning_rate": 6.507069535060888e-05, "loss": 0.4176, "step": 56920 }, { "epoch": 1.4449619880443199, "grad_norm": 0.3203125, "learning_rate": 6.504330897618568e-05, "loss": 0.4269, "step": 56925 }, { "epoch": 1.4450889060933356, "grad_norm": 0.359375, "learning_rate": 6.501592677055343e-05, "loss": 0.4235, "step": 56930 }, { "epoch": 1.4452158241423514, "grad_norm": 0.36328125, "learning_rate": 6.498854873505584e-05, "loss": 0.4178, "step": 56935 }, { "epoch": 1.4453427421913672, "grad_norm": 0.33984375, "learning_rate": 6.496117487103632e-05, "loss": 0.4119, "step": 56940 }, { "epoch": 1.4454696602403827, "grad_norm": 0.36328125, "learning_rate": 6.493380517983799e-05, "loss": 0.4207, "step": 56945 }, { "epoch": 1.4455965782893985, "grad_norm": 0.357421875, "learning_rate": 6.490643966280404e-05, "loss": 0.4131, "step": 56950 }, { "epoch": 1.4457234963384142, "grad_norm": 0.345703125, "learning_rate": 6.487907832127718e-05, "loss": 0.4137, "step": 56955 }, { "epoch": 1.44585041438743, "grad_norm": 0.33984375, "learning_rate": 6.485172115660005e-05, "loss": 0.4226, "step": 56960 }, { "epoch": 1.4459773324364458, "grad_norm": 0.3515625, "learning_rate": 6.482436817011504e-05, "loss": 0.3918, "step": 56965 }, { "epoch": 1.4461042504854615, "grad_norm": 0.3359375, "learning_rate": 6.479701936316435e-05, "loss": 0.3835, "step": 56970 }, { "epoch": 1.4462311685344773, "grad_norm": 0.341796875, "learning_rate": 6.476967473708999e-05, "loss": 0.4212, "step": 56975 }, { "epoch": 1.446358086583493, "grad_norm": 0.37109375, "learning_rate": 6.474233429323373e-05, "loss": 0.4276, "step": 56980 }, { "epoch": 1.4464850046325088, "grad_norm": 0.337890625, "learning_rate": 6.471499803293717e-05, "loss": 0.422, "step": 56985 }, { "epoch": 1.4466119226815246, "grad_norm": 0.359375, "learning_rate": 6.468766595754169e-05, "loss": 0.4195, "step": 56990 }, { "epoch": 1.4467388407305402, "grad_norm": 0.341796875, "learning_rate": 6.466033806838845e-05, "loss": 0.4044, "step": 56995 }, { "epoch": 1.446865758779556, "grad_norm": 0.3515625, "learning_rate": 6.463301436681842e-05, "loss": 0.4036, "step": 57000 }, { "epoch": 1.4469926768285717, "grad_norm": 0.3515625, "learning_rate": 6.460569485417232e-05, "loss": 0.4174, "step": 57005 }, { "epoch": 1.4471195948775875, "grad_norm": 0.3359375, "learning_rate": 6.457837953179082e-05, "loss": 0.4196, "step": 57010 }, { "epoch": 1.4472465129266032, "grad_norm": 0.373046875, "learning_rate": 6.455106840101421e-05, "loss": 0.416, "step": 57015 }, { "epoch": 1.447373430975619, "grad_norm": 0.353515625, "learning_rate": 6.452376146318258e-05, "loss": 0.4664, "step": 57020 }, { "epoch": 1.4475003490246348, "grad_norm": 0.33203125, "learning_rate": 6.449645871963597e-05, "loss": 0.4452, "step": 57025 }, { "epoch": 1.4476272670736505, "grad_norm": 0.369140625, "learning_rate": 6.446916017171408e-05, "loss": 0.4413, "step": 57030 }, { "epoch": 1.4477541851226663, "grad_norm": 0.359375, "learning_rate": 6.444186582075643e-05, "loss": 0.4201, "step": 57035 }, { "epoch": 1.447881103171682, "grad_norm": 0.34765625, "learning_rate": 6.441457566810234e-05, "loss": 0.4165, "step": 57040 }, { "epoch": 1.4480080212206978, "grad_norm": 0.353515625, "learning_rate": 6.438728971509093e-05, "loss": 0.3983, "step": 57045 }, { "epoch": 1.4481349392697136, "grad_norm": 0.36328125, "learning_rate": 6.43600079630611e-05, "loss": 0.4395, "step": 57050 }, { "epoch": 1.4482618573187294, "grad_norm": 0.3671875, "learning_rate": 6.433273041335158e-05, "loss": 0.4555, "step": 57055 }, { "epoch": 1.4483887753677451, "grad_norm": 0.388671875, "learning_rate": 6.430545706730087e-05, "loss": 0.4359, "step": 57060 }, { "epoch": 1.448515693416761, "grad_norm": 0.3671875, "learning_rate": 6.42781879262472e-05, "loss": 0.4303, "step": 57065 }, { "epoch": 1.4486426114657767, "grad_norm": 0.333984375, "learning_rate": 6.425092299152868e-05, "loss": 0.4299, "step": 57070 }, { "epoch": 1.4487695295147924, "grad_norm": 0.36328125, "learning_rate": 6.422366226448327e-05, "loss": 0.414, "step": 57075 }, { "epoch": 1.448896447563808, "grad_norm": 0.33984375, "learning_rate": 6.419640574644856e-05, "loss": 0.4151, "step": 57080 }, { "epoch": 1.4490233656128237, "grad_norm": 0.32421875, "learning_rate": 6.416915343876206e-05, "loss": 0.4225, "step": 57085 }, { "epoch": 1.4491502836618395, "grad_norm": 0.349609375, "learning_rate": 6.414190534276101e-05, "loss": 0.4259, "step": 57090 }, { "epoch": 1.4492772017108553, "grad_norm": 0.365234375, "learning_rate": 6.41146614597824e-05, "loss": 0.3727, "step": 57095 }, { "epoch": 1.449404119759871, "grad_norm": 0.3515625, "learning_rate": 6.40874217911632e-05, "loss": 0.3998, "step": 57100 }, { "epoch": 1.4495310378088868, "grad_norm": 0.349609375, "learning_rate": 6.406018633824001e-05, "loss": 0.3991, "step": 57105 }, { "epoch": 1.4496579558579026, "grad_norm": 0.328125, "learning_rate": 6.40329551023492e-05, "loss": 0.3963, "step": 57110 }, { "epoch": 1.4497848739069183, "grad_norm": 0.365234375, "learning_rate": 6.400572808482708e-05, "loss": 0.4242, "step": 57115 }, { "epoch": 1.4499117919559341, "grad_norm": 0.326171875, "learning_rate": 6.397850528700961e-05, "loss": 0.407, "step": 57120 }, { "epoch": 1.4500387100049499, "grad_norm": 0.328125, "learning_rate": 6.395128671023262e-05, "loss": 0.3815, "step": 57125 }, { "epoch": 1.4501656280539654, "grad_norm": 0.33984375, "learning_rate": 6.392407235583171e-05, "loss": 0.4166, "step": 57130 }, { "epoch": 1.4502925461029812, "grad_norm": 0.369140625, "learning_rate": 6.389686222514224e-05, "loss": 0.4406, "step": 57135 }, { "epoch": 1.450419464151997, "grad_norm": 0.353515625, "learning_rate": 6.386965631949947e-05, "loss": 0.4251, "step": 57140 }, { "epoch": 1.4505463822010127, "grad_norm": 0.34765625, "learning_rate": 6.38424546402384e-05, "loss": 0.4303, "step": 57145 }, { "epoch": 1.4506733002500285, "grad_norm": 0.35546875, "learning_rate": 6.381525718869372e-05, "loss": 0.4348, "step": 57150 }, { "epoch": 1.4508002182990443, "grad_norm": 0.326171875, "learning_rate": 6.378806396620006e-05, "loss": 0.4417, "step": 57155 }, { "epoch": 1.45092713634806, "grad_norm": 0.396484375, "learning_rate": 6.376087497409174e-05, "loss": 0.4455, "step": 57160 }, { "epoch": 1.4510540543970758, "grad_norm": 0.34375, "learning_rate": 6.373369021370296e-05, "loss": 0.4071, "step": 57165 }, { "epoch": 1.4511809724460916, "grad_norm": 0.34375, "learning_rate": 6.370650968636762e-05, "loss": 0.4331, "step": 57170 }, { "epoch": 1.4513078904951073, "grad_norm": 0.36328125, "learning_rate": 6.367933339341942e-05, "loss": 0.4598, "step": 57175 }, { "epoch": 1.451434808544123, "grad_norm": 0.326171875, "learning_rate": 6.365216133619201e-05, "loss": 0.4115, "step": 57180 }, { "epoch": 1.4515617265931389, "grad_norm": 0.3359375, "learning_rate": 6.362499351601864e-05, "loss": 0.4097, "step": 57185 }, { "epoch": 1.4516886446421546, "grad_norm": 0.341796875, "learning_rate": 6.359782993423243e-05, "loss": 0.4049, "step": 57190 }, { "epoch": 1.4518155626911704, "grad_norm": 0.3515625, "learning_rate": 6.357067059216623e-05, "loss": 0.4078, "step": 57195 }, { "epoch": 1.4519424807401862, "grad_norm": 0.353515625, "learning_rate": 6.354351549115287e-05, "loss": 0.4386, "step": 57200 }, { "epoch": 1.452069398789202, "grad_norm": 0.353515625, "learning_rate": 6.351636463252474e-05, "loss": 0.4161, "step": 57205 }, { "epoch": 1.4521963168382175, "grad_norm": 0.35546875, "learning_rate": 6.348921801761416e-05, "loss": 0.4169, "step": 57210 }, { "epoch": 1.4523232348872332, "grad_norm": 0.34375, "learning_rate": 6.346207564775321e-05, "loss": 0.4051, "step": 57215 }, { "epoch": 1.452450152936249, "grad_norm": 0.349609375, "learning_rate": 6.343493752427371e-05, "loss": 0.4086, "step": 57220 }, { "epoch": 1.4525770709852648, "grad_norm": 0.35546875, "learning_rate": 6.340780364850734e-05, "loss": 0.4153, "step": 57225 }, { "epoch": 1.4527039890342806, "grad_norm": 0.357421875, "learning_rate": 6.338067402178558e-05, "loss": 0.4137, "step": 57230 }, { "epoch": 1.4528309070832963, "grad_norm": 0.33984375, "learning_rate": 6.335354864543961e-05, "loss": 0.3967, "step": 57235 }, { "epoch": 1.452957825132312, "grad_norm": 0.3828125, "learning_rate": 6.332642752080051e-05, "loss": 0.4383, "step": 57240 }, { "epoch": 1.4530847431813279, "grad_norm": 0.345703125, "learning_rate": 6.329931064919909e-05, "loss": 0.4164, "step": 57245 }, { "epoch": 1.4532116612303436, "grad_norm": 0.359375, "learning_rate": 6.32721980319659e-05, "loss": 0.4251, "step": 57250 }, { "epoch": 1.4533385792793594, "grad_norm": 0.3671875, "learning_rate": 6.324508967043145e-05, "loss": 0.4152, "step": 57255 }, { "epoch": 1.453465497328375, "grad_norm": 0.328125, "learning_rate": 6.32179855659259e-05, "loss": 0.4285, "step": 57260 }, { "epoch": 1.4535924153773907, "grad_norm": 0.384765625, "learning_rate": 6.319088571977919e-05, "loss": 0.4418, "step": 57265 }, { "epoch": 1.4537193334264065, "grad_norm": 0.3359375, "learning_rate": 6.316379013332116e-05, "loss": 0.4093, "step": 57270 }, { "epoch": 1.4538462514754222, "grad_norm": 0.359375, "learning_rate": 6.313669880788138e-05, "loss": 0.4191, "step": 57275 }, { "epoch": 1.453973169524438, "grad_norm": 0.337890625, "learning_rate": 6.310961174478919e-05, "loss": 0.4166, "step": 57280 }, { "epoch": 1.4541000875734538, "grad_norm": 0.345703125, "learning_rate": 6.308252894537372e-05, "loss": 0.4302, "step": 57285 }, { "epoch": 1.4542270056224695, "grad_norm": 0.365234375, "learning_rate": 6.305545041096395e-05, "loss": 0.4138, "step": 57290 }, { "epoch": 1.4543539236714853, "grad_norm": 0.33984375, "learning_rate": 6.302837614288859e-05, "loss": 0.4296, "step": 57295 }, { "epoch": 1.454480841720501, "grad_norm": 0.33203125, "learning_rate": 6.300130614247617e-05, "loss": 0.4191, "step": 57300 }, { "epoch": 1.4546077597695168, "grad_norm": 0.30859375, "learning_rate": 6.2974240411055e-05, "loss": 0.3908, "step": 57305 }, { "epoch": 1.4547346778185326, "grad_norm": 0.353515625, "learning_rate": 6.29471789499532e-05, "loss": 0.4044, "step": 57310 }, { "epoch": 1.4548615958675484, "grad_norm": 0.37109375, "learning_rate": 6.292012176049863e-05, "loss": 0.42, "step": 57315 }, { "epoch": 1.4549885139165641, "grad_norm": 0.35546875, "learning_rate": 6.289306884401896e-05, "loss": 0.4166, "step": 57320 }, { "epoch": 1.45511543196558, "grad_norm": 0.35546875, "learning_rate": 6.286602020184174e-05, "loss": 0.4008, "step": 57325 }, { "epoch": 1.4552423500145957, "grad_norm": 0.369140625, "learning_rate": 6.283897583529423e-05, "loss": 0.4121, "step": 57330 }, { "epoch": 1.4553692680636114, "grad_norm": 0.34765625, "learning_rate": 6.281193574570342e-05, "loss": 0.3936, "step": 57335 }, { "epoch": 1.4554961861126272, "grad_norm": 0.337890625, "learning_rate": 6.278489993439616e-05, "loss": 0.3926, "step": 57340 }, { "epoch": 1.4556231041616428, "grad_norm": 0.3359375, "learning_rate": 6.275786840269918e-05, "loss": 0.4317, "step": 57345 }, { "epoch": 1.4557500222106585, "grad_norm": 0.365234375, "learning_rate": 6.273084115193881e-05, "loss": 0.4342, "step": 57350 }, { "epoch": 1.4558769402596743, "grad_norm": 0.3515625, "learning_rate": 6.270381818344134e-05, "loss": 0.3822, "step": 57355 }, { "epoch": 1.45600385830869, "grad_norm": 0.349609375, "learning_rate": 6.267679949853272e-05, "loss": 0.4293, "step": 57360 }, { "epoch": 1.4561307763577058, "grad_norm": 0.34765625, "learning_rate": 6.264978509853877e-05, "loss": 0.4364, "step": 57365 }, { "epoch": 1.4562576944067216, "grad_norm": 0.375, "learning_rate": 6.262277498478507e-05, "loss": 0.424, "step": 57370 }, { "epoch": 1.4563846124557374, "grad_norm": 0.34375, "learning_rate": 6.2595769158597e-05, "loss": 0.4106, "step": 57375 }, { "epoch": 1.4565115305047531, "grad_norm": 0.341796875, "learning_rate": 6.256876762129971e-05, "loss": 0.4215, "step": 57380 }, { "epoch": 1.456638448553769, "grad_norm": 0.369140625, "learning_rate": 6.254177037421811e-05, "loss": 0.42, "step": 57385 }, { "epoch": 1.4567653666027847, "grad_norm": 0.3359375, "learning_rate": 6.251477741867706e-05, "loss": 0.4273, "step": 57390 }, { "epoch": 1.4568922846518002, "grad_norm": 0.40234375, "learning_rate": 6.248778875600104e-05, "loss": 0.4612, "step": 57395 }, { "epoch": 1.457019202700816, "grad_norm": 0.35546875, "learning_rate": 6.246080438751436e-05, "loss": 0.3902, "step": 57400 }, { "epoch": 1.4571461207498317, "grad_norm": 0.373046875, "learning_rate": 6.243382431454112e-05, "loss": 0.4188, "step": 57405 }, { "epoch": 1.4572730387988475, "grad_norm": 0.37890625, "learning_rate": 6.240684853840527e-05, "loss": 0.3989, "step": 57410 }, { "epoch": 1.4573999568478633, "grad_norm": 0.369140625, "learning_rate": 6.237987706043045e-05, "loss": 0.4049, "step": 57415 }, { "epoch": 1.457526874896879, "grad_norm": 0.333984375, "learning_rate": 6.235290988194011e-05, "loss": 0.3987, "step": 57420 }, { "epoch": 1.4576537929458948, "grad_norm": 0.337890625, "learning_rate": 6.232594700425764e-05, "loss": 0.4109, "step": 57425 }, { "epoch": 1.4577807109949106, "grad_norm": 0.34765625, "learning_rate": 6.2298988428706e-05, "loss": 0.4096, "step": 57430 }, { "epoch": 1.4579076290439263, "grad_norm": 0.37109375, "learning_rate": 6.227203415660807e-05, "loss": 0.418, "step": 57435 }, { "epoch": 1.4580345470929421, "grad_norm": 0.337890625, "learning_rate": 6.22450841892865e-05, "loss": 0.4427, "step": 57440 }, { "epoch": 1.4581614651419579, "grad_norm": 0.341796875, "learning_rate": 6.221813852806366e-05, "loss": 0.3924, "step": 57445 }, { "epoch": 1.4582883831909736, "grad_norm": 0.359375, "learning_rate": 6.219119717426177e-05, "loss": 0.4357, "step": 57450 }, { "epoch": 1.4584153012399894, "grad_norm": 0.353515625, "learning_rate": 6.216426012920288e-05, "loss": 0.4267, "step": 57455 }, { "epoch": 1.4585422192890052, "grad_norm": 0.38671875, "learning_rate": 6.213732739420878e-05, "loss": 0.4143, "step": 57460 }, { "epoch": 1.458669137338021, "grad_norm": 0.333984375, "learning_rate": 6.211039897060102e-05, "loss": 0.3979, "step": 57465 }, { "epoch": 1.4587960553870367, "grad_norm": 0.3515625, "learning_rate": 6.208347485970098e-05, "loss": 0.4352, "step": 57470 }, { "epoch": 1.4589229734360523, "grad_norm": 0.37890625, "learning_rate": 6.20565550628298e-05, "loss": 0.4424, "step": 57475 }, { "epoch": 1.459049891485068, "grad_norm": 0.3515625, "learning_rate": 6.202963958130843e-05, "loss": 0.4029, "step": 57480 }, { "epoch": 1.4591768095340838, "grad_norm": 0.33984375, "learning_rate": 6.200272841645761e-05, "loss": 0.445, "step": 57485 }, { "epoch": 1.4593037275830996, "grad_norm": 0.345703125, "learning_rate": 6.197582156959784e-05, "loss": 0.4321, "step": 57490 }, { "epoch": 1.4594306456321153, "grad_norm": 0.330078125, "learning_rate": 6.19489190420494e-05, "loss": 0.4165, "step": 57495 }, { "epoch": 1.459557563681131, "grad_norm": 0.369140625, "learning_rate": 6.192202083513247e-05, "loss": 0.4334, "step": 57500 }, { "epoch": 1.4596844817301469, "grad_norm": 0.333984375, "learning_rate": 6.189512695016691e-05, "loss": 0.401, "step": 57505 }, { "epoch": 1.4598113997791626, "grad_norm": 0.322265625, "learning_rate": 6.186823738847231e-05, "loss": 0.3989, "step": 57510 }, { "epoch": 1.4599383178281784, "grad_norm": 0.35546875, "learning_rate": 6.184135215136825e-05, "loss": 0.427, "step": 57515 }, { "epoch": 1.4600652358771942, "grad_norm": 0.3359375, "learning_rate": 6.181447124017392e-05, "loss": 0.4241, "step": 57520 }, { "epoch": 1.4601921539262097, "grad_norm": 0.328125, "learning_rate": 6.178759465620837e-05, "loss": 0.4119, "step": 57525 }, { "epoch": 1.4603190719752255, "grad_norm": 0.341796875, "learning_rate": 6.176072240079041e-05, "loss": 0.4222, "step": 57530 }, { "epoch": 1.4604459900242412, "grad_norm": 0.33984375, "learning_rate": 6.173385447523864e-05, "loss": 0.4213, "step": 57535 }, { "epoch": 1.460572908073257, "grad_norm": 0.36328125, "learning_rate": 6.170699088087148e-05, "loss": 0.4186, "step": 57540 }, { "epoch": 1.4606998261222728, "grad_norm": 0.330078125, "learning_rate": 6.168013161900711e-05, "loss": 0.4036, "step": 57545 }, { "epoch": 1.4608267441712885, "grad_norm": 0.345703125, "learning_rate": 6.165327669096349e-05, "loss": 0.3996, "step": 57550 }, { "epoch": 1.4609536622203043, "grad_norm": 0.345703125, "learning_rate": 6.162642609805839e-05, "loss": 0.393, "step": 57555 }, { "epoch": 1.46108058026932, "grad_norm": 0.345703125, "learning_rate": 6.159957984160936e-05, "loss": 0.3955, "step": 57560 }, { "epoch": 1.4612074983183359, "grad_norm": 0.353515625, "learning_rate": 6.157273792293375e-05, "loss": 0.4123, "step": 57565 }, { "epoch": 1.4613344163673516, "grad_norm": 0.353515625, "learning_rate": 6.154590034334866e-05, "loss": 0.4298, "step": 57570 }, { "epoch": 1.4614613344163674, "grad_norm": 0.35546875, "learning_rate": 6.151906710417093e-05, "loss": 0.4105, "step": 57575 }, { "epoch": 1.4615882524653832, "grad_norm": 0.3359375, "learning_rate": 6.14922382067174e-05, "loss": 0.4061, "step": 57580 }, { "epoch": 1.461715170514399, "grad_norm": 0.35546875, "learning_rate": 6.146541365230443e-05, "loss": 0.4311, "step": 57585 }, { "epoch": 1.4618420885634147, "grad_norm": 0.35546875, "learning_rate": 6.14385934422484e-05, "loss": 0.4521, "step": 57590 }, { "epoch": 1.4619690066124305, "grad_norm": 0.326171875, "learning_rate": 6.141177757786531e-05, "loss": 0.3928, "step": 57595 }, { "epoch": 1.4620959246614462, "grad_norm": 0.390625, "learning_rate": 6.1384966060471e-05, "loss": 0.4124, "step": 57600 }, { "epoch": 1.462222842710462, "grad_norm": 0.359375, "learning_rate": 6.135815889138112e-05, "loss": 0.4135, "step": 57605 }, { "epoch": 1.4623497607594775, "grad_norm": 0.35546875, "learning_rate": 6.133135607191108e-05, "loss": 0.4242, "step": 57610 }, { "epoch": 1.4624766788084933, "grad_norm": 0.37109375, "learning_rate": 6.130455760337605e-05, "loss": 0.4365, "step": 57615 }, { "epoch": 1.462603596857509, "grad_norm": 0.37109375, "learning_rate": 6.127776348709107e-05, "loss": 0.4183, "step": 57620 }, { "epoch": 1.4627305149065248, "grad_norm": 0.376953125, "learning_rate": 6.12509737243709e-05, "loss": 0.4213, "step": 57625 }, { "epoch": 1.4628574329555406, "grad_norm": 0.34375, "learning_rate": 6.122418831653008e-05, "loss": 0.4274, "step": 57630 }, { "epoch": 1.4629843510045564, "grad_norm": 0.345703125, "learning_rate": 6.119740726488298e-05, "loss": 0.396, "step": 57635 }, { "epoch": 1.4631112690535721, "grad_norm": 0.35546875, "learning_rate": 6.117063057074368e-05, "loss": 0.422, "step": 57640 }, { "epoch": 1.463238187102588, "grad_norm": 0.369140625, "learning_rate": 6.11438582354262e-05, "loss": 0.4173, "step": 57645 }, { "epoch": 1.4633651051516037, "grad_norm": 0.3359375, "learning_rate": 6.11170902602442e-05, "loss": 0.4306, "step": 57650 }, { "epoch": 1.4634920232006194, "grad_norm": 0.35546875, "learning_rate": 6.109032664651119e-05, "loss": 0.4071, "step": 57655 }, { "epoch": 1.463618941249635, "grad_norm": 0.353515625, "learning_rate": 6.106356739554042e-05, "loss": 0.4358, "step": 57660 }, { "epoch": 1.4637458592986508, "grad_norm": 0.32421875, "learning_rate": 6.103681250864492e-05, "loss": 0.4218, "step": 57665 }, { "epoch": 1.4638727773476665, "grad_norm": 0.345703125, "learning_rate": 6.101006198713766e-05, "loss": 0.4156, "step": 57670 }, { "epoch": 1.4639996953966823, "grad_norm": 0.36328125, "learning_rate": 6.09833158323312e-05, "loss": 0.3906, "step": 57675 }, { "epoch": 1.464126613445698, "grad_norm": 0.357421875, "learning_rate": 6.095657404553797e-05, "loss": 0.4374, "step": 57680 }, { "epoch": 1.4642535314947138, "grad_norm": 0.35546875, "learning_rate": 6.092983662807019e-05, "loss": 0.4135, "step": 57685 }, { "epoch": 1.4643804495437296, "grad_norm": 0.359375, "learning_rate": 6.0903103581239834e-05, "loss": 0.4148, "step": 57690 }, { "epoch": 1.4645073675927454, "grad_norm": 0.359375, "learning_rate": 6.0876374906358716e-05, "loss": 0.4027, "step": 57695 }, { "epoch": 1.4646342856417611, "grad_norm": 0.357421875, "learning_rate": 6.0849650604738296e-05, "loss": 0.4151, "step": 57700 }, { "epoch": 1.464761203690777, "grad_norm": 0.357421875, "learning_rate": 6.082293067769009e-05, "loss": 0.4303, "step": 57705 }, { "epoch": 1.4648881217397927, "grad_norm": 0.353515625, "learning_rate": 6.079621512652515e-05, "loss": 0.4328, "step": 57710 }, { "epoch": 1.4650150397888084, "grad_norm": 0.35546875, "learning_rate": 6.076950395255438e-05, "loss": 0.4281, "step": 57715 }, { "epoch": 1.4651419578378242, "grad_norm": 0.33984375, "learning_rate": 6.074279715708852e-05, "loss": 0.4059, "step": 57720 }, { "epoch": 1.46526887588684, "grad_norm": 0.35546875, "learning_rate": 6.0716094741438056e-05, "loss": 0.4293, "step": 57725 }, { "epoch": 1.4653957939358557, "grad_norm": 0.36328125, "learning_rate": 6.068939670691324e-05, "loss": 0.4049, "step": 57730 }, { "epoch": 1.4655227119848715, "grad_norm": 0.333984375, "learning_rate": 6.0662703054824166e-05, "loss": 0.3897, "step": 57735 }, { "epoch": 1.465649630033887, "grad_norm": 0.3125, "learning_rate": 6.063601378648061e-05, "loss": 0.4128, "step": 57740 }, { "epoch": 1.4657765480829028, "grad_norm": 0.34375, "learning_rate": 6.060932890319231e-05, "loss": 0.4179, "step": 57745 }, { "epoch": 1.4659034661319186, "grad_norm": 0.361328125, "learning_rate": 6.058264840626862e-05, "loss": 0.4275, "step": 57750 }, { "epoch": 1.4660303841809343, "grad_norm": 0.322265625, "learning_rate": 6.055597229701877e-05, "loss": 0.419, "step": 57755 }, { "epoch": 1.46615730222995, "grad_norm": 0.37109375, "learning_rate": 6.052930057675174e-05, "loss": 0.4147, "step": 57760 }, { "epoch": 1.4662842202789659, "grad_norm": 0.33984375, "learning_rate": 6.050263324677622e-05, "loss": 0.4065, "step": 57765 }, { "epoch": 1.4664111383279816, "grad_norm": 0.373046875, "learning_rate": 6.047597030840089e-05, "loss": 0.413, "step": 57770 }, { "epoch": 1.4665380563769974, "grad_norm": 0.365234375, "learning_rate": 6.044931176293404e-05, "loss": 0.4235, "step": 57775 }, { "epoch": 1.4666649744260132, "grad_norm": 0.376953125, "learning_rate": 6.042265761168379e-05, "loss": 0.3945, "step": 57780 }, { "epoch": 1.466791892475029, "grad_norm": 0.353515625, "learning_rate": 6.039600785595805e-05, "loss": 0.4398, "step": 57785 }, { "epoch": 1.4669188105240445, "grad_norm": 0.330078125, "learning_rate": 6.0369362497064515e-05, "loss": 0.4114, "step": 57790 }, { "epoch": 1.4670457285730603, "grad_norm": 0.361328125, "learning_rate": 6.034272153631066e-05, "loss": 0.419, "step": 57795 }, { "epoch": 1.467172646622076, "grad_norm": 0.365234375, "learning_rate": 6.0316084975003735e-05, "loss": 0.4182, "step": 57800 }, { "epoch": 1.4672995646710918, "grad_norm": 0.318359375, "learning_rate": 6.0289452814450796e-05, "loss": 0.3959, "step": 57805 }, { "epoch": 1.4674264827201076, "grad_norm": 0.37890625, "learning_rate": 6.0262825055958686e-05, "loss": 0.4438, "step": 57810 }, { "epoch": 1.4675534007691233, "grad_norm": 0.353515625, "learning_rate": 6.0236201700834e-05, "loss": 0.4339, "step": 57815 }, { "epoch": 1.467680318818139, "grad_norm": 0.345703125, "learning_rate": 6.020958275038307e-05, "loss": 0.4186, "step": 57820 }, { "epoch": 1.4678072368671549, "grad_norm": 0.353515625, "learning_rate": 6.018296820591222e-05, "loss": 0.3992, "step": 57825 }, { "epoch": 1.4679341549161706, "grad_norm": 0.34375, "learning_rate": 6.0156358068727275e-05, "loss": 0.4151, "step": 57830 }, { "epoch": 1.4680610729651864, "grad_norm": 0.357421875, "learning_rate": 6.01297523401341e-05, "loss": 0.4066, "step": 57835 }, { "epoch": 1.4681879910142022, "grad_norm": 0.357421875, "learning_rate": 6.01031510214382e-05, "loss": 0.3917, "step": 57840 }, { "epoch": 1.468314909063218, "grad_norm": 0.345703125, "learning_rate": 6.007655411394486e-05, "loss": 0.4313, "step": 57845 }, { "epoch": 1.4684418271122337, "grad_norm": 0.369140625, "learning_rate": 6.004996161895918e-05, "loss": 0.4285, "step": 57850 }, { "epoch": 1.4685687451612495, "grad_norm": 0.37109375, "learning_rate": 6.002337353778607e-05, "loss": 0.4509, "step": 57855 }, { "epoch": 1.4686956632102652, "grad_norm": 0.376953125, "learning_rate": 5.999678987173016e-05, "loss": 0.4166, "step": 57860 }, { "epoch": 1.468822581259281, "grad_norm": 0.3828125, "learning_rate": 5.997021062209593e-05, "loss": 0.4453, "step": 57865 }, { "epoch": 1.4689494993082968, "grad_norm": 0.365234375, "learning_rate": 5.9943635790187606e-05, "loss": 0.4255, "step": 57870 }, { "epoch": 1.4690764173573123, "grad_norm": 0.32421875, "learning_rate": 5.99170653773092e-05, "loss": 0.4212, "step": 57875 }, { "epoch": 1.469203335406328, "grad_norm": 0.341796875, "learning_rate": 5.98904993847645e-05, "loss": 0.4011, "step": 57880 }, { "epoch": 1.4693302534553438, "grad_norm": 0.33203125, "learning_rate": 5.986393781385712e-05, "loss": 0.396, "step": 57885 }, { "epoch": 1.4694571715043596, "grad_norm": 0.353515625, "learning_rate": 5.983738066589034e-05, "loss": 0.4093, "step": 57890 }, { "epoch": 1.4695840895533754, "grad_norm": 0.37109375, "learning_rate": 5.9810827942167426e-05, "loss": 0.4503, "step": 57895 }, { "epoch": 1.4697110076023912, "grad_norm": 0.345703125, "learning_rate": 5.978427964399126e-05, "loss": 0.4078, "step": 57900 }, { "epoch": 1.469837925651407, "grad_norm": 0.34375, "learning_rate": 5.975773577266457e-05, "loss": 0.4188, "step": 57905 }, { "epoch": 1.4699648437004227, "grad_norm": 0.37109375, "learning_rate": 5.973119632948977e-05, "loss": 0.4336, "step": 57910 }, { "epoch": 1.4700917617494385, "grad_norm": 0.349609375, "learning_rate": 5.970466131576927e-05, "loss": 0.4245, "step": 57915 }, { "epoch": 1.4702186797984542, "grad_norm": 0.330078125, "learning_rate": 5.967813073280507e-05, "loss": 0.3986, "step": 57920 }, { "epoch": 1.4703455978474698, "grad_norm": 0.34375, "learning_rate": 5.9651604581899005e-05, "loss": 0.4059, "step": 57925 }, { "epoch": 1.4704725158964855, "grad_norm": 0.32421875, "learning_rate": 5.962508286435272e-05, "loss": 0.3959, "step": 57930 }, { "epoch": 1.4705994339455013, "grad_norm": 0.345703125, "learning_rate": 5.959856558146762e-05, "loss": 0.4107, "step": 57935 }, { "epoch": 1.470726351994517, "grad_norm": 0.34375, "learning_rate": 5.9572052734544906e-05, "loss": 0.423, "step": 57940 }, { "epoch": 1.4708532700435328, "grad_norm": 0.38671875, "learning_rate": 5.954554432488553e-05, "loss": 0.3986, "step": 57945 }, { "epoch": 1.4709801880925486, "grad_norm": 0.34765625, "learning_rate": 5.951904035379027e-05, "loss": 0.4057, "step": 57950 }, { "epoch": 1.4711071061415644, "grad_norm": 0.361328125, "learning_rate": 5.94925408225596e-05, "loss": 0.4052, "step": 57955 }, { "epoch": 1.4712340241905801, "grad_norm": 0.3671875, "learning_rate": 5.946604573249395e-05, "loss": 0.3948, "step": 57960 }, { "epoch": 1.471360942239596, "grad_norm": 0.34375, "learning_rate": 5.9439555084893385e-05, "loss": 0.4207, "step": 57965 }, { "epoch": 1.4714878602886117, "grad_norm": 0.36328125, "learning_rate": 5.9413068881057754e-05, "loss": 0.4392, "step": 57970 }, { "epoch": 1.4716147783376274, "grad_norm": 0.357421875, "learning_rate": 5.938658712228678e-05, "loss": 0.4179, "step": 57975 }, { "epoch": 1.4717416963866432, "grad_norm": 0.35546875, "learning_rate": 5.936010980987986e-05, "loss": 0.435, "step": 57980 }, { "epoch": 1.471868614435659, "grad_norm": 0.37109375, "learning_rate": 5.933363694513619e-05, "loss": 0.4423, "step": 57985 }, { "epoch": 1.4719955324846747, "grad_norm": 0.33984375, "learning_rate": 5.930716852935491e-05, "loss": 0.3941, "step": 57990 }, { "epoch": 1.4721224505336905, "grad_norm": 0.359375, "learning_rate": 5.928070456383472e-05, "loss": 0.4096, "step": 57995 }, { "epoch": 1.4722493685827063, "grad_norm": 0.375, "learning_rate": 5.925424504987423e-05, "loss": 0.4308, "step": 58000 }, { "epoch": 1.4723762866317218, "grad_norm": 0.337890625, "learning_rate": 5.9227789988771794e-05, "loss": 0.4064, "step": 58005 }, { "epoch": 1.4725032046807376, "grad_norm": 0.345703125, "learning_rate": 5.920133938182555e-05, "loss": 0.4015, "step": 58010 }, { "epoch": 1.4726301227297534, "grad_norm": 0.3515625, "learning_rate": 5.9174893230333347e-05, "loss": 0.4402, "step": 58015 }, { "epoch": 1.4727570407787691, "grad_norm": 0.35546875, "learning_rate": 5.914845153559301e-05, "loss": 0.4492, "step": 58020 }, { "epoch": 1.472883958827785, "grad_norm": 0.34375, "learning_rate": 5.912201429890197e-05, "loss": 0.4065, "step": 58025 }, { "epoch": 1.4730108768768007, "grad_norm": 0.361328125, "learning_rate": 5.9095581521557486e-05, "loss": 0.4167, "step": 58030 }, { "epoch": 1.4731377949258164, "grad_norm": 0.369140625, "learning_rate": 5.906915320485661e-05, "loss": 0.4435, "step": 58035 }, { "epoch": 1.4732647129748322, "grad_norm": 0.36328125, "learning_rate": 5.9042729350096155e-05, "loss": 0.4063, "step": 58040 }, { "epoch": 1.473391631023848, "grad_norm": 0.359375, "learning_rate": 5.901630995857274e-05, "loss": 0.415, "step": 58045 }, { "epoch": 1.4735185490728637, "grad_norm": 0.306640625, "learning_rate": 5.898989503158277e-05, "loss": 0.3735, "step": 58050 }, { "epoch": 1.4736454671218793, "grad_norm": 0.384765625, "learning_rate": 5.896348457042237e-05, "loss": 0.4199, "step": 58055 }, { "epoch": 1.473772385170895, "grad_norm": 0.357421875, "learning_rate": 5.8937078576387545e-05, "loss": 0.4292, "step": 58060 }, { "epoch": 1.4738993032199108, "grad_norm": 0.375, "learning_rate": 5.891067705077393e-05, "loss": 0.4328, "step": 58065 }, { "epoch": 1.4740262212689266, "grad_norm": 0.328125, "learning_rate": 5.8884279994877165e-05, "loss": 0.4447, "step": 58070 }, { "epoch": 1.4741531393179423, "grad_norm": 0.349609375, "learning_rate": 5.885788740999249e-05, "loss": 0.4145, "step": 58075 }, { "epoch": 1.474280057366958, "grad_norm": 0.37109375, "learning_rate": 5.8831499297414916e-05, "loss": 0.4254, "step": 58080 }, { "epoch": 1.4744069754159739, "grad_norm": 0.392578125, "learning_rate": 5.8805115658439406e-05, "loss": 0.4263, "step": 58085 }, { "epoch": 1.4745338934649896, "grad_norm": 0.375, "learning_rate": 5.877873649436055e-05, "loss": 0.4084, "step": 58090 }, { "epoch": 1.4746608115140054, "grad_norm": 0.341796875, "learning_rate": 5.8752361806472765e-05, "loss": 0.4328, "step": 58095 }, { "epoch": 1.4747877295630212, "grad_norm": 0.34765625, "learning_rate": 5.8725991596070226e-05, "loss": 0.3884, "step": 58100 }, { "epoch": 1.474914647612037, "grad_norm": 0.310546875, "learning_rate": 5.869962586444692e-05, "loss": 0.4282, "step": 58105 }, { "epoch": 1.4750415656610527, "grad_norm": 0.34765625, "learning_rate": 5.867326461289661e-05, "loss": 0.4092, "step": 58110 }, { "epoch": 1.4751684837100685, "grad_norm": 0.359375, "learning_rate": 5.8646907842712823e-05, "loss": 0.4299, "step": 58115 }, { "epoch": 1.4752954017590842, "grad_norm": 0.373046875, "learning_rate": 5.8620555555188866e-05, "loss": 0.4251, "step": 58120 }, { "epoch": 1.4754223198081, "grad_norm": 0.333984375, "learning_rate": 5.859420775161787e-05, "loss": 0.4129, "step": 58125 }, { "epoch": 1.4755492378571158, "grad_norm": 0.34765625, "learning_rate": 5.8567864433292684e-05, "loss": 0.4086, "step": 58130 }, { "epoch": 1.4756761559061315, "grad_norm": 0.341796875, "learning_rate": 5.854152560150597e-05, "loss": 0.4157, "step": 58135 }, { "epoch": 1.475803073955147, "grad_norm": 0.36328125, "learning_rate": 5.851519125755009e-05, "loss": 0.4162, "step": 58140 }, { "epoch": 1.4759299920041629, "grad_norm": 0.3515625, "learning_rate": 5.8488861402717405e-05, "loss": 0.3965, "step": 58145 }, { "epoch": 1.4760569100531786, "grad_norm": 0.357421875, "learning_rate": 5.8462536038299845e-05, "loss": 0.3984, "step": 58150 }, { "epoch": 1.4761838281021944, "grad_norm": 0.3671875, "learning_rate": 5.843621516558911e-05, "loss": 0.4086, "step": 58155 }, { "epoch": 1.4763107461512102, "grad_norm": 0.337890625, "learning_rate": 5.840989878587689e-05, "loss": 0.3964, "step": 58160 }, { "epoch": 1.476437664200226, "grad_norm": 0.337890625, "learning_rate": 5.838358690045443e-05, "loss": 0.4227, "step": 58165 }, { "epoch": 1.4765645822492417, "grad_norm": 0.341796875, "learning_rate": 5.835727951061288e-05, "loss": 0.4162, "step": 58170 }, { "epoch": 1.4766915002982575, "grad_norm": 0.37890625, "learning_rate": 5.833097661764312e-05, "loss": 0.4543, "step": 58175 }, { "epoch": 1.4768184183472732, "grad_norm": 0.34765625, "learning_rate": 5.830467822283584e-05, "loss": 0.4163, "step": 58180 }, { "epoch": 1.476945336396289, "grad_norm": 0.3359375, "learning_rate": 5.827838432748145e-05, "loss": 0.4064, "step": 58185 }, { "epoch": 1.4770722544453045, "grad_norm": 0.34765625, "learning_rate": 5.82520949328702e-05, "loss": 0.4076, "step": 58190 }, { "epoch": 1.4771991724943203, "grad_norm": 0.365234375, "learning_rate": 5.822581004029212e-05, "loss": 0.4465, "step": 58195 }, { "epoch": 1.477326090543336, "grad_norm": 0.345703125, "learning_rate": 5.819952965103697e-05, "loss": 0.4066, "step": 58200 }, { "epoch": 1.4774530085923518, "grad_norm": 0.33984375, "learning_rate": 5.81732537663943e-05, "loss": 0.4144, "step": 58205 }, { "epoch": 1.4775799266413676, "grad_norm": 0.37109375, "learning_rate": 5.814698238765352e-05, "loss": 0.4283, "step": 58210 }, { "epoch": 1.4777068446903834, "grad_norm": 0.34765625, "learning_rate": 5.8120715516103736e-05, "loss": 0.3951, "step": 58215 }, { "epoch": 1.4778337627393991, "grad_norm": 0.376953125, "learning_rate": 5.809445315303385e-05, "loss": 0.4354, "step": 58220 }, { "epoch": 1.477960680788415, "grad_norm": 0.419921875, "learning_rate": 5.8068195299732516e-05, "loss": 0.4044, "step": 58225 }, { "epoch": 1.4780875988374307, "grad_norm": 0.373046875, "learning_rate": 5.804194195748818e-05, "loss": 0.4279, "step": 58230 }, { "epoch": 1.4782145168864465, "grad_norm": 0.36328125, "learning_rate": 5.801569312758918e-05, "loss": 0.415, "step": 58235 }, { "epoch": 1.4783414349354622, "grad_norm": 0.357421875, "learning_rate": 5.798944881132347e-05, "loss": 0.4377, "step": 58240 }, { "epoch": 1.478468352984478, "grad_norm": 0.365234375, "learning_rate": 5.796320900997887e-05, "loss": 0.426, "step": 58245 }, { "epoch": 1.4785952710334938, "grad_norm": 0.373046875, "learning_rate": 5.793697372484292e-05, "loss": 0.4397, "step": 58250 }, { "epoch": 1.4787221890825095, "grad_norm": 0.36328125, "learning_rate": 5.7910742957203e-05, "loss": 0.4301, "step": 58255 }, { "epoch": 1.4788491071315253, "grad_norm": 0.36328125, "learning_rate": 5.788451670834625e-05, "loss": 0.4329, "step": 58260 }, { "epoch": 1.478976025180541, "grad_norm": 0.35546875, "learning_rate": 5.7858294979559565e-05, "loss": 0.4095, "step": 58265 }, { "epoch": 1.4791029432295566, "grad_norm": 0.34375, "learning_rate": 5.783207777212959e-05, "loss": 0.4139, "step": 58270 }, { "epoch": 1.4792298612785724, "grad_norm": 0.388671875, "learning_rate": 5.78058650873429e-05, "loss": 0.4063, "step": 58275 }, { "epoch": 1.4793567793275881, "grad_norm": 0.3359375, "learning_rate": 5.777965692648568e-05, "loss": 0.4072, "step": 58280 }, { "epoch": 1.479483697376604, "grad_norm": 0.345703125, "learning_rate": 5.775345329084396e-05, "loss": 0.3987, "step": 58285 }, { "epoch": 1.4796106154256197, "grad_norm": 0.373046875, "learning_rate": 5.772725418170355e-05, "loss": 0.4029, "step": 58290 }, { "epoch": 1.4797375334746354, "grad_norm": 0.359375, "learning_rate": 5.7701059600350014e-05, "loss": 0.4143, "step": 58295 }, { "epoch": 1.4798644515236512, "grad_norm": 0.38671875, "learning_rate": 5.767486954806871e-05, "loss": 0.4228, "step": 58300 }, { "epoch": 1.479991369572667, "grad_norm": 0.349609375, "learning_rate": 5.764868402614479e-05, "loss": 0.4241, "step": 58305 }, { "epoch": 1.4801182876216827, "grad_norm": 0.380859375, "learning_rate": 5.7622503035863105e-05, "loss": 0.4292, "step": 58310 }, { "epoch": 1.4802452056706985, "grad_norm": 0.32421875, "learning_rate": 5.7596326578508453e-05, "loss": 0.3967, "step": 58315 }, { "epoch": 1.480372123719714, "grad_norm": 0.3828125, "learning_rate": 5.757015465536525e-05, "loss": 0.4332, "step": 58320 }, { "epoch": 1.4804990417687298, "grad_norm": 0.341796875, "learning_rate": 5.7543987267717734e-05, "loss": 0.3966, "step": 58325 }, { "epoch": 1.4806259598177456, "grad_norm": 0.34375, "learning_rate": 5.751782441684987e-05, "loss": 0.3963, "step": 58330 }, { "epoch": 1.4807528778667614, "grad_norm": 0.330078125, "learning_rate": 5.749166610404559e-05, "loss": 0.4106, "step": 58335 }, { "epoch": 1.4808797959157771, "grad_norm": 0.34375, "learning_rate": 5.74655123305884e-05, "loss": 0.4326, "step": 58340 }, { "epoch": 1.4810067139647929, "grad_norm": 0.37109375, "learning_rate": 5.7439363097761666e-05, "loss": 0.4339, "step": 58345 }, { "epoch": 1.4811336320138087, "grad_norm": 0.37109375, "learning_rate": 5.741321840684851e-05, "loss": 0.417, "step": 58350 }, { "epoch": 1.4812605500628244, "grad_norm": 0.349609375, "learning_rate": 5.738707825913184e-05, "loss": 0.4188, "step": 58355 }, { "epoch": 1.4813874681118402, "grad_norm": 0.349609375, "learning_rate": 5.736094265589435e-05, "loss": 0.4329, "step": 58360 }, { "epoch": 1.481514386160856, "grad_norm": 0.33203125, "learning_rate": 5.7334811598418507e-05, "loss": 0.4138, "step": 58365 }, { "epoch": 1.4816413042098717, "grad_norm": 0.33984375, "learning_rate": 5.730868508798654e-05, "loss": 0.3806, "step": 58370 }, { "epoch": 1.4817682222588875, "grad_norm": 0.34375, "learning_rate": 5.728256312588047e-05, "loss": 0.4334, "step": 58375 }, { "epoch": 1.4818951403079033, "grad_norm": 0.345703125, "learning_rate": 5.725644571338211e-05, "loss": 0.4342, "step": 58380 }, { "epoch": 1.482022058356919, "grad_norm": 0.35546875, "learning_rate": 5.7230332851772944e-05, "loss": 0.4023, "step": 58385 }, { "epoch": 1.4821489764059348, "grad_norm": 0.3671875, "learning_rate": 5.7204224542334457e-05, "loss": 0.4232, "step": 58390 }, { "epoch": 1.4822758944549506, "grad_norm": 0.337890625, "learning_rate": 5.71781207863477e-05, "loss": 0.4068, "step": 58395 }, { "epoch": 1.4824028125039663, "grad_norm": 0.357421875, "learning_rate": 5.715202158509354e-05, "loss": 0.3954, "step": 58400 }, { "epoch": 1.4825297305529819, "grad_norm": 0.359375, "learning_rate": 5.712592693985274e-05, "loss": 0.4381, "step": 58405 }, { "epoch": 1.4826566486019976, "grad_norm": 0.330078125, "learning_rate": 5.70998368519057e-05, "loss": 0.4003, "step": 58410 }, { "epoch": 1.4827835666510134, "grad_norm": 0.341796875, "learning_rate": 5.707375132253268e-05, "loss": 0.4567, "step": 58415 }, { "epoch": 1.4829104847000292, "grad_norm": 0.357421875, "learning_rate": 5.704767035301365e-05, "loss": 0.4111, "step": 58420 }, { "epoch": 1.483037402749045, "grad_norm": 0.341796875, "learning_rate": 5.702159394462841e-05, "loss": 0.4168, "step": 58425 }, { "epoch": 1.4831643207980607, "grad_norm": 0.388671875, "learning_rate": 5.6995522098656536e-05, "loss": 0.4434, "step": 58430 }, { "epoch": 1.4832912388470765, "grad_norm": 0.35546875, "learning_rate": 5.696945481637734e-05, "loss": 0.4039, "step": 58435 }, { "epoch": 1.4834181568960922, "grad_norm": 0.3828125, "learning_rate": 5.6943392099069915e-05, "loss": 0.4551, "step": 58440 }, { "epoch": 1.483545074945108, "grad_norm": 0.3515625, "learning_rate": 5.6917333948013195e-05, "loss": 0.4447, "step": 58445 }, { "epoch": 1.4836719929941236, "grad_norm": 0.349609375, "learning_rate": 5.6891280364485806e-05, "loss": 0.4488, "step": 58450 }, { "epoch": 1.4837989110431393, "grad_norm": 0.3671875, "learning_rate": 5.686523134976615e-05, "loss": 0.4056, "step": 58455 }, { "epoch": 1.483925829092155, "grad_norm": 0.294921875, "learning_rate": 5.683918690513256e-05, "loss": 0.3709, "step": 58460 }, { "epoch": 1.4840527471411709, "grad_norm": 0.384765625, "learning_rate": 5.6813147031862935e-05, "loss": 0.4203, "step": 58465 }, { "epoch": 1.4841796651901866, "grad_norm": 0.359375, "learning_rate": 5.678711173123508e-05, "loss": 0.4259, "step": 58470 }, { "epoch": 1.4843065832392024, "grad_norm": 0.35546875, "learning_rate": 5.6761081004526465e-05, "loss": 0.4241, "step": 58475 }, { "epoch": 1.4844335012882182, "grad_norm": 0.365234375, "learning_rate": 5.6735054853014524e-05, "loss": 0.4041, "step": 58480 }, { "epoch": 1.484560419337234, "grad_norm": 0.34375, "learning_rate": 5.6709033277976276e-05, "loss": 0.4101, "step": 58485 }, { "epoch": 1.4846873373862497, "grad_norm": 0.349609375, "learning_rate": 5.6683016280688616e-05, "loss": 0.4103, "step": 58490 }, { "epoch": 1.4848142554352655, "grad_norm": 0.345703125, "learning_rate": 5.6657003862428176e-05, "loss": 0.419, "step": 58495 }, { "epoch": 1.4849411734842812, "grad_norm": 0.361328125, "learning_rate": 5.663099602447137e-05, "loss": 0.4237, "step": 58500 }, { "epoch": 1.485068091533297, "grad_norm": 0.3359375, "learning_rate": 5.66049927680944e-05, "loss": 0.4009, "step": 58505 }, { "epoch": 1.4851950095823128, "grad_norm": 0.3671875, "learning_rate": 5.657899409457322e-05, "loss": 0.4425, "step": 58510 }, { "epoch": 1.4853219276313285, "grad_norm": 0.3359375, "learning_rate": 5.655300000518358e-05, "loss": 0.3932, "step": 58515 }, { "epoch": 1.4854488456803443, "grad_norm": 0.365234375, "learning_rate": 5.6527010501200986e-05, "loss": 0.4356, "step": 58520 }, { "epoch": 1.48557576372936, "grad_norm": 0.322265625, "learning_rate": 5.6501025583900775e-05, "loss": 0.4019, "step": 58525 }, { "epoch": 1.4857026817783758, "grad_norm": 0.357421875, "learning_rate": 5.6475045254558006e-05, "loss": 0.3989, "step": 58530 }, { "epoch": 1.4858295998273914, "grad_norm": 0.330078125, "learning_rate": 5.644906951444752e-05, "loss": 0.4065, "step": 58535 }, { "epoch": 1.4859565178764071, "grad_norm": 0.322265625, "learning_rate": 5.642309836484392e-05, "loss": 0.4147, "step": 58540 }, { "epoch": 1.486083435925423, "grad_norm": 0.337890625, "learning_rate": 5.6397131807021605e-05, "loss": 0.4036, "step": 58545 }, { "epoch": 1.4862103539744387, "grad_norm": 0.361328125, "learning_rate": 5.637116984225475e-05, "loss": 0.4134, "step": 58550 }, { "epoch": 1.4863372720234544, "grad_norm": 0.48046875, "learning_rate": 5.634521247181722e-05, "loss": 0.4057, "step": 58555 }, { "epoch": 1.4864641900724702, "grad_norm": 0.37890625, "learning_rate": 5.63192596969829e-05, "loss": 0.4253, "step": 58560 }, { "epoch": 1.486591108121486, "grad_norm": 0.333984375, "learning_rate": 5.6293311519025166e-05, "loss": 0.4342, "step": 58565 }, { "epoch": 1.4867180261705017, "grad_norm": 0.34765625, "learning_rate": 5.626736793921732e-05, "loss": 0.3959, "step": 58570 }, { "epoch": 1.4868449442195175, "grad_norm": 0.35546875, "learning_rate": 5.6241428958832375e-05, "loss": 0.4014, "step": 58575 }, { "epoch": 1.4869718622685333, "grad_norm": 0.349609375, "learning_rate": 5.621549457914317e-05, "loss": 0.411, "step": 58580 }, { "epoch": 1.4870987803175488, "grad_norm": 0.921875, "learning_rate": 5.6189564801422245e-05, "loss": 0.4042, "step": 58585 }, { "epoch": 1.4872256983665646, "grad_norm": 0.361328125, "learning_rate": 5.616363962694204e-05, "loss": 0.4168, "step": 58590 }, { "epoch": 1.4873526164155804, "grad_norm": 0.36328125, "learning_rate": 5.613771905697467e-05, "loss": 0.4377, "step": 58595 }, { "epoch": 1.4874795344645961, "grad_norm": 0.37109375, "learning_rate": 5.6111803092792055e-05, "loss": 0.428, "step": 58600 }, { "epoch": 1.487606452513612, "grad_norm": 0.357421875, "learning_rate": 5.608589173566584e-05, "loss": 0.4337, "step": 58605 }, { "epoch": 1.4877333705626277, "grad_norm": 0.365234375, "learning_rate": 5.6059984986867514e-05, "loss": 0.4368, "step": 58610 }, { "epoch": 1.4878602886116434, "grad_norm": 0.337890625, "learning_rate": 5.603408284766831e-05, "loss": 0.4074, "step": 58615 }, { "epoch": 1.4879872066606592, "grad_norm": 0.365234375, "learning_rate": 5.6008185319339216e-05, "loss": 0.3959, "step": 58620 }, { "epoch": 1.488114124709675, "grad_norm": 0.34765625, "learning_rate": 5.598229240315103e-05, "loss": 0.41, "step": 58625 }, { "epoch": 1.4882410427586907, "grad_norm": 0.341796875, "learning_rate": 5.595640410037426e-05, "loss": 0.4036, "step": 58630 }, { "epoch": 1.4883679608077065, "grad_norm": 0.376953125, "learning_rate": 5.593052041227933e-05, "loss": 0.4122, "step": 58635 }, { "epoch": 1.4884948788567223, "grad_norm": 0.31640625, "learning_rate": 5.590464134013629e-05, "loss": 0.4096, "step": 58640 }, { "epoch": 1.488621796905738, "grad_norm": 0.3359375, "learning_rate": 5.587876688521497e-05, "loss": 0.3958, "step": 58645 }, { "epoch": 1.4887487149547538, "grad_norm": 0.3359375, "learning_rate": 5.585289704878511e-05, "loss": 0.3959, "step": 58650 }, { "epoch": 1.4888756330037696, "grad_norm": 0.33203125, "learning_rate": 5.58270318321161e-05, "loss": 0.3841, "step": 58655 }, { "epoch": 1.4890025510527853, "grad_norm": 0.337890625, "learning_rate": 5.5801171236477116e-05, "loss": 0.4048, "step": 58660 }, { "epoch": 1.4891294691018009, "grad_norm": 0.34375, "learning_rate": 5.577531526313714e-05, "loss": 0.4291, "step": 58665 }, { "epoch": 1.4892563871508167, "grad_norm": 0.359375, "learning_rate": 5.5749463913364915e-05, "loss": 0.436, "step": 58670 }, { "epoch": 1.4893833051998324, "grad_norm": 0.380859375, "learning_rate": 5.572361718842894e-05, "loss": 0.4136, "step": 58675 }, { "epoch": 1.4895102232488482, "grad_norm": 0.349609375, "learning_rate": 5.5697775089597524e-05, "loss": 0.4042, "step": 58680 }, { "epoch": 1.489637141297864, "grad_norm": 0.341796875, "learning_rate": 5.567193761813871e-05, "loss": 0.3947, "step": 58685 }, { "epoch": 1.4897640593468797, "grad_norm": 0.33203125, "learning_rate": 5.564610477532035e-05, "loss": 0.431, "step": 58690 }, { "epoch": 1.4898909773958955, "grad_norm": 0.369140625, "learning_rate": 5.562027656241006e-05, "loss": 0.4368, "step": 58695 }, { "epoch": 1.4900178954449113, "grad_norm": 0.34375, "learning_rate": 5.559445298067518e-05, "loss": 0.4196, "step": 58700 }, { "epoch": 1.490144813493927, "grad_norm": 0.3828125, "learning_rate": 5.5568634031382904e-05, "loss": 0.416, "step": 58705 }, { "epoch": 1.4902717315429428, "grad_norm": 0.3515625, "learning_rate": 5.554281971580007e-05, "loss": 0.4262, "step": 58710 }, { "epoch": 1.4903986495919583, "grad_norm": 0.326171875, "learning_rate": 5.551701003519351e-05, "loss": 0.4082, "step": 58715 }, { "epoch": 1.490525567640974, "grad_norm": 0.35546875, "learning_rate": 5.549120499082958e-05, "loss": 0.429, "step": 58720 }, { "epoch": 1.4906524856899899, "grad_norm": 0.357421875, "learning_rate": 5.546540458397462e-05, "loss": 0.4145, "step": 58725 }, { "epoch": 1.4907794037390056, "grad_norm": 0.34375, "learning_rate": 5.543960881589461e-05, "loss": 0.3944, "step": 58730 }, { "epoch": 1.4909063217880214, "grad_norm": 0.333984375, "learning_rate": 5.541381768785532e-05, "loss": 0.4072, "step": 58735 }, { "epoch": 1.4910332398370372, "grad_norm": 0.3671875, "learning_rate": 5.538803120112231e-05, "loss": 0.4183, "step": 58740 }, { "epoch": 1.491160157886053, "grad_norm": 0.3359375, "learning_rate": 5.536224935696094e-05, "loss": 0.3941, "step": 58745 }, { "epoch": 1.4912870759350687, "grad_norm": 0.345703125, "learning_rate": 5.533647215663629e-05, "loss": 0.4462, "step": 58750 }, { "epoch": 1.4914139939840845, "grad_norm": 0.35546875, "learning_rate": 5.5310699601413236e-05, "loss": 0.4181, "step": 58755 }, { "epoch": 1.4915409120331002, "grad_norm": 0.349609375, "learning_rate": 5.528493169255643e-05, "loss": 0.4091, "step": 58760 }, { "epoch": 1.491667830082116, "grad_norm": 0.37109375, "learning_rate": 5.525916843133031e-05, "loss": 0.4347, "step": 58765 }, { "epoch": 1.4917947481311318, "grad_norm": 0.34375, "learning_rate": 5.523340981899905e-05, "loss": 0.4251, "step": 58770 }, { "epoch": 1.4919216661801475, "grad_norm": 0.3671875, "learning_rate": 5.520765585682657e-05, "loss": 0.4151, "step": 58775 }, { "epoch": 1.4920485842291633, "grad_norm": 0.349609375, "learning_rate": 5.518190654607671e-05, "loss": 0.3988, "step": 58780 }, { "epoch": 1.492175502278179, "grad_norm": 0.341796875, "learning_rate": 5.5156161888012915e-05, "loss": 0.3904, "step": 58785 }, { "epoch": 1.4923024203271948, "grad_norm": 0.357421875, "learning_rate": 5.51304218838985e-05, "loss": 0.4221, "step": 58790 }, { "epoch": 1.4924293383762106, "grad_norm": 0.3828125, "learning_rate": 5.510468653499647e-05, "loss": 0.4395, "step": 58795 }, { "epoch": 1.4925562564252262, "grad_norm": 0.337890625, "learning_rate": 5.507895584256964e-05, "loss": 0.4182, "step": 58800 }, { "epoch": 1.492683174474242, "grad_norm": 0.3359375, "learning_rate": 5.5053229807880675e-05, "loss": 0.3768, "step": 58805 }, { "epoch": 1.4928100925232577, "grad_norm": 0.3046875, "learning_rate": 5.502750843219189e-05, "loss": 0.3753, "step": 58810 }, { "epoch": 1.4929370105722735, "grad_norm": 0.369140625, "learning_rate": 5.5001791716765454e-05, "loss": 0.4109, "step": 58815 }, { "epoch": 1.4930639286212892, "grad_norm": 0.369140625, "learning_rate": 5.497607966286325e-05, "loss": 0.4394, "step": 58820 }, { "epoch": 1.493190846670305, "grad_norm": 0.33984375, "learning_rate": 5.4950372271746965e-05, "loss": 0.4349, "step": 58825 }, { "epoch": 1.4933177647193208, "grad_norm": 0.359375, "learning_rate": 5.492466954467805e-05, "loss": 0.4159, "step": 58830 }, { "epoch": 1.4934446827683365, "grad_norm": 0.3359375, "learning_rate": 5.489897148291767e-05, "loss": 0.4061, "step": 58835 }, { "epoch": 1.4935716008173523, "grad_norm": 0.361328125, "learning_rate": 5.487327808772695e-05, "loss": 0.4009, "step": 58840 }, { "epoch": 1.493698518866368, "grad_norm": 0.34765625, "learning_rate": 5.484758936036656e-05, "loss": 0.4035, "step": 58845 }, { "epoch": 1.4938254369153836, "grad_norm": 0.365234375, "learning_rate": 5.482190530209706e-05, "loss": 0.4312, "step": 58850 }, { "epoch": 1.4939523549643994, "grad_norm": 0.3046875, "learning_rate": 5.479622591417876e-05, "loss": 0.4085, "step": 58855 }, { "epoch": 1.4940792730134151, "grad_norm": 0.373046875, "learning_rate": 5.477055119787172e-05, "loss": 0.4301, "step": 58860 }, { "epoch": 1.494206191062431, "grad_norm": 0.3515625, "learning_rate": 5.4744881154435824e-05, "loss": 0.4391, "step": 58865 }, { "epoch": 1.4943331091114467, "grad_norm": 0.33984375, "learning_rate": 5.471921578513064e-05, "loss": 0.4232, "step": 58870 }, { "epoch": 1.4944600271604624, "grad_norm": 0.365234375, "learning_rate": 5.4693555091215546e-05, "loss": 0.4338, "step": 58875 }, { "epoch": 1.4945869452094782, "grad_norm": 0.3984375, "learning_rate": 5.4667899073949774e-05, "loss": 0.4104, "step": 58880 }, { "epoch": 1.494713863258494, "grad_norm": 0.34375, "learning_rate": 5.464224773459224e-05, "loss": 0.4026, "step": 58885 }, { "epoch": 1.4948407813075097, "grad_norm": 0.34765625, "learning_rate": 5.4616601074401617e-05, "loss": 0.3954, "step": 58890 }, { "epoch": 1.4949676993565255, "grad_norm": 0.345703125, "learning_rate": 5.459095909463637e-05, "loss": 0.4139, "step": 58895 }, { "epoch": 1.4950946174055413, "grad_norm": 0.3359375, "learning_rate": 5.456532179655472e-05, "loss": 0.4235, "step": 58900 }, { "epoch": 1.495221535454557, "grad_norm": 0.333984375, "learning_rate": 5.4539689181414764e-05, "loss": 0.4187, "step": 58905 }, { "epoch": 1.4953484535035728, "grad_norm": 0.388671875, "learning_rate": 5.4514061250474236e-05, "loss": 0.4087, "step": 58910 }, { "epoch": 1.4954753715525886, "grad_norm": 0.34375, "learning_rate": 5.4488438004990687e-05, "loss": 0.4061, "step": 58915 }, { "epoch": 1.4956022896016044, "grad_norm": 0.306640625, "learning_rate": 5.446281944622143e-05, "loss": 0.3934, "step": 58920 }, { "epoch": 1.4957292076506201, "grad_norm": 0.35546875, "learning_rate": 5.4437205575423574e-05, "loss": 0.4179, "step": 58925 }, { "epoch": 1.4958561256996357, "grad_norm": 0.359375, "learning_rate": 5.441159639385396e-05, "loss": 0.4561, "step": 58930 }, { "epoch": 1.4959830437486514, "grad_norm": 0.3203125, "learning_rate": 5.438599190276925e-05, "loss": 0.4005, "step": 58935 }, { "epoch": 1.4961099617976672, "grad_norm": 0.37890625, "learning_rate": 5.4360392103425833e-05, "loss": 0.4178, "step": 58940 }, { "epoch": 1.496236879846683, "grad_norm": 0.33984375, "learning_rate": 5.433479699707986e-05, "loss": 0.4072, "step": 58945 }, { "epoch": 1.4963637978956987, "grad_norm": 0.3359375, "learning_rate": 5.43092065849873e-05, "loss": 0.4117, "step": 58950 }, { "epoch": 1.4964907159447145, "grad_norm": 0.3515625, "learning_rate": 5.42836208684038e-05, "loss": 0.4, "step": 58955 }, { "epoch": 1.4966176339937303, "grad_norm": 0.376953125, "learning_rate": 5.4258039848584964e-05, "loss": 0.4225, "step": 58960 }, { "epoch": 1.496744552042746, "grad_norm": 0.3359375, "learning_rate": 5.42324635267859e-05, "loss": 0.3789, "step": 58965 }, { "epoch": 1.4968714700917618, "grad_norm": 0.361328125, "learning_rate": 5.420689190426177e-05, "loss": 0.4046, "step": 58970 }, { "epoch": 1.4969983881407776, "grad_norm": 0.36328125, "learning_rate": 5.4181324982267295e-05, "loss": 0.4305, "step": 58975 }, { "epoch": 1.4971253061897931, "grad_norm": 0.40234375, "learning_rate": 5.415576276205704e-05, "loss": 0.4359, "step": 58980 }, { "epoch": 1.4972522242388089, "grad_norm": 0.337890625, "learning_rate": 5.4130205244885315e-05, "loss": 0.4197, "step": 58985 }, { "epoch": 1.4973791422878246, "grad_norm": 0.3515625, "learning_rate": 5.410465243200623e-05, "loss": 0.4364, "step": 58990 }, { "epoch": 1.4975060603368404, "grad_norm": 0.3515625, "learning_rate": 5.4079104324673664e-05, "loss": 0.4048, "step": 58995 }, { "epoch": 1.4976329783858562, "grad_norm": 0.341796875, "learning_rate": 5.405356092414123e-05, "loss": 0.426, "step": 59000 }, { "epoch": 1.497759896434872, "grad_norm": 0.357421875, "learning_rate": 5.402802223166234e-05, "loss": 0.4136, "step": 59005 }, { "epoch": 1.4978868144838877, "grad_norm": 0.34765625, "learning_rate": 5.400248824849018e-05, "loss": 0.4359, "step": 59010 }, { "epoch": 1.4980137325329035, "grad_norm": 0.32421875, "learning_rate": 5.3976958975877657e-05, "loss": 0.3969, "step": 59015 }, { "epoch": 1.4981406505819193, "grad_norm": 0.322265625, "learning_rate": 5.395143441507753e-05, "loss": 0.4114, "step": 59020 }, { "epoch": 1.498267568630935, "grad_norm": 0.36328125, "learning_rate": 5.392591456734221e-05, "loss": 0.4235, "step": 59025 }, { "epoch": 1.4983944866799508, "grad_norm": 0.3359375, "learning_rate": 5.390039943392402e-05, "loss": 0.4068, "step": 59030 }, { "epoch": 1.4985214047289666, "grad_norm": 0.35546875, "learning_rate": 5.3874889016074976e-05, "loss": 0.4054, "step": 59035 }, { "epoch": 1.4986483227779823, "grad_norm": 0.34375, "learning_rate": 5.3849383315046823e-05, "loss": 0.3962, "step": 59040 }, { "epoch": 1.498775240826998, "grad_norm": 0.3515625, "learning_rate": 5.3823882332091073e-05, "loss": 0.4071, "step": 59045 }, { "epoch": 1.4989021588760139, "grad_norm": 0.34375, "learning_rate": 5.379838606845917e-05, "loss": 0.4265, "step": 59050 }, { "epoch": 1.4990290769250296, "grad_norm": 0.36328125, "learning_rate": 5.3772894525402135e-05, "loss": 0.4409, "step": 59055 }, { "epoch": 1.4991559949740454, "grad_norm": 0.3515625, "learning_rate": 5.3747407704170824e-05, "loss": 0.4323, "step": 59060 }, { "epoch": 1.499282913023061, "grad_norm": 0.3359375, "learning_rate": 5.3721925606015895e-05, "loss": 0.4186, "step": 59065 }, { "epoch": 1.4994098310720767, "grad_norm": 0.369140625, "learning_rate": 5.369644823218772e-05, "loss": 0.4386, "step": 59070 }, { "epoch": 1.4995367491210925, "grad_norm": 0.359375, "learning_rate": 5.367097558393645e-05, "loss": 0.417, "step": 59075 }, { "epoch": 1.4996636671701082, "grad_norm": 0.359375, "learning_rate": 5.3645507662512064e-05, "loss": 0.4447, "step": 59080 }, { "epoch": 1.499790585219124, "grad_norm": 0.310546875, "learning_rate": 5.3620044469164216e-05, "loss": 0.399, "step": 59085 }, { "epoch": 1.4999175032681398, "grad_norm": 0.392578125, "learning_rate": 5.359458600514234e-05, "loss": 0.4248, "step": 59090 }, { "epoch": 1.5000444213171555, "grad_norm": 0.3515625, "learning_rate": 5.3569132271695795e-05, "loss": 0.3919, "step": 59095 }, { "epoch": 1.5001713393661713, "grad_norm": 0.35546875, "learning_rate": 5.35436832700735e-05, "loss": 0.4455, "step": 59100 }, { "epoch": 1.5002982574151869, "grad_norm": 0.384765625, "learning_rate": 5.351823900152425e-05, "loss": 0.4374, "step": 59105 }, { "epoch": 1.5004251754642026, "grad_norm": 0.3515625, "learning_rate": 5.349279946729659e-05, "loss": 0.4099, "step": 59110 }, { "epoch": 1.5005520935132184, "grad_norm": 0.345703125, "learning_rate": 5.346736466863882e-05, "loss": 0.4237, "step": 59115 }, { "epoch": 1.5006790115622342, "grad_norm": 0.3515625, "learning_rate": 5.344193460679895e-05, "loss": 0.433, "step": 59120 }, { "epoch": 1.50080592961125, "grad_norm": 0.33984375, "learning_rate": 5.341650928302493e-05, "loss": 0.4268, "step": 59125 }, { "epoch": 1.5009328476602657, "grad_norm": 0.369140625, "learning_rate": 5.3391088698564346e-05, "loss": 0.4155, "step": 59130 }, { "epoch": 1.5010597657092815, "grad_norm": 0.40625, "learning_rate": 5.336567285466453e-05, "loss": 0.4279, "step": 59135 }, { "epoch": 1.5011866837582972, "grad_norm": 0.365234375, "learning_rate": 5.334026175257268e-05, "loss": 0.4168, "step": 59140 }, { "epoch": 1.501313601807313, "grad_norm": 0.34375, "learning_rate": 5.3314855393535665e-05, "loss": 0.3911, "step": 59145 }, { "epoch": 1.5014405198563288, "grad_norm": 0.365234375, "learning_rate": 5.328945377880014e-05, "loss": 0.4023, "step": 59150 }, { "epoch": 1.5015674379053445, "grad_norm": 0.341796875, "learning_rate": 5.326405690961264e-05, "loss": 0.4153, "step": 59155 }, { "epoch": 1.5016943559543603, "grad_norm": 0.37109375, "learning_rate": 5.323866478721934e-05, "loss": 0.4397, "step": 59160 }, { "epoch": 1.501821274003376, "grad_norm": 0.33984375, "learning_rate": 5.321327741286621e-05, "loss": 0.4168, "step": 59165 }, { "epoch": 1.5019481920523918, "grad_norm": 0.353515625, "learning_rate": 5.3187894787799e-05, "loss": 0.4108, "step": 59170 }, { "epoch": 1.5020751101014076, "grad_norm": 0.330078125, "learning_rate": 5.316251691326324e-05, "loss": 0.3995, "step": 59175 }, { "epoch": 1.5022020281504234, "grad_norm": 0.337890625, "learning_rate": 5.313714379050419e-05, "loss": 0.426, "step": 59180 }, { "epoch": 1.5023289461994391, "grad_norm": 0.330078125, "learning_rate": 5.311177542076691e-05, "loss": 0.4256, "step": 59185 }, { "epoch": 1.502455864248455, "grad_norm": 0.35546875, "learning_rate": 5.308641180529623e-05, "loss": 0.4311, "step": 59190 }, { "epoch": 1.5025827822974707, "grad_norm": 0.373046875, "learning_rate": 5.30610529453367e-05, "loss": 0.4592, "step": 59195 }, { "epoch": 1.5027097003464864, "grad_norm": 0.33984375, "learning_rate": 5.3035698842132644e-05, "loss": 0.3882, "step": 59200 }, { "epoch": 1.502836618395502, "grad_norm": 0.35546875, "learning_rate": 5.301034949692827e-05, "loss": 0.4128, "step": 59205 }, { "epoch": 1.5029635364445177, "grad_norm": 0.35546875, "learning_rate": 5.298500491096742e-05, "loss": 0.4231, "step": 59210 }, { "epoch": 1.5030904544935335, "grad_norm": 0.35546875, "learning_rate": 5.295966508549366e-05, "loss": 0.4243, "step": 59215 }, { "epoch": 1.5032173725425493, "grad_norm": 0.38671875, "learning_rate": 5.293433002175057e-05, "loss": 0.9278, "step": 59220 }, { "epoch": 1.503344290591565, "grad_norm": 0.36328125, "learning_rate": 5.29089997209812e-05, "loss": 0.4321, "step": 59225 }, { "epoch": 1.5034712086405808, "grad_norm": 0.328125, "learning_rate": 5.2883674184428555e-05, "loss": 0.4076, "step": 59230 }, { "epoch": 1.5035981266895966, "grad_norm": 0.35546875, "learning_rate": 5.2858353413335334e-05, "loss": 0.4323, "step": 59235 }, { "epoch": 1.5037250447386121, "grad_norm": 0.365234375, "learning_rate": 5.283303740894402e-05, "loss": 0.4107, "step": 59240 }, { "epoch": 1.503851962787628, "grad_norm": 0.34765625, "learning_rate": 5.280772617249684e-05, "loss": 0.4388, "step": 59245 }, { "epoch": 1.5039788808366437, "grad_norm": 0.376953125, "learning_rate": 5.278241970523584e-05, "loss": 0.42, "step": 59250 }, { "epoch": 1.5041057988856594, "grad_norm": 0.333984375, "learning_rate": 5.275711800840276e-05, "loss": 0.3792, "step": 59255 }, { "epoch": 1.5042327169346752, "grad_norm": 0.353515625, "learning_rate": 5.2731821083239174e-05, "loss": 0.4199, "step": 59260 }, { "epoch": 1.504359634983691, "grad_norm": 0.365234375, "learning_rate": 5.2706528930986385e-05, "loss": 0.4304, "step": 59265 }, { "epoch": 1.5044865530327067, "grad_norm": 0.357421875, "learning_rate": 5.2681241552885454e-05, "loss": 0.4257, "step": 59270 }, { "epoch": 1.5046134710817225, "grad_norm": 0.384765625, "learning_rate": 5.265595895017719e-05, "loss": 0.4212, "step": 59275 }, { "epoch": 1.5047403891307383, "grad_norm": 0.318359375, "learning_rate": 5.26306811241023e-05, "loss": 0.3822, "step": 59280 }, { "epoch": 1.504867307179754, "grad_norm": 0.384765625, "learning_rate": 5.260540807590108e-05, "loss": 0.4184, "step": 59285 }, { "epoch": 1.5049942252287698, "grad_norm": 0.373046875, "learning_rate": 5.2580139806813656e-05, "loss": 0.4257, "step": 59290 }, { "epoch": 1.5051211432777856, "grad_norm": 0.373046875, "learning_rate": 5.2554876318080016e-05, "loss": 0.4289, "step": 59295 }, { "epoch": 1.5052480613268013, "grad_norm": 0.3359375, "learning_rate": 5.252961761093979e-05, "loss": 0.4137, "step": 59300 }, { "epoch": 1.505374979375817, "grad_norm": 0.361328125, "learning_rate": 5.2504363686632375e-05, "loss": 0.4096, "step": 59305 }, { "epoch": 1.5055018974248329, "grad_norm": 0.349609375, "learning_rate": 5.247911454639702e-05, "loss": 0.4188, "step": 59310 }, { "epoch": 1.5056288154738486, "grad_norm": 0.37109375, "learning_rate": 5.2453870191472645e-05, "loss": 0.4142, "step": 59315 }, { "epoch": 1.5057557335228644, "grad_norm": 0.357421875, "learning_rate": 5.2428630623098026e-05, "loss": 0.4253, "step": 59320 }, { "epoch": 1.5058826515718802, "grad_norm": 0.34765625, "learning_rate": 5.240339584251163e-05, "loss": 0.4011, "step": 59325 }, { "epoch": 1.506009569620896, "grad_norm": 0.29296875, "learning_rate": 5.237816585095171e-05, "loss": 0.4243, "step": 59330 }, { "epoch": 1.5061364876699115, "grad_norm": 0.38671875, "learning_rate": 5.235294064965632e-05, "loss": 0.4166, "step": 59335 }, { "epoch": 1.5062634057189273, "grad_norm": 0.33984375, "learning_rate": 5.2327720239863175e-05, "loss": 0.397, "step": 59340 }, { "epoch": 1.506390323767943, "grad_norm": 0.34375, "learning_rate": 5.230250462280995e-05, "loss": 0.4165, "step": 59345 }, { "epoch": 1.5065172418169588, "grad_norm": 0.3828125, "learning_rate": 5.2277293799733905e-05, "loss": 0.443, "step": 59350 }, { "epoch": 1.5066441598659746, "grad_norm": 0.365234375, "learning_rate": 5.225208777187212e-05, "loss": 0.425, "step": 59355 }, { "epoch": 1.5067710779149903, "grad_norm": 0.345703125, "learning_rate": 5.2226886540461455e-05, "loss": 0.4357, "step": 59360 }, { "epoch": 1.506897995964006, "grad_norm": 0.318359375, "learning_rate": 5.220169010673847e-05, "loss": 0.4097, "step": 59365 }, { "epoch": 1.5070249140130216, "grad_norm": 0.341796875, "learning_rate": 5.2176498471939645e-05, "loss": 0.4161, "step": 59370 }, { "epoch": 1.5071518320620374, "grad_norm": 0.349609375, "learning_rate": 5.215131163730107e-05, "loss": 0.3834, "step": 59375 }, { "epoch": 1.5072787501110532, "grad_norm": 0.357421875, "learning_rate": 5.212612960405866e-05, "loss": 0.4146, "step": 59380 }, { "epoch": 1.507405668160069, "grad_norm": 0.349609375, "learning_rate": 5.2100952373448075e-05, "loss": 0.4063, "step": 59385 }, { "epoch": 1.5075325862090847, "grad_norm": 0.359375, "learning_rate": 5.2075779946704774e-05, "loss": 0.4297, "step": 59390 }, { "epoch": 1.5076595042581005, "grad_norm": 0.34375, "learning_rate": 5.205061232506393e-05, "loss": 0.4264, "step": 59395 }, { "epoch": 1.5077864223071162, "grad_norm": 0.34765625, "learning_rate": 5.202544950976053e-05, "loss": 0.4102, "step": 59400 }, { "epoch": 1.507913340356132, "grad_norm": 0.3671875, "learning_rate": 5.2000291502029245e-05, "loss": 0.4183, "step": 59405 }, { "epoch": 1.5080402584051478, "grad_norm": 0.345703125, "learning_rate": 5.197513830310467e-05, "loss": 0.4377, "step": 59410 }, { "epoch": 1.5081671764541635, "grad_norm": 0.361328125, "learning_rate": 5.194998991422101e-05, "loss": 0.4292, "step": 59415 }, { "epoch": 1.5082940945031793, "grad_norm": 0.341796875, "learning_rate": 5.1924846336612276e-05, "loss": 0.4466, "step": 59420 }, { "epoch": 1.508421012552195, "grad_norm": 0.3828125, "learning_rate": 5.1899707571512265e-05, "loss": 0.4155, "step": 59425 }, { "epoch": 1.5085479306012108, "grad_norm": 0.349609375, "learning_rate": 5.187457362015455e-05, "loss": 0.3823, "step": 59430 }, { "epoch": 1.5086748486502266, "grad_norm": 0.33984375, "learning_rate": 5.18494444837724e-05, "loss": 0.4372, "step": 59435 }, { "epoch": 1.5088017666992424, "grad_norm": 0.337890625, "learning_rate": 5.1824320163598924e-05, "loss": 0.4293, "step": 59440 }, { "epoch": 1.5089286847482581, "grad_norm": 0.32421875, "learning_rate": 5.1799200660866886e-05, "loss": 0.414, "step": 59445 }, { "epoch": 1.509055602797274, "grad_norm": 0.37109375, "learning_rate": 5.177408597680902e-05, "loss": 0.4036, "step": 59450 }, { "epoch": 1.5091825208462897, "grad_norm": 0.341796875, "learning_rate": 5.1748976112657614e-05, "loss": 0.4013, "step": 59455 }, { "epoch": 1.5093094388953054, "grad_norm": 0.3515625, "learning_rate": 5.172387106964481e-05, "loss": 0.4016, "step": 59460 }, { "epoch": 1.5094363569443212, "grad_norm": 0.34765625, "learning_rate": 5.1698770849002467e-05, "loss": 0.3947, "step": 59465 }, { "epoch": 1.5095632749933368, "grad_norm": 0.353515625, "learning_rate": 5.167367545196233e-05, "loss": 0.4039, "step": 59470 }, { "epoch": 1.5096901930423525, "grad_norm": 0.361328125, "learning_rate": 5.164858487975577e-05, "loss": 0.4012, "step": 59475 }, { "epoch": 1.5098171110913683, "grad_norm": 0.37890625, "learning_rate": 5.162349913361397e-05, "loss": 0.4318, "step": 59480 }, { "epoch": 1.509944029140384, "grad_norm": 0.35546875, "learning_rate": 5.159841821476788e-05, "loss": 0.4004, "step": 59485 }, { "epoch": 1.5100709471893998, "grad_norm": 0.365234375, "learning_rate": 5.157334212444822e-05, "loss": 0.411, "step": 59490 }, { "epoch": 1.5101978652384156, "grad_norm": 0.35546875, "learning_rate": 5.154827086388544e-05, "loss": 0.3952, "step": 59495 }, { "epoch": 1.5103247832874314, "grad_norm": 0.35546875, "learning_rate": 5.15232044343098e-05, "loss": 0.4014, "step": 59500 }, { "epoch": 1.510451701336447, "grad_norm": 0.357421875, "learning_rate": 5.149814283695129e-05, "loss": 0.437, "step": 59505 }, { "epoch": 1.5105786193854627, "grad_norm": 0.34375, "learning_rate": 5.147308607303967e-05, "loss": 0.4109, "step": 59510 }, { "epoch": 1.5107055374344784, "grad_norm": 0.36328125, "learning_rate": 5.1448034143804476e-05, "loss": 0.4028, "step": 59515 }, { "epoch": 1.5108324554834942, "grad_norm": 0.353515625, "learning_rate": 5.142298705047493e-05, "loss": 0.4077, "step": 59520 }, { "epoch": 1.51095937353251, "grad_norm": 0.337890625, "learning_rate": 5.139794479428021e-05, "loss": 0.4116, "step": 59525 }, { "epoch": 1.5110862915815257, "grad_norm": 0.3359375, "learning_rate": 5.137290737644904e-05, "loss": 0.4173, "step": 59530 }, { "epoch": 1.5112132096305415, "grad_norm": 0.359375, "learning_rate": 5.134787479820997e-05, "loss": 0.4458, "step": 59535 }, { "epoch": 1.5113401276795573, "grad_norm": 0.3203125, "learning_rate": 5.1322847060791444e-05, "loss": 0.4107, "step": 59540 }, { "epoch": 1.511467045728573, "grad_norm": 0.314453125, "learning_rate": 5.129782416542151e-05, "loss": 0.4094, "step": 59545 }, { "epoch": 1.5115939637775888, "grad_norm": 0.337890625, "learning_rate": 5.127280611332801e-05, "loss": 0.4135, "step": 59550 }, { "epoch": 1.5117208818266046, "grad_norm": 0.328125, "learning_rate": 5.124779290573861e-05, "loss": 0.4258, "step": 59555 }, { "epoch": 1.5118477998756203, "grad_norm": 0.35546875, "learning_rate": 5.122278454388066e-05, "loss": 0.4234, "step": 59560 }, { "epoch": 1.5119747179246361, "grad_norm": 0.353515625, "learning_rate": 5.119778102898134e-05, "loss": 0.4097, "step": 59565 }, { "epoch": 1.5121016359736519, "grad_norm": 0.37109375, "learning_rate": 5.117278236226754e-05, "loss": 0.4206, "step": 59570 }, { "epoch": 1.5122285540226676, "grad_norm": 0.357421875, "learning_rate": 5.114778854496593e-05, "loss": 0.4149, "step": 59575 }, { "epoch": 1.5123554720716834, "grad_norm": 0.330078125, "learning_rate": 5.112279957830298e-05, "loss": 0.4317, "step": 59580 }, { "epoch": 1.5124823901206992, "grad_norm": 0.34375, "learning_rate": 5.109781546350485e-05, "loss": 0.4383, "step": 59585 }, { "epoch": 1.512609308169715, "grad_norm": 0.353515625, "learning_rate": 5.107283620179747e-05, "loss": 0.4135, "step": 59590 }, { "epoch": 1.5127362262187307, "grad_norm": 0.333984375, "learning_rate": 5.104786179440667e-05, "loss": 0.4086, "step": 59595 }, { "epoch": 1.5128631442677463, "grad_norm": 0.359375, "learning_rate": 5.102289224255787e-05, "loss": 0.442, "step": 59600 }, { "epoch": 1.512990062316762, "grad_norm": 0.359375, "learning_rate": 5.099792754747633e-05, "loss": 0.436, "step": 59605 }, { "epoch": 1.5131169803657778, "grad_norm": 0.33203125, "learning_rate": 5.097296771038698e-05, "loss": 0.3979, "step": 59610 }, { "epoch": 1.5132438984147936, "grad_norm": 0.40625, "learning_rate": 5.094801273251472e-05, "loss": 0.4289, "step": 59615 }, { "epoch": 1.5133708164638093, "grad_norm": 0.357421875, "learning_rate": 5.092306261508402e-05, "loss": 0.4169, "step": 59620 }, { "epoch": 1.513497734512825, "grad_norm": 0.35546875, "learning_rate": 5.089811735931917e-05, "loss": 0.442, "step": 59625 }, { "epoch": 1.5136246525618409, "grad_norm": 0.380859375, "learning_rate": 5.0873176966444224e-05, "loss": 0.4076, "step": 59630 }, { "epoch": 1.5137515706108564, "grad_norm": 0.349609375, "learning_rate": 5.084824143768301e-05, "loss": 0.4431, "step": 59635 }, { "epoch": 1.5138784886598722, "grad_norm": 0.3203125, "learning_rate": 5.0823310774259074e-05, "loss": 0.4063, "step": 59640 }, { "epoch": 1.514005406708888, "grad_norm": 0.384765625, "learning_rate": 5.079838497739578e-05, "loss": 0.4301, "step": 59645 }, { "epoch": 1.5141323247579037, "grad_norm": 0.37109375, "learning_rate": 5.077346404831621e-05, "loss": 0.4496, "step": 59650 }, { "epoch": 1.5142592428069195, "grad_norm": 0.353515625, "learning_rate": 5.0748547988243184e-05, "loss": 0.4193, "step": 59655 }, { "epoch": 1.5143861608559352, "grad_norm": 0.34765625, "learning_rate": 5.0723636798399434e-05, "loss": 0.4013, "step": 59660 }, { "epoch": 1.514513078904951, "grad_norm": 0.333984375, "learning_rate": 5.0698730480007274e-05, "loss": 0.3923, "step": 59665 }, { "epoch": 1.5146399969539668, "grad_norm": 0.357421875, "learning_rate": 5.0673829034288865e-05, "loss": 0.4025, "step": 59670 }, { "epoch": 1.5147669150029826, "grad_norm": 0.3359375, "learning_rate": 5.0648932462466094e-05, "loss": 0.421, "step": 59675 }, { "epoch": 1.5148938330519983, "grad_norm": 0.34765625, "learning_rate": 5.062404076576061e-05, "loss": 0.3975, "step": 59680 }, { "epoch": 1.515020751101014, "grad_norm": 0.32421875, "learning_rate": 5.0599153945393876e-05, "loss": 0.3835, "step": 59685 }, { "epoch": 1.5151476691500299, "grad_norm": 0.373046875, "learning_rate": 5.057427200258702e-05, "loss": 0.4242, "step": 59690 }, { "epoch": 1.5152745871990456, "grad_norm": 0.35546875, "learning_rate": 5.054939493856107e-05, "loss": 0.3953, "step": 59695 }, { "epoch": 1.5154015052480614, "grad_norm": 0.359375, "learning_rate": 5.052452275453671e-05, "loss": 0.3849, "step": 59700 }, { "epoch": 1.5155284232970772, "grad_norm": 0.349609375, "learning_rate": 5.049965545173438e-05, "loss": 0.4189, "step": 59705 }, { "epoch": 1.515655341346093, "grad_norm": 0.36328125, "learning_rate": 5.0474793031374325e-05, "loss": 0.4473, "step": 59710 }, { "epoch": 1.5157822593951087, "grad_norm": 0.34375, "learning_rate": 5.0449935494676533e-05, "loss": 0.402, "step": 59715 }, { "epoch": 1.5159091774441245, "grad_norm": 0.4375, "learning_rate": 5.042508284286071e-05, "loss": 0.4108, "step": 59720 }, { "epoch": 1.5160360954931402, "grad_norm": 0.36328125, "learning_rate": 5.040023507714644e-05, "loss": 0.4201, "step": 59725 }, { "epoch": 1.516163013542156, "grad_norm": 0.37109375, "learning_rate": 5.0375392198752964e-05, "loss": 0.4111, "step": 59730 }, { "epoch": 1.5162899315911715, "grad_norm": 0.359375, "learning_rate": 5.035055420889932e-05, "loss": 0.4205, "step": 59735 }, { "epoch": 1.5164168496401873, "grad_norm": 0.35546875, "learning_rate": 5.0325721108804264e-05, "loss": 0.4396, "step": 59740 }, { "epoch": 1.516543767689203, "grad_norm": 0.353515625, "learning_rate": 5.030089289968639e-05, "loss": 0.4335, "step": 59745 }, { "epoch": 1.5166706857382188, "grad_norm": 0.4140625, "learning_rate": 5.027606958276398e-05, "loss": 0.4142, "step": 59750 }, { "epoch": 1.5167976037872346, "grad_norm": 0.376953125, "learning_rate": 5.025125115925511e-05, "loss": 0.3843, "step": 59755 }, { "epoch": 1.5169245218362504, "grad_norm": 0.37890625, "learning_rate": 5.022643763037762e-05, "loss": 0.434, "step": 59760 }, { "epoch": 1.517051439885266, "grad_norm": 0.36328125, "learning_rate": 5.020162899734903e-05, "loss": 0.3934, "step": 59765 }, { "epoch": 1.5171783579342817, "grad_norm": 0.318359375, "learning_rate": 5.01768252613868e-05, "loss": 0.4235, "step": 59770 }, { "epoch": 1.5173052759832975, "grad_norm": 0.322265625, "learning_rate": 5.0152026423708e-05, "loss": 0.4138, "step": 59775 }, { "epoch": 1.5174321940323132, "grad_norm": 0.34765625, "learning_rate": 5.012723248552945e-05, "loss": 0.3898, "step": 59780 }, { "epoch": 1.517559112081329, "grad_norm": 0.326171875, "learning_rate": 5.010244344806785e-05, "loss": 0.4019, "step": 59785 }, { "epoch": 1.5176860301303448, "grad_norm": 0.298828125, "learning_rate": 5.0077659312539556e-05, "loss": 0.3968, "step": 59790 }, { "epoch": 1.5178129481793605, "grad_norm": 0.36328125, "learning_rate": 5.0052880080160727e-05, "loss": 0.4135, "step": 59795 }, { "epoch": 1.5179398662283763, "grad_norm": 0.345703125, "learning_rate": 5.0028105752147256e-05, "loss": 0.4354, "step": 59800 }, { "epoch": 1.518066784277392, "grad_norm": 0.328125, "learning_rate": 5.00033363297148e-05, "loss": 0.4163, "step": 59805 }, { "epoch": 1.5181937023264078, "grad_norm": 0.353515625, "learning_rate": 4.997857181407881e-05, "loss": 0.425, "step": 59810 }, { "epoch": 1.5183206203754236, "grad_norm": 0.361328125, "learning_rate": 4.995381220645447e-05, "loss": 0.4033, "step": 59815 }, { "epoch": 1.5184475384244394, "grad_norm": 0.3203125, "learning_rate": 4.99290575080567e-05, "loss": 0.4385, "step": 59820 }, { "epoch": 1.5185744564734551, "grad_norm": 0.35546875, "learning_rate": 4.99043077201002e-05, "loss": 0.4269, "step": 59825 }, { "epoch": 1.518701374522471, "grad_norm": 0.35546875, "learning_rate": 4.9879562843799476e-05, "loss": 0.4056, "step": 59830 }, { "epoch": 1.5188282925714867, "grad_norm": 0.35546875, "learning_rate": 4.9854822880368704e-05, "loss": 0.4012, "step": 59835 }, { "epoch": 1.5189552106205024, "grad_norm": 0.361328125, "learning_rate": 4.983008783102189e-05, "loss": 0.4176, "step": 59840 }, { "epoch": 1.5190821286695182, "grad_norm": 0.361328125, "learning_rate": 4.980535769697272e-05, "loss": 0.4036, "step": 59845 }, { "epoch": 1.519209046718534, "grad_norm": 0.310546875, "learning_rate": 4.9780632479434766e-05, "loss": 0.417, "step": 59850 }, { "epoch": 1.5193359647675497, "grad_norm": 0.337890625, "learning_rate": 4.975591217962121e-05, "loss": 0.4107, "step": 59855 }, { "epoch": 1.5194628828165655, "grad_norm": 0.357421875, "learning_rate": 4.973119679874517e-05, "loss": 0.3986, "step": 59860 }, { "epoch": 1.519589800865581, "grad_norm": 0.32421875, "learning_rate": 4.9706486338019364e-05, "loss": 0.4435, "step": 59865 }, { "epoch": 1.5197167189145968, "grad_norm": 0.359375, "learning_rate": 4.968178079865632e-05, "loss": 0.4365, "step": 59870 }, { "epoch": 1.5198436369636126, "grad_norm": 0.33984375, "learning_rate": 4.965708018186833e-05, "loss": 0.4077, "step": 59875 }, { "epoch": 1.5199705550126283, "grad_norm": 0.337890625, "learning_rate": 4.963238448886745e-05, "loss": 0.3801, "step": 59880 }, { "epoch": 1.5200974730616441, "grad_norm": 0.33984375, "learning_rate": 4.9607693720865476e-05, "loss": 0.414, "step": 59885 }, { "epoch": 1.5202243911106599, "grad_norm": 0.34375, "learning_rate": 4.9583007879073976e-05, "loss": 0.4018, "step": 59890 }, { "epoch": 1.5203513091596756, "grad_norm": 0.3515625, "learning_rate": 4.955832696470428e-05, "loss": 0.3768, "step": 59895 }, { "epoch": 1.5204782272086912, "grad_norm": 0.345703125, "learning_rate": 4.953365097896747e-05, "loss": 0.4002, "step": 59900 }, { "epoch": 1.520605145257707, "grad_norm": 0.353515625, "learning_rate": 4.9508979923074394e-05, "loss": 0.4341, "step": 59905 }, { "epoch": 1.5207320633067227, "grad_norm": 0.333984375, "learning_rate": 4.9484313798235595e-05, "loss": 0.3926, "step": 59910 }, { "epoch": 1.5208589813557385, "grad_norm": 0.36328125, "learning_rate": 4.945965260566153e-05, "loss": 0.4125, "step": 59915 }, { "epoch": 1.5209858994047543, "grad_norm": 0.36328125, "learning_rate": 4.943499634656226e-05, "loss": 0.4069, "step": 59920 }, { "epoch": 1.52111281745377, "grad_norm": 0.337890625, "learning_rate": 4.941034502214765e-05, "loss": 0.4094, "step": 59925 }, { "epoch": 1.5212397355027858, "grad_norm": 0.375, "learning_rate": 4.938569863362735e-05, "loss": 0.4287, "step": 59930 }, { "epoch": 1.5213666535518016, "grad_norm": 0.3515625, "learning_rate": 4.936105718221068e-05, "loss": 0.4096, "step": 59935 }, { "epoch": 1.5214935716008173, "grad_norm": 0.361328125, "learning_rate": 4.933642066910691e-05, "loss": 0.4338, "step": 59940 }, { "epoch": 1.521620489649833, "grad_norm": 0.375, "learning_rate": 4.9311789095524873e-05, "loss": 0.4315, "step": 59945 }, { "epoch": 1.5217474076988489, "grad_norm": 0.392578125, "learning_rate": 4.928716246267322e-05, "loss": 0.4254, "step": 59950 }, { "epoch": 1.5218743257478646, "grad_norm": 0.34765625, "learning_rate": 4.9262540771760417e-05, "loss": 0.4463, "step": 59955 }, { "epoch": 1.5220012437968804, "grad_norm": 0.349609375, "learning_rate": 4.9237924023994584e-05, "loss": 0.4166, "step": 59960 }, { "epoch": 1.5221281618458962, "grad_norm": 0.36328125, "learning_rate": 4.9213312220583695e-05, "loss": 0.4115, "step": 59965 }, { "epoch": 1.522255079894912, "grad_norm": 0.361328125, "learning_rate": 4.918870536273538e-05, "loss": 0.4203, "step": 59970 }, { "epoch": 1.5223819979439277, "grad_norm": 0.357421875, "learning_rate": 4.916410345165718e-05, "loss": 0.4408, "step": 59975 }, { "epoch": 1.5225089159929435, "grad_norm": 0.328125, "learning_rate": 4.913950648855627e-05, "loss": 0.4116, "step": 59980 }, { "epoch": 1.5226358340419592, "grad_norm": 0.365234375, "learning_rate": 4.911491447463957e-05, "loss": 0.4307, "step": 59985 }, { "epoch": 1.522762752090975, "grad_norm": 0.34765625, "learning_rate": 4.909032741111386e-05, "loss": 0.4074, "step": 59990 }, { "epoch": 1.5228896701399908, "grad_norm": 0.34765625, "learning_rate": 4.906574529918557e-05, "loss": 0.4198, "step": 59995 }, { "epoch": 1.5230165881890063, "grad_norm": 0.361328125, "learning_rate": 4.9041168140060957e-05, "loss": 0.3989, "step": 60000 }, { "epoch": 1.523143506238022, "grad_norm": 0.34765625, "learning_rate": 4.901659593494601e-05, "loss": 0.4159, "step": 60005 }, { "epoch": 1.5232704242870378, "grad_norm": 0.36328125, "learning_rate": 4.899202868504643e-05, "loss": 0.3934, "step": 60010 }, { "epoch": 1.5233973423360536, "grad_norm": 0.353515625, "learning_rate": 4.896746639156781e-05, "loss": 0.4137, "step": 60015 }, { "epoch": 1.5235242603850694, "grad_norm": 0.359375, "learning_rate": 4.894290905571537e-05, "loss": 0.4188, "step": 60020 }, { "epoch": 1.5236511784340852, "grad_norm": 0.28125, "learning_rate": 4.891835667869412e-05, "loss": 0.4142, "step": 60025 }, { "epoch": 1.5237780964831007, "grad_norm": 0.353515625, "learning_rate": 4.889380926170884e-05, "loss": 0.4271, "step": 60030 }, { "epoch": 1.5239050145321165, "grad_norm": 0.373046875, "learning_rate": 4.886926680596402e-05, "loss": 0.4405, "step": 60035 }, { "epoch": 1.5240319325811322, "grad_norm": 0.365234375, "learning_rate": 4.884472931266405e-05, "loss": 0.4346, "step": 60040 }, { "epoch": 1.524158850630148, "grad_norm": 0.333984375, "learning_rate": 4.8820196783012916e-05, "loss": 0.4312, "step": 60045 }, { "epoch": 1.5242857686791638, "grad_norm": 0.337890625, "learning_rate": 4.879566921821443e-05, "loss": 0.443, "step": 60050 }, { "epoch": 1.5244126867281795, "grad_norm": 0.349609375, "learning_rate": 4.877114661947212e-05, "loss": 0.3958, "step": 60055 }, { "epoch": 1.5245396047771953, "grad_norm": 0.376953125, "learning_rate": 4.874662898798934e-05, "loss": 0.4269, "step": 60060 }, { "epoch": 1.524666522826211, "grad_norm": 0.359375, "learning_rate": 4.8722116324969126e-05, "loss": 0.434, "step": 60065 }, { "epoch": 1.5247934408752268, "grad_norm": 0.369140625, "learning_rate": 4.869760863161435e-05, "loss": 0.4422, "step": 60070 }, { "epoch": 1.5249203589242426, "grad_norm": 0.33203125, "learning_rate": 4.867310590912754e-05, "loss": 0.4074, "step": 60075 }, { "epoch": 1.5250472769732584, "grad_norm": 0.365234375, "learning_rate": 4.864860815871106e-05, "loss": 0.4564, "step": 60080 }, { "epoch": 1.5251741950222741, "grad_norm": 0.36328125, "learning_rate": 4.862411538156701e-05, "loss": 0.4294, "step": 60085 }, { "epoch": 1.52530111307129, "grad_norm": 0.353515625, "learning_rate": 4.85996275788972e-05, "loss": 0.4094, "step": 60090 }, { "epoch": 1.5254280311203057, "grad_norm": 0.36328125, "learning_rate": 4.85751447519033e-05, "loss": 0.4268, "step": 60095 }, { "epoch": 1.5255549491693214, "grad_norm": 0.330078125, "learning_rate": 4.855066690178667e-05, "loss": 0.3754, "step": 60100 }, { "epoch": 1.5256818672183372, "grad_norm": 0.3125, "learning_rate": 4.852619402974834e-05, "loss": 0.3914, "step": 60105 }, { "epoch": 1.525808785267353, "grad_norm": 0.349609375, "learning_rate": 4.850172613698929e-05, "loss": 0.4093, "step": 60110 }, { "epoch": 1.5259357033163687, "grad_norm": 0.353515625, "learning_rate": 4.8477263224710126e-05, "loss": 0.428, "step": 60115 }, { "epoch": 1.5260626213653845, "grad_norm": 0.330078125, "learning_rate": 4.8452805294111196e-05, "loss": 0.4132, "step": 60120 }, { "epoch": 1.5261895394144003, "grad_norm": 0.333984375, "learning_rate": 4.8428352346392674e-05, "loss": 0.4097, "step": 60125 }, { "epoch": 1.5263164574634158, "grad_norm": 0.36328125, "learning_rate": 4.840390438275443e-05, "loss": 0.441, "step": 60130 }, { "epoch": 1.5264433755124316, "grad_norm": 0.326171875, "learning_rate": 4.837946140439613e-05, "loss": 0.4233, "step": 60135 }, { "epoch": 1.5265702935614474, "grad_norm": 0.330078125, "learning_rate": 4.835502341251719e-05, "loss": 0.4238, "step": 60140 }, { "epoch": 1.5266972116104631, "grad_norm": 0.375, "learning_rate": 4.833059040831675e-05, "loss": 0.4083, "step": 60145 }, { "epoch": 1.526824129659479, "grad_norm": 0.36328125, "learning_rate": 4.830616239299375e-05, "loss": 0.4105, "step": 60150 }, { "epoch": 1.5269510477084947, "grad_norm": 0.35546875, "learning_rate": 4.828173936774686e-05, "loss": 0.4299, "step": 60155 }, { "epoch": 1.5270779657575104, "grad_norm": 0.34765625, "learning_rate": 4.825732133377443e-05, "loss": 0.4227, "step": 60160 }, { "epoch": 1.527204883806526, "grad_norm": 0.34765625, "learning_rate": 4.823290829227477e-05, "loss": 0.4048, "step": 60165 }, { "epoch": 1.5273318018555417, "grad_norm": 0.337890625, "learning_rate": 4.820850024444577e-05, "loss": 0.4035, "step": 60170 }, { "epoch": 1.5274587199045575, "grad_norm": 0.35546875, "learning_rate": 4.81840971914851e-05, "loss": 0.4153, "step": 60175 }, { "epoch": 1.5275856379535733, "grad_norm": 0.359375, "learning_rate": 4.8159699134590184e-05, "loss": 0.4081, "step": 60180 }, { "epoch": 1.527712556002589, "grad_norm": 0.33203125, "learning_rate": 4.813530607495832e-05, "loss": 0.4142, "step": 60185 }, { "epoch": 1.5278394740516048, "grad_norm": 0.357421875, "learning_rate": 4.81109180137864e-05, "loss": 0.4202, "step": 60190 }, { "epoch": 1.5279663921006206, "grad_norm": 0.34765625, "learning_rate": 4.808653495227115e-05, "loss": 0.418, "step": 60195 }, { "epoch": 1.5280933101496363, "grad_norm": 0.373046875, "learning_rate": 4.8062156891609034e-05, "loss": 0.4112, "step": 60200 }, { "epoch": 1.528220228198652, "grad_norm": 0.353515625, "learning_rate": 4.803778383299627e-05, "loss": 0.4031, "step": 60205 }, { "epoch": 1.5283471462476679, "grad_norm": 0.373046875, "learning_rate": 4.801341577762885e-05, "loss": 0.4126, "step": 60210 }, { "epoch": 1.5284740642966836, "grad_norm": 0.35546875, "learning_rate": 4.7989052726702484e-05, "loss": 0.4091, "step": 60215 }, { "epoch": 1.5286009823456994, "grad_norm": 0.34375, "learning_rate": 4.7964694681412665e-05, "loss": 0.4039, "step": 60220 }, { "epoch": 1.5287279003947152, "grad_norm": 0.39453125, "learning_rate": 4.794034164295457e-05, "loss": 0.4085, "step": 60225 }, { "epoch": 1.528854818443731, "grad_norm": 0.326171875, "learning_rate": 4.7915993612523325e-05, "loss": 0.3979, "step": 60230 }, { "epoch": 1.5289817364927467, "grad_norm": 0.34375, "learning_rate": 4.7891650591313596e-05, "loss": 0.4288, "step": 60235 }, { "epoch": 1.5291086545417625, "grad_norm": 0.318359375, "learning_rate": 4.786731258051992e-05, "loss": 0.3925, "step": 60240 }, { "epoch": 1.5292355725907782, "grad_norm": 0.314453125, "learning_rate": 4.784297958133652e-05, "loss": 0.4146, "step": 60245 }, { "epoch": 1.529362490639794, "grad_norm": 0.353515625, "learning_rate": 4.78186515949574e-05, "loss": 0.4243, "step": 60250 }, { "epoch": 1.5294894086888098, "grad_norm": 0.35546875, "learning_rate": 4.7794328622576325e-05, "loss": 0.439, "step": 60255 }, { "epoch": 1.5296163267378255, "grad_norm": 0.384765625, "learning_rate": 4.777001066538685e-05, "loss": 0.4194, "step": 60260 }, { "epoch": 1.529743244786841, "grad_norm": 0.34765625, "learning_rate": 4.774569772458226e-05, "loss": 0.3954, "step": 60265 }, { "epoch": 1.5298701628358569, "grad_norm": 0.341796875, "learning_rate": 4.7721389801355534e-05, "loss": 0.4185, "step": 60270 }, { "epoch": 1.5299970808848726, "grad_norm": 0.330078125, "learning_rate": 4.769708689689946e-05, "loss": 0.4135, "step": 60275 }, { "epoch": 1.5301239989338884, "grad_norm": 0.36328125, "learning_rate": 4.76727890124066e-05, "loss": 0.4324, "step": 60280 }, { "epoch": 1.5302509169829042, "grad_norm": 0.349609375, "learning_rate": 4.764849614906916e-05, "loss": 0.4211, "step": 60285 }, { "epoch": 1.53037783503192, "grad_norm": 0.35546875, "learning_rate": 4.762420830807929e-05, "loss": 0.4265, "step": 60290 }, { "epoch": 1.5305047530809355, "grad_norm": 0.3515625, "learning_rate": 4.759992549062876e-05, "loss": 0.4151, "step": 60295 }, { "epoch": 1.5306316711299512, "grad_norm": 0.34375, "learning_rate": 4.757564769790908e-05, "loss": 0.4247, "step": 60300 }, { "epoch": 1.530758589178967, "grad_norm": 0.349609375, "learning_rate": 4.755137493111157e-05, "loss": 0.4071, "step": 60305 }, { "epoch": 1.5308855072279828, "grad_norm": 0.3671875, "learning_rate": 4.752710719142727e-05, "loss": 0.4086, "step": 60310 }, { "epoch": 1.5310124252769985, "grad_norm": 0.359375, "learning_rate": 4.7502844480047014e-05, "loss": 0.4211, "step": 60315 }, { "epoch": 1.5311393433260143, "grad_norm": 0.3515625, "learning_rate": 4.7478586798161347e-05, "loss": 0.4014, "step": 60320 }, { "epoch": 1.53126626137503, "grad_norm": 0.359375, "learning_rate": 4.745433414696059e-05, "loss": 0.4336, "step": 60325 }, { "epoch": 1.5313931794240458, "grad_norm": 0.34375, "learning_rate": 4.743008652763481e-05, "loss": 0.4254, "step": 60330 }, { "epoch": 1.5315200974730616, "grad_norm": 0.3515625, "learning_rate": 4.740584394137377e-05, "loss": 0.4312, "step": 60335 }, { "epoch": 1.5316470155220774, "grad_norm": 0.34765625, "learning_rate": 4.738160638936715e-05, "loss": 0.4089, "step": 60340 }, { "epoch": 1.5317739335710931, "grad_norm": 0.345703125, "learning_rate": 4.735737387280424e-05, "loss": 0.4315, "step": 60345 }, { "epoch": 1.531900851620109, "grad_norm": 0.36328125, "learning_rate": 4.7333146392874035e-05, "loss": 0.4215, "step": 60350 }, { "epoch": 1.5320277696691247, "grad_norm": 0.369140625, "learning_rate": 4.7308923950765515e-05, "loss": 0.4074, "step": 60355 }, { "epoch": 1.5321546877181405, "grad_norm": 0.326171875, "learning_rate": 4.7284706547667165e-05, "loss": 0.395, "step": 60360 }, { "epoch": 1.5322816057671562, "grad_norm": 0.341796875, "learning_rate": 4.726049418476738e-05, "loss": 0.4237, "step": 60365 }, { "epoch": 1.532408523816172, "grad_norm": 0.3515625, "learning_rate": 4.72362868632542e-05, "loss": 0.4247, "step": 60370 }, { "epoch": 1.5325354418651878, "grad_norm": 0.357421875, "learning_rate": 4.721208458431549e-05, "loss": 0.4212, "step": 60375 }, { "epoch": 1.5326623599142035, "grad_norm": 0.33984375, "learning_rate": 4.718788734913884e-05, "loss": 0.4552, "step": 60380 }, { "epoch": 1.5327892779632193, "grad_norm": 0.337890625, "learning_rate": 4.716369515891162e-05, "loss": 0.4342, "step": 60385 }, { "epoch": 1.532916196012235, "grad_norm": 0.35546875, "learning_rate": 4.71395080148209e-05, "loss": 0.4294, "step": 60390 }, { "epoch": 1.5330431140612506, "grad_norm": 0.33984375, "learning_rate": 4.711532591805355e-05, "loss": 0.4329, "step": 60395 }, { "epoch": 1.5331700321102664, "grad_norm": 0.33984375, "learning_rate": 4.709114886979617e-05, "loss": 0.4248, "step": 60400 }, { "epoch": 1.5332969501592821, "grad_norm": 0.3515625, "learning_rate": 4.7066976871235134e-05, "loss": 0.4357, "step": 60405 }, { "epoch": 1.533423868208298, "grad_norm": 0.45703125, "learning_rate": 4.704280992355649e-05, "loss": 0.4237, "step": 60410 }, { "epoch": 1.5335507862573137, "grad_norm": 0.361328125, "learning_rate": 4.701864802794619e-05, "loss": 0.4228, "step": 60415 }, { "epoch": 1.5336777043063294, "grad_norm": 0.357421875, "learning_rate": 4.6994491185589826e-05, "loss": 0.4159, "step": 60420 }, { "epoch": 1.5338046223553452, "grad_norm": 0.326171875, "learning_rate": 4.6970339397672685e-05, "loss": 0.4039, "step": 60425 }, { "epoch": 1.5339315404043607, "grad_norm": 0.357421875, "learning_rate": 4.6946192665379995e-05, "loss": 0.4135, "step": 60430 }, { "epoch": 1.5340584584533765, "grad_norm": 0.392578125, "learning_rate": 4.692205098989659e-05, "loss": 0.4305, "step": 60435 }, { "epoch": 1.5341853765023923, "grad_norm": 0.341796875, "learning_rate": 4.6897914372407084e-05, "loss": 0.3932, "step": 60440 }, { "epoch": 1.534312294551408, "grad_norm": 0.32421875, "learning_rate": 4.687378281409584e-05, "loss": 0.4041, "step": 60445 }, { "epoch": 1.5344392126004238, "grad_norm": 0.341796875, "learning_rate": 4.6849656316147003e-05, "loss": 0.4016, "step": 60450 }, { "epoch": 1.5345661306494396, "grad_norm": 0.349609375, "learning_rate": 4.6825534879744445e-05, "loss": 0.4217, "step": 60455 }, { "epoch": 1.5346930486984554, "grad_norm": 0.34375, "learning_rate": 4.680141850607177e-05, "loss": 0.4157, "step": 60460 }, { "epoch": 1.5348199667474711, "grad_norm": 0.3515625, "learning_rate": 4.677730719631241e-05, "loss": 0.4265, "step": 60465 }, { "epoch": 1.5349468847964869, "grad_norm": 0.37890625, "learning_rate": 4.675320095164945e-05, "loss": 0.4398, "step": 60470 }, { "epoch": 1.5350738028455027, "grad_norm": 0.375, "learning_rate": 4.672909977326575e-05, "loss": 0.428, "step": 60475 }, { "epoch": 1.5352007208945184, "grad_norm": 0.3515625, "learning_rate": 4.670500366234403e-05, "loss": 0.432, "step": 60480 }, { "epoch": 1.5353276389435342, "grad_norm": 0.3125, "learning_rate": 4.6680912620066635e-05, "loss": 0.4055, "step": 60485 }, { "epoch": 1.53545455699255, "grad_norm": 0.349609375, "learning_rate": 4.6656826647615704e-05, "loss": 0.4449, "step": 60490 }, { "epoch": 1.5355814750415657, "grad_norm": 0.359375, "learning_rate": 4.663274574617313e-05, "loss": 0.4388, "step": 60495 }, { "epoch": 1.5357083930905815, "grad_norm": 0.341796875, "learning_rate": 4.6608669916920485e-05, "loss": 0.416, "step": 60500 }, { "epoch": 1.5358353111395973, "grad_norm": 0.37109375, "learning_rate": 4.6584599161039285e-05, "loss": 0.4127, "step": 60505 }, { "epoch": 1.535962229188613, "grad_norm": 0.359375, "learning_rate": 4.6560533479710604e-05, "loss": 0.4072, "step": 60510 }, { "epoch": 1.5360891472376288, "grad_norm": 0.35546875, "learning_rate": 4.6536472874115336e-05, "loss": 0.4389, "step": 60515 }, { "epoch": 1.5362160652866446, "grad_norm": 0.357421875, "learning_rate": 4.651241734543413e-05, "loss": 0.3863, "step": 60520 }, { "epoch": 1.5363429833356603, "grad_norm": 0.341796875, "learning_rate": 4.648836689484738e-05, "loss": 0.4156, "step": 60525 }, { "epoch": 1.5364699013846759, "grad_norm": 0.341796875, "learning_rate": 4.646432152353525e-05, "loss": 0.4107, "step": 60530 }, { "epoch": 1.5365968194336916, "grad_norm": 0.3515625, "learning_rate": 4.6440281232677604e-05, "loss": 0.4271, "step": 60535 }, { "epoch": 1.5367237374827074, "grad_norm": 0.373046875, "learning_rate": 4.641624602345406e-05, "loss": 0.4366, "step": 60540 }, { "epoch": 1.5368506555317232, "grad_norm": 0.35546875, "learning_rate": 4.6392215897044104e-05, "loss": 0.3991, "step": 60545 }, { "epoch": 1.536977573580739, "grad_norm": 0.337890625, "learning_rate": 4.6368190854626855e-05, "loss": 0.3748, "step": 60550 }, { "epoch": 1.5371044916297547, "grad_norm": 0.3359375, "learning_rate": 4.6344170897381196e-05, "loss": 0.393, "step": 60555 }, { "epoch": 1.5372314096787703, "grad_norm": 0.38671875, "learning_rate": 4.632015602648579e-05, "loss": 0.4312, "step": 60560 }, { "epoch": 1.537358327727786, "grad_norm": 0.365234375, "learning_rate": 4.629614624311901e-05, "loss": 0.4257, "step": 60565 }, { "epoch": 1.5374852457768018, "grad_norm": 0.365234375, "learning_rate": 4.627214154845903e-05, "loss": 0.4238, "step": 60570 }, { "epoch": 1.5376121638258176, "grad_norm": 0.333984375, "learning_rate": 4.6248141943683744e-05, "loss": 0.422, "step": 60575 }, { "epoch": 1.5377390818748333, "grad_norm": 0.333984375, "learning_rate": 4.622414742997076e-05, "loss": 0.4265, "step": 60580 }, { "epoch": 1.537865999923849, "grad_norm": 0.3359375, "learning_rate": 4.6200158008497574e-05, "loss": 0.4349, "step": 60585 }, { "epoch": 1.5379929179728649, "grad_norm": 0.3671875, "learning_rate": 4.617617368044129e-05, "loss": 0.4157, "step": 60590 }, { "epoch": 1.5381198360218806, "grad_norm": 0.384765625, "learning_rate": 4.615219444697879e-05, "loss": 0.4415, "step": 60595 }, { "epoch": 1.5382467540708964, "grad_norm": 0.345703125, "learning_rate": 4.612822030928671e-05, "loss": 0.417, "step": 60600 }, { "epoch": 1.5383736721199122, "grad_norm": 0.35546875, "learning_rate": 4.6104251268541534e-05, "loss": 0.4063, "step": 60605 }, { "epoch": 1.538500590168928, "grad_norm": 0.33203125, "learning_rate": 4.608028732591934e-05, "loss": 0.403, "step": 60610 }, { "epoch": 1.5386275082179437, "grad_norm": 0.359375, "learning_rate": 4.6056328482596064e-05, "loss": 0.427, "step": 60615 }, { "epoch": 1.5387544262669595, "grad_norm": 0.33203125, "learning_rate": 4.603237473974735e-05, "loss": 0.4157, "step": 60620 }, { "epoch": 1.5388813443159752, "grad_norm": 0.330078125, "learning_rate": 4.600842609854859e-05, "loss": 0.3956, "step": 60625 }, { "epoch": 1.539008262364991, "grad_norm": 0.36328125, "learning_rate": 4.598448256017494e-05, "loss": 0.3943, "step": 60630 }, { "epoch": 1.5391351804140068, "grad_norm": 0.361328125, "learning_rate": 4.5960544125801295e-05, "loss": 0.4162, "step": 60635 }, { "epoch": 1.5392620984630225, "grad_norm": 0.353515625, "learning_rate": 4.5936610796602304e-05, "loss": 0.4249, "step": 60640 }, { "epoch": 1.5393890165120383, "grad_norm": 0.361328125, "learning_rate": 4.5912682573752365e-05, "loss": 0.4067, "step": 60645 }, { "epoch": 1.539515934561054, "grad_norm": 0.349609375, "learning_rate": 4.588875945842564e-05, "loss": 0.4299, "step": 60650 }, { "epoch": 1.5396428526100698, "grad_norm": 0.37109375, "learning_rate": 4.5864841451795957e-05, "loss": 0.4271, "step": 60655 }, { "epoch": 1.5397697706590854, "grad_norm": 0.37890625, "learning_rate": 4.584092855503707e-05, "loss": 0.4484, "step": 60660 }, { "epoch": 1.5398966887081011, "grad_norm": 0.337890625, "learning_rate": 4.5817020769322336e-05, "loss": 0.4331, "step": 60665 }, { "epoch": 1.540023606757117, "grad_norm": 0.337890625, "learning_rate": 4.579311809582484e-05, "loss": 0.4027, "step": 60670 }, { "epoch": 1.5401505248061327, "grad_norm": 0.373046875, "learning_rate": 4.5769220535717585e-05, "loss": 0.3884, "step": 60675 }, { "epoch": 1.5402774428551484, "grad_norm": 0.349609375, "learning_rate": 4.574532809017315e-05, "loss": 0.4372, "step": 60680 }, { "epoch": 1.5404043609041642, "grad_norm": 0.357421875, "learning_rate": 4.5721440760363935e-05, "loss": 0.4447, "step": 60685 }, { "epoch": 1.54053127895318, "grad_norm": 0.365234375, "learning_rate": 4.5697558547462106e-05, "loss": 0.4588, "step": 60690 }, { "epoch": 1.5406581970021955, "grad_norm": 0.357421875, "learning_rate": 4.56736814526395e-05, "loss": 0.3884, "step": 60695 }, { "epoch": 1.5407851150512113, "grad_norm": 0.345703125, "learning_rate": 4.56498094770678e-05, "loss": 0.3992, "step": 60700 }, { "epoch": 1.540912033100227, "grad_norm": 0.349609375, "learning_rate": 4.5625942621918384e-05, "loss": 0.4221, "step": 60705 }, { "epoch": 1.5410389511492428, "grad_norm": 0.353515625, "learning_rate": 4.560208088836239e-05, "loss": 0.4308, "step": 60710 }, { "epoch": 1.5411658691982586, "grad_norm": 0.369140625, "learning_rate": 4.557822427757071e-05, "loss": 0.4103, "step": 60715 }, { "epoch": 1.5412927872472744, "grad_norm": 0.34765625, "learning_rate": 4.555437279071395e-05, "loss": 0.4293, "step": 60720 }, { "epoch": 1.5414197052962901, "grad_norm": 0.345703125, "learning_rate": 4.553052642896247e-05, "loss": 0.411, "step": 60725 }, { "epoch": 1.541546623345306, "grad_norm": 0.3359375, "learning_rate": 4.5506685193486484e-05, "loss": 0.4041, "step": 60730 }, { "epoch": 1.5416735413943217, "grad_norm": 0.349609375, "learning_rate": 4.5482849085455845e-05, "loss": 0.4276, "step": 60735 }, { "epoch": 1.5418004594433374, "grad_norm": 0.32421875, "learning_rate": 4.5459018106040165e-05, "loss": 0.3938, "step": 60740 }, { "epoch": 1.5419273774923532, "grad_norm": 0.3671875, "learning_rate": 4.543519225640883e-05, "loss": 0.4336, "step": 60745 }, { "epoch": 1.542054295541369, "grad_norm": 0.375, "learning_rate": 4.5411371537730895e-05, "loss": 0.4215, "step": 60750 }, { "epoch": 1.5421812135903847, "grad_norm": 0.337890625, "learning_rate": 4.538755595117535e-05, "loss": 0.4154, "step": 60755 }, { "epoch": 1.5423081316394005, "grad_norm": 0.34375, "learning_rate": 4.536374549791077e-05, "loss": 0.4185, "step": 60760 }, { "epoch": 1.5424350496884163, "grad_norm": 0.337890625, "learning_rate": 4.533994017910551e-05, "loss": 0.4752, "step": 60765 }, { "epoch": 1.542561967737432, "grad_norm": 0.326171875, "learning_rate": 4.531613999592769e-05, "loss": 0.4217, "step": 60770 }, { "epoch": 1.5426888857864478, "grad_norm": 0.373046875, "learning_rate": 4.52923449495452e-05, "loss": 0.4065, "step": 60775 }, { "epoch": 1.5428158038354636, "grad_norm": 0.36328125, "learning_rate": 4.526855504112562e-05, "loss": 0.4272, "step": 60780 }, { "epoch": 1.5429427218844793, "grad_norm": 0.34765625, "learning_rate": 4.5244770271836334e-05, "loss": 0.4409, "step": 60785 }, { "epoch": 1.543069639933495, "grad_norm": 0.359375, "learning_rate": 4.5220990642844385e-05, "loss": 0.4275, "step": 60790 }, { "epoch": 1.5431965579825107, "grad_norm": 0.3359375, "learning_rate": 4.519721615531674e-05, "loss": 0.3998, "step": 60795 }, { "epoch": 1.5433234760315264, "grad_norm": 0.34375, "learning_rate": 4.517344681041995e-05, "loss": 0.3975, "step": 60800 }, { "epoch": 1.5434503940805422, "grad_norm": 0.337890625, "learning_rate": 4.5149682609320364e-05, "loss": 0.4199, "step": 60805 }, { "epoch": 1.543577312129558, "grad_norm": 0.36328125, "learning_rate": 4.512592355318408e-05, "loss": 0.4261, "step": 60810 }, { "epoch": 1.5437042301785737, "grad_norm": 0.341796875, "learning_rate": 4.510216964317698e-05, "loss": 0.3736, "step": 60815 }, { "epoch": 1.5438311482275895, "grad_norm": 0.380859375, "learning_rate": 4.507842088046459e-05, "loss": 0.4445, "step": 60820 }, { "epoch": 1.543958066276605, "grad_norm": 0.353515625, "learning_rate": 4.505467726621229e-05, "loss": 0.4133, "step": 60825 }, { "epoch": 1.5440849843256208, "grad_norm": 0.328125, "learning_rate": 4.5030938801585184e-05, "loss": 0.3983, "step": 60830 }, { "epoch": 1.5442119023746366, "grad_norm": 0.33203125, "learning_rate": 4.500720548774812e-05, "loss": 0.4138, "step": 60835 }, { "epoch": 1.5443388204236523, "grad_norm": 0.345703125, "learning_rate": 4.498347732586564e-05, "loss": 0.4118, "step": 60840 }, { "epoch": 1.544465738472668, "grad_norm": 0.341796875, "learning_rate": 4.4959754317102115e-05, "loss": 0.3969, "step": 60845 }, { "epoch": 1.5445926565216839, "grad_norm": 0.34375, "learning_rate": 4.493603646262159e-05, "loss": 0.4171, "step": 60850 }, { "epoch": 1.5447195745706996, "grad_norm": 0.3359375, "learning_rate": 4.4912323763587866e-05, "loss": 0.3895, "step": 60855 }, { "epoch": 1.5448464926197154, "grad_norm": 0.34375, "learning_rate": 4.48886162211646e-05, "loss": 0.4312, "step": 60860 }, { "epoch": 1.5449734106687312, "grad_norm": 0.36328125, "learning_rate": 4.4864913836515055e-05, "loss": 0.4215, "step": 60865 }, { "epoch": 1.545100328717747, "grad_norm": 0.337890625, "learning_rate": 4.484121661080232e-05, "loss": 0.4208, "step": 60870 }, { "epoch": 1.5452272467667627, "grad_norm": 0.361328125, "learning_rate": 4.481752454518919e-05, "loss": 0.413, "step": 60875 }, { "epoch": 1.5453541648157785, "grad_norm": 0.33984375, "learning_rate": 4.479383764083824e-05, "loss": 0.4072, "step": 60880 }, { "epoch": 1.5454810828647942, "grad_norm": 0.34375, "learning_rate": 4.477015589891176e-05, "loss": 0.4356, "step": 60885 }, { "epoch": 1.54560800091381, "grad_norm": 0.361328125, "learning_rate": 4.474647932057182e-05, "loss": 0.4298, "step": 60890 }, { "epoch": 1.5457349189628258, "grad_norm": 0.357421875, "learning_rate": 4.472280790698021e-05, "loss": 0.3902, "step": 60895 }, { "epoch": 1.5458618370118415, "grad_norm": 0.34765625, "learning_rate": 4.4699141659298434e-05, "loss": 0.4202, "step": 60900 }, { "epoch": 1.5459887550608573, "grad_norm": 0.341796875, "learning_rate": 4.467548057868788e-05, "loss": 0.4133, "step": 60905 }, { "epoch": 1.546115673109873, "grad_norm": 0.361328125, "learning_rate": 4.465182466630954e-05, "loss": 0.4328, "step": 60910 }, { "epoch": 1.5462425911588888, "grad_norm": 0.359375, "learning_rate": 4.462817392332414e-05, "loss": 0.4182, "step": 60915 }, { "epoch": 1.5463695092079046, "grad_norm": 0.3515625, "learning_rate": 4.460452835089233e-05, "loss": 0.4141, "step": 60920 }, { "epoch": 1.5464964272569202, "grad_norm": 0.34765625, "learning_rate": 4.458088795017432e-05, "loss": 0.4075, "step": 60925 }, { "epoch": 1.546623345305936, "grad_norm": 0.3515625, "learning_rate": 4.455725272233016e-05, "loss": 0.4332, "step": 60930 }, { "epoch": 1.5467502633549517, "grad_norm": 0.33203125, "learning_rate": 4.4533622668519605e-05, "loss": 0.4085, "step": 60935 }, { "epoch": 1.5468771814039675, "grad_norm": 0.345703125, "learning_rate": 4.450999778990217e-05, "loss": 0.4182, "step": 60940 }, { "epoch": 1.5470040994529832, "grad_norm": 0.357421875, "learning_rate": 4.448637808763711e-05, "loss": 0.4241, "step": 60945 }, { "epoch": 1.547131017501999, "grad_norm": 0.359375, "learning_rate": 4.446276356288346e-05, "loss": 0.4047, "step": 60950 }, { "epoch": 1.5472579355510148, "grad_norm": 0.38671875, "learning_rate": 4.4439154216799974e-05, "loss": 0.4239, "step": 60955 }, { "epoch": 1.5473848536000303, "grad_norm": 0.341796875, "learning_rate": 4.441555005054511e-05, "loss": 0.4123, "step": 60960 }, { "epoch": 1.547511771649046, "grad_norm": 0.33984375, "learning_rate": 4.4391951065277174e-05, "loss": 0.4377, "step": 60965 }, { "epoch": 1.5476386896980618, "grad_norm": 0.369140625, "learning_rate": 4.436835726215412e-05, "loss": 0.4162, "step": 60970 }, { "epoch": 1.5477656077470776, "grad_norm": 0.326171875, "learning_rate": 4.43447686423337e-05, "loss": 0.415, "step": 60975 }, { "epoch": 1.5478925257960934, "grad_norm": 0.337890625, "learning_rate": 4.4321185206973345e-05, "loss": 0.3971, "step": 60980 }, { "epoch": 1.5480194438451091, "grad_norm": 0.359375, "learning_rate": 4.4297606957230386e-05, "loss": 0.4353, "step": 60985 }, { "epoch": 1.548146361894125, "grad_norm": 0.3359375, "learning_rate": 4.4274033894261756e-05, "loss": 0.4046, "step": 60990 }, { "epoch": 1.5482732799431407, "grad_norm": 0.37890625, "learning_rate": 4.425046601922411e-05, "loss": 0.4226, "step": 60995 }, { "epoch": 1.5484001979921564, "grad_norm": 0.333984375, "learning_rate": 4.422690333327403e-05, "loss": 0.3848, "step": 61000 }, { "epoch": 1.5485271160411722, "grad_norm": 0.3671875, "learning_rate": 4.420334583756766e-05, "loss": 0.4095, "step": 61005 }, { "epoch": 1.548654034090188, "grad_norm": 0.353515625, "learning_rate": 4.417979353326096e-05, "loss": 0.3792, "step": 61010 }, { "epoch": 1.5487809521392037, "grad_norm": 0.365234375, "learning_rate": 4.415624642150965e-05, "loss": 0.4245, "step": 61015 }, { "epoch": 1.5489078701882195, "grad_norm": 0.353515625, "learning_rate": 4.4132704503469156e-05, "loss": 0.431, "step": 61020 }, { "epoch": 1.5490347882372353, "grad_norm": 0.34765625, "learning_rate": 4.4109167780294704e-05, "loss": 0.4279, "step": 61025 }, { "epoch": 1.549161706286251, "grad_norm": 0.318359375, "learning_rate": 4.408563625314118e-05, "loss": 0.4123, "step": 61030 }, { "epoch": 1.5492886243352668, "grad_norm": 0.328125, "learning_rate": 4.406210992316331e-05, "loss": 0.393, "step": 61035 }, { "epoch": 1.5494155423842826, "grad_norm": 0.345703125, "learning_rate": 4.4038588791515504e-05, "loss": 0.3906, "step": 61040 }, { "epoch": 1.5495424604332984, "grad_norm": 0.361328125, "learning_rate": 4.40150728593519e-05, "loss": 0.4077, "step": 61045 }, { "epoch": 1.5496693784823141, "grad_norm": 0.3359375, "learning_rate": 4.399156212782648e-05, "loss": 0.4675, "step": 61050 }, { "epoch": 1.5497962965313299, "grad_norm": 0.333984375, "learning_rate": 4.396805659809288e-05, "loss": 0.4145, "step": 61055 }, { "epoch": 1.5499232145803454, "grad_norm": 0.333984375, "learning_rate": 4.394455627130453e-05, "loss": 0.3957, "step": 61060 }, { "epoch": 1.5500501326293612, "grad_norm": 0.330078125, "learning_rate": 4.3921061148614535e-05, "loss": 0.4172, "step": 61065 }, { "epoch": 1.550177050678377, "grad_norm": 0.365234375, "learning_rate": 4.389757123117578e-05, "loss": 0.4325, "step": 61070 }, { "epoch": 1.5503039687273927, "grad_norm": 0.34375, "learning_rate": 4.3874086520140973e-05, "loss": 0.4031, "step": 61075 }, { "epoch": 1.5504308867764085, "grad_norm": 0.341796875, "learning_rate": 4.385060701666248e-05, "loss": 0.4054, "step": 61080 }, { "epoch": 1.5505578048254243, "grad_norm": 0.34375, "learning_rate": 4.382713272189241e-05, "loss": 0.4352, "step": 61085 }, { "epoch": 1.5506847228744398, "grad_norm": 0.380859375, "learning_rate": 4.380366363698264e-05, "loss": 0.4166, "step": 61090 }, { "epoch": 1.5508116409234556, "grad_norm": 0.31640625, "learning_rate": 4.3780199763084795e-05, "loss": 0.377, "step": 61095 }, { "epoch": 1.5509385589724713, "grad_norm": 0.384765625, "learning_rate": 4.375674110135025e-05, "loss": 0.4167, "step": 61100 }, { "epoch": 1.5510654770214871, "grad_norm": 0.353515625, "learning_rate": 4.3733287652930036e-05, "loss": 0.4451, "step": 61105 }, { "epoch": 1.5511923950705029, "grad_norm": 0.328125, "learning_rate": 4.3709839418975116e-05, "loss": 0.4066, "step": 61110 }, { "epoch": 1.5513193131195187, "grad_norm": 0.375, "learning_rate": 4.368639640063604e-05, "loss": 0.431, "step": 61115 }, { "epoch": 1.5514462311685344, "grad_norm": 0.341796875, "learning_rate": 4.366295859906314e-05, "loss": 0.4178, "step": 61120 }, { "epoch": 1.5515731492175502, "grad_norm": 0.365234375, "learning_rate": 4.363952601540651e-05, "loss": 0.4183, "step": 61125 }, { "epoch": 1.551700067266566, "grad_norm": 0.34375, "learning_rate": 4.361609865081596e-05, "loss": 0.406, "step": 61130 }, { "epoch": 1.5518269853155817, "grad_norm": 0.34375, "learning_rate": 4.359267650644109e-05, "loss": 0.4223, "step": 61135 }, { "epoch": 1.5519539033645975, "grad_norm": 0.365234375, "learning_rate": 4.356925958343117e-05, "loss": 0.4101, "step": 61140 }, { "epoch": 1.5520808214136133, "grad_norm": 0.3671875, "learning_rate": 4.354584788293526e-05, "loss": 0.4492, "step": 61145 }, { "epoch": 1.552207739462629, "grad_norm": 0.353515625, "learning_rate": 4.352244140610223e-05, "loss": 0.4172, "step": 61150 }, { "epoch": 1.5523346575116448, "grad_norm": 0.3046875, "learning_rate": 4.3499040154080586e-05, "loss": 0.4245, "step": 61155 }, { "epoch": 1.5524615755606606, "grad_norm": 0.373046875, "learning_rate": 4.347564412801863e-05, "loss": 0.4426, "step": 61160 }, { "epoch": 1.5525884936096763, "grad_norm": 0.365234375, "learning_rate": 4.345225332906436e-05, "loss": 0.4306, "step": 61165 }, { "epoch": 1.552715411658692, "grad_norm": 0.359375, "learning_rate": 4.342886775836554e-05, "loss": 0.4293, "step": 61170 }, { "epoch": 1.5528423297077079, "grad_norm": 0.349609375, "learning_rate": 4.340548741706978e-05, "loss": 0.4422, "step": 61175 }, { "epoch": 1.5529692477567236, "grad_norm": 0.341796875, "learning_rate": 4.338211230632429e-05, "loss": 0.4159, "step": 61180 }, { "epoch": 1.5530961658057394, "grad_norm": 0.37890625, "learning_rate": 4.335874242727608e-05, "loss": 0.4373, "step": 61185 }, { "epoch": 1.553223083854755, "grad_norm": 0.361328125, "learning_rate": 4.333537778107189e-05, "loss": 0.4126, "step": 61190 }, { "epoch": 1.5533500019037707, "grad_norm": 0.373046875, "learning_rate": 4.3312018368858234e-05, "loss": 0.4148, "step": 61195 }, { "epoch": 1.5534769199527865, "grad_norm": 0.345703125, "learning_rate": 4.328866419178134e-05, "loss": 0.3951, "step": 61200 }, { "epoch": 1.5536038380018022, "grad_norm": 0.33984375, "learning_rate": 4.3265315250987174e-05, "loss": 0.3985, "step": 61205 }, { "epoch": 1.553730756050818, "grad_norm": 0.34375, "learning_rate": 4.3241971547621475e-05, "loss": 0.4167, "step": 61210 }, { "epoch": 1.5538576740998338, "grad_norm": 0.365234375, "learning_rate": 4.3218633082829716e-05, "loss": 0.4071, "step": 61215 }, { "epoch": 1.5539845921488495, "grad_norm": 0.353515625, "learning_rate": 4.3195299857757096e-05, "loss": 0.4149, "step": 61220 }, { "epoch": 1.554111510197865, "grad_norm": 0.3671875, "learning_rate": 4.3171971873548514e-05, "loss": 0.4212, "step": 61225 }, { "epoch": 1.5542384282468809, "grad_norm": 0.35546875, "learning_rate": 4.3148649131348765e-05, "loss": 0.4412, "step": 61230 }, { "epoch": 1.5543653462958966, "grad_norm": 0.37890625, "learning_rate": 4.312533163230223e-05, "loss": 0.4356, "step": 61235 }, { "epoch": 1.5544922643449124, "grad_norm": 0.3359375, "learning_rate": 4.310201937755307e-05, "loss": 0.417, "step": 61240 }, { "epoch": 1.5546191823939282, "grad_norm": 0.349609375, "learning_rate": 4.3078712368245274e-05, "loss": 0.4035, "step": 61245 }, { "epoch": 1.554746100442944, "grad_norm": 0.3515625, "learning_rate": 4.3055410605522484e-05, "loss": 0.4066, "step": 61250 }, { "epoch": 1.5548730184919597, "grad_norm": 0.3359375, "learning_rate": 4.303211409052807e-05, "loss": 0.4114, "step": 61255 }, { "epoch": 1.5549999365409755, "grad_norm": 0.390625, "learning_rate": 4.3008822824405235e-05, "loss": 0.4179, "step": 61260 }, { "epoch": 1.5551268545899912, "grad_norm": 0.345703125, "learning_rate": 4.2985536808296825e-05, "loss": 0.4262, "step": 61265 }, { "epoch": 1.555253772639007, "grad_norm": 0.34375, "learning_rate": 4.29622560433455e-05, "loss": 0.4091, "step": 61270 }, { "epoch": 1.5553806906880228, "grad_norm": 0.373046875, "learning_rate": 4.293898053069364e-05, "loss": 0.4222, "step": 61275 }, { "epoch": 1.5555076087370385, "grad_norm": 0.33203125, "learning_rate": 4.2915710271483364e-05, "loss": 0.4032, "step": 61280 }, { "epoch": 1.5556345267860543, "grad_norm": 0.3671875, "learning_rate": 4.2892445266856516e-05, "loss": 0.4271, "step": 61285 }, { "epoch": 1.55576144483507, "grad_norm": 0.357421875, "learning_rate": 4.286918551795471e-05, "loss": 0.4221, "step": 61290 }, { "epoch": 1.5558883628840858, "grad_norm": 0.341796875, "learning_rate": 4.284593102591927e-05, "loss": 0.3923, "step": 61295 }, { "epoch": 1.5560152809331016, "grad_norm": 0.359375, "learning_rate": 4.282268179189133e-05, "loss": 0.4111, "step": 61300 }, { "epoch": 1.5561421989821174, "grad_norm": 0.365234375, "learning_rate": 4.279943781701173e-05, "loss": 0.4041, "step": 61305 }, { "epoch": 1.5562691170311331, "grad_norm": 0.365234375, "learning_rate": 4.2776199102421004e-05, "loss": 0.3978, "step": 61310 }, { "epoch": 1.556396035080149, "grad_norm": 0.357421875, "learning_rate": 4.275296564925942e-05, "loss": 0.4255, "step": 61315 }, { "epoch": 1.5565229531291644, "grad_norm": 0.345703125, "learning_rate": 4.272973745866715e-05, "loss": 0.4079, "step": 61320 }, { "epoch": 1.5566498711781802, "grad_norm": 0.349609375, "learning_rate": 4.270651453178393e-05, "loss": 0.4135, "step": 61325 }, { "epoch": 1.556776789227196, "grad_norm": 0.33203125, "learning_rate": 4.2683296869749294e-05, "loss": 0.4264, "step": 61330 }, { "epoch": 1.5569037072762117, "grad_norm": 0.310546875, "learning_rate": 4.266008447370253e-05, "loss": 0.4208, "step": 61335 }, { "epoch": 1.5570306253252275, "grad_norm": 0.345703125, "learning_rate": 4.2636877344782676e-05, "loss": 0.4034, "step": 61340 }, { "epoch": 1.5571575433742433, "grad_norm": 0.361328125, "learning_rate": 4.261367548412847e-05, "loss": 0.4145, "step": 61345 }, { "epoch": 1.557284461423259, "grad_norm": 0.341796875, "learning_rate": 4.259047889287844e-05, "loss": 0.4147, "step": 61350 }, { "epoch": 1.5574113794722746, "grad_norm": 0.60546875, "learning_rate": 4.256728757217083e-05, "loss": 0.3913, "step": 61355 }, { "epoch": 1.5575382975212904, "grad_norm": 0.341796875, "learning_rate": 4.254410152314356e-05, "loss": 0.4038, "step": 61360 }, { "epoch": 1.5576652155703061, "grad_norm": 0.353515625, "learning_rate": 4.2520920746934486e-05, "loss": 0.4171, "step": 61365 }, { "epoch": 1.557792133619322, "grad_norm": 0.349609375, "learning_rate": 4.2497745244681006e-05, "loss": 0.4129, "step": 61370 }, { "epoch": 1.5579190516683377, "grad_norm": 0.361328125, "learning_rate": 4.2474575017520355e-05, "loss": 0.42, "step": 61375 }, { "epoch": 1.5580459697173534, "grad_norm": 0.34765625, "learning_rate": 4.2451410066589474e-05, "loss": 0.4484, "step": 61380 }, { "epoch": 1.5581728877663692, "grad_norm": 0.322265625, "learning_rate": 4.242825039302507e-05, "loss": 0.4233, "step": 61385 }, { "epoch": 1.558299805815385, "grad_norm": 0.345703125, "learning_rate": 4.24050959979635e-05, "loss": 0.4259, "step": 61390 }, { "epoch": 1.5584267238644007, "grad_norm": 0.35546875, "learning_rate": 4.238194688254107e-05, "loss": 0.4093, "step": 61395 }, { "epoch": 1.5585536419134165, "grad_norm": 0.34765625, "learning_rate": 4.235880304789365e-05, "loss": 0.4114, "step": 61400 }, { "epoch": 1.5586805599624323, "grad_norm": 0.41015625, "learning_rate": 4.2335664495156864e-05, "loss": 0.455, "step": 61405 }, { "epoch": 1.558807478011448, "grad_norm": 0.349609375, "learning_rate": 4.231253122546615e-05, "loss": 0.4147, "step": 61410 }, { "epoch": 1.5589343960604638, "grad_norm": 0.32421875, "learning_rate": 4.228940323995662e-05, "loss": 0.4023, "step": 61415 }, { "epoch": 1.5590613141094796, "grad_norm": 0.361328125, "learning_rate": 4.226628053976313e-05, "loss": 0.4262, "step": 61420 }, { "epoch": 1.5591882321584953, "grad_norm": 0.337890625, "learning_rate": 4.224316312602037e-05, "loss": 0.4007, "step": 61425 }, { "epoch": 1.559315150207511, "grad_norm": 0.38671875, "learning_rate": 4.222005099986269e-05, "loss": 0.4232, "step": 61430 }, { "epoch": 1.5594420682565269, "grad_norm": 0.341796875, "learning_rate": 4.219694416242416e-05, "loss": 0.4028, "step": 61435 }, { "epoch": 1.5595689863055426, "grad_norm": 0.337890625, "learning_rate": 4.217384261483865e-05, "loss": 0.4321, "step": 61440 }, { "epoch": 1.5596959043545584, "grad_norm": 0.345703125, "learning_rate": 4.2150746358239726e-05, "loss": 0.4039, "step": 61445 }, { "epoch": 1.5598228224035742, "grad_norm": 0.3515625, "learning_rate": 4.212765539376073e-05, "loss": 0.4145, "step": 61450 }, { "epoch": 1.5599497404525897, "grad_norm": 0.349609375, "learning_rate": 4.2104569722534695e-05, "loss": 0.4165, "step": 61455 }, { "epoch": 1.5600766585016055, "grad_norm": 0.546875, "learning_rate": 4.2081489345694444e-05, "loss": 0.4486, "step": 61460 }, { "epoch": 1.5602035765506213, "grad_norm": 0.3515625, "learning_rate": 4.205841426437254e-05, "loss": 0.4183, "step": 61465 }, { "epoch": 1.560330494599637, "grad_norm": 0.3359375, "learning_rate": 4.203534447970119e-05, "loss": 0.4045, "step": 61470 }, { "epoch": 1.5604574126486528, "grad_norm": 0.337890625, "learning_rate": 4.201227999281253e-05, "loss": 0.424, "step": 61475 }, { "epoch": 1.5605843306976686, "grad_norm": 0.3515625, "learning_rate": 4.1989220804838275e-05, "loss": 0.4259, "step": 61480 }, { "epoch": 1.5607112487466843, "grad_norm": 0.3828125, "learning_rate": 4.196616691690989e-05, "loss": 0.4444, "step": 61485 }, { "epoch": 1.5608381667956999, "grad_norm": 0.35546875, "learning_rate": 4.1943118330158695e-05, "loss": 0.4316, "step": 61490 }, { "epoch": 1.5609650848447156, "grad_norm": 0.361328125, "learning_rate": 4.192007504571563e-05, "loss": 0.4213, "step": 61495 }, { "epoch": 1.5610920028937314, "grad_norm": 0.349609375, "learning_rate": 4.189703706471146e-05, "loss": 0.4086, "step": 61500 }, { "epoch": 1.5612189209427472, "grad_norm": 0.37890625, "learning_rate": 4.18740043882766e-05, "loss": 0.422, "step": 61505 }, { "epoch": 1.561345838991763, "grad_norm": 0.337890625, "learning_rate": 4.185097701754127e-05, "loss": 0.4116, "step": 61510 }, { "epoch": 1.5614727570407787, "grad_norm": 0.36328125, "learning_rate": 4.182795495363543e-05, "loss": 0.4147, "step": 61515 }, { "epoch": 1.5615996750897945, "grad_norm": 0.357421875, "learning_rate": 4.1804938197688756e-05, "loss": 0.4177, "step": 61520 }, { "epoch": 1.5617265931388102, "grad_norm": 0.33984375, "learning_rate": 4.178192675083066e-05, "loss": 0.3989, "step": 61525 }, { "epoch": 1.561853511187826, "grad_norm": 0.361328125, "learning_rate": 4.17589206141903e-05, "loss": 0.4196, "step": 61530 }, { "epoch": 1.5619804292368418, "grad_norm": 0.37109375, "learning_rate": 4.173591978889659e-05, "loss": 0.4148, "step": 61535 }, { "epoch": 1.5621073472858575, "grad_norm": 0.34765625, "learning_rate": 4.171292427607818e-05, "loss": 0.4295, "step": 61540 }, { "epoch": 1.5622342653348733, "grad_norm": 0.376953125, "learning_rate": 4.168993407686338e-05, "loss": 0.4154, "step": 61545 }, { "epoch": 1.562361183383889, "grad_norm": 0.333984375, "learning_rate": 4.166694919238044e-05, "loss": 0.4026, "step": 61550 }, { "epoch": 1.5624881014329048, "grad_norm": 0.3515625, "learning_rate": 4.164396962375713e-05, "loss": 0.4309, "step": 61555 }, { "epoch": 1.5626150194819206, "grad_norm": 0.357421875, "learning_rate": 4.1620995372121035e-05, "loss": 0.4014, "step": 61560 }, { "epoch": 1.5627419375309364, "grad_norm": 0.345703125, "learning_rate": 4.159802643859956e-05, "loss": 0.4115, "step": 61565 }, { "epoch": 1.5628688555799521, "grad_norm": 0.34765625, "learning_rate": 4.157506282431976e-05, "loss": 0.4139, "step": 61570 }, { "epoch": 1.562995773628968, "grad_norm": 0.314453125, "learning_rate": 4.155210453040842e-05, "loss": 0.4087, "step": 61575 }, { "epoch": 1.5631226916779837, "grad_norm": 0.32421875, "learning_rate": 4.152915155799212e-05, "loss": 0.3944, "step": 61580 }, { "epoch": 1.5632496097269992, "grad_norm": 0.357421875, "learning_rate": 4.1506203908197136e-05, "loss": 0.4469, "step": 61585 }, { "epoch": 1.563376527776015, "grad_norm": 0.33984375, "learning_rate": 4.148326158214953e-05, "loss": 0.3981, "step": 61590 }, { "epoch": 1.5635034458250308, "grad_norm": 0.34375, "learning_rate": 4.146032458097504e-05, "loss": 0.4414, "step": 61595 }, { "epoch": 1.5636303638740465, "grad_norm": 0.36328125, "learning_rate": 4.143739290579919e-05, "loss": 0.4237, "step": 61600 }, { "epoch": 1.5637572819230623, "grad_norm": 0.341796875, "learning_rate": 4.141446655774721e-05, "loss": 0.4246, "step": 61605 }, { "epoch": 1.563884199972078, "grad_norm": 0.357421875, "learning_rate": 4.1391545537944074e-05, "loss": 0.4289, "step": 61610 }, { "epoch": 1.5640111180210938, "grad_norm": 0.31640625, "learning_rate": 4.1368629847514565e-05, "loss": 0.4373, "step": 61615 }, { "epoch": 1.5641380360701094, "grad_norm": 0.328125, "learning_rate": 4.134571948758314e-05, "loss": 0.4357, "step": 61620 }, { "epoch": 1.5642649541191251, "grad_norm": 0.318359375, "learning_rate": 4.1322814459273965e-05, "loss": 0.4086, "step": 61625 }, { "epoch": 1.564391872168141, "grad_norm": 0.349609375, "learning_rate": 4.1299914763710986e-05, "loss": 0.4219, "step": 61630 }, { "epoch": 1.5645187902171567, "grad_norm": 0.37890625, "learning_rate": 4.12770204020179e-05, "loss": 0.4282, "step": 61635 }, { "epoch": 1.5646457082661724, "grad_norm": 0.349609375, "learning_rate": 4.125413137531807e-05, "loss": 0.4016, "step": 61640 }, { "epoch": 1.5647726263151882, "grad_norm": 0.359375, "learning_rate": 4.1231247684734734e-05, "loss": 0.4337, "step": 61645 }, { "epoch": 1.564899544364204, "grad_norm": 0.34375, "learning_rate": 4.120836933139074e-05, "loss": 0.4025, "step": 61650 }, { "epoch": 1.5650264624132197, "grad_norm": 0.37109375, "learning_rate": 4.118549631640875e-05, "loss": 0.4259, "step": 61655 }, { "epoch": 1.5651533804622355, "grad_norm": 0.3671875, "learning_rate": 4.11626286409111e-05, "loss": 0.4275, "step": 61660 }, { "epoch": 1.5652802985112513, "grad_norm": 0.34375, "learning_rate": 4.1139766306019894e-05, "loss": 0.416, "step": 61665 }, { "epoch": 1.565407216560267, "grad_norm": 0.330078125, "learning_rate": 4.1116909312857e-05, "loss": 0.4184, "step": 61670 }, { "epoch": 1.5655341346092828, "grad_norm": 0.3671875, "learning_rate": 4.109405766254395e-05, "loss": 0.4058, "step": 61675 }, { "epoch": 1.5656610526582986, "grad_norm": 0.353515625, "learning_rate": 4.1071211356202166e-05, "loss": 0.4117, "step": 61680 }, { "epoch": 1.5657879707073143, "grad_norm": 0.33984375, "learning_rate": 4.1048370394952635e-05, "loss": 0.386, "step": 61685 }, { "epoch": 1.5659148887563301, "grad_norm": 0.345703125, "learning_rate": 4.102553477991617e-05, "loss": 0.4353, "step": 61690 }, { "epoch": 1.5660418068053459, "grad_norm": 0.36328125, "learning_rate": 4.100270451221331e-05, "loss": 0.3793, "step": 61695 }, { "epoch": 1.5661687248543616, "grad_norm": 0.345703125, "learning_rate": 4.0979879592964323e-05, "loss": 0.4056, "step": 61700 }, { "epoch": 1.5662956429033774, "grad_norm": 0.373046875, "learning_rate": 4.095706002328922e-05, "loss": 0.4438, "step": 61705 }, { "epoch": 1.5664225609523932, "grad_norm": 0.34375, "learning_rate": 4.093424580430773e-05, "loss": 0.4136, "step": 61710 }, { "epoch": 1.566549479001409, "grad_norm": 0.376953125, "learning_rate": 4.091143693713931e-05, "loss": 0.4269, "step": 61715 }, { "epoch": 1.5666763970504245, "grad_norm": 0.3359375, "learning_rate": 4.0888633422903287e-05, "loss": 0.4048, "step": 61720 }, { "epoch": 1.5668033150994403, "grad_norm": 0.3359375, "learning_rate": 4.086583526271855e-05, "loss": 0.3862, "step": 61725 }, { "epoch": 1.566930233148456, "grad_norm": 0.29296875, "learning_rate": 4.08430424577038e-05, "loss": 0.3998, "step": 61730 }, { "epoch": 1.5670571511974718, "grad_norm": 0.3671875, "learning_rate": 4.0820255008977434e-05, "loss": 0.4301, "step": 61735 }, { "epoch": 1.5671840692464876, "grad_norm": 0.341796875, "learning_rate": 4.0797472917657715e-05, "loss": 0.416, "step": 61740 }, { "epoch": 1.5673109872955033, "grad_norm": 0.35546875, "learning_rate": 4.0774696184862485e-05, "loss": 0.423, "step": 61745 }, { "epoch": 1.5674379053445189, "grad_norm": 0.3515625, "learning_rate": 4.075192481170941e-05, "loss": 0.4208, "step": 61750 }, { "epoch": 1.5675648233935346, "grad_norm": 0.333984375, "learning_rate": 4.072915879931586e-05, "loss": 0.3956, "step": 61755 }, { "epoch": 1.5676917414425504, "grad_norm": 0.33984375, "learning_rate": 4.070639814879898e-05, "loss": 0.411, "step": 61760 }, { "epoch": 1.5678186594915662, "grad_norm": 0.34375, "learning_rate": 4.0683642861275596e-05, "loss": 0.4108, "step": 61765 }, { "epoch": 1.567945577540582, "grad_norm": 0.373046875, "learning_rate": 4.066089293786232e-05, "loss": 0.41, "step": 61770 }, { "epoch": 1.5680724955895977, "grad_norm": 0.34375, "learning_rate": 4.063814837967545e-05, "loss": 0.4113, "step": 61775 }, { "epoch": 1.5681994136386135, "grad_norm": 0.34765625, "learning_rate": 4.061540918783109e-05, "loss": 0.4103, "step": 61780 }, { "epoch": 1.5683263316876292, "grad_norm": 0.337890625, "learning_rate": 4.059267536344504e-05, "loss": 0.4465, "step": 61785 }, { "epoch": 1.568453249736645, "grad_norm": 0.357421875, "learning_rate": 4.056994690763277e-05, "loss": 0.4179, "step": 61790 }, { "epoch": 1.5685801677856608, "grad_norm": 0.353515625, "learning_rate": 4.054722382150967e-05, "loss": 0.4252, "step": 61795 }, { "epoch": 1.5687070858346766, "grad_norm": 0.404296875, "learning_rate": 4.052450610619068e-05, "loss": 0.4461, "step": 61800 }, { "epoch": 1.5688340038836923, "grad_norm": 0.357421875, "learning_rate": 4.050179376279053e-05, "loss": 0.3962, "step": 61805 }, { "epoch": 1.568960921932708, "grad_norm": 0.365234375, "learning_rate": 4.047908679242379e-05, "loss": 0.4458, "step": 61810 }, { "epoch": 1.5690878399817239, "grad_norm": 0.337890625, "learning_rate": 4.0456385196204625e-05, "loss": 0.4423, "step": 61815 }, { "epoch": 1.5692147580307396, "grad_norm": 0.328125, "learning_rate": 4.0433688975247016e-05, "loss": 0.3957, "step": 61820 }, { "epoch": 1.5693416760797554, "grad_norm": 0.345703125, "learning_rate": 4.041099813066465e-05, "loss": 0.4063, "step": 61825 }, { "epoch": 1.5694685941287712, "grad_norm": 0.33203125, "learning_rate": 4.038831266357093e-05, "loss": 0.3965, "step": 61830 }, { "epoch": 1.569595512177787, "grad_norm": 0.36328125, "learning_rate": 4.036563257507907e-05, "loss": 0.4199, "step": 61835 }, { "epoch": 1.5697224302268027, "grad_norm": 0.3359375, "learning_rate": 4.034295786630192e-05, "loss": 0.4146, "step": 61840 }, { "epoch": 1.5698493482758185, "grad_norm": 0.365234375, "learning_rate": 4.032028853835216e-05, "loss": 0.4459, "step": 61845 }, { "epoch": 1.569976266324834, "grad_norm": 0.3359375, "learning_rate": 4.0297624592342165e-05, "loss": 0.4219, "step": 61850 }, { "epoch": 1.5701031843738498, "grad_norm": 0.328125, "learning_rate": 4.0274966029384024e-05, "loss": 0.4385, "step": 61855 }, { "epoch": 1.5702301024228655, "grad_norm": 0.357421875, "learning_rate": 4.0252312850589544e-05, "loss": 0.4362, "step": 61860 }, { "epoch": 1.5703570204718813, "grad_norm": 0.3671875, "learning_rate": 4.0229665057070386e-05, "loss": 0.424, "step": 61865 }, { "epoch": 1.570483938520897, "grad_norm": 0.34765625, "learning_rate": 4.020702264993785e-05, "loss": 0.3866, "step": 61870 }, { "epoch": 1.5706108565699128, "grad_norm": 0.330078125, "learning_rate": 4.0184385630302986e-05, "loss": 0.4331, "step": 61875 }, { "epoch": 1.5707377746189286, "grad_norm": 0.337890625, "learning_rate": 4.016175399927656e-05, "loss": 0.3892, "step": 61880 }, { "epoch": 1.5708646926679442, "grad_norm": 0.353515625, "learning_rate": 4.013912775796907e-05, "loss": 0.3874, "step": 61885 }, { "epoch": 1.57099161071696, "grad_norm": 0.3515625, "learning_rate": 4.0116506907490884e-05, "loss": 0.4065, "step": 61890 }, { "epoch": 1.5711185287659757, "grad_norm": 0.3671875, "learning_rate": 4.0093891448951905e-05, "loss": 0.4479, "step": 61895 }, { "epoch": 1.5712454468149915, "grad_norm": 0.359375, "learning_rate": 4.0071281383461903e-05, "loss": 0.4191, "step": 61900 }, { "epoch": 1.5713723648640072, "grad_norm": 0.33984375, "learning_rate": 4.0048676712130336e-05, "loss": 0.4129, "step": 61905 }, { "epoch": 1.571499282913023, "grad_norm": 0.365234375, "learning_rate": 4.0026077436066415e-05, "loss": 0.423, "step": 61910 }, { "epoch": 1.5716262009620388, "grad_norm": 0.33984375, "learning_rate": 4.0003483556379044e-05, "loss": 0.4335, "step": 61915 }, { "epoch": 1.5717531190110545, "grad_norm": 0.376953125, "learning_rate": 3.998089507417693e-05, "loss": 0.4265, "step": 61920 }, { "epoch": 1.5718800370600703, "grad_norm": 0.33984375, "learning_rate": 3.9958311990568443e-05, "loss": 0.4397, "step": 61925 }, { "epoch": 1.572006955109086, "grad_norm": 0.33203125, "learning_rate": 3.993573430666177e-05, "loss": 0.435, "step": 61930 }, { "epoch": 1.5721338731581018, "grad_norm": 0.32421875, "learning_rate": 3.991316202356478e-05, "loss": 0.4103, "step": 61935 }, { "epoch": 1.5722607912071176, "grad_norm": 0.357421875, "learning_rate": 3.989059514238509e-05, "loss": 0.4234, "step": 61940 }, { "epoch": 1.5723877092561334, "grad_norm": 0.359375, "learning_rate": 3.9868033664230026e-05, "loss": 0.4234, "step": 61945 }, { "epoch": 1.5725146273051491, "grad_norm": 0.384765625, "learning_rate": 3.984547759020667e-05, "loss": 0.4368, "step": 61950 }, { "epoch": 1.572641545354165, "grad_norm": 0.353515625, "learning_rate": 3.982292692142187e-05, "loss": 0.4044, "step": 61955 }, { "epoch": 1.5727684634031807, "grad_norm": 0.333984375, "learning_rate": 3.980038165898209e-05, "loss": 0.4185, "step": 61960 }, { "epoch": 1.5728953814521964, "grad_norm": 0.3671875, "learning_rate": 3.9777841803993756e-05, "loss": 0.423, "step": 61965 }, { "epoch": 1.5730222995012122, "grad_norm": 0.33984375, "learning_rate": 3.9755307357562796e-05, "loss": 0.4377, "step": 61970 }, { "epoch": 1.573149217550228, "grad_norm": 0.3671875, "learning_rate": 3.973277832079499e-05, "loss": 0.4312, "step": 61975 }, { "epoch": 1.5732761355992437, "grad_norm": 0.37109375, "learning_rate": 3.971025469479584e-05, "loss": 0.4056, "step": 61980 }, { "epoch": 1.5734030536482593, "grad_norm": 0.36328125, "learning_rate": 3.968773648067055e-05, "loss": 0.4306, "step": 61985 }, { "epoch": 1.573529971697275, "grad_norm": 0.328125, "learning_rate": 3.966522367952406e-05, "loss": 0.4308, "step": 61990 }, { "epoch": 1.5736568897462908, "grad_norm": 0.353515625, "learning_rate": 3.964271629246112e-05, "loss": 0.4264, "step": 61995 }, { "epoch": 1.5737838077953066, "grad_norm": 0.359375, "learning_rate": 3.9620214320586143e-05, "loss": 0.453, "step": 62000 }, { "epoch": 1.5739107258443223, "grad_norm": 0.34375, "learning_rate": 3.959771776500328e-05, "loss": 0.4235, "step": 62005 }, { "epoch": 1.5740376438933381, "grad_norm": 0.341796875, "learning_rate": 3.9575226626816414e-05, "loss": 0.408, "step": 62010 }, { "epoch": 1.5741645619423537, "grad_norm": 0.361328125, "learning_rate": 3.95527409071292e-05, "loss": 0.4086, "step": 62015 }, { "epoch": 1.5742914799913694, "grad_norm": 0.341796875, "learning_rate": 3.9530260607045e-05, "loss": 0.4174, "step": 62020 }, { "epoch": 1.5744183980403852, "grad_norm": 0.33203125, "learning_rate": 3.95077857276669e-05, "loss": 0.388, "step": 62025 }, { "epoch": 1.574545316089401, "grad_norm": 0.3359375, "learning_rate": 3.9485316270097736e-05, "loss": 0.4158, "step": 62030 }, { "epoch": 1.5746722341384167, "grad_norm": 0.369140625, "learning_rate": 3.9462852235440045e-05, "loss": 0.4344, "step": 62035 }, { "epoch": 1.5747991521874325, "grad_norm": 0.36328125, "learning_rate": 3.9440393624796205e-05, "loss": 0.4242, "step": 62040 }, { "epoch": 1.5749260702364483, "grad_norm": 0.296875, "learning_rate": 3.941794043926821e-05, "loss": 0.382, "step": 62045 }, { "epoch": 1.575052988285464, "grad_norm": 0.341796875, "learning_rate": 3.939549267995778e-05, "loss": 0.4054, "step": 62050 }, { "epoch": 1.5751799063344798, "grad_norm": 0.3515625, "learning_rate": 3.937305034796652e-05, "loss": 0.4199, "step": 62055 }, { "epoch": 1.5753068243834956, "grad_norm": 0.330078125, "learning_rate": 3.935061344439561e-05, "loss": 0.3946, "step": 62060 }, { "epoch": 1.5754337424325113, "grad_norm": 0.376953125, "learning_rate": 3.9328181970346025e-05, "loss": 0.4334, "step": 62065 }, { "epoch": 1.575560660481527, "grad_norm": 0.353515625, "learning_rate": 3.930575592691846e-05, "loss": 0.4085, "step": 62070 }, { "epoch": 1.5756875785305429, "grad_norm": 0.359375, "learning_rate": 3.928333531521335e-05, "loss": 0.4359, "step": 62075 }, { "epoch": 1.5758144965795586, "grad_norm": 0.333984375, "learning_rate": 3.926092013633088e-05, "loss": 0.4218, "step": 62080 }, { "epoch": 1.5759414146285744, "grad_norm": 0.328125, "learning_rate": 3.9238510391370944e-05, "loss": 0.377, "step": 62085 }, { "epoch": 1.5760683326775902, "grad_norm": 0.345703125, "learning_rate": 3.921610608143319e-05, "loss": 0.4105, "step": 62090 }, { "epoch": 1.576195250726606, "grad_norm": 0.353515625, "learning_rate": 3.919370720761698e-05, "loss": 0.409, "step": 62095 }, { "epoch": 1.5763221687756217, "grad_norm": 0.359375, "learning_rate": 3.91713137710214e-05, "loss": 0.4414, "step": 62100 }, { "epoch": 1.5764490868246375, "grad_norm": 0.341796875, "learning_rate": 3.9148925772745334e-05, "loss": 0.4122, "step": 62105 }, { "epoch": 1.5765760048736532, "grad_norm": 0.34375, "learning_rate": 3.9126543213887315e-05, "loss": 0.4211, "step": 62110 }, { "epoch": 1.5767029229226688, "grad_norm": 0.345703125, "learning_rate": 3.91041660955456e-05, "loss": 0.4493, "step": 62115 }, { "epoch": 1.5768298409716845, "grad_norm": 0.328125, "learning_rate": 3.908179441881833e-05, "loss": 0.412, "step": 62120 }, { "epoch": 1.5769567590207003, "grad_norm": 0.333984375, "learning_rate": 3.905942818480321e-05, "loss": 0.4135, "step": 62125 }, { "epoch": 1.577083677069716, "grad_norm": 0.3359375, "learning_rate": 3.903706739459774e-05, "loss": 0.3941, "step": 62130 }, { "epoch": 1.5772105951187319, "grad_norm": 0.357421875, "learning_rate": 3.90147120492992e-05, "loss": 0.421, "step": 62135 }, { "epoch": 1.5773375131677476, "grad_norm": 0.34765625, "learning_rate": 3.899236215000453e-05, "loss": 0.4106, "step": 62140 }, { "epoch": 1.5774644312167634, "grad_norm": 0.3671875, "learning_rate": 3.8970017697810436e-05, "loss": 0.4278, "step": 62145 }, { "epoch": 1.577591349265779, "grad_norm": 0.369140625, "learning_rate": 3.894767869381335e-05, "loss": 0.4418, "step": 62150 }, { "epoch": 1.5777182673147947, "grad_norm": 0.330078125, "learning_rate": 3.892534513910943e-05, "loss": 0.4011, "step": 62155 }, { "epoch": 1.5778451853638105, "grad_norm": 0.35546875, "learning_rate": 3.890301703479457e-05, "loss": 0.4244, "step": 62160 }, { "epoch": 1.5779721034128262, "grad_norm": 0.333984375, "learning_rate": 3.8880694381964424e-05, "loss": 0.3903, "step": 62165 }, { "epoch": 1.578099021461842, "grad_norm": 0.36328125, "learning_rate": 3.885837718171433e-05, "loss": 0.4206, "step": 62170 }, { "epoch": 1.5782259395108578, "grad_norm": 0.3359375, "learning_rate": 3.8836065435139395e-05, "loss": 0.418, "step": 62175 }, { "epoch": 1.5783528575598735, "grad_norm": 0.36328125, "learning_rate": 3.88137591433344e-05, "loss": 0.4293, "step": 62180 }, { "epoch": 1.5784797756088893, "grad_norm": 0.33984375, "learning_rate": 3.879145830739401e-05, "loss": 0.3928, "step": 62185 }, { "epoch": 1.578606693657905, "grad_norm": 0.337890625, "learning_rate": 3.876916292841246e-05, "loss": 0.4046, "step": 62190 }, { "epoch": 1.5787336117069208, "grad_norm": 0.341796875, "learning_rate": 3.874687300748377e-05, "loss": 0.4222, "step": 62195 }, { "epoch": 1.5788605297559366, "grad_norm": 0.345703125, "learning_rate": 3.8724588545701714e-05, "loss": 0.4046, "step": 62200 }, { "epoch": 1.5789874478049524, "grad_norm": 0.369140625, "learning_rate": 3.870230954415972e-05, "loss": 0.4148, "step": 62205 }, { "epoch": 1.5791143658539681, "grad_norm": 0.35546875, "learning_rate": 3.868003600395112e-05, "loss": 0.4161, "step": 62210 }, { "epoch": 1.579241283902984, "grad_norm": 0.36328125, "learning_rate": 3.865776792616879e-05, "loss": 0.4327, "step": 62215 }, { "epoch": 1.5793682019519997, "grad_norm": 0.333984375, "learning_rate": 3.8635505311905456e-05, "loss": 0.4054, "step": 62220 }, { "epoch": 1.5794951200010154, "grad_norm": 0.345703125, "learning_rate": 3.8613248162253494e-05, "loss": 0.4051, "step": 62225 }, { "epoch": 1.5796220380500312, "grad_norm": 0.326171875, "learning_rate": 3.85909964783051e-05, "loss": 0.407, "step": 62230 }, { "epoch": 1.579748956099047, "grad_norm": 0.349609375, "learning_rate": 3.85687502611521e-05, "loss": 0.4186, "step": 62235 }, { "epoch": 1.5798758741480627, "grad_norm": 0.33984375, "learning_rate": 3.854650951188612e-05, "loss": 0.4103, "step": 62240 }, { "epoch": 1.5800027921970785, "grad_norm": 0.3125, "learning_rate": 3.8524274231598556e-05, "loss": 0.3911, "step": 62245 }, { "epoch": 1.580129710246094, "grad_norm": 0.35546875, "learning_rate": 3.850204442138044e-05, "loss": 0.4024, "step": 62250 }, { "epoch": 1.5802566282951098, "grad_norm": 0.349609375, "learning_rate": 3.84798200823226e-05, "loss": 0.4398, "step": 62255 }, { "epoch": 1.5803835463441256, "grad_norm": 0.3515625, "learning_rate": 3.845760121551556e-05, "loss": 0.4054, "step": 62260 }, { "epoch": 1.5805104643931414, "grad_norm": 0.353515625, "learning_rate": 3.84353878220496e-05, "loss": 0.4085, "step": 62265 }, { "epoch": 1.5806373824421571, "grad_norm": 0.328125, "learning_rate": 3.841317990301473e-05, "loss": 0.4332, "step": 62270 }, { "epoch": 1.580764300491173, "grad_norm": 0.34375, "learning_rate": 3.839097745950065e-05, "loss": 0.4393, "step": 62275 }, { "epoch": 1.5808912185401884, "grad_norm": 0.341796875, "learning_rate": 3.836878049259687e-05, "loss": 0.4297, "step": 62280 }, { "epoch": 1.5810181365892042, "grad_norm": 0.35546875, "learning_rate": 3.83465890033925e-05, "loss": 0.4279, "step": 62285 }, { "epoch": 1.58114505463822, "grad_norm": 0.375, "learning_rate": 3.832440299297659e-05, "loss": 0.4132, "step": 62290 }, { "epoch": 1.5812719726872357, "grad_norm": 0.35546875, "learning_rate": 3.830222246243774e-05, "loss": 0.3834, "step": 62295 }, { "epoch": 1.5813988907362515, "grad_norm": 0.359375, "learning_rate": 3.828004741286433e-05, "loss": 0.4119, "step": 62300 }, { "epoch": 1.5815258087852673, "grad_norm": 0.341796875, "learning_rate": 3.825787784534446e-05, "loss": 0.4306, "step": 62305 }, { "epoch": 1.581652726834283, "grad_norm": 0.328125, "learning_rate": 3.8235713760966044e-05, "loss": 0.4349, "step": 62310 }, { "epoch": 1.5817796448832988, "grad_norm": 0.37109375, "learning_rate": 3.821355516081664e-05, "loss": 0.4626, "step": 62315 }, { "epoch": 1.5819065629323146, "grad_norm": 0.33984375, "learning_rate": 3.819140204598355e-05, "loss": 0.4143, "step": 62320 }, { "epoch": 1.5820334809813303, "grad_norm": 0.330078125, "learning_rate": 3.816925441755384e-05, "loss": 0.4055, "step": 62325 }, { "epoch": 1.582160399030346, "grad_norm": 0.3671875, "learning_rate": 3.814711227661425e-05, "loss": 0.4365, "step": 62330 }, { "epoch": 1.5822873170793619, "grad_norm": 0.33984375, "learning_rate": 3.8124975624251305e-05, "loss": 0.4095, "step": 62335 }, { "epoch": 1.5824142351283776, "grad_norm": 0.345703125, "learning_rate": 3.8102844461551246e-05, "loss": 0.4548, "step": 62340 }, { "epoch": 1.5825411531773934, "grad_norm": 0.34375, "learning_rate": 3.808071878960004e-05, "loss": 0.4156, "step": 62345 }, { "epoch": 1.5826680712264092, "grad_norm": 0.36328125, "learning_rate": 3.805859860948339e-05, "loss": 0.4103, "step": 62350 }, { "epoch": 1.582794989275425, "grad_norm": 0.359375, "learning_rate": 3.80364839222867e-05, "loss": 0.4041, "step": 62355 }, { "epoch": 1.5829219073244407, "grad_norm": 0.373046875, "learning_rate": 3.8014374729095104e-05, "loss": 0.4162, "step": 62360 }, { "epoch": 1.5830488253734565, "grad_norm": 0.359375, "learning_rate": 3.799227103099357e-05, "loss": 0.4123, "step": 62365 }, { "epoch": 1.5831757434224722, "grad_norm": 0.384765625, "learning_rate": 3.797017282906669e-05, "loss": 0.4475, "step": 62370 }, { "epoch": 1.583302661471488, "grad_norm": 0.373046875, "learning_rate": 3.794808012439875e-05, "loss": 0.4255, "step": 62375 }, { "epoch": 1.5834295795205036, "grad_norm": 0.3359375, "learning_rate": 3.792599291807395e-05, "loss": 0.4137, "step": 62380 }, { "epoch": 1.5835564975695193, "grad_norm": 0.349609375, "learning_rate": 3.7903911211176005e-05, "loss": 0.4212, "step": 62385 }, { "epoch": 1.583683415618535, "grad_norm": 0.333984375, "learning_rate": 3.788183500478849e-05, "loss": 0.4351, "step": 62390 }, { "epoch": 1.5838103336675509, "grad_norm": 0.35546875, "learning_rate": 3.7859764299994677e-05, "loss": 0.3943, "step": 62395 }, { "epoch": 1.5839372517165666, "grad_norm": 0.369140625, "learning_rate": 3.783769909787756e-05, "loss": 0.3994, "step": 62400 }, { "epoch": 1.5840641697655824, "grad_norm": 0.349609375, "learning_rate": 3.781563939951988e-05, "loss": 0.4139, "step": 62405 }, { "epoch": 1.5841910878145982, "grad_norm": 0.34375, "learning_rate": 3.7793585206004073e-05, "loss": 0.4563, "step": 62410 }, { "epoch": 1.5843180058636137, "grad_norm": 0.353515625, "learning_rate": 3.777153651841234e-05, "loss": 0.4438, "step": 62415 }, { "epoch": 1.5844449239126295, "grad_norm": 0.3515625, "learning_rate": 3.7749493337826605e-05, "loss": 0.4426, "step": 62420 }, { "epoch": 1.5845718419616452, "grad_norm": 0.3828125, "learning_rate": 3.772745566532852e-05, "loss": 0.4352, "step": 62425 }, { "epoch": 1.584698760010661, "grad_norm": 0.3515625, "learning_rate": 3.7705423501999425e-05, "loss": 0.4073, "step": 62430 }, { "epoch": 1.5848256780596768, "grad_norm": 0.33203125, "learning_rate": 3.7683396848920504e-05, "loss": 0.4371, "step": 62435 }, { "epoch": 1.5849525961086925, "grad_norm": 0.330078125, "learning_rate": 3.766137570717258e-05, "loss": 0.4166, "step": 62440 }, { "epoch": 1.5850795141577083, "grad_norm": 0.357421875, "learning_rate": 3.763936007783617e-05, "loss": 0.4312, "step": 62445 }, { "epoch": 1.585206432206724, "grad_norm": 0.357421875, "learning_rate": 3.7617349961991585e-05, "loss": 0.4176, "step": 62450 }, { "epoch": 1.5853333502557398, "grad_norm": 0.361328125, "learning_rate": 3.759534536071889e-05, "loss": 0.421, "step": 62455 }, { "epoch": 1.5854602683047556, "grad_norm": 0.375, "learning_rate": 3.757334627509784e-05, "loss": 0.4304, "step": 62460 }, { "epoch": 1.5855871863537714, "grad_norm": 0.341796875, "learning_rate": 3.75513527062079e-05, "loss": 0.4212, "step": 62465 }, { "epoch": 1.5857141044027872, "grad_norm": 0.34765625, "learning_rate": 3.75293646551283e-05, "loss": 0.4131, "step": 62470 }, { "epoch": 1.585841022451803, "grad_norm": 0.345703125, "learning_rate": 3.7507382122937966e-05, "loss": 0.3956, "step": 62475 }, { "epoch": 1.5859679405008187, "grad_norm": 0.357421875, "learning_rate": 3.7485405110715574e-05, "loss": 0.4465, "step": 62480 }, { "epoch": 1.5860948585498345, "grad_norm": 0.36328125, "learning_rate": 3.7463433619539543e-05, "loss": 0.4411, "step": 62485 }, { "epoch": 1.5862217765988502, "grad_norm": 0.369140625, "learning_rate": 3.744146765048799e-05, "loss": 0.3778, "step": 62490 }, { "epoch": 1.586348694647866, "grad_norm": 0.369140625, "learning_rate": 3.741950720463874e-05, "loss": 0.4055, "step": 62495 }, { "epoch": 1.5864756126968818, "grad_norm": 0.34765625, "learning_rate": 3.7397552283069464e-05, "loss": 0.4117, "step": 62500 }, { "epoch": 1.5866025307458975, "grad_norm": 0.349609375, "learning_rate": 3.737560288685746e-05, "loss": 0.4155, "step": 62505 }, { "epoch": 1.5867294487949133, "grad_norm": 0.34375, "learning_rate": 3.735365901707976e-05, "loss": 0.4235, "step": 62510 }, { "epoch": 1.5868563668439288, "grad_norm": 0.341796875, "learning_rate": 3.733172067481311e-05, "loss": 0.3695, "step": 62515 }, { "epoch": 1.5869832848929446, "grad_norm": 0.333984375, "learning_rate": 3.730978786113408e-05, "loss": 0.4363, "step": 62520 }, { "epoch": 1.5871102029419604, "grad_norm": 0.37890625, "learning_rate": 3.728786057711885e-05, "loss": 0.4162, "step": 62525 }, { "epoch": 1.5872371209909761, "grad_norm": 0.337890625, "learning_rate": 3.726593882384337e-05, "loss": 0.4269, "step": 62530 }, { "epoch": 1.587364039039992, "grad_norm": 0.310546875, "learning_rate": 3.7244022602383416e-05, "loss": 0.3872, "step": 62535 }, { "epoch": 1.5874909570890077, "grad_norm": 0.361328125, "learning_rate": 3.722211191381434e-05, "loss": 0.4156, "step": 62540 }, { "epoch": 1.5876178751380232, "grad_norm": 0.328125, "learning_rate": 3.720020675921132e-05, "loss": 0.4015, "step": 62545 }, { "epoch": 1.587744793187039, "grad_norm": 0.326171875, "learning_rate": 3.717830713964921e-05, "loss": 0.4104, "step": 62550 }, { "epoch": 1.5878717112360548, "grad_norm": 0.33984375, "learning_rate": 3.71564130562026e-05, "loss": 0.4423, "step": 62555 }, { "epoch": 1.5879986292850705, "grad_norm": 0.365234375, "learning_rate": 3.7134524509945887e-05, "loss": 0.4061, "step": 62560 }, { "epoch": 1.5881255473340863, "grad_norm": 0.330078125, "learning_rate": 3.711264150195309e-05, "loss": 0.4175, "step": 62565 }, { "epoch": 1.588252465383102, "grad_norm": 0.349609375, "learning_rate": 3.709076403329802e-05, "loss": 0.4365, "step": 62570 }, { "epoch": 1.5883793834321178, "grad_norm": 0.3359375, "learning_rate": 3.7068892105054174e-05, "loss": 0.3975, "step": 62575 }, { "epoch": 1.5885063014811336, "grad_norm": 0.341796875, "learning_rate": 3.704702571829482e-05, "loss": 0.4104, "step": 62580 }, { "epoch": 1.5886332195301494, "grad_norm": 0.3515625, "learning_rate": 3.702516487409289e-05, "loss": 0.4065, "step": 62585 }, { "epoch": 1.5887601375791651, "grad_norm": 0.380859375, "learning_rate": 3.700330957352115e-05, "loss": 0.4127, "step": 62590 }, { "epoch": 1.5888870556281809, "grad_norm": 0.35546875, "learning_rate": 3.698145981765198e-05, "loss": 0.3973, "step": 62595 }, { "epoch": 1.5890139736771967, "grad_norm": 0.31640625, "learning_rate": 3.695961560755756e-05, "loss": 0.3869, "step": 62600 }, { "epoch": 1.5891408917262124, "grad_norm": 0.34765625, "learning_rate": 3.6937776944309746e-05, "loss": 0.3932, "step": 62605 }, { "epoch": 1.5892678097752282, "grad_norm": 0.349609375, "learning_rate": 3.6915943828980196e-05, "loss": 0.394, "step": 62610 }, { "epoch": 1.589394727824244, "grad_norm": 0.330078125, "learning_rate": 3.689411626264026e-05, "loss": 0.4102, "step": 62615 }, { "epoch": 1.5895216458732597, "grad_norm": 0.34765625, "learning_rate": 3.687229424636092e-05, "loss": 0.4223, "step": 62620 }, { "epoch": 1.5896485639222755, "grad_norm": 0.322265625, "learning_rate": 3.685047778121309e-05, "loss": 0.4115, "step": 62625 }, { "epoch": 1.5897754819712913, "grad_norm": 0.38671875, "learning_rate": 3.6828666868267246e-05, "loss": 0.4137, "step": 62630 }, { "epoch": 1.589902400020307, "grad_norm": 0.341796875, "learning_rate": 3.6806861508593644e-05, "loss": 0.4212, "step": 62635 }, { "epoch": 1.5900293180693228, "grad_norm": 0.3515625, "learning_rate": 3.6785061703262245e-05, "loss": 0.4023, "step": 62640 }, { "epoch": 1.5901562361183383, "grad_norm": 0.33984375, "learning_rate": 3.6763267453342785e-05, "loss": 0.3991, "step": 62645 }, { "epoch": 1.590283154167354, "grad_norm": 0.357421875, "learning_rate": 3.674147875990467e-05, "loss": 0.4412, "step": 62650 }, { "epoch": 1.5904100722163699, "grad_norm": 0.32421875, "learning_rate": 3.671969562401708e-05, "loss": 0.3999, "step": 62655 }, { "epoch": 1.5905369902653856, "grad_norm": 0.349609375, "learning_rate": 3.66979180467489e-05, "loss": 0.3908, "step": 62660 }, { "epoch": 1.5906639083144014, "grad_norm": 0.365234375, "learning_rate": 3.667614602916877e-05, "loss": 0.4242, "step": 62665 }, { "epoch": 1.5907908263634172, "grad_norm": 0.3671875, "learning_rate": 3.665437957234499e-05, "loss": 0.4215, "step": 62670 }, { "epoch": 1.590917744412433, "grad_norm": 0.376953125, "learning_rate": 3.663261867734566e-05, "loss": 0.4134, "step": 62675 }, { "epoch": 1.5910446624614485, "grad_norm": 0.353515625, "learning_rate": 3.661086334523853e-05, "loss": 0.3912, "step": 62680 }, { "epoch": 1.5911715805104643, "grad_norm": 0.337890625, "learning_rate": 3.6589113577091205e-05, "loss": 0.4307, "step": 62685 }, { "epoch": 1.59129849855948, "grad_norm": 0.33203125, "learning_rate": 3.656736937397091e-05, "loss": 0.4304, "step": 62690 }, { "epoch": 1.5914254166084958, "grad_norm": 0.353515625, "learning_rate": 3.6545630736944556e-05, "loss": 0.4224, "step": 62695 }, { "epoch": 1.5915523346575116, "grad_norm": 0.34375, "learning_rate": 3.652389766707898e-05, "loss": 0.4064, "step": 62700 }, { "epoch": 1.5916792527065273, "grad_norm": 0.353515625, "learning_rate": 3.6502170165440526e-05, "loss": 0.4268, "step": 62705 }, { "epoch": 1.591806170755543, "grad_norm": 0.318359375, "learning_rate": 3.6480448233095365e-05, "loss": 0.413, "step": 62710 }, { "epoch": 1.5919330888045589, "grad_norm": 0.33984375, "learning_rate": 3.64587318711094e-05, "loss": 0.4368, "step": 62715 }, { "epoch": 1.5920600068535746, "grad_norm": 0.3515625, "learning_rate": 3.643702108054822e-05, "loss": 0.4046, "step": 62720 }, { "epoch": 1.5921869249025904, "grad_norm": 0.33984375, "learning_rate": 3.641531586247719e-05, "loss": 0.4283, "step": 62725 }, { "epoch": 1.5923138429516062, "grad_norm": 0.35546875, "learning_rate": 3.6393616217961364e-05, "loss": 0.4164, "step": 62730 }, { "epoch": 1.592440761000622, "grad_norm": 0.3515625, "learning_rate": 3.637192214806554e-05, "loss": 0.4204, "step": 62735 }, { "epoch": 1.5925676790496377, "grad_norm": 0.3515625, "learning_rate": 3.6350233653854236e-05, "loss": 0.4067, "step": 62740 }, { "epoch": 1.5926945970986535, "grad_norm": 0.349609375, "learning_rate": 3.632855073639164e-05, "loss": 0.3953, "step": 62745 }, { "epoch": 1.5928215151476692, "grad_norm": 0.345703125, "learning_rate": 3.630687339674183e-05, "loss": 0.425, "step": 62750 }, { "epoch": 1.592948433196685, "grad_norm": 0.3359375, "learning_rate": 3.628520163596845e-05, "loss": 0.4116, "step": 62755 }, { "epoch": 1.5930753512457008, "grad_norm": 0.328125, "learning_rate": 3.626353545513493e-05, "loss": 0.4017, "step": 62760 }, { "epoch": 1.5932022692947165, "grad_norm": 0.37109375, "learning_rate": 3.6241874855304426e-05, "loss": 0.4188, "step": 62765 }, { "epoch": 1.5933291873437323, "grad_norm": 0.33203125, "learning_rate": 3.6220219837539796e-05, "loss": 0.4216, "step": 62770 }, { "epoch": 1.593456105392748, "grad_norm": 0.3515625, "learning_rate": 3.619857040290362e-05, "loss": 0.4297, "step": 62775 }, { "epoch": 1.5935830234417636, "grad_norm": 0.33203125, "learning_rate": 3.617692655245831e-05, "loss": 0.4263, "step": 62780 }, { "epoch": 1.5937099414907794, "grad_norm": 0.34375, "learning_rate": 3.6155288287265874e-05, "loss": 0.3933, "step": 62785 }, { "epoch": 1.5938368595397951, "grad_norm": 0.380859375, "learning_rate": 3.613365560838808e-05, "loss": 0.4292, "step": 62790 }, { "epoch": 1.593963777588811, "grad_norm": 0.39453125, "learning_rate": 3.6112028516886446e-05, "loss": 0.4444, "step": 62795 }, { "epoch": 1.5940906956378267, "grad_norm": 0.328125, "learning_rate": 3.609040701382222e-05, "loss": 0.4176, "step": 62800 }, { "epoch": 1.5942176136868424, "grad_norm": 0.337890625, "learning_rate": 3.606879110025634e-05, "loss": 0.3854, "step": 62805 }, { "epoch": 1.594344531735858, "grad_norm": 0.33203125, "learning_rate": 3.6047180777249455e-05, "loss": 0.399, "step": 62810 }, { "epoch": 1.5944714497848738, "grad_norm": 0.365234375, "learning_rate": 3.602557604586207e-05, "loss": 0.4193, "step": 62815 }, { "epoch": 1.5945983678338895, "grad_norm": 0.33203125, "learning_rate": 3.600397690715425e-05, "loss": 0.4144, "step": 62820 }, { "epoch": 1.5947252858829053, "grad_norm": 0.375, "learning_rate": 3.598238336218589e-05, "loss": 0.4192, "step": 62825 }, { "epoch": 1.594852203931921, "grad_norm": 0.333984375, "learning_rate": 3.5960795412016554e-05, "loss": 0.3868, "step": 62830 }, { "epoch": 1.5949791219809368, "grad_norm": 0.34765625, "learning_rate": 3.5939213057705555e-05, "loss": 0.4119, "step": 62835 }, { "epoch": 1.5951060400299526, "grad_norm": 0.357421875, "learning_rate": 3.591763630031194e-05, "loss": 0.4349, "step": 62840 }, { "epoch": 1.5952329580789684, "grad_norm": 0.353515625, "learning_rate": 3.589606514089447e-05, "loss": 0.3889, "step": 62845 }, { "epoch": 1.5953598761279841, "grad_norm": 0.3359375, "learning_rate": 3.58744995805116e-05, "loss": 0.4382, "step": 62850 }, { "epoch": 1.595486794177, "grad_norm": 0.349609375, "learning_rate": 3.5852939620221604e-05, "loss": 0.4134, "step": 62855 }, { "epoch": 1.5956137122260157, "grad_norm": 0.349609375, "learning_rate": 3.583138526108239e-05, "loss": 0.4252, "step": 62860 }, { "epoch": 1.5957406302750314, "grad_norm": 0.34765625, "learning_rate": 3.580983650415162e-05, "loss": 0.4114, "step": 62865 }, { "epoch": 1.5958675483240472, "grad_norm": 0.380859375, "learning_rate": 3.578829335048666e-05, "loss": 0.4179, "step": 62870 }, { "epoch": 1.595994466373063, "grad_norm": 0.353515625, "learning_rate": 3.576675580114468e-05, "loss": 0.3828, "step": 62875 }, { "epoch": 1.5961213844220787, "grad_norm": 0.361328125, "learning_rate": 3.574522385718248e-05, "loss": 0.4068, "step": 62880 }, { "epoch": 1.5962483024710945, "grad_norm": 0.322265625, "learning_rate": 3.572369751965665e-05, "loss": 0.411, "step": 62885 }, { "epoch": 1.5963752205201103, "grad_norm": 0.359375, "learning_rate": 3.5702176789623466e-05, "loss": 0.4415, "step": 62890 }, { "epoch": 1.596502138569126, "grad_norm": 0.34375, "learning_rate": 3.5680661668138923e-05, "loss": 0.3991, "step": 62895 }, { "epoch": 1.5966290566181418, "grad_norm": 0.34765625, "learning_rate": 3.565915215625877e-05, "loss": 0.4539, "step": 62900 }, { "epoch": 1.5967559746671576, "grad_norm": 0.3359375, "learning_rate": 3.563764825503848e-05, "loss": 0.4124, "step": 62905 }, { "epoch": 1.5968828927161731, "grad_norm": 0.349609375, "learning_rate": 3.561614996553325e-05, "loss": 0.4161, "step": 62910 }, { "epoch": 1.5970098107651889, "grad_norm": 0.34765625, "learning_rate": 3.559465728879796e-05, "loss": 0.4203, "step": 62915 }, { "epoch": 1.5971367288142047, "grad_norm": 0.365234375, "learning_rate": 3.5573170225887266e-05, "loss": 0.4326, "step": 62920 }, { "epoch": 1.5972636468632204, "grad_norm": 0.41015625, "learning_rate": 3.555168877785554e-05, "loss": 0.4144, "step": 62925 }, { "epoch": 1.5973905649122362, "grad_norm": 0.345703125, "learning_rate": 3.5530212945756806e-05, "loss": 0.4213, "step": 62930 }, { "epoch": 1.597517482961252, "grad_norm": 0.34375, "learning_rate": 3.550874273064496e-05, "loss": 0.4066, "step": 62935 }, { "epoch": 1.5976444010102677, "grad_norm": 0.330078125, "learning_rate": 3.548727813357347e-05, "loss": 0.4103, "step": 62940 }, { "epoch": 1.5977713190592833, "grad_norm": 0.306640625, "learning_rate": 3.546581915559566e-05, "loss": 0.3591, "step": 62945 }, { "epoch": 1.597898237108299, "grad_norm": 0.345703125, "learning_rate": 3.5444365797764497e-05, "loss": 0.3822, "step": 62950 }, { "epoch": 1.5980251551573148, "grad_norm": 0.341796875, "learning_rate": 3.542291806113266e-05, "loss": 0.4251, "step": 62955 }, { "epoch": 1.5981520732063306, "grad_norm": 0.3671875, "learning_rate": 3.540147594675259e-05, "loss": 0.435, "step": 62960 }, { "epoch": 1.5982789912553463, "grad_norm": 0.408203125, "learning_rate": 3.538003945567644e-05, "loss": 0.4488, "step": 62965 }, { "epoch": 1.598405909304362, "grad_norm": 0.3359375, "learning_rate": 3.5358608588956116e-05, "loss": 0.403, "step": 62970 }, { "epoch": 1.5985328273533779, "grad_norm": 0.3515625, "learning_rate": 3.5337183347643184e-05, "loss": 0.3897, "step": 62975 }, { "epoch": 1.5986597454023936, "grad_norm": 0.32421875, "learning_rate": 3.5315763732788996e-05, "loss": 0.4136, "step": 62980 }, { "epoch": 1.5987866634514094, "grad_norm": 0.35546875, "learning_rate": 3.5294349745444606e-05, "loss": 0.4175, "step": 62985 }, { "epoch": 1.5989135815004252, "grad_norm": 0.35546875, "learning_rate": 3.527294138666078e-05, "loss": 0.4292, "step": 62990 }, { "epoch": 1.599040499549441, "grad_norm": 0.34765625, "learning_rate": 3.525153865748796e-05, "loss": 0.3951, "step": 62995 }, { "epoch": 1.5991674175984567, "grad_norm": 0.34375, "learning_rate": 3.523014155897649e-05, "loss": 0.4121, "step": 63000 }, { "epoch": 1.5992943356474725, "grad_norm": 0.34375, "learning_rate": 3.520875009217626e-05, "loss": 0.4171, "step": 63005 }, { "epoch": 1.5994212536964882, "grad_norm": 0.353515625, "learning_rate": 3.518736425813692e-05, "loss": 0.379, "step": 63010 }, { "epoch": 1.599548171745504, "grad_norm": 0.34765625, "learning_rate": 3.51659840579079e-05, "loss": 0.4292, "step": 63015 }, { "epoch": 1.5996750897945198, "grad_norm": 0.337890625, "learning_rate": 3.514460949253824e-05, "loss": 0.3898, "step": 63020 }, { "epoch": 1.5998020078435355, "grad_norm": 0.373046875, "learning_rate": 3.512324056307689e-05, "loss": 0.4544, "step": 63025 }, { "epoch": 1.5999289258925513, "grad_norm": 0.3515625, "learning_rate": 3.510187727057237e-05, "loss": 0.4116, "step": 63030 }, { "epoch": 1.600055843941567, "grad_norm": 0.3515625, "learning_rate": 3.508051961607295e-05, "loss": 0.4248, "step": 63035 }, { "epoch": 1.6001827619905828, "grad_norm": 0.3671875, "learning_rate": 3.505916760062667e-05, "loss": 0.4273, "step": 63040 }, { "epoch": 1.6003096800395984, "grad_norm": 0.365234375, "learning_rate": 3.503782122528124e-05, "loss": 0.432, "step": 63045 }, { "epoch": 1.6004365980886142, "grad_norm": 0.34765625, "learning_rate": 3.5016480491084134e-05, "loss": 0.4147, "step": 63050 }, { "epoch": 1.60056351613763, "grad_norm": 0.359375, "learning_rate": 3.499514539908253e-05, "loss": 0.4111, "step": 63055 }, { "epoch": 1.6006904341866457, "grad_norm": 0.357421875, "learning_rate": 3.4973815950323284e-05, "loss": 0.4219, "step": 63060 }, { "epoch": 1.6008173522356615, "grad_norm": 0.34375, "learning_rate": 3.495249214585311e-05, "loss": 0.4102, "step": 63065 }, { "epoch": 1.6009442702846772, "grad_norm": 0.3359375, "learning_rate": 3.493117398671832e-05, "loss": 0.4054, "step": 63070 }, { "epoch": 1.6010711883336928, "grad_norm": 0.349609375, "learning_rate": 3.490986147396499e-05, "loss": 0.4163, "step": 63075 }, { "epoch": 1.6011981063827085, "grad_norm": 0.376953125, "learning_rate": 3.48885546086389e-05, "loss": 0.4238, "step": 63080 }, { "epoch": 1.6013250244317243, "grad_norm": 0.37109375, "learning_rate": 3.4867253391785585e-05, "loss": 0.4346, "step": 63085 }, { "epoch": 1.60145194248074, "grad_norm": 0.33984375, "learning_rate": 3.484595782445029e-05, "loss": 0.4299, "step": 63090 }, { "epoch": 1.6015788605297558, "grad_norm": 0.357421875, "learning_rate": 3.482466790767793e-05, "loss": 0.4202, "step": 63095 }, { "epoch": 1.6017057785787716, "grad_norm": 0.30859375, "learning_rate": 3.480338364251329e-05, "loss": 0.3827, "step": 63100 }, { "epoch": 1.6018326966277874, "grad_norm": 0.359375, "learning_rate": 3.478210503000072e-05, "loss": 0.4193, "step": 63105 }, { "epoch": 1.6019596146768031, "grad_norm": 0.353515625, "learning_rate": 3.476083207118435e-05, "loss": 0.4008, "step": 63110 }, { "epoch": 1.602086532725819, "grad_norm": 0.3515625, "learning_rate": 3.473956476710806e-05, "loss": 0.4058, "step": 63115 }, { "epoch": 1.6022134507748347, "grad_norm": 0.365234375, "learning_rate": 3.47183031188154e-05, "loss": 0.4321, "step": 63120 }, { "epoch": 1.6023403688238504, "grad_norm": 0.365234375, "learning_rate": 3.469704712734966e-05, "loss": 0.4167, "step": 63125 }, { "epoch": 1.6024672868728662, "grad_norm": 0.330078125, "learning_rate": 3.467579679375392e-05, "loss": 0.4196, "step": 63130 }, { "epoch": 1.602594204921882, "grad_norm": 0.345703125, "learning_rate": 3.4654552119070885e-05, "loss": 0.4264, "step": 63135 }, { "epoch": 1.6027211229708977, "grad_norm": 0.314453125, "learning_rate": 3.4633313104343036e-05, "loss": 0.4269, "step": 63140 }, { "epoch": 1.6028480410199135, "grad_norm": 0.361328125, "learning_rate": 3.461207975061256e-05, "loss": 0.4341, "step": 63145 }, { "epoch": 1.6029749590689293, "grad_norm": 0.375, "learning_rate": 3.459085205892137e-05, "loss": 0.4303, "step": 63150 }, { "epoch": 1.603101877117945, "grad_norm": 0.33984375, "learning_rate": 3.4569630030311085e-05, "loss": 0.4031, "step": 63155 }, { "epoch": 1.6032287951669608, "grad_norm": 0.3515625, "learning_rate": 3.454841366582309e-05, "loss": 0.4102, "step": 63160 }, { "epoch": 1.6033557132159766, "grad_norm": 0.33203125, "learning_rate": 3.452720296649845e-05, "loss": 0.3977, "step": 63165 }, { "epoch": 1.6034826312649924, "grad_norm": 0.361328125, "learning_rate": 3.4505997933377944e-05, "loss": 0.3985, "step": 63170 }, { "epoch": 1.603609549314008, "grad_norm": 0.353515625, "learning_rate": 3.448479856750208e-05, "loss": 0.4141, "step": 63175 }, { "epoch": 1.6037364673630237, "grad_norm": 0.365234375, "learning_rate": 3.4463604869911176e-05, "loss": 0.4309, "step": 63180 }, { "epoch": 1.6038633854120394, "grad_norm": 0.34375, "learning_rate": 3.444241684164512e-05, "loss": 0.4215, "step": 63185 }, { "epoch": 1.6039903034610552, "grad_norm": 0.341796875, "learning_rate": 3.442123448374368e-05, "loss": 0.4058, "step": 63190 }, { "epoch": 1.604117221510071, "grad_norm": 0.375, "learning_rate": 3.440005779724621e-05, "loss": 0.4306, "step": 63195 }, { "epoch": 1.6042441395590867, "grad_norm": 0.345703125, "learning_rate": 3.437888678319187e-05, "loss": 0.4241, "step": 63200 }, { "epoch": 1.6043710576081025, "grad_norm": 0.361328125, "learning_rate": 3.43577214426195e-05, "loss": 0.411, "step": 63205 }, { "epoch": 1.604497975657118, "grad_norm": 0.365234375, "learning_rate": 3.4336561776567686e-05, "loss": 0.4406, "step": 63210 }, { "epoch": 1.6046248937061338, "grad_norm": 0.357421875, "learning_rate": 3.431540778607469e-05, "loss": 0.4468, "step": 63215 }, { "epoch": 1.6047518117551496, "grad_norm": 0.333984375, "learning_rate": 3.429425947217855e-05, "loss": 0.3992, "step": 63220 }, { "epoch": 1.6048787298041653, "grad_norm": 0.34375, "learning_rate": 3.427311683591703e-05, "loss": 0.3926, "step": 63225 }, { "epoch": 1.6050056478531811, "grad_norm": 0.37109375, "learning_rate": 3.425197987832754e-05, "loss": 0.4066, "step": 63230 }, { "epoch": 1.6051325659021969, "grad_norm": 0.369140625, "learning_rate": 3.423084860044732e-05, "loss": 0.4148, "step": 63235 }, { "epoch": 1.6052594839512127, "grad_norm": 0.326171875, "learning_rate": 3.420972300331322e-05, "loss": 0.4137, "step": 63240 }, { "epoch": 1.6053864020002284, "grad_norm": 0.349609375, "learning_rate": 3.41886030879619e-05, "loss": 0.4063, "step": 63245 }, { "epoch": 1.6055133200492442, "grad_norm": 0.388671875, "learning_rate": 3.416748885542965e-05, "loss": 0.4444, "step": 63250 }, { "epoch": 1.60564023809826, "grad_norm": 0.3671875, "learning_rate": 3.414638030675261e-05, "loss": 0.4046, "step": 63255 }, { "epoch": 1.6057671561472757, "grad_norm": 0.380859375, "learning_rate": 3.412527744296655e-05, "loss": 0.4431, "step": 63260 }, { "epoch": 1.6058940741962915, "grad_norm": 0.341796875, "learning_rate": 3.4104180265106936e-05, "loss": 0.4095, "step": 63265 }, { "epoch": 1.6060209922453073, "grad_norm": 0.341796875, "learning_rate": 3.408308877420906e-05, "loss": 0.4194, "step": 63270 }, { "epoch": 1.606147910294323, "grad_norm": 0.349609375, "learning_rate": 3.406200297130786e-05, "loss": 0.4115, "step": 63275 }, { "epoch": 1.6062748283433388, "grad_norm": 0.396484375, "learning_rate": 3.404092285743796e-05, "loss": 0.4343, "step": 63280 }, { "epoch": 1.6064017463923546, "grad_norm": 0.3359375, "learning_rate": 3.401984843363381e-05, "loss": 0.4028, "step": 63285 }, { "epoch": 1.6065286644413703, "grad_norm": 0.361328125, "learning_rate": 3.399877970092949e-05, "loss": 0.4337, "step": 63290 }, { "epoch": 1.606655582490386, "grad_norm": 0.357421875, "learning_rate": 3.397771666035884e-05, "loss": 0.4004, "step": 63295 }, { "epoch": 1.6067825005394019, "grad_norm": 0.359375, "learning_rate": 3.3956659312955426e-05, "loss": 0.4383, "step": 63300 }, { "epoch": 1.6069094185884174, "grad_norm": 0.34765625, "learning_rate": 3.3935607659752506e-05, "loss": 0.4084, "step": 63305 }, { "epoch": 1.6070363366374332, "grad_norm": 0.345703125, "learning_rate": 3.391456170178309e-05, "loss": 0.4265, "step": 63310 }, { "epoch": 1.607163254686449, "grad_norm": 0.359375, "learning_rate": 3.3893521440079845e-05, "loss": 0.4267, "step": 63315 }, { "epoch": 1.6072901727354647, "grad_norm": 0.35546875, "learning_rate": 3.3872486875675285e-05, "loss": 0.4341, "step": 63320 }, { "epoch": 1.6074170907844805, "grad_norm": 0.34765625, "learning_rate": 3.385145800960155e-05, "loss": 0.4423, "step": 63325 }, { "epoch": 1.6075440088334962, "grad_norm": 2.8125, "learning_rate": 3.3830434842890496e-05, "loss": 0.4101, "step": 63330 }, { "epoch": 1.607670926882512, "grad_norm": 0.3359375, "learning_rate": 3.380941737657373e-05, "loss": 0.4011, "step": 63335 }, { "epoch": 1.6077978449315276, "grad_norm": 0.326171875, "learning_rate": 3.3788405611682536e-05, "loss": 0.4098, "step": 63340 }, { "epoch": 1.6079247629805433, "grad_norm": 0.33203125, "learning_rate": 3.376739954924803e-05, "loss": 0.4408, "step": 63345 }, { "epoch": 1.608051681029559, "grad_norm": 0.337890625, "learning_rate": 3.374639919030092e-05, "loss": 0.4259, "step": 63350 }, { "epoch": 1.6081785990785749, "grad_norm": 0.333984375, "learning_rate": 3.372540453587168e-05, "loss": 0.4161, "step": 63355 }, { "epoch": 1.6083055171275906, "grad_norm": 0.337890625, "learning_rate": 3.370441558699055e-05, "loss": 0.4313, "step": 63360 }, { "epoch": 1.6084324351766064, "grad_norm": 0.3515625, "learning_rate": 3.3683432344687404e-05, "loss": 0.4271, "step": 63365 }, { "epoch": 1.6085593532256222, "grad_norm": 0.3515625, "learning_rate": 3.3662454809991904e-05, "loss": 0.4548, "step": 63370 }, { "epoch": 1.608686271274638, "grad_norm": 0.3671875, "learning_rate": 3.364148298393336e-05, "loss": 0.4299, "step": 63375 }, { "epoch": 1.6088131893236537, "grad_norm": 0.3359375, "learning_rate": 3.362051686754094e-05, "loss": 0.4005, "step": 63380 }, { "epoch": 1.6089401073726695, "grad_norm": 0.330078125, "learning_rate": 3.359955646184339e-05, "loss": 0.3997, "step": 63385 }, { "epoch": 1.6090670254216852, "grad_norm": 0.34375, "learning_rate": 3.357860176786925e-05, "loss": 0.43, "step": 63390 }, { "epoch": 1.609193943470701, "grad_norm": 0.337890625, "learning_rate": 3.355765278664675e-05, "loss": 0.4132, "step": 63395 }, { "epoch": 1.6093208615197168, "grad_norm": 0.33984375, "learning_rate": 3.353670951920383e-05, "loss": 0.4417, "step": 63400 }, { "epoch": 1.6094477795687325, "grad_norm": 0.376953125, "learning_rate": 3.351577196656819e-05, "loss": 0.4206, "step": 63405 }, { "epoch": 1.6095746976177483, "grad_norm": 0.34375, "learning_rate": 3.349484012976722e-05, "loss": 0.4144, "step": 63410 }, { "epoch": 1.609701615666764, "grad_norm": 0.365234375, "learning_rate": 3.347391400982803e-05, "loss": 0.4274, "step": 63415 }, { "epoch": 1.6098285337157798, "grad_norm": 0.3671875, "learning_rate": 3.3452993607777426e-05, "loss": 0.4297, "step": 63420 }, { "epoch": 1.6099554517647956, "grad_norm": 0.328125, "learning_rate": 3.343207892464203e-05, "loss": 0.4161, "step": 63425 }, { "epoch": 1.6100823698138114, "grad_norm": 0.34765625, "learning_rate": 3.34111699614481e-05, "loss": 0.4079, "step": 63430 }, { "epoch": 1.6102092878628271, "grad_norm": 0.34375, "learning_rate": 3.339026671922161e-05, "loss": 0.4426, "step": 63435 }, { "epoch": 1.6103362059118427, "grad_norm": 0.34765625, "learning_rate": 3.336936919898824e-05, "loss": 0.3888, "step": 63440 }, { "epoch": 1.6104631239608584, "grad_norm": 0.390625, "learning_rate": 3.334847740177352e-05, "loss": 0.4394, "step": 63445 }, { "epoch": 1.6105900420098742, "grad_norm": 0.361328125, "learning_rate": 3.332759132860254e-05, "loss": 0.4374, "step": 63450 }, { "epoch": 1.61071696005889, "grad_norm": 0.359375, "learning_rate": 3.330671098050017e-05, "loss": 0.4329, "step": 63455 }, { "epoch": 1.6108438781079057, "grad_norm": 0.349609375, "learning_rate": 3.3285836358491026e-05, "loss": 0.4123, "step": 63460 }, { "epoch": 1.6109707961569215, "grad_norm": 0.306640625, "learning_rate": 3.326496746359937e-05, "loss": 0.4187, "step": 63465 }, { "epoch": 1.6110977142059373, "grad_norm": 0.373046875, "learning_rate": 3.324410429684928e-05, "loss": 0.4325, "step": 63470 }, { "epoch": 1.6112246322549528, "grad_norm": 0.349609375, "learning_rate": 3.3223246859264486e-05, "loss": 0.4211, "step": 63475 }, { "epoch": 1.6113515503039686, "grad_norm": 0.33984375, "learning_rate": 3.320239515186846e-05, "loss": 0.414, "step": 63480 }, { "epoch": 1.6114784683529844, "grad_norm": 0.326171875, "learning_rate": 3.318154917568436e-05, "loss": 0.4001, "step": 63485 }, { "epoch": 1.6116053864020001, "grad_norm": 0.337890625, "learning_rate": 3.316070893173511e-05, "loss": 0.4188, "step": 63490 }, { "epoch": 1.611732304451016, "grad_norm": 0.359375, "learning_rate": 3.313987442104329e-05, "loss": 0.4173, "step": 63495 }, { "epoch": 1.6118592225000317, "grad_norm": 0.330078125, "learning_rate": 3.311904564463133e-05, "loss": 0.4173, "step": 63500 }, { "epoch": 1.6119861405490474, "grad_norm": 0.337890625, "learning_rate": 3.3098222603521235e-05, "loss": 0.3895, "step": 63505 }, { "epoch": 1.6121130585980632, "grad_norm": 0.359375, "learning_rate": 3.3077405298734754e-05, "loss": 0.427, "step": 63510 }, { "epoch": 1.612239976647079, "grad_norm": 0.361328125, "learning_rate": 3.3056593731293476e-05, "loss": 0.438, "step": 63515 }, { "epoch": 1.6123668946960947, "grad_norm": 0.353515625, "learning_rate": 3.3035787902218544e-05, "loss": 0.4299, "step": 63520 }, { "epoch": 1.6124938127451105, "grad_norm": 0.353515625, "learning_rate": 3.301498781253091e-05, "loss": 0.4023, "step": 63525 }, { "epoch": 1.6126207307941263, "grad_norm": 0.36328125, "learning_rate": 3.299419346325121e-05, "loss": 0.4324, "step": 63530 }, { "epoch": 1.612747648843142, "grad_norm": 0.361328125, "learning_rate": 3.297340485539984e-05, "loss": 0.433, "step": 63535 }, { "epoch": 1.6128745668921578, "grad_norm": 0.353515625, "learning_rate": 3.295262198999689e-05, "loss": 0.4162, "step": 63540 }, { "epoch": 1.6130014849411736, "grad_norm": 0.29296875, "learning_rate": 3.2931844868062135e-05, "loss": 0.3603, "step": 63545 }, { "epoch": 1.6131284029901893, "grad_norm": 0.3359375, "learning_rate": 3.291107349061512e-05, "loss": 0.393, "step": 63550 }, { "epoch": 1.613255321039205, "grad_norm": 0.357421875, "learning_rate": 3.289030785867511e-05, "loss": 0.4084, "step": 63555 }, { "epoch": 1.6133822390882209, "grad_norm": 0.337890625, "learning_rate": 3.286954797326102e-05, "loss": 0.3991, "step": 63560 }, { "epoch": 1.6135091571372366, "grad_norm": 0.33203125, "learning_rate": 3.284879383539152e-05, "loss": 0.3948, "step": 63565 }, { "epoch": 1.6136360751862522, "grad_norm": 0.314453125, "learning_rate": 3.282804544608508e-05, "loss": 0.4025, "step": 63570 }, { "epoch": 1.613762993235268, "grad_norm": 0.35546875, "learning_rate": 3.280730280635978e-05, "loss": 0.4207, "step": 63575 }, { "epoch": 1.6138899112842837, "grad_norm": 0.376953125, "learning_rate": 3.2786565917233445e-05, "loss": 0.4521, "step": 63580 }, { "epoch": 1.6140168293332995, "grad_norm": 0.35546875, "learning_rate": 3.276583477972358e-05, "loss": 0.4212, "step": 63585 }, { "epoch": 1.6141437473823153, "grad_norm": 0.365234375, "learning_rate": 3.274510939484757e-05, "loss": 0.3923, "step": 63590 }, { "epoch": 1.614270665431331, "grad_norm": 0.3515625, "learning_rate": 3.272438976362231e-05, "loss": 0.4163, "step": 63595 }, { "epoch": 1.6143975834803468, "grad_norm": 0.3359375, "learning_rate": 3.2703675887064546e-05, "loss": 0.383, "step": 63600 }, { "epoch": 1.6145245015293623, "grad_norm": 0.341796875, "learning_rate": 3.268296776619066e-05, "loss": 0.4079, "step": 63605 }, { "epoch": 1.614651419578378, "grad_norm": 0.36328125, "learning_rate": 3.2662265402016824e-05, "loss": 0.4281, "step": 63610 }, { "epoch": 1.6147783376273939, "grad_norm": 0.369140625, "learning_rate": 3.264156879555889e-05, "loss": 0.4117, "step": 63615 }, { "epoch": 1.6149052556764096, "grad_norm": 0.34375, "learning_rate": 3.262087794783244e-05, "loss": 0.3804, "step": 63620 }, { "epoch": 1.6150321737254254, "grad_norm": 0.330078125, "learning_rate": 3.260019285985273e-05, "loss": 0.4143, "step": 63625 }, { "epoch": 1.6151590917744412, "grad_norm": 0.35546875, "learning_rate": 3.257951353263475e-05, "loss": 0.4103, "step": 63630 }, { "epoch": 1.615286009823457, "grad_norm": 0.345703125, "learning_rate": 3.255883996719331e-05, "loss": 0.4092, "step": 63635 }, { "epoch": 1.6154129278724727, "grad_norm": 0.345703125, "learning_rate": 3.253817216454282e-05, "loss": 0.4351, "step": 63640 }, { "epoch": 1.6155398459214885, "grad_norm": 0.345703125, "learning_rate": 3.251751012569743e-05, "loss": 0.4232, "step": 63645 }, { "epoch": 1.6156667639705042, "grad_norm": 0.326171875, "learning_rate": 3.2496853851671024e-05, "loss": 0.4106, "step": 63650 }, { "epoch": 1.61579368201952, "grad_norm": 0.353515625, "learning_rate": 3.247620334347718e-05, "loss": 0.4013, "step": 63655 }, { "epoch": 1.6159206000685358, "grad_norm": 0.33984375, "learning_rate": 3.245555860212922e-05, "loss": 0.4093, "step": 63660 }, { "epoch": 1.6160475181175515, "grad_norm": 0.3671875, "learning_rate": 3.2434919628640154e-05, "loss": 0.4093, "step": 63665 }, { "epoch": 1.6161744361665673, "grad_norm": 0.357421875, "learning_rate": 3.241428642402278e-05, "loss": 0.4172, "step": 63670 }, { "epoch": 1.616301354215583, "grad_norm": 0.345703125, "learning_rate": 3.2393658989289535e-05, "loss": 0.3909, "step": 63675 }, { "epoch": 1.6164282722645988, "grad_norm": 0.337890625, "learning_rate": 3.2373037325452614e-05, "loss": 0.4081, "step": 63680 }, { "epoch": 1.6165551903136146, "grad_norm": 0.35546875, "learning_rate": 3.235242143352387e-05, "loss": 0.4092, "step": 63685 }, { "epoch": 1.6166821083626304, "grad_norm": 0.34765625, "learning_rate": 3.233181131451491e-05, "loss": 0.4174, "step": 63690 }, { "epoch": 1.6168090264116461, "grad_norm": 0.322265625, "learning_rate": 3.231120696943713e-05, "loss": 0.4182, "step": 63695 }, { "epoch": 1.616935944460662, "grad_norm": 0.37109375, "learning_rate": 3.229060839930156e-05, "loss": 0.4349, "step": 63700 }, { "epoch": 1.6170628625096775, "grad_norm": 0.349609375, "learning_rate": 3.227001560511894e-05, "loss": 0.4056, "step": 63705 }, { "epoch": 1.6171897805586932, "grad_norm": 0.33984375, "learning_rate": 3.224942858789976e-05, "loss": 0.3976, "step": 63710 }, { "epoch": 1.617316698607709, "grad_norm": 0.375, "learning_rate": 3.2228847348654205e-05, "loss": 0.4269, "step": 63715 }, { "epoch": 1.6174436166567248, "grad_norm": 0.32421875, "learning_rate": 3.22082718883922e-05, "loss": 0.4236, "step": 63720 }, { "epoch": 1.6175705347057405, "grad_norm": 0.328125, "learning_rate": 3.218770220812338e-05, "loss": 0.4087, "step": 63725 }, { "epoch": 1.6176974527547563, "grad_norm": 0.34765625, "learning_rate": 3.216713830885708e-05, "loss": 0.3845, "step": 63730 }, { "epoch": 1.6178243708037718, "grad_norm": 0.34765625, "learning_rate": 3.2146580191602356e-05, "loss": 0.4065, "step": 63735 }, { "epoch": 1.6179512888527876, "grad_norm": 0.34765625, "learning_rate": 3.212602785736797e-05, "loss": 0.4147, "step": 63740 }, { "epoch": 1.6180782069018034, "grad_norm": 0.33984375, "learning_rate": 3.2105481307162486e-05, "loss": 0.4267, "step": 63745 }, { "epoch": 1.6182051249508191, "grad_norm": 0.345703125, "learning_rate": 3.208494054199406e-05, "loss": 0.4177, "step": 63750 }, { "epoch": 1.618332042999835, "grad_norm": 0.34765625, "learning_rate": 3.2064405562870596e-05, "loss": 0.4128, "step": 63755 }, { "epoch": 1.6184589610488507, "grad_norm": 0.35546875, "learning_rate": 3.20438763707998e-05, "loss": 0.4153, "step": 63760 }, { "epoch": 1.6185858790978664, "grad_norm": 0.3515625, "learning_rate": 3.202335296678902e-05, "loss": 0.406, "step": 63765 }, { "epoch": 1.6187127971468822, "grad_norm": 0.345703125, "learning_rate": 3.200283535184531e-05, "loss": 0.4152, "step": 63770 }, { "epoch": 1.618839715195898, "grad_norm": 0.34765625, "learning_rate": 3.198232352697546e-05, "loss": 0.378, "step": 63775 }, { "epoch": 1.6189666332449137, "grad_norm": 0.337890625, "learning_rate": 3.196181749318599e-05, "loss": 0.4083, "step": 63780 }, { "epoch": 1.6190935512939295, "grad_norm": 0.35546875, "learning_rate": 3.194131725148312e-05, "loss": 0.4117, "step": 63785 }, { "epoch": 1.6192204693429453, "grad_norm": 0.34375, "learning_rate": 3.1920822802872784e-05, "loss": 0.4318, "step": 63790 }, { "epoch": 1.619347387391961, "grad_norm": 0.326171875, "learning_rate": 3.190033414836063e-05, "loss": 0.3818, "step": 63795 }, { "epoch": 1.6194743054409768, "grad_norm": 0.349609375, "learning_rate": 3.187985128895204e-05, "loss": 0.4165, "step": 63800 }, { "epoch": 1.6196012234899926, "grad_norm": 0.33984375, "learning_rate": 3.1859374225652095e-05, "loss": 0.3926, "step": 63805 }, { "epoch": 1.6197281415390083, "grad_norm": 0.380859375, "learning_rate": 3.183890295946559e-05, "loss": 0.4513, "step": 63810 }, { "epoch": 1.6198550595880241, "grad_norm": 0.341796875, "learning_rate": 3.181843749139702e-05, "loss": 0.4068, "step": 63815 }, { "epoch": 1.6199819776370399, "grad_norm": 0.33203125, "learning_rate": 3.179797782245068e-05, "loss": 0.4392, "step": 63820 }, { "epoch": 1.6201088956860557, "grad_norm": 0.31640625, "learning_rate": 3.1777523953630493e-05, "loss": 0.4037, "step": 63825 }, { "epoch": 1.6202358137350714, "grad_norm": 0.359375, "learning_rate": 3.175707588594006e-05, "loss": 0.4005, "step": 63830 }, { "epoch": 1.620362731784087, "grad_norm": 0.365234375, "learning_rate": 3.173663362038285e-05, "loss": 0.4093, "step": 63835 }, { "epoch": 1.6204896498331027, "grad_norm": 0.33984375, "learning_rate": 3.171619715796191e-05, "loss": 0.4061, "step": 63840 }, { "epoch": 1.6206165678821185, "grad_norm": 0.34765625, "learning_rate": 3.169576649968008e-05, "loss": 0.4131, "step": 63845 }, { "epoch": 1.6207434859311343, "grad_norm": 0.37109375, "learning_rate": 3.1675341646539866e-05, "loss": 0.4203, "step": 63850 }, { "epoch": 1.62087040398015, "grad_norm": 0.33203125, "learning_rate": 3.165492259954348e-05, "loss": 0.4251, "step": 63855 }, { "epoch": 1.6209973220291658, "grad_norm": 0.349609375, "learning_rate": 3.163450935969293e-05, "loss": 0.4132, "step": 63860 }, { "epoch": 1.6211242400781816, "grad_norm": 0.375, "learning_rate": 3.161410192798984e-05, "loss": 0.4205, "step": 63865 }, { "epoch": 1.6212511581271971, "grad_norm": 0.34375, "learning_rate": 3.159370030543561e-05, "loss": 0.4048, "step": 63870 }, { "epoch": 1.6213780761762129, "grad_norm": 0.33203125, "learning_rate": 3.1573304493031325e-05, "loss": 0.4052, "step": 63875 }, { "epoch": 1.6215049942252286, "grad_norm": 0.376953125, "learning_rate": 3.155291449177779e-05, "loss": 0.4115, "step": 63880 }, { "epoch": 1.6216319122742444, "grad_norm": 0.35546875, "learning_rate": 3.153253030267561e-05, "loss": 0.4141, "step": 63885 }, { "epoch": 1.6217588303232602, "grad_norm": 0.3515625, "learning_rate": 3.151215192672496e-05, "loss": 0.4167, "step": 63890 }, { "epoch": 1.621885748372276, "grad_norm": 0.36328125, "learning_rate": 3.1491779364925814e-05, "loss": 0.4097, "step": 63895 }, { "epoch": 1.6220126664212917, "grad_norm": 0.34765625, "learning_rate": 3.147141261827786e-05, "loss": 0.4211, "step": 63900 }, { "epoch": 1.6221395844703075, "grad_norm": 0.40234375, "learning_rate": 3.1451051687780474e-05, "loss": 0.4483, "step": 63905 }, { "epoch": 1.6222665025193233, "grad_norm": 0.322265625, "learning_rate": 3.143069657443272e-05, "loss": 0.3624, "step": 63910 }, { "epoch": 1.622393420568339, "grad_norm": 0.337890625, "learning_rate": 3.141034727923347e-05, "loss": 0.3857, "step": 63915 }, { "epoch": 1.6225203386173548, "grad_norm": 0.357421875, "learning_rate": 3.1390003803181277e-05, "loss": 0.4182, "step": 63920 }, { "epoch": 1.6226472566663706, "grad_norm": 0.345703125, "learning_rate": 3.136966614727432e-05, "loss": 0.4067, "step": 63925 }, { "epoch": 1.6227741747153863, "grad_norm": 0.376953125, "learning_rate": 3.134933431251061e-05, "loss": 0.4278, "step": 63930 }, { "epoch": 1.622901092764402, "grad_norm": 0.349609375, "learning_rate": 3.132900829988778e-05, "loss": 0.4126, "step": 63935 }, { "epoch": 1.6230280108134179, "grad_norm": 0.34375, "learning_rate": 3.130868811040326e-05, "loss": 0.4395, "step": 63940 }, { "epoch": 1.6231549288624336, "grad_norm": 0.341796875, "learning_rate": 3.128837374505409e-05, "loss": 0.3913, "step": 63945 }, { "epoch": 1.6232818469114494, "grad_norm": 0.318359375, "learning_rate": 3.126806520483717e-05, "loss": 0.392, "step": 63950 }, { "epoch": 1.6234087649604652, "grad_norm": 0.337890625, "learning_rate": 3.124776249074901e-05, "loss": 0.4238, "step": 63955 }, { "epoch": 1.623535683009481, "grad_norm": 0.3671875, "learning_rate": 3.122746560378581e-05, "loss": 0.4218, "step": 63960 }, { "epoch": 1.6236626010584967, "grad_norm": 0.361328125, "learning_rate": 3.1207174544943584e-05, "loss": 0.4458, "step": 63965 }, { "epoch": 1.6237895191075122, "grad_norm": 0.361328125, "learning_rate": 3.1186889315217986e-05, "loss": 0.407, "step": 63970 }, { "epoch": 1.623916437156528, "grad_norm": 0.349609375, "learning_rate": 3.116660991560439e-05, "loss": 0.4233, "step": 63975 }, { "epoch": 1.6240433552055438, "grad_norm": 0.357421875, "learning_rate": 3.114633634709791e-05, "loss": 0.4495, "step": 63980 }, { "epoch": 1.6241702732545595, "grad_norm": 0.357421875, "learning_rate": 3.112606861069332e-05, "loss": 0.403, "step": 63985 }, { "epoch": 1.6242971913035753, "grad_norm": 0.349609375, "learning_rate": 3.110580670738523e-05, "loss": 0.4034, "step": 63990 }, { "epoch": 1.624424109352591, "grad_norm": 0.37890625, "learning_rate": 3.108555063816785e-05, "loss": 0.4111, "step": 63995 }, { "epoch": 1.6245510274016066, "grad_norm": 0.34375, "learning_rate": 3.106530040403514e-05, "loss": 0.4044, "step": 64000 }, { "epoch": 1.6246779454506224, "grad_norm": 0.3125, "learning_rate": 3.1045056005980715e-05, "loss": 0.3963, "step": 64005 }, { "epoch": 1.6248048634996382, "grad_norm": 0.345703125, "learning_rate": 3.102481744499803e-05, "loss": 0.4136, "step": 64010 }, { "epoch": 1.624931781548654, "grad_norm": 0.359375, "learning_rate": 3.100458472208018e-05, "loss": 0.4403, "step": 64015 }, { "epoch": 1.6250586995976697, "grad_norm": 0.306640625, "learning_rate": 3.0984357838219946e-05, "loss": 0.3872, "step": 64020 }, { "epoch": 1.6251856176466855, "grad_norm": 0.35546875, "learning_rate": 3.0964136794409876e-05, "loss": 0.3991, "step": 64025 }, { "epoch": 1.6253125356957012, "grad_norm": 0.359375, "learning_rate": 3.094392159164219e-05, "loss": 0.4313, "step": 64030 }, { "epoch": 1.625439453744717, "grad_norm": 0.357421875, "learning_rate": 3.092371223090884e-05, "loss": 0.4264, "step": 64035 }, { "epoch": 1.6255663717937328, "grad_norm": 0.353515625, "learning_rate": 3.0903508713201506e-05, "loss": 0.4172, "step": 64040 }, { "epoch": 1.6256932898427485, "grad_norm": 0.33984375, "learning_rate": 3.088331103951156e-05, "loss": 0.3937, "step": 64045 }, { "epoch": 1.6258202078917643, "grad_norm": 0.361328125, "learning_rate": 3.086311921083009e-05, "loss": 0.4127, "step": 64050 }, { "epoch": 1.62594712594078, "grad_norm": 0.3359375, "learning_rate": 3.0842933228147904e-05, "loss": 0.4104, "step": 64055 }, { "epoch": 1.6260740439897958, "grad_norm": 0.328125, "learning_rate": 3.082275309245553e-05, "loss": 0.4037, "step": 64060 }, { "epoch": 1.6262009620388116, "grad_norm": 0.34375, "learning_rate": 3.0802578804743135e-05, "loss": 0.4043, "step": 64065 }, { "epoch": 1.6263278800878274, "grad_norm": 0.369140625, "learning_rate": 3.078241036600075e-05, "loss": 0.4083, "step": 64070 }, { "epoch": 1.6264547981368431, "grad_norm": 0.36328125, "learning_rate": 3.076224777721798e-05, "loss": 0.8259, "step": 64075 }, { "epoch": 1.626581716185859, "grad_norm": 0.33984375, "learning_rate": 3.0742091039384234e-05, "loss": 0.4253, "step": 64080 }, { "epoch": 1.6267086342348747, "grad_norm": 0.361328125, "learning_rate": 3.0721940153488575e-05, "loss": 0.4129, "step": 64085 }, { "epoch": 1.6268355522838904, "grad_norm": 0.357421875, "learning_rate": 3.070179512051981e-05, "loss": 0.4251, "step": 64090 }, { "epoch": 1.6269624703329062, "grad_norm": 0.353515625, "learning_rate": 3.0681655941466446e-05, "loss": 0.4245, "step": 64095 }, { "epoch": 1.6270893883819217, "grad_norm": 0.35546875, "learning_rate": 3.066152261731668e-05, "loss": 0.4331, "step": 64100 }, { "epoch": 1.6272163064309375, "grad_norm": 0.33984375, "learning_rate": 3.064139514905847e-05, "loss": 0.4047, "step": 64105 }, { "epoch": 1.6273432244799533, "grad_norm": 0.369140625, "learning_rate": 3.062127353767945e-05, "loss": 0.4193, "step": 64110 }, { "epoch": 1.627470142528969, "grad_norm": 0.34375, "learning_rate": 3.060115778416698e-05, "loss": 0.4137, "step": 64115 }, { "epoch": 1.6275970605779848, "grad_norm": 0.359375, "learning_rate": 3.0581047889508146e-05, "loss": 0.4422, "step": 64120 }, { "epoch": 1.6277239786270006, "grad_norm": 0.357421875, "learning_rate": 3.056094385468972e-05, "loss": 0.4374, "step": 64125 }, { "epoch": 1.6278508966760163, "grad_norm": 0.361328125, "learning_rate": 3.054084568069817e-05, "loss": 0.4392, "step": 64130 }, { "epoch": 1.627977814725032, "grad_norm": 0.341796875, "learning_rate": 3.052075336851975e-05, "loss": 0.416, "step": 64135 }, { "epoch": 1.6281047327740477, "grad_norm": 0.328125, "learning_rate": 3.05006669191404e-05, "loss": 0.4225, "step": 64140 }, { "epoch": 1.6282316508230634, "grad_norm": 0.3359375, "learning_rate": 3.0480586333545704e-05, "loss": 0.4039, "step": 64145 }, { "epoch": 1.6283585688720792, "grad_norm": 0.35546875, "learning_rate": 3.046051161272103e-05, "loss": 0.4039, "step": 64150 }, { "epoch": 1.628485486921095, "grad_norm": 0.318359375, "learning_rate": 3.0440442757651384e-05, "loss": 0.4391, "step": 64155 }, { "epoch": 1.6286124049701107, "grad_norm": 0.345703125, "learning_rate": 3.0420379769321652e-05, "loss": 0.4244, "step": 64160 }, { "epoch": 1.6287393230191265, "grad_norm": 0.333984375, "learning_rate": 3.0400322648716223e-05, "loss": 0.4099, "step": 64165 }, { "epoch": 1.6288662410681423, "grad_norm": 0.34375, "learning_rate": 3.0380271396819332e-05, "loss": 0.4074, "step": 64170 }, { "epoch": 1.628993159117158, "grad_norm": 0.333984375, "learning_rate": 3.0360226014614892e-05, "loss": 0.4085, "step": 64175 }, { "epoch": 1.6291200771661738, "grad_norm": 0.36328125, "learning_rate": 3.0340186503086477e-05, "loss": 0.4733, "step": 64180 }, { "epoch": 1.6292469952151896, "grad_norm": 0.33203125, "learning_rate": 3.0320152863217457e-05, "loss": 0.4106, "step": 64185 }, { "epoch": 1.6293739132642053, "grad_norm": 0.333984375, "learning_rate": 3.0300125095990856e-05, "loss": 0.4046, "step": 64190 }, { "epoch": 1.629500831313221, "grad_norm": 0.330078125, "learning_rate": 3.0280103202389412e-05, "loss": 0.3894, "step": 64195 }, { "epoch": 1.6296277493622369, "grad_norm": 0.330078125, "learning_rate": 3.0260087183395647e-05, "loss": 0.3968, "step": 64200 }, { "epoch": 1.6297546674112526, "grad_norm": 0.333984375, "learning_rate": 3.02400770399917e-05, "loss": 0.423, "step": 64205 }, { "epoch": 1.6298815854602684, "grad_norm": 0.32421875, "learning_rate": 3.0220072773159466e-05, "loss": 0.4119, "step": 64210 }, { "epoch": 1.6300085035092842, "grad_norm": 0.345703125, "learning_rate": 3.0200074383880558e-05, "loss": 0.4148, "step": 64215 }, { "epoch": 1.6301354215583, "grad_norm": 0.3671875, "learning_rate": 3.0180081873136274e-05, "loss": 0.4247, "step": 64220 }, { "epoch": 1.6302623396073157, "grad_norm": 0.365234375, "learning_rate": 3.0160095241907635e-05, "loss": 0.4265, "step": 64225 }, { "epoch": 1.6303892576563315, "grad_norm": 0.384765625, "learning_rate": 3.014011449117535e-05, "loss": 0.4162, "step": 64230 }, { "epoch": 1.630516175705347, "grad_norm": 0.337890625, "learning_rate": 3.0120139621919952e-05, "loss": 0.4148, "step": 64235 }, { "epoch": 1.6306430937543628, "grad_norm": 0.35546875, "learning_rate": 3.0100170635121545e-05, "loss": 0.4419, "step": 64240 }, { "epoch": 1.6307700118033785, "grad_norm": 0.359375, "learning_rate": 3.0080207531759993e-05, "loss": 0.4275, "step": 64245 }, { "epoch": 1.6308969298523943, "grad_norm": 0.341796875, "learning_rate": 3.0060250312814892e-05, "loss": 0.4153, "step": 64250 }, { "epoch": 1.63102384790141, "grad_norm": 0.322265625, "learning_rate": 3.0040298979265547e-05, "loss": 0.4221, "step": 64255 }, { "epoch": 1.6311507659504259, "grad_norm": 0.369140625, "learning_rate": 3.002035353209089e-05, "loss": 0.4177, "step": 64260 }, { "epoch": 1.6312776839994414, "grad_norm": 0.322265625, "learning_rate": 3.0000413972269737e-05, "loss": 0.4139, "step": 64265 }, { "epoch": 1.6314046020484572, "grad_norm": 0.341796875, "learning_rate": 2.9980480300780475e-05, "loss": 0.4087, "step": 64270 }, { "epoch": 1.631531520097473, "grad_norm": 0.333984375, "learning_rate": 2.9960552518601232e-05, "loss": 0.4407, "step": 64275 }, { "epoch": 1.6316584381464887, "grad_norm": 0.369140625, "learning_rate": 2.994063062670985e-05, "loss": 0.4152, "step": 64280 }, { "epoch": 1.6317853561955045, "grad_norm": 0.341796875, "learning_rate": 2.992071462608391e-05, "loss": 0.4238, "step": 64285 }, { "epoch": 1.6319122742445202, "grad_norm": 0.35546875, "learning_rate": 2.9900804517700677e-05, "loss": 0.4422, "step": 64290 }, { "epoch": 1.632039192293536, "grad_norm": 0.310546875, "learning_rate": 2.988090030253711e-05, "loss": 0.3749, "step": 64295 }, { "epoch": 1.6321661103425518, "grad_norm": 0.333984375, "learning_rate": 2.986100198156992e-05, "loss": 0.4268, "step": 64300 }, { "epoch": 1.6322930283915675, "grad_norm": 0.333984375, "learning_rate": 2.9841109555775518e-05, "loss": 0.394, "step": 64305 }, { "epoch": 1.6324199464405833, "grad_norm": 0.359375, "learning_rate": 2.9821223026129948e-05, "loss": 0.4446, "step": 64310 }, { "epoch": 1.632546864489599, "grad_norm": 0.333984375, "learning_rate": 2.980134239360915e-05, "loss": 0.3781, "step": 64315 }, { "epoch": 1.6326737825386148, "grad_norm": 0.34765625, "learning_rate": 2.9781467659188564e-05, "loss": 0.4048, "step": 64320 }, { "epoch": 1.6328007005876306, "grad_norm": 0.349609375, "learning_rate": 2.9761598823843487e-05, "loss": 0.4141, "step": 64325 }, { "epoch": 1.6329276186366464, "grad_norm": 0.369140625, "learning_rate": 2.9741735888548874e-05, "loss": 0.4449, "step": 64330 }, { "epoch": 1.6330545366856621, "grad_norm": 0.33203125, "learning_rate": 2.972187885427938e-05, "loss": 0.415, "step": 64335 }, { "epoch": 1.633181454734678, "grad_norm": 0.349609375, "learning_rate": 2.9702027722009375e-05, "loss": 0.4168, "step": 64340 }, { "epoch": 1.6333083727836937, "grad_norm": 0.3515625, "learning_rate": 2.9682182492712963e-05, "loss": 0.4293, "step": 64345 }, { "epoch": 1.6334352908327094, "grad_norm": 0.34765625, "learning_rate": 2.9662343167363915e-05, "loss": 0.4086, "step": 64350 }, { "epoch": 1.6335622088817252, "grad_norm": 0.3359375, "learning_rate": 2.9642509746935755e-05, "loss": 0.4076, "step": 64355 }, { "epoch": 1.633689126930741, "grad_norm": 0.33984375, "learning_rate": 2.9622682232401684e-05, "loss": 0.4169, "step": 64360 }, { "epoch": 1.6338160449797565, "grad_norm": 0.322265625, "learning_rate": 2.9602860624734657e-05, "loss": 0.4295, "step": 64365 }, { "epoch": 1.6339429630287723, "grad_norm": 0.365234375, "learning_rate": 2.9583044924907318e-05, "loss": 0.4356, "step": 64370 }, { "epoch": 1.634069881077788, "grad_norm": 0.416015625, "learning_rate": 2.9563235133891966e-05, "loss": 0.4698, "step": 64375 }, { "epoch": 1.6341967991268038, "grad_norm": 0.34765625, "learning_rate": 2.954343125266071e-05, "loss": 0.4112, "step": 64380 }, { "epoch": 1.6343237171758196, "grad_norm": 0.3671875, "learning_rate": 2.9523633282185255e-05, "loss": 0.3863, "step": 64385 }, { "epoch": 1.6344506352248354, "grad_norm": 0.3671875, "learning_rate": 2.9503841223437174e-05, "loss": 0.4156, "step": 64390 }, { "epoch": 1.6345775532738511, "grad_norm": 0.349609375, "learning_rate": 2.948405507738759e-05, "loss": 0.4125, "step": 64395 }, { "epoch": 1.6347044713228667, "grad_norm": 0.3671875, "learning_rate": 2.946427484500738e-05, "loss": 0.4345, "step": 64400 }, { "epoch": 1.6348313893718824, "grad_norm": 0.35546875, "learning_rate": 2.9444500527267245e-05, "loss": 0.3839, "step": 64405 }, { "epoch": 1.6349583074208982, "grad_norm": 0.365234375, "learning_rate": 2.942473212513743e-05, "loss": 0.4304, "step": 64410 }, { "epoch": 1.635085225469914, "grad_norm": 0.3359375, "learning_rate": 2.9404969639587988e-05, "loss": 0.419, "step": 64415 }, { "epoch": 1.6352121435189297, "grad_norm": 0.3671875, "learning_rate": 2.9385213071588633e-05, "loss": 0.4355, "step": 64420 }, { "epoch": 1.6353390615679455, "grad_norm": 0.357421875, "learning_rate": 2.936546242210885e-05, "loss": 0.4403, "step": 64425 }, { "epoch": 1.6354659796169613, "grad_norm": 0.357421875, "learning_rate": 2.934571769211775e-05, "loss": 0.4308, "step": 64430 }, { "epoch": 1.635592897665977, "grad_norm": 0.345703125, "learning_rate": 2.9325978882584238e-05, "loss": 0.3998, "step": 64435 }, { "epoch": 1.6357198157149928, "grad_norm": 0.375, "learning_rate": 2.9306245994476864e-05, "loss": 0.4342, "step": 64440 }, { "epoch": 1.6358467337640086, "grad_norm": 0.361328125, "learning_rate": 2.9286519028763928e-05, "loss": 0.3713, "step": 64445 }, { "epoch": 1.6359736518130243, "grad_norm": 0.34765625, "learning_rate": 2.9266797986413375e-05, "loss": 0.3791, "step": 64450 }, { "epoch": 1.63610056986204, "grad_norm": 0.3671875, "learning_rate": 2.9247082868392984e-05, "loss": 0.4164, "step": 64455 }, { "epoch": 1.6362274879110559, "grad_norm": 0.3515625, "learning_rate": 2.9227373675670146e-05, "loss": 0.4052, "step": 64460 }, { "epoch": 1.6363544059600716, "grad_norm": 0.34765625, "learning_rate": 2.920767040921197e-05, "loss": 0.3851, "step": 64465 }, { "epoch": 1.6364813240090874, "grad_norm": 0.353515625, "learning_rate": 2.9187973069985283e-05, "loss": 0.4227, "step": 64470 }, { "epoch": 1.6366082420581032, "grad_norm": 0.333984375, "learning_rate": 2.916828165895661e-05, "loss": 0.4296, "step": 64475 }, { "epoch": 1.636735160107119, "grad_norm": 0.341796875, "learning_rate": 2.9148596177092248e-05, "loss": 0.4254, "step": 64480 }, { "epoch": 1.6368620781561347, "grad_norm": 0.359375, "learning_rate": 2.912891662535814e-05, "loss": 0.4182, "step": 64485 }, { "epoch": 1.6369889962051505, "grad_norm": 0.349609375, "learning_rate": 2.9109243004719945e-05, "loss": 0.4589, "step": 64490 }, { "epoch": 1.6371159142541662, "grad_norm": 0.34765625, "learning_rate": 2.9089575316143042e-05, "loss": 0.4169, "step": 64495 }, { "epoch": 1.6372428323031818, "grad_norm": 0.314453125, "learning_rate": 2.906991356059251e-05, "loss": 0.4052, "step": 64500 }, { "epoch": 1.6373697503521976, "grad_norm": 0.36328125, "learning_rate": 2.9050257739033144e-05, "loss": 0.4371, "step": 64505 }, { "epoch": 1.6374966684012133, "grad_norm": 0.3359375, "learning_rate": 2.9030607852429423e-05, "loss": 0.4077, "step": 64510 }, { "epoch": 1.637623586450229, "grad_norm": 0.328125, "learning_rate": 2.901096390174562e-05, "loss": 0.4142, "step": 64515 }, { "epoch": 1.6377505044992449, "grad_norm": 0.33203125, "learning_rate": 2.899132588794562e-05, "loss": 0.4236, "step": 64520 }, { "epoch": 1.6378774225482606, "grad_norm": 0.36328125, "learning_rate": 2.897169381199305e-05, "loss": 0.4297, "step": 64525 }, { "epoch": 1.6380043405972762, "grad_norm": 0.359375, "learning_rate": 2.8952067674851274e-05, "loss": 0.4173, "step": 64530 }, { "epoch": 1.638131258646292, "grad_norm": 0.357421875, "learning_rate": 2.8932447477483307e-05, "loss": 0.4473, "step": 64535 }, { "epoch": 1.6382581766953077, "grad_norm": 0.365234375, "learning_rate": 2.891283322085192e-05, "loss": 0.4267, "step": 64540 }, { "epoch": 1.6383850947443235, "grad_norm": 0.326171875, "learning_rate": 2.889322490591957e-05, "loss": 0.4058, "step": 64545 }, { "epoch": 1.6385120127933392, "grad_norm": 0.35546875, "learning_rate": 2.8873622533648434e-05, "loss": 0.4079, "step": 64550 }, { "epoch": 1.638638930842355, "grad_norm": 0.33203125, "learning_rate": 2.8854026105000355e-05, "loss": 0.4196, "step": 64555 }, { "epoch": 1.6387658488913708, "grad_norm": 0.357421875, "learning_rate": 2.8834435620936986e-05, "loss": 0.4439, "step": 64560 }, { "epoch": 1.6388927669403865, "grad_norm": 0.318359375, "learning_rate": 2.8814851082419606e-05, "loss": 0.3544, "step": 64565 }, { "epoch": 1.6390196849894023, "grad_norm": 0.345703125, "learning_rate": 2.879527249040921e-05, "loss": 0.4142, "step": 64570 }, { "epoch": 1.639146603038418, "grad_norm": 0.369140625, "learning_rate": 2.8775699845866468e-05, "loss": 0.426, "step": 64575 }, { "epoch": 1.6392735210874338, "grad_norm": 0.31640625, "learning_rate": 2.8756133149751892e-05, "loss": 0.4232, "step": 64580 }, { "epoch": 1.6394004391364496, "grad_norm": 0.37890625, "learning_rate": 2.8736572403025566e-05, "loss": 0.4187, "step": 64585 }, { "epoch": 1.6395273571854654, "grad_norm": 0.341796875, "learning_rate": 2.871701760664732e-05, "loss": 0.4135, "step": 64590 }, { "epoch": 1.6396542752344812, "grad_norm": 0.30859375, "learning_rate": 2.86974687615767e-05, "loss": 0.406, "step": 64595 }, { "epoch": 1.639781193283497, "grad_norm": 0.341796875, "learning_rate": 2.8677925868772983e-05, "loss": 0.437, "step": 64600 }, { "epoch": 1.6399081113325127, "grad_norm": 0.345703125, "learning_rate": 2.865838892919511e-05, "loss": 0.3966, "step": 64605 }, { "epoch": 1.6400350293815285, "grad_norm": 0.33203125, "learning_rate": 2.8638857943801762e-05, "loss": 0.4225, "step": 64610 }, { "epoch": 1.6401619474305442, "grad_norm": 0.376953125, "learning_rate": 2.8619332913551312e-05, "loss": 0.4553, "step": 64615 }, { "epoch": 1.64028886547956, "grad_norm": 0.36328125, "learning_rate": 2.859981383940184e-05, "loss": 0.3881, "step": 64620 }, { "epoch": 1.6404157835285758, "grad_norm": 0.380859375, "learning_rate": 2.858030072231115e-05, "loss": 0.4302, "step": 64625 }, { "epoch": 1.6405427015775913, "grad_norm": 0.34765625, "learning_rate": 2.856079356323669e-05, "loss": 0.4052, "step": 64630 }, { "epoch": 1.640669619626607, "grad_norm": 0.330078125, "learning_rate": 2.854129236313576e-05, "loss": 0.4145, "step": 64635 }, { "epoch": 1.6407965376756228, "grad_norm": 0.34375, "learning_rate": 2.852179712296523e-05, "loss": 0.4018, "step": 64640 }, { "epoch": 1.6409234557246386, "grad_norm": 0.33984375, "learning_rate": 2.850230784368168e-05, "loss": 0.4009, "step": 64645 }, { "epoch": 1.6410503737736544, "grad_norm": 0.345703125, "learning_rate": 2.8482824526241536e-05, "loss": 0.4286, "step": 64650 }, { "epoch": 1.6411772918226701, "grad_norm": 0.357421875, "learning_rate": 2.84633471716008e-05, "loss": 0.4372, "step": 64655 }, { "epoch": 1.641304209871686, "grad_norm": 0.35546875, "learning_rate": 2.844387578071518e-05, "loss": 0.4006, "step": 64660 }, { "epoch": 1.6414311279207014, "grad_norm": 0.357421875, "learning_rate": 2.8424410354540157e-05, "loss": 0.4043, "step": 64665 }, { "epoch": 1.6415580459697172, "grad_norm": 0.361328125, "learning_rate": 2.840495089403091e-05, "loss": 0.401, "step": 64670 }, { "epoch": 1.641684964018733, "grad_norm": 0.375, "learning_rate": 2.838549740014226e-05, "loss": 0.423, "step": 64675 }, { "epoch": 1.6418118820677488, "grad_norm": 0.34765625, "learning_rate": 2.8366049873828828e-05, "loss": 0.4151, "step": 64680 }, { "epoch": 1.6419388001167645, "grad_norm": 0.345703125, "learning_rate": 2.8346608316044868e-05, "loss": 0.4304, "step": 64685 }, { "epoch": 1.6420657181657803, "grad_norm": 0.337890625, "learning_rate": 2.8327172727744378e-05, "loss": 0.4139, "step": 64690 }, { "epoch": 1.642192636214796, "grad_norm": 0.341796875, "learning_rate": 2.830774310988107e-05, "loss": 0.3954, "step": 64695 }, { "epoch": 1.6423195542638118, "grad_norm": 0.306640625, "learning_rate": 2.8288319463408276e-05, "loss": 0.3898, "step": 64700 }, { "epoch": 1.6424464723128276, "grad_norm": 0.357421875, "learning_rate": 2.82689017892792e-05, "loss": 0.4234, "step": 64705 }, { "epoch": 1.6425733903618434, "grad_norm": 0.353515625, "learning_rate": 2.8249490088446635e-05, "loss": 0.3966, "step": 64710 }, { "epoch": 1.6427003084108591, "grad_norm": 0.34375, "learning_rate": 2.8230084361863082e-05, "loss": 0.4195, "step": 64715 }, { "epoch": 1.642827226459875, "grad_norm": 0.3046875, "learning_rate": 2.821068461048076e-05, "loss": 0.4186, "step": 64720 }, { "epoch": 1.6429541445088907, "grad_norm": 0.3515625, "learning_rate": 2.8191290835251662e-05, "loss": 0.4383, "step": 64725 }, { "epoch": 1.6430810625579064, "grad_norm": 0.322265625, "learning_rate": 2.817190303712739e-05, "loss": 0.4005, "step": 64730 }, { "epoch": 1.6432079806069222, "grad_norm": 0.34375, "learning_rate": 2.815252121705932e-05, "loss": 0.3812, "step": 64735 }, { "epoch": 1.643334898655938, "grad_norm": 0.349609375, "learning_rate": 2.813314537599849e-05, "loss": 0.3987, "step": 64740 }, { "epoch": 1.6434618167049537, "grad_norm": 0.3359375, "learning_rate": 2.8113775514895686e-05, "loss": 0.403, "step": 64745 }, { "epoch": 1.6435887347539695, "grad_norm": 0.359375, "learning_rate": 2.8094411634701354e-05, "loss": 0.4188, "step": 64750 }, { "epoch": 1.6437156528029853, "grad_norm": 0.373046875, "learning_rate": 2.8075053736365677e-05, "loss": 0.3962, "step": 64755 }, { "epoch": 1.643842570852001, "grad_norm": 0.333984375, "learning_rate": 2.805570182083855e-05, "loss": 0.4128, "step": 64760 }, { "epoch": 1.6439694889010166, "grad_norm": 0.35546875, "learning_rate": 2.8036355889069527e-05, "loss": 0.4163, "step": 64765 }, { "epoch": 1.6440964069500323, "grad_norm": 0.341796875, "learning_rate": 2.801701594200797e-05, "loss": 0.4012, "step": 64770 }, { "epoch": 1.644223324999048, "grad_norm": 0.32421875, "learning_rate": 2.799768198060285e-05, "loss": 0.4103, "step": 64775 }, { "epoch": 1.6443502430480639, "grad_norm": 0.34765625, "learning_rate": 2.7978354005802888e-05, "loss": 0.4172, "step": 64780 }, { "epoch": 1.6444771610970796, "grad_norm": 0.359375, "learning_rate": 2.7959032018556464e-05, "loss": 0.4173, "step": 64785 }, { "epoch": 1.6446040791460954, "grad_norm": 0.326171875, "learning_rate": 2.793971601981175e-05, "loss": 0.4005, "step": 64790 }, { "epoch": 1.644730997195111, "grad_norm": 0.341796875, "learning_rate": 2.7920406010516545e-05, "loss": 0.4293, "step": 64795 }, { "epoch": 1.6448579152441267, "grad_norm": 0.357421875, "learning_rate": 2.790110199161833e-05, "loss": 0.4438, "step": 64800 }, { "epoch": 1.6449848332931425, "grad_norm": 0.34765625, "learning_rate": 2.788180396406447e-05, "loss": 0.4535, "step": 64805 }, { "epoch": 1.6451117513421583, "grad_norm": 0.373046875, "learning_rate": 2.786251192880182e-05, "loss": 0.4355, "step": 64810 }, { "epoch": 1.645238669391174, "grad_norm": 0.318359375, "learning_rate": 2.7843225886777064e-05, "loss": 0.3948, "step": 64815 }, { "epoch": 1.6453655874401898, "grad_norm": 0.373046875, "learning_rate": 2.7823945838936555e-05, "loss": 0.4533, "step": 64820 }, { "epoch": 1.6454925054892056, "grad_norm": 0.375, "learning_rate": 2.7804671786226325e-05, "loss": 0.4484, "step": 64825 }, { "epoch": 1.6456194235382213, "grad_norm": 0.349609375, "learning_rate": 2.778540372959221e-05, "loss": 0.4093, "step": 64830 }, { "epoch": 1.645746341587237, "grad_norm": 0.345703125, "learning_rate": 2.7766141669979657e-05, "loss": 0.3902, "step": 64835 }, { "epoch": 1.6458732596362529, "grad_norm": 0.345703125, "learning_rate": 2.774688560833383e-05, "loss": 0.412, "step": 64840 }, { "epoch": 1.6460001776852686, "grad_norm": 0.345703125, "learning_rate": 2.7727635545599635e-05, "loss": 0.424, "step": 64845 }, { "epoch": 1.6461270957342844, "grad_norm": 0.35546875, "learning_rate": 2.7708391482721653e-05, "loss": 0.4363, "step": 64850 }, { "epoch": 1.6462540137833002, "grad_norm": 0.376953125, "learning_rate": 2.768915342064418e-05, "loss": 0.434, "step": 64855 }, { "epoch": 1.646380931832316, "grad_norm": 0.357421875, "learning_rate": 2.7669921360311236e-05, "loss": 0.406, "step": 64860 }, { "epoch": 1.6465078498813317, "grad_norm": 0.310546875, "learning_rate": 2.7650695302666525e-05, "loss": 0.3876, "step": 64865 }, { "epoch": 1.6466347679303475, "grad_norm": 0.326171875, "learning_rate": 2.7631475248653457e-05, "loss": 0.3882, "step": 64870 }, { "epoch": 1.6467616859793632, "grad_norm": 0.333984375, "learning_rate": 2.761226119921511e-05, "loss": 0.4146, "step": 64875 }, { "epoch": 1.646888604028379, "grad_norm": 0.3515625, "learning_rate": 2.7593053155294394e-05, "loss": 0.4011, "step": 64880 }, { "epoch": 1.6470155220773948, "grad_norm": 0.37109375, "learning_rate": 2.7573851117833806e-05, "loss": 0.4268, "step": 64885 }, { "epoch": 1.6471424401264105, "grad_norm": 0.330078125, "learning_rate": 2.755465508777553e-05, "loss": 0.4021, "step": 64890 }, { "epoch": 1.647269358175426, "grad_norm": 0.349609375, "learning_rate": 2.7535465066061606e-05, "loss": 0.4111, "step": 64895 }, { "epoch": 1.6473962762244418, "grad_norm": 0.373046875, "learning_rate": 2.7516281053633622e-05, "loss": 0.4263, "step": 64900 }, { "epoch": 1.6475231942734576, "grad_norm": 0.37890625, "learning_rate": 2.7497103051432918e-05, "loss": 0.4223, "step": 64905 }, { "epoch": 1.6476501123224734, "grad_norm": 0.341796875, "learning_rate": 2.7477931060400593e-05, "loss": 0.4205, "step": 64910 }, { "epoch": 1.6477770303714891, "grad_norm": 0.345703125, "learning_rate": 2.745876508147738e-05, "loss": 0.3947, "step": 64915 }, { "epoch": 1.647903948420505, "grad_norm": 0.369140625, "learning_rate": 2.7439605115603742e-05, "loss": 0.4152, "step": 64920 }, { "epoch": 1.6480308664695207, "grad_norm": 0.341796875, "learning_rate": 2.7420451163719877e-05, "loss": 0.4044, "step": 64925 }, { "epoch": 1.6481577845185362, "grad_norm": 0.37890625, "learning_rate": 2.740130322676563e-05, "loss": 0.4164, "step": 64930 }, { "epoch": 1.648284702567552, "grad_norm": 0.337890625, "learning_rate": 2.7382161305680604e-05, "loss": 0.4226, "step": 64935 }, { "epoch": 1.6484116206165678, "grad_norm": 0.361328125, "learning_rate": 2.7363025401404066e-05, "loss": 0.3985, "step": 64940 }, { "epoch": 1.6485385386655835, "grad_norm": 0.337890625, "learning_rate": 2.7343895514875042e-05, "loss": 0.4085, "step": 64945 }, { "epoch": 1.6486654567145993, "grad_norm": 0.349609375, "learning_rate": 2.732477164703215e-05, "loss": 0.4318, "step": 64950 }, { "epoch": 1.648792374763615, "grad_norm": 0.33984375, "learning_rate": 2.7305653798813892e-05, "loss": 0.3938, "step": 64955 }, { "epoch": 1.6489192928126308, "grad_norm": 0.322265625, "learning_rate": 2.7286541971158334e-05, "loss": 0.4114, "step": 64960 }, { "epoch": 1.6490462108616466, "grad_norm": 0.333984375, "learning_rate": 2.7267436165003235e-05, "loss": 0.4089, "step": 64965 }, { "epoch": 1.6491731289106624, "grad_norm": 0.380859375, "learning_rate": 2.724833638128619e-05, "loss": 0.3884, "step": 64970 }, { "epoch": 1.6493000469596781, "grad_norm": 0.34765625, "learning_rate": 2.7229242620944392e-05, "loss": 0.4207, "step": 64975 }, { "epoch": 1.649426965008694, "grad_norm": 0.3671875, "learning_rate": 2.721015488491474e-05, "loss": 0.4087, "step": 64980 }, { "epoch": 1.6495538830577097, "grad_norm": 0.388671875, "learning_rate": 2.7191073174133878e-05, "loss": 0.4353, "step": 64985 }, { "epoch": 1.6496808011067254, "grad_norm": 0.333984375, "learning_rate": 2.7171997489538143e-05, "loss": 0.3849, "step": 64990 }, { "epoch": 1.6498077191557412, "grad_norm": 0.333984375, "learning_rate": 2.7152927832063564e-05, "loss": 0.4135, "step": 64995 }, { "epoch": 1.649934637204757, "grad_norm": 0.34765625, "learning_rate": 2.7133864202645873e-05, "loss": 0.405, "step": 65000 }, { "epoch": 1.6500742470586742, "grad_norm": 0.353515625, "learning_rate": 2.7114806602220534e-05, "loss": 0.4227, "step": 65005 }, { "epoch": 1.65020116510769, "grad_norm": 0.357421875, "learning_rate": 2.709575503172267e-05, "loss": 0.431, "step": 65010 }, { "epoch": 1.6503280831567058, "grad_norm": 0.37109375, "learning_rate": 2.7076709492087117e-05, "loss": 0.4223, "step": 65015 }, { "epoch": 1.6504550012057213, "grad_norm": 0.333984375, "learning_rate": 2.705766998424851e-05, "loss": 0.4229, "step": 65020 }, { "epoch": 1.650581919254737, "grad_norm": 0.3125, "learning_rate": 2.7038636509141067e-05, "loss": 0.4026, "step": 65025 }, { "epoch": 1.6507088373037528, "grad_norm": 0.314453125, "learning_rate": 2.7019609067698738e-05, "loss": 0.4245, "step": 65030 }, { "epoch": 1.6508357553527686, "grad_norm": 0.34765625, "learning_rate": 2.70005876608552e-05, "loss": 0.4285, "step": 65035 }, { "epoch": 1.6509626734017844, "grad_norm": 0.3515625, "learning_rate": 2.6981572289543833e-05, "loss": 0.4115, "step": 65040 }, { "epoch": 1.6510895914508001, "grad_norm": 0.330078125, "learning_rate": 2.696256295469768e-05, "loss": 0.4054, "step": 65045 }, { "epoch": 1.651216509499816, "grad_norm": 0.357421875, "learning_rate": 2.6943559657249593e-05, "loss": 0.4165, "step": 65050 }, { "epoch": 1.6513434275488317, "grad_norm": 0.328125, "learning_rate": 2.6924562398132003e-05, "loss": 0.4152, "step": 65055 }, { "epoch": 1.6514703455978474, "grad_norm": 0.365234375, "learning_rate": 2.6905571178277107e-05, "loss": 0.4382, "step": 65060 }, { "epoch": 1.6515972636468632, "grad_norm": 0.34375, "learning_rate": 2.6886585998616822e-05, "loss": 0.4389, "step": 65065 }, { "epoch": 1.651724181695879, "grad_norm": 0.3359375, "learning_rate": 2.6867606860082703e-05, "loss": 0.4259, "step": 65070 }, { "epoch": 1.6518510997448947, "grad_norm": 0.361328125, "learning_rate": 2.684863376360607e-05, "loss": 0.4181, "step": 65075 }, { "epoch": 1.6519780177939105, "grad_norm": 0.349609375, "learning_rate": 2.682966671011788e-05, "loss": 0.4323, "step": 65080 }, { "epoch": 1.6521049358429263, "grad_norm": 0.353515625, "learning_rate": 2.6810705700548924e-05, "loss": 0.4364, "step": 65085 }, { "epoch": 1.652231853891942, "grad_norm": 0.35546875, "learning_rate": 2.679175073582957e-05, "loss": 0.4409, "step": 65090 }, { "epoch": 1.6523587719409578, "grad_norm": 0.333984375, "learning_rate": 2.6772801816889926e-05, "loss": 0.435, "step": 65095 }, { "epoch": 1.6524856899899736, "grad_norm": 0.318359375, "learning_rate": 2.6753858944659817e-05, "loss": 0.3788, "step": 65100 }, { "epoch": 1.6526126080389894, "grad_norm": 0.34765625, "learning_rate": 2.6734922120068747e-05, "loss": 0.4126, "step": 65105 }, { "epoch": 1.6527395260880051, "grad_norm": 0.357421875, "learning_rate": 2.6715991344045957e-05, "loss": 0.4078, "step": 65110 }, { "epoch": 1.6528664441370209, "grad_norm": 0.365234375, "learning_rate": 2.669706661752035e-05, "loss": 0.4295, "step": 65115 }, { "epoch": 1.6529933621860364, "grad_norm": 0.3515625, "learning_rate": 2.6678147941420525e-05, "loss": 0.407, "step": 65120 }, { "epoch": 1.6531202802350522, "grad_norm": 0.333984375, "learning_rate": 2.6659235316674915e-05, "loss": 0.4061, "step": 65125 }, { "epoch": 1.653247198284068, "grad_norm": 0.376953125, "learning_rate": 2.6640328744211482e-05, "loss": 0.4352, "step": 65130 }, { "epoch": 1.6533741163330837, "grad_norm": 0.369140625, "learning_rate": 2.662142822495798e-05, "loss": 0.4204, "step": 65135 }, { "epoch": 1.6535010343820995, "grad_norm": 0.36328125, "learning_rate": 2.6602533759841803e-05, "loss": 0.398, "step": 65140 }, { "epoch": 1.6536279524311153, "grad_norm": 0.35546875, "learning_rate": 2.6583645349790194e-05, "loss": 0.4202, "step": 65145 }, { "epoch": 1.653754870480131, "grad_norm": 0.3515625, "learning_rate": 2.6564762995729943e-05, "loss": 0.4063, "step": 65150 }, { "epoch": 1.6538817885291466, "grad_norm": 0.341796875, "learning_rate": 2.6545886698587592e-05, "loss": 0.4425, "step": 65155 }, { "epoch": 1.6540087065781623, "grad_norm": 0.326171875, "learning_rate": 2.652701645928942e-05, "loss": 0.4102, "step": 65160 }, { "epoch": 1.6541356246271781, "grad_norm": 0.333984375, "learning_rate": 2.6508152278761345e-05, "loss": 0.3895, "step": 65165 }, { "epoch": 1.6542625426761939, "grad_norm": 0.3359375, "learning_rate": 2.648929415792907e-05, "loss": 0.4144, "step": 65170 }, { "epoch": 1.6543894607252096, "grad_norm": 0.35546875, "learning_rate": 2.6470442097717913e-05, "loss": 0.3844, "step": 65175 }, { "epoch": 1.6545163787742254, "grad_norm": 0.349609375, "learning_rate": 2.6451596099052956e-05, "loss": 0.4187, "step": 65180 }, { "epoch": 1.6546432968232412, "grad_norm": 0.3359375, "learning_rate": 2.6432756162858976e-05, "loss": 0.428, "step": 65185 }, { "epoch": 1.654770214872257, "grad_norm": 0.353515625, "learning_rate": 2.6413922290060414e-05, "loss": 0.4133, "step": 65190 }, { "epoch": 1.6548971329212727, "grad_norm": 0.328125, "learning_rate": 2.6395094481581464e-05, "loss": 0.4771, "step": 65195 }, { "epoch": 1.6550240509702885, "grad_norm": 0.34765625, "learning_rate": 2.6376272738345956e-05, "loss": 0.4163, "step": 65200 }, { "epoch": 1.6551509690193043, "grad_norm": 0.34765625, "learning_rate": 2.635745706127753e-05, "loss": 0.4013, "step": 65205 }, { "epoch": 1.65527788706832, "grad_norm": 0.36328125, "learning_rate": 2.6338647451299406e-05, "loss": 0.439, "step": 65210 }, { "epoch": 1.6554048051173358, "grad_norm": 0.353515625, "learning_rate": 2.6319843909334622e-05, "loss": 0.4125, "step": 65215 }, { "epoch": 1.6555317231663516, "grad_norm": 0.359375, "learning_rate": 2.6301046436305838e-05, "loss": 0.4267, "step": 65220 }, { "epoch": 1.6556586412153673, "grad_norm": 0.353515625, "learning_rate": 2.6282255033135425e-05, "loss": 0.4136, "step": 65225 }, { "epoch": 1.655785559264383, "grad_norm": 0.361328125, "learning_rate": 2.6263469700745487e-05, "loss": 0.4207, "step": 65230 }, { "epoch": 1.6559124773133989, "grad_norm": 0.3515625, "learning_rate": 2.6244690440057793e-05, "loss": 0.3992, "step": 65235 }, { "epoch": 1.6560393953624146, "grad_norm": 0.33203125, "learning_rate": 2.6225917251993832e-05, "loss": 0.3831, "step": 65240 }, { "epoch": 1.6561663134114304, "grad_norm": 0.310546875, "learning_rate": 2.6207150137474825e-05, "loss": 0.3958, "step": 65245 }, { "epoch": 1.656293231460446, "grad_norm": 0.337890625, "learning_rate": 2.618838909742163e-05, "loss": 0.4249, "step": 65250 }, { "epoch": 1.6564201495094617, "grad_norm": 0.3359375, "learning_rate": 2.6169634132754878e-05, "loss": 0.4288, "step": 65255 }, { "epoch": 1.6565470675584775, "grad_norm": 0.3515625, "learning_rate": 2.6150885244394853e-05, "loss": 0.4503, "step": 65260 }, { "epoch": 1.6566739856074932, "grad_norm": 0.35546875, "learning_rate": 2.6132142433261498e-05, "loss": 0.4589, "step": 65265 }, { "epoch": 1.656800903656509, "grad_norm": 0.33203125, "learning_rate": 2.611340570027463e-05, "loss": 0.4049, "step": 65270 }, { "epoch": 1.6569278217055248, "grad_norm": 0.3515625, "learning_rate": 2.6094675046353598e-05, "loss": 0.4191, "step": 65275 }, { "epoch": 1.6570547397545405, "grad_norm": 0.306640625, "learning_rate": 2.6075950472417494e-05, "loss": 0.4114, "step": 65280 }, { "epoch": 1.657181657803556, "grad_norm": 0.361328125, "learning_rate": 2.6057231979385142e-05, "loss": 0.4293, "step": 65285 }, { "epoch": 1.6573085758525719, "grad_norm": 0.34765625, "learning_rate": 2.6038519568174997e-05, "loss": 0.4351, "step": 65290 }, { "epoch": 1.6574354939015876, "grad_norm": 0.337890625, "learning_rate": 2.6019813239705367e-05, "loss": 0.4102, "step": 65295 }, { "epoch": 1.6575624119506034, "grad_norm": 0.330078125, "learning_rate": 2.6001112994894095e-05, "loss": 0.4415, "step": 65300 }, { "epoch": 1.6576893299996192, "grad_norm": 0.36328125, "learning_rate": 2.5982418834658836e-05, "loss": 0.3995, "step": 65305 }, { "epoch": 1.657816248048635, "grad_norm": 0.35546875, "learning_rate": 2.5963730759916885e-05, "loss": 0.4429, "step": 65310 }, { "epoch": 1.6579431660976507, "grad_norm": 0.349609375, "learning_rate": 2.594504877158524e-05, "loss": 0.4044, "step": 65315 }, { "epoch": 1.6580700841466665, "grad_norm": 0.353515625, "learning_rate": 2.5926372870580642e-05, "loss": 0.4101, "step": 65320 }, { "epoch": 1.6581970021956822, "grad_norm": 0.3515625, "learning_rate": 2.5907703057819517e-05, "loss": 0.4284, "step": 65325 }, { "epoch": 1.658323920244698, "grad_norm": 0.353515625, "learning_rate": 2.588903933421792e-05, "loss": 0.4146, "step": 65330 }, { "epoch": 1.6584508382937138, "grad_norm": 0.388671875, "learning_rate": 2.5870381700691767e-05, "loss": 0.4264, "step": 65335 }, { "epoch": 1.6585777563427295, "grad_norm": 0.333984375, "learning_rate": 2.5851730158156542e-05, "loss": 0.41, "step": 65340 }, { "epoch": 1.6587046743917453, "grad_norm": 0.375, "learning_rate": 2.5833084707527445e-05, "loss": 0.4187, "step": 65345 }, { "epoch": 1.658831592440761, "grad_norm": 0.341796875, "learning_rate": 2.5814445349719448e-05, "loss": 0.41, "step": 65350 }, { "epoch": 1.6589585104897768, "grad_norm": 0.333984375, "learning_rate": 2.5795812085647134e-05, "loss": 0.4197, "step": 65355 }, { "epoch": 1.6590854285387926, "grad_norm": 0.3828125, "learning_rate": 2.5777184916224854e-05, "loss": 0.4487, "step": 65360 }, { "epoch": 1.6592123465878084, "grad_norm": 0.384765625, "learning_rate": 2.5758563842366575e-05, "loss": 0.4305, "step": 65365 }, { "epoch": 1.6593392646368241, "grad_norm": 0.380859375, "learning_rate": 2.573994886498612e-05, "loss": 0.4037, "step": 65370 }, { "epoch": 1.65946618268584, "grad_norm": 0.318359375, "learning_rate": 2.572133998499689e-05, "loss": 0.411, "step": 65375 }, { "epoch": 1.6595931007348557, "grad_norm": 0.375, "learning_rate": 2.5702737203311992e-05, "loss": 0.4133, "step": 65380 }, { "epoch": 1.6597200187838712, "grad_norm": 0.35546875, "learning_rate": 2.5684140520844272e-05, "loss": 0.4201, "step": 65385 }, { "epoch": 1.659846936832887, "grad_norm": 0.36328125, "learning_rate": 2.5665549938506235e-05, "loss": 0.407, "step": 65390 }, { "epoch": 1.6599738548819027, "grad_norm": 0.33984375, "learning_rate": 2.5646965457210124e-05, "loss": 0.424, "step": 65395 }, { "epoch": 1.6601007729309185, "grad_norm": 0.349609375, "learning_rate": 2.5628387077867903e-05, "loss": 0.4246, "step": 65400 }, { "epoch": 1.6602276909799343, "grad_norm": 0.33984375, "learning_rate": 2.5609814801391192e-05, "loss": 0.4162, "step": 65405 }, { "epoch": 1.66035460902895, "grad_norm": 0.35546875, "learning_rate": 2.5591248628691334e-05, "loss": 0.4491, "step": 65410 }, { "epoch": 1.6604815270779656, "grad_norm": 0.3359375, "learning_rate": 2.557268856067933e-05, "loss": 0.4194, "step": 65415 }, { "epoch": 1.6606084451269814, "grad_norm": 0.32421875, "learning_rate": 2.555413459826594e-05, "loss": 0.4016, "step": 65420 }, { "epoch": 1.6607353631759971, "grad_norm": 0.3359375, "learning_rate": 2.5535586742361598e-05, "loss": 0.3977, "step": 65425 }, { "epoch": 1.660862281225013, "grad_norm": 0.337890625, "learning_rate": 2.5517044993876422e-05, "loss": 0.4181, "step": 65430 }, { "epoch": 1.6609891992740287, "grad_norm": 0.337890625, "learning_rate": 2.5498509353720276e-05, "loss": 0.4042, "step": 65435 }, { "epoch": 1.6611161173230444, "grad_norm": 0.359375, "learning_rate": 2.5479979822802678e-05, "loss": 0.3739, "step": 65440 }, { "epoch": 1.6612430353720602, "grad_norm": 0.365234375, "learning_rate": 2.5461456402032814e-05, "loss": 0.4228, "step": 65445 }, { "epoch": 1.661369953421076, "grad_norm": 0.3359375, "learning_rate": 2.544293909231973e-05, "loss": 0.3894, "step": 65450 }, { "epoch": 1.6614968714700917, "grad_norm": 0.302734375, "learning_rate": 2.5424427894571964e-05, "loss": 0.3941, "step": 65455 }, { "epoch": 1.6616237895191075, "grad_norm": 0.34765625, "learning_rate": 2.5405922809697954e-05, "loss": 0.4144, "step": 65460 }, { "epoch": 1.6617507075681233, "grad_norm": 0.353515625, "learning_rate": 2.5387423838605657e-05, "loss": 0.4083, "step": 65465 }, { "epoch": 1.661877625617139, "grad_norm": 0.34765625, "learning_rate": 2.536893098220285e-05, "loss": 0.4012, "step": 65470 }, { "epoch": 1.6620045436661548, "grad_norm": 0.365234375, "learning_rate": 2.535044424139694e-05, "loss": 0.3995, "step": 65475 }, { "epoch": 1.6621314617151706, "grad_norm": 0.345703125, "learning_rate": 2.533196361709508e-05, "loss": 0.4086, "step": 65480 }, { "epoch": 1.6622583797641863, "grad_norm": 0.353515625, "learning_rate": 2.5313489110204117e-05, "loss": 0.4323, "step": 65485 }, { "epoch": 1.662385297813202, "grad_norm": 0.39453125, "learning_rate": 2.5295020721630572e-05, "loss": 0.4466, "step": 65490 }, { "epoch": 1.6625122158622179, "grad_norm": 0.349609375, "learning_rate": 2.527655845228069e-05, "loss": 0.4087, "step": 65495 }, { "epoch": 1.6626391339112336, "grad_norm": 0.376953125, "learning_rate": 2.525810230306039e-05, "loss": 0.415, "step": 65500 }, { "epoch": 1.6627660519602494, "grad_norm": 0.34765625, "learning_rate": 2.523965227487532e-05, "loss": 0.374, "step": 65505 }, { "epoch": 1.6628929700092652, "grad_norm": 0.345703125, "learning_rate": 2.5221208368630823e-05, "loss": 0.3925, "step": 65510 }, { "epoch": 1.6630198880582807, "grad_norm": 0.341796875, "learning_rate": 2.5202770585231923e-05, "loss": 0.4096, "step": 65515 }, { "epoch": 1.6631468061072965, "grad_norm": 0.35546875, "learning_rate": 2.5184338925583313e-05, "loss": 0.4121, "step": 65520 }, { "epoch": 1.6632737241563122, "grad_norm": 0.34375, "learning_rate": 2.516591339058952e-05, "loss": 0.425, "step": 65525 }, { "epoch": 1.663400642205328, "grad_norm": 0.37109375, "learning_rate": 2.5147493981154638e-05, "loss": 0.4426, "step": 65530 }, { "epoch": 1.6635275602543438, "grad_norm": 0.3359375, "learning_rate": 2.512908069818244e-05, "loss": 0.4139, "step": 65535 }, { "epoch": 1.6636544783033596, "grad_norm": 0.3203125, "learning_rate": 2.511067354257656e-05, "loss": 0.4071, "step": 65540 }, { "epoch": 1.6637813963523753, "grad_norm": 0.380859375, "learning_rate": 2.5092272515240184e-05, "loss": 0.4316, "step": 65545 }, { "epoch": 1.6639083144013909, "grad_norm": 0.359375, "learning_rate": 2.5073877617076232e-05, "loss": 0.4161, "step": 65550 }, { "epoch": 1.6640352324504066, "grad_norm": 0.328125, "learning_rate": 2.505548884898734e-05, "loss": 0.3968, "step": 65555 }, { "epoch": 1.6641621504994224, "grad_norm": 0.3515625, "learning_rate": 2.5037106211875847e-05, "loss": 0.4222, "step": 65560 }, { "epoch": 1.6642890685484382, "grad_norm": 0.353515625, "learning_rate": 2.5018729706643752e-05, "loss": 0.4129, "step": 65565 }, { "epoch": 1.664415986597454, "grad_norm": 0.361328125, "learning_rate": 2.5000359334192828e-05, "loss": 0.4336, "step": 65570 }, { "epoch": 1.6645429046464697, "grad_norm": 0.34765625, "learning_rate": 2.4981995095424477e-05, "loss": 0.4032, "step": 65575 }, { "epoch": 1.6646698226954855, "grad_norm": 0.306640625, "learning_rate": 2.4963636991239823e-05, "loss": 0.3985, "step": 65580 }, { "epoch": 1.6647967407445012, "grad_norm": 0.349609375, "learning_rate": 2.494528502253965e-05, "loss": 0.4127, "step": 65585 }, { "epoch": 1.664923658793517, "grad_norm": 0.326171875, "learning_rate": 2.4926939190224583e-05, "loss": 0.4002, "step": 65590 }, { "epoch": 1.6650505768425328, "grad_norm": 0.35546875, "learning_rate": 2.4908599495194775e-05, "loss": 0.4352, "step": 65595 }, { "epoch": 1.6651774948915485, "grad_norm": 0.357421875, "learning_rate": 2.4890265938350162e-05, "loss": 0.4397, "step": 65600 }, { "epoch": 1.6653044129405643, "grad_norm": 0.359375, "learning_rate": 2.4871938520590358e-05, "loss": 0.4253, "step": 65605 }, { "epoch": 1.66543133098958, "grad_norm": 0.37109375, "learning_rate": 2.4853617242814654e-05, "loss": 0.4084, "step": 65610 }, { "epoch": 1.6655582490385958, "grad_norm": 0.359375, "learning_rate": 2.483530210592214e-05, "loss": 0.4119, "step": 65615 }, { "epoch": 1.6656851670876116, "grad_norm": 0.341796875, "learning_rate": 2.4816993110811507e-05, "loss": 0.4015, "step": 65620 }, { "epoch": 1.6658120851366274, "grad_norm": 0.349609375, "learning_rate": 2.479869025838114e-05, "loss": 0.433, "step": 65625 }, { "epoch": 1.6659390031856431, "grad_norm": 0.361328125, "learning_rate": 2.4780393549529164e-05, "loss": 0.4459, "step": 65630 }, { "epoch": 1.666065921234659, "grad_norm": 0.361328125, "learning_rate": 2.4762102985153403e-05, "loss": 0.4123, "step": 65635 }, { "epoch": 1.6661928392836747, "grad_norm": 0.349609375, "learning_rate": 2.4743818566151368e-05, "loss": 0.4326, "step": 65640 }, { "epoch": 1.6663197573326904, "grad_norm": 0.361328125, "learning_rate": 2.472554029342021e-05, "loss": 0.4208, "step": 65645 }, { "epoch": 1.666446675381706, "grad_norm": 0.33984375, "learning_rate": 2.470726816785694e-05, "loss": 0.424, "step": 65650 }, { "epoch": 1.6665735934307218, "grad_norm": 0.380859375, "learning_rate": 2.4689002190358098e-05, "loss": 0.3971, "step": 65655 }, { "epoch": 1.6667005114797375, "grad_norm": 0.34765625, "learning_rate": 2.4670742361820012e-05, "loss": 0.4069, "step": 65660 }, { "epoch": 1.6668274295287533, "grad_norm": 0.349609375, "learning_rate": 2.465248868313868e-05, "loss": 0.4067, "step": 65665 }, { "epoch": 1.666954347577769, "grad_norm": 0.365234375, "learning_rate": 2.4634241155209793e-05, "loss": 0.4345, "step": 65670 }, { "epoch": 1.6670812656267848, "grad_norm": 0.38671875, "learning_rate": 2.4615999778928747e-05, "loss": 0.4539, "step": 65675 }, { "epoch": 1.6672081836758004, "grad_norm": 0.333984375, "learning_rate": 2.459776455519064e-05, "loss": 0.3933, "step": 65680 }, { "epoch": 1.6673351017248161, "grad_norm": 0.330078125, "learning_rate": 2.4579535484890295e-05, "loss": 0.4198, "step": 65685 }, { "epoch": 1.667462019773832, "grad_norm": 0.369140625, "learning_rate": 2.4561312568922125e-05, "loss": 0.4157, "step": 65690 }, { "epoch": 1.6675889378228477, "grad_norm": 0.365234375, "learning_rate": 2.4543095808180436e-05, "loss": 0.4255, "step": 65695 }, { "epoch": 1.6677158558718634, "grad_norm": 0.349609375, "learning_rate": 2.452488520355904e-05, "loss": 0.404, "step": 65700 }, { "epoch": 1.6678427739208792, "grad_norm": 0.337890625, "learning_rate": 2.4506680755951565e-05, "loss": 0.4135, "step": 65705 }, { "epoch": 1.667969691969895, "grad_norm": 0.33984375, "learning_rate": 2.4488482466251236e-05, "loss": 0.3979, "step": 65710 }, { "epoch": 1.6680966100189107, "grad_norm": 0.337890625, "learning_rate": 2.4470290335351112e-05, "loss": 0.4176, "step": 65715 }, { "epoch": 1.6682235280679265, "grad_norm": 0.341796875, "learning_rate": 2.4452104364143825e-05, "loss": 0.4163, "step": 65720 }, { "epoch": 1.6683504461169423, "grad_norm": 0.34375, "learning_rate": 2.4433924553521783e-05, "loss": 0.427, "step": 65725 }, { "epoch": 1.668477364165958, "grad_norm": 0.365234375, "learning_rate": 2.4415750904377035e-05, "loss": 0.4179, "step": 65730 }, { "epoch": 1.6686042822149738, "grad_norm": 0.36328125, "learning_rate": 2.439758341760137e-05, "loss": 0.4137, "step": 65735 }, { "epoch": 1.6687312002639896, "grad_norm": 0.33984375, "learning_rate": 2.4379422094086237e-05, "loss": 0.4049, "step": 65740 }, { "epoch": 1.6688581183130053, "grad_norm": 0.353515625, "learning_rate": 2.4361266934722847e-05, "loss": 0.4005, "step": 65745 }, { "epoch": 1.6689850363620211, "grad_norm": 0.341796875, "learning_rate": 2.434311794040203e-05, "loss": 0.4228, "step": 65750 }, { "epoch": 1.6691119544110369, "grad_norm": 0.3515625, "learning_rate": 2.432497511201436e-05, "loss": 0.4369, "step": 65755 }, { "epoch": 1.6692388724600526, "grad_norm": 0.341796875, "learning_rate": 2.4306838450450095e-05, "loss": 0.4091, "step": 65760 }, { "epoch": 1.6693657905090684, "grad_norm": 0.333984375, "learning_rate": 2.428870795659917e-05, "loss": 0.4246, "step": 65765 }, { "epoch": 1.6694927085580842, "grad_norm": 0.357421875, "learning_rate": 2.4270583631351302e-05, "loss": 0.4083, "step": 65770 }, { "epoch": 1.6696196266071, "grad_norm": 0.326171875, "learning_rate": 2.4252465475595823e-05, "loss": 0.3953, "step": 65775 }, { "epoch": 1.6697465446561155, "grad_norm": 0.337890625, "learning_rate": 2.4234353490221732e-05, "loss": 0.4242, "step": 65780 }, { "epoch": 1.6698734627051313, "grad_norm": 0.375, "learning_rate": 2.4216247676117835e-05, "loss": 0.4486, "step": 65785 }, { "epoch": 1.670000380754147, "grad_norm": 0.359375, "learning_rate": 2.4198148034172588e-05, "loss": 0.4283, "step": 65790 }, { "epoch": 1.6701272988031628, "grad_norm": 0.333984375, "learning_rate": 2.418005456527408e-05, "loss": 0.4369, "step": 65795 }, { "epoch": 1.6702542168521786, "grad_norm": 0.33984375, "learning_rate": 2.416196727031019e-05, "loss": 0.4201, "step": 65800 }, { "epoch": 1.6703811349011943, "grad_norm": 0.33203125, "learning_rate": 2.4143886150168434e-05, "loss": 0.3827, "step": 65805 }, { "epoch": 1.67050805295021, "grad_norm": 0.3515625, "learning_rate": 2.4125811205736055e-05, "loss": 0.4224, "step": 65810 }, { "epoch": 1.6706349709992256, "grad_norm": 0.373046875, "learning_rate": 2.410774243789998e-05, "loss": 0.4254, "step": 65815 }, { "epoch": 1.6707618890482414, "grad_norm": 0.375, "learning_rate": 2.408967984754683e-05, "loss": 0.4319, "step": 65820 }, { "epoch": 1.6708888070972572, "grad_norm": 0.37109375, "learning_rate": 2.4071623435562944e-05, "loss": 0.4335, "step": 65825 }, { "epoch": 1.671015725146273, "grad_norm": 0.3671875, "learning_rate": 2.405357320283433e-05, "loss": 0.4015, "step": 65830 }, { "epoch": 1.6711426431952887, "grad_norm": 0.349609375, "learning_rate": 2.403552915024667e-05, "loss": 0.4105, "step": 65835 }, { "epoch": 1.6712695612443045, "grad_norm": 0.36328125, "learning_rate": 2.4017491278685448e-05, "loss": 0.45, "step": 65840 }, { "epoch": 1.6713964792933202, "grad_norm": 0.345703125, "learning_rate": 2.399945958903576e-05, "loss": 0.3795, "step": 65845 }, { "epoch": 1.671523397342336, "grad_norm": 0.353515625, "learning_rate": 2.3981434082182376e-05, "loss": 0.4335, "step": 65850 }, { "epoch": 1.6716503153913518, "grad_norm": 0.373046875, "learning_rate": 2.3963414759009815e-05, "loss": 0.4246, "step": 65855 }, { "epoch": 1.6717772334403675, "grad_norm": 0.376953125, "learning_rate": 2.394540162040231e-05, "loss": 0.4259, "step": 65860 }, { "epoch": 1.6719041514893833, "grad_norm": 0.345703125, "learning_rate": 2.3927394667243727e-05, "loss": 0.4113, "step": 65865 }, { "epoch": 1.672031069538399, "grad_norm": 0.3203125, "learning_rate": 2.3909393900417672e-05, "loss": 0.4072, "step": 65870 }, { "epoch": 1.6721579875874149, "grad_norm": 0.373046875, "learning_rate": 2.389139932080744e-05, "loss": 0.4224, "step": 65875 }, { "epoch": 1.6722849056364306, "grad_norm": 0.341796875, "learning_rate": 2.3873410929295992e-05, "loss": 0.4111, "step": 65880 }, { "epoch": 1.6724118236854464, "grad_norm": 0.35546875, "learning_rate": 2.385542872676604e-05, "loss": 0.438, "step": 65885 }, { "epoch": 1.6725387417344622, "grad_norm": 0.33984375, "learning_rate": 2.3837452714099958e-05, "loss": 0.406, "step": 65890 }, { "epoch": 1.672665659783478, "grad_norm": 0.384765625, "learning_rate": 2.381948289217981e-05, "loss": 0.397, "step": 65895 }, { "epoch": 1.6727925778324937, "grad_norm": 0.376953125, "learning_rate": 2.3801519261887335e-05, "loss": 0.4276, "step": 65900 }, { "epoch": 1.6729194958815095, "grad_norm": 0.337890625, "learning_rate": 2.378356182410407e-05, "loss": 0.3846, "step": 65905 }, { "epoch": 1.6730464139305252, "grad_norm": 0.353515625, "learning_rate": 2.376561057971117e-05, "loss": 0.4404, "step": 65910 }, { "epoch": 1.6731733319795408, "grad_norm": 0.361328125, "learning_rate": 2.3747665529589448e-05, "loss": 0.3949, "step": 65915 }, { "epoch": 1.6733002500285565, "grad_norm": 0.3359375, "learning_rate": 2.3729726674619505e-05, "loss": 0.4383, "step": 65920 }, { "epoch": 1.6734271680775723, "grad_norm": 0.361328125, "learning_rate": 2.3711794015681577e-05, "loss": 0.4162, "step": 65925 }, { "epoch": 1.673554086126588, "grad_norm": 0.349609375, "learning_rate": 2.3693867553655605e-05, "loss": 0.4102, "step": 65930 }, { "epoch": 1.6736810041756038, "grad_norm": 0.361328125, "learning_rate": 2.3675947289421194e-05, "loss": 0.4343, "step": 65935 }, { "epoch": 1.6738079222246196, "grad_norm": 0.349609375, "learning_rate": 2.3658033223857774e-05, "loss": 0.4372, "step": 65940 }, { "epoch": 1.6739348402736351, "grad_norm": 0.34765625, "learning_rate": 2.364012535784433e-05, "loss": 0.3887, "step": 65945 }, { "epoch": 1.674061758322651, "grad_norm": 0.37890625, "learning_rate": 2.3622223692259623e-05, "loss": 0.4277, "step": 65950 }, { "epoch": 1.6741886763716667, "grad_norm": 0.341796875, "learning_rate": 2.3604328227982035e-05, "loss": 0.3861, "step": 65955 }, { "epoch": 1.6743155944206825, "grad_norm": 0.3515625, "learning_rate": 2.3586438965889677e-05, "loss": 0.4275, "step": 65960 }, { "epoch": 1.6744425124696982, "grad_norm": 0.359375, "learning_rate": 2.3568555906860446e-05, "loss": 0.4317, "step": 65965 }, { "epoch": 1.674569430518714, "grad_norm": 0.35546875, "learning_rate": 2.3550679051771813e-05, "loss": 0.4377, "step": 65970 }, { "epoch": 1.6746963485677298, "grad_norm": 0.361328125, "learning_rate": 2.353280840150099e-05, "loss": 0.399, "step": 65975 }, { "epoch": 1.6748232666167455, "grad_norm": 0.34765625, "learning_rate": 2.3514943956924893e-05, "loss": 0.4219, "step": 65980 }, { "epoch": 1.6749501846657613, "grad_norm": 0.3515625, "learning_rate": 2.34970857189201e-05, "loss": 0.4115, "step": 65985 }, { "epoch": 1.675077102714777, "grad_norm": 0.3515625, "learning_rate": 2.3479233688362914e-05, "loss": 0.443, "step": 65990 }, { "epoch": 1.6752040207637928, "grad_norm": 0.326171875, "learning_rate": 2.3461387866129332e-05, "loss": 0.3967, "step": 65995 }, { "epoch": 1.6753309388128086, "grad_norm": 0.333984375, "learning_rate": 2.3443548253095055e-05, "loss": 0.4281, "step": 66000 }, { "epoch": 1.6754578568618244, "grad_norm": 0.365234375, "learning_rate": 2.3425714850135436e-05, "loss": 0.4741, "step": 66005 }, { "epoch": 1.6755847749108401, "grad_norm": 0.392578125, "learning_rate": 2.3407887658125553e-05, "loss": 0.4221, "step": 66010 }, { "epoch": 1.675711692959856, "grad_norm": 0.353515625, "learning_rate": 2.3390066677940223e-05, "loss": 0.4226, "step": 66015 }, { "epoch": 1.6758386110088717, "grad_norm": 0.34765625, "learning_rate": 2.337225191045388e-05, "loss": 0.4387, "step": 66020 }, { "epoch": 1.6759655290578874, "grad_norm": 0.3515625, "learning_rate": 2.3354443356540663e-05, "loss": 0.4354, "step": 66025 }, { "epoch": 1.6760924471069032, "grad_norm": 0.3515625, "learning_rate": 2.3336641017074502e-05, "loss": 0.4275, "step": 66030 }, { "epoch": 1.676219365155919, "grad_norm": 0.3203125, "learning_rate": 2.3318844892928917e-05, "loss": 0.3838, "step": 66035 }, { "epoch": 1.6763462832049347, "grad_norm": 0.33203125, "learning_rate": 2.3301054984977142e-05, "loss": 0.4098, "step": 66040 }, { "epoch": 1.6764732012539503, "grad_norm": 0.36328125, "learning_rate": 2.3283271294092125e-05, "loss": 0.415, "step": 66045 }, { "epoch": 1.676600119302966, "grad_norm": 0.345703125, "learning_rate": 2.326549382114651e-05, "loss": 0.4448, "step": 66050 }, { "epoch": 1.6767270373519818, "grad_norm": 0.32421875, "learning_rate": 2.3247722567012638e-05, "loss": 0.4219, "step": 66055 }, { "epoch": 1.6768539554009976, "grad_norm": 0.3515625, "learning_rate": 2.322995753256252e-05, "loss": 0.4048, "step": 66060 }, { "epoch": 1.6769808734500133, "grad_norm": 0.326171875, "learning_rate": 2.321219871866789e-05, "loss": 0.4224, "step": 66065 }, { "epoch": 1.677107791499029, "grad_norm": 0.37890625, "learning_rate": 2.3194446126200183e-05, "loss": 0.4003, "step": 66070 }, { "epoch": 1.6772347095480449, "grad_norm": 0.3359375, "learning_rate": 2.317669975603047e-05, "loss": 0.3958, "step": 66075 }, { "epoch": 1.6773616275970604, "grad_norm": 0.3515625, "learning_rate": 2.315895960902959e-05, "loss": 0.4228, "step": 66080 }, { "epoch": 1.6774885456460762, "grad_norm": 0.345703125, "learning_rate": 2.3141225686068003e-05, "loss": 0.4016, "step": 66085 }, { "epoch": 1.677615463695092, "grad_norm": 0.376953125, "learning_rate": 2.312349798801596e-05, "loss": 0.4336, "step": 66090 }, { "epoch": 1.6777423817441077, "grad_norm": 0.3515625, "learning_rate": 2.310577651574334e-05, "loss": 0.3755, "step": 66095 }, { "epoch": 1.6778692997931235, "grad_norm": 0.361328125, "learning_rate": 2.3088061270119684e-05, "loss": 0.4312, "step": 66100 }, { "epoch": 1.6779962178421393, "grad_norm": 0.33984375, "learning_rate": 2.3070352252014346e-05, "loss": 0.4236, "step": 66105 }, { "epoch": 1.678123135891155, "grad_norm": 0.41015625, "learning_rate": 2.3052649462296262e-05, "loss": 0.4707, "step": 66110 }, { "epoch": 1.6782500539401708, "grad_norm": 0.375, "learning_rate": 2.3034952901834093e-05, "loss": 0.4362, "step": 66115 }, { "epoch": 1.6783769719891866, "grad_norm": 0.34375, "learning_rate": 2.301726257149622e-05, "loss": 0.3998, "step": 66120 }, { "epoch": 1.6785038900382023, "grad_norm": 0.33984375, "learning_rate": 2.299957847215069e-05, "loss": 0.4213, "step": 66125 }, { "epoch": 1.678630808087218, "grad_norm": 0.341796875, "learning_rate": 2.2981900604665248e-05, "loss": 0.4097, "step": 66130 }, { "epoch": 1.6787577261362339, "grad_norm": 0.34765625, "learning_rate": 2.2964228969907344e-05, "loss": 0.4032, "step": 66135 }, { "epoch": 1.6788846441852496, "grad_norm": 0.3359375, "learning_rate": 2.2946563568744132e-05, "loss": 0.3946, "step": 66140 }, { "epoch": 1.6790115622342654, "grad_norm": 0.37109375, "learning_rate": 2.2928904402042414e-05, "loss": 0.4075, "step": 66145 }, { "epoch": 1.6791384802832812, "grad_norm": 0.37109375, "learning_rate": 2.291125147066872e-05, "loss": 0.4492, "step": 66150 }, { "epoch": 1.679265398332297, "grad_norm": 0.353515625, "learning_rate": 2.2893604775489334e-05, "loss": 0.427, "step": 66155 }, { "epoch": 1.6793923163813127, "grad_norm": 0.33984375, "learning_rate": 2.2875964317370117e-05, "loss": 0.4302, "step": 66160 }, { "epoch": 1.6795192344303285, "grad_norm": 0.345703125, "learning_rate": 2.285833009717669e-05, "loss": 0.4179, "step": 66165 }, { "epoch": 1.6796461524793442, "grad_norm": 0.33203125, "learning_rate": 2.2840702115774366e-05, "loss": 0.4167, "step": 66170 }, { "epoch": 1.67977307052836, "grad_norm": 0.337890625, "learning_rate": 2.2823080374028137e-05, "loss": 0.4281, "step": 66175 }, { "epoch": 1.6798999885773755, "grad_norm": 0.337890625, "learning_rate": 2.280546487280267e-05, "loss": 0.4132, "step": 66180 }, { "epoch": 1.6800269066263913, "grad_norm": 0.345703125, "learning_rate": 2.2787855612962398e-05, "loss": 0.4193, "step": 66185 }, { "epoch": 1.680153824675407, "grad_norm": 0.3515625, "learning_rate": 2.2770252595371374e-05, "loss": 0.4236, "step": 66190 }, { "epoch": 1.6802807427244228, "grad_norm": 0.357421875, "learning_rate": 2.275265582089338e-05, "loss": 0.4174, "step": 66195 }, { "epoch": 1.6804076607734386, "grad_norm": 0.322265625, "learning_rate": 2.273506529039187e-05, "loss": 0.4203, "step": 66200 }, { "epoch": 1.6805345788224544, "grad_norm": 0.37890625, "learning_rate": 2.271748100473002e-05, "loss": 0.439, "step": 66205 }, { "epoch": 1.68066149687147, "grad_norm": 0.40234375, "learning_rate": 2.269990296477066e-05, "loss": 0.4274, "step": 66210 }, { "epoch": 1.6807884149204857, "grad_norm": 0.322265625, "learning_rate": 2.2682331171376334e-05, "loss": 0.4095, "step": 66215 }, { "epoch": 1.6809153329695015, "grad_norm": 0.376953125, "learning_rate": 2.2664765625409332e-05, "loss": 0.4219, "step": 66220 }, { "epoch": 1.6810422510185172, "grad_norm": 0.357421875, "learning_rate": 2.2647206327731564e-05, "loss": 0.413, "step": 66225 }, { "epoch": 1.681169169067533, "grad_norm": 0.361328125, "learning_rate": 2.2629653279204647e-05, "loss": 0.4216, "step": 66230 }, { "epoch": 1.6812960871165488, "grad_norm": 0.36328125, "learning_rate": 2.26121064806899e-05, "loss": 0.4241, "step": 66235 }, { "epoch": 1.6814230051655645, "grad_norm": 0.35546875, "learning_rate": 2.2594565933048343e-05, "loss": 0.4149, "step": 66240 }, { "epoch": 1.6815499232145803, "grad_norm": 0.3515625, "learning_rate": 2.2577031637140696e-05, "loss": 0.4156, "step": 66245 }, { "epoch": 1.681676841263596, "grad_norm": 0.349609375, "learning_rate": 2.2559503593827343e-05, "loss": 0.4226, "step": 66250 }, { "epoch": 1.6818037593126118, "grad_norm": 0.353515625, "learning_rate": 2.2541981803968362e-05, "loss": 0.4303, "step": 66255 }, { "epoch": 1.6819306773616276, "grad_norm": 0.349609375, "learning_rate": 2.252446626842358e-05, "loss": 0.4209, "step": 66260 }, { "epoch": 1.6820575954106434, "grad_norm": 0.35546875, "learning_rate": 2.250695698805246e-05, "loss": 0.4176, "step": 66265 }, { "epoch": 1.6821845134596591, "grad_norm": 0.359375, "learning_rate": 2.2489453963714182e-05, "loss": 0.4105, "step": 66270 }, { "epoch": 1.682311431508675, "grad_norm": 0.330078125, "learning_rate": 2.247195719626756e-05, "loss": 0.4268, "step": 66275 }, { "epoch": 1.6824383495576907, "grad_norm": 0.365234375, "learning_rate": 2.2454466686571253e-05, "loss": 0.4249, "step": 66280 }, { "epoch": 1.6825652676067064, "grad_norm": 0.341796875, "learning_rate": 2.2436982435483446e-05, "loss": 0.4215, "step": 66285 }, { "epoch": 1.6826921856557222, "grad_norm": 0.361328125, "learning_rate": 2.2419504443862102e-05, "loss": 0.4119, "step": 66290 }, { "epoch": 1.682819103704738, "grad_norm": 0.36328125, "learning_rate": 2.2402032712564848e-05, "loss": 0.4136, "step": 66295 }, { "epoch": 1.6829460217537537, "grad_norm": 0.34765625, "learning_rate": 2.238456724244902e-05, "loss": 0.412, "step": 66300 }, { "epoch": 1.6830729398027695, "grad_norm": 0.33984375, "learning_rate": 2.2367108034371645e-05, "loss": 0.4139, "step": 66305 }, { "epoch": 1.683199857851785, "grad_norm": 0.35546875, "learning_rate": 2.234965508918944e-05, "loss": 0.4161, "step": 66310 }, { "epoch": 1.6833267759008008, "grad_norm": 0.3203125, "learning_rate": 2.2332208407758816e-05, "loss": 0.3928, "step": 66315 }, { "epoch": 1.6834536939498166, "grad_norm": 0.345703125, "learning_rate": 2.231476799093586e-05, "loss": 0.4111, "step": 66320 }, { "epoch": 1.6835806119988324, "grad_norm": 0.36328125, "learning_rate": 2.2297333839576385e-05, "loss": 0.4352, "step": 66325 }, { "epoch": 1.6837075300478481, "grad_norm": 0.3671875, "learning_rate": 2.2279905954535876e-05, "loss": 0.4224, "step": 66330 }, { "epoch": 1.683834448096864, "grad_norm": 0.34765625, "learning_rate": 2.2262484336669462e-05, "loss": 0.4187, "step": 66335 }, { "epoch": 1.6839613661458797, "grad_norm": 0.35546875, "learning_rate": 2.2245068986832094e-05, "loss": 0.4289, "step": 66340 }, { "epoch": 1.6840882841948952, "grad_norm": 0.3515625, "learning_rate": 2.2227659905878286e-05, "loss": 0.408, "step": 66345 }, { "epoch": 1.684215202243911, "grad_norm": 0.361328125, "learning_rate": 2.2210257094662294e-05, "loss": 0.4156, "step": 66350 }, { "epoch": 1.6843421202929267, "grad_norm": 0.3515625, "learning_rate": 2.219286055403811e-05, "loss": 0.3967, "step": 66355 }, { "epoch": 1.6844690383419425, "grad_norm": 0.369140625, "learning_rate": 2.2175470284859358e-05, "loss": 0.4113, "step": 66360 }, { "epoch": 1.6845959563909583, "grad_norm": 0.376953125, "learning_rate": 2.2158086287979353e-05, "loss": 0.4565, "step": 66365 }, { "epoch": 1.684722874439974, "grad_norm": 0.369140625, "learning_rate": 2.214070856425113e-05, "loss": 0.4445, "step": 66370 }, { "epoch": 1.6848497924889898, "grad_norm": 0.35546875, "learning_rate": 2.2123337114527406e-05, "loss": 0.423, "step": 66375 }, { "epoch": 1.6849767105380056, "grad_norm": 0.369140625, "learning_rate": 2.2105971939660583e-05, "loss": 0.4164, "step": 66380 }, { "epoch": 1.6851036285870213, "grad_norm": 0.35546875, "learning_rate": 2.2088613040502777e-05, "loss": 0.4074, "step": 66385 }, { "epoch": 1.685230546636037, "grad_norm": 0.34765625, "learning_rate": 2.207126041790578e-05, "loss": 0.4303, "step": 66390 }, { "epoch": 1.6853574646850529, "grad_norm": 0.3671875, "learning_rate": 2.205391407272107e-05, "loss": 0.4281, "step": 66395 }, { "epoch": 1.6854843827340686, "grad_norm": 0.3125, "learning_rate": 2.20365740057998e-05, "loss": 0.3938, "step": 66400 }, { "epoch": 1.6856113007830844, "grad_norm": 0.353515625, "learning_rate": 2.201924021799291e-05, "loss": 0.4368, "step": 66405 }, { "epoch": 1.6857382188321002, "grad_norm": 0.380859375, "learning_rate": 2.2001912710150922e-05, "loss": 0.4433, "step": 66410 }, { "epoch": 1.685865136881116, "grad_norm": 0.33203125, "learning_rate": 2.198459148312408e-05, "loss": 0.4256, "step": 66415 }, { "epoch": 1.6859920549301317, "grad_norm": 0.3515625, "learning_rate": 2.1967276537762346e-05, "loss": 0.4256, "step": 66420 }, { "epoch": 1.6861189729791475, "grad_norm": 0.345703125, "learning_rate": 2.1949967874915318e-05, "loss": 0.394, "step": 66425 }, { "epoch": 1.6862458910281632, "grad_norm": 0.337890625, "learning_rate": 2.1932665495432388e-05, "loss": 0.4077, "step": 66430 }, { "epoch": 1.686372809077179, "grad_norm": 0.345703125, "learning_rate": 2.1915369400162558e-05, "loss": 0.406, "step": 66435 }, { "epoch": 1.6864997271261948, "grad_norm": 0.3125, "learning_rate": 2.1898079589954516e-05, "loss": 0.3887, "step": 66440 }, { "epoch": 1.6866266451752103, "grad_norm": 0.328125, "learning_rate": 2.1880796065656696e-05, "loss": 0.4197, "step": 66445 }, { "epoch": 1.686753563224226, "grad_norm": 0.337890625, "learning_rate": 2.186351882811716e-05, "loss": 0.3922, "step": 66450 }, { "epoch": 1.6868804812732419, "grad_norm": 0.337890625, "learning_rate": 2.1846247878183725e-05, "loss": 0.4149, "step": 66455 }, { "epoch": 1.6870073993222576, "grad_norm": 0.373046875, "learning_rate": 2.1828983216703866e-05, "loss": 0.437, "step": 66460 }, { "epoch": 1.6871343173712734, "grad_norm": 0.333984375, "learning_rate": 2.1811724844524685e-05, "loss": 0.4129, "step": 66465 }, { "epoch": 1.6872612354202892, "grad_norm": 0.337890625, "learning_rate": 2.179447276249316e-05, "loss": 0.4144, "step": 66470 }, { "epoch": 1.6873881534693047, "grad_norm": 0.34375, "learning_rate": 2.1777226971455775e-05, "loss": 0.4093, "step": 66475 }, { "epoch": 1.6875150715183205, "grad_norm": 0.375, "learning_rate": 2.1759987472258788e-05, "loss": 0.4364, "step": 66480 }, { "epoch": 1.6876419895673362, "grad_norm": 0.314453125, "learning_rate": 2.1742754265748143e-05, "loss": 0.3961, "step": 66485 }, { "epoch": 1.687768907616352, "grad_norm": 0.337890625, "learning_rate": 2.1725527352769457e-05, "loss": 0.4206, "step": 66490 }, { "epoch": 1.6878958256653678, "grad_norm": 0.32421875, "learning_rate": 2.1708306734168023e-05, "loss": 0.4316, "step": 66495 }, { "epoch": 1.6880227437143835, "grad_norm": 0.38671875, "learning_rate": 2.169109241078887e-05, "loss": 0.4158, "step": 66500 }, { "epoch": 1.6881496617633993, "grad_norm": 0.3515625, "learning_rate": 2.1673884383476713e-05, "loss": 0.3914, "step": 66505 }, { "epoch": 1.688276579812415, "grad_norm": 0.333984375, "learning_rate": 2.1656682653075934e-05, "loss": 0.4246, "step": 66510 }, { "epoch": 1.6884034978614308, "grad_norm": 0.341796875, "learning_rate": 2.1639487220430624e-05, "loss": 0.4153, "step": 66515 }, { "epoch": 1.6885304159104466, "grad_norm": 0.353515625, "learning_rate": 2.162229808638454e-05, "loss": 0.3932, "step": 66520 }, { "epoch": 1.6886573339594624, "grad_norm": 0.3359375, "learning_rate": 2.1605115251781152e-05, "loss": 0.4054, "step": 66525 }, { "epoch": 1.6887842520084781, "grad_norm": 0.337890625, "learning_rate": 2.1587938717463575e-05, "loss": 0.4225, "step": 66530 }, { "epoch": 1.688911170057494, "grad_norm": 0.369140625, "learning_rate": 2.1570768484274736e-05, "loss": 0.4307, "step": 66535 }, { "epoch": 1.6890380881065097, "grad_norm": 0.318359375, "learning_rate": 2.1553604553057126e-05, "loss": 0.4147, "step": 66540 }, { "epoch": 1.6891650061555255, "grad_norm": 0.33203125, "learning_rate": 2.1536446924652968e-05, "loss": 0.4108, "step": 66545 }, { "epoch": 1.6892919242045412, "grad_norm": 0.34375, "learning_rate": 2.1519295599904192e-05, "loss": 0.4049, "step": 66550 }, { "epoch": 1.689418842253557, "grad_norm": 0.3359375, "learning_rate": 2.1502150579652404e-05, "loss": 0.3984, "step": 66555 }, { "epoch": 1.6895457603025728, "grad_norm": 0.35546875, "learning_rate": 2.148501186473889e-05, "loss": 0.4405, "step": 66560 }, { "epoch": 1.6896726783515885, "grad_norm": 0.35546875, "learning_rate": 2.1467879456004667e-05, "loss": 0.4184, "step": 66565 }, { "epoch": 1.6897995964006043, "grad_norm": 0.365234375, "learning_rate": 2.1450753354290385e-05, "loss": 0.4301, "step": 66570 }, { "epoch": 1.6899265144496198, "grad_norm": 0.373046875, "learning_rate": 2.1433633560436437e-05, "loss": 0.4174, "step": 66575 }, { "epoch": 1.6900534324986356, "grad_norm": 0.3515625, "learning_rate": 2.141652007528285e-05, "loss": 0.4147, "step": 66580 }, { "epoch": 1.6901803505476514, "grad_norm": 0.349609375, "learning_rate": 2.139941289966942e-05, "loss": 0.4073, "step": 66585 }, { "epoch": 1.6903072685966671, "grad_norm": 0.333984375, "learning_rate": 2.138231203443558e-05, "loss": 0.4212, "step": 66590 }, { "epoch": 1.690434186645683, "grad_norm": 0.35546875, "learning_rate": 2.136521748042043e-05, "loss": 0.4339, "step": 66595 }, { "epoch": 1.6905611046946987, "grad_norm": 0.3828125, "learning_rate": 2.1348129238462847e-05, "loss": 0.4035, "step": 66600 }, { "epoch": 1.6906880227437144, "grad_norm": 0.34765625, "learning_rate": 2.13310473094013e-05, "loss": 0.405, "step": 66605 }, { "epoch": 1.69081494079273, "grad_norm": 0.380859375, "learning_rate": 2.1313971694074023e-05, "loss": 0.4451, "step": 66610 }, { "epoch": 1.6909418588417457, "grad_norm": 0.341796875, "learning_rate": 2.1296902393318893e-05, "loss": 0.4267, "step": 66615 }, { "epoch": 1.6910687768907615, "grad_norm": 0.349609375, "learning_rate": 2.1279839407973493e-05, "loss": 0.3876, "step": 66620 }, { "epoch": 1.6911956949397773, "grad_norm": 0.3203125, "learning_rate": 2.12627827388751e-05, "loss": 0.3959, "step": 66625 }, { "epoch": 1.691322612988793, "grad_norm": 0.345703125, "learning_rate": 2.1245732386860665e-05, "loss": 0.3726, "step": 66630 }, { "epoch": 1.6914495310378088, "grad_norm": 0.37109375, "learning_rate": 2.1228688352766864e-05, "loss": 0.437, "step": 66635 }, { "epoch": 1.6915764490868246, "grad_norm": 0.322265625, "learning_rate": 2.1211650637430032e-05, "loss": 0.4113, "step": 66640 }, { "epoch": 1.6917033671358404, "grad_norm": 0.33984375, "learning_rate": 2.1194619241686202e-05, "loss": 0.4077, "step": 66645 }, { "epoch": 1.6918302851848561, "grad_norm": 0.396484375, "learning_rate": 2.1177594166371097e-05, "loss": 0.4211, "step": 66650 }, { "epoch": 1.6919572032338719, "grad_norm": 0.369140625, "learning_rate": 2.116057541232009e-05, "loss": 0.4286, "step": 66655 }, { "epoch": 1.6920841212828877, "grad_norm": 0.34765625, "learning_rate": 2.1143562980368362e-05, "loss": 0.3759, "step": 66660 }, { "epoch": 1.6922110393319034, "grad_norm": 0.353515625, "learning_rate": 2.1126556871350675e-05, "loss": 0.4148, "step": 66665 }, { "epoch": 1.6923379573809192, "grad_norm": 0.3515625, "learning_rate": 2.1109557086101463e-05, "loss": 0.4484, "step": 66670 }, { "epoch": 1.692464875429935, "grad_norm": 0.330078125, "learning_rate": 2.1092563625454994e-05, "loss": 0.401, "step": 66675 }, { "epoch": 1.6925917934789507, "grad_norm": 0.318359375, "learning_rate": 2.1075576490245065e-05, "loss": 0.3852, "step": 66680 }, { "epoch": 1.6927187115279665, "grad_norm": 0.341796875, "learning_rate": 2.1058595681305247e-05, "loss": 0.4027, "step": 66685 }, { "epoch": 1.6928456295769823, "grad_norm": 0.3359375, "learning_rate": 2.1041621199468767e-05, "loss": 0.4249, "step": 66690 }, { "epoch": 1.692972547625998, "grad_norm": 0.326171875, "learning_rate": 2.102465304556858e-05, "loss": 0.4103, "step": 66695 }, { "epoch": 1.6930994656750138, "grad_norm": 0.373046875, "learning_rate": 2.1007691220437283e-05, "loss": 0.4173, "step": 66700 }, { "epoch": 1.6932263837240296, "grad_norm": 0.35546875, "learning_rate": 2.09907357249072e-05, "loss": 0.3949, "step": 66705 }, { "epoch": 1.693353301773045, "grad_norm": 0.349609375, "learning_rate": 2.0973786559810323e-05, "loss": 0.4282, "step": 66710 }, { "epoch": 1.6934802198220609, "grad_norm": 0.318359375, "learning_rate": 2.0956843725978346e-05, "loss": 0.406, "step": 66715 }, { "epoch": 1.6936071378710766, "grad_norm": 0.349609375, "learning_rate": 2.0939907224242596e-05, "loss": 0.4417, "step": 66720 }, { "epoch": 1.6937340559200924, "grad_norm": 0.35546875, "learning_rate": 2.092297705543423e-05, "loss": 0.4172, "step": 66725 }, { "epoch": 1.6938609739691082, "grad_norm": 0.359375, "learning_rate": 2.0906053220383967e-05, "loss": 0.3792, "step": 66730 }, { "epoch": 1.693987892018124, "grad_norm": 0.35546875, "learning_rate": 2.0889135719922256e-05, "loss": 0.4218, "step": 66735 }, { "epoch": 1.6941148100671395, "grad_norm": 0.345703125, "learning_rate": 2.0872224554879217e-05, "loss": 0.4096, "step": 66740 }, { "epoch": 1.6942417281161553, "grad_norm": 0.357421875, "learning_rate": 2.085531972608464e-05, "loss": 0.3983, "step": 66745 }, { "epoch": 1.694368646165171, "grad_norm": 0.376953125, "learning_rate": 2.0838421234368126e-05, "loss": 0.4227, "step": 66750 }, { "epoch": 1.6944955642141868, "grad_norm": 0.3515625, "learning_rate": 2.0821529080558823e-05, "loss": 0.4005, "step": 66755 }, { "epoch": 1.6946224822632026, "grad_norm": 0.35546875, "learning_rate": 2.0804643265485643e-05, "loss": 0.3879, "step": 66760 }, { "epoch": 1.6947494003122183, "grad_norm": 0.357421875, "learning_rate": 2.0787763789977153e-05, "loss": 0.4082, "step": 66765 }, { "epoch": 1.694876318361234, "grad_norm": 0.373046875, "learning_rate": 2.0770890654861606e-05, "loss": 0.4055, "step": 66770 }, { "epoch": 1.6950032364102499, "grad_norm": 0.337890625, "learning_rate": 2.075402386096699e-05, "loss": 0.4091, "step": 66775 }, { "epoch": 1.6951301544592656, "grad_norm": 0.3515625, "learning_rate": 2.073716340912091e-05, "loss": 0.4234, "step": 66780 }, { "epoch": 1.6952570725082814, "grad_norm": 0.337890625, "learning_rate": 2.0720309300150757e-05, "loss": 0.4264, "step": 66785 }, { "epoch": 1.6953839905572972, "grad_norm": 0.34375, "learning_rate": 2.070346153488353e-05, "loss": 0.435, "step": 66790 }, { "epoch": 1.695510908606313, "grad_norm": 0.35546875, "learning_rate": 2.068662011414594e-05, "loss": 0.4253, "step": 66795 }, { "epoch": 1.6956378266553287, "grad_norm": 0.349609375, "learning_rate": 2.066978503876439e-05, "loss": 0.4197, "step": 66800 }, { "epoch": 1.6957647447043445, "grad_norm": 0.330078125, "learning_rate": 2.065295630956499e-05, "loss": 0.3815, "step": 66805 }, { "epoch": 1.6958916627533602, "grad_norm": 0.330078125, "learning_rate": 2.063613392737347e-05, "loss": 0.4086, "step": 66810 }, { "epoch": 1.696018580802376, "grad_norm": 0.375, "learning_rate": 2.0619317893015348e-05, "loss": 0.4345, "step": 66815 }, { "epoch": 1.6961454988513918, "grad_norm": 0.3359375, "learning_rate": 2.0602508207315756e-05, "loss": 0.433, "step": 66820 }, { "epoch": 1.6962724169004075, "grad_norm": 0.3671875, "learning_rate": 2.05857048710995e-05, "loss": 0.4175, "step": 66825 }, { "epoch": 1.6963993349494233, "grad_norm": 0.328125, "learning_rate": 2.0568907885191193e-05, "loss": 0.4277, "step": 66830 }, { "epoch": 1.696526252998439, "grad_norm": 0.341796875, "learning_rate": 2.0552117250415024e-05, "loss": 0.4048, "step": 66835 }, { "epoch": 1.6966531710474546, "grad_norm": 0.353515625, "learning_rate": 2.053533296759488e-05, "loss": 0.4171, "step": 66840 }, { "epoch": 1.6967800890964704, "grad_norm": 0.3515625, "learning_rate": 2.0518555037554358e-05, "loss": 0.4133, "step": 66845 }, { "epoch": 1.6969070071454861, "grad_norm": 0.341796875, "learning_rate": 2.0501783461116796e-05, "loss": 0.3822, "step": 66850 }, { "epoch": 1.697033925194502, "grad_norm": 0.3515625, "learning_rate": 2.0485018239105133e-05, "loss": 0.4003, "step": 66855 }, { "epoch": 1.6971608432435177, "grad_norm": 0.357421875, "learning_rate": 2.0468259372342033e-05, "loss": 0.4374, "step": 66860 }, { "epoch": 1.6972877612925334, "grad_norm": 0.357421875, "learning_rate": 2.0451506861649834e-05, "loss": 0.4377, "step": 66865 }, { "epoch": 1.6974146793415492, "grad_norm": 0.375, "learning_rate": 2.0434760707850618e-05, "loss": 0.4189, "step": 66870 }, { "epoch": 1.6975415973905648, "grad_norm": 0.345703125, "learning_rate": 2.0418020911766053e-05, "loss": 0.4274, "step": 66875 }, { "epoch": 1.6976685154395805, "grad_norm": 0.365234375, "learning_rate": 2.0401287474217604e-05, "loss": 0.4362, "step": 66880 }, { "epoch": 1.6977954334885963, "grad_norm": 0.36328125, "learning_rate": 2.0384560396026346e-05, "loss": 0.4356, "step": 66885 }, { "epoch": 1.697922351537612, "grad_norm": 0.337890625, "learning_rate": 2.0367839678013088e-05, "loss": 0.4446, "step": 66890 }, { "epoch": 1.6980492695866278, "grad_norm": 0.314453125, "learning_rate": 2.035112532099829e-05, "loss": 0.386, "step": 66895 }, { "epoch": 1.6981761876356436, "grad_norm": 0.3203125, "learning_rate": 2.033441732580208e-05, "loss": 0.4206, "step": 66900 }, { "epoch": 1.6983031056846594, "grad_norm": 0.3515625, "learning_rate": 2.0317715693244424e-05, "loss": 0.4351, "step": 66905 }, { "epoch": 1.6984300237336751, "grad_norm": 0.341796875, "learning_rate": 2.0301020424144775e-05, "loss": 0.3943, "step": 66910 }, { "epoch": 1.698556941782691, "grad_norm": 0.337890625, "learning_rate": 2.0284331519322385e-05, "loss": 0.4128, "step": 66915 }, { "epoch": 1.6986838598317067, "grad_norm": 0.365234375, "learning_rate": 2.026764897959619e-05, "loss": 0.4073, "step": 66920 }, { "epoch": 1.6988107778807224, "grad_norm": 0.337890625, "learning_rate": 2.0250972805784777e-05, "loss": 0.4154, "step": 66925 }, { "epoch": 1.6989376959297382, "grad_norm": 0.34375, "learning_rate": 2.0234302998706454e-05, "loss": 0.4437, "step": 66930 }, { "epoch": 1.699064613978754, "grad_norm": 0.34765625, "learning_rate": 2.02176395591792e-05, "loss": 0.3954, "step": 66935 }, { "epoch": 1.6991915320277697, "grad_norm": 0.373046875, "learning_rate": 2.020098248802066e-05, "loss": 0.4071, "step": 66940 }, { "epoch": 1.6993184500767855, "grad_norm": 0.36328125, "learning_rate": 2.0184331786048202e-05, "loss": 0.4326, "step": 66945 }, { "epoch": 1.6994453681258013, "grad_norm": 0.349609375, "learning_rate": 2.0167687454078884e-05, "loss": 0.3957, "step": 66950 }, { "epoch": 1.699572286174817, "grad_norm": 0.35546875, "learning_rate": 2.0151049492929422e-05, "loss": 0.4071, "step": 66955 }, { "epoch": 1.6996992042238328, "grad_norm": 0.34375, "learning_rate": 2.0134417903416246e-05, "loss": 0.4032, "step": 66960 }, { "epoch": 1.6998261222728486, "grad_norm": 0.349609375, "learning_rate": 2.0117792686355433e-05, "loss": 0.4096, "step": 66965 }, { "epoch": 1.6999530403218641, "grad_norm": 0.2890625, "learning_rate": 2.0101173842562768e-05, "loss": 0.3812, "step": 66970 }, { "epoch": 1.7000799583708799, "grad_norm": 0.333984375, "learning_rate": 2.0084561372853797e-05, "loss": 0.4051, "step": 66975 }, { "epoch": 1.7002068764198957, "grad_norm": 0.34765625, "learning_rate": 2.0067955278043652e-05, "loss": 0.4169, "step": 66980 }, { "epoch": 1.7003337944689114, "grad_norm": 0.34375, "learning_rate": 2.005135555894718e-05, "loss": 0.4127, "step": 66985 }, { "epoch": 1.7004607125179272, "grad_norm": 0.365234375, "learning_rate": 2.003476221637893e-05, "loss": 0.4324, "step": 66990 }, { "epoch": 1.700587630566943, "grad_norm": 0.353515625, "learning_rate": 2.00181752511531e-05, "loss": 0.4373, "step": 66995 }, { "epoch": 1.7007145486159587, "grad_norm": 0.31640625, "learning_rate": 2.0001594664083658e-05, "loss": 0.399, "step": 67000 }, { "epoch": 1.7008414666649743, "grad_norm": 0.375, "learning_rate": 1.9985020455984186e-05, "loss": 0.4264, "step": 67005 }, { "epoch": 1.70096838471399, "grad_norm": 0.33984375, "learning_rate": 1.9968452627667964e-05, "loss": 0.41, "step": 67010 }, { "epoch": 1.7010953027630058, "grad_norm": 0.369140625, "learning_rate": 1.9951891179947975e-05, "loss": 0.4284, "step": 67015 }, { "epoch": 1.7012222208120216, "grad_norm": 0.3515625, "learning_rate": 1.9935336113636873e-05, "loss": 0.4055, "step": 67020 }, { "epoch": 1.7013491388610373, "grad_norm": 0.359375, "learning_rate": 1.991878742954704e-05, "loss": 0.3989, "step": 67025 }, { "epoch": 1.701476056910053, "grad_norm": 0.35546875, "learning_rate": 1.9902245128490474e-05, "loss": 0.3708, "step": 67030 }, { "epoch": 1.7016029749590689, "grad_norm": 0.3671875, "learning_rate": 1.9885709211278894e-05, "loss": 0.4254, "step": 67035 }, { "epoch": 1.7017298930080846, "grad_norm": 0.330078125, "learning_rate": 1.986917967872377e-05, "loss": 0.4009, "step": 67040 }, { "epoch": 1.7018568110571004, "grad_norm": 0.34765625, "learning_rate": 1.9852656531636147e-05, "loss": 0.4007, "step": 67045 }, { "epoch": 1.7019837291061162, "grad_norm": 0.34375, "learning_rate": 1.983613977082683e-05, "loss": 0.4136, "step": 67050 }, { "epoch": 1.702110647155132, "grad_norm": 0.353515625, "learning_rate": 1.981962939710629e-05, "loss": 0.4365, "step": 67055 }, { "epoch": 1.7022375652041477, "grad_norm": 0.36328125, "learning_rate": 1.9803125411284676e-05, "loss": 0.4076, "step": 67060 }, { "epoch": 1.7023644832531635, "grad_norm": 0.3828125, "learning_rate": 1.9786627814171842e-05, "loss": 0.4601, "step": 67065 }, { "epoch": 1.7024914013021792, "grad_norm": 0.3671875, "learning_rate": 1.977013660657727e-05, "loss": 0.4179, "step": 67070 }, { "epoch": 1.702618319351195, "grad_norm": 0.375, "learning_rate": 1.975365178931025e-05, "loss": 0.3945, "step": 67075 }, { "epoch": 1.7027452374002108, "grad_norm": 0.34375, "learning_rate": 1.9737173363179665e-05, "loss": 0.4168, "step": 67080 }, { "epoch": 1.7028721554492265, "grad_norm": 0.353515625, "learning_rate": 1.9720701328994087e-05, "loss": 0.4127, "step": 67085 }, { "epoch": 1.7029990734982423, "grad_norm": 0.3515625, "learning_rate": 1.9704235687561803e-05, "loss": 0.429, "step": 67090 }, { "epoch": 1.703125991547258, "grad_norm": 0.357421875, "learning_rate": 1.9687776439690733e-05, "loss": 0.4164, "step": 67095 }, { "epoch": 1.7032529095962738, "grad_norm": 0.3515625, "learning_rate": 1.9671323586188598e-05, "loss": 0.4293, "step": 67100 }, { "epoch": 1.7033798276452894, "grad_norm": 0.365234375, "learning_rate": 1.96548771278627e-05, "loss": 0.4042, "step": 67105 }, { "epoch": 1.7035067456943052, "grad_norm": 0.33984375, "learning_rate": 1.963843706552006e-05, "loss": 0.4076, "step": 67110 }, { "epoch": 1.703633663743321, "grad_norm": 0.357421875, "learning_rate": 1.962200339996739e-05, "loss": 0.4296, "step": 67115 }, { "epoch": 1.7037605817923367, "grad_norm": 0.326171875, "learning_rate": 1.9605576132011086e-05, "loss": 0.4124, "step": 67120 }, { "epoch": 1.7038874998413525, "grad_norm": 0.39453125, "learning_rate": 1.9589155262457206e-05, "loss": 0.4234, "step": 67125 }, { "epoch": 1.7040144178903682, "grad_norm": 0.357421875, "learning_rate": 1.9572740792111523e-05, "loss": 0.4132, "step": 67130 }, { "epoch": 1.704141335939384, "grad_norm": 0.365234375, "learning_rate": 1.955633272177951e-05, "loss": 0.4291, "step": 67135 }, { "epoch": 1.7042682539883995, "grad_norm": 0.345703125, "learning_rate": 1.9539931052266274e-05, "loss": 0.3992, "step": 67140 }, { "epoch": 1.7043951720374153, "grad_norm": 0.361328125, "learning_rate": 1.9523535784376632e-05, "loss": 0.4132, "step": 67145 }, { "epoch": 1.704522090086431, "grad_norm": 0.34765625, "learning_rate": 1.9507146918915146e-05, "loss": 0.4253, "step": 67150 }, { "epoch": 1.7046490081354468, "grad_norm": 0.341796875, "learning_rate": 1.9490764456685987e-05, "loss": 0.4064, "step": 67155 }, { "epoch": 1.7047759261844626, "grad_norm": 0.349609375, "learning_rate": 1.947438839849298e-05, "loss": 0.4054, "step": 67160 }, { "epoch": 1.7049028442334784, "grad_norm": 0.333984375, "learning_rate": 1.945801874513978e-05, "loss": 0.3822, "step": 67165 }, { "epoch": 1.7050297622824941, "grad_norm": 0.345703125, "learning_rate": 1.9441655497429613e-05, "loss": 0.441, "step": 67170 }, { "epoch": 1.70515668033151, "grad_norm": 0.376953125, "learning_rate": 1.942529865616539e-05, "loss": 0.4121, "step": 67175 }, { "epoch": 1.7052835983805257, "grad_norm": 0.328125, "learning_rate": 1.9408948222149747e-05, "loss": 0.4121, "step": 67180 }, { "epoch": 1.7054105164295414, "grad_norm": 0.373046875, "learning_rate": 1.9392604196185006e-05, "loss": 0.4321, "step": 67185 }, { "epoch": 1.7055374344785572, "grad_norm": 0.357421875, "learning_rate": 1.9376266579073136e-05, "loss": 0.4506, "step": 67190 }, { "epoch": 1.705664352527573, "grad_norm": 0.3515625, "learning_rate": 1.9359935371615854e-05, "loss": 0.4135, "step": 67195 }, { "epoch": 1.7057912705765887, "grad_norm": 0.3515625, "learning_rate": 1.9343610574614486e-05, "loss": 0.4122, "step": 67200 }, { "epoch": 1.7059181886256045, "grad_norm": 0.36328125, "learning_rate": 1.9327292188870097e-05, "loss": 0.4149, "step": 67205 }, { "epoch": 1.7060451066746203, "grad_norm": 0.337890625, "learning_rate": 1.9310980215183442e-05, "loss": 0.42, "step": 67210 }, { "epoch": 1.706172024723636, "grad_norm": 0.337890625, "learning_rate": 1.929467465435493e-05, "loss": 0.4224, "step": 67215 }, { "epoch": 1.7062989427726518, "grad_norm": 0.34765625, "learning_rate": 1.9278375507184623e-05, "loss": 0.4119, "step": 67220 }, { "epoch": 1.7064258608216676, "grad_norm": 0.365234375, "learning_rate": 1.92620827744724e-05, "loss": 0.4116, "step": 67225 }, { "epoch": 1.7065527788706834, "grad_norm": 0.359375, "learning_rate": 1.9245796457017698e-05, "loss": 0.4062, "step": 67230 }, { "epoch": 1.706679696919699, "grad_norm": 0.37109375, "learning_rate": 1.9229516555619668e-05, "loss": 0.4109, "step": 67235 }, { "epoch": 1.7068066149687147, "grad_norm": 0.359375, "learning_rate": 1.921324307107715e-05, "loss": 0.4383, "step": 67240 }, { "epoch": 1.7069335330177304, "grad_norm": 0.361328125, "learning_rate": 1.9196976004188724e-05, "loss": 0.4213, "step": 67245 }, { "epoch": 1.7070604510667462, "grad_norm": 0.3359375, "learning_rate": 1.918071535575257e-05, "loss": 0.424, "step": 67250 }, { "epoch": 1.707187369115762, "grad_norm": 0.361328125, "learning_rate": 1.916446112656661e-05, "loss": 0.437, "step": 67255 }, { "epoch": 1.7073142871647777, "grad_norm": 0.3515625, "learning_rate": 1.914821331742843e-05, "loss": 0.4222, "step": 67260 }, { "epoch": 1.7074412052137935, "grad_norm": 0.337890625, "learning_rate": 1.9131971929135305e-05, "loss": 0.3926, "step": 67265 }, { "epoch": 1.707568123262809, "grad_norm": 0.34375, "learning_rate": 1.911573696248418e-05, "loss": 0.4008, "step": 67270 }, { "epoch": 1.7076950413118248, "grad_norm": 0.345703125, "learning_rate": 1.9099508418271704e-05, "loss": 0.4355, "step": 67275 }, { "epoch": 1.7078219593608406, "grad_norm": 0.34375, "learning_rate": 1.9083286297294205e-05, "loss": 0.404, "step": 67280 }, { "epoch": 1.7079488774098563, "grad_norm": 0.333984375, "learning_rate": 1.9067070600347677e-05, "loss": 0.3771, "step": 67285 }, { "epoch": 1.7080757954588721, "grad_norm": 0.353515625, "learning_rate": 1.9050861328227856e-05, "loss": 0.415, "step": 67290 }, { "epoch": 1.7082027135078879, "grad_norm": 0.34375, "learning_rate": 1.903465848173012e-05, "loss": 0.3865, "step": 67295 }, { "epoch": 1.7083296315569036, "grad_norm": 0.357421875, "learning_rate": 1.9018462061649515e-05, "loss": 0.4436, "step": 67300 }, { "epoch": 1.7084565496059194, "grad_norm": 0.34765625, "learning_rate": 1.9002272068780806e-05, "loss": 0.4015, "step": 67305 }, { "epoch": 1.7085834676549352, "grad_norm": 0.365234375, "learning_rate": 1.898608850391842e-05, "loss": 0.4591, "step": 67310 }, { "epoch": 1.708710385703951, "grad_norm": 0.37890625, "learning_rate": 1.896991136785645e-05, "loss": 0.434, "step": 67315 }, { "epoch": 1.7088373037529667, "grad_norm": 0.34765625, "learning_rate": 1.8953740661388784e-05, "loss": 0.4335, "step": 67320 }, { "epoch": 1.7089642218019825, "grad_norm": 0.353515625, "learning_rate": 1.8937576385308837e-05, "loss": 0.4193, "step": 67325 }, { "epoch": 1.7090911398509983, "grad_norm": 0.337890625, "learning_rate": 1.8921418540409833e-05, "loss": 0.411, "step": 67330 }, { "epoch": 1.709218057900014, "grad_norm": 0.37890625, "learning_rate": 1.890526712748458e-05, "loss": 0.395, "step": 67335 }, { "epoch": 1.7093449759490298, "grad_norm": 0.35546875, "learning_rate": 1.8889122147325662e-05, "loss": 0.4047, "step": 67340 }, { "epoch": 1.7094718939980456, "grad_norm": 0.322265625, "learning_rate": 1.88729836007253e-05, "loss": 0.4252, "step": 67345 }, { "epoch": 1.7095988120470613, "grad_norm": 0.34375, "learning_rate": 1.885685148847537e-05, "loss": 0.4365, "step": 67350 }, { "epoch": 1.709725730096077, "grad_norm": 0.337890625, "learning_rate": 1.8840725811367513e-05, "loss": 0.4184, "step": 67355 }, { "epoch": 1.7098526481450929, "grad_norm": 0.341796875, "learning_rate": 1.8824606570193008e-05, "loss": 0.4099, "step": 67360 }, { "epoch": 1.7099795661941086, "grad_norm": 0.36328125, "learning_rate": 1.8808493765742804e-05, "loss": 0.4026, "step": 67365 }, { "epoch": 1.7101064842431242, "grad_norm": 0.349609375, "learning_rate": 1.8792387398807558e-05, "loss": 0.4162, "step": 67370 }, { "epoch": 1.71023340229214, "grad_norm": 0.337890625, "learning_rate": 1.877628747017758e-05, "loss": 0.3727, "step": 67375 }, { "epoch": 1.7103603203411557, "grad_norm": 0.3515625, "learning_rate": 1.8760193980642913e-05, "loss": 0.4082, "step": 67380 }, { "epoch": 1.7104872383901715, "grad_norm": 0.33984375, "learning_rate": 1.8744106930993254e-05, "loss": 0.4318, "step": 67385 }, { "epoch": 1.7106141564391872, "grad_norm": 0.349609375, "learning_rate": 1.8728026322017954e-05, "loss": 0.3959, "step": 67390 }, { "epoch": 1.710741074488203, "grad_norm": 0.333984375, "learning_rate": 1.8711952154506133e-05, "loss": 0.4129, "step": 67395 }, { "epoch": 1.7108679925372186, "grad_norm": 0.333984375, "learning_rate": 1.8695884429246527e-05, "loss": 0.4142, "step": 67400 }, { "epoch": 1.7109949105862343, "grad_norm": 0.34375, "learning_rate": 1.8679823147027555e-05, "loss": 0.4529, "step": 67405 }, { "epoch": 1.71112182863525, "grad_norm": 0.365234375, "learning_rate": 1.866376830863734e-05, "loss": 0.4222, "step": 67410 }, { "epoch": 1.7112487466842659, "grad_norm": 0.326171875, "learning_rate": 1.864771991486371e-05, "loss": 0.4153, "step": 67415 }, { "epoch": 1.7113756647332816, "grad_norm": 0.37109375, "learning_rate": 1.863167796649416e-05, "loss": 0.4102, "step": 67420 }, { "epoch": 1.7115025827822974, "grad_norm": 0.357421875, "learning_rate": 1.8615642464315828e-05, "loss": 0.4242, "step": 67425 }, { "epoch": 1.7116295008313132, "grad_norm": 0.34375, "learning_rate": 1.8599613409115595e-05, "loss": 0.4265, "step": 67430 }, { "epoch": 1.711756418880329, "grad_norm": 0.373046875, "learning_rate": 1.8583590801679983e-05, "loss": 0.4299, "step": 67435 }, { "epoch": 1.7118833369293447, "grad_norm": 0.322265625, "learning_rate": 1.856757464279523e-05, "loss": 0.4029, "step": 67440 }, { "epoch": 1.7120102549783605, "grad_norm": 0.345703125, "learning_rate": 1.8551564933247225e-05, "loss": 0.398, "step": 67445 }, { "epoch": 1.7121371730273762, "grad_norm": 0.34375, "learning_rate": 1.8535561673821576e-05, "loss": 0.4036, "step": 67450 }, { "epoch": 1.712264091076392, "grad_norm": 0.34375, "learning_rate": 1.8519564865303532e-05, "loss": 0.4295, "step": 67455 }, { "epoch": 1.7123910091254078, "grad_norm": 0.341796875, "learning_rate": 1.8503574508478085e-05, "loss": 0.4131, "step": 67460 }, { "epoch": 1.7125179271744235, "grad_norm": 0.35546875, "learning_rate": 1.8487590604129843e-05, "loss": 0.4009, "step": 67465 }, { "epoch": 1.7126448452234393, "grad_norm": 0.31640625, "learning_rate": 1.8471613153043126e-05, "loss": 0.418, "step": 67470 }, { "epoch": 1.712771763272455, "grad_norm": 0.341796875, "learning_rate": 1.845564215600197e-05, "loss": 0.3904, "step": 67475 }, { "epoch": 1.7128986813214708, "grad_norm": 0.337890625, "learning_rate": 1.843967761379007e-05, "loss": 0.4268, "step": 67480 }, { "epoch": 1.7130255993704866, "grad_norm": 0.380859375, "learning_rate": 1.842371952719075e-05, "loss": 0.4382, "step": 67485 }, { "epoch": 1.7131525174195024, "grad_norm": 0.357421875, "learning_rate": 1.840776789698713e-05, "loss": 0.4328, "step": 67490 }, { "epoch": 1.7132794354685181, "grad_norm": 0.328125, "learning_rate": 1.839182272396192e-05, "loss": 0.4102, "step": 67495 }, { "epoch": 1.7134063535175337, "grad_norm": 0.357421875, "learning_rate": 1.837588400889754e-05, "loss": 0.4149, "step": 67500 }, { "epoch": 1.7135332715665494, "grad_norm": 0.34765625, "learning_rate": 1.8359951752576102e-05, "loss": 0.3904, "step": 67505 }, { "epoch": 1.7136601896155652, "grad_norm": 0.3515625, "learning_rate": 1.8344025955779395e-05, "loss": 0.4354, "step": 67510 }, { "epoch": 1.713787107664581, "grad_norm": 0.341796875, "learning_rate": 1.8328106619288896e-05, "loss": 0.4108, "step": 67515 }, { "epoch": 1.7139140257135967, "grad_norm": 0.34765625, "learning_rate": 1.8312193743885745e-05, "loss": 0.3838, "step": 67520 }, { "epoch": 1.7140409437626125, "grad_norm": 0.341796875, "learning_rate": 1.8296287330350802e-05, "loss": 0.4108, "step": 67525 }, { "epoch": 1.7141678618116283, "grad_norm": 0.349609375, "learning_rate": 1.8280387379464573e-05, "loss": 0.4162, "step": 67530 }, { "epoch": 1.7142947798606438, "grad_norm": 0.341796875, "learning_rate": 1.8264493892007238e-05, "loss": 0.422, "step": 67535 }, { "epoch": 1.7144216979096596, "grad_norm": 0.3671875, "learning_rate": 1.8248606868758736e-05, "loss": 0.4338, "step": 67540 }, { "epoch": 1.7145486159586754, "grad_norm": 0.369140625, "learning_rate": 1.8232726310498613e-05, "loss": 0.3869, "step": 67545 }, { "epoch": 1.7146755340076911, "grad_norm": 0.3828125, "learning_rate": 1.821685221800611e-05, "loss": 0.4273, "step": 67550 }, { "epoch": 1.714802452056707, "grad_norm": 0.384765625, "learning_rate": 1.820098459206019e-05, "loss": 0.446, "step": 67555 }, { "epoch": 1.7149293701057227, "grad_norm": 0.3515625, "learning_rate": 1.818512343343943e-05, "loss": 0.4388, "step": 67560 }, { "epoch": 1.7150562881547384, "grad_norm": 0.373046875, "learning_rate": 1.816926874292217e-05, "loss": 0.4029, "step": 67565 }, { "epoch": 1.7151832062037542, "grad_norm": 0.365234375, "learning_rate": 1.8153420521286393e-05, "loss": 0.4171, "step": 67570 }, { "epoch": 1.71531012425277, "grad_norm": 0.35546875, "learning_rate": 1.813757876930974e-05, "loss": 0.4265, "step": 67575 }, { "epoch": 1.7154370423017857, "grad_norm": 0.33203125, "learning_rate": 1.812174348776959e-05, "loss": 0.4027, "step": 67580 }, { "epoch": 1.7155639603508015, "grad_norm": 0.333984375, "learning_rate": 1.810591467744294e-05, "loss": 0.4315, "step": 67585 }, { "epoch": 1.7156908783998173, "grad_norm": 0.36328125, "learning_rate": 1.8090092339106516e-05, "loss": 0.4054, "step": 67590 }, { "epoch": 1.715817796448833, "grad_norm": 0.357421875, "learning_rate": 1.8074276473536715e-05, "loss": 0.4204, "step": 67595 }, { "epoch": 1.7159447144978488, "grad_norm": 0.38671875, "learning_rate": 1.805846708150958e-05, "loss": 0.4245, "step": 67600 }, { "epoch": 1.7160716325468646, "grad_norm": 0.3828125, "learning_rate": 1.8042664163800963e-05, "loss": 0.4563, "step": 67605 }, { "epoch": 1.7161985505958803, "grad_norm": 0.34765625, "learning_rate": 1.802686772118622e-05, "loss": 0.4257, "step": 67610 }, { "epoch": 1.716325468644896, "grad_norm": 0.3515625, "learning_rate": 1.8011077754440538e-05, "loss": 0.4078, "step": 67615 }, { "epoch": 1.7164523866939119, "grad_norm": 0.345703125, "learning_rate": 1.799529426433868e-05, "loss": 0.42, "step": 67620 }, { "epoch": 1.7165793047429276, "grad_norm": 0.365234375, "learning_rate": 1.797951725165515e-05, "loss": 0.4021, "step": 67625 }, { "epoch": 1.7167062227919434, "grad_norm": 0.326171875, "learning_rate": 1.7963746717164105e-05, "loss": 0.4104, "step": 67630 }, { "epoch": 1.716833140840959, "grad_norm": 0.345703125, "learning_rate": 1.7947982661639426e-05, "loss": 0.4118, "step": 67635 }, { "epoch": 1.7169600588899747, "grad_norm": 0.345703125, "learning_rate": 1.7932225085854608e-05, "loss": 0.4225, "step": 67640 }, { "epoch": 1.7170869769389905, "grad_norm": 0.361328125, "learning_rate": 1.791647399058293e-05, "loss": 0.3992, "step": 67645 }, { "epoch": 1.7172138949880063, "grad_norm": 0.35546875, "learning_rate": 1.790072937659726e-05, "loss": 0.3793, "step": 67650 }, { "epoch": 1.717340813037022, "grad_norm": 0.345703125, "learning_rate": 1.7884991244670167e-05, "loss": 0.4389, "step": 67655 }, { "epoch": 1.7174677310860378, "grad_norm": 0.333984375, "learning_rate": 1.7869259595573944e-05, "loss": 0.4212, "step": 67660 }, { "epoch": 1.7175946491350533, "grad_norm": 0.373046875, "learning_rate": 1.7853534430080475e-05, "loss": 0.4171, "step": 67665 }, { "epoch": 1.717721567184069, "grad_norm": 0.341796875, "learning_rate": 1.7837815748961458e-05, "loss": 0.4003, "step": 67670 }, { "epoch": 1.7178484852330849, "grad_norm": 0.33984375, "learning_rate": 1.7822103552988182e-05, "loss": 0.3877, "step": 67675 }, { "epoch": 1.7179754032821006, "grad_norm": 0.345703125, "learning_rate": 1.7806397842931642e-05, "loss": 0.3885, "step": 67680 }, { "epoch": 1.7181023213311164, "grad_norm": 0.35546875, "learning_rate": 1.7790698619562487e-05, "loss": 0.4329, "step": 67685 }, { "epoch": 1.7182292393801322, "grad_norm": 0.337890625, "learning_rate": 1.7775005883651105e-05, "loss": 0.4145, "step": 67690 }, { "epoch": 1.718356157429148, "grad_norm": 0.357421875, "learning_rate": 1.7759319635967495e-05, "loss": 0.4327, "step": 67695 }, { "epoch": 1.7184830754781637, "grad_norm": 0.326171875, "learning_rate": 1.7743639877281406e-05, "loss": 0.4066, "step": 67700 }, { "epoch": 1.7186099935271795, "grad_norm": 0.328125, "learning_rate": 1.7727966608362225e-05, "loss": 0.4019, "step": 67705 }, { "epoch": 1.7187369115761952, "grad_norm": 0.322265625, "learning_rate": 1.771229982997902e-05, "loss": 0.3983, "step": 67710 }, { "epoch": 1.718863829625211, "grad_norm": 0.33203125, "learning_rate": 1.769663954290056e-05, "loss": 0.4246, "step": 67715 }, { "epoch": 1.7189907476742268, "grad_norm": 0.349609375, "learning_rate": 1.768098574789531e-05, "loss": 0.4216, "step": 67720 }, { "epoch": 1.7191176657232425, "grad_norm": 0.353515625, "learning_rate": 1.7665338445731375e-05, "loss": 0.4067, "step": 67725 }, { "epoch": 1.7192445837722583, "grad_norm": 0.318359375, "learning_rate": 1.764969763717654e-05, "loss": 0.4093, "step": 67730 }, { "epoch": 1.719371501821274, "grad_norm": 0.375, "learning_rate": 1.763406332299836e-05, "loss": 0.4074, "step": 67735 }, { "epoch": 1.7194984198702898, "grad_norm": 0.322265625, "learning_rate": 1.761843550396397e-05, "loss": 0.3983, "step": 67740 }, { "epoch": 1.7196253379193056, "grad_norm": 0.345703125, "learning_rate": 1.76028141808402e-05, "loss": 0.4088, "step": 67745 }, { "epoch": 1.7197522559683214, "grad_norm": 0.337890625, "learning_rate": 1.7587199354393615e-05, "loss": 0.4186, "step": 67750 }, { "epoch": 1.7198791740173371, "grad_norm": 0.35546875, "learning_rate": 1.7571591025390414e-05, "loss": 0.4245, "step": 67755 }, { "epoch": 1.720006092066353, "grad_norm": 0.345703125, "learning_rate": 1.755598919459648e-05, "loss": 0.395, "step": 67760 }, { "epoch": 1.7201330101153685, "grad_norm": 0.341796875, "learning_rate": 1.754039386277741e-05, "loss": 0.4044, "step": 67765 }, { "epoch": 1.7202599281643842, "grad_norm": 0.419921875, "learning_rate": 1.752480503069843e-05, "loss": 0.435, "step": 67770 }, { "epoch": 1.7203868462134, "grad_norm": 0.34375, "learning_rate": 1.7509222699124503e-05, "loss": 0.4147, "step": 67775 }, { "epoch": 1.7205137642624158, "grad_norm": 0.35546875, "learning_rate": 1.7493646868820248e-05, "loss": 0.4198, "step": 67780 }, { "epoch": 1.7206406823114315, "grad_norm": 0.341796875, "learning_rate": 1.7478077540549957e-05, "loss": 0.4098, "step": 67785 }, { "epoch": 1.7207676003604473, "grad_norm": 0.337890625, "learning_rate": 1.746251471507758e-05, "loss": 0.4004, "step": 67790 }, { "epoch": 1.720894518409463, "grad_norm": 0.357421875, "learning_rate": 1.7446958393166842e-05, "loss": 0.389, "step": 67795 }, { "epoch": 1.7210214364584786, "grad_norm": 0.35546875, "learning_rate": 1.7431408575581052e-05, "loss": 0.4195, "step": 67800 }, { "epoch": 1.7211483545074944, "grad_norm": 0.328125, "learning_rate": 1.741586526308321e-05, "loss": 0.3965, "step": 67805 }, { "epoch": 1.7212752725565101, "grad_norm": 0.365234375, "learning_rate": 1.740032845643608e-05, "loss": 0.4252, "step": 67810 }, { "epoch": 1.721402190605526, "grad_norm": 0.373046875, "learning_rate": 1.7384798156401996e-05, "loss": 0.4506, "step": 67815 }, { "epoch": 1.7215291086545417, "grad_norm": 0.384765625, "learning_rate": 1.736927436374305e-05, "loss": 0.4105, "step": 67820 }, { "epoch": 1.7216560267035574, "grad_norm": 0.33984375, "learning_rate": 1.7353757079220988e-05, "loss": 0.4346, "step": 67825 }, { "epoch": 1.7217829447525732, "grad_norm": 0.357421875, "learning_rate": 1.733824630359721e-05, "loss": 0.4154, "step": 67830 }, { "epoch": 1.721909862801589, "grad_norm": 0.349609375, "learning_rate": 1.7322742037632858e-05, "loss": 0.3998, "step": 67835 }, { "epoch": 1.7220367808506047, "grad_norm": 0.380859375, "learning_rate": 1.7307244282088693e-05, "loss": 0.4074, "step": 67840 }, { "epoch": 1.7221636988996205, "grad_norm": 0.361328125, "learning_rate": 1.729175303772519e-05, "loss": 0.429, "step": 67845 }, { "epoch": 1.7222906169486363, "grad_norm": 0.322265625, "learning_rate": 1.727626830530251e-05, "loss": 0.383, "step": 67850 }, { "epoch": 1.722417534997652, "grad_norm": 0.33984375, "learning_rate": 1.7260790085580457e-05, "loss": 0.4187, "step": 67855 }, { "epoch": 1.7225444530466678, "grad_norm": 0.361328125, "learning_rate": 1.7245318379318574e-05, "loss": 0.393, "step": 67860 }, { "epoch": 1.7226713710956836, "grad_norm": 0.34375, "learning_rate": 1.7229853187276038e-05, "loss": 0.4333, "step": 67865 }, { "epoch": 1.7227982891446993, "grad_norm": 0.341796875, "learning_rate": 1.721439451021171e-05, "loss": 0.4259, "step": 67870 }, { "epoch": 1.7229252071937151, "grad_norm": 0.310546875, "learning_rate": 1.7198942348884164e-05, "loss": 0.4001, "step": 67875 }, { "epoch": 1.7230521252427309, "grad_norm": 0.33984375, "learning_rate": 1.7183496704051612e-05, "loss": 0.396, "step": 67880 }, { "epoch": 1.7231790432917466, "grad_norm": 0.37890625, "learning_rate": 1.716805757647195e-05, "loss": 0.419, "step": 67885 }, { "epoch": 1.7233059613407624, "grad_norm": 0.349609375, "learning_rate": 1.71526249669028e-05, "loss": 0.4138, "step": 67890 }, { "epoch": 1.7234328793897782, "grad_norm": 0.345703125, "learning_rate": 1.7137198876101445e-05, "loss": 0.3816, "step": 67895 }, { "epoch": 1.7235597974387937, "grad_norm": 0.333984375, "learning_rate": 1.7121779304824808e-05, "loss": 0.4159, "step": 67900 }, { "epoch": 1.7236867154878095, "grad_norm": 0.369140625, "learning_rate": 1.7106366253829516e-05, "loss": 0.4257, "step": 67905 }, { "epoch": 1.7238136335368253, "grad_norm": 0.36328125, "learning_rate": 1.7090959723871904e-05, "loss": 0.4513, "step": 67910 }, { "epoch": 1.723940551585841, "grad_norm": 0.322265625, "learning_rate": 1.707555971570793e-05, "loss": 0.4036, "step": 67915 }, { "epoch": 1.7240674696348568, "grad_norm": 0.34765625, "learning_rate": 1.7060166230093326e-05, "loss": 0.4084, "step": 67920 }, { "epoch": 1.7241943876838726, "grad_norm": 0.357421875, "learning_rate": 1.7044779267783397e-05, "loss": 0.4159, "step": 67925 }, { "epoch": 1.724321305732888, "grad_norm": 0.33984375, "learning_rate": 1.7029398829533197e-05, "loss": 0.4121, "step": 67930 }, { "epoch": 1.7244482237819039, "grad_norm": 0.34765625, "learning_rate": 1.7014024916097418e-05, "loss": 0.438, "step": 67935 }, { "epoch": 1.7245751418309196, "grad_norm": 0.33984375, "learning_rate": 1.6998657528230474e-05, "loss": 0.4055, "step": 67940 }, { "epoch": 1.7247020598799354, "grad_norm": 0.3828125, "learning_rate": 1.698329666668643e-05, "loss": 0.445, "step": 67945 }, { "epoch": 1.7248289779289512, "grad_norm": 0.37109375, "learning_rate": 1.6967942332219016e-05, "loss": 0.4508, "step": 67950 }, { "epoch": 1.724955895977967, "grad_norm": 0.330078125, "learning_rate": 1.6952594525581692e-05, "loss": 0.3867, "step": 67955 }, { "epoch": 1.7250828140269827, "grad_norm": 0.3359375, "learning_rate": 1.6937253247527526e-05, "loss": 0.4159, "step": 67960 }, { "epoch": 1.7252097320759985, "grad_norm": 0.3671875, "learning_rate": 1.692191849880937e-05, "loss": 0.3972, "step": 67965 }, { "epoch": 1.7253366501250142, "grad_norm": 0.376953125, "learning_rate": 1.6906590280179645e-05, "loss": 0.4397, "step": 67970 }, { "epoch": 1.72546356817403, "grad_norm": 0.33984375, "learning_rate": 1.6891268592390538e-05, "loss": 0.409, "step": 67975 }, { "epoch": 1.7255904862230458, "grad_norm": 0.349609375, "learning_rate": 1.6875953436193816e-05, "loss": 0.4508, "step": 67980 }, { "epoch": 1.7257174042720616, "grad_norm": 0.34375, "learning_rate": 1.686064481234108e-05, "loss": 0.3896, "step": 67985 }, { "epoch": 1.7258443223210773, "grad_norm": 0.341796875, "learning_rate": 1.6845342721583456e-05, "loss": 0.4297, "step": 67990 }, { "epoch": 1.725971240370093, "grad_norm": 0.3671875, "learning_rate": 1.6830047164671813e-05, "loss": 0.4323, "step": 67995 }, { "epoch": 1.7260981584191089, "grad_norm": 0.345703125, "learning_rate": 1.6814758142356716e-05, "loss": 0.4562, "step": 68000 }, { "epoch": 1.7262250764681246, "grad_norm": 0.341796875, "learning_rate": 1.6799475655388384e-05, "loss": 0.4213, "step": 68005 }, { "epoch": 1.7263519945171404, "grad_norm": 0.349609375, "learning_rate": 1.678419970451673e-05, "loss": 0.4223, "step": 68010 }, { "epoch": 1.7264789125661562, "grad_norm": 0.314453125, "learning_rate": 1.6768930290491328e-05, "loss": 0.4113, "step": 68015 }, { "epoch": 1.726605830615172, "grad_norm": 0.306640625, "learning_rate": 1.675366741406144e-05, "loss": 0.386, "step": 68020 }, { "epoch": 1.7267327486641877, "grad_norm": 0.328125, "learning_rate": 1.6738411075976015e-05, "loss": 0.4011, "step": 68025 }, { "epoch": 1.7268596667132032, "grad_norm": 0.3515625, "learning_rate": 1.6723161276983676e-05, "loss": 0.405, "step": 68030 }, { "epoch": 1.726986584762219, "grad_norm": 0.33203125, "learning_rate": 1.670791801783269e-05, "loss": 0.4357, "step": 68035 }, { "epoch": 1.7271135028112348, "grad_norm": 0.353515625, "learning_rate": 1.6692681299271104e-05, "loss": 0.4073, "step": 68040 }, { "epoch": 1.7272404208602505, "grad_norm": 0.318359375, "learning_rate": 1.667745112204656e-05, "loss": 0.4016, "step": 68045 }, { "epoch": 1.7273673389092663, "grad_norm": 0.359375, "learning_rate": 1.6662227486906337e-05, "loss": 0.4228, "step": 68050 }, { "epoch": 1.727494256958282, "grad_norm": 0.341796875, "learning_rate": 1.6647010394597528e-05, "loss": 0.4087, "step": 68055 }, { "epoch": 1.7276211750072978, "grad_norm": 0.33203125, "learning_rate": 1.6631799845866796e-05, "loss": 0.4044, "step": 68060 }, { "epoch": 1.7277480930563134, "grad_norm": 0.33203125, "learning_rate": 1.6616595841460527e-05, "loss": 0.403, "step": 68065 }, { "epoch": 1.7278750111053292, "grad_norm": 0.3359375, "learning_rate": 1.6601398382124763e-05, "loss": 0.3938, "step": 68070 }, { "epoch": 1.728001929154345, "grad_norm": 0.357421875, "learning_rate": 1.658620746860524e-05, "loss": 0.4211, "step": 68075 }, { "epoch": 1.7281288472033607, "grad_norm": 0.337890625, "learning_rate": 1.6571023101647373e-05, "loss": 0.4274, "step": 68080 }, { "epoch": 1.7282557652523765, "grad_norm": 0.34375, "learning_rate": 1.6555845281996257e-05, "loss": 0.3903, "step": 68085 }, { "epoch": 1.7283826833013922, "grad_norm": 0.328125, "learning_rate": 1.6540674010396653e-05, "loss": 0.4157, "step": 68090 }, { "epoch": 1.728509601350408, "grad_norm": 0.369140625, "learning_rate": 1.652550928759301e-05, "loss": 0.4304, "step": 68095 }, { "epoch": 1.7286365193994238, "grad_norm": 0.341796875, "learning_rate": 1.6510351114329474e-05, "loss": 0.3901, "step": 68100 }, { "epoch": 1.7287634374484395, "grad_norm": 0.34765625, "learning_rate": 1.6495199491349787e-05, "loss": 0.418, "step": 68105 }, { "epoch": 1.7288903554974553, "grad_norm": 0.388671875, "learning_rate": 1.648005441939752e-05, "loss": 0.4161, "step": 68110 }, { "epoch": 1.729017273546471, "grad_norm": 0.341796875, "learning_rate": 1.646491589921578e-05, "loss": 0.4376, "step": 68115 }, { "epoch": 1.7291441915954868, "grad_norm": 0.359375, "learning_rate": 1.6449783931547438e-05, "loss": 0.4149, "step": 68120 }, { "epoch": 1.7292711096445026, "grad_norm": 0.349609375, "learning_rate": 1.6434658517135002e-05, "loss": 0.4133, "step": 68125 }, { "epoch": 1.7293980276935184, "grad_norm": 0.353515625, "learning_rate": 1.6419539656720644e-05, "loss": 0.4022, "step": 68130 }, { "epoch": 1.7295249457425341, "grad_norm": 0.35546875, "learning_rate": 1.6404427351046285e-05, "loss": 0.4221, "step": 68135 }, { "epoch": 1.72965186379155, "grad_norm": 0.3515625, "learning_rate": 1.638932160085345e-05, "loss": 0.4112, "step": 68140 }, { "epoch": 1.7297787818405657, "grad_norm": 0.369140625, "learning_rate": 1.6374222406883396e-05, "loss": 0.4153, "step": 68145 }, { "epoch": 1.7299056998895814, "grad_norm": 0.361328125, "learning_rate": 1.6359129769877018e-05, "loss": 0.4103, "step": 68150 }, { "epoch": 1.7300326179385972, "grad_norm": 0.33203125, "learning_rate": 1.63440436905749e-05, "loss": 0.3952, "step": 68155 }, { "epoch": 1.730159535987613, "grad_norm": 0.3671875, "learning_rate": 1.632896416971732e-05, "loss": 0.3984, "step": 68160 }, { "epoch": 1.7302864540366285, "grad_norm": 0.36328125, "learning_rate": 1.6313891208044233e-05, "loss": 0.4238, "step": 68165 }, { "epoch": 1.7304133720856443, "grad_norm": 0.328125, "learning_rate": 1.6298824806295202e-05, "loss": 0.4339, "step": 68170 }, { "epoch": 1.73054029013466, "grad_norm": 0.3515625, "learning_rate": 1.6283764965209635e-05, "loss": 0.3946, "step": 68175 }, { "epoch": 1.7306672081836758, "grad_norm": 0.34765625, "learning_rate": 1.6268711685526437e-05, "loss": 0.4126, "step": 68180 }, { "epoch": 1.7307941262326916, "grad_norm": 0.3515625, "learning_rate": 1.6253664967984287e-05, "loss": 0.4015, "step": 68185 }, { "epoch": 1.7309210442817073, "grad_norm": 0.3515625, "learning_rate": 1.6238624813321527e-05, "loss": 0.38, "step": 68190 }, { "epoch": 1.731047962330723, "grad_norm": 0.37890625, "learning_rate": 1.622359122227615e-05, "loss": 0.4185, "step": 68195 }, { "epoch": 1.7311748803797387, "grad_norm": 0.3515625, "learning_rate": 1.620856419558588e-05, "loss": 0.4291, "step": 68200 }, { "epoch": 1.7313017984287544, "grad_norm": 0.34765625, "learning_rate": 1.6193543733988036e-05, "loss": 0.4031, "step": 68205 }, { "epoch": 1.7314287164777702, "grad_norm": 0.34375, "learning_rate": 1.6178529838219732e-05, "loss": 0.4133, "step": 68210 }, { "epoch": 1.731555634526786, "grad_norm": 0.35546875, "learning_rate": 1.6163522509017655e-05, "loss": 0.4288, "step": 68215 }, { "epoch": 1.7316825525758017, "grad_norm": 0.34375, "learning_rate": 1.6148521747118215e-05, "loss": 0.4179, "step": 68220 }, { "epoch": 1.7318094706248175, "grad_norm": 0.337890625, "learning_rate": 1.6133527553257504e-05, "loss": 0.4271, "step": 68225 }, { "epoch": 1.7319363886738333, "grad_norm": 0.294921875, "learning_rate": 1.6118539928171248e-05, "loss": 0.417, "step": 68230 }, { "epoch": 1.732063306722849, "grad_norm": 0.357421875, "learning_rate": 1.6103558872594912e-05, "loss": 0.4259, "step": 68235 }, { "epoch": 1.7321902247718648, "grad_norm": 0.34375, "learning_rate": 1.6088584387263626e-05, "loss": 0.3991, "step": 68240 }, { "epoch": 1.7323171428208806, "grad_norm": 0.357421875, "learning_rate": 1.6073616472912164e-05, "loss": 0.4238, "step": 68245 }, { "epoch": 1.7324440608698963, "grad_norm": 0.32421875, "learning_rate": 1.6058655130274978e-05, "loss": 0.404, "step": 68250 }, { "epoch": 1.732570978918912, "grad_norm": 0.345703125, "learning_rate": 1.6043700360086242e-05, "loss": 0.426, "step": 68255 }, { "epoch": 1.7326978969679279, "grad_norm": 0.3515625, "learning_rate": 1.6028752163079755e-05, "loss": 0.392, "step": 68260 }, { "epoch": 1.7328248150169436, "grad_norm": 0.3515625, "learning_rate": 1.6013810539989047e-05, "loss": 0.4188, "step": 68265 }, { "epoch": 1.7329517330659594, "grad_norm": 0.359375, "learning_rate": 1.5998875491547282e-05, "loss": 0.4229, "step": 68270 }, { "epoch": 1.7330786511149752, "grad_norm": 0.369140625, "learning_rate": 1.598394701848732e-05, "loss": 0.4323, "step": 68275 }, { "epoch": 1.733205569163991, "grad_norm": 0.330078125, "learning_rate": 1.596902512154168e-05, "loss": 0.4217, "step": 68280 }, { "epoch": 1.7333324872130067, "grad_norm": 0.349609375, "learning_rate": 1.5954109801442573e-05, "loss": 0.4195, "step": 68285 }, { "epoch": 1.7334594052620225, "grad_norm": 0.357421875, "learning_rate": 1.593920105892193e-05, "loss": 0.3966, "step": 68290 }, { "epoch": 1.733586323311038, "grad_norm": 0.330078125, "learning_rate": 1.592429889471125e-05, "loss": 0.4186, "step": 68295 }, { "epoch": 1.7337132413600538, "grad_norm": 0.341796875, "learning_rate": 1.590940330954185e-05, "loss": 0.4077, "step": 68300 }, { "epoch": 1.7338401594090695, "grad_norm": 0.357421875, "learning_rate": 1.589451430414461e-05, "loss": 0.3791, "step": 68305 }, { "epoch": 1.7339670774580853, "grad_norm": 0.359375, "learning_rate": 1.5879631879250148e-05, "loss": 0.4373, "step": 68310 }, { "epoch": 1.734093995507101, "grad_norm": 0.3515625, "learning_rate": 1.586475603558871e-05, "loss": 0.4203, "step": 68315 }, { "epoch": 1.7342209135561168, "grad_norm": 0.365234375, "learning_rate": 1.584988677389028e-05, "loss": 0.4373, "step": 68320 }, { "epoch": 1.7343478316051326, "grad_norm": 0.359375, "learning_rate": 1.5835024094884457e-05, "loss": 0.4308, "step": 68325 }, { "epoch": 1.7344747496541482, "grad_norm": 0.361328125, "learning_rate": 1.582016799930054e-05, "loss": 0.4334, "step": 68330 }, { "epoch": 1.734601667703164, "grad_norm": 0.3515625, "learning_rate": 1.580531848786755e-05, "loss": 0.4025, "step": 68335 }, { "epoch": 1.7347285857521797, "grad_norm": 0.34375, "learning_rate": 1.5790475561314137e-05, "loss": 0.4304, "step": 68340 }, { "epoch": 1.7348555038011955, "grad_norm": 0.34375, "learning_rate": 1.5775639220368613e-05, "loss": 0.3863, "step": 68345 }, { "epoch": 1.7349824218502112, "grad_norm": 0.365234375, "learning_rate": 1.5760809465759e-05, "loss": 0.4223, "step": 68350 }, { "epoch": 1.735109339899227, "grad_norm": 0.341796875, "learning_rate": 1.5745986298212965e-05, "loss": 0.4242, "step": 68355 }, { "epoch": 1.7352362579482428, "grad_norm": 0.357421875, "learning_rate": 1.573116971845794e-05, "loss": 0.4132, "step": 68360 }, { "epoch": 1.7353631759972585, "grad_norm": 0.3671875, "learning_rate": 1.5716359727220935e-05, "loss": 0.4221, "step": 68365 }, { "epoch": 1.7354900940462743, "grad_norm": 0.3515625, "learning_rate": 1.570155632522866e-05, "loss": 0.4082, "step": 68370 }, { "epoch": 1.73561701209529, "grad_norm": 0.349609375, "learning_rate": 1.5686759513207503e-05, "loss": 0.4236, "step": 68375 }, { "epoch": 1.7357439301443058, "grad_norm": 0.365234375, "learning_rate": 1.567196929188357e-05, "loss": 0.4336, "step": 68380 }, { "epoch": 1.7358708481933216, "grad_norm": 0.3828125, "learning_rate": 1.5657185661982607e-05, "loss": 0.4296, "step": 68385 }, { "epoch": 1.7359977662423374, "grad_norm": 0.3515625, "learning_rate": 1.5642408624230007e-05, "loss": 0.4039, "step": 68390 }, { "epoch": 1.7361246842913531, "grad_norm": 0.349609375, "learning_rate": 1.5627638179350922e-05, "loss": 0.4095, "step": 68395 }, { "epoch": 1.736251602340369, "grad_norm": 0.369140625, "learning_rate": 1.5612874328070084e-05, "loss": 0.4544, "step": 68400 }, { "epoch": 1.7363785203893847, "grad_norm": 0.337890625, "learning_rate": 1.5598117071111986e-05, "loss": 0.4226, "step": 68405 }, { "epoch": 1.7365054384384004, "grad_norm": 0.3125, "learning_rate": 1.5583366409200747e-05, "loss": 0.3979, "step": 68410 }, { "epoch": 1.7366323564874162, "grad_norm": 0.345703125, "learning_rate": 1.5568622343060157e-05, "loss": 0.4137, "step": 68415 }, { "epoch": 1.736759274536432, "grad_norm": 0.3515625, "learning_rate": 1.5553884873413714e-05, "loss": 0.3823, "step": 68420 }, { "epoch": 1.7368861925854477, "grad_norm": 0.34765625, "learning_rate": 1.5539154000984594e-05, "loss": 0.4271, "step": 68425 }, { "epoch": 1.7370131106344633, "grad_norm": 0.345703125, "learning_rate": 1.5524429726495635e-05, "loss": 0.4254, "step": 68430 }, { "epoch": 1.737140028683479, "grad_norm": 0.34375, "learning_rate": 1.5509712050669343e-05, "loss": 0.4271, "step": 68435 }, { "epoch": 1.7372669467324948, "grad_norm": 0.337890625, "learning_rate": 1.5495000974227907e-05, "loss": 0.3893, "step": 68440 }, { "epoch": 1.7373938647815106, "grad_norm": 0.34375, "learning_rate": 1.548029649789318e-05, "loss": 0.418, "step": 68445 }, { "epoch": 1.7375207828305264, "grad_norm": 0.353515625, "learning_rate": 1.5465598622386705e-05, "loss": 0.4244, "step": 68450 }, { "epoch": 1.7376477008795421, "grad_norm": 0.3359375, "learning_rate": 1.5450907348429738e-05, "loss": 0.4001, "step": 68455 }, { "epoch": 1.7377746189285577, "grad_norm": 0.357421875, "learning_rate": 1.5436222676743167e-05, "loss": 0.4401, "step": 68460 }, { "epoch": 1.7379015369775734, "grad_norm": 0.3515625, "learning_rate": 1.5421544608047533e-05, "loss": 0.4071, "step": 68465 }, { "epoch": 1.7380284550265892, "grad_norm": 0.341796875, "learning_rate": 1.540687314306309e-05, "loss": 0.4277, "step": 68470 }, { "epoch": 1.738155373075605, "grad_norm": 0.33984375, "learning_rate": 1.5392208282509788e-05, "loss": 0.408, "step": 68475 }, { "epoch": 1.7382822911246207, "grad_norm": 0.34375, "learning_rate": 1.5377550027107195e-05, "loss": 0.398, "step": 68480 }, { "epoch": 1.7384092091736365, "grad_norm": 0.375, "learning_rate": 1.536289837757455e-05, "loss": 0.4232, "step": 68485 }, { "epoch": 1.7385361272226523, "grad_norm": 0.34765625, "learning_rate": 1.53482533346309e-05, "loss": 0.4332, "step": 68490 }, { "epoch": 1.738663045271668, "grad_norm": 0.34375, "learning_rate": 1.5333614898994817e-05, "loss": 0.4302, "step": 68495 }, { "epoch": 1.7387899633206838, "grad_norm": 0.345703125, "learning_rate": 1.531898307138461e-05, "loss": 0.3992, "step": 68500 }, { "epoch": 1.7389168813696996, "grad_norm": 0.373046875, "learning_rate": 1.5304357852518256e-05, "loss": 0.4265, "step": 68505 }, { "epoch": 1.7390437994187153, "grad_norm": 0.328125, "learning_rate": 1.5289739243113398e-05, "loss": 0.406, "step": 68510 }, { "epoch": 1.739170717467731, "grad_norm": 0.34765625, "learning_rate": 1.5275127243887392e-05, "loss": 0.4079, "step": 68515 }, { "epoch": 1.7392976355167469, "grad_norm": 0.369140625, "learning_rate": 1.5260521855557217e-05, "loss": 0.4054, "step": 68520 }, { "epoch": 1.7394245535657626, "grad_norm": 0.353515625, "learning_rate": 1.524592307883955e-05, "loss": 0.381, "step": 68525 }, { "epoch": 1.7395514716147784, "grad_norm": 0.36328125, "learning_rate": 1.523133091445075e-05, "loss": 0.4301, "step": 68530 }, { "epoch": 1.7396783896637942, "grad_norm": 0.365234375, "learning_rate": 1.5216745363106864e-05, "loss": 0.3987, "step": 68535 }, { "epoch": 1.73980530771281, "grad_norm": 0.333984375, "learning_rate": 1.5202166425523615e-05, "loss": 0.3856, "step": 68540 }, { "epoch": 1.7399322257618257, "grad_norm": 0.330078125, "learning_rate": 1.5187594102416335e-05, "loss": 0.4027, "step": 68545 }, { "epoch": 1.7400591438108415, "grad_norm": 0.33984375, "learning_rate": 1.517302839450013e-05, "loss": 0.4327, "step": 68550 }, { "epoch": 1.7401860618598572, "grad_norm": 0.369140625, "learning_rate": 1.5158469302489717e-05, "loss": 0.4561, "step": 68555 }, { "epoch": 1.7403129799088728, "grad_norm": 0.37109375, "learning_rate": 1.514391682709949e-05, "loss": 0.4336, "step": 68560 }, { "epoch": 1.7404398979578886, "grad_norm": 0.3515625, "learning_rate": 1.5129370969043558e-05, "loss": 0.3988, "step": 68565 }, { "epoch": 1.7405668160069043, "grad_norm": 0.375, "learning_rate": 1.511483172903567e-05, "loss": 0.453, "step": 68570 }, { "epoch": 1.74069373405592, "grad_norm": 0.365234375, "learning_rate": 1.5100299107789255e-05, "loss": 0.4173, "step": 68575 }, { "epoch": 1.7408206521049359, "grad_norm": 0.328125, "learning_rate": 1.508577310601744e-05, "loss": 0.4131, "step": 68580 }, { "epoch": 1.7409475701539516, "grad_norm": 0.314453125, "learning_rate": 1.5071253724432974e-05, "loss": 0.4176, "step": 68585 }, { "epoch": 1.7410744882029674, "grad_norm": 0.3359375, "learning_rate": 1.5056740963748353e-05, "loss": 0.404, "step": 68590 }, { "epoch": 1.741201406251983, "grad_norm": 0.341796875, "learning_rate": 1.5042234824675709e-05, "loss": 0.4089, "step": 68595 }, { "epoch": 1.7413283243009987, "grad_norm": 0.34765625, "learning_rate": 1.502773530792682e-05, "loss": 0.4186, "step": 68600 }, { "epoch": 1.7414552423500145, "grad_norm": 0.337890625, "learning_rate": 1.5013242414213184e-05, "loss": 0.4001, "step": 68605 }, { "epoch": 1.7415821603990302, "grad_norm": 0.337890625, "learning_rate": 1.4998756144245998e-05, "loss": 0.4173, "step": 68610 }, { "epoch": 1.741709078448046, "grad_norm": 0.353515625, "learning_rate": 1.4984276498736065e-05, "loss": 0.4015, "step": 68615 }, { "epoch": 1.7418359964970618, "grad_norm": 0.3515625, "learning_rate": 1.4969803478393877e-05, "loss": 0.4077, "step": 68620 }, { "epoch": 1.7419629145460775, "grad_norm": 0.369140625, "learning_rate": 1.495533708392967e-05, "loss": 0.4118, "step": 68625 }, { "epoch": 1.7420898325950933, "grad_norm": 0.37890625, "learning_rate": 1.4940877316053273e-05, "loss": 0.396, "step": 68630 }, { "epoch": 1.742216750644109, "grad_norm": 0.3359375, "learning_rate": 1.492642417547424e-05, "loss": 0.4349, "step": 68635 }, { "epoch": 1.7423436686931248, "grad_norm": 0.330078125, "learning_rate": 1.4911977662901763e-05, "loss": 0.413, "step": 68640 }, { "epoch": 1.7424705867421406, "grad_norm": 0.337890625, "learning_rate": 1.4897537779044732e-05, "loss": 0.4303, "step": 68645 }, { "epoch": 1.7425975047911564, "grad_norm": 0.380859375, "learning_rate": 1.4883104524611695e-05, "loss": 0.4244, "step": 68650 }, { "epoch": 1.7427244228401721, "grad_norm": 0.37890625, "learning_rate": 1.4868677900310916e-05, "loss": 0.4381, "step": 68655 }, { "epoch": 1.742851340889188, "grad_norm": 0.3125, "learning_rate": 1.4854257906850265e-05, "loss": 0.392, "step": 68660 }, { "epoch": 1.7429782589382037, "grad_norm": 0.33203125, "learning_rate": 1.4839844544937358e-05, "loss": 0.4224, "step": 68665 }, { "epoch": 1.7431051769872195, "grad_norm": 0.349609375, "learning_rate": 1.4825437815279395e-05, "loss": 0.4301, "step": 68670 }, { "epoch": 1.7432320950362352, "grad_norm": 0.31640625, "learning_rate": 1.4811037718583396e-05, "loss": 0.4227, "step": 68675 }, { "epoch": 1.743359013085251, "grad_norm": 0.35546875, "learning_rate": 1.4796644255555912e-05, "loss": 0.4177, "step": 68680 }, { "epoch": 1.7434859311342668, "grad_norm": 0.357421875, "learning_rate": 1.4782257426903243e-05, "loss": 0.4132, "step": 68685 }, { "epoch": 1.7436128491832825, "grad_norm": 0.34765625, "learning_rate": 1.4767877233331343e-05, "loss": 0.4158, "step": 68690 }, { "epoch": 1.743739767232298, "grad_norm": 0.33984375, "learning_rate": 1.4753503675545813e-05, "loss": 0.388, "step": 68695 }, { "epoch": 1.7438666852813138, "grad_norm": 0.35546875, "learning_rate": 1.4739136754252007e-05, "loss": 0.3983, "step": 68700 }, { "epoch": 1.7439936033303296, "grad_norm": 0.349609375, "learning_rate": 1.4724776470154876e-05, "loss": 0.4275, "step": 68705 }, { "epoch": 1.7441205213793454, "grad_norm": 0.3359375, "learning_rate": 1.471042282395909e-05, "loss": 0.4088, "step": 68710 }, { "epoch": 1.7442474394283611, "grad_norm": 0.36328125, "learning_rate": 1.4696075816368952e-05, "loss": 0.4077, "step": 68715 }, { "epoch": 1.744374357477377, "grad_norm": 0.337890625, "learning_rate": 1.4681735448088484e-05, "loss": 0.4129, "step": 68720 }, { "epoch": 1.7445012755263924, "grad_norm": 0.35546875, "learning_rate": 1.4667401719821354e-05, "loss": 0.4208, "step": 68725 }, { "epoch": 1.7446281935754082, "grad_norm": 0.333984375, "learning_rate": 1.4653074632270917e-05, "loss": 0.4257, "step": 68730 }, { "epoch": 1.744755111624424, "grad_norm": 0.33984375, "learning_rate": 1.4638754186140161e-05, "loss": 0.4493, "step": 68735 }, { "epoch": 1.7448820296734397, "grad_norm": 0.3359375, "learning_rate": 1.4624440382131841e-05, "loss": 0.4023, "step": 68740 }, { "epoch": 1.7450089477224555, "grad_norm": 0.345703125, "learning_rate": 1.4610133220948312e-05, "loss": 0.4161, "step": 68745 }, { "epoch": 1.7451358657714713, "grad_norm": 0.333984375, "learning_rate": 1.459583270329161e-05, "loss": 0.4379, "step": 68750 }, { "epoch": 1.745262783820487, "grad_norm": 0.349609375, "learning_rate": 1.4581538829863459e-05, "loss": 0.4416, "step": 68755 }, { "epoch": 1.7453897018695028, "grad_norm": 0.34375, "learning_rate": 1.4567251601365266e-05, "loss": 0.4321, "step": 68760 }, { "epoch": 1.7455166199185186, "grad_norm": 0.37109375, "learning_rate": 1.4552971018498083e-05, "loss": 0.4196, "step": 68765 }, { "epoch": 1.7456435379675344, "grad_norm": 0.373046875, "learning_rate": 1.4538697081962652e-05, "loss": 0.4311, "step": 68770 }, { "epoch": 1.7457704560165501, "grad_norm": 0.359375, "learning_rate": 1.4524429792459364e-05, "loss": 0.4128, "step": 68775 }, { "epoch": 1.7458973740655659, "grad_norm": 0.373046875, "learning_rate": 1.4510169150688388e-05, "loss": 0.4088, "step": 68780 }, { "epoch": 1.7460242921145817, "grad_norm": 0.34375, "learning_rate": 1.449591515734942e-05, "loss": 0.4141, "step": 68785 }, { "epoch": 1.7461512101635974, "grad_norm": 0.359375, "learning_rate": 1.4481667813141928e-05, "loss": 0.4255, "step": 68790 }, { "epoch": 1.7462781282126132, "grad_norm": 0.36328125, "learning_rate": 1.4467427118765024e-05, "loss": 0.3993, "step": 68795 }, { "epoch": 1.746405046261629, "grad_norm": 0.359375, "learning_rate": 1.445319307491743e-05, "loss": 0.4187, "step": 68800 }, { "epoch": 1.7465319643106447, "grad_norm": 0.34765625, "learning_rate": 1.4438965682297703e-05, "loss": 0.4133, "step": 68805 }, { "epoch": 1.7466588823596605, "grad_norm": 0.359375, "learning_rate": 1.4424744941603921e-05, "loss": 0.4287, "step": 68810 }, { "epoch": 1.7467858004086763, "grad_norm": 0.373046875, "learning_rate": 1.4410530853533909e-05, "loss": 0.4201, "step": 68815 }, { "epoch": 1.746912718457692, "grad_norm": 0.33984375, "learning_rate": 1.4396323418785138e-05, "loss": 0.4232, "step": 68820 }, { "epoch": 1.7470396365067076, "grad_norm": 0.365234375, "learning_rate": 1.4382122638054755e-05, "loss": 0.4346, "step": 68825 }, { "epoch": 1.7471665545557233, "grad_norm": 0.34765625, "learning_rate": 1.43679285120396e-05, "loss": 0.418, "step": 68830 }, { "epoch": 1.747293472604739, "grad_norm": 0.314453125, "learning_rate": 1.435374104143615e-05, "loss": 0.3839, "step": 68835 }, { "epoch": 1.7474203906537549, "grad_norm": 0.306640625, "learning_rate": 1.4339560226940616e-05, "loss": 0.3997, "step": 68840 }, { "epoch": 1.7475473087027706, "grad_norm": 0.349609375, "learning_rate": 1.4325386069248806e-05, "loss": 0.4088, "step": 68845 }, { "epoch": 1.7476742267517864, "grad_norm": 0.322265625, "learning_rate": 1.4311218569056231e-05, "loss": 0.3975, "step": 68850 }, { "epoch": 1.7478011448008022, "grad_norm": 0.3515625, "learning_rate": 1.4297057727058136e-05, "loss": 0.4245, "step": 68855 }, { "epoch": 1.7479280628498177, "grad_norm": 0.3359375, "learning_rate": 1.4282903543949365e-05, "loss": 0.4132, "step": 68860 }, { "epoch": 1.7480549808988335, "grad_norm": 0.34375, "learning_rate": 1.4268756020424427e-05, "loss": 0.3985, "step": 68865 }, { "epoch": 1.7481818989478493, "grad_norm": 0.361328125, "learning_rate": 1.42546151571776e-05, "loss": 0.441, "step": 68870 }, { "epoch": 1.748308816996865, "grad_norm": 0.36328125, "learning_rate": 1.4240480954902717e-05, "loss": 0.4354, "step": 68875 }, { "epoch": 1.7484357350458808, "grad_norm": 0.36328125, "learning_rate": 1.4226353414293367e-05, "loss": 0.4362, "step": 68880 }, { "epoch": 1.7485626530948966, "grad_norm": 0.359375, "learning_rate": 1.4212232536042766e-05, "loss": 0.4285, "step": 68885 }, { "epoch": 1.7486895711439123, "grad_norm": 0.341796875, "learning_rate": 1.4198118320843827e-05, "loss": 0.3842, "step": 68890 }, { "epoch": 1.748816489192928, "grad_norm": 0.404296875, "learning_rate": 1.4184010769389126e-05, "loss": 0.439, "step": 68895 }, { "epoch": 1.7489434072419439, "grad_norm": 0.322265625, "learning_rate": 1.4169909882370927e-05, "loss": 0.4315, "step": 68900 }, { "epoch": 1.7490703252909596, "grad_norm": 0.349609375, "learning_rate": 1.4155815660481129e-05, "loss": 0.44, "step": 68905 }, { "epoch": 1.7491972433399754, "grad_norm": 0.34765625, "learning_rate": 1.4141728104411343e-05, "loss": 0.3999, "step": 68910 }, { "epoch": 1.7493241613889912, "grad_norm": 0.384765625, "learning_rate": 1.412764721485285e-05, "loss": 0.4283, "step": 68915 }, { "epoch": 1.749451079438007, "grad_norm": 0.359375, "learning_rate": 1.411357299249658e-05, "loss": 0.4235, "step": 68920 }, { "epoch": 1.7495779974870227, "grad_norm": 0.36328125, "learning_rate": 1.4099505438033132e-05, "loss": 0.3924, "step": 68925 }, { "epoch": 1.7497049155360385, "grad_norm": 0.32421875, "learning_rate": 1.4085444552152836e-05, "loss": 0.41, "step": 68930 }, { "epoch": 1.7498318335850542, "grad_norm": 0.357421875, "learning_rate": 1.407139033554564e-05, "loss": 0.411, "step": 68935 }, { "epoch": 1.74995875163407, "grad_norm": 0.345703125, "learning_rate": 1.4057342788901144e-05, "loss": 0.4068, "step": 68940 }, { "epoch": 1.7500856696830858, "grad_norm": 0.337890625, "learning_rate": 1.4043301912908728e-05, "loss": 0.4288, "step": 68945 }, { "epoch": 1.7502125877321015, "grad_norm": 0.33203125, "learning_rate": 1.4029267708257308e-05, "loss": 0.4112, "step": 68950 }, { "epoch": 1.750339505781117, "grad_norm": 0.330078125, "learning_rate": 1.4015240175635567e-05, "loss": 0.405, "step": 68955 }, { "epoch": 1.7504664238301328, "grad_norm": 0.359375, "learning_rate": 1.400121931573182e-05, "loss": 0.4248, "step": 68960 }, { "epoch": 1.7505933418791486, "grad_norm": 0.361328125, "learning_rate": 1.3987205129234086e-05, "loss": 0.4298, "step": 68965 }, { "epoch": 1.7507202599281644, "grad_norm": 0.349609375, "learning_rate": 1.3973197616829996e-05, "loss": 0.4393, "step": 68970 }, { "epoch": 1.7508471779771801, "grad_norm": 0.345703125, "learning_rate": 1.3959196779206915e-05, "loss": 0.4486, "step": 68975 }, { "epoch": 1.750974096026196, "grad_norm": 0.33984375, "learning_rate": 1.3945202617051864e-05, "loss": 0.4192, "step": 68980 }, { "epoch": 1.7511010140752117, "grad_norm": 0.3671875, "learning_rate": 1.3931215131051526e-05, "loss": 0.4326, "step": 68985 }, { "epoch": 1.7512279321242272, "grad_norm": 0.349609375, "learning_rate": 1.3917234321892235e-05, "loss": 0.4351, "step": 68990 }, { "epoch": 1.751354850173243, "grad_norm": 0.365234375, "learning_rate": 1.3903260190260073e-05, "loss": 0.4121, "step": 68995 }, { "epoch": 1.7514817682222588, "grad_norm": 0.33203125, "learning_rate": 1.3889292736840712e-05, "loss": 0.4156, "step": 69000 }, { "epoch": 1.7516086862712745, "grad_norm": 0.341796875, "learning_rate": 1.3875331962319536e-05, "loss": 0.4248, "step": 69005 }, { "epoch": 1.7517356043202903, "grad_norm": 0.345703125, "learning_rate": 1.3861377867381612e-05, "loss": 0.4132, "step": 69010 }, { "epoch": 1.751862522369306, "grad_norm": 0.337890625, "learning_rate": 1.3847430452711644e-05, "loss": 0.4179, "step": 69015 }, { "epoch": 1.7519894404183218, "grad_norm": 0.373046875, "learning_rate": 1.3833489718993984e-05, "loss": 0.4316, "step": 69020 }, { "epoch": 1.7521163584673376, "grad_norm": 0.3671875, "learning_rate": 1.3819555666912784e-05, "loss": 0.4327, "step": 69025 }, { "epoch": 1.7522432765163534, "grad_norm": 0.34765625, "learning_rate": 1.380562829715175e-05, "loss": 0.4091, "step": 69030 }, { "epoch": 1.7523701945653691, "grad_norm": 0.369140625, "learning_rate": 1.3791707610394264e-05, "loss": 0.4172, "step": 69035 }, { "epoch": 1.752497112614385, "grad_norm": 0.33984375, "learning_rate": 1.3777793607323434e-05, "loss": 0.4079, "step": 69040 }, { "epoch": 1.7526240306634007, "grad_norm": 0.34375, "learning_rate": 1.3763886288622012e-05, "loss": 0.3871, "step": 69045 }, { "epoch": 1.7527509487124164, "grad_norm": 0.34375, "learning_rate": 1.37499856549724e-05, "loss": 0.4425, "step": 69050 }, { "epoch": 1.7528778667614322, "grad_norm": 0.353515625, "learning_rate": 1.373609170705674e-05, "loss": 0.4205, "step": 69055 }, { "epoch": 1.753004784810448, "grad_norm": 0.345703125, "learning_rate": 1.3722204445556784e-05, "loss": 0.418, "step": 69060 }, { "epoch": 1.7531317028594637, "grad_norm": 0.337890625, "learning_rate": 1.370832387115397e-05, "loss": 0.4001, "step": 69065 }, { "epoch": 1.7532586209084795, "grad_norm": 0.392578125, "learning_rate": 1.3694449984529405e-05, "loss": 0.4075, "step": 69070 }, { "epoch": 1.7533855389574953, "grad_norm": 0.359375, "learning_rate": 1.3680582786363892e-05, "loss": 0.4425, "step": 69075 }, { "epoch": 1.753512457006511, "grad_norm": 0.34765625, "learning_rate": 1.3666722277337871e-05, "loss": 0.3917, "step": 69080 }, { "epoch": 1.7536393750555268, "grad_norm": 0.34765625, "learning_rate": 1.3652868458131483e-05, "loss": 0.4238, "step": 69085 }, { "epoch": 1.7537662931045424, "grad_norm": 0.3515625, "learning_rate": 1.3639021329424532e-05, "loss": 0.3982, "step": 69090 }, { "epoch": 1.7538932111535581, "grad_norm": 0.345703125, "learning_rate": 1.3625180891896459e-05, "loss": 0.4105, "step": 69095 }, { "epoch": 1.7540201292025739, "grad_norm": 0.361328125, "learning_rate": 1.3611347146226454e-05, "loss": 0.4187, "step": 69100 }, { "epoch": 1.7541470472515897, "grad_norm": 0.345703125, "learning_rate": 1.3597520093093323e-05, "loss": 0.4153, "step": 69105 }, { "epoch": 1.7542739653006054, "grad_norm": 0.34765625, "learning_rate": 1.3583699733175557e-05, "loss": 0.4135, "step": 69110 }, { "epoch": 1.7544008833496212, "grad_norm": 0.361328125, "learning_rate": 1.3569886067151265e-05, "loss": 0.4204, "step": 69115 }, { "epoch": 1.754527801398637, "grad_norm": 0.34765625, "learning_rate": 1.3556079095698336e-05, "loss": 0.4324, "step": 69120 }, { "epoch": 1.7546547194476525, "grad_norm": 0.345703125, "learning_rate": 1.3542278819494279e-05, "loss": 0.4005, "step": 69125 }, { "epoch": 1.7547816374966683, "grad_norm": 0.36328125, "learning_rate": 1.352848523921622e-05, "loss": 0.4467, "step": 69130 }, { "epoch": 1.754908555545684, "grad_norm": 0.365234375, "learning_rate": 1.3514698355541049e-05, "loss": 0.4391, "step": 69135 }, { "epoch": 1.7550354735946998, "grad_norm": 0.37109375, "learning_rate": 1.3500918169145242e-05, "loss": 0.4053, "step": 69140 }, { "epoch": 1.7551623916437156, "grad_norm": 0.373046875, "learning_rate": 1.3487144680705025e-05, "loss": 0.4312, "step": 69145 }, { "epoch": 1.7552893096927313, "grad_norm": 0.33203125, "learning_rate": 1.3473377890896226e-05, "loss": 0.3885, "step": 69150 }, { "epoch": 1.755416227741747, "grad_norm": 0.412109375, "learning_rate": 1.34596178003944e-05, "loss": 0.4122, "step": 69155 }, { "epoch": 1.7555431457907629, "grad_norm": 0.36328125, "learning_rate": 1.344586440987473e-05, "loss": 0.4207, "step": 69160 }, { "epoch": 1.7556700638397786, "grad_norm": 0.37109375, "learning_rate": 1.3432117720012103e-05, "loss": 0.4377, "step": 69165 }, { "epoch": 1.7557969818887944, "grad_norm": 0.33203125, "learning_rate": 1.341837773148105e-05, "loss": 0.401, "step": 69170 }, { "epoch": 1.7559238999378102, "grad_norm": 0.337890625, "learning_rate": 1.3404644444955781e-05, "loss": 0.4082, "step": 69175 }, { "epoch": 1.756050817986826, "grad_norm": 0.34375, "learning_rate": 1.3390917861110224e-05, "loss": 0.3975, "step": 69180 }, { "epoch": 1.7561777360358417, "grad_norm": 0.345703125, "learning_rate": 1.3377197980617871e-05, "loss": 0.4752, "step": 69185 }, { "epoch": 1.7563046540848575, "grad_norm": 0.30859375, "learning_rate": 1.3363484804152036e-05, "loss": 0.4098, "step": 69190 }, { "epoch": 1.7564315721338732, "grad_norm": 0.333984375, "learning_rate": 1.3349778332385563e-05, "loss": 0.4225, "step": 69195 }, { "epoch": 1.756558490182889, "grad_norm": 0.3359375, "learning_rate": 1.3336078565991031e-05, "loss": 0.4101, "step": 69200 }, { "epoch": 1.7566854082319048, "grad_norm": 0.3359375, "learning_rate": 1.3322385505640687e-05, "loss": 0.4367, "step": 69205 }, { "epoch": 1.7568123262809205, "grad_norm": 0.35546875, "learning_rate": 1.3308699152006458e-05, "loss": 0.4164, "step": 69210 }, { "epoch": 1.7569392443299363, "grad_norm": 0.359375, "learning_rate": 1.329501950575989e-05, "loss": 0.4136, "step": 69215 }, { "epoch": 1.7570661623789519, "grad_norm": 0.375, "learning_rate": 1.3281346567572265e-05, "loss": 0.4337, "step": 69220 }, { "epoch": 1.7571930804279676, "grad_norm": 0.3515625, "learning_rate": 1.3267680338114511e-05, "loss": 0.4227, "step": 69225 }, { "epoch": 1.7573199984769834, "grad_norm": 0.359375, "learning_rate": 1.3254020818057209e-05, "loss": 0.3876, "step": 69230 }, { "epoch": 1.7574469165259992, "grad_norm": 0.361328125, "learning_rate": 1.324036800807064e-05, "loss": 0.419, "step": 69235 }, { "epoch": 1.757573834575015, "grad_norm": 0.333984375, "learning_rate": 1.32267219088247e-05, "loss": 0.4313, "step": 69240 }, { "epoch": 1.7577007526240307, "grad_norm": 0.345703125, "learning_rate": 1.3213082520989071e-05, "loss": 0.3902, "step": 69245 }, { "epoch": 1.7578276706730465, "grad_norm": 0.349609375, "learning_rate": 1.3199449845232984e-05, "loss": 0.4056, "step": 69250 }, { "epoch": 1.757954588722062, "grad_norm": 0.34375, "learning_rate": 1.3185823882225404e-05, "loss": 0.4283, "step": 69255 }, { "epoch": 1.7580815067710778, "grad_norm": 0.3515625, "learning_rate": 1.3172204632634964e-05, "loss": 0.4282, "step": 69260 }, { "epoch": 1.7582084248200935, "grad_norm": 0.375, "learning_rate": 1.3158592097129894e-05, "loss": 0.4279, "step": 69265 }, { "epoch": 1.7583353428691093, "grad_norm": 0.3515625, "learning_rate": 1.3144986276378228e-05, "loss": 0.4318, "step": 69270 }, { "epoch": 1.758462260918125, "grad_norm": 0.34375, "learning_rate": 1.3131387171047564e-05, "loss": 0.4321, "step": 69275 }, { "epoch": 1.7585891789671408, "grad_norm": 0.34375, "learning_rate": 1.3117794781805219e-05, "loss": 0.4027, "step": 69280 }, { "epoch": 1.7587160970161566, "grad_norm": 0.36328125, "learning_rate": 1.3104209109318159e-05, "loss": 0.4392, "step": 69285 }, { "epoch": 1.7588430150651724, "grad_norm": 0.330078125, "learning_rate": 1.3090630154253035e-05, "loss": 0.3897, "step": 69290 }, { "epoch": 1.7589699331141881, "grad_norm": 0.365234375, "learning_rate": 1.3077057917276146e-05, "loss": 0.4176, "step": 69295 }, { "epoch": 1.759096851163204, "grad_norm": 0.353515625, "learning_rate": 1.3063492399053477e-05, "loss": 0.4105, "step": 69300 }, { "epoch": 1.7592237692122197, "grad_norm": 0.326171875, "learning_rate": 1.3049933600250661e-05, "loss": 0.4225, "step": 69305 }, { "epoch": 1.7593506872612354, "grad_norm": 0.36328125, "learning_rate": 1.3036381521533084e-05, "loss": 0.4187, "step": 69310 }, { "epoch": 1.7594776053102512, "grad_norm": 0.333984375, "learning_rate": 1.3022836163565715e-05, "loss": 0.4007, "step": 69315 }, { "epoch": 1.759604523359267, "grad_norm": 0.34375, "learning_rate": 1.3009297527013201e-05, "loss": 0.4066, "step": 69320 }, { "epoch": 1.7597314414082827, "grad_norm": 0.36328125, "learning_rate": 1.2995765612539898e-05, "loss": 0.4447, "step": 69325 }, { "epoch": 1.7598583594572985, "grad_norm": 0.341796875, "learning_rate": 1.2982240420809792e-05, "loss": 0.419, "step": 69330 }, { "epoch": 1.7599852775063143, "grad_norm": 0.359375, "learning_rate": 1.2968721952486566e-05, "loss": 0.4203, "step": 69335 }, { "epoch": 1.76011219555533, "grad_norm": 0.33203125, "learning_rate": 1.2955210208233541e-05, "loss": 0.4032, "step": 69340 }, { "epoch": 1.7602391136043458, "grad_norm": 0.333984375, "learning_rate": 1.2941705188713803e-05, "loss": 0.4068, "step": 69345 }, { "epoch": 1.7603660316533616, "grad_norm": 0.341796875, "learning_rate": 1.2928206894589987e-05, "loss": 0.3502, "step": 69350 }, { "epoch": 1.7604929497023771, "grad_norm": 0.328125, "learning_rate": 1.291471532652445e-05, "loss": 0.4186, "step": 69355 }, { "epoch": 1.760619867751393, "grad_norm": 0.359375, "learning_rate": 1.2901230485179225e-05, "loss": 0.4149, "step": 69360 }, { "epoch": 1.7607467858004087, "grad_norm": 0.34765625, "learning_rate": 1.2887752371216003e-05, "loss": 0.3823, "step": 69365 }, { "epoch": 1.7608737038494244, "grad_norm": 0.365234375, "learning_rate": 1.2874280985296153e-05, "loss": 0.4048, "step": 69370 }, { "epoch": 1.7610006218984402, "grad_norm": 0.345703125, "learning_rate": 1.286081632808073e-05, "loss": 0.4119, "step": 69375 }, { "epoch": 1.761127539947456, "grad_norm": 0.35546875, "learning_rate": 1.2847358400230423e-05, "loss": 0.4068, "step": 69380 }, { "epoch": 1.7612544579964715, "grad_norm": 0.353515625, "learning_rate": 1.2833907202405602e-05, "loss": 0.4204, "step": 69385 }, { "epoch": 1.7613813760454873, "grad_norm": 0.353515625, "learning_rate": 1.2820462735266308e-05, "loss": 0.4044, "step": 69390 }, { "epoch": 1.761508294094503, "grad_norm": 0.34765625, "learning_rate": 1.2807024999472276e-05, "loss": 0.4174, "step": 69395 }, { "epoch": 1.7616352121435188, "grad_norm": 0.341796875, "learning_rate": 1.2793593995682865e-05, "loss": 0.4127, "step": 69400 }, { "epoch": 1.7617621301925346, "grad_norm": 0.353515625, "learning_rate": 1.2780169724557149e-05, "loss": 0.4358, "step": 69405 }, { "epoch": 1.7618890482415503, "grad_norm": 0.33984375, "learning_rate": 1.2766752186753848e-05, "loss": 0.4181, "step": 69410 }, { "epoch": 1.7620159662905661, "grad_norm": 0.34375, "learning_rate": 1.2753341382931336e-05, "loss": 0.3976, "step": 69415 }, { "epoch": 1.7621428843395819, "grad_norm": 0.333984375, "learning_rate": 1.2739937313747673e-05, "loss": 0.4065, "step": 69420 }, { "epoch": 1.7622698023885977, "grad_norm": 0.375, "learning_rate": 1.2726539979860645e-05, "loss": 0.4138, "step": 69425 }, { "epoch": 1.7623967204376134, "grad_norm": 0.3359375, "learning_rate": 1.2713149381927579e-05, "loss": 0.3793, "step": 69430 }, { "epoch": 1.7625236384866292, "grad_norm": 0.380859375, "learning_rate": 1.2699765520605632e-05, "loss": 0.4101, "step": 69435 }, { "epoch": 1.762650556535645, "grad_norm": 0.341796875, "learning_rate": 1.2686388396551478e-05, "loss": 0.415, "step": 69440 }, { "epoch": 1.7627774745846607, "grad_norm": 0.31640625, "learning_rate": 1.267301801042156e-05, "loss": 0.4359, "step": 69445 }, { "epoch": 1.7629043926336765, "grad_norm": 0.34765625, "learning_rate": 1.2659654362871934e-05, "loss": 0.4227, "step": 69450 }, { "epoch": 1.7630313106826923, "grad_norm": 0.369140625, "learning_rate": 1.2646297454558362e-05, "loss": 0.4109, "step": 69455 }, { "epoch": 1.763158228731708, "grad_norm": 0.3515625, "learning_rate": 1.2632947286136269e-05, "loss": 0.4023, "step": 69460 }, { "epoch": 1.7632851467807238, "grad_norm": 0.34765625, "learning_rate": 1.261960385826073e-05, "loss": 0.4309, "step": 69465 }, { "epoch": 1.7634120648297396, "grad_norm": 0.376953125, "learning_rate": 1.2606267171586505e-05, "loss": 0.4183, "step": 69470 }, { "epoch": 1.7635389828787553, "grad_norm": 0.361328125, "learning_rate": 1.2592937226768019e-05, "loss": 0.4535, "step": 69475 }, { "epoch": 1.763665900927771, "grad_norm": 0.33203125, "learning_rate": 1.2579614024459366e-05, "loss": 0.4291, "step": 69480 }, { "epoch": 1.7637928189767866, "grad_norm": 0.341796875, "learning_rate": 1.2566297565314326e-05, "loss": 0.4056, "step": 69485 }, { "epoch": 1.7639197370258024, "grad_norm": 0.345703125, "learning_rate": 1.2552987849986273e-05, "loss": 0.4176, "step": 69490 }, { "epoch": 1.7640466550748182, "grad_norm": 0.33203125, "learning_rate": 1.2539684879128404e-05, "loss": 0.3994, "step": 69495 }, { "epoch": 1.764173573123834, "grad_norm": 0.337890625, "learning_rate": 1.2526388653393426e-05, "loss": 0.3723, "step": 69500 }, { "epoch": 1.7643004911728497, "grad_norm": 0.33984375, "learning_rate": 1.2513099173433805e-05, "loss": 0.401, "step": 69505 }, { "epoch": 1.7644274092218655, "grad_norm": 0.37109375, "learning_rate": 1.24998164399016e-05, "loss": 0.4375, "step": 69510 }, { "epoch": 1.7645543272708812, "grad_norm": 0.34765625, "learning_rate": 1.2486540453448674e-05, "loss": 0.4287, "step": 69515 }, { "epoch": 1.7646812453198968, "grad_norm": 0.34375, "learning_rate": 1.2473271214726422e-05, "loss": 0.3705, "step": 69520 }, { "epoch": 1.7648081633689126, "grad_norm": 0.341796875, "learning_rate": 1.2460008724385973e-05, "loss": 0.4189, "step": 69525 }, { "epoch": 1.7649350814179283, "grad_norm": 0.345703125, "learning_rate": 1.244675298307809e-05, "loss": 0.4179, "step": 69530 }, { "epoch": 1.765061999466944, "grad_norm": 0.34375, "learning_rate": 1.243350399145327e-05, "loss": 0.4121, "step": 69535 }, { "epoch": 1.7651889175159599, "grad_norm": 0.34765625, "learning_rate": 1.2420261750161593e-05, "loss": 0.407, "step": 69540 }, { "epoch": 1.7653158355649756, "grad_norm": 0.33984375, "learning_rate": 1.2407026259852887e-05, "loss": 0.3936, "step": 69545 }, { "epoch": 1.7654427536139914, "grad_norm": 0.353515625, "learning_rate": 1.2393797521176569e-05, "loss": 0.4121, "step": 69550 }, { "epoch": 1.7655696716630072, "grad_norm": 0.33203125, "learning_rate": 1.2380575534781784e-05, "loss": 0.4383, "step": 69555 }, { "epoch": 1.765696589712023, "grad_norm": 0.34765625, "learning_rate": 1.2367360301317363e-05, "loss": 0.4077, "step": 69560 }, { "epoch": 1.7658235077610387, "grad_norm": 0.376953125, "learning_rate": 1.2354151821431741e-05, "loss": 0.4134, "step": 69565 }, { "epoch": 1.7659504258100545, "grad_norm": 0.3671875, "learning_rate": 1.234095009577306e-05, "loss": 0.438, "step": 69570 }, { "epoch": 1.7660773438590702, "grad_norm": 0.326171875, "learning_rate": 1.2327755124989125e-05, "loss": 0.43, "step": 69575 }, { "epoch": 1.766204261908086, "grad_norm": 0.341796875, "learning_rate": 1.2314566909727413e-05, "loss": 0.3955, "step": 69580 }, { "epoch": 1.7663311799571018, "grad_norm": 0.353515625, "learning_rate": 1.2301385450635026e-05, "loss": 0.4025, "step": 69585 }, { "epoch": 1.7664580980061175, "grad_norm": 0.359375, "learning_rate": 1.2288210748358812e-05, "loss": 0.4081, "step": 69590 }, { "epoch": 1.7665850160551333, "grad_norm": 0.337890625, "learning_rate": 1.2275042803545271e-05, "loss": 0.4107, "step": 69595 }, { "epoch": 1.766711934104149, "grad_norm": 0.380859375, "learning_rate": 1.2261881616840502e-05, "loss": 0.3925, "step": 69600 }, { "epoch": 1.7668388521531648, "grad_norm": 0.29296875, "learning_rate": 1.2248727188890323e-05, "loss": 0.3593, "step": 69605 }, { "epoch": 1.7669657702021806, "grad_norm": 0.31640625, "learning_rate": 1.223557952034025e-05, "loss": 0.4155, "step": 69610 }, { "epoch": 1.7670926882511964, "grad_norm": 0.36328125, "learning_rate": 1.22224386118354e-05, "loss": 0.43, "step": 69615 }, { "epoch": 1.767219606300212, "grad_norm": 0.3203125, "learning_rate": 1.2209304464020592e-05, "loss": 0.3959, "step": 69620 }, { "epoch": 1.7673465243492277, "grad_norm": 0.349609375, "learning_rate": 1.2196177077540343e-05, "loss": 0.4259, "step": 69625 }, { "epoch": 1.7674734423982434, "grad_norm": 0.380859375, "learning_rate": 1.2183056453038787e-05, "loss": 0.4473, "step": 69630 }, { "epoch": 1.7676003604472592, "grad_norm": 0.361328125, "learning_rate": 1.2169942591159759e-05, "loss": 0.4423, "step": 69635 }, { "epoch": 1.767727278496275, "grad_norm": 0.32421875, "learning_rate": 1.2156835492546746e-05, "loss": 0.375, "step": 69640 }, { "epoch": 1.7678541965452907, "grad_norm": 0.359375, "learning_rate": 1.2143735157842898e-05, "loss": 0.4317, "step": 69645 }, { "epoch": 1.7679811145943063, "grad_norm": 0.337890625, "learning_rate": 1.213064158769107e-05, "loss": 0.4001, "step": 69650 }, { "epoch": 1.768108032643322, "grad_norm": 0.349609375, "learning_rate": 1.2117554782733729e-05, "loss": 0.4097, "step": 69655 }, { "epoch": 1.7682349506923378, "grad_norm": 0.333984375, "learning_rate": 1.2104474743613046e-05, "loss": 0.4156, "step": 69660 }, { "epoch": 1.7683618687413536, "grad_norm": 0.337890625, "learning_rate": 1.209140147097084e-05, "loss": 0.3904, "step": 69665 }, { "epoch": 1.7684887867903694, "grad_norm": 0.341796875, "learning_rate": 1.207833496544865e-05, "loss": 0.3908, "step": 69670 }, { "epoch": 1.7686157048393851, "grad_norm": 0.36328125, "learning_rate": 1.2065275227687626e-05, "loss": 0.4252, "step": 69675 }, { "epoch": 1.768742622888401, "grad_norm": 0.361328125, "learning_rate": 1.2052222258328576e-05, "loss": 0.433, "step": 69680 }, { "epoch": 1.7688695409374167, "grad_norm": 0.34375, "learning_rate": 1.203917605801205e-05, "loss": 0.3985, "step": 69685 }, { "epoch": 1.7689964589864324, "grad_norm": 0.345703125, "learning_rate": 1.2026136627378209e-05, "loss": 0.4206, "step": 69690 }, { "epoch": 1.7691233770354482, "grad_norm": 0.349609375, "learning_rate": 1.2013103967066867e-05, "loss": 0.414, "step": 69695 }, { "epoch": 1.769250295084464, "grad_norm": 0.345703125, "learning_rate": 1.2000078077717567e-05, "loss": 0.4541, "step": 69700 }, { "epoch": 1.7693772131334797, "grad_norm": 0.375, "learning_rate": 1.1987058959969449e-05, "loss": 0.4298, "step": 69705 }, { "epoch": 1.7695041311824955, "grad_norm": 0.34375, "learning_rate": 1.197404661446138e-05, "loss": 0.389, "step": 69710 }, { "epoch": 1.7696310492315113, "grad_norm": 0.3359375, "learning_rate": 1.196104104183187e-05, "loss": 0.4273, "step": 69715 }, { "epoch": 1.769757967280527, "grad_norm": 0.349609375, "learning_rate": 1.1948042242719075e-05, "loss": 0.4235, "step": 69720 }, { "epoch": 1.7698848853295428, "grad_norm": 0.369140625, "learning_rate": 1.1935050217760866e-05, "loss": 0.4058, "step": 69725 }, { "epoch": 1.7700118033785586, "grad_norm": 0.33984375, "learning_rate": 1.1922064967594735e-05, "loss": 0.411, "step": 69730 }, { "epoch": 1.7701387214275743, "grad_norm": 0.3359375, "learning_rate": 1.1909086492857888e-05, "loss": 0.4145, "step": 69735 }, { "epoch": 1.77026563947659, "grad_norm": 0.359375, "learning_rate": 1.1896114794187117e-05, "loss": 0.3883, "step": 69740 }, { "epoch": 1.7703925575256059, "grad_norm": 0.34375, "learning_rate": 1.1883149872219012e-05, "loss": 0.4235, "step": 69745 }, { "epoch": 1.7705194755746214, "grad_norm": 0.322265625, "learning_rate": 1.1870191727589729e-05, "loss": 0.3937, "step": 69750 }, { "epoch": 1.7706463936236372, "grad_norm": 0.3515625, "learning_rate": 1.1857240360935094e-05, "loss": 0.3888, "step": 69755 }, { "epoch": 1.770773311672653, "grad_norm": 0.330078125, "learning_rate": 1.1844295772890667e-05, "loss": 0.412, "step": 69760 }, { "epoch": 1.7709002297216687, "grad_norm": 0.310546875, "learning_rate": 1.183135796409162e-05, "loss": 0.4085, "step": 69765 }, { "epoch": 1.7710271477706845, "grad_norm": 0.333984375, "learning_rate": 1.1818426935172781e-05, "loss": 0.4304, "step": 69770 }, { "epoch": 1.7711540658197003, "grad_norm": 0.375, "learning_rate": 1.180550268676871e-05, "loss": 0.4133, "step": 69775 }, { "epoch": 1.771280983868716, "grad_norm": 0.359375, "learning_rate": 1.1792585219513562e-05, "loss": 0.4318, "step": 69780 }, { "epoch": 1.7714079019177316, "grad_norm": 0.314453125, "learning_rate": 1.17796745340412e-05, "loss": 0.3952, "step": 69785 }, { "epoch": 1.7715348199667473, "grad_norm": 0.341796875, "learning_rate": 1.1766770630985168e-05, "loss": 0.4302, "step": 69790 }, { "epoch": 1.771661738015763, "grad_norm": 0.34375, "learning_rate": 1.1753873510978623e-05, "loss": 0.415, "step": 69795 }, { "epoch": 1.7717886560647789, "grad_norm": 0.349609375, "learning_rate": 1.1740983174654428e-05, "loss": 0.4053, "step": 69800 }, { "epoch": 1.7719155741137946, "grad_norm": 0.328125, "learning_rate": 1.172809962264511e-05, "loss": 0.4247, "step": 69805 }, { "epoch": 1.7720424921628104, "grad_norm": 0.3515625, "learning_rate": 1.1715222855582879e-05, "loss": 0.3946, "step": 69810 }, { "epoch": 1.7721694102118262, "grad_norm": 0.34765625, "learning_rate": 1.1702352874099564e-05, "loss": 0.4038, "step": 69815 }, { "epoch": 1.772296328260842, "grad_norm": 0.349609375, "learning_rate": 1.1689489678826724e-05, "loss": 0.4291, "step": 69820 }, { "epoch": 1.7724232463098577, "grad_norm": 0.359375, "learning_rate": 1.1676633270395525e-05, "loss": 0.4151, "step": 69825 }, { "epoch": 1.7725501643588735, "grad_norm": 0.349609375, "learning_rate": 1.1663783649436808e-05, "loss": 0.4054, "step": 69830 }, { "epoch": 1.7726770824078892, "grad_norm": 0.349609375, "learning_rate": 1.1650940816581139e-05, "loss": 0.4253, "step": 69835 }, { "epoch": 1.772804000456905, "grad_norm": 0.302734375, "learning_rate": 1.1638104772458695e-05, "loss": 0.3781, "step": 69840 }, { "epoch": 1.7729309185059208, "grad_norm": 0.333984375, "learning_rate": 1.162527551769934e-05, "loss": 0.3974, "step": 69845 }, { "epoch": 1.7730578365549365, "grad_norm": 0.3359375, "learning_rate": 1.1612453052932602e-05, "loss": 0.4242, "step": 69850 }, { "epoch": 1.7731847546039523, "grad_norm": 0.32421875, "learning_rate": 1.1599637378787663e-05, "loss": 0.4097, "step": 69855 }, { "epoch": 1.773311672652968, "grad_norm": 0.341796875, "learning_rate": 1.1586828495893386e-05, "loss": 0.4049, "step": 69860 }, { "epoch": 1.7734385907019838, "grad_norm": 0.357421875, "learning_rate": 1.1574026404878284e-05, "loss": 0.3878, "step": 69865 }, { "epoch": 1.7735655087509996, "grad_norm": 0.35546875, "learning_rate": 1.1561231106370555e-05, "loss": 0.4083, "step": 69870 }, { "epoch": 1.7736924268000154, "grad_norm": 0.333984375, "learning_rate": 1.1548442600998099e-05, "loss": 0.409, "step": 69875 }, { "epoch": 1.7738193448490311, "grad_norm": 0.3359375, "learning_rate": 1.1535660889388409e-05, "loss": 0.4104, "step": 69880 }, { "epoch": 1.7739462628980467, "grad_norm": 0.34765625, "learning_rate": 1.1522885972168672e-05, "loss": 0.4261, "step": 69885 }, { "epoch": 1.7740731809470625, "grad_norm": 0.32421875, "learning_rate": 1.1510117849965784e-05, "loss": 0.4146, "step": 69890 }, { "epoch": 1.7742000989960782, "grad_norm": 0.34375, "learning_rate": 1.1497356523406226e-05, "loss": 0.3978, "step": 69895 }, { "epoch": 1.774327017045094, "grad_norm": 0.36328125, "learning_rate": 1.1484601993116216e-05, "loss": 0.4107, "step": 69900 }, { "epoch": 1.7744539350941098, "grad_norm": 0.3671875, "learning_rate": 1.1471854259721618e-05, "loss": 0.4199, "step": 69905 }, { "epoch": 1.7745808531431255, "grad_norm": 0.361328125, "learning_rate": 1.1459113323847918e-05, "loss": 0.4022, "step": 69910 }, { "epoch": 1.774707771192141, "grad_norm": 0.357421875, "learning_rate": 1.1446379186120364e-05, "loss": 0.4675, "step": 69915 }, { "epoch": 1.7748346892411568, "grad_norm": 0.3515625, "learning_rate": 1.1433651847163805e-05, "loss": 0.4028, "step": 69920 }, { "epoch": 1.7749616072901726, "grad_norm": 0.361328125, "learning_rate": 1.1420931307602743e-05, "loss": 0.43, "step": 69925 }, { "epoch": 1.7750885253391884, "grad_norm": 0.353515625, "learning_rate": 1.1408217568061379e-05, "loss": 0.4334, "step": 69930 }, { "epoch": 1.7752154433882041, "grad_norm": 0.353515625, "learning_rate": 1.1395510629163546e-05, "loss": 0.4416, "step": 69935 }, { "epoch": 1.77534236143722, "grad_norm": 0.37890625, "learning_rate": 1.1382810491532812e-05, "loss": 0.4092, "step": 69940 }, { "epoch": 1.7754692794862357, "grad_norm": 0.376953125, "learning_rate": 1.1370117155792362e-05, "loss": 0.4413, "step": 69945 }, { "epoch": 1.7755961975352514, "grad_norm": 0.337890625, "learning_rate": 1.135743062256505e-05, "loss": 0.3851, "step": 69950 }, { "epoch": 1.7757231155842672, "grad_norm": 0.34375, "learning_rate": 1.1344750892473375e-05, "loss": 0.4263, "step": 69955 }, { "epoch": 1.775850033633283, "grad_norm": 0.330078125, "learning_rate": 1.133207796613954e-05, "loss": 0.4115, "step": 69960 }, { "epoch": 1.7759769516822987, "grad_norm": 0.345703125, "learning_rate": 1.1319411844185416e-05, "loss": 0.3972, "step": 69965 }, { "epoch": 1.7761038697313145, "grad_norm": 0.33984375, "learning_rate": 1.1306752527232504e-05, "loss": 0.3856, "step": 69970 }, { "epoch": 1.7762307877803303, "grad_norm": 0.345703125, "learning_rate": 1.129410001590199e-05, "loss": 0.4076, "step": 69975 }, { "epoch": 1.776357705829346, "grad_norm": 0.34765625, "learning_rate": 1.1281454310814747e-05, "loss": 0.4056, "step": 69980 }, { "epoch": 1.7764846238783618, "grad_norm": 0.357421875, "learning_rate": 1.1268815412591259e-05, "loss": 0.4059, "step": 69985 }, { "epoch": 1.7766115419273776, "grad_norm": 0.341796875, "learning_rate": 1.1256183321851763e-05, "loss": 0.4095, "step": 69990 }, { "epoch": 1.7767384599763933, "grad_norm": 0.359375, "learning_rate": 1.1243558039216066e-05, "loss": 0.4232, "step": 69995 }, { "epoch": 1.7768653780254091, "grad_norm": 0.345703125, "learning_rate": 1.1230939565303703e-05, "loss": 0.4069, "step": 70000 }, { "epoch": 1.7769922960744249, "grad_norm": 0.380859375, "learning_rate": 1.1218327900733865e-05, "loss": 0.4238, "step": 70005 }, { "epoch": 1.7771192141234406, "grad_norm": 0.341796875, "learning_rate": 1.1205723046125402e-05, "loss": 0.4192, "step": 70010 }, { "epoch": 1.7772461321724562, "grad_norm": 0.361328125, "learning_rate": 1.1193125002096826e-05, "loss": 0.4083, "step": 70015 }, { "epoch": 1.777373050221472, "grad_norm": 0.3671875, "learning_rate": 1.1180533769266303e-05, "loss": 0.4229, "step": 70020 }, { "epoch": 1.7774999682704877, "grad_norm": 0.34375, "learning_rate": 1.1167949348251692e-05, "loss": 0.4088, "step": 70025 }, { "epoch": 1.7776268863195035, "grad_norm": 0.357421875, "learning_rate": 1.1155371739670516e-05, "loss": 0.4104, "step": 70030 }, { "epoch": 1.7777538043685193, "grad_norm": 0.326171875, "learning_rate": 1.114280094413993e-05, "loss": 0.3666, "step": 70035 }, { "epoch": 1.777880722417535, "grad_norm": 0.345703125, "learning_rate": 1.113023696227679e-05, "loss": 0.4245, "step": 70040 }, { "epoch": 1.7780076404665508, "grad_norm": 0.35546875, "learning_rate": 1.1117679794697603e-05, "loss": 0.427, "step": 70045 }, { "epoch": 1.7781345585155663, "grad_norm": 0.36328125, "learning_rate": 1.1105129442018562e-05, "loss": 0.3885, "step": 70050 }, { "epoch": 1.778261476564582, "grad_norm": 0.359375, "learning_rate": 1.1092585904855472e-05, "loss": 0.4488, "step": 70055 }, { "epoch": 1.7783883946135979, "grad_norm": 0.34375, "learning_rate": 1.1080049183823842e-05, "loss": 0.413, "step": 70060 }, { "epoch": 1.7785153126626136, "grad_norm": 0.337890625, "learning_rate": 1.1067519279538895e-05, "loss": 0.4164, "step": 70065 }, { "epoch": 1.7786422307116294, "grad_norm": 0.341796875, "learning_rate": 1.1054996192615423e-05, "loss": 0.4282, "step": 70070 }, { "epoch": 1.7787691487606452, "grad_norm": 0.34375, "learning_rate": 1.1042479923667918e-05, "loss": 0.4401, "step": 70075 }, { "epoch": 1.778896066809661, "grad_norm": 0.3359375, "learning_rate": 1.102997047331059e-05, "loss": 0.4122, "step": 70080 }, { "epoch": 1.7790229848586767, "grad_norm": 0.349609375, "learning_rate": 1.1017467842157262e-05, "loss": 0.4098, "step": 70085 }, { "epoch": 1.7791499029076925, "grad_norm": 0.388671875, "learning_rate": 1.1004972030821414e-05, "loss": 0.4205, "step": 70090 }, { "epoch": 1.7792768209567082, "grad_norm": 0.330078125, "learning_rate": 1.0992483039916233e-05, "loss": 0.4124, "step": 70095 }, { "epoch": 1.779403739005724, "grad_norm": 0.369140625, "learning_rate": 1.0980000870054534e-05, "loss": 0.4124, "step": 70100 }, { "epoch": 1.7795306570547398, "grad_norm": 0.37890625, "learning_rate": 1.0967525521848808e-05, "loss": 0.4435, "step": 70105 }, { "epoch": 1.7796575751037556, "grad_norm": 0.314453125, "learning_rate": 1.0955056995911232e-05, "loss": 0.4191, "step": 70110 }, { "epoch": 1.7797844931527713, "grad_norm": 0.357421875, "learning_rate": 1.0942595292853617e-05, "loss": 0.4222, "step": 70115 }, { "epoch": 1.779911411201787, "grad_norm": 0.35546875, "learning_rate": 1.0930140413287458e-05, "loss": 0.3998, "step": 70120 }, { "epoch": 1.7800383292508029, "grad_norm": 0.349609375, "learning_rate": 1.0917692357823899e-05, "loss": 0.4189, "step": 70125 }, { "epoch": 1.7801652472998186, "grad_norm": 0.34375, "learning_rate": 1.0905251127073783e-05, "loss": 0.3976, "step": 70130 }, { "epoch": 1.7802921653488344, "grad_norm": 0.322265625, "learning_rate": 1.089281672164759e-05, "loss": 0.3763, "step": 70135 }, { "epoch": 1.7804190833978502, "grad_norm": 0.333984375, "learning_rate": 1.0880389142155466e-05, "loss": 0.4081, "step": 70140 }, { "epoch": 1.780546001446866, "grad_norm": 0.3515625, "learning_rate": 1.0867968389207237e-05, "loss": 0.4037, "step": 70145 }, { "epoch": 1.7806729194958815, "grad_norm": 0.357421875, "learning_rate": 1.085555446341237e-05, "loss": 0.4327, "step": 70150 }, { "epoch": 1.7807998375448972, "grad_norm": 0.3671875, "learning_rate": 1.0843147365380006e-05, "loss": 0.4286, "step": 70155 }, { "epoch": 1.780926755593913, "grad_norm": 0.341796875, "learning_rate": 1.0830747095718978e-05, "loss": 0.4008, "step": 70160 }, { "epoch": 1.7810536736429288, "grad_norm": 0.314453125, "learning_rate": 1.0818353655037766e-05, "loss": 0.3944, "step": 70165 }, { "epoch": 1.7811805916919445, "grad_norm": 0.419921875, "learning_rate": 1.0805967043944497e-05, "loss": 0.4042, "step": 70170 }, { "epoch": 1.7813075097409603, "grad_norm": 0.345703125, "learning_rate": 1.0793587263046972e-05, "loss": 0.4085, "step": 70175 }, { "epoch": 1.7814344277899758, "grad_norm": 0.37109375, "learning_rate": 1.0781214312952668e-05, "loss": 0.457, "step": 70180 }, { "epoch": 1.7815613458389916, "grad_norm": 0.34375, "learning_rate": 1.0768848194268703e-05, "loss": 0.418, "step": 70185 }, { "epoch": 1.7816882638880074, "grad_norm": 0.357421875, "learning_rate": 1.0756488907601924e-05, "loss": 0.3916, "step": 70190 }, { "epoch": 1.7818151819370232, "grad_norm": 0.33984375, "learning_rate": 1.074413645355876e-05, "loss": 0.396, "step": 70195 }, { "epoch": 1.781942099986039, "grad_norm": 0.34375, "learning_rate": 1.0731790832745347e-05, "loss": 0.3958, "step": 70200 }, { "epoch": 1.7820690180350547, "grad_norm": 0.353515625, "learning_rate": 1.0719452045767479e-05, "loss": 0.4243, "step": 70205 }, { "epoch": 1.7821959360840705, "grad_norm": 0.373046875, "learning_rate": 1.0707120093230625e-05, "loss": 0.4582, "step": 70210 }, { "epoch": 1.7823228541330862, "grad_norm": 0.345703125, "learning_rate": 1.0694794975739902e-05, "loss": 0.4033, "step": 70215 }, { "epoch": 1.782449772182102, "grad_norm": 0.365234375, "learning_rate": 1.0682476693900104e-05, "loss": 0.4501, "step": 70220 }, { "epoch": 1.7825766902311178, "grad_norm": 0.328125, "learning_rate": 1.0670165248315671e-05, "loss": 0.4111, "step": 70225 }, { "epoch": 1.7827036082801335, "grad_norm": 0.3359375, "learning_rate": 1.0657860639590698e-05, "loss": 0.4356, "step": 70230 }, { "epoch": 1.7828305263291493, "grad_norm": 0.33203125, "learning_rate": 1.0645562868329039e-05, "loss": 0.4266, "step": 70235 }, { "epoch": 1.782957444378165, "grad_norm": 0.392578125, "learning_rate": 1.0633271935134075e-05, "loss": 0.4298, "step": 70240 }, { "epoch": 1.7830843624271808, "grad_norm": 0.35546875, "learning_rate": 1.0620987840608958e-05, "loss": 0.4257, "step": 70245 }, { "epoch": 1.7832112804761966, "grad_norm": 0.330078125, "learning_rate": 1.0608710585356422e-05, "loss": 0.4327, "step": 70250 }, { "epoch": 1.7833381985252124, "grad_norm": 0.34375, "learning_rate": 1.0596440169978953e-05, "loss": 0.4241, "step": 70255 }, { "epoch": 1.7834651165742281, "grad_norm": 0.375, "learning_rate": 1.0584176595078635e-05, "loss": 0.4617, "step": 70260 }, { "epoch": 1.783592034623244, "grad_norm": 0.3671875, "learning_rate": 1.0571919861257256e-05, "loss": 0.4287, "step": 70265 }, { "epoch": 1.7837189526722597, "grad_norm": 0.365234375, "learning_rate": 1.0559669969116197e-05, "loss": 0.4, "step": 70270 }, { "epoch": 1.7838458707212754, "grad_norm": 0.365234375, "learning_rate": 1.0547426919256614e-05, "loss": 0.4424, "step": 70275 }, { "epoch": 1.783972788770291, "grad_norm": 0.34765625, "learning_rate": 1.0535190712279224e-05, "loss": 0.4171, "step": 70280 }, { "epoch": 1.7840997068193067, "grad_norm": 0.3828125, "learning_rate": 1.052296134878448e-05, "loss": 0.4315, "step": 70285 }, { "epoch": 1.7842266248683225, "grad_norm": 0.33984375, "learning_rate": 1.0510738829372473e-05, "loss": 0.4437, "step": 70290 }, { "epoch": 1.7843535429173383, "grad_norm": 0.345703125, "learning_rate": 1.0498523154642935e-05, "loss": 0.4141, "step": 70295 }, { "epoch": 1.784480460966354, "grad_norm": 0.361328125, "learning_rate": 1.0486314325195288e-05, "loss": 0.408, "step": 70300 }, { "epoch": 1.7846073790153698, "grad_norm": 0.33984375, "learning_rate": 1.0474112341628638e-05, "loss": 0.4246, "step": 70305 }, { "epoch": 1.7847342970643856, "grad_norm": 0.34765625, "learning_rate": 1.0461917204541686e-05, "loss": 0.4097, "step": 70310 }, { "epoch": 1.7848612151134011, "grad_norm": 0.333984375, "learning_rate": 1.0449728914532907e-05, "loss": 0.3993, "step": 70315 }, { "epoch": 1.784988133162417, "grad_norm": 0.359375, "learning_rate": 1.0437547472200302e-05, "loss": 0.4198, "step": 70320 }, { "epoch": 1.7851150512114327, "grad_norm": 0.34765625, "learning_rate": 1.0425372878141681e-05, "loss": 0.4006, "step": 70325 }, { "epoch": 1.7852419692604484, "grad_norm": 0.34375, "learning_rate": 1.041320513295441e-05, "loss": 0.4274, "step": 70330 }, { "epoch": 1.7853688873094642, "grad_norm": 0.353515625, "learning_rate": 1.0401044237235568e-05, "loss": 0.4212, "step": 70335 }, { "epoch": 1.78549580535848, "grad_norm": 0.3359375, "learning_rate": 1.0388890191581872e-05, "loss": 0.4052, "step": 70340 }, { "epoch": 1.7856227234074957, "grad_norm": 0.3359375, "learning_rate": 1.0376742996589731e-05, "loss": 0.4288, "step": 70345 }, { "epoch": 1.7857496414565115, "grad_norm": 0.32421875, "learning_rate": 1.0364602652855187e-05, "loss": 0.3999, "step": 70350 }, { "epoch": 1.7858765595055273, "grad_norm": 0.35546875, "learning_rate": 1.0352469160973958e-05, "loss": 0.4252, "step": 70355 }, { "epoch": 1.786003477554543, "grad_norm": 0.3359375, "learning_rate": 1.0340342521541455e-05, "loss": 0.4106, "step": 70360 }, { "epoch": 1.7861303956035588, "grad_norm": 0.33203125, "learning_rate": 1.0328222735152703e-05, "loss": 0.4218, "step": 70365 }, { "epoch": 1.7862573136525746, "grad_norm": 0.345703125, "learning_rate": 1.0316109802402423e-05, "loss": 0.4115, "step": 70370 }, { "epoch": 1.7863842317015903, "grad_norm": 0.30078125, "learning_rate": 1.0304003723884973e-05, "loss": 0.389, "step": 70375 }, { "epoch": 1.786511149750606, "grad_norm": 0.345703125, "learning_rate": 1.0291904500194448e-05, "loss": 0.3998, "step": 70380 }, { "epoch": 1.7866380677996219, "grad_norm": 0.328125, "learning_rate": 1.0279812131924503e-05, "loss": 0.4079, "step": 70385 }, { "epoch": 1.7867649858486376, "grad_norm": 0.369140625, "learning_rate": 1.0267726619668531e-05, "loss": 0.4236, "step": 70390 }, { "epoch": 1.7868919038976534, "grad_norm": 0.302734375, "learning_rate": 1.0255647964019542e-05, "loss": 0.3956, "step": 70395 }, { "epoch": 1.7870188219466692, "grad_norm": 0.34375, "learning_rate": 1.0243576165570227e-05, "loss": 0.3983, "step": 70400 }, { "epoch": 1.787145739995685, "grad_norm": 0.33984375, "learning_rate": 1.023151122491298e-05, "loss": 0.4276, "step": 70405 }, { "epoch": 1.7872726580447007, "grad_norm": 0.345703125, "learning_rate": 1.0219453142639793e-05, "loss": 0.4055, "step": 70410 }, { "epoch": 1.7873995760937162, "grad_norm": 0.330078125, "learning_rate": 1.0207401919342362e-05, "loss": 0.42, "step": 70415 }, { "epoch": 1.787526494142732, "grad_norm": 0.34765625, "learning_rate": 1.0195357555612044e-05, "loss": 0.4121, "step": 70420 }, { "epoch": 1.7876534121917478, "grad_norm": 0.345703125, "learning_rate": 1.0183320052039833e-05, "loss": 0.4048, "step": 70425 }, { "epoch": 1.7877803302407635, "grad_norm": 0.39453125, "learning_rate": 1.0171289409216427e-05, "loss": 0.4356, "step": 70430 }, { "epoch": 1.7879072482897793, "grad_norm": 0.33203125, "learning_rate": 1.0159265627732133e-05, "loss": 0.4263, "step": 70435 }, { "epoch": 1.788034166338795, "grad_norm": 0.341796875, "learning_rate": 1.0147248708176947e-05, "loss": 0.4017, "step": 70440 }, { "epoch": 1.7881610843878106, "grad_norm": 0.3515625, "learning_rate": 1.0135238651140598e-05, "loss": 0.4042, "step": 70445 }, { "epoch": 1.7882880024368264, "grad_norm": 0.361328125, "learning_rate": 1.0123235457212365e-05, "loss": 0.3962, "step": 70450 }, { "epoch": 1.7884149204858422, "grad_norm": 0.33984375, "learning_rate": 1.011123912698124e-05, "loss": 0.429, "step": 70455 }, { "epoch": 1.788541838534858, "grad_norm": 0.34375, "learning_rate": 1.0099249661035892e-05, "loss": 0.3924, "step": 70460 }, { "epoch": 1.7886687565838737, "grad_norm": 0.357421875, "learning_rate": 1.0087267059964643e-05, "loss": 0.4334, "step": 70465 }, { "epoch": 1.7887956746328895, "grad_norm": 0.3515625, "learning_rate": 1.0075291324355462e-05, "loss": 0.4255, "step": 70470 }, { "epoch": 1.7889225926819052, "grad_norm": 0.3671875, "learning_rate": 1.0063322454795975e-05, "loss": 0.4121, "step": 70475 }, { "epoch": 1.789049510730921, "grad_norm": 0.33203125, "learning_rate": 1.005136045187353e-05, "loss": 0.418, "step": 70480 }, { "epoch": 1.7891764287799368, "grad_norm": 0.33984375, "learning_rate": 1.0039405316175091e-05, "loss": 0.4414, "step": 70485 }, { "epoch": 1.7893033468289525, "grad_norm": 0.34765625, "learning_rate": 1.0027457048287274e-05, "loss": 0.3979, "step": 70490 }, { "epoch": 1.7894302648779683, "grad_norm": 0.34375, "learning_rate": 1.0015515648796374e-05, "loss": 0.4164, "step": 70495 }, { "epoch": 1.789557182926984, "grad_norm": 0.330078125, "learning_rate": 1.0003581118288356e-05, "loss": 0.3979, "step": 70500 }, { "epoch": 1.7896841009759998, "grad_norm": 0.37109375, "learning_rate": 9.991653457348853e-06, "loss": 0.4342, "step": 70505 }, { "epoch": 1.7898110190250156, "grad_norm": 0.34375, "learning_rate": 9.979732666563162e-06, "loss": 0.4188, "step": 70510 }, { "epoch": 1.7899379370740314, "grad_norm": 0.3359375, "learning_rate": 9.967818746516199e-06, "loss": 0.4084, "step": 70515 }, { "epoch": 1.7900648551230471, "grad_norm": 0.345703125, "learning_rate": 9.955911697792595e-06, "loss": 0.4115, "step": 70520 }, { "epoch": 1.790191773172063, "grad_norm": 0.345703125, "learning_rate": 9.94401152097662e-06, "loss": 0.4377, "step": 70525 }, { "epoch": 1.7903186912210787, "grad_norm": 0.33984375, "learning_rate": 9.932118216652202e-06, "loss": 0.4447, "step": 70530 }, { "epoch": 1.7904456092700944, "grad_norm": 0.3515625, "learning_rate": 9.920231785402944e-06, "loss": 0.4147, "step": 70535 }, { "epoch": 1.7905725273191102, "grad_norm": 0.35546875, "learning_rate": 9.908352227812127e-06, "loss": 0.4075, "step": 70540 }, { "epoch": 1.7906994453681258, "grad_norm": 0.359375, "learning_rate": 9.896479544462655e-06, "loss": 0.3901, "step": 70545 }, { "epoch": 1.7908263634171415, "grad_norm": 0.353515625, "learning_rate": 9.884613735937125e-06, "loss": 0.4221, "step": 70550 }, { "epoch": 1.7909532814661573, "grad_norm": 0.3671875, "learning_rate": 9.872754802817757e-06, "loss": 0.415, "step": 70555 }, { "epoch": 1.791080199515173, "grad_norm": 0.345703125, "learning_rate": 9.860902745686534e-06, "loss": 0.4321, "step": 70560 }, { "epoch": 1.7912071175641888, "grad_norm": 0.349609375, "learning_rate": 9.84905756512494e-06, "loss": 0.4096, "step": 70565 }, { "epoch": 1.7913340356132046, "grad_norm": 0.35546875, "learning_rate": 9.837219261714297e-06, "loss": 0.4227, "step": 70570 }, { "epoch": 1.7914609536622204, "grad_norm": 0.349609375, "learning_rate": 9.825387836035487e-06, "loss": 0.4401, "step": 70575 }, { "epoch": 1.791587871711236, "grad_norm": 0.3515625, "learning_rate": 9.813563288669046e-06, "loss": 0.4239, "step": 70580 }, { "epoch": 1.7917147897602517, "grad_norm": 0.36328125, "learning_rate": 9.801745620195211e-06, "loss": 0.4036, "step": 70585 }, { "epoch": 1.7918417078092674, "grad_norm": 0.310546875, "learning_rate": 9.789934831193869e-06, "loss": 0.3768, "step": 70590 }, { "epoch": 1.7919686258582832, "grad_norm": 0.359375, "learning_rate": 9.778130922244586e-06, "loss": 0.4223, "step": 70595 }, { "epoch": 1.792095543907299, "grad_norm": 0.359375, "learning_rate": 9.766333893926553e-06, "loss": 0.3989, "step": 70600 }, { "epoch": 1.7922224619563147, "grad_norm": 0.36328125, "learning_rate": 9.754543746818655e-06, "loss": 0.3914, "step": 70605 }, { "epoch": 1.7923493800053305, "grad_norm": 0.34375, "learning_rate": 9.74276048149943e-06, "loss": 0.4072, "step": 70610 }, { "epoch": 1.7924762980543463, "grad_norm": 0.35546875, "learning_rate": 9.73098409854708e-06, "loss": 0.4145, "step": 70615 }, { "epoch": 1.792603216103362, "grad_norm": 0.345703125, "learning_rate": 9.719214598539459e-06, "loss": 0.4205, "step": 70620 }, { "epoch": 1.7927301341523778, "grad_norm": 0.330078125, "learning_rate": 9.707451982054076e-06, "loss": 0.3976, "step": 70625 }, { "epoch": 1.7928570522013936, "grad_norm": 0.373046875, "learning_rate": 9.695696249668184e-06, "loss": 0.4183, "step": 70630 }, { "epoch": 1.7929839702504093, "grad_norm": 0.376953125, "learning_rate": 9.68394740195857e-06, "loss": 0.4338, "step": 70635 }, { "epoch": 1.793110888299425, "grad_norm": 0.330078125, "learning_rate": 9.672205439501756e-06, "loss": 0.4145, "step": 70640 }, { "epoch": 1.7932378063484409, "grad_norm": 0.341796875, "learning_rate": 9.660470362873919e-06, "loss": 0.3952, "step": 70645 }, { "epoch": 1.7933647243974566, "grad_norm": 0.349609375, "learning_rate": 9.648742172650908e-06, "loss": 0.4114, "step": 70650 }, { "epoch": 1.7934916424464724, "grad_norm": 0.3515625, "learning_rate": 9.637020869408203e-06, "loss": 0.4138, "step": 70655 }, { "epoch": 1.7936185604954882, "grad_norm": 0.34765625, "learning_rate": 9.625306453720988e-06, "loss": 0.4025, "step": 70660 }, { "epoch": 1.793745478544504, "grad_norm": 0.359375, "learning_rate": 9.613598926164073e-06, "loss": 0.4253, "step": 70665 }, { "epoch": 1.7938723965935197, "grad_norm": 0.36328125, "learning_rate": 9.601898287311915e-06, "loss": 0.4422, "step": 70670 }, { "epoch": 1.7939993146425355, "grad_norm": 0.376953125, "learning_rate": 9.590204537738705e-06, "loss": 0.3973, "step": 70675 }, { "epoch": 1.794126232691551, "grad_norm": 0.365234375, "learning_rate": 9.578517678018215e-06, "loss": 0.4291, "step": 70680 }, { "epoch": 1.7942531507405668, "grad_norm": 0.34765625, "learning_rate": 9.566837708723923e-06, "loss": 0.406, "step": 70685 }, { "epoch": 1.7943800687895826, "grad_norm": 0.345703125, "learning_rate": 9.555164630428951e-06, "loss": 0.4099, "step": 70690 }, { "epoch": 1.7945069868385983, "grad_norm": 0.359375, "learning_rate": 9.543498443706126e-06, "loss": 0.4165, "step": 70695 }, { "epoch": 1.794633904887614, "grad_norm": 0.3203125, "learning_rate": 9.531839149127873e-06, "loss": 0.4328, "step": 70700 }, { "epoch": 1.7947608229366299, "grad_norm": 0.3515625, "learning_rate": 9.520186747266334e-06, "loss": 0.4109, "step": 70705 }, { "epoch": 1.7948877409856454, "grad_norm": 0.326171875, "learning_rate": 9.508541238693267e-06, "loss": 0.388, "step": 70710 }, { "epoch": 1.7950146590346612, "grad_norm": 0.36328125, "learning_rate": 9.496902623980118e-06, "loss": 0.4215, "step": 70715 }, { "epoch": 1.795141577083677, "grad_norm": 0.361328125, "learning_rate": 9.48527090369796e-06, "loss": 0.4208, "step": 70720 }, { "epoch": 1.7952684951326927, "grad_norm": 0.330078125, "learning_rate": 9.473646078417607e-06, "loss": 0.394, "step": 70725 }, { "epoch": 1.7953954131817085, "grad_norm": 0.34765625, "learning_rate": 9.462028148709467e-06, "loss": 0.4143, "step": 70730 }, { "epoch": 1.7955223312307242, "grad_norm": 0.37109375, "learning_rate": 9.450417115143633e-06, "loss": 0.4314, "step": 70735 }, { "epoch": 1.79564924927974, "grad_norm": 0.35546875, "learning_rate": 9.438812978289833e-06, "loss": 0.4301, "step": 70740 }, { "epoch": 1.7957761673287558, "grad_norm": 0.341796875, "learning_rate": 9.427215738717497e-06, "loss": 0.4368, "step": 70745 }, { "epoch": 1.7959030853777715, "grad_norm": 0.36328125, "learning_rate": 9.415625396995686e-06, "loss": 0.4206, "step": 70750 }, { "epoch": 1.7960300034267873, "grad_norm": 0.35546875, "learning_rate": 9.404041953693109e-06, "loss": 0.4334, "step": 70755 }, { "epoch": 1.796156921475803, "grad_norm": 0.33203125, "learning_rate": 9.39246540937823e-06, "loss": 0.3784, "step": 70760 }, { "epoch": 1.7962838395248188, "grad_norm": 0.35546875, "learning_rate": 9.380895764619045e-06, "loss": 0.4305, "step": 70765 }, { "epoch": 1.7964107575738346, "grad_norm": 0.376953125, "learning_rate": 9.3693330199833e-06, "loss": 0.4307, "step": 70770 }, { "epoch": 1.7965376756228504, "grad_norm": 0.328125, "learning_rate": 9.357777176038355e-06, "loss": 0.4429, "step": 70775 }, { "epoch": 1.7966645936718662, "grad_norm": 0.447265625, "learning_rate": 9.34622823335126e-06, "loss": 0.3749, "step": 70780 }, { "epoch": 1.796791511720882, "grad_norm": 0.380859375, "learning_rate": 9.334686192488727e-06, "loss": 0.4141, "step": 70785 }, { "epoch": 1.7969184297698977, "grad_norm": 0.34765625, "learning_rate": 9.323151054017102e-06, "loss": 0.446, "step": 70790 }, { "epoch": 1.7970453478189135, "grad_norm": 0.33203125, "learning_rate": 9.311622818502417e-06, "loss": 0.4359, "step": 70795 }, { "epoch": 1.7971722658679292, "grad_norm": 0.349609375, "learning_rate": 9.300101486510336e-06, "loss": 0.4257, "step": 70800 }, { "epoch": 1.797299183916945, "grad_norm": 0.349609375, "learning_rate": 9.288587058606256e-06, "loss": 0.4084, "step": 70805 }, { "epoch": 1.7974261019659605, "grad_norm": 0.3359375, "learning_rate": 9.277079535355159e-06, "loss": 0.4242, "step": 70810 }, { "epoch": 1.7975530200149763, "grad_norm": 0.34375, "learning_rate": 9.265578917321692e-06, "loss": 0.3879, "step": 70815 }, { "epoch": 1.797679938063992, "grad_norm": 0.35546875, "learning_rate": 9.254085205070221e-06, "loss": 0.4283, "step": 70820 }, { "epoch": 1.7978068561130078, "grad_norm": 0.3515625, "learning_rate": 9.24259839916473e-06, "loss": 0.4385, "step": 70825 }, { "epoch": 1.7979337741620236, "grad_norm": 0.33984375, "learning_rate": 9.231118500168866e-06, "loss": 0.3976, "step": 70830 }, { "epoch": 1.7980606922110394, "grad_norm": 0.341796875, "learning_rate": 9.219645508645945e-06, "loss": 0.3932, "step": 70835 }, { "epoch": 1.7981876102600551, "grad_norm": 0.353515625, "learning_rate": 9.208179425158935e-06, "loss": 0.4063, "step": 70840 }, { "epoch": 1.7983145283090707, "grad_norm": 0.294921875, "learning_rate": 9.196720250270485e-06, "loss": 0.4074, "step": 70845 }, { "epoch": 1.7984414463580864, "grad_norm": 0.341796875, "learning_rate": 9.185267984542876e-06, "loss": 0.4126, "step": 70850 }, { "epoch": 1.7985683644071022, "grad_norm": 0.353515625, "learning_rate": 9.17382262853808e-06, "loss": 0.4225, "step": 70855 }, { "epoch": 1.798695282456118, "grad_norm": 0.357421875, "learning_rate": 9.162384182817711e-06, "loss": 0.4206, "step": 70860 }, { "epoch": 1.7988222005051338, "grad_norm": 0.353515625, "learning_rate": 9.150952647943038e-06, "loss": 0.4405, "step": 70865 }, { "epoch": 1.7989491185541495, "grad_norm": 0.345703125, "learning_rate": 9.139528024475028e-06, "loss": 0.4155, "step": 70870 }, { "epoch": 1.7990760366031653, "grad_norm": 0.34375, "learning_rate": 9.128110312974234e-06, "loss": 0.4316, "step": 70875 }, { "epoch": 1.799202954652181, "grad_norm": 0.35546875, "learning_rate": 9.116699514000991e-06, "loss": 0.4209, "step": 70880 }, { "epoch": 1.7993298727011968, "grad_norm": 0.3203125, "learning_rate": 9.105295628115167e-06, "loss": 0.4061, "step": 70885 }, { "epoch": 1.7994567907502126, "grad_norm": 0.337890625, "learning_rate": 9.093898655876346e-06, "loss": 0.4128, "step": 70890 }, { "epoch": 1.7995837087992284, "grad_norm": 0.361328125, "learning_rate": 9.082508597843801e-06, "loss": 0.4264, "step": 70895 }, { "epoch": 1.7997106268482441, "grad_norm": 0.328125, "learning_rate": 9.071125454576434e-06, "loss": 0.4152, "step": 70900 }, { "epoch": 1.7998375448972599, "grad_norm": 0.35546875, "learning_rate": 9.059749226632813e-06, "loss": 0.4236, "step": 70905 }, { "epoch": 1.7999644629462757, "grad_norm": 0.3359375, "learning_rate": 9.048379914571142e-06, "loss": 0.4138, "step": 70910 }, { "epoch": 1.8000913809952914, "grad_norm": 0.353515625, "learning_rate": 9.037017518949324e-06, "loss": 0.4353, "step": 70915 }, { "epoch": 1.8002182990443072, "grad_norm": 0.361328125, "learning_rate": 9.025662040324916e-06, "loss": 0.4382, "step": 70920 }, { "epoch": 1.800345217093323, "grad_norm": 0.341796875, "learning_rate": 9.014313479255102e-06, "loss": 0.4065, "step": 70925 }, { "epoch": 1.8004721351423387, "grad_norm": 0.34375, "learning_rate": 9.002971836296773e-06, "loss": 0.4277, "step": 70930 }, { "epoch": 1.8005990531913545, "grad_norm": 0.35546875, "learning_rate": 8.99163711200645e-06, "loss": 0.4447, "step": 70935 }, { "epoch": 1.80072597124037, "grad_norm": 0.349609375, "learning_rate": 8.980309306940304e-06, "loss": 0.401, "step": 70940 }, { "epoch": 1.8008528892893858, "grad_norm": 0.3515625, "learning_rate": 8.968988421654221e-06, "loss": 0.411, "step": 70945 }, { "epoch": 1.8009798073384016, "grad_norm": 0.34375, "learning_rate": 8.957674456703695e-06, "loss": 0.3972, "step": 70950 }, { "epoch": 1.8011067253874173, "grad_norm": 0.337890625, "learning_rate": 8.946367412643912e-06, "loss": 0.4, "step": 70955 }, { "epoch": 1.801233643436433, "grad_norm": 0.37109375, "learning_rate": 8.93506729002968e-06, "loss": 0.4171, "step": 70960 }, { "epoch": 1.8013605614854489, "grad_norm": 0.33984375, "learning_rate": 8.923774089415503e-06, "loss": 0.402, "step": 70965 }, { "epoch": 1.8014874795344646, "grad_norm": 0.359375, "learning_rate": 8.91248781135554e-06, "loss": 0.4323, "step": 70970 }, { "epoch": 1.8016143975834802, "grad_norm": 0.37109375, "learning_rate": 8.901208456403597e-06, "loss": 0.4132, "step": 70975 }, { "epoch": 1.801741315632496, "grad_norm": 0.34765625, "learning_rate": 8.889936025113164e-06, "loss": 0.4478, "step": 70980 }, { "epoch": 1.8018682336815117, "grad_norm": 0.34765625, "learning_rate": 8.878670518037351e-06, "loss": 0.4211, "step": 70985 }, { "epoch": 1.8019951517305275, "grad_norm": 0.32421875, "learning_rate": 8.867411935728963e-06, "loss": 0.3981, "step": 70990 }, { "epoch": 1.8021220697795433, "grad_norm": 0.3359375, "learning_rate": 8.856160278740442e-06, "loss": 0.3972, "step": 70995 }, { "epoch": 1.802248987828559, "grad_norm": 0.341796875, "learning_rate": 8.844915547623932e-06, "loss": 0.3896, "step": 71000 }, { "epoch": 1.8023759058775748, "grad_norm": 0.36328125, "learning_rate": 8.833677742931155e-06, "loss": 0.4073, "step": 71005 }, { "epoch": 1.8025028239265906, "grad_norm": 0.361328125, "learning_rate": 8.822446865213606e-06, "loss": 0.4026, "step": 71010 }, { "epoch": 1.8026297419756063, "grad_norm": 0.33984375, "learning_rate": 8.811222915022342e-06, "loss": 0.4332, "step": 71015 }, { "epoch": 1.802756660024622, "grad_norm": 0.345703125, "learning_rate": 8.80000589290814e-06, "loss": 0.377, "step": 71020 }, { "epoch": 1.8028835780736379, "grad_norm": 0.341796875, "learning_rate": 8.788795799421394e-06, "loss": 0.4367, "step": 71025 }, { "epoch": 1.8030104961226536, "grad_norm": 0.353515625, "learning_rate": 8.777592635112196e-06, "loss": 0.3994, "step": 71030 }, { "epoch": 1.8031374141716694, "grad_norm": 0.376953125, "learning_rate": 8.766396400530273e-06, "loss": 0.4042, "step": 71035 }, { "epoch": 1.8032643322206852, "grad_norm": 0.345703125, "learning_rate": 8.75520709622502e-06, "loss": 0.4012, "step": 71040 }, { "epoch": 1.803391250269701, "grad_norm": 0.337890625, "learning_rate": 8.744024722745464e-06, "loss": 0.431, "step": 71045 }, { "epoch": 1.8035181683187167, "grad_norm": 0.3359375, "learning_rate": 8.732849280640363e-06, "loss": 0.4124, "step": 71050 }, { "epoch": 1.8036450863677325, "grad_norm": 0.33984375, "learning_rate": 8.721680770458084e-06, "loss": 0.4116, "step": 71055 }, { "epoch": 1.8037720044167482, "grad_norm": 0.37109375, "learning_rate": 8.710519192746651e-06, "loss": 0.4289, "step": 71060 }, { "epoch": 1.803898922465764, "grad_norm": 0.357421875, "learning_rate": 8.699364548053761e-06, "loss": 0.384, "step": 71065 }, { "epoch": 1.8040258405147798, "grad_norm": 0.361328125, "learning_rate": 8.688216836926743e-06, "loss": 0.4459, "step": 71070 }, { "epoch": 1.8041527585637953, "grad_norm": 0.376953125, "learning_rate": 8.677076059912658e-06, "loss": 0.4153, "step": 71075 }, { "epoch": 1.804279676612811, "grad_norm": 0.345703125, "learning_rate": 8.665942217558152e-06, "loss": 0.4012, "step": 71080 }, { "epoch": 1.8044065946618268, "grad_norm": 0.34375, "learning_rate": 8.654815310409574e-06, "loss": 0.4223, "step": 71085 }, { "epoch": 1.8045335127108426, "grad_norm": 0.357421875, "learning_rate": 8.643695339012902e-06, "loss": 0.3957, "step": 71090 }, { "epoch": 1.8046604307598584, "grad_norm": 0.341796875, "learning_rate": 8.632582303913782e-06, "loss": 0.4253, "step": 71095 }, { "epoch": 1.8047873488088741, "grad_norm": 0.353515625, "learning_rate": 8.62147620565753e-06, "loss": 0.4255, "step": 71100 }, { "epoch": 1.80491426685789, "grad_norm": 0.333984375, "learning_rate": 8.61037704478914e-06, "loss": 0.4083, "step": 71105 }, { "epoch": 1.8050411849069055, "grad_norm": 0.365234375, "learning_rate": 8.599284821853231e-06, "loss": 0.4421, "step": 71110 }, { "epoch": 1.8051681029559212, "grad_norm": 0.3203125, "learning_rate": 8.588199537394081e-06, "loss": 0.415, "step": 71115 }, { "epoch": 1.805295021004937, "grad_norm": 0.330078125, "learning_rate": 8.577121191955622e-06, "loss": 0.4185, "step": 71120 }, { "epoch": 1.8054219390539528, "grad_norm": 0.349609375, "learning_rate": 8.56604978608152e-06, "loss": 0.4364, "step": 71125 }, { "epoch": 1.8055488571029685, "grad_norm": 0.3359375, "learning_rate": 8.554985320315005e-06, "loss": 0.402, "step": 71130 }, { "epoch": 1.8056757751519843, "grad_norm": 0.41015625, "learning_rate": 8.543927795199012e-06, "loss": 0.4443, "step": 71135 }, { "epoch": 1.805802693201, "grad_norm": 0.34765625, "learning_rate": 8.532877211276141e-06, "loss": 0.4232, "step": 71140 }, { "epoch": 1.8059296112500158, "grad_norm": 0.353515625, "learning_rate": 8.521833569088654e-06, "loss": 0.4213, "step": 71145 }, { "epoch": 1.8060565292990316, "grad_norm": 0.333984375, "learning_rate": 8.510796869178421e-06, "loss": 0.4216, "step": 71150 }, { "epoch": 1.8061834473480474, "grad_norm": 0.322265625, "learning_rate": 8.499767112087025e-06, "loss": 0.4297, "step": 71155 }, { "epoch": 1.8063103653970631, "grad_norm": 0.333984375, "learning_rate": 8.488744298355698e-06, "loss": 0.3871, "step": 71160 }, { "epoch": 1.806437283446079, "grad_norm": 0.365234375, "learning_rate": 8.47772842852531e-06, "loss": 0.4304, "step": 71165 }, { "epoch": 1.8065642014950947, "grad_norm": 0.36328125, "learning_rate": 8.466719503136427e-06, "loss": 0.4252, "step": 71170 }, { "epoch": 1.8066911195441104, "grad_norm": 0.337890625, "learning_rate": 8.455717522729234e-06, "loss": 0.3948, "step": 71175 }, { "epoch": 1.8068180375931262, "grad_norm": 0.326171875, "learning_rate": 8.44472248784358e-06, "loss": 0.4363, "step": 71180 }, { "epoch": 1.806944955642142, "grad_norm": 0.33984375, "learning_rate": 8.433734399019037e-06, "loss": 0.3722, "step": 71185 }, { "epoch": 1.8070718736911577, "grad_norm": 0.34765625, "learning_rate": 8.42275325679474e-06, "loss": 0.4252, "step": 71190 }, { "epoch": 1.8071987917401735, "grad_norm": 0.3359375, "learning_rate": 8.411779061709522e-06, "loss": 0.4519, "step": 71195 }, { "epoch": 1.8073257097891893, "grad_norm": 0.349609375, "learning_rate": 8.400811814301921e-06, "loss": 0.434, "step": 71200 }, { "epoch": 1.8074526278382048, "grad_norm": 0.30078125, "learning_rate": 8.389851515110092e-06, "loss": 0.4061, "step": 71205 }, { "epoch": 1.8075795458872206, "grad_norm": 0.34765625, "learning_rate": 8.378898164671816e-06, "loss": 0.4027, "step": 71210 }, { "epoch": 1.8077064639362364, "grad_norm": 0.375, "learning_rate": 8.367951763524616e-06, "loss": 0.4193, "step": 71215 }, { "epoch": 1.8078333819852521, "grad_norm": 0.34375, "learning_rate": 8.357012312205596e-06, "loss": 0.4031, "step": 71220 }, { "epoch": 1.8079603000342679, "grad_norm": 0.35546875, "learning_rate": 8.346079811251577e-06, "loss": 0.4198, "step": 71225 }, { "epoch": 1.8080872180832837, "grad_norm": 0.373046875, "learning_rate": 8.33515426119898e-06, "loss": 0.4329, "step": 71230 }, { "epoch": 1.8082141361322994, "grad_norm": 0.34375, "learning_rate": 8.324235662583938e-06, "loss": 0.4315, "step": 71235 }, { "epoch": 1.808341054181315, "grad_norm": 0.34375, "learning_rate": 8.313324015942213e-06, "loss": 0.4054, "step": 71240 }, { "epoch": 1.8084679722303307, "grad_norm": 0.349609375, "learning_rate": 8.302419321809239e-06, "loss": 0.4005, "step": 71245 }, { "epoch": 1.8085948902793465, "grad_norm": 0.361328125, "learning_rate": 8.291521580720106e-06, "loss": 0.4259, "step": 71250 }, { "epoch": 1.8087218083283623, "grad_norm": 0.33203125, "learning_rate": 8.280630793209537e-06, "loss": 0.391, "step": 71255 }, { "epoch": 1.808848726377378, "grad_norm": 0.357421875, "learning_rate": 8.269746959812002e-06, "loss": 0.4059, "step": 71260 }, { "epoch": 1.8089756444263938, "grad_norm": 0.357421875, "learning_rate": 8.258870081061508e-06, "loss": 0.4203, "step": 71265 }, { "epoch": 1.8091025624754096, "grad_norm": 0.34765625, "learning_rate": 8.248000157491797e-06, "loss": 0.4084, "step": 71270 }, { "epoch": 1.8092294805244253, "grad_norm": 0.353515625, "learning_rate": 8.237137189636256e-06, "loss": 0.4156, "step": 71275 }, { "epoch": 1.809356398573441, "grad_norm": 0.349609375, "learning_rate": 8.226281178027927e-06, "loss": 0.4244, "step": 71280 }, { "epoch": 1.8094833166224569, "grad_norm": 0.357421875, "learning_rate": 8.2154321231995e-06, "loss": 0.4138, "step": 71285 }, { "epoch": 1.8096102346714726, "grad_norm": 0.35546875, "learning_rate": 8.204590025683334e-06, "loss": 0.411, "step": 71290 }, { "epoch": 1.8097371527204884, "grad_norm": 0.3515625, "learning_rate": 8.19375488601145e-06, "loss": 0.3928, "step": 71295 }, { "epoch": 1.8098640707695042, "grad_norm": 0.330078125, "learning_rate": 8.182926704715542e-06, "loss": 0.3931, "step": 71300 }, { "epoch": 1.80999098881852, "grad_norm": 0.365234375, "learning_rate": 8.172105482326935e-06, "loss": 0.4238, "step": 71305 }, { "epoch": 1.8101179068675357, "grad_norm": 0.359375, "learning_rate": 8.161291219376603e-06, "loss": 0.4268, "step": 71310 }, { "epoch": 1.8102448249165515, "grad_norm": 0.337890625, "learning_rate": 8.150483916395223e-06, "loss": 0.4135, "step": 71315 }, { "epoch": 1.8103717429655672, "grad_norm": 0.341796875, "learning_rate": 8.139683573913053e-06, "loss": 0.4295, "step": 71320 }, { "epoch": 1.810498661014583, "grad_norm": 0.353515625, "learning_rate": 8.128890192460136e-06, "loss": 0.4251, "step": 71325 }, { "epoch": 1.8106255790635988, "grad_norm": 0.333984375, "learning_rate": 8.118103772566065e-06, "loss": 0.3822, "step": 71330 }, { "epoch": 1.8107524971126145, "grad_norm": 0.333984375, "learning_rate": 8.107324314760117e-06, "loss": 0.4258, "step": 71335 }, { "epoch": 1.81087941516163, "grad_norm": 0.353515625, "learning_rate": 8.09655181957125e-06, "loss": 0.4327, "step": 71340 }, { "epoch": 1.8110063332106459, "grad_norm": 0.3671875, "learning_rate": 8.085786287528063e-06, "loss": 0.4251, "step": 71345 }, { "epoch": 1.8111332512596616, "grad_norm": 0.345703125, "learning_rate": 8.075027719158794e-06, "loss": 0.4074, "step": 71350 }, { "epoch": 1.8112601693086774, "grad_norm": 0.353515625, "learning_rate": 8.064276114991391e-06, "loss": 0.4173, "step": 71355 }, { "epoch": 1.8113870873576932, "grad_norm": 0.3828125, "learning_rate": 8.053531475553433e-06, "loss": 0.4307, "step": 71360 }, { "epoch": 1.811514005406709, "grad_norm": 0.330078125, "learning_rate": 8.042793801372111e-06, "loss": 0.4287, "step": 71365 }, { "epoch": 1.8116409234557245, "grad_norm": 0.353515625, "learning_rate": 8.032063092974373e-06, "loss": 0.4095, "step": 71370 }, { "epoch": 1.8117678415047402, "grad_norm": 0.33984375, "learning_rate": 8.02133935088673e-06, "loss": 0.4036, "step": 71375 }, { "epoch": 1.811894759553756, "grad_norm": 0.34375, "learning_rate": 8.010622575635428e-06, "loss": 0.3988, "step": 71380 }, { "epoch": 1.8120216776027718, "grad_norm": 0.365234375, "learning_rate": 7.999912767746297e-06, "loss": 0.4107, "step": 71385 }, { "epoch": 1.8121485956517875, "grad_norm": 0.361328125, "learning_rate": 7.9892099277449e-06, "loss": 0.4124, "step": 71390 }, { "epoch": 1.8122755137008033, "grad_norm": 0.349609375, "learning_rate": 7.978514056156399e-06, "loss": 0.4127, "step": 71395 }, { "epoch": 1.812402431749819, "grad_norm": 0.345703125, "learning_rate": 7.967825153505642e-06, "loss": 0.4042, "step": 71400 }, { "epoch": 1.8125293497988348, "grad_norm": 0.357421875, "learning_rate": 7.957143220317125e-06, "loss": 0.4323, "step": 71405 }, { "epoch": 1.8126562678478506, "grad_norm": 0.3125, "learning_rate": 7.946468257115013e-06, "loss": 0.3917, "step": 71410 }, { "epoch": 1.8127831858968664, "grad_norm": 0.34765625, "learning_rate": 7.935800264423103e-06, "loss": 0.4181, "step": 71415 }, { "epoch": 1.8129101039458821, "grad_norm": 0.345703125, "learning_rate": 7.925139242764894e-06, "loss": 0.3999, "step": 71420 }, { "epoch": 1.813037021994898, "grad_norm": 0.3203125, "learning_rate": 7.914485192663516e-06, "loss": 0.3707, "step": 71425 }, { "epoch": 1.8131639400439137, "grad_norm": 0.337890625, "learning_rate": 7.903838114641753e-06, "loss": 0.397, "step": 71430 }, { "epoch": 1.8132908580929294, "grad_norm": 0.345703125, "learning_rate": 7.893198009222034e-06, "loss": 0.3971, "step": 71435 }, { "epoch": 1.8134177761419452, "grad_norm": 0.36328125, "learning_rate": 7.882564876926495e-06, "loss": 0.4183, "step": 71440 }, { "epoch": 1.813544694190961, "grad_norm": 0.36328125, "learning_rate": 7.871938718276865e-06, "loss": 0.4553, "step": 71445 }, { "epoch": 1.8136716122399767, "grad_norm": 0.345703125, "learning_rate": 7.861319533794597e-06, "loss": 0.4255, "step": 71450 }, { "epoch": 1.8137985302889925, "grad_norm": 0.375, "learning_rate": 7.850707324000755e-06, "loss": 0.4441, "step": 71455 }, { "epoch": 1.8139254483380083, "grad_norm": 0.359375, "learning_rate": 7.84010208941609e-06, "loss": 0.413, "step": 71460 }, { "epoch": 1.814052366387024, "grad_norm": 0.35546875, "learning_rate": 7.829503830560986e-06, "loss": 0.4484, "step": 71465 }, { "epoch": 1.8141792844360396, "grad_norm": 0.3671875, "learning_rate": 7.818912547955509e-06, "loss": 0.4201, "step": 71470 }, { "epoch": 1.8143062024850554, "grad_norm": 0.365234375, "learning_rate": 7.808328242119344e-06, "loss": 0.4324, "step": 71475 }, { "epoch": 1.8144331205340711, "grad_norm": 0.3515625, "learning_rate": 7.797750913571876e-06, "loss": 0.3965, "step": 71480 }, { "epoch": 1.814560038583087, "grad_norm": 0.341796875, "learning_rate": 7.787180562832124e-06, "loss": 0.4421, "step": 71485 }, { "epoch": 1.8146869566321027, "grad_norm": 0.359375, "learning_rate": 7.77661719041877e-06, "loss": 0.421, "step": 71490 }, { "epoch": 1.8148138746811184, "grad_norm": 0.34375, "learning_rate": 7.766060796850171e-06, "loss": 0.4359, "step": 71495 }, { "epoch": 1.8149407927301342, "grad_norm": 0.33203125, "learning_rate": 7.755511382644292e-06, "loss": 0.3978, "step": 71500 }, { "epoch": 1.8150677107791497, "grad_norm": 0.34375, "learning_rate": 7.74496894831882e-06, "loss": 0.4055, "step": 71505 }, { "epoch": 1.8151946288281655, "grad_norm": 0.353515625, "learning_rate": 7.734433494391041e-06, "loss": 0.4359, "step": 71510 }, { "epoch": 1.8153215468771813, "grad_norm": 0.34375, "learning_rate": 7.72390502137794e-06, "loss": 0.3857, "step": 71515 }, { "epoch": 1.815448464926197, "grad_norm": 0.37109375, "learning_rate": 7.713383529796157e-06, "loss": 0.4343, "step": 71520 }, { "epoch": 1.8155753829752128, "grad_norm": 0.34765625, "learning_rate": 7.702869020161978e-06, "loss": 0.4104, "step": 71525 }, { "epoch": 1.8157023010242286, "grad_norm": 0.33203125, "learning_rate": 7.692361492991323e-06, "loss": 0.4259, "step": 71530 }, { "epoch": 1.8158292190732443, "grad_norm": 0.3515625, "learning_rate": 7.681860948799779e-06, "loss": 0.4229, "step": 71535 }, { "epoch": 1.8159561371222601, "grad_norm": 0.36328125, "learning_rate": 7.67136738810265e-06, "loss": 0.4194, "step": 71540 }, { "epoch": 1.8160830551712759, "grad_norm": 0.33984375, "learning_rate": 7.660880811414827e-06, "loss": 0.3952, "step": 71545 }, { "epoch": 1.8162099732202917, "grad_norm": 0.365234375, "learning_rate": 7.650401219250878e-06, "loss": 0.4379, "step": 71550 }, { "epoch": 1.8163368912693074, "grad_norm": 0.341796875, "learning_rate": 7.639928612125045e-06, "loss": 0.4065, "step": 71555 }, { "epoch": 1.8164638093183232, "grad_norm": 0.357421875, "learning_rate": 7.6294629905512e-06, "loss": 0.404, "step": 71560 }, { "epoch": 1.816590727367339, "grad_norm": 0.3515625, "learning_rate": 7.6190043550428955e-06, "loss": 0.396, "step": 71565 }, { "epoch": 1.8167176454163547, "grad_norm": 0.376953125, "learning_rate": 7.608552706113341e-06, "loss": 0.3923, "step": 71570 }, { "epoch": 1.8168445634653705, "grad_norm": 0.392578125, "learning_rate": 7.598108044275359e-06, "loss": 0.4105, "step": 71575 }, { "epoch": 1.8169714815143863, "grad_norm": 0.357421875, "learning_rate": 7.587670370041504e-06, "loss": 0.4104, "step": 71580 }, { "epoch": 1.817098399563402, "grad_norm": 0.357421875, "learning_rate": 7.577239683923936e-06, "loss": 0.45, "step": 71585 }, { "epoch": 1.8172253176124178, "grad_norm": 0.353515625, "learning_rate": 7.5668159864344935e-06, "loss": 0.4045, "step": 71590 }, { "epoch": 1.8173522356614336, "grad_norm": 0.353515625, "learning_rate": 7.556399278084652e-06, "loss": 0.4157, "step": 71595 }, { "epoch": 1.8174791537104493, "grad_norm": 0.3359375, "learning_rate": 7.545989559385551e-06, "loss": 0.4125, "step": 71600 }, { "epoch": 1.8176060717594649, "grad_norm": 0.345703125, "learning_rate": 7.5355868308480005e-06, "loss": 0.43, "step": 71605 }, { "epoch": 1.8177329898084806, "grad_norm": 0.33203125, "learning_rate": 7.525191092982441e-06, "loss": 0.3859, "step": 71610 }, { "epoch": 1.8178599078574964, "grad_norm": 0.328125, "learning_rate": 7.514802346299015e-06, "loss": 0.3995, "step": 71615 }, { "epoch": 1.8179868259065122, "grad_norm": 0.35546875, "learning_rate": 7.504420591307481e-06, "loss": 0.4331, "step": 71620 }, { "epoch": 1.818113743955528, "grad_norm": 0.3515625, "learning_rate": 7.494045828517265e-06, "loss": 0.4019, "step": 71625 }, { "epoch": 1.8182406620045437, "grad_norm": 0.333984375, "learning_rate": 7.483678058437459e-06, "loss": 0.4216, "step": 71630 }, { "epoch": 1.8183675800535593, "grad_norm": 0.35546875, "learning_rate": 7.473317281576774e-06, "loss": 0.4571, "step": 71635 }, { "epoch": 1.818494498102575, "grad_norm": 0.337890625, "learning_rate": 7.462963498443653e-06, "loss": 0.403, "step": 71640 }, { "epoch": 1.8186214161515908, "grad_norm": 0.3359375, "learning_rate": 7.452616709546139e-06, "loss": 0.9059, "step": 71645 }, { "epoch": 1.8187483342006066, "grad_norm": 0.32421875, "learning_rate": 7.4422769153919426e-06, "loss": 0.3911, "step": 71650 }, { "epoch": 1.8188752522496223, "grad_norm": 0.361328125, "learning_rate": 7.431944116488425e-06, "loss": 0.4314, "step": 71655 }, { "epoch": 1.819002170298638, "grad_norm": 0.36328125, "learning_rate": 7.421618313342631e-06, "loss": 0.4057, "step": 71660 }, { "epoch": 1.8191290883476539, "grad_norm": 0.365234375, "learning_rate": 7.4112995064612215e-06, "loss": 0.4151, "step": 71665 }, { "epoch": 1.8192560063966696, "grad_norm": 0.35546875, "learning_rate": 7.4009876963505575e-06, "loss": 0.4203, "step": 71670 }, { "epoch": 1.8193829244456854, "grad_norm": 0.337890625, "learning_rate": 7.390682883516619e-06, "loss": 0.4113, "step": 71675 }, { "epoch": 1.8195098424947012, "grad_norm": 0.359375, "learning_rate": 7.38038506846505e-06, "loss": 0.4362, "step": 71680 }, { "epoch": 1.819636760543717, "grad_norm": 0.34765625, "learning_rate": 7.370094251701197e-06, "loss": 0.4335, "step": 71685 }, { "epoch": 1.8197636785927327, "grad_norm": 0.337890625, "learning_rate": 7.359810433729973e-06, "loss": 0.3946, "step": 71690 }, { "epoch": 1.8198905966417485, "grad_norm": 0.388671875, "learning_rate": 7.3495336150560416e-06, "loss": 0.4186, "step": 71695 }, { "epoch": 1.8200175146907642, "grad_norm": 0.34765625, "learning_rate": 7.3392637961836805e-06, "loss": 0.4019, "step": 71700 }, { "epoch": 1.82014443273978, "grad_norm": 0.33203125, "learning_rate": 7.329000977616806e-06, "loss": 0.409, "step": 71705 }, { "epoch": 1.8202713507887958, "grad_norm": 0.353515625, "learning_rate": 7.318745159859029e-06, "loss": 0.4296, "step": 71710 }, { "epoch": 1.8203982688378115, "grad_norm": 0.34765625, "learning_rate": 7.3084963434136e-06, "loss": 0.4058, "step": 71715 }, { "epoch": 1.8205251868868273, "grad_norm": 0.337890625, "learning_rate": 7.298254528783415e-06, "loss": 0.3991, "step": 71720 }, { "epoch": 1.820652104935843, "grad_norm": 0.357421875, "learning_rate": 7.288019716471039e-06, "loss": 0.4112, "step": 71725 }, { "epoch": 1.8207790229848588, "grad_norm": 0.34375, "learning_rate": 7.27779190697867e-06, "loss": 0.4347, "step": 71730 }, { "epoch": 1.8209059410338744, "grad_norm": 0.36328125, "learning_rate": 7.267571100808223e-06, "loss": 0.4478, "step": 71735 }, { "epoch": 1.8210328590828901, "grad_norm": 0.353515625, "learning_rate": 7.257357298461197e-06, "loss": 0.4465, "step": 71740 }, { "epoch": 1.821159777131906, "grad_norm": 0.328125, "learning_rate": 7.247150500438775e-06, "loss": 0.4291, "step": 71745 }, { "epoch": 1.8212866951809217, "grad_norm": 0.36328125, "learning_rate": 7.236950707241823e-06, "loss": 0.4184, "step": 71750 }, { "epoch": 1.8214136132299374, "grad_norm": 0.35546875, "learning_rate": 7.22675791937084e-06, "loss": 0.4253, "step": 71755 }, { "epoch": 1.8215405312789532, "grad_norm": 0.349609375, "learning_rate": 7.216572137325943e-06, "loss": 0.4336, "step": 71760 }, { "epoch": 1.821667449327969, "grad_norm": 0.359375, "learning_rate": 7.206393361606999e-06, "loss": 0.4307, "step": 71765 }, { "epoch": 1.8217943673769845, "grad_norm": 0.330078125, "learning_rate": 7.196221592713458e-06, "loss": 0.4215, "step": 71770 }, { "epoch": 1.8219212854260003, "grad_norm": 0.353515625, "learning_rate": 7.186056831144438e-06, "loss": 0.425, "step": 71775 }, { "epoch": 1.822048203475016, "grad_norm": 0.3046875, "learning_rate": 7.175899077398689e-06, "loss": 0.3725, "step": 71780 }, { "epoch": 1.8221751215240318, "grad_norm": 0.34375, "learning_rate": 7.165748331974713e-06, "loss": 0.4091, "step": 71785 }, { "epoch": 1.8223020395730476, "grad_norm": 0.33203125, "learning_rate": 7.155604595370562e-06, "loss": 0.3935, "step": 71790 }, { "epoch": 1.8224289576220634, "grad_norm": 0.34375, "learning_rate": 7.145467868083987e-06, "loss": 0.4159, "step": 71795 }, { "epoch": 1.8225558756710791, "grad_norm": 0.33984375, "learning_rate": 7.135338150612407e-06, "loss": 0.4184, "step": 71800 }, { "epoch": 1.822682793720095, "grad_norm": 0.28515625, "learning_rate": 7.125215443452875e-06, "loss": 0.3723, "step": 71805 }, { "epoch": 1.8228097117691107, "grad_norm": 0.333984375, "learning_rate": 7.11509974710211e-06, "loss": 0.4325, "step": 71810 }, { "epoch": 1.8229366298181264, "grad_norm": 0.328125, "learning_rate": 7.104991062056481e-06, "loss": 0.384, "step": 71815 }, { "epoch": 1.8230635478671422, "grad_norm": 0.3515625, "learning_rate": 7.094889388812009e-06, "loss": 0.4294, "step": 71820 }, { "epoch": 1.823190465916158, "grad_norm": 0.341796875, "learning_rate": 7.084794727864379e-06, "loss": 0.4096, "step": 71825 }, { "epoch": 1.8233173839651737, "grad_norm": 0.37109375, "learning_rate": 7.074707079708963e-06, "loss": 0.4322, "step": 71830 }, { "epoch": 1.8234443020141895, "grad_norm": 0.37890625, "learning_rate": 7.064626444840749e-06, "loss": 0.4167, "step": 71835 }, { "epoch": 1.8235712200632053, "grad_norm": 0.35546875, "learning_rate": 7.0545528237543725e-06, "loss": 0.4286, "step": 71840 }, { "epoch": 1.823698138112221, "grad_norm": 0.34375, "learning_rate": 7.044486216944139e-06, "loss": 0.4324, "step": 71845 }, { "epoch": 1.8238250561612368, "grad_norm": 0.373046875, "learning_rate": 7.034426624904021e-06, "loss": 0.4095, "step": 71850 }, { "epoch": 1.8239519742102526, "grad_norm": 0.3359375, "learning_rate": 7.024374048127623e-06, "loss": 0.3782, "step": 71855 }, { "epoch": 1.8240788922592683, "grad_norm": 0.373046875, "learning_rate": 7.014328487108267e-06, "loss": 0.4519, "step": 71860 }, { "epoch": 1.824205810308284, "grad_norm": 0.373046875, "learning_rate": 7.004289942338842e-06, "loss": 0.43, "step": 71865 }, { "epoch": 1.8243327283572996, "grad_norm": 0.33984375, "learning_rate": 6.994258414311954e-06, "loss": 0.4313, "step": 71870 }, { "epoch": 1.8244596464063154, "grad_norm": 0.349609375, "learning_rate": 6.984233903519843e-06, "loss": 0.4494, "step": 71875 }, { "epoch": 1.8245865644553312, "grad_norm": 0.3515625, "learning_rate": 6.974216410454398e-06, "loss": 0.4158, "step": 71880 }, { "epoch": 1.824713482504347, "grad_norm": 0.361328125, "learning_rate": 6.964205935607192e-06, "loss": 0.4058, "step": 71885 }, { "epoch": 1.8248404005533627, "grad_norm": 0.369140625, "learning_rate": 6.954202479469384e-06, "loss": 0.415, "step": 71890 }, { "epoch": 1.8249673186023785, "grad_norm": 0.333984375, "learning_rate": 6.944206042531914e-06, "loss": 0.4199, "step": 71895 }, { "epoch": 1.825094236651394, "grad_norm": 0.341796875, "learning_rate": 6.9342166252852715e-06, "loss": 0.3866, "step": 71900 }, { "epoch": 1.8252211547004098, "grad_norm": 0.341796875, "learning_rate": 6.924234228219616e-06, "loss": 0.4208, "step": 71905 }, { "epoch": 1.8253480727494256, "grad_norm": 0.34765625, "learning_rate": 6.914258851824805e-06, "loss": 0.4103, "step": 71910 }, { "epoch": 1.8254749907984413, "grad_norm": 0.302734375, "learning_rate": 6.9042904965903146e-06, "loss": 0.3783, "step": 71915 }, { "epoch": 1.825601908847457, "grad_norm": 0.357421875, "learning_rate": 6.894329163005286e-06, "loss": 0.4308, "step": 71920 }, { "epoch": 1.8257288268964729, "grad_norm": 0.330078125, "learning_rate": 6.884374851558528e-06, "loss": 0.4047, "step": 71925 }, { "epoch": 1.8258557449454886, "grad_norm": 0.369140625, "learning_rate": 6.874427562738483e-06, "loss": 0.4165, "step": 71930 }, { "epoch": 1.8259826629945044, "grad_norm": 0.33203125, "learning_rate": 6.864487297033244e-06, "loss": 0.4027, "step": 71935 }, { "epoch": 1.8261095810435202, "grad_norm": 0.34765625, "learning_rate": 6.854554054930622e-06, "loss": 0.4341, "step": 71940 }, { "epoch": 1.826236499092536, "grad_norm": 0.37109375, "learning_rate": 6.844627836917993e-06, "loss": 0.425, "step": 71945 }, { "epoch": 1.8263634171415517, "grad_norm": 0.34375, "learning_rate": 6.8347086434824506e-06, "loss": 0.3838, "step": 71950 }, { "epoch": 1.8264903351905675, "grad_norm": 0.330078125, "learning_rate": 6.824796475110739e-06, "loss": 0.4074, "step": 71955 }, { "epoch": 1.8266172532395832, "grad_norm": 0.353515625, "learning_rate": 6.814891332289235e-06, "loss": 0.4453, "step": 71960 }, { "epoch": 1.826744171288599, "grad_norm": 0.330078125, "learning_rate": 6.804993215503967e-06, "loss": 0.442, "step": 71965 }, { "epoch": 1.8268710893376148, "grad_norm": 0.34765625, "learning_rate": 6.7951021252406475e-06, "loss": 0.4305, "step": 71970 }, { "epoch": 1.8269980073866305, "grad_norm": 0.357421875, "learning_rate": 6.785218061984638e-06, "loss": 0.4371, "step": 71975 }, { "epoch": 1.8271249254356463, "grad_norm": 0.353515625, "learning_rate": 6.775341026220915e-06, "loss": 0.4442, "step": 71980 }, { "epoch": 1.827251843484662, "grad_norm": 0.349609375, "learning_rate": 6.765471018434143e-06, "loss": 0.4398, "step": 71985 }, { "epoch": 1.8273787615336778, "grad_norm": 0.375, "learning_rate": 6.755608039108668e-06, "loss": 0.4066, "step": 71990 }, { "epoch": 1.8275056795826936, "grad_norm": 0.349609375, "learning_rate": 6.745752088728451e-06, "loss": 0.4026, "step": 71995 }, { "epoch": 1.8276325976317092, "grad_norm": 0.37890625, "learning_rate": 6.735903167777107e-06, "loss": 0.425, "step": 72000 }, { "epoch": 1.827759515680725, "grad_norm": 0.349609375, "learning_rate": 6.726061276737932e-06, "loss": 0.3963, "step": 72005 }, { "epoch": 1.8278864337297407, "grad_norm": 0.365234375, "learning_rate": 6.716226416093823e-06, "loss": 0.4015, "step": 72010 }, { "epoch": 1.8280133517787565, "grad_norm": 0.33203125, "learning_rate": 6.706398586327444e-06, "loss": 0.4266, "step": 72015 }, { "epoch": 1.8281402698277722, "grad_norm": 0.322265625, "learning_rate": 6.696577787920993e-06, "loss": 0.3964, "step": 72020 }, { "epoch": 1.828267187876788, "grad_norm": 0.3828125, "learning_rate": 6.686764021356383e-06, "loss": 0.4416, "step": 72025 }, { "epoch": 1.8283941059258038, "grad_norm": 0.341796875, "learning_rate": 6.676957287115181e-06, "loss": 0.4085, "step": 72030 }, { "epoch": 1.8285210239748193, "grad_norm": 0.37890625, "learning_rate": 6.6671575856786005e-06, "loss": 0.4299, "step": 72035 }, { "epoch": 1.828647942023835, "grad_norm": 0.345703125, "learning_rate": 6.657364917527508e-06, "loss": 0.4288, "step": 72040 }, { "epoch": 1.8287748600728508, "grad_norm": 0.3515625, "learning_rate": 6.6475792831424015e-06, "loss": 0.419, "step": 72045 }, { "epoch": 1.8289017781218666, "grad_norm": 0.357421875, "learning_rate": 6.637800683003497e-06, "loss": 0.4317, "step": 72050 }, { "epoch": 1.8290286961708824, "grad_norm": 0.322265625, "learning_rate": 6.628029117590594e-06, "loss": 0.4089, "step": 72055 }, { "epoch": 1.8291556142198981, "grad_norm": 0.318359375, "learning_rate": 6.618264587383193e-06, "loss": 0.4044, "step": 72060 }, { "epoch": 1.829282532268914, "grad_norm": 0.359375, "learning_rate": 6.6085070928604265e-06, "loss": 0.415, "step": 72065 }, { "epoch": 1.8294094503179297, "grad_norm": 0.357421875, "learning_rate": 6.598756634501113e-06, "loss": 0.4397, "step": 72070 }, { "epoch": 1.8295363683669454, "grad_norm": 0.33984375, "learning_rate": 6.589013212783667e-06, "loss": 0.4262, "step": 72075 }, { "epoch": 1.8296632864159612, "grad_norm": 0.3515625, "learning_rate": 6.579276828186225e-06, "loss": 0.3845, "step": 72080 }, { "epoch": 1.829790204464977, "grad_norm": 0.34375, "learning_rate": 6.569547481186555e-06, "loss": 0.4154, "step": 72085 }, { "epoch": 1.8299171225139927, "grad_norm": 0.3359375, "learning_rate": 6.5598251722620395e-06, "loss": 0.4188, "step": 72090 }, { "epoch": 1.8300440405630085, "grad_norm": 0.330078125, "learning_rate": 6.550109901889766e-06, "loss": 0.3703, "step": 72095 }, { "epoch": 1.8301709586120243, "grad_norm": 0.361328125, "learning_rate": 6.540401670546453e-06, "loss": 0.4136, "step": 72100 }, { "epoch": 1.83029787666104, "grad_norm": 0.3671875, "learning_rate": 6.530700478708484e-06, "loss": 0.4129, "step": 72105 }, { "epoch": 1.8304247947100558, "grad_norm": 0.373046875, "learning_rate": 6.521006326851913e-06, "loss": 0.4246, "step": 72110 }, { "epoch": 1.8305517127590716, "grad_norm": 0.337890625, "learning_rate": 6.511319215452393e-06, "loss": 0.3776, "step": 72115 }, { "epoch": 1.8306786308080873, "grad_norm": 0.322265625, "learning_rate": 6.5016391449852935e-06, "loss": 0.4134, "step": 72120 }, { "epoch": 1.8308055488571031, "grad_norm": 0.326171875, "learning_rate": 6.491966115925584e-06, "loss": 0.393, "step": 72125 }, { "epoch": 1.8309324669061189, "grad_norm": 0.35546875, "learning_rate": 6.482300128747936e-06, "loss": 0.4166, "step": 72130 }, { "epoch": 1.8310593849551344, "grad_norm": 0.341796875, "learning_rate": 6.472641183926669e-06, "loss": 0.3997, "step": 72135 }, { "epoch": 1.8311863030041502, "grad_norm": 0.3359375, "learning_rate": 6.462989281935688e-06, "loss": 0.4593, "step": 72140 }, { "epoch": 1.831313221053166, "grad_norm": 0.33203125, "learning_rate": 6.45334442324868e-06, "loss": 0.3997, "step": 72145 }, { "epoch": 1.8314401391021817, "grad_norm": 0.3515625, "learning_rate": 6.443706608338867e-06, "loss": 0.4268, "step": 72150 }, { "epoch": 1.8315670571511975, "grad_norm": 0.33984375, "learning_rate": 6.434075837679187e-06, "loss": 0.3808, "step": 72155 }, { "epoch": 1.8316939752002133, "grad_norm": 0.345703125, "learning_rate": 6.424452111742228e-06, "loss": 0.4251, "step": 72160 }, { "epoch": 1.8318208932492288, "grad_norm": 0.361328125, "learning_rate": 6.4148354310001985e-06, "loss": 0.4409, "step": 72165 }, { "epoch": 1.8319478112982446, "grad_norm": 0.337890625, "learning_rate": 6.405225795925017e-06, "loss": 0.4214, "step": 72170 }, { "epoch": 1.8320747293472603, "grad_norm": 0.357421875, "learning_rate": 6.395623206988193e-06, "loss": 0.4243, "step": 72175 }, { "epoch": 1.8322016473962761, "grad_norm": 0.34765625, "learning_rate": 6.386027664660914e-06, "loss": 0.419, "step": 72180 }, { "epoch": 1.8323285654452919, "grad_norm": 0.357421875, "learning_rate": 6.376439169414072e-06, "loss": 0.4176, "step": 72185 }, { "epoch": 1.8324554834943076, "grad_norm": 0.310546875, "learning_rate": 6.366857721718155e-06, "loss": 0.4136, "step": 72190 }, { "epoch": 1.8325824015433234, "grad_norm": 0.349609375, "learning_rate": 6.357283322043305e-06, "loss": 0.4521, "step": 72195 }, { "epoch": 1.8327093195923392, "grad_norm": 0.341796875, "learning_rate": 6.347715970859346e-06, "loss": 0.3742, "step": 72200 }, { "epoch": 1.832836237641355, "grad_norm": 0.380859375, "learning_rate": 6.338155668635736e-06, "loss": 0.3782, "step": 72205 }, { "epoch": 1.8329631556903707, "grad_norm": 0.36328125, "learning_rate": 6.328602415841599e-06, "loss": 0.4071, "step": 72210 }, { "epoch": 1.8330900737393865, "grad_norm": 0.37109375, "learning_rate": 6.319056212945728e-06, "loss": 0.4238, "step": 72215 }, { "epoch": 1.8332169917884023, "grad_norm": 0.337890625, "learning_rate": 6.30951706041653e-06, "loss": 0.3727, "step": 72220 }, { "epoch": 1.833343909837418, "grad_norm": 0.341796875, "learning_rate": 6.299984958722082e-06, "loss": 0.4121, "step": 72225 }, { "epoch": 1.8334708278864338, "grad_norm": 0.322265625, "learning_rate": 6.290459908330142e-06, "loss": 0.4053, "step": 72230 }, { "epoch": 1.8335977459354496, "grad_norm": 0.35546875, "learning_rate": 6.280941909708087e-06, "loss": 0.3978, "step": 72235 }, { "epoch": 1.8337246639844653, "grad_norm": 0.361328125, "learning_rate": 6.27143096332296e-06, "loss": 0.4343, "step": 72240 }, { "epoch": 1.833851582033481, "grad_norm": 0.34375, "learning_rate": 6.26192706964147e-06, "loss": 0.3951, "step": 72245 }, { "epoch": 1.8339785000824969, "grad_norm": 0.34765625, "learning_rate": 6.252430229129962e-06, "loss": 0.4234, "step": 72250 }, { "epoch": 1.8341054181315126, "grad_norm": 0.345703125, "learning_rate": 6.242940442254413e-06, "loss": 0.3842, "step": 72255 }, { "epoch": 1.8342323361805284, "grad_norm": 0.341796875, "learning_rate": 6.233457709480532e-06, "loss": 0.4322, "step": 72260 }, { "epoch": 1.834359254229544, "grad_norm": 0.369140625, "learning_rate": 6.223982031273617e-06, "loss": 0.4044, "step": 72265 }, { "epoch": 1.8344861722785597, "grad_norm": 0.34375, "learning_rate": 6.214513408098609e-06, "loss": 0.4096, "step": 72270 }, { "epoch": 1.8346130903275755, "grad_norm": 0.333984375, "learning_rate": 6.205051840420188e-06, "loss": 0.4445, "step": 72275 }, { "epoch": 1.8347400083765912, "grad_norm": 0.34375, "learning_rate": 6.195597328702567e-06, "loss": 0.4056, "step": 72280 }, { "epoch": 1.834866926425607, "grad_norm": 0.3671875, "learning_rate": 6.186149873409723e-06, "loss": 0.4206, "step": 72285 }, { "epoch": 1.8349938444746228, "grad_norm": 0.330078125, "learning_rate": 6.176709475005204e-06, "loss": 0.392, "step": 72290 }, { "epoch": 1.8351207625236385, "grad_norm": 0.34765625, "learning_rate": 6.16727613395227e-06, "loss": 0.3984, "step": 72295 }, { "epoch": 1.835247680572654, "grad_norm": 0.365234375, "learning_rate": 6.157849850713803e-06, "loss": 0.4212, "step": 72300 }, { "epoch": 1.8353745986216699, "grad_norm": 0.330078125, "learning_rate": 6.148430625752349e-06, "loss": 0.4132, "step": 72305 }, { "epoch": 1.8355015166706856, "grad_norm": 0.326171875, "learning_rate": 6.139018459530087e-06, "loss": 0.3949, "step": 72310 }, { "epoch": 1.8356284347197014, "grad_norm": 0.333984375, "learning_rate": 6.1296133525089e-06, "loss": 0.4017, "step": 72315 }, { "epoch": 1.8357553527687172, "grad_norm": 0.345703125, "learning_rate": 6.120215305150266e-06, "loss": 0.4149, "step": 72320 }, { "epoch": 1.835882270817733, "grad_norm": 0.34765625, "learning_rate": 6.110824317915353e-06, "loss": 0.4221, "step": 72325 }, { "epoch": 1.8360091888667487, "grad_norm": 0.3515625, "learning_rate": 6.1014403912649555e-06, "loss": 0.409, "step": 72330 }, { "epoch": 1.8361361069157645, "grad_norm": 0.33203125, "learning_rate": 6.092063525659574e-06, "loss": 0.4322, "step": 72335 }, { "epoch": 1.8362630249647802, "grad_norm": 0.375, "learning_rate": 6.082693721559323e-06, "loss": 0.4352, "step": 72340 }, { "epoch": 1.836389943013796, "grad_norm": 0.341796875, "learning_rate": 6.073330979423952e-06, "loss": 0.4248, "step": 72345 }, { "epoch": 1.8365168610628118, "grad_norm": 0.357421875, "learning_rate": 6.063975299712876e-06, "loss": 0.4022, "step": 72350 }, { "epoch": 1.8366437791118275, "grad_norm": 0.3515625, "learning_rate": 6.054626682885228e-06, "loss": 0.4148, "step": 72355 }, { "epoch": 1.8367706971608433, "grad_norm": 0.373046875, "learning_rate": 6.04528512939969e-06, "loss": 0.4463, "step": 72360 }, { "epoch": 1.836897615209859, "grad_norm": 0.3203125, "learning_rate": 6.035950639714682e-06, "loss": 0.3981, "step": 72365 }, { "epoch": 1.8370245332588748, "grad_norm": 0.376953125, "learning_rate": 6.026623214288234e-06, "loss": 0.3978, "step": 72370 }, { "epoch": 1.8371514513078906, "grad_norm": 0.361328125, "learning_rate": 6.017302853578032e-06, "loss": 0.4161, "step": 72375 }, { "epoch": 1.8372783693569064, "grad_norm": 0.34765625, "learning_rate": 6.007989558041409e-06, "loss": 0.385, "step": 72380 }, { "epoch": 1.8374052874059221, "grad_norm": 0.35546875, "learning_rate": 5.998683328135401e-06, "loss": 0.446, "step": 72385 }, { "epoch": 1.837532205454938, "grad_norm": 0.359375, "learning_rate": 5.989384164316608e-06, "loss": 0.4396, "step": 72390 }, { "epoch": 1.8376591235039537, "grad_norm": 0.328125, "learning_rate": 5.980092067041398e-06, "loss": 0.4417, "step": 72395 }, { "epoch": 1.8377860415529692, "grad_norm": 0.3125, "learning_rate": 5.97080703676569e-06, "loss": 0.3898, "step": 72400 }, { "epoch": 1.837912959601985, "grad_norm": 0.345703125, "learning_rate": 5.9615290739451205e-06, "loss": 0.4238, "step": 72405 }, { "epoch": 1.8380398776510007, "grad_norm": 0.35546875, "learning_rate": 5.952258179034925e-06, "loss": 0.4268, "step": 72410 }, { "epoch": 1.8381667957000165, "grad_norm": 0.369140625, "learning_rate": 5.942994352490055e-06, "loss": 0.434, "step": 72415 }, { "epoch": 1.8382937137490323, "grad_norm": 0.369140625, "learning_rate": 5.933737594765064e-06, "loss": 0.3914, "step": 72420 }, { "epoch": 1.838420631798048, "grad_norm": 0.359375, "learning_rate": 5.9244879063141736e-06, "loss": 0.4113, "step": 72425 }, { "epoch": 1.8385475498470636, "grad_norm": 0.33203125, "learning_rate": 5.915245287591269e-06, "loss": 0.4151, "step": 72430 }, { "epoch": 1.8386744678960794, "grad_norm": 0.345703125, "learning_rate": 5.906009739049905e-06, "loss": 0.427, "step": 72435 }, { "epoch": 1.8388013859450951, "grad_norm": 0.3359375, "learning_rate": 5.8967812611432186e-06, "loss": 0.4194, "step": 72440 }, { "epoch": 1.838928303994111, "grad_norm": 0.369140625, "learning_rate": 5.887559854324097e-06, "loss": 0.4332, "step": 72445 }, { "epoch": 1.8390552220431267, "grad_norm": 0.337890625, "learning_rate": 5.878345519044997e-06, "loss": 0.3864, "step": 72450 }, { "epoch": 1.8391821400921424, "grad_norm": 0.3671875, "learning_rate": 5.8691382557580545e-06, "loss": 0.4309, "step": 72455 }, { "epoch": 1.8393090581411582, "grad_norm": 0.3671875, "learning_rate": 5.859938064915109e-06, "loss": 0.4208, "step": 72460 }, { "epoch": 1.839435976190174, "grad_norm": 0.345703125, "learning_rate": 5.8507449469675835e-06, "loss": 0.4135, "step": 72465 }, { "epoch": 1.8395628942391897, "grad_norm": 0.365234375, "learning_rate": 5.8415589023665835e-06, "loss": 0.3975, "step": 72470 }, { "epoch": 1.8396898122882055, "grad_norm": 0.34375, "learning_rate": 5.832379931562847e-06, "loss": 0.3826, "step": 72475 }, { "epoch": 1.8398167303372213, "grad_norm": 0.52734375, "learning_rate": 5.823208035006815e-06, "loss": 0.4069, "step": 72480 }, { "epoch": 1.839943648386237, "grad_norm": 0.349609375, "learning_rate": 5.814043213148545e-06, "loss": 0.4287, "step": 72485 }, { "epoch": 1.8400705664352528, "grad_norm": 0.3515625, "learning_rate": 5.804885466437725e-06, "loss": 0.4334, "step": 72490 }, { "epoch": 1.8401974844842686, "grad_norm": 0.341796875, "learning_rate": 5.795734795323731e-06, "loss": 0.429, "step": 72495 }, { "epoch": 1.8403244025332843, "grad_norm": 0.3515625, "learning_rate": 5.7865912002555855e-06, "loss": 0.4336, "step": 72500 }, { "epoch": 1.8404513205823, "grad_norm": 0.345703125, "learning_rate": 5.7774546816819644e-06, "loss": 0.4019, "step": 72505 }, { "epoch": 1.8405782386313159, "grad_norm": 0.33203125, "learning_rate": 5.768325240051208e-06, "loss": 0.4033, "step": 72510 }, { "epoch": 1.8407051566803316, "grad_norm": 0.361328125, "learning_rate": 5.759202875811259e-06, "loss": 0.424, "step": 72515 }, { "epoch": 1.8408320747293474, "grad_norm": 0.375, "learning_rate": 5.750087589409774e-06, "loss": 0.4438, "step": 72520 }, { "epoch": 1.8409589927783632, "grad_norm": 0.345703125, "learning_rate": 5.740979381294031e-06, "loss": 0.4025, "step": 72525 }, { "epoch": 1.8410859108273787, "grad_norm": 0.34765625, "learning_rate": 5.7318782519109705e-06, "loss": 0.4201, "step": 72530 }, { "epoch": 1.8412128288763945, "grad_norm": 0.345703125, "learning_rate": 5.722784201707186e-06, "loss": 0.4324, "step": 72535 }, { "epoch": 1.8413397469254102, "grad_norm": 0.345703125, "learning_rate": 5.713697231128889e-06, "loss": 0.434, "step": 72540 }, { "epoch": 1.841466664974426, "grad_norm": 0.353515625, "learning_rate": 5.704617340622003e-06, "loss": 0.4237, "step": 72545 }, { "epoch": 1.8415935830234418, "grad_norm": 0.314453125, "learning_rate": 5.695544530632073e-06, "loss": 0.3842, "step": 72550 }, { "epoch": 1.8417205010724575, "grad_norm": 0.36328125, "learning_rate": 5.686478801604294e-06, "loss": 0.4176, "step": 72555 }, { "epoch": 1.8418474191214733, "grad_norm": 0.35546875, "learning_rate": 5.6774201539834914e-06, "loss": 0.4358, "step": 72560 }, { "epoch": 1.8419743371704889, "grad_norm": 0.33984375, "learning_rate": 5.668368588214212e-06, "loss": 0.4131, "step": 72565 }, { "epoch": 1.8421012552195046, "grad_norm": 0.322265625, "learning_rate": 5.659324104740582e-06, "loss": 0.3699, "step": 72570 }, { "epoch": 1.8422281732685204, "grad_norm": 0.3515625, "learning_rate": 5.650286704006429e-06, "loss": 0.3906, "step": 72575 }, { "epoch": 1.8423550913175362, "grad_norm": 0.34765625, "learning_rate": 5.641256386455184e-06, "loss": 0.4308, "step": 72580 }, { "epoch": 1.842482009366552, "grad_norm": 0.349609375, "learning_rate": 5.632233152530008e-06, "loss": 0.4114, "step": 72585 }, { "epoch": 1.8426089274155677, "grad_norm": 0.3515625, "learning_rate": 5.623217002673646e-06, "loss": 0.4181, "step": 72590 }, { "epoch": 1.8427358454645835, "grad_norm": 0.337890625, "learning_rate": 5.61420793732848e-06, "loss": 0.4182, "step": 72595 }, { "epoch": 1.8428627635135992, "grad_norm": 0.35546875, "learning_rate": 5.605205956936653e-06, "loss": 0.422, "step": 72600 }, { "epoch": 1.842989681562615, "grad_norm": 0.38671875, "learning_rate": 5.5962110619398314e-06, "loss": 0.4185, "step": 72605 }, { "epoch": 1.8431165996116308, "grad_norm": 0.353515625, "learning_rate": 5.587223252779427e-06, "loss": 0.4269, "step": 72610 }, { "epoch": 1.8432435176606465, "grad_norm": 0.328125, "learning_rate": 5.578242529896454e-06, "loss": 0.4073, "step": 72615 }, { "epoch": 1.8433704357096623, "grad_norm": 0.33203125, "learning_rate": 5.569268893731576e-06, "loss": 0.4323, "step": 72620 }, { "epoch": 1.843497353758678, "grad_norm": 0.369140625, "learning_rate": 5.560302344725159e-06, "loss": 0.3946, "step": 72625 }, { "epoch": 1.8436242718076938, "grad_norm": 0.361328125, "learning_rate": 5.551342883317151e-06, "loss": 0.4404, "step": 72630 }, { "epoch": 1.8437511898567096, "grad_norm": 0.36328125, "learning_rate": 5.542390509947214e-06, "loss": 0.4232, "step": 72635 }, { "epoch": 1.8438781079057254, "grad_norm": 0.357421875, "learning_rate": 5.533445225054634e-06, "loss": 0.4337, "step": 72640 }, { "epoch": 1.8440050259547411, "grad_norm": 0.34375, "learning_rate": 5.52450702907834e-06, "loss": 0.4211, "step": 72645 }, { "epoch": 1.844131944003757, "grad_norm": 0.365234375, "learning_rate": 5.51557592245695e-06, "loss": 0.4045, "step": 72650 }, { "epoch": 1.8442588620527727, "grad_norm": 0.337890625, "learning_rate": 5.5066519056286785e-06, "loss": 0.4049, "step": 72655 }, { "epoch": 1.8443857801017884, "grad_norm": 0.3828125, "learning_rate": 5.4977349790314595e-06, "loss": 0.4257, "step": 72660 }, { "epoch": 1.844512698150804, "grad_norm": 0.3359375, "learning_rate": 5.48882514310281e-06, "loss": 0.4041, "step": 72665 }, { "epoch": 1.8446396161998198, "grad_norm": 0.353515625, "learning_rate": 5.479922398279945e-06, "loss": 0.4087, "step": 72670 }, { "epoch": 1.8447665342488355, "grad_norm": 0.390625, "learning_rate": 5.471026744999718e-06, "loss": 0.4206, "step": 72675 }, { "epoch": 1.8448934522978513, "grad_norm": 0.349609375, "learning_rate": 5.4621381836986615e-06, "loss": 0.4349, "step": 72680 }, { "epoch": 1.845020370346867, "grad_norm": 0.328125, "learning_rate": 5.453256714812893e-06, "loss": 0.4089, "step": 72685 }, { "epoch": 1.8451472883958828, "grad_norm": 0.353515625, "learning_rate": 5.44438233877823e-06, "loss": 0.3927, "step": 72690 }, { "epoch": 1.8452742064448984, "grad_norm": 0.357421875, "learning_rate": 5.43551505603016e-06, "loss": 0.4337, "step": 72695 }, { "epoch": 1.8454011244939141, "grad_norm": 0.35546875, "learning_rate": 5.426654867003766e-06, "loss": 0.4169, "step": 72700 }, { "epoch": 1.84552804254293, "grad_norm": 0.3359375, "learning_rate": 5.417801772133834e-06, "loss": 0.4222, "step": 72705 }, { "epoch": 1.8456549605919457, "grad_norm": 0.34765625, "learning_rate": 5.408955771854767e-06, "loss": 0.4247, "step": 72710 }, { "epoch": 1.8457818786409614, "grad_norm": 0.357421875, "learning_rate": 5.400116866600651e-06, "loss": 0.423, "step": 72715 }, { "epoch": 1.8459087966899772, "grad_norm": 0.36328125, "learning_rate": 5.391285056805206e-06, "loss": 0.4076, "step": 72720 }, { "epoch": 1.846035714738993, "grad_norm": 0.361328125, "learning_rate": 5.382460342901801e-06, "loss": 0.4176, "step": 72725 }, { "epoch": 1.8461626327880087, "grad_norm": 0.376953125, "learning_rate": 5.373642725323457e-06, "loss": 0.4321, "step": 72730 }, { "epoch": 1.8462895508370245, "grad_norm": 0.380859375, "learning_rate": 5.364832204502845e-06, "loss": 0.4296, "step": 72735 }, { "epoch": 1.8464164688860403, "grad_norm": 0.33984375, "learning_rate": 5.3560287808723176e-06, "loss": 0.4431, "step": 72740 }, { "epoch": 1.846543386935056, "grad_norm": 0.373046875, "learning_rate": 5.347232454863815e-06, "loss": 0.4441, "step": 72745 }, { "epoch": 1.8466703049840718, "grad_norm": 0.34765625, "learning_rate": 5.3384432269090064e-06, "loss": 0.4198, "step": 72750 }, { "epoch": 1.8467972230330876, "grad_norm": 0.36328125, "learning_rate": 5.3296610974391804e-06, "loss": 0.4169, "step": 72755 }, { "epoch": 1.8469241410821033, "grad_norm": 0.36328125, "learning_rate": 5.320886066885244e-06, "loss": 0.4144, "step": 72760 }, { "epoch": 1.847051059131119, "grad_norm": 0.34765625, "learning_rate": 5.312118135677801e-06, "loss": 0.4237, "step": 72765 }, { "epoch": 1.8471779771801349, "grad_norm": 0.65625, "learning_rate": 5.3033573042470575e-06, "loss": 0.4265, "step": 72770 }, { "epoch": 1.8473048952291506, "grad_norm": 0.365234375, "learning_rate": 5.294603573022971e-06, "loss": 0.4297, "step": 72775 }, { "epoch": 1.8474318132781664, "grad_norm": 0.34375, "learning_rate": 5.28585694243503e-06, "loss": 0.4107, "step": 72780 }, { "epoch": 1.8475587313271822, "grad_norm": 0.373046875, "learning_rate": 5.277117412912441e-06, "loss": 0.4293, "step": 72785 }, { "epoch": 1.847685649376198, "grad_norm": 0.357421875, "learning_rate": 5.268384984884061e-06, "loss": 0.4091, "step": 72790 }, { "epoch": 1.8478125674252135, "grad_norm": 0.359375, "learning_rate": 5.259659658778365e-06, "loss": 0.4335, "step": 72795 }, { "epoch": 1.8479394854742293, "grad_norm": 0.35546875, "learning_rate": 5.250941435023526e-06, "loss": 0.393, "step": 72800 }, { "epoch": 1.848066403523245, "grad_norm": 0.326171875, "learning_rate": 5.242230314047336e-06, "loss": 0.4047, "step": 72805 }, { "epoch": 1.8481933215722608, "grad_norm": 0.361328125, "learning_rate": 5.233526296277218e-06, "loss": 0.4351, "step": 72810 }, { "epoch": 1.8483202396212766, "grad_norm": 0.34765625, "learning_rate": 5.2248293821403165e-06, "loss": 0.4128, "step": 72815 }, { "epoch": 1.8484471576702923, "grad_norm": 0.333984375, "learning_rate": 5.2161395720633715e-06, "loss": 0.4131, "step": 72820 }, { "epoch": 1.848574075719308, "grad_norm": 0.36328125, "learning_rate": 5.207456866472759e-06, "loss": 0.4175, "step": 72825 }, { "epoch": 1.8487009937683236, "grad_norm": 0.349609375, "learning_rate": 5.198781265794571e-06, "loss": 0.4158, "step": 72830 }, { "epoch": 1.8488279118173394, "grad_norm": 0.333984375, "learning_rate": 5.190112770454519e-06, "loss": 0.4093, "step": 72835 }, { "epoch": 1.8489548298663552, "grad_norm": 0.34765625, "learning_rate": 5.1814513808779265e-06, "loss": 0.431, "step": 72840 }, { "epoch": 1.849081747915371, "grad_norm": 0.34375, "learning_rate": 5.172797097489856e-06, "loss": 0.4251, "step": 72845 }, { "epoch": 1.8492086659643867, "grad_norm": 0.36328125, "learning_rate": 5.164149920714916e-06, "loss": 0.4079, "step": 72850 }, { "epoch": 1.8493355840134025, "grad_norm": 0.34765625, "learning_rate": 5.155509850977468e-06, "loss": 0.4068, "step": 72855 }, { "epoch": 1.8494625020624182, "grad_norm": 0.345703125, "learning_rate": 5.14687688870144e-06, "loss": 0.3951, "step": 72860 }, { "epoch": 1.849589420111434, "grad_norm": 0.34375, "learning_rate": 5.138251034310475e-06, "loss": 0.4208, "step": 72865 }, { "epoch": 1.8497163381604498, "grad_norm": 0.369140625, "learning_rate": 5.12963228822782e-06, "loss": 0.4329, "step": 72870 }, { "epoch": 1.8498432562094655, "grad_norm": 0.380859375, "learning_rate": 5.1210206508764e-06, "loss": 0.4392, "step": 72875 }, { "epoch": 1.8499701742584813, "grad_norm": 0.34375, "learning_rate": 5.112416122678797e-06, "loss": 0.4291, "step": 72880 }, { "epoch": 1.850097092307497, "grad_norm": 0.361328125, "learning_rate": 5.1038187040572055e-06, "loss": 0.415, "step": 72885 }, { "epoch": 1.8502240103565128, "grad_norm": 0.34765625, "learning_rate": 5.0952283954335205e-06, "loss": 0.4083, "step": 72890 }, { "epoch": 1.8503509284055286, "grad_norm": 0.330078125, "learning_rate": 5.0866451972292385e-06, "loss": 0.4111, "step": 72895 }, { "epoch": 1.8504778464545444, "grad_norm": 0.361328125, "learning_rate": 5.078069109865557e-06, "loss": 0.4339, "step": 72900 }, { "epoch": 1.8506047645035602, "grad_norm": 0.3671875, "learning_rate": 5.0695001337633045e-06, "loss": 0.379, "step": 72905 }, { "epoch": 1.850731682552576, "grad_norm": 0.345703125, "learning_rate": 5.060938269342963e-06, "loss": 0.4215, "step": 72910 }, { "epoch": 1.8508586006015917, "grad_norm": 0.333984375, "learning_rate": 5.052383517024611e-06, "loss": 0.4213, "step": 72915 }, { "epoch": 1.8509855186506075, "grad_norm": 0.3359375, "learning_rate": 5.043835877228081e-06, "loss": 0.4051, "step": 72920 }, { "epoch": 1.851112436699623, "grad_norm": 0.3671875, "learning_rate": 5.035295350372787e-06, "loss": 0.4219, "step": 72925 }, { "epoch": 1.8512393547486388, "grad_norm": 0.3515625, "learning_rate": 5.026761936877794e-06, "loss": 0.4141, "step": 72930 }, { "epoch": 1.8513662727976545, "grad_norm": 0.330078125, "learning_rate": 5.0182356371618494e-06, "loss": 0.4126, "step": 72935 }, { "epoch": 1.8514931908466703, "grad_norm": 0.359375, "learning_rate": 5.009716451643336e-06, "loss": 0.4171, "step": 72940 }, { "epoch": 1.851620108895686, "grad_norm": 0.34765625, "learning_rate": 5.001204380740287e-06, "loss": 0.4002, "step": 72945 }, { "epoch": 1.8517470269447018, "grad_norm": 0.34765625, "learning_rate": 4.992699424870366e-06, "loss": 0.4167, "step": 72950 }, { "epoch": 1.8518739449937176, "grad_norm": 0.353515625, "learning_rate": 4.984201584450925e-06, "loss": 0.4218, "step": 72955 }, { "epoch": 1.8520008630427331, "grad_norm": 0.326171875, "learning_rate": 4.975710859898929e-06, "loss": 0.3894, "step": 72960 }, { "epoch": 1.852127781091749, "grad_norm": 0.36328125, "learning_rate": 4.967227251631045e-06, "loss": 0.4087, "step": 72965 }, { "epoch": 1.8522546991407647, "grad_norm": 0.34765625, "learning_rate": 4.958750760063557e-06, "loss": 0.4296, "step": 72970 }, { "epoch": 1.8523816171897804, "grad_norm": 0.345703125, "learning_rate": 4.950281385612398e-06, "loss": 0.4198, "step": 72975 }, { "epoch": 1.8525085352387962, "grad_norm": 0.333984375, "learning_rate": 4.941819128693136e-06, "loss": 0.415, "step": 72980 }, { "epoch": 1.852635453287812, "grad_norm": 0.33203125, "learning_rate": 4.93336398972104e-06, "loss": 0.4216, "step": 72985 }, { "epoch": 1.8527623713368278, "grad_norm": 0.349609375, "learning_rate": 4.9249159691109595e-06, "loss": 0.4151, "step": 72990 }, { "epoch": 1.8528892893858435, "grad_norm": 0.33984375, "learning_rate": 4.916475067277481e-06, "loss": 0.4083, "step": 72995 }, { "epoch": 1.8530162074348593, "grad_norm": 0.3359375, "learning_rate": 4.908041284634789e-06, "loss": 0.3989, "step": 73000 }, { "epoch": 1.853143125483875, "grad_norm": 0.314453125, "learning_rate": 4.8996146215967034e-06, "loss": 0.3946, "step": 73005 }, { "epoch": 1.8532700435328908, "grad_norm": 0.36328125, "learning_rate": 4.891195078576743e-06, "loss": 0.4147, "step": 73010 }, { "epoch": 1.8533969615819066, "grad_norm": 0.369140625, "learning_rate": 4.882782655988027e-06, "loss": 0.4211, "step": 73015 }, { "epoch": 1.8535238796309224, "grad_norm": 0.330078125, "learning_rate": 4.874377354243359e-06, "loss": 0.4264, "step": 73020 }, { "epoch": 1.8536507976799381, "grad_norm": 0.55859375, "learning_rate": 4.865979173755174e-06, "loss": 0.4087, "step": 73025 }, { "epoch": 1.853777715728954, "grad_norm": 0.3671875, "learning_rate": 4.857588114935579e-06, "loss": 0.4232, "step": 73030 }, { "epoch": 1.8539046337779697, "grad_norm": 0.341796875, "learning_rate": 4.849204178196342e-06, "loss": 0.3969, "step": 73035 }, { "epoch": 1.8540315518269854, "grad_norm": 0.365234375, "learning_rate": 4.840827363948818e-06, "loss": 0.4259, "step": 73040 }, { "epoch": 1.8541584698760012, "grad_norm": 0.361328125, "learning_rate": 4.832457672604079e-06, "loss": 0.4285, "step": 73045 }, { "epoch": 1.854285387925017, "grad_norm": 0.353515625, "learning_rate": 4.824095104572811e-06, "loss": 0.4219, "step": 73050 }, { "epoch": 1.8544123059740327, "grad_norm": 0.33984375, "learning_rate": 4.815739660265389e-06, "loss": 0.4037, "step": 73055 }, { "epoch": 1.8545392240230483, "grad_norm": 0.33203125, "learning_rate": 4.807391340091765e-06, "loss": 0.4037, "step": 73060 }, { "epoch": 1.854666142072064, "grad_norm": 0.349609375, "learning_rate": 4.799050144461613e-06, "loss": 0.4252, "step": 73065 }, { "epoch": 1.8547930601210798, "grad_norm": 0.349609375, "learning_rate": 4.790716073784223e-06, "loss": 0.4371, "step": 73070 }, { "epoch": 1.8549199781700956, "grad_norm": 0.3359375, "learning_rate": 4.7823891284685664e-06, "loss": 0.383, "step": 73075 }, { "epoch": 1.8550468962191113, "grad_norm": 0.349609375, "learning_rate": 4.774069308923234e-06, "loss": 0.4077, "step": 73080 }, { "epoch": 1.855173814268127, "grad_norm": 0.33984375, "learning_rate": 4.765756615556449e-06, "loss": 0.4223, "step": 73085 }, { "epoch": 1.8553007323171429, "grad_norm": 0.3515625, "learning_rate": 4.757451048776151e-06, "loss": 0.4088, "step": 73090 }, { "epoch": 1.8554276503661584, "grad_norm": 0.3515625, "learning_rate": 4.749152608989881e-06, "loss": 0.4382, "step": 73095 }, { "epoch": 1.8555545684151742, "grad_norm": 0.359375, "learning_rate": 4.7408612966048146e-06, "loss": 0.4303, "step": 73100 }, { "epoch": 1.85568148646419, "grad_norm": 0.322265625, "learning_rate": 4.732577112027841e-06, "loss": 0.3698, "step": 73105 }, { "epoch": 1.8558084045132057, "grad_norm": 0.34375, "learning_rate": 4.724300055665436e-06, "loss": 0.4218, "step": 73110 }, { "epoch": 1.8559353225622215, "grad_norm": 0.365234375, "learning_rate": 4.716030127923759e-06, "loss": 0.3889, "step": 73115 }, { "epoch": 1.8560622406112373, "grad_norm": 0.3359375, "learning_rate": 4.7077673292086e-06, "loss": 0.4037, "step": 73120 }, { "epoch": 1.856189158660253, "grad_norm": 0.341796875, "learning_rate": 4.6995116599254365e-06, "loss": 0.4057, "step": 73125 }, { "epoch": 1.8563160767092688, "grad_norm": 0.345703125, "learning_rate": 4.691263120479361e-06, "loss": 0.4119, "step": 73130 }, { "epoch": 1.8564429947582846, "grad_norm": 0.357421875, "learning_rate": 4.683021711275114e-06, "loss": 0.4128, "step": 73135 }, { "epoch": 1.8565699128073003, "grad_norm": 0.3515625, "learning_rate": 4.674787432717092e-06, "loss": 0.4068, "step": 73140 }, { "epoch": 1.856696830856316, "grad_norm": 0.337890625, "learning_rate": 4.66656028520937e-06, "loss": 0.4011, "step": 73145 }, { "epoch": 1.8568237489053319, "grad_norm": 0.33984375, "learning_rate": 4.658340269155642e-06, "loss": 0.412, "step": 73150 }, { "epoch": 1.8569506669543476, "grad_norm": 0.375, "learning_rate": 4.650127384959268e-06, "loss": 0.4439, "step": 73155 }, { "epoch": 1.8570775850033634, "grad_norm": 0.322265625, "learning_rate": 4.641921633023227e-06, "loss": 0.4059, "step": 73160 }, { "epoch": 1.8572045030523792, "grad_norm": 0.3515625, "learning_rate": 4.6337230137502126e-06, "loss": 0.4312, "step": 73165 }, { "epoch": 1.857331421101395, "grad_norm": 0.353515625, "learning_rate": 4.625531527542486e-06, "loss": 0.4279, "step": 73170 }, { "epoch": 1.8574583391504107, "grad_norm": 0.3359375, "learning_rate": 4.617347174802028e-06, "loss": 0.3861, "step": 73175 }, { "epoch": 1.8575852571994265, "grad_norm": 0.34375, "learning_rate": 4.609169955930447e-06, "loss": 0.4057, "step": 73180 }, { "epoch": 1.8577121752484422, "grad_norm": 0.31640625, "learning_rate": 4.6009998713289585e-06, "loss": 0.4178, "step": 73185 }, { "epoch": 1.8578390932974578, "grad_norm": 0.3046875, "learning_rate": 4.592836921398507e-06, "loss": 0.3914, "step": 73190 }, { "epoch": 1.8579660113464735, "grad_norm": 0.349609375, "learning_rate": 4.584681106539606e-06, "loss": 0.4489, "step": 73195 }, { "epoch": 1.8580929293954893, "grad_norm": 0.330078125, "learning_rate": 4.576532427152502e-06, "loss": 0.4098, "step": 73200 }, { "epoch": 1.858219847444505, "grad_norm": 0.349609375, "learning_rate": 4.568390883637008e-06, "loss": 0.4001, "step": 73205 }, { "epoch": 1.8583467654935208, "grad_norm": 0.326171875, "learning_rate": 4.560256476392621e-06, "loss": 0.4212, "step": 73210 }, { "epoch": 1.8584736835425366, "grad_norm": 0.345703125, "learning_rate": 4.552129205818539e-06, "loss": 0.4261, "step": 73215 }, { "epoch": 1.8586006015915524, "grad_norm": 0.345703125, "learning_rate": 4.544009072313543e-06, "loss": 0.4275, "step": 73220 }, { "epoch": 1.858727519640568, "grad_norm": 0.359375, "learning_rate": 4.53589607627608e-06, "loss": 0.4025, "step": 73225 }, { "epoch": 1.8588544376895837, "grad_norm": 0.341796875, "learning_rate": 4.527790218104249e-06, "loss": 0.4195, "step": 73230 }, { "epoch": 1.8589813557385995, "grad_norm": 0.349609375, "learning_rate": 4.519691498195799e-06, "loss": 0.4133, "step": 73235 }, { "epoch": 1.8591082737876152, "grad_norm": 0.3828125, "learning_rate": 4.5115999169481274e-06, "loss": 0.4397, "step": 73240 }, { "epoch": 1.859235191836631, "grad_norm": 0.3671875, "learning_rate": 4.503515474758301e-06, "loss": 0.4013, "step": 73245 }, { "epoch": 1.8593621098856468, "grad_norm": 0.3515625, "learning_rate": 4.495438172023019e-06, "loss": 0.42, "step": 73250 }, { "epoch": 1.8594890279346625, "grad_norm": 0.345703125, "learning_rate": 4.487368009138631e-06, "loss": 0.422, "step": 73255 }, { "epoch": 1.8596159459836783, "grad_norm": 0.341796875, "learning_rate": 4.47930498650112e-06, "loss": 0.3854, "step": 73260 }, { "epoch": 1.859742864032694, "grad_norm": 0.333984375, "learning_rate": 4.471249104506153e-06, "loss": 0.4161, "step": 73265 }, { "epoch": 1.8598697820817098, "grad_norm": 0.34375, "learning_rate": 4.46320036354903e-06, "loss": 0.4078, "step": 73270 }, { "epoch": 1.8599967001307256, "grad_norm": 0.345703125, "learning_rate": 4.455158764024669e-06, "loss": 0.4199, "step": 73275 }, { "epoch": 1.8601236181797414, "grad_norm": 0.357421875, "learning_rate": 4.447124306327704e-06, "loss": 0.3879, "step": 73280 }, { "epoch": 1.8602505362287571, "grad_norm": 0.349609375, "learning_rate": 4.43909699085237e-06, "loss": 0.4153, "step": 73285 }, { "epoch": 1.860377454277773, "grad_norm": 0.365234375, "learning_rate": 4.431076817992568e-06, "loss": 0.4305, "step": 73290 }, { "epoch": 1.8605043723267887, "grad_norm": 0.35546875, "learning_rate": 4.423063788141834e-06, "loss": 0.4032, "step": 73295 }, { "epoch": 1.8606312903758044, "grad_norm": 0.384765625, "learning_rate": 4.415057901693386e-06, "loss": 0.4323, "step": 73300 }, { "epoch": 1.8607582084248202, "grad_norm": 0.345703125, "learning_rate": 4.407059159040044e-06, "loss": 0.4418, "step": 73305 }, { "epoch": 1.860885126473836, "grad_norm": 0.337890625, "learning_rate": 4.3990675605743095e-06, "loss": 0.4301, "step": 73310 }, { "epoch": 1.8610120445228517, "grad_norm": 0.349609375, "learning_rate": 4.39108310668832e-06, "loss": 0.4182, "step": 73315 }, { "epoch": 1.8611389625718675, "grad_norm": 0.33984375, "learning_rate": 4.383105797773895e-06, "loss": 0.4135, "step": 73320 }, { "epoch": 1.861265880620883, "grad_norm": 0.375, "learning_rate": 4.375135634222454e-06, "loss": 0.4404, "step": 73325 }, { "epoch": 1.8613927986698988, "grad_norm": 0.322265625, "learning_rate": 4.367172616425102e-06, "loss": 0.4033, "step": 73330 }, { "epoch": 1.8615197167189146, "grad_norm": 0.37109375, "learning_rate": 4.359216744772593e-06, "loss": 0.4257, "step": 73335 }, { "epoch": 1.8616466347679304, "grad_norm": 0.359375, "learning_rate": 4.351268019655279e-06, "loss": 0.4269, "step": 73340 }, { "epoch": 1.8617735528169461, "grad_norm": 0.32421875, "learning_rate": 4.343326441463235e-06, "loss": 0.4169, "step": 73345 }, { "epoch": 1.8619004708659619, "grad_norm": 0.359375, "learning_rate": 4.335392010586147e-06, "loss": 0.4017, "step": 73350 }, { "epoch": 1.8620273889149774, "grad_norm": 0.35546875, "learning_rate": 4.327464727413355e-06, "loss": 0.3914, "step": 73355 }, { "epoch": 1.8621543069639932, "grad_norm": 0.302734375, "learning_rate": 4.319544592333829e-06, "loss": 0.4144, "step": 73360 }, { "epoch": 1.862281225013009, "grad_norm": 0.326171875, "learning_rate": 4.311631605736226e-06, "loss": 0.4232, "step": 73365 }, { "epoch": 1.8624081430620247, "grad_norm": 0.345703125, "learning_rate": 4.3037257680088365e-06, "loss": 0.4056, "step": 73370 }, { "epoch": 1.8625350611110405, "grad_norm": 0.341796875, "learning_rate": 4.295827079539582e-06, "loss": 0.4249, "step": 73375 }, { "epoch": 1.8626619791600563, "grad_norm": 0.345703125, "learning_rate": 4.2879355407160366e-06, "loss": 0.4136, "step": 73380 }, { "epoch": 1.862788897209072, "grad_norm": 0.349609375, "learning_rate": 4.2800511519254735e-06, "loss": 0.4008, "step": 73385 }, { "epoch": 1.8629158152580878, "grad_norm": 0.322265625, "learning_rate": 4.272173913554716e-06, "loss": 0.3969, "step": 73390 }, { "epoch": 1.8630427333071036, "grad_norm": 0.318359375, "learning_rate": 4.264303825990373e-06, "loss": 0.4257, "step": 73395 }, { "epoch": 1.8631696513561193, "grad_norm": 0.35546875, "learning_rate": 4.256440889618568e-06, "loss": 0.4144, "step": 73400 }, { "epoch": 1.863296569405135, "grad_norm": 0.41796875, "learning_rate": 4.248585104825159e-06, "loss": 0.4254, "step": 73405 }, { "epoch": 1.8634234874541509, "grad_norm": 0.33984375, "learning_rate": 4.240736471995621e-06, "loss": 0.4352, "step": 73410 }, { "epoch": 1.8635504055031666, "grad_norm": 0.353515625, "learning_rate": 4.232894991515079e-06, "loss": 0.412, "step": 73415 }, { "epoch": 1.8636773235521824, "grad_norm": 0.380859375, "learning_rate": 4.225060663768326e-06, "loss": 0.404, "step": 73420 }, { "epoch": 1.8638042416011982, "grad_norm": 0.34375, "learning_rate": 4.2172334891397705e-06, "loss": 0.4123, "step": 73425 }, { "epoch": 1.863931159650214, "grad_norm": 0.330078125, "learning_rate": 4.209413468013506e-06, "loss": 0.4057, "step": 73430 }, { "epoch": 1.8640580776992297, "grad_norm": 0.357421875, "learning_rate": 4.201600600773241e-06, "loss": 0.4292, "step": 73435 }, { "epoch": 1.8641849957482455, "grad_norm": 0.36328125, "learning_rate": 4.193794887802354e-06, "loss": 0.4122, "step": 73440 }, { "epoch": 1.8643119137972612, "grad_norm": 0.36328125, "learning_rate": 4.185996329483871e-06, "loss": 0.4136, "step": 73445 }, { "epoch": 1.864438831846277, "grad_norm": 0.37109375, "learning_rate": 4.178204926200468e-06, "loss": 0.4716, "step": 73450 }, { "epoch": 1.8645657498952926, "grad_norm": 0.353515625, "learning_rate": 4.1704206783344744e-06, "loss": 0.4289, "step": 73455 }, { "epoch": 1.8646926679443083, "grad_norm": 0.380859375, "learning_rate": 4.162643586267833e-06, "loss": 0.4485, "step": 73460 }, { "epoch": 1.864819585993324, "grad_norm": 0.353515625, "learning_rate": 4.154873650382173e-06, "loss": 0.4447, "step": 73465 }, { "epoch": 1.8649465040423399, "grad_norm": 0.330078125, "learning_rate": 4.147110871058773e-06, "loss": 0.3845, "step": 73470 }, { "epoch": 1.8650734220913556, "grad_norm": 0.35546875, "learning_rate": 4.1393552486785445e-06, "loss": 0.3968, "step": 73475 }, { "epoch": 1.8652003401403714, "grad_norm": 0.3359375, "learning_rate": 4.131606783622049e-06, "loss": 0.409, "step": 73480 }, { "epoch": 1.8653272581893872, "grad_norm": 0.337890625, "learning_rate": 4.1238654762695e-06, "loss": 0.4108, "step": 73485 }, { "epoch": 1.8654541762384027, "grad_norm": 0.341796875, "learning_rate": 4.11613132700076e-06, "loss": 0.4139, "step": 73490 }, { "epoch": 1.8655810942874185, "grad_norm": 0.33203125, "learning_rate": 4.108404336195359e-06, "loss": 0.4258, "step": 73495 }, { "epoch": 1.8657080123364342, "grad_norm": 0.373046875, "learning_rate": 4.100684504232443e-06, "loss": 0.4214, "step": 73500 }, { "epoch": 1.86583493038545, "grad_norm": 0.376953125, "learning_rate": 4.09297183149081e-06, "loss": 0.4119, "step": 73505 }, { "epoch": 1.8659618484344658, "grad_norm": 0.341796875, "learning_rate": 4.08526631834894e-06, "loss": 0.4144, "step": 73510 }, { "epoch": 1.8660887664834815, "grad_norm": 0.333984375, "learning_rate": 4.0775679651849305e-06, "loss": 0.4004, "step": 73515 }, { "epoch": 1.8662156845324973, "grad_norm": 0.390625, "learning_rate": 4.069876772376513e-06, "loss": 0.429, "step": 73520 }, { "epoch": 1.866342602581513, "grad_norm": 0.349609375, "learning_rate": 4.062192740301118e-06, "loss": 0.4013, "step": 73525 }, { "epoch": 1.8664695206305288, "grad_norm": 0.33984375, "learning_rate": 4.054515869335795e-06, "loss": 0.4364, "step": 73530 }, { "epoch": 1.8665964386795446, "grad_norm": 0.33984375, "learning_rate": 4.0468461598572585e-06, "loss": 0.4001, "step": 73535 }, { "epoch": 1.8667233567285604, "grad_norm": 0.333984375, "learning_rate": 4.039183612241825e-06, "loss": 0.4296, "step": 73540 }, { "epoch": 1.8668502747775761, "grad_norm": 0.3671875, "learning_rate": 4.03152822686551e-06, "loss": 0.4016, "step": 73545 }, { "epoch": 1.866977192826592, "grad_norm": 0.365234375, "learning_rate": 4.02388000410398e-06, "loss": 0.424, "step": 73550 }, { "epoch": 1.8671041108756077, "grad_norm": 0.361328125, "learning_rate": 4.016238944332484e-06, "loss": 0.4158, "step": 73555 }, { "epoch": 1.8672310289246234, "grad_norm": 0.35546875, "learning_rate": 4.008605047925989e-06, "loss": 0.4426, "step": 73560 }, { "epoch": 1.8673579469736392, "grad_norm": 0.376953125, "learning_rate": 4.000978315259112e-06, "loss": 0.439, "step": 73565 }, { "epoch": 1.867484865022655, "grad_norm": 0.353515625, "learning_rate": 3.993358746706054e-06, "loss": 0.4092, "step": 73570 }, { "epoch": 1.8676117830716708, "grad_norm": 0.361328125, "learning_rate": 3.985746342640733e-06, "loss": 0.4297, "step": 73575 }, { "epoch": 1.8677387011206865, "grad_norm": 0.34765625, "learning_rate": 3.9781411034366825e-06, "loss": 0.4191, "step": 73580 }, { "epoch": 1.8678656191697023, "grad_norm": 0.376953125, "learning_rate": 3.970543029467071e-06, "loss": 0.4476, "step": 73585 }, { "epoch": 1.8679925372187178, "grad_norm": 0.35546875, "learning_rate": 3.962952121104734e-06, "loss": 0.4311, "step": 73590 }, { "epoch": 1.8681194552677336, "grad_norm": 0.35546875, "learning_rate": 3.955368378722173e-06, "loss": 0.4026, "step": 73595 }, { "epoch": 1.8682463733167494, "grad_norm": 0.33984375, "learning_rate": 3.947791802691508e-06, "loss": 0.4175, "step": 73600 }, { "epoch": 1.8683732913657651, "grad_norm": 0.373046875, "learning_rate": 3.9402223933845235e-06, "loss": 0.4413, "step": 73605 }, { "epoch": 1.868500209414781, "grad_norm": 0.365234375, "learning_rate": 3.932660151172639e-06, "loss": 0.4226, "step": 73610 }, { "epoch": 1.8686271274637967, "grad_norm": 0.3515625, "learning_rate": 3.92510507642696e-06, "loss": 0.4224, "step": 73615 }, { "epoch": 1.8687540455128122, "grad_norm": 0.37890625, "learning_rate": 3.917557169518171e-06, "loss": 0.4335, "step": 73620 }, { "epoch": 1.868880963561828, "grad_norm": 0.35546875, "learning_rate": 3.9100164308166595e-06, "loss": 0.3898, "step": 73625 }, { "epoch": 1.8690078816108437, "grad_norm": 0.359375, "learning_rate": 3.9024828606924465e-06, "loss": 0.42, "step": 73630 }, { "epoch": 1.8691347996598595, "grad_norm": 0.34375, "learning_rate": 3.894956459515186e-06, "loss": 0.4139, "step": 73635 }, { "epoch": 1.8692617177088753, "grad_norm": 0.33203125, "learning_rate": 3.887437227654233e-06, "loss": 0.399, "step": 73640 }, { "epoch": 1.869388635757891, "grad_norm": 0.349609375, "learning_rate": 3.879925165478526e-06, "loss": 0.4171, "step": 73645 }, { "epoch": 1.8695155538069068, "grad_norm": 0.34765625, "learning_rate": 3.872420273356702e-06, "loss": 0.4305, "step": 73650 }, { "epoch": 1.8696424718559226, "grad_norm": 0.333984375, "learning_rate": 3.864922551656985e-06, "loss": 0.4236, "step": 73655 }, { "epoch": 1.8697693899049384, "grad_norm": 0.33984375, "learning_rate": 3.857432000747312e-06, "loss": 0.4425, "step": 73660 }, { "epoch": 1.8698963079539541, "grad_norm": 0.349609375, "learning_rate": 3.849948620995242e-06, "loss": 0.436, "step": 73665 }, { "epoch": 1.8700232260029699, "grad_norm": 0.34375, "learning_rate": 3.842472412767977e-06, "loss": 0.4183, "step": 73670 }, { "epoch": 1.8701501440519857, "grad_norm": 0.373046875, "learning_rate": 3.8350033764323604e-06, "loss": 0.4063, "step": 73675 }, { "epoch": 1.8702770621010014, "grad_norm": 0.36328125, "learning_rate": 3.8275415123549144e-06, "loss": 0.4352, "step": 73680 }, { "epoch": 1.8704039801500172, "grad_norm": 0.357421875, "learning_rate": 3.82008682090178e-06, "loss": 0.4265, "step": 73685 }, { "epoch": 1.870530898199033, "grad_norm": 0.33203125, "learning_rate": 3.8126393024387637e-06, "loss": 0.3986, "step": 73690 }, { "epoch": 1.8706578162480487, "grad_norm": 0.34375, "learning_rate": 3.80519895733129e-06, "loss": 0.4413, "step": 73695 }, { "epoch": 1.8707847342970645, "grad_norm": 0.3828125, "learning_rate": 3.7977657859444677e-06, "loss": 0.451, "step": 73700 }, { "epoch": 1.8709116523460803, "grad_norm": 0.33984375, "learning_rate": 3.790339788643054e-06, "loss": 0.4028, "step": 73705 }, { "epoch": 1.871038570395096, "grad_norm": 0.365234375, "learning_rate": 3.782920965791425e-06, "loss": 0.4163, "step": 73710 }, { "epoch": 1.8711654884441118, "grad_norm": 0.353515625, "learning_rate": 3.775509317753589e-06, "loss": 0.4067, "step": 73715 }, { "epoch": 1.8712924064931273, "grad_norm": 0.318359375, "learning_rate": 3.7681048448932883e-06, "loss": 0.4078, "step": 73720 }, { "epoch": 1.871419324542143, "grad_norm": 0.33984375, "learning_rate": 3.7607075475738324e-06, "loss": 0.3935, "step": 73725 }, { "epoch": 1.8715462425911589, "grad_norm": 0.369140625, "learning_rate": 3.753317426158181e-06, "loss": 0.4424, "step": 73730 }, { "epoch": 1.8716731606401746, "grad_norm": 0.34765625, "learning_rate": 3.7459344810090108e-06, "loss": 0.4222, "step": 73735 }, { "epoch": 1.8718000786891904, "grad_norm": 0.361328125, "learning_rate": 3.738558712488582e-06, "loss": 0.4033, "step": 73740 }, { "epoch": 1.8719269967382062, "grad_norm": 0.322265625, "learning_rate": 3.7311901209588046e-06, "loss": 0.4091, "step": 73745 }, { "epoch": 1.872053914787222, "grad_norm": 0.326171875, "learning_rate": 3.7238287067812735e-06, "loss": 0.4033, "step": 73750 }, { "epoch": 1.8721808328362375, "grad_norm": 0.275390625, "learning_rate": 3.7164744703171823e-06, "loss": 0.3994, "step": 73755 }, { "epoch": 1.8723077508852533, "grad_norm": 0.359375, "learning_rate": 3.709127411927443e-06, "loss": 0.4222, "step": 73760 }, { "epoch": 1.872434668934269, "grad_norm": 0.3359375, "learning_rate": 3.7017875319725332e-06, "loss": 0.4173, "step": 73765 }, { "epoch": 1.8725615869832848, "grad_norm": 0.359375, "learning_rate": 3.694454830812632e-06, "loss": 0.4086, "step": 73770 }, { "epoch": 1.8726885050323006, "grad_norm": 0.34765625, "learning_rate": 3.6871293088075517e-06, "loss": 0.4021, "step": 73775 }, { "epoch": 1.8728154230813163, "grad_norm": 0.373046875, "learning_rate": 3.6798109663167375e-06, "loss": 0.4483, "step": 73780 }, { "epoch": 1.872942341130332, "grad_norm": 0.349609375, "learning_rate": 3.672499803699336e-06, "loss": 0.4103, "step": 73785 }, { "epoch": 1.8730692591793479, "grad_norm": 0.380859375, "learning_rate": 3.6651958213140765e-06, "loss": 0.4424, "step": 73790 }, { "epoch": 1.8731961772283636, "grad_norm": 0.353515625, "learning_rate": 3.6578990195193723e-06, "loss": 0.418, "step": 73795 }, { "epoch": 1.8733230952773794, "grad_norm": 0.345703125, "learning_rate": 3.650609398673271e-06, "loss": 0.3931, "step": 73800 }, { "epoch": 1.8734500133263952, "grad_norm": 0.37109375, "learning_rate": 3.6433269591334356e-06, "loss": 0.4227, "step": 73805 }, { "epoch": 1.873576931375411, "grad_norm": 0.33984375, "learning_rate": 3.636051701257281e-06, "loss": 0.4156, "step": 73810 }, { "epoch": 1.8737038494244267, "grad_norm": 0.337890625, "learning_rate": 3.628783625401754e-06, "loss": 0.4158, "step": 73815 }, { "epoch": 1.8738307674734425, "grad_norm": 0.359375, "learning_rate": 3.621522731923521e-06, "loss": 0.4257, "step": 73820 }, { "epoch": 1.8739576855224582, "grad_norm": 0.35546875, "learning_rate": 3.6142690211788616e-06, "loss": 0.4191, "step": 73825 }, { "epoch": 1.874084603571474, "grad_norm": 0.353515625, "learning_rate": 3.6070224935236926e-06, "loss": 0.4418, "step": 73830 }, { "epoch": 1.8742115216204898, "grad_norm": 0.37890625, "learning_rate": 3.5997831493136288e-06, "loss": 0.3922, "step": 73835 }, { "epoch": 1.8743384396695055, "grad_norm": 0.318359375, "learning_rate": 3.592550988903886e-06, "loss": 0.4027, "step": 73840 }, { "epoch": 1.8744653577185213, "grad_norm": 0.349609375, "learning_rate": 3.585326012649331e-06, "loss": 0.4063, "step": 73845 }, { "epoch": 1.874592275767537, "grad_norm": 0.34375, "learning_rate": 3.5781082209045133e-06, "loss": 0.4072, "step": 73850 }, { "epoch": 1.8747191938165526, "grad_norm": 0.337890625, "learning_rate": 3.5708976140235988e-06, "loss": 0.4179, "step": 73855 }, { "epoch": 1.8748461118655684, "grad_norm": 0.353515625, "learning_rate": 3.5636941923604222e-06, "loss": 0.4012, "step": 73860 }, { "epoch": 1.8749730299145841, "grad_norm": 0.330078125, "learning_rate": 3.5564979562684336e-06, "loss": 0.4162, "step": 73865 }, { "epoch": 1.8750999479636, "grad_norm": 0.37109375, "learning_rate": 3.5493089061007507e-06, "loss": 0.3969, "step": 73870 }, { "epoch": 1.8752268660126157, "grad_norm": 0.365234375, "learning_rate": 3.5421270422101578e-06, "loss": 0.429, "step": 73875 }, { "epoch": 1.8753537840616314, "grad_norm": 0.337890625, "learning_rate": 3.5349523649490395e-06, "loss": 0.4052, "step": 73880 }, { "epoch": 1.875480702110647, "grad_norm": 0.35546875, "learning_rate": 3.527784874669448e-06, "loss": 0.3979, "step": 73885 }, { "epoch": 1.8756076201596628, "grad_norm": 0.3515625, "learning_rate": 3.520624571723135e-06, "loss": 0.404, "step": 73890 }, { "epoch": 1.8757345382086785, "grad_norm": 0.337890625, "learning_rate": 3.513471456461403e-06, "loss": 0.4106, "step": 73895 }, { "epoch": 1.8758614562576943, "grad_norm": 0.3359375, "learning_rate": 3.5063255292352875e-06, "loss": 0.4158, "step": 73900 }, { "epoch": 1.87598837430671, "grad_norm": 0.30078125, "learning_rate": 3.499186790395425e-06, "loss": 0.4092, "step": 73905 }, { "epoch": 1.8761152923557258, "grad_norm": 0.341796875, "learning_rate": 3.492055240292102e-06, "loss": 0.4005, "step": 73910 }, { "epoch": 1.8762422104047416, "grad_norm": 0.353515625, "learning_rate": 3.484930879275272e-06, "loss": 0.4239, "step": 73915 }, { "epoch": 1.8763691284537574, "grad_norm": 0.37109375, "learning_rate": 3.477813707694521e-06, "loss": 0.4324, "step": 73920 }, { "epoch": 1.8764960465027731, "grad_norm": 0.3515625, "learning_rate": 3.4707037258991042e-06, "loss": 0.4087, "step": 73925 }, { "epoch": 1.876622964551789, "grad_norm": 0.34375, "learning_rate": 3.463600934237859e-06, "loss": 0.3837, "step": 73930 }, { "epoch": 1.8767498826008047, "grad_norm": 0.33203125, "learning_rate": 3.4565053330593563e-06, "loss": 0.4256, "step": 73935 }, { "epoch": 1.8768768006498204, "grad_norm": 0.34375, "learning_rate": 3.4494169227117674e-06, "loss": 0.4251, "step": 73940 }, { "epoch": 1.8770037186988362, "grad_norm": 0.3515625, "learning_rate": 3.4423357035429143e-06, "loss": 0.4352, "step": 73945 }, { "epoch": 1.877130636747852, "grad_norm": 0.345703125, "learning_rate": 3.435261675900253e-06, "loss": 0.3847, "step": 73950 }, { "epoch": 1.8772575547968677, "grad_norm": 0.337890625, "learning_rate": 3.4281948401309377e-06, "loss": 0.3991, "step": 73955 }, { "epoch": 1.8773844728458835, "grad_norm": 0.33984375, "learning_rate": 3.4211351965816926e-06, "loss": 0.4032, "step": 73960 }, { "epoch": 1.8775113908948993, "grad_norm": 0.359375, "learning_rate": 3.4140827455989726e-06, "loss": 0.3867, "step": 73965 }, { "epoch": 1.877638308943915, "grad_norm": 0.333984375, "learning_rate": 3.4070374875288186e-06, "loss": 0.4245, "step": 73970 }, { "epoch": 1.8777652269929308, "grad_norm": 0.37109375, "learning_rate": 3.39999942271692e-06, "loss": 0.4377, "step": 73975 }, { "epoch": 1.8778921450419466, "grad_norm": 0.3515625, "learning_rate": 3.392968551508668e-06, "loss": 0.4187, "step": 73980 }, { "epoch": 1.8780190630909621, "grad_norm": 0.35546875, "learning_rate": 3.385944874249069e-06, "loss": 0.3961, "step": 73985 }, { "epoch": 1.8781459811399779, "grad_norm": 0.373046875, "learning_rate": 3.3789283912827313e-06, "loss": 0.4297, "step": 73990 }, { "epoch": 1.8782728991889936, "grad_norm": 0.349609375, "learning_rate": 3.3719191029539792e-06, "loss": 0.4168, "step": 73995 }, { "epoch": 1.8783998172380094, "grad_norm": 0.337890625, "learning_rate": 3.3649170096067546e-06, "loss": 0.4264, "step": 74000 }, { "epoch": 1.8785267352870252, "grad_norm": 0.34375, "learning_rate": 3.3579221115846322e-06, "loss": 0.4292, "step": 74005 }, { "epoch": 1.878653653336041, "grad_norm": 0.34375, "learning_rate": 3.3509344092308543e-06, "loss": 0.4193, "step": 74010 }, { "epoch": 1.8787805713850567, "grad_norm": 0.3046875, "learning_rate": 3.34395390288833e-06, "loss": 0.394, "step": 74015 }, { "epoch": 1.8789074894340723, "grad_norm": 0.35546875, "learning_rate": 3.336980592899552e-06, "loss": 0.4443, "step": 74020 }, { "epoch": 1.879034407483088, "grad_norm": 0.380859375, "learning_rate": 3.330014479606713e-06, "loss": 0.4591, "step": 74025 }, { "epoch": 1.8791613255321038, "grad_norm": 0.384765625, "learning_rate": 3.3230555633516396e-06, "loss": 0.4492, "step": 74030 }, { "epoch": 1.8792882435811196, "grad_norm": 0.328125, "learning_rate": 3.3161038444757927e-06, "loss": 0.4039, "step": 74035 }, { "epoch": 1.8794151616301353, "grad_norm": 0.328125, "learning_rate": 3.309159323320315e-06, "loss": 0.3973, "step": 74040 }, { "epoch": 1.879542079679151, "grad_norm": 0.337890625, "learning_rate": 3.3022220002259512e-06, "loss": 0.4054, "step": 74045 }, { "epoch": 1.8796689977281669, "grad_norm": 0.34375, "learning_rate": 3.2952918755331127e-06, "loss": 0.3847, "step": 74050 }, { "epoch": 1.8797959157771826, "grad_norm": 0.3359375, "learning_rate": 3.2883689495818765e-06, "loss": 0.4203, "step": 74055 }, { "epoch": 1.8799228338261984, "grad_norm": 0.36328125, "learning_rate": 3.281453222711938e-06, "loss": 0.4487, "step": 74060 }, { "epoch": 1.8800497518752142, "grad_norm": 0.34375, "learning_rate": 3.2745446952626266e-06, "loss": 0.4375, "step": 74065 }, { "epoch": 1.88017666992423, "grad_norm": 0.337890625, "learning_rate": 3.2676433675729697e-06, "loss": 0.402, "step": 74070 }, { "epoch": 1.8803035879732457, "grad_norm": 0.36328125, "learning_rate": 3.2607492399816137e-06, "loss": 0.4237, "step": 74075 }, { "epoch": 1.8804305060222615, "grad_norm": 0.322265625, "learning_rate": 3.2538623128268384e-06, "loss": 0.373, "step": 74080 }, { "epoch": 1.8805574240712772, "grad_norm": 0.36328125, "learning_rate": 3.2469825864465724e-06, "loss": 0.42, "step": 74085 }, { "epoch": 1.880684342120293, "grad_norm": 0.349609375, "learning_rate": 3.2401100611784136e-06, "loss": 0.403, "step": 74090 }, { "epoch": 1.8808112601693088, "grad_norm": 0.341796875, "learning_rate": 3.2332447373595915e-06, "loss": 0.4373, "step": 74095 }, { "epoch": 1.8809381782183245, "grad_norm": 0.34375, "learning_rate": 3.226386615326987e-06, "loss": 0.3966, "step": 74100 }, { "epoch": 1.8810650962673403, "grad_norm": 0.33984375, "learning_rate": 3.21953569541713e-06, "loss": 0.4363, "step": 74105 }, { "epoch": 1.881192014316356, "grad_norm": 0.353515625, "learning_rate": 3.212691977966203e-06, "loss": 0.4181, "step": 74110 }, { "epoch": 1.8813189323653718, "grad_norm": 0.337890625, "learning_rate": 3.2058554633099865e-06, "loss": 0.412, "step": 74115 }, { "epoch": 1.8814458504143874, "grad_norm": 0.326171875, "learning_rate": 3.199026151783979e-06, "loss": 0.3866, "step": 74120 }, { "epoch": 1.8815727684634032, "grad_norm": 0.369140625, "learning_rate": 3.1922040437232955e-06, "loss": 0.4307, "step": 74125 }, { "epoch": 1.881699686512419, "grad_norm": 0.353515625, "learning_rate": 3.1853891394626516e-06, "loss": 0.4087, "step": 74130 }, { "epoch": 1.8818266045614347, "grad_norm": 0.296875, "learning_rate": 3.1785814393364805e-06, "loss": 0.3884, "step": 74135 }, { "epoch": 1.8819535226104505, "grad_norm": 0.3359375, "learning_rate": 3.1717809436788476e-06, "loss": 0.4319, "step": 74140 }, { "epoch": 1.8820804406594662, "grad_norm": 0.345703125, "learning_rate": 3.164987652823436e-06, "loss": 0.4238, "step": 74145 }, { "epoch": 1.8822073587084818, "grad_norm": 0.33203125, "learning_rate": 3.1582015671035963e-06, "loss": 0.4084, "step": 74150 }, { "epoch": 1.8823342767574975, "grad_norm": 0.353515625, "learning_rate": 3.151422686852312e-06, "loss": 0.4183, "step": 74155 }, { "epoch": 1.8824611948065133, "grad_norm": 0.3515625, "learning_rate": 3.1446510124021995e-06, "loss": 0.4506, "step": 74160 }, { "epoch": 1.882588112855529, "grad_norm": 0.333984375, "learning_rate": 3.137886544085577e-06, "loss": 0.4085, "step": 74165 }, { "epoch": 1.8827150309045448, "grad_norm": 0.341796875, "learning_rate": 3.1311292822343627e-06, "loss": 0.4151, "step": 74170 }, { "epoch": 1.8828419489535606, "grad_norm": 0.3359375, "learning_rate": 3.124379227180124e-06, "loss": 0.4188, "step": 74175 }, { "epoch": 1.8829688670025764, "grad_norm": 0.318359375, "learning_rate": 3.117636379254096e-06, "loss": 0.3934, "step": 74180 }, { "epoch": 1.8830957850515921, "grad_norm": 0.3515625, "learning_rate": 3.110900738787148e-06, "loss": 0.3947, "step": 74185 }, { "epoch": 1.883222703100608, "grad_norm": 0.3359375, "learning_rate": 3.1041723061097814e-06, "loss": 0.4375, "step": 74190 }, { "epoch": 1.8833496211496237, "grad_norm": 0.337890625, "learning_rate": 3.097451081552166e-06, "loss": 0.3976, "step": 74195 }, { "epoch": 1.8834765391986394, "grad_norm": 0.361328125, "learning_rate": 3.0907370654441045e-06, "loss": 0.4134, "step": 74200 }, { "epoch": 1.8836034572476552, "grad_norm": 0.3515625, "learning_rate": 3.08403025811505e-06, "loss": 0.43, "step": 74205 }, { "epoch": 1.883730375296671, "grad_norm": 0.345703125, "learning_rate": 3.077330659894106e-06, "loss": 0.4343, "step": 74210 }, { "epoch": 1.8838572933456867, "grad_norm": 0.345703125, "learning_rate": 3.070638271110043e-06, "loss": 0.4368, "step": 74215 }, { "epoch": 1.8839842113947025, "grad_norm": 0.32421875, "learning_rate": 3.0639530920911982e-06, "loss": 0.4062, "step": 74220 }, { "epoch": 1.8841111294437183, "grad_norm": 0.3828125, "learning_rate": 3.0572751231656754e-06, "loss": 0.401, "step": 74225 }, { "epoch": 1.884238047492734, "grad_norm": 0.34375, "learning_rate": 3.05060436466113e-06, "loss": 0.4063, "step": 74230 }, { "epoch": 1.8843649655417498, "grad_norm": 0.330078125, "learning_rate": 3.0439408169048828e-06, "loss": 0.3986, "step": 74235 }, { "epoch": 1.8844918835907656, "grad_norm": 0.3125, "learning_rate": 3.0372844802239226e-06, "loss": 0.3679, "step": 74240 }, { "epoch": 1.8846188016397813, "grad_norm": 0.3359375, "learning_rate": 3.0306353549448716e-06, "loss": 0.4081, "step": 74245 }, { "epoch": 1.884745719688797, "grad_norm": 0.33203125, "learning_rate": 3.0239934413940016e-06, "loss": 0.429, "step": 74250 }, { "epoch": 1.8848726377378127, "grad_norm": 0.376953125, "learning_rate": 3.017358739897219e-06, "loss": 0.4427, "step": 74255 }, { "epoch": 1.8849995557868284, "grad_norm": 0.44140625, "learning_rate": 3.010731250780096e-06, "loss": 0.4018, "step": 74260 }, { "epoch": 1.8851264738358442, "grad_norm": 0.3515625, "learning_rate": 3.0041109743678405e-06, "loss": 0.4567, "step": 74265 }, { "epoch": 1.88525339188486, "grad_norm": 0.322265625, "learning_rate": 2.997497910985308e-06, "loss": 0.3918, "step": 74270 }, { "epoch": 1.8853803099338757, "grad_norm": 0.369140625, "learning_rate": 2.9908920609570062e-06, "loss": 0.4178, "step": 74275 }, { "epoch": 1.8855072279828915, "grad_norm": 0.3515625, "learning_rate": 2.9842934246070425e-06, "loss": 0.4017, "step": 74280 }, { "epoch": 1.885634146031907, "grad_norm": 0.36328125, "learning_rate": 2.977702002259258e-06, "loss": 0.4107, "step": 74285 }, { "epoch": 1.8857610640809228, "grad_norm": 0.33984375, "learning_rate": 2.97111779423706e-06, "loss": 0.4363, "step": 74290 }, { "epoch": 1.8858879821299386, "grad_norm": 0.359375, "learning_rate": 2.9645408008635407e-06, "loss": 0.4172, "step": 74295 }, { "epoch": 1.8860149001789543, "grad_norm": 0.337890625, "learning_rate": 2.9579710224614585e-06, "loss": 0.4322, "step": 74300 }, { "epoch": 1.8861418182279701, "grad_norm": 0.37109375, "learning_rate": 2.9514084593531553e-06, "loss": 0.4027, "step": 74305 }, { "epoch": 1.8862687362769859, "grad_norm": 0.33984375, "learning_rate": 2.944853111860657e-06, "loss": 0.4163, "step": 74310 }, { "epoch": 1.8863956543260016, "grad_norm": 0.349609375, "learning_rate": 2.9383049803056393e-06, "loss": 0.4267, "step": 74315 }, { "epoch": 1.8865225723750174, "grad_norm": 0.34375, "learning_rate": 2.9317640650094122e-06, "loss": 0.4227, "step": 74320 }, { "epoch": 1.8866494904240332, "grad_norm": 0.34375, "learning_rate": 2.925230366292952e-06, "loss": 0.4476, "step": 74325 }, { "epoch": 1.886776408473049, "grad_norm": 0.33984375, "learning_rate": 2.918703884476853e-06, "loss": 0.398, "step": 74330 }, { "epoch": 1.8869033265220647, "grad_norm": 0.33984375, "learning_rate": 2.912184619881358e-06, "loss": 0.4137, "step": 74335 }, { "epoch": 1.8870302445710805, "grad_norm": 0.361328125, "learning_rate": 2.9056725728263785e-06, "loss": 0.41, "step": 74340 }, { "epoch": 1.8871571626200963, "grad_norm": 0.333984375, "learning_rate": 2.8991677436314255e-06, "loss": 0.4442, "step": 74345 }, { "epoch": 1.887284080669112, "grad_norm": 0.345703125, "learning_rate": 2.892670132615743e-06, "loss": 0.4093, "step": 74350 }, { "epoch": 1.8874109987181278, "grad_norm": 0.357421875, "learning_rate": 2.886179740098127e-06, "loss": 0.4434, "step": 74355 }, { "epoch": 1.8875379167671436, "grad_norm": 0.3203125, "learning_rate": 2.879696566397072e-06, "loss": 0.3859, "step": 74360 }, { "epoch": 1.8876648348161593, "grad_norm": 0.33203125, "learning_rate": 2.8732206118307066e-06, "loss": 0.4213, "step": 74365 }, { "epoch": 1.887791752865175, "grad_norm": 0.27734375, "learning_rate": 2.866751876716794e-06, "loss": 0.3782, "step": 74370 }, { "epoch": 1.8879186709141909, "grad_norm": 0.375, "learning_rate": 2.8602903613727466e-06, "loss": 0.4413, "step": 74375 }, { "epoch": 1.8880455889632066, "grad_norm": 0.37109375, "learning_rate": 2.8538360661156614e-06, "loss": 0.4141, "step": 74380 }, { "epoch": 1.8881725070122222, "grad_norm": 0.359375, "learning_rate": 2.8473889912622173e-06, "loss": 0.4318, "step": 74385 }, { "epoch": 1.888299425061238, "grad_norm": 0.353515625, "learning_rate": 2.8409491371287787e-06, "loss": 0.3983, "step": 74390 }, { "epoch": 1.8884263431102537, "grad_norm": 0.33203125, "learning_rate": 2.8345165040313587e-06, "loss": 0.375, "step": 74395 }, { "epoch": 1.8885532611592695, "grad_norm": 0.369140625, "learning_rate": 2.8280910922855725e-06, "loss": 0.4316, "step": 74400 }, { "epoch": 1.8886801792082852, "grad_norm": 0.357421875, "learning_rate": 2.8216729022067508e-06, "loss": 0.4147, "step": 74405 }, { "epoch": 1.888807097257301, "grad_norm": 0.357421875, "learning_rate": 2.8152619341097748e-06, "loss": 0.4259, "step": 74410 }, { "epoch": 1.8889340153063165, "grad_norm": 0.38671875, "learning_rate": 2.808858188309293e-06, "loss": 0.4252, "step": 74415 }, { "epoch": 1.8890609333553323, "grad_norm": 0.357421875, "learning_rate": 2.802461665119521e-06, "loss": 0.4066, "step": 74420 }, { "epoch": 1.889187851404348, "grad_norm": 0.3671875, "learning_rate": 2.796072364854307e-06, "loss": 0.4127, "step": 74425 }, { "epoch": 1.8893147694533639, "grad_norm": 0.32421875, "learning_rate": 2.789690287827184e-06, "loss": 0.4001, "step": 74430 }, { "epoch": 1.8894416875023796, "grad_norm": 0.35546875, "learning_rate": 2.7833154343513187e-06, "loss": 0.4217, "step": 74435 }, { "epoch": 1.8895686055513954, "grad_norm": 0.353515625, "learning_rate": 2.7769478047395265e-06, "loss": 0.44, "step": 74440 }, { "epoch": 1.8896955236004112, "grad_norm": 0.357421875, "learning_rate": 2.7705873993042584e-06, "loss": 0.4082, "step": 74445 }, { "epoch": 1.889822441649427, "grad_norm": 0.3671875, "learning_rate": 2.7642342183575974e-06, "loss": 0.4019, "step": 74450 }, { "epoch": 1.8899493596984427, "grad_norm": 0.333984375, "learning_rate": 2.757888262211344e-06, "loss": 0.4038, "step": 74455 }, { "epoch": 1.8900762777474585, "grad_norm": 0.33984375, "learning_rate": 2.75154953117685e-06, "loss": 0.4133, "step": 74460 }, { "epoch": 1.8902031957964742, "grad_norm": 0.373046875, "learning_rate": 2.745218025565166e-06, "loss": 0.4164, "step": 74465 }, { "epoch": 1.89033011384549, "grad_norm": 0.369140625, "learning_rate": 2.7388937456869766e-06, "loss": 0.4219, "step": 74470 }, { "epoch": 1.8904570318945058, "grad_norm": 0.359375, "learning_rate": 2.7325766918526002e-06, "loss": 0.4296, "step": 74475 }, { "epoch": 1.8905839499435215, "grad_norm": 0.375, "learning_rate": 2.726266864372023e-06, "loss": 0.4137, "step": 74480 }, { "epoch": 1.8907108679925373, "grad_norm": 0.361328125, "learning_rate": 2.71996426355488e-06, "loss": 0.3902, "step": 74485 }, { "epoch": 1.890837786041553, "grad_norm": 0.33203125, "learning_rate": 2.713668889710424e-06, "loss": 0.3978, "step": 74490 }, { "epoch": 1.8909647040905688, "grad_norm": 0.34765625, "learning_rate": 2.707380743147558e-06, "loss": 0.4142, "step": 74495 }, { "epoch": 1.8910916221395846, "grad_norm": 0.33984375, "learning_rate": 2.701099824174852e-06, "loss": 0.4197, "step": 74500 }, { "epoch": 1.8912185401886004, "grad_norm": 0.359375, "learning_rate": 2.694826133100492e-06, "loss": 0.4156, "step": 74505 }, { "epoch": 1.8913454582376161, "grad_norm": 0.345703125, "learning_rate": 2.6885596702323488e-06, "loss": 0.4083, "step": 74510 }, { "epoch": 1.8914723762866317, "grad_norm": 0.365234375, "learning_rate": 2.682300435877893e-06, "loss": 0.3676, "step": 74515 }, { "epoch": 1.8915992943356474, "grad_norm": 0.361328125, "learning_rate": 2.67604843034428e-06, "loss": 0.4728, "step": 74520 }, { "epoch": 1.8917262123846632, "grad_norm": 0.357421875, "learning_rate": 2.6698036539382794e-06, "loss": 0.4069, "step": 74525 }, { "epoch": 1.891853130433679, "grad_norm": 0.33984375, "learning_rate": 2.6635661069663137e-06, "loss": 0.4325, "step": 74530 }, { "epoch": 1.8919800484826947, "grad_norm": 0.345703125, "learning_rate": 2.657335789734488e-06, "loss": 0.4141, "step": 74535 }, { "epoch": 1.8921069665317105, "grad_norm": 0.365234375, "learning_rate": 2.651112702548491e-06, "loss": 0.4097, "step": 74540 }, { "epoch": 1.8922338845807263, "grad_norm": 0.28515625, "learning_rate": 2.6448968457136953e-06, "loss": 0.3972, "step": 74545 }, { "epoch": 1.8923608026297418, "grad_norm": 0.333984375, "learning_rate": 2.638688219535123e-06, "loss": 0.3535, "step": 74550 }, { "epoch": 1.8924877206787576, "grad_norm": 0.328125, "learning_rate": 2.6324868243174314e-06, "loss": 0.3692, "step": 74555 }, { "epoch": 1.8926146387277734, "grad_norm": 0.33984375, "learning_rate": 2.6262926603648927e-06, "loss": 0.4166, "step": 74560 }, { "epoch": 1.8927415567767891, "grad_norm": 0.3515625, "learning_rate": 2.6201057279814806e-06, "loss": 0.4352, "step": 74565 }, { "epoch": 1.892868474825805, "grad_norm": 0.3671875, "learning_rate": 2.613926027470753e-06, "loss": 0.4306, "step": 74570 }, { "epoch": 1.8929953928748207, "grad_norm": 0.326171875, "learning_rate": 2.607753559135983e-06, "loss": 0.3717, "step": 74575 }, { "epoch": 1.8931223109238364, "grad_norm": 0.345703125, "learning_rate": 2.6015883232800294e-06, "loss": 0.3963, "step": 74580 }, { "epoch": 1.8932492289728522, "grad_norm": 0.3515625, "learning_rate": 2.595430320205416e-06, "loss": 0.4591, "step": 74585 }, { "epoch": 1.893376147021868, "grad_norm": 0.369140625, "learning_rate": 2.589279550214335e-06, "loss": 0.4067, "step": 74590 }, { "epoch": 1.8935030650708837, "grad_norm": 0.345703125, "learning_rate": 2.583136013608578e-06, "loss": 0.3893, "step": 74595 }, { "epoch": 1.8936299831198995, "grad_norm": 0.353515625, "learning_rate": 2.5769997106896035e-06, "loss": 0.4175, "step": 74600 }, { "epoch": 1.8937569011689153, "grad_norm": 0.33203125, "learning_rate": 2.570870641758538e-06, "loss": 0.3954, "step": 74605 }, { "epoch": 1.893883819217931, "grad_norm": 0.349609375, "learning_rate": 2.564748807116124e-06, "loss": 0.4283, "step": 74610 }, { "epoch": 1.8940107372669468, "grad_norm": 0.3515625, "learning_rate": 2.558634207062754e-06, "loss": 0.3884, "step": 74615 }, { "epoch": 1.8941376553159626, "grad_norm": 0.3671875, "learning_rate": 2.552526841898456e-06, "loss": 0.4322, "step": 74620 }, { "epoch": 1.8942645733649783, "grad_norm": 0.3125, "learning_rate": 2.5464267119229554e-06, "loss": 0.4079, "step": 74625 }, { "epoch": 1.894391491413994, "grad_norm": 0.32421875, "learning_rate": 2.540333817435547e-06, "loss": 0.4335, "step": 74630 }, { "epoch": 1.8945184094630099, "grad_norm": 0.353515625, "learning_rate": 2.534248158735225e-06, "loss": 0.42, "step": 74635 }, { "epoch": 1.8946453275120256, "grad_norm": 0.37109375, "learning_rate": 2.5281697361206e-06, "loss": 0.4443, "step": 74640 }, { "epoch": 1.8947722455610414, "grad_norm": 0.34375, "learning_rate": 2.5220985498899505e-06, "loss": 0.4393, "step": 74645 }, { "epoch": 1.894899163610057, "grad_norm": 0.33203125, "learning_rate": 2.516034600341171e-06, "loss": 0.4001, "step": 74650 }, { "epoch": 1.8950260816590727, "grad_norm": 0.375, "learning_rate": 2.5099778877718415e-06, "loss": 0.4349, "step": 74655 }, { "epoch": 1.8951529997080885, "grad_norm": 0.3203125, "learning_rate": 2.503928412479106e-06, "loss": 0.4079, "step": 74660 }, { "epoch": 1.8952799177571042, "grad_norm": 0.373046875, "learning_rate": 2.4978861747598778e-06, "loss": 0.4253, "step": 74665 }, { "epoch": 1.89540683580612, "grad_norm": 0.357421875, "learning_rate": 2.4918511749106196e-06, "loss": 0.4375, "step": 74670 }, { "epoch": 1.8955337538551358, "grad_norm": 0.375, "learning_rate": 2.485823413227461e-06, "loss": 0.4355, "step": 74675 }, { "epoch": 1.8956606719041513, "grad_norm": 0.37109375, "learning_rate": 2.4798028900061995e-06, "loss": 0.4454, "step": 74680 }, { "epoch": 1.895787589953167, "grad_norm": 0.337890625, "learning_rate": 2.4737896055422313e-06, "loss": 0.3841, "step": 74685 }, { "epoch": 1.8959145080021829, "grad_norm": 0.357421875, "learning_rate": 2.4677835601306375e-06, "loss": 0.4092, "step": 74690 }, { "epoch": 1.8960414260511986, "grad_norm": 0.33984375, "learning_rate": 2.4617847540661485e-06, "loss": 0.4298, "step": 74695 }, { "epoch": 1.8961683441002144, "grad_norm": 0.341796875, "learning_rate": 2.455793187643096e-06, "loss": 0.4151, "step": 74700 }, { "epoch": 1.8962952621492302, "grad_norm": 0.31640625, "learning_rate": 2.449808861155511e-06, "loss": 0.4101, "step": 74705 }, { "epoch": 1.896422180198246, "grad_norm": 0.333984375, "learning_rate": 2.443831774897026e-06, "loss": 0.4222, "step": 74710 }, { "epoch": 1.8965490982472617, "grad_norm": 0.326171875, "learning_rate": 2.4378619291609556e-06, "loss": 0.4161, "step": 74715 }, { "epoch": 1.8966760162962775, "grad_norm": 0.33203125, "learning_rate": 2.4318993242401995e-06, "loss": 0.417, "step": 74720 }, { "epoch": 1.8968029343452932, "grad_norm": 0.34765625, "learning_rate": 2.4259439604273567e-06, "loss": 0.3998, "step": 74725 }, { "epoch": 1.896929852394309, "grad_norm": 0.375, "learning_rate": 2.4199958380146766e-06, "loss": 0.4324, "step": 74730 }, { "epoch": 1.8970567704433248, "grad_norm": 0.333984375, "learning_rate": 2.4140549572939927e-06, "loss": 0.4105, "step": 74735 }, { "epoch": 1.8971836884923405, "grad_norm": 0.365234375, "learning_rate": 2.4081213185568716e-06, "loss": 0.4165, "step": 74740 }, { "epoch": 1.8973106065413563, "grad_norm": 0.361328125, "learning_rate": 2.4021949220944303e-06, "loss": 0.4098, "step": 74745 }, { "epoch": 1.897437524590372, "grad_norm": 0.3203125, "learning_rate": 2.3962757681974866e-06, "loss": 0.4102, "step": 74750 }, { "epoch": 1.8975644426393878, "grad_norm": 0.357421875, "learning_rate": 2.390363857156491e-06, "loss": 0.404, "step": 74755 }, { "epoch": 1.8976913606884036, "grad_norm": 0.34375, "learning_rate": 2.3844591892615615e-06, "loss": 0.3753, "step": 74760 }, { "epoch": 1.8978182787374194, "grad_norm": 0.35546875, "learning_rate": 2.3785617648024004e-06, "loss": 0.4138, "step": 74765 }, { "epoch": 1.8979451967864351, "grad_norm": 0.341796875, "learning_rate": 2.3726715840684085e-06, "loss": 0.412, "step": 74770 }, { "epoch": 1.898072114835451, "grad_norm": 0.33984375, "learning_rate": 2.3667886473486218e-06, "loss": 0.4334, "step": 74775 }, { "epoch": 1.8981990328844665, "grad_norm": 0.35546875, "learning_rate": 2.3609129549317095e-06, "loss": 0.4206, "step": 74780 }, { "epoch": 1.8983259509334822, "grad_norm": 0.345703125, "learning_rate": 2.355044507105991e-06, "loss": 0.4244, "step": 74785 }, { "epoch": 1.898452868982498, "grad_norm": 0.345703125, "learning_rate": 2.349183304159402e-06, "loss": 0.3973, "step": 74790 }, { "epoch": 1.8985797870315138, "grad_norm": 0.357421875, "learning_rate": 2.3433293463795966e-06, "loss": 0.416, "step": 74795 }, { "epoch": 1.8987067050805295, "grad_norm": 0.359375, "learning_rate": 2.3374826340537946e-06, "loss": 0.4193, "step": 74800 }, { "epoch": 1.8988336231295453, "grad_norm": 0.365234375, "learning_rate": 2.3316431674688995e-06, "loss": 0.4252, "step": 74805 }, { "epoch": 1.898960541178561, "grad_norm": 0.34375, "learning_rate": 2.3258109469114663e-06, "loss": 0.4142, "step": 74810 }, { "epoch": 1.8990874592275766, "grad_norm": 0.341796875, "learning_rate": 2.3199859726676485e-06, "loss": 0.4385, "step": 74815 }, { "epoch": 1.8992143772765924, "grad_norm": 0.353515625, "learning_rate": 2.314168245023301e-06, "loss": 0.4181, "step": 74820 }, { "epoch": 1.8993412953256081, "grad_norm": 0.34375, "learning_rate": 2.3083577642638795e-06, "loss": 0.4118, "step": 74825 }, { "epoch": 1.899468213374624, "grad_norm": 0.34375, "learning_rate": 2.3025545306745218e-06, "loss": 0.4196, "step": 74830 }, { "epoch": 1.8995951314236397, "grad_norm": 0.345703125, "learning_rate": 2.296758544539984e-06, "loss": 0.3996, "step": 74835 }, { "epoch": 1.8997220494726554, "grad_norm": 0.376953125, "learning_rate": 2.2909698061446547e-06, "loss": 0.4295, "step": 74840 }, { "epoch": 1.8998489675216712, "grad_norm": 0.337890625, "learning_rate": 2.285188315772607e-06, "loss": 0.3867, "step": 74845 }, { "epoch": 1.899975885570687, "grad_norm": 0.33984375, "learning_rate": 2.2794140737075138e-06, "loss": 0.4479, "step": 74850 }, { "epoch": 1.9001028036197027, "grad_norm": 0.32421875, "learning_rate": 2.2736470802327323e-06, "loss": 0.4466, "step": 74855 }, { "epoch": 1.9002297216687185, "grad_norm": 0.341796875, "learning_rate": 2.267887335631252e-06, "loss": 0.4181, "step": 74860 }, { "epoch": 1.9003566397177343, "grad_norm": 0.34765625, "learning_rate": 2.2621348401856807e-06, "loss": 0.4226, "step": 74865 }, { "epoch": 1.90048355776675, "grad_norm": 0.337890625, "learning_rate": 2.2563895941783096e-06, "loss": 0.4141, "step": 74870 }, { "epoch": 1.9006104758157658, "grad_norm": 0.330078125, "learning_rate": 2.2506515978910456e-06, "loss": 0.4132, "step": 74875 }, { "epoch": 1.9007373938647816, "grad_norm": 0.357421875, "learning_rate": 2.244920851605464e-06, "loss": 0.4264, "step": 74880 }, { "epoch": 1.9008643119137973, "grad_norm": 0.3515625, "learning_rate": 2.2391973556027566e-06, "loss": 0.402, "step": 74885 }, { "epoch": 1.9009912299628131, "grad_norm": 0.353515625, "learning_rate": 2.233481110163765e-06, "loss": 0.4102, "step": 74890 }, { "epoch": 1.9011181480118289, "grad_norm": 0.35546875, "learning_rate": 2.227772115568999e-06, "loss": 0.4467, "step": 74895 }, { "epoch": 1.9012450660608446, "grad_norm": 0.30859375, "learning_rate": 2.2220703720986e-06, "loss": 0.4034, "step": 74900 }, { "epoch": 1.9013719841098604, "grad_norm": 0.37109375, "learning_rate": 2.216375880032345e-06, "loss": 0.4357, "step": 74905 }, { "epoch": 1.901498902158876, "grad_norm": 0.35546875, "learning_rate": 2.2106886396496436e-06, "loss": 0.4029, "step": 74910 }, { "epoch": 1.9016258202078917, "grad_norm": 0.349609375, "learning_rate": 2.205008651229573e-06, "loss": 0.3809, "step": 74915 }, { "epoch": 1.9017527382569075, "grad_norm": 0.36328125, "learning_rate": 2.199335915050876e-06, "loss": 0.4394, "step": 74920 }, { "epoch": 1.9018796563059233, "grad_norm": 0.35546875, "learning_rate": 2.193670431391881e-06, "loss": 0.423, "step": 74925 }, { "epoch": 1.902006574354939, "grad_norm": 0.375, "learning_rate": 2.188012200530598e-06, "loss": 0.4081, "step": 74930 }, { "epoch": 1.9021334924039548, "grad_norm": 0.3515625, "learning_rate": 2.1823612227446718e-06, "loss": 0.3974, "step": 74935 }, { "epoch": 1.9022604104529706, "grad_norm": 0.361328125, "learning_rate": 2.1767174983113977e-06, "loss": 0.4328, "step": 74940 }, { "epoch": 1.902387328501986, "grad_norm": 0.375, "learning_rate": 2.1710810275077206e-06, "loss": 0.4376, "step": 74945 }, { "epoch": 1.9025142465510019, "grad_norm": 0.361328125, "learning_rate": 2.1654518106102192e-06, "loss": 0.4196, "step": 74950 }, { "epoch": 1.9026411646000176, "grad_norm": 0.37109375, "learning_rate": 2.159829847895106e-06, "loss": 0.4047, "step": 74955 }, { "epoch": 1.9027680826490334, "grad_norm": 0.3359375, "learning_rate": 2.1542151396382436e-06, "loss": 0.4351, "step": 74960 }, { "epoch": 1.9028950006980492, "grad_norm": 0.376953125, "learning_rate": 2.148607686115161e-06, "loss": 0.4014, "step": 74965 }, { "epoch": 1.903021918747065, "grad_norm": 0.29296875, "learning_rate": 2.1430074876009896e-06, "loss": 0.3945, "step": 74970 }, { "epoch": 1.9031488367960807, "grad_norm": 0.333984375, "learning_rate": 2.137414544370558e-06, "loss": 0.3864, "step": 74975 }, { "epoch": 1.9032757548450965, "grad_norm": 0.326171875, "learning_rate": 2.1318288566982645e-06, "loss": 0.4135, "step": 74980 }, { "epoch": 1.9034026728941122, "grad_norm": 0.359375, "learning_rate": 2.1262504248582557e-06, "loss": 0.4285, "step": 74985 }, { "epoch": 1.903529590943128, "grad_norm": 0.359375, "learning_rate": 2.1206792491242295e-06, "loss": 0.4204, "step": 74990 }, { "epoch": 1.9036565089921438, "grad_norm": 0.388671875, "learning_rate": 2.115115329769568e-06, "loss": 0.3982, "step": 74995 }, { "epoch": 1.9037834270411595, "grad_norm": 0.345703125, "learning_rate": 2.1095586670672847e-06, "loss": 0.423, "step": 75000 }, { "epoch": 1.9039103450901753, "grad_norm": 0.33203125, "learning_rate": 2.1040092612900628e-06, "loss": 0.4419, "step": 75005 }, { "epoch": 1.904037263139191, "grad_norm": 0.345703125, "learning_rate": 2.0984671127101673e-06, "loss": 0.441, "step": 75010 }, { "epoch": 1.9041641811882069, "grad_norm": 0.34765625, "learning_rate": 2.092932221599597e-06, "loss": 0.4145, "step": 75015 }, { "epoch": 1.9042910992372226, "grad_norm": 0.34375, "learning_rate": 2.087404588229902e-06, "loss": 0.4121, "step": 75020 }, { "epoch": 1.9044180172862384, "grad_norm": 0.341796875, "learning_rate": 2.0818842128723646e-06, "loss": 0.4098, "step": 75025 }, { "epoch": 1.9045449353352542, "grad_norm": 0.369140625, "learning_rate": 2.076371095797835e-06, "loss": 0.409, "step": 75030 }, { "epoch": 1.90467185338427, "grad_norm": 0.353515625, "learning_rate": 2.070865237276864e-06, "loss": 0.3959, "step": 75035 }, { "epoch": 1.9047987714332857, "grad_norm": 0.330078125, "learning_rate": 2.065366637579585e-06, "loss": 0.4078, "step": 75040 }, { "epoch": 1.9049256894823012, "grad_norm": 0.361328125, "learning_rate": 2.059875296975849e-06, "loss": 0.4384, "step": 75045 }, { "epoch": 1.905052607531317, "grad_norm": 0.35546875, "learning_rate": 2.054391215735107e-06, "loss": 0.4135, "step": 75050 }, { "epoch": 1.9051795255803328, "grad_norm": 0.36328125, "learning_rate": 2.0489143941264606e-06, "loss": 0.4252, "step": 75055 }, { "epoch": 1.9053064436293485, "grad_norm": 0.34765625, "learning_rate": 2.043444832418645e-06, "loss": 0.4191, "step": 75060 }, { "epoch": 1.9054333616783643, "grad_norm": 0.33984375, "learning_rate": 2.037982530880061e-06, "loss": 0.4115, "step": 75065 }, { "epoch": 1.90556027972738, "grad_norm": 0.328125, "learning_rate": 2.032527489778729e-06, "loss": 0.3994, "step": 75070 }, { "epoch": 1.9056871977763958, "grad_norm": 0.34765625, "learning_rate": 2.0270797093823177e-06, "loss": 0.3936, "step": 75075 }, { "epoch": 1.9058141158254114, "grad_norm": 0.330078125, "learning_rate": 2.0216391899581798e-06, "loss": 0.4153, "step": 75080 }, { "epoch": 1.9059410338744271, "grad_norm": 0.337890625, "learning_rate": 2.0162059317732516e-06, "loss": 0.3997, "step": 75085 }, { "epoch": 1.906067951923443, "grad_norm": 0.357421875, "learning_rate": 2.0107799350941533e-06, "loss": 0.438, "step": 75090 }, { "epoch": 1.9061948699724587, "grad_norm": 0.353515625, "learning_rate": 2.005361200187122e-06, "loss": 0.42, "step": 75095 }, { "epoch": 1.9063217880214745, "grad_norm": 0.35546875, "learning_rate": 1.999949727318079e-06, "loss": 0.4141, "step": 75100 }, { "epoch": 1.9064487060704902, "grad_norm": 0.333984375, "learning_rate": 1.9945455167525605e-06, "loss": 0.4107, "step": 75105 }, { "epoch": 1.906575624119506, "grad_norm": 0.34375, "learning_rate": 1.989148568755705e-06, "loss": 0.4202, "step": 75110 }, { "epoch": 1.9067025421685218, "grad_norm": 0.375, "learning_rate": 1.9837588835923843e-06, "loss": 0.4125, "step": 75115 }, { "epoch": 1.9068294602175375, "grad_norm": 0.34765625, "learning_rate": 1.9783764615270536e-06, "loss": 0.4409, "step": 75120 }, { "epoch": 1.9069563782665533, "grad_norm": 0.373046875, "learning_rate": 1.9730013028238347e-06, "loss": 0.4244, "step": 75125 }, { "epoch": 1.907083296315569, "grad_norm": 0.330078125, "learning_rate": 1.9676334077464827e-06, "loss": 0.396, "step": 75130 }, { "epoch": 1.9072102143645848, "grad_norm": 0.369140625, "learning_rate": 1.962272776558371e-06, "loss": 0.4365, "step": 75135 }, { "epoch": 1.9073371324136006, "grad_norm": 0.328125, "learning_rate": 1.956919409522589e-06, "loss": 0.4329, "step": 75140 }, { "epoch": 1.9074640504626164, "grad_norm": 0.359375, "learning_rate": 1.951573306901777e-06, "loss": 0.4206, "step": 75145 }, { "epoch": 1.9075909685116321, "grad_norm": 0.34375, "learning_rate": 1.9462344689583074e-06, "loss": 0.4349, "step": 75150 }, { "epoch": 1.907717886560648, "grad_norm": 0.3046875, "learning_rate": 1.9409028959541216e-06, "loss": 0.3721, "step": 75155 }, { "epoch": 1.9078448046096637, "grad_norm": 0.359375, "learning_rate": 1.93557858815086e-06, "loss": 0.4127, "step": 75160 }, { "epoch": 1.9079717226586794, "grad_norm": 0.365234375, "learning_rate": 1.9302615458097636e-06, "loss": 0.4097, "step": 75165 }, { "epoch": 1.9080986407076952, "grad_norm": 0.357421875, "learning_rate": 1.924951769191774e-06, "loss": 0.3985, "step": 75170 }, { "epoch": 1.9082255587567107, "grad_norm": 0.3359375, "learning_rate": 1.9196492585573985e-06, "loss": 0.4192, "step": 75175 }, { "epoch": 1.9083524768057265, "grad_norm": 0.34375, "learning_rate": 1.9143540141668635e-06, "loss": 0.4223, "step": 75180 }, { "epoch": 1.9084793948547423, "grad_norm": 0.35546875, "learning_rate": 1.90906603627996e-06, "loss": 0.4195, "step": 75185 }, { "epoch": 1.908606312903758, "grad_norm": 0.333984375, "learning_rate": 1.9037853251562152e-06, "loss": 0.4031, "step": 75190 }, { "epoch": 1.9087332309527738, "grad_norm": 0.34375, "learning_rate": 1.8985118810547373e-06, "loss": 0.4379, "step": 75195 }, { "epoch": 1.9088601490017896, "grad_norm": 0.322265625, "learning_rate": 1.8932457042342863e-06, "loss": 0.4179, "step": 75200 }, { "epoch": 1.9089870670508053, "grad_norm": 0.33984375, "learning_rate": 1.8879867949532557e-06, "loss": 0.4012, "step": 75205 }, { "epoch": 1.9091139850998209, "grad_norm": 0.349609375, "learning_rate": 1.8827351534697388e-06, "loss": 0.4111, "step": 75210 }, { "epoch": 1.9092409031488367, "grad_norm": 0.361328125, "learning_rate": 1.8774907800413797e-06, "loss": 0.4124, "step": 75215 }, { "epoch": 1.9093678211978524, "grad_norm": 0.337890625, "learning_rate": 1.8722536749255723e-06, "loss": 0.4107, "step": 75220 }, { "epoch": 1.9094947392468682, "grad_norm": 0.34765625, "learning_rate": 1.8670238383792445e-06, "loss": 0.4274, "step": 75225 }, { "epoch": 1.909621657295884, "grad_norm": 0.35546875, "learning_rate": 1.861801270659058e-06, "loss": 0.4164, "step": 75230 }, { "epoch": 1.9097485753448997, "grad_norm": 0.359375, "learning_rate": 1.856585972021274e-06, "loss": 0.4019, "step": 75235 }, { "epoch": 1.9098754933939155, "grad_norm": 0.36328125, "learning_rate": 1.8513779427218211e-06, "loss": 0.4518, "step": 75240 }, { "epoch": 1.9100024114429313, "grad_norm": 0.353515625, "learning_rate": 1.8461771830162287e-06, "loss": 0.4415, "step": 75245 }, { "epoch": 1.910129329491947, "grad_norm": 0.369140625, "learning_rate": 1.8409836931596922e-06, "loss": 0.4055, "step": 75250 }, { "epoch": 1.9102562475409628, "grad_norm": 0.353515625, "learning_rate": 1.8357974734070913e-06, "loss": 0.4064, "step": 75255 }, { "epoch": 1.9103831655899786, "grad_norm": 0.33984375, "learning_rate": 1.8306185240128723e-06, "loss": 0.4139, "step": 75260 }, { "epoch": 1.9105100836389943, "grad_norm": 0.361328125, "learning_rate": 1.825446845231182e-06, "loss": 0.4144, "step": 75265 }, { "epoch": 1.91063700168801, "grad_norm": 0.3046875, "learning_rate": 1.8202824373157844e-06, "loss": 0.3927, "step": 75270 }, { "epoch": 1.9107639197370259, "grad_norm": 0.322265625, "learning_rate": 1.8151253005201094e-06, "loss": 0.4155, "step": 75275 }, { "epoch": 1.9108908377860416, "grad_norm": 0.357421875, "learning_rate": 1.8099754350972218e-06, "loss": 0.4261, "step": 75280 }, { "epoch": 1.9110177558350574, "grad_norm": 0.34375, "learning_rate": 1.8048328412997858e-06, "loss": 0.3949, "step": 75285 }, { "epoch": 1.9111446738840732, "grad_norm": 0.353515625, "learning_rate": 1.7996975193801832e-06, "loss": 0.4123, "step": 75290 }, { "epoch": 1.911271591933089, "grad_norm": 0.35546875, "learning_rate": 1.794569469590379e-06, "loss": 0.401, "step": 75295 }, { "epoch": 1.9113985099821047, "grad_norm": 0.3828125, "learning_rate": 1.7894486921820217e-06, "loss": 0.4397, "step": 75300 }, { "epoch": 1.9115254280311205, "grad_norm": 0.357421875, "learning_rate": 1.7843351874063938e-06, "loss": 0.4338, "step": 75305 }, { "epoch": 1.911652346080136, "grad_norm": 0.333984375, "learning_rate": 1.7792289555143779e-06, "loss": 0.4214, "step": 75310 }, { "epoch": 1.9117792641291518, "grad_norm": 0.34765625, "learning_rate": 1.774129996756557e-06, "loss": 0.4542, "step": 75315 }, { "epoch": 1.9119061821781675, "grad_norm": 0.326171875, "learning_rate": 1.7690383113831474e-06, "loss": 0.411, "step": 75320 }, { "epoch": 1.9120331002271833, "grad_norm": 0.37890625, "learning_rate": 1.7639538996439662e-06, "loss": 0.4589, "step": 75325 }, { "epoch": 1.912160018276199, "grad_norm": 0.35546875, "learning_rate": 1.7588767617885302e-06, "loss": 0.4165, "step": 75330 }, { "epoch": 1.9122869363252148, "grad_norm": 0.32421875, "learning_rate": 1.753806898065957e-06, "loss": 0.3891, "step": 75335 }, { "epoch": 1.9124138543742304, "grad_norm": 0.375, "learning_rate": 1.7487443087250141e-06, "loss": 0.4152, "step": 75340 }, { "epoch": 1.9125407724232462, "grad_norm": 0.328125, "learning_rate": 1.7436889940141364e-06, "loss": 0.4072, "step": 75345 }, { "epoch": 1.912667690472262, "grad_norm": 0.3515625, "learning_rate": 1.7386409541813918e-06, "loss": 0.4249, "step": 75350 }, { "epoch": 1.9127946085212777, "grad_norm": 0.369140625, "learning_rate": 1.7336001894744655e-06, "loss": 0.3969, "step": 75355 }, { "epoch": 1.9129215265702935, "grad_norm": 0.369140625, "learning_rate": 1.7285667001407267e-06, "loss": 0.432, "step": 75360 }, { "epoch": 1.9130484446193092, "grad_norm": 0.328125, "learning_rate": 1.7235404864271606e-06, "loss": 0.3983, "step": 75365 }, { "epoch": 1.913175362668325, "grad_norm": 0.3828125, "learning_rate": 1.7185215485804039e-06, "loss": 0.4556, "step": 75370 }, { "epoch": 1.9133022807173408, "grad_norm": 0.361328125, "learning_rate": 1.713509886846709e-06, "loss": 0.4457, "step": 75375 }, { "epoch": 1.9134291987663565, "grad_norm": 0.384765625, "learning_rate": 1.7085055014720294e-06, "loss": 0.4441, "step": 75380 }, { "epoch": 1.9135561168153723, "grad_norm": 0.353515625, "learning_rate": 1.7035083927019189e-06, "loss": 0.4175, "step": 75385 }, { "epoch": 1.913683034864388, "grad_norm": 0.365234375, "learning_rate": 1.6985185607815644e-06, "loss": 0.4422, "step": 75390 }, { "epoch": 1.9138099529134038, "grad_norm": 0.36328125, "learning_rate": 1.6935360059558367e-06, "loss": 0.4129, "step": 75395 }, { "epoch": 1.9139368709624196, "grad_norm": 0.33984375, "learning_rate": 1.6885607284692237e-06, "loss": 0.4023, "step": 75400 }, { "epoch": 1.9140637890114354, "grad_norm": 0.35546875, "learning_rate": 1.6835927285658635e-06, "loss": 0.4085, "step": 75405 }, { "epoch": 1.9141907070604511, "grad_norm": 0.322265625, "learning_rate": 1.6786320064895109e-06, "loss": 0.4344, "step": 75410 }, { "epoch": 1.914317625109467, "grad_norm": 0.318359375, "learning_rate": 1.6736785624836047e-06, "loss": 0.3969, "step": 75415 }, { "epoch": 1.9144445431584827, "grad_norm": 0.361328125, "learning_rate": 1.6687323967912169e-06, "loss": 0.4357, "step": 75420 }, { "epoch": 1.9145714612074984, "grad_norm": 0.34765625, "learning_rate": 1.663793509655037e-06, "loss": 0.4043, "step": 75425 }, { "epoch": 1.9146983792565142, "grad_norm": 0.333984375, "learning_rate": 1.6588619013174043e-06, "loss": 0.413, "step": 75430 }, { "epoch": 1.91482529730553, "grad_norm": 0.33203125, "learning_rate": 1.6539375720203585e-06, "loss": 0.3739, "step": 75435 }, { "epoch": 1.9149522153545455, "grad_norm": 0.318359375, "learning_rate": 1.6490205220054897e-06, "loss": 0.4123, "step": 75440 }, { "epoch": 1.9150791334035613, "grad_norm": 0.345703125, "learning_rate": 1.6441107515140884e-06, "loss": 0.4138, "step": 75445 }, { "epoch": 1.915206051452577, "grad_norm": 0.337890625, "learning_rate": 1.6392082607870948e-06, "loss": 0.403, "step": 75450 }, { "epoch": 1.9153329695015928, "grad_norm": 0.357421875, "learning_rate": 1.6343130500650336e-06, "loss": 0.4063, "step": 75455 }, { "epoch": 1.9154598875506086, "grad_norm": 0.345703125, "learning_rate": 1.6294251195881293e-06, "loss": 0.4106, "step": 75460 }, { "epoch": 1.9155868055996244, "grad_norm": 0.337890625, "learning_rate": 1.6245444695962395e-06, "loss": 0.4364, "step": 75465 }, { "epoch": 1.9157137236486401, "grad_norm": 0.37109375, "learning_rate": 1.6196711003288564e-06, "loss": 0.4211, "step": 75470 }, { "epoch": 1.9158406416976557, "grad_norm": 0.376953125, "learning_rate": 1.6148050120251056e-06, "loss": 0.4377, "step": 75475 }, { "epoch": 1.9159675597466714, "grad_norm": 0.361328125, "learning_rate": 1.6099462049237455e-06, "loss": 0.4105, "step": 75480 }, { "epoch": 1.9160944777956872, "grad_norm": 0.369140625, "learning_rate": 1.6050946792632358e-06, "loss": 0.4391, "step": 75485 }, { "epoch": 1.916221395844703, "grad_norm": 0.34375, "learning_rate": 1.6002504352816025e-06, "loss": 0.4143, "step": 75490 }, { "epoch": 1.9163483138937187, "grad_norm": 0.349609375, "learning_rate": 1.5954134732165724e-06, "loss": 0.4055, "step": 75495 }, { "epoch": 1.9164752319427345, "grad_norm": 0.36328125, "learning_rate": 1.5905837933054888e-06, "loss": 0.4041, "step": 75500 }, { "epoch": 1.9166021499917503, "grad_norm": 0.337890625, "learning_rate": 1.5857613957853454e-06, "loss": 0.398, "step": 75505 }, { "epoch": 1.916729068040766, "grad_norm": 0.33984375, "learning_rate": 1.5809462808927697e-06, "loss": 0.3985, "step": 75510 }, { "epoch": 1.9168559860897818, "grad_norm": 0.3984375, "learning_rate": 1.5761384488640227e-06, "loss": 0.4252, "step": 75515 }, { "epoch": 1.9169829041387976, "grad_norm": 0.33203125, "learning_rate": 1.5713378999350655e-06, "loss": 0.4162, "step": 75520 }, { "epoch": 1.9171098221878133, "grad_norm": 0.349609375, "learning_rate": 1.56654463434141e-06, "loss": 0.4208, "step": 75525 }, { "epoch": 1.917236740236829, "grad_norm": 0.345703125, "learning_rate": 1.5617586523183012e-06, "loss": 0.4537, "step": 75530 }, { "epoch": 1.9173636582858449, "grad_norm": 0.375, "learning_rate": 1.5569799541005513e-06, "loss": 0.4263, "step": 75535 }, { "epoch": 1.9174905763348606, "grad_norm": 0.33984375, "learning_rate": 1.5522085399226725e-06, "loss": 0.4076, "step": 75540 }, { "epoch": 1.9176174943838764, "grad_norm": 0.337890625, "learning_rate": 1.5474444100187778e-06, "loss": 0.4202, "step": 75545 }, { "epoch": 1.9177444124328922, "grad_norm": 0.3671875, "learning_rate": 1.5426875646226633e-06, "loss": 0.3969, "step": 75550 }, { "epoch": 1.917871330481908, "grad_norm": 0.35546875, "learning_rate": 1.5379380039677259e-06, "loss": 0.4335, "step": 75555 }, { "epoch": 1.9179982485309237, "grad_norm": 0.33203125, "learning_rate": 1.533195728287029e-06, "loss": 0.413, "step": 75560 }, { "epoch": 1.9181251665799395, "grad_norm": 0.36328125, "learning_rate": 1.5284607378132864e-06, "loss": 0.4284, "step": 75565 }, { "epoch": 1.9182520846289552, "grad_norm": 0.345703125, "learning_rate": 1.5237330327788289e-06, "loss": 0.4277, "step": 75570 }, { "epoch": 1.9183790026779708, "grad_norm": 0.365234375, "learning_rate": 1.5190126134156544e-06, "loss": 0.4158, "step": 75575 }, { "epoch": 1.9185059207269866, "grad_norm": 0.3515625, "learning_rate": 1.5142994799553775e-06, "loss": 0.4127, "step": 75580 }, { "epoch": 1.9186328387760023, "grad_norm": 0.333984375, "learning_rate": 1.5095936326292634e-06, "loss": 0.4336, "step": 75585 }, { "epoch": 1.918759756825018, "grad_norm": 0.35546875, "learning_rate": 1.5048950716682607e-06, "loss": 0.4248, "step": 75590 }, { "epoch": 1.9188866748740339, "grad_norm": 0.37109375, "learning_rate": 1.5002037973029013e-06, "loss": 0.4049, "step": 75595 }, { "epoch": 1.9190135929230496, "grad_norm": 0.345703125, "learning_rate": 1.4955198097633847e-06, "loss": 0.4219, "step": 75600 }, { "epoch": 1.9191405109720652, "grad_norm": 0.3515625, "learning_rate": 1.4908431092795602e-06, "loss": 0.4083, "step": 75605 }, { "epoch": 1.919267429021081, "grad_norm": 0.337890625, "learning_rate": 1.4861736960808945e-06, "loss": 0.4243, "step": 75610 }, { "epoch": 1.9193943470700967, "grad_norm": 0.365234375, "learning_rate": 1.4815115703965374e-06, "loss": 0.4402, "step": 75615 }, { "epoch": 1.9195212651191125, "grad_norm": 0.345703125, "learning_rate": 1.4768567324552393e-06, "loss": 0.4042, "step": 75620 }, { "epoch": 1.9196481831681282, "grad_norm": 0.353515625, "learning_rate": 1.4722091824854343e-06, "loss": 0.4225, "step": 75625 }, { "epoch": 1.919775101217144, "grad_norm": 0.359375, "learning_rate": 1.4675689207151398e-06, "loss": 0.4267, "step": 75630 }, { "epoch": 1.9199020192661598, "grad_norm": 0.36328125, "learning_rate": 1.4629359473720902e-06, "loss": 0.416, "step": 75635 }, { "epoch": 1.9200289373151755, "grad_norm": 0.34765625, "learning_rate": 1.4583102626835874e-06, "loss": 0.4091, "step": 75640 }, { "epoch": 1.9201558553641913, "grad_norm": 0.35546875, "learning_rate": 1.4536918668766495e-06, "loss": 0.4216, "step": 75645 }, { "epoch": 1.920282773413207, "grad_norm": 0.36328125, "learning_rate": 1.449080760177862e-06, "loss": 0.4331, "step": 75650 }, { "epoch": 1.9204096914622228, "grad_norm": 0.384765625, "learning_rate": 1.4444769428135272e-06, "loss": 0.4287, "step": 75655 }, { "epoch": 1.9205366095112386, "grad_norm": 0.341796875, "learning_rate": 1.4398804150095145e-06, "loss": 0.4046, "step": 75660 }, { "epoch": 1.9206635275602544, "grad_norm": 0.33984375, "learning_rate": 1.4352911769913932e-06, "loss": 0.4324, "step": 75665 }, { "epoch": 1.9207904456092701, "grad_norm": 0.361328125, "learning_rate": 1.4307092289843503e-06, "loss": 0.4122, "step": 75670 }, { "epoch": 1.920917363658286, "grad_norm": 0.34375, "learning_rate": 1.4261345712132388e-06, "loss": 0.4109, "step": 75675 }, { "epoch": 1.9210442817073017, "grad_norm": 0.322265625, "learning_rate": 1.4215672039025128e-06, "loss": 0.4461, "step": 75680 }, { "epoch": 1.9211711997563174, "grad_norm": 0.34765625, "learning_rate": 1.417007127276293e-06, "loss": 0.4134, "step": 75685 }, { "epoch": 1.9212981178053332, "grad_norm": 0.345703125, "learning_rate": 1.4124543415583668e-06, "loss": 0.4344, "step": 75690 }, { "epoch": 1.921425035854349, "grad_norm": 0.32421875, "learning_rate": 1.4079088469721056e-06, "loss": 0.4305, "step": 75695 }, { "epoch": 1.9215519539033648, "grad_norm": 0.353515625, "learning_rate": 1.4033706437405646e-06, "loss": 0.4068, "step": 75700 }, { "epoch": 1.9216788719523803, "grad_norm": 0.3515625, "learning_rate": 1.3988397320864486e-06, "loss": 0.425, "step": 75705 }, { "epoch": 1.921805790001396, "grad_norm": 0.3671875, "learning_rate": 1.3943161122320635e-06, "loss": 0.4075, "step": 75710 }, { "epoch": 1.9219327080504118, "grad_norm": 0.36328125, "learning_rate": 1.3897997843993814e-06, "loss": 0.4367, "step": 75715 }, { "epoch": 1.9220596260994276, "grad_norm": 0.349609375, "learning_rate": 1.3852907488100417e-06, "loss": 0.3717, "step": 75720 }, { "epoch": 1.9221865441484434, "grad_norm": 0.353515625, "learning_rate": 1.3807890056852677e-06, "loss": 0.4227, "step": 75725 }, { "epoch": 1.9223134621974591, "grad_norm": 0.361328125, "learning_rate": 1.3762945552459992e-06, "loss": 0.4239, "step": 75730 }, { "epoch": 1.922440380246475, "grad_norm": 0.36328125, "learning_rate": 1.3718073977127263e-06, "loss": 0.4192, "step": 75735 }, { "epoch": 1.9225672982954904, "grad_norm": 0.326171875, "learning_rate": 1.3673275333056733e-06, "loss": 0.443, "step": 75740 }, { "epoch": 1.9226942163445062, "grad_norm": 0.365234375, "learning_rate": 1.3628549622446638e-06, "loss": 0.436, "step": 75745 }, { "epoch": 1.922821134393522, "grad_norm": 0.361328125, "learning_rate": 1.3583896847491393e-06, "loss": 0.4148, "step": 75750 }, { "epoch": 1.9229480524425377, "grad_norm": 0.357421875, "learning_rate": 1.3539317010382245e-06, "loss": 0.4242, "step": 75755 }, { "epoch": 1.9230749704915535, "grad_norm": 0.3515625, "learning_rate": 1.349481011330661e-06, "loss": 0.4283, "step": 75760 }, { "epoch": 1.9232018885405693, "grad_norm": 0.34765625, "learning_rate": 1.3450376158448739e-06, "loss": 0.3946, "step": 75765 }, { "epoch": 1.923328806589585, "grad_norm": 0.361328125, "learning_rate": 1.3406015147988557e-06, "loss": 0.4042, "step": 75770 }, { "epoch": 1.9234557246386008, "grad_norm": 0.345703125, "learning_rate": 1.3361727084103158e-06, "loss": 0.4273, "step": 75775 }, { "epoch": 1.9235826426876166, "grad_norm": 0.34375, "learning_rate": 1.3317511968965633e-06, "loss": 0.4195, "step": 75780 }, { "epoch": 1.9237095607366324, "grad_norm": 0.359375, "learning_rate": 1.3273369804745416e-06, "loss": 0.4216, "step": 75785 }, { "epoch": 1.9238364787856481, "grad_norm": 0.3203125, "learning_rate": 1.3229300593608938e-06, "loss": 0.423, "step": 75790 }, { "epoch": 1.9239633968346639, "grad_norm": 0.34375, "learning_rate": 1.3185304337718138e-06, "loss": 0.4093, "step": 75795 }, { "epoch": 1.9240903148836797, "grad_norm": 0.396484375, "learning_rate": 1.3141381039232457e-06, "loss": 0.4047, "step": 75800 }, { "epoch": 1.9242172329326954, "grad_norm": 0.33203125, "learning_rate": 1.3097530700306835e-06, "loss": 0.3946, "step": 75805 }, { "epoch": 1.9243441509817112, "grad_norm": 0.34375, "learning_rate": 1.305375332309322e-06, "loss": 0.4475, "step": 75810 }, { "epoch": 1.924471069030727, "grad_norm": 0.345703125, "learning_rate": 1.3010048909739556e-06, "loss": 0.4345, "step": 75815 }, { "epoch": 1.9245979870797427, "grad_norm": 0.35546875, "learning_rate": 1.29664174623903e-06, "loss": 0.4189, "step": 75820 }, { "epoch": 1.9247249051287585, "grad_norm": 0.357421875, "learning_rate": 1.2922858983186735e-06, "loss": 0.4279, "step": 75825 }, { "epoch": 1.9248518231777743, "grad_norm": 0.31640625, "learning_rate": 1.2879373474265987e-06, "loss": 0.4036, "step": 75830 }, { "epoch": 1.92497874122679, "grad_norm": 0.302734375, "learning_rate": 1.283596093776218e-06, "loss": 0.4214, "step": 75835 }, { "epoch": 1.9251056592758056, "grad_norm": 0.330078125, "learning_rate": 1.279262137580528e-06, "loss": 0.4262, "step": 75840 }, { "epoch": 1.9252325773248213, "grad_norm": 0.341796875, "learning_rate": 1.274935479052208e-06, "loss": 0.4163, "step": 75845 }, { "epoch": 1.925359495373837, "grad_norm": 0.34375, "learning_rate": 1.2706161184035557e-06, "loss": 0.405, "step": 75850 }, { "epoch": 1.9254864134228529, "grad_norm": 0.326171875, "learning_rate": 1.2663040558465343e-06, "loss": 0.4223, "step": 75855 }, { "epoch": 1.9256133314718686, "grad_norm": 0.34765625, "learning_rate": 1.261999291592708e-06, "loss": 0.4103, "step": 75860 }, { "epoch": 1.9257402495208844, "grad_norm": 0.353515625, "learning_rate": 1.2577018258533412e-06, "loss": 0.4177, "step": 75865 }, { "epoch": 1.9258671675699, "grad_norm": 0.330078125, "learning_rate": 1.2534116588392983e-06, "loss": 0.3938, "step": 75870 }, { "epoch": 1.9259940856189157, "grad_norm": 0.3984375, "learning_rate": 1.2491287907610948e-06, "loss": 0.43, "step": 75875 }, { "epoch": 1.9261210036679315, "grad_norm": 0.369140625, "learning_rate": 1.2448532218288787e-06, "loss": 0.4211, "step": 75880 }, { "epoch": 1.9262479217169473, "grad_norm": 0.33984375, "learning_rate": 1.2405849522524659e-06, "loss": 0.4027, "step": 75885 }, { "epoch": 1.926374839765963, "grad_norm": 0.349609375, "learning_rate": 1.2363239822412884e-06, "loss": 0.4371, "step": 75890 }, { "epoch": 1.9265017578149788, "grad_norm": 0.349609375, "learning_rate": 1.2320703120044462e-06, "loss": 0.3899, "step": 75895 }, { "epoch": 1.9266286758639946, "grad_norm": 0.349609375, "learning_rate": 1.2278239417506385e-06, "loss": 0.431, "step": 75900 }, { "epoch": 1.9267555939130103, "grad_norm": 0.333984375, "learning_rate": 1.2235848716882656e-06, "loss": 0.4017, "step": 75905 }, { "epoch": 1.926882511962026, "grad_norm": 0.37890625, "learning_rate": 1.2193531020252944e-06, "loss": 0.4297, "step": 75910 }, { "epoch": 1.9270094300110419, "grad_norm": 0.341796875, "learning_rate": 1.2151286329694088e-06, "loss": 0.4174, "step": 75915 }, { "epoch": 1.9271363480600576, "grad_norm": 0.353515625, "learning_rate": 1.2109114647279094e-06, "loss": 0.4058, "step": 75920 }, { "epoch": 1.9272632661090734, "grad_norm": 0.33984375, "learning_rate": 1.2067015975076978e-06, "loss": 0.3925, "step": 75925 }, { "epoch": 1.9273901841580892, "grad_norm": 0.361328125, "learning_rate": 1.2024990315153748e-06, "loss": 0.4347, "step": 75930 }, { "epoch": 1.927517102207105, "grad_norm": 0.35546875, "learning_rate": 1.1983037669571427e-06, "loss": 0.4246, "step": 75935 }, { "epoch": 1.9276440202561207, "grad_norm": 0.3359375, "learning_rate": 1.1941158040388865e-06, "loss": 0.4405, "step": 75940 }, { "epoch": 1.9277709383051365, "grad_norm": 0.326171875, "learning_rate": 1.189935142966092e-06, "loss": 0.4213, "step": 75945 }, { "epoch": 1.9278978563541522, "grad_norm": 0.3515625, "learning_rate": 1.1857617839438949e-06, "loss": 0.4207, "step": 75950 }, { "epoch": 1.928024774403168, "grad_norm": 0.349609375, "learning_rate": 1.1815957271770982e-06, "loss": 0.4127, "step": 75955 }, { "epoch": 1.9281516924521838, "grad_norm": 0.34375, "learning_rate": 1.1774369728701049e-06, "loss": 0.414, "step": 75960 }, { "epoch": 1.9282786105011995, "grad_norm": 0.3671875, "learning_rate": 1.1732855212270188e-06, "loss": 0.4441, "step": 75965 }, { "epoch": 1.928405528550215, "grad_norm": 0.34375, "learning_rate": 1.1691413724515097e-06, "loss": 0.4111, "step": 75970 }, { "epoch": 1.9285324465992308, "grad_norm": 0.34765625, "learning_rate": 1.1650045267469488e-06, "loss": 0.4188, "step": 75975 }, { "epoch": 1.9286593646482466, "grad_norm": 0.353515625, "learning_rate": 1.16087498431634e-06, "loss": 0.431, "step": 75980 }, { "epoch": 1.9287862826972624, "grad_norm": 0.330078125, "learning_rate": 1.1567527453622882e-06, "loss": 0.3936, "step": 75985 }, { "epoch": 1.9289132007462781, "grad_norm": 0.353515625, "learning_rate": 1.1526378100870982e-06, "loss": 0.3855, "step": 75990 }, { "epoch": 1.929040118795294, "grad_norm": 0.353515625, "learning_rate": 1.1485301786926916e-06, "loss": 0.4288, "step": 75995 }, { "epoch": 1.9291670368443097, "grad_norm": 0.369140625, "learning_rate": 1.1444298513805905e-06, "loss": 0.4161, "step": 76000 }, { "epoch": 1.9292939548933252, "grad_norm": 0.369140625, "learning_rate": 1.1403368283520342e-06, "loss": 0.4169, "step": 76005 }, { "epoch": 1.929420872942341, "grad_norm": 0.373046875, "learning_rate": 1.136251109807862e-06, "loss": 0.4252, "step": 76010 }, { "epoch": 1.9295477909913568, "grad_norm": 0.361328125, "learning_rate": 1.13217269594853e-06, "loss": 0.4051, "step": 76015 }, { "epoch": 1.9296747090403725, "grad_norm": 0.359375, "learning_rate": 1.128101586974195e-06, "loss": 0.4304, "step": 76020 }, { "epoch": 1.9298016270893883, "grad_norm": 0.322265625, "learning_rate": 1.1240377830846303e-06, "loss": 0.4213, "step": 76025 }, { "epoch": 1.929928545138404, "grad_norm": 0.333984375, "learning_rate": 1.11998128447921e-06, "loss": 0.41, "step": 76030 }, { "epoch": 1.9300554631874198, "grad_norm": 0.353515625, "learning_rate": 1.1159320913569914e-06, "loss": 0.4269, "step": 76035 }, { "epoch": 1.9301823812364356, "grad_norm": 0.326171875, "learning_rate": 1.111890203916699e-06, "loss": 0.4096, "step": 76040 }, { "epoch": 1.9303092992854514, "grad_norm": 0.326171875, "learning_rate": 1.1078556223566404e-06, "loss": 0.4061, "step": 76045 }, { "epoch": 1.9304362173344671, "grad_norm": 0.330078125, "learning_rate": 1.103828346874791e-06, "loss": 0.4392, "step": 76050 }, { "epoch": 1.930563135383483, "grad_norm": 0.34765625, "learning_rate": 1.099808377668776e-06, "loss": 0.4374, "step": 76055 }, { "epoch": 1.9306900534324987, "grad_norm": 0.345703125, "learning_rate": 1.0957957149358544e-06, "loss": 0.4051, "step": 76060 }, { "epoch": 1.9308169714815144, "grad_norm": 0.353515625, "learning_rate": 1.0917903588729183e-06, "loss": 0.441, "step": 76065 }, { "epoch": 1.9309438895305302, "grad_norm": 0.392578125, "learning_rate": 1.0877923096765107e-06, "loss": 0.3961, "step": 76070 }, { "epoch": 1.931070807579546, "grad_norm": 0.341796875, "learning_rate": 1.0838015675428246e-06, "loss": 0.418, "step": 76075 }, { "epoch": 1.9311977256285617, "grad_norm": 0.345703125, "learning_rate": 1.0798181326676703e-06, "loss": 0.4194, "step": 76080 }, { "epoch": 1.9313246436775775, "grad_norm": 0.37890625, "learning_rate": 1.0758420052465244e-06, "loss": 0.4338, "step": 76085 }, { "epoch": 1.9314515617265933, "grad_norm": 0.353515625, "learning_rate": 1.071873185474481e-06, "loss": 0.3987, "step": 76090 }, { "epoch": 1.931578479775609, "grad_norm": 0.345703125, "learning_rate": 1.0679116735463012e-06, "loss": 0.4262, "step": 76095 }, { "epoch": 1.9317053978246248, "grad_norm": 0.365234375, "learning_rate": 1.0639574696563623e-06, "loss": 0.4244, "step": 76100 }, { "epoch": 1.9318323158736403, "grad_norm": 0.365234375, "learning_rate": 1.0600105739987098e-06, "loss": 0.4352, "step": 76105 }, { "epoch": 1.9319592339226561, "grad_norm": 0.3515625, "learning_rate": 1.0560709867670213e-06, "loss": 0.4174, "step": 76110 }, { "epoch": 1.9320861519716719, "grad_norm": 0.3359375, "learning_rate": 1.0521387081545762e-06, "loss": 0.3963, "step": 76115 }, { "epoch": 1.9322130700206877, "grad_norm": 0.33984375, "learning_rate": 1.0482137383543698e-06, "loss": 0.4197, "step": 76120 }, { "epoch": 1.9323399880697034, "grad_norm": 0.349609375, "learning_rate": 1.0442960775589648e-06, "loss": 0.4207, "step": 76125 }, { "epoch": 1.9324669061187192, "grad_norm": 0.361328125, "learning_rate": 1.0403857259606408e-06, "loss": 0.4123, "step": 76130 }, { "epoch": 1.9325938241677347, "grad_norm": 0.34765625, "learning_rate": 1.0364826837512275e-06, "loss": 0.4267, "step": 76135 }, { "epoch": 1.9327207422167505, "grad_norm": 0.34765625, "learning_rate": 1.0325869511222883e-06, "loss": 0.404, "step": 76140 }, { "epoch": 1.9328476602657663, "grad_norm": 0.330078125, "learning_rate": 1.0286985282649707e-06, "loss": 0.4259, "step": 76145 }, { "epoch": 1.932974578314782, "grad_norm": 0.34375, "learning_rate": 1.024817415370055e-06, "loss": 0.4301, "step": 76150 }, { "epoch": 1.9331014963637978, "grad_norm": 0.357421875, "learning_rate": 1.0209436126280057e-06, "loss": 0.4245, "step": 76155 }, { "epoch": 1.9332284144128136, "grad_norm": 0.388671875, "learning_rate": 1.0170771202289208e-06, "loss": 0.4271, "step": 76160 }, { "epoch": 1.9333553324618293, "grad_norm": 0.33984375, "learning_rate": 1.013217938362515e-06, "loss": 0.4041, "step": 76165 }, { "epoch": 1.933482250510845, "grad_norm": 0.3359375, "learning_rate": 1.0093660672181703e-06, "loss": 0.421, "step": 76170 }, { "epoch": 1.9336091685598609, "grad_norm": 0.3671875, "learning_rate": 1.0055215069848522e-06, "loss": 0.4433, "step": 76175 }, { "epoch": 1.9337360866088766, "grad_norm": 0.330078125, "learning_rate": 1.0016842578512762e-06, "loss": 0.4363, "step": 76180 }, { "epoch": 1.9338630046578924, "grad_norm": 0.35546875, "learning_rate": 9.978543200056754e-07, "loss": 0.3987, "step": 76185 }, { "epoch": 1.9339899227069082, "grad_norm": 0.349609375, "learning_rate": 9.940316936360326e-07, "loss": 0.4596, "step": 76190 }, { "epoch": 1.934116840755924, "grad_norm": 0.341796875, "learning_rate": 9.902163789298812e-07, "loss": 0.3997, "step": 76195 }, { "epoch": 1.9342437588049397, "grad_norm": 0.369140625, "learning_rate": 9.864083760744545e-07, "loss": 0.4089, "step": 76200 }, { "epoch": 1.9343706768539555, "grad_norm": 0.37109375, "learning_rate": 9.8260768525662e-07, "loss": 0.432, "step": 76205 }, { "epoch": 1.9344975949029712, "grad_norm": 0.359375, "learning_rate": 9.788143066628783e-07, "loss": 0.4304, "step": 76210 }, { "epoch": 1.934624512951987, "grad_norm": 0.359375, "learning_rate": 9.750282404793308e-07, "loss": 0.4377, "step": 76215 }, { "epoch": 1.9347514310010028, "grad_norm": 0.357421875, "learning_rate": 9.712494868918118e-07, "loss": 0.4228, "step": 76220 }, { "epoch": 1.9348783490500185, "grad_norm": 0.361328125, "learning_rate": 9.674780460856901e-07, "loss": 0.3763, "step": 76225 }, { "epoch": 1.9350052670990343, "grad_norm": 0.337890625, "learning_rate": 9.637139182460674e-07, "loss": 0.4418, "step": 76230 }, { "epoch": 1.9351321851480499, "grad_norm": 0.33984375, "learning_rate": 9.599571035576292e-07, "loss": 0.4197, "step": 76235 }, { "epoch": 1.9352591031970656, "grad_norm": 0.34375, "learning_rate": 9.56207602204745e-07, "loss": 0.436, "step": 76240 }, { "epoch": 1.9353860212460814, "grad_norm": 0.392578125, "learning_rate": 9.524654143713506e-07, "loss": 0.4269, "step": 76245 }, { "epoch": 1.9355129392950972, "grad_norm": 0.384765625, "learning_rate": 9.487305402411328e-07, "loss": 0.4046, "step": 76250 }, { "epoch": 1.935639857344113, "grad_norm": 0.349609375, "learning_rate": 9.45002979997328e-07, "loss": 0.42, "step": 76255 }, { "epoch": 1.9357667753931287, "grad_norm": 0.361328125, "learning_rate": 9.412827338228568e-07, "loss": 0.4124, "step": 76260 }, { "epoch": 1.9358936934421445, "grad_norm": 0.337890625, "learning_rate": 9.375698019002564e-07, "loss": 0.3998, "step": 76265 }, { "epoch": 1.93602061149116, "grad_norm": 0.353515625, "learning_rate": 9.338641844117479e-07, "loss": 0.4364, "step": 76270 }, { "epoch": 1.9361475295401758, "grad_norm": 0.345703125, "learning_rate": 9.301658815391189e-07, "loss": 0.4301, "step": 76275 }, { "epoch": 1.9362744475891915, "grad_norm": 0.349609375, "learning_rate": 9.264748934638911e-07, "loss": 0.4221, "step": 76280 }, { "epoch": 1.9364013656382073, "grad_norm": 0.373046875, "learning_rate": 9.227912203671528e-07, "loss": 0.4332, "step": 76285 }, { "epoch": 1.936528283687223, "grad_norm": 0.359375, "learning_rate": 9.191148624296596e-07, "loss": 0.8811, "step": 76290 }, { "epoch": 1.9366552017362388, "grad_norm": 0.35546875, "learning_rate": 9.154458198318171e-07, "loss": 0.3887, "step": 76295 }, { "epoch": 1.9367821197852546, "grad_norm": 0.32421875, "learning_rate": 9.11784092753648e-07, "loss": 0.4085, "step": 76300 }, { "epoch": 1.9369090378342704, "grad_norm": 0.37109375, "learning_rate": 9.081296813748584e-07, "loss": 0.4125, "step": 76305 }, { "epoch": 1.9370359558832861, "grad_norm": 0.337890625, "learning_rate": 9.044825858747551e-07, "loss": 0.4412, "step": 76310 }, { "epoch": 1.937162873932302, "grad_norm": 0.341796875, "learning_rate": 9.008428064323114e-07, "loss": 0.4102, "step": 76315 }, { "epoch": 1.9372897919813177, "grad_norm": 0.37109375, "learning_rate": 8.972103432261013e-07, "loss": 0.4106, "step": 76320 }, { "epoch": 1.9374167100303334, "grad_norm": 0.294921875, "learning_rate": 8.935851964343987e-07, "loss": 0.4032, "step": 76325 }, { "epoch": 1.9375436280793492, "grad_norm": 0.36328125, "learning_rate": 8.899673662350615e-07, "loss": 0.4129, "step": 76330 }, { "epoch": 1.937670546128365, "grad_norm": 0.3515625, "learning_rate": 8.863568528056475e-07, "loss": 0.4279, "step": 76335 }, { "epoch": 1.9377974641773807, "grad_norm": 0.375, "learning_rate": 8.827536563232985e-07, "loss": 0.432, "step": 76340 }, { "epoch": 1.9379243822263965, "grad_norm": 0.353515625, "learning_rate": 8.791577769648229e-07, "loss": 0.4323, "step": 76345 }, { "epoch": 1.9380513002754123, "grad_norm": 0.3515625, "learning_rate": 8.755692149066795e-07, "loss": 0.4027, "step": 76350 }, { "epoch": 1.938178218324428, "grad_norm": 0.33984375, "learning_rate": 8.719879703249443e-07, "loss": 0.4035, "step": 76355 }, { "epoch": 1.9383051363734438, "grad_norm": 0.361328125, "learning_rate": 8.684140433953602e-07, "loss": 0.4162, "step": 76360 }, { "epoch": 1.9384320544224596, "grad_norm": 0.333984375, "learning_rate": 8.648474342932865e-07, "loss": 0.4148, "step": 76365 }, { "epoch": 1.9385589724714751, "grad_norm": 0.35546875, "learning_rate": 8.612881431937502e-07, "loss": 0.4323, "step": 76370 }, { "epoch": 1.938685890520491, "grad_norm": 0.341796875, "learning_rate": 8.577361702713947e-07, "loss": 0.4168, "step": 76375 }, { "epoch": 1.9388128085695067, "grad_norm": 0.369140625, "learning_rate": 8.541915157005142e-07, "loss": 0.433, "step": 76380 }, { "epoch": 1.9389397266185224, "grad_norm": 0.3515625, "learning_rate": 8.506541796550526e-07, "loss": 0.4353, "step": 76385 }, { "epoch": 1.9390666446675382, "grad_norm": 0.33984375, "learning_rate": 8.471241623085711e-07, "loss": 0.4227, "step": 76390 }, { "epoch": 1.939193562716554, "grad_norm": 0.361328125, "learning_rate": 8.436014638342981e-07, "loss": 0.3997, "step": 76395 }, { "epoch": 1.9393204807655695, "grad_norm": 0.36328125, "learning_rate": 8.400860844050783e-07, "loss": 0.4205, "step": 76400 }, { "epoch": 1.9394473988145853, "grad_norm": 0.35546875, "learning_rate": 8.365780241934239e-07, "loss": 0.43, "step": 76405 }, { "epoch": 1.939574316863601, "grad_norm": 0.361328125, "learning_rate": 8.330772833714805e-07, "loss": 0.4029, "step": 76410 }, { "epoch": 1.9397012349126168, "grad_norm": 0.3359375, "learning_rate": 8.29583862110994e-07, "loss": 0.4198, "step": 76415 }, { "epoch": 1.9398281529616326, "grad_norm": 0.349609375, "learning_rate": 8.260977605834273e-07, "loss": 0.3928, "step": 76420 }, { "epoch": 1.9399550710106483, "grad_norm": 0.353515625, "learning_rate": 8.226189789598103e-07, "loss": 0.3873, "step": 76425 }, { "epoch": 1.9400819890596641, "grad_norm": 0.326171875, "learning_rate": 8.191475174108397e-07, "loss": 0.411, "step": 76430 }, { "epoch": 1.9402089071086799, "grad_norm": 0.55078125, "learning_rate": 8.156833761068959e-07, "loss": 0.4437, "step": 76435 }, { "epoch": 1.9403358251576956, "grad_norm": 0.365234375, "learning_rate": 8.12226555217943e-07, "loss": 0.4064, "step": 76440 }, { "epoch": 1.9404627432067114, "grad_norm": 0.34765625, "learning_rate": 8.087770549135952e-07, "loss": 0.399, "step": 76445 }, { "epoch": 1.9405896612557272, "grad_norm": 0.34765625, "learning_rate": 8.053348753631339e-07, "loss": 0.4399, "step": 76450 }, { "epoch": 1.940716579304743, "grad_norm": 0.365234375, "learning_rate": 8.019000167354573e-07, "loss": 0.4344, "step": 76455 }, { "epoch": 1.9408434973537587, "grad_norm": 0.375, "learning_rate": 7.984724791991304e-07, "loss": 0.446, "step": 76460 }, { "epoch": 1.9409704154027745, "grad_norm": 0.369140625, "learning_rate": 7.950522629223188e-07, "loss": 0.4527, "step": 76465 }, { "epoch": 1.9410973334517903, "grad_norm": 0.3515625, "learning_rate": 7.916393680728383e-07, "loss": 0.4235, "step": 76470 }, { "epoch": 1.941224251500806, "grad_norm": 0.341796875, "learning_rate": 7.882337948181883e-07, "loss": 0.4019, "step": 76475 }, { "epoch": 1.9413511695498218, "grad_norm": 0.349609375, "learning_rate": 7.84835543325485e-07, "loss": 0.3967, "step": 76480 }, { "epoch": 1.9414780875988376, "grad_norm": 0.341796875, "learning_rate": 7.814446137614449e-07, "loss": 0.4129, "step": 76485 }, { "epoch": 1.9416050056478533, "grad_norm": 0.3671875, "learning_rate": 7.780610062924853e-07, "loss": 0.3895, "step": 76490 }, { "epoch": 1.941731923696869, "grad_norm": 0.33203125, "learning_rate": 7.74684721084623e-07, "loss": 0.4063, "step": 76495 }, { "epoch": 1.9418588417458846, "grad_norm": 0.36328125, "learning_rate": 7.713157583035423e-07, "loss": 0.4023, "step": 76500 }, { "epoch": 1.9419857597949004, "grad_norm": 0.33984375, "learning_rate": 7.67954118114561e-07, "loss": 0.4136, "step": 76505 }, { "epoch": 1.9421126778439162, "grad_norm": 0.333984375, "learning_rate": 7.645998006826304e-07, "loss": 0.4171, "step": 76510 }, { "epoch": 1.942239595892932, "grad_norm": 0.318359375, "learning_rate": 7.612528061723356e-07, "loss": 0.4031, "step": 76515 }, { "epoch": 1.9423665139419477, "grad_norm": 0.349609375, "learning_rate": 7.579131347479118e-07, "loss": 0.4147, "step": 76520 }, { "epoch": 1.9424934319909635, "grad_norm": 0.3671875, "learning_rate": 7.545807865732445e-07, "loss": 0.4073, "step": 76525 }, { "epoch": 1.9426203500399792, "grad_norm": 0.353515625, "learning_rate": 7.512557618118531e-07, "loss": 0.4358, "step": 76530 }, { "epoch": 1.9427472680889948, "grad_norm": 0.33203125, "learning_rate": 7.479380606268737e-07, "loss": 0.4299, "step": 76535 }, { "epoch": 1.9428741861380106, "grad_norm": 0.3359375, "learning_rate": 7.446276831811426e-07, "loss": 0.415, "step": 76540 }, { "epoch": 1.9430011041870263, "grad_norm": 0.341796875, "learning_rate": 7.413246296370634e-07, "loss": 0.4363, "step": 76545 }, { "epoch": 1.943128022236042, "grad_norm": 0.359375, "learning_rate": 7.38028900156723e-07, "loss": 0.401, "step": 76550 }, { "epoch": 1.9432549402850579, "grad_norm": 0.365234375, "learning_rate": 7.347404949018587e-07, "loss": 0.4014, "step": 76555 }, { "epoch": 1.9433818583340736, "grad_norm": 0.365234375, "learning_rate": 7.314594140338081e-07, "loss": 0.4132, "step": 76560 }, { "epoch": 1.9435087763830894, "grad_norm": 0.3984375, "learning_rate": 7.281856577135925e-07, "loss": 0.4384, "step": 76565 }, { "epoch": 1.9436356944321052, "grad_norm": 0.33984375, "learning_rate": 7.249192261018333e-07, "loss": 0.4148, "step": 76570 }, { "epoch": 1.943762612481121, "grad_norm": 0.328125, "learning_rate": 7.216601193588356e-07, "loss": 0.4106, "step": 76575 }, { "epoch": 1.9438895305301367, "grad_norm": 0.341796875, "learning_rate": 7.184083376445216e-07, "loss": 0.4086, "step": 76580 }, { "epoch": 1.9440164485791525, "grad_norm": 0.345703125, "learning_rate": 7.151638811184301e-07, "loss": 0.4067, "step": 76585 }, { "epoch": 1.9441433666281682, "grad_norm": 0.3515625, "learning_rate": 7.119267499398007e-07, "loss": 0.4196, "step": 76590 }, { "epoch": 1.944270284677184, "grad_norm": 0.37890625, "learning_rate": 7.086969442674396e-07, "loss": 0.4201, "step": 76595 }, { "epoch": 1.9443972027261998, "grad_norm": 0.326171875, "learning_rate": 7.054744642598531e-07, "loss": 0.4032, "step": 76600 }, { "epoch": 1.9445241207752155, "grad_norm": 0.3515625, "learning_rate": 7.022593100751816e-07, "loss": 0.4192, "step": 76605 }, { "epoch": 1.9446510388242313, "grad_norm": 0.34375, "learning_rate": 6.99051481871149e-07, "loss": 0.4099, "step": 76610 }, { "epoch": 1.944777956873247, "grad_norm": 0.35546875, "learning_rate": 6.958509798051959e-07, "loss": 0.4174, "step": 76615 }, { "epoch": 1.9449048749222628, "grad_norm": 0.36328125, "learning_rate": 6.926578040343634e-07, "loss": 0.4002, "step": 76620 }, { "epoch": 1.9450317929712786, "grad_norm": 0.333984375, "learning_rate": 6.894719547153427e-07, "loss": 0.4063, "step": 76625 }, { "epoch": 1.9451587110202944, "grad_norm": 0.365234375, "learning_rate": 6.862934320044589e-07, "loss": 0.4409, "step": 76630 }, { "epoch": 1.94528562906931, "grad_norm": 0.3359375, "learning_rate": 6.831222360576705e-07, "loss": 0.4145, "step": 76635 }, { "epoch": 1.9454125471183257, "grad_norm": 0.37109375, "learning_rate": 6.799583670305864e-07, "loss": 0.4146, "step": 76640 }, { "epoch": 1.9455394651673414, "grad_norm": 0.345703125, "learning_rate": 6.768018250784823e-07, "loss": 0.4124, "step": 76645 }, { "epoch": 1.9456663832163572, "grad_norm": 0.345703125, "learning_rate": 6.736526103562178e-07, "loss": 0.4223, "step": 76650 }, { "epoch": 1.945793301265373, "grad_norm": 0.34375, "learning_rate": 6.70510723018336e-07, "loss": 0.4243, "step": 76655 }, { "epoch": 1.9459202193143887, "grad_norm": 0.341796875, "learning_rate": 6.6737616321903e-07, "loss": 0.3873, "step": 76660 }, { "epoch": 1.9460471373634043, "grad_norm": 0.34375, "learning_rate": 6.642489311120769e-07, "loss": 0.4001, "step": 76665 }, { "epoch": 1.94617405541242, "grad_norm": 0.375, "learning_rate": 6.611290268509373e-07, "loss": 0.4105, "step": 76670 }, { "epoch": 1.9463009734614358, "grad_norm": 0.349609375, "learning_rate": 6.580164505887053e-07, "loss": 0.4219, "step": 76675 }, { "epoch": 1.9464278915104516, "grad_norm": 0.349609375, "learning_rate": 6.549112024781256e-07, "loss": 0.4311, "step": 76680 }, { "epoch": 1.9465548095594674, "grad_norm": 0.3671875, "learning_rate": 6.518132826715594e-07, "loss": 0.4308, "step": 76685 }, { "epoch": 1.9466817276084831, "grad_norm": 0.375, "learning_rate": 6.487226913210353e-07, "loss": 0.4112, "step": 76690 }, { "epoch": 1.946808645657499, "grad_norm": 0.34765625, "learning_rate": 6.456394285781818e-07, "loss": 0.4117, "step": 76695 }, { "epoch": 1.9469355637065147, "grad_norm": 0.349609375, "learning_rate": 6.425634945943115e-07, "loss": 0.4156, "step": 76700 }, { "epoch": 1.9470624817555304, "grad_norm": 0.34765625, "learning_rate": 6.394948895203534e-07, "loss": 0.455, "step": 76705 }, { "epoch": 1.9471893998045462, "grad_norm": 0.373046875, "learning_rate": 6.364336135069037e-07, "loss": 0.4112, "step": 76710 }, { "epoch": 1.947316317853562, "grad_norm": 0.337890625, "learning_rate": 6.333796667041424e-07, "loss": 0.418, "step": 76715 }, { "epoch": 1.9474432359025777, "grad_norm": 0.341796875, "learning_rate": 6.303330492619329e-07, "loss": 0.4213, "step": 76720 }, { "epoch": 1.9475701539515935, "grad_norm": 0.34765625, "learning_rate": 6.27293761329789e-07, "loss": 0.418, "step": 76725 }, { "epoch": 1.9476970720006093, "grad_norm": 0.361328125, "learning_rate": 6.242618030568413e-07, "loss": 0.3916, "step": 76730 }, { "epoch": 1.947823990049625, "grad_norm": 0.33984375, "learning_rate": 6.21237174591871e-07, "loss": 0.4258, "step": 76735 }, { "epoch": 1.9479509080986408, "grad_norm": 0.34765625, "learning_rate": 6.182198760832757e-07, "loss": 0.4332, "step": 76740 }, { "epoch": 1.9480778261476566, "grad_norm": 0.37109375, "learning_rate": 6.152099076791206e-07, "loss": 0.44, "step": 76745 }, { "epoch": 1.9482047441966723, "grad_norm": 0.33203125, "learning_rate": 6.122072695271207e-07, "loss": 0.4195, "step": 76750 }, { "epoch": 1.948331662245688, "grad_norm": 0.30078125, "learning_rate": 6.092119617746082e-07, "loss": 0.3723, "step": 76755 }, { "epoch": 1.9484585802947039, "grad_norm": 0.345703125, "learning_rate": 6.06223984568549e-07, "loss": 0.4245, "step": 76760 }, { "epoch": 1.9485854983437194, "grad_norm": 0.341796875, "learning_rate": 6.032433380555757e-07, "loss": 0.4203, "step": 76765 }, { "epoch": 1.9487124163927352, "grad_norm": 0.369140625, "learning_rate": 6.002700223819378e-07, "loss": 0.43, "step": 76770 }, { "epoch": 1.948839334441751, "grad_norm": 0.341796875, "learning_rate": 5.973040376935357e-07, "loss": 0.4191, "step": 76775 }, { "epoch": 1.9489662524907667, "grad_norm": 0.345703125, "learning_rate": 5.943453841359191e-07, "loss": 0.4059, "step": 76780 }, { "epoch": 1.9490931705397825, "grad_norm": 0.365234375, "learning_rate": 5.913940618542556e-07, "loss": 0.4421, "step": 76785 }, { "epoch": 1.9492200885887982, "grad_norm": 0.3203125, "learning_rate": 5.884500709933626e-07, "loss": 0.3911, "step": 76790 }, { "epoch": 1.949347006637814, "grad_norm": 0.373046875, "learning_rate": 5.855134116977078e-07, "loss": 0.4491, "step": 76795 }, { "epoch": 1.9494739246868296, "grad_norm": 0.34765625, "learning_rate": 5.825840841113927e-07, "loss": 0.4064, "step": 76800 }, { "epoch": 1.9496008427358453, "grad_norm": 0.33203125, "learning_rate": 5.796620883781688e-07, "loss": 0.4161, "step": 76805 }, { "epoch": 1.949727760784861, "grad_norm": 0.353515625, "learning_rate": 5.767474246413884e-07, "loss": 0.4141, "step": 76810 }, { "epoch": 1.9498546788338769, "grad_norm": 0.337890625, "learning_rate": 5.738400930441034e-07, "loss": 0.4004, "step": 76815 }, { "epoch": 1.9499815968828926, "grad_norm": 0.36328125, "learning_rate": 5.709400937289666e-07, "loss": 0.401, "step": 76820 }, { "epoch": 1.9501085149319084, "grad_norm": 0.361328125, "learning_rate": 5.68047426838264e-07, "loss": 0.3948, "step": 76825 }, { "epoch": 1.9502354329809242, "grad_norm": 0.33984375, "learning_rate": 5.651620925139655e-07, "loss": 0.4074, "step": 76830 }, { "epoch": 1.95036235102994, "grad_norm": 0.345703125, "learning_rate": 5.622840908976245e-07, "loss": 0.4064, "step": 76835 }, { "epoch": 1.9504892690789557, "grad_norm": 0.345703125, "learning_rate": 5.59413422130478e-07, "loss": 0.41, "step": 76840 }, { "epoch": 1.9506161871279715, "grad_norm": 0.361328125, "learning_rate": 5.5655008635338e-07, "loss": 0.4394, "step": 76845 }, { "epoch": 1.9507431051769872, "grad_norm": 0.35546875, "learning_rate": 5.536940837068516e-07, "loss": 0.42, "step": 76850 }, { "epoch": 1.950870023226003, "grad_norm": 0.384765625, "learning_rate": 5.508454143310137e-07, "loss": 0.4256, "step": 76855 }, { "epoch": 1.9509969412750188, "grad_norm": 0.34375, "learning_rate": 5.480040783656714e-07, "loss": 0.4335, "step": 76860 }, { "epoch": 1.9511238593240345, "grad_norm": 0.359375, "learning_rate": 5.451700759502298e-07, "loss": 0.4311, "step": 76865 }, { "epoch": 1.9512507773730503, "grad_norm": 0.337890625, "learning_rate": 5.423434072237443e-07, "loss": 0.3954, "step": 76870 }, { "epoch": 1.951377695422066, "grad_norm": 0.37890625, "learning_rate": 5.395240723249539e-07, "loss": 0.392, "step": 76875 }, { "epoch": 1.9515046134710818, "grad_norm": 0.35546875, "learning_rate": 5.367120713921813e-07, "loss": 0.4107, "step": 76880 }, { "epoch": 1.9516315315200976, "grad_norm": 0.375, "learning_rate": 5.339074045633996e-07, "loss": 0.4338, "step": 76885 }, { "epoch": 1.9517584495691134, "grad_norm": 0.359375, "learning_rate": 5.311100719762318e-07, "loss": 0.3956, "step": 76890 }, { "epoch": 1.951885367618129, "grad_norm": 0.337890625, "learning_rate": 5.283200737679683e-07, "loss": 0.4196, "step": 76895 }, { "epoch": 1.9520122856671447, "grad_norm": 0.341796875, "learning_rate": 5.255374100754994e-07, "loss": 0.4303, "step": 76900 }, { "epoch": 1.9521392037161605, "grad_norm": 0.337890625, "learning_rate": 5.227620810353661e-07, "loss": 0.386, "step": 76905 }, { "epoch": 1.9522661217651762, "grad_norm": 0.357421875, "learning_rate": 5.199940867837593e-07, "loss": 0.4329, "step": 76910 }, { "epoch": 1.952393039814192, "grad_norm": 0.33984375, "learning_rate": 5.172334274564871e-07, "loss": 0.4243, "step": 76915 }, { "epoch": 1.9525199578632078, "grad_norm": 0.330078125, "learning_rate": 5.144801031890411e-07, "loss": 0.4137, "step": 76920 }, { "epoch": 1.9526468759122235, "grad_norm": 0.34765625, "learning_rate": 5.117341141164966e-07, "loss": 0.4267, "step": 76925 }, { "epoch": 1.952773793961239, "grad_norm": 0.3515625, "learning_rate": 5.089954603736124e-07, "loss": 0.4289, "step": 76930 }, { "epoch": 1.9529007120102548, "grad_norm": 0.349609375, "learning_rate": 5.062641420947811e-07, "loss": 0.4116, "step": 76935 }, { "epoch": 1.9530276300592706, "grad_norm": 0.396484375, "learning_rate": 5.03540159414012e-07, "loss": 0.4242, "step": 76940 }, { "epoch": 1.9531545481082864, "grad_norm": 0.337890625, "learning_rate": 5.008235124649818e-07, "loss": 0.4162, "step": 76945 }, { "epoch": 1.9532814661573021, "grad_norm": 0.361328125, "learning_rate": 4.981142013809836e-07, "loss": 0.3802, "step": 76950 }, { "epoch": 1.953408384206318, "grad_norm": 0.3125, "learning_rate": 4.954122262949778e-07, "loss": 0.4147, "step": 76955 }, { "epoch": 1.9535353022553337, "grad_norm": 0.314453125, "learning_rate": 4.927175873395417e-07, "loss": 0.4035, "step": 76960 }, { "epoch": 1.9536622203043494, "grad_norm": 0.322265625, "learning_rate": 4.900302846468862e-07, "loss": 0.3965, "step": 76965 }, { "epoch": 1.9537891383533652, "grad_norm": 0.3515625, "learning_rate": 4.873503183488891e-07, "loss": 0.4085, "step": 76970 }, { "epoch": 1.953916056402381, "grad_norm": 0.361328125, "learning_rate": 4.84677688577062e-07, "loss": 0.434, "step": 76975 }, { "epoch": 1.9540429744513967, "grad_norm": 0.33984375, "learning_rate": 4.820123954625332e-07, "loss": 0.41, "step": 76980 }, { "epoch": 1.9541698925004125, "grad_norm": 0.291015625, "learning_rate": 4.793544391361148e-07, "loss": 0.3966, "step": 76985 }, { "epoch": 1.9542968105494283, "grad_norm": 0.35546875, "learning_rate": 4.767038197282025e-07, "loss": 0.4252, "step": 76990 }, { "epoch": 1.954423728598444, "grad_norm": 0.33984375, "learning_rate": 4.7406053736887553e-07, "loss": 0.4198, "step": 76995 }, { "epoch": 1.9545506466474598, "grad_norm": 0.353515625, "learning_rate": 4.7142459218783014e-07, "loss": 0.4383, "step": 77000 }, { "epoch": 1.9546775646964756, "grad_norm": 0.34765625, "learning_rate": 4.687959843144129e-07, "loss": 0.4282, "step": 77005 }, { "epoch": 1.9548044827454913, "grad_norm": 0.361328125, "learning_rate": 4.661747138776206e-07, "loss": 0.4273, "step": 77010 }, { "epoch": 1.9549314007945071, "grad_norm": 0.365234375, "learning_rate": 4.6356078100608374e-07, "loss": 0.43, "step": 77015 }, { "epoch": 1.9550583188435229, "grad_norm": 0.33984375, "learning_rate": 4.60954185828033e-07, "loss": 0.3985, "step": 77020 }, { "epoch": 1.9551852368925386, "grad_norm": 0.3828125, "learning_rate": 4.583549284713994e-07, "loss": 0.4202, "step": 77025 }, { "epoch": 1.9553121549415542, "grad_norm": 0.36328125, "learning_rate": 4.557630090637143e-07, "loss": 0.3762, "step": 77030 }, { "epoch": 1.95543907299057, "grad_norm": 0.349609375, "learning_rate": 4.5317842773217596e-07, "loss": 0.4168, "step": 77035 }, { "epoch": 1.9555659910395857, "grad_norm": 0.3359375, "learning_rate": 4.506011846035995e-07, "loss": 0.4187, "step": 77040 }, { "epoch": 1.9556929090886015, "grad_norm": 0.34375, "learning_rate": 4.4803127980445054e-07, "loss": 0.4333, "step": 77045 }, { "epoch": 1.9558198271376173, "grad_norm": 0.35546875, "learning_rate": 4.4546871346084476e-07, "loss": 0.4202, "step": 77050 }, { "epoch": 1.955946745186633, "grad_norm": 0.359375, "learning_rate": 4.429134856984984e-07, "loss": 0.4059, "step": 77055 }, { "epoch": 1.9560736632356488, "grad_norm": 0.3359375, "learning_rate": 4.40365596642811e-07, "loss": 0.4559, "step": 77060 }, { "epoch": 1.9562005812846643, "grad_norm": 0.34765625, "learning_rate": 4.37825046418816e-07, "loss": 0.4048, "step": 77065 }, { "epoch": 1.95632749933368, "grad_norm": 0.345703125, "learning_rate": 4.3529183515118026e-07, "loss": 0.4147, "step": 77070 }, { "epoch": 1.9564544173826959, "grad_norm": 0.357421875, "learning_rate": 4.3276596296418776e-07, "loss": 0.436, "step": 77075 }, { "epoch": 1.9565813354317116, "grad_norm": 0.353515625, "learning_rate": 4.302474299817893e-07, "loss": 0.4229, "step": 77080 }, { "epoch": 1.9567082534807274, "grad_norm": 0.35546875, "learning_rate": 4.277362363275694e-07, "loss": 0.3969, "step": 77085 }, { "epoch": 1.9568351715297432, "grad_norm": 0.330078125, "learning_rate": 4.252323821247461e-07, "loss": 0.4125, "step": 77090 }, { "epoch": 1.956962089578759, "grad_norm": 0.361328125, "learning_rate": 4.227358674962045e-07, "loss": 0.408, "step": 77095 }, { "epoch": 1.9570890076277747, "grad_norm": 0.345703125, "learning_rate": 4.2024669256441324e-07, "loss": 0.4231, "step": 77100 }, { "epoch": 1.9572159256767905, "grad_norm": 0.341796875, "learning_rate": 4.177648574515413e-07, "loss": 0.4222, "step": 77105 }, { "epoch": 1.9573428437258062, "grad_norm": 0.359375, "learning_rate": 4.15290362279358e-07, "loss": 0.4217, "step": 77110 }, { "epoch": 1.957469761774822, "grad_norm": 0.333984375, "learning_rate": 4.128232071692994e-07, "loss": 0.3854, "step": 77115 }, { "epoch": 1.9575966798238378, "grad_norm": 0.3359375, "learning_rate": 4.1036339224241876e-07, "loss": 0.4245, "step": 77120 }, { "epoch": 1.9577235978728535, "grad_norm": 0.349609375, "learning_rate": 4.079109176194195e-07, "loss": 0.4514, "step": 77125 }, { "epoch": 1.9578505159218693, "grad_norm": 0.345703125, "learning_rate": 4.054657834206387e-07, "loss": 0.4054, "step": 77130 }, { "epoch": 1.957977433970885, "grad_norm": 0.337890625, "learning_rate": 4.030279897660471e-07, "loss": 0.4124, "step": 77135 }, { "epoch": 1.9581043520199009, "grad_norm": 0.357421875, "learning_rate": 4.0059753677529893e-07, "loss": 0.4313, "step": 77140 }, { "epoch": 1.9582312700689166, "grad_norm": 0.34375, "learning_rate": 3.9817442456763217e-07, "loss": 0.4142, "step": 77145 }, { "epoch": 1.9583581881179324, "grad_norm": 0.345703125, "learning_rate": 3.957586532619683e-07, "loss": 0.4117, "step": 77150 }, { "epoch": 1.9584851061669482, "grad_norm": 0.330078125, "learning_rate": 3.933502229768126e-07, "loss": 0.3883, "step": 77155 }, { "epoch": 1.9586120242159637, "grad_norm": 0.353515625, "learning_rate": 3.909491338303872e-07, "loss": 0.4309, "step": 77160 }, { "epoch": 1.9587389422649795, "grad_norm": 0.376953125, "learning_rate": 3.885553859404811e-07, "loss": 0.424, "step": 77165 }, { "epoch": 1.9588658603139952, "grad_norm": 0.3515625, "learning_rate": 3.8616897942456703e-07, "loss": 0.4535, "step": 77170 }, { "epoch": 1.958992778363011, "grad_norm": 0.36328125, "learning_rate": 3.8378991439975135e-07, "loss": 0.4556, "step": 77175 }, { "epoch": 1.9591196964120268, "grad_norm": 0.3359375, "learning_rate": 3.814181909827574e-07, "loss": 0.4156, "step": 77180 }, { "epoch": 1.9592466144610425, "grad_norm": 0.36328125, "learning_rate": 3.7905380928995864e-07, "loss": 0.4486, "step": 77185 }, { "epoch": 1.9593735325100583, "grad_norm": 0.353515625, "learning_rate": 3.766967694374123e-07, "loss": 0.4374, "step": 77190 }, { "epoch": 1.9595004505590738, "grad_norm": 0.32421875, "learning_rate": 3.7434707154074263e-07, "loss": 0.4191, "step": 77195 }, { "epoch": 1.9596273686080896, "grad_norm": 0.345703125, "learning_rate": 3.720047157152739e-07, "loss": 0.3799, "step": 77200 }, { "epoch": 1.9597542866571054, "grad_norm": 0.357421875, "learning_rate": 3.696697020759309e-07, "loss": 0.4132, "step": 77205 }, { "epoch": 1.9598812047061211, "grad_norm": 0.33203125, "learning_rate": 3.673420307372721e-07, "loss": 0.3787, "step": 77210 }, { "epoch": 1.960008122755137, "grad_norm": 0.3515625, "learning_rate": 3.650217018135393e-07, "loss": 0.3941, "step": 77215 }, { "epoch": 1.9601350408041527, "grad_norm": 0.34765625, "learning_rate": 3.627087154186081e-07, "loss": 0.4101, "step": 77220 }, { "epoch": 1.9602619588531685, "grad_norm": 0.357421875, "learning_rate": 3.6040307166593783e-07, "loss": 0.4241, "step": 77225 }, { "epoch": 1.9603888769021842, "grad_norm": 0.345703125, "learning_rate": 3.5810477066867126e-07, "loss": 0.4018, "step": 77230 }, { "epoch": 1.9605157949512, "grad_norm": 0.365234375, "learning_rate": 3.5581381253961813e-07, "loss": 0.4244, "step": 77235 }, { "epoch": 1.9606427130002158, "grad_norm": 0.337890625, "learning_rate": 3.535301973911553e-07, "loss": 0.4258, "step": 77240 }, { "epoch": 1.9607696310492315, "grad_norm": 0.373046875, "learning_rate": 3.512539253353597e-07, "loss": 0.4328, "step": 77245 }, { "epoch": 1.9608965490982473, "grad_norm": 0.345703125, "learning_rate": 3.4898499648390866e-07, "loss": 0.4063, "step": 77250 }, { "epoch": 1.961023467147263, "grad_norm": 0.365234375, "learning_rate": 3.467234109481798e-07, "loss": 0.414, "step": 77255 }, { "epoch": 1.9611503851962788, "grad_norm": 0.349609375, "learning_rate": 3.44469168839101e-07, "loss": 0.4235, "step": 77260 }, { "epoch": 1.9612773032452946, "grad_norm": 0.38671875, "learning_rate": 3.4222227026731717e-07, "loss": 0.4149, "step": 77265 }, { "epoch": 1.9614042212943104, "grad_norm": 0.3203125, "learning_rate": 3.3998271534305677e-07, "loss": 0.3902, "step": 77270 }, { "epoch": 1.9615311393433261, "grad_norm": 0.36328125, "learning_rate": 3.377505041762485e-07, "loss": 0.379, "step": 77275 }, { "epoch": 1.961658057392342, "grad_norm": 0.34375, "learning_rate": 3.3552563687638813e-07, "loss": 0.4322, "step": 77280 }, { "epoch": 1.9617849754413577, "grad_norm": 0.33984375, "learning_rate": 3.3330811355267163e-07, "loss": 0.4233, "step": 77285 }, { "epoch": 1.9619118934903734, "grad_norm": 0.328125, "learning_rate": 3.3109793431391196e-07, "loss": 0.4167, "step": 77290 }, { "epoch": 1.962038811539389, "grad_norm": 0.353515625, "learning_rate": 3.288950992685557e-07, "loss": 0.4118, "step": 77295 }, { "epoch": 1.9621657295884047, "grad_norm": 0.35546875, "learning_rate": 3.266996085246998e-07, "loss": 0.4122, "step": 77300 }, { "epoch": 1.9622926476374205, "grad_norm": 0.34375, "learning_rate": 3.245114621900746e-07, "loss": 0.4293, "step": 77305 }, { "epoch": 1.9624195656864363, "grad_norm": 0.35546875, "learning_rate": 3.2233066037204436e-07, "loss": 0.4098, "step": 77310 }, { "epoch": 1.962546483735452, "grad_norm": 0.34375, "learning_rate": 3.201572031776234e-07, "loss": 0.404, "step": 77315 }, { "epoch": 1.9626734017844678, "grad_norm": 0.365234375, "learning_rate": 3.1799109071345976e-07, "loss": 0.4317, "step": 77320 }, { "epoch": 1.9628003198334834, "grad_norm": 0.33984375, "learning_rate": 3.158323230858684e-07, "loss": 0.4414, "step": 77325 }, { "epoch": 1.9629272378824991, "grad_norm": 0.3515625, "learning_rate": 3.136809004007479e-07, "loss": 0.411, "step": 77330 }, { "epoch": 1.9630541559315149, "grad_norm": 0.375, "learning_rate": 3.115368227636805e-07, "loss": 0.4427, "step": 77335 }, { "epoch": 1.9631810739805307, "grad_norm": 0.314453125, "learning_rate": 3.094000902798821e-07, "loss": 0.4029, "step": 77340 }, { "epoch": 1.9633079920295464, "grad_norm": 0.32421875, "learning_rate": 3.072707030541854e-07, "loss": 0.4123, "step": 77345 }, { "epoch": 1.9634349100785622, "grad_norm": 0.33203125, "learning_rate": 3.051486611910736e-07, "loss": 0.4012, "step": 77350 }, { "epoch": 1.963561828127578, "grad_norm": 0.353515625, "learning_rate": 3.030339647947133e-07, "loss": 0.4252, "step": 77355 }, { "epoch": 1.9636887461765937, "grad_norm": 0.361328125, "learning_rate": 3.009266139688382e-07, "loss": 0.4346, "step": 77360 }, { "epoch": 1.9638156642256095, "grad_norm": 0.361328125, "learning_rate": 2.9882660881684894e-07, "loss": 0.4285, "step": 77365 }, { "epoch": 1.9639425822746253, "grad_norm": 0.35546875, "learning_rate": 2.9673394944181304e-07, "loss": 0.4119, "step": 77370 }, { "epoch": 1.964069500323641, "grad_norm": 0.349609375, "learning_rate": 2.946486359464151e-07, "loss": 0.4138, "step": 77375 }, { "epoch": 1.9641964183726568, "grad_norm": 0.333984375, "learning_rate": 2.925706684329565e-07, "loss": 0.4023, "step": 77380 }, { "epoch": 1.9643233364216726, "grad_norm": 0.326171875, "learning_rate": 2.905000470034391e-07, "loss": 0.3932, "step": 77385 }, { "epoch": 1.9644502544706883, "grad_norm": 0.345703125, "learning_rate": 2.8843677175944825e-07, "loss": 0.4272, "step": 77390 }, { "epoch": 1.964577172519704, "grad_norm": 0.365234375, "learning_rate": 2.8638084280223627e-07, "loss": 0.4546, "step": 77395 }, { "epoch": 1.9647040905687199, "grad_norm": 0.33203125, "learning_rate": 2.8433226023265586e-07, "loss": 0.4049, "step": 77400 }, { "epoch": 1.9648310086177356, "grad_norm": 0.37109375, "learning_rate": 2.8229102415127657e-07, "loss": 0.4068, "step": 77405 }, { "epoch": 1.9649579266667514, "grad_norm": 0.345703125, "learning_rate": 2.802571346582183e-07, "loss": 0.3923, "step": 77410 }, { "epoch": 1.9650848447157672, "grad_norm": 0.333984375, "learning_rate": 2.782305918533179e-07, "loss": 0.4071, "step": 77415 }, { "epoch": 1.965211762764783, "grad_norm": 0.34765625, "learning_rate": 2.762113958359957e-07, "loss": 0.4145, "step": 77420 }, { "epoch": 1.9653386808137985, "grad_norm": 0.373046875, "learning_rate": 2.7419954670533927e-07, "loss": 0.4281, "step": 77425 }, { "epoch": 1.9654655988628142, "grad_norm": 0.33984375, "learning_rate": 2.7219504456006955e-07, "loss": 0.4147, "step": 77430 }, { "epoch": 1.96559251691183, "grad_norm": 0.37109375, "learning_rate": 2.7019788949854124e-07, "loss": 0.3833, "step": 77435 }, { "epoch": 1.9657194349608458, "grad_norm": 0.34765625, "learning_rate": 2.6820808161877596e-07, "loss": 0.4312, "step": 77440 }, { "epoch": 1.9658463530098615, "grad_norm": 0.3359375, "learning_rate": 2.6622562101837883e-07, "loss": 0.4009, "step": 77445 }, { "epoch": 1.9659732710588773, "grad_norm": 0.333984375, "learning_rate": 2.642505077946555e-07, "loss": 0.408, "step": 77450 }, { "epoch": 1.966100189107893, "grad_norm": 0.330078125, "learning_rate": 2.622827420445117e-07, "loss": 0.3997, "step": 77455 }, { "epoch": 1.9662271071569086, "grad_norm": 0.3359375, "learning_rate": 2.603223238645036e-07, "loss": 0.419, "step": 77460 }, { "epoch": 1.9663540252059244, "grad_norm": 0.33984375, "learning_rate": 2.583692533508208e-07, "loss": 0.4441, "step": 77465 }, { "epoch": 1.9664809432549402, "grad_norm": 0.345703125, "learning_rate": 2.5642353059933675e-07, "loss": 0.435, "step": 77470 }, { "epoch": 1.966607861303956, "grad_norm": 0.375, "learning_rate": 2.54485155705475e-07, "loss": 0.4199, "step": 77475 }, { "epoch": 1.9667347793529717, "grad_norm": 0.33203125, "learning_rate": 2.5255412876439283e-07, "loss": 0.4076, "step": 77480 }, { "epoch": 1.9668616974019875, "grad_norm": 0.37109375, "learning_rate": 2.5063044987083113e-07, "loss": 0.4143, "step": 77485 }, { "epoch": 1.9669886154510032, "grad_norm": 0.279296875, "learning_rate": 2.4871411911918105e-07, "loss": 0.3845, "step": 77490 }, { "epoch": 1.967115533500019, "grad_norm": 0.357421875, "learning_rate": 2.4680513660346736e-07, "loss": 0.3866, "step": 77495 }, { "epoch": 1.9672424515490348, "grad_norm": 0.361328125, "learning_rate": 2.4490350241736514e-07, "loss": 0.4135, "step": 77500 }, { "epoch": 1.9673693695980505, "grad_norm": 0.353515625, "learning_rate": 2.4300921665421634e-07, "loss": 0.4121, "step": 77505 }, { "epoch": 1.9674962876470663, "grad_norm": 0.376953125, "learning_rate": 2.4112227940693007e-07, "loss": 0.4258, "step": 77510 }, { "epoch": 1.967623205696082, "grad_norm": 0.369140625, "learning_rate": 2.392426907681322e-07, "loss": 0.4206, "step": 77515 }, { "epoch": 1.9677501237450978, "grad_norm": 0.34375, "learning_rate": 2.373704508300489e-07, "loss": 0.4492, "step": 77520 }, { "epoch": 1.9678770417941136, "grad_norm": 0.373046875, "learning_rate": 2.3550555968452344e-07, "loss": 0.407, "step": 77525 }, { "epoch": 1.9680039598431294, "grad_norm": 0.35546875, "learning_rate": 2.336480174230826e-07, "loss": 0.409, "step": 77530 }, { "epoch": 1.9681308778921451, "grad_norm": 0.345703125, "learning_rate": 2.3179782413688674e-07, "loss": 0.4161, "step": 77535 }, { "epoch": 1.968257795941161, "grad_norm": 0.3671875, "learning_rate": 2.299549799167133e-07, "loss": 0.4388, "step": 77540 }, { "epoch": 1.9683847139901767, "grad_norm": 0.30859375, "learning_rate": 2.2811948485297326e-07, "loss": 0.3857, "step": 77545 }, { "epoch": 1.9685116320391924, "grad_norm": 0.365234375, "learning_rate": 2.2629133903576124e-07, "loss": 0.4399, "step": 77550 }, { "epoch": 1.9686385500882082, "grad_norm": 0.345703125, "learning_rate": 2.2447054255477215e-07, "loss": 0.3919, "step": 77555 }, { "epoch": 1.9687654681372238, "grad_norm": 0.3671875, "learning_rate": 2.2265709549935118e-07, "loss": 0.4421, "step": 77560 }, { "epoch": 1.9688923861862395, "grad_norm": 0.33984375, "learning_rate": 2.2085099795847716e-07, "loss": 0.3986, "step": 77565 }, { "epoch": 1.9690193042352553, "grad_norm": 0.353515625, "learning_rate": 2.1905225002079585e-07, "loss": 0.3951, "step": 77570 }, { "epoch": 1.969146222284271, "grad_norm": 0.341796875, "learning_rate": 2.1726085177455334e-07, "loss": 0.4018, "step": 77575 }, { "epoch": 1.9692731403332868, "grad_norm": 0.375, "learning_rate": 2.1547680330764593e-07, "loss": 0.4175, "step": 77580 }, { "epoch": 1.9694000583823026, "grad_norm": 0.33984375, "learning_rate": 2.13700104707637e-07, "loss": 0.4047, "step": 77585 }, { "epoch": 1.9695269764313181, "grad_norm": 0.3203125, "learning_rate": 2.1193075606169008e-07, "loss": 0.4102, "step": 77590 }, { "epoch": 1.969653894480334, "grad_norm": 0.35546875, "learning_rate": 2.1016875745663575e-07, "loss": 0.4311, "step": 77595 }, { "epoch": 1.9697808125293497, "grad_norm": 0.359375, "learning_rate": 2.0841410897893818e-07, "loss": 0.4266, "step": 77600 }, { "epoch": 1.9699077305783654, "grad_norm": 0.341796875, "learning_rate": 2.0666681071469515e-07, "loss": 0.3932, "step": 77605 }, { "epoch": 1.9700346486273812, "grad_norm": 0.3671875, "learning_rate": 2.049268627496381e-07, "loss": 0.4229, "step": 77610 }, { "epoch": 1.970161566676397, "grad_norm": 0.359375, "learning_rate": 2.0319426516914872e-07, "loss": 0.4013, "step": 77615 }, { "epoch": 1.9702884847254127, "grad_norm": 0.36328125, "learning_rate": 2.0146901805824233e-07, "loss": 0.4144, "step": 77620 }, { "epoch": 1.9704154027744285, "grad_norm": 0.31640625, "learning_rate": 1.9975112150158458e-07, "loss": 0.399, "step": 77625 }, { "epoch": 1.9705423208234443, "grad_norm": 0.3515625, "learning_rate": 1.9804057558347464e-07, "loss": 0.4002, "step": 77630 }, { "epoch": 1.97066923887246, "grad_norm": 0.330078125, "learning_rate": 1.9633738038782875e-07, "loss": 0.3971, "step": 77635 }, { "epoch": 1.9707961569214758, "grad_norm": 0.333984375, "learning_rate": 1.9464153599824674e-07, "loss": 0.3855, "step": 77640 }, { "epoch": 1.9709230749704916, "grad_norm": 0.3515625, "learning_rate": 1.92953042497912e-07, "loss": 0.4251, "step": 77645 }, { "epoch": 1.9710499930195073, "grad_norm": 0.341796875, "learning_rate": 1.9127189996972493e-07, "loss": 0.3909, "step": 77650 }, { "epoch": 1.971176911068523, "grad_norm": 0.345703125, "learning_rate": 1.895981084961362e-07, "loss": 0.4363, "step": 77655 }, { "epoch": 1.9713038291175389, "grad_norm": 0.341796875, "learning_rate": 1.8793166815928018e-07, "loss": 0.42, "step": 77660 }, { "epoch": 1.9714307471665546, "grad_norm": 0.375, "learning_rate": 1.8627257904095804e-07, "loss": 0.4027, "step": 77665 }, { "epoch": 1.9715576652155704, "grad_norm": 0.3359375, "learning_rate": 1.846208412225547e-07, "loss": 0.3941, "step": 77670 }, { "epoch": 1.9716845832645862, "grad_norm": 0.33203125, "learning_rate": 1.8297645478513868e-07, "loss": 0.4226, "step": 77675 }, { "epoch": 1.971811501313602, "grad_norm": 0.34375, "learning_rate": 1.8133941980937872e-07, "loss": 0.4325, "step": 77680 }, { "epoch": 1.9719384193626177, "grad_norm": 0.35546875, "learning_rate": 1.7970973637562725e-07, "loss": 0.4201, "step": 77685 }, { "epoch": 1.9720653374116333, "grad_norm": 0.3359375, "learning_rate": 1.7808740456383697e-07, "loss": 0.4039, "step": 77690 }, { "epoch": 1.972192255460649, "grad_norm": 0.3515625, "learning_rate": 1.7647242445361086e-07, "loss": 0.4218, "step": 77695 }, { "epoch": 1.9723191735096648, "grad_norm": 0.2890625, "learning_rate": 1.7486479612420224e-07, "loss": 0.3872, "step": 77700 }, { "epoch": 1.9724460915586806, "grad_norm": 0.33203125, "learning_rate": 1.7326451965449795e-07, "loss": 0.414, "step": 77705 }, { "epoch": 1.9725730096076963, "grad_norm": 0.36328125, "learning_rate": 1.7167159512301853e-07, "loss": 0.4055, "step": 77710 }, { "epoch": 1.972699927656712, "grad_norm": 0.32421875, "learning_rate": 1.700860226079348e-07, "loss": 0.3986, "step": 77715 }, { "epoch": 1.9728268457057279, "grad_norm": 0.373046875, "learning_rate": 1.6850780218705117e-07, "loss": 0.4139, "step": 77720 }, { "epoch": 1.9729537637547434, "grad_norm": 0.33203125, "learning_rate": 1.669369339378057e-07, "loss": 0.4052, "step": 77725 }, { "epoch": 1.9730806818037592, "grad_norm": 0.33984375, "learning_rate": 1.653734179372701e-07, "loss": 0.4271, "step": 77730 }, { "epoch": 1.973207599852775, "grad_norm": 0.36328125, "learning_rate": 1.6381725426218294e-07, "loss": 0.4358, "step": 77735 }, { "epoch": 1.9733345179017907, "grad_norm": 0.359375, "learning_rate": 1.6226844298889984e-07, "loss": 0.4196, "step": 77740 }, { "epoch": 1.9734614359508065, "grad_norm": 0.353515625, "learning_rate": 1.6072698419341002e-07, "loss": 0.4137, "step": 77745 }, { "epoch": 1.9735883539998222, "grad_norm": 0.330078125, "learning_rate": 1.5919287795135293e-07, "loss": 0.415, "step": 77750 }, { "epoch": 1.973715272048838, "grad_norm": 0.349609375, "learning_rate": 1.5766612433803505e-07, "loss": 0.4312, "step": 77755 }, { "epoch": 1.9738421900978538, "grad_norm": 0.34375, "learning_rate": 1.561467234283298e-07, "loss": 0.4036, "step": 77760 }, { "epoch": 1.9739691081468695, "grad_norm": 0.33984375, "learning_rate": 1.546346752968275e-07, "loss": 0.394, "step": 77765 }, { "epoch": 1.9740960261958853, "grad_norm": 0.365234375, "learning_rate": 1.5312998001771882e-07, "loss": 0.4194, "step": 77770 }, { "epoch": 1.974222944244901, "grad_norm": 0.373046875, "learning_rate": 1.5163263766482803e-07, "loss": 0.4413, "step": 77775 }, { "epoch": 1.9743498622939168, "grad_norm": 0.373046875, "learning_rate": 1.501426483116297e-07, "loss": 0.4336, "step": 77780 }, { "epoch": 1.9744767803429326, "grad_norm": 0.33203125, "learning_rate": 1.48660012031232e-07, "loss": 0.403, "step": 77785 }, { "epoch": 1.9746036983919484, "grad_norm": 0.328125, "learning_rate": 1.471847288964101e-07, "loss": 0.3891, "step": 77790 }, { "epoch": 1.9747306164409641, "grad_norm": 0.36328125, "learning_rate": 1.4571679897953936e-07, "loss": 0.416, "step": 77795 }, { "epoch": 1.97485753448998, "grad_norm": 0.36328125, "learning_rate": 1.4425622235264555e-07, "loss": 0.3912, "step": 77800 }, { "epoch": 1.9749844525389957, "grad_norm": 0.3359375, "learning_rate": 1.4280299908740468e-07, "loss": 0.3981, "step": 77805 }, { "epoch": 1.9751113705880115, "grad_norm": 0.34765625, "learning_rate": 1.4135712925512633e-07, "loss": 0.4025, "step": 77810 }, { "epoch": 1.9752382886370272, "grad_norm": 0.3515625, "learning_rate": 1.3991861292677042e-07, "loss": 0.3946, "step": 77815 }, { "epoch": 1.975365206686043, "grad_norm": 0.369140625, "learning_rate": 1.384874501728972e-07, "loss": 0.4188, "step": 77820 }, { "epoch": 1.9754921247350585, "grad_norm": 0.333984375, "learning_rate": 1.3706364106375046e-07, "loss": 0.3867, "step": 77825 }, { "epoch": 1.9756190427840743, "grad_norm": 0.32421875, "learning_rate": 1.3564718566920762e-07, "loss": 0.4126, "step": 77830 }, { "epoch": 1.97574596083309, "grad_norm": 0.35546875, "learning_rate": 1.3423808405874647e-07, "loss": 0.4054, "step": 77835 }, { "epoch": 1.9758728788821058, "grad_norm": 0.357421875, "learning_rate": 1.3283633630152834e-07, "loss": 0.4444, "step": 77840 }, { "epoch": 1.9759997969311216, "grad_norm": 0.369140625, "learning_rate": 1.314419424663482e-07, "loss": 0.4242, "step": 77845 }, { "epoch": 1.9761267149801374, "grad_norm": 0.34375, "learning_rate": 1.3005490262160135e-07, "loss": 0.3975, "step": 77850 }, { "epoch": 1.976253633029153, "grad_norm": 0.33984375, "learning_rate": 1.2867521683534998e-07, "loss": 0.4331, "step": 77855 }, { "epoch": 1.9763805510781687, "grad_norm": 0.359375, "learning_rate": 1.2730288517532327e-07, "loss": 0.4201, "step": 77860 }, { "epoch": 1.9765074691271844, "grad_norm": 0.341796875, "learning_rate": 1.2593790770883405e-07, "loss": 0.4246, "step": 77865 }, { "epoch": 1.9766343871762002, "grad_norm": 0.349609375, "learning_rate": 1.2458028450286207e-07, "loss": 0.4144, "step": 77870 }, { "epoch": 1.976761305225216, "grad_norm": 0.37109375, "learning_rate": 1.2323001562405399e-07, "loss": 0.4066, "step": 77875 }, { "epoch": 1.9768882232742317, "grad_norm": 0.34765625, "learning_rate": 1.218871011386402e-07, "loss": 0.3992, "step": 77880 }, { "epoch": 1.9770151413232475, "grad_norm": 0.36328125, "learning_rate": 1.2055154111251796e-07, "loss": 0.4082, "step": 77885 }, { "epoch": 1.9771420593722633, "grad_norm": 0.375, "learning_rate": 1.1922333561123486e-07, "loss": 0.4641, "step": 77890 }, { "epoch": 1.977268977421279, "grad_norm": 0.318359375, "learning_rate": 1.1790248469995544e-07, "loss": 0.3908, "step": 77895 }, { "epoch": 1.9773958954702948, "grad_norm": 0.318359375, "learning_rate": 1.1658898844349451e-07, "loss": 0.4102, "step": 77900 }, { "epoch": 1.9775228135193106, "grad_norm": 0.37109375, "learning_rate": 1.1528284690631718e-07, "loss": 0.44, "step": 77905 }, { "epoch": 1.9776497315683264, "grad_norm": 0.357421875, "learning_rate": 1.1398406015248884e-07, "loss": 0.4398, "step": 77910 }, { "epoch": 1.9777766496173421, "grad_norm": 0.291015625, "learning_rate": 1.1269262824577519e-07, "loss": 0.4199, "step": 77915 }, { "epoch": 1.9779035676663579, "grad_norm": 0.349609375, "learning_rate": 1.1140855124950887e-07, "loss": 0.4031, "step": 77920 }, { "epoch": 1.9780304857153737, "grad_norm": 0.33984375, "learning_rate": 1.1013182922673947e-07, "loss": 0.4157, "step": 77925 }, { "epoch": 1.9781574037643894, "grad_norm": 0.330078125, "learning_rate": 1.0886246224008354e-07, "loss": 0.409, "step": 77930 }, { "epoch": 1.9782843218134052, "grad_norm": 0.359375, "learning_rate": 1.0760045035184129e-07, "loss": 0.4108, "step": 77935 }, { "epoch": 1.978411239862421, "grad_norm": 0.3125, "learning_rate": 1.0634579362392981e-07, "loss": 0.3835, "step": 77940 }, { "epoch": 1.9785381579114367, "grad_norm": 0.365234375, "learning_rate": 1.0509849211791655e-07, "loss": 0.4206, "step": 77945 }, { "epoch": 1.9786650759604525, "grad_norm": 0.35546875, "learning_rate": 1.0385854589501918e-07, "loss": 0.4116, "step": 77950 }, { "epoch": 1.978791994009468, "grad_norm": 0.365234375, "learning_rate": 1.026259550160724e-07, "loss": 0.4581, "step": 77955 }, { "epoch": 1.9789189120584838, "grad_norm": 0.35546875, "learning_rate": 1.0140071954156115e-07, "loss": 0.4069, "step": 77960 }, { "epoch": 1.9790458301074996, "grad_norm": 0.365234375, "learning_rate": 1.0018283953162066e-07, "loss": 0.4191, "step": 77965 }, { "epoch": 1.9791727481565153, "grad_norm": 0.359375, "learning_rate": 9.897231504596981e-08, "loss": 0.3965, "step": 77970 }, { "epoch": 1.979299666205531, "grad_norm": 0.349609375, "learning_rate": 9.776914614406107e-08, "loss": 0.4344, "step": 77975 }, { "epoch": 1.9794265842545469, "grad_norm": 0.359375, "learning_rate": 9.657333288489721e-08, "loss": 0.407, "step": 77980 }, { "epoch": 1.9795535023035626, "grad_norm": 0.349609375, "learning_rate": 9.538487532716465e-08, "loss": 0.4112, "step": 77985 }, { "epoch": 1.9796804203525782, "grad_norm": 0.333984375, "learning_rate": 9.42037735291834e-08, "loss": 0.4238, "step": 77990 }, { "epoch": 1.979807338401594, "grad_norm": 0.380859375, "learning_rate": 9.303002754890709e-08, "loss": 0.4382, "step": 77995 }, { "epoch": 1.9799342564506097, "grad_norm": 0.337890625, "learning_rate": 9.186363744393966e-08, "loss": 0.3905, "step": 78000 }, { "epoch": 1.9800611744996255, "grad_norm": 0.333984375, "learning_rate": 9.070460327150198e-08, "loss": 0.4079, "step": 78005 }, { "epoch": 1.9801880925486413, "grad_norm": 0.357421875, "learning_rate": 8.955292508848188e-08, "loss": 0.3999, "step": 78010 }, { "epoch": 1.980315010597657, "grad_norm": 0.3671875, "learning_rate": 8.840860295136753e-08, "loss": 0.4113, "step": 78015 }, { "epoch": 1.9804419286466728, "grad_norm": 0.330078125, "learning_rate": 8.727163691634731e-08, "loss": 0.4067, "step": 78020 }, { "epoch": 1.9805688466956886, "grad_norm": 0.353515625, "learning_rate": 8.614202703917661e-08, "loss": 0.4038, "step": 78025 }, { "epoch": 1.9806957647447043, "grad_norm": 0.330078125, "learning_rate": 8.501977337531107e-08, "loss": 0.3907, "step": 78030 }, { "epoch": 1.98082268279372, "grad_norm": 0.341796875, "learning_rate": 8.390487597980666e-08, "loss": 0.4224, "step": 78035 }, { "epoch": 1.9809496008427359, "grad_norm": 0.349609375, "learning_rate": 8.279733490736962e-08, "loss": 0.4273, "step": 78040 }, { "epoch": 1.9810765188917516, "grad_norm": 0.333984375, "learning_rate": 8.169715021235645e-08, "loss": 0.4256, "step": 78045 }, { "epoch": 1.9812034369407674, "grad_norm": 0.353515625, "learning_rate": 8.060432194874066e-08, "loss": 0.4125, "step": 78050 }, { "epoch": 1.9813303549897832, "grad_norm": 0.369140625, "learning_rate": 7.951885017014603e-08, "loss": 0.3984, "step": 78055 }, { "epoch": 1.981457273038799, "grad_norm": 0.373046875, "learning_rate": 7.84407349298466e-08, "loss": 0.4467, "step": 78060 }, { "epoch": 1.9815841910878147, "grad_norm": 0.333984375, "learning_rate": 7.736997628073338e-08, "loss": 0.4215, "step": 78065 }, { "epoch": 1.9817111091368305, "grad_norm": 0.3515625, "learning_rate": 7.630657427536435e-08, "loss": 0.4374, "step": 78070 }, { "epoch": 1.9818380271858462, "grad_norm": 0.34375, "learning_rate": 7.525052896589778e-08, "loss": 0.4141, "step": 78075 }, { "epoch": 1.981964945234862, "grad_norm": 0.353515625, "learning_rate": 7.420184040419219e-08, "loss": 0.4011, "step": 78080 }, { "epoch": 1.9820918632838778, "grad_norm": 0.35546875, "learning_rate": 7.316050864165646e-08, "loss": 0.4054, "step": 78085 }, { "epoch": 1.9822187813328933, "grad_norm": 0.3359375, "learning_rate": 7.212653372941634e-08, "loss": 0.3956, "step": 78090 }, { "epoch": 1.982345699381909, "grad_norm": 0.33203125, "learning_rate": 7.10999157182146e-08, "loss": 0.4053, "step": 78095 }, { "epoch": 1.9824726174309248, "grad_norm": 0.34375, "learning_rate": 7.008065465841095e-08, "loss": 0.4301, "step": 78100 }, { "epoch": 1.9825995354799406, "grad_norm": 0.36328125, "learning_rate": 6.906875060003203e-08, "loss": 0.4039, "step": 78105 }, { "epoch": 1.9827264535289564, "grad_norm": 0.328125, "learning_rate": 6.806420359272147e-08, "loss": 0.4164, "step": 78110 }, { "epoch": 1.9828533715779721, "grad_norm": 0.400390625, "learning_rate": 6.706701368577317e-08, "loss": 0.4427, "step": 78115 }, { "epoch": 1.9829802896269877, "grad_norm": 0.330078125, "learning_rate": 6.60771809281313e-08, "loss": 0.4083, "step": 78120 }, { "epoch": 1.9831072076760035, "grad_norm": 0.328125, "learning_rate": 6.509470536835703e-08, "loss": 0.3787, "step": 78125 }, { "epoch": 1.9832341257250192, "grad_norm": 0.365234375, "learning_rate": 6.411958705464514e-08, "loss": 0.4465, "step": 78130 }, { "epoch": 1.983361043774035, "grad_norm": 0.3515625, "learning_rate": 6.315182603487401e-08, "loss": 0.4119, "step": 78135 }, { "epoch": 1.9834879618230508, "grad_norm": 0.376953125, "learning_rate": 6.219142235650565e-08, "loss": 0.4344, "step": 78140 }, { "epoch": 1.9836148798720665, "grad_norm": 0.333984375, "learning_rate": 6.12383760666857e-08, "loss": 0.4204, "step": 78145 }, { "epoch": 1.9837417979210823, "grad_norm": 0.33203125, "learning_rate": 6.029268721217672e-08, "loss": 0.4156, "step": 78150 }, { "epoch": 1.983868715970098, "grad_norm": 0.361328125, "learning_rate": 5.9354355839374976e-08, "loss": 0.4147, "step": 78155 }, { "epoch": 1.9839956340191138, "grad_norm": 0.298828125, "learning_rate": 5.842338199431029e-08, "loss": 0.3728, "step": 78160 }, { "epoch": 1.9841225520681296, "grad_norm": 0.390625, "learning_rate": 5.749976572271276e-08, "loss": 0.4274, "step": 78165 }, { "epoch": 1.9842494701171454, "grad_norm": 0.341796875, "learning_rate": 5.6583507069846156e-08, "loss": 0.4329, "step": 78170 }, { "epoch": 1.9843763881661611, "grad_norm": 0.3515625, "learning_rate": 5.567460608070784e-08, "loss": 0.4284, "step": 78175 }, { "epoch": 1.984503306215177, "grad_norm": 0.36328125, "learning_rate": 5.477306279989546e-08, "loss": 0.4239, "step": 78180 }, { "epoch": 1.9846302242641927, "grad_norm": 0.3671875, "learning_rate": 5.387887727164031e-08, "loss": 0.4385, "step": 78185 }, { "epoch": 1.9847571423132084, "grad_norm": 0.34375, "learning_rate": 5.299204953980729e-08, "loss": 0.3899, "step": 78190 }, { "epoch": 1.9848840603622242, "grad_norm": 0.341796875, "learning_rate": 5.211257964792826e-08, "loss": 0.4176, "step": 78195 }, { "epoch": 1.98501097841124, "grad_norm": 0.345703125, "learning_rate": 5.124046763915202e-08, "loss": 0.3936, "step": 78200 }, { "epoch": 1.9851378964602557, "grad_norm": 0.349609375, "learning_rate": 5.0375713556277677e-08, "loss": 0.4441, "step": 78205 }, { "epoch": 1.9852648145092715, "grad_norm": 0.32421875, "learning_rate": 4.951831744173795e-08, "loss": 0.3975, "step": 78210 }, { "epoch": 1.9853917325582873, "grad_norm": 0.330078125, "learning_rate": 4.8668279337599204e-08, "loss": 0.4024, "step": 78215 }, { "epoch": 1.9855186506073028, "grad_norm": 0.3359375, "learning_rate": 4.7825599285578054e-08, "loss": 0.3993, "step": 78220 }, { "epoch": 1.9856455686563186, "grad_norm": 0.353515625, "learning_rate": 4.699027732702476e-08, "loss": 0.4046, "step": 78225 }, { "epoch": 1.9857724867053343, "grad_norm": 0.3046875, "learning_rate": 4.6162313502939865e-08, "loss": 0.4085, "step": 78230 }, { "epoch": 1.9858994047543501, "grad_norm": 0.33203125, "learning_rate": 4.5341707853924215e-08, "loss": 0.3798, "step": 78235 }, { "epoch": 1.9860263228033659, "grad_norm": 0.369140625, "learning_rate": 4.4528460420245604e-08, "loss": 0.4211, "step": 78240 }, { "epoch": 1.9861532408523817, "grad_norm": 0.314453125, "learning_rate": 4.372257124183876e-08, "loss": 0.4039, "step": 78245 }, { "epoch": 1.9862801589013974, "grad_norm": 0.3125, "learning_rate": 4.292404035822205e-08, "loss": 0.3834, "step": 78250 }, { "epoch": 1.986407076950413, "grad_norm": 0.365234375, "learning_rate": 4.213286780859748e-08, "loss": 0.4146, "step": 78255 }, { "epoch": 1.9865339949994287, "grad_norm": 0.373046875, "learning_rate": 4.134905363176733e-08, "loss": 0.3781, "step": 78260 }, { "epoch": 1.9866609130484445, "grad_norm": 0.32421875, "learning_rate": 4.057259786621747e-08, "loss": 0.4027, "step": 78265 }, { "epoch": 1.9867878310974603, "grad_norm": 0.328125, "learning_rate": 3.9803500550017466e-08, "loss": 0.3999, "step": 78270 }, { "epoch": 1.986914749146476, "grad_norm": 0.3671875, "learning_rate": 3.904176172093709e-08, "loss": 0.4157, "step": 78275 }, { "epoch": 1.9870416671954918, "grad_norm": 0.369140625, "learning_rate": 3.828738141634646e-08, "loss": 0.4191, "step": 78280 }, { "epoch": 1.9871685852445076, "grad_norm": 0.34375, "learning_rate": 3.754035967324931e-08, "loss": 0.3995, "step": 78285 }, { "epoch": 1.9872955032935233, "grad_norm": 0.291015625, "learning_rate": 3.6800696528316296e-08, "loss": 0.4138, "step": 78290 }, { "epoch": 1.987422421342539, "grad_norm": 0.35546875, "learning_rate": 3.606839201783507e-08, "loss": 0.3868, "step": 78295 }, { "epoch": 1.9875493393915549, "grad_norm": 0.361328125, "learning_rate": 3.534344617774354e-08, "loss": 0.4142, "step": 78300 }, { "epoch": 1.9876762574405706, "grad_norm": 0.359375, "learning_rate": 3.4625859043613256e-08, "loss": 0.4158, "step": 78305 }, { "epoch": 1.9878031754895864, "grad_norm": 0.33203125, "learning_rate": 3.3915630650649396e-08, "loss": 0.4056, "step": 78310 }, { "epoch": 1.9879300935386022, "grad_norm": 0.34765625, "learning_rate": 3.32127610337074e-08, "loss": 0.4336, "step": 78315 }, { "epoch": 1.988057011587618, "grad_norm": 0.310546875, "learning_rate": 3.2517250227276356e-08, "loss": 0.3957, "step": 78320 }, { "epoch": 1.9881839296366337, "grad_norm": 0.341796875, "learning_rate": 3.182909826547897e-08, "loss": 0.4103, "step": 78325 }, { "epoch": 1.9883108476856495, "grad_norm": 0.3359375, "learning_rate": 3.1148305182104874e-08, "loss": 0.4178, "step": 78330 }, { "epoch": 1.9884377657346652, "grad_norm": 0.369140625, "learning_rate": 3.047487101054402e-08, "loss": 0.4009, "step": 78335 }, { "epoch": 1.988564683783681, "grad_norm": 0.3203125, "learning_rate": 2.9808795783836656e-08, "loss": 0.3856, "step": 78340 }, { "epoch": 1.9886916018326968, "grad_norm": 0.35546875, "learning_rate": 2.9150079534656644e-08, "loss": 0.4301, "step": 78345 }, { "epoch": 1.9888185198817125, "grad_norm": 0.353515625, "learning_rate": 2.849872229536143e-08, "loss": 0.427, "step": 78350 }, { "epoch": 1.988945437930728, "grad_norm": 0.341796875, "learning_rate": 2.7854724097892135e-08, "loss": 0.432, "step": 78355 }, { "epoch": 1.9890723559797439, "grad_norm": 0.353515625, "learning_rate": 2.72180849738568e-08, "loss": 0.4291, "step": 78360 }, { "epoch": 1.9891992740287596, "grad_norm": 0.32421875, "learning_rate": 2.6588804954480457e-08, "loss": 0.4142, "step": 78365 }, { "epoch": 1.9893261920777754, "grad_norm": 0.337890625, "learning_rate": 2.5966884070655058e-08, "loss": 0.4123, "step": 78370 }, { "epoch": 1.9894531101267912, "grad_norm": 0.357421875, "learning_rate": 2.535232235288953e-08, "loss": 0.4054, "step": 78375 }, { "epoch": 1.989580028175807, "grad_norm": 0.396484375, "learning_rate": 2.4745119831343084e-08, "loss": 0.4674, "step": 78380 }, { "epoch": 1.9897069462248225, "grad_norm": 0.36328125, "learning_rate": 2.4145276535825208e-08, "loss": 0.4034, "step": 78385 }, { "epoch": 1.9898338642738382, "grad_norm": 0.341796875, "learning_rate": 2.355279249574571e-08, "loss": 0.4344, "step": 78390 }, { "epoch": 1.989960782322854, "grad_norm": 0.330078125, "learning_rate": 2.2967667740181326e-08, "loss": 0.4015, "step": 78395 }, { "epoch": 1.9900877003718698, "grad_norm": 0.359375, "learning_rate": 2.2389902297875738e-08, "loss": 0.4087, "step": 78400 }, { "epoch": 1.9902146184208855, "grad_norm": 0.361328125, "learning_rate": 2.181949619713963e-08, "loss": 0.4086, "step": 78405 }, { "epoch": 1.9903415364699013, "grad_norm": 0.3671875, "learning_rate": 2.1256449465967273e-08, "loss": 0.4157, "step": 78410 }, { "epoch": 1.990468454518917, "grad_norm": 0.3515625, "learning_rate": 2.0700762132019877e-08, "loss": 0.4247, "step": 78415 }, { "epoch": 1.9905953725679328, "grad_norm": 0.35546875, "learning_rate": 2.0152434222542313e-08, "loss": 0.4192, "step": 78420 }, { "epoch": 1.9907222906169486, "grad_norm": 0.349609375, "learning_rate": 1.9611465764429734e-08, "loss": 0.4123, "step": 78425 }, { "epoch": 1.9908492086659644, "grad_norm": 0.341796875, "learning_rate": 1.907785678424423e-08, "loss": 0.4247, "step": 78430 }, { "epoch": 1.9909761267149801, "grad_norm": 0.361328125, "learning_rate": 1.855160730816485e-08, "loss": 0.4454, "step": 78435 }, { "epoch": 1.991103044763996, "grad_norm": 0.306640625, "learning_rate": 1.803271736200429e-08, "loss": 0.3921, "step": 78440 }, { "epoch": 1.9912299628130117, "grad_norm": 0.36328125, "learning_rate": 1.752118697124216e-08, "loss": 0.4251, "step": 78445 }, { "epoch": 1.9913568808620274, "grad_norm": 0.365234375, "learning_rate": 1.7017016160975063e-08, "loss": 0.4254, "step": 78450 }, { "epoch": 1.9914837989110432, "grad_norm": 0.365234375, "learning_rate": 1.6520204955933204e-08, "loss": 0.4287, "step": 78455 }, { "epoch": 1.991610716960059, "grad_norm": 0.375, "learning_rate": 1.603075338051374e-08, "loss": 0.4403, "step": 78460 }, { "epoch": 1.9917376350090747, "grad_norm": 0.375, "learning_rate": 1.5548661458697488e-08, "loss": 0.447, "step": 78465 }, { "epoch": 1.9918645530580905, "grad_norm": 0.3671875, "learning_rate": 1.5073929214182158e-08, "loss": 0.4402, "step": 78470 }, { "epoch": 1.9919914711071063, "grad_norm": 0.345703125, "learning_rate": 1.4606556670232472e-08, "loss": 0.415, "step": 78475 }, { "epoch": 1.992118389156122, "grad_norm": 0.32421875, "learning_rate": 1.4146543849813397e-08, "loss": 0.4093, "step": 78480 }, { "epoch": 1.9922453072051376, "grad_norm": 0.353515625, "learning_rate": 1.3693890775456906e-08, "loss": 0.4334, "step": 78485 }, { "epoch": 1.9923722252541534, "grad_norm": 0.33984375, "learning_rate": 1.3248597469395216e-08, "loss": 0.4319, "step": 78490 }, { "epoch": 1.9924991433031691, "grad_norm": 0.333984375, "learning_rate": 1.281066395349417e-08, "loss": 0.3976, "step": 78495 }, { "epoch": 1.992626061352185, "grad_norm": 0.35546875, "learning_rate": 1.2380090249219931e-08, "loss": 0.4265, "step": 78500 }, { "epoch": 1.9927529794012007, "grad_norm": 0.376953125, "learning_rate": 1.1956876377705593e-08, "loss": 0.4344, "step": 78505 }, { "epoch": 1.9928798974502164, "grad_norm": 0.349609375, "learning_rate": 1.1541022359717877e-08, "loss": 0.4242, "step": 78510 }, { "epoch": 1.9930068154992322, "grad_norm": 0.373046875, "learning_rate": 1.1132528215673787e-08, "loss": 0.4028, "step": 78515 }, { "epoch": 1.9931337335482477, "grad_norm": 0.328125, "learning_rate": 1.0731393965607293e-08, "loss": 0.4016, "step": 78520 }, { "epoch": 1.9932606515972635, "grad_norm": 0.373046875, "learning_rate": 1.0337619629202653e-08, "loss": 0.449, "step": 78525 }, { "epoch": 1.9933875696462793, "grad_norm": 0.3515625, "learning_rate": 9.951205225794402e-09, "loss": 0.418, "step": 78530 }, { "epoch": 1.993514487695295, "grad_norm": 0.3515625, "learning_rate": 9.572150774317388e-09, "loss": 0.4206, "step": 78535 }, { "epoch": 1.9936414057443108, "grad_norm": 0.35546875, "learning_rate": 9.20045629340671e-09, "loss": 0.4064, "step": 78540 }, { "epoch": 1.9937683237933266, "grad_norm": 0.369140625, "learning_rate": 8.836121801264473e-09, "loss": 0.4638, "step": 78545 }, { "epoch": 1.9938952418423423, "grad_norm": 0.353515625, "learning_rate": 8.479147315793022e-09, "loss": 0.4069, "step": 78550 }, { "epoch": 1.9940221598913581, "grad_norm": 0.34765625, "learning_rate": 8.129532854495025e-09, "loss": 0.4271, "step": 78555 }, { "epoch": 1.9941490779403739, "grad_norm": 0.369140625, "learning_rate": 7.787278434540078e-09, "loss": 0.4577, "step": 78560 }, { "epoch": 1.9942759959893896, "grad_norm": 0.345703125, "learning_rate": 7.452384072714757e-09, "loss": 0.4297, "step": 78565 }, { "epoch": 1.9944029140384054, "grad_norm": 0.349609375, "learning_rate": 7.124849785455911e-09, "loss": 0.3718, "step": 78570 }, { "epoch": 1.9945298320874212, "grad_norm": 0.33984375, "learning_rate": 6.804675588834019e-09, "loss": 0.4158, "step": 78575 }, { "epoch": 1.994656750136437, "grad_norm": 0.3359375, "learning_rate": 6.491861498553186e-09, "loss": 0.4303, "step": 78580 }, { "epoch": 1.9947836681854527, "grad_norm": 0.3359375, "learning_rate": 6.1864075299677965e-09, "loss": 0.3934, "step": 78585 }, { "epoch": 1.9949105862344685, "grad_norm": 0.369140625, "learning_rate": 5.888313698065861e-09, "loss": 0.3965, "step": 78590 }, { "epoch": 1.9950375042834843, "grad_norm": 0.330078125, "learning_rate": 5.5975800174690164e-09, "loss": 0.4003, "step": 78595 }, { "epoch": 1.9951644223325, "grad_norm": 0.31640625, "learning_rate": 5.314206502465835e-09, "loss": 0.3892, "step": 78600 }, { "epoch": 1.9952913403815158, "grad_norm": 0.359375, "learning_rate": 5.03819316692855e-09, "loss": 0.4288, "step": 78605 }, { "epoch": 1.9954182584305316, "grad_norm": 0.3671875, "learning_rate": 4.769540024429641e-09, "loss": 0.4103, "step": 78610 }, { "epoch": 1.9955451764795473, "grad_norm": 0.3671875, "learning_rate": 4.5082470881419034e-09, "loss": 0.4203, "step": 78615 }, { "epoch": 1.9956720945285629, "grad_norm": 0.359375, "learning_rate": 4.254314370888412e-09, "loss": 0.4369, "step": 78620 }, { "epoch": 1.9957990125775786, "grad_norm": 0.365234375, "learning_rate": 4.007741885109217e-09, "loss": 0.3946, "step": 78625 }, { "epoch": 1.9959259306265944, "grad_norm": 0.34765625, "learning_rate": 3.7685296429279535e-09, "loss": 0.4093, "step": 78630 }, { "epoch": 1.9960528486756102, "grad_norm": 0.376953125, "learning_rate": 3.5366776560852294e-09, "loss": 0.4506, "step": 78635 }, { "epoch": 1.996179766724626, "grad_norm": 0.345703125, "learning_rate": 3.312185935938627e-09, "loss": 0.4029, "step": 78640 }, { "epoch": 1.9963066847736417, "grad_norm": 0.33203125, "learning_rate": 3.095054493512661e-09, "loss": 0.3843, "step": 78645 }, { "epoch": 1.9964336028226572, "grad_norm": 0.384765625, "learning_rate": 2.885283339465472e-09, "loss": 0.4475, "step": 78650 }, { "epoch": 1.996560520871673, "grad_norm": 0.310546875, "learning_rate": 2.682872484088827e-09, "loss": 0.4161, "step": 78655 }, { "epoch": 1.9966874389206888, "grad_norm": 0.369140625, "learning_rate": 2.4878219373081208e-09, "loss": 0.4066, "step": 78660 }, { "epoch": 1.9968143569697046, "grad_norm": 0.337890625, "learning_rate": 2.300131708699027e-09, "loss": 0.4165, "step": 78665 }, { "epoch": 1.9969412750187203, "grad_norm": 0.36328125, "learning_rate": 2.1198018074874977e-09, "loss": 0.4112, "step": 78670 }, { "epoch": 1.997068193067736, "grad_norm": 0.369140625, "learning_rate": 1.946832242483154e-09, "loss": 0.4169, "step": 78675 }, { "epoch": 1.9971951111167519, "grad_norm": 0.345703125, "learning_rate": 1.781223022212508e-09, "loss": 0.4398, "step": 78680 }, { "epoch": 1.9973220291657676, "grad_norm": 0.35546875, "learning_rate": 1.622974154785739e-09, "loss": 0.4206, "step": 78685 }, { "epoch": 1.9974489472147834, "grad_norm": 0.333984375, "learning_rate": 1.4720856479633059e-09, "loss": 0.4338, "step": 78690 }, { "epoch": 1.9975758652637992, "grad_norm": 0.3515625, "learning_rate": 1.3285575091726009e-09, "loss": 0.4258, "step": 78695 }, { "epoch": 1.997702783312815, "grad_norm": 0.345703125, "learning_rate": 1.1923897454246822e-09, "loss": 0.3976, "step": 78700 }, { "epoch": 1.9978297013618307, "grad_norm": 0.3203125, "learning_rate": 1.063582363414195e-09, "loss": 0.4139, "step": 78705 }, { "epoch": 1.9979566194108465, "grad_norm": 0.365234375, "learning_rate": 9.4213536946941e-10, "loss": 0.4297, "step": 78710 }, { "epoch": 1.9980835374598622, "grad_norm": 0.322265625, "learning_rate": 8.28048769535572e-10, "loss": 0.4281, "step": 78715 }, { "epoch": 1.998210455508878, "grad_norm": 0.337890625, "learning_rate": 7.213225692248581e-10, "loss": 0.3828, "step": 78720 }, { "epoch": 1.9983373735578938, "grad_norm": 0.369140625, "learning_rate": 6.219567737664188e-10, "loss": 0.412, "step": 78725 }, { "epoch": 1.9984642916069095, "grad_norm": 0.3515625, "learning_rate": 5.299513880396844e-10, "loss": 0.4466, "step": 78730 }, { "epoch": 1.9985912096559253, "grad_norm": 0.361328125, "learning_rate": 4.4530641654105805e-10, "loss": 0.428, "step": 78735 }, { "epoch": 1.998718127704941, "grad_norm": 0.3515625, "learning_rate": 3.6802186346718275e-10, "loss": 0.4106, "step": 78740 }, { "epoch": 1.9988450457539568, "grad_norm": 0.341796875, "learning_rate": 2.980977325650613e-10, "loss": 0.3995, "step": 78745 }, { "epoch": 1.9989719638029724, "grad_norm": 0.3515625, "learning_rate": 2.355340272819362e-10, "loss": 0.4326, "step": 78750 }, { "epoch": 1.9990988818519881, "grad_norm": 0.349609375, "learning_rate": 1.8033075069867618e-10, "loss": 0.4056, "step": 78755 }, { "epoch": 1.999225799901004, "grad_norm": 0.330078125, "learning_rate": 1.3248790551312337e-10, "loss": 0.405, "step": 78760 }, { "epoch": 1.9993527179500197, "grad_norm": 0.302734375, "learning_rate": 9.200549407339942e-11, "loss": 0.4023, "step": 78765 }, { "epoch": 1.9994796359990354, "grad_norm": 0.375, "learning_rate": 5.888351837790573e-11, "loss": 0.4285, "step": 78770 }, { "epoch": 1.9996065540480512, "grad_norm": 0.3828125, "learning_rate": 3.312198004201683e-11, "loss": 0.4198, "step": 78775 }, { "epoch": 1.999733472097067, "grad_norm": 0.326171875, "learning_rate": 1.4720880314733618e-11, "loss": 0.4101, "step": 78780 }, { "epoch": 1.9998603901460825, "grad_norm": 0.3671875, "learning_rate": 3.6802201286434405e-12, "loss": 0.4206, "step": 78785 }, { "epoch": 1.9999873081950983, "grad_norm": 0.341796875, "learning_rate": 0.0, "loss": 0.454, "step": 78790 }, { "epoch": 1.9999873081950983, "eval_loss": 0.44442203640937805, "eval_runtime": 33.1271, "eval_samples_per_second": 4.347, "eval_steps_per_second": 4.347, "step": 78790 }, { "epoch": 1.9999873081950983, "step": 78790, "total_flos": 2.495114461908566e+19, "train_loss": 0.07288063374066779, "train_runtime": 50123.8574, "train_samples_per_second": 6.288, "train_steps_per_second": 1.572 } ], "logging_steps": 5, "max_steps": 78790, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.495114461908566e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }