C3QG / trainer_state.json
PhunvVi's picture
Upload folder using huggingface_hub
c70b193 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 48870,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.003069367710251688,
"grad_norm": 6.029524326324463,
"learning_rate": 2.9969920196439535e-05,
"loss": 2.626,
"step": 50
},
{
"epoch": 0.006138735420503376,
"grad_norm": 4.651051044464111,
"learning_rate": 2.993922651933702e-05,
"loss": 2.5542,
"step": 100
},
{
"epoch": 0.009208103130755065,
"grad_norm": 4.611798286437988,
"learning_rate": 2.99085328422345e-05,
"loss": 2.4749,
"step": 150
},
{
"epoch": 0.012277470841006752,
"grad_norm": 5.144523620605469,
"learning_rate": 2.9877839165131984e-05,
"loss": 2.4706,
"step": 200
},
{
"epoch": 0.015346838551258441,
"grad_norm": 5.4743242263793945,
"learning_rate": 2.9847145488029468e-05,
"loss": 2.5232,
"step": 250
},
{
"epoch": 0.01841620626151013,
"grad_norm": 4.009519100189209,
"learning_rate": 2.981645181092695e-05,
"loss": 2.4524,
"step": 300
},
{
"epoch": 0.021485573971761818,
"grad_norm": 4.539732456207275,
"learning_rate": 2.9785758133824433e-05,
"loss": 2.4725,
"step": 350
},
{
"epoch": 0.024554941682013505,
"grad_norm": 3.896458148956299,
"learning_rate": 2.9755064456721917e-05,
"loss": 2.4763,
"step": 400
},
{
"epoch": 0.027624309392265192,
"grad_norm": 4.5362982749938965,
"learning_rate": 2.9724370779619398e-05,
"loss": 2.4475,
"step": 450
},
{
"epoch": 0.030693677102516883,
"grad_norm": 3.903672456741333,
"learning_rate": 2.9693677102516883e-05,
"loss": 2.4884,
"step": 500
},
{
"epoch": 0.03376304481276857,
"grad_norm": 3.7690269947052,
"learning_rate": 2.9662983425414363e-05,
"loss": 2.4525,
"step": 550
},
{
"epoch": 0.03683241252302026,
"grad_norm": 3.7247776985168457,
"learning_rate": 2.9632289748311848e-05,
"loss": 2.4778,
"step": 600
},
{
"epoch": 0.03990178023327195,
"grad_norm": 5.1837310791015625,
"learning_rate": 2.9601596071209332e-05,
"loss": 2.4917,
"step": 650
},
{
"epoch": 0.042971147943523635,
"grad_norm": 4.13453483581543,
"learning_rate": 2.9570902394106813e-05,
"loss": 2.4869,
"step": 700
},
{
"epoch": 0.04604051565377532,
"grad_norm": 3.7461624145507812,
"learning_rate": 2.9540208717004297e-05,
"loss": 2.4408,
"step": 750
},
{
"epoch": 0.04910988336402701,
"grad_norm": 3.429506778717041,
"learning_rate": 2.950951503990178e-05,
"loss": 2.442,
"step": 800
},
{
"epoch": 0.0521792510742787,
"grad_norm": 4.001476287841797,
"learning_rate": 2.9478821362799262e-05,
"loss": 2.3645,
"step": 850
},
{
"epoch": 0.055248618784530384,
"grad_norm": 4.0728960037231445,
"learning_rate": 2.9448127685696746e-05,
"loss": 2.5016,
"step": 900
},
{
"epoch": 0.05831798649478207,
"grad_norm": 4.214133262634277,
"learning_rate": 2.941743400859423e-05,
"loss": 2.4142,
"step": 950
},
{
"epoch": 0.061387354205033766,
"grad_norm": 4.556704521179199,
"learning_rate": 2.938674033149171e-05,
"loss": 2.5025,
"step": 1000
},
{
"epoch": 0.06445672191528545,
"grad_norm": 4.376175403594971,
"learning_rate": 2.9356046654389195e-05,
"loss": 2.3906,
"step": 1050
},
{
"epoch": 0.06752608962553713,
"grad_norm": 4.843928813934326,
"learning_rate": 2.932535297728668e-05,
"loss": 2.4598,
"step": 1100
},
{
"epoch": 0.07059545733578883,
"grad_norm": 4.809575080871582,
"learning_rate": 2.9294659300184164e-05,
"loss": 2.4699,
"step": 1150
},
{
"epoch": 0.07366482504604052,
"grad_norm": 4.783190727233887,
"learning_rate": 2.9263965623081648e-05,
"loss": 2.4256,
"step": 1200
},
{
"epoch": 0.0767341927562922,
"grad_norm": 3.410017728805542,
"learning_rate": 2.923327194597913e-05,
"loss": 2.469,
"step": 1250
},
{
"epoch": 0.0798035604665439,
"grad_norm": 4.03816556930542,
"learning_rate": 2.9202578268876613e-05,
"loss": 2.3954,
"step": 1300
},
{
"epoch": 0.08287292817679558,
"grad_norm": 4.535765171051025,
"learning_rate": 2.9171884591774097e-05,
"loss": 2.3857,
"step": 1350
},
{
"epoch": 0.08594229588704727,
"grad_norm": 3.941899061203003,
"learning_rate": 2.9141190914671578e-05,
"loss": 2.4259,
"step": 1400
},
{
"epoch": 0.08901166359729895,
"grad_norm": 3.957204818725586,
"learning_rate": 2.9110497237569062e-05,
"loss": 2.4487,
"step": 1450
},
{
"epoch": 0.09208103130755065,
"grad_norm": 4.369974136352539,
"learning_rate": 2.9079803560466547e-05,
"loss": 2.4166,
"step": 1500
},
{
"epoch": 0.09515039901780234,
"grad_norm": 3.611785650253296,
"learning_rate": 2.9049109883364027e-05,
"loss": 2.4958,
"step": 1550
},
{
"epoch": 0.09821976672805402,
"grad_norm": 5.152332305908203,
"learning_rate": 2.901841620626151e-05,
"loss": 2.4116,
"step": 1600
},
{
"epoch": 0.10128913443830571,
"grad_norm": 3.69728684425354,
"learning_rate": 2.8987722529158996e-05,
"loss": 2.3373,
"step": 1650
},
{
"epoch": 0.1043585021485574,
"grad_norm": 4.104907512664795,
"learning_rate": 2.8957028852056477e-05,
"loss": 2.3961,
"step": 1700
},
{
"epoch": 0.10742786985880909,
"grad_norm": 4.160801887512207,
"learning_rate": 2.892633517495396e-05,
"loss": 2.3749,
"step": 1750
},
{
"epoch": 0.11049723756906077,
"grad_norm": 4.3206329345703125,
"learning_rate": 2.8895641497851445e-05,
"loss": 2.3582,
"step": 1800
},
{
"epoch": 0.11356660527931246,
"grad_norm": 5.133695125579834,
"learning_rate": 2.8864947820748926e-05,
"loss": 2.4267,
"step": 1850
},
{
"epoch": 0.11663597298956414,
"grad_norm": 3.392789125442505,
"learning_rate": 2.883425414364641e-05,
"loss": 2.4299,
"step": 1900
},
{
"epoch": 0.11970534069981584,
"grad_norm": 4.9408135414123535,
"learning_rate": 2.880356046654389e-05,
"loss": 2.3965,
"step": 1950
},
{
"epoch": 0.12277470841006753,
"grad_norm": 4.085551738739014,
"learning_rate": 2.8772866789441375e-05,
"loss": 2.4148,
"step": 2000
},
{
"epoch": 0.12584407612031923,
"grad_norm": 4.733358860015869,
"learning_rate": 2.874217311233886e-05,
"loss": 2.4431,
"step": 2050
},
{
"epoch": 0.1289134438305709,
"grad_norm": 3.3507819175720215,
"learning_rate": 2.871147943523634e-05,
"loss": 2.3127,
"step": 2100
},
{
"epoch": 0.1319828115408226,
"grad_norm": 5.074892520904541,
"learning_rate": 2.8680785758133825e-05,
"loss": 2.2961,
"step": 2150
},
{
"epoch": 0.13505217925107427,
"grad_norm": 4.0248332023620605,
"learning_rate": 2.865009208103131e-05,
"loss": 2.4545,
"step": 2200
},
{
"epoch": 0.13812154696132597,
"grad_norm": 3.959451675415039,
"learning_rate": 2.861939840392879e-05,
"loss": 2.4582,
"step": 2250
},
{
"epoch": 0.14119091467157766,
"grad_norm": 5.6295294761657715,
"learning_rate": 2.8588704726826274e-05,
"loss": 2.4184,
"step": 2300
},
{
"epoch": 0.14426028238182934,
"grad_norm": 4.008995056152344,
"learning_rate": 2.8558011049723758e-05,
"loss": 2.436,
"step": 2350
},
{
"epoch": 0.14732965009208104,
"grad_norm": 4.01780891418457,
"learning_rate": 2.852731737262124e-05,
"loss": 2.4243,
"step": 2400
},
{
"epoch": 0.15039901780233272,
"grad_norm": 3.839801549911499,
"learning_rate": 2.8496623695518723e-05,
"loss": 2.4003,
"step": 2450
},
{
"epoch": 0.1534683855125844,
"grad_norm": 3.6963717937469482,
"learning_rate": 2.8465930018416207e-05,
"loss": 2.4496,
"step": 2500
},
{
"epoch": 0.15653775322283608,
"grad_norm": 4.555826187133789,
"learning_rate": 2.8435236341313688e-05,
"loss": 2.4101,
"step": 2550
},
{
"epoch": 0.1596071209330878,
"grad_norm": 3.507671356201172,
"learning_rate": 2.8404542664211172e-05,
"loss": 2.4258,
"step": 2600
},
{
"epoch": 0.16267648864333947,
"grad_norm": 4.644598007202148,
"learning_rate": 2.8373848987108653e-05,
"loss": 2.3937,
"step": 2650
},
{
"epoch": 0.16574585635359115,
"grad_norm": 3.737030506134033,
"learning_rate": 2.834315531000614e-05,
"loss": 2.3961,
"step": 2700
},
{
"epoch": 0.16881522406384286,
"grad_norm": 3.4786527156829834,
"learning_rate": 2.8312461632903625e-05,
"loss": 2.4414,
"step": 2750
},
{
"epoch": 0.17188459177409454,
"grad_norm": 3.9619481563568115,
"learning_rate": 2.8281767955801106e-05,
"loss": 2.4952,
"step": 2800
},
{
"epoch": 0.17495395948434622,
"grad_norm": 4.628708362579346,
"learning_rate": 2.825107427869859e-05,
"loss": 2.4284,
"step": 2850
},
{
"epoch": 0.1780233271945979,
"grad_norm": 3.561638593673706,
"learning_rate": 2.8220380601596074e-05,
"loss": 2.3983,
"step": 2900
},
{
"epoch": 0.1810926949048496,
"grad_norm": 4.126139163970947,
"learning_rate": 2.8189686924493555e-05,
"loss": 2.3642,
"step": 2950
},
{
"epoch": 0.1841620626151013,
"grad_norm": 3.888535737991333,
"learning_rate": 2.815899324739104e-05,
"loss": 2.2939,
"step": 3000
},
{
"epoch": 0.18723143032535297,
"grad_norm": 4.639522552490234,
"learning_rate": 2.8128299570288524e-05,
"loss": 2.5008,
"step": 3050
},
{
"epoch": 0.19030079803560468,
"grad_norm": 4.336818695068359,
"learning_rate": 2.8097605893186004e-05,
"loss": 2.3622,
"step": 3100
},
{
"epoch": 0.19337016574585636,
"grad_norm": 3.351541519165039,
"learning_rate": 2.806691221608349e-05,
"loss": 2.3638,
"step": 3150
},
{
"epoch": 0.19643953345610804,
"grad_norm": 7.246761798858643,
"learning_rate": 2.8036218538980973e-05,
"loss": 2.4054,
"step": 3200
},
{
"epoch": 0.19950890116635972,
"grad_norm": 3.76177978515625,
"learning_rate": 2.8005524861878454e-05,
"loss": 2.4991,
"step": 3250
},
{
"epoch": 0.20257826887661143,
"grad_norm": 3.489014148712158,
"learning_rate": 2.7974831184775938e-05,
"loss": 2.3624,
"step": 3300
},
{
"epoch": 0.2056476365868631,
"grad_norm": 3.3915023803710938,
"learning_rate": 2.794413750767342e-05,
"loss": 2.3862,
"step": 3350
},
{
"epoch": 0.2087170042971148,
"grad_norm": 3.8997626304626465,
"learning_rate": 2.7913443830570903e-05,
"loss": 2.2968,
"step": 3400
},
{
"epoch": 0.21178637200736647,
"grad_norm": 3.1932547092437744,
"learning_rate": 2.7882750153468387e-05,
"loss": 2.3592,
"step": 3450
},
{
"epoch": 0.21485573971761818,
"grad_norm": 4.888554096221924,
"learning_rate": 2.7852056476365868e-05,
"loss": 2.4128,
"step": 3500
},
{
"epoch": 0.21792510742786986,
"grad_norm": 4.414622783660889,
"learning_rate": 2.7821362799263352e-05,
"loss": 2.4198,
"step": 3550
},
{
"epoch": 0.22099447513812154,
"grad_norm": 3.756305456161499,
"learning_rate": 2.7790669122160836e-05,
"loss": 2.3664,
"step": 3600
},
{
"epoch": 0.22406384284837325,
"grad_norm": 4.073141098022461,
"learning_rate": 2.7759975445058317e-05,
"loss": 2.3876,
"step": 3650
},
{
"epoch": 0.22713321055862493,
"grad_norm": 2.9590511322021484,
"learning_rate": 2.77292817679558e-05,
"loss": 2.3702,
"step": 3700
},
{
"epoch": 0.2302025782688766,
"grad_norm": 3.9760727882385254,
"learning_rate": 2.7698588090853286e-05,
"loss": 2.3944,
"step": 3750
},
{
"epoch": 0.2332719459791283,
"grad_norm": 3.8550660610198975,
"learning_rate": 2.7667894413750767e-05,
"loss": 2.3543,
"step": 3800
},
{
"epoch": 0.23634131368938,
"grad_norm": 5.260376930236816,
"learning_rate": 2.763720073664825e-05,
"loss": 2.3058,
"step": 3850
},
{
"epoch": 0.23941068139963168,
"grad_norm": 3.408364772796631,
"learning_rate": 2.7606507059545735e-05,
"loss": 2.4173,
"step": 3900
},
{
"epoch": 0.24248004910988336,
"grad_norm": 3.482743263244629,
"learning_rate": 2.7575813382443216e-05,
"loss": 2.3997,
"step": 3950
},
{
"epoch": 0.24554941682013506,
"grad_norm": 3.7070858478546143,
"learning_rate": 2.75451197053407e-05,
"loss": 2.4266,
"step": 4000
},
{
"epoch": 0.24861878453038674,
"grad_norm": 4.024835109710693,
"learning_rate": 2.751442602823818e-05,
"loss": 2.3201,
"step": 4050
},
{
"epoch": 0.25168815224063845,
"grad_norm": 4.50393533706665,
"learning_rate": 2.7483732351135665e-05,
"loss": 2.3541,
"step": 4100
},
{
"epoch": 0.25475751995089013,
"grad_norm": 4.806800365447998,
"learning_rate": 2.745303867403315e-05,
"loss": 2.4102,
"step": 4150
},
{
"epoch": 0.2578268876611418,
"grad_norm": 3.5676867961883545,
"learning_rate": 2.742234499693063e-05,
"loss": 2.2263,
"step": 4200
},
{
"epoch": 0.2608962553713935,
"grad_norm": 3.5269808769226074,
"learning_rate": 2.7391651319828114e-05,
"loss": 2.2747,
"step": 4250
},
{
"epoch": 0.2639656230816452,
"grad_norm": 4.45748233795166,
"learning_rate": 2.73609576427256e-05,
"loss": 2.3761,
"step": 4300
},
{
"epoch": 0.26703499079189685,
"grad_norm": 4.212084770202637,
"learning_rate": 2.7330263965623083e-05,
"loss": 2.4948,
"step": 4350
},
{
"epoch": 0.27010435850214853,
"grad_norm": 4.344183444976807,
"learning_rate": 2.7299570288520567e-05,
"loss": 2.4308,
"step": 4400
},
{
"epoch": 0.27317372621240027,
"grad_norm": 3.9603850841522217,
"learning_rate": 2.726887661141805e-05,
"loss": 2.4254,
"step": 4450
},
{
"epoch": 0.27624309392265195,
"grad_norm": 4.453806400299072,
"learning_rate": 2.7238182934315532e-05,
"loss": 2.4717,
"step": 4500
},
{
"epoch": 0.27931246163290363,
"grad_norm": 3.924269437789917,
"learning_rate": 2.7207489257213016e-05,
"loss": 2.3845,
"step": 4550
},
{
"epoch": 0.2823818293431553,
"grad_norm": 4.238902568817139,
"learning_rate": 2.71767955801105e-05,
"loss": 2.4194,
"step": 4600
},
{
"epoch": 0.285451197053407,
"grad_norm": 3.7095208168029785,
"learning_rate": 2.714610190300798e-05,
"loss": 2.3036,
"step": 4650
},
{
"epoch": 0.28852056476365867,
"grad_norm": 3.765899419784546,
"learning_rate": 2.7115408225905466e-05,
"loss": 2.4129,
"step": 4700
},
{
"epoch": 0.29158993247391035,
"grad_norm": 4.793846130371094,
"learning_rate": 2.7084714548802946e-05,
"loss": 2.485,
"step": 4750
},
{
"epoch": 0.2946593001841621,
"grad_norm": 3.294372797012329,
"learning_rate": 2.705402087170043e-05,
"loss": 2.3896,
"step": 4800
},
{
"epoch": 0.29772866789441377,
"grad_norm": 4.122358798980713,
"learning_rate": 2.7023327194597915e-05,
"loss": 2.3746,
"step": 4850
},
{
"epoch": 0.30079803560466545,
"grad_norm": 3.1560487747192383,
"learning_rate": 2.6992633517495396e-05,
"loss": 2.346,
"step": 4900
},
{
"epoch": 0.30386740331491713,
"grad_norm": 3.7786178588867188,
"learning_rate": 2.696193984039288e-05,
"loss": 2.3669,
"step": 4950
},
{
"epoch": 0.3069367710251688,
"grad_norm": 3.4450085163116455,
"learning_rate": 2.6931246163290364e-05,
"loss": 2.3475,
"step": 5000
},
{
"epoch": 0.3100061387354205,
"grad_norm": 4.380395889282227,
"learning_rate": 2.6900552486187845e-05,
"loss": 2.3755,
"step": 5050
},
{
"epoch": 0.31307550644567217,
"grad_norm": 4.272715091705322,
"learning_rate": 2.686985880908533e-05,
"loss": 2.3517,
"step": 5100
},
{
"epoch": 0.3161448741559239,
"grad_norm": 5.976320266723633,
"learning_rate": 2.6839165131982813e-05,
"loss": 2.3665,
"step": 5150
},
{
"epoch": 0.3192142418661756,
"grad_norm": 4.1683759689331055,
"learning_rate": 2.6808471454880294e-05,
"loss": 2.2517,
"step": 5200
},
{
"epoch": 0.32228360957642727,
"grad_norm": 3.623004674911499,
"learning_rate": 2.677777777777778e-05,
"loss": 2.4741,
"step": 5250
},
{
"epoch": 0.32535297728667895,
"grad_norm": 4.282279968261719,
"learning_rate": 2.6747084100675263e-05,
"loss": 2.3191,
"step": 5300
},
{
"epoch": 0.3284223449969306,
"grad_norm": 3.761108636856079,
"learning_rate": 2.6716390423572744e-05,
"loss": 2.4249,
"step": 5350
},
{
"epoch": 0.3314917127071823,
"grad_norm": 3.7316195964813232,
"learning_rate": 2.6685696746470228e-05,
"loss": 2.3679,
"step": 5400
},
{
"epoch": 0.334561080417434,
"grad_norm": 3.8297908306121826,
"learning_rate": 2.665500306936771e-05,
"loss": 2.4157,
"step": 5450
},
{
"epoch": 0.3376304481276857,
"grad_norm": 4.17257833480835,
"learning_rate": 2.6624309392265193e-05,
"loss": 2.2362,
"step": 5500
},
{
"epoch": 0.3406998158379374,
"grad_norm": 4.0827484130859375,
"learning_rate": 2.6593615715162677e-05,
"loss": 2.3369,
"step": 5550
},
{
"epoch": 0.3437691835481891,
"grad_norm": 3.3072879314422607,
"learning_rate": 2.6562922038060158e-05,
"loss": 2.3797,
"step": 5600
},
{
"epoch": 0.34683855125844076,
"grad_norm": 3.316751480102539,
"learning_rate": 2.6532228360957642e-05,
"loss": 2.4562,
"step": 5650
},
{
"epoch": 0.34990791896869244,
"grad_norm": 4.482247829437256,
"learning_rate": 2.6501534683855126e-05,
"loss": 2.373,
"step": 5700
},
{
"epoch": 0.3529772866789441,
"grad_norm": 4.056297779083252,
"learning_rate": 2.6470841006752607e-05,
"loss": 2.4294,
"step": 5750
},
{
"epoch": 0.3560466543891958,
"grad_norm": 3.596730947494507,
"learning_rate": 2.644014732965009e-05,
"loss": 2.3219,
"step": 5800
},
{
"epoch": 0.35911602209944754,
"grad_norm": 4.089284896850586,
"learning_rate": 2.6409453652547576e-05,
"loss": 2.4088,
"step": 5850
},
{
"epoch": 0.3621853898096992,
"grad_norm": 4.409155368804932,
"learning_rate": 2.6378759975445056e-05,
"loss": 2.3561,
"step": 5900
},
{
"epoch": 0.3652547575199509,
"grad_norm": 5.397696018218994,
"learning_rate": 2.6348066298342544e-05,
"loss": 2.3602,
"step": 5950
},
{
"epoch": 0.3683241252302026,
"grad_norm": 3.62880277633667,
"learning_rate": 2.6317372621240028e-05,
"loss": 2.4074,
"step": 6000
},
{
"epoch": 0.37139349294045426,
"grad_norm": 3.9512007236480713,
"learning_rate": 2.628667894413751e-05,
"loss": 2.3754,
"step": 6050
},
{
"epoch": 0.37446286065070594,
"grad_norm": 4.806766033172607,
"learning_rate": 2.6255985267034993e-05,
"loss": 2.362,
"step": 6100
},
{
"epoch": 0.3775322283609576,
"grad_norm": 4.055029392242432,
"learning_rate": 2.6225291589932474e-05,
"loss": 2.3769,
"step": 6150
},
{
"epoch": 0.38060159607120936,
"grad_norm": 4.231038570404053,
"learning_rate": 2.619459791282996e-05,
"loss": 2.4561,
"step": 6200
},
{
"epoch": 0.38367096378146104,
"grad_norm": 3.5260846614837646,
"learning_rate": 2.6163904235727443e-05,
"loss": 2.4241,
"step": 6250
},
{
"epoch": 0.3867403314917127,
"grad_norm": 4.557247161865234,
"learning_rate": 2.6133210558624923e-05,
"loss": 2.3357,
"step": 6300
},
{
"epoch": 0.3898096992019644,
"grad_norm": 3.3867931365966797,
"learning_rate": 2.6102516881522408e-05,
"loss": 2.2559,
"step": 6350
},
{
"epoch": 0.3928790669122161,
"grad_norm": 4.224850177764893,
"learning_rate": 2.6071823204419892e-05,
"loss": 2.3338,
"step": 6400
},
{
"epoch": 0.39594843462246776,
"grad_norm": 4.095287322998047,
"learning_rate": 2.6041129527317373e-05,
"loss": 2.3384,
"step": 6450
},
{
"epoch": 0.39901780233271944,
"grad_norm": 2.982825517654419,
"learning_rate": 2.6010435850214857e-05,
"loss": 2.2956,
"step": 6500
},
{
"epoch": 0.4020871700429711,
"grad_norm": 4.3587164878845215,
"learning_rate": 2.597974217311234e-05,
"loss": 2.313,
"step": 6550
},
{
"epoch": 0.40515653775322286,
"grad_norm": 4.225688457489014,
"learning_rate": 2.5949048496009822e-05,
"loss": 2.3302,
"step": 6600
},
{
"epoch": 0.40822590546347454,
"grad_norm": 3.7242987155914307,
"learning_rate": 2.5918354818907306e-05,
"loss": 2.3699,
"step": 6650
},
{
"epoch": 0.4112952731737262,
"grad_norm": 4.286329746246338,
"learning_rate": 2.588766114180479e-05,
"loss": 2.3835,
"step": 6700
},
{
"epoch": 0.4143646408839779,
"grad_norm": 3.054641008377075,
"learning_rate": 2.585696746470227e-05,
"loss": 2.3717,
"step": 6750
},
{
"epoch": 0.4174340085942296,
"grad_norm": 3.183530330657959,
"learning_rate": 2.5826273787599756e-05,
"loss": 2.4187,
"step": 6800
},
{
"epoch": 0.42050337630448126,
"grad_norm": 3.300554037094116,
"learning_rate": 2.5795580110497236e-05,
"loss": 2.381,
"step": 6850
},
{
"epoch": 0.42357274401473294,
"grad_norm": 3.742980718612671,
"learning_rate": 2.576488643339472e-05,
"loss": 2.3156,
"step": 6900
},
{
"epoch": 0.4266421117249847,
"grad_norm": 4.5531182289123535,
"learning_rate": 2.5734192756292205e-05,
"loss": 2.2977,
"step": 6950
},
{
"epoch": 0.42971147943523635,
"grad_norm": 4.084106922149658,
"learning_rate": 2.5703499079189686e-05,
"loss": 2.3399,
"step": 7000
},
{
"epoch": 0.43278084714548803,
"grad_norm": 3.3674798011779785,
"learning_rate": 2.567280540208717e-05,
"loss": 2.4205,
"step": 7050
},
{
"epoch": 0.4358502148557397,
"grad_norm": 3.401578664779663,
"learning_rate": 2.5642111724984654e-05,
"loss": 2.3318,
"step": 7100
},
{
"epoch": 0.4389195825659914,
"grad_norm": 3.7515037059783936,
"learning_rate": 2.5611418047882135e-05,
"loss": 2.3514,
"step": 7150
},
{
"epoch": 0.4419889502762431,
"grad_norm": 3.1685400009155273,
"learning_rate": 2.558072437077962e-05,
"loss": 2.3682,
"step": 7200
},
{
"epoch": 0.44505831798649476,
"grad_norm": 3.990283489227295,
"learning_rate": 2.5550030693677103e-05,
"loss": 2.2508,
"step": 7250
},
{
"epoch": 0.4481276856967465,
"grad_norm": 4.544641971588135,
"learning_rate": 2.5519337016574584e-05,
"loss": 2.3624,
"step": 7300
},
{
"epoch": 0.45119705340699817,
"grad_norm": 3.986804962158203,
"learning_rate": 2.548864333947207e-05,
"loss": 2.384,
"step": 7350
},
{
"epoch": 0.45426642111724985,
"grad_norm": 4.338369846343994,
"learning_rate": 2.5457949662369553e-05,
"loss": 2.3311,
"step": 7400
},
{
"epoch": 0.45733578882750153,
"grad_norm": 3.8103485107421875,
"learning_rate": 2.5427255985267033e-05,
"loss": 2.2405,
"step": 7450
},
{
"epoch": 0.4604051565377532,
"grad_norm": 3.461311101913452,
"learning_rate": 2.5396562308164518e-05,
"loss": 2.3843,
"step": 7500
},
{
"epoch": 0.4634745242480049,
"grad_norm": 3.6048476696014404,
"learning_rate": 2.5365868631062002e-05,
"loss": 2.395,
"step": 7550
},
{
"epoch": 0.4665438919582566,
"grad_norm": 3.4888088703155518,
"learning_rate": 2.5335174953959486e-05,
"loss": 2.3449,
"step": 7600
},
{
"epoch": 0.4696132596685083,
"grad_norm": 3.9866726398468018,
"learning_rate": 2.530448127685697e-05,
"loss": 2.3806,
"step": 7650
},
{
"epoch": 0.47268262737876,
"grad_norm": 3.5875093936920166,
"learning_rate": 2.527378759975445e-05,
"loss": 2.32,
"step": 7700
},
{
"epoch": 0.47575199508901167,
"grad_norm": 3.5421364307403564,
"learning_rate": 2.5243093922651935e-05,
"loss": 2.4109,
"step": 7750
},
{
"epoch": 0.47882136279926335,
"grad_norm": 3.4700534343719482,
"learning_rate": 2.521240024554942e-05,
"loss": 2.38,
"step": 7800
},
{
"epoch": 0.48189073050951503,
"grad_norm": 3.879786729812622,
"learning_rate": 2.51817065684469e-05,
"loss": 2.389,
"step": 7850
},
{
"epoch": 0.4849600982197667,
"grad_norm": 4.261646747589111,
"learning_rate": 2.5151012891344385e-05,
"loss": 2.3983,
"step": 7900
},
{
"epoch": 0.4880294659300184,
"grad_norm": 3.6179709434509277,
"learning_rate": 2.512031921424187e-05,
"loss": 2.3239,
"step": 7950
},
{
"epoch": 0.4910988336402701,
"grad_norm": 3.9921975135803223,
"learning_rate": 2.508962553713935e-05,
"loss": 2.3513,
"step": 8000
},
{
"epoch": 0.4941682013505218,
"grad_norm": 3.7387430667877197,
"learning_rate": 2.5058931860036834e-05,
"loss": 2.3613,
"step": 8050
},
{
"epoch": 0.4972375690607735,
"grad_norm": 5.033226013183594,
"learning_rate": 2.5028238182934318e-05,
"loss": 2.3512,
"step": 8100
},
{
"epoch": 0.5003069367710252,
"grad_norm": 3.8006532192230225,
"learning_rate": 2.49975445058318e-05,
"loss": 2.4073,
"step": 8150
},
{
"epoch": 0.5033763044812769,
"grad_norm": 3.866370916366577,
"learning_rate": 2.4966850828729283e-05,
"loss": 2.2788,
"step": 8200
},
{
"epoch": 0.5064456721915286,
"grad_norm": 3.6110663414001465,
"learning_rate": 2.4936157151626764e-05,
"loss": 2.3755,
"step": 8250
},
{
"epoch": 0.5095150399017803,
"grad_norm": 3.55604887008667,
"learning_rate": 2.4905463474524248e-05,
"loss": 2.3332,
"step": 8300
},
{
"epoch": 0.512584407612032,
"grad_norm": 3.8975794315338135,
"learning_rate": 2.4874769797421732e-05,
"loss": 2.3691,
"step": 8350
},
{
"epoch": 0.5156537753222836,
"grad_norm": 3.8415000438690186,
"learning_rate": 2.4844076120319213e-05,
"loss": 2.3481,
"step": 8400
},
{
"epoch": 0.5187231430325353,
"grad_norm": 2.9400956630706787,
"learning_rate": 2.4813382443216698e-05,
"loss": 2.3491,
"step": 8450
},
{
"epoch": 0.521792510742787,
"grad_norm": 3.419921875,
"learning_rate": 2.4782688766114182e-05,
"loss": 2.2612,
"step": 8500
},
{
"epoch": 0.5248618784530387,
"grad_norm": 3.066887617111206,
"learning_rate": 2.4751995089011663e-05,
"loss": 2.3013,
"step": 8550
},
{
"epoch": 0.5279312461632903,
"grad_norm": 3.7565064430236816,
"learning_rate": 2.4721301411909147e-05,
"loss": 2.3407,
"step": 8600
},
{
"epoch": 0.531000613873542,
"grad_norm": 3.6040759086608887,
"learning_rate": 2.469060773480663e-05,
"loss": 2.3678,
"step": 8650
},
{
"epoch": 0.5340699815837937,
"grad_norm": 3.4393301010131836,
"learning_rate": 2.4659914057704112e-05,
"loss": 2.321,
"step": 8700
},
{
"epoch": 0.5371393492940454,
"grad_norm": 4.549371242523193,
"learning_rate": 2.4629220380601596e-05,
"loss": 2.235,
"step": 8750
},
{
"epoch": 0.5402087170042971,
"grad_norm": 3.2161850929260254,
"learning_rate": 2.459852670349908e-05,
"loss": 2.4156,
"step": 8800
},
{
"epoch": 0.5432780847145487,
"grad_norm": 4.914154052734375,
"learning_rate": 2.456783302639656e-05,
"loss": 2.3127,
"step": 8850
},
{
"epoch": 0.5463474524248005,
"grad_norm": 2.948514699935913,
"learning_rate": 2.4537139349294045e-05,
"loss": 2.3924,
"step": 8900
},
{
"epoch": 0.5494168201350522,
"grad_norm": 3.669295072555542,
"learning_rate": 2.4506445672191526e-05,
"loss": 2.3639,
"step": 8950
},
{
"epoch": 0.5524861878453039,
"grad_norm": 4.5282793045043945,
"learning_rate": 2.447575199508901e-05,
"loss": 2.2916,
"step": 9000
},
{
"epoch": 0.5555555555555556,
"grad_norm": 3.6461524963378906,
"learning_rate": 2.4445058317986495e-05,
"loss": 2.4551,
"step": 9050
},
{
"epoch": 0.5586249232658073,
"grad_norm": 3.558283567428589,
"learning_rate": 2.4414364640883975e-05,
"loss": 2.3189,
"step": 9100
},
{
"epoch": 0.5616942909760589,
"grad_norm": 3.31160569190979,
"learning_rate": 2.4383670963781463e-05,
"loss": 2.2667,
"step": 9150
},
{
"epoch": 0.5647636586863106,
"grad_norm": 4.318973541259766,
"learning_rate": 2.4352977286678947e-05,
"loss": 2.3747,
"step": 9200
},
{
"epoch": 0.5678330263965623,
"grad_norm": 3.0922465324401855,
"learning_rate": 2.4322283609576428e-05,
"loss": 2.2472,
"step": 9250
},
{
"epoch": 0.570902394106814,
"grad_norm": 3.615382432937622,
"learning_rate": 2.4291589932473912e-05,
"loss": 2.3784,
"step": 9300
},
{
"epoch": 0.5739717618170657,
"grad_norm": 4.567912578582764,
"learning_rate": 2.4260896255371397e-05,
"loss": 2.3389,
"step": 9350
},
{
"epoch": 0.5770411295273173,
"grad_norm": 6.014679431915283,
"learning_rate": 2.4230202578268877e-05,
"loss": 2.2728,
"step": 9400
},
{
"epoch": 0.580110497237569,
"grad_norm": 3.3985538482666016,
"learning_rate": 2.419950890116636e-05,
"loss": 2.3619,
"step": 9450
},
{
"epoch": 0.5831798649478207,
"grad_norm": 3.6719815731048584,
"learning_rate": 2.4168815224063846e-05,
"loss": 2.3639,
"step": 9500
},
{
"epoch": 0.5862492326580724,
"grad_norm": 4.365039348602295,
"learning_rate": 2.4138121546961327e-05,
"loss": 2.3359,
"step": 9550
},
{
"epoch": 0.5893186003683242,
"grad_norm": 4.179307460784912,
"learning_rate": 2.410742786985881e-05,
"loss": 2.3482,
"step": 9600
},
{
"epoch": 0.5923879680785759,
"grad_norm": 3.4102518558502197,
"learning_rate": 2.4076734192756292e-05,
"loss": 2.3535,
"step": 9650
},
{
"epoch": 0.5954573357888275,
"grad_norm": 4.294723033905029,
"learning_rate": 2.4046040515653776e-05,
"loss": 2.3686,
"step": 9700
},
{
"epoch": 0.5985267034990792,
"grad_norm": 4.105884552001953,
"learning_rate": 2.401534683855126e-05,
"loss": 2.3453,
"step": 9750
},
{
"epoch": 0.6015960712093309,
"grad_norm": 3.321399450302124,
"learning_rate": 2.398465316144874e-05,
"loss": 2.4262,
"step": 9800
},
{
"epoch": 0.6046654389195826,
"grad_norm": 4.505974292755127,
"learning_rate": 2.3953959484346225e-05,
"loss": 2.3449,
"step": 9850
},
{
"epoch": 0.6077348066298343,
"grad_norm": 3.7572021484375,
"learning_rate": 2.392326580724371e-05,
"loss": 2.3861,
"step": 9900
},
{
"epoch": 0.6108041743400859,
"grad_norm": 3.6873481273651123,
"learning_rate": 2.389257213014119e-05,
"loss": 2.3733,
"step": 9950
},
{
"epoch": 0.6138735420503376,
"grad_norm": 4.152860641479492,
"learning_rate": 2.3861878453038675e-05,
"loss": 2.3398,
"step": 10000
},
{
"epoch": 0.6169429097605893,
"grad_norm": 3.437129497528076,
"learning_rate": 2.383118477593616e-05,
"loss": 2.3327,
"step": 10050
},
{
"epoch": 0.620012277470841,
"grad_norm": 4.148296356201172,
"learning_rate": 2.380049109883364e-05,
"loss": 2.278,
"step": 10100
},
{
"epoch": 0.6230816451810927,
"grad_norm": 3.596064805984497,
"learning_rate": 2.3769797421731124e-05,
"loss": 2.38,
"step": 10150
},
{
"epoch": 0.6261510128913443,
"grad_norm": 3.819524049758911,
"learning_rate": 2.3739103744628608e-05,
"loss": 2.2876,
"step": 10200
},
{
"epoch": 0.629220380601596,
"grad_norm": 4.420265197753906,
"learning_rate": 2.370841006752609e-05,
"loss": 2.3627,
"step": 10250
},
{
"epoch": 0.6322897483118478,
"grad_norm": 4.018989562988281,
"learning_rate": 2.3677716390423573e-05,
"loss": 2.3528,
"step": 10300
},
{
"epoch": 0.6353591160220995,
"grad_norm": 3.5963191986083984,
"learning_rate": 2.3647022713321054e-05,
"loss": 2.3546,
"step": 10350
},
{
"epoch": 0.6384284837323512,
"grad_norm": 4.6964874267578125,
"learning_rate": 2.3616329036218538e-05,
"loss": 2.4427,
"step": 10400
},
{
"epoch": 0.6414978514426029,
"grad_norm": 3.6673765182495117,
"learning_rate": 2.3585635359116022e-05,
"loss": 2.3476,
"step": 10450
},
{
"epoch": 0.6445672191528545,
"grad_norm": 4.190234184265137,
"learning_rate": 2.3554941682013503e-05,
"loss": 2.3326,
"step": 10500
},
{
"epoch": 0.6476365868631062,
"grad_norm": 3.8142518997192383,
"learning_rate": 2.3524248004910987e-05,
"loss": 2.2891,
"step": 10550
},
{
"epoch": 0.6507059545733579,
"grad_norm": 3.8022055625915527,
"learning_rate": 2.349355432780847e-05,
"loss": 2.3652,
"step": 10600
},
{
"epoch": 0.6537753222836096,
"grad_norm": 3.8915176391601562,
"learning_rate": 2.3462860650705952e-05,
"loss": 2.3169,
"step": 10650
},
{
"epoch": 0.6568446899938613,
"grad_norm": 3.6545536518096924,
"learning_rate": 2.3432166973603437e-05,
"loss": 2.3371,
"step": 10700
},
{
"epoch": 0.6599140577041129,
"grad_norm": 3.763852596282959,
"learning_rate": 2.3401473296500924e-05,
"loss": 2.3069,
"step": 10750
},
{
"epoch": 0.6629834254143646,
"grad_norm": 3.8284034729003906,
"learning_rate": 2.3370779619398405e-05,
"loss": 2.3334,
"step": 10800
},
{
"epoch": 0.6660527931246163,
"grad_norm": 7.501564979553223,
"learning_rate": 2.334008594229589e-05,
"loss": 2.4061,
"step": 10850
},
{
"epoch": 0.669122160834868,
"grad_norm": 3.7189035415649414,
"learning_rate": 2.3309392265193374e-05,
"loss": 2.3226,
"step": 10900
},
{
"epoch": 0.6721915285451197,
"grad_norm": 3.5018155574798584,
"learning_rate": 2.3278698588090854e-05,
"loss": 2.2974,
"step": 10950
},
{
"epoch": 0.6752608962553714,
"grad_norm": 4.443968772888184,
"learning_rate": 2.324800491098834e-05,
"loss": 2.3936,
"step": 11000
},
{
"epoch": 0.6783302639656231,
"grad_norm": 3.740535020828247,
"learning_rate": 2.321731123388582e-05,
"loss": 2.2829,
"step": 11050
},
{
"epoch": 0.6813996316758748,
"grad_norm": 3.538335084915161,
"learning_rate": 2.3186617556783304e-05,
"loss": 2.2824,
"step": 11100
},
{
"epoch": 0.6844689993861265,
"grad_norm": 3.7425730228424072,
"learning_rate": 2.3155923879680788e-05,
"loss": 2.2808,
"step": 11150
},
{
"epoch": 0.6875383670963782,
"grad_norm": 3.8225579261779785,
"learning_rate": 2.312523020257827e-05,
"loss": 2.3258,
"step": 11200
},
{
"epoch": 0.6906077348066298,
"grad_norm": 4.689228057861328,
"learning_rate": 2.3094536525475753e-05,
"loss": 2.2818,
"step": 11250
},
{
"epoch": 0.6936771025168815,
"grad_norm": 3.968703031539917,
"learning_rate": 2.3063842848373237e-05,
"loss": 2.3767,
"step": 11300
},
{
"epoch": 0.6967464702271332,
"grad_norm": 4.036931037902832,
"learning_rate": 2.3033149171270718e-05,
"loss": 2.3459,
"step": 11350
},
{
"epoch": 0.6998158379373849,
"grad_norm": 4.426519870758057,
"learning_rate": 2.3002455494168202e-05,
"loss": 2.327,
"step": 11400
},
{
"epoch": 0.7028852056476366,
"grad_norm": 3.6122524738311768,
"learning_rate": 2.2971761817065686e-05,
"loss": 2.3813,
"step": 11450
},
{
"epoch": 0.7059545733578882,
"grad_norm": 5.523836612701416,
"learning_rate": 2.2941068139963167e-05,
"loss": 2.3577,
"step": 11500
},
{
"epoch": 0.7090239410681399,
"grad_norm": 3.1946020126342773,
"learning_rate": 2.291037446286065e-05,
"loss": 2.3005,
"step": 11550
},
{
"epoch": 0.7120933087783916,
"grad_norm": 4.517838001251221,
"learning_rate": 2.2879680785758136e-05,
"loss": 2.3537,
"step": 11600
},
{
"epoch": 0.7151626764886433,
"grad_norm": 3.4100501537323,
"learning_rate": 2.2848987108655617e-05,
"loss": 2.3526,
"step": 11650
},
{
"epoch": 0.7182320441988951,
"grad_norm": 4.370871067047119,
"learning_rate": 2.28182934315531e-05,
"loss": 2.2843,
"step": 11700
},
{
"epoch": 0.7213014119091468,
"grad_norm": 3.3597848415374756,
"learning_rate": 2.2787599754450585e-05,
"loss": 2.4289,
"step": 11750
},
{
"epoch": 0.7243707796193984,
"grad_norm": 4.361307144165039,
"learning_rate": 2.2756906077348066e-05,
"loss": 2.3334,
"step": 11800
},
{
"epoch": 0.7274401473296501,
"grad_norm": 3.5192790031433105,
"learning_rate": 2.272621240024555e-05,
"loss": 2.3935,
"step": 11850
},
{
"epoch": 0.7305095150399018,
"grad_norm": 3.058115005493164,
"learning_rate": 2.269551872314303e-05,
"loss": 2.2948,
"step": 11900
},
{
"epoch": 0.7335788827501535,
"grad_norm": 3.7125728130340576,
"learning_rate": 2.2664825046040515e-05,
"loss": 2.2702,
"step": 11950
},
{
"epoch": 0.7366482504604052,
"grad_norm": 3.5723328590393066,
"learning_rate": 2.2634131368938e-05,
"loss": 2.3361,
"step": 12000
},
{
"epoch": 0.7397176181706568,
"grad_norm": 4.428549766540527,
"learning_rate": 2.260343769183548e-05,
"loss": 2.2938,
"step": 12050
},
{
"epoch": 0.7427869858809085,
"grad_norm": 3.8374624252319336,
"learning_rate": 2.2572744014732964e-05,
"loss": 2.2387,
"step": 12100
},
{
"epoch": 0.7458563535911602,
"grad_norm": 4.0965657234191895,
"learning_rate": 2.254205033763045e-05,
"loss": 2.2988,
"step": 12150
},
{
"epoch": 0.7489257213014119,
"grad_norm": 3.138101816177368,
"learning_rate": 2.251135666052793e-05,
"loss": 2.3083,
"step": 12200
},
{
"epoch": 0.7519950890116636,
"grad_norm": 3.7243382930755615,
"learning_rate": 2.2480662983425414e-05,
"loss": 2.3094,
"step": 12250
},
{
"epoch": 0.7550644567219152,
"grad_norm": 4.791036605834961,
"learning_rate": 2.2449969306322898e-05,
"loss": 2.317,
"step": 12300
},
{
"epoch": 0.7581338244321669,
"grad_norm": 4.1747236251831055,
"learning_rate": 2.2419275629220382e-05,
"loss": 2.462,
"step": 12350
},
{
"epoch": 0.7612031921424187,
"grad_norm": 4.427381992340088,
"learning_rate": 2.2388581952117866e-05,
"loss": 2.3935,
"step": 12400
},
{
"epoch": 0.7642725598526704,
"grad_norm": 4.517187118530273,
"learning_rate": 2.235788827501535e-05,
"loss": 2.3619,
"step": 12450
},
{
"epoch": 0.7673419275629221,
"grad_norm": 3.2976391315460205,
"learning_rate": 2.232719459791283e-05,
"loss": 2.3363,
"step": 12500
},
{
"epoch": 0.7704112952731738,
"grad_norm": 3.017157793045044,
"learning_rate": 2.2296500920810316e-05,
"loss": 2.254,
"step": 12550
},
{
"epoch": 0.7734806629834254,
"grad_norm": 4.820321083068848,
"learning_rate": 2.2265807243707796e-05,
"loss": 2.2243,
"step": 12600
},
{
"epoch": 0.7765500306936771,
"grad_norm": 4.536325454711914,
"learning_rate": 2.223511356660528e-05,
"loss": 2.3242,
"step": 12650
},
{
"epoch": 0.7796193984039288,
"grad_norm": 4.465803623199463,
"learning_rate": 2.2204419889502765e-05,
"loss": 2.2615,
"step": 12700
},
{
"epoch": 0.7826887661141805,
"grad_norm": 4.061604022979736,
"learning_rate": 2.2173726212400246e-05,
"loss": 2.2753,
"step": 12750
},
{
"epoch": 0.7857581338244322,
"grad_norm": 3.7470462322235107,
"learning_rate": 2.214303253529773e-05,
"loss": 2.3521,
"step": 12800
},
{
"epoch": 0.7888275015346838,
"grad_norm": 3.7258481979370117,
"learning_rate": 2.2112338858195214e-05,
"loss": 2.2881,
"step": 12850
},
{
"epoch": 0.7918968692449355,
"grad_norm": 4.148687839508057,
"learning_rate": 2.2081645181092695e-05,
"loss": 2.2603,
"step": 12900
},
{
"epoch": 0.7949662369551872,
"grad_norm": 3.804433584213257,
"learning_rate": 2.205095150399018e-05,
"loss": 2.2985,
"step": 12950
},
{
"epoch": 0.7980356046654389,
"grad_norm": 4.394881248474121,
"learning_rate": 2.2020257826887663e-05,
"loss": 2.353,
"step": 13000
},
{
"epoch": 0.8011049723756906,
"grad_norm": 5.619194030761719,
"learning_rate": 2.1989564149785144e-05,
"loss": 2.3212,
"step": 13050
},
{
"epoch": 0.8041743400859422,
"grad_norm": 3.7602977752685547,
"learning_rate": 2.195887047268263e-05,
"loss": 2.2732,
"step": 13100
},
{
"epoch": 0.807243707796194,
"grad_norm": 3.5865325927734375,
"learning_rate": 2.1928176795580113e-05,
"loss": 2.2786,
"step": 13150
},
{
"epoch": 0.8103130755064457,
"grad_norm": 4.248644828796387,
"learning_rate": 2.1897483118477594e-05,
"loss": 2.2986,
"step": 13200
},
{
"epoch": 0.8133824432166974,
"grad_norm": 3.960653781890869,
"learning_rate": 2.1866789441375078e-05,
"loss": 2.3929,
"step": 13250
},
{
"epoch": 0.8164518109269491,
"grad_norm": 4.417232990264893,
"learning_rate": 2.183609576427256e-05,
"loss": 2.331,
"step": 13300
},
{
"epoch": 0.8195211786372008,
"grad_norm": 4.520796298980713,
"learning_rate": 2.1805402087170043e-05,
"loss": 2.3343,
"step": 13350
},
{
"epoch": 0.8225905463474524,
"grad_norm": 3.5469796657562256,
"learning_rate": 2.1774708410067527e-05,
"loss": 2.3623,
"step": 13400
},
{
"epoch": 0.8256599140577041,
"grad_norm": 3.0526225566864014,
"learning_rate": 2.1744014732965008e-05,
"loss": 2.2649,
"step": 13450
},
{
"epoch": 0.8287292817679558,
"grad_norm": 3.904680013656616,
"learning_rate": 2.1713321055862492e-05,
"loss": 2.3419,
"step": 13500
},
{
"epoch": 0.8317986494782075,
"grad_norm": 3.709381580352783,
"learning_rate": 2.1682627378759976e-05,
"loss": 2.37,
"step": 13550
},
{
"epoch": 0.8348680171884592,
"grad_norm": 3.5717175006866455,
"learning_rate": 2.1651933701657457e-05,
"loss": 2.3169,
"step": 13600
},
{
"epoch": 0.8379373848987108,
"grad_norm": 4.073272228240967,
"learning_rate": 2.162124002455494e-05,
"loss": 2.3328,
"step": 13650
},
{
"epoch": 0.8410067526089625,
"grad_norm": 3.8410749435424805,
"learning_rate": 2.1590546347452426e-05,
"loss": 2.3072,
"step": 13700
},
{
"epoch": 0.8440761203192142,
"grad_norm": 4.8291144371032715,
"learning_rate": 2.1559852670349906e-05,
"loss": 2.3592,
"step": 13750
},
{
"epoch": 0.8471454880294659,
"grad_norm": 4.293553352355957,
"learning_rate": 2.152915899324739e-05,
"loss": 2.3657,
"step": 13800
},
{
"epoch": 0.8502148557397177,
"grad_norm": 4.011140823364258,
"learning_rate": 2.1498465316144875e-05,
"loss": 2.304,
"step": 13850
},
{
"epoch": 0.8532842234499693,
"grad_norm": 3.8303871154785156,
"learning_rate": 2.1467771639042356e-05,
"loss": 2.2724,
"step": 13900
},
{
"epoch": 0.856353591160221,
"grad_norm": 4.488582611083984,
"learning_rate": 2.143707796193984e-05,
"loss": 2.2965,
"step": 13950
},
{
"epoch": 0.8594229588704727,
"grad_norm": 3.4766058921813965,
"learning_rate": 2.1406384284837324e-05,
"loss": 2.3577,
"step": 14000
},
{
"epoch": 0.8624923265807244,
"grad_norm": 4.232321262359619,
"learning_rate": 2.137569060773481e-05,
"loss": 2.3609,
"step": 14050
},
{
"epoch": 0.8655616942909761,
"grad_norm": 4.51991605758667,
"learning_rate": 2.1344996930632293e-05,
"loss": 2.3213,
"step": 14100
},
{
"epoch": 0.8686310620012277,
"grad_norm": 3.356311321258545,
"learning_rate": 2.1314303253529773e-05,
"loss": 2.3987,
"step": 14150
},
{
"epoch": 0.8717004297114794,
"grad_norm": 3.596140146255493,
"learning_rate": 2.1283609576427258e-05,
"loss": 2.362,
"step": 14200
},
{
"epoch": 0.8747697974217311,
"grad_norm": 5.02532958984375,
"learning_rate": 2.1252915899324742e-05,
"loss": 2.4156,
"step": 14250
},
{
"epoch": 0.8778391651319828,
"grad_norm": 3.531360626220703,
"learning_rate": 2.1222222222222223e-05,
"loss": 2.312,
"step": 14300
},
{
"epoch": 0.8809085328422345,
"grad_norm": 4.262710094451904,
"learning_rate": 2.1191528545119707e-05,
"loss": 2.3432,
"step": 14350
},
{
"epoch": 0.8839779005524862,
"grad_norm": 4.449579238891602,
"learning_rate": 2.116083486801719e-05,
"loss": 2.3759,
"step": 14400
},
{
"epoch": 0.8870472682627378,
"grad_norm": 4.485136032104492,
"learning_rate": 2.1130141190914672e-05,
"loss": 2.2131,
"step": 14450
},
{
"epoch": 0.8901166359729895,
"grad_norm": 3.402162551879883,
"learning_rate": 2.1099447513812156e-05,
"loss": 2.3088,
"step": 14500
},
{
"epoch": 0.8931860036832413,
"grad_norm": 3.7395241260528564,
"learning_rate": 2.106875383670964e-05,
"loss": 2.3447,
"step": 14550
},
{
"epoch": 0.896255371393493,
"grad_norm": 3.98075008392334,
"learning_rate": 2.103806015960712e-05,
"loss": 2.3214,
"step": 14600
},
{
"epoch": 0.8993247391037447,
"grad_norm": 3.871152400970459,
"learning_rate": 2.1007366482504605e-05,
"loss": 2.3396,
"step": 14650
},
{
"epoch": 0.9023941068139963,
"grad_norm": 3.5539169311523438,
"learning_rate": 2.0976672805402086e-05,
"loss": 2.269,
"step": 14700
},
{
"epoch": 0.905463474524248,
"grad_norm": 4.932919502258301,
"learning_rate": 2.094597912829957e-05,
"loss": 2.3098,
"step": 14750
},
{
"epoch": 0.9085328422344997,
"grad_norm": 3.794808864593506,
"learning_rate": 2.0915285451197055e-05,
"loss": 2.277,
"step": 14800
},
{
"epoch": 0.9116022099447514,
"grad_norm": 3.729210138320923,
"learning_rate": 2.0884591774094536e-05,
"loss": 2.4038,
"step": 14850
},
{
"epoch": 0.9146715776550031,
"grad_norm": 4.467114448547363,
"learning_rate": 2.085389809699202e-05,
"loss": 2.2834,
"step": 14900
},
{
"epoch": 0.9177409453652547,
"grad_norm": 4.059779644012451,
"learning_rate": 2.0823204419889504e-05,
"loss": 2.3535,
"step": 14950
},
{
"epoch": 0.9208103130755064,
"grad_norm": 4.626834392547607,
"learning_rate": 2.0792510742786985e-05,
"loss": 2.2687,
"step": 15000
},
{
"epoch": 0.9238796807857581,
"grad_norm": 4.254091739654541,
"learning_rate": 2.076181706568447e-05,
"loss": 2.3406,
"step": 15050
},
{
"epoch": 0.9269490484960098,
"grad_norm": 4.225036144256592,
"learning_rate": 2.0731123388581953e-05,
"loss": 2.2917,
"step": 15100
},
{
"epoch": 0.9300184162062615,
"grad_norm": 3.2555954456329346,
"learning_rate": 2.0700429711479434e-05,
"loss": 2.2775,
"step": 15150
},
{
"epoch": 0.9330877839165131,
"grad_norm": 4.839592933654785,
"learning_rate": 2.066973603437692e-05,
"loss": 2.2714,
"step": 15200
},
{
"epoch": 0.9361571516267649,
"grad_norm": 4.091184616088867,
"learning_rate": 2.0639042357274403e-05,
"loss": 2.2871,
"step": 15250
},
{
"epoch": 0.9392265193370166,
"grad_norm": 3.666154384613037,
"learning_rate": 2.0608348680171883e-05,
"loss": 2.3108,
"step": 15300
},
{
"epoch": 0.9422958870472683,
"grad_norm": 4.287258625030518,
"learning_rate": 2.0577655003069368e-05,
"loss": 2.2785,
"step": 15350
},
{
"epoch": 0.94536525475752,
"grad_norm": 3.8487017154693604,
"learning_rate": 2.054696132596685e-05,
"loss": 2.3108,
"step": 15400
},
{
"epoch": 0.9484346224677717,
"grad_norm": 3.3819682598114014,
"learning_rate": 2.0516267648864333e-05,
"loss": 2.3336,
"step": 15450
},
{
"epoch": 0.9515039901780233,
"grad_norm": 4.332981109619141,
"learning_rate": 2.0485573971761817e-05,
"loss": 2.2581,
"step": 15500
},
{
"epoch": 0.954573357888275,
"grad_norm": 4.729110240936279,
"learning_rate": 2.0454880294659298e-05,
"loss": 2.3418,
"step": 15550
},
{
"epoch": 0.9576427255985267,
"grad_norm": 3.3030595779418945,
"learning_rate": 2.0424186617556785e-05,
"loss": 2.2136,
"step": 15600
},
{
"epoch": 0.9607120933087784,
"grad_norm": 4.455896377563477,
"learning_rate": 2.039349294045427e-05,
"loss": 2.2574,
"step": 15650
},
{
"epoch": 0.9637814610190301,
"grad_norm": 4.112648010253906,
"learning_rate": 2.036279926335175e-05,
"loss": 2.3621,
"step": 15700
},
{
"epoch": 0.9668508287292817,
"grad_norm": 3.577320098876953,
"learning_rate": 2.0332105586249235e-05,
"loss": 2.2561,
"step": 15750
},
{
"epoch": 0.9699201964395334,
"grad_norm": 4.021890163421631,
"learning_rate": 2.030141190914672e-05,
"loss": 2.3077,
"step": 15800
},
{
"epoch": 0.9729895641497851,
"grad_norm": 3.277940273284912,
"learning_rate": 2.02707182320442e-05,
"loss": 2.2639,
"step": 15850
},
{
"epoch": 0.9760589318600368,
"grad_norm": 4.129881858825684,
"learning_rate": 2.0240024554941684e-05,
"loss": 2.3038,
"step": 15900
},
{
"epoch": 0.9791282995702886,
"grad_norm": 3.3244733810424805,
"learning_rate": 2.0209330877839168e-05,
"loss": 2.346,
"step": 15950
},
{
"epoch": 0.9821976672805403,
"grad_norm": 3.341198444366455,
"learning_rate": 2.017863720073665e-05,
"loss": 2.321,
"step": 16000
},
{
"epoch": 0.9852670349907919,
"grad_norm": 3.724945306777954,
"learning_rate": 2.0147943523634133e-05,
"loss": 2.2743,
"step": 16050
},
{
"epoch": 0.9883364027010436,
"grad_norm": 3.501654624938965,
"learning_rate": 2.0117249846531614e-05,
"loss": 2.2676,
"step": 16100
},
{
"epoch": 0.9914057704112953,
"grad_norm": 4.758657932281494,
"learning_rate": 2.0086556169429098e-05,
"loss": 2.3508,
"step": 16150
},
{
"epoch": 0.994475138121547,
"grad_norm": 3.5216405391693115,
"learning_rate": 2.0055862492326582e-05,
"loss": 2.2924,
"step": 16200
},
{
"epoch": 0.9975445058317987,
"grad_norm": 12.397980690002441,
"learning_rate": 2.0025168815224063e-05,
"loss": 2.2766,
"step": 16250
},
{
"epoch": 1.0006138735420504,
"grad_norm": 3.59272837638855,
"learning_rate": 1.9994475138121548e-05,
"loss": 2.271,
"step": 16300
},
{
"epoch": 1.003683241252302,
"grad_norm": 2.9649710655212402,
"learning_rate": 1.9963781461019032e-05,
"loss": 2.2595,
"step": 16350
},
{
"epoch": 1.0067526089625538,
"grad_norm": 3.3721020221710205,
"learning_rate": 1.9933087783916513e-05,
"loss": 2.2275,
"step": 16400
},
{
"epoch": 1.0098219766728054,
"grad_norm": 3.2784862518310547,
"learning_rate": 1.9902394106813997e-05,
"loss": 2.3262,
"step": 16450
},
{
"epoch": 1.0128913443830572,
"grad_norm": 3.301400661468506,
"learning_rate": 1.987170042971148e-05,
"loss": 2.2383,
"step": 16500
},
{
"epoch": 1.0159607120933087,
"grad_norm": 2.7174506187438965,
"learning_rate": 1.9841006752608962e-05,
"loss": 2.266,
"step": 16550
},
{
"epoch": 1.0190300798035605,
"grad_norm": 4.05548095703125,
"learning_rate": 1.9810313075506446e-05,
"loss": 2.2317,
"step": 16600
},
{
"epoch": 1.022099447513812,
"grad_norm": 3.362386703491211,
"learning_rate": 1.977961939840393e-05,
"loss": 2.2317,
"step": 16650
},
{
"epoch": 1.025168815224064,
"grad_norm": 3.4570345878601074,
"learning_rate": 1.974892572130141e-05,
"loss": 2.3333,
"step": 16700
},
{
"epoch": 1.0282381829343155,
"grad_norm": 3.4020121097564697,
"learning_rate": 1.9718232044198895e-05,
"loss": 2.2965,
"step": 16750
},
{
"epoch": 1.0313075506445673,
"grad_norm": 3.3160858154296875,
"learning_rate": 1.9687538367096376e-05,
"loss": 2.2659,
"step": 16800
},
{
"epoch": 1.0343769183548188,
"grad_norm": 3.5787899494171143,
"learning_rate": 1.965684468999386e-05,
"loss": 2.3484,
"step": 16850
},
{
"epoch": 1.0374462860650706,
"grad_norm": 4.029461860656738,
"learning_rate": 1.9626151012891345e-05,
"loss": 2.3333,
"step": 16900
},
{
"epoch": 1.0405156537753222,
"grad_norm": 3.743760824203491,
"learning_rate": 1.9595457335788825e-05,
"loss": 2.2458,
"step": 16950
},
{
"epoch": 1.043585021485574,
"grad_norm": 3.3272945880889893,
"learning_rate": 1.956476365868631e-05,
"loss": 2.3081,
"step": 17000
},
{
"epoch": 1.0466543891958258,
"grad_norm": 3.3701705932617188,
"learning_rate": 1.9534069981583794e-05,
"loss": 2.19,
"step": 17050
},
{
"epoch": 1.0497237569060773,
"grad_norm": 4.10990571975708,
"learning_rate": 1.9503376304481275e-05,
"loss": 2.2962,
"step": 17100
},
{
"epoch": 1.0527931246163291,
"grad_norm": 3.226930856704712,
"learning_rate": 1.947268262737876e-05,
"loss": 2.3523,
"step": 17150
},
{
"epoch": 1.0558624923265807,
"grad_norm": 3.901716947555542,
"learning_rate": 1.9441988950276247e-05,
"loss": 2.244,
"step": 17200
},
{
"epoch": 1.0589318600368325,
"grad_norm": 3.943704128265381,
"learning_rate": 1.9411295273173727e-05,
"loss": 2.3179,
"step": 17250
},
{
"epoch": 1.062001227747084,
"grad_norm": 3.7991671562194824,
"learning_rate": 1.938060159607121e-05,
"loss": 2.1783,
"step": 17300
},
{
"epoch": 1.0650705954573358,
"grad_norm": 3.63051438331604,
"learning_rate": 1.9349907918968696e-05,
"loss": 2.2307,
"step": 17350
},
{
"epoch": 1.0681399631675874,
"grad_norm": 3.9437952041625977,
"learning_rate": 1.9319214241866177e-05,
"loss": 2.2864,
"step": 17400
},
{
"epoch": 1.0712093308778392,
"grad_norm": 3.544645309448242,
"learning_rate": 1.928852056476366e-05,
"loss": 2.3161,
"step": 17450
},
{
"epoch": 1.0742786985880908,
"grad_norm": 3.8155930042266846,
"learning_rate": 1.9257826887661142e-05,
"loss": 2.2036,
"step": 17500
},
{
"epoch": 1.0773480662983426,
"grad_norm": 3.956928014755249,
"learning_rate": 1.9227133210558626e-05,
"loss": 2.3254,
"step": 17550
},
{
"epoch": 1.0804174340085941,
"grad_norm": 2.997299909591675,
"learning_rate": 1.919643953345611e-05,
"loss": 2.2741,
"step": 17600
},
{
"epoch": 1.083486801718846,
"grad_norm": 3.1786820888519287,
"learning_rate": 1.916574585635359e-05,
"loss": 2.2991,
"step": 17650
},
{
"epoch": 1.0865561694290977,
"grad_norm": 3.475252151489258,
"learning_rate": 1.9135052179251075e-05,
"loss": 2.2742,
"step": 17700
},
{
"epoch": 1.0896255371393493,
"grad_norm": 3.8195457458496094,
"learning_rate": 1.910435850214856e-05,
"loss": 2.2437,
"step": 17750
},
{
"epoch": 1.092694904849601,
"grad_norm": 3.205800771713257,
"learning_rate": 1.907366482504604e-05,
"loss": 2.3202,
"step": 17800
},
{
"epoch": 1.0957642725598526,
"grad_norm": 4.425097465515137,
"learning_rate": 1.9042971147943524e-05,
"loss": 2.2275,
"step": 17850
},
{
"epoch": 1.0988336402701044,
"grad_norm": 3.6546781063079834,
"learning_rate": 1.901227747084101e-05,
"loss": 2.2484,
"step": 17900
},
{
"epoch": 1.101903007980356,
"grad_norm": 3.9607748985290527,
"learning_rate": 1.898158379373849e-05,
"loss": 2.3012,
"step": 17950
},
{
"epoch": 1.1049723756906078,
"grad_norm": 3.728654623031616,
"learning_rate": 1.8950890116635974e-05,
"loss": 2.2324,
"step": 18000
},
{
"epoch": 1.1080417434008594,
"grad_norm": 4.3351149559021,
"learning_rate": 1.8920196439533458e-05,
"loss": 2.2714,
"step": 18050
},
{
"epoch": 1.1111111111111112,
"grad_norm": 3.8495333194732666,
"learning_rate": 1.888950276243094e-05,
"loss": 2.2416,
"step": 18100
},
{
"epoch": 1.1141804788213627,
"grad_norm": 3.4237616062164307,
"learning_rate": 1.8858809085328423e-05,
"loss": 2.2531,
"step": 18150
},
{
"epoch": 1.1172498465316145,
"grad_norm": 4.699343681335449,
"learning_rate": 1.8828115408225904e-05,
"loss": 2.299,
"step": 18200
},
{
"epoch": 1.120319214241866,
"grad_norm": 3.130164861679077,
"learning_rate": 1.8797421731123388e-05,
"loss": 2.2208,
"step": 18250
},
{
"epoch": 1.1233885819521179,
"grad_norm": 3.84944224357605,
"learning_rate": 1.8766728054020872e-05,
"loss": 2.2946,
"step": 18300
},
{
"epoch": 1.1264579496623695,
"grad_norm": 3.48579478263855,
"learning_rate": 1.8736034376918353e-05,
"loss": 2.2766,
"step": 18350
},
{
"epoch": 1.1295273173726212,
"grad_norm": 3.44059157371521,
"learning_rate": 1.8705340699815837e-05,
"loss": 2.3324,
"step": 18400
},
{
"epoch": 1.132596685082873,
"grad_norm": 4.056128978729248,
"learning_rate": 1.867464702271332e-05,
"loss": 2.2425,
"step": 18450
},
{
"epoch": 1.1356660527931246,
"grad_norm": 3.4333908557891846,
"learning_rate": 1.8643953345610802e-05,
"loss": 2.2956,
"step": 18500
},
{
"epoch": 1.1387354205033764,
"grad_norm": 3.3986668586730957,
"learning_rate": 1.8613259668508287e-05,
"loss": 2.2745,
"step": 18550
},
{
"epoch": 1.141804788213628,
"grad_norm": 3.854893684387207,
"learning_rate": 1.858256599140577e-05,
"loss": 2.2604,
"step": 18600
},
{
"epoch": 1.1448741559238798,
"grad_norm": 3.557697296142578,
"learning_rate": 1.8551872314303252e-05,
"loss": 2.2547,
"step": 18650
},
{
"epoch": 1.1479435236341313,
"grad_norm": 3.741943597793579,
"learning_rate": 1.8521178637200736e-05,
"loss": 2.3682,
"step": 18700
},
{
"epoch": 1.1510128913443831,
"grad_norm": 4.78795862197876,
"learning_rate": 1.849048496009822e-05,
"loss": 2.2635,
"step": 18750
},
{
"epoch": 1.1540822590546347,
"grad_norm": 2.904060125350952,
"learning_rate": 1.8459791282995704e-05,
"loss": 2.2896,
"step": 18800
},
{
"epoch": 1.1571516267648865,
"grad_norm": 4.221961498260498,
"learning_rate": 1.842909760589319e-05,
"loss": 2.2288,
"step": 18850
},
{
"epoch": 1.160220994475138,
"grad_norm": 3.9817323684692383,
"learning_rate": 1.839840392879067e-05,
"loss": 2.314,
"step": 18900
},
{
"epoch": 1.1632903621853898,
"grad_norm": 4.283735275268555,
"learning_rate": 1.8367710251688154e-05,
"loss": 2.3065,
"step": 18950
},
{
"epoch": 1.1663597298956414,
"grad_norm": 4.681687831878662,
"learning_rate": 1.8337016574585638e-05,
"loss": 2.2481,
"step": 19000
},
{
"epoch": 1.1694290976058932,
"grad_norm": 5.139055252075195,
"learning_rate": 1.830632289748312e-05,
"loss": 2.2553,
"step": 19050
},
{
"epoch": 1.1724984653161448,
"grad_norm": 2.987617015838623,
"learning_rate": 1.8275629220380603e-05,
"loss": 2.2837,
"step": 19100
},
{
"epoch": 1.1755678330263966,
"grad_norm": 3.6754627227783203,
"learning_rate": 1.8244935543278087e-05,
"loss": 2.3089,
"step": 19150
},
{
"epoch": 1.1786372007366483,
"grad_norm": 4.376922607421875,
"learning_rate": 1.8214241866175568e-05,
"loss": 2.3423,
"step": 19200
},
{
"epoch": 1.1817065684469,
"grad_norm": 3.4154927730560303,
"learning_rate": 1.8183548189073052e-05,
"loss": 2.2326,
"step": 19250
},
{
"epoch": 1.1847759361571517,
"grad_norm": 3.935561418533325,
"learning_rate": 1.8152854511970536e-05,
"loss": 2.2588,
"step": 19300
},
{
"epoch": 1.1878453038674033,
"grad_norm": 3.814129114151001,
"learning_rate": 1.8122160834868017e-05,
"loss": 2.2798,
"step": 19350
},
{
"epoch": 1.190914671577655,
"grad_norm": 4.349081516265869,
"learning_rate": 1.80914671577655e-05,
"loss": 2.2618,
"step": 19400
},
{
"epoch": 1.1939840392879066,
"grad_norm": 4.567361354827881,
"learning_rate": 1.8060773480662986e-05,
"loss": 2.326,
"step": 19450
},
{
"epoch": 1.1970534069981584,
"grad_norm": 4.0694427490234375,
"learning_rate": 1.8030079803560467e-05,
"loss": 2.3532,
"step": 19500
},
{
"epoch": 1.20012277470841,
"grad_norm": 4.104779243469238,
"learning_rate": 1.799938612645795e-05,
"loss": 2.2995,
"step": 19550
},
{
"epoch": 1.2031921424186618,
"grad_norm": 3.412951707839966,
"learning_rate": 1.796869244935543e-05,
"loss": 2.3195,
"step": 19600
},
{
"epoch": 1.2062615101289134,
"grad_norm": 3.1561272144317627,
"learning_rate": 1.7937998772252916e-05,
"loss": 2.2425,
"step": 19650
},
{
"epoch": 1.2093308778391652,
"grad_norm": 3.319150924682617,
"learning_rate": 1.79073050951504e-05,
"loss": 2.3061,
"step": 19700
},
{
"epoch": 1.212400245549417,
"grad_norm": 3.917623281478882,
"learning_rate": 1.787661141804788e-05,
"loss": 2.2989,
"step": 19750
},
{
"epoch": 1.2154696132596685,
"grad_norm": 3.800072193145752,
"learning_rate": 1.7845917740945365e-05,
"loss": 2.2609,
"step": 19800
},
{
"epoch": 1.21853898096992,
"grad_norm": 3.723968505859375,
"learning_rate": 1.781522406384285e-05,
"loss": 2.3172,
"step": 19850
},
{
"epoch": 1.2216083486801719,
"grad_norm": 4.040971755981445,
"learning_rate": 1.778453038674033e-05,
"loss": 2.224,
"step": 19900
},
{
"epoch": 1.2246777163904237,
"grad_norm": 3.918321132659912,
"learning_rate": 1.7753836709637814e-05,
"loss": 2.3556,
"step": 19950
},
{
"epoch": 1.2277470841006752,
"grad_norm": 4.419713973999023,
"learning_rate": 1.77231430325353e-05,
"loss": 2.3278,
"step": 20000
},
{
"epoch": 1.230816451810927,
"grad_norm": 4.213504791259766,
"learning_rate": 1.769244935543278e-05,
"loss": 2.2026,
"step": 20050
},
{
"epoch": 1.2338858195211786,
"grad_norm": 3.972687005996704,
"learning_rate": 1.7661755678330264e-05,
"loss": 2.2754,
"step": 20100
},
{
"epoch": 1.2369551872314304,
"grad_norm": 4.094639301300049,
"learning_rate": 1.7631062001227748e-05,
"loss": 2.2452,
"step": 20150
},
{
"epoch": 1.240024554941682,
"grad_norm": 2.83046817779541,
"learning_rate": 1.760036832412523e-05,
"loss": 2.3677,
"step": 20200
},
{
"epoch": 1.2430939226519337,
"grad_norm": 3.1770524978637695,
"learning_rate": 1.7569674647022713e-05,
"loss": 2.2744,
"step": 20250
},
{
"epoch": 1.2461632903621853,
"grad_norm": 3.803001880645752,
"learning_rate": 1.7538980969920194e-05,
"loss": 2.1896,
"step": 20300
},
{
"epoch": 1.249232658072437,
"grad_norm": 2.9435923099517822,
"learning_rate": 1.7508287292817678e-05,
"loss": 2.2679,
"step": 20350
},
{
"epoch": 1.2523020257826887,
"grad_norm": 3.0736653804779053,
"learning_rate": 1.7477593615715166e-05,
"loss": 2.2378,
"step": 20400
},
{
"epoch": 1.2553713934929405,
"grad_norm": 4.1547627449035645,
"learning_rate": 1.7446899938612646e-05,
"loss": 2.2726,
"step": 20450
},
{
"epoch": 1.2584407612031923,
"grad_norm": 4.235386848449707,
"learning_rate": 1.741620626151013e-05,
"loss": 2.291,
"step": 20500
},
{
"epoch": 1.2615101289134438,
"grad_norm": 3.412493944168091,
"learning_rate": 1.7385512584407615e-05,
"loss": 2.3055,
"step": 20550
},
{
"epoch": 1.2645794966236954,
"grad_norm": 3.837425947189331,
"learning_rate": 1.7354818907305096e-05,
"loss": 2.3072,
"step": 20600
},
{
"epoch": 1.2676488643339472,
"grad_norm": 3.7470505237579346,
"learning_rate": 1.732412523020258e-05,
"loss": 2.2997,
"step": 20650
},
{
"epoch": 1.270718232044199,
"grad_norm": 4.290903568267822,
"learning_rate": 1.7293431553100064e-05,
"loss": 2.3537,
"step": 20700
},
{
"epoch": 1.2737875997544506,
"grad_norm": 4.011292457580566,
"learning_rate": 1.7262737875997545e-05,
"loss": 2.1966,
"step": 20750
},
{
"epoch": 1.2768569674647023,
"grad_norm": 3.7366220951080322,
"learning_rate": 1.723204419889503e-05,
"loss": 2.2752,
"step": 20800
},
{
"epoch": 1.279926335174954,
"grad_norm": 3.9714245796203613,
"learning_rate": 1.7201350521792513e-05,
"loss": 2.2664,
"step": 20850
},
{
"epoch": 1.2829957028852057,
"grad_norm": 3.8838491439819336,
"learning_rate": 1.7170656844689994e-05,
"loss": 2.2699,
"step": 20900
},
{
"epoch": 1.2860650705954573,
"grad_norm": 4.699042320251465,
"learning_rate": 1.713996316758748e-05,
"loss": 2.314,
"step": 20950
},
{
"epoch": 1.289134438305709,
"grad_norm": 3.9477968215942383,
"learning_rate": 1.710926949048496e-05,
"loss": 2.1777,
"step": 21000
},
{
"epoch": 1.2922038060159606,
"grad_norm": 3.690079927444458,
"learning_rate": 1.7078575813382444e-05,
"loss": 2.2705,
"step": 21050
},
{
"epoch": 1.2952731737262124,
"grad_norm": 3.5377986431121826,
"learning_rate": 1.7047882136279928e-05,
"loss": 2.2185,
"step": 21100
},
{
"epoch": 1.298342541436464,
"grad_norm": 4.657019138336182,
"learning_rate": 1.701718845917741e-05,
"loss": 2.3273,
"step": 21150
},
{
"epoch": 1.3014119091467158,
"grad_norm": 4.9236040115356445,
"learning_rate": 1.6986494782074893e-05,
"loss": 2.2613,
"step": 21200
},
{
"epoch": 1.3044812768569676,
"grad_norm": 3.1163363456726074,
"learning_rate": 1.6955801104972377e-05,
"loss": 2.2433,
"step": 21250
},
{
"epoch": 1.3075506445672191,
"grad_norm": 3.7134690284729004,
"learning_rate": 1.6925107427869858e-05,
"loss": 2.3434,
"step": 21300
},
{
"epoch": 1.3106200122774707,
"grad_norm": 3.383561372756958,
"learning_rate": 1.6894413750767342e-05,
"loss": 2.1809,
"step": 21350
},
{
"epoch": 1.3136893799877225,
"grad_norm": 4.129547595977783,
"learning_rate": 1.6863720073664826e-05,
"loss": 2.3241,
"step": 21400
},
{
"epoch": 1.3167587476979743,
"grad_norm": 3.206624984741211,
"learning_rate": 1.6833026396562307e-05,
"loss": 2.2917,
"step": 21450
},
{
"epoch": 1.3198281154082259,
"grad_norm": 3.993472099304199,
"learning_rate": 1.680233271945979e-05,
"loss": 2.2888,
"step": 21500
},
{
"epoch": 1.3228974831184777,
"grad_norm": 3.8573262691497803,
"learning_rate": 1.6771639042357276e-05,
"loss": 2.3034,
"step": 21550
},
{
"epoch": 1.3259668508287292,
"grad_norm": 4.161017417907715,
"learning_rate": 1.6740945365254756e-05,
"loss": 2.3173,
"step": 21600
},
{
"epoch": 1.329036218538981,
"grad_norm": 3.6462788581848145,
"learning_rate": 1.671025168815224e-05,
"loss": 2.2411,
"step": 21650
},
{
"epoch": 1.3321055862492326,
"grad_norm": 5.160103797912598,
"learning_rate": 1.667955801104972e-05,
"loss": 2.3113,
"step": 21700
},
{
"epoch": 1.3351749539594844,
"grad_norm": 8.680712699890137,
"learning_rate": 1.6648864333947206e-05,
"loss": 2.2769,
"step": 21750
},
{
"epoch": 1.3382443216697362,
"grad_norm": 4.962557315826416,
"learning_rate": 1.661817065684469e-05,
"loss": 2.2755,
"step": 21800
},
{
"epoch": 1.3413136893799877,
"grad_norm": 4.264275074005127,
"learning_rate": 1.658747697974217e-05,
"loss": 2.2863,
"step": 21850
},
{
"epoch": 1.3443830570902393,
"grad_norm": 4.581940650939941,
"learning_rate": 1.6556783302639655e-05,
"loss": 2.4296,
"step": 21900
},
{
"epoch": 1.347452424800491,
"grad_norm": 3.814467430114746,
"learning_rate": 1.652608962553714e-05,
"loss": 2.258,
"step": 21950
},
{
"epoch": 1.350521792510743,
"grad_norm": 3.2274892330169678,
"learning_rate": 1.6495395948434623e-05,
"loss": 2.2739,
"step": 22000
},
{
"epoch": 1.3535911602209945,
"grad_norm": 3.3576676845550537,
"learning_rate": 1.6464702271332108e-05,
"loss": 2.3536,
"step": 22050
},
{
"epoch": 1.356660527931246,
"grad_norm": 3.561453104019165,
"learning_rate": 1.6434008594229592e-05,
"loss": 2.338,
"step": 22100
},
{
"epoch": 1.3597298956414978,
"grad_norm": 3.8528378009796143,
"learning_rate": 1.6403314917127073e-05,
"loss": 2.2749,
"step": 22150
},
{
"epoch": 1.3627992633517496,
"grad_norm": 3.7933218479156494,
"learning_rate": 1.6372621240024557e-05,
"loss": 2.1698,
"step": 22200
},
{
"epoch": 1.3658686310620012,
"grad_norm": 4.1472578048706055,
"learning_rate": 1.634192756292204e-05,
"loss": 2.2975,
"step": 22250
},
{
"epoch": 1.368937998772253,
"grad_norm": 4.129203796386719,
"learning_rate": 1.6311233885819522e-05,
"loss": 2.2525,
"step": 22300
},
{
"epoch": 1.3720073664825045,
"grad_norm": 4.041978359222412,
"learning_rate": 1.6280540208717006e-05,
"loss": 2.3245,
"step": 22350
},
{
"epoch": 1.3750767341927563,
"grad_norm": 3.7860097885131836,
"learning_rate": 1.6249846531614487e-05,
"loss": 2.2678,
"step": 22400
},
{
"epoch": 1.378146101903008,
"grad_norm": 3.5321691036224365,
"learning_rate": 1.621915285451197e-05,
"loss": 2.2928,
"step": 22450
},
{
"epoch": 1.3812154696132597,
"grad_norm": 3.8494341373443604,
"learning_rate": 1.6188459177409455e-05,
"loss": 2.3158,
"step": 22500
},
{
"epoch": 1.3842848373235115,
"grad_norm": 3.3036093711853027,
"learning_rate": 1.6157765500306936e-05,
"loss": 2.1744,
"step": 22550
},
{
"epoch": 1.387354205033763,
"grad_norm": 3.412515163421631,
"learning_rate": 1.612707182320442e-05,
"loss": 2.2236,
"step": 22600
},
{
"epoch": 1.3904235727440146,
"grad_norm": 3.514040470123291,
"learning_rate": 1.6096378146101905e-05,
"loss": 2.2406,
"step": 22650
},
{
"epoch": 1.3934929404542664,
"grad_norm": 3.6580166816711426,
"learning_rate": 1.6065684468999386e-05,
"loss": 2.2914,
"step": 22700
},
{
"epoch": 1.3965623081645182,
"grad_norm": 4.870865821838379,
"learning_rate": 1.603499079189687e-05,
"loss": 2.2971,
"step": 22750
},
{
"epoch": 1.3996316758747698,
"grad_norm": 4.047878742218018,
"learning_rate": 1.6004297114794354e-05,
"loss": 2.3312,
"step": 22800
},
{
"epoch": 1.4027010435850216,
"grad_norm": 3.317620038986206,
"learning_rate": 1.5973603437691835e-05,
"loss": 2.2121,
"step": 22850
},
{
"epoch": 1.4057704112952731,
"grad_norm": 3.2293405532836914,
"learning_rate": 1.594290976058932e-05,
"loss": 2.268,
"step": 22900
},
{
"epoch": 1.408839779005525,
"grad_norm": 3.658886194229126,
"learning_rate": 1.5912216083486803e-05,
"loss": 2.2022,
"step": 22950
},
{
"epoch": 1.4119091467157765,
"grad_norm": 4.797260761260986,
"learning_rate": 1.5881522406384284e-05,
"loss": 2.2916,
"step": 23000
},
{
"epoch": 1.4149785144260283,
"grad_norm": 4.262215614318848,
"learning_rate": 1.585082872928177e-05,
"loss": 2.2257,
"step": 23050
},
{
"epoch": 1.4180478821362799,
"grad_norm": 3.0167343616485596,
"learning_rate": 1.582013505217925e-05,
"loss": 2.2285,
"step": 23100
},
{
"epoch": 1.4211172498465316,
"grad_norm": 3.6330764293670654,
"learning_rate": 1.5789441375076733e-05,
"loss": 2.3057,
"step": 23150
},
{
"epoch": 1.4241866175567832,
"grad_norm": 4.605088233947754,
"learning_rate": 1.5758747697974218e-05,
"loss": 2.2406,
"step": 23200
},
{
"epoch": 1.427255985267035,
"grad_norm": 3.957474708557129,
"learning_rate": 1.57280540208717e-05,
"loss": 2.2519,
"step": 23250
},
{
"epoch": 1.4303253529772868,
"grad_norm": 3.5939078330993652,
"learning_rate": 1.5697360343769183e-05,
"loss": 2.2892,
"step": 23300
},
{
"epoch": 1.4333947206875384,
"grad_norm": 3.805011034011841,
"learning_rate": 1.5666666666666667e-05,
"loss": 2.2179,
"step": 23350
},
{
"epoch": 1.43646408839779,
"grad_norm": 3.5911526679992676,
"learning_rate": 1.5635972989564148e-05,
"loss": 2.313,
"step": 23400
},
{
"epoch": 1.4395334561080417,
"grad_norm": 3.6143059730529785,
"learning_rate": 1.5605279312461632e-05,
"loss": 2.2352,
"step": 23450
},
{
"epoch": 1.4426028238182935,
"grad_norm": 4.9773077964782715,
"learning_rate": 1.5574585635359116e-05,
"loss": 2.266,
"step": 23500
},
{
"epoch": 1.445672191528545,
"grad_norm": 3.49001407623291,
"learning_rate": 1.5543891958256597e-05,
"loss": 2.4199,
"step": 23550
},
{
"epoch": 1.4487415592387969,
"grad_norm": 4.041284084320068,
"learning_rate": 1.551319828115408e-05,
"loss": 2.2682,
"step": 23600
},
{
"epoch": 1.4518109269490485,
"grad_norm": 4.0507121086120605,
"learning_rate": 1.548250460405157e-05,
"loss": 2.3086,
"step": 23650
},
{
"epoch": 1.4548802946593002,
"grad_norm": 4.48442268371582,
"learning_rate": 1.545181092694905e-05,
"loss": 2.2863,
"step": 23700
},
{
"epoch": 1.4579496623695518,
"grad_norm": 4.268632888793945,
"learning_rate": 1.5421117249846534e-05,
"loss": 2.2778,
"step": 23750
},
{
"epoch": 1.4610190300798036,
"grad_norm": 3.334290027618408,
"learning_rate": 1.5390423572744015e-05,
"loss": 2.2268,
"step": 23800
},
{
"epoch": 1.4640883977900552,
"grad_norm": 4.395374774932861,
"learning_rate": 1.53597298956415e-05,
"loss": 2.3163,
"step": 23850
},
{
"epoch": 1.467157765500307,
"grad_norm": 4.427293300628662,
"learning_rate": 1.5329036218538983e-05,
"loss": 2.3159,
"step": 23900
},
{
"epoch": 1.4702271332105585,
"grad_norm": 3.552321195602417,
"learning_rate": 1.5298342541436464e-05,
"loss": 2.3377,
"step": 23950
},
{
"epoch": 1.4732965009208103,
"grad_norm": 3.2035748958587646,
"learning_rate": 1.5267648864333948e-05,
"loss": 2.2654,
"step": 24000
},
{
"epoch": 1.4763658686310621,
"grad_norm": 3.877993106842041,
"learning_rate": 1.5236955187231432e-05,
"loss": 2.3279,
"step": 24050
},
{
"epoch": 1.4794352363413137,
"grad_norm": 4.105770111083984,
"learning_rate": 1.5206261510128913e-05,
"loss": 2.2001,
"step": 24100
},
{
"epoch": 1.4825046040515653,
"grad_norm": 5.055785655975342,
"learning_rate": 1.5175567833026397e-05,
"loss": 2.2383,
"step": 24150
},
{
"epoch": 1.485573971761817,
"grad_norm": 3.5279541015625,
"learning_rate": 1.5144874155923882e-05,
"loss": 2.2956,
"step": 24200
},
{
"epoch": 1.4886433394720688,
"grad_norm": 3.2130086421966553,
"learning_rate": 1.5114180478821363e-05,
"loss": 2.2689,
"step": 24250
},
{
"epoch": 1.4917127071823204,
"grad_norm": 3.5005886554718018,
"learning_rate": 1.5083486801718847e-05,
"loss": 2.226,
"step": 24300
},
{
"epoch": 1.4947820748925722,
"grad_norm": 3.9424734115600586,
"learning_rate": 1.5052793124616331e-05,
"loss": 2.207,
"step": 24350
},
{
"epoch": 1.4978514426028238,
"grad_norm": 3.7467117309570312,
"learning_rate": 1.5022099447513812e-05,
"loss": 2.2265,
"step": 24400
},
{
"epoch": 1.5009208103130756,
"grad_norm": 3.600050926208496,
"learning_rate": 1.4991405770411296e-05,
"loss": 2.3054,
"step": 24450
},
{
"epoch": 1.5039901780233271,
"grad_norm": 3.9778027534484863,
"learning_rate": 1.4960712093308779e-05,
"loss": 2.2486,
"step": 24500
},
{
"epoch": 1.507059545733579,
"grad_norm": 5.874206066131592,
"learning_rate": 1.4930018416206261e-05,
"loss": 2.2713,
"step": 24550
},
{
"epoch": 1.5101289134438307,
"grad_norm": 3.219372034072876,
"learning_rate": 1.4899324739103745e-05,
"loss": 2.2801,
"step": 24600
},
{
"epoch": 1.5131982811540823,
"grad_norm": 4.86896276473999,
"learning_rate": 1.4868631062001228e-05,
"loss": 2.3152,
"step": 24650
},
{
"epoch": 1.5162676488643339,
"grad_norm": 3.7367022037506104,
"learning_rate": 1.483793738489871e-05,
"loss": 2.2584,
"step": 24700
},
{
"epoch": 1.5193370165745856,
"grad_norm": 6.774600028991699,
"learning_rate": 1.4807243707796193e-05,
"loss": 2.2155,
"step": 24750
},
{
"epoch": 1.5224063842848374,
"grad_norm": 3.1714091300964355,
"learning_rate": 1.4776550030693677e-05,
"loss": 2.317,
"step": 24800
},
{
"epoch": 1.525475751995089,
"grad_norm": 3.4561657905578613,
"learning_rate": 1.4745856353591161e-05,
"loss": 2.2814,
"step": 24850
},
{
"epoch": 1.5285451197053406,
"grad_norm": 3.321249485015869,
"learning_rate": 1.4715162676488644e-05,
"loss": 2.2776,
"step": 24900
},
{
"epoch": 1.5316144874155924,
"grad_norm": 2.9775593280792236,
"learning_rate": 1.4684468999386128e-05,
"loss": 2.2629,
"step": 24950
},
{
"epoch": 1.5346838551258442,
"grad_norm": 2.6327016353607178,
"learning_rate": 1.465377532228361e-05,
"loss": 2.2041,
"step": 25000
},
{
"epoch": 1.5377532228360957,
"grad_norm": 4.254408359527588,
"learning_rate": 1.4623081645181093e-05,
"loss": 2.2525,
"step": 25050
},
{
"epoch": 1.5408225905463473,
"grad_norm": 4.363503932952881,
"learning_rate": 1.4592387968078576e-05,
"loss": 2.211,
"step": 25100
},
{
"epoch": 1.5438919582565993,
"grad_norm": 4.6273579597473145,
"learning_rate": 1.456169429097606e-05,
"loss": 2.308,
"step": 25150
},
{
"epoch": 1.5469613259668509,
"grad_norm": 3.543792247772217,
"learning_rate": 1.4531000613873542e-05,
"loss": 2.2355,
"step": 25200
},
{
"epoch": 1.5500306936771024,
"grad_norm": 3.429605007171631,
"learning_rate": 1.4500306936771025e-05,
"loss": 2.2436,
"step": 25250
},
{
"epoch": 1.5531000613873542,
"grad_norm": 4.589274883270264,
"learning_rate": 1.446961325966851e-05,
"loss": 2.3328,
"step": 25300
},
{
"epoch": 1.556169429097606,
"grad_norm": 3.7569265365600586,
"learning_rate": 1.4438919582565992e-05,
"loss": 2.318,
"step": 25350
},
{
"epoch": 1.5592387968078576,
"grad_norm": 4.732515811920166,
"learning_rate": 1.4408225905463474e-05,
"loss": 2.2883,
"step": 25400
},
{
"epoch": 1.5623081645181092,
"grad_norm": 3.1093533039093018,
"learning_rate": 1.4377532228360957e-05,
"loss": 2.3087,
"step": 25450
},
{
"epoch": 1.565377532228361,
"grad_norm": 5.486563682556152,
"learning_rate": 1.4346838551258441e-05,
"loss": 2.3276,
"step": 25500
},
{
"epoch": 1.5684468999386127,
"grad_norm": 4.043442249298096,
"learning_rate": 1.4316144874155923e-05,
"loss": 2.1952,
"step": 25550
},
{
"epoch": 1.5715162676488643,
"grad_norm": 3.298995018005371,
"learning_rate": 1.4285451197053406e-05,
"loss": 2.2533,
"step": 25600
},
{
"epoch": 1.5745856353591159,
"grad_norm": 3.928128719329834,
"learning_rate": 1.4254757519950892e-05,
"loss": 2.2624,
"step": 25650
},
{
"epoch": 1.5776550030693677,
"grad_norm": 4.050337791442871,
"learning_rate": 1.4224063842848374e-05,
"loss": 2.1662,
"step": 25700
},
{
"epoch": 1.5807243707796195,
"grad_norm": 4.037144660949707,
"learning_rate": 1.4193370165745857e-05,
"loss": 2.3193,
"step": 25750
},
{
"epoch": 1.583793738489871,
"grad_norm": 5.330986976623535,
"learning_rate": 1.416267648864334e-05,
"loss": 2.2778,
"step": 25800
},
{
"epoch": 1.5868631062001226,
"grad_norm": 4.488786697387695,
"learning_rate": 1.4131982811540824e-05,
"loss": 2.2893,
"step": 25850
},
{
"epoch": 1.5899324739103746,
"grad_norm": 3.4088134765625,
"learning_rate": 1.4101289134438306e-05,
"loss": 2.3159,
"step": 25900
},
{
"epoch": 1.5930018416206262,
"grad_norm": 4.404228687286377,
"learning_rate": 1.4070595457335789e-05,
"loss": 2.2411,
"step": 25950
},
{
"epoch": 1.5960712093308778,
"grad_norm": 3.9491429328918457,
"learning_rate": 1.4039901780233273e-05,
"loss": 2.2032,
"step": 26000
},
{
"epoch": 1.5991405770411296,
"grad_norm": 3.8530337810516357,
"learning_rate": 1.4009208103130756e-05,
"loss": 2.1635,
"step": 26050
},
{
"epoch": 1.6022099447513813,
"grad_norm": 6.127511978149414,
"learning_rate": 1.3978514426028238e-05,
"loss": 2.287,
"step": 26100
},
{
"epoch": 1.605279312461633,
"grad_norm": 3.831045389175415,
"learning_rate": 1.394782074892572e-05,
"loss": 2.2275,
"step": 26150
},
{
"epoch": 1.6083486801718845,
"grad_norm": 3.487755060195923,
"learning_rate": 1.3917127071823205e-05,
"loss": 2.302,
"step": 26200
},
{
"epoch": 1.6114180478821363,
"grad_norm": 3.54748272895813,
"learning_rate": 1.3886433394720687e-05,
"loss": 2.2559,
"step": 26250
},
{
"epoch": 1.614487415592388,
"grad_norm": 3.1733102798461914,
"learning_rate": 1.385573971761817e-05,
"loss": 2.2246,
"step": 26300
},
{
"epoch": 1.6175567833026396,
"grad_norm": 3.280029773712158,
"learning_rate": 1.3825046040515654e-05,
"loss": 2.223,
"step": 26350
},
{
"epoch": 1.6206261510128912,
"grad_norm": 4.188273906707764,
"learning_rate": 1.3794352363413137e-05,
"loss": 2.2478,
"step": 26400
},
{
"epoch": 1.623695518723143,
"grad_norm": 4.134437084197998,
"learning_rate": 1.376365868631062e-05,
"loss": 2.1995,
"step": 26450
},
{
"epoch": 1.6267648864333948,
"grad_norm": 3.6614558696746826,
"learning_rate": 1.3732965009208103e-05,
"loss": 2.3007,
"step": 26500
},
{
"epoch": 1.6298342541436464,
"grad_norm": 3.6023659706115723,
"learning_rate": 1.3702271332105588e-05,
"loss": 2.2745,
"step": 26550
},
{
"epoch": 1.6329036218538981,
"grad_norm": 4.1788201332092285,
"learning_rate": 1.367157765500307e-05,
"loss": 2.2729,
"step": 26600
},
{
"epoch": 1.63597298956415,
"grad_norm": 3.9169983863830566,
"learning_rate": 1.3640883977900553e-05,
"loss": 2.2836,
"step": 26650
},
{
"epoch": 1.6390423572744015,
"grad_norm": 3.853062152862549,
"learning_rate": 1.3610190300798037e-05,
"loss": 2.271,
"step": 26700
},
{
"epoch": 1.642111724984653,
"grad_norm": 4.5239667892456055,
"learning_rate": 1.357949662369552e-05,
"loss": 2.2514,
"step": 26750
},
{
"epoch": 1.6451810926949049,
"grad_norm": 4.2847065925598145,
"learning_rate": 1.3548802946593002e-05,
"loss": 2.2787,
"step": 26800
},
{
"epoch": 1.6482504604051567,
"grad_norm": 4.32819128036499,
"learning_rate": 1.3518109269490484e-05,
"loss": 2.1954,
"step": 26850
},
{
"epoch": 1.6513198281154082,
"grad_norm": 4.206119537353516,
"learning_rate": 1.3487415592387969e-05,
"loss": 2.2867,
"step": 26900
},
{
"epoch": 1.6543891958256598,
"grad_norm": 3.985600709915161,
"learning_rate": 1.3456721915285451e-05,
"loss": 2.2126,
"step": 26950
},
{
"epoch": 1.6574585635359116,
"grad_norm": 3.822664976119995,
"learning_rate": 1.3426028238182934e-05,
"loss": 2.2767,
"step": 27000
},
{
"epoch": 1.6605279312461634,
"grad_norm": 4.091802597045898,
"learning_rate": 1.3395334561080418e-05,
"loss": 2.2247,
"step": 27050
},
{
"epoch": 1.663597298956415,
"grad_norm": 4.74222993850708,
"learning_rate": 1.33646408839779e-05,
"loss": 2.2001,
"step": 27100
},
{
"epoch": 1.6666666666666665,
"grad_norm": 3.1740357875823975,
"learning_rate": 1.3333947206875383e-05,
"loss": 2.2442,
"step": 27150
},
{
"epoch": 1.6697360343769183,
"grad_norm": 5.706885814666748,
"learning_rate": 1.3303253529772866e-05,
"loss": 2.2529,
"step": 27200
},
{
"epoch": 1.67280540208717,
"grad_norm": 4.168138027191162,
"learning_rate": 1.3272559852670351e-05,
"loss": 2.1694,
"step": 27250
},
{
"epoch": 1.6758747697974217,
"grad_norm": 3.907432794570923,
"learning_rate": 1.3241866175567834e-05,
"loss": 2.2338,
"step": 27300
},
{
"epoch": 1.6789441375076735,
"grad_norm": 3.9594688415527344,
"learning_rate": 1.3211172498465316e-05,
"loss": 2.2013,
"step": 27350
},
{
"epoch": 1.6820135052179253,
"grad_norm": 3.2740478515625,
"learning_rate": 1.31804788213628e-05,
"loss": 2.2376,
"step": 27400
},
{
"epoch": 1.6850828729281768,
"grad_norm": 5.300954341888428,
"learning_rate": 1.3149785144260283e-05,
"loss": 2.2501,
"step": 27450
},
{
"epoch": 1.6881522406384284,
"grad_norm": 3.6815123558044434,
"learning_rate": 1.3119091467157766e-05,
"loss": 2.3304,
"step": 27500
},
{
"epoch": 1.6912216083486802,
"grad_norm": 4.4728684425354,
"learning_rate": 1.3088397790055248e-05,
"loss": 2.2966,
"step": 27550
},
{
"epoch": 1.694290976058932,
"grad_norm": 3.806849241256714,
"learning_rate": 1.3057704112952733e-05,
"loss": 2.1784,
"step": 27600
},
{
"epoch": 1.6973603437691835,
"grad_norm": 3.8693387508392334,
"learning_rate": 1.3027010435850215e-05,
"loss": 2.1768,
"step": 27650
},
{
"epoch": 1.7004297114794351,
"grad_norm": 3.4431064128875732,
"learning_rate": 1.2996316758747698e-05,
"loss": 2.2657,
"step": 27700
},
{
"epoch": 1.703499079189687,
"grad_norm": 4.247345924377441,
"learning_rate": 1.2965623081645182e-05,
"loss": 2.2104,
"step": 27750
},
{
"epoch": 1.7065684468999387,
"grad_norm": 4.055105209350586,
"learning_rate": 1.2934929404542664e-05,
"loss": 2.274,
"step": 27800
},
{
"epoch": 1.7096378146101903,
"grad_norm": 3.7587838172912598,
"learning_rate": 1.2904235727440147e-05,
"loss": 2.278,
"step": 27850
},
{
"epoch": 1.7127071823204418,
"grad_norm": 3.716425657272339,
"learning_rate": 1.287354205033763e-05,
"loss": 2.3438,
"step": 27900
},
{
"epoch": 1.7157765500306936,
"grad_norm": 3.8528246879577637,
"learning_rate": 1.2842848373235114e-05,
"loss": 2.3489,
"step": 27950
},
{
"epoch": 1.7188459177409454,
"grad_norm": 3.5920658111572266,
"learning_rate": 1.2812154696132596e-05,
"loss": 2.3107,
"step": 28000
},
{
"epoch": 1.721915285451197,
"grad_norm": 3.0533790588378906,
"learning_rate": 1.278146101903008e-05,
"loss": 2.3603,
"step": 28050
},
{
"epoch": 1.7249846531614488,
"grad_norm": 4.115893363952637,
"learning_rate": 1.2750767341927565e-05,
"loss": 2.2572,
"step": 28100
},
{
"epoch": 1.7280540208717006,
"grad_norm": 3.350722074508667,
"learning_rate": 1.2720073664825047e-05,
"loss": 2.2774,
"step": 28150
},
{
"epoch": 1.7311233885819521,
"grad_norm": 4.05141544342041,
"learning_rate": 1.268937998772253e-05,
"loss": 2.1578,
"step": 28200
},
{
"epoch": 1.7341927562922037,
"grad_norm": 3.764138698577881,
"learning_rate": 1.2658686310620012e-05,
"loss": 2.2309,
"step": 28250
},
{
"epoch": 1.7372621240024555,
"grad_norm": 3.2544310092926025,
"learning_rate": 1.2627992633517496e-05,
"loss": 2.2146,
"step": 28300
},
{
"epoch": 1.7403314917127073,
"grad_norm": 4.030269622802734,
"learning_rate": 1.2597298956414979e-05,
"loss": 2.2981,
"step": 28350
},
{
"epoch": 1.7434008594229589,
"grad_norm": 3.6446919441223145,
"learning_rate": 1.2566605279312461e-05,
"loss": 2.1891,
"step": 28400
},
{
"epoch": 1.7464702271332104,
"grad_norm": 3.7096481323242188,
"learning_rate": 1.2535911602209946e-05,
"loss": 2.272,
"step": 28450
},
{
"epoch": 1.7495395948434622,
"grad_norm": 3.4253058433532715,
"learning_rate": 1.2505217925107428e-05,
"loss": 2.2502,
"step": 28500
},
{
"epoch": 1.752608962553714,
"grad_norm": 3.299448013305664,
"learning_rate": 1.247452424800491e-05,
"loss": 2.2742,
"step": 28550
},
{
"epoch": 1.7556783302639656,
"grad_norm": 4.302381992340088,
"learning_rate": 1.2443830570902393e-05,
"loss": 2.2315,
"step": 28600
},
{
"epoch": 1.7587476979742172,
"grad_norm": 3.4078803062438965,
"learning_rate": 1.2413136893799877e-05,
"loss": 2.3013,
"step": 28650
},
{
"epoch": 1.7618170656844692,
"grad_norm": 2.8105528354644775,
"learning_rate": 1.238244321669736e-05,
"loss": 2.2035,
"step": 28700
},
{
"epoch": 1.7648864333947207,
"grad_norm": 4.302020072937012,
"learning_rate": 1.2351749539594843e-05,
"loss": 2.306,
"step": 28750
},
{
"epoch": 1.7679558011049723,
"grad_norm": 5.1633219718933105,
"learning_rate": 1.2321055862492327e-05,
"loss": 2.2469,
"step": 28800
},
{
"epoch": 1.771025168815224,
"grad_norm": 3.7127487659454346,
"learning_rate": 1.2290362185389811e-05,
"loss": 2.2803,
"step": 28850
},
{
"epoch": 1.7740945365254759,
"grad_norm": 3.1988329887390137,
"learning_rate": 1.2259668508287293e-05,
"loss": 2.2693,
"step": 28900
},
{
"epoch": 1.7771639042357275,
"grad_norm": 4.184259414672852,
"learning_rate": 1.2228974831184776e-05,
"loss": 2.1331,
"step": 28950
},
{
"epoch": 1.780233271945979,
"grad_norm": 4.31723690032959,
"learning_rate": 1.219828115408226e-05,
"loss": 2.3265,
"step": 29000
},
{
"epoch": 1.7833026396562308,
"grad_norm": 3.367295742034912,
"learning_rate": 1.2167587476979743e-05,
"loss": 2.2231,
"step": 29050
},
{
"epoch": 1.7863720073664826,
"grad_norm": 3.7550508975982666,
"learning_rate": 1.2136893799877225e-05,
"loss": 2.1928,
"step": 29100
},
{
"epoch": 1.7894413750767342,
"grad_norm": 3.3911259174346924,
"learning_rate": 1.210620012277471e-05,
"loss": 2.2118,
"step": 29150
},
{
"epoch": 1.7925107427869857,
"grad_norm": 3.8555543422698975,
"learning_rate": 1.2075506445672192e-05,
"loss": 2.2831,
"step": 29200
},
{
"epoch": 1.7955801104972375,
"grad_norm": 3.8747925758361816,
"learning_rate": 1.2044812768569675e-05,
"loss": 2.1612,
"step": 29250
},
{
"epoch": 1.7986494782074893,
"grad_norm": 4.418224334716797,
"learning_rate": 1.2014119091467157e-05,
"loss": 2.1782,
"step": 29300
},
{
"epoch": 1.801718845917741,
"grad_norm": 3.63905668258667,
"learning_rate": 1.1983425414364641e-05,
"loss": 2.2919,
"step": 29350
},
{
"epoch": 1.8047882136279927,
"grad_norm": 3.302374839782715,
"learning_rate": 1.1952731737262124e-05,
"loss": 2.2046,
"step": 29400
},
{
"epoch": 1.8078575813382445,
"grad_norm": 4.592925548553467,
"learning_rate": 1.1922038060159606e-05,
"loss": 2.238,
"step": 29450
},
{
"epoch": 1.810926949048496,
"grad_norm": 3.654604434967041,
"learning_rate": 1.189134438305709e-05,
"loss": 2.1764,
"step": 29500
},
{
"epoch": 1.8139963167587476,
"grad_norm": 3.7106800079345703,
"learning_rate": 1.1860650705954573e-05,
"loss": 2.2601,
"step": 29550
},
{
"epoch": 1.8170656844689994,
"grad_norm": 3.459660291671753,
"learning_rate": 1.1829957028852056e-05,
"loss": 2.2503,
"step": 29600
},
{
"epoch": 1.8201350521792512,
"grad_norm": 3.504185676574707,
"learning_rate": 1.179926335174954e-05,
"loss": 2.258,
"step": 29650
},
{
"epoch": 1.8232044198895028,
"grad_norm": 4.167102336883545,
"learning_rate": 1.1768569674647024e-05,
"loss": 2.1789,
"step": 29700
},
{
"epoch": 1.8262737875997543,
"grad_norm": 4.083024978637695,
"learning_rate": 1.1737875997544507e-05,
"loss": 2.2965,
"step": 29750
},
{
"epoch": 1.8293431553100061,
"grad_norm": 3.6207692623138428,
"learning_rate": 1.1707182320441989e-05,
"loss": 2.3554,
"step": 29800
},
{
"epoch": 1.832412523020258,
"grad_norm": 3.8433992862701416,
"learning_rate": 1.1676488643339473e-05,
"loss": 2.2148,
"step": 29850
},
{
"epoch": 1.8354818907305095,
"grad_norm": 4.200483798980713,
"learning_rate": 1.1645794966236956e-05,
"loss": 2.2467,
"step": 29900
},
{
"epoch": 1.838551258440761,
"grad_norm": 4.590367794036865,
"learning_rate": 1.1615101289134438e-05,
"loss": 2.313,
"step": 29950
},
{
"epoch": 1.8416206261510129,
"grad_norm": 4.230051040649414,
"learning_rate": 1.1584407612031921e-05,
"loss": 2.2457,
"step": 30000
},
{
"epoch": 1.8446899938612646,
"grad_norm": 3.817789077758789,
"learning_rate": 1.1553713934929405e-05,
"loss": 2.2483,
"step": 30050
},
{
"epoch": 1.8477593615715162,
"grad_norm": 3.726513147354126,
"learning_rate": 1.1523020257826888e-05,
"loss": 2.2662,
"step": 30100
},
{
"epoch": 1.850828729281768,
"grad_norm": 3.6397483348846436,
"learning_rate": 1.149232658072437e-05,
"loss": 2.1713,
"step": 30150
},
{
"epoch": 1.8538980969920198,
"grad_norm": 3.6421852111816406,
"learning_rate": 1.1461632903621854e-05,
"loss": 2.1855,
"step": 30200
},
{
"epoch": 1.8569674647022714,
"grad_norm": 3.6123268604278564,
"learning_rate": 1.1430939226519337e-05,
"loss": 2.3794,
"step": 30250
},
{
"epoch": 1.860036832412523,
"grad_norm": 3.842371940612793,
"learning_rate": 1.140024554941682e-05,
"loss": 2.3057,
"step": 30300
},
{
"epoch": 1.8631062001227747,
"grad_norm": 5.15551233291626,
"learning_rate": 1.1369551872314302e-05,
"loss": 2.1755,
"step": 30350
},
{
"epoch": 1.8661755678330265,
"grad_norm": 3.2684996128082275,
"learning_rate": 1.1338858195211786e-05,
"loss": 2.2181,
"step": 30400
},
{
"epoch": 1.869244935543278,
"grad_norm": 3.720906972885132,
"learning_rate": 1.130816451810927e-05,
"loss": 2.2057,
"step": 30450
},
{
"epoch": 1.8723143032535297,
"grad_norm": 3.2957749366760254,
"learning_rate": 1.1277470841006753e-05,
"loss": 2.2404,
"step": 30500
},
{
"epoch": 1.8753836709637814,
"grad_norm": 3.4108922481536865,
"learning_rate": 1.1246777163904237e-05,
"loss": 2.3055,
"step": 30550
},
{
"epoch": 1.8784530386740332,
"grad_norm": 2.9891228675842285,
"learning_rate": 1.121608348680172e-05,
"loss": 2.2714,
"step": 30600
},
{
"epoch": 1.8815224063842848,
"grad_norm": 5.469006538391113,
"learning_rate": 1.1185389809699202e-05,
"loss": 2.2108,
"step": 30650
},
{
"epoch": 1.8845917740945364,
"grad_norm": 3.9105262756347656,
"learning_rate": 1.1154696132596686e-05,
"loss": 2.2818,
"step": 30700
},
{
"epoch": 1.8876611418047882,
"grad_norm": 3.2086987495422363,
"learning_rate": 1.1124002455494169e-05,
"loss": 2.2281,
"step": 30750
},
{
"epoch": 1.89073050951504,
"grad_norm": 4.461240291595459,
"learning_rate": 1.1093308778391652e-05,
"loss": 2.2431,
"step": 30800
},
{
"epoch": 1.8937998772252915,
"grad_norm": 4.049542427062988,
"learning_rate": 1.1062615101289134e-05,
"loss": 2.2089,
"step": 30850
},
{
"epoch": 1.8968692449355433,
"grad_norm": 3.2396111488342285,
"learning_rate": 1.1031921424186618e-05,
"loss": 2.3097,
"step": 30900
},
{
"epoch": 1.899938612645795,
"grad_norm": 3.4000086784362793,
"learning_rate": 1.10012277470841e-05,
"loss": 2.2472,
"step": 30950
},
{
"epoch": 1.9030079803560467,
"grad_norm": 3.818934917449951,
"learning_rate": 1.0970534069981583e-05,
"loss": 2.2014,
"step": 31000
},
{
"epoch": 1.9060773480662982,
"grad_norm": 3.150446891784668,
"learning_rate": 1.0939840392879068e-05,
"loss": 2.2697,
"step": 31050
},
{
"epoch": 1.90914671577655,
"grad_norm": 3.2145376205444336,
"learning_rate": 1.090914671577655e-05,
"loss": 2.2698,
"step": 31100
},
{
"epoch": 1.9122160834868018,
"grad_norm": 3.603330135345459,
"learning_rate": 1.0878453038674033e-05,
"loss": 2.2337,
"step": 31150
},
{
"epoch": 1.9152854511970534,
"grad_norm": 3.6672143936157227,
"learning_rate": 1.0847759361571515e-05,
"loss": 2.2064,
"step": 31200
},
{
"epoch": 1.918354818907305,
"grad_norm": 3.2886476516723633,
"learning_rate": 1.0817065684469001e-05,
"loss": 2.2159,
"step": 31250
},
{
"epoch": 1.9214241866175568,
"grad_norm": 3.3169350624084473,
"learning_rate": 1.0786372007366484e-05,
"loss": 2.2242,
"step": 31300
},
{
"epoch": 1.9244935543278086,
"grad_norm": 3.8866281509399414,
"learning_rate": 1.0755678330263966e-05,
"loss": 2.212,
"step": 31350
},
{
"epoch": 1.9275629220380601,
"grad_norm": 3.3577752113342285,
"learning_rate": 1.072498465316145e-05,
"loss": 2.3055,
"step": 31400
},
{
"epoch": 1.9306322897483117,
"grad_norm": 3.503736972808838,
"learning_rate": 1.0694290976058933e-05,
"loss": 2.1698,
"step": 31450
},
{
"epoch": 1.9337016574585635,
"grad_norm": 5.08292818069458,
"learning_rate": 1.0663597298956415e-05,
"loss": 2.2281,
"step": 31500
},
{
"epoch": 1.9367710251688153,
"grad_norm": 6.739192485809326,
"learning_rate": 1.0632903621853898e-05,
"loss": 2.2375,
"step": 31550
},
{
"epoch": 1.9398403928790668,
"grad_norm": 5.141798496246338,
"learning_rate": 1.0602209944751382e-05,
"loss": 2.2684,
"step": 31600
},
{
"epoch": 1.9429097605893186,
"grad_norm": 3.4031152725219727,
"learning_rate": 1.0571516267648865e-05,
"loss": 2.2104,
"step": 31650
},
{
"epoch": 1.9459791282995704,
"grad_norm": 3.678633451461792,
"learning_rate": 1.0540822590546347e-05,
"loss": 2.2351,
"step": 31700
},
{
"epoch": 1.949048496009822,
"grad_norm": 4.1313700675964355,
"learning_rate": 1.0510128913443831e-05,
"loss": 2.2951,
"step": 31750
},
{
"epoch": 1.9521178637200736,
"grad_norm": 3.0364913940429688,
"learning_rate": 1.0479435236341314e-05,
"loss": 2.2499,
"step": 31800
},
{
"epoch": 1.9551872314303254,
"grad_norm": 3.7849690914154053,
"learning_rate": 1.0448741559238796e-05,
"loss": 2.2005,
"step": 31850
},
{
"epoch": 1.9582565991405771,
"grad_norm": 4.416446208953857,
"learning_rate": 1.0418047882136279e-05,
"loss": 2.3114,
"step": 31900
},
{
"epoch": 1.9613259668508287,
"grad_norm": 3.4799766540527344,
"learning_rate": 1.0387354205033763e-05,
"loss": 2.2727,
"step": 31950
},
{
"epoch": 1.9643953345610803,
"grad_norm": 5.180732727050781,
"learning_rate": 1.0356660527931246e-05,
"loss": 2.2864,
"step": 32000
},
{
"epoch": 1.967464702271332,
"grad_norm": 3.589080810546875,
"learning_rate": 1.032596685082873e-05,
"loss": 2.2939,
"step": 32050
},
{
"epoch": 1.9705340699815839,
"grad_norm": 4.802340984344482,
"learning_rate": 1.0295273173726214e-05,
"loss": 2.2003,
"step": 32100
},
{
"epoch": 1.9736034376918354,
"grad_norm": 3.132723331451416,
"learning_rate": 1.0264579496623697e-05,
"loss": 2.2338,
"step": 32150
},
{
"epoch": 1.976672805402087,
"grad_norm": 5.015474796295166,
"learning_rate": 1.023388581952118e-05,
"loss": 2.2431,
"step": 32200
},
{
"epoch": 1.979742173112339,
"grad_norm": 3.432023286819458,
"learning_rate": 1.0203192142418662e-05,
"loss": 2.2862,
"step": 32250
},
{
"epoch": 1.9828115408225906,
"grad_norm": 3.8772900104522705,
"learning_rate": 1.0172498465316146e-05,
"loss": 2.2758,
"step": 32300
},
{
"epoch": 1.9858809085328422,
"grad_norm": 3.640902042388916,
"learning_rate": 1.0141804788213629e-05,
"loss": 2.2174,
"step": 32350
},
{
"epoch": 1.988950276243094,
"grad_norm": 3.8185462951660156,
"learning_rate": 1.0111111111111111e-05,
"loss": 2.2102,
"step": 32400
},
{
"epoch": 1.9920196439533457,
"grad_norm": 4.0993499755859375,
"learning_rate": 1.0080417434008595e-05,
"loss": 2.2474,
"step": 32450
},
{
"epoch": 1.9950890116635973,
"grad_norm": 3.5613911151885986,
"learning_rate": 1.0049723756906078e-05,
"loss": 2.2452,
"step": 32500
},
{
"epoch": 1.9981583793738489,
"grad_norm": 3.9277961254119873,
"learning_rate": 1.001903007980356e-05,
"loss": 2.2498,
"step": 32550
},
{
"epoch": 2.001227747084101,
"grad_norm": 3.8902101516723633,
"learning_rate": 9.988336402701043e-06,
"loss": 2.3092,
"step": 32600
},
{
"epoch": 2.0042971147943525,
"grad_norm": 3.29555606842041,
"learning_rate": 9.957642725598527e-06,
"loss": 2.2777,
"step": 32650
},
{
"epoch": 2.007366482504604,
"grad_norm": 3.297602653503418,
"learning_rate": 9.92694904849601e-06,
"loss": 2.2509,
"step": 32700
},
{
"epoch": 2.0104358502148556,
"grad_norm": 3.616257905960083,
"learning_rate": 9.896255371393492e-06,
"loss": 2.2828,
"step": 32750
},
{
"epoch": 2.0135052179251076,
"grad_norm": 3.872678518295288,
"learning_rate": 9.865561694290976e-06,
"loss": 2.2431,
"step": 32800
},
{
"epoch": 2.016574585635359,
"grad_norm": 4.2430338859558105,
"learning_rate": 9.83486801718846e-06,
"loss": 2.143,
"step": 32850
},
{
"epoch": 2.0196439533456108,
"grad_norm": 4.328212738037109,
"learning_rate": 9.804174340085943e-06,
"loss": 2.3134,
"step": 32900
},
{
"epoch": 2.0227133210558623,
"grad_norm": 3.488384246826172,
"learning_rate": 9.773480662983426e-06,
"loss": 2.2116,
"step": 32950
},
{
"epoch": 2.0257826887661143,
"grad_norm": 4.3153910636901855,
"learning_rate": 9.74278698588091e-06,
"loss": 2.1732,
"step": 33000
},
{
"epoch": 2.028852056476366,
"grad_norm": 3.968754768371582,
"learning_rate": 9.712093308778392e-06,
"loss": 2.2158,
"step": 33050
},
{
"epoch": 2.0319214241866175,
"grad_norm": 3.1098225116729736,
"learning_rate": 9.681399631675875e-06,
"loss": 2.2657,
"step": 33100
},
{
"epoch": 2.034990791896869,
"grad_norm": 3.7003393173217773,
"learning_rate": 9.650705954573359e-06,
"loss": 2.2534,
"step": 33150
},
{
"epoch": 2.038060159607121,
"grad_norm": 3.1514766216278076,
"learning_rate": 9.620012277470842e-06,
"loss": 2.1827,
"step": 33200
},
{
"epoch": 2.0411295273173726,
"grad_norm": 3.5508854389190674,
"learning_rate": 9.589318600368324e-06,
"loss": 2.2009,
"step": 33250
},
{
"epoch": 2.044198895027624,
"grad_norm": 4.060067653656006,
"learning_rate": 9.558624923265807e-06,
"loss": 2.2533,
"step": 33300
},
{
"epoch": 2.047268262737876,
"grad_norm": 3.6063380241394043,
"learning_rate": 9.527931246163291e-06,
"loss": 2.254,
"step": 33350
},
{
"epoch": 2.050337630448128,
"grad_norm": 5.065506458282471,
"learning_rate": 9.497237569060773e-06,
"loss": 2.2285,
"step": 33400
},
{
"epoch": 2.0534069981583793,
"grad_norm": 3.942070245742798,
"learning_rate": 9.466543891958256e-06,
"loss": 2.2693,
"step": 33450
},
{
"epoch": 2.056476365868631,
"grad_norm": 4.165147304534912,
"learning_rate": 9.43585021485574e-06,
"loss": 2.1426,
"step": 33500
},
{
"epoch": 2.059545733578883,
"grad_norm": 9.669456481933594,
"learning_rate": 9.405156537753223e-06,
"loss": 2.2249,
"step": 33550
},
{
"epoch": 2.0626151012891345,
"grad_norm": 3.426900625228882,
"learning_rate": 9.374462860650705e-06,
"loss": 2.2908,
"step": 33600
},
{
"epoch": 2.065684468999386,
"grad_norm": 4.799295902252197,
"learning_rate": 9.34376918354819e-06,
"loss": 2.226,
"step": 33650
},
{
"epoch": 2.0687538367096376,
"grad_norm": 3.066361427307129,
"learning_rate": 9.313075506445674e-06,
"loss": 2.2653,
"step": 33700
},
{
"epoch": 2.0718232044198897,
"grad_norm": 4.229564666748047,
"learning_rate": 9.282381829343156e-06,
"loss": 2.2778,
"step": 33750
},
{
"epoch": 2.074892572130141,
"grad_norm": 3.7543585300445557,
"learning_rate": 9.251688152240639e-06,
"loss": 2.1851,
"step": 33800
},
{
"epoch": 2.077961939840393,
"grad_norm": 4.075713634490967,
"learning_rate": 9.220994475138123e-06,
"loss": 2.189,
"step": 33850
},
{
"epoch": 2.0810313075506444,
"grad_norm": 4.204864978790283,
"learning_rate": 9.190300798035606e-06,
"loss": 2.2357,
"step": 33900
},
{
"epoch": 2.0841006752608964,
"grad_norm": 4.006982326507568,
"learning_rate": 9.159607120933088e-06,
"loss": 2.231,
"step": 33950
},
{
"epoch": 2.087170042971148,
"grad_norm": 3.0241997241973877,
"learning_rate": 9.12891344383057e-06,
"loss": 2.1866,
"step": 34000
},
{
"epoch": 2.0902394106813995,
"grad_norm": 3.5990588665008545,
"learning_rate": 9.098219766728055e-06,
"loss": 2.145,
"step": 34050
},
{
"epoch": 2.0933087783916515,
"grad_norm": 3.6155498027801514,
"learning_rate": 9.067526089625537e-06,
"loss": 2.1855,
"step": 34100
},
{
"epoch": 2.096378146101903,
"grad_norm": 3.9599666595458984,
"learning_rate": 9.03683241252302e-06,
"loss": 2.2783,
"step": 34150
},
{
"epoch": 2.0994475138121547,
"grad_norm": 3.8327977657318115,
"learning_rate": 9.006138735420504e-06,
"loss": 2.1919,
"step": 34200
},
{
"epoch": 2.1025168815224062,
"grad_norm": 3.3617892265319824,
"learning_rate": 8.975445058317987e-06,
"loss": 2.1101,
"step": 34250
},
{
"epoch": 2.1055862492326582,
"grad_norm": 3.5898163318634033,
"learning_rate": 8.944751381215469e-06,
"loss": 2.1866,
"step": 34300
},
{
"epoch": 2.10865561694291,
"grad_norm": 4.3782525062561035,
"learning_rate": 8.914057704112952e-06,
"loss": 2.233,
"step": 34350
},
{
"epoch": 2.1117249846531614,
"grad_norm": 3.649711847305298,
"learning_rate": 8.883364027010436e-06,
"loss": 2.2422,
"step": 34400
},
{
"epoch": 2.114794352363413,
"grad_norm": 3.99489164352417,
"learning_rate": 8.852670349907918e-06,
"loss": 2.1979,
"step": 34450
},
{
"epoch": 2.117863720073665,
"grad_norm": 4.443358421325684,
"learning_rate": 8.821976672805403e-06,
"loss": 2.1984,
"step": 34500
},
{
"epoch": 2.1209330877839165,
"grad_norm": 2.918077230453491,
"learning_rate": 8.791282995702887e-06,
"loss": 2.1995,
"step": 34550
},
{
"epoch": 2.124002455494168,
"grad_norm": 3.6200385093688965,
"learning_rate": 8.76058931860037e-06,
"loss": 2.2225,
"step": 34600
},
{
"epoch": 2.12707182320442,
"grad_norm": 3.616900682449341,
"learning_rate": 8.729895641497852e-06,
"loss": 2.1904,
"step": 34650
},
{
"epoch": 2.1301411909146717,
"grad_norm": 3.1443259716033936,
"learning_rate": 8.699201964395334e-06,
"loss": 2.1765,
"step": 34700
},
{
"epoch": 2.1332105586249233,
"grad_norm": 3.3852028846740723,
"learning_rate": 8.668508287292819e-06,
"loss": 2.2505,
"step": 34750
},
{
"epoch": 2.136279926335175,
"grad_norm": 3.266024112701416,
"learning_rate": 8.637814610190301e-06,
"loss": 2.1815,
"step": 34800
},
{
"epoch": 2.139349294045427,
"grad_norm": 4.952578067779541,
"learning_rate": 8.607120933087784e-06,
"loss": 2.2401,
"step": 34850
},
{
"epoch": 2.1424186617556784,
"grad_norm": 4.235185623168945,
"learning_rate": 8.576427255985268e-06,
"loss": 2.2374,
"step": 34900
},
{
"epoch": 2.14548802946593,
"grad_norm": 3.80965256690979,
"learning_rate": 8.54573357888275e-06,
"loss": 2.2094,
"step": 34950
},
{
"epoch": 2.1485573971761815,
"grad_norm": 5.098249435424805,
"learning_rate": 8.515039901780233e-06,
"loss": 2.1882,
"step": 35000
},
{
"epoch": 2.1516267648864336,
"grad_norm": 3.5961649417877197,
"learning_rate": 8.484346224677715e-06,
"loss": 2.2372,
"step": 35050
},
{
"epoch": 2.154696132596685,
"grad_norm": 3.425548791885376,
"learning_rate": 8.4536525475752e-06,
"loss": 2.2271,
"step": 35100
},
{
"epoch": 2.1577655003069367,
"grad_norm": 3.765516996383667,
"learning_rate": 8.422958870472682e-06,
"loss": 2.2176,
"step": 35150
},
{
"epoch": 2.1608348680171883,
"grad_norm": 4.038573265075684,
"learning_rate": 8.392265193370165e-06,
"loss": 2.214,
"step": 35200
},
{
"epoch": 2.1639042357274403,
"grad_norm": 4.2313385009765625,
"learning_rate": 8.361571516267649e-06,
"loss": 2.2596,
"step": 35250
},
{
"epoch": 2.166973603437692,
"grad_norm": 3.4800400733947754,
"learning_rate": 8.330877839165133e-06,
"loss": 2.2576,
"step": 35300
},
{
"epoch": 2.1700429711479434,
"grad_norm": 3.5811681747436523,
"learning_rate": 8.300184162062616e-06,
"loss": 2.1885,
"step": 35350
},
{
"epoch": 2.1731123388581954,
"grad_norm": 3.505411148071289,
"learning_rate": 8.269490484960098e-06,
"loss": 2.1773,
"step": 35400
},
{
"epoch": 2.176181706568447,
"grad_norm": 3.3958797454833984,
"learning_rate": 8.238796807857582e-06,
"loss": 2.2335,
"step": 35450
},
{
"epoch": 2.1792510742786986,
"grad_norm": 4.680118083953857,
"learning_rate": 8.208103130755065e-06,
"loss": 2.2351,
"step": 35500
},
{
"epoch": 2.18232044198895,
"grad_norm": 3.314845323562622,
"learning_rate": 8.177409453652548e-06,
"loss": 2.2563,
"step": 35550
},
{
"epoch": 2.185389809699202,
"grad_norm": 3.014174461364746,
"learning_rate": 8.146715776550032e-06,
"loss": 2.2946,
"step": 35600
},
{
"epoch": 2.1884591774094537,
"grad_norm": 6.031067848205566,
"learning_rate": 8.116022099447514e-06,
"loss": 2.2273,
"step": 35650
},
{
"epoch": 2.1915285451197053,
"grad_norm": 4.454038143157959,
"learning_rate": 8.085328422344997e-06,
"loss": 2.226,
"step": 35700
},
{
"epoch": 2.194597912829957,
"grad_norm": 4.110731601715088,
"learning_rate": 8.05463474524248e-06,
"loss": 2.3176,
"step": 35750
},
{
"epoch": 2.197667280540209,
"grad_norm": 3.230386972427368,
"learning_rate": 8.023941068139964e-06,
"loss": 2.2589,
"step": 35800
},
{
"epoch": 2.2007366482504604,
"grad_norm": 4.837220668792725,
"learning_rate": 7.993247391037446e-06,
"loss": 2.2341,
"step": 35850
},
{
"epoch": 2.203806015960712,
"grad_norm": 4.532881259918213,
"learning_rate": 7.962553713934929e-06,
"loss": 2.2125,
"step": 35900
},
{
"epoch": 2.2068753836709636,
"grad_norm": 3.323784828186035,
"learning_rate": 7.931860036832413e-06,
"loss": 2.19,
"step": 35950
},
{
"epoch": 2.2099447513812156,
"grad_norm": 4.003852844238281,
"learning_rate": 7.901166359729895e-06,
"loss": 2.2215,
"step": 36000
},
{
"epoch": 2.213014119091467,
"grad_norm": 3.0279271602630615,
"learning_rate": 7.870472682627378e-06,
"loss": 2.2177,
"step": 36050
},
{
"epoch": 2.2160834868017187,
"grad_norm": 4.593332290649414,
"learning_rate": 7.839779005524862e-06,
"loss": 2.1596,
"step": 36100
},
{
"epoch": 2.2191528545119708,
"grad_norm": 3.9358561038970947,
"learning_rate": 7.809085328422346e-06,
"loss": 2.3147,
"step": 36150
},
{
"epoch": 2.2222222222222223,
"grad_norm": 4.021229267120361,
"learning_rate": 7.778391651319829e-06,
"loss": 2.1922,
"step": 36200
},
{
"epoch": 2.225291589932474,
"grad_norm": 3.740377426147461,
"learning_rate": 7.747697974217311e-06,
"loss": 2.2208,
"step": 36250
},
{
"epoch": 2.2283609576427255,
"grad_norm": 3.133218765258789,
"learning_rate": 7.717004297114796e-06,
"loss": 2.181,
"step": 36300
},
{
"epoch": 2.2314303253529775,
"grad_norm": 4.224998950958252,
"learning_rate": 7.686310620012278e-06,
"loss": 2.1571,
"step": 36350
},
{
"epoch": 2.234499693063229,
"grad_norm": 3.908095359802246,
"learning_rate": 7.65561694290976e-06,
"loss": 2.2142,
"step": 36400
},
{
"epoch": 2.2375690607734806,
"grad_norm": 4.671231746673584,
"learning_rate": 7.624923265807243e-06,
"loss": 2.2114,
"step": 36450
},
{
"epoch": 2.240638428483732,
"grad_norm": 3.268892765045166,
"learning_rate": 7.5942295887047274e-06,
"loss": 2.2469,
"step": 36500
},
{
"epoch": 2.243707796193984,
"grad_norm": 2.9523401260375977,
"learning_rate": 7.56353591160221e-06,
"loss": 2.171,
"step": 36550
},
{
"epoch": 2.2467771639042358,
"grad_norm": 3.850844383239746,
"learning_rate": 7.5328422344996925e-06,
"loss": 2.1936,
"step": 36600
},
{
"epoch": 2.2498465316144873,
"grad_norm": 3.288367509841919,
"learning_rate": 7.502148557397177e-06,
"loss": 2.1882,
"step": 36650
},
{
"epoch": 2.252915899324739,
"grad_norm": 3.225170612335205,
"learning_rate": 7.47145488029466e-06,
"loss": 2.2253,
"step": 36700
},
{
"epoch": 2.255985267034991,
"grad_norm": 3.7475740909576416,
"learning_rate": 7.440761203192143e-06,
"loss": 2.2224,
"step": 36750
},
{
"epoch": 2.2590546347452425,
"grad_norm": 4.108501434326172,
"learning_rate": 7.410067526089626e-06,
"loss": 2.2538,
"step": 36800
},
{
"epoch": 2.262124002455494,
"grad_norm": 4.399234771728516,
"learning_rate": 7.3793738489871085e-06,
"loss": 2.2093,
"step": 36850
},
{
"epoch": 2.265193370165746,
"grad_norm": 4.0335235595703125,
"learning_rate": 7.348680171884592e-06,
"loss": 2.2265,
"step": 36900
},
{
"epoch": 2.2682627378759976,
"grad_norm": 3.3310387134552,
"learning_rate": 7.317986494782075e-06,
"loss": 2.2195,
"step": 36950
},
{
"epoch": 2.271332105586249,
"grad_norm": 14.169954299926758,
"learning_rate": 7.287292817679558e-06,
"loss": 2.3107,
"step": 37000
},
{
"epoch": 2.2744014732965008,
"grad_norm": 4.349920272827148,
"learning_rate": 7.256599140577041e-06,
"loss": 2.2233,
"step": 37050
},
{
"epoch": 2.277470841006753,
"grad_norm": 3.546018362045288,
"learning_rate": 7.2259054634745245e-06,
"loss": 2.1851,
"step": 37100
},
{
"epoch": 2.2805402087170044,
"grad_norm": 3.578289270401001,
"learning_rate": 7.195211786372008e-06,
"loss": 2.1984,
"step": 37150
},
{
"epoch": 2.283609576427256,
"grad_norm": 3.5594937801361084,
"learning_rate": 7.1645181092694904e-06,
"loss": 2.2544,
"step": 37200
},
{
"epoch": 2.2866789441375075,
"grad_norm": 3.502493143081665,
"learning_rate": 7.133824432166974e-06,
"loss": 2.2898,
"step": 37250
},
{
"epoch": 2.2897483118477595,
"grad_norm": 3.839489459991455,
"learning_rate": 7.103130755064457e-06,
"loss": 2.2333,
"step": 37300
},
{
"epoch": 2.292817679558011,
"grad_norm": 3.7720537185668945,
"learning_rate": 7.07243707796194e-06,
"loss": 2.2484,
"step": 37350
},
{
"epoch": 2.2958870472682626,
"grad_norm": 3.5186944007873535,
"learning_rate": 7.041743400859423e-06,
"loss": 2.2974,
"step": 37400
},
{
"epoch": 2.298956414978514,
"grad_norm": 3.9113717079162598,
"learning_rate": 7.011049723756906e-06,
"loss": 2.2212,
"step": 37450
},
{
"epoch": 2.3020257826887662,
"grad_norm": 3.704716920852661,
"learning_rate": 6.98035604665439e-06,
"loss": 2.2158,
"step": 37500
},
{
"epoch": 2.305095150399018,
"grad_norm": 3.8221049308776855,
"learning_rate": 6.949662369551872e-06,
"loss": 2.1804,
"step": 37550
},
{
"epoch": 2.3081645181092694,
"grad_norm": 3.8908891677856445,
"learning_rate": 6.918968692449356e-06,
"loss": 2.171,
"step": 37600
},
{
"epoch": 2.3112338858195214,
"grad_norm": 3.604534149169922,
"learning_rate": 6.888275015346839e-06,
"loss": 2.2704,
"step": 37650
},
{
"epoch": 2.314303253529773,
"grad_norm": 3.2667436599731445,
"learning_rate": 6.857581338244322e-06,
"loss": 2.2706,
"step": 37700
},
{
"epoch": 2.3173726212400245,
"grad_norm": 3.7572014331817627,
"learning_rate": 6.826887661141805e-06,
"loss": 2.3158,
"step": 37750
},
{
"epoch": 2.320441988950276,
"grad_norm": 3.4231903553009033,
"learning_rate": 6.7961939840392875e-06,
"loss": 2.2766,
"step": 37800
},
{
"epoch": 2.323511356660528,
"grad_norm": 3.4527835845947266,
"learning_rate": 6.765500306936771e-06,
"loss": 2.1798,
"step": 37850
},
{
"epoch": 2.3265807243707797,
"grad_norm": 4.387216091156006,
"learning_rate": 6.734806629834254e-06,
"loss": 2.2615,
"step": 37900
},
{
"epoch": 2.3296500920810312,
"grad_norm": 3.5280401706695557,
"learning_rate": 6.704112952731738e-06,
"loss": 2.2263,
"step": 37950
},
{
"epoch": 2.332719459791283,
"grad_norm": 3.647169351577759,
"learning_rate": 6.673419275629221e-06,
"loss": 2.2117,
"step": 38000
},
{
"epoch": 2.335788827501535,
"grad_norm": 3.3504931926727295,
"learning_rate": 6.6427255985267036e-06,
"loss": 2.2202,
"step": 38050
},
{
"epoch": 2.3388581952117864,
"grad_norm": 3.1713030338287354,
"learning_rate": 6.612031921424187e-06,
"loss": 2.1531,
"step": 38100
},
{
"epoch": 2.341927562922038,
"grad_norm": 4.14404821395874,
"learning_rate": 6.5813382443216695e-06,
"loss": 2.2549,
"step": 38150
},
{
"epoch": 2.3449969306322895,
"grad_norm": 4.7959065437316895,
"learning_rate": 6.550644567219153e-06,
"loss": 2.2079,
"step": 38200
},
{
"epoch": 2.3480662983425415,
"grad_norm": 3.699985980987549,
"learning_rate": 6.519950890116635e-06,
"loss": 2.2061,
"step": 38250
},
{
"epoch": 2.351135666052793,
"grad_norm": 3.93282151222229,
"learning_rate": 6.48925721301412e-06,
"loss": 2.1875,
"step": 38300
},
{
"epoch": 2.3542050337630447,
"grad_norm": 3.5464470386505127,
"learning_rate": 6.458563535911603e-06,
"loss": 2.3065,
"step": 38350
},
{
"epoch": 2.3572744014732967,
"grad_norm": 4.367957592010498,
"learning_rate": 6.4278698588090855e-06,
"loss": 2.2946,
"step": 38400
},
{
"epoch": 2.3603437691835483,
"grad_norm": 4.520755767822266,
"learning_rate": 6.397176181706569e-06,
"loss": 2.2403,
"step": 38450
},
{
"epoch": 2.3634131368938,
"grad_norm": 3.270214557647705,
"learning_rate": 6.366482504604051e-06,
"loss": 2.276,
"step": 38500
},
{
"epoch": 2.3664825046040514,
"grad_norm": 4.663724422454834,
"learning_rate": 6.335788827501535e-06,
"loss": 2.2394,
"step": 38550
},
{
"epoch": 2.3695518723143034,
"grad_norm": 3.735618829727173,
"learning_rate": 6.305095150399017e-06,
"loss": 2.1831,
"step": 38600
},
{
"epoch": 2.372621240024555,
"grad_norm": 4.269412040710449,
"learning_rate": 6.274401473296501e-06,
"loss": 2.2155,
"step": 38650
},
{
"epoch": 2.3756906077348066,
"grad_norm": 4.040123462677002,
"learning_rate": 6.243707796193985e-06,
"loss": 2.2135,
"step": 38700
},
{
"epoch": 2.378759975445058,
"grad_norm": 3.0279011726379395,
"learning_rate": 6.213014119091467e-06,
"loss": 2.2062,
"step": 38750
},
{
"epoch": 2.38182934315531,
"grad_norm": 4.656242370605469,
"learning_rate": 6.182320441988951e-06,
"loss": 2.2526,
"step": 38800
},
{
"epoch": 2.3848987108655617,
"grad_norm": 4.1057233810424805,
"learning_rate": 6.151626764886433e-06,
"loss": 2.1878,
"step": 38850
},
{
"epoch": 2.3879680785758133,
"grad_norm": 4.058590888977051,
"learning_rate": 6.120933087783917e-06,
"loss": 2.2473,
"step": 38900
},
{
"epoch": 2.391037446286065,
"grad_norm": 3.7655320167541504,
"learning_rate": 6.090239410681399e-06,
"loss": 2.1313,
"step": 38950
},
{
"epoch": 2.394106813996317,
"grad_norm": 3.7537214756011963,
"learning_rate": 6.059545733578883e-06,
"loss": 2.2779,
"step": 39000
},
{
"epoch": 2.3971761817065684,
"grad_norm": 3.464635133743286,
"learning_rate": 6.028852056476366e-06,
"loss": 2.1035,
"step": 39050
},
{
"epoch": 2.40024554941682,
"grad_norm": 3.9705522060394287,
"learning_rate": 5.998158379373849e-06,
"loss": 2.2249,
"step": 39100
},
{
"epoch": 2.403314917127072,
"grad_norm": 2.9240760803222656,
"learning_rate": 5.967464702271333e-06,
"loss": 2.1655,
"step": 39150
},
{
"epoch": 2.4063842848373236,
"grad_norm": 4.480701923370361,
"learning_rate": 5.936771025168815e-06,
"loss": 2.1998,
"step": 39200
},
{
"epoch": 2.409453652547575,
"grad_norm": 3.32859468460083,
"learning_rate": 5.906077348066299e-06,
"loss": 2.2133,
"step": 39250
},
{
"epoch": 2.4125230202578267,
"grad_norm": 2.9386136531829834,
"learning_rate": 5.875383670963781e-06,
"loss": 2.1524,
"step": 39300
},
{
"epoch": 2.4155923879680787,
"grad_norm": 3.8305766582489014,
"learning_rate": 5.8446899938612645e-06,
"loss": 2.2915,
"step": 39350
},
{
"epoch": 2.4186617556783303,
"grad_norm": 3.4347639083862305,
"learning_rate": 5.813996316758748e-06,
"loss": 2.1909,
"step": 39400
},
{
"epoch": 2.421731123388582,
"grad_norm": 4.805240631103516,
"learning_rate": 5.78330263965623e-06,
"loss": 2.2795,
"step": 39450
},
{
"epoch": 2.424800491098834,
"grad_norm": 3.2844135761260986,
"learning_rate": 5.752608962553715e-06,
"loss": 2.1176,
"step": 39500
},
{
"epoch": 2.4278698588090855,
"grad_norm": 3.550025701522827,
"learning_rate": 5.721915285451197e-06,
"loss": 2.2356,
"step": 39550
},
{
"epoch": 2.430939226519337,
"grad_norm": 3.8909902572631836,
"learning_rate": 5.6912216083486805e-06,
"loss": 2.2591,
"step": 39600
},
{
"epoch": 2.4340085942295886,
"grad_norm": 3.584829330444336,
"learning_rate": 5.660527931246163e-06,
"loss": 2.2678,
"step": 39650
},
{
"epoch": 2.43707796193984,
"grad_norm": 3.7134439945220947,
"learning_rate": 5.6298342541436464e-06,
"loss": 2.1648,
"step": 39700
},
{
"epoch": 2.440147329650092,
"grad_norm": 4.022806167602539,
"learning_rate": 5.59914057704113e-06,
"loss": 2.1818,
"step": 39750
},
{
"epoch": 2.4432166973603437,
"grad_norm": 3.5967869758605957,
"learning_rate": 5.568446899938612e-06,
"loss": 2.2368,
"step": 39800
},
{
"epoch": 2.4462860650705953,
"grad_norm": 4.099997520446777,
"learning_rate": 5.537753222836096e-06,
"loss": 2.2196,
"step": 39850
},
{
"epoch": 2.4493554327808473,
"grad_norm": 4.131256103515625,
"learning_rate": 5.507059545733579e-06,
"loss": 2.3592,
"step": 39900
},
{
"epoch": 2.452424800491099,
"grad_norm": 3.403428077697754,
"learning_rate": 5.4763658686310625e-06,
"loss": 2.2484,
"step": 39950
},
{
"epoch": 2.4554941682013505,
"grad_norm": 3.4898879528045654,
"learning_rate": 5.445672191528546e-06,
"loss": 2.2361,
"step": 40000
},
{
"epoch": 2.458563535911602,
"grad_norm": 4.398887634277344,
"learning_rate": 5.414978514426028e-06,
"loss": 2.2462,
"step": 40050
},
{
"epoch": 2.461632903621854,
"grad_norm": 4.28602409362793,
"learning_rate": 5.384284837323512e-06,
"loss": 2.3204,
"step": 40100
},
{
"epoch": 2.4647022713321056,
"grad_norm": 4.812078475952148,
"learning_rate": 5.353591160220994e-06,
"loss": 2.2206,
"step": 40150
},
{
"epoch": 2.467771639042357,
"grad_norm": 5.229348659515381,
"learning_rate": 5.322897483118478e-06,
"loss": 2.299,
"step": 40200
},
{
"epoch": 2.470841006752609,
"grad_norm": 5.011894226074219,
"learning_rate": 5.29220380601596e-06,
"loss": 2.2308,
"step": 40250
},
{
"epoch": 2.4739103744628608,
"grad_norm": 3.229832410812378,
"learning_rate": 5.261510128913444e-06,
"loss": 2.1293,
"step": 40300
},
{
"epoch": 2.4769797421731123,
"grad_norm": 4.192412376403809,
"learning_rate": 5.230816451810928e-06,
"loss": 2.2252,
"step": 40350
},
{
"epoch": 2.480049109883364,
"grad_norm": 4.124536037445068,
"learning_rate": 5.20012277470841e-06,
"loss": 2.1729,
"step": 40400
},
{
"epoch": 2.4831184775936155,
"grad_norm": 3.670736789703369,
"learning_rate": 5.169429097605894e-06,
"loss": 2.2571,
"step": 40450
},
{
"epoch": 2.4861878453038675,
"grad_norm": 5.001986026763916,
"learning_rate": 5.138735420503376e-06,
"loss": 2.2075,
"step": 40500
},
{
"epoch": 2.489257213014119,
"grad_norm": 3.8158419132232666,
"learning_rate": 5.1080417434008596e-06,
"loss": 2.2931,
"step": 40550
},
{
"epoch": 2.4923265807243706,
"grad_norm": 3.6598846912384033,
"learning_rate": 5.077348066298342e-06,
"loss": 2.2037,
"step": 40600
},
{
"epoch": 2.4953959484346226,
"grad_norm": 4.0994110107421875,
"learning_rate": 5.0466543891958255e-06,
"loss": 2.2622,
"step": 40650
},
{
"epoch": 2.498465316144874,
"grad_norm": 3.9565281867980957,
"learning_rate": 5.01596071209331e-06,
"loss": 2.1952,
"step": 40700
},
{
"epoch": 2.501534683855126,
"grad_norm": 3.9254519939422607,
"learning_rate": 4.985267034990792e-06,
"loss": 2.2506,
"step": 40750
},
{
"epoch": 2.5046040515653774,
"grad_norm": 4.242046356201172,
"learning_rate": 4.954573357888276e-06,
"loss": 2.2135,
"step": 40800
},
{
"epoch": 2.5076734192756294,
"grad_norm": 3.1262447834014893,
"learning_rate": 4.923879680785758e-06,
"loss": 2.2056,
"step": 40850
},
{
"epoch": 2.510742786985881,
"grad_norm": 4.857666015625,
"learning_rate": 4.8931860036832415e-06,
"loss": 2.2238,
"step": 40900
},
{
"epoch": 2.5138121546961325,
"grad_norm": 4.507630348205566,
"learning_rate": 4.862492326580724e-06,
"loss": 2.2332,
"step": 40950
},
{
"epoch": 2.5168815224063845,
"grad_norm": 4.321670055389404,
"learning_rate": 4.831798649478207e-06,
"loss": 2.2581,
"step": 41000
},
{
"epoch": 2.519950890116636,
"grad_norm": 3.4853837490081787,
"learning_rate": 4.801104972375691e-06,
"loss": 2.3517,
"step": 41050
},
{
"epoch": 2.5230202578268877,
"grad_norm": 4.295222759246826,
"learning_rate": 4.770411295273174e-06,
"loss": 2.1357,
"step": 41100
},
{
"epoch": 2.5260896255371392,
"grad_norm": 3.4203784465789795,
"learning_rate": 4.7397176181706575e-06,
"loss": 2.1721,
"step": 41150
},
{
"epoch": 2.529158993247391,
"grad_norm": 4.489879608154297,
"learning_rate": 4.70902394106814e-06,
"loss": 2.196,
"step": 41200
},
{
"epoch": 2.532228360957643,
"grad_norm": 3.238175392150879,
"learning_rate": 4.6783302639656234e-06,
"loss": 2.2373,
"step": 41250
},
{
"epoch": 2.5352977286678944,
"grad_norm": 4.743640422821045,
"learning_rate": 4.647636586863106e-06,
"loss": 2.309,
"step": 41300
},
{
"epoch": 2.538367096378146,
"grad_norm": 2.7739622592926025,
"learning_rate": 4.616942909760589e-06,
"loss": 2.1396,
"step": 41350
},
{
"epoch": 2.541436464088398,
"grad_norm": 3.4076218605041504,
"learning_rate": 4.586249232658073e-06,
"loss": 2.2281,
"step": 41400
},
{
"epoch": 2.5445058317986495,
"grad_norm": 4.367641448974609,
"learning_rate": 4.555555555555555e-06,
"loss": 2.2136,
"step": 41450
},
{
"epoch": 2.547575199508901,
"grad_norm": 3.8523755073547363,
"learning_rate": 4.5248618784530395e-06,
"loss": 2.265,
"step": 41500
},
{
"epoch": 2.5506445672191527,
"grad_norm": 3.5632312297821045,
"learning_rate": 4.494168201350522e-06,
"loss": 2.2266,
"step": 41550
},
{
"epoch": 2.5537139349294047,
"grad_norm": 4.128525733947754,
"learning_rate": 4.463474524248005e-06,
"loss": 2.209,
"step": 41600
},
{
"epoch": 2.5567833026396563,
"grad_norm": 3.2727203369140625,
"learning_rate": 4.432780847145488e-06,
"loss": 2.167,
"step": 41650
},
{
"epoch": 2.559852670349908,
"grad_norm": 4.561786651611328,
"learning_rate": 4.402087170042971e-06,
"loss": 2.2009,
"step": 41700
},
{
"epoch": 2.56292203806016,
"grad_norm": 3.624037742614746,
"learning_rate": 4.371393492940455e-06,
"loss": 2.2852,
"step": 41750
},
{
"epoch": 2.5659914057704114,
"grad_norm": 4.098727226257324,
"learning_rate": 4.340699815837937e-06,
"loss": 2.1527,
"step": 41800
},
{
"epoch": 2.569060773480663,
"grad_norm": 3.3079962730407715,
"learning_rate": 4.3100061387354205e-06,
"loss": 2.2701,
"step": 41850
},
{
"epoch": 2.5721301411909145,
"grad_norm": 3.725670099258423,
"learning_rate": 4.279312461632904e-06,
"loss": 2.2145,
"step": 41900
},
{
"epoch": 2.575199508901166,
"grad_norm": 4.11065673828125,
"learning_rate": 4.248618784530387e-06,
"loss": 2.2223,
"step": 41950
},
{
"epoch": 2.578268876611418,
"grad_norm": 3.768911123275757,
"learning_rate": 4.21792510742787e-06,
"loss": 2.2406,
"step": 42000
},
{
"epoch": 2.5813382443216697,
"grad_norm": 3.27990984916687,
"learning_rate": 4.187231430325353e-06,
"loss": 2.2224,
"step": 42050
},
{
"epoch": 2.5844076120319213,
"grad_norm": 3.7315287590026855,
"learning_rate": 4.1565377532228366e-06,
"loss": 2.3168,
"step": 42100
},
{
"epoch": 2.5874769797421733,
"grad_norm": 3.7368297576904297,
"learning_rate": 4.125844076120319e-06,
"loss": 2.3103,
"step": 42150
},
{
"epoch": 2.590546347452425,
"grad_norm": 3.973989725112915,
"learning_rate": 4.0951503990178025e-06,
"loss": 2.2719,
"step": 42200
},
{
"epoch": 2.5936157151626764,
"grad_norm": 3.1219234466552734,
"learning_rate": 4.064456721915285e-06,
"loss": 2.2401,
"step": 42250
},
{
"epoch": 2.596685082872928,
"grad_norm": 3.8633742332458496,
"learning_rate": 4.033763044812769e-06,
"loss": 2.1432,
"step": 42300
},
{
"epoch": 2.59975445058318,
"grad_norm": 3.6198391914367676,
"learning_rate": 4.003069367710252e-06,
"loss": 2.2469,
"step": 42350
},
{
"epoch": 2.6028238182934316,
"grad_norm": 4.8632707595825195,
"learning_rate": 3.972375690607735e-06,
"loss": 2.3461,
"step": 42400
},
{
"epoch": 2.605893186003683,
"grad_norm": 3.7397594451904297,
"learning_rate": 3.9416820135052185e-06,
"loss": 2.2278,
"step": 42450
},
{
"epoch": 2.608962553713935,
"grad_norm": 3.7671289443969727,
"learning_rate": 3.910988336402701e-06,
"loss": 2.2099,
"step": 42500
},
{
"epoch": 2.6120319214241867,
"grad_norm": 3.9413743019104004,
"learning_rate": 3.880294659300184e-06,
"loss": 2.2534,
"step": 42550
},
{
"epoch": 2.6151012891344383,
"grad_norm": 3.448629856109619,
"learning_rate": 3.849600982197667e-06,
"loss": 2.2862,
"step": 42600
},
{
"epoch": 2.61817065684469,
"grad_norm": 5.043887138366699,
"learning_rate": 3.81890730509515e-06,
"loss": 2.1931,
"step": 42650
},
{
"epoch": 2.6212400245549414,
"grad_norm": 3.661371946334839,
"learning_rate": 3.7882136279926332e-06,
"loss": 2.2653,
"step": 42700
},
{
"epoch": 2.6243093922651934,
"grad_norm": 4.375932216644287,
"learning_rate": 3.7575199508901166e-06,
"loss": 2.1996,
"step": 42750
},
{
"epoch": 2.627378759975445,
"grad_norm": 4.304765701293945,
"learning_rate": 3.7268262737876e-06,
"loss": 2.1938,
"step": 42800
},
{
"epoch": 2.630448127685697,
"grad_norm": 3.3659396171569824,
"learning_rate": 3.696132596685083e-06,
"loss": 2.2412,
"step": 42850
},
{
"epoch": 2.6335174953959486,
"grad_norm": 3.610954999923706,
"learning_rate": 3.665438919582566e-06,
"loss": 2.238,
"step": 42900
},
{
"epoch": 2.6365868631062,
"grad_norm": 3.3917031288146973,
"learning_rate": 3.6347452424800493e-06,
"loss": 2.2998,
"step": 42950
},
{
"epoch": 2.6396562308164517,
"grad_norm": 3.4687845706939697,
"learning_rate": 3.6040515653775326e-06,
"loss": 2.2232,
"step": 43000
},
{
"epoch": 2.6427255985267033,
"grad_norm": 4.230668544769287,
"learning_rate": 3.5733578882750156e-06,
"loss": 2.1685,
"step": 43050
},
{
"epoch": 2.6457949662369553,
"grad_norm": 3.617204189300537,
"learning_rate": 3.5426642111724985e-06,
"loss": 2.218,
"step": 43100
},
{
"epoch": 2.648864333947207,
"grad_norm": 3.763354778289795,
"learning_rate": 3.5119705340699815e-06,
"loss": 2.321,
"step": 43150
},
{
"epoch": 2.6519337016574585,
"grad_norm": 3.923051357269287,
"learning_rate": 3.481276856967465e-06,
"loss": 2.2174,
"step": 43200
},
{
"epoch": 2.6550030693677105,
"grad_norm": 4.259540557861328,
"learning_rate": 3.450583179864948e-06,
"loss": 2.2633,
"step": 43250
},
{
"epoch": 2.658072437077962,
"grad_norm": 4.246336936950684,
"learning_rate": 3.419889502762431e-06,
"loss": 2.2235,
"step": 43300
},
{
"epoch": 2.6611418047882136,
"grad_norm": 3.1326816082000732,
"learning_rate": 3.389195825659914e-06,
"loss": 2.1769,
"step": 43350
},
{
"epoch": 2.664211172498465,
"grad_norm": 5.116452217102051,
"learning_rate": 3.3585021485573975e-06,
"loss": 2.1907,
"step": 43400
},
{
"epoch": 2.6672805402087167,
"grad_norm": 3.177436113357544,
"learning_rate": 3.3278084714548805e-06,
"loss": 2.2524,
"step": 43450
},
{
"epoch": 2.6703499079189688,
"grad_norm": 2.992366313934326,
"learning_rate": 3.2971147943523634e-06,
"loss": 2.1998,
"step": 43500
},
{
"epoch": 2.6734192756292203,
"grad_norm": 3.997972249984741,
"learning_rate": 3.2664211172498464e-06,
"loss": 2.234,
"step": 43550
},
{
"epoch": 2.6764886433394723,
"grad_norm": 4.2181267738342285,
"learning_rate": 3.2357274401473297e-06,
"loss": 2.1645,
"step": 43600
},
{
"epoch": 2.679558011049724,
"grad_norm": 3.3036773204803467,
"learning_rate": 3.205033763044813e-06,
"loss": 2.2462,
"step": 43650
},
{
"epoch": 2.6826273787599755,
"grad_norm": 4.222419738769531,
"learning_rate": 3.174340085942296e-06,
"loss": 2.283,
"step": 43700
},
{
"epoch": 2.685696746470227,
"grad_norm": 3.980220079421997,
"learning_rate": 3.143646408839779e-06,
"loss": 2.2752,
"step": 43750
},
{
"epoch": 2.6887661141804786,
"grad_norm": 3.438683271408081,
"learning_rate": 3.1129527317372624e-06,
"loss": 2.1897,
"step": 43800
},
{
"epoch": 2.6918354818907306,
"grad_norm": 3.9108569622039795,
"learning_rate": 3.0822590546347453e-06,
"loss": 2.2084,
"step": 43850
},
{
"epoch": 2.694904849600982,
"grad_norm": 3.4712257385253906,
"learning_rate": 3.0515653775322283e-06,
"loss": 2.0835,
"step": 43900
},
{
"epoch": 2.6979742173112338,
"grad_norm": 3.4415714740753174,
"learning_rate": 3.0208717004297112e-06,
"loss": 2.3018,
"step": 43950
},
{
"epoch": 2.701043585021486,
"grad_norm": 4.478912353515625,
"learning_rate": 2.990178023327195e-06,
"loss": 2.1982,
"step": 44000
},
{
"epoch": 2.7041129527317374,
"grad_norm": 4.173290729522705,
"learning_rate": 2.959484346224678e-06,
"loss": 2.1964,
"step": 44050
},
{
"epoch": 2.707182320441989,
"grad_norm": 3.7616212368011475,
"learning_rate": 2.928790669122161e-06,
"loss": 2.2183,
"step": 44100
},
{
"epoch": 2.7102516881522405,
"grad_norm": 5.122647285461426,
"learning_rate": 2.898096992019644e-06,
"loss": 2.2466,
"step": 44150
},
{
"epoch": 2.713321055862492,
"grad_norm": 3.6268253326416016,
"learning_rate": 2.8674033149171273e-06,
"loss": 2.2719,
"step": 44200
},
{
"epoch": 2.716390423572744,
"grad_norm": 4.107768535614014,
"learning_rate": 2.8367096378146102e-06,
"loss": 2.148,
"step": 44250
},
{
"epoch": 2.7194597912829956,
"grad_norm": 3.9949638843536377,
"learning_rate": 2.806015960712093e-06,
"loss": 2.2333,
"step": 44300
},
{
"epoch": 2.7225291589932477,
"grad_norm": 3.9412174224853516,
"learning_rate": 2.7753222836095765e-06,
"loss": 2.1901,
"step": 44350
},
{
"epoch": 2.7255985267034992,
"grad_norm": 3.243807792663574,
"learning_rate": 2.74462860650706e-06,
"loss": 2.163,
"step": 44400
},
{
"epoch": 2.728667894413751,
"grad_norm": 4.045169353485107,
"learning_rate": 2.713934929404543e-06,
"loss": 2.1712,
"step": 44450
},
{
"epoch": 2.7317372621240024,
"grad_norm": 3.781874418258667,
"learning_rate": 2.683241252302026e-06,
"loss": 2.2034,
"step": 44500
},
{
"epoch": 2.734806629834254,
"grad_norm": 3.88508677482605,
"learning_rate": 2.6525475751995088e-06,
"loss": 2.1833,
"step": 44550
},
{
"epoch": 2.737875997544506,
"grad_norm": 4.135626792907715,
"learning_rate": 2.621853898096992e-06,
"loss": 2.2339,
"step": 44600
},
{
"epoch": 2.7409453652547575,
"grad_norm": 3.489367723464966,
"learning_rate": 2.591160220994475e-06,
"loss": 2.2991,
"step": 44650
},
{
"epoch": 2.744014732965009,
"grad_norm": 3.8391823768615723,
"learning_rate": 2.5604665438919585e-06,
"loss": 2.1783,
"step": 44700
},
{
"epoch": 2.747084100675261,
"grad_norm": 3.0692577362060547,
"learning_rate": 2.5297728667894414e-06,
"loss": 2.195,
"step": 44750
},
{
"epoch": 2.7501534683855127,
"grad_norm": 3.084923267364502,
"learning_rate": 2.4990791896869244e-06,
"loss": 2.2347,
"step": 44800
},
{
"epoch": 2.7532228360957642,
"grad_norm": 3.383420705795288,
"learning_rate": 2.4683855125844077e-06,
"loss": 2.168,
"step": 44850
},
{
"epoch": 2.756292203806016,
"grad_norm": 3.1771624088287354,
"learning_rate": 2.4376918354818907e-06,
"loss": 2.217,
"step": 44900
},
{
"epoch": 2.7593615715162674,
"grad_norm": 3.8641133308410645,
"learning_rate": 2.4069981583793737e-06,
"loss": 2.2976,
"step": 44950
},
{
"epoch": 2.7624309392265194,
"grad_norm": 3.6356940269470215,
"learning_rate": 2.376304481276857e-06,
"loss": 2.2688,
"step": 45000
},
{
"epoch": 2.765500306936771,
"grad_norm": 3.3960859775543213,
"learning_rate": 2.3456108041743404e-06,
"loss": 2.2591,
"step": 45050
},
{
"epoch": 2.768569674647023,
"grad_norm": 4.219804286956787,
"learning_rate": 2.3149171270718234e-06,
"loss": 2.2724,
"step": 45100
},
{
"epoch": 2.7716390423572745,
"grad_norm": 4.273275852203369,
"learning_rate": 2.2842234499693063e-06,
"loss": 2.2016,
"step": 45150
},
{
"epoch": 2.774708410067526,
"grad_norm": 4.720740795135498,
"learning_rate": 2.2535297728667893e-06,
"loss": 2.2711,
"step": 45200
},
{
"epoch": 2.7777777777777777,
"grad_norm": 3.9274086952209473,
"learning_rate": 2.2228360957642726e-06,
"loss": 2.2219,
"step": 45250
},
{
"epoch": 2.7808471454880292,
"grad_norm": 3.7379603385925293,
"learning_rate": 2.1921424186617556e-06,
"loss": 2.2034,
"step": 45300
},
{
"epoch": 2.7839165131982813,
"grad_norm": 3.719149112701416,
"learning_rate": 2.161448741559239e-06,
"loss": 2.2461,
"step": 45350
},
{
"epoch": 2.786985880908533,
"grad_norm": 3.402672529220581,
"learning_rate": 2.130755064456722e-06,
"loss": 2.2349,
"step": 45400
},
{
"epoch": 2.7900552486187844,
"grad_norm": 3.5159754753112793,
"learning_rate": 2.1000613873542053e-06,
"loss": 2.1584,
"step": 45450
},
{
"epoch": 2.7931246163290364,
"grad_norm": 3.4366443157196045,
"learning_rate": 2.0693677102516882e-06,
"loss": 2.1867,
"step": 45500
},
{
"epoch": 2.796193984039288,
"grad_norm": 3.5394604206085205,
"learning_rate": 2.038674033149171e-06,
"loss": 2.301,
"step": 45550
},
{
"epoch": 2.7992633517495396,
"grad_norm": 5.54389762878418,
"learning_rate": 2.007980356046654e-06,
"loss": 2.2846,
"step": 45600
},
{
"epoch": 2.802332719459791,
"grad_norm": 12.670145988464355,
"learning_rate": 1.9772866789441375e-06,
"loss": 2.2808,
"step": 45650
},
{
"epoch": 2.805402087170043,
"grad_norm": 4.009146690368652,
"learning_rate": 1.946593001841621e-06,
"loss": 2.1663,
"step": 45700
},
{
"epoch": 2.8084714548802947,
"grad_norm": 4.112977504730225,
"learning_rate": 1.915899324739104e-06,
"loss": 2.2551,
"step": 45750
},
{
"epoch": 2.8115408225905463,
"grad_norm": 5.213067054748535,
"learning_rate": 1.885205647636587e-06,
"loss": 2.2628,
"step": 45800
},
{
"epoch": 2.8146101903007983,
"grad_norm": 3.289320230484009,
"learning_rate": 1.85451197053407e-06,
"loss": 2.2012,
"step": 45850
},
{
"epoch": 2.81767955801105,
"grad_norm": 3.8698418140411377,
"learning_rate": 1.8238182934315531e-06,
"loss": 2.1918,
"step": 45900
},
{
"epoch": 2.8207489257213014,
"grad_norm": 3.230456829071045,
"learning_rate": 1.7931246163290363e-06,
"loss": 2.1566,
"step": 45950
},
{
"epoch": 2.823818293431553,
"grad_norm": 3.878119945526123,
"learning_rate": 1.7624309392265194e-06,
"loss": 2.1233,
"step": 46000
},
{
"epoch": 2.8268876611418046,
"grad_norm": 3.892206907272339,
"learning_rate": 1.7317372621240024e-06,
"loss": 2.2653,
"step": 46050
},
{
"epoch": 2.8299570288520566,
"grad_norm": 5.064377784729004,
"learning_rate": 1.7010435850214855e-06,
"loss": 2.1615,
"step": 46100
},
{
"epoch": 2.833026396562308,
"grad_norm": 3.4874629974365234,
"learning_rate": 1.6703499079189687e-06,
"loss": 2.1768,
"step": 46150
},
{
"epoch": 2.8360957642725597,
"grad_norm": 4.075310230255127,
"learning_rate": 1.6396562308164519e-06,
"loss": 2.1583,
"step": 46200
},
{
"epoch": 2.8391651319828117,
"grad_norm": 6.029613018035889,
"learning_rate": 1.6089625537139348e-06,
"loss": 2.2742,
"step": 46250
},
{
"epoch": 2.8422344996930633,
"grad_norm": 3.3309133052825928,
"learning_rate": 1.5782688766114182e-06,
"loss": 2.2977,
"step": 46300
},
{
"epoch": 2.845303867403315,
"grad_norm": 3.7084951400756836,
"learning_rate": 1.5475751995089011e-06,
"loss": 2.2776,
"step": 46350
},
{
"epoch": 2.8483732351135664,
"grad_norm": 3.8084752559661865,
"learning_rate": 1.5168815224063843e-06,
"loss": 2.2517,
"step": 46400
},
{
"epoch": 2.8514426028238185,
"grad_norm": 3.2854843139648438,
"learning_rate": 1.4861878453038673e-06,
"loss": 2.2082,
"step": 46450
},
{
"epoch": 2.85451197053407,
"grad_norm": 3.1363027095794678,
"learning_rate": 1.4554941682013506e-06,
"loss": 2.1851,
"step": 46500
},
{
"epoch": 2.8575813382443216,
"grad_norm": 2.982666492462158,
"learning_rate": 1.4248004910988336e-06,
"loss": 2.2236,
"step": 46550
},
{
"epoch": 2.8606507059545736,
"grad_norm": 3.61039662361145,
"learning_rate": 1.3941068139963167e-06,
"loss": 2.2663,
"step": 46600
},
{
"epoch": 2.863720073664825,
"grad_norm": 3.5564205646514893,
"learning_rate": 1.3634131368938e-06,
"loss": 2.2026,
"step": 46650
},
{
"epoch": 2.8667894413750767,
"grad_norm": 3.3528811931610107,
"learning_rate": 1.332719459791283e-06,
"loss": 2.2235,
"step": 46700
},
{
"epoch": 2.8698588090853283,
"grad_norm": 3.672039270401001,
"learning_rate": 1.302025782688766e-06,
"loss": 2.2037,
"step": 46750
},
{
"epoch": 2.87292817679558,
"grad_norm": 3.8955376148223877,
"learning_rate": 1.2713321055862492e-06,
"loss": 2.2178,
"step": 46800
},
{
"epoch": 2.875997544505832,
"grad_norm": 4.099259376525879,
"learning_rate": 1.2406384284837323e-06,
"loss": 2.1855,
"step": 46850
},
{
"epoch": 2.8790669122160835,
"grad_norm": 3.968477964401245,
"learning_rate": 1.2099447513812155e-06,
"loss": 2.3346,
"step": 46900
},
{
"epoch": 2.882136279926335,
"grad_norm": 4.449561595916748,
"learning_rate": 1.1792510742786985e-06,
"loss": 2.1736,
"step": 46950
},
{
"epoch": 2.885205647636587,
"grad_norm": 3.945478916168213,
"learning_rate": 1.1485573971761818e-06,
"loss": 2.2412,
"step": 47000
},
{
"epoch": 2.8882750153468386,
"grad_norm": 3.1608164310455322,
"learning_rate": 1.1178637200736648e-06,
"loss": 2.2238,
"step": 47050
},
{
"epoch": 2.89134438305709,
"grad_norm": 4.12243127822876,
"learning_rate": 1.087170042971148e-06,
"loss": 2.1938,
"step": 47100
},
{
"epoch": 2.8944137507673418,
"grad_norm": 3.392117977142334,
"learning_rate": 1.056476365868631e-06,
"loss": 2.1843,
"step": 47150
},
{
"epoch": 2.8974831184775938,
"grad_norm": 3.5791280269622803,
"learning_rate": 1.0257826887661143e-06,
"loss": 2.192,
"step": 47200
},
{
"epoch": 2.9005524861878453,
"grad_norm": 3.195387363433838,
"learning_rate": 9.950890116635972e-07,
"loss": 2.221,
"step": 47250
},
{
"epoch": 2.903621853898097,
"grad_norm": 4.41968297958374,
"learning_rate": 9.643953345610804e-07,
"loss": 2.2192,
"step": 47300
},
{
"epoch": 2.906691221608349,
"grad_norm": 3.8508644104003906,
"learning_rate": 9.337016574585636e-07,
"loss": 2.1182,
"step": 47350
},
{
"epoch": 2.9097605893186005,
"grad_norm": 3.493018865585327,
"learning_rate": 9.030079803560467e-07,
"loss": 2.1742,
"step": 47400
},
{
"epoch": 2.912829957028852,
"grad_norm": 3.8892369270324707,
"learning_rate": 8.723143032535298e-07,
"loss": 2.1974,
"step": 47450
},
{
"epoch": 2.9158993247391036,
"grad_norm": 3.238802671432495,
"learning_rate": 8.416206261510129e-07,
"loss": 2.3495,
"step": 47500
},
{
"epoch": 2.918968692449355,
"grad_norm": 3.5974628925323486,
"learning_rate": 8.109269490484961e-07,
"loss": 2.2413,
"step": 47550
},
{
"epoch": 2.922038060159607,
"grad_norm": 3.806520938873291,
"learning_rate": 7.802332719459792e-07,
"loss": 2.2885,
"step": 47600
},
{
"epoch": 2.925107427869859,
"grad_norm": 4.2564496994018555,
"learning_rate": 7.495395948434623e-07,
"loss": 2.2423,
"step": 47650
},
{
"epoch": 2.9281767955801103,
"grad_norm": 3.9945321083068848,
"learning_rate": 7.188459177409454e-07,
"loss": 2.2334,
"step": 47700
},
{
"epoch": 2.9312461632903624,
"grad_norm": 3.3918559551239014,
"learning_rate": 6.881522406384285e-07,
"loss": 2.1502,
"step": 47750
},
{
"epoch": 2.934315531000614,
"grad_norm": 4.716714859008789,
"learning_rate": 6.574585635359117e-07,
"loss": 2.2168,
"step": 47800
},
{
"epoch": 2.9373848987108655,
"grad_norm": 4.033308506011963,
"learning_rate": 6.267648864333948e-07,
"loss": 2.2624,
"step": 47850
},
{
"epoch": 2.940454266421117,
"grad_norm": 5.724266052246094,
"learning_rate": 5.960712093308779e-07,
"loss": 2.1917,
"step": 47900
},
{
"epoch": 2.943523634131369,
"grad_norm": 3.851032257080078,
"learning_rate": 5.65377532228361e-07,
"loss": 2.2129,
"step": 47950
},
{
"epoch": 2.9465930018416207,
"grad_norm": 3.436573028564453,
"learning_rate": 5.346838551258441e-07,
"loss": 2.3044,
"step": 48000
},
{
"epoch": 2.949662369551872,
"grad_norm": 3.4812095165252686,
"learning_rate": 5.039901780233272e-07,
"loss": 2.2165,
"step": 48050
},
{
"epoch": 2.9527317372621242,
"grad_norm": 3.4163248538970947,
"learning_rate": 4.732965009208103e-07,
"loss": 2.1441,
"step": 48100
},
{
"epoch": 2.955801104972376,
"grad_norm": 4.04727840423584,
"learning_rate": 4.426028238182934e-07,
"loss": 2.2139,
"step": 48150
},
{
"epoch": 2.9588704726826274,
"grad_norm": 3.314655065536499,
"learning_rate": 4.119091467157765e-07,
"loss": 2.2098,
"step": 48200
},
{
"epoch": 2.961939840392879,
"grad_norm": 4.228841304779053,
"learning_rate": 3.812154696132597e-07,
"loss": 2.1626,
"step": 48250
},
{
"epoch": 2.9650092081031305,
"grad_norm": 4.127499580383301,
"learning_rate": 3.505217925107428e-07,
"loss": 2.3014,
"step": 48300
},
{
"epoch": 2.9680785758133825,
"grad_norm": 3.7830405235290527,
"learning_rate": 3.198281154082259e-07,
"loss": 2.2795,
"step": 48350
},
{
"epoch": 2.971147943523634,
"grad_norm": 5.398400783538818,
"learning_rate": 2.89134438305709e-07,
"loss": 2.2314,
"step": 48400
},
{
"epoch": 2.9742173112338857,
"grad_norm": 4.308445453643799,
"learning_rate": 2.584407612031921e-07,
"loss": 2.3097,
"step": 48450
},
{
"epoch": 2.9772866789441377,
"grad_norm": 3.5713133811950684,
"learning_rate": 2.277470841006753e-07,
"loss": 2.1968,
"step": 48500
},
{
"epoch": 2.9803560466543892,
"grad_norm": 3.5215930938720703,
"learning_rate": 1.970534069981584e-07,
"loss": 2.2354,
"step": 48550
},
{
"epoch": 2.983425414364641,
"grad_norm": 3.120506763458252,
"learning_rate": 1.663597298956415e-07,
"loss": 2.2237,
"step": 48600
},
{
"epoch": 2.9864947820748924,
"grad_norm": 6.137388706207275,
"learning_rate": 1.3566605279312464e-07,
"loss": 2.235,
"step": 48650
},
{
"epoch": 2.9895641497851444,
"grad_norm": 3.3079631328582764,
"learning_rate": 1.0497237569060774e-07,
"loss": 2.1549,
"step": 48700
},
{
"epoch": 2.992633517495396,
"grad_norm": 4.21074914932251,
"learning_rate": 7.427869858809085e-08,
"loss": 2.1818,
"step": 48750
},
{
"epoch": 2.9957028852056475,
"grad_norm": 3.8667783737182617,
"learning_rate": 4.358502148557397e-08,
"loss": 2.3276,
"step": 48800
},
{
"epoch": 2.9987722529158995,
"grad_norm": 4.376747131347656,
"learning_rate": 1.2891344383057091e-08,
"loss": 2.2735,
"step": 48850
}
],
"logging_steps": 50,
"max_steps": 48870,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.816879851189043e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}