{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 17429,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 5.7375638303976134e-05,
      "grad_norm": 0.435546875,
      "learning_rate": 1.1474469305794606e-07,
      "loss": 1.3588,
      "step": 1
    },
    {
      "epoch": 0.00028687819151988064,
      "grad_norm": 0.59375,
      "learning_rate": 5.737234652897304e-07,
      "loss": 1.0991,
      "step": 5
    },
    {
      "epoch": 0.0005737563830397613,
      "grad_norm": 0.349609375,
      "learning_rate": 1.1474469305794607e-06,
      "loss": 1.1268,
      "step": 10
    },
    {
      "epoch": 0.0008606345745596419,
      "grad_norm": 0.421875,
      "learning_rate": 1.721170395869191e-06,
      "loss": 1.1761,
      "step": 15
    },
    {
      "epoch": 0.0011475127660795226,
      "grad_norm": 0.33984375,
      "learning_rate": 2.2948938611589215e-06,
      "loss": 1.1986,
      "step": 20
    },
    {
      "epoch": 0.0014343909575994032,
      "grad_norm": 0.359375,
      "learning_rate": 2.868617326448652e-06,
      "loss": 1.1268,
      "step": 25
    },
    {
      "epoch": 0.0017212691491192839,
      "grad_norm": 0.296875,
      "learning_rate": 3.442340791738382e-06,
      "loss": 1.0619,
      "step": 30
    },
    {
      "epoch": 0.0020081473406391645,
      "grad_norm": 0.337890625,
      "learning_rate": 4.016064257028113e-06,
      "loss": 1.1175,
      "step": 35
    },
    {
      "epoch": 0.002295025532159045,
      "grad_norm": 0.328125,
      "learning_rate": 4.589787722317843e-06,
      "loss": 1.1421,
      "step": 40
    },
    {
      "epoch": 0.002581903723678926,
      "grad_norm": 0.32421875,
      "learning_rate": 5.163511187607573e-06,
      "loss": 1.1015,
      "step": 45
    },
    {
      "epoch": 0.0028687819151988064,
      "grad_norm": 0.33984375,
      "learning_rate": 5.737234652897304e-06,
      "loss": 1.138,
      "step": 50
    },
    {
      "epoch": 0.003155660106718687,
      "grad_norm": 0.345703125,
      "learning_rate": 6.310958118187034e-06,
      "loss": 1.1281,
      "step": 55
    },
    {
      "epoch": 0.0034425382982385677,
      "grad_norm": 0.318359375,
      "learning_rate": 6.884681583476764e-06,
      "loss": 1.1071,
      "step": 60
    },
    {
      "epoch": 0.0037294164897584484,
      "grad_norm": 0.322265625,
      "learning_rate": 7.4584050487664955e-06,
      "loss": 1.1443,
      "step": 65
    },
    {
      "epoch": 0.004016294681278329,
      "grad_norm": 0.326171875,
      "learning_rate": 8.032128514056226e-06,
      "loss": 1.1746,
      "step": 70
    },
    {
      "epoch": 0.00430317287279821,
      "grad_norm": 0.291015625,
      "learning_rate": 8.605851979345956e-06,
      "loss": 1.0971,
      "step": 75
    },
    {
      "epoch": 0.00459005106431809,
      "grad_norm": 0.314453125,
      "learning_rate": 9.179575444635686e-06,
      "loss": 1.1356,
      "step": 80
    },
    {
      "epoch": 0.004876929255837971,
      "grad_norm": 0.28125,
      "learning_rate": 9.753298909925416e-06,
      "loss": 1.0908,
      "step": 85
    },
    {
      "epoch": 0.005163807447357852,
      "grad_norm": 0.30078125,
      "learning_rate": 1.0327022375215146e-05,
      "loss": 1.0388,
      "step": 90
    },
    {
      "epoch": 0.005450685638877732,
      "grad_norm": 0.296875,
      "learning_rate": 1.0900745840504876e-05,
      "loss": 1.1107,
      "step": 95
    },
    {
      "epoch": 0.005737563830397613,
      "grad_norm": 0.294921875,
      "learning_rate": 1.1474469305794608e-05,
      "loss": 1.0713,
      "step": 100
    },
    {
      "epoch": 0.0060244420219174935,
      "grad_norm": 0.37890625,
      "learning_rate": 1.2048192771084338e-05,
      "loss": 1.1581,
      "step": 105
    },
    {
      "epoch": 0.006311320213437374,
      "grad_norm": 0.376953125,
      "learning_rate": 1.2621916236374069e-05,
      "loss": 1.0059,
      "step": 110
    },
    {
      "epoch": 0.006598198404957255,
      "grad_norm": 0.255859375,
      "learning_rate": 1.3195639701663797e-05,
      "loss": 0.9732,
      "step": 115
    },
    {
      "epoch": 0.0068850765964771354,
      "grad_norm": 0.2890625,
      "learning_rate": 1.3769363166953527e-05,
      "loss": 0.9736,
      "step": 120
    },
    {
      "epoch": 0.007171954787997016,
      "grad_norm": 0.2734375,
      "learning_rate": 1.434308663224326e-05,
      "loss": 1.044,
      "step": 125
    },
    {
      "epoch": 0.007458832979516897,
      "grad_norm": 0.314453125,
      "learning_rate": 1.4916810097532991e-05,
      "loss": 1.0218,
      "step": 130
    },
    {
      "epoch": 0.007745711171036777,
      "grad_norm": 0.236328125,
      "learning_rate": 1.549053356282272e-05,
      "loss": 0.9904,
      "step": 135
    },
    {
      "epoch": 0.008032589362556658,
      "grad_norm": 0.2490234375,
      "learning_rate": 1.606425702811245e-05,
      "loss": 0.9875,
      "step": 140
    },
    {
      "epoch": 0.008319467554076539,
      "grad_norm": 0.25390625,
      "learning_rate": 1.663798049340218e-05,
      "loss": 0.9874,
      "step": 145
    },
    {
      "epoch": 0.00860634574559642,
      "grad_norm": 0.29296875,
      "learning_rate": 1.721170395869191e-05,
      "loss": 0.9866,
      "step": 150
    },
    {
      "epoch": 0.0088932239371163,
      "grad_norm": 0.26171875,
      "learning_rate": 1.7785427423981642e-05,
      "loss": 0.9857,
      "step": 155
    },
    {
      "epoch": 0.00918010212863618,
      "grad_norm": 0.38671875,
      "learning_rate": 1.8359150889271372e-05,
      "loss": 1.0017,
      "step": 160
    },
    {
      "epoch": 0.009466980320156061,
      "grad_norm": 0.287109375,
      "learning_rate": 1.8932874354561102e-05,
      "loss": 1.025,
      "step": 165
    },
    {
      "epoch": 0.009753858511675942,
      "grad_norm": 0.28515625,
      "learning_rate": 1.9506597819850832e-05,
      "loss": 1.015,
      "step": 170
    },
    {
      "epoch": 0.010040736703195823,
      "grad_norm": 0.27734375,
      "learning_rate": 2.0080321285140562e-05,
      "loss": 0.9669,
      "step": 175
    },
    {
      "epoch": 0.010327614894715703,
      "grad_norm": 0.2451171875,
      "learning_rate": 2.0654044750430293e-05,
      "loss": 1.035,
      "step": 180
    },
    {
      "epoch": 0.010614493086235584,
      "grad_norm": 0.2578125,
      "learning_rate": 2.1227768215720023e-05,
      "loss": 1.0398,
      "step": 185
    },
    {
      "epoch": 0.010901371277755464,
      "grad_norm": 0.271484375,
      "learning_rate": 2.1801491681009753e-05,
      "loss": 1.05,
      "step": 190
    },
    {
      "epoch": 0.011188249469275345,
      "grad_norm": 0.267578125,
      "learning_rate": 2.2375215146299486e-05,
      "loss": 0.9228,
      "step": 195
    },
    {
      "epoch": 0.011475127660795226,
      "grad_norm": 0.263671875,
      "learning_rate": 2.2948938611589217e-05,
      "loss": 1.0787,
      "step": 200
    },
    {
      "epoch": 0.011762005852315106,
      "grad_norm": 0.314453125,
      "learning_rate": 2.3522662076878947e-05,
      "loss": 1.0928,
      "step": 205
    },
    {
      "epoch": 0.012048884043834987,
      "grad_norm": 0.28515625,
      "learning_rate": 2.4096385542168677e-05,
      "loss": 1.0675,
      "step": 210
    },
    {
      "epoch": 0.012335762235354868,
      "grad_norm": 0.2734375,
      "learning_rate": 2.4670109007458407e-05,
      "loss": 1.02,
      "step": 215
    },
    {
      "epoch": 0.012622640426874748,
      "grad_norm": 0.291015625,
      "learning_rate": 2.5243832472748137e-05,
      "loss": 0.9736,
      "step": 220
    },
    {
      "epoch": 0.012909518618394629,
      "grad_norm": 0.271484375,
      "learning_rate": 2.5817555938037867e-05,
      "loss": 1.0255,
      "step": 225
    },
    {
      "epoch": 0.01319639680991451,
      "grad_norm": 0.33203125,
      "learning_rate": 2.6391279403327594e-05,
      "loss": 1.1085,
      "step": 230
    },
    {
      "epoch": 0.01348327500143439,
      "grad_norm": 0.2734375,
      "learning_rate": 2.6965002868617328e-05,
      "loss": 1.0057,
      "step": 235
    },
    {
      "epoch": 0.013770153192954271,
      "grad_norm": 0.291015625,
      "learning_rate": 2.7538726333907055e-05,
      "loss": 1.0452,
      "step": 240
    },
    {
      "epoch": 0.014057031384474152,
      "grad_norm": 0.2890625,
      "learning_rate": 2.8112449799196788e-05,
      "loss": 1.0544,
      "step": 245
    },
    {
      "epoch": 0.014343909575994032,
      "grad_norm": 0.3046875,
      "learning_rate": 2.868617326448652e-05,
      "loss": 1.0181,
      "step": 250
    },
    {
      "epoch": 0.014630787767513913,
      "grad_norm": 0.28125,
      "learning_rate": 2.925989672977625e-05,
      "loss": 0.9926,
      "step": 255
    },
    {
      "epoch": 0.014917665959033793,
      "grad_norm": 0.34375,
      "learning_rate": 2.9833620195065982e-05,
      "loss": 0.9662,
      "step": 260
    },
    {
      "epoch": 0.015204544150553674,
      "grad_norm": 0.30078125,
      "learning_rate": 3.040734366035571e-05,
      "loss": 1.0339,
      "step": 265
    },
    {
      "epoch": 0.015491422342073555,
      "grad_norm": 0.283203125,
      "learning_rate": 3.098106712564544e-05,
      "loss": 0.9458,
      "step": 270
    },
    {
      "epoch": 0.015778300533593435,
      "grad_norm": 0.287109375,
      "learning_rate": 3.155479059093517e-05,
      "loss": 0.96,
      "step": 275
    },
    {
      "epoch": 0.016065178725113316,
      "grad_norm": 0.3125,
      "learning_rate": 3.21285140562249e-05,
      "loss": 0.9781,
      "step": 280
    },
    {
      "epoch": 0.016352056916633197,
      "grad_norm": 0.291015625,
      "learning_rate": 3.2702237521514636e-05,
      "loss": 1.0201,
      "step": 285
    },
    {
      "epoch": 0.016638935108153077,
      "grad_norm": 0.306640625,
      "learning_rate": 3.327596098680436e-05,
      "loss": 1.0515,
      "step": 290
    },
    {
      "epoch": 0.016925813299672958,
      "grad_norm": 0.3125,
      "learning_rate": 3.3849684452094096e-05,
      "loss": 1.0647,
      "step": 295
    },
    {
      "epoch": 0.01721269149119284,
      "grad_norm": 0.30078125,
      "learning_rate": 3.442340791738382e-05,
      "loss": 0.9632,
      "step": 300
    },
    {
      "epoch": 0.01749956968271272,
      "grad_norm": 0.29296875,
      "learning_rate": 3.499713138267356e-05,
      "loss": 0.9992,
      "step": 305
    },
    {
      "epoch": 0.0177864478742326,
      "grad_norm": 0.287109375,
      "learning_rate": 3.5570854847963284e-05,
      "loss": 0.9193,
      "step": 310
    },
    {
      "epoch": 0.01807332606575248,
      "grad_norm": 0.328125,
      "learning_rate": 3.614457831325301e-05,
      "loss": 1.0958,
      "step": 315
    },
    {
      "epoch": 0.01836020425727236,
      "grad_norm": 0.3046875,
      "learning_rate": 3.6718301778542744e-05,
      "loss": 0.9924,
      "step": 320
    },
    {
      "epoch": 0.018647082448792242,
      "grad_norm": 0.3046875,
      "learning_rate": 3.729202524383247e-05,
      "loss": 1.0167,
      "step": 325
    },
    {
      "epoch": 0.018933960640312122,
      "grad_norm": 0.314453125,
      "learning_rate": 3.7865748709122204e-05,
      "loss": 1.0477,
      "step": 330
    },
    {
      "epoch": 0.019220838831832003,
      "grad_norm": 0.30859375,
      "learning_rate": 3.843947217441193e-05,
      "loss": 1.0352,
      "step": 335
    },
    {
      "epoch": 0.019507717023351884,
      "grad_norm": 0.296875,
      "learning_rate": 3.9013195639701665e-05,
      "loss": 1.0158,
      "step": 340
    },
    {
      "epoch": 0.019794595214871764,
      "grad_norm": 0.279296875,
      "learning_rate": 3.958691910499139e-05,
      "loss": 1.0116,
      "step": 345
    },
    {
      "epoch": 0.020081473406391645,
      "grad_norm": 0.306640625,
      "learning_rate": 4.0160642570281125e-05,
      "loss": 1.0194,
      "step": 350
    },
    {
      "epoch": 0.020368351597911526,
      "grad_norm": 0.298828125,
      "learning_rate": 4.073436603557086e-05,
      "loss": 1.0497,
      "step": 355
    },
    {
      "epoch": 0.020655229789431406,
      "grad_norm": 0.306640625,
      "learning_rate": 4.1308089500860585e-05,
      "loss": 1.0213,
      "step": 360
    },
    {
      "epoch": 0.020942107980951287,
      "grad_norm": 0.2890625,
      "learning_rate": 4.188181296615032e-05,
      "loss": 1.0122,
      "step": 365
    },
    {
      "epoch": 0.021228986172471168,
      "grad_norm": 0.296875,
      "learning_rate": 4.2455536431440046e-05,
      "loss": 0.9192,
      "step": 370
    },
    {
      "epoch": 0.021515864363991048,
      "grad_norm": 0.287109375,
      "learning_rate": 4.302925989672978e-05,
      "loss": 0.9714,
      "step": 375
    },
    {
      "epoch": 0.02180274255551093,
      "grad_norm": 0.287109375,
      "learning_rate": 4.3602983362019506e-05,
      "loss": 0.9926,
      "step": 380
    },
    {
      "epoch": 0.02208962074703081,
      "grad_norm": 0.345703125,
      "learning_rate": 4.417670682730924e-05,
      "loss": 0.9874,
      "step": 385
    },
    {
      "epoch": 0.02237649893855069,
      "grad_norm": 0.310546875,
      "learning_rate": 4.475043029259897e-05,
      "loss": 0.9786,
      "step": 390
    },
    {
      "epoch": 0.02266337713007057,
      "grad_norm": 0.28515625,
      "learning_rate": 4.53241537578887e-05,
      "loss": 1.0112,
      "step": 395
    },
    {
      "epoch": 0.02295025532159045,
      "grad_norm": 0.27734375,
      "learning_rate": 4.589787722317843e-05,
      "loss": 0.9551,
      "step": 400
    },
    {
      "epoch": 0.023237133513110332,
      "grad_norm": 0.28125,
      "learning_rate": 4.647160068846816e-05,
      "loss": 0.9801,
      "step": 405
    },
    {
      "epoch": 0.023524011704630213,
      "grad_norm": 0.28515625,
      "learning_rate": 4.7045324153757894e-05,
      "loss": 0.9705,
      "step": 410
    },
    {
      "epoch": 0.023810889896150093,
      "grad_norm": 0.283203125,
      "learning_rate": 4.761904761904762e-05,
      "loss": 1.0404,
      "step": 415
    },
    {
      "epoch": 0.024097768087669974,
      "grad_norm": 0.296875,
      "learning_rate": 4.8192771084337354e-05,
      "loss": 0.9633,
      "step": 420
    },
    {
      "epoch": 0.024384646279189855,
      "grad_norm": 0.29296875,
      "learning_rate": 4.876649454962709e-05,
      "loss": 1.0002,
      "step": 425
    },
    {
      "epoch": 0.024671524470709735,
      "grad_norm": 0.3125,
      "learning_rate": 4.9340218014916814e-05,
      "loss": 1.0285,
      "step": 430
    },
    {
      "epoch": 0.024958402662229616,
      "grad_norm": 0.2734375,
      "learning_rate": 4.991394148020654e-05,
      "loss": 1.0558,
      "step": 435
    },
    {
      "epoch": 0.025245280853749497,
      "grad_norm": 0.26171875,
      "learning_rate": 5.0487664945496275e-05,
      "loss": 0.9442,
      "step": 440
    },
    {
      "epoch": 0.025532159045269377,
      "grad_norm": 0.2578125,
      "learning_rate": 5.1061388410786e-05,
      "loss": 1.0159,
      "step": 445
    },
    {
      "epoch": 0.025819037236789258,
      "grad_norm": 0.275390625,
      "learning_rate": 5.1635111876075735e-05,
      "loss": 0.9705,
      "step": 450
    },
    {
      "epoch": 0.02610591542830914,
      "grad_norm": 0.279296875,
      "learning_rate": 5.220883534136547e-05,
      "loss": 0.9864,
      "step": 455
    },
    {
      "epoch": 0.02639279361982902,
      "grad_norm": 0.291015625,
      "learning_rate": 5.278255880665519e-05,
      "loss": 1.0527,
      "step": 460
    },
    {
      "epoch": 0.0266796718113489,
      "grad_norm": 0.275390625,
      "learning_rate": 5.335628227194492e-05,
      "loss": 0.9403,
      "step": 465
    },
    {
      "epoch": 0.02696655000286878,
      "grad_norm": 0.26953125,
      "learning_rate": 5.3930005737234656e-05,
      "loss": 1.0662,
      "step": 470
    },
    {
      "epoch": 0.02725342819438866,
      "grad_norm": 0.291015625,
      "learning_rate": 5.450372920252439e-05,
      "loss": 0.9985,
      "step": 475
    },
    {
      "epoch": 0.027540306385908542,
      "grad_norm": 0.27734375,
      "learning_rate": 5.507745266781411e-05,
      "loss": 0.9654,
      "step": 480
    },
    {
      "epoch": 0.027827184577428422,
      "grad_norm": 0.310546875,
      "learning_rate": 5.565117613310384e-05,
      "loss": 1.0284,
      "step": 485
    },
    {
      "epoch": 0.028114062768948303,
      "grad_norm": 0.296875,
      "learning_rate": 5.6224899598393576e-05,
      "loss": 0.9302,
      "step": 490
    },
    {
      "epoch": 0.028400940960468184,
      "grad_norm": 0.322265625,
      "learning_rate": 5.679862306368331e-05,
      "loss": 0.9556,
      "step": 495
    },
    {
      "epoch": 0.028687819151988064,
      "grad_norm": 0.291015625,
      "learning_rate": 5.737234652897304e-05,
      "loss": 1.0002,
      "step": 500
    },
    {
      "epoch": 0.028974697343507945,
      "grad_norm": 0.306640625,
      "learning_rate": 5.794606999426276e-05,
      "loss": 0.9644,
      "step": 505
    },
    {
      "epoch": 0.029261575535027826,
      "grad_norm": 0.345703125,
      "learning_rate": 5.85197934595525e-05,
      "loss": 0.9905,
      "step": 510
    },
    {
      "epoch": 0.029548453726547706,
      "grad_norm": 0.291015625,
      "learning_rate": 5.909351692484223e-05,
      "loss": 1.0985,
      "step": 515
    },
    {
      "epoch": 0.029835331918067587,
      "grad_norm": 0.287109375,
      "learning_rate": 5.9667240390131964e-05,
      "loss": 1.0632,
      "step": 520
    },
    {
      "epoch": 0.030122210109587468,
      "grad_norm": 0.267578125,
      "learning_rate": 6.02409638554217e-05,
      "loss": 1.0096,
      "step": 525
    },
    {
      "epoch": 0.030409088301107348,
      "grad_norm": 0.291015625,
      "learning_rate": 6.081468732071142e-05,
      "loss": 0.9602,
      "step": 530
    },
    {
      "epoch": 0.03069596649262723,
      "grad_norm": 0.263671875,
      "learning_rate": 6.138841078600115e-05,
      "loss": 0.9651,
      "step": 535
    },
    {
      "epoch": 0.03098284468414711,
      "grad_norm": 0.26171875,
      "learning_rate": 6.196213425129088e-05,
      "loss": 1.0011,
      "step": 540
    },
    {
      "epoch": 0.03126972287566699,
      "grad_norm": 0.26953125,
      "learning_rate": 6.253585771658062e-05,
      "loss": 0.9452,
      "step": 545
    },
    {
      "epoch": 0.03155660106718687,
      "grad_norm": 0.2890625,
      "learning_rate": 6.310958118187034e-05,
      "loss": 1.0467,
      "step": 550
    },
    {
      "epoch": 0.03184347925870675,
      "grad_norm": 0.265625,
      "learning_rate": 6.368330464716007e-05,
      "loss": 0.9509,
      "step": 555
    },
    {
      "epoch": 0.03213035745022663,
      "grad_norm": 0.291015625,
      "learning_rate": 6.42570281124498e-05,
      "loss": 1.0041,
      "step": 560
    },
    {
      "epoch": 0.03241723564174651,
      "grad_norm": 0.283203125,
      "learning_rate": 6.483075157773954e-05,
      "loss": 0.9926,
      "step": 565
    },
    {
      "epoch": 0.03270411383326639,
      "grad_norm": 0.25390625,
      "learning_rate": 6.540447504302927e-05,
      "loss": 1.03,
      "step": 570
    },
    {
      "epoch": 0.032990992024786274,
      "grad_norm": 0.275390625,
      "learning_rate": 6.597819850831899e-05,
      "loss": 1.0156,
      "step": 575
    },
    {
      "epoch": 0.033277870216306155,
      "grad_norm": 0.30078125,
      "learning_rate": 6.655192197360873e-05,
      "loss": 0.9382,
      "step": 580
    },
    {
      "epoch": 0.033564748407826035,
      "grad_norm": 0.2578125,
      "learning_rate": 6.712564543889846e-05,
      "loss": 1.0226,
      "step": 585
    },
    {
      "epoch": 0.033851626599345916,
      "grad_norm": 0.275390625,
      "learning_rate": 6.769936890418819e-05,
      "loss": 1.0166,
      "step": 590
    },
    {
      "epoch": 0.0341385047908658,
      "grad_norm": 0.271484375,
      "learning_rate": 6.827309236947793e-05,
      "loss": 1.0474,
      "step": 595
    },
    {
      "epoch": 0.03442538298238568,
      "grad_norm": 0.271484375,
      "learning_rate": 6.884681583476765e-05,
      "loss": 1.0629,
      "step": 600
    },
    {
      "epoch": 0.03471226117390556,
      "grad_norm": 0.26953125,
      "learning_rate": 6.942053930005738e-05,
      "loss": 0.9475,
      "step": 605
    },
    {
      "epoch": 0.03499913936542544,
      "grad_norm": 0.2734375,
      "learning_rate": 6.999426276534711e-05,
      "loss": 0.9764,
      "step": 610
    },
    {
      "epoch": 0.03528601755694532,
      "grad_norm": 0.25,
      "learning_rate": 7.056798623063683e-05,
      "loss": 0.9431,
      "step": 615
    },
    {
      "epoch": 0.0355728957484652,
      "grad_norm": 0.287109375,
      "learning_rate": 7.114170969592657e-05,
      "loss": 1.0624,
      "step": 620
    },
    {
      "epoch": 0.03585977393998508,
      "grad_norm": 0.255859375,
      "learning_rate": 7.17154331612163e-05,
      "loss": 1.0391,
      "step": 625
    },
    {
      "epoch": 0.03614665213150496,
      "grad_norm": 0.27734375,
      "learning_rate": 7.228915662650602e-05,
      "loss": 0.9731,
      "step": 630
    },
    {
      "epoch": 0.03643353032302484,
      "grad_norm": 0.26171875,
      "learning_rate": 7.286288009179575e-05,
      "loss": 0.9593,
      "step": 635
    },
    {
      "epoch": 0.03672040851454472,
      "grad_norm": 0.26953125,
      "learning_rate": 7.343660355708549e-05,
      "loss": 0.9843,
      "step": 640
    },
    {
      "epoch": 0.0370072867060646,
      "grad_norm": 0.26171875,
      "learning_rate": 7.401032702237521e-05,
      "loss": 0.977,
      "step": 645
    },
    {
      "epoch": 0.037294164897584484,
      "grad_norm": 0.26171875,
      "learning_rate": 7.458405048766494e-05,
      "loss": 0.9185,
      "step": 650
    },
    {
      "epoch": 0.037581043089104364,
      "grad_norm": 0.2578125,
      "learning_rate": 7.515777395295467e-05,
      "loss": 1.0353,
      "step": 655
    },
    {
      "epoch": 0.037867921280624245,
      "grad_norm": 0.259765625,
      "learning_rate": 7.573149741824441e-05,
      "loss": 0.9667,
      "step": 660
    },
    {
      "epoch": 0.038154799472144126,
      "grad_norm": 0.267578125,
      "learning_rate": 7.630522088353414e-05,
      "loss": 0.9865,
      "step": 665
    },
    {
      "epoch": 0.038441677663664006,
      "grad_norm": 0.25,
      "learning_rate": 7.687894434882386e-05,
      "loss": 0.8748,
      "step": 670
    },
    {
      "epoch": 0.03872855585518389,
      "grad_norm": 0.26171875,
      "learning_rate": 7.74526678141136e-05,
      "loss": 1.0654,
      "step": 675
    },
    {
      "epoch": 0.03901543404670377,
      "grad_norm": 0.255859375,
      "learning_rate": 7.802639127940333e-05,
      "loss": 1.0622,
      "step": 680
    },
    {
      "epoch": 0.03930231223822365,
      "grad_norm": 0.259765625,
      "learning_rate": 7.860011474469306e-05,
      "loss": 0.9676,
      "step": 685
    },
    {
      "epoch": 0.03958919042974353,
      "grad_norm": 0.275390625,
      "learning_rate": 7.917383820998278e-05,
      "loss": 0.9626,
      "step": 690
    },
    {
      "epoch": 0.03987606862126341,
      "grad_norm": 0.259765625,
      "learning_rate": 7.974756167527252e-05,
      "loss": 0.9586,
      "step": 695
    },
    {
      "epoch": 0.04016294681278329,
      "grad_norm": 0.267578125,
      "learning_rate": 8.032128514056225e-05,
      "loss": 1.0465,
      "step": 700
    },
    {
      "epoch": 0.04044982500430317,
      "grad_norm": 0.2470703125,
      "learning_rate": 8.089500860585198e-05,
      "loss": 0.9695,
      "step": 705
    },
    {
      "epoch": 0.04073670319582305,
      "grad_norm": 0.2578125,
      "learning_rate": 8.146873207114172e-05,
      "loss": 0.9291,
      "step": 710
    },
    {
      "epoch": 0.04102358138734293,
      "grad_norm": 0.26171875,
      "learning_rate": 8.204245553643144e-05,
      "loss": 1.0255,
      "step": 715
    },
    {
      "epoch": 0.04131045957886281,
      "grad_norm": 0.25,
      "learning_rate": 8.261617900172117e-05,
      "loss": 0.9625,
      "step": 720
    },
    {
      "epoch": 0.04159733777038269,
      "grad_norm": 0.26171875,
      "learning_rate": 8.31899024670109e-05,
      "loss": 0.9827,
      "step": 725
    },
    {
      "epoch": 0.041884215961902574,
      "grad_norm": 0.236328125,
      "learning_rate": 8.376362593230064e-05,
      "loss": 1.0375,
      "step": 730
    },
    {
      "epoch": 0.042171094153422455,
      "grad_norm": 0.279296875,
      "learning_rate": 8.433734939759037e-05,
      "loss": 1.0185,
      "step": 735
    },
    {
      "epoch": 0.042457972344942335,
      "grad_norm": 0.2470703125,
      "learning_rate": 8.491107286288009e-05,
      "loss": 0.9531,
      "step": 740
    },
    {
      "epoch": 0.042744850536462216,
      "grad_norm": 0.25,
      "learning_rate": 8.548479632816982e-05,
      "loss": 1.0196,
      "step": 745
    },
    {
      "epoch": 0.043031728727982096,
      "grad_norm": 0.2421875,
      "learning_rate": 8.605851979345956e-05,
      "loss": 0.9729,
      "step": 750
    },
    {
      "epoch": 0.04331860691950198,
      "grad_norm": 0.251953125,
      "learning_rate": 8.663224325874929e-05,
      "loss": 0.9538,
      "step": 755
    },
    {
      "epoch": 0.04360548511102186,
      "grad_norm": 0.2490234375,
      "learning_rate": 8.720596672403901e-05,
      "loss": 0.9903,
      "step": 760
    },
    {
      "epoch": 0.04389236330254174,
      "grad_norm": 0.2490234375,
      "learning_rate": 8.777969018932875e-05,
      "loss": 0.9379,
      "step": 765
    },
    {
      "epoch": 0.04417924149406162,
      "grad_norm": 0.255859375,
      "learning_rate": 8.835341365461848e-05,
      "loss": 1.0388,
      "step": 770
    },
    {
      "epoch": 0.0444661196855815,
      "grad_norm": 0.23828125,
      "learning_rate": 8.892713711990821e-05,
      "loss": 0.9352,
      "step": 775
    },
    {
      "epoch": 0.04475299787710138,
      "grad_norm": 0.259765625,
      "learning_rate": 8.950086058519795e-05,
      "loss": 1.0,
      "step": 780
    },
    {
      "epoch": 0.04503987606862126,
      "grad_norm": 0.2470703125,
      "learning_rate": 9.007458405048767e-05,
      "loss": 0.9976,
      "step": 785
    },
    {
      "epoch": 0.04532675426014114,
      "grad_norm": 0.2451171875,
      "learning_rate": 9.06483075157774e-05,
      "loss": 0.9689,
      "step": 790
    },
    {
      "epoch": 0.04561363245166102,
      "grad_norm": 0.248046875,
      "learning_rate": 9.122203098106713e-05,
      "loss": 1.0199,
      "step": 795
    },
    {
      "epoch": 0.0459005106431809,
      "grad_norm": 0.24609375,
      "learning_rate": 9.179575444635687e-05,
      "loss": 0.9226,
      "step": 800
    },
    {
      "epoch": 0.046187388834700784,
      "grad_norm": 0.248046875,
      "learning_rate": 9.23694779116466e-05,
      "loss": 0.9483,
      "step": 805
    },
    {
      "epoch": 0.046474267026220664,
      "grad_norm": 0.2470703125,
      "learning_rate": 9.294320137693632e-05,
      "loss": 0.9711,
      "step": 810
    },
    {
      "epoch": 0.046761145217740545,
      "grad_norm": 0.248046875,
      "learning_rate": 9.351692484222605e-05,
      "loss": 0.9429,
      "step": 815
    },
    {
      "epoch": 0.047048023409260425,
      "grad_norm": 0.240234375,
      "learning_rate": 9.409064830751579e-05,
      "loss": 0.9849,
      "step": 820
    },
    {
      "epoch": 0.047334901600780306,
      "grad_norm": 0.234375,
      "learning_rate": 9.466437177280552e-05,
      "loss": 1.0336,
      "step": 825
    },
    {
      "epoch": 0.04762177979230019,
      "grad_norm": 0.24609375,
      "learning_rate": 9.523809523809524e-05,
      "loss": 1.0258,
      "step": 830
    },
    {
      "epoch": 0.04790865798382007,
      "grad_norm": 0.24609375,
      "learning_rate": 9.581181870338497e-05,
      "loss": 0.9852,
      "step": 835
    },
    {
      "epoch": 0.04819553617533995,
      "grad_norm": 0.232421875,
      "learning_rate": 9.638554216867471e-05,
      "loss": 0.9533,
      "step": 840
    },
    {
      "epoch": 0.04848241436685983,
      "grad_norm": 0.2451171875,
      "learning_rate": 9.695926563396444e-05,
      "loss": 0.9448,
      "step": 845
    },
    {
      "epoch": 0.04876929255837971,
      "grad_norm": 0.25390625,
      "learning_rate": 9.753298909925417e-05,
      "loss": 1.0124,
      "step": 850
    },
    {
      "epoch": 0.04905617074989959,
      "grad_norm": 0.25,
      "learning_rate": 9.81067125645439e-05,
      "loss": 0.9959,
      "step": 855
    },
    {
      "epoch": 0.04934304894141947,
      "grad_norm": 0.25390625,
      "learning_rate": 9.868043602983363e-05,
      "loss": 1.0287,
      "step": 860
    },
    {
      "epoch": 0.04962992713293935,
      "grad_norm": 0.25390625,
      "learning_rate": 9.925415949512336e-05,
      "loss": 0.9831,
      "step": 865
    },
    {
      "epoch": 0.04991680532445923,
      "grad_norm": 0.255859375,
      "learning_rate": 9.982788296041308e-05,
      "loss": 0.9833,
      "step": 870
    },
    {
      "epoch": 0.05020368351597911,
      "grad_norm": 0.23828125,
      "learning_rate": 0.00010040160642570282,
      "loss": 0.9969,
      "step": 875
    },
    {
      "epoch": 0.05049056170749899,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00010097532989099255,
      "loss": 1.0437,
      "step": 880
    },
    {
      "epoch": 0.050777439899018874,
      "grad_norm": 0.234375,
      "learning_rate": 0.00010154905335628228,
      "loss": 1.031,
      "step": 885
    },
    {
      "epoch": 0.051064318090538754,
      "grad_norm": 0.2412109375,
      "learning_rate": 0.000102122776821572,
      "loss": 1.0094,
      "step": 890
    },
    {
      "epoch": 0.051351196282058635,
      "grad_norm": 0.2314453125,
      "learning_rate": 0.00010269650028686174,
      "loss": 0.9602,
      "step": 895
    },
    {
      "epoch": 0.051638074473578516,
      "grad_norm": 0.2451171875,
      "learning_rate": 0.00010327022375215147,
      "loss": 1.0467,
      "step": 900
    },
    {
      "epoch": 0.051924952665098396,
      "grad_norm": 0.248046875,
      "learning_rate": 0.00010384394721744119,
      "loss": 0.9441,
      "step": 905
    },
    {
      "epoch": 0.05221183085661828,
      "grad_norm": 0.234375,
      "learning_rate": 0.00010441767068273094,
      "loss": 1.0901,
      "step": 910
    },
    {
      "epoch": 0.05249870904813816,
      "grad_norm": 0.232421875,
      "learning_rate": 0.00010499139414802066,
      "loss": 0.9532,
      "step": 915
    },
    {
      "epoch": 0.05278558723965804,
      "grad_norm": 0.2451171875,
      "learning_rate": 0.00010556511761331038,
      "loss": 1.0329,
      "step": 920
    },
    {
      "epoch": 0.05307246543117792,
      "grad_norm": 0.2353515625,
      "learning_rate": 0.00010613884107860012,
      "loss": 0.9893,
      "step": 925
    },
    {
      "epoch": 0.0533593436226978,
      "grad_norm": 0.25,
      "learning_rate": 0.00010671256454388984,
      "loss": 0.9567,
      "step": 930
    },
    {
      "epoch": 0.05364622181421768,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00010728628800917956,
      "loss": 1.0186,
      "step": 935
    },
    {
      "epoch": 0.05393310000573756,
      "grad_norm": 0.2353515625,
      "learning_rate": 0.00010786001147446931,
      "loss": 0.9697,
      "step": 940
    },
    {
      "epoch": 0.05421997819725744,
      "grad_norm": 0.2353515625,
      "learning_rate": 0.00010843373493975903,
      "loss": 0.967,
      "step": 945
    },
    {
      "epoch": 0.05450685638877732,
      "grad_norm": 0.220703125,
      "learning_rate": 0.00010900745840504878,
      "loss": 0.9157,
      "step": 950
    },
    {
      "epoch": 0.0547937345802972,
      "grad_norm": 0.2373046875,
      "learning_rate": 0.0001095811818703385,
      "loss": 0.9864,
      "step": 955
    },
    {
      "epoch": 0.055080612771817083,
      "grad_norm": 0.25,
      "learning_rate": 0.00011015490533562822,
      "loss": 0.9616,
      "step": 960
    },
    {
      "epoch": 0.055367490963336964,
      "grad_norm": 0.232421875,
      "learning_rate": 0.00011072862880091797,
      "loss": 0.9466,
      "step": 965
    },
    {
      "epoch": 0.055654369154856845,
      "grad_norm": 0.2421875,
      "learning_rate": 0.00011130235226620769,
      "loss": 0.966,
      "step": 970
    },
    {
      "epoch": 0.055941247346376725,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00011187607573149743,
      "loss": 0.9292,
      "step": 975
    },
    {
      "epoch": 0.056228125537896606,
      "grad_norm": 0.236328125,
      "learning_rate": 0.00011244979919678715,
      "loss": 0.9724,
      "step": 980
    },
    {
      "epoch": 0.05651500372941649,
      "grad_norm": 0.2353515625,
      "learning_rate": 0.00011302352266207687,
      "loss": 0.9683,
      "step": 985
    },
    {
      "epoch": 0.05680188192093637,
      "grad_norm": 0.2373046875,
      "learning_rate": 0.00011359724612736662,
      "loss": 0.9164,
      "step": 990
    },
    {
      "epoch": 0.05708876011245625,
      "grad_norm": 0.23828125,
      "learning_rate": 0.00011417096959265634,
      "loss": 0.9309,
      "step": 995
    },
    {
      "epoch": 0.05737563830397613,
      "grad_norm": 0.24609375,
      "learning_rate": 0.00011474469305794609,
      "loss": 1.0243,
      "step": 1000
    },
    {
      "epoch": 0.05766251649549601,
      "grad_norm": 0.2255859375,
      "learning_rate": 0.0001153184165232358,
      "loss": 0.9735,
      "step": 1005
    },
    {
      "epoch": 0.05794939468701589,
      "grad_norm": 0.2412109375,
      "learning_rate": 0.00011589213998852553,
      "loss": 1.0049,
      "step": 1010
    },
    {
      "epoch": 0.05823627287853577,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00011646586345381527,
      "loss": 1.0304,
      "step": 1015
    },
    {
      "epoch": 0.05852315107005565,
      "grad_norm": 0.25390625,
      "learning_rate": 0.000117039586919105,
      "loss": 0.9818,
      "step": 1020
    },
    {
      "epoch": 0.05881002926157553,
      "grad_norm": 0.2353515625,
      "learning_rate": 0.00011761331038439474,
      "loss": 0.9646,
      "step": 1025
    },
    {
      "epoch": 0.05909690745309541,
      "grad_norm": 0.2255859375,
      "learning_rate": 0.00011818703384968446,
      "loss": 0.9753,
      "step": 1030
    },
    {
      "epoch": 0.05938378564461529,
      "grad_norm": 0.2353515625,
      "learning_rate": 0.00011876075731497418,
      "loss": 0.9675,
      "step": 1035
    },
    {
      "epoch": 0.059670663836135174,
      "grad_norm": 0.2109375,
      "learning_rate": 0.00011933448078026393,
      "loss": 0.9606,
      "step": 1040
    },
    {
      "epoch": 0.059957542027655054,
      "grad_norm": 0.2265625,
      "learning_rate": 0.00011990820424555365,
      "loss": 1.0847,
      "step": 1045
    },
    {
      "epoch": 0.060244420219174935,
      "grad_norm": 0.2265625,
      "learning_rate": 0.0001204819277108434,
      "loss": 1.0064,
      "step": 1050
    },
    {
      "epoch": 0.060531298410694816,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00012105565117613311,
      "loss": 0.967,
      "step": 1055
    },
    {
      "epoch": 0.060818176602214696,
      "grad_norm": 0.2333984375,
      "learning_rate": 0.00012162937464142283,
      "loss": 0.9456,
      "step": 1060
    },
    {
      "epoch": 0.06110505479373458,
      "grad_norm": 0.2392578125,
      "learning_rate": 0.00012220309810671257,
      "loss": 1.0216,
      "step": 1065
    },
    {
      "epoch": 0.06139193298525446,
      "grad_norm": 0.2392578125,
      "learning_rate": 0.0001227768215720023,
      "loss": 0.9022,
      "step": 1070
    },
    {
      "epoch": 0.06167881117677434,
      "grad_norm": 0.23828125,
      "learning_rate": 0.000123350545037292,
      "loss": 0.9295,
      "step": 1075
    },
    {
      "epoch": 0.06196568936829422,
      "grad_norm": 0.236328125,
      "learning_rate": 0.00012392426850258177,
      "loss": 0.983,
      "step": 1080
    },
    {
      "epoch": 0.0622525675598141,
      "grad_norm": 0.2333984375,
      "learning_rate": 0.00012449799196787148,
      "loss": 0.9705,
      "step": 1085
    },
    {
      "epoch": 0.06253944575133398,
      "grad_norm": 0.2236328125,
      "learning_rate": 0.00012507171543316124,
      "loss": 0.9243,
      "step": 1090
    },
    {
      "epoch": 0.06282632394285387,
      "grad_norm": 0.31640625,
      "learning_rate": 0.00012564543889845094,
      "loss": 0.9758,
      "step": 1095
    },
    {
      "epoch": 0.06311320213437374,
      "grad_norm": 0.2392578125,
      "learning_rate": 0.00012621916236374068,
      "loss": 0.9814,
      "step": 1100
    },
    {
      "epoch": 0.06340008032589363,
      "grad_norm": 0.2470703125,
      "learning_rate": 0.0001267928858290304,
      "loss": 0.942,
      "step": 1105
    },
    {
      "epoch": 0.0636869585174135,
      "grad_norm": 0.23046875,
      "learning_rate": 0.00012736660929432014,
      "loss": 0.9782,
      "step": 1110
    },
    {
      "epoch": 0.06397383670893339,
      "grad_norm": 0.2275390625,
      "learning_rate": 0.00012794033275960988,
      "loss": 0.9643,
      "step": 1115
    },
    {
      "epoch": 0.06426071490045326,
      "grad_norm": 0.2255859375,
      "learning_rate": 0.0001285140562248996,
      "loss": 0.9427,
      "step": 1120
    },
    {
      "epoch": 0.06454759309197315,
      "grad_norm": 0.220703125,
      "learning_rate": 0.00012908777969018932,
      "loss": 0.987,
      "step": 1125
    },
    {
      "epoch": 0.06483447128349303,
      "grad_norm": 0.236328125,
      "learning_rate": 0.00012966150315547908,
      "loss": 0.9821,
      "step": 1130
    },
    {
      "epoch": 0.06512134947501291,
      "grad_norm": 0.234375,
      "learning_rate": 0.00013023522662076878,
      "loss": 1.052,
      "step": 1135
    },
    {
      "epoch": 0.06540822766653279,
      "grad_norm": 0.220703125,
      "learning_rate": 0.00013080895008605854,
      "loss": 0.9462,
      "step": 1140
    },
    {
      "epoch": 0.06569510585805267,
      "grad_norm": 0.2294921875,
      "learning_rate": 0.00013138267355134825,
      "loss": 1.0105,
      "step": 1145
    },
    {
      "epoch": 0.06598198404957255,
      "grad_norm": 0.23828125,
      "learning_rate": 0.00013195639701663798,
      "loss": 1.014,
      "step": 1150
    },
    {
      "epoch": 0.06626886224109244,
      "grad_norm": 0.2265625,
      "learning_rate": 0.00013253012048192772,
      "loss": 0.991,
      "step": 1155
    },
    {
      "epoch": 0.06655574043261231,
      "grad_norm": 0.248046875,
      "learning_rate": 0.00013310384394721745,
      "loss": 1.0098,
      "step": 1160
    },
    {
      "epoch": 0.0668426186241322,
      "grad_norm": 0.2314453125,
      "learning_rate": 0.00013367756741250719,
      "loss": 0.9866,
      "step": 1165
    },
    {
      "epoch": 0.06712949681565207,
      "grad_norm": 0.2294921875,
      "learning_rate": 0.00013425129087779692,
      "loss": 0.9608,
      "step": 1170
    },
    {
      "epoch": 0.06741637500717196,
      "grad_norm": 0.234375,
      "learning_rate": 0.00013482501434308663,
      "loss": 0.9711,
      "step": 1175
    },
    {
      "epoch": 0.06770325319869183,
      "grad_norm": 0.228515625,
      "learning_rate": 0.00013539873780837639,
      "loss": 0.9319,
      "step": 1180
    },
    {
      "epoch": 0.06799013139021172,
      "grad_norm": 0.2314453125,
      "learning_rate": 0.0001359724612736661,
      "loss": 0.9614,
      "step": 1185
    },
    {
      "epoch": 0.0682770095817316,
      "grad_norm": 0.22265625,
      "learning_rate": 0.00013654618473895585,
      "loss": 0.9333,
      "step": 1190
    },
    {
      "epoch": 0.06856388777325148,
      "grad_norm": 0.236328125,
      "learning_rate": 0.00013711990820424556,
      "loss": 1.0135,
      "step": 1195
    },
    {
      "epoch": 0.06885076596477135,
      "grad_norm": 0.2333984375,
      "learning_rate": 0.0001376936316695353,
      "loss": 0.9592,
      "step": 1200
    },
    {
      "epoch": 0.06913764415629124,
      "grad_norm": 0.2265625,
      "learning_rate": 0.00013826735513482503,
      "loss": 0.9512,
      "step": 1205
    },
    {
      "epoch": 0.06942452234781112,
      "grad_norm": 0.2421875,
      "learning_rate": 0.00013884107860011476,
      "loss": 0.9239,
      "step": 1210
    },
    {
      "epoch": 0.069711400539331,
      "grad_norm": 0.234375,
      "learning_rate": 0.00013941480206540447,
      "loss": 0.9585,
      "step": 1215
    },
    {
      "epoch": 0.06999827873085088,
      "grad_norm": 0.2177734375,
      "learning_rate": 0.00013998852553069423,
      "loss": 0.9734,
      "step": 1220
    },
    {
      "epoch": 0.07028515692237076,
      "grad_norm": 0.224609375,
      "learning_rate": 0.00014056224899598393,
      "loss": 0.9214,
      "step": 1225
    },
    {
      "epoch": 0.07057203511389064,
      "grad_norm": 0.2421875,
      "learning_rate": 0.00014113597246127367,
      "loss": 0.9652,
      "step": 1230
    },
    {
      "epoch": 0.07085891330541053,
      "grad_norm": 0.2421875,
      "learning_rate": 0.0001417096959265634,
      "loss": 0.9355,
      "step": 1235
    },
    {
      "epoch": 0.0711457914969304,
      "grad_norm": 0.2294921875,
      "learning_rate": 0.00014228341939185313,
      "loss": 0.9577,
      "step": 1240
    },
    {
      "epoch": 0.07143266968845029,
      "grad_norm": 0.234375,
      "learning_rate": 0.00014285714285714287,
      "loss": 0.9403,
      "step": 1245
    },
    {
      "epoch": 0.07171954787997016,
      "grad_norm": 0.23046875,
      "learning_rate": 0.0001434308663224326,
      "loss": 1.0237,
      "step": 1250
    },
    {
      "epoch": 0.07200642607149005,
      "grad_norm": 0.2431640625,
      "learning_rate": 0.00014400458978772233,
      "loss": 0.994,
      "step": 1255
    },
    {
      "epoch": 0.07229330426300992,
      "grad_norm": 0.2333984375,
      "learning_rate": 0.00014457831325301204,
      "loss": 0.9788,
      "step": 1260
    },
    {
      "epoch": 0.07258018245452981,
      "grad_norm": 0.2294921875,
      "learning_rate": 0.00014515203671830177,
      "loss": 1.028,
      "step": 1265
    },
    {
      "epoch": 0.07286706064604968,
      "grad_norm": 0.228515625,
      "learning_rate": 0.0001457257601835915,
      "loss": 0.9475,
      "step": 1270
    },
    {
      "epoch": 0.07315393883756957,
      "grad_norm": 0.2333984375,
      "learning_rate": 0.00014629948364888124,
      "loss": 0.9661,
      "step": 1275
    },
    {
      "epoch": 0.07344081702908944,
      "grad_norm": 0.23046875,
      "learning_rate": 0.00014687320711417098,
      "loss": 1.0316,
      "step": 1280
    },
    {
      "epoch": 0.07372769522060933,
      "grad_norm": 0.2490234375,
      "learning_rate": 0.0001474469305794607,
      "loss": 1.0089,
      "step": 1285
    },
    {
      "epoch": 0.0740145734121292,
      "grad_norm": 0.236328125,
      "learning_rate": 0.00014802065404475042,
      "loss": 0.9406,
      "step": 1290
    },
    {
      "epoch": 0.0743014516036491,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00014859437751004018,
      "loss": 1.0317,
      "step": 1295
    },
    {
      "epoch": 0.07458832979516897,
      "grad_norm": 0.2392578125,
      "learning_rate": 0.00014916810097532988,
      "loss": 0.953,
      "step": 1300
    },
    {
      "epoch": 0.07487520798668885,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00014974182444061964,
      "loss": 0.9658,
      "step": 1305
    },
    {
      "epoch": 0.07516208617820873,
      "grad_norm": 0.2333984375,
      "learning_rate": 0.00015031554790590935,
      "loss": 0.927,
      "step": 1310
    },
    {
      "epoch": 0.07544896436972862,
      "grad_norm": 0.2216796875,
      "learning_rate": 0.00015088927137119908,
      "loss": 0.9357,
      "step": 1315
    },
    {
      "epoch": 0.07573584256124849,
      "grad_norm": 0.2392578125,
      "learning_rate": 0.00015146299483648882,
      "loss": 0.9423,
      "step": 1320
    },
    {
      "epoch": 0.07602272075276838,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00015203671830177855,
      "loss": 0.9861,
      "step": 1325
    },
    {
      "epoch": 0.07630959894428825,
      "grad_norm": 0.244140625,
      "learning_rate": 0.00015261044176706828,
      "loss": 0.9871,
      "step": 1330
    },
    {
      "epoch": 0.07659647713580814,
      "grad_norm": 0.24609375,
      "learning_rate": 0.00015318416523235802,
      "loss": 0.9405,
      "step": 1335
    },
    {
      "epoch": 0.07688335532732801,
      "grad_norm": 0.2294921875,
      "learning_rate": 0.00015375788869764772,
      "loss": 0.9789,
      "step": 1340
    },
    {
      "epoch": 0.0771702335188479,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00015433161216293748,
      "loss": 1.0267,
      "step": 1345
    },
    {
      "epoch": 0.07745711171036777,
      "grad_norm": 0.2138671875,
      "learning_rate": 0.0001549053356282272,
      "loss": 0.9546,
      "step": 1350
    },
    {
      "epoch": 0.07774398990188766,
      "grad_norm": 0.2353515625,
      "learning_rate": 0.00015547905909351695,
      "loss": 0.9847,
      "step": 1355
    },
    {
      "epoch": 0.07803086809340753,
      "grad_norm": 0.2353515625,
      "learning_rate": 0.00015605278255880666,
      "loss": 0.9393,
      "step": 1360
    },
    {
      "epoch": 0.07831774628492742,
      "grad_norm": 0.2294921875,
      "learning_rate": 0.0001566265060240964,
      "loss": 1.0087,
      "step": 1365
    },
    {
      "epoch": 0.0786046244764473,
      "grad_norm": 0.2255859375,
      "learning_rate": 0.00015720022948938613,
      "loss": 0.9367,
      "step": 1370
    },
    {
      "epoch": 0.07889150266796718,
      "grad_norm": 0.2138671875,
      "learning_rate": 0.00015777395295467586,
      "loss": 0.9651,
      "step": 1375
    },
    {
      "epoch": 0.07917838085948706,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00015834767641996557,
      "loss": 1.0075,
      "step": 1380
    },
    {
      "epoch": 0.07946525905100695,
      "grad_norm": 0.2490234375,
      "learning_rate": 0.00015892139988525533,
      "loss": 0.95,
      "step": 1385
    },
    {
      "epoch": 0.07975213724252682,
      "grad_norm": 0.2421875,
      "learning_rate": 0.00015949512335054503,
      "loss": 0.9539,
      "step": 1390
    },
    {
      "epoch": 0.0800390154340467,
      "grad_norm": 0.2216796875,
      "learning_rate": 0.0001600688468158348,
      "loss": 0.9659,
      "step": 1395
    },
    {
      "epoch": 0.08032589362556658,
      "grad_norm": 0.23046875,
      "learning_rate": 0.0001606425702811245,
      "loss": 0.9324,
      "step": 1400
    },
    {
      "epoch": 0.08061277181708647,
      "grad_norm": 0.23828125,
      "learning_rate": 0.00016121629374641423,
      "loss": 1.0698,
      "step": 1405
    },
    {
      "epoch": 0.08089965000860634,
      "grad_norm": 0.2412109375,
      "learning_rate": 0.00016179001721170397,
      "loss": 1.0436,
      "step": 1410
    },
    {
      "epoch": 0.08118652820012623,
      "grad_norm": 0.2392578125,
      "learning_rate": 0.0001623637406769937,
      "loss": 0.9879,
      "step": 1415
    },
    {
      "epoch": 0.0814734063916461,
      "grad_norm": 0.224609375,
      "learning_rate": 0.00016293746414228343,
      "loss": 0.9993,
      "step": 1420
    },
    {
      "epoch": 0.08176028458316599,
      "grad_norm": 0.2412109375,
      "learning_rate": 0.00016351118760757317,
      "loss": 0.9543,
      "step": 1425
    },
    {
      "epoch": 0.08204716277468586,
      "grad_norm": 0.2265625,
      "learning_rate": 0.00016408491107286287,
      "loss": 1.0417,
      "step": 1430
    },
    {
      "epoch": 0.08233404096620575,
      "grad_norm": 0.24609375,
      "learning_rate": 0.00016465863453815263,
      "loss": 0.935,
      "step": 1435
    },
    {
      "epoch": 0.08262091915772563,
      "grad_norm": 0.265625,
      "learning_rate": 0.00016523235800344234,
      "loss": 1.041,
      "step": 1440
    },
    {
      "epoch": 0.08290779734924551,
      "grad_norm": 0.2421875,
      "learning_rate": 0.0001658060814687321,
      "loss": 0.9485,
      "step": 1445
    },
    {
      "epoch": 0.08319467554076539,
      "grad_norm": 0.21484375,
      "learning_rate": 0.0001663798049340218,
      "loss": 0.9971,
      "step": 1450
    },
    {
      "epoch": 0.08348155373228527,
      "grad_norm": 0.2353515625,
      "learning_rate": 0.00016695352839931154,
      "loss": 0.9906,
      "step": 1455
    },
    {
      "epoch": 0.08376843192380515,
      "grad_norm": 0.2333984375,
      "learning_rate": 0.00016752725186460127,
      "loss": 0.905,
      "step": 1460
    },
    {
      "epoch": 0.08405531011532504,
      "grad_norm": 0.23828125,
      "learning_rate": 0.000168100975329891,
      "loss": 0.9967,
      "step": 1465
    },
    {
      "epoch": 0.08434218830684491,
      "grad_norm": 0.265625,
      "learning_rate": 0.00016867469879518074,
      "loss": 1.0322,
      "step": 1470
    },
    {
      "epoch": 0.0846290664983648,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00016924842226047048,
      "loss": 1.0861,
      "step": 1475
    },
    {
      "epoch": 0.08491594468988467,
      "grad_norm": 0.2412109375,
      "learning_rate": 0.00016982214572576018,
      "loss": 1.0018,
      "step": 1480
    },
    {
      "epoch": 0.08520282288140456,
      "grad_norm": 0.2353515625,
      "learning_rate": 0.00017039586919104992,
      "loss": 0.9241,
      "step": 1485
    },
    {
      "epoch": 0.08548970107292443,
      "grad_norm": 0.216796875,
      "learning_rate": 0.00017096959265633965,
      "loss": 0.9617,
      "step": 1490
    },
    {
      "epoch": 0.08577657926444432,
      "grad_norm": 0.2421875,
      "learning_rate": 0.00017154331612162938,
      "loss": 0.9043,
      "step": 1495
    },
    {
      "epoch": 0.08606345745596419,
      "grad_norm": 0.232421875,
      "learning_rate": 0.00017211703958691912,
      "loss": 1.0095,
      "step": 1500
    },
    {
      "epoch": 0.08635033564748408,
      "grad_norm": 0.2333984375,
      "learning_rate": 0.00017269076305220885,
      "loss": 1.0159,
      "step": 1505
    },
    {
      "epoch": 0.08663721383900395,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00017326448651749858,
      "loss": 1.05,
      "step": 1510
    },
    {
      "epoch": 0.08692409203052384,
      "grad_norm": 0.2421875,
      "learning_rate": 0.0001738382099827883,
      "loss": 0.9889,
      "step": 1515
    },
    {
      "epoch": 0.08721097022204372,
      "grad_norm": 0.2490234375,
      "learning_rate": 0.00017441193344807802,
      "loss": 1.0014,
      "step": 1520
    },
    {
      "epoch": 0.0874978484135636,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00017498565691336776,
      "loss": 1.0155,
      "step": 1525
    },
    {
      "epoch": 0.08778472660508348,
      "grad_norm": 0.2333984375,
      "learning_rate": 0.0001755593803786575,
      "loss": 0.9529,
      "step": 1530
    },
    {
      "epoch": 0.08807160479660336,
      "grad_norm": 0.2373046875,
      "learning_rate": 0.00017613310384394722,
      "loss": 0.9015,
      "step": 1535
    },
    {
      "epoch": 0.08835848298812324,
      "grad_norm": 0.2421875,
      "learning_rate": 0.00017670682730923696,
      "loss": 0.958,
      "step": 1540
    },
    {
      "epoch": 0.08864536117964313,
      "grad_norm": 0.234375,
      "learning_rate": 0.00017728055077452666,
      "loss": 1.0366,
      "step": 1545
    },
    {
      "epoch": 0.088932239371163,
      "grad_norm": 0.2490234375,
      "learning_rate": 0.00017785427423981642,
      "loss": 1.0001,
      "step": 1550
    },
    {
      "epoch": 0.08921911756268289,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00017842799770510613,
      "loss": 0.9472,
      "step": 1555
    },
    {
      "epoch": 0.08950599575420276,
      "grad_norm": 0.232421875,
      "learning_rate": 0.0001790017211703959,
      "loss": 0.9278,
      "step": 1560
    },
    {
      "epoch": 0.08979287394572265,
      "grad_norm": 0.2421875,
      "learning_rate": 0.0001795754446356856,
      "loss": 0.9875,
      "step": 1565
    },
    {
      "epoch": 0.09007975213724252,
      "grad_norm": 0.244140625,
      "learning_rate": 0.00018014916810097533,
      "loss": 0.9641,
      "step": 1570
    },
    {
      "epoch": 0.09036663032876241,
      "grad_norm": 0.240234375,
      "learning_rate": 0.00018072289156626507,
      "loss": 0.9106,
      "step": 1575
    },
    {
      "epoch": 0.09065350852028228,
      "grad_norm": 0.248046875,
      "learning_rate": 0.0001812966150315548,
      "loss": 0.9187,
      "step": 1580
    },
    {
      "epoch": 0.09094038671180217,
      "grad_norm": 0.228515625,
      "learning_rate": 0.00018187033849684453,
      "loss": 0.9488,
      "step": 1585
    },
    {
      "epoch": 0.09122726490332204,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00018244406196213427,
      "loss": 0.9628,
      "step": 1590
    },
    {
      "epoch": 0.09151414309484193,
      "grad_norm": 0.2431640625,
      "learning_rate": 0.00018301778542742397,
      "loss": 0.9592,
      "step": 1595
    },
    {
      "epoch": 0.0918010212863618,
      "grad_norm": 0.25,
      "learning_rate": 0.00018359150889271373,
      "loss": 1.0228,
      "step": 1600
    },
    {
      "epoch": 0.0920878994778817,
      "grad_norm": 0.236328125,
      "learning_rate": 0.00018416523235800344,
      "loss": 0.9926,
      "step": 1605
    },
    {
      "epoch": 0.09237477766940157,
      "grad_norm": 0.25,
      "learning_rate": 0.0001847389558232932,
      "loss": 1.0007,
      "step": 1610
    },
    {
      "epoch": 0.09266165586092145,
      "grad_norm": 0.23828125,
      "learning_rate": 0.0001853126792885829,
      "loss": 1.0219,
      "step": 1615
    },
    {
      "epoch": 0.09294853405244133,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00018588640275387264,
      "loss": 0.9644,
      "step": 1620
    },
    {
      "epoch": 0.09323541224396122,
      "grad_norm": 0.2421875,
      "learning_rate": 0.00018646012621916237,
      "loss": 1.0031,
      "step": 1625
    },
    {
      "epoch": 0.09352229043548109,
      "grad_norm": 0.2255859375,
      "learning_rate": 0.0001870338496844521,
      "loss": 1.0037,
      "step": 1630
    },
    {
      "epoch": 0.09380916862700098,
      "grad_norm": 0.224609375,
      "learning_rate": 0.00018760757314974184,
      "loss": 0.9305,
      "step": 1635
    },
    {
      "epoch": 0.09409604681852085,
      "grad_norm": 0.244140625,
      "learning_rate": 0.00018818129661503157,
      "loss": 0.9534,
      "step": 1640
    },
    {
      "epoch": 0.09438292501004074,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00018875502008032128,
      "loss": 1.0235,
      "step": 1645
    },
    {
      "epoch": 0.09466980320156061,
      "grad_norm": 0.236328125,
      "learning_rate": 0.00018932874354561104,
      "loss": 0.9046,
      "step": 1650
    },
    {
      "epoch": 0.0949566813930805,
      "grad_norm": 0.232421875,
      "learning_rate": 0.00018990246701090075,
      "loss": 1.004,
      "step": 1655
    },
    {
      "epoch": 0.09524355958460037,
      "grad_norm": 0.2451171875,
      "learning_rate": 0.00019047619047619048,
      "loss": 0.9329,
      "step": 1660
    },
    {
      "epoch": 0.09553043777612026,
      "grad_norm": 0.2373046875,
      "learning_rate": 0.00019104991394148021,
      "loss": 0.9596,
      "step": 1665
    },
    {
      "epoch": 0.09581731596764013,
      "grad_norm": 0.2421875,
      "learning_rate": 0.00019162363740676995,
      "loss": 0.9716,
      "step": 1670
    },
    {
      "epoch": 0.09610419415916002,
      "grad_norm": 0.248046875,
      "learning_rate": 0.00019219736087205968,
      "loss": 0.9608,
      "step": 1675
    },
    {
      "epoch": 0.0963910723506799,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00019277108433734942,
      "loss": 1.0023,
      "step": 1680
    },
    {
      "epoch": 0.09667795054219978,
      "grad_norm": 0.25,
      "learning_rate": 0.00019334480780263912,
      "loss": 0.9665,
      "step": 1685
    },
    {
      "epoch": 0.09696482873371966,
      "grad_norm": 0.244140625,
      "learning_rate": 0.00019391853126792888,
      "loss": 0.9806,
      "step": 1690
    },
    {
      "epoch": 0.09725170692523954,
      "grad_norm": 0.2392578125,
      "learning_rate": 0.0001944922547332186,
      "loss": 1.0139,
      "step": 1695
    },
    {
      "epoch": 0.09753858511675942,
      "grad_norm": 0.25,
      "learning_rate": 0.00019506597819850835,
      "loss": 0.9764,
      "step": 1700
    },
    {
      "epoch": 0.0978254633082793,
      "grad_norm": 0.2392578125,
      "learning_rate": 0.00019563970166379806,
      "loss": 0.9552,
      "step": 1705
    },
    {
      "epoch": 0.09811234149979918,
      "grad_norm": 0.248046875,
      "learning_rate": 0.0001962134251290878,
      "loss": 0.9531,
      "step": 1710
    },
    {
      "epoch": 0.09839921969131907,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00019678714859437752,
      "loss": 1.0003,
      "step": 1715
    },
    {
      "epoch": 0.09868609788283894,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00019736087205966726,
      "loss": 1.0355,
      "step": 1720
    },
    {
      "epoch": 0.09897297607435883,
      "grad_norm": 0.265625,
      "learning_rate": 0.000197934595524957,
      "loss": 1.0075,
      "step": 1725
    },
    {
      "epoch": 0.0992598542658787,
      "grad_norm": 0.2421875,
      "learning_rate": 0.00019850831899024672,
      "loss": 0.9431,
      "step": 1730
    },
    {
      "epoch": 0.09954673245739859,
      "grad_norm": 0.2451171875,
      "learning_rate": 0.00019908204245553643,
      "loss": 0.9683,
      "step": 1735
    },
    {
      "epoch": 0.09983361064891846,
      "grad_norm": 0.2353515625,
      "learning_rate": 0.00019965576592082616,
      "loss": 0.9644,
      "step": 1740
    },
    {
      "epoch": 0.10012048884043835,
      "grad_norm": 0.2421875,
      "learning_rate": 0.0001999999919775815,
      "loss": 0.988,
      "step": 1745
    },
    {
      "epoch": 0.10040736703195823,
      "grad_norm": 0.25,
      "learning_rate": 0.00019999990172538815,
      "loss": 0.9723,
      "step": 1750
    },
    {
      "epoch": 0.10069424522347811,
      "grad_norm": 0.2431640625,
      "learning_rate": 0.00019999971119306908,
      "loss": 0.9437,
      "step": 1755
    },
    {
      "epoch": 0.10098112341499799,
      "grad_norm": 0.23828125,
      "learning_rate": 0.0001999994203808154,
      "loss": 1.0246,
      "step": 1760
    },
    {
      "epoch": 0.10126800160651787,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00019999902928891875,
      "loss": 0.9988,
      "step": 1765
    },
    {
      "epoch": 0.10155487979803775,
      "grad_norm": 0.248046875,
      "learning_rate": 0.00019999853791777126,
      "loss": 0.9811,
      "step": 1770
    },
    {
      "epoch": 0.10184175798955764,
      "grad_norm": 0.30078125,
      "learning_rate": 0.00019999794626786573,
      "loss": 1.0372,
      "step": 1775
    },
    {
      "epoch": 0.10212863618107751,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00019999725433979544,
      "loss": 0.935,
      "step": 1780
    },
    {
      "epoch": 0.1024155143725974,
      "grad_norm": 0.25,
      "learning_rate": 0.00019999646213425426,
      "loss": 0.9745,
      "step": 1785
    },
    {
      "epoch": 0.10270239256411727,
      "grad_norm": 0.2314453125,
      "learning_rate": 0.00019999556965203663,
      "loss": 0.9369,
      "step": 1790
    },
    {
      "epoch": 0.10298927075563716,
      "grad_norm": 0.24609375,
      "learning_rate": 0.00019999457689403753,
      "loss": 0.8631,
      "step": 1795
    },
    {
      "epoch": 0.10327614894715703,
      "grad_norm": 0.2412109375,
      "learning_rate": 0.0001999934838612525,
      "loss": 0.9291,
      "step": 1800
    },
    {
      "epoch": 0.10356302713867692,
      "grad_norm": 0.2353515625,
      "learning_rate": 0.0001999922905547776,
      "loss": 0.9067,
      "step": 1805
    },
    {
      "epoch": 0.10384990533019679,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00019999099697580954,
      "loss": 0.9654,
      "step": 1810
    },
    {
      "epoch": 0.10413678352171668,
      "grad_norm": 0.25,
      "learning_rate": 0.00019998960312564548,
      "loss": 0.9231,
      "step": 1815
    },
    {
      "epoch": 0.10442366171323655,
      "grad_norm": 0.248046875,
      "learning_rate": 0.0001999881090056832,
      "loss": 1.0005,
      "step": 1820
    },
    {
      "epoch": 0.10471053990475644,
      "grad_norm": 0.2578125,
      "learning_rate": 0.000199986514617421,
      "loss": 0.9831,
      "step": 1825
    },
    {
      "epoch": 0.10499741809627632,
      "grad_norm": 0.232421875,
      "learning_rate": 0.00019998481996245772,
      "loss": 0.995,
      "step": 1830
    },
    {
      "epoch": 0.1052842962877962,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00019998302504249278,
      "loss": 0.9808,
      "step": 1835
    },
    {
      "epoch": 0.10557117447931608,
      "grad_norm": 0.283203125,
      "learning_rate": 0.00019998112985932613,
      "loss": 0.9961,
      "step": 1840
    },
    {
      "epoch": 0.10585805267083596,
      "grad_norm": 0.236328125,
      "learning_rate": 0.00019997913441485826,
      "loss": 0.9614,
      "step": 1845
    },
    {
      "epoch": 0.10614493086235584,
      "grad_norm": 0.2412109375,
      "learning_rate": 0.00019997703871109021,
      "loss": 1.0427,
      "step": 1850
    },
    {
      "epoch": 0.10643180905387573,
      "grad_norm": 0.234375,
      "learning_rate": 0.00019997484275012357,
      "loss": 0.979,
      "step": 1855
    },
    {
      "epoch": 0.1067186872453956,
      "grad_norm": 0.2490234375,
      "learning_rate": 0.00019997254653416043,
      "loss": 0.9046,
      "step": 1860
    },
    {
      "epoch": 0.10700556543691549,
      "grad_norm": 0.25,
      "learning_rate": 0.00019997015006550342,
      "loss": 0.9862,
      "step": 1865
    },
    {
      "epoch": 0.10729244362843536,
      "grad_norm": 0.2451171875,
      "learning_rate": 0.0001999676533465558,
      "loss": 0.9117,
      "step": 1870
    },
    {
      "epoch": 0.10757932181995525,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00019996505637982122,
      "loss": 0.9843,
      "step": 1875
    },
    {
      "epoch": 0.10786620001147512,
      "grad_norm": 0.244140625,
      "learning_rate": 0.00019996235916790392,
      "loss": 0.9766,
      "step": 1880
    },
    {
      "epoch": 0.10815307820299501,
      "grad_norm": 0.263671875,
      "learning_rate": 0.0001999595617135087,
      "loss": 0.9147,
      "step": 1885
    },
    {
      "epoch": 0.10843995639451488,
      "grad_norm": 0.265625,
      "learning_rate": 0.00019995666401944085,
      "loss": 0.9491,
      "step": 1890
    },
    {
      "epoch": 0.10872683458603477,
      "grad_norm": 0.2255859375,
      "learning_rate": 0.0001999536660886062,
      "loss": 0.9517,
      "step": 1895
    },
    {
      "epoch": 0.10901371277755464,
      "grad_norm": 0.2294921875,
      "learning_rate": 0.00019995056792401105,
      "loss": 1.0407,
      "step": 1900
    },
    {
      "epoch": 0.10930059096907453,
      "grad_norm": 0.244140625,
      "learning_rate": 0.00019994736952876226,
      "loss": 1.0085,
      "step": 1905
    },
    {
      "epoch": 0.1095874691605944,
      "grad_norm": 0.234375,
      "learning_rate": 0.0001999440709060672,
      "loss": 0.914,
      "step": 1910
    },
    {
      "epoch": 0.1098743473521143,
      "grad_norm": 0.25390625,
      "learning_rate": 0.0001999406720592337,
      "loss": 0.9376,
      "step": 1915
    },
    {
      "epoch": 0.11016122554363417,
      "grad_norm": 0.248046875,
      "learning_rate": 0.00019993717299167014,
      "loss": 0.8981,
      "step": 1920
    },
    {
      "epoch": 0.11044810373515405,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00019993357370688542,
      "loss": 1.0067,
      "step": 1925
    },
    {
      "epoch": 0.11073498192667393,
      "grad_norm": 0.248046875,
      "learning_rate": 0.00019992987420848891,
      "loss": 0.9061,
      "step": 1930
    },
    {
      "epoch": 0.11102186011819382,
      "grad_norm": 0.2421875,
      "learning_rate": 0.00019992607450019048,
      "loss": 1.0008,
      "step": 1935
    },
    {
      "epoch": 0.11130873830971369,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00019992217458580043,
      "loss": 0.9274,
      "step": 1940
    },
    {
      "epoch": 0.11159561650123358,
      "grad_norm": 0.291015625,
      "learning_rate": 0.00019991817446922964,
      "loss": 1.0122,
      "step": 1945
    },
    {
      "epoch": 0.11188249469275345,
      "grad_norm": 0.234375,
      "learning_rate": 0.00019991407415448947,
      "loss": 0.9451,
      "step": 1950
    },
    {
      "epoch": 0.11216937288427334,
      "grad_norm": 0.279296875,
      "learning_rate": 0.0001999098736456917,
      "loss": 0.9165,
      "step": 1955
    },
    {
      "epoch": 0.11245625107579321,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00019990557294704856,
      "loss": 0.9794,
      "step": 1960
    },
    {
      "epoch": 0.1127431292673131,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00019990117206287287,
      "loss": 0.9801,
      "step": 1965
    },
    {
      "epoch": 0.11303000745883297,
      "grad_norm": 0.25,
      "learning_rate": 0.0001998966709975778,
      "loss": 0.9645,
      "step": 1970
    },
    {
      "epoch": 0.11331688565035286,
      "grad_norm": 0.240234375,
      "learning_rate": 0.00019989206975567708,
      "loss": 0.9787,
      "step": 1975
    },
    {
      "epoch": 0.11360376384187273,
      "grad_norm": 0.25,
      "learning_rate": 0.0001998873683417848,
      "loss": 0.9984,
      "step": 1980
    },
    {
      "epoch": 0.11389064203339262,
      "grad_norm": 0.24609375,
      "learning_rate": 0.00019988256676061554,
      "loss": 0.995,
      "step": 1985
    },
    {
      "epoch": 0.1141775202249125,
      "grad_norm": 0.244140625,
      "learning_rate": 0.00019987766501698438,
      "loss": 0.9607,
      "step": 1990
    },
    {
      "epoch": 0.11446439841643238,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00019987266311580676,
      "loss": 1.0015,
      "step": 1995
    },
    {
      "epoch": 0.11475127660795226,
      "grad_norm": 0.240234375,
      "learning_rate": 0.00019986756106209864,
      "loss": 0.9125,
      "step": 2000
    },
    {
      "epoch": 0.11503815479947214,
      "grad_norm": 0.2578125,
      "learning_rate": 0.0001998623588609763,
      "loss": 0.9281,
      "step": 2005
    },
    {
      "epoch": 0.11532503299099202,
      "grad_norm": 0.2578125,
      "learning_rate": 0.0001998570565176566,
      "loss": 1.0259,
      "step": 2010
    },
    {
      "epoch": 0.1156119111825119,
      "grad_norm": 0.271484375,
      "learning_rate": 0.0001998516540374567,
      "loss": 1.003,
      "step": 2015
    },
    {
      "epoch": 0.11589878937403178,
      "grad_norm": 0.234375,
      "learning_rate": 0.00019984615142579424,
      "loss": 0.9298,
      "step": 2020
    },
    {
      "epoch": 0.11618566756555167,
      "grad_norm": 0.265625,
      "learning_rate": 0.00019984054868818724,
      "loss": 0.9373,
      "step": 2025
    },
    {
      "epoch": 0.11647254575707154,
      "grad_norm": 0.2392578125,
      "learning_rate": 0.0001998348458302541,
      "loss": 0.8953,
      "step": 2030
    },
    {
      "epoch": 0.11675942394859143,
      "grad_norm": 0.255859375,
      "learning_rate": 0.0001998290428577137,
      "loss": 0.9788,
      "step": 2035
    },
    {
      "epoch": 0.1170463021401113,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00019982313977638528,
      "loss": 0.9672,
      "step": 2040
    },
    {
      "epoch": 0.11733318033163119,
      "grad_norm": 0.2412109375,
      "learning_rate": 0.00019981713659218846,
      "loss": 0.8816,
      "step": 2045
    },
    {
      "epoch": 0.11762005852315106,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00019981103331114322,
      "loss": 0.9887,
      "step": 2050
    },
    {
      "epoch": 0.11790693671467095,
      "grad_norm": 0.2470703125,
      "learning_rate": 0.00019980482993936995,
      "loss": 1.0521,
      "step": 2055
    },
    {
      "epoch": 0.11819381490619082,
      "grad_norm": 0.2412109375,
      "learning_rate": 0.00019979852648308947,
      "loss": 0.9699,
      "step": 2060
    },
    {
      "epoch": 0.11848069309771071,
      "grad_norm": 0.267578125,
      "learning_rate": 0.0001997921229486228,
      "loss": 0.9402,
      "step": 2065
    },
    {
      "epoch": 0.11876757128923059,
      "grad_norm": 0.265625,
      "learning_rate": 0.00019978561934239149,
      "loss": 0.9328,
      "step": 2070
    },
    {
      "epoch": 0.11905444948075047,
      "grad_norm": 0.25390625,
      "learning_rate": 0.0001997790156709173,
      "loss": 0.9643,
      "step": 2075
    },
    {
      "epoch": 0.11934132767227035,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00019977231194082248,
      "loss": 1.0274,
      "step": 2080
    },
    {
      "epoch": 0.11962820586379024,
      "grad_norm": 0.2470703125,
      "learning_rate": 0.00019976550815882952,
      "loss": 0.9102,
      "step": 2085
    },
    {
      "epoch": 0.11991508405531011,
      "grad_norm": 0.2890625,
      "learning_rate": 0.00019975860433176128,
      "loss": 0.918,
      "step": 2090
    },
    {
      "epoch": 0.12020196224683,
      "grad_norm": 0.248046875,
      "learning_rate": 0.0001997516004665409,
      "loss": 0.9157,
      "step": 2095
    },
    {
      "epoch": 0.12048884043834987,
      "grad_norm": 0.259765625,
      "learning_rate": 0.0001997444965701919,
      "loss": 0.9264,
      "step": 2100
    },
    {
      "epoch": 0.12077571862986976,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00019973729264983808,
      "loss": 0.9099,
      "step": 2105
    },
    {
      "epoch": 0.12106259682138963,
      "grad_norm": 0.31640625,
      "learning_rate": 0.00019972998871270353,
      "loss": 0.9819,
      "step": 2110
    },
    {
      "epoch": 0.12134947501290952,
      "grad_norm": 0.25,
      "learning_rate": 0.0001997225847661127,
      "loss": 0.9286,
      "step": 2115
    },
    {
      "epoch": 0.12163635320442939,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00019971508081749023,
      "loss": 1.0012,
      "step": 2120
    },
    {
      "epoch": 0.12192323139594928,
      "grad_norm": 0.244140625,
      "learning_rate": 0.0001997074768743611,
      "loss": 0.9616,
      "step": 2125
    },
    {
      "epoch": 0.12221010958746915,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00019969977294435057,
      "loss": 0.9717,
      "step": 2130
    },
    {
      "epoch": 0.12249698777898904,
      "grad_norm": 0.26171875,
      "learning_rate": 0.0001996919690351842,
      "loss": 0.9775,
      "step": 2135
    },
    {
      "epoch": 0.12278386597050892,
      "grad_norm": 0.291015625,
      "learning_rate": 0.0001996840651546877,
      "loss": 0.9049,
      "step": 2140
    },
    {
      "epoch": 0.1230707441620288,
      "grad_norm": 0.2451171875,
      "learning_rate": 0.00019967606131078718,
      "loss": 0.9141,
      "step": 2145
    },
    {
      "epoch": 0.12335762235354868,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00019966795751150885,
      "loss": 1.0031,
      "step": 2150
    },
    {
      "epoch": 0.12364450054506856,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00019965975376497918,
      "loss": 0.9594,
      "step": 2155
    },
    {
      "epoch": 0.12393137873658844,
      "grad_norm": 0.24609375,
      "learning_rate": 0.000199651450079425,
      "loss": 0.9046,
      "step": 2160
    },
    {
      "epoch": 0.12421825692810833,
      "grad_norm": 0.28515625,
      "learning_rate": 0.00019964304646317323,
      "loss": 0.961,
      "step": 2165
    },
    {
      "epoch": 0.1245051351196282,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00019963454292465103,
      "loss": 0.9989,
      "step": 2170
    },
    {
      "epoch": 0.12479201331114809,
      "grad_norm": 0.2431640625,
      "learning_rate": 0.00019962593947238578,
      "loss": 0.9993,
      "step": 2175
    },
    {
      "epoch": 0.12507889150266796,
      "grad_norm": 0.259765625,
      "learning_rate": 0.000199617236115005,
      "loss": 0.9633,
      "step": 2180
    },
    {
      "epoch": 0.12536576969418783,
      "grad_norm": 0.24609375,
      "learning_rate": 0.00019960843286123648,
      "loss": 0.9134,
      "step": 2185
    },
    {
      "epoch": 0.12565264788570774,
      "grad_norm": 0.2578125,
      "learning_rate": 0.0001995995297199081,
      "loss": 1.0059,
      "step": 2190
    },
    {
      "epoch": 0.1259395260772276,
      "grad_norm": 0.2490234375,
      "learning_rate": 0.000199590526699948,
      "loss": 1.0153,
      "step": 2195
    },
    {
      "epoch": 0.12622640426874748,
      "grad_norm": 0.2451171875,
      "learning_rate": 0.0001995814238103844,
      "loss": 0.9359,
      "step": 2200
    },
    {
      "epoch": 0.12651328246026736,
      "grad_norm": 0.25,
      "learning_rate": 0.00019957222106034572,
      "loss": 0.9862,
      "step": 2205
    },
    {
      "epoch": 0.12680016065178726,
      "grad_norm": 0.2412109375,
      "learning_rate": 0.00019956291845906046,
      "loss": 0.9502,
      "step": 2210
    },
    {
      "epoch": 0.12708703884330713,
      "grad_norm": 0.265625,
      "learning_rate": 0.00019955351601585731,
      "loss": 0.979,
      "step": 2215
    },
    {
      "epoch": 0.127373917034827,
      "grad_norm": 0.23828125,
      "learning_rate": 0.0001995440137401651,
      "loss": 0.8981,
      "step": 2220
    },
    {
      "epoch": 0.12766079522634688,
      "grad_norm": 0.240234375,
      "learning_rate": 0.00019953441164151264,
      "loss": 1.0073,
      "step": 2225
    },
    {
      "epoch": 0.12794767341786678,
      "grad_norm": 0.240234375,
      "learning_rate": 0.00019952470972952902,
      "loss": 0.9045,
      "step": 2230
    },
    {
      "epoch": 0.12823455160938665,
      "grad_norm": 0.2490234375,
      "learning_rate": 0.0001995149080139433,
      "loss": 0.9948,
      "step": 2235
    },
    {
      "epoch": 0.12852142980090653,
      "grad_norm": 0.2392578125,
      "learning_rate": 0.0001995050065045847,
      "loss": 0.9548,
      "step": 2240
    },
    {
      "epoch": 0.12880830799242643,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00019949500521138243,
      "loss": 0.9024,
      "step": 2245
    },
    {
      "epoch": 0.1290951861839463,
      "grad_norm": 0.2470703125,
      "learning_rate": 0.00019948490414436584,
      "loss": 1.0311,
      "step": 2250
    },
    {
      "epoch": 0.12938206437546618,
      "grad_norm": 0.2451171875,
      "learning_rate": 0.00019947470331366427,
      "loss": 0.9478,
      "step": 2255
    },
    {
      "epoch": 0.12966894256698605,
      "grad_norm": 0.25,
      "learning_rate": 0.00019946440272950716,
      "loss": 0.8834,
      "step": 2260
    },
    {
      "epoch": 0.12995582075850595,
      "grad_norm": 0.248046875,
      "learning_rate": 0.00019945400240222396,
      "loss": 1.0765,
      "step": 2265
    },
    {
      "epoch": 0.13024269895002583,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00019944350234224416,
      "loss": 0.9275,
      "step": 2270
    },
    {
      "epoch": 0.1305295771415457,
      "grad_norm": 0.25,
      "learning_rate": 0.0001994329025600972,
      "loss": 0.938,
      "step": 2275
    },
    {
      "epoch": 0.13081645533306557,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00019942220306641258,
      "loss": 1.0348,
      "step": 2280
    },
    {
      "epoch": 0.13110333352458547,
      "grad_norm": 0.24609375,
      "learning_rate": 0.0001994114038719198,
      "loss": 0.9627,
      "step": 2285
    },
    {
      "epoch": 0.13139021171610535,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00019940050498744828,
      "loss": 0.9749,
      "step": 2290
    },
    {
      "epoch": 0.13167708990762522,
      "grad_norm": 0.2431640625,
      "learning_rate": 0.00019938950642392746,
      "loss": 0.8878,
      "step": 2295
    },
    {
      "epoch": 0.1319639680991451,
      "grad_norm": 0.28125,
      "learning_rate": 0.00019937840819238677,
      "loss": 1.0061,
      "step": 2300
    },
    {
      "epoch": 0.132250846290665,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00019936721030395547,
      "loss": 0.9872,
      "step": 2305
    },
    {
      "epoch": 0.13253772448218487,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00019935591276986286,
      "loss": 1.0265,
      "step": 2310
    },
    {
      "epoch": 0.13282460267370474,
      "grad_norm": 0.29296875,
      "learning_rate": 0.00019934451560143815,
      "loss": 1.0217,
      "step": 2315
    },
    {
      "epoch": 0.13311148086522462,
      "grad_norm": 0.2421875,
      "learning_rate": 0.0001993330188101104,
      "loss": 0.9342,
      "step": 2320
    },
    {
      "epoch": 0.13339835905674452,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00019932142240740866,
      "loss": 0.9407,
      "step": 2325
    },
    {
      "epoch": 0.1336852372482644,
      "grad_norm": 0.251953125,
      "learning_rate": 0.0001993097264049618,
      "loss": 0.9405,
      "step": 2330
    },
    {
      "epoch": 0.13397211543978427,
      "grad_norm": 0.25390625,
      "learning_rate": 0.0001992979308144986,
      "loss": 0.971,
      "step": 2335
    },
    {
      "epoch": 0.13425899363130414,
      "grad_norm": 0.283203125,
      "learning_rate": 0.00019928603564784773,
      "loss": 1.0423,
      "step": 2340
    },
    {
      "epoch": 0.13454587182282404,
      "grad_norm": 0.283203125,
      "learning_rate": 0.0001992740409169377,
      "loss": 1.0122,
      "step": 2345
    },
    {
      "epoch": 0.13483275001434392,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00019926194663379677,
      "loss": 0.9608,
      "step": 2350
    },
    {
      "epoch": 0.1351196282058638,
      "grad_norm": 0.2470703125,
      "learning_rate": 0.00019924975281055324,
      "loss": 0.905,
      "step": 2355
    },
    {
      "epoch": 0.13540650639738366,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00019923745945943502,
      "loss": 0.9108,
      "step": 2360
    },
    {
      "epoch": 0.13569338458890357,
      "grad_norm": 0.2412109375,
      "learning_rate": 0.0001992250665927699,
      "loss": 0.9587,
      "step": 2365
    },
    {
      "epoch": 0.13598026278042344,
      "grad_norm": 0.2734375,
      "learning_rate": 0.0001992125742229855,
      "loss": 0.9459,
      "step": 2370
    },
    {
      "epoch": 0.1362671409719433,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00019919998236260923,
      "loss": 1.0325,
      "step": 2375
    },
    {
      "epoch": 0.1365540191634632,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00019918729102426816,
      "loss": 1.0031,
      "step": 2380
    },
    {
      "epoch": 0.1368408973549831,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00019917450022068927,
      "loss": 0.9159,
      "step": 2385
    },
    {
      "epoch": 0.13712777554650296,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00019916160996469914,
      "loss": 0.9884,
      "step": 2390
    },
    {
      "epoch": 0.13741465373802284,
      "grad_norm": 0.251953125,
      "learning_rate": 0.0001991486202692242,
      "loss": 0.9934,
      "step": 2395
    },
    {
      "epoch": 0.1377015319295427,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00019913553114729053,
      "loss": 0.9287,
      "step": 2400
    },
    {
      "epoch": 0.1379884101210626,
      "grad_norm": 0.2578125,
      "learning_rate": 0.0001991223426120239,
      "loss": 0.955,
      "step": 2405
    },
    {
      "epoch": 0.13827528831258248,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00019910905467664987,
      "loss": 0.9791,
      "step": 2410
    },
    {
      "epoch": 0.13856216650410236,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00019909566735449354,
      "loss": 1.0237,
      "step": 2415
    },
    {
      "epoch": 0.13884904469562223,
      "grad_norm": 0.240234375,
      "learning_rate": 0.00019908218065897978,
      "loss": 0.9546,
      "step": 2420
    },
    {
      "epoch": 0.13913592288714213,
      "grad_norm": 0.244140625,
      "learning_rate": 0.00019906859460363307,
      "loss": 0.8988,
      "step": 2425
    },
    {
      "epoch": 0.139422801078662,
      "grad_norm": 0.25,
      "learning_rate": 0.00019905490920207755,
      "loss": 0.9675,
      "step": 2430
    },
    {
      "epoch": 0.13970967927018188,
      "grad_norm": 0.232421875,
      "learning_rate": 0.00019904112446803699,
      "loss": 0.9773,
      "step": 2435
    },
    {
      "epoch": 0.13999655746170175,
      "grad_norm": 0.259765625,
      "learning_rate": 0.0001990272404153347,
      "loss": 1.053,
      "step": 2440
    },
    {
      "epoch": 0.14028343565322166,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00019901325705789366,
      "loss": 0.9634,
      "step": 2445
    },
    {
      "epoch": 0.14057031384474153,
      "grad_norm": 0.251953125,
      "learning_rate": 0.0001989991744097364,
      "loss": 1.0375,
      "step": 2450
    },
    {
      "epoch": 0.1408571920362614,
      "grad_norm": 0.2421875,
      "learning_rate": 0.0001989849924849851,
      "loss": 1.0092,
      "step": 2455
    },
    {
      "epoch": 0.14114407022778128,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00019897071129786132,
      "loss": 1.0077,
      "step": 2460
    },
    {
      "epoch": 0.14143094841930118,
      "grad_norm": 0.24609375,
      "learning_rate": 0.00019895633086268637,
      "loss": 1.0083,
      "step": 2465
    },
    {
      "epoch": 0.14171782661082105,
      "grad_norm": 0.26171875,
      "learning_rate": 0.0001989418511938809,
      "loss": 0.9727,
      "step": 2470
    },
    {
      "epoch": 0.14200470480234093,
      "grad_norm": 0.265625,
      "learning_rate": 0.00019892727230596519,
      "loss": 1.0221,
      "step": 2475
    },
    {
      "epoch": 0.1422915829938608,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00019891259421355895,
      "loss": 0.9697,
      "step": 2480
    },
    {
      "epoch": 0.1425784611853807,
      "grad_norm": 0.2451171875,
      "learning_rate": 0.0001988978169313815,
      "loss": 0.9837,
      "step": 2485
    },
    {
      "epoch": 0.14286533937690057,
      "grad_norm": 0.2421875,
      "learning_rate": 0.00019888294047425143,
      "loss": 1.0065,
      "step": 2490
    },
    {
      "epoch": 0.14315221756842045,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00019886796485708692,
      "loss": 0.9125,
      "step": 2495
    },
    {
      "epoch": 0.14343909575994032,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00019885289009490556,
      "loss": 1.0038,
      "step": 2500
    },
    {
      "epoch": 0.14372597395146022,
      "grad_norm": 0.25390625,
      "learning_rate": 0.0001988377162028243,
      "loss": 0.926,
      "step": 2505
    },
    {
      "epoch": 0.1440128521429801,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00019882244319605966,
      "loss": 0.9902,
      "step": 2510
    },
    {
      "epoch": 0.14429973033449997,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00019880707108992738,
      "loss": 1.0494,
      "step": 2515
    },
    {
      "epoch": 0.14458660852601984,
      "grad_norm": 0.2890625,
      "learning_rate": 0.0001987915998998426,
      "loss": 0.9435,
      "step": 2520
    },
    {
      "epoch": 0.14487348671753975,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00019877602964131995,
      "loss": 0.9293,
      "step": 2525
    },
    {
      "epoch": 0.14516036490905962,
      "grad_norm": 0.2734375,
      "learning_rate": 0.0001987603603299733,
      "loss": 0.9658,
      "step": 2530
    },
    {
      "epoch": 0.1454472431005795,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00019874459198151583,
      "loss": 0.9757,
      "step": 2535
    },
    {
      "epoch": 0.14573412129209937,
      "grad_norm": 0.2470703125,
      "learning_rate": 0.0001987287246117601,
      "loss": 1.0029,
      "step": 2540
    },
    {
      "epoch": 0.14602099948361927,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00019871275823661795,
      "loss": 0.9861,
      "step": 2545
    },
    {
      "epoch": 0.14630787767513914,
      "grad_norm": 0.25,
      "learning_rate": 0.00019869669287210046,
      "loss": 0.9809,
      "step": 2550
    },
    {
      "epoch": 0.14659475586665902,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00019868052853431808,
      "loss": 0.9763,
      "step": 2555
    },
    {
      "epoch": 0.1468816340581789,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00019866426523948037,
      "loss": 0.9973,
      "step": 2560
    },
    {
      "epoch": 0.1471685122496988,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00019864790300389625,
      "loss": 1.0361,
      "step": 2565
    },
    {
      "epoch": 0.14745539044121866,
      "grad_norm": 0.2490234375,
      "learning_rate": 0.00019863144184397376,
      "loss": 0.9603,
      "step": 2570
    },
    {
      "epoch": 0.14774226863273854,
      "grad_norm": 0.2734375,
      "learning_rate": 0.0001986148817762203,
      "loss": 0.9825,
      "step": 2575
    },
    {
      "epoch": 0.1480291468242584,
      "grad_norm": 0.255859375,
      "learning_rate": 0.0001985982228172422,
      "loss": 1.0327,
      "step": 2580
    },
    {
      "epoch": 0.1483160250157783,
      "grad_norm": 0.26953125,
      "learning_rate": 0.0001985814649837452,
      "loss": 1.0019,
      "step": 2585
    },
    {
      "epoch": 0.1486029032072982,
      "grad_norm": 0.255859375,
      "learning_rate": 0.000198564608292534,
      "loss": 0.9411,
      "step": 2590
    },
    {
      "epoch": 0.14888978139881806,
      "grad_norm": 0.25,
      "learning_rate": 0.00019854765276051264,
      "loss": 0.9216,
      "step": 2595
    },
    {
      "epoch": 0.14917665959033793,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00019853059840468408,
      "loss": 0.9562,
      "step": 2600
    },
    {
      "epoch": 0.14946353778185784,
      "grad_norm": 0.265625,
      "learning_rate": 0.0001985134452421505,
      "loss": 1.0468,
      "step": 2605
    },
    {
      "epoch": 0.1497504159733777,
      "grad_norm": 0.265625,
      "learning_rate": 0.00019849619329011315,
      "loss": 1.0086,
      "step": 2610
    },
    {
      "epoch": 0.15003729416489758,
      "grad_norm": 0.267578125,
      "learning_rate": 0.0001984788425658723,
      "loss": 1.0496,
      "step": 2615
    },
    {
      "epoch": 0.15032417235641746,
      "grad_norm": 0.2734375,
      "learning_rate": 0.00019846139308682729,
      "loss": 1.0036,
      "step": 2620
    },
    {
      "epoch": 0.15061105054793736,
      "grad_norm": 0.267578125,
      "learning_rate": 0.0001984438448704765,
      "loss": 0.9403,
      "step": 2625
    },
    {
      "epoch": 0.15089792873945723,
      "grad_norm": 0.2421875,
      "learning_rate": 0.0001984261979344173,
      "loss": 0.9519,
      "step": 2630
    },
    {
      "epoch": 0.1511848069309771,
      "grad_norm": 0.236328125,
      "learning_rate": 0.00019840845229634612,
      "loss": 0.9608,
      "step": 2635
    },
    {
      "epoch": 0.15147168512249698,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00019839060797405833,
      "loss": 1.0074,
      "step": 2640
    },
    {
      "epoch": 0.15175856331401688,
      "grad_norm": 0.267578125,
      "learning_rate": 0.0001983726649854482,
      "loss": 0.9188,
      "step": 2645
    },
    {
      "epoch": 0.15204544150553675,
      "grad_norm": 0.25390625,
      "learning_rate": 0.0001983546233485091,
      "loss": 1.0233,
      "step": 2650
    },
    {
      "epoch": 0.15233231969705663,
      "grad_norm": 0.287109375,
      "learning_rate": 0.0001983364830813331,
      "loss": 1.0333,
      "step": 2655
    },
    {
      "epoch": 0.1526191978885765,
      "grad_norm": 0.25,
      "learning_rate": 0.00019831824420211137,
      "loss": 0.993,
      "step": 2660
    },
    {
      "epoch": 0.1529060760800964,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00019829990672913387,
      "loss": 0.9822,
      "step": 2665
    },
    {
      "epoch": 0.15319295427161628,
      "grad_norm": 0.2578125,
      "learning_rate": 0.0001982814706807895,
      "loss": 0.962,
      "step": 2670
    },
    {
      "epoch": 0.15347983246313615,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00019826293607556593,
      "loss": 0.9029,
      "step": 2675
    },
    {
      "epoch": 0.15376671065465602,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00019824430293204973,
      "loss": 0.922,
      "step": 2680
    },
    {
      "epoch": 0.15405358884617593,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00019822557126892627,
      "loss": 0.9965,
      "step": 2685
    },
    {
      "epoch": 0.1543404670376958,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00019820674110497966,
      "loss": 0.927,
      "step": 2690
    },
    {
      "epoch": 0.15462734522921567,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00019818781245909285,
      "loss": 0.9637,
      "step": 2695
    },
    {
      "epoch": 0.15491422342073555,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00019816878535024754,
      "loss": 1.0141,
      "step": 2700
    },
    {
      "epoch": 0.15520110161225545,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00019814965979752413,
      "loss": 1.0148,
      "step": 2705
    },
    {
      "epoch": 0.15548797980377532,
      "grad_norm": 0.251953125,
      "learning_rate": 0.0001981304358201018,
      "loss": 1.0374,
      "step": 2710
    },
    {
      "epoch": 0.1557748579952952,
      "grad_norm": 0.24609375,
      "learning_rate": 0.00019811111343725842,
      "loss": 0.9639,
      "step": 2715
    },
    {
      "epoch": 0.15606173618681507,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00019809169266837043,
      "loss": 0.9544,
      "step": 2720
    },
    {
      "epoch": 0.15634861437833497,
      "grad_norm": 0.263671875,
      "learning_rate": 0.0001980721735329131,
      "loss": 0.9553,
      "step": 2725
    },
    {
      "epoch": 0.15663549256985485,
      "grad_norm": 0.2392578125,
      "learning_rate": 0.0001980525560504602,
      "loss": 0.9162,
      "step": 2730
    },
    {
      "epoch": 0.15692237076137472,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00019803284024068427,
      "loss": 0.997,
      "step": 2735
    },
    {
      "epoch": 0.1572092489528946,
      "grad_norm": 0.232421875,
      "learning_rate": 0.00019801302612335628,
      "loss": 0.9371,
      "step": 2740
    },
    {
      "epoch": 0.1574961271444145,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00019799311371834595,
      "loss": 0.9677,
      "step": 2745
    },
    {
      "epoch": 0.15778300533593437,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00019797310304562143,
      "loss": 0.9503,
      "step": 2750
    },
    {
      "epoch": 0.15806988352745424,
      "grad_norm": 0.24609375,
      "learning_rate": 0.00019795299412524945,
      "loss": 1.0382,
      "step": 2755
    },
    {
      "epoch": 0.15835676171897412,
      "grad_norm": 0.25,
      "learning_rate": 0.00019793278697739533,
      "loss": 0.9606,
      "step": 2760
    },
    {
      "epoch": 0.15864363991049402,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00019791248162232285,
      "loss": 0.9368,
      "step": 2765
    },
    {
      "epoch": 0.1589305181020139,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00019789207808039425,
      "loss": 0.9581,
      "step": 2770
    },
    {
      "epoch": 0.15921739629353376,
      "grad_norm": 0.279296875,
      "learning_rate": 0.0001978715763720702,
      "loss": 1.0179,
      "step": 2775
    },
    {
      "epoch": 0.15950427448505364,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00019785097651790992,
      "loss": 0.9556,
      "step": 2780
    },
    {
      "epoch": 0.15979115267657354,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00019783027853857097,
      "loss": 1.0694,
      "step": 2785
    },
    {
      "epoch": 0.1600780308680934,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00019780948245480933,
      "loss": 1.0073,
      "step": 2790
    },
    {
      "epoch": 0.1603649090596133,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00019778858828747934,
      "loss": 0.9729,
      "step": 2795
    },
    {
      "epoch": 0.16065178725113316,
      "grad_norm": 0.2490234375,
      "learning_rate": 0.00019776759605753377,
      "loss": 1.0349,
      "step": 2800
    },
    {
      "epoch": 0.16093866544265306,
      "grad_norm": 0.248046875,
      "learning_rate": 0.0001977465057860236,
      "loss": 0.9904,
      "step": 2805
    },
    {
      "epoch": 0.16122554363417294,
      "grad_norm": 0.2490234375,
      "learning_rate": 0.00019772531749409828,
      "loss": 0.9545,
      "step": 2810
    },
    {
      "epoch": 0.1615124218256928,
      "grad_norm": 0.271484375,
      "learning_rate": 0.0001977040312030054,
      "loss": 0.9768,
      "step": 2815
    },
    {
      "epoch": 0.16179930001721268,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00019768264693409098,
      "loss": 0.9905,
      "step": 2820
    },
    {
      "epoch": 0.16208617820873258,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00019766116470879913,
      "loss": 0.9736,
      "step": 2825
    },
    {
      "epoch": 0.16237305640025246,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00019763958454867235,
      "loss": 1.0114,
      "step": 2830
    },
    {
      "epoch": 0.16265993459177233,
      "grad_norm": 0.24609375,
      "learning_rate": 0.00019761790647535124,
      "loss": 0.9847,
      "step": 2835
    },
    {
      "epoch": 0.1629468127832922,
      "grad_norm": 0.28515625,
      "learning_rate": 0.00019759613051057462,
      "loss": 0.959,
      "step": 2840
    },
    {
      "epoch": 0.1632336909748121,
      "grad_norm": 0.265625,
      "learning_rate": 0.00019757425667617945,
      "loss": 0.93,
      "step": 2845
    },
    {
      "epoch": 0.16352056916633198,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00019755228499410092,
      "loss": 1.0175,
      "step": 2850
    },
    {
      "epoch": 0.16380744735785185,
      "grad_norm": 0.2470703125,
      "learning_rate": 0.00019753021548637222,
      "loss": 0.9568,
      "step": 2855
    },
    {
      "epoch": 0.16409432554937173,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00019750804817512477,
      "loss": 0.9666,
      "step": 2860
    },
    {
      "epoch": 0.16438120374089163,
      "grad_norm": 0.265625,
      "learning_rate": 0.0001974857830825879,
      "loss": 0.9816,
      "step": 2865
    },
    {
      "epoch": 0.1646680819324115,
      "grad_norm": 0.26953125,
      "learning_rate": 0.0001974634202310892,
      "loss": 0.9241,
      "step": 2870
    },
    {
      "epoch": 0.16495496012393138,
      "grad_norm": 0.244140625,
      "learning_rate": 0.00019744095964305413,
      "loss": 0.9971,
      "step": 2875
    },
    {
      "epoch": 0.16524183831545125,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00019741840134100623,
      "loss": 0.9578,
      "step": 2880
    },
    {
      "epoch": 0.16552871650697115,
      "grad_norm": 0.26171875,
      "learning_rate": 0.000197395745347567,
      "loss": 0.9817,
      "step": 2885
    },
    {
      "epoch": 0.16581559469849103,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00019737299168545597,
      "loss": 0.9887,
      "step": 2890
    },
    {
      "epoch": 0.1661024728900109,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00019735014037749053,
      "loss": 1.0605,
      "step": 2895
    },
    {
      "epoch": 0.16638935108153077,
      "grad_norm": 0.25,
      "learning_rate": 0.000197327191446586,
      "loss": 0.9337,
      "step": 2900
    },
    {
      "epoch": 0.16667622927305067,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00019730414491575564,
      "loss": 0.876,
      "step": 2905
    },
    {
      "epoch": 0.16696310746457055,
      "grad_norm": 0.296875,
      "learning_rate": 0.00019728100080811057,
      "loss": 0.9747,
      "step": 2910
    },
    {
      "epoch": 0.16724998565609042,
      "grad_norm": 0.2490234375,
      "learning_rate": 0.00019725775914685977,
      "loss": 0.9694,
      "step": 2915
    },
    {
      "epoch": 0.1675368638476103,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00019723441995531,
      "loss": 0.955,
      "step": 2920
    },
    {
      "epoch": 0.1678237420391302,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00019721098325686584,
      "loss": 0.9547,
      "step": 2925
    },
    {
      "epoch": 0.16811062023065007,
      "grad_norm": 0.255859375,
      "learning_rate": 0.0001971874490750297,
      "loss": 0.9627,
      "step": 2930
    },
    {
      "epoch": 0.16839749842216994,
      "grad_norm": 0.23828125,
      "learning_rate": 0.0001971638174334017,
      "loss": 1.0038,
      "step": 2935
    },
    {
      "epoch": 0.16868437661368982,
      "grad_norm": 0.2578125,
      "learning_rate": 0.0001971400883556797,
      "loss": 0.9428,
      "step": 2940
    },
    {
      "epoch": 0.16897125480520972,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00019711626186565929,
      "loss": 1.0165,
      "step": 2945
    },
    {
      "epoch": 0.1692581329967296,
      "grad_norm": 0.26953125,
      "learning_rate": 0.0001970923379872337,
      "loss": 0.9777,
      "step": 2950
    },
    {
      "epoch": 0.16954501118824947,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00019706831674439382,
      "loss": 1.0453,
      "step": 2955
    },
    {
      "epoch": 0.16983188937976934,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00019704419816122826,
      "loss": 0.9889,
      "step": 2960
    },
    {
      "epoch": 0.17011876757128924,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00019701998226192319,
      "loss": 0.9816,
      "step": 2965
    },
    {
      "epoch": 0.17040564576280912,
      "grad_norm": 0.248046875,
      "learning_rate": 0.00019699566907076236,
      "loss": 0.9419,
      "step": 2970
    },
    {
      "epoch": 0.170692523954329,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00019697125861212707,
      "loss": 0.9346,
      "step": 2975
    },
    {
      "epoch": 0.17097940214584886,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00019694675091049617,
      "loss": 0.9696,
      "step": 2980
    },
    {
      "epoch": 0.17126628033736876,
      "grad_norm": 0.25390625,
      "learning_rate": 0.0001969221459904461,
      "loss": 0.9879,
      "step": 2985
    },
    {
      "epoch": 0.17155315852888864,
      "grad_norm": 0.28125,
      "learning_rate": 0.0001968974438766507,
      "loss": 1.0363,
      "step": 2990
    },
    {
      "epoch": 0.1718400367204085,
      "grad_norm": 0.259765625,
      "learning_rate": 0.0001968726445938813,
      "loss": 0.9803,
      "step": 2995
    },
    {
      "epoch": 0.17212691491192839,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00019684774816700664,
      "loss": 0.9079,
      "step": 3000
    },
    {
      "epoch": 0.1724137931034483,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00019682275462099298,
      "loss": 0.9684,
      "step": 3005
    },
    {
      "epoch": 0.17270067129496816,
      "grad_norm": 0.25,
      "learning_rate": 0.00019679766398090383,
      "loss": 1.009,
      "step": 3010
    },
    {
      "epoch": 0.17298754948648803,
      "grad_norm": 0.248046875,
      "learning_rate": 0.00019677247627190026,
      "loss": 0.9769,
      "step": 3015
    },
    {
      "epoch": 0.1732744276780079,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00019674719151924043,
      "loss": 1.0395,
      "step": 3020
    },
    {
      "epoch": 0.1735613058695278,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00019672180974828,
      "loss": 1.0476,
      "step": 3025
    },
    {
      "epoch": 0.17384818406104768,
      "grad_norm": 0.279296875,
      "learning_rate": 0.0001966963309844719,
      "loss": 0.9697,
      "step": 3030
    },
    {
      "epoch": 0.17413506225256756,
      "grad_norm": 0.24609375,
      "learning_rate": 0.00019667075525336622,
      "loss": 0.9243,
      "step": 3035
    },
    {
      "epoch": 0.17442194044408743,
      "grad_norm": 0.306640625,
      "learning_rate": 0.00019664508258061044,
      "loss": 1.0036,
      "step": 3040
    },
    {
      "epoch": 0.17470881863560733,
      "grad_norm": 0.255859375,
      "learning_rate": 0.0001966193129919491,
      "loss": 0.955,
      "step": 3045
    },
    {
      "epoch": 0.1749956968271272,
      "grad_norm": 0.248046875,
      "learning_rate": 0.00019659344651322405,
      "loss": 0.9717,
      "step": 3050
    },
    {
      "epoch": 0.17528257501864708,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00019656748317037424,
      "loss": 0.9715,
      "step": 3055
    },
    {
      "epoch": 0.17556945321016695,
      "grad_norm": 0.2734375,
      "learning_rate": 0.00019654142298943574,
      "loss": 0.9769,
      "step": 3060
    },
    {
      "epoch": 0.17585633140168686,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00019651526599654182,
      "loss": 0.9292,
      "step": 3065
    },
    {
      "epoch": 0.17614320959320673,
      "grad_norm": 0.255859375,
      "learning_rate": 0.0001964890122179227,
      "loss": 0.9528,
      "step": 3070
    },
    {
      "epoch": 0.1764300877847266,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00019646266167990578,
      "loss": 0.9587,
      "step": 3075
    },
    {
      "epoch": 0.17671696597624648,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00019643621440891543,
      "loss": 0.927,
      "step": 3080
    },
    {
      "epoch": 0.17700384416776638,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00019640967043147302,
      "loss": 0.9888,
      "step": 3085
    },
    {
      "epoch": 0.17729072235928625,
      "grad_norm": 0.263671875,
      "learning_rate": 0.0001963830297741969,
      "loss": 0.9385,
      "step": 3090
    },
    {
      "epoch": 0.17757760055080613,
      "grad_norm": 0.251953125,
      "learning_rate": 0.0001963562924638024,
      "loss": 1.009,
      "step": 3095
    },
    {
      "epoch": 0.177864478742326,
      "grad_norm": 0.291015625,
      "learning_rate": 0.00019632945852710173,
      "loss": 0.9866,
      "step": 3100
    },
    {
      "epoch": 0.1781513569338459,
      "grad_norm": 0.28125,
      "learning_rate": 0.00019630252799100409,
      "loss": 0.9238,
      "step": 3105
    },
    {
      "epoch": 0.17843823512536577,
      "grad_norm": 0.302734375,
      "learning_rate": 0.0001962755008825154,
      "loss": 0.9603,
      "step": 3110
    },
    {
      "epoch": 0.17872511331688565,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00019624837722873856,
      "loss": 0.9602,
      "step": 3115
    },
    {
      "epoch": 0.17901199150840552,
      "grad_norm": 0.28125,
      "learning_rate": 0.00019622115705687318,
      "loss": 0.9651,
      "step": 3120
    },
    {
      "epoch": 0.17929886969992542,
      "grad_norm": 0.24609375,
      "learning_rate": 0.00019619384039421575,
      "loss": 0.98,
      "step": 3125
    },
    {
      "epoch": 0.1795857478914453,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00019616642726815947,
      "loss": 0.896,
      "step": 3130
    },
    {
      "epoch": 0.17987262608296517,
      "grad_norm": 0.259765625,
      "learning_rate": 0.0001961389177061943,
      "loss": 0.9217,
      "step": 3135
    },
    {
      "epoch": 0.18015950427448504,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00019611131173590687,
      "loss": 0.9536,
      "step": 3140
    },
    {
      "epoch": 0.18044638246600495,
      "grad_norm": 0.28515625,
      "learning_rate": 0.0001960836093849805,
      "loss": 0.9958,
      "step": 3145
    },
    {
      "epoch": 0.18073326065752482,
      "grad_norm": 0.435546875,
      "learning_rate": 0.00019605581068119518,
      "loss": 0.9922,
      "step": 3150
    },
    {
      "epoch": 0.1810201388490447,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00019602791565242754,
      "loss": 0.9009,
      "step": 3155
    },
    {
      "epoch": 0.18130701704056457,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00019599992432665073,
      "loss": 0.9346,
      "step": 3160
    },
    {
      "epoch": 0.18159389523208447,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00019597183673193452,
      "loss": 0.9644,
      "step": 3165
    },
    {
      "epoch": 0.18188077342360434,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00019594365289644529,
      "loss": 0.8938,
      "step": 3170
    },
    {
      "epoch": 0.18216765161512422,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00019591537284844573,
      "loss": 1.0112,
      "step": 3175
    },
    {
      "epoch": 0.1824545298066441,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00019588699661629523,
      "loss": 0.9286,
      "step": 3180
    },
    {
      "epoch": 0.182741407998164,
      "grad_norm": 0.263671875,
      "learning_rate": 0.0001958585242284495,
      "loss": 0.9738,
      "step": 3185
    },
    {
      "epoch": 0.18302828618968386,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00019582995571346072,
      "loss": 0.9947,
      "step": 3190
    },
    {
      "epoch": 0.18331516438120374,
      "grad_norm": 0.265625,
      "learning_rate": 0.0001958012910999775,
      "loss": 0.9578,
      "step": 3195
    },
    {
      "epoch": 0.1836020425727236,
      "grad_norm": 0.3515625,
      "learning_rate": 0.0001957725304167447,
      "loss": 1.0321,
      "step": 3200
    },
    {
      "epoch": 0.1838889207642435,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00019574367369260364,
      "loss": 0.9091,
      "step": 3205
    },
    {
      "epoch": 0.1841757989557634,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00019571472095649192,
      "loss": 0.9775,
      "step": 3210
    },
    {
      "epoch": 0.18446267714728326,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00019568567223744339,
      "loss": 1.0474,
      "step": 3215
    },
    {
      "epoch": 0.18474955533880313,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00019565652756458818,
      "loss": 0.9921,
      "step": 3220
    },
    {
      "epoch": 0.18503643353032304,
      "grad_norm": 0.2890625,
      "learning_rate": 0.00019562728696715263,
      "loss": 0.9533,
      "step": 3225
    },
    {
      "epoch": 0.1853233117218429,
      "grad_norm": 0.265625,
      "learning_rate": 0.00019559795047445927,
      "loss": 0.9658,
      "step": 3230
    },
    {
      "epoch": 0.18561018991336278,
      "grad_norm": 0.271484375,
      "learning_rate": 0.0001955685181159268,
      "loss": 0.9816,
      "step": 3235
    },
    {
      "epoch": 0.18589706810488266,
      "grad_norm": 0.2734375,
      "learning_rate": 0.0001955389899210701,
      "loss": 0.9574,
      "step": 3240
    },
    {
      "epoch": 0.18618394629640256,
      "grad_norm": 0.2333984375,
      "learning_rate": 0.00019550936591950006,
      "loss": 0.9079,
      "step": 3245
    },
    {
      "epoch": 0.18647082448792243,
      "grad_norm": 0.29296875,
      "learning_rate": 0.0001954796461409237,
      "loss": 0.9293,
      "step": 3250
    },
    {
      "epoch": 0.1867577026794423,
      "grad_norm": 0.259765625,
      "learning_rate": 0.0001954498306151441,
      "loss": 0.944,
      "step": 3255
    },
    {
      "epoch": 0.18704458087096218,
      "grad_norm": 0.263671875,
      "learning_rate": 0.0001954199193720603,
      "loss": 1.0102,
      "step": 3260
    },
    {
      "epoch": 0.18733145906248208,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00019538991244166738,
      "loss": 0.9339,
      "step": 3265
    },
    {
      "epoch": 0.18761833725400195,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00019535980985405639,
      "loss": 0.9956,
      "step": 3270
    },
    {
      "epoch": 0.18790521544552183,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00019532961163941422,
      "loss": 0.9283,
      "step": 3275
    },
    {
      "epoch": 0.1881920936370417,
      "grad_norm": 0.28515625,
      "learning_rate": 0.00019529931782802376,
      "loss": 1.0013,
      "step": 3280
    },
    {
      "epoch": 0.1884789718285616,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00019526892845026365,
      "loss": 1.0276,
      "step": 3285
    },
    {
      "epoch": 0.18876585002008148,
      "grad_norm": 0.2470703125,
      "learning_rate": 0.00019523844353660849,
      "loss": 0.9785,
      "step": 3290
    },
    {
      "epoch": 0.18905272821160135,
      "grad_norm": 0.259765625,
      "learning_rate": 0.0001952078631176286,
      "loss": 0.9132,
      "step": 3295
    },
    {
      "epoch": 0.18933960640312122,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00019517718722399002,
      "loss": 0.9568,
      "step": 3300
    },
    {
      "epoch": 0.18962648459464113,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00019514641588645471,
      "loss": 0.9375,
      "step": 3305
    },
    {
      "epoch": 0.189913362786161,
      "grad_norm": 0.279296875,
      "learning_rate": 0.0001951155491358802,
      "loss": 0.944,
      "step": 3310
    },
    {
      "epoch": 0.19020024097768087,
      "grad_norm": 0.2890625,
      "learning_rate": 0.00019508458700321973,
      "loss": 1.0012,
      "step": 3315
    },
    {
      "epoch": 0.19048711916920075,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00019505352951952221,
      "loss": 0.9707,
      "step": 3320
    },
    {
      "epoch": 0.19077399736072065,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00019502237671593212,
      "loss": 0.9405,
      "step": 3325
    },
    {
      "epoch": 0.19106087555224052,
      "grad_norm": 0.2578125,
      "learning_rate": 0.0001949911286236896,
      "loss": 0.8607,
      "step": 3330
    },
    {
      "epoch": 0.1913477537437604,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00019495978527413028,
      "loss": 0.9797,
      "step": 3335
    },
    {
      "epoch": 0.19163463193528027,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00019492834669868536,
      "loss": 0.9488,
      "step": 3340
    },
    {
      "epoch": 0.19192151012680017,
      "grad_norm": 0.2451171875,
      "learning_rate": 0.00019489681292888148,
      "loss": 1.012,
      "step": 3345
    },
    {
      "epoch": 0.19220838831832004,
      "grad_norm": 0.25,
      "learning_rate": 0.00019486518399634083,
      "loss": 0.9622,
      "step": 3350
    },
    {
      "epoch": 0.19249526650983992,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00019483345993278093,
      "loss": 0.9597,
      "step": 3355
    },
    {
      "epoch": 0.1927821447013598,
      "grad_norm": 0.265625,
      "learning_rate": 0.00019480164077001475,
      "loss": 1.0033,
      "step": 3360
    },
    {
      "epoch": 0.1930690228928797,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00019476972653995062,
      "loss": 0.9518,
      "step": 3365
    },
    {
      "epoch": 0.19335590108439957,
      "grad_norm": 0.29296875,
      "learning_rate": 0.00019473771727459224,
      "loss": 1.0118,
      "step": 3370
    },
    {
      "epoch": 0.19364277927591944,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00019470561300603852,
      "loss": 0.9938,
      "step": 3375
    },
    {
      "epoch": 0.19392965746743931,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00019467341376648372,
      "loss": 0.9653,
      "step": 3380
    },
    {
      "epoch": 0.19421653565895922,
      "grad_norm": 0.2734375,
      "learning_rate": 0.00019464111958821727,
      "loss": 0.9561,
      "step": 3385
    },
    {
      "epoch": 0.1945034138504791,
      "grad_norm": 0.27734375,
      "learning_rate": 0.0001946087305036239,
      "loss": 1.0023,
      "step": 3390
    },
    {
      "epoch": 0.19479029204199896,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00019457624654518343,
      "loss": 1.001,
      "step": 3395
    },
    {
      "epoch": 0.19507717023351884,
      "grad_norm": 0.248046875,
      "learning_rate": 0.00019454366774547083,
      "loss": 0.9172,
      "step": 3400
    },
    {
      "epoch": 0.19536404842503874,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00019451099413715626,
      "loss": 0.9451,
      "step": 3405
    },
    {
      "epoch": 0.1956509266165586,
      "grad_norm": 0.259765625,
      "learning_rate": 0.0001944782257530048,
      "loss": 0.9521,
      "step": 3410
    },
    {
      "epoch": 0.1959378048080785,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00019444536262587669,
      "loss": 0.9749,
      "step": 3415
    },
    {
      "epoch": 0.19622468299959836,
      "grad_norm": 0.345703125,
      "learning_rate": 0.00019441240478872718,
      "loss": 0.9371,
      "step": 3420
    },
    {
      "epoch": 0.19651156119111826,
      "grad_norm": 0.2890625,
      "learning_rate": 0.0001943793522746064,
      "loss": 0.9343,
      "step": 3425
    },
    {
      "epoch": 0.19679843938263814,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00019434620511665958,
      "loss": 1.0255,
      "step": 3430
    },
    {
      "epoch": 0.197085317574158,
      "grad_norm": 0.26953125,
      "learning_rate": 0.0001943129633481267,
      "loss": 0.9707,
      "step": 3435
    },
    {
      "epoch": 0.19737219576567788,
      "grad_norm": 0.248046875,
      "learning_rate": 0.00019427962700234268,
      "loss": 1.0431,
      "step": 3440
    },
    {
      "epoch": 0.19765907395719778,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00019424619611273727,
      "loss": 0.9646,
      "step": 3445
    },
    {
      "epoch": 0.19794595214871766,
      "grad_norm": 0.263671875,
      "learning_rate": 0.0001942126707128351,
      "loss": 0.9781,
      "step": 3450
    },
    {
      "epoch": 0.19823283034023753,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00019417905083625545,
      "loss": 1.0096,
      "step": 3455
    },
    {
      "epoch": 0.1985197085317574,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00019414533651671242,
      "loss": 0.964,
      "step": 3460
    },
    {
      "epoch": 0.1988065867232773,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00019411152778801486,
      "loss": 0.9573,
      "step": 3465
    },
    {
      "epoch": 0.19909346491479718,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00019407762468406619,
      "loss": 0.9138,
      "step": 3470
    },
    {
      "epoch": 0.19938034310631705,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00019404362723886452,
      "loss": 0.9735,
      "step": 3475
    },
    {
      "epoch": 0.19966722129783693,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00019400953548650258,
      "loss": 0.9722,
      "step": 3480
    },
    {
      "epoch": 0.19995409948935683,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00019397534946116762,
      "loss": 1.0176,
      "step": 3485
    },
    {
      "epoch": 0.2002409776808767,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00019394106919714155,
      "loss": 0.9016,
      "step": 3490
    },
    {
      "epoch": 0.20052785587239658,
      "grad_norm": 0.263671875,
      "learning_rate": 0.0001939066947288006,
      "loss": 1.0353,
      "step": 3495
    },
    {
      "epoch": 0.20081473406391645,
      "grad_norm": 0.28125,
      "learning_rate": 0.0001938722260906156,
      "loss": 0.9489,
      "step": 3500
    },
    {
      "epoch": 0.20110161225543635,
      "grad_norm": 0.265625,
      "learning_rate": 0.00019383766331715178,
      "loss": 1.0551,
      "step": 3505
    },
    {
      "epoch": 0.20138849044695623,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00019380300644306878,
      "loss": 1.0359,
      "step": 3510
    },
    {
      "epoch": 0.2016753686384761,
      "grad_norm": 0.265625,
      "learning_rate": 0.00019376825550312057,
      "loss": 1.0009,
      "step": 3515
    },
    {
      "epoch": 0.20196224682999597,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00019373341053215547,
      "loss": 0.942,
      "step": 3520
    },
    {
      "epoch": 0.20224912502151587,
      "grad_norm": 0.28125,
      "learning_rate": 0.0001936984715651161,
      "loss": 0.9778,
      "step": 3525
    },
    {
      "epoch": 0.20253600321303575,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00019366343863703932,
      "loss": 0.9932,
      "step": 3530
    },
    {
      "epoch": 0.20282288140455562,
      "grad_norm": 0.251953125,
      "learning_rate": 0.0001936283117830563,
      "loss": 0.9191,
      "step": 3535
    },
    {
      "epoch": 0.2031097595960755,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00019359309103839225,
      "loss": 0.9005,
      "step": 3540
    },
    {
      "epoch": 0.2033966377875954,
      "grad_norm": 0.263671875,
      "learning_rate": 0.0001935577764383666,
      "loss": 0.9952,
      "step": 3545
    },
    {
      "epoch": 0.20368351597911527,
      "grad_norm": 0.283203125,
      "learning_rate": 0.00019352236801839298,
      "loss": 0.922,
      "step": 3550
    },
    {
      "epoch": 0.20397039417063514,
      "grad_norm": 0.263671875,
      "learning_rate": 0.000193486865813979,
      "loss": 0.9369,
      "step": 3555
    },
    {
      "epoch": 0.20425727236215502,
      "grad_norm": 0.2734375,
      "learning_rate": 0.00019345126986072635,
      "loss": 1.0501,
      "step": 3560
    },
    {
      "epoch": 0.20454415055367492,
      "grad_norm": 0.271484375,
      "learning_rate": 0.0001934155801943307,
      "loss": 0.9395,
      "step": 3565
    },
    {
      "epoch": 0.2048310287451948,
      "grad_norm": 0.279296875,
      "learning_rate": 0.0001933797968505818,
      "loss": 0.9643,
      "step": 3570
    },
    {
      "epoch": 0.20511790693671467,
      "grad_norm": 0.26171875,
      "learning_rate": 0.0001933439198653632,
      "loss": 0.9855,
      "step": 3575
    },
    {
      "epoch": 0.20540478512823454,
      "grad_norm": 0.294921875,
      "learning_rate": 0.00019330794927465247,
      "loss": 0.9532,
      "step": 3580
    },
    {
      "epoch": 0.20569166331975444,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00019327188511452094,
      "loss": 1.0038,
      "step": 3585
    },
    {
      "epoch": 0.20597854151127432,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00019323572742113387,
      "loss": 0.9216,
      "step": 3590
    },
    {
      "epoch": 0.2062654197027942,
      "grad_norm": 0.265625,
      "learning_rate": 0.0001931994762307503,
      "loss": 0.9706,
      "step": 3595
    },
    {
      "epoch": 0.20655229789431406,
      "grad_norm": 0.28125,
      "learning_rate": 0.00019316313157972297,
      "loss": 0.9644,
      "step": 3600
    },
    {
      "epoch": 0.20683917608583396,
      "grad_norm": 0.287109375,
      "learning_rate": 0.00019312669350449836,
      "loss": 0.973,
      "step": 3605
    },
    {
      "epoch": 0.20712605427735384,
      "grad_norm": 0.2578125,
      "learning_rate": 0.0001930901620416167,
      "loss": 0.8716,
      "step": 3610
    },
    {
      "epoch": 0.2074129324688737,
      "grad_norm": 0.2734375,
      "learning_rate": 0.0001930535372277118,
      "loss": 1.0108,
      "step": 3615
    },
    {
      "epoch": 0.20769981066039359,
      "grad_norm": 0.259765625,
      "learning_rate": 0.0001930168190995111,
      "loss": 0.9474,
      "step": 3620
    },
    {
      "epoch": 0.2079866888519135,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00019298000769383565,
      "loss": 0.9808,
      "step": 3625
    },
    {
      "epoch": 0.20827356704343336,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00019294310304759994,
      "loss": 0.9813,
      "step": 3630
    },
    {
      "epoch": 0.20856044523495323,
      "grad_norm": 0.283203125,
      "learning_rate": 0.00019290610519781212,
      "loss": 0.9752,
      "step": 3635
    },
    {
      "epoch": 0.2088473234264731,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00019286901418157367,
      "loss": 1.0081,
      "step": 3640
    },
    {
      "epoch": 0.209134201617993,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00019283183003607955,
      "loss": 1.0368,
      "step": 3645
    },
    {
      "epoch": 0.20942107980951288,
      "grad_norm": 0.275390625,
      "learning_rate": 0.0001927945527986181,
      "loss": 1.0126,
      "step": 3650
    },
    {
      "epoch": 0.20970795800103276,
      "grad_norm": 0.263671875,
      "learning_rate": 0.000192757182506571,
      "loss": 0.9539,
      "step": 3655
    },
    {
      "epoch": 0.20999483619255263,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00019271971919741332,
      "loss": 0.9511,
      "step": 3660
    },
    {
      "epoch": 0.21028171438407253,
      "grad_norm": 0.27734375,
      "learning_rate": 0.0001926821629087133,
      "loss": 0.9761,
      "step": 3665
    },
    {
      "epoch": 0.2105685925755924,
      "grad_norm": 0.25390625,
      "learning_rate": 0.0001926445136781325,
      "loss": 0.9101,
      "step": 3670
    },
    {
      "epoch": 0.21085547076711228,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00019260677154342564,
      "loss": 1.0644,
      "step": 3675
    },
    {
      "epoch": 0.21114234895863215,
      "grad_norm": 0.283203125,
      "learning_rate": 0.0001925689365424406,
      "loss": 1.0595,
      "step": 3680
    },
    {
      "epoch": 0.21142922715015205,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00019253100871311843,
      "loss": 0.9295,
      "step": 3685
    },
    {
      "epoch": 0.21171610534167193,
      "grad_norm": 0.28125,
      "learning_rate": 0.00019249298809349323,
      "loss": 0.9397,
      "step": 3690
    },
    {
      "epoch": 0.2120029835331918,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00019245487472169216,
      "loss": 0.9591,
      "step": 3695
    },
    {
      "epoch": 0.21228986172471168,
      "grad_norm": 0.265625,
      "learning_rate": 0.0001924166686359354,
      "loss": 0.9513,
      "step": 3700
    },
    {
      "epoch": 0.21257673991623158,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00019237836987453613,
      "loss": 0.9952,
      "step": 3705
    },
    {
      "epoch": 0.21286361810775145,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00019233997847590035,
      "loss": 0.9506,
      "step": 3710
    },
    {
      "epoch": 0.21315049629927132,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00019230149447852714,
      "loss": 0.9978,
      "step": 3715
    },
    {
      "epoch": 0.2134373744907912,
      "grad_norm": 0.2890625,
      "learning_rate": 0.00019226291792100826,
      "loss": 0.9213,
      "step": 3720
    },
    {
      "epoch": 0.2137242526823111,
      "grad_norm": 0.28515625,
      "learning_rate": 0.0001922242488420284,
      "loss": 1.0118,
      "step": 3725
    },
    {
      "epoch": 0.21401113087383097,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00019218548728036503,
      "loss": 0.9238,
      "step": 3730
    },
    {
      "epoch": 0.21429800906535085,
      "grad_norm": 0.287109375,
      "learning_rate": 0.00019214663327488828,
      "loss": 0.966,
      "step": 3735
    },
    {
      "epoch": 0.21458488725687072,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00019210768686456106,
      "loss": 1.0034,
      "step": 3740
    },
    {
      "epoch": 0.21487176544839062,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00019206864808843892,
      "loss": 0.9623,
      "step": 3745
    },
    {
      "epoch": 0.2151586436399105,
      "grad_norm": 0.337890625,
      "learning_rate": 0.00019202951698566999,
      "loss": 0.9571,
      "step": 3750
    },
    {
      "epoch": 0.21544552183143037,
      "grad_norm": 0.251953125,
      "learning_rate": 0.0001919902935954951,
      "loss": 0.95,
      "step": 3755
    },
    {
      "epoch": 0.21573240002295024,
      "grad_norm": 0.29296875,
      "learning_rate": 0.00019195097795724747,
      "loss": 1.026,
      "step": 3760
    },
    {
      "epoch": 0.21601927821447015,
      "grad_norm": 0.271484375,
      "learning_rate": 0.000191911570110353,
      "loss": 1.0171,
      "step": 3765
    },
    {
      "epoch": 0.21630615640599002,
      "grad_norm": 0.263671875,
      "learning_rate": 0.0001918720700943299,
      "loss": 0.99,
      "step": 3770
    },
    {
      "epoch": 0.2165930345975099,
      "grad_norm": 0.26953125,
      "learning_rate": 0.0001918324779487889,
      "loss": 0.9584,
      "step": 3775
    },
    {
      "epoch": 0.21687991278902977,
      "grad_norm": 0.26953125,
      "learning_rate": 0.0001917927937134331,
      "loss": 1.0006,
      "step": 3780
    },
    {
      "epoch": 0.21716679098054967,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00019175301742805793,
      "loss": 0.9589,
      "step": 3785
    },
    {
      "epoch": 0.21745366917206954,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00019171314913255113,
      "loss": 1.013,
      "step": 3790
    },
    {
      "epoch": 0.21774054736358942,
      "grad_norm": 0.2490234375,
      "learning_rate": 0.00019167318886689273,
      "loss": 0.9847,
      "step": 3795
    },
    {
      "epoch": 0.2180274255551093,
      "grad_norm": 0.283203125,
      "learning_rate": 0.00019163313667115497,
      "loss": 0.9847,
      "step": 3800
    },
    {
      "epoch": 0.2183143037466292,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00019159299258550227,
      "loss": 0.9766,
      "step": 3805
    },
    {
      "epoch": 0.21860118193814906,
      "grad_norm": 0.275390625,
      "learning_rate": 0.0001915527566501912,
      "loss": 0.9583,
      "step": 3810
    },
    {
      "epoch": 0.21888806012966894,
      "grad_norm": 0.259765625,
      "learning_rate": 0.0001915124289055705,
      "loss": 0.9287,
      "step": 3815
    },
    {
      "epoch": 0.2191749383211888,
      "grad_norm": 0.2490234375,
      "learning_rate": 0.00019147200939208088,
      "loss": 0.9871,
      "step": 3820
    },
    {
      "epoch": 0.2194618165127087,
      "grad_norm": 0.265625,
      "learning_rate": 0.0001914314981502551,
      "loss": 0.9836,
      "step": 3825
    },
    {
      "epoch": 0.2197486947042286,
      "grad_norm": 0.265625,
      "learning_rate": 0.000191390895220718,
      "loss": 0.8962,
      "step": 3830
    },
    {
      "epoch": 0.22003557289574846,
      "grad_norm": 0.2734375,
      "learning_rate": 0.0001913502006441862,
      "loss": 0.9405,
      "step": 3835
    },
    {
      "epoch": 0.22032245108726833,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00019130941446146837,
      "loss": 0.8986,
      "step": 3840
    },
    {
      "epoch": 0.22060932927878824,
      "grad_norm": 0.265625,
      "learning_rate": 0.00019126853671346496,
      "loss": 0.9524,
      "step": 3845
    },
    {
      "epoch": 0.2208962074703081,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00019122756744116828,
      "loss": 0.9778,
      "step": 3850
    },
    {
      "epoch": 0.22118308566182798,
      "grad_norm": 0.27734375,
      "learning_rate": 0.0001911865066856624,
      "loss": 1.0214,
      "step": 3855
    },
    {
      "epoch": 0.22146996385334786,
      "grad_norm": 0.28125,
      "learning_rate": 0.00019114535448812311,
      "loss": 0.9422,
      "step": 3860
    },
    {
      "epoch": 0.22175684204486776,
      "grad_norm": 0.28515625,
      "learning_rate": 0.00019110411088981802,
      "loss": 0.9638,
      "step": 3865
    },
    {
      "epoch": 0.22204372023638763,
      "grad_norm": 0.2421875,
      "learning_rate": 0.0001910627759321062,
      "loss": 0.9094,
      "step": 3870
    },
    {
      "epoch": 0.2223305984279075,
      "grad_norm": 0.298828125,
      "learning_rate": 0.00019102134965643847,
      "loss": 0.9825,
      "step": 3875
    },
    {
      "epoch": 0.22261747661942738,
      "grad_norm": 0.2470703125,
      "learning_rate": 0.0001909798321043572,
      "loss": 1.0358,
      "step": 3880
    },
    {
      "epoch": 0.22290435481094728,
      "grad_norm": 0.287109375,
      "learning_rate": 0.00019093822331749634,
      "loss": 0.904,
      "step": 3885
    },
    {
      "epoch": 0.22319123300246715,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00019089652333758114,
      "loss": 0.9756,
      "step": 3890
    },
    {
      "epoch": 0.22347811119398703,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00019085473220642856,
      "loss": 0.972,
      "step": 3895
    },
    {
      "epoch": 0.2237649893855069,
      "grad_norm": 0.27734375,
      "learning_rate": 0.0001908128499659468,
      "loss": 0.9302,
      "step": 3900
    },
    {
      "epoch": 0.2240518675770268,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00019077087665813545,
      "loss": 0.9643,
      "step": 3905
    },
    {
      "epoch": 0.22433874576854668,
      "grad_norm": 0.296875,
      "learning_rate": 0.0001907288123250854,
      "loss": 0.9786,
      "step": 3910
    },
    {
      "epoch": 0.22462562396006655,
      "grad_norm": 0.28125,
      "learning_rate": 0.00019068665700897896,
      "loss": 0.9587,
      "step": 3915
    },
    {
      "epoch": 0.22491250215158642,
      "grad_norm": 0.302734375,
      "learning_rate": 0.0001906444107520895,
      "loss": 1.006,
      "step": 3920
    },
    {
      "epoch": 0.22519938034310633,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00019060207359678164,
      "loss": 0.8882,
      "step": 3925
    },
    {
      "epoch": 0.2254862585346262,
      "grad_norm": 0.2890625,
      "learning_rate": 0.00019055964558551124,
      "loss": 0.9823,
      "step": 3930
    },
    {
      "epoch": 0.22577313672614607,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00019051712676082522,
      "loss": 0.9457,
      "step": 3935
    },
    {
      "epoch": 0.22606001491766595,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00019047451716536147,
      "loss": 0.9735,
      "step": 3940
    },
    {
      "epoch": 0.22634689310918585,
      "grad_norm": 0.291015625,
      "learning_rate": 0.000190431816841849,
      "loss": 0.9697,
      "step": 3945
    },
    {
      "epoch": 0.22663377130070572,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00019038902583310785,
      "loss": 1.0229,
      "step": 3950
    },
    {
      "epoch": 0.2269206494922256,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00019034614418204893,
      "loss": 0.9805,
      "step": 3955
    },
    {
      "epoch": 0.22720752768374547,
      "grad_norm": 0.259765625,
      "learning_rate": 0.000190303171931674,
      "loss": 0.9696,
      "step": 3960
    },
    {
      "epoch": 0.22749440587526537,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00019026010912507577,
      "loss": 0.9885,
      "step": 3965
    },
    {
      "epoch": 0.22778128406678524,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00019021695580543772,
      "loss": 0.9571,
      "step": 3970
    },
    {
      "epoch": 0.22806816225830512,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00019017371201603407,
      "loss": 0.8998,
      "step": 3975
    },
    {
      "epoch": 0.228355040449825,
      "grad_norm": 0.265625,
      "learning_rate": 0.00019013037780022982,
      "loss": 1.0427,
      "step": 3980
    },
    {
      "epoch": 0.2286419186413449,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00019008695320148062,
      "loss": 0.9227,
      "step": 3985
    },
    {
      "epoch": 0.22892879683286477,
      "grad_norm": 0.267578125,
      "learning_rate": 0.0001900434382633327,
      "loss": 0.9698,
      "step": 3990
    },
    {
      "epoch": 0.22921567502438464,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00018999983302942302,
      "loss": 0.9134,
      "step": 3995
    },
    {
      "epoch": 0.22950255321590451,
      "grad_norm": 0.302734375,
      "learning_rate": 0.00018995613754347893,
      "loss": 0.9699,
      "step": 4000
    },
    {
      "epoch": 0.22978943140742442,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00018991235184931843,
      "loss": 0.896,
      "step": 4005
    },
    {
      "epoch": 0.2300763095989443,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00018986847599084986,
      "loss": 0.9285,
      "step": 4010
    },
    {
      "epoch": 0.23036318779046416,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00018982451001207205,
      "loss": 0.984,
      "step": 4015
    },
    {
      "epoch": 0.23065006598198404,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00018978045395707418,
      "loss": 0.9619,
      "step": 4020
    },
    {
      "epoch": 0.23093694417350394,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00018973630787003575,
      "loss": 0.9796,
      "step": 4025
    },
    {
      "epoch": 0.2312238223650238,
      "grad_norm": 0.275390625,
      "learning_rate": 0.0001896920717952266,
      "loss": 0.9083,
      "step": 4030
    },
    {
      "epoch": 0.23151070055654369,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00018964774577700667,
      "loss": 1.0115,
      "step": 4035
    },
    {
      "epoch": 0.23179757874806356,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00018960332985982627,
      "loss": 0.9947,
      "step": 4040
    },
    {
      "epoch": 0.23208445693958346,
      "grad_norm": 0.259765625,
      "learning_rate": 0.0001895588240882258,
      "loss": 0.9585,
      "step": 4045
    },
    {
      "epoch": 0.23237133513110333,
      "grad_norm": 0.2734375,
      "learning_rate": 0.0001895142285068357,
      "loss": 0.9033,
      "step": 4050
    },
    {
      "epoch": 0.2326582133226232,
      "grad_norm": 0.265625,
      "learning_rate": 0.00018946954316037648,
      "loss": 0.9287,
      "step": 4055
    },
    {
      "epoch": 0.23294509151414308,
      "grad_norm": 0.259765625,
      "learning_rate": 0.0001894247680936588,
      "loss": 0.9409,
      "step": 4060
    },
    {
      "epoch": 0.23323196970566298,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00018937990335158312,
      "loss": 0.9404,
      "step": 4065
    },
    {
      "epoch": 0.23351884789718286,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00018933494897913997,
      "loss": 0.99,
      "step": 4070
    },
    {
      "epoch": 0.23380572608870273,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00018928990502140963,
      "loss": 0.9034,
      "step": 4075
    },
    {
      "epoch": 0.2340926042802226,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00018924477152356233,
      "loss": 1.0379,
      "step": 4080
    },
    {
      "epoch": 0.2343794824717425,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00018919954853085803,
      "loss": 0.948,
      "step": 4085
    },
    {
      "epoch": 0.23466636066326238,
      "grad_norm": 0.28515625,
      "learning_rate": 0.0001891542360886464,
      "loss": 1.0377,
      "step": 4090
    },
    {
      "epoch": 0.23495323885478225,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00018910883424236695,
      "loss": 0.9948,
      "step": 4095
    },
    {
      "epoch": 0.23524011704630213,
      "grad_norm": 0.291015625,
      "learning_rate": 0.0001890633430375487,
      "loss": 0.9823,
      "step": 4100
    },
    {
      "epoch": 0.23552699523782203,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00018901776251981032,
      "loss": 0.9425,
      "step": 4105
    },
    {
      "epoch": 0.2358138734293419,
      "grad_norm": 0.2578125,
      "learning_rate": 0.0001889720927348601,
      "loss": 0.9568,
      "step": 4110
    },
    {
      "epoch": 0.23610075162086178,
      "grad_norm": 0.265625,
      "learning_rate": 0.00018892633372849575,
      "loss": 0.9233,
      "step": 4115
    },
    {
      "epoch": 0.23638762981238165,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00018888048554660454,
      "loss": 0.9648,
      "step": 4120
    },
    {
      "epoch": 0.23667450800390155,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00018883454823516313,
      "loss": 0.9916,
      "step": 4125
    },
    {
      "epoch": 0.23696138619542143,
      "grad_norm": 0.27734375,
      "learning_rate": 0.0001887885218402375,
      "loss": 1.0255,
      "step": 4130
    },
    {
      "epoch": 0.2372482643869413,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00018874240640798316,
      "loss": 0.9522,
      "step": 4135
    },
    {
      "epoch": 0.23753514257846117,
      "grad_norm": 0.255859375,
      "learning_rate": 0.0001886962019846446,
      "loss": 0.9887,
      "step": 4140
    },
    {
      "epoch": 0.23782202076998107,
      "grad_norm": 0.3046875,
      "learning_rate": 0.00018864990861655584,
      "loss": 0.9113,
      "step": 4145
    },
    {
      "epoch": 0.23810889896150095,
      "grad_norm": 0.255859375,
      "learning_rate": 0.0001886035263501399,
      "loss": 0.9509,
      "step": 4150
    },
    {
      "epoch": 0.23839577715302082,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00018855705523190908,
      "loss": 0.9792,
      "step": 4155
    },
    {
      "epoch": 0.2386826553445407,
      "grad_norm": 0.25390625,
      "learning_rate": 0.0001885104953084647,
      "loss": 0.9923,
      "step": 4160
    },
    {
      "epoch": 0.2389695335360606,
      "grad_norm": 0.28125,
      "learning_rate": 0.00018846384662649714,
      "loss": 0.9719,
      "step": 4165
    },
    {
      "epoch": 0.23925641172758047,
      "grad_norm": 0.2734375,
      "learning_rate": 0.00018841710923278582,
      "loss": 0.9501,
      "step": 4170
    },
    {
      "epoch": 0.23954328991910034,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00018837028317419908,
      "loss": 0.9205,
      "step": 4175
    },
    {
      "epoch": 0.23983016811062022,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00018832336849769425,
      "loss": 0.9432,
      "step": 4180
    },
    {
      "epoch": 0.24011704630214012,
      "grad_norm": 0.291015625,
      "learning_rate": 0.0001882763652503174,
      "loss": 1.0014,
      "step": 4185
    },
    {
      "epoch": 0.24040392449366,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00018822927347920355,
      "loss": 0.9548,
      "step": 4190
    },
    {
      "epoch": 0.24069080268517987,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00018818209323157638,
      "loss": 1.0308,
      "step": 4195
    },
    {
      "epoch": 0.24097768087669974,
      "grad_norm": 0.314453125,
      "learning_rate": 0.0001881348245547484,
      "loss": 1.0047,
      "step": 4200
    },
    {
      "epoch": 0.24126455906821964,
      "grad_norm": 0.291015625,
      "learning_rate": 0.00018808746749612073,
      "loss": 0.919,
      "step": 4205
    },
    {
      "epoch": 0.24155143725973952,
      "grad_norm": 0.263671875,
      "learning_rate": 0.0001880400221031831,
      "loss": 0.9214,
      "step": 4210
    },
    {
      "epoch": 0.2418383154512594,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00018799248842351393,
      "loss": 0.9631,
      "step": 4215
    },
    {
      "epoch": 0.24212519364277926,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00018794486650478004,
      "loss": 0.9936,
      "step": 4220
    },
    {
      "epoch": 0.24241207183429916,
      "grad_norm": 0.26171875,
      "learning_rate": 0.0001878971563947368,
      "loss": 0.9386,
      "step": 4225
    },
    {
      "epoch": 0.24269895002581904,
      "grad_norm": 0.28515625,
      "learning_rate": 0.00018784935814122804,
      "loss": 1.0508,
      "step": 4230
    },
    {
      "epoch": 0.2429858282173389,
      "grad_norm": 0.251953125,
      "learning_rate": 0.0001878014717921859,
      "loss": 0.8909,
      "step": 4235
    },
    {
      "epoch": 0.24327270640885879,
      "grad_norm": 0.271484375,
      "learning_rate": 0.000187753497395631,
      "loss": 0.9797,
      "step": 4240
    },
    {
      "epoch": 0.2435595846003787,
      "grad_norm": 0.287109375,
      "learning_rate": 0.00018770543499967217,
      "loss": 0.9766,
      "step": 4245
    },
    {
      "epoch": 0.24384646279189856,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00018765728465250644,
      "loss": 1.0364,
      "step": 4250
    },
    {
      "epoch": 0.24413334098341843,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00018760904640241906,
      "loss": 0.9276,
      "step": 4255
    },
    {
      "epoch": 0.2444202191749383,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00018756072029778352,
      "loss": 0.9165,
      "step": 4260
    },
    {
      "epoch": 0.2447070973664582,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00018751230638706131,
      "loss": 0.9561,
      "step": 4265
    },
    {
      "epoch": 0.24499397555797808,
      "grad_norm": 0.2734375,
      "learning_rate": 0.00018746380471880203,
      "loss": 0.9377,
      "step": 4270
    },
    {
      "epoch": 0.24528085374949796,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00018741521534164325,
      "loss": 0.9992,
      "step": 4275
    },
    {
      "epoch": 0.24556773194101783,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00018736653830431048,
      "loss": 1.0328,
      "step": 4280
    },
    {
      "epoch": 0.24585461013253773,
      "grad_norm": 0.2578125,
      "learning_rate": 0.0001873177736556172,
      "loss": 0.9225,
      "step": 4285
    },
    {
      "epoch": 0.2461414883240576,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00018726892144446474,
      "loss": 0.9061,
      "step": 4290
    },
    {
      "epoch": 0.24642836651557748,
      "grad_norm": 0.271484375,
      "learning_rate": 0.0001872199817198421,
      "loss": 0.9639,
      "step": 4295
    },
    {
      "epoch": 0.24671524470709735,
      "grad_norm": 0.2734375,
      "learning_rate": 0.00018717095453082627,
      "loss": 0.9764,
      "step": 4300
    },
    {
      "epoch": 0.24700212289861725,
      "grad_norm": 0.265625,
      "learning_rate": 0.00018712183992658174,
      "loss": 0.9006,
      "step": 4305
    },
    {
      "epoch": 0.24728900109013713,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00018707263795636077,
      "loss": 0.9212,
      "step": 4310
    },
    {
      "epoch": 0.247575879281657,
      "grad_norm": 0.267578125,
      "learning_rate": 0.0001870233486695032,
      "loss": 0.8946,
      "step": 4315
    },
    {
      "epoch": 0.24786275747317688,
      "grad_norm": 0.28125,
      "learning_rate": 0.0001869739721154365,
      "loss": 1.0377,
      "step": 4320
    },
    {
      "epoch": 0.24814963566469678,
      "grad_norm": 0.29296875,
      "learning_rate": 0.00018692450834367546,
      "loss": 1.0284,
      "step": 4325
    },
    {
      "epoch": 0.24843651385621665,
      "grad_norm": 0.283203125,
      "learning_rate": 0.00018687495740382258,
      "loss": 0.9688,
      "step": 4330
    },
    {
      "epoch": 0.24872339204773652,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00018682531934556757,
      "loss": 0.989,
      "step": 4335
    },
    {
      "epoch": 0.2490102702392564,
      "grad_norm": 0.265625,
      "learning_rate": 0.00018677559421868766,
      "loss": 0.9916,
      "step": 4340
    },
    {
      "epoch": 0.2492971484307763,
      "grad_norm": 0.2890625,
      "learning_rate": 0.00018672578207304727,
      "loss": 0.9724,
      "step": 4345
    },
    {
      "epoch": 0.24958402662229617,
      "grad_norm": 0.2734375,
      "learning_rate": 0.00018667588295859816,
      "loss": 0.9535,
      "step": 4350
    },
    {
      "epoch": 0.24987090481381605,
      "grad_norm": 0.275390625,
      "learning_rate": 0.0001866258969253792,
      "loss": 1.0108,
      "step": 4355
    },
    {
      "epoch": 0.2501577830053359,
      "grad_norm": 0.2734375,
      "learning_rate": 0.00018657582402351663,
      "loss": 1.0017,
      "step": 4360
    },
    {
      "epoch": 0.2504446611968558,
      "grad_norm": 0.2734375,
      "learning_rate": 0.00018652566430322356,
      "loss": 1.0202,
      "step": 4365
    },
    {
      "epoch": 0.25073153938837567,
      "grad_norm": 0.27734375,
      "learning_rate": 0.0001864754178148003,
      "loss": 0.9738,
      "step": 4370
    },
    {
      "epoch": 0.25101841757989557,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00018642508460863416,
      "loss": 0.9779,
      "step": 4375
    },
    {
      "epoch": 0.25130529577141547,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00018637466473519937,
      "loss": 0.966,
      "step": 4380
    },
    {
      "epoch": 0.2515921739629353,
      "grad_norm": 0.3203125,
      "learning_rate": 0.0001863241582450571,
      "loss": 1.0283,
      "step": 4385
    },
    {
      "epoch": 0.2518790521544552,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00018627356518885536,
      "loss": 0.9777,
      "step": 4390
    },
    {
      "epoch": 0.2521659303459751,
      "grad_norm": 0.27734375,
      "learning_rate": 0.000186222885617329,
      "loss": 0.9649,
      "step": 4395
    },
    {
      "epoch": 0.25245280853749497,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00018617211958129958,
      "loss": 0.8768,
      "step": 4400
    },
    {
      "epoch": 0.25273968672901487,
      "grad_norm": 0.265625,
      "learning_rate": 0.00018612126713167542,
      "loss": 1.0189,
      "step": 4405
    },
    {
      "epoch": 0.2530265649205347,
      "grad_norm": 0.2578125,
      "learning_rate": 0.0001860703283194515,
      "loss": 0.9404,
      "step": 4410
    },
    {
      "epoch": 0.2533134431120546,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00018601930319570934,
      "loss": 0.9849,
      "step": 4415
    },
    {
      "epoch": 0.2536003213035745,
      "grad_norm": 0.2734375,
      "learning_rate": 0.000185968191811617,
      "loss": 0.9844,
      "step": 4420
    },
    {
      "epoch": 0.25388719949509436,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00018591699421842916,
      "loss": 0.8693,
      "step": 4425
    },
    {
      "epoch": 0.25417407768661426,
      "grad_norm": 0.283203125,
      "learning_rate": 0.00018586571046748685,
      "loss": 1.0033,
      "step": 4430
    },
    {
      "epoch": 0.25446095587813417,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00018581434061021754,
      "loss": 0.9435,
      "step": 4435
    },
    {
      "epoch": 0.254747834069654,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00018576288469813505,
      "loss": 1.019,
      "step": 4440
    },
    {
      "epoch": 0.2550347122611739,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00018571134278283946,
      "loss": 1.0081,
      "step": 4445
    },
    {
      "epoch": 0.25532159045269376,
      "grad_norm": 0.279296875,
      "learning_rate": 0.0001856597149160171,
      "loss": 0.9636,
      "step": 4450
    },
    {
      "epoch": 0.25560846864421366,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00018560800114944063,
      "loss": 1.0263,
      "step": 4455
    },
    {
      "epoch": 0.25589534683573356,
      "grad_norm": 0.248046875,
      "learning_rate": 0.00018555620153496856,
      "loss": 0.9449,
      "step": 4460
    },
    {
      "epoch": 0.2561822250272534,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00018550431612454578,
      "loss": 1.0175,
      "step": 4465
    },
    {
      "epoch": 0.2564691032187733,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00018545234497020302,
      "loss": 0.9296,
      "step": 4470
    },
    {
      "epoch": 0.2567559814102932,
      "grad_norm": 0.25,
      "learning_rate": 0.00018540028812405717,
      "loss": 0.9648,
      "step": 4475
    },
    {
      "epoch": 0.25704285960181306,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00018534814563831082,
      "loss": 0.9098,
      "step": 4480
    },
    {
      "epoch": 0.25732973779333296,
      "grad_norm": 0.287109375,
      "learning_rate": 0.00018529591756525268,
      "loss": 1.0392,
      "step": 4485
    },
    {
      "epoch": 0.25761661598485286,
      "grad_norm": 0.3125,
      "learning_rate": 0.00018524360395725715,
      "loss": 0.9497,
      "step": 4490
    },
    {
      "epoch": 0.2579034941763727,
      "grad_norm": 0.255859375,
      "learning_rate": 0.0001851912048667844,
      "loss": 0.9545,
      "step": 4495
    },
    {
      "epoch": 0.2581903723678926,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00018513872034638037,
      "loss": 0.9736,
      "step": 4500
    },
    {
      "epoch": 0.25847725055941245,
      "grad_norm": 0.265625,
      "learning_rate": 0.00018508615044867668,
      "loss": 0.9045,
      "step": 4505
    },
    {
      "epoch": 0.25876412875093235,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00018503349522639052,
      "loss": 0.9952,
      "step": 4510
    },
    {
      "epoch": 0.25905100694245226,
      "grad_norm": 0.287109375,
      "learning_rate": 0.00018498075473232469,
      "loss": 0.9679,
      "step": 4515
    },
    {
      "epoch": 0.2593378851339721,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00018492792901936742,
      "loss": 0.994,
      "step": 4520
    },
    {
      "epoch": 0.259624763325492,
      "grad_norm": 0.267578125,
      "learning_rate": 0.0001848750181404925,
      "loss": 0.9577,
      "step": 4525
    },
    {
      "epoch": 0.2599116415170119,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00018482202214875908,
      "loss": 0.981,
      "step": 4530
    },
    {
      "epoch": 0.26019851970853175,
      "grad_norm": 0.283203125,
      "learning_rate": 0.00018476894109731166,
      "loss": 1.0434,
      "step": 4535
    },
    {
      "epoch": 0.26048539790005165,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00018471577503938,
      "loss": 0.9877,
      "step": 4540
    },
    {
      "epoch": 0.2607722760915715,
      "grad_norm": 0.265625,
      "learning_rate": 0.00018466252402827915,
      "loss": 1.0051,
      "step": 4545
    },
    {
      "epoch": 0.2610591542830914,
      "grad_norm": 0.283203125,
      "learning_rate": 0.00018460918811740937,
      "loss": 0.9629,
      "step": 4550
    },
    {
      "epoch": 0.2613460324746113,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00018455576736025602,
      "loss": 0.9279,
      "step": 4555
    },
    {
      "epoch": 0.26163291066613115,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00018450226181038955,
      "loss": 0.9259,
      "step": 4560
    },
    {
      "epoch": 0.26191978885765105,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00018444867152146545,
      "loss": 0.9689,
      "step": 4565
    },
    {
      "epoch": 0.26220666704917095,
      "grad_norm": 0.2734375,
      "learning_rate": 0.00018439499654722421,
      "loss": 0.9728,
      "step": 4570
    },
    {
      "epoch": 0.2624935452406908,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00018434123694149117,
      "loss": 0.9916,
      "step": 4575
    },
    {
      "epoch": 0.2627804234322107,
      "grad_norm": 0.259765625,
      "learning_rate": 0.0001842873927581766,
      "loss": 1.033,
      "step": 4580
    },
    {
      "epoch": 0.26306730162373054,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00018423346405127555,
      "loss": 0.9902,
      "step": 4585
    },
    {
      "epoch": 0.26335417981525044,
      "grad_norm": 0.291015625,
      "learning_rate": 0.0001841794508748679,
      "loss": 0.9779,
      "step": 4590
    },
    {
      "epoch": 0.26364105800677035,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00018412535328311814,
      "loss": 1.0043,
      "step": 4595
    },
    {
      "epoch": 0.2639279361982902,
      "grad_norm": 0.240234375,
      "learning_rate": 0.00018407117133027544,
      "loss": 0.9412,
      "step": 4600
    },
    {
      "epoch": 0.2642148143898101,
      "grad_norm": 0.267578125,
      "learning_rate": 0.0001840169050706736,
      "loss": 0.9353,
      "step": 4605
    },
    {
      "epoch": 0.26450169258133,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00018396255455873102,
      "loss": 0.962,
      "step": 4610
    },
    {
      "epoch": 0.26478857077284984,
      "grad_norm": 0.26953125,
      "learning_rate": 0.0001839081198489504,
      "loss": 1.0103,
      "step": 4615
    },
    {
      "epoch": 0.26507544896436974,
      "grad_norm": 0.275390625,
      "learning_rate": 0.0001838536009959191,
      "loss": 1.0115,
      "step": 4620
    },
    {
      "epoch": 0.2653623271558896,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00018379899805430862,
      "loss": 0.9928,
      "step": 4625
    },
    {
      "epoch": 0.2656492053474095,
      "grad_norm": 0.283203125,
      "learning_rate": 0.00018374431107887502,
      "loss": 0.9347,
      "step": 4630
    },
    {
      "epoch": 0.2659360835389294,
      "grad_norm": 0.291015625,
      "learning_rate": 0.00018368954012445846,
      "loss": 0.9674,
      "step": 4635
    },
    {
      "epoch": 0.26622296173044924,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00018363468524598342,
      "loss": 0.918,
      "step": 4640
    },
    {
      "epoch": 0.26650983992196914,
      "grad_norm": 0.298828125,
      "learning_rate": 0.0001835797464984585,
      "loss": 0.9558,
      "step": 4645
    },
    {
      "epoch": 0.26679671811348904,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00018352472393697632,
      "loss": 0.9257,
      "step": 4650
    },
    {
      "epoch": 0.2670835963050089,
      "grad_norm": 0.28125,
      "learning_rate": 0.0001834696176167137,
      "loss": 0.9475,
      "step": 4655
    },
    {
      "epoch": 0.2673704744965288,
      "grad_norm": 0.265625,
      "learning_rate": 0.00018341442759293137,
      "loss": 0.9681,
      "step": 4660
    },
    {
      "epoch": 0.26765735268804863,
      "grad_norm": 0.28125,
      "learning_rate": 0.00018335915392097402,
      "loss": 0.9579,
      "step": 4665
    },
    {
      "epoch": 0.26794423087956853,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00018330379665627014,
      "loss": 0.9203,
      "step": 4670
    },
    {
      "epoch": 0.26823110907108844,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00018324835585433225,
      "loss": 0.927,
      "step": 4675
    },
    {
      "epoch": 0.2685179872626083,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00018319283157075636,
      "loss": 1.0078,
      "step": 4680
    },
    {
      "epoch": 0.2688048654541282,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00018313722386122247,
      "loss": 0.9204,
      "step": 4685
    },
    {
      "epoch": 0.2690917436456481,
      "grad_norm": 0.28515625,
      "learning_rate": 0.00018308153278149406,
      "loss": 0.9469,
      "step": 4690
    },
    {
      "epoch": 0.26937862183716793,
      "grad_norm": 0.2734375,
      "learning_rate": 0.00018302575838741826,
      "loss": 1.0207,
      "step": 4695
    },
    {
      "epoch": 0.26966550002868783,
      "grad_norm": 0.27734375,
      "learning_rate": 0.0001829699007349258,
      "loss": 0.9881,
      "step": 4700
    },
    {
      "epoch": 0.2699523782202077,
      "grad_norm": 0.26953125,
      "learning_rate": 0.0001829139598800308,
      "loss": 0.8757,
      "step": 4705
    },
    {
      "epoch": 0.2702392564117276,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00018285793587883092,
      "loss": 1.0423,
      "step": 4710
    },
    {
      "epoch": 0.2705261346032475,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00018280182878750717,
      "loss": 1.0039,
      "step": 4715
    },
    {
      "epoch": 0.2708130127947673,
      "grad_norm": 0.2734375,
      "learning_rate": 0.0001827456386623238,
      "loss": 0.9478,
      "step": 4720
    },
    {
      "epoch": 0.27109989098628723,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00018268936555962845,
      "loss": 1.0252,
      "step": 4725
    },
    {
      "epoch": 0.27138676917780713,
      "grad_norm": 0.263671875,
      "learning_rate": 0.0001826330095358519,
      "loss": 0.9751,
      "step": 4730
    },
    {
      "epoch": 0.271673647369327,
      "grad_norm": 0.3046875,
      "learning_rate": 0.00018257657064750808,
      "loss": 0.9103,
      "step": 4735
    },
    {
      "epoch": 0.2719605255608469,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00018252004895119404,
      "loss": 0.9596,
      "step": 4740
    },
    {
      "epoch": 0.2722474037523667,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00018246344450358986,
      "loss": 0.9718,
      "step": 4745
    },
    {
      "epoch": 0.2725342819438866,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00018240675736145865,
      "loss": 1.0375,
      "step": 4750
    },
    {
      "epoch": 0.2728211601354065,
      "grad_norm": 0.296875,
      "learning_rate": 0.0001823499875816464,
      "loss": 0.966,
      "step": 4755
    },
    {
      "epoch": 0.2731080383269264,
      "grad_norm": 0.294921875,
      "learning_rate": 0.00018229313522108187,
      "loss": 1.0054,
      "step": 4760
    },
    {
      "epoch": 0.2733949165184463,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00018223620033677685,
      "loss": 0.9734,
      "step": 4765
    },
    {
      "epoch": 0.2736817947099662,
      "grad_norm": 0.35546875,
      "learning_rate": 0.00018217918298582572,
      "loss": 0.9234,
      "step": 4770
    },
    {
      "epoch": 0.273968672901486,
      "grad_norm": 0.275390625,
      "learning_rate": 0.0001821220832254056,
      "loss": 0.9812,
      "step": 4775
    },
    {
      "epoch": 0.2742555510930059,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00018206490111277629,
      "loss": 0.9476,
      "step": 4780
    },
    {
      "epoch": 0.27454242928452577,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00018200763670528011,
      "loss": 0.9397,
      "step": 4785
    },
    {
      "epoch": 0.27482930747604567,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00018195029006034193,
      "loss": 0.947,
      "step": 4790
    },
    {
      "epoch": 0.27511618566756557,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00018189286123546916,
      "loss": 1.0403,
      "step": 4795
    },
    {
      "epoch": 0.2754030638590854,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00018183535028825149,
      "loss": 0.9509,
      "step": 4800
    },
    {
      "epoch": 0.2756899420506053,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00018177775727636105,
      "loss": 0.9679,
      "step": 4805
    },
    {
      "epoch": 0.2759768202421252,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00018172008225755224,
      "loss": 0.9199,
      "step": 4810
    },
    {
      "epoch": 0.27626369843364507,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00018166232528966169,
      "loss": 0.9235,
      "step": 4815
    },
    {
      "epoch": 0.27655057662516497,
      "grad_norm": 0.30078125,
      "learning_rate": 0.0001816044864306082,
      "loss": 0.9656,
      "step": 4820
    },
    {
      "epoch": 0.2768374548166848,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00018154656573839275,
      "loss": 1.1683,
      "step": 4825
    },
    {
      "epoch": 0.2771243330082047,
      "grad_norm": 0.2734375,
      "learning_rate": 0.00018148856327109832,
      "loss": 0.9441,
      "step": 4830
    },
    {
      "epoch": 0.2774112111997246,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00018143047908688993,
      "loss": 0.9285,
      "step": 4835
    },
    {
      "epoch": 0.27769808939124446,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00018137231324401448,
      "loss": 1.0174,
      "step": 4840
    },
    {
      "epoch": 0.27798496758276436,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00018131406580080084,
      "loss": 0.9398,
      "step": 4845
    },
    {
      "epoch": 0.27827184577428427,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00018125573681565969,
      "loss": 0.9351,
      "step": 4850
    },
    {
      "epoch": 0.2785587239658041,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00018119732634708346,
      "loss": 0.98,
      "step": 4855
    },
    {
      "epoch": 0.278845602157324,
      "grad_norm": 0.275390625,
      "learning_rate": 0.0001811388344536463,
      "loss": 0.936,
      "step": 4860
    },
    {
      "epoch": 0.27913248034884386,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00018108026119400397,
      "loss": 0.9626,
      "step": 4865
    },
    {
      "epoch": 0.27941935854036376,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00018102160662689394,
      "loss": 0.9362,
      "step": 4870
    },
    {
      "epoch": 0.27970623673188366,
      "grad_norm": 0.345703125,
      "learning_rate": 0.0001809628708111351,
      "loss": 0.9749,
      "step": 4875
    },
    {
      "epoch": 0.2799931149234035,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00018090405380562786,
      "loss": 0.9594,
      "step": 4880
    },
    {
      "epoch": 0.2802799931149234,
      "grad_norm": 0.302734375,
      "learning_rate": 0.00018084515566935402,
      "loss": 1.0839,
      "step": 4885
    },
    {
      "epoch": 0.2805668713064433,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00018078617646137682,
      "loss": 0.9831,
      "step": 4890
    },
    {
      "epoch": 0.28085374949796316,
      "grad_norm": 0.326171875,
      "learning_rate": 0.00018072711624084068,
      "loss": 0.9761,
      "step": 4895
    },
    {
      "epoch": 0.28114062768948306,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00018066797506697136,
      "loss": 1.0078,
      "step": 4900
    },
    {
      "epoch": 0.2814275058810029,
      "grad_norm": 0.291015625,
      "learning_rate": 0.0001806087529990758,
      "loss": 0.9566,
      "step": 4905
    },
    {
      "epoch": 0.2817143840725228,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00018054945009654194,
      "loss": 0.9637,
      "step": 4910
    },
    {
      "epoch": 0.2820012622640427,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00018049006641883888,
      "loss": 0.9415,
      "step": 4915
    },
    {
      "epoch": 0.28228814045556255,
      "grad_norm": 0.28515625,
      "learning_rate": 0.00018043060202551674,
      "loss": 1.0057,
      "step": 4920
    },
    {
      "epoch": 0.28257501864708245,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00018037105697620655,
      "loss": 0.9708,
      "step": 4925
    },
    {
      "epoch": 0.28286189683860236,
      "grad_norm": 0.2890625,
      "learning_rate": 0.0001803114313306202,
      "loss": 0.9813,
      "step": 4930
    },
    {
      "epoch": 0.2831487750301222,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00018025172514855043,
      "loss": 0.9325,
      "step": 4935
    },
    {
      "epoch": 0.2834356532216421,
      "grad_norm": 0.26953125,
      "learning_rate": 0.0001801919384898707,
      "loss": 0.9666,
      "step": 4940
    },
    {
      "epoch": 0.28372253141316195,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00018013207141453523,
      "loss": 0.962,
      "step": 4945
    },
    {
      "epoch": 0.28400940960468185,
      "grad_norm": 0.283203125,
      "learning_rate": 0.00018007212398257888,
      "loss": 0.9707,
      "step": 4950
    },
    {
      "epoch": 0.28429628779620175,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00018001209625411705,
      "loss": 1.0216,
      "step": 4955
    },
    {
      "epoch": 0.2845831659877216,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00017995198828934568,
      "loss": 0.9455,
      "step": 4960
    },
    {
      "epoch": 0.2848700441792415,
      "grad_norm": 0.2734375,
      "learning_rate": 0.00017989180014854115,
      "loss": 0.9858,
      "step": 4965
    },
    {
      "epoch": 0.2851569223707614,
      "grad_norm": 0.259765625,
      "learning_rate": 0.0001798315318920603,
      "loss": 0.9292,
      "step": 4970
    },
    {
      "epoch": 0.28544380056228125,
      "grad_norm": 0.265625,
      "learning_rate": 0.00017977118358034023,
      "loss": 0.9592,
      "step": 4975
    },
    {
      "epoch": 0.28573067875380115,
      "grad_norm": 0.259765625,
      "learning_rate": 0.0001797107552738984,
      "loss": 0.9366,
      "step": 4980
    },
    {
      "epoch": 0.286017556945321,
      "grad_norm": 0.25,
      "learning_rate": 0.00017965024703333246,
      "loss": 0.9745,
      "step": 4985
    },
    {
      "epoch": 0.2863044351368409,
      "grad_norm": 0.267578125,
      "learning_rate": 0.0001795896589193202,
      "loss": 0.9848,
      "step": 4990
    },
    {
      "epoch": 0.2865913133283608,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00017952899099261943,
      "loss": 0.978,
      "step": 4995
    },
    {
      "epoch": 0.28687819151988064,
      "grad_norm": 0.265625,
      "learning_rate": 0.00017946824331406823,
      "loss": 0.9613,
      "step": 5000
    },
    {
      "epoch": 0.28716506971140054,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00017940741594458444,
      "loss": 1.0832,
      "step": 5005
    },
    {
      "epoch": 0.28745194790292045,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00017934650894516584,
      "loss": 0.9122,
      "step": 5010
    },
    {
      "epoch": 0.2877388260944403,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00017928552237689015,
      "loss": 0.9856,
      "step": 5015
    },
    {
      "epoch": 0.2880257042859602,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00017922445630091485,
      "loss": 0.9846,
      "step": 5020
    },
    {
      "epoch": 0.28831258247748004,
      "grad_norm": 0.2578125,
      "learning_rate": 0.0001791633107784771,
      "loss": 0.9521,
      "step": 5025
    },
    {
      "epoch": 0.28859946066899994,
      "grad_norm": 0.251953125,
      "learning_rate": 0.0001791020858708938,
      "loss": 0.9727,
      "step": 5030
    },
    {
      "epoch": 0.28888633886051984,
      "grad_norm": 0.265625,
      "learning_rate": 0.00017904078163956142,
      "loss": 0.9935,
      "step": 5035
    },
    {
      "epoch": 0.2891732170520397,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00017897939814595596,
      "loss": 0.9994,
      "step": 5040
    },
    {
      "epoch": 0.2894600952435596,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00017891793545163297,
      "loss": 0.9973,
      "step": 5045
    },
    {
      "epoch": 0.2897469734350795,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00017885639361822728,
      "loss": 0.9503,
      "step": 5050
    },
    {
      "epoch": 0.29003385162659934,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00017879477270745328,
      "loss": 0.9388,
      "step": 5055
    },
    {
      "epoch": 0.29032072981811924,
      "grad_norm": 0.2734375,
      "learning_rate": 0.0001787330727811045,
      "loss": 0.9239,
      "step": 5060
    },
    {
      "epoch": 0.2906076080096391,
      "grad_norm": 0.25,
      "learning_rate": 0.00017867129390105384,
      "loss": 0.9892,
      "step": 5065
    },
    {
      "epoch": 0.290894486201159,
      "grad_norm": 0.263671875,
      "learning_rate": 0.0001786094361292532,
      "loss": 0.9695,
      "step": 5070
    },
    {
      "epoch": 0.2911813643926789,
      "grad_norm": 0.2734375,
      "learning_rate": 0.00017854749952773372,
      "loss": 0.9476,
      "step": 5075
    },
    {
      "epoch": 0.29146824258419873,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00017848548415860563,
      "loss": 0.9391,
      "step": 5080
    },
    {
      "epoch": 0.29175512077571863,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00017842339008405803,
      "loss": 1.0145,
      "step": 5085
    },
    {
      "epoch": 0.29204199896723854,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00017836121736635894,
      "loss": 0.9003,
      "step": 5090
    },
    {
      "epoch": 0.2923288771587584,
      "grad_norm": 0.28515625,
      "learning_rate": 0.00017829896606785543,
      "loss": 0.9675,
      "step": 5095
    },
    {
      "epoch": 0.2926157553502783,
      "grad_norm": 0.2734375,
      "learning_rate": 0.00017823663625097312,
      "loss": 0.951,
      "step": 5100
    },
    {
      "epoch": 0.29290263354179813,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00017817422797821656,
      "loss": 0.9628,
      "step": 5105
    },
    {
      "epoch": 0.29318951173331803,
      "grad_norm": 0.265625,
      "learning_rate": 0.0001781117413121689,
      "loss": 0.9483,
      "step": 5110
    },
    {
      "epoch": 0.29347638992483793,
      "grad_norm": 0.28515625,
      "learning_rate": 0.00017804917631549189,
      "loss": 1.0401,
      "step": 5115
    },
    {
      "epoch": 0.2937632681163578,
      "grad_norm": 0.28125,
      "learning_rate": 0.00017798653305092584,
      "loss": 0.9795,
      "step": 5120
    },
    {
      "epoch": 0.2940501463078777,
      "grad_norm": 0.296875,
      "learning_rate": 0.00017792381158128956,
      "loss": 0.9764,
      "step": 5125
    },
    {
      "epoch": 0.2943370244993976,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00017786101196948034,
      "loss": 0.9816,
      "step": 5130
    },
    {
      "epoch": 0.2946239026909174,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00017779813427847368,
      "loss": 1.0152,
      "step": 5135
    },
    {
      "epoch": 0.29491078088243733,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00017773517857132355,
      "loss": 0.9595,
      "step": 5140
    },
    {
      "epoch": 0.2951976590739572,
      "grad_norm": 0.287109375,
      "learning_rate": 0.00017767214491116198,
      "loss": 0.947,
      "step": 5145
    },
    {
      "epoch": 0.2954845372654771,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00017760903336119937,
      "loss": 1.0049,
      "step": 5150
    },
    {
      "epoch": 0.295771415456997,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00017754584398472405,
      "loss": 0.8602,
      "step": 5155
    },
    {
      "epoch": 0.2960582936485168,
      "grad_norm": 0.26953125,
      "learning_rate": 0.0001774825768451025,
      "loss": 0.9468,
      "step": 5160
    },
    {
      "epoch": 0.2963451718400367,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00017741923200577917,
      "loss": 0.9763,
      "step": 5165
    },
    {
      "epoch": 0.2966320500315566,
      "grad_norm": 0.28125,
      "learning_rate": 0.00017735580953027636,
      "loss": 1.0153,
      "step": 5170
    },
    {
      "epoch": 0.2969189282230765,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00017729230948219428,
      "loss": 0.9664,
      "step": 5175
    },
    {
      "epoch": 0.2972058064145964,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00017722873192521096,
      "loss": 0.9508,
      "step": 5180
    },
    {
      "epoch": 0.2974926846061162,
      "grad_norm": 0.25,
      "learning_rate": 0.00017716507692308207,
      "loss": 0.9688,
      "step": 5185
    },
    {
      "epoch": 0.2977795627976361,
      "grad_norm": 0.251953125,
      "learning_rate": 0.000177101344539641,
      "loss": 1.0038,
      "step": 5190
    },
    {
      "epoch": 0.298066440989156,
      "grad_norm": 0.3046875,
      "learning_rate": 0.00017703753483879874,
      "loss": 1.0215,
      "step": 5195
    },
    {
      "epoch": 0.29835331918067587,
      "grad_norm": 0.283203125,
      "learning_rate": 0.0001769736478845438,
      "loss": 1.0174,
      "step": 5200
    },
    {
      "epoch": 0.29864019737219577,
      "grad_norm": 0.28125,
      "learning_rate": 0.00017690968374094217,
      "loss": 0.9131,
      "step": 5205
    },
    {
      "epoch": 0.29892707556371567,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00017684564247213722,
      "loss": 0.9034,
      "step": 5210
    },
    {
      "epoch": 0.2992139537552355,
      "grad_norm": 0.291015625,
      "learning_rate": 0.00017678152414234968,
      "loss": 0.9721,
      "step": 5215
    },
    {
      "epoch": 0.2995008319467554,
      "grad_norm": 0.265625,
      "learning_rate": 0.00017671732881587756,
      "loss": 1.0016,
      "step": 5220
    },
    {
      "epoch": 0.29978771013827527,
      "grad_norm": 0.279296875,
      "learning_rate": 0.0001766530565570961,
      "loss": 0.9473,
      "step": 5225
    },
    {
      "epoch": 0.30007458832979517,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00017658870743045757,
      "loss": 1.0014,
      "step": 5230
    },
    {
      "epoch": 0.30036146652131507,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00017652428150049152,
      "loss": 0.9541,
      "step": 5235
    },
    {
      "epoch": 0.3006483447128349,
      "grad_norm": 0.265625,
      "learning_rate": 0.0001764597788318044,
      "loss": 0.9472,
      "step": 5240
    },
    {
      "epoch": 0.3009352229043548,
      "grad_norm": 0.28125,
      "learning_rate": 0.00017639519948907961,
      "loss": 0.9852,
      "step": 5245
    },
    {
      "epoch": 0.3012221010958747,
      "grad_norm": 0.265625,
      "learning_rate": 0.00017633054353707745,
      "loss": 0.946,
      "step": 5250
    },
    {
      "epoch": 0.30150897928739456,
      "grad_norm": 0.27734375,
      "learning_rate": 0.0001762658110406351,
      "loss": 0.9516,
      "step": 5255
    },
    {
      "epoch": 0.30179585747891446,
      "grad_norm": 0.2890625,
      "learning_rate": 0.00017620100206466635,
      "loss": 0.9726,
      "step": 5260
    },
    {
      "epoch": 0.3020827356704343,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00017613611667416192,
      "loss": 0.94,
      "step": 5265
    },
    {
      "epoch": 0.3023696138619542,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00017607115493418896,
      "loss": 0.9733,
      "step": 5270
    },
    {
      "epoch": 0.3026564920534741,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00017600611690989123,
      "loss": 1.0138,
      "step": 5275
    },
    {
      "epoch": 0.30294337024499396,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00017594100266648906,
      "loss": 0.9298,
      "step": 5280
    },
    {
      "epoch": 0.30323024843651386,
      "grad_norm": 0.28515625,
      "learning_rate": 0.0001758758122692791,
      "loss": 0.9409,
      "step": 5285
    },
    {
      "epoch": 0.30351712662803376,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00017581054578363445,
      "loss": 0.9214,
      "step": 5290
    },
    {
      "epoch": 0.3038040048195536,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00017574520327500451,
      "loss": 0.9794,
      "step": 5295
    },
    {
      "epoch": 0.3040908830110735,
      "grad_norm": 0.267578125,
      "learning_rate": 0.0001756797848089149,
      "loss": 0.9635,
      "step": 5300
    },
    {
      "epoch": 0.30437776120259336,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00017561429045096733,
      "loss": 0.9972,
      "step": 5305
    },
    {
      "epoch": 0.30466463939411326,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00017554872026683978,
      "loss": 0.9549,
      "step": 5310
    },
    {
      "epoch": 0.30495151758563316,
      "grad_norm": 0.32421875,
      "learning_rate": 0.00017548307432228608,
      "loss": 1.0005,
      "step": 5315
    },
    {
      "epoch": 0.305238395777153,
      "grad_norm": 0.265625,
      "learning_rate": 0.00017541735268313623,
      "loss": 0.9812,
      "step": 5320
    },
    {
      "epoch": 0.3055252739686729,
      "grad_norm": 0.2890625,
      "learning_rate": 0.000175351555415296,
      "loss": 0.983,
      "step": 5325
    },
    {
      "epoch": 0.3058121521601928,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00017528568258474704,
      "loss": 0.9497,
      "step": 5330
    },
    {
      "epoch": 0.30609903035171265,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00017521973425754675,
      "loss": 0.9057,
      "step": 5335
    },
    {
      "epoch": 0.30638590854323255,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00017515371049982824,
      "loss": 0.9332,
      "step": 5340
    },
    {
      "epoch": 0.3066727867347524,
      "grad_norm": 0.3046875,
      "learning_rate": 0.00017508761137780037,
      "loss": 0.969,
      "step": 5345
    },
    {
      "epoch": 0.3069596649262723,
      "grad_norm": 0.2734375,
      "learning_rate": 0.00017502143695774741,
      "loss": 0.9633,
      "step": 5350
    },
    {
      "epoch": 0.3072465431177922,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00017495518730602924,
      "loss": 0.9164,
      "step": 5355
    },
    {
      "epoch": 0.30753342130931205,
      "grad_norm": 0.28515625,
      "learning_rate": 0.00017488886248908118,
      "loss": 0.9805,
      "step": 5360
    },
    {
      "epoch": 0.30782029950083195,
      "grad_norm": 0.2734375,
      "learning_rate": 0.0001748224625734139,
      "loss": 0.921,
      "step": 5365
    },
    {
      "epoch": 0.30810717769235185,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00017475598762561333,
      "loss": 0.9671,
      "step": 5370
    },
    {
      "epoch": 0.3083940558838717,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00017468943771234075,
      "loss": 0.9246,
      "step": 5375
    },
    {
      "epoch": 0.3086809340753916,
      "grad_norm": 0.2734375,
      "learning_rate": 0.00017462281290033256,
      "loss": 0.9271,
      "step": 5380
    },
    {
      "epoch": 0.30896781226691145,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00017455611325640024,
      "loss": 0.9343,
      "step": 5385
    },
    {
      "epoch": 0.30925469045843135,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00017448933884743037,
      "loss": 0.9428,
      "step": 5390
    },
    {
      "epoch": 0.30954156864995125,
      "grad_norm": 0.29296875,
      "learning_rate": 0.0001744224897403845,
      "loss": 0.986,
      "step": 5395
    },
    {
      "epoch": 0.3098284468414711,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00017435556600229902,
      "loss": 0.9702,
      "step": 5400
    },
    {
      "epoch": 0.310115325032991,
      "grad_norm": 0.271484375,
      "learning_rate": 0.0001742885677002852,
      "loss": 1.0214,
      "step": 5405
    },
    {
      "epoch": 0.3104022032245109,
      "grad_norm": 0.2734375,
      "learning_rate": 0.00017422149490152914,
      "loss": 0.9759,
      "step": 5410
    },
    {
      "epoch": 0.31068908141603074,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00017415434767329154,
      "loss": 1.042,
      "step": 5415
    },
    {
      "epoch": 0.31097595960755064,
      "grad_norm": 0.275390625,
      "learning_rate": 0.0001740871260829078,
      "loss": 1.0227,
      "step": 5420
    },
    {
      "epoch": 0.3112628377990705,
      "grad_norm": 0.283203125,
      "learning_rate": 0.0001740198301977879,
      "loss": 0.9925,
      "step": 5425
    },
    {
      "epoch": 0.3115497159905904,
      "grad_norm": 0.265625,
      "learning_rate": 0.0001739524600854163,
      "loss": 0.9934,
      "step": 5430
    },
    {
      "epoch": 0.3118365941821103,
      "grad_norm": 0.251953125,
      "learning_rate": 0.0001738850158133519,
      "loss": 0.9855,
      "step": 5435
    },
    {
      "epoch": 0.31212347237363014,
      "grad_norm": 0.287109375,
      "learning_rate": 0.00017381749744922796,
      "loss": 0.9412,
      "step": 5440
    },
    {
      "epoch": 0.31241035056515004,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00017374990506075207,
      "loss": 0.9927,
      "step": 5445
    },
    {
      "epoch": 0.31269722875666994,
      "grad_norm": 0.294921875,
      "learning_rate": 0.00017368223871570596,
      "loss": 1.0166,
      "step": 5450
    },
    {
      "epoch": 0.3129841069481898,
      "grad_norm": 0.267578125,
      "learning_rate": 0.0001736144984819457,
      "loss": 1.0061,
      "step": 5455
    },
    {
      "epoch": 0.3132709851397097,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00017354668442740126,
      "loss": 0.9393,
      "step": 5460
    },
    {
      "epoch": 0.31355786333122954,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00017347879662007676,
      "loss": 1.0184,
      "step": 5465
    },
    {
      "epoch": 0.31384474152274944,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00017341083512805025,
      "loss": 0.9302,
      "step": 5470
    },
    {
      "epoch": 0.31413161971426934,
      "grad_norm": 0.283203125,
      "learning_rate": 0.00017334280001947362,
      "loss": 0.9412,
      "step": 5475
    },
    {
      "epoch": 0.3144184979057892,
      "grad_norm": 0.2734375,
      "learning_rate": 0.00017327469136257272,
      "loss": 0.9884,
      "step": 5480
    },
    {
      "epoch": 0.3147053760973091,
      "grad_norm": 0.271484375,
      "learning_rate": 0.000173206509225647,
      "loss": 0.9041,
      "step": 5485
    },
    {
      "epoch": 0.314992254288829,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00017313825367706967,
      "loss": 0.9434,
      "step": 5490
    },
    {
      "epoch": 0.31527913248034883,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00017306992478528753,
      "loss": 1.0095,
      "step": 5495
    },
    {
      "epoch": 0.31556601067186874,
      "grad_norm": 0.271484375,
      "learning_rate": 0.000173001522618821,
      "loss": 0.9315,
      "step": 5500
    },
    {
      "epoch": 0.3158528888633886,
      "grad_norm": 0.265625,
      "learning_rate": 0.00017293304724626385,
      "loss": 0.9799,
      "step": 5505
    },
    {
      "epoch": 0.3161397670549085,
      "grad_norm": 0.27734375,
      "learning_rate": 0.0001728644987362834,
      "loss": 1.0289,
      "step": 5510
    },
    {
      "epoch": 0.3164266452464284,
      "grad_norm": 0.248046875,
      "learning_rate": 0.00017279587715762022,
      "loss": 0.9204,
      "step": 5515
    },
    {
      "epoch": 0.31671352343794823,
      "grad_norm": 0.314453125,
      "learning_rate": 0.0001727271825790882,
      "loss": 0.9878,
      "step": 5520
    },
    {
      "epoch": 0.31700040162946813,
      "grad_norm": 0.255859375,
      "learning_rate": 0.0001726584150695744,
      "loss": 0.9265,
      "step": 5525
    },
    {
      "epoch": 0.31728727982098803,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00017258957469803906,
      "loss": 0.9716,
      "step": 5530
    },
    {
      "epoch": 0.3175741580125079,
      "grad_norm": 0.275390625,
      "learning_rate": 0.0001725206615335154,
      "loss": 0.9631,
      "step": 5535
    },
    {
      "epoch": 0.3178610362040278,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00017245167564510974,
      "loss": 0.909,
      "step": 5540
    },
    {
      "epoch": 0.3181479143955476,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00017238261710200128,
      "loss": 0.9312,
      "step": 5545
    },
    {
      "epoch": 0.31843479258706753,
      "grad_norm": 0.287109375,
      "learning_rate": 0.0001723134859734421,
      "loss": 0.9891,
      "step": 5550
    },
    {
      "epoch": 0.31872167077858743,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00017224428232875703,
      "loss": 0.8686,
      "step": 5555
    },
    {
      "epoch": 0.3190085489701073,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00017217500623734365,
      "loss": 0.9146,
      "step": 5560
    },
    {
      "epoch": 0.3192954271616272,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00017210565776867216,
      "loss": 0.9702,
      "step": 5565
    },
    {
      "epoch": 0.3195823053531471,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00017203623699228537,
      "loss": 0.9483,
      "step": 5570
    },
    {
      "epoch": 0.3198691835446669,
      "grad_norm": 0.279296875,
      "learning_rate": 0.0001719667439777986,
      "loss": 0.9595,
      "step": 5575
    },
    {
      "epoch": 0.3201560617361868,
      "grad_norm": 0.29296875,
      "learning_rate": 0.00017189717879489958,
      "loss": 0.9104,
      "step": 5580
    },
    {
      "epoch": 0.32044293992770667,
      "grad_norm": 0.28515625,
      "learning_rate": 0.00017182754151334842,
      "loss": 0.929,
      "step": 5585
    },
    {
      "epoch": 0.3207298181192266,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00017175783220297762,
      "loss": 0.9127,
      "step": 5590
    },
    {
      "epoch": 0.3210166963107465,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00017168805093369173,
      "loss": 0.9491,
      "step": 5595
    },
    {
      "epoch": 0.3213035745022663,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00017161819777546767,
      "loss": 0.9266,
      "step": 5600
    },
    {
      "epoch": 0.3215904526937862,
      "grad_norm": 0.3359375,
      "learning_rate": 0.0001715482727983542,
      "loss": 1.014,
      "step": 5605
    },
    {
      "epoch": 0.3218773308853061,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00017147827607247242,
      "loss": 0.9368,
      "step": 5610
    },
    {
      "epoch": 0.32216420907682597,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00017140820766801506,
      "loss": 1.0029,
      "step": 5615
    },
    {
      "epoch": 0.32245108726834587,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00017133806765524693,
      "loss": 0.9271,
      "step": 5620
    },
    {
      "epoch": 0.3227379654598657,
      "grad_norm": 0.28125,
      "learning_rate": 0.0001712678561045046,
      "loss": 0.9974,
      "step": 5625
    },
    {
      "epoch": 0.3230248436513856,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00017119757308619639,
      "loss": 0.9648,
      "step": 5630
    },
    {
      "epoch": 0.3233117218429055,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00017112721867080217,
      "loss": 0.9823,
      "step": 5635
    },
    {
      "epoch": 0.32359860003442537,
      "grad_norm": 0.275390625,
      "learning_rate": 0.0001710567929288736,
      "loss": 0.9823,
      "step": 5640
    },
    {
      "epoch": 0.32388547822594527,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00017098629593103378,
      "loss": 0.9614,
      "step": 5645
    },
    {
      "epoch": 0.32417235641746517,
      "grad_norm": 0.2490234375,
      "learning_rate": 0.00017091572774797714,
      "loss": 1.014,
      "step": 5650
    },
    {
      "epoch": 0.324459234608985,
      "grad_norm": 0.265625,
      "learning_rate": 0.00017084508845046975,
      "loss": 1.0164,
      "step": 5655
    },
    {
      "epoch": 0.3247461128005049,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00017077437810934882,
      "loss": 0.9267,
      "step": 5660
    },
    {
      "epoch": 0.32503299099202476,
      "grad_norm": 0.267578125,
      "learning_rate": 0.0001707035967955228,
      "loss": 0.9279,
      "step": 5665
    },
    {
      "epoch": 0.32531986918354466,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00017063274457997137,
      "loss": 0.9867,
      "step": 5670
    },
    {
      "epoch": 0.32560674737506456,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00017056182153374526,
      "loss": 0.9867,
      "step": 5675
    },
    {
      "epoch": 0.3258936255665844,
      "grad_norm": 0.310546875,
      "learning_rate": 0.00017049082772796633,
      "loss": 0.9746,
      "step": 5680
    },
    {
      "epoch": 0.3261805037581043,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00017041976323382726,
      "loss": 0.9462,
      "step": 5685
    },
    {
      "epoch": 0.3264673819496242,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00017034862812259174,
      "loss": 1.0081,
      "step": 5690
    },
    {
      "epoch": 0.32675426014114406,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00017027742246559417,
      "loss": 0.9711,
      "step": 5695
    },
    {
      "epoch": 0.32704113833266396,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00017020614633423976,
      "loss": 0.9295,
      "step": 5700
    },
    {
      "epoch": 0.3273280165241838,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00017013479980000436,
      "loss": 0.9887,
      "step": 5705
    },
    {
      "epoch": 0.3276148947157037,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00017006338293443446,
      "loss": 1.0193,
      "step": 5710
    },
    {
      "epoch": 0.3279017729072236,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00016999189580914708,
      "loss": 0.9274,
      "step": 5715
    },
    {
      "epoch": 0.32818865109874346,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00016992033849582962,
      "loss": 0.8722,
      "step": 5720
    },
    {
      "epoch": 0.32847552929026336,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00016984871106623988,
      "loss": 0.8897,
      "step": 5725
    },
    {
      "epoch": 0.32876240748178326,
      "grad_norm": 0.294921875,
      "learning_rate": 0.00016977701359220613,
      "loss": 0.9323,
      "step": 5730
    },
    {
      "epoch": 0.3290492856733031,
      "grad_norm": 0.3515625,
      "learning_rate": 0.00016970524614562664,
      "loss": 0.9681,
      "step": 5735
    },
    {
      "epoch": 0.329336163864823,
      "grad_norm": 0.2734375,
      "learning_rate": 0.00016963340879847002,
      "loss": 0.9279,
      "step": 5740
    },
    {
      "epoch": 0.32962304205634285,
      "grad_norm": 0.28125,
      "learning_rate": 0.0001695615016227749,
      "loss": 0.9294,
      "step": 5745
    },
    {
      "epoch": 0.32990992024786275,
      "grad_norm": 0.28515625,
      "learning_rate": 0.00016948952469065,
      "loss": 0.9861,
      "step": 5750
    },
    {
      "epoch": 0.33019679843938265,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00016941747807427387,
      "loss": 0.9409,
      "step": 5755
    },
    {
      "epoch": 0.3304836766309025,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00016934536184589512,
      "loss": 0.9838,
      "step": 5760
    },
    {
      "epoch": 0.3307705548224224,
      "grad_norm": 0.28125,
      "learning_rate": 0.00016927317607783198,
      "loss": 0.9714,
      "step": 5765
    },
    {
      "epoch": 0.3310574330139423,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00016920092084247255,
      "loss": 0.9408,
      "step": 5770
    },
    {
      "epoch": 0.33134431120546215,
      "grad_norm": 0.28125,
      "learning_rate": 0.0001691285962122745,
      "loss": 1.0076,
      "step": 5775
    },
    {
      "epoch": 0.33163118939698205,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00016905620225976517,
      "loss": 0.9663,
      "step": 5780
    },
    {
      "epoch": 0.3319180675885019,
      "grad_norm": 0.298828125,
      "learning_rate": 0.00016898373905754137,
      "loss": 0.9457,
      "step": 5785
    },
    {
      "epoch": 0.3322049457800218,
      "grad_norm": 0.2890625,
      "learning_rate": 0.00016891120667826933,
      "loss": 0.9358,
      "step": 5790
    },
    {
      "epoch": 0.3324918239715417,
      "grad_norm": 0.291015625,
      "learning_rate": 0.00016883860519468472,
      "loss": 0.922,
      "step": 5795
    },
    {
      "epoch": 0.33277870216306155,
      "grad_norm": 0.267578125,
      "learning_rate": 0.0001687659346795925,
      "loss": 0.8944,
      "step": 5800
    },
    {
      "epoch": 0.33306558035458145,
      "grad_norm": 0.28125,
      "learning_rate": 0.00016869319520586675,
      "loss": 0.9289,
      "step": 5805
    },
    {
      "epoch": 0.33335245854610135,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00016862038684645078,
      "loss": 0.959,
      "step": 5810
    },
    {
      "epoch": 0.3336393367376212,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00016854750967435704,
      "loss": 1.01,
      "step": 5815
    },
    {
      "epoch": 0.3339262149291411,
      "grad_norm": 0.265625,
      "learning_rate": 0.0001684745637626669,
      "loss": 1.0157,
      "step": 5820
    },
    {
      "epoch": 0.33421309312066094,
      "grad_norm": 0.283203125,
      "learning_rate": 0.00016840154918453063,
      "loss": 0.9816,
      "step": 5825
    },
    {
      "epoch": 0.33449997131218084,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00016832846601316749,
      "loss": 0.9529,
      "step": 5830
    },
    {
      "epoch": 0.33478684950370075,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00016825531432186543,
      "loss": 0.9697,
      "step": 5835
    },
    {
      "epoch": 0.3350737276952206,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00016818209418398107,
      "loss": 0.9617,
      "step": 5840
    },
    {
      "epoch": 0.3353606058867405,
      "grad_norm": 0.28515625,
      "learning_rate": 0.0001681088056729398,
      "loss": 1.0015,
      "step": 5845
    },
    {
      "epoch": 0.3356474840782604,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00016803544886223547,
      "loss": 0.9556,
      "step": 5850
    },
    {
      "epoch": 0.33593436226978024,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00016796202382543047,
      "loss": 0.9678,
      "step": 5855
    },
    {
      "epoch": 0.33622124046130014,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00016788853063615556,
      "loss": 0.9375,
      "step": 5860
    },
    {
      "epoch": 0.33650811865282,
      "grad_norm": 0.259765625,
      "learning_rate": 0.0001678149693681099,
      "loss": 0.9212,
      "step": 5865
    },
    {
      "epoch": 0.3367949968443399,
      "grad_norm": 0.267578125,
      "learning_rate": 0.0001677413400950609,
      "loss": 1.0206,
      "step": 5870
    },
    {
      "epoch": 0.3370818750358598,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00016766764289084414,
      "loss": 0.9991,
      "step": 5875
    },
    {
      "epoch": 0.33736875322737964,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00016759387782936335,
      "loss": 0.8914,
      "step": 5880
    },
    {
      "epoch": 0.33765563141889954,
      "grad_norm": 0.2890625,
      "learning_rate": 0.00016752004498459032,
      "loss": 0.9712,
      "step": 5885
    },
    {
      "epoch": 0.33794250961041944,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00016744614443056475,
      "loss": 0.9701,
      "step": 5890
    },
    {
      "epoch": 0.3382293878019393,
      "grad_norm": 0.28125,
      "learning_rate": 0.00016737217624139433,
      "loss": 0.9325,
      "step": 5895
    },
    {
      "epoch": 0.3385162659934592,
      "grad_norm": 0.267578125,
      "learning_rate": 0.0001672981404912545,
      "loss": 0.8982,
      "step": 5900
    },
    {
      "epoch": 0.33880314418497903,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00016722403725438845,
      "loss": 0.9592,
      "step": 5905
    },
    {
      "epoch": 0.33909002237649893,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00016714986660510715,
      "loss": 0.8901,
      "step": 5910
    },
    {
      "epoch": 0.33937690056801884,
      "grad_norm": 0.259765625,
      "learning_rate": 0.000167075628617789,
      "loss": 1.0316,
      "step": 5915
    },
    {
      "epoch": 0.3396637787595387,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00016700132336688005,
      "loss": 0.9365,
      "step": 5920
    },
    {
      "epoch": 0.3399506569510586,
      "grad_norm": 0.267578125,
      "learning_rate": 0.0001669269509268938,
      "loss": 1.0252,
      "step": 5925
    },
    {
      "epoch": 0.3402375351425785,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00016685251137241113,
      "loss": 1.0248,
      "step": 5930
    },
    {
      "epoch": 0.34052441333409833,
      "grad_norm": 0.2890625,
      "learning_rate": 0.0001667780047780801,
      "loss": 0.954,
      "step": 5935
    },
    {
      "epoch": 0.34081129152561823,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00016670343121861613,
      "loss": 0.9679,
      "step": 5940
    },
    {
      "epoch": 0.3410981697171381,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00016662879076880178,
      "loss": 1.0479,
      "step": 5945
    },
    {
      "epoch": 0.341385047908658,
      "grad_norm": 0.302734375,
      "learning_rate": 0.00016655408350348664,
      "loss": 0.8797,
      "step": 5950
    },
    {
      "epoch": 0.3416719261001779,
      "grad_norm": 0.55859375,
      "learning_rate": 0.0001664793094975873,
      "loss": 0.9549,
      "step": 5955
    },
    {
      "epoch": 0.3419588042916977,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00016640446882608737,
      "loss": 0.9245,
      "step": 5960
    },
    {
      "epoch": 0.34224568248321763,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00016632956156403716,
      "loss": 1.0159,
      "step": 5965
    },
    {
      "epoch": 0.34253256067473753,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00016625458778655387,
      "loss": 0.9792,
      "step": 5970
    },
    {
      "epoch": 0.3428194388662574,
      "grad_norm": 0.2890625,
      "learning_rate": 0.00016617954756882144,
      "loss": 1.0467,
      "step": 5975
    },
    {
      "epoch": 0.3431063170577773,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00016610444098609026,
      "loss": 0.9083,
      "step": 5980
    },
    {
      "epoch": 0.3433931952492971,
      "grad_norm": 0.28125,
      "learning_rate": 0.00016602926811367744,
      "loss": 0.9455,
      "step": 5985
    },
    {
      "epoch": 0.343680073440817,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00016595402902696646,
      "loss": 0.973,
      "step": 5990
    },
    {
      "epoch": 0.3439669516323369,
      "grad_norm": 0.26171875,
      "learning_rate": 0.0001658787238014073,
      "loss": 1.0536,
      "step": 5995
    },
    {
      "epoch": 0.34425382982385677,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00016580335251251623,
      "loss": 0.984,
      "step": 6000
    },
    {
      "epoch": 0.3445407080153767,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00016572791523587562,
      "loss": 0.9788,
      "step": 6005
    },
    {
      "epoch": 0.3448275862068966,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00016565241204713428,
      "loss": 1.0371,
      "step": 6010
    },
    {
      "epoch": 0.3451144643984164,
      "grad_norm": 0.265625,
      "learning_rate": 0.0001655768430220069,
      "loss": 0.9485,
      "step": 6015
    },
    {
      "epoch": 0.3454013425899363,
      "grad_norm": 0.318359375,
      "learning_rate": 0.0001655012082362743,
      "loss": 0.8763,
      "step": 6020
    },
    {
      "epoch": 0.34568822078145617,
      "grad_norm": 0.287109375,
      "learning_rate": 0.00016542550776578322,
      "loss": 0.9322,
      "step": 6025
    },
    {
      "epoch": 0.34597509897297607,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00016534974168644625,
      "loss": 0.9894,
      "step": 6030
    },
    {
      "epoch": 0.34626197716449597,
      "grad_norm": 0.287109375,
      "learning_rate": 0.0001652739100742417,
      "loss": 1.0145,
      "step": 6035
    },
    {
      "epoch": 0.3465488553560158,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00016519801300521385,
      "loss": 1.0105,
      "step": 6040
    },
    {
      "epoch": 0.3468357335475357,
      "grad_norm": 0.294921875,
      "learning_rate": 0.0001651220505554723,
      "loss": 0.9631,
      "step": 6045
    },
    {
      "epoch": 0.3471226117390556,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00016504602280119243,
      "loss": 0.9493,
      "step": 6050
    },
    {
      "epoch": 0.34740948993057547,
      "grad_norm": 0.298828125,
      "learning_rate": 0.0001649699298186151,
      "loss": 0.9545,
      "step": 6055
    },
    {
      "epoch": 0.34769636812209537,
      "grad_norm": 0.26953125,
      "learning_rate": 0.0001648937716840464,
      "loss": 0.9363,
      "step": 6060
    },
    {
      "epoch": 0.3479832463136152,
      "grad_norm": 0.2890625,
      "learning_rate": 0.00016481754847385793,
      "loss": 1.0452,
      "step": 6065
    },
    {
      "epoch": 0.3482701245051351,
      "grad_norm": 0.265625,
      "learning_rate": 0.00016474126026448652,
      "loss": 0.9987,
      "step": 6070
    },
    {
      "epoch": 0.348557002696655,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00016466490713243416,
      "loss": 0.8785,
      "step": 6075
    },
    {
      "epoch": 0.34884388088817486,
      "grad_norm": 0.287109375,
      "learning_rate": 0.00016458848915426792,
      "loss": 0.9639,
      "step": 6080
    },
    {
      "epoch": 0.34913075907969476,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00016451200640661993,
      "loss": 1.0097,
      "step": 6085
    },
    {
      "epoch": 0.34941763727121466,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00016443545896618723,
      "loss": 0.9978,
      "step": 6090
    },
    {
      "epoch": 0.3497045154627345,
      "grad_norm": 0.271484375,
      "learning_rate": 0.0001643588469097318,
      "loss": 1.0066,
      "step": 6095
    },
    {
      "epoch": 0.3499913936542544,
      "grad_norm": 0.283203125,
      "learning_rate": 0.00016428217031408038,
      "loss": 0.9327,
      "step": 6100
    },
    {
      "epoch": 0.35027827184577426,
      "grad_norm": 0.267578125,
      "learning_rate": 0.0001642054292561244,
      "loss": 0.9271,
      "step": 6105
    },
    {
      "epoch": 0.35056515003729416,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00016412862381282004,
      "loss": 1.0217,
      "step": 6110
    },
    {
      "epoch": 0.35085202822881406,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00016405175406118786,
      "loss": 1.0257,
      "step": 6115
    },
    {
      "epoch": 0.3511389064203339,
      "grad_norm": 0.265625,
      "learning_rate": 0.00016397482007831312,
      "loss": 0.9198,
      "step": 6120
    },
    {
      "epoch": 0.3514257846118538,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00016389782194134534,
      "loss": 0.9217,
      "step": 6125
    },
    {
      "epoch": 0.3517126628033737,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00016382075972749843,
      "loss": 0.9233,
      "step": 6130
    },
    {
      "epoch": 0.35199954099489356,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00016374363351405054,
      "loss": 0.9766,
      "step": 6135
    },
    {
      "epoch": 0.35228641918641346,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00016366644337834405,
      "loss": 0.9753,
      "step": 6140
    },
    {
      "epoch": 0.3525732973779333,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00016358918939778536,
      "loss": 0.9589,
      "step": 6145
    },
    {
      "epoch": 0.3528601755694532,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00016351187164984494,
      "loss": 0.9289,
      "step": 6150
    },
    {
      "epoch": 0.3531470537609731,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00016343449021205726,
      "loss": 0.9931,
      "step": 6155
    },
    {
      "epoch": 0.35343393195249295,
      "grad_norm": 0.29296875,
      "learning_rate": 0.00016335704516202051,
      "loss": 0.9516,
      "step": 6160
    },
    {
      "epoch": 0.35372081014401285,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00016327953657739678,
      "loss": 0.971,
      "step": 6165
    },
    {
      "epoch": 0.35400768833553276,
      "grad_norm": 0.275390625,
      "learning_rate": 0.0001632019645359119,
      "loss": 1.0291,
      "step": 6170
    },
    {
      "epoch": 0.3542945665270526,
      "grad_norm": 0.298828125,
      "learning_rate": 0.00016312432911535528,
      "loss": 0.9575,
      "step": 6175
    },
    {
      "epoch": 0.3545814447185725,
      "grad_norm": 0.345703125,
      "learning_rate": 0.00016304663039357986,
      "loss": 1.0393,
      "step": 6180
    },
    {
      "epoch": 0.35486832291009235,
      "grad_norm": 0.275390625,
      "learning_rate": 0.0001629688684485021,
      "loss": 0.9207,
      "step": 6185
    },
    {
      "epoch": 0.35515520110161225,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00016289104335810185,
      "loss": 0.9745,
      "step": 6190
    },
    {
      "epoch": 0.35544207929313215,
      "grad_norm": 0.265625,
      "learning_rate": 0.00016281315520042233,
      "loss": 0.9181,
      "step": 6195
    },
    {
      "epoch": 0.355728957484652,
      "grad_norm": 0.25,
      "learning_rate": 0.0001627352040535699,
      "loss": 0.8665,
      "step": 6200
    },
    {
      "epoch": 0.3560158356761719,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00016265718999571415,
      "loss": 0.9876,
      "step": 6205
    },
    {
      "epoch": 0.3563027138676918,
      "grad_norm": 0.27734375,
      "learning_rate": 0.0001625791131050878,
      "loss": 0.9732,
      "step": 6210
    },
    {
      "epoch": 0.35658959205921165,
      "grad_norm": 0.283203125,
      "learning_rate": 0.0001625009734599865,
      "loss": 0.9622,
      "step": 6215
    },
    {
      "epoch": 0.35687647025073155,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00016242277113876887,
      "loss": 0.9373,
      "step": 6220
    },
    {
      "epoch": 0.3571633484422514,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00016234450621985635,
      "loss": 0.9816,
      "step": 6225
    },
    {
      "epoch": 0.3574502266337713,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00016226617878173317,
      "loss": 1.0049,
      "step": 6230
    },
    {
      "epoch": 0.3577371048252912,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00016218778890294636,
      "loss": 0.9598,
      "step": 6235
    },
    {
      "epoch": 0.35802398301681104,
      "grad_norm": 0.283203125,
      "learning_rate": 0.00016210933666210533,
      "loss": 0.9202,
      "step": 6240
    },
    {
      "epoch": 0.35831086120833094,
      "grad_norm": 0.26953125,
      "learning_rate": 0.0001620308221378822,
      "loss": 0.9408,
      "step": 6245
    },
    {
      "epoch": 0.35859773939985085,
      "grad_norm": 0.302734375,
      "learning_rate": 0.00016195224540901156,
      "loss": 0.9816,
      "step": 6250
    },
    {
      "epoch": 0.3588846175913707,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00016187360655429034,
      "loss": 0.994,
      "step": 6255
    },
    {
      "epoch": 0.3591714957828906,
      "grad_norm": 0.265625,
      "learning_rate": 0.0001617949056525777,
      "loss": 0.9918,
      "step": 6260
    },
    {
      "epoch": 0.35945837397441044,
      "grad_norm": 0.267578125,
      "learning_rate": 0.0001617161427827951,
      "loss": 0.9715,
      "step": 6265
    },
    {
      "epoch": 0.35974525216593034,
      "grad_norm": 0.26953125,
      "learning_rate": 0.0001616373180239261,
      "loss": 0.9482,
      "step": 6270
    },
    {
      "epoch": 0.36003213035745024,
      "grad_norm": 0.310546875,
      "learning_rate": 0.0001615584314550164,
      "loss": 0.9451,
      "step": 6275
    },
    {
      "epoch": 0.3603190085489701,
      "grad_norm": 0.28125,
      "learning_rate": 0.00016147948315517357,
      "loss": 0.9452,
      "step": 6280
    },
    {
      "epoch": 0.36060588674049,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00016140047320356723,
      "loss": 0.9466,
      "step": 6285
    },
    {
      "epoch": 0.3608927649320099,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00016132140167942862,
      "loss": 0.9951,
      "step": 6290
    },
    {
      "epoch": 0.36117964312352974,
      "grad_norm": 0.27734375,
      "learning_rate": 0.0001612422686620509,
      "loss": 0.8864,
      "step": 6295
    },
    {
      "epoch": 0.36146652131504964,
      "grad_norm": 0.26171875,
      "learning_rate": 0.0001611630742307889,
      "loss": 0.9521,
      "step": 6300
    },
    {
      "epoch": 0.3617533995065695,
      "grad_norm": 0.2734375,
      "learning_rate": 0.00016108381846505885,
      "loss": 0.9414,
      "step": 6305
    },
    {
      "epoch": 0.3620402776980894,
      "grad_norm": 0.255859375,
      "learning_rate": 0.0001610045014443387,
      "loss": 0.9188,
      "step": 6310
    },
    {
      "epoch": 0.3623271558896093,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00016092512324816772,
      "loss": 0.9346,
      "step": 6315
    },
    {
      "epoch": 0.36261403408112913,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00016084568395614648,
      "loss": 0.9766,
      "step": 6320
    },
    {
      "epoch": 0.36290091227264903,
      "grad_norm": 0.287109375,
      "learning_rate": 0.00016076618364793696,
      "loss": 0.9934,
      "step": 6325
    },
    {
      "epoch": 0.36318779046416894,
      "grad_norm": 0.283203125,
      "learning_rate": 0.0001606866224032622,
      "loss": 0.9688,
      "step": 6330
    },
    {
      "epoch": 0.3634746686556888,
      "grad_norm": 0.255859375,
      "learning_rate": 0.0001606070003019064,
      "loss": 0.8889,
      "step": 6335
    },
    {
      "epoch": 0.3637615468472087,
      "grad_norm": 0.2890625,
      "learning_rate": 0.00016052731742371485,
      "loss": 0.9842,
      "step": 6340
    },
    {
      "epoch": 0.36404842503872853,
      "grad_norm": 0.287109375,
      "learning_rate": 0.00016044757384859365,
      "loss": 0.9755,
      "step": 6345
    },
    {
      "epoch": 0.36433530323024843,
      "grad_norm": 0.267578125,
      "learning_rate": 0.0001603677696565098,
      "loss": 0.8822,
      "step": 6350
    },
    {
      "epoch": 0.36462218142176833,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00016028790492749118,
      "loss": 0.8748,
      "step": 6355
    },
    {
      "epoch": 0.3649090596132882,
      "grad_norm": 0.35546875,
      "learning_rate": 0.00016020797974162636,
      "loss": 0.9934,
      "step": 6360
    },
    {
      "epoch": 0.3651959378048081,
      "grad_norm": 0.28125,
      "learning_rate": 0.0001601279941790644,
      "loss": 0.9297,
      "step": 6365
    },
    {
      "epoch": 0.365482815996328,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00016004794832001507,
      "loss": 0.9782,
      "step": 6370
    },
    {
      "epoch": 0.3657696941878478,
      "grad_norm": 0.28125,
      "learning_rate": 0.0001599678422447485,
      "loss": 0.9823,
      "step": 6375
    },
    {
      "epoch": 0.36605657237936773,
      "grad_norm": 0.265625,
      "learning_rate": 0.00015988767603359526,
      "loss": 1.0366,
      "step": 6380
    },
    {
      "epoch": 0.3663434505708876,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00015980744976694622,
      "loss": 0.9987,
      "step": 6385
    },
    {
      "epoch": 0.3666303287624075,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00015972716352525242,
      "loss": 0.9615,
      "step": 6390
    },
    {
      "epoch": 0.3669172069539274,
      "grad_norm": 0.283203125,
      "learning_rate": 0.0001596468173890251,
      "loss": 0.9545,
      "step": 6395
    },
    {
      "epoch": 0.3672040851454472,
      "grad_norm": 0.26171875,
      "learning_rate": 0.0001595664114388356,
      "loss": 0.922,
      "step": 6400
    },
    {
      "epoch": 0.3674909633369671,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00015948594575531508,
      "loss": 0.9663,
      "step": 6405
    },
    {
      "epoch": 0.367777841528487,
      "grad_norm": 0.25,
      "learning_rate": 0.00015940542041915478,
      "loss": 0.9883,
      "step": 6410
    },
    {
      "epoch": 0.36806471972000687,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00015932483551110572,
      "loss": 0.9278,
      "step": 6415
    },
    {
      "epoch": 0.3683515979115268,
      "grad_norm": 0.283203125,
      "learning_rate": 0.00015924419111197852,
      "loss": 0.9985,
      "step": 6420
    },
    {
      "epoch": 0.3686384761030466,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00015916348730264367,
      "loss": 0.9238,
      "step": 6425
    },
    {
      "epoch": 0.3689253542945665,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00015908272416403105,
      "loss": 0.9527,
      "step": 6430
    },
    {
      "epoch": 0.3692122324860864,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00015900190177713016,
      "loss": 0.9884,
      "step": 6435
    },
    {
      "epoch": 0.36949911067760627,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00015892102022298986,
      "loss": 0.9004,
      "step": 6440
    },
    {
      "epoch": 0.36978598886912617,
      "grad_norm": 0.279296875,
      "learning_rate": 0.0001588400795827184,
      "loss": 0.9567,
      "step": 6445
    },
    {
      "epoch": 0.37007286706064607,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00015875907993748314,
      "loss": 0.9362,
      "step": 6450
    },
    {
      "epoch": 0.3703597452521659,
      "grad_norm": 0.267578125,
      "learning_rate": 0.0001586780213685108,
      "loss": 0.9851,
      "step": 6455
    },
    {
      "epoch": 0.3706466234436858,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00015859690395708702,
      "loss": 0.9376,
      "step": 6460
    },
    {
      "epoch": 0.37093350163520566,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00015851572778455657,
      "loss": 0.9481,
      "step": 6465
    },
    {
      "epoch": 0.37122037982672557,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00015843449293232307,
      "loss": 0.9841,
      "step": 6470
    },
    {
      "epoch": 0.37150725801824547,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00015835319948184903,
      "loss": 0.962,
      "step": 6475
    },
    {
      "epoch": 0.3717941362097653,
      "grad_norm": 0.2490234375,
      "learning_rate": 0.0001582718475146557,
      "loss": 0.9342,
      "step": 6480
    },
    {
      "epoch": 0.3720810144012852,
      "grad_norm": 0.265625,
      "learning_rate": 0.000158190437112323,
      "loss": 0.9377,
      "step": 6485
    },
    {
      "epoch": 0.3723678925928051,
      "grad_norm": 0.296875,
      "learning_rate": 0.00015810896835648952,
      "loss": 0.9895,
      "step": 6490
    },
    {
      "epoch": 0.37265477078432496,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00015802744132885227,
      "loss": 1.0321,
      "step": 6495
    },
    {
      "epoch": 0.37294164897584486,
      "grad_norm": 0.267578125,
      "learning_rate": 0.0001579458561111667,
      "loss": 0.9234,
      "step": 6500
    },
    {
      "epoch": 0.3732285271673647,
      "grad_norm": 0.271484375,
      "learning_rate": 0.0001578642127852467,
      "loss": 0.9092,
      "step": 6505
    },
    {
      "epoch": 0.3735154053588846,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00015778251143296437,
      "loss": 0.9752,
      "step": 6510
    },
    {
      "epoch": 0.3738022835504045,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00015770075213625,
      "loss": 0.9296,
      "step": 6515
    },
    {
      "epoch": 0.37408916174192436,
      "grad_norm": 0.2578125,
      "learning_rate": 0.000157618934977092,
      "loss": 0.9304,
      "step": 6520
    },
    {
      "epoch": 0.37437603993344426,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00015753706003753678,
      "loss": 1.0235,
      "step": 6525
    },
    {
      "epoch": 0.37466291812496416,
      "grad_norm": 0.28515625,
      "learning_rate": 0.00015745512739968878,
      "loss": 0.9754,
      "step": 6530
    },
    {
      "epoch": 0.374949796316484,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00015737313714571017,
      "loss": 0.9551,
      "step": 6535
    },
    {
      "epoch": 0.3752366745080039,
      "grad_norm": 0.265625,
      "learning_rate": 0.00015729108935782094,
      "loss": 0.9059,
      "step": 6540
    },
    {
      "epoch": 0.37552355269952375,
      "grad_norm": 0.287109375,
      "learning_rate": 0.00015720898411829889,
      "loss": 0.9985,
      "step": 6545
    },
    {
      "epoch": 0.37581043089104366,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00015712682150947923,
      "loss": 0.9303,
      "step": 6550
    },
    {
      "epoch": 0.37609730908256356,
      "grad_norm": 0.255859375,
      "learning_rate": 0.0001570446016137549,
      "loss": 0.9074,
      "step": 6555
    },
    {
      "epoch": 0.3763841872740834,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00015696232451357616,
      "loss": 0.9487,
      "step": 6560
    },
    {
      "epoch": 0.3766710654656033,
      "grad_norm": 0.28515625,
      "learning_rate": 0.0001568799902914506,
      "loss": 1.0237,
      "step": 6565
    },
    {
      "epoch": 0.3769579436571232,
      "grad_norm": 0.2890625,
      "learning_rate": 0.00015679759902994332,
      "loss": 0.9194,
      "step": 6570
    },
    {
      "epoch": 0.37724482184864305,
      "grad_norm": 0.26953125,
      "learning_rate": 0.0001567151508116763,
      "loss": 0.9603,
      "step": 6575
    },
    {
      "epoch": 0.37753170004016295,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00015663264571932892,
      "loss": 0.9517,
      "step": 6580
    },
    {
      "epoch": 0.3778185782316828,
      "grad_norm": 0.265625,
      "learning_rate": 0.0001565500838356374,
      "loss": 0.9255,
      "step": 6585
    },
    {
      "epoch": 0.3781054564232027,
      "grad_norm": 0.28515625,
      "learning_rate": 0.00015646746524339497,
      "loss": 1.0131,
      "step": 6590
    },
    {
      "epoch": 0.3783923346147226,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00015638479002545182,
      "loss": 0.946,
      "step": 6595
    },
    {
      "epoch": 0.37867921280624245,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00015630205826471478,
      "loss": 1.008,
      "step": 6600
    },
    {
      "epoch": 0.37896609099776235,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00015621927004414747,
      "loss": 1.0618,
      "step": 6605
    },
    {
      "epoch": 0.37925296918928225,
      "grad_norm": 0.279296875,
      "learning_rate": 0.0001561364254467701,
      "loss": 0.9527,
      "step": 6610
    },
    {
      "epoch": 0.3795398473808021,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00015605352455565937,
      "loss": 0.9921,
      "step": 6615
    },
    {
      "epoch": 0.379826725572322,
      "grad_norm": 0.267578125,
      "learning_rate": 0.0001559705674539486,
      "loss": 1.021,
      "step": 6620
    },
    {
      "epoch": 0.38011360376384185,
      "grad_norm": 0.265625,
      "learning_rate": 0.0001558875542248272,
      "loss": 0.9598,
      "step": 6625
    },
    {
      "epoch": 0.38040048195536175,
      "grad_norm": 0.265625,
      "learning_rate": 0.0001558044849515411,
      "loss": 0.9692,
      "step": 6630
    },
    {
      "epoch": 0.38068736014688165,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00015572135971739242,
      "loss": 0.9218,
      "step": 6635
    },
    {
      "epoch": 0.3809742383384015,
      "grad_norm": 0.271484375,
      "learning_rate": 0.0001556381786057392,
      "loss": 0.926,
      "step": 6640
    },
    {
      "epoch": 0.3812611165299214,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00015555494169999578,
      "loss": 0.935,
      "step": 6645
    },
    {
      "epoch": 0.3815479947214413,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00015547164908363224,
      "loss": 0.9532,
      "step": 6650
    },
    {
      "epoch": 0.38183487291296114,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00015538830084017456,
      "loss": 0.968,
      "step": 6655
    },
    {
      "epoch": 0.38212175110448104,
      "grad_norm": 0.28125,
      "learning_rate": 0.00015530489705320463,
      "loss": 0.9956,
      "step": 6660
    },
    {
      "epoch": 0.3824086292960009,
      "grad_norm": 0.271484375,
      "learning_rate": 0.0001552214378063599,
      "loss": 1.0108,
      "step": 6665
    },
    {
      "epoch": 0.3826955074875208,
      "grad_norm": 0.2734375,
      "learning_rate": 0.0001551379231833335,
      "loss": 0.9581,
      "step": 6670
    },
    {
      "epoch": 0.3829823856790407,
      "grad_norm": 0.283203125,
      "learning_rate": 0.00015505435326787414,
      "loss": 1.0014,
      "step": 6675
    },
    {
      "epoch": 0.38326926387056054,
      "grad_norm": 0.28515625,
      "learning_rate": 0.00015497072814378584,
      "loss": 0.9081,
      "step": 6680
    },
    {
      "epoch": 0.38355614206208044,
      "grad_norm": 0.28125,
      "learning_rate": 0.0001548870478949281,
      "loss": 0.9399,
      "step": 6685
    },
    {
      "epoch": 0.38384302025360034,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00015480331260521565,
      "loss": 0.9639,
      "step": 6690
    },
    {
      "epoch": 0.3841298984451202,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00015471952235861843,
      "loss": 0.9984,
      "step": 6695
    },
    {
      "epoch": 0.3844167766366401,
      "grad_norm": 0.28515625,
      "learning_rate": 0.0001546356772391615,
      "loss": 0.9647,
      "step": 6700
    },
    {
      "epoch": 0.38470365482816,
      "grad_norm": 0.267578125,
      "learning_rate": 0.0001545517773309249,
      "loss": 0.9908,
      "step": 6705
    },
    {
      "epoch": 0.38499053301967984,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00015446782271804366,
      "loss": 0.9489,
      "step": 6710
    },
    {
      "epoch": 0.38527741121119974,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00015438381348470767,
      "loss": 0.9667,
      "step": 6715
    },
    {
      "epoch": 0.3855642894027196,
      "grad_norm": 0.28125,
      "learning_rate": 0.00015429974971516156,
      "loss": 0.9159,
      "step": 6720
    },
    {
      "epoch": 0.3858511675942395,
      "grad_norm": 0.275390625,
      "learning_rate": 0.0001542156314937047,
      "loss": 0.8806,
      "step": 6725
    },
    {
      "epoch": 0.3861380457857594,
      "grad_norm": 0.271484375,
      "learning_rate": 0.000154131458904691,
      "loss": 0.9385,
      "step": 6730
    },
    {
      "epoch": 0.38642492397727923,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00015404723203252894,
      "loss": 0.9425,
      "step": 6735
    },
    {
      "epoch": 0.38671180216879913,
      "grad_norm": 0.2734375,
      "learning_rate": 0.0001539629509616814,
      "loss": 1.0188,
      "step": 6740
    },
    {
      "epoch": 0.38699868036031904,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00015387861577666559,
      "loss": 0.9811,
      "step": 6745
    },
    {
      "epoch": 0.3872855585518389,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00015379422656205307,
      "loss": 0.956,
      "step": 6750
    },
    {
      "epoch": 0.3875724367433588,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00015370978340246955,
      "loss": 0.9814,
      "step": 6755
    },
    {
      "epoch": 0.38785931493487863,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00015362528638259478,
      "loss": 0.9368,
      "step": 6760
    },
    {
      "epoch": 0.38814619312639853,
      "grad_norm": 0.26171875,
      "learning_rate": 0.0001535407355871626,
      "loss": 0.8959,
      "step": 6765
    },
    {
      "epoch": 0.38843307131791843,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00015345613110096068,
      "loss": 0.8967,
      "step": 6770
    },
    {
      "epoch": 0.3887199495094383,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00015337147300883066,
      "loss": 1.0397,
      "step": 6775
    },
    {
      "epoch": 0.3890068277009582,
      "grad_norm": 0.267578125,
      "learning_rate": 0.0001532867613956678,
      "loss": 0.9463,
      "step": 6780
    },
    {
      "epoch": 0.3892937058924781,
      "grad_norm": 0.267578125,
      "learning_rate": 0.0001532019963464211,
      "loss": 0.9226,
      "step": 6785
    },
    {
      "epoch": 0.3895805840839979,
      "grad_norm": 0.29296875,
      "learning_rate": 0.00015311717794609325,
      "loss": 0.9226,
      "step": 6790
    },
    {
      "epoch": 0.38986746227551783,
      "grad_norm": 0.263671875,
      "learning_rate": 0.0001530323062797402,
      "loss": 0.9181,
      "step": 6795
    },
    {
      "epoch": 0.3901543404670377,
      "grad_norm": 0.30859375,
      "learning_rate": 0.00015294738143247148,
      "loss": 0.9984,
      "step": 6800
    },
    {
      "epoch": 0.3904412186585576,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00015286240348944997,
      "loss": 0.9387,
      "step": 6805
    },
    {
      "epoch": 0.3907280968500775,
      "grad_norm": 0.291015625,
      "learning_rate": 0.00015277737253589164,
      "loss": 0.956,
      "step": 6810
    },
    {
      "epoch": 0.3910149750415973,
      "grad_norm": 0.291015625,
      "learning_rate": 0.00015269228865706584,
      "loss": 1.0067,
      "step": 6815
    },
    {
      "epoch": 0.3913018532331172,
      "grad_norm": 0.3125,
      "learning_rate": 0.0001526071519382948,
      "loss": 0.9523,
      "step": 6820
    },
    {
      "epoch": 0.3915887314246371,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00015252196246495382,
      "loss": 0.9101,
      "step": 6825
    },
    {
      "epoch": 0.391875609616157,
      "grad_norm": 0.3203125,
      "learning_rate": 0.00015243672032247112,
      "loss": 1.0325,
      "step": 6830
    },
    {
      "epoch": 0.3921624878076769,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00015235142559632766,
      "loss": 0.9415,
      "step": 6835
    },
    {
      "epoch": 0.3924493659991967,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00015226607837205727,
      "loss": 0.9169,
      "step": 6840
    },
    {
      "epoch": 0.3927362441907166,
      "grad_norm": 0.2470703125,
      "learning_rate": 0.00015218067873524625,
      "loss": 0.9431,
      "step": 6845
    },
    {
      "epoch": 0.3930231223822365,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00015209522677153364,
      "loss": 0.9736,
      "step": 6850
    },
    {
      "epoch": 0.39331000057375637,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00015200972256661075,
      "loss": 1.0248,
      "step": 6855
    },
    {
      "epoch": 0.39359687876527627,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00015192416620622145,
      "loss": 1.0525,
      "step": 6860
    },
    {
      "epoch": 0.39388375695679617,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00015183855777616188,
      "loss": 1.0149,
      "step": 6865
    },
    {
      "epoch": 0.394170635148316,
      "grad_norm": 0.3359375,
      "learning_rate": 0.0001517528973622803,
      "loss": 0.9726,
      "step": 6870
    },
    {
      "epoch": 0.3944575133398359,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00015166718505047722,
      "loss": 0.9891,
      "step": 6875
    },
    {
      "epoch": 0.39474439153135576,
      "grad_norm": 0.2890625,
      "learning_rate": 0.0001515814209267051,
      "loss": 1.0325,
      "step": 6880
    },
    {
      "epoch": 0.39503126972287567,
      "grad_norm": 0.283203125,
      "learning_rate": 0.00015149560507696837,
      "loss": 0.9378,
      "step": 6885
    },
    {
      "epoch": 0.39531814791439557,
      "grad_norm": 0.2734375,
      "learning_rate": 0.00015140973758732347,
      "loss": 0.972,
      "step": 6890
    },
    {
      "epoch": 0.3956050261059154,
      "grad_norm": 0.2578125,
      "learning_rate": 0.0001513238185438784,
      "loss": 0.8973,
      "step": 6895
    },
    {
      "epoch": 0.3958919042974353,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00015123784803279302,
      "loss": 0.9166,
      "step": 6900
    },
    {
      "epoch": 0.3961787824889552,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00015115182614027872,
      "loss": 1.0208,
      "step": 6905
    },
    {
      "epoch": 0.39646566068047506,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00015106575295259847,
      "loss": 0.891,
      "step": 6910
    },
    {
      "epoch": 0.39675253887199496,
      "grad_norm": 0.287109375,
      "learning_rate": 0.00015097962855606663,
      "loss": 0.9247,
      "step": 6915
    },
    {
      "epoch": 0.3970394170635148,
      "grad_norm": 0.2490234375,
      "learning_rate": 0.00015089345303704902,
      "loss": 0.9241,
      "step": 6920
    },
    {
      "epoch": 0.3973262952550347,
      "grad_norm": 0.2734375,
      "learning_rate": 0.00015080722648196253,
      "loss": 1.0022,
      "step": 6925
    },
    {
      "epoch": 0.3976131734465546,
      "grad_norm": 0.263671875,
      "learning_rate": 0.0001507209489772754,
      "loss": 0.8877,
      "step": 6930
    },
    {
      "epoch": 0.39790005163807446,
      "grad_norm": 0.29296875,
      "learning_rate": 0.0001506346206095069,
      "loss": 1.0253,
      "step": 6935
    },
    {
      "epoch": 0.39818692982959436,
      "grad_norm": 0.267578125,
      "learning_rate": 0.0001505482414652273,
      "loss": 1.005,
      "step": 6940
    },
    {
      "epoch": 0.39847380802111426,
      "grad_norm": 0.29296875,
      "learning_rate": 0.00015046181163105786,
      "loss": 0.9344,
      "step": 6945
    },
    {
      "epoch": 0.3987606862126341,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00015037533119367053,
      "loss": 1.0191,
      "step": 6950
    },
    {
      "epoch": 0.399047564404154,
      "grad_norm": 0.263671875,
      "learning_rate": 0.0001502888002397881,
      "loss": 0.8942,
      "step": 6955
    },
    {
      "epoch": 0.39933444259567386,
      "grad_norm": 0.28125,
      "learning_rate": 0.00015020221885618407,
      "loss": 1.0012,
      "step": 6960
    },
    {
      "epoch": 0.39962132078719376,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00015011558712968234,
      "loss": 0.974,
      "step": 6965
    },
    {
      "epoch": 0.39990819897871366,
      "grad_norm": 0.3046875,
      "learning_rate": 0.0001500289051471575,
      "loss": 0.9002,
      "step": 6970
    },
    {
      "epoch": 0.4001950771702335,
      "grad_norm": 0.26953125,
      "learning_rate": 0.0001499421729955344,
      "loss": 0.9575,
      "step": 6975
    },
    {
      "epoch": 0.4004819553617534,
      "grad_norm": 0.2578125,
      "learning_rate": 0.0001498553907617882,
      "loss": 1.0227,
      "step": 6980
    },
    {
      "epoch": 0.4007688335532733,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00014976855853294436,
      "loss": 0.9818,
      "step": 6985
    },
    {
      "epoch": 0.40105571174479315,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00014968167639607845,
      "loss": 1.0078,
      "step": 6990
    },
    {
      "epoch": 0.40134258993631305,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00014959474443831597,
      "loss": 0.9494,
      "step": 6995
    },
    {
      "epoch": 0.4016294681278329,
      "grad_norm": 0.287109375,
      "learning_rate": 0.00014950776274683266,
      "loss": 0.9803,
      "step": 7000
    },
    {
      "epoch": 0.4019163463193528,
      "grad_norm": 0.287109375,
      "learning_rate": 0.00014942073140885377,
      "loss": 0.9356,
      "step": 7005
    },
    {
      "epoch": 0.4022032245108727,
      "grad_norm": 0.294921875,
      "learning_rate": 0.0001493336505116546,
      "loss": 0.9966,
      "step": 7010
    },
    {
      "epoch": 0.40249010270239255,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00014924652014256014,
      "loss": 0.9121,
      "step": 7015
    },
    {
      "epoch": 0.40277698089391245,
      "grad_norm": 0.26171875,
      "learning_rate": 0.0001491593403889448,
      "loss": 1.0084,
      "step": 7020
    },
    {
      "epoch": 0.40306385908543235,
      "grad_norm": 0.294921875,
      "learning_rate": 0.00014907211133823273,
      "loss": 0.9533,
      "step": 7025
    },
    {
      "epoch": 0.4033507372769522,
      "grad_norm": 0.25390625,
      "learning_rate": 0.0001489848330778973,
      "loss": 0.8849,
      "step": 7030
    },
    {
      "epoch": 0.4036376154684721,
      "grad_norm": 0.2890625,
      "learning_rate": 0.0001488975056954615,
      "loss": 0.9374,
      "step": 7035
    },
    {
      "epoch": 0.40392449365999195,
      "grad_norm": 0.265625,
      "learning_rate": 0.00014881012927849728,
      "loss": 0.9389,
      "step": 7040
    },
    {
      "epoch": 0.40421137185151185,
      "grad_norm": 0.267578125,
      "learning_rate": 0.000148722703914626,
      "loss": 0.9596,
      "step": 7045
    },
    {
      "epoch": 0.40449825004303175,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00014863522969151796,
      "loss": 0.9214,
      "step": 7050
    },
    {
      "epoch": 0.4047851282345516,
      "grad_norm": 0.287109375,
      "learning_rate": 0.00014854770669689253,
      "loss": 0.9462,
      "step": 7055
    },
    {
      "epoch": 0.4050720064260715,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00014846013501851796,
      "loss": 1.0052,
      "step": 7060
    },
    {
      "epoch": 0.4053588846175914,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00014837251474421133,
      "loss": 0.9172,
      "step": 7065
    },
    {
      "epoch": 0.40564576280911124,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00014828484596183844,
      "loss": 0.9318,
      "step": 7070
    },
    {
      "epoch": 0.40593264100063114,
      "grad_norm": 0.263671875,
      "learning_rate": 0.0001481971287593138,
      "loss": 0.9865,
      "step": 7075
    },
    {
      "epoch": 0.406219519192151,
      "grad_norm": 0.28125,
      "learning_rate": 0.0001481093632246003,
      "loss": 0.9302,
      "step": 7080
    },
    {
      "epoch": 0.4065063973836709,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00014802154944570952,
      "loss": 0.9485,
      "step": 7085
    },
    {
      "epoch": 0.4067932755751908,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00014793368751070125,
      "loss": 0.9007,
      "step": 7090
    },
    {
      "epoch": 0.40708015376671064,
      "grad_norm": 0.283203125,
      "learning_rate": 0.00014784577750768363,
      "loss": 0.9349,
      "step": 7095
    },
    {
      "epoch": 0.40736703195823054,
      "grad_norm": 0.25,
      "learning_rate": 0.0001477578195248131,
      "loss": 0.9483,
      "step": 7100
    },
    {
      "epoch": 0.40765391014975044,
      "grad_norm": 0.27734375,
      "learning_rate": 0.000147669813650294,
      "loss": 1.0298,
      "step": 7105
    },
    {
      "epoch": 0.4079407883412703,
      "grad_norm": 0.267578125,
      "learning_rate": 0.0001475817599723789,
      "loss": 1.0117,
      "step": 7110
    },
    {
      "epoch": 0.4082276665327902,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00014749365857936824,
      "loss": 0.9019,
      "step": 7115
    },
    {
      "epoch": 0.40851454472431004,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00014740550955961022,
      "loss": 0.9884,
      "step": 7120
    },
    {
      "epoch": 0.40880142291582994,
      "grad_norm": 0.279296875,
      "learning_rate": 0.0001473173130015009,
      "loss": 0.9628,
      "step": 7125
    },
    {
      "epoch": 0.40908830110734984,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00014722906899348402,
      "loss": 0.913,
      "step": 7130
    },
    {
      "epoch": 0.4093751792988697,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00014714077762405085,
      "loss": 0.9841,
      "step": 7135
    },
    {
      "epoch": 0.4096620574903896,
      "grad_norm": 0.28515625,
      "learning_rate": 0.00014705243898174017,
      "loss": 1.0012,
      "step": 7140
    },
    {
      "epoch": 0.4099489356819095,
      "grad_norm": 0.2470703125,
      "learning_rate": 0.00014696405315513814,
      "loss": 0.9609,
      "step": 7145
    },
    {
      "epoch": 0.41023581387342933,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00014687562023287833,
      "loss": 0.9618,
      "step": 7150
    },
    {
      "epoch": 0.41052269206494924,
      "grad_norm": 0.29296875,
      "learning_rate": 0.00014678714030364143,
      "loss": 0.9723,
      "step": 7155
    },
    {
      "epoch": 0.4108095702564691,
      "grad_norm": 0.248046875,
      "learning_rate": 0.00014669861345615532,
      "loss": 0.9298,
      "step": 7160
    },
    {
      "epoch": 0.411096448447989,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00014661003977919492,
      "loss": 0.9379,
      "step": 7165
    },
    {
      "epoch": 0.4113833266395089,
      "grad_norm": 0.275390625,
      "learning_rate": 0.0001465214193615821,
      "loss": 0.9979,
      "step": 7170
    },
    {
      "epoch": 0.41167020483102873,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00014643275229218563,
      "loss": 0.9358,
      "step": 7175
    },
    {
      "epoch": 0.41195708302254863,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00014634403865992107,
      "loss": 0.9752,
      "step": 7180
    },
    {
      "epoch": 0.41224396121406853,
      "grad_norm": 0.283203125,
      "learning_rate": 0.0001462552785537506,
      "loss": 0.9811,
      "step": 7185
    },
    {
      "epoch": 0.4125308394055884,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00014616647206268306,
      "loss": 0.9357,
      "step": 7190
    },
    {
      "epoch": 0.4128177175971083,
      "grad_norm": 0.3046875,
      "learning_rate": 0.0001460776192757738,
      "loss": 0.9227,
      "step": 7195
    },
    {
      "epoch": 0.4131045957886281,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00014598872028212463,
      "loss": 0.9438,
      "step": 7200
    },
    {
      "epoch": 0.413391473980148,
      "grad_norm": 0.2890625,
      "learning_rate": 0.00014589977517088365,
      "loss": 0.976,
      "step": 7205
    },
    {
      "epoch": 0.41367835217166793,
      "grad_norm": 0.26171875,
      "learning_rate": 0.0001458107840312452,
      "loss": 0.9441,
      "step": 7210
    },
    {
      "epoch": 0.4139652303631878,
      "grad_norm": 0.287109375,
      "learning_rate": 0.00014572174695244976,
      "loss": 0.9262,
      "step": 7215
    },
    {
      "epoch": 0.4142521085547077,
      "grad_norm": 0.2490234375,
      "learning_rate": 0.000145632664023784,
      "loss": 1.0197,
      "step": 7220
    },
    {
      "epoch": 0.4145389867462276,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00014554353533458042,
      "loss": 0.9544,
      "step": 7225
    },
    {
      "epoch": 0.4148258649377474,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00014545436097421744,
      "loss": 0.9347,
      "step": 7230
    },
    {
      "epoch": 0.4151127431292673,
      "grad_norm": 0.2734375,
      "learning_rate": 0.0001453651410321194,
      "loss": 0.9956,
      "step": 7235
    },
    {
      "epoch": 0.41539962132078717,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00014527587559775616,
      "loss": 0.9399,
      "step": 7240
    },
    {
      "epoch": 0.4156864995123071,
      "grad_norm": 0.2578125,
      "learning_rate": 0.0001451865647606434,
      "loss": 0.9771,
      "step": 7245
    },
    {
      "epoch": 0.415973377703827,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00014509720861034212,
      "loss": 0.9387,
      "step": 7250
    },
    {
      "epoch": 0.4162602558953468,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00014500780723645897,
      "loss": 0.9079,
      "step": 7255
    },
    {
      "epoch": 0.4165471340868667,
      "grad_norm": 0.291015625,
      "learning_rate": 0.00014491836072864578,
      "loss": 1.007,
      "step": 7260
    },
    {
      "epoch": 0.4168340122783866,
      "grad_norm": 0.251953125,
      "learning_rate": 0.0001448288691765997,
      "loss": 0.9495,
      "step": 7265
    },
    {
      "epoch": 0.41712089046990647,
      "grad_norm": 0.28125,
      "learning_rate": 0.0001447393326700631,
      "loss": 0.8996,
      "step": 7270
    },
    {
      "epoch": 0.41740776866142637,
      "grad_norm": 0.263671875,
      "learning_rate": 0.0001446497512988234,
      "loss": 0.9924,
      "step": 7275
    },
    {
      "epoch": 0.4176946468529462,
      "grad_norm": 0.423828125,
      "learning_rate": 0.00014456012515271294,
      "loss": 0.9394,
      "step": 7280
    },
    {
      "epoch": 0.4179815250444661,
      "grad_norm": 0.2578125,
      "learning_rate": 0.0001444704543216091,
      "loss": 0.8852,
      "step": 7285
    },
    {
      "epoch": 0.418268403235986,
      "grad_norm": 0.291015625,
      "learning_rate": 0.0001443807388954339,
      "loss": 0.997,
      "step": 7290
    },
    {
      "epoch": 0.41855528142750587,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00014429097896415425,
      "loss": 0.9559,
      "step": 7295
    },
    {
      "epoch": 0.41884215961902577,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00014420117461778155,
      "loss": 0.9138,
      "step": 7300
    },
    {
      "epoch": 0.41912903781054567,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00014411132594637185,
      "loss": 0.9079,
      "step": 7305
    },
    {
      "epoch": 0.4194159160020655,
      "grad_norm": 0.26171875,
      "learning_rate": 0.0001440214330400256,
      "loss": 0.9497,
      "step": 7310
    },
    {
      "epoch": 0.4197027941935854,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00014393149598888752,
      "loss": 0.966,
      "step": 7315
    },
    {
      "epoch": 0.41998967238510526,
      "grad_norm": 0.263671875,
      "learning_rate": 0.0001438415148831468,
      "loss": 0.9842,
      "step": 7320
    },
    {
      "epoch": 0.42027655057662516,
      "grad_norm": 0.28125,
      "learning_rate": 0.00014375148981303663,
      "loss": 0.9818,
      "step": 7325
    },
    {
      "epoch": 0.42056342876814506,
      "grad_norm": 0.28515625,
      "learning_rate": 0.00014366142086883436,
      "loss": 0.9759,
      "step": 7330
    },
    {
      "epoch": 0.4208503069596649,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00014357130814086135,
      "loss": 0.9687,
      "step": 7335
    },
    {
      "epoch": 0.4211371851511848,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00014348115171948283,
      "loss": 0.9834,
      "step": 7340
    },
    {
      "epoch": 0.4214240633427047,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00014339095169510786,
      "loss": 0.9655,
      "step": 7345
    },
    {
      "epoch": 0.42171094153422456,
      "grad_norm": 0.2890625,
      "learning_rate": 0.00014330070815818922,
      "loss": 0.9959,
      "step": 7350
    },
    {
      "epoch": 0.42199781972574446,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00014321042119922337,
      "loss": 0.9207,
      "step": 7355
    },
    {
      "epoch": 0.4222846979172643,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00014312009090875025,
      "loss": 0.9809,
      "step": 7360
    },
    {
      "epoch": 0.4225715761087842,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00014302971737735324,
      "loss": 0.9781,
      "step": 7365
    },
    {
      "epoch": 0.4228584543003041,
      "grad_norm": 0.279296875,
      "learning_rate": 0.0001429393006956592,
      "loss": 0.9526,
      "step": 7370
    },
    {
      "epoch": 0.42314533249182396,
      "grad_norm": 0.267578125,
      "learning_rate": 0.0001428488409543381,
      "loss": 0.9646,
      "step": 7375
    },
    {
      "epoch": 0.42343221068334386,
      "grad_norm": 0.279296875,
      "learning_rate": 0.0001427583382441032,
      "loss": 0.9268,
      "step": 7380
    },
    {
      "epoch": 0.42371908887486376,
      "grad_norm": 0.28125,
      "learning_rate": 0.00014266779265571087,
      "loss": 0.9645,
      "step": 7385
    },
    {
      "epoch": 0.4240059670663836,
      "grad_norm": 0.28125,
      "learning_rate": 0.00014257720427996037,
      "loss": 0.9863,
      "step": 7390
    },
    {
      "epoch": 0.4242928452579035,
      "grad_norm": 0.28125,
      "learning_rate": 0.00014248657320769392,
      "loss": 1.0031,
      "step": 7395
    },
    {
      "epoch": 0.42457972344942335,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00014239589952979662,
      "loss": 1.058,
      "step": 7400
    },
    {
      "epoch": 0.42486660164094325,
      "grad_norm": 0.2734375,
      "learning_rate": 0.00014230518333719616,
      "loss": 0.9156,
      "step": 7405
    },
    {
      "epoch": 0.42515347983246315,
      "grad_norm": 0.296875,
      "learning_rate": 0.00014221442472086304,
      "loss": 0.9806,
      "step": 7410
    },
    {
      "epoch": 0.425440358023983,
      "grad_norm": 0.287109375,
      "learning_rate": 0.0001421236237718101,
      "loss": 0.9215,
      "step": 7415
    },
    {
      "epoch": 0.4257272362155029,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00014203278058109282,
      "loss": 1.0512,
      "step": 7420
    },
    {
      "epoch": 0.4260141144070228,
      "grad_norm": 0.298828125,
      "learning_rate": 0.0001419418952398089,
      "loss": 1.0363,
      "step": 7425
    },
    {
      "epoch": 0.42630099259854265,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00014185096783909837,
      "loss": 0.9783,
      "step": 7430
    },
    {
      "epoch": 0.42658787079006255,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00014175999847014346,
      "loss": 0.9456,
      "step": 7435
    },
    {
      "epoch": 0.4268747489815824,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00014166898722416845,
      "loss": 0.9197,
      "step": 7440
    },
    {
      "epoch": 0.4271616271731023,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00014157793419243962,
      "loss": 0.9843,
      "step": 7445
    },
    {
      "epoch": 0.4274485053646222,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00014148683946626516,
      "loss": 0.8788,
      "step": 7450
    },
    {
      "epoch": 0.42773538355614205,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00014139570313699502,
      "loss": 0.8889,
      "step": 7455
    },
    {
      "epoch": 0.42802226174766195,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00014130452529602096,
      "loss": 0.9524,
      "step": 7460
    },
    {
      "epoch": 0.42830913993918185,
      "grad_norm": 0.265625,
      "learning_rate": 0.00014121330603477633,
      "loss": 0.9771,
      "step": 7465
    },
    {
      "epoch": 0.4285960181307017,
      "grad_norm": 0.294921875,
      "learning_rate": 0.00014112204544473598,
      "loss": 0.9544,
      "step": 7470
    },
    {
      "epoch": 0.4288828963222216,
      "grad_norm": 0.28125,
      "learning_rate": 0.00014103074361741623,
      "loss": 0.9991,
      "step": 7475
    },
    {
      "epoch": 0.42916977451374144,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00014093940064437477,
      "loss": 0.9808,
      "step": 7480
    },
    {
      "epoch": 0.42945665270526134,
      "grad_norm": 0.2734375,
      "learning_rate": 0.0001408480166172106,
      "loss": 0.98,
      "step": 7485
    },
    {
      "epoch": 0.42974353089678125,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00014075659162756372,
      "loss": 0.9992,
      "step": 7490
    },
    {
      "epoch": 0.4300304090883011,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00014066512576711536,
      "loss": 0.9738,
      "step": 7495
    },
    {
      "epoch": 0.430317287279821,
      "grad_norm": 0.3125,
      "learning_rate": 0.0001405736191275877,
      "loss": 0.9805,
      "step": 7500
    },
    {
      "epoch": 0.4306041654713409,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00014048207180074383,
      "loss": 0.9712,
      "step": 7505
    },
    {
      "epoch": 0.43089104366286074,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00014039048387838756,
      "loss": 0.9586,
      "step": 7510
    },
    {
      "epoch": 0.43117792185438064,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00014029885545236348,
      "loss": 0.9223,
      "step": 7515
    },
    {
      "epoch": 0.4314648000459005,
      "grad_norm": 0.2890625,
      "learning_rate": 0.00014020718661455678,
      "loss": 0.9958,
      "step": 7520
    },
    {
      "epoch": 0.4317516782374204,
      "grad_norm": 0.259765625,
      "learning_rate": 0.0001401154774568932,
      "loss": 0.8981,
      "step": 7525
    },
    {
      "epoch": 0.4320385564289403,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00014002372807133887,
      "loss": 0.9039,
      "step": 7530
    },
    {
      "epoch": 0.43232543462046014,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00013993193854990027,
      "loss": 0.9583,
      "step": 7535
    },
    {
      "epoch": 0.43261231281198004,
      "grad_norm": 0.306640625,
      "learning_rate": 0.00013984010898462416,
      "loss": 0.951,
      "step": 7540
    },
    {
      "epoch": 0.43289919100349994,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00013974823946759742,
      "loss": 0.9705,
      "step": 7545
    },
    {
      "epoch": 0.4331860691950198,
      "grad_norm": 0.25390625,
      "learning_rate": 0.000139656330090947,
      "loss": 0.9752,
      "step": 7550
    },
    {
      "epoch": 0.4334729473865397,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00013956438094683986,
      "loss": 1.0097,
      "step": 7555
    },
    {
      "epoch": 0.43375982557805953,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00013947239212748277,
      "loss": 1.046,
      "step": 7560
    },
    {
      "epoch": 0.43404670376957943,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00013938036372512235,
      "loss": 0.9044,
      "step": 7565
    },
    {
      "epoch": 0.43433358196109934,
      "grad_norm": 0.265625,
      "learning_rate": 0.0001392882958320449,
      "loss": 0.9208,
      "step": 7570
    },
    {
      "epoch": 0.4346204601526192,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00013919618854057626,
      "loss": 0.961,
      "step": 7575
    },
    {
      "epoch": 0.4349073383441391,
      "grad_norm": 0.287109375,
      "learning_rate": 0.00013910404194308188,
      "loss": 0.9867,
      "step": 7580
    },
    {
      "epoch": 0.435194216535659,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00013901185613196654,
      "loss": 0.946,
      "step": 7585
    },
    {
      "epoch": 0.43548109472717883,
      "grad_norm": 0.236328125,
      "learning_rate": 0.00013891963119967439,
      "loss": 0.9274,
      "step": 7590
    },
    {
      "epoch": 0.43576797291869873,
      "grad_norm": 0.265625,
      "learning_rate": 0.00013882736723868884,
      "loss": 0.9327,
      "step": 7595
    },
    {
      "epoch": 0.4360548511102186,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00013873506434153228,
      "loss": 0.9173,
      "step": 7600
    },
    {
      "epoch": 0.4363417293017385,
      "grad_norm": 0.28515625,
      "learning_rate": 0.0001386427226007664,
      "loss": 0.9253,
      "step": 7605
    },
    {
      "epoch": 0.4366286074932584,
      "grad_norm": 0.265625,
      "learning_rate": 0.00013855034210899161,
      "loss": 0.9487,
      "step": 7610
    },
    {
      "epoch": 0.4369154856847782,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00013845792295884735,
      "loss": 0.9742,
      "step": 7615
    },
    {
      "epoch": 0.43720236387629813,
      "grad_norm": 0.265625,
      "learning_rate": 0.0001383654652430117,
      "loss": 0.9517,
      "step": 7620
    },
    {
      "epoch": 0.43748924206781803,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00013827296905420143,
      "loss": 1.016,
      "step": 7625
    },
    {
      "epoch": 0.4377761202593379,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00013818043448517202,
      "loss": 1.0417,
      "step": 7630
    },
    {
      "epoch": 0.4380629984508578,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00013808786162871728,
      "loss": 0.9953,
      "step": 7635
    },
    {
      "epoch": 0.4383498766423776,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00013799525057766948,
      "loss": 1.0229,
      "step": 7640
    },
    {
      "epoch": 0.4386367548338975,
      "grad_norm": 0.28515625,
      "learning_rate": 0.00013790260142489922,
      "loss": 0.9127,
      "step": 7645
    },
    {
      "epoch": 0.4389236330254174,
      "grad_norm": 0.265625,
      "learning_rate": 0.00013780991426331522,
      "loss": 0.9701,
      "step": 7650
    },
    {
      "epoch": 0.43921051121693727,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00013771718918586444,
      "loss": 0.9483,
      "step": 7655
    },
    {
      "epoch": 0.4394973894084572,
      "grad_norm": 0.291015625,
      "learning_rate": 0.00013762442628553179,
      "loss": 0.947,
      "step": 7660
    },
    {
      "epoch": 0.4397842675999771,
      "grad_norm": 0.2890625,
      "learning_rate": 0.00013753162565534004,
      "loss": 0.9881,
      "step": 7665
    },
    {
      "epoch": 0.4400711457914969,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00013743878738834998,
      "loss": 0.9753,
      "step": 7670
    },
    {
      "epoch": 0.4403580239830168,
      "grad_norm": 0.248046875,
      "learning_rate": 0.00013734591157765994,
      "loss": 0.9692,
      "step": 7675
    },
    {
      "epoch": 0.44064490217453667,
      "grad_norm": 0.271484375,
      "learning_rate": 0.000137252998316406,
      "loss": 0.9459,
      "step": 7680
    },
    {
      "epoch": 0.44093178036605657,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00013716004769776189,
      "loss": 0.9521,
      "step": 7685
    },
    {
      "epoch": 0.44121865855757647,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00013706705981493853,
      "loss": 0.9345,
      "step": 7690
    },
    {
      "epoch": 0.4415055367490963,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00013697403476118454,
      "loss": 0.9281,
      "step": 7695
    },
    {
      "epoch": 0.4417924149406162,
      "grad_norm": 0.29296875,
      "learning_rate": 0.00013688097262978555,
      "loss": 0.9223,
      "step": 7700
    },
    {
      "epoch": 0.4420792931321361,
      "grad_norm": 0.263671875,
      "learning_rate": 0.0001367878735140645,
      "loss": 0.9012,
      "step": 7705
    },
    {
      "epoch": 0.44236617132365597,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00013669473750738142,
      "loss": 0.97,
      "step": 7710
    },
    {
      "epoch": 0.44265304951517587,
      "grad_norm": 0.2431640625,
      "learning_rate": 0.00013660156470313327,
      "loss": 0.9504,
      "step": 7715
    },
    {
      "epoch": 0.4429399277066957,
      "grad_norm": 0.29296875,
      "learning_rate": 0.00013650835519475395,
      "loss": 1.0034,
      "step": 7720
    },
    {
      "epoch": 0.4432268058982156,
      "grad_norm": 0.298828125,
      "learning_rate": 0.0001364151090757142,
      "loss": 0.9862,
      "step": 7725
    },
    {
      "epoch": 0.4435136840897355,
      "grad_norm": 0.2578125,
      "learning_rate": 0.0001363218264395214,
      "loss": 0.9397,
      "step": 7730
    },
    {
      "epoch": 0.44380056228125536,
      "grad_norm": 0.2890625,
      "learning_rate": 0.00013622850737971963,
      "loss": 0.9846,
      "step": 7735
    },
    {
      "epoch": 0.44408744047277526,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00013613515198988938,
      "loss": 1.0255,
      "step": 7740
    },
    {
      "epoch": 0.44437431866429516,
      "grad_norm": 0.287109375,
      "learning_rate": 0.0001360417603636477,
      "loss": 0.9535,
      "step": 7745
    },
    {
      "epoch": 0.444661196855815,
      "grad_norm": 0.30859375,
      "learning_rate": 0.0001359483325946479,
      "loss": 0.9717,
      "step": 7750
    },
    {
      "epoch": 0.4449480750473349,
      "grad_norm": 0.294921875,
      "learning_rate": 0.00013585486877657957,
      "loss": 0.9772,
      "step": 7755
    },
    {
      "epoch": 0.44523495323885476,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00013576136900316844,
      "loss": 0.9284,
      "step": 7760
    },
    {
      "epoch": 0.44552183143037466,
      "grad_norm": 0.29296875,
      "learning_rate": 0.00013566783336817627,
      "loss": 1.0055,
      "step": 7765
    },
    {
      "epoch": 0.44580870962189456,
      "grad_norm": 0.28125,
      "learning_rate": 0.00013557426196540083,
      "loss": 0.9923,
      "step": 7770
    },
    {
      "epoch": 0.4460955878134144,
      "grad_norm": 0.294921875,
      "learning_rate": 0.00013548065488867573,
      "loss": 0.9646,
      "step": 7775
    },
    {
      "epoch": 0.4463824660049343,
      "grad_norm": 0.28125,
      "learning_rate": 0.00013538701223187033,
      "loss": 0.9368,
      "step": 7780
    },
    {
      "epoch": 0.4466693441964542,
      "grad_norm": 0.265625,
      "learning_rate": 0.0001352933340888897,
      "loss": 0.9225,
      "step": 7785
    },
    {
      "epoch": 0.44695622238797406,
      "grad_norm": 0.263671875,
      "learning_rate": 0.0001351996205536745,
      "loss": 0.9874,
      "step": 7790
    },
    {
      "epoch": 0.44724310057949396,
      "grad_norm": 0.291015625,
      "learning_rate": 0.0001351058717202009,
      "loss": 0.9323,
      "step": 7795
    },
    {
      "epoch": 0.4475299787710138,
      "grad_norm": 0.283203125,
      "learning_rate": 0.00013501208768248042,
      "loss": 0.9731,
      "step": 7800
    },
    {
      "epoch": 0.4478168569625337,
      "grad_norm": 0.263671875,
      "learning_rate": 0.0001349182685345599,
      "loss": 0.9228,
      "step": 7805
    },
    {
      "epoch": 0.4481037351540536,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00013482441437052134,
      "loss": 1.051,
      "step": 7810
    },
    {
      "epoch": 0.44839061334557345,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00013473052528448201,
      "loss": 0.9272,
      "step": 7815
    },
    {
      "epoch": 0.44867749153709335,
      "grad_norm": 0.294921875,
      "learning_rate": 0.00013463660137059407,
      "loss": 0.9549,
      "step": 7820
    },
    {
      "epoch": 0.44896436972861326,
      "grad_norm": 0.248046875,
      "learning_rate": 0.0001345426427230446,
      "loss": 0.8974,
      "step": 7825
    },
    {
      "epoch": 0.4492512479201331,
      "grad_norm": 0.26953125,
      "learning_rate": 0.0001344486494360555,
      "loss": 0.9423,
      "step": 7830
    },
    {
      "epoch": 0.449538126111653,
      "grad_norm": 0.2734375,
      "learning_rate": 0.00013435462160388351,
      "loss": 0.9031,
      "step": 7835
    },
    {
      "epoch": 0.44982500430317285,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00013426055932081997,
      "loss": 0.908,
      "step": 7840
    },
    {
      "epoch": 0.45011188249469275,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00013416646268119074,
      "loss": 1.0104,
      "step": 7845
    },
    {
      "epoch": 0.45039876068621265,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00013407233177935608,
      "loss": 1.0248,
      "step": 7850
    },
    {
      "epoch": 0.4506856388777325,
      "grad_norm": 0.296875,
      "learning_rate": 0.00013397816670971072,
      "loss": 0.9396,
      "step": 7855
    },
    {
      "epoch": 0.4509725170692524,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00013388396756668354,
      "loss": 0.907,
      "step": 7860
    },
    {
      "epoch": 0.4512593952607723,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00013378973444473776,
      "loss": 0.9295,
      "step": 7865
    },
    {
      "epoch": 0.45154627345229215,
      "grad_norm": 0.259765625,
      "learning_rate": 0.0001336954674383705,
      "loss": 0.9777,
      "step": 7870
    },
    {
      "epoch": 0.45183315164381205,
      "grad_norm": 0.25,
      "learning_rate": 0.00013360116664211293,
      "loss": 0.8774,
      "step": 7875
    },
    {
      "epoch": 0.4521200298353319,
      "grad_norm": 0.2490234375,
      "learning_rate": 0.00013350683215053013,
      "loss": 0.9259,
      "step": 7880
    },
    {
      "epoch": 0.4524069080268518,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00013341246405822088,
      "loss": 0.9176,
      "step": 7885
    },
    {
      "epoch": 0.4526937862183717,
      "grad_norm": 0.2451171875,
      "learning_rate": 0.00013331806245981775,
      "loss": 0.9333,
      "step": 7890
    },
    {
      "epoch": 0.45298066440989154,
      "grad_norm": 0.265625,
      "learning_rate": 0.0001332236274499869,
      "loss": 1.0022,
      "step": 7895
    },
    {
      "epoch": 0.45326754260141144,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00013312915912342793,
      "loss": 0.9649,
      "step": 7900
    },
    {
      "epoch": 0.45355442079293135,
      "grad_norm": 0.30078125,
      "learning_rate": 0.0001330346575748739,
      "loss": 0.9092,
      "step": 7905
    },
    {
      "epoch": 0.4538412989844512,
      "grad_norm": 0.265625,
      "learning_rate": 0.00013294012289909114,
      "loss": 0.9798,
      "step": 7910
    },
    {
      "epoch": 0.4541281771759711,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00013284555519087933,
      "loss": 0.9607,
      "step": 7915
    },
    {
      "epoch": 0.45441505536749094,
      "grad_norm": 0.267578125,
      "learning_rate": 0.0001327509545450711,
      "loss": 0.9416,
      "step": 7920
    },
    {
      "epoch": 0.45470193355901084,
      "grad_norm": 0.30078125,
      "learning_rate": 0.0001326563210565322,
      "loss": 0.9335,
      "step": 7925
    },
    {
      "epoch": 0.45498881175053074,
      "grad_norm": 0.2890625,
      "learning_rate": 0.00013256165482016137,
      "loss": 0.977,
      "step": 7930
    },
    {
      "epoch": 0.4552756899420506,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00013246695593089,
      "loss": 0.9757,
      "step": 7935
    },
    {
      "epoch": 0.4555625681335705,
      "grad_norm": 0.283203125,
      "learning_rate": 0.00013237222448368247,
      "loss": 0.9762,
      "step": 7940
    },
    {
      "epoch": 0.4558494463250904,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00013227746057353562,
      "loss": 1.0154,
      "step": 7945
    },
    {
      "epoch": 0.45613632451661024,
      "grad_norm": 0.263671875,
      "learning_rate": 0.0001321826642954789,
      "loss": 0.9798,
      "step": 7950
    },
    {
      "epoch": 0.45642320270813014,
      "grad_norm": 0.2470703125,
      "learning_rate": 0.00013208783574457432,
      "loss": 1.0161,
      "step": 7955
    },
    {
      "epoch": 0.45671008089965,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00013199297501591603,
      "loss": 0.9881,
      "step": 7960
    },
    {
      "epoch": 0.4569969590911699,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00013189808220463072,
      "loss": 0.9788,
      "step": 7965
    },
    {
      "epoch": 0.4572838372826898,
      "grad_norm": 0.283203125,
      "learning_rate": 0.00013180315740587701,
      "loss": 1.0485,
      "step": 7970
    },
    {
      "epoch": 0.45757071547420963,
      "grad_norm": 0.294921875,
      "learning_rate": 0.00013170820071484572,
      "loss": 0.951,
      "step": 7975
    },
    {
      "epoch": 0.45785759366572953,
      "grad_norm": 0.263671875,
      "learning_rate": 0.0001316132122267597,
      "loss": 0.9431,
      "step": 7980
    },
    {
      "epoch": 0.45814447185724944,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00013151819203687356,
      "loss": 0.8649,
      "step": 7985
    },
    {
      "epoch": 0.4584313500487693,
      "grad_norm": 0.3046875,
      "learning_rate": 0.00013142314024047375,
      "loss": 0.9131,
      "step": 7990
    },
    {
      "epoch": 0.4587182282402892,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00013132805693287844,
      "loss": 0.9522,
      "step": 7995
    },
    {
      "epoch": 0.45900510643180903,
      "grad_norm": 0.6171875,
      "learning_rate": 0.0001312329422094374,
      "loss": 0.9952,
      "step": 8000
    },
    {
      "epoch": 0.45929198462332893,
      "grad_norm": 0.271484375,
      "learning_rate": 0.0001311377961655319,
      "loss": 0.8869,
      "step": 8005
    },
    {
      "epoch": 0.45957886281484883,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00013104261889657453,
      "loss": 0.9416,
      "step": 8010
    },
    {
      "epoch": 0.4598657410063687,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00013094741049800936,
      "loss": 0.933,
      "step": 8015
    },
    {
      "epoch": 0.4601526191978886,
      "grad_norm": 0.265625,
      "learning_rate": 0.00013085217106531153,
      "loss": 1.0033,
      "step": 8020
    },
    {
      "epoch": 0.4604394973894085,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00013075690069398738,
      "loss": 0.9582,
      "step": 8025
    },
    {
      "epoch": 0.4607263755809283,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00013066159947957426,
      "loss": 0.9989,
      "step": 8030
    },
    {
      "epoch": 0.46101325377244823,
      "grad_norm": 0.287109375,
      "learning_rate": 0.0001305662675176404,
      "loss": 0.9598,
      "step": 8035
    },
    {
      "epoch": 0.4613001319639681,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00013047090490378495,
      "loss": 0.9588,
      "step": 8040
    },
    {
      "epoch": 0.461587010155488,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00013037551173363774,
      "loss": 0.9166,
      "step": 8045
    },
    {
      "epoch": 0.4618738883470079,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00013028008810285924,
      "loss": 0.9633,
      "step": 8050
    },
    {
      "epoch": 0.4621607665385277,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00013018463410714048,
      "loss": 0.8813,
      "step": 8055
    },
    {
      "epoch": 0.4624476447300476,
      "grad_norm": 0.30078125,
      "learning_rate": 0.00013008914984220294,
      "loss": 0.9441,
      "step": 8060
    },
    {
      "epoch": 0.4627345229215675,
      "grad_norm": 0.2890625,
      "learning_rate": 0.00012999363540379852,
      "loss": 0.9441,
      "step": 8065
    },
    {
      "epoch": 0.46302140111308737,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00012989809088770923,
      "loss": 0.9317,
      "step": 8070
    },
    {
      "epoch": 0.4633082793046073,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00012980251638974733,
      "loss": 0.9065,
      "step": 8075
    },
    {
      "epoch": 0.4635951574961271,
      "grad_norm": 0.291015625,
      "learning_rate": 0.0001297069120057552,
      "loss": 0.9926,
      "step": 8080
    },
    {
      "epoch": 0.463882035687647,
      "grad_norm": 0.2734375,
      "learning_rate": 0.0001296112778316051,
      "loss": 0.9544,
      "step": 8085
    },
    {
      "epoch": 0.4641689138791669,
      "grad_norm": 0.265625,
      "learning_rate": 0.00012951561396319918,
      "loss": 0.9443,
      "step": 8090
    },
    {
      "epoch": 0.46445579207068677,
      "grad_norm": 0.265625,
      "learning_rate": 0.00012941992049646936,
      "loss": 0.996,
      "step": 8095
    },
    {
      "epoch": 0.46474267026220667,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00012932419752737735,
      "loss": 0.9842,
      "step": 8100
    },
    {
      "epoch": 0.46502954845372657,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00012922844515191425,
      "loss": 0.9353,
      "step": 8105
    },
    {
      "epoch": 0.4653164266452464,
      "grad_norm": 0.28125,
      "learning_rate": 0.00012913266346610086,
      "loss": 0.9293,
      "step": 8110
    },
    {
      "epoch": 0.4656033048367663,
      "grad_norm": 0.271484375,
      "learning_rate": 0.0001290368525659872,
      "loss": 0.9549,
      "step": 8115
    },
    {
      "epoch": 0.46589018302828616,
      "grad_norm": 0.2421875,
      "learning_rate": 0.00012894101254765268,
      "loss": 0.98,
      "step": 8120
    },
    {
      "epoch": 0.46617706121980607,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00012884514350720586,
      "loss": 0.8901,
      "step": 8125
    },
    {
      "epoch": 0.46646393941132597,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00012874924554078448,
      "loss": 0.9632,
      "step": 8130
    },
    {
      "epoch": 0.4667508176028458,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00012865331874455517,
      "loss": 0.9504,
      "step": 8135
    },
    {
      "epoch": 0.4670376957943657,
      "grad_norm": 0.25390625,
      "learning_rate": 0.0001285573632147136,
      "loss": 1.0337,
      "step": 8140
    },
    {
      "epoch": 0.4673245739858856,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00012846137904748414,
      "loss": 0.9788,
      "step": 8145
    },
    {
      "epoch": 0.46761145217740546,
      "grad_norm": 0.28125,
      "learning_rate": 0.00012836536633911995,
      "loss": 0.9813,
      "step": 8150
    },
    {
      "epoch": 0.46789833036892536,
      "grad_norm": 0.265625,
      "learning_rate": 0.0001282693251859028,
      "loss": 0.9625,
      "step": 8155
    },
    {
      "epoch": 0.4681852085604452,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00012817325568414297,
      "loss": 0.9879,
      "step": 8160
    },
    {
      "epoch": 0.4684720867519651,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00012807715793017918,
      "loss": 0.9737,
      "step": 8165
    },
    {
      "epoch": 0.468758964943485,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00012798103202037842,
      "loss": 0.9963,
      "step": 8170
    },
    {
      "epoch": 0.46904584313500486,
      "grad_norm": 0.2734375,
      "learning_rate": 0.00012788487805113602,
      "loss": 0.9001,
      "step": 8175
    },
    {
      "epoch": 0.46933272132652476,
      "grad_norm": 0.291015625,
      "learning_rate": 0.0001277886961188754,
      "loss": 1.0332,
      "step": 8180
    },
    {
      "epoch": 0.46961959951804466,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00012769248632004795,
      "loss": 1.0109,
      "step": 8185
    },
    {
      "epoch": 0.4699064777095645,
      "grad_norm": 0.271484375,
      "learning_rate": 0.0001275962487511332,
      "loss": 0.9768,
      "step": 8190
    },
    {
      "epoch": 0.4701933559010844,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00012749998350863827,
      "loss": 0.9383,
      "step": 8195
    },
    {
      "epoch": 0.47048023409260425,
      "grad_norm": 0.248046875,
      "learning_rate": 0.0001274036906890982,
      "loss": 0.9907,
      "step": 8200
    },
    {
      "epoch": 0.47076711228412416,
      "grad_norm": 0.265625,
      "learning_rate": 0.00012730737038907567,
      "loss": 0.9722,
      "step": 8205
    },
    {
      "epoch": 0.47105399047564406,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00012721102270516087,
      "loss": 0.9917,
      "step": 8210
    },
    {
      "epoch": 0.4713408686671639,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00012711464773397152,
      "loss": 0.9361,
      "step": 8215
    },
    {
      "epoch": 0.4716277468586838,
      "grad_norm": 0.396484375,
      "learning_rate": 0.0001270182455721526,
      "loss": 0.9083,
      "step": 8220
    },
    {
      "epoch": 0.4719146250502037,
      "grad_norm": 0.265625,
      "learning_rate": 0.00012692181631637642,
      "loss": 0.9438,
      "step": 8225
    },
    {
      "epoch": 0.47220150324172355,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00012682536006334248,
      "loss": 0.9491,
      "step": 8230
    },
    {
      "epoch": 0.47248838143324345,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00012672887690977732,
      "loss": 0.9634,
      "step": 8235
    },
    {
      "epoch": 0.4727752596247633,
      "grad_norm": 0.265625,
      "learning_rate": 0.00012663236695243448,
      "loss": 0.96,
      "step": 8240
    },
    {
      "epoch": 0.4730621378162832,
      "grad_norm": 0.271484375,
      "learning_rate": 0.0001265358302880943,
      "loss": 0.9325,
      "step": 8245
    },
    {
      "epoch": 0.4733490160078031,
      "grad_norm": 0.283203125,
      "learning_rate": 0.00012643926701356404,
      "loss": 0.9878,
      "step": 8250
    },
    {
      "epoch": 0.47363589419932295,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00012634267722567752,
      "loss": 0.9569,
      "step": 8255
    },
    {
      "epoch": 0.47392277239084285,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00012624606102129516,
      "loss": 0.9397,
      "step": 8260
    },
    {
      "epoch": 0.47420965058236275,
      "grad_norm": 0.2890625,
      "learning_rate": 0.00012614941849730405,
      "loss": 0.9127,
      "step": 8265
    },
    {
      "epoch": 0.4744965287738826,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00012605274975061736,
      "loss": 1.03,
      "step": 8270
    },
    {
      "epoch": 0.4747834069654025,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00012595605487817482,
      "loss": 1.0017,
      "step": 8275
    },
    {
      "epoch": 0.47507028515692235,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00012585933397694224,
      "loss": 0.9802,
      "step": 8280
    },
    {
      "epoch": 0.47535716334844225,
      "grad_norm": 0.2490234375,
      "learning_rate": 0.00012576258714391155,
      "loss": 0.9441,
      "step": 8285
    },
    {
      "epoch": 0.47564404153996215,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00012566581447610072,
      "loss": 1.005,
      "step": 8290
    },
    {
      "epoch": 0.475930919731482,
      "grad_norm": 0.2578125,
      "learning_rate": 0.0001255690160705536,
      "loss": 0.9187,
      "step": 8295
    },
    {
      "epoch": 0.4762177979230019,
      "grad_norm": 0.263671875,
      "learning_rate": 0.0001254721920243398,
      "loss": 0.9247,
      "step": 8300
    },
    {
      "epoch": 0.4765046761145218,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00012537534243455472,
      "loss": 0.9306,
      "step": 8305
    },
    {
      "epoch": 0.47679155430604164,
      "grad_norm": 0.30078125,
      "learning_rate": 0.00012527846739831934,
      "loss": 1.0118,
      "step": 8310
    },
    {
      "epoch": 0.47707843249756154,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00012518156701278019,
      "loss": 0.887,
      "step": 8315
    },
    {
      "epoch": 0.4773653106890814,
      "grad_norm": 0.275390625,
      "learning_rate": 0.0001250846413751092,
      "loss": 1.0722,
      "step": 8320
    },
    {
      "epoch": 0.4776521888806013,
      "grad_norm": 0.283203125,
      "learning_rate": 0.00012498769058250355,
      "loss": 0.9375,
      "step": 8325
    },
    {
      "epoch": 0.4779390670721212,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00012489071473218574,
      "loss": 0.946,
      "step": 8330
    },
    {
      "epoch": 0.47822594526364104,
      "grad_norm": 0.26171875,
      "learning_rate": 0.0001247937139214034,
      "loss": 0.9898,
      "step": 8335
    },
    {
      "epoch": 0.47851282345516094,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00012469668824742914,
      "loss": 0.9774,
      "step": 8340
    },
    {
      "epoch": 0.47879970164668084,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00012459963780756054,
      "loss": 0.986,
      "step": 8345
    },
    {
      "epoch": 0.4790865798382007,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00012450256269911996,
      "loss": 0.9345,
      "step": 8350
    },
    {
      "epoch": 0.4793734580297206,
      "grad_norm": 0.267578125,
      "learning_rate": 0.0001244054630194546,
      "loss": 0.9338,
      "step": 8355
    },
    {
      "epoch": 0.47966033622124044,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00012430833886593613,
      "loss": 0.8914,
      "step": 8360
    },
    {
      "epoch": 0.47994721441276034,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00012421119033596102,
      "loss": 0.9065,
      "step": 8365
    },
    {
      "epoch": 0.48023409260428024,
      "grad_norm": 0.27734375,
      "learning_rate": 0.0001241140175269499,
      "loss": 1.027,
      "step": 8370
    },
    {
      "epoch": 0.4805209707958001,
      "grad_norm": 0.265625,
      "learning_rate": 0.00012401682053634792,
      "loss": 0.9085,
      "step": 8375
    },
    {
      "epoch": 0.48080784898732,
      "grad_norm": 0.283203125,
      "learning_rate": 0.00012391959946162447,
      "loss": 0.976,
      "step": 8380
    },
    {
      "epoch": 0.4810947271788399,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00012382235440027307,
      "loss": 0.8829,
      "step": 8385
    },
    {
      "epoch": 0.48138160537035973,
      "grad_norm": 0.28125,
      "learning_rate": 0.0001237250854498112,
      "loss": 0.9961,
      "step": 8390
    },
    {
      "epoch": 0.48166848356187963,
      "grad_norm": 0.291015625,
      "learning_rate": 0.00012362779270778048,
      "loss": 0.9896,
      "step": 8395
    },
    {
      "epoch": 0.4819553617533995,
      "grad_norm": 0.2734375,
      "learning_rate": 0.00012353047627174625,
      "loss": 0.9684,
      "step": 8400
    },
    {
      "epoch": 0.4822422399449194,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00012343313623929764,
      "loss": 0.9758,
      "step": 8405
    },
    {
      "epoch": 0.4825291181364393,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00012333577270804745,
      "loss": 0.9922,
      "step": 8410
    },
    {
      "epoch": 0.48281599632795913,
      "grad_norm": 0.255859375,
      "learning_rate": 0.0001232383857756321,
      "loss": 0.9422,
      "step": 8415
    },
    {
      "epoch": 0.48310287451947903,
      "grad_norm": 0.298828125,
      "learning_rate": 0.00012314097553971137,
      "loss": 0.9648,
      "step": 8420
    },
    {
      "epoch": 0.48338975271099893,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00012304354209796846,
      "loss": 0.9822,
      "step": 8425
    },
    {
      "epoch": 0.4836766309025188,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00012294608554810988,
      "loss": 0.9452,
      "step": 8430
    },
    {
      "epoch": 0.4839635090940387,
      "grad_norm": 0.265625,
      "learning_rate": 0.00012284860598786525,
      "loss": 0.9833,
      "step": 8435
    },
    {
      "epoch": 0.4842503872855585,
      "grad_norm": 0.265625,
      "learning_rate": 0.0001227511035149873,
      "loss": 0.981,
      "step": 8440
    },
    {
      "epoch": 0.4845372654770784,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00012265357822725172,
      "loss": 0.987,
      "step": 8445
    },
    {
      "epoch": 0.48482414366859833,
      "grad_norm": 0.28125,
      "learning_rate": 0.00012255603022245712,
      "loss": 1.0278,
      "step": 8450
    },
    {
      "epoch": 0.4851110218601182,
      "grad_norm": 0.298828125,
      "learning_rate": 0.0001224584595984248,
      "loss": 0.9869,
      "step": 8455
    },
    {
      "epoch": 0.4853979000516381,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00012236086645299888,
      "loss": 1.0075,
      "step": 8460
    },
    {
      "epoch": 0.485684778243158,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00012226325088404588,
      "loss": 0.9499,
      "step": 8465
    },
    {
      "epoch": 0.4859716564346778,
      "grad_norm": 0.265625,
      "learning_rate": 0.00012216561298945502,
      "loss": 0.9547,
      "step": 8470
    },
    {
      "epoch": 0.4862585346261977,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00012206795286713774,
      "loss": 0.9796,
      "step": 8475
    },
    {
      "epoch": 0.48654541281771757,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00012197027061502781,
      "loss": 0.9642,
      "step": 8480
    },
    {
      "epoch": 0.48683229100923747,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00012187256633108129,
      "loss": 0.9616,
      "step": 8485
    },
    {
      "epoch": 0.4871191692007574,
      "grad_norm": 0.265625,
      "learning_rate": 0.00012177484011327618,
      "loss": 0.9035,
      "step": 8490
    },
    {
      "epoch": 0.4874060473922772,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00012167709205961256,
      "loss": 0.9275,
      "step": 8495
    },
    {
      "epoch": 0.4876929255837971,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00012157932226811246,
      "loss": 0.9153,
      "step": 8500
    },
    {
      "epoch": 0.487979803775317,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00012148153083681954,
      "loss": 1.0136,
      "step": 8505
    },
    {
      "epoch": 0.48826668196683687,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00012138371786379938,
      "loss": 0.943,
      "step": 8510
    },
    {
      "epoch": 0.48855356015835677,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00012128588344713899,
      "loss": 0.9297,
      "step": 8515
    },
    {
      "epoch": 0.4888404383498766,
      "grad_norm": 0.265625,
      "learning_rate": 0.0001211880276849469,
      "loss": 0.96,
      "step": 8520
    },
    {
      "epoch": 0.4891273165413965,
      "grad_norm": 0.265625,
      "learning_rate": 0.00012109015067535321,
      "loss": 0.9569,
      "step": 8525
    },
    {
      "epoch": 0.4894141947329164,
      "grad_norm": 0.283203125,
      "learning_rate": 0.00012099225251650907,
      "loss": 0.9621,
      "step": 8530
    },
    {
      "epoch": 0.48970107292443626,
      "grad_norm": 0.29296875,
      "learning_rate": 0.00012089433330658705,
      "loss": 1.0271,
      "step": 8535
    },
    {
      "epoch": 0.48998795111595617,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00012079639314378075,
      "loss": 0.9216,
      "step": 8540
    },
    {
      "epoch": 0.49027482930747607,
      "grad_norm": 0.314453125,
      "learning_rate": 0.00012069843212630474,
      "loss": 0.8654,
      "step": 8545
    },
    {
      "epoch": 0.4905617074989959,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00012060045035239465,
      "loss": 0.8843,
      "step": 8550
    },
    {
      "epoch": 0.4908485856905158,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00012050244792030667,
      "loss": 0.9073,
      "step": 8555
    },
    {
      "epoch": 0.49113546388203566,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00012040442492831798,
      "loss": 0.9647,
      "step": 8560
    },
    {
      "epoch": 0.49142234207355556,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00012030638147472623,
      "loss": 0.9147,
      "step": 8565
    },
    {
      "epoch": 0.49170922026507546,
      "grad_norm": 0.28125,
      "learning_rate": 0.00012020831765784957,
      "loss": 0.9259,
      "step": 8570
    },
    {
      "epoch": 0.4919960984565953,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00012011023357602668,
      "loss": 0.9544,
      "step": 8575
    },
    {
      "epoch": 0.4922829766481152,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00012001212932761645,
      "loss": 0.8561,
      "step": 8580
    },
    {
      "epoch": 0.4925698548396351,
      "grad_norm": 0.2734375,
      "learning_rate": 0.00011991400501099805,
      "loss": 0.9172,
      "step": 8585
    },
    {
      "epoch": 0.49285673303115496,
      "grad_norm": 0.28515625,
      "learning_rate": 0.00011981586072457078,
      "loss": 0.9463,
      "step": 8590
    },
    {
      "epoch": 0.49314361122267486,
      "grad_norm": 0.2490234375,
      "learning_rate": 0.00011971769656675391,
      "loss": 0.9951,
      "step": 8595
    },
    {
      "epoch": 0.4934304894141947,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00011961951263598677,
      "loss": 0.9508,
      "step": 8600
    },
    {
      "epoch": 0.4937173676057146,
      "grad_norm": 0.298828125,
      "learning_rate": 0.00011952130903072832,
      "loss": 0.9669,
      "step": 8605
    },
    {
      "epoch": 0.4940042457972345,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00011942308584945741,
      "loss": 1.0121,
      "step": 8610
    },
    {
      "epoch": 0.49429112398875436,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00011932484319067245,
      "loss": 0.983,
      "step": 8615
    },
    {
      "epoch": 0.49457800218027426,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00011922658115289141,
      "loss": 0.9413,
      "step": 8620
    },
    {
      "epoch": 0.49486488037179416,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00011912829983465168,
      "loss": 0.9039,
      "step": 8625
    },
    {
      "epoch": 0.495151758563314,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00011902999933450997,
      "loss": 0.9747,
      "step": 8630
    },
    {
      "epoch": 0.4954386367548339,
      "grad_norm": 0.26953125,
      "learning_rate": 0.0001189316797510423,
      "loss": 0.9519,
      "step": 8635
    },
    {
      "epoch": 0.49572551494635375,
      "grad_norm": 0.306640625,
      "learning_rate": 0.00011883334118284369,
      "loss": 0.9836,
      "step": 8640
    },
    {
      "epoch": 0.49601239313787365,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00011873498372852828,
      "loss": 1.0067,
      "step": 8645
    },
    {
      "epoch": 0.49629927132939355,
      "grad_norm": 0.28125,
      "learning_rate": 0.0001186366074867292,
      "loss": 0.8766,
      "step": 8650
    },
    {
      "epoch": 0.4965861495209134,
      "grad_norm": 0.310546875,
      "learning_rate": 0.00011853821255609836,
      "loss": 1.0009,
      "step": 8655
    },
    {
      "epoch": 0.4968730277124333,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00011843979903530638,
      "loss": 0.9815,
      "step": 8660
    },
    {
      "epoch": 0.4971599059039532,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00011834136702304257,
      "loss": 0.9317,
      "step": 8665
    },
    {
      "epoch": 0.49744678409547305,
      "grad_norm": 0.2734375,
      "learning_rate": 0.00011824291661801479,
      "loss": 0.9523,
      "step": 8670
    },
    {
      "epoch": 0.49773366228699295,
      "grad_norm": 0.265625,
      "learning_rate": 0.00011814444791894934,
      "loss": 0.9784,
      "step": 8675
    },
    {
      "epoch": 0.4980205404785128,
      "grad_norm": 0.294921875,
      "learning_rate": 0.0001180459610245908,
      "loss": 1.0339,
      "step": 8680
    },
    {
      "epoch": 0.4983074186700327,
      "grad_norm": 0.24609375,
      "learning_rate": 0.00011794745603370212,
      "loss": 0.9443,
      "step": 8685
    },
    {
      "epoch": 0.4985942968615526,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00011784893304506424,
      "loss": 0.9769,
      "step": 8690
    },
    {
      "epoch": 0.49888117505307245,
      "grad_norm": 0.287109375,
      "learning_rate": 0.0001177503921574763,
      "loss": 0.9559,
      "step": 8695
    },
    {
      "epoch": 0.49916805324459235,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00011765183346975528,
      "loss": 0.9302,
      "step": 8700
    },
    {
      "epoch": 0.49945493143611225,
      "grad_norm": 0.275390625,
      "learning_rate": 0.0001175532570807361,
      "loss": 0.9505,
      "step": 8705
    },
    {
      "epoch": 0.4997418096276321,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00011745466308927136,
      "loss": 1.001,
      "step": 8710
    },
    {
      "epoch": 0.500028687819152,
      "grad_norm": 0.2734375,
      "learning_rate": 0.00011735605159423131,
      "loss": 0.8966,
      "step": 8715
    },
    {
      "epoch": 0.5003155660106718,
      "grad_norm": 0.2490234375,
      "learning_rate": 0.00011725742269450382,
      "loss": 0.8901,
      "step": 8720
    },
    {
      "epoch": 0.5006024442021918,
      "grad_norm": 0.287109375,
      "learning_rate": 0.00011715877648899413,
      "loss": 0.9536,
      "step": 8725
    },
    {
      "epoch": 0.5008893223937116,
      "grad_norm": 0.259765625,
      "learning_rate": 0.0001170601130766249,
      "loss": 0.9002,
      "step": 8730
    },
    {
      "epoch": 0.5011762005852315,
      "grad_norm": 0.2734375,
      "learning_rate": 0.00011696143255633607,
      "loss": 0.9495,
      "step": 8735
    },
    {
      "epoch": 0.5014630787767513,
      "grad_norm": 0.26171875,
      "learning_rate": 0.0001168627350270846,
      "loss": 0.9705,
      "step": 8740
    },
    {
      "epoch": 0.5017499569682713,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00011676402058784463,
      "loss": 0.9148,
      "step": 8745
    },
    {
      "epoch": 0.5020368351597911,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00011666528933760725,
      "loss": 0.9683,
      "step": 8750
    },
    {
      "epoch": 0.502323713351311,
      "grad_norm": 0.30859375,
      "learning_rate": 0.00011656654137538032,
      "loss": 0.9706,
      "step": 8755
    },
    {
      "epoch": 0.5026105915428309,
      "grad_norm": 0.255859375,
      "learning_rate": 0.0001164677768001886,
      "loss": 0.9102,
      "step": 8760
    },
    {
      "epoch": 0.5028974697343508,
      "grad_norm": 0.265625,
      "learning_rate": 0.00011636899571107333,
      "loss": 0.8819,
      "step": 8765
    },
    {
      "epoch": 0.5031843479258706,
      "grad_norm": 0.28515625,
      "learning_rate": 0.00011627019820709246,
      "loss": 0.9267,
      "step": 8770
    },
    {
      "epoch": 0.5034712261173906,
      "grad_norm": 0.28125,
      "learning_rate": 0.00011617138438732036,
      "loss": 0.9334,
      "step": 8775
    },
    {
      "epoch": 0.5037581043089104,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00011607255435084772,
      "loss": 0.9174,
      "step": 8780
    },
    {
      "epoch": 0.5040449825004303,
      "grad_norm": 0.265625,
      "learning_rate": 0.00011597370819678157,
      "loss": 0.9466,
      "step": 8785
    },
    {
      "epoch": 0.5043318606919502,
      "grad_norm": 0.58203125,
      "learning_rate": 0.00011587484602424499,
      "loss": 0.9153,
      "step": 8790
    },
    {
      "epoch": 0.5046187388834701,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00011577596793237722,
      "loss": 0.9667,
      "step": 8795
    },
    {
      "epoch": 0.5049056170749899,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00011567707402033345,
      "loss": 0.9371,
      "step": 8800
    },
    {
      "epoch": 0.5051924952665099,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00011557816438728467,
      "loss": 0.9089,
      "step": 8805
    },
    {
      "epoch": 0.5054793734580297,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00011547923913241774,
      "loss": 0.9599,
      "step": 8810
    },
    {
      "epoch": 0.5057662516495496,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00011538029835493507,
      "loss": 0.9268,
      "step": 8815
    },
    {
      "epoch": 0.5060531298410694,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00011528134215405473,
      "loss": 0.955,
      "step": 8820
    },
    {
      "epoch": 0.5063400080325894,
      "grad_norm": 0.287109375,
      "learning_rate": 0.00011518237062901023,
      "loss": 1.008,
      "step": 8825
    },
    {
      "epoch": 0.5066268862241092,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00011508338387905038,
      "loss": 1.0123,
      "step": 8830
    },
    {
      "epoch": 0.5069137644156291,
      "grad_norm": 0.25390625,
      "learning_rate": 0.0001149843820034394,
      "loss": 0.8719,
      "step": 8835
    },
    {
      "epoch": 0.507200642607149,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00011488536510145651,
      "loss": 0.9107,
      "step": 8840
    },
    {
      "epoch": 0.5074875207986689,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00011478633327239614,
      "loss": 0.9925,
      "step": 8845
    },
    {
      "epoch": 0.5077743989901887,
      "grad_norm": 0.267578125,
      "learning_rate": 0.0001146872866155676,
      "loss": 0.9601,
      "step": 8850
    },
    {
      "epoch": 0.5080612771817087,
      "grad_norm": 0.283203125,
      "learning_rate": 0.00011458822523029509,
      "loss": 0.9667,
      "step": 8855
    },
    {
      "epoch": 0.5083481553732285,
      "grad_norm": 0.287109375,
      "learning_rate": 0.00011448914921591765,
      "loss": 0.9374,
      "step": 8860
    },
    {
      "epoch": 0.5086350335647484,
      "grad_norm": 0.35546875,
      "learning_rate": 0.00011439005867178884,
      "loss": 0.9557,
      "step": 8865
    },
    {
      "epoch": 0.5089219117562683,
      "grad_norm": 0.2734375,
      "learning_rate": 0.00011429095369727696,
      "loss": 0.8917,
      "step": 8870
    },
    {
      "epoch": 0.5092087899477882,
      "grad_norm": 0.24609375,
      "learning_rate": 0.00011419183439176464,
      "loss": 0.9163,
      "step": 8875
    },
    {
      "epoch": 0.509495668139308,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00011409270085464898,
      "loss": 0.9374,
      "step": 8880
    },
    {
      "epoch": 0.509782546330828,
      "grad_norm": 0.265625,
      "learning_rate": 0.0001139935531853413,
      "loss": 1.0086,
      "step": 8885
    },
    {
      "epoch": 0.5100694245223478,
      "grad_norm": 0.259765625,
      "learning_rate": 0.0001138943914832671,
      "loss": 0.9003,
      "step": 8890
    },
    {
      "epoch": 0.5103563027138677,
      "grad_norm": 0.265625,
      "learning_rate": 0.00011379521584786599,
      "loss": 0.9237,
      "step": 8895
    },
    {
      "epoch": 0.5106431809053875,
      "grad_norm": 0.279296875,
      "learning_rate": 0.0001136960263785915,
      "loss": 0.9486,
      "step": 8900
    },
    {
      "epoch": 0.5109300590969075,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00011359682317491098,
      "loss": 0.9745,
      "step": 8905
    },
    {
      "epoch": 0.5112169372884273,
      "grad_norm": 0.28125,
      "learning_rate": 0.00011349760633630575,
      "loss": 0.9179,
      "step": 8910
    },
    {
      "epoch": 0.5115038154799472,
      "grad_norm": 0.265625,
      "learning_rate": 0.00011339837596227061,
      "loss": 0.9705,
      "step": 8915
    },
    {
      "epoch": 0.5117906936714671,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00011329913215231401,
      "loss": 0.9673,
      "step": 8920
    },
    {
      "epoch": 0.512077571862987,
      "grad_norm": 0.298828125,
      "learning_rate": 0.00011319987500595785,
      "loss": 0.9442,
      "step": 8925
    },
    {
      "epoch": 0.5123644500545068,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00011310060462273744,
      "loss": 0.9693,
      "step": 8930
    },
    {
      "epoch": 0.5126513282460268,
      "grad_norm": 0.265625,
      "learning_rate": 0.00011300132110220134,
      "loss": 0.9383,
      "step": 8935
    },
    {
      "epoch": 0.5129382064375466,
      "grad_norm": 0.28125,
      "learning_rate": 0.0001129020245439113,
      "loss": 0.9474,
      "step": 8940
    },
    {
      "epoch": 0.5132250846290665,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00011280271504744208,
      "loss": 0.9741,
      "step": 8945
    },
    {
      "epoch": 0.5135119628205864,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00011270339271238153,
      "loss": 0.9523,
      "step": 8950
    },
    {
      "epoch": 0.5137988410121063,
      "grad_norm": 0.265625,
      "learning_rate": 0.00011260405763833029,
      "loss": 0.92,
      "step": 8955
    },
    {
      "epoch": 0.5140857192036261,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00011250470992490176,
      "loss": 0.9989,
      "step": 8960
    },
    {
      "epoch": 0.5143725973951461,
      "grad_norm": 0.263671875,
      "learning_rate": 0.0001124053496717221,
      "loss": 0.9641,
      "step": 8965
    },
    {
      "epoch": 0.5146594755866659,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00011230597697842998,
      "loss": 0.9714,
      "step": 8970
    },
    {
      "epoch": 0.5149463537781858,
      "grad_norm": 0.26953125,
      "learning_rate": 0.0001122065919446765,
      "loss": 0.9023,
      "step": 8975
    },
    {
      "epoch": 0.5152332319697057,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00011210719467012529,
      "loss": 0.8879,
      "step": 8980
    },
    {
      "epoch": 0.5155201101612256,
      "grad_norm": 0.29296875,
      "learning_rate": 0.0001120077852544521,
      "loss": 1.0173,
      "step": 8985
    },
    {
      "epoch": 0.5158069883527454,
      "grad_norm": 0.306640625,
      "learning_rate": 0.00011190836379734495,
      "loss": 0.99,
      "step": 8990
    },
    {
      "epoch": 0.5160938665442653,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00011180893039850388,
      "loss": 0.9472,
      "step": 8995
    },
    {
      "epoch": 0.5163807447357852,
      "grad_norm": 0.279296875,
      "learning_rate": 0.00011170948515764088,
      "loss": 0.9285,
      "step": 9000
    },
    {
      "epoch": 0.5166676229273051,
      "grad_norm": 0.28515625,
      "learning_rate": 0.00011161002817447996,
      "loss": 1.0298,
      "step": 9005
    },
    {
      "epoch": 0.5169545011188249,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00011151055954875673,
      "loss": 0.8893,
      "step": 9010
    },
    {
      "epoch": 0.5172413793103449,
      "grad_norm": 0.28125,
      "learning_rate": 0.00011141107938021858,
      "loss": 0.9669,
      "step": 9015
    },
    {
      "epoch": 0.5175282575018647,
      "grad_norm": 0.283203125,
      "learning_rate": 0.00011131158776862445,
      "loss": 0.9924,
      "step": 9020
    },
    {
      "epoch": 0.5178151356933846,
      "grad_norm": 0.271484375,
      "learning_rate": 0.0001112120848137447,
      "loss": 0.9652,
      "step": 9025
    },
    {
      "epoch": 0.5181020138849045,
      "grad_norm": 0.251953125,
      "learning_rate": 0.0001111125706153612,
      "loss": 0.8898,
      "step": 9030
    },
    {
      "epoch": 0.5183888920764244,
      "grad_norm": 0.296875,
      "learning_rate": 0.00011101304527326695,
      "loss": 0.9597,
      "step": 9035
    },
    {
      "epoch": 0.5186757702679442,
      "grad_norm": 0.28125,
      "learning_rate": 0.00011091350888726619,
      "loss": 1.0008,
      "step": 9040
    },
    {
      "epoch": 0.5189626484594642,
      "grad_norm": 0.294921875,
      "learning_rate": 0.0001108139615571743,
      "loss": 0.9604,
      "step": 9045
    },
    {
      "epoch": 0.519249526650984,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00011071440338281745,
      "loss": 0.9893,
      "step": 9050
    },
    {
      "epoch": 0.5195364048425039,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00011061483446403289,
      "loss": 0.9492,
      "step": 9055
    },
    {
      "epoch": 0.5198232830340238,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00011051525490066852,
      "loss": 0.9726,
      "step": 9060
    },
    {
      "epoch": 0.5201101612255437,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00011041566479258294,
      "loss": 0.9528,
      "step": 9065
    },
    {
      "epoch": 0.5203970394170635,
      "grad_norm": 0.267578125,
      "learning_rate": 0.0001103160642396454,
      "loss": 0.9567,
      "step": 9070
    },
    {
      "epoch": 0.5206839176085833,
      "grad_norm": 0.2734375,
      "learning_rate": 0.00011021645334173547,
      "loss": 0.9617,
      "step": 9075
    },
    {
      "epoch": 0.5209707958001033,
      "grad_norm": 0.283203125,
      "learning_rate": 0.00011011683219874323,
      "loss": 0.9781,
      "step": 9080
    },
    {
      "epoch": 0.5212576739916231,
      "grad_norm": 0.291015625,
      "learning_rate": 0.00011001720091056897,
      "loss": 0.9156,
      "step": 9085
    },
    {
      "epoch": 0.521544552183143,
      "grad_norm": 0.240234375,
      "learning_rate": 0.00010991755957712318,
      "loss": 0.9074,
      "step": 9090
    },
    {
      "epoch": 0.521831430374663,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00010981790829832641,
      "loss": 0.9704,
      "step": 9095
    },
    {
      "epoch": 0.5221183085661828,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00010971824717410917,
      "loss": 0.9449,
      "step": 9100
    },
    {
      "epoch": 0.5224051867577026,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00010961857630441187,
      "loss": 0.9592,
      "step": 9105
    },
    {
      "epoch": 0.5226920649492226,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00010951889578918471,
      "loss": 0.9461,
      "step": 9110
    },
    {
      "epoch": 0.5229789431407424,
      "grad_norm": 0.2490234375,
      "learning_rate": 0.00010941920572838747,
      "loss": 0.9055,
      "step": 9115
    },
    {
      "epoch": 0.5232658213322623,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00010931950622198965,
      "loss": 0.9994,
      "step": 9120
    },
    {
      "epoch": 0.5235526995237823,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00010921979736997006,
      "loss": 0.9529,
      "step": 9125
    },
    {
      "epoch": 0.5238395777153021,
      "grad_norm": 0.271484375,
      "learning_rate": 0.000109120079272317,
      "loss": 0.9471,
      "step": 9130
    },
    {
      "epoch": 0.5241264559068219,
      "grad_norm": 0.28125,
      "learning_rate": 0.00010902035202902798,
      "loss": 0.9856,
      "step": 9135
    },
    {
      "epoch": 0.5244133340983419,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00010892061574010972,
      "loss": 1.0034,
      "step": 9140
    },
    {
      "epoch": 0.5247002122898617,
      "grad_norm": 0.28125,
      "learning_rate": 0.00010882087050557803,
      "loss": 0.9817,
      "step": 9145
    },
    {
      "epoch": 0.5249870904813816,
      "grad_norm": 0.291015625,
      "learning_rate": 0.00010872111642545759,
      "loss": 0.9766,
      "step": 9150
    },
    {
      "epoch": 0.5252739686729014,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00010862135359978205,
      "loss": 0.9329,
      "step": 9155
    },
    {
      "epoch": 0.5255608468644214,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00010852158212859378,
      "loss": 0.9674,
      "step": 9160
    },
    {
      "epoch": 0.5258477250559412,
      "grad_norm": 0.265625,
      "learning_rate": 0.00010842180211194384,
      "loss": 0.9627,
      "step": 9165
    },
    {
      "epoch": 0.5261346032474611,
      "grad_norm": 0.283203125,
      "learning_rate": 0.00010832201364989186,
      "loss": 0.8957,
      "step": 9170
    },
    {
      "epoch": 0.526421481438981,
      "grad_norm": 0.265625,
      "learning_rate": 0.00010822221684250593,
      "loss": 0.9555,
      "step": 9175
    },
    {
      "epoch": 0.5267083596305009,
      "grad_norm": 0.27734375,
      "learning_rate": 0.00010812241178986254,
      "loss": 0.9301,
      "step": 9180
    },
    {
      "epoch": 0.5269952378220207,
      "grad_norm": 0.265625,
      "learning_rate": 0.00010802259859204635,
      "loss": 0.9894,
      "step": 9185
    },
    {
      "epoch": 0.5272821160135407,
      "grad_norm": 0.375,
      "learning_rate": 0.00010792277734915033,
      "loss": 0.9212,
      "step": 9190
    },
    {
      "epoch": 0.5275689942050605,
      "grad_norm": 0.2578125,
      "learning_rate": 0.0001078229481612754,
      "loss": 0.9731,
      "step": 9195
    },
    {
      "epoch": 0.5278558723965804,
      "grad_norm": 0.2734375,
      "learning_rate": 0.00010772311112853053,
      "loss": 1.0153,
      "step": 9200
    },
    {
      "epoch": 0.5281427505881003,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00010762326635103251,
      "loss": 1.0092,
      "step": 9205
    },
    {
      "epoch": 0.5284296287796202,
      "grad_norm": 0.28125,
      "learning_rate": 0.00010752341392890587,
      "loss": 0.997,
      "step": 9210
    },
    {
      "epoch": 0.52871650697114,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00010742355396228287,
      "loss": 0.9526,
      "step": 9215
    },
    {
      "epoch": 0.52900338516266,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00010732368655130333,
      "loss": 0.8901,
      "step": 9220
    },
    {
      "epoch": 0.5292902633541798,
      "grad_norm": 0.283203125,
      "learning_rate": 0.00010722381179611449,
      "loss": 1.0495,
      "step": 9225
    },
    {
      "epoch": 0.5295771415456997,
      "grad_norm": 0.28125,
      "learning_rate": 0.000107123929796871,
      "loss": 0.8588,
      "step": 9230
    },
    {
      "epoch": 0.5298640197372195,
      "grad_norm": 0.267578125,
      "learning_rate": 0.0001070240406537347,
      "loss": 0.9422,
      "step": 9235
    },
    {
      "epoch": 0.5301508979287395,
      "grad_norm": 0.265625,
      "learning_rate": 0.00010692414446687471,
      "loss": 0.9645,
      "step": 9240
    },
    {
      "epoch": 0.5304377761202593,
      "grad_norm": 0.275390625,
      "learning_rate": 0.0001068242413364671,
      "loss": 0.9756,
      "step": 9245
    },
    {
      "epoch": 0.5307246543117792,
      "grad_norm": 0.291015625,
      "learning_rate": 0.00010672433136269499,
      "loss": 0.9063,
      "step": 9250
    },
    {
      "epoch": 0.5310115325032991,
      "grad_norm": 0.302734375,
      "learning_rate": 0.00010662441464574833,
      "loss": 0.9121,
      "step": 9255
    },
    {
      "epoch": 0.531298410694819,
      "grad_norm": 0.296875,
      "learning_rate": 0.00010652449128582376,
      "loss": 0.9811,
      "step": 9260
    },
    {
      "epoch": 0.5315852888863388,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00010642456138312473,
      "loss": 0.9502,
      "step": 9265
    },
    {
      "epoch": 0.5318721670778588,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00010632462503786114,
      "loss": 0.9152,
      "step": 9270
    },
    {
      "epoch": 0.5321590452693786,
      "grad_norm": 0.265625,
      "learning_rate": 0.00010622468235024936,
      "loss": 0.9361,
      "step": 9275
    },
    {
      "epoch": 0.5324459234608985,
      "grad_norm": 0.28515625,
      "learning_rate": 0.00010612473342051219,
      "loss": 0.8926,
      "step": 9280
    },
    {
      "epoch": 0.5327328016524184,
      "grad_norm": 0.25,
      "learning_rate": 0.00010602477834887858,
      "loss": 0.8839,
      "step": 9285
    },
    {
      "epoch": 0.5330196798439383,
      "grad_norm": 0.255859375,
      "learning_rate": 0.00010592481723558374,
      "loss": 0.9026,
      "step": 9290
    },
    {
      "epoch": 0.5333065580354581,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00010582485018086891,
      "loss": 0.9264,
      "step": 9295
    },
    {
      "epoch": 0.5335934362269781,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00010572487728498127,
      "loss": 0.9399,
      "step": 9300
    },
    {
      "epoch": 0.5338803144184979,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00010562489864817382,
      "loss": 0.9237,
      "step": 9305
    },
    {
      "epoch": 0.5341671926100178,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00010552491437070537,
      "loss": 0.9861,
      "step": 9310
    },
    {
      "epoch": 0.5344540708015376,
      "grad_norm": 0.291015625,
      "learning_rate": 0.00010542492455284043,
      "loss": 0.944,
      "step": 9315
    },
    {
      "epoch": 0.5347409489930576,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00010532492929484898,
      "loss": 0.9637,
      "step": 9320
    },
    {
      "epoch": 0.5350278271845774,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00010522492869700648,
      "loss": 0.9383,
      "step": 9325
    },
    {
      "epoch": 0.5353147053760973,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00010512492285959382,
      "loss": 0.9444,
      "step": 9330
    },
    {
      "epoch": 0.5356015835676172,
      "grad_norm": 0.28515625,
      "learning_rate": 0.00010502491188289695,
      "loss": 0.9563,
      "step": 9335
    },
    {
      "epoch": 0.5358884617591371,
      "grad_norm": 0.265625,
      "learning_rate": 0.00010492489586720724,
      "loss": 0.9869,
      "step": 9340
    },
    {
      "epoch": 0.5361753399506569,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00010482487491282089,
      "loss": 0.874,
      "step": 9345
    },
    {
      "epoch": 0.5364622181421769,
      "grad_norm": 0.287109375,
      "learning_rate": 0.00010472484912003913,
      "loss": 1.016,
      "step": 9350
    },
    {
      "epoch": 0.5367490963336967,
      "grad_norm": 0.2734375,
      "learning_rate": 0.00010462481858916812,
      "loss": 0.9813,
      "step": 9355
    },
    {
      "epoch": 0.5370359745252166,
      "grad_norm": 0.287109375,
      "learning_rate": 0.0001045247834205186,
      "loss": 0.9562,
      "step": 9360
    },
    {
      "epoch": 0.5373228527167365,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00010442474371440618,
      "loss": 0.9599,
      "step": 9365
    },
    {
      "epoch": 0.5376097309082564,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00010432469957115083,
      "loss": 0.9684,
      "step": 9370
    },
    {
      "epoch": 0.5378966090997762,
      "grad_norm": 0.30859375,
      "learning_rate": 0.00010422465109107702,
      "loss": 0.982,
      "step": 9375
    },
    {
      "epoch": 0.5381834872912962,
      "grad_norm": 0.287109375,
      "learning_rate": 0.00010412459837451367,
      "loss": 1.018,
      "step": 9380
    },
    {
      "epoch": 0.538470365482816,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00010402454152179377,
      "loss": 0.9333,
      "step": 9385
    },
    {
      "epoch": 0.5387572436743359,
      "grad_norm": 0.28125,
      "learning_rate": 0.00010392448063325463,
      "loss": 0.9124,
      "step": 9390
    },
    {
      "epoch": 0.5390441218658557,
      "grad_norm": 0.283203125,
      "learning_rate": 0.00010382441580923752,
      "loss": 0.978,
      "step": 9395
    },
    {
      "epoch": 0.5393310000573757,
      "grad_norm": 0.287109375,
      "learning_rate": 0.00010372434715008763,
      "loss": 0.9982,
      "step": 9400
    },
    {
      "epoch": 0.5396178782488955,
      "grad_norm": 0.25,
      "learning_rate": 0.00010362427475615413,
      "loss": 0.9829,
      "step": 9405
    },
    {
      "epoch": 0.5399047564404154,
      "grad_norm": 0.28125,
      "learning_rate": 0.00010352419872778971,
      "loss": 0.8918,
      "step": 9410
    },
    {
      "epoch": 0.5401916346319353,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00010342411916535093,
      "loss": 0.9446,
      "step": 9415
    },
    {
      "epoch": 0.5404785128234552,
      "grad_norm": 0.263671875,
      "learning_rate": 0.00010332403616919779,
      "loss": 0.963,
      "step": 9420
    },
    {
      "epoch": 0.540765391014975,
      "grad_norm": 0.298828125,
      "learning_rate": 0.00010322394983969368,
      "loss": 0.9848,
      "step": 9425
    },
    {
      "epoch": 0.541052269206495,
      "grad_norm": 0.333984375,
      "learning_rate": 0.0001031238602772055,
      "loss": 0.921,
      "step": 9430
    },
    {
      "epoch": 0.5413391473980148,
      "grad_norm": 0.275390625,
      "learning_rate": 0.00010302376758210319,
      "loss": 0.9669,
      "step": 9435
    },
    {
      "epoch": 0.5416260255895347,
      "grad_norm": 0.28125,
      "learning_rate": 0.00010292367185475997,
      "loss": 0.979,
      "step": 9440
    },
    {
      "epoch": 0.5419129037810546,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00010282357319555207,
      "loss": 0.963,
      "step": 9445
    },
    {
      "epoch": 0.5421997819725745,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00010272347170485863,
      "loss": 0.977,
      "step": 9450
    },
    {
      "epoch": 0.5424866601640943,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00010262336748306165,
      "loss": 0.9735,
      "step": 9455
    },
    {
      "epoch": 0.5427735383556143,
      "grad_norm": 0.263671875,
      "learning_rate": 0.0001025232606305459,
      "loss": 0.9852,
      "step": 9460
    },
    {
      "epoch": 0.5430604165471341,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00010242315124769872,
      "loss": 0.9374,
      "step": 9465
    },
    {
      "epoch": 0.543347294738654,
      "grad_norm": 0.28125,
      "learning_rate": 0.00010232303943491004,
      "loss": 1.0392,
      "step": 9470
    },
    {
      "epoch": 0.5436341729301738,
      "grad_norm": 0.26171875,
      "learning_rate": 0.00010222292529257217,
      "loss": 0.9898,
      "step": 9475
    },
    {
      "epoch": 0.5439210511216938,
      "grad_norm": 0.28125,
      "learning_rate": 0.00010212280892107988,
      "loss": 0.956,
      "step": 9480
    },
    {
      "epoch": 0.5442079293132136,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00010202269042083001,
      "loss": 0.9132,
      "step": 9485
    },
    {
      "epoch": 0.5444948075047334,
      "grad_norm": 0.30859375,
      "learning_rate": 0.00010192256989222169,
      "loss": 0.9389,
      "step": 9490
    },
    {
      "epoch": 0.5447816856962534,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00010182244743565594,
      "loss": 0.8723,
      "step": 9495
    },
    {
      "epoch": 0.5450685638877732,
      "grad_norm": 0.322265625,
      "learning_rate": 0.0001017223231515358,
      "loss": 0.9266,
      "step": 9500
    },
    {
      "epoch": 0.5453554420792931,
      "grad_norm": 0.271484375,
      "learning_rate": 0.00010162219714026617,
      "loss": 0.9138,
      "step": 9505
    },
    {
      "epoch": 0.545642320270813,
      "grad_norm": 0.275390625,
      "learning_rate": 0.0001015220695022536,
      "loss": 0.9252,
      "step": 9510
    },
    {
      "epoch": 0.5459291984623329,
      "grad_norm": 0.25390625,
      "learning_rate": 0.00010142194033790633,
      "loss": 0.908,
      "step": 9515
    },
    {
      "epoch": 0.5462160766538527,
      "grad_norm": 0.265625,
      "learning_rate": 0.0001013218097476341,
      "loss": 0.9012,
      "step": 9520
    },
    {
      "epoch": 0.5465029548453727,
      "grad_norm": 0.296875,
      "learning_rate": 0.00010122167783184806,
      "loss": 0.9661,
      "step": 9525
    },
    {
      "epoch": 0.5467898330368925,
      "grad_norm": 0.287109375,
      "learning_rate": 0.00010112154469096078,
      "loss": 0.9656,
      "step": 9530
    },
    {
      "epoch": 0.5470767112284124,
      "grad_norm": 0.2734375,
      "learning_rate": 0.00010102141042538597,
      "loss": 0.9999,
      "step": 9535
    },
    {
      "epoch": 0.5473635894199324,
      "grad_norm": 0.33984375,
      "learning_rate": 0.0001009212751355385,
      "loss": 0.8986,
      "step": 9540
    },
    {
      "epoch": 0.5476504676114522,
      "grad_norm": 0.251953125,
      "learning_rate": 0.00010082113892183423,
      "loss": 0.9011,
      "step": 9545
    },
    {
      "epoch": 0.547937345802972,
      "grad_norm": 0.248046875,
      "learning_rate": 0.00010072100188469002,
      "loss": 0.9079,
      "step": 9550
    },
    {
      "epoch": 0.5482242239944919,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00010062086412452352,
      "loss": 0.9173,
      "step": 9555
    },
    {
      "epoch": 0.5485111021860118,
      "grad_norm": 0.2578125,
      "learning_rate": 0.00010052072574175306,
      "loss": 0.9978,
      "step": 9560
    },
    {
      "epoch": 0.5487979803775317,
      "grad_norm": 0.267578125,
      "learning_rate": 0.00010042058683679769,
      "loss": 0.9124,
      "step": 9565
    },
    {
      "epoch": 0.5490848585690515,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00010032044751007685,
      "loss": 0.9591,
      "step": 9570
    },
    {
      "epoch": 0.5493717367605715,
      "grad_norm": 0.265625,
      "learning_rate": 0.00010022030786201058,
      "loss": 0.9951,
      "step": 9575
    },
    {
      "epoch": 0.5496586149520913,
      "grad_norm": 0.26953125,
      "learning_rate": 0.00010012016799301907,
      "loss": 0.9088,
      "step": 9580
    },
    {
      "epoch": 0.5499454931436112,
      "grad_norm": 0.259765625,
      "learning_rate": 0.00010002002800352281,
      "loss": 0.9513,
      "step": 9585
    },
    {
      "epoch": 0.5502323713351311,
      "grad_norm": 0.255859375,
      "learning_rate": 9.991988799394245e-05,
      "loss": 0.8671,
      "step": 9590
    },
    {
      "epoch": 0.550519249526651,
      "grad_norm": 0.26953125,
      "learning_rate": 9.981974806469858e-05,
      "loss": 0.9622,
      "step": 9595
    },
    {
      "epoch": 0.5508061277181708,
      "grad_norm": 0.26953125,
      "learning_rate": 9.971960831621173e-05,
      "loss": 0.9097,
      "step": 9600
    },
    {
      "epoch": 0.5510930059096908,
      "grad_norm": 0.28515625,
      "learning_rate": 9.961946884890232e-05,
      "loss": 1.0333,
      "step": 9605
    },
    {
      "epoch": 0.5513798841012106,
      "grad_norm": 0.275390625,
      "learning_rate": 9.951932976319041e-05,
      "loss": 0.9286,
      "step": 9610
    },
    {
      "epoch": 0.5516667622927305,
      "grad_norm": 0.26171875,
      "learning_rate": 9.941919115949565e-05,
      "loss": 0.8962,
      "step": 9615
    },
    {
      "epoch": 0.5519536404842504,
      "grad_norm": 0.30078125,
      "learning_rate": 9.931905313823733e-05,
      "loss": 1.0276,
      "step": 9620
    },
    {
      "epoch": 0.5522405186757703,
      "grad_norm": 0.271484375,
      "learning_rate": 9.921891579983404e-05,
      "loss": 0.9283,
      "step": 9625
    },
    {
      "epoch": 0.5525273968672901,
      "grad_norm": 0.265625,
      "learning_rate": 9.911877924470373e-05,
      "loss": 0.9175,
      "step": 9630
    },
    {
      "epoch": 0.55281427505881,
      "grad_norm": 0.267578125,
      "learning_rate": 9.901864357326358e-05,
      "loss": 0.9713,
      "step": 9635
    },
    {
      "epoch": 0.5531011532503299,
      "grad_norm": 0.25390625,
      "learning_rate": 9.891850888592987e-05,
      "loss": 0.9024,
      "step": 9640
    },
    {
      "epoch": 0.5533880314418498,
      "grad_norm": 0.330078125,
      "learning_rate": 9.881837528311787e-05,
      "loss": 0.9778,
      "step": 9645
    },
    {
      "epoch": 0.5536749096333696,
      "grad_norm": 0.263671875,
      "learning_rate": 9.871824286524175e-05,
      "loss": 1.0332,
      "step": 9650
    },
    {
      "epoch": 0.5539617878248896,
      "grad_norm": 0.2734375,
      "learning_rate": 9.861811173271459e-05,
      "loss": 0.9709,
      "step": 9655
    },
    {
      "epoch": 0.5542486660164094,
      "grad_norm": 0.30078125,
      "learning_rate": 9.851798198594809e-05,
      "loss": 0.9844,
      "step": 9660
    },
    {
      "epoch": 0.5545355442079293,
      "grad_norm": 0.275390625,
      "learning_rate": 9.841785372535254e-05,
      "loss": 0.9886,
      "step": 9665
    },
    {
      "epoch": 0.5548224223994492,
      "grad_norm": 0.2734375,
      "learning_rate": 9.831772705133685e-05,
      "loss": 0.9194,
      "step": 9670
    },
    {
      "epoch": 0.5551093005909691,
      "grad_norm": 0.2578125,
      "learning_rate": 9.821760206430825e-05,
      "loss": 1.0253,
      "step": 9675
    },
    {
      "epoch": 0.5553961787824889,
      "grad_norm": 0.271484375,
      "learning_rate": 9.811747886467226e-05,
      "loss": 0.8885,
      "step": 9680
    },
    {
      "epoch": 0.5556830569740089,
      "grad_norm": 0.2734375,
      "learning_rate": 9.801735755283273e-05,
      "loss": 0.9267,
      "step": 9685
    },
    {
      "epoch": 0.5559699351655287,
      "grad_norm": 0.25390625,
      "learning_rate": 9.791723822919149e-05,
      "loss": 0.9513,
      "step": 9690
    },
    {
      "epoch": 0.5562568133570486,
      "grad_norm": 0.263671875,
      "learning_rate": 9.781712099414842e-05,
      "loss": 0.9175,
      "step": 9695
    },
    {
      "epoch": 0.5565436915485685,
      "grad_norm": 0.267578125,
      "learning_rate": 9.771700594810128e-05,
      "loss": 0.9682,
      "step": 9700
    },
    {
      "epoch": 0.5568305697400884,
      "grad_norm": 0.2578125,
      "learning_rate": 9.761689319144573e-05,
      "loss": 0.8967,
      "step": 9705
    },
    {
      "epoch": 0.5571174479316082,
      "grad_norm": 0.265625,
      "learning_rate": 9.751678282457501e-05,
      "loss": 0.8832,
      "step": 9710
    },
    {
      "epoch": 0.5574043261231281,
      "grad_norm": 0.263671875,
      "learning_rate": 9.741667494788003e-05,
      "loss": 0.9154,
      "step": 9715
    },
    {
      "epoch": 0.557691204314648,
      "grad_norm": 0.259765625,
      "learning_rate": 9.731656966174924e-05,
      "loss": 0.9135,
      "step": 9720
    },
    {
      "epoch": 0.5579780825061679,
      "grad_norm": 0.267578125,
      "learning_rate": 9.721646706656839e-05,
      "loss": 0.9047,
      "step": 9725
    },
    {
      "epoch": 0.5582649606976877,
      "grad_norm": 0.271484375,
      "learning_rate": 9.71163672627206e-05,
      "loss": 1.0324,
      "step": 9730
    },
    {
      "epoch": 0.5585518388892077,
      "grad_norm": 0.2890625,
      "learning_rate": 9.70162703505862e-05,
      "loss": 0.8719,
      "step": 9735
    },
    {
      "epoch": 0.5588387170807275,
      "grad_norm": 0.26953125,
      "learning_rate": 9.69161764305426e-05,
      "loss": 0.9669,
      "step": 9740
    },
    {
      "epoch": 0.5591255952722474,
      "grad_norm": 0.275390625,
      "learning_rate": 9.681608560296413e-05,
      "loss": 0.9382,
      "step": 9745
    },
    {
      "epoch": 0.5594124734637673,
      "grad_norm": 0.267578125,
      "learning_rate": 9.671599796822223e-05,
      "loss": 0.9281,
      "step": 9750
    },
    {
      "epoch": 0.5596993516552872,
      "grad_norm": 0.271484375,
      "learning_rate": 9.661591362668491e-05,
      "loss": 0.9439,
      "step": 9755
    },
    {
      "epoch": 0.559986229846807,
      "grad_norm": 0.283203125,
      "learning_rate": 9.651583267871697e-05,
      "loss": 0.9541,
      "step": 9760
    },
    {
      "epoch": 0.560273108038327,
      "grad_norm": 0.2890625,
      "learning_rate": 9.641575522467984e-05,
      "loss": 0.974,
      "step": 9765
    },
    {
      "epoch": 0.5605599862298468,
      "grad_norm": 0.328125,
      "learning_rate": 9.631568136493142e-05,
      "loss": 0.9612,
      "step": 9770
    },
    {
      "epoch": 0.5608468644213667,
      "grad_norm": 0.26953125,
      "learning_rate": 9.621561119982598e-05,
      "loss": 0.8891,
      "step": 9775
    },
    {
      "epoch": 0.5611337426128866,
      "grad_norm": 0.275390625,
      "learning_rate": 9.61155448297141e-05,
      "loss": 1.0655,
      "step": 9780
    },
    {
      "epoch": 0.5614206208044065,
      "grad_norm": 0.26171875,
      "learning_rate": 9.60154823549426e-05,
      "loss": 0.9472,
      "step": 9785
    },
    {
      "epoch": 0.5617074989959263,
      "grad_norm": 0.25390625,
      "learning_rate": 9.591542387585434e-05,
      "loss": 0.9663,
      "step": 9790
    },
    {
      "epoch": 0.5619943771874462,
      "grad_norm": 0.275390625,
      "learning_rate": 9.581536949278814e-05,
      "loss": 0.9145,
      "step": 9795
    },
    {
      "epoch": 0.5622812553789661,
      "grad_norm": 0.271484375,
      "learning_rate": 9.571531930607884e-05,
      "loss": 0.9514,
      "step": 9800
    },
    {
      "epoch": 0.562568133570486,
      "grad_norm": 0.279296875,
      "learning_rate": 9.561527341605691e-05,
      "loss": 0.9254,
      "step": 9805
    },
    {
      "epoch": 0.5628550117620058,
      "grad_norm": 0.28125,
      "learning_rate": 9.551523192304863e-05,
      "loss": 0.9761,
      "step": 9810
    },
    {
      "epoch": 0.5631418899535258,
      "grad_norm": 0.265625,
      "learning_rate": 9.541519492737586e-05,
      "loss": 0.959,
      "step": 9815
    },
    {
      "epoch": 0.5634287681450456,
      "grad_norm": 0.265625,
      "learning_rate": 9.531516252935588e-05,
      "loss": 0.993,
      "step": 9820
    },
    {
      "epoch": 0.5637156463365655,
      "grad_norm": 0.251953125,
      "learning_rate": 9.521513482930144e-05,
      "loss": 0.8727,
      "step": 9825
    },
    {
      "epoch": 0.5640025245280854,
      "grad_norm": 0.2734375,
      "learning_rate": 9.511511192752049e-05,
      "loss": 0.9709,
      "step": 9830
    },
    {
      "epoch": 0.5642894027196053,
      "grad_norm": 0.263671875,
      "learning_rate": 9.501509392431627e-05,
      "loss": 0.9213,
      "step": 9835
    },
    {
      "epoch": 0.5645762809111251,
      "grad_norm": 0.259765625,
      "learning_rate": 9.491508091998707e-05,
      "loss": 0.9336,
      "step": 9840
    },
    {
      "epoch": 0.5648631591026451,
      "grad_norm": 0.275390625,
      "learning_rate": 9.481507301482604e-05,
      "loss": 0.9194,
      "step": 9845
    },
    {
      "epoch": 0.5651500372941649,
      "grad_norm": 0.322265625,
      "learning_rate": 9.471507030912151e-05,
      "loss": 0.8929,
      "step": 9850
    },
    {
      "epoch": 0.5654369154856848,
      "grad_norm": 0.26171875,
      "learning_rate": 9.46150729031563e-05,
      "loss": 0.9276,
      "step": 9855
    },
    {
      "epoch": 0.5657237936772047,
      "grad_norm": 0.296875,
      "learning_rate": 9.451508089720803e-05,
      "loss": 0.9965,
      "step": 9860
    },
    {
      "epoch": 0.5660106718687246,
      "grad_norm": 0.294921875,
      "learning_rate": 9.441509439154895e-05,
      "loss": 0.9921,
      "step": 9865
    },
    {
      "epoch": 0.5662975500602444,
      "grad_norm": 0.265625,
      "learning_rate": 9.431511348644575e-05,
      "loss": 0.979,
      "step": 9870
    },
    {
      "epoch": 0.5665844282517642,
      "grad_norm": 0.26953125,
      "learning_rate": 9.421513828215946e-05,
      "loss": 0.9518,
      "step": 9875
    },
    {
      "epoch": 0.5668713064432842,
      "grad_norm": 0.267578125,
      "learning_rate": 9.41151688789455e-05,
      "loss": 0.9859,
      "step": 9880
    },
    {
      "epoch": 0.567158184634804,
      "grad_norm": 0.259765625,
      "learning_rate": 9.401520537705339e-05,
      "loss": 0.9291,
      "step": 9885
    },
    {
      "epoch": 0.5674450628263239,
      "grad_norm": 0.259765625,
      "learning_rate": 9.391524787672676e-05,
      "loss": 0.8935,
      "step": 9890
    },
    {
      "epoch": 0.5677319410178439,
      "grad_norm": 0.279296875,
      "learning_rate": 9.381529647820314e-05,
      "loss": 0.9242,
      "step": 9895
    },
    {
      "epoch": 0.5680188192093637,
      "grad_norm": 0.29296875,
      "learning_rate": 9.371535128171416e-05,
      "loss": 0.9461,
      "step": 9900
    },
    {
      "epoch": 0.5683056974008835,
      "grad_norm": 0.29296875,
      "learning_rate": 9.361541238748496e-05,
      "loss": 0.9622,
      "step": 9905
    },
    {
      "epoch": 0.5685925755924035,
      "grad_norm": 0.26953125,
      "learning_rate": 9.351547989573453e-05,
      "loss": 0.9434,
      "step": 9910
    },
    {
      "epoch": 0.5688794537839234,
      "grad_norm": 0.26953125,
      "learning_rate": 9.341555390667542e-05,
      "loss": 0.9533,
      "step": 9915
    },
    {
      "epoch": 0.5691663319754432,
      "grad_norm": 0.259765625,
      "learning_rate": 9.331563452051362e-05,
      "loss": 0.9506,
      "step": 9920
    },
    {
      "epoch": 0.5694532101669632,
      "grad_norm": 0.251953125,
      "learning_rate": 9.321572183744849e-05,
      "loss": 0.9087,
      "step": 9925
    },
    {
      "epoch": 0.569740088358483,
      "grad_norm": 0.263671875,
      "learning_rate": 9.311581595767273e-05,
      "loss": 0.9403,
      "step": 9930
    },
    {
      "epoch": 0.5700269665500028,
      "grad_norm": 0.255859375,
      "learning_rate": 9.301591698137217e-05,
      "loss": 0.9441,
      "step": 9935
    },
    {
      "epoch": 0.5703138447415228,
      "grad_norm": 0.26953125,
      "learning_rate": 9.29160250087257e-05,
      "loss": 0.9683,
      "step": 9940
    },
    {
      "epoch": 0.5706007229330426,
      "grad_norm": 0.3046875,
      "learning_rate": 9.281614013990526e-05,
      "loss": 1.0119,
      "step": 9945
    },
    {
      "epoch": 0.5708876011245625,
      "grad_norm": 0.26953125,
      "learning_rate": 9.271626247507561e-05,
      "loss": 0.9057,
      "step": 9950
    },
    {
      "epoch": 0.5711744793160823,
      "grad_norm": 0.267578125,
      "learning_rate": 9.261639211439427e-05,
      "loss": 0.9917,
      "step": 9955
    },
    {
      "epoch": 0.5714613575076023,
      "grad_norm": 0.271484375,
      "learning_rate": 9.251652915801144e-05,
      "loss": 0.9661,
      "step": 9960
    },
    {
      "epoch": 0.5717482356991221,
      "grad_norm": 0.265625,
      "learning_rate": 9.241667370607e-05,
      "loss": 0.9123,
      "step": 9965
    },
    {
      "epoch": 0.572035113890642,
      "grad_norm": 0.2578125,
      "learning_rate": 9.231682585870514e-05,
      "loss": 0.9157,
      "step": 9970
    },
    {
      "epoch": 0.572321992082162,
      "grad_norm": 0.265625,
      "learning_rate": 9.221698571604453e-05,
      "loss": 0.9181,
      "step": 9975
    },
    {
      "epoch": 0.5726088702736818,
      "grad_norm": 0.26953125,
      "learning_rate": 9.211715337820811e-05,
      "loss": 0.952,
      "step": 9980
    },
    {
      "epoch": 0.5728957484652016,
      "grad_norm": 0.255859375,
      "learning_rate": 9.201732894530797e-05,
      "loss": 0.9692,
      "step": 9985
    },
    {
      "epoch": 0.5731826266567216,
      "grad_norm": 0.2734375,
      "learning_rate": 9.191751251744823e-05,
      "loss": 0.9705,
      "step": 9990
    },
    {
      "epoch": 0.5734695048482414,
      "grad_norm": 0.26171875,
      "learning_rate": 9.181770419472509e-05,
      "loss": 0.9723,
      "step": 9995
    },
    {
      "epoch": 0.5737563830397613,
      "grad_norm": 0.265625,
      "learning_rate": 9.171790407722656e-05,
      "loss": 0.9305,
      "step": 10000
    },
    {
      "epoch": 0.5740432612312812,
      "grad_norm": 0.29296875,
      "learning_rate": 9.161811226503233e-05,
      "loss": 0.8814,
      "step": 10005
    },
    {
      "epoch": 0.5743301394228011,
      "grad_norm": 0.26171875,
      "learning_rate": 9.151832885821396e-05,
      "loss": 0.9779,
      "step": 10010
    },
    {
      "epoch": 0.5746170176143209,
      "grad_norm": 0.263671875,
      "learning_rate": 9.141855395683444e-05,
      "loss": 0.9638,
      "step": 10015
    },
    {
      "epoch": 0.5749038958058409,
      "grad_norm": 0.28125,
      "learning_rate": 9.131878766094822e-05,
      "loss": 0.9235,
      "step": 10020
    },
    {
      "epoch": 0.5751907739973607,
      "grad_norm": 0.26953125,
      "learning_rate": 9.121903007060121e-05,
      "loss": 0.9527,
      "step": 10025
    },
    {
      "epoch": 0.5754776521888806,
      "grad_norm": 0.2421875,
      "learning_rate": 9.111928128583054e-05,
      "loss": 0.9512,
      "step": 10030
    },
    {
      "epoch": 0.5757645303804004,
      "grad_norm": 0.326171875,
      "learning_rate": 9.101954140666451e-05,
      "loss": 0.9147,
      "step": 10035
    },
    {
      "epoch": 0.5760514085719204,
      "grad_norm": 0.26171875,
      "learning_rate": 9.091981053312247e-05,
      "loss": 0.8988,
      "step": 10040
    },
    {
      "epoch": 0.5763382867634402,
      "grad_norm": 0.251953125,
      "learning_rate": 9.082008876521481e-05,
      "loss": 0.9102,
      "step": 10045
    },
    {
      "epoch": 0.5766251649549601,
      "grad_norm": 0.27734375,
      "learning_rate": 9.072037620294275e-05,
      "loss": 0.9565,
      "step": 10050
    },
    {
      "epoch": 0.57691204314648,
      "grad_norm": 0.314453125,
      "learning_rate": 9.06206729462982e-05,
      "loss": 0.9252,
      "step": 10055
    },
    {
      "epoch": 0.5771989213379999,
      "grad_norm": 0.2490234375,
      "learning_rate": 9.052097909526388e-05,
      "loss": 0.9098,
      "step": 10060
    },
    {
      "epoch": 0.5774857995295197,
      "grad_norm": 0.3125,
      "learning_rate": 9.042129474981297e-05,
      "loss": 0.9509,
      "step": 10065
    },
    {
      "epoch": 0.5777726777210397,
      "grad_norm": 0.25390625,
      "learning_rate": 9.032162000990914e-05,
      "loss": 0.8907,
      "step": 10070
    },
    {
      "epoch": 0.5780595559125595,
      "grad_norm": 0.265625,
      "learning_rate": 9.02219549755065e-05,
      "loss": 0.998,
      "step": 10075
    },
    {
      "epoch": 0.5783464341040794,
      "grad_norm": 0.275390625,
      "learning_rate": 9.012229974654932e-05,
      "loss": 0.9134,
      "step": 10080
    },
    {
      "epoch": 0.5786333122955993,
      "grad_norm": 0.267578125,
      "learning_rate": 9.002265442297212e-05,
      "loss": 0.9198,
      "step": 10085
    },
    {
      "epoch": 0.5789201904871192,
      "grad_norm": 0.2578125,
      "learning_rate": 8.99230191046994e-05,
      "loss": 0.9489,
      "step": 10090
    },
    {
      "epoch": 0.579207068678639,
      "grad_norm": 0.24609375,
      "learning_rate": 8.982339389164575e-05,
      "loss": 0.9774,
      "step": 10095
    },
    {
      "epoch": 0.579493946870159,
      "grad_norm": 0.26953125,
      "learning_rate": 8.972377888371555e-05,
      "loss": 0.922,
      "step": 10100
    },
    {
      "epoch": 0.5797808250616788,
      "grad_norm": 0.265625,
      "learning_rate": 8.962417418080285e-05,
      "loss": 0.9175,
      "step": 10105
    },
    {
      "epoch": 0.5800677032531987,
      "grad_norm": 0.26953125,
      "learning_rate": 8.952457988279161e-05,
      "loss": 0.9818,
      "step": 10110
    },
    {
      "epoch": 0.5803545814447185,
      "grad_norm": 0.275390625,
      "learning_rate": 8.942499608955516e-05,
      "loss": 0.99,
      "step": 10115
    },
    {
      "epoch": 0.5806414596362385,
      "grad_norm": 0.255859375,
      "learning_rate": 8.93254229009563e-05,
      "loss": 0.9201,
      "step": 10120
    },
    {
      "epoch": 0.5809283378277583,
      "grad_norm": 0.24609375,
      "learning_rate": 8.922586041684732e-05,
      "loss": 0.9708,
      "step": 10125
    },
    {
      "epoch": 0.5812152160192782,
      "grad_norm": 0.244140625,
      "learning_rate": 8.912630873706967e-05,
      "loss": 0.9485,
      "step": 10130
    },
    {
      "epoch": 0.5815020942107981,
      "grad_norm": 0.271484375,
      "learning_rate": 8.902676796145403e-05,
      "loss": 0.9604,
      "step": 10135
    },
    {
      "epoch": 0.581788972402318,
      "grad_norm": 0.263671875,
      "learning_rate": 8.892723818982001e-05,
      "loss": 0.9469,
      "step": 10140
    },
    {
      "epoch": 0.5820758505938378,
      "grad_norm": 0.2451171875,
      "learning_rate": 8.882771952197642e-05,
      "loss": 0.9425,
      "step": 10145
    },
    {
      "epoch": 0.5823627287853578,
      "grad_norm": 0.27734375,
      "learning_rate": 8.872821205772074e-05,
      "loss": 0.932,
      "step": 10150
    },
    {
      "epoch": 0.5826496069768776,
      "grad_norm": 0.251953125,
      "learning_rate": 8.862871589683924e-05,
      "loss": 0.9634,
      "step": 10155
    },
    {
      "epoch": 0.5829364851683975,
      "grad_norm": 0.2578125,
      "learning_rate": 8.8529231139107e-05,
      "loss": 0.9211,
      "step": 10160
    },
    {
      "epoch": 0.5832233633599174,
      "grad_norm": 0.27734375,
      "learning_rate": 8.842975788428748e-05,
      "loss": 0.9365,
      "step": 10165
    },
    {
      "epoch": 0.5835102415514373,
      "grad_norm": 0.26171875,
      "learning_rate": 8.833029623213267e-05,
      "loss": 0.8496,
      "step": 10170
    },
    {
      "epoch": 0.5837971197429571,
      "grad_norm": 0.271484375,
      "learning_rate": 8.823084628238298e-05,
      "loss": 0.9707,
      "step": 10175
    },
    {
      "epoch": 0.5840839979344771,
      "grad_norm": 0.2578125,
      "learning_rate": 8.813140813476704e-05,
      "loss": 0.9237,
      "step": 10180
    },
    {
      "epoch": 0.5843708761259969,
      "grad_norm": 0.28125,
      "learning_rate": 8.803198188900161e-05,
      "loss": 0.9181,
      "step": 10185
    },
    {
      "epoch": 0.5846577543175168,
      "grad_norm": 0.2578125,
      "learning_rate": 8.79325676447916e-05,
      "loss": 0.9793,
      "step": 10190
    },
    {
      "epoch": 0.5849446325090366,
      "grad_norm": 0.263671875,
      "learning_rate": 8.783316550182982e-05,
      "loss": 0.9985,
      "step": 10195
    },
    {
      "epoch": 0.5852315107005566,
      "grad_norm": 0.244140625,
      "learning_rate": 8.773377555979699e-05,
      "loss": 0.9507,
      "step": 10200
    },
    {
      "epoch": 0.5855183888920764,
      "grad_norm": 0.29296875,
      "learning_rate": 8.763439791836145e-05,
      "loss": 0.8859,
      "step": 10205
    },
    {
      "epoch": 0.5858052670835963,
      "grad_norm": 0.283203125,
      "learning_rate": 8.753503267717948e-05,
      "loss": 0.9309,
      "step": 10210
    },
    {
      "epoch": 0.5860921452751162,
      "grad_norm": 0.26953125,
      "learning_rate": 8.743567993589466e-05,
      "loss": 0.9578,
      "step": 10215
    },
    {
      "epoch": 0.5863790234666361,
      "grad_norm": 0.267578125,
      "learning_rate": 8.733633979413817e-05,
      "loss": 1.0202,
      "step": 10220
    },
    {
      "epoch": 0.5866659016581559,
      "grad_norm": 0.271484375,
      "learning_rate": 8.723701235152854e-05,
      "loss": 0.986,
      "step": 10225
    },
    {
      "epoch": 0.5869527798496759,
      "grad_norm": 0.2578125,
      "learning_rate": 8.713769770767155e-05,
      "loss": 0.8414,
      "step": 10230
    },
    {
      "epoch": 0.5872396580411957,
      "grad_norm": 0.265625,
      "learning_rate": 8.703839596216012e-05,
      "loss": 1.0018,
      "step": 10235
    },
    {
      "epoch": 0.5875265362327156,
      "grad_norm": 0.271484375,
      "learning_rate": 8.69391072145743e-05,
      "loss": 0.9342,
      "step": 10240
    },
    {
      "epoch": 0.5878134144242355,
      "grad_norm": 0.255859375,
      "learning_rate": 8.683983156448104e-05,
      "loss": 0.8877,
      "step": 10245
    },
    {
      "epoch": 0.5881002926157554,
      "grad_norm": 0.291015625,
      "learning_rate": 8.67405691114342e-05,
      "loss": 0.9437,
      "step": 10250
    },
    {
      "epoch": 0.5883871708072752,
      "grad_norm": 0.259765625,
      "learning_rate": 8.664131995497439e-05,
      "loss": 0.9819,
      "step": 10255
    },
    {
      "epoch": 0.5886740489987952,
      "grad_norm": 0.271484375,
      "learning_rate": 8.654208419462893e-05,
      "loss": 0.9168,
      "step": 10260
    },
    {
      "epoch": 0.588960927190315,
      "grad_norm": 0.25390625,
      "learning_rate": 8.644286192991158e-05,
      "loss": 0.9045,
      "step": 10265
    },
    {
      "epoch": 0.5892478053818349,
      "grad_norm": 0.2734375,
      "learning_rate": 8.634365326032265e-05,
      "loss": 0.9478,
      "step": 10270
    },
    {
      "epoch": 0.5895346835733547,
      "grad_norm": 0.263671875,
      "learning_rate": 8.62444582853489e-05,
      "loss": 0.9661,
      "step": 10275
    },
    {
      "epoch": 0.5898215617648747,
      "grad_norm": 0.248046875,
      "learning_rate": 8.614527710446322e-05,
      "loss": 0.9886,
      "step": 10280
    },
    {
      "epoch": 0.5901084399563945,
      "grad_norm": 0.263671875,
      "learning_rate": 8.604610981712471e-05,
      "loss": 0.9704,
      "step": 10285
    },
    {
      "epoch": 0.5903953181479143,
      "grad_norm": 0.271484375,
      "learning_rate": 8.594695652277858e-05,
      "loss": 0.9389,
      "step": 10290
    },
    {
      "epoch": 0.5906821963394343,
      "grad_norm": 0.314453125,
      "learning_rate": 8.584781732085598e-05,
      "loss": 0.9359,
      "step": 10295
    },
    {
      "epoch": 0.5909690745309542,
      "grad_norm": 0.298828125,
      "learning_rate": 8.574869231077383e-05,
      "loss": 1.0032,
      "step": 10300
    },
    {
      "epoch": 0.591255952722474,
      "grad_norm": 0.25390625,
      "learning_rate": 8.564958159193506e-05,
      "loss": 0.9773,
      "step": 10305
    },
    {
      "epoch": 0.591542830913994,
      "grad_norm": 0.255859375,
      "learning_rate": 8.555048526372805e-05,
      "loss": 0.9306,
      "step": 10310
    },
    {
      "epoch": 0.5918297091055138,
      "grad_norm": 0.279296875,
      "learning_rate": 8.545140342552676e-05,
      "loss": 0.9393,
      "step": 10315
    },
    {
      "epoch": 0.5921165872970336,
      "grad_norm": 0.27734375,
      "learning_rate": 8.53523361766908e-05,
      "loss": 0.9635,
      "step": 10320
    },
    {
      "epoch": 0.5924034654885536,
      "grad_norm": 0.2734375,
      "learning_rate": 8.525328361656494e-05,
      "loss": 0.9303,
      "step": 10325
    },
    {
      "epoch": 0.5926903436800735,
      "grad_norm": 0.2578125,
      "learning_rate": 8.515424584447935e-05,
      "loss": 0.874,
      "step": 10330
    },
    {
      "epoch": 0.5929772218715933,
      "grad_norm": 0.2734375,
      "learning_rate": 8.505522295974929e-05,
      "loss": 0.972,
      "step": 10335
    },
    {
      "epoch": 0.5932641000631133,
      "grad_norm": 0.275390625,
      "learning_rate": 8.495621506167519e-05,
      "loss": 0.9765,
      "step": 10340
    },
    {
      "epoch": 0.5935509782546331,
      "grad_norm": 0.26953125,
      "learning_rate": 8.485722224954237e-05,
      "loss": 1.0924,
      "step": 10345
    },
    {
      "epoch": 0.593837856446153,
      "grad_norm": 0.279296875,
      "learning_rate": 8.475824462262096e-05,
      "loss": 0.9808,
      "step": 10350
    },
    {
      "epoch": 0.5941247346376728,
      "grad_norm": 0.26953125,
      "learning_rate": 8.465928228016608e-05,
      "loss": 0.968,
      "step": 10355
    },
    {
      "epoch": 0.5944116128291927,
      "grad_norm": 0.265625,
      "learning_rate": 8.456033532141735e-05,
      "loss": 0.96,
      "step": 10360
    },
    {
      "epoch": 0.5946984910207126,
      "grad_norm": 0.287109375,
      "learning_rate": 8.44614038455989e-05,
      "loss": 0.9754,
      "step": 10365
    },
    {
      "epoch": 0.5949853692122324,
      "grad_norm": 0.267578125,
      "learning_rate": 8.436248795191961e-05,
      "loss": 0.9828,
      "step": 10370
    },
    {
      "epoch": 0.5952722474037524,
      "grad_norm": 0.28125,
      "learning_rate": 8.426358773957243e-05,
      "loss": 0.9586,
      "step": 10375
    },
    {
      "epoch": 0.5955591255952722,
      "grad_norm": 0.283203125,
      "learning_rate": 8.416470330773471e-05,
      "loss": 0.9082,
      "step": 10380
    },
    {
      "epoch": 0.5958460037867921,
      "grad_norm": 0.26171875,
      "learning_rate": 8.406583475556807e-05,
      "loss": 0.959,
      "step": 10385
    },
    {
      "epoch": 0.596132881978312,
      "grad_norm": 0.2890625,
      "learning_rate": 8.396698218221807e-05,
      "loss": 0.8862,
      "step": 10390
    },
    {
      "epoch": 0.5964197601698319,
      "grad_norm": 0.265625,
      "learning_rate": 8.386814568681429e-05,
      "loss": 0.9277,
      "step": 10395
    },
    {
      "epoch": 0.5967066383613517,
      "grad_norm": 0.267578125,
      "learning_rate": 8.376932536847014e-05,
      "loss": 0.9745,
      "step": 10400
    },
    {
      "epoch": 0.5969935165528717,
      "grad_norm": 0.271484375,
      "learning_rate": 8.367052132628294e-05,
      "loss": 0.973,
      "step": 10405
    },
    {
      "epoch": 0.5972803947443915,
      "grad_norm": 0.26171875,
      "learning_rate": 8.35717336593336e-05,
      "loss": 0.9606,
      "step": 10410
    },
    {
      "epoch": 0.5975672729359114,
      "grad_norm": 0.255859375,
      "learning_rate": 8.347296246668653e-05,
      "loss": 0.8933,
      "step": 10415
    },
    {
      "epoch": 0.5978541511274313,
      "grad_norm": 0.2578125,
      "learning_rate": 8.33742078473898e-05,
      "loss": 0.979,
      "step": 10420
    },
    {
      "epoch": 0.5981410293189512,
      "grad_norm": 0.26171875,
      "learning_rate": 8.327546990047471e-05,
      "loss": 0.9329,
      "step": 10425
    },
    {
      "epoch": 0.598427907510471,
      "grad_norm": 0.271484375,
      "learning_rate": 8.317674872495589e-05,
      "loss": 0.9221,
      "step": 10430
    },
    {
      "epoch": 0.5987147857019909,
      "grad_norm": 0.26953125,
      "learning_rate": 8.30780444198312e-05,
      "loss": 0.9305,
      "step": 10435
    },
    {
      "epoch": 0.5990016638935108,
      "grad_norm": 0.26953125,
      "learning_rate": 8.29793570840815e-05,
      "loss": 1.0169,
      "step": 10440
    },
    {
      "epoch": 0.5992885420850307,
      "grad_norm": 0.259765625,
      "learning_rate": 8.288068681667065e-05,
      "loss": 0.9563,
      "step": 10445
    },
    {
      "epoch": 0.5995754202765505,
      "grad_norm": 0.255859375,
      "learning_rate": 8.278203371654549e-05,
      "loss": 0.9658,
      "step": 10450
    },
    {
      "epoch": 0.5998622984680705,
      "grad_norm": 0.26953125,
      "learning_rate": 8.268339788263551e-05,
      "loss": 0.9455,
      "step": 10455
    },
    {
      "epoch": 0.6001491766595903,
      "grad_norm": 0.259765625,
      "learning_rate": 8.2584779413853e-05,
      "loss": 0.9489,
      "step": 10460
    },
    {
      "epoch": 0.6004360548511102,
      "grad_norm": 0.26953125,
      "learning_rate": 8.248617840909268e-05,
      "loss": 0.9188,
      "step": 10465
    },
    {
      "epoch": 0.6007229330426301,
      "grad_norm": 0.28125,
      "learning_rate": 8.238759496723199e-05,
      "loss": 1.0094,
      "step": 10470
    },
    {
      "epoch": 0.60100981123415,
      "grad_norm": 0.310546875,
      "learning_rate": 8.228902918713053e-05,
      "loss": 0.9488,
      "step": 10475
    },
    {
      "epoch": 0.6012966894256698,
      "grad_norm": 0.275390625,
      "learning_rate": 8.21904811676303e-05,
      "loss": 0.9419,
      "step": 10480
    },
    {
      "epoch": 0.6015835676171898,
      "grad_norm": 0.259765625,
      "learning_rate": 8.209195100755551e-05,
      "loss": 0.9301,
      "step": 10485
    },
    {
      "epoch": 0.6018704458087096,
      "grad_norm": 0.263671875,
      "learning_rate": 8.199343880571241e-05,
      "loss": 1.0017,
      "step": 10490
    },
    {
      "epoch": 0.6021573240002295,
      "grad_norm": 0.271484375,
      "learning_rate": 8.189494466088923e-05,
      "loss": 1.0361,
      "step": 10495
    },
    {
      "epoch": 0.6024442021917494,
      "grad_norm": 0.2578125,
      "learning_rate": 8.179646867185617e-05,
      "loss": 0.9334,
      "step": 10500
    },
    {
      "epoch": 0.6027310803832693,
      "grad_norm": 0.279296875,
      "learning_rate": 8.169801093736515e-05,
      "loss": 1.027,
      "step": 10505
    },
    {
      "epoch": 0.6030179585747891,
      "grad_norm": 0.2890625,
      "learning_rate": 8.159957155614974e-05,
      "loss": 0.9183,
      "step": 10510
    },
    {
      "epoch": 0.603304836766309,
      "grad_norm": 0.2578125,
      "learning_rate": 8.15011506269253e-05,
      "loss": 0.9329,
      "step": 10515
    },
    {
      "epoch": 0.6035917149578289,
      "grad_norm": 0.279296875,
      "learning_rate": 8.140274824838849e-05,
      "loss": 0.9626,
      "step": 10520
    },
    {
      "epoch": 0.6038785931493488,
      "grad_norm": 0.263671875,
      "learning_rate": 8.130436451921743e-05,
      "loss": 0.9523,
      "step": 10525
    },
    {
      "epoch": 0.6041654713408686,
      "grad_norm": 0.271484375,
      "learning_rate": 8.120599953807153e-05,
      "loss": 0.9433,
      "step": 10530
    },
    {
      "epoch": 0.6044523495323886,
      "grad_norm": 0.271484375,
      "learning_rate": 8.110765340359145e-05,
      "loss": 1.0139,
      "step": 10535
    },
    {
      "epoch": 0.6047392277239084,
      "grad_norm": 0.28515625,
      "learning_rate": 8.10093262143989e-05,
      "loss": 0.9758,
      "step": 10540
    },
    {
      "epoch": 0.6050261059154283,
      "grad_norm": 0.265625,
      "learning_rate": 8.09110180690966e-05,
      "loss": 0.9715,
      "step": 10545
    },
    {
      "epoch": 0.6053129841069482,
      "grad_norm": 0.265625,
      "learning_rate": 8.08127290662682e-05,
      "loss": 0.9328,
      "step": 10550
    },
    {
      "epoch": 0.6055998622984681,
      "grad_norm": 0.28515625,
      "learning_rate": 8.071445930447815e-05,
      "loss": 1.0168,
      "step": 10555
    },
    {
      "epoch": 0.6058867404899879,
      "grad_norm": 0.25390625,
      "learning_rate": 8.061620888227145e-05,
      "loss": 0.9434,
      "step": 10560
    },
    {
      "epoch": 0.6061736186815079,
      "grad_norm": 0.26171875,
      "learning_rate": 8.051797789817403e-05,
      "loss": 0.8857,
      "step": 10565
    },
    {
      "epoch": 0.6064604968730277,
      "grad_norm": 0.2734375,
      "learning_rate": 8.041976645069207e-05,
      "loss": 0.9153,
      "step": 10570
    },
    {
      "epoch": 0.6067473750645476,
      "grad_norm": 0.275390625,
      "learning_rate": 8.032157463831216e-05,
      "loss": 0.9559,
      "step": 10575
    },
    {
      "epoch": 0.6070342532560675,
      "grad_norm": 0.263671875,
      "learning_rate": 8.022340255950138e-05,
      "loss": 0.9526,
      "step": 10580
    },
    {
      "epoch": 0.6073211314475874,
      "grad_norm": 0.263671875,
      "learning_rate": 8.012525031270685e-05,
      "loss": 1.005,
      "step": 10585
    },
    {
      "epoch": 0.6076080096391072,
      "grad_norm": 0.275390625,
      "learning_rate": 8.002711799635588e-05,
      "loss": 0.9495,
      "step": 10590
    },
    {
      "epoch": 0.6078948878306271,
      "grad_norm": 0.26171875,
      "learning_rate": 7.992900570885572e-05,
      "loss": 0.9172,
      "step": 10595
    },
    {
      "epoch": 0.608181766022147,
      "grad_norm": 0.275390625,
      "learning_rate": 7.983091354859369e-05,
      "loss": 0.9883,
      "step": 10600
    },
    {
      "epoch": 0.6084686442136669,
      "grad_norm": 0.271484375,
      "learning_rate": 7.97328416139368e-05,
      "loss": 0.9507,
      "step": 10605
    },
    {
      "epoch": 0.6087555224051867,
      "grad_norm": 0.2734375,
      "learning_rate": 7.963479000323171e-05,
      "loss": 0.9506,
      "step": 10610
    },
    {
      "epoch": 0.6090424005967067,
      "grad_norm": 0.259765625,
      "learning_rate": 7.953675881480493e-05,
      "loss": 0.9825,
      "step": 10615
    },
    {
      "epoch": 0.6093292787882265,
      "grad_norm": 0.275390625,
      "learning_rate": 7.94387481469623e-05,
      "loss": 0.9435,
      "step": 10620
    },
    {
      "epoch": 0.6096161569797464,
      "grad_norm": 0.251953125,
      "learning_rate": 7.934075809798908e-05,
      "loss": 1.0158,
      "step": 10625
    },
    {
      "epoch": 0.6099030351712663,
      "grad_norm": 0.255859375,
      "learning_rate": 7.924278876615004e-05,
      "loss": 0.8788,
      "step": 10630
    },
    {
      "epoch": 0.6101899133627862,
      "grad_norm": 0.275390625,
      "learning_rate": 7.914484024968893e-05,
      "loss": 0.9347,
      "step": 10635
    },
    {
      "epoch": 0.610476791554306,
      "grad_norm": 0.267578125,
      "learning_rate": 7.90469126468288e-05,
      "loss": 0.9054,
      "step": 10640
    },
    {
      "epoch": 0.610763669745826,
      "grad_norm": 0.24609375,
      "learning_rate": 7.894900605577161e-05,
      "loss": 0.963,
      "step": 10645
    },
    {
      "epoch": 0.6110505479373458,
      "grad_norm": 0.2578125,
      "learning_rate": 7.885112057469839e-05,
      "loss": 0.9641,
      "step": 10650
    },
    {
      "epoch": 0.6113374261288657,
      "grad_norm": 0.265625,
      "learning_rate": 7.87532563017689e-05,
      "loss": 0.9087,
      "step": 10655
    },
    {
      "epoch": 0.6116243043203856,
      "grad_norm": 0.267578125,
      "learning_rate": 7.865541333512157e-05,
      "loss": 0.9185,
      "step": 10660
    },
    {
      "epoch": 0.6119111825119055,
      "grad_norm": 0.265625,
      "learning_rate": 7.855759177287368e-05,
      "loss": 0.9213,
      "step": 10665
    },
    {
      "epoch": 0.6121980607034253,
      "grad_norm": 0.267578125,
      "learning_rate": 7.84597917131208e-05,
      "loss": 0.9388,
      "step": 10670
    },
    {
      "epoch": 0.6124849388949452,
      "grad_norm": 0.2578125,
      "learning_rate": 7.836201325393706e-05,
      "loss": 0.969,
      "step": 10675
    },
    {
      "epoch": 0.6127718170864651,
      "grad_norm": 0.265625,
      "learning_rate": 7.826425649337501e-05,
      "loss": 0.9655,
      "step": 10680
    },
    {
      "epoch": 0.613058695277985,
      "grad_norm": 0.25390625,
      "learning_rate": 7.816652152946528e-05,
      "loss": 1.0169,
      "step": 10685
    },
    {
      "epoch": 0.6133455734695048,
      "grad_norm": 0.251953125,
      "learning_rate": 7.806880846021669e-05,
      "loss": 0.9382,
      "step": 10690
    },
    {
      "epoch": 0.6136324516610248,
      "grad_norm": 0.26953125,
      "learning_rate": 7.797111738361618e-05,
      "loss": 0.9416,
      "step": 10695
    },
    {
      "epoch": 0.6139193298525446,
      "grad_norm": 0.267578125,
      "learning_rate": 7.787344839762855e-05,
      "loss": 0.9647,
      "step": 10700
    },
    {
      "epoch": 0.6142062080440645,
      "grad_norm": 0.2578125,
      "learning_rate": 7.777580160019649e-05,
      "loss": 0.905,
      "step": 10705
    },
    {
      "epoch": 0.6144930862355844,
      "grad_norm": 0.279296875,
      "learning_rate": 7.767817708924038e-05,
      "loss": 0.9983,
      "step": 10710
    },
    {
      "epoch": 0.6147799644271043,
      "grad_norm": 0.271484375,
      "learning_rate": 7.758057496265839e-05,
      "loss": 0.9257,
      "step": 10715
    },
    {
      "epoch": 0.6150668426186241,
      "grad_norm": 0.2734375,
      "learning_rate": 7.748299531832609e-05,
      "loss": 0.9679,
      "step": 10720
    },
    {
      "epoch": 0.6153537208101441,
      "grad_norm": 0.275390625,
      "learning_rate": 7.738543825409652e-05,
      "loss": 0.9438,
      "step": 10725
    },
    {
      "epoch": 0.6156405990016639,
      "grad_norm": 0.275390625,
      "learning_rate": 7.728790386780025e-05,
      "loss": 0.9388,
      "step": 10730
    },
    {
      "epoch": 0.6159274771931837,
      "grad_norm": 0.25390625,
      "learning_rate": 7.71903922572449e-05,
      "loss": 1.0314,
      "step": 10735
    },
    {
      "epoch": 0.6162143553847037,
      "grad_norm": 0.26953125,
      "learning_rate": 7.70929035202153e-05,
      "loss": 0.8882,
      "step": 10740
    },
    {
      "epoch": 0.6165012335762236,
      "grad_norm": 0.26171875,
      "learning_rate": 7.699543775447345e-05,
      "loss": 0.9499,
      "step": 10745
    },
    {
      "epoch": 0.6167881117677434,
      "grad_norm": 0.2578125,
      "learning_rate": 7.689799505775822e-05,
      "loss": 0.9593,
      "step": 10750
    },
    {
      "epoch": 0.6170749899592632,
      "grad_norm": 0.26171875,
      "learning_rate": 7.68005755277853e-05,
      "loss": 0.8896,
      "step": 10755
    },
    {
      "epoch": 0.6173618681507832,
      "grad_norm": 0.25,
      "learning_rate": 7.67031792622473e-05,
      "loss": 1.0001,
      "step": 10760
    },
    {
      "epoch": 0.617648746342303,
      "grad_norm": 0.2490234375,
      "learning_rate": 7.660580635881338e-05,
      "loss": 1.0065,
      "step": 10765
    },
    {
      "epoch": 0.6179356245338229,
      "grad_norm": 0.26171875,
      "learning_rate": 7.65084569151293e-05,
      "loss": 0.9113,
      "step": 10770
    },
    {
      "epoch": 0.6182225027253428,
      "grad_norm": 0.263671875,
      "learning_rate": 7.641113102881726e-05,
      "loss": 0.9221,
      "step": 10775
    },
    {
      "epoch": 0.6185093809168627,
      "grad_norm": 0.26953125,
      "learning_rate": 7.631382879747597e-05,
      "loss": 0.9337,
      "step": 10780
    },
    {
      "epoch": 0.6187962591083825,
      "grad_norm": 0.25390625,
      "learning_rate": 7.621655031868026e-05,
      "loss": 0.9811,
      "step": 10785
    },
    {
      "epoch": 0.6190831372999025,
      "grad_norm": 0.27734375,
      "learning_rate": 7.61192956899812e-05,
      "loss": 1.0128,
      "step": 10790
    },
    {
      "epoch": 0.6193700154914223,
      "grad_norm": 0.279296875,
      "learning_rate": 7.6022065008906e-05,
      "loss": 0.9482,
      "step": 10795
    },
    {
      "epoch": 0.6196568936829422,
      "grad_norm": 0.25390625,
      "learning_rate": 7.592485837295777e-05,
      "loss": 0.9472,
      "step": 10800
    },
    {
      "epoch": 0.6199437718744621,
      "grad_norm": 0.26171875,
      "learning_rate": 7.582767587961552e-05,
      "loss": 0.9221,
      "step": 10805
    },
    {
      "epoch": 0.620230650065982,
      "grad_norm": 0.287109375,
      "learning_rate": 7.573051762633414e-05,
      "loss": 0.9532,
      "step": 10810
    },
    {
      "epoch": 0.6205175282575018,
      "grad_norm": 0.259765625,
      "learning_rate": 7.563338371054412e-05,
      "loss": 1.0231,
      "step": 10815
    },
    {
      "epoch": 0.6208044064490218,
      "grad_norm": 0.2578125,
      "learning_rate": 7.553627422965148e-05,
      "loss": 0.9752,
      "step": 10820
    },
    {
      "epoch": 0.6210912846405416,
      "grad_norm": 0.26171875,
      "learning_rate": 7.543918928103795e-05,
      "loss": 0.9337,
      "step": 10825
    },
    {
      "epoch": 0.6213781628320615,
      "grad_norm": 0.2734375,
      "learning_rate": 7.534212896206051e-05,
      "loss": 0.9364,
      "step": 10830
    },
    {
      "epoch": 0.6216650410235813,
      "grad_norm": 0.2890625,
      "learning_rate": 7.524509337005141e-05,
      "loss": 0.9254,
      "step": 10835
    },
    {
      "epoch": 0.6219519192151013,
      "grad_norm": 0.25,
      "learning_rate": 7.514808260231818e-05,
      "loss": 0.9011,
      "step": 10840
    },
    {
      "epoch": 0.6222387974066211,
      "grad_norm": 0.271484375,
      "learning_rate": 7.505109675614346e-05,
      "loss": 0.9011,
      "step": 10845
    },
    {
      "epoch": 0.622525675598141,
      "grad_norm": 0.255859375,
      "learning_rate": 7.495413592878484e-05,
      "loss": 0.9105,
      "step": 10850
    },
    {
      "epoch": 0.6228125537896609,
      "grad_norm": 0.2578125,
      "learning_rate": 7.485720021747486e-05,
      "loss": 0.9194,
      "step": 10855
    },
    {
      "epoch": 0.6230994319811808,
      "grad_norm": 0.263671875,
      "learning_rate": 7.476028971942093e-05,
      "loss": 0.9168,
      "step": 10860
    },
    {
      "epoch": 0.6233863101727006,
      "grad_norm": 0.265625,
      "learning_rate": 7.466340453180505e-05,
      "loss": 0.9753,
      "step": 10865
    },
    {
      "epoch": 0.6236731883642206,
      "grad_norm": 0.263671875,
      "learning_rate": 7.456654475178389e-05,
      "loss": 0.9741,
      "step": 10870
    },
    {
      "epoch": 0.6239600665557404,
      "grad_norm": 0.263671875,
      "learning_rate": 7.446971047648873e-05,
      "loss": 0.9154,
      "step": 10875
    },
    {
      "epoch": 0.6242469447472603,
      "grad_norm": 0.275390625,
      "learning_rate": 7.437290180302512e-05,
      "loss": 0.9548,
      "step": 10880
    },
    {
      "epoch": 0.6245338229387802,
      "grad_norm": 0.259765625,
      "learning_rate": 7.427611882847301e-05,
      "loss": 0.9339,
      "step": 10885
    },
    {
      "epoch": 0.6248207011303001,
      "grad_norm": 0.265625,
      "learning_rate": 7.41793616498867e-05,
      "loss": 0.9884,
      "step": 10890
    },
    {
      "epoch": 0.6251075793218199,
      "grad_norm": 0.251953125,
      "learning_rate": 7.40826303642944e-05,
      "loss": 0.8957,
      "step": 10895
    },
    {
      "epoch": 0.6253944575133399,
      "grad_norm": 0.25,
      "learning_rate": 7.398592506869849e-05,
      "loss": 0.9553,
      "step": 10900
    },
    {
      "epoch": 0.6256813357048597,
      "grad_norm": 0.296875,
      "learning_rate": 7.388924586007523e-05,
      "loss": 0.9071,
      "step": 10905
    },
    {
      "epoch": 0.6259682138963796,
      "grad_norm": 0.2734375,
      "learning_rate": 7.379259283537479e-05,
      "loss": 0.9648,
      "step": 10910
    },
    {
      "epoch": 0.6262550920878994,
      "grad_norm": 0.263671875,
      "learning_rate": 7.369596609152105e-05,
      "loss": 0.9983,
      "step": 10915
    },
    {
      "epoch": 0.6265419702794194,
      "grad_norm": 0.279296875,
      "learning_rate": 7.359936572541142e-05,
      "loss": 1.0115,
      "step": 10920
    },
    {
      "epoch": 0.6268288484709392,
      "grad_norm": 0.283203125,
      "learning_rate": 7.350279183391712e-05,
      "loss": 0.932,
      "step": 10925
    },
    {
      "epoch": 0.6271157266624591,
      "grad_norm": 0.267578125,
      "learning_rate": 7.340624451388257e-05,
      "loss": 0.9518,
      "step": 10930
    },
    {
      "epoch": 0.627402604853979,
      "grad_norm": 0.2470703125,
      "learning_rate": 7.330972386212558e-05,
      "loss": 0.932,
      "step": 10935
    },
    {
      "epoch": 0.6276894830454989,
      "grad_norm": 0.2890625,
      "learning_rate": 7.321322997543743e-05,
      "loss": 0.9442,
      "step": 10940
    },
    {
      "epoch": 0.6279763612370187,
      "grad_norm": 0.26171875,
      "learning_rate": 7.311676295058232e-05,
      "loss": 0.8918,
      "step": 10945
    },
    {
      "epoch": 0.6282632394285387,
      "grad_norm": 0.263671875,
      "learning_rate": 7.302032288429756e-05,
      "loss": 0.9165,
      "step": 10950
    },
    {
      "epoch": 0.6285501176200585,
      "grad_norm": 0.283203125,
      "learning_rate": 7.292390987329356e-05,
      "loss": 0.9855,
      "step": 10955
    },
    {
      "epoch": 0.6288369958115784,
      "grad_norm": 0.283203125,
      "learning_rate": 7.282752401425343e-05,
      "loss": 0.9264,
      "step": 10960
    },
    {
      "epoch": 0.6291238740030983,
      "grad_norm": 0.263671875,
      "learning_rate": 7.273116540383319e-05,
      "loss": 1.0362,
      "step": 10965
    },
    {
      "epoch": 0.6294107521946182,
      "grad_norm": 0.259765625,
      "learning_rate": 7.263483413866135e-05,
      "loss": 0.9872,
      "step": 10970
    },
    {
      "epoch": 0.629697630386138,
      "grad_norm": 0.26171875,
      "learning_rate": 7.253853031533928e-05,
      "loss": 0.9462,
      "step": 10975
    },
    {
      "epoch": 0.629984508577658,
      "grad_norm": 0.27734375,
      "learning_rate": 7.244225403044056e-05,
      "loss": 0.989,
      "step": 10980
    },
    {
      "epoch": 0.6302713867691778,
      "grad_norm": 0.27734375,
      "learning_rate": 7.234600538051124e-05,
      "loss": 0.8998,
      "step": 10985
    },
    {
      "epoch": 0.6305582649606977,
      "grad_norm": 0.27734375,
      "learning_rate": 7.22497844620698e-05,
      "loss": 0.9069,
      "step": 10990
    },
    {
      "epoch": 0.6308451431522175,
      "grad_norm": 0.2373046875,
      "learning_rate": 7.215359137160673e-05,
      "loss": 0.9017,
      "step": 10995
    },
    {
      "epoch": 0.6311320213437375,
      "grad_norm": 0.298828125,
      "learning_rate": 7.205742620558464e-05,
      "loss": 0.9396,
      "step": 11000
    },
    {
      "epoch": 0.6314188995352573,
      "grad_norm": 0.279296875,
      "learning_rate": 7.196128906043822e-05,
      "loss": 1.0328,
      "step": 11005
    },
    {
      "epoch": 0.6317057777267772,
      "grad_norm": 0.263671875,
      "learning_rate": 7.1865180032574e-05,
      "loss": 0.8883,
      "step": 11010
    },
    {
      "epoch": 0.6319926559182971,
      "grad_norm": 0.265625,
      "learning_rate": 7.176909921837033e-05,
      "loss": 1.0588,
      "step": 11015
    },
    {
      "epoch": 0.632279534109817,
      "grad_norm": 0.26171875,
      "learning_rate": 7.167304671417729e-05,
      "loss": 0.918,
      "step": 11020
    },
    {
      "epoch": 0.6325664123013368,
      "grad_norm": 0.26171875,
      "learning_rate": 7.157702261631653e-05,
      "loss": 0.965,
      "step": 11025
    },
    {
      "epoch": 0.6328532904928568,
      "grad_norm": 0.26171875,
      "learning_rate": 7.148102702108122e-05,
      "loss": 0.9823,
      "step": 11030
    },
    {
      "epoch": 0.6331401686843766,
      "grad_norm": 0.26953125,
      "learning_rate": 7.138506002473591e-05,
      "loss": 0.981,
      "step": 11035
    },
    {
      "epoch": 0.6334270468758965,
      "grad_norm": 0.294921875,
      "learning_rate": 7.128912172351664e-05,
      "loss": 0.9348,
      "step": 11040
    },
    {
      "epoch": 0.6337139250674164,
      "grad_norm": 0.259765625,
      "learning_rate": 7.119321221363047e-05,
      "loss": 0.9774,
      "step": 11045
    },
    {
      "epoch": 0.6340008032589363,
      "grad_norm": 0.26171875,
      "learning_rate": 7.109733159125566e-05,
      "loss": 0.9297,
      "step": 11050
    },
    {
      "epoch": 0.6342876814504561,
      "grad_norm": 0.26171875,
      "learning_rate": 7.100147995254156e-05,
      "loss": 0.9165,
      "step": 11055
    },
    {
      "epoch": 0.6345745596419761,
      "grad_norm": 0.2490234375,
      "learning_rate": 7.09056573936084e-05,
      "loss": 0.9639,
      "step": 11060
    },
    {
      "epoch": 0.6348614378334959,
      "grad_norm": 0.251953125,
      "learning_rate": 7.080986401054721e-05,
      "loss": 0.8885,
      "step": 11065
    },
    {
      "epoch": 0.6351483160250158,
      "grad_norm": 0.283203125,
      "learning_rate": 7.071409989941989e-05,
      "loss": 0.9485,
      "step": 11070
    },
    {
      "epoch": 0.6354351942165356,
      "grad_norm": 0.26171875,
      "learning_rate": 7.061836515625886e-05,
      "loss": 0.9421,
      "step": 11075
    },
    {
      "epoch": 0.6357220724080556,
      "grad_norm": 0.255859375,
      "learning_rate": 7.052265987706708e-05,
      "loss": 0.9441,
      "step": 11080
    },
    {
      "epoch": 0.6360089505995754,
      "grad_norm": 0.25390625,
      "learning_rate": 7.042698415781813e-05,
      "loss": 0.9537,
      "step": 11085
    },
    {
      "epoch": 0.6362958287910953,
      "grad_norm": 0.26953125,
      "learning_rate": 7.033133809445577e-05,
      "loss": 0.9135,
      "step": 11090
    },
    {
      "epoch": 0.6365827069826152,
      "grad_norm": 0.271484375,
      "learning_rate": 7.02357217828941e-05,
      "loss": 0.9283,
      "step": 11095
    },
    {
      "epoch": 0.6368695851741351,
      "grad_norm": 0.2578125,
      "learning_rate": 7.014013531901733e-05,
      "loss": 0.889,
      "step": 11100
    },
    {
      "epoch": 0.6371564633656549,
      "grad_norm": 0.28515625,
      "learning_rate": 7.004457879867986e-05,
      "loss": 0.9422,
      "step": 11105
    },
    {
      "epoch": 0.6374433415571749,
      "grad_norm": 0.2734375,
      "learning_rate": 6.994905231770593e-05,
      "loss": 0.9034,
      "step": 11110
    },
    {
      "epoch": 0.6377302197486947,
      "grad_norm": 0.26953125,
      "learning_rate": 6.985355597188971e-05,
      "loss": 0.9111,
      "step": 11115
    },
    {
      "epoch": 0.6380170979402146,
      "grad_norm": 0.302734375,
      "learning_rate": 6.975808985699518e-05,
      "loss": 0.9939,
      "step": 11120
    },
    {
      "epoch": 0.6383039761317345,
      "grad_norm": 0.2578125,
      "learning_rate": 6.966265406875597e-05,
      "loss": 0.9296,
      "step": 11125
    },
    {
      "epoch": 0.6385908543232544,
      "grad_norm": 0.2470703125,
      "learning_rate": 6.956724870287524e-05,
      "loss": 0.9218,
      "step": 11130
    },
    {
      "epoch": 0.6388777325147742,
      "grad_norm": 0.251953125,
      "learning_rate": 6.94718738550258e-05,
      "loss": 0.9668,
      "step": 11135
    },
    {
      "epoch": 0.6391646107062942,
      "grad_norm": 0.2890625,
      "learning_rate": 6.93765296208497e-05,
      "loss": 0.9111,
      "step": 11140
    },
    {
      "epoch": 0.639451488897814,
      "grad_norm": 0.26953125,
      "learning_rate": 6.928121609595835e-05,
      "loss": 0.9802,
      "step": 11145
    },
    {
      "epoch": 0.6397383670893338,
      "grad_norm": 0.26171875,
      "learning_rate": 6.918593337593238e-05,
      "loss": 0.9536,
      "step": 11150
    },
    {
      "epoch": 0.6400252452808537,
      "grad_norm": 0.263671875,
      "learning_rate": 6.909068155632153e-05,
      "loss": 0.9412,
      "step": 11155
    },
    {
      "epoch": 0.6403121234723737,
      "grad_norm": 0.26171875,
      "learning_rate": 6.899546073264454e-05,
      "loss": 0.9634,
      "step": 11160
    },
    {
      "epoch": 0.6405990016638935,
      "grad_norm": 0.2734375,
      "learning_rate": 6.890027100038901e-05,
      "loss": 0.9113,
      "step": 11165
    },
    {
      "epoch": 0.6408858798554133,
      "grad_norm": 0.2734375,
      "learning_rate": 6.880511245501149e-05,
      "loss": 1.0384,
      "step": 11170
    },
    {
      "epoch": 0.6411727580469333,
      "grad_norm": 0.26953125,
      "learning_rate": 6.870998519193717e-05,
      "loss": 0.9608,
      "step": 11175
    },
    {
      "epoch": 0.6414596362384531,
      "grad_norm": 0.28125,
      "learning_rate": 6.861488930655979e-05,
      "loss": 0.9531,
      "step": 11180
    },
    {
      "epoch": 0.641746514429973,
      "grad_norm": 0.271484375,
      "learning_rate": 6.851982489424187e-05,
      "loss": 0.9631,
      "step": 11185
    },
    {
      "epoch": 0.642033392621493,
      "grad_norm": 0.25390625,
      "learning_rate": 6.842479205031411e-05,
      "loss": 0.9673,
      "step": 11190
    },
    {
      "epoch": 0.6423202708130128,
      "grad_norm": 0.26171875,
      "learning_rate": 6.832979087007565e-05,
      "loss": 0.976,
      "step": 11195
    },
    {
      "epoch": 0.6426071490045326,
      "grad_norm": 0.259765625,
      "learning_rate": 6.823482144879398e-05,
      "loss": 0.9439,
      "step": 11200
    },
    {
      "epoch": 0.6428940271960526,
      "grad_norm": 0.2578125,
      "learning_rate": 6.813988388170456e-05,
      "loss": 0.9968,
      "step": 11205
    },
    {
      "epoch": 0.6431809053875724,
      "grad_norm": 0.2734375,
      "learning_rate": 6.804497826401105e-05,
      "loss": 0.9747,
      "step": 11210
    },
    {
      "epoch": 0.6434677835790923,
      "grad_norm": 0.2578125,
      "learning_rate": 6.795010469088495e-05,
      "loss": 0.9963,
      "step": 11215
    },
    {
      "epoch": 0.6437546617706122,
      "grad_norm": 0.283203125,
      "learning_rate": 6.785526325746576e-05,
      "loss": 0.9884,
      "step": 11220
    },
    {
      "epoch": 0.6440415399621321,
      "grad_norm": 0.26953125,
      "learning_rate": 6.776045405886066e-05,
      "loss": 1.0205,
      "step": 11225
    },
    {
      "epoch": 0.6443284181536519,
      "grad_norm": 0.275390625,
      "learning_rate": 6.766567719014449e-05,
      "loss": 0.9534,
      "step": 11230
    },
    {
      "epoch": 0.6446152963451719,
      "grad_norm": 0.255859375,
      "learning_rate": 6.75709327463598e-05,
      "loss": 0.9204,
      "step": 11235
    },
    {
      "epoch": 0.6449021745366917,
      "grad_norm": 0.25,
      "learning_rate": 6.747622082251643e-05,
      "loss": 0.8916,
      "step": 11240
    },
    {
      "epoch": 0.6451890527282116,
      "grad_norm": 0.26171875,
      "learning_rate": 6.738154151359172e-05,
      "loss": 0.9242,
      "step": 11245
    },
    {
      "epoch": 0.6454759309197314,
      "grad_norm": 0.283203125,
      "learning_rate": 6.728689491453039e-05,
      "loss": 0.9652,
      "step": 11250
    },
    {
      "epoch": 0.6457628091112514,
      "grad_norm": 0.2431640625,
      "learning_rate": 6.719228112024417e-05,
      "loss": 0.9385,
      "step": 11255
    },
    {
      "epoch": 0.6460496873027712,
      "grad_norm": 0.271484375,
      "learning_rate": 6.709770022561198e-05,
      "loss": 0.9938,
      "step": 11260
    },
    {
      "epoch": 0.6463365654942911,
      "grad_norm": 0.259765625,
      "learning_rate": 6.700315232547981e-05,
      "loss": 0.9901,
      "step": 11265
    },
    {
      "epoch": 0.646623443685811,
      "grad_norm": 0.25390625,
      "learning_rate": 6.690863751466048e-05,
      "loss": 0.9271,
      "step": 11270
    },
    {
      "epoch": 0.6469103218773309,
      "grad_norm": 0.279296875,
      "learning_rate": 6.681415588793367e-05,
      "loss": 0.9564,
      "step": 11275
    },
    {
      "epoch": 0.6471972000688507,
      "grad_norm": 0.25390625,
      "learning_rate": 6.67197075400457e-05,
      "loss": 0.9223,
      "step": 11280
    },
    {
      "epoch": 0.6474840782603707,
      "grad_norm": 0.263671875,
      "learning_rate": 6.662529256570969e-05,
      "loss": 0.9456,
      "step": 11285
    },
    {
      "epoch": 0.6477709564518905,
      "grad_norm": 0.25390625,
      "learning_rate": 6.653091105960512e-05,
      "loss": 0.9252,
      "step": 11290
    },
    {
      "epoch": 0.6480578346434104,
      "grad_norm": 0.255859375,
      "learning_rate": 6.643656311637796e-05,
      "loss": 0.9373,
      "step": 11295
    },
    {
      "epoch": 0.6483447128349303,
      "grad_norm": 0.28125,
      "learning_rate": 6.634224883064059e-05,
      "loss": 0.9379,
      "step": 11300
    },
    {
      "epoch": 0.6486315910264502,
      "grad_norm": 0.279296875,
      "learning_rate": 6.624796829697158e-05,
      "loss": 1.0462,
      "step": 11305
    },
    {
      "epoch": 0.64891846921797,
      "grad_norm": 0.267578125,
      "learning_rate": 6.615372160991561e-05,
      "loss": 1.0275,
      "step": 11310
    },
    {
      "epoch": 0.64920534740949,
      "grad_norm": 0.267578125,
      "learning_rate": 6.605950886398353e-05,
      "loss": 0.8971,
      "step": 11315
    },
    {
      "epoch": 0.6494922256010098,
      "grad_norm": 0.26171875,
      "learning_rate": 6.596533015365207e-05,
      "loss": 0.887,
      "step": 11320
    },
    {
      "epoch": 0.6497791037925297,
      "grad_norm": 0.26171875,
      "learning_rate": 6.587118557336382e-05,
      "loss": 0.915,
      "step": 11325
    },
    {
      "epoch": 0.6500659819840495,
      "grad_norm": 0.263671875,
      "learning_rate": 6.577707521752725e-05,
      "loss": 0.9363,
      "step": 11330
    },
    {
      "epoch": 0.6503528601755695,
      "grad_norm": 0.27734375,
      "learning_rate": 6.56829991805164e-05,
      "loss": 0.9977,
      "step": 11335
    },
    {
      "epoch": 0.6506397383670893,
      "grad_norm": 0.265625,
      "learning_rate": 6.558895755667091e-05,
      "loss": 0.9611,
      "step": 11340
    },
    {
      "epoch": 0.6509266165586092,
      "grad_norm": 0.26953125,
      "learning_rate": 6.549495044029592e-05,
      "loss": 0.9674,
      "step": 11345
    },
    {
      "epoch": 0.6512134947501291,
      "grad_norm": 0.275390625,
      "learning_rate": 6.540097792566202e-05,
      "loss": 0.9338,
      "step": 11350
    },
    {
      "epoch": 0.651500372941649,
      "grad_norm": 0.26171875,
      "learning_rate": 6.530704010700504e-05,
      "loss": 0.9329,
      "step": 11355
    },
    {
      "epoch": 0.6517872511331688,
      "grad_norm": 0.2734375,
      "learning_rate": 6.521313707852601e-05,
      "loss": 0.9615,
      "step": 11360
    },
    {
      "epoch": 0.6520741293246888,
      "grad_norm": 0.267578125,
      "learning_rate": 6.511926893439115e-05,
      "loss": 0.9741,
      "step": 11365
    },
    {
      "epoch": 0.6523610075162086,
      "grad_norm": 0.248046875,
      "learning_rate": 6.502543576873163e-05,
      "loss": 0.929,
      "step": 11370
    },
    {
      "epoch": 0.6526478857077285,
      "grad_norm": 0.279296875,
      "learning_rate": 6.493163767564352e-05,
      "loss": 0.9553,
      "step": 11375
    },
    {
      "epoch": 0.6529347638992484,
      "grad_norm": 0.27734375,
      "learning_rate": 6.483787474918779e-05,
      "loss": 0.9487,
      "step": 11380
    },
    {
      "epoch": 0.6532216420907683,
      "grad_norm": 0.267578125,
      "learning_rate": 6.474414708339013e-05,
      "loss": 1.0413,
      "step": 11385
    },
    {
      "epoch": 0.6535085202822881,
      "grad_norm": 0.2734375,
      "learning_rate": 6.465045477224079e-05,
      "loss": 0.962,
      "step": 11390
    },
    {
      "epoch": 0.6537953984738081,
      "grad_norm": 0.259765625,
      "learning_rate": 6.455679790969473e-05,
      "loss": 0.9672,
      "step": 11395
    },
    {
      "epoch": 0.6540822766653279,
      "grad_norm": 0.2734375,
      "learning_rate": 6.446317658967119e-05,
      "loss": 0.9116,
      "step": 11400
    },
    {
      "epoch": 0.6543691548568478,
      "grad_norm": 0.2578125,
      "learning_rate": 6.436959090605383e-05,
      "loss": 0.9604,
      "step": 11405
    },
    {
      "epoch": 0.6546560330483676,
      "grad_norm": 0.279296875,
      "learning_rate": 6.42760409526906e-05,
      "loss": 0.9871,
      "step": 11410
    },
    {
      "epoch": 0.6549429112398876,
      "grad_norm": 0.26171875,
      "learning_rate": 6.418252682339361e-05,
      "loss": 0.955,
      "step": 11415
    },
    {
      "epoch": 0.6552297894314074,
      "grad_norm": 0.2734375,
      "learning_rate": 6.408904861193906e-05,
      "loss": 0.9585,
      "step": 11420
    },
    {
      "epoch": 0.6555166676229273,
      "grad_norm": 0.255859375,
      "learning_rate": 6.399560641206706e-05,
      "loss": 0.9315,
      "step": 11425
    },
    {
      "epoch": 0.6558035458144472,
      "grad_norm": 0.267578125,
      "learning_rate": 6.39022003174817e-05,
      "loss": 0.9814,
      "step": 11430
    },
    {
      "epoch": 0.6560904240059671,
      "grad_norm": 0.25,
      "learning_rate": 6.380883042185084e-05,
      "loss": 0.9402,
      "step": 11435
    },
    {
      "epoch": 0.6563773021974869,
      "grad_norm": 0.294921875,
      "learning_rate": 6.371549681880593e-05,
      "loss": 0.9459,
      "step": 11440
    },
    {
      "epoch": 0.6566641803890069,
      "grad_norm": 0.259765625,
      "learning_rate": 6.362219960194223e-05,
      "loss": 0.8998,
      "step": 11445
    },
    {
      "epoch": 0.6569510585805267,
      "grad_norm": 0.267578125,
      "learning_rate": 6.352893886481829e-05,
      "loss": 0.9532,
      "step": 11450
    },
    {
      "epoch": 0.6572379367720466,
      "grad_norm": 0.27734375,
      "learning_rate": 6.343571470095625e-05,
      "loss": 0.9078,
      "step": 11455
    },
    {
      "epoch": 0.6575248149635665,
      "grad_norm": 0.279296875,
      "learning_rate": 6.334252720384153e-05,
      "loss": 0.9404,
      "step": 11460
    },
    {
      "epoch": 0.6578116931550864,
      "grad_norm": 0.25,
      "learning_rate": 6.32493764669227e-05,
      "loss": 0.9324,
      "step": 11465
    },
    {
      "epoch": 0.6580985713466062,
      "grad_norm": 0.26171875,
      "learning_rate": 6.315626258361158e-05,
      "loss": 1.0012,
      "step": 11470
    },
    {
      "epoch": 0.6583854495381262,
      "grad_norm": 0.283203125,
      "learning_rate": 6.306318564728294e-05,
      "loss": 0.9721,
      "step": 11475
    },
    {
      "epoch": 0.658672327729646,
      "grad_norm": 0.251953125,
      "learning_rate": 6.297014575127455e-05,
      "loss": 0.9619,
      "step": 11480
    },
    {
      "epoch": 0.6589592059211659,
      "grad_norm": 0.275390625,
      "learning_rate": 6.287714298888709e-05,
      "loss": 1.0384,
      "step": 11485
    },
    {
      "epoch": 0.6592460841126857,
      "grad_norm": 0.2578125,
      "learning_rate": 6.27841774533838e-05,
      "loss": 0.9609,
      "step": 11490
    },
    {
      "epoch": 0.6595329623042057,
      "grad_norm": 0.259765625,
      "learning_rate": 6.26912492379909e-05,
      "loss": 0.9154,
      "step": 11495
    },
    {
      "epoch": 0.6598198404957255,
      "grad_norm": 0.27734375,
      "learning_rate": 6.259835843589688e-05,
      "loss": 1.0205,
      "step": 11500
    },
    {
      "epoch": 0.6601067186872454,
      "grad_norm": 0.2578125,
      "learning_rate": 6.250550514025287e-05,
      "loss": 1.014,
      "step": 11505
    },
    {
      "epoch": 0.6603935968787653,
      "grad_norm": 0.271484375,
      "learning_rate": 6.24126894441724e-05,
      "loss": 0.9599,
      "step": 11510
    },
    {
      "epoch": 0.6606804750702852,
      "grad_norm": 0.2578125,
      "learning_rate": 6.231991144073126e-05,
      "loss": 0.9182,
      "step": 11515
    },
    {
      "epoch": 0.660967353261805,
      "grad_norm": 0.263671875,
      "learning_rate": 6.222717122296739e-05,
      "loss": 0.9562,
      "step": 11520
    },
    {
      "epoch": 0.661254231453325,
      "grad_norm": 0.271484375,
      "learning_rate": 6.213446888388093e-05,
      "loss": 0.9325,
      "step": 11525
    },
    {
      "epoch": 0.6615411096448448,
      "grad_norm": 0.244140625,
      "learning_rate": 6.204180451643399e-05,
      "loss": 0.8369,
      "step": 11530
    },
    {
      "epoch": 0.6618279878363647,
      "grad_norm": 0.263671875,
      "learning_rate": 6.194917821355062e-05,
      "loss": 0.9454,
      "step": 11535
    },
    {
      "epoch": 0.6621148660278846,
      "grad_norm": 0.2734375,
      "learning_rate": 6.18565900681166e-05,
      "loss": 0.9957,
      "step": 11540
    },
    {
      "epoch": 0.6624017442194045,
      "grad_norm": 0.287109375,
      "learning_rate": 6.176404017297965e-05,
      "loss": 1.0129,
      "step": 11545
    },
    {
      "epoch": 0.6626886224109243,
      "grad_norm": 0.25390625,
      "learning_rate": 6.167152862094893e-05,
      "loss": 0.903,
      "step": 11550
    },
    {
      "epoch": 0.6629755006024443,
      "grad_norm": 0.296875,
      "learning_rate": 6.157905550479525e-05,
      "loss": 0.9487,
      "step": 11555
    },
    {
      "epoch": 0.6632623787939641,
      "grad_norm": 0.255859375,
      "learning_rate": 6.148662091725087e-05,
      "loss": 0.928,
      "step": 11560
    },
    {
      "epoch": 0.663549256985484,
      "grad_norm": 0.27734375,
      "learning_rate": 6.139422495100939e-05,
      "loss": 1.0175,
      "step": 11565
    },
    {
      "epoch": 0.6638361351770038,
      "grad_norm": 0.25390625,
      "learning_rate": 6.13018676987257e-05,
      "loss": 0.9285,
      "step": 11570
    },
    {
      "epoch": 0.6641230133685238,
      "grad_norm": 0.267578125,
      "learning_rate": 6.120954925301587e-05,
      "loss": 0.9314,
      "step": 11575
    },
    {
      "epoch": 0.6644098915600436,
      "grad_norm": 0.263671875,
      "learning_rate": 6.111726970645703e-05,
      "loss": 0.9802,
      "step": 11580
    },
    {
      "epoch": 0.6646967697515634,
      "grad_norm": 0.26953125,
      "learning_rate": 6.102502915158733e-05,
      "loss": 0.9253,
      "step": 11585
    },
    {
      "epoch": 0.6649836479430834,
      "grad_norm": 0.275390625,
      "learning_rate": 6.093282768090574e-05,
      "loss": 0.8891,
      "step": 11590
    },
    {
      "epoch": 0.6652705261346032,
      "grad_norm": 0.29296875,
      "learning_rate": 6.084066538687222e-05,
      "loss": 0.9175,
      "step": 11595
    },
    {
      "epoch": 0.6655574043261231,
      "grad_norm": 0.255859375,
      "learning_rate": 6.074854236190723e-05,
      "loss": 0.9622,
      "step": 11600
    },
    {
      "epoch": 0.665844282517643,
      "grad_norm": 0.279296875,
      "learning_rate": 6.065645869839196e-05,
      "loss": 0.9369,
      "step": 11605
    },
    {
      "epoch": 0.6661311607091629,
      "grad_norm": 0.27734375,
      "learning_rate": 6.0564414488668165e-05,
      "loss": 0.9495,
      "step": 11610
    },
    {
      "epoch": 0.6664180389006827,
      "grad_norm": 0.275390625,
      "learning_rate": 6.0472409825037926e-05,
      "loss": 1.0065,
      "step": 11615
    },
    {
      "epoch": 0.6667049170922027,
      "grad_norm": 0.265625,
      "learning_rate": 6.038044479976375e-05,
      "loss": 0.9119,
      "step": 11620
    },
    {
      "epoch": 0.6669917952837225,
      "grad_norm": 0.275390625,
      "learning_rate": 6.0288519505068375e-05,
      "loss": 0.9656,
      "step": 11625
    },
    {
      "epoch": 0.6672786734752424,
      "grad_norm": 0.271484375,
      "learning_rate": 6.01966340331347e-05,
      "loss": 0.9607,
      "step": 11630
    },
    {
      "epoch": 0.6675655516667623,
      "grad_norm": 0.263671875,
      "learning_rate": 6.010478847610565e-05,
      "loss": 0.988,
      "step": 11635
    },
    {
      "epoch": 0.6678524298582822,
      "grad_norm": 0.25390625,
      "learning_rate": 6.0012982926084195e-05,
      "loss": 0.9958,
      "step": 11640
    },
    {
      "epoch": 0.668139308049802,
      "grad_norm": 0.275390625,
      "learning_rate": 5.992121747513315e-05,
      "loss": 0.9578,
      "step": 11645
    },
    {
      "epoch": 0.6684261862413219,
      "grad_norm": 0.26171875,
      "learning_rate": 5.982949221527506e-05,
      "loss": 0.9389,
      "step": 11650
    },
    {
      "epoch": 0.6687130644328418,
      "grad_norm": 0.275390625,
      "learning_rate": 5.973780723849225e-05,
      "loss": 0.9213,
      "step": 11655
    },
    {
      "epoch": 0.6689999426243617,
      "grad_norm": 0.26953125,
      "learning_rate": 5.9646162636726634e-05,
      "loss": 0.8939,
      "step": 11660
    },
    {
      "epoch": 0.6692868208158815,
      "grad_norm": 0.265625,
      "learning_rate": 5.955455850187962e-05,
      "loss": 0.912,
      "step": 11665
    },
    {
      "epoch": 0.6695736990074015,
      "grad_norm": 0.279296875,
      "learning_rate": 5.946299492581201e-05,
      "loss": 0.964,
      "step": 11670
    },
    {
      "epoch": 0.6698605771989213,
      "grad_norm": 0.259765625,
      "learning_rate": 5.9371472000344006e-05,
      "loss": 0.9815,
      "step": 11675
    },
    {
      "epoch": 0.6701474553904412,
      "grad_norm": 0.2578125,
      "learning_rate": 5.9279989817255e-05,
      "loss": 1.0095,
      "step": 11680
    },
    {
      "epoch": 0.6704343335819611,
      "grad_norm": 0.26171875,
      "learning_rate": 5.9188548468283475e-05,
      "loss": 0.8657,
      "step": 11685
    },
    {
      "epoch": 0.670721211773481,
      "grad_norm": 0.2734375,
      "learning_rate": 5.9097148045127095e-05,
      "loss": 0.941,
      "step": 11690
    },
    {
      "epoch": 0.6710080899650008,
      "grad_norm": 0.279296875,
      "learning_rate": 5.9005788639442394e-05,
      "loss": 0.931,
      "step": 11695
    },
    {
      "epoch": 0.6712949681565208,
      "grad_norm": 0.2734375,
      "learning_rate": 5.8914470342844694e-05,
      "loss": 0.897,
      "step": 11700
    },
    {
      "epoch": 0.6715818463480406,
      "grad_norm": 0.263671875,
      "learning_rate": 5.8823193246908346e-05,
      "loss": 0.9791,
      "step": 11705
    },
    {
      "epoch": 0.6718687245395605,
      "grad_norm": 0.2734375,
      "learning_rate": 5.873195744316611e-05,
      "loss": 0.9706,
      "step": 11710
    },
    {
      "epoch": 0.6721556027310804,
      "grad_norm": 0.294921875,
      "learning_rate": 5.86407630231095e-05,
      "loss": 0.9738,
      "step": 11715
    },
    {
      "epoch": 0.6724424809226003,
      "grad_norm": 0.255859375,
      "learning_rate": 5.8549610078188446e-05,
      "loss": 0.9373,
      "step": 11720
    },
    {
      "epoch": 0.6727293591141201,
      "grad_norm": 0.279296875,
      "learning_rate": 5.845849869981137e-05,
      "loss": 0.9755,
      "step": 11725
    },
    {
      "epoch": 0.67301623730564,
      "grad_norm": 0.263671875,
      "learning_rate": 5.836742897934497e-05,
      "loss": 0.8923,
      "step": 11730
    },
    {
      "epoch": 0.6733031154971599,
      "grad_norm": 0.251953125,
      "learning_rate": 5.827640100811409e-05,
      "loss": 0.9374,
      "step": 11735
    },
    {
      "epoch": 0.6735899936886798,
      "grad_norm": 0.26171875,
      "learning_rate": 5.8185414877401876e-05,
      "loss": 0.9589,
      "step": 11740
    },
    {
      "epoch": 0.6738768718801996,
      "grad_norm": 0.251953125,
      "learning_rate": 5.80944706784494e-05,
      "loss": 0.9999,
      "step": 11745
    },
    {
      "epoch": 0.6741637500717196,
      "grad_norm": 0.279296875,
      "learning_rate": 5.8003568502455676e-05,
      "loss": 0.9712,
      "step": 11750
    },
    {
      "epoch": 0.6744506282632394,
      "grad_norm": 0.265625,
      "learning_rate": 5.7912708440577635e-05,
      "loss": 0.9159,
      "step": 11755
    },
    {
      "epoch": 0.6747375064547593,
      "grad_norm": 0.283203125,
      "learning_rate": 5.782189058392995e-05,
      "loss": 1.0858,
      "step": 11760
    },
    {
      "epoch": 0.6750243846462792,
      "grad_norm": 0.27734375,
      "learning_rate": 5.773111502358492e-05,
      "loss": 0.9238,
      "step": 11765
    },
    {
      "epoch": 0.6753112628377991,
      "grad_norm": 0.259765625,
      "learning_rate": 5.764038185057259e-05,
      "loss": 0.8468,
      "step": 11770
    },
    {
      "epoch": 0.6755981410293189,
      "grad_norm": 0.25390625,
      "learning_rate": 5.754969115588034e-05,
      "loss": 0.9542,
      "step": 11775
    },
    {
      "epoch": 0.6758850192208389,
      "grad_norm": 0.265625,
      "learning_rate": 5.7459043030452966e-05,
      "loss": 0.9437,
      "step": 11780
    },
    {
      "epoch": 0.6761718974123587,
      "grad_norm": 0.2578125,
      "learning_rate": 5.736843756519259e-05,
      "loss": 1.0084,
      "step": 11785
    },
    {
      "epoch": 0.6764587756038786,
      "grad_norm": 0.275390625,
      "learning_rate": 5.727787485095866e-05,
      "loss": 1.0258,
      "step": 11790
    },
    {
      "epoch": 0.6767456537953985,
      "grad_norm": 0.2578125,
      "learning_rate": 5.718735497856762e-05,
      "loss": 0.9685,
      "step": 11795
    },
    {
      "epoch": 0.6770325319869184,
      "grad_norm": 0.27734375,
      "learning_rate": 5.709687803879301e-05,
      "loss": 0.9179,
      "step": 11800
    },
    {
      "epoch": 0.6773194101784382,
      "grad_norm": 0.2734375,
      "learning_rate": 5.700644412236531e-05,
      "loss": 0.871,
      "step": 11805
    },
    {
      "epoch": 0.6776062883699581,
      "grad_norm": 0.2734375,
      "learning_rate": 5.691605331997185e-05,
      "loss": 1.0171,
      "step": 11810
    },
    {
      "epoch": 0.677893166561478,
      "grad_norm": 0.263671875,
      "learning_rate": 5.682570572225671e-05,
      "loss": 0.9522,
      "step": 11815
    },
    {
      "epoch": 0.6781800447529979,
      "grad_norm": 0.283203125,
      "learning_rate": 5.67354014198207e-05,
      "loss": 0.9242,
      "step": 11820
    },
    {
      "epoch": 0.6784669229445177,
      "grad_norm": 0.265625,
      "learning_rate": 5.664514050322122e-05,
      "loss": 0.9277,
      "step": 11825
    },
    {
      "epoch": 0.6787538011360377,
      "grad_norm": 0.24609375,
      "learning_rate": 5.6554923062971966e-05,
      "loss": 1.019,
      "step": 11830
    },
    {
      "epoch": 0.6790406793275575,
      "grad_norm": 0.26171875,
      "learning_rate": 5.646474918954334e-05,
      "loss": 0.9273,
      "step": 11835
    },
    {
      "epoch": 0.6793275575190774,
      "grad_norm": 0.265625,
      "learning_rate": 5.637461897336185e-05,
      "loss": 0.9329,
      "step": 11840
    },
    {
      "epoch": 0.6796144357105973,
      "grad_norm": 0.267578125,
      "learning_rate": 5.628453250481026e-05,
      "loss": 0.9303,
      "step": 11845
    },
    {
      "epoch": 0.6799013139021172,
      "grad_norm": 0.26953125,
      "learning_rate": 5.6194489874227504e-05,
      "loss": 0.8853,
      "step": 11850
    },
    {
      "epoch": 0.680188192093637,
      "grad_norm": 0.26171875,
      "learning_rate": 5.610449117190855e-05,
      "loss": 0.9082,
      "step": 11855
    },
    {
      "epoch": 0.680475070285157,
      "grad_norm": 0.279296875,
      "learning_rate": 5.601453648810426e-05,
      "loss": 0.8877,
      "step": 11860
    },
    {
      "epoch": 0.6807619484766768,
      "grad_norm": 0.2578125,
      "learning_rate": 5.5924625913021386e-05,
      "loss": 0.9318,
      "step": 11865
    },
    {
      "epoch": 0.6810488266681967,
      "grad_norm": 0.259765625,
      "learning_rate": 5.583475953682251e-05,
      "loss": 0.9631,
      "step": 11870
    },
    {
      "epoch": 0.6813357048597166,
      "grad_norm": 0.26953125,
      "learning_rate": 5.5744937449625854e-05,
      "loss": 0.9611,
      "step": 11875
    },
    {
      "epoch": 0.6816225830512365,
      "grad_norm": 0.28515625,
      "learning_rate": 5.565515974150508e-05,
      "loss": 0.9942,
      "step": 11880
    },
    {
      "epoch": 0.6819094612427563,
      "grad_norm": 0.28125,
      "learning_rate": 5.556542650248959e-05,
      "loss": 0.9043,
      "step": 11885
    },
    {
      "epoch": 0.6821963394342762,
      "grad_norm": 0.24609375,
      "learning_rate": 5.547573782256403e-05,
      "loss": 0.9597,
      "step": 11890
    },
    {
      "epoch": 0.6824832176257961,
      "grad_norm": 0.26953125,
      "learning_rate": 5.538609379166845e-05,
      "loss": 0.9716,
      "step": 11895
    },
    {
      "epoch": 0.682770095817316,
      "grad_norm": 0.25390625,
      "learning_rate": 5.529649449969804e-05,
      "loss": 0.907,
      "step": 11900
    },
    {
      "epoch": 0.6830569740088358,
      "grad_norm": 0.259765625,
      "learning_rate": 5.5206940036503194e-05,
      "loss": 0.9749,
      "step": 11905
    },
    {
      "epoch": 0.6833438522003558,
      "grad_norm": 0.279296875,
      "learning_rate": 5.511743049188931e-05,
      "loss": 0.9953,
      "step": 11910
    },
    {
      "epoch": 0.6836307303918756,
      "grad_norm": 0.298828125,
      "learning_rate": 5.5027965955616743e-05,
      "loss": 0.9719,
      "step": 11915
    },
    {
      "epoch": 0.6839176085833955,
      "grad_norm": 0.2431640625,
      "learning_rate": 5.49385465174008e-05,
      "loss": 0.9471,
      "step": 11920
    },
    {
      "epoch": 0.6842044867749154,
      "grad_norm": 0.25390625,
      "learning_rate": 5.48491722669115e-05,
      "loss": 1.0521,
      "step": 11925
    },
    {
      "epoch": 0.6844913649664353,
      "grad_norm": 0.267578125,
      "learning_rate": 5.47598432937734e-05,
      "loss": 0.9806,
      "step": 11930
    },
    {
      "epoch": 0.6847782431579551,
      "grad_norm": 0.2578125,
      "learning_rate": 5.467055968756595e-05,
      "loss": 0.9911,
      "step": 11935
    },
    {
      "epoch": 0.6850651213494751,
      "grad_norm": 0.251953125,
      "learning_rate": 5.4581321537822875e-05,
      "loss": 0.8815,
      "step": 11940
    },
    {
      "epoch": 0.6853519995409949,
      "grad_norm": 0.259765625,
      "learning_rate": 5.4492128934032416e-05,
      "loss": 1.0047,
      "step": 11945
    },
    {
      "epoch": 0.6856388777325148,
      "grad_norm": 0.296875,
      "learning_rate": 5.440298196563711e-05,
      "loss": 0.9995,
      "step": 11950
    },
    {
      "epoch": 0.6859257559240347,
      "grad_norm": 0.259765625,
      "learning_rate": 5.431388072203373e-05,
      "loss": 0.954,
      "step": 11955
    },
    {
      "epoch": 0.6862126341155546,
      "grad_norm": 0.283203125,
      "learning_rate": 5.4224825292573154e-05,
      "loss": 0.9492,
      "step": 11960
    },
    {
      "epoch": 0.6864995123070744,
      "grad_norm": 0.267578125,
      "learning_rate": 5.4135815766560486e-05,
      "loss": 1.0314,
      "step": 11965
    },
    {
      "epoch": 0.6867863904985942,
      "grad_norm": 0.2578125,
      "learning_rate": 5.40468522332546e-05,
      "loss": 0.9593,
      "step": 11970
    },
    {
      "epoch": 0.6870732686901142,
      "grad_norm": 0.25390625,
      "learning_rate": 5.395793478186838e-05,
      "loss": 0.9984,
      "step": 11975
    },
    {
      "epoch": 0.687360146881634,
      "grad_norm": 0.275390625,
      "learning_rate": 5.386906350156833e-05,
      "loss": 0.9697,
      "step": 11980
    },
    {
      "epoch": 0.6876470250731539,
      "grad_norm": 0.287109375,
      "learning_rate": 5.378023848147487e-05,
      "loss": 0.9866,
      "step": 11985
    },
    {
      "epoch": 0.6879339032646739,
      "grad_norm": 0.267578125,
      "learning_rate": 5.36914598106619e-05,
      "loss": 1.0016,
      "step": 11990
    },
    {
      "epoch": 0.6882207814561937,
      "grad_norm": 0.267578125,
      "learning_rate": 5.3602727578156895e-05,
      "loss": 0.9289,
      "step": 11995
    },
    {
      "epoch": 0.6885076596477135,
      "grad_norm": 0.287109375,
      "learning_rate": 5.35140418729407e-05,
      "loss": 0.9322,
      "step": 12000
    },
    {
      "epoch": 0.6887945378392335,
      "grad_norm": 0.265625,
      "learning_rate": 5.3425402783947564e-05,
      "loss": 0.9132,
      "step": 12005
    },
    {
      "epoch": 0.6890814160307533,
      "grad_norm": 0.27734375,
      "learning_rate": 5.3336810400064904e-05,
      "loss": 1.0212,
      "step": 12010
    },
    {
      "epoch": 0.6893682942222732,
      "grad_norm": 0.27734375,
      "learning_rate": 5.324826481013345e-05,
      "loss": 0.9593,
      "step": 12015
    },
    {
      "epoch": 0.6896551724137931,
      "grad_norm": 0.271484375,
      "learning_rate": 5.315976610294689e-05,
      "loss": 0.9577,
      "step": 12020
    },
    {
      "epoch": 0.689942050605313,
      "grad_norm": 0.26171875,
      "learning_rate": 5.307131436725191e-05,
      "loss": 0.9461,
      "step": 12025
    },
    {
      "epoch": 0.6902289287968328,
      "grad_norm": 0.275390625,
      "learning_rate": 5.298290969174812e-05,
      "loss": 0.9663,
      "step": 12030
    },
    {
      "epoch": 0.6905158069883528,
      "grad_norm": 0.2412109375,
      "learning_rate": 5.2894552165087916e-05,
      "loss": 1.0194,
      "step": 12035
    },
    {
      "epoch": 0.6908026851798726,
      "grad_norm": 0.2890625,
      "learning_rate": 5.2806241875876426e-05,
      "loss": 1.0011,
      "step": 12040
    },
    {
      "epoch": 0.6910895633713925,
      "grad_norm": 0.244140625,
      "learning_rate": 5.271797891267142e-05,
      "loss": 0.8782,
      "step": 12045
    },
    {
      "epoch": 0.6913764415629123,
      "grad_norm": 0.275390625,
      "learning_rate": 5.262976336398318e-05,
      "loss": 0.9568,
      "step": 12050
    },
    {
      "epoch": 0.6916633197544323,
      "grad_norm": 0.279296875,
      "learning_rate": 5.254159531827445e-05,
      "loss": 1.0285,
      "step": 12055
    },
    {
      "epoch": 0.6919501979459521,
      "grad_norm": 0.2734375,
      "learning_rate": 5.245347486396033e-05,
      "loss": 0.9552,
      "step": 12060
    },
    {
      "epoch": 0.692237076137472,
      "grad_norm": 0.25390625,
      "learning_rate": 5.236540208940827e-05,
      "loss": 0.917,
      "step": 12065
    },
    {
      "epoch": 0.6925239543289919,
      "grad_norm": 0.28125,
      "learning_rate": 5.2277377082937806e-05,
      "loss": 0.9708,
      "step": 12070
    },
    {
      "epoch": 0.6928108325205118,
      "grad_norm": 0.271484375,
      "learning_rate": 5.2189399932820616e-05,
      "loss": 0.9109,
      "step": 12075
    },
    {
      "epoch": 0.6930977107120316,
      "grad_norm": 0.279296875,
      "learning_rate": 5.210147072728038e-05,
      "loss": 0.934,
      "step": 12080
    },
    {
      "epoch": 0.6933845889035516,
      "grad_norm": 0.2734375,
      "learning_rate": 5.2013589554492714e-05,
      "loss": 0.9386,
      "step": 12085
    },
    {
      "epoch": 0.6936714670950714,
      "grad_norm": 0.2734375,
      "learning_rate": 5.192575650258503e-05,
      "loss": 0.9219,
      "step": 12090
    },
    {
      "epoch": 0.6939583452865913,
      "grad_norm": 0.2578125,
      "learning_rate": 5.1837971659636545e-05,
      "loss": 0.8816,
      "step": 12095
    },
    {
      "epoch": 0.6942452234781112,
      "grad_norm": 0.279296875,
      "learning_rate": 5.175023511367807e-05,
      "loss": 0.9099,
      "step": 12100
    },
    {
      "epoch": 0.6945321016696311,
      "grad_norm": 0.302734375,
      "learning_rate": 5.1662546952692015e-05,
      "loss": 0.9268,
      "step": 12105
    },
    {
      "epoch": 0.6948189798611509,
      "grad_norm": 0.265625,
      "learning_rate": 5.1574907264612224e-05,
      "loss": 0.8738,
      "step": 12110
    },
    {
      "epoch": 0.6951058580526709,
      "grad_norm": 0.26953125,
      "learning_rate": 5.148731613732407e-05,
      "loss": 0.8922,
      "step": 12115
    },
    {
      "epoch": 0.6953927362441907,
      "grad_norm": 0.291015625,
      "learning_rate": 5.139977365866406e-05,
      "loss": 0.8938,
      "step": 12120
    },
    {
      "epoch": 0.6956796144357106,
      "grad_norm": 0.2578125,
      "learning_rate": 5.131227991642001e-05,
      "loss": 0.9463,
      "step": 12125
    },
    {
      "epoch": 0.6959664926272304,
      "grad_norm": 0.2734375,
      "learning_rate": 5.122483499833084e-05,
      "loss": 0.9507,
      "step": 12130
    },
    {
      "epoch": 0.6962533708187504,
      "grad_norm": 0.3359375,
      "learning_rate": 5.1137438992086506e-05,
      "loss": 0.9741,
      "step": 12135
    },
    {
      "epoch": 0.6965402490102702,
      "grad_norm": 0.263671875,
      "learning_rate": 5.1050091985327884e-05,
      "loss": 0.9182,
      "step": 12140
    },
    {
      "epoch": 0.6968271272017901,
      "grad_norm": 0.28125,
      "learning_rate": 5.096279406564686e-05,
      "loss": 0.9731,
      "step": 12145
    },
    {
      "epoch": 0.69711400539331,
      "grad_norm": 0.265625,
      "learning_rate": 5.087554532058586e-05,
      "loss": 0.9596,
      "step": 12150
    },
    {
      "epoch": 0.6974008835848299,
      "grad_norm": 0.259765625,
      "learning_rate": 5.078834583763817e-05,
      "loss": 0.867,
      "step": 12155
    },
    {
      "epoch": 0.6976877617763497,
      "grad_norm": 0.271484375,
      "learning_rate": 5.0701195704247595e-05,
      "loss": 0.9869,
      "step": 12160
    },
    {
      "epoch": 0.6979746399678697,
      "grad_norm": 0.275390625,
      "learning_rate": 5.061409500780854e-05,
      "loss": 0.9413,
      "step": 12165
    },
    {
      "epoch": 0.6982615181593895,
      "grad_norm": 0.255859375,
      "learning_rate": 5.052704383566577e-05,
      "loss": 0.9569,
      "step": 12170
    },
    {
      "epoch": 0.6985483963509094,
      "grad_norm": 0.25,
      "learning_rate": 5.044004227511436e-05,
      "loss": 0.949,
      "step": 12175
    },
    {
      "epoch": 0.6988352745424293,
      "grad_norm": 0.271484375,
      "learning_rate": 5.0353090413399705e-05,
      "loss": 0.9525,
      "step": 12180
    },
    {
      "epoch": 0.6991221527339492,
      "grad_norm": 0.26171875,
      "learning_rate": 5.02661883377173e-05,
      "loss": 0.942,
      "step": 12185
    },
    {
      "epoch": 0.699409030925469,
      "grad_norm": 0.2734375,
      "learning_rate": 5.017933613521273e-05,
      "loss": 0.9621,
      "step": 12190
    },
    {
      "epoch": 0.699695909116989,
      "grad_norm": 0.267578125,
      "learning_rate": 5.009253389298165e-05,
      "loss": 0.976,
      "step": 12195
    },
    {
      "epoch": 0.6999827873085088,
      "grad_norm": 0.271484375,
      "learning_rate": 5.0005781698069474e-05,
      "loss": 0.9563,
      "step": 12200
    },
    {
      "epoch": 0.7002696655000287,
      "grad_norm": 0.255859375,
      "learning_rate": 4.991907963747148e-05,
      "loss": 0.9092,
      "step": 12205
    },
    {
      "epoch": 0.7005565436915485,
      "grad_norm": 0.259765625,
      "learning_rate": 4.983242779813276e-05,
      "loss": 0.8882,
      "step": 12210
    },
    {
      "epoch": 0.7008434218830685,
      "grad_norm": 0.267578125,
      "learning_rate": 4.9745826266947934e-05,
      "loss": 0.9087,
      "step": 12215
    },
    {
      "epoch": 0.7011303000745883,
      "grad_norm": 0.255859375,
      "learning_rate": 4.965927513076123e-05,
      "loss": 0.9406,
      "step": 12220
    },
    {
      "epoch": 0.7014171782661082,
      "grad_norm": 0.271484375,
      "learning_rate": 4.957277447636629e-05,
      "loss": 0.9818,
      "step": 12225
    },
    {
      "epoch": 0.7017040564576281,
      "grad_norm": 0.2451171875,
      "learning_rate": 4.94863243905062e-05,
      "loss": 0.9895,
      "step": 12230
    },
    {
      "epoch": 0.701990934649148,
      "grad_norm": 0.23828125,
      "learning_rate": 4.939992495987327e-05,
      "loss": 0.9026,
      "step": 12235
    },
    {
      "epoch": 0.7022778128406678,
      "grad_norm": 0.279296875,
      "learning_rate": 4.931357627110902e-05,
      "loss": 0.9303,
      "step": 12240
    },
    {
      "epoch": 0.7025646910321878,
      "grad_norm": 0.408203125,
      "learning_rate": 4.9227278410804225e-05,
      "loss": 0.9506,
      "step": 12245
    },
    {
      "epoch": 0.7028515692237076,
      "grad_norm": 0.275390625,
      "learning_rate": 4.914103146549844e-05,
      "loss": 0.9547,
      "step": 12250
    },
    {
      "epoch": 0.7031384474152275,
      "grad_norm": 0.279296875,
      "learning_rate": 4.905483552168032e-05,
      "loss": 0.925,
      "step": 12255
    },
    {
      "epoch": 0.7034253256067474,
      "grad_norm": 0.25390625,
      "learning_rate": 4.896869066578741e-05,
      "loss": 0.971,
      "step": 12260
    },
    {
      "epoch": 0.7037122037982673,
      "grad_norm": 0.294921875,
      "learning_rate": 4.888259698420594e-05,
      "loss": 1.0059,
      "step": 12265
    },
    {
      "epoch": 0.7039990819897871,
      "grad_norm": 0.265625,
      "learning_rate": 4.879655456327083e-05,
      "loss": 0.9339,
      "step": 12270
    },
    {
      "epoch": 0.7042859601813071,
      "grad_norm": 0.279296875,
      "learning_rate": 4.8710563489265624e-05,
      "loss": 0.9611,
      "step": 12275
    },
    {
      "epoch": 0.7045728383728269,
      "grad_norm": 0.251953125,
      "learning_rate": 4.862462384842237e-05,
      "loss": 1.0341,
      "step": 12280
    },
    {
      "epoch": 0.7048597165643468,
      "grad_norm": 0.287109375,
      "learning_rate": 4.853873572692151e-05,
      "loss": 1.0407,
      "step": 12285
    },
    {
      "epoch": 0.7051465947558666,
      "grad_norm": 0.251953125,
      "learning_rate": 4.845289921089182e-05,
      "loss": 0.9762,
      "step": 12290
    },
    {
      "epoch": 0.7054334729473866,
      "grad_norm": 0.291015625,
      "learning_rate": 4.8367114386410486e-05,
      "loss": 1.0233,
      "step": 12295
    },
    {
      "epoch": 0.7057203511389064,
      "grad_norm": 0.279296875,
      "learning_rate": 4.8281381339502565e-05,
      "loss": 0.9128,
      "step": 12300
    },
    {
      "epoch": 0.7060072293304263,
      "grad_norm": 0.279296875,
      "learning_rate": 4.8195700156141386e-05,
      "loss": 1.0109,
      "step": 12305
    },
    {
      "epoch": 0.7062941075219462,
      "grad_norm": 0.263671875,
      "learning_rate": 4.8110070922248284e-05,
      "loss": 1.0136,
      "step": 12310
    },
    {
      "epoch": 0.7065809857134661,
      "grad_norm": 0.275390625,
      "learning_rate": 4.802449372369242e-05,
      "loss": 0.925,
      "step": 12315
    },
    {
      "epoch": 0.7068678639049859,
      "grad_norm": 0.255859375,
      "learning_rate": 4.79389686462908e-05,
      "loss": 0.9956,
      "step": 12320
    },
    {
      "epoch": 0.7071547420965059,
      "grad_norm": 0.27734375,
      "learning_rate": 4.785349577580817e-05,
      "loss": 0.9552,
      "step": 12325
    },
    {
      "epoch": 0.7074416202880257,
      "grad_norm": 0.267578125,
      "learning_rate": 4.77680751979569e-05,
      "loss": 0.9871,
      "step": 12330
    },
    {
      "epoch": 0.7077284984795456,
      "grad_norm": 0.271484375,
      "learning_rate": 4.768270699839691e-05,
      "loss": 0.9543,
      "step": 12335
    },
    {
      "epoch": 0.7080153766710655,
      "grad_norm": 0.267578125,
      "learning_rate": 4.759739126273569e-05,
      "loss": 0.9342,
      "step": 12340
    },
    {
      "epoch": 0.7083022548625854,
      "grad_norm": 0.26171875,
      "learning_rate": 4.751212807652806e-05,
      "loss": 0.9207,
      "step": 12345
    },
    {
      "epoch": 0.7085891330541052,
      "grad_norm": 0.263671875,
      "learning_rate": 4.742691752527606e-05,
      "loss": 0.9694,
      "step": 12350
    },
    {
      "epoch": 0.7088760112456252,
      "grad_norm": 0.2490234375,
      "learning_rate": 4.7341759694429014e-05,
      "loss": 0.8722,
      "step": 12355
    },
    {
      "epoch": 0.709162889437145,
      "grad_norm": 0.275390625,
      "learning_rate": 4.725665466938346e-05,
      "loss": 0.9764,
      "step": 12360
    },
    {
      "epoch": 0.7094497676286649,
      "grad_norm": 0.26953125,
      "learning_rate": 4.717160253548287e-05,
      "loss": 0.8684,
      "step": 12365
    },
    {
      "epoch": 0.7097366458201847,
      "grad_norm": 0.2578125,
      "learning_rate": 4.708660337801773e-05,
      "loss": 0.921,
      "step": 12370
    },
    {
      "epoch": 0.7100235240117047,
      "grad_norm": 0.318359375,
      "learning_rate": 4.700165728222538e-05,
      "loss": 0.9742,
      "step": 12375
    },
    {
      "epoch": 0.7103104022032245,
      "grad_norm": 0.271484375,
      "learning_rate": 4.6916764333289934e-05,
      "loss": 1.0064,
      "step": 12380
    },
    {
      "epoch": 0.7105972803947443,
      "grad_norm": 0.283203125,
      "learning_rate": 4.6831924616342217e-05,
      "loss": 0.9463,
      "step": 12385
    },
    {
      "epoch": 0.7108841585862643,
      "grad_norm": 0.30078125,
      "learning_rate": 4.674713821645975e-05,
      "loss": 0.8921,
      "step": 12390
    },
    {
      "epoch": 0.7111710367777841,
      "grad_norm": 0.271484375,
      "learning_rate": 4.6662405218666525e-05,
      "loss": 0.9079,
      "step": 12395
    },
    {
      "epoch": 0.711457914969304,
      "grad_norm": 0.259765625,
      "learning_rate": 4.657772570793289e-05,
      "loss": 0.9219,
      "step": 12400
    },
    {
      "epoch": 0.711744793160824,
      "grad_norm": 0.283203125,
      "learning_rate": 4.649309976917574e-05,
      "loss": 0.9785,
      "step": 12405
    },
    {
      "epoch": 0.7120316713523438,
      "grad_norm": 0.2470703125,
      "learning_rate": 4.6408527487258124e-05,
      "loss": 0.9714,
      "step": 12410
    },
    {
      "epoch": 0.7123185495438636,
      "grad_norm": 0.2421875,
      "learning_rate": 4.6324008946989314e-05,
      "loss": 0.9786,
      "step": 12415
    },
    {
      "epoch": 0.7126054277353836,
      "grad_norm": 0.255859375,
      "learning_rate": 4.62395442331247e-05,
      "loss": 0.9278,
      "step": 12420
    },
    {
      "epoch": 0.7128923059269034,
      "grad_norm": 0.259765625,
      "learning_rate": 4.615513343036567e-05,
      "loss": 0.9442,
      "step": 12425
    },
    {
      "epoch": 0.7131791841184233,
      "grad_norm": 0.279296875,
      "learning_rate": 4.607077662335959e-05,
      "loss": 0.9334,
      "step": 12430
    },
    {
      "epoch": 0.7134660623099432,
      "grad_norm": 0.263671875,
      "learning_rate": 4.59864738966996e-05,
      "loss": 0.9619,
      "step": 12435
    },
    {
      "epoch": 0.7137529405014631,
      "grad_norm": 0.25390625,
      "learning_rate": 4.590222533492473e-05,
      "loss": 1.0088,
      "step": 12440
    },
    {
      "epoch": 0.7140398186929829,
      "grad_norm": 0.28515625,
      "learning_rate": 4.581803102251966e-05,
      "loss": 1.0121,
      "step": 12445
    },
    {
      "epoch": 0.7143266968845028,
      "grad_norm": 0.26953125,
      "learning_rate": 4.573389104391449e-05,
      "loss": 0.9071,
      "step": 12450
    },
    {
      "epoch": 0.7146135750760227,
      "grad_norm": 0.251953125,
      "learning_rate": 4.564980548348511e-05,
      "loss": 0.9467,
      "step": 12455
    },
    {
      "epoch": 0.7149004532675426,
      "grad_norm": 0.263671875,
      "learning_rate": 4.556577442555265e-05,
      "loss": 0.9162,
      "step": 12460
    },
    {
      "epoch": 0.7151873314590624,
      "grad_norm": 0.2734375,
      "learning_rate": 4.5481797954383674e-05,
      "loss": 0.9336,
      "step": 12465
    },
    {
      "epoch": 0.7154742096505824,
      "grad_norm": 0.275390625,
      "learning_rate": 4.5397876154189956e-05,
      "loss": 0.973,
      "step": 12470
    },
    {
      "epoch": 0.7157610878421022,
      "grad_norm": 0.365234375,
      "learning_rate": 4.5314009109128464e-05,
      "loss": 0.9593,
      "step": 12475
    },
    {
      "epoch": 0.7160479660336221,
      "grad_norm": 0.232421875,
      "learning_rate": 4.5230196903301266e-05,
      "loss": 0.885,
      "step": 12480
    },
    {
      "epoch": 0.716334844225142,
      "grad_norm": 0.25390625,
      "learning_rate": 4.51464396207554e-05,
      "loss": 0.8933,
      "step": 12485
    },
    {
      "epoch": 0.7166217224166619,
      "grad_norm": 0.26953125,
      "learning_rate": 4.506273734548292e-05,
      "loss": 0.9838,
      "step": 12490
    },
    {
      "epoch": 0.7169086006081817,
      "grad_norm": 0.251953125,
      "learning_rate": 4.4979090161420645e-05,
      "loss": 0.9826,
      "step": 12495
    },
    {
      "epoch": 0.7171954787997017,
      "grad_norm": 0.251953125,
      "learning_rate": 4.489549815245008e-05,
      "loss": 0.9192,
      "step": 12500
    },
    {
      "epoch": 0.7174823569912215,
      "grad_norm": 0.263671875,
      "learning_rate": 4.4811961402397554e-05,
      "loss": 0.919,
      "step": 12505
    },
    {
      "epoch": 0.7177692351827414,
      "grad_norm": 0.265625,
      "learning_rate": 4.472847999503389e-05,
      "loss": 0.9777,
      "step": 12510
    },
    {
      "epoch": 0.7180561133742613,
      "grad_norm": 0.255859375,
      "learning_rate": 4.4645054014074426e-05,
      "loss": 0.931,
      "step": 12515
    },
    {
      "epoch": 0.7183429915657812,
      "grad_norm": 0.3046875,
      "learning_rate": 4.456168354317892e-05,
      "loss": 0.9206,
      "step": 12520
    },
    {
      "epoch": 0.718629869757301,
      "grad_norm": 0.279296875,
      "learning_rate": 4.4478368665951476e-05,
      "loss": 0.9084,
      "step": 12525
    },
    {
      "epoch": 0.7189167479488209,
      "grad_norm": 0.248046875,
      "learning_rate": 4.43951094659404e-05,
      "loss": 0.9422,
      "step": 12530
    },
    {
      "epoch": 0.7192036261403408,
      "grad_norm": 0.28515625,
      "learning_rate": 4.431190602663827e-05,
      "loss": 0.9395,
      "step": 12535
    },
    {
      "epoch": 0.7194905043318607,
      "grad_norm": 0.2734375,
      "learning_rate": 4.422875843148165e-05,
      "loss": 0.9637,
      "step": 12540
    },
    {
      "epoch": 0.7197773825233805,
      "grad_norm": 0.259765625,
      "learning_rate": 4.414566676385118e-05,
      "loss": 0.9839,
      "step": 12545
    },
    {
      "epoch": 0.7200642607149005,
      "grad_norm": 0.271484375,
      "learning_rate": 4.406263110707125e-05,
      "loss": 0.9328,
      "step": 12550
    },
    {
      "epoch": 0.7203511389064203,
      "grad_norm": 0.271484375,
      "learning_rate": 4.39796515444103e-05,
      "loss": 0.999,
      "step": 12555
    },
    {
      "epoch": 0.7206380170979402,
      "grad_norm": 0.26953125,
      "learning_rate": 4.3896728159080424e-05,
      "loss": 0.9816,
      "step": 12560
    },
    {
      "epoch": 0.7209248952894601,
      "grad_norm": 0.251953125,
      "learning_rate": 4.381386103423735e-05,
      "loss": 1.0244,
      "step": 12565
    },
    {
      "epoch": 0.72121177348098,
      "grad_norm": 0.29296875,
      "learning_rate": 4.373105025298041e-05,
      "loss": 0.9107,
      "step": 12570
    },
    {
      "epoch": 0.7214986516724998,
      "grad_norm": 0.27734375,
      "learning_rate": 4.364829589835245e-05,
      "loss": 0.9351,
      "step": 12575
    },
    {
      "epoch": 0.7217855298640198,
      "grad_norm": 0.26171875,
      "learning_rate": 4.356559805333971e-05,
      "loss": 0.9963,
      "step": 12580
    },
    {
      "epoch": 0.7220724080555396,
      "grad_norm": 0.259765625,
      "learning_rate": 4.348295680087181e-05,
      "loss": 0.9584,
      "step": 12585
    },
    {
      "epoch": 0.7223592862470595,
      "grad_norm": 0.30078125,
      "learning_rate": 4.340037222382156e-05,
      "loss": 0.9093,
      "step": 12590
    },
    {
      "epoch": 0.7226461644385794,
      "grad_norm": 0.279296875,
      "learning_rate": 4.3317844405005e-05,
      "loss": 0.9699,
      "step": 12595
    },
    {
      "epoch": 0.7229330426300993,
      "grad_norm": 0.26953125,
      "learning_rate": 4.323537342718111e-05,
      "loss": 0.9292,
      "step": 12600
    },
    {
      "epoch": 0.7232199208216191,
      "grad_norm": 0.26953125,
      "learning_rate": 4.315295937305207e-05,
      "loss": 0.8625,
      "step": 12605
    },
    {
      "epoch": 0.723506799013139,
      "grad_norm": 0.26953125,
      "learning_rate": 4.307060232526283e-05,
      "loss": 0.9948,
      "step": 12610
    },
    {
      "epoch": 0.7237936772046589,
      "grad_norm": 0.283203125,
      "learning_rate": 4.2988302366401254e-05,
      "loss": 0.9497,
      "step": 12615
    },
    {
      "epoch": 0.7240805553961788,
      "grad_norm": 0.2734375,
      "learning_rate": 4.2906059578997896e-05,
      "loss": 0.9112,
      "step": 12620
    },
    {
      "epoch": 0.7243674335876986,
      "grad_norm": 0.267578125,
      "learning_rate": 4.2823874045526026e-05,
      "loss": 0.9787,
      "step": 12625
    },
    {
      "epoch": 0.7246543117792186,
      "grad_norm": 0.2470703125,
      "learning_rate": 4.274174584840143e-05,
      "loss": 0.9606,
      "step": 12630
    },
    {
      "epoch": 0.7249411899707384,
      "grad_norm": 0.26171875,
      "learning_rate": 4.265967506998253e-05,
      "loss": 0.9875,
      "step": 12635
    },
    {
      "epoch": 0.7252280681622583,
      "grad_norm": 0.2421875,
      "learning_rate": 4.257766179257005e-05,
      "loss": 0.9702,
      "step": 12640
    },
    {
      "epoch": 0.7255149463537782,
      "grad_norm": 0.259765625,
      "learning_rate": 4.2495706098407085e-05,
      "loss": 0.9266,
      "step": 12645
    },
    {
      "epoch": 0.7258018245452981,
      "grad_norm": 0.271484375,
      "learning_rate": 4.2413808069678996e-05,
      "loss": 0.9093,
      "step": 12650
    },
    {
      "epoch": 0.7260887027368179,
      "grad_norm": 0.296875,
      "learning_rate": 4.2331967788513295e-05,
      "loss": 0.9782,
      "step": 12655
    },
    {
      "epoch": 0.7263755809283379,
      "grad_norm": 0.2470703125,
      "learning_rate": 4.225018533697962e-05,
      "loss": 0.9237,
      "step": 12660
    },
    {
      "epoch": 0.7266624591198577,
      "grad_norm": 0.26953125,
      "learning_rate": 4.216846079708958e-05,
      "loss": 1.0039,
      "step": 12665
    },
    {
      "epoch": 0.7269493373113776,
      "grad_norm": 0.275390625,
      "learning_rate": 4.2086794250796734e-05,
      "loss": 0.957,
      "step": 12670
    },
    {
      "epoch": 0.7272362155028975,
      "grad_norm": 0.255859375,
      "learning_rate": 4.2005185779996484e-05,
      "loss": 0.9602,
      "step": 12675
    },
    {
      "epoch": 0.7275230936944174,
      "grad_norm": 0.24609375,
      "learning_rate": 4.1923635466525936e-05,
      "loss": 0.9461,
      "step": 12680
    },
    {
      "epoch": 0.7278099718859372,
      "grad_norm": 0.275390625,
      "learning_rate": 4.1842143392164004e-05,
      "loss": 0.969,
      "step": 12685
    },
    {
      "epoch": 0.7280968500774571,
      "grad_norm": 0.259765625,
      "learning_rate": 4.17607096386311e-05,
      "loss": 0.9375,
      "step": 12690
    },
    {
      "epoch": 0.728383728268977,
      "grad_norm": 0.26171875,
      "learning_rate": 4.167933428758916e-05,
      "loss": 0.9121,
      "step": 12695
    },
    {
      "epoch": 0.7286706064604969,
      "grad_norm": 0.2470703125,
      "learning_rate": 4.159801742064158e-05,
      "loss": 0.8924,
      "step": 12700
    },
    {
      "epoch": 0.7289574846520167,
      "grad_norm": 0.3046875,
      "learning_rate": 4.151675911933308e-05,
      "loss": 0.9526,
      "step": 12705
    },
    {
      "epoch": 0.7292443628435367,
      "grad_norm": 0.26171875,
      "learning_rate": 4.143555946514964e-05,
      "loss": 0.9478,
      "step": 12710
    },
    {
      "epoch": 0.7295312410350565,
      "grad_norm": 0.263671875,
      "learning_rate": 4.135441853951857e-05,
      "loss": 0.9295,
      "step": 12715
    },
    {
      "epoch": 0.7298181192265764,
      "grad_norm": 0.25390625,
      "learning_rate": 4.1273336423808065e-05,
      "loss": 0.8896,
      "step": 12720
    },
    {
      "epoch": 0.7301049974180963,
      "grad_norm": 0.2734375,
      "learning_rate": 4.119231319932747e-05,
      "loss": 0.9014,
      "step": 12725
    },
    {
      "epoch": 0.7303918756096162,
      "grad_norm": 0.2470703125,
      "learning_rate": 4.1111348947327034e-05,
      "loss": 0.9335,
      "step": 12730
    },
    {
      "epoch": 0.730678753801136,
      "grad_norm": 0.259765625,
      "learning_rate": 4.1030443748997974e-05,
      "loss": 0.9627,
      "step": 12735
    },
    {
      "epoch": 0.730965631992656,
      "grad_norm": 0.271484375,
      "learning_rate": 4.094959768547214e-05,
      "loss": 0.9359,
      "step": 12740
    },
    {
      "epoch": 0.7312525101841758,
      "grad_norm": 0.255859375,
      "learning_rate": 4.086881083782216e-05,
      "loss": 0.9178,
      "step": 12745
    },
    {
      "epoch": 0.7315393883756957,
      "grad_norm": 0.265625,
      "learning_rate": 4.078808328706127e-05,
      "loss": 0.9652,
      "step": 12750
    },
    {
      "epoch": 0.7318262665672156,
      "grad_norm": 0.251953125,
      "learning_rate": 4.070741511414323e-05,
      "loss": 0.8837,
      "step": 12755
    },
    {
      "epoch": 0.7321131447587355,
      "grad_norm": 0.26171875,
      "learning_rate": 4.062680639996225e-05,
      "loss": 0.9901,
      "step": 12760
    },
    {
      "epoch": 0.7324000229502553,
      "grad_norm": 0.2734375,
      "learning_rate": 4.054625722535301e-05,
      "loss": 0.9346,
      "step": 12765
    },
    {
      "epoch": 0.7326869011417751,
      "grad_norm": 0.25,
      "learning_rate": 4.0465767671090304e-05,
      "loss": 0.8749,
      "step": 12770
    },
    {
      "epoch": 0.7329737793332951,
      "grad_norm": 0.25,
      "learning_rate": 4.038533781788924e-05,
      "loss": 0.9063,
      "step": 12775
    },
    {
      "epoch": 0.733260657524815,
      "grad_norm": 0.26171875,
      "learning_rate": 4.030496774640514e-05,
      "loss": 0.9602,
      "step": 12780
    },
    {
      "epoch": 0.7335475357163348,
      "grad_norm": 0.26953125,
      "learning_rate": 4.022465753723323e-05,
      "loss": 0.9149,
      "step": 12785
    },
    {
      "epoch": 0.7338344139078548,
      "grad_norm": 0.27734375,
      "learning_rate": 4.014440727090879e-05,
      "loss": 0.9483,
      "step": 12790
    },
    {
      "epoch": 0.7341212920993746,
      "grad_norm": 0.28125,
      "learning_rate": 4.0064217027906945e-05,
      "loss": 0.9309,
      "step": 12795
    },
    {
      "epoch": 0.7344081702908944,
      "grad_norm": 0.265625,
      "learning_rate": 3.998408688864267e-05,
      "loss": 0.8794,
      "step": 12800
    },
    {
      "epoch": 0.7346950484824144,
      "grad_norm": 0.255859375,
      "learning_rate": 3.990401693347065e-05,
      "loss": 0.9702,
      "step": 12805
    },
    {
      "epoch": 0.7349819266739342,
      "grad_norm": 0.27734375,
      "learning_rate": 3.982400724268516e-05,
      "loss": 0.9087,
      "step": 12810
    },
    {
      "epoch": 0.7352688048654541,
      "grad_norm": 0.2470703125,
      "learning_rate": 3.974405789652022e-05,
      "loss": 0.9429,
      "step": 12815
    },
    {
      "epoch": 0.735555683056974,
      "grad_norm": 0.263671875,
      "learning_rate": 3.96641689751491e-05,
      "loss": 0.9047,
      "step": 12820
    },
    {
      "epoch": 0.7358425612484939,
      "grad_norm": 0.251953125,
      "learning_rate": 3.95843405586846e-05,
      "loss": 0.973,
      "step": 12825
    },
    {
      "epoch": 0.7361294394400137,
      "grad_norm": 0.294921875,
      "learning_rate": 3.950457272717889e-05,
      "loss": 1.0204,
      "step": 12830
    },
    {
      "epoch": 0.7364163176315337,
      "grad_norm": 0.265625,
      "learning_rate": 3.9424865560623305e-05,
      "loss": 1.0043,
      "step": 12835
    },
    {
      "epoch": 0.7367031958230535,
      "grad_norm": 0.259765625,
      "learning_rate": 3.9345219138948365e-05,
      "loss": 0.9604,
      "step": 12840
    },
    {
      "epoch": 0.7369900740145734,
      "grad_norm": 0.3125,
      "learning_rate": 3.9265633542023684e-05,
      "loss": 0.9756,
      "step": 12845
    },
    {
      "epoch": 0.7372769522060932,
      "grad_norm": 0.310546875,
      "learning_rate": 3.9186108849657885e-05,
      "loss": 1.0192,
      "step": 12850
    },
    {
      "epoch": 0.7375638303976132,
      "grad_norm": 0.26171875,
      "learning_rate": 3.91066451415985e-05,
      "loss": 0.9264,
      "step": 12855
    },
    {
      "epoch": 0.737850708589133,
      "grad_norm": 0.271484375,
      "learning_rate": 3.9027242497531865e-05,
      "loss": 1.0112,
      "step": 12860
    },
    {
      "epoch": 0.7381375867806529,
      "grad_norm": 0.279296875,
      "learning_rate": 3.8947900997083255e-05,
      "loss": 0.9489,
      "step": 12865
    },
    {
      "epoch": 0.7384244649721728,
      "grad_norm": 0.271484375,
      "learning_rate": 3.8868620719816395e-05,
      "loss": 0.9954,
      "step": 12870
    },
    {
      "epoch": 0.7387113431636927,
      "grad_norm": 0.2734375,
      "learning_rate": 3.878940174523371e-05,
      "loss": 0.9128,
      "step": 12875
    },
    {
      "epoch": 0.7389982213552125,
      "grad_norm": 0.26953125,
      "learning_rate": 3.8710244152776264e-05,
      "loss": 0.9386,
      "step": 12880
    },
    {
      "epoch": 0.7392850995467325,
      "grad_norm": 0.2734375,
      "learning_rate": 3.8631148021823406e-05,
      "loss": 0.927,
      "step": 12885
    },
    {
      "epoch": 0.7395719777382523,
      "grad_norm": 0.2431640625,
      "learning_rate": 3.8552113431692925e-05,
      "loss": 0.8867,
      "step": 12890
    },
    {
      "epoch": 0.7398588559297722,
      "grad_norm": 0.265625,
      "learning_rate": 3.847314046164089e-05,
      "loss": 0.9352,
      "step": 12895
    },
    {
      "epoch": 0.7401457341212921,
      "grad_norm": 0.2734375,
      "learning_rate": 3.8394229190861567e-05,
      "loss": 0.9445,
      "step": 12900
    },
    {
      "epoch": 0.740432612312812,
      "grad_norm": 0.28515625,
      "learning_rate": 3.831537969848731e-05,
      "loss": 0.9806,
      "step": 12905
    },
    {
      "epoch": 0.7407194905043318,
      "grad_norm": 0.296875,
      "learning_rate": 3.823659206358865e-05,
      "loss": 0.9799,
      "step": 12910
    },
    {
      "epoch": 0.7410063686958518,
      "grad_norm": 0.27734375,
      "learning_rate": 3.8157866365174e-05,
      "loss": 0.9522,
      "step": 12915
    },
    {
      "epoch": 0.7412932468873716,
      "grad_norm": 0.255859375,
      "learning_rate": 3.807920268218961e-05,
      "loss": 0.9113,
      "step": 12920
    },
    {
      "epoch": 0.7415801250788915,
      "grad_norm": 0.26953125,
      "learning_rate": 3.800060109351957e-05,
      "loss": 0.9087,
      "step": 12925
    },
    {
      "epoch": 0.7418670032704113,
      "grad_norm": 0.26171875,
      "learning_rate": 3.792206167798582e-05,
      "loss": 0.89,
      "step": 12930
    },
    {
      "epoch": 0.7421538814619313,
      "grad_norm": 0.259765625,
      "learning_rate": 3.784358451434783e-05,
      "loss": 0.9056,
      "step": 12935
    },
    {
      "epoch": 0.7424407596534511,
      "grad_norm": 0.353515625,
      "learning_rate": 3.776516968130266e-05,
      "loss": 0.9744,
      "step": 12940
    },
    {
      "epoch": 0.742727637844971,
      "grad_norm": 0.271484375,
      "learning_rate": 3.768681725748488e-05,
      "loss": 0.925,
      "step": 12945
    },
    {
      "epoch": 0.7430145160364909,
      "grad_norm": 0.2890625,
      "learning_rate": 3.760852732146649e-05,
      "loss": 0.88,
      "step": 12950
    },
    {
      "epoch": 0.7433013942280108,
      "grad_norm": 0.267578125,
      "learning_rate": 3.753029995175677e-05,
      "loss": 0.9313,
      "step": 12955
    },
    {
      "epoch": 0.7435882724195306,
      "grad_norm": 0.2734375,
      "learning_rate": 3.7452135226802385e-05,
      "loss": 0.9604,
      "step": 12960
    },
    {
      "epoch": 0.7438751506110506,
      "grad_norm": 0.27734375,
      "learning_rate": 3.7374033224987084e-05,
      "loss": 0.9876,
      "step": 12965
    },
    {
      "epoch": 0.7441620288025704,
      "grad_norm": 0.279296875,
      "learning_rate": 3.729599402463162e-05,
      "loss": 0.9691,
      "step": 12970
    },
    {
      "epoch": 0.7444489069940903,
      "grad_norm": 0.2734375,
      "learning_rate": 3.7218017703993994e-05,
      "loss": 0.9409,
      "step": 12975
    },
    {
      "epoch": 0.7447357851856102,
      "grad_norm": 0.3046875,
      "learning_rate": 3.714010434126899e-05,
      "loss": 0.9514,
      "step": 12980
    },
    {
      "epoch": 0.7450226633771301,
      "grad_norm": 0.267578125,
      "learning_rate": 3.706225401458831e-05,
      "loss": 0.9097,
      "step": 12985
    },
    {
      "epoch": 0.7453095415686499,
      "grad_norm": 0.25,
      "learning_rate": 3.6984466802020436e-05,
      "loss": 0.9851,
      "step": 12990
    },
    {
      "epoch": 0.7455964197601699,
      "grad_norm": 0.287109375,
      "learning_rate": 3.690674278157056e-05,
      "loss": 0.9324,
      "step": 12995
    },
    {
      "epoch": 0.7458832979516897,
      "grad_norm": 0.26171875,
      "learning_rate": 3.6829082031180496e-05,
      "loss": 0.9267,
      "step": 13000
    },
    {
      "epoch": 0.7461701761432096,
      "grad_norm": 0.259765625,
      "learning_rate": 3.6751484628728594e-05,
      "loss": 0.9357,
      "step": 13005
    },
    {
      "epoch": 0.7464570543347294,
      "grad_norm": 0.2734375,
      "learning_rate": 3.6673950652029766e-05,
      "loss": 0.9016,
      "step": 13010
    },
    {
      "epoch": 0.7467439325262494,
      "grad_norm": 0.263671875,
      "learning_rate": 3.659648017883526e-05,
      "loss": 0.9252,
      "step": 13015
    },
    {
      "epoch": 0.7470308107177692,
      "grad_norm": 0.255859375,
      "learning_rate": 3.651907328683254e-05,
      "loss": 0.896,
      "step": 13020
    },
    {
      "epoch": 0.7473176889092891,
      "grad_norm": 0.275390625,
      "learning_rate": 3.6441730053645506e-05,
      "loss": 0.9848,
      "step": 13025
    },
    {
      "epoch": 0.747604567100809,
      "grad_norm": 0.251953125,
      "learning_rate": 3.6364450556834097e-05,
      "loss": 0.9676,
      "step": 13030
    },
    {
      "epoch": 0.7478914452923289,
      "grad_norm": 0.26953125,
      "learning_rate": 3.628723487389437e-05,
      "loss": 0.9408,
      "step": 13035
    },
    {
      "epoch": 0.7481783234838487,
      "grad_norm": 0.26171875,
      "learning_rate": 3.621008308225837e-05,
      "loss": 0.8881,
      "step": 13040
    },
    {
      "epoch": 0.7484652016753687,
      "grad_norm": 0.267578125,
      "learning_rate": 3.61329952592941e-05,
      "loss": 0.9676,
      "step": 13045
    },
    {
      "epoch": 0.7487520798668885,
      "grad_norm": 0.28515625,
      "learning_rate": 3.605597148230541e-05,
      "loss": 0.9577,
      "step": 13050
    },
    {
      "epoch": 0.7490389580584084,
      "grad_norm": 0.275390625,
      "learning_rate": 3.597901182853185e-05,
      "loss": 0.9754,
      "step": 13055
    },
    {
      "epoch": 0.7493258362499283,
      "grad_norm": 0.26953125,
      "learning_rate": 3.590211637514884e-05,
      "loss": 0.9921,
      "step": 13060
    },
    {
      "epoch": 0.7496127144414482,
      "grad_norm": 0.263671875,
      "learning_rate": 3.582528519926729e-05,
      "loss": 0.9823,
      "step": 13065
    },
    {
      "epoch": 0.749899592632968,
      "grad_norm": 0.255859375,
      "learning_rate": 3.574851837793357e-05,
      "loss": 0.9203,
      "step": 13070
    },
    {
      "epoch": 0.750186470824488,
      "grad_norm": 0.25390625,
      "learning_rate": 3.567181598812973e-05,
      "loss": 0.9569,
      "step": 13075
    },
    {
      "epoch": 0.7504733490160078,
      "grad_norm": 0.265625,
      "learning_rate": 3.559517810677308e-05,
      "loss": 0.9337,
      "step": 13080
    },
    {
      "epoch": 0.7507602272075277,
      "grad_norm": 0.255859375,
      "learning_rate": 3.551860481071624e-05,
      "loss": 0.8669,
      "step": 13085
    },
    {
      "epoch": 0.7510471053990475,
      "grad_norm": 0.263671875,
      "learning_rate": 3.544209617674707e-05,
      "loss": 0.9714,
      "step": 13090
    },
    {
      "epoch": 0.7513339835905675,
      "grad_norm": 0.255859375,
      "learning_rate": 3.536565228158864e-05,
      "loss": 0.8875,
      "step": 13095
    },
    {
      "epoch": 0.7516208617820873,
      "grad_norm": 0.26953125,
      "learning_rate": 3.528927320189903e-05,
      "loss": 0.8935,
      "step": 13100
    },
    {
      "epoch": 0.7519077399736072,
      "grad_norm": 0.26171875,
      "learning_rate": 3.521295901427132e-05,
      "loss": 0.9711,
      "step": 13105
    },
    {
      "epoch": 0.7521946181651271,
      "grad_norm": 0.279296875,
      "learning_rate": 3.5136709795233626e-05,
      "loss": 0.9369,
      "step": 13110
    },
    {
      "epoch": 0.752481496356647,
      "grad_norm": 0.2431640625,
      "learning_rate": 3.506052562124883e-05,
      "loss": 0.875,
      "step": 13115
    },
    {
      "epoch": 0.7527683745481668,
      "grad_norm": 0.271484375,
      "learning_rate": 3.498440656871449e-05,
      "loss": 0.9301,
      "step": 13120
    },
    {
      "epoch": 0.7530552527396868,
      "grad_norm": 0.26953125,
      "learning_rate": 3.4908352713963077e-05,
      "loss": 0.9551,
      "step": 13125
    },
    {
      "epoch": 0.7533421309312066,
      "grad_norm": 0.251953125,
      "learning_rate": 3.483236413326151e-05,
      "loss": 0.902,
      "step": 13130
    },
    {
      "epoch": 0.7536290091227265,
      "grad_norm": 0.255859375,
      "learning_rate": 3.475644090281133e-05,
      "loss": 0.9567,
      "step": 13135
    },
    {
      "epoch": 0.7539158873142464,
      "grad_norm": 0.265625,
      "learning_rate": 3.468058309874851e-05,
      "loss": 0.9256,
      "step": 13140
    },
    {
      "epoch": 0.7542027655057663,
      "grad_norm": 0.25390625,
      "learning_rate": 3.460479079714343e-05,
      "loss": 0.9339,
      "step": 13145
    },
    {
      "epoch": 0.7544896436972861,
      "grad_norm": 0.255859375,
      "learning_rate": 3.452906407400074e-05,
      "loss": 0.9711,
      "step": 13150
    },
    {
      "epoch": 0.7547765218888061,
      "grad_norm": 0.2451171875,
      "learning_rate": 3.4453403005259444e-05,
      "loss": 0.9775,
      "step": 13155
    },
    {
      "epoch": 0.7550634000803259,
      "grad_norm": 0.2890625,
      "learning_rate": 3.43778076667926e-05,
      "loss": 0.9288,
      "step": 13160
    },
    {
      "epoch": 0.7553502782718458,
      "grad_norm": 0.2490234375,
      "learning_rate": 3.43022781344074e-05,
      "loss": 0.8841,
      "step": 13165
    },
    {
      "epoch": 0.7556371564633656,
      "grad_norm": 0.265625,
      "learning_rate": 3.4226814483844946e-05,
      "loss": 0.9771,
      "step": 13170
    },
    {
      "epoch": 0.7559240346548856,
      "grad_norm": 0.265625,
      "learning_rate": 3.4151416790780456e-05,
      "loss": 0.9575,
      "step": 13175
    },
    {
      "epoch": 0.7562109128464054,
      "grad_norm": 0.259765625,
      "learning_rate": 3.4076085130822866e-05,
      "loss": 0.9298,
      "step": 13180
    },
    {
      "epoch": 0.7564977910379252,
      "grad_norm": 0.26171875,
      "learning_rate": 3.400081957951492e-05,
      "loss": 0.8998,
      "step": 13185
    },
    {
      "epoch": 0.7567846692294452,
      "grad_norm": 0.26171875,
      "learning_rate": 3.392562021233311e-05,
      "loss": 0.902,
      "step": 13190
    },
    {
      "epoch": 0.757071547420965,
      "grad_norm": 0.27734375,
      "learning_rate": 3.38504871046875e-05,
      "loss": 0.9792,
      "step": 13195
    },
    {
      "epoch": 0.7573584256124849,
      "grad_norm": 0.291015625,
      "learning_rate": 3.3775420331921736e-05,
      "loss": 0.9386,
      "step": 13200
    },
    {
      "epoch": 0.7576453038040049,
      "grad_norm": 0.271484375,
      "learning_rate": 3.3700419969312994e-05,
      "loss": 0.932,
      "step": 13205
    },
    {
      "epoch": 0.7579321819955247,
      "grad_norm": 0.26171875,
      "learning_rate": 3.362548609207177e-05,
      "loss": 0.9206,
      "step": 13210
    },
    {
      "epoch": 0.7582190601870445,
      "grad_norm": 0.259765625,
      "learning_rate": 3.355061877534192e-05,
      "loss": 0.9754,
      "step": 13215
    },
    {
      "epoch": 0.7585059383785645,
      "grad_norm": 0.2578125,
      "learning_rate": 3.3475818094200585e-05,
      "loss": 0.9253,
      "step": 13220
    },
    {
      "epoch": 0.7587928165700843,
      "grad_norm": 0.2734375,
      "learning_rate": 3.340108412365803e-05,
      "loss": 0.9826,
      "step": 13225
    },
    {
      "epoch": 0.7590796947616042,
      "grad_norm": 0.265625,
      "learning_rate": 3.332641693865766e-05,
      "loss": 0.9125,
      "step": 13230
    },
    {
      "epoch": 0.7593665729531242,
      "grad_norm": 0.259765625,
      "learning_rate": 3.3251816614075884e-05,
      "loss": 0.9322,
      "step": 13235
    },
    {
      "epoch": 0.759653451144644,
      "grad_norm": 0.263671875,
      "learning_rate": 3.317728322472209e-05,
      "loss": 0.9654,
      "step": 13240
    },
    {
      "epoch": 0.7599403293361638,
      "grad_norm": 0.283203125,
      "learning_rate": 3.310281684533852e-05,
      "loss": 0.9085,
      "step": 13245
    },
    {
      "epoch": 0.7602272075276837,
      "grad_norm": 0.2734375,
      "learning_rate": 3.302841755060018e-05,
      "loss": 0.9488,
      "step": 13250
    },
    {
      "epoch": 0.7605140857192036,
      "grad_norm": 0.26953125,
      "learning_rate": 3.2954085415114946e-05,
      "loss": 0.922,
      "step": 13255
    },
    {
      "epoch": 0.7608009639107235,
      "grad_norm": 0.283203125,
      "learning_rate": 3.2879820513423184e-05,
      "loss": 0.9653,
      "step": 13260
    },
    {
      "epoch": 0.7610878421022433,
      "grad_norm": 0.25390625,
      "learning_rate": 3.2805622919997934e-05,
      "loss": 0.8962,
      "step": 13265
    },
    {
      "epoch": 0.7613747202937633,
      "grad_norm": 0.265625,
      "learning_rate": 3.273149270924468e-05,
      "loss": 0.9213,
      "step": 13270
    },
    {
      "epoch": 0.7616615984852831,
      "grad_norm": 0.265625,
      "learning_rate": 3.2657429955501394e-05,
      "loss": 0.903,
      "step": 13275
    },
    {
      "epoch": 0.761948476676803,
      "grad_norm": 0.263671875,
      "learning_rate": 3.258343473303832e-05,
      "loss": 0.9081,
      "step": 13280
    },
    {
      "epoch": 0.762235354868323,
      "grad_norm": 0.2578125,
      "learning_rate": 3.2509507116058134e-05,
      "loss": 0.9147,
      "step": 13285
    },
    {
      "epoch": 0.7625222330598428,
      "grad_norm": 0.291015625,
      "learning_rate": 3.243564717869552e-05,
      "loss": 0.9349,
      "step": 13290
    },
    {
      "epoch": 0.7628091112513626,
      "grad_norm": 0.255859375,
      "learning_rate": 3.2361854995017416e-05,
      "loss": 0.9296,
      "step": 13295
    },
    {
      "epoch": 0.7630959894428826,
      "grad_norm": 0.2734375,
      "learning_rate": 3.228813063902276e-05,
      "loss": 0.8695,
      "step": 13300
    },
    {
      "epoch": 0.7633828676344024,
      "grad_norm": 0.259765625,
      "learning_rate": 3.2214474184642574e-05,
      "loss": 0.9618,
      "step": 13305
    },
    {
      "epoch": 0.7636697458259223,
      "grad_norm": 0.275390625,
      "learning_rate": 3.2140885705739674e-05,
      "loss": 0.9476,
      "step": 13310
    },
    {
      "epoch": 0.7639566240174422,
      "grad_norm": 0.259765625,
      "learning_rate": 3.2067365276108754e-05,
      "loss": 0.9865,
      "step": 13315
    },
    {
      "epoch": 0.7642435022089621,
      "grad_norm": 0.287109375,
      "learning_rate": 3.199391296947627e-05,
      "loss": 0.9788,
      "step": 13320
    },
    {
      "epoch": 0.7645303804004819,
      "grad_norm": 0.263671875,
      "learning_rate": 3.192052885950034e-05,
      "loss": 0.9549,
      "step": 13325
    },
    {
      "epoch": 0.7648172585920018,
      "grad_norm": 0.2412109375,
      "learning_rate": 3.1847213019770716e-05,
      "loss": 0.9153,
      "step": 13330
    },
    {
      "epoch": 0.7651041367835217,
      "grad_norm": 0.255859375,
      "learning_rate": 3.1773965523808754e-05,
      "loss": 1.0291,
      "step": 13335
    },
    {
      "epoch": 0.7653910149750416,
      "grad_norm": 0.263671875,
      "learning_rate": 3.1700786445067135e-05,
      "loss": 0.9117,
      "step": 13340
    },
    {
      "epoch": 0.7656778931665614,
      "grad_norm": 0.267578125,
      "learning_rate": 3.162767585692997e-05,
      "loss": 0.9118,
      "step": 13345
    },
    {
      "epoch": 0.7659647713580814,
      "grad_norm": 0.25390625,
      "learning_rate": 3.155463383271282e-05,
      "loss": 0.9031,
      "step": 13350
    },
    {
      "epoch": 0.7662516495496012,
      "grad_norm": 0.263671875,
      "learning_rate": 3.148166044566233e-05,
      "loss": 0.9564,
      "step": 13355
    },
    {
      "epoch": 0.7665385277411211,
      "grad_norm": 0.275390625,
      "learning_rate": 3.14087557689564e-05,
      "loss": 0.9529,
      "step": 13360
    },
    {
      "epoch": 0.766825405932641,
      "grad_norm": 0.298828125,
      "learning_rate": 3.133591987570399e-05,
      "loss": 0.9671,
      "step": 13365
    },
    {
      "epoch": 0.7671122841241609,
      "grad_norm": 0.2578125,
      "learning_rate": 3.1263152838945095e-05,
      "loss": 0.9353,
      "step": 13370
    },
    {
      "epoch": 0.7673991623156807,
      "grad_norm": 0.267578125,
      "learning_rate": 3.1190454731650675e-05,
      "loss": 0.9341,
      "step": 13375
    },
    {
      "epoch": 0.7676860405072007,
      "grad_norm": 0.263671875,
      "learning_rate": 3.111782562672251e-05,
      "loss": 0.9831,
      "step": 13380
    },
    {
      "epoch": 0.7679729186987205,
      "grad_norm": 0.28125,
      "learning_rate": 3.104526559699333e-05,
      "loss": 0.9083,
      "step": 13385
    },
    {
      "epoch": 0.7682597968902404,
      "grad_norm": 0.267578125,
      "learning_rate": 3.0972774715226406e-05,
      "loss": 0.9044,
      "step": 13390
    },
    {
      "epoch": 0.7685466750817603,
      "grad_norm": 0.271484375,
      "learning_rate": 3.090035305411575e-05,
      "loss": 0.9071,
      "step": 13395
    },
    {
      "epoch": 0.7688335532732802,
      "grad_norm": 0.271484375,
      "learning_rate": 3.0828000686286027e-05,
      "loss": 0.9348,
      "step": 13400
    },
    {
      "epoch": 0.7691204314648,
      "grad_norm": 0.251953125,
      "learning_rate": 3.075571768429233e-05,
      "loss": 0.9485,
      "step": 13405
    },
    {
      "epoch": 0.76940730965632,
      "grad_norm": 0.26171875,
      "learning_rate": 3.06835041206202e-05,
      "loss": 0.9058,
      "step": 13410
    },
    {
      "epoch": 0.7696941878478398,
      "grad_norm": 0.2734375,
      "learning_rate": 3.0611360067685576e-05,
      "loss": 0.9473,
      "step": 13415
    },
    {
      "epoch": 0.7699810660393597,
      "grad_norm": 0.2490234375,
      "learning_rate": 3.0539285597834675e-05,
      "loss": 0.9342,
      "step": 13420
    },
    {
      "epoch": 0.7702679442308795,
      "grad_norm": 0.25390625,
      "learning_rate": 3.0467280783343944e-05,
      "loss": 0.9746,
      "step": 13425
    },
    {
      "epoch": 0.7705548224223995,
      "grad_norm": 0.25,
      "learning_rate": 3.0395345696419918e-05,
      "loss": 0.941,
      "step": 13430
    },
    {
      "epoch": 0.7708417006139193,
      "grad_norm": 0.259765625,
      "learning_rate": 3.0323480409199378e-05,
      "loss": 0.9427,
      "step": 13435
    },
    {
      "epoch": 0.7711285788054392,
      "grad_norm": 0.267578125,
      "learning_rate": 3.0251684993748886e-05,
      "loss": 0.9854,
      "step": 13440
    },
    {
      "epoch": 0.7714154569969591,
      "grad_norm": 0.2734375,
      "learning_rate": 3.017995952206506e-05,
      "loss": 1.0031,
      "step": 13445
    },
    {
      "epoch": 0.771702335188479,
      "grad_norm": 0.26171875,
      "learning_rate": 3.010830406607441e-05,
      "loss": 0.9125,
      "step": 13450
    },
    {
      "epoch": 0.7719892133799988,
      "grad_norm": 0.27734375,
      "learning_rate": 3.003671869763317e-05,
      "loss": 1.0298,
      "step": 13455
    },
    {
      "epoch": 0.7722760915715188,
      "grad_norm": 0.26953125,
      "learning_rate": 2.9965203488527317e-05,
      "loss": 0.9565,
      "step": 13460
    },
    {
      "epoch": 0.7725629697630386,
      "grad_norm": 0.259765625,
      "learning_rate": 2.9893758510472436e-05,
      "loss": 0.9615,
      "step": 13465
    },
    {
      "epoch": 0.7728498479545585,
      "grad_norm": 0.279296875,
      "learning_rate": 2.982238383511373e-05,
      "loss": 0.97,
      "step": 13470
    },
    {
      "epoch": 0.7731367261460784,
      "grad_norm": 0.27734375,
      "learning_rate": 2.975107953402585e-05,
      "loss": 0.9835,
      "step": 13475
    },
    {
      "epoch": 0.7734236043375983,
      "grad_norm": 0.28515625,
      "learning_rate": 2.967984567871297e-05,
      "loss": 0.9662,
      "step": 13480
    },
    {
      "epoch": 0.7737104825291181,
      "grad_norm": 0.275390625,
      "learning_rate": 2.960868234060855e-05,
      "loss": 0.9375,
      "step": 13485
    },
    {
      "epoch": 0.7739973607206381,
      "grad_norm": 0.26953125,
      "learning_rate": 2.9537589591075298e-05,
      "loss": 0.9713,
      "step": 13490
    },
    {
      "epoch": 0.7742842389121579,
      "grad_norm": 0.259765625,
      "learning_rate": 2.9466567501405185e-05,
      "loss": 0.8821,
      "step": 13495
    },
    {
      "epoch": 0.7745711171036778,
      "grad_norm": 0.27734375,
      "learning_rate": 2.939561614281936e-05,
      "loss": 1.0189,
      "step": 13500
    },
    {
      "epoch": 0.7748579952951976,
      "grad_norm": 0.28515625,
      "learning_rate": 2.9324735586468e-05,
      "loss": 0.9269,
      "step": 13505
    },
    {
      "epoch": 0.7751448734867176,
      "grad_norm": 0.259765625,
      "learning_rate": 2.9253925903430267e-05,
      "loss": 0.9318,
      "step": 13510
    },
    {
      "epoch": 0.7754317516782374,
      "grad_norm": 0.263671875,
      "learning_rate": 2.9183187164714288e-05,
      "loss": 0.9497,
      "step": 13515
    },
    {
      "epoch": 0.7757186298697573,
      "grad_norm": 0.26953125,
      "learning_rate": 2.9112519441257e-05,
      "loss": 0.8565,
      "step": 13520
    },
    {
      "epoch": 0.7760055080612772,
      "grad_norm": 0.259765625,
      "learning_rate": 2.9041922803924158e-05,
      "loss": 0.9198,
      "step": 13525
    },
    {
      "epoch": 0.7762923862527971,
      "grad_norm": 0.259765625,
      "learning_rate": 2.8971397323510275e-05,
      "loss": 0.9625,
      "step": 13530
    },
    {
      "epoch": 0.7765792644443169,
      "grad_norm": 0.2734375,
      "learning_rate": 2.890094307073845e-05,
      "loss": 0.8976,
      "step": 13535
    },
    {
      "epoch": 0.7768661426358369,
      "grad_norm": 0.3046875,
      "learning_rate": 2.883056011626032e-05,
      "loss": 0.9786,
      "step": 13540
    },
    {
      "epoch": 0.7771530208273567,
      "grad_norm": 0.26171875,
      "learning_rate": 2.8760248530656063e-05,
      "loss": 0.96,
      "step": 13545
    },
    {
      "epoch": 0.7774398990188766,
      "grad_norm": 0.259765625,
      "learning_rate": 2.8690008384434363e-05,
      "loss": 0.903,
      "step": 13550
    },
    {
      "epoch": 0.7777267772103965,
      "grad_norm": 0.267578125,
      "learning_rate": 2.861983974803215e-05,
      "loss": 0.9419,
      "step": 13555
    },
    {
      "epoch": 0.7780136554019164,
      "grad_norm": 0.267578125,
      "learning_rate": 2.8549742691814705e-05,
      "loss": 0.9764,
      "step": 13560
    },
    {
      "epoch": 0.7783005335934362,
      "grad_norm": 0.263671875,
      "learning_rate": 2.8479717286075502e-05,
      "loss": 0.9253,
      "step": 13565
    },
    {
      "epoch": 0.7785874117849562,
      "grad_norm": 0.265625,
      "learning_rate": 2.8409763601036188e-05,
      "loss": 0.9614,
      "step": 13570
    },
    {
      "epoch": 0.778874289976476,
      "grad_norm": 0.267578125,
      "learning_rate": 2.8339881706846427e-05,
      "loss": 1.0581,
      "step": 13575
    },
    {
      "epoch": 0.7791611681679959,
      "grad_norm": 0.267578125,
      "learning_rate": 2.8270071673584008e-05,
      "loss": 1.0102,
      "step": 13580
    },
    {
      "epoch": 0.7794480463595157,
      "grad_norm": 0.478515625,
      "learning_rate": 2.82003335712546e-05,
      "loss": 0.9411,
      "step": 13585
    },
    {
      "epoch": 0.7797349245510357,
      "grad_norm": 0.265625,
      "learning_rate": 2.8130667469791626e-05,
      "loss": 0.858,
      "step": 13590
    },
    {
      "epoch": 0.7800218027425555,
      "grad_norm": 0.2578125,
      "learning_rate": 2.8061073439056507e-05,
      "loss": 0.8923,
      "step": 13595
    },
    {
      "epoch": 0.7803086809340753,
      "grad_norm": 0.275390625,
      "learning_rate": 2.799155154883826e-05,
      "loss": 0.9096,
      "step": 13600
    },
    {
      "epoch": 0.7805955591255953,
      "grad_norm": 0.271484375,
      "learning_rate": 2.7922101868853577e-05,
      "loss": 0.9464,
      "step": 13605
    },
    {
      "epoch": 0.7808824373171152,
      "grad_norm": 0.251953125,
      "learning_rate": 2.785272446874677e-05,
      "loss": 0.9081,
      "step": 13610
    },
    {
      "epoch": 0.781169315508635,
      "grad_norm": 0.298828125,
      "learning_rate": 2.778341941808965e-05,
      "loss": 0.9735,
      "step": 13615
    },
    {
      "epoch": 0.781456193700155,
      "grad_norm": 0.275390625,
      "learning_rate": 2.771418678638147e-05,
      "loss": 0.9514,
      "step": 13620
    },
    {
      "epoch": 0.7817430718916748,
      "grad_norm": 0.28125,
      "learning_rate": 2.7645026643048855e-05,
      "loss": 0.9684,
      "step": 13625
    },
    {
      "epoch": 0.7820299500831946,
      "grad_norm": 0.2734375,
      "learning_rate": 2.7575939057445786e-05,
      "loss": 0.9782,
      "step": 13630
    },
    {
      "epoch": 0.7823168282747146,
      "grad_norm": 0.2578125,
      "learning_rate": 2.750692409885347e-05,
      "loss": 0.9351,
      "step": 13635
    },
    {
      "epoch": 0.7826037064662345,
      "grad_norm": 0.2734375,
      "learning_rate": 2.7437981836480166e-05,
      "loss": 0.9496,
      "step": 13640
    },
    {
      "epoch": 0.7828905846577543,
      "grad_norm": 0.2578125,
      "learning_rate": 2.736911233946141e-05,
      "loss": 0.8933,
      "step": 13645
    },
    {
      "epoch": 0.7831774628492743,
      "grad_norm": 0.2578125,
      "learning_rate": 2.730031567685968e-05,
      "loss": 0.9402,
      "step": 13650
    },
    {
      "epoch": 0.7834643410407941,
      "grad_norm": 0.287109375,
      "learning_rate": 2.723159191766439e-05,
      "loss": 0.9332,
      "step": 13655
    },
    {
      "epoch": 0.783751219232314,
      "grad_norm": 0.248046875,
      "learning_rate": 2.716294113079192e-05,
      "loss": 0.8929,
      "step": 13660
    },
    {
      "epoch": 0.7840380974238338,
      "grad_norm": 0.267578125,
      "learning_rate": 2.7094363385085398e-05,
      "loss": 1.0431,
      "step": 13665
    },
    {
      "epoch": 0.7843249756153537,
      "grad_norm": 0.291015625,
      "learning_rate": 2.7025858749314758e-05,
      "loss": 0.9892,
      "step": 13670
    },
    {
      "epoch": 0.7846118538068736,
      "grad_norm": 0.265625,
      "learning_rate": 2.6957427292176572e-05,
      "loss": 0.8728,
      "step": 13675
    },
    {
      "epoch": 0.7848987319983934,
      "grad_norm": 0.279296875,
      "learning_rate": 2.6889069082294114e-05,
      "loss": 0.9862,
      "step": 13680
    },
    {
      "epoch": 0.7851856101899134,
      "grad_norm": 0.28125,
      "learning_rate": 2.6820784188217164e-05,
      "loss": 0.9781,
      "step": 13685
    },
    {
      "epoch": 0.7854724883814332,
      "grad_norm": 0.2578125,
      "learning_rate": 2.675257267842185e-05,
      "loss": 0.9384,
      "step": 13690
    },
    {
      "epoch": 0.7857593665729531,
      "grad_norm": 0.26171875,
      "learning_rate": 2.668443462131094e-05,
      "loss": 0.9279,
      "step": 13695
    },
    {
      "epoch": 0.786046244764473,
      "grad_norm": 0.271484375,
      "learning_rate": 2.6616370085213394e-05,
      "loss": 1.0333,
      "step": 13700
    },
    {
      "epoch": 0.7863331229559929,
      "grad_norm": 0.26953125,
      "learning_rate": 2.6548379138384483e-05,
      "loss": 0.9812,
      "step": 13705
    },
    {
      "epoch": 0.7866200011475127,
      "grad_norm": 0.291015625,
      "learning_rate": 2.648046184900568e-05,
      "loss": 0.8958,
      "step": 13710
    },
    {
      "epoch": 0.7869068793390327,
      "grad_norm": 0.263671875,
      "learning_rate": 2.6412618285184587e-05,
      "loss": 0.9758,
      "step": 13715
    },
    {
      "epoch": 0.7871937575305525,
      "grad_norm": 0.275390625,
      "learning_rate": 2.6344848514954856e-05,
      "loss": 0.9736,
      "step": 13720
    },
    {
      "epoch": 0.7874806357220724,
      "grad_norm": 0.26171875,
      "learning_rate": 2.6277152606276234e-05,
      "loss": 0.9742,
      "step": 13725
    },
    {
      "epoch": 0.7877675139135923,
      "grad_norm": 0.267578125,
      "learning_rate": 2.6209530627034295e-05,
      "loss": 0.9122,
      "step": 13730
    },
    {
      "epoch": 0.7880543921051122,
      "grad_norm": 0.26953125,
      "learning_rate": 2.614198264504053e-05,
      "loss": 0.9363,
      "step": 13735
    },
    {
      "epoch": 0.788341270296632,
      "grad_norm": 0.25390625,
      "learning_rate": 2.607450872803213e-05,
      "loss": 1.0,
      "step": 13740
    },
    {
      "epoch": 0.7886281484881519,
      "grad_norm": 0.2421875,
      "learning_rate": 2.600710894367219e-05,
      "loss": 0.8937,
      "step": 13745
    },
    {
      "epoch": 0.7889150266796718,
      "grad_norm": 0.267578125,
      "learning_rate": 2.5939783359549306e-05,
      "loss": 0.9911,
      "step": 13750
    },
    {
      "epoch": 0.7892019048711917,
      "grad_norm": 0.2470703125,
      "learning_rate": 2.5872532043177743e-05,
      "loss": 0.9314,
      "step": 13755
    },
    {
      "epoch": 0.7894887830627115,
      "grad_norm": 0.267578125,
      "learning_rate": 2.580535506199727e-05,
      "loss": 0.9753,
      "step": 13760
    },
    {
      "epoch": 0.7897756612542315,
      "grad_norm": 0.287109375,
      "learning_rate": 2.5738252483373117e-05,
      "loss": 0.9208,
      "step": 13765
    },
    {
      "epoch": 0.7900625394457513,
      "grad_norm": 0.275390625,
      "learning_rate": 2.567122437459586e-05,
      "loss": 0.9189,
      "step": 13770
    },
    {
      "epoch": 0.7903494176372712,
      "grad_norm": 0.265625,
      "learning_rate": 2.5604270802881503e-05,
      "loss": 0.9401,
      "step": 13775
    },
    {
      "epoch": 0.7906362958287911,
      "grad_norm": 0.265625,
      "learning_rate": 2.5537391835371217e-05,
      "loss": 1.05,
      "step": 13780
    },
    {
      "epoch": 0.790923174020311,
      "grad_norm": 0.259765625,
      "learning_rate": 2.5470587539131362e-05,
      "loss": 0.9251,
      "step": 13785
    },
    {
      "epoch": 0.7912100522118308,
      "grad_norm": 0.2890625,
      "learning_rate": 2.5403857981153457e-05,
      "loss": 0.9218,
      "step": 13790
    },
    {
      "epoch": 0.7914969304033508,
      "grad_norm": 0.255859375,
      "learning_rate": 2.5337203228354035e-05,
      "loss": 0.9932,
      "step": 13795
    },
    {
      "epoch": 0.7917838085948706,
      "grad_norm": 0.255859375,
      "learning_rate": 2.527062334757464e-05,
      "loss": 1.03,
      "step": 13800
    },
    {
      "epoch": 0.7920706867863905,
      "grad_norm": 0.26953125,
      "learning_rate": 2.5204118405581724e-05,
      "loss": 0.9819,
      "step": 13805
    },
    {
      "epoch": 0.7923575649779104,
      "grad_norm": 0.26953125,
      "learning_rate": 2.513768846906659e-05,
      "loss": 1.0,
      "step": 13810
    },
    {
      "epoch": 0.7926444431694303,
      "grad_norm": 0.26171875,
      "learning_rate": 2.507133360464533e-05,
      "loss": 0.9784,
      "step": 13815
    },
    {
      "epoch": 0.7929313213609501,
      "grad_norm": 0.2578125,
      "learning_rate": 2.500505387885872e-05,
      "loss": 0.9411,
      "step": 13820
    },
    {
      "epoch": 0.79321819955247,
      "grad_norm": 0.26953125,
      "learning_rate": 2.493884935817228e-05,
      "loss": 0.9736,
      "step": 13825
    },
    {
      "epoch": 0.7935050777439899,
      "grad_norm": 0.265625,
      "learning_rate": 2.487272010897601e-05,
      "loss": 0.9139,
      "step": 13830
    },
    {
      "epoch": 0.7937919559355098,
      "grad_norm": 0.263671875,
      "learning_rate": 2.4806666197584483e-05,
      "loss": 0.8969,
      "step": 13835
    },
    {
      "epoch": 0.7940788341270296,
      "grad_norm": 0.25390625,
      "learning_rate": 2.474068769023671e-05,
      "loss": 0.914,
      "step": 13840
    },
    {
      "epoch": 0.7943657123185496,
      "grad_norm": 0.283203125,
      "learning_rate": 2.4674784653096083e-05,
      "loss": 0.9689,
      "step": 13845
    },
    {
      "epoch": 0.7946525905100694,
      "grad_norm": 0.25,
      "learning_rate": 2.460895715225028e-05,
      "loss": 1.0079,
      "step": 13850
    },
    {
      "epoch": 0.7949394687015893,
      "grad_norm": 0.283203125,
      "learning_rate": 2.4543205253711355e-05,
      "loss": 0.9578,
      "step": 13855
    },
    {
      "epoch": 0.7952263468931092,
      "grad_norm": 0.26953125,
      "learning_rate": 2.447752902341538e-05,
      "loss": 0.9168,
      "step": 13860
    },
    {
      "epoch": 0.7955132250846291,
      "grad_norm": 0.2578125,
      "learning_rate": 2.441192852722265e-05,
      "loss": 0.9152,
      "step": 13865
    },
    {
      "epoch": 0.7958001032761489,
      "grad_norm": 0.275390625,
      "learning_rate": 2.4346403830917464e-05,
      "loss": 0.914,
      "step": 13870
    },
    {
      "epoch": 0.7960869814676689,
      "grad_norm": 0.28515625,
      "learning_rate": 2.4280955000208184e-05,
      "loss": 0.9085,
      "step": 13875
    },
    {
      "epoch": 0.7963738596591887,
      "grad_norm": 0.2578125,
      "learning_rate": 2.421558210072702e-05,
      "loss": 0.9926,
      "step": 13880
    },
    {
      "epoch": 0.7966607378507086,
      "grad_norm": 0.25,
      "learning_rate": 2.4150285198030066e-05,
      "loss": 0.9283,
      "step": 13885
    },
    {
      "epoch": 0.7969476160422285,
      "grad_norm": 0.275390625,
      "learning_rate": 2.4085064357597197e-05,
      "loss": 0.9488,
      "step": 13890
    },
    {
      "epoch": 0.7972344942337484,
      "grad_norm": 0.25390625,
      "learning_rate": 2.4019919644832023e-05,
      "loss": 0.9676,
      "step": 13895
    },
    {
      "epoch": 0.7975213724252682,
      "grad_norm": 0.291015625,
      "learning_rate": 2.395485112506177e-05,
      "loss": 1.0144,
      "step": 13900
    },
    {
      "epoch": 0.7978082506167881,
      "grad_norm": 0.259765625,
      "learning_rate": 2.3889858863537396e-05,
      "loss": 0.9616,
      "step": 13905
    },
    {
      "epoch": 0.798095128808308,
      "grad_norm": 0.271484375,
      "learning_rate": 2.382494292543319e-05,
      "loss": 0.9342,
      "step": 13910
    },
    {
      "epoch": 0.7983820069998279,
      "grad_norm": 0.25,
      "learning_rate": 2.376010337584701e-05,
      "loss": 0.955,
      "step": 13915
    },
    {
      "epoch": 0.7986688851913477,
      "grad_norm": 0.2451171875,
      "learning_rate": 2.369534027980015e-05,
      "loss": 0.9769,
      "step": 13920
    },
    {
      "epoch": 0.7989557633828677,
      "grad_norm": 0.26171875,
      "learning_rate": 2.363065370223716e-05,
      "loss": 0.939,
      "step": 13925
    },
    {
      "epoch": 0.7992426415743875,
      "grad_norm": 0.275390625,
      "learning_rate": 2.3566043708025874e-05,
      "loss": 1.0056,
      "step": 13930
    },
    {
      "epoch": 0.7995295197659074,
      "grad_norm": 0.28125,
      "learning_rate": 2.3501510361957367e-05,
      "loss": 0.9896,
      "step": 13935
    },
    {
      "epoch": 0.7998163979574273,
      "grad_norm": 0.26953125,
      "learning_rate": 2.3437053728745807e-05,
      "loss": 0.9721,
      "step": 13940
    },
    {
      "epoch": 0.8001032761489472,
      "grad_norm": 0.26953125,
      "learning_rate": 2.337267387302845e-05,
      "loss": 0.9788,
      "step": 13945
    },
    {
      "epoch": 0.800390154340467,
      "grad_norm": 0.2890625,
      "learning_rate": 2.3308370859365523e-05,
      "loss": 0.9456,
      "step": 13950
    },
    {
      "epoch": 0.800677032531987,
      "grad_norm": 0.25390625,
      "learning_rate": 2.324414475224034e-05,
      "loss": 0.9169,
      "step": 13955
    },
    {
      "epoch": 0.8009639107235068,
      "grad_norm": 0.259765625,
      "learning_rate": 2.317999561605888e-05,
      "loss": 0.9856,
      "step": 13960
    },
    {
      "epoch": 0.8012507889150267,
      "grad_norm": 0.26171875,
      "learning_rate": 2.311592351515004e-05,
      "loss": 0.9333,
      "step": 13965
    },
    {
      "epoch": 0.8015376671065466,
      "grad_norm": 0.263671875,
      "learning_rate": 2.3051928513765542e-05,
      "loss": 0.9138,
      "step": 13970
    },
    {
      "epoch": 0.8018245452980665,
      "grad_norm": 0.2734375,
      "learning_rate": 2.2988010676079674e-05,
      "loss": 0.8617,
      "step": 13975
    },
    {
      "epoch": 0.8021114234895863,
      "grad_norm": 0.279296875,
      "learning_rate": 2.292417006618939e-05,
      "loss": 0.9493,
      "step": 13980
    },
    {
      "epoch": 0.8023983016811062,
      "grad_norm": 0.318359375,
      "learning_rate": 2.2860406748114195e-05,
      "loss": 1.0224,
      "step": 13985
    },
    {
      "epoch": 0.8026851798726261,
      "grad_norm": 0.259765625,
      "learning_rate": 2.279672078579609e-05,
      "loss": 0.997,
      "step": 13990
    },
    {
      "epoch": 0.802972058064146,
      "grad_norm": 0.2734375,
      "learning_rate": 2.2733112243099507e-05,
      "loss": 0.9755,
      "step": 13995
    },
    {
      "epoch": 0.8032589362556658,
      "grad_norm": 0.283203125,
      "learning_rate": 2.2669581183811196e-05,
      "loss": 0.9347,
      "step": 14000
    },
    {
      "epoch": 0.8035458144471858,
      "grad_norm": 0.259765625,
      "learning_rate": 2.2606127671640333e-05,
      "loss": 0.9454,
      "step": 14005
    },
    {
      "epoch": 0.8038326926387056,
      "grad_norm": 0.267578125,
      "learning_rate": 2.254275177021816e-05,
      "loss": 0.8952,
      "step": 14010
    },
    {
      "epoch": 0.8041195708302254,
      "grad_norm": 0.255859375,
      "learning_rate": 2.247945354309817e-05,
      "loss": 0.9592,
      "step": 14015
    },
    {
      "epoch": 0.8044064490217454,
      "grad_norm": 0.2734375,
      "learning_rate": 2.2416233053756032e-05,
      "loss": 0.9797,
      "step": 14020
    },
    {
      "epoch": 0.8046933272132653,
      "grad_norm": 0.2412109375,
      "learning_rate": 2.2353090365589348e-05,
      "loss": 0.9639,
      "step": 14025
    },
    {
      "epoch": 0.8049802054047851,
      "grad_norm": 0.255859375,
      "learning_rate": 2.2290025541917768e-05,
      "loss": 0.9881,
      "step": 14030
    },
    {
      "epoch": 0.8052670835963051,
      "grad_norm": 0.267578125,
      "learning_rate": 2.2227038645982833e-05,
      "loss": 0.9101,
      "step": 14035
    },
    {
      "epoch": 0.8055539617878249,
      "grad_norm": 0.265625,
      "learning_rate": 2.2164129740947935e-05,
      "loss": 0.929,
      "step": 14040
    },
    {
      "epoch": 0.8058408399793447,
      "grad_norm": 0.30078125,
      "learning_rate": 2.210129888989827e-05,
      "loss": 0.9931,
      "step": 14045
    },
    {
      "epoch": 0.8061277181708647,
      "grad_norm": 0.283203125,
      "learning_rate": 2.2038546155840735e-05,
      "loss": 0.9698,
      "step": 14050
    },
    {
      "epoch": 0.8064145963623846,
      "grad_norm": 0.265625,
      "learning_rate": 2.1975871601703977e-05,
      "loss": 0.9716,
      "step": 14055
    },
    {
      "epoch": 0.8067014745539044,
      "grad_norm": 0.310546875,
      "learning_rate": 2.191327529033812e-05,
      "loss": 0.9704,
      "step": 14060
    },
    {
      "epoch": 0.8069883527454242,
      "grad_norm": 0.24609375,
      "learning_rate": 2.1850757284514877e-05,
      "loss": 0.9215,
      "step": 14065
    },
    {
      "epoch": 0.8072752309369442,
      "grad_norm": 0.26953125,
      "learning_rate": 2.178831764692749e-05,
      "loss": 0.9289,
      "step": 14070
    },
    {
      "epoch": 0.807562109128464,
      "grad_norm": 0.2734375,
      "learning_rate": 2.1725956440190542e-05,
      "loss": 0.9023,
      "step": 14075
    },
    {
      "epoch": 0.8078489873199839,
      "grad_norm": 0.25,
      "learning_rate": 2.1663673726840006e-05,
      "loss": 0.9553,
      "step": 14080
    },
    {
      "epoch": 0.8081358655115038,
      "grad_norm": 0.251953125,
      "learning_rate": 2.160146956933311e-05,
      "loss": 0.9908,
      "step": 14085
    },
    {
      "epoch": 0.8084227437030237,
      "grad_norm": 0.25390625,
      "learning_rate": 2.1539344030048337e-05,
      "loss": 0.9315,
      "step": 14090
    },
    {
      "epoch": 0.8087096218945435,
      "grad_norm": 0.3046875,
      "learning_rate": 2.1477297171285282e-05,
      "loss": 0.8911,
      "step": 14095
    },
    {
      "epoch": 0.8089965000860635,
      "grad_norm": 0.265625,
      "learning_rate": 2.141532905526472e-05,
      "loss": 0.8708,
      "step": 14100
    },
    {
      "epoch": 0.8092833782775833,
      "grad_norm": 0.287109375,
      "learning_rate": 2.1353439744128434e-05,
      "loss": 0.9597,
      "step": 14105
    },
    {
      "epoch": 0.8095702564691032,
      "grad_norm": 0.28515625,
      "learning_rate": 2.1291629299939097e-05,
      "loss": 1.0855,
      "step": 14110
    },
    {
      "epoch": 0.8098571346606231,
      "grad_norm": 0.255859375,
      "learning_rate": 2.1229897784680365e-05,
      "loss": 0.9425,
      "step": 14115
    },
    {
      "epoch": 0.810144012852143,
      "grad_norm": 0.267578125,
      "learning_rate": 2.116824526025679e-05,
      "loss": 0.8716,
      "step": 14120
    },
    {
      "epoch": 0.8104308910436628,
      "grad_norm": 0.251953125,
      "learning_rate": 2.1106671788493636e-05,
      "loss": 1.0174,
      "step": 14125
    },
    {
      "epoch": 0.8107177692351828,
      "grad_norm": 0.275390625,
      "learning_rate": 2.104517743113693e-05,
      "loss": 0.9206,
      "step": 14130
    },
    {
      "epoch": 0.8110046474267026,
      "grad_norm": 0.271484375,
      "learning_rate": 2.0983762249853344e-05,
      "loss": 0.9666,
      "step": 14135
    },
    {
      "epoch": 0.8112915256182225,
      "grad_norm": 0.283203125,
      "learning_rate": 2.092242630623016e-05,
      "loss": 0.9522,
      "step": 14140
    },
    {
      "epoch": 0.8115784038097423,
      "grad_norm": 0.2734375,
      "learning_rate": 2.086116966177516e-05,
      "loss": 0.9182,
      "step": 14145
    },
    {
      "epoch": 0.8118652820012623,
      "grad_norm": 0.267578125,
      "learning_rate": 2.079999237791672e-05,
      "loss": 0.9693,
      "step": 14150
    },
    {
      "epoch": 0.8121521601927821,
      "grad_norm": 0.26171875,
      "learning_rate": 2.0738894516003536e-05,
      "loss": 0.9591,
      "step": 14155
    },
    {
      "epoch": 0.812439038384302,
      "grad_norm": 0.267578125,
      "learning_rate": 2.067787613730462e-05,
      "loss": 0.961,
      "step": 14160
    },
    {
      "epoch": 0.8127259165758219,
      "grad_norm": 0.296875,
      "learning_rate": 2.0616937303009408e-05,
      "loss": 0.9661,
      "step": 14165
    },
    {
      "epoch": 0.8130127947673418,
      "grad_norm": 0.267578125,
      "learning_rate": 2.055607807422748e-05,
      "loss": 1.0048,
      "step": 14170
    },
    {
      "epoch": 0.8132996729588616,
      "grad_norm": 0.28125,
      "learning_rate": 2.0495298511988602e-05,
      "loss": 0.9954,
      "step": 14175
    },
    {
      "epoch": 0.8135865511503816,
      "grad_norm": 0.2734375,
      "learning_rate": 2.0434598677242656e-05,
      "loss": 0.9824,
      "step": 14180
    },
    {
      "epoch": 0.8138734293419014,
      "grad_norm": 0.2734375,
      "learning_rate": 2.037397863085957e-05,
      "loss": 1.0136,
      "step": 14185
    },
    {
      "epoch": 0.8141603075334213,
      "grad_norm": 0.25390625,
      "learning_rate": 2.0313438433629263e-05,
      "loss": 0.9508,
      "step": 14190
    },
    {
      "epoch": 0.8144471857249412,
      "grad_norm": 0.3046875,
      "learning_rate": 2.0252978146261557e-05,
      "loss": 0.9738,
      "step": 14195
    },
    {
      "epoch": 0.8147340639164611,
      "grad_norm": 0.275390625,
      "learning_rate": 2.0192597829386217e-05,
      "loss": 1.0262,
      "step": 14200
    },
    {
      "epoch": 0.8150209421079809,
      "grad_norm": 0.26953125,
      "learning_rate": 2.0132297543552757e-05,
      "loss": 0.9072,
      "step": 14205
    },
    {
      "epoch": 0.8153078202995009,
      "grad_norm": 0.2490234375,
      "learning_rate": 2.0072077349230357e-05,
      "loss": 0.8962,
      "step": 14210
    },
    {
      "epoch": 0.8155946984910207,
      "grad_norm": 0.267578125,
      "learning_rate": 2.0011937306808048e-05,
      "loss": 0.9166,
      "step": 14215
    },
    {
      "epoch": 0.8158815766825406,
      "grad_norm": 0.265625,
      "learning_rate": 1.9951877476594382e-05,
      "loss": 0.8863,
      "step": 14220
    },
    {
      "epoch": 0.8161684548740604,
      "grad_norm": 0.2734375,
      "learning_rate": 1.9891897918817472e-05,
      "loss": 0.9848,
      "step": 14225
    },
    {
      "epoch": 0.8164553330655804,
      "grad_norm": 0.259765625,
      "learning_rate": 1.9831998693624964e-05,
      "loss": 0.9138,
      "step": 14230
    },
    {
      "epoch": 0.8167422112571002,
      "grad_norm": 0.251953125,
      "learning_rate": 1.977217986108393e-05,
      "loss": 0.9695,
      "step": 14235
    },
    {
      "epoch": 0.8170290894486201,
      "grad_norm": 0.279296875,
      "learning_rate": 1.9712441481180833e-05,
      "loss": 0.9367,
      "step": 14240
    },
    {
      "epoch": 0.81731596764014,
      "grad_norm": 0.263671875,
      "learning_rate": 1.9652783613821435e-05,
      "loss": 0.9687,
      "step": 14245
    },
    {
      "epoch": 0.8176028458316599,
      "grad_norm": 0.2734375,
      "learning_rate": 1.9593206318830815e-05,
      "loss": 0.9353,
      "step": 14250
    },
    {
      "epoch": 0.8178897240231797,
      "grad_norm": 0.255859375,
      "learning_rate": 1.9533709655953235e-05,
      "loss": 0.9681,
      "step": 14255
    },
    {
      "epoch": 0.8181766022146997,
      "grad_norm": 0.25390625,
      "learning_rate": 1.9474293684851984e-05,
      "loss": 0.9158,
      "step": 14260
    },
    {
      "epoch": 0.8184634804062195,
      "grad_norm": 0.25,
      "learning_rate": 1.9414958465109635e-05,
      "loss": 0.9798,
      "step": 14265
    },
    {
      "epoch": 0.8187503585977394,
      "grad_norm": 0.271484375,
      "learning_rate": 1.9355704056227632e-05,
      "loss": 1.0429,
      "step": 14270
    },
    {
      "epoch": 0.8190372367892593,
      "grad_norm": 0.2890625,
      "learning_rate": 1.9296530517626445e-05,
      "loss": 0.8955,
      "step": 14275
    },
    {
      "epoch": 0.8193241149807792,
      "grad_norm": 0.2578125,
      "learning_rate": 1.9237437908645417e-05,
      "loss": 0.9144,
      "step": 14280
    },
    {
      "epoch": 0.819610993172299,
      "grad_norm": 0.26171875,
      "learning_rate": 1.917842628854275e-05,
      "loss": 0.9887,
      "step": 14285
    },
    {
      "epoch": 0.819897871363819,
      "grad_norm": 0.263671875,
      "learning_rate": 1.9119495716495417e-05,
      "loss": 0.9999,
      "step": 14290
    },
    {
      "epoch": 0.8201847495553388,
      "grad_norm": 0.2578125,
      "learning_rate": 1.9060646251599157e-05,
      "loss": 0.9903,
      "step": 14295
    },
    {
      "epoch": 0.8204716277468587,
      "grad_norm": 0.265625,
      "learning_rate": 1.900187795286834e-05,
      "loss": 0.9522,
      "step": 14300
    },
    {
      "epoch": 0.8207585059383785,
      "grad_norm": 0.28125,
      "learning_rate": 1.8943190879235972e-05,
      "loss": 0.9792,
      "step": 14305
    },
    {
      "epoch": 0.8210453841298985,
      "grad_norm": 0.265625,
      "learning_rate": 1.8884585089553498e-05,
      "loss": 0.961,
      "step": 14310
    },
    {
      "epoch": 0.8213322623214183,
      "grad_norm": 0.2734375,
      "learning_rate": 1.8826060642591005e-05,
      "loss": 0.9782,
      "step": 14315
    },
    {
      "epoch": 0.8216191405129382,
      "grad_norm": 0.2412109375,
      "learning_rate": 1.8767617597036925e-05,
      "loss": 0.8703,
      "step": 14320
    },
    {
      "epoch": 0.8219060187044581,
      "grad_norm": 0.302734375,
      "learning_rate": 1.8709256011498076e-05,
      "loss": 0.9159,
      "step": 14325
    },
    {
      "epoch": 0.822192896895978,
      "grad_norm": 0.2578125,
      "learning_rate": 1.865097594449958e-05,
      "loss": 0.8634,
      "step": 14330
    },
    {
      "epoch": 0.8224797750874978,
      "grad_norm": 0.267578125,
      "learning_rate": 1.8592777454484835e-05,
      "loss": 0.9098,
      "step": 14335
    },
    {
      "epoch": 0.8227666532790178,
      "grad_norm": 0.26171875,
      "learning_rate": 1.8534660599815368e-05,
      "loss": 0.9266,
      "step": 14340
    },
    {
      "epoch": 0.8230535314705376,
      "grad_norm": 0.27734375,
      "learning_rate": 1.8476625438770944e-05,
      "loss": 0.9914,
      "step": 14345
    },
    {
      "epoch": 0.8233404096620575,
      "grad_norm": 0.259765625,
      "learning_rate": 1.8418672029549355e-05,
      "loss": 0.977,
      "step": 14350
    },
    {
      "epoch": 0.8236272878535774,
      "grad_norm": 0.2578125,
      "learning_rate": 1.836080043026638e-05,
      "loss": 0.9636,
      "step": 14355
    },
    {
      "epoch": 0.8239141660450973,
      "grad_norm": 0.2578125,
      "learning_rate": 1.8303010698955804e-05,
      "loss": 0.9023,
      "step": 14360
    },
    {
      "epoch": 0.8242010442366171,
      "grad_norm": 0.271484375,
      "learning_rate": 1.8245302893569295e-05,
      "loss": 0.9805,
      "step": 14365
    },
    {
      "epoch": 0.8244879224281371,
      "grad_norm": 0.265625,
      "learning_rate": 1.818767707197636e-05,
      "loss": 0.8912,
      "step": 14370
    },
    {
      "epoch": 0.8247748006196569,
      "grad_norm": 0.251953125,
      "learning_rate": 1.8130133291964323e-05,
      "loss": 0.9084,
      "step": 14375
    },
    {
      "epoch": 0.8250616788111768,
      "grad_norm": 0.25390625,
      "learning_rate": 1.80726716112382e-05,
      "loss": 0.8483,
      "step": 14380
    },
    {
      "epoch": 0.8253485570026966,
      "grad_norm": 0.265625,
      "learning_rate": 1.80152920874207e-05,
      "loss": 0.8901,
      "step": 14385
    },
    {
      "epoch": 0.8256354351942166,
      "grad_norm": 0.259765625,
      "learning_rate": 1.7957994778052112e-05,
      "loss": 0.8935,
      "step": 14390
    },
    {
      "epoch": 0.8259223133857364,
      "grad_norm": 0.27734375,
      "learning_rate": 1.7900779740590344e-05,
      "loss": 1.0121,
      "step": 14395
    },
    {
      "epoch": 0.8262091915772563,
      "grad_norm": 0.263671875,
      "learning_rate": 1.784364703241076e-05,
      "loss": 0.952,
      "step": 14400
    },
    {
      "epoch": 0.8264960697687762,
      "grad_norm": 0.2578125,
      "learning_rate": 1.778659671080616e-05,
      "loss": 0.9186,
      "step": 14405
    },
    {
      "epoch": 0.826782947960296,
      "grad_norm": 0.27734375,
      "learning_rate": 1.7729628832986722e-05,
      "loss": 1.0093,
      "step": 14410
    },
    {
      "epoch": 0.8270698261518159,
      "grad_norm": 0.27734375,
      "learning_rate": 1.7672743456079976e-05,
      "loss": 0.9438,
      "step": 14415
    },
    {
      "epoch": 0.8273567043433359,
      "grad_norm": 0.2890625,
      "learning_rate": 1.761594063713068e-05,
      "loss": 0.965,
      "step": 14420
    },
    {
      "epoch": 0.8276435825348557,
      "grad_norm": 0.271484375,
      "learning_rate": 1.75592204331009e-05,
      "loss": 0.9737,
      "step": 14425
    },
    {
      "epoch": 0.8279304607263755,
      "grad_norm": 0.263671875,
      "learning_rate": 1.7502582900869702e-05,
      "loss": 0.8891,
      "step": 14430
    },
    {
      "epoch": 0.8282173389178955,
      "grad_norm": 0.265625,
      "learning_rate": 1.744602809723337e-05,
      "loss": 0.9375,
      "step": 14435
    },
    {
      "epoch": 0.8285042171094154,
      "grad_norm": 0.26953125,
      "learning_rate": 1.7389556078905144e-05,
      "loss": 1.0189,
      "step": 14440
    },
    {
      "epoch": 0.8287910953009352,
      "grad_norm": 0.251953125,
      "learning_rate": 1.7333166902515363e-05,
      "loss": 0.8845,
      "step": 14445
    },
    {
      "epoch": 0.8290779734924552,
      "grad_norm": 0.265625,
      "learning_rate": 1.727686062461118e-05,
      "loss": 0.9842,
      "step": 14450
    },
    {
      "epoch": 0.829364851683975,
      "grad_norm": 0.263671875,
      "learning_rate": 1.722063730165665e-05,
      "loss": 0.867,
      "step": 14455
    },
    {
      "epoch": 0.8296517298754948,
      "grad_norm": 0.259765625,
      "learning_rate": 1.7164496990032665e-05,
      "loss": 0.9246,
      "step": 14460
    },
    {
      "epoch": 0.8299386080670147,
      "grad_norm": 0.2578125,
      "learning_rate": 1.7108439746036842e-05,
      "loss": 0.8562,
      "step": 14465
    },
    {
      "epoch": 0.8302254862585347,
      "grad_norm": 0.263671875,
      "learning_rate": 1.7052465625883494e-05,
      "loss": 0.9346,
      "step": 14470
    },
    {
      "epoch": 0.8305123644500545,
      "grad_norm": 0.26953125,
      "learning_rate": 1.699657468570367e-05,
      "loss": 0.9476,
      "step": 14475
    },
    {
      "epoch": 0.8307992426415743,
      "grad_norm": 0.255859375,
      "learning_rate": 1.694076698154484e-05,
      "loss": 0.9324,
      "step": 14480
    },
    {
      "epoch": 0.8310861208330943,
      "grad_norm": 0.25390625,
      "learning_rate": 1.6885042569371146e-05,
      "loss": 1.0023,
      "step": 14485
    },
    {
      "epoch": 0.8313729990246141,
      "grad_norm": 0.26953125,
      "learning_rate": 1.68294015050631e-05,
      "loss": 0.9223,
      "step": 14490
    },
    {
      "epoch": 0.831659877216134,
      "grad_norm": 0.265625,
      "learning_rate": 1.677384384441776e-05,
      "loss": 0.8871,
      "step": 14495
    },
    {
      "epoch": 0.831946755407654,
      "grad_norm": 0.3046875,
      "learning_rate": 1.6718369643148435e-05,
      "loss": 0.908,
      "step": 14500
    },
    {
      "epoch": 0.8322336335991738,
      "grad_norm": 0.263671875,
      "learning_rate": 1.6662978956884778e-05,
      "loss": 0.9022,
      "step": 14505
    },
    {
      "epoch": 0.8325205117906936,
      "grad_norm": 0.259765625,
      "learning_rate": 1.66076718411727e-05,
      "loss": 0.889,
      "step": 14510
    },
    {
      "epoch": 0.8328073899822136,
      "grad_norm": 0.263671875,
      "learning_rate": 1.6552448351474304e-05,
      "loss": 0.9465,
      "step": 14515
    },
    {
      "epoch": 0.8330942681737334,
      "grad_norm": 0.265625,
      "learning_rate": 1.649730854316779e-05,
      "loss": 0.9313,
      "step": 14520
    },
    {
      "epoch": 0.8333811463652533,
      "grad_norm": 0.263671875,
      "learning_rate": 1.644225247154756e-05,
      "loss": 0.8882,
      "step": 14525
    },
    {
      "epoch": 0.8336680245567732,
      "grad_norm": 0.263671875,
      "learning_rate": 1.6387280191823896e-05,
      "loss": 0.9555,
      "step": 14530
    },
    {
      "epoch": 0.8339549027482931,
      "grad_norm": 0.2578125,
      "learning_rate": 1.6332391759123123e-05,
      "loss": 0.9272,
      "step": 14535
    },
    {
      "epoch": 0.8342417809398129,
      "grad_norm": 0.283203125,
      "learning_rate": 1.6277587228487533e-05,
      "loss": 1.0969,
      "step": 14540
    },
    {
      "epoch": 0.8345286591313328,
      "grad_norm": 0.263671875,
      "learning_rate": 1.6222866654875213e-05,
      "loss": 1.0165,
      "step": 14545
    },
    {
      "epoch": 0.8348155373228527,
      "grad_norm": 0.298828125,
      "learning_rate": 1.6168230093160062e-05,
      "loss": 1.0093,
      "step": 14550
    },
    {
      "epoch": 0.8351024155143726,
      "grad_norm": 0.25,
      "learning_rate": 1.611367759813176e-05,
      "loss": 0.9414,
      "step": 14555
    },
    {
      "epoch": 0.8353892937058924,
      "grad_norm": 0.244140625,
      "learning_rate": 1.6059209224495676e-05,
      "loss": 0.9592,
      "step": 14560
    },
    {
      "epoch": 0.8356761718974124,
      "grad_norm": 0.25,
      "learning_rate": 1.6004825026872806e-05,
      "loss": 0.9276,
      "step": 14565
    },
    {
      "epoch": 0.8359630500889322,
      "grad_norm": 0.26171875,
      "learning_rate": 1.5950525059799714e-05,
      "loss": 0.9925,
      "step": 14570
    },
    {
      "epoch": 0.8362499282804521,
      "grad_norm": 0.28515625,
      "learning_rate": 1.5896309377728624e-05,
      "loss": 0.9718,
      "step": 14575
    },
    {
      "epoch": 0.836536806471972,
      "grad_norm": 0.271484375,
      "learning_rate": 1.5842178035027044e-05,
      "loss": 0.9949,
      "step": 14580
    },
    {
      "epoch": 0.8368236846634919,
      "grad_norm": 0.279296875,
      "learning_rate": 1.5788131085978032e-05,
      "loss": 0.9233,
      "step": 14585
    },
    {
      "epoch": 0.8371105628550117,
      "grad_norm": 0.259765625,
      "learning_rate": 1.573416858478003e-05,
      "loss": 0.9577,
      "step": 14590
    },
    {
      "epoch": 0.8373974410465317,
      "grad_norm": 0.263671875,
      "learning_rate": 1.568029058554672e-05,
      "loss": 0.9751,
      "step": 14595
    },
    {
      "epoch": 0.8376843192380515,
      "grad_norm": 0.2890625,
      "learning_rate": 1.5626497142307084e-05,
      "loss": 0.9637,
      "step": 14600
    },
    {
      "epoch": 0.8379711974295714,
      "grad_norm": 0.36328125,
      "learning_rate": 1.5572788309005315e-05,
      "loss": 0.9275,
      "step": 14605
    },
    {
      "epoch": 0.8382580756210913,
      "grad_norm": 0.294921875,
      "learning_rate": 1.5519164139500743e-05,
      "loss": 0.9635,
      "step": 14610
    },
    {
      "epoch": 0.8385449538126112,
      "grad_norm": 0.251953125,
      "learning_rate": 1.5465624687567816e-05,
      "loss": 0.9621,
      "step": 14615
    },
    {
      "epoch": 0.838831832004131,
      "grad_norm": 0.263671875,
      "learning_rate": 1.5412170006895986e-05,
      "loss": 0.8839,
      "step": 14620
    },
    {
      "epoch": 0.8391187101956509,
      "grad_norm": 0.26953125,
      "learning_rate": 1.5358800151089803e-05,
      "loss": 0.9448,
      "step": 14625
    },
    {
      "epoch": 0.8394055883871708,
      "grad_norm": 0.263671875,
      "learning_rate": 1.5305515173668594e-05,
      "loss": 0.9798,
      "step": 14630
    },
    {
      "epoch": 0.8396924665786907,
      "grad_norm": 0.267578125,
      "learning_rate": 1.5252315128066663e-05,
      "loss": 0.9255,
      "step": 14635
    },
    {
      "epoch": 0.8399793447702105,
      "grad_norm": 0.27734375,
      "learning_rate": 1.519920006763319e-05,
      "loss": 1.0368,
      "step": 14640
    },
    {
      "epoch": 0.8402662229617305,
      "grad_norm": 0.279296875,
      "learning_rate": 1.5146170045632035e-05,
      "loss": 0.9312,
      "step": 14645
    },
    {
      "epoch": 0.8405531011532503,
      "grad_norm": 0.255859375,
      "learning_rate": 1.5093225115241838e-05,
      "loss": 0.8995,
      "step": 14650
    },
    {
      "epoch": 0.8408399793447702,
      "grad_norm": 0.259765625,
      "learning_rate": 1.5040365329555895e-05,
      "loss": 0.9117,
      "step": 14655
    },
    {
      "epoch": 0.8411268575362901,
      "grad_norm": 0.283203125,
      "learning_rate": 1.4987590741582102e-05,
      "loss": 0.9783,
      "step": 14660
    },
    {
      "epoch": 0.84141373572781,
      "grad_norm": 0.2431640625,
      "learning_rate": 1.493490140424293e-05,
      "loss": 0.8676,
      "step": 14665
    },
    {
      "epoch": 0.8417006139193298,
      "grad_norm": 0.2470703125,
      "learning_rate": 1.4882297370375387e-05,
      "loss": 0.891,
      "step": 14670
    },
    {
      "epoch": 0.8419874921108498,
      "grad_norm": 0.294921875,
      "learning_rate": 1.4829778692730944e-05,
      "loss": 0.9868,
      "step": 14675
    },
    {
      "epoch": 0.8422743703023696,
      "grad_norm": 0.28515625,
      "learning_rate": 1.4777345423975375e-05,
      "loss": 0.9805,
      "step": 14680
    },
    {
      "epoch": 0.8425612484938895,
      "grad_norm": 0.265625,
      "learning_rate": 1.4724997616688907e-05,
      "loss": 0.9984,
      "step": 14685
    },
    {
      "epoch": 0.8428481266854094,
      "grad_norm": 0.255859375,
      "learning_rate": 1.4672735323366061e-05,
      "loss": 0.9138,
      "step": 14690
    },
    {
      "epoch": 0.8431350048769293,
      "grad_norm": 0.267578125,
      "learning_rate": 1.4620558596415578e-05,
      "loss": 0.8917,
      "step": 14695
    },
    {
      "epoch": 0.8434218830684491,
      "grad_norm": 0.2734375,
      "learning_rate": 1.4568467488160386e-05,
      "loss": 0.8793,
      "step": 14700
    },
    {
      "epoch": 0.843708761259969,
      "grad_norm": 0.265625,
      "learning_rate": 1.4516462050837564e-05,
      "loss": 0.9941,
      "step": 14705
    },
    {
      "epoch": 0.8439956394514889,
      "grad_norm": 0.275390625,
      "learning_rate": 1.4464542336598274e-05,
      "loss": 0.9185,
      "step": 14710
    },
    {
      "epoch": 0.8442825176430088,
      "grad_norm": 0.267578125,
      "learning_rate": 1.4412708397507724e-05,
      "loss": 0.9379,
      "step": 14715
    },
    {
      "epoch": 0.8445693958345286,
      "grad_norm": 0.279296875,
      "learning_rate": 1.4360960285545133e-05,
      "loss": 0.9381,
      "step": 14720
    },
    {
      "epoch": 0.8448562740260486,
      "grad_norm": 0.275390625,
      "learning_rate": 1.4309298052603626e-05,
      "loss": 0.9562,
      "step": 14725
    },
    {
      "epoch": 0.8451431522175684,
      "grad_norm": 0.271484375,
      "learning_rate": 1.4257721750490127e-05,
      "loss": 0.9234,
      "step": 14730
    },
    {
      "epoch": 0.8454300304090883,
      "grad_norm": 0.2490234375,
      "learning_rate": 1.4206231430925553e-05,
      "loss": 0.9905,
      "step": 14735
    },
    {
      "epoch": 0.8457169086006082,
      "grad_norm": 0.259765625,
      "learning_rate": 1.4154827145544492e-05,
      "loss": 0.9166,
      "step": 14740
    },
    {
      "epoch": 0.8460037867921281,
      "grad_norm": 0.25390625,
      "learning_rate": 1.410350894589525e-05,
      "loss": 0.9698,
      "step": 14745
    },
    {
      "epoch": 0.8462906649836479,
      "grad_norm": 0.271484375,
      "learning_rate": 1.4052276883439864e-05,
      "loss": 0.9123,
      "step": 14750
    },
    {
      "epoch": 0.8465775431751679,
      "grad_norm": 0.2734375,
      "learning_rate": 1.4001131009553936e-05,
      "loss": 0.9739,
      "step": 14755
    },
    {
      "epoch": 0.8468644213666877,
      "grad_norm": 0.25390625,
      "learning_rate": 1.3950071375526685e-05,
      "loss": 1.0007,
      "step": 14760
    },
    {
      "epoch": 0.8471512995582076,
      "grad_norm": 0.26953125,
      "learning_rate": 1.3899098032560787e-05,
      "loss": 0.8987,
      "step": 14765
    },
    {
      "epoch": 0.8474381777497275,
      "grad_norm": 0.263671875,
      "learning_rate": 1.3848211031772473e-05,
      "loss": 0.9215,
      "step": 14770
    },
    {
      "epoch": 0.8477250559412474,
      "grad_norm": 0.25390625,
      "learning_rate": 1.3797410424191337e-05,
      "loss": 0.9238,
      "step": 14775
    },
    {
      "epoch": 0.8480119341327672,
      "grad_norm": 0.265625,
      "learning_rate": 1.3746696260760295e-05,
      "loss": 0.916,
      "step": 14780
    },
    {
      "epoch": 0.848298812324287,
      "grad_norm": 0.283203125,
      "learning_rate": 1.3696068592335676e-05,
      "loss": 0.9393,
      "step": 14785
    },
    {
      "epoch": 0.848585690515807,
      "grad_norm": 0.244140625,
      "learning_rate": 1.3645527469686992e-05,
      "loss": 0.9651,
      "step": 14790
    },
    {
      "epoch": 0.8488725687073269,
      "grad_norm": 0.259765625,
      "learning_rate": 1.3595072943497011e-05,
      "loss": 0.9529,
      "step": 14795
    },
    {
      "epoch": 0.8491594468988467,
      "grad_norm": 0.25390625,
      "learning_rate": 1.3544705064361629e-05,
      "loss": 0.9006,
      "step": 14800
    },
    {
      "epoch": 0.8494463250903667,
      "grad_norm": 0.26171875,
      "learning_rate": 1.3494423882789874e-05,
      "loss": 0.9573,
      "step": 14805
    },
    {
      "epoch": 0.8497332032818865,
      "grad_norm": 0.314453125,
      "learning_rate": 1.3444229449203827e-05,
      "loss": 1.0288,
      "step": 14810
    },
    {
      "epoch": 0.8500200814734064,
      "grad_norm": 0.259765625,
      "learning_rate": 1.3394121813938554e-05,
      "loss": 0.9184,
      "step": 14815
    },
    {
      "epoch": 0.8503069596649263,
      "grad_norm": 0.267578125,
      "learning_rate": 1.3344101027242161e-05,
      "loss": 0.9045,
      "step": 14820
    },
    {
      "epoch": 0.8505938378564462,
      "grad_norm": 0.255859375,
      "learning_rate": 1.3294167139275593e-05,
      "loss": 0.9593,
      "step": 14825
    },
    {
      "epoch": 0.850880716047966,
      "grad_norm": 0.26171875,
      "learning_rate": 1.3244320200112592e-05,
      "loss": 0.9066,
      "step": 14830
    },
    {
      "epoch": 0.851167594239486,
      "grad_norm": 0.2490234375,
      "learning_rate": 1.3194560259739863e-05,
      "loss": 0.9236,
      "step": 14835
    },
    {
      "epoch": 0.8514544724310058,
      "grad_norm": 0.25,
      "learning_rate": 1.3144887368056757e-05,
      "loss": 0.923,
      "step": 14840
    },
    {
      "epoch": 0.8517413506225257,
      "grad_norm": 0.2578125,
      "learning_rate": 1.3095301574875363e-05,
      "loss": 0.9765,
      "step": 14845
    },
    {
      "epoch": 0.8520282288140456,
      "grad_norm": 0.279296875,
      "learning_rate": 1.3045802929920414e-05,
      "loss": 0.9518,
      "step": 14850
    },
    {
      "epoch": 0.8523151070055655,
      "grad_norm": 0.3125,
      "learning_rate": 1.2996391482829273e-05,
      "loss": 0.8879,
      "step": 14855
    },
    {
      "epoch": 0.8526019851970853,
      "grad_norm": 0.26953125,
      "learning_rate": 1.2947067283151837e-05,
      "loss": 0.8637,
      "step": 14860
    },
    {
      "epoch": 0.8528888633886051,
      "grad_norm": 0.26171875,
      "learning_rate": 1.289783038035055e-05,
      "loss": 1.0178,
      "step": 14865
    },
    {
      "epoch": 0.8531757415801251,
      "grad_norm": 0.29296875,
      "learning_rate": 1.2848680823800275e-05,
      "loss": 0.8906,
      "step": 14870
    },
    {
      "epoch": 0.853462619771645,
      "grad_norm": 0.2578125,
      "learning_rate": 1.2799618662788315e-05,
      "loss": 0.9234,
      "step": 14875
    },
    {
      "epoch": 0.8537494979631648,
      "grad_norm": 0.283203125,
      "learning_rate": 1.2750643946514252e-05,
      "loss": 0.9215,
      "step": 14880
    },
    {
      "epoch": 0.8540363761546848,
      "grad_norm": 0.259765625,
      "learning_rate": 1.2701756724090108e-05,
      "loss": 0.893,
      "step": 14885
    },
    {
      "epoch": 0.8543232543462046,
      "grad_norm": 0.26171875,
      "learning_rate": 1.2652957044540082e-05,
      "loss": 0.935,
      "step": 14890
    },
    {
      "epoch": 0.8546101325377244,
      "grad_norm": 0.2470703125,
      "learning_rate": 1.2604244956800593e-05,
      "loss": 0.9196,
      "step": 14895
    },
    {
      "epoch": 0.8548970107292444,
      "grad_norm": 0.255859375,
      "learning_rate": 1.2555620509720233e-05,
      "loss": 0.9674,
      "step": 14900
    },
    {
      "epoch": 0.8551838889207642,
      "grad_norm": 0.2890625,
      "learning_rate": 1.2507083752059723e-05,
      "loss": 0.9899,
      "step": 14905
    },
    {
      "epoch": 0.8554707671122841,
      "grad_norm": 0.31640625,
      "learning_rate": 1.2458634732491781e-05,
      "loss": 0.9745,
      "step": 14910
    },
    {
      "epoch": 0.855757645303804,
      "grad_norm": 0.279296875,
      "learning_rate": 1.2410273499601266e-05,
      "loss": 0.9971,
      "step": 14915
    },
    {
      "epoch": 0.8560445234953239,
      "grad_norm": 0.26171875,
      "learning_rate": 1.2362000101884885e-05,
      "loss": 0.9514,
      "step": 14920
    },
    {
      "epoch": 0.8563314016868437,
      "grad_norm": 0.26171875,
      "learning_rate": 1.2313814587751316e-05,
      "loss": 0.9545,
      "step": 14925
    },
    {
      "epoch": 0.8566182798783637,
      "grad_norm": 0.265625,
      "learning_rate": 1.2265717005521115e-05,
      "loss": 0.9368,
      "step": 14930
    },
    {
      "epoch": 0.8569051580698835,
      "grad_norm": 0.26953125,
      "learning_rate": 1.2217707403426627e-05,
      "loss": 0.9405,
      "step": 14935
    },
    {
      "epoch": 0.8571920362614034,
      "grad_norm": 0.263671875,
      "learning_rate": 1.2169785829612001e-05,
      "loss": 0.9264,
      "step": 14940
    },
    {
      "epoch": 0.8574789144529232,
      "grad_norm": 0.27734375,
      "learning_rate": 1.2121952332133091e-05,
      "loss": 1.0209,
      "step": 14945
    },
    {
      "epoch": 0.8577657926444432,
      "grad_norm": 0.26953125,
      "learning_rate": 1.2074206958957447e-05,
      "loss": 0.9528,
      "step": 14950
    },
    {
      "epoch": 0.858052670835963,
      "grad_norm": 0.2333984375,
      "learning_rate": 1.2026549757964212e-05,
      "loss": 0.8766,
      "step": 14955
    },
    {
      "epoch": 0.8583395490274829,
      "grad_norm": 0.27734375,
      "learning_rate": 1.1978980776944137e-05,
      "loss": 0.9347,
      "step": 14960
    },
    {
      "epoch": 0.8586264272190028,
      "grad_norm": 0.259765625,
      "learning_rate": 1.1931500063599543e-05,
      "loss": 0.9569,
      "step": 14965
    },
    {
      "epoch": 0.8589133054105227,
      "grad_norm": 0.255859375,
      "learning_rate": 1.1884107665544164e-05,
      "loss": 0.9079,
      "step": 14970
    },
    {
      "epoch": 0.8592001836020425,
      "grad_norm": 0.255859375,
      "learning_rate": 1.1836803630303206e-05,
      "loss": 0.9977,
      "step": 14975
    },
    {
      "epoch": 0.8594870617935625,
      "grad_norm": 0.275390625,
      "learning_rate": 1.1789588005313257e-05,
      "loss": 0.9528,
      "step": 14980
    },
    {
      "epoch": 0.8597739399850823,
      "grad_norm": 0.279296875,
      "learning_rate": 1.1742460837922265e-05,
      "loss": 0.936,
      "step": 14985
    },
    {
      "epoch": 0.8600608181766022,
      "grad_norm": 0.2314453125,
      "learning_rate": 1.1695422175389447e-05,
      "loss": 0.9134,
      "step": 14990
    },
    {
      "epoch": 0.8603476963681221,
      "grad_norm": 0.2734375,
      "learning_rate": 1.1648472064885286e-05,
      "loss": 0.991,
      "step": 14995
    },
    {
      "epoch": 0.860634574559642,
      "grad_norm": 0.25,
      "learning_rate": 1.160161055349146e-05,
      "loss": 0.8948,
      "step": 15000
    },
    {
      "epoch": 0.8609214527511618,
      "grad_norm": 0.27734375,
      "learning_rate": 1.1554837688200793e-05,
      "loss": 0.9878,
      "step": 15005
    },
    {
      "epoch": 0.8612083309426818,
      "grad_norm": 0.2451171875,
      "learning_rate": 1.1508153515917196e-05,
      "loss": 0.9045,
      "step": 15010
    },
    {
      "epoch": 0.8614952091342016,
      "grad_norm": 0.248046875,
      "learning_rate": 1.1461558083455704e-05,
      "loss": 0.9272,
      "step": 15015
    },
    {
      "epoch": 0.8617820873257215,
      "grad_norm": 0.2412109375,
      "learning_rate": 1.1415051437542302e-05,
      "loss": 0.9048,
      "step": 15020
    },
    {
      "epoch": 0.8620689655172413,
      "grad_norm": 0.28125,
      "learning_rate": 1.1368633624813974e-05,
      "loss": 0.9666,
      "step": 15025
    },
    {
      "epoch": 0.8623558437087613,
      "grad_norm": 0.2431640625,
      "learning_rate": 1.1322304691818575e-05,
      "loss": 1.013,
      "step": 15030
    },
    {
      "epoch": 0.8626427219002811,
      "grad_norm": 0.267578125,
      "learning_rate": 1.1276064685014886e-05,
      "loss": 0.9382,
      "step": 15035
    },
    {
      "epoch": 0.862929600091801,
      "grad_norm": 0.263671875,
      "learning_rate": 1.1229913650772472e-05,
      "loss": 0.9891,
      "step": 15040
    },
    {
      "epoch": 0.8632164782833209,
      "grad_norm": 0.267578125,
      "learning_rate": 1.1183851635371734e-05,
      "loss": 0.9336,
      "step": 15045
    },
    {
      "epoch": 0.8635033564748408,
      "grad_norm": 0.3046875,
      "learning_rate": 1.1137878685003722e-05,
      "loss": 0.9662,
      "step": 15050
    },
    {
      "epoch": 0.8637902346663606,
      "grad_norm": 0.2490234375,
      "learning_rate": 1.1091994845770226e-05,
      "loss": 0.9321,
      "step": 15055
    },
    {
      "epoch": 0.8640771128578806,
      "grad_norm": 0.259765625,
      "learning_rate": 1.104620016368364e-05,
      "loss": 1.0532,
      "step": 15060
    },
    {
      "epoch": 0.8643639910494004,
      "grad_norm": 0.26953125,
      "learning_rate": 1.1000494684667017e-05,
      "loss": 0.9349,
      "step": 15065
    },
    {
      "epoch": 0.8646508692409203,
      "grad_norm": 0.25,
      "learning_rate": 1.0954878454553908e-05,
      "loss": 0.8553,
      "step": 15070
    },
    {
      "epoch": 0.8649377474324402,
      "grad_norm": 0.265625,
      "learning_rate": 1.0909351519088352e-05,
      "loss": 0.9111,
      "step": 15075
    },
    {
      "epoch": 0.8652246256239601,
      "grad_norm": 0.263671875,
      "learning_rate": 1.0863913923924862e-05,
      "loss": 0.9779,
      "step": 15080
    },
    {
      "epoch": 0.8655115038154799,
      "grad_norm": 0.298828125,
      "learning_rate": 1.081856571462837e-05,
      "loss": 0.9526,
      "step": 15085
    },
    {
      "epoch": 0.8657983820069999,
      "grad_norm": 0.294921875,
      "learning_rate": 1.0773306936674133e-05,
      "loss": 0.9902,
      "step": 15090
    },
    {
      "epoch": 0.8660852601985197,
      "grad_norm": 0.369140625,
      "learning_rate": 1.0728137635447821e-05,
      "loss": 1.0987,
      "step": 15095
    },
    {
      "epoch": 0.8663721383900396,
      "grad_norm": 0.2412109375,
      "learning_rate": 1.0683057856245259e-05,
      "loss": 0.9029,
      "step": 15100
    },
    {
      "epoch": 0.8666590165815594,
      "grad_norm": 0.255859375,
      "learning_rate": 1.0638067644272532e-05,
      "loss": 0.9804,
      "step": 15105
    },
    {
      "epoch": 0.8669458947730794,
      "grad_norm": 0.2578125,
      "learning_rate": 1.059316704464598e-05,
      "loss": 0.9642,
      "step": 15110
    },
    {
      "epoch": 0.8672327729645992,
      "grad_norm": 0.291015625,
      "learning_rate": 1.0548356102391999e-05,
      "loss": 0.9328,
      "step": 15115
    },
    {
      "epoch": 0.8675196511561191,
      "grad_norm": 0.26171875,
      "learning_rate": 1.0503634862447099e-05,
      "loss": 0.9363,
      "step": 15120
    },
    {
      "epoch": 0.867806529347639,
      "grad_norm": 0.265625,
      "learning_rate": 1.0459003369657849e-05,
      "loss": 0.9308,
      "step": 15125
    },
    {
      "epoch": 0.8680934075391589,
      "grad_norm": 0.26171875,
      "learning_rate": 1.0414461668780806e-05,
      "loss": 0.889,
      "step": 15130
    },
    {
      "epoch": 0.8683802857306787,
      "grad_norm": 0.27734375,
      "learning_rate": 1.0370009804482483e-05,
      "loss": 0.8834,
      "step": 15135
    },
    {
      "epoch": 0.8686671639221987,
      "grad_norm": 0.255859375,
      "learning_rate": 1.032564782133929e-05,
      "loss": 0.9435,
      "step": 15140
    },
    {
      "epoch": 0.8689540421137185,
      "grad_norm": 0.255859375,
      "learning_rate": 1.0281375763837598e-05,
      "loss": 0.932,
      "step": 15145
    },
    {
      "epoch": 0.8692409203052384,
      "grad_norm": 0.2451171875,
      "learning_rate": 1.0237193676373435e-05,
      "loss": 0.9304,
      "step": 15150
    },
    {
      "epoch": 0.8695277984967583,
      "grad_norm": 0.267578125,
      "learning_rate": 1.019310160325273e-05,
      "loss": 0.9592,
      "step": 15155
    },
    {
      "epoch": 0.8698146766882782,
      "grad_norm": 0.26171875,
      "learning_rate": 1.0149099588691135e-05,
      "loss": 0.9544,
      "step": 15160
    },
    {
      "epoch": 0.870101554879798,
      "grad_norm": 0.275390625,
      "learning_rate": 1.0105187676813954e-05,
      "loss": 1.026,
      "step": 15165
    },
    {
      "epoch": 0.870388433071318,
      "grad_norm": 0.333984375,
      "learning_rate": 1.006136591165614e-05,
      "loss": 1.0247,
      "step": 15170
    },
    {
      "epoch": 0.8706753112628378,
      "grad_norm": 0.26171875,
      "learning_rate": 1.0017634337162275e-05,
      "loss": 0.9566,
      "step": 15175
    },
    {
      "epoch": 0.8709621894543577,
      "grad_norm": 0.26953125,
      "learning_rate": 9.973992997186465e-06,
      "loss": 0.9185,
      "step": 15180
    },
    {
      "epoch": 0.8712490676458775,
      "grad_norm": 0.265625,
      "learning_rate": 9.930441935492363e-06,
      "loss": 0.9289,
      "step": 15185
    },
    {
      "epoch": 0.8715359458373975,
      "grad_norm": 0.259765625,
      "learning_rate": 9.88698119575302e-06,
      "loss": 0.9605,
      "step": 15190
    },
    {
      "epoch": 0.8718228240289173,
      "grad_norm": 0.265625,
      "learning_rate": 9.843610821551053e-06,
      "loss": 0.9547,
      "step": 15195
    },
    {
      "epoch": 0.8721097022204372,
      "grad_norm": 0.265625,
      "learning_rate": 9.800330856378303e-06,
      "loss": 0.9544,
      "step": 15200
    },
    {
      "epoch": 0.8723965804119571,
      "grad_norm": 0.267578125,
      "learning_rate": 9.757141343636e-06,
      "loss": 0.9572,
      "step": 15205
    },
    {
      "epoch": 0.872683458603477,
      "grad_norm": 0.29296875,
      "learning_rate": 9.714042326634743e-06,
      "loss": 0.9438,
      "step": 15210
    },
    {
      "epoch": 0.8729703367949968,
      "grad_norm": 0.275390625,
      "learning_rate": 9.671033848594301e-06,
      "loss": 0.9435,
      "step": 15215
    },
    {
      "epoch": 0.8732572149865168,
      "grad_norm": 0.2578125,
      "learning_rate": 9.628115952643657e-06,
      "loss": 0.995,
      "step": 15220
    },
    {
      "epoch": 0.8735440931780366,
      "grad_norm": 0.35546875,
      "learning_rate": 9.585288681820992e-06,
      "loss": 0.997,
      "step": 15225
    },
    {
      "epoch": 0.8738309713695565,
      "grad_norm": 0.271484375,
      "learning_rate": 9.542552079073586e-06,
      "loss": 0.9715,
      "step": 15230
    },
    {
      "epoch": 0.8741178495610764,
      "grad_norm": 0.263671875,
      "learning_rate": 9.499906187257768e-06,
      "loss": 0.9445,
      "step": 15235
    },
    {
      "epoch": 0.8744047277525963,
      "grad_norm": 0.2578125,
      "learning_rate": 9.457351049138974e-06,
      "loss": 0.9681,
      "step": 15240
    },
    {
      "epoch": 0.8746916059441161,
      "grad_norm": 0.279296875,
      "learning_rate": 9.414886707391613e-06,
      "loss": 0.9653,
      "step": 15245
    },
    {
      "epoch": 0.8749784841356361,
      "grad_norm": 0.27734375,
      "learning_rate": 9.372513204598954e-06,
      "loss": 0.8629,
      "step": 15250
    },
    {
      "epoch": 0.8752653623271559,
      "grad_norm": 0.25,
      "learning_rate": 9.330230583253263e-06,
      "loss": 0.9324,
      "step": 15255
    },
    {
      "epoch": 0.8755522405186758,
      "grad_norm": 0.2578125,
      "learning_rate": 9.288038885755679e-06,
      "loss": 0.9968,
      "step": 15260
    },
    {
      "epoch": 0.8758391187101956,
      "grad_norm": 0.283203125,
      "learning_rate": 9.245938154416112e-06,
      "loss": 0.9753,
      "step": 15265
    },
    {
      "epoch": 0.8761259969017156,
      "grad_norm": 0.259765625,
      "learning_rate": 9.203928431453269e-06,
      "loss": 0.9914,
      "step": 15270
    },
    {
      "epoch": 0.8764128750932354,
      "grad_norm": 0.255859375,
      "learning_rate": 9.162009758994593e-06,
      "loss": 0.9888,
      "step": 15275
    },
    {
      "epoch": 0.8766997532847552,
      "grad_norm": 0.271484375,
      "learning_rate": 9.12018217907622e-06,
      "loss": 0.9092,
      "step": 15280
    },
    {
      "epoch": 0.8769866314762752,
      "grad_norm": 0.283203125,
      "learning_rate": 9.078445733642926e-06,
      "loss": 0.9634,
      "step": 15285
    },
    {
      "epoch": 0.877273509667795,
      "grad_norm": 0.2734375,
      "learning_rate": 9.036800464548157e-06,
      "loss": 0.9656,
      "step": 15290
    },
    {
      "epoch": 0.8775603878593149,
      "grad_norm": 0.2578125,
      "learning_rate": 8.995246413553871e-06,
      "loss": 0.8617,
      "step": 15295
    },
    {
      "epoch": 0.8778472660508349,
      "grad_norm": 0.341796875,
      "learning_rate": 8.953783622330515e-06,
      "loss": 0.925,
      "step": 15300
    },
    {
      "epoch": 0.8781341442423547,
      "grad_norm": 0.27734375,
      "learning_rate": 8.912412132457116e-06,
      "loss": 0.9452,
      "step": 15305
    },
    {
      "epoch": 0.8784210224338745,
      "grad_norm": 0.26171875,
      "learning_rate": 8.871131985421089e-06,
      "loss": 0.9806,
      "step": 15310
    },
    {
      "epoch": 0.8787079006253945,
      "grad_norm": 0.2578125,
      "learning_rate": 8.829943222618242e-06,
      "loss": 0.9902,
      "step": 15315
    },
    {
      "epoch": 0.8789947788169143,
      "grad_norm": 0.265625,
      "learning_rate": 8.788845885352782e-06,
      "loss": 0.8548,
      "step": 15320
    },
    {
      "epoch": 0.8792816570084342,
      "grad_norm": 0.248046875,
      "learning_rate": 8.747840014837194e-06,
      "loss": 0.9579,
      "step": 15325
    },
    {
      "epoch": 0.8795685351999541,
      "grad_norm": 0.275390625,
      "learning_rate": 8.706925652192255e-06,
      "loss": 0.9575,
      "step": 15330
    },
    {
      "epoch": 0.879855413391474,
      "grad_norm": 0.2734375,
      "learning_rate": 8.666102838446976e-06,
      "loss": 0.9764,
      "step": 15335
    },
    {
      "epoch": 0.8801422915829938,
      "grad_norm": 0.259765625,
      "learning_rate": 8.625371614538591e-06,
      "loss": 0.9222,
      "step": 15340
    },
    {
      "epoch": 0.8804291697745137,
      "grad_norm": 0.2451171875,
      "learning_rate": 8.584732021312469e-06,
      "loss": 0.9307,
      "step": 15345
    },
    {
      "epoch": 0.8807160479660336,
      "grad_norm": 0.310546875,
      "learning_rate": 8.544184099522024e-06,
      "loss": 0.9791,
      "step": 15350
    },
    {
      "epoch": 0.8810029261575535,
      "grad_norm": 0.271484375,
      "learning_rate": 8.50372788982886e-06,
      "loss": 0.9455,
      "step": 15355
    },
    {
      "epoch": 0.8812898043490733,
      "grad_norm": 0.25390625,
      "learning_rate": 8.46336343280254e-06,
      "loss": 0.8782,
      "step": 15360
    },
    {
      "epoch": 0.8815766825405933,
      "grad_norm": 0.279296875,
      "learning_rate": 8.423090768920628e-06,
      "loss": 0.9239,
      "step": 15365
    },
    {
      "epoch": 0.8818635607321131,
      "grad_norm": 0.279296875,
      "learning_rate": 8.38290993856865e-06,
      "loss": 0.9948,
      "step": 15370
    },
    {
      "epoch": 0.882150438923633,
      "grad_norm": 0.271484375,
      "learning_rate": 8.342820982040011e-06,
      "loss": 1.0173,
      "step": 15375
    },
    {
      "epoch": 0.8824373171151529,
      "grad_norm": 0.263671875,
      "learning_rate": 8.30282393953603e-06,
      "loss": 0.9433,
      "step": 15380
    },
    {
      "epoch": 0.8827241953066728,
      "grad_norm": 0.251953125,
      "learning_rate": 8.262918851165813e-06,
      "loss": 0.8967,
      "step": 15385
    },
    {
      "epoch": 0.8830110734981926,
      "grad_norm": 0.26171875,
      "learning_rate": 8.223105756946292e-06,
      "loss": 0.9086,
      "step": 15390
    },
    {
      "epoch": 0.8832979516897126,
      "grad_norm": 0.25,
      "learning_rate": 8.183384696802132e-06,
      "loss": 0.9384,
      "step": 15395
    },
    {
      "epoch": 0.8835848298812324,
      "grad_norm": 0.279296875,
      "learning_rate": 8.143755710565648e-06,
      "loss": 0.9808,
      "step": 15400
    },
    {
      "epoch": 0.8838717080727523,
      "grad_norm": 0.279296875,
      "learning_rate": 8.10421883797694e-06,
      "loss": 0.9723,
      "step": 15405
    },
    {
      "epoch": 0.8841585862642722,
      "grad_norm": 0.2353515625,
      "learning_rate": 8.064774118683638e-06,
      "loss": 0.8979,
      "step": 15410
    },
    {
      "epoch": 0.8844454644557921,
      "grad_norm": 0.259765625,
      "learning_rate": 8.025421592241012e-06,
      "loss": 0.9903,
      "step": 15415
    },
    {
      "epoch": 0.8847323426473119,
      "grad_norm": 0.275390625,
      "learning_rate": 7.98616129811185e-06,
      "loss": 0.9551,
      "step": 15420
    },
    {
      "epoch": 0.8850192208388318,
      "grad_norm": 0.28515625,
      "learning_rate": 7.94699327566647e-06,
      "loss": 0.995,
      "step": 15425
    },
    {
      "epoch": 0.8853060990303517,
      "grad_norm": 0.267578125,
      "learning_rate": 7.907917564182631e-06,
      "loss": 0.8506,
      "step": 15430
    },
    {
      "epoch": 0.8855929772218716,
      "grad_norm": 0.259765625,
      "learning_rate": 7.86893420284559e-06,
      "loss": 0.8804,
      "step": 15435
    },
    {
      "epoch": 0.8858798554133914,
      "grad_norm": 0.287109375,
      "learning_rate": 7.830043230747918e-06,
      "loss": 0.9101,
      "step": 15440
    },
    {
      "epoch": 0.8861667336049114,
      "grad_norm": 0.265625,
      "learning_rate": 7.791244686889588e-06,
      "loss": 0.9703,
      "step": 15445
    },
    {
      "epoch": 0.8864536117964312,
      "grad_norm": 0.271484375,
      "learning_rate": 7.752538610177817e-06,
      "loss": 0.922,
      "step": 15450
    },
    {
      "epoch": 0.8867404899879511,
      "grad_norm": 0.255859375,
      "learning_rate": 7.713925039427206e-06,
      "loss": 1.0184,
      "step": 15455
    },
    {
      "epoch": 0.887027368179471,
      "grad_norm": 0.267578125,
      "learning_rate": 7.67540401335951e-06,
      "loss": 0.9566,
      "step": 15460
    },
    {
      "epoch": 0.8873142463709909,
      "grad_norm": 0.2734375,
      "learning_rate": 7.636975570603689e-06,
      "loss": 0.9193,
      "step": 15465
    },
    {
      "epoch": 0.8876011245625107,
      "grad_norm": 0.26953125,
      "learning_rate": 7.5986397496958796e-06,
      "loss": 0.9882,
      "step": 15470
    },
    {
      "epoch": 0.8878880027540307,
      "grad_norm": 0.27734375,
      "learning_rate": 7.560396589079322e-06,
      "loss": 0.9726,
      "step": 15475
    },
    {
      "epoch": 0.8881748809455505,
      "grad_norm": 0.2578125,
      "learning_rate": 7.522246127104348e-06,
      "loss": 0.9203,
      "step": 15480
    },
    {
      "epoch": 0.8884617591370704,
      "grad_norm": 0.236328125,
      "learning_rate": 7.484188402028336e-06,
      "loss": 0.9681,
      "step": 15485
    },
    {
      "epoch": 0.8887486373285903,
      "grad_norm": 0.26953125,
      "learning_rate": 7.446223452015644e-06,
      "loss": 0.972,
      "step": 15490
    },
    {
      "epoch": 0.8890355155201102,
      "grad_norm": 0.265625,
      "learning_rate": 7.40835131513764e-06,
      "loss": 1.0296,
      "step": 15495
    },
    {
      "epoch": 0.88932239371163,
      "grad_norm": 0.25390625,
      "learning_rate": 7.3705720293725245e-06,
      "loss": 0.9597,
      "step": 15500
    },
    {
      "epoch": 0.8896092719031499,
      "grad_norm": 0.255859375,
      "learning_rate": 7.332885632605513e-06,
      "loss": 0.8605,
      "step": 15505
    },
    {
      "epoch": 0.8898961500946698,
      "grad_norm": 0.248046875,
      "learning_rate": 7.295292162628575e-06,
      "loss": 0.9628,
      "step": 15510
    },
    {
      "epoch": 0.8901830282861897,
      "grad_norm": 0.271484375,
      "learning_rate": 7.257791657140545e-06,
      "loss": 0.9441,
      "step": 15515
    },
    {
      "epoch": 0.8904699064777095,
      "grad_norm": 0.26171875,
      "learning_rate": 7.220384153746995e-06,
      "loss": 0.9413,
      "step": 15520
    },
    {
      "epoch": 0.8907567846692295,
      "grad_norm": 0.26171875,
      "learning_rate": 7.183069689960265e-06,
      "loss": 0.9683,
      "step": 15525
    },
    {
      "epoch": 0.8910436628607493,
      "grad_norm": 0.2890625,
      "learning_rate": 7.145848303199365e-06,
      "loss": 0.9836,
      "step": 15530
    },
    {
      "epoch": 0.8913305410522692,
      "grad_norm": 0.26171875,
      "learning_rate": 7.108720030790028e-06,
      "loss": 0.9379,
      "step": 15535
    },
    {
      "epoch": 0.8916174192437891,
      "grad_norm": 0.291015625,
      "learning_rate": 7.071684909964526e-06,
      "loss": 0.9567,
      "step": 15540
    },
    {
      "epoch": 0.891904297435309,
      "grad_norm": 0.26171875,
      "learning_rate": 7.034742977861786e-06,
      "loss": 0.9171,
      "step": 15545
    },
    {
      "epoch": 0.8921911756268288,
      "grad_norm": 0.26953125,
      "learning_rate": 6.99789427152725e-06,
      "loss": 0.9089,
      "step": 15550
    },
    {
      "epoch": 0.8924780538183488,
      "grad_norm": 0.251953125,
      "learning_rate": 6.9611388279128835e-06,
      "loss": 0.9761,
      "step": 15555
    },
    {
      "epoch": 0.8927649320098686,
      "grad_norm": 0.259765625,
      "learning_rate": 6.9244766838771235e-06,
      "loss": 0.9449,
      "step": 15560
    },
    {
      "epoch": 0.8930518102013885,
      "grad_norm": 0.2734375,
      "learning_rate": 6.887907876184862e-06,
      "loss": 0.9312,
      "step": 15565
    },
    {
      "epoch": 0.8933386883929084,
      "grad_norm": 0.25,
      "learning_rate": 6.851432441507377e-06,
      "loss": 0.9072,
      "step": 15570
    },
    {
      "epoch": 0.8936255665844283,
      "grad_norm": 0.263671875,
      "learning_rate": 6.8150504164223085e-06,
      "loss": 0.9976,
      "step": 15575
    },
    {
      "epoch": 0.8939124447759481,
      "grad_norm": 0.27734375,
      "learning_rate": 6.778761837413627e-06,
      "loss": 0.9691,
      "step": 15580
    },
    {
      "epoch": 0.894199322967468,
      "grad_norm": 0.27734375,
      "learning_rate": 6.742566740871625e-06,
      "loss": 0.9596,
      "step": 15585
    },
    {
      "epoch": 0.8944862011589879,
      "grad_norm": 0.2734375,
      "learning_rate": 6.706465163092823e-06,
      "loss": 0.9941,
      "step": 15590
    },
    {
      "epoch": 0.8947730793505078,
      "grad_norm": 0.2578125,
      "learning_rate": 6.67045714027994e-06,
      "loss": 0.9539,
      "step": 15595
    },
    {
      "epoch": 0.8950599575420276,
      "grad_norm": 0.267578125,
      "learning_rate": 6.634542708541935e-06,
      "loss": 0.931,
      "step": 15600
    },
    {
      "epoch": 0.8953468357335476,
      "grad_norm": 0.30078125,
      "learning_rate": 6.5987219038938455e-06,
      "loss": 0.9465,
      "step": 15605
    },
    {
      "epoch": 0.8956337139250674,
      "grad_norm": 0.251953125,
      "learning_rate": 6.562994762256869e-06,
      "loss": 0.8939,
      "step": 15610
    },
    {
      "epoch": 0.8959205921165873,
      "grad_norm": 0.25,
      "learning_rate": 6.527361319458292e-06,
      "loss": 0.932,
      "step": 15615
    },
    {
      "epoch": 0.8962074703081072,
      "grad_norm": 0.2578125,
      "learning_rate": 6.491821611231364e-06,
      "loss": 0.9207,
      "step": 15620
    },
    {
      "epoch": 0.8964943484996271,
      "grad_norm": 0.2890625,
      "learning_rate": 6.456375673215409e-06,
      "loss": 1.001,
      "step": 15625
    },
    {
      "epoch": 0.8967812266911469,
      "grad_norm": 0.2578125,
      "learning_rate": 6.421023540955684e-06,
      "loss": 0.8778,
      "step": 15630
    },
    {
      "epoch": 0.8970681048826669,
      "grad_norm": 0.2734375,
      "learning_rate": 6.3857652499033974e-06,
      "loss": 0.8923,
      "step": 15635
    },
    {
      "epoch": 0.8973549830741867,
      "grad_norm": 0.259765625,
      "learning_rate": 6.350600835415632e-06,
      "loss": 0.9465,
      "step": 15640
    },
    {
      "epoch": 0.8976418612657066,
      "grad_norm": 0.263671875,
      "learning_rate": 6.31553033275536e-06,
      "loss": 0.9561,
      "step": 15645
    },
    {
      "epoch": 0.8979287394572265,
      "grad_norm": 0.279296875,
      "learning_rate": 6.2805537770913356e-06,
      "loss": 0.9631,
      "step": 15650
    },
    {
      "epoch": 0.8982156176487464,
      "grad_norm": 0.251953125,
      "learning_rate": 6.245671203498149e-06,
      "loss": 0.9159,
      "step": 15655
    },
    {
      "epoch": 0.8985024958402662,
      "grad_norm": 0.259765625,
      "learning_rate": 6.210882646956084e-06,
      "loss": 0.894,
      "step": 15660
    },
    {
      "epoch": 0.8987893740317862,
      "grad_norm": 0.265625,
      "learning_rate": 6.176188142351247e-06,
      "loss": 0.9014,
      "step": 15665
    },
    {
      "epoch": 0.899076252223306,
      "grad_norm": 0.2890625,
      "learning_rate": 6.141587724475317e-06,
      "loss": 0.9285,
      "step": 15670
    },
    {
      "epoch": 0.8993631304148259,
      "grad_norm": 0.2578125,
      "learning_rate": 6.107081428025674e-06,
      "loss": 0.9748,
      "step": 15675
    },
    {
      "epoch": 0.8996500086063457,
      "grad_norm": 0.267578125,
      "learning_rate": 6.072669287605326e-06,
      "loss": 1.0066,
      "step": 15680
    },
    {
      "epoch": 0.8999368867978657,
      "grad_norm": 0.271484375,
      "learning_rate": 6.038351337722836e-06,
      "loss": 0.9463,
      "step": 15685
    },
    {
      "epoch": 0.9002237649893855,
      "grad_norm": 0.271484375,
      "learning_rate": 6.004127612792332e-06,
      "loss": 0.8783,
      "step": 15690
    },
    {
      "epoch": 0.9005106431809053,
      "grad_norm": 0.265625,
      "learning_rate": 5.969998147133415e-06,
      "loss": 0.9672,
      "step": 15695
    },
    {
      "epoch": 0.9007975213724253,
      "grad_norm": 0.251953125,
      "learning_rate": 5.935962974971221e-06,
      "loss": 0.9173,
      "step": 15700
    },
    {
      "epoch": 0.9010843995639451,
      "grad_norm": 0.2734375,
      "learning_rate": 5.9020221304362686e-06,
      "loss": 0.9124,
      "step": 15705
    },
    {
      "epoch": 0.901371277755465,
      "grad_norm": 0.2578125,
      "learning_rate": 5.868175647564522e-06,
      "loss": 0.9556,
      "step": 15710
    },
    {
      "epoch": 0.901658155946985,
      "grad_norm": 0.27734375,
      "learning_rate": 5.834423560297353e-06,
      "loss": 0.9977,
      "step": 15715
    },
    {
      "epoch": 0.9019450341385048,
      "grad_norm": 0.263671875,
      "learning_rate": 5.800765902481364e-06,
      "loss": 0.9459,
      "step": 15720
    },
    {
      "epoch": 0.9022319123300246,
      "grad_norm": 0.251953125,
      "learning_rate": 5.767202707868558e-06,
      "loss": 0.9167,
      "step": 15725
    },
    {
      "epoch": 0.9025187905215446,
      "grad_norm": 0.251953125,
      "learning_rate": 5.733734010116188e-06,
      "loss": 0.9177,
      "step": 15730
    },
    {
      "epoch": 0.9028056687130644,
      "grad_norm": 0.279296875,
      "learning_rate": 5.700359842786729e-06,
      "loss": 0.8644,
      "step": 15735
    },
    {
      "epoch": 0.9030925469045843,
      "grad_norm": 0.267578125,
      "learning_rate": 5.667080239347889e-06,
      "loss": 0.9814,
      "step": 15740
    },
    {
      "epoch": 0.9033794250961042,
      "grad_norm": 0.26953125,
      "learning_rate": 5.633895233172504e-06,
      "loss": 0.9664,
      "step": 15745
    },
    {
      "epoch": 0.9036663032876241,
      "grad_norm": 0.2734375,
      "learning_rate": 5.600804857538588e-06,
      "loss": 0.9869,
      "step": 15750
    },
    {
      "epoch": 0.9039531814791439,
      "grad_norm": 0.259765625,
      "learning_rate": 5.567809145629244e-06,
      "loss": 0.9679,
      "step": 15755
    },
    {
      "epoch": 0.9042400596706638,
      "grad_norm": 0.263671875,
      "learning_rate": 5.534908130532623e-06,
      "loss": 0.9669,
      "step": 15760
    },
    {
      "epoch": 0.9045269378621837,
      "grad_norm": 0.27734375,
      "learning_rate": 5.50210184524198e-06,
      "loss": 0.9085,
      "step": 15765
    },
    {
      "epoch": 0.9048138160537036,
      "grad_norm": 0.263671875,
      "learning_rate": 5.469390322655498e-06,
      "loss": 0.8902,
      "step": 15770
    },
    {
      "epoch": 0.9051006942452234,
      "grad_norm": 0.251953125,
      "learning_rate": 5.436773595576361e-06,
      "loss": 0.8946,
      "step": 15775
    },
    {
      "epoch": 0.9053875724367434,
      "grad_norm": 0.263671875,
      "learning_rate": 5.404251696712714e-06,
      "loss": 0.9744,
      "step": 15780
    },
    {
      "epoch": 0.9056744506282632,
      "grad_norm": 0.265625,
      "learning_rate": 5.371824658677594e-06,
      "loss": 1.0072,
      "step": 15785
    },
    {
      "epoch": 0.9059613288197831,
      "grad_norm": 0.248046875,
      "learning_rate": 5.339492513988897e-06,
      "loss": 0.9326,
      "step": 15790
    },
    {
      "epoch": 0.906248207011303,
      "grad_norm": 0.25,
      "learning_rate": 5.307255295069369e-06,
      "loss": 0.9251,
      "step": 15795
    },
    {
      "epoch": 0.9065350852028229,
      "grad_norm": 0.2734375,
      "learning_rate": 5.275113034246571e-06,
      "loss": 1.0251,
      "step": 15800
    },
    {
      "epoch": 0.9068219633943427,
      "grad_norm": 0.263671875,
      "learning_rate": 5.243065763752819e-06,
      "loss": 0.9648,
      "step": 15805
    },
    {
      "epoch": 0.9071088415858627,
      "grad_norm": 0.28515625,
      "learning_rate": 5.2111135157252076e-06,
      "loss": 0.9644,
      "step": 15810
    },
    {
      "epoch": 0.9073957197773825,
      "grad_norm": 0.28125,
      "learning_rate": 5.179256322205539e-06,
      "loss": 0.9843,
      "step": 15815
    },
    {
      "epoch": 0.9076825979689024,
      "grad_norm": 0.32421875,
      "learning_rate": 5.147494215140236e-06,
      "loss": 0.9976,
      "step": 15820
    },
    {
      "epoch": 0.9079694761604223,
      "grad_norm": 0.2470703125,
      "learning_rate": 5.115827226380421e-06,
      "loss": 0.8557,
      "step": 15825
    },
    {
      "epoch": 0.9082563543519422,
      "grad_norm": 0.279296875,
      "learning_rate": 5.084255387681836e-06,
      "loss": 0.9765,
      "step": 15830
    },
    {
      "epoch": 0.908543232543462,
      "grad_norm": 0.3046875,
      "learning_rate": 5.052778730704788e-06,
      "loss": 0.9705,
      "step": 15835
    },
    {
      "epoch": 0.9088301107349819,
      "grad_norm": 0.26953125,
      "learning_rate": 5.021397287014129e-06,
      "loss": 1.0047,
      "step": 15840
    },
    {
      "epoch": 0.9091169889265018,
      "grad_norm": 0.263671875,
      "learning_rate": 4.990111088079263e-06,
      "loss": 0.92,
      "step": 15845
    },
    {
      "epoch": 0.9094038671180217,
      "grad_norm": 0.265625,
      "learning_rate": 4.958920165274039e-06,
      "loss": 1.0487,
      "step": 15850
    },
    {
      "epoch": 0.9096907453095415,
      "grad_norm": 0.287109375,
      "learning_rate": 4.92782454987678e-06,
      "loss": 0.9803,
      "step": 15855
    },
    {
      "epoch": 0.9099776235010615,
      "grad_norm": 0.251953125,
      "learning_rate": 4.896824273070255e-06,
      "loss": 0.8652,
      "step": 15860
    },
    {
      "epoch": 0.9102645016925813,
      "grad_norm": 0.267578125,
      "learning_rate": 4.865919365941629e-06,
      "loss": 1.0166,
      "step": 15865
    },
    {
      "epoch": 0.9105513798841012,
      "grad_norm": 0.265625,
      "learning_rate": 4.8351098594823674e-06,
      "loss": 0.9448,
      "step": 15870
    },
    {
      "epoch": 0.9108382580756211,
      "grad_norm": 0.2451171875,
      "learning_rate": 4.804395784588334e-06,
      "loss": 0.9409,
      "step": 15875
    },
    {
      "epoch": 0.911125136267141,
      "grad_norm": 0.25,
      "learning_rate": 4.77377717205969e-06,
      "loss": 0.9747,
      "step": 15880
    },
    {
      "epoch": 0.9114120144586608,
      "grad_norm": 0.2470703125,
      "learning_rate": 4.7432540526008205e-06,
      "loss": 0.883,
      "step": 15885
    },
    {
      "epoch": 0.9116988926501808,
      "grad_norm": 0.279296875,
      "learning_rate": 4.712826456820385e-06,
      "loss": 0.9437,
      "step": 15890
    },
    {
      "epoch": 0.9119857708417006,
      "grad_norm": 0.29296875,
      "learning_rate": 4.682494415231253e-06,
      "loss": 0.8496,
      "step": 15895
    },
    {
      "epoch": 0.9122726490332205,
      "grad_norm": 0.265625,
      "learning_rate": 4.652257958250461e-06,
      "loss": 0.9518,
      "step": 15900
    },
    {
      "epoch": 0.9125595272247404,
      "grad_norm": 0.255859375,
      "learning_rate": 4.6221171161991874e-06,
      "loss": 0.9091,
      "step": 15905
    },
    {
      "epoch": 0.9128464054162603,
      "grad_norm": 0.2734375,
      "learning_rate": 4.592071919302743e-06,
      "loss": 0.9391,
      "step": 15910
    },
    {
      "epoch": 0.9131332836077801,
      "grad_norm": 0.2734375,
      "learning_rate": 4.562122397690538e-06,
      "loss": 0.9312,
      "step": 15915
    },
    {
      "epoch": 0.9134201617993,
      "grad_norm": 0.259765625,
      "learning_rate": 4.532268581395982e-06,
      "loss": 0.889,
      "step": 15920
    },
    {
      "epoch": 0.9137070399908199,
      "grad_norm": 0.265625,
      "learning_rate": 4.502510500356571e-06,
      "loss": 0.909,
      "step": 15925
    },
    {
      "epoch": 0.9139939181823398,
      "grad_norm": 0.26171875,
      "learning_rate": 4.472848184413769e-06,
      "loss": 0.9407,
      "step": 15930
    },
    {
      "epoch": 0.9142807963738596,
      "grad_norm": 0.259765625,
      "learning_rate": 4.443281663313026e-06,
      "loss": 0.9368,
      "step": 15935
    },
    {
      "epoch": 0.9145676745653796,
      "grad_norm": 0.26953125,
      "learning_rate": 4.413810966703702e-06,
      "loss": 1.0034,
      "step": 15940
    },
    {
      "epoch": 0.9148545527568994,
      "grad_norm": 0.26953125,
      "learning_rate": 4.3844361241390795e-06,
      "loss": 0.8068,
      "step": 15945
    },
    {
      "epoch": 0.9151414309484193,
      "grad_norm": 0.251953125,
      "learning_rate": 4.355157165076318e-06,
      "loss": 0.8796,
      "step": 15950
    },
    {
      "epoch": 0.9154283091399392,
      "grad_norm": 0.251953125,
      "learning_rate": 4.325974118876408e-06,
      "loss": 0.9913,
      "step": 15955
    },
    {
      "epoch": 0.9157151873314591,
      "grad_norm": 0.251953125,
      "learning_rate": 4.296887014804207e-06,
      "loss": 0.9241,
      "step": 15960
    },
    {
      "epoch": 0.9160020655229789,
      "grad_norm": 0.26953125,
      "learning_rate": 4.267895882028328e-06,
      "loss": 0.9562,
      "step": 15965
    },
    {
      "epoch": 0.9162889437144989,
      "grad_norm": 0.25390625,
      "learning_rate": 4.239000749621092e-06,
      "loss": 0.9201,
      "step": 15970
    },
    {
      "epoch": 0.9165758219060187,
      "grad_norm": 0.25,
      "learning_rate": 4.210201646558653e-06,
      "loss": 0.9493,
      "step": 15975
    },
    {
      "epoch": 0.9168627000975386,
      "grad_norm": 0.255859375,
      "learning_rate": 4.181498601720801e-06,
      "loss": 0.9421,
      "step": 15980
    },
    {
      "epoch": 0.9171495782890585,
      "grad_norm": 0.2734375,
      "learning_rate": 4.15289164389101e-06,
      "loss": 0.9286,
      "step": 15985
    },
    {
      "epoch": 0.9174364564805784,
      "grad_norm": 0.279296875,
      "learning_rate": 4.124380801756411e-06,
      "loss": 0.9408,
      "step": 15990
    },
    {
      "epoch": 0.9177233346720982,
      "grad_norm": 0.275390625,
      "learning_rate": 4.095966103907723e-06,
      "loss": 1.0005,
      "step": 15995
    },
    {
      "epoch": 0.9180102128636181,
      "grad_norm": 0.25390625,
      "learning_rate": 4.0676475788392845e-06,
      "loss": 0.9564,
      "step": 16000
    },
    {
      "epoch": 0.918297091055138,
      "grad_norm": 0.28125,
      "learning_rate": 4.039425254948958e-06,
      "loss": 0.9974,
      "step": 16005
    },
    {
      "epoch": 0.9185839692466579,
      "grad_norm": 0.287109375,
      "learning_rate": 4.011299160538185e-06,
      "loss": 0.9932,
      "step": 16010
    },
    {
      "epoch": 0.9188708474381777,
      "grad_norm": 0.2578125,
      "learning_rate": 3.983269323811856e-06,
      "loss": 0.9562,
      "step": 16015
    },
    {
      "epoch": 0.9191577256296977,
      "grad_norm": 0.2578125,
      "learning_rate": 3.955335772878343e-06,
      "loss": 0.9051,
      "step": 16020
    },
    {
      "epoch": 0.9194446038212175,
      "grad_norm": 0.26171875,
      "learning_rate": 3.927498535749486e-06,
      "loss": 0.9962,
      "step": 16025
    },
    {
      "epoch": 0.9197314820127374,
      "grad_norm": 0.275390625,
      "learning_rate": 3.89975764034054e-06,
      "loss": 0.8909,
      "step": 16030
    },
    {
      "epoch": 0.9200183602042573,
      "grad_norm": 0.275390625,
      "learning_rate": 3.872113114470122e-06,
      "loss": 0.9961,
      "step": 16035
    },
    {
      "epoch": 0.9203052383957772,
      "grad_norm": 0.263671875,
      "learning_rate": 3.844564985860222e-06,
      "loss": 0.9576,
      "step": 16040
    },
    {
      "epoch": 0.920592116587297,
      "grad_norm": 0.267578125,
      "learning_rate": 3.817113282136176e-06,
      "loss": 0.8964,
      "step": 16045
    },
    {
      "epoch": 0.920878994778817,
      "grad_norm": 0.265625,
      "learning_rate": 3.7897580308265954e-06,
      "loss": 0.9571,
      "step": 16050
    },
    {
      "epoch": 0.9211658729703368,
      "grad_norm": 0.265625,
      "learning_rate": 3.762499259363417e-06,
      "loss": 0.9314,
      "step": 16055
    },
    {
      "epoch": 0.9214527511618567,
      "grad_norm": 0.263671875,
      "learning_rate": 3.735336995081795e-06,
      "loss": 0.9395,
      "step": 16060
    },
    {
      "epoch": 0.9217396293533766,
      "grad_norm": 0.2734375,
      "learning_rate": 3.7082712652200867e-06,
      "loss": 0.958,
      "step": 16065
    },
    {
      "epoch": 0.9220265075448965,
      "grad_norm": 0.28515625,
      "learning_rate": 3.6813020969198585e-06,
      "loss": 0.9532,
      "step": 16070
    },
    {
      "epoch": 0.9223133857364163,
      "grad_norm": 0.26953125,
      "learning_rate": 3.654429517225877e-06,
      "loss": 0.9759,
      "step": 16075
    },
    {
      "epoch": 0.9226002639279361,
      "grad_norm": 0.251953125,
      "learning_rate": 3.62765355308603e-06,
      "loss": 0.9062,
      "step": 16080
    },
    {
      "epoch": 0.9228871421194561,
      "grad_norm": 0.26171875,
      "learning_rate": 3.600974231351306e-06,
      "loss": 0.9079,
      "step": 16085
    },
    {
      "epoch": 0.923174020310976,
      "grad_norm": 0.259765625,
      "learning_rate": 3.574391578775771e-06,
      "loss": 0.9881,
      "step": 16090
    },
    {
      "epoch": 0.9234608985024958,
      "grad_norm": 0.275390625,
      "learning_rate": 3.547905622016601e-06,
      "loss": 1.0151,
      "step": 16095
    },
    {
      "epoch": 0.9237477766940158,
      "grad_norm": 0.275390625,
      "learning_rate": 3.5215163876339274e-06,
      "loss": 0.8687,
      "step": 16100
    },
    {
      "epoch": 0.9240346548855356,
      "grad_norm": 0.279296875,
      "learning_rate": 3.495223902090983e-06,
      "loss": 0.9379,
      "step": 16105
    },
    {
      "epoch": 0.9243215330770554,
      "grad_norm": 0.26171875,
      "learning_rate": 3.4690281917539203e-06,
      "loss": 0.969,
      "step": 16110
    },
    {
      "epoch": 0.9246084112685754,
      "grad_norm": 0.267578125,
      "learning_rate": 3.442929282891827e-06,
      "loss": 0.9878,
      "step": 16115
    },
    {
      "epoch": 0.9248952894600952,
      "grad_norm": 0.267578125,
      "learning_rate": 3.416927201676767e-06,
      "loss": 0.9008,
      "step": 16120
    },
    {
      "epoch": 0.9251821676516151,
      "grad_norm": 0.2734375,
      "learning_rate": 3.3910219741836944e-06,
      "loss": 0.9432,
      "step": 16125
    },
    {
      "epoch": 0.925469045843135,
      "grad_norm": 0.2470703125,
      "learning_rate": 3.365213626390418e-06,
      "loss": 0.9459,
      "step": 16130
    },
    {
      "epoch": 0.9257559240346549,
      "grad_norm": 0.28515625,
      "learning_rate": 3.339502184177612e-06,
      "loss": 0.9924,
      "step": 16135
    },
    {
      "epoch": 0.9260428022261747,
      "grad_norm": 0.310546875,
      "learning_rate": 3.3138876733287638e-06,
      "loss": 0.9324,
      "step": 16140
    },
    {
      "epoch": 0.9263296804176947,
      "grad_norm": 0.275390625,
      "learning_rate": 3.28837011953016e-06,
      "loss": 0.9471,
      "step": 16145
    },
    {
      "epoch": 0.9266165586092145,
      "grad_norm": 0.263671875,
      "learning_rate": 3.262949548370853e-06,
      "loss": 0.94,
      "step": 16150
    },
    {
      "epoch": 0.9269034368007344,
      "grad_norm": 0.267578125,
      "learning_rate": 3.237625985342674e-06,
      "loss": 0.9803,
      "step": 16155
    },
    {
      "epoch": 0.9271903149922542,
      "grad_norm": 0.283203125,
      "learning_rate": 3.212399455840154e-06,
      "loss": 0.9429,
      "step": 16160
    },
    {
      "epoch": 0.9274771931837742,
      "grad_norm": 0.263671875,
      "learning_rate": 3.187269985160457e-06,
      "loss": 0.9188,
      "step": 16165
    },
    {
      "epoch": 0.927764071375294,
      "grad_norm": 0.26171875,
      "learning_rate": 3.1622375985035367e-06,
      "loss": 0.9285,
      "step": 16170
    },
    {
      "epoch": 0.9280509495668139,
      "grad_norm": 0.287109375,
      "learning_rate": 3.137302320971891e-06,
      "loss": 0.9227,
      "step": 16175
    },
    {
      "epoch": 0.9283378277583338,
      "grad_norm": 0.28125,
      "learning_rate": 3.112464177570662e-06,
      "loss": 0.9039,
      "step": 16180
    },
    {
      "epoch": 0.9286247059498537,
      "grad_norm": 0.259765625,
      "learning_rate": 3.087723193207648e-06,
      "loss": 0.9231,
      "step": 16185
    },
    {
      "epoch": 0.9289115841413735,
      "grad_norm": 0.271484375,
      "learning_rate": 3.0630793926931132e-06,
      "loss": 0.8935,
      "step": 16190
    },
    {
      "epoch": 0.9291984623328935,
      "grad_norm": 0.26171875,
      "learning_rate": 3.038532800739935e-06,
      "loss": 0.8984,
      "step": 16195
    },
    {
      "epoch": 0.9294853405244133,
      "grad_norm": 0.341796875,
      "learning_rate": 3.014083441963478e-06,
      "loss": 0.9787,
      "step": 16200
    },
    {
      "epoch": 0.9297722187159332,
      "grad_norm": 0.2578125,
      "learning_rate": 2.9897313408816407e-06,
      "loss": 0.9824,
      "step": 16205
    },
    {
      "epoch": 0.9300590969074531,
      "grad_norm": 0.28125,
      "learning_rate": 2.9654765219147563e-06,
      "loss": 0.8349,
      "step": 16210
    },
    {
      "epoch": 0.930345975098973,
      "grad_norm": 0.263671875,
      "learning_rate": 2.941319009385579e-06,
      "loss": 0.909,
      "step": 16215
    },
    {
      "epoch": 0.9306328532904928,
      "grad_norm": 0.26171875,
      "learning_rate": 2.9172588275193534e-06,
      "loss": 0.8929,
      "step": 16220
    },
    {
      "epoch": 0.9309197314820128,
      "grad_norm": 0.271484375,
      "learning_rate": 2.8932960004436795e-06,
      "loss": 0.9595,
      "step": 16225
    },
    {
      "epoch": 0.9312066096735326,
      "grad_norm": 0.267578125,
      "learning_rate": 2.869430552188501e-06,
      "loss": 0.9288,
      "step": 16230
    },
    {
      "epoch": 0.9314934878650525,
      "grad_norm": 0.3359375,
      "learning_rate": 2.8456625066861973e-06,
      "loss": 0.9109,
      "step": 16235
    },
    {
      "epoch": 0.9317803660565723,
      "grad_norm": 0.279296875,
      "learning_rate": 2.8219918877713804e-06,
      "loss": 0.9129,
      "step": 16240
    },
    {
      "epoch": 0.9320672442480923,
      "grad_norm": 0.267578125,
      "learning_rate": 2.7984187191810063e-06,
      "loss": 0.8989,
      "step": 16245
    },
    {
      "epoch": 0.9323541224396121,
      "grad_norm": 0.26171875,
      "learning_rate": 2.7749430245542997e-06,
      "loss": 0.9914,
      "step": 16250
    },
    {
      "epoch": 0.932641000631132,
      "grad_norm": 0.302734375,
      "learning_rate": 2.751564827432751e-06,
      "loss": 0.9854,
      "step": 16255
    },
    {
      "epoch": 0.9329278788226519,
      "grad_norm": 0.267578125,
      "learning_rate": 2.7282841512600632e-06,
      "loss": 0.8519,
      "step": 16260
    },
    {
      "epoch": 0.9332147570141718,
      "grad_norm": 0.2734375,
      "learning_rate": 2.705101019382139e-06,
      "loss": 1.001,
      "step": 16265
    },
    {
      "epoch": 0.9335016352056916,
      "grad_norm": 0.28125,
      "learning_rate": 2.682015455047093e-06,
      "loss": 1.0089,
      "step": 16270
    },
    {
      "epoch": 0.9337885133972116,
      "grad_norm": 0.263671875,
      "learning_rate": 2.659027481405163e-06,
      "loss": 0.9404,
      "step": 16275
    },
    {
      "epoch": 0.9340753915887314,
      "grad_norm": 0.275390625,
      "learning_rate": 2.636137121508753e-06,
      "loss": 0.9803,
      "step": 16280
    },
    {
      "epoch": 0.9343622697802513,
      "grad_norm": 0.283203125,
      "learning_rate": 2.6133443983123785e-06,
      "loss": 1.0476,
      "step": 16285
    },
    {
      "epoch": 0.9346491479717712,
      "grad_norm": 0.263671875,
      "learning_rate": 2.5906493346726126e-06,
      "loss": 0.895,
      "step": 16290
    },
    {
      "epoch": 0.9349360261632911,
      "grad_norm": 0.28515625,
      "learning_rate": 2.5680519533481052e-06,
      "loss": 0.9463,
      "step": 16295
    },
    {
      "epoch": 0.9352229043548109,
      "grad_norm": 0.251953125,
      "learning_rate": 2.5455522769995966e-06,
      "loss": 0.9837,
      "step": 16300
    },
    {
      "epoch": 0.9355097825463309,
      "grad_norm": 0.259765625,
      "learning_rate": 2.523150328189783e-06,
      "loss": 0.9138,
      "step": 16305
    },
    {
      "epoch": 0.9357966607378507,
      "grad_norm": 0.26171875,
      "learning_rate": 2.500846129383416e-06,
      "loss": 0.9509,
      "step": 16310
    },
    {
      "epoch": 0.9360835389293706,
      "grad_norm": 0.25,
      "learning_rate": 2.478639702947172e-06,
      "loss": 0.9884,
      "step": 16315
    },
    {
      "epoch": 0.9363704171208904,
      "grad_norm": 0.26953125,
      "learning_rate": 2.4565310711497146e-06,
      "loss": 0.9741,
      "step": 16320
    },
    {
      "epoch": 0.9366572953124104,
      "grad_norm": 0.2734375,
      "learning_rate": 2.434520256161632e-06,
      "loss": 0.9581,
      "step": 16325
    },
    {
      "epoch": 0.9369441735039302,
      "grad_norm": 0.283203125,
      "learning_rate": 2.412607280055401e-06,
      "loss": 0.9786,
      "step": 16330
    },
    {
      "epoch": 0.9372310516954501,
      "grad_norm": 0.255859375,
      "learning_rate": 2.390792164805433e-06,
      "loss": 0.8686,
      "step": 16335
    },
    {
      "epoch": 0.93751792988697,
      "grad_norm": 0.279296875,
      "learning_rate": 2.3690749322879624e-06,
      "loss": 1.0135,
      "step": 16340
    },
    {
      "epoch": 0.9378048080784899,
      "grad_norm": 0.2578125,
      "learning_rate": 2.347455604281057e-06,
      "loss": 0.9064,
      "step": 16345
    },
    {
      "epoch": 0.9380916862700097,
      "grad_norm": 0.27734375,
      "learning_rate": 2.3259342024646524e-06,
      "loss": 0.9567,
      "step": 16350
    },
    {
      "epoch": 0.9383785644615297,
      "grad_norm": 0.2578125,
      "learning_rate": 2.304510748420463e-06,
      "loss": 1.0151,
      "step": 16355
    },
    {
      "epoch": 0.9386654426530495,
      "grad_norm": 0.2412109375,
      "learning_rate": 2.2831852636319594e-06,
      "loss": 0.9201,
      "step": 16360
    },
    {
      "epoch": 0.9389523208445694,
      "grad_norm": 0.25390625,
      "learning_rate": 2.2619577694843907e-06,
      "loss": 0.9121,
      "step": 16365
    },
    {
      "epoch": 0.9392391990360893,
      "grad_norm": 0.2578125,
      "learning_rate": 2.240828287264729e-06,
      "loss": 0.9018,
      "step": 16370
    },
    {
      "epoch": 0.9395260772276092,
      "grad_norm": 0.26171875,
      "learning_rate": 2.219796838161681e-06,
      "loss": 0.8755,
      "step": 16375
    },
    {
      "epoch": 0.939812955419129,
      "grad_norm": 0.26171875,
      "learning_rate": 2.1988634432656197e-06,
      "loss": 0.9599,
      "step": 16380
    },
    {
      "epoch": 0.940099833610649,
      "grad_norm": 0.26953125,
      "learning_rate": 2.1780281235686206e-06,
      "loss": 0.9062,
      "step": 16385
    },
    {
      "epoch": 0.9403867118021688,
      "grad_norm": 0.25390625,
      "learning_rate": 2.1572908999643705e-06,
      "loss": 0.9129,
      "step": 16390
    },
    {
      "epoch": 0.9406735899936887,
      "grad_norm": 0.26171875,
      "learning_rate": 2.13665179324819e-06,
      "loss": 0.8914,
      "step": 16395
    },
    {
      "epoch": 0.9409604681852085,
      "grad_norm": 0.26171875,
      "learning_rate": 2.116110824117046e-06,
      "loss": 0.88,
      "step": 16400
    },
    {
      "epoch": 0.9412473463767285,
      "grad_norm": 0.271484375,
      "learning_rate": 2.0956680131694604e-06,
      "loss": 0.944,
      "step": 16405
    },
    {
      "epoch": 0.9415342245682483,
      "grad_norm": 0.271484375,
      "learning_rate": 2.075323380905536e-06,
      "loss": 0.9598,
      "step": 16410
    },
    {
      "epoch": 0.9418211027597682,
      "grad_norm": 0.27734375,
      "learning_rate": 2.0550769477269084e-06,
      "loss": 0.986,
      "step": 16415
    },
    {
      "epoch": 0.9421079809512881,
      "grad_norm": 0.263671875,
      "learning_rate": 2.0349287339367364e-06,
      "loss": 0.9844,
      "step": 16420
    },
    {
      "epoch": 0.942394859142808,
      "grad_norm": 0.279296875,
      "learning_rate": 2.0148787597397136e-06,
      "loss": 0.9443,
      "step": 16425
    },
    {
      "epoch": 0.9426817373343278,
      "grad_norm": 0.255859375,
      "learning_rate": 1.99492704524199e-06,
      "loss": 0.9414,
      "step": 16430
    },
    {
      "epoch": 0.9429686155258478,
      "grad_norm": 0.263671875,
      "learning_rate": 1.9750736104511947e-06,
      "loss": 0.9172,
      "step": 16435
    },
    {
      "epoch": 0.9432554937173676,
      "grad_norm": 0.265625,
      "learning_rate": 1.955318475276391e-06,
      "loss": 0.9453,
      "step": 16440
    },
    {
      "epoch": 0.9435423719088875,
      "grad_norm": 0.251953125,
      "learning_rate": 1.935661659528054e-06,
      "loss": 0.9433,
      "step": 16445
    },
    {
      "epoch": 0.9438292501004074,
      "grad_norm": 0.28515625,
      "learning_rate": 1.9161031829181275e-06,
      "loss": 0.9827,
      "step": 16450
    },
    {
      "epoch": 0.9441161282919273,
      "grad_norm": 0.271484375,
      "learning_rate": 1.8966430650598554e-06,
      "loss": 0.9666,
      "step": 16455
    },
    {
      "epoch": 0.9444030064834471,
      "grad_norm": 0.283203125,
      "learning_rate": 1.8772813254679166e-06,
      "loss": 0.9609,
      "step": 16460
    },
    {
      "epoch": 0.9446898846749671,
      "grad_norm": 0.2451171875,
      "learning_rate": 1.85801798355828e-06,
      "loss": 0.9515,
      "step": 16465
    },
    {
      "epoch": 0.9449767628664869,
      "grad_norm": 0.267578125,
      "learning_rate": 1.8388530586482932e-06,
      "loss": 0.9266,
      "step": 16470
    },
    {
      "epoch": 0.9452636410580068,
      "grad_norm": 0.259765625,
      "learning_rate": 1.8197865699565497e-06,
      "loss": 0.9186,
      "step": 16475
    },
    {
      "epoch": 0.9455505192495266,
      "grad_norm": 0.26953125,
      "learning_rate": 1.8008185366030217e-06,
      "loss": 0.9243,
      "step": 16480
    },
    {
      "epoch": 0.9458373974410466,
      "grad_norm": 0.27734375,
      "learning_rate": 1.7819489776088493e-06,
      "loss": 0.9013,
      "step": 16485
    },
    {
      "epoch": 0.9461242756325664,
      "grad_norm": 0.27734375,
      "learning_rate": 1.7631779118964852e-06,
      "loss": 0.9348,
      "step": 16490
    },
    {
      "epoch": 0.9464111538240862,
      "grad_norm": 0.27734375,
      "learning_rate": 1.7445053582895944e-06,
      "loss": 0.9947,
      "step": 16495
    },
    {
      "epoch": 0.9466980320156062,
      "grad_norm": 0.26171875,
      "learning_rate": 1.7259313355130647e-06,
      "loss": 0.9336,
      "step": 16500
    },
    {
      "epoch": 0.946984910207126,
      "grad_norm": 0.271484375,
      "learning_rate": 1.7074558621929526e-06,
      "loss": 0.9448,
      "step": 16505
    },
    {
      "epoch": 0.9472717883986459,
      "grad_norm": 0.259765625,
      "learning_rate": 1.6890789568565156e-06,
      "loss": 0.9379,
      "step": 16510
    },
    {
      "epoch": 0.9475586665901659,
      "grad_norm": 0.28515625,
      "learning_rate": 1.670800637932146e-06,
      "loss": 0.9013,
      "step": 16515
    },
    {
      "epoch": 0.9478455447816857,
      "grad_norm": 0.25390625,
      "learning_rate": 1.6526209237493928e-06,
      "loss": 0.9358,
      "step": 16520
    },
    {
      "epoch": 0.9481324229732055,
      "grad_norm": 0.263671875,
      "learning_rate": 1.634539832538895e-06,
      "loss": 0.9984,
      "step": 16525
    },
    {
      "epoch": 0.9484193011647255,
      "grad_norm": 0.263671875,
      "learning_rate": 1.6165573824324488e-06,
      "loss": 0.9786,
      "step": 16530
    },
    {
      "epoch": 0.9487061793562453,
      "grad_norm": 0.263671875,
      "learning_rate": 1.5986735914628625e-06,
      "loss": 0.9573,
      "step": 16535
    },
    {
      "epoch": 0.9489930575477652,
      "grad_norm": 0.314453125,
      "learning_rate": 1.5808884775640464e-06,
      "loss": 0.9199,
      "step": 16540
    },
    {
      "epoch": 0.9492799357392852,
      "grad_norm": 0.29296875,
      "learning_rate": 1.5632020585709673e-06,
      "loss": 0.9071,
      "step": 16545
    },
    {
      "epoch": 0.949566813930805,
      "grad_norm": 0.275390625,
      "learning_rate": 1.5456143522195931e-06,
      "loss": 0.9682,
      "step": 16550
    },
    {
      "epoch": 0.9498536921223248,
      "grad_norm": 0.25390625,
      "learning_rate": 1.5281253761469161e-06,
      "loss": 0.9099,
      "step": 16555
    },
    {
      "epoch": 0.9501405703138447,
      "grad_norm": 0.314453125,
      "learning_rate": 1.5107351478909293e-06,
      "loss": 0.9268,
      "step": 16560
    },
    {
      "epoch": 0.9504274485053646,
      "grad_norm": 0.28125,
      "learning_rate": 1.493443684890583e-06,
      "loss": 0.9555,
      "step": 16565
    },
    {
      "epoch": 0.9507143266968845,
      "grad_norm": 0.263671875,
      "learning_rate": 1.4762510044857957e-06,
      "loss": 0.9207,
      "step": 16570
    },
    {
      "epoch": 0.9510012048884043,
      "grad_norm": 0.265625,
      "learning_rate": 1.4591571239174317e-06,
      "loss": 0.9716,
      "step": 16575
    },
    {
      "epoch": 0.9512880830799243,
      "grad_norm": 0.255859375,
      "learning_rate": 1.4421620603272789e-06,
      "loss": 0.9015,
      "step": 16580
    },
    {
      "epoch": 0.9515749612714441,
      "grad_norm": 0.255859375,
      "learning_rate": 1.4252658307580048e-06,
      "loss": 0.9391,
      "step": 16585
    },
    {
      "epoch": 0.951861839462964,
      "grad_norm": 0.267578125,
      "learning_rate": 1.4084684521531887e-06,
      "loss": 0.9711,
      "step": 16590
    },
    {
      "epoch": 0.952148717654484,
      "grad_norm": 0.275390625,
      "learning_rate": 1.3917699413573014e-06,
      "loss": 0.955,
      "step": 16595
    },
    {
      "epoch": 0.9524355958460038,
      "grad_norm": 0.255859375,
      "learning_rate": 1.375170315115637e-06,
      "loss": 0.9687,
      "step": 16600
    },
    {
      "epoch": 0.9527224740375236,
      "grad_norm": 0.27734375,
      "learning_rate": 1.3586695900743352e-06,
      "loss": 1.0067,
      "step": 16605
    },
    {
      "epoch": 0.9530093522290436,
      "grad_norm": 0.28125,
      "learning_rate": 1.3422677827803599e-06,
      "loss": 0.9846,
      "step": 16610
    },
    {
      "epoch": 0.9532962304205634,
      "grad_norm": 0.251953125,
      "learning_rate": 1.3259649096814763e-06,
      "loss": 0.9407,
      "step": 16615
    },
    {
      "epoch": 0.9535831086120833,
      "grad_norm": 0.28125,
      "learning_rate": 1.3097609871262295e-06,
      "loss": 1.0087,
      "step": 16620
    },
    {
      "epoch": 0.9538699868036032,
      "grad_norm": 0.2734375,
      "learning_rate": 1.293656031363988e-06,
      "loss": 1.0001,
      "step": 16625
    },
    {
      "epoch": 0.9541568649951231,
      "grad_norm": 0.25390625,
      "learning_rate": 1.2776500585448215e-06,
      "loss": 0.9324,
      "step": 16630
    },
    {
      "epoch": 0.9544437431866429,
      "grad_norm": 0.26953125,
      "learning_rate": 1.2617430847195356e-06,
      "loss": 0.8486,
      "step": 16635
    },
    {
      "epoch": 0.9547306213781628,
      "grad_norm": 0.25390625,
      "learning_rate": 1.2459351258396812e-06,
      "loss": 0.982,
      "step": 16640
    },
    {
      "epoch": 0.9550174995696827,
      "grad_norm": 0.25390625,
      "learning_rate": 1.2302261977575447e-06,
      "loss": 0.9321,
      "step": 16645
    },
    {
      "epoch": 0.9553043777612026,
      "grad_norm": 0.271484375,
      "learning_rate": 1.2146163162260581e-06,
      "loss": 1.0542,
      "step": 16650
    },
    {
      "epoch": 0.9555912559527224,
      "grad_norm": 0.26171875,
      "learning_rate": 1.1991054968988336e-06,
      "loss": 0.9529,
      "step": 16655
    },
    {
      "epoch": 0.9558781341442424,
      "grad_norm": 0.263671875,
      "learning_rate": 1.183693755330173e-06,
      "loss": 0.9219,
      "step": 16660
    },
    {
      "epoch": 0.9561650123357622,
      "grad_norm": 0.28515625,
      "learning_rate": 1.1683811069749916e-06,
      "loss": 0.9837,
      "step": 16665
    },
    {
      "epoch": 0.9564518905272821,
      "grad_norm": 0.298828125,
      "learning_rate": 1.1531675671888619e-06,
      "loss": 0.9397,
      "step": 16670
    },
    {
      "epoch": 0.956738768718802,
      "grad_norm": 0.265625,
      "learning_rate": 1.1380531512279469e-06,
      "loss": 0.9552,
      "step": 16675
    },
    {
      "epoch": 0.9570256469103219,
      "grad_norm": 0.28125,
      "learning_rate": 1.1230378742490222e-06,
      "loss": 0.9718,
      "step": 16680
    },
    {
      "epoch": 0.9573125251018417,
      "grad_norm": 0.2734375,
      "learning_rate": 1.1081217513094212e-06,
      "loss": 0.9079,
      "step": 16685
    },
    {
      "epoch": 0.9575994032933617,
      "grad_norm": 0.330078125,
      "learning_rate": 1.0933047973670896e-06,
      "loss": 0.97,
      "step": 16690
    },
    {
      "epoch": 0.9578862814848815,
      "grad_norm": 0.28125,
      "learning_rate": 1.0785870272804977e-06,
      "loss": 0.899,
      "step": 16695
    },
    {
      "epoch": 0.9581731596764014,
      "grad_norm": 0.283203125,
      "learning_rate": 1.0639684558086504e-06,
      "loss": 0.9541,
      "step": 16700
    },
    {
      "epoch": 0.9584600378679213,
      "grad_norm": 0.26953125,
      "learning_rate": 1.0494490976110883e-06,
      "loss": 0.9538,
      "step": 16705
    },
    {
      "epoch": 0.9587469160594412,
      "grad_norm": 0.271484375,
      "learning_rate": 1.035028967247864e-06,
      "loss": 0.995,
      "step": 16710
    },
    {
      "epoch": 0.959033794250961,
      "grad_norm": 0.275390625,
      "learning_rate": 1.0207080791794998e-06,
      "loss": 0.9229,
      "step": 16715
    },
    {
      "epoch": 0.9593206724424809,
      "grad_norm": 0.279296875,
      "learning_rate": 1.006486447767019e-06,
      "loss": 0.9349,
      "step": 16720
    },
    {
      "epoch": 0.9596075506340008,
      "grad_norm": 0.28515625,
      "learning_rate": 9.923640872719131e-07,
      "loss": 0.9353,
      "step": 16725
    },
    {
      "epoch": 0.9598944288255207,
      "grad_norm": 0.25,
      "learning_rate": 9.78341011856121e-07,
      "loss": 0.9204,
      "step": 16730
    },
    {
      "epoch": 0.9601813070170405,
      "grad_norm": 0.279296875,
      "learning_rate": 9.644172355819936e-07,
      "loss": 1.047,
      "step": 16735
    },
    {
      "epoch": 0.9604681852085605,
      "grad_norm": 0.267578125,
      "learning_rate": 9.505927724123509e-07,
      "loss": 0.9377,
      "step": 16740
    },
    {
      "epoch": 0.9607550634000803,
      "grad_norm": 0.265625,
      "learning_rate": 9.368676362103701e-07,
      "loss": 0.968,
      "step": 16745
    },
    {
      "epoch": 0.9610419415916002,
      "grad_norm": 0.2490234375,
      "learning_rate": 9.232418407396636e-07,
      "loss": 0.9917,
      "step": 16750
    },
    {
      "epoch": 0.9613288197831201,
      "grad_norm": 0.251953125,
      "learning_rate": 9.097153996642238e-07,
      "loss": 0.949,
      "step": 16755
    },
    {
      "epoch": 0.96161569797464,
      "grad_norm": 0.259765625,
      "learning_rate": 8.962883265483668e-07,
      "loss": 0.9995,
      "step": 16760
    },
    {
      "epoch": 0.9619025761661598,
      "grad_norm": 0.283203125,
      "learning_rate": 8.829606348567999e-07,
      "loss": 0.937,
      "step": 16765
    },
    {
      "epoch": 0.9621894543576798,
      "grad_norm": 0.2734375,
      "learning_rate": 8.697323379545653e-07,
      "loss": 0.9242,
      "step": 16770
    },
    {
      "epoch": 0.9624763325491996,
      "grad_norm": 0.279296875,
      "learning_rate": 8.566034491070407e-07,
      "loss": 0.9806,
      "step": 16775
    },
    {
      "epoch": 0.9627632107407195,
      "grad_norm": 0.25390625,
      "learning_rate": 8.435739814798949e-07,
      "loss": 0.9588,
      "step": 16780
    },
    {
      "epoch": 0.9630500889322394,
      "grad_norm": 0.23828125,
      "learning_rate": 8.30643948139087e-07,
      "loss": 0.9256,
      "step": 16785
    },
    {
      "epoch": 0.9633369671237593,
      "grad_norm": 0.271484375,
      "learning_rate": 8.178133620509232e-07,
      "loss": 0.8975,
      "step": 16790
    },
    {
      "epoch": 0.9636238453152791,
      "grad_norm": 0.25,
      "learning_rate": 8.050822360819221e-07,
      "loss": 1.0417,
      "step": 16795
    },
    {
      "epoch": 0.963910723506799,
      "grad_norm": 0.263671875,
      "learning_rate": 7.924505829988716e-07,
      "loss": 0.9042,
      "step": 16800
    },
    {
      "epoch": 0.9641976016983189,
      "grad_norm": 0.265625,
      "learning_rate": 7.79918415468861e-07,
      "loss": 0.8764,
      "step": 16805
    },
    {
      "epoch": 0.9644844798898388,
      "grad_norm": 0.255859375,
      "learning_rate": 7.674857460591379e-07,
      "loss": 0.9449,
      "step": 16810
    },
    {
      "epoch": 0.9647713580813586,
      "grad_norm": 0.26953125,
      "learning_rate": 7.551525872372289e-07,
      "loss": 0.9472,
      "step": 16815
    },
    {
      "epoch": 0.9650582362728786,
      "grad_norm": 0.287109375,
      "learning_rate": 7.429189513708524e-07,
      "loss": 0.9963,
      "step": 16820
    },
    {
      "epoch": 0.9653451144643984,
      "grad_norm": 0.263671875,
      "learning_rate": 7.307848507279169e-07,
      "loss": 0.9572,
      "step": 16825
    },
    {
      "epoch": 0.9656319926559183,
      "grad_norm": 0.25390625,
      "learning_rate": 7.187502974765448e-07,
      "loss": 0.9461,
      "step": 16830
    },
    {
      "epoch": 0.9659188708474382,
      "grad_norm": 0.27734375,
      "learning_rate": 7.068153036849934e-07,
      "loss": 0.9776,
      "step": 16835
    },
    {
      "epoch": 0.9662057490389581,
      "grad_norm": 0.28125,
      "learning_rate": 6.949798813217001e-07,
      "loss": 0.9549,
      "step": 16840
    },
    {
      "epoch": 0.9664926272304779,
      "grad_norm": 0.263671875,
      "learning_rate": 6.83244042255271e-07,
      "loss": 0.9784,
      "step": 16845
    },
    {
      "epoch": 0.9667795054219979,
      "grad_norm": 0.265625,
      "learning_rate": 6.716077982544256e-07,
      "loss": 0.9131,
      "step": 16850
    },
    {
      "epoch": 0.9670663836135177,
      "grad_norm": 0.251953125,
      "learning_rate": 6.600711609880072e-07,
      "loss": 0.952,
      "step": 16855
    },
    {
      "epoch": 0.9673532618050376,
      "grad_norm": 0.25,
      "learning_rate": 6.486341420249842e-07,
      "loss": 1.0066,
      "step": 16860
    },
    {
      "epoch": 0.9676401399965575,
      "grad_norm": 0.271484375,
      "learning_rate": 6.372967528344264e-07,
      "loss": 0.9966,
      "step": 16865
    },
    {
      "epoch": 0.9679270181880774,
      "grad_norm": 0.251953125,
      "learning_rate": 6.260590047854952e-07,
      "loss": 0.9964,
      "step": 16870
    },
    {
      "epoch": 0.9682138963795972,
      "grad_norm": 0.26953125,
      "learning_rate": 6.149209091474318e-07,
      "loss": 0.8882,
      "step": 16875
    },
    {
      "epoch": 0.968500774571117,
      "grad_norm": 0.279296875,
      "learning_rate": 6.038824770895457e-07,
      "loss": 0.9484,
      "step": 16880
    },
    {
      "epoch": 0.968787652762637,
      "grad_norm": 0.2734375,
      "learning_rate": 5.929437196811827e-07,
      "loss": 0.9168,
      "step": 16885
    },
    {
      "epoch": 0.9690745309541569,
      "grad_norm": 0.265625,
      "learning_rate": 5.821046478917791e-07,
      "loss": 0.9508,
      "step": 16890
    },
    {
      "epoch": 0.9693614091456767,
      "grad_norm": 0.255859375,
      "learning_rate": 5.713652725907626e-07,
      "loss": 1.0207,
      "step": 16895
    },
    {
      "epoch": 0.9696482873371967,
      "grad_norm": 0.259765625,
      "learning_rate": 5.607256045475961e-07,
      "loss": 0.9338,
      "step": 16900
    },
    {
      "epoch": 0.9699351655287165,
      "grad_norm": 0.255859375,
      "learning_rate": 5.501856544317896e-07,
      "loss": 0.8846,
      "step": 16905
    },
    {
      "epoch": 0.9702220437202363,
      "grad_norm": 0.2890625,
      "learning_rate": 5.397454328128104e-07,
      "loss": 0.9225,
      "step": 16910
    },
    {
      "epoch": 0.9705089219117563,
      "grad_norm": 0.267578125,
      "learning_rate": 5.294049501601283e-07,
      "loss": 0.9467,
      "step": 16915
    },
    {
      "epoch": 0.9707958001032762,
      "grad_norm": 0.25390625,
      "learning_rate": 5.191642168432154e-07,
      "loss": 0.9385,
      "step": 16920
    },
    {
      "epoch": 0.971082678294796,
      "grad_norm": 0.263671875,
      "learning_rate": 5.090232431315123e-07,
      "loss": 0.9474,
      "step": 16925
    },
    {
      "epoch": 0.971369556486316,
      "grad_norm": 0.271484375,
      "learning_rate": 4.989820391943845e-07,
      "loss": 0.9796,
      "step": 16930
    },
    {
      "epoch": 0.9716564346778358,
      "grad_norm": 0.2734375,
      "learning_rate": 4.890406151011884e-07,
      "loss": 0.9807,
      "step": 16935
    },
    {
      "epoch": 0.9719433128693556,
      "grad_norm": 0.2490234375,
      "learning_rate": 4.79198980821216e-07,
      "loss": 0.9716,
      "step": 16940
    },
    {
      "epoch": 0.9722301910608756,
      "grad_norm": 0.267578125,
      "learning_rate": 4.694571462236619e-07,
      "loss": 0.9392,
      "step": 16945
    },
    {
      "epoch": 0.9725170692523954,
      "grad_norm": 0.26171875,
      "learning_rate": 4.5981512107766687e-07,
      "loss": 0.9309,
      "step": 16950
    },
    {
      "epoch": 0.9728039474439153,
      "grad_norm": 0.23828125,
      "learning_rate": 4.5027291505227443e-07,
      "loss": 0.848,
      "step": 16955
    },
    {
      "epoch": 0.9730908256354351,
      "grad_norm": 0.25,
      "learning_rate": 4.408305377164301e-07,
      "loss": 0.8948,
      "step": 16960
    },
    {
      "epoch": 0.9733777038269551,
      "grad_norm": 0.248046875,
      "learning_rate": 4.314879985389708e-07,
      "loss": 0.8556,
      "step": 16965
    },
    {
      "epoch": 0.9736645820184749,
      "grad_norm": 0.287109375,
      "learning_rate": 4.222453068886245e-07,
      "loss": 0.9207,
      "step": 16970
    },
    {
      "epoch": 0.9739514602099948,
      "grad_norm": 0.25390625,
      "learning_rate": 4.13102472033966e-07,
      "loss": 0.9779,
      "step": 16975
    },
    {
      "epoch": 0.9742383384015147,
      "grad_norm": 0.248046875,
      "learning_rate": 4.0405950314347243e-07,
      "loss": 0.9539,
      "step": 16980
    },
    {
      "epoch": 0.9745252165930346,
      "grad_norm": 0.28125,
      "learning_rate": 3.951164092854343e-07,
      "loss": 0.9088,
      "step": 16985
    },
    {
      "epoch": 0.9748120947845544,
      "grad_norm": 0.275390625,
      "learning_rate": 3.862731994280111e-07,
      "loss": 0.9245,
      "step": 16990
    },
    {
      "epoch": 0.9750989729760744,
      "grad_norm": 0.302734375,
      "learning_rate": 3.775298824391982e-07,
      "loss": 0.9384,
      "step": 16995
    },
    {
      "epoch": 0.9753858511675942,
      "grad_norm": 0.3125,
      "learning_rate": 3.688864670868153e-07,
      "loss": 0.9567,
      "step": 17000
    },
    {
      "epoch": 0.9756727293591141,
      "grad_norm": 0.28125,
      "learning_rate": 3.6034296203848463e-07,
      "loss": 0.9531,
      "step": 17005
    },
    {
      "epoch": 0.975959607550634,
      "grad_norm": 0.263671875,
      "learning_rate": 3.51899375861664e-07,
      "loss": 0.9382,
      "step": 17010
    },
    {
      "epoch": 0.9762464857421539,
      "grad_norm": 0.255859375,
      "learning_rate": 3.435557170236026e-07,
      "loss": 0.9319,
      "step": 17015
    },
    {
      "epoch": 0.9765333639336737,
      "grad_norm": 0.2490234375,
      "learning_rate": 3.3531199389132963e-07,
      "loss": 0.9408,
      "step": 17020
    },
    {
      "epoch": 0.9768202421251937,
      "grad_norm": 0.265625,
      "learning_rate": 3.271682147316879e-07,
      "loss": 0.8888,
      "step": 17025
    },
    {
      "epoch": 0.9771071203167135,
      "grad_norm": 0.27734375,
      "learning_rate": 3.1912438771125594e-07,
      "loss": 0.9604,
      "step": 17030
    },
    {
      "epoch": 0.9773939985082334,
      "grad_norm": 0.251953125,
      "learning_rate": 3.111805208964036e-07,
      "loss": 0.9139,
      "step": 17035
    },
    {
      "epoch": 0.9776808766997532,
      "grad_norm": 0.267578125,
      "learning_rate": 3.0333662225328074e-07,
      "loss": 0.979,
      "step": 17040
    },
    {
      "epoch": 0.9779677548912732,
      "grad_norm": 0.2578125,
      "learning_rate": 2.955926996477398e-07,
      "loss": 0.9427,
      "step": 17045
    },
    {
      "epoch": 0.978254633082793,
      "grad_norm": 0.271484375,
      "learning_rate": 2.8794876084541346e-07,
      "loss": 0.9177,
      "step": 17050
    },
    {
      "epoch": 0.9785415112743129,
      "grad_norm": 0.2578125,
      "learning_rate": 2.8040481351166993e-07,
      "loss": 0.9348,
      "step": 17055
    },
    {
      "epoch": 0.9788283894658328,
      "grad_norm": 0.287109375,
      "learning_rate": 2.7296086521158003e-07,
      "loss": 0.9361,
      "step": 17060
    },
    {
      "epoch": 0.9791152676573527,
      "grad_norm": 0.29296875,
      "learning_rate": 2.6561692340997255e-07,
      "loss": 0.9358,
      "step": 17065
    },
    {
      "epoch": 0.9794021458488725,
      "grad_norm": 0.28515625,
      "learning_rate": 2.583729954713454e-07,
      "loss": 0.9239,
      "step": 17070
    },
    {
      "epoch": 0.9796890240403925,
      "grad_norm": 0.287109375,
      "learning_rate": 2.512290886599433e-07,
      "loss": 0.9068,
      "step": 17075
    },
    {
      "epoch": 0.9799759022319123,
      "grad_norm": 0.267578125,
      "learning_rate": 2.441852101396802e-07,
      "loss": 0.9934,
      "step": 17080
    },
    {
      "epoch": 0.9802627804234322,
      "grad_norm": 0.255859375,
      "learning_rate": 2.3724136697418353e-07,
      "loss": 0.9389,
      "step": 17085
    },
    {
      "epoch": 0.9805496586149521,
      "grad_norm": 0.271484375,
      "learning_rate": 2.303975661267499e-07,
      "loss": 0.908,
      "step": 17090
    },
    {
      "epoch": 0.980836536806472,
      "grad_norm": 0.287109375,
      "learning_rate": 2.2365381446035617e-07,
      "loss": 0.9184,
      "step": 17095
    },
    {
      "epoch": 0.9811234149979918,
      "grad_norm": 0.275390625,
      "learning_rate": 2.170101187376594e-07,
      "loss": 1.0126,
      "step": 17100
    },
    {
      "epoch": 0.9814102931895118,
      "grad_norm": 0.26171875,
      "learning_rate": 2.104664856209637e-07,
      "loss": 0.894,
      "step": 17105
    },
    {
      "epoch": 0.9816971713810316,
      "grad_norm": 0.263671875,
      "learning_rate": 2.0402292167224225e-07,
      "loss": 0.9465,
      "step": 17110
    },
    {
      "epoch": 0.9819840495725515,
      "grad_norm": 0.29296875,
      "learning_rate": 1.976794333531151e-07,
      "loss": 0.9665,
      "step": 17115
    },
    {
      "epoch": 0.9822709277640713,
      "grad_norm": 0.26171875,
      "learning_rate": 1.9143602702484942e-07,
      "loss": 0.9468,
      "step": 17120
    },
    {
      "epoch": 0.9825578059555913,
      "grad_norm": 0.28515625,
      "learning_rate": 1.8529270894833694e-07,
      "loss": 0.9169,
      "step": 17125
    },
    {
      "epoch": 0.9828446841471111,
      "grad_norm": 0.267578125,
      "learning_rate": 1.7924948528412755e-07,
      "loss": 0.8872,
      "step": 17130
    },
    {
      "epoch": 0.983131562338631,
      "grad_norm": 0.263671875,
      "learning_rate": 1.733063620923625e-07,
      "loss": 0.9435,
      "step": 17135
    },
    {
      "epoch": 0.9834184405301509,
      "grad_norm": 0.263671875,
      "learning_rate": 1.6746334533284115e-07,
      "loss": 0.9701,
      "step": 17140
    },
    {
      "epoch": 0.9837053187216708,
      "grad_norm": 0.263671875,
      "learning_rate": 1.6172044086492088e-07,
      "loss": 0.884,
      "step": 17145
    },
    {
      "epoch": 0.9839921969131906,
      "grad_norm": 0.244140625,
      "learning_rate": 1.5607765444762834e-07,
      "loss": 0.9289,
      "step": 17150
    },
    {
      "epoch": 0.9842790751047106,
      "grad_norm": 0.275390625,
      "learning_rate": 1.5053499173955933e-07,
      "loss": 0.9762,
      "step": 17155
    },
    {
      "epoch": 0.9845659532962304,
      "grad_norm": 0.275390625,
      "learning_rate": 1.4509245829888996e-07,
      "loss": 0.96,
      "step": 17160
    },
    {
      "epoch": 0.9848528314877503,
      "grad_norm": 0.265625,
      "learning_rate": 1.3975005958341003e-07,
      "loss": 0.9242,
      "step": 17165
    },
    {
      "epoch": 0.9851397096792702,
      "grad_norm": 0.275390625,
      "learning_rate": 1.3450780095051186e-07,
      "loss": 0.8535,
      "step": 17170
    },
    {
      "epoch": 0.9854265878707901,
      "grad_norm": 0.271484375,
      "learning_rate": 1.2936568765711254e-07,
      "loss": 0.8634,
      "step": 17175
    },
    {
      "epoch": 0.9857134660623099,
      "grad_norm": 0.2431640625,
      "learning_rate": 1.2432372485975395e-07,
      "loss": 0.9243,
      "step": 17180
    },
    {
      "epoch": 0.9860003442538299,
      "grad_norm": 0.28515625,
      "learning_rate": 1.193819176145361e-07,
      "loss": 0.9684,
      "step": 17185
    },
    {
      "epoch": 0.9862872224453497,
      "grad_norm": 0.24609375,
      "learning_rate": 1.1454027087708375e-07,
      "loss": 0.836,
      "step": 17190
    },
    {
      "epoch": 0.9865741006368696,
      "grad_norm": 0.265625,
      "learning_rate": 1.0979878950263534e-07,
      "loss": 0.9529,
      "step": 17195
    },
    {
      "epoch": 0.9868609788283894,
      "grad_norm": 0.2734375,
      "learning_rate": 1.0515747824595413e-07,
      "loss": 1.0052,
      "step": 17200
    },
    {
      "epoch": 0.9871478570199094,
      "grad_norm": 0.314453125,
      "learning_rate": 1.0061634176136148e-07,
      "loss": 0.9857,
      "step": 17205
    },
    {
      "epoch": 0.9874347352114292,
      "grad_norm": 0.2470703125,
      "learning_rate": 9.617538460270358e-08,
      "loss": 0.9079,
      "step": 17210
    },
    {
      "epoch": 0.9877216134029491,
      "grad_norm": 0.263671875,
      "learning_rate": 9.183461122339587e-08,
      "loss": 0.8843,
      "step": 17215
    },
    {
      "epoch": 0.988008491594469,
      "grad_norm": 0.27734375,
      "learning_rate": 8.759402597637855e-08,
      "loss": 0.9595,
      "step": 17220
    },
    {
      "epoch": 0.9882953697859889,
      "grad_norm": 0.265625,
      "learning_rate": 8.345363311410559e-08,
      "loss": 1.0177,
      "step": 17225
    },
    {
      "epoch": 0.9885822479775087,
      "grad_norm": 0.2734375,
      "learning_rate": 7.941343678857794e-08,
      "loss": 0.9305,
      "step": 17230
    },
    {
      "epoch": 0.9888691261690287,
      "grad_norm": 0.26953125,
      "learning_rate": 7.547344105132137e-08,
      "loss": 0.9569,
      "step": 17235
    },
    {
      "epoch": 0.9891560043605485,
      "grad_norm": 0.26953125,
      "learning_rate": 7.16336498533643e-08,
      "loss": 0.9185,
      "step": 17240
    },
    {
      "epoch": 0.9894428825520684,
      "grad_norm": 0.2734375,
      "learning_rate": 6.789406704527102e-08,
      "loss": 0.9569,
      "step": 17245
    },
    {
      "epoch": 0.9897297607435883,
      "grad_norm": 0.2578125,
      "learning_rate": 6.425469637708625e-08,
      "loss": 0.9679,
      "step": 17250
    },
    {
      "epoch": 0.9900166389351082,
      "grad_norm": 0.271484375,
      "learning_rate": 6.071554149837955e-08,
      "loss": 0.9106,
      "step": 17255
    },
    {
      "epoch": 0.990303517126628,
      "grad_norm": 0.271484375,
      "learning_rate": 5.727660595823414e-08,
      "loss": 0.947,
      "step": 17260
    },
    {
      "epoch": 0.990590395318148,
      "grad_norm": 0.259765625,
      "learning_rate": 5.39378932052248e-08,
      "loss": 0.9619,
      "step": 17265
    },
    {
      "epoch": 0.9908772735096678,
      "grad_norm": 0.267578125,
      "learning_rate": 5.069940658740668e-08,
      "loss": 0.953,
      "step": 17270
    },
    {
      "epoch": 0.9911641517011877,
      "grad_norm": 0.26171875,
      "learning_rate": 4.7561149352348675e-08,
      "loss": 0.9723,
      "step": 17275
    },
    {
      "epoch": 0.9914510298927075,
      "grad_norm": 0.265625,
      "learning_rate": 4.4523124647100065e-08,
      "loss": 0.988,
      "step": 17280
    },
    {
      "epoch": 0.9917379080842275,
      "grad_norm": 0.26171875,
      "learning_rate": 4.158533551820165e-08,
      "loss": 0.9843,
      "step": 17285
    },
    {
      "epoch": 0.9920247862757473,
      "grad_norm": 0.26171875,
      "learning_rate": 3.874778491167463e-08,
      "loss": 0.9379,
      "step": 17290
    },
    {
      "epoch": 0.9923116644672672,
      "grad_norm": 0.259765625,
      "learning_rate": 3.6010475673009524e-08,
      "loss": 0.9815,
      "step": 17295
    },
    {
      "epoch": 0.9925985426587871,
      "grad_norm": 0.25390625,
      "learning_rate": 3.337341054721055e-08,
      "loss": 0.9384,
      "step": 17300
    },
    {
      "epoch": 0.992885420850307,
      "grad_norm": 0.265625,
      "learning_rate": 3.0836592178717926e-08,
      "loss": 0.9017,
      "step": 17305
    },
    {
      "epoch": 0.9931722990418268,
      "grad_norm": 0.2490234375,
      "learning_rate": 2.840002311145229e-08,
      "loss": 0.8892,
      "step": 17310
    },
    {
      "epoch": 0.9934591772333468,
      "grad_norm": 0.287109375,
      "learning_rate": 2.6063705788825776e-08,
      "loss": 0.9695,
      "step": 17315
    },
    {
      "epoch": 0.9937460554248666,
      "grad_norm": 0.267578125,
      "learning_rate": 2.3827642553686523e-08,
      "loss": 0.9772,
      "step": 17320
    },
    {
      "epoch": 0.9940329336163864,
      "grad_norm": 0.267578125,
      "learning_rate": 2.169183564837418e-08,
      "loss": 0.9066,
      "step": 17325
    },
    {
      "epoch": 0.9943198118079064,
      "grad_norm": 0.26171875,
      "learning_rate": 1.9656287214686598e-08,
      "loss": 0.8939,
      "step": 17330
    },
    {
      "epoch": 0.9946066899994263,
      "grad_norm": 0.2734375,
      "learning_rate": 1.772099929385762e-08,
      "loss": 0.9562,
      "step": 17335
    },
    {
      "epoch": 0.9948935681909461,
      "grad_norm": 0.265625,
      "learning_rate": 1.588597382661261e-08,
      "loss": 1.017,
      "step": 17340
    },
    {
      "epoch": 0.995180446382466,
      "grad_norm": 0.265625,
      "learning_rate": 1.4151212653112922e-08,
      "loss": 0.9434,
      "step": 17345
    },
    {
      "epoch": 0.9954673245739859,
      "grad_norm": 0.291015625,
      "learning_rate": 1.2516717512989219e-08,
      "loss": 0.9375,
      "step": 17350
    },
    {
      "epoch": 0.9957542027655057,
      "grad_norm": 0.267578125,
      "learning_rate": 1.0982490045308157e-08,
      "loss": 0.9094,
      "step": 17355
    },
    {
      "epoch": 0.9960410809570256,
      "grad_norm": 0.265625,
      "learning_rate": 9.548531788605707e-09,
      "loss": 0.9988,
      "step": 17360
    },
    {
      "epoch": 0.9963279591485455,
      "grad_norm": 0.271484375,
      "learning_rate": 8.21484418084273e-09,
      "loss": 0.9424,
      "step": 17365
    },
    {
      "epoch": 0.9966148373400654,
      "grad_norm": 0.25,
      "learning_rate": 6.98142855946049e-09,
      "loss": 0.9161,
      "step": 17370
    },
    {
      "epoch": 0.9969017155315852,
      "grad_norm": 0.287109375,
      "learning_rate": 5.848286161314054e-09,
      "loss": 1.0036,
      "step": 17375
    },
    {
      "epoch": 0.9971885937231052,
      "grad_norm": 0.287109375,
      "learning_rate": 4.81541812273889e-09,
      "loss": 0.982,
      "step": 17380
    },
    {
      "epoch": 0.997475471914625,
      "grad_norm": 0.265625,
      "learning_rate": 3.882825479495367e-09,
      "loss": 0.9417,
      "step": 17385
    },
    {
      "epoch": 0.9977623501061449,
      "grad_norm": 0.2578125,
      "learning_rate": 3.050509166779847e-09,
      "loss": 0.9789,
      "step": 17390
    },
    {
      "epoch": 0.9980492282976648,
      "grad_norm": 0.314453125,
      "learning_rate": 2.3184700192357966e-09,
      "loss": 0.9634,
      "step": 17395
    },
    {
      "epoch": 0.9983361064891847,
      "grad_norm": 0.259765625,
      "learning_rate": 1.6867087709759866e-09,
      "loss": 0.9081,
      "step": 17400
    },
    {
      "epoch": 0.9986229846807045,
      "grad_norm": 0.259765625,
      "learning_rate": 1.1552260555047767e-09,
      "loss": 0.9128,
      "step": 17405
    },
    {
      "epoch": 0.9989098628722245,
      "grad_norm": 0.275390625,
      "learning_rate": 7.240224058180367e-10,
      "loss": 0.9465,
      "step": 17410
    },
    {
      "epoch": 0.9991967410637443,
      "grad_norm": 0.267578125,
      "learning_rate": 3.93098254314328e-10,
      "loss": 0.9759,
      "step": 17415
    },
    {
      "epoch": 0.9994836192552642,
      "grad_norm": 0.26953125,
      "learning_rate": 1.624539328615171e-10,
      "loss": 0.9412,
      "step": 17420
    },
    {
      "epoch": 0.9997704974467841,
      "grad_norm": 0.2890625,
      "learning_rate": 3.208967271906005e-11,
      "loss": 0.9467,
      "step": 17425
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.9523706436157227,
      "eval_runtime": 525.7052,
      "eval_samples_per_second": 29.353,
      "eval_steps_per_second": 0.46,
      "step": 17429
    },
    {
      "epoch": 1.0,
      "step": 17429,
      "total_flos": 1.2254844500131709e+19,
      "train_loss": 0.9352519262663926,
      "train_runtime": 32149.2413,
      "train_samples_per_second": 4.337,
      "train_steps_per_second": 0.542
    }
  ],
  "logging_steps": 5,
  "max_steps": 17429,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.2254844500131709e+19,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}