{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 17429, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 5.7375638303976134e-05, "grad_norm": 0.435546875, "learning_rate": 1.1474469305794606e-07, "loss": 1.3588, "step": 1 }, { "epoch": 0.00028687819151988064, "grad_norm": 0.59375, "learning_rate": 5.737234652897304e-07, "loss": 1.0991, "step": 5 }, { "epoch": 0.0005737563830397613, "grad_norm": 0.349609375, "learning_rate": 1.1474469305794607e-06, "loss": 1.1268, "step": 10 }, { "epoch": 0.0008606345745596419, "grad_norm": 0.421875, "learning_rate": 1.721170395869191e-06, "loss": 1.1761, "step": 15 }, { "epoch": 0.0011475127660795226, "grad_norm": 0.33984375, "learning_rate": 2.2948938611589215e-06, "loss": 1.1986, "step": 20 }, { "epoch": 0.0014343909575994032, "grad_norm": 0.359375, "learning_rate": 2.868617326448652e-06, "loss": 1.1268, "step": 25 }, { "epoch": 0.0017212691491192839, "grad_norm": 0.296875, "learning_rate": 3.442340791738382e-06, "loss": 1.0619, "step": 30 }, { "epoch": 0.0020081473406391645, "grad_norm": 0.337890625, "learning_rate": 4.016064257028113e-06, "loss": 1.1175, "step": 35 }, { "epoch": 0.002295025532159045, "grad_norm": 0.328125, "learning_rate": 4.589787722317843e-06, "loss": 1.1421, "step": 40 }, { "epoch": 0.002581903723678926, "grad_norm": 0.32421875, "learning_rate": 5.163511187607573e-06, "loss": 1.1015, "step": 45 }, { "epoch": 0.0028687819151988064, "grad_norm": 0.33984375, "learning_rate": 5.737234652897304e-06, "loss": 1.138, "step": 50 }, { "epoch": 0.003155660106718687, "grad_norm": 0.345703125, "learning_rate": 6.310958118187034e-06, "loss": 1.1281, "step": 55 }, { "epoch": 0.0034425382982385677, "grad_norm": 0.318359375, "learning_rate": 6.884681583476764e-06, "loss": 1.1071, "step": 60 }, { "epoch": 0.0037294164897584484, "grad_norm": 0.322265625, "learning_rate": 7.4584050487664955e-06, "loss": 1.1443, "step": 65 }, { "epoch": 0.004016294681278329, "grad_norm": 0.326171875, "learning_rate": 8.032128514056226e-06, "loss": 1.1746, "step": 70 }, { "epoch": 0.00430317287279821, "grad_norm": 0.291015625, "learning_rate": 8.605851979345956e-06, "loss": 1.0971, "step": 75 }, { "epoch": 0.00459005106431809, "grad_norm": 0.314453125, "learning_rate": 9.179575444635686e-06, "loss": 1.1356, "step": 80 }, { "epoch": 0.004876929255837971, "grad_norm": 0.28125, "learning_rate": 9.753298909925416e-06, "loss": 1.0908, "step": 85 }, { "epoch": 0.005163807447357852, "grad_norm": 0.30078125, "learning_rate": 1.0327022375215146e-05, "loss": 1.0388, "step": 90 }, { "epoch": 0.005450685638877732, "grad_norm": 0.296875, "learning_rate": 1.0900745840504876e-05, "loss": 1.1107, "step": 95 }, { "epoch": 0.005737563830397613, "grad_norm": 0.294921875, "learning_rate": 1.1474469305794608e-05, "loss": 1.0713, "step": 100 }, { "epoch": 0.0060244420219174935, "grad_norm": 0.37890625, "learning_rate": 1.2048192771084338e-05, "loss": 1.1581, "step": 105 }, { "epoch": 0.006311320213437374, "grad_norm": 0.376953125, "learning_rate": 1.2621916236374069e-05, "loss": 1.0059, "step": 110 }, { "epoch": 0.006598198404957255, "grad_norm": 0.255859375, "learning_rate": 1.3195639701663797e-05, "loss": 0.9732, "step": 115 }, { "epoch": 0.0068850765964771354, "grad_norm": 0.2890625, "learning_rate": 1.3769363166953527e-05, "loss": 0.9736, "step": 120 }, { "epoch": 0.007171954787997016, "grad_norm": 0.2734375, "learning_rate": 1.434308663224326e-05, "loss": 1.044, "step": 125 }, { "epoch": 0.007458832979516897, "grad_norm": 0.314453125, "learning_rate": 1.4916810097532991e-05, "loss": 1.0218, "step": 130 }, { "epoch": 0.007745711171036777, "grad_norm": 0.236328125, "learning_rate": 1.549053356282272e-05, "loss": 0.9904, "step": 135 }, { "epoch": 0.008032589362556658, "grad_norm": 0.2490234375, "learning_rate": 1.606425702811245e-05, "loss": 0.9875, "step": 140 }, { "epoch": 0.008319467554076539, "grad_norm": 0.25390625, "learning_rate": 1.663798049340218e-05, "loss": 0.9874, "step": 145 }, { "epoch": 0.00860634574559642, "grad_norm": 0.29296875, "learning_rate": 1.721170395869191e-05, "loss": 0.9866, "step": 150 }, { "epoch": 0.0088932239371163, "grad_norm": 0.26171875, "learning_rate": 1.7785427423981642e-05, "loss": 0.9857, "step": 155 }, { "epoch": 0.00918010212863618, "grad_norm": 0.38671875, "learning_rate": 1.8359150889271372e-05, "loss": 1.0017, "step": 160 }, { "epoch": 0.009466980320156061, "grad_norm": 0.287109375, "learning_rate": 1.8932874354561102e-05, "loss": 1.025, "step": 165 }, { "epoch": 0.009753858511675942, "grad_norm": 0.28515625, "learning_rate": 1.9506597819850832e-05, "loss": 1.015, "step": 170 }, { "epoch": 0.010040736703195823, "grad_norm": 0.27734375, "learning_rate": 2.0080321285140562e-05, "loss": 0.9669, "step": 175 }, { "epoch": 0.010327614894715703, "grad_norm": 0.2451171875, "learning_rate": 2.0654044750430293e-05, "loss": 1.035, "step": 180 }, { "epoch": 0.010614493086235584, "grad_norm": 0.2578125, "learning_rate": 2.1227768215720023e-05, "loss": 1.0398, "step": 185 }, { "epoch": 0.010901371277755464, "grad_norm": 0.271484375, "learning_rate": 2.1801491681009753e-05, "loss": 1.05, "step": 190 }, { "epoch": 0.011188249469275345, "grad_norm": 0.267578125, "learning_rate": 2.2375215146299486e-05, "loss": 0.9228, "step": 195 }, { "epoch": 0.011475127660795226, "grad_norm": 0.263671875, "learning_rate": 2.2948938611589217e-05, "loss": 1.0787, "step": 200 }, { "epoch": 0.011762005852315106, "grad_norm": 0.314453125, "learning_rate": 2.3522662076878947e-05, "loss": 1.0928, "step": 205 }, { "epoch": 0.012048884043834987, "grad_norm": 0.28515625, "learning_rate": 2.4096385542168677e-05, "loss": 1.0675, "step": 210 }, { "epoch": 0.012335762235354868, "grad_norm": 0.2734375, "learning_rate": 2.4670109007458407e-05, "loss": 1.02, "step": 215 }, { "epoch": 0.012622640426874748, "grad_norm": 0.291015625, "learning_rate": 2.5243832472748137e-05, "loss": 0.9736, "step": 220 }, { "epoch": 0.012909518618394629, "grad_norm": 0.271484375, "learning_rate": 2.5817555938037867e-05, "loss": 1.0255, "step": 225 }, { "epoch": 0.01319639680991451, "grad_norm": 0.33203125, "learning_rate": 2.6391279403327594e-05, "loss": 1.1085, "step": 230 }, { "epoch": 0.01348327500143439, "grad_norm": 0.2734375, "learning_rate": 2.6965002868617328e-05, "loss": 1.0057, "step": 235 }, { "epoch": 0.013770153192954271, "grad_norm": 0.291015625, "learning_rate": 2.7538726333907055e-05, "loss": 1.0452, "step": 240 }, { "epoch": 0.014057031384474152, "grad_norm": 0.2890625, "learning_rate": 2.8112449799196788e-05, "loss": 1.0544, "step": 245 }, { "epoch": 0.014343909575994032, "grad_norm": 0.3046875, "learning_rate": 2.868617326448652e-05, "loss": 1.0181, "step": 250 }, { "epoch": 0.014630787767513913, "grad_norm": 0.28125, "learning_rate": 2.925989672977625e-05, "loss": 0.9926, "step": 255 }, { "epoch": 0.014917665959033793, "grad_norm": 0.34375, "learning_rate": 2.9833620195065982e-05, "loss": 0.9662, "step": 260 }, { "epoch": 0.015204544150553674, "grad_norm": 0.30078125, "learning_rate": 3.040734366035571e-05, "loss": 1.0339, "step": 265 }, { "epoch": 0.015491422342073555, "grad_norm": 0.283203125, "learning_rate": 3.098106712564544e-05, "loss": 0.9458, "step": 270 }, { "epoch": 0.015778300533593435, "grad_norm": 0.287109375, "learning_rate": 3.155479059093517e-05, "loss": 0.96, "step": 275 }, { "epoch": 0.016065178725113316, "grad_norm": 0.3125, "learning_rate": 3.21285140562249e-05, "loss": 0.9781, "step": 280 }, { "epoch": 0.016352056916633197, "grad_norm": 0.291015625, "learning_rate": 3.2702237521514636e-05, "loss": 1.0201, "step": 285 }, { "epoch": 0.016638935108153077, "grad_norm": 0.306640625, "learning_rate": 3.327596098680436e-05, "loss": 1.0515, "step": 290 }, { "epoch": 0.016925813299672958, "grad_norm": 0.3125, "learning_rate": 3.3849684452094096e-05, "loss": 1.0647, "step": 295 }, { "epoch": 0.01721269149119284, "grad_norm": 0.30078125, "learning_rate": 3.442340791738382e-05, "loss": 0.9632, "step": 300 }, { "epoch": 0.01749956968271272, "grad_norm": 0.29296875, "learning_rate": 3.499713138267356e-05, "loss": 0.9992, "step": 305 }, { "epoch": 0.0177864478742326, "grad_norm": 0.287109375, "learning_rate": 3.5570854847963284e-05, "loss": 0.9193, "step": 310 }, { "epoch": 0.01807332606575248, "grad_norm": 0.328125, "learning_rate": 3.614457831325301e-05, "loss": 1.0958, "step": 315 }, { "epoch": 0.01836020425727236, "grad_norm": 0.3046875, "learning_rate": 3.6718301778542744e-05, "loss": 0.9924, "step": 320 }, { "epoch": 0.018647082448792242, "grad_norm": 0.3046875, "learning_rate": 3.729202524383247e-05, "loss": 1.0167, "step": 325 }, { "epoch": 0.018933960640312122, "grad_norm": 0.314453125, "learning_rate": 3.7865748709122204e-05, "loss": 1.0477, "step": 330 }, { "epoch": 0.019220838831832003, "grad_norm": 0.30859375, "learning_rate": 3.843947217441193e-05, "loss": 1.0352, "step": 335 }, { "epoch": 0.019507717023351884, "grad_norm": 0.296875, "learning_rate": 3.9013195639701665e-05, "loss": 1.0158, "step": 340 }, { "epoch": 0.019794595214871764, "grad_norm": 0.279296875, "learning_rate": 3.958691910499139e-05, "loss": 1.0116, "step": 345 }, { "epoch": 0.020081473406391645, "grad_norm": 0.306640625, "learning_rate": 4.0160642570281125e-05, "loss": 1.0194, "step": 350 }, { "epoch": 0.020368351597911526, "grad_norm": 0.298828125, "learning_rate": 4.073436603557086e-05, "loss": 1.0497, "step": 355 }, { "epoch": 0.020655229789431406, "grad_norm": 0.306640625, "learning_rate": 4.1308089500860585e-05, "loss": 1.0213, "step": 360 }, { "epoch": 0.020942107980951287, "grad_norm": 0.2890625, "learning_rate": 4.188181296615032e-05, "loss": 1.0122, "step": 365 }, { "epoch": 0.021228986172471168, "grad_norm": 0.296875, "learning_rate": 4.2455536431440046e-05, "loss": 0.9192, "step": 370 }, { "epoch": 0.021515864363991048, "grad_norm": 0.287109375, "learning_rate": 4.302925989672978e-05, "loss": 0.9714, "step": 375 }, { "epoch": 0.02180274255551093, "grad_norm": 0.287109375, "learning_rate": 4.3602983362019506e-05, "loss": 0.9926, "step": 380 }, { "epoch": 0.02208962074703081, "grad_norm": 0.345703125, "learning_rate": 4.417670682730924e-05, "loss": 0.9874, "step": 385 }, { "epoch": 0.02237649893855069, "grad_norm": 0.310546875, "learning_rate": 4.475043029259897e-05, "loss": 0.9786, "step": 390 }, { "epoch": 0.02266337713007057, "grad_norm": 0.28515625, "learning_rate": 4.53241537578887e-05, "loss": 1.0112, "step": 395 }, { "epoch": 0.02295025532159045, "grad_norm": 0.27734375, "learning_rate": 4.589787722317843e-05, "loss": 0.9551, "step": 400 }, { "epoch": 0.023237133513110332, "grad_norm": 0.28125, "learning_rate": 4.647160068846816e-05, "loss": 0.9801, "step": 405 }, { "epoch": 0.023524011704630213, "grad_norm": 0.28515625, "learning_rate": 4.7045324153757894e-05, "loss": 0.9705, "step": 410 }, { "epoch": 0.023810889896150093, "grad_norm": 0.283203125, "learning_rate": 4.761904761904762e-05, "loss": 1.0404, "step": 415 }, { "epoch": 0.024097768087669974, "grad_norm": 0.296875, "learning_rate": 4.8192771084337354e-05, "loss": 0.9633, "step": 420 }, { "epoch": 0.024384646279189855, "grad_norm": 0.29296875, "learning_rate": 4.876649454962709e-05, "loss": 1.0002, "step": 425 }, { "epoch": 0.024671524470709735, "grad_norm": 0.3125, "learning_rate": 4.9340218014916814e-05, "loss": 1.0285, "step": 430 }, { "epoch": 0.024958402662229616, "grad_norm": 0.2734375, "learning_rate": 4.991394148020654e-05, "loss": 1.0558, "step": 435 }, { "epoch": 0.025245280853749497, "grad_norm": 0.26171875, "learning_rate": 5.0487664945496275e-05, "loss": 0.9442, "step": 440 }, { "epoch": 0.025532159045269377, "grad_norm": 0.2578125, "learning_rate": 5.1061388410786e-05, "loss": 1.0159, "step": 445 }, { "epoch": 0.025819037236789258, "grad_norm": 0.275390625, "learning_rate": 5.1635111876075735e-05, "loss": 0.9705, "step": 450 }, { "epoch": 0.02610591542830914, "grad_norm": 0.279296875, "learning_rate": 5.220883534136547e-05, "loss": 0.9864, "step": 455 }, { "epoch": 0.02639279361982902, "grad_norm": 0.291015625, "learning_rate": 5.278255880665519e-05, "loss": 1.0527, "step": 460 }, { "epoch": 0.0266796718113489, "grad_norm": 0.275390625, "learning_rate": 5.335628227194492e-05, "loss": 0.9403, "step": 465 }, { "epoch": 0.02696655000286878, "grad_norm": 0.26953125, "learning_rate": 5.3930005737234656e-05, "loss": 1.0662, "step": 470 }, { "epoch": 0.02725342819438866, "grad_norm": 0.291015625, "learning_rate": 5.450372920252439e-05, "loss": 0.9985, "step": 475 }, { "epoch": 0.027540306385908542, "grad_norm": 0.27734375, "learning_rate": 5.507745266781411e-05, "loss": 0.9654, "step": 480 }, { "epoch": 0.027827184577428422, "grad_norm": 0.310546875, "learning_rate": 5.565117613310384e-05, "loss": 1.0284, "step": 485 }, { "epoch": 0.028114062768948303, "grad_norm": 0.296875, "learning_rate": 5.6224899598393576e-05, "loss": 0.9302, "step": 490 }, { "epoch": 0.028400940960468184, "grad_norm": 0.322265625, "learning_rate": 5.679862306368331e-05, "loss": 0.9556, "step": 495 }, { "epoch": 0.028687819151988064, "grad_norm": 0.291015625, "learning_rate": 5.737234652897304e-05, "loss": 1.0002, "step": 500 }, { "epoch": 0.028974697343507945, "grad_norm": 0.306640625, "learning_rate": 5.794606999426276e-05, "loss": 0.9644, "step": 505 }, { "epoch": 0.029261575535027826, "grad_norm": 0.345703125, "learning_rate": 5.85197934595525e-05, "loss": 0.9905, "step": 510 }, { "epoch": 0.029548453726547706, "grad_norm": 0.291015625, "learning_rate": 5.909351692484223e-05, "loss": 1.0985, "step": 515 }, { "epoch": 0.029835331918067587, "grad_norm": 0.287109375, "learning_rate": 5.9667240390131964e-05, "loss": 1.0632, "step": 520 }, { "epoch": 0.030122210109587468, "grad_norm": 0.267578125, "learning_rate": 6.02409638554217e-05, "loss": 1.0096, "step": 525 }, { "epoch": 0.030409088301107348, "grad_norm": 0.291015625, "learning_rate": 6.081468732071142e-05, "loss": 0.9602, "step": 530 }, { "epoch": 0.03069596649262723, "grad_norm": 0.263671875, "learning_rate": 6.138841078600115e-05, "loss": 0.9651, "step": 535 }, { "epoch": 0.03098284468414711, "grad_norm": 0.26171875, "learning_rate": 6.196213425129088e-05, "loss": 1.0011, "step": 540 }, { "epoch": 0.03126972287566699, "grad_norm": 0.26953125, "learning_rate": 6.253585771658062e-05, "loss": 0.9452, "step": 545 }, { "epoch": 0.03155660106718687, "grad_norm": 0.2890625, "learning_rate": 6.310958118187034e-05, "loss": 1.0467, "step": 550 }, { "epoch": 0.03184347925870675, "grad_norm": 0.265625, "learning_rate": 6.368330464716007e-05, "loss": 0.9509, "step": 555 }, { "epoch": 0.03213035745022663, "grad_norm": 0.291015625, "learning_rate": 6.42570281124498e-05, "loss": 1.0041, "step": 560 }, { "epoch": 0.03241723564174651, "grad_norm": 0.283203125, "learning_rate": 6.483075157773954e-05, "loss": 0.9926, "step": 565 }, { "epoch": 0.03270411383326639, "grad_norm": 0.25390625, "learning_rate": 6.540447504302927e-05, "loss": 1.03, "step": 570 }, { "epoch": 0.032990992024786274, "grad_norm": 0.275390625, "learning_rate": 6.597819850831899e-05, "loss": 1.0156, "step": 575 }, { "epoch": 0.033277870216306155, "grad_norm": 0.30078125, "learning_rate": 6.655192197360873e-05, "loss": 0.9382, "step": 580 }, { "epoch": 0.033564748407826035, "grad_norm": 0.2578125, "learning_rate": 6.712564543889846e-05, "loss": 1.0226, "step": 585 }, { "epoch": 0.033851626599345916, "grad_norm": 0.275390625, "learning_rate": 6.769936890418819e-05, "loss": 1.0166, "step": 590 }, { "epoch": 0.0341385047908658, "grad_norm": 0.271484375, "learning_rate": 6.827309236947793e-05, "loss": 1.0474, "step": 595 }, { "epoch": 0.03442538298238568, "grad_norm": 0.271484375, "learning_rate": 6.884681583476765e-05, "loss": 1.0629, "step": 600 }, { "epoch": 0.03471226117390556, "grad_norm": 0.26953125, "learning_rate": 6.942053930005738e-05, "loss": 0.9475, "step": 605 }, { "epoch": 0.03499913936542544, "grad_norm": 0.2734375, "learning_rate": 6.999426276534711e-05, "loss": 0.9764, "step": 610 }, { "epoch": 0.03528601755694532, "grad_norm": 0.25, "learning_rate": 7.056798623063683e-05, "loss": 0.9431, "step": 615 }, { "epoch": 0.0355728957484652, "grad_norm": 0.287109375, "learning_rate": 7.114170969592657e-05, "loss": 1.0624, "step": 620 }, { "epoch": 0.03585977393998508, "grad_norm": 0.255859375, "learning_rate": 7.17154331612163e-05, "loss": 1.0391, "step": 625 }, { "epoch": 0.03614665213150496, "grad_norm": 0.27734375, "learning_rate": 7.228915662650602e-05, "loss": 0.9731, "step": 630 }, { "epoch": 0.03643353032302484, "grad_norm": 0.26171875, "learning_rate": 7.286288009179575e-05, "loss": 0.9593, "step": 635 }, { "epoch": 0.03672040851454472, "grad_norm": 0.26953125, "learning_rate": 7.343660355708549e-05, "loss": 0.9843, "step": 640 }, { "epoch": 0.0370072867060646, "grad_norm": 0.26171875, "learning_rate": 7.401032702237521e-05, "loss": 0.977, "step": 645 }, { "epoch": 0.037294164897584484, "grad_norm": 0.26171875, "learning_rate": 7.458405048766494e-05, "loss": 0.9185, "step": 650 }, { "epoch": 0.037581043089104364, "grad_norm": 0.2578125, "learning_rate": 7.515777395295467e-05, "loss": 1.0353, "step": 655 }, { "epoch": 0.037867921280624245, "grad_norm": 0.259765625, "learning_rate": 7.573149741824441e-05, "loss": 0.9667, "step": 660 }, { "epoch": 0.038154799472144126, "grad_norm": 0.267578125, "learning_rate": 7.630522088353414e-05, "loss": 0.9865, "step": 665 }, { "epoch": 0.038441677663664006, "grad_norm": 0.25, "learning_rate": 7.687894434882386e-05, "loss": 0.8748, "step": 670 }, { "epoch": 0.03872855585518389, "grad_norm": 0.26171875, "learning_rate": 7.74526678141136e-05, "loss": 1.0654, "step": 675 }, { "epoch": 0.03901543404670377, "grad_norm": 0.255859375, "learning_rate": 7.802639127940333e-05, "loss": 1.0622, "step": 680 }, { "epoch": 0.03930231223822365, "grad_norm": 0.259765625, "learning_rate": 7.860011474469306e-05, "loss": 0.9676, "step": 685 }, { "epoch": 0.03958919042974353, "grad_norm": 0.275390625, "learning_rate": 7.917383820998278e-05, "loss": 0.9626, "step": 690 }, { "epoch": 0.03987606862126341, "grad_norm": 0.259765625, "learning_rate": 7.974756167527252e-05, "loss": 0.9586, "step": 695 }, { "epoch": 0.04016294681278329, "grad_norm": 0.267578125, "learning_rate": 8.032128514056225e-05, "loss": 1.0465, "step": 700 }, { "epoch": 0.04044982500430317, "grad_norm": 0.2470703125, "learning_rate": 8.089500860585198e-05, "loss": 0.9695, "step": 705 }, { "epoch": 0.04073670319582305, "grad_norm": 0.2578125, "learning_rate": 8.146873207114172e-05, "loss": 0.9291, "step": 710 }, { "epoch": 0.04102358138734293, "grad_norm": 0.26171875, "learning_rate": 8.204245553643144e-05, "loss": 1.0255, "step": 715 }, { "epoch": 0.04131045957886281, "grad_norm": 0.25, "learning_rate": 8.261617900172117e-05, "loss": 0.9625, "step": 720 }, { "epoch": 0.04159733777038269, "grad_norm": 0.26171875, "learning_rate": 8.31899024670109e-05, "loss": 0.9827, "step": 725 }, { "epoch": 0.041884215961902574, "grad_norm": 0.236328125, "learning_rate": 8.376362593230064e-05, "loss": 1.0375, "step": 730 }, { "epoch": 0.042171094153422455, "grad_norm": 0.279296875, "learning_rate": 8.433734939759037e-05, "loss": 1.0185, "step": 735 }, { "epoch": 0.042457972344942335, "grad_norm": 0.2470703125, "learning_rate": 8.491107286288009e-05, "loss": 0.9531, "step": 740 }, { "epoch": 0.042744850536462216, "grad_norm": 0.25, "learning_rate": 8.548479632816982e-05, "loss": 1.0196, "step": 745 }, { "epoch": 0.043031728727982096, "grad_norm": 0.2421875, "learning_rate": 8.605851979345956e-05, "loss": 0.9729, "step": 750 }, { "epoch": 0.04331860691950198, "grad_norm": 0.251953125, "learning_rate": 8.663224325874929e-05, "loss": 0.9538, "step": 755 }, { "epoch": 0.04360548511102186, "grad_norm": 0.2490234375, "learning_rate": 8.720596672403901e-05, "loss": 0.9903, "step": 760 }, { "epoch": 0.04389236330254174, "grad_norm": 0.2490234375, "learning_rate": 8.777969018932875e-05, "loss": 0.9379, "step": 765 }, { "epoch": 0.04417924149406162, "grad_norm": 0.255859375, "learning_rate": 8.835341365461848e-05, "loss": 1.0388, "step": 770 }, { "epoch": 0.0444661196855815, "grad_norm": 0.23828125, "learning_rate": 8.892713711990821e-05, "loss": 0.9352, "step": 775 }, { "epoch": 0.04475299787710138, "grad_norm": 0.259765625, "learning_rate": 8.950086058519795e-05, "loss": 1.0, "step": 780 }, { "epoch": 0.04503987606862126, "grad_norm": 0.2470703125, "learning_rate": 9.007458405048767e-05, "loss": 0.9976, "step": 785 }, { "epoch": 0.04532675426014114, "grad_norm": 0.2451171875, "learning_rate": 9.06483075157774e-05, "loss": 0.9689, "step": 790 }, { "epoch": 0.04561363245166102, "grad_norm": 0.248046875, "learning_rate": 9.122203098106713e-05, "loss": 1.0199, "step": 795 }, { "epoch": 0.0459005106431809, "grad_norm": 0.24609375, "learning_rate": 9.179575444635687e-05, "loss": 0.9226, "step": 800 }, { "epoch": 0.046187388834700784, "grad_norm": 0.248046875, "learning_rate": 9.23694779116466e-05, "loss": 0.9483, "step": 805 }, { "epoch": 0.046474267026220664, "grad_norm": 0.2470703125, "learning_rate": 9.294320137693632e-05, "loss": 0.9711, "step": 810 }, { "epoch": 0.046761145217740545, "grad_norm": 0.248046875, "learning_rate": 9.351692484222605e-05, "loss": 0.9429, "step": 815 }, { "epoch": 0.047048023409260425, "grad_norm": 0.240234375, "learning_rate": 9.409064830751579e-05, "loss": 0.9849, "step": 820 }, { "epoch": 0.047334901600780306, "grad_norm": 0.234375, "learning_rate": 9.466437177280552e-05, "loss": 1.0336, "step": 825 }, { "epoch": 0.04762177979230019, "grad_norm": 0.24609375, "learning_rate": 9.523809523809524e-05, "loss": 1.0258, "step": 830 }, { "epoch": 0.04790865798382007, "grad_norm": 0.24609375, "learning_rate": 9.581181870338497e-05, "loss": 0.9852, "step": 835 }, { "epoch": 0.04819553617533995, "grad_norm": 0.232421875, "learning_rate": 9.638554216867471e-05, "loss": 0.9533, "step": 840 }, { "epoch": 0.04848241436685983, "grad_norm": 0.2451171875, "learning_rate": 9.695926563396444e-05, "loss": 0.9448, "step": 845 }, { "epoch": 0.04876929255837971, "grad_norm": 0.25390625, "learning_rate": 9.753298909925417e-05, "loss": 1.0124, "step": 850 }, { "epoch": 0.04905617074989959, "grad_norm": 0.25, "learning_rate": 9.81067125645439e-05, "loss": 0.9959, "step": 855 }, { "epoch": 0.04934304894141947, "grad_norm": 0.25390625, "learning_rate": 9.868043602983363e-05, "loss": 1.0287, "step": 860 }, { "epoch": 0.04962992713293935, "grad_norm": 0.25390625, "learning_rate": 9.925415949512336e-05, "loss": 0.9831, "step": 865 }, { "epoch": 0.04991680532445923, "grad_norm": 0.255859375, "learning_rate": 9.982788296041308e-05, "loss": 0.9833, "step": 870 }, { "epoch": 0.05020368351597911, "grad_norm": 0.23828125, "learning_rate": 0.00010040160642570282, "loss": 0.9969, "step": 875 }, { "epoch": 0.05049056170749899, "grad_norm": 0.251953125, "learning_rate": 0.00010097532989099255, "loss": 1.0437, "step": 880 }, { "epoch": 0.050777439899018874, "grad_norm": 0.234375, "learning_rate": 0.00010154905335628228, "loss": 1.031, "step": 885 }, { "epoch": 0.051064318090538754, "grad_norm": 0.2412109375, "learning_rate": 0.000102122776821572, "loss": 1.0094, "step": 890 }, { "epoch": 0.051351196282058635, "grad_norm": 0.2314453125, "learning_rate": 0.00010269650028686174, "loss": 0.9602, "step": 895 }, { "epoch": 0.051638074473578516, "grad_norm": 0.2451171875, "learning_rate": 0.00010327022375215147, "loss": 1.0467, "step": 900 }, { "epoch": 0.051924952665098396, "grad_norm": 0.248046875, "learning_rate": 0.00010384394721744119, "loss": 0.9441, "step": 905 }, { "epoch": 0.05221183085661828, "grad_norm": 0.234375, "learning_rate": 0.00010441767068273094, "loss": 1.0901, "step": 910 }, { "epoch": 0.05249870904813816, "grad_norm": 0.232421875, "learning_rate": 0.00010499139414802066, "loss": 0.9532, "step": 915 }, { "epoch": 0.05278558723965804, "grad_norm": 0.2451171875, "learning_rate": 0.00010556511761331038, "loss": 1.0329, "step": 920 }, { "epoch": 0.05307246543117792, "grad_norm": 0.2353515625, "learning_rate": 0.00010613884107860012, "loss": 0.9893, "step": 925 }, { "epoch": 0.0533593436226978, "grad_norm": 0.25, "learning_rate": 0.00010671256454388984, "loss": 0.9567, "step": 930 }, { "epoch": 0.05364622181421768, "grad_norm": 0.26171875, "learning_rate": 0.00010728628800917956, "loss": 1.0186, "step": 935 }, { "epoch": 0.05393310000573756, "grad_norm": 0.2353515625, "learning_rate": 0.00010786001147446931, "loss": 0.9697, "step": 940 }, { "epoch": 0.05421997819725744, "grad_norm": 0.2353515625, "learning_rate": 0.00010843373493975903, "loss": 0.967, "step": 945 }, { "epoch": 0.05450685638877732, "grad_norm": 0.220703125, "learning_rate": 0.00010900745840504878, "loss": 0.9157, "step": 950 }, { "epoch": 0.0547937345802972, "grad_norm": 0.2373046875, "learning_rate": 0.0001095811818703385, "loss": 0.9864, "step": 955 }, { "epoch": 0.055080612771817083, "grad_norm": 0.25, "learning_rate": 0.00011015490533562822, "loss": 0.9616, "step": 960 }, { "epoch": 0.055367490963336964, "grad_norm": 0.232421875, "learning_rate": 0.00011072862880091797, "loss": 0.9466, "step": 965 }, { "epoch": 0.055654369154856845, "grad_norm": 0.2421875, "learning_rate": 0.00011130235226620769, "loss": 0.966, "step": 970 }, { "epoch": 0.055941247346376725, "grad_norm": 0.255859375, "learning_rate": 0.00011187607573149743, "loss": 0.9292, "step": 975 }, { "epoch": 0.056228125537896606, "grad_norm": 0.236328125, "learning_rate": 0.00011244979919678715, "loss": 0.9724, "step": 980 }, { "epoch": 0.05651500372941649, "grad_norm": 0.2353515625, "learning_rate": 0.00011302352266207687, "loss": 0.9683, "step": 985 }, { "epoch": 0.05680188192093637, "grad_norm": 0.2373046875, "learning_rate": 0.00011359724612736662, "loss": 0.9164, "step": 990 }, { "epoch": 0.05708876011245625, "grad_norm": 0.23828125, "learning_rate": 0.00011417096959265634, "loss": 0.9309, "step": 995 }, { "epoch": 0.05737563830397613, "grad_norm": 0.24609375, "learning_rate": 0.00011474469305794609, "loss": 1.0243, "step": 1000 }, { "epoch": 0.05766251649549601, "grad_norm": 0.2255859375, "learning_rate": 0.0001153184165232358, "loss": 0.9735, "step": 1005 }, { "epoch": 0.05794939468701589, "grad_norm": 0.2412109375, "learning_rate": 0.00011589213998852553, "loss": 1.0049, "step": 1010 }, { "epoch": 0.05823627287853577, "grad_norm": 0.25390625, "learning_rate": 0.00011646586345381527, "loss": 1.0304, "step": 1015 }, { "epoch": 0.05852315107005565, "grad_norm": 0.25390625, "learning_rate": 0.000117039586919105, "loss": 0.9818, "step": 1020 }, { "epoch": 0.05881002926157553, "grad_norm": 0.2353515625, "learning_rate": 0.00011761331038439474, "loss": 0.9646, "step": 1025 }, { "epoch": 0.05909690745309541, "grad_norm": 0.2255859375, "learning_rate": 0.00011818703384968446, "loss": 0.9753, "step": 1030 }, { "epoch": 0.05938378564461529, "grad_norm": 0.2353515625, "learning_rate": 0.00011876075731497418, "loss": 0.9675, "step": 1035 }, { "epoch": 0.059670663836135174, "grad_norm": 0.2109375, "learning_rate": 0.00011933448078026393, "loss": 0.9606, "step": 1040 }, { "epoch": 0.059957542027655054, "grad_norm": 0.2265625, "learning_rate": 0.00011990820424555365, "loss": 1.0847, "step": 1045 }, { "epoch": 0.060244420219174935, "grad_norm": 0.2265625, "learning_rate": 0.0001204819277108434, "loss": 1.0064, "step": 1050 }, { "epoch": 0.060531298410694816, "grad_norm": 0.2578125, "learning_rate": 0.00012105565117613311, "loss": 0.967, "step": 1055 }, { "epoch": 0.060818176602214696, "grad_norm": 0.2333984375, "learning_rate": 0.00012162937464142283, "loss": 0.9456, "step": 1060 }, { "epoch": 0.06110505479373458, "grad_norm": 0.2392578125, "learning_rate": 0.00012220309810671257, "loss": 1.0216, "step": 1065 }, { "epoch": 0.06139193298525446, "grad_norm": 0.2392578125, "learning_rate": 0.0001227768215720023, "loss": 0.9022, "step": 1070 }, { "epoch": 0.06167881117677434, "grad_norm": 0.23828125, "learning_rate": 0.000123350545037292, "loss": 0.9295, "step": 1075 }, { "epoch": 0.06196568936829422, "grad_norm": 0.236328125, "learning_rate": 0.00012392426850258177, "loss": 0.983, "step": 1080 }, { "epoch": 0.0622525675598141, "grad_norm": 0.2333984375, "learning_rate": 0.00012449799196787148, "loss": 0.9705, "step": 1085 }, { "epoch": 0.06253944575133398, "grad_norm": 0.2236328125, "learning_rate": 0.00012507171543316124, "loss": 0.9243, "step": 1090 }, { "epoch": 0.06282632394285387, "grad_norm": 0.31640625, "learning_rate": 0.00012564543889845094, "loss": 0.9758, "step": 1095 }, { "epoch": 0.06311320213437374, "grad_norm": 0.2392578125, "learning_rate": 0.00012621916236374068, "loss": 0.9814, "step": 1100 }, { "epoch": 0.06340008032589363, "grad_norm": 0.2470703125, "learning_rate": 0.0001267928858290304, "loss": 0.942, "step": 1105 }, { "epoch": 0.0636869585174135, "grad_norm": 0.23046875, "learning_rate": 0.00012736660929432014, "loss": 0.9782, "step": 1110 }, { "epoch": 0.06397383670893339, "grad_norm": 0.2275390625, "learning_rate": 0.00012794033275960988, "loss": 0.9643, "step": 1115 }, { "epoch": 0.06426071490045326, "grad_norm": 0.2255859375, "learning_rate": 0.0001285140562248996, "loss": 0.9427, "step": 1120 }, { "epoch": 0.06454759309197315, "grad_norm": 0.220703125, "learning_rate": 0.00012908777969018932, "loss": 0.987, "step": 1125 }, { "epoch": 0.06483447128349303, "grad_norm": 0.236328125, "learning_rate": 0.00012966150315547908, "loss": 0.9821, "step": 1130 }, { "epoch": 0.06512134947501291, "grad_norm": 0.234375, "learning_rate": 0.00013023522662076878, "loss": 1.052, "step": 1135 }, { "epoch": 0.06540822766653279, "grad_norm": 0.220703125, "learning_rate": 0.00013080895008605854, "loss": 0.9462, "step": 1140 }, { "epoch": 0.06569510585805267, "grad_norm": 0.2294921875, "learning_rate": 0.00013138267355134825, "loss": 1.0105, "step": 1145 }, { "epoch": 0.06598198404957255, "grad_norm": 0.23828125, "learning_rate": 0.00013195639701663798, "loss": 1.014, "step": 1150 }, { "epoch": 0.06626886224109244, "grad_norm": 0.2265625, "learning_rate": 0.00013253012048192772, "loss": 0.991, "step": 1155 }, { "epoch": 0.06655574043261231, "grad_norm": 0.248046875, "learning_rate": 0.00013310384394721745, "loss": 1.0098, "step": 1160 }, { "epoch": 0.0668426186241322, "grad_norm": 0.2314453125, "learning_rate": 0.00013367756741250719, "loss": 0.9866, "step": 1165 }, { "epoch": 0.06712949681565207, "grad_norm": 0.2294921875, "learning_rate": 0.00013425129087779692, "loss": 0.9608, "step": 1170 }, { "epoch": 0.06741637500717196, "grad_norm": 0.234375, "learning_rate": 0.00013482501434308663, "loss": 0.9711, "step": 1175 }, { "epoch": 0.06770325319869183, "grad_norm": 0.228515625, "learning_rate": 0.00013539873780837639, "loss": 0.9319, "step": 1180 }, { "epoch": 0.06799013139021172, "grad_norm": 0.2314453125, "learning_rate": 0.0001359724612736661, "loss": 0.9614, "step": 1185 }, { "epoch": 0.0682770095817316, "grad_norm": 0.22265625, "learning_rate": 0.00013654618473895585, "loss": 0.9333, "step": 1190 }, { "epoch": 0.06856388777325148, "grad_norm": 0.236328125, "learning_rate": 0.00013711990820424556, "loss": 1.0135, "step": 1195 }, { "epoch": 0.06885076596477135, "grad_norm": 0.2333984375, "learning_rate": 0.0001376936316695353, "loss": 0.9592, "step": 1200 }, { "epoch": 0.06913764415629124, "grad_norm": 0.2265625, "learning_rate": 0.00013826735513482503, "loss": 0.9512, "step": 1205 }, { "epoch": 0.06942452234781112, "grad_norm": 0.2421875, "learning_rate": 0.00013884107860011476, "loss": 0.9239, "step": 1210 }, { "epoch": 0.069711400539331, "grad_norm": 0.234375, "learning_rate": 0.00013941480206540447, "loss": 0.9585, "step": 1215 }, { "epoch": 0.06999827873085088, "grad_norm": 0.2177734375, "learning_rate": 0.00013998852553069423, "loss": 0.9734, "step": 1220 }, { "epoch": 0.07028515692237076, "grad_norm": 0.224609375, "learning_rate": 0.00014056224899598393, "loss": 0.9214, "step": 1225 }, { "epoch": 0.07057203511389064, "grad_norm": 0.2421875, "learning_rate": 0.00014113597246127367, "loss": 0.9652, "step": 1230 }, { "epoch": 0.07085891330541053, "grad_norm": 0.2421875, "learning_rate": 0.0001417096959265634, "loss": 0.9355, "step": 1235 }, { "epoch": 0.0711457914969304, "grad_norm": 0.2294921875, "learning_rate": 0.00014228341939185313, "loss": 0.9577, "step": 1240 }, { "epoch": 0.07143266968845029, "grad_norm": 0.234375, "learning_rate": 0.00014285714285714287, "loss": 0.9403, "step": 1245 }, { "epoch": 0.07171954787997016, "grad_norm": 0.23046875, "learning_rate": 0.0001434308663224326, "loss": 1.0237, "step": 1250 }, { "epoch": 0.07200642607149005, "grad_norm": 0.2431640625, "learning_rate": 0.00014400458978772233, "loss": 0.994, "step": 1255 }, { "epoch": 0.07229330426300992, "grad_norm": 0.2333984375, "learning_rate": 0.00014457831325301204, "loss": 0.9788, "step": 1260 }, { "epoch": 0.07258018245452981, "grad_norm": 0.2294921875, "learning_rate": 0.00014515203671830177, "loss": 1.028, "step": 1265 }, { "epoch": 0.07286706064604968, "grad_norm": 0.228515625, "learning_rate": 0.0001457257601835915, "loss": 0.9475, "step": 1270 }, { "epoch": 0.07315393883756957, "grad_norm": 0.2333984375, "learning_rate": 0.00014629948364888124, "loss": 0.9661, "step": 1275 }, { "epoch": 0.07344081702908944, "grad_norm": 0.23046875, "learning_rate": 0.00014687320711417098, "loss": 1.0316, "step": 1280 }, { "epoch": 0.07372769522060933, "grad_norm": 0.2490234375, "learning_rate": 0.0001474469305794607, "loss": 1.0089, "step": 1285 }, { "epoch": 0.0740145734121292, "grad_norm": 0.236328125, "learning_rate": 0.00014802065404475042, "loss": 0.9406, "step": 1290 }, { "epoch": 0.0743014516036491, "grad_norm": 0.251953125, "learning_rate": 0.00014859437751004018, "loss": 1.0317, "step": 1295 }, { "epoch": 0.07458832979516897, "grad_norm": 0.2392578125, "learning_rate": 0.00014916810097532988, "loss": 0.953, "step": 1300 }, { "epoch": 0.07487520798668885, "grad_norm": 0.263671875, "learning_rate": 0.00014974182444061964, "loss": 0.9658, "step": 1305 }, { "epoch": 0.07516208617820873, "grad_norm": 0.2333984375, "learning_rate": 0.00015031554790590935, "loss": 0.927, "step": 1310 }, { "epoch": 0.07544896436972862, "grad_norm": 0.2216796875, "learning_rate": 0.00015088927137119908, "loss": 0.9357, "step": 1315 }, { "epoch": 0.07573584256124849, "grad_norm": 0.2392578125, "learning_rate": 0.00015146299483648882, "loss": 0.9423, "step": 1320 }, { "epoch": 0.07602272075276838, "grad_norm": 0.259765625, "learning_rate": 0.00015203671830177855, "loss": 0.9861, "step": 1325 }, { "epoch": 0.07630959894428825, "grad_norm": 0.244140625, "learning_rate": 0.00015261044176706828, "loss": 0.9871, "step": 1330 }, { "epoch": 0.07659647713580814, "grad_norm": 0.24609375, "learning_rate": 0.00015318416523235802, "loss": 0.9405, "step": 1335 }, { "epoch": 0.07688335532732801, "grad_norm": 0.2294921875, "learning_rate": 0.00015375788869764772, "loss": 0.9789, "step": 1340 }, { "epoch": 0.0771702335188479, "grad_norm": 0.25390625, "learning_rate": 0.00015433161216293748, "loss": 1.0267, "step": 1345 }, { "epoch": 0.07745711171036777, "grad_norm": 0.2138671875, "learning_rate": 0.0001549053356282272, "loss": 0.9546, "step": 1350 }, { "epoch": 0.07774398990188766, "grad_norm": 0.2353515625, "learning_rate": 0.00015547905909351695, "loss": 0.9847, "step": 1355 }, { "epoch": 0.07803086809340753, "grad_norm": 0.2353515625, "learning_rate": 0.00015605278255880666, "loss": 0.9393, "step": 1360 }, { "epoch": 0.07831774628492742, "grad_norm": 0.2294921875, "learning_rate": 0.0001566265060240964, "loss": 1.0087, "step": 1365 }, { "epoch": 0.0786046244764473, "grad_norm": 0.2255859375, "learning_rate": 0.00015720022948938613, "loss": 0.9367, "step": 1370 }, { "epoch": 0.07889150266796718, "grad_norm": 0.2138671875, "learning_rate": 0.00015777395295467586, "loss": 0.9651, "step": 1375 }, { "epoch": 0.07917838085948706, "grad_norm": 0.271484375, "learning_rate": 0.00015834767641996557, "loss": 1.0075, "step": 1380 }, { "epoch": 0.07946525905100695, "grad_norm": 0.2490234375, "learning_rate": 0.00015892139988525533, "loss": 0.95, "step": 1385 }, { "epoch": 0.07975213724252682, "grad_norm": 0.2421875, "learning_rate": 0.00015949512335054503, "loss": 0.9539, "step": 1390 }, { "epoch": 0.0800390154340467, "grad_norm": 0.2216796875, "learning_rate": 0.0001600688468158348, "loss": 0.9659, "step": 1395 }, { "epoch": 0.08032589362556658, "grad_norm": 0.23046875, "learning_rate": 0.0001606425702811245, "loss": 0.9324, "step": 1400 }, { "epoch": 0.08061277181708647, "grad_norm": 0.23828125, "learning_rate": 0.00016121629374641423, "loss": 1.0698, "step": 1405 }, { "epoch": 0.08089965000860634, "grad_norm": 0.2412109375, "learning_rate": 0.00016179001721170397, "loss": 1.0436, "step": 1410 }, { "epoch": 0.08118652820012623, "grad_norm": 0.2392578125, "learning_rate": 0.0001623637406769937, "loss": 0.9879, "step": 1415 }, { "epoch": 0.0814734063916461, "grad_norm": 0.224609375, "learning_rate": 0.00016293746414228343, "loss": 0.9993, "step": 1420 }, { "epoch": 0.08176028458316599, "grad_norm": 0.2412109375, "learning_rate": 0.00016351118760757317, "loss": 0.9543, "step": 1425 }, { "epoch": 0.08204716277468586, "grad_norm": 0.2265625, "learning_rate": 0.00016408491107286287, "loss": 1.0417, "step": 1430 }, { "epoch": 0.08233404096620575, "grad_norm": 0.24609375, "learning_rate": 0.00016465863453815263, "loss": 0.935, "step": 1435 }, { "epoch": 0.08262091915772563, "grad_norm": 0.265625, "learning_rate": 0.00016523235800344234, "loss": 1.041, "step": 1440 }, { "epoch": 0.08290779734924551, "grad_norm": 0.2421875, "learning_rate": 0.0001658060814687321, "loss": 0.9485, "step": 1445 }, { "epoch": 0.08319467554076539, "grad_norm": 0.21484375, "learning_rate": 0.0001663798049340218, "loss": 0.9971, "step": 1450 }, { "epoch": 0.08348155373228527, "grad_norm": 0.2353515625, "learning_rate": 0.00016695352839931154, "loss": 0.9906, "step": 1455 }, { "epoch": 0.08376843192380515, "grad_norm": 0.2333984375, "learning_rate": 0.00016752725186460127, "loss": 0.905, "step": 1460 }, { "epoch": 0.08405531011532504, "grad_norm": 0.23828125, "learning_rate": 0.000168100975329891, "loss": 0.9967, "step": 1465 }, { "epoch": 0.08434218830684491, "grad_norm": 0.265625, "learning_rate": 0.00016867469879518074, "loss": 1.0322, "step": 1470 }, { "epoch": 0.0846290664983648, "grad_norm": 0.259765625, "learning_rate": 0.00016924842226047048, "loss": 1.0861, "step": 1475 }, { "epoch": 0.08491594468988467, "grad_norm": 0.2412109375, "learning_rate": 0.00016982214572576018, "loss": 1.0018, "step": 1480 }, { "epoch": 0.08520282288140456, "grad_norm": 0.2353515625, "learning_rate": 0.00017039586919104992, "loss": 0.9241, "step": 1485 }, { "epoch": 0.08548970107292443, "grad_norm": 0.216796875, "learning_rate": 0.00017096959265633965, "loss": 0.9617, "step": 1490 }, { "epoch": 0.08577657926444432, "grad_norm": 0.2421875, "learning_rate": 0.00017154331612162938, "loss": 0.9043, "step": 1495 }, { "epoch": 0.08606345745596419, "grad_norm": 0.232421875, "learning_rate": 0.00017211703958691912, "loss": 1.0095, "step": 1500 }, { "epoch": 0.08635033564748408, "grad_norm": 0.2333984375, "learning_rate": 0.00017269076305220885, "loss": 1.0159, "step": 1505 }, { "epoch": 0.08663721383900395, "grad_norm": 0.25390625, "learning_rate": 0.00017326448651749858, "loss": 1.05, "step": 1510 }, { "epoch": 0.08692409203052384, "grad_norm": 0.2421875, "learning_rate": 0.0001738382099827883, "loss": 0.9889, "step": 1515 }, { "epoch": 0.08721097022204372, "grad_norm": 0.2490234375, "learning_rate": 0.00017441193344807802, "loss": 1.0014, "step": 1520 }, { "epoch": 0.0874978484135636, "grad_norm": 0.25390625, "learning_rate": 0.00017498565691336776, "loss": 1.0155, "step": 1525 }, { "epoch": 0.08778472660508348, "grad_norm": 0.2333984375, "learning_rate": 0.0001755593803786575, "loss": 0.9529, "step": 1530 }, { "epoch": 0.08807160479660336, "grad_norm": 0.2373046875, "learning_rate": 0.00017613310384394722, "loss": 0.9015, "step": 1535 }, { "epoch": 0.08835848298812324, "grad_norm": 0.2421875, "learning_rate": 0.00017670682730923696, "loss": 0.958, "step": 1540 }, { "epoch": 0.08864536117964313, "grad_norm": 0.234375, "learning_rate": 0.00017728055077452666, "loss": 1.0366, "step": 1545 }, { "epoch": 0.088932239371163, "grad_norm": 0.2490234375, "learning_rate": 0.00017785427423981642, "loss": 1.0001, "step": 1550 }, { "epoch": 0.08921911756268289, "grad_norm": 0.25390625, "learning_rate": 0.00017842799770510613, "loss": 0.9472, "step": 1555 }, { "epoch": 0.08950599575420276, "grad_norm": 0.232421875, "learning_rate": 0.0001790017211703959, "loss": 0.9278, "step": 1560 }, { "epoch": 0.08979287394572265, "grad_norm": 0.2421875, "learning_rate": 0.0001795754446356856, "loss": 0.9875, "step": 1565 }, { "epoch": 0.09007975213724252, "grad_norm": 0.244140625, "learning_rate": 0.00018014916810097533, "loss": 0.9641, "step": 1570 }, { "epoch": 0.09036663032876241, "grad_norm": 0.240234375, "learning_rate": 0.00018072289156626507, "loss": 0.9106, "step": 1575 }, { "epoch": 0.09065350852028228, "grad_norm": 0.248046875, "learning_rate": 0.0001812966150315548, "loss": 0.9187, "step": 1580 }, { "epoch": 0.09094038671180217, "grad_norm": 0.228515625, "learning_rate": 0.00018187033849684453, "loss": 0.9488, "step": 1585 }, { "epoch": 0.09122726490332204, "grad_norm": 0.25390625, "learning_rate": 0.00018244406196213427, "loss": 0.9628, "step": 1590 }, { "epoch": 0.09151414309484193, "grad_norm": 0.2431640625, "learning_rate": 0.00018301778542742397, "loss": 0.9592, "step": 1595 }, { "epoch": 0.0918010212863618, "grad_norm": 0.25, "learning_rate": 0.00018359150889271373, "loss": 1.0228, "step": 1600 }, { "epoch": 0.0920878994778817, "grad_norm": 0.236328125, "learning_rate": 0.00018416523235800344, "loss": 0.9926, "step": 1605 }, { "epoch": 0.09237477766940157, "grad_norm": 0.25, "learning_rate": 0.0001847389558232932, "loss": 1.0007, "step": 1610 }, { "epoch": 0.09266165586092145, "grad_norm": 0.23828125, "learning_rate": 0.0001853126792885829, "loss": 1.0219, "step": 1615 }, { "epoch": 0.09294853405244133, "grad_norm": 0.25390625, "learning_rate": 0.00018588640275387264, "loss": 0.9644, "step": 1620 }, { "epoch": 0.09323541224396122, "grad_norm": 0.2421875, "learning_rate": 0.00018646012621916237, "loss": 1.0031, "step": 1625 }, { "epoch": 0.09352229043548109, "grad_norm": 0.2255859375, "learning_rate": 0.0001870338496844521, "loss": 1.0037, "step": 1630 }, { "epoch": 0.09380916862700098, "grad_norm": 0.224609375, "learning_rate": 0.00018760757314974184, "loss": 0.9305, "step": 1635 }, { "epoch": 0.09409604681852085, "grad_norm": 0.244140625, "learning_rate": 0.00018818129661503157, "loss": 0.9534, "step": 1640 }, { "epoch": 0.09438292501004074, "grad_norm": 0.2578125, "learning_rate": 0.00018875502008032128, "loss": 1.0235, "step": 1645 }, { "epoch": 0.09466980320156061, "grad_norm": 0.236328125, "learning_rate": 0.00018932874354561104, "loss": 0.9046, "step": 1650 }, { "epoch": 0.0949566813930805, "grad_norm": 0.232421875, "learning_rate": 0.00018990246701090075, "loss": 1.004, "step": 1655 }, { "epoch": 0.09524355958460037, "grad_norm": 0.2451171875, "learning_rate": 0.00019047619047619048, "loss": 0.9329, "step": 1660 }, { "epoch": 0.09553043777612026, "grad_norm": 0.2373046875, "learning_rate": 0.00019104991394148021, "loss": 0.9596, "step": 1665 }, { "epoch": 0.09581731596764013, "grad_norm": 0.2421875, "learning_rate": 0.00019162363740676995, "loss": 0.9716, "step": 1670 }, { "epoch": 0.09610419415916002, "grad_norm": 0.248046875, "learning_rate": 0.00019219736087205968, "loss": 0.9608, "step": 1675 }, { "epoch": 0.0963910723506799, "grad_norm": 0.259765625, "learning_rate": 0.00019277108433734942, "loss": 1.0023, "step": 1680 }, { "epoch": 0.09667795054219978, "grad_norm": 0.25, "learning_rate": 0.00019334480780263912, "loss": 0.9665, "step": 1685 }, { "epoch": 0.09696482873371966, "grad_norm": 0.244140625, "learning_rate": 0.00019391853126792888, "loss": 0.9806, "step": 1690 }, { "epoch": 0.09725170692523954, "grad_norm": 0.2392578125, "learning_rate": 0.0001944922547332186, "loss": 1.0139, "step": 1695 }, { "epoch": 0.09753858511675942, "grad_norm": 0.25, "learning_rate": 0.00019506597819850835, "loss": 0.9764, "step": 1700 }, { "epoch": 0.0978254633082793, "grad_norm": 0.2392578125, "learning_rate": 0.00019563970166379806, "loss": 0.9552, "step": 1705 }, { "epoch": 0.09811234149979918, "grad_norm": 0.248046875, "learning_rate": 0.0001962134251290878, "loss": 0.9531, "step": 1710 }, { "epoch": 0.09839921969131907, "grad_norm": 0.2578125, "learning_rate": 0.00019678714859437752, "loss": 1.0003, "step": 1715 }, { "epoch": 0.09868609788283894, "grad_norm": 0.251953125, "learning_rate": 0.00019736087205966726, "loss": 1.0355, "step": 1720 }, { "epoch": 0.09897297607435883, "grad_norm": 0.265625, "learning_rate": 0.000197934595524957, "loss": 1.0075, "step": 1725 }, { "epoch": 0.0992598542658787, "grad_norm": 0.2421875, "learning_rate": 0.00019850831899024672, "loss": 0.9431, "step": 1730 }, { "epoch": 0.09954673245739859, "grad_norm": 0.2451171875, "learning_rate": 0.00019908204245553643, "loss": 0.9683, "step": 1735 }, { "epoch": 0.09983361064891846, "grad_norm": 0.2353515625, "learning_rate": 0.00019965576592082616, "loss": 0.9644, "step": 1740 }, { "epoch": 0.10012048884043835, "grad_norm": 0.2421875, "learning_rate": 0.0001999999919775815, "loss": 0.988, "step": 1745 }, { "epoch": 0.10040736703195823, "grad_norm": 0.25, "learning_rate": 0.00019999990172538815, "loss": 0.9723, "step": 1750 }, { "epoch": 0.10069424522347811, "grad_norm": 0.2431640625, "learning_rate": 0.00019999971119306908, "loss": 0.9437, "step": 1755 }, { "epoch": 0.10098112341499799, "grad_norm": 0.23828125, "learning_rate": 0.0001999994203808154, "loss": 1.0246, "step": 1760 }, { "epoch": 0.10126800160651787, "grad_norm": 0.26171875, "learning_rate": 0.00019999902928891875, "loss": 0.9988, "step": 1765 }, { "epoch": 0.10155487979803775, "grad_norm": 0.248046875, "learning_rate": 0.00019999853791777126, "loss": 0.9811, "step": 1770 }, { "epoch": 0.10184175798955764, "grad_norm": 0.30078125, "learning_rate": 0.00019999794626786573, "loss": 1.0372, "step": 1775 }, { "epoch": 0.10212863618107751, "grad_norm": 0.25390625, "learning_rate": 0.00019999725433979544, "loss": 0.935, "step": 1780 }, { "epoch": 0.1024155143725974, "grad_norm": 0.25, "learning_rate": 0.00019999646213425426, "loss": 0.9745, "step": 1785 }, { "epoch": 0.10270239256411727, "grad_norm": 0.2314453125, "learning_rate": 0.00019999556965203663, "loss": 0.9369, "step": 1790 }, { "epoch": 0.10298927075563716, "grad_norm": 0.24609375, "learning_rate": 0.00019999457689403753, "loss": 0.8631, "step": 1795 }, { "epoch": 0.10327614894715703, "grad_norm": 0.2412109375, "learning_rate": 0.0001999934838612525, "loss": 0.9291, "step": 1800 }, { "epoch": 0.10356302713867692, "grad_norm": 0.2353515625, "learning_rate": 0.0001999922905547776, "loss": 0.9067, "step": 1805 }, { "epoch": 0.10384990533019679, "grad_norm": 0.26171875, "learning_rate": 0.00019999099697580954, "loss": 0.9654, "step": 1810 }, { "epoch": 0.10413678352171668, "grad_norm": 0.25, "learning_rate": 0.00019998960312564548, "loss": 0.9231, "step": 1815 }, { "epoch": 0.10442366171323655, "grad_norm": 0.248046875, "learning_rate": 0.0001999881090056832, "loss": 1.0005, "step": 1820 }, { "epoch": 0.10471053990475644, "grad_norm": 0.2578125, "learning_rate": 0.000199986514617421, "loss": 0.9831, "step": 1825 }, { "epoch": 0.10499741809627632, "grad_norm": 0.232421875, "learning_rate": 0.00019998481996245772, "loss": 0.995, "step": 1830 }, { "epoch": 0.1052842962877962, "grad_norm": 0.259765625, "learning_rate": 0.00019998302504249278, "loss": 0.9808, "step": 1835 }, { "epoch": 0.10557117447931608, "grad_norm": 0.283203125, "learning_rate": 0.00019998112985932613, "loss": 0.9961, "step": 1840 }, { "epoch": 0.10585805267083596, "grad_norm": 0.236328125, "learning_rate": 0.00019997913441485826, "loss": 0.9614, "step": 1845 }, { "epoch": 0.10614493086235584, "grad_norm": 0.2412109375, "learning_rate": 0.00019997703871109021, "loss": 1.0427, "step": 1850 }, { "epoch": 0.10643180905387573, "grad_norm": 0.234375, "learning_rate": 0.00019997484275012357, "loss": 0.979, "step": 1855 }, { "epoch": 0.1067186872453956, "grad_norm": 0.2490234375, "learning_rate": 0.00019997254653416043, "loss": 0.9046, "step": 1860 }, { "epoch": 0.10700556543691549, "grad_norm": 0.25, "learning_rate": 0.00019997015006550342, "loss": 0.9862, "step": 1865 }, { "epoch": 0.10729244362843536, "grad_norm": 0.2451171875, "learning_rate": 0.0001999676533465558, "loss": 0.9117, "step": 1870 }, { "epoch": 0.10757932181995525, "grad_norm": 0.255859375, "learning_rate": 0.00019996505637982122, "loss": 0.9843, "step": 1875 }, { "epoch": 0.10786620001147512, "grad_norm": 0.244140625, "learning_rate": 0.00019996235916790392, "loss": 0.9766, "step": 1880 }, { "epoch": 0.10815307820299501, "grad_norm": 0.263671875, "learning_rate": 0.0001999595617135087, "loss": 0.9147, "step": 1885 }, { "epoch": 0.10843995639451488, "grad_norm": 0.265625, "learning_rate": 0.00019995666401944085, "loss": 0.9491, "step": 1890 }, { "epoch": 0.10872683458603477, "grad_norm": 0.2255859375, "learning_rate": 0.0001999536660886062, "loss": 0.9517, "step": 1895 }, { "epoch": 0.10901371277755464, "grad_norm": 0.2294921875, "learning_rate": 0.00019995056792401105, "loss": 1.0407, "step": 1900 }, { "epoch": 0.10930059096907453, "grad_norm": 0.244140625, "learning_rate": 0.00019994736952876226, "loss": 1.0085, "step": 1905 }, { "epoch": 0.1095874691605944, "grad_norm": 0.234375, "learning_rate": 0.0001999440709060672, "loss": 0.914, "step": 1910 }, { "epoch": 0.1098743473521143, "grad_norm": 0.25390625, "learning_rate": 0.0001999406720592337, "loss": 0.9376, "step": 1915 }, { "epoch": 0.11016122554363417, "grad_norm": 0.248046875, "learning_rate": 0.00019993717299167014, "loss": 0.8981, "step": 1920 }, { "epoch": 0.11044810373515405, "grad_norm": 0.251953125, "learning_rate": 0.00019993357370688542, "loss": 1.0067, "step": 1925 }, { "epoch": 0.11073498192667393, "grad_norm": 0.248046875, "learning_rate": 0.00019992987420848891, "loss": 0.9061, "step": 1930 }, { "epoch": 0.11102186011819382, "grad_norm": 0.2421875, "learning_rate": 0.00019992607450019048, "loss": 1.0008, "step": 1935 }, { "epoch": 0.11130873830971369, "grad_norm": 0.255859375, "learning_rate": 0.00019992217458580043, "loss": 0.9274, "step": 1940 }, { "epoch": 0.11159561650123358, "grad_norm": 0.291015625, "learning_rate": 0.00019991817446922964, "loss": 1.0122, "step": 1945 }, { "epoch": 0.11188249469275345, "grad_norm": 0.234375, "learning_rate": 0.00019991407415448947, "loss": 0.9451, "step": 1950 }, { "epoch": 0.11216937288427334, "grad_norm": 0.279296875, "learning_rate": 0.0001999098736456917, "loss": 0.9165, "step": 1955 }, { "epoch": 0.11245625107579321, "grad_norm": 0.251953125, "learning_rate": 0.00019990557294704856, "loss": 0.9794, "step": 1960 }, { "epoch": 0.1127431292673131, "grad_norm": 0.271484375, "learning_rate": 0.00019990117206287287, "loss": 0.9801, "step": 1965 }, { "epoch": 0.11303000745883297, "grad_norm": 0.25, "learning_rate": 0.0001998966709975778, "loss": 0.9645, "step": 1970 }, { "epoch": 0.11331688565035286, "grad_norm": 0.240234375, "learning_rate": 0.00019989206975567708, "loss": 0.9787, "step": 1975 }, { "epoch": 0.11360376384187273, "grad_norm": 0.25, "learning_rate": 0.0001998873683417848, "loss": 0.9984, "step": 1980 }, { "epoch": 0.11389064203339262, "grad_norm": 0.24609375, "learning_rate": 0.00019988256676061554, "loss": 0.995, "step": 1985 }, { "epoch": 0.1141775202249125, "grad_norm": 0.244140625, "learning_rate": 0.00019987766501698438, "loss": 0.9607, "step": 1990 }, { "epoch": 0.11446439841643238, "grad_norm": 0.251953125, "learning_rate": 0.00019987266311580676, "loss": 1.0015, "step": 1995 }, { "epoch": 0.11475127660795226, "grad_norm": 0.240234375, "learning_rate": 0.00019986756106209864, "loss": 0.9125, "step": 2000 }, { "epoch": 0.11503815479947214, "grad_norm": 0.2578125, "learning_rate": 0.0001998623588609763, "loss": 0.9281, "step": 2005 }, { "epoch": 0.11532503299099202, "grad_norm": 0.2578125, "learning_rate": 0.0001998570565176566, "loss": 1.0259, "step": 2010 }, { "epoch": 0.1156119111825119, "grad_norm": 0.271484375, "learning_rate": 0.0001998516540374567, "loss": 1.003, "step": 2015 }, { "epoch": 0.11589878937403178, "grad_norm": 0.234375, "learning_rate": 0.00019984615142579424, "loss": 0.9298, "step": 2020 }, { "epoch": 0.11618566756555167, "grad_norm": 0.265625, "learning_rate": 0.00019984054868818724, "loss": 0.9373, "step": 2025 }, { "epoch": 0.11647254575707154, "grad_norm": 0.2392578125, "learning_rate": 0.0001998348458302541, "loss": 0.8953, "step": 2030 }, { "epoch": 0.11675942394859143, "grad_norm": 0.255859375, "learning_rate": 0.0001998290428577137, "loss": 0.9788, "step": 2035 }, { "epoch": 0.1170463021401113, "grad_norm": 0.259765625, "learning_rate": 0.00019982313977638528, "loss": 0.9672, "step": 2040 }, { "epoch": 0.11733318033163119, "grad_norm": 0.2412109375, "learning_rate": 0.00019981713659218846, "loss": 0.8816, "step": 2045 }, { "epoch": 0.11762005852315106, "grad_norm": 0.263671875, "learning_rate": 0.00019981103331114322, "loss": 0.9887, "step": 2050 }, { "epoch": 0.11790693671467095, "grad_norm": 0.2470703125, "learning_rate": 0.00019980482993936995, "loss": 1.0521, "step": 2055 }, { "epoch": 0.11819381490619082, "grad_norm": 0.2412109375, "learning_rate": 0.00019979852648308947, "loss": 0.9699, "step": 2060 }, { "epoch": 0.11848069309771071, "grad_norm": 0.267578125, "learning_rate": 0.0001997921229486228, "loss": 0.9402, "step": 2065 }, { "epoch": 0.11876757128923059, "grad_norm": 0.265625, "learning_rate": 0.00019978561934239149, "loss": 0.9328, "step": 2070 }, { "epoch": 0.11905444948075047, "grad_norm": 0.25390625, "learning_rate": 0.0001997790156709173, "loss": 0.9643, "step": 2075 }, { "epoch": 0.11934132767227035, "grad_norm": 0.259765625, "learning_rate": 0.00019977231194082248, "loss": 1.0274, "step": 2080 }, { "epoch": 0.11962820586379024, "grad_norm": 0.2470703125, "learning_rate": 0.00019976550815882952, "loss": 0.9102, "step": 2085 }, { "epoch": 0.11991508405531011, "grad_norm": 0.2890625, "learning_rate": 0.00019975860433176128, "loss": 0.918, "step": 2090 }, { "epoch": 0.12020196224683, "grad_norm": 0.248046875, "learning_rate": 0.0001997516004665409, "loss": 0.9157, "step": 2095 }, { "epoch": 0.12048884043834987, "grad_norm": 0.259765625, "learning_rate": 0.0001997444965701919, "loss": 0.9264, "step": 2100 }, { "epoch": 0.12077571862986976, "grad_norm": 0.26171875, "learning_rate": 0.00019973729264983808, "loss": 0.9099, "step": 2105 }, { "epoch": 0.12106259682138963, "grad_norm": 0.31640625, "learning_rate": 0.00019972998871270353, "loss": 0.9819, "step": 2110 }, { "epoch": 0.12134947501290952, "grad_norm": 0.25, "learning_rate": 0.0001997225847661127, "loss": 0.9286, "step": 2115 }, { "epoch": 0.12163635320442939, "grad_norm": 0.25390625, "learning_rate": 0.00019971508081749023, "loss": 1.0012, "step": 2120 }, { "epoch": 0.12192323139594928, "grad_norm": 0.244140625, "learning_rate": 0.0001997074768743611, "loss": 0.9616, "step": 2125 }, { "epoch": 0.12221010958746915, "grad_norm": 0.255859375, "learning_rate": 0.00019969977294435057, "loss": 0.9717, "step": 2130 }, { "epoch": 0.12249698777898904, "grad_norm": 0.26171875, "learning_rate": 0.0001996919690351842, "loss": 0.9775, "step": 2135 }, { "epoch": 0.12278386597050892, "grad_norm": 0.291015625, "learning_rate": 0.0001996840651546877, "loss": 0.9049, "step": 2140 }, { "epoch": 0.1230707441620288, "grad_norm": 0.2451171875, "learning_rate": 0.00019967606131078718, "loss": 0.9141, "step": 2145 }, { "epoch": 0.12335762235354868, "grad_norm": 0.26953125, "learning_rate": 0.00019966795751150885, "loss": 1.0031, "step": 2150 }, { "epoch": 0.12364450054506856, "grad_norm": 0.25390625, "learning_rate": 0.00019965975376497918, "loss": 0.9594, "step": 2155 }, { "epoch": 0.12393137873658844, "grad_norm": 0.24609375, "learning_rate": 0.000199651450079425, "loss": 0.9046, "step": 2160 }, { "epoch": 0.12421825692810833, "grad_norm": 0.28515625, "learning_rate": 0.00019964304646317323, "loss": 0.961, "step": 2165 }, { "epoch": 0.1245051351196282, "grad_norm": 0.263671875, "learning_rate": 0.00019963454292465103, "loss": 0.9989, "step": 2170 }, { "epoch": 0.12479201331114809, "grad_norm": 0.2431640625, "learning_rate": 0.00019962593947238578, "loss": 0.9993, "step": 2175 }, { "epoch": 0.12507889150266796, "grad_norm": 0.259765625, "learning_rate": 0.000199617236115005, "loss": 0.9633, "step": 2180 }, { "epoch": 0.12536576969418783, "grad_norm": 0.24609375, "learning_rate": 0.00019960843286123648, "loss": 0.9134, "step": 2185 }, { "epoch": 0.12565264788570774, "grad_norm": 0.2578125, "learning_rate": 0.0001995995297199081, "loss": 1.0059, "step": 2190 }, { "epoch": 0.1259395260772276, "grad_norm": 0.2490234375, "learning_rate": 0.000199590526699948, "loss": 1.0153, "step": 2195 }, { "epoch": 0.12622640426874748, "grad_norm": 0.2451171875, "learning_rate": 0.0001995814238103844, "loss": 0.9359, "step": 2200 }, { "epoch": 0.12651328246026736, "grad_norm": 0.25, "learning_rate": 0.00019957222106034572, "loss": 0.9862, "step": 2205 }, { "epoch": 0.12680016065178726, "grad_norm": 0.2412109375, "learning_rate": 0.00019956291845906046, "loss": 0.9502, "step": 2210 }, { "epoch": 0.12708703884330713, "grad_norm": 0.265625, "learning_rate": 0.00019955351601585731, "loss": 0.979, "step": 2215 }, { "epoch": 0.127373917034827, "grad_norm": 0.23828125, "learning_rate": 0.0001995440137401651, "loss": 0.8981, "step": 2220 }, { "epoch": 0.12766079522634688, "grad_norm": 0.240234375, "learning_rate": 0.00019953441164151264, "loss": 1.0073, "step": 2225 }, { "epoch": 0.12794767341786678, "grad_norm": 0.240234375, "learning_rate": 0.00019952470972952902, "loss": 0.9045, "step": 2230 }, { "epoch": 0.12823455160938665, "grad_norm": 0.2490234375, "learning_rate": 0.0001995149080139433, "loss": 0.9948, "step": 2235 }, { "epoch": 0.12852142980090653, "grad_norm": 0.2392578125, "learning_rate": 0.0001995050065045847, "loss": 0.9548, "step": 2240 }, { "epoch": 0.12880830799242643, "grad_norm": 0.271484375, "learning_rate": 0.00019949500521138243, "loss": 0.9024, "step": 2245 }, { "epoch": 0.1290951861839463, "grad_norm": 0.2470703125, "learning_rate": 0.00019948490414436584, "loss": 1.0311, "step": 2250 }, { "epoch": 0.12938206437546618, "grad_norm": 0.2451171875, "learning_rate": 0.00019947470331366427, "loss": 0.9478, "step": 2255 }, { "epoch": 0.12966894256698605, "grad_norm": 0.25, "learning_rate": 0.00019946440272950716, "loss": 0.8834, "step": 2260 }, { "epoch": 0.12995582075850595, "grad_norm": 0.248046875, "learning_rate": 0.00019945400240222396, "loss": 1.0765, "step": 2265 }, { "epoch": 0.13024269895002583, "grad_norm": 0.25390625, "learning_rate": 0.00019944350234224416, "loss": 0.9275, "step": 2270 }, { "epoch": 0.1305295771415457, "grad_norm": 0.25, "learning_rate": 0.0001994329025600972, "loss": 0.938, "step": 2275 }, { "epoch": 0.13081645533306557, "grad_norm": 0.263671875, "learning_rate": 0.00019942220306641258, "loss": 1.0348, "step": 2280 }, { "epoch": 0.13110333352458547, "grad_norm": 0.24609375, "learning_rate": 0.0001994114038719198, "loss": 0.9627, "step": 2285 }, { "epoch": 0.13139021171610535, "grad_norm": 0.25390625, "learning_rate": 0.00019940050498744828, "loss": 0.9749, "step": 2290 }, { "epoch": 0.13167708990762522, "grad_norm": 0.2431640625, "learning_rate": 0.00019938950642392746, "loss": 0.8878, "step": 2295 }, { "epoch": 0.1319639680991451, "grad_norm": 0.28125, "learning_rate": 0.00019937840819238677, "loss": 1.0061, "step": 2300 }, { "epoch": 0.132250846290665, "grad_norm": 0.267578125, "learning_rate": 0.00019936721030395547, "loss": 0.9872, "step": 2305 }, { "epoch": 0.13253772448218487, "grad_norm": 0.2578125, "learning_rate": 0.00019935591276986286, "loss": 1.0265, "step": 2310 }, { "epoch": 0.13282460267370474, "grad_norm": 0.29296875, "learning_rate": 0.00019934451560143815, "loss": 1.0217, "step": 2315 }, { "epoch": 0.13311148086522462, "grad_norm": 0.2421875, "learning_rate": 0.0001993330188101104, "loss": 0.9342, "step": 2320 }, { "epoch": 0.13339835905674452, "grad_norm": 0.275390625, "learning_rate": 0.00019932142240740866, "loss": 0.9407, "step": 2325 }, { "epoch": 0.1336852372482644, "grad_norm": 0.251953125, "learning_rate": 0.0001993097264049618, "loss": 0.9405, "step": 2330 }, { "epoch": 0.13397211543978427, "grad_norm": 0.25390625, "learning_rate": 0.0001992979308144986, "loss": 0.971, "step": 2335 }, { "epoch": 0.13425899363130414, "grad_norm": 0.283203125, "learning_rate": 0.00019928603564784773, "loss": 1.0423, "step": 2340 }, { "epoch": 0.13454587182282404, "grad_norm": 0.283203125, "learning_rate": 0.0001992740409169377, "loss": 1.0122, "step": 2345 }, { "epoch": 0.13483275001434392, "grad_norm": 0.255859375, "learning_rate": 0.00019926194663379677, "loss": 0.9608, "step": 2350 }, { "epoch": 0.1351196282058638, "grad_norm": 0.2470703125, "learning_rate": 0.00019924975281055324, "loss": 0.905, "step": 2355 }, { "epoch": 0.13540650639738366, "grad_norm": 0.2578125, "learning_rate": 0.00019923745945943502, "loss": 0.9108, "step": 2360 }, { "epoch": 0.13569338458890357, "grad_norm": 0.2412109375, "learning_rate": 0.0001992250665927699, "loss": 0.9587, "step": 2365 }, { "epoch": 0.13598026278042344, "grad_norm": 0.2734375, "learning_rate": 0.0001992125742229855, "loss": 0.9459, "step": 2370 }, { "epoch": 0.1362671409719433, "grad_norm": 0.2578125, "learning_rate": 0.00019919998236260923, "loss": 1.0325, "step": 2375 }, { "epoch": 0.1365540191634632, "grad_norm": 0.279296875, "learning_rate": 0.00019918729102426816, "loss": 1.0031, "step": 2380 }, { "epoch": 0.1368408973549831, "grad_norm": 0.251953125, "learning_rate": 0.00019917450022068927, "loss": 0.9159, "step": 2385 }, { "epoch": 0.13712777554650296, "grad_norm": 0.259765625, "learning_rate": 0.00019916160996469914, "loss": 0.9884, "step": 2390 }, { "epoch": 0.13741465373802284, "grad_norm": 0.251953125, "learning_rate": 0.0001991486202692242, "loss": 0.9934, "step": 2395 }, { "epoch": 0.1377015319295427, "grad_norm": 0.255859375, "learning_rate": 0.00019913553114729053, "loss": 0.9287, "step": 2400 }, { "epoch": 0.1379884101210626, "grad_norm": 0.2578125, "learning_rate": 0.0001991223426120239, "loss": 0.955, "step": 2405 }, { "epoch": 0.13827528831258248, "grad_norm": 0.259765625, "learning_rate": 0.00019910905467664987, "loss": 0.9791, "step": 2410 }, { "epoch": 0.13856216650410236, "grad_norm": 0.2578125, "learning_rate": 0.00019909566735449354, "loss": 1.0237, "step": 2415 }, { "epoch": 0.13884904469562223, "grad_norm": 0.240234375, "learning_rate": 0.00019908218065897978, "loss": 0.9546, "step": 2420 }, { "epoch": 0.13913592288714213, "grad_norm": 0.244140625, "learning_rate": 0.00019906859460363307, "loss": 0.8988, "step": 2425 }, { "epoch": 0.139422801078662, "grad_norm": 0.25, "learning_rate": 0.00019905490920207755, "loss": 0.9675, "step": 2430 }, { "epoch": 0.13970967927018188, "grad_norm": 0.232421875, "learning_rate": 0.00019904112446803699, "loss": 0.9773, "step": 2435 }, { "epoch": 0.13999655746170175, "grad_norm": 0.259765625, "learning_rate": 0.0001990272404153347, "loss": 1.053, "step": 2440 }, { "epoch": 0.14028343565322166, "grad_norm": 0.255859375, "learning_rate": 0.00019901325705789366, "loss": 0.9634, "step": 2445 }, { "epoch": 0.14057031384474153, "grad_norm": 0.251953125, "learning_rate": 0.0001989991744097364, "loss": 1.0375, "step": 2450 }, { "epoch": 0.1408571920362614, "grad_norm": 0.2421875, "learning_rate": 0.0001989849924849851, "loss": 1.0092, "step": 2455 }, { "epoch": 0.14114407022778128, "grad_norm": 0.259765625, "learning_rate": 0.00019897071129786132, "loss": 1.0077, "step": 2460 }, { "epoch": 0.14143094841930118, "grad_norm": 0.24609375, "learning_rate": 0.00019895633086268637, "loss": 1.0083, "step": 2465 }, { "epoch": 0.14171782661082105, "grad_norm": 0.26171875, "learning_rate": 0.0001989418511938809, "loss": 0.9727, "step": 2470 }, { "epoch": 0.14200470480234093, "grad_norm": 0.265625, "learning_rate": 0.00019892727230596519, "loss": 1.0221, "step": 2475 }, { "epoch": 0.1422915829938608, "grad_norm": 0.251953125, "learning_rate": 0.00019891259421355895, "loss": 0.9697, "step": 2480 }, { "epoch": 0.1425784611853807, "grad_norm": 0.2451171875, "learning_rate": 0.0001988978169313815, "loss": 0.9837, "step": 2485 }, { "epoch": 0.14286533937690057, "grad_norm": 0.2421875, "learning_rate": 0.00019888294047425143, "loss": 1.0065, "step": 2490 }, { "epoch": 0.14315221756842045, "grad_norm": 0.255859375, "learning_rate": 0.00019886796485708692, "loss": 0.9125, "step": 2495 }, { "epoch": 0.14343909575994032, "grad_norm": 0.263671875, "learning_rate": 0.00019885289009490556, "loss": 1.0038, "step": 2500 }, { "epoch": 0.14372597395146022, "grad_norm": 0.25390625, "learning_rate": 0.0001988377162028243, "loss": 0.926, "step": 2505 }, { "epoch": 0.1440128521429801, "grad_norm": 0.263671875, "learning_rate": 0.00019882244319605966, "loss": 0.9902, "step": 2510 }, { "epoch": 0.14429973033449997, "grad_norm": 0.2578125, "learning_rate": 0.00019880707108992738, "loss": 1.0494, "step": 2515 }, { "epoch": 0.14458660852601984, "grad_norm": 0.2890625, "learning_rate": 0.0001987915998998426, "loss": 0.9435, "step": 2520 }, { "epoch": 0.14487348671753975, "grad_norm": 0.251953125, "learning_rate": 0.00019877602964131995, "loss": 0.9293, "step": 2525 }, { "epoch": 0.14516036490905962, "grad_norm": 0.2734375, "learning_rate": 0.0001987603603299733, "loss": 0.9658, "step": 2530 }, { "epoch": 0.1454472431005795, "grad_norm": 0.26171875, "learning_rate": 0.00019874459198151583, "loss": 0.9757, "step": 2535 }, { "epoch": 0.14573412129209937, "grad_norm": 0.2470703125, "learning_rate": 0.0001987287246117601, "loss": 1.0029, "step": 2540 }, { "epoch": 0.14602099948361927, "grad_norm": 0.279296875, "learning_rate": 0.00019871275823661795, "loss": 0.9861, "step": 2545 }, { "epoch": 0.14630787767513914, "grad_norm": 0.25, "learning_rate": 0.00019869669287210046, "loss": 0.9809, "step": 2550 }, { "epoch": 0.14659475586665902, "grad_norm": 0.255859375, "learning_rate": 0.00019868052853431808, "loss": 0.9763, "step": 2555 }, { "epoch": 0.1468816340581789, "grad_norm": 0.2578125, "learning_rate": 0.00019866426523948037, "loss": 0.9973, "step": 2560 }, { "epoch": 0.1471685122496988, "grad_norm": 0.275390625, "learning_rate": 0.00019864790300389625, "loss": 1.0361, "step": 2565 }, { "epoch": 0.14745539044121866, "grad_norm": 0.2490234375, "learning_rate": 0.00019863144184397376, "loss": 0.9603, "step": 2570 }, { "epoch": 0.14774226863273854, "grad_norm": 0.2734375, "learning_rate": 0.0001986148817762203, "loss": 0.9825, "step": 2575 }, { "epoch": 0.1480291468242584, "grad_norm": 0.255859375, "learning_rate": 0.0001985982228172422, "loss": 1.0327, "step": 2580 }, { "epoch": 0.1483160250157783, "grad_norm": 0.26953125, "learning_rate": 0.0001985814649837452, "loss": 1.0019, "step": 2585 }, { "epoch": 0.1486029032072982, "grad_norm": 0.255859375, "learning_rate": 0.000198564608292534, "loss": 0.9411, "step": 2590 }, { "epoch": 0.14888978139881806, "grad_norm": 0.25, "learning_rate": 0.00019854765276051264, "loss": 0.9216, "step": 2595 }, { "epoch": 0.14917665959033793, "grad_norm": 0.271484375, "learning_rate": 0.00019853059840468408, "loss": 0.9562, "step": 2600 }, { "epoch": 0.14946353778185784, "grad_norm": 0.265625, "learning_rate": 0.0001985134452421505, "loss": 1.0468, "step": 2605 }, { "epoch": 0.1497504159733777, "grad_norm": 0.265625, "learning_rate": 0.00019849619329011315, "loss": 1.0086, "step": 2610 }, { "epoch": 0.15003729416489758, "grad_norm": 0.267578125, "learning_rate": 0.0001984788425658723, "loss": 1.0496, "step": 2615 }, { "epoch": 0.15032417235641746, "grad_norm": 0.2734375, "learning_rate": 0.00019846139308682729, "loss": 1.0036, "step": 2620 }, { "epoch": 0.15061105054793736, "grad_norm": 0.267578125, "learning_rate": 0.0001984438448704765, "loss": 0.9403, "step": 2625 }, { "epoch": 0.15089792873945723, "grad_norm": 0.2421875, "learning_rate": 0.0001984261979344173, "loss": 0.9519, "step": 2630 }, { "epoch": 0.1511848069309771, "grad_norm": 0.236328125, "learning_rate": 0.00019840845229634612, "loss": 0.9608, "step": 2635 }, { "epoch": 0.15147168512249698, "grad_norm": 0.27734375, "learning_rate": 0.00019839060797405833, "loss": 1.0074, "step": 2640 }, { "epoch": 0.15175856331401688, "grad_norm": 0.267578125, "learning_rate": 0.0001983726649854482, "loss": 0.9188, "step": 2645 }, { "epoch": 0.15204544150553675, "grad_norm": 0.25390625, "learning_rate": 0.0001983546233485091, "loss": 1.0233, "step": 2650 }, { "epoch": 0.15233231969705663, "grad_norm": 0.287109375, "learning_rate": 0.0001983364830813331, "loss": 1.0333, "step": 2655 }, { "epoch": 0.1526191978885765, "grad_norm": 0.25, "learning_rate": 0.00019831824420211137, "loss": 0.993, "step": 2660 }, { "epoch": 0.1529060760800964, "grad_norm": 0.271484375, "learning_rate": 0.00019829990672913387, "loss": 0.9822, "step": 2665 }, { "epoch": 0.15319295427161628, "grad_norm": 0.2578125, "learning_rate": 0.0001982814706807895, "loss": 0.962, "step": 2670 }, { "epoch": 0.15347983246313615, "grad_norm": 0.267578125, "learning_rate": 0.00019826293607556593, "loss": 0.9029, "step": 2675 }, { "epoch": 0.15376671065465602, "grad_norm": 0.259765625, "learning_rate": 0.00019824430293204973, "loss": 0.922, "step": 2680 }, { "epoch": 0.15405358884617593, "grad_norm": 0.251953125, "learning_rate": 0.00019822557126892627, "loss": 0.9965, "step": 2685 }, { "epoch": 0.1543404670376958, "grad_norm": 0.259765625, "learning_rate": 0.00019820674110497966, "loss": 0.927, "step": 2690 }, { "epoch": 0.15462734522921567, "grad_norm": 0.2578125, "learning_rate": 0.00019818781245909285, "loss": 0.9637, "step": 2695 }, { "epoch": 0.15491422342073555, "grad_norm": 0.263671875, "learning_rate": 0.00019816878535024754, "loss": 1.0141, "step": 2700 }, { "epoch": 0.15520110161225545, "grad_norm": 0.2578125, "learning_rate": 0.00019814965979752413, "loss": 1.0148, "step": 2705 }, { "epoch": 0.15548797980377532, "grad_norm": 0.251953125, "learning_rate": 0.0001981304358201018, "loss": 1.0374, "step": 2710 }, { "epoch": 0.1557748579952952, "grad_norm": 0.24609375, "learning_rate": 0.00019811111343725842, "loss": 0.9639, "step": 2715 }, { "epoch": 0.15606173618681507, "grad_norm": 0.267578125, "learning_rate": 0.00019809169266837043, "loss": 0.9544, "step": 2720 }, { "epoch": 0.15634861437833497, "grad_norm": 0.263671875, "learning_rate": 0.0001980721735329131, "loss": 0.9553, "step": 2725 }, { "epoch": 0.15663549256985485, "grad_norm": 0.2392578125, "learning_rate": 0.0001980525560504602, "loss": 0.9162, "step": 2730 }, { "epoch": 0.15692237076137472, "grad_norm": 0.26171875, "learning_rate": 0.00019803284024068427, "loss": 0.997, "step": 2735 }, { "epoch": 0.1572092489528946, "grad_norm": 0.232421875, "learning_rate": 0.00019801302612335628, "loss": 0.9371, "step": 2740 }, { "epoch": 0.1574961271444145, "grad_norm": 0.26953125, "learning_rate": 0.00019799311371834595, "loss": 0.9677, "step": 2745 }, { "epoch": 0.15778300533593437, "grad_norm": 0.27734375, "learning_rate": 0.00019797310304562143, "loss": 0.9503, "step": 2750 }, { "epoch": 0.15806988352745424, "grad_norm": 0.24609375, "learning_rate": 0.00019795299412524945, "loss": 1.0382, "step": 2755 }, { "epoch": 0.15835676171897412, "grad_norm": 0.25, "learning_rate": 0.00019793278697739533, "loss": 0.9606, "step": 2760 }, { "epoch": 0.15864363991049402, "grad_norm": 0.251953125, "learning_rate": 0.00019791248162232285, "loss": 0.9368, "step": 2765 }, { "epoch": 0.1589305181020139, "grad_norm": 0.26171875, "learning_rate": 0.00019789207808039425, "loss": 0.9581, "step": 2770 }, { "epoch": 0.15921739629353376, "grad_norm": 0.279296875, "learning_rate": 0.0001978715763720702, "loss": 1.0179, "step": 2775 }, { "epoch": 0.15950427448505364, "grad_norm": 0.259765625, "learning_rate": 0.00019785097651790992, "loss": 0.9556, "step": 2780 }, { "epoch": 0.15979115267657354, "grad_norm": 0.255859375, "learning_rate": 0.00019783027853857097, "loss": 1.0694, "step": 2785 }, { "epoch": 0.1600780308680934, "grad_norm": 0.251953125, "learning_rate": 0.00019780948245480933, "loss": 1.0073, "step": 2790 }, { "epoch": 0.1603649090596133, "grad_norm": 0.267578125, "learning_rate": 0.00019778858828747934, "loss": 0.9729, "step": 2795 }, { "epoch": 0.16065178725113316, "grad_norm": 0.2490234375, "learning_rate": 0.00019776759605753377, "loss": 1.0349, "step": 2800 }, { "epoch": 0.16093866544265306, "grad_norm": 0.248046875, "learning_rate": 0.0001977465057860236, "loss": 0.9904, "step": 2805 }, { "epoch": 0.16122554363417294, "grad_norm": 0.2490234375, "learning_rate": 0.00019772531749409828, "loss": 0.9545, "step": 2810 }, { "epoch": 0.1615124218256928, "grad_norm": 0.271484375, "learning_rate": 0.0001977040312030054, "loss": 0.9768, "step": 2815 }, { "epoch": 0.16179930001721268, "grad_norm": 0.26953125, "learning_rate": 0.00019768264693409098, "loss": 0.9905, "step": 2820 }, { "epoch": 0.16208617820873258, "grad_norm": 0.271484375, "learning_rate": 0.00019766116470879913, "loss": 0.9736, "step": 2825 }, { "epoch": 0.16237305640025246, "grad_norm": 0.263671875, "learning_rate": 0.00019763958454867235, "loss": 1.0114, "step": 2830 }, { "epoch": 0.16265993459177233, "grad_norm": 0.24609375, "learning_rate": 0.00019761790647535124, "loss": 0.9847, "step": 2835 }, { "epoch": 0.1629468127832922, "grad_norm": 0.28515625, "learning_rate": 0.00019759613051057462, "loss": 0.959, "step": 2840 }, { "epoch": 0.1632336909748121, "grad_norm": 0.265625, "learning_rate": 0.00019757425667617945, "loss": 0.93, "step": 2845 }, { "epoch": 0.16352056916633198, "grad_norm": 0.263671875, "learning_rate": 0.00019755228499410092, "loss": 1.0175, "step": 2850 }, { "epoch": 0.16380744735785185, "grad_norm": 0.2470703125, "learning_rate": 0.00019753021548637222, "loss": 0.9568, "step": 2855 }, { "epoch": 0.16409432554937173, "grad_norm": 0.259765625, "learning_rate": 0.00019750804817512477, "loss": 0.9666, "step": 2860 }, { "epoch": 0.16438120374089163, "grad_norm": 0.265625, "learning_rate": 0.0001974857830825879, "loss": 0.9816, "step": 2865 }, { "epoch": 0.1646680819324115, "grad_norm": 0.26953125, "learning_rate": 0.0001974634202310892, "loss": 0.9241, "step": 2870 }, { "epoch": 0.16495496012393138, "grad_norm": 0.244140625, "learning_rate": 0.00019744095964305413, "loss": 0.9971, "step": 2875 }, { "epoch": 0.16524183831545125, "grad_norm": 0.251953125, "learning_rate": 0.00019741840134100623, "loss": 0.9578, "step": 2880 }, { "epoch": 0.16552871650697115, "grad_norm": 0.26171875, "learning_rate": 0.000197395745347567, "loss": 0.9817, "step": 2885 }, { "epoch": 0.16581559469849103, "grad_norm": 0.255859375, "learning_rate": 0.00019737299168545597, "loss": 0.9887, "step": 2890 }, { "epoch": 0.1661024728900109, "grad_norm": 0.27734375, "learning_rate": 0.00019735014037749053, "loss": 1.0605, "step": 2895 }, { "epoch": 0.16638935108153077, "grad_norm": 0.25, "learning_rate": 0.000197327191446586, "loss": 0.9337, "step": 2900 }, { "epoch": 0.16667622927305067, "grad_norm": 0.271484375, "learning_rate": 0.00019730414491575564, "loss": 0.876, "step": 2905 }, { "epoch": 0.16696310746457055, "grad_norm": 0.296875, "learning_rate": 0.00019728100080811057, "loss": 0.9747, "step": 2910 }, { "epoch": 0.16724998565609042, "grad_norm": 0.2490234375, "learning_rate": 0.00019725775914685977, "loss": 0.9694, "step": 2915 }, { "epoch": 0.1675368638476103, "grad_norm": 0.271484375, "learning_rate": 0.00019723441995531, "loss": 0.955, "step": 2920 }, { "epoch": 0.1678237420391302, "grad_norm": 0.275390625, "learning_rate": 0.00019721098325686584, "loss": 0.9547, "step": 2925 }, { "epoch": 0.16811062023065007, "grad_norm": 0.255859375, "learning_rate": 0.0001971874490750297, "loss": 0.9627, "step": 2930 }, { "epoch": 0.16839749842216994, "grad_norm": 0.23828125, "learning_rate": 0.0001971638174334017, "loss": 1.0038, "step": 2935 }, { "epoch": 0.16868437661368982, "grad_norm": 0.2578125, "learning_rate": 0.0001971400883556797, "loss": 0.9428, "step": 2940 }, { "epoch": 0.16897125480520972, "grad_norm": 0.267578125, "learning_rate": 0.00019711626186565929, "loss": 1.0165, "step": 2945 }, { "epoch": 0.1692581329967296, "grad_norm": 0.26953125, "learning_rate": 0.0001970923379872337, "loss": 0.9777, "step": 2950 }, { "epoch": 0.16954501118824947, "grad_norm": 0.27734375, "learning_rate": 0.00019706831674439382, "loss": 1.0453, "step": 2955 }, { "epoch": 0.16983188937976934, "grad_norm": 0.26171875, "learning_rate": 0.00019704419816122826, "loss": 0.9889, "step": 2960 }, { "epoch": 0.17011876757128924, "grad_norm": 0.263671875, "learning_rate": 0.00019701998226192319, "loss": 0.9816, "step": 2965 }, { "epoch": 0.17040564576280912, "grad_norm": 0.248046875, "learning_rate": 0.00019699566907076236, "loss": 0.9419, "step": 2970 }, { "epoch": 0.170692523954329, "grad_norm": 0.271484375, "learning_rate": 0.00019697125861212707, "loss": 0.9346, "step": 2975 }, { "epoch": 0.17097940214584886, "grad_norm": 0.259765625, "learning_rate": 0.00019694675091049617, "loss": 0.9696, "step": 2980 }, { "epoch": 0.17126628033736876, "grad_norm": 0.25390625, "learning_rate": 0.0001969221459904461, "loss": 0.9879, "step": 2985 }, { "epoch": 0.17155315852888864, "grad_norm": 0.28125, "learning_rate": 0.0001968974438766507, "loss": 1.0363, "step": 2990 }, { "epoch": 0.1718400367204085, "grad_norm": 0.259765625, "learning_rate": 0.0001968726445938813, "loss": 0.9803, "step": 2995 }, { "epoch": 0.17212691491192839, "grad_norm": 0.259765625, "learning_rate": 0.00019684774816700664, "loss": 0.9079, "step": 3000 }, { "epoch": 0.1724137931034483, "grad_norm": 0.251953125, "learning_rate": 0.00019682275462099298, "loss": 0.9684, "step": 3005 }, { "epoch": 0.17270067129496816, "grad_norm": 0.25, "learning_rate": 0.00019679766398090383, "loss": 1.009, "step": 3010 }, { "epoch": 0.17298754948648803, "grad_norm": 0.248046875, "learning_rate": 0.00019677247627190026, "loss": 0.9769, "step": 3015 }, { "epoch": 0.1732744276780079, "grad_norm": 0.263671875, "learning_rate": 0.00019674719151924043, "loss": 1.0395, "step": 3020 }, { "epoch": 0.1735613058695278, "grad_norm": 0.255859375, "learning_rate": 0.00019672180974828, "loss": 1.0476, "step": 3025 }, { "epoch": 0.17384818406104768, "grad_norm": 0.279296875, "learning_rate": 0.0001966963309844719, "loss": 0.9697, "step": 3030 }, { "epoch": 0.17413506225256756, "grad_norm": 0.24609375, "learning_rate": 0.00019667075525336622, "loss": 0.9243, "step": 3035 }, { "epoch": 0.17442194044408743, "grad_norm": 0.306640625, "learning_rate": 0.00019664508258061044, "loss": 1.0036, "step": 3040 }, { "epoch": 0.17470881863560733, "grad_norm": 0.255859375, "learning_rate": 0.0001966193129919491, "loss": 0.955, "step": 3045 }, { "epoch": 0.1749956968271272, "grad_norm": 0.248046875, "learning_rate": 0.00019659344651322405, "loss": 0.9717, "step": 3050 }, { "epoch": 0.17528257501864708, "grad_norm": 0.26171875, "learning_rate": 0.00019656748317037424, "loss": 0.9715, "step": 3055 }, { "epoch": 0.17556945321016695, "grad_norm": 0.2734375, "learning_rate": 0.00019654142298943574, "loss": 0.9769, "step": 3060 }, { "epoch": 0.17585633140168686, "grad_norm": 0.26953125, "learning_rate": 0.00019651526599654182, "loss": 0.9292, "step": 3065 }, { "epoch": 0.17614320959320673, "grad_norm": 0.255859375, "learning_rate": 0.0001964890122179227, "loss": 0.9528, "step": 3070 }, { "epoch": 0.1764300877847266, "grad_norm": 0.26171875, "learning_rate": 0.00019646266167990578, "loss": 0.9587, "step": 3075 }, { "epoch": 0.17671696597624648, "grad_norm": 0.279296875, "learning_rate": 0.00019643621440891543, "loss": 0.927, "step": 3080 }, { "epoch": 0.17700384416776638, "grad_norm": 0.255859375, "learning_rate": 0.00019640967043147302, "loss": 0.9888, "step": 3085 }, { "epoch": 0.17729072235928625, "grad_norm": 0.263671875, "learning_rate": 0.0001963830297741969, "loss": 0.9385, "step": 3090 }, { "epoch": 0.17757760055080613, "grad_norm": 0.251953125, "learning_rate": 0.0001963562924638024, "loss": 1.009, "step": 3095 }, { "epoch": 0.177864478742326, "grad_norm": 0.291015625, "learning_rate": 0.00019632945852710173, "loss": 0.9866, "step": 3100 }, { "epoch": 0.1781513569338459, "grad_norm": 0.28125, "learning_rate": 0.00019630252799100409, "loss": 0.9238, "step": 3105 }, { "epoch": 0.17843823512536577, "grad_norm": 0.302734375, "learning_rate": 0.0001962755008825154, "loss": 0.9603, "step": 3110 }, { "epoch": 0.17872511331688565, "grad_norm": 0.27734375, "learning_rate": 0.00019624837722873856, "loss": 0.9602, "step": 3115 }, { "epoch": 0.17901199150840552, "grad_norm": 0.28125, "learning_rate": 0.00019622115705687318, "loss": 0.9651, "step": 3120 }, { "epoch": 0.17929886969992542, "grad_norm": 0.24609375, "learning_rate": 0.00019619384039421575, "loss": 0.98, "step": 3125 }, { "epoch": 0.1795857478914453, "grad_norm": 0.2578125, "learning_rate": 0.00019616642726815947, "loss": 0.896, "step": 3130 }, { "epoch": 0.17987262608296517, "grad_norm": 0.259765625, "learning_rate": 0.0001961389177061943, "loss": 0.9217, "step": 3135 }, { "epoch": 0.18015950427448504, "grad_norm": 0.263671875, "learning_rate": 0.00019611131173590687, "loss": 0.9536, "step": 3140 }, { "epoch": 0.18044638246600495, "grad_norm": 0.28515625, "learning_rate": 0.0001960836093849805, "loss": 0.9958, "step": 3145 }, { "epoch": 0.18073326065752482, "grad_norm": 0.435546875, "learning_rate": 0.00019605581068119518, "loss": 0.9922, "step": 3150 }, { "epoch": 0.1810201388490447, "grad_norm": 0.26953125, "learning_rate": 0.00019602791565242754, "loss": 0.9009, "step": 3155 }, { "epoch": 0.18130701704056457, "grad_norm": 0.26953125, "learning_rate": 0.00019599992432665073, "loss": 0.9346, "step": 3160 }, { "epoch": 0.18159389523208447, "grad_norm": 0.271484375, "learning_rate": 0.00019597183673193452, "loss": 0.9644, "step": 3165 }, { "epoch": 0.18188077342360434, "grad_norm": 0.267578125, "learning_rate": 0.00019594365289644529, "loss": 0.8938, "step": 3170 }, { "epoch": 0.18216765161512422, "grad_norm": 0.27734375, "learning_rate": 0.00019591537284844573, "loss": 1.0112, "step": 3175 }, { "epoch": 0.1824545298066441, "grad_norm": 0.25390625, "learning_rate": 0.00019588699661629523, "loss": 0.9286, "step": 3180 }, { "epoch": 0.182741407998164, "grad_norm": 0.263671875, "learning_rate": 0.0001958585242284495, "loss": 0.9738, "step": 3185 }, { "epoch": 0.18302828618968386, "grad_norm": 0.271484375, "learning_rate": 0.00019582995571346072, "loss": 0.9947, "step": 3190 }, { "epoch": 0.18331516438120374, "grad_norm": 0.265625, "learning_rate": 0.0001958012910999775, "loss": 0.9578, "step": 3195 }, { "epoch": 0.1836020425727236, "grad_norm": 0.3515625, "learning_rate": 0.0001957725304167447, "loss": 1.0321, "step": 3200 }, { "epoch": 0.1838889207642435, "grad_norm": 0.275390625, "learning_rate": 0.00019574367369260364, "loss": 0.9091, "step": 3205 }, { "epoch": 0.1841757989557634, "grad_norm": 0.267578125, "learning_rate": 0.00019571472095649192, "loss": 0.9775, "step": 3210 }, { "epoch": 0.18446267714728326, "grad_norm": 0.25390625, "learning_rate": 0.00019568567223744339, "loss": 1.0474, "step": 3215 }, { "epoch": 0.18474955533880313, "grad_norm": 0.267578125, "learning_rate": 0.00019565652756458818, "loss": 0.9921, "step": 3220 }, { "epoch": 0.18503643353032304, "grad_norm": 0.2890625, "learning_rate": 0.00019562728696715263, "loss": 0.9533, "step": 3225 }, { "epoch": 0.1853233117218429, "grad_norm": 0.265625, "learning_rate": 0.00019559795047445927, "loss": 0.9658, "step": 3230 }, { "epoch": 0.18561018991336278, "grad_norm": 0.271484375, "learning_rate": 0.0001955685181159268, "loss": 0.9816, "step": 3235 }, { "epoch": 0.18589706810488266, "grad_norm": 0.2734375, "learning_rate": 0.0001955389899210701, "loss": 0.9574, "step": 3240 }, { "epoch": 0.18618394629640256, "grad_norm": 0.2333984375, "learning_rate": 0.00019550936591950006, "loss": 0.9079, "step": 3245 }, { "epoch": 0.18647082448792243, "grad_norm": 0.29296875, "learning_rate": 0.0001954796461409237, "loss": 0.9293, "step": 3250 }, { "epoch": 0.1867577026794423, "grad_norm": 0.259765625, "learning_rate": 0.0001954498306151441, "loss": 0.944, "step": 3255 }, { "epoch": 0.18704458087096218, "grad_norm": 0.263671875, "learning_rate": 0.0001954199193720603, "loss": 1.0102, "step": 3260 }, { "epoch": 0.18733145906248208, "grad_norm": 0.263671875, "learning_rate": 0.00019538991244166738, "loss": 0.9339, "step": 3265 }, { "epoch": 0.18761833725400195, "grad_norm": 0.275390625, "learning_rate": 0.00019535980985405639, "loss": 0.9956, "step": 3270 }, { "epoch": 0.18790521544552183, "grad_norm": 0.27734375, "learning_rate": 0.00019532961163941422, "loss": 0.9283, "step": 3275 }, { "epoch": 0.1881920936370417, "grad_norm": 0.28515625, "learning_rate": 0.00019529931782802376, "loss": 1.0013, "step": 3280 }, { "epoch": 0.1884789718285616, "grad_norm": 0.26171875, "learning_rate": 0.00019526892845026365, "loss": 1.0276, "step": 3285 }, { "epoch": 0.18876585002008148, "grad_norm": 0.2470703125, "learning_rate": 0.00019523844353660849, "loss": 0.9785, "step": 3290 }, { "epoch": 0.18905272821160135, "grad_norm": 0.259765625, "learning_rate": 0.0001952078631176286, "loss": 0.9132, "step": 3295 }, { "epoch": 0.18933960640312122, "grad_norm": 0.263671875, "learning_rate": 0.00019517718722399002, "loss": 0.9568, "step": 3300 }, { "epoch": 0.18962648459464113, "grad_norm": 0.259765625, "learning_rate": 0.00019514641588645471, "loss": 0.9375, "step": 3305 }, { "epoch": 0.189913362786161, "grad_norm": 0.279296875, "learning_rate": 0.0001951155491358802, "loss": 0.944, "step": 3310 }, { "epoch": 0.19020024097768087, "grad_norm": 0.2890625, "learning_rate": 0.00019508458700321973, "loss": 1.0012, "step": 3315 }, { "epoch": 0.19048711916920075, "grad_norm": 0.27734375, "learning_rate": 0.00019505352951952221, "loss": 0.9707, "step": 3320 }, { "epoch": 0.19077399736072065, "grad_norm": 0.2578125, "learning_rate": 0.00019502237671593212, "loss": 0.9405, "step": 3325 }, { "epoch": 0.19106087555224052, "grad_norm": 0.2578125, "learning_rate": 0.0001949911286236896, "loss": 0.8607, "step": 3330 }, { "epoch": 0.1913477537437604, "grad_norm": 0.279296875, "learning_rate": 0.00019495978527413028, "loss": 0.9797, "step": 3335 }, { "epoch": 0.19163463193528027, "grad_norm": 0.2578125, "learning_rate": 0.00019492834669868536, "loss": 0.9488, "step": 3340 }, { "epoch": 0.19192151012680017, "grad_norm": 0.2451171875, "learning_rate": 0.00019489681292888148, "loss": 1.012, "step": 3345 }, { "epoch": 0.19220838831832004, "grad_norm": 0.25, "learning_rate": 0.00019486518399634083, "loss": 0.9622, "step": 3350 }, { "epoch": 0.19249526650983992, "grad_norm": 0.255859375, "learning_rate": 0.00019483345993278093, "loss": 0.9597, "step": 3355 }, { "epoch": 0.1927821447013598, "grad_norm": 0.265625, "learning_rate": 0.00019480164077001475, "loss": 1.0033, "step": 3360 }, { "epoch": 0.1930690228928797, "grad_norm": 0.26171875, "learning_rate": 0.00019476972653995062, "loss": 0.9518, "step": 3365 }, { "epoch": 0.19335590108439957, "grad_norm": 0.29296875, "learning_rate": 0.00019473771727459224, "loss": 1.0118, "step": 3370 }, { "epoch": 0.19364277927591944, "grad_norm": 0.259765625, "learning_rate": 0.00019470561300603852, "loss": 0.9938, "step": 3375 }, { "epoch": 0.19392965746743931, "grad_norm": 0.271484375, "learning_rate": 0.00019467341376648372, "loss": 0.9653, "step": 3380 }, { "epoch": 0.19421653565895922, "grad_norm": 0.2734375, "learning_rate": 0.00019464111958821727, "loss": 0.9561, "step": 3385 }, { "epoch": 0.1945034138504791, "grad_norm": 0.27734375, "learning_rate": 0.0001946087305036239, "loss": 1.0023, "step": 3390 }, { "epoch": 0.19479029204199896, "grad_norm": 0.271484375, "learning_rate": 0.00019457624654518343, "loss": 1.001, "step": 3395 }, { "epoch": 0.19507717023351884, "grad_norm": 0.248046875, "learning_rate": 0.00019454366774547083, "loss": 0.9172, "step": 3400 }, { "epoch": 0.19536404842503874, "grad_norm": 0.25390625, "learning_rate": 0.00019451099413715626, "loss": 0.9451, "step": 3405 }, { "epoch": 0.1956509266165586, "grad_norm": 0.259765625, "learning_rate": 0.0001944782257530048, "loss": 0.9521, "step": 3410 }, { "epoch": 0.1959378048080785, "grad_norm": 0.263671875, "learning_rate": 0.00019444536262587669, "loss": 0.9749, "step": 3415 }, { "epoch": 0.19622468299959836, "grad_norm": 0.345703125, "learning_rate": 0.00019441240478872718, "loss": 0.9371, "step": 3420 }, { "epoch": 0.19651156119111826, "grad_norm": 0.2890625, "learning_rate": 0.0001943793522746064, "loss": 0.9343, "step": 3425 }, { "epoch": 0.19679843938263814, "grad_norm": 0.25390625, "learning_rate": 0.00019434620511665958, "loss": 1.0255, "step": 3430 }, { "epoch": 0.197085317574158, "grad_norm": 0.26953125, "learning_rate": 0.0001943129633481267, "loss": 0.9707, "step": 3435 }, { "epoch": 0.19737219576567788, "grad_norm": 0.248046875, "learning_rate": 0.00019427962700234268, "loss": 1.0431, "step": 3440 }, { "epoch": 0.19765907395719778, "grad_norm": 0.259765625, "learning_rate": 0.00019424619611273727, "loss": 0.9646, "step": 3445 }, { "epoch": 0.19794595214871766, "grad_norm": 0.263671875, "learning_rate": 0.0001942126707128351, "loss": 0.9781, "step": 3450 }, { "epoch": 0.19823283034023753, "grad_norm": 0.251953125, "learning_rate": 0.00019417905083625545, "loss": 1.0096, "step": 3455 }, { "epoch": 0.1985197085317574, "grad_norm": 0.2578125, "learning_rate": 0.00019414533651671242, "loss": 0.964, "step": 3460 }, { "epoch": 0.1988065867232773, "grad_norm": 0.259765625, "learning_rate": 0.00019411152778801486, "loss": 0.9573, "step": 3465 }, { "epoch": 0.19909346491479718, "grad_norm": 0.263671875, "learning_rate": 0.00019407762468406619, "loss": 0.9138, "step": 3470 }, { "epoch": 0.19938034310631705, "grad_norm": 0.26953125, "learning_rate": 0.00019404362723886452, "loss": 0.9735, "step": 3475 }, { "epoch": 0.19966722129783693, "grad_norm": 0.2578125, "learning_rate": 0.00019400953548650258, "loss": 0.9722, "step": 3480 }, { "epoch": 0.19995409948935683, "grad_norm": 0.271484375, "learning_rate": 0.00019397534946116762, "loss": 1.0176, "step": 3485 }, { "epoch": 0.2002409776808767, "grad_norm": 0.275390625, "learning_rate": 0.00019394106919714155, "loss": 0.9016, "step": 3490 }, { "epoch": 0.20052785587239658, "grad_norm": 0.263671875, "learning_rate": 0.0001939066947288006, "loss": 1.0353, "step": 3495 }, { "epoch": 0.20081473406391645, "grad_norm": 0.28125, "learning_rate": 0.0001938722260906156, "loss": 0.9489, "step": 3500 }, { "epoch": 0.20110161225543635, "grad_norm": 0.265625, "learning_rate": 0.00019383766331715178, "loss": 1.0551, "step": 3505 }, { "epoch": 0.20138849044695623, "grad_norm": 0.2578125, "learning_rate": 0.00019380300644306878, "loss": 1.0359, "step": 3510 }, { "epoch": 0.2016753686384761, "grad_norm": 0.265625, "learning_rate": 0.00019376825550312057, "loss": 1.0009, "step": 3515 }, { "epoch": 0.20196224682999597, "grad_norm": 0.275390625, "learning_rate": 0.00019373341053215547, "loss": 0.942, "step": 3520 }, { "epoch": 0.20224912502151587, "grad_norm": 0.28125, "learning_rate": 0.0001936984715651161, "loss": 0.9778, "step": 3525 }, { "epoch": 0.20253600321303575, "grad_norm": 0.2578125, "learning_rate": 0.00019366343863703932, "loss": 0.9932, "step": 3530 }, { "epoch": 0.20282288140455562, "grad_norm": 0.251953125, "learning_rate": 0.0001936283117830563, "loss": 0.9191, "step": 3535 }, { "epoch": 0.2031097595960755, "grad_norm": 0.255859375, "learning_rate": 0.00019359309103839225, "loss": 0.9005, "step": 3540 }, { "epoch": 0.2033966377875954, "grad_norm": 0.263671875, "learning_rate": 0.0001935577764383666, "loss": 0.9952, "step": 3545 }, { "epoch": 0.20368351597911527, "grad_norm": 0.283203125, "learning_rate": 0.00019352236801839298, "loss": 0.922, "step": 3550 }, { "epoch": 0.20397039417063514, "grad_norm": 0.263671875, "learning_rate": 0.000193486865813979, "loss": 0.9369, "step": 3555 }, { "epoch": 0.20425727236215502, "grad_norm": 0.2734375, "learning_rate": 0.00019345126986072635, "loss": 1.0501, "step": 3560 }, { "epoch": 0.20454415055367492, "grad_norm": 0.271484375, "learning_rate": 0.0001934155801943307, "loss": 0.9395, "step": 3565 }, { "epoch": 0.2048310287451948, "grad_norm": 0.279296875, "learning_rate": 0.0001933797968505818, "loss": 0.9643, "step": 3570 }, { "epoch": 0.20511790693671467, "grad_norm": 0.26171875, "learning_rate": 0.0001933439198653632, "loss": 0.9855, "step": 3575 }, { "epoch": 0.20540478512823454, "grad_norm": 0.294921875, "learning_rate": 0.00019330794927465247, "loss": 0.9532, "step": 3580 }, { "epoch": 0.20569166331975444, "grad_norm": 0.251953125, "learning_rate": 0.00019327188511452094, "loss": 1.0038, "step": 3585 }, { "epoch": 0.20597854151127432, "grad_norm": 0.26171875, "learning_rate": 0.00019323572742113387, "loss": 0.9216, "step": 3590 }, { "epoch": 0.2062654197027942, "grad_norm": 0.265625, "learning_rate": 0.0001931994762307503, "loss": 0.9706, "step": 3595 }, { "epoch": 0.20655229789431406, "grad_norm": 0.28125, "learning_rate": 0.00019316313157972297, "loss": 0.9644, "step": 3600 }, { "epoch": 0.20683917608583396, "grad_norm": 0.287109375, "learning_rate": 0.00019312669350449836, "loss": 0.973, "step": 3605 }, { "epoch": 0.20712605427735384, "grad_norm": 0.2578125, "learning_rate": 0.0001930901620416167, "loss": 0.8716, "step": 3610 }, { "epoch": 0.2074129324688737, "grad_norm": 0.2734375, "learning_rate": 0.0001930535372277118, "loss": 1.0108, "step": 3615 }, { "epoch": 0.20769981066039359, "grad_norm": 0.259765625, "learning_rate": 0.0001930168190995111, "loss": 0.9474, "step": 3620 }, { "epoch": 0.2079866888519135, "grad_norm": 0.279296875, "learning_rate": 0.00019298000769383565, "loss": 0.9808, "step": 3625 }, { "epoch": 0.20827356704343336, "grad_norm": 0.263671875, "learning_rate": 0.00019294310304759994, "loss": 0.9813, "step": 3630 }, { "epoch": 0.20856044523495323, "grad_norm": 0.283203125, "learning_rate": 0.00019290610519781212, "loss": 0.9752, "step": 3635 }, { "epoch": 0.2088473234264731, "grad_norm": 0.279296875, "learning_rate": 0.00019286901418157367, "loss": 1.0081, "step": 3640 }, { "epoch": 0.209134201617993, "grad_norm": 0.26171875, "learning_rate": 0.00019283183003607955, "loss": 1.0368, "step": 3645 }, { "epoch": 0.20942107980951288, "grad_norm": 0.275390625, "learning_rate": 0.0001927945527986181, "loss": 1.0126, "step": 3650 }, { "epoch": 0.20970795800103276, "grad_norm": 0.263671875, "learning_rate": 0.000192757182506571, "loss": 0.9539, "step": 3655 }, { "epoch": 0.20999483619255263, "grad_norm": 0.271484375, "learning_rate": 0.00019271971919741332, "loss": 0.9511, "step": 3660 }, { "epoch": 0.21028171438407253, "grad_norm": 0.27734375, "learning_rate": 0.0001926821629087133, "loss": 0.9761, "step": 3665 }, { "epoch": 0.2105685925755924, "grad_norm": 0.25390625, "learning_rate": 0.0001926445136781325, "loss": 0.9101, "step": 3670 }, { "epoch": 0.21085547076711228, "grad_norm": 0.26953125, "learning_rate": 0.00019260677154342564, "loss": 1.0644, "step": 3675 }, { "epoch": 0.21114234895863215, "grad_norm": 0.283203125, "learning_rate": 0.0001925689365424406, "loss": 1.0595, "step": 3680 }, { "epoch": 0.21142922715015205, "grad_norm": 0.255859375, "learning_rate": 0.00019253100871311843, "loss": 0.9295, "step": 3685 }, { "epoch": 0.21171610534167193, "grad_norm": 0.28125, "learning_rate": 0.00019249298809349323, "loss": 0.9397, "step": 3690 }, { "epoch": 0.2120029835331918, "grad_norm": 0.27734375, "learning_rate": 0.00019245487472169216, "loss": 0.9591, "step": 3695 }, { "epoch": 0.21228986172471168, "grad_norm": 0.265625, "learning_rate": 0.0001924166686359354, "loss": 0.9513, "step": 3700 }, { "epoch": 0.21257673991623158, "grad_norm": 0.26171875, "learning_rate": 0.00019237836987453613, "loss": 0.9952, "step": 3705 }, { "epoch": 0.21286361810775145, "grad_norm": 0.263671875, "learning_rate": 0.00019233997847590035, "loss": 0.9506, "step": 3710 }, { "epoch": 0.21315049629927132, "grad_norm": 0.25390625, "learning_rate": 0.00019230149447852714, "loss": 0.9978, "step": 3715 }, { "epoch": 0.2134373744907912, "grad_norm": 0.2890625, "learning_rate": 0.00019226291792100826, "loss": 0.9213, "step": 3720 }, { "epoch": 0.2137242526823111, "grad_norm": 0.28515625, "learning_rate": 0.0001922242488420284, "loss": 1.0118, "step": 3725 }, { "epoch": 0.21401113087383097, "grad_norm": 0.251953125, "learning_rate": 0.00019218548728036503, "loss": 0.9238, "step": 3730 }, { "epoch": 0.21429800906535085, "grad_norm": 0.287109375, "learning_rate": 0.00019214663327488828, "loss": 0.966, "step": 3735 }, { "epoch": 0.21458488725687072, "grad_norm": 0.259765625, "learning_rate": 0.00019210768686456106, "loss": 1.0034, "step": 3740 }, { "epoch": 0.21487176544839062, "grad_norm": 0.275390625, "learning_rate": 0.00019206864808843892, "loss": 0.9623, "step": 3745 }, { "epoch": 0.2151586436399105, "grad_norm": 0.337890625, "learning_rate": 0.00019202951698566999, "loss": 0.9571, "step": 3750 }, { "epoch": 0.21544552183143037, "grad_norm": 0.251953125, "learning_rate": 0.0001919902935954951, "loss": 0.95, "step": 3755 }, { "epoch": 0.21573240002295024, "grad_norm": 0.29296875, "learning_rate": 0.00019195097795724747, "loss": 1.026, "step": 3760 }, { "epoch": 0.21601927821447015, "grad_norm": 0.271484375, "learning_rate": 0.000191911570110353, "loss": 1.0171, "step": 3765 }, { "epoch": 0.21630615640599002, "grad_norm": 0.263671875, "learning_rate": 0.0001918720700943299, "loss": 0.99, "step": 3770 }, { "epoch": 0.2165930345975099, "grad_norm": 0.26953125, "learning_rate": 0.0001918324779487889, "loss": 0.9584, "step": 3775 }, { "epoch": 0.21687991278902977, "grad_norm": 0.26953125, "learning_rate": 0.0001917927937134331, "loss": 1.0006, "step": 3780 }, { "epoch": 0.21716679098054967, "grad_norm": 0.259765625, "learning_rate": 0.00019175301742805793, "loss": 0.9589, "step": 3785 }, { "epoch": 0.21745366917206954, "grad_norm": 0.271484375, "learning_rate": 0.00019171314913255113, "loss": 1.013, "step": 3790 }, { "epoch": 0.21774054736358942, "grad_norm": 0.2490234375, "learning_rate": 0.00019167318886689273, "loss": 0.9847, "step": 3795 }, { "epoch": 0.2180274255551093, "grad_norm": 0.283203125, "learning_rate": 0.00019163313667115497, "loss": 0.9847, "step": 3800 }, { "epoch": 0.2183143037466292, "grad_norm": 0.279296875, "learning_rate": 0.00019159299258550227, "loss": 0.9766, "step": 3805 }, { "epoch": 0.21860118193814906, "grad_norm": 0.275390625, "learning_rate": 0.0001915527566501912, "loss": 0.9583, "step": 3810 }, { "epoch": 0.21888806012966894, "grad_norm": 0.259765625, "learning_rate": 0.0001915124289055705, "loss": 0.9287, "step": 3815 }, { "epoch": 0.2191749383211888, "grad_norm": 0.2490234375, "learning_rate": 0.00019147200939208088, "loss": 0.9871, "step": 3820 }, { "epoch": 0.2194618165127087, "grad_norm": 0.265625, "learning_rate": 0.0001914314981502551, "loss": 0.9836, "step": 3825 }, { "epoch": 0.2197486947042286, "grad_norm": 0.265625, "learning_rate": 0.000191390895220718, "loss": 0.8962, "step": 3830 }, { "epoch": 0.22003557289574846, "grad_norm": 0.2734375, "learning_rate": 0.0001913502006441862, "loss": 0.9405, "step": 3835 }, { "epoch": 0.22032245108726833, "grad_norm": 0.2578125, "learning_rate": 0.00019130941446146837, "loss": 0.8986, "step": 3840 }, { "epoch": 0.22060932927878824, "grad_norm": 0.265625, "learning_rate": 0.00019126853671346496, "loss": 0.9524, "step": 3845 }, { "epoch": 0.2208962074703081, "grad_norm": 0.251953125, "learning_rate": 0.00019122756744116828, "loss": 0.9778, "step": 3850 }, { "epoch": 0.22118308566182798, "grad_norm": 0.27734375, "learning_rate": 0.0001911865066856624, "loss": 1.0214, "step": 3855 }, { "epoch": 0.22146996385334786, "grad_norm": 0.28125, "learning_rate": 0.00019114535448812311, "loss": 0.9422, "step": 3860 }, { "epoch": 0.22175684204486776, "grad_norm": 0.28515625, "learning_rate": 0.00019110411088981802, "loss": 0.9638, "step": 3865 }, { "epoch": 0.22204372023638763, "grad_norm": 0.2421875, "learning_rate": 0.0001910627759321062, "loss": 0.9094, "step": 3870 }, { "epoch": 0.2223305984279075, "grad_norm": 0.298828125, "learning_rate": 0.00019102134965643847, "loss": 0.9825, "step": 3875 }, { "epoch": 0.22261747661942738, "grad_norm": 0.2470703125, "learning_rate": 0.0001909798321043572, "loss": 1.0358, "step": 3880 }, { "epoch": 0.22290435481094728, "grad_norm": 0.287109375, "learning_rate": 0.00019093822331749634, "loss": 0.904, "step": 3885 }, { "epoch": 0.22319123300246715, "grad_norm": 0.279296875, "learning_rate": 0.00019089652333758114, "loss": 0.9756, "step": 3890 }, { "epoch": 0.22347811119398703, "grad_norm": 0.26171875, "learning_rate": 0.00019085473220642856, "loss": 0.972, "step": 3895 }, { "epoch": 0.2237649893855069, "grad_norm": 0.27734375, "learning_rate": 0.0001908128499659468, "loss": 0.9302, "step": 3900 }, { "epoch": 0.2240518675770268, "grad_norm": 0.275390625, "learning_rate": 0.00019077087665813545, "loss": 0.9643, "step": 3905 }, { "epoch": 0.22433874576854668, "grad_norm": 0.296875, "learning_rate": 0.0001907288123250854, "loss": 0.9786, "step": 3910 }, { "epoch": 0.22462562396006655, "grad_norm": 0.28125, "learning_rate": 0.00019068665700897896, "loss": 0.9587, "step": 3915 }, { "epoch": 0.22491250215158642, "grad_norm": 0.302734375, "learning_rate": 0.0001906444107520895, "loss": 1.006, "step": 3920 }, { "epoch": 0.22519938034310633, "grad_norm": 0.26171875, "learning_rate": 0.00019060207359678164, "loss": 0.8882, "step": 3925 }, { "epoch": 0.2254862585346262, "grad_norm": 0.2890625, "learning_rate": 0.00019055964558551124, "loss": 0.9823, "step": 3930 }, { "epoch": 0.22577313672614607, "grad_norm": 0.263671875, "learning_rate": 0.00019051712676082522, "loss": 0.9457, "step": 3935 }, { "epoch": 0.22606001491766595, "grad_norm": 0.279296875, "learning_rate": 0.00019047451716536147, "loss": 0.9735, "step": 3940 }, { "epoch": 0.22634689310918585, "grad_norm": 0.291015625, "learning_rate": 0.000190431816841849, "loss": 0.9697, "step": 3945 }, { "epoch": 0.22663377130070572, "grad_norm": 0.275390625, "learning_rate": 0.00019038902583310785, "loss": 1.0229, "step": 3950 }, { "epoch": 0.2269206494922256, "grad_norm": 0.259765625, "learning_rate": 0.00019034614418204893, "loss": 0.9805, "step": 3955 }, { "epoch": 0.22720752768374547, "grad_norm": 0.259765625, "learning_rate": 0.000190303171931674, "loss": 0.9696, "step": 3960 }, { "epoch": 0.22749440587526537, "grad_norm": 0.271484375, "learning_rate": 0.00019026010912507577, "loss": 0.9885, "step": 3965 }, { "epoch": 0.22778128406678524, "grad_norm": 0.279296875, "learning_rate": 0.00019021695580543772, "loss": 0.9571, "step": 3970 }, { "epoch": 0.22806816225830512, "grad_norm": 0.267578125, "learning_rate": 0.00019017371201603407, "loss": 0.8998, "step": 3975 }, { "epoch": 0.228355040449825, "grad_norm": 0.265625, "learning_rate": 0.00019013037780022982, "loss": 1.0427, "step": 3980 }, { "epoch": 0.2286419186413449, "grad_norm": 0.267578125, "learning_rate": 0.00019008695320148062, "loss": 0.9227, "step": 3985 }, { "epoch": 0.22892879683286477, "grad_norm": 0.267578125, "learning_rate": 0.0001900434382633327, "loss": 0.9698, "step": 3990 }, { "epoch": 0.22921567502438464, "grad_norm": 0.255859375, "learning_rate": 0.00018999983302942302, "loss": 0.9134, "step": 3995 }, { "epoch": 0.22950255321590451, "grad_norm": 0.302734375, "learning_rate": 0.00018995613754347893, "loss": 0.9699, "step": 4000 }, { "epoch": 0.22978943140742442, "grad_norm": 0.26171875, "learning_rate": 0.00018991235184931843, "loss": 0.896, "step": 4005 }, { "epoch": 0.2300763095989443, "grad_norm": 0.26171875, "learning_rate": 0.00018986847599084986, "loss": 0.9285, "step": 4010 }, { "epoch": 0.23036318779046416, "grad_norm": 0.26953125, "learning_rate": 0.00018982451001207205, "loss": 0.984, "step": 4015 }, { "epoch": 0.23065006598198404, "grad_norm": 0.255859375, "learning_rate": 0.00018978045395707418, "loss": 0.9619, "step": 4020 }, { "epoch": 0.23093694417350394, "grad_norm": 0.279296875, "learning_rate": 0.00018973630787003575, "loss": 0.9796, "step": 4025 }, { "epoch": 0.2312238223650238, "grad_norm": 0.275390625, "learning_rate": 0.0001896920717952266, "loss": 0.9083, "step": 4030 }, { "epoch": 0.23151070055654369, "grad_norm": 0.271484375, "learning_rate": 0.00018964774577700667, "loss": 1.0115, "step": 4035 }, { "epoch": 0.23179757874806356, "grad_norm": 0.25390625, "learning_rate": 0.00018960332985982627, "loss": 0.9947, "step": 4040 }, { "epoch": 0.23208445693958346, "grad_norm": 0.259765625, "learning_rate": 0.0001895588240882258, "loss": 0.9585, "step": 4045 }, { "epoch": 0.23237133513110333, "grad_norm": 0.2734375, "learning_rate": 0.0001895142285068357, "loss": 0.9033, "step": 4050 }, { "epoch": 0.2326582133226232, "grad_norm": 0.265625, "learning_rate": 0.00018946954316037648, "loss": 0.9287, "step": 4055 }, { "epoch": 0.23294509151414308, "grad_norm": 0.259765625, "learning_rate": 0.0001894247680936588, "loss": 0.9409, "step": 4060 }, { "epoch": 0.23323196970566298, "grad_norm": 0.251953125, "learning_rate": 0.00018937990335158312, "loss": 0.9404, "step": 4065 }, { "epoch": 0.23351884789718286, "grad_norm": 0.26953125, "learning_rate": 0.00018933494897913997, "loss": 0.99, "step": 4070 }, { "epoch": 0.23380572608870273, "grad_norm": 0.279296875, "learning_rate": 0.00018928990502140963, "loss": 0.9034, "step": 4075 }, { "epoch": 0.2340926042802226, "grad_norm": 0.25390625, "learning_rate": 0.00018924477152356233, "loss": 1.0379, "step": 4080 }, { "epoch": 0.2343794824717425, "grad_norm": 0.255859375, "learning_rate": 0.00018919954853085803, "loss": 0.948, "step": 4085 }, { "epoch": 0.23466636066326238, "grad_norm": 0.28515625, "learning_rate": 0.0001891542360886464, "loss": 1.0377, "step": 4090 }, { "epoch": 0.23495323885478225, "grad_norm": 0.255859375, "learning_rate": 0.00018910883424236695, "loss": 0.9948, "step": 4095 }, { "epoch": 0.23524011704630213, "grad_norm": 0.291015625, "learning_rate": 0.0001890633430375487, "loss": 0.9823, "step": 4100 }, { "epoch": 0.23552699523782203, "grad_norm": 0.279296875, "learning_rate": 0.00018901776251981032, "loss": 0.9425, "step": 4105 }, { "epoch": 0.2358138734293419, "grad_norm": 0.2578125, "learning_rate": 0.0001889720927348601, "loss": 0.9568, "step": 4110 }, { "epoch": 0.23610075162086178, "grad_norm": 0.265625, "learning_rate": 0.00018892633372849575, "loss": 0.9233, "step": 4115 }, { "epoch": 0.23638762981238165, "grad_norm": 0.267578125, "learning_rate": 0.00018888048554660454, "loss": 0.9648, "step": 4120 }, { "epoch": 0.23667450800390155, "grad_norm": 0.271484375, "learning_rate": 0.00018883454823516313, "loss": 0.9916, "step": 4125 }, { "epoch": 0.23696138619542143, "grad_norm": 0.27734375, "learning_rate": 0.0001887885218402375, "loss": 1.0255, "step": 4130 }, { "epoch": 0.2372482643869413, "grad_norm": 0.27734375, "learning_rate": 0.00018874240640798316, "loss": 0.9522, "step": 4135 }, { "epoch": 0.23753514257846117, "grad_norm": 0.255859375, "learning_rate": 0.0001886962019846446, "loss": 0.9887, "step": 4140 }, { "epoch": 0.23782202076998107, "grad_norm": 0.3046875, "learning_rate": 0.00018864990861655584, "loss": 0.9113, "step": 4145 }, { "epoch": 0.23810889896150095, "grad_norm": 0.255859375, "learning_rate": 0.0001886035263501399, "loss": 0.9509, "step": 4150 }, { "epoch": 0.23839577715302082, "grad_norm": 0.279296875, "learning_rate": 0.00018855705523190908, "loss": 0.9792, "step": 4155 }, { "epoch": 0.2386826553445407, "grad_norm": 0.25390625, "learning_rate": 0.0001885104953084647, "loss": 0.9923, "step": 4160 }, { "epoch": 0.2389695335360606, "grad_norm": 0.28125, "learning_rate": 0.00018846384662649714, "loss": 0.9719, "step": 4165 }, { "epoch": 0.23925641172758047, "grad_norm": 0.2734375, "learning_rate": 0.00018841710923278582, "loss": 0.9501, "step": 4170 }, { "epoch": 0.23954328991910034, "grad_norm": 0.2578125, "learning_rate": 0.00018837028317419908, "loss": 0.9205, "step": 4175 }, { "epoch": 0.23983016811062022, "grad_norm": 0.25390625, "learning_rate": 0.00018832336849769425, "loss": 0.9432, "step": 4180 }, { "epoch": 0.24011704630214012, "grad_norm": 0.291015625, "learning_rate": 0.0001882763652503174, "loss": 1.0014, "step": 4185 }, { "epoch": 0.24040392449366, "grad_norm": 0.275390625, "learning_rate": 0.00018822927347920355, "loss": 0.9548, "step": 4190 }, { "epoch": 0.24069080268517987, "grad_norm": 0.255859375, "learning_rate": 0.00018818209323157638, "loss": 1.0308, "step": 4195 }, { "epoch": 0.24097768087669974, "grad_norm": 0.314453125, "learning_rate": 0.0001881348245547484, "loss": 1.0047, "step": 4200 }, { "epoch": 0.24126455906821964, "grad_norm": 0.291015625, "learning_rate": 0.00018808746749612073, "loss": 0.919, "step": 4205 }, { "epoch": 0.24155143725973952, "grad_norm": 0.263671875, "learning_rate": 0.0001880400221031831, "loss": 0.9214, "step": 4210 }, { "epoch": 0.2418383154512594, "grad_norm": 0.26953125, "learning_rate": 0.00018799248842351393, "loss": 0.9631, "step": 4215 }, { "epoch": 0.24212519364277926, "grad_norm": 0.2578125, "learning_rate": 0.00018794486650478004, "loss": 0.9936, "step": 4220 }, { "epoch": 0.24241207183429916, "grad_norm": 0.26171875, "learning_rate": 0.0001878971563947368, "loss": 0.9386, "step": 4225 }, { "epoch": 0.24269895002581904, "grad_norm": 0.28515625, "learning_rate": 0.00018784935814122804, "loss": 1.0508, "step": 4230 }, { "epoch": 0.2429858282173389, "grad_norm": 0.251953125, "learning_rate": 0.0001878014717921859, "loss": 0.8909, "step": 4235 }, { "epoch": 0.24327270640885879, "grad_norm": 0.271484375, "learning_rate": 0.000187753497395631, "loss": 0.9797, "step": 4240 }, { "epoch": 0.2435595846003787, "grad_norm": 0.287109375, "learning_rate": 0.00018770543499967217, "loss": 0.9766, "step": 4245 }, { "epoch": 0.24384646279189856, "grad_norm": 0.275390625, "learning_rate": 0.00018765728465250644, "loss": 1.0364, "step": 4250 }, { "epoch": 0.24413334098341843, "grad_norm": 0.26171875, "learning_rate": 0.00018760904640241906, "loss": 0.9276, "step": 4255 }, { "epoch": 0.2444202191749383, "grad_norm": 0.275390625, "learning_rate": 0.00018756072029778352, "loss": 0.9165, "step": 4260 }, { "epoch": 0.2447070973664582, "grad_norm": 0.26953125, "learning_rate": 0.00018751230638706131, "loss": 0.9561, "step": 4265 }, { "epoch": 0.24499397555797808, "grad_norm": 0.2734375, "learning_rate": 0.00018746380471880203, "loss": 0.9377, "step": 4270 }, { "epoch": 0.24528085374949796, "grad_norm": 0.275390625, "learning_rate": 0.00018741521534164325, "loss": 0.9992, "step": 4275 }, { "epoch": 0.24556773194101783, "grad_norm": 0.275390625, "learning_rate": 0.00018736653830431048, "loss": 1.0328, "step": 4280 }, { "epoch": 0.24585461013253773, "grad_norm": 0.2578125, "learning_rate": 0.0001873177736556172, "loss": 0.9225, "step": 4285 }, { "epoch": 0.2461414883240576, "grad_norm": 0.271484375, "learning_rate": 0.00018726892144446474, "loss": 0.9061, "step": 4290 }, { "epoch": 0.24642836651557748, "grad_norm": 0.271484375, "learning_rate": 0.0001872199817198421, "loss": 0.9639, "step": 4295 }, { "epoch": 0.24671524470709735, "grad_norm": 0.2734375, "learning_rate": 0.00018717095453082627, "loss": 0.9764, "step": 4300 }, { "epoch": 0.24700212289861725, "grad_norm": 0.265625, "learning_rate": 0.00018712183992658174, "loss": 0.9006, "step": 4305 }, { "epoch": 0.24728900109013713, "grad_norm": 0.27734375, "learning_rate": 0.00018707263795636077, "loss": 0.9212, "step": 4310 }, { "epoch": 0.247575879281657, "grad_norm": 0.267578125, "learning_rate": 0.0001870233486695032, "loss": 0.8946, "step": 4315 }, { "epoch": 0.24786275747317688, "grad_norm": 0.28125, "learning_rate": 0.0001869739721154365, "loss": 1.0377, "step": 4320 }, { "epoch": 0.24814963566469678, "grad_norm": 0.29296875, "learning_rate": 0.00018692450834367546, "loss": 1.0284, "step": 4325 }, { "epoch": 0.24843651385621665, "grad_norm": 0.283203125, "learning_rate": 0.00018687495740382258, "loss": 0.9688, "step": 4330 }, { "epoch": 0.24872339204773652, "grad_norm": 0.263671875, "learning_rate": 0.00018682531934556757, "loss": 0.989, "step": 4335 }, { "epoch": 0.2490102702392564, "grad_norm": 0.265625, "learning_rate": 0.00018677559421868766, "loss": 0.9916, "step": 4340 }, { "epoch": 0.2492971484307763, "grad_norm": 0.2890625, "learning_rate": 0.00018672578207304727, "loss": 0.9724, "step": 4345 }, { "epoch": 0.24958402662229617, "grad_norm": 0.2734375, "learning_rate": 0.00018667588295859816, "loss": 0.9535, "step": 4350 }, { "epoch": 0.24987090481381605, "grad_norm": 0.275390625, "learning_rate": 0.0001866258969253792, "loss": 1.0108, "step": 4355 }, { "epoch": 0.2501577830053359, "grad_norm": 0.2734375, "learning_rate": 0.00018657582402351663, "loss": 1.0017, "step": 4360 }, { "epoch": 0.2504446611968558, "grad_norm": 0.2734375, "learning_rate": 0.00018652566430322356, "loss": 1.0202, "step": 4365 }, { "epoch": 0.25073153938837567, "grad_norm": 0.27734375, "learning_rate": 0.0001864754178148003, "loss": 0.9738, "step": 4370 }, { "epoch": 0.25101841757989557, "grad_norm": 0.26171875, "learning_rate": 0.00018642508460863416, "loss": 0.9779, "step": 4375 }, { "epoch": 0.25130529577141547, "grad_norm": 0.27734375, "learning_rate": 0.00018637466473519937, "loss": 0.966, "step": 4380 }, { "epoch": 0.2515921739629353, "grad_norm": 0.3203125, "learning_rate": 0.0001863241582450571, "loss": 1.0283, "step": 4385 }, { "epoch": 0.2518790521544552, "grad_norm": 0.259765625, "learning_rate": 0.00018627356518885536, "loss": 0.9777, "step": 4390 }, { "epoch": 0.2521659303459751, "grad_norm": 0.27734375, "learning_rate": 0.000186222885617329, "loss": 0.9649, "step": 4395 }, { "epoch": 0.25245280853749497, "grad_norm": 0.271484375, "learning_rate": 0.00018617211958129958, "loss": 0.8768, "step": 4400 }, { "epoch": 0.25273968672901487, "grad_norm": 0.265625, "learning_rate": 0.00018612126713167542, "loss": 1.0189, "step": 4405 }, { "epoch": 0.2530265649205347, "grad_norm": 0.2578125, "learning_rate": 0.0001860703283194515, "loss": 0.9404, "step": 4410 }, { "epoch": 0.2533134431120546, "grad_norm": 0.275390625, "learning_rate": 0.00018601930319570934, "loss": 0.9849, "step": 4415 }, { "epoch": 0.2536003213035745, "grad_norm": 0.2734375, "learning_rate": 0.000185968191811617, "loss": 0.9844, "step": 4420 }, { "epoch": 0.25388719949509436, "grad_norm": 0.259765625, "learning_rate": 0.00018591699421842916, "loss": 0.8693, "step": 4425 }, { "epoch": 0.25417407768661426, "grad_norm": 0.283203125, "learning_rate": 0.00018586571046748685, "loss": 1.0033, "step": 4430 }, { "epoch": 0.25446095587813417, "grad_norm": 0.263671875, "learning_rate": 0.00018581434061021754, "loss": 0.9435, "step": 4435 }, { "epoch": 0.254747834069654, "grad_norm": 0.275390625, "learning_rate": 0.00018576288469813505, "loss": 1.019, "step": 4440 }, { "epoch": 0.2550347122611739, "grad_norm": 0.25390625, "learning_rate": 0.00018571134278283946, "loss": 1.0081, "step": 4445 }, { "epoch": 0.25532159045269376, "grad_norm": 0.279296875, "learning_rate": 0.0001856597149160171, "loss": 0.9636, "step": 4450 }, { "epoch": 0.25560846864421366, "grad_norm": 0.255859375, "learning_rate": 0.00018560800114944063, "loss": 1.0263, "step": 4455 }, { "epoch": 0.25589534683573356, "grad_norm": 0.248046875, "learning_rate": 0.00018555620153496856, "loss": 0.9449, "step": 4460 }, { "epoch": 0.2561822250272534, "grad_norm": 0.26953125, "learning_rate": 0.00018550431612454578, "loss": 1.0175, "step": 4465 }, { "epoch": 0.2564691032187733, "grad_norm": 0.263671875, "learning_rate": 0.00018545234497020302, "loss": 0.9296, "step": 4470 }, { "epoch": 0.2567559814102932, "grad_norm": 0.25, "learning_rate": 0.00018540028812405717, "loss": 0.9648, "step": 4475 }, { "epoch": 0.25704285960181306, "grad_norm": 0.26171875, "learning_rate": 0.00018534814563831082, "loss": 0.9098, "step": 4480 }, { "epoch": 0.25732973779333296, "grad_norm": 0.287109375, "learning_rate": 0.00018529591756525268, "loss": 1.0392, "step": 4485 }, { "epoch": 0.25761661598485286, "grad_norm": 0.3125, "learning_rate": 0.00018524360395725715, "loss": 0.9497, "step": 4490 }, { "epoch": 0.2579034941763727, "grad_norm": 0.255859375, "learning_rate": 0.0001851912048667844, "loss": 0.9545, "step": 4495 }, { "epoch": 0.2581903723678926, "grad_norm": 0.25390625, "learning_rate": 0.00018513872034638037, "loss": 0.9736, "step": 4500 }, { "epoch": 0.25847725055941245, "grad_norm": 0.265625, "learning_rate": 0.00018508615044867668, "loss": 0.9045, "step": 4505 }, { "epoch": 0.25876412875093235, "grad_norm": 0.259765625, "learning_rate": 0.00018503349522639052, "loss": 0.9952, "step": 4510 }, { "epoch": 0.25905100694245226, "grad_norm": 0.287109375, "learning_rate": 0.00018498075473232469, "loss": 0.9679, "step": 4515 }, { "epoch": 0.2593378851339721, "grad_norm": 0.255859375, "learning_rate": 0.00018492792901936742, "loss": 0.994, "step": 4520 }, { "epoch": 0.259624763325492, "grad_norm": 0.267578125, "learning_rate": 0.0001848750181404925, "loss": 0.9577, "step": 4525 }, { "epoch": 0.2599116415170119, "grad_norm": 0.267578125, "learning_rate": 0.00018482202214875908, "loss": 0.981, "step": 4530 }, { "epoch": 0.26019851970853175, "grad_norm": 0.283203125, "learning_rate": 0.00018476894109731166, "loss": 1.0434, "step": 4535 }, { "epoch": 0.26048539790005165, "grad_norm": 0.263671875, "learning_rate": 0.00018471577503938, "loss": 0.9877, "step": 4540 }, { "epoch": 0.2607722760915715, "grad_norm": 0.265625, "learning_rate": 0.00018466252402827915, "loss": 1.0051, "step": 4545 }, { "epoch": 0.2610591542830914, "grad_norm": 0.283203125, "learning_rate": 0.00018460918811740937, "loss": 0.9629, "step": 4550 }, { "epoch": 0.2613460324746113, "grad_norm": 0.279296875, "learning_rate": 0.00018455576736025602, "loss": 0.9279, "step": 4555 }, { "epoch": 0.26163291066613115, "grad_norm": 0.26953125, "learning_rate": 0.00018450226181038955, "loss": 0.9259, "step": 4560 }, { "epoch": 0.26191978885765105, "grad_norm": 0.2578125, "learning_rate": 0.00018444867152146545, "loss": 0.9689, "step": 4565 }, { "epoch": 0.26220666704917095, "grad_norm": 0.2734375, "learning_rate": 0.00018439499654722421, "loss": 0.9728, "step": 4570 }, { "epoch": 0.2624935452406908, "grad_norm": 0.279296875, "learning_rate": 0.00018434123694149117, "loss": 0.9916, "step": 4575 }, { "epoch": 0.2627804234322107, "grad_norm": 0.259765625, "learning_rate": 0.0001842873927581766, "loss": 1.033, "step": 4580 }, { "epoch": 0.26306730162373054, "grad_norm": 0.275390625, "learning_rate": 0.00018423346405127555, "loss": 0.9902, "step": 4585 }, { "epoch": 0.26335417981525044, "grad_norm": 0.291015625, "learning_rate": 0.0001841794508748679, "loss": 0.9779, "step": 4590 }, { "epoch": 0.26364105800677035, "grad_norm": 0.275390625, "learning_rate": 0.00018412535328311814, "loss": 1.0043, "step": 4595 }, { "epoch": 0.2639279361982902, "grad_norm": 0.240234375, "learning_rate": 0.00018407117133027544, "loss": 0.9412, "step": 4600 }, { "epoch": 0.2642148143898101, "grad_norm": 0.267578125, "learning_rate": 0.0001840169050706736, "loss": 0.9353, "step": 4605 }, { "epoch": 0.26450169258133, "grad_norm": 0.267578125, "learning_rate": 0.00018396255455873102, "loss": 0.962, "step": 4610 }, { "epoch": 0.26478857077284984, "grad_norm": 0.26953125, "learning_rate": 0.0001839081198489504, "loss": 1.0103, "step": 4615 }, { "epoch": 0.26507544896436974, "grad_norm": 0.275390625, "learning_rate": 0.0001838536009959191, "loss": 1.0115, "step": 4620 }, { "epoch": 0.2653623271558896, "grad_norm": 0.2578125, "learning_rate": 0.00018379899805430862, "loss": 0.9928, "step": 4625 }, { "epoch": 0.2656492053474095, "grad_norm": 0.283203125, "learning_rate": 0.00018374431107887502, "loss": 0.9347, "step": 4630 }, { "epoch": 0.2659360835389294, "grad_norm": 0.291015625, "learning_rate": 0.00018368954012445846, "loss": 0.9674, "step": 4635 }, { "epoch": 0.26622296173044924, "grad_norm": 0.267578125, "learning_rate": 0.00018363468524598342, "loss": 0.918, "step": 4640 }, { "epoch": 0.26650983992196914, "grad_norm": 0.298828125, "learning_rate": 0.0001835797464984585, "loss": 0.9558, "step": 4645 }, { "epoch": 0.26679671811348904, "grad_norm": 0.259765625, "learning_rate": 0.00018352472393697632, "loss": 0.9257, "step": 4650 }, { "epoch": 0.2670835963050089, "grad_norm": 0.28125, "learning_rate": 0.0001834696176167137, "loss": 0.9475, "step": 4655 }, { "epoch": 0.2673704744965288, "grad_norm": 0.265625, "learning_rate": 0.00018341442759293137, "loss": 0.9681, "step": 4660 }, { "epoch": 0.26765735268804863, "grad_norm": 0.28125, "learning_rate": 0.00018335915392097402, "loss": 0.9579, "step": 4665 }, { "epoch": 0.26794423087956853, "grad_norm": 0.279296875, "learning_rate": 0.00018330379665627014, "loss": 0.9203, "step": 4670 }, { "epoch": 0.26823110907108844, "grad_norm": 0.279296875, "learning_rate": 0.00018324835585433225, "loss": 0.927, "step": 4675 }, { "epoch": 0.2685179872626083, "grad_norm": 0.267578125, "learning_rate": 0.00018319283157075636, "loss": 1.0078, "step": 4680 }, { "epoch": 0.2688048654541282, "grad_norm": 0.259765625, "learning_rate": 0.00018313722386122247, "loss": 0.9204, "step": 4685 }, { "epoch": 0.2690917436456481, "grad_norm": 0.28515625, "learning_rate": 0.00018308153278149406, "loss": 0.9469, "step": 4690 }, { "epoch": 0.26937862183716793, "grad_norm": 0.2734375, "learning_rate": 0.00018302575838741826, "loss": 1.0207, "step": 4695 }, { "epoch": 0.26966550002868783, "grad_norm": 0.27734375, "learning_rate": 0.0001829699007349258, "loss": 0.9881, "step": 4700 }, { "epoch": 0.2699523782202077, "grad_norm": 0.26953125, "learning_rate": 0.0001829139598800308, "loss": 0.8757, "step": 4705 }, { "epoch": 0.2702392564117276, "grad_norm": 0.271484375, "learning_rate": 0.00018285793587883092, "loss": 1.0423, "step": 4710 }, { "epoch": 0.2705261346032475, "grad_norm": 0.255859375, "learning_rate": 0.00018280182878750717, "loss": 1.0039, "step": 4715 }, { "epoch": 0.2708130127947673, "grad_norm": 0.2734375, "learning_rate": 0.0001827456386623238, "loss": 0.9478, "step": 4720 }, { "epoch": 0.27109989098628723, "grad_norm": 0.271484375, "learning_rate": 0.00018268936555962845, "loss": 1.0252, "step": 4725 }, { "epoch": 0.27138676917780713, "grad_norm": 0.263671875, "learning_rate": 0.0001826330095358519, "loss": 0.9751, "step": 4730 }, { "epoch": 0.271673647369327, "grad_norm": 0.3046875, "learning_rate": 0.00018257657064750808, "loss": 0.9103, "step": 4735 }, { "epoch": 0.2719605255608469, "grad_norm": 0.263671875, "learning_rate": 0.00018252004895119404, "loss": 0.9596, "step": 4740 }, { "epoch": 0.2722474037523667, "grad_norm": 0.26953125, "learning_rate": 0.00018246344450358986, "loss": 0.9718, "step": 4745 }, { "epoch": 0.2725342819438866, "grad_norm": 0.255859375, "learning_rate": 0.00018240675736145865, "loss": 1.0375, "step": 4750 }, { "epoch": 0.2728211601354065, "grad_norm": 0.296875, "learning_rate": 0.0001823499875816464, "loss": 0.966, "step": 4755 }, { "epoch": 0.2731080383269264, "grad_norm": 0.294921875, "learning_rate": 0.00018229313522108187, "loss": 1.0054, "step": 4760 }, { "epoch": 0.2733949165184463, "grad_norm": 0.27734375, "learning_rate": 0.00018223620033677685, "loss": 0.9734, "step": 4765 }, { "epoch": 0.2736817947099662, "grad_norm": 0.35546875, "learning_rate": 0.00018217918298582572, "loss": 0.9234, "step": 4770 }, { "epoch": 0.273968672901486, "grad_norm": 0.275390625, "learning_rate": 0.0001821220832254056, "loss": 0.9812, "step": 4775 }, { "epoch": 0.2742555510930059, "grad_norm": 0.259765625, "learning_rate": 0.00018206490111277629, "loss": 0.9476, "step": 4780 }, { "epoch": 0.27454242928452577, "grad_norm": 0.26953125, "learning_rate": 0.00018200763670528011, "loss": 0.9397, "step": 4785 }, { "epoch": 0.27482930747604567, "grad_norm": 0.271484375, "learning_rate": 0.00018195029006034193, "loss": 0.947, "step": 4790 }, { "epoch": 0.27511618566756557, "grad_norm": 0.271484375, "learning_rate": 0.00018189286123546916, "loss": 1.0403, "step": 4795 }, { "epoch": 0.2754030638590854, "grad_norm": 0.271484375, "learning_rate": 0.00018183535028825149, "loss": 0.9509, "step": 4800 }, { "epoch": 0.2756899420506053, "grad_norm": 0.267578125, "learning_rate": 0.00018177775727636105, "loss": 0.9679, "step": 4805 }, { "epoch": 0.2759768202421252, "grad_norm": 0.271484375, "learning_rate": 0.00018172008225755224, "loss": 0.9199, "step": 4810 }, { "epoch": 0.27626369843364507, "grad_norm": 0.251953125, "learning_rate": 0.00018166232528966169, "loss": 0.9235, "step": 4815 }, { "epoch": 0.27655057662516497, "grad_norm": 0.30078125, "learning_rate": 0.0001816044864306082, "loss": 0.9656, "step": 4820 }, { "epoch": 0.2768374548166848, "grad_norm": 0.27734375, "learning_rate": 0.00018154656573839275, "loss": 1.1683, "step": 4825 }, { "epoch": 0.2771243330082047, "grad_norm": 0.2734375, "learning_rate": 0.00018148856327109832, "loss": 0.9441, "step": 4830 }, { "epoch": 0.2774112111997246, "grad_norm": 0.263671875, "learning_rate": 0.00018143047908688993, "loss": 0.9285, "step": 4835 }, { "epoch": 0.27769808939124446, "grad_norm": 0.255859375, "learning_rate": 0.00018137231324401448, "loss": 1.0174, "step": 4840 }, { "epoch": 0.27798496758276436, "grad_norm": 0.271484375, "learning_rate": 0.00018131406580080084, "loss": 0.9398, "step": 4845 }, { "epoch": 0.27827184577428427, "grad_norm": 0.267578125, "learning_rate": 0.00018125573681565969, "loss": 0.9351, "step": 4850 }, { "epoch": 0.2785587239658041, "grad_norm": 0.267578125, "learning_rate": 0.00018119732634708346, "loss": 0.98, "step": 4855 }, { "epoch": 0.278845602157324, "grad_norm": 0.275390625, "learning_rate": 0.0001811388344536463, "loss": 0.936, "step": 4860 }, { "epoch": 0.27913248034884386, "grad_norm": 0.26953125, "learning_rate": 0.00018108026119400397, "loss": 0.9626, "step": 4865 }, { "epoch": 0.27941935854036376, "grad_norm": 0.26953125, "learning_rate": 0.00018102160662689394, "loss": 0.9362, "step": 4870 }, { "epoch": 0.27970623673188366, "grad_norm": 0.345703125, "learning_rate": 0.0001809628708111351, "loss": 0.9749, "step": 4875 }, { "epoch": 0.2799931149234035, "grad_norm": 0.267578125, "learning_rate": 0.00018090405380562786, "loss": 0.9594, "step": 4880 }, { "epoch": 0.2802799931149234, "grad_norm": 0.302734375, "learning_rate": 0.00018084515566935402, "loss": 1.0839, "step": 4885 }, { "epoch": 0.2805668713064433, "grad_norm": 0.259765625, "learning_rate": 0.00018078617646137682, "loss": 0.9831, "step": 4890 }, { "epoch": 0.28085374949796316, "grad_norm": 0.326171875, "learning_rate": 0.00018072711624084068, "loss": 0.9761, "step": 4895 }, { "epoch": 0.28114062768948306, "grad_norm": 0.2578125, "learning_rate": 0.00018066797506697136, "loss": 1.0078, "step": 4900 }, { "epoch": 0.2814275058810029, "grad_norm": 0.291015625, "learning_rate": 0.0001806087529990758, "loss": 0.9566, "step": 4905 }, { "epoch": 0.2817143840725228, "grad_norm": 0.267578125, "learning_rate": 0.00018054945009654194, "loss": 0.9637, "step": 4910 }, { "epoch": 0.2820012622640427, "grad_norm": 0.25390625, "learning_rate": 0.00018049006641883888, "loss": 0.9415, "step": 4915 }, { "epoch": 0.28228814045556255, "grad_norm": 0.28515625, "learning_rate": 0.00018043060202551674, "loss": 1.0057, "step": 4920 }, { "epoch": 0.28257501864708245, "grad_norm": 0.27734375, "learning_rate": 0.00018037105697620655, "loss": 0.9708, "step": 4925 }, { "epoch": 0.28286189683860236, "grad_norm": 0.2890625, "learning_rate": 0.0001803114313306202, "loss": 0.9813, "step": 4930 }, { "epoch": 0.2831487750301222, "grad_norm": 0.255859375, "learning_rate": 0.00018025172514855043, "loss": 0.9325, "step": 4935 }, { "epoch": 0.2834356532216421, "grad_norm": 0.26953125, "learning_rate": 0.0001801919384898707, "loss": 0.9666, "step": 4940 }, { "epoch": 0.28372253141316195, "grad_norm": 0.271484375, "learning_rate": 0.00018013207141453523, "loss": 0.962, "step": 4945 }, { "epoch": 0.28400940960468185, "grad_norm": 0.283203125, "learning_rate": 0.00018007212398257888, "loss": 0.9707, "step": 4950 }, { "epoch": 0.28429628779620175, "grad_norm": 0.26953125, "learning_rate": 0.00018001209625411705, "loss": 1.0216, "step": 4955 }, { "epoch": 0.2845831659877216, "grad_norm": 0.263671875, "learning_rate": 0.00017995198828934568, "loss": 0.9455, "step": 4960 }, { "epoch": 0.2848700441792415, "grad_norm": 0.2734375, "learning_rate": 0.00017989180014854115, "loss": 0.9858, "step": 4965 }, { "epoch": 0.2851569223707614, "grad_norm": 0.259765625, "learning_rate": 0.0001798315318920603, "loss": 0.9292, "step": 4970 }, { "epoch": 0.28544380056228125, "grad_norm": 0.265625, "learning_rate": 0.00017977118358034023, "loss": 0.9592, "step": 4975 }, { "epoch": 0.28573067875380115, "grad_norm": 0.259765625, "learning_rate": 0.0001797107552738984, "loss": 0.9366, "step": 4980 }, { "epoch": 0.286017556945321, "grad_norm": 0.25, "learning_rate": 0.00017965024703333246, "loss": 0.9745, "step": 4985 }, { "epoch": 0.2863044351368409, "grad_norm": 0.267578125, "learning_rate": 0.0001795896589193202, "loss": 0.9848, "step": 4990 }, { "epoch": 0.2865913133283608, "grad_norm": 0.259765625, "learning_rate": 0.00017952899099261943, "loss": 0.978, "step": 4995 }, { "epoch": 0.28687819151988064, "grad_norm": 0.265625, "learning_rate": 0.00017946824331406823, "loss": 0.9613, "step": 5000 }, { "epoch": 0.28716506971140054, "grad_norm": 0.267578125, "learning_rate": 0.00017940741594458444, "loss": 1.0832, "step": 5005 }, { "epoch": 0.28745194790292045, "grad_norm": 0.267578125, "learning_rate": 0.00017934650894516584, "loss": 0.9122, "step": 5010 }, { "epoch": 0.2877388260944403, "grad_norm": 0.2578125, "learning_rate": 0.00017928552237689015, "loss": 0.9856, "step": 5015 }, { "epoch": 0.2880257042859602, "grad_norm": 0.26953125, "learning_rate": 0.00017922445630091485, "loss": 0.9846, "step": 5020 }, { "epoch": 0.28831258247748004, "grad_norm": 0.2578125, "learning_rate": 0.0001791633107784771, "loss": 0.9521, "step": 5025 }, { "epoch": 0.28859946066899994, "grad_norm": 0.251953125, "learning_rate": 0.0001791020858708938, "loss": 0.9727, "step": 5030 }, { "epoch": 0.28888633886051984, "grad_norm": 0.265625, "learning_rate": 0.00017904078163956142, "loss": 0.9935, "step": 5035 }, { "epoch": 0.2891732170520397, "grad_norm": 0.271484375, "learning_rate": 0.00017897939814595596, "loss": 0.9994, "step": 5040 }, { "epoch": 0.2894600952435596, "grad_norm": 0.267578125, "learning_rate": 0.00017891793545163297, "loss": 0.9973, "step": 5045 }, { "epoch": 0.2897469734350795, "grad_norm": 0.267578125, "learning_rate": 0.00017885639361822728, "loss": 0.9503, "step": 5050 }, { "epoch": 0.29003385162659934, "grad_norm": 0.275390625, "learning_rate": 0.00017879477270745328, "loss": 0.9388, "step": 5055 }, { "epoch": 0.29032072981811924, "grad_norm": 0.2734375, "learning_rate": 0.0001787330727811045, "loss": 0.9239, "step": 5060 }, { "epoch": 0.2906076080096391, "grad_norm": 0.25, "learning_rate": 0.00017867129390105384, "loss": 0.9892, "step": 5065 }, { "epoch": 0.290894486201159, "grad_norm": 0.263671875, "learning_rate": 0.0001786094361292532, "loss": 0.9695, "step": 5070 }, { "epoch": 0.2911813643926789, "grad_norm": 0.2734375, "learning_rate": 0.00017854749952773372, "loss": 0.9476, "step": 5075 }, { "epoch": 0.29146824258419873, "grad_norm": 0.259765625, "learning_rate": 0.00017848548415860563, "loss": 0.9391, "step": 5080 }, { "epoch": 0.29175512077571863, "grad_norm": 0.279296875, "learning_rate": 0.00017842339008405803, "loss": 1.0145, "step": 5085 }, { "epoch": 0.29204199896723854, "grad_norm": 0.251953125, "learning_rate": 0.00017836121736635894, "loss": 0.9003, "step": 5090 }, { "epoch": 0.2923288771587584, "grad_norm": 0.28515625, "learning_rate": 0.00017829896606785543, "loss": 0.9675, "step": 5095 }, { "epoch": 0.2926157553502783, "grad_norm": 0.2734375, "learning_rate": 0.00017823663625097312, "loss": 0.951, "step": 5100 }, { "epoch": 0.29290263354179813, "grad_norm": 0.275390625, "learning_rate": 0.00017817422797821656, "loss": 0.9628, "step": 5105 }, { "epoch": 0.29318951173331803, "grad_norm": 0.265625, "learning_rate": 0.0001781117413121689, "loss": 0.9483, "step": 5110 }, { "epoch": 0.29347638992483793, "grad_norm": 0.28515625, "learning_rate": 0.00017804917631549189, "loss": 1.0401, "step": 5115 }, { "epoch": 0.2937632681163578, "grad_norm": 0.28125, "learning_rate": 0.00017798653305092584, "loss": 0.9795, "step": 5120 }, { "epoch": 0.2940501463078777, "grad_norm": 0.296875, "learning_rate": 0.00017792381158128956, "loss": 0.9764, "step": 5125 }, { "epoch": 0.2943370244993976, "grad_norm": 0.2578125, "learning_rate": 0.00017786101196948034, "loss": 0.9816, "step": 5130 }, { "epoch": 0.2946239026909174, "grad_norm": 0.279296875, "learning_rate": 0.00017779813427847368, "loss": 1.0152, "step": 5135 }, { "epoch": 0.29491078088243733, "grad_norm": 0.267578125, "learning_rate": 0.00017773517857132355, "loss": 0.9595, "step": 5140 }, { "epoch": 0.2951976590739572, "grad_norm": 0.287109375, "learning_rate": 0.00017767214491116198, "loss": 0.947, "step": 5145 }, { "epoch": 0.2954845372654771, "grad_norm": 0.27734375, "learning_rate": 0.00017760903336119937, "loss": 1.0049, "step": 5150 }, { "epoch": 0.295771415456997, "grad_norm": 0.26171875, "learning_rate": 0.00017754584398472405, "loss": 0.8602, "step": 5155 }, { "epoch": 0.2960582936485168, "grad_norm": 0.26953125, "learning_rate": 0.0001774825768451025, "loss": 0.9468, "step": 5160 }, { "epoch": 0.2963451718400367, "grad_norm": 0.255859375, "learning_rate": 0.00017741923200577917, "loss": 0.9763, "step": 5165 }, { "epoch": 0.2966320500315566, "grad_norm": 0.28125, "learning_rate": 0.00017735580953027636, "loss": 1.0153, "step": 5170 }, { "epoch": 0.2969189282230765, "grad_norm": 0.27734375, "learning_rate": 0.00017729230948219428, "loss": 0.9664, "step": 5175 }, { "epoch": 0.2972058064145964, "grad_norm": 0.267578125, "learning_rate": 0.00017722873192521096, "loss": 0.9508, "step": 5180 }, { "epoch": 0.2974926846061162, "grad_norm": 0.25, "learning_rate": 0.00017716507692308207, "loss": 0.9688, "step": 5185 }, { "epoch": 0.2977795627976361, "grad_norm": 0.251953125, "learning_rate": 0.000177101344539641, "loss": 1.0038, "step": 5190 }, { "epoch": 0.298066440989156, "grad_norm": 0.3046875, "learning_rate": 0.00017703753483879874, "loss": 1.0215, "step": 5195 }, { "epoch": 0.29835331918067587, "grad_norm": 0.283203125, "learning_rate": 0.0001769736478845438, "loss": 1.0174, "step": 5200 }, { "epoch": 0.29864019737219577, "grad_norm": 0.28125, "learning_rate": 0.00017690968374094217, "loss": 0.9131, "step": 5205 }, { "epoch": 0.29892707556371567, "grad_norm": 0.26171875, "learning_rate": 0.00017684564247213722, "loss": 0.9034, "step": 5210 }, { "epoch": 0.2992139537552355, "grad_norm": 0.291015625, "learning_rate": 0.00017678152414234968, "loss": 0.9721, "step": 5215 }, { "epoch": 0.2995008319467554, "grad_norm": 0.265625, "learning_rate": 0.00017671732881587756, "loss": 1.0016, "step": 5220 }, { "epoch": 0.29978771013827527, "grad_norm": 0.279296875, "learning_rate": 0.0001766530565570961, "loss": 0.9473, "step": 5225 }, { "epoch": 0.30007458832979517, "grad_norm": 0.26171875, "learning_rate": 0.00017658870743045757, "loss": 1.0014, "step": 5230 }, { "epoch": 0.30036146652131507, "grad_norm": 0.275390625, "learning_rate": 0.00017652428150049152, "loss": 0.9541, "step": 5235 }, { "epoch": 0.3006483447128349, "grad_norm": 0.265625, "learning_rate": 0.0001764597788318044, "loss": 0.9472, "step": 5240 }, { "epoch": 0.3009352229043548, "grad_norm": 0.28125, "learning_rate": 0.00017639519948907961, "loss": 0.9852, "step": 5245 }, { "epoch": 0.3012221010958747, "grad_norm": 0.265625, "learning_rate": 0.00017633054353707745, "loss": 0.946, "step": 5250 }, { "epoch": 0.30150897928739456, "grad_norm": 0.27734375, "learning_rate": 0.0001762658110406351, "loss": 0.9516, "step": 5255 }, { "epoch": 0.30179585747891446, "grad_norm": 0.2890625, "learning_rate": 0.00017620100206466635, "loss": 0.9726, "step": 5260 }, { "epoch": 0.3020827356704343, "grad_norm": 0.2578125, "learning_rate": 0.00017613611667416192, "loss": 0.94, "step": 5265 }, { "epoch": 0.3023696138619542, "grad_norm": 0.255859375, "learning_rate": 0.00017607115493418896, "loss": 0.9733, "step": 5270 }, { "epoch": 0.3026564920534741, "grad_norm": 0.25390625, "learning_rate": 0.00017600611690989123, "loss": 1.0138, "step": 5275 }, { "epoch": 0.30294337024499396, "grad_norm": 0.27734375, "learning_rate": 0.00017594100266648906, "loss": 0.9298, "step": 5280 }, { "epoch": 0.30323024843651386, "grad_norm": 0.28515625, "learning_rate": 0.0001758758122692791, "loss": 0.9409, "step": 5285 }, { "epoch": 0.30351712662803376, "grad_norm": 0.271484375, "learning_rate": 0.00017581054578363445, "loss": 0.9214, "step": 5290 }, { "epoch": 0.3038040048195536, "grad_norm": 0.263671875, "learning_rate": 0.00017574520327500451, "loss": 0.9794, "step": 5295 }, { "epoch": 0.3040908830110735, "grad_norm": 0.267578125, "learning_rate": 0.0001756797848089149, "loss": 0.9635, "step": 5300 }, { "epoch": 0.30437776120259336, "grad_norm": 0.271484375, "learning_rate": 0.00017561429045096733, "loss": 0.9972, "step": 5305 }, { "epoch": 0.30466463939411326, "grad_norm": 0.263671875, "learning_rate": 0.00017554872026683978, "loss": 0.9549, "step": 5310 }, { "epoch": 0.30495151758563316, "grad_norm": 0.32421875, "learning_rate": 0.00017548307432228608, "loss": 1.0005, "step": 5315 }, { "epoch": 0.305238395777153, "grad_norm": 0.265625, "learning_rate": 0.00017541735268313623, "loss": 0.9812, "step": 5320 }, { "epoch": 0.3055252739686729, "grad_norm": 0.2890625, "learning_rate": 0.000175351555415296, "loss": 0.983, "step": 5325 }, { "epoch": 0.3058121521601928, "grad_norm": 0.26171875, "learning_rate": 0.00017528568258474704, "loss": 0.9497, "step": 5330 }, { "epoch": 0.30609903035171265, "grad_norm": 0.259765625, "learning_rate": 0.00017521973425754675, "loss": 0.9057, "step": 5335 }, { "epoch": 0.30638590854323255, "grad_norm": 0.25390625, "learning_rate": 0.00017515371049982824, "loss": 0.9332, "step": 5340 }, { "epoch": 0.3066727867347524, "grad_norm": 0.3046875, "learning_rate": 0.00017508761137780037, "loss": 0.969, "step": 5345 }, { "epoch": 0.3069596649262723, "grad_norm": 0.2734375, "learning_rate": 0.00017502143695774741, "loss": 0.9633, "step": 5350 }, { "epoch": 0.3072465431177922, "grad_norm": 0.26171875, "learning_rate": 0.00017495518730602924, "loss": 0.9164, "step": 5355 }, { "epoch": 0.30753342130931205, "grad_norm": 0.28515625, "learning_rate": 0.00017488886248908118, "loss": 0.9805, "step": 5360 }, { "epoch": 0.30782029950083195, "grad_norm": 0.2734375, "learning_rate": 0.0001748224625734139, "loss": 0.921, "step": 5365 }, { "epoch": 0.30810717769235185, "grad_norm": 0.275390625, "learning_rate": 0.00017475598762561333, "loss": 0.9671, "step": 5370 }, { "epoch": 0.3083940558838717, "grad_norm": 0.251953125, "learning_rate": 0.00017468943771234075, "loss": 0.9246, "step": 5375 }, { "epoch": 0.3086809340753916, "grad_norm": 0.2734375, "learning_rate": 0.00017462281290033256, "loss": 0.9271, "step": 5380 }, { "epoch": 0.30896781226691145, "grad_norm": 0.26171875, "learning_rate": 0.00017455611325640024, "loss": 0.9343, "step": 5385 }, { "epoch": 0.30925469045843135, "grad_norm": 0.255859375, "learning_rate": 0.00017448933884743037, "loss": 0.9428, "step": 5390 }, { "epoch": 0.30954156864995125, "grad_norm": 0.29296875, "learning_rate": 0.0001744224897403845, "loss": 0.986, "step": 5395 }, { "epoch": 0.3098284468414711, "grad_norm": 0.263671875, "learning_rate": 0.00017435556600229902, "loss": 0.9702, "step": 5400 }, { "epoch": 0.310115325032991, "grad_norm": 0.271484375, "learning_rate": 0.0001742885677002852, "loss": 1.0214, "step": 5405 }, { "epoch": 0.3104022032245109, "grad_norm": 0.2734375, "learning_rate": 0.00017422149490152914, "loss": 0.9759, "step": 5410 }, { "epoch": 0.31068908141603074, "grad_norm": 0.267578125, "learning_rate": 0.00017415434767329154, "loss": 1.042, "step": 5415 }, { "epoch": 0.31097595960755064, "grad_norm": 0.275390625, "learning_rate": 0.0001740871260829078, "loss": 1.0227, "step": 5420 }, { "epoch": 0.3112628377990705, "grad_norm": 0.283203125, "learning_rate": 0.0001740198301977879, "loss": 0.9925, "step": 5425 }, { "epoch": 0.3115497159905904, "grad_norm": 0.265625, "learning_rate": 0.0001739524600854163, "loss": 0.9934, "step": 5430 }, { "epoch": 0.3118365941821103, "grad_norm": 0.251953125, "learning_rate": 0.0001738850158133519, "loss": 0.9855, "step": 5435 }, { "epoch": 0.31212347237363014, "grad_norm": 0.287109375, "learning_rate": 0.00017381749744922796, "loss": 0.9412, "step": 5440 }, { "epoch": 0.31241035056515004, "grad_norm": 0.263671875, "learning_rate": 0.00017374990506075207, "loss": 0.9927, "step": 5445 }, { "epoch": 0.31269722875666994, "grad_norm": 0.294921875, "learning_rate": 0.00017368223871570596, "loss": 1.0166, "step": 5450 }, { "epoch": 0.3129841069481898, "grad_norm": 0.267578125, "learning_rate": 0.0001736144984819457, "loss": 1.0061, "step": 5455 }, { "epoch": 0.3132709851397097, "grad_norm": 0.267578125, "learning_rate": 0.00017354668442740126, "loss": 0.9393, "step": 5460 }, { "epoch": 0.31355786333122954, "grad_norm": 0.27734375, "learning_rate": 0.00017347879662007676, "loss": 1.0184, "step": 5465 }, { "epoch": 0.31384474152274944, "grad_norm": 0.267578125, "learning_rate": 0.00017341083512805025, "loss": 0.9302, "step": 5470 }, { "epoch": 0.31413161971426934, "grad_norm": 0.283203125, "learning_rate": 0.00017334280001947362, "loss": 0.9412, "step": 5475 }, { "epoch": 0.3144184979057892, "grad_norm": 0.2734375, "learning_rate": 0.00017327469136257272, "loss": 0.9884, "step": 5480 }, { "epoch": 0.3147053760973091, "grad_norm": 0.271484375, "learning_rate": 0.000173206509225647, "loss": 0.9041, "step": 5485 }, { "epoch": 0.314992254288829, "grad_norm": 0.267578125, "learning_rate": 0.00017313825367706967, "loss": 0.9434, "step": 5490 }, { "epoch": 0.31527913248034883, "grad_norm": 0.26953125, "learning_rate": 0.00017306992478528753, "loss": 1.0095, "step": 5495 }, { "epoch": 0.31556601067186874, "grad_norm": 0.271484375, "learning_rate": 0.000173001522618821, "loss": 0.9315, "step": 5500 }, { "epoch": 0.3158528888633886, "grad_norm": 0.265625, "learning_rate": 0.00017293304724626385, "loss": 0.9799, "step": 5505 }, { "epoch": 0.3161397670549085, "grad_norm": 0.27734375, "learning_rate": 0.0001728644987362834, "loss": 1.0289, "step": 5510 }, { "epoch": 0.3164266452464284, "grad_norm": 0.248046875, "learning_rate": 0.00017279587715762022, "loss": 0.9204, "step": 5515 }, { "epoch": 0.31671352343794823, "grad_norm": 0.314453125, "learning_rate": 0.0001727271825790882, "loss": 0.9878, "step": 5520 }, { "epoch": 0.31700040162946813, "grad_norm": 0.255859375, "learning_rate": 0.0001726584150695744, "loss": 0.9265, "step": 5525 }, { "epoch": 0.31728727982098803, "grad_norm": 0.251953125, "learning_rate": 0.00017258957469803906, "loss": 0.9716, "step": 5530 }, { "epoch": 0.3175741580125079, "grad_norm": 0.275390625, "learning_rate": 0.0001725206615335154, "loss": 0.9631, "step": 5535 }, { "epoch": 0.3178610362040278, "grad_norm": 0.259765625, "learning_rate": 0.00017245167564510974, "loss": 0.909, "step": 5540 }, { "epoch": 0.3181479143955476, "grad_norm": 0.27734375, "learning_rate": 0.00017238261710200128, "loss": 0.9312, "step": 5545 }, { "epoch": 0.31843479258706753, "grad_norm": 0.287109375, "learning_rate": 0.0001723134859734421, "loss": 0.9891, "step": 5550 }, { "epoch": 0.31872167077858743, "grad_norm": 0.26953125, "learning_rate": 0.00017224428232875703, "loss": 0.8686, "step": 5555 }, { "epoch": 0.3190085489701073, "grad_norm": 0.255859375, "learning_rate": 0.00017217500623734365, "loss": 0.9146, "step": 5560 }, { "epoch": 0.3192954271616272, "grad_norm": 0.279296875, "learning_rate": 0.00017210565776867216, "loss": 0.9702, "step": 5565 }, { "epoch": 0.3195823053531471, "grad_norm": 0.263671875, "learning_rate": 0.00017203623699228537, "loss": 0.9483, "step": 5570 }, { "epoch": 0.3198691835446669, "grad_norm": 0.279296875, "learning_rate": 0.0001719667439777986, "loss": 0.9595, "step": 5575 }, { "epoch": 0.3201560617361868, "grad_norm": 0.29296875, "learning_rate": 0.00017189717879489958, "loss": 0.9104, "step": 5580 }, { "epoch": 0.32044293992770667, "grad_norm": 0.28515625, "learning_rate": 0.00017182754151334842, "loss": 0.929, "step": 5585 }, { "epoch": 0.3207298181192266, "grad_norm": 0.275390625, "learning_rate": 0.00017175783220297762, "loss": 0.9127, "step": 5590 }, { "epoch": 0.3210166963107465, "grad_norm": 0.263671875, "learning_rate": 0.00017168805093369173, "loss": 0.9491, "step": 5595 }, { "epoch": 0.3213035745022663, "grad_norm": 0.251953125, "learning_rate": 0.00017161819777546767, "loss": 0.9266, "step": 5600 }, { "epoch": 0.3215904526937862, "grad_norm": 0.3359375, "learning_rate": 0.0001715482727983542, "loss": 1.014, "step": 5605 }, { "epoch": 0.3218773308853061, "grad_norm": 0.255859375, "learning_rate": 0.00017147827607247242, "loss": 0.9368, "step": 5610 }, { "epoch": 0.32216420907682597, "grad_norm": 0.26171875, "learning_rate": 0.00017140820766801506, "loss": 1.0029, "step": 5615 }, { "epoch": 0.32245108726834587, "grad_norm": 0.275390625, "learning_rate": 0.00017133806765524693, "loss": 0.9271, "step": 5620 }, { "epoch": 0.3227379654598657, "grad_norm": 0.28125, "learning_rate": 0.0001712678561045046, "loss": 0.9974, "step": 5625 }, { "epoch": 0.3230248436513856, "grad_norm": 0.27734375, "learning_rate": 0.00017119757308619639, "loss": 0.9648, "step": 5630 }, { "epoch": 0.3233117218429055, "grad_norm": 0.26171875, "learning_rate": 0.00017112721867080217, "loss": 0.9823, "step": 5635 }, { "epoch": 0.32359860003442537, "grad_norm": 0.275390625, "learning_rate": 0.0001710567929288736, "loss": 0.9823, "step": 5640 }, { "epoch": 0.32388547822594527, "grad_norm": 0.25390625, "learning_rate": 0.00017098629593103378, "loss": 0.9614, "step": 5645 }, { "epoch": 0.32417235641746517, "grad_norm": 0.2490234375, "learning_rate": 0.00017091572774797714, "loss": 1.014, "step": 5650 }, { "epoch": 0.324459234608985, "grad_norm": 0.265625, "learning_rate": 0.00017084508845046975, "loss": 1.0164, "step": 5655 }, { "epoch": 0.3247461128005049, "grad_norm": 0.25390625, "learning_rate": 0.00017077437810934882, "loss": 0.9267, "step": 5660 }, { "epoch": 0.32503299099202476, "grad_norm": 0.267578125, "learning_rate": 0.0001707035967955228, "loss": 0.9279, "step": 5665 }, { "epoch": 0.32531986918354466, "grad_norm": 0.279296875, "learning_rate": 0.00017063274457997137, "loss": 0.9867, "step": 5670 }, { "epoch": 0.32560674737506456, "grad_norm": 0.271484375, "learning_rate": 0.00017056182153374526, "loss": 0.9867, "step": 5675 }, { "epoch": 0.3258936255665844, "grad_norm": 0.310546875, "learning_rate": 0.00017049082772796633, "loss": 0.9746, "step": 5680 }, { "epoch": 0.3261805037581043, "grad_norm": 0.271484375, "learning_rate": 0.00017041976323382726, "loss": 0.9462, "step": 5685 }, { "epoch": 0.3264673819496242, "grad_norm": 0.279296875, "learning_rate": 0.00017034862812259174, "loss": 1.0081, "step": 5690 }, { "epoch": 0.32675426014114406, "grad_norm": 0.2578125, "learning_rate": 0.00017027742246559417, "loss": 0.9711, "step": 5695 }, { "epoch": 0.32704113833266396, "grad_norm": 0.263671875, "learning_rate": 0.00017020614633423976, "loss": 0.9295, "step": 5700 }, { "epoch": 0.3273280165241838, "grad_norm": 0.275390625, "learning_rate": 0.00017013479980000436, "loss": 0.9887, "step": 5705 }, { "epoch": 0.3276148947157037, "grad_norm": 0.2578125, "learning_rate": 0.00017006338293443446, "loss": 1.0193, "step": 5710 }, { "epoch": 0.3279017729072236, "grad_norm": 0.26171875, "learning_rate": 0.00016999189580914708, "loss": 0.9274, "step": 5715 }, { "epoch": 0.32818865109874346, "grad_norm": 0.2578125, "learning_rate": 0.00016992033849582962, "loss": 0.8722, "step": 5720 }, { "epoch": 0.32847552929026336, "grad_norm": 0.255859375, "learning_rate": 0.00016984871106623988, "loss": 0.8897, "step": 5725 }, { "epoch": 0.32876240748178326, "grad_norm": 0.294921875, "learning_rate": 0.00016977701359220613, "loss": 0.9323, "step": 5730 }, { "epoch": 0.3290492856733031, "grad_norm": 0.3515625, "learning_rate": 0.00016970524614562664, "loss": 0.9681, "step": 5735 }, { "epoch": 0.329336163864823, "grad_norm": 0.2734375, "learning_rate": 0.00016963340879847002, "loss": 0.9279, "step": 5740 }, { "epoch": 0.32962304205634285, "grad_norm": 0.28125, "learning_rate": 0.0001695615016227749, "loss": 0.9294, "step": 5745 }, { "epoch": 0.32990992024786275, "grad_norm": 0.28515625, "learning_rate": 0.00016948952469065, "loss": 0.9861, "step": 5750 }, { "epoch": 0.33019679843938265, "grad_norm": 0.279296875, "learning_rate": 0.00016941747807427387, "loss": 0.9409, "step": 5755 }, { "epoch": 0.3304836766309025, "grad_norm": 0.2578125, "learning_rate": 0.00016934536184589512, "loss": 0.9838, "step": 5760 }, { "epoch": 0.3307705548224224, "grad_norm": 0.28125, "learning_rate": 0.00016927317607783198, "loss": 0.9714, "step": 5765 }, { "epoch": 0.3310574330139423, "grad_norm": 0.251953125, "learning_rate": 0.00016920092084247255, "loss": 0.9408, "step": 5770 }, { "epoch": 0.33134431120546215, "grad_norm": 0.28125, "learning_rate": 0.0001691285962122745, "loss": 1.0076, "step": 5775 }, { "epoch": 0.33163118939698205, "grad_norm": 0.25390625, "learning_rate": 0.00016905620225976517, "loss": 0.9663, "step": 5780 }, { "epoch": 0.3319180675885019, "grad_norm": 0.298828125, "learning_rate": 0.00016898373905754137, "loss": 0.9457, "step": 5785 }, { "epoch": 0.3322049457800218, "grad_norm": 0.2890625, "learning_rate": 0.00016891120667826933, "loss": 0.9358, "step": 5790 }, { "epoch": 0.3324918239715417, "grad_norm": 0.291015625, "learning_rate": 0.00016883860519468472, "loss": 0.922, "step": 5795 }, { "epoch": 0.33277870216306155, "grad_norm": 0.267578125, "learning_rate": 0.0001687659346795925, "loss": 0.8944, "step": 5800 }, { "epoch": 0.33306558035458145, "grad_norm": 0.28125, "learning_rate": 0.00016869319520586675, "loss": 0.9289, "step": 5805 }, { "epoch": 0.33335245854610135, "grad_norm": 0.27734375, "learning_rate": 0.00016862038684645078, "loss": 0.959, "step": 5810 }, { "epoch": 0.3336393367376212, "grad_norm": 0.267578125, "learning_rate": 0.00016854750967435704, "loss": 1.01, "step": 5815 }, { "epoch": 0.3339262149291411, "grad_norm": 0.265625, "learning_rate": 0.0001684745637626669, "loss": 1.0157, "step": 5820 }, { "epoch": 0.33421309312066094, "grad_norm": 0.283203125, "learning_rate": 0.00016840154918453063, "loss": 0.9816, "step": 5825 }, { "epoch": 0.33449997131218084, "grad_norm": 0.251953125, "learning_rate": 0.00016832846601316749, "loss": 0.9529, "step": 5830 }, { "epoch": 0.33478684950370075, "grad_norm": 0.251953125, "learning_rate": 0.00016825531432186543, "loss": 0.9697, "step": 5835 }, { "epoch": 0.3350737276952206, "grad_norm": 0.25390625, "learning_rate": 0.00016818209418398107, "loss": 0.9617, "step": 5840 }, { "epoch": 0.3353606058867405, "grad_norm": 0.28515625, "learning_rate": 0.0001681088056729398, "loss": 1.0015, "step": 5845 }, { "epoch": 0.3356474840782604, "grad_norm": 0.26171875, "learning_rate": 0.00016803544886223547, "loss": 0.9556, "step": 5850 }, { "epoch": 0.33593436226978024, "grad_norm": 0.26953125, "learning_rate": 0.00016796202382543047, "loss": 0.9678, "step": 5855 }, { "epoch": 0.33622124046130014, "grad_norm": 0.271484375, "learning_rate": 0.00016788853063615556, "loss": 0.9375, "step": 5860 }, { "epoch": 0.33650811865282, "grad_norm": 0.259765625, "learning_rate": 0.0001678149693681099, "loss": 0.9212, "step": 5865 }, { "epoch": 0.3367949968443399, "grad_norm": 0.267578125, "learning_rate": 0.0001677413400950609, "loss": 1.0206, "step": 5870 }, { "epoch": 0.3370818750358598, "grad_norm": 0.255859375, "learning_rate": 0.00016766764289084414, "loss": 0.9991, "step": 5875 }, { "epoch": 0.33736875322737964, "grad_norm": 0.26171875, "learning_rate": 0.00016759387782936335, "loss": 0.8914, "step": 5880 }, { "epoch": 0.33765563141889954, "grad_norm": 0.2890625, "learning_rate": 0.00016752004498459032, "loss": 0.9712, "step": 5885 }, { "epoch": 0.33794250961041944, "grad_norm": 0.251953125, "learning_rate": 0.00016744614443056475, "loss": 0.9701, "step": 5890 }, { "epoch": 0.3382293878019393, "grad_norm": 0.28125, "learning_rate": 0.00016737217624139433, "loss": 0.9325, "step": 5895 }, { "epoch": 0.3385162659934592, "grad_norm": 0.267578125, "learning_rate": 0.0001672981404912545, "loss": 0.8982, "step": 5900 }, { "epoch": 0.33880314418497903, "grad_norm": 0.267578125, "learning_rate": 0.00016722403725438845, "loss": 0.9592, "step": 5905 }, { "epoch": 0.33909002237649893, "grad_norm": 0.2578125, "learning_rate": 0.00016714986660510715, "loss": 0.8901, "step": 5910 }, { "epoch": 0.33937690056801884, "grad_norm": 0.259765625, "learning_rate": 0.000167075628617789, "loss": 1.0316, "step": 5915 }, { "epoch": 0.3396637787595387, "grad_norm": 0.259765625, "learning_rate": 0.00016700132336688005, "loss": 0.9365, "step": 5920 }, { "epoch": 0.3399506569510586, "grad_norm": 0.267578125, "learning_rate": 0.0001669269509268938, "loss": 1.0252, "step": 5925 }, { "epoch": 0.3402375351425785, "grad_norm": 0.271484375, "learning_rate": 0.00016685251137241113, "loss": 1.0248, "step": 5930 }, { "epoch": 0.34052441333409833, "grad_norm": 0.2890625, "learning_rate": 0.0001667780047780801, "loss": 0.954, "step": 5935 }, { "epoch": 0.34081129152561823, "grad_norm": 0.255859375, "learning_rate": 0.00016670343121861613, "loss": 0.9679, "step": 5940 }, { "epoch": 0.3410981697171381, "grad_norm": 0.27734375, "learning_rate": 0.00016662879076880178, "loss": 1.0479, "step": 5945 }, { "epoch": 0.341385047908658, "grad_norm": 0.302734375, "learning_rate": 0.00016655408350348664, "loss": 0.8797, "step": 5950 }, { "epoch": 0.3416719261001779, "grad_norm": 0.55859375, "learning_rate": 0.0001664793094975873, "loss": 0.9549, "step": 5955 }, { "epoch": 0.3419588042916977, "grad_norm": 0.25390625, "learning_rate": 0.00016640446882608737, "loss": 0.9245, "step": 5960 }, { "epoch": 0.34224568248321763, "grad_norm": 0.279296875, "learning_rate": 0.00016632956156403716, "loss": 1.0159, "step": 5965 }, { "epoch": 0.34253256067473753, "grad_norm": 0.275390625, "learning_rate": 0.00016625458778655387, "loss": 0.9792, "step": 5970 }, { "epoch": 0.3428194388662574, "grad_norm": 0.2890625, "learning_rate": 0.00016617954756882144, "loss": 1.0467, "step": 5975 }, { "epoch": 0.3431063170577773, "grad_norm": 0.26953125, "learning_rate": 0.00016610444098609026, "loss": 0.9083, "step": 5980 }, { "epoch": 0.3433931952492971, "grad_norm": 0.28125, "learning_rate": 0.00016602926811367744, "loss": 0.9455, "step": 5985 }, { "epoch": 0.343680073440817, "grad_norm": 0.275390625, "learning_rate": 0.00016595402902696646, "loss": 0.973, "step": 5990 }, { "epoch": 0.3439669516323369, "grad_norm": 0.26171875, "learning_rate": 0.0001658787238014073, "loss": 1.0536, "step": 5995 }, { "epoch": 0.34425382982385677, "grad_norm": 0.267578125, "learning_rate": 0.00016580335251251623, "loss": 0.984, "step": 6000 }, { "epoch": 0.3445407080153767, "grad_norm": 0.275390625, "learning_rate": 0.00016572791523587562, "loss": 0.9788, "step": 6005 }, { "epoch": 0.3448275862068966, "grad_norm": 0.271484375, "learning_rate": 0.00016565241204713428, "loss": 1.0371, "step": 6010 }, { "epoch": 0.3451144643984164, "grad_norm": 0.265625, "learning_rate": 0.0001655768430220069, "loss": 0.9485, "step": 6015 }, { "epoch": 0.3454013425899363, "grad_norm": 0.318359375, "learning_rate": 0.0001655012082362743, "loss": 0.8763, "step": 6020 }, { "epoch": 0.34568822078145617, "grad_norm": 0.287109375, "learning_rate": 0.00016542550776578322, "loss": 0.9322, "step": 6025 }, { "epoch": 0.34597509897297607, "grad_norm": 0.275390625, "learning_rate": 0.00016534974168644625, "loss": 0.9894, "step": 6030 }, { "epoch": 0.34626197716449597, "grad_norm": 0.287109375, "learning_rate": 0.0001652739100742417, "loss": 1.0145, "step": 6035 }, { "epoch": 0.3465488553560158, "grad_norm": 0.27734375, "learning_rate": 0.00016519801300521385, "loss": 1.0105, "step": 6040 }, { "epoch": 0.3468357335475357, "grad_norm": 0.294921875, "learning_rate": 0.0001651220505554723, "loss": 0.9631, "step": 6045 }, { "epoch": 0.3471226117390556, "grad_norm": 0.26171875, "learning_rate": 0.00016504602280119243, "loss": 0.9493, "step": 6050 }, { "epoch": 0.34740948993057547, "grad_norm": 0.298828125, "learning_rate": 0.0001649699298186151, "loss": 0.9545, "step": 6055 }, { "epoch": 0.34769636812209537, "grad_norm": 0.26953125, "learning_rate": 0.0001648937716840464, "loss": 0.9363, "step": 6060 }, { "epoch": 0.3479832463136152, "grad_norm": 0.2890625, "learning_rate": 0.00016481754847385793, "loss": 1.0452, "step": 6065 }, { "epoch": 0.3482701245051351, "grad_norm": 0.265625, "learning_rate": 0.00016474126026448652, "loss": 0.9987, "step": 6070 }, { "epoch": 0.348557002696655, "grad_norm": 0.263671875, "learning_rate": 0.00016466490713243416, "loss": 0.8785, "step": 6075 }, { "epoch": 0.34884388088817486, "grad_norm": 0.287109375, "learning_rate": 0.00016458848915426792, "loss": 0.9639, "step": 6080 }, { "epoch": 0.34913075907969476, "grad_norm": 0.271484375, "learning_rate": 0.00016451200640661993, "loss": 1.0097, "step": 6085 }, { "epoch": 0.34941763727121466, "grad_norm": 0.259765625, "learning_rate": 0.00016443545896618723, "loss": 0.9978, "step": 6090 }, { "epoch": 0.3497045154627345, "grad_norm": 0.271484375, "learning_rate": 0.0001643588469097318, "loss": 1.0066, "step": 6095 }, { "epoch": 0.3499913936542544, "grad_norm": 0.283203125, "learning_rate": 0.00016428217031408038, "loss": 0.9327, "step": 6100 }, { "epoch": 0.35027827184577426, "grad_norm": 0.267578125, "learning_rate": 0.0001642054292561244, "loss": 0.9271, "step": 6105 }, { "epoch": 0.35056515003729416, "grad_norm": 0.279296875, "learning_rate": 0.00016412862381282004, "loss": 1.0217, "step": 6110 }, { "epoch": 0.35085202822881406, "grad_norm": 0.26171875, "learning_rate": 0.00016405175406118786, "loss": 1.0257, "step": 6115 }, { "epoch": 0.3511389064203339, "grad_norm": 0.265625, "learning_rate": 0.00016397482007831312, "loss": 0.9198, "step": 6120 }, { "epoch": 0.3514257846118538, "grad_norm": 0.267578125, "learning_rate": 0.00016389782194134534, "loss": 0.9217, "step": 6125 }, { "epoch": 0.3517126628033737, "grad_norm": 0.259765625, "learning_rate": 0.00016382075972749843, "loss": 0.9233, "step": 6130 }, { "epoch": 0.35199954099489356, "grad_norm": 0.26171875, "learning_rate": 0.00016374363351405054, "loss": 0.9766, "step": 6135 }, { "epoch": 0.35228641918641346, "grad_norm": 0.263671875, "learning_rate": 0.00016366644337834405, "loss": 0.9753, "step": 6140 }, { "epoch": 0.3525732973779333, "grad_norm": 0.263671875, "learning_rate": 0.00016358918939778536, "loss": 0.9589, "step": 6145 }, { "epoch": 0.3528601755694532, "grad_norm": 0.255859375, "learning_rate": 0.00016351187164984494, "loss": 0.9289, "step": 6150 }, { "epoch": 0.3531470537609731, "grad_norm": 0.251953125, "learning_rate": 0.00016343449021205726, "loss": 0.9931, "step": 6155 }, { "epoch": 0.35343393195249295, "grad_norm": 0.29296875, "learning_rate": 0.00016335704516202051, "loss": 0.9516, "step": 6160 }, { "epoch": 0.35372081014401285, "grad_norm": 0.251953125, "learning_rate": 0.00016327953657739678, "loss": 0.971, "step": 6165 }, { "epoch": 0.35400768833553276, "grad_norm": 0.275390625, "learning_rate": 0.0001632019645359119, "loss": 1.0291, "step": 6170 }, { "epoch": 0.3542945665270526, "grad_norm": 0.298828125, "learning_rate": 0.00016312432911535528, "loss": 0.9575, "step": 6175 }, { "epoch": 0.3545814447185725, "grad_norm": 0.345703125, "learning_rate": 0.00016304663039357986, "loss": 1.0393, "step": 6180 }, { "epoch": 0.35486832291009235, "grad_norm": 0.275390625, "learning_rate": 0.0001629688684485021, "loss": 0.9207, "step": 6185 }, { "epoch": 0.35515520110161225, "grad_norm": 0.263671875, "learning_rate": 0.00016289104335810185, "loss": 0.9745, "step": 6190 }, { "epoch": 0.35544207929313215, "grad_norm": 0.265625, "learning_rate": 0.00016281315520042233, "loss": 0.9181, "step": 6195 }, { "epoch": 0.355728957484652, "grad_norm": 0.25, "learning_rate": 0.0001627352040535699, "loss": 0.8665, "step": 6200 }, { "epoch": 0.3560158356761719, "grad_norm": 0.271484375, "learning_rate": 0.00016265718999571415, "loss": 0.9876, "step": 6205 }, { "epoch": 0.3563027138676918, "grad_norm": 0.27734375, "learning_rate": 0.0001625791131050878, "loss": 0.9732, "step": 6210 }, { "epoch": 0.35658959205921165, "grad_norm": 0.283203125, "learning_rate": 0.0001625009734599865, "loss": 0.9622, "step": 6215 }, { "epoch": 0.35687647025073155, "grad_norm": 0.275390625, "learning_rate": 0.00016242277113876887, "loss": 0.9373, "step": 6220 }, { "epoch": 0.3571633484422514, "grad_norm": 0.271484375, "learning_rate": 0.00016234450621985635, "loss": 0.9816, "step": 6225 }, { "epoch": 0.3574502266337713, "grad_norm": 0.275390625, "learning_rate": 0.00016226617878173317, "loss": 1.0049, "step": 6230 }, { "epoch": 0.3577371048252912, "grad_norm": 0.259765625, "learning_rate": 0.00016218778890294636, "loss": 0.9598, "step": 6235 }, { "epoch": 0.35802398301681104, "grad_norm": 0.283203125, "learning_rate": 0.00016210933666210533, "loss": 0.9202, "step": 6240 }, { "epoch": 0.35831086120833094, "grad_norm": 0.26953125, "learning_rate": 0.0001620308221378822, "loss": 0.9408, "step": 6245 }, { "epoch": 0.35859773939985085, "grad_norm": 0.302734375, "learning_rate": 0.00016195224540901156, "loss": 0.9816, "step": 6250 }, { "epoch": 0.3588846175913707, "grad_norm": 0.25390625, "learning_rate": 0.00016187360655429034, "loss": 0.994, "step": 6255 }, { "epoch": 0.3591714957828906, "grad_norm": 0.265625, "learning_rate": 0.0001617949056525777, "loss": 0.9918, "step": 6260 }, { "epoch": 0.35945837397441044, "grad_norm": 0.267578125, "learning_rate": 0.0001617161427827951, "loss": 0.9715, "step": 6265 }, { "epoch": 0.35974525216593034, "grad_norm": 0.26953125, "learning_rate": 0.0001616373180239261, "loss": 0.9482, "step": 6270 }, { "epoch": 0.36003213035745024, "grad_norm": 0.310546875, "learning_rate": 0.0001615584314550164, "loss": 0.9451, "step": 6275 }, { "epoch": 0.3603190085489701, "grad_norm": 0.28125, "learning_rate": 0.00016147948315517357, "loss": 0.9452, "step": 6280 }, { "epoch": 0.36060588674049, "grad_norm": 0.25390625, "learning_rate": 0.00016140047320356723, "loss": 0.9466, "step": 6285 }, { "epoch": 0.3608927649320099, "grad_norm": 0.255859375, "learning_rate": 0.00016132140167942862, "loss": 0.9951, "step": 6290 }, { "epoch": 0.36117964312352974, "grad_norm": 0.27734375, "learning_rate": 0.0001612422686620509, "loss": 0.8864, "step": 6295 }, { "epoch": 0.36146652131504964, "grad_norm": 0.26171875, "learning_rate": 0.0001611630742307889, "loss": 0.9521, "step": 6300 }, { "epoch": 0.3617533995065695, "grad_norm": 0.2734375, "learning_rate": 0.00016108381846505885, "loss": 0.9414, "step": 6305 }, { "epoch": 0.3620402776980894, "grad_norm": 0.255859375, "learning_rate": 0.0001610045014443387, "loss": 0.9188, "step": 6310 }, { "epoch": 0.3623271558896093, "grad_norm": 0.259765625, "learning_rate": 0.00016092512324816772, "loss": 0.9346, "step": 6315 }, { "epoch": 0.36261403408112913, "grad_norm": 0.263671875, "learning_rate": 0.00016084568395614648, "loss": 0.9766, "step": 6320 }, { "epoch": 0.36290091227264903, "grad_norm": 0.287109375, "learning_rate": 0.00016076618364793696, "loss": 0.9934, "step": 6325 }, { "epoch": 0.36318779046416894, "grad_norm": 0.283203125, "learning_rate": 0.0001606866224032622, "loss": 0.9688, "step": 6330 }, { "epoch": 0.3634746686556888, "grad_norm": 0.255859375, "learning_rate": 0.0001606070003019064, "loss": 0.8889, "step": 6335 }, { "epoch": 0.3637615468472087, "grad_norm": 0.2890625, "learning_rate": 0.00016052731742371485, "loss": 0.9842, "step": 6340 }, { "epoch": 0.36404842503872853, "grad_norm": 0.287109375, "learning_rate": 0.00016044757384859365, "loss": 0.9755, "step": 6345 }, { "epoch": 0.36433530323024843, "grad_norm": 0.267578125, "learning_rate": 0.0001603677696565098, "loss": 0.8822, "step": 6350 }, { "epoch": 0.36462218142176833, "grad_norm": 0.271484375, "learning_rate": 0.00016028790492749118, "loss": 0.8748, "step": 6355 }, { "epoch": 0.3649090596132882, "grad_norm": 0.35546875, "learning_rate": 0.00016020797974162636, "loss": 0.9934, "step": 6360 }, { "epoch": 0.3651959378048081, "grad_norm": 0.28125, "learning_rate": 0.0001601279941790644, "loss": 0.9297, "step": 6365 }, { "epoch": 0.365482815996328, "grad_norm": 0.259765625, "learning_rate": 0.00016004794832001507, "loss": 0.9782, "step": 6370 }, { "epoch": 0.3657696941878478, "grad_norm": 0.28125, "learning_rate": 0.0001599678422447485, "loss": 0.9823, "step": 6375 }, { "epoch": 0.36605657237936773, "grad_norm": 0.265625, "learning_rate": 0.00015988767603359526, "loss": 1.0366, "step": 6380 }, { "epoch": 0.3663434505708876, "grad_norm": 0.271484375, "learning_rate": 0.00015980744976694622, "loss": 0.9987, "step": 6385 }, { "epoch": 0.3666303287624075, "grad_norm": 0.271484375, "learning_rate": 0.00015972716352525242, "loss": 0.9615, "step": 6390 }, { "epoch": 0.3669172069539274, "grad_norm": 0.283203125, "learning_rate": 0.0001596468173890251, "loss": 0.9545, "step": 6395 }, { "epoch": 0.3672040851454472, "grad_norm": 0.26171875, "learning_rate": 0.0001595664114388356, "loss": 0.922, "step": 6400 }, { "epoch": 0.3674909633369671, "grad_norm": 0.271484375, "learning_rate": 0.00015948594575531508, "loss": 0.9663, "step": 6405 }, { "epoch": 0.367777841528487, "grad_norm": 0.25, "learning_rate": 0.00015940542041915478, "loss": 0.9883, "step": 6410 }, { "epoch": 0.36806471972000687, "grad_norm": 0.263671875, "learning_rate": 0.00015932483551110572, "loss": 0.9278, "step": 6415 }, { "epoch": 0.3683515979115268, "grad_norm": 0.283203125, "learning_rate": 0.00015924419111197852, "loss": 0.9985, "step": 6420 }, { "epoch": 0.3686384761030466, "grad_norm": 0.26171875, "learning_rate": 0.00015916348730264367, "loss": 0.9238, "step": 6425 }, { "epoch": 0.3689253542945665, "grad_norm": 0.271484375, "learning_rate": 0.00015908272416403105, "loss": 0.9527, "step": 6430 }, { "epoch": 0.3692122324860864, "grad_norm": 0.267578125, "learning_rate": 0.00015900190177713016, "loss": 0.9884, "step": 6435 }, { "epoch": 0.36949911067760627, "grad_norm": 0.2578125, "learning_rate": 0.00015892102022298986, "loss": 0.9004, "step": 6440 }, { "epoch": 0.36978598886912617, "grad_norm": 0.279296875, "learning_rate": 0.0001588400795827184, "loss": 0.9567, "step": 6445 }, { "epoch": 0.37007286706064607, "grad_norm": 0.27734375, "learning_rate": 0.00015875907993748314, "loss": 0.9362, "step": 6450 }, { "epoch": 0.3703597452521659, "grad_norm": 0.267578125, "learning_rate": 0.0001586780213685108, "loss": 0.9851, "step": 6455 }, { "epoch": 0.3706466234436858, "grad_norm": 0.263671875, "learning_rate": 0.00015859690395708702, "loss": 0.9376, "step": 6460 }, { "epoch": 0.37093350163520566, "grad_norm": 0.26171875, "learning_rate": 0.00015851572778455657, "loss": 0.9481, "step": 6465 }, { "epoch": 0.37122037982672557, "grad_norm": 0.271484375, "learning_rate": 0.00015843449293232307, "loss": 0.9841, "step": 6470 }, { "epoch": 0.37150725801824547, "grad_norm": 0.26953125, "learning_rate": 0.00015835319948184903, "loss": 0.962, "step": 6475 }, { "epoch": 0.3717941362097653, "grad_norm": 0.2490234375, "learning_rate": 0.0001582718475146557, "loss": 0.9342, "step": 6480 }, { "epoch": 0.3720810144012852, "grad_norm": 0.265625, "learning_rate": 0.000158190437112323, "loss": 0.9377, "step": 6485 }, { "epoch": 0.3723678925928051, "grad_norm": 0.296875, "learning_rate": 0.00015810896835648952, "loss": 0.9895, "step": 6490 }, { "epoch": 0.37265477078432496, "grad_norm": 0.26953125, "learning_rate": 0.00015802744132885227, "loss": 1.0321, "step": 6495 }, { "epoch": 0.37294164897584486, "grad_norm": 0.267578125, "learning_rate": 0.0001579458561111667, "loss": 0.9234, "step": 6500 }, { "epoch": 0.3732285271673647, "grad_norm": 0.271484375, "learning_rate": 0.0001578642127852467, "loss": 0.9092, "step": 6505 }, { "epoch": 0.3735154053588846, "grad_norm": 0.255859375, "learning_rate": 0.00015778251143296437, "loss": 0.9752, "step": 6510 }, { "epoch": 0.3738022835504045, "grad_norm": 0.263671875, "learning_rate": 0.00015770075213625, "loss": 0.9296, "step": 6515 }, { "epoch": 0.37408916174192436, "grad_norm": 0.2578125, "learning_rate": 0.000157618934977092, "loss": 0.9304, "step": 6520 }, { "epoch": 0.37437603993344426, "grad_norm": 0.267578125, "learning_rate": 0.00015753706003753678, "loss": 1.0235, "step": 6525 }, { "epoch": 0.37466291812496416, "grad_norm": 0.28515625, "learning_rate": 0.00015745512739968878, "loss": 0.9754, "step": 6530 }, { "epoch": 0.374949796316484, "grad_norm": 0.2578125, "learning_rate": 0.00015737313714571017, "loss": 0.9551, "step": 6535 }, { "epoch": 0.3752366745080039, "grad_norm": 0.265625, "learning_rate": 0.00015729108935782094, "loss": 0.9059, "step": 6540 }, { "epoch": 0.37552355269952375, "grad_norm": 0.287109375, "learning_rate": 0.00015720898411829889, "loss": 0.9985, "step": 6545 }, { "epoch": 0.37581043089104366, "grad_norm": 0.2578125, "learning_rate": 0.00015712682150947923, "loss": 0.9303, "step": 6550 }, { "epoch": 0.37609730908256356, "grad_norm": 0.255859375, "learning_rate": 0.0001570446016137549, "loss": 0.9074, "step": 6555 }, { "epoch": 0.3763841872740834, "grad_norm": 0.27734375, "learning_rate": 0.00015696232451357616, "loss": 0.9487, "step": 6560 }, { "epoch": 0.3766710654656033, "grad_norm": 0.28515625, "learning_rate": 0.0001568799902914506, "loss": 1.0237, "step": 6565 }, { "epoch": 0.3769579436571232, "grad_norm": 0.2890625, "learning_rate": 0.00015679759902994332, "loss": 0.9194, "step": 6570 }, { "epoch": 0.37724482184864305, "grad_norm": 0.26953125, "learning_rate": 0.0001567151508116763, "loss": 0.9603, "step": 6575 }, { "epoch": 0.37753170004016295, "grad_norm": 0.27734375, "learning_rate": 0.00015663264571932892, "loss": 0.9517, "step": 6580 }, { "epoch": 0.3778185782316828, "grad_norm": 0.265625, "learning_rate": 0.0001565500838356374, "loss": 0.9255, "step": 6585 }, { "epoch": 0.3781054564232027, "grad_norm": 0.28515625, "learning_rate": 0.00015646746524339497, "loss": 1.0131, "step": 6590 }, { "epoch": 0.3783923346147226, "grad_norm": 0.259765625, "learning_rate": 0.00015638479002545182, "loss": 0.946, "step": 6595 }, { "epoch": 0.37867921280624245, "grad_norm": 0.271484375, "learning_rate": 0.00015630205826471478, "loss": 1.008, "step": 6600 }, { "epoch": 0.37896609099776235, "grad_norm": 0.2578125, "learning_rate": 0.00015621927004414747, "loss": 1.0618, "step": 6605 }, { "epoch": 0.37925296918928225, "grad_norm": 0.279296875, "learning_rate": 0.0001561364254467701, "loss": 0.9527, "step": 6610 }, { "epoch": 0.3795398473808021, "grad_norm": 0.255859375, "learning_rate": 0.00015605352455565937, "loss": 0.9921, "step": 6615 }, { "epoch": 0.379826725572322, "grad_norm": 0.267578125, "learning_rate": 0.0001559705674539486, "loss": 1.021, "step": 6620 }, { "epoch": 0.38011360376384185, "grad_norm": 0.265625, "learning_rate": 0.0001558875542248272, "loss": 0.9598, "step": 6625 }, { "epoch": 0.38040048195536175, "grad_norm": 0.265625, "learning_rate": 0.0001558044849515411, "loss": 0.9692, "step": 6630 }, { "epoch": 0.38068736014688165, "grad_norm": 0.251953125, "learning_rate": 0.00015572135971739242, "loss": 0.9218, "step": 6635 }, { "epoch": 0.3809742383384015, "grad_norm": 0.271484375, "learning_rate": 0.0001556381786057392, "loss": 0.926, "step": 6640 }, { "epoch": 0.3812611165299214, "grad_norm": 0.27734375, "learning_rate": 0.00015555494169999578, "loss": 0.935, "step": 6645 }, { "epoch": 0.3815479947214413, "grad_norm": 0.267578125, "learning_rate": 0.00015547164908363224, "loss": 0.9532, "step": 6650 }, { "epoch": 0.38183487291296114, "grad_norm": 0.2578125, "learning_rate": 0.00015538830084017456, "loss": 0.968, "step": 6655 }, { "epoch": 0.38212175110448104, "grad_norm": 0.28125, "learning_rate": 0.00015530489705320463, "loss": 0.9956, "step": 6660 }, { "epoch": 0.3824086292960009, "grad_norm": 0.271484375, "learning_rate": 0.0001552214378063599, "loss": 1.0108, "step": 6665 }, { "epoch": 0.3826955074875208, "grad_norm": 0.2734375, "learning_rate": 0.0001551379231833335, "loss": 0.9581, "step": 6670 }, { "epoch": 0.3829823856790407, "grad_norm": 0.283203125, "learning_rate": 0.00015505435326787414, "loss": 1.0014, "step": 6675 }, { "epoch": 0.38326926387056054, "grad_norm": 0.28515625, "learning_rate": 0.00015497072814378584, "loss": 0.9081, "step": 6680 }, { "epoch": 0.38355614206208044, "grad_norm": 0.28125, "learning_rate": 0.0001548870478949281, "loss": 0.9399, "step": 6685 }, { "epoch": 0.38384302025360034, "grad_norm": 0.27734375, "learning_rate": 0.00015480331260521565, "loss": 0.9639, "step": 6690 }, { "epoch": 0.3841298984451202, "grad_norm": 0.271484375, "learning_rate": 0.00015471952235861843, "loss": 0.9984, "step": 6695 }, { "epoch": 0.3844167766366401, "grad_norm": 0.28515625, "learning_rate": 0.0001546356772391615, "loss": 0.9647, "step": 6700 }, { "epoch": 0.38470365482816, "grad_norm": 0.267578125, "learning_rate": 0.0001545517773309249, "loss": 0.9908, "step": 6705 }, { "epoch": 0.38499053301967984, "grad_norm": 0.255859375, "learning_rate": 0.00015446782271804366, "loss": 0.9489, "step": 6710 }, { "epoch": 0.38527741121119974, "grad_norm": 0.26171875, "learning_rate": 0.00015438381348470767, "loss": 0.9667, "step": 6715 }, { "epoch": 0.3855642894027196, "grad_norm": 0.28125, "learning_rate": 0.00015429974971516156, "loss": 0.9159, "step": 6720 }, { "epoch": 0.3858511675942395, "grad_norm": 0.275390625, "learning_rate": 0.0001542156314937047, "loss": 0.8806, "step": 6725 }, { "epoch": 0.3861380457857594, "grad_norm": 0.271484375, "learning_rate": 0.000154131458904691, "loss": 0.9385, "step": 6730 }, { "epoch": 0.38642492397727923, "grad_norm": 0.26171875, "learning_rate": 0.00015404723203252894, "loss": 0.9425, "step": 6735 }, { "epoch": 0.38671180216879913, "grad_norm": 0.2734375, "learning_rate": 0.0001539629509616814, "loss": 1.0188, "step": 6740 }, { "epoch": 0.38699868036031904, "grad_norm": 0.251953125, "learning_rate": 0.00015387861577666559, "loss": 0.9811, "step": 6745 }, { "epoch": 0.3872855585518389, "grad_norm": 0.27734375, "learning_rate": 0.00015379422656205307, "loss": 0.956, "step": 6750 }, { "epoch": 0.3875724367433588, "grad_norm": 0.275390625, "learning_rate": 0.00015370978340246955, "loss": 0.9814, "step": 6755 }, { "epoch": 0.38785931493487863, "grad_norm": 0.271484375, "learning_rate": 0.00015362528638259478, "loss": 0.9368, "step": 6760 }, { "epoch": 0.38814619312639853, "grad_norm": 0.26171875, "learning_rate": 0.0001535407355871626, "loss": 0.8959, "step": 6765 }, { "epoch": 0.38843307131791843, "grad_norm": 0.2578125, "learning_rate": 0.00015345613110096068, "loss": 0.8967, "step": 6770 }, { "epoch": 0.3887199495094383, "grad_norm": 0.27734375, "learning_rate": 0.00015337147300883066, "loss": 1.0397, "step": 6775 }, { "epoch": 0.3890068277009582, "grad_norm": 0.267578125, "learning_rate": 0.0001532867613956678, "loss": 0.9463, "step": 6780 }, { "epoch": 0.3892937058924781, "grad_norm": 0.267578125, "learning_rate": 0.0001532019963464211, "loss": 0.9226, "step": 6785 }, { "epoch": 0.3895805840839979, "grad_norm": 0.29296875, "learning_rate": 0.00015311717794609325, "loss": 0.9226, "step": 6790 }, { "epoch": 0.38986746227551783, "grad_norm": 0.263671875, "learning_rate": 0.0001530323062797402, "loss": 0.9181, "step": 6795 }, { "epoch": 0.3901543404670377, "grad_norm": 0.30859375, "learning_rate": 0.00015294738143247148, "loss": 0.9984, "step": 6800 }, { "epoch": 0.3904412186585576, "grad_norm": 0.26171875, "learning_rate": 0.00015286240348944997, "loss": 0.9387, "step": 6805 }, { "epoch": 0.3907280968500775, "grad_norm": 0.291015625, "learning_rate": 0.00015277737253589164, "loss": 0.956, "step": 6810 }, { "epoch": 0.3910149750415973, "grad_norm": 0.291015625, "learning_rate": 0.00015269228865706584, "loss": 1.0067, "step": 6815 }, { "epoch": 0.3913018532331172, "grad_norm": 0.3125, "learning_rate": 0.0001526071519382948, "loss": 0.9523, "step": 6820 }, { "epoch": 0.3915887314246371, "grad_norm": 0.27734375, "learning_rate": 0.00015252196246495382, "loss": 0.9101, "step": 6825 }, { "epoch": 0.391875609616157, "grad_norm": 0.3203125, "learning_rate": 0.00015243672032247112, "loss": 1.0325, "step": 6830 }, { "epoch": 0.3921624878076769, "grad_norm": 0.2578125, "learning_rate": 0.00015235142559632766, "loss": 0.9415, "step": 6835 }, { "epoch": 0.3924493659991967, "grad_norm": 0.26953125, "learning_rate": 0.00015226607837205727, "loss": 0.9169, "step": 6840 }, { "epoch": 0.3927362441907166, "grad_norm": 0.2470703125, "learning_rate": 0.00015218067873524625, "loss": 0.9431, "step": 6845 }, { "epoch": 0.3930231223822365, "grad_norm": 0.267578125, "learning_rate": 0.00015209522677153364, "loss": 0.9736, "step": 6850 }, { "epoch": 0.39331000057375637, "grad_norm": 0.2578125, "learning_rate": 0.00015200972256661075, "loss": 1.0248, "step": 6855 }, { "epoch": 0.39359687876527627, "grad_norm": 0.27734375, "learning_rate": 0.00015192416620622145, "loss": 1.0525, "step": 6860 }, { "epoch": 0.39388375695679617, "grad_norm": 0.271484375, "learning_rate": 0.00015183855777616188, "loss": 1.0149, "step": 6865 }, { "epoch": 0.394170635148316, "grad_norm": 0.3359375, "learning_rate": 0.0001517528973622803, "loss": 0.9726, "step": 6870 }, { "epoch": 0.3944575133398359, "grad_norm": 0.26171875, "learning_rate": 0.00015166718505047722, "loss": 0.9891, "step": 6875 }, { "epoch": 0.39474439153135576, "grad_norm": 0.2890625, "learning_rate": 0.0001515814209267051, "loss": 1.0325, "step": 6880 }, { "epoch": 0.39503126972287567, "grad_norm": 0.283203125, "learning_rate": 0.00015149560507696837, "loss": 0.9378, "step": 6885 }, { "epoch": 0.39531814791439557, "grad_norm": 0.2734375, "learning_rate": 0.00015140973758732347, "loss": 0.972, "step": 6890 }, { "epoch": 0.3956050261059154, "grad_norm": 0.2578125, "learning_rate": 0.0001513238185438784, "loss": 0.8973, "step": 6895 }, { "epoch": 0.3958919042974353, "grad_norm": 0.263671875, "learning_rate": 0.00015123784803279302, "loss": 0.9166, "step": 6900 }, { "epoch": 0.3961787824889552, "grad_norm": 0.26953125, "learning_rate": 0.00015115182614027872, "loss": 1.0208, "step": 6905 }, { "epoch": 0.39646566068047506, "grad_norm": 0.263671875, "learning_rate": 0.00015106575295259847, "loss": 0.891, "step": 6910 }, { "epoch": 0.39675253887199496, "grad_norm": 0.287109375, "learning_rate": 0.00015097962855606663, "loss": 0.9247, "step": 6915 }, { "epoch": 0.3970394170635148, "grad_norm": 0.2490234375, "learning_rate": 0.00015089345303704902, "loss": 0.9241, "step": 6920 }, { "epoch": 0.3973262952550347, "grad_norm": 0.2734375, "learning_rate": 0.00015080722648196253, "loss": 1.0022, "step": 6925 }, { "epoch": 0.3976131734465546, "grad_norm": 0.263671875, "learning_rate": 0.0001507209489772754, "loss": 0.8877, "step": 6930 }, { "epoch": 0.39790005163807446, "grad_norm": 0.29296875, "learning_rate": 0.0001506346206095069, "loss": 1.0253, "step": 6935 }, { "epoch": 0.39818692982959436, "grad_norm": 0.267578125, "learning_rate": 0.0001505482414652273, "loss": 1.005, "step": 6940 }, { "epoch": 0.39847380802111426, "grad_norm": 0.29296875, "learning_rate": 0.00015046181163105786, "loss": 0.9344, "step": 6945 }, { "epoch": 0.3987606862126341, "grad_norm": 0.25390625, "learning_rate": 0.00015037533119367053, "loss": 1.0191, "step": 6950 }, { "epoch": 0.399047564404154, "grad_norm": 0.263671875, "learning_rate": 0.0001502888002397881, "loss": 0.8942, "step": 6955 }, { "epoch": 0.39933444259567386, "grad_norm": 0.28125, "learning_rate": 0.00015020221885618407, "loss": 1.0012, "step": 6960 }, { "epoch": 0.39962132078719376, "grad_norm": 0.26953125, "learning_rate": 0.00015011558712968234, "loss": 0.974, "step": 6965 }, { "epoch": 0.39990819897871366, "grad_norm": 0.3046875, "learning_rate": 0.0001500289051471575, "loss": 0.9002, "step": 6970 }, { "epoch": 0.4001950771702335, "grad_norm": 0.26953125, "learning_rate": 0.0001499421729955344, "loss": 0.9575, "step": 6975 }, { "epoch": 0.4004819553617534, "grad_norm": 0.2578125, "learning_rate": 0.0001498553907617882, "loss": 1.0227, "step": 6980 }, { "epoch": 0.4007688335532733, "grad_norm": 0.263671875, "learning_rate": 0.00014976855853294436, "loss": 0.9818, "step": 6985 }, { "epoch": 0.40105571174479315, "grad_norm": 0.25390625, "learning_rate": 0.00014968167639607845, "loss": 1.0078, "step": 6990 }, { "epoch": 0.40134258993631305, "grad_norm": 0.275390625, "learning_rate": 0.00014959474443831597, "loss": 0.9494, "step": 6995 }, { "epoch": 0.4016294681278329, "grad_norm": 0.287109375, "learning_rate": 0.00014950776274683266, "loss": 0.9803, "step": 7000 }, { "epoch": 0.4019163463193528, "grad_norm": 0.287109375, "learning_rate": 0.00014942073140885377, "loss": 0.9356, "step": 7005 }, { "epoch": 0.4022032245108727, "grad_norm": 0.294921875, "learning_rate": 0.0001493336505116546, "loss": 0.9966, "step": 7010 }, { "epoch": 0.40249010270239255, "grad_norm": 0.255859375, "learning_rate": 0.00014924652014256014, "loss": 0.9121, "step": 7015 }, { "epoch": 0.40277698089391245, "grad_norm": 0.26171875, "learning_rate": 0.0001491593403889448, "loss": 1.0084, "step": 7020 }, { "epoch": 0.40306385908543235, "grad_norm": 0.294921875, "learning_rate": 0.00014907211133823273, "loss": 0.9533, "step": 7025 }, { "epoch": 0.4033507372769522, "grad_norm": 0.25390625, "learning_rate": 0.0001489848330778973, "loss": 0.8849, "step": 7030 }, { "epoch": 0.4036376154684721, "grad_norm": 0.2890625, "learning_rate": 0.0001488975056954615, "loss": 0.9374, "step": 7035 }, { "epoch": 0.40392449365999195, "grad_norm": 0.265625, "learning_rate": 0.00014881012927849728, "loss": 0.9389, "step": 7040 }, { "epoch": 0.40421137185151185, "grad_norm": 0.267578125, "learning_rate": 0.000148722703914626, "loss": 0.9596, "step": 7045 }, { "epoch": 0.40449825004303175, "grad_norm": 0.263671875, "learning_rate": 0.00014863522969151796, "loss": 0.9214, "step": 7050 }, { "epoch": 0.4047851282345516, "grad_norm": 0.287109375, "learning_rate": 0.00014854770669689253, "loss": 0.9462, "step": 7055 }, { "epoch": 0.4050720064260715, "grad_norm": 0.275390625, "learning_rate": 0.00014846013501851796, "loss": 1.0052, "step": 7060 }, { "epoch": 0.4053588846175914, "grad_norm": 0.263671875, "learning_rate": 0.00014837251474421133, "loss": 0.9172, "step": 7065 }, { "epoch": 0.40564576280911124, "grad_norm": 0.263671875, "learning_rate": 0.00014828484596183844, "loss": 0.9318, "step": 7070 }, { "epoch": 0.40593264100063114, "grad_norm": 0.263671875, "learning_rate": 0.0001481971287593138, "loss": 0.9865, "step": 7075 }, { "epoch": 0.406219519192151, "grad_norm": 0.28125, "learning_rate": 0.0001481093632246003, "loss": 0.9302, "step": 7080 }, { "epoch": 0.4065063973836709, "grad_norm": 0.26171875, "learning_rate": 0.00014802154944570952, "loss": 0.9485, "step": 7085 }, { "epoch": 0.4067932755751908, "grad_norm": 0.271484375, "learning_rate": 0.00014793368751070125, "loss": 0.9007, "step": 7090 }, { "epoch": 0.40708015376671064, "grad_norm": 0.283203125, "learning_rate": 0.00014784577750768363, "loss": 0.9349, "step": 7095 }, { "epoch": 0.40736703195823054, "grad_norm": 0.25, "learning_rate": 0.0001477578195248131, "loss": 0.9483, "step": 7100 }, { "epoch": 0.40765391014975044, "grad_norm": 0.27734375, "learning_rate": 0.000147669813650294, "loss": 1.0298, "step": 7105 }, { "epoch": 0.4079407883412703, "grad_norm": 0.267578125, "learning_rate": 0.0001475817599723789, "loss": 1.0117, "step": 7110 }, { "epoch": 0.4082276665327902, "grad_norm": 0.267578125, "learning_rate": 0.00014749365857936824, "loss": 0.9019, "step": 7115 }, { "epoch": 0.40851454472431004, "grad_norm": 0.26953125, "learning_rate": 0.00014740550955961022, "loss": 0.9884, "step": 7120 }, { "epoch": 0.40880142291582994, "grad_norm": 0.279296875, "learning_rate": 0.0001473173130015009, "loss": 0.9628, "step": 7125 }, { "epoch": 0.40908830110734984, "grad_norm": 0.271484375, "learning_rate": 0.00014722906899348402, "loss": 0.913, "step": 7130 }, { "epoch": 0.4093751792988697, "grad_norm": 0.259765625, "learning_rate": 0.00014714077762405085, "loss": 0.9841, "step": 7135 }, { "epoch": 0.4096620574903896, "grad_norm": 0.28515625, "learning_rate": 0.00014705243898174017, "loss": 1.0012, "step": 7140 }, { "epoch": 0.4099489356819095, "grad_norm": 0.2470703125, "learning_rate": 0.00014696405315513814, "loss": 0.9609, "step": 7145 }, { "epoch": 0.41023581387342933, "grad_norm": 0.27734375, "learning_rate": 0.00014687562023287833, "loss": 0.9618, "step": 7150 }, { "epoch": 0.41052269206494924, "grad_norm": 0.29296875, "learning_rate": 0.00014678714030364143, "loss": 0.9723, "step": 7155 }, { "epoch": 0.4108095702564691, "grad_norm": 0.248046875, "learning_rate": 0.00014669861345615532, "loss": 0.9298, "step": 7160 }, { "epoch": 0.411096448447989, "grad_norm": 0.271484375, "learning_rate": 0.00014661003977919492, "loss": 0.9379, "step": 7165 }, { "epoch": 0.4113833266395089, "grad_norm": 0.275390625, "learning_rate": 0.0001465214193615821, "loss": 0.9979, "step": 7170 }, { "epoch": 0.41167020483102873, "grad_norm": 0.26953125, "learning_rate": 0.00014643275229218563, "loss": 0.9358, "step": 7175 }, { "epoch": 0.41195708302254863, "grad_norm": 0.263671875, "learning_rate": 0.00014634403865992107, "loss": 0.9752, "step": 7180 }, { "epoch": 0.41224396121406853, "grad_norm": 0.283203125, "learning_rate": 0.0001462552785537506, "loss": 0.9811, "step": 7185 }, { "epoch": 0.4125308394055884, "grad_norm": 0.25390625, "learning_rate": 0.00014616647206268306, "loss": 0.9357, "step": 7190 }, { "epoch": 0.4128177175971083, "grad_norm": 0.3046875, "learning_rate": 0.0001460776192757738, "loss": 0.9227, "step": 7195 }, { "epoch": 0.4131045957886281, "grad_norm": 0.26953125, "learning_rate": 0.00014598872028212463, "loss": 0.9438, "step": 7200 }, { "epoch": 0.413391473980148, "grad_norm": 0.2890625, "learning_rate": 0.00014589977517088365, "loss": 0.976, "step": 7205 }, { "epoch": 0.41367835217166793, "grad_norm": 0.26171875, "learning_rate": 0.0001458107840312452, "loss": 0.9441, "step": 7210 }, { "epoch": 0.4139652303631878, "grad_norm": 0.287109375, "learning_rate": 0.00014572174695244976, "loss": 0.9262, "step": 7215 }, { "epoch": 0.4142521085547077, "grad_norm": 0.2490234375, "learning_rate": 0.000145632664023784, "loss": 1.0197, "step": 7220 }, { "epoch": 0.4145389867462276, "grad_norm": 0.27734375, "learning_rate": 0.00014554353533458042, "loss": 0.9544, "step": 7225 }, { "epoch": 0.4148258649377474, "grad_norm": 0.259765625, "learning_rate": 0.00014545436097421744, "loss": 0.9347, "step": 7230 }, { "epoch": 0.4151127431292673, "grad_norm": 0.2734375, "learning_rate": 0.0001453651410321194, "loss": 0.9956, "step": 7235 }, { "epoch": 0.41539962132078717, "grad_norm": 0.26171875, "learning_rate": 0.00014527587559775616, "loss": 0.9399, "step": 7240 }, { "epoch": 0.4156864995123071, "grad_norm": 0.2578125, "learning_rate": 0.0001451865647606434, "loss": 0.9771, "step": 7245 }, { "epoch": 0.415973377703827, "grad_norm": 0.275390625, "learning_rate": 0.00014509720861034212, "loss": 0.9387, "step": 7250 }, { "epoch": 0.4162602558953468, "grad_norm": 0.251953125, "learning_rate": 0.00014500780723645897, "loss": 0.9079, "step": 7255 }, { "epoch": 0.4165471340868667, "grad_norm": 0.291015625, "learning_rate": 0.00014491836072864578, "loss": 1.007, "step": 7260 }, { "epoch": 0.4168340122783866, "grad_norm": 0.251953125, "learning_rate": 0.0001448288691765997, "loss": 0.9495, "step": 7265 }, { "epoch": 0.41712089046990647, "grad_norm": 0.28125, "learning_rate": 0.0001447393326700631, "loss": 0.8996, "step": 7270 }, { "epoch": 0.41740776866142637, "grad_norm": 0.263671875, "learning_rate": 0.0001446497512988234, "loss": 0.9924, "step": 7275 }, { "epoch": 0.4176946468529462, "grad_norm": 0.423828125, "learning_rate": 0.00014456012515271294, "loss": 0.9394, "step": 7280 }, { "epoch": 0.4179815250444661, "grad_norm": 0.2578125, "learning_rate": 0.0001444704543216091, "loss": 0.8852, "step": 7285 }, { "epoch": 0.418268403235986, "grad_norm": 0.291015625, "learning_rate": 0.0001443807388954339, "loss": 0.997, "step": 7290 }, { "epoch": 0.41855528142750587, "grad_norm": 0.26171875, "learning_rate": 0.00014429097896415425, "loss": 0.9559, "step": 7295 }, { "epoch": 0.41884215961902577, "grad_norm": 0.25390625, "learning_rate": 0.00014420117461778155, "loss": 0.9138, "step": 7300 }, { "epoch": 0.41912903781054567, "grad_norm": 0.255859375, "learning_rate": 0.00014411132594637185, "loss": 0.9079, "step": 7305 }, { "epoch": 0.4194159160020655, "grad_norm": 0.26171875, "learning_rate": 0.0001440214330400256, "loss": 0.9497, "step": 7310 }, { "epoch": 0.4197027941935854, "grad_norm": 0.2578125, "learning_rate": 0.00014393149598888752, "loss": 0.966, "step": 7315 }, { "epoch": 0.41998967238510526, "grad_norm": 0.263671875, "learning_rate": 0.0001438415148831468, "loss": 0.9842, "step": 7320 }, { "epoch": 0.42027655057662516, "grad_norm": 0.28125, "learning_rate": 0.00014375148981303663, "loss": 0.9818, "step": 7325 }, { "epoch": 0.42056342876814506, "grad_norm": 0.28515625, "learning_rate": 0.00014366142086883436, "loss": 0.9759, "step": 7330 }, { "epoch": 0.4208503069596649, "grad_norm": 0.26953125, "learning_rate": 0.00014357130814086135, "loss": 0.9687, "step": 7335 }, { "epoch": 0.4211371851511848, "grad_norm": 0.259765625, "learning_rate": 0.00014348115171948283, "loss": 0.9834, "step": 7340 }, { "epoch": 0.4214240633427047, "grad_norm": 0.263671875, "learning_rate": 0.00014339095169510786, "loss": 0.9655, "step": 7345 }, { "epoch": 0.42171094153422456, "grad_norm": 0.2890625, "learning_rate": 0.00014330070815818922, "loss": 0.9959, "step": 7350 }, { "epoch": 0.42199781972574446, "grad_norm": 0.267578125, "learning_rate": 0.00014321042119922337, "loss": 0.9207, "step": 7355 }, { "epoch": 0.4222846979172643, "grad_norm": 0.267578125, "learning_rate": 0.00014312009090875025, "loss": 0.9809, "step": 7360 }, { "epoch": 0.4225715761087842, "grad_norm": 0.275390625, "learning_rate": 0.00014302971737735324, "loss": 0.9781, "step": 7365 }, { "epoch": 0.4228584543003041, "grad_norm": 0.279296875, "learning_rate": 0.0001429393006956592, "loss": 0.9526, "step": 7370 }, { "epoch": 0.42314533249182396, "grad_norm": 0.267578125, "learning_rate": 0.0001428488409543381, "loss": 0.9646, "step": 7375 }, { "epoch": 0.42343221068334386, "grad_norm": 0.279296875, "learning_rate": 0.0001427583382441032, "loss": 0.9268, "step": 7380 }, { "epoch": 0.42371908887486376, "grad_norm": 0.28125, "learning_rate": 0.00014266779265571087, "loss": 0.9645, "step": 7385 }, { "epoch": 0.4240059670663836, "grad_norm": 0.28125, "learning_rate": 0.00014257720427996037, "loss": 0.9863, "step": 7390 }, { "epoch": 0.4242928452579035, "grad_norm": 0.28125, "learning_rate": 0.00014248657320769392, "loss": 1.0031, "step": 7395 }, { "epoch": 0.42457972344942335, "grad_norm": 0.263671875, "learning_rate": 0.00014239589952979662, "loss": 1.058, "step": 7400 }, { "epoch": 0.42486660164094325, "grad_norm": 0.2734375, "learning_rate": 0.00014230518333719616, "loss": 0.9156, "step": 7405 }, { "epoch": 0.42515347983246315, "grad_norm": 0.296875, "learning_rate": 0.00014221442472086304, "loss": 0.9806, "step": 7410 }, { "epoch": 0.425440358023983, "grad_norm": 0.287109375, "learning_rate": 0.0001421236237718101, "loss": 0.9215, "step": 7415 }, { "epoch": 0.4257272362155029, "grad_norm": 0.251953125, "learning_rate": 0.00014203278058109282, "loss": 1.0512, "step": 7420 }, { "epoch": 0.4260141144070228, "grad_norm": 0.298828125, "learning_rate": 0.0001419418952398089, "loss": 1.0363, "step": 7425 }, { "epoch": 0.42630099259854265, "grad_norm": 0.275390625, "learning_rate": 0.00014185096783909837, "loss": 0.9783, "step": 7430 }, { "epoch": 0.42658787079006255, "grad_norm": 0.263671875, "learning_rate": 0.00014175999847014346, "loss": 0.9456, "step": 7435 }, { "epoch": 0.4268747489815824, "grad_norm": 0.271484375, "learning_rate": 0.00014166898722416845, "loss": 0.9197, "step": 7440 }, { "epoch": 0.4271616271731023, "grad_norm": 0.263671875, "learning_rate": 0.00014157793419243962, "loss": 0.9843, "step": 7445 }, { "epoch": 0.4274485053646222, "grad_norm": 0.2578125, "learning_rate": 0.00014148683946626516, "loss": 0.8788, "step": 7450 }, { "epoch": 0.42773538355614205, "grad_norm": 0.275390625, "learning_rate": 0.00014139570313699502, "loss": 0.8889, "step": 7455 }, { "epoch": 0.42802226174766195, "grad_norm": 0.279296875, "learning_rate": 0.00014130452529602096, "loss": 0.9524, "step": 7460 }, { "epoch": 0.42830913993918185, "grad_norm": 0.265625, "learning_rate": 0.00014121330603477633, "loss": 0.9771, "step": 7465 }, { "epoch": 0.4285960181307017, "grad_norm": 0.294921875, "learning_rate": 0.00014112204544473598, "loss": 0.9544, "step": 7470 }, { "epoch": 0.4288828963222216, "grad_norm": 0.28125, "learning_rate": 0.00014103074361741623, "loss": 0.9991, "step": 7475 }, { "epoch": 0.42916977451374144, "grad_norm": 0.267578125, "learning_rate": 0.00014093940064437477, "loss": 0.9808, "step": 7480 }, { "epoch": 0.42945665270526134, "grad_norm": 0.2734375, "learning_rate": 0.0001408480166172106, "loss": 0.98, "step": 7485 }, { "epoch": 0.42974353089678125, "grad_norm": 0.279296875, "learning_rate": 0.00014075659162756372, "loss": 0.9992, "step": 7490 }, { "epoch": 0.4300304090883011, "grad_norm": 0.26171875, "learning_rate": 0.00014066512576711536, "loss": 0.9738, "step": 7495 }, { "epoch": 0.430317287279821, "grad_norm": 0.3125, "learning_rate": 0.0001405736191275877, "loss": 0.9805, "step": 7500 }, { "epoch": 0.4306041654713409, "grad_norm": 0.259765625, "learning_rate": 0.00014048207180074383, "loss": 0.9712, "step": 7505 }, { "epoch": 0.43089104366286074, "grad_norm": 0.27734375, "learning_rate": 0.00014039048387838756, "loss": 0.9586, "step": 7510 }, { "epoch": 0.43117792185438064, "grad_norm": 0.267578125, "learning_rate": 0.00014029885545236348, "loss": 0.9223, "step": 7515 }, { "epoch": 0.4314648000459005, "grad_norm": 0.2890625, "learning_rate": 0.00014020718661455678, "loss": 0.9958, "step": 7520 }, { "epoch": 0.4317516782374204, "grad_norm": 0.259765625, "learning_rate": 0.0001401154774568932, "loss": 0.8981, "step": 7525 }, { "epoch": 0.4320385564289403, "grad_norm": 0.259765625, "learning_rate": 0.00014002372807133887, "loss": 0.9039, "step": 7530 }, { "epoch": 0.43232543462046014, "grad_norm": 0.275390625, "learning_rate": 0.00013993193854990027, "loss": 0.9583, "step": 7535 }, { "epoch": 0.43261231281198004, "grad_norm": 0.306640625, "learning_rate": 0.00013984010898462416, "loss": 0.951, "step": 7540 }, { "epoch": 0.43289919100349994, "grad_norm": 0.259765625, "learning_rate": 0.00013974823946759742, "loss": 0.9705, "step": 7545 }, { "epoch": 0.4331860691950198, "grad_norm": 0.25390625, "learning_rate": 0.000139656330090947, "loss": 0.9752, "step": 7550 }, { "epoch": 0.4334729473865397, "grad_norm": 0.26953125, "learning_rate": 0.00013956438094683986, "loss": 1.0097, "step": 7555 }, { "epoch": 0.43375982557805953, "grad_norm": 0.279296875, "learning_rate": 0.00013947239212748277, "loss": 1.046, "step": 7560 }, { "epoch": 0.43404670376957943, "grad_norm": 0.271484375, "learning_rate": 0.00013938036372512235, "loss": 0.9044, "step": 7565 }, { "epoch": 0.43433358196109934, "grad_norm": 0.265625, "learning_rate": 0.0001392882958320449, "loss": 0.9208, "step": 7570 }, { "epoch": 0.4346204601526192, "grad_norm": 0.2578125, "learning_rate": 0.00013919618854057626, "loss": 0.961, "step": 7575 }, { "epoch": 0.4349073383441391, "grad_norm": 0.287109375, "learning_rate": 0.00013910404194308188, "loss": 0.9867, "step": 7580 }, { "epoch": 0.435194216535659, "grad_norm": 0.255859375, "learning_rate": 0.00013901185613196654, "loss": 0.946, "step": 7585 }, { "epoch": 0.43548109472717883, "grad_norm": 0.236328125, "learning_rate": 0.00013891963119967439, "loss": 0.9274, "step": 7590 }, { "epoch": 0.43576797291869873, "grad_norm": 0.265625, "learning_rate": 0.00013882736723868884, "loss": 0.9327, "step": 7595 }, { "epoch": 0.4360548511102186, "grad_norm": 0.263671875, "learning_rate": 0.00013873506434153228, "loss": 0.9173, "step": 7600 }, { "epoch": 0.4363417293017385, "grad_norm": 0.28515625, "learning_rate": 0.0001386427226007664, "loss": 0.9253, "step": 7605 }, { "epoch": 0.4366286074932584, "grad_norm": 0.265625, "learning_rate": 0.00013855034210899161, "loss": 0.9487, "step": 7610 }, { "epoch": 0.4369154856847782, "grad_norm": 0.26953125, "learning_rate": 0.00013845792295884735, "loss": 0.9742, "step": 7615 }, { "epoch": 0.43720236387629813, "grad_norm": 0.265625, "learning_rate": 0.0001383654652430117, "loss": 0.9517, "step": 7620 }, { "epoch": 0.43748924206781803, "grad_norm": 0.26171875, "learning_rate": 0.00013827296905420143, "loss": 1.016, "step": 7625 }, { "epoch": 0.4377761202593379, "grad_norm": 0.275390625, "learning_rate": 0.00013818043448517202, "loss": 1.0417, "step": 7630 }, { "epoch": 0.4380629984508578, "grad_norm": 0.271484375, "learning_rate": 0.00013808786162871728, "loss": 0.9953, "step": 7635 }, { "epoch": 0.4383498766423776, "grad_norm": 0.263671875, "learning_rate": 0.00013799525057766948, "loss": 1.0229, "step": 7640 }, { "epoch": 0.4386367548338975, "grad_norm": 0.28515625, "learning_rate": 0.00013790260142489922, "loss": 0.9127, "step": 7645 }, { "epoch": 0.4389236330254174, "grad_norm": 0.265625, "learning_rate": 0.00013780991426331522, "loss": 0.9701, "step": 7650 }, { "epoch": 0.43921051121693727, "grad_norm": 0.267578125, "learning_rate": 0.00013771718918586444, "loss": 0.9483, "step": 7655 }, { "epoch": 0.4394973894084572, "grad_norm": 0.291015625, "learning_rate": 0.00013762442628553179, "loss": 0.947, "step": 7660 }, { "epoch": 0.4397842675999771, "grad_norm": 0.2890625, "learning_rate": 0.00013753162565534004, "loss": 0.9881, "step": 7665 }, { "epoch": 0.4400711457914969, "grad_norm": 0.25390625, "learning_rate": 0.00013743878738834998, "loss": 0.9753, "step": 7670 }, { "epoch": 0.4403580239830168, "grad_norm": 0.248046875, "learning_rate": 0.00013734591157765994, "loss": 0.9692, "step": 7675 }, { "epoch": 0.44064490217453667, "grad_norm": 0.271484375, "learning_rate": 0.000137252998316406, "loss": 0.9459, "step": 7680 }, { "epoch": 0.44093178036605657, "grad_norm": 0.26171875, "learning_rate": 0.00013716004769776189, "loss": 0.9521, "step": 7685 }, { "epoch": 0.44121865855757647, "grad_norm": 0.271484375, "learning_rate": 0.00013706705981493853, "loss": 0.9345, "step": 7690 }, { "epoch": 0.4415055367490963, "grad_norm": 0.279296875, "learning_rate": 0.00013697403476118454, "loss": 0.9281, "step": 7695 }, { "epoch": 0.4417924149406162, "grad_norm": 0.29296875, "learning_rate": 0.00013688097262978555, "loss": 0.9223, "step": 7700 }, { "epoch": 0.4420792931321361, "grad_norm": 0.263671875, "learning_rate": 0.0001367878735140645, "loss": 0.9012, "step": 7705 }, { "epoch": 0.44236617132365597, "grad_norm": 0.267578125, "learning_rate": 0.00013669473750738142, "loss": 0.97, "step": 7710 }, { "epoch": 0.44265304951517587, "grad_norm": 0.2431640625, "learning_rate": 0.00013660156470313327, "loss": 0.9504, "step": 7715 }, { "epoch": 0.4429399277066957, "grad_norm": 0.29296875, "learning_rate": 0.00013650835519475395, "loss": 1.0034, "step": 7720 }, { "epoch": 0.4432268058982156, "grad_norm": 0.298828125, "learning_rate": 0.0001364151090757142, "loss": 0.9862, "step": 7725 }, { "epoch": 0.4435136840897355, "grad_norm": 0.2578125, "learning_rate": 0.0001363218264395214, "loss": 0.9397, "step": 7730 }, { "epoch": 0.44380056228125536, "grad_norm": 0.2890625, "learning_rate": 0.00013622850737971963, "loss": 0.9846, "step": 7735 }, { "epoch": 0.44408744047277526, "grad_norm": 0.267578125, "learning_rate": 0.00013613515198988938, "loss": 1.0255, "step": 7740 }, { "epoch": 0.44437431866429516, "grad_norm": 0.287109375, "learning_rate": 0.0001360417603636477, "loss": 0.9535, "step": 7745 }, { "epoch": 0.444661196855815, "grad_norm": 0.30859375, "learning_rate": 0.0001359483325946479, "loss": 0.9717, "step": 7750 }, { "epoch": 0.4449480750473349, "grad_norm": 0.294921875, "learning_rate": 0.00013585486877657957, "loss": 0.9772, "step": 7755 }, { "epoch": 0.44523495323885476, "grad_norm": 0.2578125, "learning_rate": 0.00013576136900316844, "loss": 0.9284, "step": 7760 }, { "epoch": 0.44552183143037466, "grad_norm": 0.29296875, "learning_rate": 0.00013566783336817627, "loss": 1.0055, "step": 7765 }, { "epoch": 0.44580870962189456, "grad_norm": 0.28125, "learning_rate": 0.00013557426196540083, "loss": 0.9923, "step": 7770 }, { "epoch": 0.4460955878134144, "grad_norm": 0.294921875, "learning_rate": 0.00013548065488867573, "loss": 0.9646, "step": 7775 }, { "epoch": 0.4463824660049343, "grad_norm": 0.28125, "learning_rate": 0.00013538701223187033, "loss": 0.9368, "step": 7780 }, { "epoch": 0.4466693441964542, "grad_norm": 0.265625, "learning_rate": 0.0001352933340888897, "loss": 0.9225, "step": 7785 }, { "epoch": 0.44695622238797406, "grad_norm": 0.263671875, "learning_rate": 0.0001351996205536745, "loss": 0.9874, "step": 7790 }, { "epoch": 0.44724310057949396, "grad_norm": 0.291015625, "learning_rate": 0.0001351058717202009, "loss": 0.9323, "step": 7795 }, { "epoch": 0.4475299787710138, "grad_norm": 0.283203125, "learning_rate": 0.00013501208768248042, "loss": 0.9731, "step": 7800 }, { "epoch": 0.4478168569625337, "grad_norm": 0.263671875, "learning_rate": 0.0001349182685345599, "loss": 0.9228, "step": 7805 }, { "epoch": 0.4481037351540536, "grad_norm": 0.279296875, "learning_rate": 0.00013482441437052134, "loss": 1.051, "step": 7810 }, { "epoch": 0.44839061334557345, "grad_norm": 0.26953125, "learning_rate": 0.00013473052528448201, "loss": 0.9272, "step": 7815 }, { "epoch": 0.44867749153709335, "grad_norm": 0.294921875, "learning_rate": 0.00013463660137059407, "loss": 0.9549, "step": 7820 }, { "epoch": 0.44896436972861326, "grad_norm": 0.248046875, "learning_rate": 0.0001345426427230446, "loss": 0.8974, "step": 7825 }, { "epoch": 0.4492512479201331, "grad_norm": 0.26953125, "learning_rate": 0.0001344486494360555, "loss": 0.9423, "step": 7830 }, { "epoch": 0.449538126111653, "grad_norm": 0.2734375, "learning_rate": 0.00013435462160388351, "loss": 0.9031, "step": 7835 }, { "epoch": 0.44982500430317285, "grad_norm": 0.26953125, "learning_rate": 0.00013426055932081997, "loss": 0.908, "step": 7840 }, { "epoch": 0.45011188249469275, "grad_norm": 0.26953125, "learning_rate": 0.00013416646268119074, "loss": 1.0104, "step": 7845 }, { "epoch": 0.45039876068621265, "grad_norm": 0.263671875, "learning_rate": 0.00013407233177935608, "loss": 1.0248, "step": 7850 }, { "epoch": 0.4506856388777325, "grad_norm": 0.296875, "learning_rate": 0.00013397816670971072, "loss": 0.9396, "step": 7855 }, { "epoch": 0.4509725170692524, "grad_norm": 0.279296875, "learning_rate": 0.00013388396756668354, "loss": 0.907, "step": 7860 }, { "epoch": 0.4512593952607723, "grad_norm": 0.26171875, "learning_rate": 0.00013378973444473776, "loss": 0.9295, "step": 7865 }, { "epoch": 0.45154627345229215, "grad_norm": 0.259765625, "learning_rate": 0.0001336954674383705, "loss": 0.9777, "step": 7870 }, { "epoch": 0.45183315164381205, "grad_norm": 0.25, "learning_rate": 0.00013360116664211293, "loss": 0.8774, "step": 7875 }, { "epoch": 0.4521200298353319, "grad_norm": 0.2490234375, "learning_rate": 0.00013350683215053013, "loss": 0.9259, "step": 7880 }, { "epoch": 0.4524069080268518, "grad_norm": 0.271484375, "learning_rate": 0.00013341246405822088, "loss": 0.9176, "step": 7885 }, { "epoch": 0.4526937862183717, "grad_norm": 0.2451171875, "learning_rate": 0.00013331806245981775, "loss": 0.9333, "step": 7890 }, { "epoch": 0.45298066440989154, "grad_norm": 0.265625, "learning_rate": 0.0001332236274499869, "loss": 1.0022, "step": 7895 }, { "epoch": 0.45326754260141144, "grad_norm": 0.271484375, "learning_rate": 0.00013312915912342793, "loss": 0.9649, "step": 7900 }, { "epoch": 0.45355442079293135, "grad_norm": 0.30078125, "learning_rate": 0.0001330346575748739, "loss": 0.9092, "step": 7905 }, { "epoch": 0.4538412989844512, "grad_norm": 0.265625, "learning_rate": 0.00013294012289909114, "loss": 0.9798, "step": 7910 }, { "epoch": 0.4541281771759711, "grad_norm": 0.2578125, "learning_rate": 0.00013284555519087933, "loss": 0.9607, "step": 7915 }, { "epoch": 0.45441505536749094, "grad_norm": 0.267578125, "learning_rate": 0.0001327509545450711, "loss": 0.9416, "step": 7920 }, { "epoch": 0.45470193355901084, "grad_norm": 0.30078125, "learning_rate": 0.0001326563210565322, "loss": 0.9335, "step": 7925 }, { "epoch": 0.45498881175053074, "grad_norm": 0.2890625, "learning_rate": 0.00013256165482016137, "loss": 0.977, "step": 7930 }, { "epoch": 0.4552756899420506, "grad_norm": 0.26953125, "learning_rate": 0.00013246695593089, "loss": 0.9757, "step": 7935 }, { "epoch": 0.4555625681335705, "grad_norm": 0.283203125, "learning_rate": 0.00013237222448368247, "loss": 0.9762, "step": 7940 }, { "epoch": 0.4558494463250904, "grad_norm": 0.263671875, "learning_rate": 0.00013227746057353562, "loss": 1.0154, "step": 7945 }, { "epoch": 0.45613632451661024, "grad_norm": 0.263671875, "learning_rate": 0.0001321826642954789, "loss": 0.9798, "step": 7950 }, { "epoch": 0.45642320270813014, "grad_norm": 0.2470703125, "learning_rate": 0.00013208783574457432, "loss": 1.0161, "step": 7955 }, { "epoch": 0.45671008089965, "grad_norm": 0.275390625, "learning_rate": 0.00013199297501591603, "loss": 0.9881, "step": 7960 }, { "epoch": 0.4569969590911699, "grad_norm": 0.263671875, "learning_rate": 0.00013189808220463072, "loss": 0.9788, "step": 7965 }, { "epoch": 0.4572838372826898, "grad_norm": 0.283203125, "learning_rate": 0.00013180315740587701, "loss": 1.0485, "step": 7970 }, { "epoch": 0.45757071547420963, "grad_norm": 0.294921875, "learning_rate": 0.00013170820071484572, "loss": 0.951, "step": 7975 }, { "epoch": 0.45785759366572953, "grad_norm": 0.263671875, "learning_rate": 0.0001316132122267597, "loss": 0.9431, "step": 7980 }, { "epoch": 0.45814447185724944, "grad_norm": 0.267578125, "learning_rate": 0.00013151819203687356, "loss": 0.8649, "step": 7985 }, { "epoch": 0.4584313500487693, "grad_norm": 0.3046875, "learning_rate": 0.00013142314024047375, "loss": 0.9131, "step": 7990 }, { "epoch": 0.4587182282402892, "grad_norm": 0.267578125, "learning_rate": 0.00013132805693287844, "loss": 0.9522, "step": 7995 }, { "epoch": 0.45900510643180903, "grad_norm": 0.6171875, "learning_rate": 0.0001312329422094374, "loss": 0.9952, "step": 8000 }, { "epoch": 0.45929198462332893, "grad_norm": 0.271484375, "learning_rate": 0.0001311377961655319, "loss": 0.8869, "step": 8005 }, { "epoch": 0.45957886281484883, "grad_norm": 0.275390625, "learning_rate": 0.00013104261889657453, "loss": 0.9416, "step": 8010 }, { "epoch": 0.4598657410063687, "grad_norm": 0.25390625, "learning_rate": 0.00013094741049800936, "loss": 0.933, "step": 8015 }, { "epoch": 0.4601526191978886, "grad_norm": 0.265625, "learning_rate": 0.00013085217106531153, "loss": 1.0033, "step": 8020 }, { "epoch": 0.4604394973894085, "grad_norm": 0.267578125, "learning_rate": 0.00013075690069398738, "loss": 0.9582, "step": 8025 }, { "epoch": 0.4607263755809283, "grad_norm": 0.2578125, "learning_rate": 0.00013066159947957426, "loss": 0.9989, "step": 8030 }, { "epoch": 0.46101325377244823, "grad_norm": 0.287109375, "learning_rate": 0.0001305662675176404, "loss": 0.9598, "step": 8035 }, { "epoch": 0.4613001319639681, "grad_norm": 0.26953125, "learning_rate": 0.00013047090490378495, "loss": 0.9588, "step": 8040 }, { "epoch": 0.461587010155488, "grad_norm": 0.259765625, "learning_rate": 0.00013037551173363774, "loss": 0.9166, "step": 8045 }, { "epoch": 0.4618738883470079, "grad_norm": 0.26171875, "learning_rate": 0.00013028008810285924, "loss": 0.9633, "step": 8050 }, { "epoch": 0.4621607665385277, "grad_norm": 0.25390625, "learning_rate": 0.00013018463410714048, "loss": 0.8813, "step": 8055 }, { "epoch": 0.4624476447300476, "grad_norm": 0.30078125, "learning_rate": 0.00013008914984220294, "loss": 0.9441, "step": 8060 }, { "epoch": 0.4627345229215675, "grad_norm": 0.2890625, "learning_rate": 0.00012999363540379852, "loss": 0.9441, "step": 8065 }, { "epoch": 0.46302140111308737, "grad_norm": 0.275390625, "learning_rate": 0.00012989809088770923, "loss": 0.9317, "step": 8070 }, { "epoch": 0.4633082793046073, "grad_norm": 0.27734375, "learning_rate": 0.00012980251638974733, "loss": 0.9065, "step": 8075 }, { "epoch": 0.4635951574961271, "grad_norm": 0.291015625, "learning_rate": 0.0001297069120057552, "loss": 0.9926, "step": 8080 }, { "epoch": 0.463882035687647, "grad_norm": 0.2734375, "learning_rate": 0.0001296112778316051, "loss": 0.9544, "step": 8085 }, { "epoch": 0.4641689138791669, "grad_norm": 0.265625, "learning_rate": 0.00012951561396319918, "loss": 0.9443, "step": 8090 }, { "epoch": 0.46445579207068677, "grad_norm": 0.265625, "learning_rate": 0.00012941992049646936, "loss": 0.996, "step": 8095 }, { "epoch": 0.46474267026220667, "grad_norm": 0.27734375, "learning_rate": 0.00012932419752737735, "loss": 0.9842, "step": 8100 }, { "epoch": 0.46502954845372657, "grad_norm": 0.27734375, "learning_rate": 0.00012922844515191425, "loss": 0.9353, "step": 8105 }, { "epoch": 0.4653164266452464, "grad_norm": 0.28125, "learning_rate": 0.00012913266346610086, "loss": 0.9293, "step": 8110 }, { "epoch": 0.4656033048367663, "grad_norm": 0.271484375, "learning_rate": 0.0001290368525659872, "loss": 0.9549, "step": 8115 }, { "epoch": 0.46589018302828616, "grad_norm": 0.2421875, "learning_rate": 0.00012894101254765268, "loss": 0.98, "step": 8120 }, { "epoch": 0.46617706121980607, "grad_norm": 0.2578125, "learning_rate": 0.00012884514350720586, "loss": 0.8901, "step": 8125 }, { "epoch": 0.46646393941132597, "grad_norm": 0.275390625, "learning_rate": 0.00012874924554078448, "loss": 0.9632, "step": 8130 }, { "epoch": 0.4667508176028458, "grad_norm": 0.27734375, "learning_rate": 0.00012865331874455517, "loss": 0.9504, "step": 8135 }, { "epoch": 0.4670376957943657, "grad_norm": 0.25390625, "learning_rate": 0.0001285573632147136, "loss": 1.0337, "step": 8140 }, { "epoch": 0.4673245739858856, "grad_norm": 0.2578125, "learning_rate": 0.00012846137904748414, "loss": 0.9788, "step": 8145 }, { "epoch": 0.46761145217740546, "grad_norm": 0.28125, "learning_rate": 0.00012836536633911995, "loss": 0.9813, "step": 8150 }, { "epoch": 0.46789833036892536, "grad_norm": 0.265625, "learning_rate": 0.0001282693251859028, "loss": 0.9625, "step": 8155 }, { "epoch": 0.4681852085604452, "grad_norm": 0.279296875, "learning_rate": 0.00012817325568414297, "loss": 0.9879, "step": 8160 }, { "epoch": 0.4684720867519651, "grad_norm": 0.259765625, "learning_rate": 0.00012807715793017918, "loss": 0.9737, "step": 8165 }, { "epoch": 0.468758964943485, "grad_norm": 0.2578125, "learning_rate": 0.00012798103202037842, "loss": 0.9963, "step": 8170 }, { "epoch": 0.46904584313500486, "grad_norm": 0.2734375, "learning_rate": 0.00012788487805113602, "loss": 0.9001, "step": 8175 }, { "epoch": 0.46933272132652476, "grad_norm": 0.291015625, "learning_rate": 0.0001277886961188754, "loss": 1.0332, "step": 8180 }, { "epoch": 0.46961959951804466, "grad_norm": 0.275390625, "learning_rate": 0.00012769248632004795, "loss": 1.0109, "step": 8185 }, { "epoch": 0.4699064777095645, "grad_norm": 0.271484375, "learning_rate": 0.0001275962487511332, "loss": 0.9768, "step": 8190 }, { "epoch": 0.4701933559010844, "grad_norm": 0.26953125, "learning_rate": 0.00012749998350863827, "loss": 0.9383, "step": 8195 }, { "epoch": 0.47048023409260425, "grad_norm": 0.248046875, "learning_rate": 0.0001274036906890982, "loss": 0.9907, "step": 8200 }, { "epoch": 0.47076711228412416, "grad_norm": 0.265625, "learning_rate": 0.00012730737038907567, "loss": 0.9722, "step": 8205 }, { "epoch": 0.47105399047564406, "grad_norm": 0.26953125, "learning_rate": 0.00012721102270516087, "loss": 0.9917, "step": 8210 }, { "epoch": 0.4713408686671639, "grad_norm": 0.255859375, "learning_rate": 0.00012711464773397152, "loss": 0.9361, "step": 8215 }, { "epoch": 0.4716277468586838, "grad_norm": 0.396484375, "learning_rate": 0.0001270182455721526, "loss": 0.9083, "step": 8220 }, { "epoch": 0.4719146250502037, "grad_norm": 0.265625, "learning_rate": 0.00012692181631637642, "loss": 0.9438, "step": 8225 }, { "epoch": 0.47220150324172355, "grad_norm": 0.275390625, "learning_rate": 0.00012682536006334248, "loss": 0.9491, "step": 8230 }, { "epoch": 0.47248838143324345, "grad_norm": 0.259765625, "learning_rate": 0.00012672887690977732, "loss": 0.9634, "step": 8235 }, { "epoch": 0.4727752596247633, "grad_norm": 0.265625, "learning_rate": 0.00012663236695243448, "loss": 0.96, "step": 8240 }, { "epoch": 0.4730621378162832, "grad_norm": 0.271484375, "learning_rate": 0.0001265358302880943, "loss": 0.9325, "step": 8245 }, { "epoch": 0.4733490160078031, "grad_norm": 0.283203125, "learning_rate": 0.00012643926701356404, "loss": 0.9878, "step": 8250 }, { "epoch": 0.47363589419932295, "grad_norm": 0.271484375, "learning_rate": 0.00012634267722567752, "loss": 0.9569, "step": 8255 }, { "epoch": 0.47392277239084285, "grad_norm": 0.267578125, "learning_rate": 0.00012624606102129516, "loss": 0.9397, "step": 8260 }, { "epoch": 0.47420965058236275, "grad_norm": 0.2890625, "learning_rate": 0.00012614941849730405, "loss": 0.9127, "step": 8265 }, { "epoch": 0.4744965287738826, "grad_norm": 0.279296875, "learning_rate": 0.00012605274975061736, "loss": 1.03, "step": 8270 }, { "epoch": 0.4747834069654025, "grad_norm": 0.26171875, "learning_rate": 0.00012595605487817482, "loss": 1.0017, "step": 8275 }, { "epoch": 0.47507028515692235, "grad_norm": 0.26171875, "learning_rate": 0.00012585933397694224, "loss": 0.9802, "step": 8280 }, { "epoch": 0.47535716334844225, "grad_norm": 0.2490234375, "learning_rate": 0.00012576258714391155, "loss": 0.9441, "step": 8285 }, { "epoch": 0.47564404153996215, "grad_norm": 0.251953125, "learning_rate": 0.00012566581447610072, "loss": 1.005, "step": 8290 }, { "epoch": 0.475930919731482, "grad_norm": 0.2578125, "learning_rate": 0.0001255690160705536, "loss": 0.9187, "step": 8295 }, { "epoch": 0.4762177979230019, "grad_norm": 0.263671875, "learning_rate": 0.0001254721920243398, "loss": 0.9247, "step": 8300 }, { "epoch": 0.4765046761145218, "grad_norm": 0.279296875, "learning_rate": 0.00012537534243455472, "loss": 0.9306, "step": 8305 }, { "epoch": 0.47679155430604164, "grad_norm": 0.30078125, "learning_rate": 0.00012527846739831934, "loss": 1.0118, "step": 8310 }, { "epoch": 0.47707843249756154, "grad_norm": 0.26171875, "learning_rate": 0.00012518156701278019, "loss": 0.887, "step": 8315 }, { "epoch": 0.4773653106890814, "grad_norm": 0.275390625, "learning_rate": 0.0001250846413751092, "loss": 1.0722, "step": 8320 }, { "epoch": 0.4776521888806013, "grad_norm": 0.283203125, "learning_rate": 0.00012498769058250355, "loss": 0.9375, "step": 8325 }, { "epoch": 0.4779390670721212, "grad_norm": 0.26953125, "learning_rate": 0.00012489071473218574, "loss": 0.946, "step": 8330 }, { "epoch": 0.47822594526364104, "grad_norm": 0.26171875, "learning_rate": 0.0001247937139214034, "loss": 0.9898, "step": 8335 }, { "epoch": 0.47851282345516094, "grad_norm": 0.255859375, "learning_rate": 0.00012469668824742914, "loss": 0.9774, "step": 8340 }, { "epoch": 0.47879970164668084, "grad_norm": 0.275390625, "learning_rate": 0.00012459963780756054, "loss": 0.986, "step": 8345 }, { "epoch": 0.4790865798382007, "grad_norm": 0.263671875, "learning_rate": 0.00012450256269911996, "loss": 0.9345, "step": 8350 }, { "epoch": 0.4793734580297206, "grad_norm": 0.267578125, "learning_rate": 0.0001244054630194546, "loss": 0.9338, "step": 8355 }, { "epoch": 0.47966033622124044, "grad_norm": 0.255859375, "learning_rate": 0.00012430833886593613, "loss": 0.8914, "step": 8360 }, { "epoch": 0.47994721441276034, "grad_norm": 0.2578125, "learning_rate": 0.00012421119033596102, "loss": 0.9065, "step": 8365 }, { "epoch": 0.48023409260428024, "grad_norm": 0.27734375, "learning_rate": 0.0001241140175269499, "loss": 1.027, "step": 8370 }, { "epoch": 0.4805209707958001, "grad_norm": 0.265625, "learning_rate": 0.00012401682053634792, "loss": 0.9085, "step": 8375 }, { "epoch": 0.48080784898732, "grad_norm": 0.283203125, "learning_rate": 0.00012391959946162447, "loss": 0.976, "step": 8380 }, { "epoch": 0.4810947271788399, "grad_norm": 0.267578125, "learning_rate": 0.00012382235440027307, "loss": 0.8829, "step": 8385 }, { "epoch": 0.48138160537035973, "grad_norm": 0.28125, "learning_rate": 0.0001237250854498112, "loss": 0.9961, "step": 8390 }, { "epoch": 0.48166848356187963, "grad_norm": 0.291015625, "learning_rate": 0.00012362779270778048, "loss": 0.9896, "step": 8395 }, { "epoch": 0.4819553617533995, "grad_norm": 0.2734375, "learning_rate": 0.00012353047627174625, "loss": 0.9684, "step": 8400 }, { "epoch": 0.4822422399449194, "grad_norm": 0.263671875, "learning_rate": 0.00012343313623929764, "loss": 0.9758, "step": 8405 }, { "epoch": 0.4825291181364393, "grad_norm": 0.279296875, "learning_rate": 0.00012333577270804745, "loss": 0.9922, "step": 8410 }, { "epoch": 0.48281599632795913, "grad_norm": 0.255859375, "learning_rate": 0.0001232383857756321, "loss": 0.9422, "step": 8415 }, { "epoch": 0.48310287451947903, "grad_norm": 0.298828125, "learning_rate": 0.00012314097553971137, "loss": 0.9648, "step": 8420 }, { "epoch": 0.48338975271099893, "grad_norm": 0.26171875, "learning_rate": 0.00012304354209796846, "loss": 0.9822, "step": 8425 }, { "epoch": 0.4836766309025188, "grad_norm": 0.275390625, "learning_rate": 0.00012294608554810988, "loss": 0.9452, "step": 8430 }, { "epoch": 0.4839635090940387, "grad_norm": 0.265625, "learning_rate": 0.00012284860598786525, "loss": 0.9833, "step": 8435 }, { "epoch": 0.4842503872855585, "grad_norm": 0.265625, "learning_rate": 0.0001227511035149873, "loss": 0.981, "step": 8440 }, { "epoch": 0.4845372654770784, "grad_norm": 0.26953125, "learning_rate": 0.00012265357822725172, "loss": 0.987, "step": 8445 }, { "epoch": 0.48482414366859833, "grad_norm": 0.28125, "learning_rate": 0.00012255603022245712, "loss": 1.0278, "step": 8450 }, { "epoch": 0.4851110218601182, "grad_norm": 0.298828125, "learning_rate": 0.0001224584595984248, "loss": 0.9869, "step": 8455 }, { "epoch": 0.4853979000516381, "grad_norm": 0.271484375, "learning_rate": 0.00012236086645299888, "loss": 1.0075, "step": 8460 }, { "epoch": 0.485684778243158, "grad_norm": 0.255859375, "learning_rate": 0.00012226325088404588, "loss": 0.9499, "step": 8465 }, { "epoch": 0.4859716564346778, "grad_norm": 0.265625, "learning_rate": 0.00012216561298945502, "loss": 0.9547, "step": 8470 }, { "epoch": 0.4862585346261977, "grad_norm": 0.263671875, "learning_rate": 0.00012206795286713774, "loss": 0.9796, "step": 8475 }, { "epoch": 0.48654541281771757, "grad_norm": 0.255859375, "learning_rate": 0.00012197027061502781, "loss": 0.9642, "step": 8480 }, { "epoch": 0.48683229100923747, "grad_norm": 0.251953125, "learning_rate": 0.00012187256633108129, "loss": 0.9616, "step": 8485 }, { "epoch": 0.4871191692007574, "grad_norm": 0.265625, "learning_rate": 0.00012177484011327618, "loss": 0.9035, "step": 8490 }, { "epoch": 0.4874060473922772, "grad_norm": 0.26171875, "learning_rate": 0.00012167709205961256, "loss": 0.9275, "step": 8495 }, { "epoch": 0.4876929255837971, "grad_norm": 0.26171875, "learning_rate": 0.00012157932226811246, "loss": 0.9153, "step": 8500 }, { "epoch": 0.487979803775317, "grad_norm": 0.2578125, "learning_rate": 0.00012148153083681954, "loss": 1.0136, "step": 8505 }, { "epoch": 0.48826668196683687, "grad_norm": 0.251953125, "learning_rate": 0.00012138371786379938, "loss": 0.943, "step": 8510 }, { "epoch": 0.48855356015835677, "grad_norm": 0.251953125, "learning_rate": 0.00012128588344713899, "loss": 0.9297, "step": 8515 }, { "epoch": 0.4888404383498766, "grad_norm": 0.265625, "learning_rate": 0.0001211880276849469, "loss": 0.96, "step": 8520 }, { "epoch": 0.4891273165413965, "grad_norm": 0.265625, "learning_rate": 0.00012109015067535321, "loss": 0.9569, "step": 8525 }, { "epoch": 0.4894141947329164, "grad_norm": 0.283203125, "learning_rate": 0.00012099225251650907, "loss": 0.9621, "step": 8530 }, { "epoch": 0.48970107292443626, "grad_norm": 0.29296875, "learning_rate": 0.00012089433330658705, "loss": 1.0271, "step": 8535 }, { "epoch": 0.48998795111595617, "grad_norm": 0.251953125, "learning_rate": 0.00012079639314378075, "loss": 0.9216, "step": 8540 }, { "epoch": 0.49027482930747607, "grad_norm": 0.314453125, "learning_rate": 0.00012069843212630474, "loss": 0.8654, "step": 8545 }, { "epoch": 0.4905617074989959, "grad_norm": 0.26953125, "learning_rate": 0.00012060045035239465, "loss": 0.8843, "step": 8550 }, { "epoch": 0.4908485856905158, "grad_norm": 0.267578125, "learning_rate": 0.00012050244792030667, "loss": 0.9073, "step": 8555 }, { "epoch": 0.49113546388203566, "grad_norm": 0.259765625, "learning_rate": 0.00012040442492831798, "loss": 0.9647, "step": 8560 }, { "epoch": 0.49142234207355556, "grad_norm": 0.251953125, "learning_rate": 0.00012030638147472623, "loss": 0.9147, "step": 8565 }, { "epoch": 0.49170922026507546, "grad_norm": 0.28125, "learning_rate": 0.00012020831765784957, "loss": 0.9259, "step": 8570 }, { "epoch": 0.4919960984565953, "grad_norm": 0.267578125, "learning_rate": 0.00012011023357602668, "loss": 0.9544, "step": 8575 }, { "epoch": 0.4922829766481152, "grad_norm": 0.25390625, "learning_rate": 0.00012001212932761645, "loss": 0.8561, "step": 8580 }, { "epoch": 0.4925698548396351, "grad_norm": 0.2734375, "learning_rate": 0.00011991400501099805, "loss": 0.9172, "step": 8585 }, { "epoch": 0.49285673303115496, "grad_norm": 0.28515625, "learning_rate": 0.00011981586072457078, "loss": 0.9463, "step": 8590 }, { "epoch": 0.49314361122267486, "grad_norm": 0.2490234375, "learning_rate": 0.00011971769656675391, "loss": 0.9951, "step": 8595 }, { "epoch": 0.4934304894141947, "grad_norm": 0.259765625, "learning_rate": 0.00011961951263598677, "loss": 0.9508, "step": 8600 }, { "epoch": 0.4937173676057146, "grad_norm": 0.298828125, "learning_rate": 0.00011952130903072832, "loss": 0.9669, "step": 8605 }, { "epoch": 0.4940042457972345, "grad_norm": 0.263671875, "learning_rate": 0.00011942308584945741, "loss": 1.0121, "step": 8610 }, { "epoch": 0.49429112398875436, "grad_norm": 0.279296875, "learning_rate": 0.00011932484319067245, "loss": 0.983, "step": 8615 }, { "epoch": 0.49457800218027426, "grad_norm": 0.26953125, "learning_rate": 0.00011922658115289141, "loss": 0.9413, "step": 8620 }, { "epoch": 0.49486488037179416, "grad_norm": 0.271484375, "learning_rate": 0.00011912829983465168, "loss": 0.9039, "step": 8625 }, { "epoch": 0.495151758563314, "grad_norm": 0.275390625, "learning_rate": 0.00011902999933450997, "loss": 0.9747, "step": 8630 }, { "epoch": 0.4954386367548339, "grad_norm": 0.26953125, "learning_rate": 0.0001189316797510423, "loss": 0.9519, "step": 8635 }, { "epoch": 0.49572551494635375, "grad_norm": 0.306640625, "learning_rate": 0.00011883334118284369, "loss": 0.9836, "step": 8640 }, { "epoch": 0.49601239313787365, "grad_norm": 0.26171875, "learning_rate": 0.00011873498372852828, "loss": 1.0067, "step": 8645 }, { "epoch": 0.49629927132939355, "grad_norm": 0.28125, "learning_rate": 0.0001186366074867292, "loss": 0.8766, "step": 8650 }, { "epoch": 0.4965861495209134, "grad_norm": 0.310546875, "learning_rate": 0.00011853821255609836, "loss": 1.0009, "step": 8655 }, { "epoch": 0.4968730277124333, "grad_norm": 0.26171875, "learning_rate": 0.00011843979903530638, "loss": 0.9815, "step": 8660 }, { "epoch": 0.4971599059039532, "grad_norm": 0.267578125, "learning_rate": 0.00011834136702304257, "loss": 0.9317, "step": 8665 }, { "epoch": 0.49744678409547305, "grad_norm": 0.2734375, "learning_rate": 0.00011824291661801479, "loss": 0.9523, "step": 8670 }, { "epoch": 0.49773366228699295, "grad_norm": 0.265625, "learning_rate": 0.00011814444791894934, "loss": 0.9784, "step": 8675 }, { "epoch": 0.4980205404785128, "grad_norm": 0.294921875, "learning_rate": 0.0001180459610245908, "loss": 1.0339, "step": 8680 }, { "epoch": 0.4983074186700327, "grad_norm": 0.24609375, "learning_rate": 0.00011794745603370212, "loss": 0.9443, "step": 8685 }, { "epoch": 0.4985942968615526, "grad_norm": 0.279296875, "learning_rate": 0.00011784893304506424, "loss": 0.9769, "step": 8690 }, { "epoch": 0.49888117505307245, "grad_norm": 0.287109375, "learning_rate": 0.0001177503921574763, "loss": 0.9559, "step": 8695 }, { "epoch": 0.49916805324459235, "grad_norm": 0.279296875, "learning_rate": 0.00011765183346975528, "loss": 0.9302, "step": 8700 }, { "epoch": 0.49945493143611225, "grad_norm": 0.275390625, "learning_rate": 0.0001175532570807361, "loss": 0.9505, "step": 8705 }, { "epoch": 0.4997418096276321, "grad_norm": 0.263671875, "learning_rate": 0.00011745466308927136, "loss": 1.001, "step": 8710 }, { "epoch": 0.500028687819152, "grad_norm": 0.2734375, "learning_rate": 0.00011735605159423131, "loss": 0.8966, "step": 8715 }, { "epoch": 0.5003155660106718, "grad_norm": 0.2490234375, "learning_rate": 0.00011725742269450382, "loss": 0.8901, "step": 8720 }, { "epoch": 0.5006024442021918, "grad_norm": 0.287109375, "learning_rate": 0.00011715877648899413, "loss": 0.9536, "step": 8725 }, { "epoch": 0.5008893223937116, "grad_norm": 0.259765625, "learning_rate": 0.0001170601130766249, "loss": 0.9002, "step": 8730 }, { "epoch": 0.5011762005852315, "grad_norm": 0.2734375, "learning_rate": 0.00011696143255633607, "loss": 0.9495, "step": 8735 }, { "epoch": 0.5014630787767513, "grad_norm": 0.26171875, "learning_rate": 0.0001168627350270846, "loss": 0.9705, "step": 8740 }, { "epoch": 0.5017499569682713, "grad_norm": 0.279296875, "learning_rate": 0.00011676402058784463, "loss": 0.9148, "step": 8745 }, { "epoch": 0.5020368351597911, "grad_norm": 0.271484375, "learning_rate": 0.00011666528933760725, "loss": 0.9683, "step": 8750 }, { "epoch": 0.502323713351311, "grad_norm": 0.30859375, "learning_rate": 0.00011656654137538032, "loss": 0.9706, "step": 8755 }, { "epoch": 0.5026105915428309, "grad_norm": 0.255859375, "learning_rate": 0.0001164677768001886, "loss": 0.9102, "step": 8760 }, { "epoch": 0.5028974697343508, "grad_norm": 0.265625, "learning_rate": 0.00011636899571107333, "loss": 0.8819, "step": 8765 }, { "epoch": 0.5031843479258706, "grad_norm": 0.28515625, "learning_rate": 0.00011627019820709246, "loss": 0.9267, "step": 8770 }, { "epoch": 0.5034712261173906, "grad_norm": 0.28125, "learning_rate": 0.00011617138438732036, "loss": 0.9334, "step": 8775 }, { "epoch": 0.5037581043089104, "grad_norm": 0.279296875, "learning_rate": 0.00011607255435084772, "loss": 0.9174, "step": 8780 }, { "epoch": 0.5040449825004303, "grad_norm": 0.265625, "learning_rate": 0.00011597370819678157, "loss": 0.9466, "step": 8785 }, { "epoch": 0.5043318606919502, "grad_norm": 0.58203125, "learning_rate": 0.00011587484602424499, "loss": 0.9153, "step": 8790 }, { "epoch": 0.5046187388834701, "grad_norm": 0.267578125, "learning_rate": 0.00011577596793237722, "loss": 0.9667, "step": 8795 }, { "epoch": 0.5049056170749899, "grad_norm": 0.259765625, "learning_rate": 0.00011567707402033345, "loss": 0.9371, "step": 8800 }, { "epoch": 0.5051924952665099, "grad_norm": 0.267578125, "learning_rate": 0.00011557816438728467, "loss": 0.9089, "step": 8805 }, { "epoch": 0.5054793734580297, "grad_norm": 0.263671875, "learning_rate": 0.00011547923913241774, "loss": 0.9599, "step": 8810 }, { "epoch": 0.5057662516495496, "grad_norm": 0.275390625, "learning_rate": 0.00011538029835493507, "loss": 0.9268, "step": 8815 }, { "epoch": 0.5060531298410694, "grad_norm": 0.251953125, "learning_rate": 0.00011528134215405473, "loss": 0.955, "step": 8820 }, { "epoch": 0.5063400080325894, "grad_norm": 0.287109375, "learning_rate": 0.00011518237062901023, "loss": 1.008, "step": 8825 }, { "epoch": 0.5066268862241092, "grad_norm": 0.279296875, "learning_rate": 0.00011508338387905038, "loss": 1.0123, "step": 8830 }, { "epoch": 0.5069137644156291, "grad_norm": 0.25390625, "learning_rate": 0.0001149843820034394, "loss": 0.8719, "step": 8835 }, { "epoch": 0.507200642607149, "grad_norm": 0.27734375, "learning_rate": 0.00011488536510145651, "loss": 0.9107, "step": 8840 }, { "epoch": 0.5074875207986689, "grad_norm": 0.263671875, "learning_rate": 0.00011478633327239614, "loss": 0.9925, "step": 8845 }, { "epoch": 0.5077743989901887, "grad_norm": 0.267578125, "learning_rate": 0.0001146872866155676, "loss": 0.9601, "step": 8850 }, { "epoch": 0.5080612771817087, "grad_norm": 0.283203125, "learning_rate": 0.00011458822523029509, "loss": 0.9667, "step": 8855 }, { "epoch": 0.5083481553732285, "grad_norm": 0.287109375, "learning_rate": 0.00011448914921591765, "loss": 0.9374, "step": 8860 }, { "epoch": 0.5086350335647484, "grad_norm": 0.35546875, "learning_rate": 0.00011439005867178884, "loss": 0.9557, "step": 8865 }, { "epoch": 0.5089219117562683, "grad_norm": 0.2734375, "learning_rate": 0.00011429095369727696, "loss": 0.8917, "step": 8870 }, { "epoch": 0.5092087899477882, "grad_norm": 0.24609375, "learning_rate": 0.00011419183439176464, "loss": 0.9163, "step": 8875 }, { "epoch": 0.509495668139308, "grad_norm": 0.267578125, "learning_rate": 0.00011409270085464898, "loss": 0.9374, "step": 8880 }, { "epoch": 0.509782546330828, "grad_norm": 0.265625, "learning_rate": 0.0001139935531853413, "loss": 1.0086, "step": 8885 }, { "epoch": 0.5100694245223478, "grad_norm": 0.259765625, "learning_rate": 0.0001138943914832671, "loss": 0.9003, "step": 8890 }, { "epoch": 0.5103563027138677, "grad_norm": 0.265625, "learning_rate": 0.00011379521584786599, "loss": 0.9237, "step": 8895 }, { "epoch": 0.5106431809053875, "grad_norm": 0.279296875, "learning_rate": 0.0001136960263785915, "loss": 0.9486, "step": 8900 }, { "epoch": 0.5109300590969075, "grad_norm": 0.263671875, "learning_rate": 0.00011359682317491098, "loss": 0.9745, "step": 8905 }, { "epoch": 0.5112169372884273, "grad_norm": 0.28125, "learning_rate": 0.00011349760633630575, "loss": 0.9179, "step": 8910 }, { "epoch": 0.5115038154799472, "grad_norm": 0.265625, "learning_rate": 0.00011339837596227061, "loss": 0.9705, "step": 8915 }, { "epoch": 0.5117906936714671, "grad_norm": 0.255859375, "learning_rate": 0.00011329913215231401, "loss": 0.9673, "step": 8920 }, { "epoch": 0.512077571862987, "grad_norm": 0.298828125, "learning_rate": 0.00011319987500595785, "loss": 0.9442, "step": 8925 }, { "epoch": 0.5123644500545068, "grad_norm": 0.271484375, "learning_rate": 0.00011310060462273744, "loss": 0.9693, "step": 8930 }, { "epoch": 0.5126513282460268, "grad_norm": 0.265625, "learning_rate": 0.00011300132110220134, "loss": 0.9383, "step": 8935 }, { "epoch": 0.5129382064375466, "grad_norm": 0.28125, "learning_rate": 0.0001129020245439113, "loss": 0.9474, "step": 8940 }, { "epoch": 0.5132250846290665, "grad_norm": 0.25390625, "learning_rate": 0.00011280271504744208, "loss": 0.9741, "step": 8945 }, { "epoch": 0.5135119628205864, "grad_norm": 0.263671875, "learning_rate": 0.00011270339271238153, "loss": 0.9523, "step": 8950 }, { "epoch": 0.5137988410121063, "grad_norm": 0.265625, "learning_rate": 0.00011260405763833029, "loss": 0.92, "step": 8955 }, { "epoch": 0.5140857192036261, "grad_norm": 0.251953125, "learning_rate": 0.00011250470992490176, "loss": 0.9989, "step": 8960 }, { "epoch": 0.5143725973951461, "grad_norm": 0.263671875, "learning_rate": 0.0001124053496717221, "loss": 0.9641, "step": 8965 }, { "epoch": 0.5146594755866659, "grad_norm": 0.259765625, "learning_rate": 0.00011230597697842998, "loss": 0.9714, "step": 8970 }, { "epoch": 0.5149463537781858, "grad_norm": 0.26953125, "learning_rate": 0.0001122065919446765, "loss": 0.9023, "step": 8975 }, { "epoch": 0.5152332319697057, "grad_norm": 0.259765625, "learning_rate": 0.00011210719467012529, "loss": 0.8879, "step": 8980 }, { "epoch": 0.5155201101612256, "grad_norm": 0.29296875, "learning_rate": 0.0001120077852544521, "loss": 1.0173, "step": 8985 }, { "epoch": 0.5158069883527454, "grad_norm": 0.306640625, "learning_rate": 0.00011190836379734495, "loss": 0.99, "step": 8990 }, { "epoch": 0.5160938665442653, "grad_norm": 0.263671875, "learning_rate": 0.00011180893039850388, "loss": 0.9472, "step": 8995 }, { "epoch": 0.5163807447357852, "grad_norm": 0.279296875, "learning_rate": 0.00011170948515764088, "loss": 0.9285, "step": 9000 }, { "epoch": 0.5166676229273051, "grad_norm": 0.28515625, "learning_rate": 0.00011161002817447996, "loss": 1.0298, "step": 9005 }, { "epoch": 0.5169545011188249, "grad_norm": 0.2578125, "learning_rate": 0.00011151055954875673, "loss": 0.8893, "step": 9010 }, { "epoch": 0.5172413793103449, "grad_norm": 0.28125, "learning_rate": 0.00011141107938021858, "loss": 0.9669, "step": 9015 }, { "epoch": 0.5175282575018647, "grad_norm": 0.283203125, "learning_rate": 0.00011131158776862445, "loss": 0.9924, "step": 9020 }, { "epoch": 0.5178151356933846, "grad_norm": 0.271484375, "learning_rate": 0.0001112120848137447, "loss": 0.9652, "step": 9025 }, { "epoch": 0.5181020138849045, "grad_norm": 0.251953125, "learning_rate": 0.0001111125706153612, "loss": 0.8898, "step": 9030 }, { "epoch": 0.5183888920764244, "grad_norm": 0.296875, "learning_rate": 0.00011101304527326695, "loss": 0.9597, "step": 9035 }, { "epoch": 0.5186757702679442, "grad_norm": 0.28125, "learning_rate": 0.00011091350888726619, "loss": 1.0008, "step": 9040 }, { "epoch": 0.5189626484594642, "grad_norm": 0.294921875, "learning_rate": 0.0001108139615571743, "loss": 0.9604, "step": 9045 }, { "epoch": 0.519249526650984, "grad_norm": 0.26171875, "learning_rate": 0.00011071440338281745, "loss": 0.9893, "step": 9050 }, { "epoch": 0.5195364048425039, "grad_norm": 0.275390625, "learning_rate": 0.00011061483446403289, "loss": 0.9492, "step": 9055 }, { "epoch": 0.5198232830340238, "grad_norm": 0.271484375, "learning_rate": 0.00011051525490066852, "loss": 0.9726, "step": 9060 }, { "epoch": 0.5201101612255437, "grad_norm": 0.271484375, "learning_rate": 0.00011041566479258294, "loss": 0.9528, "step": 9065 }, { "epoch": 0.5203970394170635, "grad_norm": 0.267578125, "learning_rate": 0.0001103160642396454, "loss": 0.9567, "step": 9070 }, { "epoch": 0.5206839176085833, "grad_norm": 0.2734375, "learning_rate": 0.00011021645334173547, "loss": 0.9617, "step": 9075 }, { "epoch": 0.5209707958001033, "grad_norm": 0.283203125, "learning_rate": 0.00011011683219874323, "loss": 0.9781, "step": 9080 }, { "epoch": 0.5212576739916231, "grad_norm": 0.291015625, "learning_rate": 0.00011001720091056897, "loss": 0.9156, "step": 9085 }, { "epoch": 0.521544552183143, "grad_norm": 0.240234375, "learning_rate": 0.00010991755957712318, "loss": 0.9074, "step": 9090 }, { "epoch": 0.521831430374663, "grad_norm": 0.26171875, "learning_rate": 0.00010981790829832641, "loss": 0.9704, "step": 9095 }, { "epoch": 0.5221183085661828, "grad_norm": 0.271484375, "learning_rate": 0.00010971824717410917, "loss": 0.9449, "step": 9100 }, { "epoch": 0.5224051867577026, "grad_norm": 0.251953125, "learning_rate": 0.00010961857630441187, "loss": 0.9592, "step": 9105 }, { "epoch": 0.5226920649492226, "grad_norm": 0.26171875, "learning_rate": 0.00010951889578918471, "loss": 0.9461, "step": 9110 }, { "epoch": 0.5229789431407424, "grad_norm": 0.2490234375, "learning_rate": 0.00010941920572838747, "loss": 0.9055, "step": 9115 }, { "epoch": 0.5232658213322623, "grad_norm": 0.26953125, "learning_rate": 0.00010931950622198965, "loss": 0.9994, "step": 9120 }, { "epoch": 0.5235526995237823, "grad_norm": 0.267578125, "learning_rate": 0.00010921979736997006, "loss": 0.9529, "step": 9125 }, { "epoch": 0.5238395777153021, "grad_norm": 0.271484375, "learning_rate": 0.000109120079272317, "loss": 0.9471, "step": 9130 }, { "epoch": 0.5241264559068219, "grad_norm": 0.28125, "learning_rate": 0.00010902035202902798, "loss": 0.9856, "step": 9135 }, { "epoch": 0.5244133340983419, "grad_norm": 0.255859375, "learning_rate": 0.00010892061574010972, "loss": 1.0034, "step": 9140 }, { "epoch": 0.5247002122898617, "grad_norm": 0.28125, "learning_rate": 0.00010882087050557803, "loss": 0.9817, "step": 9145 }, { "epoch": 0.5249870904813816, "grad_norm": 0.291015625, "learning_rate": 0.00010872111642545759, "loss": 0.9766, "step": 9150 }, { "epoch": 0.5252739686729014, "grad_norm": 0.26171875, "learning_rate": 0.00010862135359978205, "loss": 0.9329, "step": 9155 }, { "epoch": 0.5255608468644214, "grad_norm": 0.251953125, "learning_rate": 0.00010852158212859378, "loss": 0.9674, "step": 9160 }, { "epoch": 0.5258477250559412, "grad_norm": 0.265625, "learning_rate": 0.00010842180211194384, "loss": 0.9627, "step": 9165 }, { "epoch": 0.5261346032474611, "grad_norm": 0.283203125, "learning_rate": 0.00010832201364989186, "loss": 0.8957, "step": 9170 }, { "epoch": 0.526421481438981, "grad_norm": 0.265625, "learning_rate": 0.00010822221684250593, "loss": 0.9555, "step": 9175 }, { "epoch": 0.5267083596305009, "grad_norm": 0.27734375, "learning_rate": 0.00010812241178986254, "loss": 0.9301, "step": 9180 }, { "epoch": 0.5269952378220207, "grad_norm": 0.265625, "learning_rate": 0.00010802259859204635, "loss": 0.9894, "step": 9185 }, { "epoch": 0.5272821160135407, "grad_norm": 0.375, "learning_rate": 0.00010792277734915033, "loss": 0.9212, "step": 9190 }, { "epoch": 0.5275689942050605, "grad_norm": 0.2578125, "learning_rate": 0.0001078229481612754, "loss": 0.9731, "step": 9195 }, { "epoch": 0.5278558723965804, "grad_norm": 0.2734375, "learning_rate": 0.00010772311112853053, "loss": 1.0153, "step": 9200 }, { "epoch": 0.5281427505881003, "grad_norm": 0.271484375, "learning_rate": 0.00010762326635103251, "loss": 1.0092, "step": 9205 }, { "epoch": 0.5284296287796202, "grad_norm": 0.28125, "learning_rate": 0.00010752341392890587, "loss": 0.997, "step": 9210 }, { "epoch": 0.52871650697114, "grad_norm": 0.26953125, "learning_rate": 0.00010742355396228287, "loss": 0.9526, "step": 9215 }, { "epoch": 0.52900338516266, "grad_norm": 0.263671875, "learning_rate": 0.00010732368655130333, "loss": 0.8901, "step": 9220 }, { "epoch": 0.5292902633541798, "grad_norm": 0.283203125, "learning_rate": 0.00010722381179611449, "loss": 1.0495, "step": 9225 }, { "epoch": 0.5295771415456997, "grad_norm": 0.28125, "learning_rate": 0.000107123929796871, "loss": 0.8588, "step": 9230 }, { "epoch": 0.5298640197372195, "grad_norm": 0.267578125, "learning_rate": 0.0001070240406537347, "loss": 0.9422, "step": 9235 }, { "epoch": 0.5301508979287395, "grad_norm": 0.265625, "learning_rate": 0.00010692414446687471, "loss": 0.9645, "step": 9240 }, { "epoch": 0.5304377761202593, "grad_norm": 0.275390625, "learning_rate": 0.0001068242413364671, "loss": 0.9756, "step": 9245 }, { "epoch": 0.5307246543117792, "grad_norm": 0.291015625, "learning_rate": 0.00010672433136269499, "loss": 0.9063, "step": 9250 }, { "epoch": 0.5310115325032991, "grad_norm": 0.302734375, "learning_rate": 0.00010662441464574833, "loss": 0.9121, "step": 9255 }, { "epoch": 0.531298410694819, "grad_norm": 0.296875, "learning_rate": 0.00010652449128582376, "loss": 0.9811, "step": 9260 }, { "epoch": 0.5315852888863388, "grad_norm": 0.26171875, "learning_rate": 0.00010642456138312473, "loss": 0.9502, "step": 9265 }, { "epoch": 0.5318721670778588, "grad_norm": 0.26953125, "learning_rate": 0.00010632462503786114, "loss": 0.9152, "step": 9270 }, { "epoch": 0.5321590452693786, "grad_norm": 0.265625, "learning_rate": 0.00010622468235024936, "loss": 0.9361, "step": 9275 }, { "epoch": 0.5324459234608985, "grad_norm": 0.28515625, "learning_rate": 0.00010612473342051219, "loss": 0.8926, "step": 9280 }, { "epoch": 0.5327328016524184, "grad_norm": 0.25, "learning_rate": 0.00010602477834887858, "loss": 0.8839, "step": 9285 }, { "epoch": 0.5330196798439383, "grad_norm": 0.255859375, "learning_rate": 0.00010592481723558374, "loss": 0.9026, "step": 9290 }, { "epoch": 0.5333065580354581, "grad_norm": 0.267578125, "learning_rate": 0.00010582485018086891, "loss": 0.9264, "step": 9295 }, { "epoch": 0.5335934362269781, "grad_norm": 0.263671875, "learning_rate": 0.00010572487728498127, "loss": 0.9399, "step": 9300 }, { "epoch": 0.5338803144184979, "grad_norm": 0.275390625, "learning_rate": 0.00010562489864817382, "loss": 0.9237, "step": 9305 }, { "epoch": 0.5341671926100178, "grad_norm": 0.267578125, "learning_rate": 0.00010552491437070537, "loss": 0.9861, "step": 9310 }, { "epoch": 0.5344540708015376, "grad_norm": 0.291015625, "learning_rate": 0.00010542492455284043, "loss": 0.944, "step": 9315 }, { "epoch": 0.5347409489930576, "grad_norm": 0.25390625, "learning_rate": 0.00010532492929484898, "loss": 0.9637, "step": 9320 }, { "epoch": 0.5350278271845774, "grad_norm": 0.2578125, "learning_rate": 0.00010522492869700648, "loss": 0.9383, "step": 9325 }, { "epoch": 0.5353147053760973, "grad_norm": 0.2578125, "learning_rate": 0.00010512492285959382, "loss": 0.9444, "step": 9330 }, { "epoch": 0.5356015835676172, "grad_norm": 0.28515625, "learning_rate": 0.00010502491188289695, "loss": 0.9563, "step": 9335 }, { "epoch": 0.5358884617591371, "grad_norm": 0.265625, "learning_rate": 0.00010492489586720724, "loss": 0.9869, "step": 9340 }, { "epoch": 0.5361753399506569, "grad_norm": 0.271484375, "learning_rate": 0.00010482487491282089, "loss": 0.874, "step": 9345 }, { "epoch": 0.5364622181421769, "grad_norm": 0.287109375, "learning_rate": 0.00010472484912003913, "loss": 1.016, "step": 9350 }, { "epoch": 0.5367490963336967, "grad_norm": 0.2734375, "learning_rate": 0.00010462481858916812, "loss": 0.9813, "step": 9355 }, { "epoch": 0.5370359745252166, "grad_norm": 0.287109375, "learning_rate": 0.0001045247834205186, "loss": 0.9562, "step": 9360 }, { "epoch": 0.5373228527167365, "grad_norm": 0.259765625, "learning_rate": 0.00010442474371440618, "loss": 0.9599, "step": 9365 }, { "epoch": 0.5376097309082564, "grad_norm": 0.26171875, "learning_rate": 0.00010432469957115083, "loss": 0.9684, "step": 9370 }, { "epoch": 0.5378966090997762, "grad_norm": 0.30859375, "learning_rate": 0.00010422465109107702, "loss": 0.982, "step": 9375 }, { "epoch": 0.5381834872912962, "grad_norm": 0.287109375, "learning_rate": 0.00010412459837451367, "loss": 1.018, "step": 9380 }, { "epoch": 0.538470365482816, "grad_norm": 0.275390625, "learning_rate": 0.00010402454152179377, "loss": 0.9333, "step": 9385 }, { "epoch": 0.5387572436743359, "grad_norm": 0.28125, "learning_rate": 0.00010392448063325463, "loss": 0.9124, "step": 9390 }, { "epoch": 0.5390441218658557, "grad_norm": 0.283203125, "learning_rate": 0.00010382441580923752, "loss": 0.978, "step": 9395 }, { "epoch": 0.5393310000573757, "grad_norm": 0.287109375, "learning_rate": 0.00010372434715008763, "loss": 0.9982, "step": 9400 }, { "epoch": 0.5396178782488955, "grad_norm": 0.25, "learning_rate": 0.00010362427475615413, "loss": 0.9829, "step": 9405 }, { "epoch": 0.5399047564404154, "grad_norm": 0.28125, "learning_rate": 0.00010352419872778971, "loss": 0.8918, "step": 9410 }, { "epoch": 0.5401916346319353, "grad_norm": 0.263671875, "learning_rate": 0.00010342411916535093, "loss": 0.9446, "step": 9415 }, { "epoch": 0.5404785128234552, "grad_norm": 0.263671875, "learning_rate": 0.00010332403616919779, "loss": 0.963, "step": 9420 }, { "epoch": 0.540765391014975, "grad_norm": 0.298828125, "learning_rate": 0.00010322394983969368, "loss": 0.9848, "step": 9425 }, { "epoch": 0.541052269206495, "grad_norm": 0.333984375, "learning_rate": 0.0001031238602772055, "loss": 0.921, "step": 9430 }, { "epoch": 0.5413391473980148, "grad_norm": 0.275390625, "learning_rate": 0.00010302376758210319, "loss": 0.9669, "step": 9435 }, { "epoch": 0.5416260255895347, "grad_norm": 0.28125, "learning_rate": 0.00010292367185475997, "loss": 0.979, "step": 9440 }, { "epoch": 0.5419129037810546, "grad_norm": 0.26171875, "learning_rate": 0.00010282357319555207, "loss": 0.963, "step": 9445 }, { "epoch": 0.5421997819725745, "grad_norm": 0.259765625, "learning_rate": 0.00010272347170485863, "loss": 0.977, "step": 9450 }, { "epoch": 0.5424866601640943, "grad_norm": 0.26953125, "learning_rate": 0.00010262336748306165, "loss": 0.9735, "step": 9455 }, { "epoch": 0.5427735383556143, "grad_norm": 0.263671875, "learning_rate": 0.0001025232606305459, "loss": 0.9852, "step": 9460 }, { "epoch": 0.5430604165471341, "grad_norm": 0.26171875, "learning_rate": 0.00010242315124769872, "loss": 0.9374, "step": 9465 }, { "epoch": 0.543347294738654, "grad_norm": 0.28125, "learning_rate": 0.00010232303943491004, "loss": 1.0392, "step": 9470 }, { "epoch": 0.5436341729301738, "grad_norm": 0.26171875, "learning_rate": 0.00010222292529257217, "loss": 0.9898, "step": 9475 }, { "epoch": 0.5439210511216938, "grad_norm": 0.28125, "learning_rate": 0.00010212280892107988, "loss": 0.956, "step": 9480 }, { "epoch": 0.5442079293132136, "grad_norm": 0.26953125, "learning_rate": 0.00010202269042083001, "loss": 0.9132, "step": 9485 }, { "epoch": 0.5444948075047334, "grad_norm": 0.30859375, "learning_rate": 0.00010192256989222169, "loss": 0.9389, "step": 9490 }, { "epoch": 0.5447816856962534, "grad_norm": 0.267578125, "learning_rate": 0.00010182244743565594, "loss": 0.8723, "step": 9495 }, { "epoch": 0.5450685638877732, "grad_norm": 0.322265625, "learning_rate": 0.0001017223231515358, "loss": 0.9266, "step": 9500 }, { "epoch": 0.5453554420792931, "grad_norm": 0.271484375, "learning_rate": 0.00010162219714026617, "loss": 0.9138, "step": 9505 }, { "epoch": 0.545642320270813, "grad_norm": 0.275390625, "learning_rate": 0.0001015220695022536, "loss": 0.9252, "step": 9510 }, { "epoch": 0.5459291984623329, "grad_norm": 0.25390625, "learning_rate": 0.00010142194033790633, "loss": 0.908, "step": 9515 }, { "epoch": 0.5462160766538527, "grad_norm": 0.265625, "learning_rate": 0.0001013218097476341, "loss": 0.9012, "step": 9520 }, { "epoch": 0.5465029548453727, "grad_norm": 0.296875, "learning_rate": 0.00010122167783184806, "loss": 0.9661, "step": 9525 }, { "epoch": 0.5467898330368925, "grad_norm": 0.287109375, "learning_rate": 0.00010112154469096078, "loss": 0.9656, "step": 9530 }, { "epoch": 0.5470767112284124, "grad_norm": 0.2734375, "learning_rate": 0.00010102141042538597, "loss": 0.9999, "step": 9535 }, { "epoch": 0.5473635894199324, "grad_norm": 0.33984375, "learning_rate": 0.0001009212751355385, "loss": 0.8986, "step": 9540 }, { "epoch": 0.5476504676114522, "grad_norm": 0.251953125, "learning_rate": 0.00010082113892183423, "loss": 0.9011, "step": 9545 }, { "epoch": 0.547937345802972, "grad_norm": 0.248046875, "learning_rate": 0.00010072100188469002, "loss": 0.9079, "step": 9550 }, { "epoch": 0.5482242239944919, "grad_norm": 0.259765625, "learning_rate": 0.00010062086412452352, "loss": 0.9173, "step": 9555 }, { "epoch": 0.5485111021860118, "grad_norm": 0.2578125, "learning_rate": 0.00010052072574175306, "loss": 0.9978, "step": 9560 }, { "epoch": 0.5487979803775317, "grad_norm": 0.267578125, "learning_rate": 0.00010042058683679769, "loss": 0.9124, "step": 9565 }, { "epoch": 0.5490848585690515, "grad_norm": 0.26953125, "learning_rate": 0.00010032044751007685, "loss": 0.9591, "step": 9570 }, { "epoch": 0.5493717367605715, "grad_norm": 0.265625, "learning_rate": 0.00010022030786201058, "loss": 0.9951, "step": 9575 }, { "epoch": 0.5496586149520913, "grad_norm": 0.26953125, "learning_rate": 0.00010012016799301907, "loss": 0.9088, "step": 9580 }, { "epoch": 0.5499454931436112, "grad_norm": 0.259765625, "learning_rate": 0.00010002002800352281, "loss": 0.9513, "step": 9585 }, { "epoch": 0.5502323713351311, "grad_norm": 0.255859375, "learning_rate": 9.991988799394245e-05, "loss": 0.8671, "step": 9590 }, { "epoch": 0.550519249526651, "grad_norm": 0.26953125, "learning_rate": 9.981974806469858e-05, "loss": 0.9622, "step": 9595 }, { "epoch": 0.5508061277181708, "grad_norm": 0.26953125, "learning_rate": 9.971960831621173e-05, "loss": 0.9097, "step": 9600 }, { "epoch": 0.5510930059096908, "grad_norm": 0.28515625, "learning_rate": 9.961946884890232e-05, "loss": 1.0333, "step": 9605 }, { "epoch": 0.5513798841012106, "grad_norm": 0.275390625, "learning_rate": 9.951932976319041e-05, "loss": 0.9286, "step": 9610 }, { "epoch": 0.5516667622927305, "grad_norm": 0.26171875, "learning_rate": 9.941919115949565e-05, "loss": 0.8962, "step": 9615 }, { "epoch": 0.5519536404842504, "grad_norm": 0.30078125, "learning_rate": 9.931905313823733e-05, "loss": 1.0276, "step": 9620 }, { "epoch": 0.5522405186757703, "grad_norm": 0.271484375, "learning_rate": 9.921891579983404e-05, "loss": 0.9283, "step": 9625 }, { "epoch": 0.5525273968672901, "grad_norm": 0.265625, "learning_rate": 9.911877924470373e-05, "loss": 0.9175, "step": 9630 }, { "epoch": 0.55281427505881, "grad_norm": 0.267578125, "learning_rate": 9.901864357326358e-05, "loss": 0.9713, "step": 9635 }, { "epoch": 0.5531011532503299, "grad_norm": 0.25390625, "learning_rate": 9.891850888592987e-05, "loss": 0.9024, "step": 9640 }, { "epoch": 0.5533880314418498, "grad_norm": 0.330078125, "learning_rate": 9.881837528311787e-05, "loss": 0.9778, "step": 9645 }, { "epoch": 0.5536749096333696, "grad_norm": 0.263671875, "learning_rate": 9.871824286524175e-05, "loss": 1.0332, "step": 9650 }, { "epoch": 0.5539617878248896, "grad_norm": 0.2734375, "learning_rate": 9.861811173271459e-05, "loss": 0.9709, "step": 9655 }, { "epoch": 0.5542486660164094, "grad_norm": 0.30078125, "learning_rate": 9.851798198594809e-05, "loss": 0.9844, "step": 9660 }, { "epoch": 0.5545355442079293, "grad_norm": 0.275390625, "learning_rate": 9.841785372535254e-05, "loss": 0.9886, "step": 9665 }, { "epoch": 0.5548224223994492, "grad_norm": 0.2734375, "learning_rate": 9.831772705133685e-05, "loss": 0.9194, "step": 9670 }, { "epoch": 0.5551093005909691, "grad_norm": 0.2578125, "learning_rate": 9.821760206430825e-05, "loss": 1.0253, "step": 9675 }, { "epoch": 0.5553961787824889, "grad_norm": 0.271484375, "learning_rate": 9.811747886467226e-05, "loss": 0.8885, "step": 9680 }, { "epoch": 0.5556830569740089, "grad_norm": 0.2734375, "learning_rate": 9.801735755283273e-05, "loss": 0.9267, "step": 9685 }, { "epoch": 0.5559699351655287, "grad_norm": 0.25390625, "learning_rate": 9.791723822919149e-05, "loss": 0.9513, "step": 9690 }, { "epoch": 0.5562568133570486, "grad_norm": 0.263671875, "learning_rate": 9.781712099414842e-05, "loss": 0.9175, "step": 9695 }, { "epoch": 0.5565436915485685, "grad_norm": 0.267578125, "learning_rate": 9.771700594810128e-05, "loss": 0.9682, "step": 9700 }, { "epoch": 0.5568305697400884, "grad_norm": 0.2578125, "learning_rate": 9.761689319144573e-05, "loss": 0.8967, "step": 9705 }, { "epoch": 0.5571174479316082, "grad_norm": 0.265625, "learning_rate": 9.751678282457501e-05, "loss": 0.8832, "step": 9710 }, { "epoch": 0.5574043261231281, "grad_norm": 0.263671875, "learning_rate": 9.741667494788003e-05, "loss": 0.9154, "step": 9715 }, { "epoch": 0.557691204314648, "grad_norm": 0.259765625, "learning_rate": 9.731656966174924e-05, "loss": 0.9135, "step": 9720 }, { "epoch": 0.5579780825061679, "grad_norm": 0.267578125, "learning_rate": 9.721646706656839e-05, "loss": 0.9047, "step": 9725 }, { "epoch": 0.5582649606976877, "grad_norm": 0.271484375, "learning_rate": 9.71163672627206e-05, "loss": 1.0324, "step": 9730 }, { "epoch": 0.5585518388892077, "grad_norm": 0.2890625, "learning_rate": 9.70162703505862e-05, "loss": 0.8719, "step": 9735 }, { "epoch": 0.5588387170807275, "grad_norm": 0.26953125, "learning_rate": 9.69161764305426e-05, "loss": 0.9669, "step": 9740 }, { "epoch": 0.5591255952722474, "grad_norm": 0.275390625, "learning_rate": 9.681608560296413e-05, "loss": 0.9382, "step": 9745 }, { "epoch": 0.5594124734637673, "grad_norm": 0.267578125, "learning_rate": 9.671599796822223e-05, "loss": 0.9281, "step": 9750 }, { "epoch": 0.5596993516552872, "grad_norm": 0.271484375, "learning_rate": 9.661591362668491e-05, "loss": 0.9439, "step": 9755 }, { "epoch": 0.559986229846807, "grad_norm": 0.283203125, "learning_rate": 9.651583267871697e-05, "loss": 0.9541, "step": 9760 }, { "epoch": 0.560273108038327, "grad_norm": 0.2890625, "learning_rate": 9.641575522467984e-05, "loss": 0.974, "step": 9765 }, { "epoch": 0.5605599862298468, "grad_norm": 0.328125, "learning_rate": 9.631568136493142e-05, "loss": 0.9612, "step": 9770 }, { "epoch": 0.5608468644213667, "grad_norm": 0.26953125, "learning_rate": 9.621561119982598e-05, "loss": 0.8891, "step": 9775 }, { "epoch": 0.5611337426128866, "grad_norm": 0.275390625, "learning_rate": 9.61155448297141e-05, "loss": 1.0655, "step": 9780 }, { "epoch": 0.5614206208044065, "grad_norm": 0.26171875, "learning_rate": 9.60154823549426e-05, "loss": 0.9472, "step": 9785 }, { "epoch": 0.5617074989959263, "grad_norm": 0.25390625, "learning_rate": 9.591542387585434e-05, "loss": 0.9663, "step": 9790 }, { "epoch": 0.5619943771874462, "grad_norm": 0.275390625, "learning_rate": 9.581536949278814e-05, "loss": 0.9145, "step": 9795 }, { "epoch": 0.5622812553789661, "grad_norm": 0.271484375, "learning_rate": 9.571531930607884e-05, "loss": 0.9514, "step": 9800 }, { "epoch": 0.562568133570486, "grad_norm": 0.279296875, "learning_rate": 9.561527341605691e-05, "loss": 0.9254, "step": 9805 }, { "epoch": 0.5628550117620058, "grad_norm": 0.28125, "learning_rate": 9.551523192304863e-05, "loss": 0.9761, "step": 9810 }, { "epoch": 0.5631418899535258, "grad_norm": 0.265625, "learning_rate": 9.541519492737586e-05, "loss": 0.959, "step": 9815 }, { "epoch": 0.5634287681450456, "grad_norm": 0.265625, "learning_rate": 9.531516252935588e-05, "loss": 0.993, "step": 9820 }, { "epoch": 0.5637156463365655, "grad_norm": 0.251953125, "learning_rate": 9.521513482930144e-05, "loss": 0.8727, "step": 9825 }, { "epoch": 0.5640025245280854, "grad_norm": 0.2734375, "learning_rate": 9.511511192752049e-05, "loss": 0.9709, "step": 9830 }, { "epoch": 0.5642894027196053, "grad_norm": 0.263671875, "learning_rate": 9.501509392431627e-05, "loss": 0.9213, "step": 9835 }, { "epoch": 0.5645762809111251, "grad_norm": 0.259765625, "learning_rate": 9.491508091998707e-05, "loss": 0.9336, "step": 9840 }, { "epoch": 0.5648631591026451, "grad_norm": 0.275390625, "learning_rate": 9.481507301482604e-05, "loss": 0.9194, "step": 9845 }, { "epoch": 0.5651500372941649, "grad_norm": 0.322265625, "learning_rate": 9.471507030912151e-05, "loss": 0.8929, "step": 9850 }, { "epoch": 0.5654369154856848, "grad_norm": 0.26171875, "learning_rate": 9.46150729031563e-05, "loss": 0.9276, "step": 9855 }, { "epoch": 0.5657237936772047, "grad_norm": 0.296875, "learning_rate": 9.451508089720803e-05, "loss": 0.9965, "step": 9860 }, { "epoch": 0.5660106718687246, "grad_norm": 0.294921875, "learning_rate": 9.441509439154895e-05, "loss": 0.9921, "step": 9865 }, { "epoch": 0.5662975500602444, "grad_norm": 0.265625, "learning_rate": 9.431511348644575e-05, "loss": 0.979, "step": 9870 }, { "epoch": 0.5665844282517642, "grad_norm": 0.26953125, "learning_rate": 9.421513828215946e-05, "loss": 0.9518, "step": 9875 }, { "epoch": 0.5668713064432842, "grad_norm": 0.267578125, "learning_rate": 9.41151688789455e-05, "loss": 0.9859, "step": 9880 }, { "epoch": 0.567158184634804, "grad_norm": 0.259765625, "learning_rate": 9.401520537705339e-05, "loss": 0.9291, "step": 9885 }, { "epoch": 0.5674450628263239, "grad_norm": 0.259765625, "learning_rate": 9.391524787672676e-05, "loss": 0.8935, "step": 9890 }, { "epoch": 0.5677319410178439, "grad_norm": 0.279296875, "learning_rate": 9.381529647820314e-05, "loss": 0.9242, "step": 9895 }, { "epoch": 0.5680188192093637, "grad_norm": 0.29296875, "learning_rate": 9.371535128171416e-05, "loss": 0.9461, "step": 9900 }, { "epoch": 0.5683056974008835, "grad_norm": 0.29296875, "learning_rate": 9.361541238748496e-05, "loss": 0.9622, "step": 9905 }, { "epoch": 0.5685925755924035, "grad_norm": 0.26953125, "learning_rate": 9.351547989573453e-05, "loss": 0.9434, "step": 9910 }, { "epoch": 0.5688794537839234, "grad_norm": 0.26953125, "learning_rate": 9.341555390667542e-05, "loss": 0.9533, "step": 9915 }, { "epoch": 0.5691663319754432, "grad_norm": 0.259765625, "learning_rate": 9.331563452051362e-05, "loss": 0.9506, "step": 9920 }, { "epoch": 0.5694532101669632, "grad_norm": 0.251953125, "learning_rate": 9.321572183744849e-05, "loss": 0.9087, "step": 9925 }, { "epoch": 0.569740088358483, "grad_norm": 0.263671875, "learning_rate": 9.311581595767273e-05, "loss": 0.9403, "step": 9930 }, { "epoch": 0.5700269665500028, "grad_norm": 0.255859375, "learning_rate": 9.301591698137217e-05, "loss": 0.9441, "step": 9935 }, { "epoch": 0.5703138447415228, "grad_norm": 0.26953125, "learning_rate": 9.29160250087257e-05, "loss": 0.9683, "step": 9940 }, { "epoch": 0.5706007229330426, "grad_norm": 0.3046875, "learning_rate": 9.281614013990526e-05, "loss": 1.0119, "step": 9945 }, { "epoch": 0.5708876011245625, "grad_norm": 0.26953125, "learning_rate": 9.271626247507561e-05, "loss": 0.9057, "step": 9950 }, { "epoch": 0.5711744793160823, "grad_norm": 0.267578125, "learning_rate": 9.261639211439427e-05, "loss": 0.9917, "step": 9955 }, { "epoch": 0.5714613575076023, "grad_norm": 0.271484375, "learning_rate": 9.251652915801144e-05, "loss": 0.9661, "step": 9960 }, { "epoch": 0.5717482356991221, "grad_norm": 0.265625, "learning_rate": 9.241667370607e-05, "loss": 0.9123, "step": 9965 }, { "epoch": 0.572035113890642, "grad_norm": 0.2578125, "learning_rate": 9.231682585870514e-05, "loss": 0.9157, "step": 9970 }, { "epoch": 0.572321992082162, "grad_norm": 0.265625, "learning_rate": 9.221698571604453e-05, "loss": 0.9181, "step": 9975 }, { "epoch": 0.5726088702736818, "grad_norm": 0.26953125, "learning_rate": 9.211715337820811e-05, "loss": 0.952, "step": 9980 }, { "epoch": 0.5728957484652016, "grad_norm": 0.255859375, "learning_rate": 9.201732894530797e-05, "loss": 0.9692, "step": 9985 }, { "epoch": 0.5731826266567216, "grad_norm": 0.2734375, "learning_rate": 9.191751251744823e-05, "loss": 0.9705, "step": 9990 }, { "epoch": 0.5734695048482414, "grad_norm": 0.26171875, "learning_rate": 9.181770419472509e-05, "loss": 0.9723, "step": 9995 }, { "epoch": 0.5737563830397613, "grad_norm": 0.265625, "learning_rate": 9.171790407722656e-05, "loss": 0.9305, "step": 10000 }, { "epoch": 0.5740432612312812, "grad_norm": 0.29296875, "learning_rate": 9.161811226503233e-05, "loss": 0.8814, "step": 10005 }, { "epoch": 0.5743301394228011, "grad_norm": 0.26171875, "learning_rate": 9.151832885821396e-05, "loss": 0.9779, "step": 10010 }, { "epoch": 0.5746170176143209, "grad_norm": 0.263671875, "learning_rate": 9.141855395683444e-05, "loss": 0.9638, "step": 10015 }, { "epoch": 0.5749038958058409, "grad_norm": 0.28125, "learning_rate": 9.131878766094822e-05, "loss": 0.9235, "step": 10020 }, { "epoch": 0.5751907739973607, "grad_norm": 0.26953125, "learning_rate": 9.121903007060121e-05, "loss": 0.9527, "step": 10025 }, { "epoch": 0.5754776521888806, "grad_norm": 0.2421875, "learning_rate": 9.111928128583054e-05, "loss": 0.9512, "step": 10030 }, { "epoch": 0.5757645303804004, "grad_norm": 0.326171875, "learning_rate": 9.101954140666451e-05, "loss": 0.9147, "step": 10035 }, { "epoch": 0.5760514085719204, "grad_norm": 0.26171875, "learning_rate": 9.091981053312247e-05, "loss": 0.8988, "step": 10040 }, { "epoch": 0.5763382867634402, "grad_norm": 0.251953125, "learning_rate": 9.082008876521481e-05, "loss": 0.9102, "step": 10045 }, { "epoch": 0.5766251649549601, "grad_norm": 0.27734375, "learning_rate": 9.072037620294275e-05, "loss": 0.9565, "step": 10050 }, { "epoch": 0.57691204314648, "grad_norm": 0.314453125, "learning_rate": 9.06206729462982e-05, "loss": 0.9252, "step": 10055 }, { "epoch": 0.5771989213379999, "grad_norm": 0.2490234375, "learning_rate": 9.052097909526388e-05, "loss": 0.9098, "step": 10060 }, { "epoch": 0.5774857995295197, "grad_norm": 0.3125, "learning_rate": 9.042129474981297e-05, "loss": 0.9509, "step": 10065 }, { "epoch": 0.5777726777210397, "grad_norm": 0.25390625, "learning_rate": 9.032162000990914e-05, "loss": 0.8907, "step": 10070 }, { "epoch": 0.5780595559125595, "grad_norm": 0.265625, "learning_rate": 9.02219549755065e-05, "loss": 0.998, "step": 10075 }, { "epoch": 0.5783464341040794, "grad_norm": 0.275390625, "learning_rate": 9.012229974654932e-05, "loss": 0.9134, "step": 10080 }, { "epoch": 0.5786333122955993, "grad_norm": 0.267578125, "learning_rate": 9.002265442297212e-05, "loss": 0.9198, "step": 10085 }, { "epoch": 0.5789201904871192, "grad_norm": 0.2578125, "learning_rate": 8.99230191046994e-05, "loss": 0.9489, "step": 10090 }, { "epoch": 0.579207068678639, "grad_norm": 0.24609375, "learning_rate": 8.982339389164575e-05, "loss": 0.9774, "step": 10095 }, { "epoch": 0.579493946870159, "grad_norm": 0.26953125, "learning_rate": 8.972377888371555e-05, "loss": 0.922, "step": 10100 }, { "epoch": 0.5797808250616788, "grad_norm": 0.265625, "learning_rate": 8.962417418080285e-05, "loss": 0.9175, "step": 10105 }, { "epoch": 0.5800677032531987, "grad_norm": 0.26953125, "learning_rate": 8.952457988279161e-05, "loss": 0.9818, "step": 10110 }, { "epoch": 0.5803545814447185, "grad_norm": 0.275390625, "learning_rate": 8.942499608955516e-05, "loss": 0.99, "step": 10115 }, { "epoch": 0.5806414596362385, "grad_norm": 0.255859375, "learning_rate": 8.93254229009563e-05, "loss": 0.9201, "step": 10120 }, { "epoch": 0.5809283378277583, "grad_norm": 0.24609375, "learning_rate": 8.922586041684732e-05, "loss": 0.9708, "step": 10125 }, { "epoch": 0.5812152160192782, "grad_norm": 0.244140625, "learning_rate": 8.912630873706967e-05, "loss": 0.9485, "step": 10130 }, { "epoch": 0.5815020942107981, "grad_norm": 0.271484375, "learning_rate": 8.902676796145403e-05, "loss": 0.9604, "step": 10135 }, { "epoch": 0.581788972402318, "grad_norm": 0.263671875, "learning_rate": 8.892723818982001e-05, "loss": 0.9469, "step": 10140 }, { "epoch": 0.5820758505938378, "grad_norm": 0.2451171875, "learning_rate": 8.882771952197642e-05, "loss": 0.9425, "step": 10145 }, { "epoch": 0.5823627287853578, "grad_norm": 0.27734375, "learning_rate": 8.872821205772074e-05, "loss": 0.932, "step": 10150 }, { "epoch": 0.5826496069768776, "grad_norm": 0.251953125, "learning_rate": 8.862871589683924e-05, "loss": 0.9634, "step": 10155 }, { "epoch": 0.5829364851683975, "grad_norm": 0.2578125, "learning_rate": 8.8529231139107e-05, "loss": 0.9211, "step": 10160 }, { "epoch": 0.5832233633599174, "grad_norm": 0.27734375, "learning_rate": 8.842975788428748e-05, "loss": 0.9365, "step": 10165 }, { "epoch": 0.5835102415514373, "grad_norm": 0.26171875, "learning_rate": 8.833029623213267e-05, "loss": 0.8496, "step": 10170 }, { "epoch": 0.5837971197429571, "grad_norm": 0.271484375, "learning_rate": 8.823084628238298e-05, "loss": 0.9707, "step": 10175 }, { "epoch": 0.5840839979344771, "grad_norm": 0.2578125, "learning_rate": 8.813140813476704e-05, "loss": 0.9237, "step": 10180 }, { "epoch": 0.5843708761259969, "grad_norm": 0.28125, "learning_rate": 8.803198188900161e-05, "loss": 0.9181, "step": 10185 }, { "epoch": 0.5846577543175168, "grad_norm": 0.2578125, "learning_rate": 8.79325676447916e-05, "loss": 0.9793, "step": 10190 }, { "epoch": 0.5849446325090366, "grad_norm": 0.263671875, "learning_rate": 8.783316550182982e-05, "loss": 0.9985, "step": 10195 }, { "epoch": 0.5852315107005566, "grad_norm": 0.244140625, "learning_rate": 8.773377555979699e-05, "loss": 0.9507, "step": 10200 }, { "epoch": 0.5855183888920764, "grad_norm": 0.29296875, "learning_rate": 8.763439791836145e-05, "loss": 0.8859, "step": 10205 }, { "epoch": 0.5858052670835963, "grad_norm": 0.283203125, "learning_rate": 8.753503267717948e-05, "loss": 0.9309, "step": 10210 }, { "epoch": 0.5860921452751162, "grad_norm": 0.26953125, "learning_rate": 8.743567993589466e-05, "loss": 0.9578, "step": 10215 }, { "epoch": 0.5863790234666361, "grad_norm": 0.267578125, "learning_rate": 8.733633979413817e-05, "loss": 1.0202, "step": 10220 }, { "epoch": 0.5866659016581559, "grad_norm": 0.271484375, "learning_rate": 8.723701235152854e-05, "loss": 0.986, "step": 10225 }, { "epoch": 0.5869527798496759, "grad_norm": 0.2578125, "learning_rate": 8.713769770767155e-05, "loss": 0.8414, "step": 10230 }, { "epoch": 0.5872396580411957, "grad_norm": 0.265625, "learning_rate": 8.703839596216012e-05, "loss": 1.0018, "step": 10235 }, { "epoch": 0.5875265362327156, "grad_norm": 0.271484375, "learning_rate": 8.69391072145743e-05, "loss": 0.9342, "step": 10240 }, { "epoch": 0.5878134144242355, "grad_norm": 0.255859375, "learning_rate": 8.683983156448104e-05, "loss": 0.8877, "step": 10245 }, { "epoch": 0.5881002926157554, "grad_norm": 0.291015625, "learning_rate": 8.67405691114342e-05, "loss": 0.9437, "step": 10250 }, { "epoch": 0.5883871708072752, "grad_norm": 0.259765625, "learning_rate": 8.664131995497439e-05, "loss": 0.9819, "step": 10255 }, { "epoch": 0.5886740489987952, "grad_norm": 0.271484375, "learning_rate": 8.654208419462893e-05, "loss": 0.9168, "step": 10260 }, { "epoch": 0.588960927190315, "grad_norm": 0.25390625, "learning_rate": 8.644286192991158e-05, "loss": 0.9045, "step": 10265 }, { "epoch": 0.5892478053818349, "grad_norm": 0.2734375, "learning_rate": 8.634365326032265e-05, "loss": 0.9478, "step": 10270 }, { "epoch": 0.5895346835733547, "grad_norm": 0.263671875, "learning_rate": 8.62444582853489e-05, "loss": 0.9661, "step": 10275 }, { "epoch": 0.5898215617648747, "grad_norm": 0.248046875, "learning_rate": 8.614527710446322e-05, "loss": 0.9886, "step": 10280 }, { "epoch": 0.5901084399563945, "grad_norm": 0.263671875, "learning_rate": 8.604610981712471e-05, "loss": 0.9704, "step": 10285 }, { "epoch": 0.5903953181479143, "grad_norm": 0.271484375, "learning_rate": 8.594695652277858e-05, "loss": 0.9389, "step": 10290 }, { "epoch": 0.5906821963394343, "grad_norm": 0.314453125, "learning_rate": 8.584781732085598e-05, "loss": 0.9359, "step": 10295 }, { "epoch": 0.5909690745309542, "grad_norm": 0.298828125, "learning_rate": 8.574869231077383e-05, "loss": 1.0032, "step": 10300 }, { "epoch": 0.591255952722474, "grad_norm": 0.25390625, "learning_rate": 8.564958159193506e-05, "loss": 0.9773, "step": 10305 }, { "epoch": 0.591542830913994, "grad_norm": 0.255859375, "learning_rate": 8.555048526372805e-05, "loss": 0.9306, "step": 10310 }, { "epoch": 0.5918297091055138, "grad_norm": 0.279296875, "learning_rate": 8.545140342552676e-05, "loss": 0.9393, "step": 10315 }, { "epoch": 0.5921165872970336, "grad_norm": 0.27734375, "learning_rate": 8.53523361766908e-05, "loss": 0.9635, "step": 10320 }, { "epoch": 0.5924034654885536, "grad_norm": 0.2734375, "learning_rate": 8.525328361656494e-05, "loss": 0.9303, "step": 10325 }, { "epoch": 0.5926903436800735, "grad_norm": 0.2578125, "learning_rate": 8.515424584447935e-05, "loss": 0.874, "step": 10330 }, { "epoch": 0.5929772218715933, "grad_norm": 0.2734375, "learning_rate": 8.505522295974929e-05, "loss": 0.972, "step": 10335 }, { "epoch": 0.5932641000631133, "grad_norm": 0.275390625, "learning_rate": 8.495621506167519e-05, "loss": 0.9765, "step": 10340 }, { "epoch": 0.5935509782546331, "grad_norm": 0.26953125, "learning_rate": 8.485722224954237e-05, "loss": 1.0924, "step": 10345 }, { "epoch": 0.593837856446153, "grad_norm": 0.279296875, "learning_rate": 8.475824462262096e-05, "loss": 0.9808, "step": 10350 }, { "epoch": 0.5941247346376728, "grad_norm": 0.26953125, "learning_rate": 8.465928228016608e-05, "loss": 0.968, "step": 10355 }, { "epoch": 0.5944116128291927, "grad_norm": 0.265625, "learning_rate": 8.456033532141735e-05, "loss": 0.96, "step": 10360 }, { "epoch": 0.5946984910207126, "grad_norm": 0.287109375, "learning_rate": 8.44614038455989e-05, "loss": 0.9754, "step": 10365 }, { "epoch": 0.5949853692122324, "grad_norm": 0.267578125, "learning_rate": 8.436248795191961e-05, "loss": 0.9828, "step": 10370 }, { "epoch": 0.5952722474037524, "grad_norm": 0.28125, "learning_rate": 8.426358773957243e-05, "loss": 0.9586, "step": 10375 }, { "epoch": 0.5955591255952722, "grad_norm": 0.283203125, "learning_rate": 8.416470330773471e-05, "loss": 0.9082, "step": 10380 }, { "epoch": 0.5958460037867921, "grad_norm": 0.26171875, "learning_rate": 8.406583475556807e-05, "loss": 0.959, "step": 10385 }, { "epoch": 0.596132881978312, "grad_norm": 0.2890625, "learning_rate": 8.396698218221807e-05, "loss": 0.8862, "step": 10390 }, { "epoch": 0.5964197601698319, "grad_norm": 0.265625, "learning_rate": 8.386814568681429e-05, "loss": 0.9277, "step": 10395 }, { "epoch": 0.5967066383613517, "grad_norm": 0.267578125, "learning_rate": 8.376932536847014e-05, "loss": 0.9745, "step": 10400 }, { "epoch": 0.5969935165528717, "grad_norm": 0.271484375, "learning_rate": 8.367052132628294e-05, "loss": 0.973, "step": 10405 }, { "epoch": 0.5972803947443915, "grad_norm": 0.26171875, "learning_rate": 8.35717336593336e-05, "loss": 0.9606, "step": 10410 }, { "epoch": 0.5975672729359114, "grad_norm": 0.255859375, "learning_rate": 8.347296246668653e-05, "loss": 0.8933, "step": 10415 }, { "epoch": 0.5978541511274313, "grad_norm": 0.2578125, "learning_rate": 8.33742078473898e-05, "loss": 0.979, "step": 10420 }, { "epoch": 0.5981410293189512, "grad_norm": 0.26171875, "learning_rate": 8.327546990047471e-05, "loss": 0.9329, "step": 10425 }, { "epoch": 0.598427907510471, "grad_norm": 0.271484375, "learning_rate": 8.317674872495589e-05, "loss": 0.9221, "step": 10430 }, { "epoch": 0.5987147857019909, "grad_norm": 0.26953125, "learning_rate": 8.30780444198312e-05, "loss": 0.9305, "step": 10435 }, { "epoch": 0.5990016638935108, "grad_norm": 0.26953125, "learning_rate": 8.29793570840815e-05, "loss": 1.0169, "step": 10440 }, { "epoch": 0.5992885420850307, "grad_norm": 0.259765625, "learning_rate": 8.288068681667065e-05, "loss": 0.9563, "step": 10445 }, { "epoch": 0.5995754202765505, "grad_norm": 0.255859375, "learning_rate": 8.278203371654549e-05, "loss": 0.9658, "step": 10450 }, { "epoch": 0.5998622984680705, "grad_norm": 0.26953125, "learning_rate": 8.268339788263551e-05, "loss": 0.9455, "step": 10455 }, { "epoch": 0.6001491766595903, "grad_norm": 0.259765625, "learning_rate": 8.2584779413853e-05, "loss": 0.9489, "step": 10460 }, { "epoch": 0.6004360548511102, "grad_norm": 0.26953125, "learning_rate": 8.248617840909268e-05, "loss": 0.9188, "step": 10465 }, { "epoch": 0.6007229330426301, "grad_norm": 0.28125, "learning_rate": 8.238759496723199e-05, "loss": 1.0094, "step": 10470 }, { "epoch": 0.60100981123415, "grad_norm": 0.310546875, "learning_rate": 8.228902918713053e-05, "loss": 0.9488, "step": 10475 }, { "epoch": 0.6012966894256698, "grad_norm": 0.275390625, "learning_rate": 8.21904811676303e-05, "loss": 0.9419, "step": 10480 }, { "epoch": 0.6015835676171898, "grad_norm": 0.259765625, "learning_rate": 8.209195100755551e-05, "loss": 0.9301, "step": 10485 }, { "epoch": 0.6018704458087096, "grad_norm": 0.263671875, "learning_rate": 8.199343880571241e-05, "loss": 1.0017, "step": 10490 }, { "epoch": 0.6021573240002295, "grad_norm": 0.271484375, "learning_rate": 8.189494466088923e-05, "loss": 1.0361, "step": 10495 }, { "epoch": 0.6024442021917494, "grad_norm": 0.2578125, "learning_rate": 8.179646867185617e-05, "loss": 0.9334, "step": 10500 }, { "epoch": 0.6027310803832693, "grad_norm": 0.279296875, "learning_rate": 8.169801093736515e-05, "loss": 1.027, "step": 10505 }, { "epoch": 0.6030179585747891, "grad_norm": 0.2890625, "learning_rate": 8.159957155614974e-05, "loss": 0.9183, "step": 10510 }, { "epoch": 0.603304836766309, "grad_norm": 0.2578125, "learning_rate": 8.15011506269253e-05, "loss": 0.9329, "step": 10515 }, { "epoch": 0.6035917149578289, "grad_norm": 0.279296875, "learning_rate": 8.140274824838849e-05, "loss": 0.9626, "step": 10520 }, { "epoch": 0.6038785931493488, "grad_norm": 0.263671875, "learning_rate": 8.130436451921743e-05, "loss": 0.9523, "step": 10525 }, { "epoch": 0.6041654713408686, "grad_norm": 0.271484375, "learning_rate": 8.120599953807153e-05, "loss": 0.9433, "step": 10530 }, { "epoch": 0.6044523495323886, "grad_norm": 0.271484375, "learning_rate": 8.110765340359145e-05, "loss": 1.0139, "step": 10535 }, { "epoch": 0.6047392277239084, "grad_norm": 0.28515625, "learning_rate": 8.10093262143989e-05, "loss": 0.9758, "step": 10540 }, { "epoch": 0.6050261059154283, "grad_norm": 0.265625, "learning_rate": 8.09110180690966e-05, "loss": 0.9715, "step": 10545 }, { "epoch": 0.6053129841069482, "grad_norm": 0.265625, "learning_rate": 8.08127290662682e-05, "loss": 0.9328, "step": 10550 }, { "epoch": 0.6055998622984681, "grad_norm": 0.28515625, "learning_rate": 8.071445930447815e-05, "loss": 1.0168, "step": 10555 }, { "epoch": 0.6058867404899879, "grad_norm": 0.25390625, "learning_rate": 8.061620888227145e-05, "loss": 0.9434, "step": 10560 }, { "epoch": 0.6061736186815079, "grad_norm": 0.26171875, "learning_rate": 8.051797789817403e-05, "loss": 0.8857, "step": 10565 }, { "epoch": 0.6064604968730277, "grad_norm": 0.2734375, "learning_rate": 8.041976645069207e-05, "loss": 0.9153, "step": 10570 }, { "epoch": 0.6067473750645476, "grad_norm": 0.275390625, "learning_rate": 8.032157463831216e-05, "loss": 0.9559, "step": 10575 }, { "epoch": 0.6070342532560675, "grad_norm": 0.263671875, "learning_rate": 8.022340255950138e-05, "loss": 0.9526, "step": 10580 }, { "epoch": 0.6073211314475874, "grad_norm": 0.263671875, "learning_rate": 8.012525031270685e-05, "loss": 1.005, "step": 10585 }, { "epoch": 0.6076080096391072, "grad_norm": 0.275390625, "learning_rate": 8.002711799635588e-05, "loss": 0.9495, "step": 10590 }, { "epoch": 0.6078948878306271, "grad_norm": 0.26171875, "learning_rate": 7.992900570885572e-05, "loss": 0.9172, "step": 10595 }, { "epoch": 0.608181766022147, "grad_norm": 0.275390625, "learning_rate": 7.983091354859369e-05, "loss": 0.9883, "step": 10600 }, { "epoch": 0.6084686442136669, "grad_norm": 0.271484375, "learning_rate": 7.97328416139368e-05, "loss": 0.9507, "step": 10605 }, { "epoch": 0.6087555224051867, "grad_norm": 0.2734375, "learning_rate": 7.963479000323171e-05, "loss": 0.9506, "step": 10610 }, { "epoch": 0.6090424005967067, "grad_norm": 0.259765625, "learning_rate": 7.953675881480493e-05, "loss": 0.9825, "step": 10615 }, { "epoch": 0.6093292787882265, "grad_norm": 0.275390625, "learning_rate": 7.94387481469623e-05, "loss": 0.9435, "step": 10620 }, { "epoch": 0.6096161569797464, "grad_norm": 0.251953125, "learning_rate": 7.934075809798908e-05, "loss": 1.0158, "step": 10625 }, { "epoch": 0.6099030351712663, "grad_norm": 0.255859375, "learning_rate": 7.924278876615004e-05, "loss": 0.8788, "step": 10630 }, { "epoch": 0.6101899133627862, "grad_norm": 0.275390625, "learning_rate": 7.914484024968893e-05, "loss": 0.9347, "step": 10635 }, { "epoch": 0.610476791554306, "grad_norm": 0.267578125, "learning_rate": 7.90469126468288e-05, "loss": 0.9054, "step": 10640 }, { "epoch": 0.610763669745826, "grad_norm": 0.24609375, "learning_rate": 7.894900605577161e-05, "loss": 0.963, "step": 10645 }, { "epoch": 0.6110505479373458, "grad_norm": 0.2578125, "learning_rate": 7.885112057469839e-05, "loss": 0.9641, "step": 10650 }, { "epoch": 0.6113374261288657, "grad_norm": 0.265625, "learning_rate": 7.87532563017689e-05, "loss": 0.9087, "step": 10655 }, { "epoch": 0.6116243043203856, "grad_norm": 0.267578125, "learning_rate": 7.865541333512157e-05, "loss": 0.9185, "step": 10660 }, { "epoch": 0.6119111825119055, "grad_norm": 0.265625, "learning_rate": 7.855759177287368e-05, "loss": 0.9213, "step": 10665 }, { "epoch": 0.6121980607034253, "grad_norm": 0.267578125, "learning_rate": 7.84597917131208e-05, "loss": 0.9388, "step": 10670 }, { "epoch": 0.6124849388949452, "grad_norm": 0.2578125, "learning_rate": 7.836201325393706e-05, "loss": 0.969, "step": 10675 }, { "epoch": 0.6127718170864651, "grad_norm": 0.265625, "learning_rate": 7.826425649337501e-05, "loss": 0.9655, "step": 10680 }, { "epoch": 0.613058695277985, "grad_norm": 0.25390625, "learning_rate": 7.816652152946528e-05, "loss": 1.0169, "step": 10685 }, { "epoch": 0.6133455734695048, "grad_norm": 0.251953125, "learning_rate": 7.806880846021669e-05, "loss": 0.9382, "step": 10690 }, { "epoch": 0.6136324516610248, "grad_norm": 0.26953125, "learning_rate": 7.797111738361618e-05, "loss": 0.9416, "step": 10695 }, { "epoch": 0.6139193298525446, "grad_norm": 0.267578125, "learning_rate": 7.787344839762855e-05, "loss": 0.9647, "step": 10700 }, { "epoch": 0.6142062080440645, "grad_norm": 0.2578125, "learning_rate": 7.777580160019649e-05, "loss": 0.905, "step": 10705 }, { "epoch": 0.6144930862355844, "grad_norm": 0.279296875, "learning_rate": 7.767817708924038e-05, "loss": 0.9983, "step": 10710 }, { "epoch": 0.6147799644271043, "grad_norm": 0.271484375, "learning_rate": 7.758057496265839e-05, "loss": 0.9257, "step": 10715 }, { "epoch": 0.6150668426186241, "grad_norm": 0.2734375, "learning_rate": 7.748299531832609e-05, "loss": 0.9679, "step": 10720 }, { "epoch": 0.6153537208101441, "grad_norm": 0.275390625, "learning_rate": 7.738543825409652e-05, "loss": 0.9438, "step": 10725 }, { "epoch": 0.6156405990016639, "grad_norm": 0.275390625, "learning_rate": 7.728790386780025e-05, "loss": 0.9388, "step": 10730 }, { "epoch": 0.6159274771931837, "grad_norm": 0.25390625, "learning_rate": 7.71903922572449e-05, "loss": 1.0314, "step": 10735 }, { "epoch": 0.6162143553847037, "grad_norm": 0.26953125, "learning_rate": 7.70929035202153e-05, "loss": 0.8882, "step": 10740 }, { "epoch": 0.6165012335762236, "grad_norm": 0.26171875, "learning_rate": 7.699543775447345e-05, "loss": 0.9499, "step": 10745 }, { "epoch": 0.6167881117677434, "grad_norm": 0.2578125, "learning_rate": 7.689799505775822e-05, "loss": 0.9593, "step": 10750 }, { "epoch": 0.6170749899592632, "grad_norm": 0.26171875, "learning_rate": 7.68005755277853e-05, "loss": 0.8896, "step": 10755 }, { "epoch": 0.6173618681507832, "grad_norm": 0.25, "learning_rate": 7.67031792622473e-05, "loss": 1.0001, "step": 10760 }, { "epoch": 0.617648746342303, "grad_norm": 0.2490234375, "learning_rate": 7.660580635881338e-05, "loss": 1.0065, "step": 10765 }, { "epoch": 0.6179356245338229, "grad_norm": 0.26171875, "learning_rate": 7.65084569151293e-05, "loss": 0.9113, "step": 10770 }, { "epoch": 0.6182225027253428, "grad_norm": 0.263671875, "learning_rate": 7.641113102881726e-05, "loss": 0.9221, "step": 10775 }, { "epoch": 0.6185093809168627, "grad_norm": 0.26953125, "learning_rate": 7.631382879747597e-05, "loss": 0.9337, "step": 10780 }, { "epoch": 0.6187962591083825, "grad_norm": 0.25390625, "learning_rate": 7.621655031868026e-05, "loss": 0.9811, "step": 10785 }, { "epoch": 0.6190831372999025, "grad_norm": 0.27734375, "learning_rate": 7.61192956899812e-05, "loss": 1.0128, "step": 10790 }, { "epoch": 0.6193700154914223, "grad_norm": 0.279296875, "learning_rate": 7.6022065008906e-05, "loss": 0.9482, "step": 10795 }, { "epoch": 0.6196568936829422, "grad_norm": 0.25390625, "learning_rate": 7.592485837295777e-05, "loss": 0.9472, "step": 10800 }, { "epoch": 0.6199437718744621, "grad_norm": 0.26171875, "learning_rate": 7.582767587961552e-05, "loss": 0.9221, "step": 10805 }, { "epoch": 0.620230650065982, "grad_norm": 0.287109375, "learning_rate": 7.573051762633414e-05, "loss": 0.9532, "step": 10810 }, { "epoch": 0.6205175282575018, "grad_norm": 0.259765625, "learning_rate": 7.563338371054412e-05, "loss": 1.0231, "step": 10815 }, { "epoch": 0.6208044064490218, "grad_norm": 0.2578125, "learning_rate": 7.553627422965148e-05, "loss": 0.9752, "step": 10820 }, { "epoch": 0.6210912846405416, "grad_norm": 0.26171875, "learning_rate": 7.543918928103795e-05, "loss": 0.9337, "step": 10825 }, { "epoch": 0.6213781628320615, "grad_norm": 0.2734375, "learning_rate": 7.534212896206051e-05, "loss": 0.9364, "step": 10830 }, { "epoch": 0.6216650410235813, "grad_norm": 0.2890625, "learning_rate": 7.524509337005141e-05, "loss": 0.9254, "step": 10835 }, { "epoch": 0.6219519192151013, "grad_norm": 0.25, "learning_rate": 7.514808260231818e-05, "loss": 0.9011, "step": 10840 }, { "epoch": 0.6222387974066211, "grad_norm": 0.271484375, "learning_rate": 7.505109675614346e-05, "loss": 0.9011, "step": 10845 }, { "epoch": 0.622525675598141, "grad_norm": 0.255859375, "learning_rate": 7.495413592878484e-05, "loss": 0.9105, "step": 10850 }, { "epoch": 0.6228125537896609, "grad_norm": 0.2578125, "learning_rate": 7.485720021747486e-05, "loss": 0.9194, "step": 10855 }, { "epoch": 0.6230994319811808, "grad_norm": 0.263671875, "learning_rate": 7.476028971942093e-05, "loss": 0.9168, "step": 10860 }, { "epoch": 0.6233863101727006, "grad_norm": 0.265625, "learning_rate": 7.466340453180505e-05, "loss": 0.9753, "step": 10865 }, { "epoch": 0.6236731883642206, "grad_norm": 0.263671875, "learning_rate": 7.456654475178389e-05, "loss": 0.9741, "step": 10870 }, { "epoch": 0.6239600665557404, "grad_norm": 0.263671875, "learning_rate": 7.446971047648873e-05, "loss": 0.9154, "step": 10875 }, { "epoch": 0.6242469447472603, "grad_norm": 0.275390625, "learning_rate": 7.437290180302512e-05, "loss": 0.9548, "step": 10880 }, { "epoch": 0.6245338229387802, "grad_norm": 0.259765625, "learning_rate": 7.427611882847301e-05, "loss": 0.9339, "step": 10885 }, { "epoch": 0.6248207011303001, "grad_norm": 0.265625, "learning_rate": 7.41793616498867e-05, "loss": 0.9884, "step": 10890 }, { "epoch": 0.6251075793218199, "grad_norm": 0.251953125, "learning_rate": 7.40826303642944e-05, "loss": 0.8957, "step": 10895 }, { "epoch": 0.6253944575133399, "grad_norm": 0.25, "learning_rate": 7.398592506869849e-05, "loss": 0.9553, "step": 10900 }, { "epoch": 0.6256813357048597, "grad_norm": 0.296875, "learning_rate": 7.388924586007523e-05, "loss": 0.9071, "step": 10905 }, { "epoch": 0.6259682138963796, "grad_norm": 0.2734375, "learning_rate": 7.379259283537479e-05, "loss": 0.9648, "step": 10910 }, { "epoch": 0.6262550920878994, "grad_norm": 0.263671875, "learning_rate": 7.369596609152105e-05, "loss": 0.9983, "step": 10915 }, { "epoch": 0.6265419702794194, "grad_norm": 0.279296875, "learning_rate": 7.359936572541142e-05, "loss": 1.0115, "step": 10920 }, { "epoch": 0.6268288484709392, "grad_norm": 0.283203125, "learning_rate": 7.350279183391712e-05, "loss": 0.932, "step": 10925 }, { "epoch": 0.6271157266624591, "grad_norm": 0.267578125, "learning_rate": 7.340624451388257e-05, "loss": 0.9518, "step": 10930 }, { "epoch": 0.627402604853979, "grad_norm": 0.2470703125, "learning_rate": 7.330972386212558e-05, "loss": 0.932, "step": 10935 }, { "epoch": 0.6276894830454989, "grad_norm": 0.2890625, "learning_rate": 7.321322997543743e-05, "loss": 0.9442, "step": 10940 }, { "epoch": 0.6279763612370187, "grad_norm": 0.26171875, "learning_rate": 7.311676295058232e-05, "loss": 0.8918, "step": 10945 }, { "epoch": 0.6282632394285387, "grad_norm": 0.263671875, "learning_rate": 7.302032288429756e-05, "loss": 0.9165, "step": 10950 }, { "epoch": 0.6285501176200585, "grad_norm": 0.283203125, "learning_rate": 7.292390987329356e-05, "loss": 0.9855, "step": 10955 }, { "epoch": 0.6288369958115784, "grad_norm": 0.283203125, "learning_rate": 7.282752401425343e-05, "loss": 0.9264, "step": 10960 }, { "epoch": 0.6291238740030983, "grad_norm": 0.263671875, "learning_rate": 7.273116540383319e-05, "loss": 1.0362, "step": 10965 }, { "epoch": 0.6294107521946182, "grad_norm": 0.259765625, "learning_rate": 7.263483413866135e-05, "loss": 0.9872, "step": 10970 }, { "epoch": 0.629697630386138, "grad_norm": 0.26171875, "learning_rate": 7.253853031533928e-05, "loss": 0.9462, "step": 10975 }, { "epoch": 0.629984508577658, "grad_norm": 0.27734375, "learning_rate": 7.244225403044056e-05, "loss": 0.989, "step": 10980 }, { "epoch": 0.6302713867691778, "grad_norm": 0.27734375, "learning_rate": 7.234600538051124e-05, "loss": 0.8998, "step": 10985 }, { "epoch": 0.6305582649606977, "grad_norm": 0.27734375, "learning_rate": 7.22497844620698e-05, "loss": 0.9069, "step": 10990 }, { "epoch": 0.6308451431522175, "grad_norm": 0.2373046875, "learning_rate": 7.215359137160673e-05, "loss": 0.9017, "step": 10995 }, { "epoch": 0.6311320213437375, "grad_norm": 0.298828125, "learning_rate": 7.205742620558464e-05, "loss": 0.9396, "step": 11000 }, { "epoch": 0.6314188995352573, "grad_norm": 0.279296875, "learning_rate": 7.196128906043822e-05, "loss": 1.0328, "step": 11005 }, { "epoch": 0.6317057777267772, "grad_norm": 0.263671875, "learning_rate": 7.1865180032574e-05, "loss": 0.8883, "step": 11010 }, { "epoch": 0.6319926559182971, "grad_norm": 0.265625, "learning_rate": 7.176909921837033e-05, "loss": 1.0588, "step": 11015 }, { "epoch": 0.632279534109817, "grad_norm": 0.26171875, "learning_rate": 7.167304671417729e-05, "loss": 0.918, "step": 11020 }, { "epoch": 0.6325664123013368, "grad_norm": 0.26171875, "learning_rate": 7.157702261631653e-05, "loss": 0.965, "step": 11025 }, { "epoch": 0.6328532904928568, "grad_norm": 0.26171875, "learning_rate": 7.148102702108122e-05, "loss": 0.9823, "step": 11030 }, { "epoch": 0.6331401686843766, "grad_norm": 0.26953125, "learning_rate": 7.138506002473591e-05, "loss": 0.981, "step": 11035 }, { "epoch": 0.6334270468758965, "grad_norm": 0.294921875, "learning_rate": 7.128912172351664e-05, "loss": 0.9348, "step": 11040 }, { "epoch": 0.6337139250674164, "grad_norm": 0.259765625, "learning_rate": 7.119321221363047e-05, "loss": 0.9774, "step": 11045 }, { "epoch": 0.6340008032589363, "grad_norm": 0.26171875, "learning_rate": 7.109733159125566e-05, "loss": 0.9297, "step": 11050 }, { "epoch": 0.6342876814504561, "grad_norm": 0.26171875, "learning_rate": 7.100147995254156e-05, "loss": 0.9165, "step": 11055 }, { "epoch": 0.6345745596419761, "grad_norm": 0.2490234375, "learning_rate": 7.09056573936084e-05, "loss": 0.9639, "step": 11060 }, { "epoch": 0.6348614378334959, "grad_norm": 0.251953125, "learning_rate": 7.080986401054721e-05, "loss": 0.8885, "step": 11065 }, { "epoch": 0.6351483160250158, "grad_norm": 0.283203125, "learning_rate": 7.071409989941989e-05, "loss": 0.9485, "step": 11070 }, { "epoch": 0.6354351942165356, "grad_norm": 0.26171875, "learning_rate": 7.061836515625886e-05, "loss": 0.9421, "step": 11075 }, { "epoch": 0.6357220724080556, "grad_norm": 0.255859375, "learning_rate": 7.052265987706708e-05, "loss": 0.9441, "step": 11080 }, { "epoch": 0.6360089505995754, "grad_norm": 0.25390625, "learning_rate": 7.042698415781813e-05, "loss": 0.9537, "step": 11085 }, { "epoch": 0.6362958287910953, "grad_norm": 0.26953125, "learning_rate": 7.033133809445577e-05, "loss": 0.9135, "step": 11090 }, { "epoch": 0.6365827069826152, "grad_norm": 0.271484375, "learning_rate": 7.02357217828941e-05, "loss": 0.9283, "step": 11095 }, { "epoch": 0.6368695851741351, "grad_norm": 0.2578125, "learning_rate": 7.014013531901733e-05, "loss": 0.889, "step": 11100 }, { "epoch": 0.6371564633656549, "grad_norm": 0.28515625, "learning_rate": 7.004457879867986e-05, "loss": 0.9422, "step": 11105 }, { "epoch": 0.6374433415571749, "grad_norm": 0.2734375, "learning_rate": 6.994905231770593e-05, "loss": 0.9034, "step": 11110 }, { "epoch": 0.6377302197486947, "grad_norm": 0.26953125, "learning_rate": 6.985355597188971e-05, "loss": 0.9111, "step": 11115 }, { "epoch": 0.6380170979402146, "grad_norm": 0.302734375, "learning_rate": 6.975808985699518e-05, "loss": 0.9939, "step": 11120 }, { "epoch": 0.6383039761317345, "grad_norm": 0.2578125, "learning_rate": 6.966265406875597e-05, "loss": 0.9296, "step": 11125 }, { "epoch": 0.6385908543232544, "grad_norm": 0.2470703125, "learning_rate": 6.956724870287524e-05, "loss": 0.9218, "step": 11130 }, { "epoch": 0.6388777325147742, "grad_norm": 0.251953125, "learning_rate": 6.94718738550258e-05, "loss": 0.9668, "step": 11135 }, { "epoch": 0.6391646107062942, "grad_norm": 0.2890625, "learning_rate": 6.93765296208497e-05, "loss": 0.9111, "step": 11140 }, { "epoch": 0.639451488897814, "grad_norm": 0.26953125, "learning_rate": 6.928121609595835e-05, "loss": 0.9802, "step": 11145 }, { "epoch": 0.6397383670893338, "grad_norm": 0.26171875, "learning_rate": 6.918593337593238e-05, "loss": 0.9536, "step": 11150 }, { "epoch": 0.6400252452808537, "grad_norm": 0.263671875, "learning_rate": 6.909068155632153e-05, "loss": 0.9412, "step": 11155 }, { "epoch": 0.6403121234723737, "grad_norm": 0.26171875, "learning_rate": 6.899546073264454e-05, "loss": 0.9634, "step": 11160 }, { "epoch": 0.6405990016638935, "grad_norm": 0.2734375, "learning_rate": 6.890027100038901e-05, "loss": 0.9113, "step": 11165 }, { "epoch": 0.6408858798554133, "grad_norm": 0.2734375, "learning_rate": 6.880511245501149e-05, "loss": 1.0384, "step": 11170 }, { "epoch": 0.6411727580469333, "grad_norm": 0.26953125, "learning_rate": 6.870998519193717e-05, "loss": 0.9608, "step": 11175 }, { "epoch": 0.6414596362384531, "grad_norm": 0.28125, "learning_rate": 6.861488930655979e-05, "loss": 0.9531, "step": 11180 }, { "epoch": 0.641746514429973, "grad_norm": 0.271484375, "learning_rate": 6.851982489424187e-05, "loss": 0.9631, "step": 11185 }, { "epoch": 0.642033392621493, "grad_norm": 0.25390625, "learning_rate": 6.842479205031411e-05, "loss": 0.9673, "step": 11190 }, { "epoch": 0.6423202708130128, "grad_norm": 0.26171875, "learning_rate": 6.832979087007565e-05, "loss": 0.976, "step": 11195 }, { "epoch": 0.6426071490045326, "grad_norm": 0.259765625, "learning_rate": 6.823482144879398e-05, "loss": 0.9439, "step": 11200 }, { "epoch": 0.6428940271960526, "grad_norm": 0.2578125, "learning_rate": 6.813988388170456e-05, "loss": 0.9968, "step": 11205 }, { "epoch": 0.6431809053875724, "grad_norm": 0.2734375, "learning_rate": 6.804497826401105e-05, "loss": 0.9747, "step": 11210 }, { "epoch": 0.6434677835790923, "grad_norm": 0.2578125, "learning_rate": 6.795010469088495e-05, "loss": 0.9963, "step": 11215 }, { "epoch": 0.6437546617706122, "grad_norm": 0.283203125, "learning_rate": 6.785526325746576e-05, "loss": 0.9884, "step": 11220 }, { "epoch": 0.6440415399621321, "grad_norm": 0.26953125, "learning_rate": 6.776045405886066e-05, "loss": 1.0205, "step": 11225 }, { "epoch": 0.6443284181536519, "grad_norm": 0.275390625, "learning_rate": 6.766567719014449e-05, "loss": 0.9534, "step": 11230 }, { "epoch": 0.6446152963451719, "grad_norm": 0.255859375, "learning_rate": 6.75709327463598e-05, "loss": 0.9204, "step": 11235 }, { "epoch": 0.6449021745366917, "grad_norm": 0.25, "learning_rate": 6.747622082251643e-05, "loss": 0.8916, "step": 11240 }, { "epoch": 0.6451890527282116, "grad_norm": 0.26171875, "learning_rate": 6.738154151359172e-05, "loss": 0.9242, "step": 11245 }, { "epoch": 0.6454759309197314, "grad_norm": 0.283203125, "learning_rate": 6.728689491453039e-05, "loss": 0.9652, "step": 11250 }, { "epoch": 0.6457628091112514, "grad_norm": 0.2431640625, "learning_rate": 6.719228112024417e-05, "loss": 0.9385, "step": 11255 }, { "epoch": 0.6460496873027712, "grad_norm": 0.271484375, "learning_rate": 6.709770022561198e-05, "loss": 0.9938, "step": 11260 }, { "epoch": 0.6463365654942911, "grad_norm": 0.259765625, "learning_rate": 6.700315232547981e-05, "loss": 0.9901, "step": 11265 }, { "epoch": 0.646623443685811, "grad_norm": 0.25390625, "learning_rate": 6.690863751466048e-05, "loss": 0.9271, "step": 11270 }, { "epoch": 0.6469103218773309, "grad_norm": 0.279296875, "learning_rate": 6.681415588793367e-05, "loss": 0.9564, "step": 11275 }, { "epoch": 0.6471972000688507, "grad_norm": 0.25390625, "learning_rate": 6.67197075400457e-05, "loss": 0.9223, "step": 11280 }, { "epoch": 0.6474840782603707, "grad_norm": 0.263671875, "learning_rate": 6.662529256570969e-05, "loss": 0.9456, "step": 11285 }, { "epoch": 0.6477709564518905, "grad_norm": 0.25390625, "learning_rate": 6.653091105960512e-05, "loss": 0.9252, "step": 11290 }, { "epoch": 0.6480578346434104, "grad_norm": 0.255859375, "learning_rate": 6.643656311637796e-05, "loss": 0.9373, "step": 11295 }, { "epoch": 0.6483447128349303, "grad_norm": 0.28125, "learning_rate": 6.634224883064059e-05, "loss": 0.9379, "step": 11300 }, { "epoch": 0.6486315910264502, "grad_norm": 0.279296875, "learning_rate": 6.624796829697158e-05, "loss": 1.0462, "step": 11305 }, { "epoch": 0.64891846921797, "grad_norm": 0.267578125, "learning_rate": 6.615372160991561e-05, "loss": 1.0275, "step": 11310 }, { "epoch": 0.64920534740949, "grad_norm": 0.267578125, "learning_rate": 6.605950886398353e-05, "loss": 0.8971, "step": 11315 }, { "epoch": 0.6494922256010098, "grad_norm": 0.26171875, "learning_rate": 6.596533015365207e-05, "loss": 0.887, "step": 11320 }, { "epoch": 0.6497791037925297, "grad_norm": 0.26171875, "learning_rate": 6.587118557336382e-05, "loss": 0.915, "step": 11325 }, { "epoch": 0.6500659819840495, "grad_norm": 0.263671875, "learning_rate": 6.577707521752725e-05, "loss": 0.9363, "step": 11330 }, { "epoch": 0.6503528601755695, "grad_norm": 0.27734375, "learning_rate": 6.56829991805164e-05, "loss": 0.9977, "step": 11335 }, { "epoch": 0.6506397383670893, "grad_norm": 0.265625, "learning_rate": 6.558895755667091e-05, "loss": 0.9611, "step": 11340 }, { "epoch": 0.6509266165586092, "grad_norm": 0.26953125, "learning_rate": 6.549495044029592e-05, "loss": 0.9674, "step": 11345 }, { "epoch": 0.6512134947501291, "grad_norm": 0.275390625, "learning_rate": 6.540097792566202e-05, "loss": 0.9338, "step": 11350 }, { "epoch": 0.651500372941649, "grad_norm": 0.26171875, "learning_rate": 6.530704010700504e-05, "loss": 0.9329, "step": 11355 }, { "epoch": 0.6517872511331688, "grad_norm": 0.2734375, "learning_rate": 6.521313707852601e-05, "loss": 0.9615, "step": 11360 }, { "epoch": 0.6520741293246888, "grad_norm": 0.267578125, "learning_rate": 6.511926893439115e-05, "loss": 0.9741, "step": 11365 }, { "epoch": 0.6523610075162086, "grad_norm": 0.248046875, "learning_rate": 6.502543576873163e-05, "loss": 0.929, "step": 11370 }, { "epoch": 0.6526478857077285, "grad_norm": 0.279296875, "learning_rate": 6.493163767564352e-05, "loss": 0.9553, "step": 11375 }, { "epoch": 0.6529347638992484, "grad_norm": 0.27734375, "learning_rate": 6.483787474918779e-05, "loss": 0.9487, "step": 11380 }, { "epoch": 0.6532216420907683, "grad_norm": 0.267578125, "learning_rate": 6.474414708339013e-05, "loss": 1.0413, "step": 11385 }, { "epoch": 0.6535085202822881, "grad_norm": 0.2734375, "learning_rate": 6.465045477224079e-05, "loss": 0.962, "step": 11390 }, { "epoch": 0.6537953984738081, "grad_norm": 0.259765625, "learning_rate": 6.455679790969473e-05, "loss": 0.9672, "step": 11395 }, { "epoch": 0.6540822766653279, "grad_norm": 0.2734375, "learning_rate": 6.446317658967119e-05, "loss": 0.9116, "step": 11400 }, { "epoch": 0.6543691548568478, "grad_norm": 0.2578125, "learning_rate": 6.436959090605383e-05, "loss": 0.9604, "step": 11405 }, { "epoch": 0.6546560330483676, "grad_norm": 0.279296875, "learning_rate": 6.42760409526906e-05, "loss": 0.9871, "step": 11410 }, { "epoch": 0.6549429112398876, "grad_norm": 0.26171875, "learning_rate": 6.418252682339361e-05, "loss": 0.955, "step": 11415 }, { "epoch": 0.6552297894314074, "grad_norm": 0.2734375, "learning_rate": 6.408904861193906e-05, "loss": 0.9585, "step": 11420 }, { "epoch": 0.6555166676229273, "grad_norm": 0.255859375, "learning_rate": 6.399560641206706e-05, "loss": 0.9315, "step": 11425 }, { "epoch": 0.6558035458144472, "grad_norm": 0.267578125, "learning_rate": 6.39022003174817e-05, "loss": 0.9814, "step": 11430 }, { "epoch": 0.6560904240059671, "grad_norm": 0.25, "learning_rate": 6.380883042185084e-05, "loss": 0.9402, "step": 11435 }, { "epoch": 0.6563773021974869, "grad_norm": 0.294921875, "learning_rate": 6.371549681880593e-05, "loss": 0.9459, "step": 11440 }, { "epoch": 0.6566641803890069, "grad_norm": 0.259765625, "learning_rate": 6.362219960194223e-05, "loss": 0.8998, "step": 11445 }, { "epoch": 0.6569510585805267, "grad_norm": 0.267578125, "learning_rate": 6.352893886481829e-05, "loss": 0.9532, "step": 11450 }, { "epoch": 0.6572379367720466, "grad_norm": 0.27734375, "learning_rate": 6.343571470095625e-05, "loss": 0.9078, "step": 11455 }, { "epoch": 0.6575248149635665, "grad_norm": 0.279296875, "learning_rate": 6.334252720384153e-05, "loss": 0.9404, "step": 11460 }, { "epoch": 0.6578116931550864, "grad_norm": 0.25, "learning_rate": 6.32493764669227e-05, "loss": 0.9324, "step": 11465 }, { "epoch": 0.6580985713466062, "grad_norm": 0.26171875, "learning_rate": 6.315626258361158e-05, "loss": 1.0012, "step": 11470 }, { "epoch": 0.6583854495381262, "grad_norm": 0.283203125, "learning_rate": 6.306318564728294e-05, "loss": 0.9721, "step": 11475 }, { "epoch": 0.658672327729646, "grad_norm": 0.251953125, "learning_rate": 6.297014575127455e-05, "loss": 0.9619, "step": 11480 }, { "epoch": 0.6589592059211659, "grad_norm": 0.275390625, "learning_rate": 6.287714298888709e-05, "loss": 1.0384, "step": 11485 }, { "epoch": 0.6592460841126857, "grad_norm": 0.2578125, "learning_rate": 6.27841774533838e-05, "loss": 0.9609, "step": 11490 }, { "epoch": 0.6595329623042057, "grad_norm": 0.259765625, "learning_rate": 6.26912492379909e-05, "loss": 0.9154, "step": 11495 }, { "epoch": 0.6598198404957255, "grad_norm": 0.27734375, "learning_rate": 6.259835843589688e-05, "loss": 1.0205, "step": 11500 }, { "epoch": 0.6601067186872454, "grad_norm": 0.2578125, "learning_rate": 6.250550514025287e-05, "loss": 1.014, "step": 11505 }, { "epoch": 0.6603935968787653, "grad_norm": 0.271484375, "learning_rate": 6.24126894441724e-05, "loss": 0.9599, "step": 11510 }, { "epoch": 0.6606804750702852, "grad_norm": 0.2578125, "learning_rate": 6.231991144073126e-05, "loss": 0.9182, "step": 11515 }, { "epoch": 0.660967353261805, "grad_norm": 0.263671875, "learning_rate": 6.222717122296739e-05, "loss": 0.9562, "step": 11520 }, { "epoch": 0.661254231453325, "grad_norm": 0.271484375, "learning_rate": 6.213446888388093e-05, "loss": 0.9325, "step": 11525 }, { "epoch": 0.6615411096448448, "grad_norm": 0.244140625, "learning_rate": 6.204180451643399e-05, "loss": 0.8369, "step": 11530 }, { "epoch": 0.6618279878363647, "grad_norm": 0.263671875, "learning_rate": 6.194917821355062e-05, "loss": 0.9454, "step": 11535 }, { "epoch": 0.6621148660278846, "grad_norm": 0.2734375, "learning_rate": 6.18565900681166e-05, "loss": 0.9957, "step": 11540 }, { "epoch": 0.6624017442194045, "grad_norm": 0.287109375, "learning_rate": 6.176404017297965e-05, "loss": 1.0129, "step": 11545 }, { "epoch": 0.6626886224109243, "grad_norm": 0.25390625, "learning_rate": 6.167152862094893e-05, "loss": 0.903, "step": 11550 }, { "epoch": 0.6629755006024443, "grad_norm": 0.296875, "learning_rate": 6.157905550479525e-05, "loss": 0.9487, "step": 11555 }, { "epoch": 0.6632623787939641, "grad_norm": 0.255859375, "learning_rate": 6.148662091725087e-05, "loss": 0.928, "step": 11560 }, { "epoch": 0.663549256985484, "grad_norm": 0.27734375, "learning_rate": 6.139422495100939e-05, "loss": 1.0175, "step": 11565 }, { "epoch": 0.6638361351770038, "grad_norm": 0.25390625, "learning_rate": 6.13018676987257e-05, "loss": 0.9285, "step": 11570 }, { "epoch": 0.6641230133685238, "grad_norm": 0.267578125, "learning_rate": 6.120954925301587e-05, "loss": 0.9314, "step": 11575 }, { "epoch": 0.6644098915600436, "grad_norm": 0.263671875, "learning_rate": 6.111726970645703e-05, "loss": 0.9802, "step": 11580 }, { "epoch": 0.6646967697515634, "grad_norm": 0.26953125, "learning_rate": 6.102502915158733e-05, "loss": 0.9253, "step": 11585 }, { "epoch": 0.6649836479430834, "grad_norm": 0.275390625, "learning_rate": 6.093282768090574e-05, "loss": 0.8891, "step": 11590 }, { "epoch": 0.6652705261346032, "grad_norm": 0.29296875, "learning_rate": 6.084066538687222e-05, "loss": 0.9175, "step": 11595 }, { "epoch": 0.6655574043261231, "grad_norm": 0.255859375, "learning_rate": 6.074854236190723e-05, "loss": 0.9622, "step": 11600 }, { "epoch": 0.665844282517643, "grad_norm": 0.279296875, "learning_rate": 6.065645869839196e-05, "loss": 0.9369, "step": 11605 }, { "epoch": 0.6661311607091629, "grad_norm": 0.27734375, "learning_rate": 6.0564414488668165e-05, "loss": 0.9495, "step": 11610 }, { "epoch": 0.6664180389006827, "grad_norm": 0.275390625, "learning_rate": 6.0472409825037926e-05, "loss": 1.0065, "step": 11615 }, { "epoch": 0.6667049170922027, "grad_norm": 0.265625, "learning_rate": 6.038044479976375e-05, "loss": 0.9119, "step": 11620 }, { "epoch": 0.6669917952837225, "grad_norm": 0.275390625, "learning_rate": 6.0288519505068375e-05, "loss": 0.9656, "step": 11625 }, { "epoch": 0.6672786734752424, "grad_norm": 0.271484375, "learning_rate": 6.01966340331347e-05, "loss": 0.9607, "step": 11630 }, { "epoch": 0.6675655516667623, "grad_norm": 0.263671875, "learning_rate": 6.010478847610565e-05, "loss": 0.988, "step": 11635 }, { "epoch": 0.6678524298582822, "grad_norm": 0.25390625, "learning_rate": 6.0012982926084195e-05, "loss": 0.9958, "step": 11640 }, { "epoch": 0.668139308049802, "grad_norm": 0.275390625, "learning_rate": 5.992121747513315e-05, "loss": 0.9578, "step": 11645 }, { "epoch": 0.6684261862413219, "grad_norm": 0.26171875, "learning_rate": 5.982949221527506e-05, "loss": 0.9389, "step": 11650 }, { "epoch": 0.6687130644328418, "grad_norm": 0.275390625, "learning_rate": 5.973780723849225e-05, "loss": 0.9213, "step": 11655 }, { "epoch": 0.6689999426243617, "grad_norm": 0.26953125, "learning_rate": 5.9646162636726634e-05, "loss": 0.8939, "step": 11660 }, { "epoch": 0.6692868208158815, "grad_norm": 0.265625, "learning_rate": 5.955455850187962e-05, "loss": 0.912, "step": 11665 }, { "epoch": 0.6695736990074015, "grad_norm": 0.279296875, "learning_rate": 5.946299492581201e-05, "loss": 0.964, "step": 11670 }, { "epoch": 0.6698605771989213, "grad_norm": 0.259765625, "learning_rate": 5.9371472000344006e-05, "loss": 0.9815, "step": 11675 }, { "epoch": 0.6701474553904412, "grad_norm": 0.2578125, "learning_rate": 5.9279989817255e-05, "loss": 1.0095, "step": 11680 }, { "epoch": 0.6704343335819611, "grad_norm": 0.26171875, "learning_rate": 5.9188548468283475e-05, "loss": 0.8657, "step": 11685 }, { "epoch": 0.670721211773481, "grad_norm": 0.2734375, "learning_rate": 5.9097148045127095e-05, "loss": 0.941, "step": 11690 }, { "epoch": 0.6710080899650008, "grad_norm": 0.279296875, "learning_rate": 5.9005788639442394e-05, "loss": 0.931, "step": 11695 }, { "epoch": 0.6712949681565208, "grad_norm": 0.2734375, "learning_rate": 5.8914470342844694e-05, "loss": 0.897, "step": 11700 }, { "epoch": 0.6715818463480406, "grad_norm": 0.263671875, "learning_rate": 5.8823193246908346e-05, "loss": 0.9791, "step": 11705 }, { "epoch": 0.6718687245395605, "grad_norm": 0.2734375, "learning_rate": 5.873195744316611e-05, "loss": 0.9706, "step": 11710 }, { "epoch": 0.6721556027310804, "grad_norm": 0.294921875, "learning_rate": 5.86407630231095e-05, "loss": 0.9738, "step": 11715 }, { "epoch": 0.6724424809226003, "grad_norm": 0.255859375, "learning_rate": 5.8549610078188446e-05, "loss": 0.9373, "step": 11720 }, { "epoch": 0.6727293591141201, "grad_norm": 0.279296875, "learning_rate": 5.845849869981137e-05, "loss": 0.9755, "step": 11725 }, { "epoch": 0.67301623730564, "grad_norm": 0.263671875, "learning_rate": 5.836742897934497e-05, "loss": 0.8923, "step": 11730 }, { "epoch": 0.6733031154971599, "grad_norm": 0.251953125, "learning_rate": 5.827640100811409e-05, "loss": 0.9374, "step": 11735 }, { "epoch": 0.6735899936886798, "grad_norm": 0.26171875, "learning_rate": 5.8185414877401876e-05, "loss": 0.9589, "step": 11740 }, { "epoch": 0.6738768718801996, "grad_norm": 0.251953125, "learning_rate": 5.80944706784494e-05, "loss": 0.9999, "step": 11745 }, { "epoch": 0.6741637500717196, "grad_norm": 0.279296875, "learning_rate": 5.8003568502455676e-05, "loss": 0.9712, "step": 11750 }, { "epoch": 0.6744506282632394, "grad_norm": 0.265625, "learning_rate": 5.7912708440577635e-05, "loss": 0.9159, "step": 11755 }, { "epoch": 0.6747375064547593, "grad_norm": 0.283203125, "learning_rate": 5.782189058392995e-05, "loss": 1.0858, "step": 11760 }, { "epoch": 0.6750243846462792, "grad_norm": 0.27734375, "learning_rate": 5.773111502358492e-05, "loss": 0.9238, "step": 11765 }, { "epoch": 0.6753112628377991, "grad_norm": 0.259765625, "learning_rate": 5.764038185057259e-05, "loss": 0.8468, "step": 11770 }, { "epoch": 0.6755981410293189, "grad_norm": 0.25390625, "learning_rate": 5.754969115588034e-05, "loss": 0.9542, "step": 11775 }, { "epoch": 0.6758850192208389, "grad_norm": 0.265625, "learning_rate": 5.7459043030452966e-05, "loss": 0.9437, "step": 11780 }, { "epoch": 0.6761718974123587, "grad_norm": 0.2578125, "learning_rate": 5.736843756519259e-05, "loss": 1.0084, "step": 11785 }, { "epoch": 0.6764587756038786, "grad_norm": 0.275390625, "learning_rate": 5.727787485095866e-05, "loss": 1.0258, "step": 11790 }, { "epoch": 0.6767456537953985, "grad_norm": 0.2578125, "learning_rate": 5.718735497856762e-05, "loss": 0.9685, "step": 11795 }, { "epoch": 0.6770325319869184, "grad_norm": 0.27734375, "learning_rate": 5.709687803879301e-05, "loss": 0.9179, "step": 11800 }, { "epoch": 0.6773194101784382, "grad_norm": 0.2734375, "learning_rate": 5.700644412236531e-05, "loss": 0.871, "step": 11805 }, { "epoch": 0.6776062883699581, "grad_norm": 0.2734375, "learning_rate": 5.691605331997185e-05, "loss": 1.0171, "step": 11810 }, { "epoch": 0.677893166561478, "grad_norm": 0.263671875, "learning_rate": 5.682570572225671e-05, "loss": 0.9522, "step": 11815 }, { "epoch": 0.6781800447529979, "grad_norm": 0.283203125, "learning_rate": 5.67354014198207e-05, "loss": 0.9242, "step": 11820 }, { "epoch": 0.6784669229445177, "grad_norm": 0.265625, "learning_rate": 5.664514050322122e-05, "loss": 0.9277, "step": 11825 }, { "epoch": 0.6787538011360377, "grad_norm": 0.24609375, "learning_rate": 5.6554923062971966e-05, "loss": 1.019, "step": 11830 }, { "epoch": 0.6790406793275575, "grad_norm": 0.26171875, "learning_rate": 5.646474918954334e-05, "loss": 0.9273, "step": 11835 }, { "epoch": 0.6793275575190774, "grad_norm": 0.265625, "learning_rate": 5.637461897336185e-05, "loss": 0.9329, "step": 11840 }, { "epoch": 0.6796144357105973, "grad_norm": 0.267578125, "learning_rate": 5.628453250481026e-05, "loss": 0.9303, "step": 11845 }, { "epoch": 0.6799013139021172, "grad_norm": 0.26953125, "learning_rate": 5.6194489874227504e-05, "loss": 0.8853, "step": 11850 }, { "epoch": 0.680188192093637, "grad_norm": 0.26171875, "learning_rate": 5.610449117190855e-05, "loss": 0.9082, "step": 11855 }, { "epoch": 0.680475070285157, "grad_norm": 0.279296875, "learning_rate": 5.601453648810426e-05, "loss": 0.8877, "step": 11860 }, { "epoch": 0.6807619484766768, "grad_norm": 0.2578125, "learning_rate": 5.5924625913021386e-05, "loss": 0.9318, "step": 11865 }, { "epoch": 0.6810488266681967, "grad_norm": 0.259765625, "learning_rate": 5.583475953682251e-05, "loss": 0.9631, "step": 11870 }, { "epoch": 0.6813357048597166, "grad_norm": 0.26953125, "learning_rate": 5.5744937449625854e-05, "loss": 0.9611, "step": 11875 }, { "epoch": 0.6816225830512365, "grad_norm": 0.28515625, "learning_rate": 5.565515974150508e-05, "loss": 0.9942, "step": 11880 }, { "epoch": 0.6819094612427563, "grad_norm": 0.28125, "learning_rate": 5.556542650248959e-05, "loss": 0.9043, "step": 11885 }, { "epoch": 0.6821963394342762, "grad_norm": 0.24609375, "learning_rate": 5.547573782256403e-05, "loss": 0.9597, "step": 11890 }, { "epoch": 0.6824832176257961, "grad_norm": 0.26953125, "learning_rate": 5.538609379166845e-05, "loss": 0.9716, "step": 11895 }, { "epoch": 0.682770095817316, "grad_norm": 0.25390625, "learning_rate": 5.529649449969804e-05, "loss": 0.907, "step": 11900 }, { "epoch": 0.6830569740088358, "grad_norm": 0.259765625, "learning_rate": 5.5206940036503194e-05, "loss": 0.9749, "step": 11905 }, { "epoch": 0.6833438522003558, "grad_norm": 0.279296875, "learning_rate": 5.511743049188931e-05, "loss": 0.9953, "step": 11910 }, { "epoch": 0.6836307303918756, "grad_norm": 0.298828125, "learning_rate": 5.5027965955616743e-05, "loss": 0.9719, "step": 11915 }, { "epoch": 0.6839176085833955, "grad_norm": 0.2431640625, "learning_rate": 5.49385465174008e-05, "loss": 0.9471, "step": 11920 }, { "epoch": 0.6842044867749154, "grad_norm": 0.25390625, "learning_rate": 5.48491722669115e-05, "loss": 1.0521, "step": 11925 }, { "epoch": 0.6844913649664353, "grad_norm": 0.267578125, "learning_rate": 5.47598432937734e-05, "loss": 0.9806, "step": 11930 }, { "epoch": 0.6847782431579551, "grad_norm": 0.2578125, "learning_rate": 5.467055968756595e-05, "loss": 0.9911, "step": 11935 }, { "epoch": 0.6850651213494751, "grad_norm": 0.251953125, "learning_rate": 5.4581321537822875e-05, "loss": 0.8815, "step": 11940 }, { "epoch": 0.6853519995409949, "grad_norm": 0.259765625, "learning_rate": 5.4492128934032416e-05, "loss": 1.0047, "step": 11945 }, { "epoch": 0.6856388777325148, "grad_norm": 0.296875, "learning_rate": 5.440298196563711e-05, "loss": 0.9995, "step": 11950 }, { "epoch": 0.6859257559240347, "grad_norm": 0.259765625, "learning_rate": 5.431388072203373e-05, "loss": 0.954, "step": 11955 }, { "epoch": 0.6862126341155546, "grad_norm": 0.283203125, "learning_rate": 5.4224825292573154e-05, "loss": 0.9492, "step": 11960 }, { "epoch": 0.6864995123070744, "grad_norm": 0.267578125, "learning_rate": 5.4135815766560486e-05, "loss": 1.0314, "step": 11965 }, { "epoch": 0.6867863904985942, "grad_norm": 0.2578125, "learning_rate": 5.40468522332546e-05, "loss": 0.9593, "step": 11970 }, { "epoch": 0.6870732686901142, "grad_norm": 0.25390625, "learning_rate": 5.395793478186838e-05, "loss": 0.9984, "step": 11975 }, { "epoch": 0.687360146881634, "grad_norm": 0.275390625, "learning_rate": 5.386906350156833e-05, "loss": 0.9697, "step": 11980 }, { "epoch": 0.6876470250731539, "grad_norm": 0.287109375, "learning_rate": 5.378023848147487e-05, "loss": 0.9866, "step": 11985 }, { "epoch": 0.6879339032646739, "grad_norm": 0.267578125, "learning_rate": 5.36914598106619e-05, "loss": 1.0016, "step": 11990 }, { "epoch": 0.6882207814561937, "grad_norm": 0.267578125, "learning_rate": 5.3602727578156895e-05, "loss": 0.9289, "step": 11995 }, { "epoch": 0.6885076596477135, "grad_norm": 0.287109375, "learning_rate": 5.35140418729407e-05, "loss": 0.9322, "step": 12000 }, { "epoch": 0.6887945378392335, "grad_norm": 0.265625, "learning_rate": 5.3425402783947564e-05, "loss": 0.9132, "step": 12005 }, { "epoch": 0.6890814160307533, "grad_norm": 0.27734375, "learning_rate": 5.3336810400064904e-05, "loss": 1.0212, "step": 12010 }, { "epoch": 0.6893682942222732, "grad_norm": 0.27734375, "learning_rate": 5.324826481013345e-05, "loss": 0.9593, "step": 12015 }, { "epoch": 0.6896551724137931, "grad_norm": 0.271484375, "learning_rate": 5.315976610294689e-05, "loss": 0.9577, "step": 12020 }, { "epoch": 0.689942050605313, "grad_norm": 0.26171875, "learning_rate": 5.307131436725191e-05, "loss": 0.9461, "step": 12025 }, { "epoch": 0.6902289287968328, "grad_norm": 0.275390625, "learning_rate": 5.298290969174812e-05, "loss": 0.9663, "step": 12030 }, { "epoch": 0.6905158069883528, "grad_norm": 0.2412109375, "learning_rate": 5.2894552165087916e-05, "loss": 1.0194, "step": 12035 }, { "epoch": 0.6908026851798726, "grad_norm": 0.2890625, "learning_rate": 5.2806241875876426e-05, "loss": 1.0011, "step": 12040 }, { "epoch": 0.6910895633713925, "grad_norm": 0.244140625, "learning_rate": 5.271797891267142e-05, "loss": 0.8782, "step": 12045 }, { "epoch": 0.6913764415629123, "grad_norm": 0.275390625, "learning_rate": 5.262976336398318e-05, "loss": 0.9568, "step": 12050 }, { "epoch": 0.6916633197544323, "grad_norm": 0.279296875, "learning_rate": 5.254159531827445e-05, "loss": 1.0285, "step": 12055 }, { "epoch": 0.6919501979459521, "grad_norm": 0.2734375, "learning_rate": 5.245347486396033e-05, "loss": 0.9552, "step": 12060 }, { "epoch": 0.692237076137472, "grad_norm": 0.25390625, "learning_rate": 5.236540208940827e-05, "loss": 0.917, "step": 12065 }, { "epoch": 0.6925239543289919, "grad_norm": 0.28125, "learning_rate": 5.2277377082937806e-05, "loss": 0.9708, "step": 12070 }, { "epoch": 0.6928108325205118, "grad_norm": 0.271484375, "learning_rate": 5.2189399932820616e-05, "loss": 0.9109, "step": 12075 }, { "epoch": 0.6930977107120316, "grad_norm": 0.279296875, "learning_rate": 5.210147072728038e-05, "loss": 0.934, "step": 12080 }, { "epoch": 0.6933845889035516, "grad_norm": 0.2734375, "learning_rate": 5.2013589554492714e-05, "loss": 0.9386, "step": 12085 }, { "epoch": 0.6936714670950714, "grad_norm": 0.2734375, "learning_rate": 5.192575650258503e-05, "loss": 0.9219, "step": 12090 }, { "epoch": 0.6939583452865913, "grad_norm": 0.2578125, "learning_rate": 5.1837971659636545e-05, "loss": 0.8816, "step": 12095 }, { "epoch": 0.6942452234781112, "grad_norm": 0.279296875, "learning_rate": 5.175023511367807e-05, "loss": 0.9099, "step": 12100 }, { "epoch": 0.6945321016696311, "grad_norm": 0.302734375, "learning_rate": 5.1662546952692015e-05, "loss": 0.9268, "step": 12105 }, { "epoch": 0.6948189798611509, "grad_norm": 0.265625, "learning_rate": 5.1574907264612224e-05, "loss": 0.8738, "step": 12110 }, { "epoch": 0.6951058580526709, "grad_norm": 0.26953125, "learning_rate": 5.148731613732407e-05, "loss": 0.8922, "step": 12115 }, { "epoch": 0.6953927362441907, "grad_norm": 0.291015625, "learning_rate": 5.139977365866406e-05, "loss": 0.8938, "step": 12120 }, { "epoch": 0.6956796144357106, "grad_norm": 0.2578125, "learning_rate": 5.131227991642001e-05, "loss": 0.9463, "step": 12125 }, { "epoch": 0.6959664926272304, "grad_norm": 0.2734375, "learning_rate": 5.122483499833084e-05, "loss": 0.9507, "step": 12130 }, { "epoch": 0.6962533708187504, "grad_norm": 0.3359375, "learning_rate": 5.1137438992086506e-05, "loss": 0.9741, "step": 12135 }, { "epoch": 0.6965402490102702, "grad_norm": 0.263671875, "learning_rate": 5.1050091985327884e-05, "loss": 0.9182, "step": 12140 }, { "epoch": 0.6968271272017901, "grad_norm": 0.28125, "learning_rate": 5.096279406564686e-05, "loss": 0.9731, "step": 12145 }, { "epoch": 0.69711400539331, "grad_norm": 0.265625, "learning_rate": 5.087554532058586e-05, "loss": 0.9596, "step": 12150 }, { "epoch": 0.6974008835848299, "grad_norm": 0.259765625, "learning_rate": 5.078834583763817e-05, "loss": 0.867, "step": 12155 }, { "epoch": 0.6976877617763497, "grad_norm": 0.271484375, "learning_rate": 5.0701195704247595e-05, "loss": 0.9869, "step": 12160 }, { "epoch": 0.6979746399678697, "grad_norm": 0.275390625, "learning_rate": 5.061409500780854e-05, "loss": 0.9413, "step": 12165 }, { "epoch": 0.6982615181593895, "grad_norm": 0.255859375, "learning_rate": 5.052704383566577e-05, "loss": 0.9569, "step": 12170 }, { "epoch": 0.6985483963509094, "grad_norm": 0.25, "learning_rate": 5.044004227511436e-05, "loss": 0.949, "step": 12175 }, { "epoch": 0.6988352745424293, "grad_norm": 0.271484375, "learning_rate": 5.0353090413399705e-05, "loss": 0.9525, "step": 12180 }, { "epoch": 0.6991221527339492, "grad_norm": 0.26171875, "learning_rate": 5.02661883377173e-05, "loss": 0.942, "step": 12185 }, { "epoch": 0.699409030925469, "grad_norm": 0.2734375, "learning_rate": 5.017933613521273e-05, "loss": 0.9621, "step": 12190 }, { "epoch": 0.699695909116989, "grad_norm": 0.267578125, "learning_rate": 5.009253389298165e-05, "loss": 0.976, "step": 12195 }, { "epoch": 0.6999827873085088, "grad_norm": 0.271484375, "learning_rate": 5.0005781698069474e-05, "loss": 0.9563, "step": 12200 }, { "epoch": 0.7002696655000287, "grad_norm": 0.255859375, "learning_rate": 4.991907963747148e-05, "loss": 0.9092, "step": 12205 }, { "epoch": 0.7005565436915485, "grad_norm": 0.259765625, "learning_rate": 4.983242779813276e-05, "loss": 0.8882, "step": 12210 }, { "epoch": 0.7008434218830685, "grad_norm": 0.267578125, "learning_rate": 4.9745826266947934e-05, "loss": 0.9087, "step": 12215 }, { "epoch": 0.7011303000745883, "grad_norm": 0.255859375, "learning_rate": 4.965927513076123e-05, "loss": 0.9406, "step": 12220 }, { "epoch": 0.7014171782661082, "grad_norm": 0.271484375, "learning_rate": 4.957277447636629e-05, "loss": 0.9818, "step": 12225 }, { "epoch": 0.7017040564576281, "grad_norm": 0.2451171875, "learning_rate": 4.94863243905062e-05, "loss": 0.9895, "step": 12230 }, { "epoch": 0.701990934649148, "grad_norm": 0.23828125, "learning_rate": 4.939992495987327e-05, "loss": 0.9026, "step": 12235 }, { "epoch": 0.7022778128406678, "grad_norm": 0.279296875, "learning_rate": 4.931357627110902e-05, "loss": 0.9303, "step": 12240 }, { "epoch": 0.7025646910321878, "grad_norm": 0.408203125, "learning_rate": 4.9227278410804225e-05, "loss": 0.9506, "step": 12245 }, { "epoch": 0.7028515692237076, "grad_norm": 0.275390625, "learning_rate": 4.914103146549844e-05, "loss": 0.9547, "step": 12250 }, { "epoch": 0.7031384474152275, "grad_norm": 0.279296875, "learning_rate": 4.905483552168032e-05, "loss": 0.925, "step": 12255 }, { "epoch": 0.7034253256067474, "grad_norm": 0.25390625, "learning_rate": 4.896869066578741e-05, "loss": 0.971, "step": 12260 }, { "epoch": 0.7037122037982673, "grad_norm": 0.294921875, "learning_rate": 4.888259698420594e-05, "loss": 1.0059, "step": 12265 }, { "epoch": 0.7039990819897871, "grad_norm": 0.265625, "learning_rate": 4.879655456327083e-05, "loss": 0.9339, "step": 12270 }, { "epoch": 0.7042859601813071, "grad_norm": 0.279296875, "learning_rate": 4.8710563489265624e-05, "loss": 0.9611, "step": 12275 }, { "epoch": 0.7045728383728269, "grad_norm": 0.251953125, "learning_rate": 4.862462384842237e-05, "loss": 1.0341, "step": 12280 }, { "epoch": 0.7048597165643468, "grad_norm": 0.287109375, "learning_rate": 4.853873572692151e-05, "loss": 1.0407, "step": 12285 }, { "epoch": 0.7051465947558666, "grad_norm": 0.251953125, "learning_rate": 4.845289921089182e-05, "loss": 0.9762, "step": 12290 }, { "epoch": 0.7054334729473866, "grad_norm": 0.291015625, "learning_rate": 4.8367114386410486e-05, "loss": 1.0233, "step": 12295 }, { "epoch": 0.7057203511389064, "grad_norm": 0.279296875, "learning_rate": 4.8281381339502565e-05, "loss": 0.9128, "step": 12300 }, { "epoch": 0.7060072293304263, "grad_norm": 0.279296875, "learning_rate": 4.8195700156141386e-05, "loss": 1.0109, "step": 12305 }, { "epoch": 0.7062941075219462, "grad_norm": 0.263671875, "learning_rate": 4.8110070922248284e-05, "loss": 1.0136, "step": 12310 }, { "epoch": 0.7065809857134661, "grad_norm": 0.275390625, "learning_rate": 4.802449372369242e-05, "loss": 0.925, "step": 12315 }, { "epoch": 0.7068678639049859, "grad_norm": 0.255859375, "learning_rate": 4.79389686462908e-05, "loss": 0.9956, "step": 12320 }, { "epoch": 0.7071547420965059, "grad_norm": 0.27734375, "learning_rate": 4.785349577580817e-05, "loss": 0.9552, "step": 12325 }, { "epoch": 0.7074416202880257, "grad_norm": 0.267578125, "learning_rate": 4.77680751979569e-05, "loss": 0.9871, "step": 12330 }, { "epoch": 0.7077284984795456, "grad_norm": 0.271484375, "learning_rate": 4.768270699839691e-05, "loss": 0.9543, "step": 12335 }, { "epoch": 0.7080153766710655, "grad_norm": 0.267578125, "learning_rate": 4.759739126273569e-05, "loss": 0.9342, "step": 12340 }, { "epoch": 0.7083022548625854, "grad_norm": 0.26171875, "learning_rate": 4.751212807652806e-05, "loss": 0.9207, "step": 12345 }, { "epoch": 0.7085891330541052, "grad_norm": 0.263671875, "learning_rate": 4.742691752527606e-05, "loss": 0.9694, "step": 12350 }, { "epoch": 0.7088760112456252, "grad_norm": 0.2490234375, "learning_rate": 4.7341759694429014e-05, "loss": 0.8722, "step": 12355 }, { "epoch": 0.709162889437145, "grad_norm": 0.275390625, "learning_rate": 4.725665466938346e-05, "loss": 0.9764, "step": 12360 }, { "epoch": 0.7094497676286649, "grad_norm": 0.26953125, "learning_rate": 4.717160253548287e-05, "loss": 0.8684, "step": 12365 }, { "epoch": 0.7097366458201847, "grad_norm": 0.2578125, "learning_rate": 4.708660337801773e-05, "loss": 0.921, "step": 12370 }, { "epoch": 0.7100235240117047, "grad_norm": 0.318359375, "learning_rate": 4.700165728222538e-05, "loss": 0.9742, "step": 12375 }, { "epoch": 0.7103104022032245, "grad_norm": 0.271484375, "learning_rate": 4.6916764333289934e-05, "loss": 1.0064, "step": 12380 }, { "epoch": 0.7105972803947443, "grad_norm": 0.283203125, "learning_rate": 4.6831924616342217e-05, "loss": 0.9463, "step": 12385 }, { "epoch": 0.7108841585862643, "grad_norm": 0.30078125, "learning_rate": 4.674713821645975e-05, "loss": 0.8921, "step": 12390 }, { "epoch": 0.7111710367777841, "grad_norm": 0.271484375, "learning_rate": 4.6662405218666525e-05, "loss": 0.9079, "step": 12395 }, { "epoch": 0.711457914969304, "grad_norm": 0.259765625, "learning_rate": 4.657772570793289e-05, "loss": 0.9219, "step": 12400 }, { "epoch": 0.711744793160824, "grad_norm": 0.283203125, "learning_rate": 4.649309976917574e-05, "loss": 0.9785, "step": 12405 }, { "epoch": 0.7120316713523438, "grad_norm": 0.2470703125, "learning_rate": 4.6408527487258124e-05, "loss": 0.9714, "step": 12410 }, { "epoch": 0.7123185495438636, "grad_norm": 0.2421875, "learning_rate": 4.6324008946989314e-05, "loss": 0.9786, "step": 12415 }, { "epoch": 0.7126054277353836, "grad_norm": 0.255859375, "learning_rate": 4.62395442331247e-05, "loss": 0.9278, "step": 12420 }, { "epoch": 0.7128923059269034, "grad_norm": 0.259765625, "learning_rate": 4.615513343036567e-05, "loss": 0.9442, "step": 12425 }, { "epoch": 0.7131791841184233, "grad_norm": 0.279296875, "learning_rate": 4.607077662335959e-05, "loss": 0.9334, "step": 12430 }, { "epoch": 0.7134660623099432, "grad_norm": 0.263671875, "learning_rate": 4.59864738966996e-05, "loss": 0.9619, "step": 12435 }, { "epoch": 0.7137529405014631, "grad_norm": 0.25390625, "learning_rate": 4.590222533492473e-05, "loss": 1.0088, "step": 12440 }, { "epoch": 0.7140398186929829, "grad_norm": 0.28515625, "learning_rate": 4.581803102251966e-05, "loss": 1.0121, "step": 12445 }, { "epoch": 0.7143266968845028, "grad_norm": 0.26953125, "learning_rate": 4.573389104391449e-05, "loss": 0.9071, "step": 12450 }, { "epoch": 0.7146135750760227, "grad_norm": 0.251953125, "learning_rate": 4.564980548348511e-05, "loss": 0.9467, "step": 12455 }, { "epoch": 0.7149004532675426, "grad_norm": 0.263671875, "learning_rate": 4.556577442555265e-05, "loss": 0.9162, "step": 12460 }, { "epoch": 0.7151873314590624, "grad_norm": 0.2734375, "learning_rate": 4.5481797954383674e-05, "loss": 0.9336, "step": 12465 }, { "epoch": 0.7154742096505824, "grad_norm": 0.275390625, "learning_rate": 4.5397876154189956e-05, "loss": 0.973, "step": 12470 }, { "epoch": 0.7157610878421022, "grad_norm": 0.365234375, "learning_rate": 4.5314009109128464e-05, "loss": 0.9593, "step": 12475 }, { "epoch": 0.7160479660336221, "grad_norm": 0.232421875, "learning_rate": 4.5230196903301266e-05, "loss": 0.885, "step": 12480 }, { "epoch": 0.716334844225142, "grad_norm": 0.25390625, "learning_rate": 4.51464396207554e-05, "loss": 0.8933, "step": 12485 }, { "epoch": 0.7166217224166619, "grad_norm": 0.26953125, "learning_rate": 4.506273734548292e-05, "loss": 0.9838, "step": 12490 }, { "epoch": 0.7169086006081817, "grad_norm": 0.251953125, "learning_rate": 4.4979090161420645e-05, "loss": 0.9826, "step": 12495 }, { "epoch": 0.7171954787997017, "grad_norm": 0.251953125, "learning_rate": 4.489549815245008e-05, "loss": 0.9192, "step": 12500 }, { "epoch": 0.7174823569912215, "grad_norm": 0.263671875, "learning_rate": 4.4811961402397554e-05, "loss": 0.919, "step": 12505 }, { "epoch": 0.7177692351827414, "grad_norm": 0.265625, "learning_rate": 4.472847999503389e-05, "loss": 0.9777, "step": 12510 }, { "epoch": 0.7180561133742613, "grad_norm": 0.255859375, "learning_rate": 4.4645054014074426e-05, "loss": 0.931, "step": 12515 }, { "epoch": 0.7183429915657812, "grad_norm": 0.3046875, "learning_rate": 4.456168354317892e-05, "loss": 0.9206, "step": 12520 }, { "epoch": 0.718629869757301, "grad_norm": 0.279296875, "learning_rate": 4.4478368665951476e-05, "loss": 0.9084, "step": 12525 }, { "epoch": 0.7189167479488209, "grad_norm": 0.248046875, "learning_rate": 4.43951094659404e-05, "loss": 0.9422, "step": 12530 }, { "epoch": 0.7192036261403408, "grad_norm": 0.28515625, "learning_rate": 4.431190602663827e-05, "loss": 0.9395, "step": 12535 }, { "epoch": 0.7194905043318607, "grad_norm": 0.2734375, "learning_rate": 4.422875843148165e-05, "loss": 0.9637, "step": 12540 }, { "epoch": 0.7197773825233805, "grad_norm": 0.259765625, "learning_rate": 4.414566676385118e-05, "loss": 0.9839, "step": 12545 }, { "epoch": 0.7200642607149005, "grad_norm": 0.271484375, "learning_rate": 4.406263110707125e-05, "loss": 0.9328, "step": 12550 }, { "epoch": 0.7203511389064203, "grad_norm": 0.271484375, "learning_rate": 4.39796515444103e-05, "loss": 0.999, "step": 12555 }, { "epoch": 0.7206380170979402, "grad_norm": 0.26953125, "learning_rate": 4.3896728159080424e-05, "loss": 0.9816, "step": 12560 }, { "epoch": 0.7209248952894601, "grad_norm": 0.251953125, "learning_rate": 4.381386103423735e-05, "loss": 1.0244, "step": 12565 }, { "epoch": 0.72121177348098, "grad_norm": 0.29296875, "learning_rate": 4.373105025298041e-05, "loss": 0.9107, "step": 12570 }, { "epoch": 0.7214986516724998, "grad_norm": 0.27734375, "learning_rate": 4.364829589835245e-05, "loss": 0.9351, "step": 12575 }, { "epoch": 0.7217855298640198, "grad_norm": 0.26171875, "learning_rate": 4.356559805333971e-05, "loss": 0.9963, "step": 12580 }, { "epoch": 0.7220724080555396, "grad_norm": 0.259765625, "learning_rate": 4.348295680087181e-05, "loss": 0.9584, "step": 12585 }, { "epoch": 0.7223592862470595, "grad_norm": 0.30078125, "learning_rate": 4.340037222382156e-05, "loss": 0.9093, "step": 12590 }, { "epoch": 0.7226461644385794, "grad_norm": 0.279296875, "learning_rate": 4.3317844405005e-05, "loss": 0.9699, "step": 12595 }, { "epoch": 0.7229330426300993, "grad_norm": 0.26953125, "learning_rate": 4.323537342718111e-05, "loss": 0.9292, "step": 12600 }, { "epoch": 0.7232199208216191, "grad_norm": 0.26953125, "learning_rate": 4.315295937305207e-05, "loss": 0.8625, "step": 12605 }, { "epoch": 0.723506799013139, "grad_norm": 0.26953125, "learning_rate": 4.307060232526283e-05, "loss": 0.9948, "step": 12610 }, { "epoch": 0.7237936772046589, "grad_norm": 0.283203125, "learning_rate": 4.2988302366401254e-05, "loss": 0.9497, "step": 12615 }, { "epoch": 0.7240805553961788, "grad_norm": 0.2734375, "learning_rate": 4.2906059578997896e-05, "loss": 0.9112, "step": 12620 }, { "epoch": 0.7243674335876986, "grad_norm": 0.267578125, "learning_rate": 4.2823874045526026e-05, "loss": 0.9787, "step": 12625 }, { "epoch": 0.7246543117792186, "grad_norm": 0.2470703125, "learning_rate": 4.274174584840143e-05, "loss": 0.9606, "step": 12630 }, { "epoch": 0.7249411899707384, "grad_norm": 0.26171875, "learning_rate": 4.265967506998253e-05, "loss": 0.9875, "step": 12635 }, { "epoch": 0.7252280681622583, "grad_norm": 0.2421875, "learning_rate": 4.257766179257005e-05, "loss": 0.9702, "step": 12640 }, { "epoch": 0.7255149463537782, "grad_norm": 0.259765625, "learning_rate": 4.2495706098407085e-05, "loss": 0.9266, "step": 12645 }, { "epoch": 0.7258018245452981, "grad_norm": 0.271484375, "learning_rate": 4.2413808069678996e-05, "loss": 0.9093, "step": 12650 }, { "epoch": 0.7260887027368179, "grad_norm": 0.296875, "learning_rate": 4.2331967788513295e-05, "loss": 0.9782, "step": 12655 }, { "epoch": 0.7263755809283379, "grad_norm": 0.2470703125, "learning_rate": 4.225018533697962e-05, "loss": 0.9237, "step": 12660 }, { "epoch": 0.7266624591198577, "grad_norm": 0.26953125, "learning_rate": 4.216846079708958e-05, "loss": 1.0039, "step": 12665 }, { "epoch": 0.7269493373113776, "grad_norm": 0.275390625, "learning_rate": 4.2086794250796734e-05, "loss": 0.957, "step": 12670 }, { "epoch": 0.7272362155028975, "grad_norm": 0.255859375, "learning_rate": 4.2005185779996484e-05, "loss": 0.9602, "step": 12675 }, { "epoch": 0.7275230936944174, "grad_norm": 0.24609375, "learning_rate": 4.1923635466525936e-05, "loss": 0.9461, "step": 12680 }, { "epoch": 0.7278099718859372, "grad_norm": 0.275390625, "learning_rate": 4.1842143392164004e-05, "loss": 0.969, "step": 12685 }, { "epoch": 0.7280968500774571, "grad_norm": 0.259765625, "learning_rate": 4.17607096386311e-05, "loss": 0.9375, "step": 12690 }, { "epoch": 0.728383728268977, "grad_norm": 0.26171875, "learning_rate": 4.167933428758916e-05, "loss": 0.9121, "step": 12695 }, { "epoch": 0.7286706064604969, "grad_norm": 0.2470703125, "learning_rate": 4.159801742064158e-05, "loss": 0.8924, "step": 12700 }, { "epoch": 0.7289574846520167, "grad_norm": 0.3046875, "learning_rate": 4.151675911933308e-05, "loss": 0.9526, "step": 12705 }, { "epoch": 0.7292443628435367, "grad_norm": 0.26171875, "learning_rate": 4.143555946514964e-05, "loss": 0.9478, "step": 12710 }, { "epoch": 0.7295312410350565, "grad_norm": 0.263671875, "learning_rate": 4.135441853951857e-05, "loss": 0.9295, "step": 12715 }, { "epoch": 0.7298181192265764, "grad_norm": 0.25390625, "learning_rate": 4.1273336423808065e-05, "loss": 0.8896, "step": 12720 }, { "epoch": 0.7301049974180963, "grad_norm": 0.2734375, "learning_rate": 4.119231319932747e-05, "loss": 0.9014, "step": 12725 }, { "epoch": 0.7303918756096162, "grad_norm": 0.2470703125, "learning_rate": 4.1111348947327034e-05, "loss": 0.9335, "step": 12730 }, { "epoch": 0.730678753801136, "grad_norm": 0.259765625, "learning_rate": 4.1030443748997974e-05, "loss": 0.9627, "step": 12735 }, { "epoch": 0.730965631992656, "grad_norm": 0.271484375, "learning_rate": 4.094959768547214e-05, "loss": 0.9359, "step": 12740 }, { "epoch": 0.7312525101841758, "grad_norm": 0.255859375, "learning_rate": 4.086881083782216e-05, "loss": 0.9178, "step": 12745 }, { "epoch": 0.7315393883756957, "grad_norm": 0.265625, "learning_rate": 4.078808328706127e-05, "loss": 0.9652, "step": 12750 }, { "epoch": 0.7318262665672156, "grad_norm": 0.251953125, "learning_rate": 4.070741511414323e-05, "loss": 0.8837, "step": 12755 }, { "epoch": 0.7321131447587355, "grad_norm": 0.26171875, "learning_rate": 4.062680639996225e-05, "loss": 0.9901, "step": 12760 }, { "epoch": 0.7324000229502553, "grad_norm": 0.2734375, "learning_rate": 4.054625722535301e-05, "loss": 0.9346, "step": 12765 }, { "epoch": 0.7326869011417751, "grad_norm": 0.25, "learning_rate": 4.0465767671090304e-05, "loss": 0.8749, "step": 12770 }, { "epoch": 0.7329737793332951, "grad_norm": 0.25, "learning_rate": 4.038533781788924e-05, "loss": 0.9063, "step": 12775 }, { "epoch": 0.733260657524815, "grad_norm": 0.26171875, "learning_rate": 4.030496774640514e-05, "loss": 0.9602, "step": 12780 }, { "epoch": 0.7335475357163348, "grad_norm": 0.26953125, "learning_rate": 4.022465753723323e-05, "loss": 0.9149, "step": 12785 }, { "epoch": 0.7338344139078548, "grad_norm": 0.27734375, "learning_rate": 4.014440727090879e-05, "loss": 0.9483, "step": 12790 }, { "epoch": 0.7341212920993746, "grad_norm": 0.28125, "learning_rate": 4.0064217027906945e-05, "loss": 0.9309, "step": 12795 }, { "epoch": 0.7344081702908944, "grad_norm": 0.265625, "learning_rate": 3.998408688864267e-05, "loss": 0.8794, "step": 12800 }, { "epoch": 0.7346950484824144, "grad_norm": 0.255859375, "learning_rate": 3.990401693347065e-05, "loss": 0.9702, "step": 12805 }, { "epoch": 0.7349819266739342, "grad_norm": 0.27734375, "learning_rate": 3.982400724268516e-05, "loss": 0.9087, "step": 12810 }, { "epoch": 0.7352688048654541, "grad_norm": 0.2470703125, "learning_rate": 3.974405789652022e-05, "loss": 0.9429, "step": 12815 }, { "epoch": 0.735555683056974, "grad_norm": 0.263671875, "learning_rate": 3.96641689751491e-05, "loss": 0.9047, "step": 12820 }, { "epoch": 0.7358425612484939, "grad_norm": 0.251953125, "learning_rate": 3.95843405586846e-05, "loss": 0.973, "step": 12825 }, { "epoch": 0.7361294394400137, "grad_norm": 0.294921875, "learning_rate": 3.950457272717889e-05, "loss": 1.0204, "step": 12830 }, { "epoch": 0.7364163176315337, "grad_norm": 0.265625, "learning_rate": 3.9424865560623305e-05, "loss": 1.0043, "step": 12835 }, { "epoch": 0.7367031958230535, "grad_norm": 0.259765625, "learning_rate": 3.9345219138948365e-05, "loss": 0.9604, "step": 12840 }, { "epoch": 0.7369900740145734, "grad_norm": 0.3125, "learning_rate": 3.9265633542023684e-05, "loss": 0.9756, "step": 12845 }, { "epoch": 0.7372769522060932, "grad_norm": 0.310546875, "learning_rate": 3.9186108849657885e-05, "loss": 1.0192, "step": 12850 }, { "epoch": 0.7375638303976132, "grad_norm": 0.26171875, "learning_rate": 3.91066451415985e-05, "loss": 0.9264, "step": 12855 }, { "epoch": 0.737850708589133, "grad_norm": 0.271484375, "learning_rate": 3.9027242497531865e-05, "loss": 1.0112, "step": 12860 }, { "epoch": 0.7381375867806529, "grad_norm": 0.279296875, "learning_rate": 3.8947900997083255e-05, "loss": 0.9489, "step": 12865 }, { "epoch": 0.7384244649721728, "grad_norm": 0.271484375, "learning_rate": 3.8868620719816395e-05, "loss": 0.9954, "step": 12870 }, { "epoch": 0.7387113431636927, "grad_norm": 0.2734375, "learning_rate": 3.878940174523371e-05, "loss": 0.9128, "step": 12875 }, { "epoch": 0.7389982213552125, "grad_norm": 0.26953125, "learning_rate": 3.8710244152776264e-05, "loss": 0.9386, "step": 12880 }, { "epoch": 0.7392850995467325, "grad_norm": 0.2734375, "learning_rate": 3.8631148021823406e-05, "loss": 0.927, "step": 12885 }, { "epoch": 0.7395719777382523, "grad_norm": 0.2431640625, "learning_rate": 3.8552113431692925e-05, "loss": 0.8867, "step": 12890 }, { "epoch": 0.7398588559297722, "grad_norm": 0.265625, "learning_rate": 3.847314046164089e-05, "loss": 0.9352, "step": 12895 }, { "epoch": 0.7401457341212921, "grad_norm": 0.2734375, "learning_rate": 3.8394229190861567e-05, "loss": 0.9445, "step": 12900 }, { "epoch": 0.740432612312812, "grad_norm": 0.28515625, "learning_rate": 3.831537969848731e-05, "loss": 0.9806, "step": 12905 }, { "epoch": 0.7407194905043318, "grad_norm": 0.296875, "learning_rate": 3.823659206358865e-05, "loss": 0.9799, "step": 12910 }, { "epoch": 0.7410063686958518, "grad_norm": 0.27734375, "learning_rate": 3.8157866365174e-05, "loss": 0.9522, "step": 12915 }, { "epoch": 0.7412932468873716, "grad_norm": 0.255859375, "learning_rate": 3.807920268218961e-05, "loss": 0.9113, "step": 12920 }, { "epoch": 0.7415801250788915, "grad_norm": 0.26953125, "learning_rate": 3.800060109351957e-05, "loss": 0.9087, "step": 12925 }, { "epoch": 0.7418670032704113, "grad_norm": 0.26171875, "learning_rate": 3.792206167798582e-05, "loss": 0.89, "step": 12930 }, { "epoch": 0.7421538814619313, "grad_norm": 0.259765625, "learning_rate": 3.784358451434783e-05, "loss": 0.9056, "step": 12935 }, { "epoch": 0.7424407596534511, "grad_norm": 0.353515625, "learning_rate": 3.776516968130266e-05, "loss": 0.9744, "step": 12940 }, { "epoch": 0.742727637844971, "grad_norm": 0.271484375, "learning_rate": 3.768681725748488e-05, "loss": 0.925, "step": 12945 }, { "epoch": 0.7430145160364909, "grad_norm": 0.2890625, "learning_rate": 3.760852732146649e-05, "loss": 0.88, "step": 12950 }, { "epoch": 0.7433013942280108, "grad_norm": 0.267578125, "learning_rate": 3.753029995175677e-05, "loss": 0.9313, "step": 12955 }, { "epoch": 0.7435882724195306, "grad_norm": 0.2734375, "learning_rate": 3.7452135226802385e-05, "loss": 0.9604, "step": 12960 }, { "epoch": 0.7438751506110506, "grad_norm": 0.27734375, "learning_rate": 3.7374033224987084e-05, "loss": 0.9876, "step": 12965 }, { "epoch": 0.7441620288025704, "grad_norm": 0.279296875, "learning_rate": 3.729599402463162e-05, "loss": 0.9691, "step": 12970 }, { "epoch": 0.7444489069940903, "grad_norm": 0.2734375, "learning_rate": 3.7218017703993994e-05, "loss": 0.9409, "step": 12975 }, { "epoch": 0.7447357851856102, "grad_norm": 0.3046875, "learning_rate": 3.714010434126899e-05, "loss": 0.9514, "step": 12980 }, { "epoch": 0.7450226633771301, "grad_norm": 0.267578125, "learning_rate": 3.706225401458831e-05, "loss": 0.9097, "step": 12985 }, { "epoch": 0.7453095415686499, "grad_norm": 0.25, "learning_rate": 3.6984466802020436e-05, "loss": 0.9851, "step": 12990 }, { "epoch": 0.7455964197601699, "grad_norm": 0.287109375, "learning_rate": 3.690674278157056e-05, "loss": 0.9324, "step": 12995 }, { "epoch": 0.7458832979516897, "grad_norm": 0.26171875, "learning_rate": 3.6829082031180496e-05, "loss": 0.9267, "step": 13000 }, { "epoch": 0.7461701761432096, "grad_norm": 0.259765625, "learning_rate": 3.6751484628728594e-05, "loss": 0.9357, "step": 13005 }, { "epoch": 0.7464570543347294, "grad_norm": 0.2734375, "learning_rate": 3.6673950652029766e-05, "loss": 0.9016, "step": 13010 }, { "epoch": 0.7467439325262494, "grad_norm": 0.263671875, "learning_rate": 3.659648017883526e-05, "loss": 0.9252, "step": 13015 }, { "epoch": 0.7470308107177692, "grad_norm": 0.255859375, "learning_rate": 3.651907328683254e-05, "loss": 0.896, "step": 13020 }, { "epoch": 0.7473176889092891, "grad_norm": 0.275390625, "learning_rate": 3.6441730053645506e-05, "loss": 0.9848, "step": 13025 }, { "epoch": 0.747604567100809, "grad_norm": 0.251953125, "learning_rate": 3.6364450556834097e-05, "loss": 0.9676, "step": 13030 }, { "epoch": 0.7478914452923289, "grad_norm": 0.26953125, "learning_rate": 3.628723487389437e-05, "loss": 0.9408, "step": 13035 }, { "epoch": 0.7481783234838487, "grad_norm": 0.26171875, "learning_rate": 3.621008308225837e-05, "loss": 0.8881, "step": 13040 }, { "epoch": 0.7484652016753687, "grad_norm": 0.267578125, "learning_rate": 3.61329952592941e-05, "loss": 0.9676, "step": 13045 }, { "epoch": 0.7487520798668885, "grad_norm": 0.28515625, "learning_rate": 3.605597148230541e-05, "loss": 0.9577, "step": 13050 }, { "epoch": 0.7490389580584084, "grad_norm": 0.275390625, "learning_rate": 3.597901182853185e-05, "loss": 0.9754, "step": 13055 }, { "epoch": 0.7493258362499283, "grad_norm": 0.26953125, "learning_rate": 3.590211637514884e-05, "loss": 0.9921, "step": 13060 }, { "epoch": 0.7496127144414482, "grad_norm": 0.263671875, "learning_rate": 3.582528519926729e-05, "loss": 0.9823, "step": 13065 }, { "epoch": 0.749899592632968, "grad_norm": 0.255859375, "learning_rate": 3.574851837793357e-05, "loss": 0.9203, "step": 13070 }, { "epoch": 0.750186470824488, "grad_norm": 0.25390625, "learning_rate": 3.567181598812973e-05, "loss": 0.9569, "step": 13075 }, { "epoch": 0.7504733490160078, "grad_norm": 0.265625, "learning_rate": 3.559517810677308e-05, "loss": 0.9337, "step": 13080 }, { "epoch": 0.7507602272075277, "grad_norm": 0.255859375, "learning_rate": 3.551860481071624e-05, "loss": 0.8669, "step": 13085 }, { "epoch": 0.7510471053990475, "grad_norm": 0.263671875, "learning_rate": 3.544209617674707e-05, "loss": 0.9714, "step": 13090 }, { "epoch": 0.7513339835905675, "grad_norm": 0.255859375, "learning_rate": 3.536565228158864e-05, "loss": 0.8875, "step": 13095 }, { "epoch": 0.7516208617820873, "grad_norm": 0.26953125, "learning_rate": 3.528927320189903e-05, "loss": 0.8935, "step": 13100 }, { "epoch": 0.7519077399736072, "grad_norm": 0.26171875, "learning_rate": 3.521295901427132e-05, "loss": 0.9711, "step": 13105 }, { "epoch": 0.7521946181651271, "grad_norm": 0.279296875, "learning_rate": 3.5136709795233626e-05, "loss": 0.9369, "step": 13110 }, { "epoch": 0.752481496356647, "grad_norm": 0.2431640625, "learning_rate": 3.506052562124883e-05, "loss": 0.875, "step": 13115 }, { "epoch": 0.7527683745481668, "grad_norm": 0.271484375, "learning_rate": 3.498440656871449e-05, "loss": 0.9301, "step": 13120 }, { "epoch": 0.7530552527396868, "grad_norm": 0.26953125, "learning_rate": 3.4908352713963077e-05, "loss": 0.9551, "step": 13125 }, { "epoch": 0.7533421309312066, "grad_norm": 0.251953125, "learning_rate": 3.483236413326151e-05, "loss": 0.902, "step": 13130 }, { "epoch": 0.7536290091227265, "grad_norm": 0.255859375, "learning_rate": 3.475644090281133e-05, "loss": 0.9567, "step": 13135 }, { "epoch": 0.7539158873142464, "grad_norm": 0.265625, "learning_rate": 3.468058309874851e-05, "loss": 0.9256, "step": 13140 }, { "epoch": 0.7542027655057663, "grad_norm": 0.25390625, "learning_rate": 3.460479079714343e-05, "loss": 0.9339, "step": 13145 }, { "epoch": 0.7544896436972861, "grad_norm": 0.255859375, "learning_rate": 3.452906407400074e-05, "loss": 0.9711, "step": 13150 }, { "epoch": 0.7547765218888061, "grad_norm": 0.2451171875, "learning_rate": 3.4453403005259444e-05, "loss": 0.9775, "step": 13155 }, { "epoch": 0.7550634000803259, "grad_norm": 0.2890625, "learning_rate": 3.43778076667926e-05, "loss": 0.9288, "step": 13160 }, { "epoch": 0.7553502782718458, "grad_norm": 0.2490234375, "learning_rate": 3.43022781344074e-05, "loss": 0.8841, "step": 13165 }, { "epoch": 0.7556371564633656, "grad_norm": 0.265625, "learning_rate": 3.4226814483844946e-05, "loss": 0.9771, "step": 13170 }, { "epoch": 0.7559240346548856, "grad_norm": 0.265625, "learning_rate": 3.4151416790780456e-05, "loss": 0.9575, "step": 13175 }, { "epoch": 0.7562109128464054, "grad_norm": 0.259765625, "learning_rate": 3.4076085130822866e-05, "loss": 0.9298, "step": 13180 }, { "epoch": 0.7564977910379252, "grad_norm": 0.26171875, "learning_rate": 3.400081957951492e-05, "loss": 0.8998, "step": 13185 }, { "epoch": 0.7567846692294452, "grad_norm": 0.26171875, "learning_rate": 3.392562021233311e-05, "loss": 0.902, "step": 13190 }, { "epoch": 0.757071547420965, "grad_norm": 0.27734375, "learning_rate": 3.38504871046875e-05, "loss": 0.9792, "step": 13195 }, { "epoch": 0.7573584256124849, "grad_norm": 0.291015625, "learning_rate": 3.3775420331921736e-05, "loss": 0.9386, "step": 13200 }, { "epoch": 0.7576453038040049, "grad_norm": 0.271484375, "learning_rate": 3.3700419969312994e-05, "loss": 0.932, "step": 13205 }, { "epoch": 0.7579321819955247, "grad_norm": 0.26171875, "learning_rate": 3.362548609207177e-05, "loss": 0.9206, "step": 13210 }, { "epoch": 0.7582190601870445, "grad_norm": 0.259765625, "learning_rate": 3.355061877534192e-05, "loss": 0.9754, "step": 13215 }, { "epoch": 0.7585059383785645, "grad_norm": 0.2578125, "learning_rate": 3.3475818094200585e-05, "loss": 0.9253, "step": 13220 }, { "epoch": 0.7587928165700843, "grad_norm": 0.2734375, "learning_rate": 3.340108412365803e-05, "loss": 0.9826, "step": 13225 }, { "epoch": 0.7590796947616042, "grad_norm": 0.265625, "learning_rate": 3.332641693865766e-05, "loss": 0.9125, "step": 13230 }, { "epoch": 0.7593665729531242, "grad_norm": 0.259765625, "learning_rate": 3.3251816614075884e-05, "loss": 0.9322, "step": 13235 }, { "epoch": 0.759653451144644, "grad_norm": 0.263671875, "learning_rate": 3.317728322472209e-05, "loss": 0.9654, "step": 13240 }, { "epoch": 0.7599403293361638, "grad_norm": 0.283203125, "learning_rate": 3.310281684533852e-05, "loss": 0.9085, "step": 13245 }, { "epoch": 0.7602272075276837, "grad_norm": 0.2734375, "learning_rate": 3.302841755060018e-05, "loss": 0.9488, "step": 13250 }, { "epoch": 0.7605140857192036, "grad_norm": 0.26953125, "learning_rate": 3.2954085415114946e-05, "loss": 0.922, "step": 13255 }, { "epoch": 0.7608009639107235, "grad_norm": 0.283203125, "learning_rate": 3.2879820513423184e-05, "loss": 0.9653, "step": 13260 }, { "epoch": 0.7610878421022433, "grad_norm": 0.25390625, "learning_rate": 3.2805622919997934e-05, "loss": 0.8962, "step": 13265 }, { "epoch": 0.7613747202937633, "grad_norm": 0.265625, "learning_rate": 3.273149270924468e-05, "loss": 0.9213, "step": 13270 }, { "epoch": 0.7616615984852831, "grad_norm": 0.265625, "learning_rate": 3.2657429955501394e-05, "loss": 0.903, "step": 13275 }, { "epoch": 0.761948476676803, "grad_norm": 0.263671875, "learning_rate": 3.258343473303832e-05, "loss": 0.9081, "step": 13280 }, { "epoch": 0.762235354868323, "grad_norm": 0.2578125, "learning_rate": 3.2509507116058134e-05, "loss": 0.9147, "step": 13285 }, { "epoch": 0.7625222330598428, "grad_norm": 0.291015625, "learning_rate": 3.243564717869552e-05, "loss": 0.9349, "step": 13290 }, { "epoch": 0.7628091112513626, "grad_norm": 0.255859375, "learning_rate": 3.2361854995017416e-05, "loss": 0.9296, "step": 13295 }, { "epoch": 0.7630959894428826, "grad_norm": 0.2734375, "learning_rate": 3.228813063902276e-05, "loss": 0.8695, "step": 13300 }, { "epoch": 0.7633828676344024, "grad_norm": 0.259765625, "learning_rate": 3.2214474184642574e-05, "loss": 0.9618, "step": 13305 }, { "epoch": 0.7636697458259223, "grad_norm": 0.275390625, "learning_rate": 3.2140885705739674e-05, "loss": 0.9476, "step": 13310 }, { "epoch": 0.7639566240174422, "grad_norm": 0.259765625, "learning_rate": 3.2067365276108754e-05, "loss": 0.9865, "step": 13315 }, { "epoch": 0.7642435022089621, "grad_norm": 0.287109375, "learning_rate": 3.199391296947627e-05, "loss": 0.9788, "step": 13320 }, { "epoch": 0.7645303804004819, "grad_norm": 0.263671875, "learning_rate": 3.192052885950034e-05, "loss": 0.9549, "step": 13325 }, { "epoch": 0.7648172585920018, "grad_norm": 0.2412109375, "learning_rate": 3.1847213019770716e-05, "loss": 0.9153, "step": 13330 }, { "epoch": 0.7651041367835217, "grad_norm": 0.255859375, "learning_rate": 3.1773965523808754e-05, "loss": 1.0291, "step": 13335 }, { "epoch": 0.7653910149750416, "grad_norm": 0.263671875, "learning_rate": 3.1700786445067135e-05, "loss": 0.9117, "step": 13340 }, { "epoch": 0.7656778931665614, "grad_norm": 0.267578125, "learning_rate": 3.162767585692997e-05, "loss": 0.9118, "step": 13345 }, { "epoch": 0.7659647713580814, "grad_norm": 0.25390625, "learning_rate": 3.155463383271282e-05, "loss": 0.9031, "step": 13350 }, { "epoch": 0.7662516495496012, "grad_norm": 0.263671875, "learning_rate": 3.148166044566233e-05, "loss": 0.9564, "step": 13355 }, { "epoch": 0.7665385277411211, "grad_norm": 0.275390625, "learning_rate": 3.14087557689564e-05, "loss": 0.9529, "step": 13360 }, { "epoch": 0.766825405932641, "grad_norm": 0.298828125, "learning_rate": 3.133591987570399e-05, "loss": 0.9671, "step": 13365 }, { "epoch": 0.7671122841241609, "grad_norm": 0.2578125, "learning_rate": 3.1263152838945095e-05, "loss": 0.9353, "step": 13370 }, { "epoch": 0.7673991623156807, "grad_norm": 0.267578125, "learning_rate": 3.1190454731650675e-05, "loss": 0.9341, "step": 13375 }, { "epoch": 0.7676860405072007, "grad_norm": 0.263671875, "learning_rate": 3.111782562672251e-05, "loss": 0.9831, "step": 13380 }, { "epoch": 0.7679729186987205, "grad_norm": 0.28125, "learning_rate": 3.104526559699333e-05, "loss": 0.9083, "step": 13385 }, { "epoch": 0.7682597968902404, "grad_norm": 0.267578125, "learning_rate": 3.0972774715226406e-05, "loss": 0.9044, "step": 13390 }, { "epoch": 0.7685466750817603, "grad_norm": 0.271484375, "learning_rate": 3.090035305411575e-05, "loss": 0.9071, "step": 13395 }, { "epoch": 0.7688335532732802, "grad_norm": 0.271484375, "learning_rate": 3.0828000686286027e-05, "loss": 0.9348, "step": 13400 }, { "epoch": 0.7691204314648, "grad_norm": 0.251953125, "learning_rate": 3.075571768429233e-05, "loss": 0.9485, "step": 13405 }, { "epoch": 0.76940730965632, "grad_norm": 0.26171875, "learning_rate": 3.06835041206202e-05, "loss": 0.9058, "step": 13410 }, { "epoch": 0.7696941878478398, "grad_norm": 0.2734375, "learning_rate": 3.0611360067685576e-05, "loss": 0.9473, "step": 13415 }, { "epoch": 0.7699810660393597, "grad_norm": 0.2490234375, "learning_rate": 3.0539285597834675e-05, "loss": 0.9342, "step": 13420 }, { "epoch": 0.7702679442308795, "grad_norm": 0.25390625, "learning_rate": 3.0467280783343944e-05, "loss": 0.9746, "step": 13425 }, { "epoch": 0.7705548224223995, "grad_norm": 0.25, "learning_rate": 3.0395345696419918e-05, "loss": 0.941, "step": 13430 }, { "epoch": 0.7708417006139193, "grad_norm": 0.259765625, "learning_rate": 3.0323480409199378e-05, "loss": 0.9427, "step": 13435 }, { "epoch": 0.7711285788054392, "grad_norm": 0.267578125, "learning_rate": 3.0251684993748886e-05, "loss": 0.9854, "step": 13440 }, { "epoch": 0.7714154569969591, "grad_norm": 0.2734375, "learning_rate": 3.017995952206506e-05, "loss": 1.0031, "step": 13445 }, { "epoch": 0.771702335188479, "grad_norm": 0.26171875, "learning_rate": 3.010830406607441e-05, "loss": 0.9125, "step": 13450 }, { "epoch": 0.7719892133799988, "grad_norm": 0.27734375, "learning_rate": 3.003671869763317e-05, "loss": 1.0298, "step": 13455 }, { "epoch": 0.7722760915715188, "grad_norm": 0.26953125, "learning_rate": 2.9965203488527317e-05, "loss": 0.9565, "step": 13460 }, { "epoch": 0.7725629697630386, "grad_norm": 0.259765625, "learning_rate": 2.9893758510472436e-05, "loss": 0.9615, "step": 13465 }, { "epoch": 0.7728498479545585, "grad_norm": 0.279296875, "learning_rate": 2.982238383511373e-05, "loss": 0.97, "step": 13470 }, { "epoch": 0.7731367261460784, "grad_norm": 0.27734375, "learning_rate": 2.975107953402585e-05, "loss": 0.9835, "step": 13475 }, { "epoch": 0.7734236043375983, "grad_norm": 0.28515625, "learning_rate": 2.967984567871297e-05, "loss": 0.9662, "step": 13480 }, { "epoch": 0.7737104825291181, "grad_norm": 0.275390625, "learning_rate": 2.960868234060855e-05, "loss": 0.9375, "step": 13485 }, { "epoch": 0.7739973607206381, "grad_norm": 0.26953125, "learning_rate": 2.9537589591075298e-05, "loss": 0.9713, "step": 13490 }, { "epoch": 0.7742842389121579, "grad_norm": 0.259765625, "learning_rate": 2.9466567501405185e-05, "loss": 0.8821, "step": 13495 }, { "epoch": 0.7745711171036778, "grad_norm": 0.27734375, "learning_rate": 2.939561614281936e-05, "loss": 1.0189, "step": 13500 }, { "epoch": 0.7748579952951976, "grad_norm": 0.28515625, "learning_rate": 2.9324735586468e-05, "loss": 0.9269, "step": 13505 }, { "epoch": 0.7751448734867176, "grad_norm": 0.259765625, "learning_rate": 2.9253925903430267e-05, "loss": 0.9318, "step": 13510 }, { "epoch": 0.7754317516782374, "grad_norm": 0.263671875, "learning_rate": 2.9183187164714288e-05, "loss": 0.9497, "step": 13515 }, { "epoch": 0.7757186298697573, "grad_norm": 0.26953125, "learning_rate": 2.9112519441257e-05, "loss": 0.8565, "step": 13520 }, { "epoch": 0.7760055080612772, "grad_norm": 0.259765625, "learning_rate": 2.9041922803924158e-05, "loss": 0.9198, "step": 13525 }, { "epoch": 0.7762923862527971, "grad_norm": 0.259765625, "learning_rate": 2.8971397323510275e-05, "loss": 0.9625, "step": 13530 }, { "epoch": 0.7765792644443169, "grad_norm": 0.2734375, "learning_rate": 2.890094307073845e-05, "loss": 0.8976, "step": 13535 }, { "epoch": 0.7768661426358369, "grad_norm": 0.3046875, "learning_rate": 2.883056011626032e-05, "loss": 0.9786, "step": 13540 }, { "epoch": 0.7771530208273567, "grad_norm": 0.26171875, "learning_rate": 2.8760248530656063e-05, "loss": 0.96, "step": 13545 }, { "epoch": 0.7774398990188766, "grad_norm": 0.259765625, "learning_rate": 2.8690008384434363e-05, "loss": 0.903, "step": 13550 }, { "epoch": 0.7777267772103965, "grad_norm": 0.267578125, "learning_rate": 2.861983974803215e-05, "loss": 0.9419, "step": 13555 }, { "epoch": 0.7780136554019164, "grad_norm": 0.267578125, "learning_rate": 2.8549742691814705e-05, "loss": 0.9764, "step": 13560 }, { "epoch": 0.7783005335934362, "grad_norm": 0.263671875, "learning_rate": 2.8479717286075502e-05, "loss": 0.9253, "step": 13565 }, { "epoch": 0.7785874117849562, "grad_norm": 0.265625, "learning_rate": 2.8409763601036188e-05, "loss": 0.9614, "step": 13570 }, { "epoch": 0.778874289976476, "grad_norm": 0.267578125, "learning_rate": 2.8339881706846427e-05, "loss": 1.0581, "step": 13575 }, { "epoch": 0.7791611681679959, "grad_norm": 0.267578125, "learning_rate": 2.8270071673584008e-05, "loss": 1.0102, "step": 13580 }, { "epoch": 0.7794480463595157, "grad_norm": 0.478515625, "learning_rate": 2.82003335712546e-05, "loss": 0.9411, "step": 13585 }, { "epoch": 0.7797349245510357, "grad_norm": 0.265625, "learning_rate": 2.8130667469791626e-05, "loss": 0.858, "step": 13590 }, { "epoch": 0.7800218027425555, "grad_norm": 0.2578125, "learning_rate": 2.8061073439056507e-05, "loss": 0.8923, "step": 13595 }, { "epoch": 0.7803086809340753, "grad_norm": 0.275390625, "learning_rate": 2.799155154883826e-05, "loss": 0.9096, "step": 13600 }, { "epoch": 0.7805955591255953, "grad_norm": 0.271484375, "learning_rate": 2.7922101868853577e-05, "loss": 0.9464, "step": 13605 }, { "epoch": 0.7808824373171152, "grad_norm": 0.251953125, "learning_rate": 2.785272446874677e-05, "loss": 0.9081, "step": 13610 }, { "epoch": 0.781169315508635, "grad_norm": 0.298828125, "learning_rate": 2.778341941808965e-05, "loss": 0.9735, "step": 13615 }, { "epoch": 0.781456193700155, "grad_norm": 0.275390625, "learning_rate": 2.771418678638147e-05, "loss": 0.9514, "step": 13620 }, { "epoch": 0.7817430718916748, "grad_norm": 0.28125, "learning_rate": 2.7645026643048855e-05, "loss": 0.9684, "step": 13625 }, { "epoch": 0.7820299500831946, "grad_norm": 0.2734375, "learning_rate": 2.7575939057445786e-05, "loss": 0.9782, "step": 13630 }, { "epoch": 0.7823168282747146, "grad_norm": 0.2578125, "learning_rate": 2.750692409885347e-05, "loss": 0.9351, "step": 13635 }, { "epoch": 0.7826037064662345, "grad_norm": 0.2734375, "learning_rate": 2.7437981836480166e-05, "loss": 0.9496, "step": 13640 }, { "epoch": 0.7828905846577543, "grad_norm": 0.2578125, "learning_rate": 2.736911233946141e-05, "loss": 0.8933, "step": 13645 }, { "epoch": 0.7831774628492743, "grad_norm": 0.2578125, "learning_rate": 2.730031567685968e-05, "loss": 0.9402, "step": 13650 }, { "epoch": 0.7834643410407941, "grad_norm": 0.287109375, "learning_rate": 2.723159191766439e-05, "loss": 0.9332, "step": 13655 }, { "epoch": 0.783751219232314, "grad_norm": 0.248046875, "learning_rate": 2.716294113079192e-05, "loss": 0.8929, "step": 13660 }, { "epoch": 0.7840380974238338, "grad_norm": 0.267578125, "learning_rate": 2.7094363385085398e-05, "loss": 1.0431, "step": 13665 }, { "epoch": 0.7843249756153537, "grad_norm": 0.291015625, "learning_rate": 2.7025858749314758e-05, "loss": 0.9892, "step": 13670 }, { "epoch": 0.7846118538068736, "grad_norm": 0.265625, "learning_rate": 2.6957427292176572e-05, "loss": 0.8728, "step": 13675 }, { "epoch": 0.7848987319983934, "grad_norm": 0.279296875, "learning_rate": 2.6889069082294114e-05, "loss": 0.9862, "step": 13680 }, { "epoch": 0.7851856101899134, "grad_norm": 0.28125, "learning_rate": 2.6820784188217164e-05, "loss": 0.9781, "step": 13685 }, { "epoch": 0.7854724883814332, "grad_norm": 0.2578125, "learning_rate": 2.675257267842185e-05, "loss": 0.9384, "step": 13690 }, { "epoch": 0.7857593665729531, "grad_norm": 0.26171875, "learning_rate": 2.668443462131094e-05, "loss": 0.9279, "step": 13695 }, { "epoch": 0.786046244764473, "grad_norm": 0.271484375, "learning_rate": 2.6616370085213394e-05, "loss": 1.0333, "step": 13700 }, { "epoch": 0.7863331229559929, "grad_norm": 0.26953125, "learning_rate": 2.6548379138384483e-05, "loss": 0.9812, "step": 13705 }, { "epoch": 0.7866200011475127, "grad_norm": 0.291015625, "learning_rate": 2.648046184900568e-05, "loss": 0.8958, "step": 13710 }, { "epoch": 0.7869068793390327, "grad_norm": 0.263671875, "learning_rate": 2.6412618285184587e-05, "loss": 0.9758, "step": 13715 }, { "epoch": 0.7871937575305525, "grad_norm": 0.275390625, "learning_rate": 2.6344848514954856e-05, "loss": 0.9736, "step": 13720 }, { "epoch": 0.7874806357220724, "grad_norm": 0.26171875, "learning_rate": 2.6277152606276234e-05, "loss": 0.9742, "step": 13725 }, { "epoch": 0.7877675139135923, "grad_norm": 0.267578125, "learning_rate": 2.6209530627034295e-05, "loss": 0.9122, "step": 13730 }, { "epoch": 0.7880543921051122, "grad_norm": 0.26953125, "learning_rate": 2.614198264504053e-05, "loss": 0.9363, "step": 13735 }, { "epoch": 0.788341270296632, "grad_norm": 0.25390625, "learning_rate": 2.607450872803213e-05, "loss": 1.0, "step": 13740 }, { "epoch": 0.7886281484881519, "grad_norm": 0.2421875, "learning_rate": 2.600710894367219e-05, "loss": 0.8937, "step": 13745 }, { "epoch": 0.7889150266796718, "grad_norm": 0.267578125, "learning_rate": 2.5939783359549306e-05, "loss": 0.9911, "step": 13750 }, { "epoch": 0.7892019048711917, "grad_norm": 0.2470703125, "learning_rate": 2.5872532043177743e-05, "loss": 0.9314, "step": 13755 }, { "epoch": 0.7894887830627115, "grad_norm": 0.267578125, "learning_rate": 2.580535506199727e-05, "loss": 0.9753, "step": 13760 }, { "epoch": 0.7897756612542315, "grad_norm": 0.287109375, "learning_rate": 2.5738252483373117e-05, "loss": 0.9208, "step": 13765 }, { "epoch": 0.7900625394457513, "grad_norm": 0.275390625, "learning_rate": 2.567122437459586e-05, "loss": 0.9189, "step": 13770 }, { "epoch": 0.7903494176372712, "grad_norm": 0.265625, "learning_rate": 2.5604270802881503e-05, "loss": 0.9401, "step": 13775 }, { "epoch": 0.7906362958287911, "grad_norm": 0.265625, "learning_rate": 2.5537391835371217e-05, "loss": 1.05, "step": 13780 }, { "epoch": 0.790923174020311, "grad_norm": 0.259765625, "learning_rate": 2.5470587539131362e-05, "loss": 0.9251, "step": 13785 }, { "epoch": 0.7912100522118308, "grad_norm": 0.2890625, "learning_rate": 2.5403857981153457e-05, "loss": 0.9218, "step": 13790 }, { "epoch": 0.7914969304033508, "grad_norm": 0.255859375, "learning_rate": 2.5337203228354035e-05, "loss": 0.9932, "step": 13795 }, { "epoch": 0.7917838085948706, "grad_norm": 0.255859375, "learning_rate": 2.527062334757464e-05, "loss": 1.03, "step": 13800 }, { "epoch": 0.7920706867863905, "grad_norm": 0.26953125, "learning_rate": 2.5204118405581724e-05, "loss": 0.9819, "step": 13805 }, { "epoch": 0.7923575649779104, "grad_norm": 0.26953125, "learning_rate": 2.513768846906659e-05, "loss": 1.0, "step": 13810 }, { "epoch": 0.7926444431694303, "grad_norm": 0.26171875, "learning_rate": 2.507133360464533e-05, "loss": 0.9784, "step": 13815 }, { "epoch": 0.7929313213609501, "grad_norm": 0.2578125, "learning_rate": 2.500505387885872e-05, "loss": 0.9411, "step": 13820 }, { "epoch": 0.79321819955247, "grad_norm": 0.26953125, "learning_rate": 2.493884935817228e-05, "loss": 0.9736, "step": 13825 }, { "epoch": 0.7935050777439899, "grad_norm": 0.265625, "learning_rate": 2.487272010897601e-05, "loss": 0.9139, "step": 13830 }, { "epoch": 0.7937919559355098, "grad_norm": 0.263671875, "learning_rate": 2.4806666197584483e-05, "loss": 0.8969, "step": 13835 }, { "epoch": 0.7940788341270296, "grad_norm": 0.25390625, "learning_rate": 2.474068769023671e-05, "loss": 0.914, "step": 13840 }, { "epoch": 0.7943657123185496, "grad_norm": 0.283203125, "learning_rate": 2.4674784653096083e-05, "loss": 0.9689, "step": 13845 }, { "epoch": 0.7946525905100694, "grad_norm": 0.25, "learning_rate": 2.460895715225028e-05, "loss": 1.0079, "step": 13850 }, { "epoch": 0.7949394687015893, "grad_norm": 0.283203125, "learning_rate": 2.4543205253711355e-05, "loss": 0.9578, "step": 13855 }, { "epoch": 0.7952263468931092, "grad_norm": 0.26953125, "learning_rate": 2.447752902341538e-05, "loss": 0.9168, "step": 13860 }, { "epoch": 0.7955132250846291, "grad_norm": 0.2578125, "learning_rate": 2.441192852722265e-05, "loss": 0.9152, "step": 13865 }, { "epoch": 0.7958001032761489, "grad_norm": 0.275390625, "learning_rate": 2.4346403830917464e-05, "loss": 0.914, "step": 13870 }, { "epoch": 0.7960869814676689, "grad_norm": 0.28515625, "learning_rate": 2.4280955000208184e-05, "loss": 0.9085, "step": 13875 }, { "epoch": 0.7963738596591887, "grad_norm": 0.2578125, "learning_rate": 2.421558210072702e-05, "loss": 0.9926, "step": 13880 }, { "epoch": 0.7966607378507086, "grad_norm": 0.25, "learning_rate": 2.4150285198030066e-05, "loss": 0.9283, "step": 13885 }, { "epoch": 0.7969476160422285, "grad_norm": 0.275390625, "learning_rate": 2.4085064357597197e-05, "loss": 0.9488, "step": 13890 }, { "epoch": 0.7972344942337484, "grad_norm": 0.25390625, "learning_rate": 2.4019919644832023e-05, "loss": 0.9676, "step": 13895 }, { "epoch": 0.7975213724252682, "grad_norm": 0.291015625, "learning_rate": 2.395485112506177e-05, "loss": 1.0144, "step": 13900 }, { "epoch": 0.7978082506167881, "grad_norm": 0.259765625, "learning_rate": 2.3889858863537396e-05, "loss": 0.9616, "step": 13905 }, { "epoch": 0.798095128808308, "grad_norm": 0.271484375, "learning_rate": 2.382494292543319e-05, "loss": 0.9342, "step": 13910 }, { "epoch": 0.7983820069998279, "grad_norm": 0.25, "learning_rate": 2.376010337584701e-05, "loss": 0.955, "step": 13915 }, { "epoch": 0.7986688851913477, "grad_norm": 0.2451171875, "learning_rate": 2.369534027980015e-05, "loss": 0.9769, "step": 13920 }, { "epoch": 0.7989557633828677, "grad_norm": 0.26171875, "learning_rate": 2.363065370223716e-05, "loss": 0.939, "step": 13925 }, { "epoch": 0.7992426415743875, "grad_norm": 0.275390625, "learning_rate": 2.3566043708025874e-05, "loss": 1.0056, "step": 13930 }, { "epoch": 0.7995295197659074, "grad_norm": 0.28125, "learning_rate": 2.3501510361957367e-05, "loss": 0.9896, "step": 13935 }, { "epoch": 0.7998163979574273, "grad_norm": 0.26953125, "learning_rate": 2.3437053728745807e-05, "loss": 0.9721, "step": 13940 }, { "epoch": 0.8001032761489472, "grad_norm": 0.26953125, "learning_rate": 2.337267387302845e-05, "loss": 0.9788, "step": 13945 }, { "epoch": 0.800390154340467, "grad_norm": 0.2890625, "learning_rate": 2.3308370859365523e-05, "loss": 0.9456, "step": 13950 }, { "epoch": 0.800677032531987, "grad_norm": 0.25390625, "learning_rate": 2.324414475224034e-05, "loss": 0.9169, "step": 13955 }, { "epoch": 0.8009639107235068, "grad_norm": 0.259765625, "learning_rate": 2.317999561605888e-05, "loss": 0.9856, "step": 13960 }, { "epoch": 0.8012507889150267, "grad_norm": 0.26171875, "learning_rate": 2.311592351515004e-05, "loss": 0.9333, "step": 13965 }, { "epoch": 0.8015376671065466, "grad_norm": 0.263671875, "learning_rate": 2.3051928513765542e-05, "loss": 0.9138, "step": 13970 }, { "epoch": 0.8018245452980665, "grad_norm": 0.2734375, "learning_rate": 2.2988010676079674e-05, "loss": 0.8617, "step": 13975 }, { "epoch": 0.8021114234895863, "grad_norm": 0.279296875, "learning_rate": 2.292417006618939e-05, "loss": 0.9493, "step": 13980 }, { "epoch": 0.8023983016811062, "grad_norm": 0.318359375, "learning_rate": 2.2860406748114195e-05, "loss": 1.0224, "step": 13985 }, { "epoch": 0.8026851798726261, "grad_norm": 0.259765625, "learning_rate": 2.279672078579609e-05, "loss": 0.997, "step": 13990 }, { "epoch": 0.802972058064146, "grad_norm": 0.2734375, "learning_rate": 2.2733112243099507e-05, "loss": 0.9755, "step": 13995 }, { "epoch": 0.8032589362556658, "grad_norm": 0.283203125, "learning_rate": 2.2669581183811196e-05, "loss": 0.9347, "step": 14000 }, { "epoch": 0.8035458144471858, "grad_norm": 0.259765625, "learning_rate": 2.2606127671640333e-05, "loss": 0.9454, "step": 14005 }, { "epoch": 0.8038326926387056, "grad_norm": 0.267578125, "learning_rate": 2.254275177021816e-05, "loss": 0.8952, "step": 14010 }, { "epoch": 0.8041195708302254, "grad_norm": 0.255859375, "learning_rate": 2.247945354309817e-05, "loss": 0.9592, "step": 14015 }, { "epoch": 0.8044064490217454, "grad_norm": 0.2734375, "learning_rate": 2.2416233053756032e-05, "loss": 0.9797, "step": 14020 }, { "epoch": 0.8046933272132653, "grad_norm": 0.2412109375, "learning_rate": 2.2353090365589348e-05, "loss": 0.9639, "step": 14025 }, { "epoch": 0.8049802054047851, "grad_norm": 0.255859375, "learning_rate": 2.2290025541917768e-05, "loss": 0.9881, "step": 14030 }, { "epoch": 0.8052670835963051, "grad_norm": 0.267578125, "learning_rate": 2.2227038645982833e-05, "loss": 0.9101, "step": 14035 }, { "epoch": 0.8055539617878249, "grad_norm": 0.265625, "learning_rate": 2.2164129740947935e-05, "loss": 0.929, "step": 14040 }, { "epoch": 0.8058408399793447, "grad_norm": 0.30078125, "learning_rate": 2.210129888989827e-05, "loss": 0.9931, "step": 14045 }, { "epoch": 0.8061277181708647, "grad_norm": 0.283203125, "learning_rate": 2.2038546155840735e-05, "loss": 0.9698, "step": 14050 }, { "epoch": 0.8064145963623846, "grad_norm": 0.265625, "learning_rate": 2.1975871601703977e-05, "loss": 0.9716, "step": 14055 }, { "epoch": 0.8067014745539044, "grad_norm": 0.310546875, "learning_rate": 2.191327529033812e-05, "loss": 0.9704, "step": 14060 }, { "epoch": 0.8069883527454242, "grad_norm": 0.24609375, "learning_rate": 2.1850757284514877e-05, "loss": 0.9215, "step": 14065 }, { "epoch": 0.8072752309369442, "grad_norm": 0.26953125, "learning_rate": 2.178831764692749e-05, "loss": 0.9289, "step": 14070 }, { "epoch": 0.807562109128464, "grad_norm": 0.2734375, "learning_rate": 2.1725956440190542e-05, "loss": 0.9023, "step": 14075 }, { "epoch": 0.8078489873199839, "grad_norm": 0.25, "learning_rate": 2.1663673726840006e-05, "loss": 0.9553, "step": 14080 }, { "epoch": 0.8081358655115038, "grad_norm": 0.251953125, "learning_rate": 2.160146956933311e-05, "loss": 0.9908, "step": 14085 }, { "epoch": 0.8084227437030237, "grad_norm": 0.25390625, "learning_rate": 2.1539344030048337e-05, "loss": 0.9315, "step": 14090 }, { "epoch": 0.8087096218945435, "grad_norm": 0.3046875, "learning_rate": 2.1477297171285282e-05, "loss": 0.8911, "step": 14095 }, { "epoch": 0.8089965000860635, "grad_norm": 0.265625, "learning_rate": 2.141532905526472e-05, "loss": 0.8708, "step": 14100 }, { "epoch": 0.8092833782775833, "grad_norm": 0.287109375, "learning_rate": 2.1353439744128434e-05, "loss": 0.9597, "step": 14105 }, { "epoch": 0.8095702564691032, "grad_norm": 0.28515625, "learning_rate": 2.1291629299939097e-05, "loss": 1.0855, "step": 14110 }, { "epoch": 0.8098571346606231, "grad_norm": 0.255859375, "learning_rate": 2.1229897784680365e-05, "loss": 0.9425, "step": 14115 }, { "epoch": 0.810144012852143, "grad_norm": 0.267578125, "learning_rate": 2.116824526025679e-05, "loss": 0.8716, "step": 14120 }, { "epoch": 0.8104308910436628, "grad_norm": 0.251953125, "learning_rate": 2.1106671788493636e-05, "loss": 1.0174, "step": 14125 }, { "epoch": 0.8107177692351828, "grad_norm": 0.275390625, "learning_rate": 2.104517743113693e-05, "loss": 0.9206, "step": 14130 }, { "epoch": 0.8110046474267026, "grad_norm": 0.271484375, "learning_rate": 2.0983762249853344e-05, "loss": 0.9666, "step": 14135 }, { "epoch": 0.8112915256182225, "grad_norm": 0.283203125, "learning_rate": 2.092242630623016e-05, "loss": 0.9522, "step": 14140 }, { "epoch": 0.8115784038097423, "grad_norm": 0.2734375, "learning_rate": 2.086116966177516e-05, "loss": 0.9182, "step": 14145 }, { "epoch": 0.8118652820012623, "grad_norm": 0.267578125, "learning_rate": 2.079999237791672e-05, "loss": 0.9693, "step": 14150 }, { "epoch": 0.8121521601927821, "grad_norm": 0.26171875, "learning_rate": 2.0738894516003536e-05, "loss": 0.9591, "step": 14155 }, { "epoch": 0.812439038384302, "grad_norm": 0.267578125, "learning_rate": 2.067787613730462e-05, "loss": 0.961, "step": 14160 }, { "epoch": 0.8127259165758219, "grad_norm": 0.296875, "learning_rate": 2.0616937303009408e-05, "loss": 0.9661, "step": 14165 }, { "epoch": 0.8130127947673418, "grad_norm": 0.267578125, "learning_rate": 2.055607807422748e-05, "loss": 1.0048, "step": 14170 }, { "epoch": 0.8132996729588616, "grad_norm": 0.28125, "learning_rate": 2.0495298511988602e-05, "loss": 0.9954, "step": 14175 }, { "epoch": 0.8135865511503816, "grad_norm": 0.2734375, "learning_rate": 2.0434598677242656e-05, "loss": 0.9824, "step": 14180 }, { "epoch": 0.8138734293419014, "grad_norm": 0.2734375, "learning_rate": 2.037397863085957e-05, "loss": 1.0136, "step": 14185 }, { "epoch": 0.8141603075334213, "grad_norm": 0.25390625, "learning_rate": 2.0313438433629263e-05, "loss": 0.9508, "step": 14190 }, { "epoch": 0.8144471857249412, "grad_norm": 0.3046875, "learning_rate": 2.0252978146261557e-05, "loss": 0.9738, "step": 14195 }, { "epoch": 0.8147340639164611, "grad_norm": 0.275390625, "learning_rate": 2.0192597829386217e-05, "loss": 1.0262, "step": 14200 }, { "epoch": 0.8150209421079809, "grad_norm": 0.26953125, "learning_rate": 2.0132297543552757e-05, "loss": 0.9072, "step": 14205 }, { "epoch": 0.8153078202995009, "grad_norm": 0.2490234375, "learning_rate": 2.0072077349230357e-05, "loss": 0.8962, "step": 14210 }, { "epoch": 0.8155946984910207, "grad_norm": 0.267578125, "learning_rate": 2.0011937306808048e-05, "loss": 0.9166, "step": 14215 }, { "epoch": 0.8158815766825406, "grad_norm": 0.265625, "learning_rate": 1.9951877476594382e-05, "loss": 0.8863, "step": 14220 }, { "epoch": 0.8161684548740604, "grad_norm": 0.2734375, "learning_rate": 1.9891897918817472e-05, "loss": 0.9848, "step": 14225 }, { "epoch": 0.8164553330655804, "grad_norm": 0.259765625, "learning_rate": 1.9831998693624964e-05, "loss": 0.9138, "step": 14230 }, { "epoch": 0.8167422112571002, "grad_norm": 0.251953125, "learning_rate": 1.977217986108393e-05, "loss": 0.9695, "step": 14235 }, { "epoch": 0.8170290894486201, "grad_norm": 0.279296875, "learning_rate": 1.9712441481180833e-05, "loss": 0.9367, "step": 14240 }, { "epoch": 0.81731596764014, "grad_norm": 0.263671875, "learning_rate": 1.9652783613821435e-05, "loss": 0.9687, "step": 14245 }, { "epoch": 0.8176028458316599, "grad_norm": 0.2734375, "learning_rate": 1.9593206318830815e-05, "loss": 0.9353, "step": 14250 }, { "epoch": 0.8178897240231797, "grad_norm": 0.255859375, "learning_rate": 1.9533709655953235e-05, "loss": 0.9681, "step": 14255 }, { "epoch": 0.8181766022146997, "grad_norm": 0.25390625, "learning_rate": 1.9474293684851984e-05, "loss": 0.9158, "step": 14260 }, { "epoch": 0.8184634804062195, "grad_norm": 0.25, "learning_rate": 1.9414958465109635e-05, "loss": 0.9798, "step": 14265 }, { "epoch": 0.8187503585977394, "grad_norm": 0.271484375, "learning_rate": 1.9355704056227632e-05, "loss": 1.0429, "step": 14270 }, { "epoch": 0.8190372367892593, "grad_norm": 0.2890625, "learning_rate": 1.9296530517626445e-05, "loss": 0.8955, "step": 14275 }, { "epoch": 0.8193241149807792, "grad_norm": 0.2578125, "learning_rate": 1.9237437908645417e-05, "loss": 0.9144, "step": 14280 }, { "epoch": 0.819610993172299, "grad_norm": 0.26171875, "learning_rate": 1.917842628854275e-05, "loss": 0.9887, "step": 14285 }, { "epoch": 0.819897871363819, "grad_norm": 0.263671875, "learning_rate": 1.9119495716495417e-05, "loss": 0.9999, "step": 14290 }, { "epoch": 0.8201847495553388, "grad_norm": 0.2578125, "learning_rate": 1.9060646251599157e-05, "loss": 0.9903, "step": 14295 }, { "epoch": 0.8204716277468587, "grad_norm": 0.265625, "learning_rate": 1.900187795286834e-05, "loss": 0.9522, "step": 14300 }, { "epoch": 0.8207585059383785, "grad_norm": 0.28125, "learning_rate": 1.8943190879235972e-05, "loss": 0.9792, "step": 14305 }, { "epoch": 0.8210453841298985, "grad_norm": 0.265625, "learning_rate": 1.8884585089553498e-05, "loss": 0.961, "step": 14310 }, { "epoch": 0.8213322623214183, "grad_norm": 0.2734375, "learning_rate": 1.8826060642591005e-05, "loss": 0.9782, "step": 14315 }, { "epoch": 0.8216191405129382, "grad_norm": 0.2412109375, "learning_rate": 1.8767617597036925e-05, "loss": 0.8703, "step": 14320 }, { "epoch": 0.8219060187044581, "grad_norm": 0.302734375, "learning_rate": 1.8709256011498076e-05, "loss": 0.9159, "step": 14325 }, { "epoch": 0.822192896895978, "grad_norm": 0.2578125, "learning_rate": 1.865097594449958e-05, "loss": 0.8634, "step": 14330 }, { "epoch": 0.8224797750874978, "grad_norm": 0.267578125, "learning_rate": 1.8592777454484835e-05, "loss": 0.9098, "step": 14335 }, { "epoch": 0.8227666532790178, "grad_norm": 0.26171875, "learning_rate": 1.8534660599815368e-05, "loss": 0.9266, "step": 14340 }, { "epoch": 0.8230535314705376, "grad_norm": 0.27734375, "learning_rate": 1.8476625438770944e-05, "loss": 0.9914, "step": 14345 }, { "epoch": 0.8233404096620575, "grad_norm": 0.259765625, "learning_rate": 1.8418672029549355e-05, "loss": 0.977, "step": 14350 }, { "epoch": 0.8236272878535774, "grad_norm": 0.2578125, "learning_rate": 1.836080043026638e-05, "loss": 0.9636, "step": 14355 }, { "epoch": 0.8239141660450973, "grad_norm": 0.2578125, "learning_rate": 1.8303010698955804e-05, "loss": 0.9023, "step": 14360 }, { "epoch": 0.8242010442366171, "grad_norm": 0.271484375, "learning_rate": 1.8245302893569295e-05, "loss": 0.9805, "step": 14365 }, { "epoch": 0.8244879224281371, "grad_norm": 0.265625, "learning_rate": 1.818767707197636e-05, "loss": 0.8912, "step": 14370 }, { "epoch": 0.8247748006196569, "grad_norm": 0.251953125, "learning_rate": 1.8130133291964323e-05, "loss": 0.9084, "step": 14375 }, { "epoch": 0.8250616788111768, "grad_norm": 0.25390625, "learning_rate": 1.80726716112382e-05, "loss": 0.8483, "step": 14380 }, { "epoch": 0.8253485570026966, "grad_norm": 0.265625, "learning_rate": 1.80152920874207e-05, "loss": 0.8901, "step": 14385 }, { "epoch": 0.8256354351942166, "grad_norm": 0.259765625, "learning_rate": 1.7957994778052112e-05, "loss": 0.8935, "step": 14390 }, { "epoch": 0.8259223133857364, "grad_norm": 0.27734375, "learning_rate": 1.7900779740590344e-05, "loss": 1.0121, "step": 14395 }, { "epoch": 0.8262091915772563, "grad_norm": 0.263671875, "learning_rate": 1.784364703241076e-05, "loss": 0.952, "step": 14400 }, { "epoch": 0.8264960697687762, "grad_norm": 0.2578125, "learning_rate": 1.778659671080616e-05, "loss": 0.9186, "step": 14405 }, { "epoch": 0.826782947960296, "grad_norm": 0.27734375, "learning_rate": 1.7729628832986722e-05, "loss": 1.0093, "step": 14410 }, { "epoch": 0.8270698261518159, "grad_norm": 0.27734375, "learning_rate": 1.7672743456079976e-05, "loss": 0.9438, "step": 14415 }, { "epoch": 0.8273567043433359, "grad_norm": 0.2890625, "learning_rate": 1.761594063713068e-05, "loss": 0.965, "step": 14420 }, { "epoch": 0.8276435825348557, "grad_norm": 0.271484375, "learning_rate": 1.75592204331009e-05, "loss": 0.9737, "step": 14425 }, { "epoch": 0.8279304607263755, "grad_norm": 0.263671875, "learning_rate": 1.7502582900869702e-05, "loss": 0.8891, "step": 14430 }, { "epoch": 0.8282173389178955, "grad_norm": 0.265625, "learning_rate": 1.744602809723337e-05, "loss": 0.9375, "step": 14435 }, { "epoch": 0.8285042171094154, "grad_norm": 0.26953125, "learning_rate": 1.7389556078905144e-05, "loss": 1.0189, "step": 14440 }, { "epoch": 0.8287910953009352, "grad_norm": 0.251953125, "learning_rate": 1.7333166902515363e-05, "loss": 0.8845, "step": 14445 }, { "epoch": 0.8290779734924552, "grad_norm": 0.265625, "learning_rate": 1.727686062461118e-05, "loss": 0.9842, "step": 14450 }, { "epoch": 0.829364851683975, "grad_norm": 0.263671875, "learning_rate": 1.722063730165665e-05, "loss": 0.867, "step": 14455 }, { "epoch": 0.8296517298754948, "grad_norm": 0.259765625, "learning_rate": 1.7164496990032665e-05, "loss": 0.9246, "step": 14460 }, { "epoch": 0.8299386080670147, "grad_norm": 0.2578125, "learning_rate": 1.7108439746036842e-05, "loss": 0.8562, "step": 14465 }, { "epoch": 0.8302254862585347, "grad_norm": 0.263671875, "learning_rate": 1.7052465625883494e-05, "loss": 0.9346, "step": 14470 }, { "epoch": 0.8305123644500545, "grad_norm": 0.26953125, "learning_rate": 1.699657468570367e-05, "loss": 0.9476, "step": 14475 }, { "epoch": 0.8307992426415743, "grad_norm": 0.255859375, "learning_rate": 1.694076698154484e-05, "loss": 0.9324, "step": 14480 }, { "epoch": 0.8310861208330943, "grad_norm": 0.25390625, "learning_rate": 1.6885042569371146e-05, "loss": 1.0023, "step": 14485 }, { "epoch": 0.8313729990246141, "grad_norm": 0.26953125, "learning_rate": 1.68294015050631e-05, "loss": 0.9223, "step": 14490 }, { "epoch": 0.831659877216134, "grad_norm": 0.265625, "learning_rate": 1.677384384441776e-05, "loss": 0.8871, "step": 14495 }, { "epoch": 0.831946755407654, "grad_norm": 0.3046875, "learning_rate": 1.6718369643148435e-05, "loss": 0.908, "step": 14500 }, { "epoch": 0.8322336335991738, "grad_norm": 0.263671875, "learning_rate": 1.6662978956884778e-05, "loss": 0.9022, "step": 14505 }, { "epoch": 0.8325205117906936, "grad_norm": 0.259765625, "learning_rate": 1.66076718411727e-05, "loss": 0.889, "step": 14510 }, { "epoch": 0.8328073899822136, "grad_norm": 0.263671875, "learning_rate": 1.6552448351474304e-05, "loss": 0.9465, "step": 14515 }, { "epoch": 0.8330942681737334, "grad_norm": 0.265625, "learning_rate": 1.649730854316779e-05, "loss": 0.9313, "step": 14520 }, { "epoch": 0.8333811463652533, "grad_norm": 0.263671875, "learning_rate": 1.644225247154756e-05, "loss": 0.8882, "step": 14525 }, { "epoch": 0.8336680245567732, "grad_norm": 0.263671875, "learning_rate": 1.6387280191823896e-05, "loss": 0.9555, "step": 14530 }, { "epoch": 0.8339549027482931, "grad_norm": 0.2578125, "learning_rate": 1.6332391759123123e-05, "loss": 0.9272, "step": 14535 }, { "epoch": 0.8342417809398129, "grad_norm": 0.283203125, "learning_rate": 1.6277587228487533e-05, "loss": 1.0969, "step": 14540 }, { "epoch": 0.8345286591313328, "grad_norm": 0.263671875, "learning_rate": 1.6222866654875213e-05, "loss": 1.0165, "step": 14545 }, { "epoch": 0.8348155373228527, "grad_norm": 0.298828125, "learning_rate": 1.6168230093160062e-05, "loss": 1.0093, "step": 14550 }, { "epoch": 0.8351024155143726, "grad_norm": 0.25, "learning_rate": 1.611367759813176e-05, "loss": 0.9414, "step": 14555 }, { "epoch": 0.8353892937058924, "grad_norm": 0.244140625, "learning_rate": 1.6059209224495676e-05, "loss": 0.9592, "step": 14560 }, { "epoch": 0.8356761718974124, "grad_norm": 0.25, "learning_rate": 1.6004825026872806e-05, "loss": 0.9276, "step": 14565 }, { "epoch": 0.8359630500889322, "grad_norm": 0.26171875, "learning_rate": 1.5950525059799714e-05, "loss": 0.9925, "step": 14570 }, { "epoch": 0.8362499282804521, "grad_norm": 0.28515625, "learning_rate": 1.5896309377728624e-05, "loss": 0.9718, "step": 14575 }, { "epoch": 0.836536806471972, "grad_norm": 0.271484375, "learning_rate": 1.5842178035027044e-05, "loss": 0.9949, "step": 14580 }, { "epoch": 0.8368236846634919, "grad_norm": 0.279296875, "learning_rate": 1.5788131085978032e-05, "loss": 0.9233, "step": 14585 }, { "epoch": 0.8371105628550117, "grad_norm": 0.259765625, "learning_rate": 1.573416858478003e-05, "loss": 0.9577, "step": 14590 }, { "epoch": 0.8373974410465317, "grad_norm": 0.263671875, "learning_rate": 1.568029058554672e-05, "loss": 0.9751, "step": 14595 }, { "epoch": 0.8376843192380515, "grad_norm": 0.2890625, "learning_rate": 1.5626497142307084e-05, "loss": 0.9637, "step": 14600 }, { "epoch": 0.8379711974295714, "grad_norm": 0.36328125, "learning_rate": 1.5572788309005315e-05, "loss": 0.9275, "step": 14605 }, { "epoch": 0.8382580756210913, "grad_norm": 0.294921875, "learning_rate": 1.5519164139500743e-05, "loss": 0.9635, "step": 14610 }, { "epoch": 0.8385449538126112, "grad_norm": 0.251953125, "learning_rate": 1.5465624687567816e-05, "loss": 0.9621, "step": 14615 }, { "epoch": 0.838831832004131, "grad_norm": 0.263671875, "learning_rate": 1.5412170006895986e-05, "loss": 0.8839, "step": 14620 }, { "epoch": 0.8391187101956509, "grad_norm": 0.26953125, "learning_rate": 1.5358800151089803e-05, "loss": 0.9448, "step": 14625 }, { "epoch": 0.8394055883871708, "grad_norm": 0.263671875, "learning_rate": 1.5305515173668594e-05, "loss": 0.9798, "step": 14630 }, { "epoch": 0.8396924665786907, "grad_norm": 0.267578125, "learning_rate": 1.5252315128066663e-05, "loss": 0.9255, "step": 14635 }, { "epoch": 0.8399793447702105, "grad_norm": 0.27734375, "learning_rate": 1.519920006763319e-05, "loss": 1.0368, "step": 14640 }, { "epoch": 0.8402662229617305, "grad_norm": 0.279296875, "learning_rate": 1.5146170045632035e-05, "loss": 0.9312, "step": 14645 }, { "epoch": 0.8405531011532503, "grad_norm": 0.255859375, "learning_rate": 1.5093225115241838e-05, "loss": 0.8995, "step": 14650 }, { "epoch": 0.8408399793447702, "grad_norm": 0.259765625, "learning_rate": 1.5040365329555895e-05, "loss": 0.9117, "step": 14655 }, { "epoch": 0.8411268575362901, "grad_norm": 0.283203125, "learning_rate": 1.4987590741582102e-05, "loss": 0.9783, "step": 14660 }, { "epoch": 0.84141373572781, "grad_norm": 0.2431640625, "learning_rate": 1.493490140424293e-05, "loss": 0.8676, "step": 14665 }, { "epoch": 0.8417006139193298, "grad_norm": 0.2470703125, "learning_rate": 1.4882297370375387e-05, "loss": 0.891, "step": 14670 }, { "epoch": 0.8419874921108498, "grad_norm": 0.294921875, "learning_rate": 1.4829778692730944e-05, "loss": 0.9868, "step": 14675 }, { "epoch": 0.8422743703023696, "grad_norm": 0.28515625, "learning_rate": 1.4777345423975375e-05, "loss": 0.9805, "step": 14680 }, { "epoch": 0.8425612484938895, "grad_norm": 0.265625, "learning_rate": 1.4724997616688907e-05, "loss": 0.9984, "step": 14685 }, { "epoch": 0.8428481266854094, "grad_norm": 0.255859375, "learning_rate": 1.4672735323366061e-05, "loss": 0.9138, "step": 14690 }, { "epoch": 0.8431350048769293, "grad_norm": 0.267578125, "learning_rate": 1.4620558596415578e-05, "loss": 0.8917, "step": 14695 }, { "epoch": 0.8434218830684491, "grad_norm": 0.2734375, "learning_rate": 1.4568467488160386e-05, "loss": 0.8793, "step": 14700 }, { "epoch": 0.843708761259969, "grad_norm": 0.265625, "learning_rate": 1.4516462050837564e-05, "loss": 0.9941, "step": 14705 }, { "epoch": 0.8439956394514889, "grad_norm": 0.275390625, "learning_rate": 1.4464542336598274e-05, "loss": 0.9185, "step": 14710 }, { "epoch": 0.8442825176430088, "grad_norm": 0.267578125, "learning_rate": 1.4412708397507724e-05, "loss": 0.9379, "step": 14715 }, { "epoch": 0.8445693958345286, "grad_norm": 0.279296875, "learning_rate": 1.4360960285545133e-05, "loss": 0.9381, "step": 14720 }, { "epoch": 0.8448562740260486, "grad_norm": 0.275390625, "learning_rate": 1.4309298052603626e-05, "loss": 0.9562, "step": 14725 }, { "epoch": 0.8451431522175684, "grad_norm": 0.271484375, "learning_rate": 1.4257721750490127e-05, "loss": 0.9234, "step": 14730 }, { "epoch": 0.8454300304090883, "grad_norm": 0.2490234375, "learning_rate": 1.4206231430925553e-05, "loss": 0.9905, "step": 14735 }, { "epoch": 0.8457169086006082, "grad_norm": 0.259765625, "learning_rate": 1.4154827145544492e-05, "loss": 0.9166, "step": 14740 }, { "epoch": 0.8460037867921281, "grad_norm": 0.25390625, "learning_rate": 1.410350894589525e-05, "loss": 0.9698, "step": 14745 }, { "epoch": 0.8462906649836479, "grad_norm": 0.271484375, "learning_rate": 1.4052276883439864e-05, "loss": 0.9123, "step": 14750 }, { "epoch": 0.8465775431751679, "grad_norm": 0.2734375, "learning_rate": 1.4001131009553936e-05, "loss": 0.9739, "step": 14755 }, { "epoch": 0.8468644213666877, "grad_norm": 0.25390625, "learning_rate": 1.3950071375526685e-05, "loss": 1.0007, "step": 14760 }, { "epoch": 0.8471512995582076, "grad_norm": 0.26953125, "learning_rate": 1.3899098032560787e-05, "loss": 0.8987, "step": 14765 }, { "epoch": 0.8474381777497275, "grad_norm": 0.263671875, "learning_rate": 1.3848211031772473e-05, "loss": 0.9215, "step": 14770 }, { "epoch": 0.8477250559412474, "grad_norm": 0.25390625, "learning_rate": 1.3797410424191337e-05, "loss": 0.9238, "step": 14775 }, { "epoch": 0.8480119341327672, "grad_norm": 0.265625, "learning_rate": 1.3746696260760295e-05, "loss": 0.916, "step": 14780 }, { "epoch": 0.848298812324287, "grad_norm": 0.283203125, "learning_rate": 1.3696068592335676e-05, "loss": 0.9393, "step": 14785 }, { "epoch": 0.848585690515807, "grad_norm": 0.244140625, "learning_rate": 1.3645527469686992e-05, "loss": 0.9651, "step": 14790 }, { "epoch": 0.8488725687073269, "grad_norm": 0.259765625, "learning_rate": 1.3595072943497011e-05, "loss": 0.9529, "step": 14795 }, { "epoch": 0.8491594468988467, "grad_norm": 0.25390625, "learning_rate": 1.3544705064361629e-05, "loss": 0.9006, "step": 14800 }, { "epoch": 0.8494463250903667, "grad_norm": 0.26171875, "learning_rate": 1.3494423882789874e-05, "loss": 0.9573, "step": 14805 }, { "epoch": 0.8497332032818865, "grad_norm": 0.314453125, "learning_rate": 1.3444229449203827e-05, "loss": 1.0288, "step": 14810 }, { "epoch": 0.8500200814734064, "grad_norm": 0.259765625, "learning_rate": 1.3394121813938554e-05, "loss": 0.9184, "step": 14815 }, { "epoch": 0.8503069596649263, "grad_norm": 0.267578125, "learning_rate": 1.3344101027242161e-05, "loss": 0.9045, "step": 14820 }, { "epoch": 0.8505938378564462, "grad_norm": 0.255859375, "learning_rate": 1.3294167139275593e-05, "loss": 0.9593, "step": 14825 }, { "epoch": 0.850880716047966, "grad_norm": 0.26171875, "learning_rate": 1.3244320200112592e-05, "loss": 0.9066, "step": 14830 }, { "epoch": 0.851167594239486, "grad_norm": 0.2490234375, "learning_rate": 1.3194560259739863e-05, "loss": 0.9236, "step": 14835 }, { "epoch": 0.8514544724310058, "grad_norm": 0.25, "learning_rate": 1.3144887368056757e-05, "loss": 0.923, "step": 14840 }, { "epoch": 0.8517413506225257, "grad_norm": 0.2578125, "learning_rate": 1.3095301574875363e-05, "loss": 0.9765, "step": 14845 }, { "epoch": 0.8520282288140456, "grad_norm": 0.279296875, "learning_rate": 1.3045802929920414e-05, "loss": 0.9518, "step": 14850 }, { "epoch": 0.8523151070055655, "grad_norm": 0.3125, "learning_rate": 1.2996391482829273e-05, "loss": 0.8879, "step": 14855 }, { "epoch": 0.8526019851970853, "grad_norm": 0.26953125, "learning_rate": 1.2947067283151837e-05, "loss": 0.8637, "step": 14860 }, { "epoch": 0.8528888633886051, "grad_norm": 0.26171875, "learning_rate": 1.289783038035055e-05, "loss": 1.0178, "step": 14865 }, { "epoch": 0.8531757415801251, "grad_norm": 0.29296875, "learning_rate": 1.2848680823800275e-05, "loss": 0.8906, "step": 14870 }, { "epoch": 0.853462619771645, "grad_norm": 0.2578125, "learning_rate": 1.2799618662788315e-05, "loss": 0.9234, "step": 14875 }, { "epoch": 0.8537494979631648, "grad_norm": 0.283203125, "learning_rate": 1.2750643946514252e-05, "loss": 0.9215, "step": 14880 }, { "epoch": 0.8540363761546848, "grad_norm": 0.259765625, "learning_rate": 1.2701756724090108e-05, "loss": 0.893, "step": 14885 }, { "epoch": 0.8543232543462046, "grad_norm": 0.26171875, "learning_rate": 1.2652957044540082e-05, "loss": 0.935, "step": 14890 }, { "epoch": 0.8546101325377244, "grad_norm": 0.2470703125, "learning_rate": 1.2604244956800593e-05, "loss": 0.9196, "step": 14895 }, { "epoch": 0.8548970107292444, "grad_norm": 0.255859375, "learning_rate": 1.2555620509720233e-05, "loss": 0.9674, "step": 14900 }, { "epoch": 0.8551838889207642, "grad_norm": 0.2890625, "learning_rate": 1.2507083752059723e-05, "loss": 0.9899, "step": 14905 }, { "epoch": 0.8554707671122841, "grad_norm": 0.31640625, "learning_rate": 1.2458634732491781e-05, "loss": 0.9745, "step": 14910 }, { "epoch": 0.855757645303804, "grad_norm": 0.279296875, "learning_rate": 1.2410273499601266e-05, "loss": 0.9971, "step": 14915 }, { "epoch": 0.8560445234953239, "grad_norm": 0.26171875, "learning_rate": 1.2362000101884885e-05, "loss": 0.9514, "step": 14920 }, { "epoch": 0.8563314016868437, "grad_norm": 0.26171875, "learning_rate": 1.2313814587751316e-05, "loss": 0.9545, "step": 14925 }, { "epoch": 0.8566182798783637, "grad_norm": 0.265625, "learning_rate": 1.2265717005521115e-05, "loss": 0.9368, "step": 14930 }, { "epoch": 0.8569051580698835, "grad_norm": 0.26953125, "learning_rate": 1.2217707403426627e-05, "loss": 0.9405, "step": 14935 }, { "epoch": 0.8571920362614034, "grad_norm": 0.263671875, "learning_rate": 1.2169785829612001e-05, "loss": 0.9264, "step": 14940 }, { "epoch": 0.8574789144529232, "grad_norm": 0.27734375, "learning_rate": 1.2121952332133091e-05, "loss": 1.0209, "step": 14945 }, { "epoch": 0.8577657926444432, "grad_norm": 0.26953125, "learning_rate": 1.2074206958957447e-05, "loss": 0.9528, "step": 14950 }, { "epoch": 0.858052670835963, "grad_norm": 0.2333984375, "learning_rate": 1.2026549757964212e-05, "loss": 0.8766, "step": 14955 }, { "epoch": 0.8583395490274829, "grad_norm": 0.27734375, "learning_rate": 1.1978980776944137e-05, "loss": 0.9347, "step": 14960 }, { "epoch": 0.8586264272190028, "grad_norm": 0.259765625, "learning_rate": 1.1931500063599543e-05, "loss": 0.9569, "step": 14965 }, { "epoch": 0.8589133054105227, "grad_norm": 0.255859375, "learning_rate": 1.1884107665544164e-05, "loss": 0.9079, "step": 14970 }, { "epoch": 0.8592001836020425, "grad_norm": 0.255859375, "learning_rate": 1.1836803630303206e-05, "loss": 0.9977, "step": 14975 }, { "epoch": 0.8594870617935625, "grad_norm": 0.275390625, "learning_rate": 1.1789588005313257e-05, "loss": 0.9528, "step": 14980 }, { "epoch": 0.8597739399850823, "grad_norm": 0.279296875, "learning_rate": 1.1742460837922265e-05, "loss": 0.936, "step": 14985 }, { "epoch": 0.8600608181766022, "grad_norm": 0.2314453125, "learning_rate": 1.1695422175389447e-05, "loss": 0.9134, "step": 14990 }, { "epoch": 0.8603476963681221, "grad_norm": 0.2734375, "learning_rate": 1.1648472064885286e-05, "loss": 0.991, "step": 14995 }, { "epoch": 0.860634574559642, "grad_norm": 0.25, "learning_rate": 1.160161055349146e-05, "loss": 0.8948, "step": 15000 }, { "epoch": 0.8609214527511618, "grad_norm": 0.27734375, "learning_rate": 1.1554837688200793e-05, "loss": 0.9878, "step": 15005 }, { "epoch": 0.8612083309426818, "grad_norm": 0.2451171875, "learning_rate": 1.1508153515917196e-05, "loss": 0.9045, "step": 15010 }, { "epoch": 0.8614952091342016, "grad_norm": 0.248046875, "learning_rate": 1.1461558083455704e-05, "loss": 0.9272, "step": 15015 }, { "epoch": 0.8617820873257215, "grad_norm": 0.2412109375, "learning_rate": 1.1415051437542302e-05, "loss": 0.9048, "step": 15020 }, { "epoch": 0.8620689655172413, "grad_norm": 0.28125, "learning_rate": 1.1368633624813974e-05, "loss": 0.9666, "step": 15025 }, { "epoch": 0.8623558437087613, "grad_norm": 0.2431640625, "learning_rate": 1.1322304691818575e-05, "loss": 1.013, "step": 15030 }, { "epoch": 0.8626427219002811, "grad_norm": 0.267578125, "learning_rate": 1.1276064685014886e-05, "loss": 0.9382, "step": 15035 }, { "epoch": 0.862929600091801, "grad_norm": 0.263671875, "learning_rate": 1.1229913650772472e-05, "loss": 0.9891, "step": 15040 }, { "epoch": 0.8632164782833209, "grad_norm": 0.267578125, "learning_rate": 1.1183851635371734e-05, "loss": 0.9336, "step": 15045 }, { "epoch": 0.8635033564748408, "grad_norm": 0.3046875, "learning_rate": 1.1137878685003722e-05, "loss": 0.9662, "step": 15050 }, { "epoch": 0.8637902346663606, "grad_norm": 0.2490234375, "learning_rate": 1.1091994845770226e-05, "loss": 0.9321, "step": 15055 }, { "epoch": 0.8640771128578806, "grad_norm": 0.259765625, "learning_rate": 1.104620016368364e-05, "loss": 1.0532, "step": 15060 }, { "epoch": 0.8643639910494004, "grad_norm": 0.26953125, "learning_rate": 1.1000494684667017e-05, "loss": 0.9349, "step": 15065 }, { "epoch": 0.8646508692409203, "grad_norm": 0.25, "learning_rate": 1.0954878454553908e-05, "loss": 0.8553, "step": 15070 }, { "epoch": 0.8649377474324402, "grad_norm": 0.265625, "learning_rate": 1.0909351519088352e-05, "loss": 0.9111, "step": 15075 }, { "epoch": 0.8652246256239601, "grad_norm": 0.263671875, "learning_rate": 1.0863913923924862e-05, "loss": 0.9779, "step": 15080 }, { "epoch": 0.8655115038154799, "grad_norm": 0.298828125, "learning_rate": 1.081856571462837e-05, "loss": 0.9526, "step": 15085 }, { "epoch": 0.8657983820069999, "grad_norm": 0.294921875, "learning_rate": 1.0773306936674133e-05, "loss": 0.9902, "step": 15090 }, { "epoch": 0.8660852601985197, "grad_norm": 0.369140625, "learning_rate": 1.0728137635447821e-05, "loss": 1.0987, "step": 15095 }, { "epoch": 0.8663721383900396, "grad_norm": 0.2412109375, "learning_rate": 1.0683057856245259e-05, "loss": 0.9029, "step": 15100 }, { "epoch": 0.8666590165815594, "grad_norm": 0.255859375, "learning_rate": 1.0638067644272532e-05, "loss": 0.9804, "step": 15105 }, { "epoch": 0.8669458947730794, "grad_norm": 0.2578125, "learning_rate": 1.059316704464598e-05, "loss": 0.9642, "step": 15110 }, { "epoch": 0.8672327729645992, "grad_norm": 0.291015625, "learning_rate": 1.0548356102391999e-05, "loss": 0.9328, "step": 15115 }, { "epoch": 0.8675196511561191, "grad_norm": 0.26171875, "learning_rate": 1.0503634862447099e-05, "loss": 0.9363, "step": 15120 }, { "epoch": 0.867806529347639, "grad_norm": 0.265625, "learning_rate": 1.0459003369657849e-05, "loss": 0.9308, "step": 15125 }, { "epoch": 0.8680934075391589, "grad_norm": 0.26171875, "learning_rate": 1.0414461668780806e-05, "loss": 0.889, "step": 15130 }, { "epoch": 0.8683802857306787, "grad_norm": 0.27734375, "learning_rate": 1.0370009804482483e-05, "loss": 0.8834, "step": 15135 }, { "epoch": 0.8686671639221987, "grad_norm": 0.255859375, "learning_rate": 1.032564782133929e-05, "loss": 0.9435, "step": 15140 }, { "epoch": 0.8689540421137185, "grad_norm": 0.255859375, "learning_rate": 1.0281375763837598e-05, "loss": 0.932, "step": 15145 }, { "epoch": 0.8692409203052384, "grad_norm": 0.2451171875, "learning_rate": 1.0237193676373435e-05, "loss": 0.9304, "step": 15150 }, { "epoch": 0.8695277984967583, "grad_norm": 0.267578125, "learning_rate": 1.019310160325273e-05, "loss": 0.9592, "step": 15155 }, { "epoch": 0.8698146766882782, "grad_norm": 0.26171875, "learning_rate": 1.0149099588691135e-05, "loss": 0.9544, "step": 15160 }, { "epoch": 0.870101554879798, "grad_norm": 0.275390625, "learning_rate": 1.0105187676813954e-05, "loss": 1.026, "step": 15165 }, { "epoch": 0.870388433071318, "grad_norm": 0.333984375, "learning_rate": 1.006136591165614e-05, "loss": 1.0247, "step": 15170 }, { "epoch": 0.8706753112628378, "grad_norm": 0.26171875, "learning_rate": 1.0017634337162275e-05, "loss": 0.9566, "step": 15175 }, { "epoch": 0.8709621894543577, "grad_norm": 0.26953125, "learning_rate": 9.973992997186465e-06, "loss": 0.9185, "step": 15180 }, { "epoch": 0.8712490676458775, "grad_norm": 0.265625, "learning_rate": 9.930441935492363e-06, "loss": 0.9289, "step": 15185 }, { "epoch": 0.8715359458373975, "grad_norm": 0.259765625, "learning_rate": 9.88698119575302e-06, "loss": 0.9605, "step": 15190 }, { "epoch": 0.8718228240289173, "grad_norm": 0.265625, "learning_rate": 9.843610821551053e-06, "loss": 0.9547, "step": 15195 }, { "epoch": 0.8721097022204372, "grad_norm": 0.265625, "learning_rate": 9.800330856378303e-06, "loss": 0.9544, "step": 15200 }, { "epoch": 0.8723965804119571, "grad_norm": 0.267578125, "learning_rate": 9.757141343636e-06, "loss": 0.9572, "step": 15205 }, { "epoch": 0.872683458603477, "grad_norm": 0.29296875, "learning_rate": 9.714042326634743e-06, "loss": 0.9438, "step": 15210 }, { "epoch": 0.8729703367949968, "grad_norm": 0.275390625, "learning_rate": 9.671033848594301e-06, "loss": 0.9435, "step": 15215 }, { "epoch": 0.8732572149865168, "grad_norm": 0.2578125, "learning_rate": 9.628115952643657e-06, "loss": 0.995, "step": 15220 }, { "epoch": 0.8735440931780366, "grad_norm": 0.35546875, "learning_rate": 9.585288681820992e-06, "loss": 0.997, "step": 15225 }, { "epoch": 0.8738309713695565, "grad_norm": 0.271484375, "learning_rate": 9.542552079073586e-06, "loss": 0.9715, "step": 15230 }, { "epoch": 0.8741178495610764, "grad_norm": 0.263671875, "learning_rate": 9.499906187257768e-06, "loss": 0.9445, "step": 15235 }, { "epoch": 0.8744047277525963, "grad_norm": 0.2578125, "learning_rate": 9.457351049138974e-06, "loss": 0.9681, "step": 15240 }, { "epoch": 0.8746916059441161, "grad_norm": 0.279296875, "learning_rate": 9.414886707391613e-06, "loss": 0.9653, "step": 15245 }, { "epoch": 0.8749784841356361, "grad_norm": 0.27734375, "learning_rate": 9.372513204598954e-06, "loss": 0.8629, "step": 15250 }, { "epoch": 0.8752653623271559, "grad_norm": 0.25, "learning_rate": 9.330230583253263e-06, "loss": 0.9324, "step": 15255 }, { "epoch": 0.8755522405186758, "grad_norm": 0.2578125, "learning_rate": 9.288038885755679e-06, "loss": 0.9968, "step": 15260 }, { "epoch": 0.8758391187101956, "grad_norm": 0.283203125, "learning_rate": 9.245938154416112e-06, "loss": 0.9753, "step": 15265 }, { "epoch": 0.8761259969017156, "grad_norm": 0.259765625, "learning_rate": 9.203928431453269e-06, "loss": 0.9914, "step": 15270 }, { "epoch": 0.8764128750932354, "grad_norm": 0.255859375, "learning_rate": 9.162009758994593e-06, "loss": 0.9888, "step": 15275 }, { "epoch": 0.8766997532847552, "grad_norm": 0.271484375, "learning_rate": 9.12018217907622e-06, "loss": 0.9092, "step": 15280 }, { "epoch": 0.8769866314762752, "grad_norm": 0.283203125, "learning_rate": 9.078445733642926e-06, "loss": 0.9634, "step": 15285 }, { "epoch": 0.877273509667795, "grad_norm": 0.2734375, "learning_rate": 9.036800464548157e-06, "loss": 0.9656, "step": 15290 }, { "epoch": 0.8775603878593149, "grad_norm": 0.2578125, "learning_rate": 8.995246413553871e-06, "loss": 0.8617, "step": 15295 }, { "epoch": 0.8778472660508349, "grad_norm": 0.341796875, "learning_rate": 8.953783622330515e-06, "loss": 0.925, "step": 15300 }, { "epoch": 0.8781341442423547, "grad_norm": 0.27734375, "learning_rate": 8.912412132457116e-06, "loss": 0.9452, "step": 15305 }, { "epoch": 0.8784210224338745, "grad_norm": 0.26171875, "learning_rate": 8.871131985421089e-06, "loss": 0.9806, "step": 15310 }, { "epoch": 0.8787079006253945, "grad_norm": 0.2578125, "learning_rate": 8.829943222618242e-06, "loss": 0.9902, "step": 15315 }, { "epoch": 0.8789947788169143, "grad_norm": 0.265625, "learning_rate": 8.788845885352782e-06, "loss": 0.8548, "step": 15320 }, { "epoch": 0.8792816570084342, "grad_norm": 0.248046875, "learning_rate": 8.747840014837194e-06, "loss": 0.9579, "step": 15325 }, { "epoch": 0.8795685351999541, "grad_norm": 0.275390625, "learning_rate": 8.706925652192255e-06, "loss": 0.9575, "step": 15330 }, { "epoch": 0.879855413391474, "grad_norm": 0.2734375, "learning_rate": 8.666102838446976e-06, "loss": 0.9764, "step": 15335 }, { "epoch": 0.8801422915829938, "grad_norm": 0.259765625, "learning_rate": 8.625371614538591e-06, "loss": 0.9222, "step": 15340 }, { "epoch": 0.8804291697745137, "grad_norm": 0.2451171875, "learning_rate": 8.584732021312469e-06, "loss": 0.9307, "step": 15345 }, { "epoch": 0.8807160479660336, "grad_norm": 0.310546875, "learning_rate": 8.544184099522024e-06, "loss": 0.9791, "step": 15350 }, { "epoch": 0.8810029261575535, "grad_norm": 0.271484375, "learning_rate": 8.50372788982886e-06, "loss": 0.9455, "step": 15355 }, { "epoch": 0.8812898043490733, "grad_norm": 0.25390625, "learning_rate": 8.46336343280254e-06, "loss": 0.8782, "step": 15360 }, { "epoch": 0.8815766825405933, "grad_norm": 0.279296875, "learning_rate": 8.423090768920628e-06, "loss": 0.9239, "step": 15365 }, { "epoch": 0.8818635607321131, "grad_norm": 0.279296875, "learning_rate": 8.38290993856865e-06, "loss": 0.9948, "step": 15370 }, { "epoch": 0.882150438923633, "grad_norm": 0.271484375, "learning_rate": 8.342820982040011e-06, "loss": 1.0173, "step": 15375 }, { "epoch": 0.8824373171151529, "grad_norm": 0.263671875, "learning_rate": 8.30282393953603e-06, "loss": 0.9433, "step": 15380 }, { "epoch": 0.8827241953066728, "grad_norm": 0.251953125, "learning_rate": 8.262918851165813e-06, "loss": 0.8967, "step": 15385 }, { "epoch": 0.8830110734981926, "grad_norm": 0.26171875, "learning_rate": 8.223105756946292e-06, "loss": 0.9086, "step": 15390 }, { "epoch": 0.8832979516897126, "grad_norm": 0.25, "learning_rate": 8.183384696802132e-06, "loss": 0.9384, "step": 15395 }, { "epoch": 0.8835848298812324, "grad_norm": 0.279296875, "learning_rate": 8.143755710565648e-06, "loss": 0.9808, "step": 15400 }, { "epoch": 0.8838717080727523, "grad_norm": 0.279296875, "learning_rate": 8.10421883797694e-06, "loss": 0.9723, "step": 15405 }, { "epoch": 0.8841585862642722, "grad_norm": 0.2353515625, "learning_rate": 8.064774118683638e-06, "loss": 0.8979, "step": 15410 }, { "epoch": 0.8844454644557921, "grad_norm": 0.259765625, "learning_rate": 8.025421592241012e-06, "loss": 0.9903, "step": 15415 }, { "epoch": 0.8847323426473119, "grad_norm": 0.275390625, "learning_rate": 7.98616129811185e-06, "loss": 0.9551, "step": 15420 }, { "epoch": 0.8850192208388318, "grad_norm": 0.28515625, "learning_rate": 7.94699327566647e-06, "loss": 0.995, "step": 15425 }, { "epoch": 0.8853060990303517, "grad_norm": 0.267578125, "learning_rate": 7.907917564182631e-06, "loss": 0.8506, "step": 15430 }, { "epoch": 0.8855929772218716, "grad_norm": 0.259765625, "learning_rate": 7.86893420284559e-06, "loss": 0.8804, "step": 15435 }, { "epoch": 0.8858798554133914, "grad_norm": 0.287109375, "learning_rate": 7.830043230747918e-06, "loss": 0.9101, "step": 15440 }, { "epoch": 0.8861667336049114, "grad_norm": 0.265625, "learning_rate": 7.791244686889588e-06, "loss": 0.9703, "step": 15445 }, { "epoch": 0.8864536117964312, "grad_norm": 0.271484375, "learning_rate": 7.752538610177817e-06, "loss": 0.922, "step": 15450 }, { "epoch": 0.8867404899879511, "grad_norm": 0.255859375, "learning_rate": 7.713925039427206e-06, "loss": 1.0184, "step": 15455 }, { "epoch": 0.887027368179471, "grad_norm": 0.267578125, "learning_rate": 7.67540401335951e-06, "loss": 0.9566, "step": 15460 }, { "epoch": 0.8873142463709909, "grad_norm": 0.2734375, "learning_rate": 7.636975570603689e-06, "loss": 0.9193, "step": 15465 }, { "epoch": 0.8876011245625107, "grad_norm": 0.26953125, "learning_rate": 7.5986397496958796e-06, "loss": 0.9882, "step": 15470 }, { "epoch": 0.8878880027540307, "grad_norm": 0.27734375, "learning_rate": 7.560396589079322e-06, "loss": 0.9726, "step": 15475 }, { "epoch": 0.8881748809455505, "grad_norm": 0.2578125, "learning_rate": 7.522246127104348e-06, "loss": 0.9203, "step": 15480 }, { "epoch": 0.8884617591370704, "grad_norm": 0.236328125, "learning_rate": 7.484188402028336e-06, "loss": 0.9681, "step": 15485 }, { "epoch": 0.8887486373285903, "grad_norm": 0.26953125, "learning_rate": 7.446223452015644e-06, "loss": 0.972, "step": 15490 }, { "epoch": 0.8890355155201102, "grad_norm": 0.265625, "learning_rate": 7.40835131513764e-06, "loss": 1.0296, "step": 15495 }, { "epoch": 0.88932239371163, "grad_norm": 0.25390625, "learning_rate": 7.3705720293725245e-06, "loss": 0.9597, "step": 15500 }, { "epoch": 0.8896092719031499, "grad_norm": 0.255859375, "learning_rate": 7.332885632605513e-06, "loss": 0.8605, "step": 15505 }, { "epoch": 0.8898961500946698, "grad_norm": 0.248046875, "learning_rate": 7.295292162628575e-06, "loss": 0.9628, "step": 15510 }, { "epoch": 0.8901830282861897, "grad_norm": 0.271484375, "learning_rate": 7.257791657140545e-06, "loss": 0.9441, "step": 15515 }, { "epoch": 0.8904699064777095, "grad_norm": 0.26171875, "learning_rate": 7.220384153746995e-06, "loss": 0.9413, "step": 15520 }, { "epoch": 0.8907567846692295, "grad_norm": 0.26171875, "learning_rate": 7.183069689960265e-06, "loss": 0.9683, "step": 15525 }, { "epoch": 0.8910436628607493, "grad_norm": 0.2890625, "learning_rate": 7.145848303199365e-06, "loss": 0.9836, "step": 15530 }, { "epoch": 0.8913305410522692, "grad_norm": 0.26171875, "learning_rate": 7.108720030790028e-06, "loss": 0.9379, "step": 15535 }, { "epoch": 0.8916174192437891, "grad_norm": 0.291015625, "learning_rate": 7.071684909964526e-06, "loss": 0.9567, "step": 15540 }, { "epoch": 0.891904297435309, "grad_norm": 0.26171875, "learning_rate": 7.034742977861786e-06, "loss": 0.9171, "step": 15545 }, { "epoch": 0.8921911756268288, "grad_norm": 0.26953125, "learning_rate": 6.99789427152725e-06, "loss": 0.9089, "step": 15550 }, { "epoch": 0.8924780538183488, "grad_norm": 0.251953125, "learning_rate": 6.9611388279128835e-06, "loss": 0.9761, "step": 15555 }, { "epoch": 0.8927649320098686, "grad_norm": 0.259765625, "learning_rate": 6.9244766838771235e-06, "loss": 0.9449, "step": 15560 }, { "epoch": 0.8930518102013885, "grad_norm": 0.2734375, "learning_rate": 6.887907876184862e-06, "loss": 0.9312, "step": 15565 }, { "epoch": 0.8933386883929084, "grad_norm": 0.25, "learning_rate": 6.851432441507377e-06, "loss": 0.9072, "step": 15570 }, { "epoch": 0.8936255665844283, "grad_norm": 0.263671875, "learning_rate": 6.8150504164223085e-06, "loss": 0.9976, "step": 15575 }, { "epoch": 0.8939124447759481, "grad_norm": 0.27734375, "learning_rate": 6.778761837413627e-06, "loss": 0.9691, "step": 15580 }, { "epoch": 0.894199322967468, "grad_norm": 0.27734375, "learning_rate": 6.742566740871625e-06, "loss": 0.9596, "step": 15585 }, { "epoch": 0.8944862011589879, "grad_norm": 0.2734375, "learning_rate": 6.706465163092823e-06, "loss": 0.9941, "step": 15590 }, { "epoch": 0.8947730793505078, "grad_norm": 0.2578125, "learning_rate": 6.67045714027994e-06, "loss": 0.9539, "step": 15595 }, { "epoch": 0.8950599575420276, "grad_norm": 0.267578125, "learning_rate": 6.634542708541935e-06, "loss": 0.931, "step": 15600 }, { "epoch": 0.8953468357335476, "grad_norm": 0.30078125, "learning_rate": 6.5987219038938455e-06, "loss": 0.9465, "step": 15605 }, { "epoch": 0.8956337139250674, "grad_norm": 0.251953125, "learning_rate": 6.562994762256869e-06, "loss": 0.8939, "step": 15610 }, { "epoch": 0.8959205921165873, "grad_norm": 0.25, "learning_rate": 6.527361319458292e-06, "loss": 0.932, "step": 15615 }, { "epoch": 0.8962074703081072, "grad_norm": 0.2578125, "learning_rate": 6.491821611231364e-06, "loss": 0.9207, "step": 15620 }, { "epoch": 0.8964943484996271, "grad_norm": 0.2890625, "learning_rate": 6.456375673215409e-06, "loss": 1.001, "step": 15625 }, { "epoch": 0.8967812266911469, "grad_norm": 0.2578125, "learning_rate": 6.421023540955684e-06, "loss": 0.8778, "step": 15630 }, { "epoch": 0.8970681048826669, "grad_norm": 0.2734375, "learning_rate": 6.3857652499033974e-06, "loss": 0.8923, "step": 15635 }, { "epoch": 0.8973549830741867, "grad_norm": 0.259765625, "learning_rate": 6.350600835415632e-06, "loss": 0.9465, "step": 15640 }, { "epoch": 0.8976418612657066, "grad_norm": 0.263671875, "learning_rate": 6.31553033275536e-06, "loss": 0.9561, "step": 15645 }, { "epoch": 0.8979287394572265, "grad_norm": 0.279296875, "learning_rate": 6.2805537770913356e-06, "loss": 0.9631, "step": 15650 }, { "epoch": 0.8982156176487464, "grad_norm": 0.251953125, "learning_rate": 6.245671203498149e-06, "loss": 0.9159, "step": 15655 }, { "epoch": 0.8985024958402662, "grad_norm": 0.259765625, "learning_rate": 6.210882646956084e-06, "loss": 0.894, "step": 15660 }, { "epoch": 0.8987893740317862, "grad_norm": 0.265625, "learning_rate": 6.176188142351247e-06, "loss": 0.9014, "step": 15665 }, { "epoch": 0.899076252223306, "grad_norm": 0.2890625, "learning_rate": 6.141587724475317e-06, "loss": 0.9285, "step": 15670 }, { "epoch": 0.8993631304148259, "grad_norm": 0.2578125, "learning_rate": 6.107081428025674e-06, "loss": 0.9748, "step": 15675 }, { "epoch": 0.8996500086063457, "grad_norm": 0.267578125, "learning_rate": 6.072669287605326e-06, "loss": 1.0066, "step": 15680 }, { "epoch": 0.8999368867978657, "grad_norm": 0.271484375, "learning_rate": 6.038351337722836e-06, "loss": 0.9463, "step": 15685 }, { "epoch": 0.9002237649893855, "grad_norm": 0.271484375, "learning_rate": 6.004127612792332e-06, "loss": 0.8783, "step": 15690 }, { "epoch": 0.9005106431809053, "grad_norm": 0.265625, "learning_rate": 5.969998147133415e-06, "loss": 0.9672, "step": 15695 }, { "epoch": 0.9007975213724253, "grad_norm": 0.251953125, "learning_rate": 5.935962974971221e-06, "loss": 0.9173, "step": 15700 }, { "epoch": 0.9010843995639451, "grad_norm": 0.2734375, "learning_rate": 5.9020221304362686e-06, "loss": 0.9124, "step": 15705 }, { "epoch": 0.901371277755465, "grad_norm": 0.2578125, "learning_rate": 5.868175647564522e-06, "loss": 0.9556, "step": 15710 }, { "epoch": 0.901658155946985, "grad_norm": 0.27734375, "learning_rate": 5.834423560297353e-06, "loss": 0.9977, "step": 15715 }, { "epoch": 0.9019450341385048, "grad_norm": 0.263671875, "learning_rate": 5.800765902481364e-06, "loss": 0.9459, "step": 15720 }, { "epoch": 0.9022319123300246, "grad_norm": 0.251953125, "learning_rate": 5.767202707868558e-06, "loss": 0.9167, "step": 15725 }, { "epoch": 0.9025187905215446, "grad_norm": 0.251953125, "learning_rate": 5.733734010116188e-06, "loss": 0.9177, "step": 15730 }, { "epoch": 0.9028056687130644, "grad_norm": 0.279296875, "learning_rate": 5.700359842786729e-06, "loss": 0.8644, "step": 15735 }, { "epoch": 0.9030925469045843, "grad_norm": 0.267578125, "learning_rate": 5.667080239347889e-06, "loss": 0.9814, "step": 15740 }, { "epoch": 0.9033794250961042, "grad_norm": 0.26953125, "learning_rate": 5.633895233172504e-06, "loss": 0.9664, "step": 15745 }, { "epoch": 0.9036663032876241, "grad_norm": 0.2734375, "learning_rate": 5.600804857538588e-06, "loss": 0.9869, "step": 15750 }, { "epoch": 0.9039531814791439, "grad_norm": 0.259765625, "learning_rate": 5.567809145629244e-06, "loss": 0.9679, "step": 15755 }, { "epoch": 0.9042400596706638, "grad_norm": 0.263671875, "learning_rate": 5.534908130532623e-06, "loss": 0.9669, "step": 15760 }, { "epoch": 0.9045269378621837, "grad_norm": 0.27734375, "learning_rate": 5.50210184524198e-06, "loss": 0.9085, "step": 15765 }, { "epoch": 0.9048138160537036, "grad_norm": 0.263671875, "learning_rate": 5.469390322655498e-06, "loss": 0.8902, "step": 15770 }, { "epoch": 0.9051006942452234, "grad_norm": 0.251953125, "learning_rate": 5.436773595576361e-06, "loss": 0.8946, "step": 15775 }, { "epoch": 0.9053875724367434, "grad_norm": 0.263671875, "learning_rate": 5.404251696712714e-06, "loss": 0.9744, "step": 15780 }, { "epoch": 0.9056744506282632, "grad_norm": 0.265625, "learning_rate": 5.371824658677594e-06, "loss": 1.0072, "step": 15785 }, { "epoch": 0.9059613288197831, "grad_norm": 0.248046875, "learning_rate": 5.339492513988897e-06, "loss": 0.9326, "step": 15790 }, { "epoch": 0.906248207011303, "grad_norm": 0.25, "learning_rate": 5.307255295069369e-06, "loss": 0.9251, "step": 15795 }, { "epoch": 0.9065350852028229, "grad_norm": 0.2734375, "learning_rate": 5.275113034246571e-06, "loss": 1.0251, "step": 15800 }, { "epoch": 0.9068219633943427, "grad_norm": 0.263671875, "learning_rate": 5.243065763752819e-06, "loss": 0.9648, "step": 15805 }, { "epoch": 0.9071088415858627, "grad_norm": 0.28515625, "learning_rate": 5.2111135157252076e-06, "loss": 0.9644, "step": 15810 }, { "epoch": 0.9073957197773825, "grad_norm": 0.28125, "learning_rate": 5.179256322205539e-06, "loss": 0.9843, "step": 15815 }, { "epoch": 0.9076825979689024, "grad_norm": 0.32421875, "learning_rate": 5.147494215140236e-06, "loss": 0.9976, "step": 15820 }, { "epoch": 0.9079694761604223, "grad_norm": 0.2470703125, "learning_rate": 5.115827226380421e-06, "loss": 0.8557, "step": 15825 }, { "epoch": 0.9082563543519422, "grad_norm": 0.279296875, "learning_rate": 5.084255387681836e-06, "loss": 0.9765, "step": 15830 }, { "epoch": 0.908543232543462, "grad_norm": 0.3046875, "learning_rate": 5.052778730704788e-06, "loss": 0.9705, "step": 15835 }, { "epoch": 0.9088301107349819, "grad_norm": 0.26953125, "learning_rate": 5.021397287014129e-06, "loss": 1.0047, "step": 15840 }, { "epoch": 0.9091169889265018, "grad_norm": 0.263671875, "learning_rate": 4.990111088079263e-06, "loss": 0.92, "step": 15845 }, { "epoch": 0.9094038671180217, "grad_norm": 0.265625, "learning_rate": 4.958920165274039e-06, "loss": 1.0487, "step": 15850 }, { "epoch": 0.9096907453095415, "grad_norm": 0.287109375, "learning_rate": 4.92782454987678e-06, "loss": 0.9803, "step": 15855 }, { "epoch": 0.9099776235010615, "grad_norm": 0.251953125, "learning_rate": 4.896824273070255e-06, "loss": 0.8652, "step": 15860 }, { "epoch": 0.9102645016925813, "grad_norm": 0.267578125, "learning_rate": 4.865919365941629e-06, "loss": 1.0166, "step": 15865 }, { "epoch": 0.9105513798841012, "grad_norm": 0.265625, "learning_rate": 4.8351098594823674e-06, "loss": 0.9448, "step": 15870 }, { "epoch": 0.9108382580756211, "grad_norm": 0.2451171875, "learning_rate": 4.804395784588334e-06, "loss": 0.9409, "step": 15875 }, { "epoch": 0.911125136267141, "grad_norm": 0.25, "learning_rate": 4.77377717205969e-06, "loss": 0.9747, "step": 15880 }, { "epoch": 0.9114120144586608, "grad_norm": 0.2470703125, "learning_rate": 4.7432540526008205e-06, "loss": 0.883, "step": 15885 }, { "epoch": 0.9116988926501808, "grad_norm": 0.279296875, "learning_rate": 4.712826456820385e-06, "loss": 0.9437, "step": 15890 }, { "epoch": 0.9119857708417006, "grad_norm": 0.29296875, "learning_rate": 4.682494415231253e-06, "loss": 0.8496, "step": 15895 }, { "epoch": 0.9122726490332205, "grad_norm": 0.265625, "learning_rate": 4.652257958250461e-06, "loss": 0.9518, "step": 15900 }, { "epoch": 0.9125595272247404, "grad_norm": 0.255859375, "learning_rate": 4.6221171161991874e-06, "loss": 0.9091, "step": 15905 }, { "epoch": 0.9128464054162603, "grad_norm": 0.2734375, "learning_rate": 4.592071919302743e-06, "loss": 0.9391, "step": 15910 }, { "epoch": 0.9131332836077801, "grad_norm": 0.2734375, "learning_rate": 4.562122397690538e-06, "loss": 0.9312, "step": 15915 }, { "epoch": 0.9134201617993, "grad_norm": 0.259765625, "learning_rate": 4.532268581395982e-06, "loss": 0.889, "step": 15920 }, { "epoch": 0.9137070399908199, "grad_norm": 0.265625, "learning_rate": 4.502510500356571e-06, "loss": 0.909, "step": 15925 }, { "epoch": 0.9139939181823398, "grad_norm": 0.26171875, "learning_rate": 4.472848184413769e-06, "loss": 0.9407, "step": 15930 }, { "epoch": 0.9142807963738596, "grad_norm": 0.259765625, "learning_rate": 4.443281663313026e-06, "loss": 0.9368, "step": 15935 }, { "epoch": 0.9145676745653796, "grad_norm": 0.26953125, "learning_rate": 4.413810966703702e-06, "loss": 1.0034, "step": 15940 }, { "epoch": 0.9148545527568994, "grad_norm": 0.26953125, "learning_rate": 4.3844361241390795e-06, "loss": 0.8068, "step": 15945 }, { "epoch": 0.9151414309484193, "grad_norm": 0.251953125, "learning_rate": 4.355157165076318e-06, "loss": 0.8796, "step": 15950 }, { "epoch": 0.9154283091399392, "grad_norm": 0.251953125, "learning_rate": 4.325974118876408e-06, "loss": 0.9913, "step": 15955 }, { "epoch": 0.9157151873314591, "grad_norm": 0.251953125, "learning_rate": 4.296887014804207e-06, "loss": 0.9241, "step": 15960 }, { "epoch": 0.9160020655229789, "grad_norm": 0.26953125, "learning_rate": 4.267895882028328e-06, "loss": 0.9562, "step": 15965 }, { "epoch": 0.9162889437144989, "grad_norm": 0.25390625, "learning_rate": 4.239000749621092e-06, "loss": 0.9201, "step": 15970 }, { "epoch": 0.9165758219060187, "grad_norm": 0.25, "learning_rate": 4.210201646558653e-06, "loss": 0.9493, "step": 15975 }, { "epoch": 0.9168627000975386, "grad_norm": 0.255859375, "learning_rate": 4.181498601720801e-06, "loss": 0.9421, "step": 15980 }, { "epoch": 0.9171495782890585, "grad_norm": 0.2734375, "learning_rate": 4.15289164389101e-06, "loss": 0.9286, "step": 15985 }, { "epoch": 0.9174364564805784, "grad_norm": 0.279296875, "learning_rate": 4.124380801756411e-06, "loss": 0.9408, "step": 15990 }, { "epoch": 0.9177233346720982, "grad_norm": 0.275390625, "learning_rate": 4.095966103907723e-06, "loss": 1.0005, "step": 15995 }, { "epoch": 0.9180102128636181, "grad_norm": 0.25390625, "learning_rate": 4.0676475788392845e-06, "loss": 0.9564, "step": 16000 }, { "epoch": 0.918297091055138, "grad_norm": 0.28125, "learning_rate": 4.039425254948958e-06, "loss": 0.9974, "step": 16005 }, { "epoch": 0.9185839692466579, "grad_norm": 0.287109375, "learning_rate": 4.011299160538185e-06, "loss": 0.9932, "step": 16010 }, { "epoch": 0.9188708474381777, "grad_norm": 0.2578125, "learning_rate": 3.983269323811856e-06, "loss": 0.9562, "step": 16015 }, { "epoch": 0.9191577256296977, "grad_norm": 0.2578125, "learning_rate": 3.955335772878343e-06, "loss": 0.9051, "step": 16020 }, { "epoch": 0.9194446038212175, "grad_norm": 0.26171875, "learning_rate": 3.927498535749486e-06, "loss": 0.9962, "step": 16025 }, { "epoch": 0.9197314820127374, "grad_norm": 0.275390625, "learning_rate": 3.89975764034054e-06, "loss": 0.8909, "step": 16030 }, { "epoch": 0.9200183602042573, "grad_norm": 0.275390625, "learning_rate": 3.872113114470122e-06, "loss": 0.9961, "step": 16035 }, { "epoch": 0.9203052383957772, "grad_norm": 0.263671875, "learning_rate": 3.844564985860222e-06, "loss": 0.9576, "step": 16040 }, { "epoch": 0.920592116587297, "grad_norm": 0.267578125, "learning_rate": 3.817113282136176e-06, "loss": 0.8964, "step": 16045 }, { "epoch": 0.920878994778817, "grad_norm": 0.265625, "learning_rate": 3.7897580308265954e-06, "loss": 0.9571, "step": 16050 }, { "epoch": 0.9211658729703368, "grad_norm": 0.265625, "learning_rate": 3.762499259363417e-06, "loss": 0.9314, "step": 16055 }, { "epoch": 0.9214527511618567, "grad_norm": 0.263671875, "learning_rate": 3.735336995081795e-06, "loss": 0.9395, "step": 16060 }, { "epoch": 0.9217396293533766, "grad_norm": 0.2734375, "learning_rate": 3.7082712652200867e-06, "loss": 0.958, "step": 16065 }, { "epoch": 0.9220265075448965, "grad_norm": 0.28515625, "learning_rate": 3.6813020969198585e-06, "loss": 0.9532, "step": 16070 }, { "epoch": 0.9223133857364163, "grad_norm": 0.26953125, "learning_rate": 3.654429517225877e-06, "loss": 0.9759, "step": 16075 }, { "epoch": 0.9226002639279361, "grad_norm": 0.251953125, "learning_rate": 3.62765355308603e-06, "loss": 0.9062, "step": 16080 }, { "epoch": 0.9228871421194561, "grad_norm": 0.26171875, "learning_rate": 3.600974231351306e-06, "loss": 0.9079, "step": 16085 }, { "epoch": 0.923174020310976, "grad_norm": 0.259765625, "learning_rate": 3.574391578775771e-06, "loss": 0.9881, "step": 16090 }, { "epoch": 0.9234608985024958, "grad_norm": 0.275390625, "learning_rate": 3.547905622016601e-06, "loss": 1.0151, "step": 16095 }, { "epoch": 0.9237477766940158, "grad_norm": 0.275390625, "learning_rate": 3.5215163876339274e-06, "loss": 0.8687, "step": 16100 }, { "epoch": 0.9240346548855356, "grad_norm": 0.279296875, "learning_rate": 3.495223902090983e-06, "loss": 0.9379, "step": 16105 }, { "epoch": 0.9243215330770554, "grad_norm": 0.26171875, "learning_rate": 3.4690281917539203e-06, "loss": 0.969, "step": 16110 }, { "epoch": 0.9246084112685754, "grad_norm": 0.267578125, "learning_rate": 3.442929282891827e-06, "loss": 0.9878, "step": 16115 }, { "epoch": 0.9248952894600952, "grad_norm": 0.267578125, "learning_rate": 3.416927201676767e-06, "loss": 0.9008, "step": 16120 }, { "epoch": 0.9251821676516151, "grad_norm": 0.2734375, "learning_rate": 3.3910219741836944e-06, "loss": 0.9432, "step": 16125 }, { "epoch": 0.925469045843135, "grad_norm": 0.2470703125, "learning_rate": 3.365213626390418e-06, "loss": 0.9459, "step": 16130 }, { "epoch": 0.9257559240346549, "grad_norm": 0.28515625, "learning_rate": 3.339502184177612e-06, "loss": 0.9924, "step": 16135 }, { "epoch": 0.9260428022261747, "grad_norm": 0.310546875, "learning_rate": 3.3138876733287638e-06, "loss": 0.9324, "step": 16140 }, { "epoch": 0.9263296804176947, "grad_norm": 0.275390625, "learning_rate": 3.28837011953016e-06, "loss": 0.9471, "step": 16145 }, { "epoch": 0.9266165586092145, "grad_norm": 0.263671875, "learning_rate": 3.262949548370853e-06, "loss": 0.94, "step": 16150 }, { "epoch": 0.9269034368007344, "grad_norm": 0.267578125, "learning_rate": 3.237625985342674e-06, "loss": 0.9803, "step": 16155 }, { "epoch": 0.9271903149922542, "grad_norm": 0.283203125, "learning_rate": 3.212399455840154e-06, "loss": 0.9429, "step": 16160 }, { "epoch": 0.9274771931837742, "grad_norm": 0.263671875, "learning_rate": 3.187269985160457e-06, "loss": 0.9188, "step": 16165 }, { "epoch": 0.927764071375294, "grad_norm": 0.26171875, "learning_rate": 3.1622375985035367e-06, "loss": 0.9285, "step": 16170 }, { "epoch": 0.9280509495668139, "grad_norm": 0.287109375, "learning_rate": 3.137302320971891e-06, "loss": 0.9227, "step": 16175 }, { "epoch": 0.9283378277583338, "grad_norm": 0.28125, "learning_rate": 3.112464177570662e-06, "loss": 0.9039, "step": 16180 }, { "epoch": 0.9286247059498537, "grad_norm": 0.259765625, "learning_rate": 3.087723193207648e-06, "loss": 0.9231, "step": 16185 }, { "epoch": 0.9289115841413735, "grad_norm": 0.271484375, "learning_rate": 3.0630793926931132e-06, "loss": 0.8935, "step": 16190 }, { "epoch": 0.9291984623328935, "grad_norm": 0.26171875, "learning_rate": 3.038532800739935e-06, "loss": 0.8984, "step": 16195 }, { "epoch": 0.9294853405244133, "grad_norm": 0.341796875, "learning_rate": 3.014083441963478e-06, "loss": 0.9787, "step": 16200 }, { "epoch": 0.9297722187159332, "grad_norm": 0.2578125, "learning_rate": 2.9897313408816407e-06, "loss": 0.9824, "step": 16205 }, { "epoch": 0.9300590969074531, "grad_norm": 0.28125, "learning_rate": 2.9654765219147563e-06, "loss": 0.8349, "step": 16210 }, { "epoch": 0.930345975098973, "grad_norm": 0.263671875, "learning_rate": 2.941319009385579e-06, "loss": 0.909, "step": 16215 }, { "epoch": 0.9306328532904928, "grad_norm": 0.26171875, "learning_rate": 2.9172588275193534e-06, "loss": 0.8929, "step": 16220 }, { "epoch": 0.9309197314820128, "grad_norm": 0.271484375, "learning_rate": 2.8932960004436795e-06, "loss": 0.9595, "step": 16225 }, { "epoch": 0.9312066096735326, "grad_norm": 0.267578125, "learning_rate": 2.869430552188501e-06, "loss": 0.9288, "step": 16230 }, { "epoch": 0.9314934878650525, "grad_norm": 0.3359375, "learning_rate": 2.8456625066861973e-06, "loss": 0.9109, "step": 16235 }, { "epoch": 0.9317803660565723, "grad_norm": 0.279296875, "learning_rate": 2.8219918877713804e-06, "loss": 0.9129, "step": 16240 }, { "epoch": 0.9320672442480923, "grad_norm": 0.267578125, "learning_rate": 2.7984187191810063e-06, "loss": 0.8989, "step": 16245 }, { "epoch": 0.9323541224396121, "grad_norm": 0.26171875, "learning_rate": 2.7749430245542997e-06, "loss": 0.9914, "step": 16250 }, { "epoch": 0.932641000631132, "grad_norm": 0.302734375, "learning_rate": 2.751564827432751e-06, "loss": 0.9854, "step": 16255 }, { "epoch": 0.9329278788226519, "grad_norm": 0.267578125, "learning_rate": 2.7282841512600632e-06, "loss": 0.8519, "step": 16260 }, { "epoch": 0.9332147570141718, "grad_norm": 0.2734375, "learning_rate": 2.705101019382139e-06, "loss": 1.001, "step": 16265 }, { "epoch": 0.9335016352056916, "grad_norm": 0.28125, "learning_rate": 2.682015455047093e-06, "loss": 1.0089, "step": 16270 }, { "epoch": 0.9337885133972116, "grad_norm": 0.263671875, "learning_rate": 2.659027481405163e-06, "loss": 0.9404, "step": 16275 }, { "epoch": 0.9340753915887314, "grad_norm": 0.275390625, "learning_rate": 2.636137121508753e-06, "loss": 0.9803, "step": 16280 }, { "epoch": 0.9343622697802513, "grad_norm": 0.283203125, "learning_rate": 2.6133443983123785e-06, "loss": 1.0476, "step": 16285 }, { "epoch": 0.9346491479717712, "grad_norm": 0.263671875, "learning_rate": 2.5906493346726126e-06, "loss": 0.895, "step": 16290 }, { "epoch": 0.9349360261632911, "grad_norm": 0.28515625, "learning_rate": 2.5680519533481052e-06, "loss": 0.9463, "step": 16295 }, { "epoch": 0.9352229043548109, "grad_norm": 0.251953125, "learning_rate": 2.5455522769995966e-06, "loss": 0.9837, "step": 16300 }, { "epoch": 0.9355097825463309, "grad_norm": 0.259765625, "learning_rate": 2.523150328189783e-06, "loss": 0.9138, "step": 16305 }, { "epoch": 0.9357966607378507, "grad_norm": 0.26171875, "learning_rate": 2.500846129383416e-06, "loss": 0.9509, "step": 16310 }, { "epoch": 0.9360835389293706, "grad_norm": 0.25, "learning_rate": 2.478639702947172e-06, "loss": 0.9884, "step": 16315 }, { "epoch": 0.9363704171208904, "grad_norm": 0.26953125, "learning_rate": 2.4565310711497146e-06, "loss": 0.9741, "step": 16320 }, { "epoch": 0.9366572953124104, "grad_norm": 0.2734375, "learning_rate": 2.434520256161632e-06, "loss": 0.9581, "step": 16325 }, { "epoch": 0.9369441735039302, "grad_norm": 0.283203125, "learning_rate": 2.412607280055401e-06, "loss": 0.9786, "step": 16330 }, { "epoch": 0.9372310516954501, "grad_norm": 0.255859375, "learning_rate": 2.390792164805433e-06, "loss": 0.8686, "step": 16335 }, { "epoch": 0.93751792988697, "grad_norm": 0.279296875, "learning_rate": 2.3690749322879624e-06, "loss": 1.0135, "step": 16340 }, { "epoch": 0.9378048080784899, "grad_norm": 0.2578125, "learning_rate": 2.347455604281057e-06, "loss": 0.9064, "step": 16345 }, { "epoch": 0.9380916862700097, "grad_norm": 0.27734375, "learning_rate": 2.3259342024646524e-06, "loss": 0.9567, "step": 16350 }, { "epoch": 0.9383785644615297, "grad_norm": 0.2578125, "learning_rate": 2.304510748420463e-06, "loss": 1.0151, "step": 16355 }, { "epoch": 0.9386654426530495, "grad_norm": 0.2412109375, "learning_rate": 2.2831852636319594e-06, "loss": 0.9201, "step": 16360 }, { "epoch": 0.9389523208445694, "grad_norm": 0.25390625, "learning_rate": 2.2619577694843907e-06, "loss": 0.9121, "step": 16365 }, { "epoch": 0.9392391990360893, "grad_norm": 0.2578125, "learning_rate": 2.240828287264729e-06, "loss": 0.9018, "step": 16370 }, { "epoch": 0.9395260772276092, "grad_norm": 0.26171875, "learning_rate": 2.219796838161681e-06, "loss": 0.8755, "step": 16375 }, { "epoch": 0.939812955419129, "grad_norm": 0.26171875, "learning_rate": 2.1988634432656197e-06, "loss": 0.9599, "step": 16380 }, { "epoch": 0.940099833610649, "grad_norm": 0.26953125, "learning_rate": 2.1780281235686206e-06, "loss": 0.9062, "step": 16385 }, { "epoch": 0.9403867118021688, "grad_norm": 0.25390625, "learning_rate": 2.1572908999643705e-06, "loss": 0.9129, "step": 16390 }, { "epoch": 0.9406735899936887, "grad_norm": 0.26171875, "learning_rate": 2.13665179324819e-06, "loss": 0.8914, "step": 16395 }, { "epoch": 0.9409604681852085, "grad_norm": 0.26171875, "learning_rate": 2.116110824117046e-06, "loss": 0.88, "step": 16400 }, { "epoch": 0.9412473463767285, "grad_norm": 0.271484375, "learning_rate": 2.0956680131694604e-06, "loss": 0.944, "step": 16405 }, { "epoch": 0.9415342245682483, "grad_norm": 0.271484375, "learning_rate": 2.075323380905536e-06, "loss": 0.9598, "step": 16410 }, { "epoch": 0.9418211027597682, "grad_norm": 0.27734375, "learning_rate": 2.0550769477269084e-06, "loss": 0.986, "step": 16415 }, { "epoch": 0.9421079809512881, "grad_norm": 0.263671875, "learning_rate": 2.0349287339367364e-06, "loss": 0.9844, "step": 16420 }, { "epoch": 0.942394859142808, "grad_norm": 0.279296875, "learning_rate": 2.0148787597397136e-06, "loss": 0.9443, "step": 16425 }, { "epoch": 0.9426817373343278, "grad_norm": 0.255859375, "learning_rate": 1.99492704524199e-06, "loss": 0.9414, "step": 16430 }, { "epoch": 0.9429686155258478, "grad_norm": 0.263671875, "learning_rate": 1.9750736104511947e-06, "loss": 0.9172, "step": 16435 }, { "epoch": 0.9432554937173676, "grad_norm": 0.265625, "learning_rate": 1.955318475276391e-06, "loss": 0.9453, "step": 16440 }, { "epoch": 0.9435423719088875, "grad_norm": 0.251953125, "learning_rate": 1.935661659528054e-06, "loss": 0.9433, "step": 16445 }, { "epoch": 0.9438292501004074, "grad_norm": 0.28515625, "learning_rate": 1.9161031829181275e-06, "loss": 0.9827, "step": 16450 }, { "epoch": 0.9441161282919273, "grad_norm": 0.271484375, "learning_rate": 1.8966430650598554e-06, "loss": 0.9666, "step": 16455 }, { "epoch": 0.9444030064834471, "grad_norm": 0.283203125, "learning_rate": 1.8772813254679166e-06, "loss": 0.9609, "step": 16460 }, { "epoch": 0.9446898846749671, "grad_norm": 0.2451171875, "learning_rate": 1.85801798355828e-06, "loss": 0.9515, "step": 16465 }, { "epoch": 0.9449767628664869, "grad_norm": 0.267578125, "learning_rate": 1.8388530586482932e-06, "loss": 0.9266, "step": 16470 }, { "epoch": 0.9452636410580068, "grad_norm": 0.259765625, "learning_rate": 1.8197865699565497e-06, "loss": 0.9186, "step": 16475 }, { "epoch": 0.9455505192495266, "grad_norm": 0.26953125, "learning_rate": 1.8008185366030217e-06, "loss": 0.9243, "step": 16480 }, { "epoch": 0.9458373974410466, "grad_norm": 0.27734375, "learning_rate": 1.7819489776088493e-06, "loss": 0.9013, "step": 16485 }, { "epoch": 0.9461242756325664, "grad_norm": 0.27734375, "learning_rate": 1.7631779118964852e-06, "loss": 0.9348, "step": 16490 }, { "epoch": 0.9464111538240862, "grad_norm": 0.27734375, "learning_rate": 1.7445053582895944e-06, "loss": 0.9947, "step": 16495 }, { "epoch": 0.9466980320156062, "grad_norm": 0.26171875, "learning_rate": 1.7259313355130647e-06, "loss": 0.9336, "step": 16500 }, { "epoch": 0.946984910207126, "grad_norm": 0.271484375, "learning_rate": 1.7074558621929526e-06, "loss": 0.9448, "step": 16505 }, { "epoch": 0.9472717883986459, "grad_norm": 0.259765625, "learning_rate": 1.6890789568565156e-06, "loss": 0.9379, "step": 16510 }, { "epoch": 0.9475586665901659, "grad_norm": 0.28515625, "learning_rate": 1.670800637932146e-06, "loss": 0.9013, "step": 16515 }, { "epoch": 0.9478455447816857, "grad_norm": 0.25390625, "learning_rate": 1.6526209237493928e-06, "loss": 0.9358, "step": 16520 }, { "epoch": 0.9481324229732055, "grad_norm": 0.263671875, "learning_rate": 1.634539832538895e-06, "loss": 0.9984, "step": 16525 }, { "epoch": 0.9484193011647255, "grad_norm": 0.263671875, "learning_rate": 1.6165573824324488e-06, "loss": 0.9786, "step": 16530 }, { "epoch": 0.9487061793562453, "grad_norm": 0.263671875, "learning_rate": 1.5986735914628625e-06, "loss": 0.9573, "step": 16535 }, { "epoch": 0.9489930575477652, "grad_norm": 0.314453125, "learning_rate": 1.5808884775640464e-06, "loss": 0.9199, "step": 16540 }, { "epoch": 0.9492799357392852, "grad_norm": 0.29296875, "learning_rate": 1.5632020585709673e-06, "loss": 0.9071, "step": 16545 }, { "epoch": 0.949566813930805, "grad_norm": 0.275390625, "learning_rate": 1.5456143522195931e-06, "loss": 0.9682, "step": 16550 }, { "epoch": 0.9498536921223248, "grad_norm": 0.25390625, "learning_rate": 1.5281253761469161e-06, "loss": 0.9099, "step": 16555 }, { "epoch": 0.9501405703138447, "grad_norm": 0.314453125, "learning_rate": 1.5107351478909293e-06, "loss": 0.9268, "step": 16560 }, { "epoch": 0.9504274485053646, "grad_norm": 0.28125, "learning_rate": 1.493443684890583e-06, "loss": 0.9555, "step": 16565 }, { "epoch": 0.9507143266968845, "grad_norm": 0.263671875, "learning_rate": 1.4762510044857957e-06, "loss": 0.9207, "step": 16570 }, { "epoch": 0.9510012048884043, "grad_norm": 0.265625, "learning_rate": 1.4591571239174317e-06, "loss": 0.9716, "step": 16575 }, { "epoch": 0.9512880830799243, "grad_norm": 0.255859375, "learning_rate": 1.4421620603272789e-06, "loss": 0.9015, "step": 16580 }, { "epoch": 0.9515749612714441, "grad_norm": 0.255859375, "learning_rate": 1.4252658307580048e-06, "loss": 0.9391, "step": 16585 }, { "epoch": 0.951861839462964, "grad_norm": 0.267578125, "learning_rate": 1.4084684521531887e-06, "loss": 0.9711, "step": 16590 }, { "epoch": 0.952148717654484, "grad_norm": 0.275390625, "learning_rate": 1.3917699413573014e-06, "loss": 0.955, "step": 16595 }, { "epoch": 0.9524355958460038, "grad_norm": 0.255859375, "learning_rate": 1.375170315115637e-06, "loss": 0.9687, "step": 16600 }, { "epoch": 0.9527224740375236, "grad_norm": 0.27734375, "learning_rate": 1.3586695900743352e-06, "loss": 1.0067, "step": 16605 }, { "epoch": 0.9530093522290436, "grad_norm": 0.28125, "learning_rate": 1.3422677827803599e-06, "loss": 0.9846, "step": 16610 }, { "epoch": 0.9532962304205634, "grad_norm": 0.251953125, "learning_rate": 1.3259649096814763e-06, "loss": 0.9407, "step": 16615 }, { "epoch": 0.9535831086120833, "grad_norm": 0.28125, "learning_rate": 1.3097609871262295e-06, "loss": 1.0087, "step": 16620 }, { "epoch": 0.9538699868036032, "grad_norm": 0.2734375, "learning_rate": 1.293656031363988e-06, "loss": 1.0001, "step": 16625 }, { "epoch": 0.9541568649951231, "grad_norm": 0.25390625, "learning_rate": 1.2776500585448215e-06, "loss": 0.9324, "step": 16630 }, { "epoch": 0.9544437431866429, "grad_norm": 0.26953125, "learning_rate": 1.2617430847195356e-06, "loss": 0.8486, "step": 16635 }, { "epoch": 0.9547306213781628, "grad_norm": 0.25390625, "learning_rate": 1.2459351258396812e-06, "loss": 0.982, "step": 16640 }, { "epoch": 0.9550174995696827, "grad_norm": 0.25390625, "learning_rate": 1.2302261977575447e-06, "loss": 0.9321, "step": 16645 }, { "epoch": 0.9553043777612026, "grad_norm": 0.271484375, "learning_rate": 1.2146163162260581e-06, "loss": 1.0542, "step": 16650 }, { "epoch": 0.9555912559527224, "grad_norm": 0.26171875, "learning_rate": 1.1991054968988336e-06, "loss": 0.9529, "step": 16655 }, { "epoch": 0.9558781341442424, "grad_norm": 0.263671875, "learning_rate": 1.183693755330173e-06, "loss": 0.9219, "step": 16660 }, { "epoch": 0.9561650123357622, "grad_norm": 0.28515625, "learning_rate": 1.1683811069749916e-06, "loss": 0.9837, "step": 16665 }, { "epoch": 0.9564518905272821, "grad_norm": 0.298828125, "learning_rate": 1.1531675671888619e-06, "loss": 0.9397, "step": 16670 }, { "epoch": 0.956738768718802, "grad_norm": 0.265625, "learning_rate": 1.1380531512279469e-06, "loss": 0.9552, "step": 16675 }, { "epoch": 0.9570256469103219, "grad_norm": 0.28125, "learning_rate": 1.1230378742490222e-06, "loss": 0.9718, "step": 16680 }, { "epoch": 0.9573125251018417, "grad_norm": 0.2734375, "learning_rate": 1.1081217513094212e-06, "loss": 0.9079, "step": 16685 }, { "epoch": 0.9575994032933617, "grad_norm": 0.330078125, "learning_rate": 1.0933047973670896e-06, "loss": 0.97, "step": 16690 }, { "epoch": 0.9578862814848815, "grad_norm": 0.28125, "learning_rate": 1.0785870272804977e-06, "loss": 0.899, "step": 16695 }, { "epoch": 0.9581731596764014, "grad_norm": 0.283203125, "learning_rate": 1.0639684558086504e-06, "loss": 0.9541, "step": 16700 }, { "epoch": 0.9584600378679213, "grad_norm": 0.26953125, "learning_rate": 1.0494490976110883e-06, "loss": 0.9538, "step": 16705 }, { "epoch": 0.9587469160594412, "grad_norm": 0.271484375, "learning_rate": 1.035028967247864e-06, "loss": 0.995, "step": 16710 }, { "epoch": 0.959033794250961, "grad_norm": 0.275390625, "learning_rate": 1.0207080791794998e-06, "loss": 0.9229, "step": 16715 }, { "epoch": 0.9593206724424809, "grad_norm": 0.279296875, "learning_rate": 1.006486447767019e-06, "loss": 0.9349, "step": 16720 }, { "epoch": 0.9596075506340008, "grad_norm": 0.28515625, "learning_rate": 9.923640872719131e-07, "loss": 0.9353, "step": 16725 }, { "epoch": 0.9598944288255207, "grad_norm": 0.25, "learning_rate": 9.78341011856121e-07, "loss": 0.9204, "step": 16730 }, { "epoch": 0.9601813070170405, "grad_norm": 0.279296875, "learning_rate": 9.644172355819936e-07, "loss": 1.047, "step": 16735 }, { "epoch": 0.9604681852085605, "grad_norm": 0.267578125, "learning_rate": 9.505927724123509e-07, "loss": 0.9377, "step": 16740 }, { "epoch": 0.9607550634000803, "grad_norm": 0.265625, "learning_rate": 9.368676362103701e-07, "loss": 0.968, "step": 16745 }, { "epoch": 0.9610419415916002, "grad_norm": 0.2490234375, "learning_rate": 9.232418407396636e-07, "loss": 0.9917, "step": 16750 }, { "epoch": 0.9613288197831201, "grad_norm": 0.251953125, "learning_rate": 9.097153996642238e-07, "loss": 0.949, "step": 16755 }, { "epoch": 0.96161569797464, "grad_norm": 0.259765625, "learning_rate": 8.962883265483668e-07, "loss": 0.9995, "step": 16760 }, { "epoch": 0.9619025761661598, "grad_norm": 0.283203125, "learning_rate": 8.829606348567999e-07, "loss": 0.937, "step": 16765 }, { "epoch": 0.9621894543576798, "grad_norm": 0.2734375, "learning_rate": 8.697323379545653e-07, "loss": 0.9242, "step": 16770 }, { "epoch": 0.9624763325491996, "grad_norm": 0.279296875, "learning_rate": 8.566034491070407e-07, "loss": 0.9806, "step": 16775 }, { "epoch": 0.9627632107407195, "grad_norm": 0.25390625, "learning_rate": 8.435739814798949e-07, "loss": 0.9588, "step": 16780 }, { "epoch": 0.9630500889322394, "grad_norm": 0.23828125, "learning_rate": 8.30643948139087e-07, "loss": 0.9256, "step": 16785 }, { "epoch": 0.9633369671237593, "grad_norm": 0.271484375, "learning_rate": 8.178133620509232e-07, "loss": 0.8975, "step": 16790 }, { "epoch": 0.9636238453152791, "grad_norm": 0.25, "learning_rate": 8.050822360819221e-07, "loss": 1.0417, "step": 16795 }, { "epoch": 0.963910723506799, "grad_norm": 0.263671875, "learning_rate": 7.924505829988716e-07, "loss": 0.9042, "step": 16800 }, { "epoch": 0.9641976016983189, "grad_norm": 0.265625, "learning_rate": 7.79918415468861e-07, "loss": 0.8764, "step": 16805 }, { "epoch": 0.9644844798898388, "grad_norm": 0.255859375, "learning_rate": 7.674857460591379e-07, "loss": 0.9449, "step": 16810 }, { "epoch": 0.9647713580813586, "grad_norm": 0.26953125, "learning_rate": 7.551525872372289e-07, "loss": 0.9472, "step": 16815 }, { "epoch": 0.9650582362728786, "grad_norm": 0.287109375, "learning_rate": 7.429189513708524e-07, "loss": 0.9963, "step": 16820 }, { "epoch": 0.9653451144643984, "grad_norm": 0.263671875, "learning_rate": 7.307848507279169e-07, "loss": 0.9572, "step": 16825 }, { "epoch": 0.9656319926559183, "grad_norm": 0.25390625, "learning_rate": 7.187502974765448e-07, "loss": 0.9461, "step": 16830 }, { "epoch": 0.9659188708474382, "grad_norm": 0.27734375, "learning_rate": 7.068153036849934e-07, "loss": 0.9776, "step": 16835 }, { "epoch": 0.9662057490389581, "grad_norm": 0.28125, "learning_rate": 6.949798813217001e-07, "loss": 0.9549, "step": 16840 }, { "epoch": 0.9664926272304779, "grad_norm": 0.263671875, "learning_rate": 6.83244042255271e-07, "loss": 0.9784, "step": 16845 }, { "epoch": 0.9667795054219979, "grad_norm": 0.265625, "learning_rate": 6.716077982544256e-07, "loss": 0.9131, "step": 16850 }, { "epoch": 0.9670663836135177, "grad_norm": 0.251953125, "learning_rate": 6.600711609880072e-07, "loss": 0.952, "step": 16855 }, { "epoch": 0.9673532618050376, "grad_norm": 0.25, "learning_rate": 6.486341420249842e-07, "loss": 1.0066, "step": 16860 }, { "epoch": 0.9676401399965575, "grad_norm": 0.271484375, "learning_rate": 6.372967528344264e-07, "loss": 0.9966, "step": 16865 }, { "epoch": 0.9679270181880774, "grad_norm": 0.251953125, "learning_rate": 6.260590047854952e-07, "loss": 0.9964, "step": 16870 }, { "epoch": 0.9682138963795972, "grad_norm": 0.26953125, "learning_rate": 6.149209091474318e-07, "loss": 0.8882, "step": 16875 }, { "epoch": 0.968500774571117, "grad_norm": 0.279296875, "learning_rate": 6.038824770895457e-07, "loss": 0.9484, "step": 16880 }, { "epoch": 0.968787652762637, "grad_norm": 0.2734375, "learning_rate": 5.929437196811827e-07, "loss": 0.9168, "step": 16885 }, { "epoch": 0.9690745309541569, "grad_norm": 0.265625, "learning_rate": 5.821046478917791e-07, "loss": 0.9508, "step": 16890 }, { "epoch": 0.9693614091456767, "grad_norm": 0.255859375, "learning_rate": 5.713652725907626e-07, "loss": 1.0207, "step": 16895 }, { "epoch": 0.9696482873371967, "grad_norm": 0.259765625, "learning_rate": 5.607256045475961e-07, "loss": 0.9338, "step": 16900 }, { "epoch": 0.9699351655287165, "grad_norm": 0.255859375, "learning_rate": 5.501856544317896e-07, "loss": 0.8846, "step": 16905 }, { "epoch": 0.9702220437202363, "grad_norm": 0.2890625, "learning_rate": 5.397454328128104e-07, "loss": 0.9225, "step": 16910 }, { "epoch": 0.9705089219117563, "grad_norm": 0.267578125, "learning_rate": 5.294049501601283e-07, "loss": 0.9467, "step": 16915 }, { "epoch": 0.9707958001032762, "grad_norm": 0.25390625, "learning_rate": 5.191642168432154e-07, "loss": 0.9385, "step": 16920 }, { "epoch": 0.971082678294796, "grad_norm": 0.263671875, "learning_rate": 5.090232431315123e-07, "loss": 0.9474, "step": 16925 }, { "epoch": 0.971369556486316, "grad_norm": 0.271484375, "learning_rate": 4.989820391943845e-07, "loss": 0.9796, "step": 16930 }, { "epoch": 0.9716564346778358, "grad_norm": 0.2734375, "learning_rate": 4.890406151011884e-07, "loss": 0.9807, "step": 16935 }, { "epoch": 0.9719433128693556, "grad_norm": 0.2490234375, "learning_rate": 4.79198980821216e-07, "loss": 0.9716, "step": 16940 }, { "epoch": 0.9722301910608756, "grad_norm": 0.267578125, "learning_rate": 4.694571462236619e-07, "loss": 0.9392, "step": 16945 }, { "epoch": 0.9725170692523954, "grad_norm": 0.26171875, "learning_rate": 4.5981512107766687e-07, "loss": 0.9309, "step": 16950 }, { "epoch": 0.9728039474439153, "grad_norm": 0.23828125, "learning_rate": 4.5027291505227443e-07, "loss": 0.848, "step": 16955 }, { "epoch": 0.9730908256354351, "grad_norm": 0.25, "learning_rate": 4.408305377164301e-07, "loss": 0.8948, "step": 16960 }, { "epoch": 0.9733777038269551, "grad_norm": 0.248046875, "learning_rate": 4.314879985389708e-07, "loss": 0.8556, "step": 16965 }, { "epoch": 0.9736645820184749, "grad_norm": 0.287109375, "learning_rate": 4.222453068886245e-07, "loss": 0.9207, "step": 16970 }, { "epoch": 0.9739514602099948, "grad_norm": 0.25390625, "learning_rate": 4.13102472033966e-07, "loss": 0.9779, "step": 16975 }, { "epoch": 0.9742383384015147, "grad_norm": 0.248046875, "learning_rate": 4.0405950314347243e-07, "loss": 0.9539, "step": 16980 }, { "epoch": 0.9745252165930346, "grad_norm": 0.28125, "learning_rate": 3.951164092854343e-07, "loss": 0.9088, "step": 16985 }, { "epoch": 0.9748120947845544, "grad_norm": 0.275390625, "learning_rate": 3.862731994280111e-07, "loss": 0.9245, "step": 16990 }, { "epoch": 0.9750989729760744, "grad_norm": 0.302734375, "learning_rate": 3.775298824391982e-07, "loss": 0.9384, "step": 16995 }, { "epoch": 0.9753858511675942, "grad_norm": 0.3125, "learning_rate": 3.688864670868153e-07, "loss": 0.9567, "step": 17000 }, { "epoch": 0.9756727293591141, "grad_norm": 0.28125, "learning_rate": 3.6034296203848463e-07, "loss": 0.9531, "step": 17005 }, { "epoch": 0.975959607550634, "grad_norm": 0.263671875, "learning_rate": 3.51899375861664e-07, "loss": 0.9382, "step": 17010 }, { "epoch": 0.9762464857421539, "grad_norm": 0.255859375, "learning_rate": 3.435557170236026e-07, "loss": 0.9319, "step": 17015 }, { "epoch": 0.9765333639336737, "grad_norm": 0.2490234375, "learning_rate": 3.3531199389132963e-07, "loss": 0.9408, "step": 17020 }, { "epoch": 0.9768202421251937, "grad_norm": 0.265625, "learning_rate": 3.271682147316879e-07, "loss": 0.8888, "step": 17025 }, { "epoch": 0.9771071203167135, "grad_norm": 0.27734375, "learning_rate": 3.1912438771125594e-07, "loss": 0.9604, "step": 17030 }, { "epoch": 0.9773939985082334, "grad_norm": 0.251953125, "learning_rate": 3.111805208964036e-07, "loss": 0.9139, "step": 17035 }, { "epoch": 0.9776808766997532, "grad_norm": 0.267578125, "learning_rate": 3.0333662225328074e-07, "loss": 0.979, "step": 17040 }, { "epoch": 0.9779677548912732, "grad_norm": 0.2578125, "learning_rate": 2.955926996477398e-07, "loss": 0.9427, "step": 17045 }, { "epoch": 0.978254633082793, "grad_norm": 0.271484375, "learning_rate": 2.8794876084541346e-07, "loss": 0.9177, "step": 17050 }, { "epoch": 0.9785415112743129, "grad_norm": 0.2578125, "learning_rate": 2.8040481351166993e-07, "loss": 0.9348, "step": 17055 }, { "epoch": 0.9788283894658328, "grad_norm": 0.287109375, "learning_rate": 2.7296086521158003e-07, "loss": 0.9361, "step": 17060 }, { "epoch": 0.9791152676573527, "grad_norm": 0.29296875, "learning_rate": 2.6561692340997255e-07, "loss": 0.9358, "step": 17065 }, { "epoch": 0.9794021458488725, "grad_norm": 0.28515625, "learning_rate": 2.583729954713454e-07, "loss": 0.9239, "step": 17070 }, { "epoch": 0.9796890240403925, "grad_norm": 0.287109375, "learning_rate": 2.512290886599433e-07, "loss": 0.9068, "step": 17075 }, { "epoch": 0.9799759022319123, "grad_norm": 0.267578125, "learning_rate": 2.441852101396802e-07, "loss": 0.9934, "step": 17080 }, { "epoch": 0.9802627804234322, "grad_norm": 0.255859375, "learning_rate": 2.3724136697418353e-07, "loss": 0.9389, "step": 17085 }, { "epoch": 0.9805496586149521, "grad_norm": 0.271484375, "learning_rate": 2.303975661267499e-07, "loss": 0.908, "step": 17090 }, { "epoch": 0.980836536806472, "grad_norm": 0.287109375, "learning_rate": 2.2365381446035617e-07, "loss": 0.9184, "step": 17095 }, { "epoch": 0.9811234149979918, "grad_norm": 0.275390625, "learning_rate": 2.170101187376594e-07, "loss": 1.0126, "step": 17100 }, { "epoch": 0.9814102931895118, "grad_norm": 0.26171875, "learning_rate": 2.104664856209637e-07, "loss": 0.894, "step": 17105 }, { "epoch": 0.9816971713810316, "grad_norm": 0.263671875, "learning_rate": 2.0402292167224225e-07, "loss": 0.9465, "step": 17110 }, { "epoch": 0.9819840495725515, "grad_norm": 0.29296875, "learning_rate": 1.976794333531151e-07, "loss": 0.9665, "step": 17115 }, { "epoch": 0.9822709277640713, "grad_norm": 0.26171875, "learning_rate": 1.9143602702484942e-07, "loss": 0.9468, "step": 17120 }, { "epoch": 0.9825578059555913, "grad_norm": 0.28515625, "learning_rate": 1.8529270894833694e-07, "loss": 0.9169, "step": 17125 }, { "epoch": 0.9828446841471111, "grad_norm": 0.267578125, "learning_rate": 1.7924948528412755e-07, "loss": 0.8872, "step": 17130 }, { "epoch": 0.983131562338631, "grad_norm": 0.263671875, "learning_rate": 1.733063620923625e-07, "loss": 0.9435, "step": 17135 }, { "epoch": 0.9834184405301509, "grad_norm": 0.263671875, "learning_rate": 1.6746334533284115e-07, "loss": 0.9701, "step": 17140 }, { "epoch": 0.9837053187216708, "grad_norm": 0.263671875, "learning_rate": 1.6172044086492088e-07, "loss": 0.884, "step": 17145 }, { "epoch": 0.9839921969131906, "grad_norm": 0.244140625, "learning_rate": 1.5607765444762834e-07, "loss": 0.9289, "step": 17150 }, { "epoch": 0.9842790751047106, "grad_norm": 0.275390625, "learning_rate": 1.5053499173955933e-07, "loss": 0.9762, "step": 17155 }, { "epoch": 0.9845659532962304, "grad_norm": 0.275390625, "learning_rate": 1.4509245829888996e-07, "loss": 0.96, "step": 17160 }, { "epoch": 0.9848528314877503, "grad_norm": 0.265625, "learning_rate": 1.3975005958341003e-07, "loss": 0.9242, "step": 17165 }, { "epoch": 0.9851397096792702, "grad_norm": 0.275390625, "learning_rate": 1.3450780095051186e-07, "loss": 0.8535, "step": 17170 }, { "epoch": 0.9854265878707901, "grad_norm": 0.271484375, "learning_rate": 1.2936568765711254e-07, "loss": 0.8634, "step": 17175 }, { "epoch": 0.9857134660623099, "grad_norm": 0.2431640625, "learning_rate": 1.2432372485975395e-07, "loss": 0.9243, "step": 17180 }, { "epoch": 0.9860003442538299, "grad_norm": 0.28515625, "learning_rate": 1.193819176145361e-07, "loss": 0.9684, "step": 17185 }, { "epoch": 0.9862872224453497, "grad_norm": 0.24609375, "learning_rate": 1.1454027087708375e-07, "loss": 0.836, "step": 17190 }, { "epoch": 0.9865741006368696, "grad_norm": 0.265625, "learning_rate": 1.0979878950263534e-07, "loss": 0.9529, "step": 17195 }, { "epoch": 0.9868609788283894, "grad_norm": 0.2734375, "learning_rate": 1.0515747824595413e-07, "loss": 1.0052, "step": 17200 }, { "epoch": 0.9871478570199094, "grad_norm": 0.314453125, "learning_rate": 1.0061634176136148e-07, "loss": 0.9857, "step": 17205 }, { "epoch": 0.9874347352114292, "grad_norm": 0.2470703125, "learning_rate": 9.617538460270358e-08, "loss": 0.9079, "step": 17210 }, { "epoch": 0.9877216134029491, "grad_norm": 0.263671875, "learning_rate": 9.183461122339587e-08, "loss": 0.8843, "step": 17215 }, { "epoch": 0.988008491594469, "grad_norm": 0.27734375, "learning_rate": 8.759402597637855e-08, "loss": 0.9595, "step": 17220 }, { "epoch": 0.9882953697859889, "grad_norm": 0.265625, "learning_rate": 8.345363311410559e-08, "loss": 1.0177, "step": 17225 }, { "epoch": 0.9885822479775087, "grad_norm": 0.2734375, "learning_rate": 7.941343678857794e-08, "loss": 0.9305, "step": 17230 }, { "epoch": 0.9888691261690287, "grad_norm": 0.26953125, "learning_rate": 7.547344105132137e-08, "loss": 0.9569, "step": 17235 }, { "epoch": 0.9891560043605485, "grad_norm": 0.26953125, "learning_rate": 7.16336498533643e-08, "loss": 0.9185, "step": 17240 }, { "epoch": 0.9894428825520684, "grad_norm": 0.2734375, "learning_rate": 6.789406704527102e-08, "loss": 0.9569, "step": 17245 }, { "epoch": 0.9897297607435883, "grad_norm": 0.2578125, "learning_rate": 6.425469637708625e-08, "loss": 0.9679, "step": 17250 }, { "epoch": 0.9900166389351082, "grad_norm": 0.271484375, "learning_rate": 6.071554149837955e-08, "loss": 0.9106, "step": 17255 }, { "epoch": 0.990303517126628, "grad_norm": 0.271484375, "learning_rate": 5.727660595823414e-08, "loss": 0.947, "step": 17260 }, { "epoch": 0.990590395318148, "grad_norm": 0.259765625, "learning_rate": 5.39378932052248e-08, "loss": 0.9619, "step": 17265 }, { "epoch": 0.9908772735096678, "grad_norm": 0.267578125, "learning_rate": 5.069940658740668e-08, "loss": 0.953, "step": 17270 }, { "epoch": 0.9911641517011877, "grad_norm": 0.26171875, "learning_rate": 4.7561149352348675e-08, "loss": 0.9723, "step": 17275 }, { "epoch": 0.9914510298927075, "grad_norm": 0.265625, "learning_rate": 4.4523124647100065e-08, "loss": 0.988, "step": 17280 }, { "epoch": 0.9917379080842275, "grad_norm": 0.26171875, "learning_rate": 4.158533551820165e-08, "loss": 0.9843, "step": 17285 }, { "epoch": 0.9920247862757473, "grad_norm": 0.26171875, "learning_rate": 3.874778491167463e-08, "loss": 0.9379, "step": 17290 }, { "epoch": 0.9923116644672672, "grad_norm": 0.259765625, "learning_rate": 3.6010475673009524e-08, "loss": 0.9815, "step": 17295 }, { "epoch": 0.9925985426587871, "grad_norm": 0.25390625, "learning_rate": 3.337341054721055e-08, "loss": 0.9384, "step": 17300 }, { "epoch": 0.992885420850307, "grad_norm": 0.265625, "learning_rate": 3.0836592178717926e-08, "loss": 0.9017, "step": 17305 }, { "epoch": 0.9931722990418268, "grad_norm": 0.2490234375, "learning_rate": 2.840002311145229e-08, "loss": 0.8892, "step": 17310 }, { "epoch": 0.9934591772333468, "grad_norm": 0.287109375, "learning_rate": 2.6063705788825776e-08, "loss": 0.9695, "step": 17315 }, { "epoch": 0.9937460554248666, "grad_norm": 0.267578125, "learning_rate": 2.3827642553686523e-08, "loss": 0.9772, "step": 17320 }, { "epoch": 0.9940329336163864, "grad_norm": 0.267578125, "learning_rate": 2.169183564837418e-08, "loss": 0.9066, "step": 17325 }, { "epoch": 0.9943198118079064, "grad_norm": 0.26171875, "learning_rate": 1.9656287214686598e-08, "loss": 0.8939, "step": 17330 }, { "epoch": 0.9946066899994263, "grad_norm": 0.2734375, "learning_rate": 1.772099929385762e-08, "loss": 0.9562, "step": 17335 }, { "epoch": 0.9948935681909461, "grad_norm": 0.265625, "learning_rate": 1.588597382661261e-08, "loss": 1.017, "step": 17340 }, { "epoch": 0.995180446382466, "grad_norm": 0.265625, "learning_rate": 1.4151212653112922e-08, "loss": 0.9434, "step": 17345 }, { "epoch": 0.9954673245739859, "grad_norm": 0.291015625, "learning_rate": 1.2516717512989219e-08, "loss": 0.9375, "step": 17350 }, { "epoch": 0.9957542027655057, "grad_norm": 0.267578125, "learning_rate": 1.0982490045308157e-08, "loss": 0.9094, "step": 17355 }, { "epoch": 0.9960410809570256, "grad_norm": 0.265625, "learning_rate": 9.548531788605707e-09, "loss": 0.9988, "step": 17360 }, { "epoch": 0.9963279591485455, "grad_norm": 0.271484375, "learning_rate": 8.21484418084273e-09, "loss": 0.9424, "step": 17365 }, { "epoch": 0.9966148373400654, "grad_norm": 0.25, "learning_rate": 6.98142855946049e-09, "loss": 0.9161, "step": 17370 }, { "epoch": 0.9969017155315852, "grad_norm": 0.287109375, "learning_rate": 5.848286161314054e-09, "loss": 1.0036, "step": 17375 }, { "epoch": 0.9971885937231052, "grad_norm": 0.287109375, "learning_rate": 4.81541812273889e-09, "loss": 0.982, "step": 17380 }, { "epoch": 0.997475471914625, "grad_norm": 0.265625, "learning_rate": 3.882825479495367e-09, "loss": 0.9417, "step": 17385 }, { "epoch": 0.9977623501061449, "grad_norm": 0.2578125, "learning_rate": 3.050509166779847e-09, "loss": 0.9789, "step": 17390 }, { "epoch": 0.9980492282976648, "grad_norm": 0.314453125, "learning_rate": 2.3184700192357966e-09, "loss": 0.9634, "step": 17395 }, { "epoch": 0.9983361064891847, "grad_norm": 0.259765625, "learning_rate": 1.6867087709759866e-09, "loss": 0.9081, "step": 17400 }, { "epoch": 0.9986229846807045, "grad_norm": 0.259765625, "learning_rate": 1.1552260555047767e-09, "loss": 0.9128, "step": 17405 }, { "epoch": 0.9989098628722245, "grad_norm": 0.275390625, "learning_rate": 7.240224058180367e-10, "loss": 0.9465, "step": 17410 }, { "epoch": 0.9991967410637443, "grad_norm": 0.267578125, "learning_rate": 3.93098254314328e-10, "loss": 0.9759, "step": 17415 }, { "epoch": 0.9994836192552642, "grad_norm": 0.26953125, "learning_rate": 1.624539328615171e-10, "loss": 0.9412, "step": 17420 }, { "epoch": 0.9997704974467841, "grad_norm": 0.2890625, "learning_rate": 3.208967271906005e-11, "loss": 0.9467, "step": 17425 }, { "epoch": 1.0, "eval_loss": 0.9523706436157227, "eval_runtime": 525.7052, "eval_samples_per_second": 29.353, "eval_steps_per_second": 0.46, "step": 17429 }, { "epoch": 1.0, "step": 17429, "total_flos": 1.2254844500131709e+19, "train_loss": 0.9352519262663926, "train_runtime": 32149.2413, "train_samples_per_second": 4.337, "train_steps_per_second": 0.542 } ], "logging_steps": 5, "max_steps": 17429, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.2254844500131709e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }