diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,48837 +1,24435 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 0.9999856554731542, + "epoch": 1.0, "eval_steps": 500, - "global_step": 34856, + "global_step": 17429, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 1.4344629729245114e-05, - "grad_norm": 0.6328125, - "learning_rate": 2.8686173264486516e-08, - "loss": 1.4686, + "epoch": 5.7375638303976134e-05, + "grad_norm": 0.435546875, + "learning_rate": 1.1474469305794606e-07, + "loss": 1.3588, "step": 1 }, { - "epoch": 7.172314864622557e-05, - "grad_norm": 0.4375, - "learning_rate": 1.434308663224326e-07, - "loss": 1.186, + "epoch": 0.00028687819151988064, + "grad_norm": 0.59375, + "learning_rate": 5.737234652897304e-07, + "loss": 1.0991, "step": 5 }, { - "epoch": 0.00014344629729245114, - "grad_norm": 0.52734375, - "learning_rate": 2.868617326448652e-07, - "loss": 1.1009, + "epoch": 0.0005737563830397613, + "grad_norm": 0.349609375, + "learning_rate": 1.1474469305794607e-06, + "loss": 1.1268, "step": 10 }, { - "epoch": 0.00021516944593867672, - "grad_norm": 0.66015625, - "learning_rate": 4.3029259896729773e-07, - "loss": 1.0567, + "epoch": 0.0008606345745596419, + "grad_norm": 0.421875, + "learning_rate": 1.721170395869191e-06, + "loss": 1.1761, "step": 15 }, { - "epoch": 0.00028689259458490227, - "grad_norm": 0.63671875, - "learning_rate": 5.737234652897304e-07, - "loss": 1.2035, + "epoch": 0.0011475127660795226, + "grad_norm": 0.33984375, + "learning_rate": 2.2948938611589215e-06, + "loss": 1.1986, "step": 20 }, { - "epoch": 0.00035861574323112785, - "grad_norm": 0.578125, - "learning_rate": 7.17154331612163e-07, - "loss": 1.1921, + "epoch": 0.0014343909575994032, + "grad_norm": 0.359375, + "learning_rate": 2.868617326448652e-06, + "loss": 1.1268, "step": 25 }, { - "epoch": 0.00043033889187735344, - "grad_norm": 0.466796875, - "learning_rate": 8.605851979345955e-07, - "loss": 1.027, + "epoch": 0.0017212691491192839, + "grad_norm": 0.296875, + "learning_rate": 3.442340791738382e-06, + "loss": 1.0619, "step": 30 }, { - "epoch": 0.000502062040523579, - "grad_norm": 0.62890625, - "learning_rate": 1.0040160642570282e-06, - "loss": 1.1805, + "epoch": 0.0020081473406391645, + "grad_norm": 0.337890625, + "learning_rate": 4.016064257028113e-06, + "loss": 1.1175, "step": 35 }, { - "epoch": 0.0005737851891698045, - "grad_norm": 0.474609375, - "learning_rate": 1.1474469305794607e-06, - "loss": 1.1032, + "epoch": 0.002295025532159045, + "grad_norm": 0.328125, + "learning_rate": 4.589787722317843e-06, + "loss": 1.1421, "step": 40 }, { - "epoch": 0.0006455083378160301, - "grad_norm": 0.5390625, - "learning_rate": 1.2908777969018933e-06, - "loss": 1.1191, + "epoch": 0.002581903723678926, + "grad_norm": 0.32421875, + "learning_rate": 5.163511187607573e-06, + "loss": 1.1015, "step": 45 }, { - "epoch": 0.0007172314864622557, - "grad_norm": 0.5859375, - "learning_rate": 1.434308663224326e-06, - "loss": 1.2537, + "epoch": 0.0028687819151988064, + "grad_norm": 0.33984375, + "learning_rate": 5.737234652897304e-06, + "loss": 1.138, "step": 50 }, { - "epoch": 0.0007889546351084812, - "grad_norm": 0.5, - "learning_rate": 1.5777395295467586e-06, - "loss": 1.1569, + "epoch": 0.003155660106718687, + "grad_norm": 0.345703125, + "learning_rate": 6.310958118187034e-06, + "loss": 1.1281, "step": 55 }, { - "epoch": 0.0008606777837547069, - "grad_norm": 
0.51171875, - "learning_rate": 1.721170395869191e-06, - "loss": 1.1609, + "epoch": 0.0034425382982385677, + "grad_norm": 0.318359375, + "learning_rate": 6.884681583476764e-06, + "loss": 1.1071, "step": 60 }, { - "epoch": 0.0009324009324009324, - "grad_norm": 0.52734375, - "learning_rate": 1.8646012621916239e-06, - "loss": 1.1905, + "epoch": 0.0037294164897584484, + "grad_norm": 0.322265625, + "learning_rate": 7.4584050487664955e-06, + "loss": 1.1443, "step": 65 }, { - "epoch": 0.001004124081047158, - "grad_norm": 0.5234375, - "learning_rate": 2.0080321285140564e-06, - "loss": 1.2491, + "epoch": 0.004016294681278329, + "grad_norm": 0.326171875, + "learning_rate": 8.032128514056226e-06, + "loss": 1.1746, "step": 70 }, { - "epoch": 0.0010758472296933835, - "grad_norm": 0.58984375, - "learning_rate": 2.151462994836489e-06, - "loss": 1.1753, + "epoch": 0.00430317287279821, + "grad_norm": 0.291015625, + "learning_rate": 8.605851979345956e-06, + "loss": 1.0971, "step": 75 }, { - "epoch": 0.001147570378339609, - "grad_norm": 0.55859375, - "learning_rate": 2.2948938611589215e-06, - "loss": 1.152, + "epoch": 0.00459005106431809, + "grad_norm": 0.314453125, + "learning_rate": 9.179575444635686e-06, + "loss": 1.1356, "step": 80 }, { - "epoch": 0.0012192935269858347, - "grad_norm": 0.53515625, - "learning_rate": 2.438324727481354e-06, - "loss": 1.1938, + "epoch": 0.004876929255837971, + "grad_norm": 0.28125, + "learning_rate": 9.753298909925416e-06, + "loss": 1.0908, "step": 85 }, { - "epoch": 0.0012910166756320601, - "grad_norm": 0.53515625, - "learning_rate": 2.5817555938037866e-06, - "loss": 1.1689, + "epoch": 0.005163807447357852, + "grad_norm": 0.30078125, + "learning_rate": 1.0327022375215146e-05, + "loss": 1.0388, "step": 90 }, { - "epoch": 0.0013627398242782858, - "grad_norm": 0.484375, - "learning_rate": 2.725186460126219e-06, - "loss": 0.9995, + "epoch": 0.005450685638877732, + "grad_norm": 0.296875, + "learning_rate": 1.0900745840504876e-05, + "loss": 1.1107, "step": 95 }, { - "epoch": 0.0014344629729245114, - "grad_norm": 0.5703125, - "learning_rate": 2.868617326448652e-06, - "loss": 1.0994, + "epoch": 0.005737563830397613, + "grad_norm": 0.294921875, + "learning_rate": 1.1474469305794608e-05, + "loss": 1.0713, "step": 100 }, { - "epoch": 0.0030123506376142184, - "grad_norm": 0.4765625, - "learning_rate": 6.024096385542169e-06, - "loss": 1.0807, + "epoch": 0.0060244420219174935, + "grad_norm": 0.37890625, + "learning_rate": 1.2048192771084338e-05, + "loss": 1.1581, "step": 105 }, { - "epoch": 0.003155795906072038, - "grad_norm": 0.451171875, - "learning_rate": 6.310958118187034e-06, - "loss": 1.1917, + "epoch": 0.006311320213437374, + "grad_norm": 0.376953125, + "learning_rate": 1.2621916236374069e-05, + "loss": 1.0059, "step": 110 }, { - "epoch": 0.003299241174529858, - "grad_norm": 0.435546875, - "learning_rate": 6.5978198508318986e-06, - "loss": 1.1005, + "epoch": 0.006598198404957255, + "grad_norm": 0.255859375, + "learning_rate": 1.3195639701663797e-05, + "loss": 0.9732, "step": 115 }, { - "epoch": 0.003442686442987678, - "grad_norm": 0.3984375, - "learning_rate": 6.884681583476764e-06, - "loss": 1.1224, + "epoch": 0.0068850765964771354, + "grad_norm": 0.2890625, + "learning_rate": 1.3769363166953527e-05, + "loss": 0.9736, "step": 120 }, { - "epoch": 0.0035861317114454978, - "grad_norm": 0.330078125, - "learning_rate": 7.17154331612163e-06, - "loss": 1.1184, + "epoch": 0.007171954787997016, + "grad_norm": 0.2734375, + "learning_rate": 1.434308663224326e-05, + "loss": 1.044, "step": 125 
}, { - "epoch": 0.003729576979903318, - "grad_norm": 0.453125, - "learning_rate": 7.4584050487664955e-06, - "loss": 1.1697, + "epoch": 0.007458832979516897, + "grad_norm": 0.314453125, + "learning_rate": 1.4916810097532991e-05, + "loss": 1.0218, "step": 130 }, { - "epoch": 0.003873022248361138, - "grad_norm": 0.416015625, - "learning_rate": 7.74526678141136e-06, - "loss": 1.1933, + "epoch": 0.007745711171036777, + "grad_norm": 0.236328125, + "learning_rate": 1.549053356282272e-05, + "loss": 0.9904, "step": 135 }, { - "epoch": 0.0040164675168189575, - "grad_norm": 0.4375, - "learning_rate": 8.032128514056226e-06, - "loss": 1.1462, + "epoch": 0.008032589362556658, + "grad_norm": 0.2490234375, + "learning_rate": 1.606425702811245e-05, + "loss": 0.9875, "step": 140 }, { - "epoch": 0.004159912785276778, - "grad_norm": 0.40234375, - "learning_rate": 8.31899024670109e-06, - "loss": 1.0186, + "epoch": 0.008319467554076539, + "grad_norm": 0.25390625, + "learning_rate": 1.663798049340218e-05, + "loss": 0.9874, "step": 145 }, { - "epoch": 0.004303358053734598, - "grad_norm": 0.39453125, - "learning_rate": 8.605851979345956e-06, - "loss": 1.1564, + "epoch": 0.00860634574559642, + "grad_norm": 0.29296875, + "learning_rate": 1.721170395869191e-05, + "loss": 0.9866, "step": 150 }, { - "epoch": 0.004446803322192418, - "grad_norm": 0.361328125, - "learning_rate": 8.892713711990821e-06, - "loss": 1.0869, + "epoch": 0.0088932239371163, + "grad_norm": 0.26171875, + "learning_rate": 1.7785427423981642e-05, + "loss": 0.9857, "step": 155 }, { - "epoch": 0.004590248590650238, - "grad_norm": 0.380859375, - "learning_rate": 9.179575444635686e-06, - "loss": 1.1593, + "epoch": 0.00918010212863618, + "grad_norm": 0.38671875, + "learning_rate": 1.8359150889271372e-05, + "loss": 1.0017, "step": 160 }, { - "epoch": 0.004733693859108057, - "grad_norm": 0.376953125, - "learning_rate": 9.466437177280551e-06, - "loss": 1.1177, + "epoch": 0.009466980320156061, + "grad_norm": 0.287109375, + "learning_rate": 1.8932874354561102e-05, + "loss": 1.025, "step": 165 }, { - "epoch": 0.004877139127565877, - "grad_norm": 0.408203125, - "learning_rate": 9.753298909925416e-06, - "loss": 1.0336, + "epoch": 0.009753858511675942, + "grad_norm": 0.28515625, + "learning_rate": 1.9506597819850832e-05, + "loss": 1.015, "step": 170 }, { - "epoch": 0.005020584396023697, - "grad_norm": 0.43359375, - "learning_rate": 1.0040160642570281e-05, - "loss": 0.9723, + "epoch": 0.010040736703195823, + "grad_norm": 0.27734375, + "learning_rate": 2.0080321285140562e-05, + "loss": 0.9669, "step": 175 }, { - "epoch": 0.005164029664481517, - "grad_norm": 0.37890625, - "learning_rate": 1.0327022375215146e-05, - "loss": 1.0685, + "epoch": 0.010327614894715703, + "grad_norm": 0.2451171875, + "learning_rate": 2.0654044750430293e-05, + "loss": 1.035, "step": 180 }, { - "epoch": 0.005307474932939337, - "grad_norm": 0.52734375, - "learning_rate": 1.0613884107860011e-05, - "loss": 1.0941, + "epoch": 0.010614493086235584, + "grad_norm": 0.2578125, + "learning_rate": 2.1227768215720023e-05, + "loss": 1.0398, "step": 185 }, { - "epoch": 0.0054509202013971565, - "grad_norm": 0.390625, - "learning_rate": 1.0900745840504876e-05, - "loss": 1.0887, + "epoch": 0.010901371277755464, + "grad_norm": 0.271484375, + "learning_rate": 2.1801491681009753e-05, + "loss": 1.05, "step": 190 }, { - "epoch": 0.0055943654698549766, - "grad_norm": 0.35546875, - "learning_rate": 1.1187607573149743e-05, - "loss": 1.0567, + "epoch": 0.011188249469275345, + "grad_norm": 0.267578125, + 
"learning_rate": 2.2375215146299486e-05, + "loss": 0.9228, "step": 195 }, { - "epoch": 0.005737810738312797, - "grad_norm": 0.419921875, - "learning_rate": 1.1474469305794608e-05, - "loss": 1.053, + "epoch": 0.011475127660795226, + "grad_norm": 0.263671875, + "learning_rate": 2.2948938611589217e-05, + "loss": 1.0787, "step": 200 }, { - "epoch": 0.005881256006770617, - "grad_norm": 0.44921875, - "learning_rate": 1.1761331038439473e-05, - "loss": 1.1248, + "epoch": 0.011762005852315106, + "grad_norm": 0.314453125, + "learning_rate": 2.3522662076878947e-05, + "loss": 1.0928, "step": 205 }, { - "epoch": 0.006024701275228437, - "grad_norm": 0.51953125, - "learning_rate": 1.2048192771084338e-05, - "loss": 1.1648, + "epoch": 0.012048884043834987, + "grad_norm": 0.28515625, + "learning_rate": 2.4096385542168677e-05, + "loss": 1.0675, "step": 210 }, { - "epoch": 0.006168146543686257, - "grad_norm": 0.36328125, - "learning_rate": 1.2335054503729204e-05, - "loss": 0.9817, + "epoch": 0.012335762235354868, + "grad_norm": 0.2734375, + "learning_rate": 2.4670109007458407e-05, + "loss": 1.02, "step": 215 }, { - "epoch": 0.006311591812144076, - "grad_norm": 0.65625, - "learning_rate": 1.2621916236374069e-05, - "loss": 1.0075, + "epoch": 0.012622640426874748, + "grad_norm": 0.291015625, + "learning_rate": 2.5243832472748137e-05, + "loss": 0.9736, "step": 220 }, { - "epoch": 0.006455037080601896, - "grad_norm": 0.384765625, - "learning_rate": 1.2908777969018934e-05, - "loss": 0.9719, + "epoch": 0.012909518618394629, + "grad_norm": 0.271484375, + "learning_rate": 2.5817555938037867e-05, + "loss": 1.0255, "step": 225 }, { - "epoch": 0.006598482349059716, - "grad_norm": 0.3046875, - "learning_rate": 1.3195639701663797e-05, - "loss": 0.9571, + "epoch": 0.01319639680991451, + "grad_norm": 0.33203125, + "learning_rate": 2.6391279403327594e-05, + "loss": 1.1085, "step": 230 }, { - "epoch": 0.006741927617517536, - "grad_norm": 0.36328125, - "learning_rate": 1.3482501434308664e-05, - "loss": 0.9517, + "epoch": 0.01348327500143439, + "grad_norm": 0.2734375, + "learning_rate": 2.6965002868617328e-05, + "loss": 1.0057, "step": 235 }, { - "epoch": 0.006885372885975356, - "grad_norm": 0.40234375, - "learning_rate": 1.3769363166953527e-05, - "loss": 0.9781, + "epoch": 0.013770153192954271, + "grad_norm": 0.291015625, + "learning_rate": 2.7538726333907055e-05, + "loss": 1.0452, "step": 240 }, { - "epoch": 0.007028818154433176, - "grad_norm": 0.38671875, - "learning_rate": 1.4056224899598394e-05, - "loss": 0.9974, + "epoch": 0.014057031384474152, + "grad_norm": 0.2890625, + "learning_rate": 2.8112449799196788e-05, + "loss": 1.0544, "step": 245 }, { - "epoch": 0.0071722634228909956, - "grad_norm": 0.36328125, - "learning_rate": 1.434308663224326e-05, - "loss": 1.075, + "epoch": 0.014343909575994032, + "grad_norm": 0.3046875, + "learning_rate": 2.868617326448652e-05, + "loss": 1.0181, "step": 250 }, { - "epoch": 0.007315708691348816, - "grad_norm": 0.35546875, - "learning_rate": 1.4629948364888124e-05, - "loss": 0.9424, + "epoch": 0.014630787767513913, + "grad_norm": 0.28125, + "learning_rate": 2.925989672977625e-05, + "loss": 0.9926, "step": 255 }, { - "epoch": 0.007459153959806636, - "grad_norm": 0.427734375, - "learning_rate": 1.4916810097532991e-05, - "loss": 1.0849, + "epoch": 0.014917665959033793, + "grad_norm": 0.34375, + "learning_rate": 2.9833620195065982e-05, + "loss": 0.9662, "step": 260 }, { - "epoch": 0.007602599228264456, - "grad_norm": 0.375, - "learning_rate": 1.5203671830177854e-05, - "loss": 1.0073, + 
"epoch": 0.015204544150553674, + "grad_norm": 0.30078125, + "learning_rate": 3.040734366035571e-05, + "loss": 1.0339, "step": 265 }, { - "epoch": 0.007746044496722276, - "grad_norm": 0.361328125, - "learning_rate": 1.549053356282272e-05, - "loss": 0.962, + "epoch": 0.015491422342073555, + "grad_norm": 0.283203125, + "learning_rate": 3.098106712564544e-05, + "loss": 0.9458, "step": 270 }, { - "epoch": 0.007889489765180096, - "grad_norm": 0.40234375, - "learning_rate": 1.5777395295467585e-05, - "loss": 0.9362, + "epoch": 0.015778300533593435, + "grad_norm": 0.287109375, + "learning_rate": 3.155479059093517e-05, + "loss": 0.96, "step": 275 }, { - "epoch": 0.008032935033637915, - "grad_norm": 0.373046875, - "learning_rate": 1.606425702811245e-05, - "loss": 1.0267, + "epoch": 0.016065178725113316, + "grad_norm": 0.3125, + "learning_rate": 3.21285140562249e-05, + "loss": 0.9781, "step": 280 }, { - "epoch": 0.008176380302095736, - "grad_norm": 0.390625, - "learning_rate": 1.6351118760757318e-05, - "loss": 0.9735, + "epoch": 0.016352056916633197, + "grad_norm": 0.291015625, + "learning_rate": 3.2702237521514636e-05, + "loss": 1.0201, "step": 285 }, { - "epoch": 0.008319825570553555, - "grad_norm": 0.37890625, - "learning_rate": 1.663798049340218e-05, - "loss": 0.9895, + "epoch": 0.016638935108153077, + "grad_norm": 0.306640625, + "learning_rate": 3.327596098680436e-05, + "loss": 1.0515, "step": 290 }, { - "epoch": 0.008463270839011374, - "grad_norm": 0.34375, - "learning_rate": 1.6924842226047048e-05, - "loss": 0.928, + "epoch": 0.016925813299672958, + "grad_norm": 0.3125, + "learning_rate": 3.3849684452094096e-05, + "loss": 1.0647, "step": 295 }, { - "epoch": 0.008606716107469195, - "grad_norm": 0.42578125, - "learning_rate": 1.721170395869191e-05, - "loss": 1.0332, + "epoch": 0.01721269149119284, + "grad_norm": 0.30078125, + "learning_rate": 3.442340791738382e-05, + "loss": 0.9632, "step": 300 }, { - "epoch": 0.008750161375927015, - "grad_norm": 0.392578125, - "learning_rate": 1.749856569133678e-05, - "loss": 0.9845, + "epoch": 0.01749956968271272, + "grad_norm": 0.29296875, + "learning_rate": 3.499713138267356e-05, + "loss": 0.9992, "step": 305 }, { - "epoch": 0.008893606644384836, - "grad_norm": 0.35546875, - "learning_rate": 1.7785427423981642e-05, - "loss": 0.9726, + "epoch": 0.0177864478742326, + "grad_norm": 0.287109375, + "learning_rate": 3.5570854847963284e-05, + "loss": 0.9193, "step": 310 }, { - "epoch": 0.009037051912842655, - "grad_norm": 0.408203125, - "learning_rate": 1.8072289156626505e-05, - "loss": 0.9854, + "epoch": 0.01807332606575248, + "grad_norm": 0.328125, + "learning_rate": 3.614457831325301e-05, + "loss": 1.0958, "step": 315 }, { - "epoch": 0.009180497181300476, - "grad_norm": 0.412109375, - "learning_rate": 1.8359150889271372e-05, - "loss": 1.0072, + "epoch": 0.01836020425727236, + "grad_norm": 0.3046875, + "learning_rate": 3.6718301778542744e-05, + "loss": 0.9924, "step": 320 }, { - "epoch": 0.009323942449758295, - "grad_norm": 0.42578125, - "learning_rate": 1.8646012621916235e-05, - "loss": 1.1039, + "epoch": 0.018647082448792242, + "grad_norm": 0.3046875, + "learning_rate": 3.729202524383247e-05, + "loss": 1.0167, "step": 325 }, { - "epoch": 0.009467387718216114, - "grad_norm": 0.361328125, - "learning_rate": 1.8932874354561102e-05, - "loss": 0.9346, + "epoch": 0.018933960640312122, + "grad_norm": 0.314453125, + "learning_rate": 3.7865748709122204e-05, + "loss": 1.0477, "step": 330 }, { - "epoch": 0.009610832986673935, - "grad_norm": 0.376953125, - "learning_rate": 
1.9219736087205965e-05, - "loss": 0.952, + "epoch": 0.019220838831832003, + "grad_norm": 0.30859375, + "learning_rate": 3.843947217441193e-05, + "loss": 1.0352, "step": 335 }, { - "epoch": 0.009754278255131754, - "grad_norm": 0.474609375, - "learning_rate": 1.9506597819850832e-05, - "loss": 1.0648, + "epoch": 0.019507717023351884, + "grad_norm": 0.296875, + "learning_rate": 3.9013195639701665e-05, + "loss": 1.0158, "step": 340 }, { - "epoch": 0.009897723523589575, - "grad_norm": 0.37890625, - "learning_rate": 1.9793459552495696e-05, - "loss": 0.9335, + "epoch": 0.019794595214871764, + "grad_norm": 0.279296875, + "learning_rate": 3.958691910499139e-05, + "loss": 1.0116, "step": 345 }, { - "epoch": 0.010041168792047394, - "grad_norm": 0.46484375, - "learning_rate": 2.0080321285140562e-05, - "loss": 0.9885, + "epoch": 0.020081473406391645, + "grad_norm": 0.306640625, + "learning_rate": 4.0160642570281125e-05, + "loss": 1.0194, "step": 350 }, { - "epoch": 0.010184614060505214, - "grad_norm": 0.380859375, - "learning_rate": 2.036718301778543e-05, - "loss": 1.0261, + "epoch": 0.020368351597911526, + "grad_norm": 0.298828125, + "learning_rate": 4.073436603557086e-05, + "loss": 1.0497, "step": 355 }, { - "epoch": 0.010328059328963034, - "grad_norm": 0.39453125, - "learning_rate": 2.0654044750430293e-05, - "loss": 1.0331, + "epoch": 0.020655229789431406, + "grad_norm": 0.306640625, + "learning_rate": 4.1308089500860585e-05, + "loss": 1.0213, "step": 360 }, { - "epoch": 0.010471504597420854, - "grad_norm": 0.4375, - "learning_rate": 2.094090648307516e-05, - "loss": 1.0636, + "epoch": 0.020942107980951287, + "grad_norm": 0.2890625, + "learning_rate": 4.188181296615032e-05, + "loss": 1.0122, "step": 365 }, { - "epoch": 0.010614949865878675, - "grad_norm": 0.390625, - "learning_rate": 2.1227768215720023e-05, - "loss": 1.0047, + "epoch": 0.021228986172471168, + "grad_norm": 0.296875, + "learning_rate": 4.2455536431440046e-05, + "loss": 0.9192, "step": 370 }, { - "epoch": 0.010758395134336494, - "grad_norm": 0.48046875, - "learning_rate": 2.151462994836489e-05, - "loss": 1.1399, + "epoch": 0.021515864363991048, + "grad_norm": 0.287109375, + "learning_rate": 4.302925989672978e-05, + "loss": 0.9714, "step": 375 }, { - "epoch": 0.010901840402794313, - "grad_norm": 0.40234375, - "learning_rate": 2.1801491681009753e-05, - "loss": 0.9485, + "epoch": 0.02180274255551093, + "grad_norm": 0.287109375, + "learning_rate": 4.3602983362019506e-05, + "loss": 0.9926, "step": 380 }, { - "epoch": 0.011045285671252134, - "grad_norm": 0.396484375, - "learning_rate": 2.208835341365462e-05, - "loss": 0.9379, + "epoch": 0.02208962074703081, + "grad_norm": 0.345703125, + "learning_rate": 4.417670682730924e-05, + "loss": 0.9874, "step": 385 }, { - "epoch": 0.011188730939709953, - "grad_norm": 0.40625, - "learning_rate": 2.2375215146299486e-05, - "loss": 0.8968, + "epoch": 0.02237649893855069, + "grad_norm": 0.310546875, + "learning_rate": 4.475043029259897e-05, + "loss": 0.9786, "step": 390 }, { - "epoch": 0.011332176208167774, - "grad_norm": 0.400390625, - "learning_rate": 2.266207687894435e-05, - "loss": 1.1983, + "epoch": 0.02266337713007057, + "grad_norm": 0.28515625, + "learning_rate": 4.53241537578887e-05, + "loss": 1.0112, "step": 395 }, { - "epoch": 0.011475621476625593, - "grad_norm": 0.40625, - "learning_rate": 2.2948938611589217e-05, - "loss": 0.9481, + "epoch": 0.02295025532159045, + "grad_norm": 0.27734375, + "learning_rate": 4.589787722317843e-05, + "loss": 0.9551, "step": 400 }, { - "epoch": 0.011619066745083414, 
- "grad_norm": 0.4375, - "learning_rate": 2.323580034423408e-05, - "loss": 1.0723, + "epoch": 0.023237133513110332, + "grad_norm": 0.28125, + "learning_rate": 4.647160068846816e-05, + "loss": 0.9801, "step": 405 }, { - "epoch": 0.011762512013541233, - "grad_norm": 0.458984375, - "learning_rate": 2.3522662076878947e-05, - "loss": 1.102, + "epoch": 0.023524011704630213, + "grad_norm": 0.28515625, + "learning_rate": 4.7045324153757894e-05, + "loss": 0.9705, "step": 410 }, { - "epoch": 0.011905957281999053, - "grad_norm": 0.41015625, - "learning_rate": 2.380952380952381e-05, - "loss": 1.0767, + "epoch": 0.023810889896150093, + "grad_norm": 0.283203125, + "learning_rate": 4.761904761904762e-05, + "loss": 1.0404, "step": 415 }, { - "epoch": 0.012049402550456874, - "grad_norm": 0.408203125, - "learning_rate": 2.4096385542168677e-05, - "loss": 1.0465, + "epoch": 0.024097768087669974, + "grad_norm": 0.296875, + "learning_rate": 4.8192771084337354e-05, + "loss": 0.9633, "step": 420 }, { - "epoch": 0.012192847818914693, - "grad_norm": 0.439453125, - "learning_rate": 2.4383247274813544e-05, - "loss": 1.0135, + "epoch": 0.024384646279189855, + "grad_norm": 0.29296875, + "learning_rate": 4.876649454962709e-05, + "loss": 1.0002, "step": 425 }, { - "epoch": 0.012336293087372514, - "grad_norm": 0.443359375, - "learning_rate": 2.4670109007458407e-05, - "loss": 1.0181, + "epoch": 0.024671524470709735, + "grad_norm": 0.3125, + "learning_rate": 4.9340218014916814e-05, + "loss": 1.0285, "step": 430 }, { - "epoch": 0.012479738355830333, - "grad_norm": 0.447265625, - "learning_rate": 2.495697074010327e-05, - "loss": 0.9883, + "epoch": 0.024958402662229616, + "grad_norm": 0.2734375, + "learning_rate": 4.991394148020654e-05, + "loss": 1.0558, "step": 435 }, { - "epoch": 0.012623183624288152, - "grad_norm": 0.4296875, - "learning_rate": 2.5243832472748137e-05, - "loss": 0.9481, + "epoch": 0.025245280853749497, + "grad_norm": 0.26171875, + "learning_rate": 5.0487664945496275e-05, + "loss": 0.9442, "step": 440 }, { - "epoch": 0.012766628892745973, - "grad_norm": 0.43359375, - "learning_rate": 2.5530694205393e-05, - "loss": 1.0318, + "epoch": 0.025532159045269377, + "grad_norm": 0.2578125, + "learning_rate": 5.1061388410786e-05, + "loss": 1.0159, "step": 445 }, { - "epoch": 0.012910074161203792, - "grad_norm": 0.416015625, - "learning_rate": 2.5817555938037867e-05, - "loss": 1.0097, + "epoch": 0.025819037236789258, + "grad_norm": 0.275390625, + "learning_rate": 5.1635111876075735e-05, + "loss": 0.9705, "step": 450 }, { - "epoch": 0.013053519429661613, - "grad_norm": 0.423828125, - "learning_rate": 2.6104417670682734e-05, - "loss": 1.0516, + "epoch": 0.02610591542830914, + "grad_norm": 0.279296875, + "learning_rate": 5.220883534136547e-05, + "loss": 0.9864, "step": 455 }, { - "epoch": 0.013196964698119432, - "grad_norm": 0.4609375, - "learning_rate": 2.6391279403327594e-05, - "loss": 1.1533, + "epoch": 0.02639279361982902, + "grad_norm": 0.291015625, + "learning_rate": 5.278255880665519e-05, + "loss": 1.0527, "step": 460 }, { - "epoch": 0.013340409966577253, - "grad_norm": 0.388671875, - "learning_rate": 2.667814113597246e-05, - "loss": 0.9956, + "epoch": 0.0266796718113489, + "grad_norm": 0.275390625, + "learning_rate": 5.335628227194492e-05, + "loss": 0.9403, "step": 465 }, { - "epoch": 0.013483855235035072, - "grad_norm": 0.40625, - "learning_rate": 2.6965002868617328e-05, - "loss": 1.0079, + "epoch": 0.02696655000286878, + "grad_norm": 0.26953125, + "learning_rate": 5.3930005737234656e-05, + "loss": 1.0662, "step": 
470 }, { - "epoch": 0.013627300503492892, - "grad_norm": 0.396484375, - "learning_rate": 2.7251864601262195e-05, - "loss": 1.0362, + "epoch": 0.02725342819438866, + "grad_norm": 0.291015625, + "learning_rate": 5.450372920252439e-05, + "loss": 0.9985, "step": 475 }, { - "epoch": 0.013770745771950713, - "grad_norm": 0.4296875, - "learning_rate": 2.7538726333907055e-05, - "loss": 1.044, + "epoch": 0.027540306385908542, + "grad_norm": 0.27734375, + "learning_rate": 5.507745266781411e-05, + "loss": 0.9654, "step": 480 }, { - "epoch": 0.013914191040408532, - "grad_norm": 0.435546875, - "learning_rate": 2.782558806655192e-05, - "loss": 1.0479, + "epoch": 0.027827184577428422, + "grad_norm": 0.310546875, + "learning_rate": 5.565117613310384e-05, + "loss": 1.0284, "step": 485 }, { - "epoch": 0.014057636308866353, - "grad_norm": 0.390625, - "learning_rate": 2.8112449799196788e-05, - "loss": 1.0516, + "epoch": 0.028114062768948303, + "grad_norm": 0.296875, + "learning_rate": 5.6224899598393576e-05, + "loss": 0.9302, "step": 490 }, { - "epoch": 0.014201081577324172, - "grad_norm": 0.4140625, - "learning_rate": 2.8399311531841655e-05, - "loss": 0.9928, + "epoch": 0.028400940960468184, + "grad_norm": 0.322265625, + "learning_rate": 5.679862306368331e-05, + "loss": 0.9556, "step": 495 }, { - "epoch": 0.014344526845781991, - "grad_norm": 0.416015625, - "learning_rate": 2.868617326448652e-05, - "loss": 1.0354, + "epoch": 0.028687819151988064, + "grad_norm": 0.291015625, + "learning_rate": 5.737234652897304e-05, + "loss": 1.0002, "step": 500 }, { - "epoch": 0.014487972114239812, - "grad_norm": 0.44140625, - "learning_rate": 2.897303499713138e-05, - "loss": 1.0148, + "epoch": 0.028974697343507945, + "grad_norm": 0.306640625, + "learning_rate": 5.794606999426276e-05, + "loss": 0.9644, "step": 505 }, { - "epoch": 0.014631417382697631, - "grad_norm": 0.3984375, - "learning_rate": 2.925989672977625e-05, - "loss": 0.9611, + "epoch": 0.029261575535027826, + "grad_norm": 0.345703125, + "learning_rate": 5.85197934595525e-05, + "loss": 0.9905, "step": 510 }, { - "epoch": 0.014774862651155452, - "grad_norm": 0.3984375, - "learning_rate": 2.9546758462421115e-05, - "loss": 0.9191, + "epoch": 0.029548453726547706, + "grad_norm": 0.291015625, + "learning_rate": 5.909351692484223e-05, + "loss": 1.0985, "step": 515 }, { - "epoch": 0.014918307919613271, - "grad_norm": 0.60546875, - "learning_rate": 2.9833620195065982e-05, - "loss": 1.0039, + "epoch": 0.029835331918067587, + "grad_norm": 0.287109375, + "learning_rate": 5.9667240390131964e-05, + "loss": 1.0632, "step": 520 }, { - "epoch": 0.015061753188071092, - "grad_norm": 0.388671875, - "learning_rate": 3.012048192771085e-05, - "loss": 1.0747, + "epoch": 0.030122210109587468, + "grad_norm": 0.267578125, + "learning_rate": 6.02409638554217e-05, + "loss": 1.0096, "step": 525 }, { - "epoch": 0.015205198456528912, - "grad_norm": 0.419921875, - "learning_rate": 3.040734366035571e-05, - "loss": 0.984, + "epoch": 0.030409088301107348, + "grad_norm": 0.291015625, + "learning_rate": 6.081468732071142e-05, + "loss": 0.9602, "step": 530 }, { - "epoch": 0.01534864372498673, - "grad_norm": 0.40234375, - "learning_rate": 3.0694205393000576e-05, - "loss": 0.9299, + "epoch": 0.03069596649262723, + "grad_norm": 0.263671875, + "learning_rate": 6.138841078600115e-05, + "loss": 0.9651, "step": 535 }, { - "epoch": 0.015492088993444552, - "grad_norm": 0.447265625, - "learning_rate": 3.098106712564544e-05, - "loss": 0.9537, + "epoch": 0.03098284468414711, + "grad_norm": 0.26171875, + 
"learning_rate": 6.196213425129088e-05, + "loss": 1.0011, "step": 540 }, { - "epoch": 0.01563553426190237, - "grad_norm": 0.42578125, - "learning_rate": 3.126792885829031e-05, - "loss": 0.9536, + "epoch": 0.03126972287566699, + "grad_norm": 0.26953125, + "learning_rate": 6.253585771658062e-05, + "loss": 0.9452, "step": 545 }, { - "epoch": 0.015778979530360192, - "grad_norm": 0.40234375, - "learning_rate": 3.155479059093517e-05, - "loss": 0.9603, + "epoch": 0.03155660106718687, + "grad_norm": 0.2890625, + "learning_rate": 6.310958118187034e-05, + "loss": 1.0467, "step": 550 }, { - "epoch": 0.01592242479881801, - "grad_norm": 0.39453125, - "learning_rate": 3.1841652323580036e-05, - "loss": 0.9382, + "epoch": 0.03184347925870675, + "grad_norm": 0.265625, + "learning_rate": 6.368330464716007e-05, + "loss": 0.9509, "step": 555 }, { - "epoch": 0.01606587006727583, - "grad_norm": 0.44140625, - "learning_rate": 3.21285140562249e-05, - "loss": 1.0106, + "epoch": 0.03213035745022663, + "grad_norm": 0.291015625, + "learning_rate": 6.42570281124498e-05, + "loss": 1.0041, "step": 560 }, { - "epoch": 0.01620931533573365, - "grad_norm": 0.443359375, - "learning_rate": 3.241537578886977e-05, - "loss": 1.0412, + "epoch": 0.03241723564174651, + "grad_norm": 0.283203125, + "learning_rate": 6.483075157773954e-05, + "loss": 0.9926, "step": 565 }, { - "epoch": 0.016352760604191472, - "grad_norm": 0.44140625, - "learning_rate": 3.2702237521514636e-05, - "loss": 0.9914, + "epoch": 0.03270411383326639, + "grad_norm": 0.25390625, + "learning_rate": 6.540447504302927e-05, + "loss": 1.03, "step": 570 }, { - "epoch": 0.01649620587264929, - "grad_norm": 0.45703125, - "learning_rate": 3.2989099254159496e-05, - "loss": 1.0359, + "epoch": 0.032990992024786274, + "grad_norm": 0.275390625, + "learning_rate": 6.597819850831899e-05, + "loss": 1.0156, "step": 575 }, { - "epoch": 0.01663965114110711, - "grad_norm": 0.427734375, - "learning_rate": 3.327596098680436e-05, - "loss": 1.0596, + "epoch": 0.033277870216306155, + "grad_norm": 0.30078125, + "learning_rate": 6.655192197360873e-05, + "loss": 0.9382, "step": 580 }, { - "epoch": 0.01678309640956493, - "grad_norm": 0.3984375, - "learning_rate": 3.356282271944923e-05, - "loss": 1.0889, + "epoch": 0.033564748407826035, + "grad_norm": 0.2578125, + "learning_rate": 6.712564543889846e-05, + "loss": 1.0226, "step": 585 }, { - "epoch": 0.01692654167802275, - "grad_norm": 0.44140625, - "learning_rate": 3.3849684452094096e-05, - "loss": 1.0328, + "epoch": 0.033851626599345916, + "grad_norm": 0.275390625, + "learning_rate": 6.769936890418819e-05, + "loss": 1.0166, "step": 590 }, { - "epoch": 0.01706998694648057, - "grad_norm": 0.45703125, - "learning_rate": 3.413654618473896e-05, - "loss": 0.9897, + "epoch": 0.0341385047908658, + "grad_norm": 0.271484375, + "learning_rate": 6.827309236947793e-05, + "loss": 1.0474, "step": 595 }, { - "epoch": 0.01721343221493839, - "grad_norm": 0.41796875, - "learning_rate": 3.442340791738382e-05, - "loss": 0.9295, + "epoch": 0.03442538298238568, + "grad_norm": 0.271484375, + "learning_rate": 6.884681583476765e-05, + "loss": 1.0629, "step": 600 }, { - "epoch": 0.01735687748339621, - "grad_norm": 0.419921875, - "learning_rate": 3.471026965002869e-05, - "loss": 0.9724, + "epoch": 0.03471226117390556, + "grad_norm": 0.26953125, + "learning_rate": 6.942053930005738e-05, + "loss": 0.9475, "step": 605 }, { - "epoch": 0.01750032275185403, - "grad_norm": 0.462890625, - "learning_rate": 3.499713138267356e-05, - "loss": 1.0176, + "epoch": 0.03499913936542544, + 
"grad_norm": 0.2734375, + "learning_rate": 6.999426276534711e-05, + "loss": 0.9764, "step": 610 }, { - "epoch": 0.01764376802031185, - "grad_norm": 0.4296875, - "learning_rate": 3.528399311531842e-05, - "loss": 0.9022, + "epoch": 0.03528601755694532, + "grad_norm": 0.25, + "learning_rate": 7.056798623063683e-05, + "loss": 0.9431, "step": 615 }, { - "epoch": 0.01778721328876967, - "grad_norm": 0.396484375, - "learning_rate": 3.5570854847963284e-05, - "loss": 0.9294, + "epoch": 0.0355728957484652, + "grad_norm": 0.287109375, + "learning_rate": 7.114170969592657e-05, + "loss": 1.0624, "step": 620 }, { - "epoch": 0.01793065855722749, - "grad_norm": 0.40234375, - "learning_rate": 3.585771658060815e-05, - "loss": 1.0535, + "epoch": 0.03585977393998508, + "grad_norm": 0.255859375, + "learning_rate": 7.17154331612163e-05, + "loss": 1.0391, "step": 625 }, { - "epoch": 0.01807410382568531, - "grad_norm": 0.40234375, - "learning_rate": 3.614457831325301e-05, - "loss": 1.1311, + "epoch": 0.03614665213150496, + "grad_norm": 0.27734375, + "learning_rate": 7.228915662650602e-05, + "loss": 0.9731, "step": 630 }, { - "epoch": 0.01821754909414313, - "grad_norm": 0.43359375, - "learning_rate": 3.643144004589788e-05, - "loss": 1.0288, + "epoch": 0.03643353032302484, + "grad_norm": 0.26171875, + "learning_rate": 7.286288009179575e-05, + "loss": 0.9593, "step": 635 }, { - "epoch": 0.01836099436260095, - "grad_norm": 0.408203125, - "learning_rate": 3.6718301778542744e-05, - "loss": 0.9483, + "epoch": 0.03672040851454472, + "grad_norm": 0.26953125, + "learning_rate": 7.343660355708549e-05, + "loss": 0.9843, "step": 640 }, { - "epoch": 0.01850443963105877, - "grad_norm": 0.419921875, - "learning_rate": 3.7005163511187604e-05, - "loss": 1.033, + "epoch": 0.0370072867060646, + "grad_norm": 0.26171875, + "learning_rate": 7.401032702237521e-05, + "loss": 0.977, "step": 645 }, { - "epoch": 0.01864788489951659, - "grad_norm": 0.3984375, - "learning_rate": 3.729202524383247e-05, - "loss": 0.9929, + "epoch": 0.037294164897584484, + "grad_norm": 0.26171875, + "learning_rate": 7.458405048766494e-05, + "loss": 0.9185, "step": 650 }, { - "epoch": 0.01879133016797441, - "grad_norm": 0.451171875, - "learning_rate": 3.757888697647734e-05, - "loss": 0.9887, + "epoch": 0.037581043089104364, + "grad_norm": 0.2578125, + "learning_rate": 7.515777395295467e-05, + "loss": 1.0353, "step": 655 }, { - "epoch": 0.018934775436432228, - "grad_norm": 0.4140625, - "learning_rate": 3.7865748709122204e-05, - "loss": 1.0975, + "epoch": 0.037867921280624245, + "grad_norm": 0.259765625, + "learning_rate": 7.573149741824441e-05, + "loss": 0.9667, "step": 660 }, { - "epoch": 0.01907822070489005, - "grad_norm": 0.416015625, - "learning_rate": 3.815261044176707e-05, - "loss": 1.0061, + "epoch": 0.038154799472144126, + "grad_norm": 0.267578125, + "learning_rate": 7.630522088353414e-05, + "loss": 0.9865, "step": 665 }, { - "epoch": 0.01922166597334787, - "grad_norm": 0.41015625, - "learning_rate": 3.843947217441193e-05, - "loss": 1.0583, + "epoch": 0.038441677663664006, + "grad_norm": 0.25, + "learning_rate": 7.687894434882386e-05, + "loss": 0.8748, "step": 670 }, { - "epoch": 0.019365111241805687, - "grad_norm": 0.396484375, - "learning_rate": 3.87263339070568e-05, - "loss": 1.0586, + "epoch": 0.03872855585518389, + "grad_norm": 0.26171875, + "learning_rate": 7.74526678141136e-05, + "loss": 1.0654, "step": 675 }, { - "epoch": 0.01950855651026351, - "grad_norm": 0.423828125, - "learning_rate": 3.9013195639701665e-05, - "loss": 0.9666, + "epoch": 
0.03901543404670377, + "grad_norm": 0.255859375, + "learning_rate": 7.802639127940333e-05, + "loss": 1.0622, "step": 680 }, { - "epoch": 0.01965200177872133, - "grad_norm": 0.3984375, - "learning_rate": 3.930005737234653e-05, - "loss": 0.9563, + "epoch": 0.03930231223822365, + "grad_norm": 0.259765625, + "learning_rate": 7.860011474469306e-05, + "loss": 0.9676, "step": 685 }, { - "epoch": 0.01979544704717915, - "grad_norm": 0.373046875, - "learning_rate": 3.958691910499139e-05, - "loss": 1.0623, + "epoch": 0.03958919042974353, + "grad_norm": 0.275390625, + "learning_rate": 7.917383820998278e-05, + "loss": 0.9626, "step": 690 }, { - "epoch": 0.019938892315636968, - "grad_norm": 0.388671875, - "learning_rate": 3.987378083763626e-05, - "loss": 1.063, + "epoch": 0.03987606862126341, + "grad_norm": 0.259765625, + "learning_rate": 7.974756167527252e-05, + "loss": 0.9586, "step": 695 }, { - "epoch": 0.02008233758409479, - "grad_norm": 0.423828125, - "learning_rate": 4.0160642570281125e-05, - "loss": 0.9686, + "epoch": 0.04016294681278329, + "grad_norm": 0.267578125, + "learning_rate": 8.032128514056225e-05, + "loss": 1.0465, "step": 700 }, { - "epoch": 0.02022578285255261, - "grad_norm": 0.59375, - "learning_rate": 4.044750430292599e-05, - "loss": 1.0522, + "epoch": 0.04044982500430317, + "grad_norm": 0.2470703125, + "learning_rate": 8.089500860585198e-05, + "loss": 0.9695, "step": 705 }, { - "epoch": 0.020369228121010427, - "grad_norm": 0.412109375, - "learning_rate": 4.073436603557086e-05, - "loss": 1.0416, + "epoch": 0.04073670319582305, + "grad_norm": 0.2578125, + "learning_rate": 8.146873207114172e-05, + "loss": 0.9291, "step": 710 }, { - "epoch": 0.020512673389468248, - "grad_norm": 0.443359375, - "learning_rate": 4.102122776821572e-05, - "loss": 1.0676, + "epoch": 0.04102358138734293, + "grad_norm": 0.26171875, + "learning_rate": 8.204245553643144e-05, + "loss": 1.0255, "step": 715 }, { - "epoch": 0.02065611865792607, - "grad_norm": 0.4609375, - "learning_rate": 4.1308089500860585e-05, - "loss": 0.969, + "epoch": 0.04131045957886281, + "grad_norm": 0.25, + "learning_rate": 8.261617900172117e-05, + "loss": 0.9625, "step": 720 }, { - "epoch": 0.02079956392638389, - "grad_norm": 0.42578125, - "learning_rate": 4.159495123350545e-05, - "loss": 1.0625, + "epoch": 0.04159733777038269, + "grad_norm": 0.26171875, + "learning_rate": 8.31899024670109e-05, + "loss": 0.9827, "step": 725 }, { - "epoch": 0.020943009194841707, - "grad_norm": 0.388671875, - "learning_rate": 4.188181296615032e-05, - "loss": 0.9574, + "epoch": 0.041884215961902574, + "grad_norm": 0.236328125, + "learning_rate": 8.376362593230064e-05, + "loss": 1.0375, "step": 730 }, { - "epoch": 0.021086454463299528, - "grad_norm": 0.39453125, - "learning_rate": 4.2168674698795186e-05, - "loss": 0.9056, + "epoch": 0.042171094153422455, + "grad_norm": 0.279296875, + "learning_rate": 8.433734939759037e-05, + "loss": 1.0185, "step": 735 }, { - "epoch": 0.02122989973175735, - "grad_norm": 0.396484375, - "learning_rate": 4.2455536431440046e-05, - "loss": 0.9276, - "step": 740 + "epoch": 0.042457972344942335, + "grad_norm": 0.2470703125, + "learning_rate": 8.491107286288009e-05, + "loss": 0.9531, + "step": 740 }, { - "epoch": 0.021373345000215167, - "grad_norm": 0.392578125, - "learning_rate": 4.274239816408491e-05, - "loss": 1.0148, + "epoch": 0.042744850536462216, + "grad_norm": 0.25, + "learning_rate": 8.548479632816982e-05, + "loss": 1.0196, "step": 745 }, { - "epoch": 0.021516790268672988, - "grad_norm": 0.408203125, - "learning_rate": 
4.302925989672978e-05, - "loss": 0.9209, + "epoch": 0.043031728727982096, + "grad_norm": 0.2421875, + "learning_rate": 8.605851979345956e-05, + "loss": 0.9729, "step": 750 }, { - "epoch": 0.02166023553713081, - "grad_norm": 0.435546875, - "learning_rate": 4.3316121629374646e-05, - "loss": 1.0365, + "epoch": 0.04331860691950198, + "grad_norm": 0.251953125, + "learning_rate": 8.663224325874929e-05, + "loss": 0.9538, "step": 755 }, { - "epoch": 0.021803680805588626, - "grad_norm": 0.408203125, - "learning_rate": 4.3602983362019506e-05, - "loss": 0.9428, + "epoch": 0.04360548511102186, + "grad_norm": 0.2490234375, + "learning_rate": 8.720596672403901e-05, + "loss": 0.9903, "step": 760 }, { - "epoch": 0.021947126074046447, - "grad_norm": 0.38671875, - "learning_rate": 4.388984509466437e-05, - "loss": 0.9883, + "epoch": 0.04389236330254174, + "grad_norm": 0.2490234375, + "learning_rate": 8.777969018932875e-05, + "loss": 0.9379, "step": 765 }, { - "epoch": 0.022090571342504268, - "grad_norm": 0.443359375, - "learning_rate": 4.417670682730924e-05, - "loss": 0.9812, + "epoch": 0.04417924149406162, + "grad_norm": 0.255859375, + "learning_rate": 8.835341365461848e-05, + "loss": 1.0388, "step": 770 }, { - "epoch": 0.02223401661096209, - "grad_norm": 0.365234375, - "learning_rate": 4.4463568559954106e-05, - "loss": 0.9192, + "epoch": 0.0444661196855815, + "grad_norm": 0.23828125, + "learning_rate": 8.892713711990821e-05, + "loss": 0.9352, "step": 775 }, { - "epoch": 0.022377461879419906, - "grad_norm": 0.416015625, - "learning_rate": 4.475043029259897e-05, - "loss": 1.032, + "epoch": 0.04475299787710138, + "grad_norm": 0.259765625, + "learning_rate": 8.950086058519795e-05, + "loss": 1.0, "step": 780 }, { - "epoch": 0.022520907147877727, - "grad_norm": 0.392578125, - "learning_rate": 4.503729202524383e-05, - "loss": 1.0461, + "epoch": 0.04503987606862126, + "grad_norm": 0.2470703125, + "learning_rate": 9.007458405048767e-05, + "loss": 0.9976, "step": 785 }, { - "epoch": 0.022664352416335548, - "grad_norm": 0.40625, - "learning_rate": 4.53241537578887e-05, - "loss": 0.9711, + "epoch": 0.04532675426014114, + "grad_norm": 0.2451171875, + "learning_rate": 9.06483075157774e-05, + "loss": 0.9689, "step": 790 }, { - "epoch": 0.022807797684793366, - "grad_norm": 0.384765625, - "learning_rate": 4.5611015490533566e-05, - "loss": 1.0291, + "epoch": 0.04561363245166102, + "grad_norm": 0.248046875, + "learning_rate": 9.122203098106713e-05, + "loss": 1.0199, "step": 795 }, { - "epoch": 0.022951242953251186, - "grad_norm": 0.404296875, - "learning_rate": 4.589787722317843e-05, - "loss": 0.8768, + "epoch": 0.0459005106431809, + "grad_norm": 0.24609375, + "learning_rate": 9.179575444635687e-05, + "loss": 0.9226, "step": 800 }, { - "epoch": 0.023094688221709007, - "grad_norm": 0.37109375, - "learning_rate": 4.61847389558233e-05, - "loss": 0.9355, + "epoch": 0.046187388834700784, + "grad_norm": 0.248046875, + "learning_rate": 9.23694779116466e-05, + "loss": 0.9483, "step": 805 }, { - "epoch": 0.02323813349016683, - "grad_norm": 0.353515625, - "learning_rate": 4.647160068846816e-05, - "loss": 1.0195, + "epoch": 0.046474267026220664, + "grad_norm": 0.2470703125, + "learning_rate": 9.294320137693632e-05, + "loss": 0.9711, "step": 810 }, { - "epoch": 0.023381578758624646, - "grad_norm": 0.349609375, - "learning_rate": 4.675846242111303e-05, - "loss": 0.9915, + "epoch": 0.046761145217740545, + "grad_norm": 0.248046875, + "learning_rate": 9.351692484222605e-05, + "loss": 0.9429, "step": 815 }, { - "epoch": 0.023525024027082467, 
- "grad_norm": 0.384765625, - "learning_rate": 4.7045324153757894e-05, - "loss": 0.9434, + "epoch": 0.047048023409260425, + "grad_norm": 0.240234375, + "learning_rate": 9.409064830751579e-05, + "loss": 0.9849, "step": 820 }, { - "epoch": 0.023668469295540288, - "grad_norm": 0.369140625, - "learning_rate": 4.733218588640276e-05, - "loss": 1.035, + "epoch": 0.047334901600780306, + "grad_norm": 0.234375, + "learning_rate": 9.466437177280552e-05, + "loss": 1.0336, "step": 825 }, { - "epoch": 0.023811914563998105, - "grad_norm": 0.38671875, - "learning_rate": 4.761904761904762e-05, - "loss": 1.0405, + "epoch": 0.04762177979230019, + "grad_norm": 0.24609375, + "learning_rate": 9.523809523809524e-05, + "loss": 1.0258, "step": 830 }, { - "epoch": 0.023955359832455926, - "grad_norm": 0.357421875, - "learning_rate": 4.790590935169249e-05, - "loss": 0.9479, + "epoch": 0.04790865798382007, + "grad_norm": 0.24609375, + "learning_rate": 9.581181870338497e-05, + "loss": 0.9852, "step": 835 }, { - "epoch": 0.024098805100913747, - "grad_norm": 0.373046875, - "learning_rate": 4.8192771084337354e-05, - "loss": 0.9736, + "epoch": 0.04819553617533995, + "grad_norm": 0.232421875, + "learning_rate": 9.638554216867471e-05, + "loss": 0.9533, "step": 840 }, { - "epoch": 0.024242250369371568, - "grad_norm": 0.3828125, - "learning_rate": 4.847963281698222e-05, - "loss": 0.9449, + "epoch": 0.04848241436685983, + "grad_norm": 0.2451171875, + "learning_rate": 9.695926563396444e-05, + "loss": 0.9448, "step": 845 }, { - "epoch": 0.024385695637829385, - "grad_norm": 0.421875, - "learning_rate": 4.876649454962709e-05, - "loss": 1.0506, + "epoch": 0.04876929255837971, + "grad_norm": 0.25390625, + "learning_rate": 9.753298909925417e-05, + "loss": 1.0124, "step": 850 }, { - "epoch": 0.024529140906287206, - "grad_norm": 0.36328125, - "learning_rate": 4.905335628227195e-05, - "loss": 0.9795, + "epoch": 0.04905617074989959, + "grad_norm": 0.25, + "learning_rate": 9.81067125645439e-05, + "loss": 0.9959, "step": 855 }, { - "epoch": 0.024672586174745027, - "grad_norm": 0.392578125, - "learning_rate": 4.9340218014916814e-05, - "loss": 1.0718, + "epoch": 0.04934304894141947, + "grad_norm": 0.25390625, + "learning_rate": 9.868043602983363e-05, + "loss": 1.0287, "step": 860 }, { - "epoch": 0.024816031443202845, - "grad_norm": 0.365234375, - "learning_rate": 4.962707974756168e-05, - "loss": 1.0577, + "epoch": 0.04962992713293935, + "grad_norm": 0.25390625, + "learning_rate": 9.925415949512336e-05, + "loss": 0.9831, "step": 865 }, { - "epoch": 0.024959476711660666, - "grad_norm": 0.3828125, - "learning_rate": 4.991394148020654e-05, - "loss": 1.0496, + "epoch": 0.04991680532445923, + "grad_norm": 0.255859375, + "learning_rate": 9.982788296041308e-05, + "loss": 0.9833, "step": 870 }, { - "epoch": 0.025102921980118487, - "grad_norm": 0.392578125, - "learning_rate": 5.020080321285141e-05, - "loss": 0.9635, + "epoch": 0.05020368351597911, + "grad_norm": 0.23828125, + "learning_rate": 0.00010040160642570282, + "loss": 0.9969, "step": 875 }, { - "epoch": 0.025246367248576304, - "grad_norm": 0.349609375, - "learning_rate": 5.0487664945496275e-05, - "loss": 0.9209, + "epoch": 0.05049056170749899, + "grad_norm": 0.251953125, + "learning_rate": 0.00010097532989099255, + "loss": 1.0437, "step": 880 }, { - "epoch": 0.025389812517034125, - "grad_norm": 0.40234375, - "learning_rate": 5.077452667814114e-05, - "loss": 1.0667, + "epoch": 0.050777439899018874, + "grad_norm": 0.234375, + "learning_rate": 0.00010154905335628228, + "loss": 1.031, "step": 885 
}, { - "epoch": 0.025533257785491946, - "grad_norm": 0.333984375, - "learning_rate": 5.1061388410786e-05, - "loss": 0.9609, + "epoch": 0.051064318090538754, + "grad_norm": 0.2412109375, + "learning_rate": 0.000102122776821572, + "loss": 1.0094, "step": 890 }, { - "epoch": 0.025676703053949767, - "grad_norm": 0.37890625, - "learning_rate": 5.134825014343087e-05, - "loss": 0.9506, + "epoch": 0.051351196282058635, + "grad_norm": 0.2314453125, + "learning_rate": 0.00010269650028686174, + "loss": 0.9602, "step": 895 }, { - "epoch": 0.025820148322407584, - "grad_norm": 0.361328125, - "learning_rate": 5.1635111876075735e-05, - "loss": 0.9871, + "epoch": 0.051638074473578516, + "grad_norm": 0.2451171875, + "learning_rate": 0.00010327022375215147, + "loss": 1.0467, "step": 900 }, { - "epoch": 0.025963593590865405, - "grad_norm": 0.392578125, - "learning_rate": 5.1921973608720595e-05, - "loss": 0.9825, + "epoch": 0.051924952665098396, + "grad_norm": 0.248046875, + "learning_rate": 0.00010384394721744119, + "loss": 0.9441, "step": 905 }, { - "epoch": 0.026107038859323226, - "grad_norm": 0.38671875, - "learning_rate": 5.220883534136547e-05, - "loss": 0.9855, + "epoch": 0.05221183085661828, + "grad_norm": 0.234375, + "learning_rate": 0.00010441767068273094, + "loss": 1.0901, "step": 910 }, { - "epoch": 0.026250484127781044, - "grad_norm": 0.359375, - "learning_rate": 5.249569707401033e-05, - "loss": 1.053, + "epoch": 0.05249870904813816, + "grad_norm": 0.232421875, + "learning_rate": 0.00010499139414802066, + "loss": 0.9532, "step": 915 }, { - "epoch": 0.026393929396238865, - "grad_norm": 0.40625, - "learning_rate": 5.278255880665519e-05, - "loss": 1.0475, + "epoch": 0.05278558723965804, + "grad_norm": 0.2451171875, + "learning_rate": 0.00010556511761331038, + "loss": 1.0329, "step": 920 }, { - "epoch": 0.026537374664696686, - "grad_norm": 0.34765625, - "learning_rate": 5.306942053930006e-05, - "loss": 0.9429, + "epoch": 0.05307246543117792, + "grad_norm": 0.2353515625, + "learning_rate": 0.00010613884107860012, + "loss": 0.9893, "step": 925 }, { - "epoch": 0.026680819933154507, - "grad_norm": 0.38671875, - "learning_rate": 5.335628227194492e-05, - "loss": 0.9329, + "epoch": 0.0533593436226978, + "grad_norm": 0.25, + "learning_rate": 0.00010671256454388984, + "loss": 0.9567, "step": 930 }, { - "epoch": 0.026824265201612324, - "grad_norm": 0.482421875, - "learning_rate": 5.364314400458978e-05, - "loss": 1.1025, + "epoch": 0.05364622181421768, + "grad_norm": 0.26171875, + "learning_rate": 0.00010728628800917956, + "loss": 1.0186, "step": 935 }, { - "epoch": 0.026967710470070145, - "grad_norm": 0.375, - "learning_rate": 5.3930005737234656e-05, - "loss": 1.0252, + "epoch": 0.05393310000573756, + "grad_norm": 0.2353515625, + "learning_rate": 0.00010786001147446931, + "loss": 0.9697, "step": 940 }, { - "epoch": 0.027111155738527966, - "grad_norm": 0.365234375, - "learning_rate": 5.4216867469879516e-05, - "loss": 1.0491, + "epoch": 0.05421997819725744, + "grad_norm": 0.2353515625, + "learning_rate": 0.00010843373493975903, + "loss": 0.967, "step": 945 }, { - "epoch": 0.027254601006985783, - "grad_norm": 0.35546875, - "learning_rate": 5.450372920252439e-05, - "loss": 0.9434, + "epoch": 0.05450685638877732, + "grad_norm": 0.220703125, + "learning_rate": 0.00010900745840504878, + "loss": 0.9157, "step": 950 }, { - "epoch": 0.027398046275443604, - "grad_norm": 0.36328125, - "learning_rate": 5.479059093516925e-05, - "loss": 0.9915, + "epoch": 0.0547937345802972, + "grad_norm": 0.2373046875, + "learning_rate": 
0.0001095811818703385, + "loss": 0.9864, "step": 955 }, { - "epoch": 0.027541491543901425, - "grad_norm": 0.408203125, - "learning_rate": 5.507745266781411e-05, - "loss": 0.9351, + "epoch": 0.055080612771817083, + "grad_norm": 0.25, + "learning_rate": 0.00011015490533562822, + "loss": 0.9616, "step": 960 }, { - "epoch": 0.027684936812359243, - "grad_norm": 0.384765625, - "learning_rate": 5.536431440045898e-05, - "loss": 1.0392, + "epoch": 0.055367490963336964, + "grad_norm": 0.232421875, + "learning_rate": 0.00011072862880091797, + "loss": 0.9466, "step": 965 }, { - "epoch": 0.027828382080817064, - "grad_norm": 0.4140625, - "learning_rate": 5.565117613310384e-05, - "loss": 1.0126, + "epoch": 0.055654369154856845, + "grad_norm": 0.2421875, + "learning_rate": 0.00011130235226620769, + "loss": 0.966, "step": 970 }, { - "epoch": 0.027971827349274885, - "grad_norm": 0.359375, - "learning_rate": 5.5938037865748716e-05, - "loss": 0.9299, + "epoch": 0.055941247346376725, + "grad_norm": 0.255859375, + "learning_rate": 0.00011187607573149743, + "loss": 0.9292, "step": 975 }, { - "epoch": 0.028115272617732705, - "grad_norm": 0.41015625, - "learning_rate": 5.6224899598393576e-05, - "loss": 0.926, + "epoch": 0.056228125537896606, + "grad_norm": 0.236328125, + "learning_rate": 0.00011244979919678715, + "loss": 0.9724, "step": 980 }, { - "epoch": 0.028258717886190523, - "grad_norm": 0.376953125, - "learning_rate": 5.6511761331038436e-05, - "loss": 0.9563, + "epoch": 0.05651500372941649, + "grad_norm": 0.2353515625, + "learning_rate": 0.00011302352266207687, + "loss": 0.9683, "step": 985 }, { - "epoch": 0.028402163154648344, - "grad_norm": 0.38671875, - "learning_rate": 5.679862306368331e-05, - "loss": 0.9493, + "epoch": 0.05680188192093637, + "grad_norm": 0.2373046875, + "learning_rate": 0.00011359724612736662, + "loss": 0.9164, "step": 990 }, { - "epoch": 0.028545608423106165, - "grad_norm": 0.388671875, - "learning_rate": 5.708548479632817e-05, - "loss": 1.048, + "epoch": 0.05708876011245625, + "grad_norm": 0.23828125, + "learning_rate": 0.00011417096959265634, + "loss": 0.9309, "step": 995 }, { - "epoch": 0.028689053691563982, - "grad_norm": 0.373046875, - "learning_rate": 5.737234652897304e-05, - "loss": 0.9495, + "epoch": 0.05737563830397613, + "grad_norm": 0.24609375, + "learning_rate": 0.00011474469305794609, + "loss": 1.0243, "step": 1000 }, { - "epoch": 0.028832498960021803, - "grad_norm": 0.37109375, - "learning_rate": 5.76592082616179e-05, - "loss": 0.9564, + "epoch": 0.05766251649549601, + "grad_norm": 0.2255859375, + "learning_rate": 0.0001153184165232358, + "loss": 0.9735, "step": 1005 }, { - "epoch": 0.028975944228479624, - "grad_norm": 0.39453125, - "learning_rate": 5.794606999426276e-05, - "loss": 0.9688, + "epoch": 0.05794939468701589, + "grad_norm": 0.2412109375, + "learning_rate": 0.00011589213998852553, + "loss": 1.0049, "step": 1010 }, { - "epoch": 0.029119389496937445, - "grad_norm": 0.3671875, - "learning_rate": 5.823293172690764e-05, - "loss": 0.985, + "epoch": 0.05823627287853577, + "grad_norm": 0.25390625, + "learning_rate": 0.00011646586345381527, + "loss": 1.0304, "step": 1015 }, { - "epoch": 0.029262834765395263, - "grad_norm": 0.337890625, - "learning_rate": 5.85197934595525e-05, - "loss": 0.9921, + "epoch": 0.05852315107005565, + "grad_norm": 0.25390625, + "learning_rate": 0.000117039586919105, + "loss": 0.9818, "step": 1020 }, { - "epoch": 0.029406280033853083, - "grad_norm": 0.36328125, - "learning_rate": 5.880665519219737e-05, - "loss": 1.0165, + "epoch": 
0.05881002926157553, + "grad_norm": 0.2353515625, + "learning_rate": 0.00011761331038439474, + "loss": 0.9646, "step": 1025 }, { - "epoch": 0.029549725302310904, - "grad_norm": 0.37109375, - "learning_rate": 5.909351692484223e-05, - "loss": 1.176, + "epoch": 0.05909690745309541, + "grad_norm": 0.2255859375, + "learning_rate": 0.00011818703384968446, + "loss": 0.9753, "step": 1030 }, { - "epoch": 0.029693170570768722, - "grad_norm": 0.357421875, - "learning_rate": 5.938037865748709e-05, - "loss": 1.0102, + "epoch": 0.05938378564461529, + "grad_norm": 0.2353515625, + "learning_rate": 0.00011876075731497418, + "loss": 0.9675, "step": 1035 }, { - "epoch": 0.029836615839226543, - "grad_norm": 0.37109375, - "learning_rate": 5.9667240390131964e-05, - "loss": 1.1129, + "epoch": 0.059670663836135174, + "grad_norm": 0.2109375, + "learning_rate": 0.00011933448078026393, + "loss": 0.9606, "step": 1040 }, { - "epoch": 0.029980061107684364, - "grad_norm": 0.37109375, - "learning_rate": 5.9954102122776824e-05, - "loss": 0.9815, + "epoch": 0.059957542027655054, + "grad_norm": 0.2265625, + "learning_rate": 0.00011990820424555365, + "loss": 1.0847, "step": 1045 }, { - "epoch": 0.030123506376142185, - "grad_norm": 0.34375, - "learning_rate": 6.02409638554217e-05, - "loss": 1.0334, + "epoch": 0.060244420219174935, + "grad_norm": 0.2265625, + "learning_rate": 0.0001204819277108434, + "loss": 1.0064, "step": 1050 }, { - "epoch": 0.030266951644600002, - "grad_norm": 0.33984375, - "learning_rate": 6.052782558806656e-05, - "loss": 0.9865, + "epoch": 0.060531298410694816, + "grad_norm": 0.2578125, + "learning_rate": 0.00012105565117613311, + "loss": 0.967, "step": 1055 }, { - "epoch": 0.030410396913057823, - "grad_norm": 0.349609375, - "learning_rate": 6.081468732071142e-05, - "loss": 0.9296, + "epoch": 0.060818176602214696, + "grad_norm": 0.2333984375, + "learning_rate": 0.00012162937464142283, + "loss": 0.9456, "step": 1060 }, { - "epoch": 0.030553842181515644, - "grad_norm": 0.353515625, - "learning_rate": 6.110154905335628e-05, - "loss": 1.0374, + "epoch": 0.06110505479373458, + "grad_norm": 0.2392578125, + "learning_rate": 0.00012220309810671257, + "loss": 1.0216, "step": 1065 }, { - "epoch": 0.03069728744997346, - "grad_norm": 0.33984375, - "learning_rate": 6.138841078600115e-05, - "loss": 0.8896, + "epoch": 0.06139193298525446, + "grad_norm": 0.2392578125, + "learning_rate": 0.0001227768215720023, + "loss": 0.9022, "step": 1070 }, { - "epoch": 0.030840732718431282, - "grad_norm": 0.376953125, - "learning_rate": 6.1675272518646e-05, - "loss": 1.0535, + "epoch": 0.06167881117677434, + "grad_norm": 0.23828125, + "learning_rate": 0.000123350545037292, + "loss": 0.9295, "step": 1075 }, { - "epoch": 0.030984177986889103, - "grad_norm": 0.35546875, - "learning_rate": 6.196213425129088e-05, - "loss": 0.9443, + "epoch": 0.06196568936829422, + "grad_norm": 0.236328125, + "learning_rate": 0.00012392426850258177, + "loss": 0.983, "step": 1080 }, { - "epoch": 0.03112762325534692, - "grad_norm": 0.35546875, - "learning_rate": 6.224899598393574e-05, - "loss": 0.934, + "epoch": 0.0622525675598141, + "grad_norm": 0.2333984375, + "learning_rate": 0.00012449799196787148, + "loss": 0.9705, "step": 1085 }, { - "epoch": 0.03127106852380474, - "grad_norm": 0.37890625, - "learning_rate": 6.253585771658062e-05, - "loss": 0.9519, + "epoch": 0.06253944575133398, + "grad_norm": 0.2236328125, + "learning_rate": 0.00012507171543316124, + "loss": 0.9243, "step": 1090 }, { - "epoch": 0.03141451379226256, - "grad_norm": 0.3515625, - 
"learning_rate": 6.282271944922547e-05, - "loss": 1.031, + "epoch": 0.06282632394285387, + "grad_norm": 0.31640625, + "learning_rate": 0.00012564543889845094, + "loss": 0.9758, "step": 1095 }, { - "epoch": 0.031557959060720384, - "grad_norm": 0.40625, - "learning_rate": 6.310958118187034e-05, - "loss": 1.0593, + "epoch": 0.06311320213437374, + "grad_norm": 0.2392578125, + "learning_rate": 0.00012621916236374068, + "loss": 0.9814, "step": 1100 }, { - "epoch": 0.0317014043291782, - "grad_norm": 0.3359375, - "learning_rate": 6.33964429145152e-05, - "loss": 0.9239, + "epoch": 0.06340008032589363, + "grad_norm": 0.2470703125, + "learning_rate": 0.0001267928858290304, + "loss": 0.942, "step": 1105 }, { - "epoch": 0.03184484959763602, - "grad_norm": 0.353515625, - "learning_rate": 6.368330464716007e-05, - "loss": 0.9742, + "epoch": 0.0636869585174135, + "grad_norm": 0.23046875, + "learning_rate": 0.00012736660929432014, + "loss": 0.9782, "step": 1110 }, { - "epoch": 0.03198829486609384, - "grad_norm": 0.4375, - "learning_rate": 6.397016637980494e-05, - "loss": 1.0651, + "epoch": 0.06397383670893339, + "grad_norm": 0.2275390625, + "learning_rate": 0.00012794033275960988, + "loss": 0.9643, "step": 1115 }, { - "epoch": 0.03213174013455166, - "grad_norm": 0.365234375, - "learning_rate": 6.42570281124498e-05, - "loss": 0.9396, + "epoch": 0.06426071490045326, + "grad_norm": 0.2255859375, + "learning_rate": 0.0001285140562248996, + "loss": 0.9427, "step": 1120 }, { - "epoch": 0.032275185403009485, - "grad_norm": 0.361328125, - "learning_rate": 6.454388984509466e-05, - "loss": 0.9862, + "epoch": 0.06454759309197315, + "grad_norm": 0.220703125, + "learning_rate": 0.00012908777969018932, + "loss": 0.987, "step": 1125 }, { - "epoch": 0.0324186306714673, - "grad_norm": 0.41015625, - "learning_rate": 6.483075157773954e-05, - "loss": 0.9968, + "epoch": 0.06483447128349303, + "grad_norm": 0.236328125, + "learning_rate": 0.00012966150315547908, + "loss": 0.9821, "step": 1130 }, { - "epoch": 0.03256207593992512, - "grad_norm": 0.326171875, - "learning_rate": 6.511761331038439e-05, - "loss": 1.0133, + "epoch": 0.06512134947501291, + "grad_norm": 0.234375, + "learning_rate": 0.00013023522662076878, + "loss": 1.052, "step": 1135 }, { - "epoch": 0.032705521208382944, - "grad_norm": 0.326171875, - "learning_rate": 6.540447504302927e-05, - "loss": 1.0448, + "epoch": 0.06540822766653279, + "grad_norm": 0.220703125, + "learning_rate": 0.00013080895008605854, + "loss": 0.9462, "step": 1140 }, { - "epoch": 0.03284896647684076, - "grad_norm": 0.423828125, - "learning_rate": 6.569133677567413e-05, - "loss": 1.044, + "epoch": 0.06569510585805267, + "grad_norm": 0.2294921875, + "learning_rate": 0.00013138267355134825, + "loss": 1.0105, "step": 1145 }, { - "epoch": 0.03299241174529858, - "grad_norm": 0.330078125, - "learning_rate": 6.597819850831899e-05, - "loss": 0.9838, + "epoch": 0.06598198404957255, + "grad_norm": 0.23828125, + "learning_rate": 0.00013195639701663798, + "loss": 1.014, "step": 1150 }, { - "epoch": 0.0331358570137564, - "grad_norm": 0.375, - "learning_rate": 6.626506024096386e-05, - "loss": 1.022, + "epoch": 0.06626886224109244, + "grad_norm": 0.2265625, + "learning_rate": 0.00013253012048192772, + "loss": 0.991, "step": 1155 }, { - "epoch": 0.03327930228221422, - "grad_norm": 0.373046875, - "learning_rate": 6.655192197360873e-05, - "loss": 0.8519, + "epoch": 0.06655574043261231, + "grad_norm": 0.248046875, + "learning_rate": 0.00013310384394721745, + "loss": 1.0098, "step": 1160 }, { - "epoch": 
0.03342274755067204, - "grad_norm": 0.33984375, - "learning_rate": 6.683878370625359e-05, - "loss": 0.9865, + "epoch": 0.0668426186241322, + "grad_norm": 0.2314453125, + "learning_rate": 0.00013367756741250719, + "loss": 0.9866, "step": 1165 }, { - "epoch": 0.03356619281912986, - "grad_norm": 0.333984375, - "learning_rate": 6.712564543889846e-05, - "loss": 1.0548, + "epoch": 0.06712949681565207, + "grad_norm": 0.2294921875, + "learning_rate": 0.00013425129087779692, + "loss": 0.9608, "step": 1170 }, { - "epoch": 0.03370963808758768, - "grad_norm": 0.37890625, - "learning_rate": 6.741250717154331e-05, - "loss": 0.9675, + "epoch": 0.06741637500717196, + "grad_norm": 0.234375, + "learning_rate": 0.00013482501434308663, + "loss": 0.9711, "step": 1175 }, { - "epoch": 0.0338530833560455, - "grad_norm": 0.36328125, - "learning_rate": 6.769936890418819e-05, - "loss": 1.062, + "epoch": 0.06770325319869183, + "grad_norm": 0.228515625, + "learning_rate": 0.00013539873780837639, + "loss": 0.9319, "step": 1180 }, { - "epoch": 0.03399652862450332, - "grad_norm": 0.3515625, - "learning_rate": 6.798623063683305e-05, - "loss": 1.0348, + "epoch": 0.06799013139021172, + "grad_norm": 0.2314453125, + "learning_rate": 0.0001359724612736661, + "loss": 0.9614, "step": 1185 }, { - "epoch": 0.03413997389296114, - "grad_norm": 0.33984375, - "learning_rate": 6.827309236947793e-05, - "loss": 1.0568, + "epoch": 0.0682770095817316, + "grad_norm": 0.22265625, + "learning_rate": 0.00013654618473895585, + "loss": 0.9333, "step": 1190 }, { - "epoch": 0.034283419161418964, - "grad_norm": 0.36328125, - "learning_rate": 6.855995410212278e-05, - "loss": 1.0876, + "epoch": 0.06856388777325148, + "grad_norm": 0.236328125, + "learning_rate": 0.00013711990820424556, + "loss": 1.0135, "step": 1195 }, { - "epoch": 0.03442686442987678, - "grad_norm": 0.345703125, - "learning_rate": 6.884681583476765e-05, - "loss": 1.0344, + "epoch": 0.06885076596477135, + "grad_norm": 0.2333984375, + "learning_rate": 0.0001376936316695353, + "loss": 0.9592, "step": 1200 }, { - "epoch": 0.0345703096983346, - "grad_norm": 0.333984375, - "learning_rate": 6.913367756741251e-05, - "loss": 1.0287, + "epoch": 0.06913764415629124, + "grad_norm": 0.2265625, + "learning_rate": 0.00013826735513482503, + "loss": 0.9512, "step": 1205 }, { - "epoch": 0.03471375496679242, - "grad_norm": 0.3515625, - "learning_rate": 6.942053930005738e-05, - "loss": 0.8621, + "epoch": 0.06942452234781112, + "grad_norm": 0.2421875, + "learning_rate": 0.00013884107860011476, + "loss": 0.9239, "step": 1210 }, { - "epoch": 0.03485720023525024, - "grad_norm": 0.375, - "learning_rate": 6.970740103270223e-05, - "loss": 1.0204, + "epoch": 0.069711400539331, + "grad_norm": 0.234375, + "learning_rate": 0.00013941480206540447, + "loss": 0.9585, "step": 1215 }, { - "epoch": 0.03500064550370806, - "grad_norm": 0.3359375, - "learning_rate": 6.999426276534711e-05, - "loss": 0.9297, + "epoch": 0.06999827873085088, + "grad_norm": 0.2177734375, + "learning_rate": 0.00013998852553069423, + "loss": 0.9734, "step": 1220 }, { - "epoch": 0.03514409077216588, - "grad_norm": 0.330078125, - "learning_rate": 7.028112449799197e-05, - "loss": 0.9371, + "epoch": 0.07028515692237076, + "grad_norm": 0.224609375, + "learning_rate": 0.00014056224899598393, + "loss": 0.9214, "step": 1225 }, { - "epoch": 0.0352875360406237, - "grad_norm": 0.34375, - "learning_rate": 7.056798623063683e-05, - "loss": 0.9454, + "epoch": 0.07057203511389064, + "grad_norm": 0.2421875, + "learning_rate": 0.00014113597246127367, + "loss": 
0.9652, "step": 1230 }, { - "epoch": 0.03543098130908152, - "grad_norm": 0.34375, - "learning_rate": 7.08548479632817e-05, - "loss": 1.0111, + "epoch": 0.07085891330541053, + "grad_norm": 0.2421875, + "learning_rate": 0.0001417096959265634, + "loss": 0.9355, "step": 1235 }, { - "epoch": 0.03557442657753934, - "grad_norm": 0.390625, - "learning_rate": 7.114170969592657e-05, - "loss": 1.1106, + "epoch": 0.0711457914969304, + "grad_norm": 0.2294921875, + "learning_rate": 0.00014228341939185313, + "loss": 0.9577, "step": 1240 }, { - "epoch": 0.03571787184599716, - "grad_norm": 0.40234375, - "learning_rate": 7.142857142857143e-05, - "loss": 1.1232, + "epoch": 0.07143266968845029, + "grad_norm": 0.234375, + "learning_rate": 0.00014285714285714287, + "loss": 0.9403, "step": 1245 }, { - "epoch": 0.03586131711445498, - "grad_norm": 0.337890625, - "learning_rate": 7.17154331612163e-05, - "loss": 0.9529, + "epoch": 0.07171954787997016, + "grad_norm": 0.23046875, + "learning_rate": 0.0001434308663224326, + "loss": 1.0237, "step": 1250 }, { - "epoch": 0.0360047623829128, - "grad_norm": 0.341796875, - "learning_rate": 7.200229489386117e-05, - "loss": 0.9466, + "epoch": 0.07200642607149005, + "grad_norm": 0.2431640625, + "learning_rate": 0.00014400458978772233, + "loss": 0.994, "step": 1255 }, { - "epoch": 0.03614820765137062, - "grad_norm": 0.365234375, - "learning_rate": 7.228915662650602e-05, - "loss": 0.9964, + "epoch": 0.07229330426300992, + "grad_norm": 0.2333984375, + "learning_rate": 0.00014457831325301204, + "loss": 0.9788, "step": 1260 }, { - "epoch": 0.036291652919828436, - "grad_norm": 0.390625, - "learning_rate": 7.257601835915089e-05, - "loss": 0.9143, + "epoch": 0.07258018245452981, + "grad_norm": 0.2294921875, + "learning_rate": 0.00014515203671830177, + "loss": 1.028, "step": 1265 }, { - "epoch": 0.03643509818828626, - "grad_norm": 0.400390625, - "learning_rate": 7.286288009179575e-05, - "loss": 1.0026, + "epoch": 0.07286706064604968, + "grad_norm": 0.228515625, + "learning_rate": 0.0001457257601835915, + "loss": 0.9475, "step": 1270 }, { - "epoch": 0.03657854345674408, - "grad_norm": 0.341796875, - "learning_rate": 7.314974182444062e-05, - "loss": 0.9495, + "epoch": 0.07315393883756957, + "grad_norm": 0.2333984375, + "learning_rate": 0.00014629948364888124, + "loss": 0.9661, "step": 1275 }, { - "epoch": 0.0367219887252019, - "grad_norm": 0.361328125, - "learning_rate": 7.343660355708549e-05, - "loss": 1.0155, + "epoch": 0.07344081702908944, + "grad_norm": 0.23046875, + "learning_rate": 0.00014687320711417098, + "loss": 1.0316, "step": 1280 }, { - "epoch": 0.03686543399365972, - "grad_norm": 0.32421875, - "learning_rate": 7.372346528973035e-05, - "loss": 0.9394, + "epoch": 0.07372769522060933, + "grad_norm": 0.2490234375, + "learning_rate": 0.0001474469305794607, + "loss": 1.0089, "step": 1285 }, { - "epoch": 0.03700887926211754, - "grad_norm": 0.3359375, - "learning_rate": 7.401032702237521e-05, - "loss": 1.0107, + "epoch": 0.0740145734121292, + "grad_norm": 0.236328125, + "learning_rate": 0.00014802065404475042, + "loss": 0.9406, "step": 1290 }, { - "epoch": 0.03715232453057536, - "grad_norm": 0.361328125, - "learning_rate": 7.429718875502009e-05, - "loss": 0.9539, + "epoch": 0.0743014516036491, + "grad_norm": 0.251953125, + "learning_rate": 0.00014859437751004018, + "loss": 1.0317, "step": 1295 }, { - "epoch": 0.03729576979903318, - "grad_norm": 0.3359375, - "learning_rate": 7.458405048766494e-05, - "loss": 0.88, + "epoch": 0.07458832979516897, + "grad_norm": 0.2392578125, + 
"learning_rate": 0.00014916810097532988, + "loss": 0.953, "step": 1300 }, { - "epoch": 0.037439215067491, - "grad_norm": 0.41015625, - "learning_rate": 7.487091222030982e-05, - "loss": 1.0456, + "epoch": 0.07487520798668885, + "grad_norm": 0.263671875, + "learning_rate": 0.00014974182444061964, + "loss": 0.9658, "step": 1305 }, { - "epoch": 0.03758266033594882, - "grad_norm": 0.341796875, - "learning_rate": 7.515777395295467e-05, - "loss": 1.0223, + "epoch": 0.07516208617820873, + "grad_norm": 0.2333984375, + "learning_rate": 0.00015031554790590935, + "loss": 0.927, "step": 1310 }, { - "epoch": 0.03772610560440664, - "grad_norm": 0.373046875, - "learning_rate": 7.544463568559954e-05, - "loss": 0.9479, + "epoch": 0.07544896436972862, + "grad_norm": 0.2216796875, + "learning_rate": 0.00015088927137119908, + "loss": 0.9357, "step": 1315 }, { - "epoch": 0.037869550872864456, - "grad_norm": 0.330078125, - "learning_rate": 7.573149741824441e-05, - "loss": 0.9815, + "epoch": 0.07573584256124849, + "grad_norm": 0.2392578125, + "learning_rate": 0.00015146299483648882, + "loss": 0.9423, "step": 1320 }, { - "epoch": 0.03801299614132228, - "grad_norm": 0.314453125, - "learning_rate": 7.601835915088928e-05, - "loss": 0.9556, + "epoch": 0.07602272075276838, + "grad_norm": 0.259765625, + "learning_rate": 0.00015203671830177855, + "loss": 0.9861, "step": 1325 }, { - "epoch": 0.0381564414097801, - "grad_norm": 0.353515625, - "learning_rate": 7.630522088353414e-05, - "loss": 1.0146, + "epoch": 0.07630959894428825, + "grad_norm": 0.244140625, + "learning_rate": 0.00015261044176706828, + "loss": 0.9871, "step": 1330 }, { - "epoch": 0.038299886678237915, - "grad_norm": 0.33984375, - "learning_rate": 7.659208261617901e-05, - "loss": 0.9497, + "epoch": 0.07659647713580814, + "grad_norm": 0.24609375, + "learning_rate": 0.00015318416523235802, + "loss": 0.9405, "step": 1335 }, { - "epoch": 0.03844333194669574, - "grad_norm": 0.337890625, - "learning_rate": 7.687894434882386e-05, - "loss": 0.797, + "epoch": 0.07688335532732801, + "grad_norm": 0.2294921875, + "learning_rate": 0.00015375788869764772, + "loss": 0.9789, "step": 1340 }, { - "epoch": 0.03858677721515356, - "grad_norm": 0.390625, - "learning_rate": 7.716580608146874e-05, - "loss": 1.157, + "epoch": 0.0771702335188479, + "grad_norm": 0.25390625, + "learning_rate": 0.00015433161216293748, + "loss": 1.0267, "step": 1345 }, { - "epoch": 0.038730222483611375, - "grad_norm": 0.345703125, - "learning_rate": 7.74526678141136e-05, - "loss": 0.9708, + "epoch": 0.07745711171036777, + "grad_norm": 0.2138671875, + "learning_rate": 0.0001549053356282272, + "loss": 0.9546, "step": 1350 }, { - "epoch": 0.0388736677520692, - "grad_norm": 0.3203125, - "learning_rate": 7.773952954675848e-05, - "loss": 1.0961, + "epoch": 0.07774398990188766, + "grad_norm": 0.2353515625, + "learning_rate": 0.00015547905909351695, + "loss": 0.9847, "step": 1355 }, { - "epoch": 0.03901711302052702, - "grad_norm": 0.333984375, - "learning_rate": 7.802639127940333e-05, - "loss": 1.0263, + "epoch": 0.07803086809340753, + "grad_norm": 0.2353515625, + "learning_rate": 0.00015605278255880666, + "loss": 0.9393, "step": 1360 }, { - "epoch": 0.03916055828898484, - "grad_norm": 0.33984375, - "learning_rate": 7.83132530120482e-05, - "loss": 0.894, + "epoch": 0.07831774628492742, + "grad_norm": 0.2294921875, + "learning_rate": 0.0001566265060240964, + "loss": 1.0087, "step": 1365 }, { - "epoch": 0.03930400355744266, - "grad_norm": 0.36328125, - "learning_rate": 7.860011474469306e-05, - "loss": 1.0383, + 
"epoch": 0.0786046244764473, + "grad_norm": 0.2255859375, + "learning_rate": 0.00015720022948938613, + "loss": 0.9367, "step": 1370 }, { - "epoch": 0.039447448825900476, - "grad_norm": 0.310546875, - "learning_rate": 7.888697647733793e-05, - "loss": 0.9609, + "epoch": 0.07889150266796718, + "grad_norm": 0.2138671875, + "learning_rate": 0.00015777395295467586, + "loss": 0.9651, "step": 1375 }, { - "epoch": 0.0395908940943583, - "grad_norm": 0.353515625, - "learning_rate": 7.917383820998278e-05, - "loss": 0.9616, + "epoch": 0.07917838085948706, + "grad_norm": 0.271484375, + "learning_rate": 0.00015834767641996557, + "loss": 1.0075, "step": 1380 }, { - "epoch": 0.03973433936281612, - "grad_norm": 0.341796875, - "learning_rate": 7.946069994262766e-05, - "loss": 0.9545, + "epoch": 0.07946525905100695, + "grad_norm": 0.2490234375, + "learning_rate": 0.00015892139988525533, + "loss": 0.95, "step": 1385 }, { - "epoch": 0.039877784631273935, - "grad_norm": 0.341796875, - "learning_rate": 7.974756167527252e-05, - "loss": 0.9593, + "epoch": 0.07975213724252682, + "grad_norm": 0.2421875, + "learning_rate": 0.00015949512335054503, + "loss": 0.9539, "step": 1390 }, { - "epoch": 0.04002122989973176, - "grad_norm": 0.33984375, - "learning_rate": 8.00344234079174e-05, - "loss": 1.0591, + "epoch": 0.0800390154340467, + "grad_norm": 0.2216796875, + "learning_rate": 0.0001600688468158348, + "loss": 0.9659, "step": 1395 }, { - "epoch": 0.04016467516818958, - "grad_norm": 0.349609375, - "learning_rate": 8.032128514056225e-05, - "loss": 1.0307, + "epoch": 0.08032589362556658, + "grad_norm": 0.23046875, + "learning_rate": 0.0001606425702811245, + "loss": 0.9324, "step": 1400 }, { - "epoch": 0.040308120436647395, - "grad_norm": 0.33203125, - "learning_rate": 8.060814687320712e-05, - "loss": 1.0063, + "epoch": 0.08061277181708647, + "grad_norm": 0.23828125, + "learning_rate": 0.00016121629374641423, + "loss": 1.0698, "step": 1405 }, { - "epoch": 0.04045156570510522, - "grad_norm": 0.3359375, - "learning_rate": 8.089500860585198e-05, - "loss": 0.9293, + "epoch": 0.08089965000860634, + "grad_norm": 0.2412109375, + "learning_rate": 0.00016179001721170397, + "loss": 1.0436, "step": 1410 }, { - "epoch": 0.04059501097356304, - "grad_norm": 0.3359375, - "learning_rate": 8.118187033849685e-05, - "loss": 0.8336, + "epoch": 0.08118652820012623, + "grad_norm": 0.2392578125, + "learning_rate": 0.0001623637406769937, + "loss": 0.9879, "step": 1415 }, { - "epoch": 0.040738456242020854, - "grad_norm": 0.31640625, - "learning_rate": 8.146873207114172e-05, - "loss": 1.0212, + "epoch": 0.0814734063916461, + "grad_norm": 0.224609375, + "learning_rate": 0.00016293746414228343, + "loss": 0.9993, "step": 1420 }, { - "epoch": 0.04088190151047868, - "grad_norm": 0.349609375, - "learning_rate": 8.175559380378658e-05, - "loss": 1.0292, - "step": 1425 + "epoch": 0.08176028458316599, + "grad_norm": 0.2412109375, + "learning_rate": 0.00016351118760757317, + "loss": 0.9543, + "step": 1425 }, { - "epoch": 0.041025346778936496, - "grad_norm": 0.3515625, - "learning_rate": 8.204245553643144e-05, - "loss": 1.0195, + "epoch": 0.08204716277468586, + "grad_norm": 0.2265625, + "learning_rate": 0.00016408491107286287, + "loss": 1.0417, "step": 1430 }, { - "epoch": 0.04116879204739431, - "grad_norm": 0.32421875, - "learning_rate": 8.232931726907632e-05, - "loss": 0.9894, + "epoch": 0.08233404096620575, + "grad_norm": 0.24609375, + "learning_rate": 0.00016465863453815263, + "loss": 0.935, "step": 1435 }, { - "epoch": 0.04131223731585214, - "grad_norm": 
0.337890625, - "learning_rate": 8.261617900172117e-05, - "loss": 0.9336, + "epoch": 0.08262091915772563, + "grad_norm": 0.265625, + "learning_rate": 0.00016523235800344234, + "loss": 1.041, "step": 1440 }, { - "epoch": 0.041455682584309955, - "grad_norm": 0.31640625, - "learning_rate": 8.290304073436605e-05, - "loss": 1.0609, + "epoch": 0.08290779734924551, + "grad_norm": 0.2421875, + "learning_rate": 0.0001658060814687321, + "loss": 0.9485, "step": 1445 }, { - "epoch": 0.04159912785276778, - "grad_norm": 0.328125, - "learning_rate": 8.31899024670109e-05, - "loss": 0.9008, + "epoch": 0.08319467554076539, + "grad_norm": 0.21484375, + "learning_rate": 0.0001663798049340218, + "loss": 0.9971, "step": 1450 }, { - "epoch": 0.0417425731212256, - "grad_norm": 0.33203125, - "learning_rate": 8.347676419965577e-05, - "loss": 1.0957, + "epoch": 0.08348155373228527, + "grad_norm": 0.2353515625, + "learning_rate": 0.00016695352839931154, + "loss": 0.9906, "step": 1455 }, { - "epoch": 0.041886018389683415, - "grad_norm": 0.322265625, - "learning_rate": 8.376362593230064e-05, - "loss": 0.9773, + "epoch": 0.08376843192380515, + "grad_norm": 0.2333984375, + "learning_rate": 0.00016752725186460127, + "loss": 0.905, "step": 1460 }, { - "epoch": 0.04202946365814124, - "grad_norm": 0.310546875, - "learning_rate": 8.40504876649455e-05, - "loss": 1.003, + "epoch": 0.08405531011532504, + "grad_norm": 0.23828125, + "learning_rate": 0.000168100975329891, + "loss": 0.9967, "step": 1465 }, { - "epoch": 0.042172908926599056, - "grad_norm": 0.37109375, - "learning_rate": 8.433734939759037e-05, - "loss": 1.0321, + "epoch": 0.08434218830684491, + "grad_norm": 0.265625, + "learning_rate": 0.00016867469879518074, + "loss": 1.0322, "step": 1470 }, { - "epoch": 0.042316354195056874, - "grad_norm": 0.345703125, - "learning_rate": 8.462421113023524e-05, - "loss": 1.005, + "epoch": 0.0846290664983648, + "grad_norm": 0.259765625, + "learning_rate": 0.00016924842226047048, + "loss": 1.0861, "step": 1475 }, { - "epoch": 0.0424597994635147, - "grad_norm": 0.3125, - "learning_rate": 8.491107286288009e-05, - "loss": 0.8993, + "epoch": 0.08491594468988467, + "grad_norm": 0.2412109375, + "learning_rate": 0.00016982214572576018, + "loss": 1.0018, "step": 1480 }, { - "epoch": 0.042603244731972516, - "grad_norm": 0.34765625, - "learning_rate": 8.519793459552496e-05, - "loss": 0.9987, + "epoch": 0.08520282288140456, + "grad_norm": 0.2353515625, + "learning_rate": 0.00017039586919104992, + "loss": 0.9241, "step": 1485 }, { - "epoch": 0.04274669000043033, - "grad_norm": 0.330078125, - "learning_rate": 8.548479632816982e-05, - "loss": 1.0375, + "epoch": 0.08548970107292443, + "grad_norm": 0.216796875, + "learning_rate": 0.00017096959265633965, + "loss": 0.9617, "step": 1490 }, { - "epoch": 0.04289013526888816, - "grad_norm": 0.333984375, - "learning_rate": 8.577165806081469e-05, - "loss": 0.9619, + "epoch": 0.08577657926444432, + "grad_norm": 0.2421875, + "learning_rate": 0.00017154331612162938, + "loss": 0.9043, "step": 1495 }, { - "epoch": 0.043033580537345975, - "grad_norm": 0.328125, - "learning_rate": 8.605851979345956e-05, - "loss": 0.9811, + "epoch": 0.08606345745596419, + "grad_norm": 0.232421875, + "learning_rate": 0.00017211703958691912, + "loss": 1.0095, "step": 1500 }, { - "epoch": 0.04317702580580379, - "grad_norm": 0.33203125, - "learning_rate": 8.634538152610442e-05, - "loss": 0.9148, + "epoch": 0.08635033564748408, + "grad_norm": 0.2333984375, + "learning_rate": 0.00017269076305220885, + "loss": 1.0159, "step": 1505 }, { - 
"epoch": 0.04332047107426162, - "grad_norm": 0.361328125, - "learning_rate": 8.663224325874929e-05, - "loss": 0.9898, + "epoch": 0.08663721383900395, + "grad_norm": 0.25390625, + "learning_rate": 0.00017326448651749858, + "loss": 1.05, "step": 1510 }, { - "epoch": 0.043463916342719434, - "grad_norm": 0.30078125, - "learning_rate": 8.691910499139414e-05, - "loss": 0.8854, + "epoch": 0.08692409203052384, + "grad_norm": 0.2421875, + "learning_rate": 0.0001738382099827883, + "loss": 0.9889, "step": 1515 }, { - "epoch": 0.04360736161117725, - "grad_norm": 0.306640625, - "learning_rate": 8.720596672403901e-05, - "loss": 1.0925, + "epoch": 0.08721097022204372, + "grad_norm": 0.2490234375, + "learning_rate": 0.00017441193344807802, + "loss": 1.0014, "step": 1520 }, { - "epoch": 0.043750806879635076, - "grad_norm": 0.328125, - "learning_rate": 8.749282845668388e-05, - "loss": 1.0257, + "epoch": 0.0874978484135636, + "grad_norm": 0.25390625, + "learning_rate": 0.00017498565691336776, + "loss": 1.0155, "step": 1525 }, { - "epoch": 0.043894252148092894, - "grad_norm": 0.330078125, - "learning_rate": 8.777969018932875e-05, - "loss": 0.8484, + "epoch": 0.08778472660508348, + "grad_norm": 0.2333984375, + "learning_rate": 0.0001755593803786575, + "loss": 0.9529, "step": 1530 }, { - "epoch": 0.04403769741655072, - "grad_norm": 0.32421875, - "learning_rate": 8.806655192197361e-05, - "loss": 1.021, + "epoch": 0.08807160479660336, + "grad_norm": 0.2373046875, + "learning_rate": 0.00017613310384394722, + "loss": 0.9015, "step": 1535 }, { - "epoch": 0.044181142685008536, - "grad_norm": 0.33984375, - "learning_rate": 8.835341365461848e-05, - "loss": 1.054, + "epoch": 0.08835848298812324, + "grad_norm": 0.2421875, + "learning_rate": 0.00017670682730923696, + "loss": 0.958, "step": 1540 }, { - "epoch": 0.04432458795346635, - "grad_norm": 0.35546875, - "learning_rate": 8.864027538726333e-05, - "loss": 0.9536, + "epoch": 0.08864536117964313, + "grad_norm": 0.234375, + "learning_rate": 0.00017728055077452666, + "loss": 1.0366, "step": 1545 }, { - "epoch": 0.04446803322192418, - "grad_norm": 0.318359375, - "learning_rate": 8.892713711990821e-05, - "loss": 0.9142, + "epoch": 0.088932239371163, + "grad_norm": 0.2490234375, + "learning_rate": 0.00017785427423981642, + "loss": 1.0001, "step": 1550 }, { - "epoch": 0.044611478490381995, - "grad_norm": 0.34375, - "learning_rate": 8.921399885255307e-05, - "loss": 1.0169, + "epoch": 0.08921911756268289, + "grad_norm": 0.25390625, + "learning_rate": 0.00017842799770510613, + "loss": 0.9472, "step": 1555 }, { - "epoch": 0.04475492375883981, - "grad_norm": 0.3515625, - "learning_rate": 8.950086058519795e-05, - "loss": 0.9813, + "epoch": 0.08950599575420276, + "grad_norm": 0.232421875, + "learning_rate": 0.0001790017211703959, + "loss": 0.9278, "step": 1560 }, { - "epoch": 0.04489836902729764, - "grad_norm": 0.3125, - "learning_rate": 8.97877223178428e-05, - "loss": 0.9642, + "epoch": 0.08979287394572265, + "grad_norm": 0.2421875, + "learning_rate": 0.0001795754446356856, + "loss": 0.9875, "step": 1565 }, { - "epoch": 0.045041814295755454, - "grad_norm": 0.3125, - "learning_rate": 9.007458405048767e-05, - "loss": 1.029, + "epoch": 0.09007975213724252, + "grad_norm": 0.244140625, + "learning_rate": 0.00018014916810097533, + "loss": 0.9641, "step": 1570 }, { - "epoch": 0.04518525956421327, - "grad_norm": 0.33984375, - "learning_rate": 9.036144578313253e-05, - "loss": 0.9139, + "epoch": 0.09036663032876241, + "grad_norm": 0.240234375, + "learning_rate": 0.00018072289156626507, + 
"loss": 0.9106, "step": 1575 }, { - "epoch": 0.045328704832671096, - "grad_norm": 0.314453125, - "learning_rate": 9.06483075157774e-05, - "loss": 1.0229, + "epoch": 0.09065350852028228, + "grad_norm": 0.248046875, + "learning_rate": 0.0001812966150315548, + "loss": 0.9187, "step": 1580 }, { - "epoch": 0.045472150101128914, - "grad_norm": 0.30859375, - "learning_rate": 9.093516924842227e-05, - "loss": 0.9704, + "epoch": 0.09094038671180217, + "grad_norm": 0.228515625, + "learning_rate": 0.00018187033849684453, + "loss": 0.9488, "step": 1585 }, { - "epoch": 0.04561559536958673, - "grad_norm": 0.3203125, - "learning_rate": 9.122203098106713e-05, - "loss": 1.0686, + "epoch": 0.09122726490332204, + "grad_norm": 0.25390625, + "learning_rate": 0.00018244406196213427, + "loss": 0.9628, "step": 1590 }, { - "epoch": 0.045759040638044556, - "grad_norm": 0.3203125, - "learning_rate": 9.150889271371199e-05, - "loss": 0.9897, + "epoch": 0.09151414309484193, + "grad_norm": 0.2431640625, + "learning_rate": 0.00018301778542742397, + "loss": 0.9592, "step": 1595 }, { - "epoch": 0.04590248590650237, - "grad_norm": 0.326171875, - "learning_rate": 9.179575444635687e-05, - "loss": 0.8538, + "epoch": 0.0918010212863618, + "grad_norm": 0.25, + "learning_rate": 0.00018359150889271373, + "loss": 1.0228, "step": 1600 }, { - "epoch": 0.0460459311749602, - "grad_norm": 0.32421875, - "learning_rate": 9.208261617900172e-05, - "loss": 0.9511, + "epoch": 0.0920878994778817, + "grad_norm": 0.236328125, + "learning_rate": 0.00018416523235800344, + "loss": 0.9926, "step": 1605 }, { - "epoch": 0.046189376443418015, - "grad_norm": 0.337890625, - "learning_rate": 9.23694779116466e-05, - "loss": 0.9433, + "epoch": 0.09237477766940157, + "grad_norm": 0.25, + "learning_rate": 0.0001847389558232932, + "loss": 1.0007, "step": 1610 }, { - "epoch": 0.04633282171187583, - "grad_norm": 0.3125, - "learning_rate": 9.265633964429145e-05, - "loss": 0.9202, + "epoch": 0.09266165586092145, + "grad_norm": 0.23828125, + "learning_rate": 0.0001853126792885829, + "loss": 1.0219, "step": 1615 }, { - "epoch": 0.04647626698033366, - "grad_norm": 0.36328125, - "learning_rate": 9.294320137693632e-05, - "loss": 1.0199, + "epoch": 0.09294853405244133, + "grad_norm": 0.25390625, + "learning_rate": 0.00018588640275387264, + "loss": 0.9644, "step": 1620 }, { - "epoch": 0.046619712248791474, - "grad_norm": 0.30078125, - "learning_rate": 9.323006310958119e-05, - "loss": 0.9694, + "epoch": 0.09323541224396122, + "grad_norm": 0.2421875, + "learning_rate": 0.00018646012621916237, + "loss": 1.0031, "step": 1625 }, { - "epoch": 0.04676315751724929, - "grad_norm": 0.341796875, - "learning_rate": 9.351692484222605e-05, - "loss": 0.9146, + "epoch": 0.09352229043548109, + "grad_norm": 0.2255859375, + "learning_rate": 0.0001870338496844521, + "loss": 1.0037, "step": 1630 }, { - "epoch": 0.046906602785707116, - "grad_norm": 0.306640625, - "learning_rate": 9.380378657487092e-05, - "loss": 0.9729, + "epoch": 0.09380916862700098, + "grad_norm": 0.224609375, + "learning_rate": 0.00018760757314974184, + "loss": 0.9305, "step": 1635 }, { - "epoch": 0.047050048054164934, - "grad_norm": 0.322265625, - "learning_rate": 9.409064830751579e-05, - "loss": 0.9949, + "epoch": 0.09409604681852085, + "grad_norm": 0.244140625, + "learning_rate": 0.00018818129661503157, + "loss": 0.9534, "step": 1640 }, { - "epoch": 0.04719349332262275, - "grad_norm": 0.3125, - "learning_rate": 9.437751004016064e-05, - "loss": 0.9387, + "epoch": 0.09438292501004074, + "grad_norm": 0.2578125, + 
"learning_rate": 0.00018875502008032128, + "loss": 1.0235, "step": 1645 }, { - "epoch": 0.047336938591080575, - "grad_norm": 0.30078125, - "learning_rate": 9.466437177280552e-05, - "loss": 1.1272, + "epoch": 0.09466980320156061, + "grad_norm": 0.236328125, + "learning_rate": 0.00018932874354561104, + "loss": 0.9046, "step": 1650 }, { - "epoch": 0.04748038385953839, - "grad_norm": 0.33203125, - "learning_rate": 9.495123350545037e-05, - "loss": 1.0461, + "epoch": 0.0949566813930805, + "grad_norm": 0.232421875, + "learning_rate": 0.00018990246701090075, + "loss": 1.004, "step": 1655 }, { - "epoch": 0.04762382912799621, - "grad_norm": 0.353515625, - "learning_rate": 9.523809523809524e-05, - "loss": 1.0034, + "epoch": 0.09524355958460037, + "grad_norm": 0.2451171875, + "learning_rate": 0.00019047619047619048, + "loss": 0.9329, "step": 1660 }, { - "epoch": 0.047767274396454035, - "grad_norm": 0.31640625, - "learning_rate": 9.552495697074011e-05, - "loss": 1.0176, + "epoch": 0.09553043777612026, + "grad_norm": 0.2373046875, + "learning_rate": 0.00019104991394148021, + "loss": 0.9596, "step": 1665 }, { - "epoch": 0.04791071966491185, - "grad_norm": 0.33203125, - "learning_rate": 9.581181870338497e-05, - "loss": 0.9512, + "epoch": 0.09581731596764013, + "grad_norm": 0.2421875, + "learning_rate": 0.00019162363740676995, + "loss": 0.9716, "step": 1670 }, { - "epoch": 0.04805416493336967, - "grad_norm": 0.3046875, - "learning_rate": 9.609868043602984e-05, - "loss": 0.9496, + "epoch": 0.09610419415916002, + "grad_norm": 0.248046875, + "learning_rate": 0.00019219736087205968, + "loss": 0.9608, "step": 1675 }, { - "epoch": 0.048197610201827494, - "grad_norm": 0.306640625, - "learning_rate": 9.638554216867471e-05, - "loss": 0.9563, + "epoch": 0.0963910723506799, + "grad_norm": 0.259765625, + "learning_rate": 0.00019277108433734942, + "loss": 1.0023, "step": 1680 }, { - "epoch": 0.04834105547028531, - "grad_norm": 0.298828125, - "learning_rate": 9.667240390131956e-05, - "loss": 0.9574, + "epoch": 0.09667795054219978, + "grad_norm": 0.25, + "learning_rate": 0.00019334480780263912, + "loss": 0.9665, "step": 1685 }, { - "epoch": 0.048484500738743136, - "grad_norm": 0.32421875, - "learning_rate": 9.695926563396444e-05, - "loss": 0.9311, + "epoch": 0.09696482873371966, + "grad_norm": 0.244140625, + "learning_rate": 0.00019391853126792888, + "loss": 0.9806, "step": 1690 }, { - "epoch": 0.04862794600720095, - "grad_norm": 0.34375, - "learning_rate": 9.72461273666093e-05, - "loss": 1.0239, + "epoch": 0.09725170692523954, + "grad_norm": 0.2392578125, + "learning_rate": 0.0001944922547332186, + "loss": 1.0139, "step": 1695 }, { - "epoch": 0.04877139127565877, - "grad_norm": 0.328125, - "learning_rate": 9.753298909925417e-05, - "loss": 0.9994, + "epoch": 0.09753858511675942, + "grad_norm": 0.25, + "learning_rate": 0.00019506597819850835, + "loss": 0.9764, "step": 1700 }, { - "epoch": 0.048914836544116595, - "grad_norm": 0.310546875, - "learning_rate": 9.781985083189903e-05, - "loss": 0.9493, + "epoch": 0.0978254633082793, + "grad_norm": 0.2392578125, + "learning_rate": 0.00019563970166379806, + "loss": 0.9552, "step": 1705 }, { - "epoch": 0.04905828181257441, - "grad_norm": 0.357421875, - "learning_rate": 9.81067125645439e-05, - "loss": 1.0398, + "epoch": 0.09811234149979918, + "grad_norm": 0.248046875, + "learning_rate": 0.0001962134251290878, + "loss": 0.9531, "step": 1710 }, { - "epoch": 0.04920172708103223, - "grad_norm": 0.35546875, - "learning_rate": 9.839357429718876e-05, - "loss": 0.945, + "epoch": 
0.09839921969131907, + "grad_norm": 0.2578125, + "learning_rate": 0.00019678714859437752, + "loss": 1.0003, "step": 1715 }, { - "epoch": 0.049345172349490055, - "grad_norm": 0.32421875, - "learning_rate": 9.868043602983363e-05, - "loss": 1.1116, + "epoch": 0.09868609788283894, + "grad_norm": 0.251953125, + "learning_rate": 0.00019736087205966726, + "loss": 1.0355, "step": 1720 }, { - "epoch": 0.04948861761794787, - "grad_norm": 0.322265625, - "learning_rate": 9.89672977624785e-05, - "loss": 1.0097, + "epoch": 0.09897297607435883, + "grad_norm": 0.265625, + "learning_rate": 0.000197934595524957, + "loss": 1.0075, "step": 1725 }, { - "epoch": 0.04963206288640569, - "grad_norm": 0.306640625, - "learning_rate": 9.925415949512336e-05, - "loss": 0.9556, + "epoch": 0.0992598542658787, + "grad_norm": 0.2421875, + "learning_rate": 0.00019850831899024672, + "loss": 0.9431, "step": 1730 }, { - "epoch": 0.049775508154863514, - "grad_norm": 0.326171875, - "learning_rate": 9.954102122776822e-05, - "loss": 0.9968, + "epoch": 0.09954673245739859, + "grad_norm": 0.2451171875, + "learning_rate": 0.00019908204245553643, + "loss": 0.9683, "step": 1735 }, { - "epoch": 0.04991895342332133, - "grad_norm": 0.3359375, - "learning_rate": 9.982788296041308e-05, - "loss": 0.9678, + "epoch": 0.09983361064891846, + "grad_norm": 0.2353515625, + "learning_rate": 0.00019965576592082616, + "loss": 0.9644, "step": 1740 }, { - "epoch": 0.05006239869177915, - "grad_norm": 0.333984375, - "learning_rate": 0.00010011474469305795, - "loss": 1.0043, + "epoch": 0.10012048884043835, + "grad_norm": 0.2421875, + "learning_rate": 0.0001999999919775815, + "loss": 0.988, "step": 1745 }, { - "epoch": 0.05020584396023697, - "grad_norm": 0.3359375, - "learning_rate": 0.00010040160642570282, - "loss": 0.9874, + "epoch": 0.10040736703195823, + "grad_norm": 0.25, + "learning_rate": 0.00019999990172538815, + "loss": 0.9723, "step": 1750 }, { - "epoch": 0.05034928922869479, - "grad_norm": 0.33984375, - "learning_rate": 0.00010068846815834767, - "loss": 1.0694, + "epoch": 0.10069424522347811, + "grad_norm": 0.2431640625, + "learning_rate": 0.00019999971119306908, + "loss": 0.9437, "step": 1755 }, { - "epoch": 0.05049273449715261, - "grad_norm": 0.328125, - "learning_rate": 0.00010097532989099255, - "loss": 1.0178, + "epoch": 0.10098112341499799, + "grad_norm": 0.23828125, + "learning_rate": 0.0001999994203808154, + "loss": 1.0246, "step": 1760 }, { - "epoch": 0.05063617976561043, - "grad_norm": 0.318359375, - "learning_rate": 0.00010126219162363742, - "loss": 1.0493, + "epoch": 0.10126800160651787, + "grad_norm": 0.26171875, + "learning_rate": 0.00019999902928891875, + "loss": 0.9988, "step": 1765 }, { - "epoch": 0.05077962503406825, - "grad_norm": 0.30859375, - "learning_rate": 0.00010154905335628228, - "loss": 1.011, + "epoch": 0.10155487979803775, + "grad_norm": 0.248046875, + "learning_rate": 0.00019999853791777126, + "loss": 0.9811, "step": 1770 }, { - "epoch": 0.050923070302526074, - "grad_norm": 0.298828125, - "learning_rate": 0.00010183591508892714, - "loss": 0.9747, + "epoch": 0.10184175798955764, + "grad_norm": 0.30078125, + "learning_rate": 0.00019999794626786573, + "loss": 1.0372, "step": 1775 }, { - "epoch": 0.05106651557098389, - "grad_norm": 0.314453125, - "learning_rate": 0.000102122776821572, - "loss": 1.0426, + "epoch": 0.10212863618107751, + "grad_norm": 0.25390625, + "learning_rate": 0.00019999725433979544, + "loss": 0.935, "step": 1780 }, { - "epoch": 0.05120996083944171, - "grad_norm": 0.3203125, - "learning_rate": 
0.00010240963855421688, - "loss": 0.9784, + "epoch": 0.1024155143725974, + "grad_norm": 0.25, + "learning_rate": 0.00019999646213425426, + "loss": 0.9745, "step": 1785 }, { - "epoch": 0.051353406107899534, - "grad_norm": 0.3125, - "learning_rate": 0.00010269650028686174, - "loss": 0.9411, + "epoch": 0.10270239256411727, + "grad_norm": 0.2314453125, + "learning_rate": 0.00019999556965203663, + "loss": 0.9369, "step": 1790 }, { - "epoch": 0.05149685137635735, - "grad_norm": 0.310546875, - "learning_rate": 0.0001029833620195066, - "loss": 1.0395, + "epoch": 0.10298927075563716, + "grad_norm": 0.24609375, + "learning_rate": 0.00019999457689403753, + "loss": 0.8631, "step": 1795 }, { - "epoch": 0.05164029664481517, - "grad_norm": 0.3203125, - "learning_rate": 0.00010327022375215147, - "loss": 1.0533, + "epoch": 0.10327614894715703, + "grad_norm": 0.2412109375, + "learning_rate": 0.0001999934838612525, + "loss": 0.9291, "step": 1800 }, { - "epoch": 0.05178374191327299, - "grad_norm": 0.33203125, - "learning_rate": 0.00010355708548479632, - "loss": 0.9395, + "epoch": 0.10356302713867692, + "grad_norm": 0.2353515625, + "learning_rate": 0.0001999922905547776, + "loss": 0.9067, "step": 1805 }, { - "epoch": 0.05192718718173081, - "grad_norm": 0.330078125, - "learning_rate": 0.00010384394721744119, - "loss": 0.9477, + "epoch": 0.10384990533019679, + "grad_norm": 0.26171875, + "learning_rate": 0.00019999099697580954, + "loss": 0.9654, "step": 1810 }, { - "epoch": 0.05207063245018863, - "grad_norm": 0.3046875, - "learning_rate": 0.00010413080895008607, - "loss": 1.0505, + "epoch": 0.10413678352171668, + "grad_norm": 0.25, + "learning_rate": 0.00019998960312564548, + "loss": 0.9231, "step": 1815 }, { - "epoch": 0.05221407771864645, - "grad_norm": 0.328125, - "learning_rate": 0.00010441767068273094, - "loss": 1.1292, + "epoch": 0.10442366171323655, + "grad_norm": 0.248046875, + "learning_rate": 0.0001999881090056832, + "loss": 1.0005, "step": 1820 }, { - "epoch": 0.05235752298710427, - "grad_norm": 0.298828125, - "learning_rate": 0.00010470453241537579, - "loss": 0.8619, + "epoch": 0.10471053990475644, + "grad_norm": 0.2578125, + "learning_rate": 0.000199986514617421, + "loss": 0.9831, "step": 1825 }, { - "epoch": 0.05250096825556209, - "grad_norm": 0.30078125, - "learning_rate": 0.00010499139414802066, - "loss": 1.0437, + "epoch": 0.10499741809627632, + "grad_norm": 0.232421875, + "learning_rate": 0.00019998481996245772, + "loss": 0.995, "step": 1830 }, { - "epoch": 0.05264441352401991, - "grad_norm": 0.33203125, - "learning_rate": 0.00010527825588066554, - "loss": 1.0338, + "epoch": 0.1052842962877962, + "grad_norm": 0.259765625, + "learning_rate": 0.00019998302504249278, + "loss": 0.9808, "step": 1835 }, { - "epoch": 0.05278785879247773, - "grad_norm": 0.31640625, - "learning_rate": 0.00010556511761331038, - "loss": 1.031, + "epoch": 0.10557117447931608, + "grad_norm": 0.283203125, + "learning_rate": 0.00019998112985932613, + "loss": 0.9961, "step": 1840 }, { - "epoch": 0.05293130406093555, - "grad_norm": 0.3203125, - "learning_rate": 0.00010585197934595526, - "loss": 0.9708, + "epoch": 0.10585805267083596, + "grad_norm": 0.236328125, + "learning_rate": 0.00019997913441485826, + "loss": 0.9614, "step": 1845 }, { - "epoch": 0.05307474932939337, - "grad_norm": 0.318359375, - "learning_rate": 0.00010613884107860012, - "loss": 1.0075, + "epoch": 0.10614493086235584, + "grad_norm": 0.2412109375, + "learning_rate": 0.00019997703871109021, + "loss": 1.0427, "step": 1850 }, { - "epoch": 0.05321819459785119, - 
"grad_norm": 0.326171875, - "learning_rate": 0.00010642570281124498, - "loss": 1.0381, + "epoch": 0.10643180905387573, + "grad_norm": 0.234375, + "learning_rate": 0.00019997484275012357, + "loss": 0.979, "step": 1855 }, { - "epoch": 0.05336163986630901, - "grad_norm": 0.3203125, - "learning_rate": 0.00010671256454388984, - "loss": 0.8753, + "epoch": 0.1067186872453956, + "grad_norm": 0.2490234375, + "learning_rate": 0.00019997254653416043, + "loss": 0.9046, "step": 1860 }, { - "epoch": 0.05350508513476683, - "grad_norm": 0.515625, - "learning_rate": 0.00010699942627653472, - "loss": 1.0139, + "epoch": 0.10700556543691549, + "grad_norm": 0.25, + "learning_rate": 0.00019997015006550342, + "loss": 0.9862, "step": 1865 }, { - "epoch": 0.05364853040322465, - "grad_norm": 0.3359375, - "learning_rate": 0.00010728628800917956, - "loss": 1.0233, + "epoch": 0.10729244362843536, + "grad_norm": 0.2451171875, + "learning_rate": 0.0001999676533465558, + "loss": 0.9117, "step": 1870 }, { - "epoch": 0.05379197567168247, - "grad_norm": 0.33203125, - "learning_rate": 0.00010757314974182444, - "loss": 0.9744, + "epoch": 0.10757932181995525, + "grad_norm": 0.255859375, + "learning_rate": 0.00019996505637982122, + "loss": 0.9843, "step": 1875 }, { - "epoch": 0.05393542094014029, - "grad_norm": 0.318359375, - "learning_rate": 0.00010786001147446931, - "loss": 0.9648, + "epoch": 0.10786620001147512, + "grad_norm": 0.244140625, + "learning_rate": 0.00019996235916790392, + "loss": 0.9766, "step": 1880 }, { - "epoch": 0.05407886620859811, - "grad_norm": 0.302734375, - "learning_rate": 0.00010814687320711419, - "loss": 0.9284, + "epoch": 0.10815307820299501, + "grad_norm": 0.263671875, + "learning_rate": 0.0001999595617135087, + "loss": 0.9147, "step": 1885 }, { - "epoch": 0.05422231147705593, - "grad_norm": 0.306640625, - "learning_rate": 0.00010843373493975903, - "loss": 1.0056, + "epoch": 0.10843995639451488, + "grad_norm": 0.265625, + "learning_rate": 0.00019995666401944085, + "loss": 0.9491, "step": 1890 }, { - "epoch": 0.05436575674551375, - "grad_norm": 0.326171875, - "learning_rate": 0.00010872059667240391, - "loss": 0.8545, + "epoch": 0.10872683458603477, + "grad_norm": 0.2255859375, + "learning_rate": 0.0001999536660886062, + "loss": 0.9517, "step": 1895 }, { - "epoch": 0.05450920201397157, - "grad_norm": 0.291015625, - "learning_rate": 0.00010900745840504878, - "loss": 0.9754, + "epoch": 0.10901371277755464, + "grad_norm": 0.2294921875, + "learning_rate": 0.00019995056792401105, + "loss": 1.0407, "step": 1900 }, { - "epoch": 0.05465264728242939, - "grad_norm": 0.345703125, - "learning_rate": 0.00010929432013769363, - "loss": 0.9863, + "epoch": 0.10930059096907453, + "grad_norm": 0.244140625, + "learning_rate": 0.00019994736952876226, + "loss": 1.0085, "step": 1905 }, { - "epoch": 0.05479609255088721, - "grad_norm": 0.310546875, - "learning_rate": 0.0001095811818703385, - "loss": 0.9846, + "epoch": 0.1095874691605944, + "grad_norm": 0.234375, + "learning_rate": 0.0001999440709060672, + "loss": 0.914, "step": 1910 }, { - "epoch": 0.054939537819345026, - "grad_norm": 0.328125, - "learning_rate": 0.00010986804360298338, - "loss": 0.9824, + "epoch": 0.1098743473521143, + "grad_norm": 0.25390625, + "learning_rate": 0.0001999406720592337, + "loss": 0.9376, "step": 1915 }, { - "epoch": 0.05508298308780285, - "grad_norm": 0.333984375, - "learning_rate": 0.00011015490533562822, - "loss": 0.9397, + "epoch": 0.11016122554363417, + "grad_norm": 0.248046875, + "learning_rate": 0.00019993717299167014, + "loss": 0.8981, 
"step": 1920 }, { - "epoch": 0.05522642835626067, - "grad_norm": 0.328125, - "learning_rate": 0.0001104417670682731, - "loss": 1.0059, + "epoch": 0.11044810373515405, + "grad_norm": 0.251953125, + "learning_rate": 0.00019993357370688542, + "loss": 1.0067, "step": 1925 }, { - "epoch": 0.055369873624718485, - "grad_norm": 0.30859375, - "learning_rate": 0.00011072862880091797, - "loss": 0.8879, + "epoch": 0.11073498192667393, + "grad_norm": 0.248046875, + "learning_rate": 0.00019992987420848891, + "loss": 0.9061, "step": 1930 }, { - "epoch": 0.05551331889317631, - "grad_norm": 0.3203125, - "learning_rate": 0.00011101549053356283, - "loss": 0.9948, + "epoch": 0.11102186011819382, + "grad_norm": 0.2421875, + "learning_rate": 0.00019992607450019048, + "loss": 1.0008, "step": 1935 }, { - "epoch": 0.05565676416163413, - "grad_norm": 0.337890625, - "learning_rate": 0.00011130235226620769, - "loss": 0.9361, + "epoch": 0.11130873830971369, + "grad_norm": 0.255859375, + "learning_rate": 0.00019992217458580043, + "loss": 0.9274, "step": 1940 }, { - "epoch": 0.05580020943009195, - "grad_norm": 0.373046875, - "learning_rate": 0.00011158921399885257, - "loss": 0.8455, + "epoch": 0.11159561650123358, + "grad_norm": 0.291015625, + "learning_rate": 0.00019991817446922964, + "loss": 1.0122, "step": 1945 }, { - "epoch": 0.05594365469854977, - "grad_norm": 0.345703125, - "learning_rate": 0.00011187607573149743, - "loss": 1.012, + "epoch": 0.11188249469275345, + "grad_norm": 0.234375, + "learning_rate": 0.00019991407415448947, + "loss": 0.9451, "step": 1950 }, { - "epoch": 0.056087099967007586, - "grad_norm": 0.310546875, - "learning_rate": 0.00011216293746414229, - "loss": 0.9783, + "epoch": 0.11216937288427334, + "grad_norm": 0.279296875, + "learning_rate": 0.0001999098736456917, + "loss": 0.9165, "step": 1955 }, { - "epoch": 0.05623054523546541, - "grad_norm": 0.4140625, - "learning_rate": 0.00011244979919678715, - "loss": 0.9671, + "epoch": 0.11245625107579321, + "grad_norm": 0.251953125, + "learning_rate": 0.00019990557294704856, + "loss": 0.9794, "step": 1960 }, { - "epoch": 0.05637399050392323, - "grad_norm": 0.3046875, - "learning_rate": 0.00011273666092943202, - "loss": 0.9747, + "epoch": 0.1127431292673131, + "grad_norm": 0.271484375, + "learning_rate": 0.00019990117206287287, + "loss": 0.9801, "step": 1965 }, { - "epoch": 0.056517435772381046, - "grad_norm": 0.3203125, - "learning_rate": 0.00011302352266207687, - "loss": 0.9608, + "epoch": 0.11303000745883297, + "grad_norm": 0.25, + "learning_rate": 0.0001998966709975778, + "loss": 0.9645, "step": 1970 }, { - "epoch": 0.05666088104083887, - "grad_norm": 0.31640625, - "learning_rate": 0.00011331038439472175, - "loss": 0.8669, + "epoch": 0.11331688565035286, + "grad_norm": 0.240234375, + "learning_rate": 0.00019989206975567708, + "loss": 0.9787, "step": 1975 }, { - "epoch": 0.05680432630929669, - "grad_norm": 0.314453125, - "learning_rate": 0.00011359724612736662, - "loss": 0.9659, + "epoch": 0.11360376384187273, + "grad_norm": 0.25, + "learning_rate": 0.0001998873683417848, + "loss": 0.9984, "step": 1980 }, { - "epoch": 0.056947771577754505, - "grad_norm": 0.30859375, - "learning_rate": 0.00011388410786001149, - "loss": 0.9396, + "epoch": 0.11389064203339262, + "grad_norm": 0.24609375, + "learning_rate": 0.00019988256676061554, + "loss": 0.995, "step": 1985 }, { - "epoch": 0.05709121684621233, - "grad_norm": 0.330078125, - "learning_rate": 0.00011417096959265634, - "loss": 0.9218, + "epoch": 0.1141775202249125, + "grad_norm": 0.244140625, + 
"learning_rate": 0.00019987766501698438, + "loss": 0.9607, "step": 1990 }, { - "epoch": 0.05723466211467015, - "grad_norm": 0.28515625, - "learning_rate": 0.0001144578313253012, - "loss": 1.0271, + "epoch": 0.11446439841643238, + "grad_norm": 0.251953125, + "learning_rate": 0.00019987266311580676, + "loss": 1.0015, "step": 1995 }, { - "epoch": 0.057378107383127964, - "grad_norm": 0.333984375, - "learning_rate": 0.00011474469305794609, - "loss": 1.0223, + "epoch": 0.11475127660795226, + "grad_norm": 0.240234375, + "learning_rate": 0.00019986756106209864, + "loss": 0.9125, "step": 2000 }, { - "epoch": 0.05752155265158579, - "grad_norm": 0.318359375, - "learning_rate": 0.00011503155479059094, - "loss": 0.9844, + "epoch": 0.11503815479947214, + "grad_norm": 0.2578125, + "learning_rate": 0.0001998623588609763, + "loss": 0.9281, "step": 2005 }, { - "epoch": 0.057664997920043606, - "grad_norm": 0.337890625, - "learning_rate": 0.0001153184165232358, - "loss": 0.9625, + "epoch": 0.11532503299099202, + "grad_norm": 0.2578125, + "learning_rate": 0.0001998570565176566, + "loss": 1.0259, "step": 2010 }, { - "epoch": 0.057808443188501424, - "grad_norm": 0.33203125, - "learning_rate": 0.00011560527825588067, - "loss": 1.0147, + "epoch": 0.1156119111825119, + "grad_norm": 0.271484375, + "learning_rate": 0.0001998516540374567, + "loss": 1.003, "step": 2015 }, { - "epoch": 0.05795188845695925, - "grad_norm": 0.31640625, - "learning_rate": 0.00011589213998852553, - "loss": 0.9943, + "epoch": 0.11589878937403178, + "grad_norm": 0.234375, + "learning_rate": 0.00019984615142579424, + "loss": 0.9298, "step": 2020 }, { - "epoch": 0.058095333725417066, - "grad_norm": 0.306640625, - "learning_rate": 0.0001161790017211704, - "loss": 1.0123, + "epoch": 0.11618566756555167, + "grad_norm": 0.265625, + "learning_rate": 0.00019984054868818724, + "loss": 0.9373, "step": 2025 }, { - "epoch": 0.05823877899387489, - "grad_norm": 0.32421875, - "learning_rate": 0.00011646586345381527, - "loss": 1.0472, + "epoch": 0.11647254575707154, + "grad_norm": 0.2392578125, + "learning_rate": 0.0001998348458302541, + "loss": 0.8953, "step": 2030 }, { - "epoch": 0.05838222426233271, - "grad_norm": 0.32421875, - "learning_rate": 0.00011675272518646013, - "loss": 1.0197, + "epoch": 0.11675942394859143, + "grad_norm": 0.255859375, + "learning_rate": 0.0001998290428577137, + "loss": 0.9788, "step": 2035 }, { - "epoch": 0.058525669530790525, - "grad_norm": 0.34765625, - "learning_rate": 0.000117039586919105, - "loss": 0.9435, + "epoch": 0.1170463021401113, + "grad_norm": 0.259765625, + "learning_rate": 0.00019982313977638528, + "loss": 0.9672, "step": 2040 }, { - "epoch": 0.05866911479924835, - "grad_norm": 0.294921875, - "learning_rate": 0.00011732644865174986, - "loss": 0.8775, + "epoch": 0.11733318033163119, + "grad_norm": 0.2412109375, + "learning_rate": 0.00019981713659218846, + "loss": 0.8816, "step": 2045 }, { - "epoch": 0.05881256006770617, - "grad_norm": 0.322265625, - "learning_rate": 0.00011761331038439474, - "loss": 1.0517, + "epoch": 0.11762005852315106, + "grad_norm": 0.263671875, + "learning_rate": 0.00019981103331114322, + "loss": 0.9887, "step": 2050 }, { - "epoch": 0.058956005336163984, - "grad_norm": 0.294921875, - "learning_rate": 0.00011790017211703958, - "loss": 1.0286, + "epoch": 0.11790693671467095, + "grad_norm": 0.2470703125, + "learning_rate": 0.00019980482993936995, + "loss": 1.0521, "step": 2055 }, { - "epoch": 0.05909945060462181, - "grad_norm": 0.314453125, - "learning_rate": 0.00011818703384968446, - "loss": 
0.9211, + "epoch": 0.11819381490619082, + "grad_norm": 0.2412109375, + "learning_rate": 0.00019979852648308947, + "loss": 0.9699, "step": 2060 }, { - "epoch": 0.059242895873079626, - "grad_norm": 0.30859375, - "learning_rate": 0.00011847389558232933, - "loss": 0.9873, + "epoch": 0.11848069309771071, + "grad_norm": 0.267578125, + "learning_rate": 0.0001997921229486228, + "loss": 0.9402, "step": 2065 }, { - "epoch": 0.059386341141537444, - "grad_norm": 0.326171875, - "learning_rate": 0.00011876075731497418, - "loss": 0.9485, + "epoch": 0.11876757128923059, + "grad_norm": 0.265625, + "learning_rate": 0.00019978561934239149, + "loss": 0.9328, "step": 2070 }, { - "epoch": 0.05952978640999527, - "grad_norm": 0.326171875, - "learning_rate": 0.00011904761904761905, - "loss": 0.9675, + "epoch": 0.11905444948075047, + "grad_norm": 0.25390625, + "learning_rate": 0.0001997790156709173, + "loss": 0.9643, "step": 2075 }, { - "epoch": 0.059673231678453086, - "grad_norm": 0.302734375, - "learning_rate": 0.00011933448078026393, - "loss": 0.954, + "epoch": 0.11934132767227035, + "grad_norm": 0.259765625, + "learning_rate": 0.00019977231194082248, + "loss": 1.0274, "step": 2080 }, { - "epoch": 0.0598166769469109, - "grad_norm": 0.328125, - "learning_rate": 0.00011962134251290877, - "loss": 1.0853, + "epoch": 0.11962820586379024, + "grad_norm": 0.2470703125, + "learning_rate": 0.00019976550815882952, + "loss": 0.9102, "step": 2085 }, { - "epoch": 0.05996012221536873, - "grad_norm": 0.30078125, - "learning_rate": 0.00011990820424555365, - "loss": 1.0828, + "epoch": 0.11991508405531011, + "grad_norm": 0.2890625, + "learning_rate": 0.00019975860433176128, + "loss": 0.918, "step": 2090 }, { - "epoch": 0.060103567483826545, - "grad_norm": 0.341796875, - "learning_rate": 0.00012019506597819851, - "loss": 0.9306, + "epoch": 0.12020196224683, + "grad_norm": 0.248046875, + "learning_rate": 0.0001997516004665409, + "loss": 0.9157, "step": 2095 }, { - "epoch": 0.06024701275228437, - "grad_norm": 0.302734375, - "learning_rate": 0.0001204819277108434, - "loss": 1.0826, + "epoch": 0.12048884043834987, + "grad_norm": 0.259765625, + "learning_rate": 0.0001997444965701919, + "loss": 0.9264, "step": 2100 }, { - "epoch": 0.06039045802074219, - "grad_norm": 0.328125, - "learning_rate": 0.00012076878944348823, - "loss": 0.9491, + "epoch": 0.12077571862986976, + "grad_norm": 0.26171875, + "learning_rate": 0.00019973729264983808, + "loss": 0.9099, "step": 2105 }, { - "epoch": 0.060533903289200004, - "grad_norm": 0.365234375, - "learning_rate": 0.00012105565117613311, - "loss": 0.9851, + "epoch": 0.12106259682138963, + "grad_norm": 0.31640625, + "learning_rate": 0.00019972998871270353, + "loss": 0.9819, "step": 2110 }, { - "epoch": 0.06067734855765783, - "grad_norm": 0.3203125, - "learning_rate": 0.00012134251290877798, - "loss": 0.9996, + "epoch": 0.12134947501290952, + "grad_norm": 0.25, + "learning_rate": 0.0001997225847661127, + "loss": 0.9286, "step": 2115 }, { - "epoch": 0.060820793826115646, - "grad_norm": 0.31640625, - "learning_rate": 0.00012162937464142283, - "loss": 0.8923, + "epoch": 0.12163635320442939, + "grad_norm": 0.25390625, + "learning_rate": 0.00019971508081749023, + "loss": 1.0012, "step": 2120 }, { - "epoch": 0.060964239094573464, - "grad_norm": 0.3046875, - "learning_rate": 0.0001219162363740677, - "loss": 1.0667, + "epoch": 0.12192323139594928, + "grad_norm": 0.244140625, + "learning_rate": 0.0001997074768743611, + "loss": 0.9616, "step": 2125 }, { - "epoch": 0.06110768436303129, - "grad_norm": 0.345703125, - 
"learning_rate": 0.00012220309810671257, - "loss": 0.977, + "epoch": 0.12221010958746915, + "grad_norm": 0.255859375, + "learning_rate": 0.00019969977294435057, + "loss": 0.9717, "step": 2130 }, { - "epoch": 0.061251129631489105, - "grad_norm": 0.31640625, - "learning_rate": 0.00012248995983935742, - "loss": 0.9434, + "epoch": 0.12249698777898904, + "grad_norm": 0.26171875, + "learning_rate": 0.0001996919690351842, + "loss": 0.9775, "step": 2135 }, { - "epoch": 0.06139457489994692, - "grad_norm": 0.328125, - "learning_rate": 0.0001227768215720023, - "loss": 0.8608, - "step": 2140 + "epoch": 0.12278386597050892, + "grad_norm": 0.291015625, + "learning_rate": 0.0001996840651546877, + "loss": 0.9049, + "step": 2140 }, { - "epoch": 0.06153802016840475, - "grad_norm": 0.330078125, - "learning_rate": 0.00012306368330464718, - "loss": 0.9605, + "epoch": 0.1230707441620288, + "grad_norm": 0.2451171875, + "learning_rate": 0.00019967606131078718, + "loss": 0.9141, "step": 2145 }, { - "epoch": 0.061681465436862565, - "grad_norm": 0.353515625, - "learning_rate": 0.000123350545037292, - "loss": 0.8995, + "epoch": 0.12335762235354868, + "grad_norm": 0.26953125, + "learning_rate": 0.00019966795751150885, + "loss": 1.0031, "step": 2150 }, { - "epoch": 0.06182491070532038, - "grad_norm": 0.33203125, - "learning_rate": 0.0001236374067699369, - "loss": 0.9759, + "epoch": 0.12364450054506856, + "grad_norm": 0.25390625, + "learning_rate": 0.00019965975376497918, + "loss": 0.9594, "step": 2155 }, { - "epoch": 0.06196835597377821, - "grad_norm": 0.326171875, - "learning_rate": 0.00012392426850258177, - "loss": 0.9895, + "epoch": 0.12393137873658844, + "grad_norm": 0.24609375, + "learning_rate": 0.000199651450079425, + "loss": 0.9046, "step": 2160 }, { - "epoch": 0.062111801242236024, - "grad_norm": 0.314453125, - "learning_rate": 0.00012421113023522665, - "loss": 0.9338, + "epoch": 0.12421825692810833, + "grad_norm": 0.28515625, + "learning_rate": 0.00019964304646317323, + "loss": 0.961, "step": 2165 }, { - "epoch": 0.06225524651069384, - "grad_norm": 0.31640625, - "learning_rate": 0.00012449799196787148, - "loss": 1.0084, + "epoch": 0.1245051351196282, + "grad_norm": 0.263671875, + "learning_rate": 0.00019963454292465103, + "loss": 0.9989, "step": 2170 }, { - "epoch": 0.062398691779151666, - "grad_norm": 0.32421875, - "learning_rate": 0.00012478485370051636, - "loss": 0.8876, + "epoch": 0.12479201331114809, + "grad_norm": 0.2431640625, + "learning_rate": 0.00019962593947238578, + "loss": 0.9993, "step": 2175 }, { - "epoch": 0.06254213704760948, - "grad_norm": 0.32421875, - "learning_rate": 0.00012507171543316124, - "loss": 0.9615, + "epoch": 0.12507889150266796, + "grad_norm": 0.259765625, + "learning_rate": 0.000199617236115005, + "loss": 0.9633, "step": 2180 }, { - "epoch": 0.0626855823160673, - "grad_norm": 0.345703125, - "learning_rate": 0.0001253585771658061, - "loss": 1.091, + "epoch": 0.12536576969418783, + "grad_norm": 0.24609375, + "learning_rate": 0.00019960843286123648, + "loss": 0.9134, "step": 2185 }, { - "epoch": 0.06282902758452512, - "grad_norm": 0.55078125, - "learning_rate": 0.00012564543889845094, - "loss": 0.8616, + "epoch": 0.12565264788570774, + "grad_norm": 0.2578125, + "learning_rate": 0.0001995995297199081, + "loss": 1.0059, "step": 2190 }, { - "epoch": 0.06297247285298295, - "grad_norm": 0.330078125, - "learning_rate": 0.00012593230063109582, - "loss": 0.9669, + "epoch": 0.1259395260772276, + "grad_norm": 0.2490234375, + "learning_rate": 0.000199590526699948, + "loss": 1.0153, "step": 
2195 }, { - "epoch": 0.06311591812144077, - "grad_norm": 0.32421875, - "learning_rate": 0.00012621916236374068, - "loss": 0.9977, + "epoch": 0.12622640426874748, + "grad_norm": 0.2451171875, + "learning_rate": 0.0001995814238103844, + "loss": 0.9359, "step": 2200 }, { - "epoch": 0.06325936338989858, - "grad_norm": 0.3046875, - "learning_rate": 0.00012650602409638556, - "loss": 0.9163, + "epoch": 0.12651328246026736, + "grad_norm": 0.25, + "learning_rate": 0.00019957222106034572, + "loss": 0.9862, "step": 2205 }, { - "epoch": 0.0634028086583564, - "grad_norm": 0.318359375, - "learning_rate": 0.0001267928858290304, - "loss": 0.9687, + "epoch": 0.12680016065178726, + "grad_norm": 0.2412109375, + "learning_rate": 0.00019956291845906046, + "loss": 0.9502, "step": 2210 }, { - "epoch": 0.06354625392681422, - "grad_norm": 0.33203125, - "learning_rate": 0.0001270797475616753, - "loss": 0.9884, + "epoch": 0.12708703884330713, + "grad_norm": 0.265625, + "learning_rate": 0.00019955351601585731, + "loss": 0.979, "step": 2215 }, { - "epoch": 0.06368969919527204, - "grad_norm": 0.33984375, - "learning_rate": 0.00012736660929432014, - "loss": 0.9691, + "epoch": 0.127373917034827, + "grad_norm": 0.23828125, + "learning_rate": 0.0001995440137401651, + "loss": 0.8981, "step": 2220 }, { - "epoch": 0.06383314446372987, - "grad_norm": 0.333984375, - "learning_rate": 0.00012765347102696502, - "loss": 1.0086, + "epoch": 0.12766079522634688, + "grad_norm": 0.240234375, + "learning_rate": 0.00019953441164151264, + "loss": 1.0073, "step": 2225 }, { - "epoch": 0.06397658973218769, - "grad_norm": 0.330078125, - "learning_rate": 0.00012794033275960988, - "loss": 0.9212, + "epoch": 0.12794767341786678, + "grad_norm": 0.240234375, + "learning_rate": 0.00019952470972952902, + "loss": 0.9045, "step": 2230 }, { - "epoch": 0.0641200350006455, - "grad_norm": 0.330078125, - "learning_rate": 0.00012822719449225473, - "loss": 0.9785, + "epoch": 0.12823455160938665, + "grad_norm": 0.2490234375, + "learning_rate": 0.0001995149080139433, + "loss": 0.9948, "step": 2235 }, { - "epoch": 0.06426348026910332, - "grad_norm": 0.330078125, - "learning_rate": 0.0001285140562248996, - "loss": 0.9093, + "epoch": 0.12852142980090653, + "grad_norm": 0.2392578125, + "learning_rate": 0.0001995050065045847, + "loss": 0.9548, "step": 2240 }, { - "epoch": 0.06440692553756114, - "grad_norm": 0.30859375, - "learning_rate": 0.00012880091795754446, - "loss": 0.9994, + "epoch": 0.12880830799242643, + "grad_norm": 0.271484375, + "learning_rate": 0.00019949500521138243, + "loss": 0.9024, "step": 2245 }, { - "epoch": 0.06455037080601897, - "grad_norm": 0.31640625, - "learning_rate": 0.00012908777969018932, - "loss": 0.9779, + "epoch": 0.1290951861839463, + "grad_norm": 0.2470703125, + "learning_rate": 0.00019948490414436584, + "loss": 1.0311, "step": 2250 }, { - "epoch": 0.06469381607447679, - "grad_norm": 0.345703125, - "learning_rate": 0.0001293746414228342, - "loss": 1.0281, + "epoch": 0.12938206437546618, + "grad_norm": 0.2451171875, + "learning_rate": 0.00019947470331366427, + "loss": 0.9478, "step": 2255 }, { - "epoch": 0.0648372613429346, - "grad_norm": 0.33984375, - "learning_rate": 0.00012966150315547908, - "loss": 0.9377, + "epoch": 0.12966894256698605, + "grad_norm": 0.25, + "learning_rate": 0.00019946440272950716, + "loss": 0.8834, "step": 2260 }, { - "epoch": 0.06498070661139242, - "grad_norm": 0.376953125, - "learning_rate": 0.00012994836488812393, - "loss": 1.0749, + "epoch": 0.12995582075850595, + "grad_norm": 0.248046875, + "learning_rate": 
0.00019945400240222396, + "loss": 1.0765, "step": 2265 }, { - "epoch": 0.06512415187985024, - "grad_norm": 0.322265625, - "learning_rate": 0.00013023522662076878, - "loss": 1.0307, + "epoch": 0.13024269895002583, + "grad_norm": 0.25390625, + "learning_rate": 0.00019944350234224416, + "loss": 0.9275, "step": 2270 }, { - "epoch": 0.06526759714830806, - "grad_norm": 0.359375, - "learning_rate": 0.00013052208835341366, - "loss": 1.0039, + "epoch": 0.1305295771415457, + "grad_norm": 0.25, + "learning_rate": 0.0001994329025600972, + "loss": 0.938, "step": 2275 }, { - "epoch": 0.06541104241676589, - "grad_norm": 0.3203125, - "learning_rate": 0.00013080895008605854, - "loss": 0.8882, + "epoch": 0.13081645533306557, + "grad_norm": 0.263671875, + "learning_rate": 0.00019942220306641258, + "loss": 1.0348, "step": 2280 }, { - "epoch": 0.0655544876852237, - "grad_norm": 0.322265625, - "learning_rate": 0.0001310958118187034, - "loss": 1.1298, + "epoch": 0.13110333352458547, + "grad_norm": 0.24609375, + "learning_rate": 0.0001994114038719198, + "loss": 0.9627, "step": 2285 }, { - "epoch": 0.06569793295368152, - "grad_norm": 0.3203125, - "learning_rate": 0.00013138267355134825, - "loss": 0.895, + "epoch": 0.13139021171610535, + "grad_norm": 0.25390625, + "learning_rate": 0.00019940050498744828, + "loss": 0.9749, "step": 2290 }, { - "epoch": 0.06584137822213934, - "grad_norm": 0.3359375, - "learning_rate": 0.00013166953528399313, - "loss": 0.9949, + "epoch": 0.13167708990762522, + "grad_norm": 0.2431640625, + "learning_rate": 0.00019938950642392746, + "loss": 0.8878, "step": 2295 }, { - "epoch": 0.06598482349059716, - "grad_norm": 0.333984375, - "learning_rate": 0.00013195639701663798, - "loss": 1.034, + "epoch": 0.1319639680991451, + "grad_norm": 0.28125, + "learning_rate": 0.00019937840819238677, + "loss": 1.0061, "step": 2300 }, { - "epoch": 0.06612826875905499, - "grad_norm": 0.322265625, - "learning_rate": 0.00013224325874928284, - "loss": 0.9706, + "epoch": 0.132250846290665, + "grad_norm": 0.267578125, + "learning_rate": 0.00019936721030395547, + "loss": 0.9872, "step": 2305 }, { - "epoch": 0.0662717140275128, - "grad_norm": 0.326171875, - "learning_rate": 0.00013253012048192772, - "loss": 1.0131, + "epoch": 0.13253772448218487, + "grad_norm": 0.2578125, + "learning_rate": 0.00019935591276986286, + "loss": 1.0265, "step": 2310 }, { - "epoch": 0.06641515929597062, - "grad_norm": 0.326171875, - "learning_rate": 0.00013281698221457257, - "loss": 1.0511, + "epoch": 0.13282460267370474, + "grad_norm": 0.29296875, + "learning_rate": 0.00019934451560143815, + "loss": 1.0217, "step": 2315 }, { - "epoch": 0.06655860456442844, - "grad_norm": 0.3359375, - "learning_rate": 0.00013310384394721745, - "loss": 0.9692, + "epoch": 0.13311148086522462, + "grad_norm": 0.2421875, + "learning_rate": 0.0001993330188101104, + "loss": 0.9342, "step": 2320 }, { - "epoch": 0.06670204983288626, - "grad_norm": 0.33203125, - "learning_rate": 0.0001333907056798623, - "loss": 0.9917, + "epoch": 0.13339835905674452, + "grad_norm": 0.275390625, + "learning_rate": 0.00019932142240740866, + "loss": 0.9407, "step": 2325 }, { - "epoch": 0.06684549510134408, - "grad_norm": 0.32421875, - "learning_rate": 0.00013367756741250719, - "loss": 0.9834, + "epoch": 0.1336852372482644, + "grad_norm": 0.251953125, + "learning_rate": 0.0001993097264049618, + "loss": 0.9405, "step": 2330 }, { - "epoch": 0.06698894036980191, - "grad_norm": 0.359375, - "learning_rate": 0.00013396442914515204, - "loss": 1.0089, + "epoch": 0.13397211543978427, + 
"grad_norm": 0.25390625, + "learning_rate": 0.0001992979308144986, + "loss": 0.971, "step": 2335 }, { - "epoch": 0.06713238563825973, - "grad_norm": 0.33203125, - "learning_rate": 0.00013425129087779692, - "loss": 0.9125, + "epoch": 0.13425899363130414, + "grad_norm": 0.283203125, + "learning_rate": 0.00019928603564784773, + "loss": 1.0423, "step": 2340 }, { - "epoch": 0.06727583090671754, - "grad_norm": 0.3515625, - "learning_rate": 0.00013453815261044177, - "loss": 1.015, + "epoch": 0.13454587182282404, + "grad_norm": 0.283203125, + "learning_rate": 0.0001992740409169377, + "loss": 1.0122, "step": 2345 }, { - "epoch": 0.06741927617517536, - "grad_norm": 0.310546875, - "learning_rate": 0.00013482501434308663, - "loss": 0.9273, + "epoch": 0.13483275001434392, + "grad_norm": 0.255859375, + "learning_rate": 0.00019926194663379677, + "loss": 0.9608, "step": 2350 }, { - "epoch": 0.06756272144363318, - "grad_norm": 0.2890625, - "learning_rate": 0.0001351118760757315, - "loss": 0.9397, + "epoch": 0.1351196282058638, + "grad_norm": 0.2470703125, + "learning_rate": 0.00019924975281055324, + "loss": 0.905, "step": 2355 }, { - "epoch": 0.067706166712091, - "grad_norm": 0.34765625, - "learning_rate": 0.00013539873780837639, - "loss": 0.9253, + "epoch": 0.13540650639738366, + "grad_norm": 0.2578125, + "learning_rate": 0.00019923745945943502, + "loss": 0.9108, "step": 2360 }, { - "epoch": 0.06784961198054883, - "grad_norm": 0.37109375, - "learning_rate": 0.0001356855995410212, - "loss": 0.8941, + "epoch": 0.13569338458890357, + "grad_norm": 0.2412109375, + "learning_rate": 0.0001992250665927699, + "loss": 0.9587, "step": 2365 }, { - "epoch": 0.06799305724900664, - "grad_norm": 0.306640625, - "learning_rate": 0.0001359724612736661, - "loss": 1.0299, + "epoch": 0.13598026278042344, + "grad_norm": 0.2734375, + "learning_rate": 0.0001992125742229855, + "loss": 0.9459, "step": 2370 }, { - "epoch": 0.06813650251746446, - "grad_norm": 0.318359375, - "learning_rate": 0.00013625932300631097, - "loss": 0.9298, + "epoch": 0.1362671409719433, + "grad_norm": 0.2578125, + "learning_rate": 0.00019919998236260923, + "loss": 1.0325, "step": 2375 }, { - "epoch": 0.06827994778592228, - "grad_norm": 0.3125, - "learning_rate": 0.00013654618473895585, - "loss": 0.9392, + "epoch": 0.1365540191634632, + "grad_norm": 0.279296875, + "learning_rate": 0.00019918729102426816, + "loss": 1.0031, "step": 2380 }, { - "epoch": 0.0684233930543801, - "grad_norm": 0.3359375, - "learning_rate": 0.00013683304647160068, - "loss": 1.0448, + "epoch": 0.1368408973549831, + "grad_norm": 0.251953125, + "learning_rate": 0.00019917450022068927, + "loss": 0.9159, "step": 2385 }, { - "epoch": 0.06856683832283793, - "grad_norm": 0.31640625, - "learning_rate": 0.00013711990820424556, - "loss": 0.9832, + "epoch": 0.13712777554650296, + "grad_norm": 0.259765625, + "learning_rate": 0.00019916160996469914, + "loss": 0.9884, "step": 2390 }, { - "epoch": 0.06871028359129575, - "grad_norm": 0.34765625, - "learning_rate": 0.00013740676993689044, - "loss": 1.0194, + "epoch": 0.13741465373802284, + "grad_norm": 0.251953125, + "learning_rate": 0.0001991486202692242, + "loss": 0.9934, "step": 2395 }, { - "epoch": 0.06885372885975356, - "grad_norm": 0.318359375, - "learning_rate": 0.0001376936316695353, - "loss": 0.9005, + "epoch": 0.1377015319295427, + "grad_norm": 0.255859375, + "learning_rate": 0.00019913553114729053, + "loss": 0.9287, "step": 2400 }, { - "epoch": 0.06899717412821138, - "grad_norm": 0.287109375, - "learning_rate": 0.00013798049340218015, - 
"loss": 0.9715, + "epoch": 0.1379884101210626, + "grad_norm": 0.2578125, + "learning_rate": 0.0001991223426120239, + "loss": 0.955, "step": 2405 }, { - "epoch": 0.0691406193966692, - "grad_norm": 0.310546875, - "learning_rate": 0.00013826735513482503, - "loss": 0.9321, + "epoch": 0.13827528831258248, + "grad_norm": 0.259765625, + "learning_rate": 0.00019910905467664987, + "loss": 0.9791, "step": 2410 }, { - "epoch": 0.06928406466512702, - "grad_norm": 0.3046875, - "learning_rate": 0.00013855421686746988, - "loss": 0.8618, + "epoch": 0.13856216650410236, + "grad_norm": 0.2578125, + "learning_rate": 0.00019909566735449354, + "loss": 1.0237, "step": 2415 }, { - "epoch": 0.06942750993358485, - "grad_norm": 0.361328125, - "learning_rate": 0.00013884107860011476, - "loss": 0.9865, + "epoch": 0.13884904469562223, + "grad_norm": 0.240234375, + "learning_rate": 0.00019908218065897978, + "loss": 0.9546, "step": 2420 }, { - "epoch": 0.06957095520204266, - "grad_norm": 0.326171875, - "learning_rate": 0.0001391279403327596, - "loss": 0.9867, + "epoch": 0.13913592288714213, + "grad_norm": 0.244140625, + "learning_rate": 0.00019906859460363307, + "loss": 0.8988, "step": 2425 }, { - "epoch": 0.06971440047050048, - "grad_norm": 0.3359375, - "learning_rate": 0.00013941480206540447, - "loss": 0.9308, + "epoch": 0.139422801078662, + "grad_norm": 0.25, + "learning_rate": 0.00019905490920207755, + "loss": 0.9675, "step": 2430 }, { - "epoch": 0.0698578457389583, - "grad_norm": 0.33203125, - "learning_rate": 0.00013970166379804935, - "loss": 1.0243, + "epoch": 0.13970967927018188, + "grad_norm": 0.232421875, + "learning_rate": 0.00019904112446803699, + "loss": 0.9773, "step": 2435 }, { - "epoch": 0.07000129100741612, - "grad_norm": 0.310546875, - "learning_rate": 0.00013998852553069423, - "loss": 0.9256, + "epoch": 0.13999655746170175, + "grad_norm": 0.259765625, + "learning_rate": 0.0001990272404153347, + "loss": 1.053, "step": 2440 }, { - "epoch": 0.07014473627587393, - "grad_norm": 0.328125, - "learning_rate": 0.00014027538726333908, - "loss": 1.0127, + "epoch": 0.14028343565322166, + "grad_norm": 0.255859375, + "learning_rate": 0.00019901325705789366, + "loss": 0.9634, "step": 2445 }, { - "epoch": 0.07028818154433177, - "grad_norm": 0.328125, - "learning_rate": 0.00014056224899598393, - "loss": 0.8317, + "epoch": 0.14057031384474153, + "grad_norm": 0.251953125, + "learning_rate": 0.0001989991744097364, + "loss": 1.0375, "step": 2450 }, { - "epoch": 0.07043162681278958, - "grad_norm": 0.318359375, - "learning_rate": 0.00014084911072862881, - "loss": 0.9878, + "epoch": 0.1408571920362614, + "grad_norm": 0.2421875, + "learning_rate": 0.0001989849924849851, + "loss": 1.0092, "step": 2455 }, { - "epoch": 0.0705750720812474, - "grad_norm": 0.3203125, - "learning_rate": 0.00014113597246127367, - "loss": 0.9428, + "epoch": 0.14114407022778128, + "grad_norm": 0.259765625, + "learning_rate": 0.00019897071129786132, + "loss": 1.0077, "step": 2460 }, { - "epoch": 0.07071851734970522, - "grad_norm": 0.345703125, - "learning_rate": 0.00014142283419391852, - "loss": 0.9409, + "epoch": 0.14143094841930118, + "grad_norm": 0.24609375, + "learning_rate": 0.00019895633086268637, + "loss": 1.0083, "step": 2465 }, { - "epoch": 0.07086196261816304, - "grad_norm": 0.337890625, - "learning_rate": 0.0001417096959265634, - "loss": 0.9309, + "epoch": 0.14171782661082105, + "grad_norm": 0.26171875, + "learning_rate": 0.0001989418511938809, + "loss": 0.9727, "step": 2470 }, { - "epoch": 0.07100540788662087, - "grad_norm": 0.328125, - 
"learning_rate": 0.00014199655765920828, - "loss": 0.9569, + "epoch": 0.14200470480234093, + "grad_norm": 0.265625, + "learning_rate": 0.00019892727230596519, + "loss": 1.0221, "step": 2475 }, { - "epoch": 0.07114885315507868, - "grad_norm": 0.31640625, - "learning_rate": 0.00014228341939185313, - "loss": 0.9594, + "epoch": 0.1422915829938608, + "grad_norm": 0.251953125, + "learning_rate": 0.00019891259421355895, + "loss": 0.9697, "step": 2480 }, { - "epoch": 0.0712922984235365, - "grad_norm": 0.345703125, - "learning_rate": 0.000142570281124498, - "loss": 0.9251, + "epoch": 0.1425784611853807, + "grad_norm": 0.2451171875, + "learning_rate": 0.0001988978169313815, + "loss": 0.9837, "step": 2485 }, { - "epoch": 0.07143574369199432, - "grad_norm": 0.3125, - "learning_rate": 0.00014285714285714287, - "loss": 0.9571, + "epoch": 0.14286533937690057, + "grad_norm": 0.2421875, + "learning_rate": 0.00019888294047425143, + "loss": 1.0065, "step": 2490 }, { - "epoch": 0.07157918896045214, - "grad_norm": 0.353515625, - "learning_rate": 0.00014314400458978775, - "loss": 1.0072, + "epoch": 0.14315221756842045, + "grad_norm": 0.255859375, + "learning_rate": 0.00019886796485708692, + "loss": 0.9125, "step": 2495 }, { - "epoch": 0.07172263422890995, - "grad_norm": 0.32421875, - "learning_rate": 0.0001434308663224326, - "loss": 1.0424, + "epoch": 0.14343909575994032, + "grad_norm": 0.263671875, + "learning_rate": 0.00019885289009490556, + "loss": 1.0038, "step": 2500 }, { - "epoch": 0.07186607949736779, - "grad_norm": 0.310546875, - "learning_rate": 0.00014371772805507745, - "loss": 1.0232, + "epoch": 0.14372597395146022, + "grad_norm": 0.25390625, + "learning_rate": 0.0001988377162028243, + "loss": 0.926, "step": 2505 }, { - "epoch": 0.0720095247658256, - "grad_norm": 0.34765625, - "learning_rate": 0.00014400458978772233, - "loss": 0.9676, + "epoch": 0.1440128521429801, + "grad_norm": 0.263671875, + "learning_rate": 0.00019882244319605966, + "loss": 0.9902, "step": 2510 }, { - "epoch": 0.07215297003428342, - "grad_norm": 0.326171875, - "learning_rate": 0.0001442914515203672, - "loss": 1.0152, + "epoch": 0.14429973033449997, + "grad_norm": 0.2578125, + "learning_rate": 0.00019880707108992738, + "loss": 1.0494, "step": 2515 }, { - "epoch": 0.07229641530274124, - "grad_norm": 0.3203125, - "learning_rate": 0.00014457831325301204, - "loss": 0.9446, + "epoch": 0.14458660852601984, + "grad_norm": 0.2890625, + "learning_rate": 0.0001987915998998426, + "loss": 0.9435, "step": 2520 }, { - "epoch": 0.07243986057119906, - "grad_norm": 0.28125, - "learning_rate": 0.00014486517498565692, - "loss": 0.9264, + "epoch": 0.14487348671753975, + "grad_norm": 0.251953125, + "learning_rate": 0.00019877602964131995, + "loss": 0.9293, "step": 2525 }, { - "epoch": 0.07258330583965687, - "grad_norm": 0.330078125, - "learning_rate": 0.00014515203671830177, - "loss": 1.1308, + "epoch": 0.14516036490905962, + "grad_norm": 0.2734375, + "learning_rate": 0.0001987603603299733, + "loss": 0.9658, "step": 2530 }, { - "epoch": 0.0727267511081147, - "grad_norm": 0.318359375, - "learning_rate": 0.00014543889845094666, - "loss": 0.9983, + "epoch": 0.1454472431005795, + "grad_norm": 0.26171875, + "learning_rate": 0.00019874459198151583, + "loss": 0.9757, "step": 2535 }, { - "epoch": 0.07287019637657252, - "grad_norm": 0.322265625, - "learning_rate": 0.0001457257601835915, - "loss": 0.8992, + "epoch": 0.14573412129209937, + "grad_norm": 0.2470703125, + "learning_rate": 0.0001987287246117601, + "loss": 1.0029, "step": 2540 }, { - "epoch": 
0.07301364164503034, - "grad_norm": 0.296875, - "learning_rate": 0.0001460126219162364, - "loss": 0.9443, + "epoch": 0.14602099948361927, + "grad_norm": 0.279296875, + "learning_rate": 0.00019871275823661795, + "loss": 0.9861, "step": 2545 }, { - "epoch": 0.07315708691348816, - "grad_norm": 0.31640625, - "learning_rate": 0.00014629948364888124, - "loss": 0.9898, + "epoch": 0.14630787767513914, + "grad_norm": 0.25, + "learning_rate": 0.00019869669287210046, + "loss": 0.9809, "step": 2550 }, { - "epoch": 0.07330053218194597, - "grad_norm": 0.3125, - "learning_rate": 0.00014658634538152612, - "loss": 1.0424, + "epoch": 0.14659475586665902, + "grad_norm": 0.255859375, + "learning_rate": 0.00019868052853431808, + "loss": 0.9763, "step": 2555 }, { - "epoch": 0.0734439774504038, - "grad_norm": 0.3359375, - "learning_rate": 0.00014687320711417098, - "loss": 1.0227, + "epoch": 0.1468816340581789, + "grad_norm": 0.2578125, + "learning_rate": 0.00019866426523948037, + "loss": 0.9973, "step": 2560 }, { - "epoch": 0.07358742271886162, - "grad_norm": 0.337890625, - "learning_rate": 0.00014716006884681583, - "loss": 1.0059, + "epoch": 0.1471685122496988, + "grad_norm": 0.275390625, + "learning_rate": 0.00019864790300389625, + "loss": 1.0361, "step": 2565 }, { - "epoch": 0.07373086798731944, - "grad_norm": 0.333984375, - "learning_rate": 0.0001474469305794607, - "loss": 1.0145, + "epoch": 0.14745539044121866, + "grad_norm": 0.2490234375, + "learning_rate": 0.00019863144184397376, + "loss": 0.9603, "step": 2570 }, { - "epoch": 0.07387431325577726, - "grad_norm": 0.341796875, - "learning_rate": 0.0001477337923121056, - "loss": 0.9633, + "epoch": 0.14774226863273854, + "grad_norm": 0.2734375, + "learning_rate": 0.0001986148817762203, + "loss": 0.9825, "step": 2575 }, { - "epoch": 0.07401775852423507, - "grad_norm": 0.30859375, - "learning_rate": 0.00014802065404475042, - "loss": 0.9183, + "epoch": 0.1480291468242584, + "grad_norm": 0.255859375, + "learning_rate": 0.0001985982228172422, + "loss": 1.0327, "step": 2580 }, { - "epoch": 0.07416120379269289, - "grad_norm": 0.330078125, - "learning_rate": 0.0001483075157773953, - "loss": 1.0152, + "epoch": 0.1483160250157783, + "grad_norm": 0.26953125, + "learning_rate": 0.0001985814649837452, + "loss": 1.0019, "step": 2585 }, { - "epoch": 0.07430464906115072, - "grad_norm": 0.310546875, - "learning_rate": 0.00014859437751004018, - "loss": 1.0492, + "epoch": 0.1486029032072982, + "grad_norm": 0.255859375, + "learning_rate": 0.000198564608292534, + "loss": 0.9411, "step": 2590 }, { - "epoch": 0.07444809432960854, - "grad_norm": 0.32421875, - "learning_rate": 0.00014888123924268503, - "loss": 0.8989, + "epoch": 0.14888978139881806, + "grad_norm": 0.25, + "learning_rate": 0.00019854765276051264, + "loss": 0.9216, "step": 2595 }, { - "epoch": 0.07459153959806636, - "grad_norm": 0.33984375, - "learning_rate": 0.00014916810097532988, - "loss": 1.0085, + "epoch": 0.14917665959033793, + "grad_norm": 0.271484375, + "learning_rate": 0.00019853059840468408, + "loss": 0.9562, "step": 2600 }, { - "epoch": 0.07473498486652418, - "grad_norm": 0.326171875, - "learning_rate": 0.00014945496270797476, - "loss": 0.9005, + "epoch": 0.14946353778185784, + "grad_norm": 0.265625, + "learning_rate": 0.0001985134452421505, + "loss": 1.0468, "step": 2605 }, { - "epoch": 0.074878430134982, - "grad_norm": 0.36328125, - "learning_rate": 0.00014974182444061964, - "loss": 1.0341, + "epoch": 0.1497504159733777, + "grad_norm": 0.265625, + "learning_rate": 0.00019849619329011315, + "loss": 1.0086, 
"step": 2610 }, { - "epoch": 0.07502187540343981, - "grad_norm": 0.349609375, - "learning_rate": 0.0001500286861732645, - "loss": 0.9141, + "epoch": 0.15003729416489758, + "grad_norm": 0.267578125, + "learning_rate": 0.0001984788425658723, + "loss": 1.0496, "step": 2615 }, { - "epoch": 0.07516532067189764, - "grad_norm": 0.31640625, - "learning_rate": 0.00015031554790590935, - "loss": 0.9424, + "epoch": 0.15032417235641746, + "grad_norm": 0.2734375, + "learning_rate": 0.00019846139308682729, + "loss": 1.0036, "step": 2620 }, { - "epoch": 0.07530876594035546, - "grad_norm": 0.318359375, - "learning_rate": 0.00015060240963855423, - "loss": 0.924, + "epoch": 0.15061105054793736, + "grad_norm": 0.267578125, + "learning_rate": 0.0001984438448704765, + "loss": 0.9403, "step": 2625 }, { - "epoch": 0.07545221120881328, - "grad_norm": 0.31640625, - "learning_rate": 0.00015088927137119908, - "loss": 0.9497, + "epoch": 0.15089792873945723, + "grad_norm": 0.2421875, + "learning_rate": 0.0001984261979344173, + "loss": 0.9519, "step": 2630 }, { - "epoch": 0.0755956564772711, - "grad_norm": 0.345703125, - "learning_rate": 0.00015117613310384396, - "loss": 0.93, + "epoch": 0.1511848069309771, + "grad_norm": 0.236328125, + "learning_rate": 0.00019840845229634612, + "loss": 0.9608, "step": 2635 }, { - "epoch": 0.07573910174572891, - "grad_norm": 0.328125, - "learning_rate": 0.00015146299483648882, - "loss": 0.9559, + "epoch": 0.15147168512249698, + "grad_norm": 0.27734375, + "learning_rate": 0.00019839060797405833, + "loss": 1.0074, "step": 2640 }, { - "epoch": 0.07588254701418674, - "grad_norm": 0.318359375, - "learning_rate": 0.00015174985656913367, - "loss": 0.8921, + "epoch": 0.15175856331401688, + "grad_norm": 0.267578125, + "learning_rate": 0.0001983726649854482, + "loss": 0.9188, "step": 2645 }, { - "epoch": 0.07602599228264456, - "grad_norm": 0.330078125, - "learning_rate": 0.00015203671830177855, - "loss": 1.0818, + "epoch": 0.15204544150553675, + "grad_norm": 0.25390625, + "learning_rate": 0.0001983546233485091, + "loss": 1.0233, "step": 2650 }, { - "epoch": 0.07616943755110238, - "grad_norm": 0.3046875, - "learning_rate": 0.00015232358003442343, - "loss": 0.9721, + "epoch": 0.15233231969705663, + "grad_norm": 0.287109375, + "learning_rate": 0.0001983364830813331, + "loss": 1.0333, "step": 2655 }, { - "epoch": 0.0763128828195602, - "grad_norm": 0.326171875, - "learning_rate": 0.00015261044176706828, - "loss": 1.0057, + "epoch": 0.1526191978885765, + "grad_norm": 0.25, + "learning_rate": 0.00019831824420211137, + "loss": 0.993, "step": 2660 }, { - "epoch": 0.07645632808801801, - "grad_norm": 0.32421875, - "learning_rate": 0.00015289730349971314, - "loss": 0.903, + "epoch": 0.1529060760800964, + "grad_norm": 0.271484375, + "learning_rate": 0.00019829990672913387, + "loss": 0.9822, "step": 2665 }, { - "epoch": 0.07659977335647583, - "grad_norm": 0.3203125, - "learning_rate": 0.00015318416523235802, - "loss": 0.9811, + "epoch": 0.15319295427161628, + "grad_norm": 0.2578125, + "learning_rate": 0.0001982814706807895, + "loss": 0.962, "step": 2670 }, { - "epoch": 0.07674321862493366, - "grad_norm": 0.345703125, - "learning_rate": 0.00015347102696500287, - "loss": 1.0887, + "epoch": 0.15347983246313615, + "grad_norm": 0.267578125, + "learning_rate": 0.00019826293607556593, + "loss": 0.9029, "step": 2675 }, { - "epoch": 0.07688666389339148, - "grad_norm": 0.337890625, - "learning_rate": 0.00015375788869764772, - "loss": 0.8715, + "epoch": 0.15376671065465602, + "grad_norm": 0.259765625, + "learning_rate": 
0.00019824430293204973, + "loss": 0.922, "step": 2680 }, { - "epoch": 0.0770301091618493, - "grad_norm": 0.349609375, - "learning_rate": 0.0001540447504302926, - "loss": 0.9749, + "epoch": 0.15405358884617593, + "grad_norm": 0.251953125, + "learning_rate": 0.00019822557126892627, + "loss": 0.9965, "step": 2685 }, { - "epoch": 0.07717355443030711, - "grad_norm": 0.330078125, - "learning_rate": 0.00015433161216293748, - "loss": 1.0808, + "epoch": 0.1543404670376958, + "grad_norm": 0.259765625, + "learning_rate": 0.00019820674110497966, + "loss": 0.927, "step": 2690 }, { - "epoch": 0.07731699969876493, - "grad_norm": 0.390625, - "learning_rate": 0.00015461847389558234, - "loss": 0.9457, + "epoch": 0.15462734522921567, + "grad_norm": 0.2578125, + "learning_rate": 0.00019818781245909285, + "loss": 0.9637, "step": 2695 }, { - "epoch": 0.07746044496722275, - "grad_norm": 0.28515625, - "learning_rate": 0.0001549053356282272, - "loss": 0.9669, + "epoch": 0.15491422342073555, + "grad_norm": 0.263671875, + "learning_rate": 0.00019816878535024754, + "loss": 1.0141, "step": 2700 }, { - "epoch": 0.07760389023568058, - "grad_norm": 0.330078125, - "learning_rate": 0.00015519219736087207, - "loss": 0.9614, + "epoch": 0.15520110161225545, + "grad_norm": 0.2578125, + "learning_rate": 0.00019814965979752413, + "loss": 1.0148, "step": 2705 }, { - "epoch": 0.0777473355041384, - "grad_norm": 0.357421875, - "learning_rate": 0.00015547905909351695, - "loss": 1.011, + "epoch": 0.15548797980377532, + "grad_norm": 0.251953125, + "learning_rate": 0.0001981304358201018, + "loss": 1.0374, "step": 2710 }, { - "epoch": 0.07789078077259622, - "grad_norm": 0.33203125, - "learning_rate": 0.0001557659208261618, - "loss": 0.9804, + "epoch": 0.1557748579952952, + "grad_norm": 0.24609375, + "learning_rate": 0.00019811111343725842, + "loss": 0.9639, "step": 2715 }, { - "epoch": 0.07803422604105403, - "grad_norm": 0.32421875, - "learning_rate": 0.00015605278255880666, - "loss": 0.9013, + "epoch": 0.15606173618681507, + "grad_norm": 0.267578125, + "learning_rate": 0.00019809169266837043, + "loss": 0.9544, "step": 2720 }, { - "epoch": 0.07817767130951185, - "grad_norm": 0.349609375, - "learning_rate": 0.00015633964429145154, - "loss": 1.0235, + "epoch": 0.15634861437833497, + "grad_norm": 0.263671875, + "learning_rate": 0.0001980721735329131, + "loss": 0.9553, "step": 2725 }, { - "epoch": 0.07832111657796968, - "grad_norm": 0.330078125, - "learning_rate": 0.0001566265060240964, - "loss": 0.9973, + "epoch": 0.15663549256985485, + "grad_norm": 0.2392578125, + "learning_rate": 0.0001980525560504602, + "loss": 0.9162, "step": 2730 }, { - "epoch": 0.0784645618464275, - "grad_norm": 0.314453125, - "learning_rate": 0.00015691336775674127, - "loss": 0.943, + "epoch": 0.15692237076137472, + "grad_norm": 0.26171875, + "learning_rate": 0.00019803284024068427, + "loss": 0.997, "step": 2735 }, { - "epoch": 0.07860800711488532, - "grad_norm": 0.3125, - "learning_rate": 0.00015720022948938613, - "loss": 0.9324, + "epoch": 0.1572092489528946, + "grad_norm": 0.232421875, + "learning_rate": 0.00019801302612335628, + "loss": 0.9371, "step": 2740 }, { - "epoch": 0.07875145238334313, - "grad_norm": 0.361328125, - "learning_rate": 0.00015748709122203098, - "loss": 1.0497, + "epoch": 0.1574961271444145, + "grad_norm": 0.26953125, + "learning_rate": 0.00019799311371834595, + "loss": 0.9677, "step": 2745 }, { - "epoch": 0.07889489765180095, - "grad_norm": 0.322265625, - "learning_rate": 0.00015777395295467586, - "loss": 0.8832, + "epoch": 
0.15778300533593437, + "grad_norm": 0.27734375, + "learning_rate": 0.00019797310304562143, + "loss": 0.9503, "step": 2750 }, { - "epoch": 0.07903834292025877, - "grad_norm": 0.326171875, - "learning_rate": 0.0001580608146873207, - "loss": 1.0262, + "epoch": 0.15806988352745424, + "grad_norm": 0.24609375, + "learning_rate": 0.00019795299412524945, + "loss": 1.0382, "step": 2755 }, { - "epoch": 0.0791817881887166, - "grad_norm": 0.388671875, - "learning_rate": 0.00015834767641996557, - "loss": 0.9918, + "epoch": 0.15835676171897412, + "grad_norm": 0.25, + "learning_rate": 0.00019793278697739533, + "loss": 0.9606, "step": 2760 }, { - "epoch": 0.07932523345717442, - "grad_norm": 0.306640625, - "learning_rate": 0.00015863453815261045, - "loss": 0.9259, + "epoch": 0.15864363991049402, + "grad_norm": 0.251953125, + "learning_rate": 0.00019791248162232285, + "loss": 0.9368, "step": 2765 }, { - "epoch": 0.07946867872563224, - "grad_norm": 0.39453125, - "learning_rate": 0.00015892139988525533, - "loss": 0.9763, + "epoch": 0.1589305181020139, + "grad_norm": 0.26171875, + "learning_rate": 0.00019789207808039425, + "loss": 0.9581, "step": 2770 }, { - "epoch": 0.07961212399409005, - "grad_norm": 0.302734375, - "learning_rate": 0.00015920826161790018, - "loss": 0.9606, + "epoch": 0.15921739629353376, + "grad_norm": 0.279296875, + "learning_rate": 0.0001978715763720702, + "loss": 1.0179, "step": 2775 }, { - "epoch": 0.07975556926254787, - "grad_norm": 0.345703125, - "learning_rate": 0.00015949512335054503, - "loss": 0.9481, + "epoch": 0.15950427448505364, + "grad_norm": 0.259765625, + "learning_rate": 0.00019785097651790992, + "loss": 0.9556, "step": 2780 }, { - "epoch": 0.07989901453100569, - "grad_norm": 0.33203125, - "learning_rate": 0.0001597819850831899, - "loss": 0.9916, + "epoch": 0.15979115267657354, + "grad_norm": 0.255859375, + "learning_rate": 0.00019783027853857097, + "loss": 1.0694, "step": 2785 }, { - "epoch": 0.08004245979946352, - "grad_norm": 0.306640625, - "learning_rate": 0.0001600688468158348, - "loss": 0.9426, + "epoch": 0.1600780308680934, + "grad_norm": 0.251953125, + "learning_rate": 0.00019780948245480933, + "loss": 1.0073, "step": 2790 }, { - "epoch": 0.08018590506792134, - "grad_norm": 0.326171875, - "learning_rate": 0.00016035570854847965, - "loss": 0.9092, + "epoch": 0.1603649090596133, + "grad_norm": 0.267578125, + "learning_rate": 0.00019778858828747934, + "loss": 0.9729, "step": 2795 }, { - "epoch": 0.08032935033637915, - "grad_norm": 0.33984375, - "learning_rate": 0.0001606425702811245, - "loss": 0.9592, + "epoch": 0.16065178725113316, + "grad_norm": 0.2490234375, + "learning_rate": 0.00019776759605753377, + "loss": 1.0349, "step": 2800 }, { - "epoch": 0.08047279560483697, - "grad_norm": 0.328125, - "learning_rate": 0.00016092943201376938, - "loss": 1.067, + "epoch": 0.16093866544265306, + "grad_norm": 0.248046875, + "learning_rate": 0.0001977465057860236, + "loss": 0.9904, "step": 2805 }, { - "epoch": 0.08061624087329479, - "grad_norm": 0.337890625, - "learning_rate": 0.00016121629374641423, - "loss": 1.0768, + "epoch": 0.16122554363417294, + "grad_norm": 0.2490234375, + "learning_rate": 0.00019772531749409828, + "loss": 0.9545, "step": 2810 }, { - "epoch": 0.08075968614175262, - "grad_norm": 0.333984375, - "learning_rate": 0.00016150315547905909, - "loss": 1.0366, + "epoch": 0.1615124218256928, + "grad_norm": 0.271484375, + "learning_rate": 0.0001977040312030054, + "loss": 0.9768, "step": 2815 }, { - "epoch": 0.08090313141021044, - "grad_norm": 0.345703125, - 
"learning_rate": 0.00016179001721170397, - "loss": 1.0552, + "epoch": 0.16179930001721268, + "grad_norm": 0.26953125, + "learning_rate": 0.00019768264693409098, + "loss": 0.9905, "step": 2820 }, { - "epoch": 0.08104657667866826, - "grad_norm": 0.3203125, - "learning_rate": 0.00016207687894434885, - "loss": 0.9732, + "epoch": 0.16208617820873258, + "grad_norm": 0.271484375, + "learning_rate": 0.00019766116470879913, + "loss": 0.9736, "step": 2825 }, { - "epoch": 0.08119002194712607, - "grad_norm": 0.333984375, - "learning_rate": 0.0001623637406769937, - "loss": 1.0063, + "epoch": 0.16237305640025246, + "grad_norm": 0.263671875, + "learning_rate": 0.00019763958454867235, + "loss": 1.0114, "step": 2830 }, { - "epoch": 0.08133346721558389, - "grad_norm": 0.34765625, - "learning_rate": 0.00016265060240963855, - "loss": 1.0361, + "epoch": 0.16265993459177233, + "grad_norm": 0.24609375, + "learning_rate": 0.00019761790647535124, + "loss": 0.9847, "step": 2835 }, { - "epoch": 0.08147691248404171, - "grad_norm": 0.33984375, - "learning_rate": 0.00016293746414228343, - "loss": 0.9658, + "epoch": 0.1629468127832922, + "grad_norm": 0.28515625, + "learning_rate": 0.00019759613051057462, + "loss": 0.959, "step": 2840 }, { - "epoch": 0.08162035775249954, - "grad_norm": 0.333984375, - "learning_rate": 0.0001632243258749283, - "loss": 0.8923, + "epoch": 0.1632336909748121, + "grad_norm": 0.265625, + "learning_rate": 0.00019757425667617945, + "loss": 0.93, "step": 2845 }, { - "epoch": 0.08176380302095736, - "grad_norm": 0.33203125, - "learning_rate": 0.00016351118760757317, - "loss": 1.0203, + "epoch": 0.16352056916633198, + "grad_norm": 0.263671875, + "learning_rate": 0.00019755228499410092, + "loss": 1.0175, "step": 2850 }, { - "epoch": 0.08190724828941517, - "grad_norm": 0.337890625, - "learning_rate": 0.00016379804934021802, - "loss": 0.9931, + "epoch": 0.16380744735785185, + "grad_norm": 0.2470703125, + "learning_rate": 0.00019753021548637222, + "loss": 0.9568, "step": 2855 }, { - "epoch": 0.08205069355787299, - "grad_norm": 0.33984375, - "learning_rate": 0.00016408491107286287, - "loss": 1.0928, + "epoch": 0.16409432554937173, + "grad_norm": 0.259765625, + "learning_rate": 0.00019750804817512477, + "loss": 0.9666, "step": 2860 }, { - "epoch": 0.08219413882633081, - "grad_norm": 0.3515625, - "learning_rate": 0.00016437177280550775, - "loss": 0.9558, + "epoch": 0.16438120374089163, + "grad_norm": 0.265625, + "learning_rate": 0.0001974857830825879, + "loss": 0.9816, "step": 2865 }, { - "epoch": 0.08233758409478863, - "grad_norm": 0.326171875, - "learning_rate": 0.00016465863453815263, - "loss": 0.9175, + "epoch": 0.1646680819324115, + "grad_norm": 0.26953125, + "learning_rate": 0.0001974634202310892, + "loss": 0.9241, "step": 2870 }, { - "epoch": 0.08248102936324646, - "grad_norm": 0.33984375, - "learning_rate": 0.00016494549627079746, - "loss": 0.9878, + "epoch": 0.16495496012393138, + "grad_norm": 0.244140625, + "learning_rate": 0.00019744095964305413, + "loss": 0.9971, "step": 2875 }, { - "epoch": 0.08262447463170428, - "grad_norm": 0.3828125, - "learning_rate": 0.00016523235800344234, - "loss": 1.0972, + "epoch": 0.16524183831545125, + "grad_norm": 0.251953125, + "learning_rate": 0.00019741840134100623, + "loss": 0.9578, "step": 2880 }, { - "epoch": 0.08276791990016209, - "grad_norm": 0.337890625, - "learning_rate": 0.00016551921973608722, - "loss": 0.9583, + "epoch": 0.16552871650697115, + "grad_norm": 0.26171875, + "learning_rate": 0.000197395745347567, + "loss": 0.9817, "step": 2885 }, { - 
"epoch": 0.08291136516861991, - "grad_norm": 0.31640625, - "learning_rate": 0.0001658060814687321, - "loss": 0.9437, + "epoch": 0.16581559469849103, + "grad_norm": 0.255859375, + "learning_rate": 0.00019737299168545597, + "loss": 0.9887, "step": 2890 }, { - "epoch": 0.08305481043707773, - "grad_norm": 0.341796875, - "learning_rate": 0.00016609294320137693, - "loss": 1.0626, + "epoch": 0.1661024728900109, + "grad_norm": 0.27734375, + "learning_rate": 0.00019735014037749053, + "loss": 1.0605, "step": 2895 }, { - "epoch": 0.08319825570553556, - "grad_norm": 0.3203125, - "learning_rate": 0.0001663798049340218, - "loss": 0.936, + "epoch": 0.16638935108153077, + "grad_norm": 0.25, + "learning_rate": 0.000197327191446586, + "loss": 0.9337, "step": 2900 }, { - "epoch": 0.08334170097399338, - "grad_norm": 0.37890625, - "learning_rate": 0.0001666666666666667, - "loss": 1.0241, + "epoch": 0.16667622927305067, + "grad_norm": 0.271484375, + "learning_rate": 0.00019730414491575564, + "loss": 0.876, "step": 2905 }, { - "epoch": 0.0834851462424512, - "grad_norm": 0.328125, - "learning_rate": 0.00016695352839931154, - "loss": 0.9606, + "epoch": 0.16696310746457055, + "grad_norm": 0.296875, + "learning_rate": 0.00019728100080811057, + "loss": 0.9747, "step": 2910 }, { - "epoch": 0.08362859151090901, - "grad_norm": 0.35546875, - "learning_rate": 0.0001672403901319564, - "loss": 0.8437, + "epoch": 0.16724998565609042, + "grad_norm": 0.2490234375, + "learning_rate": 0.00019725775914685977, + "loss": 0.9694, "step": 2915 }, { - "epoch": 0.08377203677936683, - "grad_norm": 0.33203125, - "learning_rate": 0.00016752725186460127, - "loss": 0.9694, + "epoch": 0.1675368638476103, + "grad_norm": 0.271484375, + "learning_rate": 0.00019723441995531, + "loss": 0.955, "step": 2920 }, { - "epoch": 0.08391548204782465, - "grad_norm": 0.333984375, - "learning_rate": 0.00016781411359724613, - "loss": 0.9885, + "epoch": 0.1678237420391302, + "grad_norm": 0.275390625, + "learning_rate": 0.00019721098325686584, + "loss": 0.9547, "step": 2925 }, { - "epoch": 0.08405892731628248, - "grad_norm": 0.369140625, - "learning_rate": 0.000168100975329891, - "loss": 1.0101, + "epoch": 0.16811062023065007, + "grad_norm": 0.255859375, + "learning_rate": 0.0001971874490750297, + "loss": 0.9627, "step": 2930 }, { - "epoch": 0.0842023725847403, - "grad_norm": 0.38671875, - "learning_rate": 0.00016838783706253586, - "loss": 1.022, + "epoch": 0.16839749842216994, + "grad_norm": 0.23828125, + "learning_rate": 0.0001971638174334017, + "loss": 1.0038, "step": 2935 }, { - "epoch": 0.08434581785319811, - "grad_norm": 0.345703125, - "learning_rate": 0.00016867469879518074, - "loss": 1.0473, + "epoch": 0.16868437661368982, + "grad_norm": 0.2578125, + "learning_rate": 0.0001971400883556797, + "loss": 0.9428, "step": 2940 }, { - "epoch": 0.08448926312165593, - "grad_norm": 0.31640625, - "learning_rate": 0.0001689615605278256, - "loss": 1.0305, + "epoch": 0.16897125480520972, + "grad_norm": 0.267578125, + "learning_rate": 0.00019711626186565929, + "loss": 1.0165, "step": 2945 }, { - "epoch": 0.08463270839011375, - "grad_norm": 0.4140625, - "learning_rate": 0.00016924842226047048, - "loss": 1.1452, + "epoch": 0.1692581329967296, + "grad_norm": 0.26953125, + "learning_rate": 0.0001970923379872337, + "loss": 0.9777, "step": 2950 }, { - "epoch": 0.08477615365857157, - "grad_norm": 0.35546875, - "learning_rate": 0.00016953528399311533, - "loss": 1.0105, + "epoch": 0.16954501118824947, + "grad_norm": 0.27734375, + "learning_rate": 0.00019706831674439382, + 
"loss": 1.0453, "step": 2955 }, { - "epoch": 0.0849195989270294, - "grad_norm": 0.34375, - "learning_rate": 0.00016982214572576018, - "loss": 0.9968, + "epoch": 0.16983188937976934, + "grad_norm": 0.26171875, + "learning_rate": 0.00019704419816122826, + "loss": 0.9889, "step": 2960 }, { - "epoch": 0.08506304419548721, - "grad_norm": 0.326171875, - "learning_rate": 0.00017010900745840506, - "loss": 0.8688, + "epoch": 0.17011876757128924, + "grad_norm": 0.263671875, + "learning_rate": 0.00019701998226192319, + "loss": 0.9816, "step": 2965 }, { - "epoch": 0.08520648946394503, - "grad_norm": 0.3359375, - "learning_rate": 0.00017039586919104992, - "loss": 0.9839, + "epoch": 0.17040564576280912, + "grad_norm": 0.248046875, + "learning_rate": 0.00019699566907076236, + "loss": 0.9419, "step": 2970 }, { - "epoch": 0.08534993473240285, - "grad_norm": 0.3671875, - "learning_rate": 0.00017068273092369477, - "loss": 1.0472, + "epoch": 0.170692523954329, + "grad_norm": 0.271484375, + "learning_rate": 0.00019697125861212707, + "loss": 0.9346, "step": 2975 }, { - "epoch": 0.08549338000086067, - "grad_norm": 0.318359375, - "learning_rate": 0.00017096959265633965, - "loss": 0.8802, + "epoch": 0.17097940214584886, + "grad_norm": 0.259765625, + "learning_rate": 0.00019694675091049617, + "loss": 0.9696, "step": 2980 }, { - "epoch": 0.0856368252693185, - "grad_norm": 0.3828125, - "learning_rate": 0.00017125645438898453, - "loss": 0.9227, + "epoch": 0.17126628033736876, + "grad_norm": 0.25390625, + "learning_rate": 0.0001969221459904461, + "loss": 0.9879, "step": 2985 }, { - "epoch": 0.08578027053777632, - "grad_norm": 0.322265625, - "learning_rate": 0.00017154331612162938, - "loss": 0.8887, + "epoch": 0.17155315852888864, + "grad_norm": 0.28125, + "learning_rate": 0.0001968974438766507, + "loss": 1.0363, "step": 2990 }, { - "epoch": 0.08592371580623413, - "grad_norm": 0.34375, - "learning_rate": 0.00017183017785427424, - "loss": 0.96, + "epoch": 0.1718400367204085, + "grad_norm": 0.259765625, + "learning_rate": 0.0001968726445938813, + "loss": 0.9803, "step": 2995 }, { - "epoch": 0.08606716107469195, - "grad_norm": 0.34765625, - "learning_rate": 0.00017211703958691912, - "loss": 1.0642, + "epoch": 0.17212691491192839, + "grad_norm": 0.259765625, + "learning_rate": 0.00019684774816700664, + "loss": 0.9079, "step": 3000 }, { - "epoch": 0.08621060634314977, - "grad_norm": 0.32421875, - "learning_rate": 0.000172403901319564, - "loss": 0.9955, + "epoch": 0.1724137931034483, + "grad_norm": 0.251953125, + "learning_rate": 0.00019682275462099298, + "loss": 0.9684, "step": 3005 }, { - "epoch": 0.08635405161160759, - "grad_norm": 0.330078125, - "learning_rate": 0.00017269076305220885, - "loss": 1.0419, + "epoch": 0.17270067129496816, + "grad_norm": 0.25, + "learning_rate": 0.00019679766398090383, + "loss": 1.009, "step": 3010 }, { - "epoch": 0.08649749688006542, - "grad_norm": 0.396484375, - "learning_rate": 0.0001729776247848537, - "loss": 1.0248, + "epoch": 0.17298754948648803, + "grad_norm": 0.248046875, + "learning_rate": 0.00019677247627190026, + "loss": 0.9769, "step": 3015 }, { - "epoch": 0.08664094214852323, - "grad_norm": 0.3828125, - "learning_rate": 0.00017326448651749858, - "loss": 1.0789, + "epoch": 0.1732744276780079, + "grad_norm": 0.263671875, + "learning_rate": 0.00019674719151924043, + "loss": 1.0395, "step": 3020 }, { - "epoch": 0.08678438741698105, - "grad_norm": 0.357421875, - "learning_rate": 0.00017355134825014344, - "loss": 1.0382, + "epoch": 0.1735613058695278, + "grad_norm": 0.255859375, + 
"learning_rate": 0.00019672180974828, + "loss": 1.0476, "step": 3025 }, { - "epoch": 0.08692783268543887, - "grad_norm": 0.359375, - "learning_rate": 0.0001738382099827883, - "loss": 0.9435, + "epoch": 0.17384818406104768, + "grad_norm": 0.279296875, + "learning_rate": 0.0001966963309844719, + "loss": 0.9697, "step": 3030 }, { - "epoch": 0.08707127795389669, - "grad_norm": 0.353515625, - "learning_rate": 0.00017412507171543317, - "loss": 1.0148, + "epoch": 0.17413506225256756, + "grad_norm": 0.24609375, + "learning_rate": 0.00019667075525336622, + "loss": 0.9243, "step": 3035 }, { - "epoch": 0.0872147232223545, - "grad_norm": 0.345703125, - "learning_rate": 0.00017441193344807802, - "loss": 0.9918, + "epoch": 0.17442194044408743, + "grad_norm": 0.306640625, + "learning_rate": 0.00019664508258061044, + "loss": 1.0036, "step": 3040 }, { - "epoch": 0.08735816849081234, - "grad_norm": 0.34375, - "learning_rate": 0.0001746987951807229, - "loss": 0.9803, + "epoch": 0.17470881863560733, + "grad_norm": 0.255859375, + "learning_rate": 0.0001966193129919491, + "loss": 0.955, "step": 3045 }, { - "epoch": 0.08750161375927015, - "grad_norm": 0.396484375, - "learning_rate": 0.00017498565691336776, - "loss": 1.054, + "epoch": 0.1749956968271272, + "grad_norm": 0.248046875, + "learning_rate": 0.00019659344651322405, + "loss": 0.9717, "step": 3050 }, { - "epoch": 0.08764505902772797, - "grad_norm": 0.37109375, - "learning_rate": 0.00017527251864601264, - "loss": 0.9016, + "epoch": 0.17528257501864708, + "grad_norm": 0.26171875, + "learning_rate": 0.00019656748317037424, + "loss": 0.9715, "step": 3055 }, { - "epoch": 0.08778850429618579, - "grad_norm": 0.314453125, - "learning_rate": 0.0001755593803786575, - "loss": 1.0074, + "epoch": 0.17556945321016695, + "grad_norm": 0.2734375, + "learning_rate": 0.00019654142298943574, + "loss": 0.9769, "step": 3060 }, { - "epoch": 0.0879319495646436, - "grad_norm": 0.3359375, - "learning_rate": 0.00017584624211130237, - "loss": 0.954, + "epoch": 0.17585633140168686, + "grad_norm": 0.26953125, + "learning_rate": 0.00019651526599654182, + "loss": 0.9292, "step": 3065 }, { - "epoch": 0.08807539483310144, - "grad_norm": 0.349609375, - "learning_rate": 0.00017613310384394722, - "loss": 0.8514, + "epoch": 0.17614320959320673, + "grad_norm": 0.255859375, + "learning_rate": 0.0001964890122179227, + "loss": 0.9528, "step": 3070 }, { - "epoch": 0.08821884010155925, - "grad_norm": 0.36328125, - "learning_rate": 0.00017641996557659208, - "loss": 0.9563, + "epoch": 0.1764300877847266, + "grad_norm": 0.26171875, + "learning_rate": 0.00019646266167990578, + "loss": 0.9587, "step": 3075 }, { - "epoch": 0.08836228537001707, - "grad_norm": 0.328125, - "learning_rate": 0.00017670682730923696, - "loss": 0.9636, + "epoch": 0.17671696597624648, + "grad_norm": 0.279296875, + "learning_rate": 0.00019643621440891543, + "loss": 0.927, "step": 3080 }, { - "epoch": 0.08850573063847489, - "grad_norm": 0.3671875, - "learning_rate": 0.00017699368904188184, - "loss": 1.0381, + "epoch": 0.17700384416776638, + "grad_norm": 0.255859375, + "learning_rate": 0.00019640967043147302, + "loss": 0.9888, "step": 3085 }, { - "epoch": 0.0886491759069327, - "grad_norm": 0.361328125, - "learning_rate": 0.00017728055077452666, - "loss": 1.0406, + "epoch": 0.17729072235928625, + "grad_norm": 0.263671875, + "learning_rate": 0.0001963830297741969, + "loss": 0.9385, "step": 3090 }, { - "epoch": 0.08879262117539052, - "grad_norm": 0.36328125, - "learning_rate": 0.00017756741250717154, - "loss": 0.9803, + "epoch": 
0.17757760055080613, + "grad_norm": 0.251953125, + "learning_rate": 0.0001963562924638024, + "loss": 1.009, "step": 3095 }, { - "epoch": 0.08893606644384836, - "grad_norm": 0.384765625, - "learning_rate": 0.00017785427423981642, - "loss": 1.0271, + "epoch": 0.177864478742326, + "grad_norm": 0.291015625, + "learning_rate": 0.00019632945852710173, + "loss": 0.9866, "step": 3100 }, { - "epoch": 0.08907951171230617, - "grad_norm": 0.333984375, - "learning_rate": 0.0001781411359724613, - "loss": 0.9693, + "epoch": 0.1781513569338459, + "grad_norm": 0.28125, + "learning_rate": 0.00019630252799100409, + "loss": 0.9238, "step": 3105 }, { - "epoch": 0.08922295698076399, - "grad_norm": 0.328125, - "learning_rate": 0.00017842799770510613, - "loss": 0.931, + "epoch": 0.17843823512536577, + "grad_norm": 0.302734375, + "learning_rate": 0.0001962755008825154, + "loss": 0.9603, "step": 3110 }, { - "epoch": 0.08936640224922181, - "grad_norm": 0.3359375, - "learning_rate": 0.000178714859437751, - "loss": 0.8514, + "epoch": 0.17872511331688565, + "grad_norm": 0.27734375, + "learning_rate": 0.00019624837722873856, + "loss": 0.9602, "step": 3115 }, { - "epoch": 0.08950984751767962, - "grad_norm": 0.310546875, - "learning_rate": 0.0001790017211703959, - "loss": 1.0086, + "epoch": 0.17901199150840552, + "grad_norm": 0.28125, + "learning_rate": 0.00019622115705687318, + "loss": 0.9651, "step": 3120 }, { - "epoch": 0.08965329278613744, - "grad_norm": 0.421875, - "learning_rate": 0.00017928858290304074, - "loss": 0.9669, + "epoch": 0.17929886969992542, + "grad_norm": 0.24609375, + "learning_rate": 0.00019619384039421575, + "loss": 0.98, "step": 3125 }, { - "epoch": 0.08979673805459527, - "grad_norm": 0.34765625, - "learning_rate": 0.0001795754446356856, - "loss": 1.0119, + "epoch": 0.1795857478914453, + "grad_norm": 0.2578125, + "learning_rate": 0.00019616642726815947, + "loss": 0.896, "step": 3130 }, { - "epoch": 0.08994018332305309, - "grad_norm": 0.3359375, - "learning_rate": 0.00017986230636833048, - "loss": 0.8722, + "epoch": 0.17987262608296517, + "grad_norm": 0.259765625, + "learning_rate": 0.0001961389177061943, + "loss": 0.9217, "step": 3135 }, { - "epoch": 0.09008362859151091, - "grad_norm": 0.3515625, - "learning_rate": 0.00018014916810097533, - "loss": 1.0598, + "epoch": 0.18015950427448504, + "grad_norm": 0.263671875, + "learning_rate": 0.00019611131173590687, + "loss": 0.9536, "step": 3140 }, { - "epoch": 0.09022707385996873, - "grad_norm": 0.3203125, - "learning_rate": 0.0001804360298336202, - "loss": 0.9438, + "epoch": 0.18044638246600495, + "grad_norm": 0.28515625, + "learning_rate": 0.0001960836093849805, + "loss": 0.9958, "step": 3145 }, { - "epoch": 0.09037051912842654, - "grad_norm": 0.3359375, - "learning_rate": 0.00018072289156626507, - "loss": 0.8828, + "epoch": 0.18073326065752482, + "grad_norm": 0.435546875, + "learning_rate": 0.00019605581068119518, + "loss": 0.9922, "step": 3150 }, { - "epoch": 0.09051396439688437, - "grad_norm": 0.328125, - "learning_rate": 0.00018100975329890992, - "loss": 0.9197, + "epoch": 0.1810201388490447, + "grad_norm": 0.26953125, + "learning_rate": 0.00019602791565242754, + "loss": 0.9009, "step": 3155 }, { - "epoch": 0.09065740966534219, - "grad_norm": 0.359375, - "learning_rate": 0.0001812966150315548, - "loss": 0.9237, + "epoch": 0.18130701704056457, + "grad_norm": 0.26953125, + "learning_rate": 0.00019599992432665073, + "loss": 0.9346, "step": 3160 }, { - "epoch": 0.09080085493380001, - "grad_norm": 0.36328125, - "learning_rate": 0.00018158347676419968, - 
"loss": 1.0093, + "epoch": 0.18159389523208447, + "grad_norm": 0.271484375, + "learning_rate": 0.00019597183673193452, + "loss": 0.9644, "step": 3165 }, { - "epoch": 0.09094430020225783, - "grad_norm": 0.345703125, - "learning_rate": 0.00018187033849684453, - "loss": 0.892, + "epoch": 0.18188077342360434, + "grad_norm": 0.267578125, + "learning_rate": 0.00019594365289644529, + "loss": 0.8938, "step": 3170 }, { - "epoch": 0.09108774547071564, - "grad_norm": 0.3515625, - "learning_rate": 0.00018215720022948939, - "loss": 0.9397, + "epoch": 0.18216765161512422, + "grad_norm": 0.27734375, + "learning_rate": 0.00019591537284844573, + "loss": 1.0112, "step": 3175 }, { - "epoch": 0.09123119073917346, - "grad_norm": 0.310546875, - "learning_rate": 0.00018244406196213427, - "loss": 0.9909, + "epoch": 0.1824545298066441, + "grad_norm": 0.25390625, + "learning_rate": 0.00019588699661629523, + "loss": 0.9286, "step": 3180 }, { - "epoch": 0.0913746360076313, - "grad_norm": 0.3828125, - "learning_rate": 0.00018273092369477912, - "loss": 0.9657, + "epoch": 0.182741407998164, + "grad_norm": 0.263671875, + "learning_rate": 0.0001958585242284495, + "loss": 0.9738, "step": 3185 }, { - "epoch": 0.09151808127608911, - "grad_norm": 0.357421875, - "learning_rate": 0.00018301778542742397, - "loss": 0.9575, + "epoch": 0.18302828618968386, + "grad_norm": 0.271484375, + "learning_rate": 0.00019582995571346072, + "loss": 0.9947, "step": 3190 }, { - "epoch": 0.09166152654454693, - "grad_norm": 0.353515625, - "learning_rate": 0.00018330464716006885, - "loss": 1.0114, + "epoch": 0.18331516438120374, + "grad_norm": 0.265625, + "learning_rate": 0.0001958012910999775, + "loss": 0.9578, "step": 3195 }, { - "epoch": 0.09180497181300475, - "grad_norm": 0.33984375, - "learning_rate": 0.00018359150889271373, - "loss": 1.0398, + "epoch": 0.1836020425727236, + "grad_norm": 0.3515625, + "learning_rate": 0.0001957725304167447, + "loss": 1.0321, "step": 3200 }, { - "epoch": 0.09194841708146256, - "grad_norm": 0.349609375, - "learning_rate": 0.00018387837062535859, - "loss": 0.981, + "epoch": 0.1838889207642435, + "grad_norm": 0.275390625, + "learning_rate": 0.00019574367369260364, + "loss": 0.9091, "step": 3205 }, { - "epoch": 0.0920918623499204, - "grad_norm": 0.349609375, - "learning_rate": 0.00018416523235800344, - "loss": 1.0119, + "epoch": 0.1841757989557634, + "grad_norm": 0.267578125, + "learning_rate": 0.00019571472095649192, + "loss": 0.9775, "step": 3210 }, { - "epoch": 0.09223530761837821, - "grad_norm": 0.390625, - "learning_rate": 0.00018445209409064832, - "loss": 1.0458, + "epoch": 0.18446267714728326, + "grad_norm": 0.25390625, + "learning_rate": 0.00019568567223744339, + "loss": 1.0474, "step": 3215 }, { - "epoch": 0.09237875288683603, - "grad_norm": 0.345703125, - "learning_rate": 0.0001847389558232932, - "loss": 0.9612, + "epoch": 0.18474955533880313, + "grad_norm": 0.267578125, + "learning_rate": 0.00019565652756458818, + "loss": 0.9921, "step": 3220 }, { - "epoch": 0.09252219815529385, - "grad_norm": 0.314453125, - "learning_rate": 0.00018502581755593805, - "loss": 1.0528, + "epoch": 0.18503643353032304, + "grad_norm": 0.2890625, + "learning_rate": 0.00019562728696715263, + "loss": 0.9533, "step": 3225 }, { - "epoch": 0.09266564342375166, - "grad_norm": 0.328125, - "learning_rate": 0.0001853126792885829, - "loss": 0.9952, + "epoch": 0.1853233117218429, + "grad_norm": 0.265625, + "learning_rate": 0.00019559795047445927, + "loss": 0.9658, "step": 3230 }, { - "epoch": 0.09280908869220948, - "grad_norm": 0.345703125, 
- "learning_rate": 0.0001855995410212278, - "loss": 0.9267, + "epoch": 0.18561018991336278, + "grad_norm": 0.271484375, + "learning_rate": 0.0001955685181159268, + "loss": 0.9816, "step": 3235 }, { - "epoch": 0.09295253396066731, - "grad_norm": 0.3828125, - "learning_rate": 0.00018588640275387264, - "loss": 1.0079, + "epoch": 0.18589706810488266, + "grad_norm": 0.2734375, + "learning_rate": 0.0001955389899210701, + "loss": 0.9574, "step": 3240 }, { - "epoch": 0.09309597922912513, - "grad_norm": 0.349609375, - "learning_rate": 0.0001861732644865175, - "loss": 1.0275, + "epoch": 0.18618394629640256, + "grad_norm": 0.2333984375, + "learning_rate": 0.00019550936591950006, + "loss": 0.9079, "step": 3245 }, { - "epoch": 0.09323942449758295, - "grad_norm": 0.3984375, - "learning_rate": 0.00018646012621916237, - "loss": 0.9837, + "epoch": 0.18647082448792243, + "grad_norm": 0.29296875, + "learning_rate": 0.0001954796461409237, + "loss": 0.9293, "step": 3250 }, { - "epoch": 0.09338286976604077, - "grad_norm": 0.322265625, - "learning_rate": 0.00018674698795180723, - "loss": 0.9264, + "epoch": 0.1867577026794423, + "grad_norm": 0.259765625, + "learning_rate": 0.0001954498306151441, + "loss": 0.944, "step": 3255 }, { - "epoch": 0.09352631503449858, - "grad_norm": 0.294921875, - "learning_rate": 0.0001870338496844521, - "loss": 1.0859, + "epoch": 0.18704458087096218, + "grad_norm": 0.263671875, + "learning_rate": 0.0001954199193720603, + "loss": 1.0102, "step": 3260 }, { - "epoch": 0.0936697603029564, - "grad_norm": 0.349609375, - "learning_rate": 0.00018732071141709696, - "loss": 0.9235, + "epoch": 0.18733145906248208, + "grad_norm": 0.263671875, + "learning_rate": 0.00019538991244166738, + "loss": 0.9339, "step": 3265 }, { - "epoch": 0.09381320557141423, - "grad_norm": 0.330078125, - "learning_rate": 0.00018760757314974184, - "loss": 0.9416, + "epoch": 0.18761833725400195, + "grad_norm": 0.275390625, + "learning_rate": 0.00019535980985405639, + "loss": 0.9956, "step": 3270 }, { - "epoch": 0.09395665083987205, - "grad_norm": 0.3203125, - "learning_rate": 0.0001878944348823867, - "loss": 1.0335, + "epoch": 0.18790521544552183, + "grad_norm": 0.27734375, + "learning_rate": 0.00019532961163941422, + "loss": 0.9283, "step": 3275 }, { - "epoch": 0.09410009610832987, - "grad_norm": 0.3671875, - "learning_rate": 0.00018818129661503157, - "loss": 0.8798, + "epoch": 0.1881920936370417, + "grad_norm": 0.28515625, + "learning_rate": 0.00019529931782802376, + "loss": 1.0013, "step": 3280 }, { - "epoch": 0.09424354137678768, - "grad_norm": 0.318359375, - "learning_rate": 0.00018846815834767643, - "loss": 0.9571, + "epoch": 0.1884789718285616, + "grad_norm": 0.26171875, + "learning_rate": 0.00019526892845026365, + "loss": 1.0276, "step": 3285 }, { - "epoch": 0.0943869866452455, - "grad_norm": 0.390625, - "learning_rate": 0.00018875502008032128, - "loss": 1.0964, + "epoch": 0.18876585002008148, + "grad_norm": 0.2470703125, + "learning_rate": 0.00019523844353660849, + "loss": 0.9785, "step": 3290 }, { - "epoch": 0.09453043191370333, - "grad_norm": 0.3359375, - "learning_rate": 0.00018904188181296616, - "loss": 0.9332, + "epoch": 0.18905272821160135, + "grad_norm": 0.259765625, + "learning_rate": 0.0001952078631176286, + "loss": 0.9132, "step": 3295 }, { - "epoch": 0.09467387718216115, - "grad_norm": 0.353515625, - "learning_rate": 0.00018932874354561104, - "loss": 0.8805, + "epoch": 0.18933960640312122, + "grad_norm": 0.263671875, + "learning_rate": 0.00019517718722399002, + "loss": 0.9568, "step": 3300 }, { - 
"epoch": 0.09481732245061897, - "grad_norm": 0.388671875, - "learning_rate": 0.0001896156052782559, - "loss": 1.0708, + "epoch": 0.18962648459464113, + "grad_norm": 0.259765625, + "learning_rate": 0.00019514641588645471, + "loss": 0.9375, "step": 3305 }, { - "epoch": 0.09496076771907679, - "grad_norm": 0.349609375, - "learning_rate": 0.00018990246701090075, - "loss": 0.9433, + "epoch": 0.189913362786161, + "grad_norm": 0.279296875, + "learning_rate": 0.0001951155491358802, + "loss": 0.944, "step": 3310 }, { - "epoch": 0.0951042129875346, - "grad_norm": 0.345703125, - "learning_rate": 0.00019018932874354563, - "loss": 0.9491, + "epoch": 0.19020024097768087, + "grad_norm": 0.2890625, + "learning_rate": 0.00019508458700321973, + "loss": 1.0012, "step": 3315 }, { - "epoch": 0.09524765825599242, - "grad_norm": 0.35546875, - "learning_rate": 0.00019047619047619048, - "loss": 0.9226, + "epoch": 0.19048711916920075, + "grad_norm": 0.27734375, + "learning_rate": 0.00019505352951952221, + "loss": 0.9707, "step": 3320 }, { - "epoch": 0.09539110352445025, - "grad_norm": 0.330078125, - "learning_rate": 0.00019076305220883533, - "loss": 0.9754, + "epoch": 0.19077399736072065, + "grad_norm": 0.2578125, + "learning_rate": 0.00019502237671593212, + "loss": 0.9405, "step": 3325 }, { - "epoch": 0.09553454879290807, - "grad_norm": 0.349609375, - "learning_rate": 0.00019104991394148021, - "loss": 0.9503, + "epoch": 0.19106087555224052, + "grad_norm": 0.2578125, + "learning_rate": 0.0001949911286236896, + "loss": 0.8607, "step": 3330 }, { - "epoch": 0.09567799406136589, - "grad_norm": 0.33203125, - "learning_rate": 0.0001913367756741251, - "loss": 0.9762, + "epoch": 0.1913477537437604, + "grad_norm": 0.279296875, + "learning_rate": 0.00019495978527413028, + "loss": 0.9797, "step": 3335 }, { - "epoch": 0.0958214393298237, - "grad_norm": 0.3203125, - "learning_rate": 0.00019162363740676995, - "loss": 0.9725, + "epoch": 0.19163463193528027, + "grad_norm": 0.2578125, + "learning_rate": 0.00019492834669868536, + "loss": 0.9488, "step": 3340 }, { - "epoch": 0.09596488459828152, - "grad_norm": 0.349609375, - "learning_rate": 0.0001919104991394148, - "loss": 0.9074, + "epoch": 0.19192151012680017, + "grad_norm": 0.2451171875, + "learning_rate": 0.00019489681292888148, + "loss": 1.012, "step": 3345 }, { - "epoch": 0.09610832986673934, - "grad_norm": 0.359375, - "learning_rate": 0.00019219736087205968, - "loss": 1.0202, + "epoch": 0.19220838831832004, + "grad_norm": 0.25, + "learning_rate": 0.00019486518399634083, + "loss": 0.9622, "step": 3350 }, { - "epoch": 0.09625177513519717, - "grad_norm": 0.369140625, - "learning_rate": 0.00019248422260470454, - "loss": 1.0064, + "epoch": 0.19249526650983992, + "grad_norm": 0.255859375, + "learning_rate": 0.00019483345993278093, + "loss": 0.9597, "step": 3355 }, { - "epoch": 0.09639522040365499, - "grad_norm": 0.36328125, - "learning_rate": 0.00019277108433734942, - "loss": 1.0052, + "epoch": 0.1927821447013598, + "grad_norm": 0.265625, + "learning_rate": 0.00019480164077001475, + "loss": 1.0033, "step": 3360 }, { - "epoch": 0.0965386656721128, - "grad_norm": 0.345703125, - "learning_rate": 0.00019305794606999427, - "loss": 0.9609, + "epoch": 0.1930690228928797, + "grad_norm": 0.26171875, + "learning_rate": 0.00019476972653995062, + "loss": 0.9518, "step": 3365 }, { - "epoch": 0.09668211094057062, - "grad_norm": 0.3515625, - "learning_rate": 0.00019334480780263912, - "loss": 0.9771, + "epoch": 0.19335590108439957, + "grad_norm": 0.29296875, + "learning_rate": 
0.00019473771727459224, + "loss": 1.0118, "step": 3370 }, { - "epoch": 0.09682555620902844, - "grad_norm": 0.341796875, - "learning_rate": 0.000193631669535284, - "loss": 0.986, + "epoch": 0.19364277927591944, + "grad_norm": 0.259765625, + "learning_rate": 0.00019470561300603852, + "loss": 0.9938, "step": 3375 }, { - "epoch": 0.09696900147748627, - "grad_norm": 0.361328125, - "learning_rate": 0.00019391853126792888, - "loss": 0.9835, + "epoch": 0.19392965746743931, + "grad_norm": 0.271484375, + "learning_rate": 0.00019467341376648372, + "loss": 0.9653, "step": 3380 }, { - "epoch": 0.09711244674594409, - "grad_norm": 0.35546875, - "learning_rate": 0.00019420539300057374, - "loss": 0.9411, + "epoch": 0.19421653565895922, + "grad_norm": 0.2734375, + "learning_rate": 0.00019464111958821727, + "loss": 0.9561, "step": 3385 }, { - "epoch": 0.0972558920144019, - "grad_norm": 0.376953125, - "learning_rate": 0.0001944922547332186, - "loss": 1.0926, + "epoch": 0.1945034138504791, + "grad_norm": 0.27734375, + "learning_rate": 0.0001946087305036239, + "loss": 1.0023, "step": 3390 }, { - "epoch": 0.09739933728285972, - "grad_norm": 0.37890625, - "learning_rate": 0.00019477911646586347, - "loss": 1.0116, + "epoch": 0.19479029204199896, + "grad_norm": 0.271484375, + "learning_rate": 0.00019457624654518343, + "loss": 1.001, "step": 3395 }, { - "epoch": 0.09754278255131754, - "grad_norm": 0.34765625, - "learning_rate": 0.00019506597819850835, - "loss": 0.9474, + "epoch": 0.19507717023351884, + "grad_norm": 0.248046875, + "learning_rate": 0.00019454366774547083, + "loss": 0.9172, "step": 3400 }, { - "epoch": 0.09768622781977536, - "grad_norm": 0.361328125, - "learning_rate": 0.00019535283993115318, - "loss": 0.9395, + "epoch": 0.19536404842503874, + "grad_norm": 0.25390625, + "learning_rate": 0.00019451099413715626, + "loss": 0.9451, "step": 3405 }, { - "epoch": 0.09782967308823319, - "grad_norm": 0.341796875, - "learning_rate": 0.00019563970166379806, - "loss": 0.9772, + "epoch": 0.1956509266165586, + "grad_norm": 0.259765625, + "learning_rate": 0.0001944782257530048, + "loss": 0.9521, "step": 3410 }, { - "epoch": 0.09797311835669101, - "grad_norm": 0.345703125, - "learning_rate": 0.00019592656339644294, - "loss": 0.9413, + "epoch": 0.1959378048080785, + "grad_norm": 0.263671875, + "learning_rate": 0.00019444536262587669, + "loss": 0.9749, "step": 3415 }, { - "epoch": 0.09811656362514883, - "grad_norm": 0.3671875, - "learning_rate": 0.0001962134251290878, - "loss": 0.9713, + "epoch": 0.19622468299959836, + "grad_norm": 0.345703125, + "learning_rate": 0.00019441240478872718, + "loss": 0.9371, "step": 3420 }, { - "epoch": 0.09826000889360664, - "grad_norm": 0.369140625, - "learning_rate": 0.00019650028686173264, - "loss": 1.0001, + "epoch": 0.19651156119111826, + "grad_norm": 0.2890625, + "learning_rate": 0.0001943793522746064, + "loss": 0.9343, "step": 3425 }, { - "epoch": 0.09840345416206446, - "grad_norm": 0.515625, - "learning_rate": 0.00019678714859437752, - "loss": 1.0076, + "epoch": 0.19679843938263814, + "grad_norm": 0.25390625, + "learning_rate": 0.00019434620511665958, + "loss": 1.0255, "step": 3430 }, { - "epoch": 0.09854689943052228, - "grad_norm": 0.357421875, - "learning_rate": 0.00019707401032702238, - "loss": 1.0267, + "epoch": 0.197085317574158, + "grad_norm": 0.26953125, + "learning_rate": 0.0001943129633481267, + "loss": 0.9707, "step": 3435 }, { - "epoch": 0.09869034469898011, - "grad_norm": 0.357421875, - "learning_rate": 0.00019736087205966726, - "loss": 1.0497, + "epoch": 
0.19737219576567788, + "grad_norm": 0.248046875, + "learning_rate": 0.00019427962700234268, + "loss": 1.0431, "step": 3440 }, { - "epoch": 0.09883378996743793, - "grad_norm": 0.328125, - "learning_rate": 0.0001976477337923121, - "loss": 1.0164, + "epoch": 0.19765907395719778, + "grad_norm": 0.259765625, + "learning_rate": 0.00019424619611273727, + "loss": 0.9646, "step": 3445 }, { - "epoch": 0.09897723523589574, - "grad_norm": 0.353515625, - "learning_rate": 0.000197934595524957, - "loss": 1.0053, + "epoch": 0.19794595214871766, + "grad_norm": 0.263671875, + "learning_rate": 0.0001942126707128351, + "loss": 0.9781, "step": 3450 }, { - "epoch": 0.09912068050435356, - "grad_norm": 0.78515625, - "learning_rate": 0.00019822145725760184, - "loss": 0.9357, + "epoch": 0.19823283034023753, + "grad_norm": 0.251953125, + "learning_rate": 0.00019417905083625545, + "loss": 1.0096, "step": 3455 }, { - "epoch": 0.09926412577281138, - "grad_norm": 0.3671875, - "learning_rate": 0.00019850831899024672, - "loss": 0.96, + "epoch": 0.1985197085317574, + "grad_norm": 0.2578125, + "learning_rate": 0.00019414533651671242, + "loss": 0.964, "step": 3460 }, { - "epoch": 0.09940757104126921, - "grad_norm": 0.357421875, - "learning_rate": 0.00019879518072289158, - "loss": 0.9871, + "epoch": 0.1988065867232773, + "grad_norm": 0.259765625, + "learning_rate": 0.00019411152778801486, + "loss": 0.9573, "step": 3465 }, { - "epoch": 0.09955101630972703, - "grad_norm": 0.337890625, - "learning_rate": 0.00019908204245553643, - "loss": 0.9591, + "epoch": 0.19909346491479718, + "grad_norm": 0.263671875, + "learning_rate": 0.00019407762468406619, + "loss": 0.9138, "step": 3470 }, { - "epoch": 0.09969446157818485, - "grad_norm": 0.34765625, - "learning_rate": 0.0001993689041881813, - "loss": 0.9766, + "epoch": 0.19938034310631705, + "grad_norm": 0.26953125, + "learning_rate": 0.00019404362723886452, + "loss": 0.9735, "step": 3475 }, { - "epoch": 0.09983790684664266, - "grad_norm": 0.353515625, - "learning_rate": 0.00019965576592082616, - "loss": 0.9575, + "epoch": 0.19966722129783693, + "grad_norm": 0.2578125, + "learning_rate": 0.00019400953548650258, + "loss": 0.9722, "step": 3480 }, { - "epoch": 0.09998135211510048, - "grad_norm": 0.3203125, - "learning_rate": 0.00019994262765347102, - "loss": 0.9376, + "epoch": 0.19995409948935683, + "grad_norm": 0.271484375, + "learning_rate": 0.00019397534946116762, + "loss": 1.0176, "step": 3485 }, { - "epoch": 0.1001247973835583, - "grad_norm": 0.34765625, - "learning_rate": 0.00019999999197655853, - "loss": 1.046, + "epoch": 0.2002409776808767, + "grad_norm": 0.275390625, + "learning_rate": 0.00019394106919714155, + "loss": 0.9016, "step": 3490 }, { - "epoch": 0.10026824265201613, - "grad_norm": 0.31640625, - "learning_rate": 0.00019999995938132974, - "loss": 0.9397, + "epoch": 0.20052785587239658, + "grad_norm": 0.263671875, + "learning_rate": 0.0001939066947288006, + "loss": 1.0353, "step": 3495 }, { - "epoch": 0.10041168792047395, - "grad_norm": 0.359375, - "learning_rate": 0.0001999999017128567, - "loss": 1.0107, + "epoch": 0.20081473406391645, + "grad_norm": 0.28125, + "learning_rate": 0.0001938722260906156, + "loss": 0.9489, "step": 3500 }, { - "epoch": 0.10055513318893176, - "grad_norm": 0.36328125, - "learning_rate": 0.0001999998189711539, - "loss": 0.951, + "epoch": 0.20110161225543635, + "grad_norm": 0.265625, + "learning_rate": 0.00019383766331715178, + "loss": 1.0551, "step": 3505 }, { - "epoch": 0.10069857845738958, - "grad_norm": 0.375, - "learning_rate": 
0.00019999971115624204, - "loss": 0.9436, + "epoch": 0.20138849044695623, + "grad_norm": 0.2578125, + "learning_rate": 0.00019380300644306878, + "loss": 1.0359, "step": 3510 }, { - "epoch": 0.1008420237258474, - "grad_norm": 0.349609375, - "learning_rate": 0.0001999995782681482, - "loss": 1.038, + "epoch": 0.2016753686384761, + "grad_norm": 0.265625, + "learning_rate": 0.00019376825550312057, + "loss": 1.0009, "step": 3515 }, { - "epoch": 0.10098546899430522, - "grad_norm": 0.3515625, - "learning_rate": 0.00019999942030690567, - "loss": 1.0207, + "epoch": 0.20196224682999597, + "grad_norm": 0.275390625, + "learning_rate": 0.00019373341053215547, + "loss": 0.942, "step": 3520 }, { - "epoch": 0.10112891426276305, - "grad_norm": 0.361328125, - "learning_rate": 0.00019999923727255406, - "loss": 0.8822, + "epoch": 0.20224912502151587, + "grad_norm": 0.28125, + "learning_rate": 0.0001936984715651161, + "loss": 0.9778, "step": 3525 }, { - "epoch": 0.10127235953122087, - "grad_norm": 0.365234375, - "learning_rate": 0.0001999990291651393, - "loss": 1.12, + "epoch": 0.20253600321303575, + "grad_norm": 0.2578125, + "learning_rate": 0.00019366343863703932, + "loss": 0.9932, "step": 3530 }, { - "epoch": 0.10141580479967868, - "grad_norm": 0.345703125, - "learning_rate": 0.00019999879598471348, - "loss": 0.9899, + "epoch": 0.20282288140455562, + "grad_norm": 0.251953125, + "learning_rate": 0.0001936283117830563, + "loss": 0.9191, "step": 3535 }, { - "epoch": 0.1015592500681365, - "grad_norm": 0.37890625, - "learning_rate": 0.00019999853773133513, - "loss": 0.9808, + "epoch": 0.2031097595960755, + "grad_norm": 0.255859375, + "learning_rate": 0.00019359309103839225, + "loss": 0.9005, "step": 3540 }, { - "epoch": 0.10170269533659432, - "grad_norm": 0.33984375, - "learning_rate": 0.000199998254405069, - "loss": 0.9584, + "epoch": 0.2033966377875954, + "grad_norm": 0.263671875, + "learning_rate": 0.0001935577764383666, + "loss": 0.9952, "step": 3545 }, { - "epoch": 0.10184614060505215, - "grad_norm": 0.51953125, - "learning_rate": 0.00019999794600598616, - "loss": 1.1256, + "epoch": 0.20368351597911527, + "grad_norm": 0.283203125, + "learning_rate": 0.00019352236801839298, + "loss": 0.922, "step": 3550 }, { - "epoch": 0.10198958587350997, - "grad_norm": 0.3828125, - "learning_rate": 0.00019999761253416387, - "loss": 0.8989, + "epoch": 0.20397039417063514, + "grad_norm": 0.263671875, + "learning_rate": 0.000193486865813979, + "loss": 0.9369, "step": 3555 }, { - "epoch": 0.10213303114196778, - "grad_norm": 0.3671875, - "learning_rate": 0.00019999725398968577, - "loss": 0.9822, + "epoch": 0.20425727236215502, + "grad_norm": 0.2734375, + "learning_rate": 0.00019345126986072635, + "loss": 1.0501, "step": 3560 }, { - "epoch": 0.1022764764104256, - "grad_norm": 0.357421875, - "learning_rate": 0.00019999687037264174, - "loss": 1.0624, + "epoch": 0.20454415055367492, + "grad_norm": 0.271484375, + "learning_rate": 0.0001934155801943307, + "loss": 0.9395, "step": 3565 }, { - "epoch": 0.10241992167888342, - "grad_norm": 0.375, - "learning_rate": 0.000199996461683128, - "loss": 0.895, + "epoch": 0.2048310287451948, + "grad_norm": 0.279296875, + "learning_rate": 0.0001933797968505818, + "loss": 0.9643, "step": 3570 }, { - "epoch": 0.10256336694734124, - "grad_norm": 0.34375, - "learning_rate": 0.00019999602792124702, - "loss": 0.9154, + "epoch": 0.20511790693671467, + "grad_norm": 0.26171875, + "learning_rate": 0.0001933439198653632, + "loss": 0.9855, "step": 3575 }, { - "epoch": 0.10270681221579907, - "grad_norm": 
0.359375, - "learning_rate": 0.00019999556908710753, - "loss": 0.9666, + "epoch": 0.20540478512823454, + "grad_norm": 0.294921875, + "learning_rate": 0.00019330794927465247, + "loss": 0.9532, "step": 3580 }, { - "epoch": 0.10285025748425689, - "grad_norm": 0.349609375, - "learning_rate": 0.0001999950851808246, - "loss": 0.8901, + "epoch": 0.20569166331975444, + "grad_norm": 0.251953125, + "learning_rate": 0.00019327188511452094, + "loss": 1.0038, "step": 3585 }, { - "epoch": 0.1029937027527147, - "grad_norm": 0.365234375, - "learning_rate": 0.00019999457620251953, - "loss": 0.8413, + "epoch": 0.20597854151127432, + "grad_norm": 0.26171875, + "learning_rate": 0.00019323572742113387, + "loss": 0.9216, "step": 3590 }, { - "epoch": 0.10313714802117252, - "grad_norm": 0.376953125, - "learning_rate": 0.00019999404215232, - "loss": 0.9478, + "epoch": 0.2062654197027942, + "grad_norm": 0.265625, + "learning_rate": 0.0001931994762307503, + "loss": 0.9706, "step": 3595 }, { - "epoch": 0.10328059328963034, - "grad_norm": 0.380859375, - "learning_rate": 0.0001999934830303598, - "loss": 0.9176, + "epoch": 0.20655229789431406, + "grad_norm": 0.28125, + "learning_rate": 0.00019316313157972297, + "loss": 0.9644, "step": 3600 }, { - "epoch": 0.10342403855808815, - "grad_norm": 0.337890625, - "learning_rate": 0.00019999289883677928, - "loss": 0.9794, + "epoch": 0.20683917608583396, + "grad_norm": 0.287109375, + "learning_rate": 0.00019312669350449836, + "loss": 0.973, "step": 3605 }, { - "epoch": 0.10356748382654599, - "grad_norm": 0.357421875, - "learning_rate": 0.00019999228957172477, - "loss": 0.8405, + "epoch": 0.20712605427735384, + "grad_norm": 0.2578125, + "learning_rate": 0.0001930901620416167, + "loss": 0.8716, "step": 3610 }, { - "epoch": 0.1037109290950038, - "grad_norm": 0.341796875, - "learning_rate": 0.00019999165523534908, - "loss": 0.9726, + "epoch": 0.2074129324688737, + "grad_norm": 0.2734375, + "learning_rate": 0.0001930535372277118, + "loss": 1.0108, "step": 3615 }, { - "epoch": 0.10385437436346162, - "grad_norm": 0.40234375, - "learning_rate": 0.0001999909958278113, - "loss": 0.9646, + "epoch": 0.20769981066039359, + "grad_norm": 0.259765625, + "learning_rate": 0.0001930168190995111, + "loss": 0.9474, "step": 3620 }, { - "epoch": 0.10399781963191944, - "grad_norm": 0.37890625, - "learning_rate": 0.00019999031134927675, - "loss": 0.9289, + "epoch": 0.2079866888519135, + "grad_norm": 0.279296875, + "learning_rate": 0.00019298000769383565, + "loss": 0.9808, "step": 3625 }, { - "epoch": 0.10414126490037726, - "grad_norm": 0.349609375, - "learning_rate": 0.000199989601799917, - "loss": 0.9262, + "epoch": 0.20827356704343336, + "grad_norm": 0.263671875, + "learning_rate": 0.00019294310304759994, + "loss": 0.9813, "step": 3630 }, { - "epoch": 0.10428471016883509, - "grad_norm": 0.36328125, - "learning_rate": 0.00019998886717990998, - "loss": 0.9788, + "epoch": 0.20856044523495323, + "grad_norm": 0.283203125, + "learning_rate": 0.00019290610519781212, + "loss": 0.9752, "step": 3635 }, { - "epoch": 0.1044281554372929, - "grad_norm": 0.34375, - "learning_rate": 0.00019998810748943994, - "loss": 1.0277, + "epoch": 0.2088473234264731, + "grad_norm": 0.279296875, + "learning_rate": 0.00019286901418157367, + "loss": 1.0081, "step": 3640 }, { - "epoch": 0.10457160070575072, - "grad_norm": 0.349609375, - "learning_rate": 0.0001999873227286973, - "loss": 0.9888, + "epoch": 0.209134201617993, + "grad_norm": 0.26171875, + "learning_rate": 0.00019283183003607955, + "loss": 1.0368, "step": 3645 }, { - 
"epoch": 0.10471504597420854, - "grad_norm": 0.357421875, - "learning_rate": 0.00019998651289787885, - "loss": 0.9837, + "epoch": 0.20942107980951288, + "grad_norm": 0.275390625, + "learning_rate": 0.0001927945527986181, + "loss": 1.0126, "step": 3650 }, { - "epoch": 0.10485849124266636, - "grad_norm": 0.34765625, - "learning_rate": 0.0001999856779971876, - "loss": 1.0336, + "epoch": 0.20970795800103276, + "grad_norm": 0.263671875, + "learning_rate": 0.000192757182506571, + "loss": 0.9539, "step": 3655 }, { - "epoch": 0.10500193651112417, - "grad_norm": 0.388671875, - "learning_rate": 0.00019998481802683293, - "loss": 0.9638, + "epoch": 0.20999483619255263, + "grad_norm": 0.271484375, + "learning_rate": 0.00019271971919741332, + "loss": 0.9511, "step": 3660 }, { - "epoch": 0.105145381779582, - "grad_norm": 0.376953125, - "learning_rate": 0.00019998393298703047, - "loss": 0.9367, + "epoch": 0.21028171438407253, + "grad_norm": 0.27734375, + "learning_rate": 0.0001926821629087133, + "loss": 0.9761, "step": 3665 }, { - "epoch": 0.10528882704803982, - "grad_norm": 0.365234375, - "learning_rate": 0.00019998302287800208, - "loss": 1.0349, + "epoch": 0.2105685925755924, + "grad_norm": 0.25390625, + "learning_rate": 0.0001926445136781325, + "loss": 0.9101, "step": 3670 }, { - "epoch": 0.10543227231649764, - "grad_norm": 0.35546875, - "learning_rate": 0.000199982087699976, - "loss": 0.9799, + "epoch": 0.21085547076711228, + "grad_norm": 0.26953125, + "learning_rate": 0.00019260677154342564, + "loss": 1.0644, "step": 3675 }, { - "epoch": 0.10557571758495546, - "grad_norm": 0.388671875, - "learning_rate": 0.00019998112745318669, - "loss": 1.0201, + "epoch": 0.21114234895863215, + "grad_norm": 0.283203125, + "learning_rate": 0.0001925689365424406, + "loss": 1.0595, "step": 3680 }, { - "epoch": 0.10571916285341328, - "grad_norm": 0.333984375, - "learning_rate": 0.0001999801421378749, - "loss": 0.9775, + "epoch": 0.21142922715015205, + "grad_norm": 0.255859375, + "learning_rate": 0.00019253100871311843, + "loss": 0.9295, "step": 3685 }, { - "epoch": 0.1058626081218711, - "grad_norm": 0.349609375, - "learning_rate": 0.00019997913175428771, - "loss": 0.9533, + "epoch": 0.21171610534167193, + "grad_norm": 0.28125, + "learning_rate": 0.00019249298809349323, + "loss": 0.9397, "step": 3690 }, { - "epoch": 0.10600605339032892, - "grad_norm": 0.396484375, - "learning_rate": 0.00019997809630267844, - "loss": 1.0515, + "epoch": 0.2120029835331918, + "grad_norm": 0.27734375, + "learning_rate": 0.00019245487472169216, + "loss": 0.9591, "step": 3695 }, { - "epoch": 0.10614949865878674, - "grad_norm": 0.384765625, - "learning_rate": 0.00019997703578330674, - "loss": 1.0437, + "epoch": 0.21228986172471168, + "grad_norm": 0.265625, + "learning_rate": 0.0001924166686359354, + "loss": 0.9513, "step": 3700 }, { - "epoch": 0.10629294392724456, - "grad_norm": 0.34765625, - "learning_rate": 0.00019997595019643845, - "loss": 1.0259, + "epoch": 0.21257673991623158, + "grad_norm": 0.26171875, + "learning_rate": 0.00019237836987453613, + "loss": 0.9952, "step": 3705 }, { - "epoch": 0.10643638919570238, - "grad_norm": 0.345703125, - "learning_rate": 0.00019997483954234583, - "loss": 0.9407, + "epoch": 0.21286361810775145, + "grad_norm": 0.263671875, + "learning_rate": 0.00019233997847590035, + "loss": 0.9506, "step": 3710 }, { - "epoch": 0.1065798344641602, - "grad_norm": 0.3984375, - "learning_rate": 0.00019997370382130732, - "loss": 0.9076, + "epoch": 0.21315049629927132, + "grad_norm": 0.25390625, + "learning_rate": 
0.00019230149447852714, + "loss": 0.9978, "step": 3715 }, { - "epoch": 0.10672327973261803, - "grad_norm": 0.369140625, - "learning_rate": 0.00019997254303360772, - "loss": 0.9095, + "epoch": 0.2134373744907912, + "grad_norm": 0.2890625, + "learning_rate": 0.00019226291792100826, + "loss": 0.9213, "step": 3720 }, { - "epoch": 0.10686672500107584, - "grad_norm": 0.3828125, - "learning_rate": 0.00019997135717953802, - "loss": 0.9353, + "epoch": 0.2137242526823111, + "grad_norm": 0.28515625, + "learning_rate": 0.0001922242488420284, + "loss": 1.0118, "step": 3725 }, { - "epoch": 0.10701017026953366, - "grad_norm": 0.37109375, - "learning_rate": 0.0001999701462593956, - "loss": 1.0445, + "epoch": 0.21401113087383097, + "grad_norm": 0.251953125, + "learning_rate": 0.00019218548728036503, + "loss": 0.9238, "step": 3730 }, { - "epoch": 0.10715361553799148, - "grad_norm": 0.359375, - "learning_rate": 0.00019996891027348402, - "loss": 0.9156, + "epoch": 0.21429800906535085, + "grad_norm": 0.287109375, + "learning_rate": 0.00019214663327488828, + "loss": 0.966, "step": 3735 }, { - "epoch": 0.1072970608064493, - "grad_norm": 0.34765625, - "learning_rate": 0.00019996764922211328, - "loss": 0.915, + "epoch": 0.21458488725687072, + "grad_norm": 0.259765625, + "learning_rate": 0.00019210768686456106, + "loss": 1.0034, "step": 3740 }, { - "epoch": 0.10744050607490711, - "grad_norm": 0.3671875, - "learning_rate": 0.00019996636310559948, - "loss": 0.9822, + "epoch": 0.21487176544839062, + "grad_norm": 0.275390625, + "learning_rate": 0.00019206864808843892, + "loss": 0.9623, "step": 3745 }, { - "epoch": 0.10758395134336494, - "grad_norm": 0.400390625, - "learning_rate": 0.0001999650519242651, - "loss": 0.9963, + "epoch": 0.2151586436399105, + "grad_norm": 0.337890625, + "learning_rate": 0.00019202951698566999, + "loss": 0.9571, "step": 3750 }, { - "epoch": 0.10772739661182276, - "grad_norm": 0.376953125, - "learning_rate": 0.00019996371567843895, - "loss": 0.9389, + "epoch": 0.21544552183143037, + "grad_norm": 0.251953125, + "learning_rate": 0.0001919902935954951, + "loss": 0.95, "step": 3755 }, { - "epoch": 0.10787084188028058, - "grad_norm": 0.353515625, - "learning_rate": 0.00019996235436845603, - "loss": 1.0204, + "epoch": 0.21573240002295024, + "grad_norm": 0.29296875, + "learning_rate": 0.00019195097795724747, + "loss": 1.026, "step": 3760 }, { - "epoch": 0.1080142871487384, - "grad_norm": 0.341796875, - "learning_rate": 0.00019996096799465766, - "loss": 0.9341, + "epoch": 0.21601927821447015, + "grad_norm": 0.271484375, + "learning_rate": 0.000191911570110353, + "loss": 1.0171, "step": 3765 }, { - "epoch": 0.10815773241719621, - "grad_norm": 0.404296875, - "learning_rate": 0.00019995955655739147, - "loss": 0.9041, + "epoch": 0.21630615640599002, + "grad_norm": 0.263671875, + "learning_rate": 0.0001918720700943299, + "loss": 0.99, "step": 3770 }, { - "epoch": 0.10830117768565403, - "grad_norm": 0.30859375, - "learning_rate": 0.00019995812005701131, - "loss": 0.9326, + "epoch": 0.2165930345975099, + "grad_norm": 0.26953125, + "learning_rate": 0.0001918324779487889, + "loss": 0.9584, "step": 3775 }, { - "epoch": 0.10844462295411186, - "grad_norm": 0.43359375, - "learning_rate": 0.00019995665849387742, - "loss": 0.9738, + "epoch": 0.21687991278902977, + "grad_norm": 0.26953125, + "learning_rate": 0.0001917927937134331, + "loss": 1.0006, "step": 3780 }, { - "epoch": 0.10858806822256968, - "grad_norm": 0.380859375, - "learning_rate": 0.00019995517186835625, - "loss": 0.9981, + "epoch": 0.21716679098054967, 
+ "grad_norm": 0.259765625, + "learning_rate": 0.00019175301742805793, + "loss": 0.9589, "step": 3785 }, { - "epoch": 0.1087315134910275, - "grad_norm": 0.31640625, - "learning_rate": 0.0001999536601808205, - "loss": 0.913, + "epoch": 0.21745366917206954, + "grad_norm": 0.271484375, + "learning_rate": 0.00019171314913255113, + "loss": 1.013, "step": 3790 }, { - "epoch": 0.10887495875948532, - "grad_norm": 0.373046875, - "learning_rate": 0.00019995212343164922, - "loss": 1.0694, + "epoch": 0.21774054736358942, + "grad_norm": 0.2490234375, + "learning_rate": 0.00019167318886689273, + "loss": 0.9847, "step": 3795 }, { - "epoch": 0.10901840402794313, - "grad_norm": 0.34375, - "learning_rate": 0.00019995056162122775, - "loss": 1.018, + "epoch": 0.2180274255551093, + "grad_norm": 0.283203125, + "learning_rate": 0.00019163313667115497, + "loss": 0.9847, "step": 3800 }, { - "epoch": 0.10916184929640096, - "grad_norm": 0.373046875, - "learning_rate": 0.00019994897474994762, - "loss": 1.0725, + "epoch": 0.2183143037466292, + "grad_norm": 0.279296875, + "learning_rate": 0.00019159299258550227, + "loss": 0.9766, "step": 3805 }, { - "epoch": 0.10930529456485878, - "grad_norm": 0.3671875, - "learning_rate": 0.0001999473628182068, - "loss": 0.9521, + "epoch": 0.21860118193814906, + "grad_norm": 0.275390625, + "learning_rate": 0.0001915527566501912, + "loss": 0.9583, "step": 3810 }, { - "epoch": 0.1094487398333166, - "grad_norm": 0.345703125, - "learning_rate": 0.0001999457258264094, - "loss": 0.9033, + "epoch": 0.21888806012966894, + "grad_norm": 0.259765625, + "learning_rate": 0.0001915124289055705, + "loss": 0.9287, "step": 3815 }, { - "epoch": 0.10959218510177442, - "grad_norm": 0.3515625, - "learning_rate": 0.00019994406377496588, - "loss": 0.9322, + "epoch": 0.2191749383211888, + "grad_norm": 0.2490234375, + "learning_rate": 0.00019147200939208088, + "loss": 0.9871, "step": 3820 }, { - "epoch": 0.10973563037023223, - "grad_norm": 0.384765625, - "learning_rate": 0.00019994237666429295, - "loss": 0.9365, + "epoch": 0.2194618165127087, + "grad_norm": 0.265625, + "learning_rate": 0.0001914314981502551, + "loss": 0.9836, "step": 3825 }, { - "epoch": 0.10987907563869005, - "grad_norm": 0.345703125, - "learning_rate": 0.00019994066449481368, - "loss": 0.9484, + "epoch": 0.2197486947042286, + "grad_norm": 0.265625, + "learning_rate": 0.000191390895220718, + "loss": 0.8962, "step": 3830 }, { - "epoch": 0.11002252090714788, - "grad_norm": 0.353515625, - "learning_rate": 0.0001999389272669573, - "loss": 0.8825, + "epoch": 0.22003557289574846, + "grad_norm": 0.2734375, + "learning_rate": 0.0001913502006441862, + "loss": 0.9405, "step": 3835 }, { - "epoch": 0.1101659661756057, - "grad_norm": 0.34765625, - "learning_rate": 0.0001999371649811594, - "loss": 0.9224, + "epoch": 0.22032245108726833, + "grad_norm": 0.2578125, + "learning_rate": 0.00019130941446146837, + "loss": 0.8986, "step": 3840 }, { - "epoch": 0.11030941144406352, - "grad_norm": 0.380859375, - "learning_rate": 0.0001999353776378619, - "loss": 0.9883, + "epoch": 0.22060932927878824, + "grad_norm": 0.265625, + "learning_rate": 0.00019126853671346496, + "loss": 0.9524, "step": 3845 }, { - "epoch": 0.11045285671252134, - "grad_norm": 0.357421875, - "learning_rate": 0.00019993356523751287, - "loss": 1.0323, + "epoch": 0.2208962074703081, + "grad_norm": 0.251953125, + "learning_rate": 0.00019122756744116828, + "loss": 0.9778, "step": 3850 }, { - "epoch": 0.11059630198097915, - "grad_norm": 0.36328125, - "learning_rate": 0.0001999317277805668, - "loss": 
0.9269, + "epoch": 0.22118308566182798, + "grad_norm": 0.27734375, + "learning_rate": 0.0001911865066856624, + "loss": 1.0214, "step": 3855 }, { - "epoch": 0.11073974724943697, - "grad_norm": 0.337890625, - "learning_rate": 0.00019992986526748432, - "loss": 0.8932, + "epoch": 0.22146996385334786, + "grad_norm": 0.28125, + "learning_rate": 0.00019114535448812311, + "loss": 0.9422, "step": 3860 }, { - "epoch": 0.1108831925178948, - "grad_norm": 0.3515625, - "learning_rate": 0.00019992797769873252, - "loss": 0.9647, + "epoch": 0.22175684204486776, + "grad_norm": 0.28515625, + "learning_rate": 0.00019110411088981802, + "loss": 0.9638, "step": 3865 }, { - "epoch": 0.11102663778635262, - "grad_norm": 0.37890625, - "learning_rate": 0.00019992606507478463, - "loss": 1.0445, + "epoch": 0.22204372023638763, + "grad_norm": 0.2421875, + "learning_rate": 0.0001910627759321062, + "loss": 0.9094, "step": 3870 }, { - "epoch": 0.11117008305481044, - "grad_norm": 0.3828125, - "learning_rate": 0.0001999241273961202, - "loss": 0.9369, + "epoch": 0.2223305984279075, + "grad_norm": 0.298828125, + "learning_rate": 0.00019102134965643847, + "loss": 0.9825, "step": 3875 }, { - "epoch": 0.11131352832326825, - "grad_norm": 0.392578125, - "learning_rate": 0.0001999221646632251, - "loss": 0.926, + "epoch": 0.22261747661942738, + "grad_norm": 0.2470703125, + "learning_rate": 0.0001909798321043572, + "loss": 1.0358, "step": 3880 }, { - "epoch": 0.11145697359172607, - "grad_norm": 0.388671875, - "learning_rate": 0.0001999201768765914, - "loss": 0.9976, + "epoch": 0.22290435481094728, + "grad_norm": 0.287109375, + "learning_rate": 0.00019093822331749634, + "loss": 0.904, "step": 3885 }, { - "epoch": 0.1116004188601839, - "grad_norm": 0.408203125, - "learning_rate": 0.0001999181640367175, - "loss": 1.0355, + "epoch": 0.22319123300246715, + "grad_norm": 0.279296875, + "learning_rate": 0.00019089652333758114, + "loss": 0.9756, "step": 3890 }, { - "epoch": 0.11174386412864172, - "grad_norm": 0.36328125, - "learning_rate": 0.00019991612614410817, - "loss": 0.9945, + "epoch": 0.22347811119398703, + "grad_norm": 0.26171875, + "learning_rate": 0.00019085473220642856, + "loss": 0.972, "step": 3895 }, { - "epoch": 0.11188730939709954, - "grad_norm": 0.34375, - "learning_rate": 0.00019991406319927428, - "loss": 0.9022, + "epoch": 0.2237649893855069, + "grad_norm": 0.27734375, + "learning_rate": 0.0001908128499659468, + "loss": 0.9302, "step": 3900 }, { - "epoch": 0.11203075466555736, - "grad_norm": 0.36328125, - "learning_rate": 0.00019991197520273315, - "loss": 0.915, + "epoch": 0.2240518675770268, + "grad_norm": 0.275390625, + "learning_rate": 0.00019077087665813545, + "loss": 0.9643, "step": 3905 }, { - "epoch": 0.11217419993401517, - "grad_norm": 0.357421875, - "learning_rate": 0.00019990986215500826, - "loss": 0.9269, + "epoch": 0.22433874576854668, + "grad_norm": 0.296875, + "learning_rate": 0.0001907288123250854, + "loss": 0.9786, "step": 3910 }, { - "epoch": 0.11231764520247299, - "grad_norm": 0.330078125, - "learning_rate": 0.00019990772405662944, - "loss": 0.9921, + "epoch": 0.22462562396006655, + "grad_norm": 0.28125, + "learning_rate": 0.00019068665700897896, + "loss": 0.9587, "step": 3915 }, { - "epoch": 0.11246109047093082, - "grad_norm": 0.388671875, - "learning_rate": 0.00019990556090813277, - "loss": 0.9746, + "epoch": 0.22491250215158642, + "grad_norm": 0.302734375, + "learning_rate": 0.0001906444107520895, + "loss": 1.006, "step": 3920 }, { - "epoch": 0.11260453573938864, - "grad_norm": 0.35546875, - 
"learning_rate": 0.00019990337271006062, - "loss": 0.9372, + "epoch": 0.22519938034310633, + "grad_norm": 0.26171875, + "learning_rate": 0.00019060207359678164, + "loss": 0.8882, "step": 3925 }, { - "epoch": 0.11274798100784646, - "grad_norm": 0.353515625, - "learning_rate": 0.00019990115946296168, - "loss": 1.0304, + "epoch": 0.2254862585346262, + "grad_norm": 0.2890625, + "learning_rate": 0.00019055964558551124, + "loss": 0.9823, "step": 3930 }, { - "epoch": 0.11289142627630427, - "grad_norm": 0.333984375, - "learning_rate": 0.00019989892116739085, - "loss": 0.9308, + "epoch": 0.22577313672614607, + "grad_norm": 0.263671875, + "learning_rate": 0.00019051712676082522, + "loss": 0.9457, "step": 3935 }, { - "epoch": 0.11303487154476209, - "grad_norm": 0.353515625, - "learning_rate": 0.00019989665782390933, - "loss": 1.0056, + "epoch": 0.22606001491766595, + "grad_norm": 0.279296875, + "learning_rate": 0.00019047451716536147, + "loss": 0.9735, "step": 3940 }, { - "epoch": 0.11317831681321991, - "grad_norm": 0.376953125, - "learning_rate": 0.00019989436943308465, - "loss": 0.9556, + "epoch": 0.22634689310918585, + "grad_norm": 0.291015625, + "learning_rate": 0.000190431816841849, + "loss": 0.9697, "step": 3945 }, { - "epoch": 0.11332176208167774, - "grad_norm": 0.349609375, - "learning_rate": 0.00019989205599549057, - "loss": 1.0082, + "epoch": 0.22663377130070572, + "grad_norm": 0.275390625, + "learning_rate": 0.00019038902583310785, + "loss": 1.0229, "step": 3950 }, { - "epoch": 0.11346520735013556, - "grad_norm": 0.33984375, - "learning_rate": 0.0001998897175117071, - "loss": 1.0049, + "epoch": 0.2269206494922256, + "grad_norm": 0.259765625, + "learning_rate": 0.00019034614418204893, + "loss": 0.9805, "step": 3955 }, { - "epoch": 0.11360865261859338, - "grad_norm": 0.3984375, - "learning_rate": 0.00019988735398232066, - "loss": 1.0001, + "epoch": 0.22720752768374547, + "grad_norm": 0.259765625, + "learning_rate": 0.000190303171931674, + "loss": 0.9696, "step": 3960 }, { - "epoch": 0.11375209788705119, - "grad_norm": 0.36328125, - "learning_rate": 0.0001998849654079238, - "loss": 1.0119, + "epoch": 0.22749440587526537, + "grad_norm": 0.271484375, + "learning_rate": 0.00019026010912507577, + "loss": 0.9885, "step": 3965 }, { - "epoch": 0.11389554315550901, - "grad_norm": 0.353515625, - "learning_rate": 0.00019988255178911543, - "loss": 0.9873, + "epoch": 0.22778128406678524, + "grad_norm": 0.279296875, + "learning_rate": 0.00019021695580543772, + "loss": 0.9571, "step": 3970 }, { - "epoch": 0.11403898842396684, - "grad_norm": 0.392578125, - "learning_rate": 0.00019988011312650072, - "loss": 0.9845, + "epoch": 0.22806816225830512, + "grad_norm": 0.267578125, + "learning_rate": 0.00019017371201603407, + "loss": 0.8998, "step": 3975 }, { - "epoch": 0.11418243369242466, - "grad_norm": 0.3515625, - "learning_rate": 0.00019987764942069113, - "loss": 0.9453, + "epoch": 0.228355040449825, + "grad_norm": 0.265625, + "learning_rate": 0.00019013037780022982, + "loss": 1.0427, "step": 3980 }, { - "epoch": 0.11432587896088248, - "grad_norm": 0.365234375, - "learning_rate": 0.00019987516067230442, - "loss": 1.0745, + "epoch": 0.2286419186413449, + "grad_norm": 0.267578125, + "learning_rate": 0.00019008695320148062, + "loss": 0.9227, "step": 3985 }, { - "epoch": 0.1144693242293403, - "grad_norm": 0.376953125, - "learning_rate": 0.00019987264688196453, - "loss": 0.9367, + "epoch": 0.22892879683286477, + "grad_norm": 0.267578125, + "learning_rate": 0.0001900434382633327, + "loss": 0.9698, "step": 3990 }, { - 
"epoch": 0.11461276949779811, - "grad_norm": 0.384765625, - "learning_rate": 0.00019987010805030177, - "loss": 0.941, + "epoch": 0.22921567502438464, + "grad_norm": 0.255859375, + "learning_rate": 0.00018999983302942302, + "loss": 0.9134, "step": 3995 }, { - "epoch": 0.11475621476625593, - "grad_norm": 0.357421875, - "learning_rate": 0.00019986754417795278, - "loss": 0.8934, + "epoch": 0.22950255321590451, + "grad_norm": 0.302734375, + "learning_rate": 0.00018995613754347893, + "loss": 0.9699, "step": 4000 }, { - "epoch": 0.11489966003471376, - "grad_norm": 0.390625, - "learning_rate": 0.0001998649552655603, - "loss": 0.8483, + "epoch": 0.22978943140742442, + "grad_norm": 0.26171875, + "learning_rate": 0.00018991235184931843, + "loss": 0.896, "step": 4005 }, { - "epoch": 0.11504310530317158, - "grad_norm": 0.36328125, - "learning_rate": 0.00019986234131377353, - "loss": 1.0156, + "epoch": 0.2300763095989443, + "grad_norm": 0.26171875, + "learning_rate": 0.00018986847599084986, + "loss": 0.9285, "step": 4010 }, { - "epoch": 0.1151865505716294, - "grad_norm": 0.34375, - "learning_rate": 0.00019985970232324786, - "loss": 0.9678, + "epoch": 0.23036318779046416, + "grad_norm": 0.26953125, + "learning_rate": 0.00018982451001207205, + "loss": 0.984, "step": 4015 }, { - "epoch": 0.11532999584008721, - "grad_norm": 0.396484375, - "learning_rate": 0.00019985703829464492, - "loss": 1.0922, + "epoch": 0.23065006598198404, + "grad_norm": 0.255859375, + "learning_rate": 0.00018978045395707418, + "loss": 0.9619, "step": 4020 }, { - "epoch": 0.11547344110854503, - "grad_norm": 0.40234375, - "learning_rate": 0.0001998543492286327, - "loss": 0.9307, + "epoch": 0.23093694417350394, + "grad_norm": 0.279296875, + "learning_rate": 0.00018973630787003575, + "loss": 0.9796, "step": 4025 }, { - "epoch": 0.11561688637700285, - "grad_norm": 0.431640625, - "learning_rate": 0.0001998516351258855, - "loss": 1.085, + "epoch": 0.2312238223650238, + "grad_norm": 0.275390625, + "learning_rate": 0.0001896920717952266, + "loss": 0.9083, "step": 4030 }, { - "epoch": 0.11576033164546068, - "grad_norm": 0.35546875, - "learning_rate": 0.00019984889598708374, - "loss": 0.9591, + "epoch": 0.23151070055654369, + "grad_norm": 0.271484375, + "learning_rate": 0.00018964774577700667, + "loss": 1.0115, "step": 4035 }, { - "epoch": 0.1159037769139185, - "grad_norm": 0.349609375, - "learning_rate": 0.00019984613181291426, - "loss": 0.9099, + "epoch": 0.23179757874806356, + "grad_norm": 0.25390625, + "learning_rate": 0.00018960332985982627, + "loss": 0.9947, "step": 4040 }, { - "epoch": 0.11604722218237631, - "grad_norm": 0.375, - "learning_rate": 0.00019984334260407012, - "loss": 0.9541, + "epoch": 0.23208445693958346, + "grad_norm": 0.259765625, + "learning_rate": 0.0001895588240882258, + "loss": 0.9585, "step": 4045 }, { - "epoch": 0.11619066745083413, - "grad_norm": 0.390625, - "learning_rate": 0.00019984052836125065, - "loss": 0.9297, + "epoch": 0.23237133513110333, + "grad_norm": 0.2734375, + "learning_rate": 0.0001895142285068357, + "loss": 0.9033, "step": 4050 }, { - "epoch": 0.11633411271929195, - "grad_norm": 0.35546875, - "learning_rate": 0.00019983768908516148, - "loss": 0.8955, + "epoch": 0.2326582133226232, + "grad_norm": 0.265625, + "learning_rate": 0.00018946954316037648, + "loss": 0.9287, "step": 4055 }, { - "epoch": 0.11647755798774978, - "grad_norm": 0.3359375, - "learning_rate": 0.00019983482477651455, - "loss": 0.9035, + "epoch": 0.23294509151414308, + "grad_norm": 0.259765625, + "learning_rate": 0.0001894247680936588, 
+ "loss": 0.9409, "step": 4060 }, { - "epoch": 0.1166210032562076, - "grad_norm": 0.375, - "learning_rate": 0.00019983193543602799, - "loss": 0.9516, + "epoch": 0.23323196970566298, + "grad_norm": 0.251953125, + "learning_rate": 0.00018937990335158312, + "loss": 0.9404, "step": 4065 }, { - "epoch": 0.11676444852466542, - "grad_norm": 0.400390625, - "learning_rate": 0.00019982902106442622, - "loss": 1.0164, + "epoch": 0.23351884789718286, + "grad_norm": 0.26953125, + "learning_rate": 0.00018933494897913997, + "loss": 0.99, "step": 4070 }, { - "epoch": 0.11690789379312323, - "grad_norm": 0.4140625, - "learning_rate": 0.00019982608166244003, - "loss": 1.0014, + "epoch": 0.23380572608870273, + "grad_norm": 0.279296875, + "learning_rate": 0.00018928990502140963, + "loss": 0.9034, "step": 4075 }, { - "epoch": 0.11705133906158105, - "grad_norm": 0.390625, - "learning_rate": 0.00019982311723080643, - "loss": 0.9418, + "epoch": 0.2340926042802226, + "grad_norm": 0.25390625, + "learning_rate": 0.00018924477152356233, + "loss": 1.0379, "step": 4080 }, { - "epoch": 0.11719478433003887, - "grad_norm": 0.36328125, - "learning_rate": 0.00019982012777026864, - "loss": 0.9753, + "epoch": 0.2343794824717425, + "grad_norm": 0.255859375, + "learning_rate": 0.00018919954853085803, + "loss": 0.948, "step": 4085 }, { - "epoch": 0.1173382295984967, - "grad_norm": 0.34375, - "learning_rate": 0.00019981711328157626, - "loss": 0.7955, + "epoch": 0.23466636066326238, + "grad_norm": 0.28515625, + "learning_rate": 0.0001891542360886464, + "loss": 1.0377, "step": 4090 }, { - "epoch": 0.11748167486695452, - "grad_norm": 0.34375, - "learning_rate": 0.0001998140737654851, - "loss": 0.9926, + "epoch": 0.23495323885478225, + "grad_norm": 0.255859375, + "learning_rate": 0.00018910883424236695, + "loss": 0.9948, "step": 4095 }, { - "epoch": 0.11762512013541233, - "grad_norm": 0.388671875, - "learning_rate": 0.00019981100922275728, - "loss": 0.9933, + "epoch": 0.23524011704630213, + "grad_norm": 0.291015625, + "learning_rate": 0.0001890633430375487, + "loss": 0.9823, "step": 4100 }, { - "epoch": 0.11776856540387015, - "grad_norm": 0.3828125, - "learning_rate": 0.00019980791965416118, - "loss": 1.1364, + "epoch": 0.23552699523782203, + "grad_norm": 0.279296875, + "learning_rate": 0.00018901776251981032, + "loss": 0.9425, "step": 4105 }, { - "epoch": 0.11791201067232797, - "grad_norm": 0.359375, - "learning_rate": 0.00019980480506047143, - "loss": 0.9771, + "epoch": 0.2358138734293419, + "grad_norm": 0.2578125, + "learning_rate": 0.0001889720927348601, + "loss": 0.9568, "step": 4110 }, { - "epoch": 0.1180554559407858, - "grad_norm": 0.361328125, - "learning_rate": 0.000199801665442469, - "loss": 0.9266, + "epoch": 0.23610075162086178, + "grad_norm": 0.265625, + "learning_rate": 0.00018892633372849575, + "loss": 0.9233, "step": 4115 }, { - "epoch": 0.11819890120924362, - "grad_norm": 0.349609375, - "learning_rate": 0.00019979850080094108, - "loss": 1.0254, + "epoch": 0.23638762981238165, + "grad_norm": 0.267578125, + "learning_rate": 0.00018888048554660454, + "loss": 0.9648, "step": 4120 }, { - "epoch": 0.11834234647770144, - "grad_norm": 0.33203125, - "learning_rate": 0.0001997953111366811, - "loss": 0.9365, + "epoch": 0.23667450800390155, + "grad_norm": 0.271484375, + "learning_rate": 0.00018883454823516313, + "loss": 0.9916, "step": 4125 }, { - "epoch": 0.11848579174615925, - "grad_norm": 0.37890625, - "learning_rate": 0.00019979209645048889, - "loss": 0.9525, + "epoch": 0.23696138619542143, + "grad_norm": 0.27734375, + 
"learning_rate": 0.0001887885218402375, + "loss": 1.0255, "step": 4130 }, { - "epoch": 0.11862923701461707, - "grad_norm": 0.373046875, - "learning_rate": 0.00019978885674317045, - "loss": 0.9207, + "epoch": 0.2372482643869413, + "grad_norm": 0.27734375, + "learning_rate": 0.00018874240640798316, + "loss": 0.9522, "step": 4135 }, { - "epoch": 0.11877268228307489, - "grad_norm": 0.39453125, - "learning_rate": 0.00019978559201553803, - "loss": 0.9542, + "epoch": 0.23753514257846117, + "grad_norm": 0.255859375, + "learning_rate": 0.0001886962019846446, + "loss": 0.9887, "step": 4140 }, { - "epoch": 0.11891612755153272, - "grad_norm": 0.359375, - "learning_rate": 0.00019978230226841027, - "loss": 0.9382, + "epoch": 0.23782202076998107, + "grad_norm": 0.3046875, + "learning_rate": 0.00018864990861655584, + "loss": 0.9113, "step": 4145 }, { - "epoch": 0.11905957281999054, - "grad_norm": 0.380859375, - "learning_rate": 0.000199778987502612, - "loss": 0.9977, + "epoch": 0.23810889896150095, + "grad_norm": 0.255859375, + "learning_rate": 0.0001886035263501399, + "loss": 0.9509, "step": 4150 }, { - "epoch": 0.11920301808844835, - "grad_norm": 0.37109375, - "learning_rate": 0.0001997756477189743, - "loss": 1.1, + "epoch": 0.23839577715302082, + "grad_norm": 0.279296875, + "learning_rate": 0.00018855705523190908, + "loss": 0.9792, "step": 4155 }, { - "epoch": 0.11934646335690617, - "grad_norm": 0.375, - "learning_rate": 0.00019977228291833462, - "loss": 0.9641, + "epoch": 0.2386826553445407, + "grad_norm": 0.25390625, + "learning_rate": 0.0001885104953084647, + "loss": 0.9923, "step": 4160 }, { - "epoch": 0.11948990862536399, - "grad_norm": 0.3359375, - "learning_rate": 0.0001997688931015366, - "loss": 0.9183, + "epoch": 0.2389695335360606, + "grad_norm": 0.28125, + "learning_rate": 0.00018846384662649714, + "loss": 0.9719, "step": 4165 }, { - "epoch": 0.1196333538938218, - "grad_norm": 0.365234375, - "learning_rate": 0.00019976547826943019, - "loss": 0.9094, + "epoch": 0.23925641172758047, + "grad_norm": 0.2734375, + "learning_rate": 0.00018841710923278582, + "loss": 0.9501, "step": 4170 }, { - "epoch": 0.11977679916227964, - "grad_norm": 0.3671875, - "learning_rate": 0.00019976203842287155, - "loss": 0.9856, + "epoch": 0.23954328991910034, + "grad_norm": 0.2578125, + "learning_rate": 0.00018837028317419908, + "loss": 0.9205, "step": 4175 }, { - "epoch": 0.11992024443073745, - "grad_norm": 0.37890625, - "learning_rate": 0.00019975857356272322, - "loss": 0.8588, + "epoch": 0.23983016811062022, + "grad_norm": 0.25390625, + "learning_rate": 0.00018832336849769425, + "loss": 0.9432, "step": 4180 }, { - "epoch": 0.12006368969919527, - "grad_norm": 0.35546875, - "learning_rate": 0.0001997550836898539, - "loss": 0.8297, + "epoch": 0.24011704630214012, + "grad_norm": 0.291015625, + "learning_rate": 0.0001882763652503174, + "loss": 1.0014, "step": 4185 }, { - "epoch": 0.12020713496765309, - "grad_norm": 0.376953125, - "learning_rate": 0.00019975156880513867, - "loss": 1.0082, + "epoch": 0.24040392449366, + "grad_norm": 0.275390625, + "learning_rate": 0.00018822927347920355, + "loss": 0.9548, "step": 4190 }, { - "epoch": 0.12035058023611091, - "grad_norm": 0.365234375, - "learning_rate": 0.00019974802890945878, - "loss": 0.8961, + "epoch": 0.24069080268517987, + "grad_norm": 0.255859375, + "learning_rate": 0.00018818209323157638, + "loss": 1.0308, "step": 4195 }, { - "epoch": 0.12049402550456874, - "grad_norm": 0.3671875, - "learning_rate": 0.00019974446400370185, - "loss": 0.9665, + "epoch": 
0.24097768087669974, + "grad_norm": 0.314453125, + "learning_rate": 0.0001881348245547484, + "loss": 1.0047, "step": 4200 }, { - "epoch": 0.12063747077302656, - "grad_norm": 0.3359375, - "learning_rate": 0.00019974087408876167, - "loss": 0.8746, + "epoch": 0.24126455906821964, + "grad_norm": 0.291015625, + "learning_rate": 0.00018808746749612073, + "loss": 0.919, "step": 4205 }, { - "epoch": 0.12078091604148437, - "grad_norm": 0.353515625, - "learning_rate": 0.00019973725916553838, - "loss": 0.9513, + "epoch": 0.24155143725973952, + "grad_norm": 0.263671875, + "learning_rate": 0.0001880400221031831, + "loss": 0.9214, "step": 4210 }, { - "epoch": 0.12092436130994219, - "grad_norm": 0.318359375, - "learning_rate": 0.00019973361923493832, - "loss": 0.8829, + "epoch": 0.2418383154512594, + "grad_norm": 0.26953125, + "learning_rate": 0.00018799248842351393, + "loss": 0.9631, "step": 4215 }, { - "epoch": 0.12106780657840001, - "grad_norm": 0.412109375, - "learning_rate": 0.00019972995429787415, - "loss": 1.0887, + "epoch": 0.24212519364277926, + "grad_norm": 0.2578125, + "learning_rate": 0.00018794486650478004, + "loss": 0.9936, "step": 4220 }, { - "epoch": 0.12121125184685783, - "grad_norm": 0.36328125, - "learning_rate": 0.00019972626435526482, - "loss": 0.9436, + "epoch": 0.24241207183429916, + "grad_norm": 0.26171875, + "learning_rate": 0.0001878971563947368, + "loss": 0.9386, "step": 4225 }, { - "epoch": 0.12135469711531566, - "grad_norm": 0.3671875, - "learning_rate": 0.00019972254940803553, - "loss": 0.9202, + "epoch": 0.24269895002581904, + "grad_norm": 0.28515625, + "learning_rate": 0.00018784935814122804, + "loss": 1.0508, "step": 4230 }, { - "epoch": 0.12149814238377347, - "grad_norm": 0.361328125, - "learning_rate": 0.00019971880945711767, - "loss": 1.0407, + "epoch": 0.2429858282173389, + "grad_norm": 0.251953125, + "learning_rate": 0.0001878014717921859, + "loss": 0.8909, "step": 4235 }, { - "epoch": 0.12164158765223129, - "grad_norm": 0.361328125, - "learning_rate": 0.00019971504450344902, - "loss": 0.9714, + "epoch": 0.24327270640885879, + "grad_norm": 0.271484375, + "learning_rate": 0.000187753497395631, + "loss": 0.9797, "step": 4240 }, { - "epoch": 0.12178503292068911, - "grad_norm": 0.337890625, - "learning_rate": 0.00019971125454797356, - "loss": 0.958, + "epoch": 0.2435595846003787, + "grad_norm": 0.287109375, + "learning_rate": 0.00018770543499967217, + "loss": 0.9766, "step": 4245 }, { - "epoch": 0.12192847818914693, - "grad_norm": 0.345703125, - "learning_rate": 0.00019970743959164155, - "loss": 0.9713, + "epoch": 0.24384646279189856, + "grad_norm": 0.275390625, + "learning_rate": 0.00018765728465250644, + "loss": 1.0364, "step": 4250 }, { - "epoch": 0.12207192345760474, - "grad_norm": 0.427734375, - "learning_rate": 0.00019970359963540952, - "loss": 0.9339, + "epoch": 0.24413334098341843, + "grad_norm": 0.26171875, + "learning_rate": 0.00018760904640241906, + "loss": 0.9276, "step": 4255 }, { - "epoch": 0.12221536872606258, - "grad_norm": 0.365234375, - "learning_rate": 0.00019969973468024032, - "loss": 1.0173, + "epoch": 0.2444202191749383, + "grad_norm": 0.275390625, + "learning_rate": 0.00018756072029778352, + "loss": 0.9165, "step": 4260 }, { - "epoch": 0.1223588139945204, - "grad_norm": 0.3671875, - "learning_rate": 0.00019969584472710294, - "loss": 0.941, + "epoch": 0.2447070973664582, + "grad_norm": 0.26953125, + "learning_rate": 0.00018751230638706131, + "loss": 0.9561, "step": 4265 }, { - "epoch": 0.12250225926297821, - "grad_norm": 0.37109375, - 
"learning_rate": 0.0001996919297769728, - "loss": 1.0209, + "epoch": 0.24499397555797808, + "grad_norm": 0.2734375, + "learning_rate": 0.00018746380471880203, + "loss": 0.9377, "step": 4270 }, { - "epoch": 0.12264570453143603, - "grad_norm": 0.3828125, - "learning_rate": 0.00019968798983083144, - "loss": 0.8795, + "epoch": 0.24528085374949796, + "grad_norm": 0.275390625, + "learning_rate": 0.00018741521534164325, + "loss": 0.9992, "step": 4275 }, { - "epoch": 0.12278914979989385, - "grad_norm": 0.353515625, - "learning_rate": 0.00019968402488966677, - "loss": 0.9391, + "epoch": 0.24556773194101783, + "grad_norm": 0.275390625, + "learning_rate": 0.00018736653830431048, + "loss": 1.0328, "step": 4280 }, { - "epoch": 0.12293259506835168, - "grad_norm": 0.35546875, - "learning_rate": 0.00019968003495447292, - "loss": 0.8924, + "epoch": 0.24585461013253773, + "grad_norm": 0.2578125, + "learning_rate": 0.0001873177736556172, + "loss": 0.9225, "step": 4285 }, { - "epoch": 0.1230760403368095, - "grad_norm": 0.353515625, - "learning_rate": 0.00019967602002625027, - "loss": 0.9458, + "epoch": 0.2461414883240576, + "grad_norm": 0.271484375, + "learning_rate": 0.00018726892144446474, + "loss": 0.9061, "step": 4290 }, { - "epoch": 0.12321948560526731, - "grad_norm": 0.3515625, - "learning_rate": 0.00019967198010600553, - "loss": 0.8851, + "epoch": 0.24642836651557748, + "grad_norm": 0.271484375, + "learning_rate": 0.0001872199817198421, + "loss": 0.9639, "step": 4295 }, { - "epoch": 0.12336293087372513, - "grad_norm": 0.384765625, - "learning_rate": 0.00019966791519475169, - "loss": 1.1293, + "epoch": 0.24671524470709735, + "grad_norm": 0.2734375, + "learning_rate": 0.00018717095453082627, + "loss": 0.9764, "step": 4300 }, { - "epoch": 0.12350637614218295, - "grad_norm": 0.33984375, - "learning_rate": 0.0001996638252935078, - "loss": 0.979, + "epoch": 0.24700212289861725, + "grad_norm": 0.265625, + "learning_rate": 0.00018712183992658174, + "loss": 0.9006, "step": 4305 }, { - "epoch": 0.12364982141064076, - "grad_norm": 0.365234375, - "learning_rate": 0.00019965971040329948, - "loss": 0.947, + "epoch": 0.24728900109013713, + "grad_norm": 0.27734375, + "learning_rate": 0.00018707263795636077, + "loss": 0.9212, "step": 4310 }, { - "epoch": 0.1237932666790986, - "grad_norm": 0.37109375, - "learning_rate": 0.0001996555705251584, - "loss": 0.918, + "epoch": 0.247575879281657, + "grad_norm": 0.267578125, + "learning_rate": 0.0001870233486695032, + "loss": 0.8946, "step": 4315 }, { - "epoch": 0.12393671194755641, - "grad_norm": 0.369140625, - "learning_rate": 0.00019965140566012253, - "loss": 0.8982, + "epoch": 0.24786275747317688, + "grad_norm": 0.28125, + "learning_rate": 0.0001869739721154365, + "loss": 1.0377, "step": 4320 }, { - "epoch": 0.12408015721601423, - "grad_norm": 0.359375, - "learning_rate": 0.00019964721580923627, - "loss": 0.989, + "epoch": 0.24814963566469678, + "grad_norm": 0.29296875, + "learning_rate": 0.00018692450834367546, + "loss": 1.0284, "step": 4325 }, { - "epoch": 0.12422360248447205, - "grad_norm": 0.404296875, - "learning_rate": 0.00019964300097355, - "loss": 0.9421, + "epoch": 0.24843651385621665, + "grad_norm": 0.283203125, + "learning_rate": 0.00018687495740382258, + "loss": 0.9688, "step": 4330 }, { - "epoch": 0.12436704775292987, - "grad_norm": 0.3671875, - "learning_rate": 0.00019963876115412058, - "loss": 0.9791, + "epoch": 0.24872339204773652, + "grad_norm": 0.263671875, + "learning_rate": 0.00018682531934556757, + "loss": 0.989, "step": 4335 }, { - "epoch": 
0.12451049302138768, - "grad_norm": 0.375, - "learning_rate": 0.0001996344963520111, - "loss": 1.0274, + "epoch": 0.2490102702392564, + "grad_norm": 0.265625, + "learning_rate": 0.00018677559421868766, + "loss": 0.9916, "step": 4340 }, { - "epoch": 0.12465393828984551, - "grad_norm": 0.357421875, - "learning_rate": 0.00019963020656829083, - "loss": 0.9797, + "epoch": 0.2492971484307763, + "grad_norm": 0.2890625, + "learning_rate": 0.00018672578207304727, + "loss": 0.9724, "step": 4345 }, { - "epoch": 0.12479738355830333, - "grad_norm": 0.357421875, - "learning_rate": 0.00019962589180403542, - "loss": 1.0272, + "epoch": 0.24958402662229617, + "grad_norm": 0.2734375, + "learning_rate": 0.00018667588295859816, + "loss": 0.9535, "step": 4350 }, { - "epoch": 0.12494082882676115, - "grad_norm": 0.37890625, - "learning_rate": 0.00019962155206032663, - "loss": 0.95, + "epoch": 0.24987090481381605, + "grad_norm": 0.275390625, + "learning_rate": 0.0001866258969253792, + "loss": 1.0108, "step": 4355 }, { - "epoch": 0.12508427409521897, - "grad_norm": 0.380859375, - "learning_rate": 0.00019961718733825268, - "loss": 0.9851, + "epoch": 0.2501577830053359, + "grad_norm": 0.2734375, + "learning_rate": 0.00018657582402351663, + "loss": 1.0017, "step": 4360 }, { - "epoch": 0.1252277193636768, - "grad_norm": 0.3828125, - "learning_rate": 0.00019961279763890786, - "loss": 0.8903, + "epoch": 0.2504446611968558, + "grad_norm": 0.2734375, + "learning_rate": 0.00018652566430322356, + "loss": 1.0202, "step": 4365 }, { - "epoch": 0.1253711646321346, - "grad_norm": 0.33203125, - "learning_rate": 0.00019960838296339287, - "loss": 0.9439, + "epoch": 0.25073153938837567, + "grad_norm": 0.27734375, + "learning_rate": 0.0001864754178148003, + "loss": 0.9738, "step": 4370 }, { - "epoch": 0.12551460990059243, - "grad_norm": 0.3984375, - "learning_rate": 0.00019960394331281457, - "loss": 0.9976, + "epoch": 0.25101841757989557, + "grad_norm": 0.26171875, + "learning_rate": 0.00018642508460863416, + "loss": 0.9779, "step": 4375 }, { - "epoch": 0.12565805516905024, - "grad_norm": 0.376953125, - "learning_rate": 0.00019959947868828618, - "loss": 1.0236, + "epoch": 0.25130529577141547, + "grad_norm": 0.27734375, + "learning_rate": 0.00018637466473519937, + "loss": 0.966, "step": 4380 }, { - "epoch": 0.12580150043750807, - "grad_norm": 0.376953125, - "learning_rate": 0.00019959498909092707, - "loss": 0.9622, + "epoch": 0.2515921739629353, + "grad_norm": 0.3203125, + "learning_rate": 0.0001863241582450571, + "loss": 1.0283, "step": 4385 }, { - "epoch": 0.1259449457059659, - "grad_norm": 0.353515625, - "learning_rate": 0.00019959047452186294, - "loss": 1.0756, + "epoch": 0.2518790521544552, + "grad_norm": 0.259765625, + "learning_rate": 0.00018627356518885536, + "loss": 0.9777, "step": 4390 }, { - "epoch": 0.1260883909744237, - "grad_norm": 0.376953125, - "learning_rate": 0.00019958593498222576, - "loss": 0.9165, + "epoch": 0.2521659303459751, + "grad_norm": 0.27734375, + "learning_rate": 0.000186222885617329, + "loss": 0.9649, "step": 4395 }, { - "epoch": 0.12623183624288153, - "grad_norm": 0.369140625, - "learning_rate": 0.00019958137047315375, - "loss": 0.9616, + "epoch": 0.25245280853749497, + "grad_norm": 0.271484375, + "learning_rate": 0.00018617211958129958, + "loss": 0.8768, "step": 4400 }, { - "epoch": 0.12637528151133934, - "grad_norm": 0.3359375, - "learning_rate": 0.00019957678099579137, - "loss": 0.9871, + "epoch": 0.25273968672901487, + "grad_norm": 0.265625, + "learning_rate": 0.00018612126713167542, + "loss": 
1.0189, "step": 4405 }, { - "epoch": 0.12651872677979717, - "grad_norm": 0.38671875, - "learning_rate": 0.0001995721665512893, - "loss": 0.9971, + "epoch": 0.2530265649205347, + "grad_norm": 0.2578125, + "learning_rate": 0.0001860703283194515, + "loss": 0.9404, "step": 4410 }, { - "epoch": 0.126662172048255, - "grad_norm": 0.36328125, - "learning_rate": 0.00019956752714080458, - "loss": 0.9674, + "epoch": 0.2533134431120546, + "grad_norm": 0.275390625, + "learning_rate": 0.00018601930319570934, + "loss": 0.9849, "step": 4415 }, { - "epoch": 0.1268056173167128, - "grad_norm": 0.3359375, - "learning_rate": 0.00019956286276550048, - "loss": 0.9415, + "epoch": 0.2536003213035745, + "grad_norm": 0.2734375, + "learning_rate": 0.000185968191811617, + "loss": 0.9844, "step": 4420 }, { - "epoch": 0.12694906258517064, - "grad_norm": 0.396484375, - "learning_rate": 0.00019955817342654647, - "loss": 1.04, + "epoch": 0.25388719949509436, + "grad_norm": 0.259765625, + "learning_rate": 0.00018591699421842916, + "loss": 0.8693, "step": 4425 }, { - "epoch": 0.12709250785362844, - "grad_norm": 0.375, - "learning_rate": 0.00019955345912511837, - "loss": 0.9284, + "epoch": 0.25417407768661426, + "grad_norm": 0.283203125, + "learning_rate": 0.00018586571046748685, + "loss": 1.0033, "step": 4430 }, { - "epoch": 0.12723595312208627, - "grad_norm": 0.34375, - "learning_rate": 0.00019954871986239812, - "loss": 0.9391, + "epoch": 0.25446095587813417, + "grad_norm": 0.263671875, + "learning_rate": 0.00018581434061021754, + "loss": 0.9435, "step": 4435 }, { - "epoch": 0.12737939839054407, - "grad_norm": 0.35546875, - "learning_rate": 0.0001995439556395741, - "loss": 0.8645, + "epoch": 0.254747834069654, + "grad_norm": 0.275390625, + "learning_rate": 0.00018576288469813505, + "loss": 1.019, "step": 4440 }, { - "epoch": 0.1275228436590019, - "grad_norm": 0.361328125, - "learning_rate": 0.0001995391664578408, - "loss": 1.0845, + "epoch": 0.2550347122611739, + "grad_norm": 0.25390625, + "learning_rate": 0.00018571134278283946, + "loss": 1.0081, "step": 4445 }, { - "epoch": 0.12766628892745974, - "grad_norm": 0.375, - "learning_rate": 0.00019953435231839906, - "loss": 0.9377, + "epoch": 0.25532159045269376, + "grad_norm": 0.279296875, + "learning_rate": 0.0001856597149160171, + "loss": 0.9636, "step": 4450 }, { - "epoch": 0.12780973419591754, - "grad_norm": 0.35546875, - "learning_rate": 0.0001995295132224559, - "loss": 0.9855, + "epoch": 0.25560846864421366, + "grad_norm": 0.255859375, + "learning_rate": 0.00018560800114944063, + "loss": 1.0263, "step": 4455 }, { - "epoch": 0.12795317946437537, - "grad_norm": 0.359375, - "learning_rate": 0.0001995246491712247, - "loss": 0.8318, + "epoch": 0.25589534683573356, + "grad_norm": 0.248046875, + "learning_rate": 0.00018555620153496856, + "loss": 0.9449, "step": 4460 }, { - "epoch": 0.12809662473283318, - "grad_norm": 0.369140625, - "learning_rate": 0.00019951976016592497, - "loss": 1.0727, + "epoch": 0.2561822250272534, + "grad_norm": 0.26953125, + "learning_rate": 0.00018550431612454578, + "loss": 1.0175, "step": 4465 }, { - "epoch": 0.128240070001291, - "grad_norm": 0.357421875, - "learning_rate": 0.00019951484620778258, - "loss": 0.9268, + "epoch": 0.2564691032187733, + "grad_norm": 0.263671875, + "learning_rate": 0.00018545234497020302, + "loss": 0.9296, "step": 4470 }, { - "epoch": 0.12838351526974884, - "grad_norm": 0.58984375, - "learning_rate": 0.0001995099072980296, - "loss": 0.9919, + "epoch": 0.2567559814102932, + "grad_norm": 0.25, + "learning_rate": 
0.00018540028812405717, + "loss": 0.9648, "step": 4475 }, { - "epoch": 0.12852696053820664, - "grad_norm": 0.37109375, - "learning_rate": 0.00019950494343790438, - "loss": 0.9283, + "epoch": 0.25704285960181306, + "grad_norm": 0.26171875, + "learning_rate": 0.00018534814563831082, + "loss": 0.9098, "step": 4480 }, { - "epoch": 0.12867040580666447, - "grad_norm": 0.388671875, - "learning_rate": 0.00019949995462865155, - "loss": 0.8858, + "epoch": 0.25732973779333296, + "grad_norm": 0.287109375, + "learning_rate": 0.00018529591756525268, + "loss": 1.0392, "step": 4485 }, { - "epoch": 0.12881385107512228, - "grad_norm": 0.37109375, - "learning_rate": 0.00019949494087152193, - "loss": 0.9261, + "epoch": 0.25761661598485286, + "grad_norm": 0.3125, + "learning_rate": 0.00018524360395725715, + "loss": 0.9497, "step": 4490 }, { - "epoch": 0.1289572963435801, - "grad_norm": 0.373046875, - "learning_rate": 0.00019948990216777263, - "loss": 1.0501, + "epoch": 0.2579034941763727, + "grad_norm": 0.255859375, + "learning_rate": 0.0001851912048667844, + "loss": 0.9545, "step": 4495 }, { - "epoch": 0.12910074161203794, - "grad_norm": 0.36328125, - "learning_rate": 0.00019948483851866703, - "loss": 1.0213, + "epoch": 0.2581903723678926, + "grad_norm": 0.25390625, + "learning_rate": 0.00018513872034638037, + "loss": 0.9736, "step": 4500 }, { - "epoch": 0.12924418688049574, - "grad_norm": 0.359375, - "learning_rate": 0.0001994797499254748, - "loss": 0.9168, + "epoch": 0.25847725055941245, + "grad_norm": 0.265625, + "learning_rate": 0.00018508615044867668, + "loss": 0.9045, "step": 4505 }, { - "epoch": 0.12938763214895357, - "grad_norm": 0.359375, - "learning_rate": 0.0001994746363894717, - "loss": 0.9865, + "epoch": 0.25876412875093235, + "grad_norm": 0.259765625, + "learning_rate": 0.00018503349522639052, + "loss": 0.9952, "step": 4510 }, { - "epoch": 0.12953107741741138, - "grad_norm": 0.392578125, - "learning_rate": 0.00019946949791194, - "loss": 0.8407, + "epoch": 0.25905100694245226, + "grad_norm": 0.287109375, + "learning_rate": 0.00018498075473232469, + "loss": 0.9679, "step": 4515 }, { - "epoch": 0.1296745226858692, - "grad_norm": 0.38671875, - "learning_rate": 0.000199464334494168, - "loss": 0.9362, + "epoch": 0.2593378851339721, + "grad_norm": 0.255859375, + "learning_rate": 0.00018492792901936742, + "loss": 0.994, "step": 4520 }, { - "epoch": 0.12981796795432704, - "grad_norm": 0.376953125, - "learning_rate": 0.00019945914613745034, - "loss": 1.1097, + "epoch": 0.259624763325492, + "grad_norm": 0.267578125, + "learning_rate": 0.0001848750181404925, + "loss": 0.9577, "step": 4525 }, { - "epoch": 0.12996141322278484, - "grad_norm": 0.376953125, - "learning_rate": 0.00019945393284308792, - "loss": 1.0537, + "epoch": 0.2599116415170119, + "grad_norm": 0.267578125, + "learning_rate": 0.00018482202214875908, + "loss": 0.981, "step": 4530 }, { - "epoch": 0.13010485849124268, - "grad_norm": 0.357421875, - "learning_rate": 0.00019944869461238786, - "loss": 0.9299, + "epoch": 0.26019851970853175, + "grad_norm": 0.283203125, + "learning_rate": 0.00018476894109731166, + "loss": 1.0434, "step": 4535 }, { - "epoch": 0.13024830375970048, - "grad_norm": 0.37109375, - "learning_rate": 0.0001994434314466636, - "loss": 0.9353, + "epoch": 0.26048539790005165, + "grad_norm": 0.263671875, + "learning_rate": 0.00018471577503938, + "loss": 0.9877, "step": 4540 }, { - "epoch": 0.1303917490281583, - "grad_norm": 0.39453125, - "learning_rate": 0.00019943814334723477, - "loss": 0.9161, + "epoch": 0.2607722760915715, + 
"grad_norm": 0.265625, + "learning_rate": 0.00018466252402827915, + "loss": 1.0051, "step": 4545 }, { - "epoch": 0.1305351942966161, - "grad_norm": 0.388671875, - "learning_rate": 0.00019943283031542726, - "loss": 0.9684, + "epoch": 0.2610591542830914, + "grad_norm": 0.283203125, + "learning_rate": 0.00018460918811740937, + "loss": 0.9629, "step": 4550 }, { - "epoch": 0.13067863956507395, - "grad_norm": 0.40625, - "learning_rate": 0.00019942749235257322, - "loss": 1.081, + "epoch": 0.2613460324746113, + "grad_norm": 0.279296875, + "learning_rate": 0.00018455576736025602, + "loss": 0.9279, "step": 4555 }, { - "epoch": 0.13082208483353178, - "grad_norm": 0.392578125, - "learning_rate": 0.00019942212946001103, - "loss": 0.9961, + "epoch": 0.26163291066613115, + "grad_norm": 0.26953125, + "learning_rate": 0.00018450226181038955, + "loss": 0.9259, "step": 4560 }, { - "epoch": 0.13096553010198958, - "grad_norm": 0.369140625, - "learning_rate": 0.00019941674163908538, - "loss": 1.0803, + "epoch": 0.26191978885765105, + "grad_norm": 0.2578125, + "learning_rate": 0.00018444867152146545, + "loss": 0.9689, "step": 4565 }, { - "epoch": 0.1311089753704474, - "grad_norm": 0.353515625, - "learning_rate": 0.00019941132889114718, - "loss": 0.8518, + "epoch": 0.26220666704917095, + "grad_norm": 0.2734375, + "learning_rate": 0.00018439499654722421, + "loss": 0.9728, "step": 4570 }, { - "epoch": 0.13125242063890522, - "grad_norm": 0.369140625, - "learning_rate": 0.00019940589121755355, - "loss": 0.9764, + "epoch": 0.2624935452406908, + "grad_norm": 0.279296875, + "learning_rate": 0.00018434123694149117, + "loss": 0.9916, "step": 4575 }, { - "epoch": 0.13139586590736305, - "grad_norm": 0.359375, - "learning_rate": 0.0001994004286196679, - "loss": 0.9812, + "epoch": 0.2627804234322107, + "grad_norm": 0.259765625, + "learning_rate": 0.0001842873927581766, + "loss": 1.033, "step": 4580 }, { - "epoch": 0.13153931117582088, - "grad_norm": 0.376953125, - "learning_rate": 0.0001993949410988599, - "loss": 0.8849, + "epoch": 0.26306730162373054, + "grad_norm": 0.275390625, + "learning_rate": 0.00018423346405127555, + "loss": 0.9902, "step": 4585 }, { - "epoch": 0.13168275644427868, - "grad_norm": 0.36328125, - "learning_rate": 0.0001993894286565054, - "loss": 0.9004, + "epoch": 0.26335417981525044, + "grad_norm": 0.291015625, + "learning_rate": 0.0001841794508748679, + "loss": 0.9779, "step": 4590 }, { - "epoch": 0.1318262017127365, - "grad_norm": 0.40234375, - "learning_rate": 0.0001993838912939866, - "loss": 0.9964, + "epoch": 0.26364105800677035, + "grad_norm": 0.275390625, + "learning_rate": 0.00018412535328311814, + "loss": 1.0043, "step": 4595 }, { - "epoch": 0.13196964698119432, - "grad_norm": 0.419921875, - "learning_rate": 0.00019937832901269187, - "loss": 1.0243, + "epoch": 0.2639279361982902, + "grad_norm": 0.240234375, + "learning_rate": 0.00018407117133027544, + "loss": 0.9412, "step": 4600 }, { - "epoch": 0.13211309224965215, - "grad_norm": 0.369140625, - "learning_rate": 0.00019937274181401588, - "loss": 1.0069, + "epoch": 0.2642148143898101, + "grad_norm": 0.267578125, + "learning_rate": 0.0001840169050706736, + "loss": 0.9353, "step": 4605 }, { - "epoch": 0.13225653751810998, - "grad_norm": 0.380859375, - "learning_rate": 0.00019936712969935948, - "loss": 0.9763, + "epoch": 0.26450169258133, + "grad_norm": 0.267578125, + "learning_rate": 0.00018396255455873102, + "loss": 0.962, "step": 4610 }, { - "epoch": 0.13239998278656778, - "grad_norm": 0.380859375, - "learning_rate": 0.00019936149267012985, - 
"loss": 1.0147, + "epoch": 0.26478857077284984, + "grad_norm": 0.26953125, + "learning_rate": 0.0001839081198489504, + "loss": 1.0103, "step": 4615 }, { - "epoch": 0.1325434280550256, - "grad_norm": 0.404296875, - "learning_rate": 0.00019935583072774039, - "loss": 1.0474, + "epoch": 0.26507544896436974, + "grad_norm": 0.275390625, + "learning_rate": 0.0001838536009959191, + "loss": 1.0115, "step": 4620 }, { - "epoch": 0.13268687332348342, - "grad_norm": 0.3515625, - "learning_rate": 0.00019935014387361066, - "loss": 1.0179, + "epoch": 0.2653623271558896, + "grad_norm": 0.2578125, + "learning_rate": 0.00018379899805430862, + "loss": 0.9928, "step": 4625 }, { - "epoch": 0.13283031859194125, - "grad_norm": 0.40234375, - "learning_rate": 0.0001993444321091666, - "loss": 1.0366, + "epoch": 0.2656492053474095, + "grad_norm": 0.283203125, + "learning_rate": 0.00018374431107887502, + "loss": 0.9347, "step": 4630 }, { - "epoch": 0.13297376386039905, - "grad_norm": 0.388671875, - "learning_rate": 0.00019933869543584034, - "loss": 0.9472, + "epoch": 0.2659360835389294, + "grad_norm": 0.291015625, + "learning_rate": 0.00018368954012445846, + "loss": 0.9674, "step": 4635 }, { - "epoch": 0.13311720912885688, - "grad_norm": 0.36328125, - "learning_rate": 0.0001993329338550702, - "loss": 0.9302, + "epoch": 0.26622296173044924, + "grad_norm": 0.267578125, + "learning_rate": 0.00018363468524598342, + "loss": 0.918, "step": 4640 }, { - "epoch": 0.13326065439731472, - "grad_norm": 0.35546875, - "learning_rate": 0.00019932714736830084, - "loss": 0.9195, + "epoch": 0.26650983992196914, + "grad_norm": 0.298828125, + "learning_rate": 0.0001835797464984585, + "loss": 0.9558, "step": 4645 }, { - "epoch": 0.13340409966577252, - "grad_norm": 0.408203125, - "learning_rate": 0.00019932133597698313, - "loss": 0.9712, + "epoch": 0.26679671811348904, + "grad_norm": 0.259765625, + "learning_rate": 0.00018352472393697632, + "loss": 0.9257, "step": 4650 }, { - "epoch": 0.13354754493423035, - "grad_norm": 0.3671875, - "learning_rate": 0.00019931549968257414, - "loss": 0.9656, + "epoch": 0.2670835963050089, + "grad_norm": 0.28125, + "learning_rate": 0.0001834696176167137, + "loss": 0.9475, "step": 4655 }, { - "epoch": 0.13369099020268815, - "grad_norm": 0.365234375, - "learning_rate": 0.0001993096384865372, - "loss": 0.9229, + "epoch": 0.2673704744965288, + "grad_norm": 0.265625, + "learning_rate": 0.00018341442759293137, + "loss": 0.9681, "step": 4660 }, { - "epoch": 0.13383443547114598, - "grad_norm": 0.3828125, - "learning_rate": 0.000199303752390342, - "loss": 1.002, + "epoch": 0.26765735268804863, + "grad_norm": 0.28125, + "learning_rate": 0.00018335915392097402, + "loss": 0.9579, "step": 4665 }, { - "epoch": 0.13397788073960382, - "grad_norm": 0.369140625, - "learning_rate": 0.00019929784139546426, - "loss": 0.9495, + "epoch": 0.26794423087956853, + "grad_norm": 0.279296875, + "learning_rate": 0.00018330379665627014, + "loss": 0.9203, "step": 4670 }, { - "epoch": 0.13412132600806162, - "grad_norm": 0.38671875, - "learning_rate": 0.0001992919055033861, - "loss": 1.0575, + "epoch": 0.26823110907108844, + "grad_norm": 0.279296875, + "learning_rate": 0.00018324835585433225, + "loss": 0.927, "step": 4675 }, { - "epoch": 0.13426477127651945, - "grad_norm": 0.46875, - "learning_rate": 0.00019928594471559587, - "loss": 1.037, + "epoch": 0.2685179872626083, + "grad_norm": 0.267578125, + "learning_rate": 0.00018319283157075636, + "loss": 1.0078, "step": 4680 }, { - "epoch": 0.13440821654497725, - "grad_norm": 0.361328125, - 
"learning_rate": 0.00019927995903358813, - "loss": 0.9606, + "epoch": 0.2688048654541282, + "grad_norm": 0.259765625, + "learning_rate": 0.00018313722386122247, + "loss": 0.9204, "step": 4685 }, { - "epoch": 0.1345516618134351, - "grad_norm": 0.369140625, - "learning_rate": 0.00019927394845886361, - "loss": 1.0714, + "epoch": 0.2690917436456481, + "grad_norm": 0.28515625, + "learning_rate": 0.00018308153278149406, + "loss": 0.9469, "step": 4690 }, { - "epoch": 0.13469510708189292, - "grad_norm": 0.41015625, - "learning_rate": 0.00019926791299292948, - "loss": 1.0026, + "epoch": 0.26937862183716793, + "grad_norm": 0.2734375, + "learning_rate": 0.00018302575838741826, + "loss": 1.0207, "step": 4695 }, { - "epoch": 0.13483855235035072, - "grad_norm": 0.39453125, - "learning_rate": 0.00019926185263729896, - "loss": 0.9285, + "epoch": 0.26966550002868783, + "grad_norm": 0.27734375, + "learning_rate": 0.0001829699007349258, + "loss": 0.9881, "step": 4700 }, { - "epoch": 0.13498199761880855, - "grad_norm": 0.33984375, - "learning_rate": 0.00019925576739349151, - "loss": 0.8224, + "epoch": 0.2699523782202077, + "grad_norm": 0.26953125, + "learning_rate": 0.0001829139598800308, + "loss": 0.8757, "step": 4705 }, { - "epoch": 0.13512544288726636, - "grad_norm": 0.34375, - "learning_rate": 0.00019924965726303304, - "loss": 0.9984, + "epoch": 0.2702392564117276, + "grad_norm": 0.271484375, + "learning_rate": 0.00018285793587883092, + "loss": 1.0423, "step": 4710 }, { - "epoch": 0.1352688881557242, - "grad_norm": 0.404296875, - "learning_rate": 0.00019924352224745548, - "loss": 0.8603, + "epoch": 0.2705261346032475, + "grad_norm": 0.255859375, + "learning_rate": 0.00018280182878750717, + "loss": 1.0039, "step": 4715 }, { - "epoch": 0.135412333424182, - "grad_norm": 0.384765625, - "learning_rate": 0.0001992373623482971, - "loss": 0.9693, + "epoch": 0.2708130127947673, + "grad_norm": 0.2734375, + "learning_rate": 0.0001827456386623238, + "loss": 0.9478, "step": 4720 }, { - "epoch": 0.13555577869263982, - "grad_norm": 0.357421875, - "learning_rate": 0.00019923117756710232, - "loss": 0.9594, + "epoch": 0.27109989098628723, + "grad_norm": 0.271484375, + "learning_rate": 0.00018268936555962845, + "loss": 1.0252, "step": 4725 }, { - "epoch": 0.13569922396109765, - "grad_norm": 0.37890625, - "learning_rate": 0.00019922496790542195, - "loss": 0.9637, + "epoch": 0.27138676917780713, + "grad_norm": 0.263671875, + "learning_rate": 0.0001826330095358519, + "loss": 0.9751, "step": 4730 }, { - "epoch": 0.13584266922955546, - "grad_norm": 0.353515625, - "learning_rate": 0.00019921873336481292, - "loss": 0.9087, + "epoch": 0.271673647369327, + "grad_norm": 0.3046875, + "learning_rate": 0.00018257657064750808, + "loss": 0.9103, "step": 4735 }, { - "epoch": 0.1359861144980133, - "grad_norm": 0.40234375, - "learning_rate": 0.00019921247394683844, - "loss": 0.992, + "epoch": 0.2719605255608469, + "grad_norm": 0.263671875, + "learning_rate": 0.00018252004895119404, + "loss": 0.9596, "step": 4740 }, { - "epoch": 0.1361295597664711, - "grad_norm": 0.390625, - "learning_rate": 0.00019920618965306797, - "loss": 1.1117, + "epoch": 0.2722474037523667, + "grad_norm": 0.26953125, + "learning_rate": 0.00018246344450358986, + "loss": 0.9718, "step": 4745 }, { - "epoch": 0.13627300503492892, - "grad_norm": 0.349609375, - "learning_rate": 0.00019919988048507713, - "loss": 0.9593, + "epoch": 0.2725342819438866, + "grad_norm": 0.255859375, + "learning_rate": 0.00018240675736145865, + "loss": 1.0375, "step": 4750 }, { - "epoch": 
0.13641645030338675, - "grad_norm": 0.345703125, - "learning_rate": 0.00019919354644444787, - "loss": 1.0375, + "epoch": 0.2728211601354065, + "grad_norm": 0.296875, + "learning_rate": 0.0001823499875816464, + "loss": 0.966, "step": 4755 }, { - "epoch": 0.13655989557184456, - "grad_norm": 0.392578125, - "learning_rate": 0.00019918718753276834, - "loss": 0.9779, + "epoch": 0.2731080383269264, + "grad_norm": 0.294921875, + "learning_rate": 0.00018229313522108187, + "loss": 1.0054, "step": 4760 }, { - "epoch": 0.1367033408403024, - "grad_norm": 0.376953125, - "learning_rate": 0.00019918080375163296, - "loss": 0.8934, + "epoch": 0.2733949165184463, + "grad_norm": 0.27734375, + "learning_rate": 0.00018223620033677685, + "loss": 0.9734, "step": 4765 }, { - "epoch": 0.1368467861087602, - "grad_norm": 0.357421875, - "learning_rate": 0.00019917439510264227, - "loss": 0.948, + "epoch": 0.2736817947099662, + "grad_norm": 0.35546875, + "learning_rate": 0.00018217918298582572, + "loss": 0.9234, "step": 4770 }, { - "epoch": 0.13699023137721802, - "grad_norm": 0.361328125, - "learning_rate": 0.0001991679615874032, - "loss": 0.9964, + "epoch": 0.273968672901486, + "grad_norm": 0.275390625, + "learning_rate": 0.0001821220832254056, + "loss": 0.9812, "step": 4775 }, { - "epoch": 0.13713367664567586, - "grad_norm": 0.373046875, - "learning_rate": 0.0001991615032075288, - "loss": 0.9892, + "epoch": 0.2742555510930059, + "grad_norm": 0.259765625, + "learning_rate": 0.00018206490111277629, + "loss": 0.9476, "step": 4780 }, { - "epoch": 0.13727712191413366, - "grad_norm": 0.38671875, - "learning_rate": 0.00019915501996463835, - "loss": 0.9841, + "epoch": 0.27454242928452577, + "grad_norm": 0.26953125, + "learning_rate": 0.00018200763670528011, + "loss": 0.9397, "step": 4785 }, { - "epoch": 0.1374205671825915, - "grad_norm": 0.365234375, - "learning_rate": 0.00019914851186035755, - "loss": 1.0096, + "epoch": 0.27482930747604567, + "grad_norm": 0.271484375, + "learning_rate": 0.00018195029006034193, + "loss": 0.947, "step": 4790 }, { - "epoch": 0.1375640124510493, - "grad_norm": 0.3671875, - "learning_rate": 0.0001991419788963181, - "loss": 1.0167, + "epoch": 0.27511618566756557, + "grad_norm": 0.271484375, + "learning_rate": 0.00018189286123546916, + "loss": 1.0403, "step": 4795 }, { - "epoch": 0.13770745771950713, - "grad_norm": 0.404296875, - "learning_rate": 0.00019913542107415801, - "loss": 0.8497, + "epoch": 0.2754030638590854, + "grad_norm": 0.271484375, + "learning_rate": 0.00018183535028825149, + "loss": 0.9509, "step": 4800 }, { - "epoch": 0.13785090298796493, - "grad_norm": 0.3671875, - "learning_rate": 0.00019912883839552162, - "loss": 0.9096, + "epoch": 0.2756899420506053, + "grad_norm": 0.267578125, + "learning_rate": 0.00018177775727636105, + "loss": 0.9679, "step": 4805 }, { - "epoch": 0.13799434825642276, - "grad_norm": 0.380859375, - "learning_rate": 0.00019912223086205932, - "loss": 1.0098, + "epoch": 0.2759768202421252, + "grad_norm": 0.271484375, + "learning_rate": 0.00018172008225755224, + "loss": 0.9199, "step": 4810 }, { - "epoch": 0.1381377935248806, - "grad_norm": 0.39453125, - "learning_rate": 0.00019911559847542791, - "loss": 0.9323, + "epoch": 0.27626369843364507, + "grad_norm": 0.251953125, + "learning_rate": 0.00018166232528966169, + "loss": 0.9235, "step": 4815 }, { - "epoch": 0.1382812387933384, - "grad_norm": 0.373046875, - "learning_rate": 0.00019910894123729032, - "loss": 1.0356, + "epoch": 0.27655057662516497, + "grad_norm": 0.30078125, + "learning_rate": 0.0001816044864306082, 
+ "loss": 0.9656, "step": 4820 }, { - "epoch": 0.13842468406179623, - "grad_norm": 0.3828125, - "learning_rate": 0.00019910225914931574, - "loss": 1.0298, + "epoch": 0.2768374548166848, + "grad_norm": 0.27734375, + "learning_rate": 0.00018154656573839275, + "loss": 1.1683, "step": 4825 }, { - "epoch": 0.13856812933025403, - "grad_norm": 0.3984375, - "learning_rate": 0.00019909555221317955, - "loss": 1.0277, + "epoch": 0.2771243330082047, + "grad_norm": 0.2734375, + "learning_rate": 0.00018148856327109832, + "loss": 0.9441, "step": 4830 }, { - "epoch": 0.13871157459871186, - "grad_norm": 0.427734375, - "learning_rate": 0.00019908882043056345, - "loss": 0.9226, + "epoch": 0.2774112111997246, + "grad_norm": 0.263671875, + "learning_rate": 0.00018143047908688993, + "loss": 0.9285, "step": 4835 }, { - "epoch": 0.1388550198671697, - "grad_norm": 0.33984375, - "learning_rate": 0.00019908206380315528, - "loss": 0.9958, + "epoch": 0.27769808939124446, + "grad_norm": 0.255859375, + "learning_rate": 0.00018137231324401448, + "loss": 1.0174, "step": 4840 }, { - "epoch": 0.1389984651356275, - "grad_norm": 0.38671875, - "learning_rate": 0.0001990752823326492, - "loss": 0.9406, + "epoch": 0.27798496758276436, + "grad_norm": 0.271484375, + "learning_rate": 0.00018131406580080084, + "loss": 0.9398, "step": 4845 }, { - "epoch": 0.13914191040408533, - "grad_norm": 0.3828125, - "learning_rate": 0.00019906847602074547, - "loss": 0.8654, + "epoch": 0.27827184577428427, + "grad_norm": 0.267578125, + "learning_rate": 0.00018125573681565969, + "loss": 0.9351, "step": 4850 }, { - "epoch": 0.13928535567254313, - "grad_norm": 0.37109375, - "learning_rate": 0.00019906164486915072, - "loss": 0.9634, + "epoch": 0.2785587239658041, + "grad_norm": 0.267578125, + "learning_rate": 0.00018119732634708346, + "loss": 0.98, "step": 4855 }, { - "epoch": 0.13942880094100096, - "grad_norm": 0.384765625, - "learning_rate": 0.00019905478887957773, - "loss": 0.9814, + "epoch": 0.278845602157324, + "grad_norm": 0.275390625, + "learning_rate": 0.0001811388344536463, + "loss": 0.936, "step": 4860 }, { - "epoch": 0.1395722462094588, - "grad_norm": 0.3984375, - "learning_rate": 0.00019904790805374547, - "loss": 0.9974, + "epoch": 0.27913248034884386, + "grad_norm": 0.26953125, + "learning_rate": 0.00018108026119400397, + "loss": 0.9626, "step": 4865 }, { - "epoch": 0.1397156914779166, - "grad_norm": 0.3515625, - "learning_rate": 0.00019904100239337923, - "loss": 0.9649, + "epoch": 0.27941935854036376, + "grad_norm": 0.26953125, + "learning_rate": 0.00018102160662689394, + "loss": 0.9362, "step": 4870 }, { - "epoch": 0.13985913674637443, - "grad_norm": 0.427734375, - "learning_rate": 0.00019903407190021048, - "loss": 1.0921, + "epoch": 0.27970623673188366, + "grad_norm": 0.345703125, + "learning_rate": 0.0001809628708111351, + "loss": 0.9749, "step": 4875 }, { - "epoch": 0.14000258201483223, - "grad_norm": 0.37109375, - "learning_rate": 0.00019902711657597693, - "loss": 1.0239, + "epoch": 0.2799931149234035, + "grad_norm": 0.267578125, + "learning_rate": 0.00018090405380562786, + "loss": 0.9594, "step": 4880 }, { - "epoch": 0.14014602728329006, - "grad_norm": 0.353515625, - "learning_rate": 0.00019902013642242246, - "loss": 0.9671, + "epoch": 0.2802799931149234, + "grad_norm": 0.302734375, + "learning_rate": 0.00018084515566935402, + "loss": 1.0839, "step": 4885 }, { - "epoch": 0.14028947255174787, - "grad_norm": 0.375, - "learning_rate": 0.00019901313144129727, - "loss": 0.9676, + "epoch": 0.2805668713064433, + "grad_norm": 0.259765625, + 
"learning_rate": 0.00018078617646137682, + "loss": 0.9831, "step": 4890 }, { - "epoch": 0.1404329178202057, - "grad_norm": 0.392578125, - "learning_rate": 0.00019900610163435772, - "loss": 1.0477, + "epoch": 0.28085374949796316, + "grad_norm": 0.326171875, + "learning_rate": 0.00018072711624084068, + "loss": 0.9761, "step": 4895 }, { - "epoch": 0.14057636308866353, - "grad_norm": 0.376953125, - "learning_rate": 0.00019899904700336642, - "loss": 1.0363, + "epoch": 0.28114062768948306, + "grad_norm": 0.2578125, + "learning_rate": 0.00018066797506697136, + "loss": 1.0078, "step": 4900 }, { - "epoch": 0.14071980835712133, - "grad_norm": 0.38671875, - "learning_rate": 0.00019899196755009216, - "loss": 1.015, + "epoch": 0.2814275058810029, + "grad_norm": 0.291015625, + "learning_rate": 0.0001806087529990758, + "loss": 0.9566, "step": 4905 }, { - "epoch": 0.14086325362557917, - "grad_norm": 0.376953125, - "learning_rate": 0.00019898486327631006, - "loss": 1.0132, + "epoch": 0.2817143840725228, + "grad_norm": 0.267578125, + "learning_rate": 0.00018054945009654194, + "loss": 0.9637, "step": 4910 }, { - "epoch": 0.14100669889403697, - "grad_norm": 0.3359375, - "learning_rate": 0.00019897773418380132, - "loss": 0.9459, + "epoch": 0.2820012622640427, + "grad_norm": 0.25390625, + "learning_rate": 0.00018049006641883888, + "loss": 0.9415, "step": 4915 }, { - "epoch": 0.1411501441624948, - "grad_norm": 0.5859375, - "learning_rate": 0.00019897058027435347, - "loss": 1.0785, + "epoch": 0.28228814045556255, + "grad_norm": 0.28515625, + "learning_rate": 0.00018043060202551674, + "loss": 1.0057, "step": 4920 }, { - "epoch": 0.14129358943095263, - "grad_norm": 0.38671875, - "learning_rate": 0.0001989634015497602, - "loss": 1.058, + "epoch": 0.28257501864708245, + "grad_norm": 0.27734375, + "learning_rate": 0.00018037105697620655, + "loss": 0.9708, "step": 4925 }, { - "epoch": 0.14143703469941044, - "grad_norm": 0.357421875, - "learning_rate": 0.00019895619801182148, - "loss": 0.9681, + "epoch": 0.28286189683860236, + "grad_norm": 0.2890625, + "learning_rate": 0.0001803114313306202, + "loss": 0.9813, "step": 4930 }, { - "epoch": 0.14158047996786827, - "grad_norm": 0.392578125, - "learning_rate": 0.00019894896966234348, - "loss": 0.993, + "epoch": 0.2831487750301222, + "grad_norm": 0.255859375, + "learning_rate": 0.00018025172514855043, + "loss": 0.9325, "step": 4935 }, { - "epoch": 0.14172392523632607, - "grad_norm": 0.3984375, - "learning_rate": 0.00019894171650313856, - "loss": 0.9623, + "epoch": 0.2834356532216421, + "grad_norm": 0.26953125, + "learning_rate": 0.0001801919384898707, + "loss": 0.9666, "step": 4940 }, { - "epoch": 0.1418673705047839, - "grad_norm": 0.39453125, - "learning_rate": 0.00019893443853602534, - "loss": 1.0055, + "epoch": 0.28372253141316195, + "grad_norm": 0.271484375, + "learning_rate": 0.00018013207141453523, + "loss": 0.962, "step": 4945 }, { - "epoch": 0.14201081577324173, - "grad_norm": 0.3828125, - "learning_rate": 0.00019892713576282863, - "loss": 1.0476, + "epoch": 0.28400940960468185, + "grad_norm": 0.283203125, + "learning_rate": 0.00018007212398257888, + "loss": 0.9707, "step": 4950 }, { - "epoch": 0.14215426104169954, - "grad_norm": 0.357421875, - "learning_rate": 0.00019891980818537944, - "loss": 1.0203, + "epoch": 0.28429628779620175, + "grad_norm": 0.26953125, + "learning_rate": 0.00018001209625411705, + "loss": 1.0216, "step": 4955 }, { - "epoch": 0.14229770631015737, - "grad_norm": 0.384765625, - "learning_rate": 0.00019891245580551509, - "loss": 0.9295, + "epoch": 
0.2845831659877216, + "grad_norm": 0.263671875, + "learning_rate": 0.00017995198828934568, + "loss": 0.9455, "step": 4960 }, { - "epoch": 0.14244115157861517, - "grad_norm": 0.396484375, - "learning_rate": 0.00019890507862507906, - "loss": 1.0058, + "epoch": 0.2848700441792415, + "grad_norm": 0.2734375, + "learning_rate": 0.00017989180014854115, + "loss": 0.9858, "step": 4965 }, { - "epoch": 0.142584596847073, - "grad_norm": 0.365234375, - "learning_rate": 0.000198897676645921, - "loss": 0.9721, + "epoch": 0.2851569223707614, + "grad_norm": 0.259765625, + "learning_rate": 0.0001798315318920603, + "loss": 0.9292, "step": 4970 }, { - "epoch": 0.1427280421155308, - "grad_norm": 0.384765625, - "learning_rate": 0.00019889024986989687, - "loss": 1.0602, + "epoch": 0.28544380056228125, + "grad_norm": 0.265625, + "learning_rate": 0.00017977118358034023, + "loss": 0.9592, "step": 4975 }, { - "epoch": 0.14287148738398864, - "grad_norm": 0.3828125, - "learning_rate": 0.00019888279829886877, - "loss": 0.9611, + "epoch": 0.28573067875380115, + "grad_norm": 0.259765625, + "learning_rate": 0.0001797107552738984, + "loss": 0.9366, "step": 4980 }, { - "epoch": 0.14301493265244647, - "grad_norm": 0.384765625, - "learning_rate": 0.00019887532193470507, - "loss": 0.9685, + "epoch": 0.286017556945321, + "grad_norm": 0.25, + "learning_rate": 0.00017965024703333246, + "loss": 0.9745, "step": 4985 }, { - "epoch": 0.14315837792090427, - "grad_norm": 0.376953125, - "learning_rate": 0.00019886782077928038, - "loss": 0.8632, + "epoch": 0.2863044351368409, + "grad_norm": 0.267578125, + "learning_rate": 0.0001795896589193202, + "loss": 0.9848, "step": 4990 }, { - "epoch": 0.1433018231893621, - "grad_norm": 0.376953125, - "learning_rate": 0.00019886029483447541, - "loss": 1.0085, + "epoch": 0.2865913133283608, + "grad_norm": 0.259765625, + "learning_rate": 0.00017952899099261943, + "loss": 0.978, "step": 4995 }, { - "epoch": 0.1434452684578199, - "grad_norm": 0.392578125, - "learning_rate": 0.0001988527441021772, - "loss": 1.0076, + "epoch": 0.28687819151988064, + "grad_norm": 0.265625, + "learning_rate": 0.00017946824331406823, + "loss": 0.9613, "step": 5000 }, { - "epoch": 0.14358871372627774, - "grad_norm": 0.396484375, - "learning_rate": 0.00019884516858427893, - "loss": 0.9437, + "epoch": 0.28716506971140054, + "grad_norm": 0.267578125, + "learning_rate": 0.00017940741594458444, + "loss": 1.0832, "step": 5005 }, { - "epoch": 0.14373215899473557, - "grad_norm": 0.36328125, - "learning_rate": 0.0001988375682826801, - "loss": 0.9166, + "epoch": 0.28745194790292045, + "grad_norm": 0.267578125, + "learning_rate": 0.00017934650894516584, + "loss": 0.9122, "step": 5010 }, { - "epoch": 0.14387560426319337, - "grad_norm": 0.384765625, - "learning_rate": 0.00019882994319928626, - "loss": 0.9377, + "epoch": 0.2877388260944403, + "grad_norm": 0.2578125, + "learning_rate": 0.00017928552237689015, + "loss": 0.9856, "step": 5015 }, { - "epoch": 0.1440190495316512, - "grad_norm": 0.43359375, - "learning_rate": 0.00019882229333600932, - "loss": 1.0526, + "epoch": 0.2880257042859602, + "grad_norm": 0.26953125, + "learning_rate": 0.00017922445630091485, + "loss": 0.9846, "step": 5020 }, { - "epoch": 0.144162494800109, - "grad_norm": 0.40234375, - "learning_rate": 0.00019881461869476739, - "loss": 1.0008, + "epoch": 0.28831258247748004, + "grad_norm": 0.2578125, + "learning_rate": 0.0001791633107784771, + "loss": 0.9521, "step": 5025 }, { - "epoch": 0.14430594006856684, - "grad_norm": 0.37890625, - "learning_rate": 
0.00019880691927748467, - "loss": 1.1069, + "epoch": 0.28859946066899994, + "grad_norm": 0.251953125, + "learning_rate": 0.0001791020858708938, + "loss": 0.9727, "step": 5030 }, { - "epoch": 0.14444938533702467, - "grad_norm": 0.345703125, - "learning_rate": 0.0001987991950860917, - "loss": 0.8695, + "epoch": 0.28888633886051984, + "grad_norm": 0.265625, + "learning_rate": 0.00017904078163956142, + "loss": 0.9935, "step": 5035 }, { - "epoch": 0.14459283060548248, - "grad_norm": 0.4453125, - "learning_rate": 0.00019879144612252515, - "loss": 1.026, + "epoch": 0.2891732170520397, + "grad_norm": 0.271484375, + "learning_rate": 0.00017897939814595596, + "loss": 0.9994, "step": 5040 }, { - "epoch": 0.1447362758739403, - "grad_norm": 0.365234375, - "learning_rate": 0.00019878367238872799, - "loss": 0.9005, + "epoch": 0.2894600952435596, + "grad_norm": 0.267578125, + "learning_rate": 0.00017891793545163297, + "loss": 0.9973, "step": 5045 }, { - "epoch": 0.1448797211423981, - "grad_norm": 0.373046875, - "learning_rate": 0.00019877587388664934, - "loss": 0.9686, + "epoch": 0.2897469734350795, + "grad_norm": 0.267578125, + "learning_rate": 0.00017885639361822728, + "loss": 0.9503, "step": 5050 }, { - "epoch": 0.14502316641085594, - "grad_norm": 0.390625, - "learning_rate": 0.00019876805061824445, - "loss": 0.9531, + "epoch": 0.29003385162659934, + "grad_norm": 0.275390625, + "learning_rate": 0.00017879477270745328, + "loss": 0.9388, "step": 5055 }, { - "epoch": 0.14516661167931375, - "grad_norm": 0.41015625, - "learning_rate": 0.000198760202585475, - "loss": 0.9869, + "epoch": 0.29032072981811924, + "grad_norm": 0.2734375, + "learning_rate": 0.0001787330727811045, + "loss": 0.9239, "step": 5060 }, { - "epoch": 0.14531005694777158, - "grad_norm": 0.37890625, - "learning_rate": 0.00019875232979030868, - "loss": 0.961, + "epoch": 0.2906076080096391, + "grad_norm": 0.25, + "learning_rate": 0.00017867129390105384, + "loss": 0.9892, "step": 5065 }, { - "epoch": 0.1454535022162294, - "grad_norm": 0.427734375, - "learning_rate": 0.00019874443223471945, - "loss": 1.0017, + "epoch": 0.290894486201159, + "grad_norm": 0.263671875, + "learning_rate": 0.0001786094361292532, + "loss": 0.9695, "step": 5070 }, { - "epoch": 0.1455969474846872, - "grad_norm": 0.388671875, - "learning_rate": 0.0001987365099206875, - "loss": 1.0162, + "epoch": 0.2911813643926789, + "grad_norm": 0.2734375, + "learning_rate": 0.00017854749952773372, + "loss": 0.9476, "step": 5075 }, { - "epoch": 0.14574039275314504, - "grad_norm": 0.373046875, - "learning_rate": 0.0001987285628501992, - "loss": 0.9997, + "epoch": 0.29146824258419873, + "grad_norm": 0.259765625, + "learning_rate": 0.00017848548415860563, + "loss": 0.9391, "step": 5080 }, { - "epoch": 0.14588383802160285, - "grad_norm": 0.35546875, - "learning_rate": 0.00019872059102524716, - "loss": 1.0157, + "epoch": 0.29175512077571863, + "grad_norm": 0.279296875, + "learning_rate": 0.00017842339008405803, + "loss": 1.0145, "step": 5085 }, { - "epoch": 0.14602728329006068, - "grad_norm": 0.412109375, - "learning_rate": 0.00019871259444783016, - "loss": 0.9633, + "epoch": 0.29204199896723854, + "grad_norm": 0.251953125, + "learning_rate": 0.00017836121736635894, + "loss": 0.9003, "step": 5090 }, { - "epoch": 0.1461707285585185, - "grad_norm": 0.376953125, - "learning_rate": 0.00019870457311995318, - "loss": 0.9256, + "epoch": 0.2923288771587584, + "grad_norm": 0.28515625, + "learning_rate": 0.00017829896606785543, + "loss": 0.9675, "step": 5095 }, { - "epoch": 0.1463141738269763, - 
"grad_norm": 0.35546875, - "learning_rate": 0.0001986965270436275, - "loss": 1.0445, + "epoch": 0.2926157553502783, + "grad_norm": 0.2734375, + "learning_rate": 0.00017823663625097312, + "loss": 0.951, "step": 5100 }, { - "epoch": 0.14645761909543414, - "grad_norm": 0.375, - "learning_rate": 0.00019868845622087044, - "loss": 0.9782, + "epoch": 0.29290263354179813, + "grad_norm": 0.275390625, + "learning_rate": 0.00017817422797821656, + "loss": 0.9628, "step": 5105 }, { - "epoch": 0.14660106436389195, - "grad_norm": 0.373046875, - "learning_rate": 0.00019868036065370567, - "loss": 0.9825, + "epoch": 0.29318951173331803, + "grad_norm": 0.265625, + "learning_rate": 0.0001781117413121689, + "loss": 0.9483, "step": 5110 }, { - "epoch": 0.14674450963234978, - "grad_norm": 0.349609375, - "learning_rate": 0.00019867224034416303, - "loss": 1.0217, + "epoch": 0.29347638992483793, + "grad_norm": 0.28515625, + "learning_rate": 0.00017804917631549189, + "loss": 1.0401, "step": 5115 }, { - "epoch": 0.1468879549008076, - "grad_norm": 0.37109375, - "learning_rate": 0.00019866409529427852, - "loss": 0.981, + "epoch": 0.2937632681163578, + "grad_norm": 0.28125, + "learning_rate": 0.00017798653305092584, + "loss": 0.9795, "step": 5120 }, { - "epoch": 0.1470314001692654, - "grad_norm": 0.37109375, - "learning_rate": 0.00019865592550609434, - "loss": 1.0775, + "epoch": 0.2940501463078777, + "grad_norm": 0.296875, + "learning_rate": 0.00017792381158128956, + "loss": 0.9764, "step": 5125 }, { - "epoch": 0.14717484543772325, - "grad_norm": 0.365234375, - "learning_rate": 0.00019864773098165898, - "loss": 1.0058, + "epoch": 0.2943370244993976, + "grad_norm": 0.2578125, + "learning_rate": 0.00017786101196948034, + "loss": 0.9816, "step": 5130 }, { - "epoch": 0.14731829070618105, - "grad_norm": 0.390625, - "learning_rate": 0.00019863951172302704, - "loss": 0.9876, + "epoch": 0.2946239026909174, + "grad_norm": 0.279296875, + "learning_rate": 0.00017779813427847368, + "loss": 1.0152, "step": 5135 }, { - "epoch": 0.14746173597463888, - "grad_norm": 0.3671875, - "learning_rate": 0.00019863126773225935, - "loss": 0.9418, + "epoch": 0.29491078088243733, + "grad_norm": 0.267578125, + "learning_rate": 0.00017773517857132355, + "loss": 0.9595, "step": 5140 }, { - "epoch": 0.14760518124309668, - "grad_norm": 0.376953125, - "learning_rate": 0.00019862299901142298, - "loss": 0.9942, + "epoch": 0.2951976590739572, + "grad_norm": 0.287109375, + "learning_rate": 0.00017767214491116198, + "loss": 0.947, "step": 5145 }, { - "epoch": 0.14774862651155451, - "grad_norm": 0.40625, - "learning_rate": 0.00019861470556259113, - "loss": 0.9789, + "epoch": 0.2954845372654771, + "grad_norm": 0.27734375, + "learning_rate": 0.00017760903336119937, + "loss": 1.0049, "step": 5150 }, { - "epoch": 0.14789207178001235, - "grad_norm": 0.404296875, - "learning_rate": 0.00019860638738784328, - "loss": 1.0674, + "epoch": 0.295771415456997, + "grad_norm": 0.26171875, + "learning_rate": 0.00017754584398472405, + "loss": 0.8602, "step": 5155 }, { - "epoch": 0.14803551704847015, - "grad_norm": 0.376953125, - "learning_rate": 0.00019859804448926503, - "loss": 1.0097, + "epoch": 0.2960582936485168, + "grad_norm": 0.26953125, + "learning_rate": 0.0001774825768451025, + "loss": 0.9468, "step": 5160 }, { - "epoch": 0.14817896231692798, - "grad_norm": 0.357421875, - "learning_rate": 0.00019858967686894823, - "loss": 1.028, + "epoch": 0.2963451718400367, + "grad_norm": 0.255859375, + "learning_rate": 0.00017741923200577917, + "loss": 0.9763, "step": 5165 }, { - 
"epoch": 0.14832240758538578, - "grad_norm": 0.404296875, - "learning_rate": 0.0001985812845289909, - "loss": 0.9835, + "epoch": 0.2966320500315566, + "grad_norm": 0.28125, + "learning_rate": 0.00017735580953027636, + "loss": 1.0153, "step": 5170 }, { - "epoch": 0.14846585285384362, - "grad_norm": 0.37109375, - "learning_rate": 0.00019857286747149727, - "loss": 0.9816, + "epoch": 0.2969189282230765, + "grad_norm": 0.27734375, + "learning_rate": 0.00017729230948219428, + "loss": 0.9664, "step": 5175 }, { - "epoch": 0.14860929812230145, - "grad_norm": 0.361328125, - "learning_rate": 0.0001985644256985778, - "loss": 0.9083, + "epoch": 0.2972058064145964, + "grad_norm": 0.267578125, + "learning_rate": 0.00017722873192521096, + "loss": 0.9508, "step": 5180 }, { - "epoch": 0.14875274339075925, - "grad_norm": 0.37109375, - "learning_rate": 0.00019855595921234912, - "loss": 0.9378, + "epoch": 0.2974926846061162, + "grad_norm": 0.25, + "learning_rate": 0.00017716507692308207, + "loss": 0.9688, "step": 5185 }, { - "epoch": 0.14889618865921708, - "grad_norm": 0.38671875, - "learning_rate": 0.000198547468014934, - "loss": 0.9135, + "epoch": 0.2977795627976361, + "grad_norm": 0.251953125, + "learning_rate": 0.000177101344539641, + "loss": 1.0038, "step": 5190 }, { - "epoch": 0.14903963392767489, - "grad_norm": 0.40625, - "learning_rate": 0.00019853895210846155, - "loss": 1.0023, + "epoch": 0.298066440989156, + "grad_norm": 0.3046875, + "learning_rate": 0.00017703753483879874, + "loss": 1.0215, "step": 5195 }, { - "epoch": 0.14918307919613272, - "grad_norm": 0.3828125, - "learning_rate": 0.00019853041149506687, - "loss": 0.9184, + "epoch": 0.29835331918067587, + "grad_norm": 0.283203125, + "learning_rate": 0.0001769736478845438, + "loss": 1.0174, "step": 5200 }, { - "epoch": 0.14932652446459055, - "grad_norm": 0.41796875, - "learning_rate": 0.00019852184617689147, - "loss": 1.0655, + "epoch": 0.29864019737219577, + "grad_norm": 0.28125, + "learning_rate": 0.00017690968374094217, + "loss": 0.9131, "step": 5205 }, { - "epoch": 0.14946996973304835, - "grad_norm": 0.423828125, - "learning_rate": 0.0001985132561560829, - "loss": 1.0374, + "epoch": 0.29892707556371567, + "grad_norm": 0.26171875, + "learning_rate": 0.00017684564247213722, + "loss": 0.9034, "step": 5210 }, { - "epoch": 0.14961341500150618, - "grad_norm": 0.388671875, - "learning_rate": 0.000198504641434795, - "loss": 1.0227, + "epoch": 0.2992139537552355, + "grad_norm": 0.291015625, + "learning_rate": 0.00017678152414234968, + "loss": 0.9721, "step": 5215 }, { - "epoch": 0.149756860269964, - "grad_norm": 0.419921875, - "learning_rate": 0.00019849600201518768, - "loss": 1.0049, + "epoch": 0.2995008319467554, + "grad_norm": 0.265625, + "learning_rate": 0.00017671732881587756, + "loss": 1.0016, "step": 5220 }, { - "epoch": 0.14990030553842182, - "grad_norm": 0.380859375, - "learning_rate": 0.0001984873378994272, - "loss": 1.0322, + "epoch": 0.29978771013827527, + "grad_norm": 0.279296875, + "learning_rate": 0.0001766530565570961, + "loss": 0.9473, "step": 5225 }, { - "epoch": 0.15004375080687962, - "grad_norm": 0.40234375, - "learning_rate": 0.00019847864908968592, - "loss": 1.0779, + "epoch": 0.30007458832979517, + "grad_norm": 0.26171875, + "learning_rate": 0.00017658870743045757, + "loss": 1.0014, "step": 5230 }, { - "epoch": 0.15018719607533745, - "grad_norm": 0.3984375, - "learning_rate": 0.0001984699355881424, - "loss": 1.0147, + "epoch": 0.30036146652131507, + "grad_norm": 0.275390625, + "learning_rate": 0.00017652428150049152, + "loss": 
0.9541, "step": 5235 }, { - "epoch": 0.15033064134379528, - "grad_norm": 0.39453125, - "learning_rate": 0.0001984611973969814, - "loss": 1.0035, + "epoch": 0.3006483447128349, + "grad_norm": 0.265625, + "learning_rate": 0.0001764597788318044, + "loss": 0.9472, "step": 5240 }, { - "epoch": 0.1504740866122531, - "grad_norm": 0.3828125, - "learning_rate": 0.00019845243451839384, - "loss": 0.9827, + "epoch": 0.3009352229043548, + "grad_norm": 0.28125, + "learning_rate": 0.00017639519948907961, + "loss": 0.9852, "step": 5245 }, { - "epoch": 0.15061753188071092, - "grad_norm": 0.390625, - "learning_rate": 0.0001984436469545769, - "loss": 0.9091, + "epoch": 0.3012221010958747, + "grad_norm": 0.265625, + "learning_rate": 0.00017633054353707745, + "loss": 0.946, "step": 5250 }, { - "epoch": 0.15076097714916872, - "grad_norm": 0.3828125, - "learning_rate": 0.0001984348347077339, - "loss": 0.9714, + "epoch": 0.30150897928739456, + "grad_norm": 0.27734375, + "learning_rate": 0.0001762658110406351, + "loss": 0.9516, "step": 5255 }, { - "epoch": 0.15090442241762655, - "grad_norm": 0.3359375, - "learning_rate": 0.00019842599778007437, - "loss": 0.9415, + "epoch": 0.30179585747891446, + "grad_norm": 0.2890625, + "learning_rate": 0.00017620100206466635, + "loss": 0.9726, "step": 5260 }, { - "epoch": 0.15104786768608439, - "grad_norm": 0.443359375, - "learning_rate": 0.000198417136173814, - "loss": 0.9807, + "epoch": 0.3020827356704343, + "grad_norm": 0.2578125, + "learning_rate": 0.00017613611667416192, + "loss": 0.94, "step": 5265 }, { - "epoch": 0.1511913129545422, - "grad_norm": 0.36328125, - "learning_rate": 0.00019840824989117464, - "loss": 0.9482, + "epoch": 0.3023696138619542, + "grad_norm": 0.255859375, + "learning_rate": 0.00017607115493418896, + "loss": 0.9733, "step": 5270 }, { - "epoch": 0.15133475822300002, - "grad_norm": 0.37109375, - "learning_rate": 0.00019839933893438442, - "loss": 1.0559, + "epoch": 0.3026564920534741, + "grad_norm": 0.25390625, + "learning_rate": 0.00017600611690989123, + "loss": 1.0138, "step": 5275 }, { - "epoch": 0.15147820349145782, - "grad_norm": 0.41015625, - "learning_rate": 0.0001983904033056776, - "loss": 0.9692, + "epoch": 0.30294337024499396, + "grad_norm": 0.27734375, + "learning_rate": 0.00017594100266648906, + "loss": 0.9298, "step": 5280 }, { - "epoch": 0.15162164875991566, - "grad_norm": 0.361328125, - "learning_rate": 0.00019838144300729464, - "loss": 0.9174, + "epoch": 0.30323024843651386, + "grad_norm": 0.28515625, + "learning_rate": 0.0001758758122692791, + "loss": 0.9409, "step": 5285 }, { - "epoch": 0.1517650940283735, - "grad_norm": 0.369140625, - "learning_rate": 0.00019837245804148218, - "loss": 0.9278, + "epoch": 0.30351712662803376, + "grad_norm": 0.271484375, + "learning_rate": 0.00017581054578363445, + "loss": 0.9214, "step": 5290 }, { - "epoch": 0.1519085392968313, - "grad_norm": 0.396484375, - "learning_rate": 0.000198363448410493, - "loss": 0.9778, + "epoch": 0.3038040048195536, + "grad_norm": 0.263671875, + "learning_rate": 0.00017574520327500451, + "loss": 0.9794, "step": 5295 }, { - "epoch": 0.15205198456528912, - "grad_norm": 0.388671875, - "learning_rate": 0.00019835441411658616, - "loss": 1.0777, + "epoch": 0.3040908830110735, + "grad_norm": 0.267578125, + "learning_rate": 0.0001756797848089149, + "loss": 0.9635, "step": 5300 }, { - "epoch": 0.15219542983374693, - "grad_norm": 0.376953125, - "learning_rate": 0.00019834535516202682, - "loss": 1.1034, + "epoch": 0.30437776120259336, + "grad_norm": 0.271484375, + "learning_rate": 
0.00017561429045096733, + "loss": 0.9972, "step": 5305 }, { - "epoch": 0.15233887510220476, - "grad_norm": 0.39453125, - "learning_rate": 0.00019833627154908635, - "loss": 0.9702, + "epoch": 0.30466463939411326, + "grad_norm": 0.263671875, + "learning_rate": 0.00017554872026683978, + "loss": 0.9549, "step": 5310 }, { - "epoch": 0.15248232037066256, - "grad_norm": 0.458984375, - "learning_rate": 0.00019832716328004236, - "loss": 0.946, + "epoch": 0.30495151758563316, + "grad_norm": 0.32421875, + "learning_rate": 0.00017548307432228608, + "loss": 1.0005, "step": 5315 }, { - "epoch": 0.1526257656391204, - "grad_norm": 0.33203125, - "learning_rate": 0.00019831803035717853, - "loss": 1.0495, + "epoch": 0.305238395777153, + "grad_norm": 0.265625, + "learning_rate": 0.00017541735268313623, + "loss": 0.9812, "step": 5320 }, { - "epoch": 0.15276921090757822, - "grad_norm": 0.392578125, - "learning_rate": 0.00019830887278278483, - "loss": 0.9664, + "epoch": 0.3055252739686729, + "grad_norm": 0.2890625, + "learning_rate": 0.000175351555415296, + "loss": 0.983, "step": 5325 }, { - "epoch": 0.15291265617603603, - "grad_norm": 0.41015625, - "learning_rate": 0.00019829969055915728, - "loss": 1.0085, + "epoch": 0.3058121521601928, + "grad_norm": 0.26171875, + "learning_rate": 0.00017528568258474704, + "loss": 0.9497, "step": 5330 }, { - "epoch": 0.15305610144449386, - "grad_norm": 0.365234375, - "learning_rate": 0.00019829048368859826, - "loss": 1.0463, + "epoch": 0.30609903035171265, + "grad_norm": 0.259765625, + "learning_rate": 0.00017521973425754675, + "loss": 0.9057, "step": 5335 }, { - "epoch": 0.15319954671295166, - "grad_norm": 0.380859375, - "learning_rate": 0.00019828125217341618, - "loss": 0.8869, + "epoch": 0.30638590854323255, + "grad_norm": 0.25390625, + "learning_rate": 0.00017515371049982824, + "loss": 0.9332, "step": 5340 }, { - "epoch": 0.1533429919814095, - "grad_norm": 0.392578125, - "learning_rate": 0.0001982719960159257, - "loss": 0.9227, + "epoch": 0.3066727867347524, + "grad_norm": 0.3046875, + "learning_rate": 0.00017508761137780037, + "loss": 0.969, "step": 5345 }, { - "epoch": 0.15348643724986732, - "grad_norm": 0.40625, - "learning_rate": 0.0001982627152184476, - "loss": 0.8901, + "epoch": 0.3069596649262723, + "grad_norm": 0.2734375, + "learning_rate": 0.00017502143695774741, + "loss": 0.9633, "step": 5350 }, { - "epoch": 0.15362988251832513, - "grad_norm": 0.3515625, - "learning_rate": 0.00019825340978330894, - "loss": 0.9522, + "epoch": 0.3072465431177922, + "grad_norm": 0.26171875, + "learning_rate": 0.00017495518730602924, + "loss": 0.9164, "step": 5355 }, { - "epoch": 0.15377332778678296, - "grad_norm": 0.37890625, - "learning_rate": 0.00019824407971284286, - "loss": 0.8976, + "epoch": 0.30753342130931205, + "grad_norm": 0.28515625, + "learning_rate": 0.00017488886248908118, + "loss": 0.9805, "step": 5360 }, { - "epoch": 0.15391677305524076, - "grad_norm": 0.3984375, - "learning_rate": 0.0001982347250093887, - "loss": 1.0267, + "epoch": 0.30782029950083195, + "grad_norm": 0.2734375, + "learning_rate": 0.0001748224625734139, + "loss": 0.921, "step": 5365 }, { - "epoch": 0.1540602183236986, - "grad_norm": 0.3359375, - "learning_rate": 0.000198225345675292, - "loss": 0.9762, + "epoch": 0.30810717769235185, + "grad_norm": 0.275390625, + "learning_rate": 0.00017475598762561333, + "loss": 0.9671, "step": 5370 }, { - "epoch": 0.15420366359215643, - "grad_norm": 0.369140625, - "learning_rate": 0.0001982159417129045, - "loss": 0.9711, + "epoch": 0.3083940558838717, + "grad_norm": 
0.251953125, + "learning_rate": 0.00017468943771234075, + "loss": 0.9246, "step": 5375 }, { - "epoch": 0.15434710886061423, - "grad_norm": 0.369140625, - "learning_rate": 0.00019820651312458403, - "loss": 0.8932, + "epoch": 0.3086809340753916, + "grad_norm": 0.2734375, + "learning_rate": 0.00017462281290033256, + "loss": 0.9271, "step": 5380 }, { - "epoch": 0.15449055412907206, - "grad_norm": 0.341796875, - "learning_rate": 0.00019819705991269468, - "loss": 0.9594, + "epoch": 0.30896781226691145, + "grad_norm": 0.26171875, + "learning_rate": 0.00017455611325640024, + "loss": 0.9343, "step": 5385 }, { - "epoch": 0.15463399939752986, - "grad_norm": 0.373046875, - "learning_rate": 0.00019818758207960663, - "loss": 0.9786, + "epoch": 0.30925469045843135, + "grad_norm": 0.255859375, + "learning_rate": 0.00017448933884743037, + "loss": 0.9428, "step": 5390 }, { - "epoch": 0.1547774446659877, - "grad_norm": 0.369140625, - "learning_rate": 0.00019817807962769634, - "loss": 0.9982, + "epoch": 0.30954156864995125, + "grad_norm": 0.29296875, + "learning_rate": 0.0001744224897403845, + "loss": 0.986, "step": 5395 }, { - "epoch": 0.1549208899344455, - "grad_norm": 0.39453125, - "learning_rate": 0.00019816855255934635, - "loss": 1.0379, + "epoch": 0.3098284468414711, + "grad_norm": 0.263671875, + "learning_rate": 0.00017435556600229902, + "loss": 0.9702, "step": 5400 }, { - "epoch": 0.15506433520290333, - "grad_norm": 0.36328125, - "learning_rate": 0.00019815900087694538, - "loss": 0.9068, + "epoch": 0.310115325032991, + "grad_norm": 0.271484375, + "learning_rate": 0.0001742885677002852, + "loss": 1.0214, "step": 5405 }, { - "epoch": 0.15520778047136116, - "grad_norm": 0.408203125, - "learning_rate": 0.0001981494245828884, - "loss": 1.1326, + "epoch": 0.3104022032245109, + "grad_norm": 0.2734375, + "learning_rate": 0.00017422149490152914, + "loss": 0.9759, "step": 5410 }, { - "epoch": 0.15535122573981897, - "grad_norm": 0.39453125, - "learning_rate": 0.00019813982367957648, - "loss": 1.0661, + "epoch": 0.31068908141603074, + "grad_norm": 0.267578125, + "learning_rate": 0.00017415434767329154, + "loss": 1.042, "step": 5415 }, { - "epoch": 0.1554946710082768, - "grad_norm": 0.400390625, - "learning_rate": 0.00019813019816941689, - "loss": 1.0189, + "epoch": 0.31097595960755064, + "grad_norm": 0.275390625, + "learning_rate": 0.0001740871260829078, + "loss": 1.0227, "step": 5420 }, { - "epoch": 0.1556381162767346, - "grad_norm": 0.38671875, - "learning_rate": 0.00019812054805482302, - "loss": 0.9063, + "epoch": 0.3112628377990705, + "grad_norm": 0.283203125, + "learning_rate": 0.0001740198301977879, + "loss": 0.9925, "step": 5425 }, { - "epoch": 0.15578156154519243, - "grad_norm": 0.392578125, - "learning_rate": 0.00019811087333821453, - "loss": 1.0314, + "epoch": 0.3115497159905904, + "grad_norm": 0.265625, + "learning_rate": 0.0001739524600854163, + "loss": 0.9934, "step": 5430 }, { - "epoch": 0.15592500681365026, - "grad_norm": 0.373046875, - "learning_rate": 0.00019810117402201713, - "loss": 1.0584, + "epoch": 0.3118365941821103, + "grad_norm": 0.251953125, + "learning_rate": 0.0001738850158133519, + "loss": 0.9855, "step": 5435 }, { - "epoch": 0.15606845208210807, - "grad_norm": 0.384765625, - "learning_rate": 0.00019809145010866276, - "loss": 0.8615, + "epoch": 0.31212347237363014, + "grad_norm": 0.287109375, + "learning_rate": 0.00017381749744922796, + "loss": 0.9412, "step": 5440 }, { - "epoch": 0.1562118973505659, - "grad_norm": 0.380859375, - "learning_rate": 0.00019808170160058955, - "loss": 
0.8997, + "epoch": 0.31241035056515004, + "grad_norm": 0.263671875, + "learning_rate": 0.00017374990506075207, + "loss": 0.9927, "step": 5445 }, { - "epoch": 0.1563553426190237, - "grad_norm": 0.392578125, - "learning_rate": 0.00019807192850024175, - "loss": 1.0225, + "epoch": 0.31269722875666994, + "grad_norm": 0.294921875, + "learning_rate": 0.00017368223871570596, + "loss": 1.0166, "step": 5450 }, { - "epoch": 0.15649878788748153, - "grad_norm": 0.4375, - "learning_rate": 0.00019806213081006982, - "loss": 0.958, + "epoch": 0.3129841069481898, + "grad_norm": 0.267578125, + "learning_rate": 0.0001736144984819457, + "loss": 1.0061, "step": 5455 }, { - "epoch": 0.15664223315593936, - "grad_norm": 0.35546875, - "learning_rate": 0.00019805230853253033, - "loss": 0.8833, + "epoch": 0.3132709851397097, + "grad_norm": 0.267578125, + "learning_rate": 0.00017354668442740126, + "loss": 0.9393, "step": 5460 }, { - "epoch": 0.15678567842439717, - "grad_norm": 0.388671875, - "learning_rate": 0.000198042461670086, - "loss": 1.0328, + "epoch": 0.31355786333122954, + "grad_norm": 0.27734375, + "learning_rate": 0.00017347879662007676, + "loss": 1.0184, "step": 5465 }, { - "epoch": 0.156929123692855, - "grad_norm": 0.390625, - "learning_rate": 0.00019803259022520587, - "loss": 0.9686, + "epoch": 0.31384474152274944, + "grad_norm": 0.267578125, + "learning_rate": 0.00017341083512805025, + "loss": 0.9302, "step": 5470 }, { - "epoch": 0.1570725689613128, - "grad_norm": 0.361328125, - "learning_rate": 0.00019802269420036495, - "loss": 0.9744, + "epoch": 0.31413161971426934, + "grad_norm": 0.283203125, + "learning_rate": 0.00017334280001947362, + "loss": 0.9412, "step": 5475 }, { - "epoch": 0.15721601422977063, - "grad_norm": 0.380859375, - "learning_rate": 0.0001980127735980445, - "loss": 0.9099, + "epoch": 0.3144184979057892, + "grad_norm": 0.2734375, + "learning_rate": 0.00017327469136257272, + "loss": 0.9884, "step": 5480 }, { - "epoch": 0.15735945949822844, - "grad_norm": 0.353515625, - "learning_rate": 0.000198002828420732, - "loss": 0.988, + "epoch": 0.3147053760973091, + "grad_norm": 0.271484375, + "learning_rate": 0.000173206509225647, + "loss": 0.9041, "step": 5485 }, { - "epoch": 0.15750290476668627, - "grad_norm": 0.3828125, - "learning_rate": 0.00019799285867092096, - "loss": 0.9558, + "epoch": 0.314992254288829, + "grad_norm": 0.267578125, + "learning_rate": 0.00017313825367706967, + "loss": 0.9434, "step": 5490 }, { - "epoch": 0.1576463500351441, - "grad_norm": 0.34765625, - "learning_rate": 0.00019798286435111114, - "loss": 0.9129, + "epoch": 0.31527913248034883, + "grad_norm": 0.26953125, + "learning_rate": 0.00017306992478528753, + "loss": 1.0095, "step": 5495 }, { - "epoch": 0.1577897953036019, - "grad_norm": 0.396484375, - "learning_rate": 0.00019797284546380848, - "loss": 0.9972, + "epoch": 0.31556601067186874, + "grad_norm": 0.271484375, + "learning_rate": 0.000173001522618821, + "loss": 0.9315, "step": 5500 }, { - "epoch": 0.15793324057205974, - "grad_norm": 0.36328125, - "learning_rate": 0.000197962802011525, - "loss": 1.0944, + "epoch": 0.3158528888633886, + "grad_norm": 0.265625, + "learning_rate": 0.00017293304724626385, + "loss": 0.9799, "step": 5505 }, { - "epoch": 0.15807668584051754, - "grad_norm": 0.3671875, - "learning_rate": 0.00019795273399677893, - "loss": 0.9911, + "epoch": 0.3161397670549085, + "grad_norm": 0.27734375, + "learning_rate": 0.0001728644987362834, + "loss": 1.0289, "step": 5510 }, { - "epoch": 0.15822013110897537, - "grad_norm": 0.384765625, - "learning_rate": 
0.00019794264142209463, - "loss": 1.016, + "epoch": 0.3164266452464284, + "grad_norm": 0.248046875, + "learning_rate": 0.00017279587715762022, + "loss": 0.9204, "step": 5515 }, { - "epoch": 0.1583635763774332, - "grad_norm": 0.365234375, - "learning_rate": 0.00019793252429000266, - "loss": 0.914, + "epoch": 0.31671352343794823, + "grad_norm": 0.314453125, + "learning_rate": 0.0001727271825790882, + "loss": 0.9878, "step": 5520 }, { - "epoch": 0.158507021645891, - "grad_norm": 0.3828125, - "learning_rate": 0.00019792238260303972, - "loss": 0.9178, + "epoch": 0.31700040162946813, + "grad_norm": 0.255859375, + "learning_rate": 0.0001726584150695744, + "loss": 0.9265, "step": 5525 }, { - "epoch": 0.15865046691434884, - "grad_norm": 0.34375, - "learning_rate": 0.00019791221636374865, - "loss": 0.9677, + "epoch": 0.31728727982098803, + "grad_norm": 0.251953125, + "learning_rate": 0.00017258957469803906, + "loss": 0.9716, "step": 5530 }, { - "epoch": 0.15879391218280664, - "grad_norm": 0.388671875, - "learning_rate": 0.00019790202557467846, - "loss": 0.9576, + "epoch": 0.3175741580125079, + "grad_norm": 0.275390625, + "learning_rate": 0.0001725206615335154, + "loss": 0.9631, "step": 5535 }, { - "epoch": 0.15893735745126447, - "grad_norm": 0.349609375, - "learning_rate": 0.0001978918102383843, - "loss": 0.9682, + "epoch": 0.3178610362040278, + "grad_norm": 0.259765625, + "learning_rate": 0.00017245167564510974, + "loss": 0.909, "step": 5540 }, { - "epoch": 0.1590808027197223, - "grad_norm": 0.35546875, - "learning_rate": 0.0001978815703574275, - "loss": 0.9518, + "epoch": 0.3181479143955476, + "grad_norm": 0.27734375, + "learning_rate": 0.00017238261710200128, + "loss": 0.9312, "step": 5545 }, { - "epoch": 0.1592242479881801, - "grad_norm": 0.3984375, - "learning_rate": 0.00019787130593437553, - "loss": 1.0938, + "epoch": 0.31843479258706753, + "grad_norm": 0.287109375, + "learning_rate": 0.0001723134859734421, + "loss": 0.9891, "step": 5550 }, { - "epoch": 0.15936769325663794, - "grad_norm": 0.3828125, - "learning_rate": 0.00019786101697180205, - "loss": 0.8987, + "epoch": 0.31872167077858743, + "grad_norm": 0.26953125, + "learning_rate": 0.00017224428232875703, + "loss": 0.8686, "step": 5555 }, { - "epoch": 0.15951113852509574, - "grad_norm": 0.353515625, - "learning_rate": 0.00019785070347228673, - "loss": 1.0237, + "epoch": 0.3190085489701073, + "grad_norm": 0.255859375, + "learning_rate": 0.00017217500623734365, + "loss": 0.9146, "step": 5560 }, { - "epoch": 0.15965458379355357, - "grad_norm": 0.365234375, - "learning_rate": 0.00019784036543841563, - "loss": 1.1549, + "epoch": 0.3192954271616272, + "grad_norm": 0.279296875, + "learning_rate": 0.00017210565776867216, + "loss": 0.9702, "step": 5565 }, { - "epoch": 0.15979802906201138, - "grad_norm": 0.3671875, - "learning_rate": 0.00019783000287278078, - "loss": 0.9887, + "epoch": 0.3195823053531471, + "grad_norm": 0.263671875, + "learning_rate": 0.00017203623699228537, + "loss": 0.9483, "step": 5570 }, { - "epoch": 0.1599414743304692, - "grad_norm": 0.3671875, - "learning_rate": 0.00019781961577798035, - "loss": 0.9389, + "epoch": 0.3198691835446669, + "grad_norm": 0.279296875, + "learning_rate": 0.0001719667439777986, + "loss": 0.9595, "step": 5575 }, { - "epoch": 0.16008491959892704, - "grad_norm": 0.390625, - "learning_rate": 0.00019780920415661882, - "loss": 1.0852, + "epoch": 0.3201560617361868, + "grad_norm": 0.29296875, + "learning_rate": 0.00017189717879489958, + "loss": 0.9104, "step": 5580 }, { - "epoch": 0.16022836486738484, - 
"grad_norm": 0.423828125, - "learning_rate": 0.00019779876801130668, - "loss": 0.9682, + "epoch": 0.32044293992770667, + "grad_norm": 0.28515625, + "learning_rate": 0.00017182754151334842, + "loss": 0.929, "step": 5585 }, { - "epoch": 0.16037181013584267, - "grad_norm": 0.37890625, - "learning_rate": 0.0001977883073446606, - "loss": 0.9856, + "epoch": 0.3207298181192266, + "grad_norm": 0.275390625, + "learning_rate": 0.00017175783220297762, + "loss": 0.9127, "step": 5590 }, { - "epoch": 0.16051525540430048, - "grad_norm": 0.390625, - "learning_rate": 0.00019777782215930342, - "loss": 1.0265, + "epoch": 0.3210166963107465, + "grad_norm": 0.263671875, + "learning_rate": 0.00017168805093369173, + "loss": 0.9491, "step": 5595 }, { - "epoch": 0.1606587006727583, - "grad_norm": 0.373046875, - "learning_rate": 0.00019776731245786414, - "loss": 1.0538, + "epoch": 0.3213035745022663, + "grad_norm": 0.251953125, + "learning_rate": 0.00017161819777546767, + "loss": 0.9266, "step": 5600 }, { - "epoch": 0.16080214594121614, - "grad_norm": 0.369140625, - "learning_rate": 0.00019775677824297784, - "loss": 0.8994, + "epoch": 0.3215904526937862, + "grad_norm": 0.3359375, + "learning_rate": 0.0001715482727983542, + "loss": 1.014, "step": 5605 }, { - "epoch": 0.16094559120967394, - "grad_norm": 0.365234375, - "learning_rate": 0.00019774621951728583, - "loss": 1.0913, + "epoch": 0.3218773308853061, + "grad_norm": 0.255859375, + "learning_rate": 0.00017147827607247242, + "loss": 0.9368, "step": 5610 }, { - "epoch": 0.16108903647813178, - "grad_norm": 0.37890625, - "learning_rate": 0.0001977356362834355, - "loss": 1.0196, + "epoch": 0.32216420907682597, + "grad_norm": 0.26171875, + "learning_rate": 0.00017140820766801506, + "loss": 1.0029, "step": 5615 }, { - "epoch": 0.16123248174658958, - "grad_norm": 0.34375, - "learning_rate": 0.00019772502854408042, - "loss": 0.8995, + "epoch": 0.32245108726834587, + "grad_norm": 0.275390625, + "learning_rate": 0.00017133806765524693, + "loss": 0.9271, "step": 5620 }, { - "epoch": 0.1613759270150474, - "grad_norm": 0.412109375, - "learning_rate": 0.0001977143963018803, - "loss": 0.967, + "epoch": 0.3227379654598657, + "grad_norm": 0.28125, + "learning_rate": 0.0001712678561045046, + "loss": 0.9974, "step": 5625 }, { - "epoch": 0.16151937228350524, - "grad_norm": 0.38671875, - "learning_rate": 0.000197703739559501, - "loss": 0.9961, + "epoch": 0.3230248436513856, + "grad_norm": 0.27734375, + "learning_rate": 0.00017119757308619639, + "loss": 0.9648, "step": 5630 }, { - "epoch": 0.16166281755196305, - "grad_norm": 0.478515625, - "learning_rate": 0.00019769305831961447, - "loss": 0.9918, + "epoch": 0.3233117218429055, + "grad_norm": 0.26171875, + "learning_rate": 0.00017112721867080217, + "loss": 0.9823, "step": 5635 }, { - "epoch": 0.16180626282042088, - "grad_norm": 0.36328125, - "learning_rate": 0.00019768235258489888, - "loss": 0.9983, + "epoch": 0.32359860003442537, + "grad_norm": 0.275390625, + "learning_rate": 0.0001710567929288736, + "loss": 0.9823, "step": 5640 }, { - "epoch": 0.16194970808887868, - "grad_norm": 0.375, - "learning_rate": 0.00019767162235803852, - "loss": 0.9068, + "epoch": 0.32388547822594527, + "grad_norm": 0.25390625, + "learning_rate": 0.00017098629593103378, + "loss": 0.9614, "step": 5645 }, { - "epoch": 0.1620931533573365, - "grad_norm": 0.396484375, - "learning_rate": 0.00019766086764172377, - "loss": 1.0501, + "epoch": 0.32417235641746517, + "grad_norm": 0.2490234375, + "learning_rate": 0.00017091572774797714, + "loss": 1.014, "step": 5650 }, 
{ - "epoch": 0.16223659862579431, - "grad_norm": 0.37890625, - "learning_rate": 0.00019765008843865118, - "loss": 0.9989, + "epoch": 0.324459234608985, + "grad_norm": 0.265625, + "learning_rate": 0.00017084508845046975, + "loss": 1.0164, "step": 5655 }, { - "epoch": 0.16238004389425215, - "grad_norm": 0.345703125, - "learning_rate": 0.00019763928475152352, - "loss": 1.0319, + "epoch": 0.3247461128005049, + "grad_norm": 0.25390625, + "learning_rate": 0.00017077437810934882, + "loss": 0.9267, "step": 5660 }, { - "epoch": 0.16252348916270998, - "grad_norm": 0.400390625, - "learning_rate": 0.00019762845658304954, - "loss": 1.076, + "epoch": 0.32503299099202476, + "grad_norm": 0.267578125, + "learning_rate": 0.0001707035967955228, + "loss": 0.9279, "step": 5665 }, { - "epoch": 0.16266693443116778, - "grad_norm": 0.34765625, - "learning_rate": 0.00019761760393594425, - "loss": 0.9005, + "epoch": 0.32531986918354466, + "grad_norm": 0.279296875, + "learning_rate": 0.00017063274457997137, + "loss": 0.9867, "step": 5670 }, { - "epoch": 0.1628103796996256, - "grad_norm": 0.38671875, - "learning_rate": 0.0001976067268129288, - "loss": 0.9326, + "epoch": 0.32560674737506456, + "grad_norm": 0.271484375, + "learning_rate": 0.00017056182153374526, + "loss": 0.9867, "step": 5675 }, { - "epoch": 0.16295382496808342, - "grad_norm": 0.388671875, - "learning_rate": 0.00019759582521673035, - "loss": 0.9933, + "epoch": 0.3258936255665844, + "grad_norm": 0.310546875, + "learning_rate": 0.00017049082772796633, + "loss": 0.9746, "step": 5680 }, { - "epoch": 0.16309727023654125, - "grad_norm": 0.3828125, - "learning_rate": 0.00019758489915008235, - "loss": 0.9199, + "epoch": 0.3261805037581043, + "grad_norm": 0.271484375, + "learning_rate": 0.00017041976323382726, + "loss": 0.9462, "step": 5685 }, { - "epoch": 0.16324071550499908, - "grad_norm": 0.40234375, - "learning_rate": 0.00019757394861572432, - "loss": 0.949, + "epoch": 0.3264673819496242, + "grad_norm": 0.279296875, + "learning_rate": 0.00017034862812259174, + "loss": 1.0081, "step": 5690 }, { - "epoch": 0.16338416077345688, - "grad_norm": 0.376953125, - "learning_rate": 0.00019756297361640189, - "loss": 0.9999, + "epoch": 0.32675426014114406, + "grad_norm": 0.2578125, + "learning_rate": 0.00017027742246559417, + "loss": 0.9711, "step": 5695 }, { - "epoch": 0.1635276060419147, - "grad_norm": 0.3671875, - "learning_rate": 0.00019755197415486685, - "loss": 1.042, + "epoch": 0.32704113833266396, + "grad_norm": 0.263671875, + "learning_rate": 0.00017020614633423976, + "loss": 0.9295, "step": 5700 }, { - "epoch": 0.16367105131037252, - "grad_norm": 0.35546875, - "learning_rate": 0.00019754095023387716, - "loss": 0.9114, + "epoch": 0.3273280165241838, + "grad_norm": 0.275390625, + "learning_rate": 0.00017013479980000436, + "loss": 0.9887, "step": 5705 }, { - "epoch": 0.16381449657883035, - "grad_norm": 0.36328125, - "learning_rate": 0.00019752990185619682, - "loss": 1.0092, + "epoch": 0.3276148947157037, + "grad_norm": 0.2578125, + "learning_rate": 0.00017006338293443446, + "loss": 1.0193, "step": 5710 }, { - "epoch": 0.16395794184728818, - "grad_norm": 0.375, - "learning_rate": 0.00019751882902459608, - "loss": 0.9085, + "epoch": 0.3279017729072236, + "grad_norm": 0.26171875, + "learning_rate": 0.00016999189580914708, + "loss": 0.9274, "step": 5715 }, { - "epoch": 0.16410138711574598, - "grad_norm": 0.384765625, - "learning_rate": 0.00019750773174185123, - "loss": 1.0335, + "epoch": 0.32818865109874346, + "grad_norm": 0.2578125, + "learning_rate": 
0.00016992033849582962, + "loss": 0.8722, "step": 5720 }, { - "epoch": 0.16424483238420381, - "grad_norm": 0.380859375, - "learning_rate": 0.00019749661001074466, - "loss": 1.0083, + "epoch": 0.32847552929026336, + "grad_norm": 0.255859375, + "learning_rate": 0.00016984871106623988, + "loss": 0.8897, "step": 5725 }, { - "epoch": 0.16438827765266162, - "grad_norm": 0.421875, - "learning_rate": 0.00019748546383406508, - "loss": 0.9645, + "epoch": 0.32876240748178326, + "grad_norm": 0.294921875, + "learning_rate": 0.00016977701359220613, + "loss": 0.9323, "step": 5730 }, { - "epoch": 0.16453172292111945, - "grad_norm": 0.419921875, - "learning_rate": 0.00019747429321460705, - "loss": 0.9626, + "epoch": 0.3290492856733031, + "grad_norm": 0.3515625, + "learning_rate": 0.00016970524614562664, + "loss": 0.9681, "step": 5735 }, { - "epoch": 0.16467516818957725, - "grad_norm": 0.404296875, - "learning_rate": 0.00019746309815517153, - "loss": 0.8949, + "epoch": 0.329336163864823, + "grad_norm": 0.2734375, + "learning_rate": 0.00016963340879847002, + "loss": 0.9279, "step": 5740 }, { - "epoch": 0.16481861345803508, - "grad_norm": 0.396484375, - "learning_rate": 0.00019745187865856547, - "loss": 0.9255, + "epoch": 0.32962304205634285, + "grad_norm": 0.28125, + "learning_rate": 0.0001695615016227749, + "loss": 0.9294, "step": 5745 }, { - "epoch": 0.16496205872649292, - "grad_norm": 0.36328125, - "learning_rate": 0.0001974406347276019, - "loss": 1.0755, + "epoch": 0.32990992024786275, + "grad_norm": 0.28515625, + "learning_rate": 0.00016948952469065, + "loss": 0.9861, "step": 5750 }, { - "epoch": 0.16510550399495072, - "grad_norm": 0.361328125, - "learning_rate": 0.00019742936636510004, - "loss": 0.9178, + "epoch": 0.33019679843938265, + "grad_norm": 0.279296875, + "learning_rate": 0.00016941747807427387, + "loss": 0.9409, "step": 5755 }, { - "epoch": 0.16524894926340855, - "grad_norm": 0.390625, - "learning_rate": 0.0001974180735738853, - "loss": 1.0083, + "epoch": 0.3304836766309025, + "grad_norm": 0.2578125, + "learning_rate": 0.00016934536184589512, + "loss": 0.9838, "step": 5760 }, { - "epoch": 0.16539239453186635, - "grad_norm": 0.3515625, - "learning_rate": 0.00019740675635678912, - "loss": 0.9732, + "epoch": 0.3307705548224224, + "grad_norm": 0.28125, + "learning_rate": 0.00016927317607783198, + "loss": 0.9714, "step": 5765 }, { - "epoch": 0.16553583980032419, - "grad_norm": 0.390625, - "learning_rate": 0.00019739541471664907, - "loss": 0.999, + "epoch": 0.3310574330139423, + "grad_norm": 0.251953125, + "learning_rate": 0.00016920092084247255, + "loss": 0.9408, "step": 5770 }, { - "epoch": 0.16567928506878202, - "grad_norm": 0.478515625, - "learning_rate": 0.0001973840486563089, - "loss": 1.0517, + "epoch": 0.33134431120546215, + "grad_norm": 0.28125, + "learning_rate": 0.0001691285962122745, + "loss": 1.0076, "step": 5775 }, { - "epoch": 0.16582273033723982, - "grad_norm": 0.33984375, - "learning_rate": 0.00019737265817861845, - "loss": 0.9369, + "epoch": 0.33163118939698205, + "grad_norm": 0.25390625, + "learning_rate": 0.00016905620225976517, + "loss": 0.9663, "step": 5780 }, { - "epoch": 0.16596617560569765, - "grad_norm": 0.43359375, - "learning_rate": 0.00019736124328643365, - "loss": 1.1725, + "epoch": 0.3319180675885019, + "grad_norm": 0.298828125, + "learning_rate": 0.00016898373905754137, + "loss": 0.9457, "step": 5785 }, { - "epoch": 0.16610962087415546, - "grad_norm": 0.439453125, - "learning_rate": 0.00019734980398261666, - "loss": 0.957, + "epoch": 0.3322049457800218, + 
"grad_norm": 0.2890625, + "learning_rate": 0.00016891120667826933, + "loss": 0.9358, "step": 5790 }, { - "epoch": 0.1662530661426133, - "grad_norm": 0.388671875, - "learning_rate": 0.0001973383402700356, - "loss": 0.9379, + "epoch": 0.3324918239715417, + "grad_norm": 0.291015625, + "learning_rate": 0.00016883860519468472, + "loss": 0.922, "step": 5795 }, { - "epoch": 0.16639651141107112, - "grad_norm": 0.35546875, - "learning_rate": 0.00019732685215156483, - "loss": 0.9363, + "epoch": 0.33277870216306155, + "grad_norm": 0.267578125, + "learning_rate": 0.0001687659346795925, + "loss": 0.8944, "step": 5800 }, { - "epoch": 0.16653995667952892, - "grad_norm": 0.33984375, - "learning_rate": 0.00019731533963008482, - "loss": 0.8309, + "epoch": 0.33306558035458145, + "grad_norm": 0.28125, + "learning_rate": 0.00016869319520586675, + "loss": 0.9289, "step": 5805 }, { - "epoch": 0.16668340194798675, - "grad_norm": 0.396484375, - "learning_rate": 0.00019730380270848209, - "loss": 0.9321, + "epoch": 0.33335245854610135, + "grad_norm": 0.27734375, + "learning_rate": 0.00016862038684645078, + "loss": 0.959, "step": 5810 }, { - "epoch": 0.16682684721644456, - "grad_norm": 0.396484375, - "learning_rate": 0.00019729224138964933, - "loss": 0.9586, + "epoch": 0.3336393367376212, + "grad_norm": 0.267578125, + "learning_rate": 0.00016854750967435704, + "loss": 1.01, "step": 5815 }, { - "epoch": 0.1669702924849024, - "grad_norm": 0.462890625, - "learning_rate": 0.00019728065567648536, - "loss": 0.9978, + "epoch": 0.3339262149291411, + "grad_norm": 0.265625, + "learning_rate": 0.0001684745637626669, + "loss": 1.0157, "step": 5820 }, { - "epoch": 0.1671137377533602, - "grad_norm": 0.376953125, - "learning_rate": 0.0001972690455718951, - "loss": 0.995, + "epoch": 0.33421309312066094, + "grad_norm": 0.283203125, + "learning_rate": 0.00016840154918453063, + "loss": 0.9816, "step": 5825 }, { - "epoch": 0.16725718302181802, - "grad_norm": 0.388671875, - "learning_rate": 0.00019725741107878958, - "loss": 0.9501, + "epoch": 0.33449997131218084, + "grad_norm": 0.251953125, + "learning_rate": 0.00016832846601316749, + "loss": 0.9529, "step": 5830 }, { - "epoch": 0.16740062829027585, - "grad_norm": 0.423828125, - "learning_rate": 0.0001972457522000859, - "loss": 0.9035, + "epoch": 0.33478684950370075, + "grad_norm": 0.251953125, + "learning_rate": 0.00016825531432186543, + "loss": 0.9697, "step": 5835 }, { - "epoch": 0.16754407355873366, - "grad_norm": 0.40625, - "learning_rate": 0.00019723406893870738, - "loss": 1.0162, + "epoch": 0.3350737276952206, + "grad_norm": 0.25390625, + "learning_rate": 0.00016818209418398107, + "loss": 0.9617, "step": 5840 }, { - "epoch": 0.1676875188271915, - "grad_norm": 0.369140625, - "learning_rate": 0.00019722236129758334, - "loss": 0.9198, + "epoch": 0.3353606058867405, + "grad_norm": 0.28515625, + "learning_rate": 0.0001681088056729398, + "loss": 1.0015, "step": 5845 }, { - "epoch": 0.1678309640956493, - "grad_norm": 0.416015625, - "learning_rate": 0.0001972106292796493, - "loss": 0.9969, + "epoch": 0.3356474840782604, + "grad_norm": 0.26171875, + "learning_rate": 0.00016803544886223547, + "loss": 0.9556, "step": 5850 }, { - "epoch": 0.16797440936410712, - "grad_norm": 0.419921875, - "learning_rate": 0.00019719887288784687, - "loss": 0.9634, + "epoch": 0.33593436226978024, + "grad_norm": 0.26953125, + "learning_rate": 0.00016796202382543047, + "loss": 0.9678, "step": 5855 }, { - "epoch": 0.16811785463256496, - "grad_norm": 0.35546875, - "learning_rate": 0.00019718709212512373, - "loss": 
0.9703, + "epoch": 0.33622124046130014, + "grad_norm": 0.271484375, + "learning_rate": 0.00016788853063615556, + "loss": 0.9375, "step": 5860 }, { - "epoch": 0.16826129990102276, - "grad_norm": 0.357421875, - "learning_rate": 0.0001971752869944337, - "loss": 1.1144, + "epoch": 0.33650811865282, + "grad_norm": 0.259765625, + "learning_rate": 0.0001678149693681099, + "loss": 0.9212, "step": 5865 }, { - "epoch": 0.1684047451694806, - "grad_norm": 0.361328125, - "learning_rate": 0.00019716345749873674, - "loss": 0.9017, + "epoch": 0.3367949968443399, + "grad_norm": 0.267578125, + "learning_rate": 0.0001677413400950609, + "loss": 1.0206, "step": 5870 }, { - "epoch": 0.1685481904379384, - "grad_norm": 0.42578125, - "learning_rate": 0.0001971516036409989, - "loss": 0.8874, + "epoch": 0.3370818750358598, + "grad_norm": 0.255859375, + "learning_rate": 0.00016766764289084414, + "loss": 0.9991, "step": 5875 }, { - "epoch": 0.16869163570639623, - "grad_norm": 0.384765625, - "learning_rate": 0.00019713972542419227, - "loss": 1.0007, + "epoch": 0.33736875322737964, + "grad_norm": 0.26171875, + "learning_rate": 0.00016759387782936335, + "loss": 0.8914, "step": 5880 }, { - "epoch": 0.16883508097485406, - "grad_norm": 0.392578125, - "learning_rate": 0.00019712782285129514, - "loss": 1.0692, + "epoch": 0.33765563141889954, + "grad_norm": 0.2890625, + "learning_rate": 0.00016752004498459032, + "loss": 0.9712, "step": 5885 }, { - "epoch": 0.16897852624331186, - "grad_norm": 0.419921875, - "learning_rate": 0.00019711589592529187, - "loss": 0.9732, + "epoch": 0.33794250961041944, + "grad_norm": 0.251953125, + "learning_rate": 0.00016744614443056475, + "loss": 0.9701, "step": 5890 }, { - "epoch": 0.1691219715117697, - "grad_norm": 0.4375, - "learning_rate": 0.00019710394464917292, - "loss": 0.9212, + "epoch": 0.3382293878019393, + "grad_norm": 0.28125, + "learning_rate": 0.00016737217624139433, + "loss": 0.9325, "step": 5895 }, { - "epoch": 0.1692654167802275, - "grad_norm": 0.37890625, - "learning_rate": 0.0001970919690259349, - "loss": 1.0424, + "epoch": 0.3385162659934592, + "grad_norm": 0.267578125, + "learning_rate": 0.0001672981404912545, + "loss": 0.8982, "step": 5900 }, { - "epoch": 0.16940886204868533, - "grad_norm": 0.392578125, - "learning_rate": 0.00019707996905858044, - "loss": 1.0483, + "epoch": 0.33880314418497903, + "grad_norm": 0.267578125, + "learning_rate": 0.00016722403725438845, + "loss": 0.9592, "step": 5905 }, { - "epoch": 0.16955230731714313, - "grad_norm": 0.3828125, - "learning_rate": 0.00019706794475011835, - "loss": 1.0509, + "epoch": 0.33909002237649893, + "grad_norm": 0.2578125, + "learning_rate": 0.00016714986660510715, + "loss": 0.8901, "step": 5910 }, { - "epoch": 0.16969575258560096, - "grad_norm": 0.384765625, - "learning_rate": 0.0001970558961035635, - "loss": 0.9266, + "epoch": 0.33937690056801884, + "grad_norm": 0.259765625, + "learning_rate": 0.000167075628617789, + "loss": 1.0316, "step": 5915 }, { - "epoch": 0.1698391978540588, - "grad_norm": 0.392578125, - "learning_rate": 0.00019704382312193687, - "loss": 1.0593, + "epoch": 0.3396637787595387, + "grad_norm": 0.259765625, + "learning_rate": 0.00016700132336688005, + "loss": 0.9365, "step": 5920 }, { - "epoch": 0.1699826431225166, - "grad_norm": 0.37890625, - "learning_rate": 0.00019703172580826559, - "loss": 1.0162, + "epoch": 0.3399506569510586, + "grad_norm": 0.267578125, + "learning_rate": 0.0001669269509268938, + "loss": 1.0252, "step": 5925 }, { - "epoch": 0.17012608839097443, - "grad_norm": 0.38671875, - 
"learning_rate": 0.00019701960416558282, - "loss": 0.9563, + "epoch": 0.3402375351425785, + "grad_norm": 0.271484375, + "learning_rate": 0.00016685251137241113, + "loss": 1.0248, "step": 5930 }, { - "epoch": 0.17026953365943223, - "grad_norm": 0.38671875, - "learning_rate": 0.00019700745819692785, - "loss": 0.9851, + "epoch": 0.34052441333409833, + "grad_norm": 0.2890625, + "learning_rate": 0.0001667780047780801, + "loss": 0.954, "step": 5935 }, { - "epoch": 0.17041297892789006, - "grad_norm": 0.388671875, - "learning_rate": 0.0001969952879053461, - "loss": 0.9088, + "epoch": 0.34081129152561823, + "grad_norm": 0.255859375, + "learning_rate": 0.00016670343121861613, + "loss": 0.9679, "step": 5940 }, { - "epoch": 0.1705564241963479, - "grad_norm": 0.40234375, - "learning_rate": 0.00019698309329388902, - "loss": 0.9276, + "epoch": 0.3410981697171381, + "grad_norm": 0.27734375, + "learning_rate": 0.00016662879076880178, + "loss": 1.0479, "step": 5945 }, { - "epoch": 0.1706998694648057, - "grad_norm": 0.43359375, - "learning_rate": 0.00019697087436561418, - "loss": 0.9503, + "epoch": 0.341385047908658, + "grad_norm": 0.302734375, + "learning_rate": 0.00016655408350348664, + "loss": 0.8797, "step": 5950 }, { - "epoch": 0.17084331473326353, - "grad_norm": 0.373046875, - "learning_rate": 0.0001969586311235853, - "loss": 0.9954, + "epoch": 0.3416719261001779, + "grad_norm": 0.55859375, + "learning_rate": 0.0001664793094975873, + "loss": 0.9549, "step": 5955 }, { - "epoch": 0.17098676000172133, - "grad_norm": 0.380859375, - "learning_rate": 0.0001969463635708722, - "loss": 0.9536, + "epoch": 0.3419588042916977, + "grad_norm": 0.25390625, + "learning_rate": 0.00016640446882608737, + "loss": 0.9245, "step": 5960 }, { - "epoch": 0.17113020527017916, - "grad_norm": 0.365234375, - "learning_rate": 0.00019693407171055065, - "loss": 1.0284, + "epoch": 0.34224568248321763, + "grad_norm": 0.279296875, + "learning_rate": 0.00016632956156403716, + "loss": 1.0159, "step": 5965 }, { - "epoch": 0.171273650538637, - "grad_norm": 0.369140625, - "learning_rate": 0.0001969217555457027, - "loss": 0.9575, + "epoch": 0.34253256067473753, + "grad_norm": 0.275390625, + "learning_rate": 0.00016625458778655387, + "loss": 0.9792, "step": 5970 }, { - "epoch": 0.1714170958070948, - "grad_norm": 0.38671875, - "learning_rate": 0.00019690941507941642, - "loss": 1.0139, + "epoch": 0.3428194388662574, + "grad_norm": 0.2890625, + "learning_rate": 0.00016617954756882144, + "loss": 1.0467, "step": 5975 }, { - "epoch": 0.17156054107555263, - "grad_norm": 0.388671875, - "learning_rate": 0.00019689705031478586, - "loss": 1.0685, + "epoch": 0.3431063170577773, + "grad_norm": 0.26953125, + "learning_rate": 0.00016610444098609026, + "loss": 0.9083, "step": 5980 }, { - "epoch": 0.17170398634401043, - "grad_norm": 0.388671875, - "learning_rate": 0.00019688466125491143, - "loss": 1.001, + "epoch": 0.3433931952492971, + "grad_norm": 0.28125, + "learning_rate": 0.00016602926811367744, + "loss": 0.9455, "step": 5985 }, { - "epoch": 0.17184743161246827, - "grad_norm": 0.388671875, - "learning_rate": 0.00019687224790289933, - "loss": 0.9703, + "epoch": 0.343680073440817, + "grad_norm": 0.275390625, + "learning_rate": 0.00016595402902696646, + "loss": 0.973, "step": 5990 }, { - "epoch": 0.17199087688092607, - "grad_norm": 0.380859375, - "learning_rate": 0.00019685981026186205, - "loss": 0.903, + "epoch": 0.3439669516323369, + "grad_norm": 0.26171875, + "learning_rate": 0.0001658787238014073, + "loss": 1.0536, "step": 5995 }, { - "epoch": 
0.1721343221493839, - "grad_norm": 0.38671875, - "learning_rate": 0.00019684734833491811, - "loss": 0.9239, + "epoch": 0.34425382982385677, + "grad_norm": 0.267578125, + "learning_rate": 0.00016580335251251623, + "loss": 0.984, "step": 6000 }, { - "epoch": 0.17227776741784173, - "grad_norm": 0.369140625, - "learning_rate": 0.00019683486212519212, - "loss": 0.9914, + "epoch": 0.3445407080153767, + "grad_norm": 0.275390625, + "learning_rate": 0.00016572791523587562, + "loss": 0.9788, "step": 6005 }, { - "epoch": 0.17242121268629954, - "grad_norm": 0.376953125, - "learning_rate": 0.0001968223516358148, - "loss": 0.9533, + "epoch": 0.3448275862068966, + "grad_norm": 0.271484375, + "learning_rate": 0.00016565241204713428, + "loss": 1.0371, "step": 6010 }, { - "epoch": 0.17256465795475737, - "grad_norm": 0.396484375, - "learning_rate": 0.0001968098168699229, - "loss": 1.0884, + "epoch": 0.3451144643984164, + "grad_norm": 0.265625, + "learning_rate": 0.0001655768430220069, + "loss": 0.9485, "step": 6015 }, { - "epoch": 0.17270810322321517, - "grad_norm": 0.375, - "learning_rate": 0.0001967972578306593, - "loss": 0.937, + "epoch": 0.3454013425899363, + "grad_norm": 0.318359375, + "learning_rate": 0.0001655012082362743, + "loss": 0.8763, "step": 6020 }, { - "epoch": 0.172851548491673, - "grad_norm": 0.40625, - "learning_rate": 0.000196784674521173, - "loss": 0.959, + "epoch": 0.34568822078145617, + "grad_norm": 0.287109375, + "learning_rate": 0.00016542550776578322, + "loss": 0.9322, "step": 6025 }, { - "epoch": 0.17299499376013083, - "grad_norm": 0.3515625, - "learning_rate": 0.00019677206694461896, - "loss": 1.0079, + "epoch": 0.34597509897297607, + "grad_norm": 0.275390625, + "learning_rate": 0.00016534974168644625, + "loss": 0.9894, "step": 6030 }, { - "epoch": 0.17313843902858864, - "grad_norm": 0.412109375, - "learning_rate": 0.0001967594351041584, - "loss": 1.065, + "epoch": 0.34626197716449597, + "grad_norm": 0.287109375, + "learning_rate": 0.0001652739100742417, + "loss": 1.0145, "step": 6035 }, { - "epoch": 0.17328188429704647, - "grad_norm": 0.38671875, - "learning_rate": 0.0001967467790029585, - "loss": 1.0226, + "epoch": 0.3465488553560158, + "grad_norm": 0.27734375, + "learning_rate": 0.00016519801300521385, + "loss": 1.0105, "step": 6040 }, { - "epoch": 0.17342532956550427, - "grad_norm": 0.392578125, - "learning_rate": 0.0001967340986441925, - "loss": 1.1055, + "epoch": 0.3468357335475357, + "grad_norm": 0.294921875, + "learning_rate": 0.0001651220505554723, + "loss": 0.9631, "step": 6045 }, { - "epoch": 0.1735687748339621, - "grad_norm": 0.37890625, - "learning_rate": 0.0001967213940310399, - "loss": 0.9992, + "epoch": 0.3471226117390556, + "grad_norm": 0.26171875, + "learning_rate": 0.00016504602280119243, + "loss": 0.9493, "step": 6050 }, { - "epoch": 0.17371222010241993, - "grad_norm": 0.3984375, - "learning_rate": 0.00019670866516668606, - "loss": 0.9692, + "epoch": 0.34740948993057547, + "grad_norm": 0.298828125, + "learning_rate": 0.0001649699298186151, + "loss": 0.9545, "step": 6055 }, { - "epoch": 0.17385566537087774, - "grad_norm": 0.361328125, - "learning_rate": 0.00019669591205432254, - "loss": 0.9764, + "epoch": 0.34769636812209537, + "grad_norm": 0.26953125, + "learning_rate": 0.0001648937716840464, + "loss": 0.9363, "step": 6060 }, { - "epoch": 0.17399911063933557, - "grad_norm": 0.423828125, - "learning_rate": 0.00019668313469714698, - "loss": 0.9272, + "epoch": 0.3479832463136152, + "grad_norm": 0.2890625, + "learning_rate": 0.00016481754847385793, + "loss": 1.0452, 
"step": 6065 }, { - "epoch": 0.17414255590779337, - "grad_norm": 0.357421875, - "learning_rate": 0.0001966703330983631, - "loss": 0.9293, + "epoch": 0.3482701245051351, + "grad_norm": 0.265625, + "learning_rate": 0.00016474126026448652, + "loss": 0.9987, "step": 6070 }, { - "epoch": 0.1742860011762512, - "grad_norm": 0.404296875, - "learning_rate": 0.0001966575072611806, - "loss": 0.9389, + "epoch": 0.348557002696655, + "grad_norm": 0.263671875, + "learning_rate": 0.00016466490713243416, + "loss": 0.8785, "step": 6075 }, { - "epoch": 0.174429446444709, - "grad_norm": 0.40234375, - "learning_rate": 0.00019664465718881543, - "loss": 1.078, + "epoch": 0.34884388088817486, + "grad_norm": 0.287109375, + "learning_rate": 0.00016458848915426792, + "loss": 0.9639, "step": 6080 }, { - "epoch": 0.17457289171316684, - "grad_norm": 0.349609375, - "learning_rate": 0.00019663178288448948, - "loss": 0.9971, + "epoch": 0.34913075907969476, + "grad_norm": 0.271484375, + "learning_rate": 0.00016451200640661993, + "loss": 1.0097, "step": 6085 }, { - "epoch": 0.17471633698162467, - "grad_norm": 0.373046875, - "learning_rate": 0.00019661888435143073, - "loss": 0.9212, + "epoch": 0.34941763727121466, + "grad_norm": 0.259765625, + "learning_rate": 0.00016443545896618723, + "loss": 0.9978, "step": 6090 }, { - "epoch": 0.17485978225008247, - "grad_norm": 0.435546875, - "learning_rate": 0.00019660596159287326, - "loss": 1.0559, + "epoch": 0.3497045154627345, + "grad_norm": 0.271484375, + "learning_rate": 0.0001643588469097318, + "loss": 1.0066, "step": 6095 }, { - "epoch": 0.1750032275185403, - "grad_norm": 0.345703125, - "learning_rate": 0.00019659301461205728, - "loss": 0.8945, + "epoch": 0.3499913936542544, + "grad_norm": 0.283203125, + "learning_rate": 0.00016428217031408038, + "loss": 0.9327, "step": 6100 }, { - "epoch": 0.1751466727869981, - "grad_norm": 0.375, - "learning_rate": 0.000196580043412229, - "loss": 0.9854, + "epoch": 0.35027827184577426, + "grad_norm": 0.267578125, + "learning_rate": 0.0001642054292561244, + "loss": 0.9271, "step": 6105 }, { - "epoch": 0.17529011805545594, - "grad_norm": 0.392578125, - "learning_rate": 0.0001965670479966407, - "loss": 0.9659, + "epoch": 0.35056515003729416, + "grad_norm": 0.279296875, + "learning_rate": 0.00016412862381282004, + "loss": 1.0217, "step": 6110 }, { - "epoch": 0.17543356332391377, - "grad_norm": 0.3828125, - "learning_rate": 0.00019655402836855074, - "loss": 0.9669, + "epoch": 0.35085202822881406, + "grad_norm": 0.26171875, + "learning_rate": 0.00016405175406118786, + "loss": 1.0257, "step": 6115 }, { - "epoch": 0.17557700859237158, - "grad_norm": 0.37109375, - "learning_rate": 0.0001965409845312236, - "loss": 0.9932, + "epoch": 0.3511389064203339, + "grad_norm": 0.265625, + "learning_rate": 0.00016397482007831312, + "loss": 0.9198, "step": 6120 }, { - "epoch": 0.1757204538608294, - "grad_norm": 0.38671875, - "learning_rate": 0.00019652791648792982, - "loss": 0.9669, + "epoch": 0.3514257846118538, + "grad_norm": 0.267578125, + "learning_rate": 0.00016389782194134534, + "loss": 0.9217, "step": 6125 }, { - "epoch": 0.1758638991292872, - "grad_norm": 0.361328125, - "learning_rate": 0.0001965148242419459, - "loss": 0.8995, + "epoch": 0.3517126628033737, + "grad_norm": 0.259765625, + "learning_rate": 0.00016382075972749843, + "loss": 0.9233, "step": 6130 }, { - "epoch": 0.17600734439774504, - "grad_norm": 0.3828125, - "learning_rate": 0.00019650170779655457, - "loss": 0.9912, + "epoch": 0.35199954099489356, + "grad_norm": 0.26171875, + "learning_rate": 
0.00016374363351405054, + "loss": 0.9766, "step": 6135 }, { - "epoch": 0.17615078966620287, - "grad_norm": 0.376953125, - "learning_rate": 0.0001964885671550445, - "loss": 0.9234, + "epoch": 0.35228641918641346, + "grad_norm": 0.263671875, + "learning_rate": 0.00016366644337834405, + "loss": 0.9753, "step": 6140 }, { - "epoch": 0.17629423493466068, - "grad_norm": 0.3828125, - "learning_rate": 0.00019647540232071054, - "loss": 1.0043, + "epoch": 0.3525732973779333, + "grad_norm": 0.263671875, + "learning_rate": 0.00016358918939778536, + "loss": 0.9589, "step": 6145 }, { - "epoch": 0.1764376802031185, - "grad_norm": 0.390625, - "learning_rate": 0.00019646221329685344, - "loss": 0.9235, + "epoch": 0.3528601755694532, + "grad_norm": 0.255859375, + "learning_rate": 0.00016351187164984494, + "loss": 0.9289, "step": 6150 }, { - "epoch": 0.1765811254715763, - "grad_norm": 0.365234375, - "learning_rate": 0.0001964490000867802, - "loss": 1.0069, + "epoch": 0.3531470537609731, + "grad_norm": 0.251953125, + "learning_rate": 0.00016343449021205726, + "loss": 0.9931, "step": 6155 }, { - "epoch": 0.17672457074003414, - "grad_norm": 0.447265625, - "learning_rate": 0.0001964357626938038, - "loss": 0.8575, + "epoch": 0.35343393195249295, + "grad_norm": 0.29296875, + "learning_rate": 0.00016335704516202051, + "loss": 0.9516, "step": 6160 }, { - "epoch": 0.17686801600849195, - "grad_norm": 0.40234375, - "learning_rate": 0.00019642250112124324, - "loss": 1.0123, + "epoch": 0.35372081014401285, + "grad_norm": 0.251953125, + "learning_rate": 0.00016327953657739678, + "loss": 0.971, "step": 6165 }, { - "epoch": 0.17701146127694978, - "grad_norm": 0.357421875, - "learning_rate": 0.00019640921537242365, - "loss": 0.9752, + "epoch": 0.35400768833553276, + "grad_norm": 0.275390625, + "learning_rate": 0.0001632019645359119, + "loss": 1.0291, "step": 6170 }, { - "epoch": 0.1771549065454076, - "grad_norm": 0.3671875, - "learning_rate": 0.00019639590545067622, - "loss": 0.873, + "epoch": 0.3542945665270526, + "grad_norm": 0.298828125, + "learning_rate": 0.00016312432911535528, + "loss": 0.9575, "step": 6175 }, { - "epoch": 0.1772983518138654, - "grad_norm": 0.37890625, - "learning_rate": 0.00019638257135933814, - "loss": 1.0131, + "epoch": 0.3545814447185725, + "grad_norm": 0.345703125, + "learning_rate": 0.00016304663039357986, + "loss": 1.0393, "step": 6180 }, { - "epoch": 0.17744179708232324, - "grad_norm": 0.41796875, - "learning_rate": 0.00019636921310175275, - "loss": 1.0252, + "epoch": 0.35486832291009235, + "grad_norm": 0.275390625, + "learning_rate": 0.0001629688684485021, + "loss": 0.9207, "step": 6185 }, { - "epoch": 0.17758524235078105, - "grad_norm": 0.416015625, - "learning_rate": 0.00019635583068126935, - "loss": 1.0032, + "epoch": 0.35515520110161225, + "grad_norm": 0.263671875, + "learning_rate": 0.00016289104335810185, + "loss": 0.9745, "step": 6190 }, { - "epoch": 0.17772868761923888, - "grad_norm": 0.361328125, - "learning_rate": 0.00019634242410124337, - "loss": 0.9363, + "epoch": 0.35544207929313215, + "grad_norm": 0.265625, + "learning_rate": 0.00016281315520042233, + "loss": 0.9181, "step": 6195 }, { - "epoch": 0.1778721328876967, - "grad_norm": 0.49609375, - "learning_rate": 0.0001963289933650363, - "loss": 1.0453, + "epoch": 0.355728957484652, + "grad_norm": 0.25, + "learning_rate": 0.0001627352040535699, + "loss": 0.8665, "step": 6200 }, { - "epoch": 0.1780155781561545, - "grad_norm": 0.390625, - "learning_rate": 0.0001963155384760156, - "loss": 0.9359, + "epoch": 0.3560158356761719, + 
"grad_norm": 0.271484375, + "learning_rate": 0.00016265718999571415, + "loss": 0.9876, "step": 6205 }, { - "epoch": 0.17815902342461234, - "grad_norm": 0.42578125, - "learning_rate": 0.0001963020594375549, - "loss": 0.9214, + "epoch": 0.3563027138676918, + "grad_norm": 0.27734375, + "learning_rate": 0.0001625791131050878, + "loss": 0.9732, "step": 6210 }, { - "epoch": 0.17830246869307015, - "grad_norm": 0.37890625, - "learning_rate": 0.00019628855625303384, - "loss": 0.9187, + "epoch": 0.35658959205921165, + "grad_norm": 0.283203125, + "learning_rate": 0.0001625009734599865, + "loss": 0.9622, "step": 6215 }, { - "epoch": 0.17844591396152798, - "grad_norm": 0.423828125, - "learning_rate": 0.00019627502892583806, - "loss": 1.0111, + "epoch": 0.35687647025073155, + "grad_norm": 0.275390625, + "learning_rate": 0.00016242277113876887, + "loss": 0.9373, "step": 6220 }, { - "epoch": 0.1785893592299858, - "grad_norm": 0.427734375, - "learning_rate": 0.00019626147745935936, - "loss": 0.959, + "epoch": 0.3571633484422514, + "grad_norm": 0.271484375, + "learning_rate": 0.00016234450621985635, + "loss": 0.9816, "step": 6225 }, { - "epoch": 0.17873280449844361, - "grad_norm": 0.435546875, - "learning_rate": 0.00019624790185699548, - "loss": 0.9706, + "epoch": 0.3574502266337713, + "grad_norm": 0.275390625, + "learning_rate": 0.00016226617878173317, + "loss": 1.0049, "step": 6230 }, { - "epoch": 0.17887624976690145, - "grad_norm": 0.373046875, - "learning_rate": 0.00019623430212215031, - "loss": 0.9551, + "epoch": 0.3577371048252912, + "grad_norm": 0.259765625, + "learning_rate": 0.00016218778890294636, + "loss": 0.9598, "step": 6235 }, { - "epoch": 0.17901969503535925, - "grad_norm": 0.40625, - "learning_rate": 0.0001962206782582337, - "loss": 0.9817, + "epoch": 0.35802398301681104, + "grad_norm": 0.283203125, + "learning_rate": 0.00016210933666210533, + "loss": 0.9202, "step": 6240 }, { - "epoch": 0.17916314030381708, - "grad_norm": 0.3828125, - "learning_rate": 0.00019620703026866166, - "loss": 0.9547, + "epoch": 0.35831086120833094, + "grad_norm": 0.26953125, + "learning_rate": 0.0001620308221378822, + "loss": 0.9408, "step": 6245 }, { - "epoch": 0.17930658557227488, - "grad_norm": 0.357421875, - "learning_rate": 0.0001961933581568561, - "loss": 1.0145, + "epoch": 0.35859773939985085, + "grad_norm": 0.302734375, + "learning_rate": 0.00016195224540901156, + "loss": 0.9816, "step": 6250 }, { - "epoch": 0.17945003084073272, - "grad_norm": 0.41796875, - "learning_rate": 0.00019617966192624512, - "loss": 0.9629, + "epoch": 0.3588846175913707, + "grad_norm": 0.25390625, + "learning_rate": 0.00016187360655429034, + "loss": 0.994, "step": 6255 }, { - "epoch": 0.17959347610919055, - "grad_norm": 0.3828125, - "learning_rate": 0.00019616594158026283, - "loss": 0.8404, + "epoch": 0.3591714957828906, + "grad_norm": 0.265625, + "learning_rate": 0.0001617949056525777, + "loss": 0.9918, "step": 6260 }, { - "epoch": 0.17973692137764835, - "grad_norm": 0.8828125, - "learning_rate": 0.00019615219712234932, - "loss": 0.965, + "epoch": 0.35945837397441044, + "grad_norm": 0.267578125, + "learning_rate": 0.0001617161427827951, + "loss": 0.9715, "step": 6265 }, { - "epoch": 0.17988036664610618, - "grad_norm": 0.390625, - "learning_rate": 0.0001961384285559508, - "loss": 0.8887, + "epoch": 0.35974525216593034, + "grad_norm": 0.26953125, + "learning_rate": 0.0001616373180239261, + "loss": 0.9482, "step": 6270 }, { - "epoch": 0.18002381191456399, - "grad_norm": 0.41015625, - "learning_rate": 0.00019612463588451946, - "loss": 
0.8915, + "epoch": 0.36003213035745024, + "grad_norm": 0.310546875, + "learning_rate": 0.0001615584314550164, + "loss": 0.9451, "step": 6275 }, { - "epoch": 0.18016725718302182, - "grad_norm": 0.3828125, - "learning_rate": 0.0001961108191115136, - "loss": 1.024, + "epoch": 0.3603190085489701, + "grad_norm": 0.28125, + "learning_rate": 0.00016147948315517357, + "loss": 0.9452, "step": 6280 }, { - "epoch": 0.18031070245147965, - "grad_norm": 0.373046875, - "learning_rate": 0.00019609697824039756, - "loss": 1.0038, + "epoch": 0.36060588674049, + "grad_norm": 0.25390625, + "learning_rate": 0.00016140047320356723, + "loss": 0.9466, "step": 6285 }, { - "epoch": 0.18045414771993745, - "grad_norm": 0.376953125, - "learning_rate": 0.00019608311327464167, - "loss": 0.9959, + "epoch": 0.3608927649320099, + "grad_norm": 0.255859375, + "learning_rate": 0.00016132140167942862, + "loss": 0.9951, "step": 6290 }, { - "epoch": 0.18059759298839528, - "grad_norm": 0.443359375, - "learning_rate": 0.00019606922421772232, - "loss": 1.0151, + "epoch": 0.36117964312352974, + "grad_norm": 0.27734375, + "learning_rate": 0.0001612422686620509, + "loss": 0.8864, "step": 6295 }, { - "epoch": 0.1807410382568531, - "grad_norm": 0.8984375, - "learning_rate": 0.00019605531107312195, - "loss": 0.9907, + "epoch": 0.36146652131504964, + "grad_norm": 0.26171875, + "learning_rate": 0.0001611630742307889, + "loss": 0.9521, "step": 6300 }, { - "epoch": 0.18088448352531092, - "grad_norm": 0.3671875, - "learning_rate": 0.00019604137384432904, - "loss": 0.9296, + "epoch": 0.3617533995065695, + "grad_norm": 0.2734375, + "learning_rate": 0.00016108381846505885, + "loss": 0.9414, "step": 6305 }, { - "epoch": 0.18102792879376875, - "grad_norm": 0.3984375, - "learning_rate": 0.00019602741253483817, - "loss": 0.8809, + "epoch": 0.3620402776980894, + "grad_norm": 0.255859375, + "learning_rate": 0.0001610045014443387, + "loss": 0.9188, "step": 6310 }, { - "epoch": 0.18117137406222655, - "grad_norm": 0.4140625, - "learning_rate": 0.00019601342714814979, - "loss": 0.9384, + "epoch": 0.3623271558896093, + "grad_norm": 0.259765625, + "learning_rate": 0.00016092512324816772, + "loss": 0.9346, "step": 6315 }, { - "epoch": 0.18131481933068438, - "grad_norm": 0.412109375, - "learning_rate": 0.00019599941768777055, - "loss": 0.9414, + "epoch": 0.36261403408112913, + "grad_norm": 0.263671875, + "learning_rate": 0.00016084568395614648, + "loss": 0.9766, "step": 6320 }, { - "epoch": 0.1814582645991422, - "grad_norm": 0.369140625, - "learning_rate": 0.00019598538415721307, - "loss": 0.9796, + "epoch": 0.36290091227264903, + "grad_norm": 0.287109375, + "learning_rate": 0.00016076618364793696, + "loss": 0.9934, "step": 6325 }, { - "epoch": 0.18160170986760002, - "grad_norm": 0.423828125, - "learning_rate": 0.000195971326559996, - "loss": 0.9561, + "epoch": 0.36318779046416894, + "grad_norm": 0.283203125, + "learning_rate": 0.0001606866224032622, + "loss": 0.9688, "step": 6330 }, { - "epoch": 0.18174515513605785, - "grad_norm": 0.3828125, - "learning_rate": 0.00019595724489964404, - "loss": 0.8997, + "epoch": 0.3634746686556888, + "grad_norm": 0.255859375, + "learning_rate": 0.0001606070003019064, + "loss": 0.8889, "step": 6335 }, { - "epoch": 0.18188860040451565, - "grad_norm": 0.361328125, - "learning_rate": 0.00019594313917968795, - "loss": 0.8972, + "epoch": 0.3637615468472087, + "grad_norm": 0.2890625, + "learning_rate": 0.00016052731742371485, + "loss": 0.9842, "step": 6340 }, { - "epoch": 0.18203204567297349, - "grad_norm": 0.40234375, - 
"learning_rate": 0.00019592900940366447, - "loss": 0.9795, + "epoch": 0.36404842503872853, + "grad_norm": 0.287109375, + "learning_rate": 0.00016044757384859365, + "loss": 0.9755, "step": 6345 }, { - "epoch": 0.1821754909414313, - "grad_norm": 0.396484375, - "learning_rate": 0.00019591485557511636, - "loss": 1.0519, + "epoch": 0.36433530323024843, + "grad_norm": 0.267578125, + "learning_rate": 0.0001603677696565098, + "loss": 0.8822, "step": 6350 }, { - "epoch": 0.18231893620988912, - "grad_norm": 0.396484375, - "learning_rate": 0.0001959006776975925, - "loss": 0.92, + "epoch": 0.36462218142176833, + "grad_norm": 0.271484375, + "learning_rate": 0.00016028790492749118, + "loss": 0.8748, "step": 6355 }, { - "epoch": 0.18246238147834692, - "grad_norm": 0.357421875, - "learning_rate": 0.00019588647577464775, - "loss": 0.9472, + "epoch": 0.3649090596132882, + "grad_norm": 0.35546875, + "learning_rate": 0.00016020797974162636, + "loss": 0.9934, "step": 6360 }, { - "epoch": 0.18260582674680476, - "grad_norm": 0.3984375, - "learning_rate": 0.00019587224980984293, - "loss": 1.0292, + "epoch": 0.3651959378048081, + "grad_norm": 0.28125, + "learning_rate": 0.0001601279941790644, + "loss": 0.9297, "step": 6365 }, { - "epoch": 0.1827492720152626, - "grad_norm": 0.35546875, - "learning_rate": 0.000195857999806745, - "loss": 0.9282, + "epoch": 0.365482815996328, + "grad_norm": 0.259765625, + "learning_rate": 0.00016004794832001507, + "loss": 0.9782, "step": 6370 }, { - "epoch": 0.1828927172837204, - "grad_norm": 0.384765625, - "learning_rate": 0.0001958437257689269, - "loss": 0.9665, + "epoch": 0.3657696941878478, + "grad_norm": 0.28125, + "learning_rate": 0.0001599678422447485, + "loss": 0.9823, "step": 6375 }, { - "epoch": 0.18303616255217822, - "grad_norm": 0.380859375, - "learning_rate": 0.0001958294276999676, - "loss": 1.031, + "epoch": 0.36605657237936773, + "grad_norm": 0.265625, + "learning_rate": 0.00015988767603359526, + "loss": 1.0366, "step": 6380 }, { - "epoch": 0.18317960782063603, - "grad_norm": 0.37890625, - "learning_rate": 0.0001958151056034521, - "loss": 1.0242, + "epoch": 0.3663434505708876, + "grad_norm": 0.271484375, + "learning_rate": 0.00015980744976694622, + "loss": 0.9987, "step": 6385 }, { - "epoch": 0.18332305308909386, - "grad_norm": 0.373046875, - "learning_rate": 0.00019580075948297135, - "loss": 0.9016, + "epoch": 0.3666303287624075, + "grad_norm": 0.271484375, + "learning_rate": 0.00015972716352525242, + "loss": 0.9615, "step": 6390 }, { - "epoch": 0.1834664983575517, - "grad_norm": 0.443359375, - "learning_rate": 0.00019578638934212247, - "loss": 0.9948, + "epoch": 0.3669172069539274, + "grad_norm": 0.283203125, + "learning_rate": 0.0001596468173890251, + "loss": 0.9545, "step": 6395 }, { - "epoch": 0.1836099436260095, - "grad_norm": 0.376953125, - "learning_rate": 0.00019577199518450847, - "loss": 1.0806, + "epoch": 0.3672040851454472, + "grad_norm": 0.26171875, + "learning_rate": 0.0001595664114388356, + "loss": 0.922, "step": 6400 }, { - "epoch": 0.18375338889446732, - "grad_norm": 0.373046875, - "learning_rate": 0.00019575757701373843, - "loss": 0.9022, + "epoch": 0.3674909633369671, + "grad_norm": 0.271484375, + "learning_rate": 0.00015948594575531508, + "loss": 0.9663, "step": 6405 }, { - "epoch": 0.18389683416292513, - "grad_norm": 0.390625, - "learning_rate": 0.00019574313483342748, - "loss": 0.9254, + "epoch": 0.367777841528487, + "grad_norm": 0.25, + "learning_rate": 0.00015940542041915478, + "loss": 0.9883, "step": 6410 }, { - "epoch": 0.18404027943138296, - 
"grad_norm": 0.40625, - "learning_rate": 0.00019572866864719677, - "loss": 1.0437, + "epoch": 0.36806471972000687, + "grad_norm": 0.263671875, + "learning_rate": 0.00015932483551110572, + "loss": 0.9278, "step": 6415 }, { - "epoch": 0.1841837246998408, - "grad_norm": 0.345703125, - "learning_rate": 0.00019571417845867337, - "loss": 0.9177, + "epoch": 0.3683515979115268, + "grad_norm": 0.283203125, + "learning_rate": 0.00015924419111197852, + "loss": 0.9985, "step": 6420 }, { - "epoch": 0.1843271699682986, - "grad_norm": 0.38671875, - "learning_rate": 0.0001956996642714905, - "loss": 1.0754, + "epoch": 0.3686384761030466, + "grad_norm": 0.26171875, + "learning_rate": 0.00015916348730264367, + "loss": 0.9238, "step": 6425 }, { - "epoch": 0.18447061523675642, - "grad_norm": 0.36328125, - "learning_rate": 0.00019568512608928736, - "loss": 1.0279, + "epoch": 0.3689253542945665, + "grad_norm": 0.271484375, + "learning_rate": 0.00015908272416403105, + "loss": 0.9527, "step": 6430 }, { - "epoch": 0.18461406050521423, - "grad_norm": 0.359375, - "learning_rate": 0.00019567056391570905, - "loss": 0.9644, + "epoch": 0.3692122324860864, + "grad_norm": 0.267578125, + "learning_rate": 0.00015900190177713016, + "loss": 0.9884, "step": 6435 }, { - "epoch": 0.18475750577367206, - "grad_norm": 0.380859375, - "learning_rate": 0.00019565597775440688, - "loss": 1.0274, + "epoch": 0.36949911067760627, + "grad_norm": 0.2578125, + "learning_rate": 0.00015892102022298986, + "loss": 0.9004, "step": 6440 }, { - "epoch": 0.18490095104212986, - "grad_norm": 0.33203125, - "learning_rate": 0.00019564136760903802, - "loss": 0.9298, + "epoch": 0.36978598886912617, + "grad_norm": 0.279296875, + "learning_rate": 0.0001588400795827184, + "loss": 0.9567, "step": 6445 }, { - "epoch": 0.1850443963105877, - "grad_norm": 0.40234375, - "learning_rate": 0.00019562673348326573, - "loss": 0.985, + "epoch": 0.37007286706064607, + "grad_norm": 0.27734375, + "learning_rate": 0.00015875907993748314, + "loss": 0.9362, "step": 6450 }, { - "epoch": 0.18518784157904553, - "grad_norm": 0.365234375, - "learning_rate": 0.00019561207538075926, - "loss": 0.9414, + "epoch": 0.3703597452521659, + "grad_norm": 0.267578125, + "learning_rate": 0.0001586780213685108, + "loss": 0.9851, "step": 6455 }, { - "epoch": 0.18533128684750333, - "grad_norm": 0.369140625, - "learning_rate": 0.00019559739330519388, - "loss": 0.9986, + "epoch": 0.3706466234436858, + "grad_norm": 0.263671875, + "learning_rate": 0.00015859690395708702, + "loss": 0.9376, "step": 6460 }, { - "epoch": 0.18547473211596116, - "grad_norm": 0.40625, - "learning_rate": 0.00019558268726025082, - "loss": 1.0191, + "epoch": 0.37093350163520566, + "grad_norm": 0.26171875, + "learning_rate": 0.00015851572778455657, + "loss": 0.9481, "step": 6465 }, { - "epoch": 0.18561817738441896, - "grad_norm": 0.453125, - "learning_rate": 0.00019556795724961742, - "loss": 0.9567, + "epoch": 0.37122037982672557, + "grad_norm": 0.271484375, + "learning_rate": 0.00015843449293232307, + "loss": 0.9841, "step": 6470 }, { - "epoch": 0.1857616226528768, - "grad_norm": 0.396484375, - "learning_rate": 0.00019555320327698693, - "loss": 0.944, + "epoch": 0.37150725801824547, + "grad_norm": 0.26953125, + "learning_rate": 0.00015835319948184903, + "loss": 0.962, "step": 6475 }, { - "epoch": 0.18590506792133463, - "grad_norm": 0.41015625, - "learning_rate": 0.00019553842534605868, - "loss": 0.9815, + "epoch": 0.3717941362097653, + "grad_norm": 0.2490234375, + "learning_rate": 0.0001582718475146557, + "loss": 0.9342, "step": 
6480 }, { - "epoch": 0.18604851318979243, - "grad_norm": 0.3671875, - "learning_rate": 0.000195523623460538, - "loss": 0.9674, + "epoch": 0.3720810144012852, + "grad_norm": 0.265625, + "learning_rate": 0.000158190437112323, + "loss": 0.9377, "step": 6485 }, { - "epoch": 0.18619195845825026, - "grad_norm": 0.33203125, - "learning_rate": 0.00019550879762413615, - "loss": 0.8564, + "epoch": 0.3723678925928051, + "grad_norm": 0.296875, + "learning_rate": 0.00015810896835648952, + "loss": 0.9895, "step": 6490 }, { - "epoch": 0.18633540372670807, - "grad_norm": 0.40625, - "learning_rate": 0.00019549394784057044, - "loss": 0.9149, + "epoch": 0.37265477078432496, + "grad_norm": 0.26953125, + "learning_rate": 0.00015802744132885227, + "loss": 1.0321, "step": 6495 }, { - "epoch": 0.1864788489951659, - "grad_norm": 0.396484375, - "learning_rate": 0.00019547907411356427, - "loss": 0.9507, + "epoch": 0.37294164897584486, + "grad_norm": 0.267578125, + "learning_rate": 0.0001579458561111667, + "loss": 0.9234, "step": 6500 }, { - "epoch": 0.18662229426362373, - "grad_norm": 0.380859375, - "learning_rate": 0.00019546417644684692, - "loss": 0.979, + "epoch": 0.3732285271673647, + "grad_norm": 0.271484375, + "learning_rate": 0.0001578642127852467, + "loss": 0.9092, "step": 6505 }, { - "epoch": 0.18676573953208153, - "grad_norm": 0.369140625, - "learning_rate": 0.00019544925484415372, - "loss": 0.916, + "epoch": 0.3735154053588846, + "grad_norm": 0.255859375, + "learning_rate": 0.00015778251143296437, + "loss": 0.9752, "step": 6510 }, { - "epoch": 0.18690918480053936, - "grad_norm": 0.3984375, - "learning_rate": 0.000195434309309226, - "loss": 0.9639, + "epoch": 0.3738022835504045, + "grad_norm": 0.263671875, + "learning_rate": 0.00015770075213625, + "loss": 0.9296, "step": 6515 }, { - "epoch": 0.18705263006899717, - "grad_norm": 0.388671875, - "learning_rate": 0.0001954193398458111, - "loss": 1.0653, + "epoch": 0.37408916174192436, + "grad_norm": 0.2578125, + "learning_rate": 0.000157618934977092, + "loss": 0.9304, "step": 6520 }, { - "epoch": 0.187196075337455, - "grad_norm": 0.37890625, - "learning_rate": 0.00019540434645766238, - "loss": 0.9854, + "epoch": 0.37437603993344426, + "grad_norm": 0.267578125, + "learning_rate": 0.00015753706003753678, + "loss": 1.0235, "step": 6525 }, { - "epoch": 0.1873395206059128, - "grad_norm": 0.390625, - "learning_rate": 0.0001953893291485391, - "loss": 0.8902, + "epoch": 0.37466291812496416, + "grad_norm": 0.28515625, + "learning_rate": 0.00015745512739968878, + "loss": 0.9754, "step": 6530 }, { - "epoch": 0.18748296587437063, - "grad_norm": 0.359375, - "learning_rate": 0.00019537428792220664, - "loss": 0.8787, + "epoch": 0.374949796316484, + "grad_norm": 0.2578125, + "learning_rate": 0.00015737313714571017, + "loss": 0.9551, "step": 6535 }, { - "epoch": 0.18762641114282846, - "grad_norm": 0.453125, - "learning_rate": 0.00019535922278243634, - "loss": 1.1199, + "epoch": 0.3752366745080039, + "grad_norm": 0.265625, + "learning_rate": 0.00015729108935782094, + "loss": 0.9059, "step": 6540 }, { - "epoch": 0.18776985641128627, - "grad_norm": 0.349609375, - "learning_rate": 0.00019534413373300546, - "loss": 0.9153, + "epoch": 0.37552355269952375, + "grad_norm": 0.287109375, + "learning_rate": 0.00015720898411829889, + "loss": 0.9985, "step": 6545 }, { - "epoch": 0.1879133016797441, - "grad_norm": 0.40234375, - "learning_rate": 0.00019532902077769735, - "loss": 0.951, + "epoch": 0.37581043089104366, + "grad_norm": 0.2578125, + "learning_rate": 0.00015712682150947923, + 
"loss": 0.9303, "step": 6550 }, { - "epoch": 0.1880567469482019, - "grad_norm": 0.396484375, - "learning_rate": 0.00019531388392030134, - "loss": 0.9883, + "epoch": 0.37609730908256356, + "grad_norm": 0.255859375, + "learning_rate": 0.0001570446016137549, + "loss": 0.9074, "step": 6555 }, { - "epoch": 0.18820019221665973, - "grad_norm": 0.4296875, - "learning_rate": 0.00019529872316461272, - "loss": 1.0234, + "epoch": 0.3763841872740834, + "grad_norm": 0.27734375, + "learning_rate": 0.00015696232451357616, + "loss": 0.9487, "step": 6560 }, { - "epoch": 0.18834363748511757, - "grad_norm": 0.416015625, - "learning_rate": 0.00019528353851443276, - "loss": 1.1201, + "epoch": 0.3766710654656033, + "grad_norm": 0.28515625, + "learning_rate": 0.0001568799902914506, + "loss": 1.0237, "step": 6565 }, { - "epoch": 0.18848708275357537, - "grad_norm": 0.373046875, - "learning_rate": 0.00019526832997356875, - "loss": 0.9429, + "epoch": 0.3769579436571232, + "grad_norm": 0.2890625, + "learning_rate": 0.00015679759902994332, + "loss": 0.9194, "step": 6570 }, { - "epoch": 0.1886305280220332, - "grad_norm": 0.396484375, - "learning_rate": 0.000195253097545834, - "loss": 1.0335, + "epoch": 0.37724482184864305, + "grad_norm": 0.26953125, + "learning_rate": 0.0001567151508116763, + "loss": 0.9603, "step": 6575 }, { - "epoch": 0.188773973290491, - "grad_norm": 0.36328125, - "learning_rate": 0.00019523784123504775, - "loss": 0.9304, + "epoch": 0.37753170004016295, + "grad_norm": 0.27734375, + "learning_rate": 0.00015663264571932892, + "loss": 0.9517, "step": 6580 }, { - "epoch": 0.18891741855894884, - "grad_norm": 0.392578125, - "learning_rate": 0.00019522256104503524, - "loss": 0.8934, + "epoch": 0.3778185782316828, + "grad_norm": 0.265625, + "learning_rate": 0.0001565500838356374, + "loss": 0.9255, "step": 6585 }, { - "epoch": 0.18906086382740667, - "grad_norm": 0.345703125, - "learning_rate": 0.00019520725697962777, - "loss": 0.944, + "epoch": 0.3781054564232027, + "grad_norm": 0.28515625, + "learning_rate": 0.00015646746524339497, + "loss": 1.0131, "step": 6590 }, { - "epoch": 0.18920430909586447, - "grad_norm": 0.376953125, - "learning_rate": 0.0001951919290426625, - "loss": 0.9723, + "epoch": 0.3783923346147226, + "grad_norm": 0.259765625, + "learning_rate": 0.00015638479002545182, + "loss": 0.946, "step": 6595 }, { - "epoch": 0.1893477543643223, - "grad_norm": 0.380859375, - "learning_rate": 0.00019517657723798268, - "loss": 0.9497, + "epoch": 0.37867921280624245, + "grad_norm": 0.271484375, + "learning_rate": 0.00015630205826471478, + "loss": 1.008, "step": 6600 }, { - "epoch": 0.1894911996327801, - "grad_norm": 0.40234375, - "learning_rate": 0.0001951612015694375, - "loss": 0.9015, + "epoch": 0.37896609099776235, + "grad_norm": 0.2578125, + "learning_rate": 0.00015621927004414747, + "loss": 1.0618, "step": 6605 }, { - "epoch": 0.18963464490123794, - "grad_norm": 0.359375, - "learning_rate": 0.00019514580204088212, - "loss": 0.9811, + "epoch": 0.37925296918928225, + "grad_norm": 0.279296875, + "learning_rate": 0.0001561364254467701, + "loss": 0.9527, "step": 6610 }, { - "epoch": 0.18977809016969574, - "grad_norm": 0.39453125, - "learning_rate": 0.00019513037865617774, - "loss": 0.9086, + "epoch": 0.3795398473808021, + "grad_norm": 0.255859375, + "learning_rate": 0.00015605352455565937, + "loss": 0.9921, "step": 6615 }, { - "epoch": 0.18992153543815357, - "grad_norm": 0.423828125, - "learning_rate": 0.00019511493141919145, - "loss": 0.9885, + "epoch": 0.379826725572322, + "grad_norm": 0.267578125, + 
"learning_rate": 0.0001559705674539486, + "loss": 1.021, "step": 6620 }, { - "epoch": 0.1900649807066114, - "grad_norm": 0.4140625, - "learning_rate": 0.00019509946033379643, - "loss": 1.0193, + "epoch": 0.38011360376384185, + "grad_norm": 0.265625, + "learning_rate": 0.0001558875542248272, + "loss": 0.9598, "step": 6625 }, { - "epoch": 0.1902084259750692, - "grad_norm": 0.53125, - "learning_rate": 0.00019508396540387178, - "loss": 0.9914, + "epoch": 0.38040048195536175, + "grad_norm": 0.265625, + "learning_rate": 0.0001558044849515411, + "loss": 0.9692, "step": 6630 }, { - "epoch": 0.19035187124352704, - "grad_norm": 0.369140625, - "learning_rate": 0.00019506844663330254, - "loss": 0.9416, + "epoch": 0.38068736014688165, + "grad_norm": 0.251953125, + "learning_rate": 0.00015572135971739242, + "loss": 0.9218, "step": 6635 }, { - "epoch": 0.19049531651198484, - "grad_norm": 0.435546875, - "learning_rate": 0.0001950529040259798, - "loss": 1.0069, + "epoch": 0.3809742383384015, + "grad_norm": 0.271484375, + "learning_rate": 0.0001556381786057392, + "loss": 0.926, "step": 6640 }, { - "epoch": 0.19063876178044267, - "grad_norm": 0.421875, - "learning_rate": 0.0001950373375858006, - "loss": 0.9679, + "epoch": 0.3812611165299214, + "grad_norm": 0.27734375, + "learning_rate": 0.00015555494169999578, + "loss": 0.935, "step": 6645 }, { - "epoch": 0.1907822070489005, - "grad_norm": 0.384765625, - "learning_rate": 0.00019502174731666797, - "loss": 0.9218, + "epoch": 0.3815479947214413, + "grad_norm": 0.267578125, + "learning_rate": 0.00015547164908363224, + "loss": 0.9532, "step": 6650 }, { - "epoch": 0.1909256523173583, - "grad_norm": 0.380859375, - "learning_rate": 0.00019500613322249088, - "loss": 0.8931, + "epoch": 0.38183487291296114, + "grad_norm": 0.2578125, + "learning_rate": 0.00015538830084017456, + "loss": 0.968, "step": 6655 }, { - "epoch": 0.19106909758581614, - "grad_norm": 0.416015625, - "learning_rate": 0.00019499049530718424, - "loss": 0.8353, + "epoch": 0.38212175110448104, + "grad_norm": 0.28125, + "learning_rate": 0.00015530489705320463, + "loss": 0.9956, "step": 6660 }, { - "epoch": 0.19121254285427394, - "grad_norm": 0.3828125, - "learning_rate": 0.00019497483357466908, - "loss": 1.0173, + "epoch": 0.3824086292960009, + "grad_norm": 0.271484375, + "learning_rate": 0.0001552214378063599, + "loss": 1.0108, "step": 6665 }, { - "epoch": 0.19135598812273177, - "grad_norm": 0.40234375, - "learning_rate": 0.00019495914802887226, - "loss": 0.9503, + "epoch": 0.3826955074875208, + "grad_norm": 0.2734375, + "learning_rate": 0.0001551379231833335, + "loss": 0.9581, "step": 6670 }, { - "epoch": 0.1914994333911896, - "grad_norm": 0.416015625, - "learning_rate": 0.0001949434386737266, - "loss": 1.0098, + "epoch": 0.3829823856790407, + "grad_norm": 0.283203125, + "learning_rate": 0.00015505435326787414, + "loss": 1.0014, "step": 6675 }, { - "epoch": 0.1916428786596474, - "grad_norm": 0.404296875, - "learning_rate": 0.00019492770551317106, - "loss": 0.8962, + "epoch": 0.38326926387056054, + "grad_norm": 0.28515625, + "learning_rate": 0.00015497072814378584, + "loss": 0.9081, "step": 6680 }, { - "epoch": 0.19178632392810524, - "grad_norm": 0.404296875, - "learning_rate": 0.00019491194855115036, - "loss": 1.0694, + "epoch": 0.38355614206208044, + "grad_norm": 0.28125, + "learning_rate": 0.0001548870478949281, + "loss": 0.9399, "step": 6685 }, { - "epoch": 0.19192976919656304, - "grad_norm": 0.353515625, - "learning_rate": 0.00019489616779161533, - "loss": 0.9669, + "epoch": 0.38384302025360034, + 
"grad_norm": 0.27734375, + "learning_rate": 0.00015480331260521565, + "loss": 0.9639, "step": 6690 }, { - "epoch": 0.19207321446502088, - "grad_norm": 0.376953125, - "learning_rate": 0.00019488036323852273, - "loss": 1.0524, + "epoch": 0.3841298984451202, + "grad_norm": 0.271484375, + "learning_rate": 0.00015471952235861843, + "loss": 0.9984, "step": 6695 }, { - "epoch": 0.19221665973347868, - "grad_norm": 0.380859375, - "learning_rate": 0.00019486453489583525, - "loss": 0.879, + "epoch": 0.3844167766366401, + "grad_norm": 0.28515625, + "learning_rate": 0.0001546356772391615, + "loss": 0.9647, "step": 6700 }, { - "epoch": 0.1923601050019365, - "grad_norm": 0.380859375, - "learning_rate": 0.00019484868276752155, - "loss": 0.9133, + "epoch": 0.38470365482816, + "grad_norm": 0.267578125, + "learning_rate": 0.0001545517773309249, + "loss": 0.9908, "step": 6705 }, { - "epoch": 0.19250355027039434, - "grad_norm": 0.390625, - "learning_rate": 0.0001948328068575563, - "loss": 1.0136, + "epoch": 0.38499053301967984, + "grad_norm": 0.255859375, + "learning_rate": 0.00015446782271804366, + "loss": 0.9489, "step": 6710 }, { - "epoch": 0.19264699553885214, - "grad_norm": 0.404296875, - "learning_rate": 0.00019481690716992013, - "loss": 1.0644, + "epoch": 0.38527741121119974, + "grad_norm": 0.26171875, + "learning_rate": 0.00015438381348470767, + "loss": 0.9667, "step": 6715 }, { - "epoch": 0.19279044080730998, - "grad_norm": 0.40234375, - "learning_rate": 0.0001948009837085996, - "loss": 0.9521, + "epoch": 0.3855642894027196, + "grad_norm": 0.28125, + "learning_rate": 0.00015429974971516156, + "loss": 0.9159, "step": 6720 }, { - "epoch": 0.19293388607576778, - "grad_norm": 0.392578125, - "learning_rate": 0.00019478503647758723, - "loss": 0.9555, + "epoch": 0.3858511675942395, + "grad_norm": 0.275390625, + "learning_rate": 0.0001542156314937047, + "loss": 0.8806, "step": 6725 }, { - "epoch": 0.1930773313442256, - "grad_norm": 0.3984375, - "learning_rate": 0.00019476906548088148, - "loss": 0.9552, + "epoch": 0.3861380457857594, + "grad_norm": 0.271484375, + "learning_rate": 0.000154131458904691, + "loss": 0.9385, "step": 6730 }, { - "epoch": 0.19322077661268344, - "grad_norm": 0.44921875, - "learning_rate": 0.00019475307072248682, - "loss": 0.973, + "epoch": 0.38642492397727923, + "grad_norm": 0.26171875, + "learning_rate": 0.00015404723203252894, + "loss": 0.9425, "step": 6735 }, { - "epoch": 0.19336422188114125, - "grad_norm": 0.4296875, - "learning_rate": 0.00019473705220641367, - "loss": 1.0572, + "epoch": 0.38671180216879913, + "grad_norm": 0.2734375, + "learning_rate": 0.0001539629509616814, + "loss": 1.0188, "step": 6740 }, { - "epoch": 0.19350766714959908, - "grad_norm": 0.392578125, - "learning_rate": 0.0001947210099366784, - "loss": 1.0534, + "epoch": 0.38699868036031904, + "grad_norm": 0.251953125, + "learning_rate": 0.00015387861577666559, + "loss": 0.9811, "step": 6745 }, { - "epoch": 0.19365111241805688, - "grad_norm": 0.376953125, - "learning_rate": 0.0001947049439173033, - "loss": 0.9432, + "epoch": 0.3872855585518389, + "grad_norm": 0.27734375, + "learning_rate": 0.00015379422656205307, + "loss": 0.956, "step": 6750 }, { - "epoch": 0.1937945576865147, - "grad_norm": 0.3671875, - "learning_rate": 0.00019468885415231668, - "loss": 1.0214, + "epoch": 0.3875724367433588, + "grad_norm": 0.275390625, + "learning_rate": 0.00015370978340246955, + "loss": 0.9814, "step": 6755 }, { - "epoch": 0.19393800295497254, - "grad_norm": 0.412109375, - "learning_rate": 0.00019467274064575275, - "loss": 
0.9158, + "epoch": 0.38785931493487863, + "grad_norm": 0.271484375, + "learning_rate": 0.00015362528638259478, + "loss": 0.9368, "step": 6760 }, { - "epoch": 0.19408144822343035, - "grad_norm": 0.361328125, - "learning_rate": 0.00019465660340165167, - "loss": 0.9696, + "epoch": 0.38814619312639853, + "grad_norm": 0.26171875, + "learning_rate": 0.0001535407355871626, + "loss": 0.8959, "step": 6765 }, { - "epoch": 0.19422489349188818, - "grad_norm": 0.3828125, - "learning_rate": 0.0001946404424240596, - "loss": 0.9509, + "epoch": 0.38843307131791843, + "grad_norm": 0.2578125, + "learning_rate": 0.00015345613110096068, + "loss": 0.8967, "step": 6770 }, { - "epoch": 0.19436833876034598, - "grad_norm": 0.365234375, - "learning_rate": 0.00019462425771702858, - "loss": 1.0353, + "epoch": 0.3887199495094383, + "grad_norm": 0.27734375, + "learning_rate": 0.00015337147300883066, + "loss": 1.0397, "step": 6775 }, { - "epoch": 0.1945117840288038, - "grad_norm": 0.375, - "learning_rate": 0.0001946080492846167, - "loss": 0.9767, + "epoch": 0.3890068277009582, + "grad_norm": 0.267578125, + "learning_rate": 0.0001532867613956678, + "loss": 0.9463, "step": 6780 }, { - "epoch": 0.19465522929726162, - "grad_norm": 0.404296875, - "learning_rate": 0.00019459181713088792, - "loss": 1.0067, + "epoch": 0.3892937058924781, + "grad_norm": 0.267578125, + "learning_rate": 0.0001532019963464211, + "loss": 0.9226, "step": 6785 }, { - "epoch": 0.19479867456571945, - "grad_norm": 0.396484375, - "learning_rate": 0.00019457556125991216, - "loss": 1.0046, + "epoch": 0.3895805840839979, + "grad_norm": 0.29296875, + "learning_rate": 0.00015311717794609325, + "loss": 0.9226, "step": 6790 }, { - "epoch": 0.19494211983417728, - "grad_norm": 0.38671875, - "learning_rate": 0.0001945592816757653, - "loss": 0.9236, + "epoch": 0.38986746227551783, + "grad_norm": 0.263671875, + "learning_rate": 0.0001530323062797402, + "loss": 0.9181, "step": 6795 }, { - "epoch": 0.19508556510263508, - "grad_norm": 0.361328125, - "learning_rate": 0.00019454297838252918, - "loss": 0.9194, + "epoch": 0.3901543404670377, + "grad_norm": 0.30859375, + "learning_rate": 0.00015294738143247148, + "loss": 0.9984, "step": 6800 }, { - "epoch": 0.19522901037109291, - "grad_norm": 0.39453125, - "learning_rate": 0.00019452665138429152, - "loss": 0.9597, + "epoch": 0.3904412186585576, + "grad_norm": 0.26171875, + "learning_rate": 0.00015286240348944997, + "loss": 0.9387, "step": 6805 }, { - "epoch": 0.19537245563955072, - "grad_norm": 0.3828125, - "learning_rate": 0.00019451030068514609, - "loss": 0.9398, + "epoch": 0.3907280968500775, + "grad_norm": 0.291015625, + "learning_rate": 0.00015277737253589164, + "loss": 0.956, "step": 6810 }, { - "epoch": 0.19551590090800855, - "grad_norm": 0.39453125, - "learning_rate": 0.0001944939262891925, - "loss": 0.9163, + "epoch": 0.3910149750415973, + "grad_norm": 0.291015625, + "learning_rate": 0.00015269228865706584, + "loss": 1.0067, "step": 6815 }, { - "epoch": 0.19565934617646638, - "grad_norm": 0.396484375, - "learning_rate": 0.00019447752820053634, - "loss": 0.9949, + "epoch": 0.3913018532331172, + "grad_norm": 0.3125, + "learning_rate": 0.0001526071519382948, + "loss": 0.9523, "step": 6820 }, { - "epoch": 0.19580279144492418, - "grad_norm": 0.39453125, - "learning_rate": 0.00019446110642328917, - "loss": 0.9618, + "epoch": 0.3915887314246371, + "grad_norm": 0.27734375, + "learning_rate": 0.00015252196246495382, + "loss": 0.9101, "step": 6825 }, { - "epoch": 0.19594623671338202, - "grad_norm": 0.396484375, - 
"learning_rate": 0.00019444466096156846, - "loss": 0.9944, + "epoch": 0.391875609616157, + "grad_norm": 0.3203125, + "learning_rate": 0.00015243672032247112, + "loss": 1.0325, "step": 6830 }, { - "epoch": 0.19608968198183982, - "grad_norm": 0.40625, - "learning_rate": 0.0001944281918194976, - "loss": 0.8835, + "epoch": 0.3921624878076769, + "grad_norm": 0.2578125, + "learning_rate": 0.00015235142559632766, + "loss": 0.9415, "step": 6835 }, { - "epoch": 0.19623312725029765, - "grad_norm": 0.404296875, - "learning_rate": 0.00019441169900120598, - "loss": 0.9982, + "epoch": 0.3924493659991967, + "grad_norm": 0.26953125, + "learning_rate": 0.00015226607837205727, + "loss": 0.9169, "step": 6840 }, { - "epoch": 0.19637657251875548, - "grad_norm": 0.421875, - "learning_rate": 0.00019439518251082886, - "loss": 0.9769, + "epoch": 0.3927362441907166, + "grad_norm": 0.2470703125, + "learning_rate": 0.00015218067873524625, + "loss": 0.9431, "step": 6845 }, { - "epoch": 0.19652001778721329, - "grad_norm": 0.453125, - "learning_rate": 0.00019437864235250744, - "loss": 0.9006, + "epoch": 0.3930231223822365, + "grad_norm": 0.267578125, + "learning_rate": 0.00015209522677153364, + "loss": 0.9736, "step": 6850 }, { - "epoch": 0.19666346305567112, - "grad_norm": 0.421875, - "learning_rate": 0.00019436207853038893, - "loss": 1.0589, + "epoch": 0.39331000057375637, + "grad_norm": 0.2578125, + "learning_rate": 0.00015200972256661075, + "loss": 1.0248, "step": 6855 }, { - "epoch": 0.19680690832412892, - "grad_norm": 0.369140625, - "learning_rate": 0.00019434549104862639, - "loss": 1.0009, + "epoch": 0.39359687876527627, + "grad_norm": 0.27734375, + "learning_rate": 0.00015192416620622145, + "loss": 1.0525, "step": 6860 }, { - "epoch": 0.19695035359258675, - "grad_norm": 0.3828125, - "learning_rate": 0.0001943288799113788, - "loss": 0.9733, + "epoch": 0.39388375695679617, + "grad_norm": 0.271484375, + "learning_rate": 0.00015183855777616188, + "loss": 1.0149, "step": 6865 }, { - "epoch": 0.19709379886104456, - "grad_norm": 0.384765625, - "learning_rate": 0.0001943122451228112, - "loss": 0.9762, + "epoch": 0.394170635148316, + "grad_norm": 0.3359375, + "learning_rate": 0.0001517528973622803, + "loss": 0.9726, "step": 6870 }, { - "epoch": 0.1972372441295024, - "grad_norm": 0.392578125, - "learning_rate": 0.0001942955866870944, - "loss": 1.1134, + "epoch": 0.3944575133398359, + "grad_norm": 0.26171875, + "learning_rate": 0.00015166718505047722, + "loss": 0.9891, "step": 6875 }, { - "epoch": 0.19738068939796022, - "grad_norm": 0.36328125, - "learning_rate": 0.00019427890460840526, - "loss": 0.9811, + "epoch": 0.39474439153135576, + "grad_norm": 0.2890625, + "learning_rate": 0.0001515814209267051, + "loss": 1.0325, "step": 6880 }, { - "epoch": 0.19752413466641802, - "grad_norm": 0.40234375, - "learning_rate": 0.00019426219889092647, - "loss": 1.0234, + "epoch": 0.39503126972287567, + "grad_norm": 0.283203125, + "learning_rate": 0.00015149560507696837, + "loss": 0.9378, "step": 6885 }, { - "epoch": 0.19766757993487585, - "grad_norm": 0.373046875, - "learning_rate": 0.00019424546953884675, - "loss": 0.9131, + "epoch": 0.39531814791439557, + "grad_norm": 0.2734375, + "learning_rate": 0.00015140973758732347, + "loss": 0.972, "step": 6890 }, { - "epoch": 0.19781102520333366, - "grad_norm": 0.390625, - "learning_rate": 0.00019422871655636065, - "loss": 0.986, + "epoch": 0.3956050261059154, + "grad_norm": 0.2578125, + "learning_rate": 0.0001513238185438784, + "loss": 0.8973, "step": 6895 }, { - "epoch": 0.1979544704717915, - 
"grad_norm": 0.3828125, - "learning_rate": 0.00019421193994766873, - "loss": 0.9785, + "epoch": 0.3958919042974353, + "grad_norm": 0.263671875, + "learning_rate": 0.00015123784803279302, + "loss": 0.9166, "step": 6900 }, { - "epoch": 0.19809791574024932, - "grad_norm": 0.466796875, - "learning_rate": 0.0001941951397169774, - "loss": 0.9507, + "epoch": 0.3961787824889552, + "grad_norm": 0.26953125, + "learning_rate": 0.00015115182614027872, + "loss": 1.0208, "step": 6905 }, { - "epoch": 0.19824136100870712, - "grad_norm": 0.37890625, - "learning_rate": 0.000194178315868499, - "loss": 1.0766, + "epoch": 0.39646566068047506, + "grad_norm": 0.263671875, + "learning_rate": 0.00015106575295259847, + "loss": 0.891, "step": 6910 }, { - "epoch": 0.19838480627716495, - "grad_norm": 0.3515625, - "learning_rate": 0.00019416146840645188, - "loss": 0.9981, + "epoch": 0.39675253887199496, + "grad_norm": 0.287109375, + "learning_rate": 0.00015097962855606663, + "loss": 0.9247, "step": 6915 }, { - "epoch": 0.19852825154562276, - "grad_norm": 0.37890625, - "learning_rate": 0.00019414459733506023, - "loss": 0.9361, + "epoch": 0.3970394170635148, + "grad_norm": 0.2490234375, + "learning_rate": 0.00015089345303704902, + "loss": 0.9241, "step": 6920 }, { - "epoch": 0.1986716968140806, - "grad_norm": 0.365234375, - "learning_rate": 0.00019412770265855414, - "loss": 0.9021, + "epoch": 0.3973262952550347, + "grad_norm": 0.2734375, + "learning_rate": 0.00015080722648196253, + "loss": 1.0022, "step": 6925 }, { - "epoch": 0.19881514208253842, - "grad_norm": 0.365234375, - "learning_rate": 0.00019411078438116969, - "loss": 1.0205, + "epoch": 0.3976131734465546, + "grad_norm": 0.263671875, + "learning_rate": 0.0001507209489772754, + "loss": 0.8877, "step": 6930 }, { - "epoch": 0.19895858735099622, - "grad_norm": 0.388671875, - "learning_rate": 0.00019409384250714882, - "loss": 0.8973, + "epoch": 0.39790005163807446, + "grad_norm": 0.29296875, + "learning_rate": 0.0001506346206095069, + "loss": 1.0253, "step": 6935 }, { - "epoch": 0.19910203261945406, - "grad_norm": 0.40234375, - "learning_rate": 0.00019407687704073943, - "loss": 0.9374, + "epoch": 0.39818692982959436, + "grad_norm": 0.267578125, + "learning_rate": 0.0001505482414652273, + "loss": 1.005, "step": 6940 }, { - "epoch": 0.19924547788791186, - "grad_norm": 0.39453125, - "learning_rate": 0.0001940598879861953, - "loss": 1.0014, + "epoch": 0.39847380802111426, + "grad_norm": 0.29296875, + "learning_rate": 0.00015046181163105786, + "loss": 0.9344, "step": 6945 }, { - "epoch": 0.1993889231563697, - "grad_norm": 0.39453125, - "learning_rate": 0.00019404287534777615, - "loss": 0.9518, + "epoch": 0.3987606862126341, + "grad_norm": 0.25390625, + "learning_rate": 0.00015037533119367053, + "loss": 1.0191, "step": 6950 }, { - "epoch": 0.1995323684248275, - "grad_norm": 0.3828125, - "learning_rate": 0.00019402583912974758, - "loss": 0.9958, + "epoch": 0.399047564404154, + "grad_norm": 0.263671875, + "learning_rate": 0.0001502888002397881, + "loss": 0.8942, "step": 6955 }, { - "epoch": 0.19967581369328533, - "grad_norm": 0.396484375, - "learning_rate": 0.00019400877933638114, - "loss": 0.9571, + "epoch": 0.39933444259567386, + "grad_norm": 0.28125, + "learning_rate": 0.00015020221885618407, + "loss": 1.0012, "step": 6960 }, { - "epoch": 0.19981925896174316, - "grad_norm": 0.408203125, - "learning_rate": 0.0001939916959719543, - "loss": 1.1132, + "epoch": 0.39962132078719376, + "grad_norm": 0.26953125, + "learning_rate": 0.00015011558712968234, + "loss": 0.974, "step": 
6965 }, { - "epoch": 0.19996270423020096, - "grad_norm": 0.373046875, - "learning_rate": 0.00019397458904075036, - "loss": 0.932, + "epoch": 0.39990819897871366, + "grad_norm": 0.3046875, + "learning_rate": 0.0001500289051471575, + "loss": 0.9002, "step": 6970 }, { - "epoch": 0.2001061494986588, - "grad_norm": 0.365234375, - "learning_rate": 0.00019395745854705864, - "loss": 0.9168, + "epoch": 0.4001950771702335, + "grad_norm": 0.26953125, + "learning_rate": 0.0001499421729955344, + "loss": 0.9575, "step": 6975 }, { - "epoch": 0.2002495947671166, - "grad_norm": 0.392578125, - "learning_rate": 0.00019394030449517428, - "loss": 0.896, + "epoch": 0.4004819553617534, + "grad_norm": 0.2578125, + "learning_rate": 0.0001498553907617882, + "loss": 1.0227, "step": 6980 }, { - "epoch": 0.20039304003557443, - "grad_norm": 0.412109375, - "learning_rate": 0.00019392312688939838, - "loss": 1.0326, + "epoch": 0.4007688335532733, + "grad_norm": 0.263671875, + "learning_rate": 0.00014976855853294436, + "loss": 0.9818, "step": 6985 }, { - "epoch": 0.20053648530403226, - "grad_norm": 0.373046875, - "learning_rate": 0.00019390592573403787, - "loss": 1.0483, + "epoch": 0.40105571174479315, + "grad_norm": 0.25390625, + "learning_rate": 0.00014968167639607845, + "loss": 1.0078, "step": 6990 }, { - "epoch": 0.20067993057249006, - "grad_norm": 0.412109375, - "learning_rate": 0.00019388870103340572, - "loss": 0.9058, + "epoch": 0.40134258993631305, + "grad_norm": 0.275390625, + "learning_rate": 0.00014959474443831597, + "loss": 0.9494, "step": 6995 }, { - "epoch": 0.2008233758409479, - "grad_norm": 0.408203125, - "learning_rate": 0.0001938714527918207, - "loss": 0.9996, + "epoch": 0.4016294681278329, + "grad_norm": 0.287109375, + "learning_rate": 0.00014950776274683266, + "loss": 0.9803, "step": 7000 }, { - "epoch": 0.2009668211094057, - "grad_norm": 0.419921875, - "learning_rate": 0.00019385418101360743, - "loss": 1.136, + "epoch": 0.4019163463193528, + "grad_norm": 0.287109375, + "learning_rate": 0.00014942073140885377, + "loss": 0.9356, "step": 7005 }, { - "epoch": 0.20111026637786353, - "grad_norm": 0.365234375, - "learning_rate": 0.0001938368857030966, - "loss": 0.9851, + "epoch": 0.4022032245108727, + "grad_norm": 0.294921875, + "learning_rate": 0.0001493336505116546, + "loss": 0.9966, "step": 7010 }, { - "epoch": 0.20125371164632136, - "grad_norm": 0.421875, - "learning_rate": 0.00019381956686462468, - "loss": 1.0102, + "epoch": 0.40249010270239255, + "grad_norm": 0.255859375, + "learning_rate": 0.00014924652014256014, + "loss": 0.9121, "step": 7015 }, { - "epoch": 0.20139715691477916, - "grad_norm": 0.3515625, - "learning_rate": 0.00019380222450253405, - "loss": 1.0698, + "epoch": 0.40277698089391245, + "grad_norm": 0.26171875, + "learning_rate": 0.0001491593403889448, + "loss": 1.0084, "step": 7020 }, { - "epoch": 0.201540602183237, - "grad_norm": 0.36328125, - "learning_rate": 0.000193784858621173, - "loss": 1.0485, + "epoch": 0.40306385908543235, + "grad_norm": 0.294921875, + "learning_rate": 0.00014907211133823273, + "loss": 0.9533, "step": 7025 }, { - "epoch": 0.2016840474516948, - "grad_norm": 0.373046875, - "learning_rate": 0.00019376746922489577, - "loss": 0.9615, + "epoch": 0.4033507372769522, + "grad_norm": 0.25390625, + "learning_rate": 0.0001489848330778973, + "loss": 0.8849, "step": 7030 }, { - "epoch": 0.20182749272015263, - "grad_norm": 0.384765625, - "learning_rate": 0.0001937500563180624, - "loss": 0.9034, + "epoch": 0.4036376154684721, + "grad_norm": 0.2890625, + "learning_rate": 
0.0001488975056954615, + "loss": 0.9374, "step": 7035 }, { - "epoch": 0.20197093798861043, - "grad_norm": 0.390625, - "learning_rate": 0.00019373261990503888, - "loss": 0.9912, + "epoch": 0.40392449365999195, + "grad_norm": 0.265625, + "learning_rate": 0.00014881012927849728, + "loss": 0.9389, "step": 7040 }, { - "epoch": 0.20211438325706826, - "grad_norm": 0.353515625, - "learning_rate": 0.0001937151599901971, - "loss": 0.9441, + "epoch": 0.40421137185151185, + "grad_norm": 0.267578125, + "learning_rate": 0.000148722703914626, + "loss": 0.9596, "step": 7045 }, { - "epoch": 0.2022578285255261, - "grad_norm": 0.416015625, - "learning_rate": 0.00019369767657791479, - "loss": 1.0198, + "epoch": 0.40449825004303175, + "grad_norm": 0.263671875, + "learning_rate": 0.00014863522969151796, + "loss": 0.9214, "step": 7050 }, { - "epoch": 0.2024012737939839, - "grad_norm": 0.42578125, - "learning_rate": 0.00019368016967257566, - "loss": 1.0151, + "epoch": 0.4047851282345516, + "grad_norm": 0.287109375, + "learning_rate": 0.00014854770669689253, + "loss": 0.9462, "step": 7055 }, { - "epoch": 0.20254471906244173, - "grad_norm": 0.375, - "learning_rate": 0.00019366263927856928, - "loss": 0.9792, + "epoch": 0.4050720064260715, + "grad_norm": 0.275390625, + "learning_rate": 0.00014846013501851796, + "loss": 1.0052, "step": 7060 }, { - "epoch": 0.20268816433089953, - "grad_norm": 0.392578125, - "learning_rate": 0.000193645085400291, - "loss": 0.9439, + "epoch": 0.4053588846175914, + "grad_norm": 0.263671875, + "learning_rate": 0.00014837251474421133, + "loss": 0.9172, "step": 7065 }, { - "epoch": 0.20283160959935737, - "grad_norm": 0.384765625, - "learning_rate": 0.00019362750804214222, - "loss": 0.9008, + "epoch": 0.40564576280911124, + "grad_norm": 0.263671875, + "learning_rate": 0.00014828484596183844, + "loss": 0.9318, "step": 7070 }, { - "epoch": 0.2029750548678152, - "grad_norm": 0.412109375, - "learning_rate": 0.00019360990720853014, - "loss": 0.931, + "epoch": 0.40593264100063114, + "grad_norm": 0.263671875, + "learning_rate": 0.0001481971287593138, + "loss": 0.9865, "step": 7075 }, { - "epoch": 0.203118500136273, - "grad_norm": 0.40234375, - "learning_rate": 0.0001935922829038679, - "loss": 0.8798, + "epoch": 0.406219519192151, + "grad_norm": 0.28125, + "learning_rate": 0.0001481093632246003, + "loss": 0.9302, "step": 7080 }, { - "epoch": 0.20326194540473083, - "grad_norm": 0.37109375, - "learning_rate": 0.00019357463513257437, - "loss": 0.9808, + "epoch": 0.4065063973836709, + "grad_norm": 0.26171875, + "learning_rate": 0.00014802154944570952, + "loss": 0.9485, "step": 7085 }, { - "epoch": 0.20340539067318864, - "grad_norm": 0.373046875, - "learning_rate": 0.00019355696389907455, - "loss": 1.0162, + "epoch": 0.4067932755751908, + "grad_norm": 0.271484375, + "learning_rate": 0.00014793368751070125, + "loss": 0.9007, "step": 7090 }, { - "epoch": 0.20354883594164647, - "grad_norm": 0.376953125, - "learning_rate": 0.00019353926920779912, - "loss": 0.8704, + "epoch": 0.40708015376671064, + "grad_norm": 0.283203125, + "learning_rate": 0.00014784577750768363, + "loss": 0.9349, "step": 7095 }, { - "epoch": 0.2036922812101043, - "grad_norm": 0.3984375, - "learning_rate": 0.00019352155106318471, - "loss": 0.9811, + "epoch": 0.40736703195823054, + "grad_norm": 0.25, + "learning_rate": 0.0001477578195248131, + "loss": 0.9483, "step": 7100 }, { - "epoch": 0.2038357264785621, - "grad_norm": 0.388671875, - "learning_rate": 0.00019350380946967391, - "loss": 0.9601, + "epoch": 0.40765391014975044, + "grad_norm": 
0.27734375, + "learning_rate": 0.000147669813650294, + "loss": 1.0298, "step": 7105 }, { - "epoch": 0.20397917174701993, - "grad_norm": 0.40234375, - "learning_rate": 0.00019348604443171502, - "loss": 0.9222, + "epoch": 0.4079407883412703, + "grad_norm": 0.267578125, + "learning_rate": 0.0001475817599723789, + "loss": 1.0117, "step": 7110 }, { - "epoch": 0.20412261701547774, - "grad_norm": 0.40234375, - "learning_rate": 0.0001934682559537624, - "loss": 1.028, + "epoch": 0.4082276665327902, + "grad_norm": 0.267578125, + "learning_rate": 0.00014749365857936824, + "loss": 0.9019, "step": 7115 }, { - "epoch": 0.20426606228393557, - "grad_norm": 0.400390625, - "learning_rate": 0.00019345044404027613, - "loss": 1.0805, + "epoch": 0.40851454472431004, + "grad_norm": 0.26953125, + "learning_rate": 0.00014740550955961022, + "loss": 0.9884, "step": 7120 }, { - "epoch": 0.20440950755239337, - "grad_norm": 0.4765625, - "learning_rate": 0.00019343260869572227, - "loss": 0.9281, + "epoch": 0.40880142291582994, + "grad_norm": 0.279296875, + "learning_rate": 0.0001473173130015009, + "loss": 0.9628, "step": 7125 }, { - "epoch": 0.2045529528208512, - "grad_norm": 0.37890625, - "learning_rate": 0.0001934147499245727, - "loss": 0.958, + "epoch": 0.40908830110734984, + "grad_norm": 0.271484375, + "learning_rate": 0.00014722906899348402, + "loss": 0.913, "step": 7130 }, { - "epoch": 0.20469639808930903, - "grad_norm": 0.357421875, - "learning_rate": 0.00019339686773130524, - "loss": 0.9115, + "epoch": 0.4093751792988697, + "grad_norm": 0.259765625, + "learning_rate": 0.00014714077762405085, + "loss": 0.9841, "step": 7135 }, { - "epoch": 0.20483984335776684, - "grad_norm": 0.40625, - "learning_rate": 0.0001933789621204035, - "loss": 1.0251, + "epoch": 0.4096620574903896, + "grad_norm": 0.28515625, + "learning_rate": 0.00014705243898174017, + "loss": 1.0012, "step": 7140 }, { - "epoch": 0.20498328862622467, - "grad_norm": 0.3984375, - "learning_rate": 0.000193361033096357, - "loss": 0.9985, + "epoch": 0.4099489356819095, + "grad_norm": 0.2470703125, + "learning_rate": 0.00014696405315513814, + "loss": 0.9609, "step": 7145 }, { - "epoch": 0.20512673389468247, - "grad_norm": 0.396484375, - "learning_rate": 0.00019334308066366114, - "loss": 0.9838, + "epoch": 0.41023581387342933, + "grad_norm": 0.27734375, + "learning_rate": 0.00014687562023287833, + "loss": 0.9618, "step": 7150 }, { - "epoch": 0.2052701791631403, - "grad_norm": 0.447265625, - "learning_rate": 0.00019332510482681717, - "loss": 0.9351, + "epoch": 0.41052269206494924, + "grad_norm": 0.29296875, + "learning_rate": 0.00014678714030364143, + "loss": 0.9723, "step": 7155 }, { - "epoch": 0.20541362443159814, - "grad_norm": 0.404296875, - "learning_rate": 0.00019330710559033225, - "loss": 0.9805, + "epoch": 0.4108095702564691, + "grad_norm": 0.248046875, + "learning_rate": 0.00014669861345615532, + "loss": 0.9298, "step": 7160 }, { - "epoch": 0.20555706970005594, - "grad_norm": 0.40234375, - "learning_rate": 0.00019328908295871932, - "loss": 1.0575, + "epoch": 0.411096448447989, + "grad_norm": 0.271484375, + "learning_rate": 0.00014661003977919492, + "loss": 0.9379, "step": 7165 }, { - "epoch": 0.20570051496851377, - "grad_norm": 0.349609375, - "learning_rate": 0.0001932710369364973, - "loss": 0.9565, + "epoch": 0.4113833266395089, + "grad_norm": 0.275390625, + "learning_rate": 0.0001465214193615821, + "loss": 0.9979, "step": 7170 }, { - "epoch": 0.20584396023697157, - "grad_norm": 0.400390625, - "learning_rate": 0.00019325296752819085, - "loss": 0.9124, + 
"epoch": 0.41167020483102873, + "grad_norm": 0.26953125, + "learning_rate": 0.00014643275229218563, + "loss": 0.9358, "step": 7175 }, { - "epoch": 0.2059874055054294, - "grad_norm": 0.384765625, - "learning_rate": 0.00019323487473833062, - "loss": 0.938, + "epoch": 0.41195708302254863, + "grad_norm": 0.263671875, + "learning_rate": 0.00014634403865992107, + "loss": 0.9752, "step": 7180 }, { - "epoch": 0.20613085077388724, - "grad_norm": 0.353515625, - "learning_rate": 0.00019321675857145304, - "loss": 0.9454, + "epoch": 0.41224396121406853, + "grad_norm": 0.283203125, + "learning_rate": 0.0001462552785537506, + "loss": 0.9811, "step": 7185 }, { - "epoch": 0.20627429604234504, - "grad_norm": 0.40234375, - "learning_rate": 0.0001931986190321004, - "loss": 1.0053, + "epoch": 0.4125308394055884, + "grad_norm": 0.25390625, + "learning_rate": 0.00014616647206268306, + "loss": 0.9357, "step": 7190 }, { - "epoch": 0.20641774131080287, - "grad_norm": 0.384765625, - "learning_rate": 0.0001931804561248209, - "loss": 0.9768, - "step": 7195 + "epoch": 0.4128177175971083, + "grad_norm": 0.3046875, + "learning_rate": 0.0001460776192757738, + "loss": 0.9227, + "step": 7195 }, { - "epoch": 0.20656118657926067, - "grad_norm": 0.396484375, - "learning_rate": 0.00019316226985416853, - "loss": 0.9618, + "epoch": 0.4131045957886281, + "grad_norm": 0.26953125, + "learning_rate": 0.00014598872028212463, + "loss": 0.9438, "step": 7200 }, { - "epoch": 0.2067046318477185, - "grad_norm": 0.375, - "learning_rate": 0.00019314406022470326, - "loss": 0.9631, + "epoch": 0.413391473980148, + "grad_norm": 0.2890625, + "learning_rate": 0.00014589977517088365, + "loss": 0.976, "step": 7205 }, { - "epoch": 0.2068480771161763, - "grad_norm": 0.373046875, - "learning_rate": 0.00019312582724099076, - "loss": 0.9908, + "epoch": 0.41367835217166793, + "grad_norm": 0.26171875, + "learning_rate": 0.0001458107840312452, + "loss": 0.9441, "step": 7210 }, { - "epoch": 0.20699152238463414, - "grad_norm": 0.361328125, - "learning_rate": 0.00019310757090760267, - "loss": 0.9496, + "epoch": 0.4139652303631878, + "grad_norm": 0.287109375, + "learning_rate": 0.00014572174695244976, + "loss": 0.9262, "step": 7215 }, { - "epoch": 0.20713496765309197, - "grad_norm": 0.349609375, - "learning_rate": 0.00019308929122911642, - "loss": 0.8024, + "epoch": 0.4142521085547077, + "grad_norm": 0.2490234375, + "learning_rate": 0.000145632664023784, + "loss": 1.0197, "step": 7220 }, { - "epoch": 0.20727841292154978, - "grad_norm": 0.3671875, - "learning_rate": 0.00019307098821011535, - "loss": 1.0377, + "epoch": 0.4145389867462276, + "grad_norm": 0.27734375, + "learning_rate": 0.00014554353533458042, + "loss": 0.9544, "step": 7225 }, { - "epoch": 0.2074218581900076, - "grad_norm": 0.412109375, - "learning_rate": 0.0001930526618551886, - "loss": 0.993, + "epoch": 0.4148258649377474, + "grad_norm": 0.259765625, + "learning_rate": 0.00014545436097421744, + "loss": 0.9347, "step": 7230 }, { - "epoch": 0.2075653034584654, - "grad_norm": 0.423828125, - "learning_rate": 0.00019303431216893122, - "loss": 0.9719, + "epoch": 0.4151127431292673, + "grad_norm": 0.2734375, + "learning_rate": 0.0001453651410321194, + "loss": 0.9956, "step": 7235 }, { - "epoch": 0.20770874872692324, - "grad_norm": 0.365234375, - "learning_rate": 0.00019301593915594403, - "loss": 0.9323, + "epoch": 0.41539962132078717, + "grad_norm": 0.26171875, + "learning_rate": 0.00014527587559775616, + "loss": 0.9399, "step": 7240 }, { - "epoch": 0.20785219399538107, - "grad_norm": 0.41796875, - 
"learning_rate": 0.00019299754282083376, - "loss": 1.0419, + "epoch": 0.4156864995123071, + "grad_norm": 0.2578125, + "learning_rate": 0.0001451865647606434, + "loss": 0.9771, "step": 7245 }, { - "epoch": 0.20799563926383888, - "grad_norm": 0.447265625, - "learning_rate": 0.00019297912316821298, - "loss": 0.9272, + "epoch": 0.415973377703827, + "grad_norm": 0.275390625, + "learning_rate": 0.00014509720861034212, + "loss": 0.9387, "step": 7250 }, { - "epoch": 0.2081390845322967, - "grad_norm": 0.482421875, - "learning_rate": 0.00019296068020270007, - "loss": 1.0369, + "epoch": 0.4162602558953468, + "grad_norm": 0.251953125, + "learning_rate": 0.00014500780723645897, + "loss": 0.9079, "step": 7255 }, { - "epoch": 0.2082825298007545, - "grad_norm": 0.404296875, - "learning_rate": 0.00019294221392891932, - "loss": 0.9362, + "epoch": 0.4165471340868667, + "grad_norm": 0.291015625, + "learning_rate": 0.00014491836072864578, + "loss": 1.007, "step": 7260 }, { - "epoch": 0.20842597506921234, - "grad_norm": 0.396484375, - "learning_rate": 0.0001929237243515008, - "loss": 0.9878, + "epoch": 0.4168340122783866, + "grad_norm": 0.251953125, + "learning_rate": 0.0001448288691765997, + "loss": 0.9495, "step": 7265 }, { - "epoch": 0.20856942033767017, - "grad_norm": 0.37109375, - "learning_rate": 0.00019290521147508042, - "loss": 0.9699, + "epoch": 0.41712089046990647, + "grad_norm": 0.28125, + "learning_rate": 0.0001447393326700631, + "loss": 0.8996, "step": 7270 }, { - "epoch": 0.20871286560612798, - "grad_norm": 0.3828125, - "learning_rate": 0.00019288667530430002, - "loss": 1.0186, + "epoch": 0.41740776866142637, + "grad_norm": 0.263671875, + "learning_rate": 0.0001446497512988234, + "loss": 0.9924, "step": 7275 }, { - "epoch": 0.2088563108745858, - "grad_norm": 0.404296875, - "learning_rate": 0.0001928681158438072, - "loss": 1.0065, + "epoch": 0.4176946468529462, + "grad_norm": 0.423828125, + "learning_rate": 0.00014456012515271294, + "loss": 0.9394, "step": 7280 }, { - "epoch": 0.2089997561430436, - "grad_norm": 0.421875, - "learning_rate": 0.0001928495330982554, - "loss": 1.002, + "epoch": 0.4179815250444661, + "grad_norm": 0.2578125, + "learning_rate": 0.0001444704543216091, + "loss": 0.8852, "step": 7285 }, { - "epoch": 0.20914320141150144, - "grad_norm": 0.3984375, - "learning_rate": 0.00019283092707230392, - "loss": 1.0799, + "epoch": 0.418268403235986, + "grad_norm": 0.291015625, + "learning_rate": 0.0001443807388954339, + "loss": 0.997, "step": 7290 }, { - "epoch": 0.20928664667995925, - "grad_norm": 0.384765625, - "learning_rate": 0.00019281229777061795, - "loss": 0.9725, + "epoch": 0.41855528142750587, + "grad_norm": 0.26171875, + "learning_rate": 0.00014429097896415425, + "loss": 0.9559, "step": 7295 }, { - "epoch": 0.20943009194841708, - "grad_norm": 0.40625, - "learning_rate": 0.0001927936451978684, - "loss": 1.063, + "epoch": 0.41884215961902577, + "grad_norm": 0.25390625, + "learning_rate": 0.00014420117461778155, + "loss": 0.9138, "step": 7300 }, { - "epoch": 0.2095735372168749, - "grad_norm": 0.384765625, - "learning_rate": 0.00019277496935873209, - "loss": 0.9276, + "epoch": 0.41912903781054567, + "grad_norm": 0.255859375, + "learning_rate": 0.00014411132594637185, + "loss": 0.9079, "step": 7305 }, { - "epoch": 0.20971698248533271, - "grad_norm": 0.38671875, - "learning_rate": 0.0001927562702578917, - "loss": 0.9878, + "epoch": 0.4194159160020655, + "grad_norm": 0.26171875, + "learning_rate": 0.0001440214330400256, + "loss": 0.9497, "step": 7310 }, { - "epoch": 0.20986042775379055, 
- "grad_norm": 0.390625, - "learning_rate": 0.00019273754790003565, - "loss": 0.9348, + "epoch": 0.4197027941935854, + "grad_norm": 0.2578125, + "learning_rate": 0.00014393149598888752, + "loss": 0.966, "step": 7315 }, { - "epoch": 0.21000387302224835, - "grad_norm": 0.392578125, - "learning_rate": 0.00019271880228985828, - "loss": 0.9764, + "epoch": 0.41998967238510526, + "grad_norm": 0.263671875, + "learning_rate": 0.0001438415148831468, + "loss": 0.9842, "step": 7320 }, { - "epoch": 0.21014731829070618, - "grad_norm": 0.396484375, - "learning_rate": 0.0001927000334320597, - "loss": 0.8807, + "epoch": 0.42027655057662516, + "grad_norm": 0.28125, + "learning_rate": 0.00014375148981303663, + "loss": 0.9818, "step": 7325 }, { - "epoch": 0.210290763559164, - "grad_norm": 0.4140625, - "learning_rate": 0.00019268124133134588, - "loss": 1.0798, + "epoch": 0.42056342876814506, + "grad_norm": 0.28515625, + "learning_rate": 0.00014366142086883436, + "loss": 0.9759, "step": 7330 }, { - "epoch": 0.21043420882762182, - "grad_norm": 0.38671875, - "learning_rate": 0.00019266242599242863, - "loss": 0.8977, + "epoch": 0.4208503069596649, + "grad_norm": 0.26953125, + "learning_rate": 0.00014357130814086135, + "loss": 0.9687, "step": 7335 }, { - "epoch": 0.21057765409607965, - "grad_norm": 0.369140625, - "learning_rate": 0.00019264358742002556, - "loss": 0.9311, + "epoch": 0.4211371851511848, + "grad_norm": 0.259765625, + "learning_rate": 0.00014348115171948283, + "loss": 0.9834, "step": 7340 }, { - "epoch": 0.21072109936453745, - "grad_norm": 0.38671875, - "learning_rate": 0.0001926247256188601, - "loss": 1.1143, + "epoch": 0.4214240633427047, + "grad_norm": 0.263671875, + "learning_rate": 0.00014339095169510786, + "loss": 0.9655, "step": 7345 }, { - "epoch": 0.21086454463299528, - "grad_norm": 0.41015625, - "learning_rate": 0.00019260584059366153, - "loss": 1.0245, + "epoch": 0.42171094153422456, + "grad_norm": 0.2890625, + "learning_rate": 0.00014330070815818922, + "loss": 0.9959, "step": 7350 }, { - "epoch": 0.2110079899014531, - "grad_norm": 0.3671875, - "learning_rate": 0.00019258693234916493, - "loss": 1.0762, + "epoch": 0.42199781972574446, + "grad_norm": 0.267578125, + "learning_rate": 0.00014321042119922337, + "loss": 0.9207, "step": 7355 }, { - "epoch": 0.21115143516991092, - "grad_norm": 0.396484375, - "learning_rate": 0.00019256800089011123, - "loss": 1.0508, + "epoch": 0.4222846979172643, + "grad_norm": 0.267578125, + "learning_rate": 0.00014312009090875025, + "loss": 0.9809, "step": 7360 }, { - "epoch": 0.21129488043836875, - "grad_norm": 0.390625, - "learning_rate": 0.00019254904622124716, - "loss": 0.9036, + "epoch": 0.4225715761087842, + "grad_norm": 0.275390625, + "learning_rate": 0.00014302971737735324, + "loss": 0.9781, "step": 7365 }, { - "epoch": 0.21143832570682655, - "grad_norm": 0.365234375, - "learning_rate": 0.0001925300683473252, - "loss": 0.9638, + "epoch": 0.4228584543003041, + "grad_norm": 0.279296875, + "learning_rate": 0.0001429393006956592, + "loss": 0.9526, "step": 7370 }, { - "epoch": 0.21158177097528438, - "grad_norm": 0.3515625, - "learning_rate": 0.00019251106727310384, - "loss": 0.9318, + "epoch": 0.42314533249182396, + "grad_norm": 0.267578125, + "learning_rate": 0.0001428488409543381, + "loss": 0.9646, "step": 7375 }, { - "epoch": 0.2117252162437422, - "grad_norm": 0.427734375, - "learning_rate": 0.0001924920430033472, - "loss": 0.953, + "epoch": 0.42343221068334386, + "grad_norm": 0.279296875, + "learning_rate": 0.0001427583382441032, + "loss": 0.9268, "step": 
7380 }, { - "epoch": 0.21186866151220002, - "grad_norm": 0.419921875, - "learning_rate": 0.0001924729955428253, - "loss": 0.9417, + "epoch": 0.42371908887486376, + "grad_norm": 0.28125, + "learning_rate": 0.00014266779265571087, + "loss": 0.9645, "step": 7385 }, { - "epoch": 0.21201210678065785, - "grad_norm": 0.390625, - "learning_rate": 0.00019245392489631392, - "loss": 0.9854, + "epoch": 0.4240059670663836, + "grad_norm": 0.28125, + "learning_rate": 0.00014257720427996037, + "loss": 0.9863, "step": 7390 }, { - "epoch": 0.21215555204911565, - "grad_norm": 0.455078125, - "learning_rate": 0.00019243483106859478, - "loss": 0.9789, + "epoch": 0.4242928452579035, + "grad_norm": 0.28125, + "learning_rate": 0.00014248657320769392, + "loss": 1.0031, "step": 7395 }, { - "epoch": 0.21229899731757348, - "grad_norm": 0.361328125, - "learning_rate": 0.00019241571406445525, - "loss": 0.9341, + "epoch": 0.42457972344942335, + "grad_norm": 0.263671875, + "learning_rate": 0.00014239589952979662, + "loss": 1.058, "step": 7400 }, { - "epoch": 0.2124424425860313, - "grad_norm": 0.396484375, - "learning_rate": 0.00019239657388868858, - "loss": 0.9716, + "epoch": 0.42486660164094325, + "grad_norm": 0.2734375, + "learning_rate": 0.00014230518333719616, + "loss": 0.9156, "step": 7405 }, { - "epoch": 0.21258588785448912, - "grad_norm": 0.37890625, - "learning_rate": 0.00019237741054609387, - "loss": 1.0287, + "epoch": 0.42515347983246315, + "grad_norm": 0.296875, + "learning_rate": 0.00014221442472086304, + "loss": 0.9806, "step": 7410 }, { - "epoch": 0.21272933312294695, - "grad_norm": 0.40234375, - "learning_rate": 0.000192358224041476, - "loss": 0.9047, + "epoch": 0.425440358023983, + "grad_norm": 0.287109375, + "learning_rate": 0.0001421236237718101, + "loss": 0.9215, "step": 7415 }, { - "epoch": 0.21287277839140475, - "grad_norm": 0.439453125, - "learning_rate": 0.00019233901437964562, - "loss": 1.0035, + "epoch": 0.4257272362155029, + "grad_norm": 0.251953125, + "learning_rate": 0.00014203278058109282, + "loss": 1.0512, "step": 7420 }, { - "epoch": 0.21301622365986259, - "grad_norm": 0.40625, - "learning_rate": 0.00019231978156541922, - "loss": 1.0443, + "epoch": 0.4260141144070228, + "grad_norm": 0.298828125, + "learning_rate": 0.0001419418952398089, + "loss": 1.0363, "step": 7425 }, { - "epoch": 0.2131596689283204, - "grad_norm": 0.3671875, - "learning_rate": 0.00019230052560361913, - "loss": 0.9597, + "epoch": 0.42630099259854265, + "grad_norm": 0.275390625, + "learning_rate": 0.00014185096783909837, + "loss": 0.9783, "step": 7430 }, { - "epoch": 0.21330311419677822, - "grad_norm": 0.361328125, - "learning_rate": 0.00019228124649907342, - "loss": 0.9585, + "epoch": 0.42658787079006255, + "grad_norm": 0.263671875, + "learning_rate": 0.00014175999847014346, + "loss": 0.9456, "step": 7435 }, { - "epoch": 0.21344655946523605, - "grad_norm": 0.42578125, - "learning_rate": 0.00019226194425661598, - "loss": 0.8917, + "epoch": 0.4268747489815824, + "grad_norm": 0.271484375, + "learning_rate": 0.00014166898722416845, + "loss": 0.9197, "step": 7440 }, { - "epoch": 0.21359000473369386, - "grad_norm": 0.400390625, - "learning_rate": 0.0001922426188810865, - "loss": 1.0287, + "epoch": 0.4271616271731023, + "grad_norm": 0.263671875, + "learning_rate": 0.00014157793419243962, + "loss": 0.9843, "step": 7445 }, { - "epoch": 0.2137334500021517, - "grad_norm": 0.42578125, - "learning_rate": 0.00019222327037733052, - "loss": 1.0023, + "epoch": 0.4274485053646222, + "grad_norm": 0.2578125, + "learning_rate": 
0.00014148683946626516, + "loss": 0.8788, "step": 7450 }, { - "epoch": 0.2138768952706095, - "grad_norm": 0.388671875, - "learning_rate": 0.00019220389875019932, - "loss": 1.0172, + "epoch": 0.42773538355614205, + "grad_norm": 0.275390625, + "learning_rate": 0.00014139570313699502, + "loss": 0.8889, "step": 7455 }, { - "epoch": 0.21402034053906732, - "grad_norm": 0.40234375, - "learning_rate": 0.00019218450400454998, - "loss": 0.8369, + "epoch": 0.42802226174766195, + "grad_norm": 0.279296875, + "learning_rate": 0.00014130452529602096, + "loss": 0.9524, "step": 7460 }, { - "epoch": 0.21416378580752513, - "grad_norm": 0.384765625, - "learning_rate": 0.00019216508614524545, - "loss": 0.9224, + "epoch": 0.42830913993918185, + "grad_norm": 0.265625, + "learning_rate": 0.00014121330603477633, + "loss": 0.9771, "step": 7465 }, { - "epoch": 0.21430723107598296, - "grad_norm": 0.453125, - "learning_rate": 0.00019214564517715433, - "loss": 1.0162, + "epoch": 0.4285960181307017, + "grad_norm": 0.294921875, + "learning_rate": 0.00014112204544473598, + "loss": 0.9544, "step": 7470 }, { - "epoch": 0.2144506763444408, - "grad_norm": 0.3984375, - "learning_rate": 0.0001921261811051512, - "loss": 0.9914, + "epoch": 0.4288828963222216, + "grad_norm": 0.28125, + "learning_rate": 0.00014103074361741623, + "loss": 0.9991, "step": 7475 }, { - "epoch": 0.2145941216128986, - "grad_norm": 0.388671875, - "learning_rate": 0.00019210669393411624, - "loss": 1.0234, + "epoch": 0.42916977451374144, + "grad_norm": 0.267578125, + "learning_rate": 0.00014093940064437477, + "loss": 0.9808, "step": 7480 }, { - "epoch": 0.21473756688135642, - "grad_norm": 0.380859375, - "learning_rate": 0.0001920871836689356, - "loss": 0.9356, + "epoch": 0.42945665270526134, + "grad_norm": 0.2734375, + "learning_rate": 0.0001408480166172106, + "loss": 0.98, "step": 7485 }, { - "epoch": 0.21488101214981423, - "grad_norm": 0.384765625, - "learning_rate": 0.00019206765031450112, - "loss": 0.9962, + "epoch": 0.42974353089678125, + "grad_norm": 0.279296875, + "learning_rate": 0.00014075659162756372, + "loss": 0.9992, "step": 7490 }, { - "epoch": 0.21502445741827206, - "grad_norm": 0.44140625, - "learning_rate": 0.0001920480938757104, - "loss": 0.9544, + "epoch": 0.4300304090883011, + "grad_norm": 0.26171875, + "learning_rate": 0.00014066512576711536, + "loss": 0.9738, "step": 7495 }, { - "epoch": 0.2151679026867299, - "grad_norm": 0.5546875, - "learning_rate": 0.00019202851435746695, - "loss": 0.9683, + "epoch": 0.430317287279821, + "grad_norm": 0.3125, + "learning_rate": 0.0001405736191275877, + "loss": 0.9805, "step": 7500 }, { - "epoch": 0.2153113479551877, - "grad_norm": 0.365234375, - "learning_rate": 0.0001920089117646799, - "loss": 0.9164, + "epoch": 0.4306041654713409, + "grad_norm": 0.259765625, + "learning_rate": 0.00014048207180074383, + "loss": 0.9712, "step": 7505 }, { - "epoch": 0.21545479322364552, - "grad_norm": 0.375, - "learning_rate": 0.00019198928610226435, - "loss": 0.9905, + "epoch": 0.43089104366286074, + "grad_norm": 0.27734375, + "learning_rate": 0.00014039048387838756, + "loss": 0.9586, "step": 7510 }, { - "epoch": 0.21559823849210333, - "grad_norm": 0.375, - "learning_rate": 0.00019196963737514103, - "loss": 1.0863, + "epoch": 0.43117792185438064, + "grad_norm": 0.267578125, + "learning_rate": 0.00014029885545236348, + "loss": 0.9223, "step": 7515 }, { - "epoch": 0.21574168376056116, - "grad_norm": 0.41796875, - "learning_rate": 0.00019194996558823655, - "loss": 0.9702, + "epoch": 0.4314648000459005, + "grad_norm": 
0.2890625, + "learning_rate": 0.00014020718661455678, + "loss": 0.9958, "step": 7520 }, { - "epoch": 0.215885129029019, - "grad_norm": 0.455078125, - "learning_rate": 0.00019193027074648324, - "loss": 1.0701, + "epoch": 0.4317516782374204, + "grad_norm": 0.259765625, + "learning_rate": 0.0001401154774568932, + "loss": 0.8981, "step": 7525 }, { - "epoch": 0.2160285742974768, - "grad_norm": 0.373046875, - "learning_rate": 0.00019191055285481927, - "loss": 0.9723, + "epoch": 0.4320385564289403, + "grad_norm": 0.259765625, + "learning_rate": 0.00014002372807133887, + "loss": 0.9039, "step": 7530 }, { - "epoch": 0.21617201956593463, - "grad_norm": 0.3671875, - "learning_rate": 0.00019189081191818853, - "loss": 0.9487, + "epoch": 0.43232543462046014, + "grad_norm": 0.275390625, + "learning_rate": 0.00013993193854990027, + "loss": 0.9583, "step": 7535 }, { - "epoch": 0.21631546483439243, - "grad_norm": 0.380859375, - "learning_rate": 0.00019187104794154074, - "loss": 1.04, + "epoch": 0.43261231281198004, + "grad_norm": 0.306640625, + "learning_rate": 0.00013984010898462416, + "loss": 0.951, "step": 7540 }, { - "epoch": 0.21645891010285026, - "grad_norm": 0.37109375, - "learning_rate": 0.00019185126092983133, - "loss": 0.9172, + "epoch": 0.43289919100349994, + "grad_norm": 0.259765625, + "learning_rate": 0.00013974823946759742, + "loss": 0.9705, "step": 7545 }, { - "epoch": 0.21660235537130806, - "grad_norm": 0.39453125, - "learning_rate": 0.00019183145088802158, - "loss": 1.0073, + "epoch": 0.4331860691950198, + "grad_norm": 0.25390625, + "learning_rate": 0.000139656330090947, + "loss": 0.9752, "step": 7550 }, { - "epoch": 0.2167458006397659, - "grad_norm": 0.392578125, - "learning_rate": 0.00019181161782107851, - "loss": 1.0155, + "epoch": 0.4334729473865397, + "grad_norm": 0.26953125, + "learning_rate": 0.00013956438094683986, + "loss": 1.0097, "step": 7555 }, { - "epoch": 0.21688924590822373, - "grad_norm": 0.404296875, - "learning_rate": 0.00019179176173397494, - "loss": 0.9949, + "epoch": 0.43375982557805953, + "grad_norm": 0.279296875, + "learning_rate": 0.00013947239212748277, + "loss": 1.046, "step": 7560 }, { - "epoch": 0.21703269117668153, - "grad_norm": 0.470703125, - "learning_rate": 0.0001917718826316894, - "loss": 0.9421, + "epoch": 0.43404670376957943, + "grad_norm": 0.271484375, + "learning_rate": 0.00013938036372512235, + "loss": 0.9044, "step": 7565 }, { - "epoch": 0.21717613644513936, - "grad_norm": 0.365234375, - "learning_rate": 0.0001917519805192062, - "loss": 0.9835, + "epoch": 0.43433358196109934, + "grad_norm": 0.265625, + "learning_rate": 0.0001392882958320449, + "loss": 0.9208, "step": 7570 }, { - "epoch": 0.21731958171359717, - "grad_norm": 0.44140625, - "learning_rate": 0.0001917320554015155, - "loss": 1.079, + "epoch": 0.4346204601526192, + "grad_norm": 0.2578125, + "learning_rate": 0.00013919618854057626, + "loss": 0.961, "step": 7575 }, { - "epoch": 0.217463026982055, - "grad_norm": 0.390625, - "learning_rate": 0.00019171210728361317, - "loss": 0.9574, + "epoch": 0.4349073383441391, + "grad_norm": 0.287109375, + "learning_rate": 0.00013910404194308188, + "loss": 0.9867, "step": 7580 }, { - "epoch": 0.21760647225051283, - "grad_norm": 0.38671875, - "learning_rate": 0.00019169213617050085, - "loss": 0.9433, + "epoch": 0.435194216535659, + "grad_norm": 0.255859375, + "learning_rate": 0.00013901185613196654, + "loss": 0.946, "step": 7585 }, { - "epoch": 0.21774991751897063, - "grad_norm": 0.380859375, - "learning_rate": 0.00019167214206718594, - "loss": 1.0346, + 
"epoch": 0.43548109472717883, + "grad_norm": 0.236328125, + "learning_rate": 0.00013891963119967439, + "loss": 0.9274, "step": 7590 }, { - "epoch": 0.21789336278742846, - "grad_norm": 0.384765625, - "learning_rate": 0.00019165212497868162, - "loss": 1.0376, + "epoch": 0.43576797291869873, + "grad_norm": 0.265625, + "learning_rate": 0.00013882736723868884, + "loss": 0.9327, "step": 7595 }, { - "epoch": 0.21803680805588627, - "grad_norm": 0.400390625, - "learning_rate": 0.0001916320849100068, - "loss": 0.9407, + "epoch": 0.4360548511102186, + "grad_norm": 0.263671875, + "learning_rate": 0.00013873506434153228, + "loss": 0.9173, "step": 7600 }, { - "epoch": 0.2181802533243441, - "grad_norm": 0.388671875, - "learning_rate": 0.00019161202186618618, - "loss": 0.979, + "epoch": 0.4363417293017385, + "grad_norm": 0.28515625, + "learning_rate": 0.0001386427226007664, + "loss": 0.9253, "step": 7605 }, { - "epoch": 0.21832369859280193, - "grad_norm": 0.396484375, - "learning_rate": 0.00019159193585225026, - "loss": 0.9824, + "epoch": 0.4366286074932584, + "grad_norm": 0.265625, + "learning_rate": 0.00013855034210899161, + "loss": 0.9487, "step": 7610 }, { - "epoch": 0.21846714386125973, - "grad_norm": 0.51953125, - "learning_rate": 0.00019157182687323522, - "loss": 0.9638, + "epoch": 0.4369154856847782, + "grad_norm": 0.26953125, + "learning_rate": 0.00013845792295884735, + "loss": 0.9742, "step": 7615 }, { - "epoch": 0.21861058912971756, - "grad_norm": 0.3984375, - "learning_rate": 0.00019155169493418304, - "loss": 0.9591, + "epoch": 0.43720236387629813, + "grad_norm": 0.265625, + "learning_rate": 0.0001383654652430117, + "loss": 0.9517, "step": 7620 }, { - "epoch": 0.21875403439817537, - "grad_norm": 0.388671875, - "learning_rate": 0.00019153154004014148, - "loss": 0.9445, + "epoch": 0.43748924206781803, + "grad_norm": 0.26171875, + "learning_rate": 0.00013827296905420143, + "loss": 1.016, "step": 7625 }, { - "epoch": 0.2188974796666332, - "grad_norm": 0.390625, - "learning_rate": 0.000191511362196164, - "loss": 0.9214, + "epoch": 0.4377761202593379, + "grad_norm": 0.275390625, + "learning_rate": 0.00013818043448517202, + "loss": 1.0417, "step": 7630 }, { - "epoch": 0.219040924935091, - "grad_norm": 0.421875, - "learning_rate": 0.00019149116140730984, - "loss": 1.0082, + "epoch": 0.4380629984508578, + "grad_norm": 0.271484375, + "learning_rate": 0.00013808786162871728, + "loss": 0.9953, "step": 7635 }, { - "epoch": 0.21918437020354883, - "grad_norm": 0.40234375, - "learning_rate": 0.00019147093767864402, - "loss": 0.9758, + "epoch": 0.4383498766423776, + "grad_norm": 0.263671875, + "learning_rate": 0.00013799525057766948, + "loss": 1.0229, "step": 7640 }, { - "epoch": 0.21932781547200667, - "grad_norm": 0.3515625, - "learning_rate": 0.00019145069101523724, - "loss": 1.0032, + "epoch": 0.4386367548338975, + "grad_norm": 0.28515625, + "learning_rate": 0.00013790260142489922, + "loss": 0.9127, "step": 7645 }, { - "epoch": 0.21947126074046447, - "grad_norm": 0.388671875, - "learning_rate": 0.00019143042142216607, - "loss": 0.971, + "epoch": 0.4389236330254174, + "grad_norm": 0.265625, + "learning_rate": 0.00013780991426331522, + "loss": 0.9701, "step": 7650 }, { - "epoch": 0.2196147060089223, - "grad_norm": 0.388671875, - "learning_rate": 0.0001914101289045127, - "loss": 0.8892, + "epoch": 0.43921051121693727, + "grad_norm": 0.267578125, + "learning_rate": 0.00013771718918586444, + "loss": 0.9483, "step": 7655 }, { - "epoch": 0.2197581512773801, - "grad_norm": 0.38671875, - "learning_rate": 
0.00019138981346736514, - "loss": 0.9113, + "epoch": 0.4394973894084572, + "grad_norm": 0.291015625, + "learning_rate": 0.00013762442628553179, + "loss": 0.947, "step": 7660 }, { - "epoch": 0.21990159654583794, - "grad_norm": 0.40234375, - "learning_rate": 0.00019136947511581712, - "loss": 1.0411, + "epoch": 0.4397842675999771, + "grad_norm": 0.2890625, + "learning_rate": 0.00013753162565534004, + "loss": 0.9881, "step": 7665 }, { - "epoch": 0.22004504181429577, - "grad_norm": 0.37109375, - "learning_rate": 0.00019134911385496815, - "loss": 0.8451, + "epoch": 0.4400711457914969, + "grad_norm": 0.25390625, + "learning_rate": 0.00013743878738834998, + "loss": 0.9753, "step": 7670 }, { - "epoch": 0.22018848708275357, - "grad_norm": 0.43359375, - "learning_rate": 0.00019132872968992344, - "loss": 0.9114, + "epoch": 0.4403580239830168, + "grad_norm": 0.248046875, + "learning_rate": 0.00013734591157765994, + "loss": 0.9692, "step": 7675 }, { - "epoch": 0.2203319323512114, - "grad_norm": 0.390625, - "learning_rate": 0.00019130832262579398, - "loss": 0.8942, + "epoch": 0.44064490217453667, + "grad_norm": 0.271484375, + "learning_rate": 0.000137252998316406, + "loss": 0.9459, "step": 7680 }, { - "epoch": 0.2204753776196692, - "grad_norm": 0.4140625, - "learning_rate": 0.00019128789266769646, - "loss": 0.9329, + "epoch": 0.44093178036605657, + "grad_norm": 0.26171875, + "learning_rate": 0.00013716004769776189, + "loss": 0.9521, "step": 7685 }, { - "epoch": 0.22061882288812704, - "grad_norm": 0.37890625, - "learning_rate": 0.00019126743982075337, - "loss": 0.9803, + "epoch": 0.44121865855757647, + "grad_norm": 0.271484375, + "learning_rate": 0.00013706705981493853, + "loss": 0.9345, "step": 7690 }, { - "epoch": 0.22076226815658487, - "grad_norm": 0.4140625, - "learning_rate": 0.0001912469640900929, - "loss": 0.9591, + "epoch": 0.4415055367490963, + "grad_norm": 0.279296875, + "learning_rate": 0.00013697403476118454, + "loss": 0.9281, "step": 7695 }, { - "epoch": 0.22090571342504267, - "grad_norm": 0.392578125, - "learning_rate": 0.00019122646548084892, - "loss": 1.0045, + "epoch": 0.4417924149406162, + "grad_norm": 0.29296875, + "learning_rate": 0.00013688097262978555, + "loss": 0.9223, "step": 7700 }, { - "epoch": 0.2210491586935005, - "grad_norm": 0.3671875, - "learning_rate": 0.0001912059439981612, - "loss": 1.0634, + "epoch": 0.4420792931321361, + "grad_norm": 0.263671875, + "learning_rate": 0.0001367878735140645, + "loss": 0.9012, "step": 7705 }, { - "epoch": 0.2211926039619583, - "grad_norm": 0.3984375, - "learning_rate": 0.00019118539964717505, - "loss": 0.9882, + "epoch": 0.44236617132365597, + "grad_norm": 0.267578125, + "learning_rate": 0.00013669473750738142, + "loss": 0.97, "step": 7710 }, { - "epoch": 0.22133604923041614, - "grad_norm": 0.439453125, - "learning_rate": 0.00019116483243304167, - "loss": 0.94, + "epoch": 0.44265304951517587, + "grad_norm": 0.2431640625, + "learning_rate": 0.00013660156470313327, + "loss": 0.9504, "step": 7715 }, { - "epoch": 0.22147949449887394, - "grad_norm": 0.4296875, - "learning_rate": 0.0001911442423609179, - "loss": 0.9533, + "epoch": 0.4429399277066957, + "grad_norm": 0.29296875, + "learning_rate": 0.00013650835519475395, + "loss": 1.0034, "step": 7720 }, { - "epoch": 0.22162293976733177, - "grad_norm": 0.35546875, - "learning_rate": 0.00019112362943596637, - "loss": 0.9011, + "epoch": 0.4432268058982156, + "grad_norm": 0.298828125, + "learning_rate": 0.0001364151090757142, + "loss": 0.9862, "step": 7725 }, { - "epoch": 0.2217663850357896, - 
"grad_norm": 0.45703125, - "learning_rate": 0.00019110299366335536, - "loss": 1.0333, + "epoch": 0.4435136840897355, + "grad_norm": 0.2578125, + "learning_rate": 0.0001363218264395214, + "loss": 0.9397, "step": 7730 }, { - "epoch": 0.2219098303042474, - "grad_norm": 0.396484375, - "learning_rate": 0.000191082335048259, - "loss": 0.9268, + "epoch": 0.44380056228125536, + "grad_norm": 0.2890625, + "learning_rate": 0.00013622850737971963, + "loss": 0.9846, "step": 7735 }, { - "epoch": 0.22205327557270524, - "grad_norm": 0.349609375, - "learning_rate": 0.00019106165359585698, - "loss": 0.8996, + "epoch": 0.44408744047277526, + "grad_norm": 0.267578125, + "learning_rate": 0.00013613515198988938, + "loss": 1.0255, "step": 7740 }, { - "epoch": 0.22219672084116304, - "grad_norm": 0.396484375, - "learning_rate": 0.0001910409493113349, - "loss": 0.9519, + "epoch": 0.44437431866429516, + "grad_norm": 0.287109375, + "learning_rate": 0.0001360417603636477, + "loss": 0.9535, "step": 7745 }, { - "epoch": 0.22234016610962087, - "grad_norm": 0.453125, - "learning_rate": 0.00019102022219988398, - "loss": 1.0214, + "epoch": 0.444661196855815, + "grad_norm": 0.30859375, + "learning_rate": 0.0001359483325946479, + "loss": 0.9717, "step": 7750 }, { - "epoch": 0.2224836113780787, - "grad_norm": 0.447265625, - "learning_rate": 0.0001909994722667012, - "loss": 1.0596, + "epoch": 0.4449480750473349, + "grad_norm": 0.294921875, + "learning_rate": 0.00013585486877657957, + "loss": 0.9772, "step": 7755 }, { - "epoch": 0.2226270566465365, - "grad_norm": 0.34765625, - "learning_rate": 0.00019097869951698913, - "loss": 1.0198, + "epoch": 0.44523495323885476, + "grad_norm": 0.2578125, + "learning_rate": 0.00013576136900316844, + "loss": 0.9284, "step": 7760 }, { - "epoch": 0.22277050191499434, - "grad_norm": 0.38671875, - "learning_rate": 0.00019095790395595632, - "loss": 0.8779, + "epoch": 0.44552183143037466, + "grad_norm": 0.29296875, + "learning_rate": 0.00013566783336817627, + "loss": 1.0055, "step": 7765 }, { - "epoch": 0.22291394718345214, - "grad_norm": 0.357421875, - "learning_rate": 0.0001909370855888168, - "loss": 0.9376, + "epoch": 0.44580870962189456, + "grad_norm": 0.28125, + "learning_rate": 0.00013557426196540083, + "loss": 0.9923, "step": 7770 }, { - "epoch": 0.22305739245190997, - "grad_norm": 0.39453125, - "learning_rate": 0.00019091624442079045, - "loss": 0.9761, + "epoch": 0.4460955878134144, + "grad_norm": 0.294921875, + "learning_rate": 0.00013548065488867573, + "loss": 0.9646, "step": 7775 }, { - "epoch": 0.2232008377203678, - "grad_norm": 0.39453125, - "learning_rate": 0.00019089538045710284, - "loss": 0.9818, + "epoch": 0.4463824660049343, + "grad_norm": 0.28125, + "learning_rate": 0.00013538701223187033, + "loss": 0.9368, "step": 7780 }, { - "epoch": 0.2233442829888256, - "grad_norm": 0.431640625, - "learning_rate": 0.00019087449370298523, - "loss": 0.9953, + "epoch": 0.4466693441964542, + "grad_norm": 0.265625, + "learning_rate": 0.0001352933340888897, + "loss": 0.9225, "step": 7785 }, { - "epoch": 0.22348772825728344, - "grad_norm": 0.388671875, - "learning_rate": 0.00019085358416367457, - "loss": 0.9529, + "epoch": 0.44695622238797406, + "grad_norm": 0.263671875, + "learning_rate": 0.0001351996205536745, + "loss": 0.9874, "step": 7790 }, { - "epoch": 0.22363117352574124, - "grad_norm": 0.439453125, - "learning_rate": 0.00019083265184441364, - "loss": 0.9311, + "epoch": 0.44724310057949396, + "grad_norm": 0.291015625, + "learning_rate": 0.0001351058717202009, + "loss": 0.9323, "step": 7795 }, 
{ - "epoch": 0.22377461879419908, - "grad_norm": 0.37109375, - "learning_rate": 0.0001908116967504508, - "loss": 0.9364, + "epoch": 0.4475299787710138, + "grad_norm": 0.283203125, + "learning_rate": 0.00013501208768248042, + "loss": 0.9731, "step": 7800 }, { - "epoch": 0.22391806406265688, - "grad_norm": 0.34375, - "learning_rate": 0.00019079071888704018, - "loss": 0.952, + "epoch": 0.4478168569625337, + "grad_norm": 0.263671875, + "learning_rate": 0.0001349182685345599, + "loss": 0.9228, "step": 7805 }, { - "epoch": 0.2240615093311147, - "grad_norm": 0.421875, - "learning_rate": 0.00019076971825944164, - "loss": 0.9835, + "epoch": 0.4481037351540536, + "grad_norm": 0.279296875, + "learning_rate": 0.00013482441437052134, + "loss": 1.051, "step": 7810 }, { - "epoch": 0.22420495459957254, - "grad_norm": 0.408203125, - "learning_rate": 0.00019074869487292068, - "loss": 0.9643, + "epoch": 0.44839061334557345, + "grad_norm": 0.26953125, + "learning_rate": 0.00013473052528448201, + "loss": 0.9272, "step": 7815 }, { - "epoch": 0.22434839986803035, - "grad_norm": 0.40234375, - "learning_rate": 0.00019072764873274856, - "loss": 1.0037, + "epoch": 0.44867749153709335, + "grad_norm": 0.294921875, + "learning_rate": 0.00013463660137059407, + "loss": 0.9549, "step": 7820 }, { - "epoch": 0.22449184513648818, - "grad_norm": 0.412109375, - "learning_rate": 0.00019070657984420226, - "loss": 0.9933, + "epoch": 0.44896436972861326, + "grad_norm": 0.248046875, + "learning_rate": 0.0001345426427230446, + "loss": 0.8974, "step": 7825 }, { - "epoch": 0.22463529040494598, - "grad_norm": 0.44140625, - "learning_rate": 0.0001906854882125644, - "loss": 0.9303, + "epoch": 0.4492512479201331, + "grad_norm": 0.26953125, + "learning_rate": 0.0001344486494360555, + "loss": 0.9423, "step": 7830 }, { - "epoch": 0.2247787356734038, - "grad_norm": 0.404296875, - "learning_rate": 0.00019066437384312334, - "loss": 0.9719, + "epoch": 0.449538126111653, + "grad_norm": 0.2734375, + "learning_rate": 0.00013435462160388351, + "loss": 0.9031, "step": 7835 }, { - "epoch": 0.22492218094186164, - "grad_norm": 0.43359375, - "learning_rate": 0.00019064323674117318, - "loss": 1.0467, + "epoch": 0.44982500430317285, + "grad_norm": 0.26953125, + "learning_rate": 0.00013426055932081997, + "loss": 0.908, "step": 7840 }, { - "epoch": 0.22506562621031945, - "grad_norm": 0.376953125, - "learning_rate": 0.00019062207691201367, - "loss": 0.8498, + "epoch": 0.45011188249469275, + "grad_norm": 0.26953125, + "learning_rate": 0.00013416646268119074, + "loss": 1.0104, "step": 7845 }, { - "epoch": 0.22520907147877728, - "grad_norm": 0.361328125, - "learning_rate": 0.0001906008943609502, - "loss": 0.9348, + "epoch": 0.45039876068621265, + "grad_norm": 0.263671875, + "learning_rate": 0.00013407233177935608, + "loss": 1.0248, "step": 7850 }, { - "epoch": 0.22535251674723508, - "grad_norm": 0.361328125, - "learning_rate": 0.000190579689093294, - "loss": 0.9587, + "epoch": 0.4506856388777325, + "grad_norm": 0.296875, + "learning_rate": 0.00013397816670971072, + "loss": 0.9396, "step": 7855 }, { - "epoch": 0.2254959620156929, - "grad_norm": 0.39453125, - "learning_rate": 0.0001905584611143619, - "loss": 1.0136, + "epoch": 0.4509725170692524, + "grad_norm": 0.279296875, + "learning_rate": 0.00013388396756668354, + "loss": 0.907, "step": 7860 }, { - "epoch": 0.22563940728415074, - "grad_norm": 0.373046875, - "learning_rate": 0.00019053721042947645, - "loss": 0.9712, + "epoch": 0.4512593952607723, + "grad_norm": 0.26171875, + "learning_rate": 
0.00013378973444473776, + "loss": 0.9295, "step": 7865 }, { - "epoch": 0.22578285255260855, - "grad_norm": 0.384765625, - "learning_rate": 0.00019051593704396587, - "loss": 0.928, + "epoch": 0.45154627345229215, + "grad_norm": 0.259765625, + "learning_rate": 0.0001336954674383705, + "loss": 0.9777, "step": 7870 }, { - "epoch": 0.22592629782106638, - "grad_norm": 0.43359375, - "learning_rate": 0.0001904946409631641, - "loss": 0.9967, + "epoch": 0.45183315164381205, + "grad_norm": 0.25, + "learning_rate": 0.00013360116664211293, + "loss": 0.8774, "step": 7875 }, { - "epoch": 0.22606974308952418, - "grad_norm": 0.4140625, - "learning_rate": 0.00019047332219241078, - "loss": 0.9607, + "epoch": 0.4521200298353319, + "grad_norm": 0.2490234375, + "learning_rate": 0.00013350683215053013, + "loss": 0.9259, "step": 7880 }, { - "epoch": 0.22621318835798201, - "grad_norm": 0.35546875, - "learning_rate": 0.00019045198073705118, - "loss": 0.9828, + "epoch": 0.4524069080268518, + "grad_norm": 0.271484375, + "learning_rate": 0.00013341246405822088, + "loss": 0.9176, "step": 7885 }, { - "epoch": 0.22635663362643982, - "grad_norm": 0.439453125, - "learning_rate": 0.00019043061660243632, - "loss": 0.964, + "epoch": 0.4526937862183717, + "grad_norm": 0.2451171875, + "learning_rate": 0.00013331806245981775, + "loss": 0.9333, "step": 7890 }, { - "epoch": 0.22650007889489765, - "grad_norm": 0.427734375, - "learning_rate": 0.00019040922979392288, - "loss": 1.0641, + "epoch": 0.45298066440989154, + "grad_norm": 0.265625, + "learning_rate": 0.0001332236274499869, + "loss": 1.0022, "step": 7895 }, { - "epoch": 0.22664352416335548, - "grad_norm": 0.392578125, - "learning_rate": 0.00019038782031687325, - "loss": 0.9875, + "epoch": 0.45326754260141144, + "grad_norm": 0.271484375, + "learning_rate": 0.00013312915912342793, + "loss": 0.9649, "step": 7900 }, { - "epoch": 0.22678696943181328, - "grad_norm": 0.412109375, - "learning_rate": 0.00019036638817665542, - "loss": 0.9646, + "epoch": 0.45355442079293135, + "grad_norm": 0.30078125, + "learning_rate": 0.0001330346575748739, + "loss": 0.9092, "step": 7905 }, { - "epoch": 0.22693041470027112, - "grad_norm": 0.68359375, - "learning_rate": 0.0001903449333786432, - "loss": 1.0035, + "epoch": 0.4538412989844512, + "grad_norm": 0.265625, + "learning_rate": 0.00013294012289909114, + "loss": 0.9798, "step": 7910 }, { - "epoch": 0.22707385996872892, - "grad_norm": 0.400390625, - "learning_rate": 0.00019032345592821597, - "loss": 1.0024, + "epoch": 0.4541281771759711, + "grad_norm": 0.2578125, + "learning_rate": 0.00013284555519087933, + "loss": 0.9607, "step": 7915 }, { - "epoch": 0.22721730523718675, - "grad_norm": 0.3984375, - "learning_rate": 0.00019030195583075881, - "loss": 0.9472, + "epoch": 0.45441505536749094, + "grad_norm": 0.267578125, + "learning_rate": 0.0001327509545450711, + "loss": 0.9416, "step": 7920 }, { - "epoch": 0.22736075050564458, - "grad_norm": 0.45703125, - "learning_rate": 0.00019028043309166255, - "loss": 1.0769, + "epoch": 0.45470193355901084, + "grad_norm": 0.30078125, + "learning_rate": 0.0001326563210565322, + "loss": 0.9335, "step": 7925 }, { - "epoch": 0.22750419577410239, - "grad_norm": 0.388671875, - "learning_rate": 0.00019025888771632355, - "loss": 0.9071, + "epoch": 0.45498881175053074, + "grad_norm": 0.2890625, + "learning_rate": 0.00013256165482016137, + "loss": 0.977, "step": 7930 }, { - "epoch": 0.22764764104256022, - "grad_norm": 0.4140625, - "learning_rate": 0.00019023731971014405, - "loss": 0.9971, + "epoch": 0.4552756899420506, + 
"grad_norm": 0.26953125, + "learning_rate": 0.00013246695593089, + "loss": 0.9757, "step": 7935 }, { - "epoch": 0.22779108631101802, - "grad_norm": 0.4375, - "learning_rate": 0.00019021572907853177, - "loss": 0.9252, + "epoch": 0.4555625681335705, + "grad_norm": 0.283203125, + "learning_rate": 0.00013237222448368247, + "loss": 0.9762, "step": 7940 }, { - "epoch": 0.22793453157947585, - "grad_norm": 0.373046875, - "learning_rate": 0.0001901941158269002, - "loss": 0.885, + "epoch": 0.4558494463250904, + "grad_norm": 0.263671875, + "learning_rate": 0.00013227746057353562, + "loss": 1.0154, "step": 7945 }, { - "epoch": 0.22807797684793368, - "grad_norm": 0.390625, - "learning_rate": 0.00019017247996066852, - "loss": 0.923, + "epoch": 0.45613632451661024, + "grad_norm": 0.263671875, + "learning_rate": 0.0001321826642954789, + "loss": 0.9798, "step": 7950 }, { - "epoch": 0.2282214221163915, - "grad_norm": 0.42578125, - "learning_rate": 0.00019015082148526148, - "loss": 1.0611, + "epoch": 0.45642320270813014, + "grad_norm": 0.2470703125, + "learning_rate": 0.00013208783574457432, + "loss": 1.0161, "step": 7955 }, { - "epoch": 0.22836486738484932, - "grad_norm": 0.400390625, - "learning_rate": 0.00019012914040610963, - "loss": 1.0319, + "epoch": 0.45671008089965, + "grad_norm": 0.275390625, + "learning_rate": 0.00013199297501591603, + "loss": 0.9881, "step": 7960 }, { - "epoch": 0.22850831265330712, - "grad_norm": 0.435546875, - "learning_rate": 0.0001901074367286491, - "loss": 0.8917, + "epoch": 0.4569969590911699, + "grad_norm": 0.263671875, + "learning_rate": 0.00013189808220463072, + "loss": 0.9788, "step": 7965 }, { - "epoch": 0.22865175792176495, - "grad_norm": 0.384765625, - "learning_rate": 0.00019008571045832167, - "loss": 0.9621, + "epoch": 0.4572838372826898, + "grad_norm": 0.283203125, + "learning_rate": 0.00013180315740587701, + "loss": 1.0485, "step": 7970 }, { - "epoch": 0.22879520319022276, - "grad_norm": 0.357421875, - "learning_rate": 0.00019006396160057488, - "loss": 0.9565, + "epoch": 0.45757071547420963, + "grad_norm": 0.294921875, + "learning_rate": 0.00013170820071484572, + "loss": 0.951, "step": 7975 }, { - "epoch": 0.2289386484586806, - "grad_norm": 0.38671875, - "learning_rate": 0.00019004219016086188, - "loss": 0.9919, + "epoch": 0.45785759366572953, + "grad_norm": 0.263671875, + "learning_rate": 0.0001316132122267597, + "loss": 0.9431, "step": 7980 }, { - "epoch": 0.22908209372713842, - "grad_norm": 0.431640625, - "learning_rate": 0.0001900203961446414, - "loss": 0.9074, + "epoch": 0.45814447185724944, + "grad_norm": 0.267578125, + "learning_rate": 0.00013151819203687356, + "loss": 0.8649, "step": 7985 }, { - "epoch": 0.22922553899559622, - "grad_norm": 0.4140625, - "learning_rate": 0.00018999857955737798, - "loss": 0.9285, + "epoch": 0.4584313500487693, + "grad_norm": 0.3046875, + "learning_rate": 0.00013142314024047375, + "loss": 0.9131, "step": 7990 }, { - "epoch": 0.22936898426405405, - "grad_norm": 0.423828125, - "learning_rate": 0.00018997674040454175, - "loss": 0.9519, + "epoch": 0.4587182282402892, + "grad_norm": 0.267578125, + "learning_rate": 0.00013132805693287844, + "loss": 0.9522, "step": 7995 }, { - "epoch": 0.22951242953251186, - "grad_norm": 0.451171875, - "learning_rate": 0.00018995487869160845, - "loss": 0.9941, + "epoch": 0.45900510643180903, + "grad_norm": 0.6171875, + "learning_rate": 0.0001312329422094374, + "loss": 0.9952, "step": 8000 }, { - "epoch": 0.2296558748009697, - "grad_norm": 0.34765625, - "learning_rate": 0.00018993299442405953, - 
"loss": 0.8889, + "epoch": 0.45929198462332893, + "grad_norm": 0.271484375, + "learning_rate": 0.0001311377961655319, + "loss": 0.8869, "step": 8005 }, { - "epoch": 0.22979932006942752, - "grad_norm": 0.388671875, - "learning_rate": 0.00018991108760738214, - "loss": 0.9121, + "epoch": 0.45957886281484883, + "grad_norm": 0.275390625, + "learning_rate": 0.00013104261889657453, + "loss": 0.9416, "step": 8010 }, { - "epoch": 0.22994276533788532, - "grad_norm": 0.365234375, - "learning_rate": 0.00018988915824706898, - "loss": 0.9395, + "epoch": 0.4598657410063687, + "grad_norm": 0.25390625, + "learning_rate": 0.00013094741049800936, + "loss": 0.933, "step": 8015 }, { - "epoch": 0.23008621060634316, - "grad_norm": 0.400390625, - "learning_rate": 0.00018986720634861848, - "loss": 0.9259, + "epoch": 0.4601526191978886, + "grad_norm": 0.265625, + "learning_rate": 0.00013085217106531153, + "loss": 1.0033, "step": 8020 }, { - "epoch": 0.23022965587480096, - "grad_norm": 0.404296875, - "learning_rate": 0.0001898452319175347, - "loss": 0.9604, + "epoch": 0.4604394973894085, + "grad_norm": 0.267578125, + "learning_rate": 0.00013075690069398738, + "loss": 0.9582, "step": 8025 }, { - "epoch": 0.2303731011432588, - "grad_norm": 0.416015625, - "learning_rate": 0.00018982323495932732, - "loss": 1.0141, + "epoch": 0.4607263755809283, + "grad_norm": 0.2578125, + "learning_rate": 0.00013066159947957426, + "loss": 0.9989, "step": 8030 }, { - "epoch": 0.23051654641171662, - "grad_norm": 0.390625, - "learning_rate": 0.00018980121547951172, - "loss": 0.9314, + "epoch": 0.46101325377244823, + "grad_norm": 0.287109375, + "learning_rate": 0.0001305662675176404, + "loss": 0.9598, "step": 8035 }, { - "epoch": 0.23065999168017443, - "grad_norm": 0.412109375, - "learning_rate": 0.00018977917348360888, - "loss": 1.0016, + "epoch": 0.4613001319639681, + "grad_norm": 0.26953125, + "learning_rate": 0.00013047090490378495, + "loss": 0.9588, "step": 8040 }, { - "epoch": 0.23080343694863226, - "grad_norm": 0.56640625, - "learning_rate": 0.00018975710897714546, - "loss": 1.0202, + "epoch": 0.461587010155488, + "grad_norm": 0.259765625, + "learning_rate": 0.00013037551173363774, + "loss": 0.9166, "step": 8045 }, { - "epoch": 0.23094688221709006, - "grad_norm": 0.423828125, - "learning_rate": 0.0001897350219656537, - "loss": 0.9466, + "epoch": 0.4618738883470079, + "grad_norm": 0.26171875, + "learning_rate": 0.00013028008810285924, + "loss": 0.9633, "step": 8050 }, { - "epoch": 0.2310903274855479, - "grad_norm": 0.47265625, - "learning_rate": 0.0001897129124546716, - "loss": 0.9164, + "epoch": 0.4621607665385277, + "grad_norm": 0.25390625, + "learning_rate": 0.00013018463410714048, + "loss": 0.8813, "step": 8055 }, { - "epoch": 0.2312337727540057, - "grad_norm": 0.408203125, - "learning_rate": 0.0001896907804497427, - "loss": 0.9089, + "epoch": 0.4624476447300476, + "grad_norm": 0.30078125, + "learning_rate": 0.00013008914984220294, + "loss": 0.9441, "step": 8060 }, { - "epoch": 0.23137721802246353, - "grad_norm": 0.38671875, - "learning_rate": 0.00018966862595641624, - "loss": 1.0136, + "epoch": 0.4627345229215675, + "grad_norm": 0.2890625, + "learning_rate": 0.00012999363540379852, + "loss": 0.9441, "step": 8065 }, { - "epoch": 0.23152066329092136, - "grad_norm": 0.3828125, - "learning_rate": 0.00018964644898024707, - "loss": 1.0149, + "epoch": 0.46302140111308737, + "grad_norm": 0.275390625, + "learning_rate": 0.00012989809088770923, + "loss": 0.9317, "step": 8070 }, { - "epoch": 0.23166410855937916, - "grad_norm": 0.36328125, - 
"learning_rate": 0.00018962424952679563, - "loss": 0.9568, + "epoch": 0.4633082793046073, + "grad_norm": 0.27734375, + "learning_rate": 0.00012980251638974733, + "loss": 0.9065, "step": 8075 }, { - "epoch": 0.231807553827837, - "grad_norm": 0.3984375, - "learning_rate": 0.0001896020276016281, - "loss": 1.0407, + "epoch": 0.4635951574961271, + "grad_norm": 0.291015625, + "learning_rate": 0.0001297069120057552, + "loss": 0.9926, "step": 8080 }, { - "epoch": 0.2319509990962948, - "grad_norm": 0.427734375, - "learning_rate": 0.0001895797832103162, - "loss": 1.0178, + "epoch": 0.463882035687647, + "grad_norm": 0.2734375, + "learning_rate": 0.0001296112778316051, + "loss": 0.9544, "step": 8085 }, { - "epoch": 0.23209444436475263, - "grad_norm": 0.373046875, - "learning_rate": 0.00018955751635843737, - "loss": 0.9082, + "epoch": 0.4641689138791669, + "grad_norm": 0.265625, + "learning_rate": 0.00012951561396319918, + "loss": 0.9443, "step": 8090 }, { - "epoch": 0.23223788963321046, - "grad_norm": 0.396484375, - "learning_rate": 0.00018953522705157457, - "loss": 0.9055, + "epoch": 0.46445579207068677, + "grad_norm": 0.265625, + "learning_rate": 0.00012941992049646936, + "loss": 0.996, "step": 8095 }, { - "epoch": 0.23238133490166826, - "grad_norm": 0.357421875, - "learning_rate": 0.0001895129152953165, - "loss": 0.9104, + "epoch": 0.46474267026220667, + "grad_norm": 0.27734375, + "learning_rate": 0.00012932419752737735, + "loss": 0.9842, "step": 8100 }, { - "epoch": 0.2325247801701261, - "grad_norm": 0.400390625, - "learning_rate": 0.00018949058109525747, - "loss": 0.9373, + "epoch": 0.46502954845372657, + "grad_norm": 0.27734375, + "learning_rate": 0.00012922844515191425, + "loss": 0.9353, "step": 8105 }, { - "epoch": 0.2326682254385839, - "grad_norm": 0.37109375, - "learning_rate": 0.00018946822445699735, - "loss": 0.9279, + "epoch": 0.4653164266452464, + "grad_norm": 0.28125, + "learning_rate": 0.00012913266346610086, + "loss": 0.9293, "step": 8110 }, { - "epoch": 0.23281167070704173, - "grad_norm": 0.435546875, - "learning_rate": 0.00018944584538614168, - "loss": 0.9169, + "epoch": 0.4656033048367663, + "grad_norm": 0.271484375, + "learning_rate": 0.0001290368525659872, + "loss": 0.9549, "step": 8115 }, { - "epoch": 0.23295511597549956, - "grad_norm": 0.376953125, - "learning_rate": 0.00018942344388830158, - "loss": 0.9701, + "epoch": 0.46589018302828616, + "grad_norm": 0.2421875, + "learning_rate": 0.00012894101254765268, + "loss": 0.98, "step": 8120 }, { - "epoch": 0.23309856124395736, - "grad_norm": 0.419921875, - "learning_rate": 0.0001894010199690939, - "loss": 0.9203, + "epoch": 0.46617706121980607, + "grad_norm": 0.2578125, + "learning_rate": 0.00012884514350720586, + "loss": 0.8901, "step": 8125 }, { - "epoch": 0.2332420065124152, - "grad_norm": 0.36328125, - "learning_rate": 0.00018937857363414106, - "loss": 0.9659, + "epoch": 0.46646393941132597, + "grad_norm": 0.275390625, + "learning_rate": 0.00012874924554078448, + "loss": 0.9632, "step": 8130 }, { - "epoch": 0.233385451780873, - "grad_norm": 0.4140625, - "learning_rate": 0.00018935610488907104, - "loss": 0.9379, + "epoch": 0.4667508176028458, + "grad_norm": 0.27734375, + "learning_rate": 0.00012865331874455517, + "loss": 0.9504, "step": 8135 }, { - "epoch": 0.23352889704933083, - "grad_norm": 0.431640625, - "learning_rate": 0.00018933361373951746, - "loss": 1.0508, + "epoch": 0.4670376957943657, + "grad_norm": 0.25390625, + "learning_rate": 0.0001285573632147136, + "loss": 1.0337, "step": 8140 }, { - "epoch": 0.23367234231778866, 
- "grad_norm": 0.357421875, - "learning_rate": 0.00018931110019111965, - "loss": 0.9127, + "epoch": 0.4673245739858856, + "grad_norm": 0.2578125, + "learning_rate": 0.00012846137904748414, + "loss": 0.9788, "step": 8145 }, { - "epoch": 0.23381578758624647, - "grad_norm": 0.44140625, - "learning_rate": 0.00018928856424952245, - "loss": 0.9025, + "epoch": 0.46761145217740546, + "grad_norm": 0.28125, + "learning_rate": 0.00012836536633911995, + "loss": 0.9813, "step": 8150 }, { - "epoch": 0.2339592328547043, - "grad_norm": 0.439453125, - "learning_rate": 0.00018926600592037639, - "loss": 1.0677, + "epoch": 0.46789833036892536, + "grad_norm": 0.265625, + "learning_rate": 0.0001282693251859028, + "loss": 0.9625, "step": 8155 }, { - "epoch": 0.2341026781231621, - "grad_norm": 0.3671875, - "learning_rate": 0.0001892434252093375, - "loss": 1.0161, + "epoch": 0.4681852085604452, + "grad_norm": 0.279296875, + "learning_rate": 0.00012817325568414297, + "loss": 0.9879, "step": 8160 }, { - "epoch": 0.23424612339161993, - "grad_norm": 0.40234375, - "learning_rate": 0.0001892208221220676, - "loss": 1.0014, + "epoch": 0.4684720867519651, + "grad_norm": 0.259765625, + "learning_rate": 0.00012807715793017918, + "loss": 0.9737, "step": 8165 }, { - "epoch": 0.23438956866007774, - "grad_norm": 0.408203125, - "learning_rate": 0.00018919819666423396, - "loss": 0.9018, + "epoch": 0.468758964943485, + "grad_norm": 0.2578125, + "learning_rate": 0.00012798103202037842, + "loss": 0.9963, "step": 8170 }, { - "epoch": 0.23453301392853557, - "grad_norm": 0.39453125, - "learning_rate": 0.0001891755488415095, - "loss": 1.0689, + "epoch": 0.46904584313500486, + "grad_norm": 0.2734375, + "learning_rate": 0.00012788487805113602, + "loss": 0.9001, "step": 8175 }, { - "epoch": 0.2346764591969934, - "grad_norm": 0.41015625, - "learning_rate": 0.00018915287865957277, - "loss": 1.014, + "epoch": 0.46933272132652476, + "grad_norm": 0.291015625, + "learning_rate": 0.0001277886961188754, + "loss": 1.0332, "step": 8180 }, { - "epoch": 0.2348199044654512, - "grad_norm": 0.376953125, - "learning_rate": 0.00018913018612410802, - "loss": 1.0327, + "epoch": 0.46961959951804466, + "grad_norm": 0.275390625, + "learning_rate": 0.00012769248632004795, + "loss": 1.0109, "step": 8185 }, { - "epoch": 0.23496334973390903, - "grad_norm": 0.404296875, - "learning_rate": 0.0001891074712408049, - "loss": 0.9647, + "epoch": 0.4699064777095645, + "grad_norm": 0.271484375, + "learning_rate": 0.0001275962487511332, + "loss": 0.9768, "step": 8190 }, { - "epoch": 0.23510679500236684, - "grad_norm": 0.384765625, - "learning_rate": 0.0001890847340153588, - "loss": 0.8999, + "epoch": 0.4701933559010844, + "grad_norm": 0.26953125, + "learning_rate": 0.00012749998350863827, + "loss": 0.9383, "step": 8195 }, { - "epoch": 0.23525024027082467, - "grad_norm": 0.408203125, - "learning_rate": 0.00018906197445347068, - "loss": 1.0718, + "epoch": 0.47048023409260425, + "grad_norm": 0.248046875, + "learning_rate": 0.0001274036906890982, + "loss": 0.9907, "step": 8200 }, { - "epoch": 0.2353936855392825, - "grad_norm": 0.37890625, - "learning_rate": 0.00018903919256084715, - "loss": 0.9799, + "epoch": 0.47076711228412416, + "grad_norm": 0.265625, + "learning_rate": 0.00012730737038907567, + "loss": 0.9722, "step": 8205 }, { - "epoch": 0.2355371308077403, - "grad_norm": 0.390625, - "learning_rate": 0.0001890163883432003, - "loss": 0.9117, + "epoch": 0.47105399047564406, + "grad_norm": 0.26953125, + "learning_rate": 0.00012721102270516087, + "loss": 0.9917, "step": 8210 }, 
{ - "epoch": 0.23568057607619813, - "grad_norm": 0.392578125, - "learning_rate": 0.0001889935618062479, - "loss": 0.9884, + "epoch": 0.4713408686671639, + "grad_norm": 0.255859375, + "learning_rate": 0.00012711464773397152, + "loss": 0.9361, "step": 8215 }, { - "epoch": 0.23582402134465594, - "grad_norm": 0.3828125, - "learning_rate": 0.00018897071295571335, - "loss": 0.9331, + "epoch": 0.4716277468586838, + "grad_norm": 0.396484375, + "learning_rate": 0.0001270182455721526, + "loss": 0.9083, "step": 8220 }, { - "epoch": 0.23596746661311377, - "grad_norm": 0.37890625, - "learning_rate": 0.00018894784179732558, - "loss": 0.9362, + "epoch": 0.4719146250502037, + "grad_norm": 0.265625, + "learning_rate": 0.00012692181631637642, + "loss": 0.9438, "step": 8225 }, { - "epoch": 0.2361109118815716, - "grad_norm": 0.376953125, - "learning_rate": 0.00018892494833681913, - "loss": 0.9171, + "epoch": 0.47220150324172355, + "grad_norm": 0.275390625, + "learning_rate": 0.00012682536006334248, + "loss": 0.9491, "step": 8230 }, { - "epoch": 0.2362543571500294, - "grad_norm": 0.3671875, - "learning_rate": 0.00018890203257993413, - "loss": 0.964, + "epoch": 0.47248838143324345, + "grad_norm": 0.259765625, + "learning_rate": 0.00012672887690977732, + "loss": 0.9634, "step": 8235 }, { - "epoch": 0.23639780241848724, - "grad_norm": 0.43359375, - "learning_rate": 0.00018887909453241632, - "loss": 0.97, + "epoch": 0.4727752596247633, + "grad_norm": 0.265625, + "learning_rate": 0.00012663236695243448, + "loss": 0.96, "step": 8240 }, { - "epoch": 0.23654124768694504, - "grad_norm": 0.40625, - "learning_rate": 0.00018885613420001703, - "loss": 1.0413, + "epoch": 0.4730621378162832, + "grad_norm": 0.271484375, + "learning_rate": 0.0001265358302880943, + "loss": 0.9325, "step": 8245 }, { - "epoch": 0.23668469295540287, - "grad_norm": 0.40234375, - "learning_rate": 0.0001888331515884931, - "loss": 0.9478, + "epoch": 0.4733490160078031, + "grad_norm": 0.283203125, + "learning_rate": 0.00012643926701356404, + "loss": 0.9878, "step": 8250 }, { - "epoch": 0.23682813822386067, - "grad_norm": 0.384765625, - "learning_rate": 0.00018881014670360706, - "loss": 1.1274, + "epoch": 0.47363589419932295, + "grad_norm": 0.271484375, + "learning_rate": 0.00012634267722567752, + "loss": 0.9569, "step": 8255 }, { - "epoch": 0.2369715834923185, - "grad_norm": 0.4453125, - "learning_rate": 0.000188787119551127, - "loss": 0.9325, + "epoch": 0.47392277239084285, + "grad_norm": 0.267578125, + "learning_rate": 0.00012624606102129516, + "loss": 0.9397, "step": 8260 }, { - "epoch": 0.23711502876077634, - "grad_norm": 0.37109375, - "learning_rate": 0.00018876407013682656, - "loss": 0.9697, + "epoch": 0.47420965058236275, + "grad_norm": 0.2890625, + "learning_rate": 0.00012614941849730405, + "loss": 0.9127, "step": 8265 }, { - "epoch": 0.23725847402923414, - "grad_norm": 0.427734375, - "learning_rate": 0.00018874099846648496, - "loss": 0.942, + "epoch": 0.4744965287738826, + "grad_norm": 0.279296875, + "learning_rate": 0.00012605274975061736, + "loss": 1.03, "step": 8270 }, { - "epoch": 0.23740191929769197, - "grad_norm": 0.39453125, - "learning_rate": 0.00018871790454588706, - "loss": 1.0186, + "epoch": 0.4747834069654025, + "grad_norm": 0.26171875, + "learning_rate": 0.00012595605487817482, + "loss": 1.0017, "step": 8275 }, { - "epoch": 0.23754536456614977, - "grad_norm": 0.37890625, - "learning_rate": 0.0001886947883808232, - "loss": 0.9668, + "epoch": 0.47507028515692235, + "grad_norm": 0.26171875, + "learning_rate": 0.00012585933397694224, 
+ "loss": 0.9802, "step": 8280 }, { - "epoch": 0.2376888098346076, - "grad_norm": 0.390625, - "learning_rate": 0.00018867164997708944, - "loss": 0.8923, + "epoch": 0.47535716334844225, + "grad_norm": 0.2490234375, + "learning_rate": 0.00012576258714391155, + "loss": 0.9441, "step": 8285 }, { - "epoch": 0.23783225510306544, - "grad_norm": 0.50390625, - "learning_rate": 0.0001886484893404872, - "loss": 0.9397, + "epoch": 0.47564404153996215, + "grad_norm": 0.251953125, + "learning_rate": 0.00012566581447610072, + "loss": 1.005, "step": 8290 }, { - "epoch": 0.23797570037152324, - "grad_norm": 0.353515625, - "learning_rate": 0.00018862530647682378, - "loss": 0.9027, + "epoch": 0.475930919731482, + "grad_norm": 0.2578125, + "learning_rate": 0.0001255690160705536, + "loss": 0.9187, "step": 8295 }, { - "epoch": 0.23811914563998107, - "grad_norm": 0.388671875, - "learning_rate": 0.0001886021013919117, - "loss": 1.0061, + "epoch": 0.4762177979230019, + "grad_norm": 0.263671875, + "learning_rate": 0.0001254721920243398, + "loss": 0.9247, "step": 8300 }, { - "epoch": 0.23826259090843888, - "grad_norm": 0.40234375, - "learning_rate": 0.00018857887409156934, - "loss": 0.9828, + "epoch": 0.4765046761145218, + "grad_norm": 0.279296875, + "learning_rate": 0.00012537534243455472, + "loss": 0.9306, "step": 8305 }, { - "epoch": 0.2384060361768967, - "grad_norm": 0.447265625, - "learning_rate": 0.00018855562458162055, - "loss": 0.9842, + "epoch": 0.47679155430604164, + "grad_norm": 0.30078125, + "learning_rate": 0.00012527846739831934, + "loss": 1.0118, "step": 8310 }, { - "epoch": 0.23854948144535454, - "grad_norm": 0.3828125, - "learning_rate": 0.00018853235286789467, - "loss": 0.9809, + "epoch": 0.47707843249756154, + "grad_norm": 0.26171875, + "learning_rate": 0.00012518156701278019, + "loss": 0.887, "step": 8315 }, { - "epoch": 0.23869292671381234, - "grad_norm": 0.36328125, - "learning_rate": 0.0001885090589562267, - "loss": 1.0094, + "epoch": 0.4773653106890814, + "grad_norm": 0.275390625, + "learning_rate": 0.0001250846413751092, + "loss": 1.0722, "step": 8320 }, { - "epoch": 0.23883637198227017, - "grad_norm": 0.3671875, - "learning_rate": 0.00018848574285245718, - "loss": 1.0013, + "epoch": 0.4776521888806013, + "grad_norm": 0.283203125, + "learning_rate": 0.00012498769058250355, + "loss": 0.9375, "step": 8325 }, { - "epoch": 0.23897981725072798, - "grad_norm": 0.412109375, - "learning_rate": 0.00018846240456243225, - "loss": 0.9509, + "epoch": 0.4779390670721212, + "grad_norm": 0.26953125, + "learning_rate": 0.00012489071473218574, + "loss": 0.946, "step": 8330 }, { - "epoch": 0.2391232625191858, - "grad_norm": 0.3828125, - "learning_rate": 0.00018843904409200356, - "loss": 0.8689, + "epoch": 0.47822594526364104, + "grad_norm": 0.26171875, + "learning_rate": 0.0001247937139214034, + "loss": 0.9898, "step": 8335 }, { - "epoch": 0.2392667077876436, - "grad_norm": 0.380859375, - "learning_rate": 0.00018841566144702833, - "loss": 1.0403, + "epoch": 0.47851282345516094, + "grad_norm": 0.255859375, + "learning_rate": 0.00012469668824742914, + "loss": 0.9774, "step": 8340 }, { - "epoch": 0.23941015305610144, - "grad_norm": 0.4375, - "learning_rate": 0.0001883922566333693, - "loss": 0.9561, + "epoch": 0.47879970164668084, + "grad_norm": 0.275390625, + "learning_rate": 0.00012459963780756054, + "loss": 0.986, "step": 8345 }, { - "epoch": 0.23955359832455927, - "grad_norm": 0.3984375, - "learning_rate": 0.00018836882965689493, - "loss": 0.8932, + "epoch": 0.4790865798382007, + "grad_norm": 0.263671875, + 
"learning_rate": 0.00012450256269911996, + "loss": 0.9345, "step": 8350 }, { - "epoch": 0.23969704359301708, - "grad_norm": 0.380859375, - "learning_rate": 0.00018834538052347905, - "loss": 0.8948, + "epoch": 0.4793734580297206, + "grad_norm": 0.267578125, + "learning_rate": 0.0001244054630194546, + "loss": 0.9338, "step": 8355 }, { - "epoch": 0.2398404888614749, - "grad_norm": 0.373046875, - "learning_rate": 0.00018832190923900112, - "loss": 0.999, + "epoch": 0.47966033622124044, + "grad_norm": 0.255859375, + "learning_rate": 0.00012430833886593613, + "loss": 0.8914, "step": 8360 }, { - "epoch": 0.2399839341299327, - "grad_norm": 0.3671875, - "learning_rate": 0.00018829841580934623, - "loss": 1.019, + "epoch": 0.47994721441276034, + "grad_norm": 0.2578125, + "learning_rate": 0.00012421119033596102, + "loss": 0.9065, "step": 8365 }, { - "epoch": 0.24012737939839054, - "grad_norm": 0.384765625, - "learning_rate": 0.00018827490024040484, - "loss": 0.9921, + "epoch": 0.48023409260428024, + "grad_norm": 0.27734375, + "learning_rate": 0.0001241140175269499, + "loss": 1.027, "step": 8370 }, { - "epoch": 0.24027082466684838, - "grad_norm": 0.3828125, - "learning_rate": 0.00018825136253807314, - "loss": 0.9299, + "epoch": 0.4805209707958001, + "grad_norm": 0.265625, + "learning_rate": 0.00012401682053634792, + "loss": 0.9085, "step": 8375 }, { - "epoch": 0.24041426993530618, - "grad_norm": 0.390625, - "learning_rate": 0.00018822780270825277, - "loss": 0.9872, + "epoch": 0.48080784898732, + "grad_norm": 0.283203125, + "learning_rate": 0.00012391959946162447, + "loss": 0.976, "step": 8380 }, { - "epoch": 0.240557715203764, - "grad_norm": 0.796875, - "learning_rate": 0.00018820422075685095, - "loss": 1.0564, + "epoch": 0.4810947271788399, + "grad_norm": 0.267578125, + "learning_rate": 0.00012382235440027307, + "loss": 0.8829, "step": 8385 }, { - "epoch": 0.24070116047222181, - "grad_norm": 0.37890625, - "learning_rate": 0.00018818061668978046, - "loss": 1.0141, + "epoch": 0.48138160537035973, + "grad_norm": 0.28125, + "learning_rate": 0.0001237250854498112, + "loss": 0.9961, "step": 8390 }, { - "epoch": 0.24084460574067965, - "grad_norm": 0.416015625, - "learning_rate": 0.00018815699051295958, - "loss": 1.0202, + "epoch": 0.48166848356187963, + "grad_norm": 0.291015625, + "learning_rate": 0.00012362779270778048, + "loss": 0.9896, "step": 8395 }, { - "epoch": 0.24098805100913748, - "grad_norm": 0.51953125, - "learning_rate": 0.0001881333422323122, - "loss": 0.9977, + "epoch": 0.4819553617533995, + "grad_norm": 0.2734375, + "learning_rate": 0.00012353047627174625, + "loss": 0.9684, "step": 8400 }, { - "epoch": 0.24113149627759528, - "grad_norm": 0.3671875, - "learning_rate": 0.00018810967185376766, - "loss": 0.9245, + "epoch": 0.4822422399449194, + "grad_norm": 0.263671875, + "learning_rate": 0.00012343313623929764, + "loss": 0.9758, "step": 8405 }, { - "epoch": 0.2412749415460531, - "grad_norm": 0.4296875, - "learning_rate": 0.00018808597938326093, - "loss": 0.9198, + "epoch": 0.4825291181364393, + "grad_norm": 0.279296875, + "learning_rate": 0.00012333577270804745, + "loss": 0.9922, "step": 8410 }, { - "epoch": 0.24141838681451092, - "grad_norm": 0.5234375, - "learning_rate": 0.0001880622648267325, - "loss": 0.9596, + "epoch": 0.48281599632795913, + "grad_norm": 0.255859375, + "learning_rate": 0.0001232383857756321, + "loss": 0.9422, "step": 8415 }, { - "epoch": 0.24156183208296875, - "grad_norm": 0.3828125, - "learning_rate": 0.00018803852819012832, - "loss": 0.8908, + "epoch": 0.48310287451947903, 
+ "grad_norm": 0.298828125, + "learning_rate": 0.00012314097553971137, + "loss": 0.9648, "step": 8420 }, { - "epoch": 0.24170527735142655, - "grad_norm": 0.36328125, - "learning_rate": 0.0001880147694794, - "loss": 0.9465, + "epoch": 0.48338975271099893, + "grad_norm": 0.26171875, + "learning_rate": 0.00012304354209796846, + "loss": 0.9822, "step": 8425 }, { - "epoch": 0.24184872261988438, - "grad_norm": 0.396484375, - "learning_rate": 0.0001879909887005046, - "loss": 0.9875, + "epoch": 0.4836766309025188, + "grad_norm": 0.275390625, + "learning_rate": 0.00012294608554810988, + "loss": 0.9452, "step": 8430 }, { - "epoch": 0.2419921678883422, - "grad_norm": 0.39453125, - "learning_rate": 0.00018796718585940473, - "loss": 0.9858, + "epoch": 0.4839635090940387, + "grad_norm": 0.265625, + "learning_rate": 0.00012284860598786525, + "loss": 0.9833, "step": 8435 }, { - "epoch": 0.24213561315680002, - "grad_norm": 0.365234375, - "learning_rate": 0.00018794336096206852, - "loss": 1.007, + "epoch": 0.4842503872855585, + "grad_norm": 0.265625, + "learning_rate": 0.0001227511035149873, + "loss": 0.981, "step": 8440 }, { - "epoch": 0.24227905842525785, - "grad_norm": 0.39453125, - "learning_rate": 0.0001879195140144697, - "loss": 0.9664, + "epoch": 0.4845372654770784, + "grad_norm": 0.26953125, + "learning_rate": 0.00012265357822725172, + "loss": 0.987, "step": 8445 }, { - "epoch": 0.24242250369371565, - "grad_norm": 0.384765625, - "learning_rate": 0.00018789564502258741, - "loss": 0.9199, + "epoch": 0.48482414366859833, + "grad_norm": 0.28125, + "learning_rate": 0.00012255603022245712, + "loss": 1.0278, "step": 8450 }, { - "epoch": 0.24256594896217348, - "grad_norm": 0.41015625, - "learning_rate": 0.00018787175399240642, - "loss": 1.1427, + "epoch": 0.4851110218601182, + "grad_norm": 0.298828125, + "learning_rate": 0.0001224584595984248, + "loss": 0.9869, "step": 8455 }, { - "epoch": 0.24270939423063131, - "grad_norm": 0.404296875, - "learning_rate": 0.000187847840929917, - "loss": 0.9667, + "epoch": 0.4853979000516381, + "grad_norm": 0.271484375, + "learning_rate": 0.00012236086645299888, + "loss": 1.0075, "step": 8460 }, { - "epoch": 0.24285283949908912, - "grad_norm": 0.380859375, - "learning_rate": 0.00018782390584111492, - "loss": 0.8837, + "epoch": 0.485684778243158, + "grad_norm": 0.255859375, + "learning_rate": 0.00012226325088404588, + "loss": 0.9499, "step": 8465 }, { - "epoch": 0.24299628476754695, - "grad_norm": 0.341796875, - "learning_rate": 0.00018779994873200146, - "loss": 0.9067, + "epoch": 0.4859716564346778, + "grad_norm": 0.265625, + "learning_rate": 0.00012216561298945502, + "loss": 0.9547, "step": 8470 }, { - "epoch": 0.24313973003600475, - "grad_norm": 0.38671875, - "learning_rate": 0.00018777596960858347, - "loss": 0.976, + "epoch": 0.4862585346261977, + "grad_norm": 0.263671875, + "learning_rate": 0.00012206795286713774, + "loss": 0.9796, "step": 8475 }, { - "epoch": 0.24328317530446258, - "grad_norm": 0.3828125, - "learning_rate": 0.00018775196847687332, - "loss": 0.9914, + "epoch": 0.48654541281771757, + "grad_norm": 0.255859375, + "learning_rate": 0.00012197027061502781, + "loss": 0.9642, "step": 8480 }, { - "epoch": 0.24342662057292042, - "grad_norm": 0.365234375, - "learning_rate": 0.00018772794534288885, - "loss": 0.9846, + "epoch": 0.48683229100923747, + "grad_norm": 0.251953125, + "learning_rate": 0.00012187256633108129, + "loss": 0.9616, "step": 8485 }, { - "epoch": 0.24357006584137822, - "grad_norm": 0.435546875, - "learning_rate": 0.0001877039002126534, - "loss": 
0.9743, + "epoch": 0.4871191692007574, + "grad_norm": 0.265625, + "learning_rate": 0.00012177484011327618, + "loss": 0.9035, "step": 8490 }, { - "epoch": 0.24371351110983605, - "grad_norm": 0.419921875, - "learning_rate": 0.00018767983309219595, - "loss": 1.1145, + "epoch": 0.4874060473922772, + "grad_norm": 0.26171875, + "learning_rate": 0.00012167709205961256, + "loss": 0.9275, "step": 8495 }, { - "epoch": 0.24385695637829385, - "grad_norm": 0.421875, - "learning_rate": 0.00018765574398755085, - "loss": 0.9649, + "epoch": 0.4876929255837971, + "grad_norm": 0.26171875, + "learning_rate": 0.00012157932226811246, + "loss": 0.9153, "step": 8500 }, { - "epoch": 0.24400040164675169, - "grad_norm": 0.453125, - "learning_rate": 0.00018763163290475804, - "loss": 0.9352, + "epoch": 0.487979803775317, + "grad_norm": 0.2578125, + "learning_rate": 0.00012148153083681954, + "loss": 1.0136, "step": 8505 }, { - "epoch": 0.2441438469152095, - "grad_norm": 0.408203125, - "learning_rate": 0.00018760749984986298, - "loss": 0.9285, + "epoch": 0.48826668196683687, + "grad_norm": 0.251953125, + "learning_rate": 0.00012138371786379938, + "loss": 0.943, "step": 8510 }, { - "epoch": 0.24428729218366732, - "grad_norm": 0.4609375, - "learning_rate": 0.0001875833448289166, - "loss": 0.8847, + "epoch": 0.48855356015835677, + "grad_norm": 0.251953125, + "learning_rate": 0.00012128588344713899, + "loss": 0.9297, "step": 8515 }, { - "epoch": 0.24443073745212515, - "grad_norm": 0.421875, - "learning_rate": 0.0001875591678479753, - "loss": 0.9569, + "epoch": 0.4888404383498766, + "grad_norm": 0.265625, + "learning_rate": 0.0001211880276849469, + "loss": 0.96, "step": 8520 }, { - "epoch": 0.24457418272058296, - "grad_norm": 0.40234375, - "learning_rate": 0.0001875349689131011, - "loss": 1.0223, + "epoch": 0.4891273165413965, + "grad_norm": 0.265625, + "learning_rate": 0.00012109015067535321, + "loss": 0.9569, "step": 8525 }, { - "epoch": 0.2447176279890408, - "grad_norm": 0.373046875, - "learning_rate": 0.00018751074803036142, - "loss": 0.8997, + "epoch": 0.4894141947329164, + "grad_norm": 0.283203125, + "learning_rate": 0.00012099225251650907, + "loss": 0.9621, "step": 8530 }, { - "epoch": 0.2448610732574986, - "grad_norm": 0.435546875, - "learning_rate": 0.0001874865052058292, - "loss": 1.0131, + "epoch": 0.48970107292443626, + "grad_norm": 0.29296875, + "learning_rate": 0.00012089433330658705, + "loss": 1.0271, "step": 8535 }, { - "epoch": 0.24500451852595642, - "grad_norm": 0.4140625, - "learning_rate": 0.00018746224044558302, - "loss": 0.8679, + "epoch": 0.48998795111595617, + "grad_norm": 0.251953125, + "learning_rate": 0.00012079639314378075, + "loss": 0.9216, "step": 8540 }, { - "epoch": 0.24514796379441425, - "grad_norm": 0.37890625, - "learning_rate": 0.00018743795375570672, - "loss": 0.9701, + "epoch": 0.49027482930747607, + "grad_norm": 0.314453125, + "learning_rate": 0.00012069843212630474, + "loss": 0.8654, "step": 8545 }, { - "epoch": 0.24529140906287206, - "grad_norm": 0.400390625, - "learning_rate": 0.0001874136451422898, - "loss": 1.0366, + "epoch": 0.4905617074989959, + "grad_norm": 0.26953125, + "learning_rate": 0.00012060045035239465, + "loss": 0.8843, "step": 8550 }, { - "epoch": 0.2454348543313299, - "grad_norm": 0.396484375, - "learning_rate": 0.00018738931461142726, - "loss": 1.0303, + "epoch": 0.4908485856905158, + "grad_norm": 0.267578125, + "learning_rate": 0.00012050244792030667, + "loss": 0.9073, "step": 8555 }, { - "epoch": 0.2455782995997877, - "grad_norm": 0.390625, - "learning_rate": 
0.0001873649621692195, - "loss": 1.042, + "epoch": 0.49113546388203566, + "grad_norm": 0.259765625, + "learning_rate": 0.00012040442492831798, + "loss": 0.9647, "step": 8560 }, { - "epoch": 0.24572174486824552, - "grad_norm": 0.42578125, - "learning_rate": 0.00018734058782177252, - "loss": 0.9917, + "epoch": 0.49142234207355556, + "grad_norm": 0.251953125, + "learning_rate": 0.00012030638147472623, + "loss": 0.9147, "step": 8565 }, { - "epoch": 0.24586519013670335, - "grad_norm": 0.375, - "learning_rate": 0.00018731619157519774, - "loss": 0.8602, + "epoch": 0.49170922026507546, + "grad_norm": 0.28125, + "learning_rate": 0.00012020831765784957, + "loss": 0.9259, "step": 8570 }, { - "epoch": 0.24600863540516116, - "grad_norm": 0.38671875, - "learning_rate": 0.00018729177343561207, - "loss": 0.9037, + "epoch": 0.4919960984565953, + "grad_norm": 0.267578125, + "learning_rate": 0.00012011023357602668, + "loss": 0.9544, "step": 8575 }, { - "epoch": 0.246152080673619, - "grad_norm": 0.40234375, - "learning_rate": 0.00018726733340913797, - "loss": 0.9154, + "epoch": 0.4922829766481152, + "grad_norm": 0.25390625, + "learning_rate": 0.00012001212932761645, + "loss": 0.8561, "step": 8580 }, { - "epoch": 0.2462955259420768, - "grad_norm": 0.39453125, - "learning_rate": 0.00018724287150190337, - "loss": 0.9562, + "epoch": 0.4925698548396351, + "grad_norm": 0.2734375, + "learning_rate": 0.00011991400501099805, + "loss": 0.9172, "step": 8585 }, { - "epoch": 0.24643897121053462, - "grad_norm": 0.380859375, - "learning_rate": 0.00018721838772004157, - "loss": 0.9812, + "epoch": 0.49285673303115496, + "grad_norm": 0.28515625, + "learning_rate": 0.00011981586072457078, + "loss": 0.9463, "step": 8590 }, { - "epoch": 0.24658241647899243, - "grad_norm": 0.423828125, - "learning_rate": 0.00018719388206969156, - "loss": 0.9062, + "epoch": 0.49314361122267486, + "grad_norm": 0.2490234375, + "learning_rate": 0.00011971769656675391, + "loss": 0.9951, "step": 8595 }, { - "epoch": 0.24672586174745026, - "grad_norm": 0.3671875, - "learning_rate": 0.0001871693545569976, - "loss": 1.0552, + "epoch": 0.4934304894141947, + "grad_norm": 0.259765625, + "learning_rate": 0.00011961951263598677, + "loss": 0.9508, "step": 8600 }, { - "epoch": 0.2468693070159081, - "grad_norm": 0.37109375, - "learning_rate": 0.00018714480518810964, - "loss": 0.9168, + "epoch": 0.4937173676057146, + "grad_norm": 0.298828125, + "learning_rate": 0.00011952130903072832, + "loss": 0.9669, "step": 8605 }, { - "epoch": 0.2470127522843659, - "grad_norm": 0.41015625, - "learning_rate": 0.00018712023396918293, - "loss": 0.896, + "epoch": 0.4940042457972345, + "grad_norm": 0.263671875, + "learning_rate": 0.00011942308584945741, + "loss": 1.0121, "step": 8610 }, { - "epoch": 0.24715619755282373, - "grad_norm": 0.416015625, - "learning_rate": 0.00018709564090637834, - "loss": 0.9481, + "epoch": 0.49429112398875436, + "grad_norm": 0.279296875, + "learning_rate": 0.00011932484319067245, + "loss": 0.983, "step": 8615 }, { - "epoch": 0.24729964282128153, - "grad_norm": 0.416015625, - "learning_rate": 0.0001870710260058621, - "loss": 0.9023, + "epoch": 0.49457800218027426, + "grad_norm": 0.26953125, + "learning_rate": 0.00011922658115289141, + "loss": 0.9413, "step": 8620 }, { - "epoch": 0.24744308808973936, - "grad_norm": 0.36328125, - "learning_rate": 0.00018704638927380596, - "loss": 0.8793, + "epoch": 0.49486488037179416, + "grad_norm": 0.271484375, + "learning_rate": 0.00011912829983465168, + "loss": 0.9039, "step": 8625 }, { - "epoch": 0.2475865333581972, - 
"grad_norm": 0.408203125, - "learning_rate": 0.00018702173071638716, - "loss": 0.9159, + "epoch": 0.495151758563314, + "grad_norm": 0.275390625, + "learning_rate": 0.00011902999933450997, + "loss": 0.9747, "step": 8630 }, { - "epoch": 0.247729978626655, - "grad_norm": 0.42578125, - "learning_rate": 0.00018699705033978843, - "loss": 1.0454, + "epoch": 0.4954386367548339, + "grad_norm": 0.26953125, + "learning_rate": 0.0001189316797510423, + "loss": 0.9519, "step": 8635 }, { - "epoch": 0.24787342389511283, - "grad_norm": 0.376953125, - "learning_rate": 0.00018697234815019792, - "loss": 1.0369, + "epoch": 0.49572551494635375, + "grad_norm": 0.306640625, + "learning_rate": 0.00011883334118284369, + "loss": 0.9836, "step": 8640 }, { - "epoch": 0.24801686916357063, - "grad_norm": 0.392578125, - "learning_rate": 0.00018694762415380928, - "loss": 1.021, + "epoch": 0.49601239313787365, + "grad_norm": 0.26171875, + "learning_rate": 0.00011873498372852828, + "loss": 1.0067, "step": 8645 }, { - "epoch": 0.24816031443202846, - "grad_norm": 0.4453125, - "learning_rate": 0.00018692287835682164, - "loss": 1.0432, + "epoch": 0.49629927132939355, + "grad_norm": 0.28125, + "learning_rate": 0.0001186366074867292, + "loss": 0.8766, "step": 8650 }, { - "epoch": 0.2483037597004863, - "grad_norm": 0.40234375, - "learning_rate": 0.00018689811076543953, - "loss": 0.9819, + "epoch": 0.4965861495209134, + "grad_norm": 0.310546875, + "learning_rate": 0.00011853821255609836, + "loss": 1.0009, "step": 8655 }, { - "epoch": 0.2484472049689441, - "grad_norm": 0.380859375, - "learning_rate": 0.00018687332138587302, - "loss": 0.9632, + "epoch": 0.4968730277124333, + "grad_norm": 0.26171875, + "learning_rate": 0.00011843979903530638, + "loss": 0.9815, "step": 8660 }, { - "epoch": 0.24859065023740193, - "grad_norm": 0.373046875, - "learning_rate": 0.00018684851022433762, - "loss": 1.0177, + "epoch": 0.4971599059039532, + "grad_norm": 0.267578125, + "learning_rate": 0.00011834136702304257, + "loss": 0.9317, "step": 8665 }, { - "epoch": 0.24873409550585973, - "grad_norm": 0.376953125, - "learning_rate": 0.0001868236772870543, - "loss": 0.9665, + "epoch": 0.49744678409547305, + "grad_norm": 0.2734375, + "learning_rate": 0.00011824291661801479, + "loss": 0.9523, "step": 8670 }, { - "epoch": 0.24887754077431756, - "grad_norm": 0.431640625, - "learning_rate": 0.00018679882258024945, - "loss": 1.0085, + "epoch": 0.49773366228699295, + "grad_norm": 0.265625, + "learning_rate": 0.00011814444791894934, + "loss": 0.9784, "step": 8675 }, { - "epoch": 0.24902098604277537, - "grad_norm": 0.373046875, - "learning_rate": 0.00018677394611015498, - "loss": 0.9828, + "epoch": 0.4980205404785128, + "grad_norm": 0.294921875, + "learning_rate": 0.0001180459610245908, + "loss": 1.0339, "step": 8680 }, { - "epoch": 0.2491644313112332, - "grad_norm": 0.39453125, - "learning_rate": 0.00018674904788300824, - "loss": 1.0281, + "epoch": 0.4983074186700327, + "grad_norm": 0.24609375, + "learning_rate": 0.00011794745603370212, + "loss": 0.9443, "step": 8685 }, { - "epoch": 0.24930787657969103, - "grad_norm": 0.43359375, - "learning_rate": 0.000186724127905052, - "loss": 0.9241, + "epoch": 0.4985942968615526, + "grad_norm": 0.279296875, + "learning_rate": 0.00011784893304506424, + "loss": 0.9769, "step": 8690 }, { - "epoch": 0.24945132184814883, - "grad_norm": 0.392578125, - "learning_rate": 0.00018669918618253455, - "loss": 0.9931, + "epoch": 0.49888117505307245, + "grad_norm": 0.287109375, + "learning_rate": 0.0001177503921574763, + "loss": 0.9559, "step": 
8695 }, { - "epoch": 0.24959476711660666, - "grad_norm": 0.41796875, - "learning_rate": 0.00018667422272170955, - "loss": 0.9226, + "epoch": 0.49916805324459235, + "grad_norm": 0.279296875, + "learning_rate": 0.00011765183346975528, + "loss": 0.9302, "step": 8700 }, { - "epoch": 0.24973821238506447, - "grad_norm": 0.37109375, - "learning_rate": 0.00018664923752883614, - "loss": 1.0136, + "epoch": 0.49945493143611225, + "grad_norm": 0.275390625, + "learning_rate": 0.0001175532570807361, + "loss": 0.9505, "step": 8705 }, { - "epoch": 0.2498816576535223, - "grad_norm": 0.40234375, - "learning_rate": 0.00018662423061017896, - "loss": 1.0172, + "epoch": 0.4997418096276321, + "grad_norm": 0.263671875, + "learning_rate": 0.00011745466308927136, + "loss": 1.001, "step": 8710 }, { - "epoch": 0.2500251029219801, - "grad_norm": 0.384765625, - "learning_rate": 0.00018659920197200807, - "loss": 0.9211, + "epoch": 0.500028687819152, + "grad_norm": 0.2734375, + "learning_rate": 0.00011735605159423131, + "loss": 0.8966, "step": 8715 }, { - "epoch": 0.25016854819043793, - "grad_norm": 0.412109375, - "learning_rate": 0.00018657415162059892, - "loss": 1.0906, + "epoch": 0.5003155660106718, + "grad_norm": 0.2490234375, + "learning_rate": 0.00011725742269450382, + "loss": 0.8901, "step": 8720 }, { - "epoch": 0.25031199345889577, - "grad_norm": 0.40234375, - "learning_rate": 0.0001865490795622325, - "loss": 1.0185, + "epoch": 0.5006024442021918, + "grad_norm": 0.287109375, + "learning_rate": 0.00011715877648899413, + "loss": 0.9536, "step": 8725 }, { - "epoch": 0.2504554387273536, - "grad_norm": 0.412109375, - "learning_rate": 0.0001865239858031951, - "loss": 1.0293, + "epoch": 0.5008893223937116, + "grad_norm": 0.259765625, + "learning_rate": 0.0001170601130766249, + "loss": 0.9002, "step": 8730 }, { - "epoch": 0.2505988839958114, - "grad_norm": 0.37890625, - "learning_rate": 0.00018649887034977863, - "loss": 0.8834, + "epoch": 0.5011762005852315, + "grad_norm": 0.2734375, + "learning_rate": 0.00011696143255633607, + "loss": 0.9495, "step": 8735 }, { - "epoch": 0.2507423292642692, - "grad_norm": 0.40625, - "learning_rate": 0.00018647373320828035, - "loss": 1.0722, + "epoch": 0.5014630787767513, + "grad_norm": 0.26171875, + "learning_rate": 0.0001168627350270846, + "loss": 0.9705, "step": 8740 }, { - "epoch": 0.25088577453272703, - "grad_norm": 0.365234375, - "learning_rate": 0.00018644857438500289, - "loss": 1.0402, + "epoch": 0.5017499569682713, + "grad_norm": 0.279296875, + "learning_rate": 0.00011676402058784463, + "loss": 0.9148, "step": 8745 }, { - "epoch": 0.25102921980118487, - "grad_norm": 0.380859375, - "learning_rate": 0.00018642339388625444, - "loss": 0.9226, + "epoch": 0.5020368351597911, + "grad_norm": 0.271484375, + "learning_rate": 0.00011666528933760725, + "loss": 0.9683, "step": 8750 }, { - "epoch": 0.2511726650696427, - "grad_norm": 0.41796875, - "learning_rate": 0.00018639819171834855, - "loss": 0.968, + "epoch": 0.502323713351311, + "grad_norm": 0.30859375, + "learning_rate": 0.00011656654137538032, + "loss": 0.9706, "step": 8755 }, { - "epoch": 0.2513161103381005, - "grad_norm": 0.412109375, - "learning_rate": 0.0001863729678876043, - "loss": 0.9697, + "epoch": 0.5026105915428309, + "grad_norm": 0.255859375, + "learning_rate": 0.0001164677768001886, + "loss": 0.9102, "step": 8760 }, { - "epoch": 0.2514595556065583, - "grad_norm": 0.416015625, - "learning_rate": 0.00018634772240034596, - "loss": 1.041, + "epoch": 0.5028974697343508, + "grad_norm": 0.265625, + "learning_rate": 
0.00011636899571107333, + "loss": 0.8819, "step": 8765 }, { - "epoch": 0.25160300087501614, - "grad_norm": 0.458984375, - "learning_rate": 0.00018632245526290352, - "loss": 1.0205, + "epoch": 0.5031843479258706, + "grad_norm": 0.28515625, + "learning_rate": 0.00011627019820709246, + "loss": 0.9267, "step": 8770 }, { - "epoch": 0.25174644614347397, - "grad_norm": 0.365234375, - "learning_rate": 0.00018629716648161228, - "loss": 0.9598, + "epoch": 0.5034712261173906, + "grad_norm": 0.28125, + "learning_rate": 0.00011617138438732036, + "loss": 0.9334, "step": 8775 }, { - "epoch": 0.2518898914119318, - "grad_norm": 0.38671875, - "learning_rate": 0.0001862718560628129, - "loss": 1.0026, + "epoch": 0.5037581043089104, + "grad_norm": 0.279296875, + "learning_rate": 0.00011607255435084772, + "loss": 0.9174, "step": 8780 }, { - "epoch": 0.2520333366803896, - "grad_norm": 0.369140625, - "learning_rate": 0.00018624652401285158, - "loss": 0.9622, + "epoch": 0.5040449825004303, + "grad_norm": 0.265625, + "learning_rate": 0.00011597370819678157, + "loss": 0.9466, "step": 8785 }, { - "epoch": 0.2521767819488474, - "grad_norm": 0.380859375, - "learning_rate": 0.00018622117033807985, - "loss": 0.9755, + "epoch": 0.5043318606919502, + "grad_norm": 0.58203125, + "learning_rate": 0.00011587484602424499, + "loss": 0.9153, "step": 8790 }, { - "epoch": 0.25232022721730524, - "grad_norm": 0.384765625, - "learning_rate": 0.0001861957950448547, - "loss": 0.8372, + "epoch": 0.5046187388834701, + "grad_norm": 0.267578125, + "learning_rate": 0.00011577596793237722, + "loss": 0.9667, "step": 8795 }, { - "epoch": 0.25246367248576307, - "grad_norm": 0.38671875, - "learning_rate": 0.00018617039813953855, - "loss": 0.9242, + "epoch": 0.5049056170749899, + "grad_norm": 0.259765625, + "learning_rate": 0.00011567707402033345, + "loss": 0.9371, "step": 8800 }, { - "epoch": 0.2526071177542209, - "grad_norm": 0.384765625, - "learning_rate": 0.00018614497962849926, - "loss": 1.0266, + "epoch": 0.5051924952665099, + "grad_norm": 0.267578125, + "learning_rate": 0.00011557816438728467, + "loss": 0.9089, "step": 8805 }, { - "epoch": 0.2527505630226787, - "grad_norm": 0.41015625, - "learning_rate": 0.00018611953951811004, - "loss": 1.021, + "epoch": 0.5054793734580297, + "grad_norm": 0.263671875, + "learning_rate": 0.00011547923913241774, + "loss": 0.9599, "step": 8810 }, { - "epoch": 0.2528940082911365, - "grad_norm": 0.404296875, - "learning_rate": 0.00018609407781474956, - "loss": 0.9824, + "epoch": 0.5057662516495496, + "grad_norm": 0.275390625, + "learning_rate": 0.00011538029835493507, + "loss": 0.9268, "step": 8815 }, { - "epoch": 0.25303745355959434, - "grad_norm": 0.36328125, - "learning_rate": 0.00018606859452480193, - "loss": 0.9062, + "epoch": 0.5060531298410694, + "grad_norm": 0.251953125, + "learning_rate": 0.00011528134215405473, + "loss": 0.955, "step": 8820 }, { - "epoch": 0.25318089882805217, - "grad_norm": 0.392578125, - "learning_rate": 0.00018604308965465655, - "loss": 1.0233, + "epoch": 0.5063400080325894, + "grad_norm": 0.287109375, + "learning_rate": 0.00011518237062901023, + "loss": 1.008, "step": 8825 }, { - "epoch": 0.25332434409651, - "grad_norm": 0.388671875, - "learning_rate": 0.00018601756321070845, - "loss": 0.9548, + "epoch": 0.5066268862241092, + "grad_norm": 0.279296875, + "learning_rate": 0.00011508338387905038, + "loss": 1.0123, "step": 8830 }, { - "epoch": 0.2534677893649678, - "grad_norm": 0.388671875, - "learning_rate": 0.00018599201519935786, - "loss": 1.0056, + "epoch": 0.5069137644156291, + 
"grad_norm": 0.25390625, + "learning_rate": 0.0001149843820034394, + "loss": 0.8719, "step": 8835 }, { - "epoch": 0.2536112346334256, - "grad_norm": 0.41796875, - "learning_rate": 0.0001859664456270105, - "loss": 0.9727, + "epoch": 0.507200642607149, + "grad_norm": 0.27734375, + "learning_rate": 0.00011488536510145651, + "loss": 0.9107, "step": 8840 }, { - "epoch": 0.25375467990188344, - "grad_norm": 0.40234375, - "learning_rate": 0.00018594085450007755, - "loss": 0.8439, + "epoch": 0.5074875207986689, + "grad_norm": 0.263671875, + "learning_rate": 0.00011478633327239614, + "loss": 0.9925, "step": 8845 }, { - "epoch": 0.25389812517034127, - "grad_norm": 0.380859375, - "learning_rate": 0.00018591524182497547, - "loss": 0.9005, + "epoch": 0.5077743989901887, + "grad_norm": 0.267578125, + "learning_rate": 0.0001146872866155676, + "loss": 0.9601, "step": 8850 }, { - "epoch": 0.2540415704387991, - "grad_norm": 0.376953125, - "learning_rate": 0.0001858896076081262, - "loss": 0.9749, + "epoch": 0.5080612771817087, + "grad_norm": 0.283203125, + "learning_rate": 0.00011458822523029509, + "loss": 0.9667, "step": 8855 }, { - "epoch": 0.2541850157072569, - "grad_norm": 0.419921875, - "learning_rate": 0.0001858639518559571, - "loss": 1.0402, + "epoch": 0.5083481553732285, + "grad_norm": 0.287109375, + "learning_rate": 0.00011448914921591765, + "loss": 0.9374, "step": 8860 }, { - "epoch": 0.2543284609757147, - "grad_norm": 0.376953125, - "learning_rate": 0.0001858382745749009, - "loss": 0.953, + "epoch": 0.5086350335647484, + "grad_norm": 0.35546875, + "learning_rate": 0.00011439005867178884, + "loss": 0.9557, "step": 8865 }, { - "epoch": 0.25447190624417254, - "grad_norm": 0.423828125, - "learning_rate": 0.00018581257577139572, - "loss": 0.9407, + "epoch": 0.5089219117562683, + "grad_norm": 0.2734375, + "learning_rate": 0.00011429095369727696, + "loss": 0.8917, "step": 8870 }, { - "epoch": 0.2546153515126304, - "grad_norm": 0.39453125, - "learning_rate": 0.00018578685545188507, - "loss": 1.0198, + "epoch": 0.5092087899477882, + "grad_norm": 0.24609375, + "learning_rate": 0.00011419183439176464, + "loss": 0.9163, "step": 8875 }, { - "epoch": 0.25475879678108815, - "grad_norm": 0.404296875, - "learning_rate": 0.00018576111362281794, - "loss": 1.0276, + "epoch": 0.509495668139308, + "grad_norm": 0.267578125, + "learning_rate": 0.00011409270085464898, + "loss": 0.9374, "step": 8880 }, { - "epoch": 0.254902242049546, - "grad_norm": 0.37109375, - "learning_rate": 0.00018573535029064853, - "loss": 1.0065, + "epoch": 0.509782546330828, + "grad_norm": 0.265625, + "learning_rate": 0.0001139935531853413, + "loss": 1.0086, "step": 8885 }, { - "epoch": 0.2550456873180038, - "grad_norm": 0.369140625, - "learning_rate": 0.00018570956546183666, - "loss": 1.0171, + "epoch": 0.5100694245223478, + "grad_norm": 0.259765625, + "learning_rate": 0.0001138943914832671, + "loss": 0.9003, "step": 8890 }, { - "epoch": 0.25518913258646164, - "grad_norm": 0.4296875, - "learning_rate": 0.00018568375914284737, - "loss": 0.9946, + "epoch": 0.5103563027138677, + "grad_norm": 0.265625, + "learning_rate": 0.00011379521584786599, + "loss": 0.9237, "step": 8895 }, { - "epoch": 0.2553325778549195, - "grad_norm": 0.43359375, - "learning_rate": 0.00018565793134015115, - "loss": 0.9383, + "epoch": 0.5106431809053875, + "grad_norm": 0.279296875, + "learning_rate": 0.0001136960263785915, + "loss": 0.9486, "step": 8900 }, { - "epoch": 0.25547602312337725, - "grad_norm": 0.421875, - "learning_rate": 0.00018563208206022385, - "loss": 1.0802, + 
"epoch": 0.5109300590969075, + "grad_norm": 0.263671875, + "learning_rate": 0.00011359682317491098, + "loss": 0.9745, "step": 8905 }, { - "epoch": 0.2556194683918351, - "grad_norm": 0.40234375, - "learning_rate": 0.00018560621130954674, - "loss": 0.9812, + "epoch": 0.5112169372884273, + "grad_norm": 0.28125, + "learning_rate": 0.00011349760633630575, + "loss": 0.9179, "step": 8910 }, { - "epoch": 0.2557629136602929, - "grad_norm": 0.41015625, - "learning_rate": 0.00018558031909460647, - "loss": 0.9601, + "epoch": 0.5115038154799472, + "grad_norm": 0.265625, + "learning_rate": 0.00011339837596227061, + "loss": 0.9705, "step": 8915 }, { - "epoch": 0.25590635892875074, - "grad_norm": 0.36328125, - "learning_rate": 0.00018555440542189508, - "loss": 0.9387, + "epoch": 0.5117906936714671, + "grad_norm": 0.255859375, + "learning_rate": 0.00011329913215231401, + "loss": 0.9673, "step": 8920 }, { - "epoch": 0.2560498041972086, - "grad_norm": 0.330078125, - "learning_rate": 0.00018552847029790993, - "loss": 0.9527, + "epoch": 0.512077571862987, + "grad_norm": 0.298828125, + "learning_rate": 0.00011319987500595785, + "loss": 0.9442, "step": 8925 }, { - "epoch": 0.25619324946566635, - "grad_norm": 0.39453125, - "learning_rate": 0.00018550251372915382, - "loss": 1.0892, + "epoch": 0.5123644500545068, + "grad_norm": 0.271484375, + "learning_rate": 0.00011310060462273744, + "loss": 0.9693, "step": 8930 }, { - "epoch": 0.2563366947341242, - "grad_norm": 0.384765625, - "learning_rate": 0.00018547653572213488, - "loss": 0.9214, + "epoch": 0.5126513282460268, + "grad_norm": 0.265625, + "learning_rate": 0.00011300132110220134, + "loss": 0.9383, "step": 8935 }, { - "epoch": 0.256480140002582, - "grad_norm": 0.369140625, - "learning_rate": 0.00018545053628336668, - "loss": 0.9453, + "epoch": 0.5129382064375466, + "grad_norm": 0.28125, + "learning_rate": 0.0001129020245439113, + "loss": 0.9474, "step": 8940 }, { - "epoch": 0.25662358527103984, - "grad_norm": 0.435546875, - "learning_rate": 0.0001854245154193681, - "loss": 1.0128, + "epoch": 0.5132250846290665, + "grad_norm": 0.25390625, + "learning_rate": 0.00011280271504744208, + "loss": 0.9741, "step": 8945 }, { - "epoch": 0.2567670305394977, - "grad_norm": 0.384765625, - "learning_rate": 0.00018539847313666345, - "loss": 0.9223, + "epoch": 0.5135119628205864, + "grad_norm": 0.263671875, + "learning_rate": 0.00011270339271238153, + "loss": 0.9523, "step": 8950 }, { - "epoch": 0.25691047580795545, - "grad_norm": 0.37890625, - "learning_rate": 0.00018537240944178234, - "loss": 0.8784, + "epoch": 0.5137988410121063, + "grad_norm": 0.265625, + "learning_rate": 0.00011260405763833029, + "loss": 0.92, "step": 8955 }, { - "epoch": 0.2570539210764133, - "grad_norm": 0.388671875, - "learning_rate": 0.00018534632434125982, - "loss": 0.9475, + "epoch": 0.5140857192036261, + "grad_norm": 0.251953125, + "learning_rate": 0.00011250470992490176, + "loss": 0.9989, "step": 8960 }, { - "epoch": 0.2571973663448711, - "grad_norm": 0.390625, - "learning_rate": 0.00018532021784163627, - "loss": 1.0277, + "epoch": 0.5143725973951461, + "grad_norm": 0.263671875, + "learning_rate": 0.0001124053496717221, + "loss": 0.9641, "step": 8965 }, { - "epoch": 0.25734081161332895, - "grad_norm": 0.41796875, - "learning_rate": 0.00018529408994945738, - "loss": 1.059, + "epoch": 0.5146594755866659, + "grad_norm": 0.259765625, + "learning_rate": 0.00011230597697842998, + "loss": 0.9714, "step": 8970 }, { - "epoch": 0.2574842568817868, - "grad_norm": 0.396484375, - "learning_rate": 
0.00018526794067127433, - "loss": 0.9357, + "epoch": 0.5149463537781858, + "grad_norm": 0.26953125, + "learning_rate": 0.0001122065919446765, + "loss": 0.9023, "step": 8975 }, { - "epoch": 0.25762770215024455, - "grad_norm": 0.3828125, - "learning_rate": 0.0001852417700136436, - "loss": 0.9698, + "epoch": 0.5152332319697057, + "grad_norm": 0.259765625, + "learning_rate": 0.00011210719467012529, + "loss": 0.8879, "step": 8980 }, { - "epoch": 0.2577711474187024, - "grad_norm": 0.396484375, - "learning_rate": 0.00018521557798312696, - "loss": 0.9467, + "epoch": 0.5155201101612256, + "grad_norm": 0.29296875, + "learning_rate": 0.0001120077852544521, + "loss": 1.0173, "step": 8985 }, { - "epoch": 0.2579145926871602, - "grad_norm": 0.37109375, - "learning_rate": 0.00018518936458629165, - "loss": 0.9692, + "epoch": 0.5158069883527454, + "grad_norm": 0.306640625, + "learning_rate": 0.00011190836379734495, + "loss": 0.99, "step": 8990 }, { - "epoch": 0.25805803795561805, - "grad_norm": 0.396484375, - "learning_rate": 0.00018516312982971023, - "loss": 1.0614, + "epoch": 0.5160938665442653, + "grad_norm": 0.263671875, + "learning_rate": 0.00011180893039850388, + "loss": 0.9472, "step": 8995 }, { - "epoch": 0.2582014832240759, - "grad_norm": 0.369140625, - "learning_rate": 0.00018513687371996058, - "loss": 0.8925, + "epoch": 0.5163807447357852, + "grad_norm": 0.279296875, + "learning_rate": 0.00011170948515764088, + "loss": 0.9285, "step": 9000 }, { - "epoch": 0.25834492849253365, - "grad_norm": 0.373046875, - "learning_rate": 0.00018511059626362598, - "loss": 0.8799, + "epoch": 0.5166676229273051, + "grad_norm": 0.28515625, + "learning_rate": 0.00011161002817447996, + "loss": 1.0298, "step": 9005 }, { - "epoch": 0.2584883737609915, - "grad_norm": 0.357421875, - "learning_rate": 0.000185084297467295, - "loss": 0.9366, + "epoch": 0.5169545011188249, + "grad_norm": 0.2578125, + "learning_rate": 0.00011151055954875673, + "loss": 0.8893, "step": 9010 }, { - "epoch": 0.2586318190294493, - "grad_norm": 0.353515625, - "learning_rate": 0.00018505797733756168, - "loss": 1.0114, + "epoch": 0.5172413793103449, + "grad_norm": 0.28125, + "learning_rate": 0.00011141107938021858, + "loss": 0.9669, "step": 9015 }, { - "epoch": 0.25877526429790715, - "grad_norm": 0.384765625, - "learning_rate": 0.0001850316358810253, - "loss": 0.9862, + "epoch": 0.5175282575018647, + "grad_norm": 0.283203125, + "learning_rate": 0.00011131158776862445, + "loss": 0.9924, "step": 9020 }, { - "epoch": 0.258918709566365, - "grad_norm": 0.435546875, - "learning_rate": 0.00018500527310429048, - "loss": 0.9614, + "epoch": 0.5178151356933846, + "grad_norm": 0.271484375, + "learning_rate": 0.0001112120848137447, + "loss": 0.9652, "step": 9025 }, { - "epoch": 0.25906215483482276, - "grad_norm": 0.388671875, - "learning_rate": 0.0001849788890139673, - "loss": 0.981, + "epoch": 0.5181020138849045, + "grad_norm": 0.251953125, + "learning_rate": 0.0001111125706153612, + "loss": 0.8898, "step": 9030 }, { - "epoch": 0.2592056001032806, - "grad_norm": 0.57421875, - "learning_rate": 0.00018495248361667104, - "loss": 1.0198, + "epoch": 0.5183888920764244, + "grad_norm": 0.296875, + "learning_rate": 0.00011101304527326695, + "loss": 0.9597, "step": 9035 }, { - "epoch": 0.2593490453717384, - "grad_norm": 0.388671875, - "learning_rate": 0.00018492605691902242, - "loss": 0.9742, + "epoch": 0.5186757702679442, + "grad_norm": 0.28125, + "learning_rate": 0.00011091350888726619, + "loss": 1.0008, "step": 9040 }, { - "epoch": 0.25949249064019625, - "grad_norm": 
0.3828125, - "learning_rate": 0.0001848996089276475, - "loss": 0.8914, + "epoch": 0.5189626484594642, + "grad_norm": 0.294921875, + "learning_rate": 0.0001108139615571743, + "loss": 0.9604, "step": 9045 }, { - "epoch": 0.2596359359086541, - "grad_norm": 0.380859375, - "learning_rate": 0.00018487313964917761, - "loss": 1.0319, + "epoch": 0.519249526650984, + "grad_norm": 0.26171875, + "learning_rate": 0.00011071440338281745, + "loss": 0.9893, "step": 9050 }, { - "epoch": 0.25977938117711186, - "grad_norm": 0.392578125, - "learning_rate": 0.00018484664909024947, - "loss": 0.9642, + "epoch": 0.5195364048425039, + "grad_norm": 0.275390625, + "learning_rate": 0.00011061483446403289, + "loss": 0.9492, "step": 9055 }, { - "epoch": 0.2599228264455697, - "grad_norm": 0.37890625, - "learning_rate": 0.00018482013725750512, - "loss": 1.0034, + "epoch": 0.5198232830340238, + "grad_norm": 0.271484375, + "learning_rate": 0.00011051525490066852, + "loss": 0.9726, "step": 9060 }, { - "epoch": 0.2600662717140275, - "grad_norm": 0.423828125, - "learning_rate": 0.000184793604157592, - "loss": 1.0906, + "epoch": 0.5201101612255437, + "grad_norm": 0.271484375, + "learning_rate": 0.00011041566479258294, + "loss": 0.9528, "step": 9065 }, { - "epoch": 0.26020971698248535, - "grad_norm": 0.43359375, - "learning_rate": 0.00018476704979716275, - "loss": 1.0049, + "epoch": 0.5203970394170635, + "grad_norm": 0.267578125, + "learning_rate": 0.0001103160642396454, + "loss": 0.9567, "step": 9070 }, { - "epoch": 0.2603531622509431, - "grad_norm": 0.37109375, - "learning_rate": 0.00018474047418287544, - "loss": 0.995, + "epoch": 0.5206839176085833, + "grad_norm": 0.2734375, + "learning_rate": 0.00011021645334173547, + "loss": 0.9617, "step": 9075 }, { - "epoch": 0.26049660751940096, - "grad_norm": 0.40625, - "learning_rate": 0.00018471387732139344, - "loss": 0.9878, + "epoch": 0.5209707958001033, + "grad_norm": 0.283203125, + "learning_rate": 0.00011011683219874323, + "loss": 0.9781, "step": 9080 }, { - "epoch": 0.2606400527878588, - "grad_norm": 0.41015625, - "learning_rate": 0.00018468725921938543, - "loss": 1.0129, + "epoch": 0.5212576739916231, + "grad_norm": 0.291015625, + "learning_rate": 0.00011001720091056897, + "loss": 0.9156, "step": 9085 }, { - "epoch": 0.2607834980563166, - "grad_norm": 0.384765625, - "learning_rate": 0.00018466061988352546, - "loss": 1.0047, + "epoch": 0.521544552183143, + "grad_norm": 0.240234375, + "learning_rate": 0.00010991755957712318, + "loss": 0.9074, "step": 9090 }, { - "epoch": 0.26092694332477445, - "grad_norm": 0.388671875, - "learning_rate": 0.00018463395932049288, - "loss": 0.963, + "epoch": 0.521831430374663, + "grad_norm": 0.26171875, + "learning_rate": 0.00010981790829832641, + "loss": 0.9704, "step": 9095 }, { - "epoch": 0.2610703885932322, - "grad_norm": 0.392578125, - "learning_rate": 0.00018460727753697234, - "loss": 0.9702, + "epoch": 0.5221183085661828, + "grad_norm": 0.271484375, + "learning_rate": 0.00010971824717410917, + "loss": 0.9449, "step": 9100 }, { - "epoch": 0.26121383386169006, - "grad_norm": 0.373046875, - "learning_rate": 0.00018458057453965383, - "loss": 0.9846, + "epoch": 0.5224051867577026, + "grad_norm": 0.251953125, + "learning_rate": 0.00010961857630441187, + "loss": 0.9592, "step": 9105 }, { - "epoch": 0.2613572791301479, - "grad_norm": 0.3828125, - "learning_rate": 0.00018455385033523268, - "loss": 0.8773, + "epoch": 0.5226920649492226, + "grad_norm": 0.26171875, + "learning_rate": 0.00010951889578918471, + "loss": 0.9461, "step": 9110 }, { - "epoch": 
0.2615007243986057, - "grad_norm": 0.37109375, - "learning_rate": 0.00018452710493040953, - "loss": 0.9134, + "epoch": 0.5229789431407424, + "grad_norm": 0.2490234375, + "learning_rate": 0.00010941920572838747, + "loss": 0.9055, "step": 9115 }, { - "epoch": 0.26164416966706355, - "grad_norm": 0.3984375, - "learning_rate": 0.00018450033833189027, - "loss": 0.9453, + "epoch": 0.5232658213322623, + "grad_norm": 0.26953125, + "learning_rate": 0.00010931950622198965, + "loss": 0.9994, "step": 9120 }, { - "epoch": 0.26178761493552133, - "grad_norm": 0.408203125, - "learning_rate": 0.0001844735505463862, - "loss": 0.9285, + "epoch": 0.5235526995237823, + "grad_norm": 0.267578125, + "learning_rate": 0.00010921979736997006, + "loss": 0.9529, "step": 9125 }, { - "epoch": 0.26193106020397916, - "grad_norm": 0.3671875, - "learning_rate": 0.0001844467415806139, - "loss": 1.0174, + "epoch": 0.5238395777153021, + "grad_norm": 0.271484375, + "learning_rate": 0.000109120079272317, + "loss": 0.9471, "step": 9130 }, { - "epoch": 0.262074505472437, - "grad_norm": 0.42578125, - "learning_rate": 0.00018441991144129522, - "loss": 0.9898, + "epoch": 0.5241264559068219, + "grad_norm": 0.28125, + "learning_rate": 0.00010902035202902798, + "loss": 0.9856, "step": 9135 }, { - "epoch": 0.2622179507408948, - "grad_norm": 0.359375, - "learning_rate": 0.00018439306013515733, - "loss": 0.9631, + "epoch": 0.5244133340983419, + "grad_norm": 0.255859375, + "learning_rate": 0.00010892061574010972, + "loss": 1.0034, "step": 9140 }, { - "epoch": 0.26236139600935265, - "grad_norm": 0.412109375, - "learning_rate": 0.0001843661876689328, - "loss": 0.9607, + "epoch": 0.5247002122898617, + "grad_norm": 0.28125, + "learning_rate": 0.00010882087050557803, + "loss": 0.9817, "step": 9145 }, { - "epoch": 0.26250484127781043, - "grad_norm": 0.3984375, - "learning_rate": 0.00018433929404935935, - "loss": 1.0292, + "epoch": 0.5249870904813816, + "grad_norm": 0.291015625, + "learning_rate": 0.00010872111642545759, + "loss": 0.9766, "step": 9150 }, { - "epoch": 0.26264828654626826, - "grad_norm": 0.443359375, - "learning_rate": 0.00018431237928318015, - "loss": 1.0452, + "epoch": 0.5252739686729014, + "grad_norm": 0.26171875, + "learning_rate": 0.00010862135359978205, + "loss": 0.9329, "step": 9155 }, { - "epoch": 0.2627917318147261, - "grad_norm": 0.390625, - "learning_rate": 0.00018428544337714358, - "loss": 1.0279, + "epoch": 0.5255608468644214, + "grad_norm": 0.251953125, + "learning_rate": 0.00010852158212859378, + "loss": 0.9674, "step": 9160 }, { - "epoch": 0.2629351770831839, - "grad_norm": 0.36328125, - "learning_rate": 0.00018425848633800334, - "loss": 1.0396, + "epoch": 0.5258477250559412, + "grad_norm": 0.265625, + "learning_rate": 0.00010842180211194384, + "loss": 0.9627, "step": 9165 }, { - "epoch": 0.26307862235164176, - "grad_norm": 0.42578125, - "learning_rate": 0.00018423150817251845, - "loss": 0.9509, + "epoch": 0.5261346032474611, + "grad_norm": 0.283203125, + "learning_rate": 0.00010832201364989186, + "loss": 0.8957, "step": 9170 }, { - "epoch": 0.26322206762009953, - "grad_norm": 0.423828125, - "learning_rate": 0.0001842045088874532, - "loss": 1.0221, + "epoch": 0.526421481438981, + "grad_norm": 0.265625, + "learning_rate": 0.00010822221684250593, + "loss": 0.9555, "step": 9175 }, { - "epoch": 0.26336551288855736, - "grad_norm": 0.453125, - "learning_rate": 0.0001841774884895772, - "loss": 0.9436, + "epoch": 0.5267083596305009, + "grad_norm": 0.27734375, + "learning_rate": 0.00010812241178986254, + "loss": 0.9301, 
"step": 9180 }, { - "epoch": 0.2635089581570152, - "grad_norm": 0.396484375, - "learning_rate": 0.00018415044698566536, - "loss": 1.0424, + "epoch": 0.5269952378220207, + "grad_norm": 0.265625, + "learning_rate": 0.00010802259859204635, + "loss": 0.9894, "step": 9185 }, { - "epoch": 0.263652403425473, - "grad_norm": 0.421875, - "learning_rate": 0.00018412338438249782, - "loss": 0.9713, + "epoch": 0.5272821160135407, + "grad_norm": 0.375, + "learning_rate": 0.00010792277734915033, + "loss": 0.9212, "step": 9190 }, { - "epoch": 0.26379584869393086, - "grad_norm": 0.388671875, - "learning_rate": 0.00018409630068686012, - "loss": 0.9875, + "epoch": 0.5275689942050605, + "grad_norm": 0.2578125, + "learning_rate": 0.0001078229481612754, + "loss": 0.9731, "step": 9195 }, { - "epoch": 0.26393929396238863, - "grad_norm": 0.373046875, - "learning_rate": 0.00018406919590554296, - "loss": 0.9019, + "epoch": 0.5278558723965804, + "grad_norm": 0.2734375, + "learning_rate": 0.00010772311112853053, + "loss": 1.0153, "step": 9200 }, { - "epoch": 0.26408273923084646, - "grad_norm": 0.400390625, - "learning_rate": 0.00018404207004534242, - "loss": 1.0282, + "epoch": 0.5281427505881003, + "grad_norm": 0.271484375, + "learning_rate": 0.00010762326635103251, + "loss": 1.0092, "step": 9205 }, { - "epoch": 0.2642261844993043, - "grad_norm": 0.345703125, - "learning_rate": 0.00018401492311305985, - "loss": 0.8499, + "epoch": 0.5284296287796202, + "grad_norm": 0.28125, + "learning_rate": 0.00010752341392890587, + "loss": 0.997, "step": 9210 }, { - "epoch": 0.2643696297677621, - "grad_norm": 0.3828125, - "learning_rate": 0.00018398775511550182, - "loss": 0.9562, + "epoch": 0.52871650697114, + "grad_norm": 0.26953125, + "learning_rate": 0.00010742355396228287, + "loss": 0.9526, "step": 9215 }, { - "epoch": 0.26451307503621996, - "grad_norm": 0.396484375, - "learning_rate": 0.00018396056605948032, - "loss": 0.9743, + "epoch": 0.52900338516266, + "grad_norm": 0.263671875, + "learning_rate": 0.00010732368655130333, + "loss": 0.8901, "step": 9220 }, { - "epoch": 0.26465652030467773, - "grad_norm": 0.392578125, - "learning_rate": 0.00018393335595181243, - "loss": 1.0199, + "epoch": 0.5292902633541798, + "grad_norm": 0.283203125, + "learning_rate": 0.00010722381179611449, + "loss": 1.0495, "step": 9225 }, { - "epoch": 0.26479996557313557, - "grad_norm": 0.419921875, - "learning_rate": 0.00018390612479932066, - "loss": 1.0068, + "epoch": 0.5295771415456997, + "grad_norm": 0.28125, + "learning_rate": 0.000107123929796871, + "loss": 0.8588, "step": 9230 }, { - "epoch": 0.2649434108415934, - "grad_norm": 0.431640625, - "learning_rate": 0.00018387887260883274, - "loss": 1.0191, + "epoch": 0.5298640197372195, + "grad_norm": 0.267578125, + "learning_rate": 0.0001070240406537347, + "loss": 0.9422, "step": 9235 }, { - "epoch": 0.2650868561100512, - "grad_norm": 0.38671875, - "learning_rate": 0.00018385159938718172, - "loss": 1.01, + "epoch": 0.5301508979287395, + "grad_norm": 0.265625, + "learning_rate": 0.00010692414446687471, + "loss": 0.9645, "step": 9240 }, { - "epoch": 0.265230301378509, - "grad_norm": 0.400390625, - "learning_rate": 0.00018382430514120578, - "loss": 0.986, + "epoch": 0.5304377761202593, + "grad_norm": 0.275390625, + "learning_rate": 0.0001068242413364671, + "loss": 0.9756, "step": 9245 }, { - "epoch": 0.26537374664696683, - "grad_norm": 0.435546875, - "learning_rate": 0.00018379698987774858, - "loss": 1.0055, + "epoch": 0.5307246543117792, + "grad_norm": 0.291015625, + "learning_rate": 0.00010672433136269499, 
+ "loss": 0.9063, "step": 9250 }, { - "epoch": 0.26551719191542467, - "grad_norm": 0.419921875, - "learning_rate": 0.00018376965360365893, - "loss": 0.9248, + "epoch": 0.5310115325032991, + "grad_norm": 0.302734375, + "learning_rate": 0.00010662441464574833, + "loss": 0.9121, "step": 9255 }, { - "epoch": 0.2656606371838825, - "grad_norm": 0.416015625, - "learning_rate": 0.00018374229632579087, - "loss": 0.9515, + "epoch": 0.531298410694819, + "grad_norm": 0.296875, + "learning_rate": 0.00010652449128582376, + "loss": 0.9811, "step": 9260 }, { - "epoch": 0.26580408245234033, - "grad_norm": 0.439453125, - "learning_rate": 0.00018371491805100379, - "loss": 0.9545, + "epoch": 0.5315852888863388, + "grad_norm": 0.26171875, + "learning_rate": 0.00010642456138312473, + "loss": 0.9502, "step": 9265 }, { - "epoch": 0.2659475277207981, - "grad_norm": 0.435546875, - "learning_rate": 0.00018368751878616234, - "loss": 0.9881, + "epoch": 0.5318721670778588, + "grad_norm": 0.26953125, + "learning_rate": 0.00010632462503786114, + "loss": 0.9152, "step": 9270 }, { - "epoch": 0.26609097298925594, - "grad_norm": 0.412109375, - "learning_rate": 0.00018366009853813635, - "loss": 0.9277, + "epoch": 0.5321590452693786, + "grad_norm": 0.265625, + "learning_rate": 0.00010622468235024936, + "loss": 0.9361, "step": 9275 }, { - "epoch": 0.26623441825771377, - "grad_norm": 0.416015625, - "learning_rate": 0.00018363265731380102, - "loss": 0.9162, + "epoch": 0.5324459234608985, + "grad_norm": 0.28515625, + "learning_rate": 0.00010612473342051219, + "loss": 0.8926, "step": 9280 }, { - "epoch": 0.2663778635261716, - "grad_norm": 0.3671875, - "learning_rate": 0.00018360519512003673, - "loss": 0.9547, + "epoch": 0.5327328016524184, + "grad_norm": 0.25, + "learning_rate": 0.00010602477834887858, + "loss": 0.8839, "step": 9285 }, { - "epoch": 0.26652130879462943, - "grad_norm": 0.4140625, - "learning_rate": 0.00018357771196372916, - "loss": 0.9659, + "epoch": 0.5330196798439383, + "grad_norm": 0.255859375, + "learning_rate": 0.00010592481723558374, + "loss": 0.9026, "step": 9290 }, { - "epoch": 0.2666647540630872, - "grad_norm": 0.408203125, - "learning_rate": 0.00018355020785176923, - "loss": 0.9653, + "epoch": 0.5333065580354581, + "grad_norm": 0.267578125, + "learning_rate": 0.00010582485018086891, + "loss": 0.9264, "step": 9295 }, { - "epoch": 0.26680819933154504, - "grad_norm": 0.392578125, - "learning_rate": 0.00018352268279105314, - "loss": 0.8918, + "epoch": 0.5335934362269781, + "grad_norm": 0.263671875, + "learning_rate": 0.00010572487728498127, + "loss": 0.9399, "step": 9300 }, { - "epoch": 0.26695164460000287, - "grad_norm": 0.404296875, - "learning_rate": 0.00018349513678848225, - "loss": 0.9319, + "epoch": 0.5338803144184979, + "grad_norm": 0.275390625, + "learning_rate": 0.00010562489864817382, + "loss": 0.9237, "step": 9305 }, { - "epoch": 0.2670950898684607, - "grad_norm": 0.390625, - "learning_rate": 0.0001834675698509633, - "loss": 0.9712, + "epoch": 0.5341671926100178, + "grad_norm": 0.267578125, + "learning_rate": 0.00010552491437070537, + "loss": 0.9861, "step": 9310 }, { - "epoch": 0.26723853513691853, - "grad_norm": 0.357421875, - "learning_rate": 0.0001834399819854082, - "loss": 0.9827, + "epoch": 0.5344540708015376, + "grad_norm": 0.291015625, + "learning_rate": 0.00010542492455284043, + "loss": 0.944, "step": 9315 }, { - "epoch": 0.2673819804053763, - "grad_norm": 0.369140625, - "learning_rate": 0.0001834123731987341, - "loss": 0.9605, + "epoch": 0.5347409489930576, + "grad_norm": 0.25390625, + 
"learning_rate": 0.00010532492929484898, + "loss": 0.9637, "step": 9320 }, { - "epoch": 0.26752542567383414, - "grad_norm": 0.39453125, - "learning_rate": 0.0001833847434978635, - "loss": 0.8734, + "epoch": 0.5350278271845774, + "grad_norm": 0.2578125, + "learning_rate": 0.00010522492869700648, + "loss": 0.9383, "step": 9325 }, { - "epoch": 0.26766887094229197, - "grad_norm": 0.384765625, - "learning_rate": 0.00018335709288972395, - "loss": 1.0493, + "epoch": 0.5353147053760973, + "grad_norm": 0.2578125, + "learning_rate": 0.00010512492285959382, + "loss": 0.9444, "step": 9330 }, { - "epoch": 0.2678123162107498, - "grad_norm": 0.455078125, - "learning_rate": 0.00018332942138124845, - "loss": 0.9273, + "epoch": 0.5356015835676172, + "grad_norm": 0.28515625, + "learning_rate": 0.00010502491188289695, + "loss": 0.9563, "step": 9335 }, { - "epoch": 0.26795576147920763, - "grad_norm": 0.357421875, - "learning_rate": 0.00018330172897937513, - "loss": 0.9193, + "epoch": 0.5358884617591371, + "grad_norm": 0.265625, + "learning_rate": 0.00010492489586720724, + "loss": 0.9869, "step": 9340 }, { - "epoch": 0.2680992067476654, - "grad_norm": 0.37890625, - "learning_rate": 0.00018327401569104735, - "loss": 0.9614, + "epoch": 0.5361753399506569, + "grad_norm": 0.271484375, + "learning_rate": 0.00010482487491282089, + "loss": 0.874, "step": 9345 }, { - "epoch": 0.26824265201612324, - "grad_norm": 0.3828125, - "learning_rate": 0.00018324628152321373, - "loss": 0.8992, + "epoch": 0.5364622181421769, + "grad_norm": 0.287109375, + "learning_rate": 0.00010472484912003913, + "loss": 1.016, "step": 9350 }, { - "epoch": 0.26838609728458107, - "grad_norm": 0.421875, - "learning_rate": 0.00018321852648282817, - "loss": 0.9797, + "epoch": 0.5367490963336967, + "grad_norm": 0.2734375, + "learning_rate": 0.00010462481858916812, + "loss": 0.9813, "step": 9355 }, { - "epoch": 0.2685295425530389, - "grad_norm": 0.353515625, - "learning_rate": 0.00018319075057684968, - "loss": 1.0422, + "epoch": 0.5370359745252166, + "grad_norm": 0.287109375, + "learning_rate": 0.0001045247834205186, + "loss": 0.9562, "step": 9360 }, { - "epoch": 0.26867298782149673, - "grad_norm": 0.412109375, - "learning_rate": 0.0001831629538122427, - "loss": 0.9832, + "epoch": 0.5373228527167365, + "grad_norm": 0.259765625, + "learning_rate": 0.00010442474371440618, + "loss": 0.9599, "step": 9365 }, { - "epoch": 0.2688164330899545, - "grad_norm": 0.37890625, - "learning_rate": 0.0001831351361959767, - "loss": 0.8659, + "epoch": 0.5376097309082564, + "grad_norm": 0.26171875, + "learning_rate": 0.00010432469957115083, + "loss": 0.9684, "step": 9370 }, { - "epoch": 0.26895987835841234, - "grad_norm": 0.427734375, - "learning_rate": 0.0001831072977350265, - "loss": 0.9263, + "epoch": 0.5378966090997762, + "grad_norm": 0.30859375, + "learning_rate": 0.00010422465109107702, + "loss": 0.982, "step": 9375 }, { - "epoch": 0.2691033236268702, - "grad_norm": 0.466796875, - "learning_rate": 0.0001830794384363721, - "loss": 0.975, + "epoch": 0.5381834872912962, + "grad_norm": 0.287109375, + "learning_rate": 0.00010412459837451367, + "loss": 1.018, "step": 9380 }, { - "epoch": 0.269246768895328, - "grad_norm": 0.453125, - "learning_rate": 0.0001830515583069987, - "loss": 1.0301, + "epoch": 0.538470365482816, + "grad_norm": 0.275390625, + "learning_rate": 0.00010402454152179377, + "loss": 0.9333, "step": 9385 }, { - "epoch": 0.26939021416378583, - "grad_norm": 0.404296875, - "learning_rate": 0.00018302365735389678, - "loss": 1.0179, + "epoch": 0.5387572436743359, + 
"grad_norm": 0.28125, + "learning_rate": 0.00010392448063325463, + "loss": 0.9124, "step": 9390 }, { - "epoch": 0.2695336594322436, - "grad_norm": 0.3671875, - "learning_rate": 0.00018299573558406204, - "loss": 0.9754, + "epoch": 0.5390441218658557, + "grad_norm": 0.283203125, + "learning_rate": 0.00010382441580923752, + "loss": 0.978, "step": 9395 }, { - "epoch": 0.26967710470070144, - "grad_norm": 0.421875, - "learning_rate": 0.00018296779300449535, - "loss": 1.0088, + "epoch": 0.5393310000573757, + "grad_norm": 0.287109375, + "learning_rate": 0.00010372434715008763, + "loss": 0.9982, "step": 9400 }, { - "epoch": 0.2698205499691593, - "grad_norm": 0.3984375, - "learning_rate": 0.00018293982962220282, - "loss": 0.9111, + "epoch": 0.5396178782488955, + "grad_norm": 0.25, + "learning_rate": 0.00010362427475615413, + "loss": 0.9829, "step": 9405 }, { - "epoch": 0.2699639952376171, - "grad_norm": 0.421875, - "learning_rate": 0.00018291184544419578, - "loss": 0.8484, + "epoch": 0.5399047564404154, + "grad_norm": 0.28125, + "learning_rate": 0.00010352419872778971, + "loss": 0.8918, "step": 9410 }, { - "epoch": 0.2701074405060749, - "grad_norm": 0.443359375, - "learning_rate": 0.0001828838404774908, - "loss": 1.0726, + "epoch": 0.5401916346319353, + "grad_norm": 0.263671875, + "learning_rate": 0.00010342411916535093, + "loss": 0.9446, "step": 9415 }, { - "epoch": 0.2702508857745327, - "grad_norm": 0.392578125, - "learning_rate": 0.00018285581472910964, - "loss": 1.0191, + "epoch": 0.5404785128234552, + "grad_norm": 0.263671875, + "learning_rate": 0.00010332403616919779, + "loss": 0.963, "step": 9420 }, { - "epoch": 0.27039433104299054, - "grad_norm": 0.4765625, - "learning_rate": 0.00018282776820607922, - "loss": 1.0502, + "epoch": 0.540765391014975, + "grad_norm": 0.298828125, + "learning_rate": 0.00010322394983969368, + "loss": 0.9848, "step": 9425 }, { - "epoch": 0.2705377763114484, - "grad_norm": 0.376953125, - "learning_rate": 0.00018279970091543174, - "loss": 0.9665, + "epoch": 0.541052269206495, + "grad_norm": 0.333984375, + "learning_rate": 0.0001031238602772055, + "loss": 0.921, "step": 9430 }, { - "epoch": 0.2706812215799062, - "grad_norm": 0.458984375, - "learning_rate": 0.00018277161286420462, - "loss": 0.9942, + "epoch": 0.5413391473980148, + "grad_norm": 0.275390625, + "learning_rate": 0.00010302376758210319, + "loss": 0.9669, "step": 9435 }, { - "epoch": 0.270824666848364, - "grad_norm": 0.369140625, - "learning_rate": 0.0001827435040594404, - "loss": 0.909, + "epoch": 0.5416260255895347, + "grad_norm": 0.28125, + "learning_rate": 0.00010292367185475997, + "loss": 0.979, "step": 9440 }, { - "epoch": 0.2709681121168218, - "grad_norm": 0.435546875, - "learning_rate": 0.00018271537450818687, - "loss": 1.0662, + "epoch": 0.5419129037810546, + "grad_norm": 0.26171875, + "learning_rate": 0.00010282357319555207, + "loss": 0.963, "step": 9445 }, { - "epoch": 0.27111155738527964, - "grad_norm": 0.37109375, - "learning_rate": 0.00018268722421749703, - "loss": 0.9933, + "epoch": 0.5421997819725745, + "grad_norm": 0.259765625, + "learning_rate": 0.00010272347170485863, + "loss": 0.977, "step": 9450 }, { - "epoch": 0.2712550026537375, - "grad_norm": 0.333984375, - "learning_rate": 0.00018265905319442914, - "loss": 1.0221, + "epoch": 0.5424866601640943, + "grad_norm": 0.26953125, + "learning_rate": 0.00010262336748306165, + "loss": 0.9735, "step": 9455 }, { - "epoch": 0.2713984479221953, - "grad_norm": 0.36328125, - "learning_rate": 0.0001826308614460465, - "loss": 0.9344, + "epoch": 
0.5427735383556143, + "grad_norm": 0.263671875, + "learning_rate": 0.0001025232606305459, + "loss": 0.9852, "step": 9460 }, { - "epoch": 0.2715418931906531, - "grad_norm": 0.37109375, - "learning_rate": 0.00018260264897941773, - "loss": 0.8846, + "epoch": 0.5430604165471341, + "grad_norm": 0.26171875, + "learning_rate": 0.00010242315124769872, + "loss": 0.9374, "step": 9465 }, { - "epoch": 0.2716853384591109, - "grad_norm": 0.451171875, - "learning_rate": 0.00018257441580161663, - "loss": 0.9441, + "epoch": 0.543347294738654, + "grad_norm": 0.28125, + "learning_rate": 0.00010232303943491004, + "loss": 1.0392, "step": 9470 }, { - "epoch": 0.27182878372756875, - "grad_norm": 0.416015625, - "learning_rate": 0.00018254616191972213, - "loss": 0.9638, + "epoch": 0.5436341729301738, + "grad_norm": 0.26171875, + "learning_rate": 0.00010222292529257217, + "loss": 0.9898, "step": 9475 }, { - "epoch": 0.2719722289960266, - "grad_norm": 0.400390625, - "learning_rate": 0.00018251788734081849, - "loss": 0.9605, + "epoch": 0.5439210511216938, + "grad_norm": 0.28125, + "learning_rate": 0.00010212280892107988, + "loss": 0.956, "step": 9480 }, { - "epoch": 0.2721156742644844, - "grad_norm": 0.369140625, - "learning_rate": 0.00018248959207199498, - "loss": 0.9351, + "epoch": 0.5442079293132136, + "grad_norm": 0.26953125, + "learning_rate": 0.00010202269042083001, + "loss": 0.9132, "step": 9485 }, { - "epoch": 0.2722591195329422, - "grad_norm": 0.408203125, - "learning_rate": 0.0001824612761203462, - "loss": 1.0168, + "epoch": 0.5444948075047334, + "grad_norm": 0.30859375, + "learning_rate": 0.00010192256989222169, + "loss": 0.9389, "step": 9490 }, { - "epoch": 0.2724025648014, - "grad_norm": 0.404296875, - "learning_rate": 0.00018243293949297184, - "loss": 1.1269, + "epoch": 0.5447816856962534, + "grad_norm": 0.267578125, + "learning_rate": 0.00010182244743565594, + "loss": 0.8723, "step": 9495 }, { - "epoch": 0.27254601006985785, - "grad_norm": 0.39453125, - "learning_rate": 0.00018240458219697685, - "loss": 0.9563, + "epoch": 0.5450685638877732, + "grad_norm": 0.322265625, + "learning_rate": 0.0001017223231515358, + "loss": 0.9266, "step": 9500 }, { - "epoch": 0.2726894553383157, - "grad_norm": 0.396484375, - "learning_rate": 0.00018237620423947127, - "loss": 0.9766, + "epoch": 0.5453554420792931, + "grad_norm": 0.271484375, + "learning_rate": 0.00010162219714026617, + "loss": 0.9138, "step": 9505 }, { - "epoch": 0.2728329006067735, - "grad_norm": 0.37109375, - "learning_rate": 0.00018234780562757045, - "loss": 0.961, + "epoch": 0.545642320270813, + "grad_norm": 0.275390625, + "learning_rate": 0.0001015220695022536, + "loss": 0.9252, "step": 9510 }, { - "epoch": 0.2729763458752313, - "grad_norm": 0.443359375, - "learning_rate": 0.00018231938636839478, - "loss": 1.0182, + "epoch": 0.5459291984623329, + "grad_norm": 0.25390625, + "learning_rate": 0.00010142194033790633, + "loss": 0.908, "step": 9515 }, { - "epoch": 0.2731197911436891, - "grad_norm": 0.359375, - "learning_rate": 0.00018229094646906997, - "loss": 0.9979, + "epoch": 0.5462160766538527, + "grad_norm": 0.265625, + "learning_rate": 0.0001013218097476341, + "loss": 0.9012, "step": 9520 }, { - "epoch": 0.27326323641214695, - "grad_norm": 0.380859375, - "learning_rate": 0.00018226248593672674, - "loss": 0.9187, + "epoch": 0.5465029548453727, + "grad_norm": 0.296875, + "learning_rate": 0.00010122167783184806, + "loss": 0.9661, "step": 9525 }, { - "epoch": 0.2734066816806048, - "grad_norm": 0.390625, - "learning_rate": 0.00018223400477850117, - "loss": 
1.0352, + "epoch": 0.5467898330368925, + "grad_norm": 0.287109375, + "learning_rate": 0.00010112154469096078, + "loss": 0.9656, "step": 9530 }, { - "epoch": 0.2735501269490626, - "grad_norm": 0.4609375, - "learning_rate": 0.00018220550300153429, - "loss": 0.9398, + "epoch": 0.5470767112284124, + "grad_norm": 0.2734375, + "learning_rate": 0.00010102141042538597, + "loss": 0.9999, "step": 9535 }, { - "epoch": 0.2736935722175204, - "grad_norm": 0.43359375, - "learning_rate": 0.00018217698061297254, - "loss": 0.9111, + "epoch": 0.5473635894199324, + "grad_norm": 0.33984375, + "learning_rate": 0.0001009212751355385, + "loss": 0.8986, "step": 9540 }, { - "epoch": 0.2738370174859782, - "grad_norm": 0.421875, - "learning_rate": 0.00018214843761996734, - "loss": 0.9889, + "epoch": 0.5476504676114522, + "grad_norm": 0.251953125, + "learning_rate": 0.00010082113892183423, + "loss": 0.9011, "step": 9545 }, { - "epoch": 0.27398046275443605, - "grad_norm": 0.42578125, - "learning_rate": 0.00018211987402967536, - "loss": 0.98, + "epoch": 0.547937345802972, + "grad_norm": 0.248046875, + "learning_rate": 0.00010072100188469002, + "loss": 0.9079, "step": 9550 }, { - "epoch": 0.2741239080228939, - "grad_norm": 0.466796875, - "learning_rate": 0.00018209128984925847, - "loss": 0.9985, + "epoch": 0.5482242239944919, + "grad_norm": 0.259765625, + "learning_rate": 0.00010062086412452352, + "loss": 0.9173, "step": 9555 }, { - "epoch": 0.2742673532913517, - "grad_norm": 0.3984375, - "learning_rate": 0.0001820626850858836, - "loss": 0.9078, + "epoch": 0.5485111021860118, + "grad_norm": 0.2578125, + "learning_rate": 0.00010052072574175306, + "loss": 0.9978, "step": 9560 }, { - "epoch": 0.2744107985598095, - "grad_norm": 0.3984375, - "learning_rate": 0.0001820340597467229, - "loss": 0.8734, + "epoch": 0.5487979803775317, + "grad_norm": 0.267578125, + "learning_rate": 0.00010042058683679769, + "loss": 0.9124, "step": 9565 }, { - "epoch": 0.2745542438282673, - "grad_norm": 0.384765625, - "learning_rate": 0.00018200541383895367, - "loss": 1.0133, + "epoch": 0.5490848585690515, + "grad_norm": 0.26953125, + "learning_rate": 0.00010032044751007685, + "loss": 0.9591, "step": 9570 }, { - "epoch": 0.27469768909672515, - "grad_norm": 0.466796875, - "learning_rate": 0.0001819767473697584, - "loss": 0.9061, + "epoch": 0.5493717367605715, + "grad_norm": 0.265625, + "learning_rate": 0.00010022030786201058, + "loss": 0.9951, "step": 9575 }, { - "epoch": 0.274841134365183, - "grad_norm": 0.40625, - "learning_rate": 0.0001819480603463247, - "loss": 0.9963, + "epoch": 0.5496586149520913, + "grad_norm": 0.26953125, + "learning_rate": 0.00010012016799301907, + "loss": 0.9088, "step": 9580 }, { - "epoch": 0.27498457963364076, - "grad_norm": 0.408203125, - "learning_rate": 0.00018191935277584532, - "loss": 1.0722, + "epoch": 0.5499454931436112, + "grad_norm": 0.259765625, + "learning_rate": 0.00010002002800352281, + "loss": 0.9513, "step": 9585 }, { - "epoch": 0.2751280249020986, - "grad_norm": 0.388671875, - "learning_rate": 0.00018189062466551824, - "loss": 1.0137, + "epoch": 0.5502323713351311, + "grad_norm": 0.255859375, + "learning_rate": 9.991988799394245e-05, + "loss": 0.8671, "step": 9590 }, { - "epoch": 0.2752714701705564, - "grad_norm": 0.3671875, - "learning_rate": 0.00018186187602254643, - "loss": 0.9072, + "epoch": 0.550519249526651, + "grad_norm": 0.26953125, + "learning_rate": 9.981974806469858e-05, + "loss": 0.9622, "step": 9595 }, { - "epoch": 0.27541491543901425, - "grad_norm": 0.369140625, - "learning_rate": 
0.0001818331068541382, - "loss": 1.0006, + "epoch": 0.5508061277181708, + "grad_norm": 0.26953125, + "learning_rate": 9.971960831621173e-05, + "loss": 0.9097, "step": 9600 }, { - "epoch": 0.2755583607074721, - "grad_norm": 0.376953125, - "learning_rate": 0.00018180431716750687, - "loss": 1.0279, + "epoch": 0.5510930059096908, + "grad_norm": 0.28515625, + "learning_rate": 9.961946884890232e-05, + "loss": 1.0333, "step": 9605 }, { - "epoch": 0.27570180597592986, - "grad_norm": 0.400390625, - "learning_rate": 0.00018177550696987096, - "loss": 0.9158, + "epoch": 0.5513798841012106, + "grad_norm": 0.275390625, + "learning_rate": 9.951932976319041e-05, + "loss": 0.9286, "step": 9610 }, { - "epoch": 0.2758452512443877, - "grad_norm": 0.404296875, - "learning_rate": 0.00018174667626845415, - "loss": 0.9339, + "epoch": 0.5516667622927305, + "grad_norm": 0.26171875, + "learning_rate": 9.941919115949565e-05, + "loss": 0.8962, "step": 9615 }, { - "epoch": 0.2759886965128455, - "grad_norm": 0.416015625, - "learning_rate": 0.0001817178250704852, - "loss": 0.9143, + "epoch": 0.5519536404842504, + "grad_norm": 0.30078125, + "learning_rate": 9.931905313823733e-05, + "loss": 1.0276, "step": 9620 }, { - "epoch": 0.27613214178130335, - "grad_norm": 0.380859375, - "learning_rate": 0.00018168895338319807, - "loss": 0.917, + "epoch": 0.5522405186757703, + "grad_norm": 0.271484375, + "learning_rate": 9.921891579983404e-05, + "loss": 0.9283, "step": 9625 }, { - "epoch": 0.2762755870497612, - "grad_norm": 0.36328125, - "learning_rate": 0.00018166006121383185, - "loss": 0.9366, + "epoch": 0.5525273968672901, + "grad_norm": 0.265625, + "learning_rate": 9.911877924470373e-05, + "loss": 0.9175, "step": 9630 }, { - "epoch": 0.27641903231821896, - "grad_norm": 0.3671875, - "learning_rate": 0.00018163114856963066, - "loss": 0.9442, + "epoch": 0.55281427505881, + "grad_norm": 0.267578125, + "learning_rate": 9.901864357326358e-05, + "loss": 0.9713, "step": 9635 }, { - "epoch": 0.2765624775866768, - "grad_norm": 0.408203125, - "learning_rate": 0.00018160221545784392, - "loss": 0.9923, + "epoch": 0.5531011532503299, + "grad_norm": 0.25390625, + "learning_rate": 9.891850888592987e-05, + "loss": 0.9024, "step": 9640 }, { - "epoch": 0.2767059228551346, - "grad_norm": 0.486328125, - "learning_rate": 0.0001815732618857261, - "loss": 1.1424, + "epoch": 0.5533880314418498, + "grad_norm": 0.330078125, + "learning_rate": 9.881837528311787e-05, + "loss": 0.9778, "step": 9645 }, { - "epoch": 0.27684936812359245, - "grad_norm": 0.400390625, - "learning_rate": 0.00018154428786053677, - "loss": 1.1996, + "epoch": 0.5536749096333696, + "grad_norm": 0.263671875, + "learning_rate": 9.871824286524175e-05, + "loss": 1.0332, "step": 9650 }, { - "epoch": 0.2769928133920503, - "grad_norm": 0.412109375, - "learning_rate": 0.00018151529338954068, - "loss": 0.9609, + "epoch": 0.5539617878248896, + "grad_norm": 0.2734375, + "learning_rate": 9.861811173271459e-05, + "loss": 0.9709, "step": 9655 }, { - "epoch": 0.27713625866050806, - "grad_norm": 0.400390625, - "learning_rate": 0.00018148627848000768, - "loss": 0.9338, + "epoch": 0.5542486660164094, + "grad_norm": 0.30078125, + "learning_rate": 9.851798198594809e-05, + "loss": 0.9844, "step": 9660 }, { - "epoch": 0.2772797039289659, - "grad_norm": 0.408203125, - "learning_rate": 0.00018145724313921275, - "loss": 0.9318, + "epoch": 0.5545355442079293, + "grad_norm": 0.275390625, + "learning_rate": 9.841785372535254e-05, + "loss": 0.9886, "step": 9665 }, { - "epoch": 0.2774231491974237, - "grad_norm": 
0.400390625, - "learning_rate": 0.00018142818737443603, - "loss": 0.9327, + "epoch": 0.5548224223994492, + "grad_norm": 0.2734375, + "learning_rate": 9.831772705133685e-05, + "loss": 0.9194, "step": 9670 }, { - "epoch": 0.27756659446588156, - "grad_norm": 0.412109375, - "learning_rate": 0.00018139911119296266, - "loss": 1.0018, + "epoch": 0.5551093005909691, + "grad_norm": 0.2578125, + "learning_rate": 9.821760206430825e-05, + "loss": 1.0253, "step": 9675 }, { - "epoch": 0.2777100397343394, - "grad_norm": 0.37890625, - "learning_rate": 0.00018137001460208309, - "loss": 1.0402, + "epoch": 0.5553961787824889, + "grad_norm": 0.271484375, + "learning_rate": 9.811747886467226e-05, + "loss": 0.8885, "step": 9680 }, { - "epoch": 0.27785348500279716, - "grad_norm": 0.4453125, - "learning_rate": 0.0001813408976090927, - "loss": 0.9646, + "epoch": 0.5556830569740089, + "grad_norm": 0.2734375, + "learning_rate": 9.801735755283273e-05, + "loss": 0.9267, "step": 9685 }, { - "epoch": 0.277996930271255, - "grad_norm": 0.404296875, - "learning_rate": 0.00018131176022129214, - "loss": 0.9245, + "epoch": 0.5559699351655287, + "grad_norm": 0.25390625, + "learning_rate": 9.791723822919149e-05, + "loss": 0.9513, "step": 9690 }, { - "epoch": 0.2781403755397128, - "grad_norm": 0.39453125, - "learning_rate": 0.00018128260244598706, - "loss": 0.8858, + "epoch": 0.5562568133570486, + "grad_norm": 0.263671875, + "learning_rate": 9.781712099414842e-05, + "loss": 0.9175, "step": 9695 }, { - "epoch": 0.27828382080817066, - "grad_norm": 0.38671875, - "learning_rate": 0.00018125342429048825, - "loss": 0.988, + "epoch": 0.5565436915485685, + "grad_norm": 0.267578125, + "learning_rate": 9.771700594810128e-05, + "loss": 0.9682, "step": 9700 }, { - "epoch": 0.2784272660766285, - "grad_norm": 0.39453125, - "learning_rate": 0.00018122422576211163, - "loss": 1.0528, + "epoch": 0.5568305697400884, + "grad_norm": 0.2578125, + "learning_rate": 9.761689319144573e-05, + "loss": 0.8967, "step": 9705 }, { - "epoch": 0.27857071134508626, - "grad_norm": 0.3828125, - "learning_rate": 0.00018119500686817824, - "loss": 0.9126, + "epoch": 0.5571174479316082, + "grad_norm": 0.265625, + "learning_rate": 9.751678282457501e-05, + "loss": 0.8832, "step": 9710 }, { - "epoch": 0.2787141566135441, - "grad_norm": 0.392578125, - "learning_rate": 0.00018116576761601421, - "loss": 1.0077, + "epoch": 0.5574043261231281, + "grad_norm": 0.263671875, + "learning_rate": 9.741667494788003e-05, + "loss": 0.9154, "step": 9715 }, { - "epoch": 0.2788576018820019, - "grad_norm": 0.40234375, - "learning_rate": 0.00018113650801295073, - "loss": 0.8798, + "epoch": 0.557691204314648, + "grad_norm": 0.259765625, + "learning_rate": 9.731656966174924e-05, + "loss": 0.9135, "step": 9720 }, { - "epoch": 0.27900104715045976, - "grad_norm": 0.392578125, - "learning_rate": 0.0001811072280663242, - "loss": 0.9886, + "epoch": 0.5579780825061679, + "grad_norm": 0.267578125, + "learning_rate": 9.721646706656839e-05, + "loss": 0.9047, "step": 9725 }, { - "epoch": 0.2791444924189176, - "grad_norm": 0.3515625, - "learning_rate": 0.000181077927783476, - "loss": 0.9437, + "epoch": 0.5582649606976877, + "grad_norm": 0.271484375, + "learning_rate": 9.71163672627206e-05, + "loss": 1.0324, "step": 9730 }, { - "epoch": 0.27928793768737536, - "grad_norm": 0.396484375, - "learning_rate": 0.00018104860717175266, - "loss": 0.9421, + "epoch": 0.5585518388892077, + "grad_norm": 0.2890625, + "learning_rate": 9.70162703505862e-05, + "loss": 0.8719, "step": 9735 }, { - "epoch": 0.2794313829558332, 
- "grad_norm": 0.390625, - "learning_rate": 0.00018101926623850586, - "loss": 0.9371, + "epoch": 0.5588387170807275, + "grad_norm": 0.26953125, + "learning_rate": 9.69161764305426e-05, + "loss": 0.9669, "step": 9740 }, { - "epoch": 0.279574828224291, - "grad_norm": 0.40234375, - "learning_rate": 0.00018098990499109232, - "loss": 0.9772, + "epoch": 0.5591255952722474, + "grad_norm": 0.275390625, + "learning_rate": 9.681608560296413e-05, + "loss": 0.9382, "step": 9745 }, { - "epoch": 0.27971827349274886, - "grad_norm": 0.39453125, - "learning_rate": 0.00018096052343687382, - "loss": 0.979, + "epoch": 0.5594124734637673, + "grad_norm": 0.267578125, + "learning_rate": 9.671599796822223e-05, + "loss": 0.9281, "step": 9750 }, { - "epoch": 0.27986171876120663, - "grad_norm": 0.384765625, - "learning_rate": 0.00018093112158321727, - "loss": 0.9196, + "epoch": 0.5596993516552872, + "grad_norm": 0.271484375, + "learning_rate": 9.661591362668491e-05, + "loss": 0.9439, "step": 9755 }, { - "epoch": 0.28000516402966447, - "grad_norm": 0.431640625, - "learning_rate": 0.00018090169943749476, - "loss": 1.0039, + "epoch": 0.559986229846807, + "grad_norm": 0.283203125, + "learning_rate": 9.651583267871697e-05, + "loss": 0.9541, "step": 9760 }, { - "epoch": 0.2801486092981223, - "grad_norm": 0.40625, - "learning_rate": 0.00018087225700708328, - "loss": 1.1294, + "epoch": 0.560273108038327, + "grad_norm": 0.2890625, + "learning_rate": 9.641575522467984e-05, + "loss": 0.974, "step": 9765 }, { - "epoch": 0.28029205456658013, - "grad_norm": 0.412109375, - "learning_rate": 0.00018084279429936504, - "loss": 1.0484, + "epoch": 0.5605599862298468, + "grad_norm": 0.328125, + "learning_rate": 9.631568136493142e-05, + "loss": 0.9612, "step": 9770 }, { - "epoch": 0.28043549983503796, - "grad_norm": 0.453125, - "learning_rate": 0.00018081331132172731, - "loss": 1.0496, + "epoch": 0.5608468644213667, + "grad_norm": 0.26953125, + "learning_rate": 9.621561119982598e-05, + "loss": 0.8891, "step": 9775 }, { - "epoch": 0.28057894510349574, - "grad_norm": 0.34765625, - "learning_rate": 0.00018078380808156245, - "loss": 0.9254, + "epoch": 0.5611337426128866, + "grad_norm": 0.275390625, + "learning_rate": 9.61155448297141e-05, + "loss": 1.0655, "step": 9780 }, { - "epoch": 0.28072239037195357, - "grad_norm": 0.392578125, - "learning_rate": 0.00018075428458626778, - "loss": 0.9821, + "epoch": 0.5614206208044065, + "grad_norm": 0.26171875, + "learning_rate": 9.60154823549426e-05, + "loss": 0.9472, "step": 9785 }, { - "epoch": 0.2808658356404114, - "grad_norm": 0.53125, - "learning_rate": 0.00018072474084324593, - "loss": 0.9728, + "epoch": 0.5617074989959263, + "grad_norm": 0.25390625, + "learning_rate": 9.591542387585434e-05, + "loss": 0.9663, "step": 9790 }, { - "epoch": 0.28100928090886923, - "grad_norm": 0.462890625, - "learning_rate": 0.00018069517685990444, - "loss": 1.0213, + "epoch": 0.5619943771874462, + "grad_norm": 0.275390625, + "learning_rate": 9.581536949278814e-05, + "loss": 0.9145, "step": 9795 }, { - "epoch": 0.28115272617732706, - "grad_norm": 0.384765625, - "learning_rate": 0.00018066559264365593, - "loss": 1.0018, + "epoch": 0.5622812553789661, + "grad_norm": 0.271484375, + "learning_rate": 9.571531930607884e-05, + "loss": 0.9514, "step": 9800 }, { - "epoch": 0.28129617144578484, - "grad_norm": 0.435546875, - "learning_rate": 0.00018063598820191814, - "loss": 0.9674, + "epoch": 0.562568133570486, + "grad_norm": 0.279296875, + "learning_rate": 9.561527341605691e-05, + "loss": 0.9254, "step": 9805 }, { - "epoch": 
0.28143961671424267, - "grad_norm": 0.416015625, - "learning_rate": 0.00018060636354211385, - "loss": 0.9548, + "epoch": 0.5628550117620058, + "grad_norm": 0.28125, + "learning_rate": 9.551523192304863e-05, + "loss": 0.9761, "step": 9810 }, { - "epoch": 0.2815830619827005, - "grad_norm": 0.375, - "learning_rate": 0.00018057671867167098, - "loss": 0.9385, + "epoch": 0.5631418899535258, + "grad_norm": 0.265625, + "learning_rate": 9.541519492737586e-05, + "loss": 0.959, "step": 9815 }, { - "epoch": 0.28172650725115833, - "grad_norm": 0.3984375, - "learning_rate": 0.0001805470535980224, - "loss": 0.9969, + "epoch": 0.5634287681450456, + "grad_norm": 0.265625, + "learning_rate": 9.531516252935588e-05, + "loss": 0.993, "step": 9820 }, { - "epoch": 0.28186995251961616, - "grad_norm": 0.423828125, - "learning_rate": 0.00018051736832860616, - "loss": 0.979, + "epoch": 0.5637156463365655, + "grad_norm": 0.251953125, + "learning_rate": 9.521513482930144e-05, + "loss": 0.8727, "step": 9825 }, { - "epoch": 0.28201339778807394, - "grad_norm": 0.375, - "learning_rate": 0.0001804876628708653, - "loss": 0.9118, + "epoch": 0.5640025245280854, + "grad_norm": 0.2734375, + "learning_rate": 9.511511192752049e-05, + "loss": 0.9709, "step": 9830 }, { - "epoch": 0.28215684305653177, - "grad_norm": 5.46875, - "learning_rate": 0.00018045793723224794, - "loss": 1.051, + "epoch": 0.5642894027196053, + "grad_norm": 0.263671875, + "learning_rate": 9.501509392431627e-05, + "loss": 0.9213, "step": 9835 }, { - "epoch": 0.2823002883249896, - "grad_norm": 0.41015625, - "learning_rate": 0.00018042819142020727, - "loss": 0.9687, + "epoch": 0.5645762809111251, + "grad_norm": 0.259765625, + "learning_rate": 9.491508091998707e-05, + "loss": 0.9336, "step": 9840 }, { - "epoch": 0.28244373359344743, - "grad_norm": 0.42578125, - "learning_rate": 0.00018039842544220159, - "loss": 0.9951, + "epoch": 0.5648631591026451, + "grad_norm": 0.275390625, + "learning_rate": 9.481507301482604e-05, + "loss": 0.9194, "step": 9845 }, { - "epoch": 0.28258717886190526, - "grad_norm": 0.400390625, - "learning_rate": 0.00018036863930569408, - "loss": 0.9572, + "epoch": 0.5651500372941649, + "grad_norm": 0.322265625, + "learning_rate": 9.471507030912151e-05, + "loss": 0.8929, "step": 9850 }, { - "epoch": 0.28273062413036304, - "grad_norm": 0.3984375, - "learning_rate": 0.00018033883301815318, - "loss": 0.9993, + "epoch": 0.5654369154856848, + "grad_norm": 0.26171875, + "learning_rate": 9.46150729031563e-05, + "loss": 0.9276, "step": 9855 }, { - "epoch": 0.28287406939882087, - "grad_norm": 0.396484375, - "learning_rate": 0.00018030900658705227, - "loss": 0.9684, + "epoch": 0.5657237936772047, + "grad_norm": 0.296875, + "learning_rate": 9.451508089720803e-05, + "loss": 0.9965, "step": 9860 }, { - "epoch": 0.2830175146672787, - "grad_norm": 0.376953125, - "learning_rate": 0.00018027916001986982, - "loss": 0.8756, + "epoch": 0.5660106718687246, + "grad_norm": 0.294921875, + "learning_rate": 9.441509439154895e-05, + "loss": 0.9921, "step": 9865 }, { - "epoch": 0.28316095993573653, - "grad_norm": 0.400390625, - "learning_rate": 0.00018024929332408933, - "loss": 0.9971, + "epoch": 0.5662975500602444, + "grad_norm": 0.265625, + "learning_rate": 9.431511348644575e-05, + "loss": 0.979, "step": 9870 }, { - "epoch": 0.28330440520419437, - "grad_norm": 0.40625, - "learning_rate": 0.00018021940650719934, - "loss": 0.9094, + "epoch": 0.5665844282517642, + "grad_norm": 0.26953125, + "learning_rate": 9.421513828215946e-05, + "loss": 0.9518, "step": 9875 }, { - "epoch": 
0.28344785047265214, - "grad_norm": 0.421875, - "learning_rate": 0.00018018949957669347, - "loss": 1.0331, + "epoch": 0.5668713064432842, + "grad_norm": 0.267578125, + "learning_rate": 9.41151688789455e-05, + "loss": 0.9859, "step": 9880 }, { - "epoch": 0.28359129574110997, - "grad_norm": 0.435546875, - "learning_rate": 0.00018015957254007033, - "loss": 1.0062, + "epoch": 0.567158184634804, + "grad_norm": 0.259765625, + "learning_rate": 9.401520537705339e-05, + "loss": 0.9291, "step": 9885 }, { - "epoch": 0.2837347410095678, - "grad_norm": 0.4375, - "learning_rate": 0.00018012962540483364, - "loss": 0.9256, + "epoch": 0.5674450628263239, + "grad_norm": 0.259765625, + "learning_rate": 9.391524787672676e-05, + "loss": 0.8935, "step": 9890 }, { - "epoch": 0.28387818627802563, - "grad_norm": 0.380859375, - "learning_rate": 0.00018009965817849212, - "loss": 0.9644, + "epoch": 0.5677319410178439, + "grad_norm": 0.279296875, + "learning_rate": 9.381529647820314e-05, + "loss": 0.9242, "step": 9895 }, { - "epoch": 0.28402163154648347, - "grad_norm": 0.384765625, - "learning_rate": 0.00018006967086855948, - "loss": 0.9855, + "epoch": 0.5680188192093637, + "grad_norm": 0.29296875, + "learning_rate": 9.371535128171416e-05, + "loss": 0.9461, "step": 9900 }, { - "epoch": 0.28416507681494124, - "grad_norm": 0.4296875, - "learning_rate": 0.00018003966348255457, - "loss": 1.0787, + "epoch": 0.5683056974008835, + "grad_norm": 0.29296875, + "learning_rate": 9.361541238748496e-05, + "loss": 0.9622, "step": 9905 }, { - "epoch": 0.2843085220833991, - "grad_norm": 0.404296875, - "learning_rate": 0.00018000963602800117, - "loss": 0.9706, + "epoch": 0.5685925755924035, + "grad_norm": 0.26953125, + "learning_rate": 9.351547989573453e-05, + "loss": 0.9434, "step": 9910 }, { - "epoch": 0.2844519673518569, - "grad_norm": 0.3828125, - "learning_rate": 0.00017997958851242817, - "loss": 0.9621, + "epoch": 0.5688794537839234, + "grad_norm": 0.26953125, + "learning_rate": 9.341555390667542e-05, + "loss": 0.9533, "step": 9915 }, { - "epoch": 0.28459541262031474, - "grad_norm": 0.38671875, - "learning_rate": 0.00017994952094336946, - "loss": 0.9335, + "epoch": 0.5691663319754432, + "grad_norm": 0.259765625, + "learning_rate": 9.331563452051362e-05, + "loss": 0.9506, "step": 9920 }, { - "epoch": 0.2847388578887725, - "grad_norm": 0.4453125, - "learning_rate": 0.00017991943332836395, - "loss": 1.008, + "epoch": 0.5694532101669632, + "grad_norm": 0.251953125, + "learning_rate": 9.321572183744849e-05, + "loss": 0.9087, "step": 9925 }, { - "epoch": 0.28488230315723034, - "grad_norm": 0.390625, - "learning_rate": 0.0001798893256749556, - "loss": 0.9701, + "epoch": 0.569740088358483, + "grad_norm": 0.263671875, + "learning_rate": 9.311581595767273e-05, + "loss": 0.9403, "step": 9930 }, { - "epoch": 0.2850257484256882, - "grad_norm": 0.36328125, - "learning_rate": 0.00017985919799069335, - "loss": 0.9324, + "epoch": 0.5700269665500028, + "grad_norm": 0.255859375, + "learning_rate": 9.301591698137217e-05, + "loss": 0.9441, "step": 9935 }, { - "epoch": 0.285169193694146, - "grad_norm": 0.376953125, - "learning_rate": 0.0001798290502831312, - "loss": 0.9335, + "epoch": 0.5703138447415228, + "grad_norm": 0.26953125, + "learning_rate": 9.29160250087257e-05, + "loss": 0.9683, "step": 9940 }, { - "epoch": 0.28531263896260384, - "grad_norm": 0.431640625, - "learning_rate": 0.0001797988825598282, - "loss": 0.9618, + "epoch": 0.5706007229330426, + "grad_norm": 0.3046875, + "learning_rate": 9.281614013990526e-05, + "loss": 1.0119, "step": 9945 
}, { - "epoch": 0.2854560842310616, - "grad_norm": 0.396484375, - "learning_rate": 0.0001797686948283483, - "loss": 0.9652, + "epoch": 0.5708876011245625, + "grad_norm": 0.26953125, + "learning_rate": 9.271626247507561e-05, + "loss": 0.9057, "step": 9950 }, { - "epoch": 0.28559952949951944, - "grad_norm": 0.373046875, - "learning_rate": 0.00017973848709626063, - "loss": 0.9204, + "epoch": 0.5711744793160823, + "grad_norm": 0.267578125, + "learning_rate": 9.261639211439427e-05, + "loss": 0.9917, "step": 9955 }, { - "epoch": 0.2857429747679773, - "grad_norm": 0.39453125, - "learning_rate": 0.00017970825937113923, - "loss": 0.9593, + "epoch": 0.5714613575076023, + "grad_norm": 0.271484375, + "learning_rate": 9.251652915801144e-05, + "loss": 0.9661, "step": 9960 }, { - "epoch": 0.2858864200364351, - "grad_norm": 0.44921875, - "learning_rate": 0.0001796780116605631, - "loss": 1.0397, + "epoch": 0.5717482356991221, + "grad_norm": 0.265625, + "learning_rate": 9.241667370607e-05, + "loss": 0.9123, "step": 9965 }, { - "epoch": 0.28602986530489294, - "grad_norm": 0.361328125, - "learning_rate": 0.00017964774397211643, - "loss": 0.9174, + "epoch": 0.572035113890642, + "grad_norm": 0.2578125, + "learning_rate": 9.231682585870514e-05, + "loss": 0.9157, "step": 9970 }, { - "epoch": 0.2861733105733507, - "grad_norm": 0.42578125, - "learning_rate": 0.00017961745631338828, - "loss": 0.9475, + "epoch": 0.572321992082162, + "grad_norm": 0.265625, + "learning_rate": 9.221698571604453e-05, + "loss": 0.9181, "step": 9975 }, { - "epoch": 0.28631675584180855, - "grad_norm": 0.39453125, - "learning_rate": 0.00017958714869197273, - "loss": 1.0292, + "epoch": 0.5726088702736818, + "grad_norm": 0.26953125, + "learning_rate": 9.211715337820811e-05, + "loss": 0.952, "step": 9980 }, { - "epoch": 0.2864602011102664, - "grad_norm": 0.404296875, - "learning_rate": 0.00017955682111546887, - "loss": 0.982, + "epoch": 0.5728957484652016, + "grad_norm": 0.255859375, + "learning_rate": 9.201732894530797e-05, + "loss": 0.9692, "step": 9985 }, { - "epoch": 0.2866036463787242, - "grad_norm": 0.390625, - "learning_rate": 0.00017952647359148087, - "loss": 0.9821, + "epoch": 0.5731826266567216, + "grad_norm": 0.2734375, + "learning_rate": 9.191751251744823e-05, + "loss": 0.9705, "step": 9990 }, { - "epoch": 0.28674709164718204, - "grad_norm": 0.392578125, - "learning_rate": 0.00017949610612761783, - "loss": 0.9567, + "epoch": 0.5734695048482414, + "grad_norm": 0.26171875, + "learning_rate": 9.181770419472509e-05, + "loss": 0.9723, "step": 9995 }, { - "epoch": 0.2868905369156398, - "grad_norm": 0.369140625, - "learning_rate": 0.00017946571873149377, - "loss": 0.9743, + "epoch": 0.5737563830397613, + "grad_norm": 0.265625, + "learning_rate": 9.171790407722656e-05, + "loss": 0.9305, "step": 10000 }, { - "epoch": 0.28703398218409765, - "grad_norm": 0.439453125, - "learning_rate": 0.00017943531141072794, - "loss": 1.1444, + "epoch": 0.5740432612312812, + "grad_norm": 0.29296875, + "learning_rate": 9.161811226503233e-05, + "loss": 0.8814, "step": 10005 }, { - "epoch": 0.2871774274525555, - "grad_norm": 0.412109375, - "learning_rate": 0.00017940488417294437, - "loss": 1.029, + "epoch": 0.5743301394228011, + "grad_norm": 0.26171875, + "learning_rate": 9.151832885821396e-05, + "loss": 0.9779, "step": 10010 }, { - "epoch": 0.2873208727210133, - "grad_norm": 0.357421875, - "learning_rate": 0.00017937443702577215, - "loss": 0.9045, + "epoch": 0.5746170176143209, + "grad_norm": 0.263671875, + "learning_rate": 9.141855395683444e-05, + "loss": 
0.9638, "step": 10015 }, { - "epoch": 0.28746431798947114, - "grad_norm": 0.384765625, - "learning_rate": 0.00017934396997684537, - "loss": 0.9258, + "epoch": 0.5749038958058409, + "grad_norm": 0.28125, + "learning_rate": 9.131878766094822e-05, + "loss": 0.9235, "step": 10020 }, { - "epoch": 0.2876077632579289, - "grad_norm": 0.41796875, - "learning_rate": 0.00017931348303380315, - "loss": 0.9782, + "epoch": 0.5751907739973607, + "grad_norm": 0.26953125, + "learning_rate": 9.121903007060121e-05, + "loss": 0.9527, "step": 10025 }, { - "epoch": 0.28775120852638675, - "grad_norm": 0.373046875, - "learning_rate": 0.00017928297620428953, - "loss": 0.999, + "epoch": 0.5754776521888806, + "grad_norm": 0.2421875, + "learning_rate": 9.111928128583054e-05, + "loss": 0.9512, "step": 10030 }, { - "epoch": 0.2878946537948446, - "grad_norm": 0.41015625, - "learning_rate": 0.00017925244949595359, - "loss": 0.9946, + "epoch": 0.5757645303804004, + "grad_norm": 0.326171875, + "learning_rate": 9.101954140666451e-05, + "loss": 0.9147, "step": 10035 }, { - "epoch": 0.2880380990633024, - "grad_norm": 0.423828125, - "learning_rate": 0.00017922190291644934, - "loss": 0.9812, + "epoch": 0.5760514085719204, + "grad_norm": 0.26171875, + "learning_rate": 9.091981053312247e-05, + "loss": 0.8988, "step": 10040 }, { - "epoch": 0.28818154433176024, - "grad_norm": 0.453125, - "learning_rate": 0.0001791913364734358, - "loss": 1.0601, + "epoch": 0.5763382867634402, + "grad_norm": 0.251953125, + "learning_rate": 9.082008876521481e-05, + "loss": 0.9102, "step": 10045 }, { - "epoch": 0.288324989600218, - "grad_norm": 0.37109375, - "learning_rate": 0.00017916075017457698, - "loss": 0.852, + "epoch": 0.5766251649549601, + "grad_norm": 0.27734375, + "learning_rate": 9.072037620294275e-05, + "loss": 0.9565, "step": 10050 }, { - "epoch": 0.28846843486867585, - "grad_norm": 0.361328125, - "learning_rate": 0.00017913014402754187, - "loss": 0.9712, + "epoch": 0.57691204314648, + "grad_norm": 0.314453125, + "learning_rate": 9.06206729462982e-05, + "loss": 0.9252, "step": 10055 }, { - "epoch": 0.2886118801371337, - "grad_norm": 0.353515625, - "learning_rate": 0.00017909951804000445, - "loss": 0.9844, + "epoch": 0.5771989213379999, + "grad_norm": 0.2490234375, + "learning_rate": 9.052097909526388e-05, + "loss": 0.9098, "step": 10060 }, { - "epoch": 0.2887553254055915, - "grad_norm": 0.388671875, - "learning_rate": 0.0001790688722196436, - "loss": 0.993, + "epoch": 0.5774857995295197, + "grad_norm": 0.3125, + "learning_rate": 9.042129474981297e-05, + "loss": 0.9509, "step": 10065 }, { - "epoch": 0.28889877067404934, - "grad_norm": 0.37890625, - "learning_rate": 0.0001790382065741432, - "loss": 1.0034, + "epoch": 0.5777726777210397, + "grad_norm": 0.25390625, + "learning_rate": 9.032162000990914e-05, + "loss": 0.8907, "step": 10070 }, { - "epoch": 0.2890422159425071, - "grad_norm": 0.3671875, - "learning_rate": 0.00017900752111119225, - "loss": 0.9118, + "epoch": 0.5780595559125595, + "grad_norm": 0.265625, + "learning_rate": 9.02219549755065e-05, + "loss": 0.998, "step": 10075 }, { - "epoch": 0.28918566121096495, - "grad_norm": 0.37109375, - "learning_rate": 0.00017897681583848449, - "loss": 1.091, + "epoch": 0.5783464341040794, + "grad_norm": 0.275390625, + "learning_rate": 9.012229974654932e-05, + "loss": 0.9134, "step": 10080 }, { - "epoch": 0.2893291064794228, - "grad_norm": 0.37109375, - "learning_rate": 0.00017894609076371873, - "loss": 0.9516, + "epoch": 0.5786333122955993, + "grad_norm": 0.267578125, + "learning_rate": 
9.002265442297212e-05, + "loss": 0.9198, "step": 10085 }, { - "epoch": 0.2894725517478806, - "grad_norm": 0.390625, - "learning_rate": 0.00017891534589459883, - "loss": 1.0515, + "epoch": 0.5789201904871192, + "grad_norm": 0.2578125, + "learning_rate": 8.99230191046994e-05, + "loss": 0.9489, "step": 10090 }, { - "epoch": 0.2896159970163384, - "grad_norm": 0.3671875, - "learning_rate": 0.00017888458123883343, - "loss": 0.9404, + "epoch": 0.579207068678639, + "grad_norm": 0.24609375, + "learning_rate": 8.982339389164575e-05, + "loss": 0.9774, "step": 10095 }, { - "epoch": 0.2897594422847962, - "grad_norm": 0.41015625, - "learning_rate": 0.00017885379680413627, - "loss": 0.968, + "epoch": 0.579493946870159, + "grad_norm": 0.26953125, + "learning_rate": 8.972377888371555e-05, + "loss": 0.922, "step": 10100 }, { - "epoch": 0.28990288755325405, - "grad_norm": 0.390625, - "learning_rate": 0.000178822992598226, - "loss": 0.9761, + "epoch": 0.5797808250616788, + "grad_norm": 0.265625, + "learning_rate": 8.962417418080285e-05, + "loss": 0.9175, "step": 10105 }, { - "epoch": 0.2900463328217119, - "grad_norm": 0.4765625, - "learning_rate": 0.0001787921686288263, - "loss": 0.9077, + "epoch": 0.5800677032531987, + "grad_norm": 0.26953125, + "learning_rate": 8.952457988279161e-05, + "loss": 0.9818, "step": 10110 }, { - "epoch": 0.2901897780901697, - "grad_norm": 0.3984375, - "learning_rate": 0.00017876132490366568, - "loss": 0.9058, + "epoch": 0.5803545814447185, + "grad_norm": 0.275390625, + "learning_rate": 8.942499608955516e-05, + "loss": 0.99, "step": 10115 }, { - "epoch": 0.2903332233586275, - "grad_norm": 0.396484375, - "learning_rate": 0.00017873046143047767, - "loss": 0.9505, + "epoch": 0.5806414596362385, + "grad_norm": 0.255859375, + "learning_rate": 8.93254229009563e-05, + "loss": 0.9201, "step": 10120 }, { - "epoch": 0.2904766686270853, - "grad_norm": 0.423828125, - "learning_rate": 0.00017869957821700077, - "loss": 0.963, + "epoch": 0.5809283378277583, + "grad_norm": 0.24609375, + "learning_rate": 8.922586041684732e-05, + "loss": 0.9708, "step": 10125 }, { - "epoch": 0.29062011389554315, - "grad_norm": 0.37109375, - "learning_rate": 0.00017866867527097837, - "loss": 1.0216, + "epoch": 0.5812152160192782, + "grad_norm": 0.244140625, + "learning_rate": 8.912630873706967e-05, + "loss": 0.9485, "step": 10130 }, { - "epoch": 0.290763559164001, - "grad_norm": 0.4296875, - "learning_rate": 0.00017863775260015886, - "loss": 0.9424, + "epoch": 0.5815020942107981, + "grad_norm": 0.271484375, + "learning_rate": 8.902676796145403e-05, + "loss": 0.9604, "step": 10135 }, { - "epoch": 0.2909070044324588, - "grad_norm": 0.40234375, - "learning_rate": 0.0001786068102122956, - "loss": 1.0044, + "epoch": 0.581788972402318, + "grad_norm": 0.263671875, + "learning_rate": 8.892723818982001e-05, + "loss": 0.9469, "step": 10140 }, { - "epoch": 0.2910504497009166, - "grad_norm": 0.359375, - "learning_rate": 0.00017857584811514681, - "loss": 0.8767, + "epoch": 0.5820758505938378, + "grad_norm": 0.2451171875, + "learning_rate": 8.882771952197642e-05, + "loss": 0.9425, "step": 10145 }, { - "epoch": 0.2911938949693744, - "grad_norm": 0.41015625, - "learning_rate": 0.00017854486631647569, - "loss": 1.0272, + "epoch": 0.5823627287853578, + "grad_norm": 0.27734375, + "learning_rate": 8.872821205772074e-05, + "loss": 0.932, "step": 10150 }, { - "epoch": 0.29133734023783225, - "grad_norm": 0.390625, - "learning_rate": 0.00017851386482405042, - "loss": 0.9842, + "epoch": 0.5826496069768776, + "grad_norm": 0.251953125, + 
"learning_rate": 8.862871589683924e-05, + "loss": 0.9634, "step": 10155 }, { - "epoch": 0.2914807855062901, - "grad_norm": 0.359375, - "learning_rate": 0.00017848284364564406, - "loss": 0.9002, + "epoch": 0.5829364851683975, + "grad_norm": 0.2578125, + "learning_rate": 8.8529231139107e-05, + "loss": 0.9211, "step": 10160 }, { - "epoch": 0.2916242307747479, - "grad_norm": 0.40234375, - "learning_rate": 0.0001784518027890346, - "loss": 1.0668, + "epoch": 0.5832233633599174, + "grad_norm": 0.27734375, + "learning_rate": 8.842975788428748e-05, + "loss": 0.9365, "step": 10165 }, { - "epoch": 0.2917676760432057, - "grad_norm": 0.427734375, - "learning_rate": 0.00017842074226200505, - "loss": 0.9678, + "epoch": 0.5835102415514373, + "grad_norm": 0.26171875, + "learning_rate": 8.833029623213267e-05, + "loss": 0.8496, "step": 10170 }, { - "epoch": 0.2919111213116635, - "grad_norm": 0.37890625, - "learning_rate": 0.00017838966207234328, - "loss": 0.8909, + "epoch": 0.5837971197429571, + "grad_norm": 0.271484375, + "learning_rate": 8.823084628238298e-05, + "loss": 0.9707, "step": 10175 }, { - "epoch": 0.29205456658012136, - "grad_norm": 0.3671875, - "learning_rate": 0.0001783585622278421, - "loss": 0.912, + "epoch": 0.5840839979344771, + "grad_norm": 0.2578125, + "learning_rate": 8.813140813476704e-05, + "loss": 0.9237, "step": 10180 }, { - "epoch": 0.2921980118485792, - "grad_norm": 0.384765625, - "learning_rate": 0.00017832744273629922, - "loss": 0.9663, + "epoch": 0.5843708761259969, + "grad_norm": 0.28125, + "learning_rate": 8.803198188900161e-05, + "loss": 0.9181, "step": 10185 }, { - "epoch": 0.292341457117037, - "grad_norm": 0.41796875, - "learning_rate": 0.00017829630360551737, - "loss": 0.9775, + "epoch": 0.5846577543175168, + "grad_norm": 0.2578125, + "learning_rate": 8.79325676447916e-05, + "loss": 0.9793, "step": 10190 }, { - "epoch": 0.2924849023854948, - "grad_norm": 0.40234375, - "learning_rate": 0.0001782651448433041, - "loss": 0.9619, + "epoch": 0.5849446325090366, + "grad_norm": 0.263671875, + "learning_rate": 8.783316550182982e-05, + "loss": 0.9985, "step": 10195 }, { - "epoch": 0.2926283476539526, - "grad_norm": 0.39453125, - "learning_rate": 0.0001782339664574719, - "loss": 0.9484, + "epoch": 0.5852315107005566, + "grad_norm": 0.244140625, + "learning_rate": 8.773377555979699e-05, + "loss": 0.9507, "step": 10200 }, { - "epoch": 0.29277179292241046, - "grad_norm": 0.431640625, - "learning_rate": 0.00017820276845583832, - "loss": 0.9576, + "epoch": 0.5855183888920764, + "grad_norm": 0.29296875, + "learning_rate": 8.763439791836145e-05, + "loss": 0.8859, "step": 10205 }, { - "epoch": 0.2929152381908683, - "grad_norm": 0.408203125, - "learning_rate": 0.00017817155084622562, - "loss": 0.9773, + "epoch": 0.5858052670835963, + "grad_norm": 0.283203125, + "learning_rate": 8.753503267717948e-05, + "loss": 0.9309, "step": 10210 }, { - "epoch": 0.2930586834593261, - "grad_norm": 0.447265625, - "learning_rate": 0.00017814031363646104, - "loss": 0.9233, + "epoch": 0.5860921452751162, + "grad_norm": 0.26953125, + "learning_rate": 8.743567993589466e-05, + "loss": 0.9578, "step": 10215 }, { - "epoch": 0.2932021287277839, - "grad_norm": 0.40625, - "learning_rate": 0.00017810905683437683, - "loss": 0.9796, + "epoch": 0.5863790234666361, + "grad_norm": 0.267578125, + "learning_rate": 8.733633979413817e-05, + "loss": 1.0202, "step": 10220 }, { - "epoch": 0.2933455739962417, - "grad_norm": 0.40234375, - "learning_rate": 0.0001780777804478101, - "loss": 1.0272, + "epoch": 0.5866659016581559, + 
"grad_norm": 0.271484375, + "learning_rate": 8.723701235152854e-05, + "loss": 0.986, "step": 10225 }, { - "epoch": 0.29348901926469956, - "grad_norm": 0.419921875, - "learning_rate": 0.0001780464844846028, - "loss": 1.0593, + "epoch": 0.5869527798496759, + "grad_norm": 0.2578125, + "learning_rate": 8.713769770767155e-05, + "loss": 0.8414, "step": 10230 }, { - "epoch": 0.2936324645331574, - "grad_norm": 0.396484375, - "learning_rate": 0.00017801516895260192, - "loss": 1.016, + "epoch": 0.5872396580411957, + "grad_norm": 0.265625, + "learning_rate": 8.703839596216012e-05, + "loss": 1.0018, "step": 10235 }, { - "epoch": 0.2937759098016152, - "grad_norm": 0.36328125, - "learning_rate": 0.00017798383385965918, - "loss": 0.9488, + "epoch": 0.5875265362327156, + "grad_norm": 0.271484375, + "learning_rate": 8.69391072145743e-05, + "loss": 0.9342, "step": 10240 }, { - "epoch": 0.293919355070073, - "grad_norm": 0.39453125, - "learning_rate": 0.0001779524792136314, - "loss": 0.9891, + "epoch": 0.5878134144242355, + "grad_norm": 0.255859375, + "learning_rate": 8.683983156448104e-05, + "loss": 0.8877, "step": 10245 }, { - "epoch": 0.2940628003385308, - "grad_norm": 0.431640625, - "learning_rate": 0.00017792110502238016, - "loss": 0.9707, + "epoch": 0.5881002926157554, + "grad_norm": 0.291015625, + "learning_rate": 8.67405691114342e-05, + "loss": 0.9437, "step": 10250 }, { - "epoch": 0.29420624560698866, - "grad_norm": 0.37890625, - "learning_rate": 0.000177889711293772, - "loss": 1.0008, + "epoch": 0.5883871708072752, + "grad_norm": 0.259765625, + "learning_rate": 8.664131995497439e-05, + "loss": 0.9819, "step": 10255 }, { - "epoch": 0.2943496908754465, - "grad_norm": 0.396484375, - "learning_rate": 0.0001778582980356784, - "loss": 0.9703, + "epoch": 0.5886740489987952, + "grad_norm": 0.271484375, + "learning_rate": 8.654208419462893e-05, + "loss": 0.9168, "step": 10260 }, { - "epoch": 0.29449313614390427, - "grad_norm": 0.404296875, - "learning_rate": 0.00017782686525597558, - "loss": 1.025, + "epoch": 0.588960927190315, + "grad_norm": 0.25390625, + "learning_rate": 8.644286192991158e-05, + "loss": 0.9045, "step": 10265 }, { - "epoch": 0.2946365814123621, - "grad_norm": 0.431640625, - "learning_rate": 0.00017779541296254487, - "loss": 1.0161, + "epoch": 0.5892478053818349, + "grad_norm": 0.2734375, + "learning_rate": 8.634365326032265e-05, + "loss": 0.9478, "step": 10270 }, { - "epoch": 0.29478002668081993, - "grad_norm": 0.40625, - "learning_rate": 0.00017776394116327234, - "loss": 0.9371, + "epoch": 0.5895346835733547, + "grad_norm": 0.263671875, + "learning_rate": 8.62444582853489e-05, + "loss": 0.9661, "step": 10275 }, { - "epoch": 0.29492347194927776, - "grad_norm": 0.41796875, - "learning_rate": 0.00017773244986604895, - "loss": 0.9889, + "epoch": 0.5898215617648747, + "grad_norm": 0.248046875, + "learning_rate": 8.614527710446322e-05, + "loss": 0.9886, "step": 10280 }, { - "epoch": 0.2950669172177356, - "grad_norm": 0.4609375, - "learning_rate": 0.00017770093907877067, - "loss": 0.9207, + "epoch": 0.5901084399563945, + "grad_norm": 0.263671875, + "learning_rate": 8.604610981712471e-05, + "loss": 0.9704, "step": 10285 }, { - "epoch": 0.29521036248619337, - "grad_norm": 0.447265625, - "learning_rate": 0.00017766940880933825, - "loss": 0.9791, + "epoch": 0.5903953181479143, + "grad_norm": 0.271484375, + "learning_rate": 8.594695652277858e-05, + "loss": 0.9389, "step": 10290 }, { - "epoch": 0.2953538077546512, - "grad_norm": 0.38671875, - "learning_rate": 0.00017763785906565734, - "loss": 1.0612, + 
"epoch": 0.5906821963394343, + "grad_norm": 0.314453125, + "learning_rate": 8.584781732085598e-05, + "loss": 0.9359, "step": 10295 }, { - "epoch": 0.29549725302310903, - "grad_norm": 0.396484375, - "learning_rate": 0.00017760628985563845, - "loss": 0.9555, + "epoch": 0.5909690745309542, + "grad_norm": 0.298828125, + "learning_rate": 8.574869231077383e-05, + "loss": 1.0032, "step": 10300 }, { - "epoch": 0.29564069829156686, - "grad_norm": 0.37890625, - "learning_rate": 0.00017757470118719712, - "loss": 0.8586, + "epoch": 0.591255952722474, + "grad_norm": 0.25390625, + "learning_rate": 8.564958159193506e-05, + "loss": 0.9773, "step": 10305 }, { - "epoch": 0.2957841435600247, - "grad_norm": 0.3671875, - "learning_rate": 0.00017754309306825357, - "loss": 0.8709, + "epoch": 0.591542830913994, + "grad_norm": 0.255859375, + "learning_rate": 8.555048526372805e-05, + "loss": 0.9306, "step": 10310 }, { - "epoch": 0.29592758882848247, - "grad_norm": 0.416015625, - "learning_rate": 0.000177511465506733, - "loss": 0.9185, + "epoch": 0.5918297091055138, + "grad_norm": 0.279296875, + "learning_rate": 8.545140342552676e-05, + "loss": 0.9393, "step": 10315 }, { - "epoch": 0.2960710340969403, - "grad_norm": 0.392578125, - "learning_rate": 0.00017747981851056548, - "loss": 0.9803, + "epoch": 0.5921165872970336, + "grad_norm": 0.27734375, + "learning_rate": 8.53523361766908e-05, + "loss": 0.9635, "step": 10320 }, { - "epoch": 0.29621447936539813, - "grad_norm": 0.37109375, - "learning_rate": 0.00017744815208768592, - "loss": 0.9739, + "epoch": 0.5924034654885536, + "grad_norm": 0.2734375, + "learning_rate": 8.525328361656494e-05, + "loss": 0.9303, "step": 10325 }, { - "epoch": 0.29635792463385596, - "grad_norm": 0.380859375, - "learning_rate": 0.00017741646624603417, - "loss": 0.9871, + "epoch": 0.5926903436800735, + "grad_norm": 0.2578125, + "learning_rate": 8.515424584447935e-05, + "loss": 0.874, "step": 10330 }, { - "epoch": 0.2965013699023138, - "grad_norm": 0.39453125, - "learning_rate": 0.00017738476099355487, - "loss": 1.0408, + "epoch": 0.5929772218715933, + "grad_norm": 0.2734375, + "learning_rate": 8.505522295974929e-05, + "loss": 0.972, "step": 10335 }, { - "epoch": 0.29664481517077157, - "grad_norm": 0.40625, - "learning_rate": 0.00017735303633819753, - "loss": 0.9971, + "epoch": 0.5932641000631133, + "grad_norm": 0.275390625, + "learning_rate": 8.495621506167519e-05, + "loss": 0.9765, "step": 10340 }, { - "epoch": 0.2967882604392294, - "grad_norm": 0.39453125, - "learning_rate": 0.0001773212922879166, - "loss": 1.0061, + "epoch": 0.5935509782546331, + "grad_norm": 0.26953125, + "learning_rate": 8.485722224954237e-05, + "loss": 1.0924, "step": 10345 }, { - "epoch": 0.29693170570768723, - "grad_norm": 0.39453125, - "learning_rate": 0.00017728952885067133, - "loss": 0.9326, + "epoch": 0.593837856446153, + "grad_norm": 0.279296875, + "learning_rate": 8.475824462262096e-05, + "loss": 0.9808, "step": 10350 }, { - "epoch": 0.29707515097614506, - "grad_norm": 0.40234375, - "learning_rate": 0.00017725774603442586, - "loss": 0.9662, + "epoch": 0.5941247346376728, + "grad_norm": 0.26953125, + "learning_rate": 8.465928228016608e-05, + "loss": 0.968, "step": 10355 }, { - "epoch": 0.2972185962446029, - "grad_norm": 0.390625, - "learning_rate": 0.00017722594384714916, - "loss": 0.9412, + "epoch": 0.5944116128291927, + "grad_norm": 0.265625, + "learning_rate": 8.456033532141735e-05, + "loss": 0.96, "step": 10360 }, { - "epoch": 0.29736204151306067, - "grad_norm": 0.3828125, - "learning_rate": 
0.00017719412229681507, - "loss": 1.0385, + "epoch": 0.5946984910207126, + "grad_norm": 0.287109375, + "learning_rate": 8.44614038455989e-05, + "loss": 0.9754, "step": 10365 }, { - "epoch": 0.2975054867815185, - "grad_norm": 0.341796875, - "learning_rate": 0.00017716228139140228, - "loss": 0.9054, + "epoch": 0.5949853692122324, + "grad_norm": 0.267578125, + "learning_rate": 8.436248795191961e-05, + "loss": 0.9828, "step": 10370 }, { - "epoch": 0.29764893204997633, - "grad_norm": 0.37890625, - "learning_rate": 0.00017713042113889438, - "loss": 1.0426, + "epoch": 0.5952722474037524, + "grad_norm": 0.28125, + "learning_rate": 8.426358773957243e-05, + "loss": 0.9586, "step": 10375 }, { - "epoch": 0.29779237731843416, - "grad_norm": 0.375, - "learning_rate": 0.00017709854154727975, - "loss": 0.9727, + "epoch": 0.5955591255952722, + "grad_norm": 0.283203125, + "learning_rate": 8.416470330773471e-05, + "loss": 0.9082, "step": 10380 }, { - "epoch": 0.297935822586892, - "grad_norm": 0.421875, - "learning_rate": 0.0001770666426245516, + "epoch": 0.5958460037867921, + "grad_norm": 0.26171875, + "learning_rate": 8.406583475556807e-05, "loss": 0.959, "step": 10385 }, { - "epoch": 0.29807926785534977, - "grad_norm": 0.474609375, - "learning_rate": 0.00017703472437870813, - "loss": 1.0916, + "epoch": 0.596132881978312, + "grad_norm": 0.2890625, + "learning_rate": 8.396698218221807e-05, + "loss": 0.8862, "step": 10390 }, { - "epoch": 0.2982227131238076, - "grad_norm": 0.361328125, - "learning_rate": 0.00017700278681775217, - "loss": 1.0001, + "epoch": 0.5964197601698319, + "grad_norm": 0.265625, + "learning_rate": 8.386814568681429e-05, + "loss": 0.9277, "step": 10395 }, { - "epoch": 0.29836615839226543, - "grad_norm": 0.419921875, - "learning_rate": 0.00017697082994969158, - "loss": 1.042, + "epoch": 0.5967066383613517, + "grad_norm": 0.267578125, + "learning_rate": 8.376932536847014e-05, + "loss": 0.9745, "step": 10400 }, { - "epoch": 0.29850960366072327, - "grad_norm": 0.373046875, - "learning_rate": 0.000176938853782539, - "loss": 0.9213, + "epoch": 0.5969935165528717, + "grad_norm": 0.271484375, + "learning_rate": 8.367052132628294e-05, + "loss": 0.973, "step": 10405 }, { - "epoch": 0.2986530489291811, - "grad_norm": 0.416015625, - "learning_rate": 0.0001769068583243118, - "loss": 0.9131, + "epoch": 0.5972803947443915, + "grad_norm": 0.26171875, + "learning_rate": 8.35717336593336e-05, + "loss": 0.9606, "step": 10410 }, { - "epoch": 0.2987964941976389, - "grad_norm": 0.419921875, - "learning_rate": 0.00017687484358303235, - "loss": 0.8884, + "epoch": 0.5975672729359114, + "grad_norm": 0.255859375, + "learning_rate": 8.347296246668653e-05, + "loss": 0.8933, "step": 10415 }, { - "epoch": 0.2989399394660967, - "grad_norm": 0.3984375, - "learning_rate": 0.0001768428095667278, - "loss": 0.9267, + "epoch": 0.5978541511274313, + "grad_norm": 0.2578125, + "learning_rate": 8.33742078473898e-05, + "loss": 0.979, "step": 10420 }, { - "epoch": 0.29908338473455454, - "grad_norm": 0.400390625, - "learning_rate": 0.00017681075628343012, - "loss": 0.9926, + "epoch": 0.5981410293189512, + "grad_norm": 0.26171875, + "learning_rate": 8.327546990047471e-05, + "loss": 0.9329, "step": 10425 }, { - "epoch": 0.29922683000301237, - "grad_norm": 0.408203125, - "learning_rate": 0.00017677868374117606, - "loss": 0.9593, + "epoch": 0.598427907510471, + "grad_norm": 0.271484375, + "learning_rate": 8.317674872495589e-05, + "loss": 0.9221, "step": 10430 }, { - "epoch": 0.29937027527147014, - "grad_norm": 0.39453125, - 
"learning_rate": 0.00017674659194800727, - "loss": 0.9665, + "epoch": 0.5987147857019909, + "grad_norm": 0.26953125, + "learning_rate": 8.30780444198312e-05, + "loss": 0.9305, "step": 10435 }, { - "epoch": 0.299513720539928, - "grad_norm": 0.388671875, - "learning_rate": 0.00017671448091197026, - "loss": 1.0428, + "epoch": 0.5990016638935108, + "grad_norm": 0.26953125, + "learning_rate": 8.29793570840815e-05, + "loss": 1.0169, "step": 10440 }, { - "epoch": 0.2996571658083858, - "grad_norm": 0.3984375, - "learning_rate": 0.0001766823506411162, - "loss": 0.9428, + "epoch": 0.5992885420850307, + "grad_norm": 0.259765625, + "learning_rate": 8.288068681667065e-05, + "loss": 0.9563, "step": 10445 }, { - "epoch": 0.29980061107684364, - "grad_norm": 0.41796875, - "learning_rate": 0.00017665020114350136, - "loss": 0.9581, + "epoch": 0.5995754202765505, + "grad_norm": 0.255859375, + "learning_rate": 8.278203371654549e-05, + "loss": 0.9658, "step": 10450 }, { - "epoch": 0.29994405634530147, - "grad_norm": 0.388671875, - "learning_rate": 0.00017661803242718647, - "loss": 1.0131, + "epoch": 0.5998622984680705, + "grad_norm": 0.26953125, + "learning_rate": 8.268339788263551e-05, + "loss": 0.9455, "step": 10455 }, { - "epoch": 0.30008750161375924, - "grad_norm": 0.369140625, - "learning_rate": 0.00017658584450023747, - "loss": 0.9971, + "epoch": 0.6001491766595903, + "grad_norm": 0.259765625, + "learning_rate": 8.2584779413853e-05, + "loss": 0.9489, "step": 10460 }, { - "epoch": 0.3002309468822171, - "grad_norm": 0.396484375, - "learning_rate": 0.00017655363737072478, - "loss": 0.9251, + "epoch": 0.6004360548511102, + "grad_norm": 0.26953125, + "learning_rate": 8.248617840909268e-05, + "loss": 0.9188, "step": 10465 }, { - "epoch": 0.3003743921506749, - "grad_norm": 0.404296875, - "learning_rate": 0.0001765214110467238, - "loss": 0.9915, + "epoch": 0.6007229330426301, + "grad_norm": 0.28125, + "learning_rate": 8.238759496723199e-05, + "loss": 1.0094, "step": 10470 }, { - "epoch": 0.30051783741913274, - "grad_norm": 0.40625, - "learning_rate": 0.00017648916553631477, - "loss": 0.9226, + "epoch": 0.60100981123415, + "grad_norm": 0.310546875, + "learning_rate": 8.228902918713053e-05, + "loss": 0.9488, "step": 10475 }, { - "epoch": 0.30066128268759057, - "grad_norm": 0.375, - "learning_rate": 0.00017645690084758267, - "loss": 0.9771, + "epoch": 0.6012966894256698, + "grad_norm": 0.275390625, + "learning_rate": 8.21904811676303e-05, + "loss": 0.9419, "step": 10480 }, { - "epoch": 0.30080472795604835, - "grad_norm": 0.39453125, - "learning_rate": 0.00017642461698861726, - "loss": 0.9966, + "epoch": 0.6015835676171898, + "grad_norm": 0.259765625, + "learning_rate": 8.209195100755551e-05, + "loss": 0.9301, "step": 10485 }, { - "epoch": 0.3009481732245062, - "grad_norm": 0.404296875, - "learning_rate": 0.00017639231396751322, - "loss": 0.9803, + "epoch": 0.6018704458087096, + "grad_norm": 0.263671875, + "learning_rate": 8.199343880571241e-05, + "loss": 1.0017, "step": 10490 }, { - "epoch": 0.301091618492964, - "grad_norm": 0.404296875, - "learning_rate": 0.0001763599917923699, - "loss": 1.0001, + "epoch": 0.6021573240002295, + "grad_norm": 0.271484375, + "learning_rate": 8.189494466088923e-05, + "loss": 1.0361, "step": 10495 }, { - "epoch": 0.30123506376142184, - "grad_norm": 0.41796875, - "learning_rate": 0.00017632765047129157, - "loss": 0.8989, + "epoch": 0.6024442021917494, + "grad_norm": 0.2578125, + "learning_rate": 8.179646867185617e-05, + "loss": 0.9334, "step": 10500 }, { - "epoch": 0.30137850902987967, - 
"grad_norm": 0.400390625, - "learning_rate": 0.00017629529001238727, - "loss": 1.0224, + "epoch": 0.6027310803832693, + "grad_norm": 0.279296875, + "learning_rate": 8.169801093736515e-05, + "loss": 1.027, "step": 10505 }, { - "epoch": 0.30152195429833745, - "grad_norm": 0.40625, - "learning_rate": 0.00017626291042377077, - "loss": 0.8894, + "epoch": 0.6030179585747891, + "grad_norm": 0.2890625, + "learning_rate": 8.159957155614974e-05, + "loss": 0.9183, "step": 10510 }, { - "epoch": 0.3016653995667953, - "grad_norm": 0.384765625, - "learning_rate": 0.0001762305117135607, - "loss": 0.9662, + "epoch": 0.603304836766309, + "grad_norm": 0.2578125, + "learning_rate": 8.15011506269253e-05, + "loss": 0.9329, "step": 10515 }, { - "epoch": 0.3018088448352531, - "grad_norm": 0.427734375, - "learning_rate": 0.00017619809388988049, - "loss": 0.9873, + "epoch": 0.6035917149578289, + "grad_norm": 0.279296875, + "learning_rate": 8.140274824838849e-05, + "loss": 0.9626, "step": 10520 }, { - "epoch": 0.30195229010371094, - "grad_norm": 0.416015625, - "learning_rate": 0.00017616565696085835, - "loss": 0.9242, + "epoch": 0.6038785931493488, + "grad_norm": 0.263671875, + "learning_rate": 8.130436451921743e-05, + "loss": 0.9523, "step": 10525 }, { - "epoch": 0.30209573537216877, - "grad_norm": 0.40234375, - "learning_rate": 0.00017613320093462723, - "loss": 0.9638, + "epoch": 0.6041654713408686, + "grad_norm": 0.271484375, + "learning_rate": 8.120599953807153e-05, + "loss": 0.9433, "step": 10530 }, { - "epoch": 0.30223918064062655, - "grad_norm": 0.375, - "learning_rate": 0.00017610072581932494, - "loss": 0.9258, + "epoch": 0.6044523495323886, + "grad_norm": 0.271484375, + "learning_rate": 8.110765340359145e-05, + "loss": 1.0139, "step": 10535 }, { - "epoch": 0.3023826259090844, - "grad_norm": 0.3671875, - "learning_rate": 0.00017606823162309406, - "loss": 1.0264, + "epoch": 0.6047392277239084, + "grad_norm": 0.28515625, + "learning_rate": 8.10093262143989e-05, + "loss": 0.9758, "step": 10540 }, { - "epoch": 0.3025260711775422, - "grad_norm": 0.404296875, - "learning_rate": 0.0001760357183540819, - "loss": 1.1044, + "epoch": 0.6050261059154283, + "grad_norm": 0.265625, + "learning_rate": 8.09110180690966e-05, + "loss": 0.9715, "step": 10545 }, { - "epoch": 0.30266951644600004, - "grad_norm": 0.380859375, - "learning_rate": 0.00017600318602044066, - "loss": 0.9318, + "epoch": 0.6053129841069482, + "grad_norm": 0.265625, + "learning_rate": 8.08127290662682e-05, + "loss": 0.9328, "step": 10550 }, { - "epoch": 0.3028129617144579, - "grad_norm": 0.40234375, - "learning_rate": 0.0001759706346303272, - "loss": 0.9345, + "epoch": 0.6055998622984681, + "grad_norm": 0.28515625, + "learning_rate": 8.071445930447815e-05, + "loss": 1.0168, "step": 10555 }, { - "epoch": 0.30295640698291565, - "grad_norm": 0.408203125, - "learning_rate": 0.00017593806419190325, - "loss": 0.9316, + "epoch": 0.6058867404899879, + "grad_norm": 0.25390625, + "learning_rate": 8.061620888227145e-05, + "loss": 0.9434, "step": 10560 }, { - "epoch": 0.3030998522513735, - "grad_norm": 0.44921875, - "learning_rate": 0.0001759054747133352, - "loss": 0.9279, + "epoch": 0.6061736186815079, + "grad_norm": 0.26171875, + "learning_rate": 8.051797789817403e-05, + "loss": 0.8857, "step": 10565 }, { - "epoch": 0.3032432975198313, - "grad_norm": 0.408203125, - "learning_rate": 0.00017587286620279443, - "loss": 0.9617, + "epoch": 0.6064604968730277, + "grad_norm": 0.2734375, + "learning_rate": 8.041976645069207e-05, + "loss": 0.9153, "step": 10570 }, { - "epoch": 
0.30338674278828914, - "grad_norm": 0.412109375, - "learning_rate": 0.00017584023866845682, - "loss": 0.9556, + "epoch": 0.6067473750645476, + "grad_norm": 0.275390625, + "learning_rate": 8.032157463831216e-05, + "loss": 0.9559, "step": 10575 }, { - "epoch": 0.303530188056747, - "grad_norm": 0.40234375, - "learning_rate": 0.00017580759211850323, - "loss": 0.8931, + "epoch": 0.6070342532560675, + "grad_norm": 0.263671875, + "learning_rate": 8.022340255950138e-05, + "loss": 0.9526, "step": 10580 }, { - "epoch": 0.30367363332520475, - "grad_norm": 0.43359375, - "learning_rate": 0.0001757749265611192, - "loss": 0.9778, + "epoch": 0.6073211314475874, + "grad_norm": 0.263671875, + "learning_rate": 8.012525031270685e-05, + "loss": 1.005, "step": 10585 }, { - "epoch": 0.3038170785936626, - "grad_norm": 0.396484375, - "learning_rate": 0.00017574224200449506, - "loss": 0.9877, + "epoch": 0.6076080096391072, + "grad_norm": 0.275390625, + "learning_rate": 8.002711799635588e-05, + "loss": 0.9495, "step": 10590 }, { - "epoch": 0.3039605238621204, - "grad_norm": 0.392578125, - "learning_rate": 0.00017570953845682586, - "loss": 0.9496, + "epoch": 0.6078948878306271, + "grad_norm": 0.26171875, + "learning_rate": 7.992900570885572e-05, + "loss": 0.9172, "step": 10595 }, { - "epoch": 0.30410396913057824, - "grad_norm": 0.408203125, - "learning_rate": 0.00017567681592631145, - "loss": 0.986, + "epoch": 0.608181766022147, + "grad_norm": 0.275390625, + "learning_rate": 7.983091354859369e-05, + "loss": 0.9883, "step": 10600 }, { - "epoch": 0.304247414399036, - "grad_norm": 0.390625, - "learning_rate": 0.00017564407442115644, - "loss": 1.0626, + "epoch": 0.6084686442136669, + "grad_norm": 0.271484375, + "learning_rate": 7.97328416139368e-05, + "loss": 0.9507, "step": 10605 }, { - "epoch": 0.30439085966749385, - "grad_norm": 0.412109375, - "learning_rate": 0.00017561131394957022, - "loss": 0.9395, + "epoch": 0.6087555224051867, + "grad_norm": 0.2734375, + "learning_rate": 7.963479000323171e-05, + "loss": 0.9506, "step": 10610 }, { - "epoch": 0.3045343049359517, - "grad_norm": 0.4140625, - "learning_rate": 0.00017557853451976687, - "loss": 0.9627, + "epoch": 0.6090424005967067, + "grad_norm": 0.259765625, + "learning_rate": 7.953675881480493e-05, + "loss": 0.9825, "step": 10615 }, { - "epoch": 0.3046777502044095, - "grad_norm": 0.37890625, - "learning_rate": 0.00017554573613996524, - "loss": 0.957, + "epoch": 0.6093292787882265, + "grad_norm": 0.275390625, + "learning_rate": 7.94387481469623e-05, + "loss": 0.9435, "step": 10620 }, { - "epoch": 0.30482119547286735, - "grad_norm": 0.376953125, - "learning_rate": 0.000175512918818389, - "loss": 0.9759, + "epoch": 0.6096161569797464, + "grad_norm": 0.251953125, + "learning_rate": 7.934075809798908e-05, + "loss": 1.0158, "step": 10625 }, { - "epoch": 0.3049646407413251, - "grad_norm": 0.5234375, - "learning_rate": 0.00017548008256326655, - "loss": 1.0293, + "epoch": 0.6099030351712663, + "grad_norm": 0.255859375, + "learning_rate": 7.924278876615004e-05, + "loss": 0.8788, "step": 10630 }, { - "epoch": 0.30510808600978295, - "grad_norm": 0.38671875, - "learning_rate": 0.00017544722738283087, - "loss": 0.9611, + "epoch": 0.6101899133627862, + "grad_norm": 0.275390625, + "learning_rate": 7.914484024968893e-05, + "loss": 0.9347, "step": 10635 }, { - "epoch": 0.3052515312782408, - "grad_norm": 0.40234375, - "learning_rate": 0.00017541435328531996, - "loss": 1.0086, + "epoch": 0.610476791554306, + "grad_norm": 0.267578125, + "learning_rate": 7.90469126468288e-05, + "loss": 
0.9054, "step": 10640 }, { - "epoch": 0.3053949765466986, - "grad_norm": 0.474609375, - "learning_rate": 0.00017538146027897637, - "loss": 1.0748, + "epoch": 0.610763669745826, + "grad_norm": 0.24609375, + "learning_rate": 7.894900605577161e-05, + "loss": 0.963, "step": 10645 }, { - "epoch": 0.30553842181515645, - "grad_norm": 0.4453125, - "learning_rate": 0.00017534854837204745, - "loss": 0.899, + "epoch": 0.6110505479373458, + "grad_norm": 0.2578125, + "learning_rate": 7.885112057469839e-05, + "loss": 0.9641, "step": 10650 }, { - "epoch": 0.3056818670836142, - "grad_norm": 0.380859375, - "learning_rate": 0.0001753156175727853, - "loss": 0.9888, + "epoch": 0.6113374261288657, + "grad_norm": 0.265625, + "learning_rate": 7.87532563017689e-05, + "loss": 0.9087, "step": 10655 }, { - "epoch": 0.30582531235207205, - "grad_norm": 0.380859375, - "learning_rate": 0.00017528266788944676, - "loss": 0.9182, + "epoch": 0.6116243043203856, + "grad_norm": 0.267578125, + "learning_rate": 7.865541333512157e-05, + "loss": 0.9185, "step": 10660 }, { - "epoch": 0.3059687576205299, - "grad_norm": 0.412109375, - "learning_rate": 0.00017524969933029332, - "loss": 0.9088, + "epoch": 0.6119111825119055, + "grad_norm": 0.265625, + "learning_rate": 7.855759177287368e-05, + "loss": 0.9213, "step": 10665 }, { - "epoch": 0.3061122028889877, - "grad_norm": 0.40234375, - "learning_rate": 0.00017521671190359132, - "loss": 0.9079, + "epoch": 0.6121980607034253, + "grad_norm": 0.267578125, + "learning_rate": 7.84597917131208e-05, + "loss": 0.9388, "step": 10670 }, { - "epoch": 0.30625564815744555, - "grad_norm": 0.365234375, - "learning_rate": 0.00017518370561761182, - "loss": 0.9623, + "epoch": 0.6124849388949452, + "grad_norm": 0.2578125, + "learning_rate": 7.836201325393706e-05, + "loss": 0.969, "step": 10675 }, { - "epoch": 0.3063990934259033, - "grad_norm": 0.375, - "learning_rate": 0.00017515068048063048, - "loss": 0.9112, + "epoch": 0.6127718170864651, + "grad_norm": 0.265625, + "learning_rate": 7.826425649337501e-05, + "loss": 0.9655, "step": 10680 }, { - "epoch": 0.30654253869436116, - "grad_norm": 0.341796875, - "learning_rate": 0.0001751176365009278, - "loss": 0.9012, + "epoch": 0.613058695277985, + "grad_norm": 0.25390625, + "learning_rate": 7.816652152946528e-05, + "loss": 1.0169, "step": 10685 }, { - "epoch": 0.306685983962819, - "grad_norm": 0.404296875, - "learning_rate": 0.00017508457368678904, - "loss": 1.0436, + "epoch": 0.6133455734695048, + "grad_norm": 0.251953125, + "learning_rate": 7.806880846021669e-05, + "loss": 0.9382, "step": 10690 }, { - "epoch": 0.3068294292312768, - "grad_norm": 0.431640625, - "learning_rate": 0.0001750514920465041, - "loss": 1.0134, + "epoch": 0.6136324516610248, + "grad_norm": 0.26953125, + "learning_rate": 7.797111738361618e-05, + "loss": 0.9416, "step": 10695 }, { - "epoch": 0.30697287449973465, - "grad_norm": 0.3515625, - "learning_rate": 0.00017501839158836756, - "loss": 0.9233, + "epoch": 0.6139193298525446, + "grad_norm": 0.267578125, + "learning_rate": 7.787344839762855e-05, + "loss": 0.9647, "step": 10700 }, { - "epoch": 0.3071163197681924, - "grad_norm": 0.408203125, - "learning_rate": 0.00017498527232067887, - "loss": 0.9514, + "epoch": 0.6142062080440645, + "grad_norm": 0.2578125, + "learning_rate": 7.777580160019649e-05, + "loss": 0.905, "step": 10705 }, { - "epoch": 0.30725976503665026, - "grad_norm": 0.39453125, - "learning_rate": 0.00017495213425174205, - "loss": 0.8878, + "epoch": 0.6144930862355844, + "grad_norm": 0.279296875, + "learning_rate": 
7.767817708924038e-05, + "loss": 0.9983, "step": 10710 }, { - "epoch": 0.3074032103051081, - "grad_norm": 0.3671875, - "learning_rate": 0.00017491897738986594, - "loss": 0.9641, + "epoch": 0.6147799644271043, + "grad_norm": 0.271484375, + "learning_rate": 7.758057496265839e-05, + "loss": 0.9257, "step": 10715 }, { - "epoch": 0.3075466555735659, - "grad_norm": 0.4140625, - "learning_rate": 0.000174885801743364, - "loss": 1.0031, + "epoch": 0.6150668426186241, + "grad_norm": 0.2734375, + "learning_rate": 7.748299531832609e-05, + "loss": 0.9679, "step": 10720 }, { - "epoch": 0.30769010084202375, - "grad_norm": 0.3515625, - "learning_rate": 0.00017485260732055444, - "loss": 0.8743, + "epoch": 0.6153537208101441, + "grad_norm": 0.275390625, + "learning_rate": 7.738543825409652e-05, + "loss": 0.9438, "step": 10725 }, { - "epoch": 0.3078335461104815, - "grad_norm": 0.40234375, - "learning_rate": 0.00017481939412976024, - "loss": 0.9738, + "epoch": 0.6156405990016639, + "grad_norm": 0.275390625, + "learning_rate": 7.728790386780025e-05, + "loss": 0.9388, "step": 10730 }, { - "epoch": 0.30797699137893936, - "grad_norm": 0.421875, - "learning_rate": 0.00017478616217930895, - "loss": 0.9512, + "epoch": 0.6159274771931837, + "grad_norm": 0.25390625, + "learning_rate": 7.71903922572449e-05, + "loss": 1.0314, "step": 10735 }, { - "epoch": 0.3081204366473972, - "grad_norm": 0.435546875, - "learning_rate": 0.00017475291147753299, - "loss": 0.9908, + "epoch": 0.6162143553847037, + "grad_norm": 0.26953125, + "learning_rate": 7.70929035202153e-05, + "loss": 0.8882, "step": 10740 }, { - "epoch": 0.308263881915855, - "grad_norm": 0.40234375, - "learning_rate": 0.00017471964203276928, - "loss": 0.9653, + "epoch": 0.6165012335762236, + "grad_norm": 0.26171875, + "learning_rate": 7.699543775447345e-05, + "loss": 0.9499, "step": 10745 }, { - "epoch": 0.30840732718431285, - "grad_norm": 0.396484375, - "learning_rate": 0.0001746863538533597, - "loss": 0.8898, + "epoch": 0.6167881117677434, + "grad_norm": 0.2578125, + "learning_rate": 7.689799505775822e-05, + "loss": 0.9593, "step": 10750 }, { - "epoch": 0.3085507724527706, - "grad_norm": 0.390625, - "learning_rate": 0.00017465304694765056, - "loss": 0.9473, + "epoch": 0.6170749899592632, + "grad_norm": 0.26171875, + "learning_rate": 7.68005755277853e-05, + "loss": 0.8896, "step": 10755 }, { - "epoch": 0.30869421772122846, - "grad_norm": 0.388671875, - "learning_rate": 0.000174619721323993, - "loss": 0.9143, + "epoch": 0.6173618681507832, + "grad_norm": 0.25, + "learning_rate": 7.67031792622473e-05, + "loss": 1.0001, "step": 10760 }, { - "epoch": 0.3088376629896863, - "grad_norm": 0.400390625, - "learning_rate": 0.0001745863769907429, - "loss": 0.915, + "epoch": 0.617648746342303, + "grad_norm": 0.2490234375, + "learning_rate": 7.660580635881338e-05, + "loss": 1.0065, "step": 10765 }, { - "epoch": 0.3089811082581441, - "grad_norm": 0.404296875, - "learning_rate": 0.0001745530139562607, - "loss": 0.9592, + "epoch": 0.6179356245338229, + "grad_norm": 0.26171875, + "learning_rate": 7.65084569151293e-05, + "loss": 0.9113, "step": 10770 }, { - "epoch": 0.3091245535266019, - "grad_norm": 0.380859375, - "learning_rate": 0.00017451963222891163, - "loss": 0.9381, + "epoch": 0.6182225027253428, + "grad_norm": 0.263671875, + "learning_rate": 7.641113102881726e-05, + "loss": 0.9221, "step": 10775 }, { - "epoch": 0.30926799879505973, - "grad_norm": 0.3671875, - "learning_rate": 0.0001744862318170656, - "loss": 0.9548, + "epoch": 0.6185093809168627, + "grad_norm": 0.26953125, + 
"learning_rate": 7.631382879747597e-05, + "loss": 0.9337, "step": 10780 }, { - "epoch": 0.30941144406351756, - "grad_norm": 0.423828125, - "learning_rate": 0.00017445281272909715, - "loss": 0.9474, + "epoch": 0.6187962591083825, + "grad_norm": 0.25390625, + "learning_rate": 7.621655031868026e-05, + "loss": 0.9811, "step": 10785 }, { - "epoch": 0.3095548893319754, - "grad_norm": 0.41796875, - "learning_rate": 0.00017441937497338552, - "loss": 1.0337, + "epoch": 0.6190831372999025, + "grad_norm": 0.27734375, + "learning_rate": 7.61192956899812e-05, + "loss": 1.0128, "step": 10790 }, { - "epoch": 0.3096983346004332, - "grad_norm": 0.41796875, - "learning_rate": 0.0001743859185583147, - "loss": 1.0463, + "epoch": 0.6193700154914223, + "grad_norm": 0.279296875, + "learning_rate": 7.6022065008906e-05, + "loss": 0.9482, "step": 10795 }, { - "epoch": 0.309841779868891, - "grad_norm": 0.396484375, - "learning_rate": 0.0001743524434922732, - "loss": 0.9015, + "epoch": 0.6196568936829422, + "grad_norm": 0.25390625, + "learning_rate": 7.592485837295777e-05, + "loss": 0.9472, "step": 10800 }, { - "epoch": 0.30998522513734883, - "grad_norm": 0.404296875, - "learning_rate": 0.00017431894978365441, - "loss": 1.0308, + "epoch": 0.6199437718744621, + "grad_norm": 0.26171875, + "learning_rate": 7.582767587961552e-05, + "loss": 0.9221, "step": 10805 }, { - "epoch": 0.31012867040580666, - "grad_norm": 0.390625, - "learning_rate": 0.00017428543744085623, - "loss": 1.0186, + "epoch": 0.620230650065982, + "grad_norm": 0.287109375, + "learning_rate": 7.573051762633414e-05, + "loss": 0.9532, "step": 10810 }, { - "epoch": 0.3102721156742645, - "grad_norm": 0.375, - "learning_rate": 0.00017425190647228135, - "loss": 0.9642, + "epoch": 0.6205175282575018, + "grad_norm": 0.259765625, + "learning_rate": 7.563338371054412e-05, + "loss": 1.0231, "step": 10815 }, { - "epoch": 0.3104155609427223, - "grad_norm": 0.38671875, - "learning_rate": 0.00017421835688633704, - "loss": 0.994, + "epoch": 0.6208044064490218, + "grad_norm": 0.2578125, + "learning_rate": 7.553627422965148e-05, + "loss": 0.9752, "step": 10820 }, { - "epoch": 0.3105590062111801, - "grad_norm": 0.361328125, - "learning_rate": 0.00017418478869143527, - "loss": 1.0384, + "epoch": 0.6210912846405416, + "grad_norm": 0.26171875, + "learning_rate": 7.543918928103795e-05, + "loss": 0.9337, "step": 10825 }, { - "epoch": 0.31070245147963793, - "grad_norm": 0.400390625, - "learning_rate": 0.0001741512018959927, - "loss": 1.0494, + "epoch": 0.6213781628320615, + "grad_norm": 0.2734375, + "learning_rate": 7.534212896206051e-05, + "loss": 0.9364, "step": 10830 }, { - "epoch": 0.31084589674809576, - "grad_norm": 0.359375, - "learning_rate": 0.0001741175965084306, - "loss": 0.9925, + "epoch": 0.6216650410235813, + "grad_norm": 0.2890625, + "learning_rate": 7.524509337005141e-05, + "loss": 0.9254, "step": 10835 }, { - "epoch": 0.3109893420165536, - "grad_norm": 0.439453125, - "learning_rate": 0.00017408397253717496, - "loss": 1.0578, + "epoch": 0.6219519192151013, + "grad_norm": 0.25, + "learning_rate": 7.514808260231818e-05, + "loss": 0.9011, "step": 10840 }, { - "epoch": 0.3111327872850114, - "grad_norm": 0.46875, - "learning_rate": 0.0001740503299906564, - "loss": 0.9644, + "epoch": 0.6222387974066211, + "grad_norm": 0.271484375, + "learning_rate": 7.505109675614346e-05, + "loss": 0.9011, "step": 10845 }, { - "epoch": 0.3112762325534692, - "grad_norm": 0.376953125, - "learning_rate": 0.0001740166688773102, - "loss": 1.0274, + "epoch": 0.622525675598141, + "grad_norm": 
0.255859375, + "learning_rate": 7.495413592878484e-05, + "loss": 0.9105, "step": 10850 }, { - "epoch": 0.31141967782192703, - "grad_norm": 0.4375, - "learning_rate": 0.00017398298920557633, - "loss": 0.9988, + "epoch": 0.6228125537896609, + "grad_norm": 0.2578125, + "learning_rate": 7.485720021747486e-05, + "loss": 0.9194, "step": 10855 }, { - "epoch": 0.31156312309038486, - "grad_norm": 0.404296875, - "learning_rate": 0.00017394929098389929, - "loss": 0.9941, + "epoch": 0.6230994319811808, + "grad_norm": 0.263671875, + "learning_rate": 7.476028971942093e-05, + "loss": 0.9168, "step": 10860 }, { - "epoch": 0.3117065683588427, - "grad_norm": 0.423828125, - "learning_rate": 0.0001739155742207284, - "loss": 0.975, + "epoch": 0.6233863101727006, + "grad_norm": 0.265625, + "learning_rate": 7.466340453180505e-05, + "loss": 0.9753, "step": 10865 }, { - "epoch": 0.3118500136273005, - "grad_norm": 0.353515625, - "learning_rate": 0.00017388183892451755, - "loss": 1.0002, + "epoch": 0.6236731883642206, + "grad_norm": 0.263671875, + "learning_rate": 7.456654475178389e-05, + "loss": 0.9741, "step": 10870 }, { - "epoch": 0.3119934588957583, - "grad_norm": 0.416015625, - "learning_rate": 0.00017384808510372522, - "loss": 0.9139, + "epoch": 0.6239600665557404, + "grad_norm": 0.263671875, + "learning_rate": 7.446971047648873e-05, + "loss": 0.9154, "step": 10875 }, { - "epoch": 0.31213690416421613, - "grad_norm": 0.43359375, - "learning_rate": 0.00017381431276681464, - "loss": 0.9753, + "epoch": 0.6242469447472603, + "grad_norm": 0.275390625, + "learning_rate": 7.437290180302512e-05, + "loss": 0.9548, "step": 10880 }, { - "epoch": 0.31228034943267396, - "grad_norm": 0.41796875, - "learning_rate": 0.0001737805219222536, - "loss": 0.9753, + "epoch": 0.6245338229387802, + "grad_norm": 0.259765625, + "learning_rate": 7.427611882847301e-05, + "loss": 0.9339, "step": 10885 }, { - "epoch": 0.3124237947011318, - "grad_norm": 0.369140625, - "learning_rate": 0.0001737467125785146, - "loss": 1.0171, + "epoch": 0.6248207011303001, + "grad_norm": 0.265625, + "learning_rate": 7.41793616498867e-05, + "loss": 0.9884, "step": 10890 }, { - "epoch": 0.3125672399695896, - "grad_norm": 0.380859375, - "learning_rate": 0.00017371288474407474, - "loss": 1.0348, + "epoch": 0.6251075793218199, + "grad_norm": 0.251953125, + "learning_rate": 7.40826303642944e-05, + "loss": 0.8957, "step": 10895 }, { - "epoch": 0.3127106852380474, - "grad_norm": 0.48046875, - "learning_rate": 0.0001736790384274157, - "loss": 1.004, + "epoch": 0.6253944575133399, + "grad_norm": 0.25, + "learning_rate": 7.398592506869849e-05, + "loss": 0.9553, "step": 10900 }, { - "epoch": 0.31285413050650523, - "grad_norm": 0.4140625, - "learning_rate": 0.00017364517363702392, - "loss": 1.0302, + "epoch": 0.6256813357048597, + "grad_norm": 0.296875, + "learning_rate": 7.388924586007523e-05, + "loss": 0.9071, "step": 10905 }, { - "epoch": 0.31299757577496307, - "grad_norm": 0.40234375, - "learning_rate": 0.00017361129038139038, - "loss": 0.9881, + "epoch": 0.6259682138963796, + "grad_norm": 0.2734375, + "learning_rate": 7.379259283537479e-05, + "loss": 0.9648, "step": 10910 }, { - "epoch": 0.3131410210434209, - "grad_norm": 0.39453125, - "learning_rate": 0.00017357738866901072, - "loss": 0.9185, + "epoch": 0.6262550920878994, + "grad_norm": 0.263671875, + "learning_rate": 7.369596609152105e-05, + "loss": 0.9983, "step": 10915 }, { - "epoch": 0.31328446631187873, - "grad_norm": 0.38671875, - "learning_rate": 0.0001735434685083852, - "loss": 0.9663, + "epoch": 
0.6265419702794194, + "grad_norm": 0.279296875, + "learning_rate": 7.359936572541142e-05, + "loss": 1.0115, "step": 10920 }, { - "epoch": 0.3134279115803365, - "grad_norm": 0.48046875, - "learning_rate": 0.00017350952990801872, - "loss": 1.0423, + "epoch": 0.6268288484709392, + "grad_norm": 0.283203125, + "learning_rate": 7.350279183391712e-05, + "loss": 0.932, "step": 10925 }, { - "epoch": 0.31357135684879434, - "grad_norm": 0.427734375, - "learning_rate": 0.00017347557287642076, - "loss": 1.0032, + "epoch": 0.6271157266624591, + "grad_norm": 0.267578125, + "learning_rate": 7.340624451388257e-05, + "loss": 0.9518, "step": 10930 }, { - "epoch": 0.31371480211725217, - "grad_norm": 0.41015625, - "learning_rate": 0.00017344159742210547, - "loss": 0.925, + "epoch": 0.627402604853979, + "grad_norm": 0.2470703125, + "learning_rate": 7.330972386212558e-05, + "loss": 0.932, "step": 10935 }, { - "epoch": 0.31385824738571, - "grad_norm": 0.375, - "learning_rate": 0.00017340760355359161, - "loss": 0.9393, + "epoch": 0.6276894830454989, + "grad_norm": 0.2890625, + "learning_rate": 7.321322997543743e-05, + "loss": 0.9442, "step": 10940 }, { - "epoch": 0.31400169265416783, - "grad_norm": 0.423828125, - "learning_rate": 0.00017337359127940255, - "loss": 0.9313, + "epoch": 0.6279763612370187, + "grad_norm": 0.26171875, + "learning_rate": 7.311676295058232e-05, + "loss": 0.8918, "step": 10945 }, { - "epoch": 0.3141451379226256, - "grad_norm": 0.400390625, - "learning_rate": 0.0001733395606080663, - "loss": 0.9573, + "epoch": 0.6282632394285387, + "grad_norm": 0.263671875, + "learning_rate": 7.302032288429756e-05, + "loss": 0.9165, "step": 10950 }, { - "epoch": 0.31428858319108344, - "grad_norm": 0.435546875, - "learning_rate": 0.00017330551154811537, - "loss": 0.9691, + "epoch": 0.6285501176200585, + "grad_norm": 0.283203125, + "learning_rate": 7.292390987329356e-05, + "loss": 0.9855, "step": 10955 }, { - "epoch": 0.31443202845954127, - "grad_norm": 0.3671875, - "learning_rate": 0.00017327144410808707, - "loss": 1.0146, + "epoch": 0.6288369958115784, + "grad_norm": 0.283203125, + "learning_rate": 7.282752401425343e-05, + "loss": 0.9264, "step": 10960 }, { - "epoch": 0.3145754737279991, - "grad_norm": 0.404296875, - "learning_rate": 0.00017323735829652312, - "loss": 0.9369, + "epoch": 0.6291238740030983, + "grad_norm": 0.263671875, + "learning_rate": 7.273116540383319e-05, + "loss": 1.0362, "step": 10965 }, { - "epoch": 0.3147189189964569, - "grad_norm": 0.404296875, - "learning_rate": 0.00017320325412197, - "loss": 0.8759, + "epoch": 0.6294107521946182, + "grad_norm": 0.259765625, + "learning_rate": 7.263483413866135e-05, + "loss": 0.9872, "step": 10970 }, { - "epoch": 0.3148623642649147, - "grad_norm": 0.37109375, - "learning_rate": 0.00017316913159297874, - "loss": 0.9739, + "epoch": 0.629697630386138, + "grad_norm": 0.26171875, + "learning_rate": 7.253853031533928e-05, + "loss": 0.9462, "step": 10975 }, { - "epoch": 0.31500580953337254, - "grad_norm": 0.39453125, - "learning_rate": 0.00017313499071810497, - "loss": 0.9184, + "epoch": 0.629984508577658, + "grad_norm": 0.27734375, + "learning_rate": 7.244225403044056e-05, + "loss": 0.989, "step": 10980 }, { - "epoch": 0.31514925480183037, - "grad_norm": 0.42578125, - "learning_rate": 0.00017310083150590887, - "loss": 1.0231, + "epoch": 0.6302713867691778, + "grad_norm": 0.27734375, + "learning_rate": 7.234600538051124e-05, + "loss": 0.8998, "step": 10985 }, { - "epoch": 0.3152927000702882, - "grad_norm": 0.404296875, - "learning_rate": 
0.00017306665396495534, - "loss": 1.0007, + "epoch": 0.6305582649606977, + "grad_norm": 0.27734375, + "learning_rate": 7.22497844620698e-05, + "loss": 0.9069, "step": 10990 }, { - "epoch": 0.315436145338746, - "grad_norm": 0.39453125, - "learning_rate": 0.00017303245810381375, - "loss": 0.8817, + "epoch": 0.6308451431522175, + "grad_norm": 0.2373046875, + "learning_rate": 7.215359137160673e-05, + "loss": 0.9017, "step": 10995 }, { - "epoch": 0.3155795906072038, - "grad_norm": 0.412109375, - "learning_rate": 0.0001729982439310581, - "loss": 0.9877, + "epoch": 0.6311320213437375, + "grad_norm": 0.298828125, + "learning_rate": 7.205742620558464e-05, + "loss": 0.9396, "step": 11000 }, { - "epoch": 0.31572303587566164, - "grad_norm": 0.388671875, - "learning_rate": 0.00017296401145526703, - "loss": 0.9541, + "epoch": 0.6314188995352573, + "grad_norm": 0.279296875, + "learning_rate": 7.196128906043822e-05, + "loss": 1.0328, "step": 11005 }, { - "epoch": 0.31586648114411947, - "grad_norm": 0.4609375, - "learning_rate": 0.00017292976068502376, - "loss": 1.0112, + "epoch": 0.6317057777267772, + "grad_norm": 0.263671875, + "learning_rate": 7.1865180032574e-05, + "loss": 0.8883, "step": 11010 }, { - "epoch": 0.3160099264125773, - "grad_norm": 0.421875, - "learning_rate": 0.000172895491628916, - "loss": 1.0434, + "epoch": 0.6319926559182971, + "grad_norm": 0.265625, + "learning_rate": 7.176909921837033e-05, + "loss": 1.0588, "step": 11015 }, { - "epoch": 0.3161533716810351, - "grad_norm": 0.375, - "learning_rate": 0.0001728612042955362, - "loss": 1.0215, + "epoch": 0.632279534109817, + "grad_norm": 0.26171875, + "learning_rate": 7.167304671417729e-05, + "loss": 0.918, "step": 11020 }, { - "epoch": 0.3162968169494929, - "grad_norm": 0.384765625, - "learning_rate": 0.00017282689869348126, - "loss": 0.9469, + "epoch": 0.6325664123013368, + "grad_norm": 0.26171875, + "learning_rate": 7.157702261631653e-05, + "loss": 0.965, "step": 11025 }, { - "epoch": 0.31644026221795074, - "grad_norm": 0.33984375, - "learning_rate": 0.00017279257483135272, - "loss": 0.9001, + "epoch": 0.6328532904928568, + "grad_norm": 0.26171875, + "learning_rate": 7.148102702108122e-05, + "loss": 0.9823, "step": 11030 }, { - "epoch": 0.31658370748640857, - "grad_norm": 0.380859375, - "learning_rate": 0.00017275823271775667, - "loss": 1.0125, + "epoch": 0.6331401686843766, + "grad_norm": 0.26953125, + "learning_rate": 7.138506002473591e-05, + "loss": 0.981, "step": 11035 }, { - "epoch": 0.3167271527548664, - "grad_norm": 0.51953125, - "learning_rate": 0.00017272387236130383, - "loss": 0.9692, + "epoch": 0.6334270468758965, + "grad_norm": 0.294921875, + "learning_rate": 7.128912172351664e-05, + "loss": 0.9348, "step": 11040 }, { - "epoch": 0.3168705980233242, - "grad_norm": 0.37109375, - "learning_rate": 0.00017268949377060946, - "loss": 0.9515, + "epoch": 0.6337139250674164, + "grad_norm": 0.259765625, + "learning_rate": 7.119321221363047e-05, + "loss": 0.9774, "step": 11045 }, { - "epoch": 0.317014043291782, - "grad_norm": 0.37109375, - "learning_rate": 0.00017265509695429335, - "loss": 0.9085, + "epoch": 0.6340008032589363, + "grad_norm": 0.26171875, + "learning_rate": 7.109733159125566e-05, + "loss": 0.9297, "step": 11050 }, { - "epoch": 0.31715748856023984, - "grad_norm": 0.404296875, - "learning_rate": 0.00017262068192097995, - "loss": 0.9447, + "epoch": 0.6342876814504561, + "grad_norm": 0.26171875, + "learning_rate": 7.100147995254156e-05, + "loss": 0.9165, "step": 11055 }, { - "epoch": 0.3173009338286977, - "grad_norm": 
0.365234375, - "learning_rate": 0.00017258624867929817, - "loss": 1.0069, + "epoch": 0.6345745596419761, + "grad_norm": 0.2490234375, + "learning_rate": 7.09056573936084e-05, + "loss": 0.9639, "step": 11060 }, { - "epoch": 0.3174443790971555, - "grad_norm": 0.375, - "learning_rate": 0.00017255179723788162, - "loss": 0.9969, + "epoch": 0.6348614378334959, + "grad_norm": 0.251953125, + "learning_rate": 7.080986401054721e-05, + "loss": 0.8885, "step": 11065 }, { - "epoch": 0.3175878243656133, - "grad_norm": 0.4140625, - "learning_rate": 0.00017251732760536833, - "loss": 0.9369, + "epoch": 0.6351483160250158, + "grad_norm": 0.283203125, + "learning_rate": 7.071409989941989e-05, + "loss": 0.9485, "step": 11070 }, { - "epoch": 0.3177312696340711, - "grad_norm": 0.390625, - "learning_rate": 0.00017248283979040102, - "loss": 0.9613, + "epoch": 0.6354351942165356, + "grad_norm": 0.26171875, + "learning_rate": 7.061836515625886e-05, + "loss": 0.9421, "step": 11075 }, { - "epoch": 0.31787471490252894, - "grad_norm": 0.400390625, - "learning_rate": 0.00017244833380162687, - "loss": 0.8647, + "epoch": 0.6357220724080556, + "grad_norm": 0.255859375, + "learning_rate": 7.052265987706708e-05, + "loss": 0.9441, "step": 11080 }, { - "epoch": 0.3180181601709868, - "grad_norm": 0.375, - "learning_rate": 0.00017241380964769766, - "loss": 0.9074, + "epoch": 0.6360089505995754, + "grad_norm": 0.25390625, + "learning_rate": 7.042698415781813e-05, + "loss": 0.9537, "step": 11085 }, { - "epoch": 0.3181616054394446, - "grad_norm": 0.43359375, - "learning_rate": 0.0001723792673372697, - "loss": 0.9616, + "epoch": 0.6362958287910953, + "grad_norm": 0.26953125, + "learning_rate": 7.033133809445577e-05, + "loss": 0.9135, "step": 11090 }, { - "epoch": 0.3183050507079024, - "grad_norm": 0.40234375, - "learning_rate": 0.00017234470687900387, - "loss": 0.9839, + "epoch": 0.6365827069826152, + "grad_norm": 0.271484375, + "learning_rate": 7.02357217828941e-05, + "loss": 0.9283, "step": 11095 }, { - "epoch": 0.3184484959763602, - "grad_norm": 0.4296875, - "learning_rate": 0.00017231012828156566, - "loss": 1.0021, + "epoch": 0.6368695851741351, + "grad_norm": 0.2578125, + "learning_rate": 7.014013531901733e-05, + "loss": 0.889, "step": 11100 }, { - "epoch": 0.31859194124481804, - "grad_norm": 0.380859375, - "learning_rate": 0.000172275531553625, - "loss": 0.8501, + "epoch": 0.6371564633656549, + "grad_norm": 0.28515625, + "learning_rate": 7.004457879867986e-05, + "loss": 0.9422, "step": 11105 }, { - "epoch": 0.3187353865132759, - "grad_norm": 0.3984375, - "learning_rate": 0.00017224091670385642, - "loss": 0.893, + "epoch": 0.6374433415571749, + "grad_norm": 0.2734375, + "learning_rate": 6.994905231770593e-05, + "loss": 0.9034, "step": 11110 }, { - "epoch": 0.3188788317817337, - "grad_norm": 0.380859375, - "learning_rate": 0.000172206283740939, - "loss": 0.9136, + "epoch": 0.6377302197486947, + "grad_norm": 0.26953125, + "learning_rate": 6.985355597188971e-05, + "loss": 0.9111, "step": 11115 }, { - "epoch": 0.3190222770501915, - "grad_norm": 0.376953125, - "learning_rate": 0.00017217163267355638, - "loss": 0.9232, + "epoch": 0.6380170979402146, + "grad_norm": 0.302734375, + "learning_rate": 6.975808985699518e-05, + "loss": 0.9939, "step": 11120 }, { - "epoch": 0.3191657223186493, - "grad_norm": 0.388671875, - "learning_rate": 0.00017213696351039666, - "loss": 0.9363, + "epoch": 0.6383039761317345, + "grad_norm": 0.2578125, + "learning_rate": 6.966265406875597e-05, + "loss": 0.9296, "step": 11125 }, { - "epoch": 
0.31930916758710715, - "grad_norm": 0.392578125, - "learning_rate": 0.00017210227626015252, - "loss": 1.0095, + "epoch": 0.6385908543232544, + "grad_norm": 0.2470703125, + "learning_rate": 6.956724870287524e-05, + "loss": 0.9218, "step": 11130 }, { - "epoch": 0.319452612855565, - "grad_norm": 0.373046875, - "learning_rate": 0.0001720675709315212, - "loss": 0.9859, + "epoch": 0.6388777325147742, + "grad_norm": 0.251953125, + "learning_rate": 6.94718738550258e-05, + "loss": 0.9668, "step": 11135 }, { - "epoch": 0.31959605812402275, - "grad_norm": 0.376953125, - "learning_rate": 0.00017203284753320447, - "loss": 0.9172, + "epoch": 0.6391646107062942, + "grad_norm": 0.2890625, + "learning_rate": 6.93765296208497e-05, + "loss": 0.9111, "step": 11140 }, { - "epoch": 0.3197395033924806, - "grad_norm": 0.38671875, - "learning_rate": 0.00017199810607390864, - "loss": 0.9928, + "epoch": 0.639451488897814, + "grad_norm": 0.26953125, + "learning_rate": 6.928121609595835e-05, + "loss": 0.9802, "step": 11145 }, { - "epoch": 0.3198829486609384, - "grad_norm": 0.40625, - "learning_rate": 0.00017196334656234446, - "loss": 0.9318, + "epoch": 0.6397383670893338, + "grad_norm": 0.26171875, + "learning_rate": 6.918593337593238e-05, + "loss": 0.9536, "step": 11150 }, { - "epoch": 0.32002639392939625, - "grad_norm": 0.41015625, - "learning_rate": 0.00017192856900722733, - "loss": 0.855, + "epoch": 0.6400252452808537, + "grad_norm": 0.263671875, + "learning_rate": 6.909068155632153e-05, + "loss": 0.9412, "step": 11155 }, { - "epoch": 0.3201698391978541, - "grad_norm": 0.43359375, - "learning_rate": 0.00017189377341727708, - "loss": 0.9724, + "epoch": 0.6403121234723737, + "grad_norm": 0.26171875, + "learning_rate": 6.899546073264454e-05, + "loss": 0.9634, "step": 11160 }, { - "epoch": 0.32031328446631185, - "grad_norm": 0.435546875, - "learning_rate": 0.0001718589598012181, - "loss": 0.8973, + "epoch": 0.6405990016638935, + "grad_norm": 0.2734375, + "learning_rate": 6.890027100038901e-05, + "loss": 0.9113, "step": 11165 }, { - "epoch": 0.3204567297347697, - "grad_norm": 0.40234375, - "learning_rate": 0.00017182412816777931, - "loss": 0.968, + "epoch": 0.6408858798554133, + "grad_norm": 0.2734375, + "learning_rate": 6.880511245501149e-05, + "loss": 1.0384, "step": 11170 }, { - "epoch": 0.3206001750032275, - "grad_norm": 0.388671875, - "learning_rate": 0.00017178927852569412, - "loss": 0.9114, + "epoch": 0.6411727580469333, + "grad_norm": 0.26953125, + "learning_rate": 6.870998519193717e-05, + "loss": 0.9608, "step": 11175 }, { - "epoch": 0.32074362027168535, - "grad_norm": 0.396484375, - "learning_rate": 0.00017175441088370045, - "loss": 0.9202, + "epoch": 0.6414596362384531, + "grad_norm": 0.28125, + "learning_rate": 6.861488930655979e-05, + "loss": 0.9531, "step": 11180 }, { - "epoch": 0.3208870655401432, - "grad_norm": 0.392578125, - "learning_rate": 0.00017171952525054082, - "loss": 0.9674, + "epoch": 0.641746514429973, + "grad_norm": 0.271484375, + "learning_rate": 6.851982489424187e-05, + "loss": 0.9631, "step": 11185 }, { - "epoch": 0.32103051080860096, - "grad_norm": 0.37890625, - "learning_rate": 0.00017168462163496214, - "loss": 0.9346, + "epoch": 0.642033392621493, + "grad_norm": 0.25390625, + "learning_rate": 6.842479205031411e-05, + "loss": 0.9673, "step": 11190 }, { - "epoch": 0.3211739560770588, - "grad_norm": 0.4140625, - "learning_rate": 0.00017164970004571588, - "loss": 0.9454, + "epoch": 0.6423202708130128, + "grad_norm": 0.26171875, + "learning_rate": 6.832979087007565e-05, + "loss": 0.976, 
"step": 11195 }, { - "epoch": 0.3213174013455166, - "grad_norm": 0.3671875, - "learning_rate": 0.00017161476049155807, - "loss": 0.9141, + "epoch": 0.6426071490045326, + "grad_norm": 0.259765625, + "learning_rate": 6.823482144879398e-05, + "loss": 0.9439, "step": 11200 }, { - "epoch": 0.32146084661397445, - "grad_norm": 0.3984375, - "learning_rate": 0.0001715798029812491, - "loss": 1.0691, + "epoch": 0.6428940271960526, + "grad_norm": 0.2578125, + "learning_rate": 6.813988388170456e-05, + "loss": 0.9968, "step": 11205 }, { - "epoch": 0.3216042918824323, - "grad_norm": 0.361328125, - "learning_rate": 0.00017154482752355406, - "loss": 0.9652, + "epoch": 0.6431809053875724, + "grad_norm": 0.2734375, + "learning_rate": 6.804497826401105e-05, + "loss": 0.9747, "step": 11210 }, { - "epoch": 0.32174773715089006, - "grad_norm": 0.416015625, - "learning_rate": 0.00017150983412724235, - "loss": 0.8989, + "epoch": 0.6434677835790923, + "grad_norm": 0.2578125, + "learning_rate": 6.795010469088495e-05, + "loss": 0.9963, "step": 11215 }, { - "epoch": 0.3218911824193479, - "grad_norm": 0.6171875, - "learning_rate": 0.00017147482280108802, - "loss": 0.983, + "epoch": 0.6437546617706122, + "grad_norm": 0.283203125, + "learning_rate": 6.785526325746576e-05, + "loss": 0.9884, "step": 11220 }, { - "epoch": 0.3220346276878057, - "grad_norm": 0.376953125, - "learning_rate": 0.0001714397935538695, - "loss": 0.9629, + "epoch": 0.6440415399621321, + "grad_norm": 0.26953125, + "learning_rate": 6.776045405886066e-05, + "loss": 1.0205, "step": 11225 }, { - "epoch": 0.32217807295626355, - "grad_norm": 0.3984375, - "learning_rate": 0.00017140474639436981, - "loss": 1.0512, + "epoch": 0.6443284181536519, + "grad_norm": 0.275390625, + "learning_rate": 6.766567719014449e-05, + "loss": 0.9534, "step": 11230 }, { - "epoch": 0.3223215182247214, - "grad_norm": 0.3515625, - "learning_rate": 0.00017136968133137637, - "loss": 0.9344, + "epoch": 0.6446152963451719, + "grad_norm": 0.255859375, + "learning_rate": 6.75709327463598e-05, + "loss": 0.9204, "step": 11235 }, { - "epoch": 0.32246496349317916, - "grad_norm": 0.40234375, - "learning_rate": 0.0001713345983736811, - "loss": 0.9237, + "epoch": 0.6449021745366917, + "grad_norm": 0.25, + "learning_rate": 6.747622082251643e-05, + "loss": 0.8916, "step": 11240 }, { - "epoch": 0.322608408761637, - "grad_norm": 0.357421875, - "learning_rate": 0.00017129949753008056, - "loss": 0.9977, + "epoch": 0.6451890527282116, + "grad_norm": 0.26171875, + "learning_rate": 6.738154151359172e-05, + "loss": 0.9242, "step": 11245 }, { - "epoch": 0.3227518540300948, - "grad_norm": 0.421875, - "learning_rate": 0.00017126437880937557, - "loss": 1.0011, + "epoch": 0.6454759309197314, + "grad_norm": 0.283203125, + "learning_rate": 6.728689491453039e-05, + "loss": 0.9652, "step": 11250 }, { - "epoch": 0.32289529929855265, - "grad_norm": 0.396484375, - "learning_rate": 0.00017122924222037156, - "loss": 0.9362, + "epoch": 0.6457628091112514, + "grad_norm": 0.2431640625, + "learning_rate": 6.719228112024417e-05, + "loss": 0.9385, "step": 11255 }, { - "epoch": 0.3230387445670105, - "grad_norm": 0.384765625, - "learning_rate": 0.00017119408777187842, - "loss": 0.999, + "epoch": 0.6460496873027712, + "grad_norm": 0.271484375, + "learning_rate": 6.709770022561198e-05, + "loss": 0.9938, "step": 11260 }, { - "epoch": 0.32318218983546826, - "grad_norm": 0.419921875, - "learning_rate": 0.00017115891547271055, - "loss": 0.9379, + "epoch": 0.6463365654942911, + "grad_norm": 0.259765625, + "learning_rate": 
6.700315232547981e-05, + "loss": 0.9901, "step": 11265 }, { - "epoch": 0.3233256351039261, - "grad_norm": 0.388671875, - "learning_rate": 0.00017112372533168672, - "loss": 1.0307, + "epoch": 0.646623443685811, + "grad_norm": 0.25390625, + "learning_rate": 6.690863751466048e-05, + "loss": 0.9271, "step": 11270 }, { - "epoch": 0.3234690803723839, - "grad_norm": 0.36328125, - "learning_rate": 0.0001710885173576303, - "loss": 0.9022, + "epoch": 0.6469103218773309, + "grad_norm": 0.279296875, + "learning_rate": 6.681415588793367e-05, + "loss": 0.9564, "step": 11275 }, { - "epoch": 0.32361252564084175, - "grad_norm": 0.396484375, - "learning_rate": 0.00017105329155936905, - "loss": 1.0646, + "epoch": 0.6471972000688507, + "grad_norm": 0.25390625, + "learning_rate": 6.67197075400457e-05, + "loss": 0.9223, "step": 11280 }, { - "epoch": 0.3237559709092996, - "grad_norm": 0.3984375, - "learning_rate": 0.00017101804794573524, - "loss": 0.9812, + "epoch": 0.6474840782603707, + "grad_norm": 0.263671875, + "learning_rate": 6.662529256570969e-05, + "loss": 0.9456, "step": 11285 }, { - "epoch": 0.32389941617775736, - "grad_norm": 0.400390625, - "learning_rate": 0.0001709827865255656, - "loss": 0.9459, + "epoch": 0.6477709564518905, + "grad_norm": 0.25390625, + "learning_rate": 6.653091105960512e-05, + "loss": 0.9252, "step": 11290 }, { - "epoch": 0.3240428614462152, - "grad_norm": 0.388671875, - "learning_rate": 0.00017094750730770123, - "loss": 1.011, + "epoch": 0.6480578346434104, + "grad_norm": 0.255859375, + "learning_rate": 6.643656311637796e-05, + "loss": 0.9373, "step": 11295 }, { - "epoch": 0.324186306714673, - "grad_norm": 0.345703125, - "learning_rate": 0.0001709122103009879, - "loss": 1.023, + "epoch": 0.6483447128349303, + "grad_norm": 0.28125, + "learning_rate": 6.634224883064059e-05, + "loss": 0.9379, "step": 11300 }, { - "epoch": 0.32432975198313085, - "grad_norm": 0.44140625, - "learning_rate": 0.00017087689551427567, - "loss": 0.991, + "epoch": 0.6486315910264502, + "grad_norm": 0.279296875, + "learning_rate": 6.624796829697158e-05, + "loss": 1.0462, "step": 11305 }, { - "epoch": 0.32447319725158863, - "grad_norm": 0.369140625, - "learning_rate": 0.00017084156295641906, - "loss": 1.0497, + "epoch": 0.64891846921797, + "grad_norm": 0.267578125, + "learning_rate": 6.615372160991561e-05, + "loss": 1.0275, "step": 11310 }, { - "epoch": 0.32461664252004646, - "grad_norm": 0.419921875, - "learning_rate": 0.00017080621263627715, - "loss": 0.9808, + "epoch": 0.64920534740949, + "grad_norm": 0.267578125, + "learning_rate": 6.605950886398353e-05, + "loss": 0.8971, "step": 11315 }, { - "epoch": 0.3247600877885043, - "grad_norm": 0.375, - "learning_rate": 0.0001707708445627134, - "loss": 0.8787, + "epoch": 0.6494922256010098, + "grad_norm": 0.26171875, + "learning_rate": 6.596533015365207e-05, + "loss": 0.887, "step": 11320 }, { - "epoch": 0.3249035330569621, - "grad_norm": 0.318359375, - "learning_rate": 0.00017073545874459573, - "loss": 0.8218, + "epoch": 0.6497791037925297, + "grad_norm": 0.26171875, + "learning_rate": 6.587118557336382e-05, + "loss": 0.915, "step": 11325 }, { - "epoch": 0.32504697832541996, - "grad_norm": 0.392578125, - "learning_rate": 0.00017070005519079652, - "loss": 1.0376, + "epoch": 0.6500659819840495, + "grad_norm": 0.263671875, + "learning_rate": 6.577707521752725e-05, + "loss": 0.9363, "step": 11330 }, { - "epoch": 0.32519042359387773, - "grad_norm": 0.44140625, - "learning_rate": 0.00017066463391019257, - "loss": 1.02, + "epoch": 0.6503528601755695, + "grad_norm": 
0.27734375, + "learning_rate": 6.56829991805164e-05, + "loss": 0.9977, "step": 11335 }, { - "epoch": 0.32533386886233556, - "grad_norm": 0.396484375, - "learning_rate": 0.00017062919491166523, - "loss": 0.9623, + "epoch": 0.6506397383670893, + "grad_norm": 0.265625, + "learning_rate": 6.558895755667091e-05, + "loss": 0.9611, "step": 11340 }, { - "epoch": 0.3254773141307934, - "grad_norm": 0.37890625, - "learning_rate": 0.00017059373820410011, - "loss": 0.9954, + "epoch": 0.6509266165586092, + "grad_norm": 0.26953125, + "learning_rate": 6.549495044029592e-05, + "loss": 0.9674, "step": 11345 }, { - "epoch": 0.3256207593992512, - "grad_norm": 0.412109375, - "learning_rate": 0.00017055826379638742, - "loss": 0.9841, + "epoch": 0.6512134947501291, + "grad_norm": 0.275390625, + "learning_rate": 6.540097792566202e-05, + "loss": 0.9338, "step": 11350 }, { - "epoch": 0.32576420466770906, - "grad_norm": 0.39453125, - "learning_rate": 0.00017052277169742172, - "loss": 1.0521, + "epoch": 0.651500372941649, + "grad_norm": 0.26171875, + "learning_rate": 6.530704010700504e-05, + "loss": 0.9329, "step": 11355 }, { - "epoch": 0.32590764993616683, - "grad_norm": 0.380859375, - "learning_rate": 0.00017048726191610202, - "loss": 0.9025, + "epoch": 0.6517872511331688, + "grad_norm": 0.2734375, + "learning_rate": 6.521313707852601e-05, + "loss": 0.9615, "step": 11360 }, { - "epoch": 0.32605109520462466, - "grad_norm": 0.4140625, - "learning_rate": 0.00017045173446133185, - "loss": 0.9558, + "epoch": 0.6520741293246888, + "grad_norm": 0.267578125, + "learning_rate": 6.511926893439115e-05, + "loss": 0.9741, "step": 11365 }, { - "epoch": 0.3261945404730825, - "grad_norm": 0.40625, - "learning_rate": 0.00017041618934201904, - "loss": 0.9442, + "epoch": 0.6523610075162086, + "grad_norm": 0.248046875, + "learning_rate": 6.502543576873163e-05, + "loss": 0.929, "step": 11370 }, { - "epoch": 0.3263379857415403, - "grad_norm": 0.3515625, - "learning_rate": 0.0001703806265670759, - "loss": 1.0075, + "epoch": 0.6526478857077285, + "grad_norm": 0.279296875, + "learning_rate": 6.493163767564352e-05, + "loss": 0.9553, "step": 11375 }, { - "epoch": 0.32648143100999816, - "grad_norm": 0.4296875, - "learning_rate": 0.0001703450461454192, - "loss": 1.0172, + "epoch": 0.6529347638992484, + "grad_norm": 0.27734375, + "learning_rate": 6.483787474918779e-05, + "loss": 0.9487, "step": 11380 }, { - "epoch": 0.32662487627845593, - "grad_norm": 0.40625, - "learning_rate": 0.0001703094480859701, - "loss": 0.9646, + "epoch": 0.6532216420907683, + "grad_norm": 0.267578125, + "learning_rate": 6.474414708339013e-05, + "loss": 1.0413, "step": 11385 }, { - "epoch": 0.32676832154691376, - "grad_norm": 0.400390625, - "learning_rate": 0.00017027383239765422, - "loss": 0.9856, + "epoch": 0.6535085202822881, + "grad_norm": 0.2734375, + "learning_rate": 6.465045477224079e-05, + "loss": 0.962, "step": 11390 }, { - "epoch": 0.3269117668153716, - "grad_norm": 0.41015625, - "learning_rate": 0.00017023819908940156, - "loss": 0.8835, + "epoch": 0.6537953984738081, + "grad_norm": 0.259765625, + "learning_rate": 6.455679790969473e-05, + "loss": 0.9672, "step": 11395 }, { - "epoch": 0.3270552120838294, - "grad_norm": 0.3984375, - "learning_rate": 0.0001702025481701465, - "loss": 0.9814, + "epoch": 0.6540822766653279, + "grad_norm": 0.2734375, + "learning_rate": 6.446317658967119e-05, + "loss": 0.9116, "step": 11400 }, { - "epoch": 0.32719865735228726, - "grad_norm": 0.4375, - "learning_rate": 0.00017016687964882795, - "loss": 0.9685, + "epoch": 
0.6543691548568478, + "grad_norm": 0.2578125, + "learning_rate": 6.436959090605383e-05, + "loss": 0.9604, "step": 11405 }, { - "epoch": 0.32734210262074503, - "grad_norm": 0.384765625, - "learning_rate": 0.00017013119353438913, - "loss": 1.0135, + "epoch": 0.6546560330483676, + "grad_norm": 0.279296875, + "learning_rate": 6.42760409526906e-05, + "loss": 0.9871, "step": 11410 }, { - "epoch": 0.32748554788920287, - "grad_norm": 0.3828125, - "learning_rate": 0.00017009548983577772, - "loss": 1.0293, + "epoch": 0.6549429112398876, + "grad_norm": 0.26171875, + "learning_rate": 6.418252682339361e-05, + "loss": 0.955, "step": 11415 }, { - "epoch": 0.3276289931576607, - "grad_norm": 0.359375, - "learning_rate": 0.00017005976856194582, - "loss": 1.0178, + "epoch": 0.6552297894314074, + "grad_norm": 0.2734375, + "learning_rate": 6.408904861193906e-05, + "loss": 0.9585, "step": 11420 }, { - "epoch": 0.32777243842611853, - "grad_norm": 0.390625, - "learning_rate": 0.00017002402972184988, - "loss": 0.9104, + "epoch": 0.6555166676229273, + "grad_norm": 0.255859375, + "learning_rate": 6.399560641206706e-05, + "loss": 0.9315, "step": 11425 }, { - "epoch": 0.32791588369457636, - "grad_norm": 0.408203125, - "learning_rate": 0.00016998827332445084, - "loss": 0.9516, + "epoch": 0.6558035458144472, + "grad_norm": 0.267578125, + "learning_rate": 6.39022003174817e-05, + "loss": 0.9814, "step": 11430 }, { - "epoch": 0.32805932896303414, - "grad_norm": 0.41796875, - "learning_rate": 0.0001699524993787139, - "loss": 0.8316, + "epoch": 0.6560904240059671, + "grad_norm": 0.25, + "learning_rate": 6.380883042185084e-05, + "loss": 0.9402, "step": 11435 }, { - "epoch": 0.32820277423149197, - "grad_norm": 0.396484375, - "learning_rate": 0.0001699167078936089, - "loss": 0.9212, + "epoch": 0.6563773021974869, + "grad_norm": 0.294921875, + "learning_rate": 6.371549681880593e-05, + "loss": 0.9459, "step": 11440 }, { - "epoch": 0.3283462194999498, - "grad_norm": 0.4296875, - "learning_rate": 0.0001698808988781098, - "loss": 0.9081, + "epoch": 0.6566641803890069, + "grad_norm": 0.259765625, + "learning_rate": 6.362219960194223e-05, + "loss": 0.8998, "step": 11445 }, { - "epoch": 0.32848966476840763, - "grad_norm": 0.388671875, - "learning_rate": 0.0001698450723411951, - "loss": 0.876, + "epoch": 0.6569510585805267, + "grad_norm": 0.267578125, + "learning_rate": 6.352893886481829e-05, + "loss": 0.9532, "step": 11450 }, { - "epoch": 0.32863311003686546, - "grad_norm": 0.37109375, - "learning_rate": 0.00016980922829184776, - "loss": 0.9665, + "epoch": 0.6572379367720466, + "grad_norm": 0.27734375, + "learning_rate": 6.343571470095625e-05, + "loss": 0.9078, "step": 11455 }, { - "epoch": 0.32877655530532324, - "grad_norm": 0.408203125, - "learning_rate": 0.00016977336673905497, - "loss": 0.9036, + "epoch": 0.6575248149635665, + "grad_norm": 0.279296875, + "learning_rate": 6.334252720384153e-05, + "loss": 0.9404, "step": 11460 }, { - "epoch": 0.32892000057378107, - "grad_norm": 0.40625, - "learning_rate": 0.00016973748769180845, - "loss": 0.9879, + "epoch": 0.6578116931550864, + "grad_norm": 0.25, + "learning_rate": 6.32493764669227e-05, + "loss": 0.9324, "step": 11465 }, { - "epoch": 0.3290634458422389, - "grad_norm": 0.54296875, - "learning_rate": 0.00016970159115910417, - "loss": 0.9567, + "epoch": 0.6580985713466062, + "grad_norm": 0.26171875, + "learning_rate": 6.315626258361158e-05, + "loss": 1.0012, "step": 11470 }, { - "epoch": 0.32920689111069673, - "grad_norm": 0.390625, - "learning_rate": 0.00016966567714994261, - "loss": 
0.9223, + "epoch": 0.6583854495381262, + "grad_norm": 0.283203125, + "learning_rate": 6.306318564728294e-05, + "loss": 0.9721, "step": 11475 }, { - "epoch": 0.3293503363791545, - "grad_norm": 0.392578125, - "learning_rate": 0.00016962974567332858, - "loss": 0.9411, + "epoch": 0.658672327729646, + "grad_norm": 0.251953125, + "learning_rate": 6.297014575127455e-05, + "loss": 0.9619, "step": 11480 }, { - "epoch": 0.32949378164761234, - "grad_norm": 0.38671875, - "learning_rate": 0.00016959379673827125, - "loss": 0.9205, + "epoch": 0.6589592059211659, + "grad_norm": 0.275390625, + "learning_rate": 6.287714298888709e-05, + "loss": 1.0384, "step": 11485 }, { - "epoch": 0.32963722691607017, - "grad_norm": 0.404296875, - "learning_rate": 0.00016955783035378424, - "loss": 0.9433, + "epoch": 0.6592460841126857, + "grad_norm": 0.2578125, + "learning_rate": 6.27841774533838e-05, + "loss": 0.9609, "step": 11490 }, { - "epoch": 0.329780672184528, - "grad_norm": 0.419921875, - "learning_rate": 0.0001695218465288854, - "loss": 0.9961, + "epoch": 0.6595329623042057, + "grad_norm": 0.259765625, + "learning_rate": 6.26912492379909e-05, + "loss": 0.9154, "step": 11495 }, { - "epoch": 0.32992411745298583, - "grad_norm": 0.38671875, - "learning_rate": 0.00016948584527259715, - "loss": 0.9832, + "epoch": 0.6598198404957255, + "grad_norm": 0.27734375, + "learning_rate": 6.259835843589688e-05, + "loss": 1.0205, "step": 11500 }, { - "epoch": 0.3300675627214436, - "grad_norm": 0.408203125, - "learning_rate": 0.00016944982659394608, - "loss": 0.9479, + "epoch": 0.6601067186872454, + "grad_norm": 0.2578125, + "learning_rate": 6.250550514025287e-05, + "loss": 1.014, "step": 11505 }, { - "epoch": 0.33021100798990144, - "grad_norm": 0.427734375, - "learning_rate": 0.0001694137905019633, - "loss": 0.9385, + "epoch": 0.6603935968787653, + "grad_norm": 0.271484375, + "learning_rate": 6.24126894441724e-05, + "loss": 0.9599, "step": 11510 }, { - "epoch": 0.33035445325835927, - "grad_norm": 0.3828125, - "learning_rate": 0.00016937773700568425, - "loss": 1.0358, + "epoch": 0.6606804750702852, + "grad_norm": 0.2578125, + "learning_rate": 6.231991144073126e-05, + "loss": 0.9182, "step": 11515 }, { - "epoch": 0.3304978985268171, - "grad_norm": 0.38671875, - "learning_rate": 0.00016934166611414867, - "loss": 0.9357, + "epoch": 0.660967353261805, + "grad_norm": 0.263671875, + "learning_rate": 6.222717122296739e-05, + "loss": 0.9562, "step": 11520 }, { - "epoch": 0.33064134379527493, - "grad_norm": 0.421875, - "learning_rate": 0.0001693055778364007, - "loss": 0.9981, + "epoch": 0.661254231453325, + "grad_norm": 0.271484375, + "learning_rate": 6.213446888388093e-05, + "loss": 0.9325, "step": 11525 }, { - "epoch": 0.3307847890637327, - "grad_norm": 0.470703125, - "learning_rate": 0.0001692694721814889, - "loss": 0.9511, + "epoch": 0.6615411096448448, + "grad_norm": 0.244140625, + "learning_rate": 6.204180451643399e-05, + "loss": 0.8369, "step": 11530 }, { - "epoch": 0.33092823433219054, - "grad_norm": 0.375, - "learning_rate": 0.00016923334915846608, - "loss": 0.9067, + "epoch": 0.6618279878363647, + "grad_norm": 0.263671875, + "learning_rate": 6.194917821355062e-05, + "loss": 0.9454, "step": 11535 }, { - "epoch": 0.33107167960064837, - "grad_norm": 0.376953125, - "learning_rate": 0.0001691972087763895, - "loss": 0.981, + "epoch": 0.6621148660278846, + "grad_norm": 0.2734375, + "learning_rate": 6.18565900681166e-05, + "loss": 0.9957, "step": 11540 }, { - "epoch": 0.3312151248691062, - "grad_norm": 0.4921875, - "learning_rate": 
0.0001691610510443207, - "loss": 1.0467, + "epoch": 0.6624017442194045, + "grad_norm": 0.287109375, + "learning_rate": 6.176404017297965e-05, + "loss": 1.0129, "step": 11545 }, { - "epoch": 0.33135857013756403, - "grad_norm": 0.388671875, - "learning_rate": 0.0001691248759713256, - "loss": 0.9739, + "epoch": 0.6626886224109243, + "grad_norm": 0.25390625, + "learning_rate": 6.167152862094893e-05, + "loss": 0.903, "step": 11550 }, { - "epoch": 0.3315020154060218, - "grad_norm": 0.408203125, - "learning_rate": 0.0001690886835664745, - "loss": 0.8991, + "epoch": 0.6629755006024443, + "grad_norm": 0.296875, + "learning_rate": 6.157905550479525e-05, + "loss": 0.9487, "step": 11555 }, { - "epoch": 0.33164546067447964, - "grad_norm": 0.37109375, - "learning_rate": 0.00016905247383884196, - "loss": 1.0374, + "epoch": 0.6632623787939641, + "grad_norm": 0.255859375, + "learning_rate": 6.148662091725087e-05, + "loss": 0.928, "step": 11560 }, { - "epoch": 0.3317889059429375, - "grad_norm": 0.412109375, - "learning_rate": 0.000169016246797507, - "loss": 0.8902, + "epoch": 0.663549256985484, + "grad_norm": 0.27734375, + "learning_rate": 6.139422495100939e-05, + "loss": 1.0175, "step": 11565 }, { - "epoch": 0.3319323512113953, - "grad_norm": 0.447265625, - "learning_rate": 0.00016898000245155282, - "loss": 1.0061, + "epoch": 0.6638361351770038, + "grad_norm": 0.25390625, + "learning_rate": 6.13018676987257e-05, + "loss": 0.9285, "step": 11570 }, { - "epoch": 0.33207579647985314, - "grad_norm": 0.408203125, - "learning_rate": 0.0001689437408100672, - "loss": 0.8584, + "epoch": 0.6641230133685238, + "grad_norm": 0.267578125, + "learning_rate": 6.120954925301587e-05, + "loss": 0.9314, "step": 11575 }, { - "epoch": 0.3322192417483109, - "grad_norm": 0.376953125, - "learning_rate": 0.000168907461882142, - "loss": 1.0205, + "epoch": 0.6644098915600436, + "grad_norm": 0.263671875, + "learning_rate": 6.111726970645703e-05, + "loss": 0.9802, "step": 11580 }, { - "epoch": 0.33236268701676874, - "grad_norm": 0.357421875, - "learning_rate": 0.00016887116567687358, - "loss": 0.9883, + "epoch": 0.6646967697515634, + "grad_norm": 0.26953125, + "learning_rate": 6.102502915158733e-05, + "loss": 0.9253, "step": 11585 }, { - "epoch": 0.3325061322852266, - "grad_norm": 0.451171875, - "learning_rate": 0.00016883485220336257, - "loss": 0.862, + "epoch": 0.6649836479430834, + "grad_norm": 0.275390625, + "learning_rate": 6.093282768090574e-05, + "loss": 0.8891, "step": 11590 }, { - "epoch": 0.3326495775536844, - "grad_norm": 0.390625, - "learning_rate": 0.00016879852147071392, - "loss": 0.8866, + "epoch": 0.6652705261346032, + "grad_norm": 0.29296875, + "learning_rate": 6.084066538687222e-05, + "loss": 0.9175, "step": 11595 }, { - "epoch": 0.33279302282214224, - "grad_norm": 0.416015625, - "learning_rate": 0.00016876217348803693, - "loss": 0.9061, + "epoch": 0.6655574043261231, + "grad_norm": 0.255859375, + "learning_rate": 6.074854236190723e-05, + "loss": 0.9622, "step": 11600 }, { - "epoch": 0.3329364680906, - "grad_norm": 0.40234375, - "learning_rate": 0.00016872580826444525, - "loss": 0.9254, + "epoch": 0.665844282517643, + "grad_norm": 0.279296875, + "learning_rate": 6.065645869839196e-05, + "loss": 0.9369, "step": 11605 }, { - "epoch": 0.33307991335905784, - "grad_norm": 0.419921875, - "learning_rate": 0.00016868942580905677, - "loss": 0.9404, + "epoch": 0.6661311607091629, + "grad_norm": 0.27734375, + "learning_rate": 6.0564414488668165e-05, + "loss": 0.9495, "step": 11610 }, { - "epoch": 0.3332233586275157, - 
"grad_norm": 0.419921875, - "learning_rate": 0.00016865302613099385, - "loss": 0.8985, + "epoch": 0.6664180389006827, + "grad_norm": 0.275390625, + "learning_rate": 6.0472409825037926e-05, + "loss": 1.0065, "step": 11615 }, { - "epoch": 0.3333668038959735, - "grad_norm": 0.4140625, - "learning_rate": 0.000168616609239383, - "loss": 1.0259, + "epoch": 0.6667049170922027, + "grad_norm": 0.265625, + "learning_rate": 6.038044479976375e-05, + "loss": 0.9119, "step": 11620 }, { - "epoch": 0.33351024916443134, - "grad_norm": 0.443359375, - "learning_rate": 0.00016858017514335513, - "loss": 1.0272, + "epoch": 0.6669917952837225, + "grad_norm": 0.275390625, + "learning_rate": 6.0288519505068375e-05, + "loss": 0.9656, "step": 11625 }, { - "epoch": 0.3336536944328891, - "grad_norm": 0.388671875, - "learning_rate": 0.0001685437238520455, - "loss": 0.996, + "epoch": 0.6672786734752424, + "grad_norm": 0.271484375, + "learning_rate": 6.01966340331347e-05, + "loss": 0.9607, "step": 11630 }, { - "epoch": 0.33379713970134695, - "grad_norm": 0.41015625, - "learning_rate": 0.00016850725537459352, - "loss": 1.0059, + "epoch": 0.6675655516667623, + "grad_norm": 0.263671875, + "learning_rate": 6.010478847610565e-05, + "loss": 0.988, "step": 11635 }, { - "epoch": 0.3339405849698048, - "grad_norm": 0.431640625, - "learning_rate": 0.00016847076972014316, - "loss": 1.0323, + "epoch": 0.6678524298582822, + "grad_norm": 0.25390625, + "learning_rate": 6.0012982926084195e-05, + "loss": 0.9958, "step": 11640 }, { - "epoch": 0.3340840302382626, - "grad_norm": 0.37109375, - "learning_rate": 0.00016843426689784247, - "loss": 0.9072, + "epoch": 0.668139308049802, + "grad_norm": 0.275390625, + "learning_rate": 5.992121747513315e-05, + "loss": 0.9578, "step": 11645 }, { - "epoch": 0.3342274755067204, - "grad_norm": 0.431640625, - "learning_rate": 0.00016839774691684395, - "loss": 1.0607, + "epoch": 0.6684261862413219, + "grad_norm": 0.26171875, + "learning_rate": 5.982949221527506e-05, + "loss": 0.9389, "step": 11650 }, { - "epoch": 0.3343709207751782, - "grad_norm": 0.396484375, - "learning_rate": 0.00016836120978630433, - "loss": 0.9796, + "epoch": 0.6687130644328418, + "grad_norm": 0.275390625, + "learning_rate": 5.973780723849225e-05, + "loss": 0.9213, "step": 11655 }, { - "epoch": 0.33451436604363605, - "grad_norm": 0.40234375, - "learning_rate": 0.00016832465551538465, - "loss": 0.933, + "epoch": 0.6689999426243617, + "grad_norm": 0.26953125, + "learning_rate": 5.9646162636726634e-05, + "loss": 0.8939, "step": 11660 }, { - "epoch": 0.3346578113120939, - "grad_norm": 0.3828125, - "learning_rate": 0.00016828808411325025, - "loss": 0.9729, + "epoch": 0.6692868208158815, + "grad_norm": 0.265625, + "learning_rate": 5.955455850187962e-05, + "loss": 0.912, "step": 11665 }, { - "epoch": 0.3348012565805517, - "grad_norm": 0.380859375, - "learning_rate": 0.00016825149558907074, - "loss": 0.9718, + "epoch": 0.6695736990074015, + "grad_norm": 0.279296875, + "learning_rate": 5.946299492581201e-05, + "loss": 0.964, "step": 11670 }, { - "epoch": 0.3349447018490095, - "grad_norm": 0.3828125, - "learning_rate": 0.00016821488995202012, - "loss": 0.9894, + "epoch": 0.6698605771989213, + "grad_norm": 0.259765625, + "learning_rate": 5.9371472000344006e-05, + "loss": 0.9815, "step": 11675 }, { - "epoch": 0.3350881471174673, - "grad_norm": 0.37890625, - "learning_rate": 0.0001681782672112766, - "loss": 0.9405, + "epoch": 0.6701474553904412, + "grad_norm": 0.2578125, + "learning_rate": 5.9279989817255e-05, + "loss": 1.0095, "step": 11680 }, { - 
"epoch": 0.33523159238592515, - "grad_norm": 0.419921875, - "learning_rate": 0.00016814162737602268, - "loss": 1.0205, + "epoch": 0.6704343335819611, + "grad_norm": 0.26171875, + "learning_rate": 5.9188548468283475e-05, + "loss": 0.8657, "step": 11685 }, { - "epoch": 0.335375037654383, - "grad_norm": 0.40625, - "learning_rate": 0.00016810497045544515, - "loss": 0.9886, + "epoch": 0.670721211773481, + "grad_norm": 0.2734375, + "learning_rate": 5.9097148045127095e-05, + "loss": 0.941, "step": 11690 }, { - "epoch": 0.3355184829228408, - "grad_norm": 0.40625, - "learning_rate": 0.0001680682964587351, - "loss": 0.9323, + "epoch": 0.6710080899650008, + "grad_norm": 0.279296875, + "learning_rate": 5.9005788639442394e-05, + "loss": 0.931, "step": 11695 }, { - "epoch": 0.3356619281912986, - "grad_norm": 0.369140625, - "learning_rate": 0.0001680316053950879, - "loss": 0.9858, + "epoch": 0.6712949681565208, + "grad_norm": 0.2734375, + "learning_rate": 5.8914470342844694e-05, + "loss": 0.897, "step": 11700 }, { - "epoch": 0.3358053734597564, - "grad_norm": 0.40625, - "learning_rate": 0.00016799489727370318, - "loss": 0.9363, + "epoch": 0.6715818463480406, + "grad_norm": 0.263671875, + "learning_rate": 5.8823193246908346e-05, + "loss": 0.9791, "step": 11705 }, { - "epoch": 0.33594881872821425, - "grad_norm": 0.390625, - "learning_rate": 0.00016795817210378487, - "loss": 1.0028, + "epoch": 0.6718687245395605, + "grad_norm": 0.2734375, + "learning_rate": 5.873195744316611e-05, + "loss": 0.9706, "step": 11710 }, { - "epoch": 0.3360922639966721, - "grad_norm": 0.47265625, - "learning_rate": 0.0001679214298945412, - "loss": 0.9521, + "epoch": 0.6721556027310804, + "grad_norm": 0.294921875, + "learning_rate": 5.86407630231095e-05, + "loss": 0.9738, "step": 11715 }, { - "epoch": 0.3362357092651299, - "grad_norm": 0.376953125, - "learning_rate": 0.0001678846706551846, - "loss": 0.9271, + "epoch": 0.6724424809226003, + "grad_norm": 0.255859375, + "learning_rate": 5.8549610078188446e-05, + "loss": 0.9373, "step": 11720 }, { - "epoch": 0.3363791545335877, - "grad_norm": 0.45703125, - "learning_rate": 0.00016784789439493175, - "loss": 0.9232, + "epoch": 0.6727293591141201, + "grad_norm": 0.279296875, + "learning_rate": 5.845849869981137e-05, + "loss": 0.9755, "step": 11725 }, { - "epoch": 0.3365225998020455, - "grad_norm": 0.3828125, - "learning_rate": 0.00016781110112300377, - "loss": 0.9242, + "epoch": 0.67301623730564, + "grad_norm": 0.263671875, + "learning_rate": 5.836742897934497e-05, + "loss": 0.8923, "step": 11730 }, { - "epoch": 0.33666604507050335, - "grad_norm": 0.478515625, - "learning_rate": 0.00016777429084862587, - "loss": 1.0908, + "epoch": 0.6733031154971599, + "grad_norm": 0.251953125, + "learning_rate": 5.827640100811409e-05, + "loss": 0.9374, "step": 11735 }, { - "epoch": 0.3368094903389612, - "grad_norm": 0.36328125, - "learning_rate": 0.0001677374635810276, - "loss": 0.9565, + "epoch": 0.6735899936886798, + "grad_norm": 0.26171875, + "learning_rate": 5.8185414877401876e-05, + "loss": 0.9589, "step": 11740 }, { - "epoch": 0.336952935607419, - "grad_norm": 0.380859375, - "learning_rate": 0.00016770061932944272, - "loss": 1.0778, + "epoch": 0.6738768718801996, + "grad_norm": 0.251953125, + "learning_rate": 5.80944706784494e-05, + "loss": 0.9999, "step": 11745 }, { - "epoch": 0.3370963808758768, - "grad_norm": 0.36328125, - "learning_rate": 0.00016766375810310934, - "loss": 0.9262, + "epoch": 0.6741637500717196, + "grad_norm": 0.279296875, + "learning_rate": 5.8003568502455676e-05, + "loss": 
0.9712, "step": 11750 }, { - "epoch": 0.3372398261443346, - "grad_norm": 0.390625, - "learning_rate": 0.0001676268799112697, - "loss": 0.8949, + "epoch": 0.6744506282632394, + "grad_norm": 0.265625, + "learning_rate": 5.7912708440577635e-05, + "loss": 0.9159, "step": 11755 }, { - "epoch": 0.33738327141279245, - "grad_norm": 0.373046875, - "learning_rate": 0.00016758998476317042, - "loss": 0.8936, + "epoch": 0.6747375064547593, + "grad_norm": 0.283203125, + "learning_rate": 5.782189058392995e-05, + "loss": 1.0858, "step": 11760 }, { - "epoch": 0.3375267166812503, - "grad_norm": 0.376953125, - "learning_rate": 0.0001675530726680623, - "loss": 0.9638, + "epoch": 0.6750243846462792, + "grad_norm": 0.27734375, + "learning_rate": 5.773111502358492e-05, + "loss": 0.9238, "step": 11765 }, { - "epoch": 0.3376701619497081, - "grad_norm": 0.474609375, - "learning_rate": 0.0001675161436352004, - "loss": 0.986, + "epoch": 0.6753112628377991, + "grad_norm": 0.259765625, + "learning_rate": 5.764038185057259e-05, + "loss": 0.8468, "step": 11770 }, { - "epoch": 0.3378136072181659, - "grad_norm": 0.423828125, - "learning_rate": 0.00016747919767384404, - "loss": 1.0252, + "epoch": 0.6755981410293189, + "grad_norm": 0.25390625, + "learning_rate": 5.754969115588034e-05, + "loss": 0.9542, "step": 11775 }, { - "epoch": 0.3379570524866237, - "grad_norm": 0.36328125, - "learning_rate": 0.0001674422347932567, - "loss": 0.9201, + "epoch": 0.6758850192208389, + "grad_norm": 0.265625, + "learning_rate": 5.7459043030452966e-05, + "loss": 0.9437, "step": 11780 }, { - "epoch": 0.33810049775508155, - "grad_norm": 0.369140625, - "learning_rate": 0.00016740525500270632, - "loss": 0.9041, + "epoch": 0.6761718974123587, + "grad_norm": 0.2578125, + "learning_rate": 5.736843756519259e-05, + "loss": 1.0084, "step": 11785 }, { - "epoch": 0.3382439430235394, - "grad_norm": 0.427734375, - "learning_rate": 0.00016736825831146482, - "loss": 0.9659, + "epoch": 0.6764587756038786, + "grad_norm": 0.275390625, + "learning_rate": 5.727787485095866e-05, + "loss": 1.0258, "step": 11790 }, { - "epoch": 0.3383873882919972, - "grad_norm": 0.37890625, - "learning_rate": 0.0001673312447288085, - "loss": 0.8396, + "epoch": 0.6767456537953985, + "grad_norm": 0.2578125, + "learning_rate": 5.718735497856762e-05, + "loss": 0.9685, "step": 11795 }, { - "epoch": 0.338530833560455, - "grad_norm": 0.41015625, - "learning_rate": 0.00016729421426401787, - "loss": 0.961, + "epoch": 0.6770325319869184, + "grad_norm": 0.27734375, + "learning_rate": 5.709687803879301e-05, + "loss": 0.9179, "step": 11800 }, { - "epoch": 0.3386742788289128, - "grad_norm": 0.35546875, - "learning_rate": 0.00016725716692637775, - "loss": 0.9192, + "epoch": 0.6773194101784382, + "grad_norm": 0.2734375, + "learning_rate": 5.700644412236531e-05, + "loss": 0.871, "step": 11805 }, { - "epoch": 0.33881772409737065, - "grad_norm": 0.41015625, - "learning_rate": 0.000167220102725177, - "loss": 1.0073, + "epoch": 0.6776062883699581, + "grad_norm": 0.2734375, + "learning_rate": 5.691605331997185e-05, + "loss": 1.0171, "step": 11810 }, { - "epoch": 0.3389611693658285, - "grad_norm": 0.37109375, - "learning_rate": 0.00016718302166970886, - "loss": 0.9385, + "epoch": 0.677893166561478, + "grad_norm": 0.263671875, + "learning_rate": 5.682570572225671e-05, + "loss": 0.9522, "step": 11815 }, { - "epoch": 0.33910461463428626, - "grad_norm": 0.3828125, - "learning_rate": 0.0001671459237692708, - "loss": 0.8465, + "epoch": 0.6781800447529979, + "grad_norm": 0.283203125, + "learning_rate": 
5.67354014198207e-05, + "loss": 0.9242, "step": 11820 }, { - "epoch": 0.3392480599027441, - "grad_norm": 0.36328125, - "learning_rate": 0.00016710880903316442, - "loss": 1.0373, + "epoch": 0.6784669229445177, + "grad_norm": 0.265625, + "learning_rate": 5.664514050322122e-05, + "loss": 0.9277, "step": 11825 }, { - "epoch": 0.3393915051712019, - "grad_norm": 0.3984375, - "learning_rate": 0.00016707167747069562, - "loss": 1.0332, + "epoch": 0.6787538011360377, + "grad_norm": 0.24609375, + "learning_rate": 5.6554923062971966e-05, + "loss": 1.019, "step": 11830 }, { - "epoch": 0.33953495043965976, - "grad_norm": 0.3671875, - "learning_rate": 0.00016703452909117445, - "loss": 0.9031, + "epoch": 0.6790406793275575, + "grad_norm": 0.26171875, + "learning_rate": 5.646474918954334e-05, + "loss": 0.9273, "step": 11835 }, { - "epoch": 0.3396783957081176, - "grad_norm": 0.37109375, - "learning_rate": 0.0001669973639039153, - "loss": 0.9756, + "epoch": 0.6793275575190774, + "grad_norm": 0.265625, + "learning_rate": 5.637461897336185e-05, + "loss": 0.9329, "step": 11840 }, { - "epoch": 0.33982184097657536, - "grad_norm": 0.43359375, - "learning_rate": 0.0001669601819182366, - "loss": 1.0768, + "epoch": 0.6796144357105973, + "grad_norm": 0.267578125, + "learning_rate": 5.628453250481026e-05, + "loss": 0.9303, "step": 11845 }, { - "epoch": 0.3399652862450332, - "grad_norm": 0.404296875, - "learning_rate": 0.0001669229831434611, - "loss": 0.9782, + "epoch": 0.6799013139021172, + "grad_norm": 0.26953125, + "learning_rate": 5.6194489874227504e-05, + "loss": 0.8853, "step": 11850 }, { - "epoch": 0.340108731513491, - "grad_norm": 0.40234375, - "learning_rate": 0.00016688576758891582, - "loss": 1.0752, + "epoch": 0.680188192093637, + "grad_norm": 0.26171875, + "learning_rate": 5.610449117190855e-05, + "loss": 0.9082, "step": 11855 }, { - "epoch": 0.34025217678194886, - "grad_norm": 0.4140625, - "learning_rate": 0.00016684853526393185, - "loss": 0.9808, + "epoch": 0.680475070285157, + "grad_norm": 0.279296875, + "learning_rate": 5.601453648810426e-05, + "loss": 0.8877, "step": 11860 }, { - "epoch": 0.3403956220504067, - "grad_norm": 0.416015625, - "learning_rate": 0.00016681128617784455, - "loss": 0.9491, + "epoch": 0.6807619484766768, + "grad_norm": 0.2578125, + "learning_rate": 5.5924625913021386e-05, + "loss": 0.9318, "step": 11865 }, { - "epoch": 0.34053906731886446, - "grad_norm": 0.400390625, - "learning_rate": 0.00016677402033999346, - "loss": 0.9634, + "epoch": 0.6810488266681967, + "grad_norm": 0.259765625, + "learning_rate": 5.583475953682251e-05, + "loss": 0.9631, "step": 11870 }, { - "epoch": 0.3406825125873223, - "grad_norm": 0.388671875, - "learning_rate": 0.00016673673775972238, - "loss": 0.9321, + "epoch": 0.6813357048597166, + "grad_norm": 0.26953125, + "learning_rate": 5.5744937449625854e-05, + "loss": 0.9611, "step": 11875 }, { - "epoch": 0.3408259578557801, - "grad_norm": 0.365234375, - "learning_rate": 0.00016669943844637924, - "loss": 1.0107, + "epoch": 0.6816225830512365, + "grad_norm": 0.28515625, + "learning_rate": 5.565515974150508e-05, + "loss": 0.9942, "step": 11880 }, { - "epoch": 0.34096940312423796, - "grad_norm": 0.392578125, - "learning_rate": 0.0001666621224093162, - "loss": 1.0728, + "epoch": 0.6819094612427563, + "grad_norm": 0.28125, + "learning_rate": 5.556542650248959e-05, + "loss": 0.9043, "step": 11885 }, { - "epoch": 0.3411128483926958, - "grad_norm": 0.4140625, - "learning_rate": 0.00016662478965788962, - "loss": 1.0301, + "epoch": 0.6821963394342762, + "grad_norm": 
0.24609375, + "learning_rate": 5.547573782256403e-05, + "loss": 0.9597, "step": 11890 }, { - "epoch": 0.34125629366115356, - "grad_norm": 0.353515625, - "learning_rate": 0.00016658744020146, - "loss": 0.8861, + "epoch": 0.6824832176257961, + "grad_norm": 0.26953125, + "learning_rate": 5.538609379166845e-05, + "loss": 0.9716, "step": 11895 }, { - "epoch": 0.3413997389296114, - "grad_norm": 0.416015625, - "learning_rate": 0.00016655007404939212, - "loss": 0.8831, + "epoch": 0.682770095817316, + "grad_norm": 0.25390625, + "learning_rate": 5.529649449969804e-05, + "loss": 0.907, "step": 11900 }, { - "epoch": 0.3415431841980692, - "grad_norm": 0.4140625, - "learning_rate": 0.0001665126912110548, - "loss": 0.9582, + "epoch": 0.6830569740088358, + "grad_norm": 0.259765625, + "learning_rate": 5.5206940036503194e-05, + "loss": 0.9749, "step": 11905 }, { - "epoch": 0.34168662946652706, - "grad_norm": 0.80078125, - "learning_rate": 0.00016647529169582122, - "loss": 0.9577, + "epoch": 0.6833438522003558, + "grad_norm": 0.279296875, + "learning_rate": 5.511743049188931e-05, + "loss": 0.9953, "step": 11910 }, { - "epoch": 0.3418300747349849, - "grad_norm": 0.3828125, - "learning_rate": 0.0001664378755130686, - "loss": 0.9447, + "epoch": 0.6836307303918756, + "grad_norm": 0.298828125, + "learning_rate": 5.5027965955616743e-05, + "loss": 0.9719, "step": 11915 }, { - "epoch": 0.34197352000344267, - "grad_norm": 0.390625, - "learning_rate": 0.00016640044267217846, - "loss": 0.9093, + "epoch": 0.6839176085833955, + "grad_norm": 0.2431640625, + "learning_rate": 5.49385465174008e-05, + "loss": 0.9471, "step": 11920 }, { - "epoch": 0.3421169652719005, - "grad_norm": 0.400390625, - "learning_rate": 0.00016636299318253635, - "loss": 1.0034, + "epoch": 0.6842044867749154, + "grad_norm": 0.25390625, + "learning_rate": 5.48491722669115e-05, + "loss": 1.0521, "step": 11925 }, { - "epoch": 0.34226041054035833, - "grad_norm": 0.390625, - "learning_rate": 0.00016632552705353213, - "loss": 1.036, + "epoch": 0.6844913649664353, + "grad_norm": 0.267578125, + "learning_rate": 5.47598432937734e-05, + "loss": 0.9806, "step": 11930 }, { - "epoch": 0.34240385580881616, - "grad_norm": 0.376953125, - "learning_rate": 0.00016628804429455975, - "loss": 0.9653, + "epoch": 0.6847782431579551, + "grad_norm": 0.2578125, + "learning_rate": 5.467055968756595e-05, + "loss": 0.9911, "step": 11935 }, { - "epoch": 0.342547301077274, - "grad_norm": 0.40234375, - "learning_rate": 0.00016625054491501738, - "loss": 0.9985, + "epoch": 0.6850651213494751, + "grad_norm": 0.251953125, + "learning_rate": 5.4581321537822875e-05, + "loss": 0.8815, "step": 11940 }, { - "epoch": 0.34269074634573177, - "grad_norm": 0.40234375, - "learning_rate": 0.00016621302892430736, - "loss": 1.0655, + "epoch": 0.6853519995409949, + "grad_norm": 0.259765625, + "learning_rate": 5.4492128934032416e-05, + "loss": 1.0047, "step": 11945 }, { - "epoch": 0.3428341916141896, - "grad_norm": 0.42578125, - "learning_rate": 0.00016617549633183608, - "loss": 1.0351, + "epoch": 0.6856388777325148, + "grad_norm": 0.296875, + "learning_rate": 5.440298196563711e-05, + "loss": 0.9995, "step": 11950 }, { - "epoch": 0.34297763688264743, - "grad_norm": 0.37109375, - "learning_rate": 0.00016613794714701426, - "loss": 0.9062, + "epoch": 0.6859257559240347, + "grad_norm": 0.259765625, + "learning_rate": 5.431388072203373e-05, + "loss": 0.954, "step": 11955 }, { - "epoch": 0.34312108215110526, - "grad_norm": 0.392578125, - "learning_rate": 0.00016610038137925668, - "loss": 0.9159, + "epoch": 
0.6862126341155546, + "grad_norm": 0.283203125, + "learning_rate": 5.4224825292573154e-05, + "loss": 0.9492, "step": 11960 }, { - "epoch": 0.3432645274195631, - "grad_norm": 0.384765625, - "learning_rate": 0.0001660627990379823, - "loss": 0.9035, + "epoch": 0.6864995123070744, + "grad_norm": 0.267578125, + "learning_rate": 5.4135815766560486e-05, + "loss": 1.0314, "step": 11965 }, { - "epoch": 0.34340797268802087, - "grad_norm": 0.3515625, - "learning_rate": 0.00016602520013261424, - "loss": 0.993, + "epoch": 0.6867863904985942, + "grad_norm": 0.2578125, + "learning_rate": 5.40468522332546e-05, + "loss": 0.9593, "step": 11970 }, { - "epoch": 0.3435514179564787, - "grad_norm": 0.4296875, - "learning_rate": 0.00016598758467257975, - "loss": 1.0254, + "epoch": 0.6870732686901142, + "grad_norm": 0.25390625, + "learning_rate": 5.395793478186838e-05, + "loss": 0.9984, "step": 11975 }, { - "epoch": 0.34369486322493653, - "grad_norm": 0.392578125, - "learning_rate": 0.0001659499526673103, - "loss": 0.9278, + "epoch": 0.687360146881634, + "grad_norm": 0.275390625, + "learning_rate": 5.386906350156833e-05, + "loss": 0.9697, "step": 11980 }, { - "epoch": 0.34383830849339436, - "grad_norm": 0.421875, - "learning_rate": 0.0001659123041262414, - "loss": 1.085, + "epoch": 0.6876470250731539, + "grad_norm": 0.287109375, + "learning_rate": 5.378023848147487e-05, + "loss": 0.9866, "step": 11985 }, { - "epoch": 0.34398175376185214, - "grad_norm": 0.3671875, - "learning_rate": 0.0001658746390588128, - "loss": 1.029, + "epoch": 0.6879339032646739, + "grad_norm": 0.267578125, + "learning_rate": 5.36914598106619e-05, + "loss": 1.0016, "step": 11990 }, { - "epoch": 0.34412519903030997, - "grad_norm": 0.37109375, - "learning_rate": 0.00016583695747446835, - "loss": 1.0359, + "epoch": 0.6882207814561937, + "grad_norm": 0.267578125, + "learning_rate": 5.3602727578156895e-05, + "loss": 0.9289, "step": 11995 }, { - "epoch": 0.3442686442987678, - "grad_norm": 0.3984375, - "learning_rate": 0.00016579925938265606, - "loss": 0.9379, + "epoch": 0.6885076596477135, + "grad_norm": 0.287109375, + "learning_rate": 5.35140418729407e-05, + "loss": 0.9322, "step": 12000 }, { - "epoch": 0.34441208956722563, - "grad_norm": 0.404296875, - "learning_rate": 0.000165761544792828, - "loss": 1.0013, + "epoch": 0.6887945378392335, + "grad_norm": 0.265625, + "learning_rate": 5.3425402783947564e-05, + "loss": 0.9132, "step": 12005 }, { - "epoch": 0.34455553483568346, - "grad_norm": 0.392578125, - "learning_rate": 0.00016572381371444058, - "loss": 0.9627, + "epoch": 0.6890814160307533, + "grad_norm": 0.27734375, + "learning_rate": 5.3336810400064904e-05, + "loss": 1.0212, "step": 12010 }, { - "epoch": 0.34469898010414124, - "grad_norm": 0.408203125, - "learning_rate": 0.00016568606615695406, - "loss": 1.0622, + "epoch": 0.6893682942222732, + "grad_norm": 0.27734375, + "learning_rate": 5.324826481013345e-05, + "loss": 0.9593, "step": 12015 }, { - "epoch": 0.34484242537259907, - "grad_norm": 0.41015625, - "learning_rate": 0.0001656483021298331, - "loss": 1.0157, + "epoch": 0.6896551724137931, + "grad_norm": 0.271484375, + "learning_rate": 5.315976610294689e-05, + "loss": 0.9577, "step": 12020 }, { - "epoch": 0.3449858706410569, - "grad_norm": 0.41015625, - "learning_rate": 0.00016561052164254628, - "loss": 0.9304, + "epoch": 0.689942050605313, + "grad_norm": 0.26171875, + "learning_rate": 5.307131436725191e-05, + "loss": 0.9461, "step": 12025 }, { - "epoch": 0.34512931590951473, - "grad_norm": 0.412109375, - "learning_rate": 
0.00016557272470456646, - "loss": 0.9734, + "epoch": 0.6902289287968328, + "grad_norm": 0.275390625, + "learning_rate": 5.298290969174812e-05, + "loss": 0.9663, "step": 12030 }, { - "epoch": 0.34527276117797256, - "grad_norm": 0.376953125, - "learning_rate": 0.00016553491132537053, - "loss": 0.8818, + "epoch": 0.6905158069883528, + "grad_norm": 0.2412109375, + "learning_rate": 5.2894552165087916e-05, + "loss": 1.0194, "step": 12035 }, { - "epoch": 0.34541620644643034, - "grad_norm": 0.451171875, - "learning_rate": 0.00016549708151443956, - "loss": 0.8791, + "epoch": 0.6908026851798726, + "grad_norm": 0.2890625, + "learning_rate": 5.2806241875876426e-05, + "loss": 1.0011, "step": 12040 }, { - "epoch": 0.34555965171488817, - "grad_norm": 0.3828125, - "learning_rate": 0.00016545923528125874, - "loss": 0.9428, + "epoch": 0.6910895633713925, + "grad_norm": 0.244140625, + "learning_rate": 5.271797891267142e-05, + "loss": 0.8782, "step": 12045 }, { - "epoch": 0.345703096983346, - "grad_norm": 0.369140625, - "learning_rate": 0.00016542137263531723, - "loss": 0.9275, + "epoch": 0.6913764415629123, + "grad_norm": 0.275390625, + "learning_rate": 5.262976336398318e-05, + "loss": 0.9568, "step": 12050 }, { - "epoch": 0.34584654225180383, - "grad_norm": 0.396484375, - "learning_rate": 0.00016538349358610855, - "loss": 1.0062, + "epoch": 0.6916633197544323, + "grad_norm": 0.279296875, + "learning_rate": 5.254159531827445e-05, + "loss": 1.0285, "step": 12055 }, { - "epoch": 0.34598998752026167, - "grad_norm": 0.375, - "learning_rate": 0.00016534559814313017, - "loss": 0.9795, + "epoch": 0.6919501979459521, + "grad_norm": 0.2734375, + "learning_rate": 5.245347486396033e-05, + "loss": 0.9552, "step": 12060 }, { - "epoch": 0.34613343278871944, - "grad_norm": 0.404296875, - "learning_rate": 0.00016530768631588374, - "loss": 1.0703, + "epoch": 0.692237076137472, + "grad_norm": 0.25390625, + "learning_rate": 5.236540208940827e-05, + "loss": 0.917, "step": 12065 }, { - "epoch": 0.3462768780571773, - "grad_norm": 0.41015625, - "learning_rate": 0.00016526975811387493, - "loss": 0.9636, + "epoch": 0.6925239543289919, + "grad_norm": 0.28125, + "learning_rate": 5.2277377082937806e-05, + "loss": 0.9708, "step": 12070 }, { - "epoch": 0.3464203233256351, - "grad_norm": 0.4375, - "learning_rate": 0.0001652318135466136, - "loss": 1.0272, + "epoch": 0.6928108325205118, + "grad_norm": 0.271484375, + "learning_rate": 5.2189399932820616e-05, + "loss": 0.9109, "step": 12075 }, { - "epoch": 0.34656376859409294, - "grad_norm": 0.41015625, - "learning_rate": 0.00016519385262361372, - "loss": 1.0024, + "epoch": 0.6930977107120316, + "grad_norm": 0.279296875, + "learning_rate": 5.210147072728038e-05, + "loss": 0.934, "step": 12080 }, { - "epoch": 0.34670721386255077, - "grad_norm": 0.412109375, - "learning_rate": 0.00016515587535439327, - "loss": 0.9316, + "epoch": 0.6933845889035516, + "grad_norm": 0.2734375, + "learning_rate": 5.2013589554492714e-05, + "loss": 0.9386, "step": 12085 }, { - "epoch": 0.34685065913100854, - "grad_norm": 0.396484375, - "learning_rate": 0.00016511788174847444, - "loss": 1.002, + "epoch": 0.6936714670950714, + "grad_norm": 0.2734375, + "learning_rate": 5.192575650258503e-05, + "loss": 0.9219, "step": 12090 }, { - "epoch": 0.3469941043994664, - "grad_norm": 0.40625, - "learning_rate": 0.00016507987181538347, - "loss": 1.0011, + "epoch": 0.6939583452865913, + "grad_norm": 0.2578125, + "learning_rate": 5.1837971659636545e-05, + "loss": 0.8816, "step": 12095 }, { - "epoch": 0.3471375496679242, - 
"grad_norm": 0.384765625, - "learning_rate": 0.0001650418455646506, - "loss": 0.9049, + "epoch": 0.6942452234781112, + "grad_norm": 0.279296875, + "learning_rate": 5.175023511367807e-05, + "loss": 0.9099, "step": 12100 }, { - "epoch": 0.34728099493638204, - "grad_norm": 0.376953125, - "learning_rate": 0.00016500380300581036, - "loss": 0.9054, + "epoch": 0.6945321016696311, + "grad_norm": 0.302734375, + "learning_rate": 5.1662546952692015e-05, + "loss": 0.9268, "step": 12105 }, { - "epoch": 0.34742444020483987, - "grad_norm": 0.458984375, - "learning_rate": 0.00016496574414840117, - "loss": 1.0109, + "epoch": 0.6948189798611509, + "grad_norm": 0.265625, + "learning_rate": 5.1574907264612224e-05, + "loss": 0.8738, "step": 12110 }, { - "epoch": 0.34756788547329764, - "grad_norm": 0.388671875, - "learning_rate": 0.0001649276690019657, - "loss": 0.943, + "epoch": 0.6951058580526709, + "grad_norm": 0.26953125, + "learning_rate": 5.148731613732407e-05, + "loss": 0.8922, "step": 12115 }, { - "epoch": 0.3477113307417555, - "grad_norm": 0.38671875, - "learning_rate": 0.00016488957757605056, - "loss": 0.933, + "epoch": 0.6953927362441907, + "grad_norm": 0.291015625, + "learning_rate": 5.139977365866406e-05, + "loss": 0.8938, "step": 12120 }, { - "epoch": 0.3478547760102133, - "grad_norm": 0.369140625, - "learning_rate": 0.00016485146988020656, - "loss": 1.0273, + "epoch": 0.6956796144357106, + "grad_norm": 0.2578125, + "learning_rate": 5.131227991642001e-05, + "loss": 0.9463, "step": 12125 }, { - "epoch": 0.34799822127867114, - "grad_norm": 0.455078125, - "learning_rate": 0.0001648133459239885, - "loss": 1.0711, + "epoch": 0.6959664926272304, + "grad_norm": 0.2734375, + "learning_rate": 5.122483499833084e-05, + "loss": 0.9507, "step": 12130 }, { - "epoch": 0.34814166654712897, - "grad_norm": 0.412109375, - "learning_rate": 0.0001647752057169553, - "loss": 1.0481, + "epoch": 0.6962533708187504, + "grad_norm": 0.3359375, + "learning_rate": 5.1137438992086506e-05, + "loss": 0.9741, "step": 12135 }, { - "epoch": 0.34828511181558675, - "grad_norm": 0.375, - "learning_rate": 0.00016473704926867, - "loss": 0.9536, + "epoch": 0.6965402490102702, + "grad_norm": 0.263671875, + "learning_rate": 5.1050091985327884e-05, + "loss": 0.9182, "step": 12140 }, { - "epoch": 0.3484285570840446, - "grad_norm": 0.341796875, - "learning_rate": 0.0001646988765886996, - "loss": 0.8839, + "epoch": 0.6968271272017901, + "grad_norm": 0.28125, + "learning_rate": 5.096279406564686e-05, + "loss": 0.9731, "step": 12145 }, { - "epoch": 0.3485720023525024, - "grad_norm": 0.34765625, - "learning_rate": 0.0001646606876866153, - "loss": 0.8799, + "epoch": 0.69711400539331, + "grad_norm": 0.265625, + "learning_rate": 5.087554532058586e-05, + "loss": 0.9596, "step": 12150 }, { - "epoch": 0.34871544762096024, - "grad_norm": 0.380859375, - "learning_rate": 0.00016462248257199225, - "loss": 0.9635, + "epoch": 0.6974008835848299, + "grad_norm": 0.259765625, + "learning_rate": 5.078834583763817e-05, + "loss": 0.867, "step": 12155 }, { - "epoch": 0.348858892889418, - "grad_norm": 0.47265625, - "learning_rate": 0.00016458426125440974, - "loss": 0.9716, + "epoch": 0.6976877617763497, + "grad_norm": 0.271484375, + "learning_rate": 5.0701195704247595e-05, + "loss": 0.9869, "step": 12160 }, { - "epoch": 0.34900233815787585, - "grad_norm": 0.404296875, - "learning_rate": 0.00016454602374345103, - "loss": 1.011, + "epoch": 0.6979746399678697, + "grad_norm": 0.275390625, + "learning_rate": 5.061409500780854e-05, + "loss": 0.9413, "step": 12165 }, { - 
"epoch": 0.3491457834263337, - "grad_norm": 0.39453125, - "learning_rate": 0.0001645077700487036, - "loss": 1.0155, + "epoch": 0.6982615181593895, + "grad_norm": 0.255859375, + "learning_rate": 5.052704383566577e-05, + "loss": 0.9569, "step": 12170 }, { - "epoch": 0.3492892286947915, - "grad_norm": 0.416015625, - "learning_rate": 0.00016446950017975887, - "loss": 0.989, + "epoch": 0.6985483963509094, + "grad_norm": 0.25, + "learning_rate": 5.044004227511436e-05, + "loss": 0.949, "step": 12175 }, { - "epoch": 0.34943267396324934, - "grad_norm": 0.361328125, - "learning_rate": 0.00016443121414621236, - "loss": 1.0141, + "epoch": 0.6988352745424293, + "grad_norm": 0.271484375, + "learning_rate": 5.0353090413399705e-05, + "loss": 0.9525, "step": 12180 }, { - "epoch": 0.3495761192317071, - "grad_norm": 0.416015625, - "learning_rate": 0.00016439291195766357, - "loss": 1.0203, + "epoch": 0.6991221527339492, + "grad_norm": 0.26171875, + "learning_rate": 5.02661883377173e-05, + "loss": 0.942, "step": 12185 }, { - "epoch": 0.34971956450016495, - "grad_norm": 0.388671875, - "learning_rate": 0.00016435459362371612, - "loss": 0.9981, + "epoch": 0.699409030925469, + "grad_norm": 0.2734375, + "learning_rate": 5.017933613521273e-05, + "loss": 0.9621, "step": 12190 }, { - "epoch": 0.3498630097686228, - "grad_norm": 0.3828125, - "learning_rate": 0.0001643162591539777, - "loss": 0.8897, + "epoch": 0.699695909116989, + "grad_norm": 0.267578125, + "learning_rate": 5.009253389298165e-05, + "loss": 0.976, "step": 12195 }, { - "epoch": 0.3500064550370806, - "grad_norm": 0.3828125, - "learning_rate": 0.00016427790855805995, - "loss": 0.9813, + "epoch": 0.6999827873085088, + "grad_norm": 0.271484375, + "learning_rate": 5.0005781698069474e-05, + "loss": 0.9563, "step": 12200 }, { - "epoch": 0.35014990030553844, - "grad_norm": 0.3828125, - "learning_rate": 0.0001642395418455787, - "loss": 0.9963, + "epoch": 0.7002696655000287, + "grad_norm": 0.255859375, + "learning_rate": 4.991907963747148e-05, + "loss": 0.9092, "step": 12205 }, { - "epoch": 0.3502933455739962, - "grad_norm": 0.40625, - "learning_rate": 0.00016420115902615365, - "loss": 0.8628, + "epoch": 0.7005565436915485, + "grad_norm": 0.259765625, + "learning_rate": 4.983242779813276e-05, + "loss": 0.8882, "step": 12210 }, { - "epoch": 0.35043679084245405, - "grad_norm": 0.376953125, - "learning_rate": 0.0001641627601094087, - "loss": 1.0425, + "epoch": 0.7008434218830685, + "grad_norm": 0.267578125, + "learning_rate": 4.9745826266947934e-05, + "loss": 0.9087, "step": 12215 }, { - "epoch": 0.3505802361109119, - "grad_norm": 0.404296875, - "learning_rate": 0.00016412434510497157, - "loss": 1.0089, + "epoch": 0.7011303000745883, + "grad_norm": 0.255859375, + "learning_rate": 4.965927513076123e-05, + "loss": 0.9406, "step": 12220 }, { - "epoch": 0.3507236813793697, - "grad_norm": 0.384765625, - "learning_rate": 0.0001640859140224743, - "loss": 1.035, + "epoch": 0.7014171782661082, + "grad_norm": 0.271484375, + "learning_rate": 4.957277447636629e-05, + "loss": 0.9818, "step": 12225 }, { - "epoch": 0.35086712664782754, - "grad_norm": 0.388671875, - "learning_rate": 0.00016404746687155277, - "loss": 1.0216, + "epoch": 0.7017040564576281, + "grad_norm": 0.2451171875, + "learning_rate": 4.94863243905062e-05, + "loss": 0.9895, "step": 12230 }, { - "epoch": 0.3510105719162853, - "grad_norm": 0.39453125, - "learning_rate": 0.0001640090036618469, - "loss": 0.8731, + "epoch": 0.701990934649148, + "grad_norm": 0.23828125, + "learning_rate": 4.939992495987327e-05, + "loss": 
0.9026, "step": 12235 }, { - "epoch": 0.35115401718474315, - "grad_norm": 0.384765625, - "learning_rate": 0.00016397052440300067, - "loss": 0.9741, + "epoch": 0.7022778128406678, + "grad_norm": 0.279296875, + "learning_rate": 4.931357627110902e-05, + "loss": 0.9303, "step": 12240 }, { - "epoch": 0.351297462453201, - "grad_norm": 0.40234375, - "learning_rate": 0.0001639320291046621, - "loss": 0.9069, + "epoch": 0.7025646910321878, + "grad_norm": 0.408203125, + "learning_rate": 4.9227278410804225e-05, + "loss": 0.9506, "step": 12245 }, { - "epoch": 0.3514409077216588, - "grad_norm": 0.373046875, - "learning_rate": 0.00016389351777648325, - "loss": 0.942, + "epoch": 0.7028515692237076, + "grad_norm": 0.275390625, + "learning_rate": 4.914103146549844e-05, + "loss": 0.9547, "step": 12250 }, { - "epoch": 0.35158435299011664, - "grad_norm": 0.408203125, - "learning_rate": 0.0001638549904281201, - "loss": 0.9277, + "epoch": 0.7031384474152275, + "grad_norm": 0.279296875, + "learning_rate": 4.905483552168032e-05, + "loss": 0.925, "step": 12255 }, { - "epoch": 0.3517277982585744, - "grad_norm": 0.3828125, - "learning_rate": 0.00016381644706923277, - "loss": 0.926, + "epoch": 0.7034253256067474, + "grad_norm": 0.25390625, + "learning_rate": 4.896869066578741e-05, + "loss": 0.971, "step": 12260 }, { - "epoch": 0.35187124352703225, - "grad_norm": 0.400390625, - "learning_rate": 0.00016377788770948528, - "loss": 0.9971, + "epoch": 0.7037122037982673, + "grad_norm": 0.294921875, + "learning_rate": 4.888259698420594e-05, + "loss": 1.0059, "step": 12265 }, { - "epoch": 0.3520146887954901, - "grad_norm": 0.375, - "learning_rate": 0.00016373931235854573, - "loss": 0.9616, + "epoch": 0.7039990819897871, + "grad_norm": 0.265625, + "learning_rate": 4.879655456327083e-05, + "loss": 0.9339, "step": 12270 }, { - "epoch": 0.3521581340639479, - "grad_norm": 0.443359375, - "learning_rate": 0.00016370072102608622, - "loss": 0.9574, + "epoch": 0.7042859601813071, + "grad_norm": 0.279296875, + "learning_rate": 4.8710563489265624e-05, + "loss": 0.9611, "step": 12275 }, { - "epoch": 0.35230157933240575, - "grad_norm": 0.384765625, - "learning_rate": 0.00016366211372178285, - "loss": 0.999, + "epoch": 0.7045728383728269, + "grad_norm": 0.251953125, + "learning_rate": 4.862462384842237e-05, + "loss": 1.0341, "step": 12280 }, { - "epoch": 0.3524450246008635, - "grad_norm": 0.396484375, - "learning_rate": 0.00016362349045531577, - "loss": 0.9407, + "epoch": 0.7048597165643468, + "grad_norm": 0.287109375, + "learning_rate": 4.853873572692151e-05, + "loss": 1.0407, "step": 12285 }, { - "epoch": 0.35258846986932135, - "grad_norm": 0.4296875, - "learning_rate": 0.00016358485123636903, - "loss": 0.9835, + "epoch": 0.7051465947558666, + "grad_norm": 0.251953125, + "learning_rate": 4.845289921089182e-05, + "loss": 0.9762, "step": 12290 }, { - "epoch": 0.3527319151377792, - "grad_norm": 0.38671875, - "learning_rate": 0.00016354619607463074, - "loss": 0.9236, + "epoch": 0.7054334729473866, + "grad_norm": 0.291015625, + "learning_rate": 4.8367114386410486e-05, + "loss": 1.0233, "step": 12295 }, { - "epoch": 0.352875360406237, - "grad_norm": 0.388671875, - "learning_rate": 0.00016350752497979308, - "loss": 0.9407, + "epoch": 0.7057203511389064, + "grad_norm": 0.279296875, + "learning_rate": 4.8281381339502565e-05, + "loss": 0.9128, "step": 12300 }, { - "epoch": 0.35301880567469485, - "grad_norm": 0.44140625, - "learning_rate": 0.0001634688379615521, - "loss": 1.008, + "epoch": 0.7060072293304263, + "grad_norm": 0.279296875, + 
"learning_rate": 4.8195700156141386e-05, + "loss": 1.0109, "step": 12305 }, { - "epoch": 0.3531622509431526, - "grad_norm": 0.37109375, - "learning_rate": 0.00016343013502960786, - "loss": 0.9855, + "epoch": 0.7062941075219462, + "grad_norm": 0.263671875, + "learning_rate": 4.8110070922248284e-05, + "loss": 1.0136, "step": 12310 }, { - "epoch": 0.35330569621161045, - "grad_norm": 0.40625, - "learning_rate": 0.0001633914161936645, - "loss": 0.9976, + "epoch": 0.7065809857134661, + "grad_norm": 0.275390625, + "learning_rate": 4.802449372369242e-05, + "loss": 0.925, "step": 12315 }, { - "epoch": 0.3534491414800683, - "grad_norm": 0.36328125, - "learning_rate": 0.00016335268146343008, - "loss": 0.9118, + "epoch": 0.7068678639049859, + "grad_norm": 0.255859375, + "learning_rate": 4.79389686462908e-05, + "loss": 0.9956, "step": 12320 }, { - "epoch": 0.3535925867485261, - "grad_norm": 0.43359375, - "learning_rate": 0.00016331393084861664, - "loss": 0.9934, + "epoch": 0.7071547420965059, + "grad_norm": 0.27734375, + "learning_rate": 4.785349577580817e-05, + "loss": 0.9552, "step": 12325 }, { - "epoch": 0.3537360320169839, - "grad_norm": 0.3671875, - "learning_rate": 0.00016327516435894025, - "loss": 0.9537, + "epoch": 0.7074416202880257, + "grad_norm": 0.267578125, + "learning_rate": 4.77680751979569e-05, + "loss": 0.9871, "step": 12330 }, { - "epoch": 0.3538794772854417, - "grad_norm": 0.451171875, - "learning_rate": 0.00016323638200412092, - "loss": 0.9811, + "epoch": 0.7077284984795456, + "grad_norm": 0.271484375, + "learning_rate": 4.768270699839691e-05, + "loss": 0.9543, "step": 12335 }, { - "epoch": 0.35402292255389955, - "grad_norm": 0.38671875, - "learning_rate": 0.0001631975837938826, - "loss": 1.0833, + "epoch": 0.7080153766710655, + "grad_norm": 0.267578125, + "learning_rate": 4.759739126273569e-05, + "loss": 0.9342, "step": 12340 }, { - "epoch": 0.3541663678223574, - "grad_norm": 0.357421875, - "learning_rate": 0.00016315876973795333, - "loss": 0.9435, + "epoch": 0.7083022548625854, + "grad_norm": 0.26171875, + "learning_rate": 4.751212807652806e-05, + "loss": 0.9207, "step": 12345 }, { - "epoch": 0.3543098130908152, - "grad_norm": 0.41015625, - "learning_rate": 0.00016311993984606505, - "loss": 0.9772, + "epoch": 0.7085891330541052, + "grad_norm": 0.263671875, + "learning_rate": 4.742691752527606e-05, + "loss": 0.9694, "step": 12350 }, { - "epoch": 0.354453258359273, - "grad_norm": 0.357421875, - "learning_rate": 0.00016308109412795362, - "loss": 0.9713, + "epoch": 0.7088760112456252, + "grad_norm": 0.2490234375, + "learning_rate": 4.7341759694429014e-05, + "loss": 0.8722, "step": 12355 }, { - "epoch": 0.3545967036277308, - "grad_norm": 0.4140625, - "learning_rate": 0.00016304223259335898, - "loss": 1.1149, + "epoch": 0.709162889437145, + "grad_norm": 0.275390625, + "learning_rate": 4.725665466938346e-05, + "loss": 0.9764, "step": 12360 }, { - "epoch": 0.35474014889618866, - "grad_norm": 0.40234375, - "learning_rate": 0.00016300335525202498, - "loss": 0.978, + "epoch": 0.7094497676286649, + "grad_norm": 0.26953125, + "learning_rate": 4.717160253548287e-05, + "loss": 0.8684, "step": 12365 }, { - "epoch": 0.3548835941646465, - "grad_norm": 0.416015625, - "learning_rate": 0.00016296446211369942, - "loss": 0.8706, + "epoch": 0.7097366458201847, + "grad_norm": 0.2578125, + "learning_rate": 4.708660337801773e-05, + "loss": 0.921, "step": 12370 }, { - "epoch": 0.3550270394331043, - "grad_norm": 0.38671875, - "learning_rate": 0.00016292555318813404, - "loss": 0.9717, + "epoch": 
0.7100235240117047, + "grad_norm": 0.318359375, + "learning_rate": 4.700165728222538e-05, + "loss": 0.9742, "step": 12375 }, { - "epoch": 0.3551704847015621, - "grad_norm": 0.375, - "learning_rate": 0.00016288662848508467, - "loss": 0.9819, + "epoch": 0.7103104022032245, + "grad_norm": 0.271484375, + "learning_rate": 4.6916764333289934e-05, + "loss": 1.0064, "step": 12380 }, { - "epoch": 0.3553139299700199, - "grad_norm": 0.431640625, - "learning_rate": 0.00016284768801431092, - "loss": 0.8796, + "epoch": 0.7105972803947443, + "grad_norm": 0.283203125, + "learning_rate": 4.6831924616342217e-05, + "loss": 0.9463, "step": 12385 }, { - "epoch": 0.35545737523847776, - "grad_norm": 0.37890625, - "learning_rate": 0.00016280873178557643, - "loss": 0.9628, + "epoch": 0.7108841585862643, + "grad_norm": 0.30078125, + "learning_rate": 4.674713821645975e-05, + "loss": 0.8921, "step": 12390 }, { - "epoch": 0.3556008205069356, - "grad_norm": 0.39453125, - "learning_rate": 0.00016276975980864883, - "loss": 0.8436, + "epoch": 0.7111710367777841, + "grad_norm": 0.271484375, + "learning_rate": 4.6662405218666525e-05, + "loss": 0.9079, "step": 12395 }, { - "epoch": 0.3557442657753934, - "grad_norm": 0.361328125, - "learning_rate": 0.00016273077209329968, - "loss": 0.8964, + "epoch": 0.711457914969304, + "grad_norm": 0.259765625, + "learning_rate": 4.657772570793289e-05, + "loss": 0.9219, "step": 12400 }, { - "epoch": 0.3558877110438512, - "grad_norm": 0.4296875, - "learning_rate": 0.0001626917686493044, - "loss": 0.9445, + "epoch": 0.711744793160824, + "grad_norm": 0.283203125, + "learning_rate": 4.649309976917574e-05, + "loss": 0.9785, "step": 12405 }, { - "epoch": 0.356031156312309, - "grad_norm": 0.41015625, - "learning_rate": 0.0001626527494864425, - "loss": 1.0354, + "epoch": 0.7120316713523438, + "grad_norm": 0.2470703125, + "learning_rate": 4.6408527487258124e-05, + "loss": 0.9714, "step": 12410 }, { - "epoch": 0.35617460158076686, - "grad_norm": 0.41015625, - "learning_rate": 0.0001626137146144973, - "loss": 0.988, + "epoch": 0.7123185495438636, + "grad_norm": 0.2421875, + "learning_rate": 4.6324008946989314e-05, + "loss": 0.9786, "step": 12415 }, { - "epoch": 0.3563180468492247, - "grad_norm": 0.416015625, - "learning_rate": 0.0001625746640432561, - "loss": 0.9643, + "epoch": 0.7126054277353836, + "grad_norm": 0.255859375, + "learning_rate": 4.62395442331247e-05, + "loss": 0.9278, "step": 12420 }, { - "epoch": 0.3564614921176825, - "grad_norm": 0.375, - "learning_rate": 0.0001625355977825102, - "loss": 0.8295, + "epoch": 0.7128923059269034, + "grad_norm": 0.259765625, + "learning_rate": 4.615513343036567e-05, + "loss": 0.9442, "step": 12425 }, { - "epoch": 0.3566049373861403, - "grad_norm": 0.408203125, - "learning_rate": 0.0001624965158420548, - "loss": 1.1004, + "epoch": 0.7131791841184233, + "grad_norm": 0.279296875, + "learning_rate": 4.607077662335959e-05, + "loss": 0.9334, "step": 12430 }, { - "epoch": 0.35674838265459813, - "grad_norm": 0.412109375, - "learning_rate": 0.00016245741823168894, - "loss": 1.0054, + "epoch": 0.7134660623099432, + "grad_norm": 0.263671875, + "learning_rate": 4.59864738966996e-05, + "loss": 0.9619, "step": 12435 }, { - "epoch": 0.35689182792305596, - "grad_norm": 0.369140625, - "learning_rate": 0.0001624183049612157, - "loss": 0.8751, + "epoch": 0.7137529405014631, + "grad_norm": 0.25390625, + "learning_rate": 4.590222533492473e-05, + "loss": 1.0088, "step": 12440 }, { - "epoch": 0.3570352731915138, - "grad_norm": 0.52734375, - "learning_rate": 
0.00016237917604044206, - "loss": 0.9483, + "epoch": 0.7140398186929829, + "grad_norm": 0.28515625, + "learning_rate": 4.581803102251966e-05, + "loss": 1.0121, "step": 12445 }, { - "epoch": 0.3571787184599716, - "grad_norm": 0.396484375, - "learning_rate": 0.00016234003147917888, - "loss": 1.0226, + "epoch": 0.7143266968845028, + "grad_norm": 0.26953125, + "learning_rate": 4.573389104391449e-05, + "loss": 0.9071, "step": 12450 }, { - "epoch": 0.3573221637284294, - "grad_norm": 0.39453125, - "learning_rate": 0.00016230087128724102, - "loss": 0.9532, + "epoch": 0.7146135750760227, + "grad_norm": 0.251953125, + "learning_rate": 4.564980548348511e-05, + "loss": 0.9467, "step": 12455 }, { - "epoch": 0.35746560899688723, - "grad_norm": 0.3671875, - "learning_rate": 0.0001622616954744472, - "loss": 1.0616, + "epoch": 0.7149004532675426, + "grad_norm": 0.263671875, + "learning_rate": 4.556577442555265e-05, + "loss": 0.9162, "step": 12460 }, { - "epoch": 0.35760905426534506, - "grad_norm": 0.4140625, - "learning_rate": 0.00016222250405062004, - "loss": 0.9519, + "epoch": 0.7151873314590624, + "grad_norm": 0.2734375, + "learning_rate": 4.5481797954383674e-05, + "loss": 0.9336, "step": 12465 }, { - "epoch": 0.3577524995338029, - "grad_norm": 0.404296875, - "learning_rate": 0.00016218329702558616, - "loss": 0.9732, + "epoch": 0.7154742096505824, + "grad_norm": 0.275390625, + "learning_rate": 4.5397876154189956e-05, + "loss": 0.973, "step": 12470 }, { - "epoch": 0.3578959448022607, - "grad_norm": 0.38671875, - "learning_rate": 0.00016214407440917598, - "loss": 0.9362, + "epoch": 0.7157610878421022, + "grad_norm": 0.365234375, + "learning_rate": 4.5314009109128464e-05, + "loss": 0.9593, "step": 12475 }, { - "epoch": 0.3580393900707185, - "grad_norm": 0.388671875, - "learning_rate": 0.00016210483621122395, - "loss": 0.9087, + "epoch": 0.7160479660336221, + "grad_norm": 0.232421875, + "learning_rate": 4.5230196903301266e-05, + "loss": 0.885, "step": 12480 }, { - "epoch": 0.35818283533917633, - "grad_norm": 0.4375, - "learning_rate": 0.00016206558244156828, - "loss": 0.9189, + "epoch": 0.716334844225142, + "grad_norm": 0.25390625, + "learning_rate": 4.51464396207554e-05, + "loss": 0.8933, "step": 12485 }, { - "epoch": 0.35832628060763416, - "grad_norm": 0.376953125, - "learning_rate": 0.00016202631311005124, - "loss": 0.9683, + "epoch": 0.7166217224166619, + "grad_norm": 0.26953125, + "learning_rate": 4.506273734548292e-05, + "loss": 0.9838, "step": 12490 }, { - "epoch": 0.358469725876092, - "grad_norm": 0.392578125, - "learning_rate": 0.00016198702822651893, - "loss": 0.9479, + "epoch": 0.7169086006081817, + "grad_norm": 0.251953125, + "learning_rate": 4.4979090161420645e-05, + "loss": 0.9826, "step": 12495 }, { - "epoch": 0.35861317114454977, - "grad_norm": 0.474609375, - "learning_rate": 0.00016194772780082125, - "loss": 1.0226, + "epoch": 0.7171954787997017, + "grad_norm": 0.251953125, + "learning_rate": 4.489549815245008e-05, + "loss": 0.9192, "step": 12500 }, { - "epoch": 0.3587566164130076, - "grad_norm": 0.435546875, - "learning_rate": 0.00016190841184281224, - "loss": 1.0454, + "epoch": 0.7174823569912215, + "grad_norm": 0.263671875, + "learning_rate": 4.4811961402397554e-05, + "loss": 0.919, "step": 12505 }, { - "epoch": 0.35890006168146543, - "grad_norm": 0.3515625, - "learning_rate": 0.0001618690803623496, - "loss": 0.9492, + "epoch": 0.7177692351827414, + "grad_norm": 0.265625, + "learning_rate": 4.472847999503389e-05, + "loss": 0.9777, "step": 12510 }, { - "epoch": 0.35904350694992326, - 
"grad_norm": 0.404296875, - "learning_rate": 0.00016182973336929498, - "loss": 1.0092, + "epoch": 0.7180561133742613, + "grad_norm": 0.255859375, + "learning_rate": 4.4645054014074426e-05, + "loss": 0.931, "step": 12515 }, { - "epoch": 0.3591869522183811, - "grad_norm": 0.412109375, - "learning_rate": 0.00016179037087351406, - "loss": 0.9802, + "epoch": 0.7183429915657812, + "grad_norm": 0.3046875, + "learning_rate": 4.456168354317892e-05, + "loss": 0.9206, "step": 12520 }, { - "epoch": 0.35933039748683887, - "grad_norm": 0.4140625, - "learning_rate": 0.0001617509928848762, - "loss": 0.9417, + "epoch": 0.718629869757301, + "grad_norm": 0.279296875, + "learning_rate": 4.4478368665951476e-05, + "loss": 0.9084, "step": 12525 }, { - "epoch": 0.3594738427552967, - "grad_norm": 0.392578125, - "learning_rate": 0.00016171159941325483, - "loss": 1.0045, + "epoch": 0.7189167479488209, + "grad_norm": 0.248046875, + "learning_rate": 4.43951094659404e-05, + "loss": 0.9422, "step": 12530 }, { - "epoch": 0.35961728802375453, - "grad_norm": 0.41015625, - "learning_rate": 0.0001616721904685271, - "loss": 0.9538, + "epoch": 0.7192036261403408, + "grad_norm": 0.28515625, + "learning_rate": 4.431190602663827e-05, + "loss": 0.9395, "step": 12535 }, { - "epoch": 0.35976073329221236, - "grad_norm": 0.40625, - "learning_rate": 0.00016163276606057415, - "loss": 0.9457, + "epoch": 0.7194905043318607, + "grad_norm": 0.2734375, + "learning_rate": 4.422875843148165e-05, + "loss": 0.9637, "step": 12540 }, { - "epoch": 0.3599041785606702, - "grad_norm": 0.50390625, - "learning_rate": 0.00016159332619928093, - "loss": 0.9602, + "epoch": 0.7197773825233805, + "grad_norm": 0.259765625, + "learning_rate": 4.414566676385118e-05, + "loss": 0.9839, "step": 12545 }, { - "epoch": 0.36004762382912797, - "grad_norm": 0.37109375, - "learning_rate": 0.00016155387089453638, - "loss": 0.9371, + "epoch": 0.7200642607149005, + "grad_norm": 0.271484375, + "learning_rate": 4.406263110707125e-05, + "loss": 0.9328, "step": 12550 }, { - "epoch": 0.3601910690975858, - "grad_norm": 0.400390625, - "learning_rate": 0.00016151440015623313, - "loss": 0.9422, + "epoch": 0.7203511389064203, + "grad_norm": 0.271484375, + "learning_rate": 4.39796515444103e-05, + "loss": 0.999, "step": 12555 }, { - "epoch": 0.36033451436604363, - "grad_norm": 0.435546875, - "learning_rate": 0.00016147491399426785, - "loss": 0.9556, + "epoch": 0.7206380170979402, + "grad_norm": 0.26953125, + "learning_rate": 4.3896728159080424e-05, + "loss": 0.9816, "step": 12560 }, { - "epoch": 0.36047795963450147, - "grad_norm": 0.37890625, - "learning_rate": 0.00016143541241854094, - "loss": 0.9342, + "epoch": 0.7209248952894601, + "grad_norm": 0.251953125, + "learning_rate": 4.381386103423735e-05, + "loss": 1.0244, "step": 12565 }, { - "epoch": 0.3606214049029593, - "grad_norm": 0.396484375, - "learning_rate": 0.0001613958954389568, - "loss": 0.9662, + "epoch": 0.72121177348098, + "grad_norm": 0.29296875, + "learning_rate": 4.373105025298041e-05, + "loss": 0.9107, "step": 12570 }, { - "epoch": 0.3607648501714171, - "grad_norm": 0.390625, - "learning_rate": 0.00016135636306542357, - "loss": 1.0377, + "epoch": 0.7214986516724998, + "grad_norm": 0.27734375, + "learning_rate": 4.364829589835245e-05, + "loss": 0.9351, "step": 12575 }, { - "epoch": 0.3609082954398749, - "grad_norm": 0.3671875, - "learning_rate": 0.00016131681530785335, - "loss": 0.9563, + "epoch": 0.7217855298640198, + "grad_norm": 0.26171875, + "learning_rate": 4.356559805333971e-05, + "loss": 0.9963, "step": 12580 }, { 
- "epoch": 0.36105174070833274, - "grad_norm": 0.373046875, - "learning_rate": 0.00016127725217616199, - "loss": 0.865, + "epoch": 0.7220724080555396, + "grad_norm": 0.259765625, + "learning_rate": 4.348295680087181e-05, + "loss": 0.9584, "step": 12585 }, { - "epoch": 0.36119518597679057, - "grad_norm": 0.419921875, - "learning_rate": 0.00016123767368026929, - "loss": 0.9146, + "epoch": 0.7223592862470595, + "grad_norm": 0.30078125, + "learning_rate": 4.340037222382156e-05, + "loss": 0.9093, "step": 12590 }, { - "epoch": 0.3613386312452484, - "grad_norm": 0.40625, - "learning_rate": 0.0001611980798300989, - "loss": 0.8929, + "epoch": 0.7226461644385794, + "grad_norm": 0.279296875, + "learning_rate": 4.3317844405005e-05, + "loss": 0.9699, "step": 12595 }, { - "epoch": 0.3614820765137062, - "grad_norm": 0.3828125, - "learning_rate": 0.0001611584706355782, - "loss": 1.0151, + "epoch": 0.7229330426300993, + "grad_norm": 0.26953125, + "learning_rate": 4.323537342718111e-05, + "loss": 0.9292, "step": 12600 }, { - "epoch": 0.361625521782164, - "grad_norm": 0.419921875, - "learning_rate": 0.0001611188461066386, - "loss": 0.922, + "epoch": 0.7232199208216191, + "grad_norm": 0.26953125, + "learning_rate": 4.315295937305207e-05, + "loss": 0.8625, "step": 12605 }, { - "epoch": 0.36176896705062184, - "grad_norm": 0.392578125, - "learning_rate": 0.0001610792062532152, - "loss": 0.9691, + "epoch": 0.723506799013139, + "grad_norm": 0.26953125, + "learning_rate": 4.307060232526283e-05, + "loss": 0.9948, "step": 12610 }, { - "epoch": 0.36191241231907967, - "grad_norm": 0.384765625, - "learning_rate": 0.000161039551085247, - "loss": 0.8989, + "epoch": 0.7237936772046589, + "grad_norm": 0.283203125, + "learning_rate": 4.2988302366401254e-05, + "loss": 0.9497, "step": 12615 }, { - "epoch": 0.3620558575875375, - "grad_norm": 0.3828125, - "learning_rate": 0.00016099988061267688, - "loss": 0.943, + "epoch": 0.7240805553961788, + "grad_norm": 0.2734375, + "learning_rate": 4.2906059578997896e-05, + "loss": 0.9112, "step": 12620 }, { - "epoch": 0.3621993028559953, - "grad_norm": 0.416015625, - "learning_rate": 0.00016096019484545146, - "loss": 0.9447, + "epoch": 0.7243674335876986, + "grad_norm": 0.267578125, + "learning_rate": 4.2823874045526026e-05, + "loss": 0.9787, "step": 12625 }, { - "epoch": 0.3623427481244531, - "grad_norm": 0.37109375, - "learning_rate": 0.00016092049379352132, - "loss": 0.9295, + "epoch": 0.7246543117792186, + "grad_norm": 0.2470703125, + "learning_rate": 4.274174584840143e-05, + "loss": 0.9606, "step": 12630 }, { - "epoch": 0.36248619339291094, - "grad_norm": 0.34375, - "learning_rate": 0.0001608807774668408, - "loss": 0.9501, + "epoch": 0.7249411899707384, + "grad_norm": 0.26171875, + "learning_rate": 4.265967506998253e-05, + "loss": 0.9875, "step": 12635 }, { - "epoch": 0.36262963866136877, - "grad_norm": 0.375, - "learning_rate": 0.000160841045875368, - "loss": 1.0084, + "epoch": 0.7252280681622583, + "grad_norm": 0.2421875, + "learning_rate": 4.257766179257005e-05, + "loss": 0.9702, "step": 12640 }, { - "epoch": 0.3627730839298266, - "grad_norm": 0.373046875, - "learning_rate": 0.000160801299029065, - "loss": 0.9956, + "epoch": 0.7255149463537782, + "grad_norm": 0.259765625, + "learning_rate": 4.2495706098407085e-05, + "loss": 0.9266, "step": 12645 }, { - "epoch": 0.3629165291982844, - "grad_norm": 0.40234375, - "learning_rate": 0.0001607615369378976, - "loss": 0.9973, + "epoch": 0.7258018245452981, + "grad_norm": 0.271484375, + "learning_rate": 4.2413808069678996e-05, + "loss": 
0.9093, "step": 12650 }, { - "epoch": 0.3630599744667422, - "grad_norm": 0.384765625, - "learning_rate": 0.0001607217596118354, - "loss": 0.9688, + "epoch": 0.7260887027368179, + "grad_norm": 0.296875, + "learning_rate": 4.2331967788513295e-05, + "loss": 0.9782, "step": 12655 }, { - "epoch": 0.36320341973520004, - "grad_norm": 0.37109375, - "learning_rate": 0.00016068196706085197, - "loss": 0.975, + "epoch": 0.7263755809283379, + "grad_norm": 0.2470703125, + "learning_rate": 4.225018533697962e-05, + "loss": 0.9237, "step": 12660 }, { - "epoch": 0.36334686500365787, - "grad_norm": 0.359375, - "learning_rate": 0.00016064215929492454, - "loss": 0.8701, + "epoch": 0.7266624591198577, + "grad_norm": 0.26953125, + "learning_rate": 4.216846079708958e-05, + "loss": 1.0039, "step": 12665 }, { - "epoch": 0.3634903102721157, - "grad_norm": 0.373046875, - "learning_rate": 0.00016060233632403422, - "loss": 0.9107, + "epoch": 0.7269493373113776, + "grad_norm": 0.275390625, + "learning_rate": 4.2086794250796734e-05, + "loss": 0.957, "step": 12670 }, { - "epoch": 0.3636337555405735, - "grad_norm": 0.43359375, - "learning_rate": 0.00016056249815816592, - "loss": 1.0136, + "epoch": 0.7272362155028975, + "grad_norm": 0.255859375, + "learning_rate": 4.2005185779996484e-05, + "loss": 0.9602, "step": 12675 }, { - "epoch": 0.3637772008090313, - "grad_norm": 0.453125, - "learning_rate": 0.0001605226448073084, - "loss": 0.9598, + "epoch": 0.7275230936944174, + "grad_norm": 0.24609375, + "learning_rate": 4.1923635466525936e-05, + "loss": 0.9461, "step": 12680 }, { - "epoch": 0.36392064607748914, - "grad_norm": 0.41015625, - "learning_rate": 0.00016048277628145417, - "loss": 1.0285, + "epoch": 0.7278099718859372, + "grad_norm": 0.275390625, + "learning_rate": 4.1842143392164004e-05, + "loss": 0.969, "step": 12685 }, { - "epoch": 0.36406409134594697, - "grad_norm": 0.470703125, - "learning_rate": 0.0001604428925905995, - "loss": 0.9275, + "epoch": 0.7280968500774571, + "grad_norm": 0.259765625, + "learning_rate": 4.17607096386311e-05, + "loss": 0.9375, "step": 12690 }, { - "epoch": 0.36420753661440475, - "grad_norm": 0.42578125, - "learning_rate": 0.00016040299374474461, - "loss": 0.8751, + "epoch": 0.728383728268977, + "grad_norm": 0.26171875, + "learning_rate": 4.167933428758916e-05, + "loss": 0.9121, "step": 12695 }, { - "epoch": 0.3643509818828626, - "grad_norm": 0.392578125, - "learning_rate": 0.00016036307975389344, - "loss": 0.8968, + "epoch": 0.7286706064604969, + "grad_norm": 0.2470703125, + "learning_rate": 4.159801742064158e-05, + "loss": 0.8924, "step": 12700 }, { - "epoch": 0.3644944271513204, - "grad_norm": 0.41015625, - "learning_rate": 0.0001603231506280537, - "loss": 0.9174, + "epoch": 0.7289574846520167, + "grad_norm": 0.3046875, + "learning_rate": 4.151675911933308e-05, + "loss": 0.9526, "step": 12705 }, { - "epoch": 0.36463787241977824, - "grad_norm": 0.447265625, - "learning_rate": 0.00016028320637723694, - "loss": 0.8389, + "epoch": 0.7292443628435367, + "grad_norm": 0.26171875, + "learning_rate": 4.143555946514964e-05, + "loss": 0.9478, "step": 12710 }, { - "epoch": 0.3647813176882361, - "grad_norm": 0.392578125, - "learning_rate": 0.00016024324701145845, - "loss": 0.8812, + "epoch": 0.7295312410350565, + "grad_norm": 0.263671875, + "learning_rate": 4.135441853951857e-05, + "loss": 0.9295, "step": 12715 }, { - "epoch": 0.36492476295669385, - "grad_norm": 0.42578125, - "learning_rate": 0.00016020327254073736, - "loss": 1.1132, + "epoch": 0.7298181192265764, + "grad_norm": 0.25390625, + 
"learning_rate": 4.1273336423808065e-05, + "loss": 0.8896, "step": 12720 }, { - "epoch": 0.3650682082251517, - "grad_norm": 0.390625, - "learning_rate": 0.00016016328297509657, - "loss": 0.923, + "epoch": 0.7301049974180963, + "grad_norm": 0.2734375, + "learning_rate": 4.119231319932747e-05, + "loss": 0.9014, "step": 12725 }, { - "epoch": 0.3652116534936095, - "grad_norm": 0.404296875, - "learning_rate": 0.0001601232783245628, - "loss": 0.9406, + "epoch": 0.7303918756096162, + "grad_norm": 0.2470703125, + "learning_rate": 4.1111348947327034e-05, + "loss": 0.9335, "step": 12730 }, { - "epoch": 0.36535509876206734, - "grad_norm": 0.3984375, - "learning_rate": 0.00016008325859916646, - "loss": 0.9496, + "epoch": 0.730678753801136, + "grad_norm": 0.259765625, + "learning_rate": 4.1030443748997974e-05, + "loss": 0.9627, "step": 12735 }, { - "epoch": 0.3654985440305252, - "grad_norm": 0.482421875, - "learning_rate": 0.00016004322380894182, - "loss": 1.0128, + "epoch": 0.730965631992656, + "grad_norm": 0.271484375, + "learning_rate": 4.094959768547214e-05, + "loss": 0.9359, "step": 12740 }, { - "epoch": 0.36564198929898295, - "grad_norm": 0.357421875, - "learning_rate": 0.00016000317396392693, - "loss": 0.969, + "epoch": 0.7312525101841758, + "grad_norm": 0.255859375, + "learning_rate": 4.086881083782216e-05, + "loss": 0.9178, "step": 12745 }, { - "epoch": 0.3657854345674408, - "grad_norm": 0.40234375, - "learning_rate": 0.00015996310907416355, - "loss": 1.0003, + "epoch": 0.7315393883756957, + "grad_norm": 0.265625, + "learning_rate": 4.078808328706127e-05, + "loss": 0.9652, "step": 12750 }, { - "epoch": 0.3659288798358986, - "grad_norm": 0.423828125, - "learning_rate": 0.00015992302914969727, - "loss": 1.0789, + "epoch": 0.7318262665672156, + "grad_norm": 0.251953125, + "learning_rate": 4.070741511414323e-05, + "loss": 0.8837, "step": 12755 }, { - "epoch": 0.36607232510435644, - "grad_norm": 0.376953125, - "learning_rate": 0.00015988293420057744, - "loss": 1.0006, + "epoch": 0.7321131447587355, + "grad_norm": 0.26171875, + "learning_rate": 4.062680639996225e-05, + "loss": 0.9901, "step": 12760 }, { - "epoch": 0.3662157703728143, - "grad_norm": 0.400390625, - "learning_rate": 0.00015984282423685717, - "loss": 1.011, + "epoch": 0.7324000229502553, + "grad_norm": 0.2734375, + "learning_rate": 4.054625722535301e-05, + "loss": 0.9346, "step": 12765 }, { - "epoch": 0.36635921564127205, - "grad_norm": 0.36328125, - "learning_rate": 0.0001598026992685933, - "loss": 0.9939, + "epoch": 0.7326869011417751, + "grad_norm": 0.25, + "learning_rate": 4.0465767671090304e-05, + "loss": 0.8749, "step": 12770 }, { - "epoch": 0.3665026609097299, - "grad_norm": 0.375, - "learning_rate": 0.00015976255930584654, - "loss": 0.9388, + "epoch": 0.7329737793332951, + "grad_norm": 0.25, + "learning_rate": 4.038533781788924e-05, + "loss": 0.9063, "step": 12775 }, { - "epoch": 0.3666461061781877, - "grad_norm": 0.3671875, - "learning_rate": 0.00015972240435868117, - "loss": 0.9893, + "epoch": 0.733260657524815, + "grad_norm": 0.26171875, + "learning_rate": 4.030496774640514e-05, + "loss": 0.9602, "step": 12780 }, { - "epoch": 0.36678955144664555, - "grad_norm": 0.388671875, - "learning_rate": 0.00015968223443716545, - "loss": 0.9257, + "epoch": 0.7335475357163348, + "grad_norm": 0.26953125, + "learning_rate": 4.022465753723323e-05, + "loss": 0.9149, "step": 12785 }, { - "epoch": 0.3669329967151034, - "grad_norm": 0.431640625, - "learning_rate": 0.00015964204955137124, - "loss": 0.9891, + "epoch": 0.7338344139078548, + 
"grad_norm": 0.27734375, + "learning_rate": 4.014440727090879e-05, + "loss": 0.9483, "step": 12790 }, { - "epoch": 0.36707644198356115, - "grad_norm": 0.384765625, - "learning_rate": 0.00015960184971137417, - "loss": 0.8896, + "epoch": 0.7341212920993746, + "grad_norm": 0.28125, + "learning_rate": 4.0064217027906945e-05, + "loss": 0.9309, "step": 12795 }, { - "epoch": 0.367219887252019, - "grad_norm": 0.384765625, - "learning_rate": 0.00015956163492725372, - "loss": 0.9586, + "epoch": 0.7344081702908944, + "grad_norm": 0.265625, + "learning_rate": 3.998408688864267e-05, + "loss": 0.8794, "step": 12800 }, { - "epoch": 0.3673633325204768, - "grad_norm": 0.396484375, - "learning_rate": 0.00015952140520909298, - "loss": 1.0285, + "epoch": 0.7346950484824144, + "grad_norm": 0.255859375, + "learning_rate": 3.990401693347065e-05, + "loss": 0.9702, "step": 12805 }, { - "epoch": 0.36750677778893465, - "grad_norm": 0.390625, - "learning_rate": 0.00015948116056697888, - "loss": 0.9096, + "epoch": 0.7349819266739342, + "grad_norm": 0.27734375, + "learning_rate": 3.982400724268516e-05, + "loss": 0.9087, "step": 12810 }, { - "epoch": 0.3676502230573925, - "grad_norm": 0.384765625, - "learning_rate": 0.00015944090101100206, - "loss": 1.0331, + "epoch": 0.7352688048654541, + "grad_norm": 0.2470703125, + "learning_rate": 3.974405789652022e-05, + "loss": 0.9429, "step": 12815 }, { - "epoch": 0.36779366832585025, - "grad_norm": 0.376953125, - "learning_rate": 0.0001594006265512569, - "loss": 0.9517, + "epoch": 0.735555683056974, + "grad_norm": 0.263671875, + "learning_rate": 3.96641689751491e-05, + "loss": 0.9047, "step": 12820 }, { - "epoch": 0.3679371135943081, - "grad_norm": 0.375, - "learning_rate": 0.00015936033719784153, - "loss": 0.9248, + "epoch": 0.7358425612484939, + "grad_norm": 0.251953125, + "learning_rate": 3.95843405586846e-05, + "loss": 0.973, "step": 12825 }, { - "epoch": 0.3680805588627659, - "grad_norm": 0.357421875, - "learning_rate": 0.00015932003296085774, - "loss": 0.9397, + "epoch": 0.7361294394400137, + "grad_norm": 0.294921875, + "learning_rate": 3.950457272717889e-05, + "loss": 1.0204, "step": 12830 }, { - "epoch": 0.36822400413122375, - "grad_norm": 0.4140625, - "learning_rate": 0.00015927971385041117, - "loss": 1.0426, + "epoch": 0.7364163176315337, + "grad_norm": 0.265625, + "learning_rate": 3.9424865560623305e-05, + "loss": 1.0043, "step": 12835 }, { - "epoch": 0.3683674493996816, - "grad_norm": 0.466796875, - "learning_rate": 0.00015923937987661116, - "loss": 0.9602, + "epoch": 0.7367031958230535, + "grad_norm": 0.259765625, + "learning_rate": 3.9345219138948365e-05, + "loss": 0.9604, "step": 12840 }, { - "epoch": 0.36851089466813935, - "grad_norm": 0.3828125, - "learning_rate": 0.0001591990310495707, - "loss": 0.9848, + "epoch": 0.7369900740145734, + "grad_norm": 0.3125, + "learning_rate": 3.9265633542023684e-05, + "loss": 0.9756, "step": 12845 }, { - "epoch": 0.3686543399365972, - "grad_norm": 0.369140625, - "learning_rate": 0.00015915866737940655, - "loss": 0.8689, + "epoch": 0.7372769522060932, + "grad_norm": 0.310546875, + "learning_rate": 3.9186108849657885e-05, + "loss": 1.0192, "step": 12850 }, { - "epoch": 0.368797785205055, - "grad_norm": 0.3828125, - "learning_rate": 0.00015911828887623926, - "loss": 0.9316, + "epoch": 0.7375638303976132, + "grad_norm": 0.26171875, + "learning_rate": 3.91066451415985e-05, + "loss": 0.9264, "step": 12855 }, { - "epoch": 0.36894123047351285, - "grad_norm": 0.380859375, - "learning_rate": 0.00015907789555019296, - "loss": 0.9763, + 
"epoch": 0.737850708589133, + "grad_norm": 0.271484375, + "learning_rate": 3.9027242497531865e-05, + "loss": 1.0112, "step": 12860 }, { - "epoch": 0.3690846757419706, - "grad_norm": 0.375, - "learning_rate": 0.00015903748741139556, - "loss": 0.9837, + "epoch": 0.7381375867806529, + "grad_norm": 0.279296875, + "learning_rate": 3.8947900997083255e-05, + "loss": 0.9489, "step": 12865 }, { - "epoch": 0.36922812101042846, - "grad_norm": 0.404296875, - "learning_rate": 0.0001589970644699788, - "loss": 1.0005, + "epoch": 0.7384244649721728, + "grad_norm": 0.271484375, + "learning_rate": 3.8868620719816395e-05, + "loss": 0.9954, "step": 12870 }, { - "epoch": 0.3693715662788863, - "grad_norm": 0.59375, - "learning_rate": 0.0001589566267360779, - "loss": 0.8706, + "epoch": 0.7387113431636927, + "grad_norm": 0.2734375, + "learning_rate": 3.878940174523371e-05, + "loss": 0.9128, "step": 12875 }, { - "epoch": 0.3695150115473441, - "grad_norm": 0.37890625, - "learning_rate": 0.00015891617421983205, - "loss": 0.9364, + "epoch": 0.7389982213552125, + "grad_norm": 0.26953125, + "learning_rate": 3.8710244152776264e-05, + "loss": 0.9386, "step": 12880 }, { - "epoch": 0.36965845681580195, - "grad_norm": 0.3828125, - "learning_rate": 0.0001588757069313839, - "loss": 0.9991, + "epoch": 0.7392850995467325, + "grad_norm": 0.2734375, + "learning_rate": 3.8631148021823406e-05, + "loss": 0.927, "step": 12885 }, { - "epoch": 0.3698019020842597, - "grad_norm": 0.390625, - "learning_rate": 0.00015883522488087994, - "loss": 0.9197, + "epoch": 0.7395719777382523, + "grad_norm": 0.2431640625, + "learning_rate": 3.8552113431692925e-05, + "loss": 0.8867, "step": 12890 }, { - "epoch": 0.36994534735271756, - "grad_norm": 0.41015625, - "learning_rate": 0.00015879472807847037, - "loss": 0.9249, + "epoch": 0.7398588559297722, + "grad_norm": 0.265625, + "learning_rate": 3.847314046164089e-05, + "loss": 0.9352, "step": 12895 }, { - "epoch": 0.3700887926211754, - "grad_norm": 0.3828125, - "learning_rate": 0.00015875421653430903, - "loss": 0.9532, + "epoch": 0.7401457341212921, + "grad_norm": 0.2734375, + "learning_rate": 3.8394229190861567e-05, + "loss": 0.9445, "step": 12900 }, { - "epoch": 0.3702322378896332, - "grad_norm": 0.39453125, - "learning_rate": 0.00015871369025855355, - "loss": 0.9365, + "epoch": 0.740432612312812, + "grad_norm": 0.28515625, + "learning_rate": 3.831537969848731e-05, + "loss": 0.9806, "step": 12905 }, { - "epoch": 0.37037568315809105, - "grad_norm": 0.423828125, - "learning_rate": 0.00015867314926136509, - "loss": 1.0382, + "epoch": 0.7407194905043318, + "grad_norm": 0.296875, + "learning_rate": 3.823659206358865e-05, + "loss": 0.9799, "step": 12910 }, { - "epoch": 0.3705191284265488, - "grad_norm": 0.41796875, - "learning_rate": 0.00015863259355290861, - "loss": 0.9688, + "epoch": 0.7410063686958518, + "grad_norm": 0.27734375, + "learning_rate": 3.8157866365174e-05, + "loss": 0.9522, "step": 12915 }, { - "epoch": 0.37066257369500666, - "grad_norm": 0.380859375, - "learning_rate": 0.0001585920231433528, - "loss": 0.9109, + "epoch": 0.7412932468873716, + "grad_norm": 0.255859375, + "learning_rate": 3.807920268218961e-05, + "loss": 0.9113, "step": 12920 }, { - "epoch": 0.3708060189634645, - "grad_norm": 0.359375, - "learning_rate": 0.00015855143804286996, - "loss": 0.909, + "epoch": 0.7415801250788915, + "grad_norm": 0.26953125, + "learning_rate": 3.800060109351957e-05, + "loss": 0.9087, "step": 12925 }, { - "epoch": 0.3709494642319223, - "grad_norm": 0.38671875, - "learning_rate": 0.00015851083826163607, 
- "loss": 0.9958, + "epoch": 0.7418670032704113, + "grad_norm": 0.26171875, + "learning_rate": 3.792206167798582e-05, + "loss": 0.89, "step": 12930 }, { - "epoch": 0.37109290950038015, - "grad_norm": 0.36328125, - "learning_rate": 0.00015847022380983088, - "loss": 0.9333, + "epoch": 0.7421538814619313, + "grad_norm": 0.259765625, + "learning_rate": 3.784358451434783e-05, + "loss": 0.9056, "step": 12935 }, { - "epoch": 0.37123635476883793, - "grad_norm": 0.396484375, - "learning_rate": 0.00015842959469763765, - "loss": 1.0394, + "epoch": 0.7424407596534511, + "grad_norm": 0.353515625, + "learning_rate": 3.776516968130266e-05, + "loss": 0.9744, "step": 12940 }, { - "epoch": 0.37137980003729576, - "grad_norm": 0.384765625, - "learning_rate": 0.0001583889509352435, - "loss": 0.926, + "epoch": 0.742727637844971, + "grad_norm": 0.271484375, + "learning_rate": 3.768681725748488e-05, + "loss": 0.925, "step": 12945 }, { - "epoch": 0.3715232453057536, - "grad_norm": 0.40234375, - "learning_rate": 0.00015834829253283915, - "loss": 1.0001, + "epoch": 0.7430145160364909, + "grad_norm": 0.2890625, + "learning_rate": 3.760852732146649e-05, + "loss": 0.88, "step": 12950 }, { - "epoch": 0.3716666905742114, - "grad_norm": 0.412109375, - "learning_rate": 0.00015830761950061896, - "loss": 0.9797, + "epoch": 0.7433013942280108, + "grad_norm": 0.267578125, + "learning_rate": 3.753029995175677e-05, + "loss": 0.9313, "step": 12955 }, { - "epoch": 0.37181013584266925, - "grad_norm": 0.326171875, - "learning_rate": 0.00015826693184878095, - "loss": 0.8923, + "epoch": 0.7435882724195306, + "grad_norm": 0.2734375, + "learning_rate": 3.7452135226802385e-05, + "loss": 0.9604, "step": 12960 }, { - "epoch": 0.37195358111112703, - "grad_norm": 0.353515625, - "learning_rate": 0.0001582262295875269, - "loss": 0.9167, + "epoch": 0.7438751506110506, + "grad_norm": 0.27734375, + "learning_rate": 3.7374033224987084e-05, + "loss": 0.9876, "step": 12965 }, { - "epoch": 0.37209702637958486, - "grad_norm": 0.3984375, - "learning_rate": 0.00015818551272706217, - "loss": 0.9649, + "epoch": 0.7441620288025704, + "grad_norm": 0.279296875, + "learning_rate": 3.729599402463162e-05, + "loss": 0.9691, "step": 12970 }, { - "epoch": 0.3722404716480427, - "grad_norm": 0.39453125, - "learning_rate": 0.00015814478127759577, - "loss": 0.9792, + "epoch": 0.7444489069940903, + "grad_norm": 0.2734375, + "learning_rate": 3.7218017703993994e-05, + "loss": 0.9409, "step": 12975 }, { - "epoch": 0.3723839169165005, - "grad_norm": 0.373046875, - "learning_rate": 0.00015810403524934042, - "loss": 1.0042, + "epoch": 0.7447357851856102, + "grad_norm": 0.3046875, + "learning_rate": 3.714010434126899e-05, + "loss": 0.9514, "step": 12980 }, { - "epoch": 0.37252736218495835, - "grad_norm": 0.400390625, - "learning_rate": 0.00015806327465251255, - "loss": 1.0022, + "epoch": 0.7450226633771301, + "grad_norm": 0.267578125, + "learning_rate": 3.706225401458831e-05, + "loss": 0.9097, "step": 12985 }, { - "epoch": 0.37267080745341613, - "grad_norm": 0.361328125, - "learning_rate": 0.00015802249949733202, - "loss": 1.065, + "epoch": 0.7453095415686499, + "grad_norm": 0.25, + "learning_rate": 3.6984466802020436e-05, + "loss": 0.9851, "step": 12990 }, { - "epoch": 0.37281425272187396, - "grad_norm": 0.388671875, - "learning_rate": 0.0001579817097940226, - "loss": 0.9418, + "epoch": 0.7455964197601699, + "grad_norm": 0.287109375, + "learning_rate": 3.690674278157056e-05, + "loss": 0.9324, "step": 12995 }, { - "epoch": 0.3729576979903318, - "grad_norm": 0.353515625, - 
"learning_rate": 0.00015794090555281155, - "loss": 0.9129, + "epoch": 0.7458832979516897, + "grad_norm": 0.26171875, + "learning_rate": 3.6829082031180496e-05, + "loss": 0.9267, "step": 13000 }, { - "epoch": 0.3731011432587896, - "grad_norm": 0.384765625, - "learning_rate": 0.00015790008678392986, - "loss": 0.8534, + "epoch": 0.7461701761432096, + "grad_norm": 0.259765625, + "learning_rate": 3.6751484628728594e-05, + "loss": 0.9357, "step": 13005 }, { - "epoch": 0.37324458852724746, - "grad_norm": 0.400390625, - "learning_rate": 0.00015785925349761208, - "loss": 0.9684, + "epoch": 0.7464570543347294, + "grad_norm": 0.2734375, + "learning_rate": 3.6673950652029766e-05, + "loss": 0.9016, "step": 13010 }, { - "epoch": 0.37338803379570523, - "grad_norm": 0.39453125, - "learning_rate": 0.00015781840570409648, - "loss": 1.0187, + "epoch": 0.7467439325262494, + "grad_norm": 0.263671875, + "learning_rate": 3.659648017883526e-05, + "loss": 0.9252, "step": 13015 }, { - "epoch": 0.37353147906416306, - "grad_norm": 0.3828125, - "learning_rate": 0.00015777754341362487, - "loss": 0.9366, + "epoch": 0.7470308107177692, + "grad_norm": 0.255859375, + "learning_rate": 3.651907328683254e-05, + "loss": 0.896, "step": 13020 }, { - "epoch": 0.3736749243326209, - "grad_norm": 0.380859375, - "learning_rate": 0.0001577366666364428, - "loss": 0.8822, + "epoch": 0.7473176889092891, + "grad_norm": 0.275390625, + "learning_rate": 3.6441730053645506e-05, + "loss": 0.9848, "step": 13025 }, { - "epoch": 0.3738183696010787, - "grad_norm": 0.390625, - "learning_rate": 0.00015769577538279949, - "loss": 0.9811, + "epoch": 0.747604567100809, + "grad_norm": 0.251953125, + "learning_rate": 3.6364450556834097e-05, + "loss": 0.9676, "step": 13030 }, { - "epoch": 0.3739618148695365, - "grad_norm": 0.353515625, - "learning_rate": 0.00015765486966294755, - "loss": 0.952, + "epoch": 0.7478914452923289, + "grad_norm": 0.26953125, + "learning_rate": 3.628723487389437e-05, + "loss": 0.9408, "step": 13035 }, { - "epoch": 0.37410526013799433, - "grad_norm": 0.380859375, - "learning_rate": 0.0001576139494871435, - "loss": 0.9143, + "epoch": 0.7481783234838487, + "grad_norm": 0.26171875, + "learning_rate": 3.621008308225837e-05, + "loss": 0.8881, "step": 13040 }, { - "epoch": 0.37424870540645216, - "grad_norm": 0.421875, - "learning_rate": 0.00015757301486564725, - "loss": 1.0309, + "epoch": 0.7484652016753687, + "grad_norm": 0.267578125, + "learning_rate": 3.61329952592941e-05, + "loss": 0.9676, "step": 13045 }, { - "epoch": 0.37439215067491, - "grad_norm": 0.4140625, - "learning_rate": 0.00015753206580872256, - "loss": 1.022, + "epoch": 0.7487520798668885, + "grad_norm": 0.28515625, + "learning_rate": 3.605597148230541e-05, + "loss": 0.9577, "step": 13050 }, { - "epoch": 0.3745355959433678, - "grad_norm": 0.37890625, - "learning_rate": 0.00015749110232663658, - "loss": 0.9215, + "epoch": 0.7490389580584084, + "grad_norm": 0.275390625, + "learning_rate": 3.597901182853185e-05, + "loss": 0.9754, "step": 13055 }, { - "epoch": 0.3746790412118256, - "grad_norm": 0.376953125, - "learning_rate": 0.0001574501244296603, - "loss": 1.0349, + "epoch": 0.7493258362499283, + "grad_norm": 0.26953125, + "learning_rate": 3.590211637514884e-05, + "loss": 0.9921, "step": 13060 }, { - "epoch": 0.37482248648028343, - "grad_norm": 0.38671875, - "learning_rate": 0.00015740913212806813, - "loss": 0.8908, + "epoch": 0.7496127144414482, + "grad_norm": 0.263671875, + "learning_rate": 3.582528519926729e-05, + "loss": 0.9823, "step": 13065 }, { - "epoch": 
0.37496593174874127, - "grad_norm": 0.380859375, - "learning_rate": 0.0001573681254321382, - "loss": 1.0232, + "epoch": 0.749899592632968, + "grad_norm": 0.255859375, + "learning_rate": 3.574851837793357e-05, + "loss": 0.9203, "step": 13070 }, { - "epoch": 0.3751093770171991, - "grad_norm": 0.41015625, - "learning_rate": 0.0001573271043521522, - "loss": 0.9213, + "epoch": 0.750186470824488, + "grad_norm": 0.25390625, + "learning_rate": 3.567181598812973e-05, + "loss": 0.9569, "step": 13075 }, { - "epoch": 0.37525282228565693, - "grad_norm": 0.40234375, - "learning_rate": 0.00015728606889839553, - "loss": 0.8958, + "epoch": 0.7504733490160078, + "grad_norm": 0.265625, + "learning_rate": 3.559517810677308e-05, + "loss": 0.9337, "step": 13080 }, { - "epoch": 0.3753962675541147, - "grad_norm": 0.384765625, - "learning_rate": 0.000157245019081157, - "loss": 1.0365, + "epoch": 0.7507602272075277, + "grad_norm": 0.255859375, + "learning_rate": 3.551860481071624e-05, + "loss": 0.8669, "step": 13085 }, { - "epoch": 0.37553971282257254, - "grad_norm": 0.447265625, - "learning_rate": 0.00015720395491072918, - "loss": 0.967, + "epoch": 0.7510471053990475, + "grad_norm": 0.263671875, + "learning_rate": 3.544209617674707e-05, + "loss": 0.9714, "step": 13090 }, { - "epoch": 0.37568315809103037, - "grad_norm": 0.416015625, - "learning_rate": 0.00015716287639740826, - "loss": 0.9147, + "epoch": 0.7513339835905675, + "grad_norm": 0.255859375, + "learning_rate": 3.536565228158864e-05, + "loss": 0.8875, "step": 13095 }, { - "epoch": 0.3758266033594882, - "grad_norm": 0.349609375, - "learning_rate": 0.00015712178355149385, - "loss": 0.9507, + "epoch": 0.7516208617820873, + "grad_norm": 0.26953125, + "learning_rate": 3.528927320189903e-05, + "loss": 0.8935, "step": 13100 }, { - "epoch": 0.37597004862794603, - "grad_norm": 0.40625, - "learning_rate": 0.00015708067638328934, - "loss": 0.9393, + "epoch": 0.7519077399736072, + "grad_norm": 0.26171875, + "learning_rate": 3.521295901427132e-05, + "loss": 0.9711, "step": 13105 }, { - "epoch": 0.3761134938964038, - "grad_norm": 0.35546875, - "learning_rate": 0.00015703955490310162, - "loss": 0.88, + "epoch": 0.7521946181651271, + "grad_norm": 0.279296875, + "learning_rate": 3.5136709795233626e-05, + "loss": 0.9369, "step": 13110 }, { - "epoch": 0.37625693916486164, - "grad_norm": 0.388671875, - "learning_rate": 0.00015699841912124115, - "loss": 0.9085, + "epoch": 0.752481496356647, + "grad_norm": 0.2431640625, + "learning_rate": 3.506052562124883e-05, + "loss": 0.875, "step": 13115 }, { - "epoch": 0.37640038443331947, - "grad_norm": 0.412109375, - "learning_rate": 0.00015695726904802208, - "loss": 0.9968, + "epoch": 0.7527683745481668, + "grad_norm": 0.271484375, + "learning_rate": 3.498440656871449e-05, + "loss": 0.9301, "step": 13120 }, { - "epoch": 0.3765438297017773, - "grad_norm": 0.390625, - "learning_rate": 0.000156916104693762, - "loss": 1.0753, + "epoch": 0.7530552527396868, + "grad_norm": 0.26953125, + "learning_rate": 3.4908352713963077e-05, + "loss": 0.9551, "step": 13125 }, { - "epoch": 0.37668727497023513, - "grad_norm": 0.38671875, - "learning_rate": 0.0001568749260687822, - "loss": 0.9795, + "epoch": 0.7533421309312066, + "grad_norm": 0.251953125, + "learning_rate": 3.483236413326151e-05, + "loss": 0.902, "step": 13130 }, { - "epoch": 0.3768307202386929, - "grad_norm": 0.54296875, - "learning_rate": 0.00015683373318340745, - "loss": 0.9899, + "epoch": 0.7536290091227265, + "grad_norm": 0.255859375, + "learning_rate": 3.475644090281133e-05, + "loss": 
0.9567, "step": 13135 }, { - "epoch": 0.37697416550715074, - "grad_norm": 0.3984375, - "learning_rate": 0.00015679252604796623, - "loss": 0.8556, + "epoch": 0.7539158873142464, + "grad_norm": 0.265625, + "learning_rate": 3.468058309874851e-05, + "loss": 0.9256, "step": 13140 }, { - "epoch": 0.37711761077560857, - "grad_norm": 0.41796875, - "learning_rate": 0.00015675130467279042, - "loss": 1.0245, + "epoch": 0.7542027655057663, + "grad_norm": 0.25390625, + "learning_rate": 3.460479079714343e-05, + "loss": 0.9339, "step": 13145 }, { - "epoch": 0.3772610560440664, - "grad_norm": 0.392578125, - "learning_rate": 0.0001567100690682156, - "loss": 0.9004, + "epoch": 0.7544896436972861, + "grad_norm": 0.255859375, + "learning_rate": 3.452906407400074e-05, + "loss": 0.9711, "step": 13150 }, { - "epoch": 0.37740450131252423, - "grad_norm": 0.41796875, - "learning_rate": 0.0001566688192445809, - "loss": 0.8911, + "epoch": 0.7547765218888061, + "grad_norm": 0.2451171875, + "learning_rate": 3.4453403005259444e-05, + "loss": 0.9775, "step": 13155 }, { - "epoch": 0.377547946580982, - "grad_norm": 0.392578125, - "learning_rate": 0.00015662755521222895, - "loss": 1.0173, + "epoch": 0.7550634000803259, + "grad_norm": 0.2890625, + "learning_rate": 3.43778076667926e-05, + "loss": 0.9288, "step": 13160 }, { - "epoch": 0.37769139184943984, - "grad_norm": 0.390625, - "learning_rate": 0.00015658627698150602, - "loss": 0.9582, + "epoch": 0.7553502782718458, + "grad_norm": 0.2490234375, + "learning_rate": 3.43022781344074e-05, + "loss": 0.8841, "step": 13165 }, { - "epoch": 0.37783483711789767, - "grad_norm": 0.369140625, - "learning_rate": 0.00015654498456276188, - "loss": 0.898, + "epoch": 0.7556371564633656, + "grad_norm": 0.265625, + "learning_rate": 3.4226814483844946e-05, + "loss": 0.9771, "step": 13170 }, { - "epoch": 0.3779782823863555, - "grad_norm": 0.4296875, - "learning_rate": 0.00015650367796634992, - "loss": 1.039, + "epoch": 0.7559240346548856, + "grad_norm": 0.265625, + "learning_rate": 3.4151416790780456e-05, + "loss": 0.9575, "step": 13175 }, { - "epoch": 0.37812172765481333, - "grad_norm": 0.37109375, - "learning_rate": 0.000156462357202627, - "loss": 0.9934, + "epoch": 0.7562109128464054, + "grad_norm": 0.259765625, + "learning_rate": 3.4076085130822866e-05, + "loss": 0.9298, "step": 13180 }, { - "epoch": 0.3782651729232711, - "grad_norm": 0.423828125, - "learning_rate": 0.0001564210222819536, - "loss": 1.0243, + "epoch": 0.7564977910379252, + "grad_norm": 0.26171875, + "learning_rate": 3.400081957951492e-05, + "loss": 0.8998, "step": 13185 }, { - "epoch": 0.37840861819172894, - "grad_norm": 0.380859375, - "learning_rate": 0.0001563796732146938, - "loss": 0.8689, + "epoch": 0.7567846692294452, + "grad_norm": 0.26171875, + "learning_rate": 3.392562021233311e-05, + "loss": 0.902, "step": 13190 }, { - "epoch": 0.37855206346018677, - "grad_norm": 0.384765625, - "learning_rate": 0.000156338310011215, - "loss": 0.9354, + "epoch": 0.757071547420965, + "grad_norm": 0.27734375, + "learning_rate": 3.38504871046875e-05, + "loss": 0.9792, "step": 13195 }, { - "epoch": 0.3786955087286446, - "grad_norm": 0.4375, - "learning_rate": 0.00015629693268188842, - "loss": 1.0861, + "epoch": 0.7573584256124849, + "grad_norm": 0.291015625, + "learning_rate": 3.3775420331921736e-05, + "loss": 0.9386, "step": 13200 }, { - "epoch": 0.3788389539971024, - "grad_norm": 0.435546875, - "learning_rate": 0.0001562555412370887, - "loss": 1.1923, + "epoch": 0.7576453038040049, + "grad_norm": 0.271484375, + "learning_rate": 
3.3700419969312994e-05, + "loss": 0.932, "step": 13205 }, { - "epoch": 0.3789823992655602, - "grad_norm": 0.384765625, - "learning_rate": 0.000156214135687194, - "loss": 0.9385, + "epoch": 0.7579321819955247, + "grad_norm": 0.26171875, + "learning_rate": 3.362548609207177e-05, + "loss": 0.9206, "step": 13210 }, { - "epoch": 0.37912584453401804, - "grad_norm": 0.37890625, - "learning_rate": 0.000156172716042586, - "loss": 0.9412, + "epoch": 0.7582190601870445, + "grad_norm": 0.259765625, + "learning_rate": 3.355061877534192e-05, + "loss": 0.9754, "step": 13215 }, { - "epoch": 0.3792692898024759, - "grad_norm": 0.396484375, - "learning_rate": 0.00015613128231365002, - "loss": 0.9705, + "epoch": 0.7585059383785645, + "grad_norm": 0.2578125, + "learning_rate": 3.3475818094200585e-05, + "loss": 0.9253, "step": 13220 }, { - "epoch": 0.3794127350709337, - "grad_norm": 0.40234375, - "learning_rate": 0.00015608983451077482, - "loss": 0.9867, + "epoch": 0.7587928165700843, + "grad_norm": 0.2734375, + "learning_rate": 3.340108412365803e-05, + "loss": 0.9826, "step": 13225 }, { - "epoch": 0.3795561803393915, - "grad_norm": 0.3671875, - "learning_rate": 0.00015604837264435268, - "loss": 1.0024, + "epoch": 0.7590796947616042, + "grad_norm": 0.265625, + "learning_rate": 3.332641693865766e-05, + "loss": 0.9125, "step": 13230 }, { - "epoch": 0.3796996256078493, - "grad_norm": 0.4140625, - "learning_rate": 0.00015600689672477946, - "loss": 1.125, + "epoch": 0.7593665729531242, + "grad_norm": 0.259765625, + "learning_rate": 3.3251816614075884e-05, + "loss": 0.9322, "step": 13235 }, { - "epoch": 0.37984307087630714, - "grad_norm": 0.4140625, - "learning_rate": 0.00015596540676245454, - "loss": 0.925, + "epoch": 0.759653451144644, + "grad_norm": 0.263671875, + "learning_rate": 3.317728322472209e-05, + "loss": 0.9654, "step": 13240 }, { - "epoch": 0.379986516144765, - "grad_norm": 0.427734375, - "learning_rate": 0.0001559239027677808, - "loss": 0.9428, + "epoch": 0.7599403293361638, + "grad_norm": 0.283203125, + "learning_rate": 3.310281684533852e-05, + "loss": 0.9085, "step": 13245 }, { - "epoch": 0.3801299614132228, - "grad_norm": 0.384765625, - "learning_rate": 0.00015588238475116464, - "loss": 0.9818, + "epoch": 0.7602272075276837, + "grad_norm": 0.2734375, + "learning_rate": 3.302841755060018e-05, + "loss": 0.9488, "step": 13250 }, { - "epoch": 0.3802734066816806, - "grad_norm": 0.349609375, - "learning_rate": 0.00015584085272301595, - "loss": 0.9799, + "epoch": 0.7605140857192036, + "grad_norm": 0.26953125, + "learning_rate": 3.2954085415114946e-05, + "loss": 0.922, "step": 13255 }, { - "epoch": 0.3804168519501384, - "grad_norm": 0.4453125, - "learning_rate": 0.0001557993066937482, - "loss": 0.963, + "epoch": 0.7608009639107235, + "grad_norm": 0.283203125, + "learning_rate": 3.2879820513423184e-05, + "loss": 0.9653, "step": 13260 }, { - "epoch": 0.38056029721859624, - "grad_norm": 0.384765625, - "learning_rate": 0.00015575774667377833, - "loss": 0.9138, + "epoch": 0.7610878421022433, + "grad_norm": 0.25390625, + "learning_rate": 3.2805622919997934e-05, + "loss": 0.8962, "step": 13265 }, { - "epoch": 0.3807037424870541, - "grad_norm": 0.36328125, - "learning_rate": 0.0001557161726735268, - "loss": 0.9356, + "epoch": 0.7613747202937633, + "grad_norm": 0.265625, + "learning_rate": 3.273149270924468e-05, + "loss": 0.9213, "step": 13270 }, { - "epoch": 0.3808471877555119, - "grad_norm": 0.359375, - "learning_rate": 0.00015567458470341748, - "loss": 0.9485, + "epoch": 0.7616615984852831, + "grad_norm": 
0.265625, + "learning_rate": 3.2657429955501394e-05, + "loss": 0.903, "step": 13275 }, { - "epoch": 0.3809906330239697, - "grad_norm": 0.4296875, - "learning_rate": 0.0001556329827738779, - "loss": 0.9113, + "epoch": 0.761948476676803, + "grad_norm": 0.263671875, + "learning_rate": 3.258343473303832e-05, + "loss": 0.9081, "step": 13280 }, { - "epoch": 0.3811340782924275, - "grad_norm": 0.41015625, - "learning_rate": 0.000155591366895339, - "loss": 0.9185, + "epoch": 0.762235354868323, + "grad_norm": 0.2578125, + "learning_rate": 3.2509507116058134e-05, + "loss": 0.9147, "step": 13285 }, { - "epoch": 0.38127752356088535, - "grad_norm": 0.4140625, - "learning_rate": 0.00015554973707823525, - "loss": 0.9575, + "epoch": 0.7625222330598428, + "grad_norm": 0.291015625, + "learning_rate": 3.243564717869552e-05, + "loss": 0.9349, "step": 13290 }, { - "epoch": 0.3814209688293432, - "grad_norm": 0.423828125, - "learning_rate": 0.0001555080933330046, - "loss": 0.9958, + "epoch": 0.7628091112513626, + "grad_norm": 0.255859375, + "learning_rate": 3.2361854995017416e-05, + "loss": 0.9296, "step": 13295 }, { - "epoch": 0.381564414097801, - "grad_norm": 0.390625, - "learning_rate": 0.00015546643567008848, - "loss": 0.9162, + "epoch": 0.7630959894428826, + "grad_norm": 0.2734375, + "learning_rate": 3.228813063902276e-05, + "loss": 0.8695, "step": 13300 }, { - "epoch": 0.3817078593662588, - "grad_norm": 0.392578125, - "learning_rate": 0.0001554247640999318, - "loss": 1.0714, + "epoch": 0.7633828676344024, + "grad_norm": 0.259765625, + "learning_rate": 3.2214474184642574e-05, + "loss": 0.9618, "step": 13305 }, { - "epoch": 0.3818513046347166, - "grad_norm": 0.361328125, - "learning_rate": 0.00015538307863298303, - "loss": 0.8706, + "epoch": 0.7636697458259223, + "grad_norm": 0.275390625, + "learning_rate": 3.2140885705739674e-05, + "loss": 0.9476, "step": 13310 }, { - "epoch": 0.38199474990317445, - "grad_norm": 0.400390625, - "learning_rate": 0.00015534137927969405, - "loss": 1.0037, + "epoch": 0.7639566240174422, + "grad_norm": 0.259765625, + "learning_rate": 3.2067365276108754e-05, + "loss": 0.9865, "step": 13315 }, { - "epoch": 0.3821381951716323, - "grad_norm": 0.443359375, - "learning_rate": 0.00015529966605052023, - "loss": 0.9937, + "epoch": 0.7642435022089621, + "grad_norm": 0.287109375, + "learning_rate": 3.199391296947627e-05, + "loss": 0.9788, "step": 13320 }, { - "epoch": 0.3822816404400901, - "grad_norm": 0.392578125, - "learning_rate": 0.00015525793895592046, - "loss": 1.0451, + "epoch": 0.7645303804004819, + "grad_norm": 0.263671875, + "learning_rate": 3.192052885950034e-05, + "loss": 0.9549, "step": 13325 }, { - "epoch": 0.3824250857085479, - "grad_norm": 0.38671875, - "learning_rate": 0.00015521619800635704, - "loss": 0.981, + "epoch": 0.7648172585920018, + "grad_norm": 0.2412109375, + "learning_rate": 3.1847213019770716e-05, + "loss": 0.9153, "step": 13330 }, { - "epoch": 0.3825685309770057, - "grad_norm": 0.408203125, - "learning_rate": 0.0001551744432122958, - "loss": 0.8988, + "epoch": 0.7651041367835217, + "grad_norm": 0.255859375, + "learning_rate": 3.1773965523808754e-05, + "loss": 1.0291, "step": 13335 }, { - "epoch": 0.38271197624546355, - "grad_norm": 0.78125, - "learning_rate": 0.00015513267458420606, - "loss": 1.0238, + "epoch": 0.7653910149750416, + "grad_norm": 0.263671875, + "learning_rate": 3.1700786445067135e-05, + "loss": 0.9117, "step": 13340 }, { - "epoch": 0.3828554215139214, - "grad_norm": 0.427734375, - "learning_rate": 0.00015509089213256054, - "loss": 0.9933, + 
"epoch": 0.7656778931665614, + "grad_norm": 0.267578125, + "learning_rate": 3.162767585692997e-05, + "loss": 0.9118, "step": 13345 }, { - "epoch": 0.3829988667823792, - "grad_norm": 0.40234375, - "learning_rate": 0.0001550490958678355, - "loss": 1.0137, + "epoch": 0.7659647713580814, + "grad_norm": 0.25390625, + "learning_rate": 3.155463383271282e-05, + "loss": 0.9031, "step": 13350 }, { - "epoch": 0.383142312050837, - "grad_norm": 0.43359375, - "learning_rate": 0.00015500728580051054, - "loss": 0.9123, + "epoch": 0.7662516495496012, + "grad_norm": 0.263671875, + "learning_rate": 3.148166044566233e-05, + "loss": 0.9564, "step": 13355 }, { - "epoch": 0.3832857573192948, - "grad_norm": 0.427734375, - "learning_rate": 0.00015496546194106888, - "loss": 0.9077, + "epoch": 0.7665385277411211, + "grad_norm": 0.275390625, + "learning_rate": 3.14087557689564e-05, + "loss": 0.9529, "step": 13360 }, { - "epoch": 0.38342920258775265, - "grad_norm": 0.451171875, - "learning_rate": 0.00015492362429999715, - "loss": 0.9732, + "epoch": 0.766825405932641, + "grad_norm": 0.298828125, + "learning_rate": 3.133591987570399e-05, + "loss": 0.9671, "step": 13365 }, { - "epoch": 0.3835726478562105, - "grad_norm": 0.41015625, - "learning_rate": 0.00015488177288778532, - "loss": 0.9127, + "epoch": 0.7671122841241609, + "grad_norm": 0.2578125, + "learning_rate": 3.1263152838945095e-05, + "loss": 0.9353, "step": 13370 }, { - "epoch": 0.38371609312466826, - "grad_norm": 0.3828125, - "learning_rate": 0.00015483990771492694, - "loss": 0.9691, + "epoch": 0.7673991623156807, + "grad_norm": 0.267578125, + "learning_rate": 3.1190454731650675e-05, + "loss": 0.9341, "step": 13375 }, { - "epoch": 0.3838595383931261, - "grad_norm": 0.375, - "learning_rate": 0.00015479802879191898, - "loss": 0.965, + "epoch": 0.7676860405072007, + "grad_norm": 0.263671875, + "learning_rate": 3.111782562672251e-05, + "loss": 0.9831, "step": 13380 }, { - "epoch": 0.3840029836615839, - "grad_norm": 0.392578125, - "learning_rate": 0.00015475613612926184, - "loss": 0.9837, + "epoch": 0.7679729186987205, + "grad_norm": 0.28125, + "learning_rate": 3.104526559699333e-05, + "loss": 0.9083, "step": 13385 }, { - "epoch": 0.38414642893004175, - "grad_norm": 0.39453125, - "learning_rate": 0.00015471422973745936, - "loss": 1.0171, + "epoch": 0.7682597968902404, + "grad_norm": 0.267578125, + "learning_rate": 3.0972774715226406e-05, + "loss": 0.9044, "step": 13390 }, { - "epoch": 0.3842898741984996, - "grad_norm": 0.390625, - "learning_rate": 0.00015467230962701886, - "loss": 1.0052, + "epoch": 0.7685466750817603, + "grad_norm": 0.271484375, + "learning_rate": 3.090035305411575e-05, + "loss": 0.9071, "step": 13395 }, { - "epoch": 0.38443331946695736, - "grad_norm": 0.37109375, - "learning_rate": 0.00015463037580845107, - "loss": 0.9308, + "epoch": 0.7688335532732802, + "grad_norm": 0.271484375, + "learning_rate": 3.0828000686286027e-05, + "loss": 0.9348, "step": 13400 }, { - "epoch": 0.3845767647354152, - "grad_norm": 0.380859375, - "learning_rate": 0.00015458842829227018, - "loss": 0.995, + "epoch": 0.7691204314648, + "grad_norm": 0.251953125, + "learning_rate": 3.075571768429233e-05, + "loss": 0.9485, "step": 13405 }, { - "epoch": 0.384720210003873, - "grad_norm": 0.423828125, - "learning_rate": 0.00015454646708899374, - "loss": 0.9924, + "epoch": 0.76940730965632, + "grad_norm": 0.26171875, + "learning_rate": 3.06835041206202e-05, + "loss": 0.9058, "step": 13410 }, { - "epoch": 0.38486365527233085, - "grad_norm": 0.361328125, - "learning_rate": 
0.00015450449220914285, - "loss": 1.0266, + "epoch": 0.7696941878478398, + "grad_norm": 0.2734375, + "learning_rate": 3.0611360067685576e-05, + "loss": 0.9473, "step": 13415 }, { - "epoch": 0.3850071005407887, - "grad_norm": 0.361328125, - "learning_rate": 0.00015446250366324196, - "loss": 0.8743, + "epoch": 0.7699810660393597, + "grad_norm": 0.2490234375, + "learning_rate": 3.0539285597834675e-05, + "loss": 0.9342, "step": 13420 }, { - "epoch": 0.38515054580924646, - "grad_norm": 0.388671875, - "learning_rate": 0.00015442050146181893, - "loss": 0.941, + "epoch": 0.7702679442308795, + "grad_norm": 0.25390625, + "learning_rate": 3.0467280783343944e-05, + "loss": 0.9746, "step": 13425 }, { - "epoch": 0.3852939910777043, - "grad_norm": 0.392578125, - "learning_rate": 0.00015437848561540517, - "loss": 0.9982, + "epoch": 0.7705548224223995, + "grad_norm": 0.25, + "learning_rate": 3.0395345696419918e-05, + "loss": 0.941, "step": 13430 }, { - "epoch": 0.3854374363461621, - "grad_norm": 0.41015625, - "learning_rate": 0.0001543364561345353, - "loss": 0.92, + "epoch": 0.7708417006139193, + "grad_norm": 0.259765625, + "learning_rate": 3.0323480409199378e-05, + "loss": 0.9427, "step": 13435 }, { - "epoch": 0.38558088161461995, - "grad_norm": 0.412109375, - "learning_rate": 0.00015429441302974755, - "loss": 0.9154, + "epoch": 0.7711285788054392, + "grad_norm": 0.267578125, + "learning_rate": 3.0251684993748886e-05, + "loss": 0.9854, "step": 13440 }, { - "epoch": 0.3857243268830778, - "grad_norm": 0.392578125, - "learning_rate": 0.0001542523563115835, - "loss": 0.8358, + "epoch": 0.7714154569969591, + "grad_norm": 0.2734375, + "learning_rate": 3.017995952206506e-05, + "loss": 1.0031, "step": 13445 }, { - "epoch": 0.38586777215153556, - "grad_norm": 0.40625, - "learning_rate": 0.00015421028599058812, - "loss": 0.9326, + "epoch": 0.771702335188479, + "grad_norm": 0.26171875, + "learning_rate": 3.010830406607441e-05, + "loss": 0.9125, "step": 13450 }, { - "epoch": 0.3860112174199934, - "grad_norm": 0.384765625, - "learning_rate": 0.0001541682020773098, - "loss": 0.9456, + "epoch": 0.7719892133799988, + "grad_norm": 0.27734375, + "learning_rate": 3.003671869763317e-05, + "loss": 1.0298, "step": 13455 }, { - "epoch": 0.3861546626884512, - "grad_norm": 0.349609375, - "learning_rate": 0.00015412610458230037, - "loss": 0.9383, + "epoch": 0.7722760915715188, + "grad_norm": 0.26953125, + "learning_rate": 2.9965203488527317e-05, + "loss": 0.9565, "step": 13460 }, { - "epoch": 0.38629810795690905, - "grad_norm": 0.37109375, - "learning_rate": 0.000154083993516115, - "loss": 0.9081, + "epoch": 0.7725629697630386, + "grad_norm": 0.259765625, + "learning_rate": 2.9893758510472436e-05, + "loss": 0.9615, "step": 13465 }, { - "epoch": 0.3864415532253669, - "grad_norm": 0.396484375, - "learning_rate": 0.00015404186888931233, - "loss": 0.9811, + "epoch": 0.7728498479545585, + "grad_norm": 0.279296875, + "learning_rate": 2.982238383511373e-05, + "loss": 0.97, "step": 13470 }, { - "epoch": 0.38658499849382466, - "grad_norm": 0.423828125, - "learning_rate": 0.00015399973071245443, - "loss": 1.0074, + "epoch": 0.7731367261460784, + "grad_norm": 0.27734375, + "learning_rate": 2.975107953402585e-05, + "loss": 0.9835, "step": 13475 }, { - "epoch": 0.3867284437622825, - "grad_norm": 0.392578125, - "learning_rate": 0.00015395757899610662, - "loss": 1.0359, + "epoch": 0.7734236043375983, + "grad_norm": 0.28515625, + "learning_rate": 2.967984567871297e-05, + "loss": 0.9662, "step": 13480 }, { - "epoch": 0.3868718890307403, - 
"grad_norm": 0.373046875, - "learning_rate": 0.00015391541375083776, - "loss": 1.0095, + "epoch": 0.7737104825291181, + "grad_norm": 0.275390625, + "learning_rate": 2.960868234060855e-05, + "loss": 0.9375, "step": 13485 }, { - "epoch": 0.38701533429919815, - "grad_norm": 0.349609375, - "learning_rate": 0.00015387323498722, - "loss": 0.9565, + "epoch": 0.7739973607206381, + "grad_norm": 0.26953125, + "learning_rate": 2.9537589591075298e-05, + "loss": 0.9713, "step": 13490 }, { - "epoch": 0.387158779567656, - "grad_norm": 0.388671875, - "learning_rate": 0.00015383104271582903, - "loss": 0.938, + "epoch": 0.7742842389121579, + "grad_norm": 0.259765625, + "learning_rate": 2.9466567501405185e-05, + "loss": 0.8821, "step": 13495 }, { - "epoch": 0.38730222483611376, - "grad_norm": 0.41015625, - "learning_rate": 0.00015378883694724369, - "loss": 0.9793, + "epoch": 0.7745711171036778, + "grad_norm": 0.27734375, + "learning_rate": 2.939561614281936e-05, + "loss": 1.0189, "step": 13500 }, { - "epoch": 0.3874456701045716, - "grad_norm": 0.39453125, - "learning_rate": 0.00015374661769204643, - "loss": 0.984, + "epoch": 0.7748579952951976, + "grad_norm": 0.28515625, + "learning_rate": 2.9324735586468e-05, + "loss": 0.9269, "step": 13505 }, { - "epoch": 0.3875891153730294, - "grad_norm": 0.4140625, - "learning_rate": 0.00015370438496082302, - "loss": 0.9838, + "epoch": 0.7751448734867176, + "grad_norm": 0.259765625, + "learning_rate": 2.9253925903430267e-05, + "loss": 0.9318, "step": 13510 }, { - "epoch": 0.38773256064148726, - "grad_norm": 0.369140625, - "learning_rate": 0.00015366213876416248, - "loss": 0.8725, + "epoch": 0.7754317516782374, + "grad_norm": 0.263671875, + "learning_rate": 2.9183187164714288e-05, + "loss": 0.9497, "step": 13515 }, { - "epoch": 0.3878760059099451, - "grad_norm": 0.390625, - "learning_rate": 0.0001536198791126574, - "loss": 1.0043, + "epoch": 0.7757186298697573, + "grad_norm": 0.26953125, + "learning_rate": 2.9112519441257e-05, + "loss": 0.8565, "step": 13520 }, { - "epoch": 0.38801945117840286, - "grad_norm": 0.376953125, - "learning_rate": 0.00015357760601690355, - "loss": 0.8977, + "epoch": 0.7760055080612772, + "grad_norm": 0.259765625, + "learning_rate": 2.9041922803924158e-05, + "loss": 0.9198, "step": 13525 }, { - "epoch": 0.3881628964468607, - "grad_norm": 0.400390625, - "learning_rate": 0.00015353531948750026, - "loss": 0.9002, + "epoch": 0.7762923862527971, + "grad_norm": 0.259765625, + "learning_rate": 2.8971397323510275e-05, + "loss": 0.9625, "step": 13530 }, { - "epoch": 0.3883063417153185, - "grad_norm": 0.42578125, - "learning_rate": 0.0001534930195350501, - "loss": 0.9692, + "epoch": 0.7765792644443169, + "grad_norm": 0.2734375, + "learning_rate": 2.890094307073845e-05, + "loss": 0.8976, "step": 13535 }, { - "epoch": 0.38844978698377636, - "grad_norm": 0.39453125, - "learning_rate": 0.0001534507061701591, - "loss": 0.8257, + "epoch": 0.7768661426358369, + "grad_norm": 0.3046875, + "learning_rate": 2.883056011626032e-05, + "loss": 0.9786, "step": 13540 }, { - "epoch": 0.38859323225223413, - "grad_norm": 0.41015625, - "learning_rate": 0.0001534083794034365, - "loss": 0.9982, + "epoch": 0.7771530208273567, + "grad_norm": 0.26171875, + "learning_rate": 2.8760248530656063e-05, + "loss": 0.96, "step": 13545 }, { - "epoch": 0.38873667752069196, - "grad_norm": 0.486328125, - "learning_rate": 0.00015336603924549503, - "loss": 1.0869, + "epoch": 0.7774398990188766, + "grad_norm": 0.259765625, + "learning_rate": 2.8690008384434363e-05, + "loss": 0.903, "step": 13550 
}, { - "epoch": 0.3888801227891498, - "grad_norm": 0.37109375, - "learning_rate": 0.00015332368570695082, - "loss": 0.9489, + "epoch": 0.7777267772103965, + "grad_norm": 0.267578125, + "learning_rate": 2.861983974803215e-05, + "loss": 0.9419, "step": 13555 }, { - "epoch": 0.3890235680576076, - "grad_norm": 0.380859375, - "learning_rate": 0.0001532813187984232, - "loss": 0.9499, + "epoch": 0.7780136554019164, + "grad_norm": 0.267578125, + "learning_rate": 2.8549742691814705e-05, + "loss": 0.9764, "step": 13560 }, { - "epoch": 0.38916701332606546, - "grad_norm": 0.40234375, - "learning_rate": 0.00015323893853053493, - "loss": 0.9485, + "epoch": 0.7783005335934362, + "grad_norm": 0.263671875, + "learning_rate": 2.8479717286075502e-05, + "loss": 0.9253, "step": 13565 }, { - "epoch": 0.38931045859452323, - "grad_norm": 0.396484375, - "learning_rate": 0.0001531965449139122, - "loss": 0.9037, + "epoch": 0.7785874117849562, + "grad_norm": 0.265625, + "learning_rate": 2.8409763601036188e-05, + "loss": 0.9614, "step": 13570 }, { - "epoch": 0.38945390386298107, - "grad_norm": 0.39453125, - "learning_rate": 0.0001531541379591844, - "loss": 0.87, + "epoch": 0.778874289976476, + "grad_norm": 0.267578125, + "learning_rate": 2.8339881706846427e-05, + "loss": 1.0581, "step": 13575 }, { - "epoch": 0.3895973491314389, - "grad_norm": 0.427734375, - "learning_rate": 0.00015311171767698435, - "loss": 0.9812, + "epoch": 0.7791611681679959, + "grad_norm": 0.267578125, + "learning_rate": 2.8270071673584008e-05, + "loss": 1.0102, "step": 13580 }, { - "epoch": 0.38974079439989673, - "grad_norm": 0.353515625, - "learning_rate": 0.00015306928407794818, - "loss": 0.9611, + "epoch": 0.7794480463595157, + "grad_norm": 0.478515625, + "learning_rate": 2.82003335712546e-05, + "loss": 0.9411, "step": 13585 }, { - "epoch": 0.38988423966835456, - "grad_norm": 0.41015625, - "learning_rate": 0.0001530268371727154, - "loss": 0.8829, + "epoch": 0.7797349245510357, + "grad_norm": 0.265625, + "learning_rate": 2.8130667469791626e-05, + "loss": 0.858, "step": 13590 }, { - "epoch": 0.39002768493681234, - "grad_norm": 0.41796875, - "learning_rate": 0.0001529843769719288, - "loss": 0.8953, + "epoch": 0.7800218027425555, + "grad_norm": 0.2578125, + "learning_rate": 2.8061073439056507e-05, + "loss": 0.8923, "step": 13595 }, { - "epoch": 0.39017113020527017, - "grad_norm": 0.53125, - "learning_rate": 0.0001529419034862346, - "loss": 1.1083, + "epoch": 0.7803086809340753, + "grad_norm": 0.275390625, + "learning_rate": 2.799155154883826e-05, + "loss": 0.9096, "step": 13600 }, { - "epoch": 0.390314575473728, - "grad_norm": 0.37890625, - "learning_rate": 0.00015289941672628224, - "loss": 0.9099, + "epoch": 0.7805955591255953, + "grad_norm": 0.271484375, + "learning_rate": 2.7922101868853577e-05, + "loss": 0.9464, "step": 13605 }, { - "epoch": 0.39045802074218583, - "grad_norm": 0.392578125, - "learning_rate": 0.00015285691670272451, - "loss": 0.9734, + "epoch": 0.7808824373171152, + "grad_norm": 0.251953125, + "learning_rate": 2.785272446874677e-05, + "loss": 0.9081, "step": 13610 }, { - "epoch": 0.39060146601064366, - "grad_norm": 0.41015625, - "learning_rate": 0.00015281440342621757, - "loss": 0.9153, + "epoch": 0.781169315508635, + "grad_norm": 0.298828125, + "learning_rate": 2.778341941808965e-05, + "loss": 0.9735, "step": 13615 }, { - "epoch": 0.39074491127910144, - "grad_norm": 0.4296875, - "learning_rate": 0.0001527718769074209, - "loss": 1.0035, + "epoch": 0.781456193700155, + "grad_norm": 0.275390625, + "learning_rate": 
2.771418678638147e-05, + "loss": 0.9514, "step": 13620 }, { - "epoch": 0.39088835654755927, - "grad_norm": 0.4375, - "learning_rate": 0.00015272933715699727, - "loss": 1.0594, + "epoch": 0.7817430718916748, + "grad_norm": 0.28125, + "learning_rate": 2.7645026643048855e-05, + "loss": 0.9684, "step": 13625 }, { - "epoch": 0.3910318018160171, - "grad_norm": 0.419921875, - "learning_rate": 0.00015268678418561276, - "loss": 0.9599, + "epoch": 0.7820299500831946, + "grad_norm": 0.2734375, + "learning_rate": 2.7575939057445786e-05, + "loss": 0.9782, "step": 13630 }, { - "epoch": 0.39117524708447493, - "grad_norm": 0.373046875, - "learning_rate": 0.00015264421800393683, - "loss": 0.9359, + "epoch": 0.7823168282747146, + "grad_norm": 0.2578125, + "learning_rate": 2.750692409885347e-05, + "loss": 0.9351, "step": 13635 }, { - "epoch": 0.39131869235293276, - "grad_norm": 0.5078125, - "learning_rate": 0.00015260163862264217, - "loss": 0.9733, + "epoch": 0.7826037064662345, + "grad_norm": 0.2734375, + "learning_rate": 2.7437981836480166e-05, + "loss": 0.9496, "step": 13640 }, { - "epoch": 0.39146213762139054, - "grad_norm": 0.388671875, - "learning_rate": 0.00015255904605240484, - "loss": 0.9379, + "epoch": 0.7828905846577543, + "grad_norm": 0.2578125, + "learning_rate": 2.736911233946141e-05, + "loss": 0.8933, "step": 13645 }, { - "epoch": 0.39160558288984837, - "grad_norm": 0.396484375, - "learning_rate": 0.00015251644030390415, - "loss": 0.887, + "epoch": 0.7831774628492743, + "grad_norm": 0.2578125, + "learning_rate": 2.730031567685968e-05, + "loss": 0.9402, "step": 13650 }, { - "epoch": 0.3917490281583062, - "grad_norm": 0.421875, - "learning_rate": 0.00015247382138782278, - "loss": 1.1132, + "epoch": 0.7834643410407941, + "grad_norm": 0.287109375, + "learning_rate": 2.723159191766439e-05, + "loss": 0.9332, "step": 13655 }, { - "epoch": 0.39189247342676403, - "grad_norm": 0.400390625, - "learning_rate": 0.00015243118931484667, - "loss": 0.9577, + "epoch": 0.783751219232314, + "grad_norm": 0.248046875, + "learning_rate": 2.716294113079192e-05, + "loss": 0.8929, "step": 13660 }, { - "epoch": 0.39203591869522186, - "grad_norm": 0.38671875, - "learning_rate": 0.00015238854409566505, - "loss": 0.8694, + "epoch": 0.7840380974238338, + "grad_norm": 0.267578125, + "learning_rate": 2.7094363385085398e-05, + "loss": 1.0431, "step": 13665 }, { - "epoch": 0.39217936396367964, - "grad_norm": 0.3828125, - "learning_rate": 0.0001523458857409705, - "loss": 1.0165, + "epoch": 0.7843249756153537, + "grad_norm": 0.291015625, + "learning_rate": 2.7025858749314758e-05, + "loss": 0.9892, "step": 13670 }, { - "epoch": 0.39232280923213747, - "grad_norm": 0.357421875, - "learning_rate": 0.0001523032142614588, - "loss": 0.9502, + "epoch": 0.7846118538068736, + "grad_norm": 0.265625, + "learning_rate": 2.6957427292176572e-05, + "loss": 0.8728, "step": 13675 }, { - "epoch": 0.3924662545005953, - "grad_norm": 0.375, - "learning_rate": 0.00015226052966782914, - "loss": 0.8908, + "epoch": 0.7848987319983934, + "grad_norm": 0.279296875, + "learning_rate": 2.6889069082294114e-05, + "loss": 0.9862, "step": 13680 }, { - "epoch": 0.39260969976905313, - "grad_norm": 0.3671875, - "learning_rate": 0.0001522178319707839, - "loss": 1.0071, + "epoch": 0.7851856101899134, + "grad_norm": 0.28125, + "learning_rate": 2.6820784188217164e-05, + "loss": 0.9781, "step": 13685 }, { - "epoch": 0.39275314503751096, - "grad_norm": 0.3203125, - "learning_rate": 0.0001521751211810288, - "loss": 0.8845, + "epoch": 0.7854724883814332, + "grad_norm": 
0.2578125, + "learning_rate": 2.675257267842185e-05, + "loss": 0.9384, "step": 13690 }, { - "epoch": 0.39289659030596874, - "grad_norm": 0.365234375, - "learning_rate": 0.00015213239730927286, - "loss": 0.9869, + "epoch": 0.7857593665729531, + "grad_norm": 0.26171875, + "learning_rate": 2.668443462131094e-05, + "loss": 0.9279, "step": 13695 }, { - "epoch": 0.39304003557442657, - "grad_norm": 0.400390625, - "learning_rate": 0.00015208966036622825, - "loss": 0.9652, + "epoch": 0.786046244764473, + "grad_norm": 0.271484375, + "learning_rate": 2.6616370085213394e-05, + "loss": 1.0333, "step": 13700 }, { - "epoch": 0.3931834808428844, - "grad_norm": 0.384765625, - "learning_rate": 0.00015204691036261057, - "loss": 1.0449, + "epoch": 0.7863331229559929, + "grad_norm": 0.26953125, + "learning_rate": 2.6548379138384483e-05, + "loss": 0.9812, "step": 13705 }, { - "epoch": 0.39332692611134223, - "grad_norm": 0.373046875, - "learning_rate": 0.00015200414730913865, - "loss": 1.01, + "epoch": 0.7866200011475127, + "grad_norm": 0.291015625, + "learning_rate": 2.648046184900568e-05, + "loss": 0.8958, "step": 13710 }, { - "epoch": 0.3934703713798, - "grad_norm": 0.4765625, - "learning_rate": 0.00015196137121653458, - "loss": 1.1422, + "epoch": 0.7869068793390327, + "grad_norm": 0.263671875, + "learning_rate": 2.6412618285184587e-05, + "loss": 0.9758, "step": 13715 }, { - "epoch": 0.39361381664825784, - "grad_norm": 0.4140625, - "learning_rate": 0.00015191858209552368, - "loss": 0.9698, + "epoch": 0.7871937575305525, + "grad_norm": 0.275390625, + "learning_rate": 2.6344848514954856e-05, + "loss": 0.9736, "step": 13720 }, { - "epoch": 0.3937572619167157, - "grad_norm": 0.419921875, - "learning_rate": 0.00015187577995683462, - "loss": 0.985, + "epoch": 0.7874806357220724, + "grad_norm": 0.26171875, + "learning_rate": 2.6277152606276234e-05, + "loss": 0.9742, "step": 13725 }, { - "epoch": 0.3939007071851735, - "grad_norm": 0.4296875, - "learning_rate": 0.00015183296481119924, - "loss": 1.0512, + "epoch": 0.7877675139135923, + "grad_norm": 0.267578125, + "learning_rate": 2.6209530627034295e-05, + "loss": 0.9122, "step": 13730 }, { - "epoch": 0.39404415245363134, - "grad_norm": 0.361328125, - "learning_rate": 0.00015179013666935274, - "loss": 0.9689, + "epoch": 0.7880543921051122, + "grad_norm": 0.26953125, + "learning_rate": 2.614198264504053e-05, + "loss": 0.9363, "step": 13735 }, { - "epoch": 0.3941875977220891, - "grad_norm": 0.5078125, - "learning_rate": 0.00015174729554203348, - "loss": 0.9812, + "epoch": 0.788341270296632, + "grad_norm": 0.25390625, + "learning_rate": 2.607450872803213e-05, + "loss": 1.0, "step": 13740 }, { - "epoch": 0.39433104299054694, - "grad_norm": 0.3984375, - "learning_rate": 0.00015170444143998316, - "loss": 1.0226, + "epoch": 0.7886281484881519, + "grad_norm": 0.2421875, + "learning_rate": 2.600710894367219e-05, + "loss": 0.8937, "step": 13745 }, { - "epoch": 0.3944744882590048, - "grad_norm": 0.373046875, - "learning_rate": 0.00015166157437394672, - "loss": 0.9615, + "epoch": 0.7889150266796718, + "grad_norm": 0.267578125, + "learning_rate": 2.5939783359549306e-05, + "loss": 0.9911, "step": 13750 }, { - "epoch": 0.3946179335274626, - "grad_norm": 0.408203125, - "learning_rate": 0.00015161869435467227, - "loss": 0.9863, + "epoch": 0.7892019048711917, + "grad_norm": 0.2470703125, + "learning_rate": 2.5872532043177743e-05, + "loss": 0.9314, "step": 13755 }, { - "epoch": 0.39476137879592044, - "grad_norm": 0.447265625, - "learning_rate": 0.00015157580139291124, - "loss": 1.0837, + 
"epoch": 0.7894887830627115, + "grad_norm": 0.267578125, + "learning_rate": 2.580535506199727e-05, + "loss": 0.9753, "step": 13760 }, { - "epoch": 0.3949048240643782, - "grad_norm": 0.392578125, - "learning_rate": 0.00015153289549941834, - "loss": 0.9651, + "epoch": 0.7897756612542315, + "grad_norm": 0.287109375, + "learning_rate": 2.5738252483373117e-05, + "loss": 0.9208, "step": 13765 }, { - "epoch": 0.39504826933283604, - "grad_norm": 0.4140625, - "learning_rate": 0.00015148997668495143, - "loss": 0.9182, + "epoch": 0.7900625394457513, + "grad_norm": 0.275390625, + "learning_rate": 2.567122437459586e-05, + "loss": 0.9189, "step": 13770 }, { - "epoch": 0.3951917146012939, - "grad_norm": 0.41015625, - "learning_rate": 0.00015144704496027162, - "loss": 1.0374, + "epoch": 0.7903494176372712, + "grad_norm": 0.265625, + "learning_rate": 2.5604270802881503e-05, + "loss": 0.9401, "step": 13775 }, { - "epoch": 0.3953351598697517, - "grad_norm": 0.388671875, - "learning_rate": 0.0001514041003361434, - "loss": 0.9105, + "epoch": 0.7906362958287911, + "grad_norm": 0.265625, + "learning_rate": 2.5537391835371217e-05, + "loss": 1.05, "step": 13780 }, { - "epoch": 0.39547860513820954, - "grad_norm": 0.357421875, - "learning_rate": 0.00015136114282333427, - "loss": 0.8841, + "epoch": 0.790923174020311, + "grad_norm": 0.259765625, + "learning_rate": 2.5470587539131362e-05, + "loss": 0.9251, "step": 13785 }, { - "epoch": 0.3956220504066673, - "grad_norm": 0.400390625, - "learning_rate": 0.00015131817243261512, - "loss": 0.9183, + "epoch": 0.7912100522118308, + "grad_norm": 0.2890625, + "learning_rate": 2.5403857981153457e-05, + "loss": 0.9218, "step": 13790 }, { - "epoch": 0.39576549567512515, - "grad_norm": 0.404296875, - "learning_rate": 0.00015127518917476006, - "loss": 0.9012, + "epoch": 0.7914969304033508, + "grad_norm": 0.255859375, + "learning_rate": 2.5337203228354035e-05, + "loss": 0.9932, "step": 13795 }, { - "epoch": 0.395908940943583, - "grad_norm": 0.337890625, - "learning_rate": 0.00015123219306054634, - "loss": 0.9369, + "epoch": 0.7917838085948706, + "grad_norm": 0.255859375, + "learning_rate": 2.527062334757464e-05, + "loss": 1.03, "step": 13800 }, { - "epoch": 0.3960523862120408, - "grad_norm": 0.384765625, - "learning_rate": 0.00015118918410075453, - "loss": 0.9573, + "epoch": 0.7920706867863905, + "grad_norm": 0.26953125, + "learning_rate": 2.5204118405581724e-05, + "loss": 0.9819, "step": 13805 }, { - "epoch": 0.39619583148049864, - "grad_norm": 0.416015625, - "learning_rate": 0.00015114616230616835, - "loss": 1.0908, + "epoch": 0.7923575649779104, + "grad_norm": 0.26953125, + "learning_rate": 2.513768846906659e-05, + "loss": 1.0, "step": 13810 }, { - "epoch": 0.3963392767489564, - "grad_norm": 0.3828125, - "learning_rate": 0.00015110312768757474, - "loss": 0.8387, + "epoch": 0.7926444431694303, + "grad_norm": 0.26171875, + "learning_rate": 2.507133360464533e-05, + "loss": 0.9784, "step": 13815 }, { - "epoch": 0.39648272201741425, - "grad_norm": 0.376953125, - "learning_rate": 0.00015106008025576393, - "loss": 0.9463, + "epoch": 0.7929313213609501, + "grad_norm": 0.2578125, + "learning_rate": 2.500505387885872e-05, + "loss": 0.9411, "step": 13820 }, { - "epoch": 0.3966261672858721, - "grad_norm": 0.37109375, - "learning_rate": 0.00015101702002152928, - "loss": 0.8969, + "epoch": 0.79321819955247, + "grad_norm": 0.26953125, + "learning_rate": 2.493884935817228e-05, + "loss": 0.9736, "step": 13825 }, { - "epoch": 0.3967696125543299, - "grad_norm": 0.423828125, - "learning_rate": 
0.00015097394699566737, - "loss": 0.9579, + "epoch": 0.7935050777439899, + "grad_norm": 0.265625, + "learning_rate": 2.487272010897601e-05, + "loss": 0.9139, "step": 13830 }, { - "epoch": 0.39691305782278774, - "grad_norm": 0.376953125, - "learning_rate": 0.00015093086118897811, - "loss": 0.9295, + "epoch": 0.7937919559355098, + "grad_norm": 0.263671875, + "learning_rate": 2.4806666197584483e-05, + "loss": 0.8969, "step": 13835 }, { - "epoch": 0.3970565030912455, - "grad_norm": 0.359375, - "learning_rate": 0.0001508877626122644, - "loss": 0.9235, + "epoch": 0.7940788341270296, + "grad_norm": 0.25390625, + "learning_rate": 2.474068769023671e-05, + "loss": 0.914, "step": 13840 }, { - "epoch": 0.39719994835970335, - "grad_norm": 0.396484375, - "learning_rate": 0.0001508446512763325, - "loss": 1.1062, + "epoch": 0.7943657123185496, + "grad_norm": 0.283203125, + "learning_rate": 2.4674784653096083e-05, + "loss": 0.9689, "step": 13845 }, { - "epoch": 0.3973433936281612, - "grad_norm": 0.404296875, - "learning_rate": 0.00015080152719199183, - "loss": 0.9038, + "epoch": 0.7946525905100694, + "grad_norm": 0.25, + "learning_rate": 2.460895715225028e-05, + "loss": 1.0079, "step": 13850 }, { - "epoch": 0.397486838896619, - "grad_norm": 0.392578125, - "learning_rate": 0.000150758390370055, - "loss": 0.931, + "epoch": 0.7949394687015893, + "grad_norm": 0.283203125, + "learning_rate": 2.4543205253711355e-05, + "loss": 0.9578, "step": 13855 }, { - "epoch": 0.39763028416507684, - "grad_norm": 0.373046875, - "learning_rate": 0.00015071524082133778, - "loss": 0.8504, + "epoch": 0.7952263468931092, + "grad_norm": 0.26953125, + "learning_rate": 2.447752902341538e-05, + "loss": 0.9168, "step": 13860 }, { - "epoch": 0.3977737294335346, - "grad_norm": 0.478515625, - "learning_rate": 0.00015067207855665925, - "loss": 1.0029, + "epoch": 0.7955132250846291, + "grad_norm": 0.2578125, + "learning_rate": 2.441192852722265e-05, + "loss": 0.9152, "step": 13865 }, { - "epoch": 0.39791717470199245, - "grad_norm": 0.462890625, - "learning_rate": 0.00015062890358684148, - "loss": 1.0541, + "epoch": 0.7958001032761489, + "grad_norm": 0.275390625, + "learning_rate": 2.4346403830917464e-05, + "loss": 0.914, "step": 13870 }, { - "epoch": 0.3980606199704503, - "grad_norm": 0.427734375, - "learning_rate": 0.00015058571592270993, - "loss": 1.0839, + "epoch": 0.7960869814676689, + "grad_norm": 0.28515625, + "learning_rate": 2.4280955000208184e-05, + "loss": 0.9085, "step": 13875 }, { - "epoch": 0.3982040652389081, - "grad_norm": 0.396484375, - "learning_rate": 0.0001505425155750931, - "loss": 0.9304, + "epoch": 0.7963738596591887, + "grad_norm": 0.2578125, + "learning_rate": 2.421558210072702e-05, + "loss": 0.9926, "step": 13880 }, { - "epoch": 0.3983475105073659, - "grad_norm": 0.390625, - "learning_rate": 0.0001504993025548228, - "loss": 0.9343, + "epoch": 0.7966607378507086, + "grad_norm": 0.25, + "learning_rate": 2.4150285198030066e-05, + "loss": 0.9283, "step": 13885 }, { - "epoch": 0.3984909557758237, - "grad_norm": 0.40625, - "learning_rate": 0.00015045607687273383, - "loss": 0.9422, + "epoch": 0.7969476160422285, + "grad_norm": 0.275390625, + "learning_rate": 2.4085064357597197e-05, + "loss": 0.9488, "step": 13890 }, { - "epoch": 0.39863440104428155, - "grad_norm": 0.384765625, - "learning_rate": 0.0001504128385396644, - "loss": 1.1068, + "epoch": 0.7972344942337484, + "grad_norm": 0.25390625, + "learning_rate": 2.4019919644832023e-05, + "loss": 0.9676, "step": 13895 }, { - "epoch": 0.3987778463127394, - "grad_norm": 
0.404296875, - "learning_rate": 0.00015036958756645564, - "loss": 0.9375, + "epoch": 0.7975213724252682, + "grad_norm": 0.291015625, + "learning_rate": 2.395485112506177e-05, + "loss": 1.0144, "step": 13900 }, { - "epoch": 0.3989212915811972, - "grad_norm": 0.400390625, - "learning_rate": 0.00015032632396395207, - "loss": 0.8914, + "epoch": 0.7978082506167881, + "grad_norm": 0.259765625, + "learning_rate": 2.3889858863537396e-05, + "loss": 0.9616, "step": 13905 }, { - "epoch": 0.399064736849655, - "grad_norm": 0.39453125, - "learning_rate": 0.00015028304774300123, - "loss": 0.9028, + "epoch": 0.798095128808308, + "grad_norm": 0.271484375, + "learning_rate": 2.382494292543319e-05, + "loss": 0.9342, "step": 13910 }, { - "epoch": 0.3992081821181128, - "grad_norm": 0.390625, - "learning_rate": 0.0001502397589144539, - "loss": 0.933, + "epoch": 0.7983820069998279, + "grad_norm": 0.25, + "learning_rate": 2.376010337584701e-05, + "loss": 0.955, "step": 13915 }, { - "epoch": 0.39935162738657065, - "grad_norm": 0.44921875, - "learning_rate": 0.00015019645748916402, - "loss": 1.073, + "epoch": 0.7986688851913477, + "grad_norm": 0.2451171875, + "learning_rate": 2.369534027980015e-05, + "loss": 0.9769, "step": 13920 }, { - "epoch": 0.3994950726550285, - "grad_norm": 0.46484375, - "learning_rate": 0.00015015314347798864, - "loss": 1.0135, + "epoch": 0.7989557633828677, + "grad_norm": 0.26171875, + "learning_rate": 2.363065370223716e-05, + "loss": 0.939, "step": 13925 }, { - "epoch": 0.3996385179234863, - "grad_norm": 0.369140625, - "learning_rate": 0.00015010981689178796, - "loss": 0.9411, + "epoch": 0.7992426415743875, + "grad_norm": 0.275390625, + "learning_rate": 2.3566043708025874e-05, + "loss": 1.0056, "step": 13930 }, { - "epoch": 0.3997819631919441, - "grad_norm": 0.37109375, - "learning_rate": 0.00015006647774142544, - "loss": 0.87, + "epoch": 0.7995295197659074, + "grad_norm": 0.28125, + "learning_rate": 2.3501510361957367e-05, + "loss": 0.9896, "step": 13935 }, { - "epoch": 0.3999254084604019, - "grad_norm": 0.43359375, - "learning_rate": 0.00015002312603776754, - "loss": 0.9337, + "epoch": 0.7998163979574273, + "grad_norm": 0.26953125, + "learning_rate": 2.3437053728745807e-05, + "loss": 0.9721, "step": 13940 }, { - "epoch": 0.40006885372885975, - "grad_norm": 0.427734375, - "learning_rate": 0.000149979761791684, - "loss": 0.9056, + "epoch": 0.8001032761489472, + "grad_norm": 0.26953125, + "learning_rate": 2.337267387302845e-05, + "loss": 0.9788, "step": 13945 }, { - "epoch": 0.4002122989973176, - "grad_norm": 0.408203125, - "learning_rate": 0.00014993638501404762, - "loss": 1.015, + "epoch": 0.800390154340467, + "grad_norm": 0.2890625, + "learning_rate": 2.3308370859365523e-05, + "loss": 0.9456, "step": 13950 }, { - "epoch": 0.4003557442657754, - "grad_norm": 0.4375, - "learning_rate": 0.00014989299571573436, - "loss": 1.002, + "epoch": 0.800677032531987, + "grad_norm": 0.25390625, + "learning_rate": 2.324414475224034e-05, + "loss": 0.9169, "step": 13955 }, { - "epoch": 0.4004991895342332, - "grad_norm": 0.37890625, - "learning_rate": 0.00014984959390762335, - "loss": 1.0471, + "epoch": 0.8009639107235068, + "grad_norm": 0.259765625, + "learning_rate": 2.317999561605888e-05, + "loss": 0.9856, "step": 13960 }, { - "epoch": 0.400642634802691, - "grad_norm": 0.359375, - "learning_rate": 0.00014980617960059681, - "loss": 0.9832, + "epoch": 0.8012507889150267, + "grad_norm": 0.26171875, + "learning_rate": 2.311592351515004e-05, + "loss": 0.9333, "step": 13965 }, { - "epoch": 0.40078608007114885, 
- "grad_norm": 0.369140625, - "learning_rate": 0.00014976275280554016, - "loss": 0.9866, + "epoch": 0.8015376671065466, + "grad_norm": 0.263671875, + "learning_rate": 2.3051928513765542e-05, + "loss": 0.9138, "step": 13970 }, { - "epoch": 0.4009295253396067, - "grad_norm": 0.40234375, - "learning_rate": 0.0001497193135333419, - "loss": 1.0311, + "epoch": 0.8018245452980665, + "grad_norm": 0.2734375, + "learning_rate": 2.2988010676079674e-05, + "loss": 0.8617, "step": 13975 }, { - "epoch": 0.4010729706080645, - "grad_norm": 0.3828125, - "learning_rate": 0.00014967586179489366, - "loss": 0.9907, + "epoch": 0.8021114234895863, + "grad_norm": 0.279296875, + "learning_rate": 2.292417006618939e-05, + "loss": 0.9493, "step": 13980 }, { - "epoch": 0.4012164158765223, - "grad_norm": 0.41796875, - "learning_rate": 0.0001496323976010902, - "loss": 0.9585, + "epoch": 0.8023983016811062, + "grad_norm": 0.318359375, + "learning_rate": 2.2860406748114195e-05, + "loss": 1.0224, "step": 13985 }, { - "epoch": 0.4013598611449801, - "grad_norm": 0.38671875, - "learning_rate": 0.0001495889209628294, - "loss": 0.945, + "epoch": 0.8026851798726261, + "grad_norm": 0.259765625, + "learning_rate": 2.279672078579609e-05, + "loss": 0.997, "step": 13990 }, { - "epoch": 0.40150330641343795, - "grad_norm": 0.42578125, - "learning_rate": 0.0001495454318910123, - "loss": 0.9564, + "epoch": 0.802972058064146, + "grad_norm": 0.2734375, + "learning_rate": 2.2733112243099507e-05, + "loss": 0.9755, "step": 13995 }, { - "epoch": 0.4016467516818958, - "grad_norm": 0.396484375, - "learning_rate": 0.00014950193039654297, - "loss": 1.0098, + "epoch": 0.8032589362556658, + "grad_norm": 0.283203125, + "learning_rate": 2.2669581183811196e-05, + "loss": 0.9347, "step": 14000 }, { - "epoch": 0.4017901969503536, - "grad_norm": 0.396484375, - "learning_rate": 0.00014945841649032873, - "loss": 0.9189, + "epoch": 0.8035458144471858, + "grad_norm": 0.259765625, + "learning_rate": 2.2606127671640333e-05, + "loss": 0.9454, "step": 14005 }, { - "epoch": 0.4019336422188114, - "grad_norm": 0.435546875, - "learning_rate": 0.00014941489018327988, - "loss": 0.9599, + "epoch": 0.8038326926387056, + "grad_norm": 0.267578125, + "learning_rate": 2.254275177021816e-05, + "loss": 0.8952, "step": 14010 }, { - "epoch": 0.4020770874872692, - "grad_norm": 0.3828125, - "learning_rate": 0.00014937135148630987, - "loss": 0.9246, + "epoch": 0.8041195708302254, + "grad_norm": 0.255859375, + "learning_rate": 2.247945354309817e-05, + "loss": 0.9592, "step": 14015 }, { - "epoch": 0.40222053275572706, - "grad_norm": 0.3984375, - "learning_rate": 0.0001493278004103353, - "loss": 1.0737, + "epoch": 0.8044064490217454, + "grad_norm": 0.2734375, + "learning_rate": 2.2416233053756032e-05, + "loss": 0.9797, "step": 14020 }, { - "epoch": 0.4023639780241849, - "grad_norm": 0.390625, - "learning_rate": 0.00014928423696627583, - "loss": 0.8956, + "epoch": 0.8046933272132653, + "grad_norm": 0.2412109375, + "learning_rate": 2.2353090365589348e-05, + "loss": 0.9639, "step": 14025 }, { - "epoch": 0.4025074232926427, - "grad_norm": 0.359375, - "learning_rate": 0.00014924066116505427, - "loss": 0.9337, + "epoch": 0.8049802054047851, + "grad_norm": 0.255859375, + "learning_rate": 2.2290025541917768e-05, + "loss": 0.9881, "step": 14030 }, { - "epoch": 0.4026508685611005, - "grad_norm": 0.376953125, - "learning_rate": 0.0001491970730175964, - "loss": 1.094, + "epoch": 0.8052670835963051, + "grad_norm": 0.267578125, + "learning_rate": 2.2227038645982833e-05, + "loss": 0.9101, "step": 
14035 }, { - "epoch": 0.4027943138295583, - "grad_norm": 0.3671875, - "learning_rate": 0.00014915347253483126, - "loss": 0.9251, + "epoch": 0.8055539617878249, + "grad_norm": 0.265625, + "learning_rate": 2.2164129740947935e-05, + "loss": 0.929, "step": 14040 }, { - "epoch": 0.40293775909801616, - "grad_norm": 0.38671875, - "learning_rate": 0.0001491098597276909, - "loss": 0.9599, + "epoch": 0.8058408399793447, + "grad_norm": 0.30078125, + "learning_rate": 2.210129888989827e-05, + "loss": 0.9931, "step": 14045 }, { - "epoch": 0.403081204366474, - "grad_norm": 0.45703125, - "learning_rate": 0.00014906623460711046, - "loss": 0.9534, + "epoch": 0.8061277181708647, + "grad_norm": 0.283203125, + "learning_rate": 2.2038546155840735e-05, + "loss": 0.9698, "step": 14050 }, { - "epoch": 0.40322464963493176, - "grad_norm": 0.396484375, - "learning_rate": 0.00014902259718402818, - "loss": 0.8654, + "epoch": 0.8064145963623846, + "grad_norm": 0.265625, + "learning_rate": 2.1975871601703977e-05, + "loss": 0.9716, "step": 14055 }, { - "epoch": 0.4033680949033896, - "grad_norm": 0.34375, - "learning_rate": 0.00014897894746938536, - "loss": 0.9073, + "epoch": 0.8067014745539044, + "grad_norm": 0.310546875, + "learning_rate": 2.191327529033812e-05, + "loss": 0.9704, "step": 14060 }, { - "epoch": 0.4035115401718474, - "grad_norm": 0.388671875, - "learning_rate": 0.00014893528547412648, - "loss": 0.9378, + "epoch": 0.8069883527454242, + "grad_norm": 0.24609375, + "learning_rate": 2.1850757284514877e-05, + "loss": 0.9215, "step": 14065 }, { - "epoch": 0.40365498544030526, - "grad_norm": 0.458984375, - "learning_rate": 0.00014889161120919893, - "loss": 0.9422, + "epoch": 0.8072752309369442, + "grad_norm": 0.26953125, + "learning_rate": 2.178831764692749e-05, + "loss": 0.9289, "step": 14070 }, { - "epoch": 0.4037984307087631, - "grad_norm": 0.439453125, - "learning_rate": 0.0001488479246855533, - "loss": 0.8926, + "epoch": 0.807562109128464, + "grad_norm": 0.2734375, + "learning_rate": 2.1725956440190542e-05, + "loss": 0.9023, "step": 14075 }, { - "epoch": 0.40394187597722087, - "grad_norm": 0.384765625, - "learning_rate": 0.00014880422591414323, - "loss": 0.993, + "epoch": 0.8078489873199839, + "grad_norm": 0.25, + "learning_rate": 2.1663673726840006e-05, + "loss": 0.9553, "step": 14080 }, { - "epoch": 0.4040853212456787, - "grad_norm": 0.431640625, - "learning_rate": 0.00014876051490592547, - "loss": 0.9726, + "epoch": 0.8081358655115038, + "grad_norm": 0.251953125, + "learning_rate": 2.160146956933311e-05, + "loss": 0.9908, "step": 14085 }, { - "epoch": 0.40422876651413653, - "grad_norm": 0.396484375, - "learning_rate": 0.00014871679167185973, - "loss": 0.9524, + "epoch": 0.8084227437030237, + "grad_norm": 0.25390625, + "learning_rate": 2.1539344030048337e-05, + "loss": 0.9315, "step": 14090 }, { - "epoch": 0.40437221178259436, - "grad_norm": 0.388671875, - "learning_rate": 0.00014867305622290883, - "loss": 0.9508, + "epoch": 0.8087096218945435, + "grad_norm": 0.3046875, + "learning_rate": 2.1477297171285282e-05, + "loss": 0.8911, "step": 14095 }, { - "epoch": 0.4045156570510522, - "grad_norm": 0.388671875, - "learning_rate": 0.00014862930857003877, - "loss": 0.9001, + "epoch": 0.8089965000860635, + "grad_norm": 0.265625, + "learning_rate": 2.141532905526472e-05, + "loss": 0.8708, "step": 14100 }, { - "epoch": 0.40465910231950997, - "grad_norm": 0.419921875, - "learning_rate": 0.0001485855487242184, - "loss": 1.0116, + "epoch": 0.8092833782775833, + "grad_norm": 0.287109375, + "learning_rate": 
2.1353439744128434e-05, + "loss": 0.9597, "step": 14105 }, { - "epoch": 0.4048025475879678, - "grad_norm": 0.419921875, - "learning_rate": 0.00014854177669641983, - "loss": 0.8911, + "epoch": 0.8095702564691032, + "grad_norm": 0.28515625, + "learning_rate": 2.1291629299939097e-05, + "loss": 1.0855, "step": 14110 }, { - "epoch": 0.40494599285642563, - "grad_norm": 0.421875, - "learning_rate": 0.00014849799249761804, - "loss": 1.0453, + "epoch": 0.8098571346606231, + "grad_norm": 0.255859375, + "learning_rate": 2.1229897784680365e-05, + "loss": 0.9425, "step": 14115 }, { - "epoch": 0.40508943812488346, - "grad_norm": 0.36328125, - "learning_rate": 0.0001484541961387912, - "loss": 0.9709, + "epoch": 0.810144012852143, + "grad_norm": 0.267578125, + "learning_rate": 2.116824526025679e-05, + "loss": 0.8716, "step": 14120 }, { - "epoch": 0.4052328833933413, - "grad_norm": 0.3671875, - "learning_rate": 0.00014841038763092052, - "loss": 0.9525, + "epoch": 0.8104308910436628, + "grad_norm": 0.251953125, + "learning_rate": 2.1106671788493636e-05, + "loss": 1.0174, "step": 14125 }, { - "epoch": 0.40537632866179907, - "grad_norm": 0.38671875, - "learning_rate": 0.00014836656698499016, - "loss": 0.8866, + "epoch": 0.8107177692351828, + "grad_norm": 0.275390625, + "learning_rate": 2.104517743113693e-05, + "loss": 0.9206, "step": 14130 }, { - "epoch": 0.4055197739302569, - "grad_norm": 0.39453125, - "learning_rate": 0.00014832273421198743, - "loss": 0.884, + "epoch": 0.8110046474267026, + "grad_norm": 0.271484375, + "learning_rate": 2.0983762249853344e-05, + "loss": 0.9666, "step": 14135 }, { - "epoch": 0.40566321919871473, - "grad_norm": 0.416015625, - "learning_rate": 0.00014827888932290257, - "loss": 0.9833, + "epoch": 0.8112915256182225, + "grad_norm": 0.283203125, + "learning_rate": 2.092242630623016e-05, + "loss": 0.9522, "step": 14140 }, { - "epoch": 0.40580666446717256, - "grad_norm": 0.376953125, - "learning_rate": 0.00014823503232872897, - "loss": 0.9856, + "epoch": 0.8115784038097423, + "grad_norm": 0.2734375, + "learning_rate": 2.086116966177516e-05, + "loss": 0.9182, "step": 14145 }, { - "epoch": 0.4059501097356304, - "grad_norm": 0.3828125, - "learning_rate": 0.000148191163240463, - "loss": 0.9937, + "epoch": 0.8118652820012623, + "grad_norm": 0.267578125, + "learning_rate": 2.079999237791672e-05, + "loss": 0.9693, "step": 14150 }, { - "epoch": 0.40609355500408817, - "grad_norm": 0.392578125, - "learning_rate": 0.00014814728206910403, - "loss": 0.9837, + "epoch": 0.8121521601927821, + "grad_norm": 0.26171875, + "learning_rate": 2.0738894516003536e-05, + "loss": 0.9591, "step": 14155 }, { - "epoch": 0.406237000272546, - "grad_norm": 0.388671875, - "learning_rate": 0.00014810338882565455, - "loss": 0.8821, + "epoch": 0.812439038384302, + "grad_norm": 0.267578125, + "learning_rate": 2.067787613730462e-05, + "loss": 0.961, "step": 14160 }, { - "epoch": 0.40638044554100383, - "grad_norm": 0.421875, - "learning_rate": 0.00014805948352111998, - "loss": 0.9195, + "epoch": 0.8127259165758219, + "grad_norm": 0.296875, + "learning_rate": 2.0616937303009408e-05, + "loss": 0.9661, "step": 14165 }, { - "epoch": 0.40652389080946166, - "grad_norm": 0.390625, - "learning_rate": 0.00014801556616650887, - "loss": 0.9821, + "epoch": 0.8130127947673418, + "grad_norm": 0.267578125, + "learning_rate": 2.055607807422748e-05, + "loss": 1.0048, "step": 14170 }, { - "epoch": 0.4066673360779195, - "grad_norm": 0.392578125, - "learning_rate": 0.00014797163677283267, - "loss": 1.0031, + "epoch": 0.8132996729588616, + 
"grad_norm": 0.28125, + "learning_rate": 2.0495298511988602e-05, + "loss": 0.9954, "step": 14175 }, { - "epoch": 0.40681078134637727, - "grad_norm": 0.40625, - "learning_rate": 0.00014792769535110597, - "loss": 0.803, + "epoch": 0.8135865511503816, + "grad_norm": 0.2734375, + "learning_rate": 2.0434598677242656e-05, + "loss": 0.9824, "step": 14180 }, { - "epoch": 0.4069542266148351, - "grad_norm": 0.396484375, - "learning_rate": 0.0001478837419123462, - "loss": 0.9326, + "epoch": 0.8138734293419014, + "grad_norm": 0.2734375, + "learning_rate": 2.037397863085957e-05, + "loss": 1.0136, "step": 14185 }, { - "epoch": 0.40709767188329293, - "grad_norm": 0.361328125, - "learning_rate": 0.00014783977646757403, - "loss": 0.9421, + "epoch": 0.8141603075334213, + "grad_norm": 0.25390625, + "learning_rate": 2.0313438433629263e-05, + "loss": 0.9508, "step": 14190 }, { - "epoch": 0.40724111715175076, - "grad_norm": 0.361328125, - "learning_rate": 0.000147795799027813, - "loss": 0.9792, + "epoch": 0.8144471857249412, + "grad_norm": 0.3046875, + "learning_rate": 2.0252978146261557e-05, + "loss": 0.9738, "step": 14195 }, { - "epoch": 0.4073845624202086, - "grad_norm": 0.3828125, - "learning_rate": 0.00014775180960408966, - "loss": 0.9197, + "epoch": 0.8147340639164611, + "grad_norm": 0.275390625, + "learning_rate": 2.0192597829386217e-05, + "loss": 1.0262, "step": 14200 }, { - "epoch": 0.40752800768866637, - "grad_norm": 0.3984375, - "learning_rate": 0.00014770780820743364, - "loss": 0.9679, + "epoch": 0.8150209421079809, + "grad_norm": 0.26953125, + "learning_rate": 2.0132297543552757e-05, + "loss": 0.9072, "step": 14205 }, { - "epoch": 0.4076714529571242, - "grad_norm": 0.40234375, - "learning_rate": 0.00014766379484887744, - "loss": 1.0943, + "epoch": 0.8153078202995009, + "grad_norm": 0.2490234375, + "learning_rate": 2.0072077349230357e-05, + "loss": 0.8962, "step": 14210 }, { - "epoch": 0.40781489822558203, - "grad_norm": 0.361328125, - "learning_rate": 0.00014761976953945672, - "loss": 1.054, + "epoch": 0.8155946984910207, + "grad_norm": 0.267578125, + "learning_rate": 2.0011937306808048e-05, + "loss": 0.9166, "step": 14215 }, { - "epoch": 0.40795834349403987, - "grad_norm": 0.416015625, - "learning_rate": 0.00014757573229021002, - "loss": 0.9723, + "epoch": 0.8158815766825406, + "grad_norm": 0.265625, + "learning_rate": 1.9951877476594382e-05, + "loss": 0.8863, "step": 14220 }, { - "epoch": 0.40810178876249764, - "grad_norm": 0.330078125, - "learning_rate": 0.00014753168311217892, - "loss": 0.8678, + "epoch": 0.8161684548740604, + "grad_norm": 0.2734375, + "learning_rate": 1.9891897918817472e-05, + "loss": 0.9848, "step": 14225 }, { - "epoch": 0.4082452340309555, - "grad_norm": 0.359375, - "learning_rate": 0.00014748762201640796, - "loss": 0.9409, + "epoch": 0.8164553330655804, + "grad_norm": 0.259765625, + "learning_rate": 1.9831998693624964e-05, + "loss": 0.9138, "step": 14230 }, { - "epoch": 0.4083886792994133, - "grad_norm": 0.43359375, - "learning_rate": 0.00014744354901394472, - "loss": 0.9272, + "epoch": 0.8167422112571002, + "grad_norm": 0.251953125, + "learning_rate": 1.977217986108393e-05, + "loss": 0.9695, "step": 14235 }, { - "epoch": 0.40853212456787114, - "grad_norm": 0.3828125, - "learning_rate": 0.00014739946411583977, - "loss": 1.055, + "epoch": 0.8170290894486201, + "grad_norm": 0.279296875, + "learning_rate": 1.9712441481180833e-05, + "loss": 0.9367, "step": 14240 }, { - "epoch": 0.40867556983632897, - "grad_norm": 0.392578125, - "learning_rate": 0.0001473553673331465, - "loss": 
1.0266, + "epoch": 0.81731596764014, + "grad_norm": 0.263671875, + "learning_rate": 1.9652783613821435e-05, + "loss": 0.9687, "step": 14245 }, { - "epoch": 0.40881901510478674, - "grad_norm": 0.37109375, - "learning_rate": 0.00014731125867692158, - "loss": 0.9047, + "epoch": 0.8176028458316599, + "grad_norm": 0.2734375, + "learning_rate": 1.9593206318830815e-05, + "loss": 0.9353, "step": 14250 }, { - "epoch": 0.4089624603732446, - "grad_norm": 0.412109375, - "learning_rate": 0.00014726713815822436, - "loss": 0.9659, + "epoch": 0.8178897240231797, + "grad_norm": 0.255859375, + "learning_rate": 1.9533709655953235e-05, + "loss": 0.9681, "step": 14255 }, { - "epoch": 0.4091059056417024, - "grad_norm": 0.396484375, - "learning_rate": 0.00014722300578811734, - "loss": 0.8659, + "epoch": 0.8181766022146997, + "grad_norm": 0.25390625, + "learning_rate": 1.9474293684851984e-05, + "loss": 0.9158, "step": 14260 }, { - "epoch": 0.40924935091016024, - "grad_norm": 0.45703125, - "learning_rate": 0.0001471788615776659, - "loss": 1.001, + "epoch": 0.8184634804062195, + "grad_norm": 0.25, + "learning_rate": 1.9414958465109635e-05, + "loss": 0.9798, "step": 14265 }, { - "epoch": 0.40939279617861807, - "grad_norm": 0.388671875, - "learning_rate": 0.00014713470553793853, - "loss": 0.972, + "epoch": 0.8187503585977394, + "grad_norm": 0.271484375, + "learning_rate": 1.9355704056227632e-05, + "loss": 1.0429, "step": 14270 }, { - "epoch": 0.40953624144707584, - "grad_norm": 0.39453125, - "learning_rate": 0.00014709053768000648, - "loss": 0.9242, + "epoch": 0.8190372367892593, + "grad_norm": 0.2890625, + "learning_rate": 1.9296530517626445e-05, + "loss": 0.8955, "step": 14275 }, { - "epoch": 0.4096796867155337, - "grad_norm": 0.408203125, - "learning_rate": 0.0001470463580149441, - "loss": 1.0843, + "epoch": 0.8193241149807792, + "grad_norm": 0.2578125, + "learning_rate": 1.9237437908645417e-05, + "loss": 0.9144, "step": 14280 }, { - "epoch": 0.4098231319839915, - "grad_norm": 0.3671875, - "learning_rate": 0.00014700216655382867, - "loss": 0.9582, + "epoch": 0.819610993172299, + "grad_norm": 0.26171875, + "learning_rate": 1.917842628854275e-05, + "loss": 0.9887, "step": 14285 }, { - "epoch": 0.40996657725244934, - "grad_norm": 0.353515625, - "learning_rate": 0.00014695796330774048, - "loss": 0.9676, + "epoch": 0.819897871363819, + "grad_norm": 0.263671875, + "learning_rate": 1.9119495716495417e-05, + "loss": 0.9999, "step": 14290 }, { - "epoch": 0.41011002252090717, - "grad_norm": 0.375, - "learning_rate": 0.00014691374828776263, - "loss": 0.9886, + "epoch": 0.8201847495553388, + "grad_norm": 0.2578125, + "learning_rate": 1.9060646251599157e-05, + "loss": 0.9903, "step": 14295 }, { - "epoch": 0.41025346778936495, - "grad_norm": 0.404296875, - "learning_rate": 0.00014686952150498134, - "loss": 0.9407, + "epoch": 0.8204716277468587, + "grad_norm": 0.265625, + "learning_rate": 1.900187795286834e-05, + "loss": 0.9522, "step": 14300 }, { - "epoch": 0.4103969130578228, - "grad_norm": 0.373046875, - "learning_rate": 0.00014682528297048564, - "loss": 1.0445, + "epoch": 0.8207585059383785, + "grad_norm": 0.28125, + "learning_rate": 1.8943190879235972e-05, + "loss": 0.9792, "step": 14305 }, { - "epoch": 0.4105403583262806, - "grad_norm": 0.421875, - "learning_rate": 0.00014678103269536762, - "loss": 0.9068, + "epoch": 0.8210453841298985, + "grad_norm": 0.265625, + "learning_rate": 1.8884585089553498e-05, + "loss": 0.961, "step": 14310 }, { - "epoch": 0.41068380359473844, - "grad_norm": 0.4140625, - "learning_rate": 
0.0001467367706907223, - "loss": 0.9964, + "epoch": 0.8213322623214183, + "grad_norm": 0.2734375, + "learning_rate": 1.8826060642591005e-05, + "loss": 0.9782, "step": 14315 }, { - "epoch": 0.41082724886319627, - "grad_norm": 0.353515625, - "learning_rate": 0.00014669249696764748, - "loss": 0.8724, + "epoch": 0.8216191405129382, + "grad_norm": 0.2412109375, + "learning_rate": 1.8767617597036925e-05, + "loss": 0.8703, "step": 14320 }, { - "epoch": 0.41097069413165405, - "grad_norm": 0.40234375, - "learning_rate": 0.00014664821153724414, - "loss": 0.9364, + "epoch": 0.8219060187044581, + "grad_norm": 0.302734375, + "learning_rate": 1.8709256011498076e-05, + "loss": 0.9159, "step": 14325 }, { - "epoch": 0.4111141394001119, - "grad_norm": 0.375, - "learning_rate": 0.00014660391441061603, - "loss": 0.9479, + "epoch": 0.822192896895978, + "grad_norm": 0.2578125, + "learning_rate": 1.865097594449958e-05, + "loss": 0.8634, "step": 14330 }, { - "epoch": 0.4112575846685697, - "grad_norm": 0.37890625, - "learning_rate": 0.00014655960559886987, - "loss": 1.0147, + "epoch": 0.8224797750874978, + "grad_norm": 0.267578125, + "learning_rate": 1.8592777454484835e-05, + "loss": 0.9098, "step": 14335 }, { - "epoch": 0.41140102993702754, - "grad_norm": 0.396484375, - "learning_rate": 0.00014651528511311538, - "loss": 0.9863, + "epoch": 0.8227666532790178, + "grad_norm": 0.26171875, + "learning_rate": 1.8534660599815368e-05, + "loss": 0.9266, "step": 14340 }, { - "epoch": 0.41154447520548537, - "grad_norm": 0.408203125, - "learning_rate": 0.00014647095296446507, - "loss": 0.9591, + "epoch": 0.8230535314705376, + "grad_norm": 0.27734375, + "learning_rate": 1.8476625438770944e-05, + "loss": 0.9914, "step": 14345 }, { - "epoch": 0.41168792047394315, - "grad_norm": 0.38671875, - "learning_rate": 0.0001464266091640345, - "loss": 0.9165, + "epoch": 0.8233404096620575, + "grad_norm": 0.259765625, + "learning_rate": 1.8418672029549355e-05, + "loss": 0.977, "step": 14350 }, { - "epoch": 0.411831365742401, - "grad_norm": 0.375, - "learning_rate": 0.0001463822537229421, - "loss": 0.9988, + "epoch": 0.8236272878535774, + "grad_norm": 0.2578125, + "learning_rate": 1.836080043026638e-05, + "loss": 0.9636, "step": 14355 }, { - "epoch": 0.4119748110108588, - "grad_norm": 0.39453125, - "learning_rate": 0.0001463378866523092, - "loss": 0.9585, + "epoch": 0.8239141660450973, + "grad_norm": 0.2578125, + "learning_rate": 1.8303010698955804e-05, + "loss": 0.9023, "step": 14360 }, { - "epoch": 0.41211825627931664, - "grad_norm": 0.38671875, - "learning_rate": 0.00014629350796326008, - "loss": 0.9833, + "epoch": 0.8242010442366171, + "grad_norm": 0.271484375, + "learning_rate": 1.8245302893569295e-05, + "loss": 0.9805, "step": 14365 }, { - "epoch": 0.4122617015477745, - "grad_norm": 0.423828125, - "learning_rate": 0.00014624911766692196, - "loss": 0.9857, + "epoch": 0.8244879224281371, + "grad_norm": 0.265625, + "learning_rate": 1.818767707197636e-05, + "loss": 0.8912, "step": 14370 }, { - "epoch": 0.41240514681623225, - "grad_norm": 0.36328125, - "learning_rate": 0.0001462047157744249, - "loss": 0.938, + "epoch": 0.8247748006196569, + "grad_norm": 0.251953125, + "learning_rate": 1.8130133291964323e-05, + "loss": 0.9084, "step": 14375 }, { - "epoch": 0.4125485920846901, - "grad_norm": 0.361328125, - "learning_rate": 0.00014616030229690186, - "loss": 0.939, + "epoch": 0.8250616788111768, + "grad_norm": 0.25390625, + "learning_rate": 1.80726716112382e-05, + "loss": 0.8483, "step": 14380 }, { - "epoch": 0.4126920373531479, - "grad_norm": 
0.4453125, - "learning_rate": 0.0001461158772454888, - "loss": 0.8863, + "epoch": 0.8253485570026966, + "grad_norm": 0.265625, + "learning_rate": 1.80152920874207e-05, + "loss": 0.8901, "step": 14385 }, { - "epoch": 0.41283548262160574, - "grad_norm": 0.40234375, - "learning_rate": 0.0001460714406313245, - "loss": 0.9649, + "epoch": 0.8256354351942166, + "grad_norm": 0.259765625, + "learning_rate": 1.7957994778052112e-05, + "loss": 0.8935, "step": 14390 }, { - "epoch": 0.4129789278900635, - "grad_norm": 0.45703125, - "learning_rate": 0.00014602699246555072, - "loss": 0.9546, + "epoch": 0.8259223133857364, + "grad_norm": 0.27734375, + "learning_rate": 1.7900779740590344e-05, + "loss": 1.0121, "step": 14395 }, { - "epoch": 0.41312237315852135, - "grad_norm": 0.392578125, - "learning_rate": 0.00014598253275931197, - "loss": 0.9392, + "epoch": 0.8262091915772563, + "grad_norm": 0.263671875, + "learning_rate": 1.784364703241076e-05, + "loss": 0.952, "step": 14400 }, { - "epoch": 0.4132658184269792, - "grad_norm": 0.392578125, - "learning_rate": 0.0001459380615237558, - "loss": 0.9319, + "epoch": 0.8264960697687762, + "grad_norm": 0.2578125, + "learning_rate": 1.778659671080616e-05, + "loss": 0.9186, "step": 14405 }, { - "epoch": 0.413409263695437, - "grad_norm": 0.455078125, - "learning_rate": 0.00014589357877003257, - "loss": 1.0258, + "epoch": 0.826782947960296, + "grad_norm": 0.27734375, + "learning_rate": 1.7729628832986722e-05, + "loss": 1.0093, "step": 14410 }, { - "epoch": 0.41355270896389484, - "grad_norm": 0.392578125, - "learning_rate": 0.00014584908450929556, - "loss": 0.9376, + "epoch": 0.8270698261518159, + "grad_norm": 0.27734375, + "learning_rate": 1.7672743456079976e-05, + "loss": 0.9438, "step": 14415 }, { - "epoch": 0.4136961542323526, - "grad_norm": 0.373046875, - "learning_rate": 0.00014580457875270098, - "loss": 0.9554, + "epoch": 0.8273567043433359, + "grad_norm": 0.2890625, + "learning_rate": 1.761594063713068e-05, + "loss": 0.965, "step": 14420 }, { - "epoch": 0.41383959950081045, - "grad_norm": 0.423828125, - "learning_rate": 0.00014576006151140782, - "loss": 0.8856, + "epoch": 0.8276435825348557, + "grad_norm": 0.271484375, + "learning_rate": 1.75592204331009e-05, + "loss": 0.9737, "step": 14425 }, { - "epoch": 0.4139830447692683, - "grad_norm": 0.421875, - "learning_rate": 0.00014571553279657803, - "loss": 0.9723, + "epoch": 0.8279304607263755, + "grad_norm": 0.263671875, + "learning_rate": 1.7502582900869702e-05, + "loss": 0.8891, "step": 14430 }, { - "epoch": 0.4141264900377261, - "grad_norm": 0.40625, - "learning_rate": 0.00014567099261937632, - "loss": 1.0789, + "epoch": 0.8282173389178955, + "grad_norm": 0.265625, + "learning_rate": 1.744602809723337e-05, + "loss": 0.9375, "step": 14435 }, { - "epoch": 0.41426993530618395, - "grad_norm": 0.37890625, - "learning_rate": 0.00014562644099097048, - "loss": 0.968, + "epoch": 0.8285042171094154, + "grad_norm": 0.26953125, + "learning_rate": 1.7389556078905144e-05, + "loss": 1.0189, "step": 14440 }, { - "epoch": 0.4144133805746417, - "grad_norm": 0.4140625, - "learning_rate": 0.00014558187792253102, - "loss": 0.9501, + "epoch": 0.8287910953009352, + "grad_norm": 0.251953125, + "learning_rate": 1.7333166902515363e-05, + "loss": 0.8845, "step": 14445 }, { - "epoch": 0.41455682584309955, - "grad_norm": 0.412109375, - "learning_rate": 0.00014553730342523134, - "loss": 0.9644, + "epoch": 0.8290779734924552, + "grad_norm": 0.265625, + "learning_rate": 1.727686062461118e-05, + "loss": 0.9842, "step": 14450 }, { - "epoch": 
0.4147002711115574, - "grad_norm": 0.380859375, - "learning_rate": 0.0001454927175102477, - "loss": 0.9954, + "epoch": 0.829364851683975, + "grad_norm": 0.263671875, + "learning_rate": 1.722063730165665e-05, + "loss": 0.867, "step": 14455 }, { - "epoch": 0.4148437163800152, - "grad_norm": 0.36328125, - "learning_rate": 0.0001454481201887592, - "loss": 0.8784, + "epoch": 0.8296517298754948, + "grad_norm": 0.259765625, + "learning_rate": 1.7164496990032665e-05, + "loss": 0.9246, "step": 14460 }, { - "epoch": 0.41498716164847305, - "grad_norm": 0.423828125, - "learning_rate": 0.00014540351147194797, - "loss": 1.0006, + "epoch": 0.8299386080670147, + "grad_norm": 0.2578125, + "learning_rate": 1.7108439746036842e-05, + "loss": 0.8562, "step": 14465 }, { - "epoch": 0.4151306069169308, - "grad_norm": 0.404296875, - "learning_rate": 0.00014535889137099877, - "loss": 0.9964, + "epoch": 0.8302254862585347, + "grad_norm": 0.263671875, + "learning_rate": 1.7052465625883494e-05, + "loss": 0.9346, "step": 14470 }, { - "epoch": 0.41527405218538865, - "grad_norm": 0.4140625, - "learning_rate": 0.00014531425989709932, - "loss": 0.9661, + "epoch": 0.8305123644500545, + "grad_norm": 0.26953125, + "learning_rate": 1.699657468570367e-05, + "loss": 0.9476, "step": 14475 }, { - "epoch": 0.4154174974538465, - "grad_norm": 0.390625, - "learning_rate": 0.00014526961706144023, - "loss": 0.9171, + "epoch": 0.8307992426415743, + "grad_norm": 0.255859375, + "learning_rate": 1.694076698154484e-05, + "loss": 0.9324, "step": 14480 }, { - "epoch": 0.4155609427223043, - "grad_norm": 0.427734375, - "learning_rate": 0.00014522496287521484, - "loss": 0.9794, + "epoch": 0.8310861208330943, + "grad_norm": 0.25390625, + "learning_rate": 1.6885042569371146e-05, + "loss": 1.0023, "step": 14485 }, { - "epoch": 0.41570438799076215, - "grad_norm": 0.40625, - "learning_rate": 0.00014518029734961947, - "loss": 0.9825, + "epoch": 0.8313729990246141, + "grad_norm": 0.26953125, + "learning_rate": 1.68294015050631e-05, + "loss": 0.9223, "step": 14490 }, { - "epoch": 0.4158478332592199, - "grad_norm": 0.390625, - "learning_rate": 0.00014513562049585317, - "loss": 0.9066, + "epoch": 0.831659877216134, + "grad_norm": 0.265625, + "learning_rate": 1.677384384441776e-05, + "loss": 0.8871, "step": 14495 }, { - "epoch": 0.41599127852767775, - "grad_norm": 0.41796875, - "learning_rate": 0.00014509093232511791, - "loss": 0.9756, + "epoch": 0.831946755407654, + "grad_norm": 0.3046875, + "learning_rate": 1.6718369643148435e-05, + "loss": 0.908, "step": 14500 }, { - "epoch": 0.4161347237961356, - "grad_norm": 0.392578125, - "learning_rate": 0.00014504623284861847, - "loss": 1.0353, + "epoch": 0.8322336335991738, + "grad_norm": 0.263671875, + "learning_rate": 1.6662978956884778e-05, + "loss": 0.9022, "step": 14505 }, { - "epoch": 0.4162781690645934, - "grad_norm": 0.37109375, - "learning_rate": 0.00014500152207756246, - "loss": 0.7859, + "epoch": 0.8325205117906936, + "grad_norm": 0.259765625, + "learning_rate": 1.66076718411727e-05, + "loss": 0.889, "step": 14510 }, { - "epoch": 0.41642161433305125, - "grad_norm": 0.375, - "learning_rate": 0.00014495680002316033, - "loss": 1.0085, + "epoch": 0.8328073899822136, + "grad_norm": 0.263671875, + "learning_rate": 1.6552448351474304e-05, + "loss": 0.9465, "step": 14515 }, { - "epoch": 0.416565059601509, - "grad_norm": 0.40625, - "learning_rate": 0.00014491206669662533, - "loss": 1.012, + "epoch": 0.8330942681737334, + "grad_norm": 0.265625, + "learning_rate": 1.649730854316779e-05, + "loss": 0.9313, "step": 
14520 }, { - "epoch": 0.41670850486996686, - "grad_norm": 0.3828125, - "learning_rate": 0.0001448673221091736, - "loss": 0.9453, + "epoch": 0.8333811463652533, + "grad_norm": 0.263671875, + "learning_rate": 1.644225247154756e-05, + "loss": 0.8882, "step": 14525 }, { - "epoch": 0.4168519501384247, - "grad_norm": 0.396484375, - "learning_rate": 0.00014482256627202405, - "loss": 0.9606, + "epoch": 0.8336680245567732, + "grad_norm": 0.263671875, + "learning_rate": 1.6387280191823896e-05, + "loss": 0.9555, "step": 14530 }, { - "epoch": 0.4169953954068825, - "grad_norm": 0.384765625, - "learning_rate": 0.0001447777991963984, - "loss": 0.9381, + "epoch": 0.8339549027482931, + "grad_norm": 0.2578125, + "learning_rate": 1.6332391759123123e-05, + "loss": 0.9272, "step": 14535 }, { - "epoch": 0.41713884067534035, - "grad_norm": 0.41796875, - "learning_rate": 0.00014473302089352123, - "loss": 0.8662, + "epoch": 0.8342417809398129, + "grad_norm": 0.283203125, + "learning_rate": 1.6277587228487533e-05, + "loss": 1.0969, "step": 14540 }, { - "epoch": 0.4172822859437981, - "grad_norm": 0.375, - "learning_rate": 0.00014468823137461992, - "loss": 1.0437, + "epoch": 0.8345286591313328, + "grad_norm": 0.263671875, + "learning_rate": 1.6222866654875213e-05, + "loss": 1.0165, "step": 14545 }, { - "epoch": 0.41742573121225596, - "grad_norm": 0.3359375, - "learning_rate": 0.00014464343065092466, - "loss": 0.9436, + "epoch": 0.8348155373228527, + "grad_norm": 0.298828125, + "learning_rate": 1.6168230093160062e-05, + "loss": 1.0093, "step": 14550 }, { - "epoch": 0.4175691764807138, - "grad_norm": 0.3671875, - "learning_rate": 0.00014459861873366848, - "loss": 0.9377, + "epoch": 0.8351024155143726, + "grad_norm": 0.25, + "learning_rate": 1.611367759813176e-05, + "loss": 0.9414, "step": 14555 }, { - "epoch": 0.4177126217491716, - "grad_norm": 0.412109375, - "learning_rate": 0.00014455379563408713, - "loss": 0.9451, + "epoch": 0.8353892937058924, + "grad_norm": 0.244140625, + "learning_rate": 1.6059209224495676e-05, + "loss": 0.9592, "step": 14560 }, { - "epoch": 0.41785606701762945, - "grad_norm": 0.376953125, - "learning_rate": 0.00014450896136341924, - "loss": 0.9209, + "epoch": 0.8356761718974124, + "grad_norm": 0.25, + "learning_rate": 1.6004825026872806e-05, + "loss": 0.9276, "step": 14565 }, { - "epoch": 0.4179995122860872, - "grad_norm": 0.37890625, - "learning_rate": 0.00014446411593290625, - "loss": 0.8519, + "epoch": 0.8359630500889322, + "grad_norm": 0.26171875, + "learning_rate": 1.5950525059799714e-05, + "loss": 0.9925, "step": 14570 }, { - "epoch": 0.41814295755454506, - "grad_norm": 0.478515625, - "learning_rate": 0.00014441925935379234, - "loss": 0.9188, + "epoch": 0.8362499282804521, + "grad_norm": 0.28515625, + "learning_rate": 1.5896309377728624e-05, + "loss": 0.9718, "step": 14575 }, { - "epoch": 0.4182864028230029, - "grad_norm": 0.390625, - "learning_rate": 0.0001443743916373245, - "loss": 1.0796, + "epoch": 0.836536806471972, + "grad_norm": 0.271484375, + "learning_rate": 1.5842178035027044e-05, + "loss": 0.9949, "step": 14580 }, { - "epoch": 0.4184298480914607, - "grad_norm": 0.400390625, - "learning_rate": 0.00014432951279475255, - "loss": 0.9595, + "epoch": 0.8368236846634919, + "grad_norm": 0.279296875, + "learning_rate": 1.5788131085978032e-05, + "loss": 0.9233, "step": 14585 }, { - "epoch": 0.4185732933599185, - "grad_norm": 0.400390625, - "learning_rate": 0.00014428462283732908, - "loss": 0.9579, + "epoch": 0.8371105628550117, + "grad_norm": 0.259765625, + "learning_rate": 
1.573416858478003e-05, + "loss": 0.9577, "step": 14590 }, { - "epoch": 0.41871673862837633, - "grad_norm": 0.3671875, - "learning_rate": 0.00014423972177630946, - "loss": 0.9664, + "epoch": 0.8373974410465317, + "grad_norm": 0.263671875, + "learning_rate": 1.568029058554672e-05, + "loss": 0.9751, "step": 14595 }, { - "epoch": 0.41886018389683416, - "grad_norm": 0.34765625, - "learning_rate": 0.0001441948096229518, - "loss": 0.8667, + "epoch": 0.8376843192380515, + "grad_norm": 0.2890625, + "learning_rate": 1.5626497142307084e-05, + "loss": 0.9637, "step": 14600 }, { - "epoch": 0.419003629165292, - "grad_norm": 0.443359375, - "learning_rate": 0.00014414988638851713, - "loss": 0.9519, + "epoch": 0.8379711974295714, + "grad_norm": 0.36328125, + "learning_rate": 1.5572788309005315e-05, + "loss": 0.9275, "step": 14605 }, { - "epoch": 0.4191470744337498, - "grad_norm": 0.345703125, - "learning_rate": 0.00014410495208426908, - "loss": 0.869, + "epoch": 0.8382580756210913, + "grad_norm": 0.294921875, + "learning_rate": 1.5519164139500743e-05, + "loss": 0.9635, "step": 14610 }, { - "epoch": 0.4192905197022076, - "grad_norm": 0.404296875, - "learning_rate": 0.0001440600067214742, - "loss": 0.9673, + "epoch": 0.8385449538126112, + "grad_norm": 0.251953125, + "learning_rate": 1.5465624687567816e-05, + "loss": 0.9621, "step": 14615 }, { - "epoch": 0.41943396497066543, - "grad_norm": 0.375, - "learning_rate": 0.00014401505031140171, - "loss": 0.9354, + "epoch": 0.838831832004131, + "grad_norm": 0.263671875, + "learning_rate": 1.5412170006895986e-05, + "loss": 0.8839, "step": 14620 }, { - "epoch": 0.41957741023912326, - "grad_norm": 0.4296875, - "learning_rate": 0.00014397008286532373, - "loss": 0.9192, + "epoch": 0.8391187101956509, + "grad_norm": 0.26953125, + "learning_rate": 1.5358800151089803e-05, + "loss": 0.9448, "step": 14625 }, { - "epoch": 0.4197208555075811, - "grad_norm": 0.36328125, - "learning_rate": 0.00014392510439451494, - "loss": 1.0207, + "epoch": 0.8394055883871708, + "grad_norm": 0.263671875, + "learning_rate": 1.5305515173668594e-05, + "loss": 0.9798, "step": 14630 }, { - "epoch": 0.4198643007760389, - "grad_norm": 0.40234375, - "learning_rate": 0.000143880114910253, - "loss": 0.986, + "epoch": 0.8396924665786907, + "grad_norm": 0.267578125, + "learning_rate": 1.5252315128066663e-05, + "loss": 0.9255, "step": 14635 }, { - "epoch": 0.4200077460444967, - "grad_norm": 0.38671875, - "learning_rate": 0.00014383511442381822, - "loss": 0.988, + "epoch": 0.8399793447702105, + "grad_norm": 0.27734375, + "learning_rate": 1.519920006763319e-05, + "loss": 1.0368, "step": 14640 }, { - "epoch": 0.42015119131295453, - "grad_norm": 0.357421875, - "learning_rate": 0.00014379010294649362, - "loss": 0.9588, + "epoch": 0.8402662229617305, + "grad_norm": 0.279296875, + "learning_rate": 1.5146170045632035e-05, + "loss": 0.9312, "step": 14645 }, { - "epoch": 0.42029463658141236, - "grad_norm": 0.4375, - "learning_rate": 0.00014374508048956515, - "loss": 1.0097, + "epoch": 0.8405531011532503, + "grad_norm": 0.255859375, + "learning_rate": 1.5093225115241838e-05, + "loss": 0.8995, "step": 14650 }, { - "epoch": 0.4204380818498702, - "grad_norm": 0.40234375, - "learning_rate": 0.00014370004706432135, - "loss": 1.0249, + "epoch": 0.8408399793447702, + "grad_norm": 0.259765625, + "learning_rate": 1.5040365329555895e-05, + "loss": 0.9117, "step": 14655 }, { - "epoch": 0.420581527118328, - "grad_norm": 0.390625, - "learning_rate": 0.00014365500268205352, - "loss": 0.9344, + "epoch": 0.8411268575362901, + 
"grad_norm": 0.283203125, + "learning_rate": 1.4987590741582102e-05, + "loss": 0.9783, "step": 14660 }, { - "epoch": 0.4207249723867858, - "grad_norm": 0.3671875, - "learning_rate": 0.00014360994735405585, - "loss": 0.9223, + "epoch": 0.84141373572781, + "grad_norm": 0.2431640625, + "learning_rate": 1.493490140424293e-05, + "loss": 0.8676, "step": 14665 }, { - "epoch": 0.42086841765524363, - "grad_norm": 0.3828125, - "learning_rate": 0.0001435648810916251, - "loss": 1.0185, + "epoch": 0.8417006139193298, + "grad_norm": 0.2470703125, + "learning_rate": 1.4882297370375387e-05, + "loss": 0.891, "step": 14670 }, { - "epoch": 0.42101186292370146, - "grad_norm": 0.390625, - "learning_rate": 0.00014351980390606088, - "loss": 0.9549, + "epoch": 0.8419874921108498, + "grad_norm": 0.294921875, + "learning_rate": 1.4829778692730944e-05, + "loss": 0.9868, "step": 14675 }, { - "epoch": 0.4211553081921593, - "grad_norm": 0.396484375, - "learning_rate": 0.0001434747158086655, - "loss": 1.0161, + "epoch": 0.8422743703023696, + "grad_norm": 0.28515625, + "learning_rate": 1.4777345423975375e-05, + "loss": 0.9805, "step": 14680 }, { - "epoch": 0.4212987534606171, - "grad_norm": 0.37109375, - "learning_rate": 0.000143429616810744, - "loss": 0.9394, + "epoch": 0.8425612484938895, + "grad_norm": 0.265625, + "learning_rate": 1.4724997616688907e-05, + "loss": 0.9984, "step": 14685 }, { - "epoch": 0.4214421987290749, - "grad_norm": 0.37109375, - "learning_rate": 0.00014338450692360418, - "loss": 0.9968, + "epoch": 0.8428481266854094, + "grad_norm": 0.255859375, + "learning_rate": 1.4672735323366061e-05, + "loss": 0.9138, "step": 14690 }, { - "epoch": 0.42158564399753273, - "grad_norm": 0.361328125, - "learning_rate": 0.00014333938615855658, - "loss": 0.9252, + "epoch": 0.8431350048769293, + "grad_norm": 0.267578125, + "learning_rate": 1.4620558596415578e-05, + "loss": 0.8917, "step": 14695 }, { - "epoch": 0.42172908926599056, - "grad_norm": 0.40625, - "learning_rate": 0.0001432942545269144, - "loss": 1.0688, + "epoch": 0.8434218830684491, + "grad_norm": 0.2734375, + "learning_rate": 1.4568467488160386e-05, + "loss": 0.8793, "step": 14700 }, { - "epoch": 0.4218725345344484, - "grad_norm": 0.39453125, - "learning_rate": 0.00014324911203999365, - "loss": 0.8951, + "epoch": 0.843708761259969, + "grad_norm": 0.265625, + "learning_rate": 1.4516462050837564e-05, + "loss": 0.9941, "step": 14705 }, { - "epoch": 0.4220159798029062, - "grad_norm": 0.359375, - "learning_rate": 0.000143203958709113, - "loss": 0.9515, + "epoch": 0.8439956394514889, + "grad_norm": 0.275390625, + "learning_rate": 1.4464542336598274e-05, + "loss": 0.9185, "step": 14710 }, { - "epoch": 0.422159425071364, - "grad_norm": 0.384765625, - "learning_rate": 0.00014315879454559387, - "loss": 1.0336, + "epoch": 0.8442825176430088, + "grad_norm": 0.267578125, + "learning_rate": 1.4412708397507724e-05, + "loss": 0.9379, "step": 14715 }, { - "epoch": 0.42230287033982183, - "grad_norm": 0.396484375, - "learning_rate": 0.00014311361956076036, - "loss": 0.9309, + "epoch": 0.8445693958345286, + "grad_norm": 0.279296875, + "learning_rate": 1.4360960285545133e-05, + "loss": 0.9381, "step": 14720 }, { - "epoch": 0.42244631560827967, - "grad_norm": 0.375, - "learning_rate": 0.0001430684337659393, - "loss": 0.9597, + "epoch": 0.8448562740260486, + "grad_norm": 0.275390625, + "learning_rate": 1.4309298052603626e-05, + "loss": 0.9562, "step": 14725 }, { - "epoch": 0.4225897608767375, - "grad_norm": 0.365234375, - "learning_rate": 0.00014302323717246032, - "loss": 0.9996, 
+ "epoch": 0.8451431522175684, + "grad_norm": 0.271484375, + "learning_rate": 1.4257721750490127e-05, + "loss": 0.9234, "step": 14730 }, { - "epoch": 0.42273320614519533, - "grad_norm": 0.37109375, - "learning_rate": 0.00014297802979165556, - "loss": 0.9507, + "epoch": 0.8454300304090883, + "grad_norm": 0.2490234375, + "learning_rate": 1.4206231430925553e-05, + "loss": 0.9905, "step": 14735 }, { - "epoch": 0.4228766514136531, - "grad_norm": 0.3671875, - "learning_rate": 0.0001429328116348601, - "loss": 0.9587, + "epoch": 0.8457169086006082, + "grad_norm": 0.259765625, + "learning_rate": 1.4154827145544492e-05, + "loss": 0.9166, "step": 14740 }, { - "epoch": 0.42302009668211094, - "grad_norm": 0.41796875, - "learning_rate": 0.00014288758271341156, - "loss": 0.9366, + "epoch": 0.8460037867921281, + "grad_norm": 0.25390625, + "learning_rate": 1.410350894589525e-05, + "loss": 0.9698, "step": 14745 }, { - "epoch": 0.42316354195056877, - "grad_norm": 0.384765625, - "learning_rate": 0.00014284234303865026, - "loss": 0.9987, + "epoch": 0.8462906649836479, + "grad_norm": 0.271484375, + "learning_rate": 1.4052276883439864e-05, + "loss": 0.9123, "step": 14750 }, { - "epoch": 0.4233069872190266, - "grad_norm": 0.40625, - "learning_rate": 0.00014279709262191926, - "loss": 0.8927, + "epoch": 0.8465775431751679, + "grad_norm": 0.2734375, + "learning_rate": 1.4001131009553936e-05, + "loss": 0.9739, "step": 14755 }, { - "epoch": 0.4234504324874844, - "grad_norm": 0.41015625, - "learning_rate": 0.0001427518314745644, - "loss": 0.9655, + "epoch": 0.8468644213666877, + "grad_norm": 0.25390625, + "learning_rate": 1.3950071375526685e-05, + "loss": 1.0007, "step": 14760 }, { - "epoch": 0.4235938777559422, - "grad_norm": 0.4140625, - "learning_rate": 0.000142706559607934, - "loss": 0.9846, + "epoch": 0.8471512995582076, + "grad_norm": 0.26953125, + "learning_rate": 1.3899098032560787e-05, + "loss": 0.8987, "step": 14765 }, { - "epoch": 0.42373732302440004, - "grad_norm": 0.392578125, - "learning_rate": 0.0001426612770333793, - "loss": 0.952, + "epoch": 0.8474381777497275, + "grad_norm": 0.263671875, + "learning_rate": 1.3848211031772473e-05, + "loss": 0.9215, "step": 14770 }, { - "epoch": 0.42388076829285787, - "grad_norm": 0.408203125, - "learning_rate": 0.00014261598376225405, - "loss": 0.9126, + "epoch": 0.8477250559412474, + "grad_norm": 0.25390625, + "learning_rate": 1.3797410424191337e-05, + "loss": 0.9238, "step": 14775 }, { - "epoch": 0.4240242135613157, - "grad_norm": 0.39453125, - "learning_rate": 0.00014257067980591475, - "loss": 1.0636, + "epoch": 0.8480119341327672, + "grad_norm": 0.265625, + "learning_rate": 1.3746696260760295e-05, + "loss": 0.916, "step": 14780 }, { - "epoch": 0.4241676588297735, - "grad_norm": 0.373046875, - "learning_rate": 0.00014252536517572062, - "loss": 1.0732, + "epoch": 0.848298812324287, + "grad_norm": 0.283203125, + "learning_rate": 1.3696068592335676e-05, + "loss": 0.9393, "step": 14785 }, { - "epoch": 0.4243111040982313, - "grad_norm": 0.384765625, - "learning_rate": 0.00014248003988303346, - "loss": 0.938, + "epoch": 0.848585690515807, + "grad_norm": 0.244140625, + "learning_rate": 1.3645527469686992e-05, + "loss": 0.9651, "step": 14790 }, { - "epoch": 0.42445454936668914, - "grad_norm": 0.6640625, - "learning_rate": 0.00014243470393921786, - "loss": 1.1638, + "epoch": 0.8488725687073269, + "grad_norm": 0.259765625, + "learning_rate": 1.3595072943497011e-05, + "loss": 0.9529, "step": 14795 }, { - "epoch": 0.42459799463514697, - "grad_norm": 0.408203125, - 
"learning_rate": 0.00014238935735564094, - "loss": 0.9584, + "epoch": 0.8491594468988467, + "grad_norm": 0.25390625, + "learning_rate": 1.3544705064361629e-05, + "loss": 0.9006, "step": 14800 }, { - "epoch": 0.4247414399036048, - "grad_norm": 0.38671875, - "learning_rate": 0.0001423440001436726, - "loss": 0.9204, + "epoch": 0.8494463250903667, + "grad_norm": 0.26171875, + "learning_rate": 1.3494423882789874e-05, + "loss": 0.9573, "step": 14805 }, { - "epoch": 0.4248848851720626, - "grad_norm": 0.37890625, - "learning_rate": 0.00014229863231468538, - "loss": 0.9139, + "epoch": 0.8497332032818865, + "grad_norm": 0.314453125, + "learning_rate": 1.3444229449203827e-05, + "loss": 1.0288, "step": 14810 }, { - "epoch": 0.4250283304405204, - "grad_norm": 0.408203125, - "learning_rate": 0.00014225325388005447, - "loss": 0.9601, + "epoch": 0.8500200814734064, + "grad_norm": 0.259765625, + "learning_rate": 1.3394121813938554e-05, + "loss": 0.9184, "step": 14815 }, { - "epoch": 0.42517177570897824, - "grad_norm": 0.375, - "learning_rate": 0.00014220786485115772, - "loss": 1.0068, + "epoch": 0.8503069596649263, + "grad_norm": 0.267578125, + "learning_rate": 1.3344101027242161e-05, + "loss": 0.9045, "step": 14820 }, { - "epoch": 0.42531522097743607, - "grad_norm": 0.3828125, - "learning_rate": 0.0001421624652393756, - "loss": 0.9076, + "epoch": 0.8505938378564462, + "grad_norm": 0.255859375, + "learning_rate": 1.3294167139275593e-05, + "loss": 0.9593, "step": 14825 }, { - "epoch": 0.4254586662458939, - "grad_norm": 0.396484375, - "learning_rate": 0.0001421170550560913, - "loss": 0.94, + "epoch": 0.850880716047966, + "grad_norm": 0.26171875, + "learning_rate": 1.3244320200112592e-05, + "loss": 0.9066, "step": 14830 }, { - "epoch": 0.4256021115143517, - "grad_norm": 0.4375, - "learning_rate": 0.00014207163431269066, - "loss": 1.0972, + "epoch": 0.851167594239486, + "grad_norm": 0.2490234375, + "learning_rate": 1.3194560259739863e-05, + "loss": 0.9236, "step": 14835 }, { - "epoch": 0.4257455567828095, - "grad_norm": 0.396484375, - "learning_rate": 0.0001420262030205621, - "loss": 1.009, + "epoch": 0.8514544724310058, + "grad_norm": 0.25, + "learning_rate": 1.3144887368056757e-05, + "loss": 0.923, "step": 14840 }, { - "epoch": 0.42588900205126734, - "grad_norm": 0.416015625, - "learning_rate": 0.00014198076119109675, - "loss": 1.0604, + "epoch": 0.8517413506225257, + "grad_norm": 0.2578125, + "learning_rate": 1.3095301574875363e-05, + "loss": 0.9765, "step": 14845 }, { - "epoch": 0.42603244731972517, - "grad_norm": 0.423828125, - "learning_rate": 0.0001419353088356883, - "loss": 1.0185, + "epoch": 0.8520282288140456, + "grad_norm": 0.279296875, + "learning_rate": 1.3045802929920414e-05, + "loss": 0.9518, "step": 14850 }, { - "epoch": 0.426175892588183, - "grad_norm": 0.373046875, - "learning_rate": 0.00014188984596573317, - "loss": 0.9185, + "epoch": 0.8523151070055655, + "grad_norm": 0.3125, + "learning_rate": 1.2996391482829273e-05, + "loss": 0.8879, "step": 14855 }, { - "epoch": 0.4263193378566408, - "grad_norm": 0.388671875, - "learning_rate": 0.00014184437259263038, - "loss": 1.0431, + "epoch": 0.8526019851970853, + "grad_norm": 0.26953125, + "learning_rate": 1.2947067283151837e-05, + "loss": 0.8637, "step": 14860 }, { - "epoch": 0.4264627831250986, - "grad_norm": 0.435546875, - "learning_rate": 0.0001417988887277816, - "loss": 0.9603, + "epoch": 0.8528888633886051, + "grad_norm": 0.26171875, + "learning_rate": 1.289783038035055e-05, + "loss": 1.0178, "step": 14865 }, { - "epoch": 0.42660622839355644, 
- "grad_norm": 0.369140625, - "learning_rate": 0.00014175339438259112, - "loss": 0.9351, + "epoch": 0.8531757415801251, + "grad_norm": 0.29296875, + "learning_rate": 1.2848680823800275e-05, + "loss": 0.8906, "step": 14870 }, { - "epoch": 0.4267496736620143, - "grad_norm": 0.396484375, - "learning_rate": 0.00014170788956846582, - "loss": 0.9506, + "epoch": 0.853462619771645, + "grad_norm": 0.2578125, + "learning_rate": 1.2799618662788315e-05, + "loss": 0.9234, "step": 14875 }, { - "epoch": 0.4268931189304721, - "grad_norm": 0.431640625, - "learning_rate": 0.00014166237429681525, - "loss": 0.8922, + "epoch": 0.8537494979631648, + "grad_norm": 0.283203125, + "learning_rate": 1.2750643946514252e-05, + "loss": 0.9215, "step": 14880 }, { - "epoch": 0.4270365641989299, - "grad_norm": 0.419921875, - "learning_rate": 0.00014161684857905157, - "loss": 0.9371, + "epoch": 0.8540363761546848, + "grad_norm": 0.259765625, + "learning_rate": 1.2701756724090108e-05, + "loss": 0.893, "step": 14885 }, { - "epoch": 0.4271800094673877, - "grad_norm": 0.3515625, - "learning_rate": 0.00014157131242658957, - "loss": 1.0355, + "epoch": 0.8543232543462046, + "grad_norm": 0.26171875, + "learning_rate": 1.2652957044540082e-05, + "loss": 0.935, "step": 14890 }, { - "epoch": 0.42732345473584554, - "grad_norm": 0.41796875, - "learning_rate": 0.00014152576585084662, - "loss": 0.8861, + "epoch": 0.8546101325377244, + "grad_norm": 0.2470703125, + "learning_rate": 1.2604244956800593e-05, + "loss": 0.9196, "step": 14895 }, { - "epoch": 0.4274669000043034, - "grad_norm": 0.369140625, - "learning_rate": 0.0001414802088632428, - "loss": 0.8792, + "epoch": 0.8548970107292444, + "grad_norm": 0.255859375, + "learning_rate": 1.2555620509720233e-05, + "loss": 0.9674, "step": 14900 }, { - "epoch": 0.4276103452727612, - "grad_norm": 0.376953125, - "learning_rate": 0.00014143464147520064, - "loss": 0.8224, + "epoch": 0.8551838889207642, + "grad_norm": 0.2890625, + "learning_rate": 1.2507083752059723e-05, + "loss": 0.9899, "step": 14905 }, { - "epoch": 0.427753790541219, - "grad_norm": 0.416015625, - "learning_rate": 0.00014138906369814538, - "loss": 0.9611, + "epoch": 0.8554707671122841, + "grad_norm": 0.31640625, + "learning_rate": 1.2458634732491781e-05, + "loss": 0.9745, "step": 14910 }, { - "epoch": 0.4278972358096768, - "grad_norm": 0.408203125, - "learning_rate": 0.00014134347554350488, - "loss": 0.973, + "epoch": 0.855757645303804, + "grad_norm": 0.279296875, + "learning_rate": 1.2410273499601266e-05, + "loss": 0.9971, "step": 14915 }, { - "epoch": 0.42804068107813464, - "grad_norm": 0.3671875, - "learning_rate": 0.0001412978770227096, - "loss": 0.9369, + "epoch": 0.8560445234953239, + "grad_norm": 0.26171875, + "learning_rate": 1.2362000101884885e-05, + "loss": 0.9514, "step": 14920 }, { - "epoch": 0.4281841263465925, - "grad_norm": 0.400390625, - "learning_rate": 0.00014125226814719252, - "loss": 0.9343, + "epoch": 0.8563314016868437, + "grad_norm": 0.26171875, + "learning_rate": 1.2313814587751316e-05, + "loss": 0.9545, "step": 14925 }, { - "epoch": 0.42832757161505025, - "grad_norm": 0.416015625, - "learning_rate": 0.00014120664892838933, - "loss": 1.0246, + "epoch": 0.8566182798783637, + "grad_norm": 0.265625, + "learning_rate": 1.2265717005521115e-05, + "loss": 0.9368, "step": 14930 }, { - "epoch": 0.4284710168835081, - "grad_norm": 0.396484375, - "learning_rate": 0.00014116101937773818, - "loss": 0.9155, + "epoch": 0.8569051580698835, + "grad_norm": 0.26953125, + "learning_rate": 1.2217707403426627e-05, + "loss": 0.9405, 
"step": 14935 }, { - "epoch": 0.4286144621519659, - "grad_norm": 0.38671875, - "learning_rate": 0.0001411153795066799, - "loss": 0.9956, + "epoch": 0.8571920362614034, + "grad_norm": 0.263671875, + "learning_rate": 1.2169785829612001e-05, + "loss": 0.9264, "step": 14940 }, { - "epoch": 0.42875790742042375, - "grad_norm": 0.390625, - "learning_rate": 0.00014106972932665795, - "loss": 1.0208, + "epoch": 0.8574789144529232, + "grad_norm": 0.27734375, + "learning_rate": 1.2121952332133091e-05, + "loss": 1.0209, "step": 14945 }, { - "epoch": 0.4289013526888816, - "grad_norm": 0.392578125, - "learning_rate": 0.00014102406884911826, - "loss": 0.9807, + "epoch": 0.8577657926444432, + "grad_norm": 0.26953125, + "learning_rate": 1.2074206958957447e-05, + "loss": 0.9528, "step": 14950 }, { - "epoch": 0.42904479795733935, - "grad_norm": 0.3984375, - "learning_rate": 0.0001409783980855094, - "loss": 0.9437, + "epoch": 0.858052670835963, + "grad_norm": 0.2333984375, + "learning_rate": 1.2026549757964212e-05, + "loss": 0.8766, "step": 14955 }, { - "epoch": 0.4291882432257972, - "grad_norm": 0.373046875, - "learning_rate": 0.00014093271704728252, - "loss": 1.022, + "epoch": 0.8583395490274829, + "grad_norm": 0.27734375, + "learning_rate": 1.1978980776944137e-05, + "loss": 0.9347, "step": 14960 }, { - "epoch": 0.429331688494255, - "grad_norm": 0.3984375, - "learning_rate": 0.00014088702574589136, - "loss": 0.9925, + "epoch": 0.8586264272190028, + "grad_norm": 0.259765625, + "learning_rate": 1.1931500063599543e-05, + "loss": 0.9569, "step": 14965 }, { - "epoch": 0.42947513376271285, - "grad_norm": 0.39453125, - "learning_rate": 0.00014084132419279224, - "loss": 0.971, + "epoch": 0.8589133054105227, + "grad_norm": 0.255859375, + "learning_rate": 1.1884107665544164e-05, + "loss": 0.9079, "step": 14970 }, { - "epoch": 0.4296185790311707, - "grad_norm": 0.375, - "learning_rate": 0.00014079561239944396, - "loss": 1.0027, + "epoch": 0.8592001836020425, + "grad_norm": 0.255859375, + "learning_rate": 1.1836803630303206e-05, + "loss": 0.9977, "step": 14975 }, { - "epoch": 0.42976202429962845, - "grad_norm": 0.416015625, - "learning_rate": 0.00014074989037730798, - "loss": 1.0025, + "epoch": 0.8594870617935625, + "grad_norm": 0.275390625, + "learning_rate": 1.1789588005313257e-05, + "loss": 0.9528, "step": 14980 }, { - "epoch": 0.4299054695680863, - "grad_norm": 0.369140625, - "learning_rate": 0.00014070415813784834, - "loss": 1.0027, + "epoch": 0.8597739399850823, + "grad_norm": 0.279296875, + "learning_rate": 1.1742460837922265e-05, + "loss": 0.936, "step": 14985 }, { - "epoch": 0.4300489148365441, - "grad_norm": 0.404296875, - "learning_rate": 0.00014065841569253155, - "loss": 0.95, + "epoch": 0.8600608181766022, + "grad_norm": 0.2314453125, + "learning_rate": 1.1695422175389447e-05, + "loss": 0.9134, "step": 14990 }, { - "epoch": 0.43019236010500195, - "grad_norm": 0.404296875, - "learning_rate": 0.00014061266305282676, - "loss": 0.9817, + "epoch": 0.8603476963681221, + "grad_norm": 0.2734375, + "learning_rate": 1.1648472064885286e-05, + "loss": 0.991, "step": 14995 }, { - "epoch": 0.4303358053734598, - "grad_norm": 0.388671875, - "learning_rate": 0.00014056690023020566, - "loss": 0.9876, + "epoch": 0.860634574559642, + "grad_norm": 0.25, + "learning_rate": 1.160161055349146e-05, + "loss": 0.8948, "step": 15000 }, { - "epoch": 0.43047925064191755, - "grad_norm": 0.37890625, - "learning_rate": 0.0001405211272361424, - "loss": 1.0072, + "epoch": 0.8609214527511618, + "grad_norm": 0.27734375, + "learning_rate": 
1.1554837688200793e-05, + "loss": 0.9878, "step": 15005 }, { - "epoch": 0.4306226959103754, - "grad_norm": 0.357421875, - "learning_rate": 0.00014047534408211383, - "loss": 0.9412, + "epoch": 0.8612083309426818, + "grad_norm": 0.2451171875, + "learning_rate": 1.1508153515917196e-05, + "loss": 0.9045, "step": 15010 }, { - "epoch": 0.4307661411788332, - "grad_norm": 0.40234375, - "learning_rate": 0.00014042955077959925, - "loss": 0.9912, + "epoch": 0.8614952091342016, + "grad_norm": 0.248046875, + "learning_rate": 1.1461558083455704e-05, + "loss": 0.9272, "step": 15015 }, { - "epoch": 0.43090958644729105, - "grad_norm": 0.37890625, - "learning_rate": 0.00014038374734008058, - "loss": 0.9333, + "epoch": 0.8617820873257215, + "grad_norm": 0.2412109375, + "learning_rate": 1.1415051437542302e-05, + "loss": 0.9048, "step": 15020 }, { - "epoch": 0.4310530317157489, - "grad_norm": 0.388671875, - "learning_rate": 0.00014033793377504213, - "loss": 0.9658, + "epoch": 0.8620689655172413, + "grad_norm": 0.28125, + "learning_rate": 1.1368633624813974e-05, + "loss": 0.9666, "step": 15025 }, { - "epoch": 0.43119647698420666, - "grad_norm": 0.35546875, - "learning_rate": 0.00014029211009597097, - "loss": 0.8828, + "epoch": 0.8623558437087613, + "grad_norm": 0.2431640625, + "learning_rate": 1.1322304691818575e-05, + "loss": 1.013, "step": 15030 }, { - "epoch": 0.4313399222526645, - "grad_norm": 0.365234375, - "learning_rate": 0.00014024627631435653, - "loss": 1.0459, + "epoch": 0.8626427219002811, + "grad_norm": 0.267578125, + "learning_rate": 1.1276064685014886e-05, + "loss": 0.9382, "step": 15035 }, { - "epoch": 0.4314833675211223, - "grad_norm": 0.39453125, - "learning_rate": 0.00014020043244169082, - "loss": 0.9513, + "epoch": 0.862929600091801, + "grad_norm": 0.263671875, + "learning_rate": 1.1229913650772472e-05, + "loss": 0.9891, "step": 15040 }, { - "epoch": 0.43162681278958015, - "grad_norm": 0.396484375, - "learning_rate": 0.0001401545784894684, - "loss": 0.981, + "epoch": 0.8632164782833209, + "grad_norm": 0.267578125, + "learning_rate": 1.1183851635371734e-05, + "loss": 0.9336, "step": 15045 }, { - "epoch": 0.431770258058038, - "grad_norm": 0.3671875, - "learning_rate": 0.00014010871446918635, - "loss": 0.821, + "epoch": 0.8635033564748408, + "grad_norm": 0.3046875, + "learning_rate": 1.1137878685003722e-05, + "loss": 0.9662, "step": 15050 }, { - "epoch": 0.43191370332649576, - "grad_norm": 0.380859375, - "learning_rate": 0.00014006284039234424, - "loss": 0.9453, + "epoch": 0.8637902346663606, + "grad_norm": 0.2490234375, + "learning_rate": 1.1091994845770226e-05, + "loss": 0.9321, "step": 15055 }, { - "epoch": 0.4320571485949536, - "grad_norm": 0.376953125, - "learning_rate": 0.00014001695627044428, - "loss": 0.8675, + "epoch": 0.8640771128578806, + "grad_norm": 0.259765625, + "learning_rate": 1.104620016368364e-05, + "loss": 1.0532, "step": 15060 }, { - "epoch": 0.4322005938634114, - "grad_norm": 0.376953125, - "learning_rate": 0.00013997106211499104, - "loss": 0.9946, + "epoch": 0.8643639910494004, + "grad_norm": 0.26953125, + "learning_rate": 1.1000494684667017e-05, + "loss": 0.9349, "step": 15065 }, { - "epoch": 0.43234403913186925, - "grad_norm": 0.369140625, - "learning_rate": 0.00013992515793749172, - "loss": 0.9263, + "epoch": 0.8646508692409203, + "grad_norm": 0.25, + "learning_rate": 1.0954878454553908e-05, + "loss": 0.8553, "step": 15070 }, { - "epoch": 0.4324874844003271, - "grad_norm": 0.34765625, - "learning_rate": 0.00013987924374945596, - "loss": 0.9118, + "epoch": 
0.8649377474324402, + "grad_norm": 0.265625, + "learning_rate": 1.0909351519088352e-05, + "loss": 0.9111, "step": 15075 }, { - "epoch": 0.43263092966878486, - "grad_norm": 0.421875, - "learning_rate": 0.00013983331956239596, - "loss": 0.9953, + "epoch": 0.8652246256239601, + "grad_norm": 0.263671875, + "learning_rate": 1.0863913923924862e-05, + "loss": 0.9779, "step": 15080 }, { - "epoch": 0.4327743749372427, - "grad_norm": 0.357421875, - "learning_rate": 0.00013978738538782638, - "loss": 1.0045, + "epoch": 0.8655115038154799, + "grad_norm": 0.298828125, + "learning_rate": 1.081856571462837e-05, + "loss": 0.9526, "step": 15085 }, { - "epoch": 0.4329178202057005, - "grad_norm": 0.3671875, - "learning_rate": 0.00013974144123726442, - "loss": 0.9422, + "epoch": 0.8657983820069999, + "grad_norm": 0.294921875, + "learning_rate": 1.0773306936674133e-05, + "loss": 0.9902, "step": 15090 }, { - "epoch": 0.43306126547415835, - "grad_norm": 0.373046875, - "learning_rate": 0.00013969548712222985, - "loss": 0.9413, + "epoch": 0.8660852601985197, + "grad_norm": 0.369140625, + "learning_rate": 1.0728137635447821e-05, + "loss": 1.0987, "step": 15095 }, { - "epoch": 0.43320471074261613, - "grad_norm": 0.38671875, - "learning_rate": 0.00013964952305424474, - "loss": 1.0141, + "epoch": 0.8663721383900396, + "grad_norm": 0.2412109375, + "learning_rate": 1.0683057856245259e-05, + "loss": 0.9029, "step": 15100 }, { - "epoch": 0.43334815601107396, - "grad_norm": 0.36328125, - "learning_rate": 0.00013960354904483385, - "loss": 1.0132, + "epoch": 0.8666590165815594, + "grad_norm": 0.255859375, + "learning_rate": 1.0638067644272532e-05, + "loss": 0.9804, "step": 15105 }, { - "epoch": 0.4334916012795318, - "grad_norm": 0.40625, - "learning_rate": 0.00013955756510552437, - "loss": 1.0132, + "epoch": 0.8669458947730794, + "grad_norm": 0.2578125, + "learning_rate": 1.059316704464598e-05, + "loss": 0.9642, "step": 15110 }, { - "epoch": 0.4336350465479896, - "grad_norm": 0.384765625, - "learning_rate": 0.00013951157124784591, - "loss": 1.0243, + "epoch": 0.8672327729645992, + "grad_norm": 0.291015625, + "learning_rate": 1.0548356102391999e-05, + "loss": 0.9328, "step": 15115 }, { - "epoch": 0.43377849181644745, - "grad_norm": 0.41796875, - "learning_rate": 0.0001394655674833307, - "loss": 1.0685, + "epoch": 0.8675196511561191, + "grad_norm": 0.26171875, + "learning_rate": 1.0503634862447099e-05, + "loss": 0.9363, "step": 15120 }, { - "epoch": 0.43392193708490523, - "grad_norm": 0.380859375, - "learning_rate": 0.00013941955382351334, - "loss": 0.9235, + "epoch": 0.867806529347639, + "grad_norm": 0.265625, + "learning_rate": 1.0459003369657849e-05, + "loss": 0.9308, "step": 15125 }, { - "epoch": 0.43406538235336306, - "grad_norm": 0.419921875, - "learning_rate": 0.00013937353027993092, - "loss": 0.8905, + "epoch": 0.8680934075391589, + "grad_norm": 0.26171875, + "learning_rate": 1.0414461668780806e-05, + "loss": 0.889, "step": 15130 }, { - "epoch": 0.4342088276218209, - "grad_norm": 0.416015625, - "learning_rate": 0.0001393274968641231, - "loss": 0.9237, + "epoch": 0.8683802857306787, + "grad_norm": 0.27734375, + "learning_rate": 1.0370009804482483e-05, + "loss": 0.8834, "step": 15135 }, { - "epoch": 0.4343522728902787, - "grad_norm": 0.36328125, - "learning_rate": 0.00013928145358763194, - "loss": 0.9225, + "epoch": 0.8686671639221987, + "grad_norm": 0.255859375, + "learning_rate": 1.032564782133929e-05, + "loss": 0.9435, "step": 15140 }, { - "epoch": 0.43449571815873655, - "grad_norm": 0.359375, - "learning_rate": 
0.00013923540046200197, - "loss": 0.9208, + "epoch": 0.8689540421137185, + "grad_norm": 0.255859375, + "learning_rate": 1.0281375763837598e-05, + "loss": 0.932, "step": 15145 }, { - "epoch": 0.43463916342719433, - "grad_norm": 0.380859375, - "learning_rate": 0.00013918933749878024, - "loss": 1.005, + "epoch": 0.8692409203052384, + "grad_norm": 0.2451171875, + "learning_rate": 1.0237193676373435e-05, + "loss": 0.9304, "step": 15150 }, { - "epoch": 0.43478260869565216, - "grad_norm": 0.41015625, - "learning_rate": 0.00013914326470951618, - "loss": 0.9634, + "epoch": 0.8695277984967583, + "grad_norm": 0.267578125, + "learning_rate": 1.019310160325273e-05, + "loss": 0.9592, "step": 15155 }, { - "epoch": 0.43492605396411, - "grad_norm": 0.42578125, - "learning_rate": 0.00013909718210576179, - "loss": 1.0169, + "epoch": 0.8698146766882782, + "grad_norm": 0.26171875, + "learning_rate": 1.0149099588691135e-05, + "loss": 0.9544, "step": 15160 }, { - "epoch": 0.4350694992325678, - "grad_norm": 0.42578125, - "learning_rate": 0.00013905108969907145, - "loss": 0.9786, + "epoch": 0.870101554879798, + "grad_norm": 0.275390625, + "learning_rate": 1.0105187676813954e-05, + "loss": 1.026, "step": 15165 }, { - "epoch": 0.43521294450102566, - "grad_norm": 0.36328125, - "learning_rate": 0.000139004987501002, - "loss": 0.9196, + "epoch": 0.870388433071318, + "grad_norm": 0.333984375, + "learning_rate": 1.006136591165614e-05, + "loss": 1.0247, "step": 15170 }, { - "epoch": 0.43535638976948343, - "grad_norm": 0.345703125, - "learning_rate": 0.00013895887552311284, - "loss": 0.9894, + "epoch": 0.8706753112628378, + "grad_norm": 0.26171875, + "learning_rate": 1.0017634337162275e-05, + "loss": 0.9566, "step": 15175 }, { - "epoch": 0.43549983503794126, - "grad_norm": 0.361328125, - "learning_rate": 0.0001389127537769657, - "loss": 0.8707, + "epoch": 0.8709621894543577, + "grad_norm": 0.26953125, + "learning_rate": 9.973992997186465e-06, + "loss": 0.9185, "step": 15180 }, { - "epoch": 0.4356432803063991, - "grad_norm": 0.3828125, - "learning_rate": 0.0001388666222741248, - "loss": 0.9646, + "epoch": 0.8712490676458775, + "grad_norm": 0.265625, + "learning_rate": 9.930441935492363e-06, + "loss": 0.9289, "step": 15185 }, { - "epoch": 0.4357867255748569, - "grad_norm": 0.37109375, - "learning_rate": 0.00013882048102615676, - "loss": 0.9041, + "epoch": 0.8715359458373975, + "grad_norm": 0.259765625, + "learning_rate": 9.88698119575302e-06, + "loss": 0.9605, "step": 15190 }, { - "epoch": 0.43593017084331476, - "grad_norm": 0.375, - "learning_rate": 0.0001387743300446308, - "loss": 0.8497, + "epoch": 0.8718228240289173, + "grad_norm": 0.265625, + "learning_rate": 9.843610821551053e-06, + "loss": 0.9547, "step": 15195 }, { - "epoch": 0.43607361611177253, - "grad_norm": 0.375, - "learning_rate": 0.00013872816934111838, - "loss": 0.988, + "epoch": 0.8721097022204372, + "grad_norm": 0.265625, + "learning_rate": 9.800330856378303e-06, + "loss": 0.9544, "step": 15200 }, { - "epoch": 0.43621706138023036, - "grad_norm": 0.357421875, - "learning_rate": 0.00013868199892719358, - "loss": 0.9253, + "epoch": 0.8723965804119571, + "grad_norm": 0.267578125, + "learning_rate": 9.757141343636e-06, + "loss": 0.9572, "step": 15205 }, { - "epoch": 0.4363605066486882, - "grad_norm": 0.427734375, - "learning_rate": 0.00013863581881443275, - "loss": 0.9306, + "epoch": 0.872683458603477, + "grad_norm": 0.29296875, + "learning_rate": 9.714042326634743e-06, + "loss": 0.9438, "step": 15210 }, { - "epoch": 0.436503951917146, - "grad_norm": 
0.369140625, - "learning_rate": 0.00013858962901441474, - "loss": 1.03, + "epoch": 0.8729703367949968, + "grad_norm": 0.275390625, + "learning_rate": 9.671033848594301e-06, + "loss": 0.9435, "step": 15215 }, { - "epoch": 0.43664739718560386, - "grad_norm": 0.39453125, - "learning_rate": 0.0001385434295387209, - "loss": 0.8721, + "epoch": 0.8732572149865168, + "grad_norm": 0.2578125, + "learning_rate": 9.628115952643657e-06, + "loss": 0.995, "step": 15220 }, { - "epoch": 0.43679084245406163, - "grad_norm": 0.419921875, - "learning_rate": 0.0001384972203989349, - "loss": 0.9904, + "epoch": 0.8735440931780366, + "grad_norm": 0.35546875, + "learning_rate": 9.585288681820992e-06, + "loss": 0.997, "step": 15225 }, { - "epoch": 0.43693428772251947, - "grad_norm": 0.416015625, - "learning_rate": 0.00013845100160664287, - "loss": 0.9614, + "epoch": 0.8738309713695565, + "grad_norm": 0.271484375, + "learning_rate": 9.542552079073586e-06, + "loss": 0.9715, "step": 15230 }, { - "epoch": 0.4370777329909773, - "grad_norm": 0.412109375, - "learning_rate": 0.00013840477317343338, - "loss": 0.9085, + "epoch": 0.8741178495610764, + "grad_norm": 0.263671875, + "learning_rate": 9.499906187257768e-06, + "loss": 0.9445, "step": 15235 }, { - "epoch": 0.43722117825943513, - "grad_norm": 0.3828125, - "learning_rate": 0.0001383585351108974, - "loss": 1.0012, + "epoch": 0.8744047277525963, + "grad_norm": 0.2578125, + "learning_rate": 9.457351049138974e-06, + "loss": 0.9681, "step": 15240 }, { - "epoch": 0.43736462352789296, - "grad_norm": 0.4921875, - "learning_rate": 0.00013831228743062832, - "loss": 1.0841, + "epoch": 0.8746916059441161, + "grad_norm": 0.279296875, + "learning_rate": 9.414886707391613e-06, + "loss": 0.9653, "step": 15245 }, { - "epoch": 0.43750806879635074, - "grad_norm": 0.408203125, - "learning_rate": 0.00013826603014422192, - "loss": 0.9503, + "epoch": 0.8749784841356361, + "grad_norm": 0.27734375, + "learning_rate": 9.372513204598954e-06, + "loss": 0.8629, "step": 15250 }, { - "epoch": 0.43765151406480857, - "grad_norm": 0.41015625, - "learning_rate": 0.00013821976326327645, - "loss": 1.0835, + "epoch": 0.8752653623271559, + "grad_norm": 0.25, + "learning_rate": 9.330230583253263e-06, + "loss": 0.9324, "step": 15255 }, { - "epoch": 0.4377949593332664, - "grad_norm": 0.412109375, - "learning_rate": 0.0001381734867993925, - "loss": 1.0067, + "epoch": 0.8755522405186758, + "grad_norm": 0.2578125, + "learning_rate": 9.288038885755679e-06, + "loss": 0.9968, "step": 15260 }, { - "epoch": 0.43793840460172423, - "grad_norm": 0.38671875, - "learning_rate": 0.0001381272007641731, - "loss": 1.0134, + "epoch": 0.8758391187101956, + "grad_norm": 0.283203125, + "learning_rate": 9.245938154416112e-06, + "loss": 0.9753, "step": 15265 }, { - "epoch": 0.438081849870182, - "grad_norm": 0.404296875, - "learning_rate": 0.0001380809051692236, - "loss": 0.9812, + "epoch": 0.8761259969017156, + "grad_norm": 0.259765625, + "learning_rate": 9.203928431453269e-06, + "loss": 0.9914, "step": 15270 }, { - "epoch": 0.43822529513863984, - "grad_norm": 0.41015625, - "learning_rate": 0.00013803460002615188, - "loss": 1.1068, + "epoch": 0.8764128750932354, + "grad_norm": 0.255859375, + "learning_rate": 9.162009758994593e-06, + "loss": 0.9888, "step": 15275 }, { - "epoch": 0.43836874040709767, - "grad_norm": 0.380859375, - "learning_rate": 0.00013798828534656812, - "loss": 0.9444, + "epoch": 0.8766997532847552, + "grad_norm": 0.271484375, + "learning_rate": 9.12018217907622e-06, + "loss": 0.9092, "step": 15280 }, { - "epoch": 
0.4385121856755555, - "grad_norm": 0.37890625, - "learning_rate": 0.00013794196114208493, - "loss": 0.9099, + "epoch": 0.8769866314762752, + "grad_norm": 0.283203125, + "learning_rate": 9.078445733642926e-06, + "loss": 0.9634, "step": 15285 }, { - "epoch": 0.43865563094401333, - "grad_norm": 0.3984375, - "learning_rate": 0.00013789562742431727, - "loss": 0.9219, + "epoch": 0.877273509667795, + "grad_norm": 0.2734375, + "learning_rate": 9.036800464548157e-06, + "loss": 0.9656, "step": 15290 }, { - "epoch": 0.4387990762124711, - "grad_norm": 0.416015625, - "learning_rate": 0.00013784928420488256, - "loss": 1.0304, + "epoch": 0.8775603878593149, + "grad_norm": 0.2578125, + "learning_rate": 8.995246413553871e-06, + "loss": 0.8617, "step": 15295 }, { - "epoch": 0.43894252148092894, - "grad_norm": 0.427734375, - "learning_rate": 0.00013780293149540053, - "loss": 0.9137, + "epoch": 0.8778472660508349, + "grad_norm": 0.341796875, + "learning_rate": 8.953783622330515e-06, + "loss": 0.925, "step": 15300 }, { - "epoch": 0.43908596674938677, - "grad_norm": 0.3671875, - "learning_rate": 0.00013775656930749327, - "loss": 0.9693, + "epoch": 0.8781341442423547, + "grad_norm": 0.27734375, + "learning_rate": 8.912412132457116e-06, + "loss": 0.9452, "step": 15305 }, { - "epoch": 0.4392294120178446, - "grad_norm": 0.357421875, - "learning_rate": 0.00013771019765278537, - "loss": 0.934, + "epoch": 0.8784210224338745, + "grad_norm": 0.26171875, + "learning_rate": 8.871131985421089e-06, + "loss": 0.9806, "step": 15310 }, { - "epoch": 0.43937285728630243, - "grad_norm": 0.3828125, - "learning_rate": 0.00013766381654290363, - "loss": 0.9039, + "epoch": 0.8787079006253945, + "grad_norm": 0.2578125, + "learning_rate": 8.829943222618242e-06, + "loss": 0.9902, "step": 15315 }, { - "epoch": 0.4395163025547602, - "grad_norm": 0.419921875, - "learning_rate": 0.00013761742598947734, - "loss": 0.9921, + "epoch": 0.8789947788169143, + "grad_norm": 0.265625, + "learning_rate": 8.788845885352782e-06, + "loss": 0.8548, "step": 15320 }, { - "epoch": 0.43965974782321804, - "grad_norm": 0.390625, - "learning_rate": 0.00013757102600413815, - "loss": 0.98, + "epoch": 0.8792816570084342, + "grad_norm": 0.248046875, + "learning_rate": 8.747840014837194e-06, + "loss": 0.9579, "step": 15325 }, { - "epoch": 0.43980319309167587, - "grad_norm": 0.373046875, - "learning_rate": 0.00013752461659852, - "loss": 0.9994, + "epoch": 0.8795685351999541, + "grad_norm": 0.275390625, + "learning_rate": 8.706925652192255e-06, + "loss": 0.9575, "step": 15330 }, { - "epoch": 0.4399466383601337, - "grad_norm": 0.408203125, - "learning_rate": 0.00013747819778425928, - "loss": 1.0342, + "epoch": 0.879855413391474, + "grad_norm": 0.2734375, + "learning_rate": 8.666102838446976e-06, + "loss": 0.9764, "step": 15335 }, { - "epoch": 0.44009008362859153, - "grad_norm": 0.3828125, - "learning_rate": 0.00013743176957299464, - "loss": 0.9217, + "epoch": 0.8801422915829938, + "grad_norm": 0.259765625, + "learning_rate": 8.625371614538591e-06, + "loss": 0.9222, "step": 15340 }, { - "epoch": 0.4402335288970493, - "grad_norm": 0.416015625, - "learning_rate": 0.0001373853319763672, - "loss": 0.9654, + "epoch": 0.8804291697745137, + "grad_norm": 0.2451171875, + "learning_rate": 8.584732021312469e-06, + "loss": 0.9307, "step": 15345 }, { - "epoch": 0.44037697416550714, - "grad_norm": 0.373046875, - "learning_rate": 0.00013733888500602038, - "loss": 0.9761, + "epoch": 0.8807160479660336, + "grad_norm": 0.310546875, + "learning_rate": 8.544184099522024e-06, + "loss": 
0.9791, "step": 15350 }, { - "epoch": 0.44052041943396497, - "grad_norm": 0.37890625, - "learning_rate": 0.00013729242867359989, - "loss": 0.8837, + "epoch": 0.8810029261575535, + "grad_norm": 0.271484375, + "learning_rate": 8.50372788982886e-06, + "loss": 0.9455, "step": 15355 }, { - "epoch": 0.4406638647024228, - "grad_norm": 0.361328125, - "learning_rate": 0.00013724596299075388, - "loss": 1.0122, + "epoch": 0.8812898043490733, + "grad_norm": 0.25390625, + "learning_rate": 8.46336343280254e-06, + "loss": 0.8782, "step": 15360 }, { - "epoch": 0.44080730997088063, - "grad_norm": 0.34765625, - "learning_rate": 0.00013719948796913283, - "loss": 0.8826, + "epoch": 0.8815766825405933, + "grad_norm": 0.279296875, + "learning_rate": 8.423090768920628e-06, + "loss": 0.9239, "step": 15365 }, { - "epoch": 0.4409507552393384, - "grad_norm": 0.369140625, - "learning_rate": 0.0001371530036203895, - "loss": 1.0266, + "epoch": 0.8818635607321131, + "grad_norm": 0.279296875, + "learning_rate": 8.38290993856865e-06, + "loss": 0.9948, "step": 15370 }, { - "epoch": 0.44109420050779624, - "grad_norm": 0.359375, - "learning_rate": 0.00013710650995617902, - "loss": 0.8831, + "epoch": 0.882150438923633, + "grad_norm": 0.271484375, + "learning_rate": 8.342820982040011e-06, + "loss": 1.0173, "step": 15375 }, { - "epoch": 0.4412376457762541, - "grad_norm": 0.400390625, - "learning_rate": 0.00013706000698815893, - "loss": 0.9896, + "epoch": 0.8824373171151529, + "grad_norm": 0.263671875, + "learning_rate": 8.30282393953603e-06, + "loss": 0.9433, "step": 15380 }, { - "epoch": 0.4413810910447119, - "grad_norm": 0.427734375, - "learning_rate": 0.000137013494727989, - "loss": 0.9529, + "epoch": 0.8827241953066728, + "grad_norm": 0.251953125, + "learning_rate": 8.262918851165813e-06, + "loss": 0.8967, "step": 15385 }, { - "epoch": 0.44152453631316974, - "grad_norm": 0.390625, - "learning_rate": 0.00013696697318733134, - "loss": 0.9079, + "epoch": 0.8830110734981926, + "grad_norm": 0.26171875, + "learning_rate": 8.223105756946292e-06, + "loss": 0.9086, "step": 15390 }, { - "epoch": 0.4416679815816275, - "grad_norm": 0.3984375, - "learning_rate": 0.00013692044237785045, - "loss": 0.8783, + "epoch": 0.8832979516897126, + "grad_norm": 0.25, + "learning_rate": 8.183384696802132e-06, + "loss": 0.9384, "step": 15395 }, { - "epoch": 0.44181142685008534, - "grad_norm": 0.408203125, - "learning_rate": 0.00013687390231121314, - "loss": 0.9692, + "epoch": 0.8835848298812324, + "grad_norm": 0.279296875, + "learning_rate": 8.143755710565648e-06, + "loss": 0.9808, "step": 15400 }, { - "epoch": 0.4419548721185432, - "grad_norm": 0.408203125, - "learning_rate": 0.00013682735299908844, - "loss": 0.9314, + "epoch": 0.8838717080727523, + "grad_norm": 0.279296875, + "learning_rate": 8.10421883797694e-06, + "loss": 0.9723, "step": 15405 }, { - "epoch": 0.442098317387001, - "grad_norm": 0.39453125, - "learning_rate": 0.00013678079445314783, - "loss": 0.8762, + "epoch": 0.8841585862642722, + "grad_norm": 0.2353515625, + "learning_rate": 8.064774118683638e-06, + "loss": 0.8979, "step": 15410 }, { - "epoch": 0.44224176265545884, - "grad_norm": 0.37890625, - "learning_rate": 0.00013673422668506507, - "loss": 1.0317, + "epoch": 0.8844454644557921, + "grad_norm": 0.259765625, + "learning_rate": 8.025421592241012e-06, + "loss": 0.9903, "step": 15415 }, { - "epoch": 0.4423852079239166, - "grad_norm": 0.408203125, - "learning_rate": 0.00013668764970651615, - "loss": 0.913, + "epoch": 0.8847323426473119, + "grad_norm": 0.275390625, + "learning_rate": 
7.98616129811185e-06, + "loss": 0.9551, "step": 15420 }, { - "epoch": 0.44252865319237444, - "grad_norm": 0.365234375, - "learning_rate": 0.00013664106352917954, - "loss": 0.8971, + "epoch": 0.8850192208388318, + "grad_norm": 0.28515625, + "learning_rate": 7.94699327566647e-06, + "loss": 0.995, "step": 15425 }, { - "epoch": 0.4426720984608323, - "grad_norm": 0.353515625, - "learning_rate": 0.0001365944681647358, - "loss": 1.0096, + "epoch": 0.8853060990303517, + "grad_norm": 0.267578125, + "learning_rate": 7.907917564182631e-06, + "loss": 0.8506, "step": 15430 }, { - "epoch": 0.4428155437292901, - "grad_norm": 0.421875, - "learning_rate": 0.00013654786362486795, - "loss": 0.9665, + "epoch": 0.8855929772218716, + "grad_norm": 0.259765625, + "learning_rate": 7.86893420284559e-06, + "loss": 0.8804, "step": 15435 }, { - "epoch": 0.4429589889977479, - "grad_norm": 0.42578125, - "learning_rate": 0.00013650124992126128, - "loss": 1.0466, + "epoch": 0.8858798554133914, + "grad_norm": 0.287109375, + "learning_rate": 7.830043230747918e-06, + "loss": 0.9101, "step": 15440 }, { - "epoch": 0.4431024342662057, - "grad_norm": 0.400390625, - "learning_rate": 0.00013645462706560337, - "loss": 0.942, + "epoch": 0.8861667336049114, + "grad_norm": 0.265625, + "learning_rate": 7.791244686889588e-06, + "loss": 0.9703, "step": 15445 }, { - "epoch": 0.44324587953466354, - "grad_norm": 0.419921875, - "learning_rate": 0.00013640799506958403, - "loss": 1.0345, + "epoch": 0.8864536117964312, + "grad_norm": 0.271484375, + "learning_rate": 7.752538610177817e-06, + "loss": 0.922, "step": 15450 }, { - "epoch": 0.4433893248031214, - "grad_norm": 0.39453125, - "learning_rate": 0.00013636135394489546, - "loss": 0.8735, + "epoch": 0.8867404899879511, + "grad_norm": 0.255859375, + "learning_rate": 7.713925039427206e-06, + "loss": 1.0184, "step": 15455 }, { - "epoch": 0.4435327700715792, - "grad_norm": 0.3828125, - "learning_rate": 0.00013631470370323214, - "loss": 1.0106, + "epoch": 0.887027368179471, + "grad_norm": 0.267578125, + "learning_rate": 7.67540401335951e-06, + "loss": 0.9566, "step": 15460 }, { - "epoch": 0.443676215340037, - "grad_norm": 0.375, - "learning_rate": 0.0001362680443562907, - "loss": 1.0346, + "epoch": 0.8873142463709909, + "grad_norm": 0.2734375, + "learning_rate": 7.636975570603689e-06, + "loss": 0.9193, "step": 15465 }, { - "epoch": 0.4438196606084948, - "grad_norm": 0.43359375, - "learning_rate": 0.0001362213759157703, - "loss": 0.9398, + "epoch": 0.8876011245625107, + "grad_norm": 0.26953125, + "learning_rate": 7.5986397496958796e-06, + "loss": 0.9882, "step": 15470 }, { - "epoch": 0.44396310587695265, - "grad_norm": 0.400390625, - "learning_rate": 0.00013617469839337213, - "loss": 1.0315, + "epoch": 0.8878880027540307, + "grad_norm": 0.27734375, + "learning_rate": 7.560396589079322e-06, + "loss": 0.9726, "step": 15475 }, { - "epoch": 0.4441065511454105, - "grad_norm": 0.39453125, - "learning_rate": 0.0001361280118007998, - "loss": 1.0261, + "epoch": 0.8881748809455505, + "grad_norm": 0.2578125, + "learning_rate": 7.522246127104348e-06, + "loss": 0.9203, "step": 15480 }, { - "epoch": 0.4442499964138683, - "grad_norm": 0.388671875, - "learning_rate": 0.00013608131614975915, - "loss": 0.9246, + "epoch": 0.8884617591370704, + "grad_norm": 0.236328125, + "learning_rate": 7.484188402028336e-06, + "loss": 0.9681, "step": 15485 }, { - "epoch": 0.4443934416823261, - "grad_norm": 0.3984375, - "learning_rate": 0.0001360346114519583, - "loss": 0.9839, + "epoch": 0.8887486373285903, + "grad_norm": 0.26953125, 
+ "learning_rate": 7.446223452015644e-06, + "loss": 0.972, "step": 15490 }, { - "epoch": 0.4445368869507839, - "grad_norm": 0.384765625, - "learning_rate": 0.00013598789771910764, - "loss": 0.9258, + "epoch": 0.8890355155201102, + "grad_norm": 0.265625, + "learning_rate": 7.40835131513764e-06, + "loss": 1.0296, "step": 15495 }, { - "epoch": 0.44468033221924175, - "grad_norm": 0.43359375, - "learning_rate": 0.0001359411749629198, - "loss": 1.0225, + "epoch": 0.88932239371163, + "grad_norm": 0.25390625, + "learning_rate": 7.3705720293725245e-06, + "loss": 0.9597, "step": 15500 }, { - "epoch": 0.4448237774876996, - "grad_norm": 0.412109375, - "learning_rate": 0.00013589444319510975, - "loss": 0.965, + "epoch": 0.8896092719031499, + "grad_norm": 0.255859375, + "learning_rate": 7.332885632605513e-06, + "loss": 0.8605, "step": 15505 }, { - "epoch": 0.4449672227561574, - "grad_norm": 0.38671875, - "learning_rate": 0.0001358477024273946, - "loss": 0.9954, + "epoch": 0.8898961500946698, + "grad_norm": 0.248046875, + "learning_rate": 7.295292162628575e-06, + "loss": 0.9628, "step": 15510 }, { - "epoch": 0.4451106680246152, - "grad_norm": 0.416015625, - "learning_rate": 0.00013580095267149385, - "loss": 0.9487, + "epoch": 0.8901830282861897, + "grad_norm": 0.271484375, + "learning_rate": 7.257791657140545e-06, + "loss": 0.9441, "step": 15515 }, { - "epoch": 0.445254113293073, - "grad_norm": 0.384765625, - "learning_rate": 0.0001357541939391291, - "loss": 0.9146, + "epoch": 0.8904699064777095, + "grad_norm": 0.26171875, + "learning_rate": 7.220384153746995e-06, + "loss": 0.9413, "step": 15520 }, { - "epoch": 0.44539755856153085, - "grad_norm": 0.4140625, - "learning_rate": 0.0001357074262420244, - "loss": 1.0257, + "epoch": 0.8907567846692295, + "grad_norm": 0.26171875, + "learning_rate": 7.183069689960265e-06, + "loss": 0.9683, "step": 15525 }, { - "epoch": 0.4455410038299887, - "grad_norm": 0.40625, - "learning_rate": 0.00013566064959190583, - "loss": 0.9907, + "epoch": 0.8910436628607493, + "grad_norm": 0.2890625, + "learning_rate": 7.145848303199365e-06, + "loss": 0.9836, "step": 15530 }, { - "epoch": 0.4456844490984465, - "grad_norm": 0.396484375, - "learning_rate": 0.00013561386400050187, - "loss": 1.0162, + "epoch": 0.8913305410522692, + "grad_norm": 0.26171875, + "learning_rate": 7.108720030790028e-06, + "loss": 0.9379, "step": 15535 }, { - "epoch": 0.4458278943669043, - "grad_norm": 0.435546875, - "learning_rate": 0.0001355670694795432, - "loss": 0.973, + "epoch": 0.8916174192437891, + "grad_norm": 0.291015625, + "learning_rate": 7.071684909964526e-06, + "loss": 0.9567, "step": 15540 }, { - "epoch": 0.4459713396353621, - "grad_norm": 0.380859375, - "learning_rate": 0.00013552026604076267, - "loss": 0.9675, + "epoch": 0.891904297435309, + "grad_norm": 0.26171875, + "learning_rate": 7.034742977861786e-06, + "loss": 0.9171, "step": 15545 }, { - "epoch": 0.44611478490381995, - "grad_norm": 0.482421875, - "learning_rate": 0.0001354734536958955, - "loss": 0.9671, + "epoch": 0.8921911756268288, + "grad_norm": 0.26953125, + "learning_rate": 6.99789427152725e-06, + "loss": 0.9089, "step": 15550 }, { - "epoch": 0.4462582301722778, - "grad_norm": 0.416015625, - "learning_rate": 0.00013542663245667898, - "loss": 0.9556, + "epoch": 0.8924780538183488, + "grad_norm": 0.251953125, + "learning_rate": 6.9611388279128835e-06, + "loss": 0.9761, "step": 15555 }, { - "epoch": 0.4464016754407356, - "grad_norm": 0.392578125, - "learning_rate": 0.0001353798023348528, - "loss": 0.9214, + "epoch": 0.8927649320098686, 
+ "grad_norm": 0.259765625, + "learning_rate": 6.9244766838771235e-06, + "loss": 0.9449, "step": 15560 }, { - "epoch": 0.4465451207091934, - "grad_norm": 0.37109375, - "learning_rate": 0.00013533296334215878, - "loss": 0.9421, + "epoch": 0.8930518102013885, + "grad_norm": 0.2734375, + "learning_rate": 6.887907876184862e-06, + "loss": 0.9312, "step": 15565 }, { - "epoch": 0.4466885659776512, - "grad_norm": 0.353515625, - "learning_rate": 0.00013528611549034096, - "loss": 0.9066, + "epoch": 0.8933386883929084, + "grad_norm": 0.25, + "learning_rate": 6.851432441507377e-06, + "loss": 0.9072, "step": 15570 }, { - "epoch": 0.44683201124610905, - "grad_norm": 0.390625, - "learning_rate": 0.0001352392587911456, - "loss": 0.9625, + "epoch": 0.8936255665844283, + "grad_norm": 0.263671875, + "learning_rate": 6.8150504164223085e-06, + "loss": 0.9976, "step": 15575 }, { - "epoch": 0.4469754565145669, - "grad_norm": 0.400390625, - "learning_rate": 0.00013519239325632124, - "loss": 1.016, + "epoch": 0.8939124447759481, + "grad_norm": 0.27734375, + "learning_rate": 6.778761837413627e-06, + "loss": 0.9691, "step": 15580 }, { - "epoch": 0.4471189017830247, - "grad_norm": 0.404296875, - "learning_rate": 0.0001351455188976186, - "loss": 0.9227, + "epoch": 0.894199322967468, + "grad_norm": 0.27734375, + "learning_rate": 6.742566740871625e-06, + "loss": 0.9596, "step": 15585 }, { - "epoch": 0.4472623470514825, - "grad_norm": 0.423828125, - "learning_rate": 0.00013509863572679057, - "loss": 0.9472, + "epoch": 0.8944862011589879, + "grad_norm": 0.2734375, + "learning_rate": 6.706465163092823e-06, + "loss": 0.9941, "step": 15590 }, { - "epoch": 0.4474057923199403, - "grad_norm": 0.375, - "learning_rate": 0.00013505174375559232, - "loss": 1.0017, + "epoch": 0.8947730793505078, + "grad_norm": 0.2578125, + "learning_rate": 6.67045714027994e-06, + "loss": 0.9539, "step": 15595 }, { - "epoch": 0.44754923758839815, - "grad_norm": 0.43359375, - "learning_rate": 0.00013500484299578116, - "loss": 0.9489, + "epoch": 0.8950599575420276, + "grad_norm": 0.267578125, + "learning_rate": 6.634542708541935e-06, + "loss": 0.931, "step": 15600 }, { - "epoch": 0.447692682856856, - "grad_norm": 0.369140625, - "learning_rate": 0.00013495793345911667, - "loss": 0.9339, + "epoch": 0.8953468357335476, + "grad_norm": 0.30078125, + "learning_rate": 6.5987219038938455e-06, + "loss": 0.9465, "step": 15605 }, { - "epoch": 0.44783612812531376, - "grad_norm": 0.359375, - "learning_rate": 0.00013491101515736057, - "loss": 0.9158, + "epoch": 0.8956337139250674, + "grad_norm": 0.251953125, + "learning_rate": 6.562994762256869e-06, + "loss": 0.8939, "step": 15610 }, { - "epoch": 0.4479795733937716, - "grad_norm": 0.400390625, - "learning_rate": 0.0001348640881022768, - "loss": 1.1173, + "epoch": 0.8959205921165873, + "grad_norm": 0.25, + "learning_rate": 6.527361319458292e-06, + "loss": 0.932, "step": 15615 }, { - "epoch": 0.4481230186622294, - "grad_norm": 0.361328125, - "learning_rate": 0.00013481715230563153, - "loss": 0.9912, + "epoch": 0.8962074703081072, + "grad_norm": 0.2578125, + "learning_rate": 6.491821611231364e-06, + "loss": 0.9207, "step": 15620 }, { - "epoch": 0.44826646393068725, - "grad_norm": 0.388671875, - "learning_rate": 0.0001347702077791931, - "loss": 0.9716, + "epoch": 0.8964943484996271, + "grad_norm": 0.2890625, + "learning_rate": 6.456375673215409e-06, + "loss": 1.001, "step": 15625 }, { - "epoch": 0.4484099091991451, - "grad_norm": 0.39453125, - "learning_rate": 0.00013472325453473197, - "loss": 0.8893, + "epoch": 
0.8967812266911469, + "grad_norm": 0.2578125, + "learning_rate": 6.421023540955684e-06, + "loss": 0.8778, "step": 15630 }, { - "epoch": 0.44855335446760286, - "grad_norm": 0.39453125, - "learning_rate": 0.00013467629258402092, - "loss": 0.9706, + "epoch": 0.8970681048826669, + "grad_norm": 0.2734375, + "learning_rate": 6.3857652499033974e-06, + "loss": 0.8923, "step": 15635 }, { - "epoch": 0.4486967997360607, - "grad_norm": 0.359375, - "learning_rate": 0.00013462932193883482, - "loss": 0.9437, + "epoch": 0.8973549830741867, + "grad_norm": 0.259765625, + "learning_rate": 6.350600835415632e-06, + "loss": 0.9465, "step": 15640 }, { - "epoch": 0.4488402450045185, - "grad_norm": 0.390625, - "learning_rate": 0.00013458234261095068, - "loss": 0.95, + "epoch": 0.8976418612657066, + "grad_norm": 0.263671875, + "learning_rate": 6.31553033275536e-06, + "loss": 0.9561, "step": 15645 }, { - "epoch": 0.44898369027297635, - "grad_norm": 0.36328125, - "learning_rate": 0.0001345353546121478, - "loss": 0.8485, + "epoch": 0.8979287394572265, + "grad_norm": 0.279296875, + "learning_rate": 6.2805537770913356e-06, + "loss": 0.9631, "step": 15650 }, { - "epoch": 0.4491271355414342, - "grad_norm": 0.375, - "learning_rate": 0.00013448835795420763, - "loss": 0.9353, + "epoch": 0.8982156176487464, + "grad_norm": 0.251953125, + "learning_rate": 6.245671203498149e-06, + "loss": 0.9159, "step": 15655 }, { - "epoch": 0.44927058080989196, - "grad_norm": 0.37109375, - "learning_rate": 0.00013444135264891371, - "loss": 0.9526, + "epoch": 0.8985024958402662, + "grad_norm": 0.259765625, + "learning_rate": 6.210882646956084e-06, + "loss": 0.894, "step": 15660 }, { - "epoch": 0.4494140260783498, - "grad_norm": 0.357421875, - "learning_rate": 0.00013439433870805181, - "loss": 0.9132, + "epoch": 0.8987893740317862, + "grad_norm": 0.265625, + "learning_rate": 6.176188142351247e-06, + "loss": 0.9014, "step": 15665 }, { - "epoch": 0.4495574713468076, - "grad_norm": 0.384765625, - "learning_rate": 0.00013434731614340984, - "loss": 0.8953, + "epoch": 0.899076252223306, + "grad_norm": 0.2890625, + "learning_rate": 6.141587724475317e-06, + "loss": 0.9285, "step": 15670 }, { - "epoch": 0.44970091661526546, - "grad_norm": 0.3671875, - "learning_rate": 0.00013430028496677792, - "loss": 0.9532, + "epoch": 0.8993631304148259, + "grad_norm": 0.2578125, + "learning_rate": 6.107081428025674e-06, + "loss": 0.9748, "step": 15675 }, { - "epoch": 0.4498443618837233, - "grad_norm": 0.3984375, - "learning_rate": 0.00013425324518994826, - "loss": 0.866, + "epoch": 0.8996500086063457, + "grad_norm": 0.267578125, + "learning_rate": 6.072669287605326e-06, + "loss": 1.0066, "step": 15680 }, { - "epoch": 0.44998780715218106, - "grad_norm": 0.359375, - "learning_rate": 0.00013420619682471532, - "loss": 1.0409, + "epoch": 0.8999368867978657, + "grad_norm": 0.271484375, + "learning_rate": 6.038351337722836e-06, + "loss": 0.9463, "step": 15685 }, { - "epoch": 0.4501312524206389, - "grad_norm": 0.44140625, - "learning_rate": 0.0001341591398828756, - "loss": 0.9839, + "epoch": 0.9002237649893855, + "grad_norm": 0.271484375, + "learning_rate": 6.004127612792332e-06, + "loss": 0.8783, "step": 15690 }, { - "epoch": 0.4502746976890967, - "grad_norm": 0.443359375, - "learning_rate": 0.00013411207437622778, - "loss": 1.0468, + "epoch": 0.9005106431809053, + "grad_norm": 0.265625, + "learning_rate": 5.969998147133415e-06, + "loss": 0.9672, "step": 15695 }, { - "epoch": 0.45041814295755456, - "grad_norm": 0.380859375, - "learning_rate": 0.00013406500031657283, - 
"loss": 1.0069, + "epoch": 0.9007975213724253, + "grad_norm": 0.251953125, + "learning_rate": 5.935962974971221e-06, + "loss": 0.9173, "step": 15700 }, { - "epoch": 0.4505615882260124, - "grad_norm": 0.390625, - "learning_rate": 0.00013401791771571367, - "loss": 0.8444, + "epoch": 0.9010843995639451, + "grad_norm": 0.2734375, + "learning_rate": 5.9020221304362686e-06, + "loss": 0.9124, "step": 15705 }, { - "epoch": 0.45070503349447016, - "grad_norm": 0.380859375, - "learning_rate": 0.00013397082658545543, - "loss": 1.0412, + "epoch": 0.901371277755465, + "grad_norm": 0.2578125, + "learning_rate": 5.868175647564522e-06, + "loss": 0.9556, "step": 15710 }, { - "epoch": 0.450848478762928, - "grad_norm": 0.396484375, - "learning_rate": 0.00013392372693760542, - "loss": 0.9008, + "epoch": 0.901658155946985, + "grad_norm": 0.27734375, + "learning_rate": 5.834423560297353e-06, + "loss": 0.9977, "step": 15715 }, { - "epoch": 0.4509919240313858, - "grad_norm": 0.3828125, - "learning_rate": 0.00013387661878397307, - "loss": 0.9183, + "epoch": 0.9019450341385048, + "grad_norm": 0.263671875, + "learning_rate": 5.800765902481364e-06, + "loss": 0.9459, "step": 15720 }, { - "epoch": 0.45113536929984366, - "grad_norm": 0.373046875, - "learning_rate": 0.00013382950213636986, - "loss": 0.9291, + "epoch": 0.9022319123300246, + "grad_norm": 0.251953125, + "learning_rate": 5.767202707868558e-06, + "loss": 0.9167, "step": 15725 }, { - "epoch": 0.4512788145683015, - "grad_norm": 0.404296875, - "learning_rate": 0.00013378237700660957, - "loss": 0.9344, + "epoch": 0.9025187905215446, + "grad_norm": 0.251953125, + "learning_rate": 5.733734010116188e-06, + "loss": 0.9177, "step": 15730 }, { - "epoch": 0.45142225983675927, - "grad_norm": 0.365234375, - "learning_rate": 0.00013373524340650787, - "loss": 0.9206, + "epoch": 0.9028056687130644, + "grad_norm": 0.279296875, + "learning_rate": 5.700359842786729e-06, + "loss": 0.8644, "step": 15735 }, { - "epoch": 0.4515657051052171, - "grad_norm": 0.38671875, - "learning_rate": 0.00013368810134788278, - "loss": 1.0388, + "epoch": 0.9030925469045843, + "grad_norm": 0.267578125, + "learning_rate": 5.667080239347889e-06, + "loss": 0.9814, "step": 15740 }, { - "epoch": 0.45170915037367493, - "grad_norm": 0.4609375, - "learning_rate": 0.00013364095084255435, - "loss": 0.8681, + "epoch": 0.9033794250961042, + "grad_norm": 0.26953125, + "learning_rate": 5.633895233172504e-06, + "loss": 0.9664, "step": 15745 }, { - "epoch": 0.45185259564213276, - "grad_norm": 0.34765625, - "learning_rate": 0.00013359379190234472, - "loss": 0.8895, + "epoch": 0.9036663032876241, + "grad_norm": 0.2734375, + "learning_rate": 5.600804857538588e-06, + "loss": 0.9869, "step": 15750 }, { - "epoch": 0.4519960409105906, - "grad_norm": 0.38671875, - "learning_rate": 0.00013354662453907815, - "loss": 0.9577, + "epoch": 0.9039531814791439, + "grad_norm": 0.259765625, + "learning_rate": 5.567809145629244e-06, + "loss": 0.9679, "step": 15755 }, { - "epoch": 0.45213948617904837, - "grad_norm": 0.361328125, - "learning_rate": 0.00013349944876458108, - "loss": 0.8972, + "epoch": 0.9042400596706638, + "grad_norm": 0.263671875, + "learning_rate": 5.534908130532623e-06, + "loss": 0.9669, "step": 15760 }, { - "epoch": 0.4522829314475062, - "grad_norm": 0.36328125, - "learning_rate": 0.00013345226459068204, - "loss": 0.8627, + "epoch": 0.9045269378621837, + "grad_norm": 0.27734375, + "learning_rate": 5.50210184524198e-06, + "loss": 0.9085, "step": 15765 }, { - "epoch": 0.45242637671596403, - "grad_norm": 0.421875, - 
"learning_rate": 0.00013340507202921152, - "loss": 0.9762, + "epoch": 0.9048138160537036, + "grad_norm": 0.263671875, + "learning_rate": 5.469390322655498e-06, + "loss": 0.8902, "step": 15770 }, { - "epoch": 0.45256982198442186, - "grad_norm": 0.4140625, - "learning_rate": 0.00013335787109200229, - "loss": 0.9128, + "epoch": 0.9051006942452234, + "grad_norm": 0.251953125, + "learning_rate": 5.436773595576361e-06, + "loss": 0.8946, "step": 15775 }, { - "epoch": 0.45271326725287964, - "grad_norm": 0.353515625, - "learning_rate": 0.0001333106617908892, - "loss": 0.958, + "epoch": 0.9053875724367434, + "grad_norm": 0.263671875, + "learning_rate": 5.404251696712714e-06, + "loss": 0.9744, "step": 15780 }, { - "epoch": 0.45285671252133747, - "grad_norm": 0.419921875, - "learning_rate": 0.00013326344413770911, - "loss": 1.0367, + "epoch": 0.9056744506282632, + "grad_norm": 0.265625, + "learning_rate": 5.371824658677594e-06, + "loss": 1.0072, "step": 15785 }, { - "epoch": 0.4530001577897953, - "grad_norm": 0.373046875, - "learning_rate": 0.00013321621814430106, - "loss": 0.9742, + "epoch": 0.9059613288197831, + "grad_norm": 0.248046875, + "learning_rate": 5.339492513988897e-06, + "loss": 0.9326, "step": 15790 }, { - "epoch": 0.45314360305825313, - "grad_norm": 0.38671875, - "learning_rate": 0.00013316898382250612, - "loss": 1.0466, + "epoch": 0.906248207011303, + "grad_norm": 0.25, + "learning_rate": 5.307255295069369e-06, + "loss": 0.9251, "step": 15795 }, { - "epoch": 0.45328704832671096, - "grad_norm": 0.43359375, - "learning_rate": 0.0001331217411841675, - "loss": 0.8867, + "epoch": 0.9065350852028229, + "grad_norm": 0.2734375, + "learning_rate": 5.275113034246571e-06, + "loss": 1.0251, "step": 15800 }, { - "epoch": 0.45343049359516874, - "grad_norm": 0.419921875, - "learning_rate": 0.0001330744902411304, - "loss": 0.8838, + "epoch": 0.9068219633943427, + "grad_norm": 0.263671875, + "learning_rate": 5.243065763752819e-06, + "loss": 0.9648, "step": 15805 }, { - "epoch": 0.45357393886362657, - "grad_norm": 0.392578125, - "learning_rate": 0.00013302723100524222, - "loss": 0.9396, + "epoch": 0.9071088415858627, + "grad_norm": 0.28515625, + "learning_rate": 5.2111135157252076e-06, + "loss": 0.9644, "step": 15810 }, { - "epoch": 0.4537173841320844, - "grad_norm": 0.376953125, - "learning_rate": 0.0001329799634883524, - "loss": 0.8692, + "epoch": 0.9073957197773825, + "grad_norm": 0.28125, + "learning_rate": 5.179256322205539e-06, + "loss": 0.9843, "step": 15815 }, { - "epoch": 0.45386082940054223, - "grad_norm": 0.40234375, - "learning_rate": 0.0001329326877023124, - "loss": 1.0948, + "epoch": 0.9076825979689024, + "grad_norm": 0.32421875, + "learning_rate": 5.147494215140236e-06, + "loss": 0.9976, "step": 15820 }, { - "epoch": 0.45400427466900006, - "grad_norm": 0.361328125, - "learning_rate": 0.00013288540365897586, - "loss": 0.9997, + "epoch": 0.9079694761604223, + "grad_norm": 0.2470703125, + "learning_rate": 5.115827226380421e-06, + "loss": 0.8557, "step": 15825 }, { - "epoch": 0.45414771993745784, - "grad_norm": 0.400390625, - "learning_rate": 0.00013283811137019836, - "loss": 0.9243, + "epoch": 0.9082563543519422, + "grad_norm": 0.279296875, + "learning_rate": 5.084255387681836e-06, + "loss": 0.9765, "step": 15830 }, { - "epoch": 0.45429116520591567, - "grad_norm": 0.38671875, - "learning_rate": 0.0001327908108478377, - "loss": 0.9117, + "epoch": 0.908543232543462, + "grad_norm": 0.3046875, + "learning_rate": 5.052778730704788e-06, + "loss": 0.9705, "step": 15835 }, { - "epoch": 
0.4544346104743735, - "grad_norm": 0.43359375, - "learning_rate": 0.00013274350210375357, - "loss": 0.979, + "epoch": 0.9088301107349819, + "grad_norm": 0.26953125, + "learning_rate": 5.021397287014129e-06, + "loss": 1.0047, "step": 15840 }, { - "epoch": 0.45457805574283133, - "grad_norm": 0.41015625, - "learning_rate": 0.00013269618514980783, - "loss": 0.8773, + "epoch": 0.9091169889265018, + "grad_norm": 0.263671875, + "learning_rate": 4.990111088079263e-06, + "loss": 0.92, "step": 15845 }, { - "epoch": 0.45472150101128916, - "grad_norm": 0.46484375, - "learning_rate": 0.00013264885999786442, - "loss": 0.9968, + "epoch": 0.9094038671180217, + "grad_norm": 0.265625, + "learning_rate": 4.958920165274039e-06, + "loss": 1.0487, "step": 15850 }, { - "epoch": 0.45486494627974694, - "grad_norm": 0.47265625, - "learning_rate": 0.00013260152665978928, - "loss": 0.9838, + "epoch": 0.9096907453095415, + "grad_norm": 0.287109375, + "learning_rate": 4.92782454987678e-06, + "loss": 0.9803, "step": 15855 }, { - "epoch": 0.45500839154820477, - "grad_norm": 0.44921875, - "learning_rate": 0.0001325541851474504, - "loss": 0.9736, + "epoch": 0.9099776235010615, + "grad_norm": 0.251953125, + "learning_rate": 4.896824273070255e-06, + "loss": 0.8652, "step": 15860 }, { - "epoch": 0.4551518368166626, - "grad_norm": 0.3671875, - "learning_rate": 0.00013250683547271787, - "loss": 0.9788, + "epoch": 0.9102645016925813, + "grad_norm": 0.267578125, + "learning_rate": 4.865919365941629e-06, + "loss": 1.0166, "step": 15865 }, { - "epoch": 0.45529528208512043, - "grad_norm": 0.408203125, - "learning_rate": 0.0001324594776474638, - "loss": 0.9763, + "epoch": 0.9105513798841012, + "grad_norm": 0.265625, + "learning_rate": 4.8351098594823674e-06, + "loss": 0.9448, "step": 15870 }, { - "epoch": 0.45543872735357827, - "grad_norm": 0.400390625, - "learning_rate": 0.00013241211168356224, - "loss": 0.9745, + "epoch": 0.9108382580756211, + "grad_norm": 0.2451171875, + "learning_rate": 4.804395784588334e-06, + "loss": 0.9409, "step": 15875 }, { - "epoch": 0.45558217262203604, - "grad_norm": 0.423828125, - "learning_rate": 0.0001323647375928895, - "loss": 0.9815, + "epoch": 0.911125136267141, + "grad_norm": 0.25, + "learning_rate": 4.77377717205969e-06, + "loss": 0.9747, "step": 15880 }, { - "epoch": 0.4557256178904939, - "grad_norm": 0.41015625, - "learning_rate": 0.00013231735538732375, - "loss": 1.0018, + "epoch": 0.9114120144586608, + "grad_norm": 0.2470703125, + "learning_rate": 4.7432540526008205e-06, + "loss": 0.883, "step": 15885 }, { - "epoch": 0.4558690631589517, - "grad_norm": 0.41015625, - "learning_rate": 0.00013226996507874526, - "loss": 1.0327, + "epoch": 0.9116988926501808, + "grad_norm": 0.279296875, + "learning_rate": 4.712826456820385e-06, + "loss": 0.9437, "step": 15890 }, { - "epoch": 0.45601250842740954, - "grad_norm": 0.396484375, - "learning_rate": 0.00013222256667903633, - "loss": 0.9015, + "epoch": 0.9119857708417006, + "grad_norm": 0.29296875, + "learning_rate": 4.682494415231253e-06, + "loss": 0.8496, "step": 15895 }, { - "epoch": 0.45615595369586737, - "grad_norm": 0.3671875, - "learning_rate": 0.00013217516020008128, - "loss": 1.0641, + "epoch": 0.9122726490332205, + "grad_norm": 0.265625, + "learning_rate": 4.652257958250461e-06, + "loss": 0.9518, "step": 15900 }, { - "epoch": 0.45629939896432514, - "grad_norm": 0.396484375, - "learning_rate": 0.00013212774565376644, - "loss": 1.0668, + "epoch": 0.9125595272247404, + "grad_norm": 0.255859375, + "learning_rate": 4.6221171161991874e-06, + "loss": 
0.9091, "step": 15905 }, { - "epoch": 0.456442844232783, - "grad_norm": 0.373046875, - "learning_rate": 0.00013208032305198018, - "loss": 0.9696, + "epoch": 0.9128464054162603, + "grad_norm": 0.2734375, + "learning_rate": 4.592071919302743e-06, + "loss": 0.9391, "step": 15910 }, { - "epoch": 0.4565862895012408, - "grad_norm": 0.373046875, - "learning_rate": 0.00013203289240661293, - "loss": 0.9469, + "epoch": 0.9131332836077801, + "grad_norm": 0.2734375, + "learning_rate": 4.562122397690538e-06, + "loss": 0.9312, "step": 15915 }, { - "epoch": 0.45672973476969864, - "grad_norm": 0.41796875, - "learning_rate": 0.00013198545372955706, - "loss": 1.0336, + "epoch": 0.9134201617993, + "grad_norm": 0.259765625, + "learning_rate": 4.532268581395982e-06, + "loss": 0.889, "step": 15920 }, { - "epoch": 0.45687318003815647, - "grad_norm": 0.39453125, - "learning_rate": 0.000131938007032707, - "loss": 0.9679, + "epoch": 0.9137070399908199, + "grad_norm": 0.265625, + "learning_rate": 4.502510500356571e-06, + "loss": 0.909, "step": 15925 }, { - "epoch": 0.45701662530661424, - "grad_norm": 0.3984375, - "learning_rate": 0.00013189055232795915, - "loss": 0.9963, + "epoch": 0.9139939181823398, + "grad_norm": 0.26171875, + "learning_rate": 4.472848184413769e-06, + "loss": 0.9407, "step": 15930 }, { - "epoch": 0.4571600705750721, - "grad_norm": 0.455078125, - "learning_rate": 0.00013184308962721202, - "loss": 1.0001, + "epoch": 0.9142807963738596, + "grad_norm": 0.259765625, + "learning_rate": 4.443281663313026e-06, + "loss": 0.9368, "step": 15935 }, { - "epoch": 0.4573035158435299, - "grad_norm": 0.443359375, - "learning_rate": 0.00013179561894236598, - "loss": 1.101, + "epoch": 0.9145676745653796, + "grad_norm": 0.26953125, + "learning_rate": 4.413810966703702e-06, + "loss": 1.0034, "step": 15940 }, { - "epoch": 0.45744696111198774, - "grad_norm": 0.396484375, - "learning_rate": 0.0001317481402853235, - "loss": 0.9786, + "epoch": 0.9148545527568994, + "grad_norm": 0.26953125, + "learning_rate": 4.3844361241390795e-06, + "loss": 0.8068, "step": 15945 }, { - "epoch": 0.4575904063804455, - "grad_norm": 0.349609375, - "learning_rate": 0.00013170065366798907, - "loss": 0.9263, + "epoch": 0.9151414309484193, + "grad_norm": 0.251953125, + "learning_rate": 4.355157165076318e-06, + "loss": 0.8796, "step": 15950 }, { - "epoch": 0.45773385164890334, - "grad_norm": 0.400390625, - "learning_rate": 0.00013165315910226906, - "loss": 0.897, + "epoch": 0.9154283091399392, + "grad_norm": 0.251953125, + "learning_rate": 4.325974118876408e-06, + "loss": 0.9913, "step": 15955 }, { - "epoch": 0.4578772969173612, - "grad_norm": 0.375, - "learning_rate": 0.00013160565660007195, - "loss": 0.9934, + "epoch": 0.9157151873314591, + "grad_norm": 0.251953125, + "learning_rate": 4.296887014804207e-06, + "loss": 0.9241, "step": 15960 }, { - "epoch": 0.458020742185819, - "grad_norm": 0.39453125, - "learning_rate": 0.00013155814617330812, - "loss": 0.8292, + "epoch": 0.9160020655229789, + "grad_norm": 0.26953125, + "learning_rate": 4.267895882028328e-06, + "loss": 0.9562, "step": 15965 }, { - "epoch": 0.45816418745427684, - "grad_norm": 0.388671875, - "learning_rate": 0.00013151062783389007, - "loss": 0.9074, + "epoch": 0.9162889437144989, + "grad_norm": 0.25390625, + "learning_rate": 4.239000749621092e-06, + "loss": 0.9201, "step": 15970 }, { - "epoch": 0.4583076327227346, - "grad_norm": 0.388671875, - "learning_rate": 0.00013146310159373208, - "loss": 0.8615, + "epoch": 0.9165758219060187, + "grad_norm": 0.25, + "learning_rate": 
4.210201646558653e-06, + "loss": 0.9493, "step": 15975 }, { - "epoch": 0.45845107799119245, - "grad_norm": 0.435546875, - "learning_rate": 0.00013141556746475058, - "loss": 0.9703, + "epoch": 0.9168627000975386, + "grad_norm": 0.255859375, + "learning_rate": 4.181498601720801e-06, + "loss": 0.9421, "step": 15980 }, { - "epoch": 0.4585945232596503, - "grad_norm": 0.345703125, - "learning_rate": 0.00013136802545886392, - "loss": 0.9841, + "epoch": 0.9171495782890585, + "grad_norm": 0.2734375, + "learning_rate": 4.15289164389101e-06, + "loss": 0.9286, "step": 15985 }, { - "epoch": 0.4587379685281081, - "grad_norm": 0.40234375, - "learning_rate": 0.00013132047558799242, - "loss": 0.9246, + "epoch": 0.9174364564805784, + "grad_norm": 0.279296875, + "learning_rate": 4.124380801756411e-06, + "loss": 0.9408, "step": 15990 }, { - "epoch": 0.45888141379656594, - "grad_norm": 0.388671875, - "learning_rate": 0.00013127291786405843, - "loss": 0.9905, + "epoch": 0.9177233346720982, + "grad_norm": 0.275390625, + "learning_rate": 4.095966103907723e-06, + "loss": 1.0005, "step": 15995 }, { - "epoch": 0.4590248590650237, - "grad_norm": 0.921875, - "learning_rate": 0.00013122535229898613, - "loss": 1.0051, + "epoch": 0.9180102128636181, + "grad_norm": 0.25390625, + "learning_rate": 4.0676475788392845e-06, + "loss": 0.9564, "step": 16000 }, { - "epoch": 0.45916830433348155, - "grad_norm": 0.384765625, - "learning_rate": 0.00013117777890470184, - "loss": 0.9296, + "epoch": 0.918297091055138, + "grad_norm": 0.28125, + "learning_rate": 4.039425254948958e-06, + "loss": 0.9974, "step": 16005 }, { - "epoch": 0.4593117496019394, - "grad_norm": 0.41015625, - "learning_rate": 0.0001311301976931337, - "loss": 0.8499, + "epoch": 0.9185839692466579, + "grad_norm": 0.287109375, + "learning_rate": 4.011299160538185e-06, + "loss": 0.9932, "step": 16010 }, { - "epoch": 0.4594551948703972, - "grad_norm": 0.40234375, - "learning_rate": 0.00013108260867621192, - "loss": 0.9221, + "epoch": 0.9188708474381777, + "grad_norm": 0.2578125, + "learning_rate": 3.983269323811856e-06, + "loss": 0.9562, "step": 16015 }, { - "epoch": 0.45959864013885504, - "grad_norm": 0.40234375, - "learning_rate": 0.00013103501186586855, - "loss": 0.967, + "epoch": 0.9191577256296977, + "grad_norm": 0.2578125, + "learning_rate": 3.955335772878343e-06, + "loss": 0.9051, "step": 16020 }, { - "epoch": 0.4597420854073128, - "grad_norm": 0.416015625, - "learning_rate": 0.0001309874072740377, - "loss": 0.924, + "epoch": 0.9194446038212175, + "grad_norm": 0.26171875, + "learning_rate": 3.927498535749486e-06, + "loss": 0.9962, "step": 16025 }, { - "epoch": 0.45988553067577065, - "grad_norm": 0.376953125, - "learning_rate": 0.00013093979491265542, - "loss": 0.9455, + "epoch": 0.9197314820127374, + "grad_norm": 0.275390625, + "learning_rate": 3.89975764034054e-06, + "loss": 0.8909, "step": 16030 }, { - "epoch": 0.4600289759442285, - "grad_norm": 0.44921875, - "learning_rate": 0.0001308921747936596, - "loss": 1.114, + "epoch": 0.9200183602042573, + "grad_norm": 0.275390625, + "learning_rate": 3.872113114470122e-06, + "loss": 0.9961, "step": 16035 }, { - "epoch": 0.4601724212126863, - "grad_norm": 0.37890625, - "learning_rate": 0.0001308445469289902, - "loss": 0.8961, + "epoch": 0.9203052383957772, + "grad_norm": 0.263671875, + "learning_rate": 3.844564985860222e-06, + "loss": 0.9576, "step": 16040 }, { - "epoch": 0.46031586648114414, - "grad_norm": 0.421875, - "learning_rate": 0.0001307969113305891, - "loss": 1.0152, + "epoch": 0.920592116587297, + "grad_norm": 
0.267578125, + "learning_rate": 3.817113282136176e-06, + "loss": 0.8964, "step": 16045 }, { - "epoch": 0.4604593117496019, - "grad_norm": 0.365234375, - "learning_rate": 0.00013074926801040005, - "loss": 0.9025, + "epoch": 0.920878994778817, + "grad_norm": 0.265625, + "learning_rate": 3.7897580308265954e-06, + "loss": 0.9571, "step": 16050 }, { - "epoch": 0.46060275701805975, - "grad_norm": 0.353515625, - "learning_rate": 0.0001307016169803688, - "loss": 1.0371, + "epoch": 0.9211658729703368, + "grad_norm": 0.265625, + "learning_rate": 3.762499259363417e-06, + "loss": 0.9314, "step": 16055 }, { - "epoch": 0.4607462022865176, - "grad_norm": 0.419921875, - "learning_rate": 0.000130653958252443, - "loss": 0.963, + "epoch": 0.9214527511618567, + "grad_norm": 0.263671875, + "learning_rate": 3.735336995081795e-06, + "loss": 0.9395, "step": 16060 }, { - "epoch": 0.4608896475549754, - "grad_norm": 0.421875, - "learning_rate": 0.00013060629183857226, - "loss": 0.9151, + "epoch": 0.9217396293533766, + "grad_norm": 0.2734375, + "learning_rate": 3.7082712652200867e-06, + "loss": 0.958, "step": 16065 }, { - "epoch": 0.46103309282343324, - "grad_norm": 0.431640625, - "learning_rate": 0.0001305586177507081, - "loss": 1.0067, + "epoch": 0.9220265075448965, + "grad_norm": 0.28515625, + "learning_rate": 3.6813020969198585e-06, + "loss": 0.9532, "step": 16070 }, { - "epoch": 0.461176538091891, - "grad_norm": 0.373046875, - "learning_rate": 0.00013051093600080399, - "loss": 0.9703, + "epoch": 0.9223133857364163, + "grad_norm": 0.26953125, + "learning_rate": 3.654429517225877e-06, + "loss": 0.9759, "step": 16075 }, { - "epoch": 0.46131998336034885, - "grad_norm": 0.3828125, - "learning_rate": 0.00013046324660081525, - "loss": 0.9514, + "epoch": 0.9226002639279361, + "grad_norm": 0.251953125, + "learning_rate": 3.62765355308603e-06, + "loss": 0.9062, "step": 16080 }, { - "epoch": 0.4614634286288067, - "grad_norm": 0.423828125, - "learning_rate": 0.00013041554956269916, - "loss": 0.8635, + "epoch": 0.9228871421194561, + "grad_norm": 0.26171875, + "learning_rate": 3.600974231351306e-06, + "loss": 0.9079, "step": 16085 }, { - "epoch": 0.4616068738972645, - "grad_norm": 0.36328125, - "learning_rate": 0.00013036784489841495, - "loss": 0.9741, + "epoch": 0.923174020310976, + "grad_norm": 0.259765625, + "learning_rate": 3.574391578775771e-06, + "loss": 0.9881, "step": 16090 }, { - "epoch": 0.46175031916572234, - "grad_norm": 0.671875, - "learning_rate": 0.00013032013261992376, - "loss": 0.9669, + "epoch": 0.9234608985024958, + "grad_norm": 0.275390625, + "learning_rate": 3.547905622016601e-06, + "loss": 1.0151, "step": 16095 }, { - "epoch": 0.4618937644341801, - "grad_norm": 0.380859375, - "learning_rate": 0.00013027241273918855, - "loss": 0.966, + "epoch": 0.9237477766940158, + "grad_norm": 0.275390625, + "learning_rate": 3.5215163876339274e-06, + "loss": 0.8687, "step": 16100 }, { - "epoch": 0.46203720970263795, - "grad_norm": 0.345703125, - "learning_rate": 0.00013022468526817428, - "loss": 0.8968, + "epoch": 0.9240346548855356, + "grad_norm": 0.279296875, + "learning_rate": 3.495223902090983e-06, + "loss": 0.9379, "step": 16105 }, { - "epoch": 0.4621806549710958, - "grad_norm": 0.345703125, - "learning_rate": 0.00013017695021884777, - "loss": 0.8702, + "epoch": 0.9243215330770554, + "grad_norm": 0.26171875, + "learning_rate": 3.4690281917539203e-06, + "loss": 0.969, "step": 16110 }, { - "epoch": 0.4623241002395536, - "grad_norm": 0.39453125, - "learning_rate": 0.00013012920760317777, - "loss": 0.9265, + "epoch": 
0.9246084112685754, + "grad_norm": 0.267578125, + "learning_rate": 3.442929282891827e-06, + "loss": 0.9878, "step": 16115 }, { - "epoch": 0.4624675455080114, - "grad_norm": 0.419921875, - "learning_rate": 0.00013008145743313487, - "loss": 0.9639, + "epoch": 0.9248952894600952, + "grad_norm": 0.267578125, + "learning_rate": 3.416927201676767e-06, + "loss": 0.9008, "step": 16120 }, { - "epoch": 0.4626109907764692, - "grad_norm": 0.3984375, - "learning_rate": 0.00013003369972069165, - "loss": 0.9727, + "epoch": 0.9251821676516151, + "grad_norm": 0.2734375, + "learning_rate": 3.3910219741836944e-06, + "loss": 0.9432, "step": 16125 }, { - "epoch": 0.46275443604492705, - "grad_norm": 0.38671875, - "learning_rate": 0.00012998593447782246, - "loss": 0.9189, + "epoch": 0.925469045843135, + "grad_norm": 0.2470703125, + "learning_rate": 3.365213626390418e-06, + "loss": 0.9459, "step": 16130 }, { - "epoch": 0.4628978813133849, - "grad_norm": 0.455078125, - "learning_rate": 0.0001299381617165036, - "loss": 0.9576, + "epoch": 0.9257559240346549, + "grad_norm": 0.28515625, + "learning_rate": 3.339502184177612e-06, + "loss": 0.9924, "step": 16135 }, { - "epoch": 0.4630413265818427, - "grad_norm": 0.388671875, - "learning_rate": 0.0001298903814487133, - "loss": 0.9099, + "epoch": 0.9260428022261747, + "grad_norm": 0.310546875, + "learning_rate": 3.3138876733287638e-06, + "loss": 0.9324, "step": 16140 }, { - "epoch": 0.4631847718503005, - "grad_norm": 0.419921875, - "learning_rate": 0.00012984259368643163, - "loss": 0.9661, + "epoch": 0.9263296804176947, + "grad_norm": 0.275390625, + "learning_rate": 3.28837011953016e-06, + "loss": 0.9471, "step": 16145 }, { - "epoch": 0.4633282171187583, - "grad_norm": 0.412109375, - "learning_rate": 0.0001297947984416405, - "loss": 0.8527, + "epoch": 0.9266165586092145, + "grad_norm": 0.263671875, + "learning_rate": 3.262949548370853e-06, + "loss": 0.94, "step": 16150 }, { - "epoch": 0.46347166238721615, - "grad_norm": 0.365234375, - "learning_rate": 0.00012974699572632377, - "loss": 0.8759, + "epoch": 0.9269034368007344, + "grad_norm": 0.267578125, + "learning_rate": 3.237625985342674e-06, + "loss": 0.9803, "step": 16155 }, { - "epoch": 0.463615107655674, - "grad_norm": 0.462890625, - "learning_rate": 0.0001296991855524671, - "loss": 1.1117, + "epoch": 0.9271903149922542, + "grad_norm": 0.283203125, + "learning_rate": 3.212399455840154e-06, + "loss": 0.9429, "step": 16160 }, { - "epoch": 0.4637585529241318, - "grad_norm": 0.41015625, - "learning_rate": 0.00012965136793205808, - "loss": 0.9661, + "epoch": 0.9274771931837742, + "grad_norm": 0.263671875, + "learning_rate": 3.187269985160457e-06, + "loss": 0.9188, "step": 16165 }, { - "epoch": 0.4639019981925896, - "grad_norm": 0.419921875, - "learning_rate": 0.0001296035428770861, - "loss": 0.9497, + "epoch": 0.927764071375294, + "grad_norm": 0.26171875, + "learning_rate": 3.1622375985035367e-06, + "loss": 0.9285, "step": 16170 }, { - "epoch": 0.4640454434610474, - "grad_norm": 0.400390625, - "learning_rate": 0.00012955571039954258, - "loss": 0.9331, + "epoch": 0.9280509495668139, + "grad_norm": 0.287109375, + "learning_rate": 3.137302320971891e-06, + "loss": 0.9227, "step": 16175 }, { - "epoch": 0.46418888872950526, - "grad_norm": 0.37890625, - "learning_rate": 0.00012950787051142052, - "loss": 0.9602, + "epoch": 0.9283378277583338, + "grad_norm": 0.28125, + "learning_rate": 3.112464177570662e-06, + "loss": 0.9039, "step": 16180 }, { - "epoch": 0.4643323339979631, - "grad_norm": 0.37109375, - "learning_rate": 
0.000129460023224715, - "loss": 0.9812, + "epoch": 0.9286247059498537, + "grad_norm": 0.259765625, + "learning_rate": 3.087723193207648e-06, + "loss": 0.9231, "step": 16185 }, { - "epoch": 0.4644757792664209, - "grad_norm": 0.390625, - "learning_rate": 0.00012941216855142298, - "loss": 1.0167, + "epoch": 0.9289115841413735, + "grad_norm": 0.271484375, + "learning_rate": 3.0630793926931132e-06, + "loss": 0.8935, "step": 16190 }, { - "epoch": 0.4646192245348787, - "grad_norm": 0.40625, - "learning_rate": 0.00012936430650354306, - "loss": 0.9861, + "epoch": 0.9291984623328935, + "grad_norm": 0.26171875, + "learning_rate": 3.038532800739935e-06, + "loss": 0.8984, "step": 16195 }, { - "epoch": 0.4647626698033365, - "grad_norm": 0.400390625, - "learning_rate": 0.00012931643709307588, - "loss": 0.9866, + "epoch": 0.9294853405244133, + "grad_norm": 0.341796875, + "learning_rate": 3.014083441963478e-06, + "loss": 0.9787, "step": 16200 }, { - "epoch": 0.46490611507179436, - "grad_norm": 0.427734375, - "learning_rate": 0.00012926856033202385, - "loss": 0.9253, + "epoch": 0.9297722187159332, + "grad_norm": 0.2578125, + "learning_rate": 2.9897313408816407e-06, + "loss": 0.9824, "step": 16205 }, { - "epoch": 0.4650495603402522, - "grad_norm": 0.384765625, - "learning_rate": 0.00012922067623239117, - "loss": 0.9495, + "epoch": 0.9300590969074531, + "grad_norm": 0.28125, + "learning_rate": 2.9654765219147563e-06, + "loss": 0.8349, "step": 16210 }, { - "epoch": 0.46519300560871, - "grad_norm": 0.3828125, - "learning_rate": 0.00012917278480618403, - "loss": 0.961, + "epoch": 0.930345975098973, + "grad_norm": 0.263671875, + "learning_rate": 2.941319009385579e-06, + "loss": 0.909, "step": 16215 }, { - "epoch": 0.4653364508771678, - "grad_norm": 0.373046875, - "learning_rate": 0.00012912488606541035, - "loss": 0.9025, + "epoch": 0.9306328532904928, + "grad_norm": 0.26171875, + "learning_rate": 2.9172588275193534e-06, + "loss": 0.8929, "step": 16220 }, { - "epoch": 0.4654798961456256, - "grad_norm": 0.408203125, - "learning_rate": 0.00012907698002207987, - "loss": 0.9303, + "epoch": 0.9309197314820128, + "grad_norm": 0.271484375, + "learning_rate": 2.8932960004436795e-06, + "loss": 0.9595, "step": 16225 }, { - "epoch": 0.46562334141408346, - "grad_norm": 0.3984375, - "learning_rate": 0.00012902906668820418, - "loss": 0.9842, + "epoch": 0.9312066096735326, + "grad_norm": 0.267578125, + "learning_rate": 2.869430552188501e-06, + "loss": 0.9288, "step": 16230 }, { - "epoch": 0.4657667866825413, - "grad_norm": 0.396484375, - "learning_rate": 0.00012898114607579677, - "loss": 1.0149, + "epoch": 0.9314934878650525, + "grad_norm": 0.3359375, + "learning_rate": 2.8456625066861973e-06, + "loss": 0.9109, "step": 16235 }, { - "epoch": 0.4659102319509991, - "grad_norm": 0.328125, - "learning_rate": 0.00012893321819687286, - "loss": 0.9491, + "epoch": 0.9317803660565723, + "grad_norm": 0.279296875, + "learning_rate": 2.8219918877713804e-06, + "loss": 0.9129, "step": 16240 }, { - "epoch": 0.4660536772194569, - "grad_norm": 0.359375, - "learning_rate": 0.00012888528306344955, - "loss": 0.8839, + "epoch": 0.9320672442480923, + "grad_norm": 0.267578125, + "learning_rate": 2.7984187191810063e-06, + "loss": 0.8989, "step": 16245 }, { - "epoch": 0.46619712248791473, - "grad_norm": 0.392578125, - "learning_rate": 0.0001288373406875457, - "loss": 0.9, + "epoch": 0.9323541224396121, + "grad_norm": 0.26171875, + "learning_rate": 2.7749430245542997e-06, + "loss": 0.9914, "step": 16250 }, { - "epoch": 0.46634056775637256, - "grad_norm": 
0.380859375, - "learning_rate": 0.0001287893910811821, - "loss": 0.9224, + "epoch": 0.932641000631132, + "grad_norm": 0.302734375, + "learning_rate": 2.751564827432751e-06, + "loss": 0.9854, "step": 16255 }, { - "epoch": 0.4664840130248304, - "grad_norm": 0.38671875, - "learning_rate": 0.00012874143425638116, - "loss": 1.0085, + "epoch": 0.9329278788226519, + "grad_norm": 0.267578125, + "learning_rate": 2.7282841512600632e-06, + "loss": 0.8519, "step": 16260 }, { - "epoch": 0.4666274582932882, - "grad_norm": 0.373046875, - "learning_rate": 0.0001286934702251673, - "loss": 0.9144, + "epoch": 0.9332147570141718, + "grad_norm": 0.2734375, + "learning_rate": 2.705101019382139e-06, + "loss": 1.001, "step": 16265 }, { - "epoch": 0.466770903561746, - "grad_norm": 0.40625, - "learning_rate": 0.0001286454989995667, - "loss": 0.993, + "epoch": 0.9335016352056916, + "grad_norm": 0.28125, + "learning_rate": 2.682015455047093e-06, + "loss": 1.0089, "step": 16270 }, { - "epoch": 0.46691434883020383, - "grad_norm": 0.380859375, - "learning_rate": 0.00012859752059160722, - "loss": 1.0597, + "epoch": 0.9337885133972116, + "grad_norm": 0.263671875, + "learning_rate": 2.659027481405163e-06, + "loss": 0.9404, "step": 16275 }, { - "epoch": 0.46705779409866166, - "grad_norm": 0.357421875, - "learning_rate": 0.00012854953501331863, - "loss": 1.0133, + "epoch": 0.9340753915887314, + "grad_norm": 0.275390625, + "learning_rate": 2.636137121508753e-06, + "loss": 0.9803, "step": 16280 }, { - "epoch": 0.4672012393671195, - "grad_norm": 0.39453125, - "learning_rate": 0.0001285015422767325, - "loss": 0.9965, + "epoch": 0.9343622697802513, + "grad_norm": 0.283203125, + "learning_rate": 2.6133443983123785e-06, + "loss": 1.0476, "step": 16285 }, { - "epoch": 0.4673446846355773, - "grad_norm": 0.38671875, - "learning_rate": 0.0001284535423938822, - "loss": 0.9649, + "epoch": 0.9346491479717712, + "grad_norm": 0.263671875, + "learning_rate": 2.5906493346726126e-06, + "loss": 0.895, "step": 16290 }, { - "epoch": 0.4674881299040351, - "grad_norm": 0.365234375, - "learning_rate": 0.00012840553537680276, - "loss": 0.898, + "epoch": 0.9349360261632911, + "grad_norm": 0.28515625, + "learning_rate": 2.5680519533481052e-06, + "loss": 0.9463, "step": 16295 }, { - "epoch": 0.46763157517249293, - "grad_norm": 0.412109375, - "learning_rate": 0.0001283575212375312, - "loss": 1.0698, + "epoch": 0.9352229043548109, + "grad_norm": 0.251953125, + "learning_rate": 2.5455522769995966e-06, + "loss": 0.9837, "step": 16300 }, { - "epoch": 0.46777502044095076, - "grad_norm": 0.380859375, - "learning_rate": 0.00012830949998810618, - "loss": 0.9974, + "epoch": 0.9355097825463309, + "grad_norm": 0.259765625, + "learning_rate": 2.523150328189783e-06, + "loss": 0.9138, "step": 16305 }, { - "epoch": 0.4679184657094086, - "grad_norm": 0.41015625, - "learning_rate": 0.00012826147164056822, - "loss": 0.931, + "epoch": 0.9357966607378507, + "grad_norm": 0.26171875, + "learning_rate": 2.500846129383416e-06, + "loss": 0.9509, "step": 16310 }, { - "epoch": 0.46806191097786637, - "grad_norm": 0.404296875, - "learning_rate": 0.00012821343620695954, - "loss": 1.0072, + "epoch": 0.9360835389293706, + "grad_norm": 0.25, + "learning_rate": 2.478639702947172e-06, + "loss": 0.9884, "step": 16315 }, { - "epoch": 0.4682053562463242, - "grad_norm": 0.388671875, - "learning_rate": 0.00012816539369932422, + "epoch": 0.9363704171208904, + "grad_norm": 0.26953125, + "learning_rate": 2.4565310711497146e-06, "loss": 0.9741, "step": 16320 }, { - "epoch": 0.46834880151478203, - 
"grad_norm": 0.419921875, - "learning_rate": 0.0001281173441297081, - "loss": 0.9937, + "epoch": 0.9366572953124104, + "grad_norm": 0.2734375, + "learning_rate": 2.434520256161632e-06, + "loss": 0.9581, "step": 16325 }, { - "epoch": 0.46849224678323986, - "grad_norm": 0.369140625, - "learning_rate": 0.00012806928751015874, - "loss": 0.9584, + "epoch": 0.9369441735039302, + "grad_norm": 0.283203125, + "learning_rate": 2.412607280055401e-06, + "loss": 0.9786, "step": 16330 }, { - "epoch": 0.4686356920516977, - "grad_norm": 0.408203125, - "learning_rate": 0.0001280212238527255, - "loss": 1.0317, + "epoch": 0.9372310516954501, + "grad_norm": 0.255859375, + "learning_rate": 2.390792164805433e-06, + "loss": 0.8686, "step": 16335 }, { - "epoch": 0.46877913732015547, - "grad_norm": 0.375, - "learning_rate": 0.0001279731531694595, - "loss": 0.9664, + "epoch": 0.93751792988697, + "grad_norm": 0.279296875, + "learning_rate": 2.3690749322879624e-06, + "loss": 1.0135, "step": 16340 }, { - "epoch": 0.4689225825886133, - "grad_norm": 0.39453125, - "learning_rate": 0.00012792507547241367, - "loss": 0.8745, + "epoch": 0.9378048080784899, + "grad_norm": 0.2578125, + "learning_rate": 2.347455604281057e-06, + "loss": 0.9064, "step": 16345 }, { - "epoch": 0.46906602785707113, - "grad_norm": 0.369140625, - "learning_rate": 0.00012787699077364262, - "loss": 0.9291, + "epoch": 0.9380916862700097, + "grad_norm": 0.27734375, + "learning_rate": 2.3259342024646524e-06, + "loss": 0.9567, "step": 16350 }, { - "epoch": 0.46920947312552896, - "grad_norm": 0.3671875, - "learning_rate": 0.00012782889908520271, - "loss": 0.9746, + "epoch": 0.9383785644615297, + "grad_norm": 0.2578125, + "learning_rate": 2.304510748420463e-06, + "loss": 1.0151, "step": 16355 }, { - "epoch": 0.4693529183939868, - "grad_norm": 0.40625, - "learning_rate": 0.00012778080041915215, - "loss": 1.0965, + "epoch": 0.9386654426530495, + "grad_norm": 0.2412109375, + "learning_rate": 2.2831852636319594e-06, + "loss": 0.9201, "step": 16360 }, { - "epoch": 0.46949636366244457, - "grad_norm": 0.408203125, - "learning_rate": 0.0001277326947875508, - "loss": 1.0736, + "epoch": 0.9389523208445694, + "grad_norm": 0.25390625, + "learning_rate": 2.2619577694843907e-06, + "loss": 0.9121, "step": 16365 }, { - "epoch": 0.4696398089309024, - "grad_norm": 0.357421875, - "learning_rate": 0.00012768458220246035, - "loss": 0.9522, + "epoch": 0.9392391990360893, + "grad_norm": 0.2578125, + "learning_rate": 2.240828287264729e-06, + "loss": 0.9018, "step": 16370 }, { - "epoch": 0.46978325419936023, - "grad_norm": 0.345703125, - "learning_rate": 0.00012763646267594417, - "loss": 1.1201, + "epoch": 0.9395260772276092, + "grad_norm": 0.26171875, + "learning_rate": 2.219796838161681e-06, + "loss": 0.8755, "step": 16375 }, { - "epoch": 0.46992669946781807, - "grad_norm": 0.36328125, - "learning_rate": 0.00012758833622006737, - "loss": 0.839, + "epoch": 0.939812955419129, + "grad_norm": 0.26171875, + "learning_rate": 2.1988634432656197e-06, + "loss": 0.9599, "step": 16380 }, { - "epoch": 0.4700701447362759, - "grad_norm": 0.359375, - "learning_rate": 0.00012754020284689685, - "loss": 0.9617, + "epoch": 0.940099833610649, + "grad_norm": 0.26953125, + "learning_rate": 2.1780281235686206e-06, + "loss": 0.9062, "step": 16385 }, { - "epoch": 0.4702135900047337, - "grad_norm": 0.384765625, - "learning_rate": 0.0001274920625685012, - "loss": 0.9195, + "epoch": 0.9403867118021688, + "grad_norm": 0.25390625, + "learning_rate": 2.1572908999643705e-06, + "loss": 0.9129, "step": 16390 }, { - 
"epoch": 0.4703570352731915, - "grad_norm": 0.404296875, - "learning_rate": 0.00012744391539695076, - "loss": 1.0717, + "epoch": 0.9406735899936887, + "grad_norm": 0.26171875, + "learning_rate": 2.13665179324819e-06, + "loss": 0.8914, "step": 16395 }, { - "epoch": 0.47050048054164934, - "grad_norm": 0.369140625, - "learning_rate": 0.0001273957613443176, - "loss": 0.9133, + "epoch": 0.9409604681852085, + "grad_norm": 0.26171875, + "learning_rate": 2.116110824117046e-06, + "loss": 0.88, "step": 16400 }, { - "epoch": 0.47064392581010717, - "grad_norm": 0.41796875, - "learning_rate": 0.00012734760042267549, - "loss": 1.0286, + "epoch": 0.9412473463767285, + "grad_norm": 0.271484375, + "learning_rate": 2.0956680131694604e-06, + "loss": 0.944, "step": 16405 }, { - "epoch": 0.470787371078565, - "grad_norm": 0.369140625, - "learning_rate": 0.00012729943264409992, - "loss": 0.9178, + "epoch": 0.9415342245682483, + "grad_norm": 0.271484375, + "learning_rate": 2.075323380905536e-06, + "loss": 0.9598, "step": 16410 }, { - "epoch": 0.4709308163470228, - "grad_norm": 0.359375, - "learning_rate": 0.00012725125802066816, - "loss": 1.0314, + "epoch": 0.9418211027597682, + "grad_norm": 0.27734375, + "learning_rate": 2.0550769477269084e-06, + "loss": 0.986, "step": 16415 }, { - "epoch": 0.4710742616154806, - "grad_norm": 0.353515625, - "learning_rate": 0.00012720307656445914, - "loss": 0.9567, + "epoch": 0.9421079809512881, + "grad_norm": 0.263671875, + "learning_rate": 2.0349287339367364e-06, + "loss": 0.9844, "step": 16420 }, { - "epoch": 0.47121770688393844, - "grad_norm": 0.37109375, - "learning_rate": 0.00012715488828755352, - "loss": 0.9682, + "epoch": 0.942394859142808, + "grad_norm": 0.279296875, + "learning_rate": 2.0148787597397136e-06, + "loss": 0.9443, "step": 16425 }, { - "epoch": 0.47136115215239627, - "grad_norm": 0.373046875, - "learning_rate": 0.0001271066932020337, - "loss": 0.9057, + "epoch": 0.9426817373343278, + "grad_norm": 0.255859375, + "learning_rate": 1.99492704524199e-06, + "loss": 0.9414, "step": 16430 }, { - "epoch": 0.4715045974208541, - "grad_norm": 0.3671875, - "learning_rate": 0.00012705849131998368, - "loss": 0.8981, + "epoch": 0.9429686155258478, + "grad_norm": 0.263671875, + "learning_rate": 1.9750736104511947e-06, + "loss": 0.9172, "step": 16435 }, { - "epoch": 0.4716480426893119, - "grad_norm": 0.419921875, - "learning_rate": 0.00012701028265348934, - "loss": 0.9268, + "epoch": 0.9432554937173676, + "grad_norm": 0.265625, + "learning_rate": 1.955318475276391e-06, + "loss": 0.9453, "step": 16440 }, { - "epoch": 0.4717914879577697, - "grad_norm": 0.375, - "learning_rate": 0.00012696206721463807, - "loss": 0.8547, + "epoch": 0.9435423719088875, + "grad_norm": 0.251953125, + "learning_rate": 1.935661659528054e-06, + "loss": 0.9433, "step": 16445 }, { - "epoch": 0.47193493322622754, - "grad_norm": 0.380859375, - "learning_rate": 0.0001269138450155191, - "loss": 1.0381, + "epoch": 0.9438292501004074, + "grad_norm": 0.28515625, + "learning_rate": 1.9161031829181275e-06, + "loss": 0.9827, "step": 16450 }, { - "epoch": 0.47207837849468537, - "grad_norm": 0.396484375, - "learning_rate": 0.0001268656160682233, - "loss": 0.9555, + "epoch": 0.9441161282919273, + "grad_norm": 0.271484375, + "learning_rate": 1.8966430650598554e-06, + "loss": 0.9666, "step": 16455 }, { - "epoch": 0.4722218237631432, - "grad_norm": 0.423828125, - "learning_rate": 0.00012681738038484324, - "loss": 0.9466, + "epoch": 0.9444030064834471, + "grad_norm": 0.283203125, + "learning_rate": 1.8772813254679166e-06, 
+ "loss": 0.9609, "step": 16460 }, { - "epoch": 0.472365269031601, - "grad_norm": 0.3828125, - "learning_rate": 0.00012676913797747313, - "loss": 1.0146, + "epoch": 0.9446898846749671, + "grad_norm": 0.2451171875, + "learning_rate": 1.85801798355828e-06, + "loss": 0.9515, "step": 16465 }, { - "epoch": 0.4725087143000588, - "grad_norm": 0.375, - "learning_rate": 0.00012672088885820897, - "loss": 0.9146, + "epoch": 0.9449767628664869, + "grad_norm": 0.267578125, + "learning_rate": 1.8388530586482932e-06, + "loss": 0.9266, "step": 16470 }, { - "epoch": 0.47265215956851664, - "grad_norm": 0.384765625, - "learning_rate": 0.00012667263303914837, - "loss": 0.9556, + "epoch": 0.9452636410580068, + "grad_norm": 0.259765625, + "learning_rate": 1.8197865699565497e-06, + "loss": 0.9186, "step": 16475 }, { - "epoch": 0.47279560483697447, - "grad_norm": 0.388671875, - "learning_rate": 0.00012662437053239062, - "loss": 0.9714, + "epoch": 0.9455505192495266, + "grad_norm": 0.26953125, + "learning_rate": 1.8008185366030217e-06, + "loss": 0.9243, "step": 16480 }, { - "epoch": 0.47293905010543225, - "grad_norm": 0.373046875, - "learning_rate": 0.0001265761013500367, - "loss": 0.9275, + "epoch": 0.9458373974410466, + "grad_norm": 0.27734375, + "learning_rate": 1.7819489776088493e-06, + "loss": 0.9013, "step": 16485 }, { - "epoch": 0.4730824953738901, - "grad_norm": 0.40625, - "learning_rate": 0.0001265278255041893, - "loss": 0.9422, + "epoch": 0.9461242756325664, + "grad_norm": 0.27734375, + "learning_rate": 1.7631779118964852e-06, + "loss": 0.9348, "step": 16490 }, { - "epoch": 0.4732259406423479, - "grad_norm": 0.373046875, - "learning_rate": 0.0001264795430069527, - "loss": 1.0005, + "epoch": 0.9464111538240862, + "grad_norm": 0.27734375, + "learning_rate": 1.7445053582895944e-06, + "loss": 0.9947, "step": 16495 }, { - "epoch": 0.47336938591080574, - "grad_norm": 0.42578125, - "learning_rate": 0.0001264312538704329, - "loss": 0.98, + "epoch": 0.9466980320156062, + "grad_norm": 0.26171875, + "learning_rate": 1.7259313355130647e-06, + "loss": 0.9336, "step": 16500 }, { - "epoch": 0.47351283117926357, - "grad_norm": 0.380859375, - "learning_rate": 0.00012638295810673758, - "loss": 0.958, + "epoch": 0.946984910207126, + "grad_norm": 0.271484375, + "learning_rate": 1.7074558621929526e-06, + "loss": 0.9448, "step": 16505 }, { - "epoch": 0.47365627644772135, - "grad_norm": 0.392578125, - "learning_rate": 0.00012633465572797604, - "loss": 0.9574, + "epoch": 0.9472717883986459, + "grad_norm": 0.259765625, + "learning_rate": 1.6890789568565156e-06, + "loss": 0.9379, "step": 16510 }, { - "epoch": 0.4737997217161792, - "grad_norm": 0.3828125, - "learning_rate": 0.00012628634674625929, - "loss": 0.8947, + "epoch": 0.9475586665901659, + "grad_norm": 0.28515625, + "learning_rate": 1.670800637932146e-06, + "loss": 0.9013, "step": 16515 }, { - "epoch": 0.473943166984637, - "grad_norm": 0.380859375, - "learning_rate": 0.0001262380311736999, - "loss": 0.9906, + "epoch": 0.9478455447816857, + "grad_norm": 0.25390625, + "learning_rate": 1.6526209237493928e-06, + "loss": 0.9358, "step": 16520 }, { - "epoch": 0.47408661225309484, - "grad_norm": 0.400390625, - "learning_rate": 0.00012618970902241223, - "loss": 0.8893, + "epoch": 0.9481324229732055, + "grad_norm": 0.263671875, + "learning_rate": 1.634539832538895e-06, + "loss": 0.9984, "step": 16525 }, { - "epoch": 0.4742300575215527, - "grad_norm": 0.458984375, - "learning_rate": 0.0001261413803045122, - "loss": 0.9415, + "epoch": 0.9484193011647255, + "grad_norm": 0.263671875, + 
"learning_rate": 1.6165573824324488e-06, + "loss": 0.9786, "step": 16530 }, { - "epoch": 0.47437350279001045, - "grad_norm": 0.412109375, - "learning_rate": 0.00012609304503211733, - "loss": 1.071, + "epoch": 0.9487061793562453, + "grad_norm": 0.263671875, + "learning_rate": 1.5986735914628625e-06, + "loss": 0.9573, "step": 16535 }, { - "epoch": 0.4745169480584683, - "grad_norm": 0.42578125, - "learning_rate": 0.00012604470321734694, - "loss": 0.996, + "epoch": 0.9489930575477652, + "grad_norm": 0.314453125, + "learning_rate": 1.5808884775640464e-06, + "loss": 0.9199, "step": 16540 }, { - "epoch": 0.4746603933269261, - "grad_norm": 0.39453125, - "learning_rate": 0.00012599635487232179, - "loss": 1.0516, + "epoch": 0.9492799357392852, + "grad_norm": 0.29296875, + "learning_rate": 1.5632020585709673e-06, + "loss": 0.9071, "step": 16545 }, { - "epoch": 0.47480383859538394, - "grad_norm": 0.37109375, - "learning_rate": 0.00012594800000916448, - "loss": 0.9567, + "epoch": 0.949566813930805, + "grad_norm": 0.275390625, + "learning_rate": 1.5456143522195931e-06, + "loss": 0.9682, "step": 16550 }, { - "epoch": 0.4749472838638418, - "grad_norm": 0.400390625, - "learning_rate": 0.00012589963863999913, - "loss": 0.9323, + "epoch": 0.9498536921223248, + "grad_norm": 0.25390625, + "learning_rate": 1.5281253761469161e-06, + "loss": 0.9099, "step": 16555 }, { - "epoch": 0.47509072913229955, - "grad_norm": 0.39453125, - "learning_rate": 0.00012585127077695144, - "loss": 1.0324, + "epoch": 0.9501405703138447, + "grad_norm": 0.314453125, + "learning_rate": 1.5107351478909293e-06, + "loss": 0.9268, "step": 16560 }, { - "epoch": 0.4752341744007574, - "grad_norm": 0.3671875, - "learning_rate": 0.0001258028964321489, - "loss": 0.9827, + "epoch": 0.9504274485053646, + "grad_norm": 0.28125, + "learning_rate": 1.493443684890583e-06, + "loss": 0.9555, "step": 16565 }, { - "epoch": 0.4753776196692152, - "grad_norm": 0.376953125, - "learning_rate": 0.00012575451561772048, - "loss": 0.9081, + "epoch": 0.9507143266968845, + "grad_norm": 0.263671875, + "learning_rate": 1.4762510044857957e-06, + "loss": 0.9207, "step": 16570 }, { - "epoch": 0.47552106493767304, - "grad_norm": 0.40625, - "learning_rate": 0.0001257061283457968, - "loss": 0.9189, + "epoch": 0.9510012048884043, + "grad_norm": 0.265625, + "learning_rate": 1.4591571239174317e-06, + "loss": 0.9716, "step": 16575 }, { - "epoch": 0.4756645102061309, - "grad_norm": 0.380859375, - "learning_rate": 0.00012565773462851017, - "loss": 1.0923, + "epoch": 0.9512880830799243, + "grad_norm": 0.255859375, + "learning_rate": 1.4421620603272789e-06, + "loss": 0.9015, "step": 16580 }, { - "epoch": 0.47580795547458865, - "grad_norm": 0.41015625, - "learning_rate": 0.0001256093344779945, - "loss": 0.927, + "epoch": 0.9515749612714441, + "grad_norm": 0.255859375, + "learning_rate": 1.4252658307580048e-06, + "loss": 0.9391, "step": 16585 }, { - "epoch": 0.4759514007430465, - "grad_norm": 0.396484375, - "learning_rate": 0.00012556092790638518, - "loss": 0.9163, + "epoch": 0.951861839462964, + "grad_norm": 0.267578125, + "learning_rate": 1.4084684521531887e-06, + "loss": 0.9711, "step": 16590 }, { - "epoch": 0.4760948460115043, - "grad_norm": 0.40234375, - "learning_rate": 0.0001255125149258194, - "loss": 0.9236, + "epoch": 0.952148717654484, + "grad_norm": 0.275390625, + "learning_rate": 1.3917699413573014e-06, + "loss": 0.955, "step": 16595 }, { - "epoch": 0.47623829127996214, - "grad_norm": 0.392578125, - "learning_rate": 0.00012546409554843585, - "loss": 0.9319, + "epoch": 
0.9524355958460038, + "grad_norm": 0.255859375, + "learning_rate": 1.375170315115637e-06, + "loss": 0.9687, "step": 16600 }, { - "epoch": 0.47638173654842, - "grad_norm": 0.357421875, - "learning_rate": 0.00012541566978637483, - "loss": 0.906, + "epoch": 0.9527224740375236, + "grad_norm": 0.27734375, + "learning_rate": 1.3586695900743352e-06, + "loss": 1.0067, "step": 16605 }, { - "epoch": 0.47652518181687775, - "grad_norm": 0.400390625, - "learning_rate": 0.00012536723765177826, - "loss": 0.963, + "epoch": 0.9530093522290436, + "grad_norm": 0.28125, + "learning_rate": 1.3422677827803599e-06, + "loss": 0.9846, "step": 16610 }, { - "epoch": 0.4766686270853356, - "grad_norm": 0.443359375, - "learning_rate": 0.00012531879915678962, - "loss": 0.965, + "epoch": 0.9532962304205634, + "grad_norm": 0.251953125, + "learning_rate": 1.3259649096814763e-06, + "loss": 0.9407, "step": 16615 }, { - "epoch": 0.4768120723537934, - "grad_norm": 0.478515625, - "learning_rate": 0.0001252703543135541, - "loss": 1.0622, + "epoch": 0.9535831086120833, + "grad_norm": 0.28125, + "learning_rate": 1.3097609871262295e-06, + "loss": 1.0087, "step": 16620 }, { - "epoch": 0.47695551762225125, - "grad_norm": 0.39453125, - "learning_rate": 0.00012522190313421833, - "loss": 0.9356, + "epoch": 0.9538699868036032, + "grad_norm": 0.2734375, + "learning_rate": 1.293656031363988e-06, + "loss": 1.0001, "step": 16625 }, { - "epoch": 0.4770989628907091, - "grad_norm": 0.359375, - "learning_rate": 0.0001251734456309306, - "loss": 0.8412, + "epoch": 0.9541568649951231, + "grad_norm": 0.25390625, + "learning_rate": 1.2776500585448215e-06, + "loss": 0.9324, "step": 16630 }, { - "epoch": 0.47724240815916685, - "grad_norm": 0.38671875, - "learning_rate": 0.00012512498181584082, - "loss": 1.0248, + "epoch": 0.9544437431866429, + "grad_norm": 0.26953125, + "learning_rate": 1.2617430847195356e-06, + "loss": 0.8486, "step": 16635 }, { - "epoch": 0.4773858534276247, - "grad_norm": 0.4140625, - "learning_rate": 0.00012507651170110042, - "loss": 1.1228, + "epoch": 0.9547306213781628, + "grad_norm": 0.25390625, + "learning_rate": 1.2459351258396812e-06, + "loss": 0.982, "step": 16640 }, { - "epoch": 0.4775292986960825, - "grad_norm": 0.435546875, - "learning_rate": 0.00012502803529886245, - "loss": 0.9529, + "epoch": 0.9550174995696827, + "grad_norm": 0.25390625, + "learning_rate": 1.2302261977575447e-06, + "loss": 0.9321, "step": 16645 }, { - "epoch": 0.47767274396454035, - "grad_norm": 0.435546875, - "learning_rate": 0.0001249795526212815, - "loss": 0.9277, + "epoch": 0.9553043777612026, + "grad_norm": 0.271484375, + "learning_rate": 1.2146163162260581e-06, + "loss": 1.0542, "step": 16650 }, { - "epoch": 0.4778161892329981, - "grad_norm": 0.376953125, - "learning_rate": 0.00012493106368051377, - "loss": 0.9235, + "epoch": 0.9555912559527224, + "grad_norm": 0.26171875, + "learning_rate": 1.1991054968988336e-06, + "loss": 0.9529, "step": 16655 }, { - "epoch": 0.47795963450145595, - "grad_norm": 0.3828125, - "learning_rate": 0.000124882568488717, - "loss": 0.9733, + "epoch": 0.9558781341442424, + "grad_norm": 0.263671875, + "learning_rate": 1.183693755330173e-06, + "loss": 0.9219, "step": 16660 }, { - "epoch": 0.4781030797699138, - "grad_norm": 0.431640625, - "learning_rate": 0.00012483406705805056, - "loss": 0.947, + "epoch": 0.9561650123357622, + "grad_norm": 0.28515625, + "learning_rate": 1.1683811069749916e-06, + "loss": 0.9837, "step": 16665 }, { - "epoch": 0.4782465250383716, - "grad_norm": 0.376953125, - "learning_rate": 
0.00012478555940067528, - "loss": 1.037, + "epoch": 0.9564518905272821, + "grad_norm": 0.298828125, + "learning_rate": 1.1531675671888619e-06, + "loss": 0.9397, "step": 16670 }, { - "epoch": 0.47838997030682945, - "grad_norm": 0.373046875, - "learning_rate": 0.00012473704552875363, - "loss": 0.9502, + "epoch": 0.956738768718802, + "grad_norm": 0.265625, + "learning_rate": 1.1380531512279469e-06, + "loss": 0.9552, "step": 16675 }, { - "epoch": 0.4785334155752872, - "grad_norm": 0.375, - "learning_rate": 0.00012468852545444961, - "loss": 1.0088, + "epoch": 0.9570256469103219, + "grad_norm": 0.28125, + "learning_rate": 1.1230378742490222e-06, + "loss": 0.9718, "step": 16680 }, { - "epoch": 0.47867686084374506, - "grad_norm": 0.36328125, - "learning_rate": 0.0001246399991899288, - "loss": 1.0533, + "epoch": 0.9573125251018417, + "grad_norm": 0.2734375, + "learning_rate": 1.1081217513094212e-06, + "loss": 0.9079, "step": 16685 }, { - "epoch": 0.4788203061122029, - "grad_norm": 0.39453125, - "learning_rate": 0.0001245914667473583, - "loss": 0.9206, + "epoch": 0.9575994032933617, + "grad_norm": 0.330078125, + "learning_rate": 1.0933047973670896e-06, + "loss": 0.97, "step": 16690 }, { - "epoch": 0.4789637513806607, - "grad_norm": 0.447265625, - "learning_rate": 0.00012454292813890677, - "loss": 0.97, + "epoch": 0.9578862814848815, + "grad_norm": 0.28125, + "learning_rate": 1.0785870272804977e-06, + "loss": 0.899, "step": 16695 }, { - "epoch": 0.47910719664911855, - "grad_norm": 0.40234375, - "learning_rate": 0.00012449438337674447, - "loss": 0.9047, + "epoch": 0.9581731596764014, + "grad_norm": 0.283203125, + "learning_rate": 1.0639684558086504e-06, + "loss": 0.9541, "step": 16700 }, { - "epoch": 0.4792506419175763, - "grad_norm": 0.40625, - "learning_rate": 0.00012444583247304305, - "loss": 0.911, + "epoch": 0.9584600378679213, + "grad_norm": 0.26953125, + "learning_rate": 1.0494490976110883e-06, + "loss": 0.9538, "step": 16705 }, { - "epoch": 0.47939408718603416, - "grad_norm": 0.384765625, - "learning_rate": 0.00012439727543997586, - "loss": 0.9608, + "epoch": 0.9587469160594412, + "grad_norm": 0.271484375, + "learning_rate": 1.035028967247864e-06, + "loss": 0.995, "step": 16710 }, { - "epoch": 0.479537532454492, - "grad_norm": 0.369140625, - "learning_rate": 0.00012434871228971775, - "loss": 0.9448, + "epoch": 0.959033794250961, + "grad_norm": 0.275390625, + "learning_rate": 1.0207080791794998e-06, + "loss": 0.9229, "step": 16715 }, { - "epoch": 0.4796809777229498, - "grad_norm": 0.369140625, - "learning_rate": 0.00012430014303444503, - "loss": 0.8409, + "epoch": 0.9593206724424809, + "grad_norm": 0.279296875, + "learning_rate": 1.006486447767019e-06, + "loss": 0.9349, "step": 16720 }, { - "epoch": 0.47982442299140765, - "grad_norm": 0.412109375, - "learning_rate": 0.00012425156768633566, - "loss": 0.9046, + "epoch": 0.9596075506340008, + "grad_norm": 0.28515625, + "learning_rate": 9.923640872719131e-07, + "loss": 0.9353, "step": 16725 }, { - "epoch": 0.4799678682598654, - "grad_norm": 0.37109375, - "learning_rate": 0.00012420298625756898, - "loss": 0.9139, + "epoch": 0.9598944288255207, + "grad_norm": 0.25, + "learning_rate": 9.78341011856121e-07, + "loss": 0.9204, "step": 16730 }, { - "epoch": 0.48011131352832326, - "grad_norm": 0.419921875, - "learning_rate": 0.00012415439876032602, - "loss": 1.0755, + "epoch": 0.9601813070170405, + "grad_norm": 0.279296875, + "learning_rate": 9.644172355819936e-07, + "loss": 1.047, "step": 16735 }, { - "epoch": 0.4802547587967811, - "grad_norm": 
0.43359375, - "learning_rate": 0.0001241058052067892, - "loss": 0.9815, + "epoch": 0.9604681852085605, + "grad_norm": 0.267578125, + "learning_rate": 9.505927724123509e-07, + "loss": 0.9377, "step": 16740 }, { - "epoch": 0.4803982040652389, - "grad_norm": 0.388671875, - "learning_rate": 0.00012405720560914249, - "loss": 0.8937, + "epoch": 0.9607550634000803, + "grad_norm": 0.265625, + "learning_rate": 9.368676362103701e-07, + "loss": 0.968, "step": 16745 }, { - "epoch": 0.48054164933369675, - "grad_norm": 0.375, - "learning_rate": 0.0001240085999795714, - "loss": 0.9275, + "epoch": 0.9610419415916002, + "grad_norm": 0.2490234375, + "learning_rate": 9.232418407396636e-07, + "loss": 0.9917, "step": 16750 }, { - "epoch": 0.48068509460215453, - "grad_norm": 0.376953125, - "learning_rate": 0.000123959988330263, - "loss": 0.8752, + "epoch": 0.9613288197831201, + "grad_norm": 0.251953125, + "learning_rate": 9.097153996642238e-07, + "loss": 0.949, "step": 16755 }, { - "epoch": 0.48082853987061236, - "grad_norm": 0.416015625, - "learning_rate": 0.00012391137067340572, - "loss": 1.0803, + "epoch": 0.96161569797464, + "grad_norm": 0.259765625, + "learning_rate": 8.962883265483668e-07, + "loss": 0.9995, "step": 16760 }, { - "epoch": 0.4809719851390702, - "grad_norm": 0.400390625, - "learning_rate": 0.00012386274702118962, - "loss": 0.8976, + "epoch": 0.9619025761661598, + "grad_norm": 0.283203125, + "learning_rate": 8.829606348567999e-07, + "loss": 0.937, "step": 16765 }, { - "epoch": 0.481115430407528, - "grad_norm": 0.384765625, - "learning_rate": 0.0001238141173858063, - "loss": 0.8721, + "epoch": 0.9621894543576798, + "grad_norm": 0.2734375, + "learning_rate": 8.697323379545653e-07, + "loss": 0.9242, "step": 16770 }, { - "epoch": 0.48125887567598585, - "grad_norm": 0.431640625, - "learning_rate": 0.0001237654817794487, - "loss": 1.0444, + "epoch": 0.9624763325491996, + "grad_norm": 0.279296875, + "learning_rate": 8.566034491070407e-07, + "loss": 0.9806, "step": 16775 }, { - "epoch": 0.48140232094444363, - "grad_norm": 0.384765625, - "learning_rate": 0.00012371684021431144, - "loss": 0.9543, + "epoch": 0.9627632107407195, + "grad_norm": 0.25390625, + "learning_rate": 8.435739814798949e-07, + "loss": 0.9588, "step": 16780 }, { - "epoch": 0.48154576621290146, - "grad_norm": 0.41015625, - "learning_rate": 0.0001236681927025904, - "loss": 1.0093, + "epoch": 0.9630500889322394, + "grad_norm": 0.23828125, + "learning_rate": 8.30643948139087e-07, + "loss": 0.9256, "step": 16785 }, { - "epoch": 0.4816892114813593, - "grad_norm": 0.5625, - "learning_rate": 0.00012361953925648327, - "loss": 0.9751, + "epoch": 0.9633369671237593, + "grad_norm": 0.271484375, + "learning_rate": 8.178133620509232e-07, + "loss": 0.8975, "step": 16790 }, { - "epoch": 0.4818326567498171, - "grad_norm": 0.431640625, - "learning_rate": 0.00012357087988818893, - "loss": 0.9897, + "epoch": 0.9636238453152791, + "grad_norm": 0.25, + "learning_rate": 8.050822360819221e-07, + "loss": 1.0417, "step": 16795 }, { - "epoch": 0.48197610201827495, - "grad_norm": 0.44140625, - "learning_rate": 0.0001235222146099079, - "loss": 0.9503, + "epoch": 0.963910723506799, + "grad_norm": 0.263671875, + "learning_rate": 7.924505829988716e-07, + "loss": 0.9042, "step": 16800 }, { - "epoch": 0.48211954728673273, - "grad_norm": 0.330078125, - "learning_rate": 0.0001234735434338422, - "loss": 0.926, + "epoch": 0.9641976016983189, + "grad_norm": 0.265625, + "learning_rate": 7.79918415468861e-07, + "loss": 0.8764, "step": 16805 }, { - "epoch": 
0.48226299255519056, - "grad_norm": 0.423828125, - "learning_rate": 0.00012342486637219517, - "loss": 1.0283, + "epoch": 0.9644844798898388, + "grad_norm": 0.255859375, + "learning_rate": 7.674857460591379e-07, + "loss": 0.9449, "step": 16810 }, { - "epoch": 0.4824064378236484, - "grad_norm": 0.3984375, - "learning_rate": 0.00012337618343717183, - "loss": 1.088, + "epoch": 0.9647713580813586, + "grad_norm": 0.26953125, + "learning_rate": 7.551525872372289e-07, + "loss": 0.9472, "step": 16815 }, { - "epoch": 0.4825498830921062, - "grad_norm": 0.3828125, - "learning_rate": 0.00012332749464097855, - "loss": 0.9016, + "epoch": 0.9650582362728786, + "grad_norm": 0.287109375, + "learning_rate": 7.429189513708524e-07, + "loss": 0.9963, "step": 16820 }, { - "epoch": 0.482693328360564, - "grad_norm": 0.361328125, - "learning_rate": 0.00012327879999582316, - "loss": 0.9223, + "epoch": 0.9653451144643984, + "grad_norm": 0.263671875, + "learning_rate": 7.307848507279169e-07, + "loss": 0.9572, "step": 16825 }, { - "epoch": 0.48283677362902183, - "grad_norm": 0.3671875, - "learning_rate": 0.00012323009951391504, - "loss": 0.9653, + "epoch": 0.9656319926559183, + "grad_norm": 0.25390625, + "learning_rate": 7.187502974765448e-07, + "loss": 0.9461, "step": 16830 }, { - "epoch": 0.48298021889747966, - "grad_norm": 0.423828125, - "learning_rate": 0.00012318139320746495, - "loss": 0.973, + "epoch": 0.9659188708474382, + "grad_norm": 0.27734375, + "learning_rate": 7.068153036849934e-07, + "loss": 0.9776, "step": 16835 }, { - "epoch": 0.4831236641659375, - "grad_norm": 0.474609375, - "learning_rate": 0.00012313268108868518, - "loss": 0.9619, + "epoch": 0.9662057490389581, + "grad_norm": 0.28125, + "learning_rate": 6.949798813217001e-07, + "loss": 0.9549, "step": 16840 }, { - "epoch": 0.4832671094343953, - "grad_norm": 0.3828125, - "learning_rate": 0.00012308396316978944, - "loss": 0.9266, + "epoch": 0.9664926272304779, + "grad_norm": 0.263671875, + "learning_rate": 6.83244042255271e-07, + "loss": 0.9784, "step": 16845 }, { - "epoch": 0.4834105547028531, - "grad_norm": 0.390625, - "learning_rate": 0.00012303523946299285, - "loss": 1.0404, + "epoch": 0.9667795054219979, + "grad_norm": 0.265625, + "learning_rate": 6.716077982544256e-07, + "loss": 0.9131, "step": 16850 }, { - "epoch": 0.48355399997131093, - "grad_norm": 0.419921875, - "learning_rate": 0.0001229865099805121, - "loss": 0.9532, + "epoch": 0.9670663836135177, + "grad_norm": 0.251953125, + "learning_rate": 6.600711609880072e-07, + "loss": 0.952, "step": 16855 }, { - "epoch": 0.48369744523976876, - "grad_norm": 0.458984375, - "learning_rate": 0.00012293777473456518, - "loss": 0.9422, + "epoch": 0.9673532618050376, + "grad_norm": 0.25, + "learning_rate": 6.486341420249842e-07, + "loss": 1.0066, "step": 16860 }, { - "epoch": 0.4838408905082266, - "grad_norm": 0.388671875, - "learning_rate": 0.00012288903373737164, - "loss": 0.9721, + "epoch": 0.9676401399965575, + "grad_norm": 0.271484375, + "learning_rate": 6.372967528344264e-07, + "loss": 0.9966, "step": 16865 }, { - "epoch": 0.4839843357766844, - "grad_norm": 0.365234375, - "learning_rate": 0.00012284028700115245, - "loss": 0.9965, + "epoch": 0.9679270181880774, + "grad_norm": 0.251953125, + "learning_rate": 6.260590047854952e-07, + "loss": 0.9964, "step": 16870 }, { - "epoch": 0.4841277810451422, - "grad_norm": 0.423828125, - "learning_rate": 0.00012279153453812998, - "loss": 0.9673, + "epoch": 0.9682138963795972, + "grad_norm": 0.26953125, + "learning_rate": 6.149209091474318e-07, + "loss": 0.8882, 
"step": 16875 }, { - "epoch": 0.48427122631360003, - "grad_norm": 0.384765625, - "learning_rate": 0.0001227427763605281, - "loss": 0.9993, + "epoch": 0.968500774571117, + "grad_norm": 0.279296875, + "learning_rate": 6.038824770895457e-07, + "loss": 0.9484, "step": 16880 }, { - "epoch": 0.48441467158205787, - "grad_norm": 0.38671875, - "learning_rate": 0.00012269401248057198, - "loss": 1.0351, + "epoch": 0.968787652762637, + "grad_norm": 0.2734375, + "learning_rate": 5.929437196811827e-07, + "loss": 0.9168, "step": 16885 }, { - "epoch": 0.4845581168505157, - "grad_norm": 0.392578125, - "learning_rate": 0.0001226452429104884, - "loss": 0.946, + "epoch": 0.9690745309541569, + "grad_norm": 0.265625, + "learning_rate": 5.821046478917791e-07, + "loss": 0.9508, "step": 16890 }, { - "epoch": 0.48470156211897353, - "grad_norm": 0.42578125, - "learning_rate": 0.0001225964676625055, - "loss": 1.0004, + "epoch": 0.9693614091456767, + "grad_norm": 0.255859375, + "learning_rate": 5.713652725907626e-07, + "loss": 1.0207, "step": 16895 }, { - "epoch": 0.4848450073874313, - "grad_norm": 0.37890625, - "learning_rate": 0.0001225476867488527, - "loss": 1.058, + "epoch": 0.9696482873371967, + "grad_norm": 0.259765625, + "learning_rate": 5.607256045475961e-07, + "loss": 0.9338, "step": 16900 }, { - "epoch": 0.48498845265588914, - "grad_norm": 0.384765625, - "learning_rate": 0.0001224989001817611, - "loss": 0.9746, + "epoch": 0.9699351655287165, + "grad_norm": 0.255859375, + "learning_rate": 5.501856544317896e-07, + "loss": 0.8846, "step": 16905 }, { - "epoch": 0.48513189792434697, - "grad_norm": 0.400390625, - "learning_rate": 0.00012245010797346296, - "loss": 1.0068, + "epoch": 0.9702220437202363, + "grad_norm": 0.2890625, + "learning_rate": 5.397454328128104e-07, + "loss": 0.9225, "step": 16910 }, { - "epoch": 0.4852753431928048, - "grad_norm": 0.43359375, - "learning_rate": 0.00012240131013619213, - "loss": 1.0296, + "epoch": 0.9705089219117563, + "grad_norm": 0.267578125, + "learning_rate": 5.294049501601283e-07, + "loss": 0.9467, "step": 16915 }, { - "epoch": 0.48541878846126263, - "grad_norm": 0.365234375, - "learning_rate": 0.00012235250668218386, - "loss": 0.9892, + "epoch": 0.9707958001032762, + "grad_norm": 0.25390625, + "learning_rate": 5.191642168432154e-07, + "loss": 0.9385, "step": 16920 }, { - "epoch": 0.4855622337297204, - "grad_norm": 0.40625, - "learning_rate": 0.00012230369762367466, - "loss": 1.03, + "epoch": 0.971082678294796, + "grad_norm": 0.263671875, + "learning_rate": 5.090232431315123e-07, + "loss": 0.9474, "step": 16925 }, { - "epoch": 0.48570567899817824, - "grad_norm": 0.37109375, - "learning_rate": 0.00012225488297290266, - "loss": 0.8749, + "epoch": 0.971369556486316, + "grad_norm": 0.271484375, + "learning_rate": 4.989820391943845e-07, + "loss": 0.9796, "step": 16930 }, { - "epoch": 0.48584912426663607, - "grad_norm": 0.4296875, - "learning_rate": 0.00012220606274210725, - "loss": 0.9935, + "epoch": 0.9716564346778358, + "grad_norm": 0.2734375, + "learning_rate": 4.890406151011884e-07, + "loss": 0.9807, "step": 16935 }, { - "epoch": 0.4859925695350939, - "grad_norm": 0.3671875, - "learning_rate": 0.00012215723694352916, - "loss": 0.9198, + "epoch": 0.9719433128693556, + "grad_norm": 0.2490234375, + "learning_rate": 4.79198980821216e-07, + "loss": 0.9716, "step": 16940 }, { - "epoch": 0.48613601480355173, - "grad_norm": 0.40625, - "learning_rate": 0.00012210840558941072, - "loss": 0.9853, + "epoch": 0.9722301910608756, + "grad_norm": 0.267578125, + "learning_rate": 
4.694571462236619e-07, + "loss": 0.9392, "step": 16945 }, { - "epoch": 0.4862794600720095, - "grad_norm": 0.38671875, - "learning_rate": 0.00012205956869199549, - "loss": 0.9771, + "epoch": 0.9725170692523954, + "grad_norm": 0.26171875, + "learning_rate": 4.5981512107766687e-07, + "loss": 0.9309, "step": 16950 }, { - "epoch": 0.48642290534046734, - "grad_norm": 0.365234375, - "learning_rate": 0.00012201072626352848, - "loss": 0.9408, + "epoch": 0.9728039474439153, + "grad_norm": 0.23828125, + "learning_rate": 4.5027291505227443e-07, + "loss": 0.848, "step": 16955 }, { - "epoch": 0.48656635060892517, - "grad_norm": 0.37109375, - "learning_rate": 0.00012196187831625605, - "loss": 0.9904, + "epoch": 0.9730908256354351, + "grad_norm": 0.25, + "learning_rate": 4.408305377164301e-07, + "loss": 0.8948, "step": 16960 }, { - "epoch": 0.486709795877383, - "grad_norm": 0.375, - "learning_rate": 0.00012191302486242601, - "loss": 0.9654, + "epoch": 0.9733777038269551, + "grad_norm": 0.248046875, + "learning_rate": 4.314879985389708e-07, + "loss": 0.8556, "step": 16965 }, { - "epoch": 0.48685324114584083, - "grad_norm": 0.376953125, - "learning_rate": 0.00012186416591428751, - "loss": 0.9612, + "epoch": 0.9736645820184749, + "grad_norm": 0.287109375, + "learning_rate": 4.222453068886245e-07, + "loss": 0.9207, "step": 16970 }, { - "epoch": 0.4869966864142986, - "grad_norm": 0.392578125, - "learning_rate": 0.00012181530148409105, - "loss": 0.9717, + "epoch": 0.9739514602099948, + "grad_norm": 0.25390625, + "learning_rate": 4.13102472033966e-07, + "loss": 0.9779, "step": 16975 }, { - "epoch": 0.48714013168275644, - "grad_norm": 0.376953125, - "learning_rate": 0.00012176643158408853, - "loss": 0.8398, + "epoch": 0.9742383384015147, + "grad_norm": 0.248046875, + "learning_rate": 4.0405950314347243e-07, + "loss": 0.9539, "step": 16980 }, { - "epoch": 0.48728357695121427, - "grad_norm": 0.3984375, - "learning_rate": 0.00012171755622653327, - "loss": 0.883, + "epoch": 0.9745252165930346, + "grad_norm": 0.28125, + "learning_rate": 3.951164092854343e-07, + "loss": 0.9088, "step": 16985 }, { - "epoch": 0.4874270222196721, - "grad_norm": 0.384765625, - "learning_rate": 0.00012166867542367985, - "loss": 0.974, + "epoch": 0.9748120947845544, + "grad_norm": 0.275390625, + "learning_rate": 3.862731994280111e-07, + "loss": 0.9245, "step": 16990 }, { - "epoch": 0.4875704674881299, - "grad_norm": 0.390625, - "learning_rate": 0.00012161978918778431, - "loss": 0.8605, + "epoch": 0.9750989729760744, + "grad_norm": 0.302734375, + "learning_rate": 3.775298824391982e-07, + "loss": 0.9384, "step": 16995 }, { - "epoch": 0.4877139127565877, - "grad_norm": 0.376953125, - "learning_rate": 0.00012157089753110406, - "loss": 0.9744, + "epoch": 0.9753858511675942, + "grad_norm": 0.3125, + "learning_rate": 3.688864670868153e-07, + "loss": 0.9567, "step": 17000 }, { - "epoch": 0.48785735802504554, - "grad_norm": 0.423828125, - "learning_rate": 0.00012152200046589774, - "loss": 1.0298, + "epoch": 0.9756727293591141, + "grad_norm": 0.28125, + "learning_rate": 3.6034296203848463e-07, + "loss": 0.9531, "step": 17005 }, { - "epoch": 0.48800080329350337, - "grad_norm": 0.357421875, - "learning_rate": 0.00012147309800442555, - "loss": 1.0024, + "epoch": 0.975959607550634, + "grad_norm": 0.263671875, + "learning_rate": 3.51899375861664e-07, + "loss": 0.9382, "step": 17010 }, { - "epoch": 0.4881442485619612, - "grad_norm": 0.36328125, - "learning_rate": 0.00012142419015894882, - "loss": 0.9316, + "epoch": 0.9762464857421539, + "grad_norm": 
0.255859375, + "learning_rate": 3.435557170236026e-07, + "loss": 0.9319, "step": 17015 }, { - "epoch": 0.488287693830419, - "grad_norm": 0.3671875, - "learning_rate": 0.00012137527694173038, - "loss": 0.9611, + "epoch": 0.9765333639336737, + "grad_norm": 0.2490234375, + "learning_rate": 3.3531199389132963e-07, + "loss": 0.9408, "step": 17020 }, { - "epoch": 0.4884311390988768, - "grad_norm": 0.369140625, - "learning_rate": 0.00012132635836503437, - "loss": 0.9988, + "epoch": 0.9768202421251937, + "grad_norm": 0.265625, + "learning_rate": 3.271682147316879e-07, + "loss": 0.8888, "step": 17025 }, { - "epoch": 0.48857458436733464, - "grad_norm": 0.375, - "learning_rate": 0.00012127743444112629, - "loss": 0.863, + "epoch": 0.9771071203167135, + "grad_norm": 0.27734375, + "learning_rate": 3.1912438771125594e-07, + "loss": 0.9604, "step": 17030 }, { - "epoch": 0.4887180296357925, - "grad_norm": 0.3671875, - "learning_rate": 0.00012122850518227294, - "loss": 1.0021, + "epoch": 0.9773939985082334, + "grad_norm": 0.251953125, + "learning_rate": 3.111805208964036e-07, + "loss": 0.9139, "step": 17035 }, { - "epoch": 0.4888614749042503, - "grad_norm": 0.3984375, - "learning_rate": 0.00012117957060074245, - "loss": 0.9209, + "epoch": 0.9776808766997532, + "grad_norm": 0.267578125, + "learning_rate": 3.0333662225328074e-07, + "loss": 0.979, "step": 17040 }, { - "epoch": 0.4890049201727081, - "grad_norm": 0.35546875, - "learning_rate": 0.00012113063070880434, - "loss": 0.9474, + "epoch": 0.9779677548912732, + "grad_norm": 0.2578125, + "learning_rate": 2.955926996477398e-07, + "loss": 0.9427, "step": 17045 }, { - "epoch": 0.4891483654411659, - "grad_norm": 0.375, - "learning_rate": 0.00012108168551872944, - "loss": 0.9702, + "epoch": 0.978254633082793, + "grad_norm": 0.271484375, + "learning_rate": 2.8794876084541346e-07, + "loss": 0.9177, "step": 17050 }, { - "epoch": 0.48929181070962374, - "grad_norm": 0.447265625, - "learning_rate": 0.00012103273504278992, - "loss": 1.0064, + "epoch": 0.9785415112743129, + "grad_norm": 0.2578125, + "learning_rate": 2.8040481351166993e-07, + "loss": 0.9348, "step": 17055 }, { - "epoch": 0.4894352559780816, - "grad_norm": 0.4296875, - "learning_rate": 0.00012098377929325917, - "loss": 0.923, + "epoch": 0.9788283894658328, + "grad_norm": 0.287109375, + "learning_rate": 2.7296086521158003e-07, + "loss": 0.9361, "step": 17060 }, { - "epoch": 0.4895787012465394, - "grad_norm": 0.388671875, - "learning_rate": 0.00012093481828241209, - "loss": 0.9937, + "epoch": 0.9791152676573527, + "grad_norm": 0.29296875, + "learning_rate": 2.6561692340997255e-07, + "loss": 0.9358, "step": 17065 }, { - "epoch": 0.4897221465149972, - "grad_norm": 0.4765625, - "learning_rate": 0.00012088585202252474, - "loss": 1.065, + "epoch": 0.9794021458488725, + "grad_norm": 0.28515625, + "learning_rate": 2.583729954713454e-07, + "loss": 0.9239, "step": 17070 }, { - "epoch": 0.489865591783455, - "grad_norm": 0.34375, - "learning_rate": 0.00012083688052587456, - "loss": 0.891, + "epoch": 0.9796890240403925, + "grad_norm": 0.287109375, + "learning_rate": 2.512290886599433e-07, + "loss": 0.9068, "step": 17075 }, { - "epoch": 0.49000903705191284, - "grad_norm": 0.34765625, - "learning_rate": 0.00012078790380474037, - "loss": 0.9561, + "epoch": 0.9799759022319123, + "grad_norm": 0.267578125, + "learning_rate": 2.441852101396802e-07, + "loss": 0.9934, "step": 17080 }, { - "epoch": 0.4901524823203707, - "grad_norm": 0.419921875, - "learning_rate": 0.00012073892187140213, - "loss": 0.924, + "epoch": 
0.9802627804234322, + "grad_norm": 0.255859375, + "learning_rate": 2.3724136697418353e-07, + "loss": 0.9389, "step": 17085 }, { - "epoch": 0.4902959275888285, - "grad_norm": 0.3828125, - "learning_rate": 0.00012068993473814126, - "loss": 0.8134, + "epoch": 0.9805496586149521, + "grad_norm": 0.271484375, + "learning_rate": 2.303975661267499e-07, + "loss": 0.908, "step": 17090 }, { - "epoch": 0.4904393728572863, - "grad_norm": 0.41796875, - "learning_rate": 0.00012064094241724038, - "loss": 0.8638, + "epoch": 0.980836536806472, + "grad_norm": 0.287109375, + "learning_rate": 2.2365381446035617e-07, + "loss": 0.9184, "step": 17095 }, { - "epoch": 0.4905828181257441, - "grad_norm": 0.392578125, - "learning_rate": 0.00012059194492098351, - "loss": 0.9104, + "epoch": 0.9811234149979918, + "grad_norm": 0.275390625, + "learning_rate": 2.170101187376594e-07, + "loss": 1.0126, "step": 17100 }, { - "epoch": 0.49072626339420194, - "grad_norm": 0.388671875, - "learning_rate": 0.0001205429422616559, - "loss": 0.9517, + "epoch": 0.9814102931895118, + "grad_norm": 0.26171875, + "learning_rate": 2.104664856209637e-07, + "loss": 0.894, "step": 17105 }, { - "epoch": 0.4908697086626598, - "grad_norm": 0.400390625, - "learning_rate": 0.00012049393445154411, - "loss": 0.865, + "epoch": 0.9816971713810316, + "grad_norm": 0.263671875, + "learning_rate": 2.0402292167224225e-07, + "loss": 0.9465, "step": 17110 }, { - "epoch": 0.4910131539311176, - "grad_norm": 0.35546875, - "learning_rate": 0.000120444921502936, - "loss": 0.9898, + "epoch": 0.9819840495725515, + "grad_norm": 0.29296875, + "learning_rate": 1.976794333531151e-07, + "loss": 0.9665, "step": 17115 }, { - "epoch": 0.4911565991995754, - "grad_norm": 0.384765625, - "learning_rate": 0.0001203959034281207, - "loss": 0.9429, + "epoch": 0.9822709277640713, + "grad_norm": 0.26171875, + "learning_rate": 1.9143602702484942e-07, + "loss": 0.9468, "step": 17120 }, { - "epoch": 0.4913000444680332, - "grad_norm": 0.462890625, - "learning_rate": 0.00012034688023938865, - "loss": 0.8978, + "epoch": 0.9825578059555913, + "grad_norm": 0.28515625, + "learning_rate": 1.8529270894833694e-07, + "loss": 0.9169, "step": 17125 }, { - "epoch": 0.49144348973649105, - "grad_norm": 0.361328125, - "learning_rate": 0.00012029785194903153, - "loss": 0.9354, + "epoch": 0.9828446841471111, + "grad_norm": 0.267578125, + "learning_rate": 1.7924948528412755e-07, + "loss": 0.8872, "step": 17130 }, { - "epoch": 0.4915869350049489, - "grad_norm": 0.390625, - "learning_rate": 0.00012024881856934239, - "loss": 0.9689, + "epoch": 0.983131562338631, + "grad_norm": 0.263671875, + "learning_rate": 1.733063620923625e-07, + "loss": 0.9435, "step": 17135 }, { - "epoch": 0.4917303802734067, - "grad_norm": 0.41015625, - "learning_rate": 0.00012019978011261541, - "loss": 0.889, + "epoch": 0.9834184405301509, + "grad_norm": 0.263671875, + "learning_rate": 1.6746334533284115e-07, + "loss": 0.9701, "step": 17140 }, { - "epoch": 0.4918738255418645, - "grad_norm": 0.390625, - "learning_rate": 0.0001201507365911462, - "loss": 0.9487, + "epoch": 0.9837053187216708, + "grad_norm": 0.263671875, + "learning_rate": 1.6172044086492088e-07, + "loss": 0.884, "step": 17145 }, { - "epoch": 0.4920172708103223, - "grad_norm": 0.376953125, - "learning_rate": 0.00012010168801723149, - "loss": 0.9646, + "epoch": 0.9839921969131906, + "grad_norm": 0.244140625, + "learning_rate": 1.5607765444762834e-07, + "loss": 0.9289, "step": 17150 }, { - "epoch": 0.49216071607878015, - "grad_norm": 0.412109375, - "learning_rate": 
0.00012005263440316942, - "loss": 0.8292, + "epoch": 0.9842790751047106, + "grad_norm": 0.275390625, + "learning_rate": 1.5053499173955933e-07, + "loss": 0.9762, "step": 17155 }, { - "epoch": 0.492304161347238, - "grad_norm": 0.36328125, - "learning_rate": 0.00012000357576125932, - "loss": 0.887, + "epoch": 0.9845659532962304, + "grad_norm": 0.275390625, + "learning_rate": 1.4509245829888996e-07, + "loss": 0.96, "step": 17160 }, { - "epoch": 0.49244760661569575, - "grad_norm": 0.384765625, - "learning_rate": 0.00011995451210380176, - "loss": 0.862, + "epoch": 0.9848528314877503, + "grad_norm": 0.265625, + "learning_rate": 1.3975005958341003e-07, + "loss": 0.9242, "step": 17165 }, { - "epoch": 0.4925910518841536, - "grad_norm": 0.380859375, - "learning_rate": 0.00011990544344309865, - "loss": 0.9765, + "epoch": 0.9851397096792702, + "grad_norm": 0.275390625, + "learning_rate": 1.3450780095051186e-07, + "loss": 0.8535, "step": 17170 }, { - "epoch": 0.4927344971526114, - "grad_norm": 0.44140625, - "learning_rate": 0.000119856369791453, - "loss": 0.9985, + "epoch": 0.9854265878707901, + "grad_norm": 0.271484375, + "learning_rate": 1.2936568765711254e-07, + "loss": 0.8634, "step": 17175 }, { - "epoch": 0.49287794242106925, - "grad_norm": 0.408203125, - "learning_rate": 0.00011980729116116927, - "loss": 0.8978, + "epoch": 0.9857134660623099, + "grad_norm": 0.2431640625, + "learning_rate": 1.2432372485975395e-07, + "loss": 0.9243, "step": 17180 }, { - "epoch": 0.4930213876895271, - "grad_norm": 0.404296875, - "learning_rate": 0.00011975820756455303, - "loss": 0.9694, + "epoch": 0.9860003442538299, + "grad_norm": 0.28515625, + "learning_rate": 1.193819176145361e-07, + "loss": 0.9684, "step": 17185 }, { - "epoch": 0.49316483295798486, - "grad_norm": 0.388671875, - "learning_rate": 0.00011970911901391113, - "loss": 1.0276, + "epoch": 0.9862872224453497, + "grad_norm": 0.24609375, + "learning_rate": 1.1454027087708375e-07, + "loss": 0.836, "step": 17190 }, { - "epoch": 0.4933082782264427, - "grad_norm": 0.427734375, - "learning_rate": 0.00011966002552155169, - "loss": 0.9278, + "epoch": 0.9865741006368696, + "grad_norm": 0.265625, + "learning_rate": 1.0979878950263534e-07, + "loss": 0.9529, "step": 17195 }, { - "epoch": 0.4934517234949005, - "grad_norm": 0.3671875, - "learning_rate": 0.00011961092709978402, - "loss": 0.9798, + "epoch": 0.9868609788283894, + "grad_norm": 0.2734375, + "learning_rate": 1.0515747824595413e-07, + "loss": 1.0052, "step": 17200 }, { - "epoch": 0.49359516876335835, - "grad_norm": 0.38671875, - "learning_rate": 0.00011956182376091869, - "loss": 0.9165, + "epoch": 0.9871478570199094, + "grad_norm": 0.314453125, + "learning_rate": 1.0061634176136148e-07, + "loss": 0.9857, "step": 17205 }, { - "epoch": 0.4937386140318162, - "grad_norm": 0.392578125, - "learning_rate": 0.00011951271551726755, - "loss": 1.0208, + "epoch": 0.9874347352114292, + "grad_norm": 0.2470703125, + "learning_rate": 9.617538460270358e-08, + "loss": 0.9079, "step": 17210 }, { - "epoch": 0.49388205930027396, - "grad_norm": 0.41796875, - "learning_rate": 0.00011946360238114356, - "loss": 1.0142, + "epoch": 0.9877216134029491, + "grad_norm": 0.263671875, + "learning_rate": 9.183461122339587e-08, + "loss": 0.8843, "step": 17215 }, { - "epoch": 0.4940255045687318, - "grad_norm": 0.404296875, - "learning_rate": 0.00011941448436486106, - "loss": 1.0138, + "epoch": 0.988008491594469, + "grad_norm": 0.27734375, + "learning_rate": 8.759402597637855e-08, + "loss": 0.9595, "step": 17220 }, { - "epoch": 
0.4941689498371896, - "grad_norm": 0.375, - "learning_rate": 0.00011936536148073551, - "loss": 0.9092, + "epoch": 0.9882953697859889, + "grad_norm": 0.265625, + "learning_rate": 8.345363311410559e-08, + "loss": 1.0177, "step": 17225 }, { - "epoch": 0.49431239510564745, - "grad_norm": 0.4140625, - "learning_rate": 0.00011931623374108358, - "loss": 1.0614, + "epoch": 0.9885822479775087, + "grad_norm": 0.2734375, + "learning_rate": 7.941343678857794e-08, + "loss": 0.9305, "step": 17230 }, { - "epoch": 0.4944558403741053, - "grad_norm": 0.388671875, - "learning_rate": 0.00011926710115822322, - "loss": 0.92, + "epoch": 0.9888691261690287, + "grad_norm": 0.26953125, + "learning_rate": 7.547344105132137e-08, + "loss": 0.9569, "step": 17235 }, { - "epoch": 0.49459928564256306, - "grad_norm": 0.4140625, - "learning_rate": 0.00011921796374447356, - "loss": 0.9681, + "epoch": 0.9891560043605485, + "grad_norm": 0.26953125, + "learning_rate": 7.16336498533643e-08, + "loss": 0.9185, "step": 17240 }, { - "epoch": 0.4947427309110209, - "grad_norm": 0.365234375, - "learning_rate": 0.00011916882151215496, - "loss": 0.8441, + "epoch": 0.9894428825520684, + "grad_norm": 0.2734375, + "learning_rate": 6.789406704527102e-08, + "loss": 0.9569, "step": 17245 }, { - "epoch": 0.4948861761794787, - "grad_norm": 0.376953125, - "learning_rate": 0.00011911967447358901, - "loss": 0.9683, + "epoch": 0.9897297607435883, + "grad_norm": 0.2578125, + "learning_rate": 6.425469637708625e-08, + "loss": 0.9679, "step": 17250 }, { - "epoch": 0.49502962144793655, - "grad_norm": 0.5, - "learning_rate": 0.00011907052264109836, - "loss": 1.0202, + "epoch": 0.9900166389351082, + "grad_norm": 0.271484375, + "learning_rate": 6.071554149837955e-08, + "loss": 0.9106, "step": 17255 }, { - "epoch": 0.4951730667163944, - "grad_norm": 0.34765625, - "learning_rate": 0.00011902136602700711, - "loss": 0.9331, + "epoch": 0.990303517126628, + "grad_norm": 0.271484375, + "learning_rate": 5.727660595823414e-08, + "loss": 0.947, "step": 17260 }, { - "epoch": 0.49531651198485216, - "grad_norm": 0.36328125, - "learning_rate": 0.00011897220464364034, - "loss": 0.9821, + "epoch": 0.990590395318148, + "grad_norm": 0.259765625, + "learning_rate": 5.39378932052248e-08, + "loss": 0.9619, "step": 17265 }, { - "epoch": 0.49545995725331, - "grad_norm": 0.3828125, - "learning_rate": 0.00011892303850332443, - "loss": 0.9256, + "epoch": 0.9908772735096678, + "grad_norm": 0.267578125, + "learning_rate": 5.069940658740668e-08, + "loss": 0.953, "step": 17270 }, { - "epoch": 0.4956034025217678, - "grad_norm": 0.427734375, - "learning_rate": 0.00011887386761838692, - "loss": 0.9822, + "epoch": 0.9911641517011877, + "grad_norm": 0.26171875, + "learning_rate": 4.7561149352348675e-08, + "loss": 0.9723, "step": 17275 }, { - "epoch": 0.49574684779022565, - "grad_norm": 0.48046875, - "learning_rate": 0.00011882469200115656, - "loss": 0.9886, + "epoch": 0.9914510298927075, + "grad_norm": 0.265625, + "learning_rate": 4.4523124647100065e-08, + "loss": 0.988, "step": 17280 }, { - "epoch": 0.4958902930586835, - "grad_norm": 0.408203125, - "learning_rate": 0.00011877551166396333, - "loss": 1.0365, + "epoch": 0.9917379080842275, + "grad_norm": 0.26171875, + "learning_rate": 4.158533551820165e-08, + "loss": 0.9843, "step": 17285 }, { - "epoch": 0.49603373832714126, - "grad_norm": 0.380859375, - "learning_rate": 0.00011872632661913823, - "loss": 0.9832, + "epoch": 0.9920247862757473, + "grad_norm": 0.26171875, + "learning_rate": 3.874778491167463e-08, + "loss": 0.9379, "step": 17290 
}, { - "epoch": 0.4961771835955991, - "grad_norm": 0.41796875, - "learning_rate": 0.00011867713687901365, - "loss": 0.8573, + "epoch": 0.9923116644672672, + "grad_norm": 0.259765625, + "learning_rate": 3.6010475673009524e-08, + "loss": 0.9815, "step": 17295 }, { - "epoch": 0.4963206288640569, - "grad_norm": 0.4375, - "learning_rate": 0.00011862794245592301, - "loss": 0.9009, + "epoch": 0.9925985426587871, + "grad_norm": 0.25390625, + "learning_rate": 3.337341054721055e-08, + "loss": 0.9384, "step": 17300 }, { - "epoch": 0.49646407413251475, - "grad_norm": 0.466796875, - "learning_rate": 0.00011857874336220097, - "loss": 1.051, + "epoch": 0.992885420850307, + "grad_norm": 0.265625, + "learning_rate": 3.0836592178717926e-08, + "loss": 0.9017, "step": 17305 }, { - "epoch": 0.4966075194009726, - "grad_norm": 0.455078125, - "learning_rate": 0.00011852953961018332, - "loss": 0.957, + "epoch": 0.9931722990418268, + "grad_norm": 0.2490234375, + "learning_rate": 2.840002311145229e-08, + "loss": 0.8892, "step": 17310 }, { - "epoch": 0.49675096466943036, - "grad_norm": 0.416015625, - "learning_rate": 0.00011848033121220708, - "loss": 1.0266, + "epoch": 0.9934591772333468, + "grad_norm": 0.287109375, + "learning_rate": 2.6063705788825776e-08, + "loss": 0.9695, "step": 17315 }, { - "epoch": 0.4968944099378882, - "grad_norm": 0.37890625, - "learning_rate": 0.00011843111818061036, - "loss": 0.9393, + "epoch": 0.9937460554248666, + "grad_norm": 0.267578125, + "learning_rate": 2.3827642553686523e-08, + "loss": 0.9772, "step": 17320 }, { - "epoch": 0.497037855206346, - "grad_norm": 0.392578125, - "learning_rate": 0.00011838190052773248, - "loss": 0.9137, + "epoch": 0.9940329336163864, + "grad_norm": 0.267578125, + "learning_rate": 2.169183564837418e-08, + "loss": 0.9066, "step": 17325 }, { - "epoch": 0.49718130047480386, - "grad_norm": 0.349609375, - "learning_rate": 0.00011833267826591394, - "loss": 0.9568, + "epoch": 0.9943198118079064, + "grad_norm": 0.26171875, + "learning_rate": 1.9656287214686598e-08, + "loss": 0.8939, "step": 17330 }, { - "epoch": 0.49732474574326163, - "grad_norm": 0.376953125, - "learning_rate": 0.00011828345140749631, - "loss": 0.8782, + "epoch": 0.9946066899994263, + "grad_norm": 0.2734375, + "learning_rate": 1.772099929385762e-08, + "loss": 0.9562, "step": 17335 }, { - "epoch": 0.49746819101171946, - "grad_norm": 0.392578125, - "learning_rate": 0.0001182342199648224, - "loss": 1.0306, + "epoch": 0.9948935681909461, + "grad_norm": 0.265625, + "learning_rate": 1.588597382661261e-08, + "loss": 1.017, "step": 17340 }, { - "epoch": 0.4976116362801773, - "grad_norm": 0.416015625, - "learning_rate": 0.00011818498395023611, - "loss": 1.0112, + "epoch": 0.995180446382466, + "grad_norm": 0.265625, + "learning_rate": 1.4151212653112922e-08, + "loss": 0.9434, "step": 17345 }, { - "epoch": 0.4977550815486351, - "grad_norm": 0.404296875, - "learning_rate": 0.00011813574337608258, - "loss": 0.9508, + "epoch": 0.9954673245739859, + "grad_norm": 0.291015625, + "learning_rate": 1.2516717512989219e-08, + "loss": 0.9375, "step": 17350 }, { - "epoch": 0.49789852681709296, - "grad_norm": 0.41015625, - "learning_rate": 0.00011808649825470793, - "loss": 1.08, + "epoch": 0.9957542027655057, + "grad_norm": 0.267578125, + "learning_rate": 1.0982490045308157e-08, + "loss": 0.9094, "step": 17355 }, { - "epoch": 0.49804197208555073, - "grad_norm": 0.43359375, - "learning_rate": 0.00011803724859845952, - "loss": 0.9902, + "epoch": 0.9960410809570256, + "grad_norm": 0.265625, + "learning_rate": 
9.548531788605707e-09, + "loss": 0.9988, "step": 17360 }, { - "epoch": 0.49818541735400856, - "grad_norm": 0.48046875, - "learning_rate": 0.00011798799441968595, - "loss": 0.9984, + "epoch": 0.9963279591485455, + "grad_norm": 0.271484375, + "learning_rate": 8.21484418084273e-09, + "loss": 0.9424, "step": 17365 }, { - "epoch": 0.4983288626224664, - "grad_norm": 0.35546875, - "learning_rate": 0.00011793873573073673, - "loss": 0.8959, + "epoch": 0.9966148373400654, + "grad_norm": 0.25, + "learning_rate": 6.98142855946049e-09, + "loss": 0.9161, "step": 17370 }, { - "epoch": 0.4984723078909242, - "grad_norm": 0.349609375, - "learning_rate": 0.00011788947254396268, - "loss": 1.0324, + "epoch": 0.9969017155315852, + "grad_norm": 0.287109375, + "learning_rate": 5.848286161314054e-09, + "loss": 1.0036, "step": 17375 }, { - "epoch": 0.49861575315938206, - "grad_norm": 0.408203125, - "learning_rate": 0.00011784020487171566, - "loss": 0.9266, + "epoch": 0.9971885937231052, + "grad_norm": 0.287109375, + "learning_rate": 4.81541812273889e-09, + "loss": 0.982, "step": 17380 }, { - "epoch": 0.49875919842783983, - "grad_norm": 0.4375, - "learning_rate": 0.00011779093272634867, - "loss": 0.9571, + "epoch": 0.997475471914625, + "grad_norm": 0.265625, + "learning_rate": 3.882825479495367e-09, + "loss": 0.9417, "step": 17385 }, { - "epoch": 0.49890264369629767, - "grad_norm": 0.37890625, - "learning_rate": 0.00011774165612021585, - "loss": 0.9616, + "epoch": 0.9977623501061449, + "grad_norm": 0.2578125, + "learning_rate": 3.050509166779847e-09, + "loss": 0.9789, "step": 17390 }, { - "epoch": 0.4990460889647555, - "grad_norm": 0.39453125, - "learning_rate": 0.00011769237506567247, - "loss": 0.926, + "epoch": 0.9980492282976648, + "grad_norm": 0.314453125, + "learning_rate": 2.3184700192357966e-09, + "loss": 0.9634, "step": 17395 }, { - "epoch": 0.49918953423321333, - "grad_norm": 0.353515625, - "learning_rate": 0.00011764308957507488, - "loss": 0.9409, + "epoch": 0.9983361064891847, + "grad_norm": 0.259765625, + "learning_rate": 1.6867087709759866e-09, + "loss": 0.9081, "step": 17400 }, { - "epoch": 0.49933297950167116, - "grad_norm": 0.38671875, - "learning_rate": 0.00011759379966078054, - "loss": 0.9721, + "epoch": 0.9986229846807045, + "grad_norm": 0.259765625, + "learning_rate": 1.1552260555047767e-09, + "loss": 0.9128, "step": 17405 }, { - "epoch": 0.49947642477012894, - "grad_norm": 0.392578125, - "learning_rate": 0.00011754450533514807, - "loss": 0.9318, + "epoch": 0.9989098628722245, + "grad_norm": 0.275390625, + "learning_rate": 7.240224058180367e-10, + "loss": 0.9465, "step": 17410 }, { - "epoch": 0.49961987003858677, - "grad_norm": 0.41796875, - "learning_rate": 0.0001174952066105371, - "loss": 1.0438, + "epoch": 0.9991967410637443, + "grad_norm": 0.267578125, + "learning_rate": 3.93098254314328e-10, + "loss": 0.9759, "step": 17415 }, { - "epoch": 0.4997633153070446, - "grad_norm": 0.40625, - "learning_rate": 0.00011744590349930849, - "loss": 0.9609, + "epoch": 0.9994836192552642, + "grad_norm": 0.26953125, + "learning_rate": 1.624539328615171e-10, + "loss": 0.9412, "step": 17420 }, { - "epoch": 0.49990676057550243, - "grad_norm": 0.39453125, - "learning_rate": 0.0001173965960138241, - "loss": 0.8769, + "epoch": 0.9997704974467841, + "grad_norm": 0.2890625, + "learning_rate": 3.208967271906005e-11, + "loss": 0.9467, "step": 17425 }, { - "epoch": 0.5000502058439602, - "grad_norm": 0.40234375, - "learning_rate": 0.00011734728416644694, - "loss": 0.9208, - "step": 17430 - }, - { - "epoch": 
0.5001936511124181, - "grad_norm": 0.384765625, - "learning_rate": 0.00011729796796954107, - "loss": 0.8946, - "step": 17435 - }, - { - "epoch": 0.5003370963808759, - "grad_norm": 0.3671875, - "learning_rate": 0.00011724864743547168, - "loss": 0.8902, - "step": 17440 - }, - { - "epoch": 0.5004805416493336, - "grad_norm": 0.400390625, - "learning_rate": 0.00011719932257660504, - "loss": 0.899, - "step": 17445 - }, - { - "epoch": 0.5006239869177915, - "grad_norm": 0.38671875, - "learning_rate": 0.0001171499934053085, - "loss": 1.0134, - "step": 17450 - }, - { - "epoch": 0.5007674321862493, - "grad_norm": 0.408203125, - "learning_rate": 0.00011710065993395048, - "loss": 0.9793, - "step": 17455 - }, - { - "epoch": 0.5009108774547072, - "grad_norm": 0.361328125, - "learning_rate": 0.00011705132217490047, - "loss": 0.824, - "step": 17460 - }, - { - "epoch": 0.501054322723165, - "grad_norm": 0.38671875, - "learning_rate": 0.00011700198014052908, - "loss": 0.9061, - "step": 17465 - }, - { - "epoch": 0.5011977679916227, - "grad_norm": 0.419921875, - "learning_rate": 0.00011695263384320794, - "loss": 0.9971, - "step": 17470 - }, - { - "epoch": 0.5013412132600806, - "grad_norm": 0.345703125, - "learning_rate": 0.00011690328329530982, - "loss": 0.9777, - "step": 17475 - }, - { - "epoch": 0.5014846585285384, - "grad_norm": 0.408203125, - "learning_rate": 0.00011685392850920842, - "loss": 0.9678, - "step": 17480 - }, - { - "epoch": 0.5016281037969963, - "grad_norm": 0.4296875, - "learning_rate": 0.0001168045694972787, - "loss": 0.921, - "step": 17485 - }, - { - "epoch": 0.5017715490654541, - "grad_norm": 0.369140625, - "learning_rate": 0.00011675520627189648, - "loss": 0.9106, - "step": 17490 - }, - { - "epoch": 0.5019149943339118, - "grad_norm": 0.390625, - "learning_rate": 0.00011670583884543878, - "loss": 0.9243, - "step": 17495 - }, - { - "epoch": 0.5020584396023697, - "grad_norm": 0.431640625, - "learning_rate": 0.00011665646723028359, - "loss": 1.0181, - "step": 17500 - }, - { - "epoch": 0.5022018848708275, - "grad_norm": 0.38671875, - "learning_rate": 0.00011660709143880999, - "loss": 0.9612, - "step": 17505 - }, - { - "epoch": 0.5023453301392854, - "grad_norm": 0.5078125, - "learning_rate": 0.00011655771148339812, - "loss": 0.9865, - "step": 17510 - }, - { - "epoch": 0.5024887754077432, - "grad_norm": 0.404296875, - "learning_rate": 0.0001165083273764291, - "loss": 0.9229, - "step": 17515 - }, - { - "epoch": 0.502632220676201, - "grad_norm": 0.39453125, - "learning_rate": 0.00011645893913028514, - "loss": 0.9011, - "step": 17520 - }, - { - "epoch": 0.5027756659446588, - "grad_norm": 0.384765625, - "learning_rate": 0.00011640954675734954, - "loss": 0.8471, - "step": 17525 - }, - { - "epoch": 0.5029191112131166, - "grad_norm": 0.3671875, - "learning_rate": 0.00011636015027000651, - "loss": 0.9217, - "step": 17530 - }, - { - "epoch": 0.5030625564815745, - "grad_norm": 0.66015625, - "learning_rate": 0.00011631074968064137, - "loss": 0.9087, - "step": 17535 - }, - { - "epoch": 0.5032060017500323, - "grad_norm": 0.3671875, - "learning_rate": 0.00011626134500164047, - "loss": 0.9498, - "step": 17540 - }, - { - "epoch": 0.50334944701849, - "grad_norm": 0.349609375, - "learning_rate": 0.0001162119362453912, - "loss": 0.9006, - "step": 17545 - }, - { - "epoch": 0.5034928922869479, - "grad_norm": 0.384765625, - "learning_rate": 0.00011616252342428184, - "loss": 0.9717, - "step": 17550 - }, - { - "epoch": 0.5036363375554057, - "grad_norm": 0.47265625, - "learning_rate": 0.00011611310655070191, - "loss": 
0.9557, - "step": 17555 - }, - { - "epoch": 0.5037797828238636, - "grad_norm": 0.396484375, - "learning_rate": 0.00011606368563704177, - "loss": 0.8845, - "step": 17560 - }, - { - "epoch": 0.5039232280923214, - "grad_norm": 0.4140625, - "learning_rate": 0.00011601426069569283, - "loss": 0.929, - "step": 17565 - }, - { - "epoch": 0.5040666733607791, - "grad_norm": 0.40234375, - "learning_rate": 0.00011596483173904762, - "loss": 0.9675, - "step": 17570 - }, - { - "epoch": 0.504210118629237, - "grad_norm": 0.408203125, - "learning_rate": 0.00011591539877949951, - "loss": 0.9225, - "step": 17575 - }, - { - "epoch": 0.5043535638976948, - "grad_norm": 0.392578125, - "learning_rate": 0.00011586596182944293, - "loss": 0.9133, - "step": 17580 - }, - { - "epoch": 0.5044970091661527, - "grad_norm": 0.36328125, - "learning_rate": 0.00011581652090127341, - "loss": 0.9303, - "step": 17585 - }, - { - "epoch": 0.5046404544346105, - "grad_norm": 0.392578125, - "learning_rate": 0.00011576707600738739, - "loss": 1.0083, - "step": 17590 - }, - { - "epoch": 0.5047838997030683, - "grad_norm": 0.408203125, - "learning_rate": 0.00011571762716018224, - "loss": 0.9578, - "step": 17595 - }, - { - "epoch": 0.5049273449715261, - "grad_norm": 0.3671875, - "learning_rate": 0.00011566817437205643, - "loss": 0.9212, - "step": 17600 - }, - { - "epoch": 0.5050707902399839, - "grad_norm": 0.408203125, - "learning_rate": 0.00011561871765540945, - "loss": 0.8919, - "step": 17605 - }, - { - "epoch": 0.5052142355084418, - "grad_norm": 0.4140625, - "learning_rate": 0.0001155692570226416, - "loss": 0.9306, - "step": 17610 - }, - { - "epoch": 0.5053576807768996, - "grad_norm": 0.37890625, - "learning_rate": 0.0001155197924861544, - "loss": 0.9965, - "step": 17615 - }, - { - "epoch": 0.5055011260453574, - "grad_norm": 0.357421875, - "learning_rate": 0.00011547032405835005, - "loss": 0.9272, - "step": 17620 - }, - { - "epoch": 0.5056445713138152, - "grad_norm": 0.388671875, - "learning_rate": 0.00011542085175163206, - "loss": 0.8318, - "step": 17625 - }, - { - "epoch": 0.505788016582273, - "grad_norm": 0.416015625, - "learning_rate": 0.00011537137557840463, - "loss": 1.0272, - "step": 17630 - }, - { - "epoch": 0.5059314618507309, - "grad_norm": 0.41015625, - "learning_rate": 0.00011532189555107313, - "loss": 1.0037, - "step": 17635 - }, - { - "epoch": 0.5060749071191887, - "grad_norm": 0.388671875, - "learning_rate": 0.00011527241168204375, - "loss": 0.911, - "step": 17640 - }, - { - "epoch": 0.5062183523876465, - "grad_norm": 0.3828125, - "learning_rate": 0.00011522292398372372, - "loss": 1.0787, - "step": 17645 - }, - { - "epoch": 0.5063617976561043, - "grad_norm": 0.400390625, - "learning_rate": 0.00011517343246852126, - "loss": 0.9436, - "step": 17650 - }, - { - "epoch": 0.5065052429245621, - "grad_norm": 0.404296875, - "learning_rate": 0.00011512393714884547, - "loss": 1.0107, - "step": 17655 - }, - { - "epoch": 0.50664868819302, - "grad_norm": 0.388671875, - "learning_rate": 0.00011507443803710643, - "loss": 1.0164, - "step": 17660 - }, - { - "epoch": 0.5067921334614778, - "grad_norm": 0.333984375, - "learning_rate": 0.00011502493514571519, - "loss": 0.807, - "step": 17665 - }, - { - "epoch": 0.5069355787299356, - "grad_norm": 0.375, - "learning_rate": 0.00011497542848708374, - "loss": 0.9413, - "step": 17670 - }, - { - "epoch": 0.5070790239983934, - "grad_norm": 0.400390625, - "learning_rate": 0.000114925918073625, - "loss": 0.9101, - "step": 17675 - }, - { - "epoch": 0.5072224692668512, - "grad_norm": 0.5234375, - 
"learning_rate": 0.00011487640391775283, - "loss": 0.9163, - "step": 17680 - }, - { - "epoch": 0.5073659145353091, - "grad_norm": 0.46875, - "learning_rate": 0.00011482688603188207, - "loss": 1.0399, - "step": 17685 - }, - { - "epoch": 0.5075093598037669, - "grad_norm": 0.384765625, - "learning_rate": 0.00011477736442842846, - "loss": 0.9485, - "step": 17690 - }, - { - "epoch": 0.5076528050722247, - "grad_norm": 0.396484375, - "learning_rate": 0.00011472783911980867, - "loss": 0.9578, - "step": 17695 - }, - { - "epoch": 0.5077962503406825, - "grad_norm": 0.392578125, - "learning_rate": 0.00011467831011844027, - "loss": 0.9678, - "step": 17700 - }, - { - "epoch": 0.5079396956091403, - "grad_norm": 0.384765625, - "learning_rate": 0.00011462877743674187, - "loss": 0.9419, - "step": 17705 - }, - { - "epoch": 0.5080831408775982, - "grad_norm": 0.392578125, - "learning_rate": 0.00011457924108713287, - "loss": 0.9968, - "step": 17710 - }, - { - "epoch": 0.508226586146056, - "grad_norm": 0.3359375, - "learning_rate": 0.00011452970108203367, - "loss": 0.9016, - "step": 17715 - }, - { - "epoch": 0.5083700314145138, - "grad_norm": 0.3828125, - "learning_rate": 0.00011448015743386553, - "loss": 0.9767, - "step": 17720 - }, - { - "epoch": 0.5085134766829716, - "grad_norm": 0.41015625, - "learning_rate": 0.00011443061015505067, - "loss": 0.8759, - "step": 17725 - }, - { - "epoch": 0.5086569219514294, - "grad_norm": 0.435546875, - "learning_rate": 0.00011438105925801224, - "loss": 1.0429, - "step": 17730 - }, - { - "epoch": 0.5088003672198873, - "grad_norm": 0.396484375, - "learning_rate": 0.0001143315047551742, - "loss": 0.8767, - "step": 17735 - }, - { - "epoch": 0.5089438124883451, - "grad_norm": 0.37890625, - "learning_rate": 0.00011428194665896155, - "loss": 0.913, - "step": 17740 - }, - { - "epoch": 0.5090872577568029, - "grad_norm": 0.353515625, - "learning_rate": 0.00011423238498180005, - "loss": 0.9204, - "step": 17745 - }, - { - "epoch": 0.5092307030252607, - "grad_norm": 0.35546875, - "learning_rate": 0.00011418281973611647, - "loss": 0.916, - "step": 17750 - }, - { - "epoch": 0.5093741482937185, - "grad_norm": 0.408203125, - "learning_rate": 0.00011413325093433837, - "loss": 0.9572, - "step": 17755 - }, - { - "epoch": 0.5095175935621763, - "grad_norm": 0.369140625, - "learning_rate": 0.00011408367858889437, - "loss": 0.9239, - "step": 17760 - }, - { - "epoch": 0.5096610388306342, - "grad_norm": 0.43359375, - "learning_rate": 0.00011403410271221377, - "loss": 1.0553, - "step": 17765 - }, - { - "epoch": 0.509804484099092, - "grad_norm": 0.388671875, - "learning_rate": 0.00011398452331672689, - "loss": 0.9672, - "step": 17770 - }, - { - "epoch": 0.5099479293675498, - "grad_norm": 0.421875, - "learning_rate": 0.0001139349404148649, - "loss": 0.8816, - "step": 17775 - }, - { - "epoch": 0.5100913746360076, - "grad_norm": 0.375, - "learning_rate": 0.00011388535401905985, - "loss": 0.9258, - "step": 17780 - }, - { - "epoch": 0.5102348199044654, - "grad_norm": 0.41015625, - "learning_rate": 0.00011383576414174466, - "loss": 0.9453, - "step": 17785 - }, - { - "epoch": 0.5103782651729233, - "grad_norm": 0.4140625, - "learning_rate": 0.00011378617079535312, - "loss": 0.9048, - "step": 17790 - }, - { - "epoch": 0.5105217104413811, - "grad_norm": 0.421875, - "learning_rate": 0.00011373657399231991, - "loss": 0.9672, - "step": 17795 - }, - { - "epoch": 0.510665155709839, - "grad_norm": 0.41015625, - "learning_rate": 0.00011368697374508052, - "loss": 0.9331, - "step": 17800 + "epoch": 1.0, + "eval_loss": 
0.9523706436157227, + "eval_runtime": 525.7052, + "eval_samples_per_second": 29.353, + "eval_steps_per_second": 0.46, + "step": 17429 }, { - "epoch": 0.5108086009782967, - "grad_norm": 0.408203125, - "learning_rate": 0.0001136373700660714, - "loss": 0.9459, - "step": 17805 - }, - { - "epoch": 0.5109520462467545, - "grad_norm": 0.375, - "learning_rate": 0.00011358776296772982, - "loss": 1.0064, - "step": 17810 - }, - { - "epoch": 0.5110954915152124, - "grad_norm": 0.33984375, - "learning_rate": 0.00011353815246249382, - "loss": 0.9057, - "step": 17815 - }, - { - "epoch": 0.5112389367836702, - "grad_norm": 0.435546875, - "learning_rate": 0.00011348853856280244, - "loss": 0.933, - "step": 17820 - }, - { - "epoch": 0.511382382052128, - "grad_norm": 0.369140625, - "learning_rate": 0.00011343892128109543, - "loss": 0.9412, - "step": 17825 - }, - { - "epoch": 0.5115258273205858, - "grad_norm": 0.376953125, - "learning_rate": 0.00011338930062981352, - "loss": 1.0024, - "step": 17830 - }, - { - "epoch": 0.5116692725890436, - "grad_norm": 0.388671875, - "learning_rate": 0.0001133396766213982, - "loss": 0.985, - "step": 17835 - }, - { - "epoch": 0.5118127178575015, - "grad_norm": 0.37890625, - "learning_rate": 0.0001132900492682918, - "loss": 0.9519, - "step": 17840 - }, - { - "epoch": 0.5119561631259593, - "grad_norm": 0.353515625, - "learning_rate": 0.00011324041858293755, - "loss": 0.9186, - "step": 17845 - }, - { - "epoch": 0.5120996083944171, - "grad_norm": 0.455078125, - "learning_rate": 0.00011319078457777947, - "loss": 0.9717, - "step": 17850 - }, - { - "epoch": 0.5122430536628749, - "grad_norm": 0.400390625, - "learning_rate": 0.00011314114726526241, - "loss": 0.927, - "step": 17855 - }, - { - "epoch": 0.5123864989313327, - "grad_norm": 0.404296875, - "learning_rate": 0.00011309150665783204, - "loss": 1.0137, - "step": 17860 - }, - { - "epoch": 0.5125299441997906, - "grad_norm": 0.365234375, - "learning_rate": 0.00011304186276793491, - "loss": 0.9766, - "step": 17865 - }, - { - "epoch": 0.5126733894682484, - "grad_norm": 0.3828125, - "learning_rate": 0.00011299221560801836, - "loss": 0.9041, - "step": 17870 - }, - { - "epoch": 0.5128168347367063, - "grad_norm": 0.390625, - "learning_rate": 0.00011294256519053053, - "loss": 1.0019, - "step": 17875 - }, - { - "epoch": 0.512960280005164, - "grad_norm": 0.421875, - "learning_rate": 0.00011289291152792038, - "loss": 0.8974, - "step": 17880 - }, - { - "epoch": 0.5131037252736218, - "grad_norm": 0.390625, - "learning_rate": 0.00011284325463263772, - "loss": 0.9685, - "step": 17885 - }, - { - "epoch": 0.5132471705420797, - "grad_norm": 0.396484375, - "learning_rate": 0.00011279359451713318, - "loss": 0.985, - "step": 17890 - }, - { - "epoch": 0.5133906158105375, - "grad_norm": 0.416015625, - "learning_rate": 0.00011274393119385811, - "loss": 0.9889, - "step": 17895 - }, - { - "epoch": 0.5135340610789954, - "grad_norm": 0.380859375, - "learning_rate": 0.00011269426467526477, - "loss": 0.9221, - "step": 17900 - }, - { - "epoch": 0.5136775063474531, - "grad_norm": 0.427734375, - "learning_rate": 0.0001126445949738061, - "loss": 0.9723, - "step": 17905 - }, - { - "epoch": 0.5138209516159109, - "grad_norm": 0.41015625, - "learning_rate": 0.00011259492210193603, - "loss": 0.8721, - "step": 17910 - }, - { - "epoch": 0.5139643968843688, - "grad_norm": 0.3984375, - "learning_rate": 0.00011254524607210906, - "loss": 1.0239, - "step": 17915 - }, - { - "epoch": 0.5141078421528266, - "grad_norm": 0.3828125, - "learning_rate": 0.00011249556689678063, - "loss": 
0.9777, - "step": 17920 - }, - { - "epoch": 0.5142512874212845, - "grad_norm": 0.38671875, - "learning_rate": 0.00011244588458840693, - "loss": 0.9537, - "step": 17925 - }, - { - "epoch": 0.5143947326897422, - "grad_norm": 0.3671875, - "learning_rate": 0.00011239619915944488, - "loss": 0.9772, - "step": 17930 - }, - { - "epoch": 0.5145381779582, - "grad_norm": 0.3828125, - "learning_rate": 0.00011234651062235233, - "loss": 1.0706, - "step": 17935 - }, - { - "epoch": 0.5146816232266579, - "grad_norm": 0.375, - "learning_rate": 0.00011229681898958775, - "loss": 0.8755, - "step": 17940 - }, - { - "epoch": 0.5148250684951157, - "grad_norm": 0.42578125, - "learning_rate": 0.00011224712427361042, - "loss": 0.8329, - "step": 17945 - }, - { - "epoch": 0.5149685137635736, - "grad_norm": 0.416015625, - "learning_rate": 0.0001121974264868805, - "loss": 0.9733, - "step": 17950 - }, - { - "epoch": 0.5151119590320313, - "grad_norm": 0.349609375, - "learning_rate": 0.0001121477256418588, - "loss": 0.8562, - "step": 17955 - }, - { - "epoch": 0.5152554043004891, - "grad_norm": 0.375, - "learning_rate": 0.00011209802175100692, - "loss": 0.9232, - "step": 17960 - }, - { - "epoch": 0.515398849568947, - "grad_norm": 0.44140625, - "learning_rate": 0.00011204831482678728, - "loss": 0.9427, - "step": 17965 - }, - { - "epoch": 0.5155422948374048, - "grad_norm": 0.376953125, - "learning_rate": 0.00011199860488166302, - "loss": 1.097, - "step": 17970 - }, - { - "epoch": 0.5156857401058627, - "grad_norm": 0.396484375, - "learning_rate": 0.00011194889192809801, - "loss": 0.9346, - "step": 17975 - }, - { - "epoch": 0.5158291853743204, - "grad_norm": 0.4140625, - "learning_rate": 0.00011189917597855694, - "loss": 1.0489, - "step": 17980 - }, - { - "epoch": 0.5159726306427782, - "grad_norm": 0.36328125, - "learning_rate": 0.00011184945704550518, - "loss": 0.9858, - "step": 17985 - }, - { - "epoch": 0.5161160759112361, - "grad_norm": 0.37890625, - "learning_rate": 0.00011179973514140896, - "loss": 0.9137, - "step": 17990 - }, - { - "epoch": 0.5162595211796939, - "grad_norm": 0.3515625, - "learning_rate": 0.00011175001027873506, - "loss": 0.9503, - "step": 17995 - }, - { - "epoch": 0.5164029664481518, - "grad_norm": 0.38671875, - "learning_rate": 0.00011170028246995123, - "loss": 0.9119, - "step": 18000 - }, - { - "epoch": 0.5165464117166095, - "grad_norm": 0.376953125, - "learning_rate": 0.00011165055172752578, - "loss": 0.9175, - "step": 18005 - }, - { - "epoch": 0.5166898569850673, - "grad_norm": 0.4140625, - "learning_rate": 0.00011160081806392788, - "loss": 1.1475, - "step": 18010 - }, - { - "epoch": 0.5168333022535252, - "grad_norm": 0.408203125, - "learning_rate": 0.00011155108149162735, - "loss": 0.8949, - "step": 18015 - }, - { - "epoch": 0.516976747521983, - "grad_norm": 0.3671875, - "learning_rate": 0.00011150134202309474, - "loss": 0.8876, - "step": 18020 - }, - { - "epoch": 0.5171201927904409, - "grad_norm": 0.369140625, - "learning_rate": 0.00011145159967080143, - "loss": 0.9105, - "step": 18025 - }, - { - "epoch": 0.5172636380588986, - "grad_norm": 0.404296875, - "learning_rate": 0.00011140185444721937, - "loss": 1.0275, - "step": 18030 - }, - { - "epoch": 0.5174070833273564, - "grad_norm": 0.42578125, - "learning_rate": 0.00011135210636482134, - "loss": 0.9608, - "step": 18035 - }, - { - "epoch": 0.5175505285958143, - "grad_norm": 0.3671875, - "learning_rate": 0.00011130235543608081, - "loss": 1.0306, - "step": 18040 - }, - { - "epoch": 0.5176939738642721, - "grad_norm": 0.36328125, - "learning_rate": 
0.00011125260167347191, - "loss": 0.913, - "step": 18045 - }, - { - "epoch": 0.51783741913273, - "grad_norm": 0.392578125, - "learning_rate": 0.00011120284508946959, - "loss": 1.0218, - "step": 18050 - }, - { - "epoch": 0.5179808644011877, - "grad_norm": 0.33984375, - "learning_rate": 0.00011115308569654941, - "loss": 0.8212, - "step": 18055 - }, - { - "epoch": 0.5181243096696455, - "grad_norm": 0.373046875, - "learning_rate": 0.00011110332350718768, - "loss": 0.9627, - "step": 18060 - }, - { - "epoch": 0.5182677549381034, - "grad_norm": 0.37109375, - "learning_rate": 0.0001110535585338614, - "loss": 0.915, - "step": 18065 - }, - { - "epoch": 0.5184112002065612, - "grad_norm": 0.4453125, - "learning_rate": 0.00011100379078904828, - "loss": 1.0088, - "step": 18070 - }, - { - "epoch": 0.5185546454750191, - "grad_norm": 0.37109375, - "learning_rate": 0.00011095402028522666, - "loss": 0.9912, - "step": 18075 - }, - { - "epoch": 0.5186980907434768, - "grad_norm": 0.423828125, - "learning_rate": 0.00011090424703487569, - "loss": 1.0138, - "step": 18080 - }, - { - "epoch": 0.5188415360119346, - "grad_norm": 0.392578125, - "learning_rate": 0.00011085447105047511, - "loss": 0.8812, - "step": 18085 - }, - { - "epoch": 0.5189849812803925, - "grad_norm": 0.45703125, - "learning_rate": 0.0001108046923445054, - "loss": 1.0442, - "step": 18090 - }, - { - "epoch": 0.5191284265488503, - "grad_norm": 0.3671875, - "learning_rate": 0.00011075491092944768, - "loss": 0.9925, - "step": 18095 - }, - { - "epoch": 0.5192718718173082, - "grad_norm": 0.388671875, - "learning_rate": 0.00011070512681778375, - "loss": 0.9902, - "step": 18100 - }, - { - "epoch": 0.5194153170857659, - "grad_norm": 0.419921875, - "learning_rate": 0.00011065534002199619, - "loss": 0.9506, - "step": 18105 - }, - { - "epoch": 0.5195587623542237, - "grad_norm": 0.41015625, - "learning_rate": 0.00011060555055456807, - "loss": 0.9533, - "step": 18110 - }, - { - "epoch": 0.5197022076226816, - "grad_norm": 0.453125, - "learning_rate": 0.00011055575842798331, - "loss": 1.0268, - "step": 18115 - }, - { - "epoch": 0.5198456528911394, - "grad_norm": 0.400390625, - "learning_rate": 0.00011050596365472637, - "loss": 0.9192, - "step": 18120 - }, - { - "epoch": 0.5199890981595972, - "grad_norm": 0.3515625, - "learning_rate": 0.00011045616624728246, - "loss": 0.8664, - "step": 18125 - }, - { - "epoch": 0.520132543428055, - "grad_norm": 0.380859375, - "learning_rate": 0.00011040636621813736, - "loss": 1.0416, - "step": 18130 - }, - { - "epoch": 0.5202759886965128, - "grad_norm": 0.3828125, - "learning_rate": 0.00011035656357977757, - "loss": 0.9347, - "step": 18135 - }, - { - "epoch": 0.5204194339649707, - "grad_norm": 0.37109375, - "learning_rate": 0.00011030675834469026, - "loss": 0.9835, - "step": 18140 - }, - { - "epoch": 0.5205628792334285, - "grad_norm": 0.38671875, - "learning_rate": 0.0001102569505253632, - "loss": 0.9124, - "step": 18145 - }, - { - "epoch": 0.5207063245018863, - "grad_norm": 0.390625, - "learning_rate": 0.00011020714013428484, - "loss": 1.0143, - "step": 18150 - }, - { - "epoch": 0.5208497697703441, - "grad_norm": 0.400390625, - "learning_rate": 0.00011015732718394425, - "loss": 1.0284, - "step": 18155 - }, - { - "epoch": 0.5209932150388019, - "grad_norm": 0.412109375, - "learning_rate": 0.0001101075116868312, - "loss": 0.9307, - "step": 18160 - }, - { - "epoch": 0.5211366603072598, - "grad_norm": 0.375, - "learning_rate": 0.00011005769365543601, - "loss": 0.9186, - "step": 18165 - }, - { - "epoch": 0.5212801055757176, - 
"grad_norm": 0.419921875, - "learning_rate": 0.0001100078731022497, - "loss": 0.9153, - "step": 18170 - }, - { - "epoch": 0.5214235508441754, - "grad_norm": 0.41015625, - "learning_rate": 0.00010995805003976391, - "loss": 0.9522, - "step": 18175 - }, - { - "epoch": 0.5215669961126332, - "grad_norm": 0.34765625, - "learning_rate": 0.00010990822448047089, - "loss": 0.8662, - "step": 18180 - }, - { - "epoch": 0.521710441381091, - "grad_norm": 0.375, - "learning_rate": 0.00010985839643686355, - "loss": 0.9688, - "step": 18185 - }, - { - "epoch": 0.5218538866495489, - "grad_norm": 0.396484375, - "learning_rate": 0.00010980856592143538, - "loss": 0.9793, - "step": 18190 - }, - { - "epoch": 0.5219973319180067, - "grad_norm": 0.365234375, - "learning_rate": 0.00010975873294668054, - "loss": 0.9271, - "step": 18195 - }, - { - "epoch": 0.5221407771864645, - "grad_norm": 0.3671875, - "learning_rate": 0.00010970889752509374, - "loss": 0.9665, - "step": 18200 - }, - { - "epoch": 0.5222842224549223, - "grad_norm": 0.39453125, - "learning_rate": 0.00010965905966917038, - "loss": 0.8985, - "step": 18205 - }, - { - "epoch": 0.5224276677233801, - "grad_norm": 0.392578125, - "learning_rate": 0.00010960921939140638, - "loss": 1.0261, - "step": 18210 - }, - { - "epoch": 0.522571112991838, - "grad_norm": 0.41015625, - "learning_rate": 0.00010955937670429838, - "loss": 1.0431, - "step": 18215 - }, - { - "epoch": 0.5227145582602958, - "grad_norm": 0.3828125, - "learning_rate": 0.00010950953162034357, - "loss": 0.8528, - "step": 18220 - }, - { - "epoch": 0.5228580035287536, - "grad_norm": 0.40234375, - "learning_rate": 0.00010945968415203964, - "loss": 0.9506, - "step": 18225 - }, - { - "epoch": 0.5230014487972114, - "grad_norm": 0.376953125, - "learning_rate": 0.00010940983431188508, - "loss": 0.8645, - "step": 18230 - }, - { - "epoch": 0.5231448940656692, - "grad_norm": 0.384765625, - "learning_rate": 0.00010935998211237879, - "loss": 0.9481, - "step": 18235 - }, - { - "epoch": 0.5232883393341271, - "grad_norm": 0.400390625, - "learning_rate": 0.00010931012756602039, - "loss": 1.0558, - "step": 18240 - }, - { - "epoch": 0.5234317846025849, - "grad_norm": 0.466796875, - "learning_rate": 0.00010926027068530999, - "loss": 0.8659, - "step": 18245 - }, - { - "epoch": 0.5235752298710427, - "grad_norm": 0.412109375, - "learning_rate": 0.00010921041148274838, - "loss": 1.0441, - "step": 18250 - }, - { - "epoch": 0.5237186751395005, - "grad_norm": 0.40234375, - "learning_rate": 0.00010916054997083686, - "loss": 0.9402, - "step": 18255 - }, - { - "epoch": 0.5238621204079583, - "grad_norm": 0.380859375, - "learning_rate": 0.00010911068616207736, - "loss": 0.9596, - "step": 18260 - }, - { - "epoch": 0.5240055656764162, - "grad_norm": 0.408203125, - "learning_rate": 0.00010906082006897231, - "loss": 0.9685, - "step": 18265 - }, - { - "epoch": 0.524149010944874, - "grad_norm": 0.419921875, - "learning_rate": 0.00010901095170402479, - "loss": 1.0072, - "step": 18270 - }, - { - "epoch": 0.5242924562133318, - "grad_norm": 0.373046875, - "learning_rate": 0.00010896108107973848, - "loss": 1.0282, - "step": 18275 - }, - { - "epoch": 0.5244359014817896, - "grad_norm": 0.392578125, - "learning_rate": 0.00010891120820861745, - "loss": 0.9825, - "step": 18280 - }, - { - "epoch": 0.5245793467502474, - "grad_norm": 0.369140625, - "learning_rate": 0.00010886133310316655, - "loss": 0.9748, - "step": 18285 - }, - { - "epoch": 0.5247227920187053, - "grad_norm": 0.412109375, - "learning_rate": 0.00010881145577589103, - "loss": 0.9949, - 
"step": 18290 - }, - { - "epoch": 0.5248662372871631, - "grad_norm": 0.384765625, - "learning_rate": 0.00010876157623929677, - "loss": 1.0073, - "step": 18295 - }, - { - "epoch": 0.5250096825556209, - "grad_norm": 0.361328125, - "learning_rate": 0.00010871169450589025, - "loss": 0.9535, - "step": 18300 - }, - { - "epoch": 0.5251531278240787, - "grad_norm": 0.3515625, - "learning_rate": 0.00010866181058817839, - "loss": 0.9003, - "step": 18305 - }, - { - "epoch": 0.5252965730925365, - "grad_norm": 0.376953125, - "learning_rate": 0.00010861192449866871, - "loss": 0.9698, - "step": 18310 - }, - { - "epoch": 0.5254400183609944, - "grad_norm": 0.380859375, - "learning_rate": 0.00010856203624986931, - "loss": 0.9659, - "step": 18315 - }, - { - "epoch": 0.5255834636294522, - "grad_norm": 0.361328125, - "learning_rate": 0.00010851214585428878, - "loss": 0.9709, - "step": 18320 - }, - { - "epoch": 0.52572690889791, - "grad_norm": 0.3671875, - "learning_rate": 0.00010846225332443622, - "loss": 0.8615, - "step": 18325 - }, - { - "epoch": 0.5258703541663678, - "grad_norm": 0.390625, - "learning_rate": 0.00010841235867282137, - "loss": 1.0677, - "step": 18330 - }, - { - "epoch": 0.5260137994348256, - "grad_norm": 0.361328125, - "learning_rate": 0.00010836246191195448, - "loss": 0.9345, - "step": 18335 - }, - { - "epoch": 0.5261572447032835, - "grad_norm": 0.423828125, - "learning_rate": 0.00010831256305434616, - "loss": 0.8595, - "step": 18340 - }, - { - "epoch": 0.5263006899717413, - "grad_norm": 0.41796875, - "learning_rate": 0.00010826266211250782, - "loss": 0.919, - "step": 18345 - }, - { - "epoch": 0.5264441352401991, - "grad_norm": 0.3828125, - "learning_rate": 0.00010821275909895115, - "loss": 0.9946, - "step": 18350 - }, - { - "epoch": 0.526587580508657, - "grad_norm": 0.419921875, - "learning_rate": 0.00010816285402618851, - "loss": 0.8697, - "step": 18355 - }, - { - "epoch": 0.5267310257771147, - "grad_norm": 0.38671875, - "learning_rate": 0.00010811294690673271, - "loss": 0.9939, - "step": 18360 - }, - { - "epoch": 0.5268744710455726, - "grad_norm": 0.373046875, - "learning_rate": 0.00010806303775309707, - "loss": 0.9868, - "step": 18365 - }, - { - "epoch": 0.5270179163140304, - "grad_norm": 0.396484375, - "learning_rate": 0.00010801312657779547, - "loss": 0.9969, - "step": 18370 - }, - { - "epoch": 0.5271613615824882, - "grad_norm": 0.41015625, - "learning_rate": 0.00010796321339334226, - "loss": 0.8982, - "step": 18375 - }, - { - "epoch": 0.527304806850946, - "grad_norm": 0.625, - "learning_rate": 0.00010791329821225232, - "loss": 0.9467, - "step": 18380 - }, - { - "epoch": 0.5274482521194038, - "grad_norm": 0.390625, - "learning_rate": 0.00010786338104704095, - "loss": 0.9334, - "step": 18385 - }, - { - "epoch": 0.5275916973878617, - "grad_norm": 0.376953125, - "learning_rate": 0.00010781346191022405, - "loss": 1.0173, - "step": 18390 - }, - { - "epoch": 0.5277351426563195, - "grad_norm": 0.3828125, - "learning_rate": 0.000107763540814318, - "loss": 1.0428, - "step": 18395 - }, - { - "epoch": 0.5278785879247773, - "grad_norm": 0.388671875, - "learning_rate": 0.00010771361777183957, - "loss": 0.9918, - "step": 18400 - }, - { - "epoch": 0.5280220331932352, - "grad_norm": 0.359375, - "learning_rate": 0.00010766369279530615, - "loss": 0.9111, - "step": 18405 - }, - { - "epoch": 0.5281654784616929, - "grad_norm": 0.373046875, - "learning_rate": 0.00010761376589723553, - "loss": 1.1105, - "step": 18410 - }, - { - "epoch": 0.5283089237301508, - "grad_norm": 0.427734375, - "learning_rate": 
0.00010756383709014602, - "loss": 0.95, - "step": 18415 - }, - { - "epoch": 0.5284523689986086, - "grad_norm": 0.416015625, - "learning_rate": 0.00010751390638655638, - "loss": 1.047, - "step": 18420 - }, - { - "epoch": 0.5285958142670664, - "grad_norm": 0.5, - "learning_rate": 0.00010746397379898589, - "loss": 0.9407, - "step": 18425 - }, - { - "epoch": 0.5287392595355243, - "grad_norm": 0.400390625, - "learning_rate": 0.00010741403933995424, - "loss": 0.9668, - "step": 18430 - }, - { - "epoch": 0.528882704803982, - "grad_norm": 0.359375, - "learning_rate": 0.00010736410302198168, - "loss": 0.9537, - "step": 18435 - }, - { - "epoch": 0.5290261500724399, - "grad_norm": 0.3828125, - "learning_rate": 0.00010731416485758879, - "loss": 0.8282, - "step": 18440 - }, - { - "epoch": 0.5291695953408977, - "grad_norm": 0.4296875, - "learning_rate": 0.00010726422485929677, - "loss": 1.1009, - "step": 18445 - }, - { - "epoch": 0.5293130406093555, - "grad_norm": 0.408203125, - "learning_rate": 0.00010721428303962713, - "loss": 1.0036, - "step": 18450 - }, - { - "epoch": 0.5294564858778134, - "grad_norm": 0.359375, - "learning_rate": 0.00010716433941110197, - "loss": 0.8247, - "step": 18455 - }, - { - "epoch": 0.5295999311462711, - "grad_norm": 0.369140625, - "learning_rate": 0.00010711439398624377, - "loss": 0.8979, - "step": 18460 - }, - { - "epoch": 0.5297433764147289, - "grad_norm": 0.419921875, - "learning_rate": 0.00010706444677757546, - "loss": 0.9516, - "step": 18465 - }, - { - "epoch": 0.5298868216831868, - "grad_norm": 0.404296875, - "learning_rate": 0.00010701449779762046, - "loss": 0.9373, - "step": 18470 - }, - { - "epoch": 0.5300302669516446, - "grad_norm": 0.39453125, - "learning_rate": 0.00010696454705890253, - "loss": 1.0494, - "step": 18475 - }, - { - "epoch": 0.5301737122201025, - "grad_norm": 0.384765625, - "learning_rate": 0.00010691459457394604, - "loss": 0.885, - "step": 18480 - }, - { - "epoch": 0.5303171574885602, - "grad_norm": 0.396484375, - "learning_rate": 0.00010686464035527561, - "loss": 0.9539, - "step": 18485 - }, - { - "epoch": 0.530460602757018, - "grad_norm": 0.41015625, - "learning_rate": 0.00010681468441541648, - "loss": 1.002, - "step": 18490 - }, - { - "epoch": 0.5306040480254759, - "grad_norm": 0.373046875, - "learning_rate": 0.0001067647267668942, - "loss": 0.8863, - "step": 18495 - }, - { - "epoch": 0.5307474932939337, - "grad_norm": 0.41796875, - "learning_rate": 0.00010671476742223474, - "loss": 0.9324, - "step": 18500 - }, - { - "epoch": 0.5308909385623916, - "grad_norm": 0.37890625, - "learning_rate": 0.00010666480639396456, - "loss": 0.8973, - "step": 18505 - }, - { - "epoch": 0.5310343838308493, - "grad_norm": 0.40234375, - "learning_rate": 0.00010661484369461052, - "loss": 0.93, - "step": 18510 - }, - { - "epoch": 0.5311778290993071, - "grad_norm": 0.369140625, - "learning_rate": 0.0001065648793366999, - "loss": 0.9793, - "step": 18515 - }, - { - "epoch": 0.531321274367765, - "grad_norm": 0.443359375, - "learning_rate": 0.00010651491333276036, - "loss": 0.9853, - "step": 18520 - }, - { - "epoch": 0.5314647196362228, - "grad_norm": 0.4453125, - "learning_rate": 0.00010646494569532006, - "loss": 0.9821, - "step": 18525 - }, - { - "epoch": 0.5316081649046807, - "grad_norm": 0.3515625, - "learning_rate": 0.00010641497643690743, - "loss": 0.9232, - "step": 18530 - }, - { - "epoch": 0.5317516101731384, - "grad_norm": 0.37109375, - "learning_rate": 0.00010636500557005145, - "loss": 0.9185, - "step": 18535 - }, - { - "epoch": 0.5318950554415962, - "grad_norm": 
0.375, - "learning_rate": 0.00010631503310728146, - "loss": 0.9172, - "step": 18540 - }, - { - "epoch": 0.5320385007100541, - "grad_norm": 0.384765625, - "learning_rate": 0.00010626505906112711, - "loss": 0.891, - "step": 18545 - }, - { - "epoch": 0.5321819459785119, - "grad_norm": 0.359375, - "learning_rate": 0.00010621508344411861, - "loss": 0.9843, - "step": 18550 - }, - { - "epoch": 0.5323253912469698, - "grad_norm": 0.384765625, - "learning_rate": 0.0001061651062687864, - "loss": 0.9114, - "step": 18555 - }, - { - "epoch": 0.5324688365154275, - "grad_norm": 0.42578125, - "learning_rate": 0.0001061151275476614, - "loss": 0.8797, - "step": 18560 - }, - { - "epoch": 0.5326122817838853, - "grad_norm": 0.373046875, - "learning_rate": 0.00010606514729327493, - "loss": 0.9427, - "step": 18565 - }, - { - "epoch": 0.5327557270523432, - "grad_norm": 0.33984375, - "learning_rate": 0.00010601516551815865, - "loss": 0.8275, - "step": 18570 - }, - { - "epoch": 0.532899172320801, - "grad_norm": 0.359375, - "learning_rate": 0.00010596518223484457, - "loss": 0.8115, - "step": 18575 - }, - { - "epoch": 0.5330426175892589, - "grad_norm": 0.3828125, - "learning_rate": 0.00010591519745586522, - "loss": 0.9977, - "step": 18580 - }, - { - "epoch": 0.5331860628577166, - "grad_norm": 0.3359375, - "learning_rate": 0.00010586521119375337, - "loss": 0.9283, - "step": 18585 - }, - { - "epoch": 0.5333295081261744, - "grad_norm": 0.37890625, - "learning_rate": 0.00010581522346104215, - "loss": 0.9264, - "step": 18590 - }, - { - "epoch": 0.5334729533946323, - "grad_norm": 0.390625, - "learning_rate": 0.00010576523427026519, - "loss": 0.9449, - "step": 18595 - }, - { - "epoch": 0.5336163986630901, - "grad_norm": 0.384765625, - "learning_rate": 0.00010571524363395635, - "loss": 0.9393, - "step": 18600 - }, - { - "epoch": 0.533759843931548, - "grad_norm": 0.376953125, - "learning_rate": 0.00010566525156464997, - "loss": 0.884, - "step": 18605 - }, - { - "epoch": 0.5339032892000057, - "grad_norm": 0.408203125, - "learning_rate": 0.00010561525807488062, - "loss": 0.9672, - "step": 18610 - }, - { - "epoch": 0.5340467344684635, - "grad_norm": 0.404296875, - "learning_rate": 0.00010556526317718332, - "loss": 0.9707, - "step": 18615 - }, - { - "epoch": 0.5341901797369214, - "grad_norm": 0.3671875, - "learning_rate": 0.00010551526688409346, - "loss": 1.0074, - "step": 18620 - }, - { - "epoch": 0.5343336250053792, - "grad_norm": 0.390625, - "learning_rate": 0.00010546526920814665, - "loss": 0.9522, - "step": 18625 - }, - { - "epoch": 0.5344770702738371, - "grad_norm": 0.439453125, - "learning_rate": 0.00010541527016187903, - "loss": 0.9419, - "step": 18630 - }, - { - "epoch": 0.5346205155422948, - "grad_norm": 0.31640625, - "learning_rate": 0.00010536526975782692, - "loss": 0.9702, - "step": 18635 - }, - { - "epoch": 0.5347639608107526, - "grad_norm": 0.3671875, - "learning_rate": 0.00010531526800852709, - "loss": 0.9606, - "step": 18640 - }, - { - "epoch": 0.5349074060792105, - "grad_norm": 0.3984375, - "learning_rate": 0.00010526526492651659, - "loss": 0.9729, - "step": 18645 - }, - { - "epoch": 0.5350508513476683, - "grad_norm": 0.34765625, - "learning_rate": 0.00010521526052433282, - "loss": 0.9063, - "step": 18650 - }, - { - "epoch": 0.5351942966161262, - "grad_norm": 0.40625, - "learning_rate": 0.00010516525481451347, - "loss": 0.8851, - "step": 18655 - }, - { - "epoch": 0.5353377418845839, - "grad_norm": 0.376953125, - "learning_rate": 0.00010511524780959667, - "loss": 1.0077, - "step": 18660 - }, - { - "epoch": 
0.5354811871530417, - "grad_norm": 0.34765625, - "learning_rate": 0.00010506523952212078, - "loss": 0.9595, - "step": 18665 - }, - { - "epoch": 0.5356246324214996, - "grad_norm": 0.427734375, - "learning_rate": 0.0001050152299646245, - "loss": 0.9556, - "step": 18670 - }, - { - "epoch": 0.5357680776899574, - "grad_norm": 0.46484375, - "learning_rate": 0.00010496521914964686, - "loss": 0.9614, - "step": 18675 - }, - { - "epoch": 0.5359115229584153, - "grad_norm": 0.390625, - "learning_rate": 0.00010491520708972716, - "loss": 1.0187, - "step": 18680 - }, - { - "epoch": 0.536054968226873, - "grad_norm": 0.36328125, - "learning_rate": 0.00010486519379740509, - "loss": 0.8422, - "step": 18685 - }, - { - "epoch": 0.5361984134953308, - "grad_norm": 0.4140625, - "learning_rate": 0.0001048151792852206, - "loss": 0.9111, - "step": 18690 - }, - { - "epoch": 0.5363418587637887, - "grad_norm": 0.41796875, - "learning_rate": 0.00010476516356571393, - "loss": 0.8937, - "step": 18695 - }, - { - "epoch": 0.5364853040322465, - "grad_norm": 0.388671875, - "learning_rate": 0.00010471514665142572, - "loss": 1.1437, - "step": 18700 - }, - { - "epoch": 0.5366287493007044, - "grad_norm": 0.4140625, - "learning_rate": 0.00010466512855489676, - "loss": 1.0241, - "step": 18705 - }, - { - "epoch": 0.5367721945691621, - "grad_norm": 0.40234375, - "learning_rate": 0.00010461510928866828, - "loss": 0.9424, - "step": 18710 - }, - { - "epoch": 0.5369156398376199, - "grad_norm": 0.380859375, - "learning_rate": 0.0001045650888652817, - "loss": 0.9635, - "step": 18715 - }, - { - "epoch": 0.5370590851060778, - "grad_norm": 0.42578125, - "learning_rate": 0.00010451506729727875, - "loss": 0.952, - "step": 18720 - }, - { - "epoch": 0.5372025303745356, - "grad_norm": 0.37109375, - "learning_rate": 0.0001044650445972015, - "loss": 0.9475, - "step": 18725 - }, - { - "epoch": 0.5373459756429935, - "grad_norm": 0.3671875, - "learning_rate": 0.00010441502077759229, - "loss": 0.9778, - "step": 18730 - }, - { - "epoch": 0.5374894209114512, - "grad_norm": 0.35546875, - "learning_rate": 0.00010436499585099365, - "loss": 0.9925, - "step": 18735 - }, - { - "epoch": 0.537632866179909, - "grad_norm": 0.3828125, - "learning_rate": 0.00010431496982994848, - "loss": 0.9462, - "step": 18740 - }, - { - "epoch": 0.5377763114483669, - "grad_norm": 0.38671875, - "learning_rate": 0.00010426494272699998, - "loss": 0.9282, - "step": 18745 - }, - { - "epoch": 0.5379197567168247, - "grad_norm": 0.484375, - "learning_rate": 0.00010421491455469153, - "loss": 1.0425, - "step": 18750 - }, - { - "epoch": 0.5380632019852826, - "grad_norm": 0.390625, - "learning_rate": 0.00010416488532556683, - "loss": 0.9785, - "step": 18755 - }, - { - "epoch": 0.5382066472537403, - "grad_norm": 0.40625, - "learning_rate": 0.00010411485505216984, - "loss": 1.0611, - "step": 18760 - }, - { - "epoch": 0.5383500925221981, - "grad_norm": 0.5078125, - "learning_rate": 0.00010406482374704478, - "loss": 0.9385, - "step": 18765 - }, - { - "epoch": 0.538493537790656, - "grad_norm": 0.40625, - "learning_rate": 0.00010401479142273611, - "loss": 0.9321, - "step": 18770 - }, - { - "epoch": 0.5386369830591138, - "grad_norm": 0.36328125, - "learning_rate": 0.00010396475809178856, - "loss": 0.9066, - "step": 18775 - }, - { - "epoch": 0.5387804283275717, - "grad_norm": 0.4140625, - "learning_rate": 0.00010391472376674716, - "loss": 0.9217, - "step": 18780 - }, - { - "epoch": 0.5389238735960294, - "grad_norm": 0.421875, - "learning_rate": 0.00010386468846015707, - "loss": 0.9776, - "step": 18785 
- }, - { - "epoch": 0.5390673188644872, - "grad_norm": 0.3984375, - "learning_rate": 0.00010381465218456383, - "loss": 0.9823, - "step": 18790 - }, - { - "epoch": 0.5392107641329451, - "grad_norm": 0.439453125, - "learning_rate": 0.00010376461495251312, - "loss": 0.9932, - "step": 18795 - }, - { - "epoch": 0.5393542094014029, - "grad_norm": 0.451171875, - "learning_rate": 0.00010371457677655096, - "loss": 1.0069, - "step": 18800 - }, - { - "epoch": 0.5394976546698607, - "grad_norm": 0.400390625, - "learning_rate": 0.0001036645376692235, - "loss": 1.0239, - "step": 18805 - }, - { - "epoch": 0.5396410999383185, - "grad_norm": 0.361328125, - "learning_rate": 0.0001036144976430772, - "loss": 0.9467, - "step": 18810 - }, - { - "epoch": 0.5397845452067763, - "grad_norm": 0.375, - "learning_rate": 0.00010356445671065868, - "loss": 0.8844, - "step": 18815 - }, - { - "epoch": 0.5399279904752342, - "grad_norm": 0.419921875, - "learning_rate": 0.00010351441488451486, - "loss": 0.9034, - "step": 18820 - }, - { - "epoch": 0.540071435743692, - "grad_norm": 0.376953125, - "learning_rate": 0.0001034643721771929, - "loss": 0.8687, - "step": 18825 - }, - { - "epoch": 0.5402148810121498, - "grad_norm": 0.40234375, - "learning_rate": 0.00010341432860124003, - "loss": 1.0274, - "step": 18830 - }, - { - "epoch": 0.5403583262806076, - "grad_norm": 0.388671875, - "learning_rate": 0.00010336428416920392, - "loss": 0.9194, - "step": 18835 - }, - { - "epoch": 0.5405017715490654, - "grad_norm": 0.4140625, - "learning_rate": 0.00010331423889363223, - "loss": 1.0099, - "step": 18840 - }, - { - "epoch": 0.5406452168175233, - "grad_norm": 0.36328125, - "learning_rate": 0.00010326419278707303, - "loss": 0.9467, - "step": 18845 - }, - { - "epoch": 0.5407886620859811, - "grad_norm": 0.419921875, - "learning_rate": 0.00010321414586207443, - "loss": 1.0278, - "step": 18850 - }, - { - "epoch": 0.5409321073544389, - "grad_norm": 0.427734375, - "learning_rate": 0.0001031640981311849, - "loss": 0.8831, - "step": 18855 - }, - { - "epoch": 0.5410755526228967, - "grad_norm": 0.3828125, - "learning_rate": 0.00010311404960695299, - "loss": 0.9638, - "step": 18860 - }, - { - "epoch": 0.5412189978913545, - "grad_norm": 0.47265625, - "learning_rate": 0.00010306400030192747, - "loss": 1.0148, - "step": 18865 - }, - { - "epoch": 0.5413624431598124, - "grad_norm": 0.373046875, - "learning_rate": 0.00010301395022865738, - "loss": 0.9223, - "step": 18870 - }, - { - "epoch": 0.5415058884282702, - "grad_norm": 0.37109375, - "learning_rate": 0.00010296389939969187, - "loss": 0.8884, - "step": 18875 - }, - { - "epoch": 0.541649333696728, - "grad_norm": 0.408203125, - "learning_rate": 0.00010291384782758034, - "loss": 1.0749, - "step": 18880 - }, - { - "epoch": 0.5417927789651859, - "grad_norm": 0.40625, - "learning_rate": 0.0001028637955248723, - "loss": 1.0188, - "step": 18885 - }, - { - "epoch": 0.5419362242336436, - "grad_norm": 0.376953125, - "learning_rate": 0.00010281374250411755, - "loss": 0.9124, - "step": 18890 - }, - { - "epoch": 0.5420796695021015, - "grad_norm": 0.408203125, - "learning_rate": 0.00010276368877786598, - "loss": 1.0141, - "step": 18895 - }, - { - "epoch": 0.5422231147705593, - "grad_norm": 0.37890625, - "learning_rate": 0.00010271363435866765, - "loss": 0.9454, - "step": 18900 - }, - { - "epoch": 0.5423665600390171, - "grad_norm": 0.376953125, - "learning_rate": 0.00010266357925907293, - "loss": 0.925, - "step": 18905 - }, - { - "epoch": 0.542510005307475, - "grad_norm": 0.3984375, - "learning_rate": 
0.00010261352349163218, - "loss": 1.0277, - "step": 18910 - }, - { - "epoch": 0.5426534505759327, - "grad_norm": 0.375, - "learning_rate": 0.00010256346706889604, - "loss": 0.999, - "step": 18915 - }, - { - "epoch": 0.5427968958443906, - "grad_norm": 0.3671875, - "learning_rate": 0.00010251341000341528, - "loss": 0.9752, - "step": 18920 - }, - { - "epoch": 0.5429403411128484, - "grad_norm": 0.37109375, - "learning_rate": 0.00010246335230774083, - "loss": 0.9511, - "step": 18925 - }, - { - "epoch": 0.5430837863813062, - "grad_norm": 0.359375, - "learning_rate": 0.00010241329399442379, - "loss": 0.9264, - "step": 18930 - }, - { - "epoch": 0.543227231649764, - "grad_norm": 0.3984375, - "learning_rate": 0.00010236323507601541, - "loss": 1.0965, - "step": 18935 - }, - { - "epoch": 0.5433706769182218, - "grad_norm": 0.447265625, - "learning_rate": 0.00010231317556506708, - "loss": 0.9873, - "step": 18940 - }, - { - "epoch": 0.5435141221866797, - "grad_norm": 0.40234375, - "learning_rate": 0.00010226311547413037, - "loss": 1.0404, - "step": 18945 - }, - { - "epoch": 0.5436575674551375, - "grad_norm": 0.369140625, - "learning_rate": 0.00010221305481575696, - "loss": 0.9429, - "step": 18950 - }, - { - "epoch": 0.5438010127235953, - "grad_norm": 0.400390625, - "learning_rate": 0.00010216299360249866, - "loss": 0.963, - "step": 18955 - }, - { - "epoch": 0.5439444579920532, - "grad_norm": 0.41015625, - "learning_rate": 0.00010211293184690751, - "loss": 0.9544, - "step": 18960 - }, - { - "epoch": 0.5440879032605109, - "grad_norm": 0.388671875, - "learning_rate": 0.00010206286956153554, - "loss": 0.8914, - "step": 18965 - }, - { - "epoch": 0.5442313485289688, - "grad_norm": 0.349609375, - "learning_rate": 0.00010201280675893507, - "loss": 0.9382, - "step": 18970 - }, - { - "epoch": 0.5443747937974266, - "grad_norm": 0.392578125, - "learning_rate": 0.00010196274345165841, - "loss": 0.861, - "step": 18975 - }, - { - "epoch": 0.5445182390658844, - "grad_norm": 0.41796875, - "learning_rate": 0.00010191267965225811, - "loss": 1.0212, - "step": 18980 - }, - { - "epoch": 0.5446616843343423, - "grad_norm": 0.41015625, - "learning_rate": 0.00010186261537328676, - "loss": 0.8776, - "step": 18985 - }, - { - "epoch": 0.5448051296028, - "grad_norm": 0.388671875, - "learning_rate": 0.00010181255062729713, - "loss": 0.8721, - "step": 18990 - }, - { - "epoch": 0.5449485748712579, - "grad_norm": 0.376953125, - "learning_rate": 0.00010176248542684208, - "loss": 0.8883, - "step": 18995 - }, - { - "epoch": 0.5450920201397157, - "grad_norm": 0.39453125, - "learning_rate": 0.00010171241978447455, - "loss": 0.9688, - "step": 19000 - }, - { - "epoch": 0.5452354654081735, - "grad_norm": 0.3828125, - "learning_rate": 0.00010166235371274768, - "loss": 0.9101, - "step": 19005 - }, - { - "epoch": 0.5453789106766314, - "grad_norm": 0.396484375, - "learning_rate": 0.0001016122872242146, - "loss": 0.9229, - "step": 19010 - }, - { - "epoch": 0.5455223559450891, - "grad_norm": 0.408203125, - "learning_rate": 0.00010156222033142868, - "loss": 0.9724, - "step": 19015 - }, - { - "epoch": 0.545665801213547, - "grad_norm": 0.419921875, - "learning_rate": 0.00010151215304694324, - "loss": 0.8807, - "step": 19020 - }, - { - "epoch": 0.5458092464820048, - "grad_norm": 0.408203125, - "learning_rate": 0.00010146208538331183, - "loss": 1.016, - "step": 19025 - }, - { - "epoch": 0.5459526917504626, - "grad_norm": 0.369140625, - "learning_rate": 0.00010141201735308805, - "loss": 0.8015, - "step": 19030 - }, - { - "epoch": 0.5460961370189205, - 
"grad_norm": 0.408203125, - "learning_rate": 0.00010136194896882558, - "loss": 0.8713, - "step": 19035 - }, - { - "epoch": 0.5462395822873782, - "grad_norm": 0.40234375, - "learning_rate": 0.00010131188024307817, - "loss": 0.9353, - "step": 19040 - }, - { - "epoch": 0.5463830275558361, - "grad_norm": 0.40625, - "learning_rate": 0.00010126181118839966, - "loss": 1.0191, - "step": 19045 - }, - { - "epoch": 0.5465264728242939, - "grad_norm": 0.447265625, - "learning_rate": 0.00010121174181734405, - "loss": 0.9156, - "step": 19050 - }, - { - "epoch": 0.5466699180927517, - "grad_norm": 0.384765625, - "learning_rate": 0.00010116167214246532, - "loss": 0.928, - "step": 19055 - }, - { - "epoch": 0.5468133633612096, - "grad_norm": 0.33984375, - "learning_rate": 0.00010111160217631756, - "loss": 1.0087, - "step": 19060 - }, - { - "epoch": 0.5469568086296673, - "grad_norm": 0.388671875, - "learning_rate": 0.000101061531931455, - "loss": 0.9161, - "step": 19065 - }, - { - "epoch": 0.5471002538981252, - "grad_norm": 0.373046875, - "learning_rate": 0.00010101146142043178, - "loss": 1.0885, - "step": 19070 - }, - { - "epoch": 0.547243699166583, - "grad_norm": 0.421875, - "learning_rate": 0.00010096139065580233, - "loss": 0.8919, - "step": 19075 - }, - { - "epoch": 0.5473871444350408, - "grad_norm": 0.400390625, - "learning_rate": 0.0001009113196501209, - "loss": 0.9114, - "step": 19080 - }, - { - "epoch": 0.5475305897034987, - "grad_norm": 0.400390625, - "learning_rate": 0.00010086124841594203, - "loss": 0.8846, - "step": 19085 - }, - { - "epoch": 0.5476740349719564, - "grad_norm": 0.390625, - "learning_rate": 0.0001008111769658201, - "loss": 0.9202, - "step": 19090 - }, - { - "epoch": 0.5478174802404143, - "grad_norm": 0.416015625, - "learning_rate": 0.00010076110531230973, - "loss": 0.9123, - "step": 19095 - }, - { - "epoch": 0.5479609255088721, - "grad_norm": 0.380859375, - "learning_rate": 0.00010071103346796549, - "loss": 0.9072, - "step": 19100 - }, - { - "epoch": 0.5481043707773299, - "grad_norm": 0.400390625, - "learning_rate": 0.000100660961445342, - "loss": 0.8626, - "step": 19105 - }, - { - "epoch": 0.5482478160457878, - "grad_norm": 0.376953125, - "learning_rate": 0.000100610889256994, - "loss": 0.9755, - "step": 19110 - }, - { - "epoch": 0.5483912613142455, - "grad_norm": 0.515625, - "learning_rate": 0.00010056081691547614, - "loss": 1.0637, - "step": 19115 - }, - { - "epoch": 0.5485347065827034, - "grad_norm": 0.380859375, - "learning_rate": 0.00010051074443334327, - "loss": 0.9364, - "step": 19120 - }, - { - "epoch": 0.5486781518511612, - "grad_norm": 0.3203125, - "learning_rate": 0.00010046067182315013, - "loss": 0.8618, - "step": 19125 - }, - { - "epoch": 0.548821597119619, - "grad_norm": 0.375, - "learning_rate": 0.00010041059909745156, - "loss": 0.9649, - "step": 19130 - }, - { - "epoch": 0.5489650423880769, - "grad_norm": 0.3984375, - "learning_rate": 0.00010036052626880246, - "loss": 0.9051, - "step": 19135 - }, - { - "epoch": 0.5491084876565346, - "grad_norm": 0.373046875, - "learning_rate": 0.00010031045334975768, - "loss": 1.0171, - "step": 19140 - }, - { - "epoch": 0.5492519329249924, - "grad_norm": 0.380859375, - "learning_rate": 0.00010026038035287216, - "loss": 0.9686, - "step": 19145 - }, - { - "epoch": 0.5493953781934503, - "grad_norm": 0.380859375, - "learning_rate": 0.00010021030729070076, - "loss": 1.0254, - "step": 19150 - }, - { - "epoch": 0.5495388234619081, - "grad_norm": 0.35546875, - "learning_rate": 0.00010016023417579852, - "loss": 0.9477, - "step": 19155 - }, - { 
- "epoch": 0.549682268730366, - "grad_norm": 0.423828125, - "learning_rate": 0.00010011016102072033, - "loss": 0.873, - "step": 19160 - }, - { - "epoch": 0.5498257139988237, - "grad_norm": 0.3828125, - "learning_rate": 0.00010006008783802123, - "loss": 0.9602, - "step": 19165 - }, - { - "epoch": 0.5499691592672815, - "grad_norm": 0.369140625, - "learning_rate": 0.0001000100146402561, - "loss": 0.945, - "step": 19170 - }, - { - "epoch": 0.5501126045357394, - "grad_norm": 0.365234375, - "learning_rate": 9.995994143998002e-05, - "loss": 0.8068, - "step": 19175 - }, - { - "epoch": 0.5502560498041972, - "grad_norm": 0.35546875, - "learning_rate": 9.990986824974788e-05, - "loss": 0.9287, - "step": 19180 - }, - { - "epoch": 0.5503994950726551, - "grad_norm": 0.384765625, - "learning_rate": 9.985979508211472e-05, - "loss": 0.9588, - "step": 19185 - }, - { - "epoch": 0.5505429403411128, - "grad_norm": 0.404296875, - "learning_rate": 9.980972194963552e-05, - "loss": 0.97, - "step": 19190 - }, - { - "epoch": 0.5506863856095706, - "grad_norm": 0.40234375, - "learning_rate": 9.975964886486522e-05, - "loss": 0.9136, - "step": 19195 - }, - { - "epoch": 0.5508298308780285, - "grad_norm": 0.400390625, - "learning_rate": 9.970957584035873e-05, - "loss": 0.9093, - "step": 19200 - }, - { - "epoch": 0.5509732761464863, - "grad_norm": 0.3828125, - "learning_rate": 9.965950288867106e-05, - "loss": 0.9383, - "step": 19205 - }, - { - "epoch": 0.5511167214149442, - "grad_norm": 0.435546875, - "learning_rate": 9.96094300223571e-05, - "loss": 1.1316, - "step": 19210 - }, - { - "epoch": 0.5512601666834019, - "grad_norm": 0.404296875, - "learning_rate": 9.955935725397174e-05, - "loss": 0.8931, - "step": 19215 - }, - { - "epoch": 0.5514036119518597, - "grad_norm": 0.421875, - "learning_rate": 9.950928459606984e-05, - "loss": 0.9679, - "step": 19220 - }, - { - "epoch": 0.5515470572203176, - "grad_norm": 0.44921875, - "learning_rate": 9.945921206120628e-05, - "loss": 0.9393, - "step": 19225 - }, - { - "epoch": 0.5516905024887754, - "grad_norm": 0.388671875, - "learning_rate": 9.940913966193586e-05, - "loss": 0.8589, - "step": 19230 - }, - { - "epoch": 0.5518339477572333, - "grad_norm": 0.39453125, - "learning_rate": 9.935906741081332e-05, - "loss": 1.1018, - "step": 19235 - }, - { - "epoch": 0.551977393025691, - "grad_norm": 0.40625, - "learning_rate": 9.930899532039347e-05, - "loss": 0.9583, - "step": 19240 - }, - { - "epoch": 0.5521208382941488, - "grad_norm": 0.3828125, - "learning_rate": 9.9258923403231e-05, - "loss": 0.9082, - "step": 19245 - }, - { - "epoch": 0.5522642835626067, - "grad_norm": 0.404296875, - "learning_rate": 9.920885167188054e-05, - "loss": 0.9512, - "step": 19250 - }, - { - "epoch": 0.5524077288310645, - "grad_norm": 0.373046875, - "learning_rate": 9.915878013889665e-05, - "loss": 0.9939, - "step": 19255 - }, - { - "epoch": 0.5525511740995224, - "grad_norm": 0.390625, - "learning_rate": 9.910870881683402e-05, - "loss": 0.8451, - "step": 19260 - }, - { - "epoch": 0.5526946193679801, - "grad_norm": 0.408203125, - "learning_rate": 9.905863771824709e-05, - "loss": 1.057, - "step": 19265 - }, - { - "epoch": 0.5528380646364379, - "grad_norm": 0.384765625, - "learning_rate": 9.900856685569027e-05, - "loss": 0.889, - "step": 19270 - }, - { - "epoch": 0.5529815099048958, - "grad_norm": 0.37109375, - "learning_rate": 9.895849624171807e-05, - "loss": 0.8898, - "step": 19275 - }, - { - "epoch": 0.5531249551733536, - "grad_norm": 0.384765625, - "learning_rate": 9.890842588888474e-05, - "loss": 0.9206, - "step": 
19280 - }, - { - "epoch": 0.5532684004418115, - "grad_norm": 0.353515625, - "learning_rate": 9.885835580974458e-05, - "loss": 0.8825, - "step": 19285 - }, - { - "epoch": 0.5534118457102692, - "grad_norm": 0.43359375, - "learning_rate": 9.88082860168517e-05, - "loss": 1.0773, - "step": 19290 - }, - { - "epoch": 0.553555290978727, - "grad_norm": 0.416015625, - "learning_rate": 9.875821652276037e-05, - "loss": 1.1513, - "step": 19295 - }, - { - "epoch": 0.5536987362471849, - "grad_norm": 0.37109375, - "learning_rate": 9.870814734002456e-05, - "loss": 0.92, - "step": 19300 - }, - { - "epoch": 0.5538421815156427, - "grad_norm": 0.37890625, - "learning_rate": 9.865807848119823e-05, - "loss": 0.9375, - "step": 19305 - }, - { - "epoch": 0.5539856267841006, - "grad_norm": 0.384765625, - "learning_rate": 9.860800995883533e-05, - "loss": 1.007, - "step": 19310 - }, - { - "epoch": 0.5541290720525583, - "grad_norm": 0.37109375, - "learning_rate": 9.855794178548961e-05, - "loss": 0.9276, - "step": 19315 - }, - { - "epoch": 0.5542725173210161, - "grad_norm": 0.478515625, - "learning_rate": 9.850787397371482e-05, - "loss": 1.0477, - "step": 19320 - }, - { - "epoch": 0.554415962589474, - "grad_norm": 0.380859375, - "learning_rate": 9.845780653606456e-05, - "loss": 0.9475, - "step": 19325 - }, - { - "epoch": 0.5545594078579318, - "grad_norm": 0.376953125, - "learning_rate": 9.840773948509243e-05, - "loss": 1.0315, - "step": 19330 - }, - { - "epoch": 0.5547028531263897, - "grad_norm": 0.396484375, - "learning_rate": 9.835767283335184e-05, - "loss": 0.9122, - "step": 19335 - }, - { - "epoch": 0.5548462983948474, - "grad_norm": 0.37890625, - "learning_rate": 9.83076065933961e-05, - "loss": 0.9313, - "step": 19340 - }, - { - "epoch": 0.5549897436633052, - "grad_norm": 0.37109375, - "learning_rate": 9.825754077777842e-05, - "loss": 0.9787, - "step": 19345 - }, - { - "epoch": 0.5551331889317631, - "grad_norm": 0.376953125, - "learning_rate": 9.820747539905202e-05, - "loss": 1.0773, - "step": 19350 - }, - { - "epoch": 0.5552766342002209, - "grad_norm": 0.404296875, - "learning_rate": 9.815741046976988e-05, - "loss": 0.8703, - "step": 19355 - }, - { - "epoch": 0.5554200794686788, - "grad_norm": 0.392578125, - "learning_rate": 9.810734600248486e-05, - "loss": 0.9107, - "step": 19360 - }, - { - "epoch": 0.5555635247371365, - "grad_norm": 0.375, - "learning_rate": 9.805728200974984e-05, - "loss": 0.9768, - "step": 19365 - }, - { - "epoch": 0.5557069700055943, - "grad_norm": 0.408203125, - "learning_rate": 9.800721850411743e-05, - "loss": 0.8807, - "step": 19370 - }, - { - "epoch": 0.5558504152740522, - "grad_norm": 0.3828125, - "learning_rate": 9.79571554981402e-05, - "loss": 0.9435, - "step": 19375 - }, - { - "epoch": 0.55599386054251, - "grad_norm": 0.36328125, - "learning_rate": 9.790709300437052e-05, - "loss": 0.9609, - "step": 19380 - }, - { - "epoch": 0.5561373058109679, - "grad_norm": 0.416015625, - "learning_rate": 9.785703103536079e-05, - "loss": 0.9484, - "step": 19385 - }, - { - "epoch": 0.5562807510794257, - "grad_norm": 0.404296875, - "learning_rate": 9.780696960366311e-05, - "loss": 0.889, - "step": 19390 - }, - { - "epoch": 0.5564241963478834, - "grad_norm": 0.404296875, - "learning_rate": 9.77569087218295e-05, - "loss": 0.9312, - "step": 19395 - }, - { - "epoch": 0.5565676416163413, - "grad_norm": 0.396484375, - "learning_rate": 9.770684840241191e-05, - "loss": 1.0092, - "step": 19400 - }, - { - "epoch": 0.5567110868847991, - "grad_norm": 0.375, - "learning_rate": 9.765678865796204e-05, - "loss": 
0.9261, - "step": 19405 - }, - { - "epoch": 0.556854532153257, - "grad_norm": 0.37109375, - "learning_rate": 9.76067295010315e-05, - "loss": 0.87, - "step": 19410 - }, - { - "epoch": 0.5569979774217148, - "grad_norm": 0.38671875, - "learning_rate": 9.755667094417176e-05, - "loss": 0.9694, - "step": 19415 - }, - { - "epoch": 0.5571414226901725, - "grad_norm": 0.37109375, - "learning_rate": 9.750661299993415e-05, - "loss": 0.8006, - "step": 19420 - }, - { - "epoch": 0.5572848679586304, - "grad_norm": 0.36328125, - "learning_rate": 9.74565556808698e-05, - "loss": 0.8875, - "step": 19425 - }, - { - "epoch": 0.5574283132270882, - "grad_norm": 0.34765625, - "learning_rate": 9.740649899952967e-05, - "loss": 0.9457, - "step": 19430 - }, - { - "epoch": 0.5575717584955461, - "grad_norm": 0.376953125, - "learning_rate": 9.73564429684647e-05, - "loss": 0.9682, - "step": 19435 - }, - { - "epoch": 0.5577152037640039, - "grad_norm": 0.345703125, - "learning_rate": 9.73063876002255e-05, - "loss": 0.8618, - "step": 19440 - }, - { - "epoch": 0.5578586490324616, - "grad_norm": 0.380859375, - "learning_rate": 9.725633290736258e-05, - "loss": 0.8742, - "step": 19445 - }, - { - "epoch": 0.5580020943009195, - "grad_norm": 0.421875, - "learning_rate": 9.720627890242628e-05, - "loss": 0.9387, - "step": 19450 - }, - { - "epoch": 0.5581455395693773, - "grad_norm": 0.37109375, - "learning_rate": 9.71562255979668e-05, - "loss": 1.0532, - "step": 19455 - }, - { - "epoch": 0.5582889848378352, - "grad_norm": 0.4140625, - "learning_rate": 9.710617300653412e-05, - "loss": 1.0155, - "step": 19460 - }, - { - "epoch": 0.558432430106293, - "grad_norm": 0.3671875, - "learning_rate": 9.705612114067799e-05, - "loss": 0.9372, - "step": 19465 - }, - { - "epoch": 0.5585758753747507, - "grad_norm": 0.3515625, - "learning_rate": 9.700607001294814e-05, - "loss": 0.8107, - "step": 19470 - }, - { - "epoch": 0.5587193206432086, - "grad_norm": 0.41796875, - "learning_rate": 9.6956019635894e-05, - "loss": 0.9873, - "step": 19475 - }, - { - "epoch": 0.5588627659116664, - "grad_norm": 0.44921875, - "learning_rate": 9.690597002206477e-05, - "loss": 0.9514, - "step": 19480 - }, - { - "epoch": 0.5590062111801242, - "grad_norm": 0.376953125, - "learning_rate": 9.685592118400953e-05, - "loss": 0.8878, - "step": 19485 - }, - { - "epoch": 0.559149656448582, - "grad_norm": 0.376953125, - "learning_rate": 9.68058731342772e-05, - "loss": 0.9926, - "step": 19490 - }, - { - "epoch": 0.5592931017170398, - "grad_norm": 0.345703125, - "learning_rate": 9.67558258854164e-05, - "loss": 0.9274, - "step": 19495 - }, - { - "epoch": 0.5594365469854977, - "grad_norm": 0.404296875, - "learning_rate": 9.670577944997566e-05, - "loss": 0.9333, - "step": 19500 - }, - { - "epoch": 0.5595799922539555, - "grad_norm": 0.388671875, - "learning_rate": 9.665573384050319e-05, - "loss": 0.948, - "step": 19505 - }, - { - "epoch": 0.5597234375224133, - "grad_norm": 0.392578125, - "learning_rate": 9.660568906954711e-05, - "loss": 0.9441, - "step": 19510 - }, - { - "epoch": 0.5598668827908712, - "grad_norm": 0.384765625, - "learning_rate": 9.655564514965526e-05, - "loss": 0.9803, - "step": 19515 - }, - { - "epoch": 0.5600103280593289, - "grad_norm": 0.408203125, - "learning_rate": 9.65056020933752e-05, - "loss": 0.9331, - "step": 19520 - }, - { - "epoch": 0.5601537733277868, - "grad_norm": 0.37109375, - "learning_rate": 9.645555991325446e-05, - "loss": 1.0118, - "step": 19525 - }, - { - "epoch": 0.5602972185962446, - "grad_norm": 0.451171875, - "learning_rate": 
9.640551862184021e-05, - "loss": 0.9412, - "step": 19530 - }, - { - "epoch": 0.5604406638647024, - "grad_norm": 0.443359375, - "learning_rate": 9.63554782316794e-05, - "loss": 1.0649, - "step": 19535 - }, - { - "epoch": 0.5605841091331603, - "grad_norm": 0.5546875, - "learning_rate": 9.630543875531879e-05, - "loss": 0.8632, - "step": 19540 - }, - { - "epoch": 0.560727554401618, - "grad_norm": 0.3671875, - "learning_rate": 9.625540020530496e-05, - "loss": 0.8464, - "step": 19545 - }, - { - "epoch": 0.5608709996700759, - "grad_norm": 0.396484375, - "learning_rate": 9.620536259418416e-05, - "loss": 0.9371, - "step": 19550 - }, - { - "epoch": 0.5610144449385337, - "grad_norm": 0.35546875, - "learning_rate": 9.61553259345024e-05, - "loss": 1.0948, - "step": 19555 - }, - { - "epoch": 0.5611578902069915, - "grad_norm": 0.392578125, - "learning_rate": 9.610529023880561e-05, - "loss": 1.0418, - "step": 19560 - }, - { - "epoch": 0.5613013354754494, - "grad_norm": 0.369140625, - "learning_rate": 9.605525551963929e-05, - "loss": 0.9478, - "step": 19565 - }, - { - "epoch": 0.5614447807439071, - "grad_norm": 0.353515625, - "learning_rate": 9.600522178954879e-05, - "loss": 0.9493, - "step": 19570 - }, - { - "epoch": 0.561588226012365, - "grad_norm": 0.37109375, - "learning_rate": 9.595518906107918e-05, - "loss": 0.9851, - "step": 19575 - }, - { - "epoch": 0.5617316712808228, - "grad_norm": 0.3828125, - "learning_rate": 9.590515734677531e-05, - "loss": 0.9521, - "step": 19580 - }, - { - "epoch": 0.5618751165492806, - "grad_norm": 0.41015625, - "learning_rate": 9.585512665918177e-05, - "loss": 0.9843, - "step": 19585 - }, - { - "epoch": 0.5620185618177385, - "grad_norm": 0.40234375, - "learning_rate": 9.580509701084286e-05, - "loss": 0.8486, - "step": 19590 - }, - { - "epoch": 0.5621620070861962, - "grad_norm": 0.373046875, - "learning_rate": 9.575506841430266e-05, - "loss": 0.8768, - "step": 19595 - }, - { - "epoch": 0.5623054523546541, - "grad_norm": 0.41796875, - "learning_rate": 9.570504088210496e-05, - "loss": 1.0272, - "step": 19600 - }, - { - "epoch": 0.5624488976231119, - "grad_norm": 0.43359375, - "learning_rate": 9.565501442679329e-05, - "loss": 0.8792, - "step": 19605 - }, - { - "epoch": 0.5625923428915697, - "grad_norm": 0.404296875, - "learning_rate": 9.560498906091085e-05, - "loss": 0.9746, - "step": 19610 - }, - { - "epoch": 0.5627357881600276, - "grad_norm": 0.4140625, - "learning_rate": 9.555496479700074e-05, - "loss": 0.999, - "step": 19615 - }, - { - "epoch": 0.5628792334284853, - "grad_norm": 0.384765625, - "learning_rate": 9.550494164760562e-05, - "loss": 0.9568, - "step": 19620 - }, - { - "epoch": 0.5630226786969432, - "grad_norm": 0.404296875, - "learning_rate": 9.545491962526788e-05, - "loss": 0.9561, - "step": 19625 - }, - { - "epoch": 0.563166123965401, - "grad_norm": 0.388671875, - "learning_rate": 9.540489874252972e-05, - "loss": 0.967, - "step": 19630 - }, - { - "epoch": 0.5633095692338588, - "grad_norm": 0.384765625, - "learning_rate": 9.535487901193302e-05, - "loss": 1.0305, - "step": 19635 - }, - { - "epoch": 0.5634530145023167, - "grad_norm": 0.375, - "learning_rate": 9.53048604460193e-05, - "loss": 0.9585, - "step": 19640 - }, - { - "epoch": 0.5635964597707744, - "grad_norm": 0.396484375, - "learning_rate": 9.525484305732985e-05, - "loss": 0.856, - "step": 19645 - }, - { - "epoch": 0.5637399050392323, - "grad_norm": 0.37109375, - "learning_rate": 9.52048268584057e-05, - "loss": 0.8915, - "step": 19650 - }, - { - "epoch": 0.5638833503076901, - "grad_norm": 0.435546875, - 
"learning_rate": 9.51548118617875e-05, - "loss": 0.9484, - "step": 19655 - }, - { - "epoch": 0.5640267955761479, - "grad_norm": 0.38671875, - "learning_rate": 9.510479808001566e-05, - "loss": 0.9965, - "step": 19660 - }, - { - "epoch": 0.5641702408446058, - "grad_norm": 0.396484375, - "learning_rate": 9.505478552563023e-05, - "loss": 1.0383, - "step": 19665 - }, - { - "epoch": 0.5643136861130635, - "grad_norm": 0.380859375, - "learning_rate": 9.500477421117102e-05, - "loss": 0.8067, - "step": 19670 - }, - { - "epoch": 0.5644571313815214, - "grad_norm": 0.423828125, - "learning_rate": 9.495476414917749e-05, - "loss": 1.0376, - "step": 19675 - }, - { - "epoch": 0.5646005766499792, - "grad_norm": 0.3828125, - "learning_rate": 9.490475535218875e-05, - "loss": 0.8338, - "step": 19680 - }, - { - "epoch": 0.564744021918437, - "grad_norm": 0.373046875, - "learning_rate": 9.485474783274371e-05, - "loss": 0.9427, - "step": 19685 - }, - { - "epoch": 0.5648874671868949, - "grad_norm": 0.41796875, - "learning_rate": 9.480474160338082e-05, - "loss": 0.899, - "step": 19690 - }, - { - "epoch": 0.5650309124553526, - "grad_norm": 0.37109375, - "learning_rate": 9.475473667663829e-05, - "loss": 0.8461, - "step": 19695 - }, - { - "epoch": 0.5651743577238105, - "grad_norm": 0.388671875, - "learning_rate": 9.470473306505392e-05, - "loss": 0.9438, - "step": 19700 - }, - { - "epoch": 0.5653178029922683, - "grad_norm": 0.404296875, - "learning_rate": 9.465473078116536e-05, - "loss": 0.9196, - "step": 19705 - }, - { - "epoch": 0.5654612482607261, - "grad_norm": 0.38671875, - "learning_rate": 9.460472983750977e-05, - "loss": 0.9409, - "step": 19710 - }, - { - "epoch": 0.565604693529184, - "grad_norm": 0.376953125, - "learning_rate": 9.455473024662393e-05, - "loss": 0.9545, - "step": 19715 - }, - { - "epoch": 0.5657481387976417, - "grad_norm": 0.388671875, - "learning_rate": 9.450473202104448e-05, - "loss": 1.0432, - "step": 19720 - }, - { - "epoch": 0.5658915840660996, - "grad_norm": 0.3984375, - "learning_rate": 9.445473517330753e-05, - "loss": 0.9736, - "step": 19725 - }, - { - "epoch": 0.5660350293345574, - "grad_norm": 0.384765625, - "learning_rate": 9.440473971594895e-05, - "loss": 1.0114, - "step": 19730 - }, - { - "epoch": 0.5661784746030152, - "grad_norm": 0.3828125, - "learning_rate": 9.435474566150418e-05, - "loss": 0.9755, - "step": 19735 - }, - { - "epoch": 0.5663219198714731, - "grad_norm": 0.3828125, - "learning_rate": 9.430475302250844e-05, - "loss": 0.988, - "step": 19740 - }, - { - "epoch": 0.5664653651399308, - "grad_norm": 0.443359375, - "learning_rate": 9.425476181149646e-05, - "loss": 1.0047, - "step": 19745 - }, - { - "epoch": 0.5666088104083887, - "grad_norm": 0.38671875, - "learning_rate": 9.420477204100264e-05, - "loss": 0.9029, - "step": 19750 - }, - { - "epoch": 0.5667522556768465, - "grad_norm": 0.427734375, - "learning_rate": 9.415478372356109e-05, - "loss": 1.0412, - "step": 19755 - }, - { - "epoch": 0.5668957009453043, - "grad_norm": 0.376953125, - "learning_rate": 9.41047968717055e-05, - "loss": 0.9351, - "step": 19760 - }, - { - "epoch": 0.5670391462137622, - "grad_norm": 0.37890625, - "learning_rate": 9.405481149796918e-05, - "loss": 0.9373, - "step": 19765 - }, - { - "epoch": 0.5671825914822199, - "grad_norm": 0.353515625, - "learning_rate": 9.400482761488507e-05, - "loss": 0.9283, - "step": 19770 - }, - { - "epoch": 0.5673260367506778, - "grad_norm": 0.380859375, - "learning_rate": 9.395484523498582e-05, - "loss": 0.9202, - "step": 19775 - }, - { - "epoch": 0.5674694820191356, - 
"grad_norm": 0.3828125, - "learning_rate": 9.390486437080361e-05, - "loss": 0.8736, - "step": 19780 - }, - { - "epoch": 0.5676129272875934, - "grad_norm": 0.392578125, - "learning_rate": 9.385488503487026e-05, - "loss": 0.9279, - "step": 19785 - }, - { - "epoch": 0.5677563725560513, - "grad_norm": 0.40625, - "learning_rate": 9.380490723971717e-05, - "loss": 0.9247, - "step": 19790 - }, - { - "epoch": 0.567899817824509, - "grad_norm": 0.400390625, - "learning_rate": 9.37549309978755e-05, - "loss": 0.9728, - "step": 19795 - }, - { - "epoch": 0.5680432630929669, - "grad_norm": 0.44140625, - "learning_rate": 9.370495632187587e-05, - "loss": 0.9226, - "step": 19800 - }, - { - "epoch": 0.5681867083614247, - "grad_norm": 0.4140625, - "learning_rate": 9.365498322424851e-05, - "loss": 0.8661, - "step": 19805 - }, - { - "epoch": 0.5683301536298825, - "grad_norm": 0.458984375, - "learning_rate": 9.360501171752339e-05, - "loss": 1.061, - "step": 19810 - }, - { - "epoch": 0.5684735988983404, - "grad_norm": 0.373046875, - "learning_rate": 9.355504181422996e-05, - "loss": 0.8793, - "step": 19815 - }, - { - "epoch": 0.5686170441667981, - "grad_norm": 0.40625, - "learning_rate": 9.35050735268973e-05, - "loss": 1.0114, - "step": 19820 - }, - { - "epoch": 0.5687604894352559, - "grad_norm": 0.40625, - "learning_rate": 9.345510686805402e-05, - "loss": 0.9072, - "step": 19825 - }, - { - "epoch": 0.5689039347037138, - "grad_norm": 0.3984375, - "learning_rate": 9.340514185022851e-05, - "loss": 1.0032, - "step": 19830 - }, - { - "epoch": 0.5690473799721716, - "grad_norm": 0.4140625, - "learning_rate": 9.335517848594854e-05, - "loss": 0.9427, - "step": 19835 - }, - { - "epoch": 0.5691908252406295, - "grad_norm": 0.3984375, - "learning_rate": 9.330521678774157e-05, - "loss": 0.9654, - "step": 19840 - }, - { - "epoch": 0.5693342705090872, - "grad_norm": 0.376953125, - "learning_rate": 9.325525676813461e-05, - "loss": 0.8858, - "step": 19845 - }, - { - "epoch": 0.569477715777545, - "grad_norm": 0.373046875, - "learning_rate": 9.320529843965432e-05, - "loss": 0.9358, - "step": 19850 - }, - { - "epoch": 0.5696211610460029, - "grad_norm": 0.416015625, - "learning_rate": 9.315534181482683e-05, - "loss": 0.9526, - "step": 19855 - }, - { - "epoch": 0.5697646063144607, - "grad_norm": 0.375, - "learning_rate": 9.310538690617788e-05, - "loss": 0.9345, - "step": 19860 - }, - { - "epoch": 0.5699080515829186, - "grad_norm": 0.419921875, - "learning_rate": 9.305543372623285e-05, - "loss": 1.0049, - "step": 19865 - }, - { - "epoch": 0.5700514968513763, - "grad_norm": 0.353515625, - "learning_rate": 9.300548228751657e-05, - "loss": 0.8882, - "step": 19870 - }, - { - "epoch": 0.5701949421198341, - "grad_norm": 0.349609375, - "learning_rate": 9.295553260255347e-05, - "loss": 0.9745, - "step": 19875 - }, - { - "epoch": 0.570338387388292, - "grad_norm": 0.392578125, - "learning_rate": 9.290558468386765e-05, - "loss": 0.9653, - "step": 19880 - }, - { - "epoch": 0.5704818326567498, - "grad_norm": 0.416015625, - "learning_rate": 9.285563854398264e-05, - "loss": 0.951, - "step": 19885 - }, - { - "epoch": 0.5706252779252077, - "grad_norm": 0.474609375, - "learning_rate": 9.280569419542154e-05, - "loss": 1.0817, - "step": 19890 - }, - { - "epoch": 0.5707687231936655, - "grad_norm": 0.365234375, - "learning_rate": 9.275575165070701e-05, - "loss": 0.8962, - "step": 19895 - }, - { - "epoch": 0.5709121684621232, - "grad_norm": 0.384765625, - "learning_rate": 9.270581092236134e-05, - "loss": 0.9201, - "step": 19900 - }, - { - "epoch": 
0.5710556137305811, - "grad_norm": 0.3828125, - "learning_rate": 9.265587202290625e-05, - "loss": 1.014, - "step": 19905 - }, - { - "epoch": 0.5711990589990389, - "grad_norm": 0.404296875, - "learning_rate": 9.260593496486302e-05, - "loss": 0.9726, - "step": 19910 - }, - { - "epoch": 0.5713425042674968, - "grad_norm": 0.328125, - "learning_rate": 9.255599976075256e-05, - "loss": 0.9619, - "step": 19915 - }, - { - "epoch": 0.5714859495359546, - "grad_norm": 0.388671875, - "learning_rate": 9.250606642309523e-05, - "loss": 0.9759, - "step": 19920 - }, - { - "epoch": 0.5716293948044123, - "grad_norm": 0.427734375, - "learning_rate": 9.245613496441093e-05, - "loss": 0.8686, - "step": 19925 - }, - { - "epoch": 0.5717728400728702, - "grad_norm": 0.3984375, - "learning_rate": 9.240620539721904e-05, - "loss": 0.9623, - "step": 19930 - }, - { - "epoch": 0.571916285341328, - "grad_norm": 0.390625, - "learning_rate": 9.235627773403866e-05, - "loss": 0.9285, - "step": 19935 - }, - { - "epoch": 0.5720597306097859, - "grad_norm": 0.37109375, - "learning_rate": 9.23063519873882e-05, - "loss": 0.9079, - "step": 19940 - }, - { - "epoch": 0.5722031758782437, - "grad_norm": 0.4453125, - "learning_rate": 9.22564281697857e-05, - "loss": 0.878, - "step": 19945 - }, - { - "epoch": 0.5723466211467014, - "grad_norm": 0.392578125, - "learning_rate": 9.220650629374862e-05, - "loss": 0.9632, - "step": 19950 - }, - { - "epoch": 0.5724900664151593, - "grad_norm": 0.384765625, - "learning_rate": 9.215658637179407e-05, - "loss": 1.0049, - "step": 19955 - }, - { - "epoch": 0.5726335116836171, - "grad_norm": 0.41796875, - "learning_rate": 9.210666841643857e-05, - "loss": 0.9034, - "step": 19960 - }, - { - "epoch": 0.572776956952075, - "grad_norm": 0.390625, - "learning_rate": 9.205675244019816e-05, - "loss": 0.9009, - "step": 19965 - }, - { - "epoch": 0.5729204022205328, - "grad_norm": 0.357421875, - "learning_rate": 9.200683845558845e-05, - "loss": 1.0399, - "step": 19970 - }, - { - "epoch": 0.5730638474889905, - "grad_norm": 0.396484375, - "learning_rate": 9.195692647512447e-05, - "loss": 1.0546, - "step": 19975 - }, - { - "epoch": 0.5732072927574484, - "grad_norm": 0.36328125, - "learning_rate": 9.190701651132079e-05, - "loss": 0.8911, - "step": 19980 - }, - { - "epoch": 0.5733507380259062, - "grad_norm": 0.388671875, - "learning_rate": 9.185710857669139e-05, - "loss": 0.9936, - "step": 19985 - }, - { - "epoch": 0.5734941832943641, - "grad_norm": 0.3828125, - "learning_rate": 9.180720268374992e-05, - "loss": 0.9574, - "step": 19990 - }, - { - "epoch": 0.5736376285628219, - "grad_norm": 0.3984375, - "learning_rate": 9.175729884500938e-05, - "loss": 0.9454, - "step": 19995 - }, - { - "epoch": 0.5737810738312796, - "grad_norm": 0.404296875, - "learning_rate": 9.170739707298221e-05, - "loss": 0.9193, - "step": 20000 - }, - { - "epoch": 0.5739245190997375, - "grad_norm": 0.375, - "learning_rate": 9.165749738018052e-05, - "loss": 0.8457, - "step": 20005 - }, - { - "epoch": 0.5740679643681953, - "grad_norm": 0.462890625, - "learning_rate": 9.160759977911576e-05, - "loss": 0.9208, - "step": 20010 - }, - { - "epoch": 0.5742114096366532, - "grad_norm": 0.427734375, - "learning_rate": 9.155770428229883e-05, - "loss": 1.0537, - "step": 20015 - }, - { - "epoch": 0.574354854905111, - "grad_norm": 0.357421875, - "learning_rate": 9.150781090224015e-05, - "loss": 0.9058, - "step": 20020 - }, - { - "epoch": 0.5744983001735687, - "grad_norm": 0.427734375, - "learning_rate": 9.145791965144968e-05, - "loss": 0.9691, - "step": 20025 - }, - { 
- "epoch": 0.5746417454420266, - "grad_norm": 0.39453125, - "learning_rate": 9.140803054243677e-05, - "loss": 0.9657, - "step": 20030 - }, - { - "epoch": 0.5747851907104844, - "grad_norm": 0.376953125, - "learning_rate": 9.13581435877102e-05, - "loss": 0.8342, - "step": 20035 - }, - { - "epoch": 0.5749286359789423, - "grad_norm": 0.431640625, - "learning_rate": 9.130825879977828e-05, - "loss": 1.0187, - "step": 20040 - }, - { - "epoch": 0.5750720812474001, - "grad_norm": 0.408203125, - "learning_rate": 9.125837619114875e-05, - "loss": 0.9106, - "step": 20045 - }, - { - "epoch": 0.5752155265158578, - "grad_norm": 0.427734375, - "learning_rate": 9.12084957743288e-05, - "loss": 0.9999, - "step": 20050 - }, - { - "epoch": 0.5753589717843157, - "grad_norm": 0.41015625, - "learning_rate": 9.115861756182504e-05, - "loss": 0.9616, - "step": 20055 - }, - { - "epoch": 0.5755024170527735, - "grad_norm": 0.341796875, - "learning_rate": 9.110874156614362e-05, - "loss": 0.9431, - "step": 20060 - }, - { - "epoch": 0.5756458623212314, - "grad_norm": 0.3359375, - "learning_rate": 9.105886779979007e-05, - "loss": 0.8078, - "step": 20065 - }, - { - "epoch": 0.5757893075896892, - "grad_norm": 0.490234375, - "learning_rate": 9.100899627526933e-05, - "loss": 1.0262, - "step": 20070 - }, - { - "epoch": 0.5759327528581469, - "grad_norm": 0.37890625, - "learning_rate": 9.095912700508579e-05, - "loss": 0.8889, - "step": 20075 - }, - { - "epoch": 0.5760761981266048, - "grad_norm": 0.390625, - "learning_rate": 9.090926000174338e-05, - "loss": 0.913, - "step": 20080 - }, - { - "epoch": 0.5762196433950626, - "grad_norm": 0.353515625, - "learning_rate": 9.085939527774534e-05, - "loss": 0.8531, - "step": 20085 - }, - { - "epoch": 0.5763630886635205, - "grad_norm": 0.37890625, - "learning_rate": 9.080953284559433e-05, - "loss": 0.9712, - "step": 20090 - }, - { - "epoch": 0.5765065339319783, - "grad_norm": 0.40625, - "learning_rate": 9.075967271779258e-05, - "loss": 1.0126, - "step": 20095 - }, - { - "epoch": 0.576649979200436, - "grad_norm": 0.384765625, - "learning_rate": 9.070981490684159e-05, - "loss": 0.9059, - "step": 20100 - }, - { - "epoch": 0.5767934244688939, - "grad_norm": 0.390625, - "learning_rate": 9.065995942524235e-05, - "loss": 0.8821, - "step": 20105 - }, - { - "epoch": 0.5769368697373517, - "grad_norm": 0.443359375, - "learning_rate": 9.061010628549522e-05, - "loss": 0.9746, - "step": 20110 - }, - { - "epoch": 0.5770803150058096, - "grad_norm": 0.369140625, - "learning_rate": 9.056025550010005e-05, - "loss": 0.9081, - "step": 20115 - }, - { - "epoch": 0.5772237602742674, - "grad_norm": 0.37109375, - "learning_rate": 9.051040708155606e-05, - "loss": 0.9151, - "step": 20120 - }, - { - "epoch": 0.5773672055427251, - "grad_norm": 0.361328125, - "learning_rate": 9.04605610423618e-05, - "loss": 0.9038, - "step": 20125 - }, - { - "epoch": 0.577510650811183, - "grad_norm": 0.482421875, - "learning_rate": 9.041071739501538e-05, - "loss": 1.0034, - "step": 20130 - }, - { - "epoch": 0.5776540960796408, - "grad_norm": 0.44921875, - "learning_rate": 9.036087615201419e-05, - "loss": 0.8725, - "step": 20135 - }, - { - "epoch": 0.5777975413480987, - "grad_norm": 0.35546875, - "learning_rate": 9.0311037325855e-05, - "loss": 0.9137, - "step": 20140 - }, - { - "epoch": 0.5779409866165565, - "grad_norm": 0.37890625, - "learning_rate": 9.026120092903408e-05, - "loss": 1.0661, - "step": 20145 - }, - { - "epoch": 0.5780844318850142, - "grad_norm": 0.390625, - "learning_rate": 9.021136697404706e-05, - "loss": 0.9337, - "step": 
20150 - }, - { - "epoch": 0.5782278771534721, - "grad_norm": 0.359375, - "learning_rate": 9.01615354733889e-05, - "loss": 0.9329, - "step": 20155 - }, - { - "epoch": 0.5783713224219299, - "grad_norm": 0.390625, - "learning_rate": 9.011170643955394e-05, - "loss": 0.8987, - "step": 20160 - }, - { - "epoch": 0.5785147676903878, - "grad_norm": 0.390625, - "learning_rate": 9.006187988503602e-05, - "loss": 0.9454, - "step": 20165 - }, - { - "epoch": 0.5786582129588456, - "grad_norm": 0.35546875, - "learning_rate": 9.001205582232825e-05, - "loss": 0.8987, - "step": 20170 - }, - { - "epoch": 0.5788016582273033, - "grad_norm": 0.41015625, - "learning_rate": 8.996223426392314e-05, - "loss": 0.9914, - "step": 20175 - }, - { - "epoch": 0.5789451034957612, - "grad_norm": 0.373046875, - "learning_rate": 8.991241522231252e-05, - "loss": 0.9097, - "step": 20180 - }, - { - "epoch": 0.579088548764219, - "grad_norm": 0.3984375, - "learning_rate": 8.986259870998773e-05, - "loss": 1.1085, - "step": 20185 - }, - { - "epoch": 0.5792319940326768, - "grad_norm": 0.373046875, - "learning_rate": 8.981278473943936e-05, - "loss": 0.8491, - "step": 20190 - }, - { - "epoch": 0.5793754393011347, - "grad_norm": 0.4140625, - "learning_rate": 8.976297332315737e-05, - "loss": 0.9657, - "step": 20195 - }, - { - "epoch": 0.5795188845695924, - "grad_norm": 0.396484375, - "learning_rate": 8.971316447363115e-05, - "loss": 0.8815, - "step": 20200 - }, - { - "epoch": 0.5796623298380503, - "grad_norm": 0.365234375, - "learning_rate": 8.966335820334937e-05, - "loss": 0.8608, - "step": 20205 - }, - { - "epoch": 0.5798057751065081, - "grad_norm": 0.392578125, - "learning_rate": 8.96135545248001e-05, - "loss": 0.9763, - "step": 20210 - }, - { - "epoch": 0.5799492203749659, - "grad_norm": 0.369140625, - "learning_rate": 8.95637534504707e-05, - "loss": 0.9571, - "step": 20215 - }, - { - "epoch": 0.5800926656434238, - "grad_norm": 0.380859375, - "learning_rate": 8.951395499284797e-05, - "loss": 1.0099, - "step": 20220 - }, - { - "epoch": 0.5802361109118815, - "grad_norm": 0.423828125, - "learning_rate": 8.946415916441797e-05, - "loss": 0.983, - "step": 20225 - }, - { - "epoch": 0.5803795561803394, - "grad_norm": 0.419921875, - "learning_rate": 8.941436597766616e-05, - "loss": 1.0002, - "step": 20230 - }, - { - "epoch": 0.5805230014487972, - "grad_norm": 0.365234375, - "learning_rate": 8.936457544507728e-05, - "loss": 0.9588, - "step": 20235 - }, - { - "epoch": 0.580666446717255, - "grad_norm": 0.390625, - "learning_rate": 8.93147875791355e-05, - "loss": 0.886, - "step": 20240 - }, - { - "epoch": 0.5808098919857129, - "grad_norm": 0.3671875, - "learning_rate": 8.926500239232419e-05, - "loss": 0.9844, - "step": 20245 - }, - { - "epoch": 0.5809533372541706, - "grad_norm": 0.337890625, - "learning_rate": 8.921521989712611e-05, - "loss": 0.9606, - "step": 20250 - }, - { - "epoch": 0.5810967825226285, - "grad_norm": 0.3984375, - "learning_rate": 8.916544010602342e-05, - "loss": 0.9763, - "step": 20255 - }, - { - "epoch": 0.5812402277910863, - "grad_norm": 0.38671875, - "learning_rate": 8.91156630314975e-05, - "loss": 0.9237, - "step": 20260 - }, - { - "epoch": 0.5813836730595441, - "grad_norm": 0.404296875, - "learning_rate": 8.906588868602909e-05, - "loss": 1.0265, - "step": 20265 - }, - { - "epoch": 0.581527118328002, - "grad_norm": 0.41015625, - "learning_rate": 8.901611708209818e-05, - "loss": 0.8989, - "step": 20270 - }, - { - "epoch": 0.5816705635964597, - "grad_norm": 0.392578125, - "learning_rate": 8.896634823218423e-05, - "loss": 
0.9824, - "step": 20275 - }, - { - "epoch": 0.5818140088649176, - "grad_norm": 0.375, - "learning_rate": 8.891658214876585e-05, - "loss": 0.916, - "step": 20280 - }, - { - "epoch": 0.5819574541333754, - "grad_norm": 0.4140625, - "learning_rate": 8.886681884432099e-05, - "loss": 0.9987, - "step": 20285 - }, - { - "epoch": 0.5821008994018332, - "grad_norm": 0.349609375, - "learning_rate": 8.8817058331327e-05, - "loss": 0.8898, - "step": 20290 - }, - { - "epoch": 0.5822443446702911, - "grad_norm": 0.35546875, - "learning_rate": 8.876730062226044e-05, - "loss": 0.8926, - "step": 20295 - }, - { - "epoch": 0.5823877899387488, - "grad_norm": 0.44921875, - "learning_rate": 8.871754572959716e-05, - "loss": 0.9742, - "step": 20300 - }, - { - "epoch": 0.5825312352072067, - "grad_norm": 0.3828125, - "learning_rate": 8.866779366581233e-05, - "loss": 0.965, - "step": 20305 - }, - { - "epoch": 0.5826746804756645, - "grad_norm": 0.375, - "learning_rate": 8.861804444338045e-05, - "loss": 0.9647, - "step": 20310 - }, - { - "epoch": 0.5828181257441223, - "grad_norm": 0.390625, - "learning_rate": 8.856829807477523e-05, - "loss": 0.9123, - "step": 20315 - }, - { - "epoch": 0.5829615710125802, - "grad_norm": 0.3671875, - "learning_rate": 8.851855457246972e-05, - "loss": 0.9343, - "step": 20320 - }, - { - "epoch": 0.583105016281038, - "grad_norm": 0.384765625, - "learning_rate": 8.846881394893624e-05, - "loss": 0.9427, - "step": 20325 - }, - { - "epoch": 0.5832484615494958, - "grad_norm": 0.40234375, - "learning_rate": 8.841907621664638e-05, - "loss": 0.934, - "step": 20330 - }, - { - "epoch": 0.5833919068179536, - "grad_norm": 0.365234375, - "learning_rate": 8.8369341388071e-05, - "loss": 0.9492, - "step": 20335 - }, - { - "epoch": 0.5835353520864114, - "grad_norm": 0.384765625, - "learning_rate": 8.83196094756802e-05, - "loss": 0.7545, - "step": 20340 - }, - { - "epoch": 0.5836787973548693, - "grad_norm": 0.37890625, - "learning_rate": 8.826988049194351e-05, - "loss": 0.9836, - "step": 20345 - }, - { - "epoch": 0.583822242623327, - "grad_norm": 0.421875, - "learning_rate": 8.82201544493295e-05, - "loss": 0.961, - "step": 20350 - }, - { - "epoch": 0.5839656878917849, - "grad_norm": 0.357421875, - "learning_rate": 8.81704313603061e-05, - "loss": 0.9589, - "step": 20355 - }, - { - "epoch": 0.5841091331602427, - "grad_norm": 0.3671875, - "learning_rate": 8.812071123734058e-05, - "loss": 0.8919, - "step": 20360 - }, - { - "epoch": 0.5842525784287005, - "grad_norm": 0.375, - "learning_rate": 8.807099409289936e-05, - "loss": 0.9117, - "step": 20365 - }, - { - "epoch": 0.5843960236971584, - "grad_norm": 0.396484375, - "learning_rate": 8.802127993944814e-05, - "loss": 0.9267, - "step": 20370 - }, - { - "epoch": 0.5845394689656161, - "grad_norm": 0.375, - "learning_rate": 8.797156878945183e-05, - "loss": 1.0388, - "step": 20375 - }, - { - "epoch": 0.584682914234074, - "grad_norm": 0.357421875, - "learning_rate": 8.792186065537473e-05, - "loss": 0.9243, - "step": 20380 - }, - { - "epoch": 0.5848263595025318, - "grad_norm": 0.400390625, - "learning_rate": 8.787215554968023e-05, - "loss": 0.98, - "step": 20385 - }, - { - "epoch": 0.5849698047709896, - "grad_norm": 0.400390625, - "learning_rate": 8.782245348483104e-05, - "loss": 1.0201, - "step": 20390 - }, - { - "epoch": 0.5851132500394475, - "grad_norm": 0.416015625, - "learning_rate": 8.777275447328905e-05, - "loss": 1.0021, - "step": 20395 - }, - { - "epoch": 0.5852566953079053, - "grad_norm": 0.36328125, - "learning_rate": 8.772305852751542e-05, - "loss": 0.9022, - 
"step": 20400 - }, - { - "epoch": 0.5854001405763631, - "grad_norm": 0.396484375, - "learning_rate": 8.767336565997059e-05, - "loss": 0.9097, - "step": 20405 - }, - { - "epoch": 0.5855435858448209, - "grad_norm": 0.373046875, - "learning_rate": 8.762367588311414e-05, - "loss": 0.8685, - "step": 20410 - }, - { - "epoch": 0.5856870311132787, - "grad_norm": 0.392578125, - "learning_rate": 8.757398920940492e-05, - "loss": 0.908, - "step": 20415 - }, - { - "epoch": 0.5858304763817366, - "grad_norm": 0.390625, - "learning_rate": 8.752430565130103e-05, - "loss": 0.9556, - "step": 20420 - }, - { - "epoch": 0.5859739216501944, - "grad_norm": 0.423828125, - "learning_rate": 8.74746252212597e-05, - "loss": 0.9081, - "step": 20425 - }, - { - "epoch": 0.5861173669186522, - "grad_norm": 0.390625, - "learning_rate": 8.742494793173743e-05, - "loss": 1.0093, - "step": 20430 - }, - { - "epoch": 0.58626081218711, - "grad_norm": 0.46875, - "learning_rate": 8.737527379519001e-05, - "loss": 0.9916, - "step": 20435 - }, - { - "epoch": 0.5864042574555678, - "grad_norm": 0.375, - "learning_rate": 8.73256028240723e-05, - "loss": 1.055, - "step": 20440 - }, - { - "epoch": 0.5865477027240257, - "grad_norm": 0.40234375, - "learning_rate": 8.72759350308384e-05, - "loss": 0.9123, - "step": 20445 - }, - { - "epoch": 0.5866911479924835, - "grad_norm": 0.4296875, - "learning_rate": 8.722627042794171e-05, - "loss": 1.0663, - "step": 20450 - }, - { - "epoch": 0.5868345932609413, - "grad_norm": 0.341796875, - "learning_rate": 8.717660902783475e-05, - "loss": 0.8121, - "step": 20455 - }, - { - "epoch": 0.5869780385293991, - "grad_norm": 0.3828125, - "learning_rate": 8.712695084296924e-05, - "loss": 0.8745, - "step": 20460 - }, - { - "epoch": 0.5871214837978569, - "grad_norm": 0.4296875, - "learning_rate": 8.707729588579604e-05, - "loss": 1.0558, - "step": 20465 - }, - { - "epoch": 0.5872649290663148, - "grad_norm": 0.3671875, - "learning_rate": 8.702764416876537e-05, - "loss": 0.951, - "step": 20470 - }, - { - "epoch": 0.5874083743347726, - "grad_norm": 0.37890625, - "learning_rate": 8.697799570432649e-05, - "loss": 0.9715, - "step": 20475 - }, - { - "epoch": 0.5875518196032304, - "grad_norm": 0.40625, - "learning_rate": 8.692835050492785e-05, - "loss": 0.9016, - "step": 20480 - }, - { - "epoch": 0.5876952648716882, - "grad_norm": 0.375, - "learning_rate": 8.687870858301715e-05, - "loss": 0.8613, - "step": 20485 - }, - { - "epoch": 0.587838710140146, - "grad_norm": 0.390625, - "learning_rate": 8.682906995104125e-05, - "loss": 0.9182, - "step": 20490 - }, - { - "epoch": 0.5879821554086039, - "grad_norm": 0.384765625, - "learning_rate": 8.677943462144614e-05, - "loss": 0.916, - "step": 20495 - }, - { - "epoch": 0.5881256006770617, - "grad_norm": 0.421875, - "learning_rate": 8.672980260667702e-05, - "loss": 0.9771, - "step": 20500 - }, - { - "epoch": 0.5882690459455195, - "grad_norm": 0.412109375, - "learning_rate": 8.668017391917827e-05, - "loss": 0.996, - "step": 20505 - }, - { - "epoch": 0.5884124912139773, - "grad_norm": 0.37890625, - "learning_rate": 8.663054857139339e-05, - "loss": 0.973, - "step": 20510 - }, - { - "epoch": 0.5885559364824351, - "grad_norm": 0.376953125, - "learning_rate": 8.658092657576509e-05, - "loss": 0.9201, - "step": 20515 - }, - { - "epoch": 0.588699381750893, - "grad_norm": 0.38671875, - "learning_rate": 8.653130794473517e-05, - "loss": 0.9155, - "step": 20520 - }, - { - "epoch": 0.5888428270193508, - "grad_norm": 0.353515625, - "learning_rate": 8.648169269074472e-05, - "loss": 0.9135, - "step": 
20525 - }, - { - "epoch": 0.5889862722878085, - "grad_norm": 0.396484375, - "learning_rate": 8.643208082623386e-05, - "loss": 0.8986, - "step": 20530 - }, - { - "epoch": 0.5891297175562664, - "grad_norm": 0.392578125, - "learning_rate": 8.638247236364184e-05, - "loss": 0.9519, - "step": 20535 - }, - { - "epoch": 0.5892731628247242, - "grad_norm": 0.419921875, - "learning_rate": 8.633286731540722e-05, - "loss": 0.9475, - "step": 20540 - }, - { - "epoch": 0.5894166080931821, - "grad_norm": 0.384765625, - "learning_rate": 8.628326569396756e-05, - "loss": 0.978, - "step": 20545 - }, - { - "epoch": 0.5895600533616399, - "grad_norm": 0.427734375, - "learning_rate": 8.623366751175958e-05, - "loss": 0.9581, - "step": 20550 - }, - { - "epoch": 0.5897034986300976, - "grad_norm": 0.3671875, - "learning_rate": 8.618407278121912e-05, - "loss": 1.0095, - "step": 20555 - }, - { - "epoch": 0.5898469438985555, - "grad_norm": 0.35546875, - "learning_rate": 8.613448151478131e-05, - "loss": 0.9689, - "step": 20560 - }, - { - "epoch": 0.5899903891670133, - "grad_norm": 0.3984375, - "learning_rate": 8.60848937248802e-05, - "loss": 0.9981, - "step": 20565 - }, - { - "epoch": 0.5901338344354712, - "grad_norm": 0.369140625, - "learning_rate": 8.603530942394908e-05, - "loss": 0.947, - "step": 20570 - }, - { - "epoch": 0.590277279703929, - "grad_norm": 0.40625, - "learning_rate": 8.598572862442036e-05, - "loss": 0.8719, - "step": 20575 - }, - { - "epoch": 0.5904207249723867, - "grad_norm": 0.390625, - "learning_rate": 8.593615133872558e-05, - "loss": 1.0113, - "step": 20580 - }, - { - "epoch": 0.5905641702408446, - "grad_norm": 0.369140625, - "learning_rate": 8.588657757929536e-05, - "loss": 0.9412, - "step": 20585 - }, - { - "epoch": 0.5907076155093024, - "grad_norm": 0.412109375, - "learning_rate": 8.583700735855941e-05, - "loss": 0.9354, - "step": 20590 - }, - { - "epoch": 0.5908510607777603, - "grad_norm": 0.390625, - "learning_rate": 8.578744068894669e-05, - "loss": 1.0302, - "step": 20595 - }, - { - "epoch": 0.5909945060462181, - "grad_norm": 0.419921875, - "learning_rate": 8.57378775828851e-05, - "loss": 0.981, - "step": 20600 - }, - { - "epoch": 0.5911379513146758, - "grad_norm": 0.3828125, - "learning_rate": 8.568831805280173e-05, - "loss": 1.0294, - "step": 20605 - }, - { - "epoch": 0.5912813965831337, - "grad_norm": 0.369140625, - "learning_rate": 8.563876211112282e-05, - "loss": 0.9273, - "step": 20610 - }, - { - "epoch": 0.5914248418515915, - "grad_norm": 0.349609375, - "learning_rate": 8.558920977027362e-05, - "loss": 0.9765, - "step": 20615 - }, - { - "epoch": 0.5915682871200494, - "grad_norm": 0.369140625, - "learning_rate": 8.553966104267852e-05, - "loss": 0.8891, - "step": 20620 - }, - { - "epoch": 0.5917117323885072, - "grad_norm": 0.365234375, - "learning_rate": 8.549011594076095e-05, - "loss": 0.9554, - "step": 20625 - }, - { - "epoch": 0.5918551776569649, - "grad_norm": 0.40234375, - "learning_rate": 8.544057447694358e-05, - "loss": 0.9242, - "step": 20630 - }, - { - "epoch": 0.5919986229254228, - "grad_norm": 0.396484375, - "learning_rate": 8.539103666364801e-05, - "loss": 0.9112, - "step": 20635 - }, - { - "epoch": 0.5921420681938806, - "grad_norm": 0.421875, - "learning_rate": 8.534150251329494e-05, - "loss": 1.0206, - "step": 20640 - }, - { - "epoch": 0.5922855134623385, - "grad_norm": 0.38671875, - "learning_rate": 8.529197203830429e-05, - "loss": 0.9632, - "step": 20645 - }, - { - "epoch": 0.5924289587307963, - "grad_norm": 0.388671875, - "learning_rate": 8.52424452510949e-05, - "loss": 
0.9033, - "step": 20650 - }, - { - "epoch": 0.592572403999254, - "grad_norm": 0.41015625, - "learning_rate": 8.519292216408478e-05, - "loss": 0.891, - "step": 20655 - }, - { - "epoch": 0.5927158492677119, - "grad_norm": 0.349609375, - "learning_rate": 8.514340278969089e-05, - "loss": 0.8633, - "step": 20660 - }, - { - "epoch": 0.5928592945361697, - "grad_norm": 0.3828125, - "learning_rate": 8.509388714032948e-05, - "loss": 0.9658, - "step": 20665 - }, - { - "epoch": 0.5930027398046276, - "grad_norm": 0.390625, - "learning_rate": 8.504437522841566e-05, - "loss": 0.984, - "step": 20670 - }, - { - "epoch": 0.5931461850730854, - "grad_norm": 0.3984375, - "learning_rate": 8.49948670663637e-05, - "loss": 0.9655, - "step": 20675 - }, - { - "epoch": 0.5932896303415431, - "grad_norm": 0.41015625, - "learning_rate": 8.494536266658687e-05, - "loss": 0.9919, - "step": 20680 - }, - { - "epoch": 0.593433075610001, - "grad_norm": 0.408203125, - "learning_rate": 8.489586204149757e-05, - "loss": 1.0782, - "step": 20685 - }, - { - "epoch": 0.5935765208784588, - "grad_norm": 0.396484375, - "learning_rate": 8.484636520350724e-05, - "loss": 1.1129, - "step": 20690 - }, - { - "epoch": 0.5937199661469167, - "grad_norm": 0.419921875, - "learning_rate": 8.479687216502625e-05, - "loss": 0.9289, - "step": 20695 - }, - { - "epoch": 0.5938634114153745, - "grad_norm": 0.40234375, - "learning_rate": 8.474738293846424e-05, - "loss": 1.0362, - "step": 20700 - }, - { - "epoch": 0.5940068566838322, - "grad_norm": 0.423828125, - "learning_rate": 8.46978975362297e-05, - "loss": 1.018, - "step": 20705 - }, - { - "epoch": 0.5941503019522901, - "grad_norm": 0.376953125, - "learning_rate": 8.464841597073024e-05, - "loss": 0.9211, - "step": 20710 - }, - { - "epoch": 0.5942937472207479, - "grad_norm": 0.427734375, - "learning_rate": 8.459893825437248e-05, - "loss": 0.9653, - "step": 20715 - }, - { - "epoch": 0.5944371924892058, - "grad_norm": 0.392578125, - "learning_rate": 8.454946439956213e-05, - "loss": 0.9574, - "step": 20720 - }, - { - "epoch": 0.5945806377576636, - "grad_norm": 0.357421875, - "learning_rate": 8.44999944187039e-05, - "loss": 1.008, - "step": 20725 - }, - { - "epoch": 0.5947240830261213, - "grad_norm": 0.369140625, - "learning_rate": 8.445052832420146e-05, - "loss": 0.9472, - "step": 20730 - }, - { - "epoch": 0.5948675282945792, - "grad_norm": 0.369140625, - "learning_rate": 8.440106612845766e-05, - "loss": 0.9931, - "step": 20735 - }, - { - "epoch": 0.595010973563037, - "grad_norm": 0.404296875, - "learning_rate": 8.435160784387423e-05, - "loss": 0.9755, - "step": 20740 - }, - { - "epoch": 0.5951544188314949, - "grad_norm": 0.421875, - "learning_rate": 8.430215348285198e-05, - "loss": 0.9119, - "step": 20745 - }, - { - "epoch": 0.5952978640999527, - "grad_norm": 0.431640625, - "learning_rate": 8.425270305779069e-05, - "loss": 1.01, - "step": 20750 - }, - { - "epoch": 0.5954413093684104, - "grad_norm": 0.3359375, - "learning_rate": 8.420325658108927e-05, - "loss": 0.8292, - "step": 20755 - }, - { - "epoch": 0.5955847546368683, - "grad_norm": 0.400390625, - "learning_rate": 8.415381406514551e-05, - "loss": 0.9916, - "step": 20760 - }, - { - "epoch": 0.5957281999053261, - "grad_norm": 0.357421875, - "learning_rate": 8.410437552235623e-05, - "loss": 0.8649, - "step": 20765 - }, - { - "epoch": 0.595871645173784, - "grad_norm": 0.359375, - "learning_rate": 8.405494096511737e-05, - "loss": 1.0581, - "step": 20770 - }, - { - "epoch": 0.5960150904422418, - "grad_norm": 0.384765625, - "learning_rate": 
8.400551040582372e-05, - "loss": 0.8685, - "step": 20775 - }, - { - "epoch": 0.5961585357106995, - "grad_norm": 0.4453125, - "learning_rate": 8.395608385686911e-05, - "loss": 0.9053, - "step": 20780 - }, - { - "epoch": 0.5963019809791574, - "grad_norm": 0.376953125, - "learning_rate": 8.390666133064642e-05, - "loss": 0.9704, - "step": 20785 - }, - { - "epoch": 0.5964454262476152, - "grad_norm": 0.359375, - "learning_rate": 8.38572428395475e-05, - "loss": 0.889, - "step": 20790 - }, - { - "epoch": 0.5965888715160731, - "grad_norm": 0.359375, - "learning_rate": 8.380782839596316e-05, - "loss": 0.9637, - "step": 20795 - }, - { - "epoch": 0.5967323167845309, - "grad_norm": 0.376953125, - "learning_rate": 8.37584180122832e-05, - "loss": 0.9877, - "step": 20800 - }, - { - "epoch": 0.5968757620529886, - "grad_norm": 0.44921875, - "learning_rate": 8.370901170089638e-05, - "loss": 0.9718, - "step": 20805 - }, - { - "epoch": 0.5970192073214465, - "grad_norm": 0.44140625, - "learning_rate": 8.365960947419054e-05, - "loss": 0.977, - "step": 20810 - }, - { - "epoch": 0.5971626525899043, - "grad_norm": 0.37109375, - "learning_rate": 8.361021134455238e-05, - "loss": 1.0488, - "step": 20815 - }, - { - "epoch": 0.5973060978583622, - "grad_norm": 0.3984375, - "learning_rate": 8.356081732436759e-05, - "loss": 0.8752, - "step": 20820 - }, - { - "epoch": 0.59744954312682, - "grad_norm": 0.439453125, - "learning_rate": 8.351142742602095e-05, - "loss": 0.9027, - "step": 20825 - }, - { - "epoch": 0.5975929883952777, - "grad_norm": 0.37890625, - "learning_rate": 8.346204166189607e-05, - "loss": 0.8845, - "step": 20830 - }, - { - "epoch": 0.5977364336637356, - "grad_norm": 0.40625, - "learning_rate": 8.341266004437554e-05, - "loss": 0.9869, - "step": 20835 - }, - { - "epoch": 0.5978798789321934, - "grad_norm": 0.37109375, - "learning_rate": 8.336328258584093e-05, - "loss": 0.9732, - "step": 20840 - }, - { - "epoch": 0.5980233242006513, - "grad_norm": 0.37890625, - "learning_rate": 8.331390929867284e-05, - "loss": 0.8894, - "step": 20845 - }, - { - "epoch": 0.5981667694691091, - "grad_norm": 0.412109375, - "learning_rate": 8.326454019525072e-05, - "loss": 0.9815, - "step": 20850 - }, - { - "epoch": 0.5983102147375668, - "grad_norm": 0.373046875, - "learning_rate": 8.321517528795301e-05, - "loss": 0.9106, - "step": 20855 - }, - { - "epoch": 0.5984536600060247, - "grad_norm": 0.423828125, - "learning_rate": 8.316581458915711e-05, - "loss": 0.9359, - "step": 20860 - }, - { - "epoch": 0.5985971052744825, - "grad_norm": 0.388671875, - "learning_rate": 8.311645811123935e-05, - "loss": 0.8818, - "step": 20865 - }, - { - "epoch": 0.5987405505429403, - "grad_norm": 0.41015625, - "learning_rate": 8.3067105866575e-05, - "loss": 0.9823, - "step": 20870 - }, - { - "epoch": 0.5988839958113982, - "grad_norm": 0.37109375, - "learning_rate": 8.301775786753827e-05, - "loss": 1.0018, - "step": 20875 - }, - { - "epoch": 0.599027441079856, - "grad_norm": 0.431640625, - "learning_rate": 8.296841412650233e-05, - "loss": 1.0327, - "step": 20880 - }, - { - "epoch": 0.5991708863483138, - "grad_norm": 0.46875, - "learning_rate": 8.291907465583926e-05, - "loss": 0.9916, - "step": 20885 - }, - { - "epoch": 0.5993143316167716, - "grad_norm": 0.4140625, - "learning_rate": 8.286973946792e-05, - "loss": 0.9273, - "step": 20890 - }, - { - "epoch": 0.5994577768852294, - "grad_norm": 0.341796875, - "learning_rate": 8.28204085751146e-05, - "loss": 0.9621, - "step": 20895 - }, - { - "epoch": 0.5996012221536873, - "grad_norm": 0.400390625, - 
"learning_rate": 8.277108198979188e-05, - "loss": 0.9732, - "step": 20900 - }, - { - "epoch": 0.599744667422145, - "grad_norm": 0.384765625, - "learning_rate": 8.27217597243196e-05, - "loss": 0.9736, - "step": 20905 - }, - { - "epoch": 0.5998881126906029, - "grad_norm": 0.37890625, - "learning_rate": 8.267244179106441e-05, - "loss": 0.9205, - "step": 20910 - }, - { - "epoch": 0.6000315579590607, - "grad_norm": 0.365234375, - "learning_rate": 8.262312820239202e-05, - "loss": 0.9112, - "step": 20915 - }, - { - "epoch": 0.6001750032275185, - "grad_norm": 0.37109375, - "learning_rate": 8.257381897066691e-05, - "loss": 0.989, - "step": 20920 - }, - { - "epoch": 0.6003184484959764, - "grad_norm": 0.400390625, - "learning_rate": 8.252451410825245e-05, - "loss": 0.9063, - "step": 20925 - }, - { - "epoch": 0.6004618937644342, - "grad_norm": 0.427734375, - "learning_rate": 8.24752136275111e-05, - "loss": 0.9374, - "step": 20930 - }, - { - "epoch": 0.600605339032892, - "grad_norm": 0.37890625, - "learning_rate": 8.242591754080401e-05, - "loss": 0.9901, - "step": 20935 - }, - { - "epoch": 0.6007487843013498, - "grad_norm": 0.451171875, - "learning_rate": 8.237662586049133e-05, - "loss": 1.0356, - "step": 20940 - }, - { - "epoch": 0.6008922295698076, - "grad_norm": 0.388671875, - "learning_rate": 8.232733859893207e-05, - "loss": 0.9206, - "step": 20945 - }, - { - "epoch": 0.6010356748382655, - "grad_norm": 0.40625, - "learning_rate": 8.227805576848418e-05, - "loss": 0.9816, - "step": 20950 - }, - { - "epoch": 0.6011791201067233, - "grad_norm": 0.373046875, - "learning_rate": 8.222877738150442e-05, - "loss": 0.9277, - "step": 20955 - }, - { - "epoch": 0.6013225653751811, - "grad_norm": 0.38671875, - "learning_rate": 8.217950345034858e-05, - "loss": 0.9602, - "step": 20960 - }, - { - "epoch": 0.6014660106436389, - "grad_norm": 0.375, - "learning_rate": 8.213023398737113e-05, - "loss": 0.9853, - "step": 20965 - }, - { - "epoch": 0.6016094559120967, - "grad_norm": 0.40625, - "learning_rate": 8.208096900492562e-05, - "loss": 0.8776, - "step": 20970 - }, - { - "epoch": 0.6017529011805546, - "grad_norm": 0.390625, - "learning_rate": 8.203170851536433e-05, - "loss": 1.0153, - "step": 20975 - }, - { - "epoch": 0.6018963464490124, - "grad_norm": 0.38671875, - "learning_rate": 8.198245253103843e-05, - "loss": 0.9892, - "step": 20980 - }, - { - "epoch": 0.6020397917174702, - "grad_norm": 0.4296875, - "learning_rate": 8.19332010642981e-05, - "loss": 1.0264, - "step": 20985 - }, - { - "epoch": 0.602183236985928, - "grad_norm": 0.43359375, - "learning_rate": 8.188395412749223e-05, - "loss": 1.0492, - "step": 20990 - }, - { - "epoch": 0.6023266822543858, - "grad_norm": 0.3828125, - "learning_rate": 8.183471173296863e-05, - "loss": 0.9213, - "step": 20995 - }, - { - "epoch": 0.6024701275228437, - "grad_norm": 0.357421875, - "learning_rate": 8.178547389307393e-05, - "loss": 0.9503, - "step": 21000 - }, - { - "epoch": 0.6026135727913015, - "grad_norm": 0.396484375, - "learning_rate": 8.173624062015374e-05, - "loss": 1.0237, - "step": 21005 - }, - { - "epoch": 0.6027570180597593, - "grad_norm": 0.3671875, - "learning_rate": 8.168701192655243e-05, - "loss": 1.0341, - "step": 21010 - }, - { - "epoch": 0.6029004633282171, - "grad_norm": 0.369140625, - "learning_rate": 8.163778782461315e-05, - "loss": 0.9217, - "step": 21015 - }, - { - "epoch": 0.6030439085966749, - "grad_norm": 0.439453125, - "learning_rate": 8.158856832667811e-05, - "loss": 0.9171, - "step": 21020 - }, - { - "epoch": 0.6031873538651328, - "grad_norm": 
0.37109375, - "learning_rate": 8.153935344508816e-05, - "loss": 0.9661, - "step": 21025 - }, - { - "epoch": 0.6033307991335906, - "grad_norm": 0.3984375, - "learning_rate": 8.14901431921831e-05, - "loss": 0.9047, - "step": 21030 - }, - { - "epoch": 0.6034742444020484, - "grad_norm": 0.384765625, - "learning_rate": 8.144093758030149e-05, - "loss": 0.9545, - "step": 21035 - }, - { - "epoch": 0.6036176896705062, - "grad_norm": 0.37109375, - "learning_rate": 8.139173662178086e-05, - "loss": 0.9762, - "step": 21040 - }, - { - "epoch": 0.603761134938964, - "grad_norm": 0.443359375, - "learning_rate": 8.134254032895742e-05, - "loss": 0.9207, - "step": 21045 - }, - { - "epoch": 0.6039045802074219, - "grad_norm": 0.423828125, - "learning_rate": 8.129334871416632e-05, - "loss": 0.9893, - "step": 21050 - }, - { - "epoch": 0.6040480254758797, - "grad_norm": 0.404296875, - "learning_rate": 8.12441617897415e-05, - "loss": 0.9134, - "step": 21055 - }, - { - "epoch": 0.6041914707443375, - "grad_norm": 0.37890625, - "learning_rate": 8.119497956801571e-05, - "loss": 0.9769, - "step": 21060 - }, - { - "epoch": 0.6043349160127953, - "grad_norm": 0.421875, - "learning_rate": 8.114580206132057e-05, - "loss": 0.9832, - "step": 21065 - }, - { - "epoch": 0.6044783612812531, - "grad_norm": 0.388671875, - "learning_rate": 8.109662928198638e-05, - "loss": 1.0498, - "step": 21070 - }, - { - "epoch": 0.604621806549711, - "grad_norm": 0.361328125, - "learning_rate": 8.104746124234247e-05, - "loss": 0.9655, - "step": 21075 - }, - { - "epoch": 0.6047652518181688, - "grad_norm": 0.380859375, - "learning_rate": 8.099829795471683e-05, - "loss": 0.9886, - "step": 21080 - }, - { - "epoch": 0.6049086970866266, - "grad_norm": 0.3828125, - "learning_rate": 8.094913943143623e-05, - "loss": 0.9461, - "step": 21085 - }, - { - "epoch": 0.6050521423550844, - "grad_norm": 0.359375, - "learning_rate": 8.089998568482643e-05, - "loss": 1.0023, - "step": 21090 - }, - { - "epoch": 0.6051955876235422, - "grad_norm": 0.419921875, - "learning_rate": 8.085083672721181e-05, - "loss": 0.9109, - "step": 21095 - }, - { - "epoch": 0.6053390328920001, - "grad_norm": 0.37890625, - "learning_rate": 8.080169257091562e-05, - "loss": 0.9568, - "step": 21100 - }, - { - "epoch": 0.6054824781604579, - "grad_norm": 0.439453125, - "learning_rate": 8.075255322825984e-05, - "loss": 1.0153, - "step": 21105 - }, - { - "epoch": 0.6056259234289157, - "grad_norm": 0.416015625, - "learning_rate": 8.070341871156541e-05, - "loss": 1.0222, - "step": 21110 - }, - { - "epoch": 0.6057693686973735, - "grad_norm": 0.37890625, - "learning_rate": 8.065428903315191e-05, - "loss": 0.9239, - "step": 21115 - }, - { - "epoch": 0.6059128139658313, - "grad_norm": 0.359375, - "learning_rate": 8.060516420533774e-05, - "loss": 0.9675, - "step": 21120 - }, - { - "epoch": 0.6060562592342892, - "grad_norm": 0.345703125, - "learning_rate": 8.055604424044007e-05, - "loss": 0.8849, - "step": 21125 - }, - { - "epoch": 0.606199704502747, - "grad_norm": 0.37890625, - "learning_rate": 8.050692915077489e-05, - "loss": 0.8903, - "step": 21130 - }, - { - "epoch": 0.6063431497712048, - "grad_norm": 0.390625, - "learning_rate": 8.0457818948657e-05, - "loss": 0.9126, - "step": 21135 - }, - { - "epoch": 0.6064865950396626, - "grad_norm": 0.3828125, - "learning_rate": 8.040871364639983e-05, - "loss": 0.925, - "step": 21140 - }, - { - "epoch": 0.6066300403081204, - "grad_norm": 0.40625, - "learning_rate": 8.035961325631579e-05, - "loss": 0.9343, - "step": 21145 - }, - { - "epoch": 0.6067734855765783, - 
"grad_norm": 0.388671875, - "learning_rate": 8.031051779071587e-05, - "loss": 0.9819, - "step": 21150 - }, - { - "epoch": 0.6069169308450361, - "grad_norm": 0.37109375, - "learning_rate": 8.026142726190993e-05, - "loss": 1.0132, - "step": 21155 - }, - { - "epoch": 0.607060376113494, - "grad_norm": 0.384765625, - "learning_rate": 8.021234168220649e-05, - "loss": 0.8954, - "step": 21160 - }, - { - "epoch": 0.6072038213819517, - "grad_norm": 0.380859375, - "learning_rate": 8.016326106391303e-05, - "loss": 1.0066, - "step": 21165 - }, - { - "epoch": 0.6073472666504095, - "grad_norm": 0.412109375, - "learning_rate": 8.011418541933558e-05, - "loss": 1.0061, - "step": 21170 - }, - { - "epoch": 0.6074907119188674, - "grad_norm": 0.3984375, - "learning_rate": 8.006511476077896e-05, - "loss": 0.9082, - "step": 21175 - }, - { - "epoch": 0.6076341571873252, - "grad_norm": 0.3984375, - "learning_rate": 8.00160491005469e-05, - "loss": 0.9933, - "step": 21180 - }, - { - "epoch": 0.607777602455783, - "grad_norm": 0.392578125, - "learning_rate": 7.996698845094169e-05, - "loss": 0.9065, - "step": 21185 - }, - { - "epoch": 0.6079210477242408, - "grad_norm": 0.365234375, - "learning_rate": 7.991793282426442e-05, - "loss": 0.9306, - "step": 21190 - }, - { - "epoch": 0.6080644929926986, - "grad_norm": 0.400390625, - "learning_rate": 7.986888223281491e-05, - "loss": 0.9287, - "step": 21195 - }, - { - "epoch": 0.6082079382611565, - "grad_norm": 0.423828125, - "learning_rate": 7.981983668889182e-05, - "loss": 1.0532, - "step": 21200 - }, - { - "epoch": 0.6083513835296143, - "grad_norm": 0.375, - "learning_rate": 7.977079620479244e-05, - "loss": 0.9927, - "step": 21205 - }, - { - "epoch": 0.608494828798072, - "grad_norm": 0.4140625, - "learning_rate": 7.972176079281275e-05, - "loss": 0.9124, - "step": 21210 - }, - { - "epoch": 0.6086382740665299, - "grad_norm": 0.380859375, - "learning_rate": 7.967273046524757e-05, - "loss": 0.9294, - "step": 21215 - }, - { - "epoch": 0.6087817193349877, - "grad_norm": 0.369140625, - "learning_rate": 7.962370523439044e-05, - "loss": 0.9738, - "step": 21220 - }, - { - "epoch": 0.6089251646034456, - "grad_norm": 0.4140625, - "learning_rate": 7.957468511253353e-05, - "loss": 1.0362, - "step": 21225 - }, - { - "epoch": 0.6090686098719034, - "grad_norm": 0.37890625, - "learning_rate": 7.952567011196774e-05, - "loss": 0.9331, - "step": 21230 - }, - { - "epoch": 0.6092120551403611, - "grad_norm": 0.439453125, - "learning_rate": 7.947666024498283e-05, - "loss": 1.0015, - "step": 21235 - }, - { - "epoch": 0.609355500408819, - "grad_norm": 0.41796875, - "learning_rate": 7.942765552386709e-05, - "loss": 0.8899, - "step": 21240 - }, - { - "epoch": 0.6094989456772768, - "grad_norm": 0.408203125, - "learning_rate": 7.937865596090763e-05, - "loss": 1.0798, - "step": 21245 - }, - { - "epoch": 0.6096423909457347, - "grad_norm": 0.3515625, - "learning_rate": 7.932966156839018e-05, - "loss": 0.9559, - "step": 21250 - }, - { - "epoch": 0.6097858362141925, - "grad_norm": 0.341796875, - "learning_rate": 7.928067235859934e-05, - "loss": 0.8375, - "step": 21255 - }, - { - "epoch": 0.6099292814826502, - "grad_norm": 0.365234375, - "learning_rate": 7.923168834381822e-05, - "loss": 0.9229, - "step": 21260 - }, - { - "epoch": 0.6100727267511081, - "grad_norm": 0.37890625, - "learning_rate": 7.918270953632869e-05, - "loss": 0.9884, - "step": 21265 - }, - { - "epoch": 0.6102161720195659, - "grad_norm": 0.400390625, - "learning_rate": 7.913373594841139e-05, - "loss": 0.8836, - "step": 21270 - }, - { - "epoch": 
0.6103596172880238, - "grad_norm": 0.3671875, - "learning_rate": 7.908476759234555e-05, - "loss": 0.8951, - "step": 21275 - }, - { - "epoch": 0.6105030625564816, - "grad_norm": 0.361328125, - "learning_rate": 7.903580448040917e-05, - "loss": 0.9194, - "step": 21280 - }, - { - "epoch": 0.6106465078249393, - "grad_norm": 0.396484375, - "learning_rate": 7.898684662487882e-05, - "loss": 0.9885, - "step": 21285 - }, - { - "epoch": 0.6107899530933972, - "grad_norm": 0.37890625, - "learning_rate": 7.893789403802992e-05, - "loss": 0.9431, - "step": 21290 - }, - { - "epoch": 0.610933398361855, - "grad_norm": 0.392578125, - "learning_rate": 7.888894673213643e-05, - "loss": 1.0074, - "step": 21295 - }, - { - "epoch": 0.6110768436303129, - "grad_norm": 0.408203125, - "learning_rate": 7.884000471947104e-05, - "loss": 0.9255, - "step": 21300 - }, - { - "epoch": 0.6112202888987707, - "grad_norm": 0.37109375, - "learning_rate": 7.879106801230508e-05, - "loss": 0.892, - "step": 21305 - }, - { - "epoch": 0.6113637341672284, - "grad_norm": 0.455078125, - "learning_rate": 7.874213662290862e-05, - "loss": 0.9305, - "step": 21310 - }, - { - "epoch": 0.6115071794356863, - "grad_norm": 0.359375, - "learning_rate": 7.869321056355035e-05, - "loss": 0.9235, - "step": 21315 - }, - { - "epoch": 0.6116506247041441, - "grad_norm": 0.408203125, - "learning_rate": 7.864428984649757e-05, - "loss": 0.9152, - "step": 21320 - }, - { - "epoch": 0.611794069972602, - "grad_norm": 0.357421875, - "learning_rate": 7.859537448401634e-05, - "loss": 0.895, - "step": 21325 - }, - { - "epoch": 0.6119375152410598, - "grad_norm": 0.380859375, - "learning_rate": 7.854646448837134e-05, - "loss": 0.9504, - "step": 21330 - }, - { - "epoch": 0.6120809605095175, - "grad_norm": 0.376953125, - "learning_rate": 7.849755987182584e-05, - "loss": 0.9348, - "step": 21335 - }, - { - "epoch": 0.6122244057779754, - "grad_norm": 0.369140625, - "learning_rate": 7.844866064664189e-05, - "loss": 0.9448, - "step": 21340 - }, - { - "epoch": 0.6123678510464332, - "grad_norm": 0.390625, - "learning_rate": 7.83997668250801e-05, - "loss": 0.9875, - "step": 21345 - }, - { - "epoch": 0.6125112963148911, - "grad_norm": 0.3515625, - "learning_rate": 7.835087841939973e-05, - "loss": 0.9545, - "step": 21350 - }, - { - "epoch": 0.6126547415833489, - "grad_norm": 0.37890625, - "learning_rate": 7.830199544185864e-05, - "loss": 0.9234, - "step": 21355 - }, - { - "epoch": 0.6127981868518066, - "grad_norm": 0.41796875, - "learning_rate": 7.82531179047135e-05, - "loss": 1.0109, - "step": 21360 - }, - { - "epoch": 0.6129416321202645, - "grad_norm": 0.3828125, - "learning_rate": 7.820424582021942e-05, - "loss": 1.0446, - "step": 21365 - }, - { - "epoch": 0.6130850773887223, - "grad_norm": 0.380859375, - "learning_rate": 7.815537920063019e-05, - "loss": 0.9928, - "step": 21370 - }, - { - "epoch": 0.6132285226571802, - "grad_norm": 0.38671875, - "learning_rate": 7.810651805819838e-05, - "loss": 0.972, - "step": 21375 - }, - { - "epoch": 0.613371967925638, - "grad_norm": 0.3671875, - "learning_rate": 7.805766240517498e-05, - "loss": 0.91, - "step": 21380 - }, - { - "epoch": 0.6135154131940957, - "grad_norm": 0.3671875, - "learning_rate": 7.800881225380972e-05, - "loss": 0.9192, - "step": 21385 - }, - { - "epoch": 0.6136588584625536, - "grad_norm": 0.37890625, - "learning_rate": 7.795996761635087e-05, - "loss": 0.9674, - "step": 21390 - }, - { - "epoch": 0.6138023037310114, - "grad_norm": 0.376953125, - "learning_rate": 7.791112850504547e-05, - "loss": 1.0002, - "step": 21395 - }, - 
{ - "epoch": 0.6139457489994693, - "grad_norm": 0.353515625, - "learning_rate": 7.786229493213901e-05, - "loss": 0.9335, - "step": 21400 - }, - { - "epoch": 0.6140891942679271, - "grad_norm": 0.3515625, - "learning_rate": 7.781346690987565e-05, - "loss": 0.8654, - "step": 21405 - }, - { - "epoch": 0.6142326395363849, - "grad_norm": 0.373046875, - "learning_rate": 7.776464445049817e-05, - "loss": 0.9485, - "step": 21410 - }, - { - "epoch": 0.6143760848048427, - "grad_norm": 0.470703125, - "learning_rate": 7.771582756624798e-05, - "loss": 0.9434, - "step": 21415 - }, - { - "epoch": 0.6145195300733005, - "grad_norm": 0.41015625, - "learning_rate": 7.766701626936505e-05, - "loss": 1.0581, - "step": 21420 - }, - { - "epoch": 0.6146629753417584, - "grad_norm": 0.3984375, - "learning_rate": 7.76182105720879e-05, - "loss": 0.9295, - "step": 21425 - }, - { - "epoch": 0.6148064206102162, - "grad_norm": 0.396484375, - "learning_rate": 7.75694104866538e-05, - "loss": 0.9253, - "step": 21430 - }, - { - "epoch": 0.614949865878674, - "grad_norm": 0.36328125, - "learning_rate": 7.752061602529847e-05, - "loss": 0.9076, - "step": 21435 - }, - { - "epoch": 0.6150933111471318, - "grad_norm": 0.380859375, - "learning_rate": 7.74718272002563e-05, - "loss": 1.0346, - "step": 21440 - }, - { - "epoch": 0.6152367564155896, - "grad_norm": 0.388671875, - "learning_rate": 7.742304402376015e-05, - "loss": 0.9229, - "step": 21445 - }, - { - "epoch": 0.6153802016840475, - "grad_norm": 0.3828125, - "learning_rate": 7.737426650804168e-05, - "loss": 0.9691, - "step": 21450 - }, - { - "epoch": 0.6155236469525053, - "grad_norm": 0.376953125, - "learning_rate": 7.73254946653309e-05, - "loss": 0.9029, - "step": 21455 - }, - { - "epoch": 0.615667092220963, - "grad_norm": 0.3984375, - "learning_rate": 7.727672850785651e-05, - "loss": 0.978, - "step": 21460 - }, - { - "epoch": 0.6158105374894209, - "grad_norm": 0.46484375, - "learning_rate": 7.722796804784583e-05, - "loss": 1.1564, - "step": 21465 - }, - { - "epoch": 0.6159539827578787, - "grad_norm": 0.375, - "learning_rate": 7.717921329752466e-05, - "loss": 0.9104, - "step": 21470 - }, - { - "epoch": 0.6160974280263366, - "grad_norm": 0.41796875, - "learning_rate": 7.713046426911742e-05, - "loss": 0.859, - "step": 21475 - }, - { - "epoch": 0.6162408732947944, - "grad_norm": 0.4140625, - "learning_rate": 7.708172097484699e-05, - "loss": 0.9225, - "step": 21480 - }, - { - "epoch": 0.6163843185632522, - "grad_norm": 0.40234375, - "learning_rate": 7.703298342693504e-05, - "loss": 0.9751, - "step": 21485 - }, - { - "epoch": 0.61652776383171, - "grad_norm": 0.404296875, - "learning_rate": 7.698425163760156e-05, - "loss": 0.93, - "step": 21490 - }, - { - "epoch": 0.6166712091001678, - "grad_norm": 0.41796875, - "learning_rate": 7.693552561906522e-05, - "loss": 0.9147, - "step": 21495 - }, - { - "epoch": 0.6168146543686257, - "grad_norm": 0.369140625, - "learning_rate": 7.688680538354323e-05, - "loss": 1.0092, - "step": 21500 - }, - { - "epoch": 0.6169580996370835, - "grad_norm": 0.376953125, - "learning_rate": 7.683809094325135e-05, - "loss": 0.9278, - "step": 21505 - }, - { - "epoch": 0.6171015449055413, - "grad_norm": 0.376953125, - "learning_rate": 7.678938231040383e-05, - "loss": 0.8557, - "step": 21510 - }, - { - "epoch": 0.6172449901739991, - "grad_norm": 0.39453125, - "learning_rate": 7.674067949721354e-05, - "loss": 1.0049, - "step": 21515 - }, - { - "epoch": 0.6173884354424569, - "grad_norm": 0.361328125, - "learning_rate": 7.669198251589188e-05, - "loss": 1.0002, - "step": 
21520 - }, - { - "epoch": 0.6175318807109148, - "grad_norm": 0.376953125, - "learning_rate": 7.664329137864873e-05, - "loss": 1.0278, - "step": 21525 - }, - { - "epoch": 0.6176753259793726, - "grad_norm": 0.3515625, - "learning_rate": 7.659460609769252e-05, - "loss": 0.9906, - "step": 21530 - }, - { - "epoch": 0.6178187712478304, - "grad_norm": 0.375, - "learning_rate": 7.654592668523033e-05, - "loss": 0.9142, - "step": 21535 - }, - { - "epoch": 0.6179622165162882, - "grad_norm": 0.36328125, - "learning_rate": 7.649725315346761e-05, - "loss": 0.9108, - "step": 21540 - }, - { - "epoch": 0.618105661784746, - "grad_norm": 0.36328125, - "learning_rate": 7.644858551460838e-05, - "loss": 0.875, - "step": 21545 - }, - { - "epoch": 0.6182491070532038, - "grad_norm": 0.384765625, - "learning_rate": 7.639992378085521e-05, - "loss": 0.9746, - "step": 21550 - }, - { - "epoch": 0.6183925523216617, - "grad_norm": 0.365234375, - "learning_rate": 7.635126796440922e-05, - "loss": 0.9846, - "step": 21555 - }, - { - "epoch": 0.6185359975901195, - "grad_norm": 0.3828125, - "learning_rate": 7.630261807747e-05, - "loss": 0.8863, - "step": 21560 - }, - { - "epoch": 0.6186794428585773, - "grad_norm": 0.369140625, - "learning_rate": 7.625397413223563e-05, - "loss": 0.9732, - "step": 21565 - }, - { - "epoch": 0.6188228881270351, - "grad_norm": 0.365234375, - "learning_rate": 7.620533614090269e-05, - "loss": 0.9923, - "step": 21570 - }, - { - "epoch": 0.6189663333954929, - "grad_norm": 0.3515625, - "learning_rate": 7.615670411566643e-05, - "loss": 0.9732, - "step": 21575 - }, - { - "epoch": 0.6191097786639508, - "grad_norm": 0.447265625, - "learning_rate": 7.610807806872038e-05, - "loss": 1.054, - "step": 21580 - }, - { - "epoch": 0.6192532239324086, - "grad_norm": 0.416015625, - "learning_rate": 7.605945801225669e-05, - "loss": 0.958, - "step": 21585 - }, - { - "epoch": 0.6193966692008664, - "grad_norm": 0.451171875, - "learning_rate": 7.601084395846603e-05, - "loss": 0.9437, - "step": 21590 - }, - { - "epoch": 0.6195401144693242, - "grad_norm": 0.365234375, - "learning_rate": 7.596223591953749e-05, - "loss": 0.8995, - "step": 21595 - }, - { - "epoch": 0.619683559737782, - "grad_norm": 0.349609375, - "learning_rate": 7.591363390765868e-05, - "loss": 0.999, - "step": 21600 - }, - { - "epoch": 0.6198270050062399, - "grad_norm": 0.390625, - "learning_rate": 7.586503793501573e-05, - "loss": 0.9495, - "step": 21605 - }, - { - "epoch": 0.6199704502746977, - "grad_norm": 0.369140625, - "learning_rate": 7.581644801379324e-05, - "loss": 0.8973, - "step": 21610 - }, - { - "epoch": 0.6201138955431555, - "grad_norm": 0.365234375, - "learning_rate": 7.576786415617427e-05, - "loss": 0.9084, - "step": 21615 - }, - { - "epoch": 0.6202573408116133, - "grad_norm": 0.400390625, - "learning_rate": 7.571928637434031e-05, - "loss": 1.0043, - "step": 21620 - }, - { - "epoch": 0.6204007860800711, - "grad_norm": 0.4140625, - "learning_rate": 7.567071468047153e-05, - "loss": 1.0717, - "step": 21625 - }, - { - "epoch": 0.620544231348529, - "grad_norm": 0.38671875, - "learning_rate": 7.562214908674633e-05, - "loss": 0.9786, - "step": 21630 - }, - { - "epoch": 0.6206876766169868, - "grad_norm": 0.396484375, - "learning_rate": 7.557358960534174e-05, - "loss": 0.9657, - "step": 21635 - }, - { - "epoch": 0.6208311218854446, - "grad_norm": 0.39453125, - "learning_rate": 7.55250362484331e-05, - "loss": 0.9872, - "step": 21640 - }, - { - "epoch": 0.6209745671539024, - "grad_norm": 0.423828125, - "learning_rate": 7.547648902819445e-05, - "loss": 
0.9504, - "step": 21645 - }, - { - "epoch": 0.6211180124223602, - "grad_norm": 0.35546875, - "learning_rate": 7.542794795679811e-05, - "loss": 0.9251, - "step": 21650 - }, - { - "epoch": 0.6212614576908181, - "grad_norm": 0.37890625, - "learning_rate": 7.537941304641483e-05, - "loss": 0.9248, - "step": 21655 - }, - { - "epoch": 0.6214049029592759, - "grad_norm": 0.42578125, - "learning_rate": 7.533088430921402e-05, - "loss": 0.9527, - "step": 21660 - }, - { - "epoch": 0.6215483482277337, - "grad_norm": 0.39453125, - "learning_rate": 7.528236175736334e-05, - "loss": 0.896, - "step": 21665 - }, - { - "epoch": 0.6216917934961915, - "grad_norm": 0.400390625, - "learning_rate": 7.5233845403029e-05, - "loss": 0.9592, - "step": 21670 - }, - { - "epoch": 0.6218352387646493, - "grad_norm": 0.392578125, - "learning_rate": 7.518533525837558e-05, - "loss": 0.9568, - "step": 21675 - }, - { - "epoch": 0.6219786840331072, - "grad_norm": 0.359375, - "learning_rate": 7.51368313355662e-05, - "loss": 0.8508, - "step": 21680 - }, - { - "epoch": 0.622122129301565, - "grad_norm": 0.396484375, - "learning_rate": 7.508833364676236e-05, - "loss": 0.8706, - "step": 21685 - }, - { - "epoch": 0.6222655745700229, - "grad_norm": 0.3984375, - "learning_rate": 7.5039842204124e-05, - "loss": 0.9336, - "step": 21690 - }, - { - "epoch": 0.6224090198384806, - "grad_norm": 0.3828125, - "learning_rate": 7.499135701980951e-05, - "loss": 0.8502, - "step": 21695 - }, - { - "epoch": 0.6225524651069384, - "grad_norm": 0.34375, - "learning_rate": 7.49428781059757e-05, - "loss": 0.9734, - "step": 21700 - }, - { - "epoch": 0.6226959103753963, - "grad_norm": 0.369140625, - "learning_rate": 7.489440547477782e-05, - "loss": 0.9173, - "step": 21705 - }, - { - "epoch": 0.6228393556438541, - "grad_norm": 0.361328125, - "learning_rate": 7.484593913836951e-05, - "loss": 0.9236, - "step": 21710 - }, - { - "epoch": 0.622982800912312, - "grad_norm": 0.359375, - "learning_rate": 7.47974791089029e-05, - "loss": 0.9499, - "step": 21715 - }, - { - "epoch": 0.6231262461807697, - "grad_norm": 0.392578125, - "learning_rate": 7.474902539852848e-05, - "loss": 0.8886, - "step": 21720 - }, - { - "epoch": 0.6232696914492275, - "grad_norm": 0.40234375, - "learning_rate": 7.470057801939516e-05, - "loss": 0.921, - "step": 21725 - }, - { - "epoch": 0.6234131367176854, - "grad_norm": 0.3984375, - "learning_rate": 7.465213698365026e-05, - "loss": 1.0339, - "step": 21730 - }, - { - "epoch": 0.6235565819861432, - "grad_norm": 0.37109375, - "learning_rate": 7.460370230343956e-05, - "loss": 1.0017, - "step": 21735 - }, - { - "epoch": 0.623700027254601, - "grad_norm": 0.408203125, - "learning_rate": 7.455527399090721e-05, - "loss": 0.9485, - "step": 21740 - }, - { - "epoch": 0.6238434725230588, - "grad_norm": 0.3671875, - "learning_rate": 7.450685205819571e-05, - "loss": 0.8801, - "step": 21745 - }, - { - "epoch": 0.6239869177915166, - "grad_norm": 0.3671875, - "learning_rate": 7.445843651744609e-05, - "loss": 0.9557, - "step": 21750 - }, - { - "epoch": 0.6241303630599745, - "grad_norm": 0.3984375, - "learning_rate": 7.441002738079768e-05, - "loss": 1.0078, - "step": 21755 - }, - { - "epoch": 0.6242738083284323, - "grad_norm": 0.380859375, - "learning_rate": 7.436162466038818e-05, - "loss": 0.9045, - "step": 21760 - }, - { - "epoch": 0.6244172535968902, - "grad_norm": 0.357421875, - "learning_rate": 7.431322836835377e-05, - "loss": 0.8905, - "step": 21765 - }, - { - "epoch": 0.6245606988653479, - "grad_norm": 0.380859375, - "learning_rate": 7.426483851682898e-05, - 
"loss": 0.9822, - "step": 21770 - }, - { - "epoch": 0.6247041441338057, - "grad_norm": 0.388671875, - "learning_rate": 7.421645511794668e-05, - "loss": 1.0354, - "step": 21775 - }, - { - "epoch": 0.6248475894022636, - "grad_norm": 0.376953125, - "learning_rate": 7.416807818383817e-05, - "loss": 0.9451, - "step": 21780 - }, - { - "epoch": 0.6249910346707214, - "grad_norm": 0.365234375, - "learning_rate": 7.41197077266332e-05, - "loss": 0.9095, - "step": 21785 - }, - { - "epoch": 0.6251344799391793, - "grad_norm": 0.353515625, - "learning_rate": 7.407134375845972e-05, - "loss": 0.8866, - "step": 21790 - }, - { - "epoch": 0.625277925207637, - "grad_norm": 0.369140625, - "learning_rate": 7.40229862914442e-05, - "loss": 0.9938, - "step": 21795 - }, - { - "epoch": 0.6254213704760948, - "grad_norm": 0.376953125, - "learning_rate": 7.397463533771139e-05, - "loss": 0.9253, - "step": 21800 - }, - { - "epoch": 0.6255648157445527, - "grad_norm": 0.3984375, - "learning_rate": 7.392629090938451e-05, - "loss": 0.8777, - "step": 21805 - }, - { - "epoch": 0.6257082610130105, - "grad_norm": 0.376953125, - "learning_rate": 7.387795301858504e-05, - "loss": 0.9373, - "step": 21810 - }, - { - "epoch": 0.6258517062814684, - "grad_norm": 0.3671875, - "learning_rate": 7.382962167743284e-05, - "loss": 1.0273, - "step": 21815 - }, - { - "epoch": 0.6259951515499261, - "grad_norm": 0.41015625, - "learning_rate": 7.378129689804623e-05, - "loss": 0.9064, - "step": 21820 - }, - { - "epoch": 0.6261385968183839, - "grad_norm": 0.3515625, - "learning_rate": 7.373297869254174e-05, - "loss": 0.9362, - "step": 21825 - }, - { - "epoch": 0.6262820420868418, - "grad_norm": 0.3984375, - "learning_rate": 7.368466707303434e-05, - "loss": 1.0643, - "step": 21830 - }, - { - "epoch": 0.6264254873552996, - "grad_norm": 0.41796875, - "learning_rate": 7.363636205163727e-05, - "loss": 0.9422, - "step": 21835 - }, - { - "epoch": 0.6265689326237575, - "grad_norm": 0.3984375, - "learning_rate": 7.358806364046226e-05, - "loss": 1.0835, - "step": 21840 - }, - { - "epoch": 0.6267123778922152, - "grad_norm": 0.3515625, - "learning_rate": 7.353977185161926e-05, - "loss": 0.8976, - "step": 21845 - }, - { - "epoch": 0.626855823160673, - "grad_norm": 0.43359375, - "learning_rate": 7.349148669721658e-05, - "loss": 0.972, - "step": 21850 - }, - { - "epoch": 0.6269992684291309, - "grad_norm": 0.3671875, - "learning_rate": 7.344320818936086e-05, - "loss": 1.0169, - "step": 21855 - }, - { - "epoch": 0.6271427136975887, - "grad_norm": 0.384765625, - "learning_rate": 7.339493634015711e-05, - "loss": 0.8915, - "step": 21860 - }, - { - "epoch": 0.6272861589660466, - "grad_norm": 0.37890625, - "learning_rate": 7.33466711617087e-05, - "loss": 0.9742, - "step": 21865 - }, - { - "epoch": 0.6274296042345043, - "grad_norm": 0.37109375, - "learning_rate": 7.329841266611721e-05, - "loss": 0.8932, - "step": 21870 - }, - { - "epoch": 0.6275730495029621, - "grad_norm": 0.376953125, - "learning_rate": 7.325016086548268e-05, - "loss": 0.8756, - "step": 21875 - }, - { - "epoch": 0.62771649477142, - "grad_norm": 0.42578125, - "learning_rate": 7.320191577190336e-05, - "loss": 1.0183, - "step": 21880 - }, - { - "epoch": 0.6278599400398778, - "grad_norm": 0.365234375, - "learning_rate": 7.315367739747589e-05, - "loss": 0.8635, - "step": 21885 - }, - { - "epoch": 0.6280033853083357, - "grad_norm": 0.375, - "learning_rate": 7.310544575429514e-05, - "loss": 0.9243, - "step": 21890 - }, - { - "epoch": 0.6281468305767934, - "grad_norm": 0.3515625, - "learning_rate": 
7.305722085445445e-05, - "loss": 0.8433, - "step": 21895 - }, - { - "epoch": 0.6282902758452512, - "grad_norm": 0.392578125, - "learning_rate": 7.300900271004534e-05, - "loss": 0.9949, - "step": 21900 - }, - { - "epoch": 0.6284337211137091, - "grad_norm": 0.376953125, - "learning_rate": 7.296079133315759e-05, - "loss": 1.0129, - "step": 21905 - }, - { - "epoch": 0.6285771663821669, - "grad_norm": 0.427734375, - "learning_rate": 7.291258673587947e-05, - "loss": 0.9627, - "step": 21910 - }, - { - "epoch": 0.6287206116506247, - "grad_norm": 0.365234375, - "learning_rate": 7.286438893029739e-05, - "loss": 0.9461, - "step": 21915 - }, - { - "epoch": 0.6288640569190825, - "grad_norm": 0.36328125, - "learning_rate": 7.281619792849612e-05, - "loss": 0.9119, - "step": 21920 - }, - { - "epoch": 0.6290075021875403, - "grad_norm": 0.41015625, - "learning_rate": 7.276801374255866e-05, - "loss": 1.1292, - "step": 21925 - }, - { - "epoch": 0.6291509474559982, - "grad_norm": 0.390625, - "learning_rate": 7.271983638456644e-05, - "loss": 0.9486, - "step": 21930 - }, - { - "epoch": 0.629294392724456, - "grad_norm": 0.400390625, - "learning_rate": 7.267166586659906e-05, - "loss": 0.9921, - "step": 21935 - }, - { - "epoch": 0.6294378379929138, - "grad_norm": 0.384765625, - "learning_rate": 7.26235022007344e-05, - "loss": 0.9851, - "step": 21940 - }, - { - "epoch": 0.6295812832613716, - "grad_norm": 0.400390625, - "learning_rate": 7.25753453990487e-05, - "loss": 0.9384, - "step": 21945 - }, - { - "epoch": 0.6297247285298294, - "grad_norm": 0.3828125, - "learning_rate": 7.252719547361641e-05, - "loss": 0.9593, - "step": 21950 - }, - { - "epoch": 0.6298681737982873, - "grad_norm": 0.404296875, - "learning_rate": 7.247905243651032e-05, - "loss": 1.0005, - "step": 21955 - }, - { - "epoch": 0.6300116190667451, - "grad_norm": 0.38671875, - "learning_rate": 7.243091629980141e-05, - "loss": 0.9821, - "step": 21960 - }, - { - "epoch": 0.6301550643352029, - "grad_norm": 0.353515625, - "learning_rate": 7.238278707555901e-05, - "loss": 0.8479, - "step": 21965 - }, - { - "epoch": 0.6302985096036607, - "grad_norm": 0.41796875, - "learning_rate": 7.233466477585068e-05, - "loss": 0.9536, - "step": 21970 - }, - { - "epoch": 0.6304419548721185, - "grad_norm": 0.40625, - "learning_rate": 7.228654941274225e-05, - "loss": 0.9679, - "step": 21975 - }, - { - "epoch": 0.6305854001405764, - "grad_norm": 0.42578125, - "learning_rate": 7.223844099829773e-05, - "loss": 0.8515, - "step": 21980 - }, - { - "epoch": 0.6307288454090342, - "grad_norm": 0.37890625, - "learning_rate": 7.219033954457958e-05, - "loss": 0.935, - "step": 21985 - }, - { - "epoch": 0.630872290677492, - "grad_norm": 0.341796875, - "learning_rate": 7.214224506364834e-05, - "loss": 0.8715, - "step": 21990 - }, - { - "epoch": 0.6310157359459498, - "grad_norm": 0.388671875, - "learning_rate": 7.209415756756286e-05, - "loss": 0.9674, - "step": 21995 - }, - { - "epoch": 0.6311591812144076, - "grad_norm": 0.4375, - "learning_rate": 7.204607706838026e-05, - "loss": 0.9166, - "step": 22000 - }, - { - "epoch": 0.6313026264828655, - "grad_norm": 0.42578125, - "learning_rate": 7.199800357815592e-05, - "loss": 1.0205, - "step": 22005 - }, - { - "epoch": 0.6314460717513233, - "grad_norm": 0.40234375, - "learning_rate": 7.194993710894335e-05, - "loss": 1.0494, - "step": 22010 - }, - { - "epoch": 0.631589517019781, - "grad_norm": 0.34765625, - "learning_rate": 7.190187767279439e-05, - "loss": 0.9212, - "step": 22015 - }, - { - "epoch": 0.6317329622882389, - "grad_norm": 0.36328125, - 
"learning_rate": 7.185382528175917e-05, - "loss": 0.8585, - "step": 22020 - }, - { - "epoch": 0.6318764075566967, - "grad_norm": 0.4140625, - "learning_rate": 7.180577994788596e-05, - "loss": 1.1628, - "step": 22025 - }, - { - "epoch": 0.6320198528251546, - "grad_norm": 0.384765625, - "learning_rate": 7.175774168322123e-05, - "loss": 0.9598, - "step": 22030 - }, - { - "epoch": 0.6321632980936124, - "grad_norm": 0.359375, - "learning_rate": 7.170971049980978e-05, - "loss": 0.8924, - "step": 22035 - }, - { - "epoch": 0.6323067433620702, - "grad_norm": 0.38671875, - "learning_rate": 7.166168640969464e-05, - "loss": 0.9454, - "step": 22040 - }, - { - "epoch": 0.632450188630528, - "grad_norm": 0.34765625, - "learning_rate": 7.161366942491697e-05, - "loss": 1.035, - "step": 22045 - }, - { - "epoch": 0.6325936338989858, - "grad_norm": 0.36328125, - "learning_rate": 7.156565955751616e-05, - "loss": 0.9011, - "step": 22050 - }, - { - "epoch": 0.6327370791674437, - "grad_norm": 0.41796875, - "learning_rate": 7.151765681952986e-05, - "loss": 0.9409, - "step": 22055 - }, - { - "epoch": 0.6328805244359015, - "grad_norm": 0.349609375, - "learning_rate": 7.146966122299396e-05, - "loss": 1.0298, - "step": 22060 - }, - { - "epoch": 0.6330239697043593, - "grad_norm": 0.392578125, - "learning_rate": 7.142167277994245e-05, - "loss": 1.0644, - "step": 22065 - }, - { - "epoch": 0.6331674149728171, - "grad_norm": 0.396484375, - "learning_rate": 7.137369150240769e-05, - "loss": 0.902, - "step": 22070 - }, - { - "epoch": 0.6333108602412749, - "grad_norm": 0.384765625, - "learning_rate": 7.132571740242006e-05, - "loss": 0.8629, - "step": 22075 - }, - { - "epoch": 0.6334543055097328, - "grad_norm": 0.443359375, - "learning_rate": 7.127775049200828e-05, - "loss": 1.0122, - "step": 22080 - }, - { - "epoch": 0.6335977507781906, - "grad_norm": 0.380859375, - "learning_rate": 7.122979078319914e-05, - "loss": 0.9555, - "step": 22085 - }, - { - "epoch": 0.6337411960466484, - "grad_norm": 0.3984375, - "learning_rate": 7.118183828801781e-05, - "loss": 1.0008, - "step": 22090 - }, - { - "epoch": 0.6338846413151062, - "grad_norm": 0.41796875, - "learning_rate": 7.113389301848747e-05, - "loss": 0.9872, - "step": 22095 - }, - { - "epoch": 0.634028086583564, - "grad_norm": 0.38671875, - "learning_rate": 7.108595498662956e-05, - "loss": 0.8741, - "step": 22100 - }, - { - "epoch": 0.6341715318520219, - "grad_norm": 0.392578125, - "learning_rate": 7.103802420446374e-05, - "loss": 0.9038, - "step": 22105 - }, - { - "epoch": 0.6343149771204797, - "grad_norm": 0.3984375, - "learning_rate": 7.099010068400781e-05, - "loss": 0.9323, - "step": 22110 - }, - { - "epoch": 0.6344584223889375, - "grad_norm": 0.375, - "learning_rate": 7.094218443727773e-05, - "loss": 0.9892, - "step": 22115 - }, - { - "epoch": 0.6346018676573953, - "grad_norm": 0.359375, - "learning_rate": 7.089427547628766e-05, - "loss": 0.9455, - "step": 22120 - }, - { - "epoch": 0.6347453129258531, - "grad_norm": 0.3359375, - "learning_rate": 7.084637381304996e-05, - "loss": 0.8616, - "step": 22125 - }, - { - "epoch": 0.634888758194311, - "grad_norm": 0.349609375, - "learning_rate": 7.079847945957516e-05, - "loss": 0.919, - "step": 22130 - }, - { - "epoch": 0.6350322034627688, - "grad_norm": 0.37890625, - "learning_rate": 7.075059242787188e-05, - "loss": 0.958, - "step": 22135 - }, - { - "epoch": 0.6351756487312266, - "grad_norm": 0.416015625, - "learning_rate": 7.070271272994698e-05, - "loss": 0.9413, - "step": 22140 - }, - { - "epoch": 0.6353190939996844, - "grad_norm": 
0.390625, - "learning_rate": 7.065484037780545e-05, - "loss": 0.9468, - "step": 22145 - }, - { - "epoch": 0.6354625392681422, - "grad_norm": 0.353515625, - "learning_rate": 7.060697538345048e-05, - "loss": 0.9393, - "step": 22150 - }, - { - "epoch": 0.6356059845366001, - "grad_norm": 0.376953125, - "learning_rate": 7.055911775888329e-05, - "loss": 0.992, - "step": 22155 - }, - { - "epoch": 0.6357494298050579, - "grad_norm": 0.37890625, - "learning_rate": 7.051126751610346e-05, - "loss": 0.8985, - "step": 22160 - }, - { - "epoch": 0.6358928750735157, - "grad_norm": 0.37890625, - "learning_rate": 7.046342466710856e-05, - "loss": 0.9764, - "step": 22165 - }, - { - "epoch": 0.6360363203419735, - "grad_norm": 0.3828125, - "learning_rate": 7.041558922389434e-05, - "loss": 0.9367, - "step": 22170 - }, - { - "epoch": 0.6361797656104313, - "grad_norm": 0.4296875, - "learning_rate": 7.036776119845464e-05, - "loss": 0.8477, - "step": 22175 - }, - { - "epoch": 0.6363232108788892, - "grad_norm": 0.38671875, - "learning_rate": 7.031994060278162e-05, - "loss": 0.9848, - "step": 22180 - }, - { - "epoch": 0.636466656147347, - "grad_norm": 0.40625, - "learning_rate": 7.027212744886542e-05, - "loss": 0.9566, - "step": 22185 - }, - { - "epoch": 0.6366101014158048, - "grad_norm": 0.41015625, - "learning_rate": 7.02243217486943e-05, - "loss": 0.9044, - "step": 22190 - }, - { - "epoch": 0.6367535466842627, - "grad_norm": 0.451171875, - "learning_rate": 7.017652351425476e-05, - "loss": 0.9257, - "step": 22195 - }, - { - "epoch": 0.6368969919527204, - "grad_norm": 0.3828125, - "learning_rate": 7.012873275753137e-05, - "loss": 0.8584, - "step": 22200 - }, - { - "epoch": 0.6370404372211783, - "grad_norm": 0.400390625, - "learning_rate": 7.008094949050681e-05, - "loss": 0.8923, - "step": 22205 - }, - { - "epoch": 0.6371838824896361, - "grad_norm": 0.408203125, - "learning_rate": 7.003317372516189e-05, - "loss": 0.9971, - "step": 22210 - }, - { - "epoch": 0.6373273277580939, - "grad_norm": 0.41015625, - "learning_rate": 6.998540547347558e-05, - "loss": 0.8969, - "step": 22215 - }, - { - "epoch": 0.6374707730265518, - "grad_norm": 0.380859375, - "learning_rate": 6.993764474742493e-05, - "loss": 0.9144, - "step": 22220 - }, - { - "epoch": 0.6376142182950095, - "grad_norm": 0.38671875, - "learning_rate": 6.988989155898507e-05, - "loss": 0.9727, - "step": 22225 - }, - { - "epoch": 0.6377576635634674, - "grad_norm": 0.37109375, - "learning_rate": 6.984214592012935e-05, - "loss": 0.8552, - "step": 22230 - }, - { - "epoch": 0.6379011088319252, - "grad_norm": 0.3515625, - "learning_rate": 6.979440784282909e-05, - "loss": 0.9194, - "step": 22235 - }, - { - "epoch": 0.638044554100383, - "grad_norm": 0.48046875, - "learning_rate": 6.974667733905377e-05, - "loss": 1.0728, - "step": 22240 - }, - { - "epoch": 0.6381879993688409, - "grad_norm": 0.369140625, - "learning_rate": 6.969895442077104e-05, - "loss": 0.8931, - "step": 22245 - }, - { - "epoch": 0.6383314446372986, - "grad_norm": 0.3828125, - "learning_rate": 6.965123909994658e-05, - "loss": 0.9699, - "step": 22250 - }, - { - "epoch": 0.6384748899057564, - "grad_norm": 0.41796875, - "learning_rate": 6.960353138854415e-05, - "loss": 0.989, - "step": 22255 - }, - { - "epoch": 0.6386183351742143, - "grad_norm": 0.349609375, - "learning_rate": 6.955583129852559e-05, - "loss": 0.8598, - "step": 22260 - }, - { - "epoch": 0.6387617804426721, - "grad_norm": 0.404296875, - "learning_rate": 6.950813884185094e-05, - "loss": 0.9907, - "step": 22265 - }, - { - "epoch": 0.63890522571113, - 
"grad_norm": 0.36328125, - "learning_rate": 6.946045403047821e-05, - "loss": 0.9464, - "step": 22270 - }, - { - "epoch": 0.6390486709795877, - "grad_norm": 0.3984375, - "learning_rate": 6.941277687636357e-05, - "loss": 0.9349, - "step": 22275 - }, - { - "epoch": 0.6391921162480455, - "grad_norm": 0.375, - "learning_rate": 6.936510739146113e-05, - "loss": 0.8903, - "step": 22280 - }, - { - "epoch": 0.6393355615165034, - "grad_norm": 0.373046875, - "learning_rate": 6.931744558772332e-05, - "loss": 0.9934, - "step": 22285 - }, - { - "epoch": 0.6394790067849612, - "grad_norm": 0.361328125, - "learning_rate": 6.926979147710044e-05, - "loss": 0.9705, - "step": 22290 - }, - { - "epoch": 0.639622452053419, - "grad_norm": 0.39453125, - "learning_rate": 6.922214507154092e-05, - "loss": 0.9187, - "step": 22295 - }, - { - "epoch": 0.6397658973218768, - "grad_norm": 0.34375, - "learning_rate": 6.917450638299123e-05, - "loss": 0.9928, - "step": 22300 - }, - { - "epoch": 0.6399093425903346, - "grad_norm": 0.349609375, - "learning_rate": 6.912687542339605e-05, - "loss": 0.9157, - "step": 22305 - }, - { - "epoch": 0.6400527878587925, - "grad_norm": 0.392578125, - "learning_rate": 6.90792522046979e-05, - "loss": 0.9701, - "step": 22310 - }, - { - "epoch": 0.6401962331272503, - "grad_norm": 0.361328125, - "learning_rate": 6.903163673883753e-05, - "loss": 0.9577, - "step": 22315 - }, - { - "epoch": 0.6403396783957082, - "grad_norm": 0.345703125, - "learning_rate": 6.898402903775369e-05, - "loss": 0.9724, - "step": 22320 - }, - { - "epoch": 0.6404831236641659, - "grad_norm": 0.37890625, - "learning_rate": 6.893642911338314e-05, - "loss": 0.9609, - "step": 22325 - }, - { - "epoch": 0.6406265689326237, - "grad_norm": 0.421875, - "learning_rate": 6.888883697766076e-05, - "loss": 0.8676, - "step": 22330 - }, - { - "epoch": 0.6407700142010816, - "grad_norm": 0.39453125, - "learning_rate": 6.884125264251941e-05, - "loss": 1.0393, - "step": 22335 - }, - { - "epoch": 0.6409134594695394, - "grad_norm": 0.38671875, - "learning_rate": 6.87936761198901e-05, - "loss": 1.0396, - "step": 22340 - }, - { - "epoch": 0.6410569047379973, - "grad_norm": 0.37890625, - "learning_rate": 6.874610742170175e-05, - "loss": 0.9224, - "step": 22345 - }, - { - "epoch": 0.641200350006455, - "grad_norm": 0.390625, - "learning_rate": 6.869854655988139e-05, - "loss": 1.0047, - "step": 22350 - }, - { - "epoch": 0.6413437952749128, - "grad_norm": 0.40234375, - "learning_rate": 6.865099354635412e-05, - "loss": 0.9248, - "step": 22355 - }, - { - "epoch": 0.6414872405433707, - "grad_norm": 0.3984375, - "learning_rate": 6.860344839304299e-05, - "loss": 0.9861, - "step": 22360 - }, - { - "epoch": 0.6416306858118285, - "grad_norm": 0.384765625, - "learning_rate": 6.855591111186915e-05, - "loss": 0.9226, - "step": 22365 - }, - { - "epoch": 0.6417741310802864, - "grad_norm": 0.416015625, - "learning_rate": 6.850838171475165e-05, - "loss": 1.0101, - "step": 22370 - }, - { - "epoch": 0.6419175763487441, - "grad_norm": 0.361328125, - "learning_rate": 6.846086021360777e-05, - "loss": 0.9914, - "step": 22375 - }, - { - "epoch": 0.6420610216172019, - "grad_norm": 0.361328125, - "learning_rate": 6.841334662035266e-05, - "loss": 0.9472, - "step": 22380 - }, - { - "epoch": 0.6422044668856598, - "grad_norm": 0.361328125, - "learning_rate": 6.836584094689948e-05, - "loss": 1.0088, - "step": 22385 - }, - { - "epoch": 0.6423479121541176, - "grad_norm": 0.376953125, - "learning_rate": 6.83183432051595e-05, - "loss": 0.9468, - "step": 22390 - }, - { - "epoch": 
0.6424913574225755, - "grad_norm": 0.412109375, - "learning_rate": 6.827085340704196e-05, - "loss": 1.0133, - "step": 22395 - }, - { - "epoch": 0.6426348026910332, - "grad_norm": 0.376953125, - "learning_rate": 6.822337156445406e-05, - "loss": 0.8774, - "step": 22400 - }, - { - "epoch": 0.642778247959491, - "grad_norm": 0.412109375, - "learning_rate": 6.817589768930104e-05, - "loss": 0.9608, - "step": 22405 - }, - { - "epoch": 0.6429216932279489, - "grad_norm": 0.369140625, - "learning_rate": 6.812843179348618e-05, - "loss": 1.0371, - "step": 22410 - }, - { - "epoch": 0.6430651384964067, - "grad_norm": 0.38671875, - "learning_rate": 6.808097388891069e-05, - "loss": 1.0097, - "step": 22415 - }, - { - "epoch": 0.6432085837648646, - "grad_norm": 0.435546875, - "learning_rate": 6.803352398747384e-05, - "loss": 0.9425, - "step": 22420 - }, - { - "epoch": 0.6433520290333223, - "grad_norm": 0.376953125, - "learning_rate": 6.798608210107284e-05, - "loss": 0.9939, - "step": 22425 - }, - { - "epoch": 0.6434954743017801, - "grad_norm": 0.3828125, - "learning_rate": 6.793864824160295e-05, - "loss": 1.0029, - "step": 22430 - }, - { - "epoch": 0.643638919570238, - "grad_norm": 0.375, - "learning_rate": 6.789122242095733e-05, - "loss": 1.0094, - "step": 22435 - }, - { - "epoch": 0.6437823648386958, - "grad_norm": 0.44140625, - "learning_rate": 6.78438046510272e-05, - "loss": 0.9726, - "step": 22440 - }, - { - "epoch": 0.6439258101071537, - "grad_norm": 0.41796875, - "learning_rate": 6.779639494370175e-05, - "loss": 1.0586, - "step": 22445 - }, - { - "epoch": 0.6440692553756114, - "grad_norm": 0.390625, - "learning_rate": 6.774899331086814e-05, - "loss": 0.987, - "step": 22450 - }, - { - "epoch": 0.6442127006440692, - "grad_norm": 0.388671875, - "learning_rate": 6.770159976441151e-05, - "loss": 0.9947, - "step": 22455 - }, - { - "epoch": 0.6443561459125271, - "grad_norm": 0.384765625, - "learning_rate": 6.765421431621491e-05, - "loss": 0.9172, - "step": 22460 - }, - { - "epoch": 0.6444995911809849, - "grad_norm": 0.40234375, - "learning_rate": 6.760683697815946e-05, - "loss": 0.9127, - "step": 22465 - }, - { - "epoch": 0.6446430364494428, - "grad_norm": 0.365234375, - "learning_rate": 6.755946776212421e-05, - "loss": 0.9308, - "step": 22470 - }, - { - "epoch": 0.6447864817179005, - "grad_norm": 0.37890625, - "learning_rate": 6.75121066799861e-05, - "loss": 0.9306, - "step": 22475 - }, - { - "epoch": 0.6449299269863583, - "grad_norm": 0.33984375, - "learning_rate": 6.746475374362018e-05, - "loss": 0.8579, - "step": 22480 - }, - { - "epoch": 0.6450733722548162, - "grad_norm": 0.408203125, - "learning_rate": 6.741740896489932e-05, - "loss": 0.9879, - "step": 22485 - }, - { - "epoch": 0.645216817523274, - "grad_norm": 0.43359375, - "learning_rate": 6.737007235569442e-05, - "loss": 0.8655, - "step": 22490 - }, - { - "epoch": 0.6453602627917319, - "grad_norm": 0.39453125, - "learning_rate": 6.732274392787428e-05, - "loss": 0.9397, - "step": 22495 - }, - { - "epoch": 0.6455037080601896, - "grad_norm": 0.431640625, - "learning_rate": 6.727542369330571e-05, - "loss": 0.9944, - "step": 22500 - }, - { - "epoch": 0.6456471533286474, - "grad_norm": 0.388671875, - "learning_rate": 6.72281116638534e-05, - "loss": 0.9687, - "step": 22505 - }, - { - "epoch": 0.6457905985971053, - "grad_norm": 0.38671875, - "learning_rate": 6.718080785138002e-05, - "loss": 0.9123, - "step": 22510 - }, - { - "epoch": 0.6459340438655631, - "grad_norm": 0.388671875, - "learning_rate": 6.713351226774623e-05, - "loss": 1.0392, - "step": 22515 - 
}, - { - "epoch": 0.646077489134021, - "grad_norm": 0.39453125, - "learning_rate": 6.708622492481051e-05, - "loss": 0.9533, - "step": 22520 - }, - { - "epoch": 0.6462209344024787, - "grad_norm": 0.380859375, - "learning_rate": 6.703894583442935e-05, - "loss": 1.0849, - "step": 22525 - }, - { - "epoch": 0.6463643796709365, - "grad_norm": 0.390625, - "learning_rate": 6.699167500845714e-05, - "loss": 0.9, - "step": 22530 - }, - { - "epoch": 0.6465078249393944, - "grad_norm": 0.400390625, - "learning_rate": 6.694441245874628e-05, - "loss": 0.9051, - "step": 22535 - }, - { - "epoch": 0.6466512702078522, - "grad_norm": 0.34375, - "learning_rate": 6.689715819714697e-05, - "loss": 0.9524, - "step": 22540 - }, - { - "epoch": 0.6467947154763101, - "grad_norm": 0.373046875, - "learning_rate": 6.684991223550737e-05, - "loss": 0.9376, - "step": 22545 - }, - { - "epoch": 0.6469381607447678, - "grad_norm": 0.439453125, - "learning_rate": 6.680267458567366e-05, - "loss": 0.978, - "step": 22550 - }, - { - "epoch": 0.6470816060132256, - "grad_norm": 0.39453125, - "learning_rate": 6.675544525948981e-05, - "loss": 0.9397, - "step": 22555 - }, - { - "epoch": 0.6472250512816835, - "grad_norm": 0.361328125, - "learning_rate": 6.670822426879776e-05, - "loss": 0.9078, - "step": 22560 - }, - { - "epoch": 0.6473684965501413, - "grad_norm": 0.431640625, - "learning_rate": 6.66610116254373e-05, - "loss": 0.9772, - "step": 22565 - }, - { - "epoch": 0.6475119418185992, - "grad_norm": 0.373046875, - "learning_rate": 6.661380734124625e-05, - "loss": 0.9192, - "step": 22570 - }, - { - "epoch": 0.6476553870870569, - "grad_norm": 0.404296875, - "learning_rate": 6.656661142806024e-05, - "loss": 0.917, - "step": 22575 - }, - { - "epoch": 0.6477988323555147, - "grad_norm": 0.369140625, - "learning_rate": 6.65194238977128e-05, - "loss": 0.9375, - "step": 22580 - }, - { - "epoch": 0.6479422776239726, - "grad_norm": 0.416015625, - "learning_rate": 6.647224476203537e-05, - "loss": 0.9705, - "step": 22585 - }, - { - "epoch": 0.6480857228924304, - "grad_norm": 0.380859375, - "learning_rate": 6.642507403285732e-05, - "loss": 0.9078, - "step": 22590 - }, - { - "epoch": 0.6482291681608882, - "grad_norm": 0.388671875, - "learning_rate": 6.63779117220059e-05, - "loss": 0.9079, - "step": 22595 - }, - { - "epoch": 0.648372613429346, - "grad_norm": 0.400390625, - "learning_rate": 6.633075784130619e-05, - "loss": 0.9691, - "step": 22600 - }, - { - "epoch": 0.6485160586978038, - "grad_norm": 0.38671875, - "learning_rate": 6.628361240258126e-05, - "loss": 0.9808, - "step": 22605 - }, - { - "epoch": 0.6486595039662617, - "grad_norm": 0.37109375, - "learning_rate": 6.623647541765195e-05, - "loss": 1.1138, - "step": 22610 - }, - { - "epoch": 0.6488029492347195, - "grad_norm": 0.36328125, - "learning_rate": 6.61893468983371e-05, - "loss": 1.1167, - "step": 22615 - }, - { - "epoch": 0.6489463945031773, - "grad_norm": 0.400390625, - "learning_rate": 6.614222685645324e-05, - "loss": 0.9421, - "step": 22620 - }, - { - "epoch": 0.6490898397716351, - "grad_norm": 0.341796875, - "learning_rate": 6.609511530381505e-05, - "loss": 0.8798, - "step": 22625 - }, - { - "epoch": 0.6492332850400929, - "grad_norm": 0.3828125, - "learning_rate": 6.604801225223486e-05, - "loss": 0.9185, - "step": 22630 - }, - { - "epoch": 0.6493767303085508, - "grad_norm": 0.369140625, - "learning_rate": 6.600091771352291e-05, - "loss": 0.8959, - "step": 22635 - }, - { - "epoch": 0.6495201755770086, - "grad_norm": 0.361328125, - "learning_rate": 6.595383169948738e-05, - "loss": 
0.881, - "step": 22640 - }, - { - "epoch": 0.6496636208454664, - "grad_norm": 0.40625, - "learning_rate": 6.590675422193426e-05, - "loss": 0.9906, - "step": 22645 - }, - { - "epoch": 0.6498070661139242, - "grad_norm": 0.369140625, - "learning_rate": 6.58596852926674e-05, - "loss": 0.842, - "step": 22650 - }, - { - "epoch": 0.649950511382382, - "grad_norm": 0.400390625, - "learning_rate": 6.581262492348846e-05, - "loss": 0.9095, - "step": 22655 - }, - { - "epoch": 0.6500939566508399, - "grad_norm": 0.33984375, - "learning_rate": 6.576557312619711e-05, - "loss": 0.9661, - "step": 22660 - }, - { - "epoch": 0.6502374019192977, - "grad_norm": 0.39453125, - "learning_rate": 6.571852991259069e-05, - "loss": 0.9971, - "step": 22665 - }, - { - "epoch": 0.6503808471877555, - "grad_norm": 0.416015625, - "learning_rate": 6.567149529446447e-05, - "loss": 1.0037, - "step": 22670 - }, - { - "epoch": 0.6505242924562133, - "grad_norm": 0.376953125, - "learning_rate": 6.562446928361161e-05, - "loss": 1.0768, - "step": 22675 - }, - { - "epoch": 0.6506677377246711, - "grad_norm": 0.365234375, - "learning_rate": 6.5577451891823e-05, - "loss": 0.8492, - "step": 22680 - }, - { - "epoch": 0.650811182993129, - "grad_norm": 0.365234375, - "learning_rate": 6.553044313088747e-05, - "loss": 0.9376, - "step": 22685 - }, - { - "epoch": 0.6509546282615868, - "grad_norm": 0.37109375, - "learning_rate": 6.548344301259161e-05, - "loss": 1.0024, - "step": 22690 - }, - { - "epoch": 0.6510980735300446, - "grad_norm": 0.404296875, - "learning_rate": 6.543645154871994e-05, - "loss": 0.9192, - "step": 22695 - }, - { - "epoch": 0.6512415187985025, - "grad_norm": 0.369140625, - "learning_rate": 6.53894687510547e-05, - "loss": 0.9516, - "step": 22700 - }, - { - "epoch": 0.6513849640669602, - "grad_norm": 0.3671875, - "learning_rate": 6.534249463137603e-05, - "loss": 0.8792, - "step": 22705 - }, - { - "epoch": 0.6515284093354181, - "grad_norm": 0.388671875, - "learning_rate": 6.52955292014618e-05, - "loss": 0.9887, - "step": 22710 - }, - { - "epoch": 0.6516718546038759, - "grad_norm": 0.3671875, - "learning_rate": 6.524857247308788e-05, - "loss": 0.9512, - "step": 22715 - }, - { - "epoch": 0.6518152998723337, - "grad_norm": 0.40625, - "learning_rate": 6.52016244580278e-05, - "loss": 0.9747, - "step": 22720 - }, - { - "epoch": 0.6519587451407916, - "grad_norm": 0.3828125, - "learning_rate": 6.51546851680529e-05, - "loss": 0.9309, - "step": 22725 - }, - { - "epoch": 0.6521021904092493, - "grad_norm": 0.373046875, - "learning_rate": 6.51077546149325e-05, - "loss": 1.0199, - "step": 22730 - }, - { - "epoch": 0.6522456356777072, - "grad_norm": 0.373046875, - "learning_rate": 6.506083281043356e-05, - "loss": 0.9959, - "step": 22735 - }, - { - "epoch": 0.652389080946165, - "grad_norm": 0.353515625, - "learning_rate": 6.50139197663209e-05, - "loss": 0.8657, - "step": 22740 - }, - { - "epoch": 0.6525325262146228, - "grad_norm": 0.392578125, - "learning_rate": 6.49670154943571e-05, - "loss": 0.9372, - "step": 22745 - }, - { - "epoch": 0.6526759714830807, - "grad_norm": 0.37890625, - "learning_rate": 6.492012000630269e-05, - "loss": 0.9763, - "step": 22750 - }, - { - "epoch": 0.6528194167515384, - "grad_norm": 0.423828125, - "learning_rate": 6.487323331391584e-05, - "loss": 0.9543, - "step": 22755 - }, - { - "epoch": 0.6529628620199963, - "grad_norm": 0.3828125, - "learning_rate": 6.482635542895255e-05, - "loss": 0.9459, - "step": 22760 - }, - { - "epoch": 0.6531063072884541, - "grad_norm": 0.443359375, - "learning_rate": 6.477948636316666e-05, 
- "loss": 1.1386, - "step": 22765 - }, - { - "epoch": 0.6532497525569119, - "grad_norm": 0.41796875, - "learning_rate": 6.473262612830977e-05, - "loss": 0.9466, - "step": 22770 - }, - { - "epoch": 0.6533931978253698, - "grad_norm": 0.3515625, - "learning_rate": 6.468577473613127e-05, - "loss": 0.9924, - "step": 22775 - }, - { - "epoch": 0.6535366430938275, - "grad_norm": 0.36328125, - "learning_rate": 6.46389321983783e-05, - "loss": 0.9351, - "step": 22780 - }, - { - "epoch": 0.6536800883622854, - "grad_norm": 0.412109375, - "learning_rate": 6.459209852679585e-05, - "loss": 0.9671, - "step": 22785 - }, - { - "epoch": 0.6538235336307432, - "grad_norm": 0.392578125, - "learning_rate": 6.45452737331266e-05, - "loss": 0.9708, - "step": 22790 - }, - { - "epoch": 0.653966978899201, - "grad_norm": 0.376953125, - "learning_rate": 6.449845782911105e-05, - "loss": 0.9734, - "step": 22795 - }, - { - "epoch": 0.6541104241676589, - "grad_norm": 0.36328125, - "learning_rate": 6.445165082648755e-05, - "loss": 0.8529, - "step": 22800 - }, - { - "epoch": 0.6542538694361166, - "grad_norm": 0.341796875, - "learning_rate": 6.440485273699206e-05, - "loss": 0.9502, - "step": 22805 - }, - { - "epoch": 0.6543973147045745, - "grad_norm": 0.380859375, - "learning_rate": 6.43580635723584e-05, - "loss": 0.9739, - "step": 22810 - }, - { - "epoch": 0.6545407599730323, - "grad_norm": 0.419921875, - "learning_rate": 6.431128334431813e-05, - "loss": 0.9035, - "step": 22815 - }, - { - "epoch": 0.6546842052414901, - "grad_norm": 0.376953125, - "learning_rate": 6.426451206460061e-05, - "loss": 1.0738, - "step": 22820 - }, - { - "epoch": 0.654827650509948, - "grad_norm": 0.44140625, - "learning_rate": 6.42177497449329e-05, - "loss": 0.9493, - "step": 22825 - }, - { - "epoch": 0.6549710957784057, - "grad_norm": 0.345703125, - "learning_rate": 6.417099639703979e-05, - "loss": 0.9641, - "step": 22830 - }, - { - "epoch": 0.6551145410468636, - "grad_norm": 0.396484375, - "learning_rate": 6.412425203264397e-05, - "loss": 0.9099, - "step": 22835 - }, - { - "epoch": 0.6552579863153214, - "grad_norm": 0.390625, - "learning_rate": 6.407751666346569e-05, - "loss": 1.0119, - "step": 22840 - }, - { - "epoch": 0.6554014315837792, - "grad_norm": 0.392578125, - "learning_rate": 6.403079030122307e-05, - "loss": 0.9712, - "step": 22845 - }, - { - "epoch": 0.6555448768522371, - "grad_norm": 0.357421875, - "learning_rate": 6.398407295763187e-05, - "loss": 0.8978, - "step": 22850 - }, - { - "epoch": 0.6556883221206948, - "grad_norm": 0.37109375, - "learning_rate": 6.393736464440572e-05, - "loss": 0.9431, - "step": 22855 - }, - { - "epoch": 0.6558317673891527, - "grad_norm": 0.396484375, - "learning_rate": 6.38906653732559e-05, - "loss": 1.0234, - "step": 22860 - }, - { - "epoch": 0.6559752126576105, - "grad_norm": 0.37890625, - "learning_rate": 6.384397515589141e-05, - "loss": 0.9684, - "step": 22865 - }, - { - "epoch": 0.6561186579260683, - "grad_norm": 0.34765625, - "learning_rate": 6.3797294004019e-05, - "loss": 0.9168, - "step": 22870 - }, - { - "epoch": 0.6562621031945262, - "grad_norm": 0.384765625, - "learning_rate": 6.375062192934321e-05, - "loss": 0.9159, - "step": 22875 - }, - { - "epoch": 0.6564055484629839, - "grad_norm": 0.462890625, - "learning_rate": 6.37039589435662e-05, - "loss": 0.9821, - "step": 22880 - }, - { - "epoch": 0.6565489937314418, - "grad_norm": 0.384765625, - "learning_rate": 6.365730505838786e-05, - "loss": 0.8642, - "step": 22885 - }, - { - "epoch": 0.6566924389998996, - "grad_norm": 0.3671875, - "learning_rate": 
6.361066028550593e-05, - "loss": 0.9383, - "step": 22890 - }, - { - "epoch": 0.6568358842683574, - "grad_norm": 0.369140625, - "learning_rate": 6.356402463661575e-05, - "loss": 0.96, - "step": 22895 - }, - { - "epoch": 0.6569793295368153, - "grad_norm": 0.359375, - "learning_rate": 6.351739812341036e-05, - "loss": 0.9491, - "step": 22900 - }, - { - "epoch": 0.657122774805273, - "grad_norm": 0.37890625, - "learning_rate": 6.347078075758051e-05, - "loss": 0.8763, - "step": 22905 - }, - { - "epoch": 0.6572662200737309, - "grad_norm": 0.37109375, - "learning_rate": 6.342417255081479e-05, - "loss": 0.9424, - "step": 22910 - }, - { - "epoch": 0.6574096653421887, - "grad_norm": 0.38671875, - "learning_rate": 6.337757351479934e-05, - "loss": 0.8954, - "step": 22915 - }, - { - "epoch": 0.6575531106106465, - "grad_norm": 0.421875, - "learning_rate": 6.333098366121804e-05, - "loss": 0.9912, - "step": 22920 - }, - { - "epoch": 0.6576965558791044, - "grad_norm": 0.408203125, - "learning_rate": 6.32844030017525e-05, - "loss": 0.9676, - "step": 22925 - }, - { - "epoch": 0.6578400011475621, - "grad_norm": 0.345703125, - "learning_rate": 6.323783154808205e-05, - "loss": 0.9017, - "step": 22930 - }, - { - "epoch": 0.6579834464160199, - "grad_norm": 0.376953125, - "learning_rate": 6.319126931188361e-05, - "loss": 0.9466, - "step": 22935 - }, - { - "epoch": 0.6581268916844778, - "grad_norm": 0.3828125, - "learning_rate": 6.314471630483183e-05, - "loss": 1.0609, - "step": 22940 - }, - { - "epoch": 0.6582703369529356, - "grad_norm": 0.3828125, - "learning_rate": 6.309817253859913e-05, - "loss": 0.9525, - "step": 22945 - }, - { - "epoch": 0.6584137822213935, - "grad_norm": 0.408203125, - "learning_rate": 6.305163802485554e-05, - "loss": 0.9916, - "step": 22950 - }, - { - "epoch": 0.6585572274898512, - "grad_norm": 0.361328125, - "learning_rate": 6.300511277526872e-05, - "loss": 0.9641, - "step": 22955 - }, - { - "epoch": 0.658700672758309, - "grad_norm": 0.341796875, - "learning_rate": 6.29585968015041e-05, - "loss": 0.9641, - "step": 22960 - }, - { - "epoch": 0.6588441180267669, - "grad_norm": 0.40625, - "learning_rate": 6.291209011522478e-05, - "loss": 1.0502, - "step": 22965 - }, - { - "epoch": 0.6589875632952247, - "grad_norm": 0.38671875, - "learning_rate": 6.286559272809142e-05, - "loss": 1.0322, - "step": 22970 - }, - { - "epoch": 0.6591310085636826, - "grad_norm": 0.384765625, - "learning_rate": 6.281910465176249e-05, - "loss": 1.0051, - "step": 22975 - }, - { - "epoch": 0.6592744538321403, - "grad_norm": 0.375, - "learning_rate": 6.277262589789406e-05, - "loss": 0.9193, - "step": 22980 - }, - { - "epoch": 0.6594178991005981, - "grad_norm": 0.392578125, - "learning_rate": 6.272615647813985e-05, - "loss": 0.9072, - "step": 22985 - }, - { - "epoch": 0.659561344369056, - "grad_norm": 0.37890625, - "learning_rate": 6.267969640415124e-05, - "loss": 0.9301, - "step": 22990 - }, - { - "epoch": 0.6597047896375138, - "grad_norm": 0.375, - "learning_rate": 6.263324568757732e-05, - "loss": 0.9622, - "step": 22995 - }, - { - "epoch": 0.6598482349059717, - "grad_norm": 0.412109375, - "learning_rate": 6.258680434006478e-05, - "loss": 1.0831, - "step": 23000 - }, - { - "epoch": 0.6599916801744294, - "grad_norm": 0.400390625, - "learning_rate": 6.254037237325798e-05, - "loss": 1.019, - "step": 23005 - }, - { - "epoch": 0.6601351254428872, - "grad_norm": 0.388671875, - "learning_rate": 6.24939497987989e-05, - "loss": 1.0133, - "step": 23010 - }, - { - "epoch": 0.6602785707113451, - "grad_norm": 0.3984375, - 
"learning_rate": 6.244753662832723e-05, - "loss": 1.0319, - "step": 23015 - }, - { - "epoch": 0.6604220159798029, - "grad_norm": 0.416015625, - "learning_rate": 6.240113287348026e-05, - "loss": 0.8906, - "step": 23020 - }, - { - "epoch": 0.6605654612482608, - "grad_norm": 0.35546875, - "learning_rate": 6.235473854589289e-05, - "loss": 0.9166, - "step": 23025 - }, - { - "epoch": 0.6607089065167185, - "grad_norm": 0.3671875, - "learning_rate": 6.230835365719767e-05, - "loss": 0.9237, - "step": 23030 - }, - { - "epoch": 0.6608523517851763, - "grad_norm": 0.390625, - "learning_rate": 6.226197821902487e-05, - "loss": 0.8868, - "step": 23035 - }, - { - "epoch": 0.6609957970536342, - "grad_norm": 0.37109375, - "learning_rate": 6.22156122430023e-05, - "loss": 1.0295, - "step": 23040 - }, - { - "epoch": 0.661139242322092, - "grad_norm": 0.365234375, - "learning_rate": 6.216925574075536e-05, - "loss": 0.8574, - "step": 23045 - }, - { - "epoch": 0.6612826875905499, - "grad_norm": 0.388671875, - "learning_rate": 6.212290872390722e-05, - "loss": 1.011, - "step": 23050 - }, - { - "epoch": 0.6614261328590076, - "grad_norm": 0.3515625, - "learning_rate": 6.207657120407855e-05, - "loss": 0.8527, - "step": 23055 - }, - { - "epoch": 0.6615695781274654, - "grad_norm": 0.37109375, - "learning_rate": 6.203024319288762e-05, - "loss": 0.8245, - "step": 23060 - }, - { - "epoch": 0.6617130233959233, - "grad_norm": 0.392578125, - "learning_rate": 6.198392470195045e-05, - "loss": 0.9883, - "step": 23065 - }, - { - "epoch": 0.6618564686643811, - "grad_norm": 0.390625, - "learning_rate": 6.193761574288057e-05, - "loss": 0.9057, - "step": 23070 - }, - { - "epoch": 0.661999913932839, - "grad_norm": 0.4140625, - "learning_rate": 6.189131632728913e-05, - "loss": 0.9496, - "step": 23075 - }, - { - "epoch": 0.6621433592012967, - "grad_norm": 0.4453125, - "learning_rate": 6.184502646678486e-05, - "loss": 1.0464, - "step": 23080 - }, - { - "epoch": 0.6622868044697545, - "grad_norm": 0.40234375, - "learning_rate": 6.179874617297423e-05, - "loss": 1.0606, - "step": 23085 - }, - { - "epoch": 0.6624302497382124, - "grad_norm": 0.38671875, - "learning_rate": 6.175247545746116e-05, - "loss": 0.9674, - "step": 23090 - }, - { - "epoch": 0.6625736950066702, - "grad_norm": 0.376953125, - "learning_rate": 6.170621433184723e-05, - "loss": 0.8867, - "step": 23095 - }, - { - "epoch": 0.6627171402751281, - "grad_norm": 0.361328125, - "learning_rate": 6.165996280773157e-05, - "loss": 0.922, - "step": 23100 - }, - { - "epoch": 0.6628605855435858, - "grad_norm": 0.359375, - "learning_rate": 6.161372089671103e-05, - "loss": 0.9289, - "step": 23105 - }, - { - "epoch": 0.6630040308120436, - "grad_norm": 0.427734375, - "learning_rate": 6.156748861037991e-05, - "loss": 0.9726, - "step": 23110 - }, - { - "epoch": 0.6631474760805015, - "grad_norm": 0.482421875, - "learning_rate": 6.15212659603301e-05, - "loss": 1.0048, - "step": 23115 - }, - { - "epoch": 0.6632909213489593, - "grad_norm": 0.3671875, - "learning_rate": 6.147505295815124e-05, - "loss": 0.8558, - "step": 23120 - }, - { - "epoch": 0.6634343666174172, - "grad_norm": 0.369140625, - "learning_rate": 6.142884961543035e-05, - "loss": 0.9652, - "step": 23125 - }, - { - "epoch": 0.663577811885875, - "grad_norm": 0.388671875, - "learning_rate": 6.138265594375212e-05, - "loss": 1.0724, - "step": 23130 - }, - { - "epoch": 0.6637212571543327, - "grad_norm": 0.36328125, - "learning_rate": 6.133647195469882e-05, - "loss": 0.9174, - "step": 23135 - }, - { - "epoch": 0.6638647024227906, - "grad_norm": 
0.390625, - "learning_rate": 6.129029765985028e-05, - "loss": 0.9431, - "step": 23140 - }, - { - "epoch": 0.6640081476912484, - "grad_norm": 0.5625, - "learning_rate": 6.12441330707839e-05, - "loss": 0.9224, - "step": 23145 - }, - { - "epoch": 0.6641515929597063, - "grad_norm": 0.365234375, - "learning_rate": 6.119797819907463e-05, - "loss": 0.9473, - "step": 23150 - }, - { - "epoch": 0.664295038228164, - "grad_norm": 0.349609375, - "learning_rate": 6.115183305629499e-05, - "loss": 0.9575, - "step": 23155 - }, - { - "epoch": 0.6644384834966218, - "grad_norm": 0.419921875, - "learning_rate": 6.110569765401513e-05, - "loss": 1.009, - "step": 23160 - }, - { - "epoch": 0.6645819287650797, - "grad_norm": 0.384765625, - "learning_rate": 6.105957200380264e-05, - "loss": 0.9102, - "step": 23165 - }, - { - "epoch": 0.6647253740335375, - "grad_norm": 0.390625, - "learning_rate": 6.1013456117222686e-05, - "loss": 0.9427, - "step": 23170 - }, - { - "epoch": 0.6648688193019954, - "grad_norm": 0.412109375, - "learning_rate": 6.096735000583813e-05, - "loss": 0.8871, - "step": 23175 - }, - { - "epoch": 0.6650122645704531, - "grad_norm": 0.419921875, - "learning_rate": 6.092125368120921e-05, - "loss": 0.896, - "step": 23180 - }, - { - "epoch": 0.6651557098389109, - "grad_norm": 0.400390625, - "learning_rate": 6.0875167154893784e-05, - "loss": 0.9034, - "step": 23185 - }, - { - "epoch": 0.6652991551073688, - "grad_norm": 0.423828125, - "learning_rate": 6.082909043844719e-05, - "loss": 0.9338, - "step": 23190 - }, - { - "epoch": 0.6654426003758266, - "grad_norm": 0.396484375, - "learning_rate": 6.078302354342246e-05, - "loss": 0.9292, - "step": 23195 - }, - { - "epoch": 0.6655860456442845, - "grad_norm": 0.36328125, - "learning_rate": 6.073696648137001e-05, - "loss": 1.0006, - "step": 23200 - }, - { - "epoch": 0.6657294909127422, - "grad_norm": 0.3671875, - "learning_rate": 6.069091926383781e-05, - "loss": 0.8283, - "step": 23205 - }, - { - "epoch": 0.6658729361812, - "grad_norm": 0.412109375, - "learning_rate": 6.0644881902371474e-05, - "loss": 1.0487, - "step": 23210 - }, - { - "epoch": 0.6660163814496579, - "grad_norm": 0.41015625, - "learning_rate": 6.059885440851403e-05, - "loss": 1.0419, - "step": 23215 - }, - { - "epoch": 0.6661598267181157, - "grad_norm": 0.396484375, - "learning_rate": 6.055283679380605e-05, - "loss": 0.8625, - "step": 23220 - }, - { - "epoch": 0.6663032719865736, - "grad_norm": 0.421875, - "learning_rate": 6.050682906978566e-05, - "loss": 0.993, - "step": 23225 - }, - { - "epoch": 0.6664467172550314, - "grad_norm": 0.41015625, - "learning_rate": 6.046083124798851e-05, - "loss": 1.0234, - "step": 23230 - }, - { - "epoch": 0.6665901625234891, - "grad_norm": 0.474609375, - "learning_rate": 6.0414843339947736e-05, - "loss": 0.8836, - "step": 23235 - }, - { - "epoch": 0.666733607791947, - "grad_norm": 0.4296875, - "learning_rate": 6.036886535719399e-05, - "loss": 0.9429, - "step": 23240 - }, - { - "epoch": 0.6668770530604048, - "grad_norm": 0.416015625, - "learning_rate": 6.032289731125549e-05, - "loss": 0.8821, - "step": 23245 - }, - { - "epoch": 0.6670204983288627, - "grad_norm": 0.396484375, - "learning_rate": 6.027693921365789e-05, - "loss": 1.0524, - "step": 23250 - }, - { - "epoch": 0.6671639435973205, - "grad_norm": 0.400390625, - "learning_rate": 6.02309910759244e-05, - "loss": 0.9345, - "step": 23255 - }, - { - "epoch": 0.6673073888657782, - "grad_norm": 0.384765625, - "learning_rate": 6.018505290957565e-05, - "loss": 0.9916, - "step": 23260 - }, - { - "epoch": 
0.6674508341342361, - "grad_norm": 0.40234375, - "learning_rate": 6.0139124726129925e-05, - "loss": 1.0099, - "step": 23265 - }, - { - "epoch": 0.6675942794026939, - "grad_norm": 0.37109375, - "learning_rate": 6.0093206537102866e-05, - "loss": 0.9695, - "step": 23270 - }, - { - "epoch": 0.6677377246711517, - "grad_norm": 0.38671875, - "learning_rate": 6.0047298354007633e-05, - "loss": 1.0668, - "step": 23275 - }, - { - "epoch": 0.6678811699396096, - "grad_norm": 0.36328125, - "learning_rate": 6.000140018835497e-05, - "loss": 0.9279, - "step": 23280 - }, - { - "epoch": 0.6680246152080673, - "grad_norm": 0.39453125, - "learning_rate": 5.995551205165301e-05, - "loss": 0.9011, - "step": 23285 - }, - { - "epoch": 0.6681680604765252, - "grad_norm": 0.439453125, - "learning_rate": 5.990963395540739e-05, - "loss": 1.0166, - "step": 23290 - }, - { - "epoch": 0.668311505744983, - "grad_norm": 0.380859375, - "learning_rate": 5.986376591112121e-05, - "loss": 0.9832, - "step": 23295 - }, - { - "epoch": 0.6684549510134408, - "grad_norm": 0.361328125, - "learning_rate": 5.9817907930295155e-05, - "loss": 0.8996, - "step": 23300 - }, - { - "epoch": 0.6685983962818987, - "grad_norm": 0.365234375, - "learning_rate": 5.977206002442728e-05, - "loss": 0.9178, - "step": 23305 - }, - { - "epoch": 0.6687418415503564, - "grad_norm": 0.400390625, - "learning_rate": 5.972622220501315e-05, - "loss": 0.9299, - "step": 23310 - }, - { - "epoch": 0.6688852868188143, - "grad_norm": 0.35546875, - "learning_rate": 5.968039448354576e-05, - "loss": 0.8585, - "step": 23315 - }, - { - "epoch": 0.6690287320872721, - "grad_norm": 0.400390625, - "learning_rate": 5.9634576871515656e-05, - "loss": 0.9327, - "step": 23320 - }, - { - "epoch": 0.6691721773557299, - "grad_norm": 0.365234375, - "learning_rate": 5.9588769380410814e-05, - "loss": 0.8742, - "step": 23325 - }, - { - "epoch": 0.6693156226241878, - "grad_norm": 0.40234375, - "learning_rate": 5.9542972021716616e-05, - "loss": 0.9547, - "step": 23330 - }, - { - "epoch": 0.6694590678926455, - "grad_norm": 0.419921875, - "learning_rate": 5.9497184806915996e-05, - "loss": 0.9578, - "step": 23335 - }, - { - "epoch": 0.6696025131611034, - "grad_norm": 0.423828125, - "learning_rate": 5.945140774748929e-05, - "loss": 0.9751, - "step": 23340 - }, - { - "epoch": 0.6697459584295612, - "grad_norm": 0.376953125, - "learning_rate": 5.9405640854914266e-05, - "loss": 0.964, - "step": 23345 - }, - { - "epoch": 0.669889403698019, - "grad_norm": 0.416015625, - "learning_rate": 5.935988414066617e-05, - "loss": 1.005, - "step": 23350 - }, - { - "epoch": 0.6700328489664769, - "grad_norm": 0.39453125, - "learning_rate": 5.9314137616217746e-05, - "loss": 1.0371, - "step": 23355 - }, - { - "epoch": 0.6701762942349346, - "grad_norm": 0.369140625, - "learning_rate": 5.9268401293039125e-05, - "loss": 0.9873, - "step": 23360 - }, - { - "epoch": 0.6703197395033925, - "grad_norm": 0.423828125, - "learning_rate": 5.922267518259783e-05, - "loss": 0.8916, - "step": 23365 - }, - { - "epoch": 0.6704631847718503, - "grad_norm": 0.359375, - "learning_rate": 5.917695929635898e-05, - "loss": 0.8441, - "step": 23370 - }, - { - "epoch": 0.6706066300403081, - "grad_norm": 0.361328125, - "learning_rate": 5.913125364578498e-05, - "loss": 0.8323, - "step": 23375 - }, - { - "epoch": 0.670750075308766, - "grad_norm": 0.392578125, - "learning_rate": 5.908555824233575e-05, - "loss": 1.0542, - "step": 23380 - }, - { - "epoch": 0.6708935205772237, - "grad_norm": 0.37890625, - "learning_rate": 5.9039873097468545e-05, - "loss": 
0.957, - "step": 23385 - }, - { - "epoch": 0.6710369658456816, - "grad_norm": 0.412109375, - "learning_rate": 5.899419822263822e-05, - "loss": 0.9096, - "step": 23390 - }, - { - "epoch": 0.6711804111141394, - "grad_norm": 0.353515625, - "learning_rate": 5.89485336292969e-05, - "loss": 0.8504, - "step": 23395 - }, - { - "epoch": 0.6713238563825972, - "grad_norm": 0.36328125, - "learning_rate": 5.8902879328894156e-05, - "loss": 0.9493, - "step": 23400 - }, - { - "epoch": 0.6714673016510551, - "grad_norm": 0.376953125, - "learning_rate": 5.885723533287708e-05, - "loss": 0.9323, - "step": 23405 - }, - { - "epoch": 0.6716107469195128, - "grad_norm": 0.376953125, - "learning_rate": 5.881160165269004e-05, - "loss": 1.033, - "step": 23410 - }, - { - "epoch": 0.6717541921879707, - "grad_norm": 0.388671875, - "learning_rate": 5.876597829977493e-05, - "loss": 0.8666, - "step": 23415 - }, - { - "epoch": 0.6718976374564285, - "grad_norm": 0.447265625, - "learning_rate": 5.872036528557096e-05, - "loss": 1.0794, - "step": 23420 - }, - { - "epoch": 0.6720410827248863, - "grad_norm": 0.375, - "learning_rate": 5.8674762621514855e-05, - "loss": 0.8966, - "step": 23425 - }, - { - "epoch": 0.6721845279933442, - "grad_norm": 0.400390625, - "learning_rate": 5.862917031904066e-05, - "loss": 1.0541, - "step": 23430 - }, - { - "epoch": 0.6723279732618019, - "grad_norm": 0.353515625, - "learning_rate": 5.85835883895798e-05, - "loss": 0.9664, - "step": 23435 - }, - { - "epoch": 0.6724714185302598, - "grad_norm": 0.3828125, - "learning_rate": 5.853801684456126e-05, - "loss": 0.9143, - "step": 23440 - }, - { - "epoch": 0.6726148637987176, - "grad_norm": 0.40625, - "learning_rate": 5.849245569541122e-05, - "loss": 0.9937, - "step": 23445 - }, - { - "epoch": 0.6727583090671754, - "grad_norm": 0.404296875, - "learning_rate": 5.844690495355338e-05, - "loss": 0.961, - "step": 23450 - }, - { - "epoch": 0.6729017543356333, - "grad_norm": 0.390625, - "learning_rate": 5.840136463040875e-05, - "loss": 0.8936, - "step": 23455 - }, - { - "epoch": 0.673045199604091, - "grad_norm": 0.388671875, - "learning_rate": 5.8355834737395856e-05, - "loss": 0.8974, - "step": 23460 - }, - { - "epoch": 0.6731886448725489, - "grad_norm": 0.37109375, - "learning_rate": 5.831031528593045e-05, - "loss": 0.8636, - "step": 23465 - }, - { - "epoch": 0.6733320901410067, - "grad_norm": 0.36328125, - "learning_rate": 5.8264806287425724e-05, - "loss": 1.0148, - "step": 23470 - }, - { - "epoch": 0.6734755354094645, - "grad_norm": 0.416015625, - "learning_rate": 5.821930775329234e-05, - "loss": 1.0475, - "step": 23475 - }, - { - "epoch": 0.6736189806779224, - "grad_norm": 0.396484375, - "learning_rate": 5.817381969493823e-05, - "loss": 0.8733, - "step": 23480 - }, - { - "epoch": 0.6737624259463801, - "grad_norm": 0.384765625, - "learning_rate": 5.812834212376872e-05, - "loss": 1.0064, - "step": 23485 - }, - { - "epoch": 0.673905871214838, - "grad_norm": 0.384765625, - "learning_rate": 5.808287505118647e-05, - "loss": 1.0, - "step": 23490 - }, - { - "epoch": 0.6740493164832958, - "grad_norm": 0.412109375, - "learning_rate": 5.8037418488591654e-05, - "loss": 0.9404, - "step": 23495 - }, - { - "epoch": 0.6741927617517536, - "grad_norm": 0.353515625, - "learning_rate": 5.799197244738166e-05, - "loss": 1.0065, - "step": 23500 - }, - { - "epoch": 0.6743362070202115, - "grad_norm": 0.419921875, - "learning_rate": 5.794653693895131e-05, - "loss": 0.9104, - "step": 23505 - }, - { - "epoch": 0.6744796522886692, - "grad_norm": 0.390625, - "learning_rate": 
5.790111197469269e-05, - "loss": 0.9254, - "step": 23510 - }, - { - "epoch": 0.6746230975571271, - "grad_norm": 0.3984375, - "learning_rate": 5.7855697565995426e-05, - "loss": 1.1375, - "step": 23515 - }, - { - "epoch": 0.6747665428255849, - "grad_norm": 0.4140625, - "learning_rate": 5.781029372424633e-05, - "loss": 1.0396, - "step": 23520 - }, - { - "epoch": 0.6749099880940427, - "grad_norm": 0.37890625, - "learning_rate": 5.776490046082958e-05, - "loss": 0.9411, - "step": 23525 - }, - { - "epoch": 0.6750534333625006, - "grad_norm": 0.400390625, - "learning_rate": 5.7719517787126856e-05, - "loss": 0.9081, - "step": 23530 - }, - { - "epoch": 0.6751968786309583, - "grad_norm": 0.3828125, - "learning_rate": 5.7674145714516994e-05, - "loss": 0.8378, - "step": 23535 - }, - { - "epoch": 0.6753403238994162, - "grad_norm": 0.376953125, - "learning_rate": 5.762878425437627e-05, - "loss": 0.8581, - "step": 23540 - }, - { - "epoch": 0.675483769167874, - "grad_norm": 0.439453125, - "learning_rate": 5.758343341807822e-05, - "loss": 1.0462, - "step": 23545 - }, - { - "epoch": 0.6756272144363318, - "grad_norm": 0.37109375, - "learning_rate": 5.753809321699388e-05, - "loss": 0.8654, - "step": 23550 - }, - { - "epoch": 0.6757706597047897, - "grad_norm": 0.423828125, - "learning_rate": 5.749276366249147e-05, - "loss": 0.921, - "step": 23555 - }, - { - "epoch": 0.6759141049732474, - "grad_norm": 0.40234375, - "learning_rate": 5.744744476593652e-05, - "loss": 0.9704, - "step": 23560 - }, - { - "epoch": 0.6760575502417053, - "grad_norm": 0.36328125, - "learning_rate": 5.740213653869205e-05, - "loss": 1.0632, - "step": 23565 - }, - { - "epoch": 0.6762009955101631, - "grad_norm": 0.37890625, - "learning_rate": 5.7356838992118277e-05, - "loss": 0.9586, - "step": 23570 - }, - { - "epoch": 0.6763444407786209, - "grad_norm": 0.42578125, - "learning_rate": 5.7311552137572744e-05, - "loss": 0.9514, - "step": 23575 - }, - { - "epoch": 0.6764878860470788, - "grad_norm": 0.439453125, - "learning_rate": 5.7266275986410324e-05, - "loss": 1.1027, - "step": 23580 - }, - { - "epoch": 0.6766313313155365, - "grad_norm": 0.51171875, - "learning_rate": 5.722101054998328e-05, - "loss": 1.0703, - "step": 23585 - }, - { - "epoch": 0.6767747765839944, - "grad_norm": 0.390625, - "learning_rate": 5.717575583964111e-05, - "loss": 0.8694, - "step": 23590 - }, - { - "epoch": 0.6769182218524522, - "grad_norm": 0.435546875, - "learning_rate": 5.713051186673063e-05, - "loss": 0.948, - "step": 23595 - }, - { - "epoch": 0.67706166712091, - "grad_norm": 0.40625, - "learning_rate": 5.708527864259594e-05, - "loss": 0.8928, - "step": 23600 - }, - { - "epoch": 0.6772051123893679, - "grad_norm": 0.35546875, - "learning_rate": 5.704005617857857e-05, - "loss": 0.8962, - "step": 23605 - }, - { - "epoch": 0.6773485576578256, - "grad_norm": 0.44140625, - "learning_rate": 5.6994844486017204e-05, - "loss": 0.8506, - "step": 23610 - }, - { - "epoch": 0.6774920029262835, - "grad_norm": 0.3828125, - "learning_rate": 5.6949643576247856e-05, - "loss": 0.977, - "step": 23615 - }, - { - "epoch": 0.6776354481947413, - "grad_norm": 0.40234375, - "learning_rate": 5.6904453460603955e-05, - "loss": 1.0604, - "step": 23620 - }, - { - "epoch": 0.6777788934631991, - "grad_norm": 0.337890625, - "learning_rate": 5.685927415041607e-05, - "loss": 0.9164, - "step": 23625 - }, - { - "epoch": 0.677922338731657, - "grad_norm": 0.421875, - "learning_rate": 5.681410565701215e-05, - "loss": 0.9944, - "step": 23630 - }, - { - "epoch": 0.6780657840001147, - "grad_norm": 0.40625, - 
"learning_rate": 5.676894799171739e-05, - "loss": 0.9244, - "step": 23635 - }, - { - "epoch": 0.6782092292685725, - "grad_norm": 0.4375, - "learning_rate": 5.672380116585425e-05, - "loss": 0.9284, - "step": 23640 - }, - { - "epoch": 0.6783526745370304, - "grad_norm": 0.388671875, - "learning_rate": 5.6678665190742585e-05, - "loss": 0.9921, - "step": 23645 - }, - { - "epoch": 0.6784961198054882, - "grad_norm": 0.400390625, - "learning_rate": 5.663354007769943e-05, - "loss": 0.8695, - "step": 23650 - }, - { - "epoch": 0.6786395650739461, - "grad_norm": 0.345703125, - "learning_rate": 5.6588425838039094e-05, - "loss": 1.0013, - "step": 23655 - }, - { - "epoch": 0.6787830103424038, - "grad_norm": 0.33984375, - "learning_rate": 5.654332248307319e-05, - "loss": 1.039, - "step": 23660 - }, - { - "epoch": 0.6789264556108616, - "grad_norm": 0.384765625, - "learning_rate": 5.649823002411062e-05, - "loss": 0.96, - "step": 23665 - }, - { - "epoch": 0.6790699008793195, - "grad_norm": 0.40234375, - "learning_rate": 5.6453148472457476e-05, - "loss": 0.8995, - "step": 23670 - }, - { - "epoch": 0.6792133461477773, - "grad_norm": 0.3828125, - "learning_rate": 5.640807783941724e-05, - "loss": 0.9151, - "step": 23675 - }, - { - "epoch": 0.6793567914162352, - "grad_norm": 0.40234375, - "learning_rate": 5.636301813629057e-05, - "loss": 0.9534, - "step": 23680 - }, - { - "epoch": 0.679500236684693, - "grad_norm": 0.375, - "learning_rate": 5.631796937437532e-05, - "loss": 0.9519, - "step": 23685 - }, - { - "epoch": 0.6796436819531507, - "grad_norm": 0.365234375, - "learning_rate": 5.62729315649668e-05, - "loss": 0.9146, - "step": 23690 - }, - { - "epoch": 0.6797871272216086, - "grad_norm": 0.408203125, - "learning_rate": 5.6227904719357416e-05, - "loss": 0.9059, - "step": 23695 - }, - { - "epoch": 0.6799305724900664, - "grad_norm": 0.375, - "learning_rate": 5.618288884883684e-05, - "loss": 0.8699, - "step": 23700 - }, - { - "epoch": 0.6800740177585243, - "grad_norm": 0.376953125, - "learning_rate": 5.613788396469197e-05, - "loss": 0.9399, - "step": 23705 - }, - { - "epoch": 0.680217463026982, - "grad_norm": 0.390625, - "learning_rate": 5.6092890078207107e-05, - "loss": 0.8826, - "step": 23710 - }, - { - "epoch": 0.6803609082954398, - "grad_norm": 0.361328125, - "learning_rate": 5.604790720066362e-05, - "loss": 0.892, - "step": 23715 - }, - { - "epoch": 0.6805043535638977, - "grad_norm": 0.466796875, - "learning_rate": 5.600293534334014e-05, - "loss": 0.8876, - "step": 23720 - }, - { - "epoch": 0.6806477988323555, - "grad_norm": 0.48828125, - "learning_rate": 5.595797451751266e-05, - "loss": 0.9494, - "step": 23725 - }, - { - "epoch": 0.6807912441008134, - "grad_norm": 0.38671875, - "learning_rate": 5.591302473445429e-05, - "loss": 0.9173, - "step": 23730 - }, - { - "epoch": 0.6809346893692712, - "grad_norm": 0.388671875, - "learning_rate": 5.586808600543539e-05, - "loss": 0.9898, - "step": 23735 - }, - { - "epoch": 0.6810781346377289, - "grad_norm": 0.40234375, - "learning_rate": 5.582315834172353e-05, - "loss": 0.938, - "step": 23740 - }, - { - "epoch": 0.6812215799061868, - "grad_norm": 0.361328125, - "learning_rate": 5.5778241754583624e-05, - "loss": 1.0204, - "step": 23745 - }, - { - "epoch": 0.6813650251746446, - "grad_norm": 0.404296875, - "learning_rate": 5.573333625527767e-05, - "loss": 0.9076, - "step": 23750 - }, - { - "epoch": 0.6815084704431025, - "grad_norm": 0.373046875, - "learning_rate": 5.5688441855064946e-05, - "loss": 1.0305, - "step": 23755 - }, - { - "epoch": 0.6816519157115603, - 
"grad_norm": 0.44921875, - "learning_rate": 5.564355856520189e-05, - "loss": 0.9601, - "step": 23760 - }, - { - "epoch": 0.681795360980018, - "grad_norm": 0.37109375, - "learning_rate": 5.559868639694228e-05, - "loss": 0.8571, - "step": 23765 - }, - { - "epoch": 0.6819388062484759, - "grad_norm": 0.375, - "learning_rate": 5.555382536153702e-05, - "loss": 0.958, - "step": 23770 - }, - { - "epoch": 0.6820822515169337, - "grad_norm": 0.4296875, - "learning_rate": 5.550897547023415e-05, - "loss": 1.0083, - "step": 23775 - }, - { - "epoch": 0.6822256967853916, - "grad_norm": 0.375, - "learning_rate": 5.5464136734279094e-05, - "loss": 0.9138, - "step": 23780 - }, - { - "epoch": 0.6823691420538494, - "grad_norm": 0.3828125, - "learning_rate": 5.541930916491436e-05, - "loss": 1.0111, - "step": 23785 - }, - { - "epoch": 0.6825125873223071, - "grad_norm": 0.37890625, - "learning_rate": 5.537449277337965e-05, - "loss": 0.9341, - "step": 23790 - }, - { - "epoch": 0.682656032590765, - "grad_norm": 0.3828125, - "learning_rate": 5.532968757091187e-05, - "loss": 0.9096, - "step": 23795 - }, - { - "epoch": 0.6827994778592228, - "grad_norm": 0.365234375, - "learning_rate": 5.528489356874522e-05, - "loss": 0.9062, - "step": 23800 - }, - { - "epoch": 0.6829429231276807, - "grad_norm": 0.416015625, - "learning_rate": 5.5240110778110975e-05, - "loss": 0.9933, - "step": 23805 - }, - { - "epoch": 0.6830863683961385, - "grad_norm": 0.3515625, - "learning_rate": 5.5195339210237626e-05, - "loss": 0.9589, - "step": 23810 - }, - { - "epoch": 0.6832298136645962, - "grad_norm": 0.4140625, - "learning_rate": 5.515057887635083e-05, - "loss": 1.0148, - "step": 23815 - }, - { - "epoch": 0.6833732589330541, - "grad_norm": 0.380859375, - "learning_rate": 5.510582978767356e-05, - "loss": 0.9787, - "step": 23820 - }, - { - "epoch": 0.6835167042015119, - "grad_norm": 0.443359375, - "learning_rate": 5.506109195542579e-05, - "loss": 0.9726, - "step": 23825 - }, - { - "epoch": 0.6836601494699698, - "grad_norm": 0.42578125, - "learning_rate": 5.501636539082478e-05, - "loss": 0.9753, - "step": 23830 - }, - { - "epoch": 0.6838035947384276, - "grad_norm": 0.3671875, - "learning_rate": 5.497165010508492e-05, - "loss": 0.9076, - "step": 23835 - }, - { - "epoch": 0.6839470400068853, - "grad_norm": 0.345703125, - "learning_rate": 5.4926946109417775e-05, - "loss": 0.9912, - "step": 23840 - }, - { - "epoch": 0.6840904852753432, - "grad_norm": 0.380859375, - "learning_rate": 5.488225341503209e-05, - "loss": 1.0526, - "step": 23845 - }, - { - "epoch": 0.684233930543801, - "grad_norm": 0.36328125, - "learning_rate": 5.483757203313383e-05, - "loss": 1.0549, - "step": 23850 - }, - { - "epoch": 0.6843773758122589, - "grad_norm": 0.369140625, - "learning_rate": 5.479290197492605e-05, - "loss": 0.9556, - "step": 23855 - }, - { - "epoch": 0.6845208210807167, - "grad_norm": 0.396484375, - "learning_rate": 5.4748243251608965e-05, - "loss": 1.0075, - "step": 23860 - }, - { - "epoch": 0.6846642663491744, - "grad_norm": 0.375, - "learning_rate": 5.4703595874379943e-05, - "loss": 1.0199, - "step": 23865 - }, - { - "epoch": 0.6848077116176323, - "grad_norm": 0.435546875, - "learning_rate": 5.465895985443361e-05, - "loss": 0.9648, - "step": 23870 - }, - { - "epoch": 0.6849511568860901, - "grad_norm": 0.37890625, - "learning_rate": 5.461433520296164e-05, - "loss": 0.9248, - "step": 23875 - }, - { - "epoch": 0.685094602154548, - "grad_norm": 0.388671875, - "learning_rate": 5.4569721931152864e-05, - "loss": 0.8407, - "step": 23880 - }, - { - "epoch": 
0.6852380474230058, - "grad_norm": 0.375, - "learning_rate": 5.4525120050193254e-05, - "loss": 1.0521, - "step": 23885 - }, - { - "epoch": 0.6853814926914635, - "grad_norm": 0.353515625, - "learning_rate": 5.448052957126606e-05, - "loss": 0.9631, - "step": 23890 - }, - { - "epoch": 0.6855249379599214, - "grad_norm": 0.365234375, - "learning_rate": 5.443595050555147e-05, - "loss": 0.9422, - "step": 23895 - }, - { - "epoch": 0.6856683832283792, - "grad_norm": 0.412109375, - "learning_rate": 5.4391382864226916e-05, - "loss": 1.059, - "step": 23900 - }, - { - "epoch": 0.6858118284968371, - "grad_norm": 0.404296875, - "learning_rate": 5.4346826658467e-05, - "loss": 0.9568, - "step": 23905 - }, - { - "epoch": 0.6859552737652949, - "grad_norm": 0.365234375, - "learning_rate": 5.4302281899443394e-05, - "loss": 0.9542, - "step": 23910 - }, - { - "epoch": 0.6860987190337526, - "grad_norm": 0.41015625, - "learning_rate": 5.425774859832491e-05, - "loss": 0.9718, - "step": 23915 - }, - { - "epoch": 0.6862421643022105, - "grad_norm": 0.41796875, - "learning_rate": 5.421322676627747e-05, - "loss": 0.9313, - "step": 23920 - }, - { - "epoch": 0.6863856095706683, - "grad_norm": 0.45703125, - "learning_rate": 5.4168716414464204e-05, - "loss": 1.0362, - "step": 23925 - }, - { - "epoch": 0.6865290548391262, - "grad_norm": 0.38671875, - "learning_rate": 5.412421755404529e-05, - "loss": 1.0307, - "step": 23930 - }, - { - "epoch": 0.686672500107584, - "grad_norm": 0.408203125, - "learning_rate": 5.407973019617798e-05, - "loss": 0.9777, - "step": 23935 - }, - { - "epoch": 0.6868159453760417, - "grad_norm": 0.376953125, - "learning_rate": 5.40352543520168e-05, - "loss": 0.9424, - "step": 23940 - }, - { - "epoch": 0.6869593906444996, - "grad_norm": 0.349609375, - "learning_rate": 5.399079003271322e-05, - "loss": 1.0117, - "step": 23945 - }, - { - "epoch": 0.6871028359129574, - "grad_norm": 0.33984375, - "learning_rate": 5.3946337249415936e-05, - "loss": 0.9896, - "step": 23950 - }, - { - "epoch": 0.6872462811814153, - "grad_norm": 0.37890625, - "learning_rate": 5.390189601327062e-05, - "loss": 0.9094, - "step": 23955 - }, - { - "epoch": 0.6873897264498731, - "grad_norm": 0.427734375, - "learning_rate": 5.385746633542027e-05, - "loss": 1.0342, - "step": 23960 - }, - { - "epoch": 0.6875331717183308, - "grad_norm": 0.39453125, - "learning_rate": 5.381304822700477e-05, - "loss": 1.0173, - "step": 23965 - }, - { - "epoch": 0.6876766169867887, - "grad_norm": 0.404296875, - "learning_rate": 5.376864169916116e-05, - "loss": 0.9602, - "step": 23970 - }, - { - "epoch": 0.6878200622552465, - "grad_norm": 0.380859375, - "learning_rate": 5.372424676302368e-05, - "loss": 1.0182, - "step": 23975 - }, - { - "epoch": 0.6879635075237043, - "grad_norm": 0.37890625, - "learning_rate": 5.367986342972355e-05, - "loss": 0.9877, - "step": 23980 - }, - { - "epoch": 0.6881069527921622, - "grad_norm": 0.365234375, - "learning_rate": 5.3635491710389105e-05, - "loss": 1.0052, - "step": 23985 - }, - { - "epoch": 0.6882503980606199, - "grad_norm": 0.400390625, - "learning_rate": 5.359113161614576e-05, - "loss": 0.8574, - "step": 23990 - }, - { - "epoch": 0.6883938433290778, - "grad_norm": 0.375, - "learning_rate": 5.35467831581161e-05, - "loss": 0.921, - "step": 23995 - }, - { - "epoch": 0.6885372885975356, - "grad_norm": 0.43359375, - "learning_rate": 5.3502446347419674e-05, - "loss": 0.9464, - "step": 24000 - }, - { - "epoch": 0.6886807338659934, - "grad_norm": 0.3828125, - "learning_rate": 5.345812119517317e-05, - "loss": 0.9223, - "step": 
24005 - }, - { - "epoch": 0.6888241791344513, - "grad_norm": 0.404296875, - "learning_rate": 5.341380771249037e-05, - "loss": 0.9104, - "step": 24010 - }, - { - "epoch": 0.688967624402909, - "grad_norm": 0.380859375, - "learning_rate": 5.336950591048209e-05, - "loss": 0.9722, - "step": 24015 - }, - { - "epoch": 0.6891110696713669, - "grad_norm": 0.3828125, - "learning_rate": 5.332521580025622e-05, - "loss": 1.074, - "step": 24020 - }, - { - "epoch": 0.6892545149398247, - "grad_norm": 0.404296875, - "learning_rate": 5.328093739291771e-05, - "loss": 0.9374, - "step": 24025 - }, - { - "epoch": 0.6893979602082825, - "grad_norm": 0.384765625, - "learning_rate": 5.323667069956868e-05, - "loss": 0.9849, - "step": 24030 - }, - { - "epoch": 0.6895414054767404, - "grad_norm": 0.400390625, - "learning_rate": 5.319241573130818e-05, - "loss": 0.9149, - "step": 24035 - }, - { - "epoch": 0.6896848507451981, - "grad_norm": 0.404296875, - "learning_rate": 5.314817249923236e-05, - "loss": 1.0026, - "step": 24040 - }, - { - "epoch": 0.689828296013656, - "grad_norm": 0.40234375, - "learning_rate": 5.3103941014434435e-05, - "loss": 0.9814, - "step": 24045 - }, - { - "epoch": 0.6899717412821138, - "grad_norm": 0.375, - "learning_rate": 5.3059721288004714e-05, - "loss": 0.9141, - "step": 24050 - }, - { - "epoch": 0.6901151865505716, - "grad_norm": 0.388671875, - "learning_rate": 5.301551333103051e-05, - "loss": 0.902, - "step": 24055 - }, - { - "epoch": 0.6902586318190295, - "grad_norm": 0.376953125, - "learning_rate": 5.297131715459614e-05, - "loss": 1.034, - "step": 24060 - }, - { - "epoch": 0.6904020770874872, - "grad_norm": 0.392578125, - "learning_rate": 5.292713276978311e-05, - "loss": 1.1449, - "step": 24065 - }, - { - "epoch": 0.6905455223559451, - "grad_norm": 0.3515625, - "learning_rate": 5.288296018766987e-05, - "loss": 0.8987, - "step": 24070 - }, - { - "epoch": 0.6906889676244029, - "grad_norm": 0.4140625, - "learning_rate": 5.2838799419331884e-05, - "loss": 0.971, - "step": 24075 - }, - { - "epoch": 0.6908324128928607, - "grad_norm": 0.392578125, - "learning_rate": 5.2794650475841664e-05, - "loss": 1.0344, - "step": 24080 - }, - { - "epoch": 0.6909758581613186, - "grad_norm": 0.357421875, - "learning_rate": 5.275051336826888e-05, - "loss": 0.8089, - "step": 24085 - }, - { - "epoch": 0.6911193034297763, - "grad_norm": 0.36328125, - "learning_rate": 5.2706388107680095e-05, - "loss": 0.9508, - "step": 24090 - }, - { - "epoch": 0.6912627486982342, - "grad_norm": 0.4140625, - "learning_rate": 5.266227470513892e-05, - "loss": 0.9685, - "step": 24095 - }, - { - "epoch": 0.691406193966692, - "grad_norm": 0.380859375, - "learning_rate": 5.2618173171706064e-05, - "loss": 0.9502, - "step": 24100 - }, - { - "epoch": 0.6915496392351498, - "grad_norm": 0.447265625, - "learning_rate": 5.2574083518439196e-05, - "loss": 1.1002, - "step": 24105 - }, - { - "epoch": 0.6916930845036077, - "grad_norm": 0.390625, - "learning_rate": 5.253000575639305e-05, - "loss": 0.9611, - "step": 24110 - }, - { - "epoch": 0.6918365297720654, - "grad_norm": 0.349609375, - "learning_rate": 5.24859398966193e-05, - "loss": 0.9947, - "step": 24115 - }, - { - "epoch": 0.6919799750405233, - "grad_norm": 0.38671875, - "learning_rate": 5.2441885950166746e-05, - "loss": 0.9204, - "step": 24120 - }, - { - "epoch": 0.6921234203089811, - "grad_norm": 0.375, - "learning_rate": 5.239784392808114e-05, - "loss": 0.9519, - "step": 24125 - }, - { - "epoch": 0.6922668655774389, - "grad_norm": 0.36328125, - "learning_rate": 5.235381384140519e-05, - "loss": 
0.886, - "step": 24130 - }, - { - "epoch": 0.6924103108458968, - "grad_norm": 0.41796875, - "learning_rate": 5.230979570117876e-05, - "loss": 0.9701, - "step": 24135 - }, - { - "epoch": 0.6925537561143545, - "grad_norm": 0.416015625, - "learning_rate": 5.226578951843859e-05, - "loss": 0.9738, - "step": 24140 - }, - { - "epoch": 0.6926972013828124, - "grad_norm": 0.39453125, - "learning_rate": 5.222179530421847e-05, - "loss": 0.8784, - "step": 24145 - }, - { - "epoch": 0.6928406466512702, - "grad_norm": 0.359375, - "learning_rate": 5.217781306954912e-05, - "loss": 0.9471, - "step": 24150 - }, - { - "epoch": 0.692984091919728, - "grad_norm": 0.388671875, - "learning_rate": 5.213384282545842e-05, - "loss": 0.9497, - "step": 24155 - }, - { - "epoch": 0.6931275371881859, - "grad_norm": 0.376953125, - "learning_rate": 5.208988458297109e-05, - "loss": 0.9203, - "step": 24160 - }, - { - "epoch": 0.6932709824566436, - "grad_norm": 0.373046875, - "learning_rate": 5.2045938353108845e-05, - "loss": 0.923, - "step": 24165 - }, - { - "epoch": 0.6934144277251015, - "grad_norm": 0.392578125, - "learning_rate": 5.2002004146890535e-05, - "loss": 0.9594, - "step": 24170 - }, - { - "epoch": 0.6935578729935593, - "grad_norm": 0.408203125, - "learning_rate": 5.195808197533185e-05, - "loss": 0.9376, - "step": 24175 - }, - { - "epoch": 0.6937013182620171, - "grad_norm": 0.408203125, - "learning_rate": 5.191417184944549e-05, - "loss": 0.9099, - "step": 24180 - }, - { - "epoch": 0.693844763530475, - "grad_norm": 0.38671875, - "learning_rate": 5.1870273780241164e-05, - "loss": 0.9021, - "step": 24185 - }, - { - "epoch": 0.6939882087989327, - "grad_norm": 0.35546875, - "learning_rate": 5.182638777872555e-05, - "loss": 0.8651, - "step": 24190 - }, - { - "epoch": 0.6941316540673906, - "grad_norm": 0.365234375, - "learning_rate": 5.1782513855902293e-05, - "loss": 0.853, - "step": 24195 - }, - { - "epoch": 0.6942750993358484, - "grad_norm": 0.408203125, - "learning_rate": 5.1738652022771974e-05, - "loss": 0.971, - "step": 24200 - }, - { - "epoch": 0.6944185446043062, - "grad_norm": 0.392578125, - "learning_rate": 5.169480229033227e-05, - "loss": 0.8783, - "step": 24205 - }, - { - "epoch": 0.6945619898727641, - "grad_norm": 0.375, - "learning_rate": 5.165096466957769e-05, - "loss": 0.9772, - "step": 24210 - }, - { - "epoch": 0.6947054351412218, - "grad_norm": 0.369140625, - "learning_rate": 5.160713917149973e-05, - "loss": 0.8332, - "step": 24215 - }, - { - "epoch": 0.6948488804096797, - "grad_norm": 0.380859375, - "learning_rate": 5.1563325807086856e-05, - "loss": 0.9186, - "step": 24220 - }, - { - "epoch": 0.6949923256781375, - "grad_norm": 0.38671875, - "learning_rate": 5.151952458732459e-05, - "loss": 0.9084, - "step": 24225 - }, - { - "epoch": 0.6951357709465953, - "grad_norm": 0.37890625, - "learning_rate": 5.147573552319526e-05, - "loss": 0.8777, - "step": 24230 - }, - { - "epoch": 0.6952792162150532, - "grad_norm": 0.404296875, - "learning_rate": 5.1431958625678224e-05, - "loss": 0.9139, - "step": 24235 - }, - { - "epoch": 0.695422661483511, - "grad_norm": 0.416015625, - "learning_rate": 5.138819390574972e-05, - "loss": 0.8804, - "step": 24240 - }, - { - "epoch": 0.6955661067519688, - "grad_norm": 0.376953125, - "learning_rate": 5.134444137438309e-05, - "loss": 0.9743, - "step": 24245 - }, - { - "epoch": 0.6957095520204266, - "grad_norm": 0.373046875, - "learning_rate": 5.130070104254847e-05, - "loss": 0.9208, - "step": 24250 - }, - { - "epoch": 0.6958529972888844, - "grad_norm": 0.37890625, - "learning_rate": 
5.125697292121293e-05, - "loss": 0.9989, - "step": 24255 - }, - { - "epoch": 0.6959964425573423, - "grad_norm": 0.39453125, - "learning_rate": 5.121325702134063e-05, - "loss": 0.9056, - "step": 24260 - }, - { - "epoch": 0.6961398878258, - "grad_norm": 0.36328125, - "learning_rate": 5.116955335389252e-05, - "loss": 0.9803, - "step": 24265 - }, - { - "epoch": 0.6962833330942579, - "grad_norm": 0.41796875, - "learning_rate": 5.112586192982653e-05, - "loss": 0.9712, - "step": 24270 - }, - { - "epoch": 0.6964267783627157, - "grad_norm": 0.384765625, - "learning_rate": 5.1082182760097485e-05, - "loss": 0.8962, - "step": 24275 - }, - { - "epoch": 0.6965702236311735, - "grad_norm": 0.3359375, - "learning_rate": 5.1038515855657264e-05, - "loss": 0.9473, - "step": 24280 - }, - { - "epoch": 0.6967136688996314, - "grad_norm": 0.40234375, - "learning_rate": 5.099486122745454e-05, - "loss": 0.9659, - "step": 24285 - }, - { - "epoch": 0.6968571141680892, - "grad_norm": 0.419921875, - "learning_rate": 5.0951218886434884e-05, - "loss": 0.9842, - "step": 24290 - }, - { - "epoch": 0.697000559436547, - "grad_norm": 0.40234375, - "learning_rate": 5.090758884354096e-05, - "loss": 0.9358, - "step": 24295 - }, - { - "epoch": 0.6971440047050048, - "grad_norm": 0.380859375, - "learning_rate": 5.086397110971218e-05, - "loss": 0.9893, - "step": 24300 - }, - { - "epoch": 0.6972874499734626, - "grad_norm": 0.40234375, - "learning_rate": 5.0820365695884956e-05, - "loss": 0.8627, - "step": 24305 - }, - { - "epoch": 0.6974308952419205, - "grad_norm": 0.3828125, - "learning_rate": 5.077677261299251e-05, - "loss": 0.8785, - "step": 24310 - }, - { - "epoch": 0.6975743405103783, - "grad_norm": 0.3515625, - "learning_rate": 5.0733191871965145e-05, - "loss": 0.9187, - "step": 24315 - }, - { - "epoch": 0.697717785778836, - "grad_norm": 0.431640625, - "learning_rate": 5.068962348372992e-05, - "loss": 1.0564, - "step": 24320 - }, - { - "epoch": 0.6978612310472939, - "grad_norm": 0.37890625, - "learning_rate": 5.0646067459210875e-05, - "loss": 0.9979, - "step": 24325 - }, - { - "epoch": 0.6980046763157517, - "grad_norm": 0.3515625, - "learning_rate": 5.060252380932886e-05, - "loss": 0.8883, - "step": 24330 - }, - { - "epoch": 0.6981481215842096, - "grad_norm": 0.361328125, - "learning_rate": 5.055899254500176e-05, - "loss": 0.979, - "step": 24335 - }, - { - "epoch": 0.6982915668526674, - "grad_norm": 0.396484375, - "learning_rate": 5.0515473677144254e-05, - "loss": 0.941, - "step": 24340 - }, - { - "epoch": 0.6984350121211251, - "grad_norm": 0.388671875, - "learning_rate": 5.0471967216667894e-05, - "loss": 0.9579, - "step": 24345 - }, - { - "epoch": 0.698578457389583, - "grad_norm": 0.376953125, - "learning_rate": 5.042847317448125e-05, - "loss": 0.9454, - "step": 24350 - }, - { - "epoch": 0.6987219026580408, - "grad_norm": 0.39453125, - "learning_rate": 5.038499156148964e-05, - "loss": 0.9909, - "step": 24355 - }, - { - "epoch": 0.6988653479264987, - "grad_norm": 0.384765625, - "learning_rate": 5.034152238859533e-05, - "loss": 0.9171, - "step": 24360 - }, - { - "epoch": 0.6990087931949565, - "grad_norm": 0.333984375, - "learning_rate": 5.029806566669746e-05, - "loss": 0.9138, - "step": 24365 - }, - { - "epoch": 0.6991522384634142, - "grad_norm": 0.365234375, - "learning_rate": 5.025462140669204e-05, - "loss": 0.9738, - "step": 24370 - }, - { - "epoch": 0.6992956837318721, - "grad_norm": 0.412109375, - "learning_rate": 5.021118961947192e-05, - "loss": 0.962, - "step": 24375 - }, - { - "epoch": 0.6994391290003299, - "grad_norm": 
0.384765625, - "learning_rate": 5.016777031592694e-05, - "loss": 0.9669, - "step": 24380 - }, - { - "epoch": 0.6995825742687878, - "grad_norm": 0.392578125, - "learning_rate": 5.012436350694369e-05, - "loss": 1.0352, - "step": 24385 - }, - { - "epoch": 0.6997260195372456, - "grad_norm": 0.376953125, - "learning_rate": 5.008096920340568e-05, - "loss": 0.9193, - "step": 24390 - }, - { - "epoch": 0.6998694648057033, - "grad_norm": 0.375, - "learning_rate": 5.0037587416193265e-05, - "loss": 0.9659, - "step": 24395 - }, - { - "epoch": 0.7000129100741612, - "grad_norm": 0.3671875, - "learning_rate": 4.999421815618364e-05, - "loss": 0.9505, - "step": 24400 - }, - { - "epoch": 0.700156355342619, - "grad_norm": 0.38671875, - "learning_rate": 4.995086143425095e-05, - "loss": 0.9449, - "step": 24405 - }, - { - "epoch": 0.7002998006110769, - "grad_norm": 0.37109375, - "learning_rate": 4.990751726126612e-05, - "loss": 0.8771, - "step": 24410 - }, - { - "epoch": 0.7004432458795347, - "grad_norm": 0.369140625, - "learning_rate": 4.9864185648096894e-05, - "loss": 0.8944, - "step": 24415 - }, - { - "epoch": 0.7005866911479924, - "grad_norm": 0.35546875, - "learning_rate": 4.9820866605607994e-05, - "loss": 0.885, - "step": 24420 - }, - { - "epoch": 0.7007301364164503, - "grad_norm": 0.3828125, - "learning_rate": 4.977756014466088e-05, - "loss": 0.8689, - "step": 24425 - }, - { - "epoch": 0.7008735816849081, - "grad_norm": 0.3984375, - "learning_rate": 4.973426627611389e-05, - "loss": 0.9528, - "step": 24430 - }, - { - "epoch": 0.701017026953366, - "grad_norm": 0.35546875, - "learning_rate": 4.969098501082217e-05, - "loss": 0.9437, - "step": 24435 - }, - { - "epoch": 0.7011604722218238, - "grad_norm": 0.37109375, - "learning_rate": 4.964771635963781e-05, - "loss": 0.9431, - "step": 24440 - }, - { - "epoch": 0.7013039174902815, - "grad_norm": 0.359375, - "learning_rate": 4.9604460333409644e-05, - "loss": 0.9005, - "step": 24445 - }, - { - "epoch": 0.7014473627587394, - "grad_norm": 0.375, - "learning_rate": 4.95612169429833e-05, - "loss": 1.069, - "step": 24450 - }, - { - "epoch": 0.7015908080271972, - "grad_norm": 0.365234375, - "learning_rate": 4.951798619920142e-05, - "loss": 1.0347, - "step": 24455 - }, - { - "epoch": 0.7017342532956551, - "grad_norm": 0.353515625, - "learning_rate": 4.9474768112903293e-05, - "loss": 0.9493, - "step": 24460 - }, - { - "epoch": 0.7018776985641129, - "grad_norm": 0.375, - "learning_rate": 4.9431562694925094e-05, - "loss": 0.938, - "step": 24465 - }, - { - "epoch": 0.7020211438325706, - "grad_norm": 0.365234375, - "learning_rate": 4.9388369956099815e-05, - "loss": 0.8696, - "step": 24470 - }, - { - "epoch": 0.7021645891010285, - "grad_norm": 0.3671875, - "learning_rate": 4.934518990725734e-05, - "loss": 0.9127, - "step": 24475 - }, - { - "epoch": 0.7023080343694863, - "grad_norm": 0.38671875, - "learning_rate": 4.930202255922427e-05, - "loss": 0.9525, - "step": 24480 - }, - { - "epoch": 0.7024514796379442, - "grad_norm": 0.41015625, - "learning_rate": 4.925886792282408e-05, - "loss": 0.8864, - "step": 24485 - }, - { - "epoch": 0.702594924906402, - "grad_norm": 0.38671875, - "learning_rate": 4.9215726008876995e-05, - "loss": 1.0211, - "step": 24490 - }, - { - "epoch": 0.7027383701748597, - "grad_norm": 0.38671875, - "learning_rate": 4.9172596828200166e-05, - "loss": 0.947, - "step": 24495 - }, - { - "epoch": 0.7028818154433176, - "grad_norm": 0.421875, - "learning_rate": 4.9129480391607465e-05, - "loss": 0.9677, - "step": 24500 - }, - { - "epoch": 0.7030252607117754, - 
"grad_norm": 0.4140625, - "learning_rate": 4.9086376709909534e-05, - "loss": 0.963, - "step": 24505 - }, - { - "epoch": 0.7031687059802333, - "grad_norm": 0.404296875, - "learning_rate": 4.904328579391393e-05, - "loss": 0.8912, - "step": 24510 - }, - { - "epoch": 0.7033121512486911, - "grad_norm": 0.416015625, - "learning_rate": 4.900020765442494e-05, - "loss": 1.0812, - "step": 24515 - }, - { - "epoch": 0.7034555965171488, - "grad_norm": 0.361328125, - "learning_rate": 4.895714230224363e-05, - "loss": 0.8665, - "step": 24520 - }, - { - "epoch": 0.7035990417856067, - "grad_norm": 0.388671875, - "learning_rate": 4.891408974816787e-05, - "loss": 1.0233, - "step": 24525 - }, - { - "epoch": 0.7037424870540645, - "grad_norm": 0.384765625, - "learning_rate": 4.887105000299239e-05, - "loss": 0.9945, - "step": 24530 - }, - { - "epoch": 0.7038859323225224, - "grad_norm": 0.380859375, - "learning_rate": 4.8828023077508634e-05, - "loss": 0.9927, - "step": 24535 - }, - { - "epoch": 0.7040293775909802, - "grad_norm": 0.349609375, - "learning_rate": 4.8785008982504845e-05, - "loss": 0.8786, - "step": 24540 - }, - { - "epoch": 0.7041728228594379, - "grad_norm": 0.400390625, - "learning_rate": 4.8742007728766014e-05, - "loss": 0.926, - "step": 24545 - }, - { - "epoch": 0.7043162681278958, - "grad_norm": 0.390625, - "learning_rate": 4.8699019327074035e-05, - "loss": 0.9987, - "step": 24550 - }, - { - "epoch": 0.7044597133963536, - "grad_norm": 0.38671875, - "learning_rate": 4.865604378820747e-05, - "loss": 1.0738, - "step": 24555 - }, - { - "epoch": 0.7046031586648115, - "grad_norm": 0.4140625, - "learning_rate": 4.861308112294168e-05, - "loss": 0.9966, - "step": 24560 - }, - { - "epoch": 0.7047466039332693, - "grad_norm": 0.357421875, - "learning_rate": 4.85701313420488e-05, - "loss": 1.0242, - "step": 24565 - }, - { - "epoch": 0.704890049201727, - "grad_norm": 0.421875, - "learning_rate": 4.852719445629773e-05, - "loss": 1.0624, - "step": 24570 - }, - { - "epoch": 0.7050334944701849, - "grad_norm": 0.38671875, - "learning_rate": 4.848427047645412e-05, - "loss": 0.9528, - "step": 24575 - }, - { - "epoch": 0.7051769397386427, - "grad_norm": 0.376953125, - "learning_rate": 4.844135941328048e-05, - "loss": 1.0042, - "step": 24580 - }, - { - "epoch": 0.7053203850071006, - "grad_norm": 0.39453125, - "learning_rate": 4.839846127753598e-05, - "loss": 0.987, - "step": 24585 - }, - { - "epoch": 0.7054638302755584, - "grad_norm": 0.4140625, - "learning_rate": 4.835557607997656e-05, - "loss": 1.0633, - "step": 24590 - }, - { - "epoch": 0.7056072755440161, - "grad_norm": 0.404296875, - "learning_rate": 4.8312703831354914e-05, - "loss": 0.8773, - "step": 24595 - }, - { - "epoch": 0.705750720812474, - "grad_norm": 0.404296875, - "learning_rate": 4.826984454242057e-05, - "loss": 0.9528, - "step": 24600 - }, - { - "epoch": 0.7058941660809318, - "grad_norm": 0.36328125, - "learning_rate": 4.822699822391973e-05, - "loss": 1.0501, - "step": 24605 - }, - { - "epoch": 0.7060376113493897, - "grad_norm": 0.4140625, - "learning_rate": 4.818416488659534e-05, - "loss": 0.9762, - "step": 24610 - }, - { - "epoch": 0.7061810566178475, - "grad_norm": 0.408203125, - "learning_rate": 4.81413445411871e-05, - "loss": 1.0769, - "step": 24615 - }, - { - "epoch": 0.7063245018863052, - "grad_norm": 0.3828125, - "learning_rate": 4.80985371984315e-05, - "loss": 0.9538, - "step": 24620 - }, - { - "epoch": 0.7064679471547631, - "grad_norm": 0.357421875, - "learning_rate": 4.805574286906173e-05, - "loss": 0.969, - "step": 24625 - }, - { - "epoch": 
0.7066113924232209, - "grad_norm": 0.37890625, - "learning_rate": 4.801296156380767e-05, - "loss": 0.8848, - "step": 24630 - }, - { - "epoch": 0.7067548376916788, - "grad_norm": 0.375, - "learning_rate": 4.797019329339606e-05, - "loss": 0.9761, - "step": 24635 - }, - { - "epoch": 0.7068982829601366, - "grad_norm": 0.35546875, - "learning_rate": 4.7927438068550256e-05, - "loss": 1.0195, - "step": 24640 - }, - { - "epoch": 0.7070417282285943, - "grad_norm": 0.37890625, - "learning_rate": 4.788469589999041e-05, - "loss": 0.9848, - "step": 24645 - }, - { - "epoch": 0.7071851734970522, - "grad_norm": 0.3984375, - "learning_rate": 4.78419667984333e-05, - "loss": 0.9309, - "step": 24650 - }, - { - "epoch": 0.70732861876551, - "grad_norm": 0.3828125, - "learning_rate": 4.7799250774592594e-05, - "loss": 1.0571, - "step": 24655 - }, - { - "epoch": 0.7074720640339678, - "grad_norm": 0.37890625, - "learning_rate": 4.7756547839178564e-05, - "loss": 0.9213, - "step": 24660 - }, - { - "epoch": 0.7076155093024257, - "grad_norm": 0.365234375, - "learning_rate": 4.771385800289817e-05, - "loss": 0.8847, - "step": 24665 - }, - { - "epoch": 0.7077589545708834, - "grad_norm": 0.40625, - "learning_rate": 4.767118127645524e-05, - "loss": 1.0277, - "step": 24670 - }, - { - "epoch": 0.7079023998393413, - "grad_norm": 0.40625, - "learning_rate": 4.762851767055014e-05, - "loss": 0.9961, - "step": 24675 - }, - { - "epoch": 0.7080458451077991, - "grad_norm": 0.369140625, - "learning_rate": 4.758586719588007e-05, - "loss": 0.8766, - "step": 24680 - }, - { - "epoch": 0.7081892903762569, - "grad_norm": 0.453125, - "learning_rate": 4.754322986313884e-05, - "loss": 0.9204, - "step": 24685 - }, - { - "epoch": 0.7083327356447148, - "grad_norm": 0.3671875, - "learning_rate": 4.7500605683017076e-05, - "loss": 0.926, - "step": 24690 - }, - { - "epoch": 0.7084761809131725, - "grad_norm": 0.376953125, - "learning_rate": 4.745799466620201e-05, - "loss": 0.972, - "step": 24695 - }, - { - "epoch": 0.7086196261816304, - "grad_norm": 0.3984375, - "learning_rate": 4.74153968233776e-05, - "loss": 0.971, - "step": 24700 - }, - { - "epoch": 0.7087630714500882, - "grad_norm": 0.3828125, - "learning_rate": 4.7372812165224565e-05, - "loss": 0.9709, - "step": 24705 - }, - { - "epoch": 0.708906516718546, - "grad_norm": 0.35546875, - "learning_rate": 4.733024070242024e-05, - "loss": 0.7763, - "step": 24710 - }, - { - "epoch": 0.7090499619870039, - "grad_norm": 0.388671875, - "learning_rate": 4.7287682445638645e-05, - "loss": 0.9489, - "step": 24715 - }, - { - "epoch": 0.7091934072554616, - "grad_norm": 0.396484375, - "learning_rate": 4.724513740555053e-05, - "loss": 1.0059, - "step": 24720 - }, - { - "epoch": 0.7093368525239195, - "grad_norm": 0.396484375, - "learning_rate": 4.7202605592823365e-05, - "loss": 0.8501, - "step": 24725 - }, - { - "epoch": 0.7094802977923773, - "grad_norm": 0.427734375, - "learning_rate": 4.716008701812123e-05, - "loss": 0.8875, - "step": 24730 - }, - { - "epoch": 0.7096237430608351, - "grad_norm": 0.357421875, - "learning_rate": 4.711758169210491e-05, - "loss": 0.9226, - "step": 24735 - }, - { - "epoch": 0.709767188329293, - "grad_norm": 0.392578125, - "learning_rate": 4.707508962543188e-05, - "loss": 0.9218, - "step": 24740 - }, - { - "epoch": 0.7099106335977508, - "grad_norm": 0.39453125, - "learning_rate": 4.703261082875628e-05, - "loss": 0.9357, - "step": 24745 - }, - { - "epoch": 0.7100540788662086, - "grad_norm": 0.4375, - "learning_rate": 4.699014531272894e-05, - "loss": 1.017, - "step": 24750 - }, - { - 
"epoch": 0.7101975241346664, - "grad_norm": 0.37890625, - "learning_rate": 4.694769308799729e-05, - "loss": 0.9629, - "step": 24755 - }, - { - "epoch": 0.7103409694031242, - "grad_norm": 0.376953125, - "learning_rate": 4.690525416520557e-05, - "loss": 1.053, - "step": 24760 - }, - { - "epoch": 0.7104844146715821, - "grad_norm": 0.373046875, - "learning_rate": 4.6862828554994565e-05, - "loss": 0.9209, - "step": 24765 - }, - { - "epoch": 0.7106278599400399, - "grad_norm": 0.41015625, - "learning_rate": 4.6820416268001747e-05, - "loss": 0.9756, - "step": 24770 - }, - { - "epoch": 0.7107713052084977, - "grad_norm": 0.373046875, - "learning_rate": 4.677801731486121e-05, - "loss": 0.8319, - "step": 24775 - }, - { - "epoch": 0.7109147504769555, - "grad_norm": 0.44140625, - "learning_rate": 4.673563170620385e-05, - "loss": 0.9551, - "step": 24780 - }, - { - "epoch": 0.7110581957454133, - "grad_norm": 0.37109375, - "learning_rate": 4.669325945265707e-05, - "loss": 0.9423, - "step": 24785 - }, - { - "epoch": 0.7112016410138712, - "grad_norm": 0.404296875, - "learning_rate": 4.6650900564844935e-05, - "loss": 0.8773, - "step": 24790 - }, - { - "epoch": 0.711345086282329, - "grad_norm": 0.375, - "learning_rate": 4.660855505338827e-05, - "loss": 0.8376, - "step": 24795 - }, - { - "epoch": 0.7114885315507868, - "grad_norm": 0.3828125, - "learning_rate": 4.6566222928904436e-05, - "loss": 1.0095, - "step": 24800 - }, - { - "epoch": 0.7116319768192446, - "grad_norm": 0.400390625, - "learning_rate": 4.652390420200747e-05, - "loss": 0.9236, - "step": 24805 - }, - { - "epoch": 0.7117754220877024, - "grad_norm": 0.40234375, - "learning_rate": 4.648159888330804e-05, - "loss": 1.0399, - "step": 24810 - }, - { - "epoch": 0.7119188673561603, - "grad_norm": 0.35546875, - "learning_rate": 4.643930698341351e-05, - "loss": 0.9265, - "step": 24815 - }, - { - "epoch": 0.712062312624618, - "grad_norm": 0.33984375, - "learning_rate": 4.639702851292782e-05, - "loss": 1.022, - "step": 24820 - }, - { - "epoch": 0.7122057578930759, - "grad_norm": 0.373046875, - "learning_rate": 4.635476348245151e-05, - "loss": 1.001, - "step": 24825 - }, - { - "epoch": 0.7123492031615337, - "grad_norm": 0.373046875, - "learning_rate": 4.631251190258187e-05, - "loss": 0.9613, - "step": 24830 - }, - { - "epoch": 0.7124926484299915, - "grad_norm": 0.404296875, - "learning_rate": 4.627027378391272e-05, - "loss": 0.93, - "step": 24835 - }, - { - "epoch": 0.7126360936984494, - "grad_norm": 0.36328125, - "learning_rate": 4.622804913703452e-05, - "loss": 0.9283, - "step": 24840 - }, - { - "epoch": 0.7127795389669072, - "grad_norm": 0.388671875, - "learning_rate": 4.618583797253434e-05, - "loss": 0.9477, - "step": 24845 - }, - { - "epoch": 0.712922984235365, - "grad_norm": 0.3984375, - "learning_rate": 4.614364030099596e-05, - "loss": 0.9442, - "step": 24850 - }, - { - "epoch": 0.7130664295038228, - "grad_norm": 0.400390625, - "learning_rate": 4.610145613299967e-05, - "loss": 0.9328, - "step": 24855 - }, - { - "epoch": 0.7132098747722806, - "grad_norm": 0.38671875, - "learning_rate": 4.605928547912237e-05, - "loss": 0.9378, - "step": 24860 - }, - { - "epoch": 0.7133533200407385, - "grad_norm": 0.36328125, - "learning_rate": 4.601712834993771e-05, - "loss": 0.8988, - "step": 24865 - }, - { - "epoch": 0.7134967653091963, - "grad_norm": 0.3671875, - "learning_rate": 4.597498475601579e-05, - "loss": 1.0277, - "step": 24870 - }, - { - "epoch": 0.7136402105776541, - "grad_norm": 0.365234375, - "learning_rate": 4.5932854707923415e-05, - "loss": 0.9995, - 
"step": 24875 - }, - { - "epoch": 0.7137836558461119, - "grad_norm": 0.375, - "learning_rate": 4.5890738216223884e-05, - "loss": 1.0247, - "step": 24880 - }, - { - "epoch": 0.7139271011145697, - "grad_norm": 0.4140625, - "learning_rate": 4.5848635291477274e-05, - "loss": 1.0289, - "step": 24885 - }, - { - "epoch": 0.7140705463830276, - "grad_norm": 0.427734375, - "learning_rate": 4.58065459442401e-05, - "loss": 0.9999, - "step": 24890 - }, - { - "epoch": 0.7142139916514854, - "grad_norm": 0.361328125, - "learning_rate": 4.576447018506551e-05, - "loss": 0.9044, - "step": 24895 - }, - { - "epoch": 0.7143574369199432, - "grad_norm": 0.384765625, - "learning_rate": 4.572240802450335e-05, - "loss": 0.9145, - "step": 24900 - }, - { - "epoch": 0.714500882188401, - "grad_norm": 0.458984375, - "learning_rate": 4.568035947309991e-05, - "loss": 1.0242, - "step": 24905 - }, - { - "epoch": 0.7146443274568588, - "grad_norm": 0.375, - "learning_rate": 4.5638324541398136e-05, - "loss": 0.8746, - "step": 24910 - }, - { - "epoch": 0.7147877727253167, - "grad_norm": 0.38671875, - "learning_rate": 4.5596303239937563e-05, - "loss": 0.8674, - "step": 24915 - }, - { - "epoch": 0.7149312179937745, - "grad_norm": 0.3671875, - "learning_rate": 4.55542955792543e-05, - "loss": 0.9678, - "step": 24920 - }, - { - "epoch": 0.7150746632622323, - "grad_norm": 0.376953125, - "learning_rate": 4.551230156988103e-05, - "loss": 0.9489, - "step": 24925 - }, - { - "epoch": 0.7152181085306901, - "grad_norm": 0.412109375, - "learning_rate": 4.547032122234698e-05, - "loss": 0.9217, - "step": 24930 - }, - { - "epoch": 0.7153615537991479, - "grad_norm": 0.359375, - "learning_rate": 4.542835454717807e-05, - "loss": 0.9476, - "step": 24935 - }, - { - "epoch": 0.7155049990676058, - "grad_norm": 0.37890625, - "learning_rate": 4.538640155489666e-05, - "loss": 1.0018, - "step": 24940 - }, - { - "epoch": 0.7156484443360636, - "grad_norm": 0.3984375, - "learning_rate": 4.5344462256021734e-05, - "loss": 1.009, - "step": 24945 - }, - { - "epoch": 0.7157918896045214, - "grad_norm": 0.37109375, - "learning_rate": 4.5302536661068816e-05, - "loss": 0.9135, - "step": 24950 - }, - { - "epoch": 0.7159353348729792, - "grad_norm": 0.427734375, - "learning_rate": 4.5260624780550074e-05, - "loss": 0.9326, - "step": 24955 - }, - { - "epoch": 0.716078780141437, - "grad_norm": 0.33984375, - "learning_rate": 4.521872662497416e-05, - "loss": 0.8403, - "step": 24960 - }, - { - "epoch": 0.7162222254098949, - "grad_norm": 0.4609375, - "learning_rate": 4.517684220484629e-05, - "loss": 0.9272, - "step": 24965 - }, - { - "epoch": 0.7163656706783527, - "grad_norm": 0.3984375, - "learning_rate": 4.513497153066822e-05, - "loss": 0.8631, - "step": 24970 - }, - { - "epoch": 0.7165091159468105, - "grad_norm": 0.392578125, - "learning_rate": 4.509311461293837e-05, - "loss": 0.9617, - "step": 24975 - }, - { - "epoch": 0.7166525612152683, - "grad_norm": 0.419921875, - "learning_rate": 4.505127146215159e-05, - "loss": 1.008, - "step": 24980 - }, - { - "epoch": 0.7167960064837261, - "grad_norm": 0.40234375, - "learning_rate": 4.500944208879928e-05, - "loss": 1.0232, - "step": 24985 - }, - { - "epoch": 0.716939451752184, - "grad_norm": 0.37890625, - "learning_rate": 4.49676265033695e-05, - "loss": 0.9463, - "step": 24990 - }, - { - "epoch": 0.7170828970206418, - "grad_norm": 0.400390625, - "learning_rate": 4.492582471634674e-05, - "loss": 0.8572, - "step": 24995 - }, - { - "epoch": 0.7172263422890995, - "grad_norm": 0.388671875, - "learning_rate": 4.4884036738212074e-05, - 
"loss": 0.9854, - "step": 25000 - }, - { - "epoch": 0.7173697875575574, - "grad_norm": 0.388671875, - "learning_rate": 4.4842262579443074e-05, - "loss": 0.9001, - "step": 25005 - }, - { - "epoch": 0.7175132328260152, - "grad_norm": 0.400390625, - "learning_rate": 4.480050225051394e-05, - "loss": 0.9419, - "step": 25010 - }, - { - "epoch": 0.7176566780944731, - "grad_norm": 0.431640625, - "learning_rate": 4.4758755761895334e-05, - "loss": 1.0913, - "step": 25015 - }, - { - "epoch": 0.7178001233629309, - "grad_norm": 0.3984375, - "learning_rate": 4.4717023124054394e-05, - "loss": 0.8701, - "step": 25020 - }, - { - "epoch": 0.7179435686313886, - "grad_norm": 0.365234375, - "learning_rate": 4.467530434745494e-05, - "loss": 0.8985, - "step": 25025 - }, - { - "epoch": 0.7180870138998465, - "grad_norm": 0.376953125, - "learning_rate": 4.463359944255718e-05, - "loss": 0.9693, - "step": 25030 - }, - { - "epoch": 0.7182304591683043, - "grad_norm": 0.4140625, - "learning_rate": 4.4591908419817905e-05, - "loss": 0.9733, - "step": 25035 - }, - { - "epoch": 0.7183739044367622, - "grad_norm": 0.396484375, - "learning_rate": 4.455023128969036e-05, - "loss": 0.8719, - "step": 25040 - }, - { - "epoch": 0.71851734970522, - "grad_norm": 0.412109375, - "learning_rate": 4.450856806262445e-05, - "loss": 0.863, - "step": 25045 - }, - { - "epoch": 0.7186607949736777, - "grad_norm": 0.400390625, - "learning_rate": 4.446691874906645e-05, - "loss": 0.9602, - "step": 25050 - }, - { - "epoch": 0.7188042402421356, - "grad_norm": 0.3828125, - "learning_rate": 4.442528335945919e-05, - "loss": 0.9618, - "step": 25055 - }, - { - "epoch": 0.7189476855105934, - "grad_norm": 0.365234375, - "learning_rate": 4.4383661904242e-05, - "loss": 0.9262, - "step": 25060 - }, - { - "epoch": 0.7190911307790513, - "grad_norm": 0.38671875, - "learning_rate": 4.4342054393850796e-05, - "loss": 0.9108, - "step": 25065 - }, - { - "epoch": 0.7192345760475091, - "grad_norm": 0.455078125, - "learning_rate": 4.430046083871791e-05, - "loss": 0.9744, - "step": 25070 - }, - { - "epoch": 0.7193780213159668, - "grad_norm": 0.32421875, - "learning_rate": 4.4258881249272146e-05, - "loss": 0.9301, - "step": 25075 - }, - { - "epoch": 0.7195214665844247, - "grad_norm": 0.412109375, - "learning_rate": 4.421731563593895e-05, - "loss": 1.0015, - "step": 25080 - }, - { - "epoch": 0.7196649118528825, - "grad_norm": 0.38671875, - "learning_rate": 4.417576400914012e-05, - "loss": 1.0248, - "step": 25085 - }, - { - "epoch": 0.7198083571213404, - "grad_norm": 0.373046875, - "learning_rate": 4.413422637929402e-05, - "loss": 0.9472, - "step": 25090 - }, - { - "epoch": 0.7199518023897982, - "grad_norm": 0.38671875, - "learning_rate": 4.409270275681547e-05, - "loss": 0.8476, - "step": 25095 - }, - { - "epoch": 0.7200952476582559, - "grad_norm": 0.384765625, - "learning_rate": 4.40511931521158e-05, - "loss": 1.0227, - "step": 25100 - }, - { - "epoch": 0.7202386929267138, - "grad_norm": 0.40234375, - "learning_rate": 4.4009697575602796e-05, - "loss": 1.0569, - "step": 25105 - }, - { - "epoch": 0.7203821381951716, - "grad_norm": 0.40625, - "learning_rate": 4.396821603768079e-05, - "loss": 0.946, - "step": 25110 - }, - { - "epoch": 0.7205255834636295, - "grad_norm": 0.376953125, - "learning_rate": 4.392674854875054e-05, - "loss": 1.0022, - "step": 25115 - }, - { - "epoch": 0.7206690287320873, - "grad_norm": 0.392578125, - "learning_rate": 4.3885295119209294e-05, - "loss": 0.9617, - "step": 25120 - }, - { - "epoch": 0.720812474000545, - "grad_norm": 0.404296875, - 
"learning_rate": 4.384385575945077e-05, - "loss": 1.0739, - "step": 25125 - }, - { - "epoch": 0.7209559192690029, - "grad_norm": 0.375, - "learning_rate": 4.380243047986513e-05, - "loss": 0.9772, - "step": 25130 - }, - { - "epoch": 0.7210993645374607, - "grad_norm": 0.37890625, - "learning_rate": 4.376101929083911e-05, - "loss": 0.9761, - "step": 25135 - }, - { - "epoch": 0.7212428098059186, - "grad_norm": 0.3671875, - "learning_rate": 4.3719622202755816e-05, - "loss": 0.8484, - "step": 25140 - }, - { - "epoch": 0.7213862550743764, - "grad_norm": 0.35546875, - "learning_rate": 4.36782392259948e-05, - "loss": 0.8965, - "step": 25145 - }, - { - "epoch": 0.7215297003428341, - "grad_norm": 0.443359375, - "learning_rate": 4.3636870370932194e-05, - "loss": 0.9786, - "step": 25150 - }, - { - "epoch": 0.721673145611292, - "grad_norm": 0.443359375, - "learning_rate": 4.359551564794049e-05, - "loss": 0.9923, - "step": 25155 - }, - { - "epoch": 0.7218165908797498, - "grad_norm": 0.404296875, - "learning_rate": 4.3554175067388636e-05, - "loss": 1.0029, - "step": 25160 - }, - { - "epoch": 0.7219600361482077, - "grad_norm": 0.365234375, - "learning_rate": 4.351284863964206e-05, - "loss": 0.898, - "step": 25165 - }, - { - "epoch": 0.7221034814166655, - "grad_norm": 0.3828125, - "learning_rate": 4.3471536375062696e-05, - "loss": 1.0233, - "step": 25170 - }, - { - "epoch": 0.7222469266851232, - "grad_norm": 0.404296875, - "learning_rate": 4.3430238284008826e-05, - "loss": 0.9268, - "step": 25175 - }, - { - "epoch": 0.7223903719535811, - "grad_norm": 0.3984375, - "learning_rate": 4.338895437683521e-05, - "loss": 0.8961, - "step": 25180 - }, - { - "epoch": 0.7225338172220389, - "grad_norm": 0.470703125, - "learning_rate": 4.334768466389314e-05, - "loss": 1.0468, - "step": 25185 - }, - { - "epoch": 0.7226772624904968, - "grad_norm": 0.380859375, - "learning_rate": 4.330642915553023e-05, - "loss": 0.8956, - "step": 25190 - }, - { - "epoch": 0.7228207077589546, - "grad_norm": 0.408203125, - "learning_rate": 4.3265187862090576e-05, - "loss": 0.9844, - "step": 25195 - }, - { - "epoch": 0.7229641530274123, - "grad_norm": 0.408203125, - "learning_rate": 4.322396079391467e-05, - "loss": 0.8763, - "step": 25200 - }, - { - "epoch": 0.7231075982958702, - "grad_norm": 0.353515625, - "learning_rate": 4.3182747961339574e-05, - "loss": 0.8547, - "step": 25205 - }, - { - "epoch": 0.723251043564328, - "grad_norm": 0.39453125, - "learning_rate": 4.3141549374698645e-05, - "loss": 0.8742, - "step": 25210 - }, - { - "epoch": 0.7233944888327859, - "grad_norm": 0.40234375, - "learning_rate": 4.3100365044321686e-05, - "loss": 0.9297, - "step": 25215 - }, - { - "epoch": 0.7235379341012437, - "grad_norm": 0.373046875, - "learning_rate": 4.305919498053495e-05, - "loss": 1.063, - "step": 25220 - }, - { - "epoch": 0.7236813793697014, - "grad_norm": 0.390625, - "learning_rate": 4.301803919366114e-05, - "loss": 0.9138, - "step": 25225 - }, - { - "epoch": 0.7238248246381593, - "grad_norm": 0.41015625, - "learning_rate": 4.2976897694019356e-05, - "loss": 0.9893, - "step": 25230 - }, - { - "epoch": 0.7239682699066171, - "grad_norm": 0.36328125, - "learning_rate": 4.293577049192505e-05, - "loss": 0.8105, - "step": 25235 - }, - { - "epoch": 0.724111715175075, - "grad_norm": 0.404296875, - "learning_rate": 4.289465759769025e-05, - "loss": 1.0149, - "step": 25240 - }, - { - "epoch": 0.7242551604435328, - "grad_norm": 0.380859375, - "learning_rate": 4.285355902162321e-05, - "loss": 0.9592, - "step": 25245 - }, - { - "epoch": 0.7243986057119906, - 
"grad_norm": 0.38671875, - "learning_rate": 4.2812474774028735e-05, - "loss": 1.0005, - "step": 25250 - }, - { - "epoch": 0.7245420509804484, - "grad_norm": 0.35546875, - "learning_rate": 4.277140486520791e-05, - "loss": 0.9512, - "step": 25255 - }, - { - "epoch": 0.7246854962489062, - "grad_norm": 0.3515625, - "learning_rate": 4.27303493054584e-05, - "loss": 0.9749, - "step": 25260 - }, - { - "epoch": 0.7248289415173641, - "grad_norm": 0.43359375, - "learning_rate": 4.26893081050741e-05, - "loss": 1.049, - "step": 25265 - }, - { - "epoch": 0.7249723867858219, - "grad_norm": 0.369140625, - "learning_rate": 4.264828127434539e-05, - "loss": 0.9265, - "step": 25270 - }, - { - "epoch": 0.7251158320542797, - "grad_norm": 0.408203125, - "learning_rate": 4.2607268823559e-05, - "loss": 1.0478, - "step": 25275 - }, - { - "epoch": 0.7252592773227375, - "grad_norm": 0.3359375, - "learning_rate": 4.256627076299816e-05, - "loss": 0.897, - "step": 25280 - }, - { - "epoch": 0.7254027225911953, - "grad_norm": 0.3515625, - "learning_rate": 4.252528710294238e-05, - "loss": 0.9505, - "step": 25285 - }, - { - "epoch": 0.7255461678596532, - "grad_norm": 0.37890625, - "learning_rate": 4.248431785366759e-05, - "loss": 0.9054, - "step": 25290 - }, - { - "epoch": 0.725689613128111, - "grad_norm": 0.404296875, - "learning_rate": 4.2443363025446116e-05, - "loss": 0.8803, - "step": 25295 - }, - { - "epoch": 0.7258330583965688, - "grad_norm": 0.392578125, - "learning_rate": 4.2402422628546666e-05, - "loss": 0.9443, - "step": 25300 - }, - { - "epoch": 0.7259765036650266, - "grad_norm": 0.3828125, - "learning_rate": 4.23614966732343e-05, - "loss": 0.9274, - "step": 25305 - }, - { - "epoch": 0.7261199489334844, - "grad_norm": 0.44140625, - "learning_rate": 4.2320585169770565e-05, - "loss": 1.0335, - "step": 25310 - }, - { - "epoch": 0.7262633942019423, - "grad_norm": 0.390625, - "learning_rate": 4.227968812841325e-05, - "loss": 0.9579, - "step": 25315 - }, - { - "epoch": 0.7264068394704001, - "grad_norm": 0.35546875, - "learning_rate": 4.2238805559416594e-05, - "loss": 0.8951, - "step": 25320 - }, - { - "epoch": 0.7265502847388579, - "grad_norm": 0.3671875, - "learning_rate": 4.219793747303113e-05, - "loss": 0.9859, - "step": 25325 - }, - { - "epoch": 0.7266937300073157, - "grad_norm": 0.380859375, - "learning_rate": 4.215708387950391e-05, - "loss": 1.0249, - "step": 25330 - }, - { - "epoch": 0.7268371752757735, - "grad_norm": 0.376953125, - "learning_rate": 4.2116244789078217e-05, - "loss": 0.9478, - "step": 25335 - }, - { - "epoch": 0.7269806205442314, - "grad_norm": 0.388671875, - "learning_rate": 4.20754202119937e-05, - "loss": 0.9702, - "step": 25340 - }, - { - "epoch": 0.7271240658126892, - "grad_norm": 0.375, - "learning_rate": 4.2034610158486485e-05, - "loss": 0.9742, - "step": 25345 - }, - { - "epoch": 0.727267511081147, - "grad_norm": 0.38671875, - "learning_rate": 4.1993814638788944e-05, - "loss": 0.9533, - "step": 25350 - }, - { - "epoch": 0.7274109563496048, - "grad_norm": 0.3671875, - "learning_rate": 4.1953033663129836e-05, - "loss": 0.9479, - "step": 25355 - }, - { - "epoch": 0.7275544016180626, - "grad_norm": 0.34765625, - "learning_rate": 4.191226724173426e-05, - "loss": 0.9464, - "step": 25360 - }, - { - "epoch": 0.7276978468865204, - "grad_norm": 0.4140625, - "learning_rate": 4.187151538482373e-05, - "loss": 0.9785, - "step": 25365 - }, - { - "epoch": 0.7278412921549783, - "grad_norm": 0.40234375, - "learning_rate": 4.1830778102616055e-05, - "loss": 0.9632, - "step": 25370 - }, - { - "epoch": 
0.7279847374234361, - "grad_norm": 0.396484375, - "learning_rate": 4.1790055405325365e-05, - "loss": 0.9474, - "step": 25375 - }, - { - "epoch": 0.7281281826918939, - "grad_norm": 0.3671875, - "learning_rate": 4.174934730316216e-05, - "loss": 0.9309, - "step": 25380 - }, - { - "epoch": 0.7282716279603517, - "grad_norm": 0.404296875, - "learning_rate": 4.170865380633334e-05, - "loss": 0.9293, - "step": 25385 - }, - { - "epoch": 0.7284150732288095, - "grad_norm": 0.369140625, - "learning_rate": 4.166797492504206e-05, - "loss": 0.8986, - "step": 25390 - }, - { - "epoch": 0.7285585184972674, - "grad_norm": 0.396484375, - "learning_rate": 4.162731066948781e-05, - "loss": 0.9319, - "step": 25395 - }, - { - "epoch": 0.7287019637657252, - "grad_norm": 0.37890625, - "learning_rate": 4.1586661049866496e-05, - "loss": 0.8573, - "step": 25400 - }, - { - "epoch": 0.728845409034183, - "grad_norm": 0.369140625, - "learning_rate": 4.154602607637028e-05, - "loss": 0.9198, - "step": 25405 - }, - { - "epoch": 0.7289888543026408, - "grad_norm": 0.412109375, - "learning_rate": 4.1505405759187666e-05, - "loss": 0.9886, - "step": 25410 - }, - { - "epoch": 0.7291322995710986, - "grad_norm": 0.380859375, - "learning_rate": 4.146480010850345e-05, - "loss": 0.9887, - "step": 25415 - }, - { - "epoch": 0.7292757448395565, - "grad_norm": 0.359375, - "learning_rate": 4.142420913449887e-05, - "loss": 0.9108, - "step": 25420 - }, - { - "epoch": 0.7294191901080143, - "grad_norm": 0.36328125, - "learning_rate": 4.138363284735137e-05, - "loss": 0.9583, - "step": 25425 - }, - { - "epoch": 0.7295626353764721, - "grad_norm": 0.337890625, - "learning_rate": 4.13430712572347e-05, - "loss": 0.905, - "step": 25430 - }, - { - "epoch": 0.7297060806449299, - "grad_norm": 0.400390625, - "learning_rate": 4.130252437431905e-05, - "loss": 0.9665, - "step": 25435 - }, - { - "epoch": 0.7298495259133877, - "grad_norm": 0.390625, - "learning_rate": 4.12619922087708e-05, - "loss": 0.8154, - "step": 25440 - }, - { - "epoch": 0.7299929711818456, - "grad_norm": 0.458984375, - "learning_rate": 4.12214747707527e-05, - "loss": 0.8537, - "step": 25445 - }, - { - "epoch": 0.7301364164503034, - "grad_norm": 0.3671875, - "learning_rate": 4.118097207042373e-05, - "loss": 0.951, - "step": 25450 - }, - { - "epoch": 0.7302798617187612, - "grad_norm": 0.380859375, - "learning_rate": 4.1140484117939316e-05, - "loss": 0.9337, - "step": 25455 - }, - { - "epoch": 0.730423306987219, - "grad_norm": 0.353515625, - "learning_rate": 4.1100010923451084e-05, - "loss": 0.9377, - "step": 25460 - }, - { - "epoch": 0.7305667522556768, - "grad_norm": 0.369140625, - "learning_rate": 4.105955249710696e-05, - "loss": 0.9787, - "step": 25465 - }, - { - "epoch": 0.7307101975241347, - "grad_norm": 0.41015625, - "learning_rate": 4.10191088490512e-05, - "loss": 0.9497, - "step": 25470 - }, - { - "epoch": 0.7308536427925925, - "grad_norm": 0.380859375, - "learning_rate": 4.097867998942434e-05, - "loss": 0.9324, - "step": 25475 - }, - { - "epoch": 0.7309970880610503, - "grad_norm": 0.4140625, - "learning_rate": 4.093826592836322e-05, - "loss": 0.9414, - "step": 25480 - }, - { - "epoch": 0.7311405333295081, - "grad_norm": 0.359375, - "learning_rate": 4.0897866676000895e-05, - "loss": 0.9266, - "step": 25485 - }, - { - "epoch": 0.7312839785979659, - "grad_norm": 0.34765625, - "learning_rate": 4.0857482242466885e-05, - "loss": 0.9141, - "step": 25490 - }, - { - "epoch": 0.7314274238664238, - "grad_norm": 0.3828125, - "learning_rate": 4.081711263788681e-05, - "loss": 0.9521, - "step": 
25495 - }, - { - "epoch": 0.7315708691348816, - "grad_norm": 0.39453125, - "learning_rate": 4.077675787238267e-05, - "loss": 0.9805, - "step": 25500 - }, - { - "epoch": 0.7317143144033394, - "grad_norm": 0.365234375, - "learning_rate": 4.073641795607267e-05, - "loss": 0.9115, - "step": 25505 - }, - { - "epoch": 0.7318577596717972, - "grad_norm": 0.44921875, - "learning_rate": 4.0696092899071416e-05, - "loss": 0.8598, - "step": 25510 - }, - { - "epoch": 0.732001204940255, - "grad_norm": 0.3828125, - "learning_rate": 4.0655782711489666e-05, - "loss": 0.9094, - "step": 25515 - }, - { - "epoch": 0.7321446502087129, - "grad_norm": 0.396484375, - "learning_rate": 4.061548740343446e-05, - "loss": 1.0755, - "step": 25520 - }, - { - "epoch": 0.7322880954771707, - "grad_norm": 0.400390625, - "learning_rate": 4.057520698500924e-05, - "loss": 0.8659, - "step": 25525 - }, - { - "epoch": 0.7324315407456286, - "grad_norm": 0.39453125, - "learning_rate": 4.053494146631355e-05, - "loss": 1.0082, - "step": 25530 - }, - { - "epoch": 0.7325749860140863, - "grad_norm": 0.345703125, - "learning_rate": 4.0494690857443283e-05, - "loss": 0.9125, - "step": 25535 - }, - { - "epoch": 0.7327184312825441, - "grad_norm": 0.3828125, - "learning_rate": 4.045445516849055e-05, - "loss": 0.8407, - "step": 25540 - }, - { - "epoch": 0.732861876551002, - "grad_norm": 0.384765625, - "learning_rate": 4.0414234409543794e-05, - "loss": 0.9216, - "step": 25545 - }, - { - "epoch": 0.7330053218194598, - "grad_norm": 0.373046875, - "learning_rate": 4.037402859068764e-05, - "loss": 0.8945, - "step": 25550 - }, - { - "epoch": 0.7331487670879177, - "grad_norm": 0.380859375, - "learning_rate": 4.0333837722002974e-05, - "loss": 0.9911, - "step": 25555 - }, - { - "epoch": 0.7332922123563754, - "grad_norm": 0.3828125, - "learning_rate": 4.029366181356702e-05, - "loss": 0.9331, - "step": 25560 - }, - { - "epoch": 0.7334356576248332, - "grad_norm": 0.419921875, - "learning_rate": 4.025350087545314e-05, - "loss": 0.9172, - "step": 25565 - }, - { - "epoch": 0.7335791028932911, - "grad_norm": 0.376953125, - "learning_rate": 4.0213354917731004e-05, - "loss": 0.9201, - "step": 25570 - }, - { - "epoch": 0.7337225481617489, - "grad_norm": 0.37890625, - "learning_rate": 4.0173223950466464e-05, - "loss": 0.9644, - "step": 25575 - }, - { - "epoch": 0.7338659934302068, - "grad_norm": 0.451171875, - "learning_rate": 4.0133107983721726e-05, - "loss": 0.9365, - "step": 25580 - }, - { - "epoch": 0.7340094386986645, - "grad_norm": 0.388671875, - "learning_rate": 4.0093007027555144e-05, - "loss": 0.96, - "step": 25585 - }, - { - "epoch": 0.7341528839671223, - "grad_norm": 0.44921875, - "learning_rate": 4.005292109202129e-05, - "loss": 0.909, - "step": 25590 - }, - { - "epoch": 0.7342963292355802, - "grad_norm": 0.396484375, - "learning_rate": 4.0012850187171094e-05, - "loss": 0.8244, - "step": 25595 - }, - { - "epoch": 0.734439774504038, - "grad_norm": 0.44140625, - "learning_rate": 3.997279432305158e-05, - "loss": 0.9405, - "step": 25600 - }, - { - "epoch": 0.7345832197724959, - "grad_norm": 0.353515625, - "learning_rate": 3.993275350970608e-05, - "loss": 1.0121, - "step": 25605 - }, - { - "epoch": 0.7347266650409536, - "grad_norm": 0.361328125, - "learning_rate": 3.9892727757174074e-05, - "loss": 0.9317, - "step": 25610 - }, - { - "epoch": 0.7348701103094114, - "grad_norm": 0.380859375, - "learning_rate": 3.985271707549141e-05, - "loss": 0.9162, - "step": 25615 - }, - { - "epoch": 0.7350135555778693, - "grad_norm": 0.396484375, - "learning_rate": 
3.981272147469002e-05, - "loss": 0.9033, - "step": 25620 - }, - { - "epoch": 0.7351570008463271, - "grad_norm": 0.3515625, - "learning_rate": 3.977274096479806e-05, - "loss": 0.9652, - "step": 25625 - }, - { - "epoch": 0.735300446114785, - "grad_norm": 0.359375, - "learning_rate": 3.973277555584004e-05, - "loss": 0.9215, - "step": 25630 - }, - { - "epoch": 0.7354438913832427, - "grad_norm": 0.37109375, - "learning_rate": 3.969282525783653e-05, - "loss": 0.9376, - "step": 25635 - }, - { - "epoch": 0.7355873366517005, - "grad_norm": 0.384765625, - "learning_rate": 3.965289008080438e-05, - "loss": 0.8752, - "step": 25640 - }, - { - "epoch": 0.7357307819201584, - "grad_norm": 0.408203125, - "learning_rate": 3.961297003475665e-05, - "loss": 1.1107, - "step": 25645 - }, - { - "epoch": 0.7358742271886162, - "grad_norm": 0.337890625, - "learning_rate": 3.957306512970258e-05, - "loss": 0.8375, - "step": 25650 - }, - { - "epoch": 0.7360176724570741, - "grad_norm": 0.365234375, - "learning_rate": 3.953317537564763e-05, - "loss": 1.0015, - "step": 25655 - }, - { - "epoch": 0.7361611177255318, - "grad_norm": 0.443359375, - "learning_rate": 3.9493300782593415e-05, - "loss": 1.0435, - "step": 25660 - }, - { - "epoch": 0.7363045629939896, - "grad_norm": 0.380859375, - "learning_rate": 3.9453441360537876e-05, - "loss": 1.0061, - "step": 25665 - }, - { - "epoch": 0.7364480082624475, - "grad_norm": 0.36328125, - "learning_rate": 3.9413597119475044e-05, - "loss": 1.0076, - "step": 25670 - }, - { - "epoch": 0.7365914535309053, - "grad_norm": 0.396484375, - "learning_rate": 3.937376806939513e-05, - "loss": 0.974, - "step": 25675 - }, - { - "epoch": 0.7367348987993632, - "grad_norm": 0.3828125, - "learning_rate": 3.9333954220284586e-05, - "loss": 0.9509, - "step": 25680 - }, - { - "epoch": 0.7368783440678209, - "grad_norm": 0.41796875, - "learning_rate": 3.929415558212607e-05, - "loss": 0.9417, - "step": 25685 - }, - { - "epoch": 0.7370217893362787, - "grad_norm": 0.40234375, - "learning_rate": 3.925437216489838e-05, - "loss": 1.0143, - "step": 25690 - }, - { - "epoch": 0.7371652346047366, - "grad_norm": 0.400390625, - "learning_rate": 3.9214603978576504e-05, - "loss": 0.9978, - "step": 25695 - }, - { - "epoch": 0.7373086798731944, - "grad_norm": 0.39453125, - "learning_rate": 3.91748510331316e-05, - "loss": 1.0428, - "step": 25700 - }, - { - "epoch": 0.7374521251416521, - "grad_norm": 0.357421875, - "learning_rate": 3.913511333853108e-05, - "loss": 0.9286, - "step": 25705 - }, - { - "epoch": 0.73759557041011, - "grad_norm": 0.365234375, - "learning_rate": 3.909539090473845e-05, - "loss": 0.9274, - "step": 25710 - }, - { - "epoch": 0.7377390156785678, - "grad_norm": 0.439453125, - "learning_rate": 3.905568374171338e-05, - "loss": 1.0798, - "step": 25715 - }, - { - "epoch": 0.7378824609470257, - "grad_norm": 0.375, - "learning_rate": 3.9015991859411815e-05, - "loss": 0.9454, - "step": 25720 - }, - { - "epoch": 0.7380259062154835, - "grad_norm": 0.3515625, - "learning_rate": 3.897631526778577e-05, - "loss": 0.9699, - "step": 25725 - }, - { - "epoch": 0.7381693514839412, - "grad_norm": 0.369140625, - "learning_rate": 3.8936653976783454e-05, - "loss": 0.9339, - "step": 25730 - }, - { - "epoch": 0.7383127967523991, - "grad_norm": 0.3671875, - "learning_rate": 3.889700799634921e-05, - "loss": 0.9837, - "step": 25735 - }, - { - "epoch": 0.7384562420208569, - "grad_norm": 0.41015625, - "learning_rate": 3.885737733642366e-05, - "loss": 1.0106, - "step": 25740 - }, - { - "epoch": 0.7385996872893148, - "grad_norm": 0.4375, 
- "learning_rate": 3.881776200694345e-05, - "loss": 0.9562, - "step": 25745 - }, - { - "epoch": 0.7387431325577726, - "grad_norm": 0.38671875, - "learning_rate": 3.877816201784139e-05, - "loss": 0.8754, - "step": 25750 - }, - { - "epoch": 0.7388865778262304, - "grad_norm": 0.375, - "learning_rate": 3.873857737904657e-05, - "loss": 0.8727, - "step": 25755 - }, - { - "epoch": 0.7390300230946882, - "grad_norm": 0.3828125, - "learning_rate": 3.86990081004841e-05, - "loss": 1.0075, - "step": 25760 - }, - { - "epoch": 0.739173468363146, - "grad_norm": 0.359375, - "learning_rate": 3.8659454192075304e-05, - "loss": 0.8865, - "step": 25765 - }, - { - "epoch": 0.7393169136316039, - "grad_norm": 0.3984375, - "learning_rate": 3.861991566373759e-05, - "loss": 0.9711, - "step": 25770 - }, - { - "epoch": 0.7394603589000617, - "grad_norm": 0.380859375, - "learning_rate": 3.8580392525384626e-05, - "loss": 0.8958, - "step": 25775 - }, - { - "epoch": 0.7396038041685195, - "grad_norm": 0.35546875, - "learning_rate": 3.85408847869261e-05, - "loss": 0.8833, - "step": 25780 - }, - { - "epoch": 0.7397472494369773, - "grad_norm": 0.380859375, - "learning_rate": 3.8501392458267906e-05, - "loss": 0.9094, - "step": 25785 - }, - { - "epoch": 0.7398906947054351, - "grad_norm": 0.392578125, - "learning_rate": 3.846191554931201e-05, - "loss": 0.9653, - "step": 25790 - }, - { - "epoch": 0.740034139973893, - "grad_norm": 0.369140625, - "learning_rate": 3.842245406995663e-05, - "loss": 0.8931, - "step": 25795 - }, - { - "epoch": 0.7401775852423508, - "grad_norm": 0.388671875, - "learning_rate": 3.838300803009601e-05, - "loss": 1.0017, - "step": 25800 - }, - { - "epoch": 0.7403210305108086, - "grad_norm": 0.37890625, - "learning_rate": 3.834357743962053e-05, - "loss": 1.0349, - "step": 25805 - }, - { - "epoch": 0.7404644757792664, - "grad_norm": 0.400390625, - "learning_rate": 3.8304162308416766e-05, - "loss": 0.9306, - "step": 25810 - }, - { - "epoch": 0.7406079210477242, - "grad_norm": 0.365234375, - "learning_rate": 3.826476264636737e-05, - "loss": 0.937, - "step": 25815 - }, - { - "epoch": 0.7407513663161821, - "grad_norm": 0.404296875, - "learning_rate": 3.822537846335109e-05, - "loss": 1.028, - "step": 25820 - }, - { - "epoch": 0.7408948115846399, - "grad_norm": 0.427734375, - "learning_rate": 3.8186009769242845e-05, - "loss": 1.0125, - "step": 25825 - }, - { - "epoch": 0.7410382568530977, - "grad_norm": 0.3828125, - "learning_rate": 3.814665657391365e-05, - "loss": 0.8979, - "step": 25830 - }, - { - "epoch": 0.7411817021215555, - "grad_norm": 0.396484375, - "learning_rate": 3.810731888723058e-05, - "loss": 0.9194, - "step": 25835 - }, - { - "epoch": 0.7413251473900133, - "grad_norm": 0.408203125, - "learning_rate": 3.806799671905695e-05, - "loss": 0.9074, - "step": 25840 - }, - { - "epoch": 0.7414685926584712, - "grad_norm": 0.341796875, - "learning_rate": 3.8028690079252085e-05, - "loss": 0.9294, - "step": 25845 - }, - { - "epoch": 0.741612037926929, - "grad_norm": 0.390625, - "learning_rate": 3.798939897767141e-05, - "loss": 0.8905, - "step": 25850 - }, - { - "epoch": 0.7417554831953868, - "grad_norm": 0.37109375, - "learning_rate": 3.7950123424166504e-05, - "loss": 0.8846, - "step": 25855 - }, - { - "epoch": 0.7418989284638446, - "grad_norm": 0.396484375, - "learning_rate": 3.7910863428584985e-05, - "loss": 0.8993, - "step": 25860 - }, - { - "epoch": 0.7420423737323024, - "grad_norm": 0.328125, - "learning_rate": 3.7871619000770695e-05, - "loss": 0.895, - "step": 25865 - }, - { - "epoch": 0.7421858190007603, - 
"grad_norm": 0.404296875, - "learning_rate": 3.783239015056343e-05, - "loss": 0.9212, - "step": 25870 - }, - { - "epoch": 0.7423292642692181, - "grad_norm": 0.37109375, - "learning_rate": 3.779317688779912e-05, - "loss": 1.0242, - "step": 25875 - }, - { - "epoch": 0.7424727095376759, - "grad_norm": 0.416015625, - "learning_rate": 3.7753979222309876e-05, - "loss": 0.9271, - "step": 25880 - }, - { - "epoch": 0.7426161548061337, - "grad_norm": 0.412109375, - "learning_rate": 3.77147971639238e-05, - "loss": 0.9337, - "step": 25885 - }, - { - "epoch": 0.7427596000745915, - "grad_norm": 0.3828125, - "learning_rate": 3.767563072246508e-05, - "loss": 0.9168, - "step": 25890 - }, - { - "epoch": 0.7429030453430494, - "grad_norm": 0.38671875, - "learning_rate": 3.7636479907754016e-05, - "loss": 0.8824, - "step": 25895 - }, - { - "epoch": 0.7430464906115072, - "grad_norm": 0.470703125, - "learning_rate": 3.7597344729607056e-05, - "loss": 0.8806, - "step": 25900 - }, - { - "epoch": 0.743189935879965, - "grad_norm": 0.375, - "learning_rate": 3.7558225197836604e-05, - "loss": 0.9222, - "step": 25905 - }, - { - "epoch": 0.7433333811484228, - "grad_norm": 0.396484375, - "learning_rate": 3.751912132225118e-05, - "loss": 0.9449, - "step": 25910 - }, - { - "epoch": 0.7434768264168806, - "grad_norm": 0.416015625, - "learning_rate": 3.7480033112655474e-05, - "loss": 0.9503, - "step": 25915 - }, - { - "epoch": 0.7436202716853385, - "grad_norm": 0.359375, - "learning_rate": 3.744096057885014e-05, - "loss": 0.9751, - "step": 25920 - }, - { - "epoch": 0.7437637169537963, - "grad_norm": 0.412109375, - "learning_rate": 3.740190373063192e-05, - "loss": 0.9552, - "step": 25925 - }, - { - "epoch": 0.7439071622222541, - "grad_norm": 0.423828125, - "learning_rate": 3.73628625777936e-05, - "loss": 1.0264, - "step": 25930 - }, - { - "epoch": 0.744050607490712, - "grad_norm": 0.34375, - "learning_rate": 3.7323837130124154e-05, - "loss": 0.9172, - "step": 25935 - }, - { - "epoch": 0.7441940527591697, - "grad_norm": 0.400390625, - "learning_rate": 3.7284827397408485e-05, - "loss": 1.0237, - "step": 25940 - }, - { - "epoch": 0.7443374980276276, - "grad_norm": 0.380859375, - "learning_rate": 3.72458333894276e-05, - "loss": 0.9164, - "step": 25945 - }, - { - "epoch": 0.7444809432960854, - "grad_norm": 0.41015625, - "learning_rate": 3.720685511595855e-05, - "loss": 0.9707, - "step": 25950 - }, - { - "epoch": 0.7446243885645432, - "grad_norm": 0.3828125, - "learning_rate": 3.71678925867745e-05, - "loss": 0.9617, - "step": 25955 - }, - { - "epoch": 0.744767833833001, - "grad_norm": 0.470703125, - "learning_rate": 3.712894581164461e-05, - "loss": 0.9467, - "step": 25960 - }, - { - "epoch": 0.7449112791014588, - "grad_norm": 0.3984375, - "learning_rate": 3.709001480033405e-05, - "loss": 0.8993, - "step": 25965 - }, - { - "epoch": 0.7450547243699167, - "grad_norm": 0.369140625, - "learning_rate": 3.705109956260419e-05, - "loss": 0.9226, - "step": 25970 - }, - { - "epoch": 0.7451981696383745, - "grad_norm": 0.361328125, - "learning_rate": 3.701220010821228e-05, - "loss": 1.0032, - "step": 25975 - }, - { - "epoch": 0.7453416149068323, - "grad_norm": 0.38671875, - "learning_rate": 3.69733164469117e-05, - "loss": 0.9746, - "step": 25980 - }, - { - "epoch": 0.7454850601752901, - "grad_norm": 0.365234375, - "learning_rate": 3.693444858845181e-05, - "loss": 0.8695, - "step": 25985 - }, - { - "epoch": 0.7456285054437479, - "grad_norm": 0.396484375, - "learning_rate": 3.68955965425781e-05, - "loss": 0.9995, - "step": 25990 - }, - { - "epoch": 
0.7457719507122058, - "grad_norm": 0.36328125, - "learning_rate": 3.685676031903203e-05, - "loss": 0.9223, - "step": 25995 - }, - { - "epoch": 0.7459153959806636, - "grad_norm": 0.37109375, - "learning_rate": 3.6817939927551105e-05, - "loss": 0.9353, - "step": 26000 - }, - { - "epoch": 0.7460588412491214, - "grad_norm": 0.390625, - "learning_rate": 3.6779135377868856e-05, - "loss": 0.9503, - "step": 26005 - }, - { - "epoch": 0.7462022865175792, - "grad_norm": 0.41015625, - "learning_rate": 3.67403466797148e-05, - "loss": 0.9254, - "step": 26010 - }, - { - "epoch": 0.746345731786037, - "grad_norm": 0.34765625, - "learning_rate": 3.6701573842814595e-05, - "loss": 0.9389, - "step": 26015 - }, - { - "epoch": 0.7464891770544949, - "grad_norm": 0.40625, - "learning_rate": 3.6662816876889837e-05, - "loss": 0.8688, - "step": 26020 - }, - { - "epoch": 0.7466326223229527, - "grad_norm": 0.396484375, - "learning_rate": 3.6624075791658154e-05, - "loss": 0.8582, - "step": 26025 - }, - { - "epoch": 0.7467760675914105, - "grad_norm": 0.384765625, - "learning_rate": 3.658535059683318e-05, - "loss": 0.9986, - "step": 26030 - }, - { - "epoch": 0.7469195128598684, - "grad_norm": 0.408203125, - "learning_rate": 3.6546641302124576e-05, - "loss": 0.9018, - "step": 26035 - }, - { - "epoch": 0.7470629581283261, - "grad_norm": 0.361328125, - "learning_rate": 3.650794791723805e-05, - "loss": 0.8955, - "step": 26040 - }, - { - "epoch": 0.7472064033967839, - "grad_norm": 0.396484375, - "learning_rate": 3.6469270451875304e-05, - "loss": 1.005, - "step": 26045 - }, - { - "epoch": 0.7473498486652418, - "grad_norm": 0.369140625, - "learning_rate": 3.6430608915734e-05, - "loss": 0.9696, - "step": 26050 - }, - { - "epoch": 0.7474932939336996, - "grad_norm": 0.3515625, - "learning_rate": 3.639196331850784e-05, - "loss": 0.9526, - "step": 26055 - }, - { - "epoch": 0.7476367392021575, - "grad_norm": 0.35546875, - "learning_rate": 3.635333366988657e-05, - "loss": 0.9881, - "step": 26060 - }, - { - "epoch": 0.7477801844706152, - "grad_norm": 0.419921875, - "learning_rate": 3.631471997955589e-05, - "loss": 0.9935, - "step": 26065 - }, - { - "epoch": 0.747923629739073, - "grad_norm": 0.390625, - "learning_rate": 3.6276122257197465e-05, - "loss": 0.8939, - "step": 26070 - }, - { - "epoch": 0.7480670750075309, - "grad_norm": 0.384765625, - "learning_rate": 3.623754051248907e-05, - "loss": 0.9256, - "step": 26075 - }, - { - "epoch": 0.7482105202759887, - "grad_norm": 0.388671875, - "learning_rate": 3.6198974755104366e-05, - "loss": 0.8541, - "step": 26080 - }, - { - "epoch": 0.7483539655444466, - "grad_norm": 0.388671875, - "learning_rate": 3.6160424994713046e-05, - "loss": 0.9201, - "step": 26085 - }, - { - "epoch": 0.7484974108129043, - "grad_norm": 0.365234375, - "learning_rate": 3.6121891240980764e-05, - "loss": 1.0181, - "step": 26090 - }, - { - "epoch": 0.7486408560813621, - "grad_norm": 0.390625, - "learning_rate": 3.608337350356925e-05, - "loss": 0.945, - "step": 26095 - }, - { - "epoch": 0.74878430134982, - "grad_norm": 0.4453125, - "learning_rate": 3.604487179213612e-05, - "loss": 0.9745, - "step": 26100 - }, - { - "epoch": 0.7489277466182778, - "grad_norm": 0.40625, - "learning_rate": 3.600638611633499e-05, - "loss": 1.0072, - "step": 26105 - }, - { - "epoch": 0.7490711918867357, - "grad_norm": 0.40234375, - "learning_rate": 3.596791648581546e-05, - "loss": 0.9473, - "step": 26110 - }, - { - "epoch": 0.7492146371551934, - "grad_norm": 0.341796875, - "learning_rate": 3.592946291022317e-05, - "loss": 0.9678, - "step": 26115 
- }, - { - "epoch": 0.7493580824236512, - "grad_norm": 0.390625, - "learning_rate": 3.589102539919965e-05, - "loss": 1.017, - "step": 26120 - }, - { - "epoch": 0.7495015276921091, - "grad_norm": 0.390625, - "learning_rate": 3.5852603962382414e-05, - "loss": 1.0097, - "step": 26125 - }, - { - "epoch": 0.7496449729605669, - "grad_norm": 0.40234375, - "learning_rate": 3.5814198609405024e-05, - "loss": 0.9596, - "step": 26130 - }, - { - "epoch": 0.7497884182290248, - "grad_norm": 0.357421875, - "learning_rate": 3.5775809349896925e-05, - "loss": 0.9361, - "step": 26135 - }, - { - "epoch": 0.7499318634974825, - "grad_norm": 0.3828125, - "learning_rate": 3.5737436193483555e-05, - "loss": 0.9098, - "step": 26140 - }, - { - "epoch": 0.7500753087659403, - "grad_norm": 0.396484375, - "learning_rate": 3.569907914978627e-05, - "loss": 1.0024, - "step": 26145 - }, - { - "epoch": 0.7502187540343982, - "grad_norm": 0.375, - "learning_rate": 3.56607382284225e-05, - "loss": 0.9148, - "step": 26150 - }, - { - "epoch": 0.750362199302856, - "grad_norm": 0.38671875, - "learning_rate": 3.562241343900553e-05, - "loss": 0.941, - "step": 26155 - }, - { - "epoch": 0.7505056445713139, - "grad_norm": 0.408203125, - "learning_rate": 3.5584104791144603e-05, - "loss": 0.9306, - "step": 26160 - }, - { - "epoch": 0.7506490898397716, - "grad_norm": 0.35546875, - "learning_rate": 3.554581229444499e-05, - "loss": 0.8562, - "step": 26165 - }, - { - "epoch": 0.7507925351082294, - "grad_norm": 0.34765625, - "learning_rate": 3.5507535958507864e-05, - "loss": 0.8793, - "step": 26170 - }, - { - "epoch": 0.7509359803766873, - "grad_norm": 0.404296875, - "learning_rate": 3.5469275792930325e-05, - "loss": 0.9227, - "step": 26175 - }, - { - "epoch": 0.7510794256451451, - "grad_norm": 0.392578125, - "learning_rate": 3.543103180730541e-05, - "loss": 1.0238, - "step": 26180 - }, - { - "epoch": 0.751222870913603, - "grad_norm": 0.365234375, - "learning_rate": 3.539280401122221e-05, - "loss": 0.9357, - "step": 26185 - }, - { - "epoch": 0.7513663161820607, - "grad_norm": 0.3828125, - "learning_rate": 3.535459241426563e-05, - "loss": 0.8443, - "step": 26190 - }, - { - "epoch": 0.7515097614505185, - "grad_norm": 0.3828125, - "learning_rate": 3.531639702601658e-05, - "loss": 0.8981, - "step": 26195 - }, - { - "epoch": 0.7516532067189764, - "grad_norm": 0.408203125, - "learning_rate": 3.5278217856051866e-05, - "loss": 0.8938, - "step": 26200 - }, - { - "epoch": 0.7517966519874342, - "grad_norm": 0.376953125, - "learning_rate": 3.5240054913944256e-05, - "loss": 0.9663, - "step": 26205 - }, - { - "epoch": 0.7519400972558921, - "grad_norm": 0.376953125, - "learning_rate": 3.5201908209262445e-05, - "loss": 0.9803, - "step": 26210 - }, - { - "epoch": 0.7520835425243498, - "grad_norm": 0.390625, - "learning_rate": 3.5163777751571013e-05, - "loss": 0.9789, - "step": 26215 - }, - { - "epoch": 0.7522269877928076, - "grad_norm": 0.3828125, - "learning_rate": 3.5125663550430585e-05, - "loss": 0.8982, - "step": 26220 - }, - { - "epoch": 0.7523704330612655, - "grad_norm": 0.37109375, - "learning_rate": 3.508756561539759e-05, - "loss": 0.8797, - "step": 26225 - }, - { - "epoch": 0.7525138783297233, - "grad_norm": 0.345703125, - "learning_rate": 3.504948395602442e-05, - "loss": 0.874, - "step": 26230 - }, - { - "epoch": 0.7526573235981812, - "grad_norm": 0.375, - "learning_rate": 3.501141858185935e-05, - "loss": 0.8978, - "step": 26235 - }, - { - "epoch": 0.7528007688666389, - "grad_norm": 0.390625, - "learning_rate": 3.4973369502446685e-05, - "loss": 0.9691, 
- "step": 26240 - }, - { - "epoch": 0.7529442141350967, - "grad_norm": 0.3671875, - "learning_rate": 3.493533672732653e-05, - "loss": 0.9272, - "step": 26245 - }, - { - "epoch": 0.7530876594035546, - "grad_norm": 0.44921875, - "learning_rate": 3.4897320266034905e-05, - "loss": 0.9875, - "step": 26250 - }, - { - "epoch": 0.7532311046720124, - "grad_norm": 0.357421875, - "learning_rate": 3.4859320128103835e-05, - "loss": 0.8967, - "step": 26255 - }, - { - "epoch": 0.7533745499404703, - "grad_norm": 0.36328125, - "learning_rate": 3.482133632306117e-05, - "loss": 0.9111, - "step": 26260 - }, - { - "epoch": 0.753517995208928, - "grad_norm": 0.404296875, - "learning_rate": 3.478336886043067e-05, - "loss": 0.9612, - "step": 26265 - }, - { - "epoch": 0.7536614404773858, - "grad_norm": 0.375, - "learning_rate": 3.4745417749732003e-05, - "loss": 0.9565, - "step": 26270 - }, - { - "epoch": 0.7538048857458437, - "grad_norm": 0.380859375, - "learning_rate": 3.470748300048079e-05, - "loss": 0.9639, - "step": 26275 - }, - { - "epoch": 0.7539483310143015, - "grad_norm": 0.359375, - "learning_rate": 3.466956462218849e-05, - "loss": 0.8917, - "step": 26280 - }, - { - "epoch": 0.7540917762827594, - "grad_norm": 0.3671875, - "learning_rate": 3.4631662624362424e-05, - "loss": 0.9339, - "step": 26285 - }, - { - "epoch": 0.7542352215512171, - "grad_norm": 0.380859375, - "learning_rate": 3.4593777016505946e-05, - "loss": 0.938, - "step": 26290 - }, - { - "epoch": 0.7543786668196749, - "grad_norm": 0.376953125, - "learning_rate": 3.455590780811817e-05, - "loss": 0.946, - "step": 26295 - }, - { - "epoch": 0.7545221120881328, - "grad_norm": 0.375, - "learning_rate": 3.451805500869413e-05, - "loss": 1.0001, - "step": 26300 - }, - { - "epoch": 0.7546655573565906, - "grad_norm": 0.39453125, - "learning_rate": 3.4480218627724725e-05, - "loss": 0.9731, - "step": 26305 - }, - { - "epoch": 0.7548090026250485, - "grad_norm": 0.359375, - "learning_rate": 3.444239867469683e-05, - "loss": 0.9871, - "step": 26310 - }, - { - "epoch": 0.7549524478935062, - "grad_norm": 0.4140625, - "learning_rate": 3.440459515909312e-05, - "loss": 0.972, - "step": 26315 - }, - { - "epoch": 0.755095893161964, - "grad_norm": 0.400390625, - "learning_rate": 3.4366808090392123e-05, - "loss": 0.8904, - "step": 26320 - }, - { - "epoch": 0.7552393384304219, - "grad_norm": 0.380859375, - "learning_rate": 3.432903747806835e-05, - "loss": 0.8743, - "step": 26325 - }, - { - "epoch": 0.7553827836988797, - "grad_norm": 0.365234375, - "learning_rate": 3.429128333159208e-05, - "loss": 0.8993, - "step": 26330 - }, - { - "epoch": 0.7555262289673376, - "grad_norm": 0.42578125, - "learning_rate": 3.4253545660429546e-05, - "loss": 1.0151, - "step": 26335 - }, - { - "epoch": 0.7556696742357953, - "grad_norm": 0.376953125, - "learning_rate": 3.421582447404273e-05, - "loss": 0.9441, - "step": 26340 - }, - { - "epoch": 0.7558131195042531, - "grad_norm": 0.35546875, - "learning_rate": 3.417811978188966e-05, - "loss": 0.9312, - "step": 26345 - }, - { - "epoch": 0.755956564772711, - "grad_norm": 0.408203125, - "learning_rate": 3.414043159342408e-05, - "loss": 0.9881, - "step": 26350 - }, - { - "epoch": 0.7561000100411688, - "grad_norm": 0.388671875, - "learning_rate": 3.410275991809562e-05, - "loss": 0.9598, - "step": 26355 - }, - { - "epoch": 0.7562434553096267, - "grad_norm": 0.3984375, - "learning_rate": 3.406510476534985e-05, - "loss": 0.903, - "step": 26360 - }, - { - "epoch": 0.7563869005780844, - "grad_norm": 0.375, - "learning_rate": 3.402746614462812e-05, - 
"loss": 0.9803, - "step": 26365 - }, - { - "epoch": 0.7565303458465422, - "grad_norm": 0.40234375, - "learning_rate": 3.398984406536765e-05, - "loss": 0.8223, - "step": 26370 - }, - { - "epoch": 0.7566737911150001, - "grad_norm": 0.39453125, - "learning_rate": 3.3952238537001535e-05, - "loss": 0.936, - "step": 26375 - }, - { - "epoch": 0.7568172363834579, - "grad_norm": 0.396484375, - "learning_rate": 3.391464956895869e-05, - "loss": 0.8727, - "step": 26380 - }, - { - "epoch": 0.7569606816519157, - "grad_norm": 0.3984375, - "learning_rate": 3.387707717066388e-05, - "loss": 0.9359, - "step": 26385 - }, - { - "epoch": 0.7571041269203735, - "grad_norm": 0.4140625, - "learning_rate": 3.3839521351537726e-05, - "loss": 1.0276, - "step": 26390 - }, - { - "epoch": 0.7572475721888313, - "grad_norm": 0.384765625, - "learning_rate": 3.380198212099674e-05, - "loss": 0.914, - "step": 26395 - }, - { - "epoch": 0.7573910174572892, - "grad_norm": 0.392578125, - "learning_rate": 3.376445948845322e-05, - "loss": 0.9674, - "step": 26400 - }, - { - "epoch": 0.757534462725747, - "grad_norm": 0.3828125, - "learning_rate": 3.372695346331527e-05, - "loss": 0.9247, - "step": 26405 - }, - { - "epoch": 0.7576779079942048, - "grad_norm": 0.412109375, - "learning_rate": 3.368946405498686e-05, - "loss": 0.9439, - "step": 26410 - }, - { - "epoch": 0.7578213532626626, - "grad_norm": 0.38671875, - "learning_rate": 3.365199127286788e-05, - "loss": 0.9437, - "step": 26415 - }, - { - "epoch": 0.7579647985311204, - "grad_norm": 0.3984375, - "learning_rate": 3.361453512635393e-05, - "loss": 0.901, - "step": 26420 - }, - { - "epoch": 0.7581082437995783, - "grad_norm": 0.41796875, - "learning_rate": 3.3577095624836493e-05, - "loss": 0.9998, - "step": 26425 - }, - { - "epoch": 0.7582516890680361, - "grad_norm": 0.37890625, - "learning_rate": 3.353967277770282e-05, - "loss": 0.9547, - "step": 26430 - }, - { - "epoch": 0.7583951343364939, - "grad_norm": 0.357421875, - "learning_rate": 3.350226659433613e-05, - "loss": 1.0041, - "step": 26435 - }, - { - "epoch": 0.7585385796049517, - "grad_norm": 0.376953125, - "learning_rate": 3.346487708411532e-05, - "loss": 0.8525, - "step": 26440 - }, - { - "epoch": 0.7586820248734095, - "grad_norm": 0.3515625, - "learning_rate": 3.342750425641513e-05, - "loss": 1.0674, - "step": 26445 - }, - { - "epoch": 0.7588254701418674, - "grad_norm": 0.365234375, - "learning_rate": 3.3390148120606204e-05, - "loss": 0.9019, - "step": 26450 - }, - { - "epoch": 0.7589689154103252, - "grad_norm": 0.375, - "learning_rate": 3.335280868605492e-05, - "loss": 0.8901, - "step": 26455 - }, - { - "epoch": 0.759112360678783, - "grad_norm": 0.400390625, - "learning_rate": 3.331548596212347e-05, - "loss": 0.9386, - "step": 26460 - }, - { - "epoch": 0.7592558059472408, - "grad_norm": 0.361328125, - "learning_rate": 3.3278179958169855e-05, - "loss": 0.9271, - "step": 26465 - }, - { - "epoch": 0.7593992512156986, - "grad_norm": 0.3828125, - "learning_rate": 3.324089068354797e-05, - "loss": 0.9389, - "step": 26470 - }, - { - "epoch": 0.7595426964841565, - "grad_norm": 0.369140625, - "learning_rate": 3.3203618147607415e-05, - "loss": 0.9232, - "step": 26475 - }, - { - "epoch": 0.7596861417526143, - "grad_norm": 0.3828125, - "learning_rate": 3.3166362359693596e-05, - "loss": 1.0113, - "step": 26480 - }, - { - "epoch": 0.7598295870210721, - "grad_norm": 0.42578125, - "learning_rate": 3.312912332914782e-05, - "loss": 0.9473, - "step": 26485 - }, - { - "epoch": 0.75997303228953, - "grad_norm": 0.373046875, - "learning_rate": 
3.3091901065307084e-05, - "loss": 0.8714, - "step": 26490 - }, - { - "epoch": 0.7601164775579877, - "grad_norm": 0.373046875, - "learning_rate": 3.3054695577504224e-05, - "loss": 0.9525, - "step": 26495 - }, - { - "epoch": 0.7602599228264456, - "grad_norm": 0.365234375, - "learning_rate": 3.301750687506784e-05, - "loss": 0.9491, - "step": 26500 - }, - { - "epoch": 0.7604033680949034, - "grad_norm": 0.3828125, - "learning_rate": 3.29803349673224e-05, - "loss": 0.8599, - "step": 26505 - }, - { - "epoch": 0.7605468133633612, - "grad_norm": 0.380859375, - "learning_rate": 3.29431798635881e-05, - "loss": 0.988, - "step": 26510 - }, - { - "epoch": 0.760690258631819, - "grad_norm": 0.37890625, - "learning_rate": 3.290604157318088e-05, - "loss": 1.0127, - "step": 26515 - }, - { - "epoch": 0.7608337039002768, - "grad_norm": 0.439453125, - "learning_rate": 3.2868920105412594e-05, - "loss": 0.9223, - "step": 26520 - }, - { - "epoch": 0.7609771491687347, - "grad_norm": 0.369140625, - "learning_rate": 3.283181546959078e-05, - "loss": 0.8925, - "step": 26525 - }, - { - "epoch": 0.7611205944371925, - "grad_norm": 0.380859375, - "learning_rate": 3.279472767501876e-05, - "loss": 0.906, - "step": 26530 - }, - { - "epoch": 0.7612640397056503, - "grad_norm": 0.3984375, - "learning_rate": 3.275765673099561e-05, - "loss": 0.9289, - "step": 26535 - }, - { - "epoch": 0.7614074849741082, - "grad_norm": 0.41015625, - "learning_rate": 3.272060264681631e-05, - "loss": 0.9181, - "step": 26540 - }, - { - "epoch": 0.7615509302425659, - "grad_norm": 0.388671875, - "learning_rate": 3.268356543177149e-05, - "loss": 0.8995, - "step": 26545 - }, - { - "epoch": 0.7616943755110238, - "grad_norm": 0.36328125, - "learning_rate": 3.264654509514757e-05, - "loss": 0.91, - "step": 26550 - }, - { - "epoch": 0.7618378207794816, - "grad_norm": 0.3984375, - "learning_rate": 3.260954164622677e-05, - "loss": 0.9138, - "step": 26555 - }, - { - "epoch": 0.7619812660479394, - "grad_norm": 0.361328125, - "learning_rate": 3.257255509428705e-05, - "loss": 0.9056, - "step": 26560 - }, - { - "epoch": 0.7621247113163973, - "grad_norm": 0.359375, - "learning_rate": 3.25355854486021e-05, - "loss": 0.8941, - "step": 26565 - }, - { - "epoch": 0.762268156584855, - "grad_norm": 0.369140625, - "learning_rate": 3.24986327184415e-05, - "loss": 0.9396, - "step": 26570 - }, - { - "epoch": 0.7624116018533129, - "grad_norm": 0.376953125, - "learning_rate": 3.246169691307046e-05, - "loss": 0.8856, - "step": 26575 - }, - { - "epoch": 0.7625550471217707, - "grad_norm": 0.396484375, - "learning_rate": 3.2424778041749984e-05, - "loss": 0.9865, - "step": 26580 - }, - { - "epoch": 0.7626984923902285, - "grad_norm": 0.427734375, - "learning_rate": 3.2387876113736836e-05, - "loss": 0.9566, - "step": 26585 - }, - { - "epoch": 0.7628419376586864, - "grad_norm": 0.375, - "learning_rate": 3.235099113828351e-05, - "loss": 0.9057, - "step": 26590 - }, - { - "epoch": 0.7629853829271441, - "grad_norm": 0.3671875, - "learning_rate": 3.231412312463831e-05, - "loss": 0.8557, - "step": 26595 - }, - { - "epoch": 0.763128828195602, - "grad_norm": 0.390625, - "learning_rate": 3.227727208204523e-05, - "loss": 0.8883, - "step": 26600 - }, - { - "epoch": 0.7632722734640598, - "grad_norm": 0.41796875, - "learning_rate": 3.2240438019743994e-05, - "loss": 1.0393, - "step": 26605 - }, - { - "epoch": 0.7634157187325176, - "grad_norm": 0.369140625, - "learning_rate": 3.2203620946970156e-05, - "loss": 0.8865, - "step": 26610 - }, - { - "epoch": 0.7635591640009755, - "grad_norm": 0.384765625, - 
"learning_rate": 3.216682087295494e-05, - "loss": 0.9913, - "step": 26615 - }, - { - "epoch": 0.7637026092694332, - "grad_norm": 0.40625, - "learning_rate": 3.213003780692531e-05, - "loss": 0.9086, - "step": 26620 - }, - { - "epoch": 0.7638460545378911, - "grad_norm": 0.359375, - "learning_rate": 3.209327175810395e-05, - "loss": 0.9713, - "step": 26625 - }, - { - "epoch": 0.7639894998063489, - "grad_norm": 0.40234375, - "learning_rate": 3.2056522735709346e-05, - "loss": 1.0056, - "step": 26630 - }, - { - "epoch": 0.7641329450748067, - "grad_norm": 0.38671875, - "learning_rate": 3.201979074895568e-05, - "loss": 0.8975, - "step": 26635 - }, - { - "epoch": 0.7642763903432646, - "grad_norm": 0.443359375, - "learning_rate": 3.198307580705281e-05, - "loss": 1.063, - "step": 26640 - }, - { - "epoch": 0.7644198356117223, - "grad_norm": 0.4765625, - "learning_rate": 3.1946377919206416e-05, - "loss": 0.9643, - "step": 26645 - }, - { - "epoch": 0.7645632808801802, - "grad_norm": 0.388671875, - "learning_rate": 3.190969709461783e-05, - "loss": 0.9511, - "step": 26650 - }, - { - "epoch": 0.764706726148638, - "grad_norm": 0.4140625, - "learning_rate": 3.187303334248414e-05, - "loss": 0.916, - "step": 26655 - }, - { - "epoch": 0.7648501714170958, - "grad_norm": 0.3359375, - "learning_rate": 3.183638667199809e-05, - "loss": 0.9184, - "step": 26660 - }, - { - "epoch": 0.7649936166855537, - "grad_norm": 0.40625, - "learning_rate": 3.179975709234827e-05, - "loss": 1.0806, - "step": 26665 - }, - { - "epoch": 0.7651370619540114, - "grad_norm": 0.357421875, - "learning_rate": 3.176314461271887e-05, - "loss": 0.9831, - "step": 26670 - }, - { - "epoch": 0.7652805072224693, - "grad_norm": 0.390625, - "learning_rate": 3.172654924228984e-05, - "loss": 0.9467, - "step": 26675 - }, - { - "epoch": 0.7654239524909271, - "grad_norm": 0.388671875, - "learning_rate": 3.1689970990236784e-05, - "loss": 0.8824, - "step": 26680 - }, - { - "epoch": 0.7655673977593849, - "grad_norm": 0.427734375, - "learning_rate": 3.1653409865731134e-05, - "loss": 0.8469, - "step": 26685 - }, - { - "epoch": 0.7657108430278428, - "grad_norm": 0.361328125, - "learning_rate": 3.1616865877939915e-05, - "loss": 0.9807, - "step": 26690 - }, - { - "epoch": 0.7658542882963005, - "grad_norm": 0.359375, - "learning_rate": 3.158033903602586e-05, - "loss": 0.9628, - "step": 26695 - }, - { - "epoch": 0.7659977335647584, - "grad_norm": 0.37890625, - "learning_rate": 3.1543829349147523e-05, - "loss": 0.8465, - "step": 26700 - }, - { - "epoch": 0.7661411788332162, - "grad_norm": 0.408203125, - "learning_rate": 3.150733682645901e-05, - "loss": 1.0578, - "step": 26705 - }, - { - "epoch": 0.766284624101674, - "grad_norm": 0.376953125, - "learning_rate": 3.147086147711022e-05, - "loss": 0.8574, - "step": 26710 - }, - { - "epoch": 0.7664280693701319, - "grad_norm": 0.392578125, - "learning_rate": 3.143440331024663e-05, - "loss": 0.9724, - "step": 26715 - }, - { - "epoch": 0.7665715146385896, - "grad_norm": 0.40234375, - "learning_rate": 3.139796233500958e-05, - "loss": 0.9392, - "step": 26720 - }, - { - "epoch": 0.7667149599070474, - "grad_norm": 0.390625, - "learning_rate": 3.1361538560535984e-05, - "loss": 0.8621, - "step": 26725 - }, - { - "epoch": 0.7668584051755053, - "grad_norm": 0.392578125, - "learning_rate": 3.132513199595846e-05, - "loss": 1.0773, - "step": 26730 - }, - { - "epoch": 0.7670018504439631, - "grad_norm": 0.5078125, - "learning_rate": 3.12887426504053e-05, - "loss": 0.9605, - "step": 26735 - }, - { - "epoch": 0.767145295712421, - "grad_norm": 
0.3828125, - "learning_rate": 3.1252370533000494e-05, - "loss": 0.9123, - "step": 26740 - }, - { - "epoch": 0.7672887409808787, - "grad_norm": 0.392578125, - "learning_rate": 3.1216015652863765e-05, - "loss": 0.9613, - "step": 26745 - }, - { - "epoch": 0.7674321862493365, - "grad_norm": 0.35546875, - "learning_rate": 3.1179678019110434e-05, - "loss": 0.9146, - "step": 26750 - }, - { - "epoch": 0.7675756315177944, - "grad_norm": 0.408203125, - "learning_rate": 3.1143357640851534e-05, - "loss": 1.0046, - "step": 26755 - }, - { - "epoch": 0.7677190767862522, - "grad_norm": 0.388671875, - "learning_rate": 3.110705452719376e-05, - "loss": 0.9653, - "step": 26760 - }, - { - "epoch": 0.7678625220547101, - "grad_norm": 0.37890625, - "learning_rate": 3.107076868723945e-05, - "loss": 0.933, - "step": 26765 - }, - { - "epoch": 0.7680059673231678, - "grad_norm": 0.427734375, - "learning_rate": 3.1034500130086706e-05, - "loss": 0.8857, - "step": 26770 - }, - { - "epoch": 0.7681494125916256, - "grad_norm": 0.392578125, - "learning_rate": 3.099824886482921e-05, - "loss": 0.9178, - "step": 26775 - }, - { - "epoch": 0.7682928578600835, - "grad_norm": 0.373046875, - "learning_rate": 3.096201490055635e-05, - "loss": 0.8959, - "step": 26780 - }, - { - "epoch": 0.7684363031285413, - "grad_norm": 0.373046875, - "learning_rate": 3.09257982463531e-05, - "loss": 0.941, - "step": 26785 - }, - { - "epoch": 0.7685797483969992, - "grad_norm": 0.3828125, - "learning_rate": 3.088959891130022e-05, - "loss": 0.8756, - "step": 26790 - }, - { - "epoch": 0.7687231936654569, - "grad_norm": 0.361328125, - "learning_rate": 3.0853416904474055e-05, - "loss": 0.9942, - "step": 26795 - }, - { - "epoch": 0.7688666389339147, - "grad_norm": 0.375, - "learning_rate": 3.081725223494656e-05, - "loss": 0.8801, - "step": 26800 - }, - { - "epoch": 0.7690100842023726, - "grad_norm": 0.388671875, - "learning_rate": 3.0781104911785465e-05, - "loss": 0.8895, - "step": 26805 - }, - { - "epoch": 0.7691535294708304, - "grad_norm": 0.3671875, - "learning_rate": 3.074497494405404e-05, - "loss": 1.0126, - "step": 26810 - }, - { - "epoch": 0.7692969747392883, - "grad_norm": 0.357421875, - "learning_rate": 3.070886234081125e-05, - "loss": 0.9076, - "step": 26815 - }, - { - "epoch": 0.769440420007746, - "grad_norm": 0.37109375, - "learning_rate": 3.0672767111111666e-05, - "loss": 0.9076, - "step": 26820 - }, - { - "epoch": 0.7695838652762038, - "grad_norm": 0.42578125, - "learning_rate": 3.06366892640056e-05, - "loss": 0.9237, - "step": 26825 - }, - { - "epoch": 0.7697273105446617, - "grad_norm": 0.37109375, - "learning_rate": 3.0600628808538915e-05, - "loss": 0.9731, - "step": 26830 - }, - { - "epoch": 0.7698707558131195, - "grad_norm": 0.392578125, - "learning_rate": 3.056458575375314e-05, - "loss": 0.9597, - "step": 26835 - }, - { - "epoch": 0.7700142010815774, - "grad_norm": 0.3515625, - "learning_rate": 3.05285601086854e-05, - "loss": 0.9124, - "step": 26840 - }, - { - "epoch": 0.7701576463500351, - "grad_norm": 0.361328125, - "learning_rate": 3.049255188236858e-05, - "loss": 0.9402, - "step": 26845 - }, - { - "epoch": 0.7703010916184929, - "grad_norm": 0.345703125, - "learning_rate": 3.045656108383106e-05, - "loss": 1.0135, - "step": 26850 - }, - { - "epoch": 0.7704445368869508, - "grad_norm": 0.38671875, - "learning_rate": 3.0420587722096895e-05, - "loss": 1.0349, - "step": 26855 - }, - { - "epoch": 0.7705879821554086, - "grad_norm": 0.333984375, - "learning_rate": 3.0384631806185815e-05, - "loss": 0.8509, - "step": 26860 - }, - { - "epoch": 
0.7707314274238665, - "grad_norm": 0.390625, - "learning_rate": 3.0348693345113133e-05, - "loss": 0.8905, - "step": 26865 - }, - { - "epoch": 0.7708748726923242, - "grad_norm": 0.375, - "learning_rate": 3.0312772347889773e-05, - "loss": 0.9996, - "step": 26870 - }, - { - "epoch": 0.771018317960782, - "grad_norm": 0.375, - "learning_rate": 3.0276868823522274e-05, - "loss": 0.9427, - "step": 26875 - }, - { - "epoch": 0.7711617632292399, - "grad_norm": 0.375, - "learning_rate": 3.0240982781012873e-05, - "loss": 1.0351, - "step": 26880 - }, - { - "epoch": 0.7713052084976977, - "grad_norm": 0.396484375, - "learning_rate": 3.0205114229359356e-05, - "loss": 0.9741, - "step": 26885 - }, - { - "epoch": 0.7714486537661556, - "grad_norm": 0.396484375, - "learning_rate": 3.0169263177555085e-05, - "loss": 1.0344, - "step": 26890 - }, - { - "epoch": 0.7715920990346133, - "grad_norm": 0.388671875, - "learning_rate": 3.013342963458915e-05, - "loss": 0.8712, - "step": 26895 - }, - { - "epoch": 0.7717355443030711, - "grad_norm": 0.390625, - "learning_rate": 3.0097613609446172e-05, - "loss": 0.959, - "step": 26900 - }, - { - "epoch": 0.771878989571529, - "grad_norm": 0.416015625, - "learning_rate": 3.0061815111106374e-05, - "loss": 1.0485, - "step": 26905 - }, - { - "epoch": 0.7720224348399868, - "grad_norm": 0.39453125, - "learning_rate": 3.002603414854559e-05, - "loss": 1.0166, - "step": 26910 - }, - { - "epoch": 0.7721658801084447, - "grad_norm": 0.384765625, - "learning_rate": 2.9990270730735315e-05, - "loss": 0.9768, - "step": 26915 - }, - { - "epoch": 0.7723093253769024, - "grad_norm": 0.396484375, - "learning_rate": 2.9954524866642585e-05, - "loss": 0.9388, - "step": 26920 - }, - { - "epoch": 0.7724527706453602, - "grad_norm": 0.404296875, - "learning_rate": 2.9918796565230057e-05, - "loss": 0.9408, - "step": 26925 - }, - { - "epoch": 0.7725962159138181, - "grad_norm": 0.376953125, - "learning_rate": 2.988308583545596e-05, - "loss": 0.9855, - "step": 26930 - }, - { - "epoch": 0.7727396611822759, - "grad_norm": 0.412109375, - "learning_rate": 2.9847392686274157e-05, - "loss": 0.929, - "step": 26935 - }, - { - "epoch": 0.7728831064507338, - "grad_norm": 0.376953125, - "learning_rate": 2.9811717126634066e-05, - "loss": 1.0165, - "step": 26940 - }, - { - "epoch": 0.7730265517191915, - "grad_norm": 0.373046875, - "learning_rate": 2.9776059165480686e-05, - "loss": 0.8936, - "step": 26945 - }, - { - "epoch": 0.7731699969876493, - "grad_norm": 0.404296875, - "learning_rate": 2.974041881175468e-05, - "loss": 1.0783, - "step": 26950 - }, - { - "epoch": 0.7733134422561072, - "grad_norm": 0.40625, - "learning_rate": 2.9704796074392237e-05, - "loss": 0.9752, - "step": 26955 - }, - { - "epoch": 0.773456887524565, - "grad_norm": 0.3828125, - "learning_rate": 2.9669190962325112e-05, - "loss": 0.9622, - "step": 26960 - }, - { - "epoch": 0.7736003327930229, - "grad_norm": 0.41796875, - "learning_rate": 2.9633603484480655e-05, - "loss": 0.9426, - "step": 26965 - }, - { - "epoch": 0.7737437780614806, - "grad_norm": 0.38671875, - "learning_rate": 2.959803364978184e-05, - "loss": 0.9364, - "step": 26970 - }, - { - "epoch": 0.7738872233299384, - "grad_norm": 0.380859375, - "learning_rate": 2.9562481467147186e-05, - "loss": 0.9234, - "step": 26975 - }, - { - "epoch": 0.7740306685983963, - "grad_norm": 0.359375, - "learning_rate": 2.952694694549073e-05, - "loss": 1.0223, - "step": 26980 - }, - { - "epoch": 0.7741741138668541, - "grad_norm": 0.38671875, - "learning_rate": 2.9491430093722196e-05, - "loss": 0.9025, - "step": 
26985 - }, - { - "epoch": 0.774317559135312, - "grad_norm": 0.380859375, - "learning_rate": 2.9455930920746778e-05, - "loss": 0.8677, - "step": 26990 - }, - { - "epoch": 0.7744610044037697, - "grad_norm": 0.419921875, - "learning_rate": 2.942044943546529e-05, - "loss": 1.1045, - "step": 26995 - }, - { - "epoch": 0.7746044496722275, - "grad_norm": 0.408203125, - "learning_rate": 2.9384985646774053e-05, - "loss": 0.9374, - "step": 27000 - }, - { - "epoch": 0.7747478949406854, - "grad_norm": 0.37890625, - "learning_rate": 2.9349539563565054e-05, - "loss": 0.946, - "step": 27005 - }, - { - "epoch": 0.7748913402091432, - "grad_norm": 0.408203125, - "learning_rate": 2.9314111194725757e-05, - "loss": 0.911, - "step": 27010 - }, - { - "epoch": 0.7750347854776011, - "grad_norm": 0.390625, - "learning_rate": 2.9278700549139173e-05, - "loss": 0.9571, - "step": 27015 - }, - { - "epoch": 0.7751782307460588, - "grad_norm": 0.373046875, - "learning_rate": 2.9243307635683957e-05, - "loss": 0.9072, - "step": 27020 - }, - { - "epoch": 0.7753216760145166, - "grad_norm": 0.369140625, - "learning_rate": 2.9207932463234255e-05, - "loss": 0.9412, - "step": 27025 - }, - { - "epoch": 0.7754651212829745, - "grad_norm": 0.376953125, - "learning_rate": 2.9172575040659744e-05, - "loss": 0.9635, - "step": 27030 - }, - { - "epoch": 0.7756085665514323, - "grad_norm": 0.3359375, - "learning_rate": 2.9137235376825678e-05, - "loss": 0.8482, - "step": 27035 - }, - { - "epoch": 0.7757520118198902, - "grad_norm": 0.396484375, - "learning_rate": 2.910191348059289e-05, - "loss": 0.868, - "step": 27040 - }, - { - "epoch": 0.775895457088348, - "grad_norm": 0.353515625, - "learning_rate": 2.906660936081773e-05, - "loss": 0.9198, - "step": 27045 - }, - { - "epoch": 0.7760389023568057, - "grad_norm": 0.37890625, - "learning_rate": 2.9031323026352053e-05, - "loss": 0.9245, - "step": 27050 - }, - { - "epoch": 0.7761823476252636, - "grad_norm": 0.369140625, - "learning_rate": 2.8996054486043334e-05, - "loss": 1.0111, - "step": 27055 - }, - { - "epoch": 0.7763257928937214, - "grad_norm": 0.40625, - "learning_rate": 2.8960803748734534e-05, - "loss": 0.9192, - "step": 27060 - }, - { - "epoch": 0.7764692381621792, - "grad_norm": 0.4140625, - "learning_rate": 2.8925570823264147e-05, - "loss": 0.9014, - "step": 27065 - }, - { - "epoch": 0.776612683430637, - "grad_norm": 0.40625, - "learning_rate": 2.8890355718466177e-05, - "loss": 0.899, - "step": 27070 - }, - { - "epoch": 0.7767561286990948, - "grad_norm": 0.365234375, - "learning_rate": 2.8855158443170284e-05, - "loss": 0.9878, - "step": 27075 - }, - { - "epoch": 0.7768995739675527, - "grad_norm": 0.388671875, - "learning_rate": 2.8819979006201526e-05, - "loss": 0.9737, - "step": 27080 - }, - { - "epoch": 0.7770430192360105, - "grad_norm": 0.384765625, - "learning_rate": 2.8784817416380484e-05, - "loss": 1.0483, - "step": 27085 - }, - { - "epoch": 0.7771864645044683, - "grad_norm": 0.380859375, - "learning_rate": 2.8749673682523404e-05, - "loss": 0.8767, - "step": 27090 - }, - { - "epoch": 0.7773299097729262, - "grad_norm": 0.43359375, - "learning_rate": 2.8714547813441917e-05, - "loss": 0.9063, - "step": 27095 - }, - { - "epoch": 0.7774733550413839, - "grad_norm": 0.41015625, - "learning_rate": 2.8679439817943232e-05, - "loss": 0.9028, - "step": 27100 - }, - { - "epoch": 0.7776168003098418, - "grad_norm": 0.380859375, - "learning_rate": 2.864434970483004e-05, - "loss": 0.9288, - "step": 27105 - }, - { - "epoch": 0.7777602455782996, - "grad_norm": 0.373046875, - "learning_rate": 
2.860927748290061e-05, - "loss": 0.9615, - "step": 27110 - }, - { - "epoch": 0.7779036908467574, - "grad_norm": 0.40625, - "learning_rate": 2.8574223160948665e-05, - "loss": 0.9864, - "step": 27115 - }, - { - "epoch": 0.7780471361152153, - "grad_norm": 0.373046875, - "learning_rate": 2.853918674776345e-05, - "loss": 0.968, - "step": 27120 - }, - { - "epoch": 0.778190581383673, - "grad_norm": 0.375, - "learning_rate": 2.850416825212977e-05, - "loss": 0.9722, - "step": 27125 - }, - { - "epoch": 0.7783340266521309, - "grad_norm": 0.37890625, - "learning_rate": 2.84691676828279e-05, - "loss": 0.8803, - "step": 27130 - }, - { - "epoch": 0.7784774719205887, - "grad_norm": 0.390625, - "learning_rate": 2.843418504863361e-05, - "loss": 0.9974, - "step": 27135 - }, - { - "epoch": 0.7786209171890465, - "grad_norm": 0.373046875, - "learning_rate": 2.8399220358318148e-05, - "loss": 0.9291, - "step": 27140 - }, - { - "epoch": 0.7787643624575044, - "grad_norm": 0.40625, - "learning_rate": 2.8364273620648364e-05, - "loss": 1.1747, - "step": 27145 - }, - { - "epoch": 0.7789078077259621, - "grad_norm": 0.404296875, - "learning_rate": 2.832934484438652e-05, - "loss": 0.9448, - "step": 27150 - }, - { - "epoch": 0.77905125299442, - "grad_norm": 0.34375, - "learning_rate": 2.8294434038290386e-05, - "loss": 1.0102, - "step": 27155 - }, - { - "epoch": 0.7791946982628778, - "grad_norm": 0.3984375, - "learning_rate": 2.8259541211113216e-05, - "loss": 1.0123, - "step": 27160 - }, - { - "epoch": 0.7793381435313356, - "grad_norm": 0.341796875, - "learning_rate": 2.8224666371603825e-05, - "loss": 1.0168, - "step": 27165 - }, - { - "epoch": 0.7794815887997935, - "grad_norm": 0.359375, - "learning_rate": 2.8189809528506462e-05, - "loss": 0.8675, - "step": 27170 - }, - { - "epoch": 0.7796250340682512, - "grad_norm": 0.33984375, - "learning_rate": 2.8154970690560824e-05, - "loss": 0.8635, - "step": 27175 - }, - { - "epoch": 0.7797684793367091, - "grad_norm": 0.365234375, - "learning_rate": 2.81201498665022e-05, - "loss": 0.8547, - "step": 27180 - }, - { - "epoch": 0.7799119246051669, - "grad_norm": 0.359375, - "learning_rate": 2.8085347065061284e-05, - "loss": 0.8919, - "step": 27185 - }, - { - "epoch": 0.7800553698736247, - "grad_norm": 0.369140625, - "learning_rate": 2.8050562294964267e-05, - "loss": 0.8963, - "step": 27190 - }, - { - "epoch": 0.7801988151420826, - "grad_norm": 0.41796875, - "learning_rate": 2.8015795564932788e-05, - "loss": 0.9408, - "step": 27195 - }, - { - "epoch": 0.7803422604105403, - "grad_norm": 0.392578125, - "learning_rate": 2.798104688368407e-05, - "loss": 0.8821, - "step": 27200 - }, - { - "epoch": 0.7804857056789982, - "grad_norm": 0.400390625, - "learning_rate": 2.7946316259930706e-05, - "loss": 0.9795, - "step": 27205 - }, - { - "epoch": 0.780629150947456, - "grad_norm": 0.376953125, - "learning_rate": 2.791160370238075e-05, - "loss": 0.9172, - "step": 27210 - }, - { - "epoch": 0.7807725962159138, - "grad_norm": 0.447265625, - "learning_rate": 2.7876909219737835e-05, - "loss": 0.9665, - "step": 27215 - }, - { - "epoch": 0.7809160414843717, - "grad_norm": 0.376953125, - "learning_rate": 2.7842232820700977e-05, - "loss": 0.8539, - "step": 27220 - }, - { - "epoch": 0.7810594867528294, - "grad_norm": 0.400390625, - "learning_rate": 2.7807574513964675e-05, - "loss": 0.9436, - "step": 27225 - }, - { - "epoch": 0.7812029320212873, - "grad_norm": 0.404296875, - "learning_rate": 2.7772934308218846e-05, - "loss": 1.0071, - "step": 27230 - }, - { - "epoch": 0.7813463772897451, - "grad_norm": 
0.384765625, - "learning_rate": 2.7738312212148998e-05, - "loss": 0.9441, - "step": 27235 - }, - { - "epoch": 0.7814898225582029, - "grad_norm": 0.400390625, - "learning_rate": 2.7703708234435988e-05, - "loss": 0.9637, - "step": 27240 - }, - { - "epoch": 0.7816332678266608, - "grad_norm": 0.37890625, - "learning_rate": 2.766912238375612e-05, - "loss": 0.9575, - "step": 27245 - }, - { - "epoch": 0.7817767130951185, - "grad_norm": 0.380859375, - "learning_rate": 2.7634554668781242e-05, - "loss": 0.9832, - "step": 27250 - }, - { - "epoch": 0.7819201583635764, - "grad_norm": 0.384765625, - "learning_rate": 2.7600005098178607e-05, - "loss": 1.0326, - "step": 27255 - }, - { - "epoch": 0.7820636036320342, - "grad_norm": 0.40625, - "learning_rate": 2.7565473680610887e-05, - "loss": 0.9295, - "step": 27260 - }, - { - "epoch": 0.782207048900492, - "grad_norm": 0.38671875, - "learning_rate": 2.7530960424736206e-05, - "loss": 0.8732, - "step": 27265 - }, - { - "epoch": 0.7823504941689499, - "grad_norm": 0.404296875, - "learning_rate": 2.7496465339208233e-05, - "loss": 1.001, - "step": 27270 - }, - { - "epoch": 0.7824939394374076, - "grad_norm": 0.37109375, - "learning_rate": 2.746198843267598e-05, - "loss": 0.9307, - "step": 27275 - }, - { - "epoch": 0.7826373847058655, - "grad_norm": 0.412109375, - "learning_rate": 2.7427529713783905e-05, - "loss": 0.9727, - "step": 27280 - }, - { - "epoch": 0.7827808299743233, - "grad_norm": 0.380859375, - "learning_rate": 2.7393089191171962e-05, - "loss": 0.9087, - "step": 27285 - }, - { - "epoch": 0.7829242752427811, - "grad_norm": 0.3828125, - "learning_rate": 2.7358666873475493e-05, - "loss": 0.8826, - "step": 27290 - }, - { - "epoch": 0.783067720511239, - "grad_norm": 0.361328125, - "learning_rate": 2.732426276932526e-05, - "loss": 0.9045, - "step": 27295 - }, - { - "epoch": 0.7832111657796967, - "grad_norm": 0.38671875, - "learning_rate": 2.7289876887347554e-05, - "loss": 0.9809, - "step": 27300 - }, - { - "epoch": 0.7833546110481546, - "grad_norm": 0.376953125, - "learning_rate": 2.7255509236164022e-05, - "loss": 0.9592, - "step": 27305 - }, - { - "epoch": 0.7834980563166124, - "grad_norm": 0.388671875, - "learning_rate": 2.722115982439173e-05, - "loss": 0.9132, - "step": 27310 - }, - { - "epoch": 0.7836415015850702, - "grad_norm": 0.333984375, - "learning_rate": 2.7186828660643203e-05, - "loss": 0.8828, - "step": 27315 - }, - { - "epoch": 0.7837849468535281, - "grad_norm": 0.375, - "learning_rate": 2.7152515753526364e-05, - "loss": 0.9079, - "step": 27320 - }, - { - "epoch": 0.7839283921219858, - "grad_norm": 0.353515625, - "learning_rate": 2.7118221111644605e-05, - "loss": 0.9875, - "step": 27325 - }, - { - "epoch": 0.7840718373904437, - "grad_norm": 0.37890625, - "learning_rate": 2.70839447435967e-05, - "loss": 1.1027, - "step": 27330 - }, - { - "epoch": 0.7842152826589015, - "grad_norm": 0.359375, - "learning_rate": 2.7049686657976824e-05, - "loss": 1.0167, - "step": 27335 - }, - { - "epoch": 0.7843587279273593, - "grad_norm": 0.4296875, - "learning_rate": 2.7015446863374637e-05, - "loss": 0.9694, - "step": 27340 - }, - { - "epoch": 0.7845021731958172, - "grad_norm": 0.384765625, - "learning_rate": 2.698122536837515e-05, - "loss": 0.8662, - "step": 27345 - }, - { - "epoch": 0.7846456184642749, - "grad_norm": 0.39453125, - "learning_rate": 2.6947022181558813e-05, - "loss": 0.8818, - "step": 27350 - }, - { - "epoch": 0.7847890637327328, - "grad_norm": 0.357421875, - "learning_rate": 2.6912837311501427e-05, - "loss": 0.9205, - "step": 27355 - }, - { - 
"epoch": 0.7849325090011906, - "grad_norm": 0.38671875, - "learning_rate": 2.6878670766774328e-05, - "loss": 1.0568, - "step": 27360 - }, - { - "epoch": 0.7850759542696484, - "grad_norm": 0.37890625, - "learning_rate": 2.6844522555944153e-05, - "loss": 1.0001, - "step": 27365 - }, - { - "epoch": 0.7852193995381063, - "grad_norm": 0.373046875, - "learning_rate": 2.6810392687572928e-05, - "loss": 0.9597, - "step": 27370 - }, - { - "epoch": 0.785362844806564, - "grad_norm": 0.431640625, - "learning_rate": 2.6776281170218187e-05, - "loss": 0.9824, - "step": 27375 - }, - { - "epoch": 0.7855062900750219, - "grad_norm": 0.376953125, - "learning_rate": 2.6742188012432767e-05, - "loss": 0.8989, - "step": 27380 - }, - { - "epoch": 0.7856497353434797, - "grad_norm": 0.37890625, - "learning_rate": 2.670811322276493e-05, - "loss": 0.947, - "step": 27385 - }, - { - "epoch": 0.7857931806119375, - "grad_norm": 0.36328125, - "learning_rate": 2.667405680975831e-05, - "loss": 0.9112, - "step": 27390 - }, - { - "epoch": 0.7859366258803954, - "grad_norm": 0.392578125, - "learning_rate": 2.6640018781952013e-05, - "loss": 1.02, - "step": 27395 - }, - { - "epoch": 0.7860800711488531, - "grad_norm": 0.396484375, - "learning_rate": 2.6605999147880456e-05, - "loss": 1.0485, - "step": 27400 - }, - { - "epoch": 0.786223516417311, - "grad_norm": 0.421875, - "learning_rate": 2.6571997916073465e-05, - "loss": 1.0341, - "step": 27405 - }, - { - "epoch": 0.7863669616857688, - "grad_norm": 0.388671875, - "learning_rate": 2.6538015095056223e-05, - "loss": 0.9326, - "step": 27410 - }, - { - "epoch": 0.7865104069542266, - "grad_norm": 0.37109375, - "learning_rate": 2.6504050693349392e-05, - "loss": 0.8874, - "step": 27415 - }, - { - "epoch": 0.7866538522226845, - "grad_norm": 0.40234375, - "learning_rate": 2.6470104719468925e-05, - "loss": 0.9108, - "step": 27420 - }, - { - "epoch": 0.7867972974911422, - "grad_norm": 0.3828125, - "learning_rate": 2.6436177181926146e-05, - "loss": 1.0634, - "step": 27425 - }, - { - "epoch": 0.7869407427596, - "grad_norm": 0.41015625, - "learning_rate": 2.6402268089227866e-05, - "loss": 0.8922, - "step": 27430 - }, - { - "epoch": 0.7870841880280579, - "grad_norm": 0.41796875, - "learning_rate": 2.6368377449876146e-05, - "loss": 1.048, - "step": 27435 - }, - { - "epoch": 0.7872276332965157, - "grad_norm": 0.37109375, - "learning_rate": 2.6334505272368493e-05, - "loss": 0.9036, - "step": 27440 - }, - { - "epoch": 0.7873710785649736, - "grad_norm": 0.419921875, - "learning_rate": 2.6300651565197733e-05, - "loss": 1.0209, - "step": 27445 - }, - { - "epoch": 0.7875145238334313, - "grad_norm": 0.380859375, - "learning_rate": 2.626681633685213e-05, - "loss": 0.932, - "step": 27450 - }, - { - "epoch": 0.7876579691018891, - "grad_norm": 0.38671875, - "learning_rate": 2.6232999595815266e-05, - "loss": 0.8596, - "step": 27455 - }, - { - "epoch": 0.787801414370347, - "grad_norm": 0.4140625, - "learning_rate": 2.6199201350566104e-05, - "loss": 0.9712, - "step": 27460 - }, - { - "epoch": 0.7879448596388048, - "grad_norm": 0.392578125, - "learning_rate": 2.6165421609578945e-05, - "loss": 0.9123, - "step": 27465 - }, - { - "epoch": 0.7880883049072627, - "grad_norm": 0.390625, - "learning_rate": 2.613166038132345e-05, - "loss": 0.9652, - "step": 27470 - }, - { - "epoch": 0.7882317501757204, - "grad_norm": 0.384765625, - "learning_rate": 2.6097917674264727e-05, - "loss": 1.0747, - "step": 27475 - }, - { - "epoch": 0.7883751954441782, - "grad_norm": 0.35546875, - "learning_rate": 2.606419349686312e-05, - "loss": 
0.931, - "step": 27480 - }, - { - "epoch": 0.7885186407126361, - "grad_norm": 0.361328125, - "learning_rate": 2.6030487857574393e-05, - "loss": 0.8685, - "step": 27485 - }, - { - "epoch": 0.7886620859810939, - "grad_norm": 0.35546875, - "learning_rate": 2.5996800764849638e-05, - "loss": 0.9223, - "step": 27490 - }, - { - "epoch": 0.7888055312495518, - "grad_norm": 0.42578125, - "learning_rate": 2.596313222713529e-05, - "loss": 1.0125, - "step": 27495 - }, - { - "epoch": 0.7889489765180095, - "grad_norm": 0.3828125, - "learning_rate": 2.5929482252873183e-05, - "loss": 0.9752, - "step": 27500 - }, - { - "epoch": 0.7890924217864673, - "grad_norm": 0.369140625, - "learning_rate": 2.5895850850500457e-05, - "loss": 0.9028, - "step": 27505 - }, - { - "epoch": 0.7892358670549252, - "grad_norm": 0.388671875, - "learning_rate": 2.5862238028449582e-05, - "loss": 0.9619, - "step": 27510 - }, - { - "epoch": 0.789379312323383, - "grad_norm": 0.3515625, - "learning_rate": 2.5828643795148354e-05, - "loss": 0.9984, - "step": 27515 - }, - { - "epoch": 0.7895227575918409, - "grad_norm": 0.35546875, - "learning_rate": 2.579506815902002e-05, - "loss": 0.9581, - "step": 27520 - }, - { - "epoch": 0.7896662028602986, - "grad_norm": 0.380859375, - "learning_rate": 2.576151112848303e-05, - "loss": 0.9406, - "step": 27525 - }, - { - "epoch": 0.7898096481287564, - "grad_norm": 0.4296875, - "learning_rate": 2.5727972711951208e-05, - "loss": 0.9077, - "step": 27530 - }, - { - "epoch": 0.7899530933972143, - "grad_norm": 0.36328125, - "learning_rate": 2.569445291783379e-05, - "loss": 0.9287, - "step": 27535 - }, - { - "epoch": 0.7900965386656721, - "grad_norm": 0.404296875, - "learning_rate": 2.5660951754535245e-05, - "loss": 0.9139, - "step": 27540 - }, - { - "epoch": 0.79023998393413, - "grad_norm": 0.38671875, - "learning_rate": 2.56274692304554e-05, - "loss": 0.9579, - "step": 27545 - }, - { - "epoch": 0.7903834292025878, - "grad_norm": 0.400390625, - "learning_rate": 2.559400535398938e-05, - "loss": 0.9279, - "step": 27550 - }, - { - "epoch": 0.7905268744710455, - "grad_norm": 0.3671875, - "learning_rate": 2.5560560133527745e-05, - "loss": 1.0513, - "step": 27555 - }, - { - "epoch": 0.7906703197395034, - "grad_norm": 0.36328125, - "learning_rate": 2.5527133577456254e-05, - "loss": 1.0539, - "step": 27560 - }, - { - "epoch": 0.7908137650079612, - "grad_norm": 0.353515625, - "learning_rate": 2.5493725694156044e-05, - "loss": 0.9045, - "step": 27565 - }, - { - "epoch": 0.7909572102764191, - "grad_norm": 0.3359375, - "learning_rate": 2.5460336492003522e-05, - "loss": 0.9482, - "step": 27570 - }, - { - "epoch": 0.7911006555448769, - "grad_norm": 0.388671875, - "learning_rate": 2.542696597937051e-05, - "loss": 0.949, - "step": 27575 - }, - { - "epoch": 0.7912441008133346, - "grad_norm": 0.357421875, - "learning_rate": 2.5393614164624047e-05, - "loss": 0.8987, - "step": 27580 - }, - { - "epoch": 0.7913875460817925, - "grad_norm": 0.4140625, - "learning_rate": 2.536028105612649e-05, - "loss": 0.9965, - "step": 27585 - }, - { - "epoch": 0.7915309913502503, - "grad_norm": 0.3828125, - "learning_rate": 2.5326966662235597e-05, - "loss": 0.9948, - "step": 27590 - }, - { - "epoch": 0.7916744366187082, - "grad_norm": 0.4140625, - "learning_rate": 2.529367099130434e-05, - "loss": 1.0606, - "step": 27595 - }, - { - "epoch": 0.791817881887166, - "grad_norm": 0.3671875, - "learning_rate": 2.5260394051681024e-05, - "loss": 1.0025, - "step": 27600 - }, - { - "epoch": 0.7919613271556237, - "grad_norm": 0.40625, - "learning_rate": 
2.5227135851709237e-05, - "loss": 1.0141, - "step": 27605 - }, - { - "epoch": 0.7921047724240816, - "grad_norm": 0.3828125, - "learning_rate": 2.5193896399727945e-05, - "loss": 0.9545, - "step": 27610 - }, - { - "epoch": 0.7922482176925394, - "grad_norm": 0.41796875, - "learning_rate": 2.5160675704071336e-05, - "loss": 1.0276, - "step": 27615 - }, - { - "epoch": 0.7923916629609973, - "grad_norm": 0.38671875, - "learning_rate": 2.5127473773068888e-05, - "loss": 0.9778, - "step": 27620 - }, - { - "epoch": 0.792535108229455, - "grad_norm": 0.4375, - "learning_rate": 2.5094290615045467e-05, - "loss": 1.0266, - "step": 27625 - }, - { - "epoch": 0.7926785534979128, - "grad_norm": 0.369140625, - "learning_rate": 2.506112623832113e-05, - "loss": 0.9356, - "step": 27630 - }, - { - "epoch": 0.7928219987663707, - "grad_norm": 0.400390625, - "learning_rate": 2.502798065121128e-05, - "loss": 0.9413, - "step": 27635 - }, - { - "epoch": 0.7929654440348285, - "grad_norm": 0.3515625, - "learning_rate": 2.499485386202659e-05, - "loss": 0.9463, - "step": 27640 - }, - { - "epoch": 0.7931088893032864, - "grad_norm": 0.37109375, - "learning_rate": 2.4961745879072997e-05, - "loss": 0.9834, - "step": 27645 - }, - { - "epoch": 0.7932523345717442, - "grad_norm": 0.35546875, - "learning_rate": 2.49286567106518e-05, - "loss": 0.9689, - "step": 27650 - }, - { - "epoch": 0.7933957798402019, - "grad_norm": 0.345703125, - "learning_rate": 2.489558636505951e-05, - "loss": 0.8898, - "step": 27655 - }, - { - "epoch": 0.7935392251086598, - "grad_norm": 0.396484375, - "learning_rate": 2.4862534850587925e-05, - "loss": 0.9415, - "step": 27660 - }, - { - "epoch": 0.7936826703771176, - "grad_norm": 0.353515625, - "learning_rate": 2.4829502175524155e-05, - "loss": 0.8445, - "step": 27665 - }, - { - "epoch": 0.7938261156455755, - "grad_norm": 0.390625, - "learning_rate": 2.4796488348150548e-05, - "loss": 0.9532, - "step": 27670 - }, - { - "epoch": 0.7939695609140333, - "grad_norm": 0.40234375, - "learning_rate": 2.476349337674473e-05, - "loss": 0.9284, - "step": 27675 - }, - { - "epoch": 0.794113006182491, - "grad_norm": 0.388671875, - "learning_rate": 2.4730517269579667e-05, - "loss": 0.9046, - "step": 27680 - }, - { - "epoch": 0.7942564514509489, - "grad_norm": 0.3671875, - "learning_rate": 2.4697560034923517e-05, - "loss": 0.9558, - "step": 27685 - }, - { - "epoch": 0.7943998967194067, - "grad_norm": 0.43359375, - "learning_rate": 2.4664621681039723e-05, - "loss": 0.9865, - "step": 27690 - }, - { - "epoch": 0.7945433419878646, - "grad_norm": 0.39453125, - "learning_rate": 2.463170221618698e-05, - "loss": 1.04, - "step": 27695 - }, - { - "epoch": 0.7946867872563224, - "grad_norm": 0.36328125, - "learning_rate": 2.459880164861932e-05, - "loss": 0.9797, - "step": 27700 - }, - { - "epoch": 0.7948302325247801, - "grad_norm": 0.408203125, - "learning_rate": 2.4565919986585962e-05, - "loss": 0.9177, - "step": 27705 - }, - { - "epoch": 0.794973677793238, - "grad_norm": 0.40625, - "learning_rate": 2.453305723833139e-05, - "loss": 1.002, - "step": 27710 - }, - { - "epoch": 0.7951171230616958, - "grad_norm": 0.388671875, - "learning_rate": 2.450021341209541e-05, - "loss": 0.9579, - "step": 27715 - }, - { - "epoch": 0.7952605683301537, - "grad_norm": 0.390625, - "learning_rate": 2.4467388516113e-05, - "loss": 0.8802, - "step": 27720 - }, - { - "epoch": 0.7954040135986115, - "grad_norm": 0.392578125, - "learning_rate": 2.443458255861445e-05, - "loss": 0.9336, - "step": 27725 - }, - { - "epoch": 0.7955474588670692, - "grad_norm": 0.3671875, 
- "learning_rate": 2.4401795547825234e-05, - "loss": 0.9023, - "step": 27730 - }, - { - "epoch": 0.7956909041355271, - "grad_norm": 0.42578125, - "learning_rate": 2.43690274919662e-05, - "loss": 0.9043, - "step": 27735 - }, - { - "epoch": 0.7958343494039849, - "grad_norm": 0.376953125, - "learning_rate": 2.433627839925332e-05, - "loss": 0.9277, - "step": 27740 - }, - { - "epoch": 0.7959777946724428, - "grad_norm": 0.384765625, - "learning_rate": 2.4303548277897835e-05, - "loss": 0.9098, - "step": 27745 - }, - { - "epoch": 0.7961212399409006, - "grad_norm": 0.435546875, - "learning_rate": 2.427083713610632e-05, - "loss": 0.9113, - "step": 27750 - }, - { - "epoch": 0.7962646852093583, - "grad_norm": 0.42578125, - "learning_rate": 2.423814498208048e-05, - "loss": 1.0142, - "step": 27755 - }, - { - "epoch": 0.7964081304778162, - "grad_norm": 0.4140625, - "learning_rate": 2.42054718240173e-05, - "loss": 0.9752, - "step": 27760 - }, - { - "epoch": 0.796551575746274, - "grad_norm": 0.39453125, - "learning_rate": 2.417281767010898e-05, - "loss": 0.927, - "step": 27765 - }, - { - "epoch": 0.7966950210147318, - "grad_norm": 0.365234375, - "learning_rate": 2.4140182528543044e-05, - "loss": 0.9349, - "step": 27770 - }, - { - "epoch": 0.7968384662831897, - "grad_norm": 0.353515625, - "learning_rate": 2.4107566407502136e-05, - "loss": 0.9783, - "step": 27775 - }, - { - "epoch": 0.7969819115516474, - "grad_norm": 0.390625, - "learning_rate": 2.4074969315164176e-05, - "loss": 0.924, - "step": 27780 - }, - { - "epoch": 0.7971253568201053, - "grad_norm": 0.40234375, - "learning_rate": 2.4042391259702356e-05, - "loss": 0.9722, - "step": 27785 - }, - { - "epoch": 0.7972688020885631, - "grad_norm": 0.390625, - "learning_rate": 2.4009832249285035e-05, - "loss": 0.9679, - "step": 27790 - }, - { - "epoch": 0.7974122473570209, - "grad_norm": 0.427734375, - "learning_rate": 2.3977292292075805e-05, - "loss": 1.0835, - "step": 27795 - }, - { - "epoch": 0.7975556926254788, - "grad_norm": 0.37890625, - "learning_rate": 2.3944771396233467e-05, - "loss": 0.948, - "step": 27800 - }, - { - "epoch": 0.7976991378939365, - "grad_norm": 0.38671875, - "learning_rate": 2.391226956991214e-05, - "loss": 0.9687, - "step": 27805 - }, - { - "epoch": 0.7978425831623944, - "grad_norm": 0.345703125, - "learning_rate": 2.387978682126104e-05, - "loss": 0.9561, - "step": 27810 - }, - { - "epoch": 0.7979860284308522, - "grad_norm": 0.392578125, - "learning_rate": 2.384732315842464e-05, - "loss": 0.9363, - "step": 27815 - }, - { - "epoch": 0.79812947369931, - "grad_norm": 0.38671875, - "learning_rate": 2.3814878589542678e-05, - "loss": 0.9371, - "step": 27820 - }, - { - "epoch": 0.7982729189677679, - "grad_norm": 0.359375, - "learning_rate": 2.378245312275005e-05, - "loss": 0.9573, - "step": 27825 - }, - { - "epoch": 0.7984163642362256, - "grad_norm": 0.373046875, - "learning_rate": 2.3750046766176846e-05, - "loss": 0.9558, - "step": 27830 - }, - { - "epoch": 0.7985598095046835, - "grad_norm": 0.41015625, - "learning_rate": 2.3717659527948432e-05, - "loss": 0.9843, - "step": 27835 - }, - { - "epoch": 0.7987032547731413, - "grad_norm": 0.341796875, - "learning_rate": 2.368529141618533e-05, - "loss": 0.9738, - "step": 27840 - }, - { - "epoch": 0.7988467000415991, - "grad_norm": 0.38671875, - "learning_rate": 2.3652942439003255e-05, - "loss": 0.9214, - "step": 27845 - }, - { - "epoch": 0.798990145310057, - "grad_norm": 0.38671875, - "learning_rate": 2.362061260451316e-05, - "loss": 0.9598, - "step": 27850 - }, - { - "epoch": 0.7991335905785147, 
- "grad_norm": 0.3828125, - "learning_rate": 2.358830192082121e-05, - "loss": 0.9905, - "step": 27855 - }, - { - "epoch": 0.7992770358469726, - "grad_norm": 0.419921875, - "learning_rate": 2.3556010396028737e-05, - "loss": 1.0247, - "step": 27860 - }, - { - "epoch": 0.7994204811154304, - "grad_norm": 0.37890625, - "learning_rate": 2.352373803823227e-05, - "loss": 0.9719, - "step": 27865 - }, - { - "epoch": 0.7995639263838882, - "grad_norm": 0.4140625, - "learning_rate": 2.3491484855523504e-05, - "loss": 1.0126, - "step": 27870 - }, - { - "epoch": 0.7997073716523461, - "grad_norm": 0.46484375, - "learning_rate": 2.3459250855989424e-05, - "loss": 0.9244, - "step": 27875 - }, - { - "epoch": 0.7998508169208038, - "grad_norm": 0.38671875, - "learning_rate": 2.3427036047712125e-05, - "loss": 1.026, - "step": 27880 - }, - { - "epoch": 0.7999942621892617, - "grad_norm": 0.40234375, - "learning_rate": 2.3394840438768895e-05, - "loss": 0.9416, - "step": 27885 - }, - { - "epoch": 0.8001377074577195, - "grad_norm": 0.369140625, - "learning_rate": 2.3362664037232184e-05, - "loss": 1.0194, - "step": 27890 - }, - { - "epoch": 0.8002811527261773, - "grad_norm": 0.37890625, - "learning_rate": 2.3330506851169744e-05, - "loss": 0.9066, - "step": 27895 - }, - { - "epoch": 0.8004245979946352, - "grad_norm": 0.431640625, - "learning_rate": 2.3298368888644386e-05, - "loss": 0.9894, - "step": 27900 - }, - { - "epoch": 0.8005680432630929, - "grad_norm": 0.388671875, - "learning_rate": 2.326625015771412e-05, - "loss": 0.9573, - "step": 27905 - }, - { - "epoch": 0.8007114885315508, - "grad_norm": 0.384765625, - "learning_rate": 2.323415066643221e-05, - "loss": 0.8807, - "step": 27910 - }, - { - "epoch": 0.8008549338000086, - "grad_norm": 0.3984375, - "learning_rate": 2.3202070422847e-05, - "loss": 0.9712, - "step": 27915 - }, - { - "epoch": 0.8009983790684664, - "grad_norm": 0.34765625, - "learning_rate": 2.3170009435002083e-05, - "loss": 1.0046, - "step": 27920 - }, - { - "epoch": 0.8011418243369243, - "grad_norm": 0.431640625, - "learning_rate": 2.3137967710936136e-05, - "loss": 0.9568, - "step": 27925 - }, - { - "epoch": 0.801285269605382, - "grad_norm": 0.3828125, - "learning_rate": 2.3105945258683125e-05, - "loss": 0.9151, - "step": 27930 - }, - { - "epoch": 0.8014287148738399, - "grad_norm": 0.37890625, - "learning_rate": 2.3073942086272106e-05, - "loss": 0.8703, - "step": 27935 - }, - { - "epoch": 0.8015721601422977, - "grad_norm": 0.375, - "learning_rate": 2.3041958201727275e-05, - "loss": 0.9618, - "step": 27940 - }, - { - "epoch": 0.8017156054107555, - "grad_norm": 0.37890625, - "learning_rate": 2.3009993613068104e-05, - "loss": 0.8871, - "step": 27945 - }, - { - "epoch": 0.8018590506792134, - "grad_norm": 0.376953125, - "learning_rate": 2.29780483283091e-05, - "loss": 0.8401, - "step": 27950 - }, - { - "epoch": 0.8020024959476711, - "grad_norm": 0.392578125, - "learning_rate": 2.294612235546001e-05, - "loss": 0.871, - "step": 27955 - }, - { - "epoch": 0.802145941216129, - "grad_norm": 0.404296875, - "learning_rate": 2.2914215702525677e-05, - "loss": 1.0315, - "step": 27960 - }, - { - "epoch": 0.8022893864845868, - "grad_norm": 0.357421875, - "learning_rate": 2.288232837750619e-05, - "loss": 1.016, - "step": 27965 - }, - { - "epoch": 0.8024328317530446, - "grad_norm": 0.47265625, - "learning_rate": 2.285046038839672e-05, - "loss": 1.0326, - "step": 27970 - }, - { - "epoch": 0.8025762770215025, - "grad_norm": 0.41015625, - "learning_rate": 2.2818611743187567e-05, - "loss": 1.0247, - "step": 27975 - }, - { - 
"epoch": 0.8027197222899602, - "grad_norm": 0.37109375, - "learning_rate": 2.2786782449864298e-05, - "loss": 0.9738, - "step": 27980 - }, - { - "epoch": 0.8028631675584181, - "grad_norm": 0.41015625, - "learning_rate": 2.2754972516407512e-05, - "loss": 0.9935, - "step": 27985 - }, - { - "epoch": 0.8030066128268759, - "grad_norm": 0.423828125, - "learning_rate": 2.2723181950793004e-05, - "loss": 0.9618, - "step": 27990 - }, - { - "epoch": 0.8031500580953337, - "grad_norm": 0.359375, - "learning_rate": 2.269141076099166e-05, - "loss": 0.8818, - "step": 27995 - }, - { - "epoch": 0.8032935033637916, - "grad_norm": 0.384765625, - "learning_rate": 2.265965895496962e-05, - "loss": 0.9879, - "step": 28000 - }, - { - "epoch": 0.8034369486322493, - "grad_norm": 0.37890625, - "learning_rate": 2.262792654068806e-05, - "loss": 0.8831, - "step": 28005 - }, - { - "epoch": 0.8035803939007072, - "grad_norm": 0.349609375, - "learning_rate": 2.259621352610333e-05, - "loss": 1.0114, - "step": 28010 - }, - { - "epoch": 0.803723839169165, - "grad_norm": 0.3828125, - "learning_rate": 2.2564519919166926e-05, - "loss": 0.9094, - "step": 28015 - }, - { - "epoch": 0.8038672844376228, - "grad_norm": 0.400390625, - "learning_rate": 2.2532845727825445e-05, - "loss": 0.8862, - "step": 28020 - }, - { - "epoch": 0.8040107297060807, - "grad_norm": 0.365234375, - "learning_rate": 2.2501190960020634e-05, - "loss": 0.9993, - "step": 28025 - }, - { - "epoch": 0.8041541749745384, - "grad_norm": 0.36328125, - "learning_rate": 2.246955562368941e-05, - "loss": 0.9246, - "step": 28030 - }, - { - "epoch": 0.8042976202429963, - "grad_norm": 0.4140625, - "learning_rate": 2.2437939726763766e-05, - "loss": 0.9176, - "step": 28035 - }, - { - "epoch": 0.8044410655114541, - "grad_norm": 0.388671875, - "learning_rate": 2.2406343277170827e-05, - "loss": 1.0445, - "step": 28040 - }, - { - "epoch": 0.8045845107799119, - "grad_norm": 0.369140625, - "learning_rate": 2.237476628283286e-05, - "loss": 1.0171, - "step": 28045 - }, - { - "epoch": 0.8047279560483698, - "grad_norm": 0.3515625, - "learning_rate": 2.234320875166721e-05, - "loss": 0.9144, - "step": 28050 - }, - { - "epoch": 0.8048714013168276, - "grad_norm": 0.44140625, - "learning_rate": 2.231167069158644e-05, - "loss": 0.977, - "step": 28055 - }, - { - "epoch": 0.8050148465852854, - "grad_norm": 0.373046875, - "learning_rate": 2.228015211049813e-05, - "loss": 1.0027, - "step": 28060 - }, - { - "epoch": 0.8051582918537432, - "grad_norm": 0.392578125, - "learning_rate": 2.2248653016305e-05, - "loss": 0.9146, - "step": 28065 - }, - { - "epoch": 0.805301737122201, - "grad_norm": 0.375, - "learning_rate": 2.2217173416904935e-05, - "loss": 0.913, - "step": 28070 - }, - { - "epoch": 0.8054451823906589, - "grad_norm": 0.40625, - "learning_rate": 2.218571332019088e-05, - "loss": 0.9279, - "step": 28075 - }, - { - "epoch": 0.8055886276591167, - "grad_norm": 0.384765625, - "learning_rate": 2.2154272734050908e-05, - "loss": 0.9338, - "step": 28080 - }, - { - "epoch": 0.8057320729275745, - "grad_norm": 0.333984375, - "learning_rate": 2.2122851666368162e-05, - "loss": 0.9905, - "step": 28085 - }, - { - "epoch": 0.8058755181960323, - "grad_norm": 0.466796875, - "learning_rate": 2.2091450125020986e-05, - "loss": 1.0021, - "step": 28090 - }, - { - "epoch": 0.8060189634644901, - "grad_norm": 0.35546875, - "learning_rate": 2.206006811788274e-05, - "loss": 0.9584, - "step": 28095 - }, - { - "epoch": 0.806162408732948, - "grad_norm": 0.4453125, - "learning_rate": 2.202870565282188e-05, - "loss": 0.9843, - 
"step": 28100 - }, - { - "epoch": 0.8063058540014058, - "grad_norm": 0.369140625, - "learning_rate": 2.1997362737702064e-05, - "loss": 1.0454, - "step": 28105 - }, - { - "epoch": 0.8064492992698635, - "grad_norm": 0.4375, - "learning_rate": 2.1966039380381944e-05, - "loss": 0.9002, - "step": 28110 - }, - { - "epoch": 0.8065927445383214, - "grad_norm": 0.375, - "learning_rate": 2.1934735588715306e-05, - "loss": 0.9084, - "step": 28115 - }, - { - "epoch": 0.8067361898067792, - "grad_norm": 0.4296875, - "learning_rate": 2.1903451370551e-05, - "loss": 1.0368, - "step": 28120 - }, - { - "epoch": 0.8068796350752371, - "grad_norm": 0.373046875, - "learning_rate": 2.1872186733733057e-05, - "loss": 0.9817, - "step": 28125 - }, - { - "epoch": 0.8070230803436949, - "grad_norm": 0.36328125, - "learning_rate": 2.1840941686100524e-05, - "loss": 0.8646, - "step": 28130 - }, - { - "epoch": 0.8071665256121526, - "grad_norm": 0.38671875, - "learning_rate": 2.1809716235487522e-05, - "loss": 0.91, - "step": 28135 - }, - { - "epoch": 0.8073099708806105, - "grad_norm": 0.33984375, - "learning_rate": 2.1778510389723283e-05, - "loss": 0.9496, - "step": 28140 - }, - { - "epoch": 0.8074534161490683, - "grad_norm": 0.46484375, - "learning_rate": 2.1747324156632177e-05, - "loss": 0.871, - "step": 28145 - }, - { - "epoch": 0.8075968614175262, - "grad_norm": 0.408203125, - "learning_rate": 2.1716157544033578e-05, - "loss": 0.9376, - "step": 28150 - }, - { - "epoch": 0.807740306685984, - "grad_norm": 0.388671875, - "learning_rate": 2.1685010559741947e-05, - "loss": 0.9384, - "step": 28155 - }, - { - "epoch": 0.8078837519544417, - "grad_norm": 0.388671875, - "learning_rate": 2.1653883211566895e-05, - "loss": 0.9763, - "step": 28160 - }, - { - "epoch": 0.8080271972228996, - "grad_norm": 0.40234375, - "learning_rate": 2.162277550731304e-05, - "loss": 1.0464, - "step": 28165 - }, - { - "epoch": 0.8081706424913574, - "grad_norm": 0.34765625, - "learning_rate": 2.1591687454780087e-05, - "loss": 0.9402, - "step": 28170 - }, - { - "epoch": 0.8083140877598153, - "grad_norm": 0.33203125, - "learning_rate": 2.156061906176281e-05, - "loss": 0.9633, - "step": 28175 - }, - { - "epoch": 0.8084575330282731, - "grad_norm": 0.37890625, - "learning_rate": 2.152957033605111e-05, - "loss": 0.904, - "step": 28180 - }, - { - "epoch": 0.8086009782967308, - "grad_norm": 0.39453125, - "learning_rate": 2.1498541285429886e-05, - "loss": 0.8829, - "step": 28185 - }, - { - "epoch": 0.8087444235651887, - "grad_norm": 0.53515625, - "learning_rate": 2.146753191767913e-05, - "loss": 0.9028, - "step": 28190 - }, - { - "epoch": 0.8088878688336465, - "grad_norm": 0.376953125, - "learning_rate": 2.1436542240573897e-05, - "loss": 0.8496, - "step": 28195 - }, - { - "epoch": 0.8090313141021044, - "grad_norm": 0.3984375, - "learning_rate": 2.14055722618843e-05, - "loss": 0.8935, - "step": 28200 - }, - { - "epoch": 0.8091747593705622, - "grad_norm": 0.412109375, - "learning_rate": 2.1374621989375554e-05, - "loss": 1.031, - "step": 28205 - }, - { - "epoch": 0.8093182046390199, - "grad_norm": 0.380859375, - "learning_rate": 2.1343691430807878e-05, - "loss": 0.8921, - "step": 28210 - }, - { - "epoch": 0.8094616499074778, - "grad_norm": 0.40234375, - "learning_rate": 2.1312780593936564e-05, - "loss": 1.1662, - "step": 28215 - }, - { - "epoch": 0.8096050951759356, - "grad_norm": 0.423828125, - "learning_rate": 2.1281889486511984e-05, - "loss": 1.0072, - "step": 28220 - }, - { - "epoch": 0.8097485404443935, - "grad_norm": 0.388671875, - "learning_rate": 
2.1251018116279497e-05, - "loss": 0.9748, - "step": 28225 - }, - { - "epoch": 0.8098919857128513, - "grad_norm": 0.357421875, - "learning_rate": 2.122016649097962e-05, - "loss": 0.9145, - "step": 28230 - }, - { - "epoch": 0.810035430981309, - "grad_norm": 0.3515625, - "learning_rate": 2.1189334618347845e-05, - "loss": 0.8978, - "step": 28235 - }, - { - "epoch": 0.8101788762497669, - "grad_norm": 0.390625, - "learning_rate": 2.1158522506114696e-05, - "loss": 0.8506, - "step": 28240 - }, - { - "epoch": 0.8103223215182247, - "grad_norm": 0.3671875, - "learning_rate": 2.1127730162005778e-05, - "loss": 1.0498, - "step": 28245 - }, - { - "epoch": 0.8104657667866826, - "grad_norm": 0.63671875, - "learning_rate": 2.1096957593741772e-05, - "loss": 0.9896, - "step": 28250 - }, - { - "epoch": 0.8106092120551404, - "grad_norm": 0.380859375, - "learning_rate": 2.106620480903834e-05, - "loss": 0.867, - "step": 28255 - }, - { - "epoch": 0.8107526573235981, - "grad_norm": 0.388671875, - "learning_rate": 2.1035471815606178e-05, - "loss": 0.9776, - "step": 28260 - }, - { - "epoch": 0.810896102592056, - "grad_norm": 0.388671875, - "learning_rate": 2.1004758621151088e-05, - "loss": 0.9375, - "step": 28265 - }, - { - "epoch": 0.8110395478605138, - "grad_norm": 0.380859375, - "learning_rate": 2.0974065233373864e-05, - "loss": 1.0025, - "step": 28270 - }, - { - "epoch": 0.8111829931289717, - "grad_norm": 0.365234375, - "learning_rate": 2.0943391659970324e-05, - "loss": 0.8984, - "step": 28275 - }, - { - "epoch": 0.8113264383974295, - "grad_norm": 0.400390625, - "learning_rate": 2.0912737908631296e-05, - "loss": 1.0111, - "step": 28280 - }, - { - "epoch": 0.8114698836658872, - "grad_norm": 0.37109375, - "learning_rate": 2.0882103987042746e-05, - "loss": 0.8691, - "step": 28285 - }, - { - "epoch": 0.8116133289343451, - "grad_norm": 0.443359375, - "learning_rate": 2.0851489902885556e-05, - "loss": 0.9697, - "step": 28290 - }, - { - "epoch": 0.8117567742028029, - "grad_norm": 0.373046875, - "learning_rate": 2.082089566383567e-05, - "loss": 1.009, - "step": 28295 - }, - { - "epoch": 0.8119002194712608, - "grad_norm": 0.365234375, - "learning_rate": 2.079032127756405e-05, - "loss": 0.9341, - "step": 28300 - }, - { - "epoch": 0.8120436647397186, - "grad_norm": 0.369140625, - "learning_rate": 2.075976675173673e-05, - "loss": 0.9482, - "step": 28305 - }, - { - "epoch": 0.8121871100081763, - "grad_norm": 0.3671875, - "learning_rate": 2.0729232094014693e-05, - "loss": 0.9737, - "step": 28310 - }, - { - "epoch": 0.8123305552766342, - "grad_norm": 0.33984375, - "learning_rate": 2.069871731205395e-05, - "loss": 0.9198, - "step": 28315 - }, - { - "epoch": 0.812474000545092, - "grad_norm": 0.369140625, - "learning_rate": 2.066822241350561e-05, - "loss": 1.006, - "step": 28320 - }, - { - "epoch": 0.8126174458135499, - "grad_norm": 0.388671875, - "learning_rate": 2.0637747406015705e-05, - "loss": 0.9706, - "step": 28325 - }, - { - "epoch": 0.8127608910820077, - "grad_norm": 0.388671875, - "learning_rate": 2.06072922972253e-05, - "loss": 0.9666, - "step": 28330 - }, - { - "epoch": 0.8129043363504654, - "grad_norm": 0.376953125, - "learning_rate": 2.0576857094770473e-05, - "loss": 0.9912, - "step": 28335 - }, - { - "epoch": 0.8130477816189233, - "grad_norm": 0.41015625, - "learning_rate": 2.0546441806282367e-05, - "loss": 1.0242, - "step": 28340 - }, - { - "epoch": 0.8131912268873811, - "grad_norm": 0.443359375, - "learning_rate": 2.051604643938705e-05, - "loss": 1.03, - "step": 28345 - }, - { - "epoch": 0.813334672155839, - 
"grad_norm": 0.376953125, - "learning_rate": 2.048567100170562e-05, - "loss": 0.9647, - "step": 28350 - }, - { - "epoch": 0.8134781174242968, - "grad_norm": 0.37890625, - "learning_rate": 2.0455315500854222e-05, - "loss": 0.9321, - "step": 28355 - }, - { - "epoch": 0.8136215626927545, - "grad_norm": 0.373046875, - "learning_rate": 2.0424979944443946e-05, - "loss": 1.0355, - "step": 28360 - }, - { - "epoch": 0.8137650079612124, - "grad_norm": 0.40234375, - "learning_rate": 2.0394664340080914e-05, - "loss": 0.9733, - "step": 28365 - }, - { - "epoch": 0.8139084532296702, - "grad_norm": 0.40625, - "learning_rate": 2.0364368695366233e-05, - "loss": 1.0577, - "step": 28370 - }, - { - "epoch": 0.8140518984981281, - "grad_norm": 0.34375, - "learning_rate": 2.033409301789596e-05, - "loss": 0.9469, - "step": 28375 - }, - { - "epoch": 0.8141953437665859, - "grad_norm": 0.349609375, - "learning_rate": 2.0303837315261277e-05, - "loss": 0.9578, - "step": 28380 - }, - { - "epoch": 0.8143387890350436, - "grad_norm": 0.359375, - "learning_rate": 2.027360159504822e-05, - "loss": 0.934, - "step": 28385 - }, - { - "epoch": 0.8144822343035015, - "grad_norm": 0.490234375, - "learning_rate": 2.0243385864837884e-05, - "loss": 1.0197, - "step": 28390 - }, - { - "epoch": 0.8146256795719593, - "grad_norm": 0.392578125, - "learning_rate": 2.0213190132206326e-05, - "loss": 1.0003, - "step": 28395 - }, - { - "epoch": 0.8147691248404172, - "grad_norm": 0.392578125, - "learning_rate": 2.018301440472461e-05, - "loss": 1.0575, - "step": 28400 - }, - { - "epoch": 0.814912570108875, - "grad_norm": 0.37109375, - "learning_rate": 2.015285868995873e-05, - "loss": 0.9486, - "step": 28405 - }, - { - "epoch": 0.8150560153773327, - "grad_norm": 0.421875, - "learning_rate": 2.012272299546978e-05, - "loss": 0.8685, - "step": 28410 - }, - { - "epoch": 0.8151994606457906, - "grad_norm": 0.36328125, - "learning_rate": 2.0092607328813705e-05, - "loss": 0.8518, - "step": 28415 - }, - { - "epoch": 0.8153429059142484, - "grad_norm": 0.373046875, - "learning_rate": 2.0062511697541485e-05, - "loss": 0.9431, - "step": 28420 - }, - { - "epoch": 0.8154863511827063, - "grad_norm": 0.35546875, - "learning_rate": 2.0032436109199115e-05, - "loss": 0.9297, - "step": 28425 - }, - { - "epoch": 0.8156297964511641, - "grad_norm": 0.3671875, - "learning_rate": 2.0002380571327482e-05, - "loss": 0.9075, - "step": 28430 - }, - { - "epoch": 0.8157732417196218, - "grad_norm": 0.34765625, - "learning_rate": 1.9972345091462518e-05, - "loss": 0.8203, - "step": 28435 - }, - { - "epoch": 0.8159166869880797, - "grad_norm": 0.38671875, - "learning_rate": 1.994232967713505e-05, - "loss": 0.9566, - "step": 28440 - }, - { - "epoch": 0.8160601322565375, - "grad_norm": 0.349609375, - "learning_rate": 1.9912334335870975e-05, - "loss": 0.914, - "step": 28445 - }, - { - "epoch": 0.8162035775249953, - "grad_norm": 0.408203125, - "learning_rate": 1.988235907519107e-05, - "loss": 1.0598, - "step": 28450 - }, - { - "epoch": 0.8163470227934532, - "grad_norm": 0.392578125, - "learning_rate": 1.9852403902611117e-05, - "loss": 0.9081, - "step": 28455 - }, - { - "epoch": 0.816490468061911, - "grad_norm": 0.390625, - "learning_rate": 1.9822468825641816e-05, - "loss": 0.9243, - "step": 28460 - }, - { - "epoch": 0.8166339133303688, - "grad_norm": 0.38671875, - "learning_rate": 1.979255385178893e-05, - "loss": 0.9571, - "step": 28465 - }, - { - "epoch": 0.8167773585988266, - "grad_norm": 0.365234375, - "learning_rate": 1.9762658988553086e-05, - "loss": 0.9858, - "step": 28470 - }, - { - 
"epoch": 0.8169208038672844, - "grad_norm": 0.375, - "learning_rate": 1.973278424342987e-05, - "loss": 0.9686, - "step": 28475 - }, - { - "epoch": 0.8170642491357423, - "grad_norm": 0.427734375, - "learning_rate": 1.9702929623909903e-05, - "loss": 0.9096, - "step": 28480 - }, - { - "epoch": 0.8172076944042, - "grad_norm": 0.3671875, - "learning_rate": 1.967309513747868e-05, - "loss": 0.9723, - "step": 28485 - }, - { - "epoch": 0.8173511396726579, - "grad_norm": 0.375, - "learning_rate": 1.9643280791616702e-05, - "loss": 0.9686, - "step": 28490 - }, - { - "epoch": 0.8174945849411157, - "grad_norm": 0.39453125, - "learning_rate": 1.9613486593799335e-05, - "loss": 0.9727, - "step": 28495 - }, - { - "epoch": 0.8176380302095735, - "grad_norm": 0.373046875, - "learning_rate": 1.9583712551497036e-05, - "loss": 0.9015, - "step": 28500 - }, - { - "epoch": 0.8177814754780314, - "grad_norm": 0.3671875, - "learning_rate": 1.955395867217509e-05, - "loss": 0.9474, - "step": 28505 - }, - { - "epoch": 0.8179249207464891, - "grad_norm": 0.376953125, - "learning_rate": 1.9524224963293726e-05, - "loss": 0.9935, - "step": 28510 - }, - { - "epoch": 0.818068366014947, - "grad_norm": 0.3828125, - "learning_rate": 1.9494511432308216e-05, - "loss": 0.9341, - "step": 28515 - }, - { - "epoch": 0.8182118112834048, - "grad_norm": 0.365234375, - "learning_rate": 1.946481808666868e-05, - "loss": 0.9026, - "step": 28520 - }, - { - "epoch": 0.8183552565518626, - "grad_norm": 0.423828125, - "learning_rate": 1.9435144933820204e-05, - "loss": 1.0303, - "step": 28525 - }, - { - "epoch": 0.8184987018203205, - "grad_norm": 0.357421875, - "learning_rate": 1.9405491981202794e-05, - "loss": 0.9334, - "step": 28530 - }, - { - "epoch": 0.8186421470887782, - "grad_norm": 0.40625, - "learning_rate": 1.9375859236251458e-05, - "loss": 1.037, - "step": 28535 - }, - { - "epoch": 0.8187855923572361, - "grad_norm": 0.384765625, - "learning_rate": 1.9346246706396066e-05, - "loss": 1.0515, - "step": 28540 - }, - { - "epoch": 0.8189290376256939, - "grad_norm": 0.3828125, - "learning_rate": 1.9316654399061408e-05, - "loss": 0.9345, - "step": 28545 - }, - { - "epoch": 0.8190724828941517, - "grad_norm": 0.392578125, - "learning_rate": 1.9287082321667295e-05, - "loss": 0.8611, - "step": 28550 - }, - { - "epoch": 0.8192159281626096, - "grad_norm": 0.396484375, - "learning_rate": 1.9257530481628394e-05, - "loss": 0.9306, - "step": 28555 - }, - { - "epoch": 0.8193593734310674, - "grad_norm": 0.357421875, - "learning_rate": 1.9227998886354305e-05, - "loss": 0.9009, - "step": 28560 - }, - { - "epoch": 0.8195028186995252, - "grad_norm": 0.390625, - "learning_rate": 1.9198487543249545e-05, - "loss": 1.0455, - "step": 28565 - }, - { - "epoch": 0.819646263967983, - "grad_norm": 0.408203125, - "learning_rate": 1.9168996459713594e-05, - "loss": 0.9381, - "step": 28570 - }, - { - "epoch": 0.8197897092364408, - "grad_norm": 0.357421875, - "learning_rate": 1.913952564314081e-05, - "loss": 1.0304, - "step": 28575 - }, - { - "epoch": 0.8199331545048987, - "grad_norm": 0.392578125, - "learning_rate": 1.9110075100920466e-05, - "loss": 0.9733, - "step": 28580 - }, - { - "epoch": 0.8200765997733565, - "grad_norm": 0.388671875, - "learning_rate": 1.9080644840436822e-05, - "loss": 1.0059, - "step": 28585 - }, - { - "epoch": 0.8202200450418143, - "grad_norm": 0.384765625, - "learning_rate": 1.9051234869068978e-05, - "loss": 0.9811, - "step": 28590 - }, - { - "epoch": 0.8203634903102721, - "grad_norm": 0.35546875, - "learning_rate": 1.9021845194190968e-05, - "loss": 
0.9695, - "step": 28595 - }, - { - "epoch": 0.8205069355787299, - "grad_norm": 0.40234375, - "learning_rate": 1.8992475823171717e-05, - "loss": 0.941, - "step": 28600 - }, - { - "epoch": 0.8206503808471878, - "grad_norm": 0.365234375, - "learning_rate": 1.8963126763375128e-05, - "loss": 0.9757, - "step": 28605 - }, - { - "epoch": 0.8207938261156456, - "grad_norm": 0.37890625, - "learning_rate": 1.8933798022159943e-05, - "loss": 0.9894, - "step": 28610 - }, - { - "epoch": 0.8209372713841034, - "grad_norm": 0.453125, - "learning_rate": 1.8904489606879826e-05, - "loss": 0.9678, - "step": 28615 - }, - { - "epoch": 0.8210807166525612, - "grad_norm": 0.39453125, - "learning_rate": 1.8875201524883347e-05, - "loss": 0.9588, - "step": 28620 - }, - { - "epoch": 0.821224161921019, - "grad_norm": 0.3671875, - "learning_rate": 1.8845933783514003e-05, - "loss": 0.9178, - "step": 28625 - }, - { - "epoch": 0.8213676071894769, - "grad_norm": 0.39453125, - "learning_rate": 1.8816686390110172e-05, - "loss": 1.0432, - "step": 28630 - }, - { - "epoch": 0.8215110524579347, - "grad_norm": 0.4296875, - "learning_rate": 1.8787459352005077e-05, - "loss": 0.9266, - "step": 28635 - }, - { - "epoch": 0.8216544977263925, - "grad_norm": 0.34375, - "learning_rate": 1.8758252676526945e-05, - "loss": 0.8182, - "step": 28640 - }, - { - "epoch": 0.8217979429948503, - "grad_norm": 0.392578125, - "learning_rate": 1.8729066370998825e-05, - "loss": 0.9049, - "step": 28645 - }, - { - "epoch": 0.8219413882633081, - "grad_norm": 0.484375, - "learning_rate": 1.869990044273867e-05, - "loss": 0.9304, - "step": 28650 - }, - { - "epoch": 0.822084833531766, - "grad_norm": 0.380859375, - "learning_rate": 1.8670754899059295e-05, - "loss": 0.8757, - "step": 28655 - }, - { - "epoch": 0.8222282788002238, - "grad_norm": 0.314453125, - "learning_rate": 1.8641629747268486e-05, - "loss": 0.8551, - "step": 28660 - }, - { - "epoch": 0.8223717240686816, - "grad_norm": 0.3828125, - "learning_rate": 1.8612524994668856e-05, - "loss": 0.9326, - "step": 28665 - }, - { - "epoch": 0.8225151693371394, - "grad_norm": 0.4375, - "learning_rate": 1.858344064855787e-05, - "loss": 0.8921, - "step": 28670 - }, - { - "epoch": 0.8226586146055972, - "grad_norm": 0.353515625, - "learning_rate": 1.8554376716227984e-05, - "loss": 0.9124, - "step": 28675 - }, - { - "epoch": 0.8228020598740551, - "grad_norm": 0.37109375, - "learning_rate": 1.8525333204966443e-05, - "loss": 0.9457, - "step": 28680 - }, - { - "epoch": 0.8229455051425129, - "grad_norm": 0.46484375, - "learning_rate": 1.8496310122055393e-05, - "loss": 1.0222, - "step": 28685 - }, - { - "epoch": 0.8230889504109707, - "grad_norm": 0.376953125, - "learning_rate": 1.8467307474771855e-05, - "loss": 0.9653, - "step": 28690 - }, - { - "epoch": 0.8232323956794285, - "grad_norm": 0.384765625, - "learning_rate": 1.8438325270387778e-05, - "loss": 1.0186, - "step": 28695 - }, - { - "epoch": 0.8233758409478863, - "grad_norm": 0.376953125, - "learning_rate": 1.8409363516169907e-05, - "loss": 0.9398, - "step": 28700 - }, - { - "epoch": 0.8235192862163442, - "grad_norm": 0.373046875, - "learning_rate": 1.83804222193799e-05, - "loss": 0.9538, - "step": 28705 - }, - { - "epoch": 0.823662731484802, - "grad_norm": 0.3671875, - "learning_rate": 1.8351501387274307e-05, - "loss": 0.9763, - "step": 28710 - }, - { - "epoch": 0.8238061767532598, - "grad_norm": 0.376953125, - "learning_rate": 1.83226010271045e-05, - "loss": 0.9205, - "step": 28715 - }, - { - "epoch": 0.8239496220217176, - "grad_norm": 0.435546875, - "learning_rate": 
1.829372114611675e-05, - "loss": 0.8904, - "step": 28720 - }, - { - "epoch": 0.8240930672901754, - "grad_norm": 0.39453125, - "learning_rate": 1.8264861751552153e-05, - "loss": 1.0059, - "step": 28725 - }, - { - "epoch": 0.8242365125586333, - "grad_norm": 0.38671875, - "learning_rate": 1.823602285064675e-05, - "loss": 0.9582, - "step": 28730 - }, - { - "epoch": 0.8243799578270911, - "grad_norm": 0.431640625, - "learning_rate": 1.8207204450631365e-05, - "loss": 0.938, - "step": 28735 - }, - { - "epoch": 0.824523403095549, - "grad_norm": 0.376953125, - "learning_rate": 1.817840655873171e-05, - "loss": 0.8473, - "step": 28740 - }, - { - "epoch": 0.8246668483640067, - "grad_norm": 0.365234375, - "learning_rate": 1.814962918216835e-05, - "loss": 0.8842, - "step": 28745 - }, - { - "epoch": 0.8248102936324645, - "grad_norm": 0.37109375, - "learning_rate": 1.8120872328156713e-05, - "loss": 0.9356, - "step": 28750 - }, - { - "epoch": 0.8249537389009224, - "grad_norm": 0.37890625, - "learning_rate": 1.8092136003907056e-05, - "loss": 0.9066, - "step": 28755 - }, - { - "epoch": 0.8250971841693802, - "grad_norm": 0.3515625, - "learning_rate": 1.8063420216624548e-05, - "loss": 0.794, - "step": 28760 - }, - { - "epoch": 0.825240629437838, - "grad_norm": 0.38671875, - "learning_rate": 1.8034724973509164e-05, - "loss": 0.9007, - "step": 28765 - }, - { - "epoch": 0.8253840747062958, - "grad_norm": 0.39453125, - "learning_rate": 1.8006050281755725e-05, - "loss": 0.8826, - "step": 28770 - }, - { - "epoch": 0.8255275199747536, - "grad_norm": 0.375, - "learning_rate": 1.7977396148553916e-05, - "loss": 0.8561, - "step": 28775 - }, - { - "epoch": 0.8256709652432115, - "grad_norm": 0.369140625, - "learning_rate": 1.794876258108823e-05, - "loss": 0.9361, - "step": 28780 - }, - { - "epoch": 0.8258144105116693, - "grad_norm": 0.392578125, - "learning_rate": 1.7920149586538083e-05, - "loss": 1.0052, - "step": 28785 - }, - { - "epoch": 0.825957855780127, - "grad_norm": 0.3671875, - "learning_rate": 1.7891557172077666e-05, - "loss": 1.0239, - "step": 28790 - }, - { - "epoch": 0.8261013010485849, - "grad_norm": 0.365234375, - "learning_rate": 1.7862985344875994e-05, - "loss": 0.9538, - "step": 28795 - }, - { - "epoch": 0.8262447463170427, - "grad_norm": 0.390625, - "learning_rate": 1.783443411209701e-05, - "loss": 0.9529, - "step": 28800 - }, - { - "epoch": 0.8263881915855006, - "grad_norm": 0.361328125, - "learning_rate": 1.7805903480899412e-05, - "loss": 0.866, - "step": 28805 - }, - { - "epoch": 0.8265316368539584, - "grad_norm": 0.369140625, - "learning_rate": 1.7777393458436753e-05, - "loss": 0.9765, - "step": 28810 - }, - { - "epoch": 0.8266750821224161, - "grad_norm": 0.39453125, - "learning_rate": 1.7748904051857396e-05, - "loss": 0.9634, - "step": 28815 - }, - { - "epoch": 0.826818527390874, - "grad_norm": 0.45703125, - "learning_rate": 1.7720435268304625e-05, - "loss": 1.0601, - "step": 28820 - }, - { - "epoch": 0.8269619726593318, - "grad_norm": 0.37890625, - "learning_rate": 1.7691987114916462e-05, - "loss": 0.9319, - "step": 28825 - }, - { - "epoch": 0.8271054179277897, - "grad_norm": 0.357421875, - "learning_rate": 1.766355959882575e-05, - "loss": 0.9609, - "step": 28830 - }, - { - "epoch": 0.8272488631962475, - "grad_norm": 0.4375, - "learning_rate": 1.7635152727160254e-05, - "loss": 0.9548, - "step": 28835 - }, - { - "epoch": 0.8273923084647052, - "grad_norm": 0.45703125, - "learning_rate": 1.7606766507042473e-05, - "loss": 0.9778, - "step": 28840 - }, - { - "epoch": 0.8275357537331631, - "grad_norm": 
0.40625, - "learning_rate": 1.7578400945589745e-05, - "loss": 0.9644, - "step": 28845 - }, - { - "epoch": 0.8276791990016209, - "grad_norm": 0.35546875, - "learning_rate": 1.755005604991423e-05, - "loss": 0.9887, - "step": 28850 - }, - { - "epoch": 0.8278226442700788, - "grad_norm": 0.46484375, - "learning_rate": 1.7521731827122967e-05, - "loss": 0.954, - "step": 28855 - }, - { - "epoch": 0.8279660895385366, - "grad_norm": 0.38671875, - "learning_rate": 1.7493428284317716e-05, - "loss": 0.8278, - "step": 28860 - }, - { - "epoch": 0.8281095348069943, - "grad_norm": 0.3828125, - "learning_rate": 1.746514542859512e-05, - "loss": 0.939, - "step": 28865 - }, - { - "epoch": 0.8282529800754522, - "grad_norm": 0.38671875, - "learning_rate": 1.7436883267046567e-05, - "loss": 0.94, - "step": 28870 - }, - { - "epoch": 0.82839642534391, - "grad_norm": 0.44140625, - "learning_rate": 1.7408641806758354e-05, - "loss": 1.0265, - "step": 28875 - }, - { - "epoch": 0.8285398706123679, - "grad_norm": 0.41015625, - "learning_rate": 1.7380421054811524e-05, - "loss": 1.0167, - "step": 28880 - }, - { - "epoch": 0.8286833158808257, - "grad_norm": 0.404296875, - "learning_rate": 1.73522210182819e-05, - "loss": 0.8998, - "step": 28885 - }, - { - "epoch": 0.8288267611492834, - "grad_norm": 0.33984375, - "learning_rate": 1.732404170424019e-05, - "loss": 0.8709, - "step": 28890 - }, - { - "epoch": 0.8289702064177413, - "grad_norm": 0.3984375, - "learning_rate": 1.7295883119751866e-05, - "loss": 0.9669, - "step": 28895 - }, - { - "epoch": 0.8291136516861991, - "grad_norm": 0.36328125, - "learning_rate": 1.7267745271877177e-05, - "loss": 1.0066, - "step": 28900 - }, - { - "epoch": 0.829257096954657, - "grad_norm": 0.384765625, - "learning_rate": 1.7239628167671195e-05, - "loss": 0.8343, - "step": 28905 - }, - { - "epoch": 0.8294005422231148, - "grad_norm": 0.373046875, - "learning_rate": 1.7211531814183825e-05, - "loss": 0.9051, - "step": 28910 - }, - { - "epoch": 0.8295439874915725, - "grad_norm": 0.37109375, - "learning_rate": 1.7183456218459703e-05, - "loss": 0.9628, - "step": 28915 - }, - { - "epoch": 0.8296874327600304, - "grad_norm": 0.34765625, - "learning_rate": 1.7155401387538327e-05, - "loss": 0.8932, - "step": 28920 - }, - { - "epoch": 0.8298308780284882, - "grad_norm": 0.37890625, - "learning_rate": 1.7127367328453937e-05, - "loss": 0.882, - "step": 28925 - }, - { - "epoch": 0.8299743232969461, - "grad_norm": 0.353515625, - "learning_rate": 1.709935404823555e-05, - "loss": 0.8351, - "step": 28930 - }, - { - "epoch": 0.8301177685654039, - "grad_norm": 0.376953125, - "learning_rate": 1.7071361553907073e-05, - "loss": 0.9404, - "step": 28935 - }, - { - "epoch": 0.8302612138338616, - "grad_norm": 0.390625, - "learning_rate": 1.70433898524871e-05, - "loss": 0.933, - "step": 28940 - }, - { - "epoch": 0.8304046591023195, - "grad_norm": 0.396484375, - "learning_rate": 1.701543895098906e-05, - "loss": 0.9161, - "step": 28945 - }, - { - "epoch": 0.8305481043707773, - "grad_norm": 0.412109375, - "learning_rate": 1.698750885642114e-05, - "loss": 0.9834, - "step": 28950 - }, - { - "epoch": 0.8306915496392352, - "grad_norm": 0.408203125, - "learning_rate": 1.69595995757863e-05, - "loss": 0.9987, - "step": 28955 - }, - { - "epoch": 0.830834994907693, - "grad_norm": 0.34765625, - "learning_rate": 1.693171111608237e-05, - "loss": 0.8715, - "step": 28960 - }, - { - "epoch": 0.8309784401761507, - "grad_norm": 0.375, - "learning_rate": 1.690384348430185e-05, - "loss": 0.9909, - "step": 28965 - }, - { - "epoch": 
0.8311218854446086, - "grad_norm": 0.361328125, - "learning_rate": 1.6875996687432073e-05, - "loss": 1.0172, - "step": 28970 - }, - { - "epoch": 0.8312653307130664, - "grad_norm": 0.37890625, - "learning_rate": 1.6848170732455106e-05, - "loss": 0.9197, - "step": 28975 - }, - { - "epoch": 0.8314087759815243, - "grad_norm": 0.380859375, - "learning_rate": 1.6820365626347878e-05, - "loss": 0.9287, - "step": 28980 - }, - { - "epoch": 0.8315522212499821, - "grad_norm": 0.373046875, - "learning_rate": 1.6792581376082006e-05, - "loss": 0.8506, - "step": 28985 - }, - { - "epoch": 0.8316956665184398, - "grad_norm": 0.412109375, - "learning_rate": 1.676481798862387e-05, - "loss": 0.9278, - "step": 28990 - }, - { - "epoch": 0.8318391117868977, - "grad_norm": 0.3515625, - "learning_rate": 1.6737075470934728e-05, - "loss": 0.9064, - "step": 28995 - }, - { - "epoch": 0.8319825570553555, - "grad_norm": 0.4140625, - "learning_rate": 1.6709353829970496e-05, - "loss": 0.9135, - "step": 29000 - }, - { - "epoch": 0.8321260023238134, - "grad_norm": 0.392578125, - "learning_rate": 1.668165307268189e-05, - "loss": 0.9022, - "step": 29005 - }, - { - "epoch": 0.8322694475922712, - "grad_norm": 0.380859375, - "learning_rate": 1.665397320601436e-05, - "loss": 0.9055, - "step": 29010 - }, - { - "epoch": 0.832412892860729, - "grad_norm": 0.39453125, - "learning_rate": 1.6626314236908202e-05, - "loss": 0.8994, - "step": 29015 - }, - { - "epoch": 0.8325563381291868, - "grad_norm": 0.37890625, - "learning_rate": 1.659867617229841e-05, - "loss": 0.8838, - "step": 29020 - }, - { - "epoch": 0.8326997833976446, - "grad_norm": 0.3828125, - "learning_rate": 1.657105901911473e-05, - "loss": 0.9596, - "step": 29025 - }, - { - "epoch": 0.8328432286661025, - "grad_norm": 0.375, - "learning_rate": 1.654346278428166e-05, - "loss": 0.9369, - "step": 29030 - }, - { - "epoch": 0.8329866739345603, - "grad_norm": 0.35546875, - "learning_rate": 1.6515887474718528e-05, - "loss": 0.9251, - "step": 29035 - }, - { - "epoch": 0.833130119203018, - "grad_norm": 0.4453125, - "learning_rate": 1.6488333097339335e-05, - "loss": 0.9408, - "step": 29040 - }, - { - "epoch": 0.8332735644714759, - "grad_norm": 0.39453125, - "learning_rate": 1.6460799659052827e-05, - "loss": 0.8766, - "step": 29045 - }, - { - "epoch": 0.8334170097399337, - "grad_norm": 0.37890625, - "learning_rate": 1.64332871667626e-05, - "loss": 0.9029, - "step": 29050 - }, - { - "epoch": 0.8335604550083916, - "grad_norm": 0.388671875, - "learning_rate": 1.6405795627366883e-05, - "loss": 0.8993, - "step": 29055 - }, - { - "epoch": 0.8337039002768494, - "grad_norm": 0.376953125, - "learning_rate": 1.6378325047758723e-05, - "loss": 1.0164, - "step": 29060 - }, - { - "epoch": 0.8338473455453072, - "grad_norm": 0.375, - "learning_rate": 1.6350875434825852e-05, - "loss": 0.9062, - "step": 29065 - }, - { - "epoch": 0.833990790813765, - "grad_norm": 0.357421875, - "learning_rate": 1.6323446795450826e-05, - "loss": 0.9541, - "step": 29070 - }, - { - "epoch": 0.8341342360822228, - "grad_norm": 0.388671875, - "learning_rate": 1.6296039136510877e-05, - "loss": 1.0213, - "step": 29075 - }, - { - "epoch": 0.8342776813506807, - "grad_norm": 0.41015625, - "learning_rate": 1.6268652464877988e-05, - "loss": 1.1782, - "step": 29080 - }, - { - "epoch": 0.8344211266191385, - "grad_norm": 0.400390625, - "learning_rate": 1.624128678741891e-05, - "loss": 1.0622, - "step": 29085 - }, - { - "epoch": 0.8345645718875963, - "grad_norm": 0.37109375, - "learning_rate": 1.6213942110995105e-05, - "loss": 0.9764, - 
"step": 29090 - }, - { - "epoch": 0.8347080171560541, - "grad_norm": 0.435546875, - "learning_rate": 1.6186618442462765e-05, - "loss": 1.0742, - "step": 29095 - }, - { - "epoch": 0.8348514624245119, - "grad_norm": 0.369140625, - "learning_rate": 1.6159315788672825e-05, - "loss": 0.9503, - "step": 29100 - }, - { - "epoch": 0.8349949076929698, - "grad_norm": 0.4140625, - "learning_rate": 1.6132034156470933e-05, - "loss": 1.0027, - "step": 29105 - }, - { - "epoch": 0.8351383529614276, - "grad_norm": 0.37890625, - "learning_rate": 1.6104773552697517e-05, - "loss": 0.8851, - "step": 29110 - }, - { - "epoch": 0.8352817982298854, - "grad_norm": 0.36328125, - "learning_rate": 1.6077533984187677e-05, - "loss": 0.9731, - "step": 29115 - }, - { - "epoch": 0.8354252434983432, - "grad_norm": 0.345703125, - "learning_rate": 1.6050315457771257e-05, - "loss": 0.9496, - "step": 29120 - }, - { - "epoch": 0.835568688766801, - "grad_norm": 0.365234375, - "learning_rate": 1.6023117980272828e-05, - "loss": 0.942, - "step": 29125 - }, - { - "epoch": 0.8357121340352589, - "grad_norm": 0.375, - "learning_rate": 1.5995941558511695e-05, - "loss": 0.9172, - "step": 29130 - }, - { - "epoch": 0.8358555793037167, - "grad_norm": 0.33203125, - "learning_rate": 1.596878619930183e-05, - "loss": 0.9761, - "step": 29135 - }, - { - "epoch": 0.8359990245721745, - "grad_norm": 0.37890625, - "learning_rate": 1.5941651909452028e-05, - "loss": 1.0139, - "step": 29140 - }, - { - "epoch": 0.8361424698406323, - "grad_norm": 0.36328125, - "learning_rate": 1.5914538695765713e-05, - "loss": 1.0216, - "step": 29145 - }, - { - "epoch": 0.8362859151090901, - "grad_norm": 0.376953125, - "learning_rate": 1.5887446565041007e-05, - "loss": 0.9231, - "step": 29150 - }, - { - "epoch": 0.8364293603775479, - "grad_norm": 0.416015625, - "learning_rate": 1.5860375524070858e-05, - "loss": 1.0237, - "step": 29155 - }, - { - "epoch": 0.8365728056460058, - "grad_norm": 0.408203125, - "learning_rate": 1.583332557964282e-05, - "loss": 0.9714, - "step": 29160 - }, - { - "epoch": 0.8367162509144636, - "grad_norm": 0.46484375, - "learning_rate": 1.5806296738539218e-05, - "loss": 1.0082, - "step": 29165 - }, - { - "epoch": 0.8368596961829214, - "grad_norm": 0.408203125, - "learning_rate": 1.5779289007537e-05, - "loss": 0.8404, - "step": 29170 - }, - { - "epoch": 0.8370031414513792, - "grad_norm": 0.380859375, - "learning_rate": 1.575230239340796e-05, - "loss": 0.9658, - "step": 29175 - }, - { - "epoch": 0.837146586719837, - "grad_norm": 0.4140625, - "learning_rate": 1.5725336902918486e-05, - "loss": 0.9546, - "step": 29180 - }, - { - "epoch": 0.8372900319882949, - "grad_norm": 0.373046875, - "learning_rate": 1.569839254282971e-05, - "loss": 1.0306, - "step": 29185 - }, - { - "epoch": 0.8374334772567527, - "grad_norm": 0.36328125, - "learning_rate": 1.5671469319897425e-05, - "loss": 0.9245, - "step": 29190 - }, - { - "epoch": 0.8375769225252105, - "grad_norm": 0.37890625, - "learning_rate": 1.5644567240872222e-05, - "loss": 0.9616, - "step": 29195 - }, - { - "epoch": 0.8377203677936683, - "grad_norm": 0.45703125, - "learning_rate": 1.561768631249929e-05, - "loss": 0.973, - "step": 29200 - }, - { - "epoch": 0.8378638130621261, - "grad_norm": 0.447265625, - "learning_rate": 1.5590826541518545e-05, - "loss": 0.9385, - "step": 29205 - }, - { - "epoch": 0.838007258330584, - "grad_norm": 0.5546875, - "learning_rate": 1.5563987934664624e-05, - "loss": 0.9202, - "step": 29210 - }, - { - "epoch": 0.8381507035990418, - "grad_norm": 0.400390625, - "learning_rate": 
1.553717049866685e-05, - "loss": 0.9887, - "step": 29215 - }, - { - "epoch": 0.8382941488674996, - "grad_norm": 0.44921875, - "learning_rate": 1.5510374240249205e-05, - "loss": 0.9419, - "step": 29220 - }, - { - "epoch": 0.8384375941359574, - "grad_norm": 0.37109375, - "learning_rate": 1.548359916613037e-05, - "loss": 1.0199, - "step": 29225 - }, - { - "epoch": 0.8385810394044152, - "grad_norm": 0.373046875, - "learning_rate": 1.5456845283023758e-05, - "loss": 0.9093, - "step": 29230 - }, - { - "epoch": 0.8387244846728731, - "grad_norm": 0.3671875, - "learning_rate": 1.5430112597637438e-05, - "loss": 0.8122, - "step": 29235 - }, - { - "epoch": 0.8388679299413309, - "grad_norm": 0.36328125, - "learning_rate": 1.540340111667413e-05, - "loss": 0.9593, - "step": 29240 - }, - { - "epoch": 0.8390113752097887, - "grad_norm": 0.416015625, - "learning_rate": 1.537671084683131e-05, - "loss": 1.0537, - "step": 29245 - }, - { - "epoch": 0.8391548204782465, - "grad_norm": 0.40234375, - "learning_rate": 1.5350041794801097e-05, - "loss": 0.8419, - "step": 29250 - }, - { - "epoch": 0.8392982657467043, - "grad_norm": 0.43359375, - "learning_rate": 1.532339396727026e-05, - "loss": 1.0935, - "step": 29255 - }, - { - "epoch": 0.8394417110151622, - "grad_norm": 0.390625, - "learning_rate": 1.5296767370920273e-05, - "loss": 0.8692, - "step": 29260 - }, - { - "epoch": 0.83958515628362, - "grad_norm": 0.361328125, - "learning_rate": 1.5270162012427336e-05, - "loss": 0.9172, - "step": 29265 - }, - { - "epoch": 0.8397286015520778, - "grad_norm": 0.38671875, - "learning_rate": 1.5243577898462246e-05, - "loss": 0.9387, - "step": 29270 - }, - { - "epoch": 0.8398720468205356, - "grad_norm": 0.43359375, - "learning_rate": 1.5217015035690507e-05, - "loss": 1.0505, - "step": 29275 - }, - { - "epoch": 0.8400154920889934, - "grad_norm": 0.41796875, - "learning_rate": 1.5190473430772289e-05, - "loss": 1.0287, - "step": 29280 - }, - { - "epoch": 0.8401589373574513, - "grad_norm": 0.369140625, - "learning_rate": 1.516395309036246e-05, - "loss": 0.9168, - "step": 29285 - }, - { - "epoch": 0.8403023826259091, - "grad_norm": 0.404296875, - "learning_rate": 1.5137454021110508e-05, - "loss": 0.95, - "step": 29290 - }, - { - "epoch": 0.840445827894367, - "grad_norm": 0.376953125, - "learning_rate": 1.5110976229660623e-05, - "loss": 0.8914, - "step": 29295 - }, - { - "epoch": 0.8405892731628247, - "grad_norm": 0.3671875, - "learning_rate": 1.5084519722651658e-05, - "loss": 0.9107, - "step": 29300 - }, - { - "epoch": 0.8407327184312825, - "grad_norm": 0.388671875, - "learning_rate": 1.5058084506717097e-05, - "loss": 0.9156, - "step": 29305 - }, - { - "epoch": 0.8408761636997404, - "grad_norm": 0.39453125, - "learning_rate": 1.5031670588485103e-05, - "loss": 0.9105, - "step": 29310 - }, - { - "epoch": 0.8410196089681982, - "grad_norm": 0.361328125, - "learning_rate": 1.5005277974578547e-05, - "loss": 0.9538, - "step": 29315 - }, - { - "epoch": 0.841163054236656, - "grad_norm": 0.421875, - "learning_rate": 1.497890667161489e-05, - "loss": 1.0089, - "step": 29320 - }, - { - "epoch": 0.8413064995051138, - "grad_norm": 0.359375, - "learning_rate": 1.4952556686206276e-05, - "loss": 0.8792, - "step": 29325 - }, - { - "epoch": 0.8414499447735716, - "grad_norm": 0.357421875, - "learning_rate": 1.4926228024959487e-05, - "loss": 0.8608, - "step": 29330 - }, - { - "epoch": 0.8415933900420295, - "grad_norm": 0.416015625, - "learning_rate": 1.489992069447601e-05, - "loss": 0.8733, - "step": 29335 - }, - { - "epoch": 0.8417368353104873, - 
"grad_norm": 0.357421875, - "learning_rate": 1.4873634701351946e-05, - "loss": 0.912, - "step": 29340 - }, - { - "epoch": 0.8418802805789452, - "grad_norm": 0.37890625, - "learning_rate": 1.4847370052178023e-05, - "loss": 0.9717, - "step": 29345 - }, - { - "epoch": 0.8420237258474029, - "grad_norm": 0.404296875, - "learning_rate": 1.4821126753539638e-05, - "loss": 1.0057, - "step": 29350 - }, - { - "epoch": 0.8421671711158607, - "grad_norm": 0.369140625, - "learning_rate": 1.4794904812016885e-05, - "loss": 1.04, - "step": 29355 - }, - { - "epoch": 0.8423106163843186, - "grad_norm": 0.435546875, - "learning_rate": 1.4768704234184428e-05, - "loss": 0.9254, - "step": 29360 - }, - { - "epoch": 0.8424540616527764, - "grad_norm": 0.341796875, - "learning_rate": 1.4742525026611576e-05, - "loss": 0.9826, - "step": 29365 - }, - { - "epoch": 0.8425975069212343, - "grad_norm": 0.3828125, - "learning_rate": 1.471636719586238e-05, - "loss": 1.0197, - "step": 29370 - }, - { - "epoch": 0.842740952189692, - "grad_norm": 0.375, - "learning_rate": 1.4690230748495403e-05, - "loss": 0.898, - "step": 29375 - }, - { - "epoch": 0.8428843974581498, - "grad_norm": 0.392578125, - "learning_rate": 1.466411569106393e-05, - "loss": 0.9347, - "step": 29380 - }, - { - "epoch": 0.8430278427266077, - "grad_norm": 0.361328125, - "learning_rate": 1.4638022030115817e-05, - "loss": 0.9201, - "step": 29385 - }, - { - "epoch": 0.8431712879950655, - "grad_norm": 0.400390625, - "learning_rate": 1.4611949772193657e-05, - "loss": 0.8679, - "step": 29390 - }, - { - "epoch": 0.8433147332635234, - "grad_norm": 0.37890625, - "learning_rate": 1.4585898923834563e-05, - "loss": 0.8799, - "step": 29395 - }, - { - "epoch": 0.8434581785319811, - "grad_norm": 0.43359375, - "learning_rate": 1.4559869491570332e-05, - "loss": 0.8813, - "step": 29400 - }, - { - "epoch": 0.8436016238004389, - "grad_norm": 0.365234375, - "learning_rate": 1.4533861481927425e-05, - "loss": 0.939, - "step": 29405 - }, - { - "epoch": 0.8437450690688968, - "grad_norm": 0.35546875, - "learning_rate": 1.4507874901426877e-05, - "loss": 1.0499, - "step": 29410 - }, - { - "epoch": 0.8438885143373546, - "grad_norm": 0.421875, - "learning_rate": 1.4481909756584366e-05, - "loss": 0.9066, - "step": 29415 - }, - { - "epoch": 0.8440319596058125, - "grad_norm": 0.3984375, - "learning_rate": 1.4455966053910187e-05, - "loss": 0.9337, - "step": 29420 - }, - { - "epoch": 0.8441754048742702, - "grad_norm": 0.48046875, - "learning_rate": 1.44300437999093e-05, - "loss": 0.91, - "step": 29425 - }, - { - "epoch": 0.844318850142728, - "grad_norm": 0.3984375, - "learning_rate": 1.4404143001081238e-05, - "loss": 0.9689, - "step": 29430 - }, - { - "epoch": 0.8444622954111859, - "grad_norm": 0.392578125, - "learning_rate": 1.4378263663920155e-05, - "loss": 0.927, - "step": 29435 - }, - { - "epoch": 0.8446057406796437, - "grad_norm": 0.365234375, - "learning_rate": 1.4352405794914892e-05, - "loss": 0.9534, - "step": 29440 - }, - { - "epoch": 0.8447491859481016, - "grad_norm": 0.390625, - "learning_rate": 1.432656940054884e-05, - "loss": 0.9543, - "step": 29445 - }, - { - "epoch": 0.8448926312165593, - "grad_norm": 0.396484375, - "learning_rate": 1.4300754487300006e-05, - "loss": 0.962, - "step": 29450 - }, - { - "epoch": 0.8450360764850171, - "grad_norm": 0.380859375, - "learning_rate": 1.4274961061641023e-05, - "loss": 0.8929, - "step": 29455 - }, - { - "epoch": 0.845179521753475, - "grad_norm": 0.396484375, - "learning_rate": 1.4249189130039175e-05, - "loss": 0.9598, - "step": 29460 - }, - { - 
"epoch": 0.8453229670219328, - "grad_norm": 0.376953125, - "learning_rate": 1.4223438698956315e-05, - "loss": 1.0082, - "step": 29465 - }, - { - "epoch": 0.8454664122903907, - "grad_norm": 0.359375, - "learning_rate": 1.419770977484891e-05, - "loss": 0.9781, - "step": 29470 - }, - { - "epoch": 0.8456098575588484, - "grad_norm": 0.37109375, - "learning_rate": 1.4172002364168024e-05, - "loss": 0.9265, - "step": 29475 - }, - { - "epoch": 0.8457533028273062, - "grad_norm": 0.3828125, - "learning_rate": 1.4146316473359366e-05, - "loss": 0.9126, - "step": 29480 - }, - { - "epoch": 0.8458967480957641, - "grad_norm": 0.3828125, - "learning_rate": 1.4120652108863175e-05, - "loss": 1.0379, - "step": 29485 - }, - { - "epoch": 0.8460401933642219, - "grad_norm": 0.357421875, - "learning_rate": 1.4095009277114412e-05, - "loss": 0.9061, - "step": 29490 - }, - { - "epoch": 0.8461836386326796, - "grad_norm": 0.37890625, - "learning_rate": 1.4069387984542526e-05, - "loss": 0.8831, - "step": 29495 - }, - { - "epoch": 0.8463270839011375, - "grad_norm": 0.396484375, - "learning_rate": 1.4043788237571632e-05, - "loss": 0.9444, - "step": 29500 - }, - { - "epoch": 0.8464705291695953, - "grad_norm": 0.38671875, - "learning_rate": 1.4018210042620394e-05, - "loss": 0.8754, - "step": 29505 - }, - { - "epoch": 0.8466139744380532, - "grad_norm": 0.43359375, - "learning_rate": 1.3992653406102097e-05, - "loss": 1.0764, - "step": 29510 - }, - { - "epoch": 0.846757419706511, - "grad_norm": 0.42578125, - "learning_rate": 1.3967118334424655e-05, - "loss": 1.0099, - "step": 29515 - }, - { - "epoch": 0.8469008649749687, - "grad_norm": 0.3984375, - "learning_rate": 1.3941604833990528e-05, - "loss": 0.995, - "step": 29520 - }, - { - "epoch": 0.8470443102434266, - "grad_norm": 0.498046875, - "learning_rate": 1.3916112911196743e-05, - "loss": 0.8945, - "step": 29525 - }, - { - "epoch": 0.8471877555118844, - "grad_norm": 0.38671875, - "learning_rate": 1.389064257243502e-05, - "loss": 0.9073, - "step": 29530 - }, - { - "epoch": 0.8473312007803423, - "grad_norm": 0.36328125, - "learning_rate": 1.386519382409156e-05, - "loss": 0.9435, - "step": 29535 - }, - { - "epoch": 0.8474746460488001, - "grad_norm": 0.416015625, - "learning_rate": 1.3839766672547206e-05, - "loss": 0.904, - "step": 29540 - }, - { - "epoch": 0.8476180913172578, - "grad_norm": 0.3671875, - "learning_rate": 1.3814361124177333e-05, - "loss": 0.9455, - "step": 29545 - }, - { - "epoch": 0.8477615365857157, - "grad_norm": 0.396484375, - "learning_rate": 1.3788977185352003e-05, - "loss": 0.9046, - "step": 29550 - }, - { - "epoch": 0.8479049818541735, - "grad_norm": 0.4609375, - "learning_rate": 1.376361486243577e-05, - "loss": 0.9382, - "step": 29555 - }, - { - "epoch": 0.8480484271226314, - "grad_norm": 0.380859375, - "learning_rate": 1.3738274161787768e-05, - "loss": 0.8988, - "step": 29560 - }, - { - "epoch": 0.8481918723910892, - "grad_norm": 0.341796875, - "learning_rate": 1.371295508976177e-05, - "loss": 0.9719, - "step": 29565 - }, - { - "epoch": 0.848335317659547, - "grad_norm": 0.373046875, - "learning_rate": 1.3687657652706076e-05, - "loss": 0.9104, - "step": 29570 - }, - { - "epoch": 0.8484787629280048, - "grad_norm": 0.3828125, - "learning_rate": 1.3662381856963592e-05, - "loss": 0.9358, - "step": 29575 - }, - { - "epoch": 0.8486222081964626, - "grad_norm": 0.330078125, - "learning_rate": 1.3637127708871734e-05, - "loss": 0.9988, - "step": 29580 - }, - { - "epoch": 0.8487656534649205, - "grad_norm": 0.380859375, - "learning_rate": 1.3611895214762604e-05, - 
"loss": 0.8868, - "step": 29585 - }, - { - "epoch": 0.8489090987333783, - "grad_norm": 0.376953125, - "learning_rate": 1.3586684380962778e-05, - "loss": 1.0221, - "step": 29590 - }, - { - "epoch": 0.849052544001836, - "grad_norm": 0.609375, - "learning_rate": 1.3561495213793428e-05, - "loss": 0.9246, - "step": 29595 - }, - { - "epoch": 0.8491959892702939, - "grad_norm": 0.373046875, - "learning_rate": 1.3536327719570286e-05, - "loss": 0.882, - "step": 29600 - }, - { - "epoch": 0.8493394345387517, - "grad_norm": 0.40625, - "learning_rate": 1.351118190460371e-05, - "loss": 0.9343, - "step": 29605 - }, - { - "epoch": 0.8494828798072096, - "grad_norm": 0.361328125, - "learning_rate": 1.3486057775198535e-05, - "loss": 0.9867, - "step": 29610 - }, - { - "epoch": 0.8496263250756674, - "grad_norm": 0.359375, - "learning_rate": 1.3460955337654191e-05, - "loss": 0.9261, - "step": 29615 - }, - { - "epoch": 0.8497697703441252, - "grad_norm": 0.44140625, - "learning_rate": 1.3435874598264709e-05, - "loss": 1.136, - "step": 29620 - }, - { - "epoch": 0.849913215612583, - "grad_norm": 0.388671875, - "learning_rate": 1.3410815563318624e-05, - "loss": 0.9087, - "step": 29625 - }, - { - "epoch": 0.8500566608810408, - "grad_norm": 0.38671875, - "learning_rate": 1.3385778239099067e-05, - "loss": 0.9299, - "step": 29630 - }, - { - "epoch": 0.8502001061494987, - "grad_norm": 0.369140625, - "learning_rate": 1.3360762631883672e-05, - "loss": 0.8875, - "step": 29635 - }, - { - "epoch": 0.8503435514179565, - "grad_norm": 0.373046875, - "learning_rate": 1.3335768747944722e-05, - "loss": 0.9232, - "step": 29640 - }, - { - "epoch": 0.8504869966864143, - "grad_norm": 0.349609375, - "learning_rate": 1.3310796593548958e-05, - "loss": 0.9316, - "step": 29645 - }, - { - "epoch": 0.8506304419548721, - "grad_norm": 0.40625, - "learning_rate": 1.3285846174957728e-05, - "loss": 0.9916, - "step": 29650 - }, - { - "epoch": 0.8507738872233299, - "grad_norm": 0.412109375, - "learning_rate": 1.3260917498426917e-05, - "loss": 0.9583, - "step": 29655 - }, - { - "epoch": 0.8509173324917878, - "grad_norm": 0.36328125, - "learning_rate": 1.3236010570206914e-05, - "loss": 0.8604, - "step": 29660 - }, - { - "epoch": 0.8510607777602456, - "grad_norm": 0.36328125, - "learning_rate": 1.3211125396542757e-05, - "loss": 0.9474, - "step": 29665 - }, - { - "epoch": 0.8512042230287034, - "grad_norm": 0.3828125, - "learning_rate": 1.3186261983673942e-05, - "loss": 0.9028, - "step": 29670 - }, - { - "epoch": 0.8513476682971612, - "grad_norm": 0.37890625, - "learning_rate": 1.316142033783454e-05, - "loss": 0.9222, - "step": 29675 - }, - { - "epoch": 0.851491113565619, - "grad_norm": 0.365234375, - "learning_rate": 1.3136600465253147e-05, - "loss": 0.9282, - "step": 29680 - }, - { - "epoch": 0.8516345588340769, - "grad_norm": 0.400390625, - "learning_rate": 1.3111802372152903e-05, - "loss": 0.9674, - "step": 29685 - }, - { - "epoch": 0.8517780041025347, - "grad_norm": 0.392578125, - "learning_rate": 1.308702606475154e-05, - "loss": 0.9895, - "step": 29690 - }, - { - "epoch": 0.8519214493709925, - "grad_norm": 0.357421875, - "learning_rate": 1.3062271549261252e-05, - "loss": 1.0244, - "step": 29695 - }, - { - "epoch": 0.8520648946394503, - "grad_norm": 0.369140625, - "learning_rate": 1.3037538831888819e-05, - "loss": 0.8825, - "step": 29700 - }, - { - "epoch": 0.8522083399079081, - "grad_norm": 0.380859375, - "learning_rate": 1.3012827918835502e-05, - "loss": 0.9126, - "step": 29705 - }, - { - "epoch": 0.852351785176366, - "grad_norm": 0.357421875, - 
"learning_rate": 1.298813881629718e-05, - "loss": 0.8667, - "step": 29710 - }, - { - "epoch": 0.8524952304448238, - "grad_norm": 0.3828125, - "learning_rate": 1.2963471530464188e-05, - "loss": 0.8487, - "step": 29715 - }, - { - "epoch": 0.8526386757132816, - "grad_norm": 0.404296875, - "learning_rate": 1.2938826067521404e-05, - "loss": 0.8834, - "step": 29720 - }, - { - "epoch": 0.8527821209817394, - "grad_norm": 0.36328125, - "learning_rate": 1.2914202433648282e-05, - "loss": 1.0662, - "step": 29725 - }, - { - "epoch": 0.8529255662501972, - "grad_norm": 0.365234375, - "learning_rate": 1.2889600635018762e-05, - "loss": 0.9744, - "step": 29730 - }, - { - "epoch": 0.8530690115186551, - "grad_norm": 0.37109375, - "learning_rate": 1.2865020677801298e-05, - "loss": 0.8703, - "step": 29735 - }, - { - "epoch": 0.8532124567871129, - "grad_norm": 0.474609375, - "learning_rate": 1.2840462568158874e-05, - "loss": 0.9126, - "step": 29740 - }, - { - "epoch": 0.8533559020555707, - "grad_norm": 0.435546875, - "learning_rate": 1.2815926312249038e-05, - "loss": 0.863, - "step": 29745 - }, - { - "epoch": 0.8534993473240285, - "grad_norm": 0.388671875, - "learning_rate": 1.2791411916223827e-05, - "loss": 0.9867, - "step": 29750 - }, - { - "epoch": 0.8536427925924863, - "grad_norm": 0.3984375, - "learning_rate": 1.2766919386229782e-05, - "loss": 0.9221, - "step": 29755 - }, - { - "epoch": 0.8537862378609442, - "grad_norm": 0.427734375, - "learning_rate": 1.2742448728407963e-05, - "loss": 0.9308, - "step": 29760 - }, - { - "epoch": 0.853929683129402, - "grad_norm": 0.375, - "learning_rate": 1.2717999948893999e-05, - "loss": 0.9022, - "step": 29765 - }, - { - "epoch": 0.8540731283978598, - "grad_norm": 0.376953125, - "learning_rate": 1.2693573053817976e-05, - "loss": 0.8887, - "step": 29770 - }, - { - "epoch": 0.8542165736663176, - "grad_norm": 0.34765625, - "learning_rate": 1.2669168049304481e-05, - "loss": 0.8422, - "step": 29775 - }, - { - "epoch": 0.8543600189347754, - "grad_norm": 0.376953125, - "learning_rate": 1.2644784941472699e-05, - "loss": 1.0321, - "step": 29780 - }, - { - "epoch": 0.8545034642032333, - "grad_norm": 0.400390625, - "learning_rate": 1.2620423736436248e-05, - "loss": 0.9208, - "step": 29785 - }, - { - "epoch": 0.8546469094716911, - "grad_norm": 0.3671875, - "learning_rate": 1.2596084440303258e-05, - "loss": 0.92, - "step": 29790 - }, - { - "epoch": 0.8547903547401489, - "grad_norm": 0.39453125, - "learning_rate": 1.2571767059176377e-05, - "loss": 0.9925, - "step": 29795 - }, - { - "epoch": 0.8549338000086067, - "grad_norm": 0.376953125, - "learning_rate": 1.2547471599152804e-05, - "loss": 0.9484, - "step": 29800 - }, - { - "epoch": 0.8550772452770645, - "grad_norm": 0.408203125, - "learning_rate": 1.2523198066324183e-05, - "loss": 1.0493, - "step": 29805 - }, - { - "epoch": 0.8552206905455224, - "grad_norm": 0.361328125, - "learning_rate": 1.2498946466776639e-05, - "loss": 0.9351, - "step": 29810 - }, - { - "epoch": 0.8553641358139802, - "grad_norm": 0.373046875, - "learning_rate": 1.2474716806590903e-05, - "loss": 0.9521, - "step": 29815 - }, - { - "epoch": 0.855507581082438, - "grad_norm": 0.48046875, - "learning_rate": 1.24505090918421e-05, - "loss": 1.0035, - "step": 29820 - }, - { - "epoch": 0.8556510263508958, - "grad_norm": 0.384765625, - "learning_rate": 1.242632332859991e-05, - "loss": 0.9756, - "step": 29825 - }, - { - "epoch": 0.8557944716193536, - "grad_norm": 0.37890625, - "learning_rate": 1.240215952292847e-05, - "loss": 1.025, - "step": 29830 - }, - { - "epoch": 
0.8559379168878114, - "grad_norm": 0.412109375, - "learning_rate": 1.2378017680886422e-05, - "loss": 0.9738, - "step": 29835 - }, - { - "epoch": 0.8560813621562693, - "grad_norm": 0.357421875, - "learning_rate": 1.235389780852696e-05, - "loss": 0.9334, - "step": 29840 - }, - { - "epoch": 0.8562248074247271, - "grad_norm": 0.39453125, - "learning_rate": 1.23297999118977e-05, - "loss": 0.9059, - "step": 29845 - }, - { - "epoch": 0.856368252693185, - "grad_norm": 0.365234375, - "learning_rate": 1.2305723997040752e-05, - "loss": 1.007, - "step": 29850 - }, - { - "epoch": 0.8565116979616427, - "grad_norm": 0.390625, - "learning_rate": 1.2281670069992746e-05, - "loss": 0.9423, - "step": 29855 - }, - { - "epoch": 0.8566551432301005, - "grad_norm": 0.359375, - "learning_rate": 1.2257638136784777e-05, - "loss": 0.9334, - "step": 29860 - }, - { - "epoch": 0.8567985884985584, - "grad_norm": 0.34375, - "learning_rate": 1.2233628203442415e-05, - "loss": 0.9379, - "step": 29865 - }, - { - "epoch": 0.8569420337670162, - "grad_norm": 0.361328125, - "learning_rate": 1.2209640275985779e-05, - "loss": 0.9479, - "step": 29870 - }, - { - "epoch": 0.857085479035474, - "grad_norm": 0.3984375, - "learning_rate": 1.21856743604294e-05, - "loss": 0.8971, - "step": 29875 - }, - { - "epoch": 0.8572289243039318, - "grad_norm": 0.357421875, - "learning_rate": 1.2161730462782283e-05, - "loss": 0.9576, - "step": 29880 - }, - { - "epoch": 0.8573723695723896, - "grad_norm": 0.408203125, - "learning_rate": 1.2137808589047994e-05, - "loss": 1.0335, - "step": 29885 - }, - { - "epoch": 0.8575158148408475, - "grad_norm": 0.419921875, - "learning_rate": 1.211390874522449e-05, - "loss": 1.0143, - "step": 29890 - }, - { - "epoch": 0.8576592601093053, - "grad_norm": 0.4375, - "learning_rate": 1.2090030937304264e-05, - "loss": 1.0378, - "step": 29895 - }, - { - "epoch": 0.8578027053777632, - "grad_norm": 0.419921875, - "learning_rate": 1.2066175171274219e-05, - "loss": 0.8729, - "step": 29900 - }, - { - "epoch": 0.8579461506462209, - "grad_norm": 0.40234375, - "learning_rate": 1.2042341453115813e-05, - "loss": 0.894, - "step": 29905 - }, - { - "epoch": 0.8580895959146787, - "grad_norm": 0.31640625, - "learning_rate": 1.2018529788804932e-05, - "loss": 0.8629, - "step": 29910 - }, - { - "epoch": 0.8582330411831366, - "grad_norm": 0.357421875, - "learning_rate": 1.1994740184311915e-05, - "loss": 0.9093, - "step": 29915 - }, - { - "epoch": 0.8583764864515944, - "grad_norm": 0.37890625, - "learning_rate": 1.1970972645601587e-05, - "loss": 0.9624, - "step": 29920 - }, - { - "epoch": 0.8585199317200523, - "grad_norm": 0.421875, - "learning_rate": 1.1947227178633269e-05, - "loss": 1.0348, - "step": 29925 - }, - { - "epoch": 0.85866337698851, - "grad_norm": 0.333984375, - "learning_rate": 1.1923503789360712e-05, - "loss": 0.8823, - "step": 29930 - }, - { - "epoch": 0.8588068222569678, - "grad_norm": 0.408203125, - "learning_rate": 1.1899802483732115e-05, - "loss": 0.9271, - "step": 29935 - }, - { - "epoch": 0.8589502675254257, - "grad_norm": 0.359375, - "learning_rate": 1.1876123267690209e-05, - "loss": 0.8938, - "step": 29940 - }, - { - "epoch": 0.8590937127938835, - "grad_norm": 0.3828125, - "learning_rate": 1.1852466147172126e-05, - "loss": 1.0917, - "step": 29945 - }, - { - "epoch": 0.8592371580623414, - "grad_norm": 0.37890625, - "learning_rate": 1.1828831128109475e-05, - "loss": 0.907, - "step": 29950 - }, - { - "epoch": 0.8593806033307991, - "grad_norm": 0.404296875, - "learning_rate": 1.1805218216428305e-05, - "loss": 0.9071, - 
"step": 29955 - }, - { - "epoch": 0.8595240485992569, - "grad_norm": 0.416015625, - "learning_rate": 1.1781627418049179e-05, - "loss": 1.0044, - "step": 29960 - }, - { - "epoch": 0.8596674938677148, - "grad_norm": 0.40234375, - "learning_rate": 1.1758058738887067e-05, - "loss": 0.8673, - "step": 29965 - }, - { - "epoch": 0.8598109391361726, - "grad_norm": 0.357421875, - "learning_rate": 1.1734512184851377e-05, - "loss": 1.0099, - "step": 29970 - }, - { - "epoch": 0.8599543844046305, - "grad_norm": 0.369140625, - "learning_rate": 1.1710987761846027e-05, - "loss": 0.9165, - "step": 29975 - }, - { - "epoch": 0.8600978296730882, - "grad_norm": 0.359375, - "learning_rate": 1.1687485475769343e-05, - "loss": 0.9149, - "step": 29980 - }, - { - "epoch": 0.860241274941546, - "grad_norm": 0.419921875, - "learning_rate": 1.1664005332514128e-05, - "loss": 1.0156, - "step": 29985 - }, - { - "epoch": 0.8603847202100039, - "grad_norm": 0.361328125, - "learning_rate": 1.1640547337967577e-05, - "loss": 0.9695, - "step": 29990 - }, - { - "epoch": 0.8605281654784617, - "grad_norm": 0.373046875, - "learning_rate": 1.1617111498011413e-05, - "loss": 0.9319, - "step": 29995 - }, - { - "epoch": 0.8606716107469196, - "grad_norm": 0.37109375, - "learning_rate": 1.1593697818521765e-05, - "loss": 0.863, - "step": 30000 - }, - { - "epoch": 0.8608150560153773, - "grad_norm": 0.412109375, - "learning_rate": 1.1570306305369182e-05, - "loss": 1.0045, - "step": 30005 - }, - { - "epoch": 0.8609585012838351, - "grad_norm": 0.416015625, - "learning_rate": 1.1546936964418664e-05, - "loss": 0.9757, - "step": 30010 - }, - { - "epoch": 0.861101946552293, - "grad_norm": 0.419921875, - "learning_rate": 1.1523589801529711e-05, - "loss": 0.946, - "step": 30015 - }, - { - "epoch": 0.8612453918207508, - "grad_norm": 0.330078125, - "learning_rate": 1.1500264822556194e-05, - "loss": 0.8666, - "step": 30020 - }, - { - "epoch": 0.8613888370892087, - "grad_norm": 0.375, - "learning_rate": 1.1476962033346438e-05, - "loss": 0.9348, - "step": 30025 - }, - { - "epoch": 0.8615322823576664, - "grad_norm": 0.35546875, - "learning_rate": 1.1453681439743224e-05, - "loss": 0.9222, - "step": 30030 - }, - { - "epoch": 0.8616757276261242, - "grad_norm": 0.400390625, - "learning_rate": 1.1430423047583739e-05, - "loss": 0.9351, - "step": 30035 - }, - { - "epoch": 0.8618191728945821, - "grad_norm": 0.36328125, - "learning_rate": 1.1407186862699614e-05, - "loss": 0.8778, - "step": 30040 - }, - { - "epoch": 0.8619626181630399, - "grad_norm": 0.423828125, - "learning_rate": 1.1383972890916938e-05, - "loss": 1.0283, - "step": 30045 - }, - { - "epoch": 0.8621060634314978, - "grad_norm": 0.404296875, - "learning_rate": 1.1360781138056209e-05, - "loss": 0.9126, - "step": 30050 - }, - { - "epoch": 0.8622495086999555, - "grad_norm": 0.435546875, - "learning_rate": 1.1337611609932342e-05, - "loss": 1.1537, - "step": 30055 - }, - { - "epoch": 0.8623929539684133, - "grad_norm": 0.34375, - "learning_rate": 1.1314464312354678e-05, - "loss": 0.8767, - "step": 30060 - }, - { - "epoch": 0.8625363992368712, - "grad_norm": 0.3671875, - "learning_rate": 1.129133925112703e-05, - "loss": 0.8924, - "step": 30065 - }, - { - "epoch": 0.862679844505329, - "grad_norm": 0.400390625, - "learning_rate": 1.1268236432047596e-05, - "loss": 0.988, - "step": 30070 - }, - { - "epoch": 0.8628232897737869, - "grad_norm": 0.373046875, - "learning_rate": 1.1245155860908984e-05, - "loss": 0.907, - "step": 30075 - }, - { - "epoch": 0.8629667350422446, - "grad_norm": 0.390625, - "learning_rate": 
1.1222097543498244e-05, - "loss": 1.0739, - "step": 30080 - }, - { - "epoch": 0.8631101803107024, - "grad_norm": 0.3671875, - "learning_rate": 1.1199061485596885e-05, - "loss": 0.903, - "step": 30085 - }, - { - "epoch": 0.8632536255791603, - "grad_norm": 0.40625, - "learning_rate": 1.1176047692980773e-05, - "loss": 0.9662, - "step": 30090 - }, - { - "epoch": 0.8633970708476181, - "grad_norm": 0.369140625, - "learning_rate": 1.1153056171420185e-05, - "loss": 0.9879, - "step": 30095 - }, - { - "epoch": 0.863540516116076, - "grad_norm": 0.427734375, - "learning_rate": 1.1130086926679894e-05, - "loss": 0.9508, - "step": 30100 - }, - { - "epoch": 0.8636839613845337, - "grad_norm": 0.3984375, - "learning_rate": 1.1107139964519008e-05, - "loss": 0.9732, - "step": 30105 - }, - { - "epoch": 0.8638274066529915, - "grad_norm": 0.37890625, - "learning_rate": 1.1084215290691092e-05, - "loss": 0.8957, - "step": 30110 - }, - { - "epoch": 0.8639708519214494, - "grad_norm": 0.39453125, - "learning_rate": 1.1061312910944077e-05, - "loss": 1.0409, - "step": 30115 - }, - { - "epoch": 0.8641142971899072, - "grad_norm": 0.3828125, - "learning_rate": 1.1038432831020384e-05, - "loss": 1.069, - "step": 30120 - }, - { - "epoch": 0.8642577424583651, - "grad_norm": 0.408203125, - "learning_rate": 1.1015575056656757e-05, - "loss": 0.9657, - "step": 30125 - }, - { - "epoch": 0.8644011877268228, - "grad_norm": 0.4140625, - "learning_rate": 1.099273959358439e-05, - "loss": 0.9079, - "step": 30130 - }, - { - "epoch": 0.8645446329952806, - "grad_norm": 0.388671875, - "learning_rate": 1.096992644752889e-05, - "loss": 0.851, - "step": 30135 - }, - { - "epoch": 0.8646880782637385, - "grad_norm": 0.3359375, - "learning_rate": 1.0947135624210247e-05, - "loss": 0.8643, - "step": 30140 - }, - { - "epoch": 0.8648315235321963, - "grad_norm": 0.376953125, - "learning_rate": 1.092436712934286e-05, - "loss": 0.9411, - "step": 30145 - }, - { - "epoch": 0.8649749688006542, - "grad_norm": 0.376953125, - "learning_rate": 1.0901620968635517e-05, - "loss": 0.8839, - "step": 30150 - }, - { - "epoch": 0.8651184140691119, - "grad_norm": 0.390625, - "learning_rate": 1.0878897147791456e-05, - "loss": 1.0209, - "step": 30155 - }, - { - "epoch": 0.8652618593375697, - "grad_norm": 0.43359375, - "learning_rate": 1.0856195672508262e-05, - "loss": 0.9415, - "step": 30160 - }, - { - "epoch": 0.8654053046060276, - "grad_norm": 0.439453125, - "learning_rate": 1.0833516548477907e-05, - "loss": 0.9421, - "step": 30165 - }, - { - "epoch": 0.8655487498744854, - "grad_norm": 1.1484375, - "learning_rate": 1.0810859781386828e-05, - "loss": 0.9718, - "step": 30170 - }, - { - "epoch": 0.8656921951429432, - "grad_norm": 0.38671875, - "learning_rate": 1.0788225376915795e-05, - "loss": 0.9171, - "step": 30175 - }, - { - "epoch": 0.865835640411401, - "grad_norm": 0.458984375, - "learning_rate": 1.0765613340739989e-05, - "loss": 1.0682, - "step": 30180 - }, - { - "epoch": 0.8659790856798588, - "grad_norm": 0.380859375, - "learning_rate": 1.0743023678528975e-05, - "loss": 1.0092, - "step": 30185 - }, - { - "epoch": 0.8661225309483167, - "grad_norm": 0.63671875, - "learning_rate": 1.0720456395946732e-05, - "loss": 1.1926, - "step": 30190 - }, - { - "epoch": 0.8662659762167745, - "grad_norm": 0.41015625, - "learning_rate": 1.0697911498651614e-05, - "loss": 0.9399, - "step": 30195 - }, - { - "epoch": 0.8664094214852323, - "grad_norm": 0.337890625, - "learning_rate": 1.0675388992296353e-05, - "loss": 0.87, - "step": 30200 - }, - { - "epoch": 0.8665528667536901, - 
"grad_norm": 0.384765625, - "learning_rate": 1.0652888882528068e-05, - "loss": 0.9626, - "step": 30205 - }, - { - "epoch": 0.8666963120221479, - "grad_norm": 0.37890625, - "learning_rate": 1.0630411174988275e-05, - "loss": 1.0042, - "step": 30210 - }, - { - "epoch": 0.8668397572906058, - "grad_norm": 0.400390625, - "learning_rate": 1.0607955875312858e-05, - "loss": 0.9545, - "step": 30215 - }, - { - "epoch": 0.8669832025590636, - "grad_norm": 0.388671875, - "learning_rate": 1.0585522989132102e-05, - "loss": 0.9773, - "step": 30220 - }, - { - "epoch": 0.8671266478275214, - "grad_norm": 0.34765625, - "learning_rate": 1.0563112522070673e-05, - "loss": 0.8964, - "step": 30225 - }, - { - "epoch": 0.8672700930959792, - "grad_norm": 0.40234375, - "learning_rate": 1.0540724479747587e-05, - "loss": 0.9742, - "step": 30230 - }, - { - "epoch": 0.867413538364437, - "grad_norm": 0.40625, - "learning_rate": 1.0518358867776256e-05, - "loss": 0.932, - "step": 30235 - }, - { - "epoch": 0.8675569836328949, - "grad_norm": 0.40625, - "learning_rate": 1.0496015691764461e-05, - "loss": 0.9457, - "step": 30240 - }, - { - "epoch": 0.8677004289013527, - "grad_norm": 0.392578125, - "learning_rate": 1.0473694957314373e-05, - "loss": 0.9313, - "step": 30245 - }, - { - "epoch": 0.8678438741698105, - "grad_norm": 0.39453125, - "learning_rate": 1.045139667002254e-05, - "loss": 0.9352, - "step": 30250 - }, - { - "epoch": 0.8679873194382683, - "grad_norm": 0.365234375, - "learning_rate": 1.0429120835479832e-05, - "loss": 0.8461, - "step": 30255 - }, - { - "epoch": 0.8681307647067261, - "grad_norm": 0.375, - "learning_rate": 1.0406867459271564e-05, - "loss": 0.9357, - "step": 30260 - }, - { - "epoch": 0.868274209975184, - "grad_norm": 0.380859375, - "learning_rate": 1.0384636546977366e-05, - "loss": 0.8638, - "step": 30265 - }, - { - "epoch": 0.8684176552436418, - "grad_norm": 0.38671875, - "learning_rate": 1.036242810417124e-05, - "loss": 0.9074, - "step": 30270 - }, - { - "epoch": 0.8685611005120996, - "grad_norm": 0.400390625, - "learning_rate": 1.0340242136421574e-05, - "loss": 0.9592, - "step": 30275 - }, - { - "epoch": 0.8687045457805574, - "grad_norm": 0.35546875, - "learning_rate": 1.0318078649291119e-05, - "loss": 0.9324, - "step": 30280 - }, - { - "epoch": 0.8688479910490152, - "grad_norm": 0.40234375, - "learning_rate": 1.0295937648336984e-05, - "loss": 0.9787, - "step": 30285 - }, - { - "epoch": 0.8689914363174731, - "grad_norm": 0.396484375, - "learning_rate": 1.0273819139110608e-05, - "loss": 0.8907, - "step": 30290 - }, - { - "epoch": 0.8691348815859309, - "grad_norm": 0.400390625, - "learning_rate": 1.0251723127157875e-05, - "loss": 0.9552, - "step": 30295 - }, - { - "epoch": 0.8692783268543887, - "grad_norm": 0.3671875, - "learning_rate": 1.0229649618018933e-05, - "loss": 0.9115, - "step": 30300 - }, - { - "epoch": 0.8694217721228465, - "grad_norm": 0.390625, - "learning_rate": 1.0207598617228343e-05, - "loss": 0.9623, - "step": 30305 - }, - { - "epoch": 0.8695652173913043, - "grad_norm": 0.392578125, - "learning_rate": 1.0185570130314991e-05, - "loss": 0.9571, - "step": 30310 - }, - { - "epoch": 0.8697086626597622, - "grad_norm": 0.396484375, - "learning_rate": 1.0163564162802164e-05, - "loss": 0.9916, - "step": 30315 - }, - { - "epoch": 0.86985210792822, - "grad_norm": 0.390625, - "learning_rate": 1.0141580720207466e-05, - "loss": 0.919, - "step": 30320 - }, - { - "epoch": 0.8699955531966778, - "grad_norm": 0.349609375, - "learning_rate": 1.0119619808042824e-05, - "loss": 0.9602, - "step": 30325 - }, - { 
- "epoch": 0.8701389984651356, - "grad_norm": 0.396484375, - "learning_rate": 1.0097681431814621e-05, - "loss": 1.0977, - "step": 30330 - }, - { - "epoch": 0.8702824437335934, - "grad_norm": 0.40234375, - "learning_rate": 1.0075765597023479e-05, - "loss": 1.0426, - "step": 30335 - }, - { - "epoch": 0.8704258890020513, - "grad_norm": 0.5625, - "learning_rate": 1.0053872309164414e-05, - "loss": 1.0112, - "step": 30340 - }, - { - "epoch": 0.8705693342705091, - "grad_norm": 0.40234375, - "learning_rate": 1.0032001573726778e-05, - "loss": 0.9756, - "step": 30345 - }, - { - "epoch": 0.8707127795389669, - "grad_norm": 0.42578125, - "learning_rate": 1.00101533961943e-05, - "loss": 0.9403, - "step": 30350 - }, - { - "epoch": 0.8708562248074248, - "grad_norm": 0.359375, - "learning_rate": 9.988327782045026e-06, - "loss": 0.922, - "step": 30355 - }, - { - "epoch": 0.8709996700758825, - "grad_norm": 0.390625, - "learning_rate": 9.966524736751337e-06, - "loss": 0.9188, - "step": 30360 - }, - { - "epoch": 0.8711431153443404, - "grad_norm": 0.380859375, - "learning_rate": 9.944744265779949e-06, - "loss": 0.9005, - "step": 30365 - }, - { - "epoch": 0.8712865606127982, - "grad_norm": 0.388671875, - "learning_rate": 9.922986374591969e-06, - "loss": 0.9629, - "step": 30370 - }, - { - "epoch": 0.871430005881256, - "grad_norm": 0.369140625, - "learning_rate": 9.901251068642792e-06, - "loss": 0.9819, - "step": 30375 - }, - { - "epoch": 0.8715734511497139, - "grad_norm": 0.369140625, - "learning_rate": 9.879538353382178e-06, - "loss": 0.9442, - "step": 30380 - }, - { - "epoch": 0.8717168964181716, - "grad_norm": 0.380859375, - "learning_rate": 9.857848234254197e-06, - "loss": 0.8892, - "step": 30385 - }, - { - "epoch": 0.8718603416866295, - "grad_norm": 0.3828125, - "learning_rate": 9.836180716697251e-06, - "loss": 1.0244, - "step": 30390 - }, - { - "epoch": 0.8720037869550873, - "grad_norm": 0.416015625, - "learning_rate": 9.814535806144132e-06, - "loss": 0.9453, - "step": 30395 - }, - { - "epoch": 0.8721472322235451, - "grad_norm": 0.375, - "learning_rate": 9.792913508021906e-06, - "loss": 0.9693, - "step": 30400 - }, - { - "epoch": 0.872290677492003, - "grad_norm": 0.400390625, - "learning_rate": 9.771313827751982e-06, - "loss": 0.9713, - "step": 30405 - }, - { - "epoch": 0.8724341227604607, - "grad_norm": 0.40234375, - "learning_rate": 9.749736770750106e-06, - "loss": 0.947, - "step": 30410 - }, - { - "epoch": 0.8725775680289186, - "grad_norm": 0.388671875, - "learning_rate": 9.728182342426329e-06, - "loss": 0.9571, - "step": 30415 - }, - { - "epoch": 0.8727210132973764, - "grad_norm": 0.41015625, - "learning_rate": 9.706650548185091e-06, - "loss": 0.9342, - "step": 30420 - }, - { - "epoch": 0.8728644585658342, - "grad_norm": 0.42578125, - "learning_rate": 9.68514139342509e-06, - "loss": 0.9559, - "step": 30425 - }, - { - "epoch": 0.873007903834292, - "grad_norm": 0.40625, - "learning_rate": 9.663654883539364e-06, - "loss": 0.9349, - "step": 30430 - }, - { - "epoch": 0.8731513491027498, - "grad_norm": 0.41015625, - "learning_rate": 9.64219102391527e-06, - "loss": 1.0452, - "step": 30435 - }, - { - "epoch": 0.8732947943712077, - "grad_norm": 0.35546875, - "learning_rate": 9.620749819934539e-06, - "loss": 0.947, - "step": 30440 - }, - { - "epoch": 0.8734382396396655, - "grad_norm": 0.388671875, - "learning_rate": 9.599331276973144e-06, - "loss": 1.0222, - "step": 30445 - }, - { - "epoch": 0.8735816849081233, - "grad_norm": 0.490234375, - "learning_rate": 9.577935400401406e-06, - "loss": 0.9765, - "step": 30450 
- }, - { - "epoch": 0.8737251301765812, - "grad_norm": 0.3828125, - "learning_rate": 9.556562195583996e-06, - "loss": 0.9251, - "step": 30455 - }, - { - "epoch": 0.8738685754450389, - "grad_norm": 0.384765625, - "learning_rate": 9.535211667879861e-06, - "loss": 1.0233, - "step": 30460 - }, - { - "epoch": 0.8740120207134968, - "grad_norm": 0.3984375, - "learning_rate": 9.513883822642267e-06, - "loss": 0.9663, - "step": 30465 - }, - { - "epoch": 0.8741554659819546, - "grad_norm": 0.392578125, - "learning_rate": 9.492578665218788e-06, - "loss": 0.927, - "step": 30470 - }, - { - "epoch": 0.8742989112504124, - "grad_norm": 0.380859375, - "learning_rate": 9.471296200951351e-06, - "loss": 0.9505, - "step": 30475 - }, - { - "epoch": 0.8744423565188703, - "grad_norm": 0.369140625, - "learning_rate": 9.450036435176136e-06, - "loss": 0.9894, - "step": 30480 - }, - { - "epoch": 0.874585801787328, - "grad_norm": 0.373046875, - "learning_rate": 9.428799373223673e-06, - "loss": 0.9592, - "step": 30485 - }, - { - "epoch": 0.8747292470557859, - "grad_norm": 0.390625, - "learning_rate": 9.407585020418763e-06, - "loss": 0.9763, - "step": 30490 - }, - { - "epoch": 0.8748726923242437, - "grad_norm": 0.412109375, - "learning_rate": 9.38639338208056e-06, - "loss": 0.886, - "step": 30495 - }, - { - "epoch": 0.8750161375927015, - "grad_norm": 0.400390625, - "learning_rate": 9.365224463522492e-06, - "loss": 0.8447, - "step": 30500 - }, - { - "epoch": 0.8751595828611594, - "grad_norm": 0.359375, - "learning_rate": 9.344078270052282e-06, - "loss": 1.0281, - "step": 30505 - }, - { - "epoch": 0.8753030281296171, - "grad_norm": 0.345703125, - "learning_rate": 9.322954806971985e-06, - "loss": 0.8402, - "step": 30510 - }, - { - "epoch": 0.8754464733980749, - "grad_norm": 0.4375, - "learning_rate": 9.301854079577943e-06, - "loss": 0.9987, - "step": 30515 - }, - { - "epoch": 0.8755899186665328, - "grad_norm": 0.38671875, - "learning_rate": 9.280776093160782e-06, - "loss": 0.9969, - "step": 30520 - }, - { - "epoch": 0.8757333639349906, - "grad_norm": 0.34375, - "learning_rate": 9.259720853005416e-06, - "loss": 0.9644, - "step": 30525 - }, - { - "epoch": 0.8758768092034485, - "grad_norm": 0.44140625, - "learning_rate": 9.238688364391135e-06, - "loss": 0.9907, - "step": 30530 - }, - { - "epoch": 0.8760202544719062, - "grad_norm": 0.357421875, - "learning_rate": 9.217678632591442e-06, - "loss": 0.984, - "step": 30535 - }, - { - "epoch": 0.876163699740364, - "grad_norm": 0.37109375, - "learning_rate": 9.196691662874135e-06, - "loss": 1.0049, - "step": 30540 - }, - { - "epoch": 0.8763071450088219, - "grad_norm": 0.41015625, - "learning_rate": 9.17572746050137e-06, - "loss": 0.994, - "step": 30545 - }, - { - "epoch": 0.8764505902772797, - "grad_norm": 0.384765625, - "learning_rate": 9.154786030729545e-06, - "loss": 0.9887, - "step": 30550 - }, - { - "epoch": 0.8765940355457376, - "grad_norm": 0.392578125, - "learning_rate": 9.133867378809347e-06, - "loss": 0.8933, - "step": 30555 - }, - { - "epoch": 0.8767374808141953, - "grad_norm": 0.400390625, - "learning_rate": 9.112971509985757e-06, - "loss": 0.9294, - "step": 30560 - }, - { - "epoch": 0.8768809260826531, - "grad_norm": 0.3984375, - "learning_rate": 9.092098429498053e-06, - "loss": 0.9622, - "step": 30565 - }, - { - "epoch": 0.877024371351111, - "grad_norm": 0.400390625, - "learning_rate": 9.071248142579825e-06, - "loss": 0.9693, - "step": 30570 - }, - { - "epoch": 0.8771678166195688, - "grad_norm": 0.34375, - "learning_rate": 9.05042065445888e-06, - "loss": 1.0384, - "step": 
30575 - }, - { - "epoch": 0.8773112618880267, - "grad_norm": 0.388671875, - "learning_rate": 9.029615970357375e-06, - "loss": 0.8989, - "step": 30580 - }, - { - "epoch": 0.8774547071564844, - "grad_norm": 0.400390625, - "learning_rate": 9.008834095491703e-06, - "loss": 0.8706, - "step": 30585 - }, - { - "epoch": 0.8775981524249422, - "grad_norm": 0.4140625, - "learning_rate": 8.988075035072562e-06, - "loss": 0.8575, - "step": 30590 - }, - { - "epoch": 0.8777415976934001, - "grad_norm": 0.376953125, - "learning_rate": 8.967338794304913e-06, - "loss": 0.9387, - "step": 30595 - }, - { - "epoch": 0.8778850429618579, - "grad_norm": 0.376953125, - "learning_rate": 8.946625378388029e-06, - "loss": 0.9169, - "step": 30600 - }, - { - "epoch": 0.8780284882303158, - "grad_norm": 0.365234375, - "learning_rate": 8.925934792515422e-06, - "loss": 0.9282, - "step": 30605 - }, - { - "epoch": 0.8781719334987735, - "grad_norm": 0.400390625, - "learning_rate": 8.905267041874887e-06, - "loss": 0.9667, - "step": 30610 - }, - { - "epoch": 0.8783153787672313, - "grad_norm": 0.404296875, - "learning_rate": 8.884622131648534e-06, - "loss": 1.0259, - "step": 30615 - }, - { - "epoch": 0.8784588240356892, - "grad_norm": 0.37109375, - "learning_rate": 8.864000067012702e-06, - "loss": 0.9385, - "step": 30620 - }, - { - "epoch": 0.878602269304147, - "grad_norm": 0.388671875, - "learning_rate": 8.843400853137996e-06, - "loss": 1.0054, - "step": 30625 - }, - { - "epoch": 0.8787457145726049, - "grad_norm": 0.396484375, - "learning_rate": 8.822824495189297e-06, - "loss": 0.9807, - "step": 30630 - }, - { - "epoch": 0.8788891598410626, - "grad_norm": 0.3671875, - "learning_rate": 8.802270998325812e-06, - "loss": 0.811, - "step": 30635 - }, - { - "epoch": 0.8790326051095204, - "grad_norm": 0.400390625, - "learning_rate": 8.781740367700941e-06, - "loss": 0.9042, - "step": 30640 - }, - { - "epoch": 0.8791760503779783, - "grad_norm": 0.39453125, - "learning_rate": 8.7612326084624e-06, - "loss": 0.9798, - "step": 30645 - }, - { - "epoch": 0.8793194956464361, - "grad_norm": 0.388671875, - "learning_rate": 8.740747725752118e-06, - "loss": 0.9389, - "step": 30650 - }, - { - "epoch": 0.879462940914894, - "grad_norm": 0.39453125, - "learning_rate": 8.720285724706368e-06, - "loss": 0.9314, - "step": 30655 - }, - { - "epoch": 0.8796063861833517, - "grad_norm": 0.357421875, - "learning_rate": 8.69984661045562e-06, - "loss": 0.9869, - "step": 30660 - }, - { - "epoch": 0.8797498314518095, - "grad_norm": 0.388671875, - "learning_rate": 8.679430388124588e-06, - "loss": 0.9612, - "step": 30665 - }, - { - "epoch": 0.8798932767202674, - "grad_norm": 0.388671875, - "learning_rate": 8.659037062832342e-06, - "loss": 0.9949, - "step": 30670 - }, - { - "epoch": 0.8800367219887252, - "grad_norm": 0.37890625, - "learning_rate": 8.63866663969214e-06, - "loss": 0.9574, - "step": 30675 - }, - { - "epoch": 0.8801801672571831, - "grad_norm": 0.365234375, - "learning_rate": 8.61831912381148e-06, - "loss": 0.8927, - "step": 30680 - }, - { - "epoch": 0.8803236125256408, - "grad_norm": 0.44140625, - "learning_rate": 8.597994520292151e-06, - "loss": 0.9812, - "step": 30685 - }, - { - "epoch": 0.8804670577940986, - "grad_norm": 0.349609375, - "learning_rate": 8.577692834230223e-06, - "loss": 0.8823, - "step": 30690 - }, - { - "epoch": 0.8806105030625565, - "grad_norm": 0.400390625, - "learning_rate": 8.557414070715974e-06, - "loss": 0.978, - "step": 30695 - }, - { - "epoch": 0.8807539483310143, - "grad_norm": 0.45703125, - "learning_rate": 8.53715823483393e-06, - 
"loss": 0.9829, - "step": 30700 - }, - { - "epoch": 0.8808973935994722, - "grad_norm": 0.40234375, - "learning_rate": 8.516925331662918e-06, - "loss": 0.9841, - "step": 30705 - }, - { - "epoch": 0.8810408388679299, - "grad_norm": 0.4453125, - "learning_rate": 8.496715366275976e-06, - "loss": 0.9102, - "step": 30710 - }, - { - "epoch": 0.8811842841363877, - "grad_norm": 0.45703125, - "learning_rate": 8.476528343740398e-06, - "loss": 0.9147, - "step": 30715 - }, - { - "epoch": 0.8813277294048456, - "grad_norm": 0.3671875, - "learning_rate": 8.456364269117711e-06, - "loss": 0.8456, - "step": 30720 - }, - { - "epoch": 0.8814711746733034, - "grad_norm": 0.365234375, - "learning_rate": 8.43622314746374e-06, - "loss": 0.8798, - "step": 30725 - }, - { - "epoch": 0.8816146199417613, - "grad_norm": 0.3828125, - "learning_rate": 8.416104983828499e-06, - "loss": 0.9714, - "step": 30730 - }, - { - "epoch": 0.881758065210219, - "grad_norm": 0.375, - "learning_rate": 8.396009783256264e-06, - "loss": 0.9661, - "step": 30735 - }, - { - "epoch": 0.8819015104786768, - "grad_norm": 0.384765625, - "learning_rate": 8.375937550785539e-06, - "loss": 1.029, - "step": 30740 - }, - { - "epoch": 0.8820449557471347, - "grad_norm": 0.41015625, - "learning_rate": 8.355888291449132e-06, - "loss": 1.109, - "step": 30745 - }, - { - "epoch": 0.8821884010155925, - "grad_norm": 0.41796875, - "learning_rate": 8.335862010274009e-06, - "loss": 0.9311, - "step": 30750 - }, - { - "epoch": 0.8823318462840504, - "grad_norm": 0.400390625, - "learning_rate": 8.315858712281433e-06, - "loss": 0.9602, - "step": 30755 - }, - { - "epoch": 0.8824752915525081, - "grad_norm": 0.39453125, - "learning_rate": 8.295878402486856e-06, - "loss": 0.9296, - "step": 30760 - }, - { - "epoch": 0.8826187368209659, - "grad_norm": 0.37890625, - "learning_rate": 8.275921085900007e-06, - "loss": 0.891, - "step": 30765 - }, - { - "epoch": 0.8827621820894238, - "grad_norm": 0.35546875, - "learning_rate": 8.255986767524826e-06, - "loss": 0.9054, - "step": 30770 - }, - { - "epoch": 0.8829056273578816, - "grad_norm": 0.388671875, - "learning_rate": 8.236075452359515e-06, - "loss": 0.9133, - "step": 30775 - }, - { - "epoch": 0.8830490726263395, - "grad_norm": 0.38671875, - "learning_rate": 8.21618714539647e-06, - "loss": 0.9095, - "step": 30780 - }, - { - "epoch": 0.8831925178947972, - "grad_norm": 0.46484375, - "learning_rate": 8.196321851622347e-06, - "loss": 0.9881, - "step": 30785 - }, - { - "epoch": 0.883335963163255, - "grad_norm": 0.404296875, - "learning_rate": 8.176479576018003e-06, - "loss": 0.8921, - "step": 30790 - }, - { - "epoch": 0.8834794084317129, - "grad_norm": 0.36328125, - "learning_rate": 8.156660323558563e-06, - "loss": 0.9667, - "step": 30795 - }, - { - "epoch": 0.8836228537001707, - "grad_norm": 0.380859375, - "learning_rate": 8.136864099213359e-06, - "loss": 1.0008, - "step": 30800 - }, - { - "epoch": 0.8837662989686286, - "grad_norm": 0.396484375, - "learning_rate": 8.117090907945946e-06, - "loss": 0.9546, - "step": 30805 - }, - { - "epoch": 0.8839097442370863, - "grad_norm": 0.41015625, - "learning_rate": 8.097340754714078e-06, - "loss": 0.9931, - "step": 30810 - }, - { - "epoch": 0.8840531895055441, - "grad_norm": 0.388671875, - "learning_rate": 8.077613644469816e-06, - "loss": 0.9061, - "step": 30815 - }, - { - "epoch": 0.884196634774002, - "grad_norm": 0.341796875, - "learning_rate": 8.05790958215934e-06, - "loss": 0.8897, - "step": 30820 - }, - { - "epoch": 0.8843400800424598, - "grad_norm": 0.4140625, - "learning_rate": 
8.038228572723117e-06, - "loss": 0.9572, - "step": 30825 - }, - { - "epoch": 0.8844835253109177, - "grad_norm": 0.404296875, - "learning_rate": 8.018570621095822e-06, - "loss": 1.0284, - "step": 30830 - }, - { - "epoch": 0.8846269705793754, - "grad_norm": 0.40234375, - "learning_rate": 7.998935732206347e-06, - "loss": 1.0277, - "step": 30835 - }, - { - "epoch": 0.8847704158478332, - "grad_norm": 0.427734375, - "learning_rate": 7.979323910977787e-06, - "loss": 0.8872, - "step": 30840 - }, - { - "epoch": 0.8849138611162911, - "grad_norm": 0.44140625, - "learning_rate": 7.959735162327442e-06, - "loss": 1.0068, - "step": 30845 - }, - { - "epoch": 0.8850573063847489, - "grad_norm": 0.41796875, - "learning_rate": 7.940169491166904e-06, - "loss": 0.9898, - "step": 30850 - }, - { - "epoch": 0.8852007516532068, - "grad_norm": 0.40234375, - "learning_rate": 7.920626902401885e-06, - "loss": 0.8602, - "step": 30855 - }, - { - "epoch": 0.8853441969216646, - "grad_norm": 0.37109375, - "learning_rate": 7.90110740093234e-06, - "loss": 0.8454, - "step": 30860 - }, - { - "epoch": 0.8854876421901223, - "grad_norm": 0.400390625, - "learning_rate": 7.881610991652466e-06, - "loss": 0.9102, - "step": 30865 - }, - { - "epoch": 0.8856310874585802, - "grad_norm": 0.376953125, - "learning_rate": 7.862137679450653e-06, - "loss": 0.8552, - "step": 30870 - }, - { - "epoch": 0.885774532727038, - "grad_norm": 0.416015625, - "learning_rate": 7.842687469209476e-06, - "loss": 0.843, - "step": 30875 - }, - { - "epoch": 0.8859179779954958, - "grad_norm": 0.498046875, - "learning_rate": 7.823260365805717e-06, - "loss": 0.9843, - "step": 30880 - }, - { - "epoch": 0.8860614232639537, - "grad_norm": 0.361328125, - "learning_rate": 7.80385637411043e-06, - "loss": 0.9135, - "step": 30885 - }, - { - "epoch": 0.8862048685324114, - "grad_norm": 0.39453125, - "learning_rate": 7.784475498988808e-06, - "loss": 1.0275, - "step": 30890 - }, - { - "epoch": 0.8863483138008693, - "grad_norm": 0.41796875, - "learning_rate": 7.765117745300243e-06, - "loss": 0.9441, - "step": 30895 - }, - { - "epoch": 0.8864917590693271, - "grad_norm": 0.361328125, - "learning_rate": 7.745783117898397e-06, - "loss": 0.9066, - "step": 30900 - }, - { - "epoch": 0.8866352043377849, - "grad_norm": 0.3984375, - "learning_rate": 7.72647162163106e-06, - "loss": 1.0105, - "step": 30905 - }, - { - "epoch": 0.8867786496062428, - "grad_norm": 0.357421875, - "learning_rate": 7.707183261340255e-06, - "loss": 1.0345, - "step": 30910 - }, - { - "epoch": 0.8869220948747005, - "grad_norm": 0.392578125, - "learning_rate": 7.687918041862197e-06, - "loss": 0.9755, - "step": 30915 - }, - { - "epoch": 0.8870655401431584, - "grad_norm": 0.353515625, - "learning_rate": 7.668675968027328e-06, - "loss": 0.941, - "step": 30920 - }, - { - "epoch": 0.8872089854116162, - "grad_norm": 0.380859375, - "learning_rate": 7.649457044660247e-06, - "loss": 0.9708, - "step": 30925 - }, - { - "epoch": 0.887352430680074, - "grad_norm": 0.421875, - "learning_rate": 7.630261276579765e-06, - "loss": 0.8719, - "step": 30930 - }, - { - "epoch": 0.8874958759485319, - "grad_norm": 0.384765625, - "learning_rate": 7.611088668598887e-06, - "loss": 1.0074, - "step": 30935 - }, - { - "epoch": 0.8876393212169896, - "grad_norm": 0.3515625, - "learning_rate": 7.5919392255248025e-06, - "loss": 0.9705, - "step": 30940 - }, - { - "epoch": 0.8877827664854475, - "grad_norm": 0.404296875, - "learning_rate": 7.572812952158892e-06, - "loss": 1.0245, - "step": 30945 - }, - { - "epoch": 0.8879262117539053, - "grad_norm": 
0.408203125, - "learning_rate": 7.553709853296764e-06, - "loss": 0.9242, - "step": 30950 - }, - { - "epoch": 0.8880696570223631, - "grad_norm": 0.373046875, - "learning_rate": 7.534629933728176e-06, - "loss": 0.913, - "step": 30955 - }, - { - "epoch": 0.888213102290821, - "grad_norm": 0.388671875, - "learning_rate": 7.515573198237069e-06, - "loss": 0.9304, - "step": 30960 - }, - { - "epoch": 0.8883565475592787, - "grad_norm": 0.38671875, - "learning_rate": 7.496539651601598e-06, - "loss": 0.9454, - "step": 30965 - }, - { - "epoch": 0.8884999928277366, - "grad_norm": 0.345703125, - "learning_rate": 7.477529298594077e-06, - "loss": 0.9934, - "step": 30970 - }, - { - "epoch": 0.8886434380961944, - "grad_norm": 0.390625, - "learning_rate": 7.458542143981062e-06, - "loss": 1.0036, - "step": 30975 - }, - { - "epoch": 0.8887868833646522, - "grad_norm": 0.359375, - "learning_rate": 7.439578192523211e-06, - "loss": 0.9456, - "step": 30980 - }, - { - "epoch": 0.8889303286331101, - "grad_norm": 0.404296875, - "learning_rate": 7.420637448975398e-06, - "loss": 0.9783, - "step": 30985 - }, - { - "epoch": 0.8890737739015678, - "grad_norm": 0.412109375, - "learning_rate": 7.4017199180867246e-06, - "loss": 1.083, - "step": 30990 - }, - { - "epoch": 0.8892172191700257, - "grad_norm": 0.369140625, - "learning_rate": 7.3828256046004074e-06, - "loss": 0.9391, - "step": 30995 - }, - { - "epoch": 0.8893606644384835, - "grad_norm": 0.37109375, - "learning_rate": 7.363954513253879e-06, - "loss": 0.9832, - "step": 31000 - }, - { - "epoch": 0.8895041097069413, - "grad_norm": 0.376953125, - "learning_rate": 7.345106648778688e-06, - "loss": 0.903, - "step": 31005 - }, - { - "epoch": 0.8896475549753992, - "grad_norm": 0.353515625, - "learning_rate": 7.3262820159006765e-06, - "loss": 0.8229, - "step": 31010 - }, - { - "epoch": 0.8897910002438569, - "grad_norm": 0.40234375, - "learning_rate": 7.307480619339746e-06, - "loss": 0.9982, - "step": 31015 - }, - { - "epoch": 0.8899344455123148, - "grad_norm": 0.34765625, - "learning_rate": 7.288702463810026e-06, - "loss": 0.9318, - "step": 31020 - }, - { - "epoch": 0.8900778907807726, - "grad_norm": 0.396484375, - "learning_rate": 7.269947554019829e-06, - "loss": 0.9133, - "step": 31025 - }, - { - "epoch": 0.8902213360492304, - "grad_norm": 0.396484375, - "learning_rate": 7.2512158946716145e-06, - "loss": 0.9774, - "step": 31030 - }, - { - "epoch": 0.8903647813176883, - "grad_norm": 0.41015625, - "learning_rate": 7.232507490462015e-06, - "loss": 0.984, - "step": 31035 - }, - { - "epoch": 0.890508226586146, - "grad_norm": 0.359375, - "learning_rate": 7.213822346081822e-06, - "loss": 0.9053, - "step": 31040 - }, - { - "epoch": 0.8906516718546039, - "grad_norm": 0.396484375, - "learning_rate": 7.195160466216033e-06, - "loss": 0.9495, - "step": 31045 - }, - { - "epoch": 0.8907951171230617, - "grad_norm": 0.369140625, - "learning_rate": 7.176521855543772e-06, - "loss": 0.989, - "step": 31050 - }, - { - "epoch": 0.8909385623915195, - "grad_norm": 0.39453125, - "learning_rate": 7.157906518738344e-06, - "loss": 0.995, - "step": 31055 - }, - { - "epoch": 0.8910820076599774, - "grad_norm": 0.390625, - "learning_rate": 7.13931446046725e-06, - "loss": 0.9762, - "step": 31060 - }, - { - "epoch": 0.8912254529284351, - "grad_norm": 0.404296875, - "learning_rate": 7.120745685392094e-06, - "loss": 1.0511, - "step": 31065 - }, - { - "epoch": 0.891368898196893, - "grad_norm": 0.369140625, - "learning_rate": 7.1022001981686845e-06, - "loss": 0.8279, - "step": 31070 - }, - { - "epoch": 
0.8915123434653508, - "grad_norm": 0.380859375, - "learning_rate": 7.0836780034469585e-06, - "loss": 0.9058, - "step": 31075 - }, - { - "epoch": 0.8916557887338086, - "grad_norm": 0.39453125, - "learning_rate": 7.065179105871056e-06, - "loss": 1.0099, - "step": 31080 - }, - { - "epoch": 0.8917992340022665, - "grad_norm": 0.369140625, - "learning_rate": 7.046703510079255e-06, - "loss": 0.962, - "step": 31085 - }, - { - "epoch": 0.8919426792707242, - "grad_norm": 0.39453125, - "learning_rate": 7.028251220703974e-06, - "loss": 0.8762, - "step": 31090 - }, - { - "epoch": 0.8920861245391821, - "grad_norm": 0.3671875, - "learning_rate": 7.009822242371788e-06, - "loss": 0.8982, - "step": 31095 - }, - { - "epoch": 0.8922295698076399, - "grad_norm": 0.365234375, - "learning_rate": 6.99141657970348e-06, - "loss": 0.9245, - "step": 31100 - }, - { - "epoch": 0.8923730150760977, - "grad_norm": 0.416015625, - "learning_rate": 6.973034237313935e-06, - "loss": 0.8765, - "step": 31105 - }, - { - "epoch": 0.8925164603445556, - "grad_norm": 0.365234375, - "learning_rate": 6.954675219812201e-06, - "loss": 1.0802, - "step": 31110 - }, - { - "epoch": 0.8926599056130133, - "grad_norm": 0.412109375, - "learning_rate": 6.936339531801472e-06, - "loss": 0.9709, - "step": 31115 - }, - { - "epoch": 0.8928033508814712, - "grad_norm": 0.392578125, - "learning_rate": 6.918027177879094e-06, - "loss": 0.9227, - "step": 31120 - }, - { - "epoch": 0.892946796149929, - "grad_norm": 0.423828125, - "learning_rate": 6.899738162636604e-06, - "loss": 0.9382, - "step": 31125 - }, - { - "epoch": 0.8930902414183868, - "grad_norm": 0.40625, - "learning_rate": 6.881472490659635e-06, - "loss": 0.9297, - "step": 31130 - }, - { - "epoch": 0.8932336866868447, - "grad_norm": 0.400390625, - "learning_rate": 6.863230166527979e-06, - "loss": 0.8873, - "step": 31135 - }, - { - "epoch": 0.8933771319553024, - "grad_norm": 0.37890625, - "learning_rate": 6.845011194815598e-06, - "loss": 0.9309, - "step": 31140 - }, - { - "epoch": 0.8935205772237603, - "grad_norm": 0.40625, - "learning_rate": 6.82681558009054e-06, - "loss": 1.0066, - "step": 31145 - }, - { - "epoch": 0.8936640224922181, - "grad_norm": 0.380859375, - "learning_rate": 6.808643326915087e-06, - "loss": 0.9917, - "step": 31150 - }, - { - "epoch": 0.8938074677606759, - "grad_norm": 0.3671875, - "learning_rate": 6.790494439845596e-06, - "loss": 0.9427, - "step": 31155 - }, - { - "epoch": 0.8939509130291338, - "grad_norm": 0.392578125, - "learning_rate": 6.772368923432593e-06, - "loss": 0.9998, - "step": 31160 - }, - { - "epoch": 0.8940943582975915, - "grad_norm": 0.392578125, - "learning_rate": 6.754266782220698e-06, - "loss": 0.9699, - "step": 31165 - }, - { - "epoch": 0.8942378035660494, - "grad_norm": 0.412109375, - "learning_rate": 6.736188020748746e-06, - "loss": 0.9513, - "step": 31170 - }, - { - "epoch": 0.8943812488345072, - "grad_norm": 0.37109375, - "learning_rate": 6.7181326435496575e-06, - "loss": 0.9534, - "step": 31175 - }, - { - "epoch": 0.894524694102965, - "grad_norm": 0.41015625, - "learning_rate": 6.700100655150487e-06, - "loss": 1.0394, - "step": 31180 - }, - { - "epoch": 0.8946681393714229, - "grad_norm": 0.376953125, - "learning_rate": 6.682092060072476e-06, - "loss": 0.9114, - "step": 31185 - }, - { - "epoch": 0.8948115846398806, - "grad_norm": 0.375, - "learning_rate": 6.6641068628309545e-06, - "loss": 1.0007, - "step": 31190 - }, - { - "epoch": 0.8949550299083385, - "grad_norm": 0.4140625, - "learning_rate": 6.6461450679353835e-06, - "loss": 0.9865, - "step": 31195 
- }, - { - "epoch": 0.8950984751767963, - "grad_norm": 0.43359375, - "learning_rate": 6.628206679889349e-06, - "loss": 0.8823, - "step": 31200 - }, - { - "epoch": 0.8952419204452541, - "grad_norm": 0.35546875, - "learning_rate": 6.610291703190641e-06, - "loss": 0.9248, - "step": 31205 - }, - { - "epoch": 0.895385365713712, - "grad_norm": 0.466796875, - "learning_rate": 6.5924001423311014e-06, - "loss": 0.9727, - "step": 31210 - }, - { - "epoch": 0.8955288109821697, - "grad_norm": 0.349609375, - "learning_rate": 6.574532001796718e-06, - "loss": 0.8817, - "step": 31215 - }, - { - "epoch": 0.8956722562506275, - "grad_norm": 0.3671875, - "learning_rate": 6.55668728606762e-06, - "loss": 0.9083, - "step": 31220 - }, - { - "epoch": 0.8958157015190854, - "grad_norm": 0.3671875, - "learning_rate": 6.53886599961806e-06, - "loss": 0.9923, - "step": 31225 - }, - { - "epoch": 0.8959591467875432, - "grad_norm": 0.37890625, - "learning_rate": 6.521068146916432e-06, - "loss": 0.8716, - "step": 31230 - }, - { - "epoch": 0.8961025920560011, - "grad_norm": 0.5703125, - "learning_rate": 6.503293732425198e-06, - "loss": 0.953, - "step": 31235 - }, - { - "epoch": 0.8962460373244588, - "grad_norm": 0.3671875, - "learning_rate": 6.485542760601027e-06, - "loss": 0.8936, - "step": 31240 - }, - { - "epoch": 0.8963894825929166, - "grad_norm": 0.380859375, - "learning_rate": 6.467815235894637e-06, - "loss": 0.9848, - "step": 31245 - }, - { - "epoch": 0.8965329278613745, - "grad_norm": 0.384765625, - "learning_rate": 6.450111162750905e-06, - "loss": 1.0228, - "step": 31250 - }, - { - "epoch": 0.8966763731298323, - "grad_norm": 0.37109375, - "learning_rate": 6.432430545608792e-06, - "loss": 0.8753, - "step": 31255 - }, - { - "epoch": 0.8968198183982902, - "grad_norm": 0.357421875, - "learning_rate": 6.414773388901452e-06, - "loss": 0.8841, - "step": 31260 - }, - { - "epoch": 0.896963263666748, - "grad_norm": 0.40625, - "learning_rate": 6.397139697056076e-06, - "loss": 0.9391, - "step": 31265 - }, - { - "epoch": 0.8971067089352057, - "grad_norm": 0.392578125, - "learning_rate": 6.379529474494006e-06, - "loss": 0.8533, - "step": 31270 - }, - { - "epoch": 0.8972501542036636, - "grad_norm": 0.361328125, - "learning_rate": 6.361942725630721e-06, - "loss": 0.9452, - "step": 31275 - }, - { - "epoch": 0.8973935994721214, - "grad_norm": 0.375, - "learning_rate": 6.344379454875771e-06, - "loss": 0.952, - "step": 31280 - }, - { - "epoch": 0.8975370447405793, - "grad_norm": 0.43359375, - "learning_rate": 6.326839666632833e-06, - "loss": 0.9887, - "step": 31285 - }, - { - "epoch": 0.897680490009037, - "grad_norm": 0.373046875, - "learning_rate": 6.309323365299724e-06, - "loss": 0.9276, - "step": 31290 - }, - { - "epoch": 0.8978239352774948, - "grad_norm": 0.3828125, - "learning_rate": 6.2918305552683295e-06, - "loss": 1.0213, - "step": 31295 - }, - { - "epoch": 0.8979673805459527, - "grad_norm": 0.427734375, - "learning_rate": 6.274361240924676e-06, - "loss": 0.9132, - "step": 31300 - }, - { - "epoch": 0.8981108258144105, - "grad_norm": 0.35546875, - "learning_rate": 6.256915426648902e-06, - "loss": 0.9142, - "step": 31305 - }, - { - "epoch": 0.8982542710828684, - "grad_norm": 0.34375, - "learning_rate": 6.239493116815231e-06, - "loss": 0.9206, - "step": 31310 - }, - { - "epoch": 0.8983977163513261, - "grad_norm": 0.40234375, - "learning_rate": 6.222094315791993e-06, - "loss": 0.9, - "step": 31315 - }, - { - "epoch": 0.8985411616197839, - "grad_norm": 0.400390625, - "learning_rate": 6.204719027941641e-06, - "loss": 0.8922, - "step": 
31320 - }, - { - "epoch": 0.8986846068882418, - "grad_norm": 0.36328125, - "learning_rate": 6.187367257620713e-06, - "loss": 0.946, - "step": 31325 - }, - { - "epoch": 0.8988280521566996, - "grad_norm": 0.376953125, - "learning_rate": 6.170039009179895e-06, - "loss": 0.8602, - "step": 31330 - }, - { - "epoch": 0.8989714974251575, - "grad_norm": 0.3671875, - "learning_rate": 6.152734286963913e-06, - "loss": 0.8762, - "step": 31335 - }, - { - "epoch": 0.8991149426936152, - "grad_norm": 0.3984375, - "learning_rate": 6.135453095311627e-06, - "loss": 0.9847, - "step": 31340 - }, - { - "epoch": 0.899258387962073, - "grad_norm": 0.380859375, - "learning_rate": 6.118195438556007e-06, - "loss": 0.9686, - "step": 31345 - }, - { - "epoch": 0.8994018332305309, - "grad_norm": 0.353515625, - "learning_rate": 6.100961321024112e-06, - "loss": 0.9856, - "step": 31350 - }, - { - "epoch": 0.8995452784989887, - "grad_norm": 0.380859375, - "learning_rate": 6.083750747037087e-06, - "loss": 1.0219, - "step": 31355 - }, - { - "epoch": 0.8996887237674466, - "grad_norm": 0.390625, - "learning_rate": 6.066563720910168e-06, - "loss": 0.9963, - "step": 31360 - }, - { - "epoch": 0.8998321690359044, - "grad_norm": 0.384765625, - "learning_rate": 6.04940024695273e-06, - "loss": 0.9669, - "step": 31365 - }, - { - "epoch": 0.8999756143043621, - "grad_norm": 0.37109375, - "learning_rate": 6.032260329468198e-06, - "loss": 0.9289, - "step": 31370 - }, - { - "epoch": 0.90011905957282, - "grad_norm": 0.40625, - "learning_rate": 6.015143972754112e-06, - "loss": 0.9023, - "step": 31375 - }, - { - "epoch": 0.9002625048412778, - "grad_norm": 0.39453125, - "learning_rate": 5.998051181102082e-06, - "loss": 0.8562, - "step": 31380 - }, - { - "epoch": 0.9004059501097357, - "grad_norm": 0.380859375, - "learning_rate": 5.980981958797871e-06, - "loss": 0.9517, - "step": 31385 - }, - { - "epoch": 0.9005493953781935, - "grad_norm": 0.404296875, - "learning_rate": 5.963936310121243e-06, - "loss": 0.9867, - "step": 31390 - }, - { - "epoch": 0.9006928406466512, - "grad_norm": 0.412109375, - "learning_rate": 5.946914239346113e-06, - "loss": 0.884, - "step": 31395 - }, - { - "epoch": 0.9008362859151091, - "grad_norm": 0.3671875, - "learning_rate": 5.929915750740478e-06, - "loss": 0.9529, - "step": 31400 - }, - { - "epoch": 0.9009797311835669, - "grad_norm": 0.408203125, - "learning_rate": 5.912940848566406e-06, - "loss": 0.8793, - "step": 31405 - }, - { - "epoch": 0.9011231764520248, - "grad_norm": 0.37109375, - "learning_rate": 5.89598953708006e-06, - "loss": 0.9504, - "step": 31410 - }, - { - "epoch": 0.9012666217204826, - "grad_norm": 0.41015625, - "learning_rate": 5.8790618205316616e-06, - "loss": 0.9621, - "step": 31415 - }, - { - "epoch": 0.9014100669889403, - "grad_norm": 0.3671875, - "learning_rate": 5.862157703165583e-06, - "loss": 0.9519, - "step": 31420 - }, - { - "epoch": 0.9015535122573982, - "grad_norm": 0.421875, - "learning_rate": 5.8452771892202e-06, - "loss": 0.9862, - "step": 31425 - }, - { - "epoch": 0.901696957525856, - "grad_norm": 0.400390625, - "learning_rate": 5.828420282928016e-06, - "loss": 1.0101, - "step": 31430 - }, - { - "epoch": 0.9018404027943139, - "grad_norm": 0.427734375, - "learning_rate": 5.811586988515627e-06, - "loss": 0.968, - "step": 31435 - }, - { - "epoch": 0.9019838480627717, - "grad_norm": 0.400390625, - "learning_rate": 5.794777310203658e-06, - "loss": 0.9296, - "step": 31440 - }, - { - "epoch": 0.9021272933312294, - "grad_norm": 0.462890625, - "learning_rate": 5.777991252206871e-06, - "loss": 
0.9602, - "step": 31445 - }, - { - "epoch": 0.9022707385996873, - "grad_norm": 0.400390625, - "learning_rate": 5.761228818734032e-06, - "loss": 0.8777, - "step": 31450 - }, - { - "epoch": 0.9024141838681451, - "grad_norm": 0.369140625, - "learning_rate": 5.74449001398808e-06, - "loss": 0.9616, - "step": 31455 - }, - { - "epoch": 0.902557629136603, - "grad_norm": 0.353515625, - "learning_rate": 5.727774842165956e-06, - "loss": 0.8779, - "step": 31460 - }, - { - "epoch": 0.9027010744050608, - "grad_norm": 0.34765625, - "learning_rate": 5.711083307458698e-06, - "loss": 0.8384, - "step": 31465 - }, - { - "epoch": 0.9028445196735185, - "grad_norm": 0.333984375, - "learning_rate": 5.694415414051402e-06, - "loss": 0.8939, - "step": 31470 - }, - { - "epoch": 0.9029879649419764, - "grad_norm": 0.392578125, - "learning_rate": 5.6777711661232805e-06, - "loss": 0.9599, - "step": 31475 - }, - { - "epoch": 0.9031314102104342, - "grad_norm": 0.359375, - "learning_rate": 5.6611505678475726e-06, - "loss": 1.0076, - "step": 31480 - }, - { - "epoch": 0.9032748554788921, - "grad_norm": 0.392578125, - "learning_rate": 5.644553623391602e-06, - "loss": 0.9823, - "step": 31485 - }, - { - "epoch": 0.9034183007473499, - "grad_norm": 0.37890625, - "learning_rate": 5.627980336916772e-06, - "loss": 0.955, - "step": 31490 - }, - { - "epoch": 0.9035617460158076, - "grad_norm": 0.384765625, - "learning_rate": 5.611430712578525e-06, - "loss": 0.9678, - "step": 31495 - }, - { - "epoch": 0.9037051912842655, - "grad_norm": 0.4140625, - "learning_rate": 5.594904754526398e-06, - "loss": 1.0125, - "step": 31500 - }, - { - "epoch": 0.9038486365527233, - "grad_norm": 0.39453125, - "learning_rate": 5.5784024669040095e-06, - "loss": 0.9615, - "step": 31505 - }, - { - "epoch": 0.9039920818211812, - "grad_norm": 0.3671875, - "learning_rate": 5.561923853848994e-06, - "loss": 0.9784, - "step": 31510 - }, - { - "epoch": 0.904135527089639, - "grad_norm": 0.365234375, - "learning_rate": 5.545468919493091e-06, - "loss": 1.0389, - "step": 31515 - }, - { - "epoch": 0.9042789723580967, - "grad_norm": 0.380859375, - "learning_rate": 5.529037667962067e-06, - "loss": 0.8982, - "step": 31520 - }, - { - "epoch": 0.9044224176265546, - "grad_norm": 0.39453125, - "learning_rate": 5.512630103375793e-06, - "loss": 0.9327, - "step": 31525 - }, - { - "epoch": 0.9045658628950124, - "grad_norm": 0.375, - "learning_rate": 5.496246229848179e-06, - "loss": 0.8885, - "step": 31530 - }, - { - "epoch": 0.9047093081634703, - "grad_norm": 0.37109375, - "learning_rate": 5.479886051487182e-06, - "loss": 0.7947, - "step": 31535 - }, - { - "epoch": 0.9048527534319281, - "grad_norm": 0.376953125, - "learning_rate": 5.463549572394833e-06, - "loss": 0.9892, - "step": 31540 - }, - { - "epoch": 0.9049961987003858, - "grad_norm": 0.33984375, - "learning_rate": 5.447236796667233e-06, - "loss": 0.9333, - "step": 31545 - }, - { - "epoch": 0.9051396439688437, - "grad_norm": 0.380859375, - "learning_rate": 5.4309477283945194e-06, - "loss": 0.8613, - "step": 31550 - }, - { - "epoch": 0.9052830892373015, - "grad_norm": 0.41015625, - "learning_rate": 5.414682371660873e-06, - "loss": 0.9129, - "step": 31555 - }, - { - "epoch": 0.9054265345057593, - "grad_norm": 0.37890625, - "learning_rate": 5.3984407305445736e-06, - "loss": 1.0408, - "step": 31560 - }, - { - "epoch": 0.9055699797742172, - "grad_norm": 0.4375, - "learning_rate": 5.3822228091179205e-06, - "loss": 0.9507, - "step": 31565 - }, - { - "epoch": 0.9057134250426749, - "grad_norm": 0.373046875, - "learning_rate": 
5.366028611447282e-06, - "loss": 1.0646, - "step": 31570 - }, - { - "epoch": 0.9058568703111328, - "grad_norm": 0.361328125, - "learning_rate": 5.349858141593034e-06, - "loss": 0.9757, - "step": 31575 - }, - { - "epoch": 0.9060003155795906, - "grad_norm": 0.365234375, - "learning_rate": 5.333711403609698e-06, - "loss": 0.8915, - "step": 31580 - }, - { - "epoch": 0.9061437608480484, - "grad_norm": 0.380859375, - "learning_rate": 5.317588401545748e-06, - "loss": 0.9973, - "step": 31585 - }, - { - "epoch": 0.9062872061165063, - "grad_norm": 0.37109375, - "learning_rate": 5.301489139443738e-06, - "loss": 0.8577, - "step": 31590 - }, - { - "epoch": 0.906430651384964, - "grad_norm": 0.38671875, - "learning_rate": 5.285413621340307e-06, - "loss": 1.0046, - "step": 31595 - }, - { - "epoch": 0.9065740966534219, - "grad_norm": 0.416015625, - "learning_rate": 5.269361851266097e-06, - "loss": 1.0491, - "step": 31600 - }, - { - "epoch": 0.9067175419218797, - "grad_norm": 0.353515625, - "learning_rate": 5.25333383324581e-06, - "loss": 1.0062, - "step": 31605 - }, - { - "epoch": 0.9068609871903375, - "grad_norm": 0.37109375, - "learning_rate": 5.237329571298166e-06, - "loss": 0.9269, - "step": 31610 - }, - { - "epoch": 0.9070044324587954, - "grad_norm": 0.37890625, - "learning_rate": 5.22134906943601e-06, - "loss": 0.9558, - "step": 31615 - }, - { - "epoch": 0.9071478777272531, - "grad_norm": 0.3828125, - "learning_rate": 5.205392331666126e-06, - "loss": 0.9771, - "step": 31620 - }, - { - "epoch": 0.907291322995711, - "grad_norm": 0.427734375, - "learning_rate": 5.189459361989402e-06, - "loss": 1.0235, - "step": 31625 - }, - { - "epoch": 0.9074347682641688, - "grad_norm": 0.3984375, - "learning_rate": 5.173550164400753e-06, - "loss": 0.9501, - "step": 31630 - }, - { - "epoch": 0.9075782135326266, - "grad_norm": 0.412109375, - "learning_rate": 5.157664742889146e-06, - "loss": 1.0446, - "step": 31635 - }, - { - "epoch": 0.9077216588010845, - "grad_norm": 0.466796875, - "learning_rate": 5.141803101437559e-06, - "loss": 0.9562, - "step": 31640 - }, - { - "epoch": 0.9078651040695422, - "grad_norm": 0.40625, - "learning_rate": 5.1259652440230014e-06, - "loss": 0.8461, - "step": 31645 - }, - { - "epoch": 0.9080085493380001, - "grad_norm": 0.36328125, - "learning_rate": 5.110151174616584e-06, - "loss": 0.8724, - "step": 31650 - }, - { - "epoch": 0.9081519946064579, - "grad_norm": 0.400390625, - "learning_rate": 5.094360897183381e-06, - "loss": 0.8845, - "step": 31655 - }, - { - "epoch": 0.9082954398749157, - "grad_norm": 0.59765625, - "learning_rate": 5.078594415682536e-06, - "loss": 1.0712, - "step": 31660 - }, - { - "epoch": 0.9084388851433736, - "grad_norm": 0.37890625, - "learning_rate": 5.0628517340672086e-06, - "loss": 0.9118, - "step": 31665 - }, - { - "epoch": 0.9085823304118313, - "grad_norm": 0.45703125, - "learning_rate": 5.04713285628462e-06, - "loss": 1.0347, - "step": 31670 - }, - { - "epoch": 0.9087257756802892, - "grad_norm": 0.37890625, - "learning_rate": 5.031437786275972e-06, - "loss": 1.0342, - "step": 31675 - }, - { - "epoch": 0.908869220948747, - "grad_norm": 0.373046875, - "learning_rate": 5.015766527976573e-06, - "loss": 0.9783, - "step": 31680 - }, - { - "epoch": 0.9090126662172048, - "grad_norm": 0.375, - "learning_rate": 5.000119085315691e-06, - "loss": 0.8997, - "step": 31685 - }, - { - "epoch": 0.9091561114856627, - "grad_norm": 0.37890625, - "learning_rate": 4.984495462216654e-06, - "loss": 0.9445, - "step": 31690 - }, - { - "epoch": 0.9092995567541204, - "grad_norm": 0.462890625, - 
"learning_rate": 4.968895662596817e-06, - "loss": 1.06, - "step": 31695 - }, - { - "epoch": 0.9094430020225783, - "grad_norm": 0.390625, - "learning_rate": 4.953319690367531e-06, - "loss": 1.0415, - "step": 31700 - }, - { - "epoch": 0.9095864472910361, - "grad_norm": 0.423828125, - "learning_rate": 4.937767549434247e-06, - "loss": 0.8754, - "step": 31705 - }, - { - "epoch": 0.9097298925594939, - "grad_norm": 0.42578125, - "learning_rate": 4.922239243696358e-06, - "loss": 1.0886, - "step": 31710 - }, - { - "epoch": 0.9098733378279518, - "grad_norm": 0.38671875, - "learning_rate": 4.906734777047317e-06, - "loss": 0.9491, - "step": 31715 - }, - { - "epoch": 0.9100167830964095, - "grad_norm": 0.3671875, - "learning_rate": 4.891254153374614e-06, - "loss": 0.7867, - "step": 31720 - }, - { - "epoch": 0.9101602283648674, - "grad_norm": 0.375, - "learning_rate": 4.875797376559732e-06, - "loss": 1.0132, - "step": 31725 - }, - { - "epoch": 0.9103036736333252, - "grad_norm": 0.39453125, - "learning_rate": 4.860364450478206e-06, - "loss": 1.0234, - "step": 31730 - }, - { - "epoch": 0.910447118901783, - "grad_norm": 0.38671875, - "learning_rate": 4.844955378999539e-06, - "loss": 0.9648, - "step": 31735 - }, - { - "epoch": 0.9105905641702409, - "grad_norm": 0.400390625, - "learning_rate": 4.829570165987318e-06, - "loss": 0.9269, - "step": 31740 - }, - { - "epoch": 0.9107340094386986, - "grad_norm": 0.40234375, - "learning_rate": 4.814208815299115e-06, - "loss": 0.9729, - "step": 31745 - }, - { - "epoch": 0.9108774547071565, - "grad_norm": 0.3671875, - "learning_rate": 4.798871330786492e-06, - "loss": 0.913, - "step": 31750 - }, - { - "epoch": 0.9110208999756143, - "grad_norm": 0.37109375, - "learning_rate": 4.783557716295095e-06, - "loss": 1.0258, - "step": 31755 - }, - { - "epoch": 0.9111643452440721, - "grad_norm": 0.39453125, - "learning_rate": 4.7682679756645195e-06, - "loss": 0.927, - "step": 31760 - }, - { - "epoch": 0.91130779051253, - "grad_norm": 0.40625, - "learning_rate": 4.753002112728411e-06, - "loss": 0.9488, - "step": 31765 - }, - { - "epoch": 0.9114512357809877, - "grad_norm": 0.376953125, - "learning_rate": 4.737760131314406e-06, - "loss": 0.822, - "step": 31770 - }, - { - "epoch": 0.9115946810494456, - "grad_norm": 0.42578125, - "learning_rate": 4.722542035244182e-06, - "loss": 0.9196, - "step": 31775 - }, - { - "epoch": 0.9117381263179034, - "grad_norm": 0.392578125, - "learning_rate": 4.707347828333408e-06, - "loss": 0.9703, - "step": 31780 - }, - { - "epoch": 0.9118815715863612, - "grad_norm": 0.37890625, - "learning_rate": 4.692177514391738e-06, - "loss": 0.8376, - "step": 31785 - }, - { - "epoch": 0.9120250168548191, - "grad_norm": 0.478515625, - "learning_rate": 4.677031097222906e-06, - "loss": 0.8668, - "step": 31790 - }, - { - "epoch": 0.9121684621232768, - "grad_norm": 0.38671875, - "learning_rate": 4.6619085806245856e-06, - "loss": 0.9871, - "step": 31795 - }, - { - "epoch": 0.9123119073917347, - "grad_norm": 0.4140625, - "learning_rate": 4.646809968388499e-06, - "loss": 0.9196, - "step": 31800 - }, - { - "epoch": 0.9124553526601925, - "grad_norm": 0.3671875, - "learning_rate": 4.631735264300341e-06, - "loss": 0.9025, - "step": 31805 - }, - { - "epoch": 0.9125987979286503, - "grad_norm": 0.375, - "learning_rate": 4.616684472139842e-06, - "loss": 0.9211, - "step": 31810 - }, - { - "epoch": 0.9127422431971082, - "grad_norm": 0.392578125, - "learning_rate": 4.601657595680731e-06, - "loss": 0.8954, - "step": 31815 - }, - { - "epoch": 0.912885688465566, - "grad_norm": 0.396484375, 
- "learning_rate": 4.586654638690724e-06, - "loss": 0.9866, - "step": 31820 - }, - { - "epoch": 0.9130291337340238, - "grad_norm": 0.357421875, - "learning_rate": 4.571675604931547e-06, - "loss": 0.8504, - "step": 31825 - }, - { - "epoch": 0.9131725790024816, - "grad_norm": 0.36328125, - "learning_rate": 4.55672049815895e-06, - "loss": 1.0171, - "step": 31830 - }, - { - "epoch": 0.9133160242709394, - "grad_norm": 0.341796875, - "learning_rate": 4.541789322122647e-06, - "loss": 0.9071, - "step": 31835 - }, - { - "epoch": 0.9134594695393973, - "grad_norm": 0.375, - "learning_rate": 4.5268820805663855e-06, - "loss": 0.877, - "step": 31840 - }, - { - "epoch": 0.913602914807855, - "grad_norm": 0.365234375, - "learning_rate": 4.5119987772278886e-06, - "loss": 0.912, - "step": 31845 - }, - { - "epoch": 0.9137463600763129, - "grad_norm": 0.3671875, - "learning_rate": 4.497139415838858e-06, - "loss": 0.9108, - "step": 31850 - }, - { - "epoch": 0.9138898053447707, - "grad_norm": 0.3671875, - "learning_rate": 4.482304000125071e-06, - "loss": 0.9681, - "step": 31855 - }, - { - "epoch": 0.9140332506132285, - "grad_norm": 0.376953125, - "learning_rate": 4.467492533806217e-06, - "loss": 0.9198, - "step": 31860 - }, - { - "epoch": 0.9141766958816864, - "grad_norm": 0.3515625, - "learning_rate": 4.452705020596015e-06, - "loss": 0.9521, - "step": 31865 - }, - { - "epoch": 0.9143201411501442, - "grad_norm": 0.376953125, - "learning_rate": 4.437941464202177e-06, - "loss": 0.9275, - "step": 31870 - }, - { - "epoch": 0.914463586418602, - "grad_norm": 0.3828125, - "learning_rate": 4.42320186832641e-06, - "loss": 1.0007, - "step": 31875 - }, - { - "epoch": 0.9146070316870598, - "grad_norm": 0.380859375, - "learning_rate": 4.408486236664411e-06, - "loss": 1.0111, - "step": 31880 - }, - { - "epoch": 0.9147504769555176, - "grad_norm": 0.361328125, - "learning_rate": 4.393794572905874e-06, - "loss": 0.8575, - "step": 31885 - }, - { - "epoch": 0.9148939222239755, - "grad_norm": 0.392578125, - "learning_rate": 4.379126880734463e-06, - "loss": 0.7594, - "step": 31890 - }, - { - "epoch": 0.9150373674924333, - "grad_norm": 0.365234375, - "learning_rate": 4.364483163827837e-06, - "loss": 0.9486, - "step": 31895 - }, - { - "epoch": 0.915180812760891, - "grad_norm": 0.37109375, - "learning_rate": 4.349863425857681e-06, - "loss": 0.814, - "step": 31900 - }, - { - "epoch": 0.9153242580293489, - "grad_norm": 0.373046875, - "learning_rate": 4.335267670489629e-06, - "loss": 0.9893, - "step": 31905 - }, - { - "epoch": 0.9154677032978067, - "grad_norm": 0.359375, - "learning_rate": 4.320695901383298e-06, - "loss": 0.9986, - "step": 31910 - }, - { - "epoch": 0.9156111485662646, - "grad_norm": 0.369140625, - "learning_rate": 4.306148122192332e-06, - "loss": 0.9261, - "step": 31915 - }, - { - "epoch": 0.9157545938347224, - "grad_norm": 0.361328125, - "learning_rate": 4.291624336564304e-06, - "loss": 0.925, - "step": 31920 - }, - { - "epoch": 0.9158980391031801, - "grad_norm": 0.41015625, - "learning_rate": 4.277124548140821e-06, - "loss": 0.9644, - "step": 31925 - }, - { - "epoch": 0.916041484371638, - "grad_norm": 0.4140625, - "learning_rate": 4.26264876055743e-06, - "loss": 0.9526, - "step": 31930 - }, - { - "epoch": 0.9161849296400958, - "grad_norm": 0.3828125, - "learning_rate": 4.248196977443708e-06, - "loss": 0.9745, - "step": 31935 - }, - { - "epoch": 0.9163283749085537, - "grad_norm": 0.3515625, - "learning_rate": 4.233769202423166e-06, - "loss": 0.8711, - "step": 31940 - }, - { - "epoch": 0.9164718201770115, - "grad_norm": 
0.408203125, - "learning_rate": 4.219365439113332e-06, - "loss": 0.9524, - "step": 31945 - }, - { - "epoch": 0.9166152654454692, - "grad_norm": 0.375, - "learning_rate": 4.204985691125674e-06, - "loss": 0.9503, - "step": 31950 - }, - { - "epoch": 0.9167587107139271, - "grad_norm": 0.416015625, - "learning_rate": 4.190629962065695e-06, - "loss": 0.8917, - "step": 31955 - }, - { - "epoch": 0.9169021559823849, - "grad_norm": 0.37890625, - "learning_rate": 4.176298255532829e-06, - "loss": 0.9939, - "step": 31960 - }, - { - "epoch": 0.9170456012508428, - "grad_norm": 0.396484375, - "learning_rate": 4.161990575120489e-06, - "loss": 0.9671, - "step": 31965 - }, - { - "epoch": 0.9171890465193006, - "grad_norm": 0.419921875, - "learning_rate": 4.147706924416095e-06, - "loss": 0.8945, - "step": 31970 - }, - { - "epoch": 0.9173324917877583, - "grad_norm": 0.3828125, - "learning_rate": 4.133447307001026e-06, - "loss": 0.9242, - "step": 31975 - }, - { - "epoch": 0.9174759370562162, - "grad_norm": 0.431640625, - "learning_rate": 4.119211726450611e-06, - "loss": 0.9602, - "step": 31980 - }, - { - "epoch": 0.917619382324674, - "grad_norm": 0.37890625, - "learning_rate": 4.105000186334173e-06, - "loss": 1.0335, - "step": 31985 - }, - { - "epoch": 0.9177628275931319, - "grad_norm": 0.388671875, - "learning_rate": 4.0908126902150404e-06, - "loss": 0.9705, - "step": 31990 - }, - { - "epoch": 0.9179062728615897, - "grad_norm": 0.388671875, - "learning_rate": 4.076649241650443e-06, - "loss": 0.9467, - "step": 31995 - }, - { - "epoch": 0.9180497181300474, - "grad_norm": 0.359375, - "learning_rate": 4.062509844191631e-06, - "loss": 0.9693, - "step": 32000 - }, - { - "epoch": 0.9181931633985053, - "grad_norm": 0.404296875, - "learning_rate": 4.048394501383834e-06, - "loss": 1.0236, - "step": 32005 - }, - { - "epoch": 0.9183366086669631, - "grad_norm": 0.419921875, - "learning_rate": 4.0343032167662e-06, - "loss": 0.9736, - "step": 32010 - }, - { - "epoch": 0.918480053935421, - "grad_norm": 0.41796875, - "learning_rate": 4.0202359938718795e-06, - "loss": 1.045, - "step": 32015 - }, - { - "epoch": 0.9186234992038788, - "grad_norm": 0.439453125, - "learning_rate": 4.0061928362279846e-06, - "loss": 0.9485, - "step": 32020 - }, - { - "epoch": 0.9187669444723365, - "grad_norm": 0.431640625, - "learning_rate": 3.992173747355587e-06, - "loss": 0.9061, - "step": 32025 - }, - { - "epoch": 0.9189103897407944, - "grad_norm": 0.365234375, - "learning_rate": 3.978178730769733e-06, - "loss": 1.01, - "step": 32030 - }, - { - "epoch": 0.9190538350092522, - "grad_norm": 0.39453125, - "learning_rate": 3.964207789979446e-06, - "loss": 0.9099, - "step": 32035 - }, - { - "epoch": 0.9191972802777101, - "grad_norm": 0.375, - "learning_rate": 3.95026092848767e-06, - "loss": 0.9042, - "step": 32040 - }, - { - "epoch": 0.9193407255461679, - "grad_norm": 0.40625, - "learning_rate": 3.936338149791341e-06, - "loss": 1.0643, - "step": 32045 - }, - { - "epoch": 0.9194841708146256, - "grad_norm": 0.3828125, - "learning_rate": 3.922439457381355e-06, - "loss": 0.9315, - "step": 32050 - }, - { - "epoch": 0.9196276160830835, - "grad_norm": 0.357421875, - "learning_rate": 3.908564854742569e-06, - "loss": 0.8577, - "step": 32055 - }, - { - "epoch": 0.9197710613515413, - "grad_norm": 0.390625, - "learning_rate": 3.894714345353801e-06, - "loss": 0.926, - "step": 32060 - }, - { - "epoch": 0.9199145066199992, - "grad_norm": 0.37890625, - "learning_rate": 3.8808879326878266e-06, - "loss": 0.9846, - "step": 32065 - }, - { - "epoch": 0.920057951888457, - 
"grad_norm": 0.408203125, - "learning_rate": 3.867085620211352e-06, - "loss": 1.0084, - "step": 32070 - }, - { - "epoch": 0.9202013971569147, - "grad_norm": 0.40625, - "learning_rate": 3.853307411385098e-06, - "loss": 0.9769, - "step": 32075 - }, - { - "epoch": 0.9203448424253726, - "grad_norm": 0.404296875, - "learning_rate": 3.8395533096637015e-06, - "loss": 0.9459, - "step": 32080 - }, - { - "epoch": 0.9204882876938304, - "grad_norm": 0.365234375, - "learning_rate": 3.825823318495769e-06, - "loss": 0.912, - "step": 32085 - }, - { - "epoch": 0.9206317329622883, - "grad_norm": 0.392578125, - "learning_rate": 3.8121174413238257e-06, - "loss": 0.8855, - "step": 32090 - }, - { - "epoch": 0.9207751782307461, - "grad_norm": 0.41796875, - "learning_rate": 3.7984356815844113e-06, - "loss": 0.9539, - "step": 32095 - }, - { - "epoch": 0.9209186234992038, - "grad_norm": 0.42578125, - "learning_rate": 3.7847780427079814e-06, - "loss": 0.9647, - "step": 32100 - }, - { - "epoch": 0.9210620687676617, - "grad_norm": 0.3515625, - "learning_rate": 3.7711445281189416e-06, - "loss": 0.862, - "step": 32105 - }, - { - "epoch": 0.9212055140361195, - "grad_norm": 0.435546875, - "learning_rate": 3.7575351412356576e-06, - "loss": 1.0036, - "step": 32110 - }, - { - "epoch": 0.9213489593045774, - "grad_norm": 0.388671875, - "learning_rate": 3.7439498854704657e-06, - "loss": 0.9595, - "step": 32115 - }, - { - "epoch": 0.9214924045730352, - "grad_norm": 0.3984375, - "learning_rate": 3.73038876422962e-06, - "loss": 0.9226, - "step": 32120 - }, - { - "epoch": 0.9216358498414929, - "grad_norm": 0.376953125, - "learning_rate": 3.7168517809133217e-06, - "loss": 0.8829, - "step": 32125 - }, - { - "epoch": 0.9217792951099508, - "grad_norm": 0.427734375, - "learning_rate": 3.7033389389157567e-06, - "loss": 1.0396, - "step": 32130 - }, - { - "epoch": 0.9219227403784086, - "grad_norm": 0.404296875, - "learning_rate": 3.689850241625015e-06, - "loss": 0.9637, - "step": 32135 - }, - { - "epoch": 0.9220661856468665, - "grad_norm": 0.421875, - "learning_rate": 3.6763856924231678e-06, - "loss": 0.9461, - "step": 32140 - }, - { - "epoch": 0.9222096309153243, - "grad_norm": 0.373046875, - "learning_rate": 3.662945294686182e-06, - "loss": 0.9865, - "step": 32145 - }, - { - "epoch": 0.922353076183782, - "grad_norm": 0.37109375, - "learning_rate": 3.649529051784051e-06, - "loss": 0.9702, - "step": 32150 - }, - { - "epoch": 0.9224965214522399, - "grad_norm": 0.373046875, - "learning_rate": 3.636136967080639e-06, - "loss": 0.9372, - "step": 32155 - }, - { - "epoch": 0.9226399667206977, - "grad_norm": 0.365234375, - "learning_rate": 3.62276904393376e-06, - "loss": 0.8787, - "step": 32160 - }, - { - "epoch": 0.9227834119891556, - "grad_norm": 0.357421875, - "learning_rate": 3.609425285695234e-06, - "loss": 0.9304, - "step": 32165 - }, - { - "epoch": 0.9229268572576134, - "grad_norm": 0.37890625, - "learning_rate": 3.5961056957107273e-06, - "loss": 0.8903, - "step": 32170 - }, - { - "epoch": 0.9230703025260711, - "grad_norm": 0.40625, - "learning_rate": 3.5828102773199258e-06, - "loss": 1.0441, - "step": 32175 - }, - { - "epoch": 0.923213747794529, - "grad_norm": 0.392578125, - "learning_rate": 3.569539033856406e-06, - "loss": 0.9371, - "step": 32180 - }, - { - "epoch": 0.9233571930629868, - "grad_norm": 0.369140625, - "learning_rate": 3.556291968647707e-06, - "loss": 1.1225, - "step": 32185 - }, - { - "epoch": 0.9235006383314447, - "grad_norm": 0.373046875, - "learning_rate": 3.5430690850153046e-06, - "loss": 0.9099, - "step": 32190 - }, - { 
- "epoch": 0.9236440835999025, - "grad_norm": 0.361328125, - "learning_rate": 3.529870386274603e-06, - "loss": 0.8476, - "step": 32195 - }, - { - "epoch": 0.9237875288683602, - "grad_norm": 0.390625, - "learning_rate": 3.516695875734932e-06, - "loss": 0.8934, - "step": 32200 - }, - { - "epoch": 0.9239309741368181, - "grad_norm": 0.431640625, - "learning_rate": 3.5035455566995832e-06, - "loss": 0.9421, - "step": 32205 - }, - { - "epoch": 0.9240744194052759, - "grad_norm": 0.41796875, - "learning_rate": 3.4904194324657748e-06, - "loss": 0.9392, - "step": 32210 - }, - { - "epoch": 0.9242178646737338, - "grad_norm": 0.3671875, - "learning_rate": 3.477317506324651e-06, - "loss": 0.9683, - "step": 32215 - }, - { - "epoch": 0.9243613099421916, - "grad_norm": 0.38671875, - "learning_rate": 3.4642397815612847e-06, - "loss": 0.9752, - "step": 32220 - }, - { - "epoch": 0.9245047552106493, - "grad_norm": 0.375, - "learning_rate": 3.4511862614546866e-06, - "loss": 1.0228, - "step": 32225 - }, - { - "epoch": 0.9246482004791072, - "grad_norm": 0.34375, - "learning_rate": 3.438156949277782e-06, - "loss": 0.957, - "step": 32230 - }, - { - "epoch": 0.924791645747565, - "grad_norm": 0.404296875, - "learning_rate": 3.425151848297481e-06, - "loss": 0.8922, - "step": 32235 - }, - { - "epoch": 0.9249350910160228, - "grad_norm": 0.392578125, - "learning_rate": 3.4121709617745745e-06, - "loss": 0.9135, - "step": 32240 - }, - { - "epoch": 0.9250785362844807, - "grad_norm": 0.38671875, - "learning_rate": 3.3992142929637704e-06, - "loss": 0.9172, - "step": 32245 - }, - { - "epoch": 0.9252219815529384, - "grad_norm": 0.427734375, - "learning_rate": 3.386281845113748e-06, - "loss": 0.9747, - "step": 32250 - }, - { - "epoch": 0.9253654268213963, - "grad_norm": 0.384765625, - "learning_rate": 3.373373621467091e-06, - "loss": 0.958, - "step": 32255 - }, - { - "epoch": 0.9255088720898541, - "grad_norm": 0.3671875, - "learning_rate": 3.3604896252603104e-06, - "loss": 0.9379, - "step": 32260 - }, - { - "epoch": 0.9256523173583119, - "grad_norm": 0.376953125, - "learning_rate": 3.347629859723833e-06, - "loss": 0.9075, - "step": 32265 - }, - { - "epoch": 0.9257957626267698, - "grad_norm": 0.4375, - "learning_rate": 3.334794328082025e-06, - "loss": 1.0806, - "step": 32270 - }, - { - "epoch": 0.9259392078952275, - "grad_norm": 0.3984375, - "learning_rate": 3.3219830335531776e-06, - "loss": 0.9294, - "step": 32275 - }, - { - "epoch": 0.9260826531636854, - "grad_norm": 0.39453125, - "learning_rate": 3.3091959793495107e-06, - "loss": 0.9404, - "step": 32280 - }, - { - "epoch": 0.9262260984321432, - "grad_norm": 0.384765625, - "learning_rate": 3.2964331686771155e-06, - "loss": 0.9818, - "step": 32285 - }, - { - "epoch": 0.926369543700601, - "grad_norm": 0.388671875, - "learning_rate": 3.2836946047360982e-06, - "loss": 0.915, - "step": 32290 - }, - { - "epoch": 0.9265129889690589, - "grad_norm": 0.3671875, - "learning_rate": 3.2709802907203934e-06, - "loss": 0.9422, - "step": 32295 - }, - { - "epoch": 0.9266564342375166, - "grad_norm": 0.375, - "learning_rate": 3.2582902298178953e-06, - "loss": 0.9411, - "step": 32300 - }, - { - "epoch": 0.9267998795059745, - "grad_norm": 0.388671875, - "learning_rate": 3.245624425210425e-06, - "loss": 1.014, - "step": 32305 - }, - { - "epoch": 0.9269433247744323, - "grad_norm": 0.41796875, - "learning_rate": 3.2329828800737096e-06, - "loss": 0.9517, - "step": 32310 - }, - { - "epoch": 0.9270867700428901, - "grad_norm": 0.353515625, - "learning_rate": 3.2203655975774018e-06, - "loss": 0.9598, - 
"step": 32315 - }, - { - "epoch": 0.927230215311348, - "grad_norm": 0.41015625, - "learning_rate": 3.207772580885049e-06, - "loss": 0.9307, - "step": 32320 - }, - { - "epoch": 0.9273736605798057, - "grad_norm": 0.404296875, - "learning_rate": 3.195203833154148e-06, - "loss": 0.9382, - "step": 32325 - }, - { - "epoch": 0.9275171058482636, - "grad_norm": 0.37109375, - "learning_rate": 3.1826593575360887e-06, - "loss": 0.9046, - "step": 32330 - }, - { - "epoch": 0.9276605511167214, - "grad_norm": 0.44140625, - "learning_rate": 3.1701391571761663e-06, - "loss": 0.9459, - "step": 32335 - }, - { - "epoch": 0.9278039963851792, - "grad_norm": 0.37890625, - "learning_rate": 3.1576432352136144e-06, - "loss": 0.9147, - "step": 32340 - }, - { - "epoch": 0.9279474416536371, - "grad_norm": 0.412109375, - "learning_rate": 3.145171594781571e-06, - "loss": 0.9615, - "step": 32345 - }, - { - "epoch": 0.9280908869220948, - "grad_norm": 0.37890625, - "learning_rate": 3.1327242390070677e-06, - "loss": 0.8856, - "step": 32350 - }, - { - "epoch": 0.9282343321905527, - "grad_norm": 0.390625, - "learning_rate": 3.120301171011075e-06, - "loss": 0.9311, - "step": 32355 - }, - { - "epoch": 0.9283777774590105, - "grad_norm": 0.380859375, - "learning_rate": 3.1079023939084573e-06, - "loss": 0.881, - "step": 32360 - }, - { - "epoch": 0.9285212227274683, - "grad_norm": 0.365234375, - "learning_rate": 3.0955279108079827e-06, - "loss": 0.9783, - "step": 32365 - }, - { - "epoch": 0.9286646679959262, - "grad_norm": 0.376953125, - "learning_rate": 3.0831777248123584e-06, - "loss": 0.8706, - "step": 32370 - }, - { - "epoch": 0.928808113264384, - "grad_norm": 0.376953125, - "learning_rate": 3.0708518390181405e-06, - "loss": 0.8662, - "step": 32375 - }, - { - "epoch": 0.9289515585328418, - "grad_norm": 0.3671875, - "learning_rate": 3.0585502565158687e-06, - "loss": 0.9263, - "step": 32380 - }, - { - "epoch": 0.9290950038012996, - "grad_norm": 0.416015625, - "learning_rate": 3.0462729803899416e-06, - "loss": 0.8457, - "step": 32385 - }, - { - "epoch": 0.9292384490697574, - "grad_norm": 0.34765625, - "learning_rate": 3.0340200137186526e-06, - "loss": 0.956, - "step": 32390 - }, - { - "epoch": 0.9293818943382153, - "grad_norm": 0.453125, - "learning_rate": 3.021791359574244e-06, - "loss": 1.0462, - "step": 32395 - }, - { - "epoch": 0.929525339606673, - "grad_norm": 0.60546875, - "learning_rate": 3.0095870210228083e-06, - "loss": 0.9165, - "step": 32400 - }, - { - "epoch": 0.9296687848751309, - "grad_norm": 0.416015625, - "learning_rate": 2.997407001124386e-06, - "loss": 0.9284, - "step": 32405 - }, - { - "epoch": 0.9298122301435887, - "grad_norm": 0.365234375, - "learning_rate": 2.985251302932912e-06, - "loss": 1.0394, - "step": 32410 - }, - { - "epoch": 0.9299556754120465, - "grad_norm": 0.380859375, - "learning_rate": 2.9731199294962153e-06, - "loss": 0.8996, - "step": 32415 - }, - { - "epoch": 0.9300991206805044, - "grad_norm": 0.451171875, - "learning_rate": 2.9610128838560068e-06, - "loss": 0.7722, - "step": 32420 - }, - { - "epoch": 0.9302425659489622, - "grad_norm": 0.36328125, - "learning_rate": 2.9489301690479365e-06, - "loss": 0.8633, - "step": 32425 - }, - { - "epoch": 0.93038601121742, - "grad_norm": 0.384765625, - "learning_rate": 2.936871788101514e-06, - "loss": 0.9575, - "step": 32430 - }, - { - "epoch": 0.9305294564858778, - "grad_norm": 0.47265625, - "learning_rate": 2.9248377440401985e-06, - "loss": 0.8525, - "step": 32435 - }, - { - "epoch": 0.9306729017543356, - "grad_norm": 0.369140625, - "learning_rate": 
2.912828039881288e-06, - "loss": 0.9399, - "step": 32440 - }, - { - "epoch": 0.9308163470227935, - "grad_norm": 0.40234375, - "learning_rate": 2.9008426786360064e-06, - "loss": 0.9139, - "step": 32445 - }, - { - "epoch": 0.9309597922912513, - "grad_norm": 0.384765625, - "learning_rate": 2.8888816633095063e-06, - "loss": 1.0112, - "step": 32450 - }, - { - "epoch": 0.9311032375597091, - "grad_norm": 0.40234375, - "learning_rate": 2.876944996900777e-06, - "loss": 0.8408, - "step": 32455 - }, - { - "epoch": 0.9312466828281669, - "grad_norm": 0.375, - "learning_rate": 2.865032682402724e-06, - "loss": 1.0209, - "step": 32460 - }, - { - "epoch": 0.9313901280966247, - "grad_norm": 0.3359375, - "learning_rate": 2.853144722802159e-06, - "loss": 0.8684, - "step": 32465 - }, - { - "epoch": 0.9315335733650826, - "grad_norm": 0.396484375, - "learning_rate": 2.8412811210797975e-06, - "loss": 0.9591, - "step": 32470 - }, - { - "epoch": 0.9316770186335404, - "grad_norm": 0.4296875, - "learning_rate": 2.829441880210215e-06, - "loss": 0.9181, - "step": 32475 - }, - { - "epoch": 0.9318204639019982, - "grad_norm": 0.404296875, - "learning_rate": 2.817627003161882e-06, - "loss": 0.9117, - "step": 32480 - }, - { - "epoch": 0.931963909170456, - "grad_norm": 0.390625, - "learning_rate": 2.805836492897196e-06, - "loss": 0.909, - "step": 32485 - }, - { - "epoch": 0.9321073544389138, - "grad_norm": 0.37109375, - "learning_rate": 2.7940703523724134e-06, - "loss": 0.8927, - "step": 32490 - }, - { - "epoch": 0.9322507997073717, - "grad_norm": 0.40234375, - "learning_rate": 2.7823285845376855e-06, - "loss": 1.0034, - "step": 32495 - }, - { - "epoch": 0.9323942449758295, - "grad_norm": 0.37890625, - "learning_rate": 2.770611192337047e-06, - "loss": 0.982, - "step": 32500 - }, - { - "epoch": 0.9325376902442873, - "grad_norm": 0.376953125, - "learning_rate": 2.7589181787084474e-06, - "loss": 0.9716, - "step": 32505 - }, - { - "epoch": 0.9326811355127451, - "grad_norm": 0.390625, - "learning_rate": 2.747249546583708e-06, - "loss": 1.0027, - "step": 32510 - }, - { - "epoch": 0.9328245807812029, - "grad_norm": 0.330078125, - "learning_rate": 2.7356052988885105e-06, - "loss": 0.8315, - "step": 32515 - }, - { - "epoch": 0.9329680260496608, - "grad_norm": 0.3984375, - "learning_rate": 2.7239854385424645e-06, - "loss": 0.8765, - "step": 32520 - }, - { - "epoch": 0.9331114713181186, - "grad_norm": 0.376953125, - "learning_rate": 2.7123899684590505e-06, - "loss": 0.9871, - "step": 32525 - }, - { - "epoch": 0.9332549165865764, - "grad_norm": 0.4140625, - "learning_rate": 2.70081889154562e-06, - "loss": 1.0186, - "step": 32530 - }, - { - "epoch": 0.9333983618550342, - "grad_norm": 0.3984375, - "learning_rate": 2.68927221070342e-06, - "loss": 1.0249, - "step": 32535 - }, - { - "epoch": 0.933541807123492, - "grad_norm": 0.408203125, - "learning_rate": 2.6777499288275775e-06, - "loss": 0.9963, - "step": 32540 - }, - { - "epoch": 0.9336852523919499, - "grad_norm": 0.359375, - "learning_rate": 2.6662520488071164e-06, - "loss": 0.9031, - "step": 32545 - }, - { - "epoch": 0.9338286976604077, - "grad_norm": 0.38671875, - "learning_rate": 2.6547785735249187e-06, - "loss": 0.9808, - "step": 32550 - }, - { - "epoch": 0.9339721429288655, - "grad_norm": 0.37890625, - "learning_rate": 2.64332950585775e-06, - "loss": 0.937, - "step": 32555 - }, - { - "epoch": 0.9341155881973233, - "grad_norm": 0.396484375, - "learning_rate": 2.631904848676281e-06, - "loss": 1.0273, - "step": 32560 - }, - { - "epoch": 0.9342590334657811, - "grad_norm": 0.404296875, - 
"learning_rate": 2.6205046048450423e-06, - "loss": 1.041, - "step": 32565 - }, - { - "epoch": 0.934402478734239, - "grad_norm": 0.408203125, - "learning_rate": 2.609128777222447e-06, - "loss": 1.0555, - "step": 32570 - }, - { - "epoch": 0.9345459240026968, - "grad_norm": 0.30859375, - "learning_rate": 2.59777736866077e-06, - "loss": 0.8786, - "step": 32575 - }, - { - "epoch": 0.9346893692711546, - "grad_norm": 0.396484375, - "learning_rate": 2.586450382006178e-06, - "loss": 0.9158, - "step": 32580 - }, - { - "epoch": 0.9348328145396124, - "grad_norm": 0.388671875, - "learning_rate": 2.5751478200987335e-06, - "loss": 0.9939, - "step": 32585 - }, - { - "epoch": 0.9349762598080702, - "grad_norm": 0.447265625, - "learning_rate": 2.563869685772358e-06, - "loss": 0.9023, - "step": 32590 - }, - { - "epoch": 0.9351197050765281, - "grad_norm": 0.412109375, - "learning_rate": 2.5526159818548335e-06, - "loss": 1.0532, - "step": 32595 - }, - { - "epoch": 0.9352631503449859, - "grad_norm": 0.37109375, - "learning_rate": 2.5413867111678367e-06, - "loss": 0.9175, - "step": 32600 - }, - { - "epoch": 0.9354065956134436, - "grad_norm": 0.357421875, - "learning_rate": 2.5301818765268937e-06, - "loss": 0.8503, - "step": 32605 - }, - { - "epoch": 0.9355500408819015, - "grad_norm": 0.380859375, - "learning_rate": 2.5190014807414454e-06, - "loss": 0.9808, - "step": 32610 - }, - { - "epoch": 0.9356934861503593, - "grad_norm": 0.39453125, - "learning_rate": 2.5078455266147826e-06, - "loss": 1.044, - "step": 32615 - }, - { - "epoch": 0.9358369314188172, - "grad_norm": 0.376953125, - "learning_rate": 2.4967140169440464e-06, - "loss": 0.8614, - "step": 32620 - }, - { - "epoch": 0.935980376687275, - "grad_norm": 0.400390625, - "learning_rate": 2.4856069545202697e-06, - "loss": 1.0242, - "step": 32625 - }, - { - "epoch": 0.9361238219557327, - "grad_norm": 0.380859375, - "learning_rate": 2.4745243421283706e-06, - "loss": 0.9554, - "step": 32630 - }, - { - "epoch": 0.9362672672241906, - "grad_norm": 0.349609375, - "learning_rate": 2.463466182547103e-06, - "loss": 0.9685, - "step": 32635 - }, - { - "epoch": 0.9364107124926484, - "grad_norm": 0.408203125, - "learning_rate": 2.452432478549116e-06, - "loss": 0.9837, - "step": 32640 - }, - { - "epoch": 0.9365541577611063, - "grad_norm": 0.380859375, - "learning_rate": 2.44142323290093e-06, - "loss": 0.9975, - "step": 32645 - }, - { - "epoch": 0.9366976030295641, - "grad_norm": 0.384765625, - "learning_rate": 2.430438448362915e-06, - "loss": 0.9209, - "step": 32650 - }, - { - "epoch": 0.9368410482980218, - "grad_norm": 0.345703125, - "learning_rate": 2.419478127689301e-06, - "loss": 1.017, - "step": 32655 - }, - { - "epoch": 0.9369844935664797, - "grad_norm": 0.38671875, - "learning_rate": 2.408542273628189e-06, - "loss": 0.945, - "step": 32660 - }, - { - "epoch": 0.9371279388349375, - "grad_norm": 0.333984375, - "learning_rate": 2.397630888921587e-06, - "loss": 0.8529, - "step": 32665 - }, - { - "epoch": 0.9372713841033954, - "grad_norm": 0.349609375, - "learning_rate": 2.3867439763053166e-06, - "loss": 0.8877, - "step": 32670 - }, - { - "epoch": 0.9374148293718532, - "grad_norm": 0.412109375, - "learning_rate": 2.375881538509084e-06, - "loss": 1.0372, - "step": 32675 - }, - { - "epoch": 0.9375582746403109, - "grad_norm": 0.3984375, - "learning_rate": 2.3650435782564427e-06, - "loss": 0.9947, - "step": 32680 - }, - { - "epoch": 0.9377017199087688, - "grad_norm": 0.38671875, - "learning_rate": 2.354230098264831e-06, - "loss": 0.891, - "step": 32685 - }, - { - "epoch": 
0.9378451651772266, - "grad_norm": 0.36328125, - "learning_rate": 2.343441101245558e-06, - "loss": 0.9249, - "step": 32690 - }, - { - "epoch": 0.9379886104456845, - "grad_norm": 0.380859375, - "learning_rate": 2.3326765899037374e-06, - "loss": 0.9071, - "step": 32695 - }, - { - "epoch": 0.9381320557141423, - "grad_norm": 0.41015625, - "learning_rate": 2.3219365669384206e-06, - "loss": 1.0107, - "step": 32700 - }, - { - "epoch": 0.9382755009826, - "grad_norm": 0.384765625, - "learning_rate": 2.3112210350424657e-06, - "loss": 1.0048, - "step": 32705 - }, - { - "epoch": 0.9384189462510579, - "grad_norm": 0.35546875, - "learning_rate": 2.3005299969026117e-06, - "loss": 1.0286, - "step": 32710 - }, - { - "epoch": 0.9385623915195157, - "grad_norm": 0.369140625, - "learning_rate": 2.289863455199426e-06, - "loss": 0.9507, - "step": 32715 - }, - { - "epoch": 0.9387058367879736, - "grad_norm": 0.353515625, - "learning_rate": 2.2792214126073797e-06, - "loss": 0.8924, - "step": 32720 - }, - { - "epoch": 0.9388492820564314, - "grad_norm": 0.380859375, - "learning_rate": 2.2686038717947832e-06, - "loss": 0.9014, - "step": 32725 - }, - { - "epoch": 0.9389927273248891, - "grad_norm": 0.37890625, - "learning_rate": 2.258010835423774e-06, - "loss": 0.9278, - "step": 32730 - }, - { - "epoch": 0.939136172593347, - "grad_norm": 0.361328125, - "learning_rate": 2.2474423061504045e-06, - "loss": 0.8805, - "step": 32735 - }, - { - "epoch": 0.9392796178618048, - "grad_norm": 0.373046875, - "learning_rate": 2.2368982866245225e-06, - "loss": 0.925, - "step": 32740 - }, - { - "epoch": 0.9394230631302627, - "grad_norm": 0.34765625, - "learning_rate": 2.2263787794898796e-06, - "loss": 0.843, - "step": 32745 - }, - { - "epoch": 0.9395665083987205, - "grad_norm": 0.384765625, - "learning_rate": 2.215883787384032e-06, - "loss": 0.9121, - "step": 32750 - }, - { - "epoch": 0.9397099536671782, - "grad_norm": 0.373046875, - "learning_rate": 2.2054133129384313e-06, - "loss": 1.018, - "step": 32755 - }, - { - "epoch": 0.9398533989356361, - "grad_norm": 0.384765625, - "learning_rate": 2.194967358778366e-06, - "loss": 0.907, - "step": 32760 - }, - { - "epoch": 0.9399968442040939, - "grad_norm": 0.3828125, - "learning_rate": 2.184545927522985e-06, - "loss": 0.9584, - "step": 32765 - }, - { - "epoch": 0.9401402894725518, - "grad_norm": 0.384765625, - "learning_rate": 2.1741490217852545e-06, - "loss": 0.8569, - "step": 32770 - }, - { - "epoch": 0.9402837347410096, - "grad_norm": 0.353515625, - "learning_rate": 2.163776644172044e-06, - "loss": 0.9347, - "step": 32775 - }, - { - "epoch": 0.9404271800094673, - "grad_norm": 0.40234375, - "learning_rate": 2.1534287972840294e-06, - "loss": 0.8958, - "step": 32780 - }, - { - "epoch": 0.9405706252779252, - "grad_norm": 0.37890625, - "learning_rate": 2.143105483715757e-06, - "loss": 0.8922, - "step": 32785 - }, - { - "epoch": 0.940714070546383, - "grad_norm": 0.37109375, - "learning_rate": 2.1328067060556235e-06, - "loss": 0.8956, - "step": 32790 - }, - { - "epoch": 0.9408575158148409, - "grad_norm": 0.373046875, - "learning_rate": 2.1225324668858625e-06, - "loss": 0.8799, - "step": 32795 - }, - { - "epoch": 0.9410009610832987, - "grad_norm": 0.3828125, - "learning_rate": 2.1122827687825473e-06, - "loss": 0.8857, - "step": 32800 - }, - { - "epoch": 0.9411444063517564, - "grad_norm": 0.39453125, - "learning_rate": 2.1020576143156446e-06, - "loss": 0.8465, - "step": 32805 - }, - { - "epoch": 0.9412878516202143, - "grad_norm": 0.404296875, - "learning_rate": 2.091857006048903e-06, - "loss": 
1.0478, - "step": 32810 - }, - { - "epoch": 0.9414312968886721, - "grad_norm": 0.388671875, - "learning_rate": 2.0816809465399543e-06, - "loss": 0.9835, - "step": 32815 - }, - { - "epoch": 0.94157474215713, - "grad_norm": 0.373046875, - "learning_rate": 2.0715294383402695e-06, - "loss": 0.9429, - "step": 32820 - }, - { - "epoch": 0.9417181874255878, - "grad_norm": 0.349609375, - "learning_rate": 2.0614024839951672e-06, - "loss": 0.9895, - "step": 32825 - }, - { - "epoch": 0.9418616326940455, - "grad_norm": 0.3984375, - "learning_rate": 2.051300086043806e-06, - "loss": 0.9852, - "step": 32830 - }, - { - "epoch": 0.9420050779625034, - "grad_norm": 0.390625, - "learning_rate": 2.04122224701917e-06, - "loss": 0.9807, - "step": 32835 - }, - { - "epoch": 0.9421485232309612, - "grad_norm": 0.390625, - "learning_rate": 2.031168969448116e-06, - "loss": 0.9909, - "step": 32840 - }, - { - "epoch": 0.9422919684994191, - "grad_norm": 0.36328125, - "learning_rate": 2.021140255851317e-06, - "loss": 0.9297, - "step": 32845 - }, - { - "epoch": 0.9424354137678769, - "grad_norm": 0.3828125, - "learning_rate": 2.0111361087433165e-06, - "loss": 0.9633, - "step": 32850 - }, - { - "epoch": 0.9425788590363346, - "grad_norm": 0.37109375, - "learning_rate": 2.0011565306324533e-06, - "loss": 0.9797, - "step": 32855 - }, - { - "epoch": 0.9427223043047925, - "grad_norm": 0.38671875, - "learning_rate": 1.9912015240209583e-06, - "loss": 0.9058, - "step": 32860 - }, - { - "epoch": 0.9428657495732503, - "grad_norm": 0.431640625, - "learning_rate": 1.981271091404857e-06, - "loss": 0.9238, - "step": 32865 - }, - { - "epoch": 0.9430091948417082, - "grad_norm": 0.34375, - "learning_rate": 1.9713652352740364e-06, - "loss": 0.9158, - "step": 32870 - }, - { - "epoch": 0.943152640110166, - "grad_norm": 0.34765625, - "learning_rate": 1.9614839581122202e-06, - "loss": 0.9587, - "step": 32875 - }, - { - "epoch": 0.9432960853786237, - "grad_norm": 0.36328125, - "learning_rate": 1.951627262396971e-06, - "loss": 0.9351, - "step": 32880 - }, - { - "epoch": 0.9434395306470816, - "grad_norm": 0.451171875, - "learning_rate": 1.941795150599679e-06, - "loss": 0.979, - "step": 32885 - }, - { - "epoch": 0.9435829759155394, - "grad_norm": 0.35546875, - "learning_rate": 1.9319876251855606e-06, - "loss": 0.9094, - "step": 32890 - }, - { - "epoch": 0.9437264211839973, - "grad_norm": 0.369140625, - "learning_rate": 1.9222046886137047e-06, - "loss": 0.9864, - "step": 32895 - }, - { - "epoch": 0.9438698664524551, - "grad_norm": 0.412109375, - "learning_rate": 1.9124463433370045e-06, - "loss": 0.9841, - "step": 32900 - }, - { - "epoch": 0.9440133117209129, - "grad_norm": 0.37109375, - "learning_rate": 1.9027125918021915e-06, - "loss": 0.9455, - "step": 32905 - }, - { - "epoch": 0.9441567569893707, - "grad_norm": 0.392578125, - "learning_rate": 1.893003436449814e-06, - "loss": 0.994, - "step": 32910 - }, - { - "epoch": 0.9443002022578285, - "grad_norm": 0.375, - "learning_rate": 1.883318879714313e-06, - "loss": 0.975, - "step": 32915 - }, - { - "epoch": 0.9444436475262864, - "grad_norm": 0.412109375, - "learning_rate": 1.8736589240239022e-06, - "loss": 0.9499, - "step": 32920 - }, - { - "epoch": 0.9445870927947442, - "grad_norm": 0.419921875, - "learning_rate": 1.8640235718006438e-06, - "loss": 0.9869, - "step": 32925 - }, - { - "epoch": 0.944730538063202, - "grad_norm": 0.37109375, - "learning_rate": 1.8544128254604277e-06, - "loss": 0.9202, - "step": 32930 - }, - { - "epoch": 0.9448739833316598, - "grad_norm": 0.404296875, - "learning_rate": 
1.8448266874130038e-06, - "loss": 0.9652, - "step": 32935 - }, - { - "epoch": 0.9450174286001176, - "grad_norm": 0.390625, - "learning_rate": 1.8352651600619165e-06, - "loss": 0.8946, - "step": 32940 - }, - { - "epoch": 0.9451608738685754, - "grad_norm": 0.392578125, - "learning_rate": 1.8257282458045477e-06, - "loss": 0.9468, - "step": 32945 - }, - { - "epoch": 0.9453043191370333, - "grad_norm": 0.361328125, - "learning_rate": 1.8162159470321072e-06, - "loss": 0.8948, - "step": 32950 - }, - { - "epoch": 0.945447764405491, - "grad_norm": 0.384765625, - "learning_rate": 1.8067282661296537e-06, - "loss": 0.9557, - "step": 32955 - }, - { - "epoch": 0.9455912096739489, - "grad_norm": 0.35546875, - "learning_rate": 1.7972652054760508e-06, - "loss": 0.9005, - "step": 32960 - }, - { - "epoch": 0.9457346549424067, - "grad_norm": 0.37109375, - "learning_rate": 1.7878267674439898e-06, - "loss": 0.9073, - "step": 32965 - }, - { - "epoch": 0.9458781002108645, - "grad_norm": 0.3984375, - "learning_rate": 1.7784129544000106e-06, - "loss": 0.902, - "step": 32970 - }, - { - "epoch": 0.9460215454793224, - "grad_norm": 0.388671875, - "learning_rate": 1.7690237687044364e-06, - "loss": 0.8805, - "step": 32975 - }, - { - "epoch": 0.9461649907477802, - "grad_norm": 0.412109375, - "learning_rate": 1.7596592127114619e-06, - "loss": 0.9931, - "step": 32980 - }, - { - "epoch": 0.946308436016238, - "grad_norm": 0.400390625, - "learning_rate": 1.7503192887690867e-06, - "loss": 1.0467, - "step": 32985 - }, - { - "epoch": 0.9464518812846958, - "grad_norm": 0.421875, - "learning_rate": 1.741003999219115e-06, - "loss": 0.95, - "step": 32990 - }, - { - "epoch": 0.9465953265531536, - "grad_norm": 0.376953125, - "learning_rate": 1.7317133463972235e-06, - "loss": 0.895, - "step": 32995 - }, - { - "epoch": 0.9467387718216115, - "grad_norm": 0.37890625, - "learning_rate": 1.7224473326328484e-06, - "loss": 0.9748, - "step": 33000 - }, - { - "epoch": 0.9468822170900693, - "grad_norm": 0.40234375, - "learning_rate": 1.7132059602492979e-06, - "loss": 0.9, - "step": 33005 - }, - { - "epoch": 0.9470256623585271, - "grad_norm": 0.37890625, - "learning_rate": 1.703989231563685e-06, - "loss": 0.9931, - "step": 33010 - }, - { - "epoch": 0.9471691076269849, - "grad_norm": 0.396484375, - "learning_rate": 1.6947971488869397e-06, - "loss": 0.9889, - "step": 33015 - }, - { - "epoch": 0.9473125528954427, - "grad_norm": 0.396484375, - "learning_rate": 1.6856297145238177e-06, - "loss": 0.8913, - "step": 33020 - }, - { - "epoch": 0.9474559981639006, - "grad_norm": 0.388671875, - "learning_rate": 1.6764869307729025e-06, - "loss": 0.8674, - "step": 33025 - }, - { - "epoch": 0.9475994434323584, - "grad_norm": 0.384765625, - "learning_rate": 1.667368799926572e-06, - "loss": 0.9381, - "step": 33030 - }, - { - "epoch": 0.9477428887008162, - "grad_norm": 0.4140625, - "learning_rate": 1.6582753242710414e-06, - "loss": 0.9286, - "step": 33035 - }, - { - "epoch": 0.947886333969274, - "grad_norm": 0.36328125, - "learning_rate": 1.6492065060863536e-06, - "loss": 0.947, - "step": 33040 - }, - { - "epoch": 0.9480297792377318, - "grad_norm": 0.421875, - "learning_rate": 1.6401623476463457e-06, - "loss": 1.0194, - "step": 33045 - }, - { - "epoch": 0.9481732245061897, - "grad_norm": 0.384765625, - "learning_rate": 1.6311428512186699e-06, - "loss": 0.9826, - "step": 33050 - }, - { - "epoch": 0.9483166697746475, - "grad_norm": 0.380859375, - "learning_rate": 1.6221480190648286e-06, - "loss": 1.0291, - "step": 33055 - }, - { - "epoch": 0.9484601150431053, - 
"grad_norm": 0.408203125, - "learning_rate": 1.6131778534401176e-06, - "loss": 0.9331, - "step": 33060 - }, - { - "epoch": 0.9486035603115631, - "grad_norm": 0.41015625, - "learning_rate": 1.604232356593649e-06, - "loss": 0.9729, - "step": 33065 - }, - { - "epoch": 0.9487470055800209, - "grad_norm": 0.3671875, - "learning_rate": 1.5953115307683287e-06, - "loss": 0.9449, - "step": 33070 - }, - { - "epoch": 0.9488904508484788, - "grad_norm": 0.388671875, - "learning_rate": 1.5864153782009228e-06, - "loss": 0.928, - "step": 33075 - }, - { - "epoch": 0.9490338961169366, - "grad_norm": 0.375, - "learning_rate": 1.5775439011219806e-06, - "loss": 0.9158, - "step": 33080 - }, - { - "epoch": 0.9491773413853944, - "grad_norm": 0.3515625, - "learning_rate": 1.568697101755856e-06, - "loss": 0.9308, - "step": 33085 - }, - { - "epoch": 0.9493207866538522, - "grad_norm": 0.4140625, - "learning_rate": 1.5598749823207525e-06, - "loss": 0.8879, - "step": 33090 - }, - { - "epoch": 0.94946423192231, - "grad_norm": 0.40625, - "learning_rate": 1.5510775450286452e-06, - "loss": 0.9298, - "step": 33095 - }, - { - "epoch": 0.9496076771907679, - "grad_norm": 0.431640625, - "learning_rate": 1.5423047920853473e-06, - "loss": 1.0097, - "step": 33100 - }, - { - "epoch": 0.9497511224592257, - "grad_norm": 0.40234375, - "learning_rate": 1.533556725690466e-06, - "loss": 0.9233, - "step": 33105 - }, - { - "epoch": 0.9498945677276835, - "grad_norm": 0.34375, - "learning_rate": 1.5248333480374355e-06, - "loss": 0.9026, - "step": 33110 - }, - { - "epoch": 0.9500380129961413, - "grad_norm": 0.3984375, - "learning_rate": 1.5161346613134842e-06, - "loss": 0.9704, - "step": 33115 - }, - { - "epoch": 0.9501814582645991, - "grad_norm": 0.41015625, - "learning_rate": 1.5074606676996561e-06, - "loss": 0.8883, - "step": 33120 - }, - { - "epoch": 0.950324903533057, - "grad_norm": 0.380859375, - "learning_rate": 1.4988113693708005e-06, - "loss": 0.9074, - "step": 33125 - }, - { - "epoch": 0.9504683488015148, - "grad_norm": 0.41015625, - "learning_rate": 1.4901867684955829e-06, - "loss": 1.0102, - "step": 33130 - }, - { - "epoch": 0.9506117940699726, - "grad_norm": 0.408203125, - "learning_rate": 1.481586867236462e-06, - "loss": 0.926, - "step": 33135 - }, - { - "epoch": 0.9507552393384304, - "grad_norm": 0.38671875, - "learning_rate": 1.4730116677497351e-06, - "loss": 0.9207, - "step": 33140 - }, - { - "epoch": 0.9508986846068882, - "grad_norm": 0.408203125, - "learning_rate": 1.4644611721854607e-06, - "loss": 1.0381, - "step": 33145 - }, - { - "epoch": 0.9510421298753461, - "grad_norm": 0.376953125, - "learning_rate": 1.4559353826875344e-06, - "loss": 0.9095, - "step": 33150 - }, - { - "epoch": 0.9511855751438039, - "grad_norm": 0.373046875, - "learning_rate": 1.4474343013936576e-06, - "loss": 0.9079, - "step": 33155 - }, - { - "epoch": 0.9513290204122617, - "grad_norm": 0.37109375, - "learning_rate": 1.438957930435314e-06, - "loss": 0.8989, - "step": 33160 - }, - { - "epoch": 0.9514724656807195, - "grad_norm": 0.37890625, - "learning_rate": 1.4305062719378149e-06, - "loss": 0.9485, - "step": 33165 - }, - { - "epoch": 0.9516159109491773, - "grad_norm": 0.365234375, - "learning_rate": 1.422079328020265e-06, - "loss": 0.9342, - "step": 33170 - }, - { - "epoch": 0.9517593562176352, - "grad_norm": 0.345703125, - "learning_rate": 1.4136771007955628e-06, - "loss": 0.9279, - "step": 33175 - }, - { - "epoch": 0.951902801486093, - "grad_norm": 0.408203125, - "learning_rate": 1.405299592370435e-06, - "loss": 1.0164, - "step": 33180 - }, - { - 
"epoch": 0.9520462467545509, - "grad_norm": 0.380859375, - "learning_rate": 1.3969468048453894e-06, - "loss": 0.9752, - "step": 33185 - }, - { - "epoch": 0.9521896920230086, - "grad_norm": 0.373046875, - "learning_rate": 1.388618740314751e-06, - "loss": 0.94, - "step": 33190 - }, - { - "epoch": 0.9523331372914664, - "grad_norm": 0.423828125, - "learning_rate": 1.3803154008666165e-06, - "loss": 1.0592, - "step": 33195 - }, - { - "epoch": 0.9524765825599243, - "grad_norm": 0.349609375, - "learning_rate": 1.37203678858292e-06, - "loss": 0.882, - "step": 33200 - }, - { - "epoch": 0.9526200278283821, - "grad_norm": 0.4296875, - "learning_rate": 1.363782905539368e-06, - "loss": 0.9964, - "step": 33205 - }, - { - "epoch": 0.95276347309684, - "grad_norm": 0.359375, - "learning_rate": 1.355553753805483e-06, - "loss": 1.0224, - "step": 33210 - }, - { - "epoch": 0.9529069183652977, - "grad_norm": 0.3671875, - "learning_rate": 1.347349335444581e-06, - "loss": 0.9943, - "step": 33215 - }, - { - "epoch": 0.9530503636337555, - "grad_norm": 0.375, - "learning_rate": 1.3391696525137831e-06, - "loss": 0.9793, - "step": 33220 - }, - { - "epoch": 0.9531938089022134, - "grad_norm": 0.396484375, - "learning_rate": 1.3310147070639933e-06, - "loss": 1.0316, - "step": 33225 - }, - { - "epoch": 0.9533372541706712, - "grad_norm": 0.3828125, - "learning_rate": 1.3228845011399204e-06, - "loss": 0.8525, - "step": 33230 - }, - { - "epoch": 0.953480699439129, - "grad_norm": 0.388671875, - "learning_rate": 1.3147790367800782e-06, - "loss": 0.9529, - "step": 33235 - }, - { - "epoch": 0.9536241447075868, - "grad_norm": 0.416015625, - "learning_rate": 1.3066983160167746e-06, - "loss": 1.0705, - "step": 33240 - }, - { - "epoch": 0.9537675899760446, - "grad_norm": 0.392578125, - "learning_rate": 1.2986423408760883e-06, - "loss": 0.958, - "step": 33245 - }, - { - "epoch": 0.9539110352445025, - "grad_norm": 0.375, - "learning_rate": 1.2906111133779376e-06, - "loss": 1.0471, - "step": 33250 - }, - { - "epoch": 0.9540544805129603, - "grad_norm": 0.41796875, - "learning_rate": 1.282604635536e-06, - "loss": 1.0092, - "step": 33255 - }, - { - "epoch": 0.9541979257814182, - "grad_norm": 0.365234375, - "learning_rate": 1.27462290935777e-06, - "loss": 0.8594, - "step": 33260 - }, - { - "epoch": 0.9543413710498759, - "grad_norm": 0.40234375, - "learning_rate": 1.2666659368445132e-06, - "loss": 0.8647, - "step": 33265 - }, - { - "epoch": 0.9544848163183337, - "grad_norm": 0.39453125, - "learning_rate": 1.2587337199913118e-06, - "loss": 0.8356, - "step": 33270 - }, - { - "epoch": 0.9546282615867916, - "grad_norm": 0.39453125, - "learning_rate": 1.250826260787019e-06, - "loss": 0.9214, - "step": 33275 - }, - { - "epoch": 0.9547717068552494, - "grad_norm": 0.341796875, - "learning_rate": 1.2429435612143158e-06, - "loss": 1.0455, - "step": 33280 - }, - { - "epoch": 0.9549151521237071, - "grad_norm": 0.392578125, - "learning_rate": 1.2350856232496211e-06, - "loss": 0.9166, - "step": 33285 - }, - { - "epoch": 0.955058597392165, - "grad_norm": 0.357421875, - "learning_rate": 1.227252448863192e-06, - "loss": 0.9495, - "step": 33290 - }, - { - "epoch": 0.9552020426606228, - "grad_norm": 0.40234375, - "learning_rate": 1.2194440400190688e-06, - "loss": 1.0324, - "step": 33295 - }, - { - "epoch": 0.9553454879290807, - "grad_norm": 0.396484375, - "learning_rate": 1.211660398675052e-06, - "loss": 1.079, - "step": 33300 - }, - { - "epoch": 0.9554889331975385, - "grad_norm": 0.36328125, - "learning_rate": 1.2039015267827803e-06, - "loss": 0.9632, - 
"step": 33305 - }, - { - "epoch": 0.9556323784659962, - "grad_norm": 0.373046875, - "learning_rate": 1.1961674262876199e-06, - "loss": 0.9453, - "step": 33310 - }, - { - "epoch": 0.9557758237344541, - "grad_norm": 0.3828125, - "learning_rate": 1.1884580991287974e-06, - "loss": 0.9411, - "step": 33315 - }, - { - "epoch": 0.9559192690029119, - "grad_norm": 0.3828125, - "learning_rate": 1.1807735472392778e-06, - "loss": 0.9059, - "step": 33320 - }, - { - "epoch": 0.9560627142713698, - "grad_norm": 0.41015625, - "learning_rate": 1.1731137725458196e-06, - "loss": 1.0088, - "step": 33325 - }, - { - "epoch": 0.9562061595398276, - "grad_norm": 0.4375, - "learning_rate": 1.1654787769689868e-06, - "loss": 0.9606, - "step": 33330 - }, - { - "epoch": 0.9563496048082853, - "grad_norm": 0.41015625, - "learning_rate": 1.1578685624231144e-06, - "loss": 0.9877, - "step": 33335 - }, - { - "epoch": 0.9564930500767432, - "grad_norm": 0.388671875, - "learning_rate": 1.150283130816343e-06, - "loss": 0.8973, - "step": 33340 - }, - { - "epoch": 0.956636495345201, - "grad_norm": 0.39453125, - "learning_rate": 1.1427224840505845e-06, - "loss": 0.9616, - "step": 33345 - }, - { - "epoch": 0.9567799406136589, - "grad_norm": 0.41796875, - "learning_rate": 1.1351866240215336e-06, - "loss": 0.953, - "step": 33350 - }, - { - "epoch": 0.9569233858821167, - "grad_norm": 0.38671875, - "learning_rate": 1.1276755526186678e-06, - "loss": 1.001, - "step": 33355 - }, - { - "epoch": 0.9570668311505744, - "grad_norm": 0.373046875, - "learning_rate": 1.1201892717252692e-06, - "loss": 0.9458, - "step": 33360 - }, - { - "epoch": 0.9572102764190323, - "grad_norm": 0.390625, - "learning_rate": 1.1127277832183925e-06, - "loss": 0.8768, - "step": 33365 - }, - { - "epoch": 0.9573537216874901, - "grad_norm": 0.353515625, - "learning_rate": 1.1052910889688629e-06, - "loss": 0.9413, - "step": 33370 - }, - { - "epoch": 0.957497166955948, - "grad_norm": 0.37890625, - "learning_rate": 1.0978791908413223e-06, - "loss": 0.8846, - "step": 33375 - }, - { - "epoch": 0.9576406122244058, - "grad_norm": 0.345703125, - "learning_rate": 1.0904920906941618e-06, - "loss": 1.0595, - "step": 33380 - }, - { - "epoch": 0.9577840574928635, - "grad_norm": 0.392578125, - "learning_rate": 1.0831297903795778e-06, - "loss": 0.8906, - "step": 33385 - }, - { - "epoch": 0.9579275027613214, - "grad_norm": 0.416015625, - "learning_rate": 1.075792291743516e-06, - "loss": 0.9094, - "step": 33390 - }, - { - "epoch": 0.9580709480297792, - "grad_norm": 0.361328125, - "learning_rate": 1.0684795966257489e-06, - "loss": 0.9552, - "step": 33395 - }, - { - "epoch": 0.9582143932982371, - "grad_norm": 0.42578125, - "learning_rate": 1.0611917068597877e-06, - "loss": 0.9571, - "step": 33400 - }, - { - "epoch": 0.9583578385666949, - "grad_norm": 0.3828125, - "learning_rate": 1.0539286242729706e-06, - "loss": 0.9773, - "step": 33405 - }, - { - "epoch": 0.9585012838351527, - "grad_norm": 0.40234375, - "learning_rate": 1.0466903506863523e-06, - "loss": 0.9354, - "step": 33410 - }, - { - "epoch": 0.9586447291036105, - "grad_norm": 0.41796875, - "learning_rate": 1.0394768879148475e-06, - "loss": 0.9984, - "step": 33415 - }, - { - "epoch": 0.9587881743720683, - "grad_norm": 0.3984375, - "learning_rate": 1.032288237767065e-06, - "loss": 0.9961, - "step": 33420 - }, - { - "epoch": 0.9589316196405262, - "grad_norm": 0.36328125, - "learning_rate": 1.025124402045452e-06, - "loss": 0.9788, - "step": 33425 - }, - { - "epoch": 0.959075064908984, - "grad_norm": 0.384765625, - "learning_rate": 
1.0179853825462271e-06, - "loss": 0.8724, - "step": 33430 - }, - { - "epoch": 0.9592185101774418, - "grad_norm": 0.37109375, - "learning_rate": 1.0108711810593585e-06, - "loss": 0.8923, - "step": 33435 - }, - { - "epoch": 0.9593619554458996, - "grad_norm": 0.515625, - "learning_rate": 1.0037817993686084e-06, - "loss": 0.9808, - "step": 33440 - }, - { - "epoch": 0.9595054007143574, - "grad_norm": 0.408203125, - "learning_rate": 9.9671723925151e-07, - "loss": 0.9084, - "step": 33445 - }, - { - "epoch": 0.9596488459828153, - "grad_norm": 0.384765625, - "learning_rate": 9.896775024793914e-07, - "loss": 0.9673, - "step": 33450 - }, - { - "epoch": 0.9597922912512731, - "grad_norm": 0.369140625, - "learning_rate": 9.826625908173514e-07, - "loss": 0.8834, - "step": 33455 - }, - { - "epoch": 0.9599357365197309, - "grad_norm": 0.384765625, - "learning_rate": 9.756725060242277e-07, - "loss": 0.9617, - "step": 33460 - }, - { - "epoch": 0.9600791817881887, - "grad_norm": 0.3984375, - "learning_rate": 9.68707249852685e-07, - "loss": 1.1226, - "step": 33465 - }, - { - "epoch": 0.9602226270566465, - "grad_norm": 0.40625, - "learning_rate": 9.617668240491372e-07, - "loss": 0.976, - "step": 33470 - }, - { - "epoch": 0.9603660723251044, - "grad_norm": 0.4140625, - "learning_rate": 9.548512303537816e-07, - "loss": 0.9285, - "step": 33475 - }, - { - "epoch": 0.9605095175935622, - "grad_norm": 0.380859375, - "learning_rate": 9.479604705005529e-07, - "loss": 0.9513, - "step": 33480 - }, - { - "epoch": 0.96065296286202, - "grad_norm": 0.404296875, - "learning_rate": 9.410945462172139e-07, - "loss": 0.9355, - "step": 33485 - }, - { - "epoch": 0.9607964081304778, - "grad_norm": 0.357421875, - "learning_rate": 9.342534592252761e-07, - "loss": 1.006, - "step": 33490 - }, - { - "epoch": 0.9609398533989356, - "grad_norm": 0.375, - "learning_rate": 9.274372112400121e-07, - "loss": 1.0461, - "step": 33495 - }, - { - "epoch": 0.9610832986673935, - "grad_norm": 0.361328125, - "learning_rate": 9.206458039704768e-07, - "loss": 0.9415, - "step": 33500 - }, - { - "epoch": 0.9612267439358513, - "grad_norm": 0.3671875, - "learning_rate": 9.138792391195083e-07, - "loss": 0.9729, - "step": 33505 - }, - { - "epoch": 0.9613701892043091, - "grad_norm": 0.359375, - "learning_rate": 9.07137518383705e-07, - "loss": 0.9286, - "step": 33510 - }, - { - "epoch": 0.9615136344727669, - "grad_norm": 0.37109375, - "learning_rate": 9.004206434534146e-07, - "loss": 0.9773, - "step": 33515 - }, - { - "epoch": 0.9616570797412247, - "grad_norm": 0.35546875, - "learning_rate": 8.9372861601279e-07, - "loss": 1.0255, - "step": 33520 - }, - { - "epoch": 0.9618005250096826, - "grad_norm": 0.4453125, - "learning_rate": 8.870614377397446e-07, - "loss": 0.9096, - "step": 33525 - }, - { - "epoch": 0.9619439702781404, - "grad_norm": 0.400390625, - "learning_rate": 8.804191103059523e-07, - "loss": 0.9693, - "step": 33530 - }, - { - "epoch": 0.9620874155465982, - "grad_norm": 0.400390625, - "learning_rate": 8.738016353768585e-07, - "loss": 0.9038, - "step": 33535 - }, - { - "epoch": 0.962230860815056, - "grad_norm": 0.412109375, - "learning_rate": 8.672090146116917e-07, - "loss": 0.9495, - "step": 33540 - }, - { - "epoch": 0.9623743060835138, - "grad_norm": 0.39453125, - "learning_rate": 8.606412496634186e-07, - "loss": 1.0077, - "step": 33545 - }, - { - "epoch": 0.9625177513519717, - "grad_norm": 0.40234375, - "learning_rate": 8.540983421787996e-07, - "loss": 0.9602, - "step": 33550 - }, - { - "epoch": 0.9626611966204295, - "grad_norm": 0.369140625, - 
"learning_rate": 8.475802937983557e-07, - "loss": 0.9398, - "step": 33555 - }, - { - "epoch": 0.9628046418888873, - "grad_norm": 0.361328125, - "learning_rate": 8.410871061563797e-07, - "loss": 0.9816, - "step": 33560 - }, - { - "epoch": 0.9629480871573451, - "grad_norm": 0.373046875, - "learning_rate": 8.346187808809136e-07, - "loss": 0.9877, - "step": 33565 - }, - { - "epoch": 0.9630915324258029, - "grad_norm": 0.353515625, - "learning_rate": 8.281753195937714e-07, - "loss": 0.8666, - "step": 33570 - }, - { - "epoch": 0.9632349776942608, - "grad_norm": 0.384765625, - "learning_rate": 8.217567239105606e-07, - "loss": 0.9065, - "step": 33575 - }, - { - "epoch": 0.9633784229627186, - "grad_norm": 0.38671875, - "learning_rate": 8.153629954406161e-07, - "loss": 0.8922, - "step": 33580 - }, - { - "epoch": 0.9635218682311764, - "grad_norm": 0.384765625, - "learning_rate": 8.089941357870556e-07, - "loss": 1.0669, - "step": 33585 - }, - { - "epoch": 0.9636653134996342, - "grad_norm": 0.349609375, - "learning_rate": 8.026501465467684e-07, - "loss": 1.0159, - "step": 33590 - }, - { - "epoch": 0.963808758768092, - "grad_norm": 0.357421875, - "learning_rate": 7.963310293103931e-07, - "loss": 0.9045, - "step": 33595 - }, - { - "epoch": 0.9639522040365499, - "grad_norm": 0.37890625, - "learning_rate": 7.900367856623403e-07, - "loss": 0.9077, - "step": 33600 - }, - { - "epoch": 0.9640956493050077, - "grad_norm": 0.369140625, - "learning_rate": 7.837674171807696e-07, - "loss": 0.8709, - "step": 33605 - }, - { - "epoch": 0.9642390945734655, - "grad_norm": 0.4375, - "learning_rate": 7.775229254376348e-07, - "loss": 0.8867, - "step": 33610 - }, - { - "epoch": 0.9643825398419233, - "grad_norm": 0.39453125, - "learning_rate": 7.713033119986169e-07, - "loss": 0.8524, - "step": 33615 - }, - { - "epoch": 0.9645259851103811, - "grad_norm": 0.380859375, - "learning_rate": 7.651085784231793e-07, - "loss": 1.0415, - "step": 33620 - }, - { - "epoch": 0.9646694303788389, - "grad_norm": 0.40234375, - "learning_rate": 7.589387262645464e-07, - "loss": 0.9607, - "step": 33625 - }, - { - "epoch": 0.9648128756472968, - "grad_norm": 0.3828125, - "learning_rate": 7.52793757069703e-07, - "loss": 0.9395, - "step": 33630 - }, - { - "epoch": 0.9649563209157546, - "grad_norm": 0.34375, - "learning_rate": 7.466736723793943e-07, - "loss": 1.0147, - "step": 33635 - }, - { - "epoch": 0.9650997661842124, - "grad_norm": 0.41015625, - "learning_rate": 7.405784737281151e-07, - "loss": 0.9812, - "step": 33640 - }, - { - "epoch": 0.9652432114526702, - "grad_norm": 0.392578125, - "learning_rate": 7.345081626441319e-07, - "loss": 0.9441, - "step": 33645 - }, - { - "epoch": 0.965386656721128, - "grad_norm": 0.373046875, - "learning_rate": 7.284627406494826e-07, - "loss": 0.9753, - "step": 33650 - }, - { - "epoch": 0.9655301019895859, - "grad_norm": 0.40234375, - "learning_rate": 7.224422092599437e-07, - "loss": 0.9135, - "step": 33655 - }, - { - "epoch": 0.9656735472580437, - "grad_norm": 0.359375, - "learning_rate": 7.164465699850409e-07, - "loss": 0.9811, - "step": 33660 - }, - { - "epoch": 0.9658169925265015, - "grad_norm": 0.416015625, - "learning_rate": 7.104758243281051e-07, - "loss": 1.0384, - "step": 33665 - }, - { - "epoch": 0.9659604377949593, - "grad_norm": 0.408203125, - "learning_rate": 7.045299737861832e-07, - "loss": 0.9232, - "step": 33670 - }, - { - "epoch": 0.9661038830634171, - "grad_norm": 0.392578125, - "learning_rate": 6.986090198500828e-07, - "loss": 0.9268, - "step": 33675 - }, - { - "epoch": 0.966247328331875, - 
"grad_norm": 0.412109375, - "learning_rate": 6.92712964004405e-07, - "loss": 0.9862, - "step": 33680 - }, - { - "epoch": 0.9663907736003328, - "grad_norm": 0.408203125, - "learning_rate": 6.868418077274563e-07, - "loss": 1.0349, - "step": 33685 - }, - { - "epoch": 0.9665342188687907, - "grad_norm": 0.4296875, - "learning_rate": 6.809955524913369e-07, - "loss": 0.9276, - "step": 33690 - }, - { - "epoch": 0.9666776641372484, - "grad_norm": 0.380859375, - "learning_rate": 6.751741997618965e-07, - "loss": 0.9261, - "step": 33695 - }, - { - "epoch": 0.9668211094057062, - "grad_norm": 0.373046875, - "learning_rate": 6.693777509987453e-07, - "loss": 0.9025, - "step": 33700 - }, - { - "epoch": 0.9669645546741641, - "grad_norm": 0.404296875, - "learning_rate": 6.636062076552207e-07, - "loss": 0.9953, - "step": 33705 - }, - { - "epoch": 0.9671079999426219, - "grad_norm": 0.380859375, - "learning_rate": 6.578595711784541e-07, - "loss": 0.9132, - "step": 33710 - }, - { - "epoch": 0.9672514452110798, - "grad_norm": 0.412109375, - "learning_rate": 6.521378430093039e-07, - "loss": 1.0079, - "step": 33715 - }, - { - "epoch": 0.9673948904795375, - "grad_norm": 0.357421875, - "learning_rate": 6.464410245824004e-07, - "loss": 1.0112, - "step": 33720 - }, - { - "epoch": 0.9675383357479953, - "grad_norm": 0.37890625, - "learning_rate": 6.407691173261232e-07, - "loss": 0.9581, - "step": 33725 - }, - { - "epoch": 0.9676817810164532, - "grad_norm": 0.384765625, - "learning_rate": 6.351221226625903e-07, - "loss": 1.037, - "step": 33730 - }, - { - "epoch": 0.967825226284911, - "grad_norm": 0.373046875, - "learning_rate": 6.295000420077024e-07, - "loss": 1.0568, - "step": 33735 - }, - { - "epoch": 0.9679686715533689, - "grad_norm": 0.380859375, - "learning_rate": 6.239028767710986e-07, - "loss": 0.9397, - "step": 33740 - }, - { - "epoch": 0.9681121168218266, - "grad_norm": 0.392578125, - "learning_rate": 6.183306283561452e-07, - "loss": 0.938, - "step": 33745 - }, - { - "epoch": 0.9682555620902844, - "grad_norm": 0.396484375, - "learning_rate": 6.127832981600246e-07, - "loss": 0.8421, - "step": 33750 - }, - { - "epoch": 0.9683990073587423, - "grad_norm": 0.3984375, - "learning_rate": 6.072608875736019e-07, - "loss": 0.9446, - "step": 33755 - }, - { - "epoch": 0.9685424526272001, - "grad_norm": 0.36328125, - "learning_rate": 6.017633979815363e-07, - "loss": 0.9558, - "step": 33760 - }, - { - "epoch": 0.968685897895658, - "grad_norm": 0.40234375, - "learning_rate": 5.962908307622362e-07, - "loss": 0.9361, - "step": 33765 - }, - { - "epoch": 0.9688293431641157, - "grad_norm": 0.396484375, - "learning_rate": 5.908431872878372e-07, - "loss": 0.9004, - "step": 33770 - }, - { - "epoch": 0.9689727884325735, - "grad_norm": 0.404296875, - "learning_rate": 5.854204689242471e-07, - "loss": 0.9088, - "step": 33775 - }, - { - "epoch": 0.9691162337010314, - "grad_norm": 0.41015625, - "learning_rate": 5.800226770311113e-07, - "loss": 0.9965, - "step": 33780 - }, - { - "epoch": 0.9692596789694892, - "grad_norm": 0.439453125, - "learning_rate": 5.746498129618471e-07, - "loss": 1.0521, - "step": 33785 - }, - { - "epoch": 0.9694031242379471, - "grad_norm": 0.359375, - "learning_rate": 5.693018780635995e-07, - "loss": 0.9941, - "step": 33790 - }, - { - "epoch": 0.9695465695064048, - "grad_norm": 0.390625, - "learning_rate": 5.639788736772733e-07, - "loss": 0.9376, - "step": 33795 - }, - { - "epoch": 0.9696900147748626, - "grad_norm": 0.369140625, - "learning_rate": 5.58680801137501e-07, - "loss": 0.9336, - "step": 33800 - }, - { - 
"epoch": 0.9698334600433205, - "grad_norm": 0.384765625, - "learning_rate": 5.53407661772698e-07, - "loss": 0.8708, - "step": 33805 - }, - { - "epoch": 0.9699769053117783, - "grad_norm": 0.357421875, - "learning_rate": 5.481594569050174e-07, - "loss": 0.9038, - "step": 33810 - }, - { - "epoch": 0.9701203505802362, - "grad_norm": 0.361328125, - "learning_rate": 5.429361878503292e-07, - "loss": 0.8461, - "step": 33815 - }, - { - "epoch": 0.9702637958486939, - "grad_norm": 0.47265625, - "learning_rate": 5.377378559183077e-07, - "loss": 1.0028, - "step": 33820 - }, - { - "epoch": 0.9704072411171517, - "grad_norm": 0.373046875, - "learning_rate": 5.325644624123327e-07, - "loss": 0.9661, - "step": 33825 - }, - { - "epoch": 0.9705506863856096, - "grad_norm": 0.365234375, - "learning_rate": 5.274160086295332e-07, - "loss": 0.9304, - "step": 33830 - }, - { - "epoch": 0.9706941316540674, - "grad_norm": 0.4296875, - "learning_rate": 5.222924958607989e-07, - "loss": 0.9854, - "step": 33835 - }, - { - "epoch": 0.9708375769225253, - "grad_norm": 0.3671875, - "learning_rate": 5.171939253907687e-07, - "loss": 0.8977, - "step": 33840 - }, - { - "epoch": 0.970981022190983, - "grad_norm": 0.392578125, - "learning_rate": 5.121202984978091e-07, - "loss": 0.9478, - "step": 33845 - }, - { - "epoch": 0.9711244674594408, - "grad_norm": 0.38671875, - "learning_rate": 5.070716164540579e-07, - "loss": 0.9506, - "step": 33850 - }, - { - "epoch": 0.9712679127278987, - "grad_norm": 0.408203125, - "learning_rate": 5.020478805253692e-07, - "loss": 0.9876, - "step": 33855 - }, - { - "epoch": 0.9714113579963565, - "grad_norm": 0.388671875, - "learning_rate": 4.970490919713577e-07, - "loss": 0.9754, - "step": 33860 - }, - { - "epoch": 0.9715548032648144, - "grad_norm": 0.400390625, - "learning_rate": 4.920752520453876e-07, - "loss": 0.9626, - "step": 33865 - }, - { - "epoch": 0.9716982485332721, - "grad_norm": 0.373046875, - "learning_rate": 4.871263619945721e-07, - "loss": 1.0021, - "step": 33870 - }, - { - "epoch": 0.9718416938017299, - "grad_norm": 0.423828125, - "learning_rate": 4.82202423059741e-07, - "loss": 1.0151, - "step": 33875 - }, - { - "epoch": 0.9719851390701878, - "grad_norm": 0.3515625, - "learning_rate": 4.773034364754958e-07, - "loss": 0.9315, - "step": 33880 - }, - { - "epoch": 0.9721285843386456, - "grad_norm": 0.408203125, - "learning_rate": 4.724294034701759e-07, - "loss": 0.9905, - "step": 33885 - }, - { - "epoch": 0.9722720296071035, - "grad_norm": 0.361328125, - "learning_rate": 4.675803252658484e-07, - "loss": 0.8918, - "step": 33890 - }, - { - "epoch": 0.9724154748755612, - "grad_norm": 0.3984375, - "learning_rate": 4.627562030783406e-07, - "loss": 0.8703, - "step": 33895 - }, - { - "epoch": 0.972558920144019, - "grad_norm": 0.365234375, - "learning_rate": 4.5795703811721825e-07, - "loss": 0.9949, - "step": 33900 - }, - { - "epoch": 0.9727023654124769, - "grad_norm": 0.388671875, - "learning_rate": 4.531828315857856e-07, - "loss": 0.8976, - "step": 33905 - }, - { - "epoch": 0.9728458106809347, - "grad_norm": 0.376953125, - "learning_rate": 4.484335846810961e-07, - "loss": 0.8016, - "step": 33910 - }, - { - "epoch": 0.9729892559493926, - "grad_norm": 0.4609375, - "learning_rate": 4.437092985939417e-07, - "loss": 0.8494, - "step": 33915 - }, - { - "epoch": 0.9731327012178503, - "grad_norm": 0.392578125, - "learning_rate": 4.3900997450885274e-07, - "loss": 0.9466, - "step": 33920 - }, - { - "epoch": 0.9732761464863081, - "grad_norm": 0.380859375, - "learning_rate": 4.3433561360408656e-07, - "loss": 
0.8567, - "step": 33925 - }, - { - "epoch": 0.973419591754766, - "grad_norm": 0.3515625, - "learning_rate": 4.2968621705168354e-07, - "loss": 0.8586, - "step": 33930 - }, - { - "epoch": 0.9735630370232238, - "grad_norm": 0.431640625, - "learning_rate": 4.2506178601737777e-07, - "loss": 0.9103, - "step": 33935 - }, - { - "epoch": 0.9737064822916817, - "grad_norm": 0.431640625, - "learning_rate": 4.204623216606751e-07, - "loss": 0.937, - "step": 33940 - }, - { - "epoch": 0.9738499275601394, - "grad_norm": 0.37890625, - "learning_rate": 4.1588782513481974e-07, - "loss": 1.0578, - "step": 33945 - }, - { - "epoch": 0.9739933728285972, - "grad_norm": 0.359375, - "learning_rate": 4.113382975867608e-07, - "loss": 0.9021, - "step": 33950 - }, - { - "epoch": 0.9741368180970551, - "grad_norm": 0.365234375, - "learning_rate": 4.068137401572414e-07, - "loss": 0.9689, - "step": 33955 - }, - { - "epoch": 0.9742802633655129, - "grad_norm": 0.400390625, - "learning_rate": 4.023141539806985e-07, - "loss": 0.9434, - "step": 33960 - }, - { - "epoch": 0.9744237086339707, - "grad_norm": 0.357421875, - "learning_rate": 3.9783954018532964e-07, - "loss": 0.88, - "step": 33965 - }, - { - "epoch": 0.9745671539024285, - "grad_norm": 0.39453125, - "learning_rate": 3.9338989989307073e-07, - "loss": 0.9427, - "step": 33970 - }, - { - "epoch": 0.9747105991708863, - "grad_norm": 0.40234375, - "learning_rate": 3.8896523421957375e-07, - "loss": 0.9325, - "step": 33975 - }, - { - "epoch": 0.9748540444393442, - "grad_norm": 0.37890625, - "learning_rate": 3.845655442742624e-07, - "loss": 0.9218, - "step": 33980 - }, - { - "epoch": 0.974997489707802, - "grad_norm": 0.390625, - "learning_rate": 3.8019083116028755e-07, - "loss": 0.9604, - "step": 33985 - }, - { - "epoch": 0.9751409349762598, - "grad_norm": 0.3984375, - "learning_rate": 3.7584109597451623e-07, - "loss": 0.9209, - "step": 33990 - }, - { - "epoch": 0.9752843802447176, - "grad_norm": 0.36328125, - "learning_rate": 3.7151633980756496e-07, - "loss": 0.9445, - "step": 33995 - }, - { - "epoch": 0.9754278255131754, - "grad_norm": 0.50390625, - "learning_rate": 3.672165637438218e-07, - "loss": 0.9713, - "step": 34000 - }, - { - "epoch": 0.9755712707816333, - "grad_norm": 0.400390625, - "learning_rate": 3.629417688613468e-07, - "loss": 0.9597, - "step": 34005 - }, - { - "epoch": 0.9757147160500911, - "grad_norm": 0.38671875, - "learning_rate": 3.586919562319935e-07, - "loss": 0.9497, - "step": 34010 - }, - { - "epoch": 0.9758581613185489, - "grad_norm": 0.36328125, - "learning_rate": 3.5446712692129844e-07, - "loss": 0.9422, - "step": 34015 - }, - { - "epoch": 0.9760016065870067, - "grad_norm": 0.349609375, - "learning_rate": 3.5026728198860324e-07, - "loss": 0.9384, - "step": 34020 - }, - { - "epoch": 0.9761450518554645, - "grad_norm": 0.384765625, - "learning_rate": 3.4609242248692107e-07, - "loss": 1.02, - "step": 34025 - }, - { - "epoch": 0.9762884971239224, - "grad_norm": 0.353515625, - "learning_rate": 3.4194254946302573e-07, - "loss": 0.8468, - "step": 34030 - }, - { - "epoch": 0.9764319423923802, - "grad_norm": 0.3828125, - "learning_rate": 3.3781766395742933e-07, - "loss": 1.0154, - "step": 34035 - }, - { - "epoch": 0.976575387660838, - "grad_norm": 0.3671875, - "learning_rate": 3.337177670043823e-07, - "loss": 0.8691, - "step": 34040 - }, - { - "epoch": 0.9767188329292958, - "grad_norm": 0.3671875, - "learning_rate": 3.2964285963185124e-07, - "loss": 0.9113, - "step": 34045 - }, - { - "epoch": 0.9768622781977536, - "grad_norm": 0.40234375, - "learning_rate": 
3.255929428615523e-07, - "loss": 0.8704, - "step": 34050 - }, - { - "epoch": 0.9770057234662115, - "grad_norm": 0.361328125, - "learning_rate": 3.215680177089286e-07, - "loss": 0.978, - "step": 34055 - }, - { - "epoch": 0.9771491687346693, - "grad_norm": 0.380859375, - "learning_rate": 3.175680851831619e-07, - "loss": 0.9438, - "step": 34060 - }, - { - "epoch": 0.9772926140031271, - "grad_norm": 0.380859375, - "learning_rate": 3.135931462871722e-07, - "loss": 0.9863, - "step": 34065 - }, - { - "epoch": 0.977436059271585, - "grad_norm": 0.36328125, - "learning_rate": 3.0964320201759545e-07, - "loss": 0.8466, - "step": 34070 - }, - { - "epoch": 0.9775795045400427, - "grad_norm": 0.365234375, - "learning_rate": 3.057182533648173e-07, - "loss": 1.0576, - "step": 34075 - }, - { - "epoch": 0.9777229498085006, - "grad_norm": 0.396484375, - "learning_rate": 3.0181830131295053e-07, - "loss": 0.9031, - "step": 34080 - }, - { - "epoch": 0.9778663950769584, - "grad_norm": 0.37890625, - "learning_rate": 2.9794334683982406e-07, - "loss": 0.8934, - "step": 34085 - }, - { - "epoch": 0.9780098403454162, - "grad_norm": 0.369140625, - "learning_rate": 2.9409339091703844e-07, - "loss": 0.9949, - "step": 34090 - }, - { - "epoch": 0.978153285613874, - "grad_norm": 0.376953125, - "learning_rate": 2.902684345098994e-07, - "loss": 0.9195, - "step": 34095 - }, - { - "epoch": 0.9782967308823318, - "grad_norm": 0.408203125, - "learning_rate": 2.8646847857742854e-07, - "loss": 0.9191, - "step": 34100 - }, - { - "epoch": 0.9784401761507897, - "grad_norm": 0.3828125, - "learning_rate": 2.8269352407243045e-07, - "loss": 0.947, - "step": 34105 - }, - { - "epoch": 0.9785836214192475, - "grad_norm": 0.37109375, - "learning_rate": 2.789435719413813e-07, - "loss": 0.9279, - "step": 34110 - }, - { - "epoch": 0.9787270666877053, - "grad_norm": 0.392578125, - "learning_rate": 2.7521862312451795e-07, - "loss": 0.9078, - "step": 34115 - }, - { - "epoch": 0.9788705119561631, - "grad_norm": 0.37890625, - "learning_rate": 2.7151867855581546e-07, - "loss": 0.9673, - "step": 34120 - }, - { - "epoch": 0.9790139572246209, - "grad_norm": 0.419921875, - "learning_rate": 2.6784373916297624e-07, - "loss": 0.9142, - "step": 34125 - }, - { - "epoch": 0.9791574024930788, - "grad_norm": 0.46484375, - "learning_rate": 2.641938058674187e-07, - "loss": 0.9658, - "step": 34130 - }, - { - "epoch": 0.9793008477615366, - "grad_norm": 0.3515625, - "learning_rate": 2.6056887958429976e-07, - "loss": 0.9222, - "step": 34135 - }, - { - "epoch": 0.9794442930299944, - "grad_norm": 0.404296875, - "learning_rate": 2.569689612225035e-07, - "loss": 0.9296, - "step": 34140 - }, - { - "epoch": 0.9795877382984522, - "grad_norm": 0.3828125, - "learning_rate": 2.5339405168465223e-07, - "loss": 0.9112, - "step": 34145 - }, - { - "epoch": 0.97973118356691, - "grad_norm": 0.416015625, - "learning_rate": 2.4984415186709576e-07, - "loss": 0.9068, - "step": 34150 - }, - { - "epoch": 0.9798746288353679, - "grad_norm": 0.36328125, - "learning_rate": 2.463192626598998e-07, - "loss": 0.919, - "step": 34155 - }, - { - "epoch": 0.9800180741038257, - "grad_norm": 0.40625, - "learning_rate": 2.4281938494686853e-07, - "loss": 1.0724, - "step": 34160 - }, - { - "epoch": 0.9801615193722835, - "grad_norm": 0.392578125, - "learning_rate": 2.393445196055444e-07, - "loss": 0.9651, - "step": 34165 - }, - { - "epoch": 0.9803049646407413, - "grad_norm": 0.375, - "learning_rate": 2.3589466750718604e-07, - "loss": 0.916, - "step": 34170 - }, - { - "epoch": 0.9804484099091991, - "grad_norm": 
0.4375, - "learning_rate": 2.324698295167793e-07, - "loss": 0.9395, - "step": 34175 - }, - { - "epoch": 0.980591855177657, - "grad_norm": 0.380859375, - "learning_rate": 2.2907000649304845e-07, - "loss": 0.8805, - "step": 34180 - }, - { - "epoch": 0.9807353004461148, - "grad_norm": 0.39453125, - "learning_rate": 2.2569519928843374e-07, - "loss": 0.8721, - "step": 34185 - }, - { - "epoch": 0.9808787457145726, - "grad_norm": 0.408203125, - "learning_rate": 2.2234540874911392e-07, - "loss": 0.9692, - "step": 34190 - }, - { - "epoch": 0.9810221909830305, - "grad_norm": 0.421875, - "learning_rate": 2.1902063571498378e-07, - "loss": 0.9494, - "step": 34195 - }, - { - "epoch": 0.9811656362514882, - "grad_norm": 0.4375, - "learning_rate": 2.1572088101968758e-07, - "loss": 1.0797, - "step": 34200 - }, - { - "epoch": 0.9813090815199461, - "grad_norm": 0.455078125, - "learning_rate": 2.1244614549056352e-07, - "loss": 0.8957, - "step": 34205 - }, - { - "epoch": 0.9814525267884039, - "grad_norm": 0.37109375, - "learning_rate": 2.0919642994869925e-07, - "loss": 0.8953, - "step": 34210 - }, - { - "epoch": 0.9815959720568617, - "grad_norm": 0.419921875, - "learning_rate": 2.0597173520890967e-07, - "loss": 0.9258, - "step": 34215 - }, - { - "epoch": 0.9817394173253196, - "grad_norm": 0.384765625, - "learning_rate": 2.0277206207972576e-07, - "loss": 0.973, - "step": 34220 - }, - { - "epoch": 0.9818828625937773, - "grad_norm": 0.40625, - "learning_rate": 1.995974113634169e-07, - "loss": 0.95, - "step": 34225 - }, - { - "epoch": 0.9820263078622352, - "grad_norm": 0.419921875, - "learning_rate": 1.9644778385596864e-07, - "loss": 0.9855, - "step": 34230 - }, - { - "epoch": 0.982169753130693, - "grad_norm": 0.373046875, - "learning_rate": 1.9332318034708253e-07, - "loss": 0.9383, - "step": 34235 - }, - { - "epoch": 0.9823131983991508, - "grad_norm": 0.40234375, - "learning_rate": 1.902236016202208e-07, - "loss": 0.9576, - "step": 34240 - }, - { - "epoch": 0.9824566436676087, - "grad_norm": 0.427734375, - "learning_rate": 1.8714904845251734e-07, - "loss": 0.9123, - "step": 34245 - }, - { - "epoch": 0.9826000889360664, - "grad_norm": 0.384765625, - "learning_rate": 1.8409952161489997e-07, - "loss": 0.9244, - "step": 34250 - }, - { - "epoch": 0.9827435342045243, - "grad_norm": 0.349609375, - "learning_rate": 1.8107502187195702e-07, - "loss": 0.8956, - "step": 34255 - }, - { - "epoch": 0.9828869794729821, - "grad_norm": 0.376953125, - "learning_rate": 1.7807554998203747e-07, - "loss": 0.8823, - "step": 34260 - }, - { - "epoch": 0.9830304247414399, - "grad_norm": 0.375, - "learning_rate": 1.7510110669720636e-07, - "loss": 0.9769, - "step": 34265 - }, - { - "epoch": 0.9831738700098978, - "grad_norm": 0.365234375, - "learning_rate": 1.7215169276325605e-07, - "loss": 0.9164, - "step": 34270 - }, - { - "epoch": 0.9833173152783555, - "grad_norm": 0.373046875, - "learning_rate": 1.6922730891969497e-07, - "loss": 0.9796, - "step": 34275 - }, - { - "epoch": 0.9834607605468134, - "grad_norm": 0.3671875, - "learning_rate": 1.663279558997699e-07, - "loss": 0.9643, - "step": 34280 - }, - { - "epoch": 0.9836042058152712, - "grad_norm": 0.38671875, - "learning_rate": 1.634536344304327e-07, - "loss": 0.8788, - "step": 34285 - }, - { - "epoch": 0.983747651083729, - "grad_norm": 0.41015625, - "learning_rate": 1.6060434523238466e-07, - "loss": 0.8934, - "step": 34290 - }, - { - "epoch": 0.9838910963521869, - "grad_norm": 0.416015625, - "learning_rate": 1.5778008902002095e-07, - "loss": 0.9535, - "step": 34295 - }, - { - "epoch": 
0.9840345416206446, - "grad_norm": 0.34765625, - "learning_rate": 1.5498086650147513e-07, - "loss": 0.9098, - "step": 34300 - }, - { - "epoch": 0.9841779868891024, - "grad_norm": 0.40625, - "learning_rate": 1.5220667837860804e-07, - "loss": 0.9655, - "step": 34305 - }, - { - "epoch": 0.9843214321575603, - "grad_norm": 0.375, - "learning_rate": 1.4945752534699653e-07, - "loss": 0.9918, - "step": 34310 - }, - { - "epoch": 0.9844648774260181, - "grad_norm": 0.380859375, - "learning_rate": 1.4673340809594482e-07, - "loss": 0.942, - "step": 34315 - }, - { - "epoch": 0.984608322694476, - "grad_norm": 0.400390625, - "learning_rate": 1.4403432730847323e-07, - "loss": 0.9839, - "step": 34320 - }, - { - "epoch": 0.9847517679629337, - "grad_norm": 0.369140625, - "learning_rate": 1.4136028366134036e-07, - "loss": 0.8846, - "step": 34325 - }, - { - "epoch": 0.9848952132313915, - "grad_norm": 0.380859375, - "learning_rate": 1.3871127782500993e-07, - "loss": 0.9682, - "step": 34330 - }, - { - "epoch": 0.9850386584998494, - "grad_norm": 0.39453125, - "learning_rate": 1.3608731046366174e-07, - "loss": 0.7744, - "step": 34335 - }, - { - "epoch": 0.9851821037683072, - "grad_norm": 0.404296875, - "learning_rate": 1.3348838223523618e-07, - "loss": 0.9374, - "step": 34340 - }, - { - "epoch": 0.9853255490367651, - "grad_norm": 0.33984375, - "learning_rate": 1.3091449379134534e-07, - "loss": 0.8052, - "step": 34345 - }, - { - "epoch": 0.9854689943052228, - "grad_norm": 0.37890625, - "learning_rate": 1.2836564577735078e-07, - "loss": 0.9244, - "step": 34350 - }, - { - "epoch": 0.9856124395736806, - "grad_norm": 0.453125, - "learning_rate": 1.2584183883234123e-07, - "loss": 0.9064, - "step": 34355 - }, - { - "epoch": 0.9857558848421385, - "grad_norm": 0.357421875, - "learning_rate": 1.2334307358911056e-07, - "loss": 0.946, - "step": 34360 - }, - { - "epoch": 0.9858993301105963, - "grad_norm": 0.408203125, - "learning_rate": 1.2086935067417983e-07, - "loss": 0.9854, - "step": 34365 - }, - { - "epoch": 0.9860427753790542, - "grad_norm": 0.412109375, - "learning_rate": 1.1842067070779728e-07, - "loss": 0.9539, - "step": 34370 - }, - { - "epoch": 0.9861862206475119, - "grad_norm": 0.365234375, - "learning_rate": 1.1599703430392739e-07, - "loss": 0.878, - "step": 34375 - }, - { - "epoch": 0.9863296659159697, - "grad_norm": 0.35546875, - "learning_rate": 1.1359844207023962e-07, - "loss": 0.7989, - "step": 34380 - }, - { - "epoch": 0.9864731111844276, - "grad_norm": 0.390625, - "learning_rate": 1.1122489460816398e-07, - "loss": 1.007, - "step": 34385 - }, - { - "epoch": 0.9866165564528854, - "grad_norm": 0.365234375, - "learning_rate": 1.0887639251280224e-07, - "loss": 0.9024, - "step": 34390 - }, - { - "epoch": 0.9867600017213433, - "grad_norm": 0.359375, - "learning_rate": 1.0655293637300556e-07, - "loss": 0.9773, - "step": 34395 - }, - { - "epoch": 0.986903446989801, - "grad_norm": 0.39453125, - "learning_rate": 1.0425452677135238e-07, - "loss": 1.0352, - "step": 34400 - }, - { - "epoch": 0.9870468922582588, - "grad_norm": 0.3671875, - "learning_rate": 1.0198116428412618e-07, - "loss": 1.0191, - "step": 34405 - }, - { - "epoch": 0.9871903375267167, - "grad_norm": 0.474609375, - "learning_rate": 9.973284948132656e-08, - "loss": 0.9563, - "step": 34410 - }, - { - "epoch": 0.9873337827951745, - "grad_norm": 0.396484375, - "learning_rate": 9.750958292666923e-08, - "loss": 0.8916, - "step": 34415 - }, - { - "epoch": 0.9874772280636324, - "grad_norm": 0.33203125, - "learning_rate": 9.531136517761941e-08, - "loss": 0.9273, - 
"step": 34420 - }, - { - "epoch": 0.9876206733320901, - "grad_norm": 0.37109375, - "learning_rate": 9.31381967853362e-08, - "loss": 0.9063, - "step": 34425 - }, - { - "epoch": 0.9877641186005479, - "grad_norm": 0.3671875, - "learning_rate": 9.099007829469486e-08, - "loss": 0.8682, - "step": 34430 - }, - { - "epoch": 0.9879075638690058, - "grad_norm": 0.34765625, - "learning_rate": 8.886701024429789e-08, - "loss": 0.9094, - "step": 34435 - }, - { - "epoch": 0.9880510091374636, - "grad_norm": 0.38671875, - "learning_rate": 8.676899316648613e-08, - "loss": 1.0162, - "step": 34440 - }, - { - "epoch": 0.9881944544059215, - "grad_norm": 0.419921875, - "learning_rate": 8.469602758728324e-08, - "loss": 1.0277, - "step": 34445 - }, - { - "epoch": 0.9883378996743792, - "grad_norm": 0.404296875, - "learning_rate": 8.264811402646233e-08, - "loss": 1.0131, - "step": 34450 - }, - { - "epoch": 0.988481344942837, - "grad_norm": 0.40625, - "learning_rate": 8.062525299747936e-08, - "loss": 0.9037, - "step": 34455 - }, - { - "epoch": 0.9886247902112949, - "grad_norm": 0.384765625, - "learning_rate": 7.862744500756192e-08, - "loss": 0.9613, - "step": 34460 - }, - { - "epoch": 0.9887682354797527, - "grad_norm": 0.412109375, - "learning_rate": 7.665469055759822e-08, - "loss": 0.9741, - "step": 34465 - }, - { - "epoch": 0.9889116807482106, - "grad_norm": 0.408203125, - "learning_rate": 7.470699014223703e-08, - "loss": 0.944, - "step": 34470 - }, - { - "epoch": 0.9890551260166683, - "grad_norm": 0.388671875, - "learning_rate": 7.278434424983216e-08, - "loss": 0.9529, - "step": 34475 - }, - { - "epoch": 0.9891985712851261, - "grad_norm": 0.388671875, - "learning_rate": 7.088675336244244e-08, - "loss": 0.8864, - "step": 34480 - }, - { - "epoch": 0.989342016553584, - "grad_norm": 0.357421875, - "learning_rate": 6.901421795585395e-08, - "loss": 0.9865, - "step": 34485 - }, - { - "epoch": 0.9894854618220418, - "grad_norm": 0.396484375, - "learning_rate": 6.71667384995911e-08, - "loss": 0.9299, - "step": 34490 - }, - { - "epoch": 0.9896289070904997, - "grad_norm": 0.345703125, - "learning_rate": 6.534431545686115e-08, - "loss": 0.9895, - "step": 34495 - }, - { - "epoch": 0.9897723523589574, - "grad_norm": 0.392578125, - "learning_rate": 6.35469492846208e-08, - "loss": 0.9503, - "step": 34500 - }, - { - "epoch": 0.9899157976274152, - "grad_norm": 0.3828125, - "learning_rate": 6.177464043349845e-08, - "loss": 0.8726, - "step": 34505 - }, - { - "epoch": 0.9900592428958731, - "grad_norm": 0.37109375, - "learning_rate": 6.00273893479053e-08, - "loss": 0.9555, - "step": 34510 - }, - { - "epoch": 0.9902026881643309, - "grad_norm": 0.373046875, - "learning_rate": 5.830519646590204e-08, - "loss": 0.9818, - "step": 34515 - }, - { - "epoch": 0.9903461334327888, - "grad_norm": 0.47265625, - "learning_rate": 5.660806221932102e-08, - "loss": 0.9177, - "step": 34520 - }, - { - "epoch": 0.9904895787012465, - "grad_norm": 0.361328125, - "learning_rate": 5.493598703367742e-08, - "loss": 0.9943, - "step": 34525 - }, - { - "epoch": 0.9906330239697043, - "grad_norm": 0.365234375, - "learning_rate": 5.3288971328224747e-08, - "loss": 0.9329, - "step": 34530 - }, - { - "epoch": 0.9907764692381622, - "grad_norm": 0.392578125, - "learning_rate": 5.166701551591047e-08, - "loss": 0.9434, - "step": 34535 - }, - { - "epoch": 0.99091991450662, - "grad_norm": 0.3828125, - "learning_rate": 5.0070120003420375e-08, - "loss": 0.968, - "step": 34540 - }, - { - "epoch": 0.9910633597750779, - "grad_norm": 0.38671875, - "learning_rate": 4.84982851911564e-08, 
- "loss": 1.0503, - "step": 34545 - }, - { - "epoch": 0.9912068050435356, - "grad_norm": 0.369140625, - "learning_rate": 4.6951511473203316e-08, - "loss": 0.8986, - "step": 34550 - }, - { - "epoch": 0.9913502503119934, - "grad_norm": 0.380859375, - "learning_rate": 4.542979923741753e-08, - "loss": 1.0017, - "step": 34555 - }, - { - "epoch": 0.9914936955804513, - "grad_norm": 0.421875, - "learning_rate": 4.3933148865316075e-08, - "loss": 0.9823, - "step": 34560 - }, - { - "epoch": 0.9916371408489091, - "grad_norm": 0.412109375, - "learning_rate": 4.2461560732176555e-08, - "loss": 1.0573, - "step": 34565 - }, - { - "epoch": 0.991780586117367, - "grad_norm": 0.373046875, - "learning_rate": 4.101503520695937e-08, - "loss": 0.9163, - "step": 34570 - }, - { - "epoch": 0.9919240313858247, - "grad_norm": 0.388671875, - "learning_rate": 3.9593572652363295e-08, - "loss": 0.903, - "step": 34575 - }, - { - "epoch": 0.9920674766542825, - "grad_norm": 0.38671875, - "learning_rate": 3.819717342480322e-08, - "loss": 0.9764, - "step": 34580 - }, - { - "epoch": 0.9922109219227404, - "grad_norm": 0.40234375, - "learning_rate": 3.682583787438798e-08, - "loss": 1.0149, - "step": 34585 - }, - { - "epoch": 0.9923543671911982, - "grad_norm": 0.39453125, - "learning_rate": 3.547956634495364e-08, - "loss": 0.9535, - "step": 34590 - }, - { - "epoch": 0.9924978124596561, - "grad_norm": 0.4609375, - "learning_rate": 3.4158359174074614e-08, - "loss": 0.952, - "step": 34595 - }, - { - "epoch": 0.9926412577281138, - "grad_norm": 0.375, - "learning_rate": 3.286221669299705e-08, - "loss": 0.9299, - "step": 34600 - }, - { - "epoch": 0.9927847029965716, - "grad_norm": 0.40625, - "learning_rate": 3.159113922672763e-08, - "loss": 0.9092, - "step": 34605 - }, - { - "epoch": 0.9929281482650295, - "grad_norm": 0.39453125, - "learning_rate": 3.0345127093955875e-08, - "loss": 0.8972, - "step": 34610 - }, - { - "epoch": 0.9930715935334873, - "grad_norm": 0.376953125, - "learning_rate": 2.9124180607087437e-08, - "loss": 0.9552, - "step": 34615 - }, - { - "epoch": 0.9932150388019452, - "grad_norm": 0.361328125, - "learning_rate": 2.7928300072277424e-08, - "loss": 0.8272, - "step": 34620 - }, - { - "epoch": 0.993358484070403, - "grad_norm": 0.388671875, - "learning_rate": 2.6757485789341565e-08, - "loss": 0.9761, - "step": 34625 - }, - { - "epoch": 0.9935019293388607, - "grad_norm": 0.412109375, - "learning_rate": 2.561173805186723e-08, - "loss": 0.9668, - "step": 34630 - }, - { - "epoch": 0.9936453746073186, - "grad_norm": 0.357421875, - "learning_rate": 2.449105714712463e-08, - "loss": 1.0036, - "step": 34635 - }, - { - "epoch": 0.9937888198757764, - "grad_norm": 0.375, - "learning_rate": 2.339544335610011e-08, - "loss": 0.9538, - "step": 34640 - }, - { - "epoch": 0.9939322651442343, - "grad_norm": 0.396484375, - "learning_rate": 2.2324896953496154e-08, - "loss": 0.9581, - "step": 34645 - }, - { - "epoch": 0.994075710412692, - "grad_norm": 0.39453125, - "learning_rate": 2.1279418207742486e-08, - "loss": 0.8594, - "step": 34650 - }, - { - "epoch": 0.9942191556811498, - "grad_norm": 0.40234375, - "learning_rate": 2.025900738097386e-08, - "loss": 0.9258, - "step": 34655 - }, - { - "epoch": 0.9943626009496077, - "grad_norm": 0.373046875, - "learning_rate": 1.9263664729030073e-08, - "loss": 0.867, - "step": 34660 - }, - { - "epoch": 0.9945060462180655, - "grad_norm": 0.3984375, - "learning_rate": 1.829339050148926e-08, - "loss": 0.9512, - "step": 34665 - }, - { - "epoch": 0.9946494914865233, - "grad_norm": 0.388671875, - "learning_rate": 
1.7348184941623492e-08, - "loss": 0.9649, - "step": 34670 - }, - { - "epoch": 0.9947929367549811, - "grad_norm": 0.427734375, - "learning_rate": 1.6428048286432074e-08, - "loss": 1.0501, - "step": 34675 - }, - { - "epoch": 0.9949363820234389, - "grad_norm": 0.376953125, - "learning_rate": 1.5532980766608252e-08, - "loss": 0.9892, - "step": 34680 - }, - { - "epoch": 0.9950798272918968, - "grad_norm": 0.357421875, - "learning_rate": 1.466298260659471e-08, - "loss": 0.9275, - "step": 34685 - }, - { - "epoch": 0.9952232725603546, - "grad_norm": 0.3984375, - "learning_rate": 1.3818054024516969e-08, - "loss": 0.9644, - "step": 34690 - }, - { - "epoch": 0.9953667178288124, - "grad_norm": 0.376953125, - "learning_rate": 1.2998195232227783e-08, - "loss": 0.9826, - "step": 34695 - }, - { - "epoch": 0.9955101630972703, - "grad_norm": 0.375, - "learning_rate": 1.2203406435284948e-08, - "loss": 0.897, - "step": 34700 - }, - { - "epoch": 0.995653608365728, - "grad_norm": 0.369140625, - "learning_rate": 1.1433687832984596e-08, - "loss": 0.9444, - "step": 34705 - }, - { - "epoch": 0.9957970536341859, - "grad_norm": 0.373046875, - "learning_rate": 1.0689039618305696e-08, - "loss": 0.8778, - "step": 34710 - }, - { - "epoch": 0.9959404989026437, - "grad_norm": 0.353515625, - "learning_rate": 9.969461977965554e-09, - "loss": 1.0509, - "step": 34715 - }, - { - "epoch": 0.9960839441711015, - "grad_norm": 0.37890625, - "learning_rate": 9.274955092386516e-09, - "loss": 0.9509, - "step": 34720 - }, - { - "epoch": 0.9962273894395594, - "grad_norm": 0.380859375, - "learning_rate": 8.60551913569596e-09, - "loss": 0.9537, - "step": 34725 - }, - { - "epoch": 0.9963708347080171, - "grad_norm": 0.380859375, - "learning_rate": 7.961154275737403e-09, - "loss": 0.9344, - "step": 34730 - }, - { - "epoch": 0.996514279976475, - "grad_norm": 0.37890625, - "learning_rate": 7.341860674092704e-09, - "loss": 0.9246, - "step": 34735 - }, - { - "epoch": 0.9966577252449328, - "grad_norm": 0.369140625, - "learning_rate": 6.747638486026553e-09, - "loss": 0.91, - "step": 34740 - }, - { - "epoch": 0.9968011705133906, - "grad_norm": 0.4453125, - "learning_rate": 6.178487860530879e-09, - "loss": 0.942, - "step": 34745 - }, - { - "epoch": 0.9969446157818485, - "grad_norm": 0.470703125, - "learning_rate": 5.634408940313751e-09, - "loss": 1.0696, - "step": 34750 - }, - { - "epoch": 0.9970880610503062, - "grad_norm": 0.396484375, - "learning_rate": 5.115401861788271e-09, - "loss": 1.0094, - "step": 34755 - }, - { - "epoch": 0.9972315063187641, - "grad_norm": 0.404296875, - "learning_rate": 4.621466755094784e-09, - "loss": 0.9581, - "step": 34760 - }, - { - "epoch": 0.9973749515872219, - "grad_norm": 0.365234375, - "learning_rate": 4.152603744067563e-09, - "loss": 0.9481, - "step": 34765 - }, - { - "epoch": 0.9975183968556797, - "grad_norm": 0.388671875, - "learning_rate": 3.7088129462792277e-09, - "loss": 0.9385, - "step": 34770 - }, - { - "epoch": 0.9976618421241376, - "grad_norm": 0.345703125, - "learning_rate": 3.2900944729963303e-09, - "loss": 0.9076, - "step": 34775 - }, - { - "epoch": 0.9978052873925953, - "grad_norm": 0.349609375, - "learning_rate": 2.896448429201559e-09, - "loss": 1.053, - "step": 34780 - }, - { - "epoch": 0.9979487326610532, - "grad_norm": 0.369140625, - "learning_rate": 2.527874913593742e-09, - "loss": 0.9521, - "step": 34785 - }, - { - "epoch": 0.998092177929511, - "grad_norm": 0.423828125, - "learning_rate": 2.1843740185878423e-09, - "loss": 0.9766, - "step": 34790 - }, - { - "epoch": 0.9982356231979688, - 
"grad_norm": 0.384765625, - "learning_rate": 1.865945830326066e-09, - "loss": 0.8898, - "step": 34795 - }, - { - "epoch": 0.9983790684664267, - "grad_norm": 0.416015625, - "learning_rate": 1.5725904286223446e-09, - "loss": 0.931, - "step": 34800 - }, - { - "epoch": 0.9985225137348844, - "grad_norm": 0.353515625, - "learning_rate": 1.3043078870511594e-09, - "loss": 0.9298, - "step": 34805 - }, - { - "epoch": 0.9986659590033423, - "grad_norm": 0.39453125, - "learning_rate": 1.0610982728698203e-09, - "loss": 0.8984, - "step": 34810 - }, - { - "epoch": 0.9988094042718001, - "grad_norm": 0.380859375, - "learning_rate": 8.429616470628787e-10, - "loss": 1.0385, - "step": 34815 - }, - { - "epoch": 0.9989528495402579, - "grad_norm": 0.43359375, - "learning_rate": 6.498980643199204e-10, - "loss": 0.8592, - "step": 34820 - }, - { - "epoch": 0.9990962948087158, - "grad_norm": 0.36328125, - "learning_rate": 4.819075730577715e-10, - "loss": 1.0503, - "step": 34825 - }, - { - "epoch": 0.9992397400771735, - "grad_norm": 0.380859375, - "learning_rate": 3.389902153760893e-10, - "loss": 0.9081, - "step": 34830 - }, - { - "epoch": 0.9993831853456314, - "grad_norm": 0.361328125, - "learning_rate": 2.2114602713507737e-10, - "loss": 0.9476, - "step": 34835 - }, - { - "epoch": 0.9995266306140892, - "grad_norm": 0.416015625, - "learning_rate": 1.2837503786666815e-10, - "loss": 0.9387, - "step": 34840 - }, - { - "epoch": 0.999670075882547, - "grad_norm": 0.373046875, - "learning_rate": 6.067727084113628e-11, - "loss": 0.9583, - "step": 34845 - }, - { - "epoch": 0.9998135211510049, - "grad_norm": 0.37890625, - "learning_rate": 1.8052743022689556e-11, - "loss": 0.9403, - "step": 34850 - }, - { - "epoch": 0.9999569664194626, - "grad_norm": 0.369140625, - "learning_rate": 5.014651027757112e-13, - "loss": 0.8484, - "step": 34855 - }, - { - "epoch": 0.9999856554731542, - "eval_loss": 0.9543404579162598, - "eval_runtime": 2084.1231, - "eval_samples_per_second": 7.404, - "eval_steps_per_second": 0.463, - "step": 34856 - }, - { - "epoch": 0.9999856554731542, - "step": 34856, - "total_flos": 1.2236563132662678e+19, - "train_loss": 0.9581741800145707, - "train_runtime": 92834.4231, - "train_samples_per_second": 1.502, - "train_steps_per_second": 0.375 + "epoch": 1.0, + "step": 17429, + "total_flos": 1.2254844500131709e+19, + "train_loss": 0.9352519262663926, + "train_runtime": 32149.2413, + "train_samples_per_second": 4.337, + "train_steps_per_second": 0.542 } ], "logging_steps": 5, - "max_steps": 34856, + "max_steps": 17429, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, @@ -48847,7 +24445,7 @@ "attributes": {} } }, - "total_flos": 1.2236563132662678e+19, + "total_flos": 1.2254844500131709e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null