{ "best_metric": null, "best_model_checkpoint": null, "epoch": 14.911154371867623, "eval_steps": 200000, "global_step": 360000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 0.69921875, "learning_rate": 5e-05, "loss": 0.979, "step": 10 }, { "epoch": 0.0, "grad_norm": 2.125, "learning_rate": 0.0001, "loss": 0.8256, "step": 20 }, { "epoch": 0.0, "grad_norm": 2.59375, "learning_rate": 0.00015, "loss": 0.6018, "step": 30 }, { "epoch": 0.0, "grad_norm": 3.875, "learning_rate": 0.0002, "loss": 0.6244, "step": 40 }, { "epoch": 0.0, "grad_norm": 3.375, "learning_rate": 0.00025, "loss": 0.5708, "step": 50 }, { "epoch": 0.0, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 0.4826, "step": 60 }, { "epoch": 0.0, "grad_norm": 1.4765625, "learning_rate": 0.00035, "loss": 0.4008, "step": 70 }, { "epoch": 0.0, "grad_norm": 1.5390625, "learning_rate": 0.0004, "loss": 0.47, "step": 80 }, { "epoch": 0.0, "grad_norm": 2.375, "learning_rate": 0.00045000000000000004, "loss": 0.4558, "step": 90 }, { "epoch": 0.0, "grad_norm": 3.78125, "learning_rate": 0.0005, "loss": 0.4735, "step": 100 }, { "epoch": 0.0, "grad_norm": 4.5, "learning_rate": 0.0004999999997647635, "loss": 0.4286, "step": 110 }, { "epoch": 0.0, "grad_norm": 2.5625, "learning_rate": 0.000499999999059054, "loss": 0.4376, "step": 120 }, { "epoch": 0.01, "grad_norm": 3.609375, "learning_rate": 0.0004999999978828715, "loss": 0.4174, "step": 130 }, { "epoch": 0.01, "grad_norm": 1.734375, "learning_rate": 0.000499999996236216, "loss": 0.4391, "step": 140 }, { "epoch": 0.01, "grad_norm": 2.96875, "learning_rate": 0.0004999999941190875, "loss": 0.4171, "step": 150 }, { "epoch": 0.01, "grad_norm": 4.9375, "learning_rate": 0.000499999991531486, "loss": 0.3757, "step": 160 }, { "epoch": 0.01, "grad_norm": 1.265625, "learning_rate": 0.0004999999884734115, "loss": 0.3802, "step": 170 }, { "epoch": 0.01, "grad_norm": 1.640625, "learning_rate": 0.000499999984944864, "loss": 0.4809, "step": 180 }, { "epoch": 0.01, "grad_norm": 1.6171875, "learning_rate": 0.0004999999809458436, "loss": 0.3962, "step": 190 }, { "epoch": 0.01, "grad_norm": 2.140625, "learning_rate": 0.0004999999764763503, "loss": 0.399, "step": 200 }, { "epoch": 0.01, "grad_norm": 1.9453125, "learning_rate": 0.0004999999715363839, "loss": 0.3718, "step": 210 }, { "epoch": 0.01, "grad_norm": 1.7421875, "learning_rate": 0.0004999999661259445, "loss": 0.3845, "step": 220 }, { "epoch": 0.01, "grad_norm": 2.21875, "learning_rate": 0.0004999999602450324, "loss": 0.3646, "step": 230 }, { "epoch": 0.01, "grad_norm": 1.640625, "learning_rate": 0.0004999999538936471, "loss": 0.3809, "step": 240 }, { "epoch": 0.01, "grad_norm": 0.984375, "learning_rate": 0.000499999947071789, "loss": 0.3322, "step": 250 }, { "epoch": 0.01, "grad_norm": 2.34375, "learning_rate": 0.0004999999397794581, "loss": 0.319, "step": 260 }, { "epoch": 0.01, "grad_norm": 1.078125, "learning_rate": 0.0004999999320166543, "loss": 0.4018, "step": 270 }, { "epoch": 0.01, "grad_norm": 1.1328125, "learning_rate": 0.0004999999237833775, "loss": 0.3773, "step": 280 }, { "epoch": 0.01, "grad_norm": 1.640625, "learning_rate": 0.0004999999150796278, "loss": 0.3919, "step": 290 }, { "epoch": 0.01, "grad_norm": 1.953125, "learning_rate": 0.0004999999059054055, "loss": 0.377, "step": 300 }, { "epoch": 0.01, "grad_norm": 0.68359375, "learning_rate": 0.0004999998962607102, "loss": 0.3864, "step": 310 }, { "epoch": 0.01, "grad_norm": 1.5546875, "learning_rate": 0.000499999886145542, "loss": 0.4597, "step": 320 }, { "epoch": 0.01, "grad_norm": 1.515625, "learning_rate": 0.0004999998755599013, "loss": 0.4042, "step": 330 }, { "epoch": 0.01, "grad_norm": 4.53125, "learning_rate": 0.0004999998645037876, "loss": 0.4348, "step": 340 }, { "epoch": 0.01, "grad_norm": 4.15625, "learning_rate": 0.0004999998529772012, "loss": 0.4317, "step": 350 }, { "epoch": 0.01, "grad_norm": 1.890625, "learning_rate": 0.0004999998409801421, "loss": 0.5081, "step": 360 }, { "epoch": 0.02, "grad_norm": 0.94921875, "learning_rate": 0.0004999998285126103, "loss": 0.3064, "step": 370 }, { "epoch": 0.02, "grad_norm": 2.90625, "learning_rate": 0.0004999998155746057, "loss": 0.3847, "step": 380 }, { "epoch": 0.02, "grad_norm": 2.5625, "learning_rate": 0.0004999998021661287, "loss": 0.4232, "step": 390 }, { "epoch": 0.02, "grad_norm": 1.28125, "learning_rate": 0.0004999997882871788, "loss": 0.3319, "step": 400 }, { "epoch": 0.02, "grad_norm": 1.5859375, "learning_rate": 0.0004999997739377564, "loss": 0.4151, "step": 410 }, { "epoch": 0.02, "grad_norm": 1.171875, "learning_rate": 0.0004999997591178614, "loss": 0.3973, "step": 420 }, { "epoch": 0.02, "grad_norm": 1.1640625, "learning_rate": 0.000499999743827494, "loss": 0.3519, "step": 430 }, { "epoch": 0.02, "grad_norm": 0.93359375, "learning_rate": 0.0004999997280666539, "loss": 0.3441, "step": 440 }, { "epoch": 0.02, "grad_norm": 2.90625, "learning_rate": 0.0004999997118353414, "loss": 0.3376, "step": 450 }, { "epoch": 0.02, "grad_norm": 1.734375, "learning_rate": 0.0004999996951335564, "loss": 0.318, "step": 460 }, { "epoch": 0.02, "grad_norm": 1.875, "learning_rate": 0.000499999677961299, "loss": 0.3783, "step": 470 }, { "epoch": 0.02, "grad_norm": 0.94921875, "learning_rate": 0.0004999996603185692, "loss": 0.2997, "step": 480 }, { "epoch": 0.02, "grad_norm": 1.21875, "learning_rate": 0.000499999642205367, "loss": 0.3152, "step": 490 }, { "epoch": 0.02, "grad_norm": 1.8359375, "learning_rate": 0.0004999996236216925, "loss": 0.4532, "step": 500 }, { "epoch": 0.02, "grad_norm": 1.078125, "learning_rate": 0.0004999996045675458, "loss": 0.3603, "step": 510 }, { "epoch": 0.02, "grad_norm": 0.85546875, "learning_rate": 0.0004999995850429266, "loss": 0.3464, "step": 520 }, { "epoch": 0.02, "grad_norm": 1.171875, "learning_rate": 0.0004999995650478353, "loss": 0.4385, "step": 530 }, { "epoch": 0.02, "grad_norm": 1.5078125, "learning_rate": 0.000499999544582272, "loss": 0.3639, "step": 540 }, { "epoch": 0.02, "grad_norm": 2.875, "learning_rate": 0.0004999995236462364, "loss": 0.3404, "step": 550 }, { "epoch": 0.02, "grad_norm": 0.6796875, "learning_rate": 0.0004999995022397286, "loss": 0.4101, "step": 560 }, { "epoch": 0.02, "grad_norm": 1.21875, "learning_rate": 0.0004999994803627489, "loss": 0.37, "step": 570 }, { "epoch": 0.02, "grad_norm": 0.86328125, "learning_rate": 0.000499999458015297, "loss": 0.4176, "step": 580 }, { "epoch": 0.02, "grad_norm": 1.203125, "learning_rate": 0.0004999994351973733, "loss": 0.4137, "step": 590 }, { "epoch": 0.02, "grad_norm": 1.671875, "learning_rate": 0.0004999994119089776, "loss": 0.332, "step": 600 }, { "epoch": 0.03, "grad_norm": 0.96875, "learning_rate": 0.0004999993881501099, "loss": 0.3805, "step": 610 }, { "epoch": 0.03, "grad_norm": 1.5625, "learning_rate": 0.0004999993639207705, "loss": 0.4088, "step": 620 }, { "epoch": 0.03, "grad_norm": 1.2109375, "learning_rate": 0.0004999993392209592, "loss": 0.3283, "step": 630 }, { "epoch": 0.03, "grad_norm": 1.421875, "learning_rate": 0.0004999993140506762, "loss": 0.3476, "step": 640 }, { "epoch": 0.03, "grad_norm": 1.7109375, "learning_rate": 0.0004999992884099215, "loss": 0.3169, "step": 650 }, { "epoch": 0.03, "grad_norm": 1.046875, "learning_rate": 0.000499999262298695, "loss": 0.359, "step": 660 }, { "epoch": 0.03, "grad_norm": 0.78515625, "learning_rate": 0.000499999235716997, "loss": 0.363, "step": 670 }, { "epoch": 0.03, "grad_norm": 0.58203125, "learning_rate": 0.0004999992086648274, "loss": 0.302, "step": 680 }, { "epoch": 0.03, "grad_norm": 0.890625, "learning_rate": 0.0004999991811421863, "loss": 0.3503, "step": 690 }, { "epoch": 0.03, "grad_norm": 1.21875, "learning_rate": 0.0004999991531490737, "loss": 0.3585, "step": 700 }, { "epoch": 0.03, "grad_norm": 2.8125, "learning_rate": 0.0004999991246854898, "loss": 0.3214, "step": 710 }, { "epoch": 0.03, "grad_norm": 2.1875, "learning_rate": 0.0004999990957514344, "loss": 0.3568, "step": 720 }, { "epoch": 0.03, "grad_norm": 0.6640625, "learning_rate": 0.0004999990663469079, "loss": 0.2573, "step": 730 }, { "epoch": 0.03, "grad_norm": 1.2109375, "learning_rate": 0.00049999903647191, "loss": 0.3604, "step": 740 }, { "epoch": 0.03, "grad_norm": 0.8125, "learning_rate": 0.0004999990061264409, "loss": 0.3285, "step": 750 }, { "epoch": 0.03, "grad_norm": 1.203125, "learning_rate": 0.0004999989753105007, "loss": 0.2736, "step": 760 }, { "epoch": 0.03, "grad_norm": 0.70703125, "learning_rate": 0.0004999989440240896, "loss": 0.294, "step": 770 }, { "epoch": 0.03, "grad_norm": 1.1484375, "learning_rate": 0.0004999989122672072, "loss": 0.3478, "step": 780 }, { "epoch": 0.03, "grad_norm": 1.0078125, "learning_rate": 0.0004999988800398539, "loss": 0.3922, "step": 790 }, { "epoch": 0.03, "grad_norm": 0.72265625, "learning_rate": 0.0004999988473420299, "loss": 0.3029, "step": 800 }, { "epoch": 0.03, "grad_norm": 1.3046875, "learning_rate": 0.0004999988141737349, "loss": 0.3642, "step": 810 }, { "epoch": 0.03, "grad_norm": 1.6015625, "learning_rate": 0.0004999987805349692, "loss": 0.315, "step": 820 }, { "epoch": 0.03, "grad_norm": 0.44140625, "learning_rate": 0.0004999987464257327, "loss": 0.3264, "step": 830 }, { "epoch": 0.03, "grad_norm": 2.359375, "learning_rate": 0.0004999987118460257, "loss": 0.3657, "step": 840 }, { "epoch": 0.04, "grad_norm": 1.1875, "learning_rate": 0.000499998676795848, "loss": 0.3896, "step": 850 }, { "epoch": 0.04, "grad_norm": 0.8359375, "learning_rate": 0.0004999986412751998, "loss": 0.3145, "step": 860 }, { "epoch": 0.04, "grad_norm": 0.8203125, "learning_rate": 0.0004999986052840812, "loss": 0.4001, "step": 870 }, { "epoch": 0.04, "grad_norm": 1.1796875, "learning_rate": 0.0004999985688224921, "loss": 0.3462, "step": 880 }, { "epoch": 0.04, "grad_norm": 0.828125, "learning_rate": 0.0004999985318904328, "loss": 0.3135, "step": 890 }, { "epoch": 0.04, "grad_norm": 0.8671875, "learning_rate": 0.0004999984944879034, "loss": 0.3184, "step": 900 }, { "epoch": 0.04, "grad_norm": 0.322265625, "learning_rate": 0.0004999984566149036, "loss": 0.3851, "step": 910 }, { "epoch": 0.04, "grad_norm": 6.53125, "learning_rate": 0.0004999984182714339, "loss": 0.3676, "step": 920 }, { "epoch": 0.04, "grad_norm": 2.390625, "learning_rate": 0.0004999983794574939, "loss": 0.3173, "step": 930 }, { "epoch": 0.04, "grad_norm": 1.7421875, "learning_rate": 0.0004999983401730842, "loss": 0.3962, "step": 940 }, { "epoch": 0.04, "grad_norm": 0.79296875, "learning_rate": 0.0004999983004182045, "loss": 0.3593, "step": 950 }, { "epoch": 0.04, "grad_norm": 0.474609375, "learning_rate": 0.0004999982601928551, "loss": 0.3684, "step": 960 }, { "epoch": 0.04, "grad_norm": 1.375, "learning_rate": 0.0004999982194970359, "loss": 0.3056, "step": 970 }, { "epoch": 0.04, "grad_norm": 1.75, "learning_rate": 0.000499998178330747, "loss": 0.3502, "step": 980 }, { "epoch": 0.04, "grad_norm": 2.75, "learning_rate": 0.0004999981366939886, "loss": 0.2836, "step": 990 }, { "epoch": 0.04, "grad_norm": 0.76953125, "learning_rate": 0.0004999980945867606, "loss": 0.3602, "step": 1000 }, { "epoch": 0.04, "grad_norm": 1.671875, "learning_rate": 0.0004999980520090634, "loss": 0.2913, "step": 1010 }, { "epoch": 0.04, "grad_norm": 1.1953125, "learning_rate": 0.0004999980089608967, "loss": 0.2992, "step": 1020 }, { "epoch": 0.04, "grad_norm": 1.34375, "learning_rate": 0.0004999979654422607, "loss": 0.2966, "step": 1030 }, { "epoch": 0.04, "grad_norm": 0.91796875, "learning_rate": 0.0004999979214531556, "loss": 0.3145, "step": 1040 }, { "epoch": 0.04, "grad_norm": 0.72265625, "learning_rate": 0.0004999978769935815, "loss": 0.3271, "step": 1050 }, { "epoch": 0.04, "grad_norm": 2.171875, "learning_rate": 0.0004999978320635383, "loss": 0.3331, "step": 1060 }, { "epoch": 0.04, "grad_norm": 0.75, "learning_rate": 0.0004999977866630261, "loss": 0.2827, "step": 1070 }, { "epoch": 0.04, "grad_norm": 1.4921875, "learning_rate": 0.0004999977407920452, "loss": 0.3029, "step": 1080 }, { "epoch": 0.05, "grad_norm": 1.0234375, "learning_rate": 0.0004999976944505954, "loss": 0.371, "step": 1090 }, { "epoch": 0.05, "grad_norm": 0.89453125, "learning_rate": 0.000499997647638677, "loss": 0.3152, "step": 1100 }, { "epoch": 0.05, "grad_norm": 1.0546875, "learning_rate": 0.0004999976003562901, "loss": 0.3901, "step": 1110 }, { "epoch": 0.05, "grad_norm": 1.6015625, "learning_rate": 0.0004999975526034347, "loss": 0.4085, "step": 1120 }, { "epoch": 0.05, "grad_norm": 0.6484375, "learning_rate": 0.0004999975043801108, "loss": 0.2502, "step": 1130 }, { "epoch": 0.05, "grad_norm": 0.68359375, "learning_rate": 0.0004999974556863187, "loss": 0.3639, "step": 1140 }, { "epoch": 0.05, "grad_norm": 0.6875, "learning_rate": 0.0004999974065220583, "loss": 0.312, "step": 1150 }, { "epoch": 0.05, "grad_norm": 3.75, "learning_rate": 0.0004999973568873299, "loss": 0.331, "step": 1160 }, { "epoch": 0.05, "grad_norm": 0.7265625, "learning_rate": 0.0004999973067821333, "loss": 0.3724, "step": 1170 }, { "epoch": 0.05, "grad_norm": 0.578125, "learning_rate": 0.0004999972562064689, "loss": 0.3137, "step": 1180 }, { "epoch": 0.05, "grad_norm": 1.2109375, "learning_rate": 0.0004999972051603366, "loss": 0.3284, "step": 1190 }, { "epoch": 0.05, "grad_norm": 1.515625, "learning_rate": 0.0004999971536437366, "loss": 0.3006, "step": 1200 }, { "epoch": 0.05, "grad_norm": 0.50390625, "learning_rate": 0.0004999971016566689, "loss": 0.3346, "step": 1210 }, { "epoch": 0.05, "grad_norm": 0.82421875, "learning_rate": 0.0004999970491991338, "loss": 0.3051, "step": 1220 }, { "epoch": 0.05, "grad_norm": 1.265625, "learning_rate": 0.0004999969962711311, "loss": 0.3407, "step": 1230 }, { "epoch": 0.05, "grad_norm": 1.203125, "learning_rate": 0.0004999969428726611, "loss": 0.3765, "step": 1240 }, { "epoch": 0.05, "grad_norm": 0.578125, "learning_rate": 0.0004999968890037238, "loss": 0.2441, "step": 1250 }, { "epoch": 0.05, "grad_norm": 1.421875, "learning_rate": 0.0004999968346643194, "loss": 0.3162, "step": 1260 }, { "epoch": 0.05, "grad_norm": 1.1953125, "learning_rate": 0.0004999967798544479, "loss": 0.3095, "step": 1270 }, { "epoch": 0.05, "grad_norm": 0.4609375, "learning_rate": 0.0004999967245741095, "loss": 0.3518, "step": 1280 }, { "epoch": 0.05, "grad_norm": 0.921875, "learning_rate": 0.0004999966688233043, "loss": 0.3693, "step": 1290 }, { "epoch": 0.05, "grad_norm": 1.4765625, "learning_rate": 0.0004999966126020323, "loss": 0.3685, "step": 1300 }, { "epoch": 0.05, "grad_norm": 1.7578125, "learning_rate": 0.0004999965559102938, "loss": 0.3648, "step": 1310 }, { "epoch": 0.05, "grad_norm": 0.486328125, "learning_rate": 0.0004999964987480886, "loss": 0.3722, "step": 1320 }, { "epoch": 0.06, "grad_norm": 0.8359375, "learning_rate": 0.000499996441115417, "loss": 0.2889, "step": 1330 }, { "epoch": 0.06, "grad_norm": 0.828125, "learning_rate": 0.0004999963830122793, "loss": 0.3683, "step": 1340 }, { "epoch": 0.06, "grad_norm": 1.203125, "learning_rate": 0.0004999963244386753, "loss": 0.3026, "step": 1350 }, { "epoch": 0.06, "grad_norm": 0.6328125, "learning_rate": 0.0004999962653946051, "loss": 0.3666, "step": 1360 }, { "epoch": 0.06, "grad_norm": 1.2265625, "learning_rate": 0.000499996205880069, "loss": 0.295, "step": 1370 }, { "epoch": 0.06, "grad_norm": 0.8203125, "learning_rate": 0.000499996145895067, "loss": 0.3602, "step": 1380 }, { "epoch": 0.06, "grad_norm": 1.765625, "learning_rate": 0.0004999960854395994, "loss": 0.2869, "step": 1390 }, { "epoch": 0.06, "grad_norm": 1.2578125, "learning_rate": 0.000499996024513666, "loss": 0.2896, "step": 1400 }, { "epoch": 0.06, "grad_norm": 0.86328125, "learning_rate": 0.0004999959631172672, "loss": 0.32, "step": 1410 }, { "epoch": 0.06, "grad_norm": 0.7265625, "learning_rate": 0.0004999959012504029, "loss": 0.3323, "step": 1420 }, { "epoch": 0.06, "grad_norm": 0.30078125, "learning_rate": 0.0004999958389130733, "loss": 0.2821, "step": 1430 }, { "epoch": 0.06, "grad_norm": 0.6484375, "learning_rate": 0.0004999957761052786, "loss": 0.3225, "step": 1440 }, { "epoch": 0.06, "grad_norm": 0.86328125, "learning_rate": 0.0004999957128270188, "loss": 0.3507, "step": 1450 }, { "epoch": 0.06, "grad_norm": 0.98046875, "learning_rate": 0.0004999956490782942, "loss": 0.3555, "step": 1460 }, { "epoch": 0.06, "grad_norm": 1.7578125, "learning_rate": 0.0004999955848591047, "loss": 0.325, "step": 1470 }, { "epoch": 0.06, "grad_norm": 2.34375, "learning_rate": 0.0004999955201694504, "loss": 0.3127, "step": 1480 }, { "epoch": 0.06, "grad_norm": 1.0546875, "learning_rate": 0.0004999954550093317, "loss": 0.2929, "step": 1490 }, { "epoch": 0.06, "grad_norm": 0.78515625, "learning_rate": 0.0004999953893787484, "loss": 0.3094, "step": 1500 }, { "epoch": 0.06, "grad_norm": 1.234375, "learning_rate": 0.0004999953232777008, "loss": 0.334, "step": 1510 }, { "epoch": 0.06, "grad_norm": 2.15625, "learning_rate": 0.0004999952567061891, "loss": 0.3412, "step": 1520 }, { "epoch": 0.06, "grad_norm": 0.5546875, "learning_rate": 0.0004999951896642132, "loss": 0.3145, "step": 1530 }, { "epoch": 0.06, "grad_norm": 0.640625, "learning_rate": 0.0004999951221517734, "loss": 0.2527, "step": 1540 }, { "epoch": 0.06, "grad_norm": 0.5, "learning_rate": 0.0004999950541688697, "loss": 0.2872, "step": 1550 }, { "epoch": 0.06, "grad_norm": 1.1171875, "learning_rate": 0.0004999949857155024, "loss": 0.2888, "step": 1560 }, { "epoch": 0.07, "grad_norm": 0.96484375, "learning_rate": 0.0004999949167916716, "loss": 0.3165, "step": 1570 }, { "epoch": 0.07, "grad_norm": 0.875, "learning_rate": 0.0004999948473973772, "loss": 0.2907, "step": 1580 }, { "epoch": 0.07, "grad_norm": 2.59375, "learning_rate": 0.0004999947775326197, "loss": 0.3347, "step": 1590 }, { "epoch": 0.07, "grad_norm": 1.1796875, "learning_rate": 0.0004999947071973989, "loss": 0.35, "step": 1600 }, { "epoch": 0.07, "grad_norm": 0.84375, "learning_rate": 0.0004999946363917151, "loss": 0.3087, "step": 1610 }, { "epoch": 0.07, "grad_norm": 0.859375, "learning_rate": 0.0004999945651155683, "loss": 0.3198, "step": 1620 }, { "epoch": 0.07, "grad_norm": 2.328125, "learning_rate": 0.0004999944933689588, "loss": 0.3703, "step": 1630 }, { "epoch": 0.07, "grad_norm": 0.494140625, "learning_rate": 0.0004999944211518866, "loss": 0.348, "step": 1640 }, { "epoch": 0.07, "grad_norm": 1.125, "learning_rate": 0.0004999943484643519, "loss": 0.3179, "step": 1650 }, { "epoch": 0.07, "grad_norm": 1.4375, "learning_rate": 0.0004999942753063549, "loss": 0.3416, "step": 1660 }, { "epoch": 0.07, "grad_norm": 1.265625, "learning_rate": 0.0004999942016778957, "loss": 0.3047, "step": 1670 }, { "epoch": 0.07, "grad_norm": 1.09375, "learning_rate": 0.0004999941275789743, "loss": 0.2894, "step": 1680 }, { "epoch": 0.07, "grad_norm": 1.03125, "learning_rate": 0.000499994053009591, "loss": 0.2702, "step": 1690 }, { "epoch": 0.07, "grad_norm": 1.0234375, "learning_rate": 0.0004999939779697459, "loss": 0.2902, "step": 1700 }, { "epoch": 0.07, "grad_norm": 0.46484375, "learning_rate": 0.0004999939024594391, "loss": 0.3448, "step": 1710 }, { "epoch": 0.07, "grad_norm": 1.28125, "learning_rate": 0.0004999938264786708, "loss": 0.33, "step": 1720 }, { "epoch": 0.07, "grad_norm": 0.0, "learning_rate": 0.0004999937500274411, "loss": 0.3007, "step": 1730 }, { "epoch": 0.07, "grad_norm": 1.0078125, "learning_rate": 0.0004999936731057502, "loss": 0.3127, "step": 1740 }, { "epoch": 0.07, "grad_norm": 0.76953125, "learning_rate": 0.0004999935957135981, "loss": 0.3853, "step": 1750 }, { "epoch": 0.07, "grad_norm": 0.72265625, "learning_rate": 0.0004999935178509852, "loss": 0.3469, "step": 1760 }, { "epoch": 0.07, "grad_norm": 1.2421875, "learning_rate": 0.0004999934395179114, "loss": 0.2776, "step": 1770 }, { "epoch": 0.07, "grad_norm": 1.2578125, "learning_rate": 0.000499993360714377, "loss": 0.3026, "step": 1780 }, { "epoch": 0.07, "grad_norm": 1.09375, "learning_rate": 0.0004999932814403821, "loss": 0.2991, "step": 1790 }, { "epoch": 0.07, "grad_norm": 1.296875, "learning_rate": 0.0004999932016959267, "loss": 0.3332, "step": 1800 }, { "epoch": 0.07, "grad_norm": 1.328125, "learning_rate": 0.0004999931214810111, "loss": 0.2901, "step": 1810 }, { "epoch": 0.08, "grad_norm": 0.8046875, "learning_rate": 0.0004999930407956356, "loss": 0.3706, "step": 1820 }, { "epoch": 0.08, "grad_norm": 0.87109375, "learning_rate": 0.0004999929596398002, "loss": 0.2969, "step": 1830 }, { "epoch": 0.08, "grad_norm": 0.671875, "learning_rate": 0.0004999928780135049, "loss": 0.3116, "step": 1840 }, { "epoch": 0.08, "grad_norm": 0.4609375, "learning_rate": 0.00049999279591675, "loss": 0.3009, "step": 1850 }, { "epoch": 0.08, "grad_norm": 0.84765625, "learning_rate": 0.0004999927133495358, "loss": 0.3357, "step": 1860 }, { "epoch": 0.08, "grad_norm": 0.330078125, "learning_rate": 0.0004999926303118623, "loss": 0.353, "step": 1870 }, { "epoch": 0.08, "grad_norm": 1.390625, "learning_rate": 0.0004999925468037296, "loss": 0.2716, "step": 1880 }, { "epoch": 0.08, "grad_norm": 0.58203125, "learning_rate": 0.0004999924628251379, "loss": 0.2685, "step": 1890 }, { "epoch": 0.08, "grad_norm": 0.86328125, "learning_rate": 0.0004999923783760874, "loss": 0.3665, "step": 1900 }, { "epoch": 0.08, "grad_norm": 0.5546875, "learning_rate": 0.0004999922934565783, "loss": 0.2999, "step": 1910 }, { "epoch": 0.08, "grad_norm": 1.375, "learning_rate": 0.0004999922080666106, "loss": 0.3282, "step": 1920 }, { "epoch": 0.08, "grad_norm": 1.265625, "learning_rate": 0.0004999921222061846, "loss": 0.3201, "step": 1930 }, { "epoch": 0.08, "grad_norm": 1.84375, "learning_rate": 0.0004999920358753004, "loss": 0.2926, "step": 1940 }, { "epoch": 0.08, "grad_norm": 0.50390625, "learning_rate": 0.0004999919490739583, "loss": 0.2654, "step": 1950 }, { "epoch": 0.08, "grad_norm": 0.5390625, "learning_rate": 0.0004999918618021583, "loss": 0.3121, "step": 1960 }, { "epoch": 0.08, "grad_norm": 1.0703125, "learning_rate": 0.0004999917740599005, "loss": 0.3108, "step": 1970 }, { "epoch": 0.08, "grad_norm": 1.8828125, "learning_rate": 0.0004999916858471852, "loss": 0.3156, "step": 1980 }, { "epoch": 0.08, "grad_norm": 3.25, "learning_rate": 0.0004999915971640127, "loss": 0.3008, "step": 1990 }, { "epoch": 0.08, "grad_norm": 0.384765625, "learning_rate": 0.0004999915080103829, "loss": 0.3155, "step": 2000 }, { "epoch": 0.08, "grad_norm": 0.59765625, "learning_rate": 0.0004999914183862961, "loss": 0.334, "step": 2010 }, { "epoch": 0.08, "grad_norm": 0.83984375, "learning_rate": 0.0004999913282917524, "loss": 0.3051, "step": 2020 }, { "epoch": 0.08, "grad_norm": 0.31640625, "learning_rate": 0.0004999912377267521, "loss": 0.3024, "step": 2030 }, { "epoch": 0.08, "grad_norm": 0.5390625, "learning_rate": 0.0004999911466912952, "loss": 0.3597, "step": 2040 }, { "epoch": 0.08, "grad_norm": 0.9140625, "learning_rate": 0.0004999910551853821, "loss": 0.2553, "step": 2050 }, { "epoch": 0.09, "grad_norm": 1.8125, "learning_rate": 0.0004999909632090126, "loss": 0.3038, "step": 2060 }, { "epoch": 0.09, "grad_norm": 0.68359375, "learning_rate": 0.0004999908707621873, "loss": 0.3228, "step": 2070 }, { "epoch": 0.09, "grad_norm": 1.125, "learning_rate": 0.0004999907778449061, "loss": 0.2831, "step": 2080 }, { "epoch": 0.09, "grad_norm": 0.8671875, "learning_rate": 0.0004999906844571693, "loss": 0.3181, "step": 2090 }, { "epoch": 0.09, "grad_norm": 0.71484375, "learning_rate": 0.000499990590598977, "loss": 0.2869, "step": 2100 }, { "epoch": 0.09, "grad_norm": 0.75390625, "learning_rate": 0.0004999904962703294, "loss": 0.3167, "step": 2110 }, { "epoch": 0.09, "grad_norm": 0.765625, "learning_rate": 0.0004999904014712267, "loss": 0.2201, "step": 2120 }, { "epoch": 0.09, "grad_norm": 0.97265625, "learning_rate": 0.000499990306201669, "loss": 0.3236, "step": 2130 }, { "epoch": 0.09, "grad_norm": 1.09375, "learning_rate": 0.0004999902104616566, "loss": 0.2701, "step": 2140 }, { "epoch": 0.09, "grad_norm": 0.8828125, "learning_rate": 0.0004999901142511895, "loss": 0.3678, "step": 2150 }, { "epoch": 0.09, "grad_norm": 1.1171875, "learning_rate": 0.0004999900175702682, "loss": 0.3429, "step": 2160 }, { "epoch": 0.09, "grad_norm": 0.76953125, "learning_rate": 0.0004999899204188925, "loss": 0.2931, "step": 2170 }, { "epoch": 0.09, "grad_norm": 0.890625, "learning_rate": 0.000499989822797063, "loss": 0.302, "step": 2180 }, { "epoch": 0.09, "grad_norm": 0.59765625, "learning_rate": 0.0004999897247047794, "loss": 0.3104, "step": 2190 }, { "epoch": 0.09, "grad_norm": 1.4375, "learning_rate": 0.0004999896261420423, "loss": 0.3297, "step": 2200 }, { "epoch": 0.09, "grad_norm": 0.8359375, "learning_rate": 0.0004999895271088517, "loss": 0.3463, "step": 2210 }, { "epoch": 0.09, "grad_norm": 2.203125, "learning_rate": 0.0004999894276052077, "loss": 0.3334, "step": 2220 }, { "epoch": 0.09, "grad_norm": 1.3125, "learning_rate": 0.0004999893276311107, "loss": 0.3042, "step": 2230 }, { "epoch": 0.09, "grad_norm": 0.7265625, "learning_rate": 0.0004999892271865607, "loss": 0.2923, "step": 2240 }, { "epoch": 0.09, "grad_norm": 0.78515625, "learning_rate": 0.000499989126271558, "loss": 0.3354, "step": 2250 }, { "epoch": 0.09, "grad_norm": 0.83984375, "learning_rate": 0.0004999890248861029, "loss": 0.285, "step": 2260 }, { "epoch": 0.09, "grad_norm": 0.73046875, "learning_rate": 0.0004999889230301953, "loss": 0.3075, "step": 2270 }, { "epoch": 0.09, "grad_norm": 0.80859375, "learning_rate": 0.0004999888207038356, "loss": 0.2968, "step": 2280 }, { "epoch": 0.09, "grad_norm": 2.328125, "learning_rate": 0.0004999887179070238, "loss": 0.3542, "step": 2290 }, { "epoch": 0.1, "grad_norm": 0.259765625, "learning_rate": 0.0004999886146397605, "loss": 0.258, "step": 2300 }, { "epoch": 0.1, "grad_norm": 1.78125, "learning_rate": 0.0004999885109020453, "loss": 0.291, "step": 2310 }, { "epoch": 0.1, "grad_norm": 1.09375, "learning_rate": 0.0004999884066938789, "loss": 0.2664, "step": 2320 }, { "epoch": 0.1, "grad_norm": 0.7109375, "learning_rate": 0.0004999883020152614, "loss": 0.2862, "step": 2330 }, { "epoch": 0.1, "grad_norm": 2.75, "learning_rate": 0.0004999881968661928, "loss": 0.3029, "step": 2340 }, { "epoch": 0.1, "grad_norm": 1.6015625, "learning_rate": 0.0004999880912466733, "loss": 0.3229, "step": 2350 }, { "epoch": 0.1, "grad_norm": 0.90234375, "learning_rate": 0.0004999879851567033, "loss": 0.3332, "step": 2360 }, { "epoch": 0.1, "grad_norm": 1.625, "learning_rate": 0.000499987878596283, "loss": 0.2553, "step": 2370 }, { "epoch": 0.1, "grad_norm": 2.859375, "learning_rate": 0.0004999877715654124, "loss": 0.2685, "step": 2380 }, { "epoch": 0.1, "grad_norm": 0.6171875, "learning_rate": 0.0004999876640640919, "loss": 0.2665, "step": 2390 }, { "epoch": 0.1, "grad_norm": 0.7265625, "learning_rate": 0.0004999875560923215, "loss": 0.3255, "step": 2400 }, { "epoch": 0.1, "grad_norm": 0.5390625, "learning_rate": 0.0004999874476501016, "loss": 0.2994, "step": 2410 }, { "epoch": 0.1, "grad_norm": 0.75390625, "learning_rate": 0.0004999873387374323, "loss": 0.2796, "step": 2420 }, { "epoch": 0.1, "grad_norm": 0.6953125, "learning_rate": 0.0004999872293543139, "loss": 0.3015, "step": 2430 }, { "epoch": 0.1, "grad_norm": 5.5625, "learning_rate": 0.0004999871195007463, "loss": 0.3009, "step": 2440 }, { "epoch": 0.1, "grad_norm": 0.3046875, "learning_rate": 0.0004999870091767303, "loss": 0.3356, "step": 2450 }, { "epoch": 0.1, "grad_norm": 1.4296875, "learning_rate": 0.0004999868983822654, "loss": 0.2623, "step": 2460 }, { "epoch": 0.1, "grad_norm": 1.25, "learning_rate": 0.0004999867871173523, "loss": 0.3621, "step": 2470 }, { "epoch": 0.1, "grad_norm": 1.2109375, "learning_rate": 0.0004999866753819911, "loss": 0.3105, "step": 2480 }, { "epoch": 0.1, "grad_norm": 1.3046875, "learning_rate": 0.0004999865631761819, "loss": 0.2932, "step": 2490 }, { "epoch": 0.1, "grad_norm": 1.7734375, "learning_rate": 0.000499986450499925, "loss": 0.2864, "step": 2500 }, { "epoch": 0.1, "grad_norm": 0.92578125, "learning_rate": 0.0004999863373532207, "loss": 0.3091, "step": 2510 }, { "epoch": 0.1, "grad_norm": 1.5703125, "learning_rate": 0.000499986223736069, "loss": 0.3078, "step": 2520 }, { "epoch": 0.1, "grad_norm": 0.78125, "learning_rate": 0.0004999861096484702, "loss": 0.3218, "step": 2530 }, { "epoch": 0.11, "grad_norm": 0.5234375, "learning_rate": 0.0004999859950904245, "loss": 0.2197, "step": 2540 }, { "epoch": 0.11, "grad_norm": 0.62890625, "learning_rate": 0.0004999858800619324, "loss": 0.288, "step": 2550 }, { "epoch": 0.11, "grad_norm": 1.0234375, "learning_rate": 0.0004999857645629936, "loss": 0.302, "step": 2560 }, { "epoch": 0.11, "grad_norm": 0.83984375, "learning_rate": 0.0004999856485936087, "loss": 0.3331, "step": 2570 }, { "epoch": 0.11, "grad_norm": 1.4765625, "learning_rate": 0.0004999855321537777, "loss": 0.3044, "step": 2580 }, { "epoch": 0.11, "grad_norm": 0.546875, "learning_rate": 0.0004999854152435011, "loss": 0.2696, "step": 2590 }, { "epoch": 0.11, "grad_norm": 1.234375, "learning_rate": 0.0004999852978627789, "loss": 0.2431, "step": 2600 }, { "epoch": 0.11, "grad_norm": 0.59765625, "learning_rate": 0.0004999851800116113, "loss": 0.2824, "step": 2610 }, { "epoch": 0.11, "grad_norm": 0.63671875, "learning_rate": 0.0004999850616899986, "loss": 0.3289, "step": 2620 }, { "epoch": 0.11, "grad_norm": 0.92578125, "learning_rate": 0.000499984942897941, "loss": 0.3242, "step": 2630 }, { "epoch": 0.11, "grad_norm": 1.34375, "learning_rate": 0.0004999848236354388, "loss": 0.2995, "step": 2640 }, { "epoch": 0.11, "grad_norm": 0.64453125, "learning_rate": 0.0004999847039024922, "loss": 0.3169, "step": 2650 }, { "epoch": 0.11, "grad_norm": 0.703125, "learning_rate": 0.0004999845836991013, "loss": 0.3488, "step": 2660 }, { "epoch": 0.11, "grad_norm": 0.40234375, "learning_rate": 0.0004999844630252663, "loss": 0.2909, "step": 2670 }, { "epoch": 0.11, "grad_norm": 0.7109375, "learning_rate": 0.0004999843418809877, "loss": 0.268, "step": 2680 }, { "epoch": 0.11, "grad_norm": 0.62890625, "learning_rate": 0.0004999842202662655, "loss": 0.3085, "step": 2690 }, { "epoch": 0.11, "grad_norm": 0.25, "learning_rate": 0.0004999840981811, "loss": 0.2817, "step": 2700 }, { "epoch": 0.11, "grad_norm": 0.70703125, "learning_rate": 0.0004999839756254915, "loss": 0.2842, "step": 2710 }, { "epoch": 0.11, "grad_norm": 0.89453125, "learning_rate": 0.00049998385259944, "loss": 0.3295, "step": 2720 }, { "epoch": 0.11, "grad_norm": 0.421875, "learning_rate": 0.000499983729102946, "loss": 0.2718, "step": 2730 }, { "epoch": 0.11, "grad_norm": 0.54296875, "learning_rate": 0.0004999836051360096, "loss": 0.2964, "step": 2740 }, { "epoch": 0.11, "grad_norm": 0.2138671875, "learning_rate": 0.0004999834806986309, "loss": 0.2851, "step": 2750 }, { "epoch": 0.11, "grad_norm": 1.0859375, "learning_rate": 0.0004999833557908105, "loss": 0.2808, "step": 2760 }, { "epoch": 0.11, "grad_norm": 0.66015625, "learning_rate": 0.0004999832304125484, "loss": 0.3468, "step": 2770 }, { "epoch": 0.12, "grad_norm": 0.515625, "learning_rate": 0.0004999831045638447, "loss": 0.3336, "step": 2780 }, { "epoch": 0.12, "grad_norm": 0.9921875, "learning_rate": 0.0004999829782446999, "loss": 0.2583, "step": 2790 }, { "epoch": 0.12, "grad_norm": 1.4765625, "learning_rate": 0.0004999828514551141, "loss": 0.2889, "step": 2800 }, { "epoch": 0.12, "grad_norm": 2.03125, "learning_rate": 0.0004999827241950876, "loss": 0.2915, "step": 2810 }, { "epoch": 0.12, "grad_norm": 1.6953125, "learning_rate": 0.0004999825964646207, "loss": 0.3186, "step": 2820 }, { "epoch": 0.12, "grad_norm": 0.78515625, "learning_rate": 0.0004999824682637134, "loss": 0.3093, "step": 2830 }, { "epoch": 0.12, "grad_norm": 2.4375, "learning_rate": 0.0004999823395923662, "loss": 0.3435, "step": 2840 }, { "epoch": 0.12, "grad_norm": 0.59375, "learning_rate": 0.0004999822104505791, "loss": 0.261, "step": 2850 }, { "epoch": 0.12, "grad_norm": 0.60546875, "learning_rate": 0.0004999820808383527, "loss": 0.2935, "step": 2860 }, { "epoch": 0.12, "grad_norm": 0.494140625, "learning_rate": 0.0004999819507556868, "loss": 0.3102, "step": 2870 }, { "epoch": 0.12, "grad_norm": 0.326171875, "learning_rate": 0.0004999818202025819, "loss": 0.2597, "step": 2880 }, { "epoch": 0.12, "grad_norm": 0.1796875, "learning_rate": 0.0004999816891790382, "loss": 0.328, "step": 2890 }, { "epoch": 0.12, "grad_norm": 1.1015625, "learning_rate": 0.000499981557685056, "loss": 0.2938, "step": 2900 }, { "epoch": 0.12, "grad_norm": 0.984375, "learning_rate": 0.0004999814257206355, "loss": 0.3352, "step": 2910 }, { "epoch": 0.12, "grad_norm": 3.3125, "learning_rate": 0.000499981293285777, "loss": 0.2745, "step": 2920 }, { "epoch": 0.12, "grad_norm": 1.015625, "learning_rate": 0.0004999811603804806, "loss": 0.2161, "step": 2930 }, { "epoch": 0.12, "grad_norm": 0.37109375, "learning_rate": 0.0004999810270047468, "loss": 0.2805, "step": 2940 }, { "epoch": 0.12, "grad_norm": 1.0234375, "learning_rate": 0.0004999808931585755, "loss": 0.2409, "step": 2950 }, { "epoch": 0.12, "grad_norm": 0.65234375, "learning_rate": 0.0004999807588419674, "loss": 0.3236, "step": 2960 }, { "epoch": 0.12, "grad_norm": 0.625, "learning_rate": 0.0004999806240549222, "loss": 0.2807, "step": 2970 }, { "epoch": 0.12, "grad_norm": 0.373046875, "learning_rate": 0.0004999804887974407, "loss": 0.2781, "step": 2980 }, { "epoch": 0.12, "grad_norm": 0.84375, "learning_rate": 0.0004999803530695229, "loss": 0.3015, "step": 2990 }, { "epoch": 0.12, "grad_norm": 1.375, "learning_rate": 0.000499980216871169, "loss": 0.3282, "step": 3000 }, { "epoch": 0.12, "grad_norm": 5.09375, "learning_rate": 0.0004999800802023794, "loss": 0.3094, "step": 3010 }, { "epoch": 0.13, "grad_norm": 0.67578125, "learning_rate": 0.0004999799430631542, "loss": 0.281, "step": 3020 }, { "epoch": 0.13, "grad_norm": 1.359375, "learning_rate": 0.0004999798054534937, "loss": 0.2694, "step": 3030 }, { "epoch": 0.13, "grad_norm": 1.1171875, "learning_rate": 0.0004999796673733983, "loss": 0.2808, "step": 3040 }, { "epoch": 0.13, "grad_norm": 0.703125, "learning_rate": 0.0004999795288228682, "loss": 0.2501, "step": 3050 }, { "epoch": 0.13, "grad_norm": 1.3046875, "learning_rate": 0.0004999793898019035, "loss": 0.3492, "step": 3060 }, { "epoch": 0.13, "grad_norm": 0.421875, "learning_rate": 0.0004999792503105048, "loss": 0.219, "step": 3070 }, { "epoch": 0.13, "grad_norm": 0.90625, "learning_rate": 0.0004999791103486719, "loss": 0.2859, "step": 3080 }, { "epoch": 0.13, "grad_norm": 1.0625, "learning_rate": 0.0004999789699164053, "loss": 0.3115, "step": 3090 }, { "epoch": 0.13, "grad_norm": 0.80078125, "learning_rate": 0.0004999788290137054, "loss": 0.3104, "step": 3100 }, { "epoch": 0.13, "grad_norm": 8.4375, "learning_rate": 0.0004999786876405724, "loss": 0.2714, "step": 3110 }, { "epoch": 0.13, "grad_norm": 1.34375, "learning_rate": 0.0004999785457970064, "loss": 0.366, "step": 3120 }, { "epoch": 0.13, "grad_norm": 0.8046875, "learning_rate": 0.0004999784034830078, "loss": 0.3262, "step": 3130 }, { "epoch": 0.13, "grad_norm": 1.2421875, "learning_rate": 0.0004999782606985769, "loss": 0.3268, "step": 3140 }, { "epoch": 0.13, "grad_norm": 0.921875, "learning_rate": 0.0004999781174437138, "loss": 0.2109, "step": 3150 }, { "epoch": 0.13, "grad_norm": 1.125, "learning_rate": 0.0004999779737184189, "loss": 0.2906, "step": 3160 }, { "epoch": 0.13, "grad_norm": 0.455078125, "learning_rate": 0.0004999778295226925, "loss": 0.2829, "step": 3170 }, { "epoch": 0.13, "grad_norm": 0.76953125, "learning_rate": 0.0004999776848565347, "loss": 0.3023, "step": 3180 }, { "epoch": 0.13, "grad_norm": 0.73828125, "learning_rate": 0.000499977539719946, "loss": 0.2116, "step": 3190 }, { "epoch": 0.13, "grad_norm": 0.248046875, "learning_rate": 0.0004999773941129265, "loss": 0.3067, "step": 3200 }, { "epoch": 0.13, "grad_norm": 0.54296875, "learning_rate": 0.0004999772480354766, "loss": 0.3927, "step": 3210 }, { "epoch": 0.13, "grad_norm": 1.203125, "learning_rate": 0.0004999771014875965, "loss": 0.2088, "step": 3220 }, { "epoch": 0.13, "grad_norm": 0.8359375, "learning_rate": 0.0004999769544692866, "loss": 0.2711, "step": 3230 }, { "epoch": 0.13, "grad_norm": 1.171875, "learning_rate": 0.0004999768069805469, "loss": 0.3164, "step": 3240 }, { "epoch": 0.13, "grad_norm": 0.66015625, "learning_rate": 0.0004999766590213779, "loss": 0.2701, "step": 3250 }, { "epoch": 0.14, "grad_norm": 0.67578125, "learning_rate": 0.0004999765105917799, "loss": 0.3645, "step": 3260 }, { "epoch": 0.14, "grad_norm": 1.328125, "learning_rate": 0.000499976361691753, "loss": 0.266, "step": 3270 }, { "epoch": 0.14, "grad_norm": 0.2236328125, "learning_rate": 0.0004999762123212975, "loss": 0.2915, "step": 3280 }, { "epoch": 0.14, "grad_norm": 1.359375, "learning_rate": 0.0004999760624804139, "loss": 0.3049, "step": 3290 }, { "epoch": 0.14, "grad_norm": 0.97265625, "learning_rate": 0.0004999759121691023, "loss": 0.2528, "step": 3300 }, { "epoch": 0.14, "grad_norm": 1.0703125, "learning_rate": 0.0004999757613873631, "loss": 0.2787, "step": 3310 }, { "epoch": 0.14, "grad_norm": 1.609375, "learning_rate": 0.0004999756101351964, "loss": 0.2743, "step": 3320 }, { "epoch": 0.14, "grad_norm": 1.4453125, "learning_rate": 0.0004999754584126027, "loss": 0.3245, "step": 3330 }, { "epoch": 0.14, "grad_norm": 0.9765625, "learning_rate": 0.0004999753062195822, "loss": 0.3649, "step": 3340 }, { "epoch": 0.14, "grad_norm": 1.34375, "learning_rate": 0.0004999751535561351, "loss": 0.3301, "step": 3350 }, { "epoch": 0.14, "grad_norm": 1.078125, "learning_rate": 0.0004999750004222618, "loss": 0.2969, "step": 3360 }, { "epoch": 0.14, "grad_norm": 7.6875, "learning_rate": 0.0004999748468179624, "loss": 0.2987, "step": 3370 }, { "epoch": 0.14, "grad_norm": 0.3359375, "learning_rate": 0.0004999746927432375, "loss": 0.3099, "step": 3380 }, { "epoch": 0.14, "grad_norm": 0.40234375, "learning_rate": 0.0004999745381980872, "loss": 0.3225, "step": 3390 }, { "epoch": 0.14, "grad_norm": 0.765625, "learning_rate": 0.0004999743831825117, "loss": 0.2727, "step": 3400 }, { "epoch": 0.14, "grad_norm": 0.9609375, "learning_rate": 0.0004999742276965114, "loss": 0.3039, "step": 3410 }, { "epoch": 0.14, "grad_norm": 0.703125, "learning_rate": 0.0004999740717400868, "loss": 0.337, "step": 3420 }, { "epoch": 0.14, "grad_norm": 0.380859375, "learning_rate": 0.0004999739153132379, "loss": 0.2486, "step": 3430 }, { "epoch": 0.14, "grad_norm": 0.75390625, "learning_rate": 0.000499973758415965, "loss": 0.3389, "step": 3440 }, { "epoch": 0.14, "grad_norm": 0.478515625, "learning_rate": 0.0004999736010482685, "loss": 0.3683, "step": 3450 }, { "epoch": 0.14, "grad_norm": 1.21875, "learning_rate": 0.0004999734432101487, "loss": 0.3068, "step": 3460 }, { "epoch": 0.14, "grad_norm": 0.365234375, "learning_rate": 0.0004999732849016059, "loss": 0.325, "step": 3470 }, { "epoch": 0.14, "grad_norm": 1.046875, "learning_rate": 0.0004999731261226403, "loss": 0.2552, "step": 3480 }, { "epoch": 0.14, "grad_norm": 0.6484375, "learning_rate": 0.0004999729668732525, "loss": 0.2935, "step": 3490 }, { "epoch": 0.14, "grad_norm": 0.65625, "learning_rate": 0.0004999728071534424, "loss": 0.2868, "step": 3500 }, { "epoch": 0.15, "grad_norm": 1.5078125, "learning_rate": 0.0004999726469632104, "loss": 0.2938, "step": 3510 }, { "epoch": 0.15, "grad_norm": 0.431640625, "learning_rate": 0.000499972486302557, "loss": 0.2528, "step": 3520 }, { "epoch": 0.15, "grad_norm": 1.3359375, "learning_rate": 0.0004999723251714823, "loss": 0.272, "step": 3530 }, { "epoch": 0.15, "grad_norm": 1.0390625, "learning_rate": 0.0004999721635699867, "loss": 0.2603, "step": 3540 }, { "epoch": 0.15, "grad_norm": 0.404296875, "learning_rate": 0.0004999720014980704, "loss": 0.2908, "step": 3550 }, { "epoch": 0.15, "grad_norm": 1.125, "learning_rate": 0.000499971838955734, "loss": 0.3134, "step": 3560 }, { "epoch": 0.15, "grad_norm": 2.515625, "learning_rate": 0.0004999716759429775, "loss": 0.2135, "step": 3570 }, { "epoch": 0.15, "grad_norm": 1.1953125, "learning_rate": 0.0004999715124598013, "loss": 0.2759, "step": 3580 }, { "epoch": 0.15, "grad_norm": 0.435546875, "learning_rate": 0.0004999713485062057, "loss": 0.2508, "step": 3590 }, { "epoch": 0.15, "grad_norm": 0.484375, "learning_rate": 0.0004999711840821909, "loss": 0.3017, "step": 3600 }, { "epoch": 0.15, "grad_norm": 0.81640625, "learning_rate": 0.0004999710191877575, "loss": 0.2752, "step": 3610 }, { "epoch": 0.15, "grad_norm": 1.328125, "learning_rate": 0.0004999708538229055, "loss": 0.2971, "step": 3620 }, { "epoch": 0.15, "grad_norm": 0.8046875, "learning_rate": 0.0004999706879876354, "loss": 0.2524, "step": 3630 }, { "epoch": 0.15, "grad_norm": 2.859375, "learning_rate": 0.0004999705216819475, "loss": 0.2615, "step": 3640 }, { "epoch": 0.15, "grad_norm": 1.859375, "learning_rate": 0.000499970354905842, "loss": 0.252, "step": 3650 }, { "epoch": 0.15, "grad_norm": 1.7578125, "learning_rate": 0.0004999701876593194, "loss": 0.2969, "step": 3660 }, { "epoch": 0.15, "grad_norm": 0.62890625, "learning_rate": 0.0004999700199423798, "loss": 0.3443, "step": 3670 }, { "epoch": 0.15, "grad_norm": 0.4453125, "learning_rate": 0.0004999698517550236, "loss": 0.2812, "step": 3680 }, { "epoch": 0.15, "grad_norm": 0.640625, "learning_rate": 0.0004999696830972511, "loss": 0.2697, "step": 3690 }, { "epoch": 0.15, "grad_norm": 1.0625, "learning_rate": 0.0004999695139690628, "loss": 0.2609, "step": 3700 }, { "epoch": 0.15, "grad_norm": 0.453125, "learning_rate": 0.0004999693443704588, "loss": 0.323, "step": 3710 }, { "epoch": 0.15, "grad_norm": 2.703125, "learning_rate": 0.0004999691743014394, "loss": 0.2663, "step": 3720 }, { "epoch": 0.15, "grad_norm": 0.453125, "learning_rate": 0.0004999690037620052, "loss": 0.3385, "step": 3730 }, { "epoch": 0.15, "grad_norm": 0.875, "learning_rate": 0.0004999688327521562, "loss": 0.2737, "step": 3740 }, { "epoch": 0.16, "grad_norm": 0.484375, "learning_rate": 0.0004999686612718929, "loss": 0.2881, "step": 3750 }, { "epoch": 0.16, "grad_norm": 0.4765625, "learning_rate": 0.0004999684893212155, "loss": 0.2836, "step": 3760 }, { "epoch": 0.16, "grad_norm": 1.140625, "learning_rate": 0.0004999683169001245, "loss": 0.3308, "step": 3770 }, { "epoch": 0.16, "grad_norm": 0.6484375, "learning_rate": 0.00049996814400862, "loss": 0.2909, "step": 3780 }, { "epoch": 0.16, "grad_norm": 0.78515625, "learning_rate": 0.0004999679706467025, "loss": 0.3195, "step": 3790 }, { "epoch": 0.16, "grad_norm": 0.201171875, "learning_rate": 0.0004999677968143723, "loss": 0.2826, "step": 3800 }, { "epoch": 0.16, "grad_norm": 0.7578125, "learning_rate": 0.0004999676225116297, "loss": 0.3067, "step": 3810 }, { "epoch": 0.16, "grad_norm": 0.96875, "learning_rate": 0.000499967447738475, "loss": 0.375, "step": 3820 }, { "epoch": 0.16, "grad_norm": 1.03125, "learning_rate": 0.0004999672724949086, "loss": 0.3255, "step": 3830 }, { "epoch": 0.16, "grad_norm": 1.6484375, "learning_rate": 0.0004999670967809307, "loss": 0.2972, "step": 3840 }, { "epoch": 0.16, "grad_norm": 0.0, "learning_rate": 0.0004999669205965418, "loss": 0.2676, "step": 3850 }, { "epoch": 0.16, "grad_norm": 0.703125, "learning_rate": 0.0004999667439417421, "loss": 0.3539, "step": 3860 }, { "epoch": 0.16, "grad_norm": 1.296875, "learning_rate": 0.0004999665668165321, "loss": 0.3361, "step": 3870 }, { "epoch": 0.16, "grad_norm": 1.0, "learning_rate": 0.0004999663892209119, "loss": 0.3094, "step": 3880 }, { "epoch": 0.16, "grad_norm": 0.9296875, "learning_rate": 0.0004999662111548819, "loss": 0.2927, "step": 3890 }, { "epoch": 0.16, "grad_norm": 0.447265625, "learning_rate": 0.0004999660326184427, "loss": 0.2805, "step": 3900 }, { "epoch": 0.16, "grad_norm": 1.640625, "learning_rate": 0.0004999658536115942, "loss": 0.2376, "step": 3910 }, { "epoch": 0.16, "grad_norm": 1.328125, "learning_rate": 0.000499965674134337, "loss": 0.2676, "step": 3920 }, { "epoch": 0.16, "grad_norm": 1.1171875, "learning_rate": 0.0004999654941866715, "loss": 0.311, "step": 3930 }, { "epoch": 0.16, "grad_norm": 0.77734375, "learning_rate": 0.0004999653137685979, "loss": 0.2543, "step": 3940 }, { "epoch": 0.16, "grad_norm": 0.84375, "learning_rate": 0.0004999651328801164, "loss": 0.2481, "step": 3950 }, { "epoch": 0.16, "grad_norm": 0.72265625, "learning_rate": 0.0004999649515212277, "loss": 0.2252, "step": 3960 }, { "epoch": 0.16, "grad_norm": 0.56640625, "learning_rate": 0.0004999647696919319, "loss": 0.343, "step": 3970 }, { "epoch": 0.16, "grad_norm": 0.578125, "learning_rate": 0.0004999645873922295, "loss": 0.2701, "step": 3980 }, { "epoch": 0.17, "grad_norm": 0.5078125, "learning_rate": 0.0004999644046221205, "loss": 0.35, "step": 3990 }, { "epoch": 0.17, "grad_norm": 1.3046875, "learning_rate": 0.0004999642213816057, "loss": 0.2573, "step": 4000 }, { "epoch": 0.17, "grad_norm": 0.3984375, "learning_rate": 0.0004999640376706852, "loss": 0.3026, "step": 4010 }, { "epoch": 0.17, "grad_norm": 1.5, "learning_rate": 0.0004999638534893593, "loss": 0.2408, "step": 4020 }, { "epoch": 0.17, "grad_norm": 1.53125, "learning_rate": 0.0004999636688376285, "loss": 0.278, "step": 4030 }, { "epoch": 0.17, "grad_norm": 0.29296875, "learning_rate": 0.0004999634837154931, "loss": 0.2663, "step": 4040 }, { "epoch": 0.17, "grad_norm": 1.3984375, "learning_rate": 0.0004999632981229533, "loss": 0.234, "step": 4050 }, { "epoch": 0.17, "grad_norm": 0.83203125, "learning_rate": 0.0004999631120600096, "loss": 0.2501, "step": 4060 }, { "epoch": 0.17, "grad_norm": 1.828125, "learning_rate": 0.0004999629255266623, "loss": 0.2926, "step": 4070 }, { "epoch": 0.17, "grad_norm": 0.96875, "learning_rate": 0.0004999627385229118, "loss": 0.3074, "step": 4080 }, { "epoch": 0.17, "grad_norm": 0.291015625, "learning_rate": 0.0004999625510487584, "loss": 0.2032, "step": 4090 }, { "epoch": 0.17, "grad_norm": 0.58984375, "learning_rate": 0.0004999623631042025, "loss": 0.2784, "step": 4100 }, { "epoch": 0.17, "grad_norm": 1.1953125, "learning_rate": 0.0004999621746892445, "loss": 0.3041, "step": 4110 }, { "epoch": 0.17, "grad_norm": 0.625, "learning_rate": 0.0004999619858038845, "loss": 0.2728, "step": 4120 }, { "epoch": 0.17, "grad_norm": 0.8046875, "learning_rate": 0.0004999617964481231, "loss": 0.3083, "step": 4130 }, { "epoch": 0.17, "grad_norm": 0.376953125, "learning_rate": 0.0004999616066219606, "loss": 0.2734, "step": 4140 }, { "epoch": 0.17, "grad_norm": 0.5, "learning_rate": 0.0004999614163253974, "loss": 0.2871, "step": 4150 }, { "epoch": 0.17, "grad_norm": 1.4140625, "learning_rate": 0.0004999612255584338, "loss": 0.2708, "step": 4160 }, { "epoch": 0.17, "grad_norm": 0.79296875, "learning_rate": 0.0004999610343210701, "loss": 0.2589, "step": 4170 }, { "epoch": 0.17, "grad_norm": 0.5234375, "learning_rate": 0.0004999608426133069, "loss": 0.2684, "step": 4180 }, { "epoch": 0.17, "grad_norm": 1.984375, "learning_rate": 0.0004999606504351441, "loss": 0.2734, "step": 4190 }, { "epoch": 0.17, "grad_norm": 0.8125, "learning_rate": 0.0004999604577865827, "loss": 0.3128, "step": 4200 }, { "epoch": 0.17, "grad_norm": 0.345703125, "learning_rate": 0.0004999602646676225, "loss": 0.2464, "step": 4210 }, { "epoch": 0.17, "grad_norm": 0.78125, "learning_rate": 0.0004999600710782641, "loss": 0.3485, "step": 4220 }, { "epoch": 0.18, "grad_norm": 0.875, "learning_rate": 0.0004999598770185077, "loss": 0.2814, "step": 4230 }, { "epoch": 0.18, "grad_norm": 1.8046875, "learning_rate": 0.000499959682488354, "loss": 0.3007, "step": 4240 }, { "epoch": 0.18, "grad_norm": 0.81640625, "learning_rate": 0.0004999594874878031, "loss": 0.2577, "step": 4250 }, { "epoch": 0.18, "grad_norm": 0.67578125, "learning_rate": 0.0004999592920168555, "loss": 0.2729, "step": 4260 }, { "epoch": 0.18, "grad_norm": 1.984375, "learning_rate": 0.0004999590960755114, "loss": 0.2438, "step": 4270 }, { "epoch": 0.18, "grad_norm": 0.2578125, "learning_rate": 0.0004999588996637714, "loss": 0.2645, "step": 4280 }, { "epoch": 0.18, "grad_norm": 1.8125, "learning_rate": 0.0004999587027816356, "loss": 0.2711, "step": 4290 }, { "epoch": 0.18, "grad_norm": 1.2109375, "learning_rate": 0.0004999585054291046, "loss": 0.3607, "step": 4300 }, { "epoch": 0.18, "grad_norm": 0.455078125, "learning_rate": 0.0004999583076061787, "loss": 0.3275, "step": 4310 }, { "epoch": 0.18, "grad_norm": 0.84375, "learning_rate": 0.0004999581093128582, "loss": 0.2892, "step": 4320 }, { "epoch": 0.18, "grad_norm": 0.328125, "learning_rate": 0.0004999579105491437, "loss": 0.2947, "step": 4330 }, { "epoch": 0.18, "grad_norm": 0.7578125, "learning_rate": 0.0004999577113150352, "loss": 0.3011, "step": 4340 }, { "epoch": 0.18, "grad_norm": 0.640625, "learning_rate": 0.0004999575116105333, "loss": 0.2475, "step": 4350 }, { "epoch": 0.18, "grad_norm": 0.97265625, "learning_rate": 0.0004999573114356384, "loss": 0.2808, "step": 4360 }, { "epoch": 0.18, "grad_norm": 0.70703125, "learning_rate": 0.0004999571107903508, "loss": 0.3107, "step": 4370 }, { "epoch": 0.18, "grad_norm": 0.48046875, "learning_rate": 0.000499956909674671, "loss": 0.2195, "step": 4380 }, { "epoch": 0.18, "grad_norm": 0.84375, "learning_rate": 0.0004999567080885992, "loss": 0.2721, "step": 4390 }, { "epoch": 0.18, "grad_norm": 1.125, "learning_rate": 0.000499956506032136, "loss": 0.3345, "step": 4400 }, { "epoch": 0.18, "grad_norm": 0.490234375, "learning_rate": 0.0004999563035052815, "loss": 0.2697, "step": 4410 }, { "epoch": 0.18, "grad_norm": 0.7109375, "learning_rate": 0.0004999561005080363, "loss": 0.2423, "step": 4420 }, { "epoch": 0.18, "grad_norm": 0.7578125, "learning_rate": 0.0004999558970404007, "loss": 0.2606, "step": 4430 }, { "epoch": 0.18, "grad_norm": 0.1845703125, "learning_rate": 0.0004999556931023751, "loss": 0.2731, "step": 4440 }, { "epoch": 0.18, "grad_norm": 1.1796875, "learning_rate": 0.0004999554886939599, "loss": 0.2086, "step": 4450 }, { "epoch": 0.18, "grad_norm": 0.8515625, "learning_rate": 0.0004999552838151555, "loss": 0.2924, "step": 4460 }, { "epoch": 0.19, "grad_norm": 0.78125, "learning_rate": 0.0004999550784659621, "loss": 0.2017, "step": 4470 }, { "epoch": 0.19, "grad_norm": 1.25, "learning_rate": 0.0004999548726463803, "loss": 0.2898, "step": 4480 }, { "epoch": 0.19, "grad_norm": 1.0234375, "learning_rate": 0.0004999546663564104, "loss": 0.2527, "step": 4490 }, { "epoch": 0.19, "grad_norm": 0.72265625, "learning_rate": 0.0004999544595960529, "loss": 0.2789, "step": 4500 }, { "epoch": 0.19, "grad_norm": 1.078125, "learning_rate": 0.0004999542523653081, "loss": 0.2697, "step": 4510 }, { "epoch": 0.19, "grad_norm": 0.57421875, "learning_rate": 0.0004999540446641764, "loss": 0.2611, "step": 4520 }, { "epoch": 0.19, "grad_norm": 0.52734375, "learning_rate": 0.000499953836492658, "loss": 0.2665, "step": 4530 }, { "epoch": 0.19, "grad_norm": 0.51953125, "learning_rate": 0.0004999536278507536, "loss": 0.2592, "step": 4540 }, { "epoch": 0.19, "grad_norm": 0.69140625, "learning_rate": 0.0004999534187384634, "loss": 0.2374, "step": 4550 }, { "epoch": 0.19, "grad_norm": 1.390625, "learning_rate": 0.000499953209155788, "loss": 0.2703, "step": 4560 }, { "epoch": 0.19, "grad_norm": 0.86328125, "learning_rate": 0.0004999529991027275, "loss": 0.2952, "step": 4570 }, { "epoch": 0.19, "grad_norm": 1.9609375, "learning_rate": 0.0004999527885792826, "loss": 0.2998, "step": 4580 }, { "epoch": 0.19, "grad_norm": 0.54296875, "learning_rate": 0.0004999525775854534, "loss": 0.2716, "step": 4590 }, { "epoch": 0.19, "grad_norm": 0.765625, "learning_rate": 0.0004999523661212405, "loss": 0.2741, "step": 4600 }, { "epoch": 0.19, "grad_norm": 0.625, "learning_rate": 0.0004999521541866443, "loss": 0.3269, "step": 4610 }, { "epoch": 0.19, "grad_norm": 0.6484375, "learning_rate": 0.0004999519417816651, "loss": 0.325, "step": 4620 }, { "epoch": 0.19, "grad_norm": 0.76171875, "learning_rate": 0.0004999517289063033, "loss": 0.2495, "step": 4630 }, { "epoch": 0.19, "grad_norm": 0.83984375, "learning_rate": 0.0004999515155605594, "loss": 0.2341, "step": 4640 }, { "epoch": 0.19, "grad_norm": 0.41015625, "learning_rate": 0.0004999513017444337, "loss": 0.2627, "step": 4650 }, { "epoch": 0.19, "grad_norm": 0.51171875, "learning_rate": 0.0004999510874579266, "loss": 0.2857, "step": 4660 }, { "epoch": 0.19, "grad_norm": 2.703125, "learning_rate": 0.0004999508727010386, "loss": 0.3004, "step": 4670 }, { "epoch": 0.19, "grad_norm": 1.375, "learning_rate": 0.0004999506574737701, "loss": 0.2956, "step": 4680 }, { "epoch": 0.19, "grad_norm": 0.54296875, "learning_rate": 0.0004999504417761214, "loss": 0.2723, "step": 4690 }, { "epoch": 0.19, "grad_norm": 0.51953125, "learning_rate": 0.0004999502256080928, "loss": 0.2646, "step": 4700 }, { "epoch": 0.2, "grad_norm": 0.3046875, "learning_rate": 0.0004999500089696851, "loss": 0.2751, "step": 4710 }, { "epoch": 0.2, "grad_norm": 0.83203125, "learning_rate": 0.0004999497918608984, "loss": 0.3105, "step": 4720 }, { "epoch": 0.2, "grad_norm": 0.70703125, "learning_rate": 0.0004999495742817332, "loss": 0.2986, "step": 4730 }, { "epoch": 0.2, "grad_norm": 0.55078125, "learning_rate": 0.0004999493562321899, "loss": 0.2921, "step": 4740 }, { "epoch": 0.2, "grad_norm": 0.7734375, "learning_rate": 0.0004999491377122689, "loss": 0.2576, "step": 4750 }, { "epoch": 0.2, "grad_norm": 0.296875, "learning_rate": 0.0004999489187219705, "loss": 0.2669, "step": 4760 }, { "epoch": 0.2, "grad_norm": 1.1328125, "learning_rate": 0.0004999486992612954, "loss": 0.3043, "step": 4770 }, { "epoch": 0.2, "grad_norm": 1.1640625, "learning_rate": 0.0004999484793302437, "loss": 0.2603, "step": 4780 }, { "epoch": 0.2, "grad_norm": 0.2080078125, "learning_rate": 0.0004999482589288161, "loss": 0.2197, "step": 4790 }, { "epoch": 0.2, "grad_norm": 0.40234375, "learning_rate": 0.0004999480380570127, "loss": 0.2672, "step": 4800 }, { "epoch": 0.2, "grad_norm": 0.96875, "learning_rate": 0.0004999478167148342, "loss": 0.2163, "step": 4810 }, { "epoch": 0.2, "grad_norm": 0.62890625, "learning_rate": 0.0004999475949022809, "loss": 0.2583, "step": 4820 }, { "epoch": 0.2, "grad_norm": 1.03125, "learning_rate": 0.0004999473726193532, "loss": 0.2959, "step": 4830 }, { "epoch": 0.2, "grad_norm": 0.40625, "learning_rate": 0.0004999471498660515, "loss": 0.2997, "step": 4840 }, { "epoch": 0.2, "grad_norm": 0.5390625, "learning_rate": 0.0004999469266423762, "loss": 0.2495, "step": 4850 }, { "epoch": 0.2, "grad_norm": 0.498046875, "learning_rate": 0.0004999467029483279, "loss": 0.2332, "step": 4860 }, { "epoch": 0.2, "grad_norm": 1.4921875, "learning_rate": 0.0004999464787839069, "loss": 0.3074, "step": 4870 }, { "epoch": 0.2, "grad_norm": 1.0078125, "learning_rate": 0.0004999462541491136, "loss": 0.2415, "step": 4880 }, { "epoch": 0.2, "grad_norm": 1.21875, "learning_rate": 0.0004999460290439484, "loss": 0.3081, "step": 4890 }, { "epoch": 0.2, "grad_norm": 0.83203125, "learning_rate": 0.0004999458034684117, "loss": 0.2545, "step": 4900 }, { "epoch": 0.2, "grad_norm": 1.109375, "learning_rate": 0.0004999455774225041, "loss": 0.3227, "step": 4910 }, { "epoch": 0.2, "grad_norm": 0.2490234375, "learning_rate": 0.0004999453509062259, "loss": 0.2479, "step": 4920 }, { "epoch": 0.2, "grad_norm": 0.37109375, "learning_rate": 0.0004999451239195775, "loss": 0.2326, "step": 4930 }, { "epoch": 0.2, "grad_norm": 0.4453125, "learning_rate": 0.0004999448964625593, "loss": 0.2613, "step": 4940 }, { "epoch": 0.21, "grad_norm": 0.63671875, "learning_rate": 0.000499944668535172, "loss": 0.2278, "step": 4950 }, { "epoch": 0.21, "grad_norm": 0.56640625, "learning_rate": 0.0004999444401374157, "loss": 0.2715, "step": 4960 }, { "epoch": 0.21, "grad_norm": 1.3515625, "learning_rate": 0.000499944211269291, "loss": 0.2595, "step": 4970 }, { "epoch": 0.21, "grad_norm": 0.64453125, "learning_rate": 0.0004999439819307983, "loss": 0.2903, "step": 4980 }, { "epoch": 0.21, "grad_norm": 0.6328125, "learning_rate": 0.0004999437521219378, "loss": 0.2561, "step": 4990 }, { "epoch": 0.21, "grad_norm": 1.015625, "learning_rate": 0.0004999435218427104, "loss": 0.2737, "step": 5000 }, { "epoch": 0.21, "grad_norm": 2.15625, "learning_rate": 0.0004999432910931162, "loss": 0.2805, "step": 5010 }, { "epoch": 0.21, "grad_norm": 0.79296875, "learning_rate": 0.0004999430598731558, "loss": 0.2532, "step": 5020 }, { "epoch": 0.21, "grad_norm": 0.77734375, "learning_rate": 0.0004999428281828295, "loss": 0.2889, "step": 5030 }, { "epoch": 0.21, "grad_norm": 0.921875, "learning_rate": 0.0004999425960221378, "loss": 0.2324, "step": 5040 }, { "epoch": 0.21, "grad_norm": 0.6171875, "learning_rate": 0.000499942363391081, "loss": 0.2554, "step": 5050 }, { "epoch": 0.21, "grad_norm": 0.62109375, "learning_rate": 0.0004999421302896598, "loss": 0.2961, "step": 5060 }, { "epoch": 0.21, "grad_norm": 0.58984375, "learning_rate": 0.0004999418967178744, "loss": 0.2982, "step": 5070 }, { "epoch": 0.21, "grad_norm": 1.078125, "learning_rate": 0.0004999416626757253, "loss": 0.2943, "step": 5080 }, { "epoch": 0.21, "grad_norm": 0.79296875, "learning_rate": 0.0004999414281632132, "loss": 0.2682, "step": 5090 }, { "epoch": 0.21, "grad_norm": 1.09375, "learning_rate": 0.0004999411931803382, "loss": 0.2447, "step": 5100 }, { "epoch": 0.21, "grad_norm": 0.62109375, "learning_rate": 0.0004999409577271009, "loss": 0.2873, "step": 5110 }, { "epoch": 0.21, "grad_norm": 1.1875, "learning_rate": 0.0004999407218035017, "loss": 0.3185, "step": 5120 }, { "epoch": 0.21, "grad_norm": 0.6796875, "learning_rate": 0.000499940485409541, "loss": 0.2467, "step": 5130 }, { "epoch": 0.21, "grad_norm": 0.48046875, "learning_rate": 0.0004999402485452194, "loss": 0.2611, "step": 5140 }, { "epoch": 0.21, "grad_norm": 0.80859375, "learning_rate": 0.0004999400112105371, "loss": 0.203, "step": 5150 }, { "epoch": 0.21, "grad_norm": 0.52734375, "learning_rate": 0.0004999397734054948, "loss": 0.3108, "step": 5160 }, { "epoch": 0.21, "grad_norm": 1.8671875, "learning_rate": 0.0004999395351300928, "loss": 0.2525, "step": 5170 }, { "epoch": 0.21, "grad_norm": 0.93359375, "learning_rate": 0.0004999392963843316, "loss": 0.233, "step": 5180 }, { "epoch": 0.21, "grad_norm": 0.5390625, "learning_rate": 0.0004999390571682116, "loss": 0.282, "step": 5190 }, { "epoch": 0.22, "grad_norm": 1.1171875, "learning_rate": 0.0004999388174817334, "loss": 0.3435, "step": 5200 }, { "epoch": 0.22, "grad_norm": 0.33984375, "learning_rate": 0.0004999385773248971, "loss": 0.3237, "step": 5210 }, { "epoch": 0.22, "grad_norm": 2.4375, "learning_rate": 0.0004999383366977036, "loss": 0.2247, "step": 5220 }, { "epoch": 0.22, "grad_norm": 1.09375, "learning_rate": 0.000499938095600153, "loss": 0.2347, "step": 5230 }, { "epoch": 0.22, "grad_norm": 0.96484375, "learning_rate": 0.000499937854032246, "loss": 0.2529, "step": 5240 }, { "epoch": 0.22, "grad_norm": 0.78515625, "learning_rate": 0.0004999376119939829, "loss": 0.2809, "step": 5250 }, { "epoch": 0.22, "grad_norm": 0.9140625, "learning_rate": 0.0004999373694853643, "loss": 0.349, "step": 5260 }, { "epoch": 0.22, "grad_norm": 0.55859375, "learning_rate": 0.0004999371265063904, "loss": 0.2144, "step": 5270 }, { "epoch": 0.22, "grad_norm": 0.0, "learning_rate": 0.000499936883057062, "loss": 0.2463, "step": 5280 }, { "epoch": 0.22, "grad_norm": 0.3984375, "learning_rate": 0.0004999366391373793, "loss": 0.213, "step": 5290 }, { "epoch": 0.22, "grad_norm": 0.51171875, "learning_rate": 0.0004999363947473428, "loss": 0.2825, "step": 5300 }, { "epoch": 0.22, "grad_norm": 0.9140625, "learning_rate": 0.000499936149886953, "loss": 0.2826, "step": 5310 }, { "epoch": 0.22, "grad_norm": 0.61328125, "learning_rate": 0.0004999359045562104, "loss": 0.2517, "step": 5320 }, { "epoch": 0.22, "grad_norm": 0.56640625, "learning_rate": 0.0004999356587551154, "loss": 0.2937, "step": 5330 }, { "epoch": 0.22, "grad_norm": 0.287109375, "learning_rate": 0.0004999354124836684, "loss": 0.2733, "step": 5340 }, { "epoch": 0.22, "grad_norm": 1.2109375, "learning_rate": 0.0004999351657418701, "loss": 0.3671, "step": 5350 }, { "epoch": 0.22, "grad_norm": 0.7265625, "learning_rate": 0.0004999349185297207, "loss": 0.3006, "step": 5360 }, { "epoch": 0.22, "grad_norm": 0.5234375, "learning_rate": 0.0004999346708472208, "loss": 0.2936, "step": 5370 }, { "epoch": 0.22, "grad_norm": 0.58203125, "learning_rate": 0.0004999344226943708, "loss": 0.3282, "step": 5380 }, { "epoch": 0.22, "grad_norm": 0.6328125, "learning_rate": 0.0004999341740711713, "loss": 0.233, "step": 5390 }, { "epoch": 0.22, "grad_norm": 1.1796875, "learning_rate": 0.0004999339249776225, "loss": 0.2972, "step": 5400 }, { "epoch": 0.22, "grad_norm": 1.484375, "learning_rate": 0.0004999336754137252, "loss": 0.2508, "step": 5410 }, { "epoch": 0.22, "grad_norm": 0.478515625, "learning_rate": 0.0004999334253794797, "loss": 0.3503, "step": 5420 }, { "epoch": 0.22, "grad_norm": 0.275390625, "learning_rate": 0.0004999331748748864, "loss": 0.2447, "step": 5430 }, { "epoch": 0.23, "grad_norm": 1.25, "learning_rate": 0.0004999329238999459, "loss": 0.2452, "step": 5440 }, { "epoch": 0.23, "grad_norm": 0.59765625, "learning_rate": 0.0004999326724546587, "loss": 0.2644, "step": 5450 }, { "epoch": 0.23, "grad_norm": 0.8125, "learning_rate": 0.0004999324205390252, "loss": 0.3061, "step": 5460 }, { "epoch": 0.23, "grad_norm": 0.75, "learning_rate": 0.0004999321681530458, "loss": 0.257, "step": 5470 }, { "epoch": 0.23, "grad_norm": 0.56640625, "learning_rate": 0.000499931915296721, "loss": 0.267, "step": 5480 }, { "epoch": 0.23, "grad_norm": 1.3984375, "learning_rate": 0.0004999316619700515, "loss": 0.252, "step": 5490 }, { "epoch": 0.23, "grad_norm": 1.4453125, "learning_rate": 0.0004999314081730374, "loss": 0.2536, "step": 5500 }, { "epoch": 0.23, "grad_norm": 1.1328125, "learning_rate": 0.0004999311539056796, "loss": 0.2684, "step": 5510 }, { "epoch": 0.23, "grad_norm": 0.1884765625, "learning_rate": 0.0004999308991679781, "loss": 0.2814, "step": 5520 }, { "epoch": 0.23, "grad_norm": 0.65625, "learning_rate": 0.0004999306439599338, "loss": 0.2445, "step": 5530 }, { "epoch": 0.23, "grad_norm": 0.69140625, "learning_rate": 0.000499930388281547, "loss": 0.3406, "step": 5540 }, { "epoch": 0.23, "grad_norm": 2.84375, "learning_rate": 0.0004999301321328182, "loss": 0.2602, "step": 5550 }, { "epoch": 0.23, "grad_norm": 1.75, "learning_rate": 0.000499929875513748, "loss": 0.2742, "step": 5560 }, { "epoch": 0.23, "grad_norm": 0.6875, "learning_rate": 0.0004999296184243365, "loss": 0.2872, "step": 5570 }, { "epoch": 0.23, "grad_norm": 0.76953125, "learning_rate": 0.0004999293608645846, "loss": 0.2783, "step": 5580 }, { "epoch": 0.23, "grad_norm": 0.6875, "learning_rate": 0.0004999291028344926, "loss": 0.2612, "step": 5590 }, { "epoch": 0.23, "grad_norm": 3.296875, "learning_rate": 0.0004999288443340611, "loss": 0.2853, "step": 5600 }, { "epoch": 0.23, "grad_norm": 1.46875, "learning_rate": 0.0004999285853632905, "loss": 0.2347, "step": 5610 }, { "epoch": 0.23, "grad_norm": 0.88671875, "learning_rate": 0.0004999283259221811, "loss": 0.2836, "step": 5620 }, { "epoch": 0.23, "grad_norm": 1.0703125, "learning_rate": 0.0004999280660107337, "loss": 0.2657, "step": 5630 }, { "epoch": 0.23, "grad_norm": 1.5234375, "learning_rate": 0.0004999278056289487, "loss": 0.2512, "step": 5640 }, { "epoch": 0.23, "grad_norm": 4.0625, "learning_rate": 0.0004999275447768266, "loss": 0.2362, "step": 5650 }, { "epoch": 0.23, "grad_norm": 0.4453125, "learning_rate": 0.0004999272834543678, "loss": 0.2655, "step": 5660 }, { "epoch": 0.23, "grad_norm": 1.140625, "learning_rate": 0.0004999270216615728, "loss": 0.2737, "step": 5670 }, { "epoch": 0.24, "grad_norm": 0.45703125, "learning_rate": 0.0004999267593984422, "loss": 0.2887, "step": 5680 }, { "epoch": 0.24, "grad_norm": 0.38671875, "learning_rate": 0.0004999264966649763, "loss": 0.266, "step": 5690 }, { "epoch": 0.24, "grad_norm": 1.8046875, "learning_rate": 0.0004999262334611759, "loss": 0.2254, "step": 5700 }, { "epoch": 0.24, "grad_norm": 1.0234375, "learning_rate": 0.0004999259697870413, "loss": 0.3001, "step": 5710 }, { "epoch": 0.24, "grad_norm": 0.345703125, "learning_rate": 0.0004999257056425729, "loss": 0.2493, "step": 5720 }, { "epoch": 0.24, "grad_norm": 0.828125, "learning_rate": 0.0004999254410277714, "loss": 0.3177, "step": 5730 }, { "epoch": 0.24, "grad_norm": 1.3125, "learning_rate": 0.0004999251759426372, "loss": 0.2835, "step": 5740 }, { "epoch": 0.24, "grad_norm": 1.1328125, "learning_rate": 0.0004999249103871707, "loss": 0.199, "step": 5750 }, { "epoch": 0.24, "grad_norm": 0.67578125, "learning_rate": 0.0004999246443613726, "loss": 0.3123, "step": 5760 }, { "epoch": 0.24, "grad_norm": 0.5625, "learning_rate": 0.0004999243778652433, "loss": 0.2364, "step": 5770 }, { "epoch": 0.24, "grad_norm": 0.5859375, "learning_rate": 0.0004999241108987833, "loss": 0.3185, "step": 5780 }, { "epoch": 0.24, "grad_norm": 1.015625, "learning_rate": 0.0004999238434619932, "loss": 0.2743, "step": 5790 }, { "epoch": 0.24, "grad_norm": 0.84375, "learning_rate": 0.0004999235755548733, "loss": 0.2819, "step": 5800 }, { "epoch": 0.24, "grad_norm": 0.828125, "learning_rate": 0.0004999233071774243, "loss": 0.2655, "step": 5810 }, { "epoch": 0.24, "grad_norm": 0.69921875, "learning_rate": 0.0004999230383296466, "loss": 0.2494, "step": 5820 }, { "epoch": 0.24, "grad_norm": 0.62890625, "learning_rate": 0.0004999227690115407, "loss": 0.2733, "step": 5830 }, { "epoch": 0.24, "grad_norm": 1.359375, "learning_rate": 0.0004999224992231072, "loss": 0.275, "step": 5840 }, { "epoch": 0.24, "grad_norm": 0.65234375, "learning_rate": 0.0004999222289643465, "loss": 0.2669, "step": 5850 }, { "epoch": 0.24, "grad_norm": 0.81640625, "learning_rate": 0.0004999219582352591, "loss": 0.2788, "step": 5860 }, { "epoch": 0.24, "grad_norm": 0.29296875, "learning_rate": 0.0004999216870358456, "loss": 0.2962, "step": 5870 }, { "epoch": 0.24, "grad_norm": 0.96484375, "learning_rate": 0.0004999214153661065, "loss": 0.3491, "step": 5880 }, { "epoch": 0.24, "grad_norm": 2.328125, "learning_rate": 0.0004999211432260423, "loss": 0.2553, "step": 5890 }, { "epoch": 0.24, "grad_norm": 1.3359375, "learning_rate": 0.0004999208706156535, "loss": 0.2887, "step": 5900 }, { "epoch": 0.24, "grad_norm": 0.85546875, "learning_rate": 0.0004999205975349405, "loss": 0.2612, "step": 5910 }, { "epoch": 0.25, "grad_norm": 0.9375, "learning_rate": 0.0004999203239839041, "loss": 0.2811, "step": 5920 }, { "epoch": 0.25, "grad_norm": 0.796875, "learning_rate": 0.0004999200499625446, "loss": 0.2453, "step": 5930 }, { "epoch": 0.25, "grad_norm": 0.416015625, "learning_rate": 0.0004999197754708625, "loss": 0.2228, "step": 5940 }, { "epoch": 0.25, "grad_norm": 0.46875, "learning_rate": 0.0004999195005088584, "loss": 0.2179, "step": 5950 }, { "epoch": 0.25, "grad_norm": 0.83203125, "learning_rate": 0.0004999192250765328, "loss": 0.2533, "step": 5960 }, { "epoch": 0.25, "grad_norm": 0.2490234375, "learning_rate": 0.0004999189491738861, "loss": 0.2556, "step": 5970 }, { "epoch": 0.25, "grad_norm": 1.1328125, "learning_rate": 0.000499918672800919, "loss": 0.2838, "step": 5980 }, { "epoch": 0.25, "grad_norm": 1.4140625, "learning_rate": 0.0004999183959576319, "loss": 0.3194, "step": 5990 }, { "epoch": 0.25, "grad_norm": 0.8125, "learning_rate": 0.0004999181186440255, "loss": 0.2582, "step": 6000 }, { "epoch": 0.25, "grad_norm": 0.5546875, "learning_rate": 0.0004999178408601001, "loss": 0.2744, "step": 6010 }, { "epoch": 0.25, "grad_norm": 0.0, "learning_rate": 0.0004999175626058563, "loss": 0.2508, "step": 6020 }, { "epoch": 0.25, "grad_norm": 0.859375, "learning_rate": 0.0004999172838812947, "loss": 0.2944, "step": 6030 }, { "epoch": 0.25, "grad_norm": 0.66015625, "learning_rate": 0.0004999170046864156, "loss": 0.2641, "step": 6040 }, { "epoch": 0.25, "grad_norm": 0.71484375, "learning_rate": 0.0004999167250212199, "loss": 0.2694, "step": 6050 }, { "epoch": 0.25, "grad_norm": 0.37890625, "learning_rate": 0.0004999164448857078, "loss": 0.2722, "step": 6060 }, { "epoch": 0.25, "grad_norm": 0.97265625, "learning_rate": 0.0004999161642798799, "loss": 0.328, "step": 6070 }, { "epoch": 0.25, "grad_norm": 0.3046875, "learning_rate": 0.0004999158832037368, "loss": 0.2708, "step": 6080 }, { "epoch": 0.25, "grad_norm": 0.5390625, "learning_rate": 0.0004999156016572791, "loss": 0.2589, "step": 6090 }, { "epoch": 0.25, "grad_norm": 0.5625, "learning_rate": 0.0004999153196405071, "loss": 0.2697, "step": 6100 }, { "epoch": 0.25, "grad_norm": 0.62109375, "learning_rate": 0.0004999150371534215, "loss": 0.2656, "step": 6110 }, { "epoch": 0.25, "grad_norm": 0.234375, "learning_rate": 0.0004999147541960228, "loss": 0.2574, "step": 6120 }, { "epoch": 0.25, "grad_norm": 0.47265625, "learning_rate": 0.0004999144707683116, "loss": 0.2504, "step": 6130 }, { "epoch": 0.25, "grad_norm": 0.94140625, "learning_rate": 0.0004999141868702882, "loss": 0.283, "step": 6140 }, { "epoch": 0.25, "grad_norm": 0.330078125, "learning_rate": 0.0004999139025019533, "loss": 0.2157, "step": 6150 }, { "epoch": 0.26, "grad_norm": 2.453125, "learning_rate": 0.0004999136176633075, "loss": 0.1966, "step": 6160 }, { "epoch": 0.26, "grad_norm": 0.5234375, "learning_rate": 0.0004999133323543512, "loss": 0.2488, "step": 6170 }, { "epoch": 0.26, "grad_norm": 0.55078125, "learning_rate": 0.0004999130465750851, "loss": 0.2782, "step": 6180 }, { "epoch": 0.26, "grad_norm": 1.3046875, "learning_rate": 0.0004999127603255095, "loss": 0.249, "step": 6190 }, { "epoch": 0.26, "grad_norm": 0.9453125, "learning_rate": 0.0004999124736056252, "loss": 0.2818, "step": 6200 }, { "epoch": 0.26, "grad_norm": 0.9921875, "learning_rate": 0.0004999121864154325, "loss": 0.2484, "step": 6210 }, { "epoch": 0.26, "grad_norm": 1.0546875, "learning_rate": 0.0004999118987549321, "loss": 0.3662, "step": 6220 }, { "epoch": 0.26, "grad_norm": 0.94921875, "learning_rate": 0.0004999116106241245, "loss": 0.2629, "step": 6230 }, { "epoch": 0.26, "grad_norm": 0.90234375, "learning_rate": 0.0004999113220230103, "loss": 0.296, "step": 6240 }, { "epoch": 0.26, "grad_norm": 0.61328125, "learning_rate": 0.0004999110329515899, "loss": 0.2875, "step": 6250 }, { "epoch": 0.26, "grad_norm": 0.69921875, "learning_rate": 0.0004999107434098639, "loss": 0.3191, "step": 6260 }, { "epoch": 0.26, "grad_norm": 0.51953125, "learning_rate": 0.000499910453397833, "loss": 0.2137, "step": 6270 }, { "epoch": 0.26, "grad_norm": 0.49609375, "learning_rate": 0.0004999101629154975, "loss": 0.3463, "step": 6280 }, { "epoch": 0.26, "grad_norm": 0.96484375, "learning_rate": 0.0004999098719628581, "loss": 0.2234, "step": 6290 }, { "epoch": 0.26, "grad_norm": 0.98046875, "learning_rate": 0.0004999095805399153, "loss": 0.3228, "step": 6300 }, { "epoch": 0.26, "grad_norm": 0.341796875, "learning_rate": 0.0004999092886466696, "loss": 0.3047, "step": 6310 }, { "epoch": 0.26, "grad_norm": 1.0859375, "learning_rate": 0.0004999089962831217, "loss": 0.256, "step": 6320 }, { "epoch": 0.26, "grad_norm": 0.294921875, "learning_rate": 0.000499908703449272, "loss": 0.2991, "step": 6330 }, { "epoch": 0.26, "grad_norm": 0.228515625, "learning_rate": 0.0004999084101451211, "loss": 0.256, "step": 6340 }, { "epoch": 0.26, "grad_norm": 0.484375, "learning_rate": 0.0004999081163706696, "loss": 0.229, "step": 6350 }, { "epoch": 0.26, "grad_norm": 0.88671875, "learning_rate": 0.000499907822125918, "loss": 0.2649, "step": 6360 }, { "epoch": 0.26, "grad_norm": 0.578125, "learning_rate": 0.0004999075274108669, "loss": 0.2622, "step": 6370 }, { "epoch": 0.26, "grad_norm": 0.67578125, "learning_rate": 0.0004999072322255167, "loss": 0.2987, "step": 6380 }, { "epoch": 0.26, "grad_norm": 0.56640625, "learning_rate": 0.0004999069365698681, "loss": 0.2557, "step": 6390 }, { "epoch": 0.27, "grad_norm": 0.48046875, "learning_rate": 0.0004999066404439218, "loss": 0.299, "step": 6400 }, { "epoch": 0.27, "grad_norm": 0.58984375, "learning_rate": 0.000499906343847678, "loss": 0.2982, "step": 6410 }, { "epoch": 0.27, "grad_norm": 1.84375, "learning_rate": 0.0004999060467811376, "loss": 0.2717, "step": 6420 }, { "epoch": 0.27, "grad_norm": 0.72265625, "learning_rate": 0.000499905749244301, "loss": 0.2326, "step": 6430 }, { "epoch": 0.27, "grad_norm": 0.56640625, "learning_rate": 0.0004999054512371686, "loss": 0.2836, "step": 6440 }, { "epoch": 0.27, "grad_norm": 0.734375, "learning_rate": 0.0004999051527597413, "loss": 0.2804, "step": 6450 }, { "epoch": 0.27, "grad_norm": 0.62109375, "learning_rate": 0.0004999048538120195, "loss": 0.2319, "step": 6460 }, { "epoch": 0.27, "grad_norm": 1.2109375, "learning_rate": 0.0004999045543940036, "loss": 0.2958, "step": 6470 }, { "epoch": 0.27, "grad_norm": 1.125, "learning_rate": 0.0004999042545056944, "loss": 0.3421, "step": 6480 }, { "epoch": 0.27, "grad_norm": 0.76171875, "learning_rate": 0.0004999039541470924, "loss": 0.2333, "step": 6490 }, { "epoch": 0.27, "grad_norm": 2.765625, "learning_rate": 0.000499903653318198, "loss": 0.2685, "step": 6500 }, { "epoch": 0.27, "grad_norm": 0.2001953125, "learning_rate": 0.0004999033520190121, "loss": 0.2689, "step": 6510 }, { "epoch": 0.27, "grad_norm": 0.6875, "learning_rate": 0.0004999030502495351, "loss": 0.3213, "step": 6520 }, { "epoch": 0.27, "grad_norm": 0.4375, "learning_rate": 0.0004999027480097674, "loss": 0.2467, "step": 6530 }, { "epoch": 0.27, "grad_norm": 1.90625, "learning_rate": 0.0004999024452997097, "loss": 0.327, "step": 6540 }, { "epoch": 0.27, "grad_norm": 0.90625, "learning_rate": 0.0004999021421193627, "loss": 0.254, "step": 6550 }, { "epoch": 0.27, "grad_norm": 0.58984375, "learning_rate": 0.0004999018384687268, "loss": 0.2201, "step": 6560 }, { "epoch": 0.27, "grad_norm": 0.83984375, "learning_rate": 0.0004999015343478027, "loss": 0.3098, "step": 6570 }, { "epoch": 0.27, "grad_norm": 0.95703125, "learning_rate": 0.0004999012297565908, "loss": 0.2631, "step": 6580 }, { "epoch": 0.27, "grad_norm": 0.349609375, "learning_rate": 0.0004999009246950918, "loss": 0.2423, "step": 6590 }, { "epoch": 0.27, "grad_norm": 0.6953125, "learning_rate": 0.0004999006191633063, "loss": 0.2571, "step": 6600 }, { "epoch": 0.27, "grad_norm": 0.66796875, "learning_rate": 0.0004999003131612347, "loss": 0.2164, "step": 6610 }, { "epoch": 0.27, "grad_norm": 0.5546875, "learning_rate": 0.0004999000066888779, "loss": 0.2425, "step": 6620 }, { "epoch": 0.27, "grad_norm": 0.318359375, "learning_rate": 0.0004998996997462362, "loss": 0.2572, "step": 6630 }, { "epoch": 0.28, "grad_norm": 0.796875, "learning_rate": 0.0004998993923333102, "loss": 0.2865, "step": 6640 }, { "epoch": 0.28, "grad_norm": 0.498046875, "learning_rate": 0.0004998990844501005, "loss": 0.2969, "step": 6650 }, { "epoch": 0.28, "grad_norm": 0.443359375, "learning_rate": 0.0004998987760966077, "loss": 0.269, "step": 6660 }, { "epoch": 0.28, "grad_norm": 0.640625, "learning_rate": 0.0004998984672728324, "loss": 0.2682, "step": 6670 }, { "epoch": 0.28, "grad_norm": 0.671875, "learning_rate": 0.0004998981579787753, "loss": 0.2851, "step": 6680 }, { "epoch": 0.28, "grad_norm": 0.62890625, "learning_rate": 0.0004998978482144367, "loss": 0.2411, "step": 6690 }, { "epoch": 0.28, "grad_norm": 0.77734375, "learning_rate": 0.0004998975379798174, "loss": 0.2918, "step": 6700 }, { "epoch": 0.28, "grad_norm": 0.431640625, "learning_rate": 0.000499897227274918, "loss": 0.2642, "step": 6710 }, { "epoch": 0.28, "grad_norm": 0.310546875, "learning_rate": 0.0004998969160997388, "loss": 0.2663, "step": 6720 }, { "epoch": 0.28, "grad_norm": 0.80078125, "learning_rate": 0.0004998966044542808, "loss": 0.2772, "step": 6730 }, { "epoch": 0.28, "grad_norm": 0.6171875, "learning_rate": 0.0004998962923385443, "loss": 0.257, "step": 6740 }, { "epoch": 0.28, "grad_norm": 1.75, "learning_rate": 0.00049989597975253, "loss": 0.328, "step": 6750 }, { "epoch": 0.28, "grad_norm": 0.55859375, "learning_rate": 0.0004998956666962383, "loss": 0.2458, "step": 6760 }, { "epoch": 0.28, "grad_norm": 0.74609375, "learning_rate": 0.00049989535316967, "loss": 0.2825, "step": 6770 }, { "epoch": 0.28, "grad_norm": 0.60546875, "learning_rate": 0.0004998950391728258, "loss": 0.2644, "step": 6780 }, { "epoch": 0.28, "grad_norm": 0.9765625, "learning_rate": 0.000499894724705706, "loss": 0.2367, "step": 6790 }, { "epoch": 0.28, "grad_norm": 0.59375, "learning_rate": 0.0004998944097683113, "loss": 0.246, "step": 6800 }, { "epoch": 0.28, "grad_norm": 0.65625, "learning_rate": 0.0004998940943606422, "loss": 0.3284, "step": 6810 }, { "epoch": 0.28, "grad_norm": 0.4375, "learning_rate": 0.0004998937784826996, "loss": 0.2721, "step": 6820 }, { "epoch": 0.28, "grad_norm": 0.48046875, "learning_rate": 0.0004998934621344838, "loss": 0.2482, "step": 6830 }, { "epoch": 0.28, "grad_norm": 0.80078125, "learning_rate": 0.0004998931453159955, "loss": 0.2069, "step": 6840 }, { "epoch": 0.28, "grad_norm": 1.296875, "learning_rate": 0.0004998928280272354, "loss": 0.2775, "step": 6850 }, { "epoch": 0.28, "grad_norm": 0.75, "learning_rate": 0.0004998925102682038, "loss": 0.2783, "step": 6860 }, { "epoch": 0.28, "grad_norm": 0.6640625, "learning_rate": 0.0004998921920389015, "loss": 0.3178, "step": 6870 }, { "epoch": 0.28, "grad_norm": 1.28125, "learning_rate": 0.0004998918733393293, "loss": 0.2495, "step": 6880 }, { "epoch": 0.29, "grad_norm": 0.6484375, "learning_rate": 0.0004998915541694873, "loss": 0.2152, "step": 6890 }, { "epoch": 0.29, "grad_norm": 0.50390625, "learning_rate": 0.0004998912345293765, "loss": 0.2574, "step": 6900 }, { "epoch": 0.29, "grad_norm": 0.68359375, "learning_rate": 0.0004998909144189975, "loss": 0.2611, "step": 6910 }, { "epoch": 0.29, "grad_norm": 1.03125, "learning_rate": 0.0004998905938383506, "loss": 0.3197, "step": 6920 }, { "epoch": 0.29, "grad_norm": 0.30859375, "learning_rate": 0.0004998902727874367, "loss": 0.2412, "step": 6930 }, { "epoch": 0.29, "grad_norm": 0.6328125, "learning_rate": 0.0004998899512662563, "loss": 0.2402, "step": 6940 }, { "epoch": 0.29, "grad_norm": 0.58203125, "learning_rate": 0.0004998896292748099, "loss": 0.2811, "step": 6950 }, { "epoch": 0.29, "grad_norm": 1.1640625, "learning_rate": 0.0004998893068130983, "loss": 0.2634, "step": 6960 }, { "epoch": 0.29, "grad_norm": 0.7421875, "learning_rate": 0.000499888983881122, "loss": 0.3311, "step": 6970 }, { "epoch": 0.29, "grad_norm": 0.53125, "learning_rate": 0.0004998886604788815, "loss": 0.3394, "step": 6980 }, { "epoch": 0.29, "grad_norm": 0.51953125, "learning_rate": 0.0004998883366063775, "loss": 0.2298, "step": 6990 }, { "epoch": 0.29, "grad_norm": 0.462890625, "learning_rate": 0.0004998880122636108, "loss": 0.2717, "step": 7000 }, { "epoch": 0.29, "grad_norm": 0.63671875, "learning_rate": 0.0004998876874505818, "loss": 0.2882, "step": 7010 }, { "epoch": 0.29, "grad_norm": 0.1298828125, "learning_rate": 0.0004998873621672911, "loss": 0.2549, "step": 7020 }, { "epoch": 0.29, "grad_norm": 0.5703125, "learning_rate": 0.0004998870364137395, "loss": 0.3172, "step": 7030 }, { "epoch": 0.29, "grad_norm": 0.515625, "learning_rate": 0.0004998867101899273, "loss": 0.3427, "step": 7040 }, { "epoch": 0.29, "grad_norm": 0.77734375, "learning_rate": 0.0004998863834958555, "loss": 0.2506, "step": 7050 }, { "epoch": 0.29, "grad_norm": 0.6796875, "learning_rate": 0.0004998860563315244, "loss": 0.2623, "step": 7060 }, { "epoch": 0.29, "grad_norm": 0.53125, "learning_rate": 0.0004998857286969348, "loss": 0.3157, "step": 7070 }, { "epoch": 0.29, "grad_norm": 0.466796875, "learning_rate": 0.0004998854005920871, "loss": 0.2776, "step": 7080 }, { "epoch": 0.29, "grad_norm": 0.279296875, "learning_rate": 0.0004998850720169822, "loss": 0.2527, "step": 7090 }, { "epoch": 0.29, "grad_norm": 0.5234375, "learning_rate": 0.0004998847429716205, "loss": 0.2899, "step": 7100 }, { "epoch": 0.29, "grad_norm": 0.447265625, "learning_rate": 0.0004998844134560026, "loss": 0.2246, "step": 7110 }, { "epoch": 0.29, "grad_norm": 0.26171875, "learning_rate": 0.0004998840834701294, "loss": 0.2994, "step": 7120 }, { "epoch": 0.3, "grad_norm": 1.2265625, "learning_rate": 0.0004998837530140013, "loss": 0.2738, "step": 7130 }, { "epoch": 0.3, "grad_norm": 0.609375, "learning_rate": 0.000499883422087619, "loss": 0.2933, "step": 7140 }, { "epoch": 0.3, "grad_norm": 1.1328125, "learning_rate": 0.000499883090690983, "loss": 0.2967, "step": 7150 }, { "epoch": 0.3, "grad_norm": 0.6484375, "learning_rate": 0.000499882758824094, "loss": 0.2614, "step": 7160 }, { "epoch": 0.3, "grad_norm": 0.6796875, "learning_rate": 0.0004998824264869527, "loss": 0.2934, "step": 7170 }, { "epoch": 0.3, "grad_norm": 0.546875, "learning_rate": 0.0004998820936795597, "loss": 0.299, "step": 7180 }, { "epoch": 0.3, "grad_norm": 1.1484375, "learning_rate": 0.0004998817604019155, "loss": 0.3003, "step": 7190 }, { "epoch": 0.3, "grad_norm": 1.859375, "learning_rate": 0.0004998814266540208, "loss": 0.2196, "step": 7200 }, { "epoch": 0.3, "grad_norm": 0.7421875, "learning_rate": 0.0004998810924358762, "loss": 0.3219, "step": 7210 }, { "epoch": 0.3, "grad_norm": 0.77734375, "learning_rate": 0.0004998807577474825, "loss": 0.2483, "step": 7220 }, { "epoch": 0.3, "grad_norm": 0.7109375, "learning_rate": 0.0004998804225888401, "loss": 0.2674, "step": 7230 }, { "epoch": 0.3, "grad_norm": 0.6328125, "learning_rate": 0.0004998800869599497, "loss": 0.2998, "step": 7240 }, { "epoch": 0.3, "grad_norm": 0.53515625, "learning_rate": 0.0004998797508608121, "loss": 0.2499, "step": 7250 }, { "epoch": 0.3, "grad_norm": 0.34765625, "learning_rate": 0.0004998794142914277, "loss": 0.2512, "step": 7260 }, { "epoch": 0.3, "grad_norm": 1.3046875, "learning_rate": 0.0004998790772517972, "loss": 0.2656, "step": 7270 }, { "epoch": 0.3, "grad_norm": 0.36328125, "learning_rate": 0.0004998787397419213, "loss": 0.2781, "step": 7280 }, { "epoch": 0.3, "grad_norm": 1.0390625, "learning_rate": 0.0004998784017618006, "loss": 0.2953, "step": 7290 }, { "epoch": 0.3, "grad_norm": 0.36328125, "learning_rate": 0.0004998780633114357, "loss": 0.2848, "step": 7300 }, { "epoch": 0.3, "grad_norm": 0.28515625, "learning_rate": 0.0004998777243908273, "loss": 0.209, "step": 7310 }, { "epoch": 0.3, "grad_norm": 0.69140625, "learning_rate": 0.000499877384999976, "loss": 0.2633, "step": 7320 }, { "epoch": 0.3, "grad_norm": 0.5546875, "learning_rate": 0.0004998770451388825, "loss": 0.2502, "step": 7330 }, { "epoch": 0.3, "grad_norm": 2.15625, "learning_rate": 0.0004998767048075473, "loss": 0.2453, "step": 7340 }, { "epoch": 0.3, "grad_norm": 1.2578125, "learning_rate": 0.0004998763640059712, "loss": 0.2631, "step": 7350 }, { "epoch": 0.3, "grad_norm": 0.9609375, "learning_rate": 0.0004998760227341547, "loss": 0.1911, "step": 7360 }, { "epoch": 0.31, "grad_norm": 2.375, "learning_rate": 0.0004998756809920985, "loss": 0.284, "step": 7370 }, { "epoch": 0.31, "grad_norm": 0.51953125, "learning_rate": 0.0004998753387798034, "loss": 0.2477, "step": 7380 }, { "epoch": 0.31, "grad_norm": 0.5, "learning_rate": 0.0004998749960972697, "loss": 0.2838, "step": 7390 }, { "epoch": 0.31, "grad_norm": 0.76953125, "learning_rate": 0.0004998746529444984, "loss": 0.2795, "step": 7400 }, { "epoch": 0.31, "grad_norm": 0.48046875, "learning_rate": 0.00049987430932149, "loss": 0.256, "step": 7410 }, { "epoch": 0.31, "grad_norm": 0.2421875, "learning_rate": 0.000499873965228245, "loss": 0.2568, "step": 7420 }, { "epoch": 0.31, "grad_norm": 0.76953125, "learning_rate": 0.0004998736206647642, "loss": 0.2957, "step": 7430 }, { "epoch": 0.31, "grad_norm": 0.90234375, "learning_rate": 0.0004998732756310483, "loss": 0.3161, "step": 7440 }, { "epoch": 0.31, "grad_norm": 0.96875, "learning_rate": 0.0004998729301270978, "loss": 0.3257, "step": 7450 }, { "epoch": 0.31, "grad_norm": 0.6171875, "learning_rate": 0.0004998725841529135, "loss": 0.2278, "step": 7460 }, { "epoch": 0.31, "grad_norm": 0.80859375, "learning_rate": 0.000499872237708496, "loss": 0.3528, "step": 7470 }, { "epoch": 0.31, "grad_norm": 1.1640625, "learning_rate": 0.0004998718907938458, "loss": 0.2537, "step": 7480 }, { "epoch": 0.31, "grad_norm": 1.1796875, "learning_rate": 0.0004998715434089638, "loss": 0.2937, "step": 7490 }, { "epoch": 0.31, "grad_norm": 0.484375, "learning_rate": 0.0004998711955538505, "loss": 0.2759, "step": 7500 }, { "epoch": 0.31, "grad_norm": 0.86328125, "learning_rate": 0.0004998708472285067, "loss": 0.2315, "step": 7510 }, { "epoch": 0.31, "grad_norm": 2.109375, "learning_rate": 0.0004998704984329328, "loss": 0.276, "step": 7520 }, { "epoch": 0.31, "grad_norm": 0.6953125, "learning_rate": 0.0004998701491671296, "loss": 0.2863, "step": 7530 }, { "epoch": 0.31, "grad_norm": 0.58203125, "learning_rate": 0.0004998697994310979, "loss": 0.3342, "step": 7540 }, { "epoch": 0.31, "grad_norm": 0.69140625, "learning_rate": 0.0004998694492248381, "loss": 0.3033, "step": 7550 }, { "epoch": 0.31, "grad_norm": 0.70703125, "learning_rate": 0.000499869098548351, "loss": 0.2557, "step": 7560 }, { "epoch": 0.31, "grad_norm": 1.5078125, "learning_rate": 0.0004998687474016373, "loss": 0.2467, "step": 7570 }, { "epoch": 0.31, "grad_norm": 0.859375, "learning_rate": 0.0004998683957846975, "loss": 0.2534, "step": 7580 }, { "epoch": 0.31, "grad_norm": 0.9921875, "learning_rate": 0.0004998680436975325, "loss": 0.2629, "step": 7590 }, { "epoch": 0.31, "grad_norm": 1.4765625, "learning_rate": 0.0004998676911401427, "loss": 0.2304, "step": 7600 }, { "epoch": 0.32, "grad_norm": 0.78515625, "learning_rate": 0.0004998673381125289, "loss": 0.2324, "step": 7610 }, { "epoch": 0.32, "grad_norm": 0.78125, "learning_rate": 0.0004998669846146919, "loss": 0.2593, "step": 7620 }, { "epoch": 0.32, "grad_norm": 1.0625, "learning_rate": 0.0004998666306466321, "loss": 0.2533, "step": 7630 }, { "epoch": 0.32, "grad_norm": 1.28125, "learning_rate": 0.0004998662762083503, "loss": 0.2852, "step": 7640 }, { "epoch": 0.32, "grad_norm": 0.58203125, "learning_rate": 0.0004998659212998471, "loss": 0.2666, "step": 7650 }, { "epoch": 0.32, "grad_norm": 1.328125, "learning_rate": 0.0004998655659211233, "loss": 0.2299, "step": 7660 }, { "epoch": 0.32, "grad_norm": 0.458984375, "learning_rate": 0.0004998652100721794, "loss": 0.2339, "step": 7670 }, { "epoch": 0.32, "grad_norm": 0.51953125, "learning_rate": 0.0004998648537530162, "loss": 0.2222, "step": 7680 }, { "epoch": 0.32, "grad_norm": 0.6171875, "learning_rate": 0.0004998644969636343, "loss": 0.28, "step": 7690 }, { "epoch": 0.32, "grad_norm": 0.6015625, "learning_rate": 0.0004998641397040345, "loss": 0.2721, "step": 7700 }, { "epoch": 0.32, "grad_norm": 0.1484375, "learning_rate": 0.0004998637819742174, "loss": 0.289, "step": 7710 }, { "epoch": 0.32, "grad_norm": 0.48828125, "learning_rate": 0.0004998634237741835, "loss": 0.2741, "step": 7720 }, { "epoch": 0.32, "grad_norm": 0.50390625, "learning_rate": 0.0004998630651039337, "loss": 0.271, "step": 7730 }, { "epoch": 0.32, "grad_norm": 0.279296875, "learning_rate": 0.0004998627059634686, "loss": 0.2092, "step": 7740 }, { "epoch": 0.32, "grad_norm": 3.140625, "learning_rate": 0.0004998623463527888, "loss": 0.2307, "step": 7750 }, { "epoch": 0.32, "grad_norm": 0.828125, "learning_rate": 0.0004998619862718951, "loss": 0.2485, "step": 7760 }, { "epoch": 0.32, "grad_norm": 0.453125, "learning_rate": 0.0004998616257207881, "loss": 0.3343, "step": 7770 }, { "epoch": 0.32, "grad_norm": 0.6015625, "learning_rate": 0.0004998612646994686, "loss": 0.2756, "step": 7780 }, { "epoch": 0.32, "grad_norm": 0.671875, "learning_rate": 0.000499860903207937, "loss": 0.179, "step": 7790 }, { "epoch": 0.32, "grad_norm": 0.26171875, "learning_rate": 0.0004998605412461943, "loss": 0.2581, "step": 7800 }, { "epoch": 0.32, "grad_norm": 0.6171875, "learning_rate": 0.0004998601788142411, "loss": 0.284, "step": 7810 }, { "epoch": 0.32, "grad_norm": 0.94921875, "learning_rate": 0.0004998598159120779, "loss": 0.2596, "step": 7820 }, { "epoch": 0.32, "grad_norm": 1.3359375, "learning_rate": 0.0004998594525397054, "loss": 0.2653, "step": 7830 }, { "epoch": 0.32, "grad_norm": 1.0703125, "learning_rate": 0.0004998590886971246, "loss": 0.2733, "step": 7840 }, { "epoch": 0.33, "grad_norm": 0.7578125, "learning_rate": 0.000499858724384336, "loss": 0.2572, "step": 7850 }, { "epoch": 0.33, "grad_norm": 0.61328125, "learning_rate": 0.0004998583596013401, "loss": 0.2722, "step": 7860 }, { "epoch": 0.33, "grad_norm": 0.96484375, "learning_rate": 0.0004998579943481378, "loss": 0.3304, "step": 7870 }, { "epoch": 0.33, "grad_norm": 1.7265625, "learning_rate": 0.0004998576286247298, "loss": 0.253, "step": 7880 }, { "epoch": 0.33, "grad_norm": 1.7109375, "learning_rate": 0.0004998572624311167, "loss": 0.2158, "step": 7890 }, { "epoch": 0.33, "grad_norm": 0.58203125, "learning_rate": 0.0004998568957672992, "loss": 0.2663, "step": 7900 }, { "epoch": 0.33, "grad_norm": 0.46484375, "learning_rate": 0.000499856528633278, "loss": 0.2257, "step": 7910 }, { "epoch": 0.33, "grad_norm": 0.859375, "learning_rate": 0.0004998561610290538, "loss": 0.2291, "step": 7920 }, { "epoch": 0.33, "grad_norm": 1.078125, "learning_rate": 0.0004998557929546272, "loss": 0.2809, "step": 7930 }, { "epoch": 0.33, "grad_norm": 0.50390625, "learning_rate": 0.0004998554244099991, "loss": 0.2368, "step": 7940 }, { "epoch": 0.33, "grad_norm": 0.470703125, "learning_rate": 0.0004998550553951701, "loss": 0.2638, "step": 7950 }, { "epoch": 0.33, "grad_norm": 0.51953125, "learning_rate": 0.0004998546859101408, "loss": 0.2655, "step": 7960 }, { "epoch": 0.33, "grad_norm": 0.9296875, "learning_rate": 0.000499854315954912, "loss": 0.2455, "step": 7970 }, { "epoch": 0.33, "grad_norm": 0.7109375, "learning_rate": 0.0004998539455294842, "loss": 0.2501, "step": 7980 }, { "epoch": 0.33, "grad_norm": 2.359375, "learning_rate": 0.0004998535746338585, "loss": 0.2749, "step": 7990 }, { "epoch": 0.33, "grad_norm": 0.734375, "learning_rate": 0.0004998532032680352, "loss": 0.2603, "step": 8000 }, { "epoch": 0.33, "grad_norm": 0.330078125, "learning_rate": 0.0004998528314320152, "loss": 0.2983, "step": 8010 }, { "epoch": 0.33, "grad_norm": 0.57421875, "learning_rate": 0.0004998524591257991, "loss": 0.2774, "step": 8020 }, { "epoch": 0.33, "grad_norm": 0.6796875, "learning_rate": 0.0004998520863493878, "loss": 0.2965, "step": 8030 }, { "epoch": 0.33, "grad_norm": 0.6796875, "learning_rate": 0.0004998517131027817, "loss": 0.2433, "step": 8040 }, { "epoch": 0.33, "grad_norm": 0.5, "learning_rate": 0.0004998513393859817, "loss": 0.2153, "step": 8050 }, { "epoch": 0.33, "grad_norm": 0.83203125, "learning_rate": 0.0004998509651989886, "loss": 0.3167, "step": 8060 }, { "epoch": 0.33, "grad_norm": 0.8984375, "learning_rate": 0.0004998505905418028, "loss": 0.2944, "step": 8070 }, { "epoch": 0.33, "grad_norm": 1.625, "learning_rate": 0.0004998502154144252, "loss": 0.2473, "step": 8080 }, { "epoch": 0.34, "grad_norm": 0.984375, "learning_rate": 0.0004998498398168565, "loss": 0.2825, "step": 8090 }, { "epoch": 0.34, "grad_norm": 1.078125, "learning_rate": 0.0004998494637490973, "loss": 0.3617, "step": 8100 }, { "epoch": 0.34, "grad_norm": 0.189453125, "learning_rate": 0.0004998490872111485, "loss": 0.2453, "step": 8110 }, { "epoch": 0.34, "grad_norm": 0.91015625, "learning_rate": 0.0004998487102030107, "loss": 0.272, "step": 8120 }, { "epoch": 0.34, "grad_norm": 0.71875, "learning_rate": 0.0004998483327246845, "loss": 0.2994, "step": 8130 }, { "epoch": 0.34, "grad_norm": 0.267578125, "learning_rate": 0.0004998479547761709, "loss": 0.2359, "step": 8140 }, { "epoch": 0.34, "grad_norm": 0.43359375, "learning_rate": 0.0004998475763574702, "loss": 0.2469, "step": 8150 }, { "epoch": 0.34, "grad_norm": 0.72265625, "learning_rate": 0.0004998471974685835, "loss": 0.2994, "step": 8160 }, { "epoch": 0.34, "grad_norm": 0.68359375, "learning_rate": 0.0004998468181095113, "loss": 0.3199, "step": 8170 }, { "epoch": 0.34, "grad_norm": 3.28125, "learning_rate": 0.0004998464382802544, "loss": 0.2537, "step": 8180 }, { "epoch": 0.34, "grad_norm": 0.671875, "learning_rate": 0.0004998460579808135, "loss": 0.2669, "step": 8190 }, { "epoch": 0.34, "grad_norm": 1.2265625, "learning_rate": 0.0004998456772111892, "loss": 0.2626, "step": 8200 }, { "epoch": 0.34, "grad_norm": 1.3515625, "learning_rate": 0.0004998452959713824, "loss": 0.2541, "step": 8210 }, { "epoch": 0.34, "grad_norm": 0.53125, "learning_rate": 0.0004998449142613937, "loss": 0.3303, "step": 8220 }, { "epoch": 0.34, "grad_norm": 0.6328125, "learning_rate": 0.000499844532081224, "loss": 0.25, "step": 8230 }, { "epoch": 0.34, "grad_norm": 0.58984375, "learning_rate": 0.0004998441494308736, "loss": 0.2069, "step": 8240 }, { "epoch": 0.34, "grad_norm": 0.6328125, "learning_rate": 0.0004998437663103437, "loss": 0.2604, "step": 8250 }, { "epoch": 0.34, "grad_norm": 0.57421875, "learning_rate": 0.0004998433827196347, "loss": 0.3084, "step": 8260 }, { "epoch": 0.34, "grad_norm": 0.99609375, "learning_rate": 0.0004998429986587475, "loss": 0.2483, "step": 8270 }, { "epoch": 0.34, "grad_norm": 0.287109375, "learning_rate": 0.0004998426141276828, "loss": 0.2874, "step": 8280 }, { "epoch": 0.34, "grad_norm": 0.8515625, "learning_rate": 0.0004998422291264411, "loss": 0.2739, "step": 8290 }, { "epoch": 0.34, "grad_norm": 0.46484375, "learning_rate": 0.0004998418436550234, "loss": 0.2946, "step": 8300 }, { "epoch": 0.34, "grad_norm": 0.75, "learning_rate": 0.0004998414577134305, "loss": 0.2911, "step": 8310 }, { "epoch": 0.34, "grad_norm": 0.546875, "learning_rate": 0.0004998410713016628, "loss": 0.2316, "step": 8320 }, { "epoch": 0.35, "grad_norm": 1.5078125, "learning_rate": 0.0004998406844197212, "loss": 0.2325, "step": 8330 }, { "epoch": 0.35, "grad_norm": 0.5859375, "learning_rate": 0.0004998402970676064, "loss": 0.2424, "step": 8340 }, { "epoch": 0.35, "grad_norm": 1.03125, "learning_rate": 0.0004998399092453191, "loss": 0.2746, "step": 8350 }, { "epoch": 0.35, "grad_norm": 1.8046875, "learning_rate": 0.0004998395209528601, "loss": 0.2648, "step": 8360 }, { "epoch": 0.35, "grad_norm": 0.5546875, "learning_rate": 0.0004998391321902301, "loss": 0.2891, "step": 8370 }, { "epoch": 0.35, "grad_norm": 0.7109375, "learning_rate": 0.0004998387429574299, "loss": 0.295, "step": 8380 }, { "epoch": 0.35, "grad_norm": 0.287109375, "learning_rate": 0.0004998383532544601, "loss": 0.3096, "step": 8390 }, { "epoch": 0.35, "grad_norm": 0.828125, "learning_rate": 0.0004998379630813216, "loss": 0.2383, "step": 8400 }, { "epoch": 0.35, "grad_norm": 1.2265625, "learning_rate": 0.000499837572438015, "loss": 0.2351, "step": 8410 }, { "epoch": 0.35, "grad_norm": 0.58984375, "learning_rate": 0.0004998371813245409, "loss": 0.239, "step": 8420 }, { "epoch": 0.35, "grad_norm": 0.498046875, "learning_rate": 0.0004998367897409004, "loss": 0.2674, "step": 8430 }, { "epoch": 0.35, "grad_norm": 1.859375, "learning_rate": 0.000499836397687094, "loss": 0.3296, "step": 8440 }, { "epoch": 0.35, "grad_norm": 0.96484375, "learning_rate": 0.0004998360051631225, "loss": 0.2563, "step": 8450 }, { "epoch": 0.35, "grad_norm": 1.1953125, "learning_rate": 0.0004998356121689865, "loss": 0.2205, "step": 8460 }, { "epoch": 0.35, "grad_norm": 2.15625, "learning_rate": 0.000499835218704687, "loss": 0.2527, "step": 8470 }, { "epoch": 0.35, "grad_norm": 1.875, "learning_rate": 0.0004998348247702244, "loss": 0.2221, "step": 8480 }, { "epoch": 0.35, "grad_norm": 0.97265625, "learning_rate": 0.0004998344303655998, "loss": 0.2245, "step": 8490 }, { "epoch": 0.35, "grad_norm": 0.8828125, "learning_rate": 0.0004998340354908137, "loss": 0.2696, "step": 8500 }, { "epoch": 0.35, "grad_norm": 0.52734375, "learning_rate": 0.0004998336401458671, "loss": 0.2723, "step": 8510 }, { "epoch": 0.35, "grad_norm": 0.94921875, "learning_rate": 0.0004998332443307604, "loss": 0.2877, "step": 8520 }, { "epoch": 0.35, "grad_norm": 0.58203125, "learning_rate": 0.0004998328480454946, "loss": 0.2524, "step": 8530 }, { "epoch": 0.35, "grad_norm": 0.2890625, "learning_rate": 0.0004998324512900703, "loss": 0.2379, "step": 8540 }, { "epoch": 0.35, "grad_norm": 0.89453125, "learning_rate": 0.0004998320540644883, "loss": 0.28, "step": 8550 }, { "epoch": 0.35, "grad_norm": 0.8984375, "learning_rate": 0.0004998316563687493, "loss": 0.275, "step": 8560 }, { "epoch": 0.35, "grad_norm": 0.26171875, "learning_rate": 0.0004998312582028542, "loss": 0.2379, "step": 8570 }, { "epoch": 0.36, "grad_norm": 1.4453125, "learning_rate": 0.0004998308595668036, "loss": 0.1995, "step": 8580 }, { "epoch": 0.36, "grad_norm": 0.68359375, "learning_rate": 0.0004998304604605984, "loss": 0.2182, "step": 8590 }, { "epoch": 0.36, "grad_norm": 0.5625, "learning_rate": 0.0004998300608842392, "loss": 0.2395, "step": 8600 }, { "epoch": 0.36, "grad_norm": 0.494140625, "learning_rate": 0.0004998296608377267, "loss": 0.2551, "step": 8610 }, { "epoch": 0.36, "grad_norm": 0.65234375, "learning_rate": 0.0004998292603210619, "loss": 0.2501, "step": 8620 }, { "epoch": 0.36, "grad_norm": 0.95703125, "learning_rate": 0.0004998288593342454, "loss": 0.1997, "step": 8630 }, { "epoch": 0.36, "grad_norm": 0.5625, "learning_rate": 0.0004998284578772779, "loss": 0.2105, "step": 8640 }, { "epoch": 0.36, "grad_norm": 0.208984375, "learning_rate": 0.0004998280559501602, "loss": 0.2675, "step": 8650 }, { "epoch": 0.36, "grad_norm": 0.71484375, "learning_rate": 0.0004998276535528931, "loss": 0.2465, "step": 8660 }, { "epoch": 0.36, "grad_norm": 0.58984375, "learning_rate": 0.0004998272506854774, "loss": 0.3133, "step": 8670 }, { "epoch": 0.36, "grad_norm": 0.314453125, "learning_rate": 0.0004998268473479137, "loss": 0.2646, "step": 8680 }, { "epoch": 0.36, "grad_norm": 1.78125, "learning_rate": 0.0004998264435402029, "loss": 0.2282, "step": 8690 }, { "epoch": 0.36, "grad_norm": 1.4609375, "learning_rate": 0.0004998260392623459, "loss": 0.3514, "step": 8700 }, { "epoch": 0.36, "grad_norm": 0.34375, "learning_rate": 0.0004998256345143429, "loss": 0.2323, "step": 8710 }, { "epoch": 0.36, "grad_norm": 0.375, "learning_rate": 0.0004998252292961953, "loss": 0.2963, "step": 8720 }, { "epoch": 0.36, "grad_norm": 0.6171875, "learning_rate": 0.0004998248236079035, "loss": 0.2901, "step": 8730 }, { "epoch": 0.36, "grad_norm": 0.318359375, "learning_rate": 0.0004998244174494684, "loss": 0.243, "step": 8740 }, { "epoch": 0.36, "grad_norm": 1.0390625, "learning_rate": 0.0004998240108208907, "loss": 0.1882, "step": 8750 }, { "epoch": 0.36, "grad_norm": 0.78125, "learning_rate": 0.0004998236037221711, "loss": 0.2243, "step": 8760 }, { "epoch": 0.36, "grad_norm": 1.8828125, "learning_rate": 0.0004998231961533107, "loss": 0.289, "step": 8770 }, { "epoch": 0.36, "grad_norm": 0.6875, "learning_rate": 0.0004998227881143098, "loss": 0.1817, "step": 8780 }, { "epoch": 0.36, "grad_norm": 0.71875, "learning_rate": 0.0004998223796051695, "loss": 0.2264, "step": 8790 }, { "epoch": 0.36, "grad_norm": 1.4765625, "learning_rate": 0.0004998219706258904, "loss": 0.2128, "step": 8800 }, { "epoch": 0.36, "grad_norm": 0.68359375, "learning_rate": 0.0004998215611764734, "loss": 0.3047, "step": 8810 }, { "epoch": 0.37, "grad_norm": 0.8046875, "learning_rate": 0.0004998211512569191, "loss": 0.2505, "step": 8820 }, { "epoch": 0.37, "grad_norm": 0.74609375, "learning_rate": 0.0004998207408672285, "loss": 0.2399, "step": 8830 }, { "epoch": 0.37, "grad_norm": 1.78125, "learning_rate": 0.0004998203300074022, "loss": 0.3077, "step": 8840 }, { "epoch": 0.37, "grad_norm": 0.310546875, "learning_rate": 0.000499819918677441, "loss": 0.2372, "step": 8850 }, { "epoch": 0.37, "grad_norm": 0.458984375, "learning_rate": 0.0004998195068773456, "loss": 0.2643, "step": 8860 }, { "epoch": 0.37, "grad_norm": 0.69921875, "learning_rate": 0.0004998190946071169, "loss": 0.2693, "step": 8870 }, { "epoch": 0.37, "grad_norm": 0.427734375, "learning_rate": 0.0004998186818667557, "loss": 0.2794, "step": 8880 }, { "epoch": 0.37, "grad_norm": 0.484375, "learning_rate": 0.0004998182686562628, "loss": 0.2509, "step": 8890 }, { "epoch": 0.37, "grad_norm": 0.486328125, "learning_rate": 0.0004998178549756387, "loss": 0.3016, "step": 8900 }, { "epoch": 0.37, "grad_norm": 0.328125, "learning_rate": 0.0004998174408248846, "loss": 0.2302, "step": 8910 }, { "epoch": 0.37, "grad_norm": 0.5, "learning_rate": 0.0004998170262040008, "loss": 0.212, "step": 8920 }, { "epoch": 0.37, "grad_norm": 0.765625, "learning_rate": 0.0004998166111129885, "loss": 0.2341, "step": 8930 }, { "epoch": 0.37, "grad_norm": 0.88671875, "learning_rate": 0.0004998161955518483, "loss": 0.2323, "step": 8940 }, { "epoch": 0.37, "grad_norm": 0.984375, "learning_rate": 0.000499815779520581, "loss": 0.2473, "step": 8950 }, { "epoch": 0.37, "grad_norm": 0.298828125, "learning_rate": 0.0004998153630191874, "loss": 0.1603, "step": 8960 }, { "epoch": 0.37, "grad_norm": 0.330078125, "learning_rate": 0.0004998149460476682, "loss": 0.1883, "step": 8970 }, { "epoch": 0.37, "grad_norm": 0.828125, "learning_rate": 0.0004998145286060243, "loss": 0.223, "step": 8980 }, { "epoch": 0.37, "grad_norm": 0.703125, "learning_rate": 0.0004998141106942564, "loss": 0.2721, "step": 8990 }, { "epoch": 0.37, "grad_norm": 2.15625, "learning_rate": 0.0004998136923123653, "loss": 0.2824, "step": 9000 }, { "epoch": 0.37, "grad_norm": 1.59375, "learning_rate": 0.0004998132734603519, "loss": 0.2442, "step": 9010 }, { "epoch": 0.37, "grad_norm": 0.55859375, "learning_rate": 0.0004998128541382168, "loss": 0.2447, "step": 9020 }, { "epoch": 0.37, "grad_norm": 0.490234375, "learning_rate": 0.0004998124343459609, "loss": 0.2571, "step": 9030 }, { "epoch": 0.37, "grad_norm": 0.3046875, "learning_rate": 0.000499812014083585, "loss": 0.3239, "step": 9040 }, { "epoch": 0.37, "grad_norm": 1.3984375, "learning_rate": 0.0004998115933510899, "loss": 0.2907, "step": 9050 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 0.0004998111721484763, "loss": 0.2353, "step": 9060 }, { "epoch": 0.38, "grad_norm": 0.318359375, "learning_rate": 0.0004998107504757451, "loss": 0.2586, "step": 9070 }, { "epoch": 0.38, "grad_norm": 0.74609375, "learning_rate": 0.0004998103283328971, "loss": 0.2965, "step": 9080 }, { "epoch": 0.38, "grad_norm": 0.0, "learning_rate": 0.0004998099057199329, "loss": 0.2697, "step": 9090 }, { "epoch": 0.38, "grad_norm": 0.8828125, "learning_rate": 0.0004998094826368535, "loss": 0.2726, "step": 9100 }, { "epoch": 0.38, "grad_norm": 1.0859375, "learning_rate": 0.0004998090590836596, "loss": 0.2333, "step": 9110 }, { "epoch": 0.38, "grad_norm": 0.359375, "learning_rate": 0.0004998086350603521, "loss": 0.2468, "step": 9120 }, { "epoch": 0.38, "grad_norm": 0.6015625, "learning_rate": 0.0004998082105669316, "loss": 0.2026, "step": 9130 }, { "epoch": 0.38, "grad_norm": 0.498046875, "learning_rate": 0.0004998077856033991, "loss": 0.2667, "step": 9140 }, { "epoch": 0.38, "grad_norm": 0.37109375, "learning_rate": 0.0004998073601697554, "loss": 0.2211, "step": 9150 }, { "epoch": 0.38, "grad_norm": 1.3515625, "learning_rate": 0.0004998069342660011, "loss": 0.2342, "step": 9160 }, { "epoch": 0.38, "grad_norm": 0.53125, "learning_rate": 0.0004998065078921372, "loss": 0.3042, "step": 9170 }, { "epoch": 0.38, "grad_norm": 0.419921875, "learning_rate": 0.0004998060810481644, "loss": 0.2268, "step": 9180 }, { "epoch": 0.38, "grad_norm": 1.59375, "learning_rate": 0.0004998056537340836, "loss": 0.3005, "step": 9190 }, { "epoch": 0.38, "grad_norm": 1.4140625, "learning_rate": 0.0004998052259498954, "loss": 0.2707, "step": 9200 }, { "epoch": 0.38, "grad_norm": 1.421875, "learning_rate": 0.0004998047976956008, "loss": 0.2413, "step": 9210 }, { "epoch": 0.38, "grad_norm": 0.6171875, "learning_rate": 0.0004998043689712007, "loss": 0.2952, "step": 9220 }, { "epoch": 0.38, "grad_norm": 0.25, "learning_rate": 0.0004998039397766955, "loss": 0.2654, "step": 9230 }, { "epoch": 0.38, "grad_norm": 0.8671875, "learning_rate": 0.0004998035101120865, "loss": 0.1688, "step": 9240 }, { "epoch": 0.38, "grad_norm": 0.50390625, "learning_rate": 0.0004998030799773741, "loss": 0.274, "step": 9250 }, { "epoch": 0.38, "grad_norm": 0.66796875, "learning_rate": 0.0004998026493725593, "loss": 0.2148, "step": 9260 }, { "epoch": 0.38, "grad_norm": 0.57421875, "learning_rate": 0.0004998022182976429, "loss": 0.2488, "step": 9270 }, { "epoch": 0.38, "grad_norm": 1.015625, "learning_rate": 0.0004998017867526257, "loss": 0.2633, "step": 9280 }, { "epoch": 0.38, "grad_norm": 0.80859375, "learning_rate": 0.0004998013547375086, "loss": 0.313, "step": 9290 }, { "epoch": 0.39, "grad_norm": 0.6640625, "learning_rate": 0.0004998009222522922, "loss": 0.2938, "step": 9300 }, { "epoch": 0.39, "grad_norm": 0.5078125, "learning_rate": 0.0004998004892969776, "loss": 0.2695, "step": 9310 }, { "epoch": 0.39, "grad_norm": 0.62109375, "learning_rate": 0.0004998000558715653, "loss": 0.2811, "step": 9320 }, { "epoch": 0.39, "grad_norm": 0.91015625, "learning_rate": 0.0004997996219760564, "loss": 0.2205, "step": 9330 }, { "epoch": 0.39, "grad_norm": 1.234375, "learning_rate": 0.0004997991876104515, "loss": 0.2902, "step": 9340 }, { "epoch": 0.39, "grad_norm": 2.3125, "learning_rate": 0.0004997987527747515, "loss": 0.2575, "step": 9350 }, { "epoch": 0.39, "grad_norm": 1.2265625, "learning_rate": 0.0004997983174689572, "loss": 0.2652, "step": 9360 }, { "epoch": 0.39, "grad_norm": 1.0234375, "learning_rate": 0.0004997978816930695, "loss": 0.2387, "step": 9370 }, { "epoch": 0.39, "grad_norm": 0.921875, "learning_rate": 0.0004997974454470891, "loss": 0.2588, "step": 9380 }, { "epoch": 0.39, "grad_norm": 0.67578125, "learning_rate": 0.0004997970087310171, "loss": 0.3239, "step": 9390 }, { "epoch": 0.39, "grad_norm": 0.61328125, "learning_rate": 0.0004997965715448539, "loss": 0.2368, "step": 9400 }, { "epoch": 0.39, "grad_norm": 0.50390625, "learning_rate": 0.0004997961338886006, "loss": 0.2535, "step": 9410 }, { "epoch": 0.39, "grad_norm": 0.5234375, "learning_rate": 0.0004997956957622578, "loss": 0.291, "step": 9420 }, { "epoch": 0.39, "grad_norm": 0.80859375, "learning_rate": 0.0004997952571658266, "loss": 0.2872, "step": 9430 }, { "epoch": 0.39, "grad_norm": 0.82421875, "learning_rate": 0.0004997948180993077, "loss": 0.3157, "step": 9440 }, { "epoch": 0.39, "grad_norm": 0.384765625, "learning_rate": 0.000499794378562702, "loss": 0.2278, "step": 9450 }, { "epoch": 0.39, "grad_norm": 0.734375, "learning_rate": 0.0004997939385560101, "loss": 0.2595, "step": 9460 }, { "epoch": 0.39, "grad_norm": 1.6875, "learning_rate": 0.0004997934980792331, "loss": 0.2786, "step": 9470 }, { "epoch": 0.39, "grad_norm": 0.8125, "learning_rate": 0.0004997930571323718, "loss": 0.2605, "step": 9480 }, { "epoch": 0.39, "grad_norm": 0.64453125, "learning_rate": 0.0004997926157154268, "loss": 0.2685, "step": 9490 }, { "epoch": 0.39, "grad_norm": 0.5625, "learning_rate": 0.000499792173828399, "loss": 0.2057, "step": 9500 }, { "epoch": 0.39, "grad_norm": 0.53125, "learning_rate": 0.0004997917314712894, "loss": 0.2852, "step": 9510 }, { "epoch": 0.39, "grad_norm": 1.125, "learning_rate": 0.0004997912886440987, "loss": 0.204, "step": 9520 }, { "epoch": 0.39, "grad_norm": 0.8046875, "learning_rate": 0.0004997908453468279, "loss": 0.2967, "step": 9530 }, { "epoch": 0.4, "grad_norm": 0.984375, "learning_rate": 0.0004997904015794775, "loss": 0.3227, "step": 9540 }, { "epoch": 0.4, "grad_norm": 0.478515625, "learning_rate": 0.0004997899573420487, "loss": 0.2689, "step": 9550 }, { "epoch": 0.4, "grad_norm": 1.75, "learning_rate": 0.0004997895126345421, "loss": 0.2408, "step": 9560 }, { "epoch": 0.4, "grad_norm": 0.4609375, "learning_rate": 0.0004997890674569586, "loss": 0.2939, "step": 9570 }, { "epoch": 0.4, "grad_norm": 1.609375, "learning_rate": 0.0004997886218092992, "loss": 0.3197, "step": 9580 }, { "epoch": 0.4, "grad_norm": 0.51171875, "learning_rate": 0.0004997881756915644, "loss": 0.2507, "step": 9590 }, { "epoch": 0.4, "grad_norm": 0.65234375, "learning_rate": 0.0004997877291037553, "loss": 0.2775, "step": 9600 }, { "epoch": 0.4, "grad_norm": 0.60546875, "learning_rate": 0.0004997872820458727, "loss": 0.237, "step": 9610 }, { "epoch": 0.4, "grad_norm": 0.490234375, "learning_rate": 0.0004997868345179173, "loss": 0.2885, "step": 9620 }, { "epoch": 0.4, "grad_norm": 0.5625, "learning_rate": 0.0004997863865198902, "loss": 0.236, "step": 9630 }, { "epoch": 0.4, "grad_norm": 0.91015625, "learning_rate": 0.000499785938051792, "loss": 0.2855, "step": 9640 }, { "epoch": 0.4, "grad_norm": 0.6875, "learning_rate": 0.0004997854891136236, "loss": 0.2697, "step": 9650 }, { "epoch": 0.4, "grad_norm": 0.63671875, "learning_rate": 0.000499785039705386, "loss": 0.2297, "step": 9660 }, { "epoch": 0.4, "grad_norm": 0.72265625, "learning_rate": 0.0004997845898270798, "loss": 0.2423, "step": 9670 }, { "epoch": 0.4, "grad_norm": 0.54296875, "learning_rate": 0.0004997841394787061, "loss": 0.272, "step": 9680 }, { "epoch": 0.4, "grad_norm": 0.70703125, "learning_rate": 0.0004997836886602656, "loss": 0.3032, "step": 9690 }, { "epoch": 0.4, "grad_norm": 0.7421875, "learning_rate": 0.0004997832373717591, "loss": 0.258, "step": 9700 }, { "epoch": 0.4, "grad_norm": 0.8359375, "learning_rate": 0.0004997827856131876, "loss": 0.3609, "step": 9710 }, { "epoch": 0.4, "grad_norm": 0.42578125, "learning_rate": 0.0004997823333845519, "loss": 0.3004, "step": 9720 }, { "epoch": 0.4, "grad_norm": 0.376953125, "learning_rate": 0.0004997818806858527, "loss": 0.2483, "step": 9730 }, { "epoch": 0.4, "grad_norm": 0.98046875, "learning_rate": 0.0004997814275170911, "loss": 0.2509, "step": 9740 }, { "epoch": 0.4, "grad_norm": 1.453125, "learning_rate": 0.0004997809738782678, "loss": 0.2446, "step": 9750 }, { "epoch": 0.4, "grad_norm": 0.734375, "learning_rate": 0.0004997805197693836, "loss": 0.224, "step": 9760 }, { "epoch": 0.4, "grad_norm": 0.80859375, "learning_rate": 0.0004997800651904395, "loss": 0.2761, "step": 9770 }, { "epoch": 0.41, "grad_norm": 0.248046875, "learning_rate": 0.0004997796101414363, "loss": 0.2133, "step": 9780 }, { "epoch": 0.41, "grad_norm": 2.59375, "learning_rate": 0.0004997791546223748, "loss": 0.2887, "step": 9790 }, { "epoch": 0.41, "grad_norm": 0.4375, "learning_rate": 0.0004997786986332559, "loss": 0.2445, "step": 9800 }, { "epoch": 0.41, "grad_norm": 0.97265625, "learning_rate": 0.0004997782421740805, "loss": 0.2669, "step": 9810 }, { "epoch": 0.41, "grad_norm": 1.0546875, "learning_rate": 0.0004997777852448494, "loss": 0.2651, "step": 9820 }, { "epoch": 0.41, "grad_norm": 0.62890625, "learning_rate": 0.0004997773278455635, "loss": 0.2704, "step": 9830 }, { "epoch": 0.41, "grad_norm": 0.55859375, "learning_rate": 0.0004997768699762236, "loss": 0.2474, "step": 9840 }, { "epoch": 0.41, "grad_norm": 0.84375, "learning_rate": 0.0004997764116368307, "loss": 0.3074, "step": 9850 }, { "epoch": 0.41, "grad_norm": 1.46875, "learning_rate": 0.0004997759528273855, "loss": 0.2811, "step": 9860 }, { "epoch": 0.41, "grad_norm": 0.81640625, "learning_rate": 0.000499775493547889, "loss": 0.2775, "step": 9870 }, { "epoch": 0.41, "grad_norm": 0.90234375, "learning_rate": 0.0004997750337983419, "loss": 0.2756, "step": 9880 }, { "epoch": 0.41, "grad_norm": 1.0390625, "learning_rate": 0.0004997745735787452, "loss": 0.2703, "step": 9890 }, { "epoch": 0.41, "grad_norm": 0.310546875, "learning_rate": 0.0004997741128890997, "loss": 0.2818, "step": 9900 }, { "epoch": 0.41, "grad_norm": 0.61328125, "learning_rate": 0.0004997736517294064, "loss": 0.1997, "step": 9910 }, { "epoch": 0.41, "grad_norm": 0.83203125, "learning_rate": 0.0004997731900996658, "loss": 0.2691, "step": 9920 }, { "epoch": 0.41, "grad_norm": 2.296875, "learning_rate": 0.0004997727279998792, "loss": 0.2514, "step": 9930 }, { "epoch": 0.41, "grad_norm": 0.298828125, "learning_rate": 0.0004997722654300474, "loss": 0.1862, "step": 9940 }, { "epoch": 0.41, "grad_norm": 0.5, "learning_rate": 0.0004997718023901711, "loss": 0.3287, "step": 9950 }, { "epoch": 0.41, "grad_norm": 0.84765625, "learning_rate": 0.0004997713388802512, "loss": 0.2876, "step": 9960 }, { "epoch": 0.41, "grad_norm": 0.330078125, "learning_rate": 0.0004997708749002886, "loss": 0.2067, "step": 9970 }, { "epoch": 0.41, "grad_norm": 0.51171875, "learning_rate": 0.0004997704104502842, "loss": 0.2545, "step": 9980 }, { "epoch": 0.41, "grad_norm": 0.5078125, "learning_rate": 0.0004997699455302389, "loss": 0.2143, "step": 9990 }, { "epoch": 0.41, "grad_norm": 1.046875, "learning_rate": 0.0004997694801401534, "loss": 0.2766, "step": 10000 }, { "epoch": 0.41, "grad_norm": 0.62890625, "learning_rate": 0.0004997690142800288, "loss": 0.2988, "step": 10010 }, { "epoch": 0.42, "grad_norm": 1.234375, "learning_rate": 0.0004997685479498661, "loss": 0.2779, "step": 10020 }, { "epoch": 0.42, "grad_norm": 1.1953125, "learning_rate": 0.0004997680811496657, "loss": 0.2556, "step": 10030 }, { "epoch": 0.42, "grad_norm": 0.40625, "learning_rate": 0.0004997676138794288, "loss": 0.2489, "step": 10040 }, { "epoch": 0.42, "grad_norm": 0.51171875, "learning_rate": 0.0004997671461391561, "loss": 0.2324, "step": 10050 }, { "epoch": 0.42, "grad_norm": 0.298828125, "learning_rate": 0.0004997666779288489, "loss": 0.2369, "step": 10060 }, { "epoch": 0.42, "grad_norm": 0.40625, "learning_rate": 0.0004997662092485075, "loss": 0.2779, "step": 10070 }, { "epoch": 0.42, "grad_norm": 0.73828125, "learning_rate": 0.0004997657400981333, "loss": 0.2431, "step": 10080 }, { "epoch": 0.42, "grad_norm": 0.65625, "learning_rate": 0.0004997652704777268, "loss": 0.2992, "step": 10090 }, { "epoch": 0.42, "grad_norm": 0.90625, "learning_rate": 0.0004997648003872891, "loss": 0.285, "step": 10100 }, { "epoch": 0.42, "grad_norm": 0.86328125, "learning_rate": 0.000499764329826821, "loss": 0.3023, "step": 10110 }, { "epoch": 0.42, "grad_norm": 2.828125, "learning_rate": 0.0004997638587963234, "loss": 0.3089, "step": 10120 }, { "epoch": 0.42, "grad_norm": 0.796875, "learning_rate": 0.0004997633872957972, "loss": 0.1711, "step": 10130 }, { "epoch": 0.42, "grad_norm": 2.5625, "learning_rate": 0.0004997629153252433, "loss": 0.2699, "step": 10140 }, { "epoch": 0.42, "grad_norm": 1.4765625, "learning_rate": 0.0004997624428846625, "loss": 0.2359, "step": 10150 }, { "epoch": 0.42, "grad_norm": 0.91015625, "learning_rate": 0.0004997619699740558, "loss": 0.2554, "step": 10160 }, { "epoch": 0.42, "grad_norm": 0.546875, "learning_rate": 0.000499761496593424, "loss": 0.293, "step": 10170 }, { "epoch": 0.42, "grad_norm": 0.8359375, "learning_rate": 0.000499761022742768, "loss": 0.3406, "step": 10180 }, { "epoch": 0.42, "grad_norm": 0.3984375, "learning_rate": 0.0004997605484220888, "loss": 0.2979, "step": 10190 }, { "epoch": 0.42, "grad_norm": 0.73828125, "learning_rate": 0.0004997600736313873, "loss": 0.2138, "step": 10200 }, { "epoch": 0.42, "grad_norm": 0.73046875, "learning_rate": 0.0004997595983706642, "loss": 0.2211, "step": 10210 }, { "epoch": 0.42, "grad_norm": 0.47265625, "learning_rate": 0.0004997591226399205, "loss": 0.2854, "step": 10220 }, { "epoch": 0.42, "grad_norm": 0.6171875, "learning_rate": 0.0004997586464391572, "loss": 0.2612, "step": 10230 }, { "epoch": 0.42, "grad_norm": 0.703125, "learning_rate": 0.0004997581697683749, "loss": 0.2516, "step": 10240 }, { "epoch": 0.42, "grad_norm": 0.859375, "learning_rate": 0.000499757692627575, "loss": 0.296, "step": 10250 }, { "epoch": 0.42, "grad_norm": 0.71484375, "learning_rate": 0.0004997572150167578, "loss": 0.24, "step": 10260 }, { "epoch": 0.43, "grad_norm": 0.326171875, "learning_rate": 0.0004997567369359247, "loss": 0.2024, "step": 10270 }, { "epoch": 0.43, "grad_norm": 0.5234375, "learning_rate": 0.0004997562583850763, "loss": 0.2612, "step": 10280 }, { "epoch": 0.43, "grad_norm": 0.310546875, "learning_rate": 0.0004997557793642135, "loss": 0.324, "step": 10290 }, { "epoch": 0.43, "grad_norm": 0.9296875, "learning_rate": 0.0004997552998733375, "loss": 0.2599, "step": 10300 }, { "epoch": 0.43, "grad_norm": 0.55078125, "learning_rate": 0.0004997548199124488, "loss": 0.2054, "step": 10310 }, { "epoch": 0.43, "grad_norm": 0.72265625, "learning_rate": 0.0004997543394815486, "loss": 0.2677, "step": 10320 }, { "epoch": 0.43, "grad_norm": 0.333984375, "learning_rate": 0.0004997538585806377, "loss": 0.2574, "step": 10330 }, { "epoch": 0.43, "grad_norm": 0.353515625, "learning_rate": 0.000499753377209717, "loss": 0.2911, "step": 10340 }, { "epoch": 0.43, "grad_norm": 0.388671875, "learning_rate": 0.0004997528953687875, "loss": 0.2893, "step": 10350 }, { "epoch": 0.43, "grad_norm": 0.2275390625, "learning_rate": 0.0004997524130578499, "loss": 0.2223, "step": 10360 }, { "epoch": 0.43, "grad_norm": 0.609375, "learning_rate": 0.0004997519302769053, "loss": 0.2902, "step": 10370 }, { "epoch": 0.43, "grad_norm": 0.55859375, "learning_rate": 0.0004997514470259545, "loss": 0.3172, "step": 10380 }, { "epoch": 0.43, "grad_norm": 1.359375, "learning_rate": 0.0004997509633049985, "loss": 0.2653, "step": 10390 }, { "epoch": 0.43, "grad_norm": 0.4921875, "learning_rate": 0.000499750479114038, "loss": 0.2742, "step": 10400 }, { "epoch": 0.43, "grad_norm": 1.109375, "learning_rate": 0.0004997499944530742, "loss": 0.255, "step": 10410 }, { "epoch": 0.43, "grad_norm": 0.470703125, "learning_rate": 0.000499749509322108, "loss": 0.2522, "step": 10420 }, { "epoch": 0.43, "grad_norm": 0.72265625, "learning_rate": 0.00049974902372114, "loss": 0.2381, "step": 10430 }, { "epoch": 0.43, "grad_norm": 0.734375, "learning_rate": 0.0004997485376501714, "loss": 0.217, "step": 10440 }, { "epoch": 0.43, "grad_norm": 1.9453125, "learning_rate": 0.000499748051109203, "loss": 0.2247, "step": 10450 }, { "epoch": 0.43, "grad_norm": 0.67578125, "learning_rate": 0.0004997475640982357, "loss": 0.2944, "step": 10460 }, { "epoch": 0.43, "grad_norm": 0.8984375, "learning_rate": 0.0004997470766172705, "loss": 0.2701, "step": 10470 }, { "epoch": 0.43, "grad_norm": 0.9765625, "learning_rate": 0.0004997465886663082, "loss": 0.2348, "step": 10480 }, { "epoch": 0.43, "grad_norm": 1.2578125, "learning_rate": 0.0004997461002453498, "loss": 0.1593, "step": 10490 }, { "epoch": 0.43, "grad_norm": 0.72265625, "learning_rate": 0.0004997456113543964, "loss": 0.2726, "step": 10500 }, { "epoch": 0.44, "grad_norm": 1.34375, "learning_rate": 0.0004997451219934486, "loss": 0.2785, "step": 10510 }, { "epoch": 0.44, "grad_norm": 0.6953125, "learning_rate": 0.0004997446321625073, "loss": 0.2246, "step": 10520 }, { "epoch": 0.44, "grad_norm": 1.4296875, "learning_rate": 0.0004997441418615738, "loss": 0.2917, "step": 10530 }, { "epoch": 0.44, "grad_norm": 1.34375, "learning_rate": 0.0004997436510906487, "loss": 0.2464, "step": 10540 }, { "epoch": 0.44, "grad_norm": 0.82421875, "learning_rate": 0.0004997431598497329, "loss": 0.3073, "step": 10550 }, { "epoch": 0.44, "grad_norm": 0.287109375, "learning_rate": 0.0004997426681388276, "loss": 0.2618, "step": 10560 }, { "epoch": 0.44, "grad_norm": 1.28125, "learning_rate": 0.0004997421759579336, "loss": 0.3002, "step": 10570 }, { "epoch": 0.44, "grad_norm": 0.7578125, "learning_rate": 0.0004997416833070517, "loss": 0.3009, "step": 10580 }, { "epoch": 0.44, "grad_norm": 0.72265625, "learning_rate": 0.0004997411901861829, "loss": 0.2736, "step": 10590 }, { "epoch": 0.44, "grad_norm": 0.357421875, "learning_rate": 0.0004997406965953283, "loss": 0.3414, "step": 10600 }, { "epoch": 0.44, "grad_norm": 0.72265625, "learning_rate": 0.0004997402025344886, "loss": 0.2631, "step": 10610 }, { "epoch": 0.44, "grad_norm": 2.203125, "learning_rate": 0.0004997397080036647, "loss": 0.271, "step": 10620 }, { "epoch": 0.44, "grad_norm": 1.4921875, "learning_rate": 0.0004997392130028578, "loss": 0.2247, "step": 10630 }, { "epoch": 0.44, "grad_norm": 0.6875, "learning_rate": 0.0004997387175320686, "loss": 0.2633, "step": 10640 }, { "epoch": 0.44, "grad_norm": 1.59375, "learning_rate": 0.000499738221591298, "loss": 0.237, "step": 10650 }, { "epoch": 0.44, "grad_norm": 0.73828125, "learning_rate": 0.0004997377251805471, "loss": 0.279, "step": 10660 }, { "epoch": 0.44, "grad_norm": 0.400390625, "learning_rate": 0.000499737228299817, "loss": 0.287, "step": 10670 }, { "epoch": 0.44, "grad_norm": 0.94921875, "learning_rate": 0.0004997367309491081, "loss": 0.2665, "step": 10680 }, { "epoch": 0.44, "grad_norm": 0.6640625, "learning_rate": 0.0004997362331284217, "loss": 0.3324, "step": 10690 }, { "epoch": 0.44, "grad_norm": 0.8046875, "learning_rate": 0.0004997357348377589, "loss": 0.3001, "step": 10700 }, { "epoch": 0.44, "grad_norm": 0.85546875, "learning_rate": 0.0004997352360771202, "loss": 0.3396, "step": 10710 }, { "epoch": 0.44, "grad_norm": 0.9140625, "learning_rate": 0.0004997347368465068, "loss": 0.272, "step": 10720 }, { "epoch": 0.44, "grad_norm": 0.396484375, "learning_rate": 0.0004997342371459196, "loss": 0.266, "step": 10730 }, { "epoch": 0.44, "grad_norm": 2.34375, "learning_rate": 0.0004997337369753595, "loss": 0.303, "step": 10740 }, { "epoch": 0.45, "grad_norm": 0.515625, "learning_rate": 0.0004997332363348275, "loss": 0.2887, "step": 10750 }, { "epoch": 0.45, "grad_norm": 0.5859375, "learning_rate": 0.0004997327352243245, "loss": 0.2902, "step": 10760 }, { "epoch": 0.45, "grad_norm": 1.4609375, "learning_rate": 0.0004997322336438515, "loss": 0.2258, "step": 10770 }, { "epoch": 0.45, "grad_norm": 0.5234375, "learning_rate": 0.0004997317315934094, "loss": 0.2614, "step": 10780 }, { "epoch": 0.45, "grad_norm": 0.5078125, "learning_rate": 0.0004997312290729992, "loss": 0.2558, "step": 10790 }, { "epoch": 0.45, "grad_norm": 0.73046875, "learning_rate": 0.0004997307260826217, "loss": 0.2918, "step": 10800 }, { "epoch": 0.45, "grad_norm": 0.34375, "learning_rate": 0.0004997302226222779, "loss": 0.2496, "step": 10810 }, { "epoch": 0.45, "grad_norm": 0.59765625, "learning_rate": 0.000499729718691969, "loss": 0.2286, "step": 10820 }, { "epoch": 0.45, "grad_norm": 0.470703125, "learning_rate": 0.0004997292142916956, "loss": 0.2398, "step": 10830 }, { "epoch": 0.45, "grad_norm": 0.3984375, "learning_rate": 0.0004997287094214587, "loss": 0.1762, "step": 10840 }, { "epoch": 0.45, "grad_norm": 1.65625, "learning_rate": 0.0004997282040812596, "loss": 0.1878, "step": 10850 }, { "epoch": 0.45, "grad_norm": 0.4765625, "learning_rate": 0.0004997276982710988, "loss": 0.2679, "step": 10860 }, { "epoch": 0.45, "grad_norm": 0.77734375, "learning_rate": 0.0004997271919909774, "loss": 0.2761, "step": 10870 }, { "epoch": 0.45, "grad_norm": 0.6484375, "learning_rate": 0.0004997266852408964, "loss": 0.265, "step": 10880 }, { "epoch": 0.45, "grad_norm": 0.416015625, "learning_rate": 0.0004997261780208569, "loss": 0.3033, "step": 10890 }, { "epoch": 0.45, "grad_norm": 0.0, "learning_rate": 0.0004997256703308595, "loss": 0.2449, "step": 10900 }, { "epoch": 0.45, "grad_norm": 0.69140625, "learning_rate": 0.0004997251621709055, "loss": 0.2679, "step": 10910 }, { "epoch": 0.45, "grad_norm": 1.0234375, "learning_rate": 0.0004997246535409956, "loss": 0.2798, "step": 10920 }, { "epoch": 0.45, "grad_norm": 0.453125, "learning_rate": 0.000499724144441131, "loss": 0.2605, "step": 10930 }, { "epoch": 0.45, "grad_norm": 0.181640625, "learning_rate": 0.0004997236348713124, "loss": 0.2455, "step": 10940 }, { "epoch": 0.45, "grad_norm": 0.79296875, "learning_rate": 0.000499723124831541, "loss": 0.2581, "step": 10950 }, { "epoch": 0.45, "grad_norm": 1.453125, "learning_rate": 0.0004997226143218177, "loss": 0.2667, "step": 10960 }, { "epoch": 0.45, "grad_norm": 0.98828125, "learning_rate": 0.0004997221033421432, "loss": 0.2716, "step": 10970 }, { "epoch": 0.45, "grad_norm": 1.1171875, "learning_rate": 0.0004997215918925188, "loss": 0.2767, "step": 10980 }, { "epoch": 0.46, "grad_norm": 0.80078125, "learning_rate": 0.0004997210799729453, "loss": 0.2445, "step": 10990 }, { "epoch": 0.46, "grad_norm": 0.55859375, "learning_rate": 0.0004997205675834237, "loss": 0.2607, "step": 11000 }, { "epoch": 0.46, "grad_norm": 0.59765625, "learning_rate": 0.000499720054723955, "loss": 0.2562, "step": 11010 }, { "epoch": 0.46, "grad_norm": 1.015625, "learning_rate": 0.00049971954139454, "loss": 0.2781, "step": 11020 }, { "epoch": 0.46, "grad_norm": 0.7734375, "learning_rate": 0.00049971902759518, "loss": 0.3021, "step": 11030 }, { "epoch": 0.46, "grad_norm": 0.94140625, "learning_rate": 0.0004997185133258756, "loss": 0.2691, "step": 11040 }, { "epoch": 0.46, "grad_norm": 0.5078125, "learning_rate": 0.0004997179985866279, "loss": 0.219, "step": 11050 }, { "epoch": 0.46, "grad_norm": 0.55859375, "learning_rate": 0.000499717483377438, "loss": 0.2806, "step": 11060 }, { "epoch": 0.46, "grad_norm": 0.97265625, "learning_rate": 0.0004997169676983068, "loss": 0.2534, "step": 11070 }, { "epoch": 0.46, "grad_norm": 1.96875, "learning_rate": 0.000499716451549235, "loss": 0.3137, "step": 11080 }, { "epoch": 0.46, "grad_norm": 0.56640625, "learning_rate": 0.000499715934930224, "loss": 0.2147, "step": 11090 }, { "epoch": 0.46, "grad_norm": 0.93359375, "learning_rate": 0.0004997154178412746, "loss": 0.2544, "step": 11100 }, { "epoch": 0.46, "grad_norm": 0.27734375, "learning_rate": 0.0004997149002823877, "loss": 0.2752, "step": 11110 }, { "epoch": 0.46, "grad_norm": 0.734375, "learning_rate": 0.0004997143822535643, "loss": 0.2133, "step": 11120 }, { "epoch": 0.46, "grad_norm": 0.353515625, "learning_rate": 0.0004997138637548055, "loss": 0.2792, "step": 11130 }, { "epoch": 0.46, "grad_norm": 0.55859375, "learning_rate": 0.0004997133447861119, "loss": 0.2426, "step": 11140 }, { "epoch": 0.46, "grad_norm": 1.3359375, "learning_rate": 0.000499712825347485, "loss": 0.2457, "step": 11150 }, { "epoch": 0.46, "grad_norm": 0.6640625, "learning_rate": 0.0004997123054389255, "loss": 0.2882, "step": 11160 }, { "epoch": 0.46, "grad_norm": 3.234375, "learning_rate": 0.0004997117850604343, "loss": 0.2904, "step": 11170 }, { "epoch": 0.46, "grad_norm": 0.859375, "learning_rate": 0.0004997112642120126, "loss": 0.2814, "step": 11180 }, { "epoch": 0.46, "grad_norm": 1.1328125, "learning_rate": 0.0004997107428936613, "loss": 0.3676, "step": 11190 }, { "epoch": 0.46, "grad_norm": 1.21875, "learning_rate": 0.0004997102211053812, "loss": 0.1906, "step": 11200 }, { "epoch": 0.46, "grad_norm": 0.416015625, "learning_rate": 0.0004997096988471736, "loss": 0.2253, "step": 11210 }, { "epoch": 0.46, "grad_norm": 0.80859375, "learning_rate": 0.0004997091761190391, "loss": 0.2462, "step": 11220 }, { "epoch": 0.47, "grad_norm": 0.68359375, "learning_rate": 0.0004997086529209791, "loss": 0.2329, "step": 11230 }, { "epoch": 0.47, "grad_norm": 1.09375, "learning_rate": 0.0004997081292529942, "loss": 0.2471, "step": 11240 }, { "epoch": 0.47, "grad_norm": 1.3046875, "learning_rate": 0.0004997076051150857, "loss": 0.2515, "step": 11250 }, { "epoch": 0.47, "grad_norm": 2.15625, "learning_rate": 0.0004997070805072545, "loss": 0.2529, "step": 11260 }, { "epoch": 0.47, "grad_norm": 0.91015625, "learning_rate": 0.0004997065554295014, "loss": 0.3051, "step": 11270 }, { "epoch": 0.47, "grad_norm": 0.77734375, "learning_rate": 0.0004997060298818276, "loss": 0.3082, "step": 11280 }, { "epoch": 0.47, "grad_norm": 1.21875, "learning_rate": 0.000499705503864234, "loss": 0.2656, "step": 11290 }, { "epoch": 0.47, "grad_norm": 0.427734375, "learning_rate": 0.0004997049773767216, "loss": 0.2069, "step": 11300 }, { "epoch": 0.47, "grad_norm": 0.59375, "learning_rate": 0.0004997044504192915, "loss": 0.2724, "step": 11310 }, { "epoch": 0.47, "grad_norm": 0.4921875, "learning_rate": 0.0004997039229919445, "loss": 0.2362, "step": 11320 }, { "epoch": 0.47, "grad_norm": 0.984375, "learning_rate": 0.0004997033950946817, "loss": 0.279, "step": 11330 }, { "epoch": 0.47, "grad_norm": 0.5234375, "learning_rate": 0.000499702866727504, "loss": 0.2233, "step": 11340 }, { "epoch": 0.47, "grad_norm": 0.51953125, "learning_rate": 0.0004997023378904126, "loss": 0.2283, "step": 11350 }, { "epoch": 0.47, "grad_norm": 0.7734375, "learning_rate": 0.0004997018085834082, "loss": 0.2311, "step": 11360 }, { "epoch": 0.47, "grad_norm": 0.4765625, "learning_rate": 0.0004997012788064921, "loss": 0.2468, "step": 11370 }, { "epoch": 0.47, "grad_norm": 0.392578125, "learning_rate": 0.000499700748559665, "loss": 0.2575, "step": 11380 }, { "epoch": 0.47, "grad_norm": 0.6953125, "learning_rate": 0.0004997002178429283, "loss": 0.211, "step": 11390 }, { "epoch": 0.47, "grad_norm": 0.8671875, "learning_rate": 0.0004996996866562827, "loss": 0.2062, "step": 11400 }, { "epoch": 0.47, "grad_norm": 0.796875, "learning_rate": 0.000499699154999729, "loss": 0.2423, "step": 11410 }, { "epoch": 0.47, "grad_norm": 0.75390625, "learning_rate": 0.0004996986228732687, "loss": 0.1886, "step": 11420 }, { "epoch": 0.47, "grad_norm": 0.96484375, "learning_rate": 0.0004996980902769025, "loss": 0.2616, "step": 11430 }, { "epoch": 0.47, "grad_norm": 0.0, "learning_rate": 0.0004996975572106315, "loss": 0.2525, "step": 11440 }, { "epoch": 0.47, "grad_norm": 0.74609375, "learning_rate": 0.0004996970236744566, "loss": 0.2928, "step": 11450 }, { "epoch": 0.47, "grad_norm": 0.5703125, "learning_rate": 0.0004996964896683789, "loss": 0.2477, "step": 11460 }, { "epoch": 0.48, "grad_norm": 0.55078125, "learning_rate": 0.0004996959551923993, "loss": 0.2405, "step": 11470 }, { "epoch": 0.48, "grad_norm": 0.84765625, "learning_rate": 0.0004996954202465189, "loss": 0.244, "step": 11480 }, { "epoch": 0.48, "grad_norm": 1.015625, "learning_rate": 0.0004996948848307388, "loss": 0.2727, "step": 11490 }, { "epoch": 0.48, "grad_norm": 0.55859375, "learning_rate": 0.0004996943489450599, "loss": 0.1946, "step": 11500 }, { "epoch": 0.48, "grad_norm": 0.92578125, "learning_rate": 0.000499693812589483, "loss": 0.2017, "step": 11510 }, { "epoch": 0.48, "grad_norm": 0.392578125, "learning_rate": 0.0004996932757640094, "loss": 0.303, "step": 11520 }, { "epoch": 0.48, "grad_norm": 1.09375, "learning_rate": 0.00049969273846864, "loss": 0.2459, "step": 11530 }, { "epoch": 0.48, "grad_norm": 0.349609375, "learning_rate": 0.0004996922007033759, "loss": 0.2526, "step": 11540 }, { "epoch": 0.48, "grad_norm": 1.390625, "learning_rate": 0.0004996916624682181, "loss": 0.2446, "step": 11550 }, { "epoch": 0.48, "grad_norm": 0.6328125, "learning_rate": 0.0004996911237631674, "loss": 0.233, "step": 11560 }, { "epoch": 0.48, "grad_norm": 1.171875, "learning_rate": 0.0004996905845882251, "loss": 0.2742, "step": 11570 }, { "epoch": 0.48, "grad_norm": 0.80078125, "learning_rate": 0.000499690044943392, "loss": 0.2908, "step": 11580 }, { "epoch": 0.48, "grad_norm": 0.984375, "learning_rate": 0.0004996895048286692, "loss": 0.2879, "step": 11590 }, { "epoch": 0.48, "grad_norm": 0.2890625, "learning_rate": 0.0004996889642440577, "loss": 0.2929, "step": 11600 }, { "epoch": 0.48, "grad_norm": 0.318359375, "learning_rate": 0.0004996884231895586, "loss": 0.3438, "step": 11610 }, { "epoch": 0.48, "grad_norm": 0.88671875, "learning_rate": 0.0004996878816651728, "loss": 0.2925, "step": 11620 }, { "epoch": 0.48, "grad_norm": 2.875, "learning_rate": 0.0004996873396709014, "loss": 0.2603, "step": 11630 }, { "epoch": 0.48, "grad_norm": 0.7890625, "learning_rate": 0.0004996867972067453, "loss": 0.2788, "step": 11640 }, { "epoch": 0.48, "grad_norm": 0.7109375, "learning_rate": 0.0004996862542727057, "loss": 0.2439, "step": 11650 }, { "epoch": 0.48, "grad_norm": 0.333984375, "learning_rate": 0.0004996857108687836, "loss": 0.2506, "step": 11660 }, { "epoch": 0.48, "grad_norm": 0.5703125, "learning_rate": 0.00049968516699498, "loss": 0.2646, "step": 11670 }, { "epoch": 0.48, "grad_norm": 0.65625, "learning_rate": 0.0004996846226512957, "loss": 0.2454, "step": 11680 }, { "epoch": 0.48, "grad_norm": 0.45703125, "learning_rate": 0.0004996840778377319, "loss": 0.2306, "step": 11690 }, { "epoch": 0.48, "grad_norm": 0.59375, "learning_rate": 0.0004996835325542896, "loss": 0.1964, "step": 11700 }, { "epoch": 0.49, "grad_norm": 0.41796875, "learning_rate": 0.00049968298680097, "loss": 0.2394, "step": 11710 }, { "epoch": 0.49, "grad_norm": 0.63671875, "learning_rate": 0.000499682440577774, "loss": 0.2437, "step": 11720 }, { "epoch": 0.49, "grad_norm": 0.3125, "learning_rate": 0.0004996818938847026, "loss": 0.2254, "step": 11730 }, { "epoch": 0.49, "grad_norm": 0.5703125, "learning_rate": 0.0004996813467217566, "loss": 0.2711, "step": 11740 }, { "epoch": 0.49, "grad_norm": 0.9609375, "learning_rate": 0.0004996807990889376, "loss": 0.2165, "step": 11750 }, { "epoch": 0.49, "grad_norm": 0.4140625, "learning_rate": 0.0004996802509862461, "loss": 0.2162, "step": 11760 }, { "epoch": 0.49, "grad_norm": 1.6640625, "learning_rate": 0.0004996797024136834, "loss": 0.2604, "step": 11770 }, { "epoch": 0.49, "grad_norm": 0.6796875, "learning_rate": 0.0004996791533712504, "loss": 0.2313, "step": 11780 }, { "epoch": 0.49, "grad_norm": 2.15625, "learning_rate": 0.0004996786038589482, "loss": 0.2979, "step": 11790 }, { "epoch": 0.49, "grad_norm": 1.1328125, "learning_rate": 0.0004996780538767779, "loss": 0.2763, "step": 11800 }, { "epoch": 0.49, "grad_norm": 0.96484375, "learning_rate": 0.0004996775034247405, "loss": 0.2545, "step": 11810 }, { "epoch": 0.49, "grad_norm": 0.65234375, "learning_rate": 0.0004996769525028369, "loss": 0.2526, "step": 11820 }, { "epoch": 0.49, "grad_norm": 0.796875, "learning_rate": 0.0004996764011110683, "loss": 0.2658, "step": 11830 }, { "epoch": 0.49, "grad_norm": 0.5859375, "learning_rate": 0.0004996758492494356, "loss": 0.2253, "step": 11840 }, { "epoch": 0.49, "grad_norm": 0.75, "learning_rate": 0.00049967529691794, "loss": 0.2808, "step": 11850 }, { "epoch": 0.49, "grad_norm": 1.328125, "learning_rate": 0.0004996747441165825, "loss": 0.2238, "step": 11860 }, { "epoch": 0.49, "grad_norm": 0.8046875, "learning_rate": 0.0004996741908453639, "loss": 0.2057, "step": 11870 }, { "epoch": 0.49, "grad_norm": 0.54296875, "learning_rate": 0.0004996736371042856, "loss": 0.2924, "step": 11880 }, { "epoch": 0.49, "grad_norm": 0.73046875, "learning_rate": 0.0004996730828933484, "loss": 0.2492, "step": 11890 }, { "epoch": 0.49, "grad_norm": 0.419921875, "learning_rate": 0.0004996725282125534, "loss": 0.3119, "step": 11900 }, { "epoch": 0.49, "grad_norm": 0.546875, "learning_rate": 0.0004996719730619017, "loss": 0.2726, "step": 11910 }, { "epoch": 0.49, "grad_norm": 1.3203125, "learning_rate": 0.0004996714174413943, "loss": 0.2744, "step": 11920 }, { "epoch": 0.49, "grad_norm": 0.9140625, "learning_rate": 0.0004996708613510323, "loss": 0.2227, "step": 11930 }, { "epoch": 0.49, "grad_norm": 1.4609375, "learning_rate": 0.0004996703047908167, "loss": 0.2705, "step": 11940 }, { "epoch": 0.49, "grad_norm": 0.2138671875, "learning_rate": 0.0004996697477607485, "loss": 0.2188, "step": 11950 }, { "epoch": 0.5, "grad_norm": 0.75390625, "learning_rate": 0.0004996691902608289, "loss": 0.2519, "step": 11960 }, { "epoch": 0.5, "grad_norm": 0.62890625, "learning_rate": 0.0004996686322910587, "loss": 0.2057, "step": 11970 }, { "epoch": 0.5, "grad_norm": 0.86328125, "learning_rate": 0.0004996680738514392, "loss": 0.2539, "step": 11980 }, { "epoch": 0.5, "grad_norm": 0.73828125, "learning_rate": 0.0004996675149419713, "loss": 0.2428, "step": 11990 }, { "epoch": 0.5, "grad_norm": 1.3515625, "learning_rate": 0.0004996669555626561, "loss": 0.2226, "step": 12000 }, { "epoch": 0.5, "grad_norm": 0.451171875, "learning_rate": 0.0004996663957134947, "loss": 0.2298, "step": 12010 }, { "epoch": 0.5, "grad_norm": 0.47265625, "learning_rate": 0.0004996658353944881, "loss": 0.221, "step": 12020 }, { "epoch": 0.5, "grad_norm": 0.306640625, "learning_rate": 0.0004996652746056372, "loss": 0.243, "step": 12030 }, { "epoch": 0.5, "grad_norm": 0.640625, "learning_rate": 0.0004996647133469434, "loss": 0.3031, "step": 12040 }, { "epoch": 0.5, "grad_norm": 0.84765625, "learning_rate": 0.0004996641516184075, "loss": 0.2261, "step": 12050 }, { "epoch": 0.5, "grad_norm": 0.5625, "learning_rate": 0.0004996635894200307, "loss": 0.2584, "step": 12060 }, { "epoch": 0.5, "grad_norm": 0.76171875, "learning_rate": 0.0004996630267518139, "loss": 0.169, "step": 12070 }, { "epoch": 0.5, "grad_norm": 0.9375, "learning_rate": 0.0004996624636137582, "loss": 0.2332, "step": 12080 }, { "epoch": 0.5, "grad_norm": 0.79296875, "learning_rate": 0.0004996619000058647, "loss": 0.2623, "step": 12090 }, { "epoch": 0.5, "grad_norm": 0.5234375, "learning_rate": 0.0004996613359281346, "loss": 0.321, "step": 12100 }, { "epoch": 0.5, "grad_norm": 0.6484375, "learning_rate": 0.0004996607713805688, "loss": 0.2758, "step": 12110 }, { "epoch": 0.5, "grad_norm": 1.5625, "learning_rate": 0.0004996602063631684, "loss": 0.2643, "step": 12120 }, { "epoch": 0.5, "grad_norm": 0.5078125, "learning_rate": 0.0004996596408759343, "loss": 0.2369, "step": 12130 }, { "epoch": 0.5, "grad_norm": 0.42578125, "learning_rate": 0.0004996590749188678, "loss": 0.3099, "step": 12140 }, { "epoch": 0.5, "grad_norm": 0.82421875, "learning_rate": 0.0004996585084919699, "loss": 0.2804, "step": 12150 }, { "epoch": 0.5, "grad_norm": 0.59765625, "learning_rate": 0.0004996579415952417, "loss": 0.2636, "step": 12160 }, { "epoch": 0.5, "grad_norm": 0.494140625, "learning_rate": 0.0004996573742286842, "loss": 0.2652, "step": 12170 }, { "epoch": 0.5, "grad_norm": 1.7890625, "learning_rate": 0.0004996568063922984, "loss": 0.2606, "step": 12180 }, { "epoch": 0.5, "grad_norm": 0.34375, "learning_rate": 0.0004996562380860855, "loss": 0.2746, "step": 12190 }, { "epoch": 0.51, "grad_norm": 0.91796875, "learning_rate": 0.0004996556693100466, "loss": 0.2494, "step": 12200 }, { "epoch": 0.51, "grad_norm": 0.98046875, "learning_rate": 0.0004996551000641825, "loss": 0.2228, "step": 12210 }, { "epoch": 0.51, "grad_norm": 0.65625, "learning_rate": 0.0004996545303484947, "loss": 0.2498, "step": 12220 }, { "epoch": 0.51, "grad_norm": 0.40625, "learning_rate": 0.0004996539601629839, "loss": 0.2589, "step": 12230 }, { "epoch": 0.51, "grad_norm": 0.90625, "learning_rate": 0.0004996533895076513, "loss": 0.3008, "step": 12240 }, { "epoch": 0.51, "grad_norm": 0.625, "learning_rate": 0.000499652818382498, "loss": 0.2513, "step": 12250 }, { "epoch": 0.51, "grad_norm": 1.03125, "learning_rate": 0.000499652246787525, "loss": 0.2084, "step": 12260 }, { "epoch": 0.51, "grad_norm": 0.578125, "learning_rate": 0.0004996516747227336, "loss": 0.196, "step": 12270 }, { "epoch": 0.51, "grad_norm": 1.078125, "learning_rate": 0.0004996511021881244, "loss": 0.2265, "step": 12280 }, { "epoch": 0.51, "grad_norm": 0.80078125, "learning_rate": 0.0004996505291836991, "loss": 0.2067, "step": 12290 }, { "epoch": 0.51, "grad_norm": 1.84375, "learning_rate": 0.0004996499557094584, "loss": 0.1859, "step": 12300 }, { "epoch": 0.51, "grad_norm": 0.48046875, "learning_rate": 0.0004996493817654033, "loss": 0.2147, "step": 12310 }, { "epoch": 0.51, "grad_norm": 0.5703125, "learning_rate": 0.0004996488073515351, "loss": 0.2567, "step": 12320 }, { "epoch": 0.51, "grad_norm": 0.470703125, "learning_rate": 0.0004996482324678549, "loss": 0.259, "step": 12330 }, { "epoch": 0.51, "grad_norm": 0.6171875, "learning_rate": 0.0004996476571143636, "loss": 0.1846, "step": 12340 }, { "epoch": 0.51, "grad_norm": 0.55859375, "learning_rate": 0.0004996470812910623, "loss": 0.256, "step": 12350 }, { "epoch": 0.51, "grad_norm": 0.890625, "learning_rate": 0.0004996465049979523, "loss": 0.2945, "step": 12360 }, { "epoch": 0.51, "grad_norm": 0.65234375, "learning_rate": 0.0004996459282350344, "loss": 0.2705, "step": 12370 }, { "epoch": 0.51, "grad_norm": 0.6484375, "learning_rate": 0.0004996453510023098, "loss": 0.3059, "step": 12380 }, { "epoch": 0.51, "grad_norm": 0.388671875, "learning_rate": 0.0004996447732997797, "loss": 0.2624, "step": 12390 }, { "epoch": 0.51, "grad_norm": 0.361328125, "learning_rate": 0.0004996441951274452, "loss": 0.249, "step": 12400 }, { "epoch": 0.51, "grad_norm": 0.421875, "learning_rate": 0.0004996436164853071, "loss": 0.2587, "step": 12410 }, { "epoch": 0.51, "grad_norm": 0.76171875, "learning_rate": 0.0004996430373733668, "loss": 0.2674, "step": 12420 }, { "epoch": 0.51, "grad_norm": 0.330078125, "learning_rate": 0.0004996424577916251, "loss": 0.2315, "step": 12430 }, { "epoch": 0.52, "grad_norm": 0.7109375, "learning_rate": 0.0004996418777400834, "loss": 0.2058, "step": 12440 }, { "epoch": 0.52, "grad_norm": 1.046875, "learning_rate": 0.0004996412972187427, "loss": 0.2426, "step": 12450 }, { "epoch": 0.52, "grad_norm": 0.3671875, "learning_rate": 0.0004996407162276039, "loss": 0.2825, "step": 12460 }, { "epoch": 0.52, "grad_norm": 0.240234375, "learning_rate": 0.0004996401347666683, "loss": 0.1898, "step": 12470 }, { "epoch": 0.52, "grad_norm": 0.86328125, "learning_rate": 0.0004996395528359368, "loss": 0.267, "step": 12480 }, { "epoch": 0.52, "grad_norm": 1.7109375, "learning_rate": 0.0004996389704354107, "loss": 0.2219, "step": 12490 }, { "epoch": 0.52, "grad_norm": 0.67578125, "learning_rate": 0.0004996383875650911, "loss": 0.2615, "step": 12500 }, { "epoch": 0.52, "grad_norm": 0.96484375, "learning_rate": 0.000499637804224979, "loss": 0.2419, "step": 12510 }, { "epoch": 0.52, "grad_norm": 0.431640625, "learning_rate": 0.0004996372204150754, "loss": 0.2582, "step": 12520 }, { "epoch": 0.52, "grad_norm": 0.62109375, "learning_rate": 0.0004996366361353816, "loss": 0.2359, "step": 12530 }, { "epoch": 0.52, "grad_norm": 0.2197265625, "learning_rate": 0.0004996360513858985, "loss": 0.2114, "step": 12540 }, { "epoch": 0.52, "grad_norm": 0.71875, "learning_rate": 0.0004996354661666274, "loss": 0.2562, "step": 12550 }, { "epoch": 0.52, "grad_norm": 0.94140625, "learning_rate": 0.0004996348804775693, "loss": 0.2496, "step": 12560 }, { "epoch": 0.52, "grad_norm": 0.48046875, "learning_rate": 0.0004996342943187253, "loss": 0.1683, "step": 12570 }, { "epoch": 0.52, "grad_norm": 1.0625, "learning_rate": 0.0004996337076900965, "loss": 0.2598, "step": 12580 }, { "epoch": 0.52, "grad_norm": 0.54296875, "learning_rate": 0.0004996331205916841, "loss": 0.2989, "step": 12590 }, { "epoch": 0.52, "grad_norm": 0.47265625, "learning_rate": 0.0004996325330234891, "loss": 0.1701, "step": 12600 }, { "epoch": 0.52, "grad_norm": 1.1015625, "learning_rate": 0.0004996319449855125, "loss": 0.2306, "step": 12610 }, { "epoch": 0.52, "grad_norm": 0.91796875, "learning_rate": 0.0004996313564777557, "loss": 0.2561, "step": 12620 }, { "epoch": 0.52, "grad_norm": 0.48828125, "learning_rate": 0.0004996307675002197, "loss": 0.2958, "step": 12630 }, { "epoch": 0.52, "grad_norm": 0.412109375, "learning_rate": 0.0004996301780529054, "loss": 0.2615, "step": 12640 }, { "epoch": 0.52, "grad_norm": 0.390625, "learning_rate": 0.000499629588135814, "loss": 0.2403, "step": 12650 }, { "epoch": 0.52, "grad_norm": 0.88671875, "learning_rate": 0.0004996289977489468, "loss": 0.2499, "step": 12660 }, { "epoch": 0.52, "grad_norm": 1.2109375, "learning_rate": 0.0004996284068923048, "loss": 0.2717, "step": 12670 }, { "epoch": 0.53, "grad_norm": 0.77734375, "learning_rate": 0.000499627815565889, "loss": 0.2107, "step": 12680 }, { "epoch": 0.53, "grad_norm": 0.82421875, "learning_rate": 0.0004996272237697007, "loss": 0.2736, "step": 12690 }, { "epoch": 0.53, "grad_norm": 0.62890625, "learning_rate": 0.0004996266315037409, "loss": 0.2264, "step": 12700 }, { "epoch": 0.53, "grad_norm": 0.271484375, "learning_rate": 0.0004996260387680107, "loss": 0.2796, "step": 12710 }, { "epoch": 0.53, "grad_norm": 1.7890625, "learning_rate": 0.0004996254455625112, "loss": 0.2161, "step": 12720 }, { "epoch": 0.53, "grad_norm": 0.5546875, "learning_rate": 0.0004996248518872437, "loss": 0.2829, "step": 12730 }, { "epoch": 0.53, "grad_norm": 0.33984375, "learning_rate": 0.0004996242577422091, "loss": 0.2529, "step": 12740 }, { "epoch": 0.53, "grad_norm": 0.640625, "learning_rate": 0.0004996236631274087, "loss": 0.2989, "step": 12750 }, { "epoch": 0.53, "grad_norm": 0.92578125, "learning_rate": 0.0004996230680428434, "loss": 0.2907, "step": 12760 }, { "epoch": 0.53, "grad_norm": 0.76953125, "learning_rate": 0.0004996224724885146, "loss": 0.2576, "step": 12770 }, { "epoch": 0.53, "grad_norm": 0.40234375, "learning_rate": 0.0004996218764644231, "loss": 0.2536, "step": 12780 }, { "epoch": 0.53, "grad_norm": 0.0, "learning_rate": 0.0004996212799705702, "loss": 0.2959, "step": 12790 }, { "epoch": 0.53, "grad_norm": 0.58203125, "learning_rate": 0.0004996206830069571, "loss": 0.2481, "step": 12800 }, { "epoch": 0.53, "grad_norm": 0.41015625, "learning_rate": 0.0004996200855735848, "loss": 0.2618, "step": 12810 }, { "epoch": 0.53, "grad_norm": 1.2421875, "learning_rate": 0.0004996194876704544, "loss": 0.2346, "step": 12820 }, { "epoch": 0.53, "grad_norm": 0.77734375, "learning_rate": 0.0004996188892975672, "loss": 0.2301, "step": 12830 }, { "epoch": 0.53, "grad_norm": 0.7265625, "learning_rate": 0.0004996182904549241, "loss": 0.2844, "step": 12840 }, { "epoch": 0.53, "grad_norm": 0.318359375, "learning_rate": 0.0004996176911425263, "loss": 0.2547, "step": 12850 }, { "epoch": 0.53, "grad_norm": 0.75390625, "learning_rate": 0.000499617091360375, "loss": 0.2795, "step": 12860 }, { "epoch": 0.53, "grad_norm": 2.0, "learning_rate": 0.0004996164911084714, "loss": 0.2046, "step": 12870 }, { "epoch": 0.53, "grad_norm": 0.75390625, "learning_rate": 0.0004996158903868164, "loss": 0.2241, "step": 12880 }, { "epoch": 0.53, "grad_norm": 1.6484375, "learning_rate": 0.0004996152891954113, "loss": 0.1926, "step": 12890 }, { "epoch": 0.53, "grad_norm": 0.890625, "learning_rate": 0.0004996146875342572, "loss": 0.2101, "step": 12900 }, { "epoch": 0.53, "grad_norm": 0.5234375, "learning_rate": 0.0004996140854033552, "loss": 0.2335, "step": 12910 }, { "epoch": 0.54, "grad_norm": 0.412109375, "learning_rate": 0.0004996134828027063, "loss": 0.2318, "step": 12920 }, { "epoch": 0.54, "grad_norm": 0.8203125, "learning_rate": 0.0004996128797323119, "loss": 0.179, "step": 12930 }, { "epoch": 0.54, "grad_norm": 0.75390625, "learning_rate": 0.0004996122761921731, "loss": 0.2761, "step": 12940 }, { "epoch": 0.54, "grad_norm": 0.59375, "learning_rate": 0.0004996116721822909, "loss": 0.3093, "step": 12950 }, { "epoch": 0.54, "grad_norm": 1.1015625, "learning_rate": 0.0004996110677026665, "loss": 0.2626, "step": 12960 }, { "epoch": 0.54, "grad_norm": 0.4296875, "learning_rate": 0.000499610462753301, "loss": 0.2832, "step": 12970 }, { "epoch": 0.54, "grad_norm": 0.83984375, "learning_rate": 0.0004996098573341955, "loss": 0.2302, "step": 12980 }, { "epoch": 0.54, "grad_norm": 0.328125, "learning_rate": 0.0004996092514453513, "loss": 0.2273, "step": 12990 }, { "epoch": 0.54, "grad_norm": 0.384765625, "learning_rate": 0.0004996086450867694, "loss": 0.2482, "step": 13000 }, { "epoch": 0.54, "grad_norm": 1.5625, "learning_rate": 0.000499608038258451, "loss": 0.2562, "step": 13010 }, { "epoch": 0.54, "grad_norm": 0.60546875, "learning_rate": 0.0004996074309603971, "loss": 0.2624, "step": 13020 }, { "epoch": 0.54, "grad_norm": 0.66796875, "learning_rate": 0.0004996068231926092, "loss": 0.2411, "step": 13030 }, { "epoch": 0.54, "grad_norm": 1.546875, "learning_rate": 0.0004996062149550881, "loss": 0.237, "step": 13040 }, { "epoch": 0.54, "grad_norm": 1.6875, "learning_rate": 0.000499605606247835, "loss": 0.2883, "step": 13050 }, { "epoch": 0.54, "grad_norm": 0.8359375, "learning_rate": 0.0004996049970708512, "loss": 0.3037, "step": 13060 }, { "epoch": 0.54, "grad_norm": 1.5078125, "learning_rate": 0.0004996043874241378, "loss": 0.2306, "step": 13070 }, { "epoch": 0.54, "grad_norm": 0.51953125, "learning_rate": 0.0004996037773076957, "loss": 0.2694, "step": 13080 }, { "epoch": 0.54, "grad_norm": 0.333984375, "learning_rate": 0.0004996031667215264, "loss": 0.1821, "step": 13090 }, { "epoch": 0.54, "grad_norm": 0.86328125, "learning_rate": 0.0004996025556656308, "loss": 0.2322, "step": 13100 }, { "epoch": 0.54, "grad_norm": 0.212890625, "learning_rate": 0.0004996019441400102, "loss": 0.2405, "step": 13110 }, { "epoch": 0.54, "grad_norm": 1.1640625, "learning_rate": 0.0004996013321446657, "loss": 0.254, "step": 13120 }, { "epoch": 0.54, "grad_norm": 0.58203125, "learning_rate": 0.0004996007196795986, "loss": 0.2997, "step": 13130 }, { "epoch": 0.54, "grad_norm": 0.70703125, "learning_rate": 0.0004996001067448096, "loss": 0.2703, "step": 13140 }, { "epoch": 0.54, "grad_norm": 0.375, "learning_rate": 0.0004995994933403003, "loss": 0.2569, "step": 13150 }, { "epoch": 0.55, "grad_norm": 1.171875, "learning_rate": 0.0004995988794660718, "loss": 0.2327, "step": 13160 }, { "epoch": 0.55, "grad_norm": 0.6953125, "learning_rate": 0.0004995982651221251, "loss": 0.2467, "step": 13170 }, { "epoch": 0.55, "grad_norm": 0.77734375, "learning_rate": 0.0004995976503084612, "loss": 0.1784, "step": 13180 }, { "epoch": 0.55, "grad_norm": 0.8359375, "learning_rate": 0.0004995970350250819, "loss": 0.2317, "step": 13190 }, { "epoch": 0.55, "grad_norm": 1.1484375, "learning_rate": 0.0004995964192719876, "loss": 0.2616, "step": 13200 }, { "epoch": 0.55, "grad_norm": 2.03125, "learning_rate": 0.0004995958030491798, "loss": 0.1955, "step": 13210 }, { "epoch": 0.55, "grad_norm": 1.1796875, "learning_rate": 0.0004995951863566598, "loss": 0.215, "step": 13220 }, { "epoch": 0.55, "grad_norm": 0.359375, "learning_rate": 0.0004995945691944286, "loss": 0.26, "step": 13230 }, { "epoch": 0.55, "grad_norm": 1.1015625, "learning_rate": 0.0004995939515624873, "loss": 0.2212, "step": 13240 }, { "epoch": 0.55, "grad_norm": 2.515625, "learning_rate": 0.0004995933334608372, "loss": 0.2671, "step": 13250 }, { "epoch": 0.55, "grad_norm": 0.498046875, "learning_rate": 0.0004995927148894793, "loss": 0.226, "step": 13260 }, { "epoch": 0.55, "grad_norm": 0.625, "learning_rate": 0.000499592095848415, "loss": 0.2084, "step": 13270 }, { "epoch": 0.55, "grad_norm": 1.171875, "learning_rate": 0.0004995914763376452, "loss": 0.2098, "step": 13280 }, { "epoch": 0.55, "grad_norm": 0.63671875, "learning_rate": 0.0004995908563571713, "loss": 0.2819, "step": 13290 }, { "epoch": 0.55, "grad_norm": 0.51953125, "learning_rate": 0.0004995902359069943, "loss": 0.2602, "step": 13300 }, { "epoch": 0.55, "grad_norm": 0.69140625, "learning_rate": 0.0004995896149871154, "loss": 0.2179, "step": 13310 }, { "epoch": 0.55, "grad_norm": 0.578125, "learning_rate": 0.0004995889935975359, "loss": 0.2343, "step": 13320 }, { "epoch": 0.55, "grad_norm": 0.87109375, "learning_rate": 0.0004995883717382567, "loss": 0.1691, "step": 13330 }, { "epoch": 0.55, "grad_norm": 0.8984375, "learning_rate": 0.0004995877494092793, "loss": 0.3114, "step": 13340 }, { "epoch": 0.55, "grad_norm": 0.515625, "learning_rate": 0.0004995871266106047, "loss": 0.2224, "step": 13350 }, { "epoch": 0.55, "grad_norm": 0.72265625, "learning_rate": 0.0004995865033422341, "loss": 0.2726, "step": 13360 }, { "epoch": 0.55, "grad_norm": 0.609375, "learning_rate": 0.0004995858796041686, "loss": 0.2317, "step": 13370 }, { "epoch": 0.55, "grad_norm": 0.66015625, "learning_rate": 0.0004995852553964094, "loss": 0.2744, "step": 13380 }, { "epoch": 0.55, "grad_norm": 0.421875, "learning_rate": 0.0004995846307189577, "loss": 0.2335, "step": 13390 }, { "epoch": 0.56, "grad_norm": 0.81640625, "learning_rate": 0.0004995840055718147, "loss": 0.2615, "step": 13400 }, { "epoch": 0.56, "grad_norm": 0.5, "learning_rate": 0.0004995833799549816, "loss": 0.2339, "step": 13410 }, { "epoch": 0.56, "grad_norm": 0.79296875, "learning_rate": 0.0004995827538684595, "loss": 0.2743, "step": 13420 }, { "epoch": 0.56, "grad_norm": 0.8984375, "learning_rate": 0.0004995821273122495, "loss": 0.2834, "step": 13430 }, { "epoch": 0.56, "grad_norm": 0.47265625, "learning_rate": 0.0004995815002863531, "loss": 0.1678, "step": 13440 }, { "epoch": 0.56, "grad_norm": 1.515625, "learning_rate": 0.0004995808727907711, "loss": 0.2685, "step": 13450 }, { "epoch": 0.56, "grad_norm": 0.7421875, "learning_rate": 0.0004995802448255049, "loss": 0.2079, "step": 13460 }, { "epoch": 0.56, "grad_norm": 1.71875, "learning_rate": 0.0004995796163905557, "loss": 0.1969, "step": 13470 }, { "epoch": 0.56, "grad_norm": 0.53515625, "learning_rate": 0.0004995789874859245, "loss": 0.2641, "step": 13480 }, { "epoch": 0.56, "grad_norm": 1.015625, "learning_rate": 0.0004995783581116127, "loss": 0.2001, "step": 13490 }, { "epoch": 0.56, "grad_norm": 0.62109375, "learning_rate": 0.0004995777282676213, "loss": 0.2241, "step": 13500 }, { "epoch": 0.56, "grad_norm": 0.62890625, "learning_rate": 0.0004995770979539516, "loss": 0.3077, "step": 13510 }, { "epoch": 0.56, "grad_norm": 0.74609375, "learning_rate": 0.0004995764671706048, "loss": 0.2396, "step": 13520 }, { "epoch": 0.56, "grad_norm": 0.388671875, "learning_rate": 0.0004995758359175819, "loss": 0.2242, "step": 13530 }, { "epoch": 0.56, "grad_norm": 0.291015625, "learning_rate": 0.0004995752041948843, "loss": 0.255, "step": 13540 }, { "epoch": 0.56, "grad_norm": 1.046875, "learning_rate": 0.0004995745720025132, "loss": 0.2285, "step": 13550 }, { "epoch": 0.56, "grad_norm": 0.61328125, "learning_rate": 0.0004995739393404697, "loss": 0.2632, "step": 13560 }, { "epoch": 0.56, "grad_norm": 1.25, "learning_rate": 0.0004995733062087549, "loss": 0.3018, "step": 13570 }, { "epoch": 0.56, "grad_norm": 1.1328125, "learning_rate": 0.0004995726726073701, "loss": 0.2273, "step": 13580 }, { "epoch": 0.56, "grad_norm": 0.75, "learning_rate": 0.0004995720385363165, "loss": 0.1924, "step": 13590 }, { "epoch": 0.56, "grad_norm": 0.4453125, "learning_rate": 0.0004995714039955953, "loss": 0.2614, "step": 13600 }, { "epoch": 0.56, "grad_norm": 1.4765625, "learning_rate": 0.0004995707689852077, "loss": 0.2057, "step": 13610 }, { "epoch": 0.56, "grad_norm": 1.2265625, "learning_rate": 0.0004995701335051548, "loss": 0.2785, "step": 13620 }, { "epoch": 0.56, "grad_norm": 0.93359375, "learning_rate": 0.0004995694975554379, "loss": 0.2519, "step": 13630 }, { "epoch": 0.56, "grad_norm": 0.5546875, "learning_rate": 0.0004995688611360581, "loss": 0.3119, "step": 13640 }, { "epoch": 0.57, "grad_norm": 0.54296875, "learning_rate": 0.0004995682242470167, "loss": 0.219, "step": 13650 }, { "epoch": 0.57, "grad_norm": 0.640625, "learning_rate": 0.0004995675868883149, "loss": 0.2399, "step": 13660 }, { "epoch": 0.57, "grad_norm": 1.546875, "learning_rate": 0.0004995669490599538, "loss": 0.2815, "step": 13670 }, { "epoch": 0.57, "grad_norm": 0.62890625, "learning_rate": 0.0004995663107619346, "loss": 0.2547, "step": 13680 }, { "epoch": 0.57, "grad_norm": 1.109375, "learning_rate": 0.0004995656719942586, "loss": 0.2507, "step": 13690 }, { "epoch": 0.57, "grad_norm": 0.5078125, "learning_rate": 0.000499565032756927, "loss": 0.2775, "step": 13700 }, { "epoch": 0.57, "grad_norm": 1.0703125, "learning_rate": 0.000499564393049941, "loss": 0.2631, "step": 13710 }, { "epoch": 0.57, "grad_norm": 1.09375, "learning_rate": 0.0004995637528733015, "loss": 0.2436, "step": 13720 }, { "epoch": 0.57, "grad_norm": 0.392578125, "learning_rate": 0.0004995631122270102, "loss": 0.1846, "step": 13730 }, { "epoch": 0.57, "grad_norm": 0.6796875, "learning_rate": 0.0004995624711110681, "loss": 0.2283, "step": 13740 }, { "epoch": 0.57, "grad_norm": 0.734375, "learning_rate": 0.0004995618295254763, "loss": 0.2332, "step": 13750 }, { "epoch": 0.57, "grad_norm": 0.6640625, "learning_rate": 0.0004995611874702361, "loss": 0.2093, "step": 13760 }, { "epoch": 0.57, "grad_norm": 1.1171875, "learning_rate": 0.0004995605449453486, "loss": 0.2575, "step": 13770 }, { "epoch": 0.57, "grad_norm": 1.0859375, "learning_rate": 0.0004995599019508153, "loss": 0.2676, "step": 13780 }, { "epoch": 0.57, "grad_norm": 1.6015625, "learning_rate": 0.000499559258486637, "loss": 0.2634, "step": 13790 }, { "epoch": 0.57, "grad_norm": 0.322265625, "learning_rate": 0.0004995586145528153, "loss": 0.2509, "step": 13800 }, { "epoch": 0.57, "grad_norm": 1.5390625, "learning_rate": 0.0004995579701493511, "loss": 0.2346, "step": 13810 }, { "epoch": 0.57, "grad_norm": 0.65234375, "learning_rate": 0.0004995573252762459, "loss": 0.2452, "step": 13820 }, { "epoch": 0.57, "grad_norm": 0.5, "learning_rate": 0.0004995566799335006, "loss": 0.2592, "step": 13830 }, { "epoch": 0.57, "grad_norm": 0.52734375, "learning_rate": 0.0004995560341211167, "loss": 0.2139, "step": 13840 }, { "epoch": 0.57, "grad_norm": 0.44921875, "learning_rate": 0.0004995553878390953, "loss": 0.3036, "step": 13850 }, { "epoch": 0.57, "grad_norm": 1.3828125, "learning_rate": 0.0004995547410874375, "loss": 0.2706, "step": 13860 }, { "epoch": 0.57, "grad_norm": 1.3125, "learning_rate": 0.0004995540938661447, "loss": 0.2251, "step": 13870 }, { "epoch": 0.57, "grad_norm": 0.310546875, "learning_rate": 0.0004995534461752181, "loss": 0.2388, "step": 13880 }, { "epoch": 0.58, "grad_norm": 0.486328125, "learning_rate": 0.0004995527980146588, "loss": 0.2894, "step": 13890 }, { "epoch": 0.58, "grad_norm": 0.609375, "learning_rate": 0.0004995521493844681, "loss": 0.2616, "step": 13900 }, { "epoch": 0.58, "grad_norm": 0.36328125, "learning_rate": 0.0004995515002846471, "loss": 0.2312, "step": 13910 }, { "epoch": 0.58, "grad_norm": 1.09375, "learning_rate": 0.0004995508507151973, "loss": 0.25, "step": 13920 }, { "epoch": 0.58, "grad_norm": 0.93359375, "learning_rate": 0.0004995502006761197, "loss": 0.2461, "step": 13930 }, { "epoch": 0.58, "grad_norm": 0.53515625, "learning_rate": 0.0004995495501674155, "loss": 0.2224, "step": 13940 }, { "epoch": 0.58, "grad_norm": 0.75, "learning_rate": 0.000499548899189086, "loss": 0.2569, "step": 13950 }, { "epoch": 0.58, "grad_norm": 0.5625, "learning_rate": 0.0004995482477411325, "loss": 0.2508, "step": 13960 }, { "epoch": 0.58, "grad_norm": 0.75390625, "learning_rate": 0.0004995475958235561, "loss": 0.2307, "step": 13970 }, { "epoch": 0.58, "grad_norm": 0.5078125, "learning_rate": 0.000499546943436358, "loss": 0.2748, "step": 13980 }, { "epoch": 0.58, "grad_norm": 0.65625, "learning_rate": 0.0004995462905795396, "loss": 0.2595, "step": 13990 }, { "epoch": 0.58, "grad_norm": 0.30078125, "learning_rate": 0.000499545637253102, "loss": 0.2121, "step": 14000 }, { "epoch": 0.58, "grad_norm": 0.5234375, "learning_rate": 0.0004995449834570465, "loss": 0.3073, "step": 14010 }, { "epoch": 0.58, "grad_norm": 0.53125, "learning_rate": 0.0004995443291913742, "loss": 0.259, "step": 14020 }, { "epoch": 0.58, "grad_norm": 0.734375, "learning_rate": 0.0004995436744560865, "loss": 0.2233, "step": 14030 }, { "epoch": 0.58, "grad_norm": 0.546875, "learning_rate": 0.0004995430192511845, "loss": 0.2296, "step": 14040 }, { "epoch": 0.58, "grad_norm": 1.8046875, "learning_rate": 0.0004995423635766696, "loss": 0.2673, "step": 14050 }, { "epoch": 0.58, "grad_norm": 0.734375, "learning_rate": 0.0004995417074325428, "loss": 0.2613, "step": 14060 }, { "epoch": 0.58, "grad_norm": 0.431640625, "learning_rate": 0.0004995410508188054, "loss": 0.302, "step": 14070 }, { "epoch": 0.58, "grad_norm": 1.4765625, "learning_rate": 0.0004995403937354588, "loss": 0.2151, "step": 14080 }, { "epoch": 0.58, "grad_norm": 0.81640625, "learning_rate": 0.0004995397361825042, "loss": 0.2904, "step": 14090 }, { "epoch": 0.58, "grad_norm": 0.427734375, "learning_rate": 0.0004995390781599426, "loss": 0.2919, "step": 14100 }, { "epoch": 0.58, "grad_norm": 0.5, "learning_rate": 0.0004995384196677755, "loss": 0.2536, "step": 14110 }, { "epoch": 0.58, "grad_norm": 0.494140625, "learning_rate": 0.000499537760706004, "loss": 0.2561, "step": 14120 }, { "epoch": 0.59, "grad_norm": 0.5625, "learning_rate": 0.0004995371012746294, "loss": 0.2942, "step": 14130 }, { "epoch": 0.59, "grad_norm": 0.80078125, "learning_rate": 0.0004995364413736529, "loss": 0.2373, "step": 14140 }, { "epoch": 0.59, "grad_norm": 0.5546875, "learning_rate": 0.0004995357810030757, "loss": 0.2727, "step": 14150 }, { "epoch": 0.59, "grad_norm": 0.32421875, "learning_rate": 0.0004995351201628992, "loss": 0.232, "step": 14160 }, { "epoch": 0.59, "grad_norm": 1.359375, "learning_rate": 0.0004995344588531246, "loss": 0.2614, "step": 14170 }, { "epoch": 0.59, "grad_norm": 1.78125, "learning_rate": 0.0004995337970737531, "loss": 0.2045, "step": 14180 }, { "epoch": 0.59, "grad_norm": 0.65234375, "learning_rate": 0.0004995331348247858, "loss": 0.284, "step": 14190 }, { "epoch": 0.59, "grad_norm": 0.55078125, "learning_rate": 0.0004995324721062242, "loss": 0.246, "step": 14200 }, { "epoch": 0.59, "grad_norm": 0.67578125, "learning_rate": 0.0004995318089180694, "loss": 0.2755, "step": 14210 }, { "epoch": 0.59, "grad_norm": 0.62890625, "learning_rate": 0.0004995311452603226, "loss": 0.2174, "step": 14220 }, { "epoch": 0.59, "grad_norm": 1.8828125, "learning_rate": 0.0004995304811329853, "loss": 0.2605, "step": 14230 }, { "epoch": 0.59, "grad_norm": 0.64453125, "learning_rate": 0.0004995298165360585, "loss": 0.2819, "step": 14240 }, { "epoch": 0.59, "grad_norm": 0.98046875, "learning_rate": 0.0004995291514695435, "loss": 0.3071, "step": 14250 }, { "epoch": 0.59, "grad_norm": 0.384765625, "learning_rate": 0.0004995284859334417, "loss": 0.2543, "step": 14260 }, { "epoch": 0.59, "grad_norm": 3.75, "learning_rate": 0.0004995278199277541, "loss": 0.2567, "step": 14270 }, { "epoch": 0.59, "grad_norm": 0.39453125, "learning_rate": 0.0004995271534524821, "loss": 0.2601, "step": 14280 }, { "epoch": 0.59, "grad_norm": 0.75, "learning_rate": 0.000499526486507627, "loss": 0.2741, "step": 14290 }, { "epoch": 0.59, "grad_norm": 0.515625, "learning_rate": 0.00049952581909319, "loss": 0.2149, "step": 14300 }, { "epoch": 0.59, "grad_norm": 1.3125, "learning_rate": 0.0004995251512091723, "loss": 0.247, "step": 14310 }, { "epoch": 0.59, "grad_norm": 0.91015625, "learning_rate": 0.0004995244828555753, "loss": 0.2407, "step": 14320 }, { "epoch": 0.59, "grad_norm": 0.66796875, "learning_rate": 0.0004995238140324001, "loss": 0.2207, "step": 14330 }, { "epoch": 0.59, "grad_norm": 0.703125, "learning_rate": 0.000499523144739648, "loss": 0.2493, "step": 14340 }, { "epoch": 0.59, "grad_norm": 0.765625, "learning_rate": 0.0004995224749773204, "loss": 0.3104, "step": 14350 }, { "epoch": 0.59, "grad_norm": 0.314453125, "learning_rate": 0.0004995218047454183, "loss": 0.2288, "step": 14360 }, { "epoch": 0.6, "grad_norm": 0.6328125, "learning_rate": 0.0004995211340439432, "loss": 0.2732, "step": 14370 }, { "epoch": 0.6, "grad_norm": 0.77734375, "learning_rate": 0.0004995204628728963, "loss": 0.2424, "step": 14380 }, { "epoch": 0.6, "grad_norm": 0.98046875, "learning_rate": 0.0004995197912322787, "loss": 0.2151, "step": 14390 }, { "epoch": 0.6, "grad_norm": 0.703125, "learning_rate": 0.000499519119122092, "loss": 0.2538, "step": 14400 }, { "epoch": 0.6, "grad_norm": 0.98046875, "learning_rate": 0.0004995184465423372, "loss": 0.2513, "step": 14410 }, { "epoch": 0.6, "grad_norm": 1.265625, "learning_rate": 0.0004995177734930155, "loss": 0.2593, "step": 14420 }, { "epoch": 0.6, "grad_norm": 0.291015625, "learning_rate": 0.0004995170999741285, "loss": 0.2505, "step": 14430 }, { "epoch": 0.6, "grad_norm": 0.4765625, "learning_rate": 0.000499516425985677, "loss": 0.2398, "step": 14440 }, { "epoch": 0.6, "grad_norm": 0.8125, "learning_rate": 0.0004995157515276627, "loss": 0.3041, "step": 14450 }, { "epoch": 0.6, "grad_norm": 0.5390625, "learning_rate": 0.0004995150766000867, "loss": 0.2713, "step": 14460 }, { "epoch": 0.6, "grad_norm": 0.486328125, "learning_rate": 0.0004995144012029503, "loss": 0.2588, "step": 14470 }, { "epoch": 0.6, "grad_norm": 0.79296875, "learning_rate": 0.0004995137253362546, "loss": 0.2697, "step": 14480 }, { "epoch": 0.6, "grad_norm": 0.65234375, "learning_rate": 0.0004995130490000011, "loss": 0.2235, "step": 14490 }, { "epoch": 0.6, "grad_norm": 0.490234375, "learning_rate": 0.0004995123721941911, "loss": 0.1984, "step": 14500 }, { "epoch": 0.6, "grad_norm": 0.8046875, "learning_rate": 0.0004995116949188256, "loss": 0.2747, "step": 14510 }, { "epoch": 0.6, "grad_norm": 0.6484375, "learning_rate": 0.0004995110171739061, "loss": 0.2638, "step": 14520 }, { "epoch": 0.6, "grad_norm": 0.50390625, "learning_rate": 0.0004995103389594339, "loss": 0.2858, "step": 14530 }, { "epoch": 0.6, "grad_norm": 0.6875, "learning_rate": 0.00049950966027541, "loss": 0.2744, "step": 14540 }, { "epoch": 0.6, "grad_norm": 0.625, "learning_rate": 0.000499508981121836, "loss": 0.2562, "step": 14550 }, { "epoch": 0.6, "grad_norm": 0.796875, "learning_rate": 0.000499508301498713, "loss": 0.2537, "step": 14560 }, { "epoch": 0.6, "grad_norm": 0.94140625, "learning_rate": 0.0004995076214060422, "loss": 0.2035, "step": 14570 }, { "epoch": 0.6, "grad_norm": 0.5703125, "learning_rate": 0.0004995069408438252, "loss": 0.2859, "step": 14580 }, { "epoch": 0.6, "grad_norm": 0.443359375, "learning_rate": 0.000499506259812063, "loss": 0.2544, "step": 14590 }, { "epoch": 0.6, "grad_norm": 1.015625, "learning_rate": 0.0004995055783107569, "loss": 0.2407, "step": 14600 }, { "epoch": 0.61, "grad_norm": 0.58203125, "learning_rate": 0.0004995048963399083, "loss": 0.2195, "step": 14610 }, { "epoch": 0.61, "grad_norm": 0.515625, "learning_rate": 0.0004995042138995185, "loss": 0.2912, "step": 14620 }, { "epoch": 0.61, "grad_norm": 0.376953125, "learning_rate": 0.0004995035309895887, "loss": 0.2738, "step": 14630 }, { "epoch": 0.61, "grad_norm": 0.2890625, "learning_rate": 0.0004995028476101201, "loss": 0.2364, "step": 14640 }, { "epoch": 0.61, "grad_norm": 0.8125, "learning_rate": 0.0004995021637611141, "loss": 0.2355, "step": 14650 }, { "epoch": 0.61, "grad_norm": 0.240234375, "learning_rate": 0.0004995014794425721, "loss": 0.1927, "step": 14660 }, { "epoch": 0.61, "grad_norm": 0.703125, "learning_rate": 0.0004995007946544951, "loss": 0.2553, "step": 14670 }, { "epoch": 0.61, "grad_norm": 0.7265625, "learning_rate": 0.0004995001093968846, "loss": 0.2474, "step": 14680 }, { "epoch": 0.61, "grad_norm": 0.7890625, "learning_rate": 0.0004994994236697419, "loss": 0.2573, "step": 14690 }, { "epoch": 0.61, "grad_norm": 0.29296875, "learning_rate": 0.0004994987374730682, "loss": 0.2263, "step": 14700 }, { "epoch": 0.61, "grad_norm": 0.875, "learning_rate": 0.0004994980508068647, "loss": 0.2805, "step": 14710 }, { "epoch": 0.61, "grad_norm": 0.828125, "learning_rate": 0.0004994973636711329, "loss": 0.2899, "step": 14720 }, { "epoch": 0.61, "grad_norm": 0.625, "learning_rate": 0.0004994966760658741, "loss": 0.2508, "step": 14730 }, { "epoch": 0.61, "grad_norm": 0.423828125, "learning_rate": 0.0004994959879910894, "loss": 0.2679, "step": 14740 }, { "epoch": 0.61, "grad_norm": 0.6796875, "learning_rate": 0.0004994952994467802, "loss": 0.2845, "step": 14750 }, { "epoch": 0.61, "grad_norm": 0.44140625, "learning_rate": 0.0004994946104329479, "loss": 0.2588, "step": 14760 }, { "epoch": 0.61, "grad_norm": 0.50390625, "learning_rate": 0.0004994939209495934, "loss": 0.2413, "step": 14770 }, { "epoch": 0.61, "grad_norm": 0.333984375, "learning_rate": 0.0004994932309967185, "loss": 0.2333, "step": 14780 }, { "epoch": 0.61, "grad_norm": 0.474609375, "learning_rate": 0.0004994925405743243, "loss": 0.2576, "step": 14790 }, { "epoch": 0.61, "grad_norm": 0.2734375, "learning_rate": 0.000499491849682412, "loss": 0.211, "step": 14800 }, { "epoch": 0.61, "grad_norm": 0.470703125, "learning_rate": 0.000499491158320983, "loss": 0.265, "step": 14810 }, { "epoch": 0.61, "grad_norm": 0.1845703125, "learning_rate": 0.0004994904664900387, "loss": 0.2789, "step": 14820 }, { "epoch": 0.61, "grad_norm": 0.2734375, "learning_rate": 0.00049948977418958, "loss": 0.2975, "step": 14830 }, { "epoch": 0.61, "grad_norm": 0.1416015625, "learning_rate": 0.0004994890814196088, "loss": 0.2559, "step": 14840 }, { "epoch": 0.62, "grad_norm": 0.671875, "learning_rate": 0.000499488388180126, "loss": 0.2279, "step": 14850 }, { "epoch": 0.62, "grad_norm": 0.478515625, "learning_rate": 0.0004994876944711329, "loss": 0.2602, "step": 14860 }, { "epoch": 0.62, "grad_norm": 0.453125, "learning_rate": 0.0004994870002926309, "loss": 0.2448, "step": 14870 }, { "epoch": 0.62, "grad_norm": 0.61328125, "learning_rate": 0.0004994863056446214, "loss": 0.2266, "step": 14880 }, { "epoch": 0.62, "grad_norm": 0.36328125, "learning_rate": 0.0004994856105271056, "loss": 0.214, "step": 14890 }, { "epoch": 0.62, "grad_norm": 0.462890625, "learning_rate": 0.0004994849149400848, "loss": 0.2992, "step": 14900 }, { "epoch": 0.62, "grad_norm": 0.8515625, "learning_rate": 0.0004994842188835603, "loss": 0.219, "step": 14910 }, { "epoch": 0.62, "grad_norm": 0.482421875, "learning_rate": 0.0004994835223575335, "loss": 0.227, "step": 14920 }, { "epoch": 0.62, "grad_norm": 0.380859375, "learning_rate": 0.0004994828253620056, "loss": 0.1893, "step": 14930 }, { "epoch": 0.62, "grad_norm": 0.44921875, "learning_rate": 0.000499482127896978, "loss": 0.2657, "step": 14940 }, { "epoch": 0.62, "grad_norm": 0.439453125, "learning_rate": 0.000499481429962452, "loss": 0.2463, "step": 14950 }, { "epoch": 0.62, "grad_norm": 0.68359375, "learning_rate": 0.0004994807315584289, "loss": 0.2566, "step": 14960 }, { "epoch": 0.62, "grad_norm": 0.765625, "learning_rate": 0.00049948003268491, "loss": 0.2261, "step": 14970 }, { "epoch": 0.62, "grad_norm": 1.0390625, "learning_rate": 0.0004994793333418966, "loss": 0.2238, "step": 14980 }, { "epoch": 0.62, "grad_norm": 0.5625, "learning_rate": 0.0004994786335293899, "loss": 0.2283, "step": 14990 }, { "epoch": 0.62, "grad_norm": 0.49609375, "learning_rate": 0.0004994779332473916, "loss": 0.2071, "step": 15000 }, { "epoch": 0.62, "grad_norm": 0.3359375, "learning_rate": 0.0004994772324959026, "loss": 0.2802, "step": 15010 }, { "epoch": 0.62, "grad_norm": 0.6015625, "learning_rate": 0.0004994765312749244, "loss": 0.2558, "step": 15020 }, { "epoch": 0.62, "grad_norm": 0.337890625, "learning_rate": 0.0004994758295844584, "loss": 0.2589, "step": 15030 }, { "epoch": 0.62, "grad_norm": 1.5859375, "learning_rate": 0.0004994751274245057, "loss": 0.2529, "step": 15040 }, { "epoch": 0.62, "grad_norm": 0.87109375, "learning_rate": 0.0004994744247950679, "loss": 0.239, "step": 15050 }, { "epoch": 0.62, "grad_norm": 0.6796875, "learning_rate": 0.000499473721696146, "loss": 0.239, "step": 15060 }, { "epoch": 0.62, "grad_norm": 0.74609375, "learning_rate": 0.0004994730181277417, "loss": 0.2289, "step": 15070 }, { "epoch": 0.62, "grad_norm": 0.87109375, "learning_rate": 0.000499472314089856, "loss": 0.2264, "step": 15080 }, { "epoch": 0.63, "grad_norm": 0.56640625, "learning_rate": 0.0004994716095824903, "loss": 0.2753, "step": 15090 }, { "epoch": 0.63, "grad_norm": 0.6640625, "learning_rate": 0.000499470904605646, "loss": 0.28, "step": 15100 }, { "epoch": 0.63, "grad_norm": 1.3671875, "learning_rate": 0.0004994701991593245, "loss": 0.1957, "step": 15110 }, { "epoch": 0.63, "grad_norm": 0.5234375, "learning_rate": 0.000499469493243527, "loss": 0.2011, "step": 15120 }, { "epoch": 0.63, "grad_norm": 0.41796875, "learning_rate": 0.0004994687868582549, "loss": 0.2223, "step": 15130 }, { "epoch": 0.63, "grad_norm": 0.6953125, "learning_rate": 0.0004994680800035093, "loss": 0.2521, "step": 15140 }, { "epoch": 0.63, "grad_norm": 0.2470703125, "learning_rate": 0.0004994673726792918, "loss": 0.2931, "step": 15150 }, { "epoch": 0.63, "grad_norm": 0.3125, "learning_rate": 0.0004994666648856037, "loss": 0.2298, "step": 15160 }, { "epoch": 0.63, "grad_norm": 0.443359375, "learning_rate": 0.0004994659566224462, "loss": 0.2806, "step": 15170 }, { "epoch": 0.63, "grad_norm": 0.63671875, "learning_rate": 0.0004994652478898207, "loss": 0.1948, "step": 15180 }, { "epoch": 0.63, "grad_norm": 0.7578125, "learning_rate": 0.0004994645386877286, "loss": 0.2482, "step": 15190 }, { "epoch": 0.63, "grad_norm": 1.1328125, "learning_rate": 0.0004994638290161712, "loss": 0.2693, "step": 15200 }, { "epoch": 0.63, "grad_norm": 0.51953125, "learning_rate": 0.0004994631188751498, "loss": 0.2988, "step": 15210 }, { "epoch": 0.63, "grad_norm": 0.62109375, "learning_rate": 0.0004994624082646657, "loss": 0.2183, "step": 15220 }, { "epoch": 0.63, "grad_norm": 0.79296875, "learning_rate": 0.0004994616971847202, "loss": 0.2744, "step": 15230 }, { "epoch": 0.63, "grad_norm": 2.0, "learning_rate": 0.0004994609856353149, "loss": 0.2219, "step": 15240 }, { "epoch": 0.63, "grad_norm": 0.85546875, "learning_rate": 0.0004994602736164509, "loss": 0.2668, "step": 15250 }, { "epoch": 0.63, "grad_norm": 1.890625, "learning_rate": 0.0004994595611281296, "loss": 0.2662, "step": 15260 }, { "epoch": 0.63, "grad_norm": 0.8046875, "learning_rate": 0.0004994588481703524, "loss": 0.2058, "step": 15270 }, { "epoch": 0.63, "grad_norm": 0.341796875, "learning_rate": 0.0004994581347431206, "loss": 0.2371, "step": 15280 }, { "epoch": 0.63, "grad_norm": 0.59375, "learning_rate": 0.0004994574208464353, "loss": 0.2333, "step": 15290 }, { "epoch": 0.63, "grad_norm": 0.90625, "learning_rate": 0.0004994567064802983, "loss": 0.3015, "step": 15300 }, { "epoch": 0.63, "grad_norm": 1.0859375, "learning_rate": 0.0004994559916447107, "loss": 0.1547, "step": 15310 }, { "epoch": 0.63, "grad_norm": 0.5625, "learning_rate": 0.0004994552763396738, "loss": 0.2772, "step": 15320 }, { "epoch": 0.63, "grad_norm": 0.294921875, "learning_rate": 0.000499454560565189, "loss": 0.2598, "step": 15330 }, { "epoch": 0.64, "grad_norm": 0.5390625, "learning_rate": 0.0004994538443212577, "loss": 0.2779, "step": 15340 }, { "epoch": 0.64, "grad_norm": 1.1015625, "learning_rate": 0.0004994531276078811, "loss": 0.2526, "step": 15350 }, { "epoch": 0.64, "grad_norm": 1.5546875, "learning_rate": 0.0004994524104250609, "loss": 0.2667, "step": 15360 }, { "epoch": 0.64, "grad_norm": 0.032958984375, "learning_rate": 0.000499451692772798, "loss": 0.23, "step": 15370 }, { "epoch": 0.64, "grad_norm": 0.52734375, "learning_rate": 0.000499450974651094, "loss": 0.224, "step": 15380 }, { "epoch": 0.64, "grad_norm": 0.578125, "learning_rate": 0.0004994502560599502, "loss": 0.2363, "step": 15390 }, { "epoch": 0.64, "grad_norm": 0.82421875, "learning_rate": 0.0004994495369993679, "loss": 0.259, "step": 15400 }, { "epoch": 0.64, "grad_norm": 0.58984375, "learning_rate": 0.0004994488174693486, "loss": 0.3012, "step": 15410 }, { "epoch": 0.64, "grad_norm": 0.83984375, "learning_rate": 0.0004994480974698935, "loss": 0.3022, "step": 15420 }, { "epoch": 0.64, "grad_norm": 1.6015625, "learning_rate": 0.0004994473770010041, "loss": 0.2567, "step": 15430 }, { "epoch": 0.64, "grad_norm": 0.455078125, "learning_rate": 0.0004994466560626816, "loss": 0.2118, "step": 15440 }, { "epoch": 0.64, "grad_norm": 0.298828125, "learning_rate": 0.0004994459346549275, "loss": 0.2449, "step": 15450 }, { "epoch": 0.64, "grad_norm": 0.76953125, "learning_rate": 0.0004994452127777431, "loss": 0.2002, "step": 15460 }, { "epoch": 0.64, "grad_norm": 1.4609375, "learning_rate": 0.0004994444904311296, "loss": 0.2414, "step": 15470 }, { "epoch": 0.64, "grad_norm": 0.61328125, "learning_rate": 0.0004994437676150886, "loss": 0.2411, "step": 15480 }, { "epoch": 0.64, "grad_norm": 0.8984375, "learning_rate": 0.0004994430443296214, "loss": 0.277, "step": 15490 }, { "epoch": 0.64, "grad_norm": 1.4921875, "learning_rate": 0.0004994423205747293, "loss": 0.2178, "step": 15500 }, { "epoch": 0.64, "grad_norm": 0.703125, "learning_rate": 0.0004994415963504136, "loss": 0.2688, "step": 15510 }, { "epoch": 0.64, "grad_norm": 1.0703125, "learning_rate": 0.0004994408716566758, "loss": 0.2433, "step": 15520 }, { "epoch": 0.64, "grad_norm": 0.53125, "learning_rate": 0.0004994401464935173, "loss": 0.2357, "step": 15530 }, { "epoch": 0.64, "grad_norm": 1.03125, "learning_rate": 0.0004994394208609394, "loss": 0.2743, "step": 15540 }, { "epoch": 0.64, "grad_norm": 0.95703125, "learning_rate": 0.0004994386947589434, "loss": 0.2447, "step": 15550 }, { "epoch": 0.64, "grad_norm": 0.84765625, "learning_rate": 0.0004994379681875306, "loss": 0.1985, "step": 15560 }, { "epoch": 0.64, "grad_norm": 0.70703125, "learning_rate": 0.0004994372411467027, "loss": 0.233, "step": 15570 }, { "epoch": 0.65, "grad_norm": 0.640625, "learning_rate": 0.0004994365136364606, "loss": 0.2202, "step": 15580 }, { "epoch": 0.65, "grad_norm": 0.490234375, "learning_rate": 0.0004994357856568061, "loss": 0.2524, "step": 15590 }, { "epoch": 0.65, "grad_norm": 0.462890625, "learning_rate": 0.0004994350572077403, "loss": 0.2551, "step": 15600 }, { "epoch": 0.65, "grad_norm": 1.0390625, "learning_rate": 0.0004994343282892647, "loss": 0.3086, "step": 15610 }, { "epoch": 0.65, "grad_norm": 0.70703125, "learning_rate": 0.0004994335989013806, "loss": 0.2364, "step": 15620 }, { "epoch": 0.65, "grad_norm": 0.0, "learning_rate": 0.0004994328690440895, "loss": 0.2736, "step": 15630 }, { "epoch": 0.65, "grad_norm": 1.0390625, "learning_rate": 0.0004994321387173926, "loss": 0.1977, "step": 15640 }, { "epoch": 0.65, "grad_norm": 2.671875, "learning_rate": 0.0004994314079212913, "loss": 0.2901, "step": 15650 }, { "epoch": 0.65, "grad_norm": 0.494140625, "learning_rate": 0.0004994306766557871, "loss": 0.1955, "step": 15660 }, { "epoch": 0.65, "grad_norm": 0.47265625, "learning_rate": 0.0004994299449208813, "loss": 0.2311, "step": 15670 }, { "epoch": 0.65, "grad_norm": 0.640625, "learning_rate": 0.0004994292127165753, "loss": 0.2373, "step": 15680 }, { "epoch": 0.65, "grad_norm": 0.86328125, "learning_rate": 0.0004994284800428704, "loss": 0.1971, "step": 15690 }, { "epoch": 0.65, "grad_norm": 1.359375, "learning_rate": 0.000499427746899768, "loss": 0.2061, "step": 15700 }, { "epoch": 0.65, "grad_norm": 0.84375, "learning_rate": 0.0004994270132872696, "loss": 0.2498, "step": 15710 }, { "epoch": 0.65, "grad_norm": 0.5078125, "learning_rate": 0.0004994262792053765, "loss": 0.1932, "step": 15720 }, { "epoch": 0.65, "grad_norm": 0.439453125, "learning_rate": 0.0004994255446540899, "loss": 0.2652, "step": 15730 }, { "epoch": 0.65, "grad_norm": 0.373046875, "learning_rate": 0.0004994248096334117, "loss": 0.2972, "step": 15740 }, { "epoch": 0.65, "grad_norm": 0.87890625, "learning_rate": 0.0004994240741433427, "loss": 0.239, "step": 15750 }, { "epoch": 0.65, "grad_norm": 0.42578125, "learning_rate": 0.0004994233381838846, "loss": 0.3088, "step": 15760 }, { "epoch": 0.65, "grad_norm": 0.515625, "learning_rate": 0.0004994226017550386, "loss": 0.2117, "step": 15770 }, { "epoch": 0.65, "grad_norm": 0.9296875, "learning_rate": 0.0004994218648568063, "loss": 0.1854, "step": 15780 }, { "epoch": 0.65, "grad_norm": 1.21875, "learning_rate": 0.000499421127489189, "loss": 0.2247, "step": 15790 }, { "epoch": 0.65, "grad_norm": 0.72265625, "learning_rate": 0.0004994203896521881, "loss": 0.2258, "step": 15800 }, { "epoch": 0.65, "grad_norm": 1.1171875, "learning_rate": 0.0004994196513458048, "loss": 0.2129, "step": 15810 }, { "epoch": 0.66, "grad_norm": 0.5390625, "learning_rate": 0.0004994189125700408, "loss": 0.286, "step": 15820 }, { "epoch": 0.66, "grad_norm": 0.78515625, "learning_rate": 0.0004994181733248974, "loss": 0.2557, "step": 15830 }, { "epoch": 0.66, "grad_norm": 0.68359375, "learning_rate": 0.0004994174336103757, "loss": 0.2905, "step": 15840 }, { "epoch": 0.66, "grad_norm": 0.62890625, "learning_rate": 0.0004994166934264775, "loss": 0.1584, "step": 15850 }, { "epoch": 0.66, "grad_norm": 0.66015625, "learning_rate": 0.000499415952773204, "loss": 0.2625, "step": 15860 }, { "epoch": 0.66, "grad_norm": 0.8203125, "learning_rate": 0.0004994152116505566, "loss": 0.183, "step": 15870 }, { "epoch": 0.66, "grad_norm": 0.345703125, "learning_rate": 0.0004994144700585367, "loss": 0.1989, "step": 15880 }, { "epoch": 0.66, "grad_norm": 0.8515625, "learning_rate": 0.0004994137279971457, "loss": 0.2535, "step": 15890 }, { "epoch": 0.66, "grad_norm": 0.73828125, "learning_rate": 0.000499412985466385, "loss": 0.2947, "step": 15900 }, { "epoch": 0.66, "grad_norm": 2.125, "learning_rate": 0.0004994122424662559, "loss": 0.282, "step": 15910 }, { "epoch": 0.66, "grad_norm": 0.5546875, "learning_rate": 0.0004994114989967601, "loss": 0.2482, "step": 15920 }, { "epoch": 0.66, "grad_norm": 0.294921875, "learning_rate": 0.0004994107550578986, "loss": 0.2317, "step": 15930 }, { "epoch": 0.66, "grad_norm": 0.62890625, "learning_rate": 0.0004994100106496731, "loss": 0.2387, "step": 15940 }, { "epoch": 0.66, "grad_norm": 1.15625, "learning_rate": 0.0004994092657720849, "loss": 0.3027, "step": 15950 }, { "epoch": 0.66, "grad_norm": 0.8828125, "learning_rate": 0.0004994085204251354, "loss": 0.2657, "step": 15960 }, { "epoch": 0.66, "grad_norm": 0.79296875, "learning_rate": 0.0004994077746088259, "loss": 0.2443, "step": 15970 }, { "epoch": 0.66, "grad_norm": 0.41796875, "learning_rate": 0.0004994070283231579, "loss": 0.2834, "step": 15980 }, { "epoch": 0.66, "grad_norm": 1.375, "learning_rate": 0.0004994062815681329, "loss": 0.249, "step": 15990 }, { "epoch": 0.66, "grad_norm": 0.953125, "learning_rate": 0.0004994055343437523, "loss": 0.2528, "step": 16000 }, { "epoch": 0.66, "grad_norm": 0.546875, "learning_rate": 0.0004994047866500172, "loss": 0.2185, "step": 16010 }, { "epoch": 0.66, "grad_norm": 0.80859375, "learning_rate": 0.0004994040384869293, "loss": 0.249, "step": 16020 }, { "epoch": 0.66, "grad_norm": 0.99609375, "learning_rate": 0.0004994032898544901, "loss": 0.2617, "step": 16030 }, { "epoch": 0.66, "grad_norm": 0.8125, "learning_rate": 0.0004994025407527006, "loss": 0.2215, "step": 16040 }, { "epoch": 0.66, "grad_norm": 0.6953125, "learning_rate": 0.0004994017911815626, "loss": 0.2899, "step": 16050 }, { "epoch": 0.67, "grad_norm": 1.125, "learning_rate": 0.0004994010411410772, "loss": 0.2479, "step": 16060 }, { "epoch": 0.67, "grad_norm": 1.7890625, "learning_rate": 0.0004994002906312462, "loss": 0.2787, "step": 16070 }, { "epoch": 0.67, "grad_norm": 0.60546875, "learning_rate": 0.0004993995396520706, "loss": 0.2809, "step": 16080 }, { "epoch": 0.67, "grad_norm": 0.451171875, "learning_rate": 0.0004993987882035521, "loss": 0.2344, "step": 16090 }, { "epoch": 0.67, "grad_norm": 0.62890625, "learning_rate": 0.000499398036285692, "loss": 0.2347, "step": 16100 }, { "epoch": 0.67, "grad_norm": 0.7734375, "learning_rate": 0.0004993972838984917, "loss": 0.238, "step": 16110 }, { "epoch": 0.67, "grad_norm": 0.50390625, "learning_rate": 0.0004993965310419526, "loss": 0.2533, "step": 16120 }, { "epoch": 0.67, "grad_norm": 1.3125, "learning_rate": 0.0004993957777160762, "loss": 0.3023, "step": 16130 }, { "epoch": 0.67, "grad_norm": 0.0, "learning_rate": 0.0004993950239208639, "loss": 0.2277, "step": 16140 }, { "epoch": 0.67, "grad_norm": 1.96875, "learning_rate": 0.0004993942696563171, "loss": 0.2834, "step": 16150 }, { "epoch": 0.67, "grad_norm": 0.6015625, "learning_rate": 0.0004993935149224373, "loss": 0.2556, "step": 16160 }, { "epoch": 0.67, "grad_norm": 0.5546875, "learning_rate": 0.0004993927597192257, "loss": 0.2114, "step": 16170 }, { "epoch": 0.67, "grad_norm": 0.609375, "learning_rate": 0.0004993920040466839, "loss": 0.2365, "step": 16180 }, { "epoch": 0.67, "grad_norm": 0.482421875, "learning_rate": 0.0004993912479048133, "loss": 0.2753, "step": 16190 }, { "epoch": 0.67, "grad_norm": 1.03125, "learning_rate": 0.0004993904912936152, "loss": 0.2339, "step": 16200 }, { "epoch": 0.67, "grad_norm": 0.62890625, "learning_rate": 0.0004993897342130913, "loss": 0.2429, "step": 16210 }, { "epoch": 0.67, "grad_norm": 0.77734375, "learning_rate": 0.0004993889766632428, "loss": 0.2122, "step": 16220 }, { "epoch": 0.67, "grad_norm": 0.65234375, "learning_rate": 0.0004993882186440712, "loss": 0.203, "step": 16230 }, { "epoch": 0.67, "grad_norm": 1.4296875, "learning_rate": 0.0004993874601555777, "loss": 0.3366, "step": 16240 }, { "epoch": 0.67, "grad_norm": 0.33984375, "learning_rate": 0.0004993867011977641, "loss": 0.2137, "step": 16250 }, { "epoch": 0.67, "grad_norm": 0.59765625, "learning_rate": 0.0004993859417706317, "loss": 0.2576, "step": 16260 }, { "epoch": 0.67, "grad_norm": 0.4609375, "learning_rate": 0.0004993851818741818, "loss": 0.2684, "step": 16270 }, { "epoch": 0.67, "grad_norm": 0.546875, "learning_rate": 0.0004993844215084159, "loss": 0.2314, "step": 16280 }, { "epoch": 0.67, "grad_norm": 0.53125, "learning_rate": 0.0004993836606733355, "loss": 0.2644, "step": 16290 }, { "epoch": 0.68, "grad_norm": 1.1640625, "learning_rate": 0.000499382899368942, "loss": 0.337, "step": 16300 }, { "epoch": 0.68, "grad_norm": 0.4375, "learning_rate": 0.0004993821375952368, "loss": 0.2295, "step": 16310 }, { "epoch": 0.68, "grad_norm": 0.384765625, "learning_rate": 0.0004993813753522214, "loss": 0.2582, "step": 16320 }, { "epoch": 0.68, "grad_norm": 0.859375, "learning_rate": 0.000499380612639897, "loss": 0.2319, "step": 16330 }, { "epoch": 0.68, "grad_norm": 0.73046875, "learning_rate": 0.0004993798494582654, "loss": 0.2881, "step": 16340 }, { "epoch": 0.68, "grad_norm": 0.1923828125, "learning_rate": 0.0004993790858073278, "loss": 0.2651, "step": 16350 }, { "epoch": 0.68, "grad_norm": 0.53515625, "learning_rate": 0.0004993783216870858, "loss": 0.2049, "step": 16360 }, { "epoch": 0.68, "grad_norm": 0.59765625, "learning_rate": 0.0004993775570975405, "loss": 0.2234, "step": 16370 }, { "epoch": 0.68, "grad_norm": 2.875, "learning_rate": 0.0004993767920386937, "loss": 0.2635, "step": 16380 }, { "epoch": 0.68, "grad_norm": 0.5390625, "learning_rate": 0.0004993760265105467, "loss": 0.2483, "step": 16390 }, { "epoch": 0.68, "grad_norm": 0.90234375, "learning_rate": 0.0004993752605131009, "loss": 0.2294, "step": 16400 }, { "epoch": 0.68, "grad_norm": 0.6640625, "learning_rate": 0.0004993744940463579, "loss": 0.2409, "step": 16410 }, { "epoch": 0.68, "grad_norm": 0.6171875, "learning_rate": 0.0004993737271103189, "loss": 0.1956, "step": 16420 }, { "epoch": 0.68, "grad_norm": 0.6171875, "learning_rate": 0.0004993729597049856, "loss": 0.2682, "step": 16430 }, { "epoch": 0.68, "grad_norm": 0.75, "learning_rate": 0.0004993721918303592, "loss": 0.2362, "step": 16440 }, { "epoch": 0.68, "grad_norm": 0.7890625, "learning_rate": 0.0004993714234864414, "loss": 0.2321, "step": 16450 }, { "epoch": 0.68, "grad_norm": 0.61328125, "learning_rate": 0.0004993706546732334, "loss": 0.2932, "step": 16460 }, { "epoch": 0.68, "grad_norm": 0.443359375, "learning_rate": 0.0004993698853907368, "loss": 0.212, "step": 16470 }, { "epoch": 0.68, "grad_norm": 2.640625, "learning_rate": 0.000499369115638953, "loss": 0.2107, "step": 16480 }, { "epoch": 0.68, "grad_norm": 0.8671875, "learning_rate": 0.0004993683454178835, "loss": 0.2679, "step": 16490 }, { "epoch": 0.68, "grad_norm": 0.54296875, "learning_rate": 0.0004993675747275296, "loss": 0.2585, "step": 16500 }, { "epoch": 0.68, "grad_norm": 0.9375, "learning_rate": 0.0004993668035678929, "loss": 0.2922, "step": 16510 }, { "epoch": 0.68, "grad_norm": 0.62109375, "learning_rate": 0.0004993660319389748, "loss": 0.2259, "step": 16520 }, { "epoch": 0.68, "grad_norm": 0.640625, "learning_rate": 0.0004993652598407768, "loss": 0.264, "step": 16530 }, { "epoch": 0.69, "grad_norm": 0.5078125, "learning_rate": 0.0004993644872733003, "loss": 0.2662, "step": 16540 }, { "epoch": 0.69, "grad_norm": 0.76171875, "learning_rate": 0.0004993637142365467, "loss": 0.2643, "step": 16550 }, { "epoch": 0.69, "grad_norm": 0.330078125, "learning_rate": 0.0004993629407305176, "loss": 0.2561, "step": 16560 }, { "epoch": 0.69, "grad_norm": 0.41015625, "learning_rate": 0.0004993621667552143, "loss": 0.2218, "step": 16570 }, { "epoch": 0.69, "grad_norm": 0.890625, "learning_rate": 0.0004993613923106383, "loss": 0.2361, "step": 16580 }, { "epoch": 0.69, "grad_norm": 0.421875, "learning_rate": 0.0004993606173967913, "loss": 0.2465, "step": 16590 }, { "epoch": 0.69, "grad_norm": 0.33203125, "learning_rate": 0.0004993598420136744, "loss": 0.1965, "step": 16600 }, { "epoch": 0.69, "grad_norm": 0.70703125, "learning_rate": 0.0004993590661612891, "loss": 0.2469, "step": 16610 }, { "epoch": 0.69, "grad_norm": 0.29296875, "learning_rate": 0.0004993582898396372, "loss": 0.2099, "step": 16620 }, { "epoch": 0.69, "grad_norm": 0.41796875, "learning_rate": 0.0004993575130487197, "loss": 0.1864, "step": 16630 }, { "epoch": 0.69, "grad_norm": 0.71484375, "learning_rate": 0.0004993567357885384, "loss": 0.2479, "step": 16640 }, { "epoch": 0.69, "grad_norm": 0.7265625, "learning_rate": 0.0004993559580590947, "loss": 0.2677, "step": 16650 }, { "epoch": 0.69, "grad_norm": 0.56640625, "learning_rate": 0.0004993551798603899, "loss": 0.2738, "step": 16660 }, { "epoch": 0.69, "grad_norm": 0.77734375, "learning_rate": 0.0004993544011924257, "loss": 0.2, "step": 16670 }, { "epoch": 0.69, "grad_norm": 0.42578125, "learning_rate": 0.0004993536220552034, "loss": 0.2493, "step": 16680 }, { "epoch": 0.69, "grad_norm": 0.8046875, "learning_rate": 0.0004993528424487245, "loss": 0.2753, "step": 16690 }, { "epoch": 0.69, "grad_norm": 0.609375, "learning_rate": 0.0004993520623729905, "loss": 0.2631, "step": 16700 }, { "epoch": 0.69, "grad_norm": 0.578125, "learning_rate": 0.0004993512818280028, "loss": 0.2295, "step": 16710 }, { "epoch": 0.69, "grad_norm": 0.53515625, "learning_rate": 0.0004993505008137628, "loss": 0.2647, "step": 16720 }, { "epoch": 0.69, "grad_norm": 0.494140625, "learning_rate": 0.0004993497193302723, "loss": 0.2539, "step": 16730 }, { "epoch": 0.69, "grad_norm": 0.6328125, "learning_rate": 0.0004993489373775324, "loss": 0.2232, "step": 16740 }, { "epoch": 0.69, "grad_norm": 0.279296875, "learning_rate": 0.0004993481549555449, "loss": 0.2289, "step": 16750 }, { "epoch": 0.69, "grad_norm": 0.4609375, "learning_rate": 0.0004993473720643109, "loss": 0.291, "step": 16760 }, { "epoch": 0.69, "grad_norm": 1.5859375, "learning_rate": 0.0004993465887038322, "loss": 0.2992, "step": 16770 }, { "epoch": 0.7, "grad_norm": 0.86328125, "learning_rate": 0.0004993458048741102, "loss": 0.2626, "step": 16780 }, { "epoch": 0.7, "grad_norm": 1.1328125, "learning_rate": 0.0004993450205751462, "loss": 0.2139, "step": 16790 }, { "epoch": 0.7, "grad_norm": 0.984375, "learning_rate": 0.0004993442358069419, "loss": 0.243, "step": 16800 }, { "epoch": 0.7, "grad_norm": 1.2265625, "learning_rate": 0.0004993434505694986, "loss": 0.2542, "step": 16810 }, { "epoch": 0.7, "grad_norm": 1.0859375, "learning_rate": 0.0004993426648628179, "loss": 0.2659, "step": 16820 }, { "epoch": 0.7, "grad_norm": 0.439453125, "learning_rate": 0.0004993418786869012, "loss": 0.2235, "step": 16830 }, { "epoch": 0.7, "grad_norm": 0.87890625, "learning_rate": 0.0004993410920417499, "loss": 0.2194, "step": 16840 }, { "epoch": 0.7, "grad_norm": 1.09375, "learning_rate": 0.0004993403049273657, "loss": 0.286, "step": 16850 }, { "epoch": 0.7, "grad_norm": 0.1962890625, "learning_rate": 0.0004993395173437501, "loss": 0.261, "step": 16860 }, { "epoch": 0.7, "grad_norm": 0.57421875, "learning_rate": 0.0004993387292909042, "loss": 0.2352, "step": 16870 }, { "epoch": 0.7, "grad_norm": 1.3203125, "learning_rate": 0.0004993379407688299, "loss": 0.2301, "step": 16880 }, { "epoch": 0.7, "grad_norm": 0.4609375, "learning_rate": 0.0004993371517775285, "loss": 0.2677, "step": 16890 }, { "epoch": 0.7, "grad_norm": 0.81640625, "learning_rate": 0.0004993363623170016, "loss": 0.2974, "step": 16900 }, { "epoch": 0.7, "grad_norm": 0.77734375, "learning_rate": 0.0004993355723872504, "loss": 0.2538, "step": 16910 }, { "epoch": 0.7, "grad_norm": 0.5546875, "learning_rate": 0.0004993347819882767, "loss": 0.2575, "step": 16920 }, { "epoch": 0.7, "grad_norm": 0.65234375, "learning_rate": 0.0004993339911200819, "loss": 0.2861, "step": 16930 }, { "epoch": 0.7, "grad_norm": 0.423828125, "learning_rate": 0.0004993331997826673, "loss": 0.2344, "step": 16940 }, { "epoch": 0.7, "grad_norm": 0.71875, "learning_rate": 0.0004993324079760346, "loss": 0.2709, "step": 16950 }, { "epoch": 0.7, "grad_norm": 1.2734375, "learning_rate": 0.0004993316157001853, "loss": 0.2834, "step": 16960 }, { "epoch": 0.7, "grad_norm": 1.15625, "learning_rate": 0.0004993308229551208, "loss": 0.2285, "step": 16970 }, { "epoch": 0.7, "grad_norm": 0.79296875, "learning_rate": 0.0004993300297408426, "loss": 0.2872, "step": 16980 }, { "epoch": 0.7, "grad_norm": 0.703125, "learning_rate": 0.0004993292360573522, "loss": 0.2266, "step": 16990 }, { "epoch": 0.7, "grad_norm": 0.0, "learning_rate": 0.0004993284419046511, "loss": 0.2214, "step": 17000 }, { "epoch": 0.7, "grad_norm": 0.275390625, "learning_rate": 0.0004993276472827408, "loss": 0.1889, "step": 17010 }, { "epoch": 0.7, "grad_norm": 0.74609375, "learning_rate": 0.0004993268521916228, "loss": 0.2453, "step": 17020 }, { "epoch": 0.71, "grad_norm": 1.0703125, "learning_rate": 0.0004993260566312986, "loss": 0.249, "step": 17030 }, { "epoch": 0.71, "grad_norm": 0.2216796875, "learning_rate": 0.0004993252606017698, "loss": 0.165, "step": 17040 }, { "epoch": 0.71, "grad_norm": 0.349609375, "learning_rate": 0.0004993244641030376, "loss": 0.2713, "step": 17050 }, { "epoch": 0.71, "grad_norm": 0.58984375, "learning_rate": 0.0004993236671351038, "loss": 0.2168, "step": 17060 }, { "epoch": 0.71, "grad_norm": 0.435546875, "learning_rate": 0.0004993228696979697, "loss": 0.2315, "step": 17070 }, { "epoch": 0.71, "grad_norm": 2.109375, "learning_rate": 0.0004993220717916369, "loss": 0.2322, "step": 17080 }, { "epoch": 0.71, "grad_norm": 1.0625, "learning_rate": 0.0004993212734161069, "loss": 0.2528, "step": 17090 }, { "epoch": 0.71, "grad_norm": 1.0234375, "learning_rate": 0.0004993204745713811, "loss": 0.2223, "step": 17100 }, { "epoch": 0.71, "grad_norm": 0.55078125, "learning_rate": 0.0004993196752574613, "loss": 0.3006, "step": 17110 }, { "epoch": 0.71, "grad_norm": 1.5, "learning_rate": 0.0004993188754743486, "loss": 0.2415, "step": 17120 }, { "epoch": 0.71, "grad_norm": 0.484375, "learning_rate": 0.0004993180752220449, "loss": 0.2322, "step": 17130 }, { "epoch": 0.71, "grad_norm": 0.66796875, "learning_rate": 0.0004993172745005513, "loss": 0.1904, "step": 17140 }, { "epoch": 0.71, "grad_norm": 1.1171875, "learning_rate": 0.0004993164733098696, "loss": 0.2586, "step": 17150 }, { "epoch": 0.71, "grad_norm": 0.310546875, "learning_rate": 0.0004993156716500012, "loss": 0.2273, "step": 17160 }, { "epoch": 0.71, "grad_norm": 0.67578125, "learning_rate": 0.0004993148695209477, "loss": 0.2098, "step": 17170 }, { "epoch": 0.71, "grad_norm": 0.8984375, "learning_rate": 0.0004993140669227105, "loss": 0.2482, "step": 17180 }, { "epoch": 0.71, "grad_norm": 0.94921875, "learning_rate": 0.0004993132638552911, "loss": 0.2544, "step": 17190 }, { "epoch": 0.71, "grad_norm": 0.58984375, "learning_rate": 0.0004993124603186911, "loss": 0.1878, "step": 17200 }, { "epoch": 0.71, "grad_norm": 0.55078125, "learning_rate": 0.000499311656312912, "loss": 0.2489, "step": 17210 }, { "epoch": 0.71, "grad_norm": 0.9375, "learning_rate": 0.0004993108518379552, "loss": 0.2692, "step": 17220 }, { "epoch": 0.71, "grad_norm": 0.80859375, "learning_rate": 0.0004993100468938225, "loss": 0.2249, "step": 17230 }, { "epoch": 0.71, "grad_norm": 0.84375, "learning_rate": 0.0004993092414805149, "loss": 0.2435, "step": 17240 }, { "epoch": 0.71, "grad_norm": 0.76953125, "learning_rate": 0.0004993084355980345, "loss": 0.194, "step": 17250 }, { "epoch": 0.71, "grad_norm": 0.92578125, "learning_rate": 0.0004993076292463824, "loss": 0.2787, "step": 17260 }, { "epoch": 0.72, "grad_norm": 0.5390625, "learning_rate": 0.0004993068224255603, "loss": 0.3071, "step": 17270 }, { "epoch": 0.72, "grad_norm": 1.375, "learning_rate": 0.0004993060151355697, "loss": 0.2312, "step": 17280 }, { "epoch": 0.72, "grad_norm": 0.796875, "learning_rate": 0.0004993052073764122, "loss": 0.2134, "step": 17290 }, { "epoch": 0.72, "grad_norm": 1.8671875, "learning_rate": 0.000499304399148089, "loss": 0.2643, "step": 17300 }, { "epoch": 0.72, "grad_norm": 0.59375, "learning_rate": 0.000499303590450602, "loss": 0.2533, "step": 17310 }, { "epoch": 0.72, "grad_norm": 0.2275390625, "learning_rate": 0.0004993027812839525, "loss": 0.2177, "step": 17320 }, { "epoch": 0.72, "grad_norm": 0.59765625, "learning_rate": 0.0004993019716481422, "loss": 0.253, "step": 17330 }, { "epoch": 0.72, "grad_norm": 0.515625, "learning_rate": 0.0004993011615431723, "loss": 0.2803, "step": 17340 }, { "epoch": 0.72, "grad_norm": 0.376953125, "learning_rate": 0.0004993003509690448, "loss": 0.2332, "step": 17350 }, { "epoch": 0.72, "grad_norm": 0.94140625, "learning_rate": 0.0004992995399257608, "loss": 0.2953, "step": 17360 }, { "epoch": 0.72, "grad_norm": 0.68359375, "learning_rate": 0.000499298728413322, "loss": 0.2093, "step": 17370 }, { "epoch": 0.72, "grad_norm": 0.6171875, "learning_rate": 0.00049929791643173, "loss": 0.2506, "step": 17380 }, { "epoch": 0.72, "grad_norm": 0.37109375, "learning_rate": 0.0004992971039809861, "loss": 0.245, "step": 17390 }, { "epoch": 0.72, "grad_norm": 1.6171875, "learning_rate": 0.0004992962910610921, "loss": 0.2056, "step": 17400 }, { "epoch": 0.72, "grad_norm": 1.25, "learning_rate": 0.0004992954776720493, "loss": 0.2252, "step": 17410 }, { "epoch": 0.72, "grad_norm": 0.671875, "learning_rate": 0.0004992946638138594, "loss": 0.1891, "step": 17420 }, { "epoch": 0.72, "grad_norm": 0.52734375, "learning_rate": 0.0004992938494865238, "loss": 0.2621, "step": 17430 }, { "epoch": 0.72, "grad_norm": 0.6171875, "learning_rate": 0.0004992930346900442, "loss": 0.2367, "step": 17440 }, { "epoch": 0.72, "grad_norm": 0.107421875, "learning_rate": 0.0004992922194244219, "loss": 0.2166, "step": 17450 }, { "epoch": 0.72, "grad_norm": 1.4765625, "learning_rate": 0.0004992914036896586, "loss": 0.2661, "step": 17460 }, { "epoch": 0.72, "grad_norm": 0.4609375, "learning_rate": 0.000499290587485756, "loss": 0.2547, "step": 17470 }, { "epoch": 0.72, "grad_norm": 1.2421875, "learning_rate": 0.0004992897708127152, "loss": 0.2819, "step": 17480 }, { "epoch": 0.72, "grad_norm": 0.498046875, "learning_rate": 0.000499288953670538, "loss": 0.2292, "step": 17490 }, { "epoch": 0.72, "grad_norm": 1.234375, "learning_rate": 0.000499288136059226, "loss": 0.255, "step": 17500 }, { "epoch": 0.73, "grad_norm": 0.5859375, "learning_rate": 0.0004992873179787806, "loss": 0.2223, "step": 17510 }, { "epoch": 0.73, "grad_norm": 1.3125, "learning_rate": 0.0004992864994292034, "loss": 0.2639, "step": 17520 }, { "epoch": 0.73, "grad_norm": 0.5, "learning_rate": 0.000499285680410496, "loss": 0.2631, "step": 17530 }, { "epoch": 0.73, "grad_norm": 0.6796875, "learning_rate": 0.0004992848609226597, "loss": 0.265, "step": 17540 }, { "epoch": 0.73, "grad_norm": 1.0, "learning_rate": 0.0004992840409656963, "loss": 0.2531, "step": 17550 }, { "epoch": 0.73, "grad_norm": 1.78125, "learning_rate": 0.0004992832205396073, "loss": 0.274, "step": 17560 }, { "epoch": 0.73, "grad_norm": 0.81640625, "learning_rate": 0.0004992823996443942, "loss": 0.2619, "step": 17570 }, { "epoch": 0.73, "grad_norm": 0.59375, "learning_rate": 0.0004992815782800585, "loss": 0.1951, "step": 17580 }, { "epoch": 0.73, "grad_norm": 1.1015625, "learning_rate": 0.0004992807564466017, "loss": 0.2277, "step": 17590 }, { "epoch": 0.73, "grad_norm": 0.5234375, "learning_rate": 0.0004992799341440255, "loss": 0.2044, "step": 17600 }, { "epoch": 0.73, "grad_norm": 1.171875, "learning_rate": 0.0004992791113723314, "loss": 0.2635, "step": 17610 }, { "epoch": 0.73, "grad_norm": 1.15625, "learning_rate": 0.000499278288131521, "loss": 0.234, "step": 17620 }, { "epoch": 0.73, "grad_norm": 0.72265625, "learning_rate": 0.0004992774644215957, "loss": 0.2378, "step": 17630 }, { "epoch": 0.73, "grad_norm": 0.625, "learning_rate": 0.0004992766402425571, "loss": 0.2155, "step": 17640 }, { "epoch": 0.73, "grad_norm": 0.51171875, "learning_rate": 0.000499275815594407, "loss": 0.2374, "step": 17650 }, { "epoch": 0.73, "grad_norm": 0.71875, "learning_rate": 0.0004992749904771466, "loss": 0.2154, "step": 17660 }, { "epoch": 0.73, "grad_norm": 0.54296875, "learning_rate": 0.0004992741648907775, "loss": 0.2423, "step": 17670 }, { "epoch": 0.73, "grad_norm": 0.451171875, "learning_rate": 0.0004992733388353013, "loss": 0.2596, "step": 17680 }, { "epoch": 0.73, "grad_norm": 1.0234375, "learning_rate": 0.0004992725123107198, "loss": 0.2236, "step": 17690 }, { "epoch": 0.73, "grad_norm": 0.828125, "learning_rate": 0.0004992716853170342, "loss": 0.1911, "step": 17700 }, { "epoch": 0.73, "grad_norm": 0.435546875, "learning_rate": 0.0004992708578542462, "loss": 0.2483, "step": 17710 }, { "epoch": 0.73, "grad_norm": 0.53125, "learning_rate": 0.0004992700299223575, "loss": 0.225, "step": 17720 }, { "epoch": 0.73, "grad_norm": 1.3359375, "learning_rate": 0.0004992692015213695, "loss": 0.2102, "step": 17730 }, { "epoch": 0.73, "grad_norm": 0.78515625, "learning_rate": 0.0004992683726512836, "loss": 0.2057, "step": 17740 }, { "epoch": 0.74, "grad_norm": 0.59375, "learning_rate": 0.0004992675433121017, "loss": 0.2417, "step": 17750 }, { "epoch": 0.74, "grad_norm": 1.109375, "learning_rate": 0.0004992667135038252, "loss": 0.2267, "step": 17760 }, { "epoch": 0.74, "grad_norm": 1.453125, "learning_rate": 0.0004992658832264557, "loss": 0.1883, "step": 17770 }, { "epoch": 0.74, "grad_norm": 0.5703125, "learning_rate": 0.0004992650524799946, "loss": 0.2357, "step": 17780 }, { "epoch": 0.74, "grad_norm": 0.27734375, "learning_rate": 0.0004992642212644436, "loss": 0.249, "step": 17790 }, { "epoch": 0.74, "grad_norm": 1.1953125, "learning_rate": 0.0004992633895798043, "loss": 0.2357, "step": 17800 }, { "epoch": 0.74, "grad_norm": 0.68359375, "learning_rate": 0.0004992625574260783, "loss": 0.2548, "step": 17810 }, { "epoch": 0.74, "grad_norm": 1.8125, "learning_rate": 0.000499261724803267, "loss": 0.2284, "step": 17820 }, { "epoch": 0.74, "grad_norm": 0.53125, "learning_rate": 0.000499260891711372, "loss": 0.1952, "step": 17830 }, { "epoch": 0.74, "grad_norm": 0.416015625, "learning_rate": 0.0004992600581503949, "loss": 0.1884, "step": 17840 }, { "epoch": 0.74, "grad_norm": 0.765625, "learning_rate": 0.0004992592241203375, "loss": 0.2618, "step": 17850 }, { "epoch": 0.74, "grad_norm": 0.6640625, "learning_rate": 0.000499258389621201, "loss": 0.2842, "step": 17860 }, { "epoch": 0.74, "grad_norm": 0.74609375, "learning_rate": 0.0004992575546529871, "loss": 0.2695, "step": 17870 }, { "epoch": 0.74, "grad_norm": 0.671875, "learning_rate": 0.0004992567192156975, "loss": 0.3269, "step": 17880 }, { "epoch": 0.74, "grad_norm": 0.34375, "learning_rate": 0.0004992558833093335, "loss": 0.2118, "step": 17890 }, { "epoch": 0.74, "grad_norm": 0.625, "learning_rate": 0.000499255046933897, "loss": 0.2652, "step": 17900 }, { "epoch": 0.74, "grad_norm": 0.79296875, "learning_rate": 0.0004992542100893894, "loss": 0.2799, "step": 17910 }, { "epoch": 0.74, "grad_norm": 1.1796875, "learning_rate": 0.0004992533727758122, "loss": 0.3002, "step": 17920 }, { "epoch": 0.74, "grad_norm": 0.70703125, "learning_rate": 0.0004992525349931672, "loss": 0.2393, "step": 17930 }, { "epoch": 0.74, "grad_norm": 2.34375, "learning_rate": 0.0004992516967414559, "loss": 0.2784, "step": 17940 }, { "epoch": 0.74, "grad_norm": 0.52734375, "learning_rate": 0.0004992508580206797, "loss": 0.2165, "step": 17950 }, { "epoch": 0.74, "grad_norm": 0.765625, "learning_rate": 0.0004992500188308403, "loss": 0.2561, "step": 17960 }, { "epoch": 0.74, "grad_norm": 1.3046875, "learning_rate": 0.0004992491791719393, "loss": 0.3094, "step": 17970 }, { "epoch": 0.74, "grad_norm": 0.640625, "learning_rate": 0.0004992483390439782, "loss": 0.2693, "step": 17980 }, { "epoch": 0.75, "grad_norm": 0.67578125, "learning_rate": 0.0004992474984469587, "loss": 0.2246, "step": 17990 }, { "epoch": 0.75, "grad_norm": 1.6953125, "learning_rate": 0.0004992466573808823, "loss": 0.2121, "step": 18000 }, { "epoch": 0.75, "grad_norm": 0.63671875, "learning_rate": 0.0004992458158457507, "loss": 0.2536, "step": 18010 }, { "epoch": 0.75, "grad_norm": 0.640625, "learning_rate": 0.0004992449738415653, "loss": 0.265, "step": 18020 }, { "epoch": 0.75, "grad_norm": 0.82421875, "learning_rate": 0.0004992441313683278, "loss": 0.2412, "step": 18030 }, { "epoch": 0.75, "grad_norm": 0.828125, "learning_rate": 0.0004992432884260398, "loss": 0.2425, "step": 18040 }, { "epoch": 0.75, "grad_norm": 0.58203125, "learning_rate": 0.0004992424450147028, "loss": 0.1954, "step": 18050 }, { "epoch": 0.75, "grad_norm": 1.609375, "learning_rate": 0.0004992416011343185, "loss": 0.2031, "step": 18060 }, { "epoch": 0.75, "grad_norm": 1.34375, "learning_rate": 0.0004992407567848883, "loss": 0.2401, "step": 18070 }, { "epoch": 0.75, "grad_norm": 1.234375, "learning_rate": 0.000499239911966414, "loss": 0.2886, "step": 18080 }, { "epoch": 0.75, "grad_norm": 0.76953125, "learning_rate": 0.0004992390666788971, "loss": 0.3007, "step": 18090 }, { "epoch": 0.75, "grad_norm": 0.51953125, "learning_rate": 0.0004992382209223392, "loss": 0.2466, "step": 18100 }, { "epoch": 0.75, "grad_norm": 0.515625, "learning_rate": 0.0004992373746967418, "loss": 0.1946, "step": 18110 }, { "epoch": 0.75, "grad_norm": 0.47265625, "learning_rate": 0.0004992365280021066, "loss": 0.2871, "step": 18120 }, { "epoch": 0.75, "grad_norm": 0.8671875, "learning_rate": 0.0004992356808384352, "loss": 0.177, "step": 18130 }, { "epoch": 0.75, "grad_norm": 0.255859375, "learning_rate": 0.0004992348332057292, "loss": 0.2067, "step": 18140 }, { "epoch": 0.75, "grad_norm": 0.50390625, "learning_rate": 0.0004992339851039901, "loss": 0.2346, "step": 18150 }, { "epoch": 0.75, "grad_norm": 0.5703125, "learning_rate": 0.0004992331365332196, "loss": 0.2302, "step": 18160 }, { "epoch": 0.75, "grad_norm": 0.2890625, "learning_rate": 0.0004992322874934192, "loss": 0.2224, "step": 18170 }, { "epoch": 0.75, "grad_norm": 0.36328125, "learning_rate": 0.0004992314379845906, "loss": 0.2123, "step": 18180 }, { "epoch": 0.75, "grad_norm": 1.0546875, "learning_rate": 0.0004992305880067353, "loss": 0.2437, "step": 18190 }, { "epoch": 0.75, "grad_norm": 0.5, "learning_rate": 0.000499229737559855, "loss": 0.2096, "step": 18200 }, { "epoch": 0.75, "grad_norm": 0.25, "learning_rate": 0.0004992288866439513, "loss": 0.1769, "step": 18210 }, { "epoch": 0.75, "grad_norm": 0.953125, "learning_rate": 0.0004992280352590256, "loss": 0.2369, "step": 18220 }, { "epoch": 0.76, "grad_norm": 1.4453125, "learning_rate": 0.0004992271834050797, "loss": 0.2499, "step": 18230 }, { "epoch": 0.76, "grad_norm": 0.6171875, "learning_rate": 0.0004992263310821152, "loss": 0.2691, "step": 18240 }, { "epoch": 0.76, "grad_norm": 0.52734375, "learning_rate": 0.0004992254782901337, "loss": 0.2555, "step": 18250 }, { "epoch": 0.76, "grad_norm": 1.265625, "learning_rate": 0.0004992246250291367, "loss": 0.2829, "step": 18260 }, { "epoch": 0.76, "grad_norm": 0.62890625, "learning_rate": 0.0004992237712991258, "loss": 0.1797, "step": 18270 }, { "epoch": 0.76, "grad_norm": 0.55078125, "learning_rate": 0.0004992229171001028, "loss": 0.2878, "step": 18280 }, { "epoch": 0.76, "grad_norm": 0.74609375, "learning_rate": 0.0004992220624320692, "loss": 0.2282, "step": 18290 }, { "epoch": 0.76, "grad_norm": 1.3515625, "learning_rate": 0.0004992212072950265, "loss": 0.2757, "step": 18300 }, { "epoch": 0.76, "grad_norm": 0.4609375, "learning_rate": 0.0004992203516889764, "loss": 0.2269, "step": 18310 }, { "epoch": 0.76, "grad_norm": 0.53515625, "learning_rate": 0.0004992194956139205, "loss": 0.2339, "step": 18320 }, { "epoch": 0.76, "grad_norm": 0.423828125, "learning_rate": 0.0004992186390698606, "loss": 0.2265, "step": 18330 }, { "epoch": 0.76, "grad_norm": 0.51953125, "learning_rate": 0.0004992177820567979, "loss": 0.2385, "step": 18340 }, { "epoch": 0.76, "grad_norm": 0.71875, "learning_rate": 0.0004992169245747343, "loss": 0.3103, "step": 18350 }, { "epoch": 0.76, "grad_norm": 0.98046875, "learning_rate": 0.0004992160666236714, "loss": 0.2667, "step": 18360 }, { "epoch": 0.76, "grad_norm": 0.74609375, "learning_rate": 0.0004992152082036108, "loss": 0.2514, "step": 18370 }, { "epoch": 0.76, "grad_norm": 0.435546875, "learning_rate": 0.0004992143493145542, "loss": 0.2051, "step": 18380 }, { "epoch": 0.76, "grad_norm": 1.609375, "learning_rate": 0.0004992134899565029, "loss": 0.2016, "step": 18390 }, { "epoch": 0.76, "grad_norm": 0.5859375, "learning_rate": 0.0004992126301294588, "loss": 0.243, "step": 18400 }, { "epoch": 0.76, "grad_norm": 1.0078125, "learning_rate": 0.0004992117698334234, "loss": 0.2704, "step": 18410 }, { "epoch": 0.76, "grad_norm": 0.6484375, "learning_rate": 0.0004992109090683984, "loss": 0.2169, "step": 18420 }, { "epoch": 0.76, "grad_norm": 0.4375, "learning_rate": 0.0004992100478343854, "loss": 0.2469, "step": 18430 }, { "epoch": 0.76, "grad_norm": 0.8671875, "learning_rate": 0.000499209186131386, "loss": 0.2797, "step": 18440 }, { "epoch": 0.76, "grad_norm": 0.9140625, "learning_rate": 0.0004992083239594018, "loss": 0.2794, "step": 18450 }, { "epoch": 0.76, "grad_norm": 0.8046875, "learning_rate": 0.0004992074613184345, "loss": 0.2631, "step": 18460 }, { "epoch": 0.77, "grad_norm": 0.412109375, "learning_rate": 0.0004992065982084857, "loss": 0.224, "step": 18470 }, { "epoch": 0.77, "grad_norm": 0.828125, "learning_rate": 0.0004992057346295569, "loss": 0.2397, "step": 18480 }, { "epoch": 0.77, "grad_norm": 0.34765625, "learning_rate": 0.0004992048705816498, "loss": 0.2209, "step": 18490 }, { "epoch": 0.77, "grad_norm": 0.69921875, "learning_rate": 0.0004992040060647661, "loss": 0.2347, "step": 18500 }, { "epoch": 0.77, "grad_norm": 1.203125, "learning_rate": 0.0004992031410789074, "loss": 0.2044, "step": 18510 }, { "epoch": 0.77, "grad_norm": 1.3359375, "learning_rate": 0.0004992022756240752, "loss": 0.2877, "step": 18520 }, { "epoch": 0.77, "grad_norm": 0.5, "learning_rate": 0.0004992014097002713, "loss": 0.1885, "step": 18530 }, { "epoch": 0.77, "grad_norm": 0.69140625, "learning_rate": 0.0004992005433074973, "loss": 0.2601, "step": 18540 }, { "epoch": 0.77, "grad_norm": 0.087890625, "learning_rate": 0.0004991996764457547, "loss": 0.2354, "step": 18550 }, { "epoch": 0.77, "grad_norm": 0.5703125, "learning_rate": 0.0004991988091150453, "loss": 0.245, "step": 18560 }, { "epoch": 0.77, "grad_norm": 0.5, "learning_rate": 0.0004991979413153705, "loss": 0.2392, "step": 18570 }, { "epoch": 0.77, "grad_norm": 0.8203125, "learning_rate": 0.0004991970730467322, "loss": 0.2513, "step": 18580 }, { "epoch": 0.77, "grad_norm": 0.1640625, "learning_rate": 0.000499196204309132, "loss": 0.2127, "step": 18590 }, { "epoch": 0.77, "grad_norm": 0.390625, "learning_rate": 0.0004991953351025714, "loss": 0.229, "step": 18600 }, { "epoch": 0.77, "grad_norm": 0.578125, "learning_rate": 0.000499194465427052, "loss": 0.236, "step": 18610 }, { "epoch": 0.77, "grad_norm": 0.96484375, "learning_rate": 0.0004991935952825756, "loss": 0.2351, "step": 18620 }, { "epoch": 0.77, "grad_norm": 0.90234375, "learning_rate": 0.0004991927246691438, "loss": 0.229, "step": 18630 }, { "epoch": 0.77, "grad_norm": 0.2421875, "learning_rate": 0.0004991918535867581, "loss": 0.2497, "step": 18640 }, { "epoch": 0.77, "grad_norm": 0.439453125, "learning_rate": 0.0004991909820354204, "loss": 0.2857, "step": 18650 }, { "epoch": 0.77, "grad_norm": 0.322265625, "learning_rate": 0.000499190110015132, "loss": 0.2132, "step": 18660 }, { "epoch": 0.77, "grad_norm": 1.0390625, "learning_rate": 0.0004991892375258948, "loss": 0.2546, "step": 18670 }, { "epoch": 0.77, "grad_norm": 0.48828125, "learning_rate": 0.0004991883645677103, "loss": 0.2352, "step": 18680 }, { "epoch": 0.77, "grad_norm": 0.53515625, "learning_rate": 0.0004991874911405804, "loss": 0.1868, "step": 18690 }, { "epoch": 0.77, "grad_norm": 1.2109375, "learning_rate": 0.0004991866172445065, "loss": 0.273, "step": 18700 }, { "epoch": 0.77, "grad_norm": 0.5234375, "learning_rate": 0.0004991857428794901, "loss": 0.2649, "step": 18710 }, { "epoch": 0.78, "grad_norm": 0.46875, "learning_rate": 0.0004991848680455332, "loss": 0.2093, "step": 18720 }, { "epoch": 0.78, "grad_norm": 1.0078125, "learning_rate": 0.0004991839927426373, "loss": 0.2695, "step": 18730 }, { "epoch": 0.78, "grad_norm": 2.984375, "learning_rate": 0.0004991831169708039, "loss": 0.2263, "step": 18740 }, { "epoch": 0.78, "grad_norm": 0.90234375, "learning_rate": 0.0004991822407300349, "loss": 0.2336, "step": 18750 }, { "epoch": 0.78, "grad_norm": 0.765625, "learning_rate": 0.0004991813640203318, "loss": 0.259, "step": 18760 }, { "epoch": 0.78, "grad_norm": 3.359375, "learning_rate": 0.0004991804868416963, "loss": 0.1828, "step": 18770 }, { "epoch": 0.78, "grad_norm": 0.4765625, "learning_rate": 0.00049917960919413, "loss": 0.2196, "step": 18780 }, { "epoch": 0.78, "grad_norm": 0.796875, "learning_rate": 0.0004991787310776346, "loss": 0.1959, "step": 18790 }, { "epoch": 0.78, "grad_norm": 1.3984375, "learning_rate": 0.0004991778524922117, "loss": 0.2624, "step": 18800 }, { "epoch": 0.78, "grad_norm": 0.3984375, "learning_rate": 0.000499176973437863, "loss": 0.2861, "step": 18810 }, { "epoch": 0.78, "grad_norm": 1.1953125, "learning_rate": 0.0004991760939145902, "loss": 0.2527, "step": 18820 }, { "epoch": 0.78, "grad_norm": 0.76171875, "learning_rate": 0.0004991752139223949, "loss": 0.2685, "step": 18830 }, { "epoch": 0.78, "grad_norm": 1.0859375, "learning_rate": 0.0004991743334612787, "loss": 0.2376, "step": 18840 }, { "epoch": 0.78, "grad_norm": 0.5703125, "learning_rate": 0.0004991734525312434, "loss": 0.2262, "step": 18850 }, { "epoch": 0.78, "grad_norm": 0.357421875, "learning_rate": 0.0004991725711322905, "loss": 0.2015, "step": 18860 }, { "epoch": 0.78, "grad_norm": 0.63671875, "learning_rate": 0.0004991716892644218, "loss": 0.261, "step": 18870 }, { "epoch": 0.78, "grad_norm": 0.41015625, "learning_rate": 0.0004991708069276388, "loss": 0.2486, "step": 18880 }, { "epoch": 0.78, "grad_norm": 1.0078125, "learning_rate": 0.0004991699241219433, "loss": 0.2392, "step": 18890 }, { "epoch": 0.78, "grad_norm": 0.85546875, "learning_rate": 0.0004991690408473368, "loss": 0.2374, "step": 18900 }, { "epoch": 0.78, "grad_norm": 2.5625, "learning_rate": 0.0004991681571038212, "loss": 0.2469, "step": 18910 }, { "epoch": 0.78, "grad_norm": 10.5625, "learning_rate": 0.0004991672728913981, "loss": 0.2868, "step": 18920 }, { "epoch": 0.78, "grad_norm": 2.46875, "learning_rate": 0.000499166388210069, "loss": 0.2709, "step": 18930 }, { "epoch": 0.78, "grad_norm": 0.34375, "learning_rate": 0.0004991655030598356, "loss": 0.276, "step": 18940 }, { "epoch": 0.78, "grad_norm": 0.3359375, "learning_rate": 0.0004991646174406998, "loss": 0.2303, "step": 18950 }, { "epoch": 0.79, "grad_norm": 0.6171875, "learning_rate": 0.000499163731352663, "loss": 0.1832, "step": 18960 }, { "epoch": 0.79, "grad_norm": 0.69140625, "learning_rate": 0.0004991628447957269, "loss": 0.2499, "step": 18970 }, { "epoch": 0.79, "grad_norm": 0.671875, "learning_rate": 0.0004991619577698933, "loss": 0.2411, "step": 18980 }, { "epoch": 0.79, "grad_norm": 1.984375, "learning_rate": 0.0004991610702751638, "loss": 0.2897, "step": 18990 }, { "epoch": 0.79, "grad_norm": 0.51953125, "learning_rate": 0.00049916018231154, "loss": 0.161, "step": 19000 }, { "epoch": 0.79, "grad_norm": 0.65234375, "learning_rate": 0.0004991592938790238, "loss": 0.2286, "step": 19010 }, { "epoch": 0.79, "grad_norm": 0.5078125, "learning_rate": 0.0004991584049776165, "loss": 0.2884, "step": 19020 }, { "epoch": 0.79, "grad_norm": 0.90625, "learning_rate": 0.0004991575156073202, "loss": 0.1708, "step": 19030 }, { "epoch": 0.79, "grad_norm": 0.458984375, "learning_rate": 0.0004991566257681363, "loss": 0.2228, "step": 19040 }, { "epoch": 0.79, "grad_norm": 0.75390625, "learning_rate": 0.0004991557354600666, "loss": 0.2641, "step": 19050 }, { "epoch": 0.79, "grad_norm": 0.54296875, "learning_rate": 0.0004991548446831125, "loss": 0.239, "step": 19060 }, { "epoch": 0.79, "grad_norm": 0.6796875, "learning_rate": 0.0004991539534372761, "loss": 0.2876, "step": 19070 }, { "epoch": 0.79, "grad_norm": 0.88671875, "learning_rate": 0.0004991530617225587, "loss": 0.2842, "step": 19080 }, { "epoch": 0.79, "grad_norm": 0.95703125, "learning_rate": 0.0004991521695389623, "loss": 0.1881, "step": 19090 }, { "epoch": 0.79, "grad_norm": 0.625, "learning_rate": 0.0004991512768864883, "loss": 0.2918, "step": 19100 }, { "epoch": 0.79, "grad_norm": 0.73828125, "learning_rate": 0.0004991503837651386, "loss": 0.1823, "step": 19110 }, { "epoch": 0.79, "grad_norm": 1.34375, "learning_rate": 0.0004991494901749147, "loss": 0.2628, "step": 19120 }, { "epoch": 0.79, "grad_norm": 0.5625, "learning_rate": 0.0004991485961158184, "loss": 0.2186, "step": 19130 }, { "epoch": 0.79, "grad_norm": 0.73046875, "learning_rate": 0.0004991477015878514, "loss": 0.2331, "step": 19140 }, { "epoch": 0.79, "grad_norm": 1.6953125, "learning_rate": 0.0004991468065910152, "loss": 0.242, "step": 19150 }, { "epoch": 0.79, "grad_norm": 0.52734375, "learning_rate": 0.0004991459111253117, "loss": 0.2394, "step": 19160 }, { "epoch": 0.79, "grad_norm": 0.63671875, "learning_rate": 0.0004991450151907425, "loss": 0.2236, "step": 19170 }, { "epoch": 0.79, "grad_norm": 0.7890625, "learning_rate": 0.0004991441187873094, "loss": 0.2406, "step": 19180 }, { "epoch": 0.79, "grad_norm": 0.294921875, "learning_rate": 0.0004991432219150138, "loss": 0.2011, "step": 19190 }, { "epoch": 0.8, "grad_norm": 1.1484375, "learning_rate": 0.0004991423245738576, "loss": 0.2481, "step": 19200 }, { "epoch": 0.8, "grad_norm": 0.6328125, "learning_rate": 0.0004991414267638425, "loss": 0.2826, "step": 19210 }, { "epoch": 0.8, "grad_norm": 0.8515625, "learning_rate": 0.0004991405284849701, "loss": 0.2581, "step": 19220 }, { "epoch": 0.8, "grad_norm": 0.66796875, "learning_rate": 0.0004991396297372422, "loss": 0.274, "step": 19230 }, { "epoch": 0.8, "grad_norm": 0.578125, "learning_rate": 0.0004991387305206602, "loss": 0.2679, "step": 19240 }, { "epoch": 0.8, "grad_norm": 0.71484375, "learning_rate": 0.0004991378308352263, "loss": 0.2411, "step": 19250 }, { "epoch": 0.8, "grad_norm": 1.375, "learning_rate": 0.0004991369306809418, "loss": 0.2653, "step": 19260 }, { "epoch": 0.8, "grad_norm": 1.1015625, "learning_rate": 0.0004991360300578084, "loss": 0.2284, "step": 19270 }, { "epoch": 0.8, "grad_norm": 0.59375, "learning_rate": 0.000499135128965828, "loss": 0.2059, "step": 19280 }, { "epoch": 0.8, "grad_norm": 0.796875, "learning_rate": 0.0004991342274050022, "loss": 0.2395, "step": 19290 }, { "epoch": 0.8, "grad_norm": 0.5625, "learning_rate": 0.0004991333253753326, "loss": 0.2862, "step": 19300 }, { "epoch": 0.8, "grad_norm": 0.8359375, "learning_rate": 0.0004991324228768211, "loss": 0.2441, "step": 19310 }, { "epoch": 0.8, "grad_norm": 0.58984375, "learning_rate": 0.0004991315199094693, "loss": 0.2864, "step": 19320 }, { "epoch": 0.8, "grad_norm": 0.91796875, "learning_rate": 0.0004991306164732788, "loss": 0.2728, "step": 19330 }, { "epoch": 0.8, "grad_norm": 0.5625, "learning_rate": 0.0004991297125682513, "loss": 0.2378, "step": 19340 }, { "epoch": 0.8, "grad_norm": 0.71875, "learning_rate": 0.0004991288081943887, "loss": 0.2146, "step": 19350 }, { "epoch": 0.8, "grad_norm": 1.8359375, "learning_rate": 0.0004991279033516926, "loss": 0.1939, "step": 19360 }, { "epoch": 0.8, "grad_norm": 0.6484375, "learning_rate": 0.0004991269980401646, "loss": 0.2356, "step": 19370 }, { "epoch": 0.8, "grad_norm": 0.96484375, "learning_rate": 0.0004991260922598067, "loss": 0.2703, "step": 19380 }, { "epoch": 0.8, "grad_norm": 0.65625, "learning_rate": 0.0004991251860106202, "loss": 0.2, "step": 19390 }, { "epoch": 0.8, "grad_norm": 0.890625, "learning_rate": 0.000499124279292607, "loss": 0.2227, "step": 19400 }, { "epoch": 0.8, "grad_norm": 0.478515625, "learning_rate": 0.0004991233721057689, "loss": 0.2046, "step": 19410 }, { "epoch": 0.8, "grad_norm": 0.546875, "learning_rate": 0.0004991224644501075, "loss": 0.2576, "step": 19420 }, { "epoch": 0.8, "grad_norm": 0.439453125, "learning_rate": 0.0004991215563256244, "loss": 0.1826, "step": 19430 }, { "epoch": 0.81, "grad_norm": 0.3828125, "learning_rate": 0.0004991206477323216, "loss": 0.246, "step": 19440 }, { "epoch": 0.81, "grad_norm": 0.6640625, "learning_rate": 0.0004991197386702005, "loss": 0.2074, "step": 19450 }, { "epoch": 0.81, "grad_norm": 0.86328125, "learning_rate": 0.0004991188291392631, "loss": 0.1499, "step": 19460 }, { "epoch": 0.81, "grad_norm": 0.7109375, "learning_rate": 0.0004991179191395109, "loss": 0.2188, "step": 19470 }, { "epoch": 0.81, "grad_norm": 0.63671875, "learning_rate": 0.0004991170086709457, "loss": 0.1868, "step": 19480 }, { "epoch": 0.81, "grad_norm": 0.466796875, "learning_rate": 0.0004991160977335691, "loss": 0.2462, "step": 19490 }, { "epoch": 0.81, "grad_norm": 0.474609375, "learning_rate": 0.0004991151863273831, "loss": 0.2261, "step": 19500 }, { "epoch": 0.81, "grad_norm": 0.35546875, "learning_rate": 0.000499114274452389, "loss": 0.2167, "step": 19510 }, { "epoch": 0.81, "grad_norm": 0.80859375, "learning_rate": 0.0004991133621085889, "loss": 0.3245, "step": 19520 }, { "epoch": 0.81, "grad_norm": 0.78125, "learning_rate": 0.0004991124492959842, "loss": 0.2546, "step": 19530 }, { "epoch": 0.81, "grad_norm": 0.71484375, "learning_rate": 0.0004991115360145769, "loss": 0.2279, "step": 19540 }, { "epoch": 0.81, "grad_norm": 0.5546875, "learning_rate": 0.0004991106222643685, "loss": 0.1857, "step": 19550 }, { "epoch": 0.81, "grad_norm": 0.80078125, "learning_rate": 0.0004991097080453609, "loss": 0.3075, "step": 19560 }, { "epoch": 0.81, "grad_norm": 1.53125, "learning_rate": 0.0004991087933575558, "loss": 0.2187, "step": 19570 }, { "epoch": 0.81, "grad_norm": 0.3046875, "learning_rate": 0.0004991078782009547, "loss": 0.191, "step": 19580 }, { "epoch": 0.81, "grad_norm": 0.84375, "learning_rate": 0.0004991069625755595, "loss": 0.2783, "step": 19590 }, { "epoch": 0.81, "grad_norm": 1.2734375, "learning_rate": 0.000499106046481372, "loss": 0.2072, "step": 19600 }, { "epoch": 0.81, "grad_norm": 0.5625, "learning_rate": 0.0004991051299183937, "loss": 0.232, "step": 19610 }, { "epoch": 0.81, "grad_norm": 0.0, "learning_rate": 0.0004991042128866264, "loss": 0.202, "step": 19620 }, { "epoch": 0.81, "grad_norm": 3.09375, "learning_rate": 0.000499103295386072, "loss": 0.2601, "step": 19630 }, { "epoch": 0.81, "grad_norm": 0.96875, "learning_rate": 0.0004991023774167321, "loss": 0.2078, "step": 19640 }, { "epoch": 0.81, "grad_norm": 1.125, "learning_rate": 0.0004991014589786083, "loss": 0.2598, "step": 19650 }, { "epoch": 0.81, "grad_norm": 2.078125, "learning_rate": 0.0004991005400717026, "loss": 0.2598, "step": 19660 }, { "epoch": 0.81, "grad_norm": 0.5078125, "learning_rate": 0.0004990996206960165, "loss": 0.236, "step": 19670 }, { "epoch": 0.82, "grad_norm": 0.625, "learning_rate": 0.0004990987008515518, "loss": 0.2201, "step": 19680 }, { "epoch": 0.82, "grad_norm": 0.546875, "learning_rate": 0.0004990977805383103, "loss": 0.2803, "step": 19690 }, { "epoch": 0.82, "grad_norm": 1.984375, "learning_rate": 0.0004990968597562937, "loss": 0.2678, "step": 19700 }, { "epoch": 0.82, "grad_norm": 0.7578125, "learning_rate": 0.0004990959385055037, "loss": 0.248, "step": 19710 }, { "epoch": 0.82, "grad_norm": 0.94921875, "learning_rate": 0.000499095016785942, "loss": 0.2298, "step": 19720 }, { "epoch": 0.82, "grad_norm": 0.76171875, "learning_rate": 0.0004990940945976104, "loss": 0.246, "step": 19730 }, { "epoch": 0.82, "grad_norm": 0.392578125, "learning_rate": 0.0004990931719405106, "loss": 0.2098, "step": 19740 }, { "epoch": 0.82, "grad_norm": 1.3046875, "learning_rate": 0.0004990922488146444, "loss": 0.2199, "step": 19750 }, { "epoch": 0.82, "grad_norm": 0.87109375, "learning_rate": 0.0004990913252200135, "loss": 0.2366, "step": 19760 }, { "epoch": 0.82, "grad_norm": 1.1328125, "learning_rate": 0.0004990904011566194, "loss": 0.207, "step": 19770 }, { "epoch": 0.82, "grad_norm": 0.259765625, "learning_rate": 0.0004990894766244643, "loss": 0.2101, "step": 19780 }, { "epoch": 0.82, "grad_norm": 0.4296875, "learning_rate": 0.0004990885516235496, "loss": 0.2191, "step": 19790 }, { "epoch": 0.82, "grad_norm": 0.251953125, "learning_rate": 0.0004990876261538773, "loss": 0.2621, "step": 19800 }, { "epoch": 0.82, "grad_norm": 0.443359375, "learning_rate": 0.0004990867002154488, "loss": 0.2337, "step": 19810 }, { "epoch": 0.82, "grad_norm": 0.31640625, "learning_rate": 0.0004990857738082662, "loss": 0.205, "step": 19820 }, { "epoch": 0.82, "grad_norm": 0.5078125, "learning_rate": 0.0004990848469323309, "loss": 0.2116, "step": 19830 }, { "epoch": 0.82, "grad_norm": 0.423828125, "learning_rate": 0.0004990839195876448, "loss": 0.2426, "step": 19840 }, { "epoch": 0.82, "grad_norm": 0.71875, "learning_rate": 0.0004990829917742098, "loss": 0.2261, "step": 19850 }, { "epoch": 0.82, "grad_norm": 1.09375, "learning_rate": 0.0004990820634920275, "loss": 0.2587, "step": 19860 }, { "epoch": 0.82, "grad_norm": 0.66796875, "learning_rate": 0.0004990811347410996, "loss": 0.2729, "step": 19870 }, { "epoch": 0.82, "grad_norm": 0.39453125, "learning_rate": 0.000499080205521428, "loss": 0.2079, "step": 19880 }, { "epoch": 0.82, "grad_norm": 0.5078125, "learning_rate": 0.0004990792758330141, "loss": 0.242, "step": 19890 }, { "epoch": 0.82, "grad_norm": 0.5078125, "learning_rate": 0.0004990783456758601, "loss": 0.2392, "step": 19900 }, { "epoch": 0.82, "grad_norm": 0.310546875, "learning_rate": 0.0004990774150499676, "loss": 0.2243, "step": 19910 }, { "epoch": 0.83, "grad_norm": 0.515625, "learning_rate": 0.0004990764839553383, "loss": 0.2701, "step": 19920 }, { "epoch": 0.83, "grad_norm": 0.515625, "learning_rate": 0.0004990755523919738, "loss": 0.2857, "step": 19930 }, { "epoch": 0.83, "grad_norm": 0.6953125, "learning_rate": 0.0004990746203598761, "loss": 0.2446, "step": 19940 }, { "epoch": 0.83, "grad_norm": 0.99609375, "learning_rate": 0.0004990736878590468, "loss": 0.2501, "step": 19950 }, { "epoch": 0.83, "grad_norm": 1.4453125, "learning_rate": 0.0004990727548894878, "loss": 0.3534, "step": 19960 }, { "epoch": 0.83, "grad_norm": 0.7421875, "learning_rate": 0.0004990718214512007, "loss": 0.2235, "step": 19970 }, { "epoch": 0.83, "grad_norm": 0.23828125, "learning_rate": 0.0004990708875441873, "loss": 0.3046, "step": 19980 }, { "epoch": 0.83, "grad_norm": 1.15625, "learning_rate": 0.0004990699531684495, "loss": 0.2258, "step": 19990 }, { "epoch": 0.83, "grad_norm": 0.828125, "learning_rate": 0.000499069018323989, "loss": 0.2502, "step": 20000 }, { "epoch": 0.83, "grad_norm": 1.109375, "learning_rate": 0.0004990680830108074, "loss": 0.238, "step": 20010 }, { "epoch": 0.83, "grad_norm": 0.625, "learning_rate": 0.0004990671472289065, "loss": 0.1479, "step": 20020 }, { "epoch": 0.83, "grad_norm": 0.7890625, "learning_rate": 0.0004990662109782882, "loss": 0.2365, "step": 20030 }, { "epoch": 0.83, "grad_norm": 0.78515625, "learning_rate": 0.0004990652742589542, "loss": 0.2575, "step": 20040 }, { "epoch": 0.83, "grad_norm": 0.484375, "learning_rate": 0.0004990643370709063, "loss": 0.2933, "step": 20050 }, { "epoch": 0.83, "grad_norm": 0.416015625, "learning_rate": 0.0004990633994141462, "loss": 0.1982, "step": 20060 }, { "epoch": 0.83, "grad_norm": 0.59375, "learning_rate": 0.0004990624612886755, "loss": 0.2551, "step": 20070 }, { "epoch": 0.83, "grad_norm": 1.046875, "learning_rate": 0.0004990615226944964, "loss": 0.2263, "step": 20080 }, { "epoch": 0.83, "grad_norm": 0.73046875, "learning_rate": 0.0004990605836316102, "loss": 0.2009, "step": 20090 }, { "epoch": 0.83, "grad_norm": 0.44921875, "learning_rate": 0.0004990596441000189, "loss": 0.1811, "step": 20100 }, { "epoch": 0.83, "grad_norm": 0.484375, "learning_rate": 0.0004990587040997244, "loss": 0.2527, "step": 20110 }, { "epoch": 0.83, "grad_norm": 0.640625, "learning_rate": 0.0004990577636307282, "loss": 0.2432, "step": 20120 }, { "epoch": 0.83, "grad_norm": 0.58984375, "learning_rate": 0.0004990568226930322, "loss": 0.2788, "step": 20130 }, { "epoch": 0.83, "grad_norm": 0.466796875, "learning_rate": 0.0004990558812866382, "loss": 0.2421, "step": 20140 }, { "epoch": 0.83, "grad_norm": 0.76171875, "learning_rate": 0.0004990549394115479, "loss": 0.2436, "step": 20150 }, { "epoch": 0.84, "grad_norm": 0.66796875, "learning_rate": 0.000499053997067763, "loss": 0.276, "step": 20160 }, { "epoch": 0.84, "grad_norm": 0.8671875, "learning_rate": 0.0004990530542552854, "loss": 0.2846, "step": 20170 }, { "epoch": 0.84, "grad_norm": 0.4296875, "learning_rate": 0.000499052110974117, "loss": 0.2138, "step": 20180 }, { "epoch": 0.84, "grad_norm": 0.6484375, "learning_rate": 0.0004990511672242593, "loss": 0.24, "step": 20190 }, { "epoch": 0.84, "grad_norm": 0.478515625, "learning_rate": 0.0004990502230057143, "loss": 0.2334, "step": 20200 }, { "epoch": 0.84, "grad_norm": 1.0078125, "learning_rate": 0.0004990492783184836, "loss": 0.2342, "step": 20210 }, { "epoch": 0.84, "grad_norm": 0.90625, "learning_rate": 0.000499048333162569, "loss": 0.2751, "step": 20220 }, { "epoch": 0.84, "grad_norm": 0.78515625, "learning_rate": 0.0004990473875379724, "loss": 0.2884, "step": 20230 }, { "epoch": 0.84, "grad_norm": 0.4921875, "learning_rate": 0.0004990464414446955, "loss": 0.1955, "step": 20240 }, { "epoch": 0.84, "grad_norm": 0.8203125, "learning_rate": 0.0004990454948827401, "loss": 0.2349, "step": 20250 }, { "epoch": 0.84, "grad_norm": 0.6640625, "learning_rate": 0.000499044547852108, "loss": 0.3031, "step": 20260 }, { "epoch": 0.84, "grad_norm": 0.62109375, "learning_rate": 0.0004990436003528009, "loss": 0.2377, "step": 20270 }, { "epoch": 0.84, "grad_norm": 0.28515625, "learning_rate": 0.0004990426523848207, "loss": 0.2532, "step": 20280 }, { "epoch": 0.84, "grad_norm": 0.73828125, "learning_rate": 0.000499041703948169, "loss": 0.23, "step": 20290 }, { "epoch": 0.84, "grad_norm": 0.287109375, "learning_rate": 0.0004990407550428479, "loss": 0.2619, "step": 20300 }, { "epoch": 0.84, "grad_norm": 0.5625, "learning_rate": 0.0004990398056688588, "loss": 0.243, "step": 20310 }, { "epoch": 0.84, "grad_norm": 2.53125, "learning_rate": 0.0004990388558262038, "loss": 0.2415, "step": 20320 }, { "epoch": 0.84, "grad_norm": 0.609375, "learning_rate": 0.0004990379055148846, "loss": 0.2018, "step": 20330 }, { "epoch": 0.84, "grad_norm": 0.47265625, "learning_rate": 0.0004990369547349028, "loss": 0.1937, "step": 20340 }, { "epoch": 0.84, "grad_norm": 0.46484375, "learning_rate": 0.0004990360034862604, "loss": 0.2325, "step": 20350 }, { "epoch": 0.84, "grad_norm": 5.25, "learning_rate": 0.0004990350517689592, "loss": 0.2597, "step": 20360 }, { "epoch": 0.84, "grad_norm": 0.55859375, "learning_rate": 0.0004990340995830009, "loss": 0.2195, "step": 20370 }, { "epoch": 0.84, "grad_norm": 0.46484375, "learning_rate": 0.0004990331469283873, "loss": 0.2135, "step": 20380 }, { "epoch": 0.84, "grad_norm": 0.306640625, "learning_rate": 0.0004990321938051202, "loss": 0.2261, "step": 20390 }, { "epoch": 0.84, "grad_norm": 0.73828125, "learning_rate": 0.0004990312402132015, "loss": 0.2466, "step": 20400 }, { "epoch": 0.85, "grad_norm": 0.5, "learning_rate": 0.0004990302861526328, "loss": 0.2537, "step": 20410 }, { "epoch": 0.85, "grad_norm": 0.4609375, "learning_rate": 0.000499029331623416, "loss": 0.2416, "step": 20420 }, { "epoch": 0.85, "grad_norm": 0.87109375, "learning_rate": 0.0004990283766255529, "loss": 0.2524, "step": 20430 }, { "epoch": 0.85, "grad_norm": 0.5859375, "learning_rate": 0.0004990274211590453, "loss": 0.2667, "step": 20440 }, { "epoch": 0.85, "grad_norm": 1.203125, "learning_rate": 0.0004990264652238951, "loss": 0.2409, "step": 20450 }, { "epoch": 0.85, "grad_norm": 0.419921875, "learning_rate": 0.0004990255088201037, "loss": 0.2658, "step": 20460 }, { "epoch": 0.85, "grad_norm": 0.62890625, "learning_rate": 0.0004990245519476735, "loss": 0.2083, "step": 20470 }, { "epoch": 0.85, "grad_norm": 1.3671875, "learning_rate": 0.0004990235946066057, "loss": 0.2787, "step": 20480 }, { "epoch": 0.85, "grad_norm": 0.515625, "learning_rate": 0.0004990226367969027, "loss": 0.2239, "step": 20490 }, { "epoch": 0.85, "grad_norm": 0.73046875, "learning_rate": 0.0004990216785185658, "loss": 0.2148, "step": 20500 }, { "epoch": 0.85, "grad_norm": 1.015625, "learning_rate": 0.0004990207197715969, "loss": 0.2627, "step": 20510 }, { "epoch": 0.85, "grad_norm": 0.6328125, "learning_rate": 0.000499019760555998, "loss": 0.2456, "step": 20520 }, { "epoch": 0.85, "grad_norm": 0.58984375, "learning_rate": 0.0004990188008717709, "loss": 0.2327, "step": 20530 }, { "epoch": 0.85, "grad_norm": 0.41015625, "learning_rate": 0.0004990178407189172, "loss": 0.2526, "step": 20540 }, { "epoch": 0.85, "grad_norm": 0.68359375, "learning_rate": 0.0004990168800974387, "loss": 0.2834, "step": 20550 }, { "epoch": 0.85, "grad_norm": 0.671875, "learning_rate": 0.0004990159190073376, "loss": 0.2664, "step": 20560 }, { "epoch": 0.85, "grad_norm": 0.5546875, "learning_rate": 0.0004990149574486153, "loss": 0.2531, "step": 20570 }, { "epoch": 0.85, "grad_norm": 0.73046875, "learning_rate": 0.0004990139954212737, "loss": 0.2605, "step": 20580 }, { "epoch": 0.85, "grad_norm": 0.6015625, "learning_rate": 0.0004990130329253147, "loss": 0.2144, "step": 20590 }, { "epoch": 0.85, "grad_norm": 0.61328125, "learning_rate": 0.00049901206996074, "loss": 0.2897, "step": 20600 }, { "epoch": 0.85, "grad_norm": 0.69921875, "learning_rate": 0.0004990111065275516, "loss": 0.2405, "step": 20610 }, { "epoch": 0.85, "grad_norm": 0.365234375, "learning_rate": 0.0004990101426257511, "loss": 0.2058, "step": 20620 }, { "epoch": 0.85, "grad_norm": 0.78515625, "learning_rate": 0.0004990091782553403, "loss": 0.1949, "step": 20630 }, { "epoch": 0.85, "grad_norm": 1.515625, "learning_rate": 0.0004990082134163213, "loss": 0.2814, "step": 20640 }, { "epoch": 0.86, "grad_norm": 0.69921875, "learning_rate": 0.0004990072481086957, "loss": 0.2029, "step": 20650 }, { "epoch": 0.86, "grad_norm": 0.294921875, "learning_rate": 0.0004990062823324652, "loss": 0.2556, "step": 20660 }, { "epoch": 0.86, "grad_norm": 1.125, "learning_rate": 0.0004990053160876319, "loss": 0.2358, "step": 20670 }, { "epoch": 0.86, "grad_norm": 0.828125, "learning_rate": 0.0004990043493741975, "loss": 0.2346, "step": 20680 }, { "epoch": 0.86, "grad_norm": 0.478515625, "learning_rate": 0.0004990033821921637, "loss": 0.2793, "step": 20690 }, { "epoch": 0.86, "grad_norm": 0.546875, "learning_rate": 0.0004990024145415325, "loss": 0.2692, "step": 20700 }, { "epoch": 0.86, "grad_norm": 0.61328125, "learning_rate": 0.0004990014464223057, "loss": 0.252, "step": 20710 }, { "epoch": 0.86, "grad_norm": 0.42578125, "learning_rate": 0.000499000477834485, "loss": 0.2624, "step": 20720 }, { "epoch": 0.86, "grad_norm": 0.7578125, "learning_rate": 0.0004989995087780723, "loss": 0.2461, "step": 20730 }, { "epoch": 0.86, "grad_norm": 2.03125, "learning_rate": 0.0004989985392530693, "loss": 0.2804, "step": 20740 }, { "epoch": 0.86, "grad_norm": 0.59375, "learning_rate": 0.0004989975692594781, "loss": 0.1743, "step": 20750 }, { "epoch": 0.86, "grad_norm": 0.470703125, "learning_rate": 0.0004989965987973003, "loss": 0.2024, "step": 20760 }, { "epoch": 0.86, "grad_norm": 1.3984375, "learning_rate": 0.0004989956278665379, "loss": 0.203, "step": 20770 }, { "epoch": 0.86, "grad_norm": 1.03125, "learning_rate": 0.0004989946564671925, "loss": 0.2304, "step": 20780 }, { "epoch": 0.86, "grad_norm": 0.70703125, "learning_rate": 0.000498993684599266, "loss": 0.2517, "step": 20790 }, { "epoch": 0.86, "grad_norm": 0.95703125, "learning_rate": 0.0004989927122627604, "loss": 0.2906, "step": 20800 }, { "epoch": 0.86, "grad_norm": 0.796875, "learning_rate": 0.0004989917394576773, "loss": 0.275, "step": 20810 }, { "epoch": 0.86, "grad_norm": 1.21875, "learning_rate": 0.0004989907661840187, "loss": 0.2351, "step": 20820 }, { "epoch": 0.86, "grad_norm": 0.53125, "learning_rate": 0.0004989897924417864, "loss": 0.2409, "step": 20830 }, { "epoch": 0.86, "grad_norm": 0.5625, "learning_rate": 0.0004989888182309821, "loss": 0.2208, "step": 20840 }, { "epoch": 0.86, "grad_norm": 0.408203125, "learning_rate": 0.0004989878435516078, "loss": 0.293, "step": 20850 }, { "epoch": 0.86, "grad_norm": 0.6484375, "learning_rate": 0.0004989868684036653, "loss": 0.255, "step": 20860 }, { "epoch": 0.86, "grad_norm": 0.58984375, "learning_rate": 0.0004989858927871562, "loss": 0.235, "step": 20870 }, { "epoch": 0.86, "grad_norm": 1.1328125, "learning_rate": 0.0004989849167020827, "loss": 0.1533, "step": 20880 }, { "epoch": 0.87, "grad_norm": 2.1875, "learning_rate": 0.0004989839401484466, "loss": 0.2366, "step": 20890 }, { "epoch": 0.87, "grad_norm": 0.609375, "learning_rate": 0.0004989829631262494, "loss": 0.2398, "step": 20900 }, { "epoch": 0.87, "grad_norm": 0.80859375, "learning_rate": 0.0004989819856354933, "loss": 0.2641, "step": 20910 }, { "epoch": 0.87, "grad_norm": 0.59375, "learning_rate": 0.0004989810076761798, "loss": 0.2017, "step": 20920 }, { "epoch": 0.87, "grad_norm": 0.48828125, "learning_rate": 0.0004989800292483111, "loss": 0.2434, "step": 20930 }, { "epoch": 0.87, "grad_norm": 0.625, "learning_rate": 0.0004989790503518888, "loss": 0.2618, "step": 20940 }, { "epoch": 0.87, "grad_norm": 1.046875, "learning_rate": 0.0004989780709869149, "loss": 0.2838, "step": 20950 }, { "epoch": 0.87, "grad_norm": 0.296875, "learning_rate": 0.000498977091153391, "loss": 0.2566, "step": 20960 }, { "epoch": 0.87, "grad_norm": 1.3515625, "learning_rate": 0.0004989761108513193, "loss": 0.2233, "step": 20970 }, { "epoch": 0.87, "grad_norm": 0.921875, "learning_rate": 0.0004989751300807012, "loss": 0.2666, "step": 20980 }, { "epoch": 0.87, "grad_norm": 0.392578125, "learning_rate": 0.0004989741488415389, "loss": 0.2227, "step": 20990 }, { "epoch": 0.87, "grad_norm": 1.234375, "learning_rate": 0.0004989731671338342, "loss": 0.2401, "step": 21000 }, { "epoch": 0.87, "grad_norm": 0.61328125, "learning_rate": 0.0004989721849575889, "loss": 0.2427, "step": 21010 }, { "epoch": 0.87, "grad_norm": 1.1171875, "learning_rate": 0.0004989712023128048, "loss": 0.172, "step": 21020 }, { "epoch": 0.87, "grad_norm": 0.94921875, "learning_rate": 0.0004989702191994838, "loss": 0.2715, "step": 21030 }, { "epoch": 0.87, "grad_norm": 1.78125, "learning_rate": 0.0004989692356176277, "loss": 0.2896, "step": 21040 }, { "epoch": 0.87, "grad_norm": 0.75390625, "learning_rate": 0.0004989682515672383, "loss": 0.2351, "step": 21050 }, { "epoch": 0.87, "grad_norm": 0.76171875, "learning_rate": 0.0004989672670483177, "loss": 0.1964, "step": 21060 }, { "epoch": 0.87, "grad_norm": 0.99609375, "learning_rate": 0.0004989662820608675, "loss": 0.2369, "step": 21070 }, { "epoch": 0.87, "grad_norm": 0.671875, "learning_rate": 0.0004989652966048896, "loss": 0.2799, "step": 21080 }, { "epoch": 0.87, "grad_norm": 0.57421875, "learning_rate": 0.0004989643106803861, "loss": 0.2454, "step": 21090 }, { "epoch": 0.87, "grad_norm": 0.71484375, "learning_rate": 0.0004989633242873584, "loss": 0.2181, "step": 21100 }, { "epoch": 0.87, "grad_norm": 0.69140625, "learning_rate": 0.0004989623374258088, "loss": 0.2384, "step": 21110 }, { "epoch": 0.87, "grad_norm": 0.69921875, "learning_rate": 0.0004989613500957389, "loss": 0.2445, "step": 21120 }, { "epoch": 0.88, "grad_norm": 0.50390625, "learning_rate": 0.0004989603622971506, "loss": 0.2006, "step": 21130 }, { "epoch": 0.88, "grad_norm": 1.296875, "learning_rate": 0.0004989593740300458, "loss": 0.22, "step": 21140 }, { "epoch": 0.88, "grad_norm": 0.56640625, "learning_rate": 0.0004989583852944262, "loss": 0.2591, "step": 21150 }, { "epoch": 0.88, "grad_norm": 0.97265625, "learning_rate": 0.0004989573960902941, "loss": 0.2459, "step": 21160 }, { "epoch": 0.88, "grad_norm": 0.62890625, "learning_rate": 0.0004989564064176508, "loss": 0.2651, "step": 21170 }, { "epoch": 0.88, "grad_norm": 0.67578125, "learning_rate": 0.0004989554162764986, "loss": 0.2378, "step": 21180 }, { "epoch": 0.88, "grad_norm": 0.609375, "learning_rate": 0.0004989544256668391, "loss": 0.2396, "step": 21190 }, { "epoch": 0.88, "grad_norm": 0.333984375, "learning_rate": 0.0004989534345886743, "loss": 0.2392, "step": 21200 }, { "epoch": 0.88, "grad_norm": 0.98046875, "learning_rate": 0.0004989524430420061, "loss": 0.3005, "step": 21210 }, { "epoch": 0.88, "grad_norm": 0.26953125, "learning_rate": 0.0004989514510268362, "loss": 0.2001, "step": 21220 }, { "epoch": 0.88, "grad_norm": 0.57421875, "learning_rate": 0.0004989504585431665, "loss": 0.2396, "step": 21230 }, { "epoch": 0.88, "grad_norm": 0.78125, "learning_rate": 0.000498949465590999, "loss": 0.251, "step": 21240 }, { "epoch": 0.88, "grad_norm": 0.373046875, "learning_rate": 0.0004989484721703354, "loss": 0.2587, "step": 21250 }, { "epoch": 0.88, "grad_norm": 0.451171875, "learning_rate": 0.0004989474782811777, "loss": 0.2229, "step": 21260 }, { "epoch": 0.88, "grad_norm": 0.58984375, "learning_rate": 0.0004989464839235278, "loss": 0.2536, "step": 21270 }, { "epoch": 0.88, "grad_norm": 0.87890625, "learning_rate": 0.0004989454890973874, "loss": 0.2368, "step": 21280 }, { "epoch": 0.88, "grad_norm": 1.0625, "learning_rate": 0.0004989444938027585, "loss": 0.2342, "step": 21290 }, { "epoch": 0.88, "grad_norm": 0.5234375, "learning_rate": 0.000498943498039643, "loss": 0.2571, "step": 21300 }, { "epoch": 0.88, "grad_norm": 0.55078125, "learning_rate": 0.0004989425018080427, "loss": 0.1802, "step": 21310 }, { "epoch": 0.88, "grad_norm": 0.46875, "learning_rate": 0.0004989415051079594, "loss": 0.2183, "step": 21320 }, { "epoch": 0.88, "grad_norm": 0.57421875, "learning_rate": 0.0004989405079393953, "loss": 0.2718, "step": 21330 }, { "epoch": 0.88, "grad_norm": 0.10693359375, "learning_rate": 0.0004989395103023518, "loss": 0.2058, "step": 21340 }, { "epoch": 0.88, "grad_norm": 0.88671875, "learning_rate": 0.0004989385121968312, "loss": 0.2207, "step": 21350 }, { "epoch": 0.88, "grad_norm": 0.91796875, "learning_rate": 0.0004989375136228351, "loss": 0.2458, "step": 21360 }, { "epoch": 0.89, "grad_norm": 0.578125, "learning_rate": 0.0004989365145803655, "loss": 0.2085, "step": 21370 }, { "epoch": 0.89, "grad_norm": 0.64453125, "learning_rate": 0.0004989355150694242, "loss": 0.2545, "step": 21380 }, { "epoch": 0.89, "grad_norm": 1.4375, "learning_rate": 0.0004989345150900133, "loss": 0.2374, "step": 21390 }, { "epoch": 0.89, "grad_norm": 0.921875, "learning_rate": 0.0004989335146421345, "loss": 0.2098, "step": 21400 }, { "epoch": 0.89, "grad_norm": 0.6640625, "learning_rate": 0.0004989325137257897, "loss": 0.2977, "step": 21410 }, { "epoch": 0.89, "grad_norm": 1.265625, "learning_rate": 0.0004989315123409807, "loss": 0.2245, "step": 21420 }, { "epoch": 0.89, "grad_norm": 1.5859375, "learning_rate": 0.0004989305104877095, "loss": 0.2064, "step": 21430 }, { "epoch": 0.89, "grad_norm": 1.1171875, "learning_rate": 0.0004989295081659779, "loss": 0.2853, "step": 21440 }, { "epoch": 0.89, "grad_norm": 1.765625, "learning_rate": 0.0004989285053757879, "loss": 0.2333, "step": 21450 }, { "epoch": 0.89, "grad_norm": 0.5, "learning_rate": 0.0004989275021171414, "loss": 0.2301, "step": 21460 }, { "epoch": 0.89, "grad_norm": 0.90625, "learning_rate": 0.0004989264983900402, "loss": 0.2032, "step": 21470 }, { "epoch": 0.89, "grad_norm": 0.640625, "learning_rate": 0.0004989254941944862, "loss": 0.2627, "step": 21480 }, { "epoch": 0.89, "grad_norm": 0.4921875, "learning_rate": 0.0004989244895304813, "loss": 0.254, "step": 21490 }, { "epoch": 0.89, "grad_norm": 0.400390625, "learning_rate": 0.0004989234843980274, "loss": 0.2247, "step": 21500 }, { "epoch": 0.89, "grad_norm": 0.51171875, "learning_rate": 0.0004989224787971264, "loss": 0.2412, "step": 21510 }, { "epoch": 0.89, "grad_norm": 0.3984375, "learning_rate": 0.0004989214727277801, "loss": 0.2496, "step": 21520 }, { "epoch": 0.89, "grad_norm": 0.640625, "learning_rate": 0.0004989204661899905, "loss": 0.2019, "step": 21530 }, { "epoch": 0.89, "grad_norm": 0.734375, "learning_rate": 0.0004989194591837595, "loss": 0.2503, "step": 21540 }, { "epoch": 0.89, "grad_norm": 0.90234375, "learning_rate": 0.000498918451709089, "loss": 0.2004, "step": 21550 }, { "epoch": 0.89, "grad_norm": 0.828125, "learning_rate": 0.0004989174437659808, "loss": 0.2082, "step": 21560 }, { "epoch": 0.89, "grad_norm": 0.73046875, "learning_rate": 0.0004989164353544368, "loss": 0.2393, "step": 21570 }, { "epoch": 0.89, "grad_norm": 1.09375, "learning_rate": 0.000498915426474459, "loss": 0.2451, "step": 21580 }, { "epoch": 0.89, "grad_norm": 0.671875, "learning_rate": 0.0004989144171260492, "loss": 0.2657, "step": 21590 }, { "epoch": 0.89, "grad_norm": 1.8203125, "learning_rate": 0.0004989134073092094, "loss": 0.25, "step": 21600 }, { "epoch": 0.9, "grad_norm": 0.625, "learning_rate": 0.0004989123970239415, "loss": 0.2551, "step": 21610 }, { "epoch": 0.9, "grad_norm": 0.7109375, "learning_rate": 0.0004989113862702473, "loss": 0.2555, "step": 21620 }, { "epoch": 0.9, "grad_norm": 0.263671875, "learning_rate": 0.0004989103750481287, "loss": 0.2005, "step": 21630 }, { "epoch": 0.9, "grad_norm": 0.47265625, "learning_rate": 0.0004989093633575878, "loss": 0.2904, "step": 21640 }, { "epoch": 0.9, "grad_norm": 0.4296875, "learning_rate": 0.0004989083511986262, "loss": 0.2356, "step": 21650 }, { "epoch": 0.9, "grad_norm": 1.109375, "learning_rate": 0.000498907338571246, "loss": 0.2532, "step": 21660 }, { "epoch": 0.9, "grad_norm": 0.51953125, "learning_rate": 0.000498906325475449, "loss": 0.2469, "step": 21670 }, { "epoch": 0.9, "grad_norm": 0.7109375, "learning_rate": 0.0004989053119112373, "loss": 0.2741, "step": 21680 }, { "epoch": 0.9, "grad_norm": 0.875, "learning_rate": 0.0004989042978786127, "loss": 0.2891, "step": 21690 }, { "epoch": 0.9, "grad_norm": 0.72265625, "learning_rate": 0.000498903283377577, "loss": 0.2078, "step": 21700 }, { "epoch": 0.9, "grad_norm": 0.5234375, "learning_rate": 0.0004989022684081323, "loss": 0.2253, "step": 21710 }, { "epoch": 0.9, "grad_norm": 0.57421875, "learning_rate": 0.0004989012529702803, "loss": 0.2536, "step": 21720 }, { "epoch": 0.9, "grad_norm": 0.62890625, "learning_rate": 0.0004989002370640231, "loss": 0.2416, "step": 21730 }, { "epoch": 0.9, "grad_norm": 0.75, "learning_rate": 0.0004988992206893624, "loss": 0.2148, "step": 21740 }, { "epoch": 0.9, "grad_norm": 0.72265625, "learning_rate": 0.0004988982038463003, "loss": 0.2604, "step": 21750 }, { "epoch": 0.9, "grad_norm": 0.69140625, "learning_rate": 0.0004988971865348388, "loss": 0.2215, "step": 21760 }, { "epoch": 0.9, "grad_norm": 0.53515625, "learning_rate": 0.0004988961687549796, "loss": 0.2187, "step": 21770 }, { "epoch": 0.9, "grad_norm": 0.7734375, "learning_rate": 0.0004988951505067247, "loss": 0.2722, "step": 21780 }, { "epoch": 0.9, "grad_norm": 0.56640625, "learning_rate": 0.000498894131790076, "loss": 0.2064, "step": 21790 }, { "epoch": 0.9, "grad_norm": 0.640625, "learning_rate": 0.0004988931126050352, "loss": 0.2452, "step": 21800 }, { "epoch": 0.9, "grad_norm": 1.140625, "learning_rate": 0.0004988920929516048, "loss": 0.2675, "step": 21810 }, { "epoch": 0.9, "grad_norm": 0.43359375, "learning_rate": 0.000498891072829786, "loss": 0.2287, "step": 21820 }, { "epoch": 0.9, "grad_norm": 0.640625, "learning_rate": 0.0004988900522395814, "loss": 0.2285, "step": 21830 }, { "epoch": 0.9, "grad_norm": 0.953125, "learning_rate": 0.0004988890311809924, "loss": 0.2738, "step": 21840 }, { "epoch": 0.91, "grad_norm": 0.921875, "learning_rate": 0.0004988880096540212, "loss": 0.2145, "step": 21850 }, { "epoch": 0.91, "grad_norm": 0.294921875, "learning_rate": 0.0004988869876586697, "loss": 0.2395, "step": 21860 }, { "epoch": 0.91, "grad_norm": 0.38671875, "learning_rate": 0.0004988859651949397, "loss": 0.2613, "step": 21870 }, { "epoch": 0.91, "grad_norm": 0.5078125, "learning_rate": 0.0004988849422628332, "loss": 0.2207, "step": 21880 }, { "epoch": 0.91, "grad_norm": 0.5078125, "learning_rate": 0.0004988839188623521, "loss": 0.207, "step": 21890 }, { "epoch": 0.91, "grad_norm": 0.6484375, "learning_rate": 0.0004988828949934983, "loss": 0.2436, "step": 21900 }, { "epoch": 0.91, "grad_norm": 0.5234375, "learning_rate": 0.0004988818706562738, "loss": 0.2769, "step": 21910 }, { "epoch": 0.91, "grad_norm": 0.228515625, "learning_rate": 0.0004988808458506806, "loss": 0.2418, "step": 21920 }, { "epoch": 0.91, "grad_norm": 0.8203125, "learning_rate": 0.0004988798205767204, "loss": 0.2441, "step": 21930 }, { "epoch": 0.91, "grad_norm": 0.498046875, "learning_rate": 0.0004988787948343953, "loss": 0.289, "step": 21940 }, { "epoch": 0.91, "grad_norm": 0.67578125, "learning_rate": 0.0004988777686237071, "loss": 0.2456, "step": 21950 }, { "epoch": 0.91, "grad_norm": 0.875, "learning_rate": 0.0004988767419446579, "loss": 0.2212, "step": 21960 }, { "epoch": 0.91, "grad_norm": 0.79296875, "learning_rate": 0.0004988757147972496, "loss": 0.2029, "step": 21970 }, { "epoch": 0.91, "grad_norm": 0.55078125, "learning_rate": 0.000498874687181484, "loss": 0.2465, "step": 21980 }, { "epoch": 0.91, "grad_norm": 0.4140625, "learning_rate": 0.0004988736590973631, "loss": 0.2228, "step": 21990 }, { "epoch": 0.91, "grad_norm": 0.416015625, "learning_rate": 0.000498872630544889, "loss": 0.2229, "step": 22000 }, { "epoch": 0.91, "grad_norm": 0.64453125, "learning_rate": 0.0004988716015240633, "loss": 0.1896, "step": 22010 }, { "epoch": 0.91, "grad_norm": 0.53125, "learning_rate": 0.0004988705720348882, "loss": 0.2493, "step": 22020 }, { "epoch": 0.91, "grad_norm": 1.8828125, "learning_rate": 0.0004988695420773656, "loss": 0.2748, "step": 22030 }, { "epoch": 0.91, "grad_norm": 0.72265625, "learning_rate": 0.0004988685116514973, "loss": 0.2328, "step": 22040 }, { "epoch": 0.91, "grad_norm": 0.79296875, "learning_rate": 0.0004988674807572854, "loss": 0.268, "step": 22050 }, { "epoch": 0.91, "grad_norm": 0.62109375, "learning_rate": 0.0004988664493947318, "loss": 0.2159, "step": 22060 }, { "epoch": 0.91, "grad_norm": 0.48828125, "learning_rate": 0.0004988654175638384, "loss": 0.2065, "step": 22070 }, { "epoch": 0.91, "grad_norm": 0.5859375, "learning_rate": 0.0004988643852646071, "loss": 0.2188, "step": 22080 }, { "epoch": 0.91, "grad_norm": 0.7109375, "learning_rate": 0.0004988633524970399, "loss": 0.2031, "step": 22090 }, { "epoch": 0.92, "grad_norm": 0.87890625, "learning_rate": 0.0004988623192611388, "loss": 0.2534, "step": 22100 }, { "epoch": 0.92, "grad_norm": 0.76171875, "learning_rate": 0.0004988612855569057, "loss": 0.275, "step": 22110 }, { "epoch": 0.92, "grad_norm": 0.57421875, "learning_rate": 0.0004988602513843425, "loss": 0.1833, "step": 22120 }, { "epoch": 0.92, "grad_norm": 0.71875, "learning_rate": 0.0004988592167434512, "loss": 0.2381, "step": 22130 }, { "epoch": 0.92, "grad_norm": 0.73046875, "learning_rate": 0.0004988581816342337, "loss": 0.2008, "step": 22140 }, { "epoch": 0.92, "grad_norm": 1.6640625, "learning_rate": 0.000498857146056692, "loss": 0.2488, "step": 22150 }, { "epoch": 0.92, "grad_norm": 0.2490234375, "learning_rate": 0.000498856110010828, "loss": 0.2451, "step": 22160 }, { "epoch": 0.92, "grad_norm": 1.859375, "learning_rate": 0.0004988550734966438, "loss": 0.2201, "step": 22170 }, { "epoch": 0.92, "grad_norm": 0.9609375, "learning_rate": 0.0004988540365141411, "loss": 0.2595, "step": 22180 }, { "epoch": 0.92, "grad_norm": 0.482421875, "learning_rate": 0.000498852999063322, "loss": 0.25, "step": 22190 }, { "epoch": 0.92, "grad_norm": 1.4609375, "learning_rate": 0.0004988519611441884, "loss": 0.2211, "step": 22200 }, { "epoch": 0.92, "grad_norm": 0.3984375, "learning_rate": 0.0004988509227567423, "loss": 0.2355, "step": 22210 }, { "epoch": 0.92, "grad_norm": 0.6015625, "learning_rate": 0.0004988498839009857, "loss": 0.3228, "step": 22220 }, { "epoch": 0.92, "grad_norm": 0.5390625, "learning_rate": 0.0004988488445769204, "loss": 0.2455, "step": 22230 }, { "epoch": 0.92, "grad_norm": 0.2392578125, "learning_rate": 0.0004988478047845485, "loss": 0.2133, "step": 22240 }, { "epoch": 0.92, "grad_norm": 0.64453125, "learning_rate": 0.0004988467645238719, "loss": 0.2049, "step": 22250 }, { "epoch": 0.92, "grad_norm": 0.193359375, "learning_rate": 0.0004988457237948925, "loss": 0.2623, "step": 22260 }, { "epoch": 0.92, "grad_norm": 0.890625, "learning_rate": 0.0004988446825976125, "loss": 0.2319, "step": 22270 }, { "epoch": 0.92, "grad_norm": 0.59375, "learning_rate": 0.0004988436409320335, "loss": 0.2191, "step": 22280 }, { "epoch": 0.92, "grad_norm": 1.0234375, "learning_rate": 0.0004988425987981578, "loss": 0.229, "step": 22290 }, { "epoch": 0.92, "grad_norm": 0.458984375, "learning_rate": 0.000498841556195987, "loss": 0.2768, "step": 22300 }, { "epoch": 0.92, "grad_norm": 2.03125, "learning_rate": 0.0004988405131255234, "loss": 0.2441, "step": 22310 }, { "epoch": 0.92, "grad_norm": 0.59375, "learning_rate": 0.0004988394695867687, "loss": 0.2294, "step": 22320 }, { "epoch": 0.92, "grad_norm": 0.625, "learning_rate": 0.0004988384255797251, "loss": 0.2555, "step": 22330 }, { "epoch": 0.93, "grad_norm": 0.51171875, "learning_rate": 0.0004988373811043945, "loss": 0.2099, "step": 22340 }, { "epoch": 0.93, "grad_norm": 1.5234375, "learning_rate": 0.0004988363361607787, "loss": 0.2079, "step": 22350 }, { "epoch": 0.93, "grad_norm": 0.71875, "learning_rate": 0.0004988352907488799, "loss": 0.2964, "step": 22360 }, { "epoch": 0.93, "grad_norm": 0.23828125, "learning_rate": 0.0004988342448686998, "loss": 0.1908, "step": 22370 }, { "epoch": 0.93, "grad_norm": 0.984375, "learning_rate": 0.0004988331985202407, "loss": 0.2985, "step": 22380 }, { "epoch": 0.93, "grad_norm": 0.45703125, "learning_rate": 0.0004988321517035044, "loss": 0.2589, "step": 22390 }, { "epoch": 0.93, "grad_norm": 0.95703125, "learning_rate": 0.0004988311044184928, "loss": 0.2479, "step": 22400 }, { "epoch": 0.93, "grad_norm": 0.359375, "learning_rate": 0.0004988300566652079, "loss": 0.2502, "step": 22410 }, { "epoch": 0.93, "grad_norm": 0.796875, "learning_rate": 0.0004988290084436516, "loss": 0.2057, "step": 22420 }, { "epoch": 0.93, "grad_norm": 0.68359375, "learning_rate": 0.0004988279597538261, "loss": 0.2225, "step": 22430 }, { "epoch": 0.93, "grad_norm": 0.26171875, "learning_rate": 0.0004988269105957332, "loss": 0.2804, "step": 22440 }, { "epoch": 0.93, "grad_norm": 2.109375, "learning_rate": 0.000498825860969375, "loss": 0.2564, "step": 22450 }, { "epoch": 0.93, "grad_norm": 0.296875, "learning_rate": 0.0004988248108747534, "loss": 0.2594, "step": 22460 }, { "epoch": 0.93, "grad_norm": 1.3671875, "learning_rate": 0.0004988237603118703, "loss": 0.2572, "step": 22470 }, { "epoch": 0.93, "grad_norm": 1.0859375, "learning_rate": 0.0004988227092807279, "loss": 0.2041, "step": 22480 }, { "epoch": 0.93, "grad_norm": 1.0, "learning_rate": 0.000498821657781328, "loss": 0.2166, "step": 22490 }, { "epoch": 0.93, "grad_norm": 0.90625, "learning_rate": 0.0004988206058136724, "loss": 0.2643, "step": 22500 }, { "epoch": 0.93, "grad_norm": 0.71484375, "learning_rate": 0.0004988195533777635, "loss": 0.2241, "step": 22510 }, { "epoch": 0.93, "grad_norm": 0.76953125, "learning_rate": 0.000498818500473603, "loss": 0.2173, "step": 22520 }, { "epoch": 0.93, "grad_norm": 0.181640625, "learning_rate": 0.0004988174471011929, "loss": 0.2408, "step": 22530 }, { "epoch": 0.93, "grad_norm": 0.46484375, "learning_rate": 0.0004988163932605353, "loss": 0.2781, "step": 22540 }, { "epoch": 0.93, "grad_norm": 0.57421875, "learning_rate": 0.0004988153389516321, "loss": 0.2378, "step": 22550 }, { "epoch": 0.93, "grad_norm": 0.671875, "learning_rate": 0.0004988142841744854, "loss": 0.273, "step": 22560 }, { "epoch": 0.93, "grad_norm": 0.333984375, "learning_rate": 0.000498813228929097, "loss": 0.2437, "step": 22570 }, { "epoch": 0.94, "grad_norm": 0.7890625, "learning_rate": 0.0004988121732154689, "loss": 0.2297, "step": 22580 }, { "epoch": 0.94, "grad_norm": 0.478515625, "learning_rate": 0.0004988111170336032, "loss": 0.249, "step": 22590 }, { "epoch": 0.94, "grad_norm": 2.0, "learning_rate": 0.0004988100603835019, "loss": 0.249, "step": 22600 }, { "epoch": 0.94, "grad_norm": 0.625, "learning_rate": 0.0004988090032651669, "loss": 0.3069, "step": 22610 }, { "epoch": 0.94, "grad_norm": 0.72265625, "learning_rate": 0.0004988079456786003, "loss": 0.2585, "step": 22620 }, { "epoch": 0.94, "grad_norm": 1.28125, "learning_rate": 0.0004988068876238039, "loss": 0.261, "step": 22630 }, { "epoch": 0.94, "grad_norm": 0.50390625, "learning_rate": 0.0004988058291007798, "loss": 0.2032, "step": 22640 }, { "epoch": 0.94, "grad_norm": 1.625, "learning_rate": 0.0004988047701095301, "loss": 0.1875, "step": 22650 }, { "epoch": 0.94, "grad_norm": 0.94921875, "learning_rate": 0.0004988037106500567, "loss": 0.3067, "step": 22660 }, { "epoch": 0.94, "grad_norm": 0.439453125, "learning_rate": 0.0004988026507223615, "loss": 0.1534, "step": 22670 }, { "epoch": 0.94, "grad_norm": 0.52734375, "learning_rate": 0.0004988015903264466, "loss": 0.23, "step": 22680 }, { "epoch": 0.94, "grad_norm": 0.255859375, "learning_rate": 0.000498800529462314, "loss": 0.2227, "step": 22690 }, { "epoch": 0.94, "grad_norm": 0.447265625, "learning_rate": 0.0004987994681299656, "loss": 0.2536, "step": 22700 }, { "epoch": 0.94, "grad_norm": 0.3671875, "learning_rate": 0.0004987984063294036, "loss": 0.2128, "step": 22710 }, { "epoch": 0.94, "grad_norm": 0.80078125, "learning_rate": 0.0004987973440606299, "loss": 0.2331, "step": 22720 }, { "epoch": 0.94, "grad_norm": 0.54296875, "learning_rate": 0.0004987962813236463, "loss": 0.2089, "step": 22730 }, { "epoch": 0.94, "grad_norm": 0.62890625, "learning_rate": 0.000498795218118455, "loss": 0.2931, "step": 22740 }, { "epoch": 0.94, "grad_norm": 0.59765625, "learning_rate": 0.000498794154445058, "loss": 0.2273, "step": 22750 }, { "epoch": 0.94, "grad_norm": 0.431640625, "learning_rate": 0.0004987930903034572, "loss": 0.2744, "step": 22760 }, { "epoch": 0.94, "grad_norm": 0.291015625, "learning_rate": 0.0004987920256936547, "loss": 0.239, "step": 22770 }, { "epoch": 0.94, "grad_norm": 0.61328125, "learning_rate": 0.0004987909606156526, "loss": 0.2316, "step": 22780 }, { "epoch": 0.94, "grad_norm": 0.390625, "learning_rate": 0.0004987898950694527, "loss": 0.2124, "step": 22790 }, { "epoch": 0.94, "grad_norm": 0.0, "learning_rate": 0.000498788829055057, "loss": 0.2282, "step": 22800 }, { "epoch": 0.94, "grad_norm": 1.0078125, "learning_rate": 0.0004987877625724677, "loss": 0.2254, "step": 22810 }, { "epoch": 0.95, "grad_norm": 0.79296875, "learning_rate": 0.0004987866956216866, "loss": 0.2256, "step": 22820 }, { "epoch": 0.95, "grad_norm": 0.63671875, "learning_rate": 0.0004987856282027159, "loss": 0.2298, "step": 22830 }, { "epoch": 0.95, "grad_norm": 0.26953125, "learning_rate": 0.0004987845603155576, "loss": 0.2776, "step": 22840 }, { "epoch": 0.95, "grad_norm": 0.23828125, "learning_rate": 0.0004987834919602135, "loss": 0.2323, "step": 22850 }, { "epoch": 0.95, "grad_norm": 0.8046875, "learning_rate": 0.0004987824231366857, "loss": 0.227, "step": 22860 }, { "epoch": 0.95, "grad_norm": 1.0703125, "learning_rate": 0.0004987813538449762, "loss": 0.2821, "step": 22870 }, { "epoch": 0.95, "grad_norm": 0.65234375, "learning_rate": 0.0004987802840850873, "loss": 0.2506, "step": 22880 }, { "epoch": 0.95, "grad_norm": 0.703125, "learning_rate": 0.0004987792138570205, "loss": 0.2492, "step": 22890 }, { "epoch": 0.95, "grad_norm": 1.734375, "learning_rate": 0.0004987781431607782, "loss": 0.2658, "step": 22900 }, { "epoch": 0.95, "grad_norm": 0.75390625, "learning_rate": 0.0004987770719963624, "loss": 0.2242, "step": 22910 }, { "epoch": 0.95, "grad_norm": 0.6171875, "learning_rate": 0.0004987760003637748, "loss": 0.271, "step": 22920 }, { "epoch": 0.95, "grad_norm": 1.65625, "learning_rate": 0.0004987749282630178, "loss": 0.2293, "step": 22930 }, { "epoch": 0.95, "grad_norm": 0.625, "learning_rate": 0.0004987738556940932, "loss": 0.2205, "step": 22940 }, { "epoch": 0.95, "grad_norm": 0.73828125, "learning_rate": 0.0004987727826570031, "loss": 0.2698, "step": 22950 }, { "epoch": 0.95, "grad_norm": 0.44921875, "learning_rate": 0.0004987717091517494, "loss": 0.2448, "step": 22960 }, { "epoch": 0.95, "grad_norm": 0.6328125, "learning_rate": 0.0004987706351783342, "loss": 0.2364, "step": 22970 }, { "epoch": 0.95, "grad_norm": 0.68359375, "learning_rate": 0.0004987695607367597, "loss": 0.2803, "step": 22980 }, { "epoch": 0.95, "grad_norm": 0.8046875, "learning_rate": 0.0004987684858270276, "loss": 0.2736, "step": 22990 }, { "epoch": 0.95, "grad_norm": 0.53515625, "learning_rate": 0.0004987674104491402, "loss": 0.2311, "step": 23000 }, { "epoch": 0.95, "grad_norm": 1.8515625, "learning_rate": 0.0004987663346030992, "loss": 0.2691, "step": 23010 }, { "epoch": 0.95, "grad_norm": 0.66796875, "learning_rate": 0.000498765258288907, "loss": 0.2785, "step": 23020 }, { "epoch": 0.95, "grad_norm": 0.796875, "learning_rate": 0.0004987641815065654, "loss": 0.243, "step": 23030 }, { "epoch": 0.95, "grad_norm": 0.470703125, "learning_rate": 0.0004987631042560765, "loss": 0.2225, "step": 23040 }, { "epoch": 0.95, "grad_norm": 1.0859375, "learning_rate": 0.0004987620265374422, "loss": 0.245, "step": 23050 }, { "epoch": 0.96, "grad_norm": 1.5078125, "learning_rate": 0.0004987609483506647, "loss": 0.2737, "step": 23060 }, { "epoch": 0.96, "grad_norm": 0.73828125, "learning_rate": 0.000498759869695746, "loss": 0.2513, "step": 23070 }, { "epoch": 0.96, "grad_norm": 0.7109375, "learning_rate": 0.000498758790572688, "loss": 0.2705, "step": 23080 }, { "epoch": 0.96, "grad_norm": 1.0234375, "learning_rate": 0.0004987577109814929, "loss": 0.2242, "step": 23090 }, { "epoch": 0.96, "grad_norm": 0.53515625, "learning_rate": 0.0004987566309221627, "loss": 0.2138, "step": 23100 }, { "epoch": 0.96, "grad_norm": 0.310546875, "learning_rate": 0.0004987555503946992, "loss": 0.2022, "step": 23110 }, { "epoch": 0.96, "grad_norm": 0.62890625, "learning_rate": 0.0004987544693991048, "loss": 0.2156, "step": 23120 }, { "epoch": 0.96, "grad_norm": 0.5, "learning_rate": 0.0004987533879353812, "loss": 0.2547, "step": 23130 }, { "epoch": 0.96, "grad_norm": 1.03125, "learning_rate": 0.0004987523060035307, "loss": 0.2308, "step": 23140 }, { "epoch": 0.96, "grad_norm": 0.59765625, "learning_rate": 0.000498751223603555, "loss": 0.2416, "step": 23150 }, { "epoch": 0.96, "grad_norm": 0.5625, "learning_rate": 0.0004987501407354567, "loss": 0.2232, "step": 23160 }, { "epoch": 0.96, "grad_norm": 0.96484375, "learning_rate": 0.0004987490573992372, "loss": 0.1981, "step": 23170 }, { "epoch": 0.96, "grad_norm": 0.703125, "learning_rate": 0.000498747973594899, "loss": 0.2365, "step": 23180 }, { "epoch": 0.96, "grad_norm": 0.4609375, "learning_rate": 0.000498746889322444, "loss": 0.279, "step": 23190 }, { "epoch": 0.96, "grad_norm": 0.96875, "learning_rate": 0.0004987458045818742, "loss": 0.2141, "step": 23200 }, { "epoch": 0.96, "grad_norm": 0.001708984375, "learning_rate": 0.0004987447193731916, "loss": 0.2838, "step": 23210 }, { "epoch": 0.96, "grad_norm": 0.8203125, "learning_rate": 0.0004987436336963983, "loss": 0.2849, "step": 23220 }, { "epoch": 0.96, "grad_norm": 0.48046875, "learning_rate": 0.0004987425475514964, "loss": 0.2162, "step": 23230 }, { "epoch": 0.96, "grad_norm": 0.671875, "learning_rate": 0.0004987414609384878, "loss": 0.2428, "step": 23240 }, { "epoch": 0.96, "grad_norm": 0.1923828125, "learning_rate": 0.0004987403738573746, "loss": 0.2512, "step": 23250 }, { "epoch": 0.96, "grad_norm": 1.0703125, "learning_rate": 0.0004987392863081591, "loss": 0.2453, "step": 23260 }, { "epoch": 0.96, "grad_norm": 0.287109375, "learning_rate": 0.000498738198290843, "loss": 0.2459, "step": 23270 }, { "epoch": 0.96, "grad_norm": 0.29296875, "learning_rate": 0.0004987371098054284, "loss": 0.2693, "step": 23280 }, { "epoch": 0.96, "grad_norm": 0.75, "learning_rate": 0.0004987360208519175, "loss": 0.2442, "step": 23290 }, { "epoch": 0.97, "grad_norm": 0.41796875, "learning_rate": 0.0004987349314303122, "loss": 0.2562, "step": 23300 }, { "epoch": 0.97, "grad_norm": 0.578125, "learning_rate": 0.0004987338415406148, "loss": 0.2055, "step": 23310 }, { "epoch": 0.97, "grad_norm": 0.9609375, "learning_rate": 0.000498732751182827, "loss": 0.2026, "step": 23320 }, { "epoch": 0.97, "grad_norm": 0.373046875, "learning_rate": 0.0004987316603569511, "loss": 0.2181, "step": 23330 }, { "epoch": 0.97, "grad_norm": 0.7734375, "learning_rate": 0.000498730569062989, "loss": 0.1899, "step": 23340 }, { "epoch": 0.97, "grad_norm": 0.69140625, "learning_rate": 0.0004987294773009429, "loss": 0.2513, "step": 23350 }, { "epoch": 0.97, "grad_norm": 0.267578125, "learning_rate": 0.0004987283850708148, "loss": 0.271, "step": 23360 }, { "epoch": 0.97, "grad_norm": 1.2578125, "learning_rate": 0.0004987272923726066, "loss": 0.232, "step": 23370 }, { "epoch": 0.97, "grad_norm": 0.953125, "learning_rate": 0.0004987261992063206, "loss": 0.2412, "step": 23380 }, { "epoch": 0.97, "grad_norm": 0.2890625, "learning_rate": 0.0004987251055719587, "loss": 0.1915, "step": 23390 }, { "epoch": 0.97, "grad_norm": 0.74609375, "learning_rate": 0.0004987240114695231, "loss": 0.2093, "step": 23400 }, { "epoch": 0.97, "grad_norm": 0.55859375, "learning_rate": 0.0004987229168990158, "loss": 0.2179, "step": 23410 }, { "epoch": 0.97, "grad_norm": 0.8671875, "learning_rate": 0.0004987218218604387, "loss": 0.2628, "step": 23420 }, { "epoch": 0.97, "grad_norm": 4.59375, "learning_rate": 0.0004987207263537941, "loss": 0.2355, "step": 23430 }, { "epoch": 0.97, "grad_norm": 1.71875, "learning_rate": 0.0004987196303790839, "loss": 0.2455, "step": 23440 }, { "epoch": 0.97, "grad_norm": 0.86328125, "learning_rate": 0.0004987185339363102, "loss": 0.251, "step": 23450 }, { "epoch": 0.97, "grad_norm": 1.5625, "learning_rate": 0.000498717437025475, "loss": 0.2525, "step": 23460 }, { "epoch": 0.97, "grad_norm": 0.384765625, "learning_rate": 0.0004987163396465806, "loss": 0.24, "step": 23470 }, { "epoch": 0.97, "grad_norm": 0.86328125, "learning_rate": 0.0004987152417996289, "loss": 0.1614, "step": 23480 }, { "epoch": 0.97, "grad_norm": 1.2734375, "learning_rate": 0.0004987141434846219, "loss": 0.2353, "step": 23490 }, { "epoch": 0.97, "grad_norm": 0.21484375, "learning_rate": 0.0004987130447015618, "loss": 0.2821, "step": 23500 }, { "epoch": 0.97, "grad_norm": 0.4921875, "learning_rate": 0.0004987119454504506, "loss": 0.2933, "step": 23510 }, { "epoch": 0.97, "grad_norm": 0.267578125, "learning_rate": 0.0004987108457312902, "loss": 0.2002, "step": 23520 }, { "epoch": 0.97, "grad_norm": 0.95703125, "learning_rate": 0.000498709745544083, "loss": 0.2254, "step": 23530 }, { "epoch": 0.98, "grad_norm": 1.40625, "learning_rate": 0.000498708644888831, "loss": 0.2543, "step": 23540 }, { "epoch": 0.98, "grad_norm": 0.8671875, "learning_rate": 0.0004987075437655361, "loss": 0.2748, "step": 23550 }, { "epoch": 0.98, "grad_norm": 0.00162506103515625, "learning_rate": 0.0004987064421742004, "loss": 0.2203, "step": 23560 }, { "epoch": 0.98, "grad_norm": 0.484375, "learning_rate": 0.0004987053401148261, "loss": 0.2146, "step": 23570 }, { "epoch": 0.98, "grad_norm": 1.15625, "learning_rate": 0.0004987042375874152, "loss": 0.2351, "step": 23580 }, { "epoch": 0.98, "grad_norm": 0.44921875, "learning_rate": 0.0004987031345919698, "loss": 0.2165, "step": 23590 }, { "epoch": 0.98, "grad_norm": 1.0625, "learning_rate": 0.0004987020311284919, "loss": 0.1965, "step": 23600 }, { "epoch": 0.98, "grad_norm": 0.921875, "learning_rate": 0.0004987009271969837, "loss": 0.2556, "step": 23610 }, { "epoch": 0.98, "grad_norm": 0.4296875, "learning_rate": 0.0004986998227974472, "loss": 0.2642, "step": 23620 }, { "epoch": 0.98, "grad_norm": 0.56640625, "learning_rate": 0.0004986987179298844, "loss": 0.1995, "step": 23630 }, { "epoch": 0.98, "grad_norm": 1.046875, "learning_rate": 0.0004986976125942975, "loss": 0.2606, "step": 23640 }, { "epoch": 0.98, "grad_norm": 0.578125, "learning_rate": 0.0004986965067906887, "loss": 0.2867, "step": 23650 }, { "epoch": 0.98, "grad_norm": 0.3984375, "learning_rate": 0.0004986954005190598, "loss": 0.2742, "step": 23660 }, { "epoch": 0.98, "grad_norm": 0.51953125, "learning_rate": 0.0004986942937794131, "loss": 0.2334, "step": 23670 }, { "epoch": 0.98, "grad_norm": 0.41796875, "learning_rate": 0.0004986931865717505, "loss": 0.2529, "step": 23680 }, { "epoch": 0.98, "grad_norm": 0.64453125, "learning_rate": 0.0004986920788960743, "loss": 0.2189, "step": 23690 }, { "epoch": 0.98, "grad_norm": 0.65625, "learning_rate": 0.0004986909707523863, "loss": 0.2164, "step": 23700 }, { "epoch": 0.98, "grad_norm": 0.65234375, "learning_rate": 0.0004986898621406889, "loss": 0.2239, "step": 23710 }, { "epoch": 0.98, "grad_norm": 0.6171875, "learning_rate": 0.000498688753060984, "loss": 0.2364, "step": 23720 }, { "epoch": 0.98, "grad_norm": 0.0, "learning_rate": 0.0004986876435132736, "loss": 0.2487, "step": 23730 }, { "epoch": 0.98, "grad_norm": 0.59375, "learning_rate": 0.00049868653349756, "loss": 0.2025, "step": 23740 }, { "epoch": 0.98, "grad_norm": 0.66015625, "learning_rate": 0.0004986854230138452, "loss": 0.2527, "step": 23750 }, { "epoch": 0.98, "grad_norm": 0.546875, "learning_rate": 0.0004986843120621312, "loss": 0.227, "step": 23760 }, { "epoch": 0.98, "grad_norm": 0.373046875, "learning_rate": 0.0004986832006424203, "loss": 0.2317, "step": 23770 }, { "epoch": 0.98, "grad_norm": 0.55859375, "learning_rate": 0.0004986820887547145, "loss": 0.2462, "step": 23780 }, { "epoch": 0.99, "grad_norm": 0.60546875, "learning_rate": 0.0004986809763990157, "loss": 0.2071, "step": 23790 }, { "epoch": 0.99, "grad_norm": 0.53125, "learning_rate": 0.0004986798635753264, "loss": 0.2287, "step": 23800 }, { "epoch": 0.99, "grad_norm": 0.796875, "learning_rate": 0.0004986787502836483, "loss": 0.2148, "step": 23810 }, { "epoch": 0.99, "grad_norm": 1.125, "learning_rate": 0.0004986776365239837, "loss": 0.203, "step": 23820 }, { "epoch": 0.99, "grad_norm": 1.2890625, "learning_rate": 0.0004986765222963345, "loss": 0.3097, "step": 23830 }, { "epoch": 0.99, "grad_norm": 0.9296875, "learning_rate": 0.000498675407600703, "loss": 0.228, "step": 23840 }, { "epoch": 0.99, "grad_norm": 0.359375, "learning_rate": 0.0004986742924370914, "loss": 0.2305, "step": 23850 }, { "epoch": 0.99, "grad_norm": 0.78515625, "learning_rate": 0.0004986731768055015, "loss": 0.2455, "step": 23860 }, { "epoch": 0.99, "grad_norm": 1.0703125, "learning_rate": 0.0004986720607059355, "loss": 0.2323, "step": 23870 }, { "epoch": 0.99, "grad_norm": 0.5078125, "learning_rate": 0.0004986709441383956, "loss": 0.2459, "step": 23880 }, { "epoch": 0.99, "grad_norm": 0.3671875, "learning_rate": 0.0004986698271028839, "loss": 0.2676, "step": 23890 }, { "epoch": 0.99, "grad_norm": 0.427734375, "learning_rate": 0.0004986687095994023, "loss": 0.3124, "step": 23900 }, { "epoch": 0.99, "grad_norm": 1.4140625, "learning_rate": 0.0004986675916279532, "loss": 0.2653, "step": 23910 }, { "epoch": 0.99, "grad_norm": 0.255859375, "learning_rate": 0.0004986664731885384, "loss": 0.2098, "step": 23920 }, { "epoch": 0.99, "grad_norm": 0.6015625, "learning_rate": 0.0004986653542811602, "loss": 0.2359, "step": 23930 }, { "epoch": 0.99, "grad_norm": 1.8125, "learning_rate": 0.0004986642349058207, "loss": 0.1903, "step": 23940 }, { "epoch": 0.99, "grad_norm": 0.50390625, "learning_rate": 0.0004986631150625219, "loss": 0.2406, "step": 23950 }, { "epoch": 0.99, "grad_norm": 0.86328125, "learning_rate": 0.000498661994751266, "loss": 0.2797, "step": 23960 }, { "epoch": 0.99, "grad_norm": 0.515625, "learning_rate": 0.000498660873972055, "loss": 0.2223, "step": 23970 }, { "epoch": 0.99, "grad_norm": 0.39453125, "learning_rate": 0.0004986597527248912, "loss": 0.2132, "step": 23980 }, { "epoch": 0.99, "grad_norm": 0.66015625, "learning_rate": 0.0004986586310097766, "loss": 0.2734, "step": 23990 }, { "epoch": 0.99, "grad_norm": 1.328125, "learning_rate": 0.0004986575088267133, "loss": 0.2672, "step": 24000 }, { "epoch": 0.99, "grad_norm": 1.0625, "learning_rate": 0.0004986563861757034, "loss": 0.2256, "step": 24010 }, { "epoch": 0.99, "grad_norm": 0.474609375, "learning_rate": 0.000498655263056749, "loss": 0.2494, "step": 24020 }, { "epoch": 1.0, "grad_norm": 1.2578125, "learning_rate": 0.0004986541394698523, "loss": 0.2434, "step": 24030 }, { "epoch": 1.0, "grad_norm": 0.765625, "learning_rate": 0.0004986530154150152, "loss": 0.1956, "step": 24040 }, { "epoch": 1.0, "grad_norm": 0.69140625, "learning_rate": 0.0004986518908922403, "loss": 0.2116, "step": 24050 }, { "epoch": 1.0, "grad_norm": 1.5625, "learning_rate": 0.0004986507659015292, "loss": 0.1953, "step": 24060 }, { "epoch": 1.0, "grad_norm": 0.7734375, "learning_rate": 0.0004986496404428842, "loss": 0.269, "step": 24070 }, { "epoch": 1.0, "grad_norm": 0.640625, "learning_rate": 0.0004986485145163075, "loss": 0.2372, "step": 24080 }, { "epoch": 1.0, "grad_norm": 0.57421875, "learning_rate": 0.000498647388121801, "loss": 0.3138, "step": 24090 }, { "epoch": 1.0, "grad_norm": 1.8515625, "learning_rate": 0.0004986462612593671, "loss": 0.2377, "step": 24100 }, { "epoch": 1.0, "grad_norm": 1.2734375, "learning_rate": 0.0004986451339290077, "loss": 0.3302, "step": 24110 }, { "epoch": 1.0, "grad_norm": 0.388671875, "learning_rate": 0.0004986440061307251, "loss": 0.2444, "step": 24120 }, { "epoch": 1.0, "grad_norm": 0.70703125, "learning_rate": 0.0004986428778645212, "loss": 0.2929, "step": 24130 }, { "epoch": 1.0, "grad_norm": 1.171875, "learning_rate": 0.0004986417491303984, "loss": 0.2487, "step": 24140 }, { "epoch": 1.0, "grad_norm": 0.73046875, "learning_rate": 0.0004986406199283586, "loss": 0.2258, "step": 24150 }, { "epoch": 1.0, "grad_norm": 0.78515625, "learning_rate": 0.000498639490258404, "loss": 0.2111, "step": 24160 }, { "epoch": 1.0, "grad_norm": 0.486328125, "learning_rate": 0.0004986383601205368, "loss": 0.2215, "step": 24170 }, { "epoch": 1.0, "grad_norm": 1.9140625, "learning_rate": 0.000498637229514759, "loss": 0.2223, "step": 24180 }, { "epoch": 1.0, "grad_norm": 1.125, "learning_rate": 0.0004986360984410728, "loss": 0.2282, "step": 24190 }, { "epoch": 1.0, "grad_norm": 1.2109375, "learning_rate": 0.0004986349668994804, "loss": 0.2304, "step": 24200 }, { "epoch": 1.0, "grad_norm": 0.609375, "learning_rate": 0.0004986338348899837, "loss": 0.1814, "step": 24210 }, { "epoch": 1.0, "grad_norm": 0.57421875, "learning_rate": 0.0004986327024125851, "loss": 0.1663, "step": 24220 }, { "epoch": 1.0, "grad_norm": 1.2421875, "learning_rate": 0.0004986315694672865, "loss": 0.2139, "step": 24230 }, { "epoch": 1.0, "grad_norm": 0.5546875, "learning_rate": 0.0004986304360540901, "loss": 0.2459, "step": 24240 }, { "epoch": 1.0, "grad_norm": 1.0390625, "learning_rate": 0.0004986293021729982, "loss": 0.209, "step": 24250 }, { "epoch": 1.0, "grad_norm": 0.93359375, "learning_rate": 0.0004986281678240127, "loss": 0.2936, "step": 24260 }, { "epoch": 1.01, "grad_norm": 0.84765625, "learning_rate": 0.0004986270330071358, "loss": 0.2534, "step": 24270 }, { "epoch": 1.01, "grad_norm": 0.921875, "learning_rate": 0.0004986258977223699, "loss": 0.2146, "step": 24280 }, { "epoch": 1.01, "grad_norm": 0.921875, "learning_rate": 0.0004986247619697167, "loss": 0.2468, "step": 24290 }, { "epoch": 1.01, "grad_norm": 0.322265625, "learning_rate": 0.0004986236257491786, "loss": 0.2621, "step": 24300 }, { "epoch": 1.01, "grad_norm": 0.466796875, "learning_rate": 0.0004986224890607577, "loss": 0.2088, "step": 24310 }, { "epoch": 1.01, "grad_norm": 1.171875, "learning_rate": 0.0004986213519044561, "loss": 0.2528, "step": 24320 }, { "epoch": 1.01, "grad_norm": 1.5390625, "learning_rate": 0.000498620214280276, "loss": 0.2117, "step": 24330 }, { "epoch": 1.01, "grad_norm": 1.0078125, "learning_rate": 0.0004986190761882195, "loss": 0.2141, "step": 24340 }, { "epoch": 1.01, "grad_norm": 1.6796875, "learning_rate": 0.0004986179376282887, "loss": 0.2227, "step": 24350 }, { "epoch": 1.01, "grad_norm": 0.9921875, "learning_rate": 0.0004986167986004859, "loss": 0.1563, "step": 24360 }, { "epoch": 1.01, "grad_norm": 0.8046875, "learning_rate": 0.000498615659104813, "loss": 0.2138, "step": 24370 }, { "epoch": 1.01, "grad_norm": 0.390625, "learning_rate": 0.0004986145191412723, "loss": 0.2027, "step": 24380 }, { "epoch": 1.01, "grad_norm": 0.90234375, "learning_rate": 0.0004986133787098661, "loss": 0.2699, "step": 24390 }, { "epoch": 1.01, "grad_norm": 1.2734375, "learning_rate": 0.0004986122378105961, "loss": 0.2732, "step": 24400 }, { "epoch": 1.01, "grad_norm": 0.337890625, "learning_rate": 0.0004986110964434649, "loss": 0.3105, "step": 24410 }, { "epoch": 1.01, "grad_norm": 0.54296875, "learning_rate": 0.0004986099546084743, "loss": 0.2134, "step": 24420 }, { "epoch": 1.01, "grad_norm": 0.56640625, "learning_rate": 0.0004986088123056268, "loss": 0.2006, "step": 24430 }, { "epoch": 1.01, "grad_norm": 0.78125, "learning_rate": 0.0004986076695349243, "loss": 0.238, "step": 24440 }, { "epoch": 1.01, "grad_norm": 0.4921875, "learning_rate": 0.000498606526296369, "loss": 0.2244, "step": 24450 }, { "epoch": 1.01, "grad_norm": 0.8828125, "learning_rate": 0.000498605382589963, "loss": 0.22, "step": 24460 }, { "epoch": 1.01, "grad_norm": 0.82421875, "learning_rate": 0.0004986042384157087, "loss": 0.2206, "step": 24470 }, { "epoch": 1.01, "grad_norm": 0.54296875, "learning_rate": 0.0004986030937736079, "loss": 0.2453, "step": 24480 }, { "epoch": 1.01, "grad_norm": 0.94921875, "learning_rate": 0.0004986019486636629, "loss": 0.2871, "step": 24490 }, { "epoch": 1.01, "grad_norm": 0.7578125, "learning_rate": 0.000498600803085876, "loss": 0.2127, "step": 24500 }, { "epoch": 1.02, "grad_norm": 0.81640625, "learning_rate": 0.0004985996570402492, "loss": 0.2489, "step": 24510 }, { "epoch": 1.02, "grad_norm": 1.0859375, "learning_rate": 0.0004985985105267846, "loss": 0.1922, "step": 24520 }, { "epoch": 1.02, "grad_norm": 1.3125, "learning_rate": 0.0004985973635454847, "loss": 0.23, "step": 24530 }, { "epoch": 1.02, "grad_norm": 0.80078125, "learning_rate": 0.0004985962160963512, "loss": 0.2032, "step": 24540 }, { "epoch": 1.02, "grad_norm": 0.47265625, "learning_rate": 0.0004985950681793865, "loss": 0.1951, "step": 24550 }, { "epoch": 1.02, "grad_norm": 1.6640625, "learning_rate": 0.0004985939197945927, "loss": 0.2382, "step": 24560 }, { "epoch": 1.02, "grad_norm": 1.0078125, "learning_rate": 0.000498592770941972, "loss": 0.1994, "step": 24570 }, { "epoch": 1.02, "grad_norm": 0.8125, "learning_rate": 0.0004985916216215267, "loss": 0.2084, "step": 24580 }, { "epoch": 1.02, "grad_norm": 0.51171875, "learning_rate": 0.0004985904718332586, "loss": 0.1868, "step": 24590 }, { "epoch": 1.02, "grad_norm": 0.78125, "learning_rate": 0.0004985893215771701, "loss": 0.2304, "step": 24600 }, { "epoch": 1.02, "grad_norm": 0.271484375, "learning_rate": 0.0004985881708532635, "loss": 0.2139, "step": 24610 }, { "epoch": 1.02, "grad_norm": 0.58203125, "learning_rate": 0.0004985870196615406, "loss": 0.2683, "step": 24620 }, { "epoch": 1.02, "grad_norm": 1.0546875, "learning_rate": 0.000498585868002004, "loss": 0.3117, "step": 24630 }, { "epoch": 1.02, "grad_norm": 0.4140625, "learning_rate": 0.0004985847158746555, "loss": 0.285, "step": 24640 }, { "epoch": 1.02, "grad_norm": 0.83984375, "learning_rate": 0.0004985835632794974, "loss": 0.2129, "step": 24650 }, { "epoch": 1.02, "grad_norm": 0.478515625, "learning_rate": 0.000498582410216532, "loss": 0.2616, "step": 24660 }, { "epoch": 1.02, "grad_norm": 0.474609375, "learning_rate": 0.0004985812566857612, "loss": 0.31, "step": 24670 }, { "epoch": 1.02, "grad_norm": 0.84375, "learning_rate": 0.0004985801026871873, "loss": 0.234, "step": 24680 }, { "epoch": 1.02, "grad_norm": 0.765625, "learning_rate": 0.0004985789482208126, "loss": 0.2255, "step": 24690 }, { "epoch": 1.02, "grad_norm": 0.31640625, "learning_rate": 0.0004985777932866392, "loss": 0.2532, "step": 24700 }, { "epoch": 1.02, "grad_norm": 0.671875, "learning_rate": 0.0004985766378846692, "loss": 0.1965, "step": 24710 }, { "epoch": 1.02, "grad_norm": 0.51171875, "learning_rate": 0.0004985754820149048, "loss": 0.2427, "step": 24720 }, { "epoch": 1.02, "grad_norm": 1.3359375, "learning_rate": 0.0004985743256773482, "loss": 0.2719, "step": 24730 }, { "epoch": 1.02, "grad_norm": 0.859375, "learning_rate": 0.0004985731688720014, "loss": 0.2388, "step": 24740 }, { "epoch": 1.03, "grad_norm": 0.55078125, "learning_rate": 0.0004985720115988669, "loss": 0.2692, "step": 24750 }, { "epoch": 1.03, "grad_norm": 0.33203125, "learning_rate": 0.0004985708538579467, "loss": 0.2172, "step": 24760 }, { "epoch": 1.03, "grad_norm": 2.078125, "learning_rate": 0.000498569695649243, "loss": 0.2383, "step": 24770 }, { "epoch": 1.03, "grad_norm": 0.251953125, "learning_rate": 0.000498568536972758, "loss": 0.1863, "step": 24780 }, { "epoch": 1.03, "grad_norm": 0.578125, "learning_rate": 0.0004985673778284939, "loss": 0.1834, "step": 24790 }, { "epoch": 1.03, "grad_norm": 1.421875, "learning_rate": 0.0004985662182164527, "loss": 0.2113, "step": 24800 }, { "epoch": 1.03, "grad_norm": 1.25, "learning_rate": 0.0004985650581366367, "loss": 0.2573, "step": 24810 }, { "epoch": 1.03, "grad_norm": 0.66015625, "learning_rate": 0.0004985638975890483, "loss": 0.2534, "step": 24820 }, { "epoch": 1.03, "grad_norm": 0.359375, "learning_rate": 0.0004985627365736893, "loss": 0.2591, "step": 24830 }, { "epoch": 1.03, "grad_norm": 0.88671875, "learning_rate": 0.0004985615750905622, "loss": 0.211, "step": 24840 }, { "epoch": 1.03, "grad_norm": 1.0234375, "learning_rate": 0.000498560413139669, "loss": 0.2526, "step": 24850 }, { "epoch": 1.03, "grad_norm": 0.2578125, "learning_rate": 0.000498559250721012, "loss": 0.2631, "step": 24860 }, { "epoch": 1.03, "grad_norm": 0.890625, "learning_rate": 0.0004985580878345932, "loss": 0.1916, "step": 24870 }, { "epoch": 1.03, "grad_norm": 0.6484375, "learning_rate": 0.000498556924480415, "loss": 0.3054, "step": 24880 }, { "epoch": 1.03, "grad_norm": 0.71875, "learning_rate": 0.0004985557606584795, "loss": 0.2222, "step": 24890 }, { "epoch": 1.03, "grad_norm": 0.81640625, "learning_rate": 0.0004985545963687889, "loss": 0.2524, "step": 24900 }, { "epoch": 1.03, "grad_norm": 0.490234375, "learning_rate": 0.0004985534316113454, "loss": 0.2549, "step": 24910 }, { "epoch": 1.03, "grad_norm": 1.1875, "learning_rate": 0.0004985522663861513, "loss": 0.2242, "step": 24920 }, { "epoch": 1.03, "grad_norm": 1.109375, "learning_rate": 0.0004985511006932085, "loss": 0.2472, "step": 24930 }, { "epoch": 1.03, "grad_norm": 0.45703125, "learning_rate": 0.0004985499345325194, "loss": 0.2193, "step": 24940 }, { "epoch": 1.03, "grad_norm": 0.5390625, "learning_rate": 0.0004985487679040862, "loss": 0.2175, "step": 24950 }, { "epoch": 1.03, "grad_norm": 0.73828125, "learning_rate": 0.000498547600807911, "loss": 0.2424, "step": 24960 }, { "epoch": 1.03, "grad_norm": 0.37109375, "learning_rate": 0.0004985464332439962, "loss": 0.192, "step": 24970 }, { "epoch": 1.03, "grad_norm": 0.796875, "learning_rate": 0.0004985452652123437, "loss": 0.2209, "step": 24980 }, { "epoch": 1.04, "grad_norm": 0.75390625, "learning_rate": 0.000498544096712956, "loss": 0.2286, "step": 24990 }, { "epoch": 1.04, "grad_norm": 1.4453125, "learning_rate": 0.000498542927745835, "loss": 0.2529, "step": 25000 }, { "epoch": 1.04, "grad_norm": 0.55078125, "learning_rate": 0.0004985417583109831, "loss": 0.2017, "step": 25010 }, { "epoch": 1.04, "grad_norm": 0.859375, "learning_rate": 0.0004985405884084025, "loss": 0.2254, "step": 25020 }, { "epoch": 1.04, "grad_norm": 0.439453125, "learning_rate": 0.0004985394180380953, "loss": 0.2436, "step": 25030 }, { "epoch": 1.04, "grad_norm": 0.69140625, "learning_rate": 0.0004985382472000638, "loss": 0.2412, "step": 25040 }, { "epoch": 1.04, "grad_norm": 0.84765625, "learning_rate": 0.0004985370758943101, "loss": 0.2231, "step": 25050 }, { "epoch": 1.04, "grad_norm": 0.8359375, "learning_rate": 0.0004985359041208365, "loss": 0.2159, "step": 25060 }, { "epoch": 1.04, "grad_norm": 2.078125, "learning_rate": 0.0004985347318796451, "loss": 0.2401, "step": 25070 }, { "epoch": 1.04, "grad_norm": 0.0, "learning_rate": 0.0004985335591707383, "loss": 0.2661, "step": 25080 }, { "epoch": 1.04, "grad_norm": 0.77734375, "learning_rate": 0.0004985323859941182, "loss": 0.2408, "step": 25090 }, { "epoch": 1.04, "grad_norm": 1.0, "learning_rate": 0.0004985312123497868, "loss": 0.275, "step": 25100 }, { "epoch": 1.04, "grad_norm": 0.65625, "learning_rate": 0.0004985300382377467, "loss": 0.2611, "step": 25110 }, { "epoch": 1.04, "grad_norm": 0.58203125, "learning_rate": 0.0004985288636579998, "loss": 0.1907, "step": 25120 }, { "epoch": 1.04, "grad_norm": 0.88671875, "learning_rate": 0.0004985276886105484, "loss": 0.2402, "step": 25130 }, { "epoch": 1.04, "grad_norm": 0.734375, "learning_rate": 0.0004985265130953947, "loss": 0.2019, "step": 25140 }, { "epoch": 1.04, "grad_norm": 0.546875, "learning_rate": 0.0004985253371125411, "loss": 0.2776, "step": 25150 }, { "epoch": 1.04, "grad_norm": 1.109375, "learning_rate": 0.0004985241606619895, "loss": 0.2922, "step": 25160 }, { "epoch": 1.04, "grad_norm": 0.470703125, "learning_rate": 0.0004985229837437423, "loss": 0.2887, "step": 25170 }, { "epoch": 1.04, "grad_norm": 0.546875, "learning_rate": 0.0004985218063578017, "loss": 0.2304, "step": 25180 }, { "epoch": 1.04, "grad_norm": 0.298828125, "learning_rate": 0.00049852062850417, "loss": 0.2465, "step": 25190 }, { "epoch": 1.04, "grad_norm": 1.015625, "learning_rate": 0.0004985194501828491, "loss": 0.2214, "step": 25200 }, { "epoch": 1.04, "grad_norm": 0.423828125, "learning_rate": 0.0004985182713938416, "loss": 0.2468, "step": 25210 }, { "epoch": 1.04, "grad_norm": 0.9609375, "learning_rate": 0.0004985170921371496, "loss": 0.2467, "step": 25220 }, { "epoch": 1.05, "grad_norm": 1.1171875, "learning_rate": 0.000498515912412775, "loss": 0.1967, "step": 25230 }, { "epoch": 1.05, "grad_norm": 0.3046875, "learning_rate": 0.0004985147322207205, "loss": 0.2804, "step": 25240 }, { "epoch": 1.05, "grad_norm": 1.4921875, "learning_rate": 0.0004985135515609881, "loss": 0.2346, "step": 25250 }, { "epoch": 1.05, "grad_norm": 0.435546875, "learning_rate": 0.00049851237043358, "loss": 0.2188, "step": 25260 }, { "epoch": 1.05, "grad_norm": 0.259765625, "learning_rate": 0.0004985111888384984, "loss": 0.2616, "step": 25270 }, { "epoch": 1.05, "grad_norm": 0.1171875, "learning_rate": 0.0004985100067757457, "loss": 0.2291, "step": 25280 }, { "epoch": 1.05, "grad_norm": 2.8125, "learning_rate": 0.000498508824245324, "loss": 0.2097, "step": 25290 }, { "epoch": 1.05, "grad_norm": 1.046875, "learning_rate": 0.0004985076412472355, "loss": 0.2195, "step": 25300 }, { "epoch": 1.05, "grad_norm": 0.0, "learning_rate": 0.0004985064577814824, "loss": 0.2122, "step": 25310 }, { "epoch": 1.05, "grad_norm": 1.0625, "learning_rate": 0.0004985052738480669, "loss": 0.2578, "step": 25320 }, { "epoch": 1.05, "grad_norm": 0.7109375, "learning_rate": 0.0004985040894469916, "loss": 0.2659, "step": 25330 }, { "epoch": 1.05, "grad_norm": 0.75, "learning_rate": 0.0004985029045782582, "loss": 0.186, "step": 25340 }, { "epoch": 1.05, "grad_norm": 0.4375, "learning_rate": 0.0004985017192418692, "loss": 0.1916, "step": 25350 }, { "epoch": 1.05, "grad_norm": 0.56640625, "learning_rate": 0.0004985005334378268, "loss": 0.2396, "step": 25360 }, { "epoch": 1.05, "grad_norm": 0.94921875, "learning_rate": 0.0004984993471661333, "loss": 0.1813, "step": 25370 }, { "epoch": 1.05, "grad_norm": 1.2578125, "learning_rate": 0.0004984981604267909, "loss": 0.2819, "step": 25380 }, { "epoch": 1.05, "grad_norm": 0.3671875, "learning_rate": 0.0004984969732198017, "loss": 0.2509, "step": 25390 }, { "epoch": 1.05, "grad_norm": 0.91796875, "learning_rate": 0.000498495785545168, "loss": 0.2134, "step": 25400 }, { "epoch": 1.05, "grad_norm": 0.55859375, "learning_rate": 0.0004984945974028922, "loss": 0.2235, "step": 25410 }, { "epoch": 1.05, "grad_norm": 0.40625, "learning_rate": 0.0004984934087929763, "loss": 0.2015, "step": 25420 }, { "epoch": 1.05, "grad_norm": 0.400390625, "learning_rate": 0.0004984922197154226, "loss": 0.2269, "step": 25430 }, { "epoch": 1.05, "grad_norm": 0.625, "learning_rate": 0.0004984910301702335, "loss": 0.2544, "step": 25440 }, { "epoch": 1.05, "grad_norm": 2.3125, "learning_rate": 0.000498489840157411, "loss": 0.1949, "step": 25450 }, { "epoch": 1.05, "grad_norm": 1.2109375, "learning_rate": 0.0004984886496769576, "loss": 0.2898, "step": 25460 }, { "epoch": 1.05, "grad_norm": 0.72265625, "learning_rate": 0.0004984874587288752, "loss": 0.2528, "step": 25470 }, { "epoch": 1.06, "grad_norm": 0.640625, "learning_rate": 0.0004984862673131664, "loss": 0.1861, "step": 25480 }, { "epoch": 1.06, "grad_norm": 1.0859375, "learning_rate": 0.0004984850754298333, "loss": 0.2238, "step": 25490 }, { "epoch": 1.06, "grad_norm": 0.490234375, "learning_rate": 0.000498483883078878, "loss": 0.2343, "step": 25500 }, { "epoch": 1.06, "grad_norm": 0.4453125, "learning_rate": 0.0004984826902603029, "loss": 0.271, "step": 25510 }, { "epoch": 1.06, "grad_norm": 0.6484375, "learning_rate": 0.0004984814969741102, "loss": 0.243, "step": 25520 }, { "epoch": 1.06, "grad_norm": 0.5546875, "learning_rate": 0.0004984803032203022, "loss": 0.2179, "step": 25530 }, { "epoch": 1.06, "grad_norm": 0.73828125, "learning_rate": 0.0004984791089988811, "loss": 0.1896, "step": 25540 }, { "epoch": 1.06, "grad_norm": 0.53125, "learning_rate": 0.0004984779143098492, "loss": 0.2187, "step": 25550 }, { "epoch": 1.06, "grad_norm": 0.7734375, "learning_rate": 0.0004984767191532085, "loss": 0.2065, "step": 25560 }, { "epoch": 1.06, "grad_norm": 0.30859375, "learning_rate": 0.0004984755235289617, "loss": 0.2297, "step": 25570 }, { "epoch": 1.06, "grad_norm": 0.8671875, "learning_rate": 0.0004984743274371107, "loss": 0.2037, "step": 25580 }, { "epoch": 1.06, "grad_norm": 0.6640625, "learning_rate": 0.0004984731308776578, "loss": 0.2273, "step": 25590 }, { "epoch": 1.06, "grad_norm": 0.384765625, "learning_rate": 0.0004984719338506052, "loss": 0.2308, "step": 25600 }, { "epoch": 1.06, "grad_norm": 0.83203125, "learning_rate": 0.0004984707363559555, "loss": 0.293, "step": 25610 }, { "epoch": 1.06, "grad_norm": 0.73828125, "learning_rate": 0.0004984695383937106, "loss": 0.247, "step": 25620 }, { "epoch": 1.06, "grad_norm": 0.94921875, "learning_rate": 0.0004984683399638729, "loss": 0.2381, "step": 25630 }, { "epoch": 1.06, "grad_norm": 0.78125, "learning_rate": 0.0004984671410664445, "loss": 0.254, "step": 25640 }, { "epoch": 1.06, "grad_norm": 0.32421875, "learning_rate": 0.0004984659417014279, "loss": 0.2633, "step": 25650 }, { "epoch": 1.06, "grad_norm": 0.59765625, "learning_rate": 0.0004984647418688252, "loss": 0.2071, "step": 25660 }, { "epoch": 1.06, "grad_norm": 0.408203125, "learning_rate": 0.0004984635415686386, "loss": 0.3069, "step": 25670 }, { "epoch": 1.06, "grad_norm": 0.6484375, "learning_rate": 0.0004984623408008705, "loss": 0.2815, "step": 25680 }, { "epoch": 1.06, "grad_norm": 0.84765625, "learning_rate": 0.000498461139565523, "loss": 0.2435, "step": 25690 }, { "epoch": 1.06, "grad_norm": 0.42578125, "learning_rate": 0.0004984599378625987, "loss": 0.2302, "step": 25700 }, { "epoch": 1.06, "grad_norm": 0.65625, "learning_rate": 0.0004984587356920995, "loss": 0.2512, "step": 25710 }, { "epoch": 1.07, "grad_norm": 0.2265625, "learning_rate": 0.0004984575330540278, "loss": 0.2526, "step": 25720 }, { "epoch": 1.07, "grad_norm": 0.373046875, "learning_rate": 0.0004984563299483859, "loss": 0.2371, "step": 25730 }, { "epoch": 1.07, "grad_norm": 1.421875, "learning_rate": 0.0004984551263751759, "loss": 0.204, "step": 25740 }, { "epoch": 1.07, "grad_norm": 0.240234375, "learning_rate": 0.0004984539223344002, "loss": 0.2621, "step": 25750 }, { "epoch": 1.07, "grad_norm": 0.828125, "learning_rate": 0.0004984527178260612, "loss": 0.2347, "step": 25760 }, { "epoch": 1.07, "grad_norm": 0.466796875, "learning_rate": 0.0004984515128501609, "loss": 0.2598, "step": 25770 }, { "epoch": 1.07, "grad_norm": 0.9921875, "learning_rate": 0.0004984503074067017, "loss": 0.2191, "step": 25780 }, { "epoch": 1.07, "grad_norm": 1.0546875, "learning_rate": 0.0004984491014956858, "loss": 0.2482, "step": 25790 }, { "epoch": 1.07, "grad_norm": 1.046875, "learning_rate": 0.0004984478951171156, "loss": 0.2523, "step": 25800 }, { "epoch": 1.07, "grad_norm": 0.6015625, "learning_rate": 0.0004984466882709933, "loss": 0.223, "step": 25810 }, { "epoch": 1.07, "grad_norm": 0.5546875, "learning_rate": 0.0004984454809573211, "loss": 0.2211, "step": 25820 }, { "epoch": 1.07, "grad_norm": 1.8203125, "learning_rate": 0.0004984442731761013, "loss": 0.2373, "step": 25830 }, { "epoch": 1.07, "grad_norm": 0.71484375, "learning_rate": 0.0004984430649273363, "loss": 0.2201, "step": 25840 }, { "epoch": 1.07, "grad_norm": 0.9765625, "learning_rate": 0.0004984418562110283, "loss": 0.2155, "step": 25850 }, { "epoch": 1.07, "grad_norm": 0.578125, "learning_rate": 0.0004984406470271794, "loss": 0.2077, "step": 25860 }, { "epoch": 1.07, "grad_norm": 0.58203125, "learning_rate": 0.0004984394373757922, "loss": 0.2463, "step": 25870 }, { "epoch": 1.07, "grad_norm": 0.1943359375, "learning_rate": 0.0004984382272568687, "loss": 0.2043, "step": 25880 }, { "epoch": 1.07, "grad_norm": 0.396484375, "learning_rate": 0.0004984370166704114, "loss": 0.2843, "step": 25890 }, { "epoch": 1.07, "grad_norm": 0.53515625, "learning_rate": 0.0004984358056164223, "loss": 0.218, "step": 25900 }, { "epoch": 1.07, "grad_norm": 0.45703125, "learning_rate": 0.000498434594094904, "loss": 0.217, "step": 25910 }, { "epoch": 1.07, "grad_norm": 34.5, "learning_rate": 0.0004984333821058585, "loss": 0.1736, "step": 25920 }, { "epoch": 1.07, "grad_norm": 5.5625, "learning_rate": 0.0004984321696492883, "loss": 0.2811, "step": 25930 }, { "epoch": 1.07, "grad_norm": 1.015625, "learning_rate": 0.0004984309567251956, "loss": 0.2473, "step": 25940 }, { "epoch": 1.07, "grad_norm": 0.9921875, "learning_rate": 0.0004984297433335826, "loss": 0.2599, "step": 25950 }, { "epoch": 1.08, "grad_norm": 0.734375, "learning_rate": 0.0004984285294744516, "loss": 0.2219, "step": 25960 }, { "epoch": 1.08, "grad_norm": 2.71875, "learning_rate": 0.0004984273151478051, "loss": 0.1902, "step": 25970 }, { "epoch": 1.08, "grad_norm": 0.58984375, "learning_rate": 0.0004984261003536452, "loss": 0.2356, "step": 25980 }, { "epoch": 1.08, "grad_norm": 1.390625, "learning_rate": 0.000498424885091974, "loss": 0.2375, "step": 25990 }, { "epoch": 1.08, "grad_norm": 0.35546875, "learning_rate": 0.0004984236693627942, "loss": 0.2711, "step": 26000 }, { "epoch": 1.08, "grad_norm": 0.5234375, "learning_rate": 0.0004984224531661077, "loss": 0.2149, "step": 26010 }, { "epoch": 1.08, "grad_norm": 1.234375, "learning_rate": 0.0004984212365019171, "loss": 0.2324, "step": 26020 }, { "epoch": 1.08, "grad_norm": 0.4921875, "learning_rate": 0.0004984200193702246, "loss": 0.2714, "step": 26030 }, { "epoch": 1.08, "grad_norm": 1.8671875, "learning_rate": 0.0004984188017710323, "loss": 0.2363, "step": 26040 }, { "epoch": 1.08, "grad_norm": 0.5078125, "learning_rate": 0.0004984175837043428, "loss": 0.2355, "step": 26050 }, { "epoch": 1.08, "grad_norm": 1.734375, "learning_rate": 0.0004984163651701581, "loss": 0.2401, "step": 26060 }, { "epoch": 1.08, "grad_norm": 0.78125, "learning_rate": 0.0004984151461684807, "loss": 0.2344, "step": 26070 }, { "epoch": 1.08, "grad_norm": 1.2265625, "learning_rate": 0.0004984139266993128, "loss": 0.2555, "step": 26080 }, { "epoch": 1.08, "grad_norm": 0.447265625, "learning_rate": 0.0004984127067626567, "loss": 0.2222, "step": 26090 }, { "epoch": 1.08, "grad_norm": 0.63671875, "learning_rate": 0.0004984114863585147, "loss": 0.2663, "step": 26100 }, { "epoch": 1.08, "grad_norm": 0.9765625, "learning_rate": 0.0004984102654868892, "loss": 0.1978, "step": 26110 }, { "epoch": 1.08, "grad_norm": 0.490234375, "learning_rate": 0.0004984090441477823, "loss": 0.2236, "step": 26120 }, { "epoch": 1.08, "grad_norm": 0.84375, "learning_rate": 0.0004984078223411965, "loss": 0.2353, "step": 26130 }, { "epoch": 1.08, "grad_norm": 0.57421875, "learning_rate": 0.000498406600067134, "loss": 0.2403, "step": 26140 }, { "epoch": 1.08, "grad_norm": 0.59765625, "learning_rate": 0.0004984053773255971, "loss": 0.2254, "step": 26150 }, { "epoch": 1.08, "grad_norm": 0.41796875, "learning_rate": 0.000498404154116588, "loss": 0.2294, "step": 26160 }, { "epoch": 1.08, "grad_norm": 0.890625, "learning_rate": 0.0004984029304401092, "loss": 0.2697, "step": 26170 }, { "epoch": 1.08, "grad_norm": 0.68359375, "learning_rate": 0.0004984017062961629, "loss": 0.2785, "step": 26180 }, { "epoch": 1.08, "grad_norm": 0.796875, "learning_rate": 0.0004984004816847514, "loss": 0.2034, "step": 26190 }, { "epoch": 1.09, "grad_norm": 0.384765625, "learning_rate": 0.000498399256605877, "loss": 0.2074, "step": 26200 }, { "epoch": 1.09, "grad_norm": 1.2734375, "learning_rate": 0.000498398031059542, "loss": 0.2264, "step": 26210 }, { "epoch": 1.09, "grad_norm": 0.54296875, "learning_rate": 0.0004983968050457488, "loss": 0.2372, "step": 26220 }, { "epoch": 1.09, "grad_norm": 0.287109375, "learning_rate": 0.0004983955785644997, "loss": 0.2186, "step": 26230 }, { "epoch": 1.09, "grad_norm": 0.3203125, "learning_rate": 0.0004983943516157968, "loss": 0.2465, "step": 26240 }, { "epoch": 1.09, "grad_norm": 0.53515625, "learning_rate": 0.0004983931241996426, "loss": 0.2564, "step": 26250 }, { "epoch": 1.09, "grad_norm": 0.53125, "learning_rate": 0.0004983918963160394, "loss": 0.2439, "step": 26260 }, { "epoch": 1.09, "grad_norm": 1.15625, "learning_rate": 0.0004983906679649894, "loss": 0.2192, "step": 26270 }, { "epoch": 1.09, "grad_norm": 0.87890625, "learning_rate": 0.0004983894391464951, "loss": 0.214, "step": 26280 }, { "epoch": 1.09, "grad_norm": 0.80859375, "learning_rate": 0.0004983882098605586, "loss": 0.2495, "step": 26290 }, { "epoch": 1.09, "grad_norm": 0.0, "learning_rate": 0.0004983869801071824, "loss": 0.24, "step": 26300 }, { "epoch": 1.09, "grad_norm": 0.68359375, "learning_rate": 0.0004983857498863687, "loss": 0.2398, "step": 26310 }, { "epoch": 1.09, "grad_norm": 0.94140625, "learning_rate": 0.0004983845191981198, "loss": 0.2574, "step": 26320 }, { "epoch": 1.09, "grad_norm": 0.73046875, "learning_rate": 0.0004983832880424381, "loss": 0.2269, "step": 26330 }, { "epoch": 1.09, "grad_norm": 0.65234375, "learning_rate": 0.0004983820564193259, "loss": 0.259, "step": 26340 }, { "epoch": 1.09, "grad_norm": 0.984375, "learning_rate": 0.0004983808243287854, "loss": 0.2154, "step": 26350 }, { "epoch": 1.09, "grad_norm": 0.7734375, "learning_rate": 0.000498379591770819, "loss": 0.2453, "step": 26360 }, { "epoch": 1.09, "grad_norm": 0.44921875, "learning_rate": 0.0004983783587454291, "loss": 0.2271, "step": 26370 }, { "epoch": 1.09, "grad_norm": 0.83203125, "learning_rate": 0.000498377125252618, "loss": 0.2121, "step": 26380 }, { "epoch": 1.09, "grad_norm": 0.85546875, "learning_rate": 0.0004983758912923879, "loss": 0.3093, "step": 26390 }, { "epoch": 1.09, "grad_norm": 1.1953125, "learning_rate": 0.0004983746568647412, "loss": 0.1661, "step": 26400 }, { "epoch": 1.09, "grad_norm": 1.40625, "learning_rate": 0.0004983734219696803, "loss": 0.1865, "step": 26410 }, { "epoch": 1.09, "grad_norm": 0.87890625, "learning_rate": 0.0004983721866072073, "loss": 0.2314, "step": 26420 }, { "epoch": 1.09, "grad_norm": 0.6796875, "learning_rate": 0.0004983709507773248, "loss": 0.2095, "step": 26430 }, { "epoch": 1.1, "grad_norm": 0.32421875, "learning_rate": 0.0004983697144800348, "loss": 0.262, "step": 26440 }, { "epoch": 1.1, "grad_norm": 1.4296875, "learning_rate": 0.00049836847771534, "loss": 0.2708, "step": 26450 }, { "epoch": 1.1, "grad_norm": 0.69921875, "learning_rate": 0.0004983672404832425, "loss": 0.2501, "step": 26460 }, { "epoch": 1.1, "grad_norm": 0.4375, "learning_rate": 0.0004983660027837447, "loss": 0.1925, "step": 26470 }, { "epoch": 1.1, "grad_norm": 0.458984375, "learning_rate": 0.0004983647646168488, "loss": 0.2765, "step": 26480 }, { "epoch": 1.1, "grad_norm": 1.28125, "learning_rate": 0.0004983635259825574, "loss": 0.2878, "step": 26490 }, { "epoch": 1.1, "grad_norm": 0.46875, "learning_rate": 0.0004983622868808725, "loss": 0.2154, "step": 26500 }, { "epoch": 1.1, "grad_norm": 0.400390625, "learning_rate": 0.0004983610473117966, "loss": 0.2358, "step": 26510 }, { "epoch": 1.1, "grad_norm": 0.63671875, "learning_rate": 0.0004983598072753322, "loss": 0.2472, "step": 26520 }, { "epoch": 1.1, "grad_norm": 0.5390625, "learning_rate": 0.0004983585667714814, "loss": 0.2317, "step": 26530 }, { "epoch": 1.1, "grad_norm": 0.6328125, "learning_rate": 0.0004983573258002465, "loss": 0.2166, "step": 26540 }, { "epoch": 1.1, "grad_norm": 0.62890625, "learning_rate": 0.00049835608436163, "loss": 0.2662, "step": 26550 }, { "epoch": 1.1, "grad_norm": 1.1171875, "learning_rate": 0.0004983548424556341, "loss": 0.2778, "step": 26560 }, { "epoch": 1.1, "grad_norm": 0.7734375, "learning_rate": 0.0004983536000822613, "loss": 0.255, "step": 26570 }, { "epoch": 1.1, "grad_norm": 0.7578125, "learning_rate": 0.0004983523572415138, "loss": 0.2826, "step": 26580 }, { "epoch": 1.1, "grad_norm": 0.69140625, "learning_rate": 0.000498351113933394, "loss": 0.224, "step": 26590 }, { "epoch": 1.1, "grad_norm": 1.8828125, "learning_rate": 0.0004983498701579042, "loss": 0.2482, "step": 26600 }, { "epoch": 1.1, "grad_norm": 2.46875, "learning_rate": 0.0004983486259150468, "loss": 0.195, "step": 26610 }, { "epoch": 1.1, "grad_norm": 0.58203125, "learning_rate": 0.0004983473812048241, "loss": 0.2282, "step": 26620 }, { "epoch": 1.1, "grad_norm": 0.640625, "learning_rate": 0.0004983461360272384, "loss": 0.2456, "step": 26630 }, { "epoch": 1.1, "grad_norm": 0.419921875, "learning_rate": 0.0004983448903822922, "loss": 0.2869, "step": 26640 }, { "epoch": 1.1, "grad_norm": 0.416015625, "learning_rate": 0.0004983436442699877, "loss": 0.2751, "step": 26650 }, { "epoch": 1.1, "grad_norm": 0.69921875, "learning_rate": 0.0004983423976903272, "loss": 0.2094, "step": 26660 }, { "epoch": 1.1, "grad_norm": 0.43359375, "learning_rate": 0.0004983411506433132, "loss": 0.2628, "step": 26670 }, { "epoch": 1.11, "grad_norm": 0.52734375, "learning_rate": 0.0004983399031289479, "loss": 0.1648, "step": 26680 }, { "epoch": 1.11, "grad_norm": 0.77734375, "learning_rate": 0.0004983386551472337, "loss": 0.2174, "step": 26690 }, { "epoch": 1.11, "grad_norm": 0.353515625, "learning_rate": 0.000498337406698173, "loss": 0.1865, "step": 26700 }, { "epoch": 1.11, "grad_norm": 1.34375, "learning_rate": 0.0004983361577817682, "loss": 0.2267, "step": 26710 }, { "epoch": 1.11, "grad_norm": 0.69140625, "learning_rate": 0.0004983349083980215, "loss": 0.2391, "step": 26720 }, { "epoch": 1.11, "grad_norm": 0.66015625, "learning_rate": 0.0004983336585469353, "loss": 0.2327, "step": 26730 }, { "epoch": 1.11, "grad_norm": 0.91796875, "learning_rate": 0.0004983324082285121, "loss": 0.1692, "step": 26740 }, { "epoch": 1.11, "grad_norm": 0.5234375, "learning_rate": 0.000498331157442754, "loss": 0.2429, "step": 26750 }, { "epoch": 1.11, "grad_norm": 0.67578125, "learning_rate": 0.0004983299061896636, "loss": 0.2452, "step": 26760 }, { "epoch": 1.11, "grad_norm": 2.4375, "learning_rate": 0.000498328654469243, "loss": 0.2623, "step": 26770 }, { "epoch": 1.11, "grad_norm": 0.49609375, "learning_rate": 0.0004983274022814948, "loss": 0.2316, "step": 26780 }, { "epoch": 1.11, "grad_norm": 1.1328125, "learning_rate": 0.0004983261496264211, "loss": 0.249, "step": 26790 }, { "epoch": 1.11, "grad_norm": 1.0546875, "learning_rate": 0.0004983248965040245, "loss": 0.2533, "step": 26800 }, { "epoch": 1.11, "grad_norm": 0.98828125, "learning_rate": 0.0004983236429143072, "loss": 0.2216, "step": 26810 }, { "epoch": 1.11, "grad_norm": 0.384765625, "learning_rate": 0.0004983223888572717, "loss": 0.2477, "step": 26820 }, { "epoch": 1.11, "grad_norm": 0.9453125, "learning_rate": 0.0004983211343329203, "loss": 0.2172, "step": 26830 }, { "epoch": 1.11, "grad_norm": 0.5078125, "learning_rate": 0.0004983198793412553, "loss": 0.27, "step": 26840 }, { "epoch": 1.11, "grad_norm": 1.28125, "learning_rate": 0.000498318623882279, "loss": 0.2004, "step": 26850 }, { "epoch": 1.11, "grad_norm": 0.4765625, "learning_rate": 0.000498317367955994, "loss": 0.2293, "step": 26860 }, { "epoch": 1.11, "grad_norm": 0.57421875, "learning_rate": 0.0004983161115624025, "loss": 0.2498, "step": 26870 }, { "epoch": 1.11, "grad_norm": 0.470703125, "learning_rate": 0.0004983148547015069, "loss": 0.1956, "step": 26880 }, { "epoch": 1.11, "grad_norm": 0.6484375, "learning_rate": 0.0004983135973733096, "loss": 0.2447, "step": 26890 }, { "epoch": 1.11, "grad_norm": 0.7578125, "learning_rate": 0.0004983123395778128, "loss": 0.2482, "step": 26900 }, { "epoch": 1.11, "grad_norm": 0.82421875, "learning_rate": 0.000498311081315019, "loss": 0.2299, "step": 26910 }, { "epoch": 1.12, "grad_norm": 0.71875, "learning_rate": 0.0004983098225849308, "loss": 0.2294, "step": 26920 }, { "epoch": 1.12, "grad_norm": 0.921875, "learning_rate": 0.0004983085633875501, "loss": 0.2448, "step": 26930 }, { "epoch": 1.12, "grad_norm": 0.8203125, "learning_rate": 0.0004983073037228794, "loss": 0.2398, "step": 26940 }, { "epoch": 1.12, "grad_norm": 1.1171875, "learning_rate": 0.0004983060435909215, "loss": 0.2361, "step": 26950 }, { "epoch": 1.12, "grad_norm": 0.56640625, "learning_rate": 0.0004983047829916781, "loss": 0.236, "step": 26960 }, { "epoch": 1.12, "grad_norm": 1.1484375, "learning_rate": 0.0004983035219251521, "loss": 0.2421, "step": 26970 }, { "epoch": 1.12, "grad_norm": 1.1640625, "learning_rate": 0.0004983022603913457, "loss": 0.2284, "step": 26980 }, { "epoch": 1.12, "grad_norm": 0.341796875, "learning_rate": 0.0004983009983902612, "loss": 0.1811, "step": 26990 }, { "epoch": 1.12, "grad_norm": 0.6484375, "learning_rate": 0.0004982997359219011, "loss": 0.2267, "step": 27000 }, { "epoch": 1.12, "grad_norm": 0.48828125, "learning_rate": 0.0004982984729862677, "loss": 0.2362, "step": 27010 }, { "epoch": 1.12, "grad_norm": 0.44140625, "learning_rate": 0.0004982972095833633, "loss": 0.3276, "step": 27020 }, { "epoch": 1.12, "grad_norm": 0.8203125, "learning_rate": 0.0004982959457131904, "loss": 0.1894, "step": 27030 }, { "epoch": 1.12, "grad_norm": 1.1484375, "learning_rate": 0.0004982946813757514, "loss": 0.1884, "step": 27040 }, { "epoch": 1.12, "grad_norm": 0.19921875, "learning_rate": 0.0004982934165710485, "loss": 0.1988, "step": 27050 }, { "epoch": 1.12, "grad_norm": 0.51171875, "learning_rate": 0.0004982921512990844, "loss": 0.1859, "step": 27060 }, { "epoch": 1.12, "grad_norm": 2.046875, "learning_rate": 0.0004982908855598611, "loss": 0.2269, "step": 27070 }, { "epoch": 1.12, "grad_norm": 0.447265625, "learning_rate": 0.0004982896193533813, "loss": 0.2167, "step": 27080 }, { "epoch": 1.12, "grad_norm": 0.56640625, "learning_rate": 0.0004982883526796471, "loss": 0.2252, "step": 27090 }, { "epoch": 1.12, "grad_norm": 1.0625, "learning_rate": 0.0004982870855386612, "loss": 0.2265, "step": 27100 }, { "epoch": 1.12, "grad_norm": 0.55078125, "learning_rate": 0.0004982858179304258, "loss": 0.2738, "step": 27110 }, { "epoch": 1.12, "grad_norm": 0.62890625, "learning_rate": 0.0004982845498549432, "loss": 0.2714, "step": 27120 }, { "epoch": 1.12, "grad_norm": 0.54296875, "learning_rate": 0.0004982832813122159, "loss": 0.2506, "step": 27130 }, { "epoch": 1.12, "grad_norm": 0.83984375, "learning_rate": 0.0004982820123022463, "loss": 0.2627, "step": 27140 }, { "epoch": 1.12, "grad_norm": 0.9609375, "learning_rate": 0.0004982807428250368, "loss": 0.267, "step": 27150 }, { "epoch": 1.12, "grad_norm": 0.58984375, "learning_rate": 0.0004982794728805897, "loss": 0.2109, "step": 27160 }, { "epoch": 1.13, "grad_norm": 1.4453125, "learning_rate": 0.0004982782024689075, "loss": 0.2534, "step": 27170 }, { "epoch": 1.13, "grad_norm": 0.65234375, "learning_rate": 0.0004982769315899925, "loss": 0.2894, "step": 27180 }, { "epoch": 1.13, "grad_norm": 0.466796875, "learning_rate": 0.0004982756602438472, "loss": 0.2376, "step": 27190 }, { "epoch": 1.13, "grad_norm": 0.6171875, "learning_rate": 0.0004982743884304738, "loss": 0.1934, "step": 27200 }, { "epoch": 1.13, "grad_norm": 1.328125, "learning_rate": 0.0004982731161498748, "loss": 0.2285, "step": 27210 }, { "epoch": 1.13, "grad_norm": 0.53515625, "learning_rate": 0.0004982718434020527, "loss": 0.224, "step": 27220 }, { "epoch": 1.13, "grad_norm": 0.390625, "learning_rate": 0.0004982705701870098, "loss": 0.2883, "step": 27230 }, { "epoch": 1.13, "grad_norm": 0.259765625, "learning_rate": 0.0004982692965047485, "loss": 0.1746, "step": 27240 }, { "epoch": 1.13, "grad_norm": 0.546875, "learning_rate": 0.0004982680223552711, "loss": 0.2221, "step": 27250 }, { "epoch": 1.13, "grad_norm": 0.37890625, "learning_rate": 0.0004982667477385802, "loss": 0.1898, "step": 27260 }, { "epoch": 1.13, "grad_norm": 0.66796875, "learning_rate": 0.000498265472654678, "loss": 0.233, "step": 27270 }, { "epoch": 1.13, "grad_norm": 1.8828125, "learning_rate": 0.0004982641971035671, "loss": 0.2934, "step": 27280 }, { "epoch": 1.13, "grad_norm": 0.44921875, "learning_rate": 0.0004982629210852497, "loss": 0.2226, "step": 27290 }, { "epoch": 1.13, "grad_norm": 0.953125, "learning_rate": 0.0004982616445997284, "loss": 0.2761, "step": 27300 }, { "epoch": 1.13, "grad_norm": 0.41015625, "learning_rate": 0.0004982603676470053, "loss": 0.2155, "step": 27310 }, { "epoch": 1.13, "grad_norm": 0.62890625, "learning_rate": 0.0004982590902270831, "loss": 0.2101, "step": 27320 }, { "epoch": 1.13, "grad_norm": 0.7265625, "learning_rate": 0.0004982578123399642, "loss": 0.1988, "step": 27330 }, { "epoch": 1.13, "grad_norm": 1.2109375, "learning_rate": 0.0004982565339856508, "loss": 0.2135, "step": 27340 }, { "epoch": 1.13, "grad_norm": 0.431640625, "learning_rate": 0.0004982552551641454, "loss": 0.2179, "step": 27350 }, { "epoch": 1.13, "grad_norm": 0.796875, "learning_rate": 0.0004982539758754505, "loss": 0.2589, "step": 27360 }, { "epoch": 1.13, "grad_norm": 0.490234375, "learning_rate": 0.0004982526961195684, "loss": 0.2246, "step": 27370 }, { "epoch": 1.13, "grad_norm": 0.62890625, "learning_rate": 0.0004982514158965015, "loss": 0.2175, "step": 27380 }, { "epoch": 1.13, "grad_norm": 1.125, "learning_rate": 0.0004982501352062523, "loss": 0.218, "step": 27390 }, { "epoch": 1.13, "grad_norm": 1.0546875, "learning_rate": 0.000498248854048823, "loss": 0.2496, "step": 27400 }, { "epoch": 1.14, "grad_norm": 1.7578125, "learning_rate": 0.0004982475724242164, "loss": 0.3228, "step": 27410 }, { "epoch": 1.14, "grad_norm": 0.296875, "learning_rate": 0.0004982462903324346, "loss": 0.2298, "step": 27420 }, { "epoch": 1.14, "grad_norm": 3.28125, "learning_rate": 0.00049824500777348, "loss": 0.2346, "step": 27430 }, { "epoch": 1.14, "grad_norm": 0.345703125, "learning_rate": 0.0004982437247473551, "loss": 0.1856, "step": 27440 }, { "epoch": 1.14, "grad_norm": 0.875, "learning_rate": 0.0004982424412540623, "loss": 0.2186, "step": 27450 }, { "epoch": 1.14, "grad_norm": 0.359375, "learning_rate": 0.0004982411572936042, "loss": 0.2222, "step": 27460 }, { "epoch": 1.14, "grad_norm": 1.046875, "learning_rate": 0.000498239872865983, "loss": 0.2181, "step": 27470 }, { "epoch": 1.14, "grad_norm": 0.64453125, "learning_rate": 0.000498238587971201, "loss": 0.1939, "step": 27480 }, { "epoch": 1.14, "grad_norm": 0.3984375, "learning_rate": 0.000498237302609261, "loss": 0.1706, "step": 27490 }, { "epoch": 1.14, "grad_norm": 1.1171875, "learning_rate": 0.000498236016780165, "loss": 0.1697, "step": 27500 }, { "epoch": 1.14, "grad_norm": 0.8125, "learning_rate": 0.0004982347304839158, "loss": 0.2384, "step": 27510 }, { "epoch": 1.14, "grad_norm": 0.6796875, "learning_rate": 0.0004982334437205156, "loss": 0.2748, "step": 27520 }, { "epoch": 1.14, "grad_norm": 0.37109375, "learning_rate": 0.0004982321564899669, "loss": 0.2225, "step": 27530 }, { "epoch": 1.14, "grad_norm": 0.375, "learning_rate": 0.000498230868792272, "loss": 0.2237, "step": 27540 }, { "epoch": 1.14, "grad_norm": 0.52734375, "learning_rate": 0.0004982295806274334, "loss": 0.202, "step": 27550 }, { "epoch": 1.14, "grad_norm": 0.953125, "learning_rate": 0.0004982282919954536, "loss": 0.1905, "step": 27560 }, { "epoch": 1.14, "grad_norm": 0.44921875, "learning_rate": 0.0004982270028963349, "loss": 0.2387, "step": 27570 }, { "epoch": 1.14, "grad_norm": 0.7265625, "learning_rate": 0.0004982257133300798, "loss": 0.2911, "step": 27580 }, { "epoch": 1.14, "grad_norm": 0.478515625, "learning_rate": 0.0004982244232966908, "loss": 0.2126, "step": 27590 }, { "epoch": 1.14, "grad_norm": 0.60546875, "learning_rate": 0.0004982231327961701, "loss": 0.2711, "step": 27600 }, { "epoch": 1.14, "grad_norm": 0.76171875, "learning_rate": 0.0004982218418285204, "loss": 0.1746, "step": 27610 }, { "epoch": 1.14, "grad_norm": 0.56640625, "learning_rate": 0.000498220550393744, "loss": 0.2865, "step": 27620 }, { "epoch": 1.14, "grad_norm": 0.6953125, "learning_rate": 0.0004982192584918432, "loss": 0.2234, "step": 27630 }, { "epoch": 1.14, "grad_norm": 1.1484375, "learning_rate": 0.0004982179661228207, "loss": 0.2296, "step": 27640 }, { "epoch": 1.15, "grad_norm": 2.0, "learning_rate": 0.0004982166732866788, "loss": 0.2522, "step": 27650 }, { "epoch": 1.15, "grad_norm": 0.87109375, "learning_rate": 0.0004982153799834198, "loss": 0.2316, "step": 27660 }, { "epoch": 1.15, "grad_norm": 0.462890625, "learning_rate": 0.0004982140862130463, "loss": 0.233, "step": 27670 }, { "epoch": 1.15, "grad_norm": 0.7578125, "learning_rate": 0.0004982127919755607, "loss": 0.2187, "step": 27680 }, { "epoch": 1.15, "grad_norm": 0.51953125, "learning_rate": 0.0004982114972709655, "loss": 0.2635, "step": 27690 }, { "epoch": 1.15, "grad_norm": 0.76953125, "learning_rate": 0.000498210202099263, "loss": 0.2341, "step": 27700 }, { "epoch": 1.15, "grad_norm": 0.94140625, "learning_rate": 0.0004982089064604557, "loss": 0.2396, "step": 27710 }, { "epoch": 1.15, "grad_norm": 0.52734375, "learning_rate": 0.0004982076103545461, "loss": 0.2598, "step": 27720 }, { "epoch": 1.15, "grad_norm": 0.796875, "learning_rate": 0.0004982063137815365, "loss": 0.2848, "step": 27730 }, { "epoch": 1.15, "grad_norm": 0.41015625, "learning_rate": 0.0004982050167414295, "loss": 0.172, "step": 27740 }, { "epoch": 1.15, "grad_norm": 0.458984375, "learning_rate": 0.0004982037192342273, "loss": 0.1948, "step": 27750 }, { "epoch": 1.15, "grad_norm": 0.50390625, "learning_rate": 0.0004982024212599328, "loss": 0.2309, "step": 27760 }, { "epoch": 1.15, "grad_norm": 0.578125, "learning_rate": 0.0004982011228185478, "loss": 0.2205, "step": 27770 }, { "epoch": 1.15, "grad_norm": 0.89453125, "learning_rate": 0.0004981998239100753, "loss": 0.1954, "step": 27780 }, { "epoch": 1.15, "grad_norm": 0.98046875, "learning_rate": 0.0004981985245345175, "loss": 0.2255, "step": 27790 }, { "epoch": 1.15, "grad_norm": 1.1875, "learning_rate": 0.0004981972246918769, "loss": 0.2177, "step": 27800 }, { "epoch": 1.15, "grad_norm": 0.6328125, "learning_rate": 0.0004981959243821559, "loss": 0.2429, "step": 27810 }, { "epoch": 1.15, "grad_norm": 0.73046875, "learning_rate": 0.0004981946236053569, "loss": 0.2768, "step": 27820 }, { "epoch": 1.15, "grad_norm": 0.69140625, "learning_rate": 0.0004981933223614825, "loss": 0.234, "step": 27830 }, { "epoch": 1.15, "grad_norm": 0.443359375, "learning_rate": 0.0004981920206505352, "loss": 0.2157, "step": 27840 }, { "epoch": 1.15, "grad_norm": 0.75, "learning_rate": 0.0004981907184725172, "loss": 0.2798, "step": 27850 }, { "epoch": 1.15, "grad_norm": 0.375, "learning_rate": 0.000498189415827431, "loss": 0.1793, "step": 27860 }, { "epoch": 1.15, "grad_norm": 1.03125, "learning_rate": 0.0004981881127152792, "loss": 0.2022, "step": 27870 }, { "epoch": 1.15, "grad_norm": 0.2294921875, "learning_rate": 0.0004981868091360641, "loss": 0.1746, "step": 27880 }, { "epoch": 1.16, "grad_norm": 0.48046875, "learning_rate": 0.0004981855050897883, "loss": 0.2474, "step": 27890 }, { "epoch": 1.16, "grad_norm": 0.484375, "learning_rate": 0.0004981842005764541, "loss": 0.2451, "step": 27900 }, { "epoch": 1.16, "grad_norm": 0.455078125, "learning_rate": 0.0004981828955960641, "loss": 0.2206, "step": 27910 }, { "epoch": 1.16, "grad_norm": 1.6875, "learning_rate": 0.0004981815901486207, "loss": 0.2313, "step": 27920 }, { "epoch": 1.16, "grad_norm": 0.345703125, "learning_rate": 0.0004981802842341264, "loss": 0.2276, "step": 27930 }, { "epoch": 1.16, "grad_norm": 0.76171875, "learning_rate": 0.0004981789778525836, "loss": 0.2152, "step": 27940 }, { "epoch": 1.16, "grad_norm": 0.408203125, "learning_rate": 0.0004981776710039946, "loss": 0.1877, "step": 27950 }, { "epoch": 1.16, "grad_norm": 0.96484375, "learning_rate": 0.0004981763636883622, "loss": 0.2116, "step": 27960 }, { "epoch": 1.16, "grad_norm": 0.486328125, "learning_rate": 0.0004981750559056886, "loss": 0.2487, "step": 27970 }, { "epoch": 1.16, "grad_norm": 0.85546875, "learning_rate": 0.0004981737476559764, "loss": 0.2205, "step": 27980 }, { "epoch": 1.16, "grad_norm": 1.1640625, "learning_rate": 0.0004981724389392279, "loss": 0.271, "step": 27990 }, { "epoch": 1.16, "grad_norm": 1.4609375, "learning_rate": 0.0004981711297554458, "loss": 0.1956, "step": 28000 }, { "epoch": 1.16, "grad_norm": 0.578125, "learning_rate": 0.0004981698201046323, "loss": 0.1974, "step": 28010 }, { "epoch": 1.16, "grad_norm": 0.48828125, "learning_rate": 0.0004981685099867901, "loss": 0.2166, "step": 28020 }, { "epoch": 1.16, "grad_norm": 0.3515625, "learning_rate": 0.0004981671994019216, "loss": 0.1933, "step": 28030 }, { "epoch": 1.16, "grad_norm": 0.56640625, "learning_rate": 0.0004981658883500291, "loss": 0.2398, "step": 28040 }, { "epoch": 1.16, "grad_norm": 0.1904296875, "learning_rate": 0.0004981645768311153, "loss": 0.2739, "step": 28050 }, { "epoch": 1.16, "grad_norm": 0.7421875, "learning_rate": 0.0004981632648451825, "loss": 0.2209, "step": 28060 }, { "epoch": 1.16, "grad_norm": 0.6015625, "learning_rate": 0.0004981619523922332, "loss": 0.2574, "step": 28070 }, { "epoch": 1.16, "grad_norm": 0.369140625, "learning_rate": 0.00049816063947227, "loss": 0.2166, "step": 28080 }, { "epoch": 1.16, "grad_norm": 0.6328125, "learning_rate": 0.0004981593260852953, "loss": 0.2473, "step": 28090 }, { "epoch": 1.16, "grad_norm": 0.451171875, "learning_rate": 0.0004981580122313115, "loss": 0.2462, "step": 28100 }, { "epoch": 1.16, "grad_norm": 0.88671875, "learning_rate": 0.000498156697910321, "loss": 0.2027, "step": 28110 }, { "epoch": 1.16, "grad_norm": 1.0859375, "learning_rate": 0.0004981553831223266, "loss": 0.253, "step": 28120 }, { "epoch": 1.17, "grad_norm": 0.859375, "learning_rate": 0.0004981540678673305, "loss": 0.2102, "step": 28130 }, { "epoch": 1.17, "grad_norm": 0.91015625, "learning_rate": 0.0004981527521453351, "loss": 0.2186, "step": 28140 }, { "epoch": 1.17, "grad_norm": 0.42578125, "learning_rate": 0.0004981514359563432, "loss": 0.2606, "step": 28150 }, { "epoch": 1.17, "grad_norm": 0.98046875, "learning_rate": 0.0004981501193003571, "loss": 0.2215, "step": 28160 }, { "epoch": 1.17, "grad_norm": 0.546875, "learning_rate": 0.0004981488021773791, "loss": 0.2298, "step": 28170 }, { "epoch": 1.17, "grad_norm": 0.75, "learning_rate": 0.000498147484587412, "loss": 0.2383, "step": 28180 }, { "epoch": 1.17, "grad_norm": 0.78125, "learning_rate": 0.000498146166530458, "loss": 0.2266, "step": 28190 }, { "epoch": 1.17, "grad_norm": 1.171875, "learning_rate": 0.0004981448480065199, "loss": 0.2783, "step": 28200 }, { "epoch": 1.17, "grad_norm": 0.5234375, "learning_rate": 0.0004981435290155999, "loss": 0.2549, "step": 28210 }, { "epoch": 1.17, "grad_norm": 1.125, "learning_rate": 0.0004981422095577005, "loss": 0.2298, "step": 28220 }, { "epoch": 1.17, "grad_norm": 0.373046875, "learning_rate": 0.0004981408896328244, "loss": 0.2497, "step": 28230 }, { "epoch": 1.17, "grad_norm": 0.79296875, "learning_rate": 0.0004981395692409739, "loss": 0.228, "step": 28240 }, { "epoch": 1.17, "grad_norm": 0.3125, "learning_rate": 0.0004981382483821515, "loss": 0.2216, "step": 28250 }, { "epoch": 1.17, "grad_norm": 0.56640625, "learning_rate": 0.0004981369270563597, "loss": 0.2908, "step": 28260 }, { "epoch": 1.17, "grad_norm": 0.193359375, "learning_rate": 0.000498135605263601, "loss": 0.2213, "step": 28270 }, { "epoch": 1.17, "grad_norm": 0.7265625, "learning_rate": 0.0004981342830038778, "loss": 0.2642, "step": 28280 }, { "epoch": 1.17, "grad_norm": 0.439453125, "learning_rate": 0.0004981329602771928, "loss": 0.1845, "step": 28290 }, { "epoch": 1.17, "grad_norm": 0.578125, "learning_rate": 0.0004981316370835483, "loss": 0.2037, "step": 28300 }, { "epoch": 1.17, "grad_norm": 0.6015625, "learning_rate": 0.000498130313422947, "loss": 0.1987, "step": 28310 }, { "epoch": 1.17, "grad_norm": 0.9140625, "learning_rate": 0.0004981289892953911, "loss": 0.2535, "step": 28320 }, { "epoch": 1.17, "grad_norm": 0.94140625, "learning_rate": 0.0004981276647008833, "loss": 0.231, "step": 28330 }, { "epoch": 1.17, "grad_norm": 0.80859375, "learning_rate": 0.000498126339639426, "loss": 0.2164, "step": 28340 }, { "epoch": 1.17, "grad_norm": 0.70703125, "learning_rate": 0.0004981250141110217, "loss": 0.2739, "step": 28350 }, { "epoch": 1.17, "grad_norm": 1.4296875, "learning_rate": 0.000498123688115673, "loss": 0.2009, "step": 28360 }, { "epoch": 1.18, "grad_norm": 0.80859375, "learning_rate": 0.0004981223616533822, "loss": 0.2212, "step": 28370 }, { "epoch": 1.18, "grad_norm": 0.427734375, "learning_rate": 0.0004981210347241521, "loss": 0.2718, "step": 28380 }, { "epoch": 1.18, "grad_norm": 0.5390625, "learning_rate": 0.0004981197073279848, "loss": 0.2101, "step": 28390 }, { "epoch": 1.18, "grad_norm": 0.65625, "learning_rate": 0.0004981183794648831, "loss": 0.2382, "step": 28400 }, { "epoch": 1.18, "grad_norm": 0.53515625, "learning_rate": 0.0004981170511348494, "loss": 0.1893, "step": 28410 }, { "epoch": 1.18, "grad_norm": 0.5, "learning_rate": 0.0004981157223378862, "loss": 0.2185, "step": 28420 }, { "epoch": 1.18, "grad_norm": 0.78125, "learning_rate": 0.000498114393073996, "loss": 0.2393, "step": 28430 }, { "epoch": 1.18, "grad_norm": 0.6484375, "learning_rate": 0.0004981130633431813, "loss": 0.1904, "step": 28440 }, { "epoch": 1.18, "grad_norm": 0.83984375, "learning_rate": 0.0004981117331454446, "loss": 0.2106, "step": 28450 }, { "epoch": 1.18, "grad_norm": 0.65625, "learning_rate": 0.0004981104024807885, "loss": 0.245, "step": 28460 }, { "epoch": 1.18, "grad_norm": 0.7421875, "learning_rate": 0.0004981090713492152, "loss": 0.2262, "step": 28470 }, { "epoch": 1.18, "grad_norm": 0.443359375, "learning_rate": 0.0004981077397507276, "loss": 0.2228, "step": 28480 }, { "epoch": 1.18, "grad_norm": 0.546875, "learning_rate": 0.000498106407685328, "loss": 0.2363, "step": 28490 }, { "epoch": 1.18, "grad_norm": 0.734375, "learning_rate": 0.000498105075153019, "loss": 0.2264, "step": 28500 }, { "epoch": 1.18, "grad_norm": 0.5, "learning_rate": 0.0004981037421538029, "loss": 0.2238, "step": 28510 }, { "epoch": 1.18, "grad_norm": 0.455078125, "learning_rate": 0.0004981024086876824, "loss": 0.2203, "step": 28520 }, { "epoch": 1.18, "grad_norm": 0.294921875, "learning_rate": 0.00049810107475466, "loss": 0.2494, "step": 28530 }, { "epoch": 1.18, "grad_norm": 0.859375, "learning_rate": 0.0004980997403547381, "loss": 0.2573, "step": 28540 }, { "epoch": 1.18, "grad_norm": 0.54296875, "learning_rate": 0.0004980984054879194, "loss": 0.249, "step": 28550 }, { "epoch": 1.18, "grad_norm": 0.66015625, "learning_rate": 0.0004980970701542062, "loss": 0.2303, "step": 28560 }, { "epoch": 1.18, "grad_norm": 0.55859375, "learning_rate": 0.0004980957343536011, "loss": 0.2076, "step": 28570 }, { "epoch": 1.18, "grad_norm": 0.671875, "learning_rate": 0.0004980943980861066, "loss": 0.2473, "step": 28580 }, { "epoch": 1.18, "grad_norm": 0.87890625, "learning_rate": 0.0004980930613517254, "loss": 0.2257, "step": 28590 }, { "epoch": 1.18, "grad_norm": 0.73046875, "learning_rate": 0.0004980917241504596, "loss": 0.207, "step": 28600 }, { "epoch": 1.19, "grad_norm": 0.83984375, "learning_rate": 0.0004980903864823122, "loss": 0.2119, "step": 28610 }, { "epoch": 1.19, "grad_norm": 0.96484375, "learning_rate": 0.0004980890483472853, "loss": 0.2546, "step": 28620 }, { "epoch": 1.19, "grad_norm": 0.5703125, "learning_rate": 0.0004980877097453817, "loss": 0.2255, "step": 28630 }, { "epoch": 1.19, "grad_norm": 0.72265625, "learning_rate": 0.0004980863706766038, "loss": 0.25, "step": 28640 }, { "epoch": 1.19, "grad_norm": 0.439453125, "learning_rate": 0.0004980850311409542, "loss": 0.2986, "step": 28650 }, { "epoch": 1.19, "grad_norm": 0.625, "learning_rate": 0.0004980836911384353, "loss": 0.242, "step": 28660 }, { "epoch": 1.19, "grad_norm": 1.0703125, "learning_rate": 0.0004980823506690497, "loss": 0.2655, "step": 28670 }, { "epoch": 1.19, "grad_norm": 0.439453125, "learning_rate": 0.0004980810097327999, "loss": 0.2492, "step": 28680 }, { "epoch": 1.19, "grad_norm": 0.87890625, "learning_rate": 0.0004980796683296885, "loss": 0.2119, "step": 28690 }, { "epoch": 1.19, "grad_norm": 0.8046875, "learning_rate": 0.0004980783264597179, "loss": 0.1917, "step": 28700 }, { "epoch": 1.19, "grad_norm": 0.4140625, "learning_rate": 0.0004980769841228906, "loss": 0.2613, "step": 28710 }, { "epoch": 1.19, "grad_norm": 0.5625, "learning_rate": 0.0004980756413192093, "loss": 0.1942, "step": 28720 }, { "epoch": 1.19, "grad_norm": 1.7265625, "learning_rate": 0.0004980742980486763, "loss": 0.2592, "step": 28730 }, { "epoch": 1.19, "grad_norm": 0.68359375, "learning_rate": 0.0004980729543112944, "loss": 0.2298, "step": 28740 }, { "epoch": 1.19, "grad_norm": 0.546875, "learning_rate": 0.0004980716101070659, "loss": 0.2283, "step": 28750 }, { "epoch": 1.19, "grad_norm": 2.078125, "learning_rate": 0.0004980702654359936, "loss": 0.2228, "step": 28760 }, { "epoch": 1.19, "grad_norm": 1.1171875, "learning_rate": 0.0004980689202980797, "loss": 0.2853, "step": 28770 }, { "epoch": 1.19, "grad_norm": 0.65234375, "learning_rate": 0.0004980675746933268, "loss": 0.2371, "step": 28780 }, { "epoch": 1.19, "grad_norm": 0.66015625, "learning_rate": 0.0004980662286217377, "loss": 0.2193, "step": 28790 }, { "epoch": 1.19, "grad_norm": 0.8671875, "learning_rate": 0.0004980648820833146, "loss": 0.2839, "step": 28800 }, { "epoch": 1.19, "grad_norm": 0.7265625, "learning_rate": 0.0004980635350780603, "loss": 0.2333, "step": 28810 }, { "epoch": 1.19, "grad_norm": 0.5390625, "learning_rate": 0.0004980621876059772, "loss": 0.2467, "step": 28820 }, { "epoch": 1.19, "grad_norm": 0.49609375, "learning_rate": 0.0004980608396670677, "loss": 0.194, "step": 28830 }, { "epoch": 1.19, "grad_norm": 1.125, "learning_rate": 0.0004980594912613347, "loss": 0.2031, "step": 28840 }, { "epoch": 1.19, "grad_norm": 0.52734375, "learning_rate": 0.0004980581423887804, "loss": 0.2147, "step": 28850 }, { "epoch": 1.2, "grad_norm": 0.68359375, "learning_rate": 0.0004980567930494075, "loss": 0.3, "step": 28860 }, { "epoch": 1.2, "grad_norm": 1.1875, "learning_rate": 0.0004980554432432185, "loss": 0.287, "step": 28870 }, { "epoch": 1.2, "grad_norm": 1.2421875, "learning_rate": 0.0004980540929702159, "loss": 0.1967, "step": 28880 }, { "epoch": 1.2, "grad_norm": 0.205078125, "learning_rate": 0.0004980527422304023, "loss": 0.2325, "step": 28890 }, { "epoch": 1.2, "grad_norm": 0.6328125, "learning_rate": 0.0004980513910237803, "loss": 0.2311, "step": 28900 }, { "epoch": 1.2, "grad_norm": 0.318359375, "learning_rate": 0.0004980500393503523, "loss": 0.2961, "step": 28910 }, { "epoch": 1.2, "grad_norm": 0.62890625, "learning_rate": 0.0004980486872101209, "loss": 0.2118, "step": 28920 }, { "epoch": 1.2, "grad_norm": 0.9921875, "learning_rate": 0.0004980473346030887, "loss": 0.2, "step": 28930 }, { "epoch": 1.2, "grad_norm": 0.51953125, "learning_rate": 0.0004980459815292582, "loss": 0.1977, "step": 28940 }, { "epoch": 1.2, "grad_norm": 0.59765625, "learning_rate": 0.0004980446279886319, "loss": 0.2552, "step": 28950 }, { "epoch": 1.2, "grad_norm": 0.298828125, "learning_rate": 0.0004980432739812125, "loss": 0.2229, "step": 28960 }, { "epoch": 1.2, "grad_norm": 1.1484375, "learning_rate": 0.0004980419195070023, "loss": 0.213, "step": 28970 }, { "epoch": 1.2, "grad_norm": 0.8828125, "learning_rate": 0.0004980405645660041, "loss": 0.2325, "step": 28980 }, { "epoch": 1.2, "grad_norm": 0.375, "learning_rate": 0.0004980392091582203, "loss": 0.1737, "step": 28990 }, { "epoch": 1.2, "grad_norm": 0.59375, "learning_rate": 0.0004980378532836535, "loss": 0.2489, "step": 29000 }, { "epoch": 1.2, "grad_norm": 0.09326171875, "learning_rate": 0.0004980364969423063, "loss": 0.2361, "step": 29010 }, { "epoch": 1.2, "grad_norm": 0.890625, "learning_rate": 0.0004980351401341811, "loss": 0.2159, "step": 29020 }, { "epoch": 1.2, "grad_norm": 1.0546875, "learning_rate": 0.0004980337828592807, "loss": 0.2182, "step": 29030 }, { "epoch": 1.2, "grad_norm": 0.82421875, "learning_rate": 0.0004980324251176074, "loss": 0.202, "step": 29040 }, { "epoch": 1.2, "grad_norm": 1.2265625, "learning_rate": 0.0004980310669091639, "loss": 0.2665, "step": 29050 }, { "epoch": 1.2, "grad_norm": 1.75, "learning_rate": 0.0004980297082339527, "loss": 0.2497, "step": 29060 }, { "epoch": 1.2, "grad_norm": 1.2578125, "learning_rate": 0.0004980283490919763, "loss": 0.2067, "step": 29070 }, { "epoch": 1.2, "grad_norm": 0.404296875, "learning_rate": 0.0004980269894832375, "loss": 0.2599, "step": 29080 }, { "epoch": 1.2, "grad_norm": 0.8125, "learning_rate": 0.0004980256294077385, "loss": 0.2182, "step": 29090 }, { "epoch": 1.21, "grad_norm": 1.1875, "learning_rate": 0.0004980242688654821, "loss": 0.2453, "step": 29100 }, { "epoch": 1.21, "grad_norm": 0.375, "learning_rate": 0.0004980229078564709, "loss": 0.2369, "step": 29110 }, { "epoch": 1.21, "grad_norm": 0.828125, "learning_rate": 0.0004980215463807072, "loss": 0.2605, "step": 29120 }, { "epoch": 1.21, "grad_norm": 0.7890625, "learning_rate": 0.0004980201844381939, "loss": 0.1893, "step": 29130 }, { "epoch": 1.21, "grad_norm": 0.58984375, "learning_rate": 0.0004980188220289333, "loss": 0.2269, "step": 29140 }, { "epoch": 1.21, "grad_norm": 0.0341796875, "learning_rate": 0.0004980174591529281, "loss": 0.2225, "step": 29150 }, { "epoch": 1.21, "grad_norm": 0.66015625, "learning_rate": 0.000498016095810181, "loss": 0.2793, "step": 29160 }, { "epoch": 1.21, "grad_norm": 0.6328125, "learning_rate": 0.0004980147320006942, "loss": 0.2693, "step": 29170 }, { "epoch": 1.21, "grad_norm": 0.484375, "learning_rate": 0.0004980133677244705, "loss": 0.2415, "step": 29180 }, { "epoch": 1.21, "grad_norm": 0.8046875, "learning_rate": 0.0004980120029815124, "loss": 0.2184, "step": 29190 }, { "epoch": 1.21, "grad_norm": 0.546875, "learning_rate": 0.0004980106377718225, "loss": 0.1811, "step": 29200 }, { "epoch": 1.21, "grad_norm": 0.52734375, "learning_rate": 0.0004980092720954034, "loss": 0.2422, "step": 29210 }, { "epoch": 1.21, "grad_norm": 0.8671875, "learning_rate": 0.0004980079059522575, "loss": 0.2151, "step": 29220 }, { "epoch": 1.21, "grad_norm": 0.3671875, "learning_rate": 0.0004980065393423876, "loss": 0.2712, "step": 29230 }, { "epoch": 1.21, "grad_norm": 0.39453125, "learning_rate": 0.0004980051722657962, "loss": 0.2402, "step": 29240 }, { "epoch": 1.21, "grad_norm": 0.69140625, "learning_rate": 0.0004980038047224858, "loss": 0.2688, "step": 29250 }, { "epoch": 1.21, "grad_norm": 0.8515625, "learning_rate": 0.000498002436712459, "loss": 0.1967, "step": 29260 }, { "epoch": 1.21, "grad_norm": 0.63671875, "learning_rate": 0.0004980010682357186, "loss": 0.2869, "step": 29270 }, { "epoch": 1.21, "grad_norm": 0.75, "learning_rate": 0.0004979996992922667, "loss": 0.2712, "step": 29280 }, { "epoch": 1.21, "grad_norm": 1.4765625, "learning_rate": 0.0004979983298821063, "loss": 0.2418, "step": 29290 }, { "epoch": 1.21, "grad_norm": 0.53125, "learning_rate": 0.0004979969600052398, "loss": 0.2228, "step": 29300 }, { "epoch": 1.21, "grad_norm": 0.85546875, "learning_rate": 0.0004979955896616699, "loss": 0.2276, "step": 29310 }, { "epoch": 1.21, "grad_norm": 0.310546875, "learning_rate": 0.0004979942188513989, "loss": 0.2477, "step": 29320 }, { "epoch": 1.21, "grad_norm": 0.28125, "learning_rate": 0.0004979928475744296, "loss": 0.2218, "step": 29330 }, { "epoch": 1.22, "grad_norm": 0.416015625, "learning_rate": 0.0004979914758307646, "loss": 0.2827, "step": 29340 }, { "epoch": 1.22, "grad_norm": 2.0625, "learning_rate": 0.0004979901036204063, "loss": 0.2734, "step": 29350 }, { "epoch": 1.22, "grad_norm": 0.6328125, "learning_rate": 0.0004979887309433576, "loss": 0.2028, "step": 29360 }, { "epoch": 1.22, "grad_norm": 0.59375, "learning_rate": 0.0004979873577996207, "loss": 0.2287, "step": 29370 }, { "epoch": 1.22, "grad_norm": 0.54296875, "learning_rate": 0.0004979859841891985, "loss": 0.1985, "step": 29380 }, { "epoch": 1.22, "grad_norm": 0.89453125, "learning_rate": 0.0004979846101120935, "loss": 0.2328, "step": 29390 }, { "epoch": 1.22, "grad_norm": 0.2890625, "learning_rate": 0.0004979832355683082, "loss": 0.1834, "step": 29400 }, { "epoch": 1.22, "grad_norm": 0.65234375, "learning_rate": 0.0004979818605578451, "loss": 0.223, "step": 29410 }, { "epoch": 1.22, "grad_norm": 1.2265625, "learning_rate": 0.000497980485080707, "loss": 0.2235, "step": 29420 }, { "epoch": 1.22, "grad_norm": 0.24609375, "learning_rate": 0.0004979791091368965, "loss": 0.2304, "step": 29430 }, { "epoch": 1.22, "grad_norm": 0.484375, "learning_rate": 0.000497977732726416, "loss": 0.3291, "step": 29440 }, { "epoch": 1.22, "grad_norm": 0.470703125, "learning_rate": 0.0004979763558492681, "loss": 0.2457, "step": 29450 }, { "epoch": 1.22, "grad_norm": 0.49609375, "learning_rate": 0.0004979749785054557, "loss": 0.2418, "step": 29460 }, { "epoch": 1.22, "grad_norm": 1.84375, "learning_rate": 0.000497973600694981, "loss": 0.2519, "step": 29470 }, { "epoch": 1.22, "grad_norm": 1.140625, "learning_rate": 0.0004979722224178468, "loss": 0.2319, "step": 29480 }, { "epoch": 1.22, "grad_norm": 0.69921875, "learning_rate": 0.0004979708436740557, "loss": 0.2939, "step": 29490 }, { "epoch": 1.22, "grad_norm": 0.81640625, "learning_rate": 0.0004979694644636103, "loss": 0.2502, "step": 29500 }, { "epoch": 1.22, "grad_norm": 1.65625, "learning_rate": 0.000497968084786513, "loss": 0.2621, "step": 29510 }, { "epoch": 1.22, "grad_norm": 3.03125, "learning_rate": 0.0004979667046427666, "loss": 0.2467, "step": 29520 }, { "epoch": 1.22, "grad_norm": 0.734375, "learning_rate": 0.0004979653240323736, "loss": 0.2286, "step": 29530 }, { "epoch": 1.22, "grad_norm": 0.84765625, "learning_rate": 0.0004979639429553367, "loss": 0.2387, "step": 29540 }, { "epoch": 1.22, "grad_norm": 0.7890625, "learning_rate": 0.0004979625614116584, "loss": 0.2107, "step": 29550 }, { "epoch": 1.22, "grad_norm": 0.8203125, "learning_rate": 0.0004979611794013414, "loss": 0.2857, "step": 29560 }, { "epoch": 1.22, "grad_norm": 0.91015625, "learning_rate": 0.0004979597969243882, "loss": 0.1905, "step": 29570 }, { "epoch": 1.23, "grad_norm": 0.734375, "learning_rate": 0.0004979584139808014, "loss": 0.2208, "step": 29580 }, { "epoch": 1.23, "grad_norm": 1.03125, "learning_rate": 0.0004979570305705838, "loss": 0.1802, "step": 29590 }, { "epoch": 1.23, "grad_norm": 0.7890625, "learning_rate": 0.0004979556466937376, "loss": 0.252, "step": 29600 }, { "epoch": 1.23, "grad_norm": 0.796875, "learning_rate": 0.0004979542623502659, "loss": 0.2337, "step": 29610 }, { "epoch": 1.23, "grad_norm": 0.8828125, "learning_rate": 0.0004979528775401708, "loss": 0.2237, "step": 29620 }, { "epoch": 1.23, "grad_norm": 1.546875, "learning_rate": 0.0004979514922634554, "loss": 0.219, "step": 29630 }, { "epoch": 1.23, "grad_norm": 0.5234375, "learning_rate": 0.0004979501065201219, "loss": 0.2679, "step": 29640 }, { "epoch": 1.23, "grad_norm": 0.357421875, "learning_rate": 0.0004979487203101732, "loss": 0.1649, "step": 29650 }, { "epoch": 1.23, "grad_norm": 0.75390625, "learning_rate": 0.0004979473336336116, "loss": 0.257, "step": 29660 }, { "epoch": 1.23, "grad_norm": 0.63671875, "learning_rate": 0.0004979459464904401, "loss": 0.2116, "step": 29670 }, { "epoch": 1.23, "grad_norm": 1.296875, "learning_rate": 0.000497944558880661, "loss": 0.207, "step": 29680 }, { "epoch": 1.23, "grad_norm": 0.287109375, "learning_rate": 0.000497943170804277, "loss": 0.1607, "step": 29690 }, { "epoch": 1.23, "grad_norm": 0.38671875, "learning_rate": 0.0004979417822612908, "loss": 0.2356, "step": 29700 }, { "epoch": 1.23, "grad_norm": 0.328125, "learning_rate": 0.000497940393251705, "loss": 0.2094, "step": 29710 }, { "epoch": 1.23, "grad_norm": 0.62109375, "learning_rate": 0.000497939003775522, "loss": 0.2159, "step": 29720 }, { "epoch": 1.23, "grad_norm": 0.7890625, "learning_rate": 0.0004979376138327446, "loss": 0.184, "step": 29730 }, { "epoch": 1.23, "grad_norm": 0.67578125, "learning_rate": 0.0004979362234233754, "loss": 0.2702, "step": 29740 }, { "epoch": 1.23, "grad_norm": 0.61328125, "learning_rate": 0.000497934832547417, "loss": 0.2802, "step": 29750 }, { "epoch": 1.23, "grad_norm": 0.494140625, "learning_rate": 0.000497933441204872, "loss": 0.24, "step": 29760 }, { "epoch": 1.23, "grad_norm": 0.474609375, "learning_rate": 0.0004979320493957431, "loss": 0.2459, "step": 29770 }, { "epoch": 1.23, "grad_norm": 0.333984375, "learning_rate": 0.0004979306571200328, "loss": 0.2767, "step": 29780 }, { "epoch": 1.23, "grad_norm": 0.5703125, "learning_rate": 0.0004979292643777439, "loss": 0.2225, "step": 29790 }, { "epoch": 1.23, "grad_norm": 0.828125, "learning_rate": 0.0004979278711688788, "loss": 0.2414, "step": 29800 }, { "epoch": 1.23, "grad_norm": 0.640625, "learning_rate": 0.0004979264774934402, "loss": 0.2055, "step": 29810 }, { "epoch": 1.24, "grad_norm": 0.578125, "learning_rate": 0.0004979250833514308, "loss": 0.271, "step": 29820 }, { "epoch": 1.24, "grad_norm": 0.90234375, "learning_rate": 0.0004979236887428531, "loss": 0.2563, "step": 29830 }, { "epoch": 1.24, "grad_norm": 0.59765625, "learning_rate": 0.0004979222936677099, "loss": 0.2031, "step": 29840 }, { "epoch": 1.24, "grad_norm": 0.83203125, "learning_rate": 0.0004979208981260036, "loss": 0.213, "step": 29850 }, { "epoch": 1.24, "grad_norm": 0.359375, "learning_rate": 0.0004979195021177369, "loss": 0.2208, "step": 29860 }, { "epoch": 1.24, "grad_norm": 0.68359375, "learning_rate": 0.0004979181056429126, "loss": 0.2327, "step": 29870 }, { "epoch": 1.24, "grad_norm": 0.2119140625, "learning_rate": 0.0004979167087015332, "loss": 0.3257, "step": 29880 }, { "epoch": 1.24, "grad_norm": 1.0859375, "learning_rate": 0.0004979153112936013, "loss": 0.3074, "step": 29890 }, { "epoch": 1.24, "grad_norm": 1.1484375, "learning_rate": 0.0004979139134191195, "loss": 0.264, "step": 29900 }, { "epoch": 1.24, "grad_norm": 0.859375, "learning_rate": 0.0004979125150780905, "loss": 0.2336, "step": 29910 }, { "epoch": 1.24, "grad_norm": 1.0703125, "learning_rate": 0.000497911116270517, "loss": 0.2675, "step": 29920 }, { "epoch": 1.24, "grad_norm": 0.263671875, "learning_rate": 0.0004979097169964015, "loss": 0.1898, "step": 29930 }, { "epoch": 1.24, "grad_norm": 0.302734375, "learning_rate": 0.0004979083172557467, "loss": 0.269, "step": 29940 }, { "epoch": 1.24, "grad_norm": 0.462890625, "learning_rate": 0.0004979069170485551, "loss": 0.2398, "step": 29950 }, { "epoch": 1.24, "grad_norm": 0.55078125, "learning_rate": 0.0004979055163748296, "loss": 0.2173, "step": 29960 }, { "epoch": 1.24, "grad_norm": 0.421875, "learning_rate": 0.0004979041152345727, "loss": 0.2699, "step": 29970 }, { "epoch": 1.24, "grad_norm": 0.7890625, "learning_rate": 0.0004979027136277869, "loss": 0.214, "step": 29980 }, { "epoch": 1.24, "grad_norm": 0.52734375, "learning_rate": 0.0004979013115544751, "loss": 0.2756, "step": 29990 }, { "epoch": 1.24, "grad_norm": 0.361328125, "learning_rate": 0.0004978999090146398, "loss": 0.1941, "step": 30000 }, { "epoch": 1.24, "grad_norm": 1.328125, "learning_rate": 0.0004978985060082835, "loss": 0.2228, "step": 30010 }, { "epoch": 1.24, "grad_norm": 1.375, "learning_rate": 0.0004978971025354091, "loss": 0.2229, "step": 30020 }, { "epoch": 1.24, "grad_norm": 1.2265625, "learning_rate": 0.0004978956985960191, "loss": 0.2719, "step": 30030 }, { "epoch": 1.24, "grad_norm": 0.7734375, "learning_rate": 0.0004978942941901161, "loss": 0.2044, "step": 30040 }, { "epoch": 1.24, "grad_norm": 4.15625, "learning_rate": 0.0004978928893177029, "loss": 0.2158, "step": 30050 }, { "epoch": 1.25, "grad_norm": 1.5625, "learning_rate": 0.000497891483978782, "loss": 0.27, "step": 30060 }, { "epoch": 1.25, "grad_norm": 0.70703125, "learning_rate": 0.0004978900781733561, "loss": 0.2107, "step": 30070 }, { "epoch": 1.25, "grad_norm": 2.0625, "learning_rate": 0.0004978886719014279, "loss": 0.2385, "step": 30080 }, { "epoch": 1.25, "grad_norm": 0.29296875, "learning_rate": 0.0004978872651629999, "loss": 0.2621, "step": 30090 }, { "epoch": 1.25, "grad_norm": 0.921875, "learning_rate": 0.0004978858579580749, "loss": 0.2922, "step": 30100 }, { "epoch": 1.25, "grad_norm": 0.3828125, "learning_rate": 0.0004978844502866554, "loss": 0.1946, "step": 30110 }, { "epoch": 1.25, "grad_norm": 0.9296875, "learning_rate": 0.0004978830421487442, "loss": 0.2284, "step": 30120 }, { "epoch": 1.25, "grad_norm": 0.484375, "learning_rate": 0.0004978816335443439, "loss": 0.1997, "step": 30130 }, { "epoch": 1.25, "grad_norm": 0.4921875, "learning_rate": 0.0004978802244734572, "loss": 0.201, "step": 30140 }, { "epoch": 1.25, "grad_norm": 1.6328125, "learning_rate": 0.0004978788149360866, "loss": 0.1695, "step": 30150 }, { "epoch": 1.25, "grad_norm": 0.71484375, "learning_rate": 0.0004978774049322347, "loss": 0.2006, "step": 30160 }, { "epoch": 1.25, "grad_norm": 0.89453125, "learning_rate": 0.0004978759944619045, "loss": 0.2059, "step": 30170 }, { "epoch": 1.25, "grad_norm": 1.375, "learning_rate": 0.0004978745835250984, "loss": 0.2352, "step": 30180 }, { "epoch": 1.25, "grad_norm": 0.4296875, "learning_rate": 0.000497873172121819, "loss": 0.2065, "step": 30190 }, { "epoch": 1.25, "grad_norm": 0.9453125, "learning_rate": 0.0004978717602520692, "loss": 0.2704, "step": 30200 }, { "epoch": 1.25, "grad_norm": 0.76171875, "learning_rate": 0.0004978703479158515, "loss": 0.2881, "step": 30210 }, { "epoch": 1.25, "grad_norm": 1.2421875, "learning_rate": 0.0004978689351131685, "loss": 0.2765, "step": 30220 }, { "epoch": 1.25, "grad_norm": 0.55078125, "learning_rate": 0.000497867521844023, "loss": 0.2729, "step": 30230 }, { "epoch": 1.25, "grad_norm": 0.64453125, "learning_rate": 0.0004978661081084175, "loss": 0.2388, "step": 30240 }, { "epoch": 1.25, "grad_norm": 0.404296875, "learning_rate": 0.0004978646939063548, "loss": 0.2523, "step": 30250 }, { "epoch": 1.25, "grad_norm": 0.77734375, "learning_rate": 0.0004978632792378376, "loss": 0.2598, "step": 30260 }, { "epoch": 1.25, "grad_norm": 0.80078125, "learning_rate": 0.0004978618641028683, "loss": 0.2673, "step": 30270 }, { "epoch": 1.25, "grad_norm": 0.67578125, "learning_rate": 0.0004978604485014499, "loss": 0.2065, "step": 30280 }, { "epoch": 1.25, "grad_norm": 0.42578125, "learning_rate": 0.0004978590324335848, "loss": 0.2479, "step": 30290 }, { "epoch": 1.26, "grad_norm": 0.431640625, "learning_rate": 0.0004978576158992758, "loss": 0.2577, "step": 30300 }, { "epoch": 1.26, "grad_norm": 0.52734375, "learning_rate": 0.0004978561988985256, "loss": 0.2384, "step": 30310 }, { "epoch": 1.26, "grad_norm": 1.9296875, "learning_rate": 0.0004978547814313367, "loss": 0.2085, "step": 30320 }, { "epoch": 1.26, "grad_norm": 0.88671875, "learning_rate": 0.0004978533634977118, "loss": 0.226, "step": 30330 }, { "epoch": 1.26, "grad_norm": 0.62109375, "learning_rate": 0.0004978519450976538, "loss": 0.2493, "step": 30340 }, { "epoch": 1.26, "grad_norm": 0.54296875, "learning_rate": 0.0004978505262311651, "loss": 0.2145, "step": 30350 }, { "epoch": 1.26, "grad_norm": 0.56640625, "learning_rate": 0.0004978491068982486, "loss": 0.2402, "step": 30360 }, { "epoch": 1.26, "grad_norm": 0.400390625, "learning_rate": 0.0004978476870989067, "loss": 0.2039, "step": 30370 }, { "epoch": 1.26, "grad_norm": 1.53125, "learning_rate": 0.0004978462668331423, "loss": 0.2623, "step": 30380 }, { "epoch": 1.26, "grad_norm": 0.8359375, "learning_rate": 0.000497844846100958, "loss": 0.2162, "step": 30390 }, { "epoch": 1.26, "grad_norm": 0.80859375, "learning_rate": 0.0004978434249023563, "loss": 0.2081, "step": 30400 }, { "epoch": 1.26, "grad_norm": 0.609375, "learning_rate": 0.0004978420032373403, "loss": 0.2303, "step": 30410 }, { "epoch": 1.26, "grad_norm": 0.63671875, "learning_rate": 0.0004978405811059122, "loss": 0.2476, "step": 30420 }, { "epoch": 1.26, "grad_norm": 0.51171875, "learning_rate": 0.0004978391585080751, "loss": 0.1821, "step": 30430 }, { "epoch": 1.26, "grad_norm": 0.65625, "learning_rate": 0.0004978377354438313, "loss": 0.264, "step": 30440 }, { "epoch": 1.26, "grad_norm": 0.92578125, "learning_rate": 0.0004978363119131836, "loss": 0.2086, "step": 30450 }, { "epoch": 1.26, "grad_norm": 0.765625, "learning_rate": 0.0004978348879161349, "loss": 0.2325, "step": 30460 }, { "epoch": 1.26, "grad_norm": 0.85546875, "learning_rate": 0.0004978334634526876, "loss": 0.2222, "step": 30470 }, { "epoch": 1.26, "grad_norm": 0.640625, "learning_rate": 0.0004978320385228446, "loss": 0.2264, "step": 30480 }, { "epoch": 1.26, "grad_norm": 0.796875, "learning_rate": 0.0004978306131266083, "loss": 0.1996, "step": 30490 }, { "epoch": 1.26, "grad_norm": 0.984375, "learning_rate": 0.0004978291872639816, "loss": 0.2725, "step": 30500 }, { "epoch": 1.26, "grad_norm": 0.921875, "learning_rate": 0.0004978277609349672, "loss": 0.2362, "step": 30510 }, { "epoch": 1.26, "grad_norm": 0.404296875, "learning_rate": 0.0004978263341395677, "loss": 0.2578, "step": 30520 }, { "epoch": 1.26, "grad_norm": 1.0390625, "learning_rate": 0.0004978249068777857, "loss": 0.2669, "step": 30530 }, { "epoch": 1.26, "grad_norm": 1.0703125, "learning_rate": 0.0004978234791496242, "loss": 0.2051, "step": 30540 }, { "epoch": 1.27, "grad_norm": 0.37890625, "learning_rate": 0.0004978220509550855, "loss": 0.2342, "step": 30550 }, { "epoch": 1.27, "grad_norm": 0.66015625, "learning_rate": 0.0004978206222941725, "loss": 0.2161, "step": 30560 }, { "epoch": 1.27, "grad_norm": 1.0078125, "learning_rate": 0.0004978191931668878, "loss": 0.1928, "step": 30570 }, { "epoch": 1.27, "grad_norm": 0.69140625, "learning_rate": 0.0004978177635732342, "loss": 0.225, "step": 30580 }, { "epoch": 1.27, "grad_norm": 0.90625, "learning_rate": 0.0004978163335132143, "loss": 0.2134, "step": 30590 }, { "epoch": 1.27, "grad_norm": 0.671875, "learning_rate": 0.0004978149029868309, "loss": 0.2774, "step": 30600 }, { "epoch": 1.27, "grad_norm": 1.484375, "learning_rate": 0.0004978134719940866, "loss": 0.211, "step": 30610 }, { "epoch": 1.27, "grad_norm": 0.734375, "learning_rate": 0.0004978120405349839, "loss": 0.181, "step": 30620 }, { "epoch": 1.27, "grad_norm": 0.2373046875, "learning_rate": 0.000497810608609526, "loss": 0.2473, "step": 30630 }, { "epoch": 1.27, "grad_norm": 0.5234375, "learning_rate": 0.0004978091762177151, "loss": 0.2203, "step": 30640 }, { "epoch": 1.27, "grad_norm": 0.484375, "learning_rate": 0.0004978077433595542, "loss": 0.2202, "step": 30650 }, { "epoch": 1.27, "grad_norm": 0.423828125, "learning_rate": 0.0004978063100350459, "loss": 0.2101, "step": 30660 }, { "epoch": 1.27, "grad_norm": 0.86328125, "learning_rate": 0.0004978048762441928, "loss": 0.2421, "step": 30670 }, { "epoch": 1.27, "grad_norm": 0.68359375, "learning_rate": 0.0004978034419869977, "loss": 0.28, "step": 30680 }, { "epoch": 1.27, "grad_norm": 0.8046875, "learning_rate": 0.0004978020072634633, "loss": 0.254, "step": 30690 }, { "epoch": 1.27, "grad_norm": 0.259765625, "learning_rate": 0.0004978005720735923, "loss": 0.2182, "step": 30700 }, { "epoch": 1.27, "grad_norm": 0.7109375, "learning_rate": 0.0004977991364173873, "loss": 0.1637, "step": 30710 }, { "epoch": 1.27, "grad_norm": 0.546875, "learning_rate": 0.0004977977002948512, "loss": 0.2135, "step": 30720 }, { "epoch": 1.27, "grad_norm": 0.57421875, "learning_rate": 0.0004977962637059865, "loss": 0.2245, "step": 30730 }, { "epoch": 1.27, "grad_norm": 0.72265625, "learning_rate": 0.000497794826650796, "loss": 0.2122, "step": 30740 }, { "epoch": 1.27, "grad_norm": 0.439453125, "learning_rate": 0.0004977933891292825, "loss": 0.2489, "step": 30750 }, { "epoch": 1.27, "grad_norm": 0.625, "learning_rate": 0.0004977919511414485, "loss": 0.2401, "step": 30760 }, { "epoch": 1.27, "grad_norm": 0.458984375, "learning_rate": 0.0004977905126872968, "loss": 0.2235, "step": 30770 }, { "epoch": 1.27, "grad_norm": 0.41796875, "learning_rate": 0.0004977890737668301, "loss": 0.2183, "step": 30780 }, { "epoch": 1.28, "grad_norm": 0.369140625, "learning_rate": 0.0004977876343800513, "loss": 0.2103, "step": 30790 }, { "epoch": 1.28, "grad_norm": 0.515625, "learning_rate": 0.0004977861945269627, "loss": 0.2056, "step": 30800 }, { "epoch": 1.28, "grad_norm": 0.70703125, "learning_rate": 0.0004977847542075674, "loss": 0.1752, "step": 30810 }, { "epoch": 1.28, "grad_norm": 0.5390625, "learning_rate": 0.0004977833134218679, "loss": 0.241, "step": 30820 }, { "epoch": 1.28, "grad_norm": 0.294921875, "learning_rate": 0.0004977818721698669, "loss": 0.2131, "step": 30830 }, { "epoch": 1.28, "grad_norm": 0.478515625, "learning_rate": 0.0004977804304515673, "loss": 0.2196, "step": 30840 }, { "epoch": 1.28, "grad_norm": 1.1484375, "learning_rate": 0.0004977789882669716, "loss": 0.2553, "step": 30850 }, { "epoch": 1.28, "grad_norm": 3.5, "learning_rate": 0.0004977775456160826, "loss": 0.2252, "step": 30860 }, { "epoch": 1.28, "grad_norm": 0.5234375, "learning_rate": 0.000497776102498903, "loss": 0.2233, "step": 30870 }, { "epoch": 1.28, "grad_norm": 0.5703125, "learning_rate": 0.0004977746589154357, "loss": 0.2417, "step": 30880 }, { "epoch": 1.28, "grad_norm": 1.0546875, "learning_rate": 0.000497773214865683, "loss": 0.1938, "step": 30890 }, { "epoch": 1.28, "grad_norm": 0.6171875, "learning_rate": 0.0004977717703496479, "loss": 0.2588, "step": 30900 }, { "epoch": 1.28, "grad_norm": 0.79296875, "learning_rate": 0.0004977703253673333, "loss": 0.2528, "step": 30910 }, { "epoch": 1.28, "grad_norm": 1.3125, "learning_rate": 0.0004977688799187415, "loss": 0.2798, "step": 30920 }, { "epoch": 1.28, "grad_norm": 0.64453125, "learning_rate": 0.0004977674340038756, "loss": 0.2145, "step": 30930 }, { "epoch": 1.28, "grad_norm": 0.46875, "learning_rate": 0.000497765987622738, "loss": 0.2988, "step": 30940 }, { "epoch": 1.28, "grad_norm": 0.48046875, "learning_rate": 0.0004977645407753316, "loss": 0.2353, "step": 30950 }, { "epoch": 1.28, "grad_norm": 0.197265625, "learning_rate": 0.000497763093461659, "loss": 0.2396, "step": 30960 }, { "epoch": 1.28, "grad_norm": 0.7265625, "learning_rate": 0.0004977616456817233, "loss": 0.2279, "step": 30970 }, { "epoch": 1.28, "grad_norm": 0.92578125, "learning_rate": 0.0004977601974355267, "loss": 0.2165, "step": 30980 }, { "epoch": 1.28, "grad_norm": 0.458984375, "learning_rate": 0.0004977587487230721, "loss": 0.2786, "step": 30990 }, { "epoch": 1.28, "grad_norm": 0.53125, "learning_rate": 0.0004977572995443624, "loss": 0.2199, "step": 31000 }, { "epoch": 1.28, "grad_norm": 2.0, "learning_rate": 0.0004977558498994003, "loss": 0.2207, "step": 31010 }, { "epoch": 1.28, "grad_norm": 0.6484375, "learning_rate": 0.0004977543997881883, "loss": 0.2411, "step": 31020 }, { "epoch": 1.29, "grad_norm": 0.47265625, "learning_rate": 0.0004977529492107294, "loss": 0.1872, "step": 31030 }, { "epoch": 1.29, "grad_norm": 0.49609375, "learning_rate": 0.0004977514981670261, "loss": 0.2231, "step": 31040 }, { "epoch": 1.29, "grad_norm": 0.33203125, "learning_rate": 0.0004977500466570813, "loss": 0.2462, "step": 31050 }, { "epoch": 1.29, "grad_norm": 1.8359375, "learning_rate": 0.0004977485946808977, "loss": 0.2055, "step": 31060 }, { "epoch": 1.29, "grad_norm": 0.69140625, "learning_rate": 0.0004977471422384779, "loss": 0.2724, "step": 31070 }, { "epoch": 1.29, "grad_norm": 0.74609375, "learning_rate": 0.0004977456893298248, "loss": 0.2735, "step": 31080 }, { "epoch": 1.29, "grad_norm": 1.4609375, "learning_rate": 0.0004977442359549411, "loss": 0.1824, "step": 31090 }, { "epoch": 1.29, "grad_norm": 0.66015625, "learning_rate": 0.0004977427821138296, "loss": 0.2358, "step": 31100 }, { "epoch": 1.29, "grad_norm": 1.1796875, "learning_rate": 0.0004977413278064927, "loss": 0.2254, "step": 31110 }, { "epoch": 1.29, "grad_norm": 0.353515625, "learning_rate": 0.0004977398730329335, "loss": 0.1862, "step": 31120 }, { "epoch": 1.29, "grad_norm": 2.203125, "learning_rate": 0.0004977384177931545, "loss": 0.2135, "step": 31130 }, { "epoch": 1.29, "grad_norm": 0.64453125, "learning_rate": 0.0004977369620871587, "loss": 0.253, "step": 31140 }, { "epoch": 1.29, "grad_norm": 1.03125, "learning_rate": 0.0004977355059149485, "loss": 0.2312, "step": 31150 }, { "epoch": 1.29, "grad_norm": 0.9375, "learning_rate": 0.000497734049276527, "loss": 0.3005, "step": 31160 }, { "epoch": 1.29, "grad_norm": 0.8046875, "learning_rate": 0.0004977325921718967, "loss": 0.2577, "step": 31170 }, { "epoch": 1.29, "grad_norm": 0.890625, "learning_rate": 0.0004977311346010605, "loss": 0.2061, "step": 31180 }, { "epoch": 1.29, "grad_norm": 0.55078125, "learning_rate": 0.000497729676564021, "loss": 0.2037, "step": 31190 }, { "epoch": 1.29, "grad_norm": 1.1328125, "learning_rate": 0.0004977282180607809, "loss": 0.2276, "step": 31200 }, { "epoch": 1.29, "grad_norm": 0.283203125, "learning_rate": 0.0004977267590913433, "loss": 0.2374, "step": 31210 }, { "epoch": 1.29, "grad_norm": 0.75390625, "learning_rate": 0.0004977252996557105, "loss": 0.2287, "step": 31220 }, { "epoch": 1.29, "grad_norm": 2.40625, "learning_rate": 0.0004977238397538855, "loss": 0.2339, "step": 31230 }, { "epoch": 1.29, "grad_norm": 0.77734375, "learning_rate": 0.000497722379385871, "loss": 0.1991, "step": 31240 }, { "epoch": 1.29, "grad_norm": 0.60546875, "learning_rate": 0.0004977209185516695, "loss": 0.222, "step": 31250 }, { "epoch": 1.29, "grad_norm": 2.078125, "learning_rate": 0.0004977194572512842, "loss": 0.2635, "step": 31260 }, { "epoch": 1.3, "grad_norm": 1.40625, "learning_rate": 0.0004977179954847176, "loss": 0.2397, "step": 31270 }, { "epoch": 1.3, "grad_norm": 1.4375, "learning_rate": 0.0004977165332519726, "loss": 0.2012, "step": 31280 }, { "epoch": 1.3, "grad_norm": 2.140625, "learning_rate": 0.0004977150705530516, "loss": 0.2454, "step": 31290 }, { "epoch": 1.3, "grad_norm": 0.984375, "learning_rate": 0.0004977136073879577, "loss": 0.2254, "step": 31300 }, { "epoch": 1.3, "grad_norm": 0.7578125, "learning_rate": 0.0004977121437566937, "loss": 0.1934, "step": 31310 }, { "epoch": 1.3, "grad_norm": 0.6328125, "learning_rate": 0.000497710679659262, "loss": 0.2675, "step": 31320 }, { "epoch": 1.3, "grad_norm": 0.451171875, "learning_rate": 0.0004977092150956656, "loss": 0.2104, "step": 31330 }, { "epoch": 1.3, "grad_norm": 0.54296875, "learning_rate": 0.0004977077500659073, "loss": 0.253, "step": 31340 }, { "epoch": 1.3, "grad_norm": 0.47265625, "learning_rate": 0.0004977062845699896, "loss": 0.2455, "step": 31350 }, { "epoch": 1.3, "grad_norm": 0.5546875, "learning_rate": 0.0004977048186079155, "loss": 0.2399, "step": 31360 }, { "epoch": 1.3, "grad_norm": 0.35546875, "learning_rate": 0.0004977033521796877, "loss": 0.2204, "step": 31370 }, { "epoch": 1.3, "grad_norm": 1.7890625, "learning_rate": 0.000497701885285309, "loss": 0.229, "step": 31380 }, { "epoch": 1.3, "grad_norm": 4.6875, "learning_rate": 0.0004977004179247819, "loss": 0.2091, "step": 31390 }, { "epoch": 1.3, "grad_norm": 0.26953125, "learning_rate": 0.0004976989500981095, "loss": 0.2801, "step": 31400 }, { "epoch": 1.3, "grad_norm": 1.5078125, "learning_rate": 0.0004976974818052944, "loss": 0.191, "step": 31410 }, { "epoch": 1.3, "grad_norm": 0.98046875, "learning_rate": 0.0004976960130463395, "loss": 0.2488, "step": 31420 }, { "epoch": 1.3, "grad_norm": 0.56640625, "learning_rate": 0.0004976945438212473, "loss": 0.2148, "step": 31430 }, { "epoch": 1.3, "grad_norm": 0.30859375, "learning_rate": 0.0004976930741300207, "loss": 0.2022, "step": 31440 }, { "epoch": 1.3, "grad_norm": 0.419921875, "learning_rate": 0.0004976916039726627, "loss": 0.2821, "step": 31450 }, { "epoch": 1.3, "grad_norm": 0.90625, "learning_rate": 0.0004976901333491755, "loss": 0.253, "step": 31460 }, { "epoch": 1.3, "grad_norm": 0.859375, "learning_rate": 0.0004976886622595625, "loss": 0.2097, "step": 31470 }, { "epoch": 1.3, "grad_norm": 0.71484375, "learning_rate": 0.0004976871907038261, "loss": 0.2773, "step": 31480 }, { "epoch": 1.3, "grad_norm": 1.03125, "learning_rate": 0.0004976857186819692, "loss": 0.2952, "step": 31490 }, { "epoch": 1.3, "grad_norm": 0.61328125, "learning_rate": 0.0004976842461939944, "loss": 0.2015, "step": 31500 }, { "epoch": 1.31, "grad_norm": 0.392578125, "learning_rate": 0.0004976827732399048, "loss": 0.2325, "step": 31510 }, { "epoch": 1.31, "grad_norm": 0.890625, "learning_rate": 0.0004976812998197027, "loss": 0.2291, "step": 31520 }, { "epoch": 1.31, "grad_norm": 1.1953125, "learning_rate": 0.0004976798259333913, "loss": 0.2584, "step": 31530 }, { "epoch": 1.31, "grad_norm": 0.64453125, "learning_rate": 0.0004976783515809733, "loss": 0.1883, "step": 31540 }, { "epoch": 1.31, "grad_norm": 0.9453125, "learning_rate": 0.0004976768767624513, "loss": 0.2004, "step": 31550 }, { "epoch": 1.31, "grad_norm": 1.25, "learning_rate": 0.0004976754014778281, "loss": 0.319, "step": 31560 }, { "epoch": 1.31, "grad_norm": 1.53125, "learning_rate": 0.0004976739257271066, "loss": 0.2449, "step": 31570 }, { "epoch": 1.31, "grad_norm": 0.578125, "learning_rate": 0.0004976724495102896, "loss": 0.2172, "step": 31580 }, { "epoch": 1.31, "grad_norm": 0.3203125, "learning_rate": 0.0004976709728273797, "loss": 0.222, "step": 31590 }, { "epoch": 1.31, "grad_norm": 0.47265625, "learning_rate": 0.0004976694956783798, "loss": 0.1728, "step": 31600 }, { "epoch": 1.31, "grad_norm": 0.84375, "learning_rate": 0.0004976680180632927, "loss": 0.215, "step": 31610 }, { "epoch": 1.31, "grad_norm": 0.5703125, "learning_rate": 0.000497666539982121, "loss": 0.1708, "step": 31620 }, { "epoch": 1.31, "grad_norm": 1.84375, "learning_rate": 0.0004976650614348677, "loss": 0.1994, "step": 31630 }, { "epoch": 1.31, "grad_norm": 0.435546875, "learning_rate": 0.0004976635824215356, "loss": 0.2158, "step": 31640 }, { "epoch": 1.31, "grad_norm": 0.47265625, "learning_rate": 0.0004976621029421272, "loss": 0.2836, "step": 31650 }, { "epoch": 1.31, "grad_norm": 0.5859375, "learning_rate": 0.0004976606229966455, "loss": 0.2073, "step": 31660 }, { "epoch": 1.31, "grad_norm": 1.234375, "learning_rate": 0.0004976591425850933, "loss": 0.2665, "step": 31670 }, { "epoch": 1.31, "grad_norm": 0.5625, "learning_rate": 0.0004976576617074734, "loss": 0.2502, "step": 31680 }, { "epoch": 1.31, "grad_norm": 0.52734375, "learning_rate": 0.0004976561803637884, "loss": 0.2074, "step": 31690 }, { "epoch": 1.31, "grad_norm": 1.1171875, "learning_rate": 0.0004976546985540414, "loss": 0.2976, "step": 31700 }, { "epoch": 1.31, "grad_norm": 1.125, "learning_rate": 0.0004976532162782348, "loss": 0.1696, "step": 31710 }, { "epoch": 1.31, "grad_norm": 0.625, "learning_rate": 0.0004976517335363716, "loss": 0.3082, "step": 31720 }, { "epoch": 1.31, "grad_norm": 0.35546875, "learning_rate": 0.0004976502503284547, "loss": 0.2133, "step": 31730 }, { "epoch": 1.31, "grad_norm": 0.380859375, "learning_rate": 0.0004976487666544868, "loss": 0.2313, "step": 31740 }, { "epoch": 1.32, "grad_norm": 0.6640625, "learning_rate": 0.0004976472825144705, "loss": 0.2453, "step": 31750 }, { "epoch": 1.32, "grad_norm": 0.9453125, "learning_rate": 0.000497645797908409, "loss": 0.1966, "step": 31760 }, { "epoch": 1.32, "grad_norm": 1.8359375, "learning_rate": 0.0004976443128363046, "loss": 0.2073, "step": 31770 }, { "epoch": 1.32, "grad_norm": 0.474609375, "learning_rate": 0.0004976428272981605, "loss": 0.2297, "step": 31780 }, { "epoch": 1.32, "grad_norm": 0.515625, "learning_rate": 0.0004976413412939792, "loss": 0.1888, "step": 31790 }, { "epoch": 1.32, "grad_norm": 1.015625, "learning_rate": 0.0004976398548237638, "loss": 0.2413, "step": 31800 }, { "epoch": 1.32, "grad_norm": 0.54296875, "learning_rate": 0.0004976383678875168, "loss": 0.2598, "step": 31810 }, { "epoch": 1.32, "grad_norm": 0.875, "learning_rate": 0.0004976368804852412, "loss": 0.3119, "step": 31820 }, { "epoch": 1.32, "grad_norm": 0.51171875, "learning_rate": 0.0004976353926169398, "loss": 0.2339, "step": 31830 }, { "epoch": 1.32, "grad_norm": 0.546875, "learning_rate": 0.0004976339042826152, "loss": 0.2267, "step": 31840 }, { "epoch": 1.32, "grad_norm": 0.79296875, "learning_rate": 0.0004976324154822704, "loss": 0.2861, "step": 31850 }, { "epoch": 1.32, "grad_norm": 1.34375, "learning_rate": 0.0004976309262159081, "loss": 0.205, "step": 31860 }, { "epoch": 1.32, "grad_norm": 0.6171875, "learning_rate": 0.0004976294364835313, "loss": 0.2636, "step": 31870 }, { "epoch": 1.32, "grad_norm": 0.6328125, "learning_rate": 0.0004976279462851424, "loss": 0.2281, "step": 31880 }, { "epoch": 1.32, "grad_norm": 0.326171875, "learning_rate": 0.0004976264556207446, "loss": 0.2523, "step": 31890 }, { "epoch": 1.32, "grad_norm": 0.77734375, "learning_rate": 0.0004976249644903405, "loss": 0.2484, "step": 31900 }, { "epoch": 1.32, "grad_norm": 0.55078125, "learning_rate": 0.000497623472893933, "loss": 0.2287, "step": 31910 }, { "epoch": 1.32, "grad_norm": 1.59375, "learning_rate": 0.0004976219808315248, "loss": 0.2188, "step": 31920 }, { "epoch": 1.32, "grad_norm": 0.359375, "learning_rate": 0.0004976204883031188, "loss": 0.2556, "step": 31930 }, { "epoch": 1.32, "grad_norm": 0.376953125, "learning_rate": 0.0004976189953087178, "loss": 0.2056, "step": 31940 }, { "epoch": 1.32, "grad_norm": 1.0390625, "learning_rate": 0.0004976175018483245, "loss": 0.2676, "step": 31950 }, { "epoch": 1.32, "grad_norm": 0.400390625, "learning_rate": 0.000497616007921942, "loss": 0.2145, "step": 31960 }, { "epoch": 1.32, "grad_norm": 0.859375, "learning_rate": 0.0004976145135295728, "loss": 0.245, "step": 31970 }, { "epoch": 1.32, "grad_norm": 0.5234375, "learning_rate": 0.0004976130186712198, "loss": 0.2102, "step": 31980 }, { "epoch": 1.33, "grad_norm": 0.9921875, "learning_rate": 0.0004976115233468858, "loss": 0.1795, "step": 31990 }, { "epoch": 1.33, "grad_norm": 0.54296875, "learning_rate": 0.0004976100275565738, "loss": 0.208, "step": 32000 }, { "epoch": 1.33, "grad_norm": 0.380859375, "learning_rate": 0.0004976085313002864, "loss": 0.2244, "step": 32010 }, { "epoch": 1.33, "grad_norm": 0.443359375, "learning_rate": 0.0004976070345780264, "loss": 0.262, "step": 32020 }, { "epoch": 1.33, "grad_norm": 0.6875, "learning_rate": 0.0004976055373897968, "loss": 0.1825, "step": 32030 }, { "epoch": 1.33, "grad_norm": 4.59375, "learning_rate": 0.0004976040397356004, "loss": 0.3612, "step": 32040 }, { "epoch": 1.33, "grad_norm": 0.5546875, "learning_rate": 0.0004976025416154398, "loss": 0.24, "step": 32050 }, { "epoch": 1.33, "grad_norm": 0.220703125, "learning_rate": 0.0004976010430293178, "loss": 0.2125, "step": 32060 }, { "epoch": 1.33, "grad_norm": 0.58203125, "learning_rate": 0.0004975995439772376, "loss": 0.2179, "step": 32070 }, { "epoch": 1.33, "grad_norm": 0.427734375, "learning_rate": 0.0004975980444592018, "loss": 0.2522, "step": 32080 }, { "epoch": 1.33, "grad_norm": 1.0234375, "learning_rate": 0.0004975965444752132, "loss": 0.2569, "step": 32090 }, { "epoch": 1.33, "grad_norm": 0.494140625, "learning_rate": 0.0004975950440252745, "loss": 0.2787, "step": 32100 }, { "epoch": 1.33, "grad_norm": 0.83203125, "learning_rate": 0.0004975935431093888, "loss": 0.1881, "step": 32110 }, { "epoch": 1.33, "grad_norm": 0.453125, "learning_rate": 0.0004975920417275587, "loss": 0.2643, "step": 32120 }, { "epoch": 1.33, "grad_norm": 0.5078125, "learning_rate": 0.0004975905398797873, "loss": 0.2513, "step": 32130 }, { "epoch": 1.33, "grad_norm": 1.828125, "learning_rate": 0.0004975890375660771, "loss": 0.2441, "step": 32140 }, { "epoch": 1.33, "grad_norm": 0.443359375, "learning_rate": 0.000497587534786431, "loss": 0.2425, "step": 32150 }, { "epoch": 1.33, "grad_norm": 0.5546875, "learning_rate": 0.000497586031540852, "loss": 0.2449, "step": 32160 }, { "epoch": 1.33, "grad_norm": 0.6171875, "learning_rate": 0.0004975845278293429, "loss": 0.2251, "step": 32170 }, { "epoch": 1.33, "grad_norm": 0.52734375, "learning_rate": 0.0004975830236519063, "loss": 0.2257, "step": 32180 }, { "epoch": 1.33, "grad_norm": 0.80859375, "learning_rate": 0.0004975815190085453, "loss": 0.298, "step": 32190 }, { "epoch": 1.33, "grad_norm": 0.890625, "learning_rate": 0.0004975800138992626, "loss": 0.2576, "step": 32200 }, { "epoch": 1.33, "grad_norm": 0.2734375, "learning_rate": 0.0004975785083240609, "loss": 0.178, "step": 32210 }, { "epoch": 1.33, "grad_norm": 0.91796875, "learning_rate": 0.0004975770022829434, "loss": 0.2709, "step": 32220 }, { "epoch": 1.33, "grad_norm": 1.1875, "learning_rate": 0.0004975754957759126, "loss": 0.2237, "step": 32230 }, { "epoch": 1.34, "grad_norm": 1.8203125, "learning_rate": 0.0004975739888029714, "loss": 0.2505, "step": 32240 }, { "epoch": 1.34, "grad_norm": 0.8359375, "learning_rate": 0.0004975724813641228, "loss": 0.2173, "step": 32250 }, { "epoch": 1.34, "grad_norm": 0.55078125, "learning_rate": 0.0004975709734593696, "loss": 0.2095, "step": 32260 }, { "epoch": 1.34, "grad_norm": 0.5859375, "learning_rate": 0.0004975694650887144, "loss": 0.237, "step": 32270 }, { "epoch": 1.34, "grad_norm": 0.447265625, "learning_rate": 0.0004975679562521602, "loss": 0.2681, "step": 32280 }, { "epoch": 1.34, "grad_norm": 0.875, "learning_rate": 0.00049756644694971, "loss": 0.2306, "step": 32290 }, { "epoch": 1.34, "grad_norm": 0.78125, "learning_rate": 0.0004975649371813663, "loss": 0.2532, "step": 32300 }, { "epoch": 1.34, "grad_norm": 0.79296875, "learning_rate": 0.0004975634269471322, "loss": 0.2358, "step": 32310 }, { "epoch": 1.34, "grad_norm": 0.62890625, "learning_rate": 0.0004975619162470104, "loss": 0.2668, "step": 32320 }, { "epoch": 1.34, "grad_norm": 0.400390625, "learning_rate": 0.000497560405081004, "loss": 0.2721, "step": 32330 }, { "epoch": 1.34, "grad_norm": 0.326171875, "learning_rate": 0.0004975588934491154, "loss": 0.1985, "step": 32340 }, { "epoch": 1.34, "grad_norm": 1.3984375, "learning_rate": 0.0004975573813513479, "loss": 0.2611, "step": 32350 }, { "epoch": 1.34, "grad_norm": 0.67578125, "learning_rate": 0.000497555868787704, "loss": 0.2644, "step": 32360 }, { "epoch": 1.34, "grad_norm": 0.74609375, "learning_rate": 0.0004975543557581868, "loss": 0.2361, "step": 32370 }, { "epoch": 1.34, "grad_norm": 0.455078125, "learning_rate": 0.000497552842262799, "loss": 0.213, "step": 32380 }, { "epoch": 1.34, "grad_norm": 0.46484375, "learning_rate": 0.0004975513283015434, "loss": 0.29, "step": 32390 }, { "epoch": 1.34, "grad_norm": 0.357421875, "learning_rate": 0.0004975498138744231, "loss": 0.1909, "step": 32400 }, { "epoch": 1.34, "grad_norm": 0.283203125, "learning_rate": 0.0004975482989814406, "loss": 0.2191, "step": 32410 }, { "epoch": 1.34, "grad_norm": 0.69140625, "learning_rate": 0.0004975467836225991, "loss": 0.2252, "step": 32420 }, { "epoch": 1.34, "grad_norm": 0.890625, "learning_rate": 0.0004975452677979012, "loss": 0.2582, "step": 32430 }, { "epoch": 1.34, "grad_norm": 1.2890625, "learning_rate": 0.0004975437515073499, "loss": 0.2052, "step": 32440 }, { "epoch": 1.34, "grad_norm": 0.255859375, "learning_rate": 0.0004975422347509479, "loss": 0.2016, "step": 32450 }, { "epoch": 1.34, "grad_norm": 0.2197265625, "learning_rate": 0.0004975407175286981, "loss": 0.1805, "step": 32460 }, { "epoch": 1.34, "grad_norm": 0.5546875, "learning_rate": 0.0004975391998406036, "loss": 0.207, "step": 32470 }, { "epoch": 1.35, "grad_norm": 0.890625, "learning_rate": 0.0004975376816866669, "loss": 0.2258, "step": 32480 }, { "epoch": 1.35, "grad_norm": 0.50390625, "learning_rate": 0.0004975361630668911, "loss": 0.2221, "step": 32490 }, { "epoch": 1.35, "grad_norm": 0.6875, "learning_rate": 0.0004975346439812789, "loss": 0.2446, "step": 32500 }, { "epoch": 1.35, "grad_norm": 0.369140625, "learning_rate": 0.0004975331244298333, "loss": 0.248, "step": 32510 }, { "epoch": 1.35, "grad_norm": 0.419921875, "learning_rate": 0.0004975316044125571, "loss": 0.1889, "step": 32520 }, { "epoch": 1.35, "grad_norm": 0.9140625, "learning_rate": 0.000497530083929453, "loss": 0.2818, "step": 32530 }, { "epoch": 1.35, "grad_norm": 0.7421875, "learning_rate": 0.0004975285629805241, "loss": 0.2763, "step": 32540 }, { "epoch": 1.35, "grad_norm": 0.48046875, "learning_rate": 0.0004975270415657732, "loss": 0.2207, "step": 32550 }, { "epoch": 1.35, "grad_norm": 0.76171875, "learning_rate": 0.0004975255196852031, "loss": 0.2306, "step": 32560 }, { "epoch": 1.35, "grad_norm": 0.365234375, "learning_rate": 0.0004975239973388168, "loss": 0.1824, "step": 32570 }, { "epoch": 1.35, "grad_norm": 0.52734375, "learning_rate": 0.000497522474526617, "loss": 0.2707, "step": 32580 }, { "epoch": 1.35, "grad_norm": 0.3984375, "learning_rate": 0.0004975209512486067, "loss": 0.2436, "step": 32590 }, { "epoch": 1.35, "grad_norm": 0.65234375, "learning_rate": 0.0004975194275047886, "loss": 0.2286, "step": 32600 }, { "epoch": 1.35, "grad_norm": 0.419921875, "learning_rate": 0.0004975179032951657, "loss": 0.2406, "step": 32610 }, { "epoch": 1.35, "grad_norm": 0.6640625, "learning_rate": 0.0004975163786197409, "loss": 0.1771, "step": 32620 }, { "epoch": 1.35, "grad_norm": 0.2177734375, "learning_rate": 0.0004975148534785169, "loss": 0.2174, "step": 32630 }, { "epoch": 1.35, "grad_norm": 0.6484375, "learning_rate": 0.0004975133278714967, "loss": 0.2257, "step": 32640 }, { "epoch": 1.35, "grad_norm": 1.0625, "learning_rate": 0.0004975118017986832, "loss": 0.266, "step": 32650 }, { "epoch": 1.35, "grad_norm": 0.515625, "learning_rate": 0.0004975102752600791, "loss": 0.2381, "step": 32660 }, { "epoch": 1.35, "grad_norm": 0.27734375, "learning_rate": 0.0004975087482556875, "loss": 0.2707, "step": 32670 }, { "epoch": 1.35, "grad_norm": 0.58984375, "learning_rate": 0.0004975072207855112, "loss": 0.242, "step": 32680 }, { "epoch": 1.35, "grad_norm": 0.29296875, "learning_rate": 0.000497505692849553, "loss": 0.2092, "step": 32690 }, { "epoch": 1.35, "grad_norm": 0.279296875, "learning_rate": 0.0004975041644478157, "loss": 0.2647, "step": 32700 }, { "epoch": 1.35, "grad_norm": 0.6171875, "learning_rate": 0.0004975026355803024, "loss": 0.2037, "step": 32710 }, { "epoch": 1.36, "grad_norm": 0.302734375, "learning_rate": 0.0004975011062470159, "loss": 0.2289, "step": 32720 }, { "epoch": 1.36, "grad_norm": 0.53515625, "learning_rate": 0.0004974995764479589, "loss": 0.1803, "step": 32730 }, { "epoch": 1.36, "grad_norm": 0.56640625, "learning_rate": 0.0004974980461831345, "loss": 0.1966, "step": 32740 }, { "epoch": 1.36, "grad_norm": 1.03125, "learning_rate": 0.0004974965154525456, "loss": 0.26, "step": 32750 }, { "epoch": 1.36, "grad_norm": 0.53125, "learning_rate": 0.0004974949842561949, "loss": 0.2281, "step": 32760 }, { "epoch": 1.36, "grad_norm": 0.703125, "learning_rate": 0.0004974934525940854, "loss": 0.249, "step": 32770 }, { "epoch": 1.36, "grad_norm": 0.75, "learning_rate": 0.0004974919204662199, "loss": 0.2527, "step": 32780 }, { "epoch": 1.36, "grad_norm": 0.3125, "learning_rate": 0.0004974903878726014, "loss": 0.2104, "step": 32790 }, { "epoch": 1.36, "grad_norm": 0.7578125, "learning_rate": 0.0004974888548132326, "loss": 0.2529, "step": 32800 }, { "epoch": 1.36, "grad_norm": 0.97265625, "learning_rate": 0.0004974873212881166, "loss": 0.2916, "step": 32810 }, { "epoch": 1.36, "grad_norm": 0.455078125, "learning_rate": 0.0004974857872972562, "loss": 0.266, "step": 32820 }, { "epoch": 1.36, "grad_norm": 0.96484375, "learning_rate": 0.0004974842528406542, "loss": 0.2606, "step": 32830 }, { "epoch": 1.36, "grad_norm": 0.49609375, "learning_rate": 0.0004974827179183136, "loss": 0.1918, "step": 32840 }, { "epoch": 1.36, "grad_norm": 1.328125, "learning_rate": 0.0004974811825302373, "loss": 0.2369, "step": 32850 }, { "epoch": 1.36, "grad_norm": 0.703125, "learning_rate": 0.0004974796466764281, "loss": 0.2363, "step": 32860 }, { "epoch": 1.36, "grad_norm": 0.71484375, "learning_rate": 0.0004974781103568889, "loss": 0.2438, "step": 32870 }, { "epoch": 1.36, "grad_norm": 0.58984375, "learning_rate": 0.0004974765735716227, "loss": 0.1976, "step": 32880 }, { "epoch": 1.36, "grad_norm": 1.0078125, "learning_rate": 0.0004974750363206322, "loss": 0.2125, "step": 32890 }, { "epoch": 1.36, "grad_norm": 0.462890625, "learning_rate": 0.0004974734986039204, "loss": 0.2248, "step": 32900 }, { "epoch": 1.36, "grad_norm": 0.71484375, "learning_rate": 0.0004974719604214904, "loss": 0.2357, "step": 32910 }, { "epoch": 1.36, "grad_norm": 0.77734375, "learning_rate": 0.0004974704217733446, "loss": 0.2727, "step": 32920 }, { "epoch": 1.36, "grad_norm": 0.78125, "learning_rate": 0.0004974688826594865, "loss": 0.2314, "step": 32930 }, { "epoch": 1.36, "grad_norm": 0.2314453125, "learning_rate": 0.0004974673430799185, "loss": 0.2381, "step": 32940 }, { "epoch": 1.36, "grad_norm": 0.46875, "learning_rate": 0.0004974658030346438, "loss": 0.2457, "step": 32950 }, { "epoch": 1.37, "grad_norm": 0.80078125, "learning_rate": 0.000497464262523665, "loss": 0.2398, "step": 32960 }, { "epoch": 1.37, "grad_norm": 0.65625, "learning_rate": 0.0004974627215469853, "loss": 0.1927, "step": 32970 }, { "epoch": 1.37, "grad_norm": 0.43359375, "learning_rate": 0.0004974611801046075, "loss": 0.2194, "step": 32980 }, { "epoch": 1.37, "grad_norm": 0.91015625, "learning_rate": 0.0004974596381965344, "loss": 0.267, "step": 32990 }, { "epoch": 1.37, "grad_norm": 0.765625, "learning_rate": 0.000497458095822769, "loss": 0.2736, "step": 33000 }, { "epoch": 1.37, "grad_norm": 0.5078125, "learning_rate": 0.0004974565529833143, "loss": 0.2545, "step": 33010 }, { "epoch": 1.37, "grad_norm": 0.51953125, "learning_rate": 0.0004974550096781729, "loss": 0.1434, "step": 33020 }, { "epoch": 1.37, "grad_norm": 0.384765625, "learning_rate": 0.0004974534659073481, "loss": 0.254, "step": 33030 }, { "epoch": 1.37, "grad_norm": 1.0390625, "learning_rate": 0.0004974519216708425, "loss": 0.2121, "step": 33040 }, { "epoch": 1.37, "grad_norm": 0.34765625, "learning_rate": 0.0004974503769686592, "loss": 0.2612, "step": 33050 }, { "epoch": 1.37, "grad_norm": 0.75390625, "learning_rate": 0.0004974488318008008, "loss": 0.2154, "step": 33060 }, { "epoch": 1.37, "grad_norm": 0.76171875, "learning_rate": 0.0004974472861672706, "loss": 0.2612, "step": 33070 }, { "epoch": 1.37, "grad_norm": 0.59765625, "learning_rate": 0.0004974457400680713, "loss": 0.2274, "step": 33080 }, { "epoch": 1.37, "grad_norm": 0.404296875, "learning_rate": 0.0004974441935032059, "loss": 0.2118, "step": 33090 }, { "epoch": 1.37, "grad_norm": 0.53125, "learning_rate": 0.000497442646472677, "loss": 0.2302, "step": 33100 }, { "epoch": 1.37, "grad_norm": 0.310546875, "learning_rate": 0.000497441098976488, "loss": 0.1775, "step": 33110 }, { "epoch": 1.37, "grad_norm": 0.74609375, "learning_rate": 0.0004974395510146415, "loss": 0.2501, "step": 33120 }, { "epoch": 1.37, "grad_norm": 0.875, "learning_rate": 0.0004974380025871405, "loss": 0.2409, "step": 33130 }, { "epoch": 1.37, "grad_norm": 0.52734375, "learning_rate": 0.0004974364536939878, "loss": 0.2412, "step": 33140 }, { "epoch": 1.37, "grad_norm": 0.65625, "learning_rate": 0.0004974349043351865, "loss": 0.2513, "step": 33150 }, { "epoch": 1.37, "grad_norm": 0.74609375, "learning_rate": 0.0004974333545107395, "loss": 0.2502, "step": 33160 }, { "epoch": 1.37, "grad_norm": 0.88671875, "learning_rate": 0.0004974318042206495, "loss": 0.1957, "step": 33170 }, { "epoch": 1.37, "grad_norm": 1.171875, "learning_rate": 0.0004974302534649196, "loss": 0.2504, "step": 33180 }, { "epoch": 1.37, "grad_norm": 0.609375, "learning_rate": 0.0004974287022435528, "loss": 0.2253, "step": 33190 }, { "epoch": 1.38, "grad_norm": 0.7421875, "learning_rate": 0.0004974271505565519, "loss": 0.2018, "step": 33200 }, { "epoch": 1.38, "grad_norm": 1.9453125, "learning_rate": 0.0004974255984039196, "loss": 0.1939, "step": 33210 }, { "epoch": 1.38, "grad_norm": 0.9609375, "learning_rate": 0.0004974240457856592, "loss": 0.2707, "step": 33220 }, { "epoch": 1.38, "grad_norm": 0.78515625, "learning_rate": 0.0004974224927017736, "loss": 0.2298, "step": 33230 }, { "epoch": 1.38, "grad_norm": 0.3984375, "learning_rate": 0.0004974209391522653, "loss": 0.2173, "step": 33240 }, { "epoch": 1.38, "grad_norm": 0.140625, "learning_rate": 0.0004974193851371377, "loss": 0.3204, "step": 33250 }, { "epoch": 1.38, "grad_norm": 0.76953125, "learning_rate": 0.0004974178306563936, "loss": 0.2167, "step": 33260 }, { "epoch": 1.38, "grad_norm": 0.53125, "learning_rate": 0.0004974162757100356, "loss": 0.224, "step": 33270 }, { "epoch": 1.38, "grad_norm": 0.66015625, "learning_rate": 0.0004974147202980671, "loss": 0.2371, "step": 33280 }, { "epoch": 1.38, "grad_norm": 0.69921875, "learning_rate": 0.0004974131644204908, "loss": 0.1639, "step": 33290 }, { "epoch": 1.38, "grad_norm": 1.2578125, "learning_rate": 0.0004974116080773095, "loss": 0.1847, "step": 33300 }, { "epoch": 1.38, "grad_norm": 0.400390625, "learning_rate": 0.0004974100512685264, "loss": 0.2106, "step": 33310 }, { "epoch": 1.38, "grad_norm": 0.79296875, "learning_rate": 0.0004974084939941444, "loss": 0.2672, "step": 33320 }, { "epoch": 1.38, "grad_norm": 0.4921875, "learning_rate": 0.0004974069362541661, "loss": 0.1985, "step": 33330 }, { "epoch": 1.38, "grad_norm": 1.1015625, "learning_rate": 0.0004974053780485948, "loss": 0.2302, "step": 33340 }, { "epoch": 1.38, "grad_norm": 0.8671875, "learning_rate": 0.0004974038193774333, "loss": 0.3218, "step": 33350 }, { "epoch": 1.38, "grad_norm": 0.703125, "learning_rate": 0.0004974022602406844, "loss": 0.2386, "step": 33360 }, { "epoch": 1.38, "grad_norm": 0.81640625, "learning_rate": 0.0004974007006383513, "loss": 0.2612, "step": 33370 }, { "epoch": 1.38, "grad_norm": 0.6875, "learning_rate": 0.0004973991405704367, "loss": 0.2085, "step": 33380 }, { "epoch": 1.38, "grad_norm": 0.3671875, "learning_rate": 0.0004973975800369438, "loss": 0.2308, "step": 33390 }, { "epoch": 1.38, "grad_norm": 0.76953125, "learning_rate": 0.0004973960190378752, "loss": 0.2609, "step": 33400 }, { "epoch": 1.38, "grad_norm": 0.66015625, "learning_rate": 0.0004973944575732341, "loss": 0.215, "step": 33410 }, { "epoch": 1.38, "grad_norm": 0.546875, "learning_rate": 0.0004973928956430233, "loss": 0.2207, "step": 33420 }, { "epoch": 1.38, "grad_norm": 0.515625, "learning_rate": 0.0004973913332472458, "loss": 0.2002, "step": 33430 }, { "epoch": 1.39, "grad_norm": 0.443359375, "learning_rate": 0.0004973897703859046, "loss": 0.2622, "step": 33440 }, { "epoch": 1.39, "grad_norm": 0.40625, "learning_rate": 0.0004973882070590025, "loss": 0.1508, "step": 33450 }, { "epoch": 1.39, "grad_norm": 0.90234375, "learning_rate": 0.0004973866432665425, "loss": 0.2182, "step": 33460 }, { "epoch": 1.39, "grad_norm": 0.97265625, "learning_rate": 0.0004973850790085276, "loss": 0.2433, "step": 33470 }, { "epoch": 1.39, "grad_norm": 0.28125, "learning_rate": 0.0004973835142849607, "loss": 0.2446, "step": 33480 }, { "epoch": 1.39, "grad_norm": 0.73828125, "learning_rate": 0.0004973819490958446, "loss": 0.2571, "step": 33490 }, { "epoch": 1.39, "grad_norm": 0.8515625, "learning_rate": 0.0004973803834411825, "loss": 0.266, "step": 33500 }, { "epoch": 1.39, "grad_norm": 0.94140625, "learning_rate": 0.0004973788173209772, "loss": 0.2425, "step": 33510 }, { "epoch": 1.39, "grad_norm": 0.86328125, "learning_rate": 0.0004973772507352317, "loss": 0.2241, "step": 33520 }, { "epoch": 1.39, "grad_norm": 0.7421875, "learning_rate": 0.000497375683683949, "loss": 0.1952, "step": 33530 }, { "epoch": 1.39, "grad_norm": 0.1982421875, "learning_rate": 0.0004973741161671319, "loss": 0.254, "step": 33540 }, { "epoch": 1.39, "grad_norm": 0.765625, "learning_rate": 0.0004973725481847834, "loss": 0.2539, "step": 33550 }, { "epoch": 1.39, "grad_norm": 0.953125, "learning_rate": 0.0004973709797369065, "loss": 0.2909, "step": 33560 }, { "epoch": 1.39, "grad_norm": 0.58203125, "learning_rate": 0.0004973694108235041, "loss": 0.273, "step": 33570 }, { "epoch": 1.39, "grad_norm": 1.3671875, "learning_rate": 0.0004973678414445793, "loss": 0.2238, "step": 33580 }, { "epoch": 1.39, "grad_norm": 0.439453125, "learning_rate": 0.0004973662716001349, "loss": 0.2471, "step": 33590 }, { "epoch": 1.39, "grad_norm": 2.5625, "learning_rate": 0.0004973647012901739, "loss": 0.2268, "step": 33600 }, { "epoch": 1.39, "grad_norm": 1.625, "learning_rate": 0.0004973631305146991, "loss": 0.218, "step": 33610 }, { "epoch": 1.39, "grad_norm": 0.6640625, "learning_rate": 0.0004973615592737137, "loss": 0.2226, "step": 33620 }, { "epoch": 1.39, "grad_norm": 0.462890625, "learning_rate": 0.0004973599875672206, "loss": 0.2764, "step": 33630 }, { "epoch": 1.39, "grad_norm": 0.58203125, "learning_rate": 0.0004973584153952226, "loss": 0.1981, "step": 33640 }, { "epoch": 1.39, "grad_norm": 0.50390625, "learning_rate": 0.0004973568427577229, "loss": 0.198, "step": 33650 }, { "epoch": 1.39, "grad_norm": 0.51953125, "learning_rate": 0.0004973552696547242, "loss": 0.1861, "step": 33660 }, { "epoch": 1.39, "grad_norm": 0.65625, "learning_rate": 0.0004973536960862297, "loss": 0.2692, "step": 33670 }, { "epoch": 1.4, "grad_norm": 0.78515625, "learning_rate": 0.0004973521220522422, "loss": 0.2121, "step": 33680 }, { "epoch": 1.4, "grad_norm": 0.953125, "learning_rate": 0.0004973505475527648, "loss": 0.2738, "step": 33690 }, { "epoch": 1.4, "grad_norm": 0.79296875, "learning_rate": 0.0004973489725878003, "loss": 0.2514, "step": 33700 }, { "epoch": 1.4, "grad_norm": 0.93359375, "learning_rate": 0.0004973473971573518, "loss": 0.2225, "step": 33710 }, { "epoch": 1.4, "grad_norm": 0.54296875, "learning_rate": 0.0004973458212614222, "loss": 0.2376, "step": 33720 }, { "epoch": 1.4, "grad_norm": 0.5078125, "learning_rate": 0.0004973442449000145, "loss": 0.242, "step": 33730 }, { "epoch": 1.4, "grad_norm": 0.703125, "learning_rate": 0.0004973426680731315, "loss": 0.2222, "step": 33740 }, { "epoch": 1.4, "grad_norm": 1.4453125, "learning_rate": 0.0004973410907807764, "loss": 0.1971, "step": 33750 }, { "epoch": 1.4, "grad_norm": 2.046875, "learning_rate": 0.0004973395130229522, "loss": 0.2418, "step": 33760 }, { "epoch": 1.4, "grad_norm": 0.7265625, "learning_rate": 0.0004973379347996617, "loss": 0.2073, "step": 33770 }, { "epoch": 1.4, "grad_norm": 0.5234375, "learning_rate": 0.0004973363561109078, "loss": 0.1941, "step": 33780 }, { "epoch": 1.4, "grad_norm": 0.703125, "learning_rate": 0.0004973347769566936, "loss": 0.2276, "step": 33790 }, { "epoch": 1.4, "grad_norm": 0.41796875, "learning_rate": 0.0004973331973370221, "loss": 0.1788, "step": 33800 }, { "epoch": 1.4, "grad_norm": 0.875, "learning_rate": 0.0004973316172518962, "loss": 0.2064, "step": 33810 }, { "epoch": 1.4, "grad_norm": 0.625, "learning_rate": 0.000497330036701319, "loss": 0.2022, "step": 33820 }, { "epoch": 1.4, "grad_norm": 0.40625, "learning_rate": 0.0004973284556852933, "loss": 0.2576, "step": 33830 }, { "epoch": 1.4, "grad_norm": 1.0234375, "learning_rate": 0.0004973268742038222, "loss": 0.238, "step": 33840 }, { "epoch": 1.4, "grad_norm": 1.2265625, "learning_rate": 0.0004973252922569086, "loss": 0.2106, "step": 33850 }, { "epoch": 1.4, "grad_norm": 0.251953125, "learning_rate": 0.0004973237098445555, "loss": 0.2035, "step": 33860 }, { "epoch": 1.4, "grad_norm": 2.109375, "learning_rate": 0.0004973221269667659, "loss": 0.234, "step": 33870 }, { "epoch": 1.4, "grad_norm": 1.8125, "learning_rate": 0.0004973205436235428, "loss": 0.2361, "step": 33880 }, { "epoch": 1.4, "grad_norm": 0.60546875, "learning_rate": 0.0004973189598148891, "loss": 0.2585, "step": 33890 }, { "epoch": 1.4, "grad_norm": 0.62890625, "learning_rate": 0.0004973173755408078, "loss": 0.206, "step": 33900 }, { "epoch": 1.4, "grad_norm": 0.90234375, "learning_rate": 0.000497315790801302, "loss": 0.272, "step": 33910 }, { "epoch": 1.4, "grad_norm": 0.5, "learning_rate": 0.0004973142055963746, "loss": 0.2236, "step": 33920 }, { "epoch": 1.41, "grad_norm": 0.462890625, "learning_rate": 0.0004973126199260283, "loss": 0.2354, "step": 33930 }, { "epoch": 1.41, "grad_norm": 0.71484375, "learning_rate": 0.0004973110337902667, "loss": 0.2378, "step": 33940 }, { "epoch": 1.41, "grad_norm": 0.51953125, "learning_rate": 0.0004973094471890923, "loss": 0.2366, "step": 33950 }, { "epoch": 1.41, "grad_norm": 0.671875, "learning_rate": 0.0004973078601225082, "loss": 0.2232, "step": 33960 }, { "epoch": 1.41, "grad_norm": 0.51953125, "learning_rate": 0.0004973062725905174, "loss": 0.2387, "step": 33970 }, { "epoch": 1.41, "grad_norm": 1.296875, "learning_rate": 0.000497304684593123, "loss": 0.2257, "step": 33980 }, { "epoch": 1.41, "grad_norm": 0.83203125, "learning_rate": 0.0004973030961303279, "loss": 0.2441, "step": 33990 }, { "epoch": 1.41, "grad_norm": 0.47265625, "learning_rate": 0.000497301507202135, "loss": 0.2468, "step": 34000 }, { "epoch": 1.41, "grad_norm": 0.66796875, "learning_rate": 0.0004972999178085474, "loss": 0.2508, "step": 34010 }, { "epoch": 1.41, "grad_norm": 0.7109375, "learning_rate": 0.000497298327949568, "loss": 0.2095, "step": 34020 }, { "epoch": 1.41, "grad_norm": 0.40234375, "learning_rate": 0.0004972967376251999, "loss": 0.1829, "step": 34030 }, { "epoch": 1.41, "grad_norm": 1.1796875, "learning_rate": 0.0004972951468354461, "loss": 0.2092, "step": 34040 }, { "epoch": 1.41, "grad_norm": 0.55859375, "learning_rate": 0.0004972935555803094, "loss": 0.216, "step": 34050 }, { "epoch": 1.41, "grad_norm": 0.400390625, "learning_rate": 0.000497291963859793, "loss": 0.2133, "step": 34060 }, { "epoch": 1.41, "grad_norm": 0.7421875, "learning_rate": 0.0004972903716738999, "loss": 0.2427, "step": 34070 }, { "epoch": 1.41, "grad_norm": 0.80078125, "learning_rate": 0.000497288779022633, "loss": 0.2072, "step": 34080 }, { "epoch": 1.41, "grad_norm": 0.74609375, "learning_rate": 0.0004972871859059954, "loss": 0.2428, "step": 34090 }, { "epoch": 1.41, "grad_norm": 0.5078125, "learning_rate": 0.0004972855923239899, "loss": 0.1908, "step": 34100 }, { "epoch": 1.41, "grad_norm": 1.0390625, "learning_rate": 0.0004972839982766195, "loss": 0.2338, "step": 34110 }, { "epoch": 1.41, "grad_norm": 0.546875, "learning_rate": 0.0004972824037638875, "loss": 0.1912, "step": 34120 }, { "epoch": 1.41, "grad_norm": 3.125, "learning_rate": 0.0004972808087857967, "loss": 0.1916, "step": 34130 }, { "epoch": 1.41, "grad_norm": 0.89453125, "learning_rate": 0.0004972792133423501, "loss": 0.2105, "step": 34140 }, { "epoch": 1.41, "grad_norm": 0.61328125, "learning_rate": 0.0004972776174335508, "loss": 0.1574, "step": 34150 }, { "epoch": 1.41, "grad_norm": 0.73046875, "learning_rate": 0.0004972760210594016, "loss": 0.2259, "step": 34160 }, { "epoch": 1.42, "grad_norm": 0.703125, "learning_rate": 0.0004972744242199056, "loss": 0.2312, "step": 34170 }, { "epoch": 1.42, "grad_norm": 0.74609375, "learning_rate": 0.0004972728269150659, "loss": 0.2505, "step": 34180 }, { "epoch": 1.42, "grad_norm": 0.53125, "learning_rate": 0.0004972712291448856, "loss": 0.2035, "step": 34190 }, { "epoch": 1.42, "grad_norm": 0.7109375, "learning_rate": 0.0004972696309093673, "loss": 0.1633, "step": 34200 }, { "epoch": 1.42, "grad_norm": 1.5625, "learning_rate": 0.0004972680322085144, "loss": 0.2578, "step": 34210 }, { "epoch": 1.42, "grad_norm": 0.33984375, "learning_rate": 0.0004972664330423298, "loss": 0.2219, "step": 34220 }, { "epoch": 1.42, "grad_norm": 0.7578125, "learning_rate": 0.0004972648334108165, "loss": 0.2388, "step": 34230 }, { "epoch": 1.42, "grad_norm": 0.64453125, "learning_rate": 0.0004972632333139773, "loss": 0.179, "step": 34240 }, { "epoch": 1.42, "grad_norm": 0.82421875, "learning_rate": 0.0004972616327518155, "loss": 0.2271, "step": 34250 }, { "epoch": 1.42, "grad_norm": 0.60546875, "learning_rate": 0.000497260031724334, "loss": 0.3013, "step": 34260 }, { "epoch": 1.42, "grad_norm": 2.296875, "learning_rate": 0.0004972584302315358, "loss": 0.1855, "step": 34270 }, { "epoch": 1.42, "grad_norm": 0.341796875, "learning_rate": 0.000497256828273424, "loss": 0.2565, "step": 34280 }, { "epoch": 1.42, "grad_norm": 0.55078125, "learning_rate": 0.0004972552258500015, "loss": 0.1978, "step": 34290 }, { "epoch": 1.42, "grad_norm": 0.67578125, "learning_rate": 0.0004972536229612713, "loss": 0.1941, "step": 34300 }, { "epoch": 1.42, "grad_norm": 0.94921875, "learning_rate": 0.0004972520196072366, "loss": 0.2048, "step": 34310 }, { "epoch": 1.42, "grad_norm": 0.62890625, "learning_rate": 0.0004972504157879003, "loss": 0.2495, "step": 34320 }, { "epoch": 1.42, "grad_norm": 0.455078125, "learning_rate": 0.0004972488115032653, "loss": 0.2151, "step": 34330 }, { "epoch": 1.42, "grad_norm": 0.55859375, "learning_rate": 0.0004972472067533349, "loss": 0.2775, "step": 34340 }, { "epoch": 1.42, "grad_norm": 0.53125, "learning_rate": 0.0004972456015381119, "loss": 0.259, "step": 34350 }, { "epoch": 1.42, "grad_norm": 0.2060546875, "learning_rate": 0.0004972439958575993, "loss": 0.1731, "step": 34360 }, { "epoch": 1.42, "grad_norm": 0.75390625, "learning_rate": 0.0004972423897118002, "loss": 0.2107, "step": 34370 }, { "epoch": 1.42, "grad_norm": 0.66796875, "learning_rate": 0.0004972407831007178, "loss": 0.255, "step": 34380 }, { "epoch": 1.42, "grad_norm": 0.31640625, "learning_rate": 0.0004972391760243547, "loss": 0.2457, "step": 34390 }, { "epoch": 1.42, "grad_norm": 0.92578125, "learning_rate": 0.0004972375684827144, "loss": 0.2535, "step": 34400 }, { "epoch": 1.43, "grad_norm": 0.453125, "learning_rate": 0.0004972359604757995, "loss": 0.1854, "step": 34410 }, { "epoch": 1.43, "grad_norm": 1.328125, "learning_rate": 0.0004972343520036134, "loss": 0.2135, "step": 34420 }, { "epoch": 1.43, "grad_norm": 1.078125, "learning_rate": 0.0004972327430661589, "loss": 0.2137, "step": 34430 }, { "epoch": 1.43, "grad_norm": 0.2197265625, "learning_rate": 0.0004972311336634389, "loss": 0.25, "step": 34440 }, { "epoch": 1.43, "grad_norm": 0.6171875, "learning_rate": 0.0004972295237954567, "loss": 0.2761, "step": 34450 }, { "epoch": 1.43, "grad_norm": 0.83203125, "learning_rate": 0.0004972279134622153, "loss": 0.2822, "step": 34460 }, { "epoch": 1.43, "grad_norm": 0.71484375, "learning_rate": 0.0004972263026637177, "loss": 0.207, "step": 34470 }, { "epoch": 1.43, "grad_norm": 0.47265625, "learning_rate": 0.0004972246913999669, "loss": 0.24, "step": 34480 }, { "epoch": 1.43, "grad_norm": 0.462890625, "learning_rate": 0.0004972230796709658, "loss": 0.1965, "step": 34490 }, { "epoch": 1.43, "grad_norm": 0.453125, "learning_rate": 0.0004972214674767175, "loss": 0.2433, "step": 34500 }, { "epoch": 1.43, "grad_norm": 0.61328125, "learning_rate": 0.0004972198548172253, "loss": 0.1926, "step": 34510 }, { "epoch": 1.43, "grad_norm": 0.498046875, "learning_rate": 0.000497218241692492, "loss": 0.2038, "step": 34520 }, { "epoch": 1.43, "grad_norm": 0.498046875, "learning_rate": 0.0004972166281025206, "loss": 0.2651, "step": 34530 }, { "epoch": 1.43, "grad_norm": 2.359375, "learning_rate": 0.0004972150140473143, "loss": 0.2405, "step": 34540 }, { "epoch": 1.43, "grad_norm": 0.216796875, "learning_rate": 0.0004972133995268759, "loss": 0.2277, "step": 34550 }, { "epoch": 1.43, "grad_norm": 1.9453125, "learning_rate": 0.0004972117845412086, "loss": 0.2697, "step": 34560 }, { "epoch": 1.43, "grad_norm": 0.4921875, "learning_rate": 0.0004972101690903154, "loss": 0.2268, "step": 34570 }, { "epoch": 1.43, "grad_norm": 0.90234375, "learning_rate": 0.0004972085531741995, "loss": 0.2333, "step": 34580 }, { "epoch": 1.43, "grad_norm": 0.43359375, "learning_rate": 0.0004972069367928638, "loss": 0.2426, "step": 34590 }, { "epoch": 1.43, "grad_norm": 1.0703125, "learning_rate": 0.0004972053199463112, "loss": 0.2353, "step": 34600 }, { "epoch": 1.43, "grad_norm": 1.21875, "learning_rate": 0.0004972037026345449, "loss": 0.2012, "step": 34610 }, { "epoch": 1.43, "grad_norm": 0.23828125, "learning_rate": 0.000497202084857568, "loss": 0.2463, "step": 34620 }, { "epoch": 1.43, "grad_norm": 0.75390625, "learning_rate": 0.0004972004666153834, "loss": 0.2445, "step": 34630 }, { "epoch": 1.43, "grad_norm": 0.40234375, "learning_rate": 0.0004971988479079943, "loss": 0.2766, "step": 34640 }, { "epoch": 1.44, "grad_norm": 0.82421875, "learning_rate": 0.0004971972287354037, "loss": 0.2183, "step": 34650 }, { "epoch": 1.44, "grad_norm": 0.61328125, "learning_rate": 0.0004971956090976144, "loss": 0.1914, "step": 34660 }, { "epoch": 1.44, "grad_norm": 0.2490234375, "learning_rate": 0.0004971939889946298, "loss": 0.2588, "step": 34670 }, { "epoch": 1.44, "grad_norm": 0.76953125, "learning_rate": 0.0004971923684264529, "loss": 0.1932, "step": 34680 }, { "epoch": 1.44, "grad_norm": 0.94921875, "learning_rate": 0.0004971907473930865, "loss": 0.229, "step": 34690 }, { "epoch": 1.44, "grad_norm": 0.69921875, "learning_rate": 0.0004971891258945338, "loss": 0.2208, "step": 34700 }, { "epoch": 1.44, "grad_norm": 0.97265625, "learning_rate": 0.000497187503930798, "loss": 0.2328, "step": 34710 }, { "epoch": 1.44, "grad_norm": 0.54296875, "learning_rate": 0.0004971858815018819, "loss": 0.2035, "step": 34720 }, { "epoch": 1.44, "grad_norm": 0.8203125, "learning_rate": 0.0004971842586077887, "loss": 0.196, "step": 34730 }, { "epoch": 1.44, "grad_norm": 0.58984375, "learning_rate": 0.0004971826352485214, "loss": 0.2227, "step": 34740 }, { "epoch": 1.44, "grad_norm": 0.462890625, "learning_rate": 0.0004971810114240831, "loss": 0.1942, "step": 34750 }, { "epoch": 1.44, "grad_norm": 0.546875, "learning_rate": 0.0004971793871344769, "loss": 0.2463, "step": 34760 }, { "epoch": 1.44, "grad_norm": 0.7421875, "learning_rate": 0.0004971777623797057, "loss": 0.2441, "step": 34770 }, { "epoch": 1.44, "grad_norm": 0.58984375, "learning_rate": 0.0004971761371597726, "loss": 0.2323, "step": 34780 }, { "epoch": 1.44, "grad_norm": 0.51953125, "learning_rate": 0.0004971745114746807, "loss": 0.2159, "step": 34790 }, { "epoch": 1.44, "grad_norm": 0.341796875, "learning_rate": 0.0004971728853244332, "loss": 0.2276, "step": 34800 }, { "epoch": 1.44, "grad_norm": 0.78125, "learning_rate": 0.000497171258709033, "loss": 0.3041, "step": 34810 }, { "epoch": 1.44, "grad_norm": 0.81640625, "learning_rate": 0.000497169631628483, "loss": 0.1959, "step": 34820 }, { "epoch": 1.44, "grad_norm": 0.74609375, "learning_rate": 0.0004971680040827866, "loss": 0.2056, "step": 34830 }, { "epoch": 1.44, "grad_norm": 2.46875, "learning_rate": 0.0004971663760719468, "loss": 0.2596, "step": 34840 }, { "epoch": 1.44, "grad_norm": 0.44921875, "learning_rate": 0.0004971647475959663, "loss": 0.1424, "step": 34850 }, { "epoch": 1.44, "grad_norm": 0.46484375, "learning_rate": 0.0004971631186548485, "loss": 0.2479, "step": 34860 }, { "epoch": 1.44, "grad_norm": 0.65234375, "learning_rate": 0.0004971614892485966, "loss": 0.2249, "step": 34870 }, { "epoch": 1.44, "grad_norm": 0.3984375, "learning_rate": 0.0004971598593772133, "loss": 0.2682, "step": 34880 }, { "epoch": 1.45, "grad_norm": 1.0546875, "learning_rate": 0.0004971582290407019, "loss": 0.2116, "step": 34890 }, { "epoch": 1.45, "grad_norm": 0.404296875, "learning_rate": 0.0004971565982390655, "loss": 0.2536, "step": 34900 }, { "epoch": 1.45, "grad_norm": 0.494140625, "learning_rate": 0.0004971549669723069, "loss": 0.2457, "step": 34910 }, { "epoch": 1.45, "grad_norm": 0.435546875, "learning_rate": 0.0004971533352404293, "loss": 0.2349, "step": 34920 }, { "epoch": 1.45, "grad_norm": 0.5234375, "learning_rate": 0.000497151703043436, "loss": 0.237, "step": 34930 }, { "epoch": 1.45, "grad_norm": 0.65625, "learning_rate": 0.0004971500703813298, "loss": 0.2297, "step": 34940 }, { "epoch": 1.45, "grad_norm": 1.5, "learning_rate": 0.0004971484372541139, "loss": 0.2623, "step": 34950 }, { "epoch": 1.45, "grad_norm": 0.55078125, "learning_rate": 0.0004971468036617913, "loss": 0.1688, "step": 34960 }, { "epoch": 1.45, "grad_norm": 0.54296875, "learning_rate": 0.0004971451696043651, "loss": 0.2036, "step": 34970 }, { "epoch": 1.45, "grad_norm": 0.671875, "learning_rate": 0.0004971435350818383, "loss": 0.2725, "step": 34980 }, { "epoch": 1.45, "grad_norm": 0.61328125, "learning_rate": 0.0004971419000942142, "loss": 0.2261, "step": 34990 }, { "epoch": 1.45, "grad_norm": 1.3671875, "learning_rate": 0.0004971402646414956, "loss": 0.2527, "step": 35000 }, { "epoch": 1.45, "grad_norm": 1.1171875, "learning_rate": 0.0004971386287236859, "loss": 0.1944, "step": 35010 }, { "epoch": 1.45, "grad_norm": 0.392578125, "learning_rate": 0.0004971369923407878, "loss": 0.2435, "step": 35020 }, { "epoch": 1.45, "grad_norm": 2.25, "learning_rate": 0.0004971353554928047, "loss": 0.2256, "step": 35030 }, { "epoch": 1.45, "grad_norm": 0.54296875, "learning_rate": 0.0004971337181797394, "loss": 0.2237, "step": 35040 }, { "epoch": 1.45, "grad_norm": 0.66796875, "learning_rate": 0.0004971320804015953, "loss": 0.2559, "step": 35050 }, { "epoch": 1.45, "grad_norm": 0.8984375, "learning_rate": 0.0004971304421583753, "loss": 0.2025, "step": 35060 }, { "epoch": 1.45, "grad_norm": 0.66015625, "learning_rate": 0.0004971288034500823, "loss": 0.2184, "step": 35070 }, { "epoch": 1.45, "grad_norm": 0.6484375, "learning_rate": 0.0004971271642767197, "loss": 0.2229, "step": 35080 }, { "epoch": 1.45, "grad_norm": 1.09375, "learning_rate": 0.0004971255246382904, "loss": 0.1613, "step": 35090 }, { "epoch": 1.45, "grad_norm": 0.78125, "learning_rate": 0.0004971238845347978, "loss": 0.2497, "step": 35100 }, { "epoch": 1.45, "grad_norm": 0.80078125, "learning_rate": 0.0004971222439662445, "loss": 0.1692, "step": 35110 }, { "epoch": 1.45, "grad_norm": 0.67578125, "learning_rate": 0.0004971206029326338, "loss": 0.2397, "step": 35120 }, { "epoch": 1.46, "grad_norm": 0.6171875, "learning_rate": 0.000497118961433969, "loss": 0.2188, "step": 35130 }, { "epoch": 1.46, "grad_norm": 0.54296875, "learning_rate": 0.0004971173194702528, "loss": 0.2466, "step": 35140 }, { "epoch": 1.46, "grad_norm": 0.8046875, "learning_rate": 0.0004971156770414886, "loss": 0.2407, "step": 35150 }, { "epoch": 1.46, "grad_norm": 1.0390625, "learning_rate": 0.0004971140341476794, "loss": 0.2371, "step": 35160 }, { "epoch": 1.46, "grad_norm": 0.67578125, "learning_rate": 0.0004971123907888282, "loss": 0.2381, "step": 35170 }, { "epoch": 1.46, "grad_norm": 0.478515625, "learning_rate": 0.0004971107469649382, "loss": 0.1977, "step": 35180 }, { "epoch": 1.46, "grad_norm": 0.498046875, "learning_rate": 0.0004971091026760125, "loss": 0.2276, "step": 35190 }, { "epoch": 1.46, "grad_norm": 0.70703125, "learning_rate": 0.0004971074579220541, "loss": 0.2475, "step": 35200 }, { "epoch": 1.46, "grad_norm": 0.6484375, "learning_rate": 0.0004971058127030662, "loss": 0.2072, "step": 35210 }, { "epoch": 1.46, "grad_norm": 0.640625, "learning_rate": 0.0004971041670190519, "loss": 0.2476, "step": 35220 }, { "epoch": 1.46, "grad_norm": 0.625, "learning_rate": 0.000497102520870014, "loss": 0.2155, "step": 35230 }, { "epoch": 1.46, "grad_norm": 0.6875, "learning_rate": 0.000497100874255956, "loss": 0.2769, "step": 35240 }, { "epoch": 1.46, "grad_norm": 0.34375, "learning_rate": 0.0004970992271768809, "loss": 0.1927, "step": 35250 }, { "epoch": 1.46, "grad_norm": 0.30078125, "learning_rate": 0.0004970975796327917, "loss": 0.2523, "step": 35260 }, { "epoch": 1.46, "grad_norm": 0.953125, "learning_rate": 0.0004970959316236915, "loss": 0.2251, "step": 35270 }, { "epoch": 1.46, "grad_norm": 0.384765625, "learning_rate": 0.0004970942831495834, "loss": 0.238, "step": 35280 }, { "epoch": 1.46, "grad_norm": 0.63671875, "learning_rate": 0.0004970926342104706, "loss": 0.2836, "step": 35290 }, { "epoch": 1.46, "grad_norm": 0.7578125, "learning_rate": 0.0004970909848063562, "loss": 0.2309, "step": 35300 }, { "epoch": 1.46, "grad_norm": 0.44140625, "learning_rate": 0.0004970893349372431, "loss": 0.2853, "step": 35310 }, { "epoch": 1.46, "grad_norm": 0.66796875, "learning_rate": 0.0004970876846031347, "loss": 0.228, "step": 35320 }, { "epoch": 1.46, "grad_norm": 0.302734375, "learning_rate": 0.0004970860338040339, "loss": 0.2123, "step": 35330 }, { "epoch": 1.46, "grad_norm": 0.73828125, "learning_rate": 0.0004970843825399439, "loss": 0.1809, "step": 35340 }, { "epoch": 1.46, "grad_norm": 1.390625, "learning_rate": 0.0004970827308108677, "loss": 0.2515, "step": 35350 }, { "epoch": 1.46, "grad_norm": 0.97265625, "learning_rate": 0.0004970810786168086, "loss": 0.2247, "step": 35360 }, { "epoch": 1.47, "grad_norm": 0.75, "learning_rate": 0.0004970794259577696, "loss": 0.2446, "step": 35370 }, { "epoch": 1.47, "grad_norm": 0.60546875, "learning_rate": 0.0004970777728337537, "loss": 0.2018, "step": 35380 }, { "epoch": 1.47, "grad_norm": 0.46484375, "learning_rate": 0.0004970761192447642, "loss": 0.2285, "step": 35390 }, { "epoch": 1.47, "grad_norm": 0.486328125, "learning_rate": 0.000497074465190804, "loss": 0.2045, "step": 35400 }, { "epoch": 1.47, "grad_norm": 1.5, "learning_rate": 0.0004970728106718764, "loss": 0.2028, "step": 35410 }, { "epoch": 1.47, "grad_norm": 0.52734375, "learning_rate": 0.0004970711556879844, "loss": 0.2483, "step": 35420 }, { "epoch": 1.47, "grad_norm": 1.5234375, "learning_rate": 0.0004970695002391313, "loss": 0.2751, "step": 35430 }, { "epoch": 1.47, "grad_norm": 0.828125, "learning_rate": 0.00049706784432532, "loss": 0.2645, "step": 35440 }, { "epoch": 1.47, "grad_norm": 0.69921875, "learning_rate": 0.0004970661879465537, "loss": 0.2082, "step": 35450 }, { "epoch": 1.47, "grad_norm": 0.59765625, "learning_rate": 0.0004970645311028355, "loss": 0.2525, "step": 35460 }, { "epoch": 1.47, "grad_norm": 1.0078125, "learning_rate": 0.0004970628737941686, "loss": 0.3155, "step": 35470 }, { "epoch": 1.47, "grad_norm": 0.53125, "learning_rate": 0.000497061216020556, "loss": 0.2763, "step": 35480 }, { "epoch": 1.47, "grad_norm": 0.58203125, "learning_rate": 0.0004970595577820008, "loss": 0.2597, "step": 35490 }, { "epoch": 1.47, "grad_norm": 0.71484375, "learning_rate": 0.0004970578990785063, "loss": 0.2336, "step": 35500 }, { "epoch": 1.47, "grad_norm": 0.376953125, "learning_rate": 0.0004970562399100756, "loss": 0.2178, "step": 35510 }, { "epoch": 1.47, "grad_norm": 0.33984375, "learning_rate": 0.0004970545802767116, "loss": 0.1701, "step": 35520 }, { "epoch": 1.47, "grad_norm": 0.33203125, "learning_rate": 0.0004970529201784175, "loss": 0.2332, "step": 35530 }, { "epoch": 1.47, "grad_norm": 0.8046875, "learning_rate": 0.0004970512596151966, "loss": 0.2382, "step": 35540 }, { "epoch": 1.47, "grad_norm": 0.98046875, "learning_rate": 0.0004970495985870519, "loss": 0.3322, "step": 35550 }, { "epoch": 1.47, "grad_norm": 0.51953125, "learning_rate": 0.0004970479370939864, "loss": 0.2165, "step": 35560 }, { "epoch": 1.47, "grad_norm": 0.58203125, "learning_rate": 0.0004970462751360035, "loss": 0.2535, "step": 35570 }, { "epoch": 1.47, "grad_norm": 1.046875, "learning_rate": 0.0004970446127131061, "loss": 0.1974, "step": 35580 }, { "epoch": 1.47, "grad_norm": 0.76953125, "learning_rate": 0.0004970429498252976, "loss": 0.2901, "step": 35590 }, { "epoch": 1.47, "grad_norm": 0.38671875, "learning_rate": 0.0004970412864725808, "loss": 0.1826, "step": 35600 }, { "epoch": 1.47, "grad_norm": 0.484375, "learning_rate": 0.000497039622654959, "loss": 0.2422, "step": 35610 }, { "epoch": 1.48, "grad_norm": 0.48828125, "learning_rate": 0.0004970379583724352, "loss": 0.1997, "step": 35620 }, { "epoch": 1.48, "grad_norm": 0.259765625, "learning_rate": 0.0004970362936250128, "loss": 0.1904, "step": 35630 }, { "epoch": 1.48, "grad_norm": 1.609375, "learning_rate": 0.0004970346284126947, "loss": 0.2623, "step": 35640 }, { "epoch": 1.48, "grad_norm": 0.50390625, "learning_rate": 0.0004970329627354842, "loss": 0.2432, "step": 35650 }, { "epoch": 1.48, "grad_norm": 0.76171875, "learning_rate": 0.0004970312965933843, "loss": 0.2873, "step": 35660 }, { "epoch": 1.48, "grad_norm": 0.609375, "learning_rate": 0.0004970296299863981, "loss": 0.2219, "step": 35670 }, { "epoch": 1.48, "grad_norm": 0.1884765625, "learning_rate": 0.0004970279629145289, "loss": 0.1834, "step": 35680 }, { "epoch": 1.48, "grad_norm": 0.79296875, "learning_rate": 0.0004970262953777797, "loss": 0.2484, "step": 35690 }, { "epoch": 1.48, "grad_norm": 1.5703125, "learning_rate": 0.0004970246273761536, "loss": 0.1831, "step": 35700 }, { "epoch": 1.48, "grad_norm": 0.33203125, "learning_rate": 0.000497022958909654, "loss": 0.2141, "step": 35710 }, { "epoch": 1.48, "grad_norm": 0.396484375, "learning_rate": 0.0004970212899782837, "loss": 0.2746, "step": 35720 }, { "epoch": 1.48, "grad_norm": 0.96875, "learning_rate": 0.0004970196205820462, "loss": 0.246, "step": 35730 }, { "epoch": 1.48, "grad_norm": 0.39453125, "learning_rate": 0.0004970179507209443, "loss": 0.181, "step": 35740 }, { "epoch": 1.48, "grad_norm": 0.71484375, "learning_rate": 0.0004970162803949814, "loss": 0.238, "step": 35750 }, { "epoch": 1.48, "grad_norm": 0.380859375, "learning_rate": 0.0004970146096041605, "loss": 0.2028, "step": 35760 }, { "epoch": 1.48, "grad_norm": 0.435546875, "learning_rate": 0.0004970129383484848, "loss": 0.2708, "step": 35770 }, { "epoch": 1.48, "grad_norm": 0.34375, "learning_rate": 0.0004970112666279573, "loss": 0.1731, "step": 35780 }, { "epoch": 1.48, "grad_norm": 0.78125, "learning_rate": 0.0004970095944425814, "loss": 0.2642, "step": 35790 }, { "epoch": 1.48, "grad_norm": 0.625, "learning_rate": 0.00049700792179236, "loss": 0.2619, "step": 35800 }, { "epoch": 1.48, "grad_norm": 0.404296875, "learning_rate": 0.0004970062486772965, "loss": 0.2, "step": 35810 }, { "epoch": 1.48, "grad_norm": 1.3515625, "learning_rate": 0.0004970045750973939, "loss": 0.2821, "step": 35820 }, { "epoch": 1.48, "grad_norm": 0.578125, "learning_rate": 0.0004970029010526552, "loss": 0.2358, "step": 35830 }, { "epoch": 1.48, "grad_norm": 1.9453125, "learning_rate": 0.0004970012265430838, "loss": 0.2242, "step": 35840 }, { "epoch": 1.48, "grad_norm": 0.5, "learning_rate": 0.0004969995515686829, "loss": 0.2079, "step": 35850 }, { "epoch": 1.49, "grad_norm": 0.4921875, "learning_rate": 0.0004969978761294554, "loss": 0.2329, "step": 35860 }, { "epoch": 1.49, "grad_norm": 0.6953125, "learning_rate": 0.0004969962002254046, "loss": 0.2288, "step": 35870 }, { "epoch": 1.49, "grad_norm": 0.703125, "learning_rate": 0.0004969945238565336, "loss": 0.1897, "step": 35880 }, { "epoch": 1.49, "grad_norm": 0.6015625, "learning_rate": 0.0004969928470228456, "loss": 0.2305, "step": 35890 }, { "epoch": 1.49, "grad_norm": 0.392578125, "learning_rate": 0.0004969911697243437, "loss": 0.2042, "step": 35900 }, { "epoch": 1.49, "grad_norm": 0.458984375, "learning_rate": 0.0004969894919610311, "loss": 0.2139, "step": 35910 }, { "epoch": 1.49, "grad_norm": 0.353515625, "learning_rate": 0.000496987813732911, "loss": 0.223, "step": 35920 }, { "epoch": 1.49, "grad_norm": 0.0, "learning_rate": 0.0004969861350399865, "loss": 0.2379, "step": 35930 }, { "epoch": 1.49, "grad_norm": 0.39453125, "learning_rate": 0.0004969844558822607, "loss": 0.2024, "step": 35940 }, { "epoch": 1.49, "grad_norm": 0.443359375, "learning_rate": 0.0004969827762597369, "loss": 0.1963, "step": 35950 }, { "epoch": 1.49, "grad_norm": 0.84765625, "learning_rate": 0.0004969810961724181, "loss": 0.2576, "step": 35960 }, { "epoch": 1.49, "grad_norm": 0.357421875, "learning_rate": 0.0004969794156203075, "loss": 0.2436, "step": 35970 }, { "epoch": 1.49, "grad_norm": 1.1875, "learning_rate": 0.0004969777346034086, "loss": 0.2286, "step": 35980 }, { "epoch": 1.49, "grad_norm": 0.68359375, "learning_rate": 0.000496976053121724, "loss": 0.2788, "step": 35990 }, { "epoch": 1.49, "grad_norm": 0.29296875, "learning_rate": 0.0004969743711752573, "loss": 0.244, "step": 36000 }, { "epoch": 1.49, "grad_norm": 0.76953125, "learning_rate": 0.0004969726887640114, "loss": 0.2035, "step": 36010 }, { "epoch": 1.49, "grad_norm": 0.8828125, "learning_rate": 0.0004969710058879896, "loss": 0.252, "step": 36020 }, { "epoch": 1.49, "grad_norm": 1.375, "learning_rate": 0.000496969322547195, "loss": 0.2548, "step": 36030 }, { "epoch": 1.49, "grad_norm": 1.171875, "learning_rate": 0.0004969676387416308, "loss": 0.2445, "step": 36040 }, { "epoch": 1.49, "grad_norm": 0.59765625, "learning_rate": 0.0004969659544713002, "loss": 0.2681, "step": 36050 }, { "epoch": 1.49, "grad_norm": 1.1953125, "learning_rate": 0.0004969642697362064, "loss": 0.2438, "step": 36060 }, { "epoch": 1.49, "grad_norm": 0.470703125, "learning_rate": 0.0004969625845363525, "loss": 0.2305, "step": 36070 }, { "epoch": 1.49, "grad_norm": 0.498046875, "learning_rate": 0.0004969608988717416, "loss": 0.2489, "step": 36080 }, { "epoch": 1.49, "grad_norm": 0.2578125, "learning_rate": 0.0004969592127423772, "loss": 0.2736, "step": 36090 }, { "epoch": 1.5, "grad_norm": 0.453125, "learning_rate": 0.0004969575261482619, "loss": 0.231, "step": 36100 }, { "epoch": 1.5, "grad_norm": 1.1953125, "learning_rate": 0.0004969558390893993, "loss": 0.1996, "step": 36110 }, { "epoch": 1.5, "grad_norm": 0.353515625, "learning_rate": 0.0004969541515657925, "loss": 0.1583, "step": 36120 }, { "epoch": 1.5, "grad_norm": 0.84765625, "learning_rate": 0.0004969524635774448, "loss": 0.2699, "step": 36130 }, { "epoch": 1.5, "grad_norm": 0.5546875, "learning_rate": 0.0004969507751243591, "loss": 0.2391, "step": 36140 }, { "epoch": 1.5, "grad_norm": 0.4921875, "learning_rate": 0.0004969490862065388, "loss": 0.2557, "step": 36150 }, { "epoch": 1.5, "grad_norm": 0.498046875, "learning_rate": 0.0004969473968239868, "loss": 0.2922, "step": 36160 }, { "epoch": 1.5, "grad_norm": 0.67578125, "learning_rate": 0.0004969457069767066, "loss": 0.2295, "step": 36170 }, { "epoch": 1.5, "grad_norm": 0.2578125, "learning_rate": 0.0004969440166647012, "loss": 0.191, "step": 36180 }, { "epoch": 1.5, "grad_norm": 1.5703125, "learning_rate": 0.0004969423258879739, "loss": 0.2075, "step": 36190 }, { "epoch": 1.5, "grad_norm": 1.046875, "learning_rate": 0.0004969406346465278, "loss": 0.2522, "step": 36200 }, { "epoch": 1.5, "grad_norm": 1.0078125, "learning_rate": 0.000496938942940366, "loss": 0.262, "step": 36210 }, { "epoch": 1.5, "grad_norm": 0.83203125, "learning_rate": 0.0004969372507694919, "loss": 0.1914, "step": 36220 }, { "epoch": 1.5, "grad_norm": 0.8984375, "learning_rate": 0.0004969355581339086, "loss": 0.2356, "step": 36230 }, { "epoch": 1.5, "grad_norm": 0.828125, "learning_rate": 0.000496933865033619, "loss": 0.2753, "step": 36240 }, { "epoch": 1.5, "grad_norm": 0.6015625, "learning_rate": 0.0004969321714686267, "loss": 0.2507, "step": 36250 }, { "epoch": 1.5, "grad_norm": 1.140625, "learning_rate": 0.0004969304774389347, "loss": 0.2512, "step": 36260 }, { "epoch": 1.5, "grad_norm": 0.72265625, "learning_rate": 0.0004969287829445462, "loss": 0.1798, "step": 36270 }, { "epoch": 1.5, "grad_norm": 1.109375, "learning_rate": 0.0004969270879854644, "loss": 0.2185, "step": 36280 }, { "epoch": 1.5, "grad_norm": 0.64453125, "learning_rate": 0.0004969253925616925, "loss": 0.2104, "step": 36290 }, { "epoch": 1.5, "grad_norm": 0.69140625, "learning_rate": 0.0004969236966732337, "loss": 0.2116, "step": 36300 }, { "epoch": 1.5, "grad_norm": 0.96875, "learning_rate": 0.0004969220003200912, "loss": 0.2689, "step": 36310 }, { "epoch": 1.5, "grad_norm": 1.828125, "learning_rate": 0.000496920303502268, "loss": 0.2033, "step": 36320 }, { "epoch": 1.5, "grad_norm": 0.765625, "learning_rate": 0.0004969186062197676, "loss": 0.2434, "step": 36330 }, { "epoch": 1.51, "grad_norm": 0.5703125, "learning_rate": 0.000496916908472593, "loss": 0.2484, "step": 36340 }, { "epoch": 1.51, "grad_norm": 0.5, "learning_rate": 0.0004969152102607474, "loss": 0.2215, "step": 36350 }, { "epoch": 1.51, "grad_norm": 0.63671875, "learning_rate": 0.000496913511584234, "loss": 0.223, "step": 36360 }, { "epoch": 1.51, "grad_norm": 0.12353515625, "learning_rate": 0.0004969118124430561, "loss": 0.2212, "step": 36370 }, { "epoch": 1.51, "grad_norm": 0.1689453125, "learning_rate": 0.000496910112837217, "loss": 0.2423, "step": 36380 }, { "epoch": 1.51, "grad_norm": 1.5, "learning_rate": 0.0004969084127667195, "loss": 0.2109, "step": 36390 }, { "epoch": 1.51, "grad_norm": 0.5, "learning_rate": 0.0004969067122315671, "loss": 0.2117, "step": 36400 }, { "epoch": 1.51, "grad_norm": 0.68359375, "learning_rate": 0.000496905011231763, "loss": 0.1734, "step": 36410 }, { "epoch": 1.51, "grad_norm": 0.322265625, "learning_rate": 0.0004969033097673103, "loss": 0.2013, "step": 36420 }, { "epoch": 1.51, "grad_norm": 0.890625, "learning_rate": 0.0004969016078382122, "loss": 0.2323, "step": 36430 }, { "epoch": 1.51, "grad_norm": 0.38671875, "learning_rate": 0.000496899905444472, "loss": 0.2059, "step": 36440 }, { "epoch": 1.51, "grad_norm": 1.328125, "learning_rate": 0.0004968982025860927, "loss": 0.2723, "step": 36450 }, { "epoch": 1.51, "grad_norm": 0.83984375, "learning_rate": 0.0004968964992630777, "loss": 0.2475, "step": 36460 }, { "epoch": 1.51, "grad_norm": 0.78515625, "learning_rate": 0.0004968947954754302, "loss": 0.2396, "step": 36470 }, { "epoch": 1.51, "grad_norm": 0.609375, "learning_rate": 0.0004968930912231534, "loss": 0.2173, "step": 36480 }, { "epoch": 1.51, "grad_norm": 0.404296875, "learning_rate": 0.0004968913865062504, "loss": 0.2017, "step": 36490 }, { "epoch": 1.51, "grad_norm": 0.361328125, "learning_rate": 0.0004968896813247244, "loss": 0.292, "step": 36500 }, { "epoch": 1.51, "grad_norm": 0.70703125, "learning_rate": 0.0004968879756785788, "loss": 0.2642, "step": 36510 }, { "epoch": 1.51, "grad_norm": 0.6015625, "learning_rate": 0.0004968862695678166, "loss": 0.2348, "step": 36520 }, { "epoch": 1.51, "grad_norm": 0.94140625, "learning_rate": 0.0004968845629924412, "loss": 0.1444, "step": 36530 }, { "epoch": 1.51, "grad_norm": 1.0234375, "learning_rate": 0.0004968828559524556, "loss": 0.2514, "step": 36540 }, { "epoch": 1.51, "grad_norm": 0.55859375, "learning_rate": 0.0004968811484478633, "loss": 0.2797, "step": 36550 }, { "epoch": 1.51, "grad_norm": 0.88671875, "learning_rate": 0.0004968794404786672, "loss": 0.2657, "step": 36560 }, { "epoch": 1.51, "grad_norm": 0.89453125, "learning_rate": 0.0004968777320448706, "loss": 0.2141, "step": 36570 }, { "epoch": 1.52, "grad_norm": 0.4453125, "learning_rate": 0.000496876023146477, "loss": 0.2147, "step": 36580 }, { "epoch": 1.52, "grad_norm": 0.6328125, "learning_rate": 0.0004968743137834891, "loss": 0.2236, "step": 36590 }, { "epoch": 1.52, "grad_norm": 0.25, "learning_rate": 0.0004968726039559107, "loss": 0.2681, "step": 36600 }, { "epoch": 1.52, "grad_norm": 0.546875, "learning_rate": 0.0004968708936637445, "loss": 0.1938, "step": 36610 }, { "epoch": 1.52, "grad_norm": 0.41015625, "learning_rate": 0.000496869182906994, "loss": 0.2323, "step": 36620 }, { "epoch": 1.52, "grad_norm": 0.8515625, "learning_rate": 0.0004968674716856623, "loss": 0.2188, "step": 36630 }, { "epoch": 1.52, "grad_norm": 0.294921875, "learning_rate": 0.0004968657599997528, "loss": 0.2, "step": 36640 }, { "epoch": 1.52, "grad_norm": 0.8203125, "learning_rate": 0.0004968640478492685, "loss": 0.2652, "step": 36650 }, { "epoch": 1.52, "grad_norm": 0.17578125, "learning_rate": 0.0004968623352342127, "loss": 0.2348, "step": 36660 }, { "epoch": 1.52, "grad_norm": 1.109375, "learning_rate": 0.0004968606221545887, "loss": 0.2693, "step": 36670 }, { "epoch": 1.52, "grad_norm": 0.447265625, "learning_rate": 0.0004968589086103997, "loss": 0.2154, "step": 36680 }, { "epoch": 1.52, "grad_norm": 0.328125, "learning_rate": 0.0004968571946016488, "loss": 0.224, "step": 36690 }, { "epoch": 1.52, "grad_norm": 1.8046875, "learning_rate": 0.0004968554801283395, "loss": 0.2436, "step": 36700 }, { "epoch": 1.52, "grad_norm": 0.5625, "learning_rate": 0.0004968537651904747, "loss": 0.2207, "step": 36710 }, { "epoch": 1.52, "grad_norm": 0.59765625, "learning_rate": 0.0004968520497880578, "loss": 0.2341, "step": 36720 }, { "epoch": 1.52, "grad_norm": 0.9296875, "learning_rate": 0.0004968503339210919, "loss": 0.2874, "step": 36730 }, { "epoch": 1.52, "grad_norm": 0.8359375, "learning_rate": 0.0004968486175895804, "loss": 0.2627, "step": 36740 }, { "epoch": 1.52, "grad_norm": 0.486328125, "learning_rate": 0.0004968469007935265, "loss": 0.2494, "step": 36750 }, { "epoch": 1.52, "grad_norm": 0.84375, "learning_rate": 0.0004968451835329334, "loss": 0.2541, "step": 36760 }, { "epoch": 1.52, "grad_norm": 0.55078125, "learning_rate": 0.0004968434658078043, "loss": 0.2114, "step": 36770 }, { "epoch": 1.52, "grad_norm": 0.75, "learning_rate": 0.0004968417476181423, "loss": 0.2307, "step": 36780 }, { "epoch": 1.52, "grad_norm": 0.89453125, "learning_rate": 0.000496840028963951, "loss": 0.2277, "step": 36790 }, { "epoch": 1.52, "grad_norm": 0.7890625, "learning_rate": 0.0004968383098452333, "loss": 0.2088, "step": 36800 }, { "epoch": 1.52, "grad_norm": 0.341796875, "learning_rate": 0.0004968365902619927, "loss": 0.1827, "step": 36810 }, { "epoch": 1.53, "grad_norm": 0.625, "learning_rate": 0.0004968348702142322, "loss": 0.2659, "step": 36820 }, { "epoch": 1.53, "grad_norm": 0.5078125, "learning_rate": 0.0004968331497019552, "loss": 0.2071, "step": 36830 }, { "epoch": 1.53, "grad_norm": 0.72265625, "learning_rate": 0.0004968314287251647, "loss": 0.2174, "step": 36840 }, { "epoch": 1.53, "grad_norm": 0.62890625, "learning_rate": 0.0004968297072838642, "loss": 0.232, "step": 36850 }, { "epoch": 1.53, "grad_norm": 0.51953125, "learning_rate": 0.0004968279853780569, "loss": 0.1905, "step": 36860 }, { "epoch": 1.53, "grad_norm": 0.54296875, "learning_rate": 0.0004968262630077459, "loss": 0.2117, "step": 36870 }, { "epoch": 1.53, "grad_norm": 0.56640625, "learning_rate": 0.0004968245401729347, "loss": 0.2297, "step": 36880 }, { "epoch": 1.53, "grad_norm": 0.1591796875, "learning_rate": 0.0004968228168736262, "loss": 0.2349, "step": 36890 }, { "epoch": 1.53, "grad_norm": 0.62109375, "learning_rate": 0.0004968210931098238, "loss": 0.2037, "step": 36900 }, { "epoch": 1.53, "grad_norm": 0.5390625, "learning_rate": 0.0004968193688815308, "loss": 0.187, "step": 36910 }, { "epoch": 1.53, "grad_norm": 0.357421875, "learning_rate": 0.0004968176441887504, "loss": 0.1582, "step": 36920 }, { "epoch": 1.53, "grad_norm": 0.5234375, "learning_rate": 0.000496815919031486, "loss": 0.2276, "step": 36930 }, { "epoch": 1.53, "grad_norm": 1.1484375, "learning_rate": 0.0004968141934097404, "loss": 0.1896, "step": 36940 }, { "epoch": 1.53, "grad_norm": 0.35546875, "learning_rate": 0.0004968124673235174, "loss": 0.2401, "step": 36950 }, { "epoch": 1.53, "grad_norm": 0.0, "learning_rate": 0.0004968107407728198, "loss": 0.2086, "step": 36960 }, { "epoch": 1.53, "grad_norm": 1.625, "learning_rate": 0.0004968090137576511, "loss": 0.2328, "step": 36970 }, { "epoch": 1.53, "grad_norm": 0.5078125, "learning_rate": 0.0004968072862780146, "loss": 0.195, "step": 36980 }, { "epoch": 1.53, "grad_norm": 0.92578125, "learning_rate": 0.0004968055583339133, "loss": 0.254, "step": 36990 }, { "epoch": 1.53, "grad_norm": 2.265625, "learning_rate": 0.0004968038299253506, "loss": 0.2602, "step": 37000 }, { "epoch": 1.53, "grad_norm": 0.28125, "learning_rate": 0.0004968021010523299, "loss": 0.3009, "step": 37010 }, { "epoch": 1.53, "grad_norm": 0.79296875, "learning_rate": 0.0004968003717148541, "loss": 0.2404, "step": 37020 }, { "epoch": 1.53, "grad_norm": 1.3671875, "learning_rate": 0.0004967986419129268, "loss": 0.2237, "step": 37030 }, { "epoch": 1.53, "grad_norm": 0.5859375, "learning_rate": 0.0004967969116465511, "loss": 0.2699, "step": 37040 }, { "epoch": 1.53, "grad_norm": 0.318359375, "learning_rate": 0.0004967951809157302, "loss": 0.2567, "step": 37050 }, { "epoch": 1.54, "grad_norm": 0.85546875, "learning_rate": 0.0004967934497204674, "loss": 0.2057, "step": 37060 }, { "epoch": 1.54, "grad_norm": 0.306640625, "learning_rate": 0.0004967917180607659, "loss": 0.1745, "step": 37070 }, { "epoch": 1.54, "grad_norm": 2.15625, "learning_rate": 0.0004967899859366293, "loss": 0.216, "step": 37080 }, { "epoch": 1.54, "grad_norm": 0.55859375, "learning_rate": 0.0004967882533480604, "loss": 0.2533, "step": 37090 }, { "epoch": 1.54, "grad_norm": 0.96875, "learning_rate": 0.0004967865202950627, "loss": 0.2454, "step": 37100 }, { "epoch": 1.54, "grad_norm": 0.73046875, "learning_rate": 0.0004967847867776394, "loss": 0.237, "step": 37110 }, { "epoch": 1.54, "grad_norm": 1.125, "learning_rate": 0.0004967830527957939, "loss": 0.2344, "step": 37120 }, { "epoch": 1.54, "grad_norm": 0.734375, "learning_rate": 0.0004967813183495292, "loss": 0.2478, "step": 37130 }, { "epoch": 1.54, "grad_norm": 0.890625, "learning_rate": 0.0004967795834388488, "loss": 0.2554, "step": 37140 }, { "epoch": 1.54, "grad_norm": 2.875, "learning_rate": 0.0004967778480637558, "loss": 0.2315, "step": 37150 }, { "epoch": 1.54, "grad_norm": 1.2578125, "learning_rate": 0.0004967761122242535, "loss": 0.2212, "step": 37160 }, { "epoch": 1.54, "grad_norm": 0.52734375, "learning_rate": 0.0004967743759203454, "loss": 0.2574, "step": 37170 }, { "epoch": 1.54, "grad_norm": 0.66015625, "learning_rate": 0.0004967726391520344, "loss": 0.2372, "step": 37180 }, { "epoch": 1.54, "grad_norm": 1.984375, "learning_rate": 0.000496770901919324, "loss": 0.2245, "step": 37190 }, { "epoch": 1.54, "grad_norm": 0.75, "learning_rate": 0.0004967691642222174, "loss": 0.2332, "step": 37200 }, { "epoch": 1.54, "grad_norm": 0.8359375, "learning_rate": 0.0004967674260607179, "loss": 0.1794, "step": 37210 }, { "epoch": 1.54, "grad_norm": 0.0, "learning_rate": 0.0004967656874348287, "loss": 0.197, "step": 37220 }, { "epoch": 1.54, "grad_norm": 1.0546875, "learning_rate": 0.0004967639483445532, "loss": 0.2429, "step": 37230 }, { "epoch": 1.54, "grad_norm": 0.458984375, "learning_rate": 0.0004967622087898945, "loss": 0.2377, "step": 37240 }, { "epoch": 1.54, "grad_norm": 0.318359375, "learning_rate": 0.0004967604687708561, "loss": 0.277, "step": 37250 }, { "epoch": 1.54, "grad_norm": 0.7578125, "learning_rate": 0.000496758728287441, "loss": 0.205, "step": 37260 }, { "epoch": 1.54, "grad_norm": 0.7265625, "learning_rate": 0.0004967569873396529, "loss": 0.2005, "step": 37270 }, { "epoch": 1.54, "grad_norm": 0.546875, "learning_rate": 0.0004967552459274945, "loss": 0.243, "step": 37280 }, { "epoch": 1.54, "grad_norm": 0.8671875, "learning_rate": 0.0004967535040509694, "loss": 0.2718, "step": 37290 }, { "epoch": 1.54, "grad_norm": 0.94921875, "learning_rate": 0.000496751761710081, "loss": 0.2361, "step": 37300 }, { "epoch": 1.55, "grad_norm": 0.7578125, "learning_rate": 0.0004967500189048324, "loss": 0.2434, "step": 37310 }, { "epoch": 1.55, "grad_norm": 0.3515625, "learning_rate": 0.0004967482756352269, "loss": 0.2048, "step": 37320 }, { "epoch": 1.55, "grad_norm": 0.4453125, "learning_rate": 0.0004967465319012677, "loss": 0.2087, "step": 37330 }, { "epoch": 1.55, "grad_norm": 0.462890625, "learning_rate": 0.0004967447877029581, "loss": 0.2013, "step": 37340 }, { "epoch": 1.55, "grad_norm": 0.427734375, "learning_rate": 0.0004967430430403017, "loss": 0.243, "step": 37350 }, { "epoch": 1.55, "grad_norm": 0.69140625, "learning_rate": 0.0004967412979133014, "loss": 0.2354, "step": 37360 }, { "epoch": 1.55, "grad_norm": 0.486328125, "learning_rate": 0.0004967395523219607, "loss": 0.1833, "step": 37370 }, { "epoch": 1.55, "grad_norm": 0.6875, "learning_rate": 0.0004967378062662827, "loss": 0.2784, "step": 37380 }, { "epoch": 1.55, "grad_norm": 0.9375, "learning_rate": 0.0004967360597462709, "loss": 0.2154, "step": 37390 }, { "epoch": 1.55, "grad_norm": 0.61328125, "learning_rate": 0.0004967343127619284, "loss": 0.2248, "step": 37400 }, { "epoch": 1.55, "grad_norm": 0.25390625, "learning_rate": 0.0004967325653132586, "loss": 0.1726, "step": 37410 }, { "epoch": 1.55, "grad_norm": 0.55859375, "learning_rate": 0.0004967308174002648, "loss": 0.2466, "step": 37420 }, { "epoch": 1.55, "grad_norm": 2.8125, "learning_rate": 0.0004967290690229502, "loss": 0.217, "step": 37430 }, { "epoch": 1.55, "grad_norm": 0.65234375, "learning_rate": 0.0004967273201813182, "loss": 0.2195, "step": 37440 }, { "epoch": 1.55, "grad_norm": 0.41796875, "learning_rate": 0.0004967255708753719, "loss": 0.2434, "step": 37450 }, { "epoch": 1.55, "grad_norm": 0.70703125, "learning_rate": 0.0004967238211051148, "loss": 0.1943, "step": 37460 }, { "epoch": 1.55, "grad_norm": 1.0390625, "learning_rate": 0.0004967220708705501, "loss": 0.251, "step": 37470 }, { "epoch": 1.55, "grad_norm": 0.400390625, "learning_rate": 0.0004967203201716811, "loss": 0.2268, "step": 37480 }, { "epoch": 1.55, "grad_norm": 0.16796875, "learning_rate": 0.000496718569008511, "loss": 0.1789, "step": 37490 }, { "epoch": 1.55, "grad_norm": 1.203125, "learning_rate": 0.0004967168173810435, "loss": 0.2521, "step": 37500 }, { "epoch": 1.55, "grad_norm": 0.72265625, "learning_rate": 0.0004967150652892814, "loss": 0.2471, "step": 37510 }, { "epoch": 1.55, "grad_norm": 0.296875, "learning_rate": 0.0004967133127332281, "loss": 0.206, "step": 37520 }, { "epoch": 1.55, "grad_norm": 0.60546875, "learning_rate": 0.0004967115597128871, "loss": 0.2749, "step": 37530 }, { "epoch": 1.55, "grad_norm": 0.7734375, "learning_rate": 0.0004967098062282616, "loss": 0.225, "step": 37540 }, { "epoch": 1.56, "grad_norm": 0.7578125, "learning_rate": 0.0004967080522793548, "loss": 0.2028, "step": 37550 }, { "epoch": 1.56, "grad_norm": 0.1923828125, "learning_rate": 0.0004967062978661702, "loss": 0.2156, "step": 37560 }, { "epoch": 1.56, "grad_norm": 0.294921875, "learning_rate": 0.0004967045429887109, "loss": 0.1863, "step": 37570 }, { "epoch": 1.56, "grad_norm": 0.99609375, "learning_rate": 0.0004967027876469803, "loss": 0.2772, "step": 37580 }, { "epoch": 1.56, "grad_norm": 2.03125, "learning_rate": 0.0004967010318409817, "loss": 0.2052, "step": 37590 }, { "epoch": 1.56, "grad_norm": 1.2578125, "learning_rate": 0.0004966992755707184, "loss": 0.2719, "step": 37600 }, { "epoch": 1.56, "grad_norm": 1.6640625, "learning_rate": 0.0004966975188361938, "loss": 0.234, "step": 37610 }, { "epoch": 1.56, "grad_norm": 0.59375, "learning_rate": 0.000496695761637411, "loss": 0.2031, "step": 37620 }, { "epoch": 1.56, "grad_norm": 1.2578125, "learning_rate": 0.0004966940039743735, "loss": 0.2136, "step": 37630 }, { "epoch": 1.56, "grad_norm": 1.2734375, "learning_rate": 0.0004966922458470844, "loss": 0.2123, "step": 37640 }, { "epoch": 1.56, "grad_norm": 1.3984375, "learning_rate": 0.0004966904872555473, "loss": 0.2626, "step": 37650 }, { "epoch": 1.56, "grad_norm": 1.4453125, "learning_rate": 0.0004966887281997651, "loss": 0.2525, "step": 37660 }, { "epoch": 1.56, "grad_norm": 0.462890625, "learning_rate": 0.0004966869686797415, "loss": 0.2392, "step": 37670 }, { "epoch": 1.56, "grad_norm": 3.375, "learning_rate": 0.0004966852086954797, "loss": 0.2299, "step": 37680 }, { "epoch": 1.56, "grad_norm": 0.1904296875, "learning_rate": 0.000496683448246983, "loss": 0.1954, "step": 37690 }, { "epoch": 1.56, "grad_norm": 1.40625, "learning_rate": 0.0004966816873342546, "loss": 0.2288, "step": 37700 }, { "epoch": 1.56, "grad_norm": 0.74609375, "learning_rate": 0.0004966799259572979, "loss": 0.2336, "step": 37710 }, { "epoch": 1.56, "grad_norm": 0.8515625, "learning_rate": 0.0004966781641161163, "loss": 0.2269, "step": 37720 }, { "epoch": 1.56, "grad_norm": 0.62109375, "learning_rate": 0.000496676401810713, "loss": 0.2347, "step": 37730 }, { "epoch": 1.56, "grad_norm": 0.359375, "learning_rate": 0.0004966746390410913, "loss": 0.2399, "step": 37740 }, { "epoch": 1.56, "grad_norm": 0.3125, "learning_rate": 0.0004966728758072546, "loss": 0.2203, "step": 37750 }, { "epoch": 1.56, "grad_norm": 0.48828125, "learning_rate": 0.0004966711121092062, "loss": 0.2431, "step": 37760 }, { "epoch": 1.56, "grad_norm": 0.48828125, "learning_rate": 0.0004966693479469495, "loss": 0.2287, "step": 37770 }, { "epoch": 1.56, "grad_norm": 0.765625, "learning_rate": 0.0004966675833204875, "loss": 0.2242, "step": 37780 }, { "epoch": 1.57, "grad_norm": 1.1796875, "learning_rate": 0.0004966658182298239, "loss": 0.2299, "step": 37790 }, { "epoch": 1.57, "grad_norm": 0.318359375, "learning_rate": 0.0004966640526749619, "loss": 0.2695, "step": 37800 }, { "epoch": 1.57, "grad_norm": 0.48046875, "learning_rate": 0.0004966622866559048, "loss": 0.2649, "step": 37810 }, { "epoch": 1.57, "grad_norm": 0.484375, "learning_rate": 0.0004966605201726558, "loss": 0.2424, "step": 37820 }, { "epoch": 1.57, "grad_norm": 0.796875, "learning_rate": 0.0004966587532252184, "loss": 0.2689, "step": 37830 }, { "epoch": 1.57, "grad_norm": 0.515625, "learning_rate": 0.0004966569858135958, "loss": 0.1895, "step": 37840 }, { "epoch": 1.57, "grad_norm": 1.4453125, "learning_rate": 0.0004966552179377914, "loss": 0.2209, "step": 37850 }, { "epoch": 1.57, "grad_norm": 0.98828125, "learning_rate": 0.0004966534495978085, "loss": 0.2127, "step": 37860 }, { "epoch": 1.57, "grad_norm": 0.65234375, "learning_rate": 0.0004966516807936506, "loss": 0.1849, "step": 37870 }, { "epoch": 1.57, "grad_norm": 1.421875, "learning_rate": 0.0004966499115253208, "loss": 0.1644, "step": 37880 }, { "epoch": 1.57, "grad_norm": 0.80078125, "learning_rate": 0.0004966481417928224, "loss": 0.2558, "step": 37890 }, { "epoch": 1.57, "grad_norm": 0.69921875, "learning_rate": 0.0004966463715961589, "loss": 0.2328, "step": 37900 }, { "epoch": 1.57, "grad_norm": 0.61328125, "learning_rate": 0.0004966446009353336, "loss": 0.2219, "step": 37910 }, { "epoch": 1.57, "grad_norm": 0.6484375, "learning_rate": 0.0004966428298103497, "loss": 0.1814, "step": 37920 }, { "epoch": 1.57, "grad_norm": 0.6015625, "learning_rate": 0.0004966410582212108, "loss": 0.2724, "step": 37930 }, { "epoch": 1.57, "grad_norm": 0.57421875, "learning_rate": 0.0004966392861679199, "loss": 0.2275, "step": 37940 }, { "epoch": 1.57, "grad_norm": 0.67578125, "learning_rate": 0.0004966375136504806, "loss": 0.2155, "step": 37950 }, { "epoch": 1.57, "grad_norm": 0.451171875, "learning_rate": 0.000496635740668896, "loss": 0.2427, "step": 37960 }, { "epoch": 1.57, "grad_norm": 0.451171875, "learning_rate": 0.0004966339672231697, "loss": 0.2243, "step": 37970 }, { "epoch": 1.57, "grad_norm": 0.81640625, "learning_rate": 0.0004966321933133049, "loss": 0.2224, "step": 37980 }, { "epoch": 1.57, "grad_norm": 0.74609375, "learning_rate": 0.0004966304189393049, "loss": 0.2376, "step": 37990 }, { "epoch": 1.57, "grad_norm": 0.365234375, "learning_rate": 0.000496628644101173, "loss": 0.2107, "step": 38000 }, { "epoch": 1.57, "grad_norm": 0.51953125, "learning_rate": 0.0004966268687989128, "loss": 0.2843, "step": 38010 }, { "epoch": 1.57, "grad_norm": 0.7578125, "learning_rate": 0.0004966250930325274, "loss": 0.2228, "step": 38020 }, { "epoch": 1.58, "grad_norm": 0.5859375, "learning_rate": 0.0004966233168020202, "loss": 0.2624, "step": 38030 }, { "epoch": 1.58, "grad_norm": 0.48046875, "learning_rate": 0.0004966215401073946, "loss": 0.2634, "step": 38040 }, { "epoch": 1.58, "grad_norm": 0.53125, "learning_rate": 0.0004966197629486538, "loss": 0.1956, "step": 38050 }, { "epoch": 1.58, "grad_norm": 0.5625, "learning_rate": 0.0004966179853258013, "loss": 0.1966, "step": 38060 }, { "epoch": 1.58, "grad_norm": 0.3125, "learning_rate": 0.0004966162072388404, "loss": 0.1949, "step": 38070 }, { "epoch": 1.58, "grad_norm": 0.384765625, "learning_rate": 0.0004966144286877743, "loss": 0.1764, "step": 38080 }, { "epoch": 1.58, "grad_norm": 0.94921875, "learning_rate": 0.0004966126496726066, "loss": 0.2084, "step": 38090 }, { "epoch": 1.58, "grad_norm": 0.796875, "learning_rate": 0.0004966108701933405, "loss": 0.2283, "step": 38100 }, { "epoch": 1.58, "grad_norm": 0.46875, "learning_rate": 0.0004966090902499793, "loss": 0.2196, "step": 38110 }, { "epoch": 1.58, "grad_norm": 0.37890625, "learning_rate": 0.0004966073098425266, "loss": 0.1983, "step": 38120 }, { "epoch": 1.58, "grad_norm": 1.171875, "learning_rate": 0.0004966055289709854, "loss": 0.2531, "step": 38130 }, { "epoch": 1.58, "grad_norm": 0.8046875, "learning_rate": 0.0004966037476353593, "loss": 0.2735, "step": 38140 }, { "epoch": 1.58, "grad_norm": 0.64453125, "learning_rate": 0.0004966019658356516, "loss": 0.208, "step": 38150 }, { "epoch": 1.58, "grad_norm": 0.72265625, "learning_rate": 0.0004966001835718656, "loss": 0.1757, "step": 38160 }, { "epoch": 1.58, "grad_norm": 0.404296875, "learning_rate": 0.0004965984008440048, "loss": 0.2421, "step": 38170 }, { "epoch": 1.58, "grad_norm": 1.0546875, "learning_rate": 0.0004965966176520723, "loss": 0.2151, "step": 38180 }, { "epoch": 1.58, "grad_norm": 0.78125, "learning_rate": 0.0004965948339960716, "loss": 0.2561, "step": 38190 }, { "epoch": 1.58, "grad_norm": 0.625, "learning_rate": 0.000496593049876006, "loss": 0.1962, "step": 38200 }, { "epoch": 1.58, "grad_norm": 0.5546875, "learning_rate": 0.0004965912652918791, "loss": 0.1981, "step": 38210 }, { "epoch": 1.58, "grad_norm": 0.95703125, "learning_rate": 0.0004965894802436939, "loss": 0.1889, "step": 38220 }, { "epoch": 1.58, "grad_norm": 0.625, "learning_rate": 0.0004965876947314539, "loss": 0.256, "step": 38230 }, { "epoch": 1.58, "grad_norm": 0.55859375, "learning_rate": 0.0004965859087551626, "loss": 0.2032, "step": 38240 }, { "epoch": 1.58, "grad_norm": 0.30078125, "learning_rate": 0.0004965841223148233, "loss": 0.199, "step": 38250 }, { "epoch": 1.58, "grad_norm": 0.53125, "learning_rate": 0.0004965823354104392, "loss": 0.2076, "step": 38260 }, { "epoch": 1.59, "grad_norm": 0.474609375, "learning_rate": 0.0004965805480420138, "loss": 0.2183, "step": 38270 }, { "epoch": 1.59, "grad_norm": 0.7421875, "learning_rate": 0.0004965787602095505, "loss": 0.206, "step": 38280 }, { "epoch": 1.59, "grad_norm": 0.796875, "learning_rate": 0.0004965769719130525, "loss": 0.2385, "step": 38290 }, { "epoch": 1.59, "grad_norm": 1.78125, "learning_rate": 0.0004965751831525233, "loss": 0.2316, "step": 38300 }, { "epoch": 1.59, "grad_norm": 2.84375, "learning_rate": 0.0004965733939279662, "loss": 0.2565, "step": 38310 }, { "epoch": 1.59, "grad_norm": 0.37109375, "learning_rate": 0.0004965716042393847, "loss": 0.2078, "step": 38320 }, { "epoch": 1.59, "grad_norm": 0.77734375, "learning_rate": 0.000496569814086782, "loss": 0.2191, "step": 38330 }, { "epoch": 1.59, "grad_norm": 0.259765625, "learning_rate": 0.0004965680234701615, "loss": 0.2227, "step": 38340 }, { "epoch": 1.59, "grad_norm": 0.828125, "learning_rate": 0.0004965662323895267, "loss": 0.2208, "step": 38350 }, { "epoch": 1.59, "grad_norm": 0.41015625, "learning_rate": 0.0004965644408448808, "loss": 0.2335, "step": 38360 }, { "epoch": 1.59, "grad_norm": 0.56640625, "learning_rate": 0.0004965626488362273, "loss": 0.2146, "step": 38370 }, { "epoch": 1.59, "grad_norm": 0.671875, "learning_rate": 0.0004965608563635695, "loss": 0.2357, "step": 38380 }, { "epoch": 1.59, "grad_norm": 0.498046875, "learning_rate": 0.0004965590634269108, "loss": 0.2408, "step": 38390 }, { "epoch": 1.59, "grad_norm": 2.234375, "learning_rate": 0.0004965572700262546, "loss": 0.2274, "step": 38400 }, { "epoch": 1.59, "grad_norm": 0.7578125, "learning_rate": 0.0004965554761616043, "loss": 0.2149, "step": 38410 }, { "epoch": 1.59, "grad_norm": 0.2216796875, "learning_rate": 0.000496553681832963, "loss": 0.2548, "step": 38420 }, { "epoch": 1.59, "grad_norm": 0.47265625, "learning_rate": 0.0004965518870403345, "loss": 0.2672, "step": 38430 }, { "epoch": 1.59, "grad_norm": 1.78125, "learning_rate": 0.0004965500917837218, "loss": 0.2331, "step": 38440 }, { "epoch": 1.59, "grad_norm": 0.9453125, "learning_rate": 0.0004965482960631286, "loss": 0.2134, "step": 38450 }, { "epoch": 1.59, "grad_norm": 0.61328125, "learning_rate": 0.0004965464998785581, "loss": 0.2122, "step": 38460 }, { "epoch": 1.59, "grad_norm": 0.828125, "learning_rate": 0.0004965447032300136, "loss": 0.2234, "step": 38470 }, { "epoch": 1.59, "grad_norm": 0.8515625, "learning_rate": 0.0004965429061174987, "loss": 0.2588, "step": 38480 }, { "epoch": 1.59, "grad_norm": 0.375, "learning_rate": 0.0004965411085410168, "loss": 0.2769, "step": 38490 }, { "epoch": 1.59, "grad_norm": 0.6328125, "learning_rate": 0.0004965393105005709, "loss": 0.2516, "step": 38500 }, { "epoch": 1.6, "grad_norm": 0.5390625, "learning_rate": 0.0004965375119961648, "loss": 0.2536, "step": 38510 }, { "epoch": 1.6, "grad_norm": 0.60546875, "learning_rate": 0.0004965357130278017, "loss": 0.2514, "step": 38520 }, { "epoch": 1.6, "grad_norm": 0.177734375, "learning_rate": 0.000496533913595485, "loss": 0.2303, "step": 38530 }, { "epoch": 1.6, "grad_norm": 0.73046875, "learning_rate": 0.0004965321136992182, "loss": 0.1954, "step": 38540 }, { "epoch": 1.6, "grad_norm": 0.55078125, "learning_rate": 0.0004965303133390044, "loss": 0.2214, "step": 38550 }, { "epoch": 1.6, "grad_norm": 0.87890625, "learning_rate": 0.0004965285125148473, "loss": 0.2906, "step": 38560 }, { "epoch": 1.6, "grad_norm": 0.66015625, "learning_rate": 0.0004965267112267501, "loss": 0.2503, "step": 38570 }, { "epoch": 1.6, "grad_norm": 5.09375, "learning_rate": 0.0004965249094747163, "loss": 0.2665, "step": 38580 }, { "epoch": 1.6, "grad_norm": 0.62890625, "learning_rate": 0.0004965231072587492, "loss": 0.1989, "step": 38590 }, { "epoch": 1.6, "grad_norm": 0.361328125, "learning_rate": 0.0004965213045788523, "loss": 0.2027, "step": 38600 }, { "epoch": 1.6, "grad_norm": 0.390625, "learning_rate": 0.0004965195014350288, "loss": 0.2034, "step": 38610 }, { "epoch": 1.6, "grad_norm": 0.41015625, "learning_rate": 0.0004965176978272823, "loss": 0.2163, "step": 38620 }, { "epoch": 1.6, "grad_norm": 0.59375, "learning_rate": 0.0004965158937556162, "loss": 0.1841, "step": 38630 }, { "epoch": 1.6, "grad_norm": 0.48046875, "learning_rate": 0.0004965140892200336, "loss": 0.1566, "step": 38640 }, { "epoch": 1.6, "grad_norm": 0.6015625, "learning_rate": 0.0004965122842205382, "loss": 0.2586, "step": 38650 }, { "epoch": 1.6, "grad_norm": 0.330078125, "learning_rate": 0.0004965104787571334, "loss": 0.2449, "step": 38660 }, { "epoch": 1.6, "grad_norm": 0.62109375, "learning_rate": 0.0004965086728298223, "loss": 0.2156, "step": 38670 }, { "epoch": 1.6, "grad_norm": 0.8984375, "learning_rate": 0.0004965068664386088, "loss": 0.2371, "step": 38680 }, { "epoch": 1.6, "grad_norm": 0.55859375, "learning_rate": 0.0004965050595834957, "loss": 0.2011, "step": 38690 }, { "epoch": 1.6, "grad_norm": 0.80859375, "learning_rate": 0.0004965032522644869, "loss": 0.2581, "step": 38700 }, { "epoch": 1.6, "grad_norm": 1.03125, "learning_rate": 0.0004965014444815855, "loss": 0.2612, "step": 38710 }, { "epoch": 1.6, "grad_norm": 0.63671875, "learning_rate": 0.000496499636234795, "loss": 0.1947, "step": 38720 }, { "epoch": 1.6, "grad_norm": 0.92578125, "learning_rate": 0.0004964978275241188, "loss": 0.1904, "step": 38730 }, { "epoch": 1.6, "grad_norm": 0.404296875, "learning_rate": 0.0004964960183495604, "loss": 0.2257, "step": 38740 }, { "epoch": 1.61, "grad_norm": 0.205078125, "learning_rate": 0.000496494208711123, "loss": 0.2578, "step": 38750 }, { "epoch": 1.61, "grad_norm": 0.734375, "learning_rate": 0.0004964923986088102, "loss": 0.2196, "step": 38760 }, { "epoch": 1.61, "grad_norm": 0.91015625, "learning_rate": 0.0004964905880426253, "loss": 0.2292, "step": 38770 }, { "epoch": 1.61, "grad_norm": 0.494140625, "learning_rate": 0.0004964887770125717, "loss": 0.2129, "step": 38780 }, { "epoch": 1.61, "grad_norm": 2.0, "learning_rate": 0.0004964869655186529, "loss": 0.2124, "step": 38790 }, { "epoch": 1.61, "grad_norm": 1.609375, "learning_rate": 0.0004964851535608722, "loss": 0.2011, "step": 38800 }, { "epoch": 1.61, "grad_norm": 0.90625, "learning_rate": 0.000496483341139233, "loss": 0.238, "step": 38810 }, { "epoch": 1.61, "grad_norm": 0.2109375, "learning_rate": 0.0004964815282537388, "loss": 0.2688, "step": 38820 }, { "epoch": 1.61, "grad_norm": 0.42578125, "learning_rate": 0.000496479714904393, "loss": 0.1965, "step": 38830 }, { "epoch": 1.61, "grad_norm": 0.5234375, "learning_rate": 0.0004964779010911991, "loss": 0.2136, "step": 38840 }, { "epoch": 1.61, "grad_norm": 0.68359375, "learning_rate": 0.0004964760868141604, "loss": 0.26, "step": 38850 }, { "epoch": 1.61, "grad_norm": 0.76171875, "learning_rate": 0.0004964742720732803, "loss": 0.2183, "step": 38860 }, { "epoch": 1.61, "grad_norm": 0.640625, "learning_rate": 0.000496472456868562, "loss": 0.1965, "step": 38870 }, { "epoch": 1.61, "grad_norm": 0.796875, "learning_rate": 0.0004964706412000094, "loss": 0.195, "step": 38880 }, { "epoch": 1.61, "grad_norm": 0.482421875, "learning_rate": 0.0004964688250676256, "loss": 0.2827, "step": 38890 }, { "epoch": 1.61, "grad_norm": 0.6875, "learning_rate": 0.0004964670084714141, "loss": 0.2533, "step": 38900 }, { "epoch": 1.61, "grad_norm": 3.203125, "learning_rate": 0.0004964651914113783, "loss": 0.2906, "step": 38910 }, { "epoch": 1.61, "grad_norm": 1.546875, "learning_rate": 0.0004964633738875216, "loss": 0.1613, "step": 38920 }, { "epoch": 1.61, "grad_norm": 0.87109375, "learning_rate": 0.0004964615558998474, "loss": 0.2324, "step": 38930 }, { "epoch": 1.61, "grad_norm": 0.2451171875, "learning_rate": 0.0004964597374483593, "loss": 0.1969, "step": 38940 }, { "epoch": 1.61, "grad_norm": 0.71484375, "learning_rate": 0.0004964579185330605, "loss": 0.2234, "step": 38950 }, { "epoch": 1.61, "grad_norm": 0.51953125, "learning_rate": 0.0004964560991539546, "loss": 0.2566, "step": 38960 }, { "epoch": 1.61, "grad_norm": 0.51171875, "learning_rate": 0.0004964542793110449, "loss": 0.196, "step": 38970 }, { "epoch": 1.61, "grad_norm": 0.50390625, "learning_rate": 0.0004964524590043347, "loss": 0.1914, "step": 38980 }, { "epoch": 1.61, "grad_norm": 0.71875, "learning_rate": 0.0004964506382338278, "loss": 0.2505, "step": 38990 }, { "epoch": 1.62, "grad_norm": 0.734375, "learning_rate": 0.0004964488169995274, "loss": 0.2008, "step": 39000 }, { "epoch": 1.62, "grad_norm": 0.65625, "learning_rate": 0.0004964469953014369, "loss": 0.2866, "step": 39010 }, { "epoch": 1.62, "grad_norm": 0.60546875, "learning_rate": 0.0004964451731395597, "loss": 0.213, "step": 39020 }, { "epoch": 1.62, "grad_norm": 0.3671875, "learning_rate": 0.0004964433505138993, "loss": 0.2161, "step": 39030 }, { "epoch": 1.62, "grad_norm": 0.66796875, "learning_rate": 0.0004964415274244592, "loss": 0.2642, "step": 39040 }, { "epoch": 1.62, "grad_norm": 0.9453125, "learning_rate": 0.0004964397038712428, "loss": 0.2482, "step": 39050 }, { "epoch": 1.62, "grad_norm": 0.65625, "learning_rate": 0.0004964378798542533, "loss": 0.2686, "step": 39060 }, { "epoch": 1.62, "grad_norm": 1.2734375, "learning_rate": 0.0004964360553734945, "loss": 0.1995, "step": 39070 }, { "epoch": 1.62, "grad_norm": 0.89453125, "learning_rate": 0.0004964342304289697, "loss": 0.2429, "step": 39080 }, { "epoch": 1.62, "grad_norm": 0.48046875, "learning_rate": 0.0004964324050206821, "loss": 0.2909, "step": 39090 }, { "epoch": 1.62, "grad_norm": 0.2578125, "learning_rate": 0.0004964305791486355, "loss": 0.2243, "step": 39100 }, { "epoch": 1.62, "grad_norm": 0.875, "learning_rate": 0.0004964287528128329, "loss": 0.2537, "step": 39110 }, { "epoch": 1.62, "grad_norm": 0.37890625, "learning_rate": 0.0004964269260132782, "loss": 0.2358, "step": 39120 }, { "epoch": 1.62, "grad_norm": 0.40625, "learning_rate": 0.0004964250987499747, "loss": 0.2647, "step": 39130 }, { "epoch": 1.62, "grad_norm": 0.73828125, "learning_rate": 0.0004964232710229256, "loss": 0.2449, "step": 39140 }, { "epoch": 1.62, "grad_norm": 0.65625, "learning_rate": 0.0004964214428321347, "loss": 0.2553, "step": 39150 }, { "epoch": 1.62, "grad_norm": 0.53125, "learning_rate": 0.0004964196141776052, "loss": 0.2214, "step": 39160 }, { "epoch": 1.62, "grad_norm": 0.59765625, "learning_rate": 0.0004964177850593405, "loss": 0.187, "step": 39170 }, { "epoch": 1.62, "grad_norm": 0.240234375, "learning_rate": 0.0004964159554773442, "loss": 0.2115, "step": 39180 }, { "epoch": 1.62, "grad_norm": 1.1640625, "learning_rate": 0.0004964141254316197, "loss": 0.2459, "step": 39190 }, { "epoch": 1.62, "grad_norm": 0.66015625, "learning_rate": 0.0004964122949221705, "loss": 0.2429, "step": 39200 }, { "epoch": 1.62, "grad_norm": 0.48046875, "learning_rate": 0.0004964104639489998, "loss": 0.1995, "step": 39210 }, { "epoch": 1.62, "grad_norm": 1.03125, "learning_rate": 0.0004964086325121113, "loss": 0.2204, "step": 39220 }, { "epoch": 1.62, "grad_norm": 0.5703125, "learning_rate": 0.0004964068006115083, "loss": 0.2407, "step": 39230 }, { "epoch": 1.63, "grad_norm": 0.3671875, "learning_rate": 0.0004964049682471944, "loss": 0.2388, "step": 39240 }, { "epoch": 1.63, "grad_norm": 0.259765625, "learning_rate": 0.0004964031354191729, "loss": 0.1868, "step": 39250 }, { "epoch": 1.63, "grad_norm": 0.76953125, "learning_rate": 0.0004964013021274473, "loss": 0.2624, "step": 39260 }, { "epoch": 1.63, "grad_norm": 0.87109375, "learning_rate": 0.0004963994683720212, "loss": 0.1746, "step": 39270 }, { "epoch": 1.63, "grad_norm": 0.310546875, "learning_rate": 0.0004963976341528978, "loss": 0.1977, "step": 39280 }, { "epoch": 1.63, "grad_norm": 1.4296875, "learning_rate": 0.0004963957994700806, "loss": 0.2068, "step": 39290 }, { "epoch": 1.63, "grad_norm": 0.609375, "learning_rate": 0.0004963939643235732, "loss": 0.2348, "step": 39300 }, { "epoch": 1.63, "grad_norm": 1.4609375, "learning_rate": 0.0004963921287133789, "loss": 0.2653, "step": 39310 }, { "epoch": 1.63, "grad_norm": 2.0625, "learning_rate": 0.0004963902926395013, "loss": 0.1923, "step": 39320 }, { "epoch": 1.63, "grad_norm": 2.890625, "learning_rate": 0.0004963884561019438, "loss": 0.2815, "step": 39330 }, { "epoch": 1.63, "grad_norm": 0.35546875, "learning_rate": 0.0004963866191007099, "loss": 0.2199, "step": 39340 }, { "epoch": 1.63, "grad_norm": 0.89453125, "learning_rate": 0.0004963847816358027, "loss": 0.2558, "step": 39350 }, { "epoch": 1.63, "grad_norm": 0.58984375, "learning_rate": 0.0004963829437072262, "loss": 0.236, "step": 39360 }, { "epoch": 1.63, "grad_norm": 0.9453125, "learning_rate": 0.0004963811053149835, "loss": 0.2675, "step": 39370 }, { "epoch": 1.63, "grad_norm": 1.9921875, "learning_rate": 0.0004963792664590781, "loss": 0.2238, "step": 39380 }, { "epoch": 1.63, "grad_norm": 2.125, "learning_rate": 0.0004963774271395137, "loss": 0.2198, "step": 39390 }, { "epoch": 1.63, "grad_norm": 0.84765625, "learning_rate": 0.0004963755873562936, "loss": 0.2511, "step": 39400 }, { "epoch": 1.63, "grad_norm": 0.73046875, "learning_rate": 0.000496373747109421, "loss": 0.2361, "step": 39410 }, { "epoch": 1.63, "grad_norm": 0.9453125, "learning_rate": 0.0004963719063988998, "loss": 0.2062, "step": 39420 }, { "epoch": 1.63, "grad_norm": 0.2373046875, "learning_rate": 0.0004963700652247333, "loss": 0.242, "step": 39430 }, { "epoch": 1.63, "grad_norm": 1.4140625, "learning_rate": 0.0004963682235869249, "loss": 0.2005, "step": 39440 }, { "epoch": 1.63, "grad_norm": 1.640625, "learning_rate": 0.0004963663814854781, "loss": 0.2058, "step": 39450 }, { "epoch": 1.63, "grad_norm": 0.97265625, "learning_rate": 0.0004963645389203964, "loss": 0.2506, "step": 39460 }, { "epoch": 1.63, "grad_norm": 0.515625, "learning_rate": 0.0004963626958916832, "loss": 0.2263, "step": 39470 }, { "epoch": 1.64, "grad_norm": 0.59765625, "learning_rate": 0.000496360852399342, "loss": 0.2359, "step": 39480 }, { "epoch": 1.64, "grad_norm": 1.0703125, "learning_rate": 0.0004963590084433764, "loss": 0.2448, "step": 39490 }, { "epoch": 1.64, "grad_norm": 0.94921875, "learning_rate": 0.0004963571640237896, "loss": 0.256, "step": 39500 }, { "epoch": 1.64, "grad_norm": 0.37109375, "learning_rate": 0.0004963553191405852, "loss": 0.26, "step": 39510 }, { "epoch": 1.64, "grad_norm": 0.31640625, "learning_rate": 0.0004963534737937669, "loss": 0.2365, "step": 39520 }, { "epoch": 1.64, "grad_norm": 0.59375, "learning_rate": 0.0004963516279833379, "loss": 0.2344, "step": 39530 }, { "epoch": 1.64, "grad_norm": 0.5859375, "learning_rate": 0.0004963497817093016, "loss": 0.2077, "step": 39540 }, { "epoch": 1.64, "grad_norm": 0.7265625, "learning_rate": 0.0004963479349716617, "loss": 0.2021, "step": 39550 }, { "epoch": 1.64, "grad_norm": 0.5625, "learning_rate": 0.0004963460877704215, "loss": 0.2187, "step": 39560 }, { "epoch": 1.64, "grad_norm": 0.78125, "learning_rate": 0.0004963442401055847, "loss": 0.2754, "step": 39570 }, { "epoch": 1.64, "grad_norm": 0.4296875, "learning_rate": 0.0004963423919771546, "loss": 0.2127, "step": 39580 }, { "epoch": 1.64, "grad_norm": 0.435546875, "learning_rate": 0.0004963405433851348, "loss": 0.2511, "step": 39590 }, { "epoch": 1.64, "grad_norm": 0.62890625, "learning_rate": 0.0004963386943295286, "loss": 0.2444, "step": 39600 }, { "epoch": 1.64, "grad_norm": 0.4453125, "learning_rate": 0.0004963368448103396, "loss": 0.2241, "step": 39610 }, { "epoch": 1.64, "grad_norm": 0.5234375, "learning_rate": 0.0004963349948275713, "loss": 0.2285, "step": 39620 }, { "epoch": 1.64, "grad_norm": 1.0390625, "learning_rate": 0.000496333144381227, "loss": 0.2366, "step": 39630 }, { "epoch": 1.64, "grad_norm": 0.427734375, "learning_rate": 0.0004963312934713104, "loss": 0.2541, "step": 39640 }, { "epoch": 1.64, "grad_norm": 0.76171875, "learning_rate": 0.000496329442097825, "loss": 0.2934, "step": 39650 }, { "epoch": 1.64, "grad_norm": 0.84765625, "learning_rate": 0.0004963275902607741, "loss": 0.1902, "step": 39660 }, { "epoch": 1.64, "grad_norm": 0.63671875, "learning_rate": 0.0004963257379601613, "loss": 0.2151, "step": 39670 }, { "epoch": 1.64, "grad_norm": 0.291015625, "learning_rate": 0.00049632388519599, "loss": 0.2226, "step": 39680 }, { "epoch": 1.64, "grad_norm": 0.67578125, "learning_rate": 0.0004963220319682639, "loss": 0.1651, "step": 39690 }, { "epoch": 1.64, "grad_norm": 0.6015625, "learning_rate": 0.0004963201782769862, "loss": 0.1702, "step": 39700 }, { "epoch": 1.64, "grad_norm": 0.54296875, "learning_rate": 0.0004963183241221606, "loss": 0.2205, "step": 39710 }, { "epoch": 1.65, "grad_norm": 0.734375, "learning_rate": 0.0004963164695037905, "loss": 0.2104, "step": 39720 }, { "epoch": 1.65, "grad_norm": 0.53515625, "learning_rate": 0.0004963146144218793, "loss": 0.2174, "step": 39730 }, { "epoch": 1.65, "grad_norm": 1.0546875, "learning_rate": 0.0004963127588764307, "loss": 0.2069, "step": 39740 }, { "epoch": 1.65, "grad_norm": 1.078125, "learning_rate": 0.0004963109028674481, "loss": 0.2056, "step": 39750 }, { "epoch": 1.65, "grad_norm": 3.953125, "learning_rate": 0.000496309046394935, "loss": 0.2357, "step": 39760 }, { "epoch": 1.65, "grad_norm": 0.42578125, "learning_rate": 0.0004963071894588948, "loss": 0.2084, "step": 39770 }, { "epoch": 1.65, "grad_norm": 0.474609375, "learning_rate": 0.000496305332059331, "loss": 0.2074, "step": 39780 }, { "epoch": 1.65, "grad_norm": 0.6640625, "learning_rate": 0.0004963034741962473, "loss": 0.1605, "step": 39790 }, { "epoch": 1.65, "grad_norm": 0.486328125, "learning_rate": 0.000496301615869647, "loss": 0.2223, "step": 39800 }, { "epoch": 1.65, "grad_norm": 0.95703125, "learning_rate": 0.0004962997570795337, "loss": 0.2065, "step": 39810 }, { "epoch": 1.65, "grad_norm": 0.6875, "learning_rate": 0.0004962978978259109, "loss": 0.2912, "step": 39820 }, { "epoch": 1.65, "grad_norm": 0.5078125, "learning_rate": 0.000496296038108782, "loss": 0.2187, "step": 39830 }, { "epoch": 1.65, "grad_norm": 1.0546875, "learning_rate": 0.0004962941779281505, "loss": 0.2004, "step": 39840 }, { "epoch": 1.65, "grad_norm": 0.9375, "learning_rate": 0.00049629231728402, "loss": 0.1986, "step": 39850 }, { "epoch": 1.65, "grad_norm": 1.3828125, "learning_rate": 0.0004962904561763939, "loss": 0.2279, "step": 39860 }, { "epoch": 1.65, "grad_norm": 0.57421875, "learning_rate": 0.0004962885946052758, "loss": 0.2008, "step": 39870 }, { "epoch": 1.65, "grad_norm": 1.671875, "learning_rate": 0.0004962867325706692, "loss": 0.2705, "step": 39880 }, { "epoch": 1.65, "grad_norm": 0.6484375, "learning_rate": 0.0004962848700725775, "loss": 0.2342, "step": 39890 }, { "epoch": 1.65, "grad_norm": 0.63671875, "learning_rate": 0.0004962830071110044, "loss": 0.276, "step": 39900 }, { "epoch": 1.65, "grad_norm": 0.63671875, "learning_rate": 0.0004962811436859531, "loss": 0.2465, "step": 39910 }, { "epoch": 1.65, "grad_norm": 0.400390625, "learning_rate": 0.0004962792797974274, "loss": 0.2014, "step": 39920 }, { "epoch": 1.65, "grad_norm": 0.328125, "learning_rate": 0.0004962774154454306, "loss": 0.2205, "step": 39930 }, { "epoch": 1.65, "grad_norm": 0.2392578125, "learning_rate": 0.0004962755506299664, "loss": 0.2614, "step": 39940 }, { "epoch": 1.65, "grad_norm": 0.365234375, "learning_rate": 0.0004962736853510382, "loss": 0.2035, "step": 39950 }, { "epoch": 1.66, "grad_norm": 1.21875, "learning_rate": 0.0004962718196086495, "loss": 0.2384, "step": 39960 }, { "epoch": 1.66, "grad_norm": 1.2265625, "learning_rate": 0.0004962699534028038, "loss": 0.2214, "step": 39970 }, { "epoch": 1.66, "grad_norm": 1.4296875, "learning_rate": 0.0004962680867335046, "loss": 0.1827, "step": 39980 }, { "epoch": 1.66, "grad_norm": 0.63671875, "learning_rate": 0.0004962662196007555, "loss": 0.1924, "step": 39990 }, { "epoch": 1.66, "grad_norm": 0.2890625, "learning_rate": 0.00049626435200456, "loss": 0.2233, "step": 40000 }, { "epoch": 1.66, "grad_norm": 0.62890625, "learning_rate": 0.0004962624839449216, "loss": 0.2202, "step": 40010 }, { "epoch": 1.66, "grad_norm": 0.498046875, "learning_rate": 0.0004962606154218438, "loss": 0.2659, "step": 40020 }, { "epoch": 1.66, "grad_norm": 0.77734375, "learning_rate": 0.0004962587464353301, "loss": 0.2327, "step": 40030 }, { "epoch": 1.66, "grad_norm": 0.83984375, "learning_rate": 0.000496256876985384, "loss": 0.2687, "step": 40040 }, { "epoch": 1.66, "grad_norm": 1.0625, "learning_rate": 0.000496255007072009, "loss": 0.2227, "step": 40050 }, { "epoch": 1.66, "grad_norm": 0.427734375, "learning_rate": 0.0004962531366952087, "loss": 0.2305, "step": 40060 }, { "epoch": 1.66, "grad_norm": 0.72265625, "learning_rate": 0.0004962512658549868, "loss": 0.1808, "step": 40070 }, { "epoch": 1.66, "grad_norm": 0.7265625, "learning_rate": 0.0004962493945513463, "loss": 0.242, "step": 40080 }, { "epoch": 1.66, "grad_norm": 0.6640625, "learning_rate": 0.0004962475227842912, "loss": 0.2242, "step": 40090 }, { "epoch": 1.66, "grad_norm": 0.294921875, "learning_rate": 0.0004962456505538248, "loss": 0.1929, "step": 40100 }, { "epoch": 1.66, "grad_norm": 0.298828125, "learning_rate": 0.0004962437778599508, "loss": 0.2266, "step": 40110 }, { "epoch": 1.66, "grad_norm": 0.54296875, "learning_rate": 0.0004962419047026724, "loss": 0.2184, "step": 40120 }, { "epoch": 1.66, "grad_norm": 0.5546875, "learning_rate": 0.0004962400310819934, "loss": 0.187, "step": 40130 }, { "epoch": 1.66, "grad_norm": 1.28125, "learning_rate": 0.0004962381569979174, "loss": 0.2801, "step": 40140 }, { "epoch": 1.66, "grad_norm": 0.8046875, "learning_rate": 0.0004962362824504476, "loss": 0.3076, "step": 40150 }, { "epoch": 1.66, "grad_norm": 0.8671875, "learning_rate": 0.0004962344074395878, "loss": 0.1774, "step": 40160 }, { "epoch": 1.66, "grad_norm": 0.59765625, "learning_rate": 0.0004962325319653413, "loss": 0.2228, "step": 40170 }, { "epoch": 1.66, "grad_norm": 2.4375, "learning_rate": 0.0004962306560277119, "loss": 0.2416, "step": 40180 }, { "epoch": 1.66, "grad_norm": 0.69921875, "learning_rate": 0.000496228779626703, "loss": 0.1342, "step": 40190 }, { "epoch": 1.67, "grad_norm": 0.70703125, "learning_rate": 0.000496226902762318, "loss": 0.2033, "step": 40200 }, { "epoch": 1.67, "grad_norm": 0.74609375, "learning_rate": 0.0004962250254345608, "loss": 0.1869, "step": 40210 }, { "epoch": 1.67, "grad_norm": 0.63671875, "learning_rate": 0.0004962231476434345, "loss": 0.1955, "step": 40220 }, { "epoch": 1.67, "grad_norm": 0.326171875, "learning_rate": 0.0004962212693889428, "loss": 0.2172, "step": 40230 }, { "epoch": 1.67, "grad_norm": 0.61328125, "learning_rate": 0.0004962193906710894, "loss": 0.2422, "step": 40240 }, { "epoch": 1.67, "grad_norm": 0.392578125, "learning_rate": 0.0004962175114898777, "loss": 0.2461, "step": 40250 }, { "epoch": 1.67, "grad_norm": 0.91796875, "learning_rate": 0.0004962156318453111, "loss": 0.267, "step": 40260 }, { "epoch": 1.67, "grad_norm": 0.255859375, "learning_rate": 0.0004962137517373934, "loss": 0.2113, "step": 40270 }, { "epoch": 1.67, "grad_norm": 1.3359375, "learning_rate": 0.0004962118711661278, "loss": 0.2289, "step": 40280 }, { "epoch": 1.67, "grad_norm": 1.421875, "learning_rate": 0.0004962099901315183, "loss": 0.2154, "step": 40290 }, { "epoch": 1.67, "grad_norm": 0.83203125, "learning_rate": 0.000496208108633568, "loss": 0.2358, "step": 40300 }, { "epoch": 1.67, "grad_norm": 0.546875, "learning_rate": 0.0004962062266722808, "loss": 0.2707, "step": 40310 }, { "epoch": 1.67, "grad_norm": 0.671875, "learning_rate": 0.0004962043442476599, "loss": 0.2025, "step": 40320 }, { "epoch": 1.67, "grad_norm": 0.484375, "learning_rate": 0.0004962024613597091, "loss": 0.189, "step": 40330 }, { "epoch": 1.67, "grad_norm": 0.765625, "learning_rate": 0.0004962005780084318, "loss": 0.1713, "step": 40340 }, { "epoch": 1.67, "grad_norm": 2.09375, "learning_rate": 0.0004961986941938316, "loss": 0.2291, "step": 40350 }, { "epoch": 1.67, "grad_norm": 0.498046875, "learning_rate": 0.0004961968099159122, "loss": 0.1799, "step": 40360 }, { "epoch": 1.67, "grad_norm": 0.6328125, "learning_rate": 0.0004961949251746768, "loss": 0.2221, "step": 40370 }, { "epoch": 1.67, "grad_norm": 0.458984375, "learning_rate": 0.0004961930399701292, "loss": 0.1898, "step": 40380 }, { "epoch": 1.67, "grad_norm": 0.359375, "learning_rate": 0.0004961911543022729, "loss": 0.2391, "step": 40390 }, { "epoch": 1.67, "grad_norm": 0.357421875, "learning_rate": 0.0004961892681711115, "loss": 0.2079, "step": 40400 }, { "epoch": 1.67, "grad_norm": 0.73046875, "learning_rate": 0.0004961873815766484, "loss": 0.2173, "step": 40410 }, { "epoch": 1.67, "grad_norm": 0.671875, "learning_rate": 0.0004961854945188872, "loss": 0.2676, "step": 40420 }, { "epoch": 1.67, "grad_norm": 0.87890625, "learning_rate": 0.0004961836069978316, "loss": 0.2334, "step": 40430 }, { "epoch": 1.68, "grad_norm": 0.98828125, "learning_rate": 0.000496181719013485, "loss": 0.2003, "step": 40440 }, { "epoch": 1.68, "grad_norm": 0.60546875, "learning_rate": 0.0004961798305658509, "loss": 0.229, "step": 40450 }, { "epoch": 1.68, "grad_norm": 0.58203125, "learning_rate": 0.0004961779416549331, "loss": 0.2484, "step": 40460 }, { "epoch": 1.68, "grad_norm": 0.58203125, "learning_rate": 0.0004961760522807348, "loss": 0.2288, "step": 40470 }, { "epoch": 1.68, "grad_norm": 0.53515625, "learning_rate": 0.0004961741624432598, "loss": 0.2432, "step": 40480 }, { "epoch": 1.68, "grad_norm": 1.5625, "learning_rate": 0.0004961722721425117, "loss": 0.2313, "step": 40490 }, { "epoch": 1.68, "grad_norm": 5.21875, "learning_rate": 0.000496170381378494, "loss": 0.2333, "step": 40500 }, { "epoch": 1.68, "grad_norm": 0.86328125, "learning_rate": 0.0004961684901512102, "loss": 0.1828, "step": 40510 }, { "epoch": 1.68, "grad_norm": 0.78515625, "learning_rate": 0.0004961665984606638, "loss": 0.2367, "step": 40520 }, { "epoch": 1.68, "grad_norm": 0.2734375, "learning_rate": 0.0004961647063068585, "loss": 0.2041, "step": 40530 }, { "epoch": 1.68, "grad_norm": 0.310546875, "learning_rate": 0.0004961628136897978, "loss": 0.2168, "step": 40540 }, { "epoch": 1.68, "grad_norm": 0.458984375, "learning_rate": 0.0004961609206094853, "loss": 0.2084, "step": 40550 }, { "epoch": 1.68, "grad_norm": 0.62890625, "learning_rate": 0.0004961590270659244, "loss": 0.2431, "step": 40560 }, { "epoch": 1.68, "grad_norm": 0.5234375, "learning_rate": 0.000496157133059119, "loss": 0.2277, "step": 40570 }, { "epoch": 1.68, "grad_norm": 0.890625, "learning_rate": 0.0004961552385890723, "loss": 0.2112, "step": 40580 }, { "epoch": 1.68, "grad_norm": 0.84765625, "learning_rate": 0.000496153343655788, "loss": 0.2303, "step": 40590 }, { "epoch": 1.68, "grad_norm": 0.55078125, "learning_rate": 0.0004961514482592699, "loss": 0.2913, "step": 40600 }, { "epoch": 1.68, "grad_norm": 0.58984375, "learning_rate": 0.0004961495523995211, "loss": 0.251, "step": 40610 }, { "epoch": 1.68, "grad_norm": 0.6796875, "learning_rate": 0.0004961476560765456, "loss": 0.2111, "step": 40620 }, { "epoch": 1.68, "grad_norm": 2.3125, "learning_rate": 0.0004961457592903467, "loss": 0.2221, "step": 40630 }, { "epoch": 1.68, "grad_norm": 0.703125, "learning_rate": 0.0004961438620409281, "loss": 0.2591, "step": 40640 }, { "epoch": 1.68, "grad_norm": 0.84765625, "learning_rate": 0.0004961419643282933, "loss": 0.2459, "step": 40650 }, { "epoch": 1.68, "grad_norm": 0.53515625, "learning_rate": 0.000496140066152446, "loss": 0.2709, "step": 40660 }, { "epoch": 1.68, "grad_norm": 1.1328125, "learning_rate": 0.0004961381675133896, "loss": 0.2244, "step": 40670 }, { "epoch": 1.68, "grad_norm": 0.83984375, "learning_rate": 0.0004961362684111277, "loss": 0.2174, "step": 40680 }, { "epoch": 1.69, "grad_norm": 0.353515625, "learning_rate": 0.000496134368845664, "loss": 0.2403, "step": 40690 }, { "epoch": 1.69, "grad_norm": 0.37890625, "learning_rate": 0.0004961324688170021, "loss": 0.1625, "step": 40700 }, { "epoch": 1.69, "grad_norm": 0.71875, "learning_rate": 0.0004961305683251452, "loss": 0.1845, "step": 40710 }, { "epoch": 1.69, "grad_norm": 2.28125, "learning_rate": 0.0004961286673700974, "loss": 0.1744, "step": 40720 }, { "epoch": 1.69, "grad_norm": 0.8125, "learning_rate": 0.0004961267659518619, "loss": 0.2237, "step": 40730 }, { "epoch": 1.69, "grad_norm": 0.490234375, "learning_rate": 0.0004961248640704424, "loss": 0.2316, "step": 40740 }, { "epoch": 1.69, "grad_norm": 0.8359375, "learning_rate": 0.0004961229617258426, "loss": 0.2557, "step": 40750 }, { "epoch": 1.69, "grad_norm": 0.8984375, "learning_rate": 0.0004961210589180658, "loss": 0.2861, "step": 40760 }, { "epoch": 1.69, "grad_norm": 1.0546875, "learning_rate": 0.0004961191556471159, "loss": 0.1975, "step": 40770 }, { "epoch": 1.69, "grad_norm": 1.5703125, "learning_rate": 0.0004961172519129962, "loss": 0.2069, "step": 40780 }, { "epoch": 1.69, "grad_norm": 0.90625, "learning_rate": 0.0004961153477157105, "loss": 0.2684, "step": 40790 }, { "epoch": 1.69, "grad_norm": 0.1484375, "learning_rate": 0.0004961134430552622, "loss": 0.2332, "step": 40800 }, { "epoch": 1.69, "grad_norm": 0.796875, "learning_rate": 0.0004961115379316551, "loss": 0.2107, "step": 40810 }, { "epoch": 1.69, "grad_norm": 0.71875, "learning_rate": 0.0004961096323448927, "loss": 0.1992, "step": 40820 }, { "epoch": 1.69, "grad_norm": 2.65625, "learning_rate": 0.0004961077262949784, "loss": 0.2467, "step": 40830 }, { "epoch": 1.69, "grad_norm": 0.86328125, "learning_rate": 0.000496105819781916, "loss": 0.2608, "step": 40840 }, { "epoch": 1.69, "grad_norm": 0.6171875, "learning_rate": 0.0004961039128057091, "loss": 0.2217, "step": 40850 }, { "epoch": 1.69, "grad_norm": 0.54296875, "learning_rate": 0.0004961020053663611, "loss": 0.2174, "step": 40860 }, { "epoch": 1.69, "grad_norm": 0.380859375, "learning_rate": 0.0004961000974638757, "loss": 0.1511, "step": 40870 }, { "epoch": 1.69, "grad_norm": 0.6953125, "learning_rate": 0.0004960981890982565, "loss": 0.2247, "step": 40880 }, { "epoch": 1.69, "grad_norm": 0.55078125, "learning_rate": 0.0004960962802695072, "loss": 0.269, "step": 40890 }, { "epoch": 1.69, "grad_norm": 0.69140625, "learning_rate": 0.0004960943709776311, "loss": 0.2349, "step": 40900 }, { "epoch": 1.69, "grad_norm": 1.296875, "learning_rate": 0.0004960924612226321, "loss": 0.2036, "step": 40910 }, { "epoch": 1.69, "grad_norm": 0.427734375, "learning_rate": 0.0004960905510045136, "loss": 0.1978, "step": 40920 }, { "epoch": 1.7, "grad_norm": 0.50390625, "learning_rate": 0.0004960886403232793, "loss": 0.2312, "step": 40930 }, { "epoch": 1.7, "grad_norm": 2.078125, "learning_rate": 0.0004960867291789327, "loss": 0.2576, "step": 40940 }, { "epoch": 1.7, "grad_norm": 0.59765625, "learning_rate": 0.0004960848175714775, "loss": 0.2704, "step": 40950 }, { "epoch": 1.7, "grad_norm": 0.2294921875, "learning_rate": 0.0004960829055009173, "loss": 0.2106, "step": 40960 }, { "epoch": 1.7, "grad_norm": 1.1015625, "learning_rate": 0.0004960809929672555, "loss": 0.1571, "step": 40970 }, { "epoch": 1.7, "grad_norm": 0.74609375, "learning_rate": 0.0004960790799704959, "loss": 0.234, "step": 40980 }, { "epoch": 1.7, "grad_norm": 0.474609375, "learning_rate": 0.0004960771665106421, "loss": 0.2104, "step": 40990 }, { "epoch": 1.7, "grad_norm": 0.5234375, "learning_rate": 0.0004960752525876976, "loss": 0.2817, "step": 41000 }, { "epoch": 1.7, "grad_norm": 0.78515625, "learning_rate": 0.0004960733382016661, "loss": 0.181, "step": 41010 }, { "epoch": 1.7, "grad_norm": 0.81640625, "learning_rate": 0.0004960714233525512, "loss": 0.2442, "step": 41020 }, { "epoch": 1.7, "grad_norm": 2.125, "learning_rate": 0.0004960695080403563, "loss": 0.2439, "step": 41030 }, { "epoch": 1.7, "grad_norm": 0.640625, "learning_rate": 0.0004960675922650853, "loss": 0.2108, "step": 41040 }, { "epoch": 1.7, "grad_norm": 1.3125, "learning_rate": 0.0004960656760267416, "loss": 0.2781, "step": 41050 }, { "epoch": 1.7, "grad_norm": 0.640625, "learning_rate": 0.0004960637593253288, "loss": 0.2701, "step": 41060 }, { "epoch": 1.7, "grad_norm": 1.6875, "learning_rate": 0.0004960618421608506, "loss": 0.2248, "step": 41070 }, { "epoch": 1.7, "grad_norm": 0.8671875, "learning_rate": 0.0004960599245333107, "loss": 0.256, "step": 41080 }, { "epoch": 1.7, "grad_norm": 0.88671875, "learning_rate": 0.0004960580064427125, "loss": 0.1663, "step": 41090 }, { "epoch": 1.7, "grad_norm": 0.58203125, "learning_rate": 0.0004960560878890598, "loss": 0.1989, "step": 41100 }, { "epoch": 1.7, "grad_norm": 0.69921875, "learning_rate": 0.000496054168872356, "loss": 0.2101, "step": 41110 }, { "epoch": 1.7, "grad_norm": 1.125, "learning_rate": 0.0004960522493926048, "loss": 0.2232, "step": 41120 }, { "epoch": 1.7, "grad_norm": 0.5546875, "learning_rate": 0.00049605032944981, "loss": 0.2435, "step": 41130 }, { "epoch": 1.7, "grad_norm": 1.28125, "learning_rate": 0.0004960484090439749, "loss": 0.2385, "step": 41140 }, { "epoch": 1.7, "grad_norm": 0.81640625, "learning_rate": 0.0004960464881751033, "loss": 0.2434, "step": 41150 }, { "epoch": 1.7, "grad_norm": 0.373046875, "learning_rate": 0.0004960445668431989, "loss": 0.2359, "step": 41160 }, { "epoch": 1.71, "grad_norm": 0.451171875, "learning_rate": 0.000496042645048265, "loss": 0.2121, "step": 41170 }, { "epoch": 1.71, "grad_norm": 0.9375, "learning_rate": 0.0004960407227903056, "loss": 0.2072, "step": 41180 }, { "epoch": 1.71, "grad_norm": 0.44140625, "learning_rate": 0.000496038800069324, "loss": 0.2595, "step": 41190 }, { "epoch": 1.71, "grad_norm": 0.97265625, "learning_rate": 0.000496036876885324, "loss": 0.26, "step": 41200 }, { "epoch": 1.71, "grad_norm": 0.5390625, "learning_rate": 0.0004960349532383092, "loss": 0.1994, "step": 41210 }, { "epoch": 1.71, "grad_norm": 0.7421875, "learning_rate": 0.0004960330291282831, "loss": 0.242, "step": 41220 }, { "epoch": 1.71, "grad_norm": 0.67578125, "learning_rate": 0.0004960311045552494, "loss": 0.2226, "step": 41230 }, { "epoch": 1.71, "grad_norm": 0.84375, "learning_rate": 0.0004960291795192118, "loss": 0.2278, "step": 41240 }, { "epoch": 1.71, "grad_norm": 0.88671875, "learning_rate": 0.0004960272540201738, "loss": 0.2212, "step": 41250 }, { "epoch": 1.71, "grad_norm": 0.5234375, "learning_rate": 0.0004960253280581391, "loss": 0.2162, "step": 41260 }, { "epoch": 1.71, "grad_norm": 0.61328125, "learning_rate": 0.0004960234016331114, "loss": 0.1867, "step": 41270 }, { "epoch": 1.71, "grad_norm": 0.31640625, "learning_rate": 0.0004960214747450941, "loss": 0.2237, "step": 41280 }, { "epoch": 1.71, "grad_norm": 0.45703125, "learning_rate": 0.000496019547394091, "loss": 0.2632, "step": 41290 }, { "epoch": 1.71, "grad_norm": 0.6953125, "learning_rate": 0.0004960176195801056, "loss": 0.2152, "step": 41300 }, { "epoch": 1.71, "grad_norm": 0.6171875, "learning_rate": 0.0004960156913031417, "loss": 0.1876, "step": 41310 }, { "epoch": 1.71, "grad_norm": 0.59375, "learning_rate": 0.0004960137625632028, "loss": 0.2508, "step": 41320 }, { "epoch": 1.71, "grad_norm": 0.63671875, "learning_rate": 0.0004960118333602926, "loss": 0.2023, "step": 41330 }, { "epoch": 1.71, "grad_norm": 0.2470703125, "learning_rate": 0.0004960099036944147, "loss": 0.2154, "step": 41340 }, { "epoch": 1.71, "grad_norm": 0.74609375, "learning_rate": 0.0004960079735655727, "loss": 0.2096, "step": 41350 }, { "epoch": 1.71, "grad_norm": 0.6953125, "learning_rate": 0.0004960060429737703, "loss": 0.1973, "step": 41360 }, { "epoch": 1.71, "grad_norm": 0.33984375, "learning_rate": 0.000496004111919011, "loss": 0.1665, "step": 41370 }, { "epoch": 1.71, "grad_norm": 0.6484375, "learning_rate": 0.0004960021804012987, "loss": 0.2249, "step": 41380 }, { "epoch": 1.71, "grad_norm": 0.412109375, "learning_rate": 0.0004960002484206367, "loss": 0.2756, "step": 41390 }, { "epoch": 1.71, "grad_norm": 0.859375, "learning_rate": 0.0004959983159770288, "loss": 0.233, "step": 41400 }, { "epoch": 1.72, "grad_norm": 0.578125, "learning_rate": 0.0004959963830704787, "loss": 0.2335, "step": 41410 }, { "epoch": 1.72, "grad_norm": 0.86328125, "learning_rate": 0.0004959944497009899, "loss": 0.2248, "step": 41420 }, { "epoch": 1.72, "grad_norm": 1.46875, "learning_rate": 0.0004959925158685662, "loss": 0.1753, "step": 41430 }, { "epoch": 1.72, "grad_norm": 0.5625, "learning_rate": 0.0004959905815732111, "loss": 0.2309, "step": 41440 }, { "epoch": 1.72, "grad_norm": 0.50390625, "learning_rate": 0.0004959886468149284, "loss": 0.2341, "step": 41450 }, { "epoch": 1.72, "grad_norm": 0.435546875, "learning_rate": 0.0004959867115937215, "loss": 0.2237, "step": 41460 }, { "epoch": 1.72, "grad_norm": 0.8515625, "learning_rate": 0.0004959847759095943, "loss": 0.2236, "step": 41470 }, { "epoch": 1.72, "grad_norm": 0.8984375, "learning_rate": 0.0004959828397625501, "loss": 0.2042, "step": 41480 }, { "epoch": 1.72, "grad_norm": 1.0703125, "learning_rate": 0.0004959809031525929, "loss": 0.2633, "step": 41490 }, { "epoch": 1.72, "grad_norm": 0.66796875, "learning_rate": 0.0004959789660797262, "loss": 0.2185, "step": 41500 }, { "epoch": 1.72, "grad_norm": 0.5625, "learning_rate": 0.0004959770285439536, "loss": 0.198, "step": 41510 }, { "epoch": 1.72, "grad_norm": 0.62890625, "learning_rate": 0.0004959750905452789, "loss": 0.2135, "step": 41520 }, { "epoch": 1.72, "grad_norm": 1.0390625, "learning_rate": 0.0004959731520837056, "loss": 0.271, "step": 41530 }, { "epoch": 1.72, "grad_norm": 1.1796875, "learning_rate": 0.0004959712131592373, "loss": 0.2113, "step": 41540 }, { "epoch": 1.72, "grad_norm": 1.203125, "learning_rate": 0.0004959692737718779, "loss": 0.2323, "step": 41550 }, { "epoch": 1.72, "grad_norm": 0.76171875, "learning_rate": 0.0004959673339216307, "loss": 0.2136, "step": 41560 }, { "epoch": 1.72, "grad_norm": 0.74609375, "learning_rate": 0.0004959653936084996, "loss": 0.2218, "step": 41570 }, { "epoch": 1.72, "grad_norm": 0.373046875, "learning_rate": 0.0004959634528324883, "loss": 0.2502, "step": 41580 }, { "epoch": 1.72, "grad_norm": 0.953125, "learning_rate": 0.0004959615115936002, "loss": 0.2682, "step": 41590 }, { "epoch": 1.72, "grad_norm": 0.93359375, "learning_rate": 0.0004959595698918393, "loss": 0.293, "step": 41600 }, { "epoch": 1.72, "grad_norm": 0.2255859375, "learning_rate": 0.0004959576277272089, "loss": 0.2166, "step": 41610 }, { "epoch": 1.72, "grad_norm": 0.9765625, "learning_rate": 0.0004959556850997128, "loss": 0.2392, "step": 41620 }, { "epoch": 1.72, "grad_norm": 0.94140625, "learning_rate": 0.0004959537420093546, "loss": 0.2433, "step": 41630 }, { "epoch": 1.72, "grad_norm": 0.72265625, "learning_rate": 0.0004959517984561381, "loss": 0.1845, "step": 41640 }, { "epoch": 1.73, "grad_norm": 0.55078125, "learning_rate": 0.0004959498544400669, "loss": 0.2361, "step": 41650 }, { "epoch": 1.73, "grad_norm": 1.109375, "learning_rate": 0.0004959479099611447, "loss": 0.2494, "step": 41660 }, { "epoch": 1.73, "grad_norm": 0.310546875, "learning_rate": 0.000495945965019375, "loss": 0.2742, "step": 41670 }, { "epoch": 1.73, "grad_norm": 1.25, "learning_rate": 0.0004959440196147615, "loss": 0.2101, "step": 41680 }, { "epoch": 1.73, "grad_norm": 0.6953125, "learning_rate": 0.000495942073747308, "loss": 0.2704, "step": 41690 }, { "epoch": 1.73, "grad_norm": 0.5546875, "learning_rate": 0.000495940127417018, "loss": 0.17, "step": 41700 }, { "epoch": 1.73, "grad_norm": 0.60546875, "learning_rate": 0.0004959381806238953, "loss": 0.2616, "step": 41710 }, { "epoch": 1.73, "grad_norm": 0.369140625, "learning_rate": 0.0004959362333679436, "loss": 0.181, "step": 41720 }, { "epoch": 1.73, "grad_norm": 0.68359375, "learning_rate": 0.0004959342856491663, "loss": 0.2084, "step": 41730 }, { "epoch": 1.73, "grad_norm": 0.984375, "learning_rate": 0.0004959323374675673, "loss": 0.2392, "step": 41740 }, { "epoch": 1.73, "grad_norm": 0.84375, "learning_rate": 0.0004959303888231502, "loss": 0.1728, "step": 41750 }, { "epoch": 1.73, "grad_norm": 0.69921875, "learning_rate": 0.0004959284397159186, "loss": 0.2796, "step": 41760 }, { "epoch": 1.73, "grad_norm": 0.484375, "learning_rate": 0.0004959264901458763, "loss": 0.195, "step": 41770 }, { "epoch": 1.73, "grad_norm": 0.7109375, "learning_rate": 0.0004959245401130269, "loss": 0.1989, "step": 41780 }, { "epoch": 1.73, "grad_norm": 2.140625, "learning_rate": 0.0004959225896173741, "loss": 0.1937, "step": 41790 }, { "epoch": 1.73, "grad_norm": 1.3359375, "learning_rate": 0.0004959206386589215, "loss": 0.1783, "step": 41800 }, { "epoch": 1.73, "grad_norm": 0.79296875, "learning_rate": 0.0004959186872376728, "loss": 0.219, "step": 41810 }, { "epoch": 1.73, "grad_norm": 0.396484375, "learning_rate": 0.0004959167353536318, "loss": 0.2117, "step": 41820 }, { "epoch": 1.73, "grad_norm": 0.8671875, "learning_rate": 0.0004959147830068019, "loss": 0.241, "step": 41830 }, { "epoch": 1.73, "grad_norm": 0.53515625, "learning_rate": 0.000495912830197187, "loss": 0.1972, "step": 41840 }, { "epoch": 1.73, "grad_norm": 0.56640625, "learning_rate": 0.0004959108769247907, "loss": 0.1595, "step": 41850 }, { "epoch": 1.73, "grad_norm": 2.390625, "learning_rate": 0.0004959089231896167, "loss": 0.2558, "step": 41860 }, { "epoch": 1.73, "grad_norm": 0.4140625, "learning_rate": 0.0004959069689916688, "loss": 0.257, "step": 41870 }, { "epoch": 1.73, "grad_norm": 1.2109375, "learning_rate": 0.0004959050143309503, "loss": 0.2214, "step": 41880 }, { "epoch": 1.74, "grad_norm": 0.65234375, "learning_rate": 0.0004959030592074653, "loss": 0.2429, "step": 41890 }, { "epoch": 1.74, "grad_norm": 0.003509521484375, "learning_rate": 0.0004959011036212172, "loss": 0.1937, "step": 41900 }, { "epoch": 1.74, "grad_norm": 0.423828125, "learning_rate": 0.0004958991475722098, "loss": 0.274, "step": 41910 }, { "epoch": 1.74, "grad_norm": 0.46484375, "learning_rate": 0.0004958971910604468, "loss": 0.2137, "step": 41920 }, { "epoch": 1.74, "grad_norm": 0.6015625, "learning_rate": 0.0004958952340859318, "loss": 0.2522, "step": 41930 }, { "epoch": 1.74, "grad_norm": 0.75, "learning_rate": 0.0004958932766486686, "loss": 0.2594, "step": 41940 }, { "epoch": 1.74, "grad_norm": 0.91015625, "learning_rate": 0.0004958913187486606, "loss": 0.2274, "step": 41950 }, { "epoch": 1.74, "grad_norm": 0.703125, "learning_rate": 0.0004958893603859119, "loss": 0.2133, "step": 41960 }, { "epoch": 1.74, "grad_norm": 0.361328125, "learning_rate": 0.0004958874015604258, "loss": 0.2457, "step": 41970 }, { "epoch": 1.74, "grad_norm": 0.359375, "learning_rate": 0.0004958854422722064, "loss": 0.1895, "step": 41980 }, { "epoch": 1.74, "grad_norm": 0.79296875, "learning_rate": 0.000495883482521257, "loss": 0.2872, "step": 41990 }, { "epoch": 1.74, "grad_norm": 0.9765625, "learning_rate": 0.0004958815223075814, "loss": 0.2408, "step": 42000 }, { "epoch": 1.74, "grad_norm": 0.4765625, "learning_rate": 0.0004958795616311835, "loss": 0.2117, "step": 42010 }, { "epoch": 1.74, "grad_norm": 0.87109375, "learning_rate": 0.0004958776004920667, "loss": 0.2947, "step": 42020 }, { "epoch": 1.74, "grad_norm": 0.0, "learning_rate": 0.0004958756388902348, "loss": 0.2093, "step": 42030 }, { "epoch": 1.74, "grad_norm": 0.5390625, "learning_rate": 0.0004958736768256915, "loss": 0.1959, "step": 42040 }, { "epoch": 1.74, "grad_norm": 0.8046875, "learning_rate": 0.0004958717142984404, "loss": 0.2742, "step": 42050 }, { "epoch": 1.74, "grad_norm": 0.90625, "learning_rate": 0.0004958697513084855, "loss": 0.2233, "step": 42060 }, { "epoch": 1.74, "grad_norm": 0.42578125, "learning_rate": 0.0004958677878558302, "loss": 0.2052, "step": 42070 }, { "epoch": 1.74, "grad_norm": 1.171875, "learning_rate": 0.0004958658239404782, "loss": 0.194, "step": 42080 }, { "epoch": 1.74, "grad_norm": 0.474609375, "learning_rate": 0.0004958638595624333, "loss": 0.2168, "step": 42090 }, { "epoch": 1.74, "grad_norm": 0.85546875, "learning_rate": 0.0004958618947216992, "loss": 0.2542, "step": 42100 }, { "epoch": 1.74, "grad_norm": 0.76953125, "learning_rate": 0.0004958599294182796, "loss": 0.1895, "step": 42110 }, { "epoch": 1.74, "grad_norm": 0.99609375, "learning_rate": 0.0004958579636521781, "loss": 0.2348, "step": 42120 }, { "epoch": 1.75, "grad_norm": 0.734375, "learning_rate": 0.0004958559974233984, "loss": 0.2046, "step": 42130 }, { "epoch": 1.75, "grad_norm": 0.91796875, "learning_rate": 0.0004958540307319443, "loss": 0.2373, "step": 42140 }, { "epoch": 1.75, "grad_norm": 0.392578125, "learning_rate": 0.0004958520635778196, "loss": 0.23, "step": 42150 }, { "epoch": 1.75, "grad_norm": 1.3203125, "learning_rate": 0.0004958500959610278, "loss": 0.243, "step": 42160 }, { "epoch": 1.75, "grad_norm": 0.86328125, "learning_rate": 0.0004958481278815726, "loss": 0.205, "step": 42170 }, { "epoch": 1.75, "grad_norm": 0.55078125, "learning_rate": 0.0004958461593394578, "loss": 0.2149, "step": 42180 }, { "epoch": 1.75, "grad_norm": 0.59375, "learning_rate": 0.0004958441903346871, "loss": 0.1973, "step": 42190 }, { "epoch": 1.75, "grad_norm": 0.96875, "learning_rate": 0.0004958422208672642, "loss": 0.2218, "step": 42200 }, { "epoch": 1.75, "grad_norm": 0.55078125, "learning_rate": 0.0004958402509371928, "loss": 0.2736, "step": 42210 }, { "epoch": 1.75, "grad_norm": 0.85546875, "learning_rate": 0.0004958382805444764, "loss": 0.2161, "step": 42220 }, { "epoch": 1.75, "grad_norm": 0.62109375, "learning_rate": 0.0004958363096891191, "loss": 0.2555, "step": 42230 }, { "epoch": 1.75, "grad_norm": 0.35546875, "learning_rate": 0.0004958343383711244, "loss": 0.2486, "step": 42240 }, { "epoch": 1.75, "grad_norm": 0.388671875, "learning_rate": 0.000495832366590496, "loss": 0.2409, "step": 42250 }, { "epoch": 1.75, "grad_norm": 0.1845703125, "learning_rate": 0.0004958303943472376, "loss": 0.259, "step": 42260 }, { "epoch": 1.75, "grad_norm": 0.427734375, "learning_rate": 0.000495828421641353, "loss": 0.2215, "step": 42270 }, { "epoch": 1.75, "grad_norm": 0.5390625, "learning_rate": 0.0004958264484728459, "loss": 0.2313, "step": 42280 }, { "epoch": 1.75, "grad_norm": 0.55078125, "learning_rate": 0.0004958244748417198, "loss": 0.2145, "step": 42290 }, { "epoch": 1.75, "grad_norm": 0.54296875, "learning_rate": 0.0004958225007479787, "loss": 0.2573, "step": 42300 }, { "epoch": 1.75, "grad_norm": 0.84765625, "learning_rate": 0.0004958205261916262, "loss": 0.2709, "step": 42310 }, { "epoch": 1.75, "grad_norm": 0.189453125, "learning_rate": 0.0004958185511726661, "loss": 0.2335, "step": 42320 }, { "epoch": 1.75, "grad_norm": 0.6875, "learning_rate": 0.0004958165756911018, "loss": 0.203, "step": 42330 }, { "epoch": 1.75, "grad_norm": 1.484375, "learning_rate": 0.0004958145997469375, "loss": 0.2068, "step": 42340 }, { "epoch": 1.75, "grad_norm": 0.84765625, "learning_rate": 0.0004958126233401766, "loss": 0.2758, "step": 42350 }, { "epoch": 1.75, "grad_norm": 0.4765625, "learning_rate": 0.0004958106464708228, "loss": 0.2403, "step": 42360 }, { "epoch": 1.75, "grad_norm": 1.3046875, "learning_rate": 0.0004958086691388799, "loss": 0.1822, "step": 42370 }, { "epoch": 1.76, "grad_norm": 1.359375, "learning_rate": 0.0004958066913443517, "loss": 0.2124, "step": 42380 }, { "epoch": 1.76, "grad_norm": 0.486328125, "learning_rate": 0.0004958047130872419, "loss": 0.2417, "step": 42390 }, { "epoch": 1.76, "grad_norm": 0.51171875, "learning_rate": 0.0004958027343675541, "loss": 0.2178, "step": 42400 }, { "epoch": 1.76, "grad_norm": 0.8828125, "learning_rate": 0.000495800755185292, "loss": 0.2033, "step": 42410 }, { "epoch": 1.76, "grad_norm": 0.84375, "learning_rate": 0.0004957987755404596, "loss": 0.2539, "step": 42420 }, { "epoch": 1.76, "grad_norm": 1.171875, "learning_rate": 0.0004957967954330603, "loss": 0.1937, "step": 42430 }, { "epoch": 1.76, "grad_norm": 0.9609375, "learning_rate": 0.000495794814863098, "loss": 0.2562, "step": 42440 }, { "epoch": 1.76, "grad_norm": 1.59375, "learning_rate": 0.0004957928338305765, "loss": 0.2323, "step": 42450 }, { "epoch": 1.76, "grad_norm": 0.609375, "learning_rate": 0.0004957908523354995, "loss": 0.2053, "step": 42460 }, { "epoch": 1.76, "grad_norm": 0.6875, "learning_rate": 0.0004957888703778704, "loss": 0.2252, "step": 42470 }, { "epoch": 1.76, "grad_norm": 0.87109375, "learning_rate": 0.0004957868879576935, "loss": 0.2376, "step": 42480 }, { "epoch": 1.76, "grad_norm": 1.0390625, "learning_rate": 0.000495784905074972, "loss": 0.2084, "step": 42490 }, { "epoch": 1.76, "grad_norm": 1.03125, "learning_rate": 0.0004957829217297099, "loss": 0.2477, "step": 42500 }, { "epoch": 1.76, "grad_norm": 0.73046875, "learning_rate": 0.0004957809379219108, "loss": 0.2022, "step": 42510 }, { "epoch": 1.76, "grad_norm": 0.0006103515625, "learning_rate": 0.0004957789536515787, "loss": 0.2125, "step": 42520 }, { "epoch": 1.76, "grad_norm": 0.875, "learning_rate": 0.000495776968918717, "loss": 0.2755, "step": 42530 }, { "epoch": 1.76, "grad_norm": 0.65234375, "learning_rate": 0.0004957749837233297, "loss": 0.2386, "step": 42540 }, { "epoch": 1.76, "grad_norm": 1.5703125, "learning_rate": 0.0004957729980654204, "loss": 0.2366, "step": 42550 }, { "epoch": 1.76, "grad_norm": 0.94140625, "learning_rate": 0.0004957710119449927, "loss": 0.2244, "step": 42560 }, { "epoch": 1.76, "grad_norm": 0.474609375, "learning_rate": 0.0004957690253620507, "loss": 0.2546, "step": 42570 }, { "epoch": 1.76, "grad_norm": 0.310546875, "learning_rate": 0.0004957670383165979, "loss": 0.2512, "step": 42580 }, { "epoch": 1.76, "grad_norm": 0.375, "learning_rate": 0.000495765050808638, "loss": 0.251, "step": 42590 }, { "epoch": 1.76, "grad_norm": 0.44140625, "learning_rate": 0.0004957630628381749, "loss": 0.2292, "step": 42600 }, { "epoch": 1.76, "grad_norm": 0.326171875, "learning_rate": 0.0004957610744052121, "loss": 0.27, "step": 42610 }, { "epoch": 1.77, "grad_norm": 0.400390625, "learning_rate": 0.0004957590855097536, "loss": 0.2425, "step": 42620 }, { "epoch": 1.77, "grad_norm": 1.3203125, "learning_rate": 0.0004957570961518031, "loss": 0.2005, "step": 42630 }, { "epoch": 1.77, "grad_norm": 1.2734375, "learning_rate": 0.0004957551063313641, "loss": 0.2327, "step": 42640 }, { "epoch": 1.77, "grad_norm": 0.373046875, "learning_rate": 0.0004957531160484407, "loss": 0.2174, "step": 42650 }, { "epoch": 1.77, "grad_norm": 0.46875, "learning_rate": 0.0004957511253030365, "loss": 0.2434, "step": 42660 }, { "epoch": 1.77, "grad_norm": 0.546875, "learning_rate": 0.0004957491340951551, "loss": 0.243, "step": 42670 }, { "epoch": 1.77, "grad_norm": 0.6015625, "learning_rate": 0.0004957471424248004, "loss": 0.1998, "step": 42680 }, { "epoch": 1.77, "grad_norm": 0.9609375, "learning_rate": 0.0004957451502919761, "loss": 0.233, "step": 42690 }, { "epoch": 1.77, "grad_norm": 0.87890625, "learning_rate": 0.000495743157696686, "loss": 0.1985, "step": 42700 }, { "epoch": 1.77, "grad_norm": 0.75, "learning_rate": 0.0004957411646389338, "loss": 0.3032, "step": 42710 }, { "epoch": 1.77, "grad_norm": 0.3984375, "learning_rate": 0.0004957391711187234, "loss": 0.2709, "step": 42720 }, { "epoch": 1.77, "grad_norm": 0.333984375, "learning_rate": 0.0004957371771360582, "loss": 0.2102, "step": 42730 }, { "epoch": 1.77, "grad_norm": 1.2578125, "learning_rate": 0.0004957351826909423, "loss": 0.241, "step": 42740 }, { "epoch": 1.77, "grad_norm": 0.640625, "learning_rate": 0.0004957331877833793, "loss": 0.2708, "step": 42750 }, { "epoch": 1.77, "grad_norm": 0.72265625, "learning_rate": 0.0004957311924133729, "loss": 0.2049, "step": 42760 }, { "epoch": 1.77, "grad_norm": 0.51953125, "learning_rate": 0.0004957291965809271, "loss": 0.2119, "step": 42770 }, { "epoch": 1.77, "grad_norm": 0.4765625, "learning_rate": 0.0004957272002860453, "loss": 0.2159, "step": 42780 }, { "epoch": 1.77, "grad_norm": 1.140625, "learning_rate": 0.0004957252035287315, "loss": 0.2189, "step": 42790 }, { "epoch": 1.77, "grad_norm": 0.72265625, "learning_rate": 0.0004957232063089895, "loss": 0.2183, "step": 42800 }, { "epoch": 1.77, "grad_norm": 0.64453125, "learning_rate": 0.0004957212086268229, "loss": 0.1821, "step": 42810 }, { "epoch": 1.77, "grad_norm": 0.52734375, "learning_rate": 0.0004957192104822355, "loss": 0.2455, "step": 42820 }, { "epoch": 1.77, "grad_norm": 0.6875, "learning_rate": 0.0004957172118752311, "loss": 0.1954, "step": 42830 }, { "epoch": 1.77, "grad_norm": 0.37109375, "learning_rate": 0.0004957152128058134, "loss": 0.242, "step": 42840 }, { "epoch": 1.77, "grad_norm": 2.40625, "learning_rate": 0.0004957132132739863, "loss": 0.2524, "step": 42850 }, { "epoch": 1.78, "grad_norm": 0.5078125, "learning_rate": 0.0004957112132797535, "loss": 0.2472, "step": 42860 }, { "epoch": 1.78, "grad_norm": 0.6875, "learning_rate": 0.0004957092128231185, "loss": 0.2487, "step": 42870 }, { "epoch": 1.78, "grad_norm": 0.62890625, "learning_rate": 0.0004957072119040855, "loss": 0.2387, "step": 42880 }, { "epoch": 1.78, "grad_norm": 0.60546875, "learning_rate": 0.000495705210522658, "loss": 0.2569, "step": 42890 }, { "epoch": 1.78, "grad_norm": 0.6796875, "learning_rate": 0.0004957032086788398, "loss": 0.231, "step": 42900 }, { "epoch": 1.78, "grad_norm": 3.25, "learning_rate": 0.0004957012063726348, "loss": 0.2282, "step": 42910 }, { "epoch": 1.78, "grad_norm": 0.2216796875, "learning_rate": 0.0004956992036040465, "loss": 0.1809, "step": 42920 }, { "epoch": 1.78, "grad_norm": 0.478515625, "learning_rate": 0.0004956972003730789, "loss": 0.2007, "step": 42930 }, { "epoch": 1.78, "grad_norm": 0.361328125, "learning_rate": 0.0004956951966797359, "loss": 0.1502, "step": 42940 }, { "epoch": 1.78, "grad_norm": 0.59375, "learning_rate": 0.0004956931925240208, "loss": 0.1813, "step": 42950 }, { "epoch": 1.78, "grad_norm": 0.68359375, "learning_rate": 0.0004956911879059377, "loss": 0.2473, "step": 42960 }, { "epoch": 1.78, "grad_norm": 0.5390625, "learning_rate": 0.0004956891828254903, "loss": 0.24, "step": 42970 }, { "epoch": 1.78, "grad_norm": 0.4453125, "learning_rate": 0.0004956871772826825, "loss": 0.2179, "step": 42980 }, { "epoch": 1.78, "grad_norm": 0.9296875, "learning_rate": 0.0004956851712775179, "loss": 0.2468, "step": 42990 }, { "epoch": 1.78, "grad_norm": 0.40234375, "learning_rate": 0.0004956831648100004, "loss": 0.2439, "step": 43000 }, { "epoch": 1.78, "grad_norm": 0.55859375, "learning_rate": 0.0004956811578801336, "loss": 0.1998, "step": 43010 }, { "epoch": 1.78, "grad_norm": 1.28125, "learning_rate": 0.0004956791504879216, "loss": 0.2466, "step": 43020 }, { "epoch": 1.78, "grad_norm": 0.421875, "learning_rate": 0.0004956771426333677, "loss": 0.2192, "step": 43030 }, { "epoch": 1.78, "grad_norm": 1.7265625, "learning_rate": 0.0004956751343164761, "loss": 0.1666, "step": 43040 }, { "epoch": 1.78, "grad_norm": 0.5546875, "learning_rate": 0.0004956731255372504, "loss": 0.2317, "step": 43050 }, { "epoch": 1.78, "grad_norm": 0.42578125, "learning_rate": 0.0004956711162956945, "loss": 0.2165, "step": 43060 }, { "epoch": 1.78, "grad_norm": 0.8671875, "learning_rate": 0.000495669106591812, "loss": 0.3197, "step": 43070 }, { "epoch": 1.78, "grad_norm": 0.92578125, "learning_rate": 0.0004956670964256067, "loss": 0.2605, "step": 43080 }, { "epoch": 1.78, "grad_norm": 0.828125, "learning_rate": 0.0004956650857970825, "loss": 0.209, "step": 43090 }, { "epoch": 1.79, "grad_norm": 0.98046875, "learning_rate": 0.0004956630747062432, "loss": 0.2366, "step": 43100 }, { "epoch": 1.79, "grad_norm": 0.369140625, "learning_rate": 0.0004956610631530925, "loss": 0.259, "step": 43110 }, { "epoch": 1.79, "grad_norm": 0.34765625, "learning_rate": 0.0004956590511376342, "loss": 0.2248, "step": 43120 }, { "epoch": 1.79, "grad_norm": 0.46484375, "learning_rate": 0.0004956570386598721, "loss": 0.1957, "step": 43130 }, { "epoch": 1.79, "grad_norm": 1.1640625, "learning_rate": 0.0004956550257198099, "loss": 0.2649, "step": 43140 }, { "epoch": 1.79, "grad_norm": 0.76171875, "learning_rate": 0.0004956530123174516, "loss": 0.2401, "step": 43150 }, { "epoch": 1.79, "grad_norm": 0.33984375, "learning_rate": 0.0004956509984528008, "loss": 0.1975, "step": 43160 }, { "epoch": 1.79, "grad_norm": 1.1875, "learning_rate": 0.0004956489841258613, "loss": 0.2014, "step": 43170 }, { "epoch": 1.79, "grad_norm": 0.80078125, "learning_rate": 0.000495646969336637, "loss": 0.2124, "step": 43180 }, { "epoch": 1.79, "grad_norm": 1.0625, "learning_rate": 0.0004956449540851317, "loss": 0.2114, "step": 43190 }, { "epoch": 1.79, "grad_norm": 0.44921875, "learning_rate": 0.000495642938371349, "loss": 0.1994, "step": 43200 }, { "epoch": 1.79, "grad_norm": 0.30078125, "learning_rate": 0.0004956409221952929, "loss": 0.271, "step": 43210 }, { "epoch": 1.79, "grad_norm": 0.419921875, "learning_rate": 0.000495638905556967, "loss": 0.2151, "step": 43220 }, { "epoch": 1.79, "grad_norm": 0.419921875, "learning_rate": 0.0004956368884563753, "loss": 0.2744, "step": 43230 }, { "epoch": 1.79, "grad_norm": 0.0, "learning_rate": 0.0004956348708935215, "loss": 0.2196, "step": 43240 }, { "epoch": 1.79, "grad_norm": 0.53515625, "learning_rate": 0.0004956328528684093, "loss": 0.1939, "step": 43250 }, { "epoch": 1.79, "grad_norm": 0.4609375, "learning_rate": 0.0004956308343810427, "loss": 0.1633, "step": 43260 }, { "epoch": 1.79, "grad_norm": 1.2265625, "learning_rate": 0.0004956288154314254, "loss": 0.2849, "step": 43270 }, { "epoch": 1.79, "grad_norm": 0.765625, "learning_rate": 0.0004956267960195611, "loss": 0.2117, "step": 43280 }, { "epoch": 1.79, "grad_norm": 0.4296875, "learning_rate": 0.0004956247761454539, "loss": 0.2161, "step": 43290 }, { "epoch": 1.79, "grad_norm": 0.921875, "learning_rate": 0.0004956227558091072, "loss": 0.2415, "step": 43300 }, { "epoch": 1.79, "grad_norm": 0.4453125, "learning_rate": 0.0004956207350105251, "loss": 0.2358, "step": 43310 }, { "epoch": 1.79, "grad_norm": 0.42578125, "learning_rate": 0.0004956187137497112, "loss": 0.2698, "step": 43320 }, { "epoch": 1.79, "grad_norm": 1.09375, "learning_rate": 0.0004956166920266695, "loss": 0.2137, "step": 43330 }, { "epoch": 1.8, "grad_norm": 0.62890625, "learning_rate": 0.0004956146698414037, "loss": 0.2003, "step": 43340 }, { "epoch": 1.8, "grad_norm": 1.109375, "learning_rate": 0.0004956126471939175, "loss": 0.2254, "step": 43350 }, { "epoch": 1.8, "grad_norm": 0.90625, "learning_rate": 0.000495610624084215, "loss": 0.2851, "step": 43360 }, { "epoch": 1.8, "grad_norm": 0.498046875, "learning_rate": 0.0004956086005122997, "loss": 0.2396, "step": 43370 }, { "epoch": 1.8, "grad_norm": 0.18359375, "learning_rate": 0.0004956065764781756, "loss": 0.1669, "step": 43380 }, { "epoch": 1.8, "grad_norm": 0.64453125, "learning_rate": 0.0004956045519818464, "loss": 0.2433, "step": 43390 }, { "epoch": 1.8, "grad_norm": 0.453125, "learning_rate": 0.000495602527023316, "loss": 0.2686, "step": 43400 }, { "epoch": 1.8, "grad_norm": 0.828125, "learning_rate": 0.0004956005016025881, "loss": 0.2149, "step": 43410 }, { "epoch": 1.8, "grad_norm": 0.890625, "learning_rate": 0.0004955984757196667, "loss": 0.1936, "step": 43420 }, { "epoch": 1.8, "grad_norm": 1.4765625, "learning_rate": 0.0004955964493745554, "loss": 0.1992, "step": 43430 }, { "epoch": 1.8, "grad_norm": 0.5078125, "learning_rate": 0.0004955944225672581, "loss": 0.2408, "step": 43440 }, { "epoch": 1.8, "grad_norm": 1.015625, "learning_rate": 0.0004955923952977787, "loss": 0.2, "step": 43450 }, { "epoch": 1.8, "grad_norm": 0.212890625, "learning_rate": 0.0004955903675661208, "loss": 0.2006, "step": 43460 }, { "epoch": 1.8, "grad_norm": 0.79296875, "learning_rate": 0.0004955883393722884, "loss": 0.2429, "step": 43470 }, { "epoch": 1.8, "grad_norm": 0.4453125, "learning_rate": 0.0004955863107162854, "loss": 0.2313, "step": 43480 }, { "epoch": 1.8, "grad_norm": 0.71484375, "learning_rate": 0.0004955842815981154, "loss": 0.2797, "step": 43490 }, { "epoch": 1.8, "grad_norm": 0.75390625, "learning_rate": 0.0004955822520177821, "loss": 0.2398, "step": 43500 }, { "epoch": 1.8, "grad_norm": 0.9296875, "learning_rate": 0.0004955802219752898, "loss": 0.2089, "step": 43510 }, { "epoch": 1.8, "grad_norm": 0.66015625, "learning_rate": 0.000495578191470642, "loss": 0.2173, "step": 43520 }, { "epoch": 1.8, "grad_norm": 3.609375, "learning_rate": 0.0004955761605038425, "loss": 0.254, "step": 43530 }, { "epoch": 1.8, "grad_norm": 0.90625, "learning_rate": 0.0004955741290748951, "loss": 0.2643, "step": 43540 }, { "epoch": 1.8, "grad_norm": 0.2490234375, "learning_rate": 0.0004955720971838038, "loss": 0.2381, "step": 43550 }, { "epoch": 1.8, "grad_norm": 0.60546875, "learning_rate": 0.0004955700648305724, "loss": 0.2617, "step": 43560 }, { "epoch": 1.8, "grad_norm": 0.5390625, "learning_rate": 0.0004955680320152046, "loss": 0.2635, "step": 43570 }, { "epoch": 1.81, "grad_norm": 0.703125, "learning_rate": 0.0004955659987377042, "loss": 0.1755, "step": 43580 }, { "epoch": 1.81, "grad_norm": 0.375, "learning_rate": 0.0004955639649980752, "loss": 0.2557, "step": 43590 }, { "epoch": 1.81, "grad_norm": 0.61328125, "learning_rate": 0.0004955619307963214, "loss": 0.2342, "step": 43600 }, { "epoch": 1.81, "grad_norm": 0.8125, "learning_rate": 0.0004955598961324465, "loss": 0.2141, "step": 43610 }, { "epoch": 1.81, "grad_norm": 0.63671875, "learning_rate": 0.0004955578610064543, "loss": 0.2462, "step": 43620 }, { "epoch": 1.81, "grad_norm": 0.7421875, "learning_rate": 0.0004955558254183488, "loss": 0.2507, "step": 43630 }, { "epoch": 1.81, "grad_norm": 0.58984375, "learning_rate": 0.0004955537893681338, "loss": 0.2615, "step": 43640 }, { "epoch": 1.81, "grad_norm": 1.890625, "learning_rate": 0.000495551752855813, "loss": 0.1688, "step": 43650 }, { "epoch": 1.81, "grad_norm": 0.57421875, "learning_rate": 0.0004955497158813904, "loss": 0.2376, "step": 43660 }, { "epoch": 1.81, "grad_norm": 0.5234375, "learning_rate": 0.0004955476784448697, "loss": 0.1974, "step": 43670 }, { "epoch": 1.81, "grad_norm": 0.1494140625, "learning_rate": 0.0004955456405462547, "loss": 0.2362, "step": 43680 }, { "epoch": 1.81, "grad_norm": 0.431640625, "learning_rate": 0.0004955436021855495, "loss": 0.2565, "step": 43690 }, { "epoch": 1.81, "grad_norm": 0.46484375, "learning_rate": 0.0004955415633627577, "loss": 0.2483, "step": 43700 }, { "epoch": 1.81, "grad_norm": 0.470703125, "learning_rate": 0.0004955395240778831, "loss": 0.2473, "step": 43710 }, { "epoch": 1.81, "grad_norm": 0.90234375, "learning_rate": 0.0004955374843309297, "loss": 0.2016, "step": 43720 }, { "epoch": 1.81, "grad_norm": 0.4765625, "learning_rate": 0.0004955354441219013, "loss": 0.2479, "step": 43730 }, { "epoch": 1.81, "grad_norm": 0.734375, "learning_rate": 0.0004955334034508017, "loss": 0.2111, "step": 43740 }, { "epoch": 1.81, "grad_norm": 0.40625, "learning_rate": 0.0004955313623176348, "loss": 0.2737, "step": 43750 }, { "epoch": 1.81, "grad_norm": 0.5859375, "learning_rate": 0.0004955293207224042, "loss": 0.275, "step": 43760 }, { "epoch": 1.81, "grad_norm": 0.734375, "learning_rate": 0.0004955272786651142, "loss": 0.2627, "step": 43770 }, { "epoch": 1.81, "grad_norm": 0.6484375, "learning_rate": 0.0004955252361457682, "loss": 0.2234, "step": 43780 }, { "epoch": 1.81, "grad_norm": 0.86328125, "learning_rate": 0.0004955231931643702, "loss": 0.1745, "step": 43790 }, { "epoch": 1.81, "grad_norm": 0.2197265625, "learning_rate": 0.0004955211497209241, "loss": 0.2057, "step": 43800 }, { "epoch": 1.81, "grad_norm": 1.125, "learning_rate": 0.0004955191058154337, "loss": 0.26, "step": 43810 }, { "epoch": 1.82, "grad_norm": 0.5703125, "learning_rate": 0.0004955170614479029, "loss": 0.2292, "step": 43820 }, { "epoch": 1.82, "grad_norm": 0.51171875, "learning_rate": 0.0004955150166183356, "loss": 0.2466, "step": 43830 }, { "epoch": 1.82, "grad_norm": 1.0703125, "learning_rate": 0.0004955129713267353, "loss": 0.2303, "step": 43840 }, { "epoch": 1.82, "grad_norm": 1.984375, "learning_rate": 0.0004955109255731063, "loss": 0.2313, "step": 43850 }, { "epoch": 1.82, "grad_norm": 0.6875, "learning_rate": 0.0004955088793574522, "loss": 0.1699, "step": 43860 }, { "epoch": 1.82, "grad_norm": 0.34375, "learning_rate": 0.0004955068326797769, "loss": 0.2266, "step": 43870 }, { "epoch": 1.82, "grad_norm": 0.8671875, "learning_rate": 0.0004955047855400842, "loss": 0.2095, "step": 43880 }, { "epoch": 1.82, "grad_norm": 0.6640625, "learning_rate": 0.000495502737938378, "loss": 0.2076, "step": 43890 }, { "epoch": 1.82, "grad_norm": 0.78125, "learning_rate": 0.0004955006898746623, "loss": 0.2825, "step": 43900 }, { "epoch": 1.82, "grad_norm": 0.7265625, "learning_rate": 0.0004954986413489408, "loss": 0.175, "step": 43910 }, { "epoch": 1.82, "grad_norm": 0.59375, "learning_rate": 0.0004954965923612172, "loss": 0.1812, "step": 43920 }, { "epoch": 1.82, "grad_norm": 0.4140625, "learning_rate": 0.0004954945429114957, "loss": 0.2217, "step": 43930 }, { "epoch": 1.82, "grad_norm": 0.69921875, "learning_rate": 0.0004954924929997798, "loss": 0.1939, "step": 43940 }, { "epoch": 1.82, "grad_norm": 0.50390625, "learning_rate": 0.0004954904426260737, "loss": 0.1914, "step": 43950 }, { "epoch": 1.82, "grad_norm": 0.88671875, "learning_rate": 0.0004954883917903809, "loss": 0.2198, "step": 43960 }, { "epoch": 1.82, "grad_norm": 0.361328125, "learning_rate": 0.0004954863404927057, "loss": 0.2406, "step": 43970 }, { "epoch": 1.82, "grad_norm": 1.0078125, "learning_rate": 0.0004954842887330516, "loss": 0.2415, "step": 43980 }, { "epoch": 1.82, "grad_norm": 0.66796875, "learning_rate": 0.0004954822365114225, "loss": 0.2691, "step": 43990 }, { "epoch": 1.82, "grad_norm": 0.73828125, "learning_rate": 0.0004954801838278225, "loss": 0.2236, "step": 44000 }, { "epoch": 1.82, "grad_norm": 0.51953125, "learning_rate": 0.0004954781306822552, "loss": 0.1991, "step": 44010 }, { "epoch": 1.82, "grad_norm": 0.58203125, "learning_rate": 0.0004954760770747247, "loss": 0.1933, "step": 44020 }, { "epoch": 1.82, "grad_norm": 0.466796875, "learning_rate": 0.0004954740230052346, "loss": 0.1856, "step": 44030 }, { "epoch": 1.82, "grad_norm": 1.953125, "learning_rate": 0.0004954719684737889, "loss": 0.2359, "step": 44040 }, { "epoch": 1.82, "grad_norm": 0.466796875, "learning_rate": 0.0004954699134803915, "loss": 0.2256, "step": 44050 }, { "epoch": 1.82, "grad_norm": 0.99609375, "learning_rate": 0.0004954678580250463, "loss": 0.2034, "step": 44060 }, { "epoch": 1.83, "grad_norm": 0.439453125, "learning_rate": 0.0004954658021077569, "loss": 0.2089, "step": 44070 }, { "epoch": 1.83, "grad_norm": 0.62890625, "learning_rate": 0.0004954637457285276, "loss": 0.2472, "step": 44080 }, { "epoch": 1.83, "grad_norm": 1.15625, "learning_rate": 0.0004954616888873619, "loss": 0.252, "step": 44090 }, { "epoch": 1.83, "grad_norm": 0.6953125, "learning_rate": 0.0004954596315842638, "loss": 0.2166, "step": 44100 }, { "epoch": 1.83, "grad_norm": 0.625, "learning_rate": 0.0004954575738192373, "loss": 0.2212, "step": 44110 }, { "epoch": 1.83, "grad_norm": 0.53125, "learning_rate": 0.000495455515592286, "loss": 0.2023, "step": 44120 }, { "epoch": 1.83, "grad_norm": 0.4453125, "learning_rate": 0.0004954534569034141, "loss": 0.2776, "step": 44130 }, { "epoch": 1.83, "grad_norm": 0.87109375, "learning_rate": 0.0004954513977526252, "loss": 0.2232, "step": 44140 }, { "epoch": 1.83, "grad_norm": 0.51171875, "learning_rate": 0.0004954493381399232, "loss": 0.2106, "step": 44150 }, { "epoch": 1.83, "grad_norm": 0.8671875, "learning_rate": 0.0004954472780653122, "loss": 0.2479, "step": 44160 }, { "epoch": 1.83, "grad_norm": 0.4765625, "learning_rate": 0.0004954452175287958, "loss": 0.2583, "step": 44170 }, { "epoch": 1.83, "grad_norm": 1.421875, "learning_rate": 0.0004954431565303781, "loss": 0.2547, "step": 44180 }, { "epoch": 1.83, "grad_norm": 0.5, "learning_rate": 0.0004954410950700628, "loss": 0.2991, "step": 44190 }, { "epoch": 1.83, "grad_norm": 0.52734375, "learning_rate": 0.0004954390331478539, "loss": 0.2482, "step": 44200 }, { "epoch": 1.83, "grad_norm": 0.5859375, "learning_rate": 0.0004954369707637552, "loss": 0.2443, "step": 44210 }, { "epoch": 1.83, "grad_norm": 0.66796875, "learning_rate": 0.0004954349079177708, "loss": 0.2501, "step": 44220 }, { "epoch": 1.83, "grad_norm": 0.70703125, "learning_rate": 0.0004954328446099043, "loss": 0.1853, "step": 44230 }, { "epoch": 1.83, "grad_norm": 0.89453125, "learning_rate": 0.0004954307808401597, "loss": 0.2612, "step": 44240 }, { "epoch": 1.83, "grad_norm": 0.640625, "learning_rate": 0.0004954287166085409, "loss": 0.2621, "step": 44250 }, { "epoch": 1.83, "grad_norm": 1.0078125, "learning_rate": 0.0004954266519150517, "loss": 0.2205, "step": 44260 }, { "epoch": 1.83, "grad_norm": 1.5, "learning_rate": 0.000495424586759696, "loss": 0.2468, "step": 44270 }, { "epoch": 1.83, "grad_norm": 0.9765625, "learning_rate": 0.0004954225211424777, "loss": 0.2155, "step": 44280 }, { "epoch": 1.83, "grad_norm": 0.8359375, "learning_rate": 0.0004954204550634009, "loss": 0.2007, "step": 44290 }, { "epoch": 1.83, "grad_norm": 1.0859375, "learning_rate": 0.0004954183885224693, "loss": 0.2376, "step": 44300 }, { "epoch": 1.84, "grad_norm": 0.46875, "learning_rate": 0.0004954163215196867, "loss": 0.2063, "step": 44310 }, { "epoch": 1.84, "grad_norm": 0.921875, "learning_rate": 0.0004954142540550571, "loss": 0.2086, "step": 44320 }, { "epoch": 1.84, "grad_norm": 0.6953125, "learning_rate": 0.0004954121861285843, "loss": 0.2228, "step": 44330 }, { "epoch": 1.84, "grad_norm": 0.31640625, "learning_rate": 0.0004954101177402723, "loss": 0.2477, "step": 44340 }, { "epoch": 1.84, "grad_norm": 0.59765625, "learning_rate": 0.000495408048890125, "loss": 0.2196, "step": 44350 }, { "epoch": 1.84, "grad_norm": 0.275390625, "learning_rate": 0.0004954059795781463, "loss": 0.194, "step": 44360 }, { "epoch": 1.84, "grad_norm": 2.984375, "learning_rate": 0.00049540390980434, "loss": 0.2281, "step": 44370 }, { "epoch": 1.84, "grad_norm": 0.5390625, "learning_rate": 0.0004954018395687101, "loss": 0.2048, "step": 44380 }, { "epoch": 1.84, "grad_norm": 0.408203125, "learning_rate": 0.0004953997688712604, "loss": 0.2483, "step": 44390 }, { "epoch": 1.84, "grad_norm": 0.55859375, "learning_rate": 0.0004953976977119948, "loss": 0.2287, "step": 44400 }, { "epoch": 1.84, "grad_norm": 0.30078125, "learning_rate": 0.0004953956260909172, "loss": 0.2485, "step": 44410 }, { "epoch": 1.84, "grad_norm": 0.6640625, "learning_rate": 0.0004953935540080316, "loss": 0.2674, "step": 44420 }, { "epoch": 1.84, "grad_norm": 1.0234375, "learning_rate": 0.0004953914814633419, "loss": 0.239, "step": 44430 }, { "epoch": 1.84, "grad_norm": 0.734375, "learning_rate": 0.0004953894084568518, "loss": 0.2402, "step": 44440 }, { "epoch": 1.84, "grad_norm": 0.9921875, "learning_rate": 0.0004953873349885654, "loss": 0.2455, "step": 44450 }, { "epoch": 1.84, "grad_norm": 0.8515625, "learning_rate": 0.0004953852610584866, "loss": 0.2242, "step": 44460 }, { "epoch": 1.84, "grad_norm": 1.2421875, "learning_rate": 0.0004953831866666191, "loss": 0.2361, "step": 44470 }, { "epoch": 1.84, "grad_norm": 0.609375, "learning_rate": 0.000495381111812967, "loss": 0.2202, "step": 44480 }, { "epoch": 1.84, "grad_norm": 1.0625, "learning_rate": 0.0004953790364975342, "loss": 0.2062, "step": 44490 }, { "epoch": 1.84, "grad_norm": 0.375, "learning_rate": 0.0004953769607203244, "loss": 0.258, "step": 44500 }, { "epoch": 1.84, "grad_norm": 1.296875, "learning_rate": 0.0004953748844813418, "loss": 0.1856, "step": 44510 }, { "epoch": 1.84, "grad_norm": 0.421875, "learning_rate": 0.0004953728077805901, "loss": 0.2319, "step": 44520 }, { "epoch": 1.84, "grad_norm": 0.82421875, "learning_rate": 0.0004953707306180734, "loss": 0.2879, "step": 44530 }, { "epoch": 1.84, "grad_norm": 0.5703125, "learning_rate": 0.0004953686529937953, "loss": 0.2682, "step": 44540 }, { "epoch": 1.85, "grad_norm": 0.498046875, "learning_rate": 0.0004953665749077601, "loss": 0.2218, "step": 44550 }, { "epoch": 1.85, "grad_norm": 0.78515625, "learning_rate": 0.0004953644963599712, "loss": 0.1502, "step": 44560 }, { "epoch": 1.85, "grad_norm": 2.796875, "learning_rate": 0.0004953624173504331, "loss": 0.2374, "step": 44570 }, { "epoch": 1.85, "grad_norm": 0.64453125, "learning_rate": 0.0004953603378791493, "loss": 0.2302, "step": 44580 }, { "epoch": 1.85, "grad_norm": 1.078125, "learning_rate": 0.0004953582579461239, "loss": 0.2375, "step": 44590 }, { "epoch": 1.85, "grad_norm": 0.439453125, "learning_rate": 0.0004953561775513606, "loss": 0.1988, "step": 44600 }, { "epoch": 1.85, "grad_norm": 3.125, "learning_rate": 0.0004953540966948636, "loss": 0.1656, "step": 44610 }, { "epoch": 1.85, "grad_norm": 1.0859375, "learning_rate": 0.0004953520153766367, "loss": 0.2926, "step": 44620 }, { "epoch": 1.85, "grad_norm": 0.45703125, "learning_rate": 0.0004953499335966838, "loss": 0.1837, "step": 44630 }, { "epoch": 1.85, "grad_norm": 0.87109375, "learning_rate": 0.0004953478513550088, "loss": 0.2057, "step": 44640 }, { "epoch": 1.85, "grad_norm": 1.3828125, "learning_rate": 0.0004953457686516157, "loss": 0.2636, "step": 44650 }, { "epoch": 1.85, "grad_norm": 0.447265625, "learning_rate": 0.0004953436854865082, "loss": 0.2231, "step": 44660 }, { "epoch": 1.85, "grad_norm": 0.75390625, "learning_rate": 0.0004953416018596905, "loss": 0.2063, "step": 44670 }, { "epoch": 1.85, "grad_norm": 1.0859375, "learning_rate": 0.0004953395177711664, "loss": 0.2328, "step": 44680 }, { "epoch": 1.85, "grad_norm": 0.66796875, "learning_rate": 0.0004953374332209398, "loss": 0.2513, "step": 44690 }, { "epoch": 1.85, "grad_norm": 0.462890625, "learning_rate": 0.0004953353482090146, "loss": 0.299, "step": 44700 }, { "epoch": 1.85, "grad_norm": 0.734375, "learning_rate": 0.0004953332627353948, "loss": 0.1974, "step": 44710 }, { "epoch": 1.85, "grad_norm": 0.73046875, "learning_rate": 0.0004953311768000843, "loss": 0.1926, "step": 44720 }, { "epoch": 1.85, "grad_norm": 0.47265625, "learning_rate": 0.000495329090403087, "loss": 0.1778, "step": 44730 }, { "epoch": 1.85, "grad_norm": 0.8046875, "learning_rate": 0.0004953270035444069, "loss": 0.1957, "step": 44740 }, { "epoch": 1.85, "grad_norm": 0.63671875, "learning_rate": 0.0004953249162240478, "loss": 0.1857, "step": 44750 }, { "epoch": 1.85, "grad_norm": 0.6875, "learning_rate": 0.0004953228284420137, "loss": 0.1968, "step": 44760 }, { "epoch": 1.85, "grad_norm": 0.359375, "learning_rate": 0.0004953207401983086, "loss": 0.2613, "step": 44770 }, { "epoch": 1.85, "grad_norm": 1.7421875, "learning_rate": 0.0004953186514929363, "loss": 0.2342, "step": 44780 }, { "epoch": 1.86, "grad_norm": 0.84765625, "learning_rate": 0.0004953165623259008, "loss": 0.262, "step": 44790 }, { "epoch": 1.86, "grad_norm": 0.5703125, "learning_rate": 0.0004953144726972061, "loss": 0.2947, "step": 44800 }, { "epoch": 1.86, "grad_norm": 0.51171875, "learning_rate": 0.000495312382606856, "loss": 0.2006, "step": 44810 }, { "epoch": 1.86, "grad_norm": 0.396484375, "learning_rate": 0.0004953102920548545, "loss": 0.2011, "step": 44820 }, { "epoch": 1.86, "grad_norm": 0.55859375, "learning_rate": 0.0004953082010412055, "loss": 0.2544, "step": 44830 }, { "epoch": 1.86, "grad_norm": 1.3359375, "learning_rate": 0.0004953061095659129, "loss": 0.2301, "step": 44840 }, { "epoch": 1.86, "grad_norm": 1.046875, "learning_rate": 0.0004953040176289808, "loss": 0.2041, "step": 44850 }, { "epoch": 1.86, "grad_norm": 0.57421875, "learning_rate": 0.000495301925230413, "loss": 0.2749, "step": 44860 }, { "epoch": 1.86, "grad_norm": 0.75, "learning_rate": 0.0004952998323702134, "loss": 0.2211, "step": 44870 }, { "epoch": 1.86, "grad_norm": 0.50390625, "learning_rate": 0.0004952977390483861, "loss": 0.2429, "step": 44880 }, { "epoch": 1.86, "grad_norm": 0.546875, "learning_rate": 0.0004952956452649349, "loss": 0.2398, "step": 44890 }, { "epoch": 1.86, "grad_norm": 3.09375, "learning_rate": 0.0004952935510198637, "loss": 0.206, "step": 44900 }, { "epoch": 1.86, "grad_norm": 1.453125, "learning_rate": 0.0004952914563131766, "loss": 0.224, "step": 44910 }, { "epoch": 1.86, "grad_norm": 0.376953125, "learning_rate": 0.0004952893611448775, "loss": 0.1838, "step": 44920 }, { "epoch": 1.86, "grad_norm": 2.125, "learning_rate": 0.0004952872655149703, "loss": 0.1794, "step": 44930 }, { "epoch": 1.86, "grad_norm": 0.8515625, "learning_rate": 0.0004952851694234589, "loss": 0.2688, "step": 44940 }, { "epoch": 1.86, "grad_norm": 0.443359375, "learning_rate": 0.0004952830728703474, "loss": 0.1989, "step": 44950 }, { "epoch": 1.86, "grad_norm": 0.2109375, "learning_rate": 0.0004952809758556395, "loss": 0.2225, "step": 44960 }, { "epoch": 1.86, "grad_norm": 0.458984375, "learning_rate": 0.0004952788783793395, "loss": 0.1707, "step": 44970 }, { "epoch": 1.86, "grad_norm": 1.1640625, "learning_rate": 0.000495276780441451, "loss": 0.2445, "step": 44980 }, { "epoch": 1.86, "grad_norm": 0.69921875, "learning_rate": 0.0004952746820419781, "loss": 0.245, "step": 44990 }, { "epoch": 1.86, "grad_norm": 0.81640625, "learning_rate": 0.0004952725831809247, "loss": 0.2339, "step": 45000 }, { "epoch": 1.86, "grad_norm": 0.140625, "learning_rate": 0.0004952704838582948, "loss": 0.1689, "step": 45010 }, { "epoch": 1.86, "grad_norm": 0.4765625, "learning_rate": 0.0004952683840740924, "loss": 0.2491, "step": 45020 }, { "epoch": 1.87, "grad_norm": 1.2421875, "learning_rate": 0.0004952662838283214, "loss": 0.2085, "step": 45030 }, { "epoch": 1.87, "grad_norm": 0.423828125, "learning_rate": 0.0004952641831209856, "loss": 0.1871, "step": 45040 }, { "epoch": 1.87, "grad_norm": 1.8828125, "learning_rate": 0.0004952620819520892, "loss": 0.2263, "step": 45050 }, { "epoch": 1.87, "grad_norm": 0.71875, "learning_rate": 0.000495259980321636, "loss": 0.2186, "step": 45060 }, { "epoch": 1.87, "grad_norm": 0.75, "learning_rate": 0.0004952578782296301, "loss": 0.1958, "step": 45070 }, { "epoch": 1.87, "grad_norm": 0.59375, "learning_rate": 0.0004952557756760752, "loss": 0.2468, "step": 45080 }, { "epoch": 1.87, "grad_norm": 0.73828125, "learning_rate": 0.0004952536726609755, "loss": 0.1879, "step": 45090 }, { "epoch": 1.87, "grad_norm": 2.03125, "learning_rate": 0.0004952515691843349, "loss": 0.2318, "step": 45100 }, { "epoch": 1.87, "grad_norm": 0.64453125, "learning_rate": 0.0004952494652461574, "loss": 0.2371, "step": 45110 }, { "epoch": 1.87, "grad_norm": 0.88671875, "learning_rate": 0.0004952473608464468, "loss": 0.2354, "step": 45120 }, { "epoch": 1.87, "grad_norm": 1.3046875, "learning_rate": 0.0004952452559852072, "loss": 0.2136, "step": 45130 }, { "epoch": 1.87, "grad_norm": 0.6875, "learning_rate": 0.0004952431506624424, "loss": 0.1603, "step": 45140 }, { "epoch": 1.87, "grad_norm": 0.62109375, "learning_rate": 0.0004952410448781565, "loss": 0.2231, "step": 45150 }, { "epoch": 1.87, "grad_norm": 0.443359375, "learning_rate": 0.0004952389386323535, "loss": 0.2252, "step": 45160 }, { "epoch": 1.87, "grad_norm": 0.408203125, "learning_rate": 0.0004952368319250373, "loss": 0.2271, "step": 45170 }, { "epoch": 1.87, "grad_norm": 0.578125, "learning_rate": 0.0004952347247562119, "loss": 0.2114, "step": 45180 }, { "epoch": 1.87, "grad_norm": 0.578125, "learning_rate": 0.0004952326171258812, "loss": 0.1812, "step": 45190 }, { "epoch": 1.87, "grad_norm": 1.171875, "learning_rate": 0.0004952305090340491, "loss": 0.2429, "step": 45200 }, { "epoch": 1.87, "grad_norm": 1.0234375, "learning_rate": 0.0004952284004807197, "loss": 0.2145, "step": 45210 }, { "epoch": 1.87, "grad_norm": 0.94921875, "learning_rate": 0.0004952262914658971, "loss": 0.2121, "step": 45220 }, { "epoch": 1.87, "grad_norm": 0.67578125, "learning_rate": 0.0004952241819895848, "loss": 0.218, "step": 45230 }, { "epoch": 1.87, "grad_norm": 0.7734375, "learning_rate": 0.0004952220720517873, "loss": 0.1871, "step": 45240 }, { "epoch": 1.87, "grad_norm": 2.09375, "learning_rate": 0.0004952199616525084, "loss": 0.1919, "step": 45250 }, { "epoch": 1.87, "grad_norm": 0.40234375, "learning_rate": 0.0004952178507917519, "loss": 0.2278, "step": 45260 }, { "epoch": 1.88, "grad_norm": 0.8046875, "learning_rate": 0.0004952157394695218, "loss": 0.1999, "step": 45270 }, { "epoch": 1.88, "grad_norm": 0.41796875, "learning_rate": 0.0004952136276858223, "loss": 0.2051, "step": 45280 }, { "epoch": 1.88, "grad_norm": 0.71484375, "learning_rate": 0.0004952115154406571, "loss": 0.2135, "step": 45290 }, { "epoch": 1.88, "grad_norm": 0.31640625, "learning_rate": 0.0004952094027340304, "loss": 0.1683, "step": 45300 }, { "epoch": 1.88, "grad_norm": 0.27734375, "learning_rate": 0.0004952072895659461, "loss": 0.2229, "step": 45310 }, { "epoch": 1.88, "grad_norm": 0.80859375, "learning_rate": 0.0004952051759364081, "loss": 0.2462, "step": 45320 }, { "epoch": 1.88, "grad_norm": 0.6328125, "learning_rate": 0.0004952030618454205, "loss": 0.2422, "step": 45330 }, { "epoch": 1.88, "grad_norm": 0.80078125, "learning_rate": 0.0004952009472929871, "loss": 0.1991, "step": 45340 }, { "epoch": 1.88, "grad_norm": 0.2236328125, "learning_rate": 0.0004951988322791121, "loss": 0.2089, "step": 45350 }, { "epoch": 1.88, "grad_norm": 4.46875, "learning_rate": 0.0004951967168037994, "loss": 0.2536, "step": 45360 }, { "epoch": 1.88, "grad_norm": 1.6328125, "learning_rate": 0.000495194600867053, "loss": 0.2529, "step": 45370 }, { "epoch": 1.88, "grad_norm": 0.75390625, "learning_rate": 0.0004951924844688767, "loss": 0.2603, "step": 45380 }, { "epoch": 1.88, "grad_norm": 0.53125, "learning_rate": 0.0004951903676092747, "loss": 0.2606, "step": 45390 }, { "epoch": 1.88, "grad_norm": 0.4296875, "learning_rate": 0.0004951882502882509, "loss": 0.2183, "step": 45400 }, { "epoch": 1.88, "grad_norm": 0.54296875, "learning_rate": 0.0004951861325058093, "loss": 0.1934, "step": 45410 }, { "epoch": 1.88, "grad_norm": 0.765625, "learning_rate": 0.0004951840142619538, "loss": 0.2227, "step": 45420 }, { "epoch": 1.88, "grad_norm": 1.0546875, "learning_rate": 0.0004951818955566886, "loss": 0.1898, "step": 45430 }, { "epoch": 1.88, "grad_norm": 0.8125, "learning_rate": 0.0004951797763900175, "loss": 0.2063, "step": 45440 }, { "epoch": 1.88, "grad_norm": 0.61328125, "learning_rate": 0.0004951776567619444, "loss": 0.2031, "step": 45450 }, { "epoch": 1.88, "grad_norm": 0.95703125, "learning_rate": 0.0004951755366724735, "loss": 0.2215, "step": 45460 }, { "epoch": 1.88, "grad_norm": 0.7421875, "learning_rate": 0.0004951734161216089, "loss": 0.2828, "step": 45470 }, { "epoch": 1.88, "grad_norm": 0.77734375, "learning_rate": 0.0004951712951093541, "loss": 0.2082, "step": 45480 }, { "epoch": 1.88, "grad_norm": 0.490234375, "learning_rate": 0.0004951691736357136, "loss": 0.2904, "step": 45490 }, { "epoch": 1.88, "grad_norm": 0.6328125, "learning_rate": 0.0004951670517006912, "loss": 0.2252, "step": 45500 }, { "epoch": 1.89, "grad_norm": 0.60546875, "learning_rate": 0.0004951649293042908, "loss": 0.2526, "step": 45510 }, { "epoch": 1.89, "grad_norm": 0.9765625, "learning_rate": 0.0004951628064465165, "loss": 0.2061, "step": 45520 }, { "epoch": 1.89, "grad_norm": 0.53515625, "learning_rate": 0.0004951606831273723, "loss": 0.1776, "step": 45530 }, { "epoch": 1.89, "grad_norm": 1.4765625, "learning_rate": 0.0004951585593468621, "loss": 0.1996, "step": 45540 }, { "epoch": 1.89, "grad_norm": 0.54296875, "learning_rate": 0.0004951564351049901, "loss": 0.187, "step": 45550 }, { "epoch": 1.89, "grad_norm": 0.4609375, "learning_rate": 0.0004951543104017601, "loss": 0.1927, "step": 45560 }, { "epoch": 1.89, "grad_norm": 0.8984375, "learning_rate": 0.0004951521852371761, "loss": 0.1802, "step": 45570 }, { "epoch": 1.89, "grad_norm": 0.66015625, "learning_rate": 0.0004951500596112422, "loss": 0.1753, "step": 45580 }, { "epoch": 1.89, "grad_norm": 0.0, "learning_rate": 0.0004951479335239624, "loss": 0.222, "step": 45590 }, { "epoch": 1.89, "grad_norm": 1.1015625, "learning_rate": 0.0004951458069753406, "loss": 0.2526, "step": 45600 }, { "epoch": 1.89, "grad_norm": 0.376953125, "learning_rate": 0.0004951436799653808, "loss": 0.2075, "step": 45610 }, { "epoch": 1.89, "grad_norm": 1.3671875, "learning_rate": 0.0004951415524940873, "loss": 0.1669, "step": 45620 }, { "epoch": 1.89, "grad_norm": 0.484375, "learning_rate": 0.0004951394245614637, "loss": 0.2027, "step": 45630 }, { "epoch": 1.89, "grad_norm": 0.458984375, "learning_rate": 0.0004951372961675142, "loss": 0.2008, "step": 45640 }, { "epoch": 1.89, "grad_norm": 0.64453125, "learning_rate": 0.0004951351673122428, "loss": 0.2652, "step": 45650 }, { "epoch": 1.89, "grad_norm": 1.0859375, "learning_rate": 0.0004951330379956535, "loss": 0.2054, "step": 45660 }, { "epoch": 1.89, "grad_norm": 0.6171875, "learning_rate": 0.0004951309082177503, "loss": 0.2443, "step": 45670 }, { "epoch": 1.89, "grad_norm": 0.2314453125, "learning_rate": 0.0004951287779785371, "loss": 0.221, "step": 45680 }, { "epoch": 1.89, "grad_norm": 1.9140625, "learning_rate": 0.0004951266472780181, "loss": 0.2148, "step": 45690 }, { "epoch": 1.89, "grad_norm": 0.6953125, "learning_rate": 0.0004951245161161972, "loss": 0.2465, "step": 45700 }, { "epoch": 1.89, "grad_norm": 1.140625, "learning_rate": 0.0004951223844930785, "loss": 0.2995, "step": 45710 }, { "epoch": 1.89, "grad_norm": 0.232421875, "learning_rate": 0.0004951202524086658, "loss": 0.2289, "step": 45720 }, { "epoch": 1.89, "grad_norm": 0.60546875, "learning_rate": 0.0004951181198629634, "loss": 0.1902, "step": 45730 }, { "epoch": 1.89, "grad_norm": 0.6484375, "learning_rate": 0.0004951159868559752, "loss": 0.2424, "step": 45740 }, { "epoch": 1.89, "grad_norm": 0.33984375, "learning_rate": 0.000495113853387705, "loss": 0.1497, "step": 45750 }, { "epoch": 1.9, "grad_norm": 0.30078125, "learning_rate": 0.0004951117194581571, "loss": 0.2164, "step": 45760 }, { "epoch": 1.9, "grad_norm": 0.546875, "learning_rate": 0.0004951095850673355, "loss": 0.1761, "step": 45770 }, { "epoch": 1.9, "grad_norm": 0.322265625, "learning_rate": 0.0004951074502152439, "loss": 0.2076, "step": 45780 }, { "epoch": 1.9, "grad_norm": 2.03125, "learning_rate": 0.0004951053149018867, "loss": 0.1829, "step": 45790 }, { "epoch": 1.9, "grad_norm": 0.5390625, "learning_rate": 0.0004951031791272678, "loss": 0.1894, "step": 45800 }, { "epoch": 1.9, "grad_norm": 0.54296875, "learning_rate": 0.0004951010428913911, "loss": 0.2705, "step": 45810 }, { "epoch": 1.9, "grad_norm": 0.0, "learning_rate": 0.0004950989061942608, "loss": 0.2334, "step": 45820 }, { "epoch": 1.9, "grad_norm": 0.56640625, "learning_rate": 0.0004950967690358808, "loss": 0.2491, "step": 45830 }, { "epoch": 1.9, "grad_norm": 1.3046875, "learning_rate": 0.0004950946314162551, "loss": 0.2111, "step": 45840 }, { "epoch": 1.9, "grad_norm": 0.3984375, "learning_rate": 0.0004950924933353878, "loss": 0.2307, "step": 45850 }, { "epoch": 1.9, "grad_norm": 0.6328125, "learning_rate": 0.0004950903547932829, "loss": 0.2109, "step": 45860 }, { "epoch": 1.9, "grad_norm": 0.70703125, "learning_rate": 0.0004950882157899444, "loss": 0.2225, "step": 45870 }, { "epoch": 1.9, "grad_norm": 1.453125, "learning_rate": 0.0004950860763253763, "loss": 0.2136, "step": 45880 }, { "epoch": 1.9, "grad_norm": 0.240234375, "learning_rate": 0.0004950839363995827, "loss": 0.2847, "step": 45890 }, { "epoch": 1.9, "grad_norm": 0.69921875, "learning_rate": 0.0004950817960125677, "loss": 0.2307, "step": 45900 }, { "epoch": 1.9, "grad_norm": 0.87890625, "learning_rate": 0.000495079655164335, "loss": 0.2482, "step": 45910 }, { "epoch": 1.9, "grad_norm": 0.466796875, "learning_rate": 0.0004950775138548891, "loss": 0.2375, "step": 45920 }, { "epoch": 1.9, "grad_norm": 0.5546875, "learning_rate": 0.0004950753720842337, "loss": 0.2304, "step": 45930 }, { "epoch": 1.9, "grad_norm": 0.4375, "learning_rate": 0.0004950732298523729, "loss": 0.1987, "step": 45940 }, { "epoch": 1.9, "grad_norm": 0.6796875, "learning_rate": 0.0004950710871593107, "loss": 0.2325, "step": 45950 }, { "epoch": 1.9, "grad_norm": 2.453125, "learning_rate": 0.0004950689440050512, "loss": 0.2463, "step": 45960 }, { "epoch": 1.9, "grad_norm": 1.78125, "learning_rate": 0.0004950668003895985, "loss": 0.2282, "step": 45970 }, { "epoch": 1.9, "grad_norm": 0.482421875, "learning_rate": 0.0004950646563129565, "loss": 0.1856, "step": 45980 }, { "epoch": 1.9, "grad_norm": 0.640625, "learning_rate": 0.0004950625117751292, "loss": 0.2082, "step": 45990 }, { "epoch": 1.91, "grad_norm": 1.8828125, "learning_rate": 0.0004950603667761208, "loss": 0.1976, "step": 46000 }, { "epoch": 1.91, "grad_norm": 0.54296875, "learning_rate": 0.0004950582213159352, "loss": 0.2325, "step": 46010 }, { "epoch": 1.91, "grad_norm": 0.55078125, "learning_rate": 0.0004950560753945766, "loss": 0.2071, "step": 46020 }, { "epoch": 1.91, "grad_norm": 0.306640625, "learning_rate": 0.0004950539290120488, "loss": 0.1911, "step": 46030 }, { "epoch": 1.91, "grad_norm": 0.6328125, "learning_rate": 0.0004950517821683562, "loss": 0.2061, "step": 46040 }, { "epoch": 1.91, "grad_norm": 1.1640625, "learning_rate": 0.0004950496348635023, "loss": 0.2183, "step": 46050 }, { "epoch": 1.91, "grad_norm": 0.494140625, "learning_rate": 0.0004950474870974916, "loss": 0.1338, "step": 46060 }, { "epoch": 1.91, "grad_norm": 0.61328125, "learning_rate": 0.000495045338870328, "loss": 0.1866, "step": 46070 }, { "epoch": 1.91, "grad_norm": 0.63671875, "learning_rate": 0.0004950431901820155, "loss": 0.215, "step": 46080 }, { "epoch": 1.91, "grad_norm": 1.109375, "learning_rate": 0.0004950410410325582, "loss": 0.2731, "step": 46090 }, { "epoch": 1.91, "grad_norm": 1.5859375, "learning_rate": 0.0004950388914219602, "loss": 0.2963, "step": 46100 }, { "epoch": 1.91, "grad_norm": 1.796875, "learning_rate": 0.0004950367413502253, "loss": 0.2189, "step": 46110 }, { "epoch": 1.91, "grad_norm": 0.96875, "learning_rate": 0.0004950345908173578, "loss": 0.235, "step": 46120 }, { "epoch": 1.91, "grad_norm": 0.53515625, "learning_rate": 0.0004950324398233616, "loss": 0.2128, "step": 46130 }, { "epoch": 1.91, "grad_norm": 0.96875, "learning_rate": 0.0004950302883682409, "loss": 0.2331, "step": 46140 }, { "epoch": 1.91, "grad_norm": 0.37890625, "learning_rate": 0.0004950281364519997, "loss": 0.2059, "step": 46150 }, { "epoch": 1.91, "grad_norm": 0.89453125, "learning_rate": 0.0004950259840746419, "loss": 0.1896, "step": 46160 }, { "epoch": 1.91, "grad_norm": 0.8984375, "learning_rate": 0.0004950238312361717, "loss": 0.1957, "step": 46170 }, { "epoch": 1.91, "grad_norm": 0.7421875, "learning_rate": 0.0004950216779365932, "loss": 0.2029, "step": 46180 }, { "epoch": 1.91, "grad_norm": 0.6328125, "learning_rate": 0.0004950195241759102, "loss": 0.2123, "step": 46190 }, { "epoch": 1.91, "grad_norm": 0.48828125, "learning_rate": 0.000495017369954127, "loss": 0.1979, "step": 46200 }, { "epoch": 1.91, "grad_norm": 0.38671875, "learning_rate": 0.0004950152152712475, "loss": 0.2659, "step": 46210 }, { "epoch": 1.91, "grad_norm": 1.3359375, "learning_rate": 0.0004950130601272759, "loss": 0.2747, "step": 46220 }, { "epoch": 1.91, "grad_norm": 0.482421875, "learning_rate": 0.0004950109045222162, "loss": 0.1854, "step": 46230 }, { "epoch": 1.92, "grad_norm": 1.2265625, "learning_rate": 0.0004950087484560723, "loss": 0.2766, "step": 46240 }, { "epoch": 1.92, "grad_norm": 0.5234375, "learning_rate": 0.0004950065919288486, "loss": 0.2172, "step": 46250 }, { "epoch": 1.92, "grad_norm": 0.478515625, "learning_rate": 0.0004950044349405488, "loss": 0.162, "step": 46260 }, { "epoch": 1.92, "grad_norm": 1.6015625, "learning_rate": 0.0004950022774911771, "loss": 0.2125, "step": 46270 }, { "epoch": 1.92, "grad_norm": 0.63671875, "learning_rate": 0.0004950001195807377, "loss": 0.2237, "step": 46280 }, { "epoch": 1.92, "grad_norm": 1.4296875, "learning_rate": 0.0004949979612092344, "loss": 0.1529, "step": 46290 }, { "epoch": 1.92, "grad_norm": 0.6640625, "learning_rate": 0.0004949958023766714, "loss": 0.2296, "step": 46300 }, { "epoch": 1.92, "grad_norm": 0.59765625, "learning_rate": 0.0004949936430830528, "loss": 0.224, "step": 46310 }, { "epoch": 1.92, "grad_norm": 0.37890625, "learning_rate": 0.0004949914833283827, "loss": 0.1948, "step": 46320 }, { "epoch": 1.92, "grad_norm": 0.9921875, "learning_rate": 0.000494989323112665, "loss": 0.2165, "step": 46330 }, { "epoch": 1.92, "grad_norm": 1.3125, "learning_rate": 0.0004949871624359038, "loss": 0.18, "step": 46340 }, { "epoch": 1.92, "grad_norm": 0.2578125, "learning_rate": 0.0004949850012981033, "loss": 0.2608, "step": 46350 }, { "epoch": 1.92, "grad_norm": 0.9140625, "learning_rate": 0.0004949828396992675, "loss": 0.2633, "step": 46360 }, { "epoch": 1.92, "grad_norm": 1.90625, "learning_rate": 0.0004949806776394004, "loss": 0.2338, "step": 46370 }, { "epoch": 1.92, "grad_norm": 0.51953125, "learning_rate": 0.0004949785151185062, "loss": 0.2231, "step": 46380 }, { "epoch": 1.92, "grad_norm": 0.5, "learning_rate": 0.0004949763521365887, "loss": 0.2529, "step": 46390 }, { "epoch": 1.92, "grad_norm": 0.7734375, "learning_rate": 0.0004949741886936524, "loss": 0.2192, "step": 46400 }, { "epoch": 1.92, "grad_norm": 0.373046875, "learning_rate": 0.000494972024789701, "loss": 0.2505, "step": 46410 }, { "epoch": 1.92, "grad_norm": 0.69140625, "learning_rate": 0.0004949698604247387, "loss": 0.2232, "step": 46420 }, { "epoch": 1.92, "grad_norm": 0.33203125, "learning_rate": 0.0004949676955987697, "loss": 0.2244, "step": 46430 }, { "epoch": 1.92, "grad_norm": 0.3984375, "learning_rate": 0.0004949655303117978, "loss": 0.2253, "step": 46440 }, { "epoch": 1.92, "grad_norm": 0.2275390625, "learning_rate": 0.0004949633645638272, "loss": 0.2685, "step": 46450 }, { "epoch": 1.92, "grad_norm": 0.6484375, "learning_rate": 0.0004949611983548621, "loss": 0.2142, "step": 46460 }, { "epoch": 1.92, "grad_norm": 1.640625, "learning_rate": 0.0004949590316849064, "loss": 0.2475, "step": 46470 }, { "epoch": 1.93, "grad_norm": 1.484375, "learning_rate": 0.0004949568645539644, "loss": 0.1797, "step": 46480 }, { "epoch": 1.93, "grad_norm": 0.76171875, "learning_rate": 0.00049495469696204, "loss": 0.2523, "step": 46490 }, { "epoch": 1.93, "grad_norm": 0.51953125, "learning_rate": 0.0004949525289091372, "loss": 0.1976, "step": 46500 }, { "epoch": 1.93, "grad_norm": 0.859375, "learning_rate": 0.0004949503603952603, "loss": 0.2209, "step": 46510 }, { "epoch": 1.93, "grad_norm": 0.7265625, "learning_rate": 0.0004949481914204131, "loss": 0.2363, "step": 46520 }, { "epoch": 1.93, "grad_norm": 0.54296875, "learning_rate": 0.0004949460219846, "loss": 0.2378, "step": 46530 }, { "epoch": 1.93, "grad_norm": 0.7421875, "learning_rate": 0.000494943852087825, "loss": 0.2644, "step": 46540 }, { "epoch": 1.93, "grad_norm": 0.60546875, "learning_rate": 0.000494941681730092, "loss": 0.2739, "step": 46550 }, { "epoch": 1.93, "grad_norm": 0.41796875, "learning_rate": 0.0004949395109114053, "loss": 0.219, "step": 46560 }, { "epoch": 1.93, "grad_norm": 1.0546875, "learning_rate": 0.0004949373396317688, "loss": 0.2384, "step": 46570 }, { "epoch": 1.93, "grad_norm": 0.734375, "learning_rate": 0.0004949351678911868, "loss": 0.2522, "step": 46580 }, { "epoch": 1.93, "grad_norm": 0.4765625, "learning_rate": 0.0004949329956896631, "loss": 0.2475, "step": 46590 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 0.000494930823027202, "loss": 0.2511, "step": 46600 }, { "epoch": 1.93, "grad_norm": 0.255859375, "learning_rate": 0.0004949286499038076, "loss": 0.2501, "step": 46610 }, { "epoch": 1.93, "grad_norm": 1.1171875, "learning_rate": 0.000494926476319484, "loss": 0.275, "step": 46620 }, { "epoch": 1.93, "grad_norm": 0.546875, "learning_rate": 0.000494924302274235, "loss": 0.2559, "step": 46630 }, { "epoch": 1.93, "grad_norm": 0.57421875, "learning_rate": 0.0004949221277680651, "loss": 0.2224, "step": 46640 }, { "epoch": 1.93, "grad_norm": 1.265625, "learning_rate": 0.0004949199528009781, "loss": 0.1925, "step": 46650 }, { "epoch": 1.93, "grad_norm": 0.5078125, "learning_rate": 0.0004949177773729783, "loss": 0.1857, "step": 46660 }, { "epoch": 1.93, "grad_norm": 1.359375, "learning_rate": 0.0004949156014840697, "loss": 0.2329, "step": 46670 }, { "epoch": 1.93, "grad_norm": 0.640625, "learning_rate": 0.0004949134251342563, "loss": 0.2251, "step": 46680 }, { "epoch": 1.93, "grad_norm": 0.0, "learning_rate": 0.0004949112483235423, "loss": 0.2388, "step": 46690 }, { "epoch": 1.93, "grad_norm": 0.2080078125, "learning_rate": 0.0004949090710519317, "loss": 0.2138, "step": 46700 }, { "epoch": 1.93, "grad_norm": 0.57421875, "learning_rate": 0.0004949068933194289, "loss": 0.1994, "step": 46710 }, { "epoch": 1.94, "grad_norm": 0.474609375, "learning_rate": 0.0004949047151260375, "loss": 0.2329, "step": 46720 }, { "epoch": 1.94, "grad_norm": 1.2421875, "learning_rate": 0.0004949025364717621, "loss": 0.2026, "step": 46730 }, { "epoch": 1.94, "grad_norm": 0.396484375, "learning_rate": 0.0004949003573566065, "loss": 0.2817, "step": 46740 }, { "epoch": 1.94, "grad_norm": 0.75390625, "learning_rate": 0.0004948981777805748, "loss": 0.243, "step": 46750 }, { "epoch": 1.94, "grad_norm": 0.2373046875, "learning_rate": 0.0004948959977436712, "loss": 0.2324, "step": 46760 }, { "epoch": 1.94, "grad_norm": 1.1640625, "learning_rate": 0.0004948938172458999, "loss": 0.1978, "step": 46770 }, { "epoch": 1.94, "grad_norm": 0.671875, "learning_rate": 0.0004948916362872648, "loss": 0.2194, "step": 46780 }, { "epoch": 1.94, "grad_norm": 0.83984375, "learning_rate": 0.0004948894548677701, "loss": 0.198, "step": 46790 }, { "epoch": 1.94, "grad_norm": 0.71875, "learning_rate": 0.0004948872729874197, "loss": 0.2495, "step": 46800 }, { "epoch": 1.94, "grad_norm": 0.69921875, "learning_rate": 0.0004948850906462181, "loss": 0.1812, "step": 46810 }, { "epoch": 1.94, "grad_norm": 0.57421875, "learning_rate": 0.0004948829078441692, "loss": 0.1923, "step": 46820 }, { "epoch": 1.94, "grad_norm": 0.55859375, "learning_rate": 0.0004948807245812771, "loss": 0.2381, "step": 46830 }, { "epoch": 1.94, "grad_norm": 0.93359375, "learning_rate": 0.000494878540857546, "loss": 0.2327, "step": 46840 }, { "epoch": 1.94, "grad_norm": 1.1953125, "learning_rate": 0.0004948763566729798, "loss": 0.2043, "step": 46850 }, { "epoch": 1.94, "grad_norm": 0.5625, "learning_rate": 0.0004948741720275828, "loss": 0.2103, "step": 46860 }, { "epoch": 1.94, "grad_norm": 0.65234375, "learning_rate": 0.000494871986921359, "loss": 0.2318, "step": 46870 }, { "epoch": 1.94, "grad_norm": 1.0, "learning_rate": 0.0004948698013543125, "loss": 0.1933, "step": 46880 }, { "epoch": 1.94, "grad_norm": 0.359375, "learning_rate": 0.0004948676153264476, "loss": 0.2348, "step": 46890 }, { "epoch": 1.94, "grad_norm": 0.62890625, "learning_rate": 0.0004948654288377683, "loss": 0.2443, "step": 46900 }, { "epoch": 1.94, "grad_norm": 1.109375, "learning_rate": 0.0004948632418882787, "loss": 0.2017, "step": 46910 }, { "epoch": 1.94, "grad_norm": 0.984375, "learning_rate": 0.0004948610544779829, "loss": 0.2482, "step": 46920 }, { "epoch": 1.94, "grad_norm": 0.86328125, "learning_rate": 0.000494858866606885, "loss": 0.2571, "step": 46930 }, { "epoch": 1.94, "grad_norm": 0.361328125, "learning_rate": 0.0004948566782749892, "loss": 0.2302, "step": 46940 }, { "epoch": 1.94, "grad_norm": 0.2734375, "learning_rate": 0.0004948544894822995, "loss": 0.2543, "step": 46950 }, { "epoch": 1.95, "grad_norm": 0.69140625, "learning_rate": 0.0004948523002288203, "loss": 0.2115, "step": 46960 }, { "epoch": 1.95, "grad_norm": 0.5703125, "learning_rate": 0.0004948501105145553, "loss": 0.227, "step": 46970 }, { "epoch": 1.95, "grad_norm": 0.734375, "learning_rate": 0.0004948479203395089, "loss": 0.2494, "step": 46980 }, { "epoch": 1.95, "grad_norm": 0.439453125, "learning_rate": 0.0004948457297036851, "loss": 0.1791, "step": 46990 }, { "epoch": 1.95, "grad_norm": 0.328125, "learning_rate": 0.0004948435386070882, "loss": 0.2351, "step": 47000 }, { "epoch": 1.95, "grad_norm": 0.51171875, "learning_rate": 0.0004948413470497221, "loss": 0.1721, "step": 47010 }, { "epoch": 1.95, "grad_norm": 0.9453125, "learning_rate": 0.000494839155031591, "loss": 0.2856, "step": 47020 }, { "epoch": 1.95, "grad_norm": 0.384765625, "learning_rate": 0.0004948369625526991, "loss": 0.1849, "step": 47030 }, { "epoch": 1.95, "grad_norm": 0.404296875, "learning_rate": 0.0004948347696130505, "loss": 0.2958, "step": 47040 }, { "epoch": 1.95, "grad_norm": 0.74609375, "learning_rate": 0.0004948325762126493, "loss": 0.2914, "step": 47050 }, { "epoch": 1.95, "grad_norm": 0.25390625, "learning_rate": 0.0004948303823514997, "loss": 0.2159, "step": 47060 }, { "epoch": 1.95, "grad_norm": 0.69140625, "learning_rate": 0.0004948281880296056, "loss": 0.2382, "step": 47070 }, { "epoch": 1.95, "grad_norm": 0.8984375, "learning_rate": 0.0004948259932469714, "loss": 0.2152, "step": 47080 }, { "epoch": 1.95, "grad_norm": 0.4921875, "learning_rate": 0.0004948237980036012, "loss": 0.2376, "step": 47090 }, { "epoch": 1.95, "grad_norm": 1.2578125, "learning_rate": 0.0004948216022994989, "loss": 0.2115, "step": 47100 }, { "epoch": 1.95, "grad_norm": 0.53515625, "learning_rate": 0.0004948194061346688, "loss": 0.204, "step": 47110 }, { "epoch": 1.95, "grad_norm": 0.875, "learning_rate": 0.0004948172095091151, "loss": 0.182, "step": 47120 }, { "epoch": 1.95, "grad_norm": 0.71875, "learning_rate": 0.0004948150124228417, "loss": 0.188, "step": 47130 }, { "epoch": 1.95, "grad_norm": 0.515625, "learning_rate": 0.000494812814875853, "loss": 0.2406, "step": 47140 }, { "epoch": 1.95, "grad_norm": 0.8359375, "learning_rate": 0.000494810616868153, "loss": 0.2605, "step": 47150 }, { "epoch": 1.95, "grad_norm": 0.90625, "learning_rate": 0.0004948084183997458, "loss": 0.2217, "step": 47160 }, { "epoch": 1.95, "grad_norm": 0.69921875, "learning_rate": 0.0004948062194706357, "loss": 0.2901, "step": 47170 }, { "epoch": 1.95, "grad_norm": 0.77734375, "learning_rate": 0.0004948040200808266, "loss": 0.2331, "step": 47180 }, { "epoch": 1.95, "grad_norm": 0.62890625, "learning_rate": 0.0004948018202303229, "loss": 0.2306, "step": 47190 }, { "epoch": 1.96, "grad_norm": 1.15625, "learning_rate": 0.0004947996199191285, "loss": 0.2015, "step": 47200 }, { "epoch": 1.96, "grad_norm": 1.140625, "learning_rate": 0.0004947974191472478, "loss": 0.2065, "step": 47210 }, { "epoch": 1.96, "grad_norm": 0.384765625, "learning_rate": 0.0004947952179146846, "loss": 0.2276, "step": 47220 }, { "epoch": 1.96, "grad_norm": 0.70703125, "learning_rate": 0.0004947930162214433, "loss": 0.2289, "step": 47230 }, { "epoch": 1.96, "grad_norm": 0.9453125, "learning_rate": 0.0004947908140675282, "loss": 0.2242, "step": 47240 }, { "epoch": 1.96, "grad_norm": 0.6484375, "learning_rate": 0.000494788611452943, "loss": 0.2046, "step": 47250 }, { "epoch": 1.96, "grad_norm": 1.3359375, "learning_rate": 0.0004947864083776921, "loss": 0.195, "step": 47260 }, { "epoch": 1.96, "grad_norm": 0.322265625, "learning_rate": 0.0004947842048417796, "loss": 0.2153, "step": 47270 }, { "epoch": 1.96, "grad_norm": 0.416015625, "learning_rate": 0.0004947820008452097, "loss": 0.1993, "step": 47280 }, { "epoch": 1.96, "grad_norm": 0.71484375, "learning_rate": 0.0004947797963879865, "loss": 0.2119, "step": 47290 }, { "epoch": 1.96, "grad_norm": 0.8671875, "learning_rate": 0.000494777591470114, "loss": 0.2173, "step": 47300 }, { "epoch": 1.96, "grad_norm": 1.3984375, "learning_rate": 0.0004947753860915968, "loss": 0.2539, "step": 47310 }, { "epoch": 1.96, "grad_norm": 0.2490234375, "learning_rate": 0.0004947731802524385, "loss": 0.2189, "step": 47320 }, { "epoch": 1.96, "grad_norm": 0.365234375, "learning_rate": 0.0004947709739526437, "loss": 0.2338, "step": 47330 }, { "epoch": 1.96, "grad_norm": 0.421875, "learning_rate": 0.0004947687671922163, "loss": 0.2655, "step": 47340 }, { "epoch": 1.96, "grad_norm": 0.5546875, "learning_rate": 0.0004947665599711605, "loss": 0.2284, "step": 47350 }, { "epoch": 1.96, "grad_norm": 2.25, "learning_rate": 0.0004947643522894803, "loss": 0.2521, "step": 47360 }, { "epoch": 1.96, "grad_norm": 0.54296875, "learning_rate": 0.0004947621441471802, "loss": 0.2137, "step": 47370 }, { "epoch": 1.96, "grad_norm": 0.609375, "learning_rate": 0.0004947599355442642, "loss": 0.2098, "step": 47380 }, { "epoch": 1.96, "grad_norm": 0.57421875, "learning_rate": 0.0004947577264807364, "loss": 0.2221, "step": 47390 }, { "epoch": 1.96, "grad_norm": 0.65234375, "learning_rate": 0.0004947555169566009, "loss": 0.1779, "step": 47400 }, { "epoch": 1.96, "grad_norm": 0.421875, "learning_rate": 0.0004947533069718621, "loss": 0.2085, "step": 47410 }, { "epoch": 1.96, "grad_norm": 0.6328125, "learning_rate": 0.000494751096526524, "loss": 0.2256, "step": 47420 }, { "epoch": 1.96, "grad_norm": 0.4765625, "learning_rate": 0.0004947488856205907, "loss": 0.2319, "step": 47430 }, { "epoch": 1.96, "grad_norm": 1.0625, "learning_rate": 0.0004947466742540664, "loss": 0.2138, "step": 47440 }, { "epoch": 1.97, "grad_norm": 0.9375, "learning_rate": 0.0004947444624269553, "loss": 0.2133, "step": 47450 }, { "epoch": 1.97, "grad_norm": 0.37890625, "learning_rate": 0.0004947422501392616, "loss": 0.2613, "step": 47460 }, { "epoch": 1.97, "grad_norm": 0.6484375, "learning_rate": 0.0004947400373909894, "loss": 0.1776, "step": 47470 }, { "epoch": 1.97, "grad_norm": 0.298828125, "learning_rate": 0.0004947378241821429, "loss": 0.2339, "step": 47480 }, { "epoch": 1.97, "grad_norm": 0.859375, "learning_rate": 0.0004947356105127261, "loss": 0.2308, "step": 47490 }, { "epoch": 1.97, "grad_norm": 0.96875, "learning_rate": 0.0004947333963827435, "loss": 0.2348, "step": 47500 }, { "epoch": 1.97, "grad_norm": 0.45703125, "learning_rate": 0.000494731181792199, "loss": 0.2173, "step": 47510 }, { "epoch": 1.97, "grad_norm": 0.474609375, "learning_rate": 0.0004947289667410968, "loss": 0.2403, "step": 47520 }, { "epoch": 1.97, "grad_norm": 0.421875, "learning_rate": 0.0004947267512294412, "loss": 0.2291, "step": 47530 }, { "epoch": 1.97, "grad_norm": 0.48828125, "learning_rate": 0.0004947245352572362, "loss": 0.2197, "step": 47540 }, { "epoch": 1.97, "grad_norm": 1.484375, "learning_rate": 0.0004947223188244861, "loss": 0.215, "step": 47550 }, { "epoch": 1.97, "grad_norm": 0.4296875, "learning_rate": 0.000494720101931195, "loss": 0.2694, "step": 47560 }, { "epoch": 1.97, "grad_norm": 0.796875, "learning_rate": 0.0004947178845773671, "loss": 0.2067, "step": 47570 }, { "epoch": 1.97, "grad_norm": 0.6328125, "learning_rate": 0.0004947156667630066, "loss": 0.2187, "step": 47580 }, { "epoch": 1.97, "grad_norm": 0.494140625, "learning_rate": 0.0004947134484881177, "loss": 0.2134, "step": 47590 }, { "epoch": 1.97, "grad_norm": 0.984375, "learning_rate": 0.0004947112297527043, "loss": 0.2748, "step": 47600 }, { "epoch": 1.97, "grad_norm": 0.248046875, "learning_rate": 0.000494709010556771, "loss": 0.2608, "step": 47610 }, { "epoch": 1.97, "grad_norm": 0.5703125, "learning_rate": 0.0004947067909003217, "loss": 0.2342, "step": 47620 }, { "epoch": 1.97, "grad_norm": 0.421875, "learning_rate": 0.0004947045707833606, "loss": 0.2213, "step": 47630 }, { "epoch": 1.97, "grad_norm": 0.921875, "learning_rate": 0.0004947023502058919, "loss": 0.1901, "step": 47640 }, { "epoch": 1.97, "grad_norm": 0.63671875, "learning_rate": 0.00049470012916792, "loss": 0.2026, "step": 47650 }, { "epoch": 1.97, "grad_norm": 0.875, "learning_rate": 0.0004946979076694487, "loss": 0.2225, "step": 47660 }, { "epoch": 1.97, "grad_norm": 1.8984375, "learning_rate": 0.0004946956857104824, "loss": 0.2581, "step": 47670 }, { "epoch": 1.97, "grad_norm": 0.28125, "learning_rate": 0.0004946934632910253, "loss": 0.2145, "step": 47680 }, { "epoch": 1.98, "grad_norm": 0.4765625, "learning_rate": 0.0004946912404110815, "loss": 0.22, "step": 47690 }, { "epoch": 1.98, "grad_norm": 0.76171875, "learning_rate": 0.0004946890170706552, "loss": 0.2137, "step": 47700 }, { "epoch": 1.98, "grad_norm": 0.56640625, "learning_rate": 0.0004946867932697505, "loss": 0.2377, "step": 47710 }, { "epoch": 1.98, "grad_norm": 0.69140625, "learning_rate": 0.0004946845690083718, "loss": 0.2125, "step": 47720 }, { "epoch": 1.98, "grad_norm": 0.99609375, "learning_rate": 0.0004946823442865231, "loss": 0.2209, "step": 47730 }, { "epoch": 1.98, "grad_norm": 0.59765625, "learning_rate": 0.0004946801191042088, "loss": 0.2022, "step": 47740 }, { "epoch": 1.98, "grad_norm": 0.1943359375, "learning_rate": 0.0004946778934614328, "loss": 0.2323, "step": 47750 }, { "epoch": 1.98, "grad_norm": 0.55078125, "learning_rate": 0.0004946756673581994, "loss": 0.226, "step": 47760 }, { "epoch": 1.98, "grad_norm": 0.5703125, "learning_rate": 0.0004946734407945129, "loss": 0.1827, "step": 47770 }, { "epoch": 1.98, "grad_norm": 0.85546875, "learning_rate": 0.0004946712137703773, "loss": 0.2676, "step": 47780 }, { "epoch": 1.98, "grad_norm": 0.482421875, "learning_rate": 0.000494668986285797, "loss": 0.1913, "step": 47790 }, { "epoch": 1.98, "grad_norm": 0.51171875, "learning_rate": 0.0004946667583407761, "loss": 0.2006, "step": 47800 }, { "epoch": 1.98, "grad_norm": 1.078125, "learning_rate": 0.0004946645299353187, "loss": 0.1687, "step": 47810 }, { "epoch": 1.98, "grad_norm": 0.73828125, "learning_rate": 0.0004946623010694291, "loss": 0.1983, "step": 47820 }, { "epoch": 1.98, "grad_norm": 1.015625, "learning_rate": 0.0004946600717431115, "loss": 0.2605, "step": 47830 }, { "epoch": 1.98, "grad_norm": 0.70703125, "learning_rate": 0.0004946578419563701, "loss": 0.2547, "step": 47840 }, { "epoch": 1.98, "grad_norm": 0.828125, "learning_rate": 0.000494655611709209, "loss": 0.2245, "step": 47850 }, { "epoch": 1.98, "grad_norm": 0.80859375, "learning_rate": 0.0004946533810016324, "loss": 0.2224, "step": 47860 }, { "epoch": 1.98, "grad_norm": 0.494140625, "learning_rate": 0.0004946511498336447, "loss": 0.2273, "step": 47870 }, { "epoch": 1.98, "grad_norm": 2.078125, "learning_rate": 0.0004946489182052499, "loss": 0.21, "step": 47880 }, { "epoch": 1.98, "grad_norm": 1.0859375, "learning_rate": 0.0004946466861164522, "loss": 0.2475, "step": 47890 }, { "epoch": 1.98, "grad_norm": 1.03125, "learning_rate": 0.0004946444535672559, "loss": 0.24, "step": 47900 }, { "epoch": 1.98, "grad_norm": 0.0, "learning_rate": 0.0004946422205576652, "loss": 0.2885, "step": 47910 }, { "epoch": 1.98, "grad_norm": 0.64453125, "learning_rate": 0.0004946399870876842, "loss": 0.1566, "step": 47920 }, { "epoch": 1.99, "grad_norm": 0.78125, "learning_rate": 0.0004946377531573171, "loss": 0.2706, "step": 47930 }, { "epoch": 1.99, "grad_norm": 1.046875, "learning_rate": 0.0004946355187665683, "loss": 0.2698, "step": 47940 }, { "epoch": 1.99, "grad_norm": 0.66796875, "learning_rate": 0.0004946332839154419, "loss": 0.227, "step": 47950 }, { "epoch": 1.99, "grad_norm": 0.53125, "learning_rate": 0.000494631048603942, "loss": 0.2307, "step": 47960 }, { "epoch": 1.99, "grad_norm": 0.55078125, "learning_rate": 0.0004946288128320729, "loss": 0.2276, "step": 47970 }, { "epoch": 1.99, "grad_norm": 0.421875, "learning_rate": 0.0004946265765998388, "loss": 0.207, "step": 47980 }, { "epoch": 1.99, "grad_norm": 0.373046875, "learning_rate": 0.0004946243399072439, "loss": 0.1841, "step": 47990 }, { "epoch": 1.99, "grad_norm": 0.41015625, "learning_rate": 0.0004946221027542923, "loss": 0.2339, "step": 48000 }, { "epoch": 1.99, "grad_norm": 0.25390625, "learning_rate": 0.0004946198651409884, "loss": 0.2165, "step": 48010 }, { "epoch": 1.99, "grad_norm": 1.90625, "learning_rate": 0.0004946176270673364, "loss": 0.2022, "step": 48020 }, { "epoch": 1.99, "grad_norm": 0.5703125, "learning_rate": 0.0004946153885333403, "loss": 0.2038, "step": 48030 }, { "epoch": 1.99, "grad_norm": 0.455078125, "learning_rate": 0.0004946131495390046, "loss": 0.2059, "step": 48040 }, { "epoch": 1.99, "grad_norm": 0.60546875, "learning_rate": 0.0004946109100843332, "loss": 0.2156, "step": 48050 }, { "epoch": 1.99, "grad_norm": 0.88671875, "learning_rate": 0.0004946086701693306, "loss": 0.1869, "step": 48060 }, { "epoch": 1.99, "grad_norm": 0.5078125, "learning_rate": 0.0004946064297940009, "loss": 0.2303, "step": 48070 }, { "epoch": 1.99, "grad_norm": 0.625, "learning_rate": 0.0004946041889583484, "loss": 0.2298, "step": 48080 }, { "epoch": 1.99, "grad_norm": 0.45703125, "learning_rate": 0.0004946019476623771, "loss": 0.1896, "step": 48090 }, { "epoch": 1.99, "grad_norm": 0.60546875, "learning_rate": 0.0004945997059060914, "loss": 0.2256, "step": 48100 }, { "epoch": 1.99, "grad_norm": 0.609375, "learning_rate": 0.0004945974636894954, "loss": 0.2093, "step": 48110 }, { "epoch": 1.99, "grad_norm": 0.69140625, "learning_rate": 0.0004945952210125934, "loss": 0.2596, "step": 48120 }, { "epoch": 1.99, "grad_norm": 1.0390625, "learning_rate": 0.0004945929778753897, "loss": 0.2697, "step": 48130 }, { "epoch": 1.99, "grad_norm": 0.64453125, "learning_rate": 0.0004945907342778883, "loss": 0.204, "step": 48140 }, { "epoch": 1.99, "grad_norm": 0.7109375, "learning_rate": 0.0004945884902200937, "loss": 0.2027, "step": 48150 }, { "epoch": 1.99, "grad_norm": 0.8125, "learning_rate": 0.0004945862457020099, "loss": 0.2143, "step": 48160 }, { "epoch": 2.0, "grad_norm": 0.56640625, "learning_rate": 0.0004945840007236412, "loss": 0.2398, "step": 48170 }, { "epoch": 2.0, "grad_norm": 0.451171875, "learning_rate": 0.0004945817552849918, "loss": 0.1956, "step": 48180 }, { "epoch": 2.0, "grad_norm": 0.21875, "learning_rate": 0.000494579509386066, "loss": 0.2506, "step": 48190 }, { "epoch": 2.0, "grad_norm": 1.2890625, "learning_rate": 0.0004945772630268679, "loss": 0.2104, "step": 48200 }, { "epoch": 2.0, "grad_norm": 3.09375, "learning_rate": 0.0004945750162074019, "loss": 0.2428, "step": 48210 }, { "epoch": 2.0, "grad_norm": 0.361328125, "learning_rate": 0.0004945727689276721, "loss": 0.2399, "step": 48220 }, { "epoch": 2.0, "grad_norm": 0.423828125, "learning_rate": 0.0004945705211876827, "loss": 0.2027, "step": 48230 }, { "epoch": 2.0, "grad_norm": 0.2021484375, "learning_rate": 0.000494568272987438, "loss": 0.2713, "step": 48240 }, { "epoch": 2.0, "grad_norm": 0.546875, "learning_rate": 0.0004945660243269423, "loss": 0.2127, "step": 48250 }, { "epoch": 2.0, "grad_norm": 0.56640625, "learning_rate": 0.0004945637752061996, "loss": 0.1366, "step": 48260 }, { "epoch": 2.0, "grad_norm": 0.94140625, "learning_rate": 0.0004945615256252145, "loss": 0.2103, "step": 48270 }, { "epoch": 2.0, "grad_norm": 0.65625, "learning_rate": 0.0004945592755839908, "loss": 0.2185, "step": 48280 }, { "epoch": 2.0, "grad_norm": 0.439453125, "learning_rate": 0.0004945570250825331, "loss": 0.1365, "step": 48290 }, { "epoch": 2.0, "grad_norm": 0.44921875, "learning_rate": 0.0004945547741208455, "loss": 0.249, "step": 48300 }, { "epoch": 2.0, "grad_norm": 0.447265625, "learning_rate": 0.0004945525226989321, "loss": 0.2118, "step": 48310 }, { "epoch": 2.0, "grad_norm": 0.921875, "learning_rate": 0.0004945502708167973, "loss": 0.1868, "step": 48320 }, { "epoch": 2.0, "grad_norm": 2.203125, "learning_rate": 0.0004945480184744454, "loss": 0.1854, "step": 48330 }, { "epoch": 2.0, "grad_norm": 0.76953125, "learning_rate": 0.0004945457656718805, "loss": 0.247, "step": 48340 }, { "epoch": 2.0, "grad_norm": 1.2890625, "learning_rate": 0.0004945435124091069, "loss": 0.2494, "step": 48350 }, { "epoch": 2.0, "grad_norm": 0.30078125, "learning_rate": 0.0004945412586861288, "loss": 0.2209, "step": 48360 }, { "epoch": 2.0, "grad_norm": 1.015625, "learning_rate": 0.0004945390045029503, "loss": 0.2047, "step": 48370 }, { "epoch": 2.0, "grad_norm": 0.35546875, "learning_rate": 0.000494536749859576, "loss": 0.2534, "step": 48380 }, { "epoch": 2.0, "grad_norm": 0.125, "learning_rate": 0.0004945344947560098, "loss": 0.2828, "step": 48390 }, { "epoch": 2.0, "grad_norm": 0.66796875, "learning_rate": 0.0004945322391922562, "loss": 0.2458, "step": 48400 }, { "epoch": 2.01, "grad_norm": 0.56640625, "learning_rate": 0.0004945299831683193, "loss": 0.234, "step": 48410 }, { "epoch": 2.01, "grad_norm": 0.98046875, "learning_rate": 0.0004945277266842034, "loss": 0.2698, "step": 48420 }, { "epoch": 2.01, "grad_norm": 0.9296875, "learning_rate": 0.0004945254697399128, "loss": 0.1942, "step": 48430 }, { "epoch": 2.01, "grad_norm": 0.6796875, "learning_rate": 0.0004945232123354515, "loss": 0.189, "step": 48440 }, { "epoch": 2.01, "grad_norm": 0.6484375, "learning_rate": 0.000494520954470824, "loss": 0.2508, "step": 48450 }, { "epoch": 2.01, "grad_norm": 0.6171875, "learning_rate": 0.0004945186961460345, "loss": 0.2017, "step": 48460 }, { "epoch": 2.01, "grad_norm": 0.51171875, "learning_rate": 0.0004945164373610872, "loss": 0.2336, "step": 48470 }, { "epoch": 2.01, "grad_norm": 0.67578125, "learning_rate": 0.0004945141781159863, "loss": 0.2272, "step": 48480 }, { "epoch": 2.01, "grad_norm": 0.48046875, "learning_rate": 0.0004945119184107361, "loss": 0.1592, "step": 48490 }, { "epoch": 2.01, "grad_norm": 0.4296875, "learning_rate": 0.000494509658245341, "loss": 0.196, "step": 48500 }, { "epoch": 2.01, "grad_norm": 0.7578125, "learning_rate": 0.0004945073976198051, "loss": 0.1986, "step": 48510 }, { "epoch": 2.01, "grad_norm": 0.9765625, "learning_rate": 0.0004945051365341326, "loss": 0.1975, "step": 48520 }, { "epoch": 2.01, "grad_norm": 0.359375, "learning_rate": 0.000494502874988328, "loss": 0.2317, "step": 48530 }, { "epoch": 2.01, "grad_norm": 0.875, "learning_rate": 0.0004945006129823952, "loss": 0.1878, "step": 48540 }, { "epoch": 2.01, "grad_norm": 1.421875, "learning_rate": 0.0004944983505163388, "loss": 0.2836, "step": 48550 }, { "epoch": 2.01, "grad_norm": 0.6328125, "learning_rate": 0.0004944960875901629, "loss": 0.2428, "step": 48560 }, { "epoch": 2.01, "grad_norm": 1.296875, "learning_rate": 0.0004944938242038716, "loss": 0.1908, "step": 48570 }, { "epoch": 2.01, "grad_norm": 0.2890625, "learning_rate": 0.0004944915603574695, "loss": 0.2207, "step": 48580 }, { "epoch": 2.01, "grad_norm": 0.78515625, "learning_rate": 0.0004944892960509606, "loss": 0.2024, "step": 48590 }, { "epoch": 2.01, "grad_norm": 0.455078125, "learning_rate": 0.0004944870312843493, "loss": 0.1939, "step": 48600 }, { "epoch": 2.01, "grad_norm": 0.875, "learning_rate": 0.0004944847660576399, "loss": 0.1602, "step": 48610 }, { "epoch": 2.01, "grad_norm": 0.2138671875, "learning_rate": 0.0004944825003708363, "loss": 0.2578, "step": 48620 }, { "epoch": 2.01, "grad_norm": 0.5234375, "learning_rate": 0.0004944802342239432, "loss": 0.221, "step": 48630 }, { "epoch": 2.01, "grad_norm": 0.59375, "learning_rate": 0.0004944779676169647, "loss": 0.2226, "step": 48640 }, { "epoch": 2.02, "grad_norm": 0.609375, "learning_rate": 0.0004944757005499051, "loss": 0.1922, "step": 48650 }, { "epoch": 2.02, "grad_norm": 0.890625, "learning_rate": 0.0004944734330227686, "loss": 0.2098, "step": 48660 }, { "epoch": 2.02, "grad_norm": 0.435546875, "learning_rate": 0.0004944711650355595, "loss": 0.2507, "step": 48670 }, { "epoch": 2.02, "grad_norm": 0.2890625, "learning_rate": 0.000494468896588282, "loss": 0.191, "step": 48680 }, { "epoch": 2.02, "grad_norm": 0.70703125, "learning_rate": 0.0004944666276809406, "loss": 0.2866, "step": 48690 }, { "epoch": 2.02, "grad_norm": 1.2890625, "learning_rate": 0.0004944643583135392, "loss": 0.215, "step": 48700 }, { "epoch": 2.02, "grad_norm": 1.078125, "learning_rate": 0.0004944620884860824, "loss": 0.2296, "step": 48710 }, { "epoch": 2.02, "grad_norm": 0.77734375, "learning_rate": 0.0004944598181985744, "loss": 0.2158, "step": 48720 }, { "epoch": 2.02, "grad_norm": 1.921875, "learning_rate": 0.0004944575474510193, "loss": 0.1905, "step": 48730 }, { "epoch": 2.02, "grad_norm": 0.365234375, "learning_rate": 0.0004944552762434216, "loss": 0.182, "step": 48740 }, { "epoch": 2.02, "grad_norm": 0.52734375, "learning_rate": 0.0004944530045757853, "loss": 0.1883, "step": 48750 }, { "epoch": 2.02, "grad_norm": 0.78515625, "learning_rate": 0.000494450732448115, "loss": 0.2682, "step": 48760 }, { "epoch": 2.02, "grad_norm": 0.84765625, "learning_rate": 0.0004944484598604148, "loss": 0.2434, "step": 48770 }, { "epoch": 2.02, "grad_norm": 1.015625, "learning_rate": 0.000494446186812689, "loss": 0.2874, "step": 48780 }, { "epoch": 2.02, "grad_norm": 0.427734375, "learning_rate": 0.0004944439133049419, "loss": 0.2129, "step": 48790 }, { "epoch": 2.02, "grad_norm": 0.478515625, "learning_rate": 0.0004944416393371777, "loss": 0.1454, "step": 48800 }, { "epoch": 2.02, "grad_norm": 0.63671875, "learning_rate": 0.0004944393649094007, "loss": 0.2254, "step": 48810 }, { "epoch": 2.02, "grad_norm": 0.2314453125, "learning_rate": 0.0004944370900216153, "loss": 0.23, "step": 48820 }, { "epoch": 2.02, "grad_norm": 0.259765625, "learning_rate": 0.0004944348146738256, "loss": 0.188, "step": 48830 }, { "epoch": 2.02, "grad_norm": 0.7890625, "learning_rate": 0.000494432538866036, "loss": 0.2141, "step": 48840 }, { "epoch": 2.02, "grad_norm": 0.57421875, "learning_rate": 0.0004944302625982508, "loss": 0.2182, "step": 48850 }, { "epoch": 2.02, "grad_norm": 0.37109375, "learning_rate": 0.0004944279858704742, "loss": 0.2396, "step": 48860 }, { "epoch": 2.02, "grad_norm": 0.3203125, "learning_rate": 0.0004944257086827105, "loss": 0.2638, "step": 48870 }, { "epoch": 2.02, "grad_norm": 2.25, "learning_rate": 0.0004944234310349641, "loss": 0.2432, "step": 48880 }, { "epoch": 2.03, "grad_norm": 0.515625, "learning_rate": 0.0004944211529272391, "loss": 0.1975, "step": 48890 }, { "epoch": 2.03, "grad_norm": 0.0, "learning_rate": 0.00049441887435954, "loss": 0.1927, "step": 48900 }, { "epoch": 2.03, "grad_norm": 0.53515625, "learning_rate": 0.0004944165953318708, "loss": 0.2546, "step": 48910 }, { "epoch": 2.03, "grad_norm": 0.5625, "learning_rate": 0.0004944143158442362, "loss": 0.2286, "step": 48920 }, { "epoch": 2.03, "grad_norm": 1.1328125, "learning_rate": 0.0004944120358966401, "loss": 0.1972, "step": 48930 }, { "epoch": 2.03, "grad_norm": 0.9296875, "learning_rate": 0.000494409755489087, "loss": 0.2412, "step": 48940 }, { "epoch": 2.03, "grad_norm": 0.765625, "learning_rate": 0.0004944074746215811, "loss": 0.2467, "step": 48950 }, { "epoch": 2.03, "grad_norm": 0.640625, "learning_rate": 0.0004944051932941268, "loss": 0.2035, "step": 48960 }, { "epoch": 2.03, "grad_norm": 0.6953125, "learning_rate": 0.0004944029115067282, "loss": 0.1969, "step": 48970 }, { "epoch": 2.03, "grad_norm": 0.8125, "learning_rate": 0.0004944006292593898, "loss": 0.2987, "step": 48980 }, { "epoch": 2.03, "grad_norm": 0.357421875, "learning_rate": 0.0004943983465521157, "loss": 0.1789, "step": 48990 }, { "epoch": 2.03, "grad_norm": 0.57421875, "learning_rate": 0.0004943960633849104, "loss": 0.2343, "step": 49000 }, { "epoch": 2.03, "grad_norm": 0.47265625, "learning_rate": 0.0004943937797577781, "loss": 0.2174, "step": 49010 }, { "epoch": 2.03, "grad_norm": 0.70703125, "learning_rate": 0.0004943914956707231, "loss": 0.2161, "step": 49020 }, { "epoch": 2.03, "grad_norm": 0.314453125, "learning_rate": 0.0004943892111237496, "loss": 0.2067, "step": 49030 }, { "epoch": 2.03, "grad_norm": 0.6484375, "learning_rate": 0.0004943869261168621, "loss": 0.2222, "step": 49040 }, { "epoch": 2.03, "grad_norm": 0.44140625, "learning_rate": 0.0004943846406500647, "loss": 0.154, "step": 49050 }, { "epoch": 2.03, "grad_norm": 0.859375, "learning_rate": 0.0004943823547233619, "loss": 0.2865, "step": 49060 }, { "epoch": 2.03, "grad_norm": 0.63671875, "learning_rate": 0.0004943800683367579, "loss": 0.2163, "step": 49070 }, { "epoch": 2.03, "grad_norm": 1.0546875, "learning_rate": 0.000494377781490257, "loss": 0.1648, "step": 49080 }, { "epoch": 2.03, "grad_norm": 1.078125, "learning_rate": 0.0004943754941838635, "loss": 0.208, "step": 49090 }, { "epoch": 2.03, "grad_norm": 1.1484375, "learning_rate": 0.0004943732064175817, "loss": 0.2083, "step": 49100 }, { "epoch": 2.03, "grad_norm": 1.078125, "learning_rate": 0.0004943709181914159, "loss": 0.1809, "step": 49110 }, { "epoch": 2.03, "grad_norm": 0.4453125, "learning_rate": 0.0004943686295053703, "loss": 0.2084, "step": 49120 }, { "epoch": 2.03, "grad_norm": 1.171875, "learning_rate": 0.0004943663403594495, "loss": 0.2569, "step": 49130 }, { "epoch": 2.04, "grad_norm": 0.58984375, "learning_rate": 0.0004943640507536576, "loss": 0.2593, "step": 49140 }, { "epoch": 2.04, "grad_norm": 0.2177734375, "learning_rate": 0.0004943617606879989, "loss": 0.2497, "step": 49150 }, { "epoch": 2.04, "grad_norm": 0.0, "learning_rate": 0.0004943594701624778, "loss": 0.2341, "step": 49160 }, { "epoch": 2.04, "grad_norm": 0.458984375, "learning_rate": 0.0004943571791770986, "loss": 0.2219, "step": 49170 }, { "epoch": 2.04, "grad_norm": 0.796875, "learning_rate": 0.0004943548877318655, "loss": 0.2303, "step": 49180 }, { "epoch": 2.04, "grad_norm": 0.404296875, "learning_rate": 0.000494352595826783, "loss": 0.2435, "step": 49190 }, { "epoch": 2.04, "grad_norm": 0.90234375, "learning_rate": 0.0004943503034618552, "loss": 0.2894, "step": 49200 }, { "epoch": 2.04, "grad_norm": 0.7265625, "learning_rate": 0.0004943480106370864, "loss": 0.2392, "step": 49210 }, { "epoch": 2.04, "grad_norm": 0.703125, "learning_rate": 0.0004943457173524812, "loss": 0.2164, "step": 49220 }, { "epoch": 2.04, "grad_norm": 1.34375, "learning_rate": 0.0004943434236080438, "loss": 0.2377, "step": 49230 }, { "epoch": 2.04, "grad_norm": 0.047607421875, "learning_rate": 0.0004943411294037783, "loss": 0.264, "step": 49240 }, { "epoch": 2.04, "grad_norm": 0.4609375, "learning_rate": 0.0004943388347396894, "loss": 0.1667, "step": 49250 }, { "epoch": 2.04, "grad_norm": 0.59765625, "learning_rate": 0.0004943365396157809, "loss": 0.1882, "step": 49260 }, { "epoch": 2.04, "grad_norm": 1.4921875, "learning_rate": 0.0004943342440320576, "loss": 0.2255, "step": 49270 }, { "epoch": 2.04, "grad_norm": 0.47265625, "learning_rate": 0.0004943319479885237, "loss": 0.2336, "step": 49280 }, { "epoch": 2.04, "grad_norm": 1.4765625, "learning_rate": 0.0004943296514851833, "loss": 0.2048, "step": 49290 }, { "epoch": 2.04, "grad_norm": 0.4765625, "learning_rate": 0.000494327354522041, "loss": 0.2155, "step": 49300 }, { "epoch": 2.04, "grad_norm": 0.408203125, "learning_rate": 0.0004943250570991009, "loss": 0.2399, "step": 49310 }, { "epoch": 2.04, "grad_norm": 0.7890625, "learning_rate": 0.0004943227592163676, "loss": 0.2306, "step": 49320 }, { "epoch": 2.04, "grad_norm": 0.9296875, "learning_rate": 0.0004943204608738451, "loss": 0.2198, "step": 49330 }, { "epoch": 2.04, "grad_norm": 1.171875, "learning_rate": 0.0004943181620715379, "loss": 0.2569, "step": 49340 }, { "epoch": 2.04, "grad_norm": 0.9765625, "learning_rate": 0.0004943158628094502, "loss": 0.2108, "step": 49350 }, { "epoch": 2.04, "grad_norm": 0.7109375, "learning_rate": 0.0004943135630875867, "loss": 0.2402, "step": 49360 }, { "epoch": 2.04, "grad_norm": 0.39453125, "learning_rate": 0.0004943112629059513, "loss": 0.1931, "step": 49370 }, { "epoch": 2.05, "grad_norm": 0.43359375, "learning_rate": 0.0004943089622645485, "loss": 0.2098, "step": 49380 }, { "epoch": 2.05, "grad_norm": 0.326171875, "learning_rate": 0.0004943066611633825, "loss": 0.2129, "step": 49390 }, { "epoch": 2.05, "grad_norm": 1.2265625, "learning_rate": 0.0004943043596024579, "loss": 0.2483, "step": 49400 }, { "epoch": 2.05, "grad_norm": 0.8203125, "learning_rate": 0.0004943020575817788, "loss": 0.2252, "step": 49410 }, { "epoch": 2.05, "grad_norm": 0.98828125, "learning_rate": 0.0004942997551013497, "loss": 0.1943, "step": 49420 }, { "epoch": 2.05, "grad_norm": 1.203125, "learning_rate": 0.0004942974521611748, "loss": 0.2382, "step": 49430 }, { "epoch": 2.05, "grad_norm": 0.8671875, "learning_rate": 0.0004942951487612585, "loss": 0.2459, "step": 49440 }, { "epoch": 2.05, "grad_norm": 0.10791015625, "learning_rate": 0.0004942928449016051, "loss": 0.263, "step": 49450 }, { "epoch": 2.05, "grad_norm": 0.55078125, "learning_rate": 0.000494290540582219, "loss": 0.2295, "step": 49460 }, { "epoch": 2.05, "grad_norm": 1.0390625, "learning_rate": 0.0004942882358031043, "loss": 0.1858, "step": 49470 }, { "epoch": 2.05, "grad_norm": 0.52734375, "learning_rate": 0.0004942859305642656, "loss": 0.2206, "step": 49480 }, { "epoch": 2.05, "grad_norm": 0.30078125, "learning_rate": 0.0004942836248657072, "loss": 0.2271, "step": 49490 }, { "epoch": 2.05, "grad_norm": 0.65625, "learning_rate": 0.0004942813187074334, "loss": 0.2112, "step": 49500 }, { "epoch": 2.05, "grad_norm": 1.15625, "learning_rate": 0.0004942790120894485, "loss": 0.2136, "step": 49510 }, { "epoch": 2.05, "grad_norm": 0.328125, "learning_rate": 0.000494276705011757, "loss": 0.2349, "step": 49520 }, { "epoch": 2.05, "grad_norm": 0.69921875, "learning_rate": 0.0004942743974743629, "loss": 0.2584, "step": 49530 }, { "epoch": 2.05, "grad_norm": 0.6640625, "learning_rate": 0.000494272089477271, "loss": 0.245, "step": 49540 }, { "epoch": 2.05, "grad_norm": 0.474609375, "learning_rate": 0.0004942697810204852, "loss": 0.1848, "step": 49550 }, { "epoch": 2.05, "grad_norm": 0.265625, "learning_rate": 0.0004942674721040102, "loss": 0.1427, "step": 49560 }, { "epoch": 2.05, "grad_norm": 0.87109375, "learning_rate": 0.0004942651627278502, "loss": 0.2091, "step": 49570 }, { "epoch": 2.05, "grad_norm": 0.74609375, "learning_rate": 0.0004942628528920094, "loss": 0.2259, "step": 49580 }, { "epoch": 2.05, "grad_norm": 0.578125, "learning_rate": 0.0004942605425964924, "loss": 0.1944, "step": 49590 }, { "epoch": 2.05, "grad_norm": 0.71484375, "learning_rate": 0.0004942582318413033, "loss": 0.212, "step": 49600 }, { "epoch": 2.05, "grad_norm": 0.375, "learning_rate": 0.0004942559206264468, "loss": 0.209, "step": 49610 }, { "epoch": 2.06, "grad_norm": 0.4140625, "learning_rate": 0.0004942536089519269, "loss": 0.1804, "step": 49620 }, { "epoch": 2.06, "grad_norm": 1.1328125, "learning_rate": 0.0004942512968177481, "loss": 0.2185, "step": 49630 }, { "epoch": 2.06, "grad_norm": 0.431640625, "learning_rate": 0.0004942489842239148, "loss": 0.2257, "step": 49640 }, { "epoch": 2.06, "grad_norm": 0.41015625, "learning_rate": 0.0004942466711704312, "loss": 0.2514, "step": 49650 }, { "epoch": 2.06, "grad_norm": 0.58984375, "learning_rate": 0.0004942443576573018, "loss": 0.2314, "step": 49660 }, { "epoch": 2.06, "grad_norm": 1.3671875, "learning_rate": 0.0004942420436845308, "loss": 0.2253, "step": 49670 }, { "epoch": 2.06, "grad_norm": 0.6015625, "learning_rate": 0.0004942397292521228, "loss": 0.252, "step": 49680 }, { "epoch": 2.06, "grad_norm": 1.53125, "learning_rate": 0.000494237414360082, "loss": 0.2454, "step": 49690 }, { "epoch": 2.06, "grad_norm": 0.69140625, "learning_rate": 0.0004942350990084127, "loss": 0.2165, "step": 49700 }, { "epoch": 2.06, "grad_norm": 0.412109375, "learning_rate": 0.0004942327831971192, "loss": 0.2309, "step": 49710 }, { "epoch": 2.06, "grad_norm": 0.890625, "learning_rate": 0.000494230466926206, "loss": 0.2422, "step": 49720 }, { "epoch": 2.06, "grad_norm": 0.3828125, "learning_rate": 0.0004942281501956777, "loss": 0.2286, "step": 49730 }, { "epoch": 2.06, "grad_norm": 0.447265625, "learning_rate": 0.0004942258330055382, "loss": 0.1765, "step": 49740 }, { "epoch": 2.06, "grad_norm": 0.625, "learning_rate": 0.000494223515355792, "loss": 0.1958, "step": 49750 }, { "epoch": 2.06, "grad_norm": 0.421875, "learning_rate": 0.0004942211972464437, "loss": 0.2258, "step": 49760 }, { "epoch": 2.06, "grad_norm": 0.34765625, "learning_rate": 0.0004942188786774973, "loss": 0.2126, "step": 49770 }, { "epoch": 2.06, "grad_norm": 0.48046875, "learning_rate": 0.0004942165596489574, "loss": 0.2348, "step": 49780 }, { "epoch": 2.06, "grad_norm": 0.56640625, "learning_rate": 0.0004942142401608284, "loss": 0.2713, "step": 49790 }, { "epoch": 2.06, "grad_norm": 1.390625, "learning_rate": 0.0004942119202131144, "loss": 0.2454, "step": 49800 }, { "epoch": 2.06, "grad_norm": 1.6875, "learning_rate": 0.00049420959980582, "loss": 0.204, "step": 49810 }, { "epoch": 2.06, "grad_norm": 2.234375, "learning_rate": 0.0004942072789389497, "loss": 0.2641, "step": 49820 }, { "epoch": 2.06, "grad_norm": 0.5390625, "learning_rate": 0.0004942049576125075, "loss": 0.2171, "step": 49830 }, { "epoch": 2.06, "grad_norm": 0.3359375, "learning_rate": 0.0004942026358264979, "loss": 0.2118, "step": 49840 }, { "epoch": 2.06, "grad_norm": 1.015625, "learning_rate": 0.0004942003135809254, "loss": 0.2425, "step": 49850 }, { "epoch": 2.07, "grad_norm": 0.65234375, "learning_rate": 0.0004941979908757942, "loss": 0.2608, "step": 49860 }, { "epoch": 2.07, "grad_norm": 0.640625, "learning_rate": 0.0004941956677111089, "loss": 0.2473, "step": 49870 }, { "epoch": 2.07, "grad_norm": 0.3046875, "learning_rate": 0.0004941933440868736, "loss": 0.1622, "step": 49880 }, { "epoch": 2.07, "grad_norm": 0.6015625, "learning_rate": 0.0004941910200030928, "loss": 0.1891, "step": 49890 }, { "epoch": 2.07, "grad_norm": 0.92578125, "learning_rate": 0.0004941886954597708, "loss": 0.2386, "step": 49900 }, { "epoch": 2.07, "grad_norm": 1.1796875, "learning_rate": 0.0004941863704569122, "loss": 0.2207, "step": 49910 }, { "epoch": 2.07, "grad_norm": 0.6875, "learning_rate": 0.0004941840449945212, "loss": 0.2321, "step": 49920 }, { "epoch": 2.07, "grad_norm": 0.62109375, "learning_rate": 0.0004941817190726021, "loss": 0.2576, "step": 49930 }, { "epoch": 2.07, "grad_norm": 0.474609375, "learning_rate": 0.0004941793926911595, "loss": 0.205, "step": 49940 }, { "epoch": 2.07, "grad_norm": 2.046875, "learning_rate": 0.0004941770658501975, "loss": 0.2438, "step": 49950 }, { "epoch": 2.07, "grad_norm": 1.203125, "learning_rate": 0.0004941747385497207, "loss": 0.1659, "step": 49960 }, { "epoch": 2.07, "grad_norm": 0.65234375, "learning_rate": 0.0004941724107897335, "loss": 0.1914, "step": 49970 }, { "epoch": 2.07, "grad_norm": 0.65234375, "learning_rate": 0.0004941700825702401, "loss": 0.2053, "step": 49980 }, { "epoch": 2.07, "grad_norm": 1.5078125, "learning_rate": 0.000494167753891245, "loss": 0.2276, "step": 49990 }, { "epoch": 2.07, "grad_norm": 0.546875, "learning_rate": 0.0004941654247527525, "loss": 0.3057, "step": 50000 }, { "epoch": 2.07, "grad_norm": 0.376953125, "learning_rate": 0.0004941630951547671, "loss": 0.2062, "step": 50010 }, { "epoch": 2.07, "grad_norm": 0.43359375, "learning_rate": 0.0004941607650972931, "loss": 0.2105, "step": 50020 }, { "epoch": 2.07, "grad_norm": 0.51953125, "learning_rate": 0.0004941584345803349, "loss": 0.2817, "step": 50030 }, { "epoch": 2.07, "grad_norm": 0.9609375, "learning_rate": 0.000494156103603897, "loss": 0.2441, "step": 50040 }, { "epoch": 2.07, "grad_norm": 0.625, "learning_rate": 0.0004941537721679835, "loss": 0.2286, "step": 50050 }, { "epoch": 2.07, "grad_norm": 1.640625, "learning_rate": 0.000494151440272599, "loss": 0.2633, "step": 50060 }, { "epoch": 2.07, "grad_norm": 1.9375, "learning_rate": 0.000494149107917748, "loss": 0.238, "step": 50070 }, { "epoch": 2.07, "grad_norm": 1.109375, "learning_rate": 0.0004941467751034347, "loss": 0.2074, "step": 50080 }, { "epoch": 2.07, "grad_norm": 1.59375, "learning_rate": 0.0004941444418296634, "loss": 0.2906, "step": 50090 }, { "epoch": 2.08, "grad_norm": 0.55078125, "learning_rate": 0.0004941421080964387, "loss": 0.2629, "step": 50100 }, { "epoch": 2.08, "grad_norm": 0.7109375, "learning_rate": 0.000494139773903765, "loss": 0.2423, "step": 50110 }, { "epoch": 2.08, "grad_norm": 0.384765625, "learning_rate": 0.0004941374392516465, "loss": 0.2017, "step": 50120 }, { "epoch": 2.08, "grad_norm": 0.490234375, "learning_rate": 0.0004941351041400878, "loss": 0.2469, "step": 50130 }, { "epoch": 2.08, "grad_norm": 1.8046875, "learning_rate": 0.000494132768569093, "loss": 0.2535, "step": 50140 }, { "epoch": 2.08, "grad_norm": 0.71875, "learning_rate": 0.000494130432538667, "loss": 0.2335, "step": 50150 }, { "epoch": 2.08, "grad_norm": 0.8125, "learning_rate": 0.0004941280960488137, "loss": 0.2249, "step": 50160 }, { "epoch": 2.08, "grad_norm": 0.453125, "learning_rate": 0.0004941257590995376, "loss": 0.2334, "step": 50170 }, { "epoch": 2.08, "grad_norm": 1.3671875, "learning_rate": 0.0004941234216908433, "loss": 0.2899, "step": 50180 }, { "epoch": 2.08, "grad_norm": 0.63671875, "learning_rate": 0.0004941210838227351, "loss": 0.1853, "step": 50190 }, { "epoch": 2.08, "grad_norm": 0.79296875, "learning_rate": 0.0004941187454952174, "loss": 0.2252, "step": 50200 }, { "epoch": 2.08, "grad_norm": 0.6015625, "learning_rate": 0.0004941164067082945, "loss": 0.2459, "step": 50210 }, { "epoch": 2.08, "grad_norm": 1.1796875, "learning_rate": 0.0004941140674619709, "loss": 0.1834, "step": 50220 }, { "epoch": 2.08, "grad_norm": 0.439453125, "learning_rate": 0.0004941117277562511, "loss": 0.2056, "step": 50230 }, { "epoch": 2.08, "grad_norm": 0.48828125, "learning_rate": 0.0004941093875911394, "loss": 0.2326, "step": 50240 }, { "epoch": 2.08, "grad_norm": 0.76953125, "learning_rate": 0.00049410704696664, "loss": 0.1751, "step": 50250 }, { "epoch": 2.08, "grad_norm": 0.88671875, "learning_rate": 0.0004941047058827576, "loss": 0.2529, "step": 50260 }, { "epoch": 2.08, "grad_norm": 0.9453125, "learning_rate": 0.0004941023643394965, "loss": 0.1978, "step": 50270 }, { "epoch": 2.08, "grad_norm": 1.15625, "learning_rate": 0.0004941000223368611, "loss": 0.1842, "step": 50280 }, { "epoch": 2.08, "grad_norm": 0.447265625, "learning_rate": 0.0004940976798748558, "loss": 0.2013, "step": 50290 }, { "epoch": 2.08, "grad_norm": 0.58203125, "learning_rate": 0.0004940953369534851, "loss": 0.1912, "step": 50300 }, { "epoch": 2.08, "grad_norm": 1.1328125, "learning_rate": 0.0004940929935727533, "loss": 0.1576, "step": 50310 }, { "epoch": 2.08, "grad_norm": 0.75390625, "learning_rate": 0.0004940906497326649, "loss": 0.2352, "step": 50320 }, { "epoch": 2.08, "grad_norm": 0.68359375, "learning_rate": 0.0004940883054332241, "loss": 0.242, "step": 50330 }, { "epoch": 2.09, "grad_norm": 1.5390625, "learning_rate": 0.0004940859606744356, "loss": 0.2228, "step": 50340 }, { "epoch": 2.09, "grad_norm": 0.64453125, "learning_rate": 0.0004940836154563036, "loss": 0.243, "step": 50350 }, { "epoch": 2.09, "grad_norm": 2.0625, "learning_rate": 0.0004940812697788327, "loss": 0.1954, "step": 50360 }, { "epoch": 2.09, "grad_norm": 0.65234375, "learning_rate": 0.0004940789236420272, "loss": 0.219, "step": 50370 }, { "epoch": 2.09, "grad_norm": 0.5390625, "learning_rate": 0.0004940765770458915, "loss": 0.2333, "step": 50380 }, { "epoch": 2.09, "grad_norm": 1.953125, "learning_rate": 0.00049407422999043, "loss": 0.2109, "step": 50390 }, { "epoch": 2.09, "grad_norm": 0.50390625, "learning_rate": 0.0004940718824756471, "loss": 0.1952, "step": 50400 }, { "epoch": 2.09, "grad_norm": 0.189453125, "learning_rate": 0.0004940695345015474, "loss": 0.201, "step": 50410 }, { "epoch": 2.09, "grad_norm": 1.4921875, "learning_rate": 0.0004940671860681351, "loss": 0.2086, "step": 50420 }, { "epoch": 2.09, "grad_norm": 0.60546875, "learning_rate": 0.0004940648371754148, "loss": 0.189, "step": 50430 }, { "epoch": 2.09, "grad_norm": 0.6640625, "learning_rate": 0.0004940624878233908, "loss": 0.1735, "step": 50440 }, { "epoch": 2.09, "grad_norm": 0.5859375, "learning_rate": 0.0004940601380120675, "loss": 0.2336, "step": 50450 }, { "epoch": 2.09, "grad_norm": 3.578125, "learning_rate": 0.0004940577877414494, "loss": 0.2046, "step": 50460 }, { "epoch": 2.09, "grad_norm": 1.0859375, "learning_rate": 0.000494055437011541, "loss": 0.236, "step": 50470 }, { "epoch": 2.09, "grad_norm": 0.9296875, "learning_rate": 0.0004940530858223466, "loss": 0.2221, "step": 50480 }, { "epoch": 2.09, "grad_norm": 0.6875, "learning_rate": 0.0004940507341738705, "loss": 0.2109, "step": 50490 }, { "epoch": 2.09, "grad_norm": 0.60546875, "learning_rate": 0.0004940483820661173, "loss": 0.2817, "step": 50500 }, { "epoch": 2.09, "grad_norm": 0.609375, "learning_rate": 0.0004940460294990915, "loss": 0.2714, "step": 50510 }, { "epoch": 2.09, "grad_norm": 0.28515625, "learning_rate": 0.0004940436764727974, "loss": 0.2438, "step": 50520 }, { "epoch": 2.09, "grad_norm": 0.67578125, "learning_rate": 0.0004940413229872395, "loss": 0.2444, "step": 50530 }, { "epoch": 2.09, "grad_norm": 0.9375, "learning_rate": 0.0004940389690424221, "loss": 0.1883, "step": 50540 }, { "epoch": 2.09, "grad_norm": 1.140625, "learning_rate": 0.0004940366146383497, "loss": 0.2571, "step": 50550 }, { "epoch": 2.09, "grad_norm": 0.470703125, "learning_rate": 0.0004940342597750268, "loss": 0.198, "step": 50560 }, { "epoch": 2.09, "grad_norm": 0.5078125, "learning_rate": 0.0004940319044524578, "loss": 0.2322, "step": 50570 }, { "epoch": 2.1, "grad_norm": 0.69921875, "learning_rate": 0.0004940295486706471, "loss": 0.2399, "step": 50580 }, { "epoch": 2.1, "grad_norm": 0.8671875, "learning_rate": 0.0004940271924295991, "loss": 0.2318, "step": 50590 }, { "epoch": 2.1, "grad_norm": 0.59375, "learning_rate": 0.0004940248357293182, "loss": 0.2388, "step": 50600 }, { "epoch": 2.1, "grad_norm": 0.77734375, "learning_rate": 0.0004940224785698091, "loss": 0.2297, "step": 50610 }, { "epoch": 2.1, "grad_norm": 1.6484375, "learning_rate": 0.0004940201209510759, "loss": 0.2374, "step": 50620 }, { "epoch": 2.1, "grad_norm": 0.70703125, "learning_rate": 0.0004940177628731233, "loss": 0.2486, "step": 50630 }, { "epoch": 2.1, "grad_norm": 0.7109375, "learning_rate": 0.0004940154043359556, "loss": 0.2259, "step": 50640 }, { "epoch": 2.1, "grad_norm": 0.73046875, "learning_rate": 0.0004940130453395771, "loss": 0.2143, "step": 50650 }, { "epoch": 2.1, "grad_norm": 0.8828125, "learning_rate": 0.0004940106858839925, "loss": 0.2665, "step": 50660 }, { "epoch": 2.1, "grad_norm": 0.66796875, "learning_rate": 0.0004940083259692062, "loss": 0.2306, "step": 50670 }, { "epoch": 2.1, "grad_norm": 0.953125, "learning_rate": 0.0004940059655952226, "loss": 0.2066, "step": 50680 }, { "epoch": 2.1, "grad_norm": 0.55859375, "learning_rate": 0.000494003604762046, "loss": 0.1983, "step": 50690 }, { "epoch": 2.1, "grad_norm": 0.6484375, "learning_rate": 0.000494001243469681, "loss": 0.1774, "step": 50700 }, { "epoch": 2.1, "grad_norm": 0.58203125, "learning_rate": 0.0004939988817181322, "loss": 0.267, "step": 50710 }, { "epoch": 2.1, "grad_norm": 0.40625, "learning_rate": 0.0004939965195074036, "loss": 0.2549, "step": 50720 }, { "epoch": 2.1, "grad_norm": 0.322265625, "learning_rate": 0.0004939941568375, "loss": 0.1989, "step": 50730 }, { "epoch": 2.1, "grad_norm": 0.87109375, "learning_rate": 0.0004939917937084257, "loss": 0.2098, "step": 50740 }, { "epoch": 2.1, "grad_norm": 1.2734375, "learning_rate": 0.0004939894301201853, "loss": 0.2349, "step": 50750 }, { "epoch": 2.1, "grad_norm": 0.57421875, "learning_rate": 0.0004939870660727831, "loss": 0.2175, "step": 50760 }, { "epoch": 2.1, "grad_norm": 2.96875, "learning_rate": 0.0004939847015662235, "loss": 0.2082, "step": 50770 }, { "epoch": 2.1, "grad_norm": 0.34375, "learning_rate": 0.000493982336600511, "loss": 0.209, "step": 50780 }, { "epoch": 2.1, "grad_norm": 0.625, "learning_rate": 0.0004939799711756503, "loss": 0.2473, "step": 50790 }, { "epoch": 2.1, "grad_norm": 0.7421875, "learning_rate": 0.0004939776052916455, "loss": 0.2461, "step": 50800 }, { "epoch": 2.1, "grad_norm": 0.54296875, "learning_rate": 0.0004939752389485013, "loss": 0.2006, "step": 50810 }, { "epoch": 2.1, "grad_norm": 0.84765625, "learning_rate": 0.0004939728721462219, "loss": 0.2858, "step": 50820 }, { "epoch": 2.11, "grad_norm": 0.4140625, "learning_rate": 0.000493970504884812, "loss": 0.2428, "step": 50830 }, { "epoch": 2.11, "grad_norm": 0.46875, "learning_rate": 0.0004939681371642759, "loss": 0.2114, "step": 50840 }, { "epoch": 2.11, "grad_norm": 0.72265625, "learning_rate": 0.0004939657689846182, "loss": 0.2401, "step": 50850 }, { "epoch": 2.11, "grad_norm": 0.91796875, "learning_rate": 0.0004939634003458432, "loss": 0.1643, "step": 50860 }, { "epoch": 2.11, "grad_norm": 0.7890625, "learning_rate": 0.0004939610312479554, "loss": 0.2461, "step": 50870 }, { "epoch": 2.11, "grad_norm": 0.3125, "learning_rate": 0.0004939586616909594, "loss": 0.2138, "step": 50880 }, { "epoch": 2.11, "grad_norm": 0.625, "learning_rate": 0.0004939562916748594, "loss": 0.2033, "step": 50890 }, { "epoch": 2.11, "grad_norm": 0.57421875, "learning_rate": 0.0004939539211996601, "loss": 0.2618, "step": 50900 }, { "epoch": 2.11, "grad_norm": 0.0, "learning_rate": 0.0004939515502653659, "loss": 0.1999, "step": 50910 }, { "epoch": 2.11, "grad_norm": 0.439453125, "learning_rate": 0.0004939491788719811, "loss": 0.3103, "step": 50920 }, { "epoch": 2.11, "grad_norm": 0.84375, "learning_rate": 0.0004939468070195103, "loss": 0.2411, "step": 50930 }, { "epoch": 2.11, "grad_norm": 0.4453125, "learning_rate": 0.0004939444347079581, "loss": 0.232, "step": 50940 }, { "epoch": 2.11, "grad_norm": 0.66796875, "learning_rate": 0.0004939420619373287, "loss": 0.2199, "step": 50950 }, { "epoch": 2.11, "grad_norm": 1.15625, "learning_rate": 0.0004939396887076267, "loss": 0.1853, "step": 50960 }, { "epoch": 2.11, "grad_norm": 0.39453125, "learning_rate": 0.0004939373150188566, "loss": 0.2119, "step": 50970 }, { "epoch": 2.11, "grad_norm": 0.87890625, "learning_rate": 0.0004939349408710227, "loss": 0.2278, "step": 50980 }, { "epoch": 2.11, "grad_norm": 0.40625, "learning_rate": 0.0004939325662641296, "loss": 0.2066, "step": 50990 }, { "epoch": 2.11, "grad_norm": 0.36328125, "learning_rate": 0.0004939301911981819, "loss": 0.195, "step": 51000 }, { "epoch": 2.11, "grad_norm": 0.546875, "learning_rate": 0.0004939278156731839, "loss": 0.1426, "step": 51010 }, { "epoch": 2.11, "grad_norm": 0.5234375, "learning_rate": 0.0004939254396891399, "loss": 0.2441, "step": 51020 }, { "epoch": 2.11, "grad_norm": 1.1953125, "learning_rate": 0.0004939230632460547, "loss": 0.2049, "step": 51030 }, { "epoch": 2.11, "grad_norm": 0.81640625, "learning_rate": 0.0004939206863439328, "loss": 0.2141, "step": 51040 }, { "epoch": 2.11, "grad_norm": 0.5390625, "learning_rate": 0.0004939183089827783, "loss": 0.2526, "step": 51050 }, { "epoch": 2.11, "grad_norm": 0.734375, "learning_rate": 0.0004939159311625959, "loss": 0.2355, "step": 51060 }, { "epoch": 2.12, "grad_norm": 0.482421875, "learning_rate": 0.00049391355288339, "loss": 0.1938, "step": 51070 }, { "epoch": 2.12, "grad_norm": 0.396484375, "learning_rate": 0.0004939111741451653, "loss": 0.185, "step": 51080 }, { "epoch": 2.12, "grad_norm": 0.9765625, "learning_rate": 0.000493908794947926, "loss": 0.1962, "step": 51090 }, { "epoch": 2.12, "grad_norm": 0.65234375, "learning_rate": 0.0004939064152916767, "loss": 0.2588, "step": 51100 }, { "epoch": 2.12, "grad_norm": 0.68359375, "learning_rate": 0.0004939040351764219, "loss": 0.2455, "step": 51110 }, { "epoch": 2.12, "grad_norm": 0.1484375, "learning_rate": 0.0004939016546021659, "loss": 0.2357, "step": 51120 }, { "epoch": 2.12, "grad_norm": 0.400390625, "learning_rate": 0.0004938992735689135, "loss": 0.219, "step": 51130 }, { "epoch": 2.12, "grad_norm": 0.462890625, "learning_rate": 0.000493896892076669, "loss": 0.2225, "step": 51140 }, { "epoch": 2.12, "grad_norm": 0.80078125, "learning_rate": 0.0004938945101254368, "loss": 0.1868, "step": 51150 }, { "epoch": 2.12, "grad_norm": 0.3515625, "learning_rate": 0.0004938921277152215, "loss": 0.2105, "step": 51160 }, { "epoch": 2.12, "grad_norm": 0.06640625, "learning_rate": 0.0004938897448460276, "loss": 0.2073, "step": 51170 }, { "epoch": 2.12, "grad_norm": 0.625, "learning_rate": 0.0004938873615178595, "loss": 0.1916, "step": 51180 }, { "epoch": 2.12, "grad_norm": 0.57421875, "learning_rate": 0.0004938849777307217, "loss": 0.2396, "step": 51190 }, { "epoch": 2.12, "grad_norm": 0.419921875, "learning_rate": 0.0004938825934846187, "loss": 0.2257, "step": 51200 }, { "epoch": 2.12, "grad_norm": 0.7734375, "learning_rate": 0.0004938802087795551, "loss": 0.2047, "step": 51210 }, { "epoch": 2.12, "grad_norm": 1.4296875, "learning_rate": 0.0004938778236155351, "loss": 0.2799, "step": 51220 }, { "epoch": 2.12, "grad_norm": 0.47265625, "learning_rate": 0.0004938754379925636, "loss": 0.1998, "step": 51230 }, { "epoch": 2.12, "grad_norm": 0.498046875, "learning_rate": 0.0004938730519106447, "loss": 0.2026, "step": 51240 }, { "epoch": 2.12, "grad_norm": 0.7734375, "learning_rate": 0.0004938706653697831, "loss": 0.2021, "step": 51250 }, { "epoch": 2.12, "grad_norm": 2.640625, "learning_rate": 0.0004938682783699833, "loss": 0.1942, "step": 51260 }, { "epoch": 2.12, "grad_norm": 1.46875, "learning_rate": 0.0004938658909112496, "loss": 0.197, "step": 51270 }, { "epoch": 2.12, "grad_norm": 1.296875, "learning_rate": 0.0004938635029935868, "loss": 0.2361, "step": 51280 }, { "epoch": 2.12, "grad_norm": 0.609375, "learning_rate": 0.0004938611146169991, "loss": 0.2649, "step": 51290 }, { "epoch": 2.12, "grad_norm": 0.66796875, "learning_rate": 0.0004938587257814911, "loss": 0.2165, "step": 51300 }, { "epoch": 2.13, "grad_norm": 0.78515625, "learning_rate": 0.0004938563364870674, "loss": 0.1958, "step": 51310 }, { "epoch": 2.13, "grad_norm": 0.7890625, "learning_rate": 0.0004938539467337324, "loss": 0.2201, "step": 51320 }, { "epoch": 2.13, "grad_norm": 0.5625, "learning_rate": 0.0004938515565214905, "loss": 0.1693, "step": 51330 }, { "epoch": 2.13, "grad_norm": 1.3515625, "learning_rate": 0.0004938491658503465, "loss": 0.1595, "step": 51340 }, { "epoch": 2.13, "grad_norm": 0.703125, "learning_rate": 0.0004938467747203044, "loss": 0.2336, "step": 51350 }, { "epoch": 2.13, "grad_norm": 0.0, "learning_rate": 0.0004938443831313693, "loss": 0.2326, "step": 51360 }, { "epoch": 2.13, "grad_norm": 0.5390625, "learning_rate": 0.0004938419910835453, "loss": 0.2013, "step": 51370 }, { "epoch": 2.13, "grad_norm": 0.439453125, "learning_rate": 0.000493839598576837, "loss": 0.2684, "step": 51380 }, { "epoch": 2.13, "grad_norm": 0.5859375, "learning_rate": 0.0004938372056112488, "loss": 0.2052, "step": 51390 }, { "epoch": 2.13, "grad_norm": 0.63671875, "learning_rate": 0.0004938348121867855, "loss": 0.2178, "step": 51400 }, { "epoch": 2.13, "grad_norm": 0.91796875, "learning_rate": 0.0004938324183034514, "loss": 0.217, "step": 51410 }, { "epoch": 2.13, "grad_norm": 0.412109375, "learning_rate": 0.0004938300239612509, "loss": 0.2475, "step": 51420 }, { "epoch": 2.13, "grad_norm": 0.62890625, "learning_rate": 0.0004938276291601886, "loss": 0.2199, "step": 51430 }, { "epoch": 2.13, "grad_norm": 0.8671875, "learning_rate": 0.0004938252339002691, "loss": 0.2204, "step": 51440 }, { "epoch": 2.13, "grad_norm": 0.3515625, "learning_rate": 0.0004938228381814969, "loss": 0.2056, "step": 51450 }, { "epoch": 2.13, "grad_norm": 0.84375, "learning_rate": 0.0004938204420038764, "loss": 0.1846, "step": 51460 }, { "epoch": 2.13, "grad_norm": 0.46875, "learning_rate": 0.0004938180453674121, "loss": 0.187, "step": 51470 }, { "epoch": 2.13, "grad_norm": 0.44140625, "learning_rate": 0.0004938156482721087, "loss": 0.2049, "step": 51480 }, { "epoch": 2.13, "grad_norm": 0.58984375, "learning_rate": 0.0004938132507179704, "loss": 0.192, "step": 51490 }, { "epoch": 2.13, "grad_norm": 0.62109375, "learning_rate": 0.0004938108527050021, "loss": 0.2525, "step": 51500 }, { "epoch": 2.13, "grad_norm": 1.3203125, "learning_rate": 0.000493808454233208, "loss": 0.2667, "step": 51510 }, { "epoch": 2.13, "grad_norm": 0.50390625, "learning_rate": 0.0004938060553025928, "loss": 0.2005, "step": 51520 }, { "epoch": 2.13, "grad_norm": 0.375, "learning_rate": 0.0004938036559131608, "loss": 0.2362, "step": 51530 }, { "epoch": 2.13, "grad_norm": 0.4921875, "learning_rate": 0.0004938012560649167, "loss": 0.1669, "step": 51540 }, { "epoch": 2.14, "grad_norm": 0.380859375, "learning_rate": 0.0004937988557578649, "loss": 0.2429, "step": 51550 }, { "epoch": 2.14, "grad_norm": 3.5, "learning_rate": 0.0004937964549920101, "loss": 0.2225, "step": 51560 }, { "epoch": 2.14, "grad_norm": 1.578125, "learning_rate": 0.0004937940537673567, "loss": 0.2159, "step": 51570 }, { "epoch": 2.14, "grad_norm": 0.51171875, "learning_rate": 0.000493791652083909, "loss": 0.2891, "step": 51580 }, { "epoch": 2.14, "grad_norm": 0.92578125, "learning_rate": 0.000493789249941672, "loss": 0.2143, "step": 51590 }, { "epoch": 2.14, "grad_norm": 0.66015625, "learning_rate": 0.0004937868473406499, "loss": 0.2109, "step": 51600 }, { "epoch": 2.14, "grad_norm": 0.51171875, "learning_rate": 0.0004937844442808473, "loss": 0.2093, "step": 51610 }, { "epoch": 2.14, "grad_norm": 0.6484375, "learning_rate": 0.0004937820407622685, "loss": 0.2806, "step": 51620 }, { "epoch": 2.14, "grad_norm": 0.2177734375, "learning_rate": 0.0004937796367849185, "loss": 0.2598, "step": 51630 }, { "epoch": 2.14, "grad_norm": 0.890625, "learning_rate": 0.0004937772323488014, "loss": 0.2085, "step": 51640 }, { "epoch": 2.14, "grad_norm": 0.255859375, "learning_rate": 0.0004937748274539218, "loss": 0.1986, "step": 51650 }, { "epoch": 2.14, "grad_norm": 0.94921875, "learning_rate": 0.0004937724221002844, "loss": 0.1783, "step": 51660 }, { "epoch": 2.14, "grad_norm": 1.9296875, "learning_rate": 0.0004937700162878936, "loss": 0.2158, "step": 51670 }, { "epoch": 2.14, "grad_norm": 1.21875, "learning_rate": 0.000493767610016754, "loss": 0.216, "step": 51680 }, { "epoch": 2.14, "grad_norm": 1.671875, "learning_rate": 0.00049376520328687, "loss": 0.2081, "step": 51690 }, { "epoch": 2.14, "grad_norm": 0.53125, "learning_rate": 0.0004937627960982464, "loss": 0.2159, "step": 51700 }, { "epoch": 2.14, "grad_norm": 0.51171875, "learning_rate": 0.0004937603884508873, "loss": 0.1636, "step": 51710 }, { "epoch": 2.14, "grad_norm": 0.470703125, "learning_rate": 0.0004937579803447976, "loss": 0.1954, "step": 51720 }, { "epoch": 2.14, "grad_norm": 0.83203125, "learning_rate": 0.0004937555717799818, "loss": 0.2322, "step": 51730 }, { "epoch": 2.14, "grad_norm": 0.3203125, "learning_rate": 0.0004937531627564442, "loss": 0.2352, "step": 51740 }, { "epoch": 2.14, "grad_norm": 0.5390625, "learning_rate": 0.0004937507532741895, "loss": 0.2329, "step": 51750 }, { "epoch": 2.14, "grad_norm": 0.703125, "learning_rate": 0.0004937483433332221, "loss": 0.2117, "step": 51760 }, { "epoch": 2.14, "grad_norm": 0.224609375, "learning_rate": 0.0004937459329335468, "loss": 0.189, "step": 51770 }, { "epoch": 2.14, "grad_norm": 0.44140625, "learning_rate": 0.000493743522075168, "loss": 0.1761, "step": 51780 }, { "epoch": 2.15, "grad_norm": 1.2265625, "learning_rate": 0.00049374111075809, "loss": 0.2219, "step": 51790 }, { "epoch": 2.15, "grad_norm": 0.71484375, "learning_rate": 0.0004937386989823177, "loss": 0.1722, "step": 51800 }, { "epoch": 2.15, "grad_norm": 0.63671875, "learning_rate": 0.0004937362867478555, "loss": 0.2382, "step": 51810 }, { "epoch": 2.15, "grad_norm": 0.251953125, "learning_rate": 0.000493733874054708, "loss": 0.2264, "step": 51820 }, { "epoch": 2.15, "grad_norm": 0.79296875, "learning_rate": 0.0004937314609028794, "loss": 0.1929, "step": 51830 }, { "epoch": 2.15, "grad_norm": 0.99609375, "learning_rate": 0.0004937290472923748, "loss": 0.2439, "step": 51840 }, { "epoch": 2.15, "grad_norm": 0.859375, "learning_rate": 0.0004937266332231984, "loss": 0.1988, "step": 51850 }, { "epoch": 2.15, "grad_norm": 0.0, "learning_rate": 0.0004937242186953547, "loss": 0.2332, "step": 51860 }, { "epoch": 2.15, "grad_norm": 0.486328125, "learning_rate": 0.0004937218037088483, "loss": 0.2383, "step": 51870 }, { "epoch": 2.15, "grad_norm": 0.8671875, "learning_rate": 0.0004937193882636839, "loss": 0.226, "step": 51880 }, { "epoch": 2.15, "grad_norm": 0.6171875, "learning_rate": 0.0004937169723598659, "loss": 0.2118, "step": 51890 }, { "epoch": 2.15, "grad_norm": 0.4921875, "learning_rate": 0.0004937145559973988, "loss": 0.2322, "step": 51900 }, { "epoch": 2.15, "grad_norm": 0.9609375, "learning_rate": 0.0004937121391762873, "loss": 0.292, "step": 51910 }, { "epoch": 2.15, "grad_norm": 0.66015625, "learning_rate": 0.0004937097218965359, "loss": 0.188, "step": 51920 }, { "epoch": 2.15, "grad_norm": 0.9375, "learning_rate": 0.000493707304158149, "loss": 0.2438, "step": 51930 }, { "epoch": 2.15, "grad_norm": 0.431640625, "learning_rate": 0.0004937048859611314, "loss": 0.2257, "step": 51940 }, { "epoch": 2.15, "grad_norm": 0.427734375, "learning_rate": 0.0004937024673054875, "loss": 0.2226, "step": 51950 }, { "epoch": 2.15, "grad_norm": 0.65234375, "learning_rate": 0.0004937000481912219, "loss": 0.2707, "step": 51960 }, { "epoch": 2.15, "grad_norm": 0.5625, "learning_rate": 0.000493697628618339, "loss": 0.2393, "step": 51970 }, { "epoch": 2.15, "grad_norm": 0.546875, "learning_rate": 0.0004936952085868436, "loss": 0.2316, "step": 51980 }, { "epoch": 2.15, "grad_norm": 0.734375, "learning_rate": 0.0004936927880967401, "loss": 0.2438, "step": 51990 }, { "epoch": 2.15, "grad_norm": 1.953125, "learning_rate": 0.0004936903671480331, "loss": 0.2075, "step": 52000 }, { "epoch": 2.15, "grad_norm": 0.71484375, "learning_rate": 0.0004936879457407271, "loss": 0.2527, "step": 52010 }, { "epoch": 2.15, "grad_norm": 2.65625, "learning_rate": 0.0004936855238748268, "loss": 0.2141, "step": 52020 }, { "epoch": 2.16, "grad_norm": 0.482421875, "learning_rate": 0.0004936831015503365, "loss": 0.1975, "step": 52030 }, { "epoch": 2.16, "grad_norm": 1.8671875, "learning_rate": 0.0004936806787672611, "loss": 0.2369, "step": 52040 }, { "epoch": 2.16, "grad_norm": 1.1171875, "learning_rate": 0.0004936782555256049, "loss": 0.2259, "step": 52050 }, { "epoch": 2.16, "grad_norm": 0.39453125, "learning_rate": 0.0004936758318253724, "loss": 0.2333, "step": 52060 }, { "epoch": 2.16, "grad_norm": 0.58984375, "learning_rate": 0.0004936734076665685, "loss": 0.2, "step": 52070 }, { "epoch": 2.16, "grad_norm": 1.0546875, "learning_rate": 0.0004936709830491975, "loss": 0.2772, "step": 52080 }, { "epoch": 2.16, "grad_norm": 1.703125, "learning_rate": 0.0004936685579732639, "loss": 0.279, "step": 52090 }, { "epoch": 2.16, "grad_norm": 0.53515625, "learning_rate": 0.0004936661324387725, "loss": 0.2392, "step": 52100 }, { "epoch": 2.16, "grad_norm": 1.0859375, "learning_rate": 0.0004936637064457277, "loss": 0.1893, "step": 52110 }, { "epoch": 2.16, "grad_norm": 0.38671875, "learning_rate": 0.0004936612799941343, "loss": 0.1837, "step": 52120 }, { "epoch": 2.16, "grad_norm": 0.79296875, "learning_rate": 0.0004936588530839966, "loss": 0.2011, "step": 52130 }, { "epoch": 2.16, "grad_norm": 0.57421875, "learning_rate": 0.0004936564257153192, "loss": 0.2146, "step": 52140 }, { "epoch": 2.16, "grad_norm": 0.703125, "learning_rate": 0.0004936539978881067, "loss": 0.2835, "step": 52150 }, { "epoch": 2.16, "grad_norm": 0.416015625, "learning_rate": 0.0004936515696023637, "loss": 0.1877, "step": 52160 }, { "epoch": 2.16, "grad_norm": 1.640625, "learning_rate": 0.0004936491408580948, "loss": 0.2226, "step": 52170 }, { "epoch": 2.16, "grad_norm": 0.58984375, "learning_rate": 0.0004936467116553045, "loss": 0.1934, "step": 52180 }, { "epoch": 2.16, "grad_norm": 0.353515625, "learning_rate": 0.0004936442819939974, "loss": 0.2327, "step": 52190 }, { "epoch": 2.16, "grad_norm": 0.46875, "learning_rate": 0.000493641851874178, "loss": 0.2384, "step": 52200 }, { "epoch": 2.16, "grad_norm": 0.000782012939453125, "learning_rate": 0.000493639421295851, "loss": 0.2103, "step": 52210 }, { "epoch": 2.16, "grad_norm": 0.75, "learning_rate": 0.0004936369902590209, "loss": 0.2105, "step": 52220 }, { "epoch": 2.16, "grad_norm": 0.67578125, "learning_rate": 0.0004936345587636924, "loss": 0.2159, "step": 52230 }, { "epoch": 2.16, "grad_norm": 0.421875, "learning_rate": 0.0004936321268098699, "loss": 0.247, "step": 52240 }, { "epoch": 2.16, "grad_norm": 1.25, "learning_rate": 0.0004936296943975581, "loss": 0.2472, "step": 52250 }, { "epoch": 2.16, "grad_norm": 1.1875, "learning_rate": 0.0004936272615267614, "loss": 0.2329, "step": 52260 }, { "epoch": 2.17, "grad_norm": 0.875, "learning_rate": 0.0004936248281974846, "loss": 0.2449, "step": 52270 }, { "epoch": 2.17, "grad_norm": 2.0, "learning_rate": 0.0004936223944097322, "loss": 0.2098, "step": 52280 }, { "epoch": 2.17, "grad_norm": 0.466796875, "learning_rate": 0.0004936199601635086, "loss": 0.2079, "step": 52290 }, { "epoch": 2.17, "grad_norm": 0.271484375, "learning_rate": 0.0004936175254588187, "loss": 0.1866, "step": 52300 }, { "epoch": 2.17, "grad_norm": 1.03125, "learning_rate": 0.000493615090295667, "loss": 0.2115, "step": 52310 }, { "epoch": 2.17, "grad_norm": 0.4296875, "learning_rate": 0.0004936126546740578, "loss": 0.1771, "step": 52320 }, { "epoch": 2.17, "grad_norm": 0.384765625, "learning_rate": 0.000493610218593996, "loss": 0.2378, "step": 52330 }, { "epoch": 2.17, "grad_norm": 1.9296875, "learning_rate": 0.0004936077820554861, "loss": 0.2273, "step": 52340 }, { "epoch": 2.17, "grad_norm": 0.93359375, "learning_rate": 0.0004936053450585327, "loss": 0.262, "step": 52350 }, { "epoch": 2.17, "grad_norm": 2.5625, "learning_rate": 0.0004936029076031402, "loss": 0.2231, "step": 52360 }, { "epoch": 2.17, "grad_norm": 0.9453125, "learning_rate": 0.0004936004696893134, "loss": 0.2332, "step": 52370 }, { "epoch": 2.17, "grad_norm": 0.357421875, "learning_rate": 0.0004935980313170568, "loss": 0.2063, "step": 52380 }, { "epoch": 2.17, "grad_norm": 0.6953125, "learning_rate": 0.0004935955924863751, "loss": 0.1799, "step": 52390 }, { "epoch": 2.17, "grad_norm": 1.0390625, "learning_rate": 0.0004935931531972728, "loss": 0.2429, "step": 52400 }, { "epoch": 2.17, "grad_norm": 1.7734375, "learning_rate": 0.0004935907134497544, "loss": 0.1863, "step": 52410 }, { "epoch": 2.17, "grad_norm": 0.5625, "learning_rate": 0.0004935882732438246, "loss": 0.1884, "step": 52420 }, { "epoch": 2.17, "grad_norm": 0.32421875, "learning_rate": 0.000493585832579488, "loss": 0.2377, "step": 52430 }, { "epoch": 2.17, "grad_norm": 0.453125, "learning_rate": 0.0004935833914567492, "loss": 0.2394, "step": 52440 }, { "epoch": 2.17, "grad_norm": 0.6640625, "learning_rate": 0.0004935809498756127, "loss": 0.2241, "step": 52450 }, { "epoch": 2.17, "grad_norm": 0.65625, "learning_rate": 0.0004935785078360832, "loss": 0.2453, "step": 52460 }, { "epoch": 2.17, "grad_norm": 0.3359375, "learning_rate": 0.0004935760653381652, "loss": 0.2557, "step": 52470 }, { "epoch": 2.17, "grad_norm": 0.796875, "learning_rate": 0.0004935736223818635, "loss": 0.2323, "step": 52480 }, { "epoch": 2.17, "grad_norm": 0.55859375, "learning_rate": 0.0004935711789671824, "loss": 0.1788, "step": 52490 }, { "epoch": 2.17, "grad_norm": 0.66796875, "learning_rate": 0.0004935687350941266, "loss": 0.2426, "step": 52500 }, { "epoch": 2.17, "grad_norm": 2.046875, "learning_rate": 0.0004935662907627009, "loss": 0.2449, "step": 52510 }, { "epoch": 2.18, "grad_norm": 0.68359375, "learning_rate": 0.0004935638459729096, "loss": 0.2872, "step": 52520 }, { "epoch": 2.18, "grad_norm": 1.046875, "learning_rate": 0.0004935614007247575, "loss": 0.2279, "step": 52530 }, { "epoch": 2.18, "grad_norm": 1.6875, "learning_rate": 0.0004935589550182492, "loss": 0.2228, "step": 52540 }, { "epoch": 2.18, "grad_norm": 0.27734375, "learning_rate": 0.0004935565088533893, "loss": 0.2599, "step": 52550 }, { "epoch": 2.18, "grad_norm": 0.984375, "learning_rate": 0.0004935540622301822, "loss": 0.1946, "step": 52560 }, { "epoch": 2.18, "grad_norm": 0.482421875, "learning_rate": 0.0004935516151486327, "loss": 0.2527, "step": 52570 }, { "epoch": 2.18, "grad_norm": 0.80078125, "learning_rate": 0.0004935491676087454, "loss": 0.2165, "step": 52580 }, { "epoch": 2.18, "grad_norm": 0.75390625, "learning_rate": 0.0004935467196105248, "loss": 0.2125, "step": 52590 }, { "epoch": 2.18, "grad_norm": 0.46875, "learning_rate": 0.0004935442711539756, "loss": 0.25, "step": 52600 }, { "epoch": 2.18, "grad_norm": 1.0390625, "learning_rate": 0.0004935418222391023, "loss": 0.2295, "step": 52610 }, { "epoch": 2.18, "grad_norm": 0.328125, "learning_rate": 0.0004935393728659098, "loss": 0.243, "step": 52620 }, { "epoch": 2.18, "grad_norm": 0.828125, "learning_rate": 0.0004935369230344023, "loss": 0.1958, "step": 52630 }, { "epoch": 2.18, "grad_norm": 0.84375, "learning_rate": 0.0004935344727445847, "loss": 0.2024, "step": 52640 }, { "epoch": 2.18, "grad_norm": 0.9765625, "learning_rate": 0.0004935320219964616, "loss": 0.2428, "step": 52650 }, { "epoch": 2.18, "grad_norm": 0.71875, "learning_rate": 0.0004935295707900375, "loss": 0.1989, "step": 52660 }, { "epoch": 2.18, "grad_norm": 0.65625, "learning_rate": 0.0004935271191253169, "loss": 0.1972, "step": 52670 }, { "epoch": 2.18, "grad_norm": 0.74609375, "learning_rate": 0.0004935246670023047, "loss": 0.1945, "step": 52680 }, { "epoch": 2.18, "grad_norm": 0.68359375, "learning_rate": 0.0004935222144210053, "loss": 0.2465, "step": 52690 }, { "epoch": 2.18, "grad_norm": 0.734375, "learning_rate": 0.0004935197613814235, "loss": 0.191, "step": 52700 }, { "epoch": 2.18, "grad_norm": 0.9765625, "learning_rate": 0.0004935173078835637, "loss": 0.236, "step": 52710 }, { "epoch": 2.18, "grad_norm": 0.7734375, "learning_rate": 0.0004935148539274306, "loss": 0.2231, "step": 52720 }, { "epoch": 2.18, "grad_norm": 0.75, "learning_rate": 0.000493512399513029, "loss": 0.1603, "step": 52730 }, { "epoch": 2.18, "grad_norm": 0.57421875, "learning_rate": 0.0004935099446403633, "loss": 0.2145, "step": 52740 }, { "epoch": 2.18, "grad_norm": 0.625, "learning_rate": 0.000493507489309438, "loss": 0.2159, "step": 52750 }, { "epoch": 2.19, "grad_norm": 0.77734375, "learning_rate": 0.0004935050335202581, "loss": 0.2648, "step": 52760 }, { "epoch": 2.19, "grad_norm": 1.0703125, "learning_rate": 0.000493502577272828, "loss": 0.2378, "step": 52770 }, { "epoch": 2.19, "grad_norm": 0.0, "learning_rate": 0.0004935001205671523, "loss": 0.2466, "step": 52780 }, { "epoch": 2.19, "grad_norm": 0.7734375, "learning_rate": 0.0004934976634032356, "loss": 0.2499, "step": 52790 }, { "epoch": 2.19, "grad_norm": 1.984375, "learning_rate": 0.0004934952057810828, "loss": 0.2381, "step": 52800 }, { "epoch": 2.19, "grad_norm": 1.0234375, "learning_rate": 0.0004934927477006982, "loss": 0.2509, "step": 52810 }, { "epoch": 2.19, "grad_norm": 0.546875, "learning_rate": 0.0004934902891620865, "loss": 0.216, "step": 52820 }, { "epoch": 2.19, "grad_norm": 0.66015625, "learning_rate": 0.0004934878301652524, "loss": 0.244, "step": 52830 }, { "epoch": 2.19, "grad_norm": 1.0703125, "learning_rate": 0.0004934853707102005, "loss": 0.2027, "step": 52840 }, { "epoch": 2.19, "grad_norm": 0.66796875, "learning_rate": 0.0004934829107969354, "loss": 0.2442, "step": 52850 }, { "epoch": 2.19, "grad_norm": 0.6875, "learning_rate": 0.0004934804504254618, "loss": 0.2399, "step": 52860 }, { "epoch": 2.19, "grad_norm": 0.3828125, "learning_rate": 0.0004934779895957843, "loss": 0.2186, "step": 52870 }, { "epoch": 2.19, "grad_norm": 1.40625, "learning_rate": 0.0004934755283079075, "loss": 0.2634, "step": 52880 }, { "epoch": 2.19, "grad_norm": 0.49609375, "learning_rate": 0.000493473066561836, "loss": 0.2261, "step": 52890 }, { "epoch": 2.19, "grad_norm": 0.640625, "learning_rate": 0.0004934706043575745, "loss": 0.2478, "step": 52900 }, { "epoch": 2.19, "grad_norm": 0.310546875, "learning_rate": 0.0004934681416951277, "loss": 0.2409, "step": 52910 }, { "epoch": 2.19, "grad_norm": 0.62890625, "learning_rate": 0.0004934656785745001, "loss": 0.2494, "step": 52920 }, { "epoch": 2.19, "grad_norm": 0.28515625, "learning_rate": 0.0004934632149956964, "loss": 0.204, "step": 52930 }, { "epoch": 2.19, "grad_norm": 2.5625, "learning_rate": 0.0004934607509587211, "loss": 0.2215, "step": 52940 }, { "epoch": 2.19, "grad_norm": 0.828125, "learning_rate": 0.000493458286463579, "loss": 0.2181, "step": 52950 }, { "epoch": 2.19, "grad_norm": 0.828125, "learning_rate": 0.0004934558215102748, "loss": 0.2319, "step": 52960 }, { "epoch": 2.19, "grad_norm": 0.6328125, "learning_rate": 0.000493453356098813, "loss": 0.1908, "step": 52970 }, { "epoch": 2.19, "grad_norm": 0.3125, "learning_rate": 0.0004934508902291983, "loss": 0.2263, "step": 52980 }, { "epoch": 2.19, "grad_norm": 0.55078125, "learning_rate": 0.0004934484239014353, "loss": 0.1992, "step": 52990 }, { "epoch": 2.2, "grad_norm": 0.98046875, "learning_rate": 0.0004934459571155286, "loss": 0.1669, "step": 53000 }, { "epoch": 2.2, "grad_norm": 0.53125, "learning_rate": 0.0004934434898714829, "loss": 0.2086, "step": 53010 }, { "epoch": 2.2, "grad_norm": 0.77734375, "learning_rate": 0.0004934410221693029, "loss": 0.2099, "step": 53020 }, { "epoch": 2.2, "grad_norm": 0.8671875, "learning_rate": 0.0004934385540089931, "loss": 0.2445, "step": 53030 }, { "epoch": 2.2, "grad_norm": 0.796875, "learning_rate": 0.0004934360853905583, "loss": 0.2327, "step": 53040 }, { "epoch": 2.2, "grad_norm": 0.92578125, "learning_rate": 0.000493433616314003, "loss": 0.2454, "step": 53050 }, { "epoch": 2.2, "grad_norm": 0.4453125, "learning_rate": 0.000493431146779332, "loss": 0.2974, "step": 53060 }, { "epoch": 2.2, "grad_norm": 0.40625, "learning_rate": 0.0004934286767865499, "loss": 0.1703, "step": 53070 }, { "epoch": 2.2, "grad_norm": 0.92578125, "learning_rate": 0.0004934262063356613, "loss": 0.2196, "step": 53080 }, { "epoch": 2.2, "grad_norm": 0.640625, "learning_rate": 0.0004934237354266708, "loss": 0.2456, "step": 53090 }, { "epoch": 2.2, "grad_norm": 0.765625, "learning_rate": 0.0004934212640595832, "loss": 0.2432, "step": 53100 }, { "epoch": 2.2, "grad_norm": 0.8671875, "learning_rate": 0.000493418792234403, "loss": 0.2196, "step": 53110 }, { "epoch": 2.2, "grad_norm": 0.6484375, "learning_rate": 0.0004934163199511349, "loss": 0.18, "step": 53120 }, { "epoch": 2.2, "grad_norm": 0.9296875, "learning_rate": 0.0004934138472097836, "loss": 0.2279, "step": 53130 }, { "epoch": 2.2, "grad_norm": 0.60546875, "learning_rate": 0.0004934113740103537, "loss": 0.2316, "step": 53140 }, { "epoch": 2.2, "grad_norm": 0.451171875, "learning_rate": 0.00049340890035285, "loss": 0.171, "step": 53150 }, { "epoch": 2.2, "grad_norm": 1.5390625, "learning_rate": 0.000493406426237277, "loss": 0.283, "step": 53160 }, { "epoch": 2.2, "grad_norm": 0.30078125, "learning_rate": 0.0004934039516636393, "loss": 0.1886, "step": 53170 }, { "epoch": 2.2, "grad_norm": 1.1640625, "learning_rate": 0.0004934014766319417, "loss": 0.1802, "step": 53180 }, { "epoch": 2.2, "grad_norm": 2.65625, "learning_rate": 0.0004933990011421888, "loss": 0.2677, "step": 53190 }, { "epoch": 2.2, "grad_norm": 0.90625, "learning_rate": 0.0004933965251943853, "loss": 0.2344, "step": 53200 }, { "epoch": 2.2, "grad_norm": 0.427734375, "learning_rate": 0.0004933940487885358, "loss": 0.2491, "step": 53210 }, { "epoch": 2.2, "grad_norm": 0.96484375, "learning_rate": 0.0004933915719246449, "loss": 0.2234, "step": 53220 }, { "epoch": 2.2, "grad_norm": 0.6796875, "learning_rate": 0.0004933890946027175, "loss": 0.224, "step": 53230 }, { "epoch": 2.21, "grad_norm": 0.72265625, "learning_rate": 0.000493386616822758, "loss": 0.195, "step": 53240 }, { "epoch": 2.21, "grad_norm": 1.0390625, "learning_rate": 0.0004933841385847712, "loss": 0.2243, "step": 53250 }, { "epoch": 2.21, "grad_norm": 0.37890625, "learning_rate": 0.0004933816598887617, "loss": 0.1782, "step": 53260 }, { "epoch": 2.21, "grad_norm": 0.80859375, "learning_rate": 0.0004933791807347342, "loss": 0.2563, "step": 53270 }, { "epoch": 2.21, "grad_norm": 0.6875, "learning_rate": 0.0004933767011226934, "loss": 0.2576, "step": 53280 }, { "epoch": 2.21, "grad_norm": 0.486328125, "learning_rate": 0.0004933742210526439, "loss": 0.1895, "step": 53290 }, { "epoch": 2.21, "grad_norm": 0.31640625, "learning_rate": 0.0004933717405245906, "loss": 0.2082, "step": 53300 }, { "epoch": 2.21, "grad_norm": 1.65625, "learning_rate": 0.0004933692595385377, "loss": 0.2459, "step": 53310 }, { "epoch": 2.21, "grad_norm": 0.76953125, "learning_rate": 0.0004933667780944903, "loss": 0.2862, "step": 53320 }, { "epoch": 2.21, "grad_norm": 1.1015625, "learning_rate": 0.0004933642961924528, "loss": 0.2437, "step": 53330 }, { "epoch": 2.21, "grad_norm": 0.80078125, "learning_rate": 0.0004933618138324299, "loss": 0.2214, "step": 53340 }, { "epoch": 2.21, "grad_norm": 0.6796875, "learning_rate": 0.0004933593310144266, "loss": 0.24, "step": 53350 }, { "epoch": 2.21, "grad_norm": 0.59765625, "learning_rate": 0.0004933568477384472, "loss": 0.2496, "step": 53360 }, { "epoch": 2.21, "grad_norm": 0.55859375, "learning_rate": 0.0004933543640044964, "loss": 0.2342, "step": 53370 }, { "epoch": 2.21, "grad_norm": 0.7578125, "learning_rate": 0.000493351879812579, "loss": 0.1812, "step": 53380 }, { "epoch": 2.21, "grad_norm": 0.625, "learning_rate": 0.0004933493951626997, "loss": 0.182, "step": 53390 }, { "epoch": 2.21, "grad_norm": 1.6796875, "learning_rate": 0.0004933469100548631, "loss": 0.2348, "step": 53400 }, { "epoch": 2.21, "grad_norm": 0.921875, "learning_rate": 0.0004933444244890739, "loss": 0.1832, "step": 53410 }, { "epoch": 2.21, "grad_norm": 0.3203125, "learning_rate": 0.0004933419384653368, "loss": 0.2133, "step": 53420 }, { "epoch": 2.21, "grad_norm": 0.51953125, "learning_rate": 0.0004933394519836564, "loss": 0.1934, "step": 53430 }, { "epoch": 2.21, "grad_norm": 0.61328125, "learning_rate": 0.0004933369650440374, "loss": 0.2786, "step": 53440 }, { "epoch": 2.21, "grad_norm": 0.419921875, "learning_rate": 0.0004933344776464845, "loss": 0.2363, "step": 53450 }, { "epoch": 2.21, "grad_norm": 0.2734375, "learning_rate": 0.0004933319897910025, "loss": 0.2469, "step": 53460 }, { "epoch": 2.21, "grad_norm": 0.5859375, "learning_rate": 0.0004933295014775959, "loss": 0.2353, "step": 53470 }, { "epoch": 2.22, "grad_norm": 0.82421875, "learning_rate": 0.0004933270127062694, "loss": 0.1658, "step": 53480 }, { "epoch": 2.22, "grad_norm": 0.6640625, "learning_rate": 0.0004933245234770278, "loss": 0.1933, "step": 53490 }, { "epoch": 2.22, "grad_norm": 0.55078125, "learning_rate": 0.0004933220337898759, "loss": 0.2083, "step": 53500 }, { "epoch": 2.22, "grad_norm": 2.5625, "learning_rate": 0.0004933195436448181, "loss": 0.2025, "step": 53510 }, { "epoch": 2.22, "grad_norm": 0.81640625, "learning_rate": 0.000493317053041859, "loss": 0.1766, "step": 53520 }, { "epoch": 2.22, "grad_norm": 0.56640625, "learning_rate": 0.0004933145619810036, "loss": 0.2283, "step": 53530 }, { "epoch": 2.22, "grad_norm": 0.734375, "learning_rate": 0.0004933120704622566, "loss": 0.1926, "step": 53540 }, { "epoch": 2.22, "grad_norm": 0.44140625, "learning_rate": 0.0004933095784856224, "loss": 0.2385, "step": 53550 }, { "epoch": 2.22, "grad_norm": 1.078125, "learning_rate": 0.0004933070860511058, "loss": 0.1645, "step": 53560 }, { "epoch": 2.22, "grad_norm": 1.125, "learning_rate": 0.0004933045931587117, "loss": 0.2347, "step": 53570 }, { "epoch": 2.22, "grad_norm": 0.63671875, "learning_rate": 0.0004933020998084445, "loss": 0.1755, "step": 53580 }, { "epoch": 2.22, "grad_norm": 1.625, "learning_rate": 0.0004932996060003092, "loss": 0.2081, "step": 53590 }, { "epoch": 2.22, "grad_norm": 0.0830078125, "learning_rate": 0.0004932971117343101, "loss": 0.235, "step": 53600 }, { "epoch": 2.22, "grad_norm": 0.6328125, "learning_rate": 0.0004932946170104523, "loss": 0.2522, "step": 53610 }, { "epoch": 2.22, "grad_norm": 0.65625, "learning_rate": 0.0004932921218287401, "loss": 0.2138, "step": 53620 }, { "epoch": 2.22, "grad_norm": 0.482421875, "learning_rate": 0.0004932896261891786, "loss": 0.202, "step": 53630 }, { "epoch": 2.22, "grad_norm": 1.0234375, "learning_rate": 0.0004932871300917722, "loss": 0.1826, "step": 53640 }, { "epoch": 2.22, "grad_norm": 0.8515625, "learning_rate": 0.0004932846335365257, "loss": 0.2381, "step": 53650 }, { "epoch": 2.22, "grad_norm": 0.73828125, "learning_rate": 0.0004932821365234439, "loss": 0.2199, "step": 53660 }, { "epoch": 2.22, "grad_norm": 1.328125, "learning_rate": 0.0004932796390525313, "loss": 0.2338, "step": 53670 }, { "epoch": 2.22, "grad_norm": 0.51953125, "learning_rate": 0.0004932771411237926, "loss": 0.2634, "step": 53680 }, { "epoch": 2.22, "grad_norm": 1.2578125, "learning_rate": 0.0004932746427372327, "loss": 0.1909, "step": 53690 }, { "epoch": 2.22, "grad_norm": 0.63671875, "learning_rate": 0.0004932721438928562, "loss": 0.2314, "step": 53700 }, { "epoch": 2.22, "grad_norm": 0.25, "learning_rate": 0.0004932696445906678, "loss": 0.262, "step": 53710 }, { "epoch": 2.23, "grad_norm": 0.55078125, "learning_rate": 0.0004932671448306721, "loss": 0.1379, "step": 53720 }, { "epoch": 2.23, "grad_norm": 0.77734375, "learning_rate": 0.000493264644612874, "loss": 0.2597, "step": 53730 }, { "epoch": 2.23, "grad_norm": 0.92578125, "learning_rate": 0.0004932621439372781, "loss": 0.2273, "step": 53740 }, { "epoch": 2.23, "grad_norm": 0.9453125, "learning_rate": 0.000493259642803889, "loss": 0.234, "step": 53750 }, { "epoch": 2.23, "grad_norm": 1.1640625, "learning_rate": 0.0004932571412127118, "loss": 0.268, "step": 53760 }, { "epoch": 2.23, "grad_norm": 2.84375, "learning_rate": 0.0004932546391637506, "loss": 0.1908, "step": 53770 }, { "epoch": 2.23, "grad_norm": 0.486328125, "learning_rate": 0.0004932521366570106, "loss": 0.2034, "step": 53780 }, { "epoch": 2.23, "grad_norm": 0.40625, "learning_rate": 0.0004932496336924963, "loss": 0.1829, "step": 53790 }, { "epoch": 2.23, "grad_norm": 0.90234375, "learning_rate": 0.0004932471302702125, "loss": 0.2166, "step": 53800 }, { "epoch": 2.23, "grad_norm": 0.8515625, "learning_rate": 0.0004932446263901639, "loss": 0.2562, "step": 53810 }, { "epoch": 2.23, "grad_norm": 0.87890625, "learning_rate": 0.0004932421220523552, "loss": 0.2716, "step": 53820 }, { "epoch": 2.23, "grad_norm": 0.69140625, "learning_rate": 0.000493239617256791, "loss": 0.2332, "step": 53830 }, { "epoch": 2.23, "grad_norm": 1.0, "learning_rate": 0.0004932371120034761, "loss": 0.2755, "step": 53840 }, { "epoch": 2.23, "grad_norm": 1.1015625, "learning_rate": 0.0004932346062924152, "loss": 0.1691, "step": 53850 }, { "epoch": 2.23, "grad_norm": 0.326171875, "learning_rate": 0.0004932321001236132, "loss": 0.2145, "step": 53860 }, { "epoch": 2.23, "grad_norm": 0.2353515625, "learning_rate": 0.0004932295934970745, "loss": 0.2513, "step": 53870 }, { "epoch": 2.23, "grad_norm": 0.39453125, "learning_rate": 0.000493227086412804, "loss": 0.1747, "step": 53880 }, { "epoch": 2.23, "grad_norm": 0.61328125, "learning_rate": 0.0004932245788708065, "loss": 0.2491, "step": 53890 }, { "epoch": 2.23, "grad_norm": 0.98046875, "learning_rate": 0.0004932220708710865, "loss": 0.2067, "step": 53900 }, { "epoch": 2.23, "grad_norm": 1.0625, "learning_rate": 0.0004932195624136489, "loss": 0.2129, "step": 53910 }, { "epoch": 2.23, "grad_norm": 0.765625, "learning_rate": 0.0004932170534984983, "loss": 0.2198, "step": 53920 }, { "epoch": 2.23, "grad_norm": 2.96875, "learning_rate": 0.0004932145441256395, "loss": 0.2326, "step": 53930 }, { "epoch": 2.23, "grad_norm": 0.373046875, "learning_rate": 0.0004932120342950771, "loss": 0.221, "step": 53940 }, { "epoch": 2.23, "grad_norm": 1.25, "learning_rate": 0.000493209524006816, "loss": 0.2129, "step": 53950 }, { "epoch": 2.24, "grad_norm": 0.609375, "learning_rate": 0.0004932070132608608, "loss": 0.2464, "step": 53960 }, { "epoch": 2.24, "grad_norm": 0.208984375, "learning_rate": 0.0004932045020572163, "loss": 0.2105, "step": 53970 }, { "epoch": 2.24, "grad_norm": 0.7734375, "learning_rate": 0.0004932019903958872, "loss": 0.2813, "step": 53980 }, { "epoch": 2.24, "grad_norm": 1.765625, "learning_rate": 0.0004931994782768782, "loss": 0.3199, "step": 53990 }, { "epoch": 2.24, "grad_norm": 0.498046875, "learning_rate": 0.000493196965700194, "loss": 0.2121, "step": 54000 }, { "epoch": 2.24, "grad_norm": 1.90625, "learning_rate": 0.0004931944526658395, "loss": 0.2079, "step": 54010 }, { "epoch": 2.24, "grad_norm": 2.25, "learning_rate": 0.0004931919391738191, "loss": 0.193, "step": 54020 }, { "epoch": 2.24, "grad_norm": 1.1640625, "learning_rate": 0.0004931894252241379, "loss": 0.2017, "step": 54030 }, { "epoch": 2.24, "grad_norm": 0.5234375, "learning_rate": 0.0004931869108168004, "loss": 0.2038, "step": 54040 }, { "epoch": 2.24, "grad_norm": 1.5078125, "learning_rate": 0.0004931843959518114, "loss": 0.2489, "step": 54050 }, { "epoch": 2.24, "grad_norm": 0.76171875, "learning_rate": 0.0004931818806291757, "loss": 0.2454, "step": 54060 }, { "epoch": 2.24, "grad_norm": 0.357421875, "learning_rate": 0.0004931793648488979, "loss": 0.214, "step": 54070 }, { "epoch": 2.24, "grad_norm": 1.09375, "learning_rate": 0.0004931768486109828, "loss": 0.2385, "step": 54080 }, { "epoch": 2.24, "grad_norm": 0.62109375, "learning_rate": 0.0004931743319154351, "loss": 0.2317, "step": 54090 }, { "epoch": 2.24, "grad_norm": 0.796875, "learning_rate": 0.0004931718147622595, "loss": 0.2392, "step": 54100 }, { "epoch": 2.24, "grad_norm": 0.7734375, "learning_rate": 0.000493169297151461, "loss": 0.2039, "step": 54110 }, { "epoch": 2.24, "grad_norm": 0.51953125, "learning_rate": 0.0004931667790830441, "loss": 0.1881, "step": 54120 }, { "epoch": 2.24, "grad_norm": 1.2578125, "learning_rate": 0.0004931642605570135, "loss": 0.1792, "step": 54130 }, { "epoch": 2.24, "grad_norm": 0.44140625, "learning_rate": 0.000493161741573374, "loss": 0.2291, "step": 54140 }, { "epoch": 2.24, "grad_norm": 0.75, "learning_rate": 0.0004931592221321305, "loss": 0.2433, "step": 54150 }, { "epoch": 2.24, "grad_norm": 0.77734375, "learning_rate": 0.0004931567022332875, "loss": 0.2625, "step": 54160 }, { "epoch": 2.24, "grad_norm": 1.0703125, "learning_rate": 0.0004931541818768498, "loss": 0.2197, "step": 54170 }, { "epoch": 2.24, "grad_norm": 0.451171875, "learning_rate": 0.0004931516610628223, "loss": 0.1702, "step": 54180 }, { "epoch": 2.24, "grad_norm": 0.412109375, "learning_rate": 0.0004931491397912096, "loss": 0.2098, "step": 54190 }, { "epoch": 2.24, "grad_norm": 1.140625, "learning_rate": 0.0004931466180620165, "loss": 0.2339, "step": 54200 }, { "epoch": 2.25, "grad_norm": 1.15625, "learning_rate": 0.0004931440958752476, "loss": 0.229, "step": 54210 }, { "epoch": 2.25, "grad_norm": 0.5390625, "learning_rate": 0.0004931415732309078, "loss": 0.2393, "step": 54220 }, { "epoch": 2.25, "grad_norm": 0.19140625, "learning_rate": 0.0004931390501290019, "loss": 0.2022, "step": 54230 }, { "epoch": 2.25, "grad_norm": 0.79296875, "learning_rate": 0.0004931365265695345, "loss": 0.2098, "step": 54240 }, { "epoch": 2.25, "grad_norm": 1.0546875, "learning_rate": 0.0004931340025525105, "loss": 0.2031, "step": 54250 }, { "epoch": 2.25, "grad_norm": 0.99609375, "learning_rate": 0.0004931314780779345, "loss": 0.2017, "step": 54260 }, { "epoch": 2.25, "grad_norm": 1.6640625, "learning_rate": 0.0004931289531458113, "loss": 0.2371, "step": 54270 }, { "epoch": 2.25, "grad_norm": 1.515625, "learning_rate": 0.0004931264277561457, "loss": 0.2681, "step": 54280 }, { "epoch": 2.25, "grad_norm": 1.28125, "learning_rate": 0.0004931239019089424, "loss": 0.232, "step": 54290 }, { "epoch": 2.25, "grad_norm": 0.65234375, "learning_rate": 0.0004931213756042062, "loss": 0.2787, "step": 54300 }, { "epoch": 2.25, "grad_norm": 0.53515625, "learning_rate": 0.0004931188488419418, "loss": 0.2134, "step": 54310 }, { "epoch": 2.25, "grad_norm": 0.734375, "learning_rate": 0.000493116321622154, "loss": 0.2042, "step": 54320 }, { "epoch": 2.25, "grad_norm": 0.53515625, "learning_rate": 0.0004931137939448475, "loss": 0.2353, "step": 54330 }, { "epoch": 2.25, "grad_norm": 0.97265625, "learning_rate": 0.000493111265810027, "loss": 0.2269, "step": 54340 }, { "epoch": 2.25, "grad_norm": 0.2734375, "learning_rate": 0.0004931087372176976, "loss": 0.2674, "step": 54350 }, { "epoch": 2.25, "grad_norm": 0.5625, "learning_rate": 0.0004931062081678636, "loss": 0.2363, "step": 54360 }, { "epoch": 2.25, "grad_norm": 0.30078125, "learning_rate": 0.0004931036786605301, "loss": 0.2364, "step": 54370 }, { "epoch": 2.25, "grad_norm": 0.85546875, "learning_rate": 0.0004931011486957016, "loss": 0.2082, "step": 54380 }, { "epoch": 2.25, "grad_norm": 0.671875, "learning_rate": 0.0004930986182733831, "loss": 0.2547, "step": 54390 }, { "epoch": 2.25, "grad_norm": 0.70703125, "learning_rate": 0.0004930960873935792, "loss": 0.2168, "step": 54400 }, { "epoch": 2.25, "grad_norm": 0.7734375, "learning_rate": 0.0004930935560562948, "loss": 0.249, "step": 54410 }, { "epoch": 2.25, "grad_norm": 0.56640625, "learning_rate": 0.0004930910242615344, "loss": 0.2271, "step": 54420 }, { "epoch": 2.25, "grad_norm": 0.76953125, "learning_rate": 0.0004930884920093031, "loss": 0.231, "step": 54430 }, { "epoch": 2.25, "grad_norm": 0.8359375, "learning_rate": 0.0004930859592996054, "loss": 0.2003, "step": 54440 }, { "epoch": 2.26, "grad_norm": 0.59765625, "learning_rate": 0.0004930834261324462, "loss": 0.2532, "step": 54450 }, { "epoch": 2.26, "grad_norm": 0.287109375, "learning_rate": 0.0004930808925078304, "loss": 0.2248, "step": 54460 }, { "epoch": 2.26, "grad_norm": 0.26171875, "learning_rate": 0.0004930783584257625, "loss": 0.2119, "step": 54470 }, { "epoch": 2.26, "grad_norm": 0.6171875, "learning_rate": 0.0004930758238862474, "loss": 0.2623, "step": 54480 }, { "epoch": 2.26, "grad_norm": 0.68359375, "learning_rate": 0.0004930732888892898, "loss": 0.1977, "step": 54490 }, { "epoch": 2.26, "grad_norm": 0.66796875, "learning_rate": 0.0004930707534348945, "loss": 0.1727, "step": 54500 }, { "epoch": 2.26, "grad_norm": 1.5234375, "learning_rate": 0.0004930682175230663, "loss": 0.2869, "step": 54510 }, { "epoch": 2.26, "grad_norm": 0.66796875, "learning_rate": 0.0004930656811538101, "loss": 0.2388, "step": 54520 }, { "epoch": 2.26, "grad_norm": 0.494140625, "learning_rate": 0.0004930631443271304, "loss": 0.1979, "step": 54530 }, { "epoch": 2.26, "grad_norm": 0.734375, "learning_rate": 0.0004930606070430321, "loss": 0.234, "step": 54540 }, { "epoch": 2.26, "grad_norm": 1.2890625, "learning_rate": 0.0004930580693015201, "loss": 0.2326, "step": 54550 }, { "epoch": 2.26, "grad_norm": 0.984375, "learning_rate": 0.0004930555311025989, "loss": 0.1675, "step": 54560 }, { "epoch": 2.26, "grad_norm": 1.390625, "learning_rate": 0.0004930529924462735, "loss": 0.2633, "step": 54570 }, { "epoch": 2.26, "grad_norm": 0.78515625, "learning_rate": 0.0004930504533325487, "loss": 0.221, "step": 54580 }, { "epoch": 2.26, "grad_norm": 0.55859375, "learning_rate": 0.0004930479137614292, "loss": 0.203, "step": 54590 }, { "epoch": 2.26, "grad_norm": 0.58203125, "learning_rate": 0.0004930453737329197, "loss": 0.2567, "step": 54600 }, { "epoch": 2.26, "grad_norm": 2.140625, "learning_rate": 0.000493042833247025, "loss": 0.217, "step": 54610 }, { "epoch": 2.26, "grad_norm": 1.3671875, "learning_rate": 0.00049304029230375, "loss": 0.2078, "step": 54620 }, { "epoch": 2.26, "grad_norm": 0.59765625, "learning_rate": 0.0004930377509030995, "loss": 0.2416, "step": 54630 }, { "epoch": 2.26, "grad_norm": 1.1484375, "learning_rate": 0.0004930352090450781, "loss": 0.2078, "step": 54640 }, { "epoch": 2.26, "grad_norm": 1.609375, "learning_rate": 0.0004930326667296907, "loss": 0.2247, "step": 54650 }, { "epoch": 2.26, "grad_norm": 0.55078125, "learning_rate": 0.000493030123956942, "loss": 0.2255, "step": 54660 }, { "epoch": 2.26, "grad_norm": 1.015625, "learning_rate": 0.000493027580726837, "loss": 0.2612, "step": 54670 }, { "epoch": 2.26, "grad_norm": 1.3125, "learning_rate": 0.0004930250370393803, "loss": 0.2119, "step": 54680 }, { "epoch": 2.27, "grad_norm": 0.51953125, "learning_rate": 0.0004930224928945766, "loss": 0.2113, "step": 54690 }, { "epoch": 2.27, "grad_norm": 0.57421875, "learning_rate": 0.0004930199482924309, "loss": 0.277, "step": 54700 }, { "epoch": 2.27, "grad_norm": 0.61328125, "learning_rate": 0.0004930174032329479, "loss": 0.2284, "step": 54710 }, { "epoch": 2.27, "grad_norm": 0.34765625, "learning_rate": 0.0004930148577161324, "loss": 0.2524, "step": 54720 }, { "epoch": 2.27, "grad_norm": 0.259765625, "learning_rate": 0.0004930123117419891, "loss": 0.2368, "step": 54730 }, { "epoch": 2.27, "grad_norm": 0.67578125, "learning_rate": 0.0004930097653105229, "loss": 0.2679, "step": 54740 }, { "epoch": 2.27, "grad_norm": 0.36328125, "learning_rate": 0.0004930072184217387, "loss": 0.2453, "step": 54750 }, { "epoch": 2.27, "grad_norm": 0.0, "learning_rate": 0.000493004671075641, "loss": 0.1958, "step": 54760 }, { "epoch": 2.27, "grad_norm": 0.45703125, "learning_rate": 0.0004930021232722348, "loss": 0.1862, "step": 54770 }, { "epoch": 2.27, "grad_norm": 0.94921875, "learning_rate": 0.0004929995750115249, "loss": 0.2225, "step": 54780 }, { "epoch": 2.27, "grad_norm": 0.3671875, "learning_rate": 0.000492997026293516, "loss": 0.2374, "step": 54790 }, { "epoch": 2.27, "grad_norm": 0.66015625, "learning_rate": 0.000492994477118213, "loss": 0.1905, "step": 54800 }, { "epoch": 2.27, "grad_norm": 0.421875, "learning_rate": 0.0004929919274856204, "loss": 0.2087, "step": 54810 }, { "epoch": 2.27, "grad_norm": 0.51953125, "learning_rate": 0.0004929893773957436, "loss": 0.2121, "step": 54820 }, { "epoch": 2.27, "grad_norm": 0.4921875, "learning_rate": 0.0004929868268485867, "loss": 0.2644, "step": 54830 }, { "epoch": 2.27, "grad_norm": 1.21875, "learning_rate": 0.000492984275844155, "loss": 0.2388, "step": 54840 }, { "epoch": 2.27, "grad_norm": 0.427734375, "learning_rate": 0.0004929817243824532, "loss": 0.2933, "step": 54850 }, { "epoch": 2.27, "grad_norm": 0.76953125, "learning_rate": 0.0004929791724634858, "loss": 0.1678, "step": 54860 }, { "epoch": 2.27, "grad_norm": 0.58203125, "learning_rate": 0.000492976620087258, "loss": 0.2067, "step": 54870 }, { "epoch": 2.27, "grad_norm": 0.88671875, "learning_rate": 0.0004929740672537745, "loss": 0.2411, "step": 54880 }, { "epoch": 2.27, "grad_norm": 0.78515625, "learning_rate": 0.0004929715139630398, "loss": 0.2204, "step": 54890 }, { "epoch": 2.27, "grad_norm": 1.1171875, "learning_rate": 0.0004929689602150593, "loss": 0.2736, "step": 54900 }, { "epoch": 2.27, "grad_norm": 0.5234375, "learning_rate": 0.0004929664060098371, "loss": 0.2387, "step": 54910 }, { "epoch": 2.27, "grad_norm": 0.6171875, "learning_rate": 0.0004929638513473786, "loss": 0.213, "step": 54920 }, { "epoch": 2.28, "grad_norm": 0.56640625, "learning_rate": 0.0004929612962276883, "loss": 0.2042, "step": 54930 }, { "epoch": 2.28, "grad_norm": 0.99609375, "learning_rate": 0.000492958740650771, "loss": 0.232, "step": 54940 }, { "epoch": 2.28, "grad_norm": 0.2353515625, "learning_rate": 0.0004929561846166317, "loss": 0.2396, "step": 54950 }, { "epoch": 2.28, "grad_norm": 0.2041015625, "learning_rate": 0.0004929536281252751, "loss": 0.1805, "step": 54960 }, { "epoch": 2.28, "grad_norm": 0.53125, "learning_rate": 0.0004929510711767058, "loss": 0.2413, "step": 54970 }, { "epoch": 2.28, "grad_norm": 1.296875, "learning_rate": 0.0004929485137709291, "loss": 0.2233, "step": 54980 }, { "epoch": 2.28, "grad_norm": 1.65625, "learning_rate": 0.0004929459559079494, "loss": 0.2202, "step": 54990 }, { "epoch": 2.28, "grad_norm": 0.41015625, "learning_rate": 0.0004929433975877718, "loss": 0.2244, "step": 55000 }, { "epoch": 2.28, "grad_norm": 0.5078125, "learning_rate": 0.0004929408388104008, "loss": 0.2361, "step": 55010 }, { "epoch": 2.28, "grad_norm": 0.60546875, "learning_rate": 0.0004929382795758414, "loss": 0.2793, "step": 55020 }, { "epoch": 2.28, "grad_norm": 0.8046875, "learning_rate": 0.0004929357198840983, "loss": 0.251, "step": 55030 }, { "epoch": 2.28, "grad_norm": 0.25390625, "learning_rate": 0.0004929331597351765, "loss": 0.2333, "step": 55040 }, { "epoch": 2.28, "grad_norm": 0.1962890625, "learning_rate": 0.0004929305991290808, "loss": 0.19, "step": 55050 }, { "epoch": 2.28, "grad_norm": 0.462890625, "learning_rate": 0.0004929280380658159, "loss": 0.2485, "step": 55060 }, { "epoch": 2.28, "grad_norm": 0.265625, "learning_rate": 0.0004929254765453867, "loss": 0.2082, "step": 55070 }, { "epoch": 2.28, "grad_norm": 0.74609375, "learning_rate": 0.0004929229145677978, "loss": 0.248, "step": 55080 }, { "epoch": 2.28, "grad_norm": 0.98828125, "learning_rate": 0.0004929203521330544, "loss": 0.1998, "step": 55090 }, { "epoch": 2.28, "grad_norm": 0.828125, "learning_rate": 0.0004929177892411611, "loss": 0.2298, "step": 55100 }, { "epoch": 2.28, "grad_norm": 1.375, "learning_rate": 0.0004929152258921226, "loss": 0.2165, "step": 55110 }, { "epoch": 2.28, "grad_norm": 0.62890625, "learning_rate": 0.0004929126620859441, "loss": 0.2506, "step": 55120 }, { "epoch": 2.28, "grad_norm": 0.74609375, "learning_rate": 0.00049291009782263, "loss": 0.27, "step": 55130 }, { "epoch": 2.28, "grad_norm": 0.80859375, "learning_rate": 0.0004929075331021856, "loss": 0.1857, "step": 55140 }, { "epoch": 2.28, "grad_norm": 4.5625, "learning_rate": 0.0004929049679246152, "loss": 0.1807, "step": 55150 }, { "epoch": 2.28, "grad_norm": 1.609375, "learning_rate": 0.000492902402289924, "loss": 0.2213, "step": 55160 }, { "epoch": 2.29, "grad_norm": 0.30859375, "learning_rate": 0.0004928998361981166, "loss": 0.2305, "step": 55170 }, { "epoch": 2.29, "grad_norm": 0.466796875, "learning_rate": 0.0004928972696491981, "loss": 0.2455, "step": 55180 }, { "epoch": 2.29, "grad_norm": 0.98828125, "learning_rate": 0.000492894702643173, "loss": 0.2317, "step": 55190 }, { "epoch": 2.29, "grad_norm": 0.640625, "learning_rate": 0.0004928921351800464, "loss": 0.2592, "step": 55200 }, { "epoch": 2.29, "grad_norm": 0.60546875, "learning_rate": 0.000492889567259823, "loss": 0.2299, "step": 55210 }, { "epoch": 2.29, "grad_norm": 0.578125, "learning_rate": 0.0004928869988825077, "loss": 0.2445, "step": 55220 }, { "epoch": 2.29, "grad_norm": 0.55859375, "learning_rate": 0.0004928844300481054, "loss": 0.2026, "step": 55230 }, { "epoch": 2.29, "grad_norm": 0.85546875, "learning_rate": 0.0004928818607566206, "loss": 0.2392, "step": 55240 }, { "epoch": 2.29, "grad_norm": 0.71484375, "learning_rate": 0.0004928792910080585, "loss": 0.2327, "step": 55250 }, { "epoch": 2.29, "grad_norm": 0.5703125, "learning_rate": 0.0004928767208024239, "loss": 0.2287, "step": 55260 }, { "epoch": 2.29, "grad_norm": 0.859375, "learning_rate": 0.0004928741501397213, "loss": 0.2414, "step": 55270 }, { "epoch": 2.29, "grad_norm": 0.1962890625, "learning_rate": 0.000492871579019956, "loss": 0.1902, "step": 55280 }, { "epoch": 2.29, "grad_norm": 0.7578125, "learning_rate": 0.0004928690074431324, "loss": 0.2089, "step": 55290 }, { "epoch": 2.29, "grad_norm": 0.8203125, "learning_rate": 0.0004928664354092557, "loss": 0.1656, "step": 55300 }, { "epoch": 2.29, "grad_norm": 0.73046875, "learning_rate": 0.0004928638629183306, "loss": 0.1711, "step": 55310 }, { "epoch": 2.29, "grad_norm": 0.53515625, "learning_rate": 0.000492861289970362, "loss": 0.2582, "step": 55320 }, { "epoch": 2.29, "grad_norm": 0.734375, "learning_rate": 0.0004928587165653546, "loss": 0.2612, "step": 55330 }, { "epoch": 2.29, "grad_norm": 0.38671875, "learning_rate": 0.0004928561427033132, "loss": 0.2466, "step": 55340 }, { "epoch": 2.29, "grad_norm": 0.98828125, "learning_rate": 0.0004928535683842429, "loss": 0.2304, "step": 55350 }, { "epoch": 2.29, "grad_norm": 0.7890625, "learning_rate": 0.0004928509936081483, "loss": 0.2165, "step": 55360 }, { "epoch": 2.29, "grad_norm": 0.6875, "learning_rate": 0.0004928484183750344, "loss": 0.2144, "step": 55370 }, { "epoch": 2.29, "grad_norm": 1.203125, "learning_rate": 0.0004928458426849061, "loss": 0.2206, "step": 55380 }, { "epoch": 2.29, "grad_norm": 0.734375, "learning_rate": 0.0004928432665377682, "loss": 0.2405, "step": 55390 }, { "epoch": 2.29, "grad_norm": 0.81640625, "learning_rate": 0.0004928406899336253, "loss": 0.2157, "step": 55400 }, { "epoch": 2.3, "grad_norm": 0.5390625, "learning_rate": 0.0004928381128724826, "loss": 0.2293, "step": 55410 }, { "epoch": 2.3, "grad_norm": 1.46875, "learning_rate": 0.0004928355353543447, "loss": 0.2214, "step": 55420 }, { "epoch": 2.3, "grad_norm": 0.890625, "learning_rate": 0.0004928329573792165, "loss": 0.2477, "step": 55430 }, { "epoch": 2.3, "grad_norm": 0.25, "learning_rate": 0.0004928303789471031, "loss": 0.1631, "step": 55440 }, { "epoch": 2.3, "grad_norm": 0.828125, "learning_rate": 0.0004928278000580089, "loss": 0.2524, "step": 55450 }, { "epoch": 2.3, "grad_norm": 0.50390625, "learning_rate": 0.0004928252207119392, "loss": 0.2401, "step": 55460 }, { "epoch": 2.3, "grad_norm": 0.71484375, "learning_rate": 0.0004928226409088985, "loss": 0.2053, "step": 55470 }, { "epoch": 2.3, "grad_norm": 0.578125, "learning_rate": 0.000492820060648892, "loss": 0.2044, "step": 55480 }, { "epoch": 2.3, "grad_norm": 0.59765625, "learning_rate": 0.0004928174799319242, "loss": 0.2432, "step": 55490 }, { "epoch": 2.3, "grad_norm": 2.140625, "learning_rate": 0.0004928148987580003, "loss": 0.2456, "step": 55500 }, { "epoch": 2.3, "grad_norm": 1.3125, "learning_rate": 0.0004928123171271248, "loss": 0.2343, "step": 55510 }, { "epoch": 2.3, "grad_norm": 0.859375, "learning_rate": 0.0004928097350393028, "loss": 0.2001, "step": 55520 }, { "epoch": 2.3, "grad_norm": 0.318359375, "learning_rate": 0.0004928071524945391, "loss": 0.2073, "step": 55530 }, { "epoch": 2.3, "grad_norm": 0.470703125, "learning_rate": 0.0004928045694928386, "loss": 0.2104, "step": 55540 }, { "epoch": 2.3, "grad_norm": 0.7265625, "learning_rate": 0.0004928019860342061, "loss": 0.1984, "step": 55550 }, { "epoch": 2.3, "grad_norm": 0.6796875, "learning_rate": 0.0004927994021186465, "loss": 0.2232, "step": 55560 }, { "epoch": 2.3, "grad_norm": 0.74609375, "learning_rate": 0.0004927968177461646, "loss": 0.2109, "step": 55570 }, { "epoch": 2.3, "grad_norm": 1.4609375, "learning_rate": 0.0004927942329167653, "loss": 0.2462, "step": 55580 }, { "epoch": 2.3, "grad_norm": 1.546875, "learning_rate": 0.0004927916476304535, "loss": 0.2604, "step": 55590 }, { "epoch": 2.3, "grad_norm": 0.8828125, "learning_rate": 0.000492789061887234, "loss": 0.2692, "step": 55600 }, { "epoch": 2.3, "grad_norm": 0.859375, "learning_rate": 0.0004927864756871118, "loss": 0.2319, "step": 55610 }, { "epoch": 2.3, "grad_norm": 0.77734375, "learning_rate": 0.0004927838890300916, "loss": 0.2301, "step": 55620 }, { "epoch": 2.3, "grad_norm": 1.6953125, "learning_rate": 0.0004927813019161783, "loss": 0.2275, "step": 55630 }, { "epoch": 2.3, "grad_norm": 0.53125, "learning_rate": 0.0004927787143453769, "loss": 0.2501, "step": 55640 }, { "epoch": 2.31, "grad_norm": 0.49609375, "learning_rate": 0.0004927761263176922, "loss": 0.2311, "step": 55650 }, { "epoch": 2.31, "grad_norm": 1.1484375, "learning_rate": 0.0004927735378331289, "loss": 0.2014, "step": 55660 }, { "epoch": 2.31, "grad_norm": 0.4453125, "learning_rate": 0.0004927709488916921, "loss": 0.2275, "step": 55670 }, { "epoch": 2.31, "grad_norm": 0.1630859375, "learning_rate": 0.0004927683594933866, "loss": 0.2497, "step": 55680 }, { "epoch": 2.31, "grad_norm": 1.03125, "learning_rate": 0.0004927657696382173, "loss": 0.2279, "step": 55690 }, { "epoch": 2.31, "grad_norm": 1.3984375, "learning_rate": 0.0004927631793261891, "loss": 0.1955, "step": 55700 }, { "epoch": 2.31, "grad_norm": 0.44140625, "learning_rate": 0.0004927605885573067, "loss": 0.2027, "step": 55710 }, { "epoch": 2.31, "grad_norm": 0.3828125, "learning_rate": 0.0004927579973315751, "loss": 0.166, "step": 55720 }, { "epoch": 2.31, "grad_norm": 0.5546875, "learning_rate": 0.0004927554056489991, "loss": 0.2388, "step": 55730 }, { "epoch": 2.31, "grad_norm": 0.69140625, "learning_rate": 0.0004927528135095838, "loss": 0.2308, "step": 55740 }, { "epoch": 2.31, "grad_norm": 0.1279296875, "learning_rate": 0.0004927502209133338, "loss": 0.1735, "step": 55750 }, { "epoch": 2.31, "grad_norm": 0.5234375, "learning_rate": 0.0004927476278602541, "loss": 0.2004, "step": 55760 }, { "epoch": 2.31, "grad_norm": 0.5859375, "learning_rate": 0.0004927450343503495, "loss": 0.2219, "step": 55770 }, { "epoch": 2.31, "grad_norm": 0.99609375, "learning_rate": 0.0004927424403836252, "loss": 0.2008, "step": 55780 }, { "epoch": 2.31, "grad_norm": 0.76171875, "learning_rate": 0.0004927398459600857, "loss": 0.2717, "step": 55790 }, { "epoch": 2.31, "grad_norm": 0.58203125, "learning_rate": 0.000492737251079736, "loss": 0.2122, "step": 55800 }, { "epoch": 2.31, "grad_norm": 0.314453125, "learning_rate": 0.000492734655742581, "loss": 0.1942, "step": 55810 }, { "epoch": 2.31, "grad_norm": 0.55078125, "learning_rate": 0.0004927320599486257, "loss": 0.2117, "step": 55820 }, { "epoch": 2.31, "grad_norm": 0.447265625, "learning_rate": 0.0004927294636978747, "loss": 0.2366, "step": 55830 }, { "epoch": 2.31, "grad_norm": 0.6171875, "learning_rate": 0.0004927268669903332, "loss": 0.2095, "step": 55840 }, { "epoch": 2.31, "grad_norm": 0.412109375, "learning_rate": 0.0004927242698260059, "loss": 0.2432, "step": 55850 }, { "epoch": 2.31, "grad_norm": 0.8671875, "learning_rate": 0.0004927216722048979, "loss": 0.2035, "step": 55860 }, { "epoch": 2.31, "grad_norm": 1.0625, "learning_rate": 0.0004927190741270136, "loss": 0.213, "step": 55870 }, { "epoch": 2.31, "grad_norm": 0.51953125, "learning_rate": 0.0004927164755923585, "loss": 0.2177, "step": 55880 }, { "epoch": 2.31, "grad_norm": 1.09375, "learning_rate": 0.0004927138766009371, "loss": 0.2338, "step": 55890 }, { "epoch": 2.32, "grad_norm": 0.69921875, "learning_rate": 0.0004927112771527544, "loss": 0.2044, "step": 55900 }, { "epoch": 2.32, "grad_norm": 0.5859375, "learning_rate": 0.0004927086772478152, "loss": 0.2619, "step": 55910 }, { "epoch": 2.32, "grad_norm": 0.49609375, "learning_rate": 0.0004927060768861246, "loss": 0.2274, "step": 55920 }, { "epoch": 2.32, "grad_norm": 0.73046875, "learning_rate": 0.0004927034760676873, "loss": 0.2174, "step": 55930 }, { "epoch": 2.32, "grad_norm": 0.73046875, "learning_rate": 0.0004927008747925084, "loss": 0.2163, "step": 55940 }, { "epoch": 2.32, "grad_norm": 0.65234375, "learning_rate": 0.0004926982730605924, "loss": 0.1996, "step": 55950 }, { "epoch": 2.32, "grad_norm": 1.1171875, "learning_rate": 0.0004926956708719447, "loss": 0.1784, "step": 55960 }, { "epoch": 2.32, "grad_norm": 0.875, "learning_rate": 0.0004926930682265699, "loss": 0.2203, "step": 55970 }, { "epoch": 2.32, "grad_norm": 0.5, "learning_rate": 0.000492690465124473, "loss": 0.2098, "step": 55980 }, { "epoch": 2.32, "grad_norm": 0.396484375, "learning_rate": 0.0004926878615656587, "loss": 0.2008, "step": 55990 }, { "epoch": 2.32, "grad_norm": 1.1328125, "learning_rate": 0.0004926852575501321, "loss": 0.2202, "step": 56000 }, { "epoch": 2.32, "grad_norm": 0.79296875, "learning_rate": 0.0004926826530778981, "loss": 0.2119, "step": 56010 }, { "epoch": 2.32, "grad_norm": 0.58203125, "learning_rate": 0.0004926800481489616, "loss": 0.2144, "step": 56020 }, { "epoch": 2.32, "grad_norm": 0.94140625, "learning_rate": 0.0004926774427633274, "loss": 0.2, "step": 56030 }, { "epoch": 2.32, "grad_norm": 0.68359375, "learning_rate": 0.0004926748369210004, "loss": 0.2134, "step": 56040 }, { "epoch": 2.32, "grad_norm": 0.3046875, "learning_rate": 0.0004926722306219856, "loss": 0.2569, "step": 56050 }, { "epoch": 2.32, "grad_norm": 0.80859375, "learning_rate": 0.0004926696238662879, "loss": 0.2215, "step": 56060 }, { "epoch": 2.32, "grad_norm": 1.109375, "learning_rate": 0.0004926670166539122, "loss": 0.2515, "step": 56070 }, { "epoch": 2.32, "grad_norm": 0.578125, "learning_rate": 0.0004926644089848633, "loss": 0.1996, "step": 56080 }, { "epoch": 2.32, "grad_norm": 1.0703125, "learning_rate": 0.0004926618008591463, "loss": 0.2551, "step": 56090 }, { "epoch": 2.32, "grad_norm": 0.7265625, "learning_rate": 0.0004926591922767659, "loss": 0.2461, "step": 56100 }, { "epoch": 2.32, "grad_norm": 1.09375, "learning_rate": 0.0004926565832377271, "loss": 0.2267, "step": 56110 }, { "epoch": 2.32, "grad_norm": 1.46875, "learning_rate": 0.0004926539737420349, "loss": 0.2067, "step": 56120 }, { "epoch": 2.32, "grad_norm": 0.90234375, "learning_rate": 0.0004926513637896941, "loss": 0.1981, "step": 56130 }, { "epoch": 2.33, "grad_norm": 0.59765625, "learning_rate": 0.0004926487533807095, "loss": 0.2522, "step": 56140 }, { "epoch": 2.33, "grad_norm": 0.5390625, "learning_rate": 0.0004926461425150863, "loss": 0.2361, "step": 56150 }, { "epoch": 2.33, "grad_norm": 0.65234375, "learning_rate": 0.0004926435311928293, "loss": 0.2013, "step": 56160 }, { "epoch": 2.33, "grad_norm": 0.83203125, "learning_rate": 0.0004926409194139433, "loss": 0.2194, "step": 56170 }, { "epoch": 2.33, "grad_norm": 0.45703125, "learning_rate": 0.0004926383071784333, "loss": 0.2262, "step": 56180 }, { "epoch": 2.33, "grad_norm": 1.078125, "learning_rate": 0.0004926356944863041, "loss": 0.214, "step": 56190 }, { "epoch": 2.33, "grad_norm": 0.62109375, "learning_rate": 0.000492633081337561, "loss": 0.2643, "step": 56200 }, { "epoch": 2.33, "grad_norm": 0.31640625, "learning_rate": 0.0004926304677322084, "loss": 0.221, "step": 56210 }, { "epoch": 2.33, "grad_norm": 1.875, "learning_rate": 0.0004926278536702515, "loss": 0.2245, "step": 56220 }, { "epoch": 2.33, "grad_norm": 0.423828125, "learning_rate": 0.0004926252391516952, "loss": 0.1911, "step": 56230 }, { "epoch": 2.33, "grad_norm": 0.1728515625, "learning_rate": 0.0004926226241765445, "loss": 0.1497, "step": 56240 }, { "epoch": 2.33, "grad_norm": 0.29296875, "learning_rate": 0.0004926200087448041, "loss": 0.2163, "step": 56250 }, { "epoch": 2.33, "grad_norm": 0.640625, "learning_rate": 0.000492617392856479, "loss": 0.1969, "step": 56260 }, { "epoch": 2.33, "grad_norm": 0.388671875, "learning_rate": 0.0004926147765115743, "loss": 0.1931, "step": 56270 }, { "epoch": 2.33, "grad_norm": 0.49609375, "learning_rate": 0.0004926121597100948, "loss": 0.2206, "step": 56280 }, { "epoch": 2.33, "grad_norm": 0.53125, "learning_rate": 0.0004926095424520453, "loss": 0.2305, "step": 56290 }, { "epoch": 2.33, "grad_norm": 0.29296875, "learning_rate": 0.0004926069247374309, "loss": 0.2401, "step": 56300 }, { "epoch": 2.33, "grad_norm": 0.8671875, "learning_rate": 0.0004926043065662564, "loss": 0.2033, "step": 56310 }, { "epoch": 2.33, "grad_norm": 0.58203125, "learning_rate": 0.0004926016879385268, "loss": 0.2192, "step": 56320 }, { "epoch": 2.33, "grad_norm": 0.2421875, "learning_rate": 0.0004925990688542472, "loss": 0.1932, "step": 56330 }, { "epoch": 2.33, "grad_norm": 0.765625, "learning_rate": 0.0004925964493134223, "loss": 0.2094, "step": 56340 }, { "epoch": 2.33, "grad_norm": 0.30859375, "learning_rate": 0.0004925938293160568, "loss": 0.2229, "step": 56350 }, { "epoch": 2.33, "grad_norm": 0.60546875, "learning_rate": 0.0004925912088621562, "loss": 0.2043, "step": 56360 }, { "epoch": 2.33, "grad_norm": 0.36328125, "learning_rate": 0.0004925885879517251, "loss": 0.2087, "step": 56370 }, { "epoch": 2.34, "grad_norm": 0.64453125, "learning_rate": 0.0004925859665847684, "loss": 0.2155, "step": 56380 }, { "epoch": 2.34, "grad_norm": 0.76953125, "learning_rate": 0.0004925833447612912, "loss": 0.1741, "step": 56390 }, { "epoch": 2.34, "grad_norm": 0.9609375, "learning_rate": 0.0004925807224812983, "loss": 0.256, "step": 56400 }, { "epoch": 2.34, "grad_norm": 0.6953125, "learning_rate": 0.0004925780997447947, "loss": 0.2398, "step": 56410 }, { "epoch": 2.34, "grad_norm": 0.6015625, "learning_rate": 0.0004925754765517852, "loss": 0.2005, "step": 56420 }, { "epoch": 2.34, "grad_norm": 0.8828125, "learning_rate": 0.0004925728529022749, "loss": 0.2288, "step": 56430 }, { "epoch": 2.34, "grad_norm": 0.51953125, "learning_rate": 0.0004925702287962688, "loss": 0.2592, "step": 56440 }, { "epoch": 2.34, "grad_norm": 0.56640625, "learning_rate": 0.0004925676042337716, "loss": 0.2262, "step": 56450 }, { "epoch": 2.34, "grad_norm": 0.474609375, "learning_rate": 0.0004925649792147885, "loss": 0.1733, "step": 56460 }, { "epoch": 2.34, "grad_norm": 2.21875, "learning_rate": 0.0004925623537393242, "loss": 0.2132, "step": 56470 }, { "epoch": 2.34, "grad_norm": 0.84765625, "learning_rate": 0.0004925597278073836, "loss": 0.2414, "step": 56480 }, { "epoch": 2.34, "grad_norm": 0.8359375, "learning_rate": 0.000492557101418972, "loss": 0.2931, "step": 56490 }, { "epoch": 2.34, "grad_norm": 1.2265625, "learning_rate": 0.0004925544745740941, "loss": 0.2627, "step": 56500 }, { "epoch": 2.34, "grad_norm": 0.515625, "learning_rate": 0.0004925518472727548, "loss": 0.2331, "step": 56510 }, { "epoch": 2.34, "grad_norm": 0.50390625, "learning_rate": 0.0004925492195149592, "loss": 0.2318, "step": 56520 }, { "epoch": 2.34, "grad_norm": 1.6875, "learning_rate": 0.0004925465913007121, "loss": 0.2513, "step": 56530 }, { "epoch": 2.34, "grad_norm": 0.4453125, "learning_rate": 0.0004925439626300186, "loss": 0.2406, "step": 56540 }, { "epoch": 2.34, "grad_norm": 0.279296875, "learning_rate": 0.0004925413335028834, "loss": 0.2171, "step": 56550 }, { "epoch": 2.34, "grad_norm": 0.78515625, "learning_rate": 0.0004925387039193117, "loss": 0.211, "step": 56560 }, { "epoch": 2.34, "grad_norm": 0.578125, "learning_rate": 0.0004925360738793083, "loss": 0.2327, "step": 56570 }, { "epoch": 2.34, "grad_norm": 0.84375, "learning_rate": 0.0004925334433828782, "loss": 0.213, "step": 56580 }, { "epoch": 2.34, "grad_norm": 0.365234375, "learning_rate": 0.0004925308124300264, "loss": 0.24, "step": 56590 }, { "epoch": 2.34, "grad_norm": 0.28515625, "learning_rate": 0.0004925281810207578, "loss": 0.2025, "step": 56600 }, { "epoch": 2.34, "grad_norm": 2.140625, "learning_rate": 0.0004925255491550774, "loss": 0.2282, "step": 56610 }, { "epoch": 2.35, "grad_norm": 0.5078125, "learning_rate": 0.00049252291683299, "loss": 0.2137, "step": 56620 }, { "epoch": 2.35, "grad_norm": 0.5, "learning_rate": 0.0004925202840545007, "loss": 0.1993, "step": 56630 }, { "epoch": 2.35, "grad_norm": 0.53125, "learning_rate": 0.0004925176508196144, "loss": 0.2118, "step": 56640 }, { "epoch": 2.35, "grad_norm": 0.6328125, "learning_rate": 0.0004925150171283361, "loss": 0.2146, "step": 56650 }, { "epoch": 2.35, "grad_norm": 1.1796875, "learning_rate": 0.0004925123829806708, "loss": 0.2385, "step": 56660 }, { "epoch": 2.35, "grad_norm": 0.3046875, "learning_rate": 0.0004925097483766233, "loss": 0.2066, "step": 56670 }, { "epoch": 2.35, "grad_norm": 0.474609375, "learning_rate": 0.0004925071133161986, "loss": 0.2529, "step": 56680 }, { "epoch": 2.35, "grad_norm": 0.62109375, "learning_rate": 0.0004925044777994018, "loss": 0.2458, "step": 56690 }, { "epoch": 2.35, "grad_norm": 0.546875, "learning_rate": 0.0004925018418262377, "loss": 0.1916, "step": 56700 }, { "epoch": 2.35, "grad_norm": 0.6875, "learning_rate": 0.0004924992053967113, "loss": 0.1819, "step": 56710 }, { "epoch": 2.35, "grad_norm": 0.26171875, "learning_rate": 0.0004924965685108276, "loss": 0.2381, "step": 56720 }, { "epoch": 2.35, "grad_norm": 1.3359375, "learning_rate": 0.0004924939311685915, "loss": 0.2623, "step": 56730 }, { "epoch": 2.35, "grad_norm": 0.181640625, "learning_rate": 0.0004924912933700081, "loss": 0.2096, "step": 56740 }, { "epoch": 2.35, "grad_norm": 0.6796875, "learning_rate": 0.0004924886551150823, "loss": 0.247, "step": 56750 }, { "epoch": 2.35, "grad_norm": 0.45703125, "learning_rate": 0.0004924860164038189, "loss": 0.2347, "step": 56760 }, { "epoch": 2.35, "grad_norm": 0.859375, "learning_rate": 0.0004924833772362232, "loss": 0.2366, "step": 56770 }, { "epoch": 2.35, "grad_norm": 1.03125, "learning_rate": 0.0004924807376122998, "loss": 0.1789, "step": 56780 }, { "epoch": 2.35, "grad_norm": 0.60546875, "learning_rate": 0.0004924780975320539, "loss": 0.1752, "step": 56790 }, { "epoch": 2.35, "grad_norm": 1.3125, "learning_rate": 0.0004924754569954904, "loss": 0.1929, "step": 56800 }, { "epoch": 2.35, "grad_norm": 0.6796875, "learning_rate": 0.0004924728160026143, "loss": 0.2186, "step": 56810 }, { "epoch": 2.35, "grad_norm": 0.85546875, "learning_rate": 0.0004924701745534305, "loss": 0.2241, "step": 56820 }, { "epoch": 2.35, "grad_norm": 0.68359375, "learning_rate": 0.000492467532647944, "loss": 0.2193, "step": 56830 }, { "epoch": 2.35, "grad_norm": 0.6640625, "learning_rate": 0.0004924648902861599, "loss": 0.1981, "step": 56840 }, { "epoch": 2.35, "grad_norm": 0.6953125, "learning_rate": 0.000492462247468083, "loss": 0.1805, "step": 56850 }, { "epoch": 2.36, "grad_norm": 0.208984375, "learning_rate": 0.0004924596041937183, "loss": 0.195, "step": 56860 }, { "epoch": 2.36, "grad_norm": 0.5859375, "learning_rate": 0.0004924569604630708, "loss": 0.2678, "step": 56870 }, { "epoch": 2.36, "grad_norm": 1.0234375, "learning_rate": 0.0004924543162761455, "loss": 0.2283, "step": 56880 }, { "epoch": 2.36, "grad_norm": 0.76953125, "learning_rate": 0.0004924516716329474, "loss": 0.2315, "step": 56890 }, { "epoch": 2.36, "grad_norm": 0.341796875, "learning_rate": 0.0004924490265334813, "loss": 0.1918, "step": 56900 }, { "epoch": 2.36, "grad_norm": 0.8671875, "learning_rate": 0.0004924463809777525, "loss": 0.224, "step": 56910 }, { "epoch": 2.36, "grad_norm": 0.40234375, "learning_rate": 0.0004924437349657656, "loss": 0.1886, "step": 56920 }, { "epoch": 2.36, "grad_norm": 0.443359375, "learning_rate": 0.000492441088497526, "loss": 0.2391, "step": 56930 }, { "epoch": 2.36, "grad_norm": 0.462890625, "learning_rate": 0.0004924384415730383, "loss": 0.2355, "step": 56940 }, { "epoch": 2.36, "grad_norm": 0.51171875, "learning_rate": 0.0004924357941923077, "loss": 0.2521, "step": 56950 }, { "epoch": 2.36, "grad_norm": 0.53125, "learning_rate": 0.0004924331463553391, "loss": 0.2344, "step": 56960 }, { "epoch": 2.36, "grad_norm": 0.40625, "learning_rate": 0.0004924304980621375, "loss": 0.193, "step": 56970 }, { "epoch": 2.36, "grad_norm": 0.55078125, "learning_rate": 0.0004924278493127078, "loss": 0.2024, "step": 56980 }, { "epoch": 2.36, "grad_norm": 0.6640625, "learning_rate": 0.0004924252001070552, "loss": 0.2181, "step": 56990 }, { "epoch": 2.36, "grad_norm": 0.494140625, "learning_rate": 0.0004924225504451845, "loss": 0.1681, "step": 57000 }, { "epoch": 2.36, "grad_norm": 1.171875, "learning_rate": 0.0004924199003271006, "loss": 0.1974, "step": 57010 }, { "epoch": 2.36, "grad_norm": 0.6171875, "learning_rate": 0.0004924172497528088, "loss": 0.2431, "step": 57020 }, { "epoch": 2.36, "grad_norm": 0.390625, "learning_rate": 0.0004924145987223139, "loss": 0.1602, "step": 57030 }, { "epoch": 2.36, "grad_norm": 0.65625, "learning_rate": 0.0004924119472356209, "loss": 0.1773, "step": 57040 }, { "epoch": 2.36, "grad_norm": 0.91796875, "learning_rate": 0.0004924092952927347, "loss": 0.1908, "step": 57050 }, { "epoch": 2.36, "grad_norm": 0.8984375, "learning_rate": 0.0004924066428936604, "loss": 0.234, "step": 57060 }, { "epoch": 2.36, "grad_norm": 0.361328125, "learning_rate": 0.0004924039900384031, "loss": 0.2041, "step": 57070 }, { "epoch": 2.36, "grad_norm": 0.83203125, "learning_rate": 0.0004924013367269677, "loss": 0.2157, "step": 57080 }, { "epoch": 2.36, "grad_norm": 0.7109375, "learning_rate": 0.000492398682959359, "loss": 0.2218, "step": 57090 }, { "epoch": 2.37, "grad_norm": 0.578125, "learning_rate": 0.0004923960287355821, "loss": 0.2733, "step": 57100 }, { "epoch": 2.37, "grad_norm": 1.1484375, "learning_rate": 0.0004923933740556422, "loss": 0.2468, "step": 57110 }, { "epoch": 2.37, "grad_norm": 0.609375, "learning_rate": 0.0004923907189195441, "loss": 0.1934, "step": 57120 }, { "epoch": 2.37, "grad_norm": 1.6171875, "learning_rate": 0.0004923880633272929, "loss": 0.2, "step": 57130 }, { "epoch": 2.37, "grad_norm": 0.796875, "learning_rate": 0.0004923854072788935, "loss": 0.1896, "step": 57140 }, { "epoch": 2.37, "grad_norm": 0.53125, "learning_rate": 0.0004923827507743509, "loss": 0.2673, "step": 57150 }, { "epoch": 2.37, "grad_norm": 0.6484375, "learning_rate": 0.0004923800938136702, "loss": 0.2337, "step": 57160 }, { "epoch": 2.37, "grad_norm": 0.578125, "learning_rate": 0.0004923774363968563, "loss": 0.2149, "step": 57170 }, { "epoch": 2.37, "grad_norm": 4.03125, "learning_rate": 0.0004923747785239142, "loss": 0.3006, "step": 57180 }, { "epoch": 2.37, "grad_norm": 1.078125, "learning_rate": 0.000492372120194849, "loss": 0.1856, "step": 57190 }, { "epoch": 2.37, "grad_norm": 1.0546875, "learning_rate": 0.0004923694614096657, "loss": 0.2317, "step": 57200 }, { "epoch": 2.37, "grad_norm": 1.109375, "learning_rate": 0.0004923668021683691, "loss": 0.2048, "step": 57210 }, { "epoch": 2.37, "grad_norm": 0.515625, "learning_rate": 0.0004923641424709644, "loss": 0.2477, "step": 57220 }, { "epoch": 2.37, "grad_norm": 0.78515625, "learning_rate": 0.0004923614823174567, "loss": 0.2109, "step": 57230 }, { "epoch": 2.37, "grad_norm": 0.392578125, "learning_rate": 0.0004923588217078507, "loss": 0.2351, "step": 57240 }, { "epoch": 2.37, "grad_norm": 0.478515625, "learning_rate": 0.0004923561606421516, "loss": 0.138, "step": 57250 }, { "epoch": 2.37, "grad_norm": 0.90234375, "learning_rate": 0.0004923534991203645, "loss": 0.2211, "step": 57260 }, { "epoch": 2.37, "grad_norm": 0.248046875, "learning_rate": 0.0004923508371424941, "loss": 0.2189, "step": 57270 }, { "epoch": 2.37, "grad_norm": 0.86328125, "learning_rate": 0.0004923481747085457, "loss": 0.1913, "step": 57280 }, { "epoch": 2.37, "grad_norm": 0.2578125, "learning_rate": 0.0004923455118185241, "loss": 0.2331, "step": 57290 }, { "epoch": 2.37, "grad_norm": 0.9375, "learning_rate": 0.0004923428484724346, "loss": 0.2218, "step": 57300 }, { "epoch": 2.37, "grad_norm": 2.1875, "learning_rate": 0.0004923401846702819, "loss": 0.2475, "step": 57310 }, { "epoch": 2.37, "grad_norm": 0.6015625, "learning_rate": 0.0004923375204120711, "loss": 0.2418, "step": 57320 }, { "epoch": 2.37, "grad_norm": 0.353515625, "learning_rate": 0.0004923348556978074, "loss": 0.1531, "step": 57330 }, { "epoch": 2.38, "grad_norm": 0.416015625, "learning_rate": 0.0004923321905274956, "loss": 0.2093, "step": 57340 }, { "epoch": 2.38, "grad_norm": 0.5234375, "learning_rate": 0.0004923295249011408, "loss": 0.1796, "step": 57350 }, { "epoch": 2.38, "grad_norm": 0.4375, "learning_rate": 0.000492326858818748, "loss": 0.2126, "step": 57360 }, { "epoch": 2.38, "grad_norm": 0.8359375, "learning_rate": 0.0004923241922803222, "loss": 0.2103, "step": 57370 }, { "epoch": 2.38, "grad_norm": 0.84765625, "learning_rate": 0.0004923215252858685, "loss": 0.2044, "step": 57380 }, { "epoch": 2.38, "grad_norm": 0.8125, "learning_rate": 0.0004923188578353917, "loss": 0.1752, "step": 57390 }, { "epoch": 2.38, "grad_norm": 1.171875, "learning_rate": 0.0004923161899288972, "loss": 0.2142, "step": 57400 }, { "epoch": 2.38, "grad_norm": 0.7578125, "learning_rate": 0.0004923135215663897, "loss": 0.1583, "step": 57410 }, { "epoch": 2.38, "grad_norm": 0.671875, "learning_rate": 0.0004923108527478742, "loss": 0.1843, "step": 57420 }, { "epoch": 2.38, "grad_norm": 0.3046875, "learning_rate": 0.0004923081834733561, "loss": 0.2353, "step": 57430 }, { "epoch": 2.38, "grad_norm": 0.81640625, "learning_rate": 0.0004923055137428399, "loss": 0.2397, "step": 57440 }, { "epoch": 2.38, "grad_norm": 0.37890625, "learning_rate": 0.000492302843556331, "loss": 0.2243, "step": 57450 }, { "epoch": 2.38, "grad_norm": 0.6171875, "learning_rate": 0.0004923001729138343, "loss": 0.235, "step": 57460 }, { "epoch": 2.38, "grad_norm": 0.359375, "learning_rate": 0.0004922975018153549, "loss": 0.2471, "step": 57470 }, { "epoch": 2.38, "grad_norm": 0.82421875, "learning_rate": 0.0004922948302608978, "loss": 0.2162, "step": 57480 }, { "epoch": 2.38, "grad_norm": 1.65625, "learning_rate": 0.0004922921582504679, "loss": 0.2111, "step": 57490 }, { "epoch": 2.38, "grad_norm": 0.96875, "learning_rate": 0.0004922894857840703, "loss": 0.2441, "step": 57500 }, { "epoch": 2.38, "grad_norm": 0.62109375, "learning_rate": 0.0004922868128617102, "loss": 0.198, "step": 57510 }, { "epoch": 2.38, "grad_norm": 0.4921875, "learning_rate": 0.0004922841394833923, "loss": 0.2198, "step": 57520 }, { "epoch": 2.38, "grad_norm": 0.71875, "learning_rate": 0.0004922814656491219, "loss": 0.2423, "step": 57530 }, { "epoch": 2.38, "grad_norm": 0.51171875, "learning_rate": 0.0004922787913589039, "loss": 0.1888, "step": 57540 }, { "epoch": 2.38, "grad_norm": 1.8125, "learning_rate": 0.0004922761166127435, "loss": 0.2127, "step": 57550 }, { "epoch": 2.38, "grad_norm": 0.5, "learning_rate": 0.0004922734414106456, "loss": 0.1795, "step": 57560 }, { "epoch": 2.38, "grad_norm": 1.0625, "learning_rate": 0.0004922707657526151, "loss": 0.2226, "step": 57570 }, { "epoch": 2.38, "grad_norm": 0.765625, "learning_rate": 0.0004922680896386573, "loss": 0.2149, "step": 57580 }, { "epoch": 2.39, "grad_norm": 0.51171875, "learning_rate": 0.0004922654130687771, "loss": 0.2322, "step": 57590 }, { "epoch": 2.39, "grad_norm": 0.52734375, "learning_rate": 0.0004922627360429795, "loss": 0.2321, "step": 57600 }, { "epoch": 2.39, "grad_norm": 0.671875, "learning_rate": 0.0004922600585612697, "loss": 0.2374, "step": 57610 }, { "epoch": 2.39, "grad_norm": 0.69921875, "learning_rate": 0.0004922573806236525, "loss": 0.1835, "step": 57620 }, { "epoch": 2.39, "grad_norm": 0.6328125, "learning_rate": 0.0004922547022301331, "loss": 0.2544, "step": 57630 }, { "epoch": 2.39, "grad_norm": 0.91796875, "learning_rate": 0.0004922520233807165, "loss": 0.2309, "step": 57640 }, { "epoch": 2.39, "grad_norm": 0.625, "learning_rate": 0.0004922493440754079, "loss": 0.2778, "step": 57650 }, { "epoch": 2.39, "grad_norm": 0.5390625, "learning_rate": 0.000492246664314212, "loss": 0.2441, "step": 57660 }, { "epoch": 2.39, "grad_norm": 0.77734375, "learning_rate": 0.0004922439840971341, "loss": 0.2017, "step": 57670 }, { "epoch": 2.39, "grad_norm": 0.58203125, "learning_rate": 0.0004922413034241793, "loss": 0.232, "step": 57680 }, { "epoch": 2.39, "grad_norm": 0.515625, "learning_rate": 0.0004922386222953524, "loss": 0.2299, "step": 57690 }, { "epoch": 2.39, "grad_norm": 0.498046875, "learning_rate": 0.0004922359407106586, "loss": 0.1858, "step": 57700 }, { "epoch": 2.39, "grad_norm": 0.314453125, "learning_rate": 0.0004922332586701029, "loss": 0.2113, "step": 57710 }, { "epoch": 2.39, "grad_norm": 0.85546875, "learning_rate": 0.0004922305761736905, "loss": 0.2311, "step": 57720 }, { "epoch": 2.39, "grad_norm": 0.65234375, "learning_rate": 0.0004922278932214262, "loss": 0.2297, "step": 57730 }, { "epoch": 2.39, "grad_norm": 0.5859375, "learning_rate": 0.0004922252098133152, "loss": 0.2417, "step": 57740 }, { "epoch": 2.39, "grad_norm": 0.58203125, "learning_rate": 0.0004922225259493625, "loss": 0.2377, "step": 57750 }, { "epoch": 2.39, "grad_norm": 0.9140625, "learning_rate": 0.0004922198416295731, "loss": 0.2339, "step": 57760 }, { "epoch": 2.39, "grad_norm": 0.59375, "learning_rate": 0.0004922171568539522, "loss": 0.2342, "step": 57770 }, { "epoch": 2.39, "grad_norm": 0.392578125, "learning_rate": 0.0004922144716225047, "loss": 0.217, "step": 57780 }, { "epoch": 2.39, "grad_norm": 0.181640625, "learning_rate": 0.0004922117859352357, "loss": 0.1867, "step": 57790 }, { "epoch": 2.39, "grad_norm": 0.490234375, "learning_rate": 0.0004922090997921503, "loss": 0.2016, "step": 57800 }, { "epoch": 2.39, "grad_norm": 0.70703125, "learning_rate": 0.0004922064131932536, "loss": 0.1825, "step": 57810 }, { "epoch": 2.39, "grad_norm": 0.55078125, "learning_rate": 0.0004922037261385506, "loss": 0.227, "step": 57820 }, { "epoch": 2.4, "grad_norm": 0.294921875, "learning_rate": 0.0004922010386280462, "loss": 0.2237, "step": 57830 }, { "epoch": 2.4, "grad_norm": 0.5234375, "learning_rate": 0.0004921983506617457, "loss": 0.2416, "step": 57840 }, { "epoch": 2.4, "grad_norm": 0.69140625, "learning_rate": 0.0004921956622396541, "loss": 0.2162, "step": 57850 }, { "epoch": 2.4, "grad_norm": 0.74609375, "learning_rate": 0.0004921929733617764, "loss": 0.1963, "step": 57860 }, { "epoch": 2.4, "grad_norm": 0.61328125, "learning_rate": 0.0004921902840281176, "loss": 0.2397, "step": 57870 }, { "epoch": 2.4, "grad_norm": 0.4140625, "learning_rate": 0.0004921875942386829, "loss": 0.1901, "step": 57880 }, { "epoch": 2.4, "grad_norm": 0.61328125, "learning_rate": 0.0004921849039934773, "loss": 0.209, "step": 57890 }, { "epoch": 2.4, "grad_norm": 0.5703125, "learning_rate": 0.0004921822132925059, "loss": 0.2359, "step": 57900 }, { "epoch": 2.4, "grad_norm": 0.47265625, "learning_rate": 0.0004921795221357737, "loss": 0.2287, "step": 57910 }, { "epoch": 2.4, "grad_norm": 0.298828125, "learning_rate": 0.0004921768305232858, "loss": 0.1984, "step": 57920 }, { "epoch": 2.4, "grad_norm": 0.71484375, "learning_rate": 0.0004921741384550472, "loss": 0.26, "step": 57930 }, { "epoch": 2.4, "grad_norm": 0.66796875, "learning_rate": 0.000492171445931063, "loss": 0.2435, "step": 57940 }, { "epoch": 2.4, "grad_norm": 1.109375, "learning_rate": 0.0004921687529513383, "loss": 0.1944, "step": 57950 }, { "epoch": 2.4, "grad_norm": 0.79296875, "learning_rate": 0.0004921660595158783, "loss": 0.2293, "step": 57960 }, { "epoch": 2.4, "grad_norm": 0.60546875, "learning_rate": 0.0004921633656246877, "loss": 0.2294, "step": 57970 }, { "epoch": 2.4, "grad_norm": 0.3828125, "learning_rate": 0.000492160671277772, "loss": 0.2027, "step": 57980 }, { "epoch": 2.4, "grad_norm": 0.55859375, "learning_rate": 0.000492157976475136, "loss": 0.2228, "step": 57990 }, { "epoch": 2.4, "grad_norm": 0.7734375, "learning_rate": 0.0004921552812167849, "loss": 0.2381, "step": 58000 }, { "epoch": 2.4, "grad_norm": 0.25, "learning_rate": 0.0004921525855027236, "loss": 0.2455, "step": 58010 }, { "epoch": 2.4, "grad_norm": 0.8984375, "learning_rate": 0.0004921498893329573, "loss": 0.197, "step": 58020 }, { "epoch": 2.4, "grad_norm": 0.6640625, "learning_rate": 0.0004921471927074911, "loss": 0.2484, "step": 58030 }, { "epoch": 2.4, "grad_norm": 0.765625, "learning_rate": 0.00049214449562633, "loss": 0.2072, "step": 58040 }, { "epoch": 2.4, "grad_norm": 0.376953125, "learning_rate": 0.0004921417980894792, "loss": 0.2464, "step": 58050 }, { "epoch": 2.4, "grad_norm": 0.345703125, "learning_rate": 0.0004921391000969436, "loss": 0.2275, "step": 58060 }, { "epoch": 2.41, "grad_norm": 0.76171875, "learning_rate": 0.0004921364016487284, "loss": 0.2265, "step": 58070 }, { "epoch": 2.41, "grad_norm": 0.71875, "learning_rate": 0.0004921337027448386, "loss": 0.2182, "step": 58080 }, { "epoch": 2.41, "grad_norm": 0.9140625, "learning_rate": 0.0004921310033852794, "loss": 0.2331, "step": 58090 }, { "epoch": 2.41, "grad_norm": 0.404296875, "learning_rate": 0.0004921283035700557, "loss": 0.2476, "step": 58100 }, { "epoch": 2.41, "grad_norm": 0.44921875, "learning_rate": 0.0004921256032991728, "loss": 0.1948, "step": 58110 }, { "epoch": 2.41, "grad_norm": 0.388671875, "learning_rate": 0.0004921229025726354, "loss": 0.1801, "step": 58120 }, { "epoch": 2.41, "grad_norm": 0.58203125, "learning_rate": 0.0004921202013904491, "loss": 0.2708, "step": 58130 }, { "epoch": 2.41, "grad_norm": 0.99609375, "learning_rate": 0.0004921174997526187, "loss": 0.2521, "step": 58140 }, { "epoch": 2.41, "grad_norm": 2.28125, "learning_rate": 0.0004921147976591492, "loss": 0.2113, "step": 58150 }, { "epoch": 2.41, "grad_norm": 0.6875, "learning_rate": 0.000492112095110046, "loss": 0.2355, "step": 58160 }, { "epoch": 2.41, "grad_norm": 0.314453125, "learning_rate": 0.0004921093921053138, "loss": 0.2152, "step": 58170 }, { "epoch": 2.41, "grad_norm": 0.60546875, "learning_rate": 0.0004921066886449579, "loss": 0.2789, "step": 58180 }, { "epoch": 2.41, "grad_norm": 1.3125, "learning_rate": 0.0004921039847289833, "loss": 0.1992, "step": 58190 }, { "epoch": 2.41, "grad_norm": 0.26171875, "learning_rate": 0.0004921012803573953, "loss": 0.2081, "step": 58200 }, { "epoch": 2.41, "grad_norm": 0.1572265625, "learning_rate": 0.0004920985755301988, "loss": 0.1916, "step": 58210 }, { "epoch": 2.41, "grad_norm": 0.64453125, "learning_rate": 0.0004920958702473988, "loss": 0.2164, "step": 58220 }, { "epoch": 2.41, "grad_norm": 0.66796875, "learning_rate": 0.0004920931645090006, "loss": 0.2504, "step": 58230 }, { "epoch": 2.41, "grad_norm": 0.3671875, "learning_rate": 0.0004920904583150092, "loss": 0.2325, "step": 58240 }, { "epoch": 2.41, "grad_norm": 0.70703125, "learning_rate": 0.0004920877516654298, "loss": 0.2512, "step": 58250 }, { "epoch": 2.41, "grad_norm": 0.345703125, "learning_rate": 0.0004920850445602672, "loss": 0.1884, "step": 58260 }, { "epoch": 2.41, "grad_norm": 1.2578125, "learning_rate": 0.0004920823369995268, "loss": 0.2514, "step": 58270 }, { "epoch": 2.41, "grad_norm": 0.47265625, "learning_rate": 0.0004920796289832136, "loss": 0.1897, "step": 58280 }, { "epoch": 2.41, "grad_norm": 1.390625, "learning_rate": 0.0004920769205113327, "loss": 0.1859, "step": 58290 }, { "epoch": 2.41, "grad_norm": 0.59765625, "learning_rate": 0.0004920742115838891, "loss": 0.2543, "step": 58300 }, { "epoch": 2.42, "grad_norm": 0.5, "learning_rate": 0.0004920715022008879, "loss": 0.1794, "step": 58310 }, { "epoch": 2.42, "grad_norm": 0.51953125, "learning_rate": 0.0004920687923623345, "loss": 0.2343, "step": 58320 }, { "epoch": 2.42, "grad_norm": 1.2109375, "learning_rate": 0.0004920660820682336, "loss": 0.2533, "step": 58330 }, { "epoch": 2.42, "grad_norm": 0.90625, "learning_rate": 0.0004920633713185906, "loss": 0.2374, "step": 58340 }, { "epoch": 2.42, "grad_norm": 0.306640625, "learning_rate": 0.0004920606601134103, "loss": 0.1632, "step": 58350 }, { "epoch": 2.42, "grad_norm": 0.4140625, "learning_rate": 0.0004920579484526982, "loss": 0.2157, "step": 58360 }, { "epoch": 2.42, "grad_norm": 0.2060546875, "learning_rate": 0.000492055236336459, "loss": 0.2414, "step": 58370 }, { "epoch": 2.42, "grad_norm": 1.609375, "learning_rate": 0.000492052523764698, "loss": 0.193, "step": 58380 }, { "epoch": 2.42, "grad_norm": 0.427734375, "learning_rate": 0.0004920498107374204, "loss": 0.1916, "step": 58390 }, { "epoch": 2.42, "grad_norm": 0.890625, "learning_rate": 0.0004920470972546311, "loss": 0.2333, "step": 58400 }, { "epoch": 2.42, "grad_norm": 1.7734375, "learning_rate": 0.0004920443833163353, "loss": 0.2674, "step": 58410 }, { "epoch": 2.42, "grad_norm": 0.7734375, "learning_rate": 0.0004920416689225382, "loss": 0.2395, "step": 58420 }, { "epoch": 2.42, "grad_norm": 0.58203125, "learning_rate": 0.0004920389540732448, "loss": 0.1942, "step": 58430 }, { "epoch": 2.42, "grad_norm": 1.234375, "learning_rate": 0.0004920362387684601, "loss": 0.2225, "step": 58440 }, { "epoch": 2.42, "grad_norm": 0.1591796875, "learning_rate": 0.0004920335230081895, "loss": 0.1782, "step": 58450 }, { "epoch": 2.42, "grad_norm": 0.890625, "learning_rate": 0.0004920308067924378, "loss": 0.2845, "step": 58460 }, { "epoch": 2.42, "grad_norm": 0.890625, "learning_rate": 0.0004920280901212103, "loss": 0.2516, "step": 58470 }, { "epoch": 2.42, "grad_norm": 1.296875, "learning_rate": 0.0004920253729945121, "loss": 0.2478, "step": 58480 }, { "epoch": 2.42, "grad_norm": 1.078125, "learning_rate": 0.0004920226554123484, "loss": 0.2678, "step": 58490 }, { "epoch": 2.42, "grad_norm": 0.8984375, "learning_rate": 0.0004920199373747241, "loss": 0.1921, "step": 58500 }, { "epoch": 2.42, "grad_norm": 0.46875, "learning_rate": 0.0004920172188816443, "loss": 0.2097, "step": 58510 }, { "epoch": 2.42, "grad_norm": 0.388671875, "learning_rate": 0.0004920144999331144, "loss": 0.1718, "step": 58520 }, { "epoch": 2.42, "grad_norm": 0.59765625, "learning_rate": 0.0004920117805291392, "loss": 0.2489, "step": 58530 }, { "epoch": 2.42, "grad_norm": 1.1328125, "learning_rate": 0.0004920090606697241, "loss": 0.2269, "step": 58540 }, { "epoch": 2.43, "grad_norm": 1.09375, "learning_rate": 0.000492006340354874, "loss": 0.2097, "step": 58550 }, { "epoch": 2.43, "grad_norm": 0.3671875, "learning_rate": 0.0004920036195845942, "loss": 0.2339, "step": 58560 }, { "epoch": 2.43, "grad_norm": 0.578125, "learning_rate": 0.0004920008983588896, "loss": 0.2025, "step": 58570 }, { "epoch": 2.43, "grad_norm": 1.5390625, "learning_rate": 0.0004919981766777655, "loss": 0.2149, "step": 58580 }, { "epoch": 2.43, "grad_norm": 0.8984375, "learning_rate": 0.0004919954545412269, "loss": 0.1969, "step": 58590 }, { "epoch": 2.43, "grad_norm": 2.328125, "learning_rate": 0.0004919927319492791, "loss": 0.2449, "step": 58600 }, { "epoch": 2.43, "grad_norm": 0.83984375, "learning_rate": 0.000491990008901927, "loss": 0.2345, "step": 58610 }, { "epoch": 2.43, "grad_norm": 1.078125, "learning_rate": 0.0004919872853991759, "loss": 0.1765, "step": 58620 }, { "epoch": 2.43, "grad_norm": 1.2578125, "learning_rate": 0.0004919845614410309, "loss": 0.2555, "step": 58630 }, { "epoch": 2.43, "grad_norm": 0.3984375, "learning_rate": 0.000491981837027497, "loss": 0.2204, "step": 58640 }, { "epoch": 2.43, "grad_norm": 0.6328125, "learning_rate": 0.0004919791121585794, "loss": 0.2216, "step": 58650 }, { "epoch": 2.43, "grad_norm": 0.75, "learning_rate": 0.0004919763868342833, "loss": 0.1738, "step": 58660 }, { "epoch": 2.43, "grad_norm": 0.9765625, "learning_rate": 0.0004919736610546139, "loss": 0.2434, "step": 58670 }, { "epoch": 2.43, "grad_norm": 1.9140625, "learning_rate": 0.000491970934819576, "loss": 0.2191, "step": 58680 }, { "epoch": 2.43, "grad_norm": 0.62890625, "learning_rate": 0.000491968208129175, "loss": 0.1824, "step": 58690 }, { "epoch": 2.43, "grad_norm": 1.34375, "learning_rate": 0.0004919654809834159, "loss": 0.2052, "step": 58700 }, { "epoch": 2.43, "grad_norm": 0.8515625, "learning_rate": 0.000491962753382304, "loss": 0.2271, "step": 58710 }, { "epoch": 2.43, "grad_norm": 0.46875, "learning_rate": 0.0004919600253258442, "loss": 0.1954, "step": 58720 }, { "epoch": 2.43, "grad_norm": 3.5625, "learning_rate": 0.0004919572968140419, "loss": 0.1639, "step": 58730 }, { "epoch": 2.43, "grad_norm": 0.7421875, "learning_rate": 0.0004919545678469021, "loss": 0.2742, "step": 58740 }, { "epoch": 2.43, "grad_norm": 0.6953125, "learning_rate": 0.0004919518384244298, "loss": 0.2171, "step": 58750 }, { "epoch": 2.43, "grad_norm": 0.828125, "learning_rate": 0.0004919491085466303, "loss": 0.2722, "step": 58760 }, { "epoch": 2.43, "grad_norm": 0.2265625, "learning_rate": 0.0004919463782135088, "loss": 0.2453, "step": 58770 }, { "epoch": 2.43, "grad_norm": 0.6875, "learning_rate": 0.0004919436474250703, "loss": 0.2368, "step": 58780 }, { "epoch": 2.44, "grad_norm": 0.8046875, "learning_rate": 0.00049194091618132, "loss": 0.1693, "step": 58790 }, { "epoch": 2.44, "grad_norm": 0.89453125, "learning_rate": 0.0004919381844822629, "loss": 0.2475, "step": 58800 }, { "epoch": 2.44, "grad_norm": 0.640625, "learning_rate": 0.0004919354523279044, "loss": 0.263, "step": 58810 }, { "epoch": 2.44, "grad_norm": 0.69140625, "learning_rate": 0.0004919327197182495, "loss": 0.2022, "step": 58820 }, { "epoch": 2.44, "grad_norm": 0.70703125, "learning_rate": 0.0004919299866533033, "loss": 0.1932, "step": 58830 }, { "epoch": 2.44, "grad_norm": 0.54296875, "learning_rate": 0.000491927253133071, "loss": 0.2296, "step": 58840 }, { "epoch": 2.44, "grad_norm": 0.69921875, "learning_rate": 0.0004919245191575578, "loss": 0.2003, "step": 58850 }, { "epoch": 2.44, "grad_norm": 0.55078125, "learning_rate": 0.0004919217847267687, "loss": 0.1861, "step": 58860 }, { "epoch": 2.44, "grad_norm": 0.296875, "learning_rate": 0.000491919049840709, "loss": 0.1423, "step": 58870 }, { "epoch": 2.44, "grad_norm": 1.5078125, "learning_rate": 0.0004919163144993837, "loss": 0.2245, "step": 58880 }, { "epoch": 2.44, "grad_norm": 0.1767578125, "learning_rate": 0.000491913578702798, "loss": 0.1927, "step": 58890 }, { "epoch": 2.44, "grad_norm": 0.56640625, "learning_rate": 0.0004919108424509571, "loss": 0.2075, "step": 58900 }, { "epoch": 2.44, "grad_norm": 0.42578125, "learning_rate": 0.0004919081057438661, "loss": 0.2708, "step": 58910 }, { "epoch": 2.44, "grad_norm": 0.59375, "learning_rate": 0.0004919053685815303, "loss": 0.2194, "step": 58920 }, { "epoch": 2.44, "grad_norm": 0.36328125, "learning_rate": 0.0004919026309639546, "loss": 0.2284, "step": 58930 }, { "epoch": 2.44, "grad_norm": 0.5, "learning_rate": 0.0004918998928911442, "loss": 0.243, "step": 58940 }, { "epoch": 2.44, "grad_norm": 0.58984375, "learning_rate": 0.0004918971543631045, "loss": 0.2252, "step": 58950 }, { "epoch": 2.44, "grad_norm": 0.4609375, "learning_rate": 0.0004918944153798403, "loss": 0.218, "step": 58960 }, { "epoch": 2.44, "grad_norm": 0.5234375, "learning_rate": 0.0004918916759413571, "loss": 0.2274, "step": 58970 }, { "epoch": 2.44, "grad_norm": 2.453125, "learning_rate": 0.0004918889360476599, "loss": 0.244, "step": 58980 }, { "epoch": 2.44, "grad_norm": 0.7734375, "learning_rate": 0.0004918861956987537, "loss": 0.252, "step": 58990 }, { "epoch": 2.44, "grad_norm": 0.609375, "learning_rate": 0.0004918834548946438, "loss": 0.1551, "step": 59000 }, { "epoch": 2.44, "grad_norm": 0.7421875, "learning_rate": 0.0004918807136353355, "loss": 0.2063, "step": 59010 }, { "epoch": 2.44, "grad_norm": 0.91796875, "learning_rate": 0.0004918779719208337, "loss": 0.263, "step": 59020 }, { "epoch": 2.45, "grad_norm": 0.376953125, "learning_rate": 0.0004918752297511437, "loss": 0.2356, "step": 59030 }, { "epoch": 2.45, "grad_norm": 1.1484375, "learning_rate": 0.0004918724871262706, "loss": 0.2077, "step": 59040 }, { "epoch": 2.45, "grad_norm": 1.1171875, "learning_rate": 0.0004918697440462197, "loss": 0.1856, "step": 59050 }, { "epoch": 2.45, "grad_norm": 0.71484375, "learning_rate": 0.000491867000510996, "loss": 0.2494, "step": 59060 }, { "epoch": 2.45, "grad_norm": 0.9296875, "learning_rate": 0.0004918642565206047, "loss": 0.2428, "step": 59070 }, { "epoch": 2.45, "grad_norm": 0.7578125, "learning_rate": 0.0004918615120750511, "loss": 0.2049, "step": 59080 }, { "epoch": 2.45, "grad_norm": 0.69140625, "learning_rate": 0.0004918587671743402, "loss": 0.2147, "step": 59090 }, { "epoch": 2.45, "grad_norm": 1.1875, "learning_rate": 0.0004918560218184772, "loss": 0.2217, "step": 59100 }, { "epoch": 2.45, "grad_norm": 0.8671875, "learning_rate": 0.0004918532760074672, "loss": 0.2469, "step": 59110 }, { "epoch": 2.45, "grad_norm": 0.82421875, "learning_rate": 0.0004918505297413156, "loss": 0.2107, "step": 59120 }, { "epoch": 2.45, "grad_norm": 0.7421875, "learning_rate": 0.0004918477830200273, "loss": 0.2088, "step": 59130 }, { "epoch": 2.45, "grad_norm": 1.1640625, "learning_rate": 0.0004918450358436077, "loss": 0.1849, "step": 59140 }, { "epoch": 2.45, "grad_norm": 0.84765625, "learning_rate": 0.0004918422882120618, "loss": 0.2202, "step": 59150 }, { "epoch": 2.45, "grad_norm": 0.9375, "learning_rate": 0.0004918395401253948, "loss": 0.1882, "step": 59160 }, { "epoch": 2.45, "grad_norm": 0.3046875, "learning_rate": 0.000491836791583612, "loss": 0.2669, "step": 59170 }, { "epoch": 2.45, "grad_norm": 0.63671875, "learning_rate": 0.0004918340425867184, "loss": 0.2336, "step": 59180 }, { "epoch": 2.45, "grad_norm": 1.1171875, "learning_rate": 0.0004918312931347192, "loss": 0.1796, "step": 59190 }, { "epoch": 2.45, "grad_norm": 0.70703125, "learning_rate": 0.0004918285432276197, "loss": 0.1921, "step": 59200 }, { "epoch": 2.45, "grad_norm": 0.416015625, "learning_rate": 0.000491825792865425, "loss": 0.2791, "step": 59210 }, { "epoch": 2.45, "grad_norm": 0.625, "learning_rate": 0.0004918230420481402, "loss": 0.2219, "step": 59220 }, { "epoch": 2.45, "grad_norm": 0.58984375, "learning_rate": 0.0004918202907757707, "loss": 0.1898, "step": 59230 }, { "epoch": 2.45, "grad_norm": 0.546875, "learning_rate": 0.0004918175390483214, "loss": 0.2017, "step": 59240 }, { "epoch": 2.45, "grad_norm": 1.40625, "learning_rate": 0.0004918147868657976, "loss": 0.1781, "step": 59250 }, { "epoch": 2.45, "grad_norm": 0.765625, "learning_rate": 0.0004918120342282046, "loss": 0.1941, "step": 59260 }, { "epoch": 2.45, "grad_norm": 0.48828125, "learning_rate": 0.0004918092811355473, "loss": 0.22, "step": 59270 }, { "epoch": 2.46, "grad_norm": 0.859375, "learning_rate": 0.0004918065275878311, "loss": 0.2737, "step": 59280 }, { "epoch": 2.46, "grad_norm": 1.1875, "learning_rate": 0.0004918037735850612, "loss": 0.2153, "step": 59290 }, { "epoch": 2.46, "grad_norm": 0.99609375, "learning_rate": 0.0004918010191272426, "loss": 0.2438, "step": 59300 }, { "epoch": 2.46, "grad_norm": 0.322265625, "learning_rate": 0.0004917982642143806, "loss": 0.2223, "step": 59310 }, { "epoch": 2.46, "grad_norm": 1.109375, "learning_rate": 0.0004917955088464805, "loss": 0.2452, "step": 59320 }, { "epoch": 2.46, "grad_norm": 0.58984375, "learning_rate": 0.0004917927530235473, "loss": 0.2396, "step": 59330 }, { "epoch": 2.46, "grad_norm": 0.828125, "learning_rate": 0.0004917899967455863, "loss": 0.219, "step": 59340 }, { "epoch": 2.46, "grad_norm": 0.60546875, "learning_rate": 0.0004917872400126026, "loss": 0.1752, "step": 59350 }, { "epoch": 2.46, "grad_norm": 0.5, "learning_rate": 0.0004917844828246015, "loss": 0.2601, "step": 59360 }, { "epoch": 2.46, "grad_norm": 0.248046875, "learning_rate": 0.0004917817251815879, "loss": 0.1862, "step": 59370 }, { "epoch": 2.46, "grad_norm": 0.49609375, "learning_rate": 0.0004917789670835674, "loss": 0.2633, "step": 59380 }, { "epoch": 2.46, "grad_norm": 0.578125, "learning_rate": 0.0004917762085305449, "loss": 0.196, "step": 59390 }, { "epoch": 2.46, "grad_norm": 0.306640625, "learning_rate": 0.0004917734495225257, "loss": 0.2427, "step": 59400 }, { "epoch": 2.46, "grad_norm": 0.48046875, "learning_rate": 0.000491770690059515, "loss": 0.2416, "step": 59410 }, { "epoch": 2.46, "grad_norm": 1.5859375, "learning_rate": 0.000491767930141518, "loss": 0.2367, "step": 59420 }, { "epoch": 2.46, "grad_norm": 0.54296875, "learning_rate": 0.0004917651697685398, "loss": 0.2163, "step": 59430 }, { "epoch": 2.46, "grad_norm": 0.77734375, "learning_rate": 0.0004917624089405857, "loss": 0.2352, "step": 59440 }, { "epoch": 2.46, "grad_norm": 0.341796875, "learning_rate": 0.0004917596476576608, "loss": 0.1979, "step": 59450 }, { "epoch": 2.46, "grad_norm": 0.138671875, "learning_rate": 0.0004917568859197704, "loss": 0.1776, "step": 59460 }, { "epoch": 2.46, "grad_norm": 1.109375, "learning_rate": 0.0004917541237269196, "loss": 0.204, "step": 59470 }, { "epoch": 2.46, "grad_norm": 0.5625, "learning_rate": 0.0004917513610791137, "loss": 0.2029, "step": 59480 }, { "epoch": 2.46, "grad_norm": 0.796875, "learning_rate": 0.0004917485979763579, "loss": 0.1755, "step": 59490 }, { "epoch": 2.46, "grad_norm": 0.181640625, "learning_rate": 0.0004917458344186572, "loss": 0.1455, "step": 59500 }, { "epoch": 2.46, "grad_norm": 0.671875, "learning_rate": 0.0004917430704060171, "loss": 0.228, "step": 59510 }, { "epoch": 2.47, "grad_norm": 0.51953125, "learning_rate": 0.0004917403059384425, "loss": 0.2166, "step": 59520 }, { "epoch": 2.47, "grad_norm": 0.310546875, "learning_rate": 0.0004917375410159388, "loss": 0.2472, "step": 59530 }, { "epoch": 2.47, "grad_norm": 0.486328125, "learning_rate": 0.0004917347756385112, "loss": 0.2399, "step": 59540 }, { "epoch": 2.47, "grad_norm": 0.35546875, "learning_rate": 0.0004917320098061648, "loss": 0.1622, "step": 59550 }, { "epoch": 2.47, "grad_norm": 0.640625, "learning_rate": 0.0004917292435189049, "loss": 0.2126, "step": 59560 }, { "epoch": 2.47, "grad_norm": 0.60546875, "learning_rate": 0.0004917264767767366, "loss": 0.199, "step": 59570 }, { "epoch": 2.47, "grad_norm": 1.1953125, "learning_rate": 0.0004917237095796653, "loss": 0.2206, "step": 59580 }, { "epoch": 2.47, "grad_norm": 0.2021484375, "learning_rate": 0.000491720941927696, "loss": 0.2047, "step": 59590 }, { "epoch": 2.47, "grad_norm": 0.86328125, "learning_rate": 0.000491718173820834, "loss": 0.1928, "step": 59600 }, { "epoch": 2.47, "grad_norm": 0.72265625, "learning_rate": 0.0004917154052590845, "loss": 0.2147, "step": 59610 }, { "epoch": 2.47, "grad_norm": 0.78515625, "learning_rate": 0.0004917126362424528, "loss": 0.2315, "step": 59620 }, { "epoch": 2.47, "grad_norm": 0.6328125, "learning_rate": 0.0004917098667709439, "loss": 0.2232, "step": 59630 }, { "epoch": 2.47, "grad_norm": 0.2236328125, "learning_rate": 0.0004917070968445632, "loss": 0.2082, "step": 59640 }, { "epoch": 2.47, "grad_norm": 0.5859375, "learning_rate": 0.0004917043264633157, "loss": 0.2122, "step": 59650 }, { "epoch": 2.47, "grad_norm": 0.75, "learning_rate": 0.0004917015556272069, "loss": 0.2439, "step": 59660 }, { "epoch": 2.47, "grad_norm": 0.287109375, "learning_rate": 0.0004916987843362418, "loss": 0.1509, "step": 59670 }, { "epoch": 2.47, "grad_norm": 0.859375, "learning_rate": 0.0004916960125904259, "loss": 0.1954, "step": 59680 }, { "epoch": 2.47, "grad_norm": 0.984375, "learning_rate": 0.0004916932403897639, "loss": 0.2675, "step": 59690 }, { "epoch": 2.47, "grad_norm": 0.6796875, "learning_rate": 0.0004916904677342615, "loss": 0.2462, "step": 59700 }, { "epoch": 2.47, "grad_norm": 0.640625, "learning_rate": 0.0004916876946239237, "loss": 0.2182, "step": 59710 }, { "epoch": 2.47, "grad_norm": 0.67578125, "learning_rate": 0.0004916849210587559, "loss": 0.1832, "step": 59720 }, { "epoch": 2.47, "grad_norm": 2.265625, "learning_rate": 0.000491682147038763, "loss": 0.2849, "step": 59730 }, { "epoch": 2.47, "grad_norm": 0.412109375, "learning_rate": 0.0004916793725639504, "loss": 0.2239, "step": 59740 }, { "epoch": 2.47, "grad_norm": 0.53125, "learning_rate": 0.0004916765976343233, "loss": 0.2235, "step": 59750 }, { "epoch": 2.48, "grad_norm": 0.53515625, "learning_rate": 0.0004916738222498871, "loss": 0.2239, "step": 59760 }, { "epoch": 2.48, "grad_norm": 0.578125, "learning_rate": 0.0004916710464106468, "loss": 0.2046, "step": 59770 }, { "epoch": 2.48, "grad_norm": 0.703125, "learning_rate": 0.0004916682701166076, "loss": 0.1869, "step": 59780 }, { "epoch": 2.48, "grad_norm": 1.453125, "learning_rate": 0.0004916654933677749, "loss": 0.2031, "step": 59790 }, { "epoch": 2.48, "grad_norm": 0.5625, "learning_rate": 0.0004916627161641537, "loss": 0.2024, "step": 59800 }, { "epoch": 2.48, "grad_norm": 0.79296875, "learning_rate": 0.0004916599385057495, "loss": 0.1714, "step": 59810 }, { "epoch": 2.48, "grad_norm": 1.0, "learning_rate": 0.0004916571603925674, "loss": 0.218, "step": 59820 }, { "epoch": 2.48, "grad_norm": 0.267578125, "learning_rate": 0.0004916543818246126, "loss": 0.2052, "step": 59830 }, { "epoch": 2.48, "grad_norm": 1.0234375, "learning_rate": 0.0004916516028018904, "loss": 0.2098, "step": 59840 }, { "epoch": 2.48, "grad_norm": 0.859375, "learning_rate": 0.0004916488233244059, "loss": 0.2346, "step": 59850 }, { "epoch": 2.48, "grad_norm": 0.54296875, "learning_rate": 0.0004916460433921644, "loss": 0.2139, "step": 59860 }, { "epoch": 2.48, "grad_norm": 0.609375, "learning_rate": 0.0004916432630051712, "loss": 0.2227, "step": 59870 }, { "epoch": 2.48, "grad_norm": 0.59375, "learning_rate": 0.0004916404821634314, "loss": 0.2572, "step": 59880 }, { "epoch": 2.48, "grad_norm": 0.62109375, "learning_rate": 0.0004916377008669504, "loss": 0.2039, "step": 59890 }, { "epoch": 2.48, "grad_norm": 1.5625, "learning_rate": 0.0004916349191157333, "loss": 0.198, "step": 59900 }, { "epoch": 2.48, "grad_norm": 0.8359375, "learning_rate": 0.0004916321369097854, "loss": 0.2805, "step": 59910 }, { "epoch": 2.48, "grad_norm": 0.369140625, "learning_rate": 0.0004916293542491119, "loss": 0.241, "step": 59920 }, { "epoch": 2.48, "grad_norm": 0.6953125, "learning_rate": 0.0004916265711337179, "loss": 0.2665, "step": 59930 }, { "epoch": 2.48, "grad_norm": 0.322265625, "learning_rate": 0.0004916237875636089, "loss": 0.219, "step": 59940 }, { "epoch": 2.48, "grad_norm": 0.392578125, "learning_rate": 0.0004916210035387902, "loss": 0.1856, "step": 59950 }, { "epoch": 2.48, "grad_norm": 0.8671875, "learning_rate": 0.0004916182190592666, "loss": 0.2821, "step": 59960 }, { "epoch": 2.48, "grad_norm": 0.65625, "learning_rate": 0.0004916154341250437, "loss": 0.203, "step": 59970 }, { "epoch": 2.48, "grad_norm": 0.55859375, "learning_rate": 0.0004916126487361267, "loss": 0.1725, "step": 59980 }, { "epoch": 2.48, "grad_norm": 0.70703125, "learning_rate": 0.0004916098628925207, "loss": 0.2144, "step": 59990 }, { "epoch": 2.49, "grad_norm": 0.310546875, "learning_rate": 0.0004916070765942311, "loss": 0.1954, "step": 60000 }, { "epoch": 2.49, "grad_norm": 0.953125, "learning_rate": 0.0004916042898412631, "loss": 0.2646, "step": 60010 }, { "epoch": 2.49, "grad_norm": 0.43359375, "learning_rate": 0.0004916015026336218, "loss": 0.2277, "step": 60020 }, { "epoch": 2.49, "grad_norm": 1.046875, "learning_rate": 0.0004915987149713126, "loss": 0.199, "step": 60030 }, { "epoch": 2.49, "grad_norm": 0.490234375, "learning_rate": 0.0004915959268543408, "loss": 0.2519, "step": 60040 }, { "epoch": 2.49, "grad_norm": 0.333984375, "learning_rate": 0.0004915931382827115, "loss": 0.2285, "step": 60050 }, { "epoch": 2.49, "grad_norm": 1.328125, "learning_rate": 0.00049159034925643, "loss": 0.2026, "step": 60060 }, { "epoch": 2.49, "grad_norm": 0.71484375, "learning_rate": 0.0004915875597755015, "loss": 0.2251, "step": 60070 }, { "epoch": 2.49, "grad_norm": 0.52734375, "learning_rate": 0.0004915847698399314, "loss": 0.2059, "step": 60080 }, { "epoch": 2.49, "grad_norm": 0.439453125, "learning_rate": 0.0004915819794497247, "loss": 0.2093, "step": 60090 }, { "epoch": 2.49, "grad_norm": 0.5625, "learning_rate": 0.0004915791886048869, "loss": 0.2424, "step": 60100 }, { "epoch": 2.49, "grad_norm": 0.546875, "learning_rate": 0.0004915763973054232, "loss": 0.1937, "step": 60110 }, { "epoch": 2.49, "grad_norm": 1.4296875, "learning_rate": 0.0004915736055513388, "loss": 0.1795, "step": 60120 }, { "epoch": 2.49, "grad_norm": 0.451171875, "learning_rate": 0.000491570813342639, "loss": 0.2453, "step": 60130 }, { "epoch": 2.49, "grad_norm": 0.94140625, "learning_rate": 0.0004915680206793289, "loss": 0.1969, "step": 60140 }, { "epoch": 2.49, "grad_norm": 0.45703125, "learning_rate": 0.000491565227561414, "loss": 0.1562, "step": 60150 }, { "epoch": 2.49, "grad_norm": 0.86328125, "learning_rate": 0.0004915624339888993, "loss": 0.2261, "step": 60160 }, { "epoch": 2.49, "grad_norm": 0.7421875, "learning_rate": 0.0004915596399617902, "loss": 0.2133, "step": 60170 }, { "epoch": 2.49, "grad_norm": 1.03125, "learning_rate": 0.000491556845480092, "loss": 0.176, "step": 60180 }, { "epoch": 2.49, "grad_norm": 0.703125, "learning_rate": 0.0004915540505438099, "loss": 0.2311, "step": 60190 }, { "epoch": 2.49, "grad_norm": 0.365234375, "learning_rate": 0.0004915512551529491, "loss": 0.2346, "step": 60200 }, { "epoch": 2.49, "grad_norm": 1.046875, "learning_rate": 0.0004915484593075149, "loss": 0.2563, "step": 60210 }, { "epoch": 2.49, "grad_norm": 0.95703125, "learning_rate": 0.0004915456630075127, "loss": 0.2604, "step": 60220 }, { "epoch": 2.49, "grad_norm": 0.6953125, "learning_rate": 0.0004915428662529476, "loss": 0.2657, "step": 60230 }, { "epoch": 2.5, "grad_norm": 0.92578125, "learning_rate": 0.000491540069043825, "loss": 0.2, "step": 60240 }, { "epoch": 2.5, "grad_norm": 0.66015625, "learning_rate": 0.0004915372713801499, "loss": 0.2342, "step": 60250 }, { "epoch": 2.5, "grad_norm": 0.57421875, "learning_rate": 0.0004915344732619278, "loss": 0.1853, "step": 60260 }, { "epoch": 2.5, "grad_norm": 0.94140625, "learning_rate": 0.0004915316746891639, "loss": 0.2303, "step": 60270 }, { "epoch": 2.5, "grad_norm": 0.4140625, "learning_rate": 0.0004915288756618635, "loss": 0.2616, "step": 60280 }, { "epoch": 2.5, "grad_norm": 0.23828125, "learning_rate": 0.0004915260761800318, "loss": 0.22, "step": 60290 }, { "epoch": 2.5, "grad_norm": 0.494140625, "learning_rate": 0.0004915232762436742, "loss": 0.22, "step": 60300 }, { "epoch": 2.5, "grad_norm": 0.62109375, "learning_rate": 0.0004915204758527958, "loss": 0.2164, "step": 60310 }, { "epoch": 2.5, "grad_norm": 0.64453125, "learning_rate": 0.000491517675007402, "loss": 0.2333, "step": 60320 }, { "epoch": 2.5, "grad_norm": 0.2578125, "learning_rate": 0.000491514873707498, "loss": 0.1946, "step": 60330 }, { "epoch": 2.5, "grad_norm": 0.66015625, "learning_rate": 0.000491512071953089, "loss": 0.169, "step": 60340 }, { "epoch": 2.5, "grad_norm": 0.43359375, "learning_rate": 0.0004915092697441806, "loss": 0.2201, "step": 60350 }, { "epoch": 2.5, "grad_norm": 1.453125, "learning_rate": 0.0004915064670807775, "loss": 0.2287, "step": 60360 }, { "epoch": 2.5, "grad_norm": 0.51953125, "learning_rate": 0.0004915036639628856, "loss": 0.2034, "step": 60370 }, { "epoch": 2.5, "grad_norm": 0.55859375, "learning_rate": 0.0004915008603905097, "loss": 0.2781, "step": 60380 }, { "epoch": 2.5, "grad_norm": 0.392578125, "learning_rate": 0.0004914980563636553, "loss": 0.2238, "step": 60390 }, { "epoch": 2.5, "grad_norm": 0.515625, "learning_rate": 0.0004914952518823277, "loss": 0.1982, "step": 60400 }, { "epoch": 2.5, "grad_norm": 0.7734375, "learning_rate": 0.000491492446946532, "loss": 0.1682, "step": 60410 }, { "epoch": 2.5, "grad_norm": 0.416015625, "learning_rate": 0.0004914896415562737, "loss": 0.2021, "step": 60420 }, { "epoch": 2.5, "grad_norm": 0.84765625, "learning_rate": 0.000491486835711558, "loss": 0.2059, "step": 60430 }, { "epoch": 2.5, "grad_norm": 0.80859375, "learning_rate": 0.0004914840294123901, "loss": 0.1997, "step": 60440 }, { "epoch": 2.5, "grad_norm": 1.0546875, "learning_rate": 0.0004914812226587753, "loss": 0.2056, "step": 60450 }, { "epoch": 2.5, "grad_norm": 0.59375, "learning_rate": 0.0004914784154507189, "loss": 0.2128, "step": 60460 }, { "epoch": 2.5, "grad_norm": 1.3046875, "learning_rate": 0.0004914756077882263, "loss": 0.2162, "step": 60470 }, { "epoch": 2.51, "grad_norm": 0.45703125, "learning_rate": 0.0004914727996713027, "loss": 0.2675, "step": 60480 }, { "epoch": 2.51, "grad_norm": 0.3203125, "learning_rate": 0.0004914699910999532, "loss": 0.2175, "step": 60490 }, { "epoch": 2.51, "grad_norm": 0.88671875, "learning_rate": 0.0004914671820741834, "loss": 0.2132, "step": 60500 }, { "epoch": 2.51, "grad_norm": 0.53125, "learning_rate": 0.0004914643725939983, "loss": 0.2026, "step": 60510 }, { "epoch": 2.51, "grad_norm": 0.50390625, "learning_rate": 0.0004914615626594034, "loss": 0.2279, "step": 60520 }, { "epoch": 2.51, "grad_norm": 0.609375, "learning_rate": 0.000491458752270404, "loss": 0.198, "step": 60530 }, { "epoch": 2.51, "grad_norm": 0.486328125, "learning_rate": 0.0004914559414270053, "loss": 0.284, "step": 60540 }, { "epoch": 2.51, "grad_norm": 0.7109375, "learning_rate": 0.0004914531301292124, "loss": 0.2268, "step": 60550 }, { "epoch": 2.51, "grad_norm": 1.6796875, "learning_rate": 0.0004914503183770311, "loss": 0.2176, "step": 60560 }, { "epoch": 2.51, "grad_norm": 1.6171875, "learning_rate": 0.0004914475061704661, "loss": 0.2087, "step": 60570 }, { "epoch": 2.51, "grad_norm": 0.48828125, "learning_rate": 0.000491444693509523, "loss": 0.1613, "step": 60580 }, { "epoch": 2.51, "grad_norm": 0.71875, "learning_rate": 0.0004914418803942071, "loss": 0.2145, "step": 60590 }, { "epoch": 2.51, "grad_norm": 1.265625, "learning_rate": 0.0004914390668245238, "loss": 0.201, "step": 60600 }, { "epoch": 2.51, "grad_norm": 0.734375, "learning_rate": 0.0004914362528004781, "loss": 0.149, "step": 60610 }, { "epoch": 2.51, "grad_norm": 0.62890625, "learning_rate": 0.0004914334383220755, "loss": 0.1871, "step": 60620 }, { "epoch": 2.51, "grad_norm": 0.490234375, "learning_rate": 0.0004914306233893211, "loss": 0.1756, "step": 60630 }, { "epoch": 2.51, "grad_norm": 0.79296875, "learning_rate": 0.0004914278080022205, "loss": 0.2346, "step": 60640 }, { "epoch": 2.51, "grad_norm": 0.25, "learning_rate": 0.0004914249921607787, "loss": 0.2545, "step": 60650 }, { "epoch": 2.51, "grad_norm": 0.314453125, "learning_rate": 0.0004914221758650013, "loss": 0.2867, "step": 60660 }, { "epoch": 2.51, "grad_norm": 0.70703125, "learning_rate": 0.0004914193591148933, "loss": 0.2277, "step": 60670 }, { "epoch": 2.51, "grad_norm": 0.3671875, "learning_rate": 0.0004914165419104602, "loss": 0.2082, "step": 60680 }, { "epoch": 2.51, "grad_norm": 0.484375, "learning_rate": 0.0004914137242517072, "loss": 0.2047, "step": 60690 }, { "epoch": 2.51, "grad_norm": 0.62890625, "learning_rate": 0.0004914109061386397, "loss": 0.2115, "step": 60700 }, { "epoch": 2.51, "grad_norm": 0.5078125, "learning_rate": 0.000491408087571263, "loss": 0.2241, "step": 60710 }, { "epoch": 2.52, "grad_norm": 0.71484375, "learning_rate": 0.0004914052685495822, "loss": 0.2209, "step": 60720 }, { "epoch": 2.52, "grad_norm": 0.640625, "learning_rate": 0.0004914024490736029, "loss": 0.2666, "step": 60730 }, { "epoch": 2.52, "grad_norm": 0.64453125, "learning_rate": 0.0004913996291433301, "loss": 0.2366, "step": 60740 }, { "epoch": 2.52, "grad_norm": 0.96875, "learning_rate": 0.0004913968087587693, "loss": 0.2806, "step": 60750 }, { "epoch": 2.52, "grad_norm": 0.44921875, "learning_rate": 0.0004913939879199259, "loss": 0.1849, "step": 60760 }, { "epoch": 2.52, "grad_norm": 0.345703125, "learning_rate": 0.000491391166626805, "loss": 0.2133, "step": 60770 }, { "epoch": 2.52, "grad_norm": 0.345703125, "learning_rate": 0.000491388344879412, "loss": 0.2416, "step": 60780 }, { "epoch": 2.52, "grad_norm": 0.76953125, "learning_rate": 0.0004913855226777522, "loss": 0.1989, "step": 60790 }, { "epoch": 2.52, "grad_norm": 0.52734375, "learning_rate": 0.0004913827000218309, "loss": 0.2459, "step": 60800 }, { "epoch": 2.52, "grad_norm": 0.478515625, "learning_rate": 0.0004913798769116534, "loss": 0.2214, "step": 60810 }, { "epoch": 2.52, "grad_norm": 0.53515625, "learning_rate": 0.0004913770533472252, "loss": 0.2413, "step": 60820 }, { "epoch": 2.52, "grad_norm": 0.1689453125, "learning_rate": 0.0004913742293285512, "loss": 0.1982, "step": 60830 }, { "epoch": 2.52, "grad_norm": 0.67578125, "learning_rate": 0.0004913714048556372, "loss": 0.2127, "step": 60840 }, { "epoch": 2.52, "grad_norm": 0.6796875, "learning_rate": 0.0004913685799284882, "loss": 0.2567, "step": 60850 }, { "epoch": 2.52, "grad_norm": 0.51171875, "learning_rate": 0.0004913657545471095, "loss": 0.2273, "step": 60860 }, { "epoch": 2.52, "grad_norm": 0.640625, "learning_rate": 0.0004913629287115065, "loss": 0.2221, "step": 60870 }, { "epoch": 2.52, "grad_norm": 1.5234375, "learning_rate": 0.0004913601024216847, "loss": 0.2608, "step": 60880 }, { "epoch": 2.52, "grad_norm": 1.6484375, "learning_rate": 0.0004913572756776492, "loss": 0.2386, "step": 60890 }, { "epoch": 2.52, "grad_norm": 1.171875, "learning_rate": 0.0004913544484794054, "loss": 0.1962, "step": 60900 }, { "epoch": 2.52, "grad_norm": 0.5234375, "learning_rate": 0.0004913516208269585, "loss": 0.2656, "step": 60910 }, { "epoch": 2.52, "grad_norm": 0.89453125, "learning_rate": 0.0004913487927203139, "loss": 0.187, "step": 60920 }, { "epoch": 2.52, "grad_norm": 0.8984375, "learning_rate": 0.0004913459641594771, "loss": 0.2558, "step": 60930 }, { "epoch": 2.52, "grad_norm": 0.68359375, "learning_rate": 0.000491343135144453, "loss": 0.2013, "step": 60940 }, { "epoch": 2.52, "grad_norm": 1.5078125, "learning_rate": 0.0004913403056752474, "loss": 0.1974, "step": 60950 }, { "epoch": 2.52, "grad_norm": 3.390625, "learning_rate": 0.0004913374757518654, "loss": 0.2279, "step": 60960 }, { "epoch": 2.53, "grad_norm": 0.92578125, "learning_rate": 0.0004913346453743122, "loss": 0.2627, "step": 60970 }, { "epoch": 2.53, "grad_norm": 2.421875, "learning_rate": 0.0004913318145425933, "loss": 0.25, "step": 60980 }, { "epoch": 2.53, "grad_norm": 0.58203125, "learning_rate": 0.0004913289832567141, "loss": 0.2268, "step": 60990 }, { "epoch": 2.53, "grad_norm": 0.404296875, "learning_rate": 0.0004913261515166797, "loss": 0.2364, "step": 61000 }, { "epoch": 2.53, "grad_norm": 0.82421875, "learning_rate": 0.0004913233193224955, "loss": 0.1859, "step": 61010 }, { "epoch": 2.53, "grad_norm": 0.765625, "learning_rate": 0.0004913204866741671, "loss": 0.2276, "step": 61020 }, { "epoch": 2.53, "grad_norm": 0.671875, "learning_rate": 0.0004913176535716994, "loss": 0.2299, "step": 61030 }, { "epoch": 2.53, "grad_norm": 0.3671875, "learning_rate": 0.000491314820015098, "loss": 0.2297, "step": 61040 }, { "epoch": 2.53, "grad_norm": 1.1640625, "learning_rate": 0.0004913119860043681, "loss": 0.2018, "step": 61050 }, { "epoch": 2.53, "grad_norm": 0.79296875, "learning_rate": 0.0004913091515395152, "loss": 0.2163, "step": 61060 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 0.0004913063166205445, "loss": 0.2314, "step": 61070 }, { "epoch": 2.53, "grad_norm": 0.455078125, "learning_rate": 0.0004913034812474614, "loss": 0.2768, "step": 61080 }, { "epoch": 2.53, "grad_norm": 0.0, "learning_rate": 0.0004913006454202711, "loss": 0.1717, "step": 61090 }, { "epoch": 2.53, "grad_norm": 0.4453125, "learning_rate": 0.0004912978091389791, "loss": 0.2098, "step": 61100 }, { "epoch": 2.53, "grad_norm": 1.5, "learning_rate": 0.0004912949724035908, "loss": 0.2822, "step": 61110 }, { "epoch": 2.53, "grad_norm": 1.2109375, "learning_rate": 0.0004912921352141112, "loss": 0.2415, "step": 61120 }, { "epoch": 2.53, "grad_norm": 0.482421875, "learning_rate": 0.0004912892975705461, "loss": 0.2286, "step": 61130 }, { "epoch": 2.53, "grad_norm": 0.353515625, "learning_rate": 0.0004912864594729004, "loss": 0.1819, "step": 61140 }, { "epoch": 2.53, "grad_norm": 0.98828125, "learning_rate": 0.0004912836209211797, "loss": 0.2252, "step": 61150 }, { "epoch": 2.53, "grad_norm": 0.5859375, "learning_rate": 0.0004912807819153893, "loss": 0.2037, "step": 61160 }, { "epoch": 2.53, "grad_norm": 0.921875, "learning_rate": 0.0004912779424555345, "loss": 0.1978, "step": 61170 }, { "epoch": 2.53, "grad_norm": 0.51171875, "learning_rate": 0.0004912751025416207, "loss": 0.2221, "step": 61180 }, { "epoch": 2.53, "grad_norm": 1.21875, "learning_rate": 0.0004912722621736532, "loss": 0.2351, "step": 61190 }, { "epoch": 2.53, "grad_norm": 0.341796875, "learning_rate": 0.0004912694213516374, "loss": 0.2428, "step": 61200 }, { "epoch": 2.54, "grad_norm": 0.4609375, "learning_rate": 0.0004912665800755786, "loss": 0.2275, "step": 61210 }, { "epoch": 2.54, "grad_norm": 0.65625, "learning_rate": 0.0004912637383454821, "loss": 0.2273, "step": 61220 }, { "epoch": 2.54, "grad_norm": 0.255859375, "learning_rate": 0.0004912608961613532, "loss": 0.1852, "step": 61230 }, { "epoch": 2.54, "grad_norm": 1.1328125, "learning_rate": 0.0004912580535231975, "loss": 0.2304, "step": 61240 }, { "epoch": 2.54, "grad_norm": 0.255859375, "learning_rate": 0.0004912552104310202, "loss": 0.1983, "step": 61250 }, { "epoch": 2.54, "grad_norm": 2.328125, "learning_rate": 0.0004912523668848266, "loss": 0.1444, "step": 61260 }, { "epoch": 2.54, "grad_norm": 0.47265625, "learning_rate": 0.0004912495228846221, "loss": 0.2435, "step": 61270 }, { "epoch": 2.54, "grad_norm": 0.96484375, "learning_rate": 0.0004912466784304121, "loss": 0.2062, "step": 61280 }, { "epoch": 2.54, "grad_norm": 0.73046875, "learning_rate": 0.0004912438335222018, "loss": 0.2594, "step": 61290 }, { "epoch": 2.54, "grad_norm": 0.6953125, "learning_rate": 0.0004912409881599967, "loss": 0.2309, "step": 61300 }, { "epoch": 2.54, "grad_norm": 0.419921875, "learning_rate": 0.0004912381423438022, "loss": 0.2408, "step": 61310 }, { "epoch": 2.54, "grad_norm": 0.87109375, "learning_rate": 0.0004912352960736235, "loss": 0.2322, "step": 61320 }, { "epoch": 2.54, "grad_norm": 0.70703125, "learning_rate": 0.000491232449349466, "loss": 0.223, "step": 61330 }, { "epoch": 2.54, "grad_norm": 0.6171875, "learning_rate": 0.0004912296021713351, "loss": 0.2338, "step": 61340 }, { "epoch": 2.54, "grad_norm": 0.431640625, "learning_rate": 0.0004912267545392362, "loss": 0.2247, "step": 61350 }, { "epoch": 2.54, "grad_norm": 0.79296875, "learning_rate": 0.0004912239064531745, "loss": 0.2244, "step": 61360 }, { "epoch": 2.54, "grad_norm": 0.50390625, "learning_rate": 0.0004912210579131555, "loss": 0.1863, "step": 61370 }, { "epoch": 2.54, "grad_norm": 1.46875, "learning_rate": 0.0004912182089191844, "loss": 0.2363, "step": 61380 }, { "epoch": 2.54, "grad_norm": 1.0625, "learning_rate": 0.0004912153594712668, "loss": 0.2178, "step": 61390 }, { "epoch": 2.54, "grad_norm": 1.046875, "learning_rate": 0.000491212509569408, "loss": 0.2633, "step": 61400 }, { "epoch": 2.54, "grad_norm": 0.56640625, "learning_rate": 0.0004912096592136133, "loss": 0.2362, "step": 61410 }, { "epoch": 2.54, "grad_norm": 0.921875, "learning_rate": 0.000491206808403888, "loss": 0.207, "step": 61420 }, { "epoch": 2.54, "grad_norm": 0.9375, "learning_rate": 0.0004912039571402375, "loss": 0.2181, "step": 61430 }, { "epoch": 2.54, "grad_norm": 1.0546875, "learning_rate": 0.0004912011054226673, "loss": 0.2294, "step": 61440 }, { "epoch": 2.55, "grad_norm": 0.875, "learning_rate": 0.0004911982532511826, "loss": 0.2187, "step": 61450 }, { "epoch": 2.55, "grad_norm": 0.59375, "learning_rate": 0.0004911954006257888, "loss": 0.1879, "step": 61460 }, { "epoch": 2.55, "grad_norm": 0.90625, "learning_rate": 0.0004911925475464913, "loss": 0.2114, "step": 61470 }, { "epoch": 2.55, "grad_norm": 0.86328125, "learning_rate": 0.0004911896940132956, "loss": 0.2317, "step": 61480 }, { "epoch": 2.55, "grad_norm": 0.80859375, "learning_rate": 0.0004911868400262068, "loss": 0.2348, "step": 61490 }, { "epoch": 2.55, "grad_norm": 1.1328125, "learning_rate": 0.0004911839855852306, "loss": 0.2176, "step": 61500 }, { "epoch": 2.55, "grad_norm": 0.48828125, "learning_rate": 0.000491181130690372, "loss": 0.1799, "step": 61510 }, { "epoch": 2.55, "grad_norm": 0.494140625, "learning_rate": 0.0004911782753416366, "loss": 0.2293, "step": 61520 }, { "epoch": 2.55, "grad_norm": 0.5703125, "learning_rate": 0.0004911754195390298, "loss": 0.2635, "step": 61530 }, { "epoch": 2.55, "grad_norm": 0.29296875, "learning_rate": 0.0004911725632825568, "loss": 0.2325, "step": 61540 }, { "epoch": 2.55, "grad_norm": 0.271484375, "learning_rate": 0.0004911697065722231, "loss": 0.1926, "step": 61550 }, { "epoch": 2.55, "grad_norm": 1.4296875, "learning_rate": 0.0004911668494080342, "loss": 0.2368, "step": 61560 }, { "epoch": 2.55, "grad_norm": 0.9140625, "learning_rate": 0.0004911639917899952, "loss": 0.1722, "step": 61570 }, { "epoch": 2.55, "grad_norm": 0.412109375, "learning_rate": 0.0004911611337181116, "loss": 0.2476, "step": 61580 }, { "epoch": 2.55, "grad_norm": 0.435546875, "learning_rate": 0.0004911582751923888, "loss": 0.2436, "step": 61590 }, { "epoch": 2.55, "grad_norm": 0.51171875, "learning_rate": 0.0004911554162128322, "loss": 0.2396, "step": 61600 }, { "epoch": 2.55, "grad_norm": 0.8984375, "learning_rate": 0.0004911525567794471, "loss": 0.2104, "step": 61610 }, { "epoch": 2.55, "grad_norm": 2.78125, "learning_rate": 0.0004911496968922391, "loss": 0.234, "step": 61620 }, { "epoch": 2.55, "grad_norm": 0.65625, "learning_rate": 0.0004911468365512133, "loss": 0.1905, "step": 61630 }, { "epoch": 2.55, "grad_norm": 1.1171875, "learning_rate": 0.0004911439757563751, "loss": 0.2518, "step": 61640 }, { "epoch": 2.55, "grad_norm": 0.625, "learning_rate": 0.0004911411145077301, "loss": 0.2245, "step": 61650 }, { "epoch": 2.55, "grad_norm": 0.41796875, "learning_rate": 0.0004911382528052836, "loss": 0.2625, "step": 61660 }, { "epoch": 2.55, "grad_norm": 0.5078125, "learning_rate": 0.0004911353906490408, "loss": 0.2327, "step": 61670 }, { "epoch": 2.55, "grad_norm": 0.58984375, "learning_rate": 0.0004911325280390073, "loss": 0.2392, "step": 61680 }, { "epoch": 2.56, "grad_norm": 0.49609375, "learning_rate": 0.0004911296649751884, "loss": 0.2153, "step": 61690 }, { "epoch": 2.56, "grad_norm": 0.72265625, "learning_rate": 0.0004911268014575896, "loss": 0.25, "step": 61700 }, { "epoch": 2.56, "grad_norm": 0.86328125, "learning_rate": 0.000491123937486216, "loss": 0.2492, "step": 61710 }, { "epoch": 2.56, "grad_norm": 0.56640625, "learning_rate": 0.0004911210730610734, "loss": 0.2524, "step": 61720 }, { "epoch": 2.56, "grad_norm": 0.51171875, "learning_rate": 0.000491118208182167, "loss": 0.1819, "step": 61730 }, { "epoch": 2.56, "grad_norm": 0.796875, "learning_rate": 0.000491115342849502, "loss": 0.2055, "step": 61740 }, { "epoch": 2.56, "grad_norm": 1.078125, "learning_rate": 0.0004911124770630841, "loss": 0.2195, "step": 61750 }, { "epoch": 2.56, "grad_norm": 0.65625, "learning_rate": 0.0004911096108229185, "loss": 0.1851, "step": 61760 }, { "epoch": 2.56, "grad_norm": 0.66796875, "learning_rate": 0.0004911067441290106, "loss": 0.2173, "step": 61770 }, { "epoch": 2.56, "grad_norm": 0.2041015625, "learning_rate": 0.0004911038769813659, "loss": 0.2124, "step": 61780 }, { "epoch": 2.56, "grad_norm": 0.494140625, "learning_rate": 0.0004911010093799897, "loss": 0.2446, "step": 61790 }, { "epoch": 2.56, "grad_norm": 0.50390625, "learning_rate": 0.0004910981413248875, "loss": 0.2262, "step": 61800 }, { "epoch": 2.56, "grad_norm": 0.90234375, "learning_rate": 0.0004910952728160646, "loss": 0.2128, "step": 61810 }, { "epoch": 2.56, "grad_norm": 0.6875, "learning_rate": 0.0004910924038535265, "loss": 0.2394, "step": 61820 }, { "epoch": 2.56, "grad_norm": 1.3203125, "learning_rate": 0.0004910895344372784, "loss": 0.2612, "step": 61830 }, { "epoch": 2.56, "grad_norm": 1.015625, "learning_rate": 0.0004910866645673259, "loss": 0.2312, "step": 61840 }, { "epoch": 2.56, "grad_norm": 0.640625, "learning_rate": 0.0004910837942436744, "loss": 0.2064, "step": 61850 }, { "epoch": 2.56, "grad_norm": 0.53125, "learning_rate": 0.0004910809234663292, "loss": 0.2628, "step": 61860 }, { "epoch": 2.56, "grad_norm": 1.578125, "learning_rate": 0.0004910780522352957, "loss": 0.1879, "step": 61870 }, { "epoch": 2.56, "grad_norm": 0.8203125, "learning_rate": 0.0004910751805505794, "loss": 0.1951, "step": 61880 }, { "epoch": 2.56, "grad_norm": 1.71875, "learning_rate": 0.0004910723084121855, "loss": 0.2322, "step": 61890 }, { "epoch": 2.56, "grad_norm": 0.6640625, "learning_rate": 0.0004910694358201197, "loss": 0.2458, "step": 61900 }, { "epoch": 2.56, "grad_norm": 0.57421875, "learning_rate": 0.0004910665627743871, "loss": 0.195, "step": 61910 }, { "epoch": 2.56, "grad_norm": 0.7578125, "learning_rate": 0.0004910636892749933, "loss": 0.2174, "step": 61920 }, { "epoch": 2.57, "grad_norm": 1.2109375, "learning_rate": 0.0004910608153219438, "loss": 0.2365, "step": 61930 }, { "epoch": 2.57, "grad_norm": 1.0234375, "learning_rate": 0.0004910579409152438, "loss": 0.2063, "step": 61940 }, { "epoch": 2.57, "grad_norm": 0.70703125, "learning_rate": 0.0004910550660548988, "loss": 0.1786, "step": 61950 }, { "epoch": 2.57, "grad_norm": 0.6953125, "learning_rate": 0.0004910521907409141, "loss": 0.2283, "step": 61960 }, { "epoch": 2.57, "grad_norm": 0.6328125, "learning_rate": 0.0004910493149732954, "loss": 0.2611, "step": 61970 }, { "epoch": 2.57, "grad_norm": 0.9296875, "learning_rate": 0.0004910464387520478, "loss": 0.2276, "step": 61980 }, { "epoch": 2.57, "grad_norm": 0.59375, "learning_rate": 0.0004910435620771768, "loss": 0.2134, "step": 61990 }, { "epoch": 2.57, "grad_norm": 0.76953125, "learning_rate": 0.0004910406849486879, "loss": 0.1933, "step": 62000 }, { "epoch": 2.57, "grad_norm": 1.0546875, "learning_rate": 0.0004910378073665864, "loss": 0.1609, "step": 62010 }, { "epoch": 2.57, "grad_norm": 0.61328125, "learning_rate": 0.0004910349293308779, "loss": 0.1911, "step": 62020 }, { "epoch": 2.57, "grad_norm": 0.6796875, "learning_rate": 0.0004910320508415677, "loss": 0.2387, "step": 62030 }, { "epoch": 2.57, "grad_norm": 0.9140625, "learning_rate": 0.000491029171898661, "loss": 0.1584, "step": 62040 }, { "epoch": 2.57, "grad_norm": 0.8125, "learning_rate": 0.0004910262925021636, "loss": 0.1855, "step": 62050 }, { "epoch": 2.57, "grad_norm": 0.82421875, "learning_rate": 0.0004910234126520807, "loss": 0.2447, "step": 62060 }, { "epoch": 2.57, "grad_norm": 1.1171875, "learning_rate": 0.0004910205323484178, "loss": 0.2262, "step": 62070 }, { "epoch": 2.57, "grad_norm": 0.54296875, "learning_rate": 0.0004910176515911803, "loss": 0.2319, "step": 62080 }, { "epoch": 2.57, "grad_norm": 0.69921875, "learning_rate": 0.0004910147703803735, "loss": 0.1908, "step": 62090 }, { "epoch": 2.57, "grad_norm": 0.5078125, "learning_rate": 0.000491011888716003, "loss": 0.1534, "step": 62100 }, { "epoch": 2.57, "grad_norm": 0.8984375, "learning_rate": 0.0004910090065980742, "loss": 0.2843, "step": 62110 }, { "epoch": 2.57, "grad_norm": 0.890625, "learning_rate": 0.0004910061240265924, "loss": 0.2084, "step": 62120 }, { "epoch": 2.57, "grad_norm": 0.400390625, "learning_rate": 0.0004910032410015632, "loss": 0.1613, "step": 62130 }, { "epoch": 2.57, "grad_norm": 0.78515625, "learning_rate": 0.0004910003575229918, "loss": 0.2279, "step": 62140 }, { "epoch": 2.57, "grad_norm": 0.95703125, "learning_rate": 0.0004909974735908838, "loss": 0.2486, "step": 62150 }, { "epoch": 2.57, "grad_norm": 0.875, "learning_rate": 0.0004909945892052445, "loss": 0.1541, "step": 62160 }, { "epoch": 2.58, "grad_norm": 0.54296875, "learning_rate": 0.0004909917043660795, "loss": 0.2431, "step": 62170 }, { "epoch": 2.58, "grad_norm": 0.4375, "learning_rate": 0.0004909888190733942, "loss": 0.2435, "step": 62180 }, { "epoch": 2.58, "grad_norm": 0.46875, "learning_rate": 0.0004909859333271938, "loss": 0.2469, "step": 62190 }, { "epoch": 2.58, "grad_norm": 2.109375, "learning_rate": 0.0004909830471274841, "loss": 0.2465, "step": 62200 }, { "epoch": 2.58, "grad_norm": 0.458984375, "learning_rate": 0.0004909801604742702, "loss": 0.2161, "step": 62210 }, { "epoch": 2.58, "grad_norm": 1.078125, "learning_rate": 0.0004909772733675577, "loss": 0.2008, "step": 62220 }, { "epoch": 2.58, "grad_norm": 0.6640625, "learning_rate": 0.000490974385807352, "loss": 0.228, "step": 62230 }, { "epoch": 2.58, "grad_norm": 0.6953125, "learning_rate": 0.0004909714977936585, "loss": 0.2033, "step": 62240 }, { "epoch": 2.58, "grad_norm": 0.0, "learning_rate": 0.0004909686093264827, "loss": 0.2119, "step": 62250 }, { "epoch": 2.58, "grad_norm": 1.125, "learning_rate": 0.0004909657204058299, "loss": 0.2095, "step": 62260 }, { "epoch": 2.58, "grad_norm": 1.3125, "learning_rate": 0.0004909628310317058, "loss": 0.2218, "step": 62270 }, { "epoch": 2.58, "grad_norm": 1.3203125, "learning_rate": 0.0004909599412041155, "loss": 0.2857, "step": 62280 }, { "epoch": 2.58, "grad_norm": 0.5546875, "learning_rate": 0.0004909570509230648, "loss": 0.251, "step": 62290 }, { "epoch": 2.58, "grad_norm": 0.76953125, "learning_rate": 0.0004909541601885587, "loss": 0.2294, "step": 62300 }, { "epoch": 2.58, "grad_norm": 0.625, "learning_rate": 0.0004909512690006031, "loss": 0.1956, "step": 62310 }, { "epoch": 2.58, "grad_norm": 1.1328125, "learning_rate": 0.0004909483773592032, "loss": 0.2396, "step": 62320 }, { "epoch": 2.58, "grad_norm": 0.302734375, "learning_rate": 0.0004909454852643644, "loss": 0.2105, "step": 62330 }, { "epoch": 2.58, "grad_norm": 0.58203125, "learning_rate": 0.0004909425927160923, "loss": 0.2308, "step": 62340 }, { "epoch": 2.58, "grad_norm": 0.33203125, "learning_rate": 0.0004909396997143922, "loss": 0.2032, "step": 62350 }, { "epoch": 2.58, "grad_norm": 0.953125, "learning_rate": 0.0004909368062592696, "loss": 0.227, "step": 62360 }, { "epoch": 2.58, "grad_norm": 0.5234375, "learning_rate": 0.00049093391235073, "loss": 0.1597, "step": 62370 }, { "epoch": 2.58, "grad_norm": 0.98828125, "learning_rate": 0.0004909310179887788, "loss": 0.1965, "step": 62380 }, { "epoch": 2.58, "grad_norm": 0.1708984375, "learning_rate": 0.0004909281231734214, "loss": 0.2364, "step": 62390 }, { "epoch": 2.58, "grad_norm": 1.390625, "learning_rate": 0.0004909252279046634, "loss": 0.175, "step": 62400 }, { "epoch": 2.59, "grad_norm": 0.6328125, "learning_rate": 0.0004909223321825099, "loss": 0.2075, "step": 62410 }, { "epoch": 2.59, "grad_norm": 0.6484375, "learning_rate": 0.0004909194360069668, "loss": 0.2667, "step": 62420 }, { "epoch": 2.59, "grad_norm": 0.7421875, "learning_rate": 0.0004909165393780393, "loss": 0.2515, "step": 62430 }, { "epoch": 2.59, "grad_norm": 0.68359375, "learning_rate": 0.0004909136422957328, "loss": 0.2621, "step": 62440 }, { "epoch": 2.59, "grad_norm": 2.328125, "learning_rate": 0.0004909107447600529, "loss": 0.2395, "step": 62450 }, { "epoch": 2.59, "grad_norm": 0.1884765625, "learning_rate": 0.0004909078467710049, "loss": 0.2733, "step": 62460 }, { "epoch": 2.59, "grad_norm": 0.5859375, "learning_rate": 0.0004909049483285946, "loss": 0.2062, "step": 62470 }, { "epoch": 2.59, "grad_norm": 1.2421875, "learning_rate": 0.0004909020494328271, "loss": 0.1689, "step": 62480 }, { "epoch": 2.59, "grad_norm": 1.65625, "learning_rate": 0.0004908991500837078, "loss": 0.2063, "step": 62490 }, { "epoch": 2.59, "grad_norm": 0.419921875, "learning_rate": 0.0004908962502812425, "loss": 0.2063, "step": 62500 }, { "epoch": 2.59, "grad_norm": 0.58984375, "learning_rate": 0.0004908933500254363, "loss": 0.2604, "step": 62510 }, { "epoch": 2.59, "grad_norm": 1.078125, "learning_rate": 0.0004908904493162949, "loss": 0.2397, "step": 62520 }, { "epoch": 2.59, "grad_norm": 1.03125, "learning_rate": 0.0004908875481538237, "loss": 0.2351, "step": 62530 }, { "epoch": 2.59, "grad_norm": 0.6171875, "learning_rate": 0.0004908846465380282, "loss": 0.1626, "step": 62540 }, { "epoch": 2.59, "grad_norm": 0.41796875, "learning_rate": 0.0004908817444689138, "loss": 0.2083, "step": 62550 }, { "epoch": 2.59, "grad_norm": 0.71875, "learning_rate": 0.0004908788419464859, "loss": 0.2062, "step": 62560 }, { "epoch": 2.59, "grad_norm": 0.51953125, "learning_rate": 0.00049087593897075, "loss": 0.2009, "step": 62570 }, { "epoch": 2.59, "grad_norm": 0.94140625, "learning_rate": 0.0004908730355417117, "loss": 0.2519, "step": 62580 }, { "epoch": 2.59, "grad_norm": 0.5390625, "learning_rate": 0.0004908701316593763, "loss": 0.2339, "step": 62590 }, { "epoch": 2.59, "grad_norm": 0.458984375, "learning_rate": 0.0004908672273237493, "loss": 0.1651, "step": 62600 }, { "epoch": 2.59, "grad_norm": 0.83984375, "learning_rate": 0.0004908643225348362, "loss": 0.216, "step": 62610 }, { "epoch": 2.59, "grad_norm": 0.62109375, "learning_rate": 0.0004908614172926426, "loss": 0.2664, "step": 62620 }, { "epoch": 2.59, "grad_norm": 0.94140625, "learning_rate": 0.0004908585115971737, "loss": 0.1787, "step": 62630 }, { "epoch": 2.59, "grad_norm": 1.0625, "learning_rate": 0.0004908556054484351, "loss": 0.2295, "step": 62640 }, { "epoch": 2.59, "grad_norm": 0.435546875, "learning_rate": 0.0004908526988464323, "loss": 0.2171, "step": 62650 }, { "epoch": 2.6, "grad_norm": 0.408203125, "learning_rate": 0.0004908497917911706, "loss": 0.2427, "step": 62660 }, { "epoch": 2.6, "grad_norm": 1.0390625, "learning_rate": 0.0004908468842826557, "loss": 0.1558, "step": 62670 }, { "epoch": 2.6, "grad_norm": 1.1484375, "learning_rate": 0.0004908439763208931, "loss": 0.1729, "step": 62680 }, { "epoch": 2.6, "grad_norm": 1.015625, "learning_rate": 0.0004908410679058879, "loss": 0.2673, "step": 62690 }, { "epoch": 2.6, "grad_norm": 0.41796875, "learning_rate": 0.000490838159037646, "loss": 0.1841, "step": 62700 }, { "epoch": 2.6, "grad_norm": 0.484375, "learning_rate": 0.0004908352497161726, "loss": 0.2279, "step": 62710 }, { "epoch": 2.6, "grad_norm": 0.53515625, "learning_rate": 0.0004908323399414733, "loss": 0.1633, "step": 62720 }, { "epoch": 2.6, "grad_norm": 1.21875, "learning_rate": 0.0004908294297135535, "loss": 0.2478, "step": 62730 }, { "epoch": 2.6, "grad_norm": 0.73828125, "learning_rate": 0.0004908265190324188, "loss": 0.217, "step": 62740 }, { "epoch": 2.6, "grad_norm": 0.4765625, "learning_rate": 0.0004908236078980746, "loss": 0.1685, "step": 62750 }, { "epoch": 2.6, "grad_norm": 0.62109375, "learning_rate": 0.0004908206963105263, "loss": 0.2233, "step": 62760 }, { "epoch": 2.6, "grad_norm": 0.5390625, "learning_rate": 0.0004908177842697795, "loss": 0.1983, "step": 62770 }, { "epoch": 2.6, "grad_norm": 0.59765625, "learning_rate": 0.0004908148717758396, "loss": 0.2106, "step": 62780 }, { "epoch": 2.6, "grad_norm": 0.515625, "learning_rate": 0.0004908119588287121, "loss": 0.1954, "step": 62790 }, { "epoch": 2.6, "grad_norm": 0.1572265625, "learning_rate": 0.0004908090454284026, "loss": 0.1737, "step": 62800 }, { "epoch": 2.6, "grad_norm": 0.55078125, "learning_rate": 0.0004908061315749164, "loss": 0.2143, "step": 62810 }, { "epoch": 2.6, "grad_norm": 0.5, "learning_rate": 0.0004908032172682592, "loss": 0.2289, "step": 62820 }, { "epoch": 2.6, "grad_norm": 0.5859375, "learning_rate": 0.0004908003025084362, "loss": 0.2729, "step": 62830 }, { "epoch": 2.6, "grad_norm": 0.57421875, "learning_rate": 0.000490797387295453, "loss": 0.2458, "step": 62840 }, { "epoch": 2.6, "grad_norm": 0.48828125, "learning_rate": 0.0004907944716293153, "loss": 0.2012, "step": 62850 }, { "epoch": 2.6, "grad_norm": 0.69140625, "learning_rate": 0.0004907915555100282, "loss": 0.1453, "step": 62860 }, { "epoch": 2.6, "grad_norm": 1.1484375, "learning_rate": 0.0004907886389375975, "loss": 0.2141, "step": 62870 }, { "epoch": 2.6, "grad_norm": 0.52734375, "learning_rate": 0.0004907857219120286, "loss": 0.2729, "step": 62880 }, { "epoch": 2.6, "grad_norm": 0.5703125, "learning_rate": 0.000490782804433327, "loss": 0.2458, "step": 62890 }, { "epoch": 2.61, "grad_norm": 0.78125, "learning_rate": 0.0004907798865014981, "loss": 0.1578, "step": 62900 }, { "epoch": 2.61, "grad_norm": 1.9921875, "learning_rate": 0.0004907769681165475, "loss": 0.2324, "step": 62910 }, { "epoch": 2.61, "grad_norm": 1.2578125, "learning_rate": 0.0004907740492784805, "loss": 0.2598, "step": 62920 }, { "epoch": 2.61, "grad_norm": 0.62890625, "learning_rate": 0.000490771129987303, "loss": 0.2475, "step": 62930 }, { "epoch": 2.61, "grad_norm": 0.7578125, "learning_rate": 0.0004907682102430201, "loss": 0.1894, "step": 62940 }, { "epoch": 2.61, "grad_norm": 0.65234375, "learning_rate": 0.0004907652900456375, "loss": 0.2224, "step": 62950 }, { "epoch": 2.61, "grad_norm": 0.494140625, "learning_rate": 0.0004907623693951605, "loss": 0.2297, "step": 62960 }, { "epoch": 2.61, "grad_norm": 1.6171875, "learning_rate": 0.0004907594482915947, "loss": 0.2009, "step": 62970 }, { "epoch": 2.61, "grad_norm": 0.73828125, "learning_rate": 0.0004907565267349458, "loss": 0.2164, "step": 62980 }, { "epoch": 2.61, "grad_norm": 1.3515625, "learning_rate": 0.0004907536047252189, "loss": 0.217, "step": 62990 }, { "epoch": 2.61, "grad_norm": 0.2451171875, "learning_rate": 0.0004907506822624198, "loss": 0.2261, "step": 63000 }, { "epoch": 2.61, "grad_norm": 1.7265625, "learning_rate": 0.000490747759346554, "loss": 0.2278, "step": 63010 }, { "epoch": 2.61, "grad_norm": 0.58203125, "learning_rate": 0.0004907448359776268, "loss": 0.2465, "step": 63020 }, { "epoch": 2.61, "grad_norm": 0.85546875, "learning_rate": 0.0004907419121556439, "loss": 0.2233, "step": 63030 }, { "epoch": 2.61, "grad_norm": 0.64453125, "learning_rate": 0.0004907389878806107, "loss": 0.2724, "step": 63040 }, { "epoch": 2.61, "grad_norm": 1.0234375, "learning_rate": 0.0004907360631525328, "loss": 0.214, "step": 63050 }, { "epoch": 2.61, "grad_norm": 0.90234375, "learning_rate": 0.0004907331379714155, "loss": 0.2144, "step": 63060 }, { "epoch": 2.61, "grad_norm": 0.6328125, "learning_rate": 0.0004907302123372644, "loss": 0.2165, "step": 63070 }, { "epoch": 2.61, "grad_norm": 0.67578125, "learning_rate": 0.0004907272862500851, "loss": 0.174, "step": 63080 }, { "epoch": 2.61, "grad_norm": 0.87890625, "learning_rate": 0.0004907243597098831, "loss": 0.2097, "step": 63090 }, { "epoch": 2.61, "grad_norm": 0.921875, "learning_rate": 0.0004907214327166638, "loss": 0.2439, "step": 63100 }, { "epoch": 2.61, "grad_norm": 0.7265625, "learning_rate": 0.0004907185052704327, "loss": 0.2127, "step": 63110 }, { "epoch": 2.61, "grad_norm": 0.640625, "learning_rate": 0.0004907155773711955, "loss": 0.1855, "step": 63120 }, { "epoch": 2.61, "grad_norm": 0.8203125, "learning_rate": 0.0004907126490189575, "loss": 0.2715, "step": 63130 }, { "epoch": 2.62, "grad_norm": 1.125, "learning_rate": 0.0004907097202137243, "loss": 0.2287, "step": 63140 }, { "epoch": 2.62, "grad_norm": 0.9921875, "learning_rate": 0.0004907067909555014, "loss": 0.3, "step": 63150 }, { "epoch": 2.62, "grad_norm": 0.6015625, "learning_rate": 0.0004907038612442943, "loss": 0.2183, "step": 63160 }, { "epoch": 2.62, "grad_norm": 0.67578125, "learning_rate": 0.0004907009310801085, "loss": 0.2436, "step": 63170 }, { "epoch": 2.62, "grad_norm": 0.54296875, "learning_rate": 0.0004906980004629496, "loss": 0.1849, "step": 63180 }, { "epoch": 2.62, "grad_norm": 0.46875, "learning_rate": 0.0004906950693928231, "loss": 0.205, "step": 63190 }, { "epoch": 2.62, "grad_norm": 0.27734375, "learning_rate": 0.0004906921378697343, "loss": 0.1752, "step": 63200 }, { "epoch": 2.62, "grad_norm": 0.46875, "learning_rate": 0.000490689205893689, "loss": 0.2172, "step": 63210 }, { "epoch": 2.62, "grad_norm": 1.515625, "learning_rate": 0.0004906862734646927, "loss": 0.2893, "step": 63220 }, { "epoch": 2.62, "grad_norm": 0.85546875, "learning_rate": 0.0004906833405827507, "loss": 0.195, "step": 63230 }, { "epoch": 2.62, "grad_norm": 0.51171875, "learning_rate": 0.0004906804072478686, "loss": 0.2369, "step": 63240 }, { "epoch": 2.62, "grad_norm": 0.443359375, "learning_rate": 0.0004906774734600521, "loss": 0.1905, "step": 63250 }, { "epoch": 2.62, "grad_norm": 3.296875, "learning_rate": 0.0004906745392193065, "loss": 0.2149, "step": 63260 }, { "epoch": 2.62, "grad_norm": 0.52734375, "learning_rate": 0.0004906716045256374, "loss": 0.2138, "step": 63270 }, { "epoch": 2.62, "grad_norm": 0.435546875, "learning_rate": 0.0004906686693790504, "loss": 0.1714, "step": 63280 }, { "epoch": 2.62, "grad_norm": 1.515625, "learning_rate": 0.000490665733779551, "loss": 0.2053, "step": 63290 }, { "epoch": 2.62, "grad_norm": 0.59375, "learning_rate": 0.0004906627977271445, "loss": 0.2, "step": 63300 }, { "epoch": 2.62, "grad_norm": 0.369140625, "learning_rate": 0.0004906598612218367, "loss": 0.1564, "step": 63310 }, { "epoch": 2.62, "grad_norm": 0.38671875, "learning_rate": 0.0004906569242636331, "loss": 0.2284, "step": 63320 }, { "epoch": 2.62, "grad_norm": 0.85546875, "learning_rate": 0.000490653986852539, "loss": 0.2079, "step": 63330 }, { "epoch": 2.62, "grad_norm": 0.4296875, "learning_rate": 0.0004906510489885602, "loss": 0.2003, "step": 63340 }, { "epoch": 2.62, "grad_norm": 0.75390625, "learning_rate": 0.0004906481106717022, "loss": 0.239, "step": 63350 }, { "epoch": 2.62, "grad_norm": 0.8125, "learning_rate": 0.0004906451719019703, "loss": 0.238, "step": 63360 }, { "epoch": 2.62, "grad_norm": 0.8125, "learning_rate": 0.0004906422326793701, "loss": 0.229, "step": 63370 }, { "epoch": 2.63, "grad_norm": 0.94921875, "learning_rate": 0.0004906392930039073, "loss": 0.1833, "step": 63380 }, { "epoch": 2.63, "grad_norm": 0.95703125, "learning_rate": 0.0004906363528755874, "loss": 0.2206, "step": 63390 }, { "epoch": 2.63, "grad_norm": 0.44921875, "learning_rate": 0.0004906334122944158, "loss": 0.1993, "step": 63400 }, { "epoch": 2.63, "grad_norm": 1.015625, "learning_rate": 0.0004906304712603981, "loss": 0.2756, "step": 63410 }, { "epoch": 2.63, "grad_norm": 0.75, "learning_rate": 0.0004906275297735399, "loss": 0.1887, "step": 63420 }, { "epoch": 2.63, "grad_norm": 1.1328125, "learning_rate": 0.0004906245878338466, "loss": 0.2118, "step": 63430 }, { "epoch": 2.63, "grad_norm": 1.0546875, "learning_rate": 0.0004906216454413239, "loss": 0.2344, "step": 63440 }, { "epoch": 2.63, "grad_norm": 0.263671875, "learning_rate": 0.0004906187025959772, "loss": 0.2221, "step": 63450 }, { "epoch": 2.63, "grad_norm": 0.65234375, "learning_rate": 0.000490615759297812, "loss": 0.2256, "step": 63460 }, { "epoch": 2.63, "grad_norm": 1.4140625, "learning_rate": 0.000490612815546834, "loss": 0.2225, "step": 63470 }, { "epoch": 2.63, "grad_norm": 0.5703125, "learning_rate": 0.0004906098713430486, "loss": 0.2295, "step": 63480 }, { "epoch": 2.63, "grad_norm": 1.7109375, "learning_rate": 0.0004906069266864616, "loss": 0.1966, "step": 63490 }, { "epoch": 2.63, "grad_norm": 0.66796875, "learning_rate": 0.0004906039815770782, "loss": 0.1744, "step": 63500 }, { "epoch": 2.63, "grad_norm": 0.9375, "learning_rate": 0.0004906010360149042, "loss": 0.2185, "step": 63510 }, { "epoch": 2.63, "grad_norm": 0.4609375, "learning_rate": 0.0004905980899999449, "loss": 0.1782, "step": 63520 }, { "epoch": 2.63, "grad_norm": 0.353515625, "learning_rate": 0.000490595143532206, "loss": 0.2356, "step": 63530 }, { "epoch": 2.63, "grad_norm": 0.361328125, "learning_rate": 0.000490592196611693, "loss": 0.2173, "step": 63540 }, { "epoch": 2.63, "grad_norm": 0.421875, "learning_rate": 0.0004905892492384115, "loss": 0.1999, "step": 63550 }, { "epoch": 2.63, "grad_norm": 0.373046875, "learning_rate": 0.0004905863014123671, "loss": 0.2545, "step": 63560 }, { "epoch": 2.63, "grad_norm": 1.171875, "learning_rate": 0.0004905833531335652, "loss": 0.2289, "step": 63570 }, { "epoch": 2.63, "grad_norm": 0.447265625, "learning_rate": 0.0004905804044020113, "loss": 0.2764, "step": 63580 }, { "epoch": 2.63, "grad_norm": 0.5859375, "learning_rate": 0.0004905774552177113, "loss": 0.1965, "step": 63590 }, { "epoch": 2.63, "grad_norm": 0.87109375, "learning_rate": 0.0004905745055806703, "loss": 0.2233, "step": 63600 }, { "epoch": 2.63, "grad_norm": 0.58203125, "learning_rate": 0.0004905715554908941, "loss": 0.1802, "step": 63610 }, { "epoch": 2.64, "grad_norm": 0.4375, "learning_rate": 0.0004905686049483883, "loss": 0.2223, "step": 63620 }, { "epoch": 2.64, "grad_norm": 0.640625, "learning_rate": 0.0004905656539531582, "loss": 0.3025, "step": 63630 }, { "epoch": 2.64, "grad_norm": 0.423828125, "learning_rate": 0.0004905627025052097, "loss": 0.2618, "step": 63640 }, { "epoch": 2.64, "grad_norm": 0.2138671875, "learning_rate": 0.000490559750604548, "loss": 0.1626, "step": 63650 }, { "epoch": 2.64, "grad_norm": 0.55078125, "learning_rate": 0.000490556798251179, "loss": 0.1787, "step": 63660 }, { "epoch": 2.64, "grad_norm": 1.1484375, "learning_rate": 0.0004905538454451079, "loss": 0.1989, "step": 63670 }, { "epoch": 2.64, "grad_norm": 1.0, "learning_rate": 0.0004905508921863406, "loss": 0.2063, "step": 63680 }, { "epoch": 2.64, "grad_norm": 0.494140625, "learning_rate": 0.0004905479384748825, "loss": 0.2084, "step": 63690 }, { "epoch": 2.64, "grad_norm": 0.4765625, "learning_rate": 0.000490544984310739, "loss": 0.1991, "step": 63700 }, { "epoch": 2.64, "grad_norm": 0.55078125, "learning_rate": 0.000490542029693916, "loss": 0.196, "step": 63710 }, { "epoch": 2.64, "grad_norm": 0.65625, "learning_rate": 0.0004905390746244189, "loss": 0.2449, "step": 63720 }, { "epoch": 2.64, "grad_norm": 0.427734375, "learning_rate": 0.0004905361191022531, "loss": 0.2102, "step": 63730 }, { "epoch": 2.64, "grad_norm": 0.64453125, "learning_rate": 0.0004905331631274242, "loss": 0.2091, "step": 63740 }, { "epoch": 2.64, "grad_norm": 0.310546875, "learning_rate": 0.0004905302066999381, "loss": 0.2348, "step": 63750 }, { "epoch": 2.64, "grad_norm": 0.75390625, "learning_rate": 0.0004905272498198, "loss": 0.1697, "step": 63760 }, { "epoch": 2.64, "grad_norm": 0.4765625, "learning_rate": 0.0004905242924870156, "loss": 0.2552, "step": 63770 }, { "epoch": 2.64, "grad_norm": 0.357421875, "learning_rate": 0.0004905213347015904, "loss": 0.223, "step": 63780 }, { "epoch": 2.64, "grad_norm": 0.5078125, "learning_rate": 0.0004905183764635302, "loss": 0.1712, "step": 63790 }, { "epoch": 2.64, "grad_norm": 0.546875, "learning_rate": 0.0004905154177728402, "loss": 0.2183, "step": 63800 }, { "epoch": 2.64, "grad_norm": 1.5, "learning_rate": 0.0004905124586295262, "loss": 0.2101, "step": 63810 }, { "epoch": 2.64, "grad_norm": 0.296875, "learning_rate": 0.0004905094990335937, "loss": 0.2152, "step": 63820 }, { "epoch": 2.64, "grad_norm": 0.58203125, "learning_rate": 0.0004905065389850485, "loss": 0.2107, "step": 63830 }, { "epoch": 2.64, "grad_norm": 0.79296875, "learning_rate": 0.0004905035784838958, "loss": 0.2416, "step": 63840 }, { "epoch": 2.64, "grad_norm": 1.6953125, "learning_rate": 0.0004905006175301413, "loss": 0.1859, "step": 63850 }, { "epoch": 2.65, "grad_norm": 0.349609375, "learning_rate": 0.0004904976561237907, "loss": 0.2065, "step": 63860 }, { "epoch": 2.65, "grad_norm": 2.609375, "learning_rate": 0.0004904946942648495, "loss": 0.2588, "step": 63870 }, { "epoch": 2.65, "grad_norm": 0.50390625, "learning_rate": 0.0004904917319533232, "loss": 0.2069, "step": 63880 }, { "epoch": 2.65, "grad_norm": 0.453125, "learning_rate": 0.0004904887691892174, "loss": 0.1566, "step": 63890 }, { "epoch": 2.65, "grad_norm": 0.0, "learning_rate": 0.0004904858059725378, "loss": 0.1738, "step": 63900 }, { "epoch": 2.65, "grad_norm": 0.703125, "learning_rate": 0.0004904828423032898, "loss": 0.2273, "step": 63910 }, { "epoch": 2.65, "grad_norm": 0.37890625, "learning_rate": 0.0004904798781814791, "loss": 0.2195, "step": 63920 }, { "epoch": 2.65, "grad_norm": 1.375, "learning_rate": 0.0004904769136071112, "loss": 0.2235, "step": 63930 }, { "epoch": 2.65, "grad_norm": 0.30859375, "learning_rate": 0.0004904739485801918, "loss": 0.2004, "step": 63940 }, { "epoch": 2.65, "grad_norm": 0.6171875, "learning_rate": 0.0004904709831007263, "loss": 0.2121, "step": 63950 }, { "epoch": 2.65, "grad_norm": 0.337890625, "learning_rate": 0.0004904680171687204, "loss": 0.2186, "step": 63960 }, { "epoch": 2.65, "grad_norm": 1.28125, "learning_rate": 0.0004904650507841796, "loss": 0.1909, "step": 63970 }, { "epoch": 2.65, "grad_norm": 0.59375, "learning_rate": 0.0004904620839471097, "loss": 0.2089, "step": 63980 }, { "epoch": 2.65, "grad_norm": 0.51953125, "learning_rate": 0.0004904591166575161, "loss": 0.2741, "step": 63990 }, { "epoch": 2.65, "grad_norm": 0.546875, "learning_rate": 0.0004904561489154043, "loss": 0.259, "step": 64000 }, { "epoch": 2.65, "grad_norm": 0.546875, "learning_rate": 0.0004904531807207801, "loss": 0.1877, "step": 64010 }, { "epoch": 2.65, "grad_norm": 0.58984375, "learning_rate": 0.0004904502120736489, "loss": 0.1594, "step": 64020 }, { "epoch": 2.65, "grad_norm": 0.404296875, "learning_rate": 0.0004904472429740164, "loss": 0.215, "step": 64030 }, { "epoch": 2.65, "grad_norm": 0.50390625, "learning_rate": 0.0004904442734218881, "loss": 0.2322, "step": 64040 }, { "epoch": 2.65, "grad_norm": 0.34765625, "learning_rate": 0.0004904413034172698, "loss": 0.1667, "step": 64050 }, { "epoch": 2.65, "grad_norm": 0.64453125, "learning_rate": 0.0004904383329601669, "loss": 0.1841, "step": 64060 }, { "epoch": 2.65, "grad_norm": 0.71484375, "learning_rate": 0.0004904353620505848, "loss": 0.2318, "step": 64070 }, { "epoch": 2.65, "grad_norm": 0.44140625, "learning_rate": 0.0004904323906885296, "loss": 0.197, "step": 64080 }, { "epoch": 2.65, "grad_norm": 0.34765625, "learning_rate": 0.0004904294188740064, "loss": 0.2013, "step": 64090 }, { "epoch": 2.66, "grad_norm": 0.46484375, "learning_rate": 0.0004904264466070211, "loss": 0.2123, "step": 64100 }, { "epoch": 2.66, "grad_norm": 0.55859375, "learning_rate": 0.0004904234738875792, "loss": 0.2351, "step": 64110 }, { "epoch": 2.66, "grad_norm": 0.53125, "learning_rate": 0.0004904205007156862, "loss": 0.1884, "step": 64120 }, { "epoch": 2.66, "grad_norm": 0.65625, "learning_rate": 0.0004904175270913478, "loss": 0.208, "step": 64130 }, { "epoch": 2.66, "grad_norm": 0.83984375, "learning_rate": 0.0004904145530145696, "loss": 0.218, "step": 64140 }, { "epoch": 2.66, "grad_norm": 1.0, "learning_rate": 0.0004904115784853572, "loss": 0.1754, "step": 64150 }, { "epoch": 2.66, "grad_norm": 0.62890625, "learning_rate": 0.0004904086035037162, "loss": 0.1666, "step": 64160 }, { "epoch": 2.66, "grad_norm": 1.8203125, "learning_rate": 0.000490405628069652, "loss": 0.2314, "step": 64170 }, { "epoch": 2.66, "grad_norm": 0.484375, "learning_rate": 0.0004904026521831706, "loss": 0.1955, "step": 64180 }, { "epoch": 2.66, "grad_norm": 0.271484375, "learning_rate": 0.0004903996758442772, "loss": 0.2254, "step": 64190 }, { "epoch": 2.66, "grad_norm": 0.55859375, "learning_rate": 0.0004903966990529777, "loss": 0.2567, "step": 64200 }, { "epoch": 2.66, "grad_norm": 1.0390625, "learning_rate": 0.0004903937218092775, "loss": 0.214, "step": 64210 }, { "epoch": 2.66, "grad_norm": 0.52734375, "learning_rate": 0.0004903907441131823, "loss": 0.2378, "step": 64220 }, { "epoch": 2.66, "grad_norm": 2.1875, "learning_rate": 0.0004903877659646976, "loss": 0.2494, "step": 64230 }, { "epoch": 2.66, "grad_norm": 0.66796875, "learning_rate": 0.0004903847873638292, "loss": 0.1876, "step": 64240 }, { "epoch": 2.66, "grad_norm": 1.53125, "learning_rate": 0.0004903818083105825, "loss": 0.2505, "step": 64250 }, { "epoch": 2.66, "grad_norm": 0.55859375, "learning_rate": 0.0004903788288049632, "loss": 0.2318, "step": 64260 }, { "epoch": 2.66, "grad_norm": 0.5703125, "learning_rate": 0.0004903758488469769, "loss": 0.224, "step": 64270 }, { "epoch": 2.66, "grad_norm": 0.765625, "learning_rate": 0.0004903728684366293, "loss": 0.2559, "step": 64280 }, { "epoch": 2.66, "grad_norm": 0.53125, "learning_rate": 0.0004903698875739259, "loss": 0.2481, "step": 64290 }, { "epoch": 2.66, "grad_norm": 0.91015625, "learning_rate": 0.0004903669062588723, "loss": 0.1993, "step": 64300 }, { "epoch": 2.66, "grad_norm": 0.392578125, "learning_rate": 0.0004903639244914741, "loss": 0.2821, "step": 64310 }, { "epoch": 2.66, "grad_norm": 0.64453125, "learning_rate": 0.000490360942271737, "loss": 0.1934, "step": 64320 }, { "epoch": 2.66, "grad_norm": 0.59765625, "learning_rate": 0.0004903579595996665, "loss": 0.2139, "step": 64330 }, { "epoch": 2.66, "grad_norm": 0.796875, "learning_rate": 0.0004903549764752684, "loss": 0.1683, "step": 64340 }, { "epoch": 2.67, "grad_norm": 0.59375, "learning_rate": 0.0004903519928985481, "loss": 0.2297, "step": 64350 }, { "epoch": 2.67, "grad_norm": 1.1171875, "learning_rate": 0.0004903490088695113, "loss": 0.2029, "step": 64360 }, { "epoch": 2.67, "grad_norm": 0.78125, "learning_rate": 0.0004903460243881637, "loss": 0.2715, "step": 64370 }, { "epoch": 2.67, "grad_norm": 0.5703125, "learning_rate": 0.0004903430394545107, "loss": 0.2539, "step": 64380 }, { "epoch": 2.67, "grad_norm": 0.40234375, "learning_rate": 0.0004903400540685581, "loss": 0.2183, "step": 64390 }, { "epoch": 2.67, "grad_norm": 0.4296875, "learning_rate": 0.0004903370682303116, "loss": 0.2018, "step": 64400 }, { "epoch": 2.67, "grad_norm": 0.6953125, "learning_rate": 0.0004903340819397766, "loss": 0.2567, "step": 64410 }, { "epoch": 2.67, "grad_norm": 1.2265625, "learning_rate": 0.0004903310951969587, "loss": 0.2563, "step": 64420 }, { "epoch": 2.67, "grad_norm": 0.322265625, "learning_rate": 0.0004903281080018638, "loss": 0.1964, "step": 64430 }, { "epoch": 2.67, "grad_norm": 0.9296875, "learning_rate": 0.0004903251203544973, "loss": 0.2237, "step": 64440 }, { "epoch": 2.67, "grad_norm": 0.71484375, "learning_rate": 0.0004903221322548649, "loss": 0.2006, "step": 64450 }, { "epoch": 2.67, "grad_norm": 1.234375, "learning_rate": 0.0004903191437029722, "loss": 0.2458, "step": 64460 }, { "epoch": 2.67, "grad_norm": 0.447265625, "learning_rate": 0.0004903161546988247, "loss": 0.2303, "step": 64470 }, { "epoch": 2.67, "grad_norm": 1.015625, "learning_rate": 0.0004903131652424283, "loss": 0.2703, "step": 64480 }, { "epoch": 2.67, "grad_norm": 1.9453125, "learning_rate": 0.0004903101753337884, "loss": 0.2578, "step": 64490 }, { "epoch": 2.67, "grad_norm": 0.828125, "learning_rate": 0.0004903071849729107, "loss": 0.2736, "step": 64500 }, { "epoch": 2.67, "grad_norm": 0.40234375, "learning_rate": 0.0004903041941598009, "loss": 0.2046, "step": 64510 }, { "epoch": 2.67, "grad_norm": 0.7578125, "learning_rate": 0.0004903012028944644, "loss": 0.2053, "step": 64520 }, { "epoch": 2.67, "grad_norm": 0.392578125, "learning_rate": 0.0004902982111769071, "loss": 0.1671, "step": 64530 }, { "epoch": 2.67, "grad_norm": 0.55859375, "learning_rate": 0.0004902952190071346, "loss": 0.216, "step": 64540 }, { "epoch": 2.67, "grad_norm": 1.234375, "learning_rate": 0.0004902922263851523, "loss": 0.2614, "step": 64550 }, { "epoch": 2.67, "grad_norm": 1.1328125, "learning_rate": 0.000490289233310966, "loss": 0.2192, "step": 64560 }, { "epoch": 2.67, "grad_norm": 0.5703125, "learning_rate": 0.0004902862397845814, "loss": 0.2119, "step": 64570 }, { "epoch": 2.67, "grad_norm": 0.75390625, "learning_rate": 0.000490283245806004, "loss": 0.2298, "step": 64580 }, { "epoch": 2.68, "grad_norm": 0.46484375, "learning_rate": 0.0004902802513752395, "loss": 0.1971, "step": 64590 }, { "epoch": 2.68, "grad_norm": 0.455078125, "learning_rate": 0.0004902772564922935, "loss": 0.2801, "step": 64600 }, { "epoch": 2.68, "grad_norm": 0.77734375, "learning_rate": 0.0004902742611571716, "loss": 0.2325, "step": 64610 }, { "epoch": 2.68, "grad_norm": 0.8828125, "learning_rate": 0.0004902712653698795, "loss": 0.1869, "step": 64620 }, { "epoch": 2.68, "grad_norm": 1.1484375, "learning_rate": 0.0004902682691304229, "loss": 0.2086, "step": 64630 }, { "epoch": 2.68, "grad_norm": 0.67578125, "learning_rate": 0.0004902652724388072, "loss": 0.2255, "step": 64640 }, { "epoch": 2.68, "grad_norm": 0.349609375, "learning_rate": 0.0004902622752950384, "loss": 0.1838, "step": 64650 }, { "epoch": 2.68, "grad_norm": 0.734375, "learning_rate": 0.0004902592776991218, "loss": 0.2266, "step": 64660 }, { "epoch": 2.68, "grad_norm": 1.0, "learning_rate": 0.0004902562796510633, "loss": 0.2572, "step": 64670 }, { "epoch": 2.68, "grad_norm": 0.6328125, "learning_rate": 0.0004902532811508684, "loss": 0.2209, "step": 64680 }, { "epoch": 2.68, "grad_norm": 0.640625, "learning_rate": 0.0004902502821985428, "loss": 0.2473, "step": 64690 }, { "epoch": 2.68, "grad_norm": 0.359375, "learning_rate": 0.0004902472827940919, "loss": 0.2338, "step": 64700 }, { "epoch": 2.68, "grad_norm": 0.498046875, "learning_rate": 0.0004902442829375218, "loss": 0.1977, "step": 64710 }, { "epoch": 2.68, "grad_norm": 0.40234375, "learning_rate": 0.0004902412826288379, "loss": 0.2265, "step": 64720 }, { "epoch": 2.68, "grad_norm": 1.3515625, "learning_rate": 0.0004902382818680457, "loss": 0.1671, "step": 64730 }, { "epoch": 2.68, "grad_norm": 0.259765625, "learning_rate": 0.000490235280655151, "loss": 0.1753, "step": 64740 }, { "epoch": 2.68, "grad_norm": 0.50390625, "learning_rate": 0.0004902322789901596, "loss": 0.2168, "step": 64750 }, { "epoch": 2.68, "grad_norm": 1.6484375, "learning_rate": 0.0004902292768730769, "loss": 0.1741, "step": 64760 }, { "epoch": 2.68, "grad_norm": 1.03125, "learning_rate": 0.0004902262743039087, "loss": 0.2119, "step": 64770 }, { "epoch": 2.68, "grad_norm": 0.54296875, "learning_rate": 0.0004902232712826604, "loss": 0.2401, "step": 64780 }, { "epoch": 2.68, "grad_norm": 0.7421875, "learning_rate": 0.000490220267809338, "loss": 0.2342, "step": 64790 }, { "epoch": 2.68, "grad_norm": 0.58203125, "learning_rate": 0.0004902172638839471, "loss": 0.2272, "step": 64800 }, { "epoch": 2.68, "grad_norm": 0.451171875, "learning_rate": 0.0004902142595064931, "loss": 0.2024, "step": 64810 }, { "epoch": 2.68, "grad_norm": 0.5546875, "learning_rate": 0.0004902112546769818, "loss": 0.2283, "step": 64820 }, { "epoch": 2.69, "grad_norm": 0.380859375, "learning_rate": 0.0004902082493954189, "loss": 0.1925, "step": 64830 }, { "epoch": 2.69, "grad_norm": 0.89453125, "learning_rate": 0.00049020524366181, "loss": 0.2521, "step": 64840 }, { "epoch": 2.69, "grad_norm": 1.3203125, "learning_rate": 0.0004902022374761608, "loss": 0.2642, "step": 64850 }, { "epoch": 2.69, "grad_norm": 1.5390625, "learning_rate": 0.000490199230838477, "loss": 0.2602, "step": 64860 }, { "epoch": 2.69, "grad_norm": 1.1640625, "learning_rate": 0.000490196223748764, "loss": 0.1566, "step": 64870 }, { "epoch": 2.69, "grad_norm": 0.59765625, "learning_rate": 0.0004901932162070277, "loss": 0.2391, "step": 64880 }, { "epoch": 2.69, "grad_norm": 0.69921875, "learning_rate": 0.0004901902082132738, "loss": 0.1822, "step": 64890 }, { "epoch": 2.69, "grad_norm": 0.703125, "learning_rate": 0.0004901871997675079, "loss": 0.2836, "step": 64900 }, { "epoch": 2.69, "grad_norm": 0.80078125, "learning_rate": 0.0004901841908697356, "loss": 0.2073, "step": 64910 }, { "epoch": 2.69, "grad_norm": 0.65234375, "learning_rate": 0.0004901811815199625, "loss": 0.211, "step": 64920 }, { "epoch": 2.69, "grad_norm": 1.25, "learning_rate": 0.0004901781717181943, "loss": 0.2685, "step": 64930 }, { "epoch": 2.69, "grad_norm": 1.5703125, "learning_rate": 0.0004901751614644369, "loss": 0.1822, "step": 64940 }, { "epoch": 2.69, "grad_norm": 0.859375, "learning_rate": 0.0004901721507586957, "loss": 0.2542, "step": 64950 }, { "epoch": 2.69, "grad_norm": 0.4140625, "learning_rate": 0.0004901691396009763, "loss": 0.1606, "step": 64960 }, { "epoch": 2.69, "grad_norm": 0.546875, "learning_rate": 0.0004901661279912848, "loss": 0.2619, "step": 64970 }, { "epoch": 2.69, "grad_norm": 0.26953125, "learning_rate": 0.0004901631159296264, "loss": 0.1958, "step": 64980 }, { "epoch": 2.69, "grad_norm": 1.2265625, "learning_rate": 0.000490160103416007, "loss": 0.2038, "step": 64990 }, { "epoch": 2.69, "grad_norm": 1.0703125, "learning_rate": 0.0004901570904504323, "loss": 0.222, "step": 65000 }, { "epoch": 2.69, "grad_norm": 0.396484375, "learning_rate": 0.0004901540770329077, "loss": 0.2372, "step": 65010 }, { "epoch": 2.69, "grad_norm": 1.203125, "learning_rate": 0.0004901510631634392, "loss": 0.2675, "step": 65020 }, { "epoch": 2.69, "grad_norm": 0.9296875, "learning_rate": 0.0004901480488420322, "loss": 0.2547, "step": 65030 }, { "epoch": 2.69, "grad_norm": 0.6875, "learning_rate": 0.0004901450340686926, "loss": 0.2184, "step": 65040 }, { "epoch": 2.69, "grad_norm": 0.78515625, "learning_rate": 0.000490142018843426, "loss": 0.2603, "step": 65050 }, { "epoch": 2.69, "grad_norm": 0.5234375, "learning_rate": 0.000490139003166238, "loss": 0.2769, "step": 65060 }, { "epoch": 2.7, "grad_norm": 0.640625, "learning_rate": 0.0004901359870371344, "loss": 0.1893, "step": 65070 }, { "epoch": 2.7, "grad_norm": 0.2578125, "learning_rate": 0.0004901329704561208, "loss": 0.1923, "step": 65080 }, { "epoch": 2.7, "grad_norm": 1.0859375, "learning_rate": 0.0004901299534232029, "loss": 0.2026, "step": 65090 }, { "epoch": 2.7, "grad_norm": 1.109375, "learning_rate": 0.0004901269359383863, "loss": 0.24, "step": 65100 }, { "epoch": 2.7, "grad_norm": 0.291015625, "learning_rate": 0.0004901239180016767, "loss": 0.1785, "step": 65110 }, { "epoch": 2.7, "grad_norm": 0.443359375, "learning_rate": 0.0004901208996130799, "loss": 0.231, "step": 65120 }, { "epoch": 2.7, "grad_norm": 0.79296875, "learning_rate": 0.0004901178807726014, "loss": 0.1925, "step": 65130 }, { "epoch": 2.7, "grad_norm": 0.92578125, "learning_rate": 0.0004901148614802471, "loss": 0.2177, "step": 65140 }, { "epoch": 2.7, "grad_norm": 0.68359375, "learning_rate": 0.0004901118417360225, "loss": 0.2737, "step": 65150 }, { "epoch": 2.7, "grad_norm": 0.76171875, "learning_rate": 0.0004901088215399333, "loss": 0.2241, "step": 65160 }, { "epoch": 2.7, "grad_norm": 0.578125, "learning_rate": 0.0004901058008919853, "loss": 0.1877, "step": 65170 }, { "epoch": 2.7, "grad_norm": 0.5546875, "learning_rate": 0.0004901027797921841, "loss": 0.2258, "step": 65180 }, { "epoch": 2.7, "grad_norm": 1.0234375, "learning_rate": 0.0004900997582405354, "loss": 0.2045, "step": 65190 }, { "epoch": 2.7, "grad_norm": 1.34375, "learning_rate": 0.0004900967362370448, "loss": 0.2667, "step": 65200 }, { "epoch": 2.7, "grad_norm": 0.2080078125, "learning_rate": 0.0004900937137817182, "loss": 0.2035, "step": 65210 }, { "epoch": 2.7, "grad_norm": 0.890625, "learning_rate": 0.000490090690874561, "loss": 0.1858, "step": 65220 }, { "epoch": 2.7, "grad_norm": 0.91796875, "learning_rate": 0.0004900876675155792, "loss": 0.1278, "step": 65230 }, { "epoch": 2.7, "grad_norm": 1.609375, "learning_rate": 0.0004900846437047783, "loss": 0.2188, "step": 65240 }, { "epoch": 2.7, "grad_norm": 0.82421875, "learning_rate": 0.000490081619442164, "loss": 0.2323, "step": 65250 }, { "epoch": 2.7, "grad_norm": 0.81640625, "learning_rate": 0.000490078594727742, "loss": 0.2281, "step": 65260 }, { "epoch": 2.7, "grad_norm": 1.0703125, "learning_rate": 0.000490075569561518, "loss": 0.2037, "step": 65270 }, { "epoch": 2.7, "grad_norm": 0.85546875, "learning_rate": 0.0004900725439434978, "loss": 0.2131, "step": 65280 }, { "epoch": 2.7, "grad_norm": 0.609375, "learning_rate": 0.000490069517873687, "loss": 0.1801, "step": 65290 }, { "epoch": 2.7, "grad_norm": 0.0, "learning_rate": 0.0004900664913520912, "loss": 0.1747, "step": 65300 }, { "epoch": 2.71, "grad_norm": 0.373046875, "learning_rate": 0.0004900634643787161, "loss": 0.202, "step": 65310 }, { "epoch": 2.71, "grad_norm": 1.015625, "learning_rate": 0.0004900604369535677, "loss": 0.277, "step": 65320 }, { "epoch": 2.71, "grad_norm": 0.89453125, "learning_rate": 0.0004900574090766514, "loss": 0.2322, "step": 65330 }, { "epoch": 2.71, "grad_norm": 0.376953125, "learning_rate": 0.0004900543807479729, "loss": 0.176, "step": 65340 }, { "epoch": 2.71, "grad_norm": 0.609375, "learning_rate": 0.0004900513519675382, "loss": 0.23, "step": 65350 }, { "epoch": 2.71, "grad_norm": 0.70703125, "learning_rate": 0.0004900483227353525, "loss": 0.2034, "step": 65360 }, { "epoch": 2.71, "grad_norm": 0.6171875, "learning_rate": 0.0004900452930514219, "loss": 0.2026, "step": 65370 }, { "epoch": 2.71, "grad_norm": 0.70703125, "learning_rate": 0.0004900422629157521, "loss": 0.1987, "step": 65380 }, { "epoch": 2.71, "grad_norm": 0.55859375, "learning_rate": 0.0004900392323283485, "loss": 0.2483, "step": 65390 }, { "epoch": 2.71, "grad_norm": 0.8984375, "learning_rate": 0.0004900362012892171, "loss": 0.1712, "step": 65400 }, { "epoch": 2.71, "grad_norm": 0.5625, "learning_rate": 0.0004900331697983635, "loss": 0.2336, "step": 65410 }, { "epoch": 2.71, "grad_norm": 0.337890625, "learning_rate": 0.0004900301378557932, "loss": 0.2268, "step": 65420 }, { "epoch": 2.71, "grad_norm": 0.330078125, "learning_rate": 0.0004900271054615123, "loss": 0.1966, "step": 65430 }, { "epoch": 2.71, "grad_norm": 0.79296875, "learning_rate": 0.0004900240726155263, "loss": 0.2551, "step": 65440 }, { "epoch": 2.71, "grad_norm": 0.6484375, "learning_rate": 0.0004900210393178409, "loss": 0.2012, "step": 65450 }, { "epoch": 2.71, "grad_norm": 0.380859375, "learning_rate": 0.0004900180055684616, "loss": 0.2687, "step": 65460 }, { "epoch": 2.71, "grad_norm": 0.47265625, "learning_rate": 0.0004900149713673946, "loss": 0.2341, "step": 65470 }, { "epoch": 2.71, "grad_norm": 0.46875, "learning_rate": 0.0004900119367146452, "loss": 0.1887, "step": 65480 }, { "epoch": 2.71, "grad_norm": 0.6640625, "learning_rate": 0.0004900089016102194, "loss": 0.2567, "step": 65490 }, { "epoch": 2.71, "grad_norm": 0.515625, "learning_rate": 0.0004900058660541227, "loss": 0.1905, "step": 65500 }, { "epoch": 2.71, "grad_norm": 1.515625, "learning_rate": 0.0004900028300463609, "loss": 0.2144, "step": 65510 }, { "epoch": 2.71, "grad_norm": 0.7890625, "learning_rate": 0.0004899997935869396, "loss": 0.1997, "step": 65520 }, { "epoch": 2.71, "grad_norm": 0.609375, "learning_rate": 0.0004899967566758647, "loss": 0.176, "step": 65530 }, { "epoch": 2.71, "grad_norm": 0.375, "learning_rate": 0.0004899937193131417, "loss": 0.2511, "step": 65540 }, { "epoch": 2.72, "grad_norm": 0.95703125, "learning_rate": 0.0004899906814987766, "loss": 0.2138, "step": 65550 }, { "epoch": 2.72, "grad_norm": 0.4296875, "learning_rate": 0.0004899876432327749, "loss": 0.2431, "step": 65560 }, { "epoch": 2.72, "grad_norm": 0.98828125, "learning_rate": 0.0004899846045151423, "loss": 0.1867, "step": 65570 }, { "epoch": 2.72, "grad_norm": 0.640625, "learning_rate": 0.0004899815653458847, "loss": 0.1927, "step": 65580 }, { "epoch": 2.72, "grad_norm": 0.59375, "learning_rate": 0.0004899785257250077, "loss": 0.2468, "step": 65590 }, { "epoch": 2.72, "grad_norm": 0.6796875, "learning_rate": 0.0004899754856525169, "loss": 0.2382, "step": 65600 }, { "epoch": 2.72, "grad_norm": 0.66796875, "learning_rate": 0.0004899724451284183, "loss": 0.1941, "step": 65610 }, { "epoch": 2.72, "grad_norm": 0.330078125, "learning_rate": 0.0004899694041527173, "loss": 0.2269, "step": 65620 }, { "epoch": 2.72, "grad_norm": 0.84765625, "learning_rate": 0.0004899663627254199, "loss": 0.1983, "step": 65630 }, { "epoch": 2.72, "grad_norm": 0.43359375, "learning_rate": 0.0004899633208465317, "loss": 0.2163, "step": 65640 }, { "epoch": 2.72, "grad_norm": 0.388671875, "learning_rate": 0.0004899602785160585, "loss": 0.2406, "step": 65650 }, { "epoch": 2.72, "grad_norm": 0.79296875, "learning_rate": 0.000489957235734006, "loss": 0.2222, "step": 65660 }, { "epoch": 2.72, "grad_norm": 0.921875, "learning_rate": 0.0004899541925003798, "loss": 0.2759, "step": 65670 }, { "epoch": 2.72, "grad_norm": 0.40625, "learning_rate": 0.0004899511488151857, "loss": 0.2014, "step": 65680 }, { "epoch": 2.72, "grad_norm": 1.0234375, "learning_rate": 0.0004899481046784296, "loss": 0.2615, "step": 65690 }, { "epoch": 2.72, "grad_norm": 0.451171875, "learning_rate": 0.0004899450600901169, "loss": 0.216, "step": 65700 }, { "epoch": 2.72, "grad_norm": 0.78515625, "learning_rate": 0.0004899420150502536, "loss": 0.2697, "step": 65710 }, { "epoch": 2.72, "grad_norm": 0.7265625, "learning_rate": 0.0004899389695588453, "loss": 0.1721, "step": 65720 }, { "epoch": 2.72, "grad_norm": 0.51171875, "learning_rate": 0.0004899359236158979, "loss": 0.2208, "step": 65730 }, { "epoch": 2.72, "grad_norm": 0.546875, "learning_rate": 0.0004899328772214168, "loss": 0.2189, "step": 65740 }, { "epoch": 2.72, "grad_norm": 0.85546875, "learning_rate": 0.000489929830375408, "loss": 0.2113, "step": 65750 }, { "epoch": 2.72, "grad_norm": 0.53125, "learning_rate": 0.0004899267830778773, "loss": 0.2538, "step": 65760 }, { "epoch": 2.72, "grad_norm": 1.390625, "learning_rate": 0.0004899237353288301, "loss": 0.1961, "step": 65770 }, { "epoch": 2.72, "grad_norm": 0.6640625, "learning_rate": 0.0004899206871282725, "loss": 0.2597, "step": 65780 }, { "epoch": 2.73, "grad_norm": 0.32421875, "learning_rate": 0.00048991763847621, "loss": 0.2074, "step": 65790 }, { "epoch": 2.73, "grad_norm": 0.25, "learning_rate": 0.0004899145893726485, "loss": 0.2205, "step": 65800 }, { "epoch": 2.73, "grad_norm": 0.81640625, "learning_rate": 0.0004899115398175936, "loss": 0.2056, "step": 65810 }, { "epoch": 2.73, "grad_norm": 0.474609375, "learning_rate": 0.000489908489811051, "loss": 0.1773, "step": 65820 }, { "epoch": 2.73, "grad_norm": 0.51953125, "learning_rate": 0.0004899054393530266, "loss": 0.2303, "step": 65830 }, { "epoch": 2.73, "grad_norm": 0.671875, "learning_rate": 0.0004899023884435262, "loss": 0.1698, "step": 65840 }, { "epoch": 2.73, "grad_norm": 0.69921875, "learning_rate": 0.0004898993370825552, "loss": 0.2198, "step": 65850 }, { "epoch": 2.73, "grad_norm": 0.4375, "learning_rate": 0.0004898962852701197, "loss": 0.2015, "step": 65860 }, { "epoch": 2.73, "grad_norm": 0.5078125, "learning_rate": 0.0004898932330062252, "loss": 0.2094, "step": 65870 }, { "epoch": 2.73, "grad_norm": 0.443359375, "learning_rate": 0.0004898901802908776, "loss": 0.1872, "step": 65880 }, { "epoch": 2.73, "grad_norm": 0.478515625, "learning_rate": 0.0004898871271240826, "loss": 0.2241, "step": 65890 }, { "epoch": 2.73, "grad_norm": 0.65234375, "learning_rate": 0.0004898840735058459, "loss": 0.2008, "step": 65900 }, { "epoch": 2.73, "grad_norm": 0.265625, "learning_rate": 0.0004898810194361733, "loss": 0.1941, "step": 65910 }, { "epoch": 2.73, "grad_norm": 1.09375, "learning_rate": 0.0004898779649150705, "loss": 0.1997, "step": 65920 }, { "epoch": 2.73, "grad_norm": 0.37109375, "learning_rate": 0.0004898749099425432, "loss": 0.2374, "step": 65930 }, { "epoch": 2.73, "grad_norm": 1.25, "learning_rate": 0.0004898718545185974, "loss": 0.1986, "step": 65940 }, { "epoch": 2.73, "grad_norm": 0.56640625, "learning_rate": 0.0004898687986432386, "loss": 0.183, "step": 65950 }, { "epoch": 2.73, "grad_norm": 1.75, "learning_rate": 0.0004898657423164726, "loss": 0.2511, "step": 65960 }, { "epoch": 2.73, "grad_norm": 0.55859375, "learning_rate": 0.0004898626855383052, "loss": 0.1712, "step": 65970 }, { "epoch": 2.73, "grad_norm": 0.55078125, "learning_rate": 0.0004898596283087421, "loss": 0.1912, "step": 65980 }, { "epoch": 2.73, "grad_norm": 0.76171875, "learning_rate": 0.000489856570627789, "loss": 0.2211, "step": 65990 }, { "epoch": 2.73, "grad_norm": 2.046875, "learning_rate": 0.000489853512495452, "loss": 0.2654, "step": 66000 }, { "epoch": 2.73, "grad_norm": 0.953125, "learning_rate": 0.0004898504539117363, "loss": 0.1972, "step": 66010 }, { "epoch": 2.73, "grad_norm": 0.69921875, "learning_rate": 0.0004898473948766482, "loss": 0.2439, "step": 66020 }, { "epoch": 2.73, "grad_norm": 0.65234375, "learning_rate": 0.0004898443353901931, "loss": 0.2101, "step": 66030 }, { "epoch": 2.74, "grad_norm": 0.7109375, "learning_rate": 0.0004898412754523768, "loss": 0.2153, "step": 66040 }, { "epoch": 2.74, "grad_norm": 0.88671875, "learning_rate": 0.0004898382150632051, "loss": 0.2481, "step": 66050 }, { "epoch": 2.74, "grad_norm": 0.82421875, "learning_rate": 0.0004898351542226839, "loss": 0.2676, "step": 66060 }, { "epoch": 2.74, "grad_norm": 1.125, "learning_rate": 0.0004898320929308187, "loss": 0.1833, "step": 66070 }, { "epoch": 2.74, "grad_norm": 0.439453125, "learning_rate": 0.0004898290311876154, "loss": 0.2148, "step": 66080 }, { "epoch": 2.74, "grad_norm": 0.88671875, "learning_rate": 0.0004898259689930799, "loss": 0.2303, "step": 66090 }, { "epoch": 2.74, "grad_norm": 0.88671875, "learning_rate": 0.0004898229063472179, "loss": 0.227, "step": 66100 }, { "epoch": 2.74, "grad_norm": 0.546875, "learning_rate": 0.0004898198432500349, "loss": 0.2184, "step": 66110 }, { "epoch": 2.74, "grad_norm": 0.69140625, "learning_rate": 0.0004898167797015369, "loss": 0.2214, "step": 66120 }, { "epoch": 2.74, "grad_norm": 0.341796875, "learning_rate": 0.0004898137157017296, "loss": 0.1859, "step": 66130 }, { "epoch": 2.74, "grad_norm": 1.28125, "learning_rate": 0.0004898106512506188, "loss": 0.2696, "step": 66140 }, { "epoch": 2.74, "grad_norm": 0.2373046875, "learning_rate": 0.0004898075863482103, "loss": 0.2007, "step": 66150 }, { "epoch": 2.74, "grad_norm": 0.234375, "learning_rate": 0.0004898045209945097, "loss": 0.1943, "step": 66160 }, { "epoch": 2.74, "grad_norm": 2.1875, "learning_rate": 0.000489801455189523, "loss": 0.2213, "step": 66170 }, { "epoch": 2.74, "grad_norm": 0.40234375, "learning_rate": 0.0004897983889332559, "loss": 0.207, "step": 66180 }, { "epoch": 2.74, "grad_norm": 0.376953125, "learning_rate": 0.000489795322225714, "loss": 0.2231, "step": 66190 }, { "epoch": 2.74, "grad_norm": 0.953125, "learning_rate": 0.0004897922550669033, "loss": 0.2329, "step": 66200 }, { "epoch": 2.74, "grad_norm": 0.2451171875, "learning_rate": 0.0004897891874568294, "loss": 0.2301, "step": 66210 }, { "epoch": 2.74, "grad_norm": 0.546875, "learning_rate": 0.0004897861193954981, "loss": 0.221, "step": 66220 }, { "epoch": 2.74, "grad_norm": 0.353515625, "learning_rate": 0.0004897830508829153, "loss": 0.2039, "step": 66230 }, { "epoch": 2.74, "grad_norm": 0.5703125, "learning_rate": 0.0004897799819190867, "loss": 0.2312, "step": 66240 }, { "epoch": 2.74, "grad_norm": 0.50390625, "learning_rate": 0.000489776912504018, "loss": 0.1967, "step": 66250 }, { "epoch": 2.74, "grad_norm": 1.671875, "learning_rate": 0.0004897738426377152, "loss": 0.2402, "step": 66260 }, { "epoch": 2.74, "grad_norm": 1.375, "learning_rate": 0.0004897707723201838, "loss": 0.1885, "step": 66270 }, { "epoch": 2.75, "grad_norm": 0.66796875, "learning_rate": 0.0004897677015514297, "loss": 0.1738, "step": 66280 }, { "epoch": 2.75, "grad_norm": 0.52734375, "learning_rate": 0.0004897646303314586, "loss": 0.1957, "step": 66290 }, { "epoch": 2.75, "grad_norm": 0.4296875, "learning_rate": 0.0004897615586602765, "loss": 0.2331, "step": 66300 }, { "epoch": 2.75, "grad_norm": 0.57421875, "learning_rate": 0.000489758486537889, "loss": 0.1977, "step": 66310 }, { "epoch": 2.75, "grad_norm": 0.44921875, "learning_rate": 0.0004897554139643019, "loss": 0.2046, "step": 66320 }, { "epoch": 2.75, "grad_norm": 0.99609375, "learning_rate": 0.0004897523409395211, "loss": 0.1958, "step": 66330 }, { "epoch": 2.75, "grad_norm": 0.9921875, "learning_rate": 0.0004897492674635521, "loss": 0.2593, "step": 66340 }, { "epoch": 2.75, "grad_norm": 0.57421875, "learning_rate": 0.000489746193536401, "loss": 0.2239, "step": 66350 }, { "epoch": 2.75, "grad_norm": 0.52734375, "learning_rate": 0.0004897431191580734, "loss": 0.2394, "step": 66360 }, { "epoch": 2.75, "grad_norm": 0.8515625, "learning_rate": 0.0004897400443285751, "loss": 0.2177, "step": 66370 }, { "epoch": 2.75, "grad_norm": 0.365234375, "learning_rate": 0.0004897369690479121, "loss": 0.231, "step": 66380 }, { "epoch": 2.75, "grad_norm": 0.59765625, "learning_rate": 0.0004897338933160898, "loss": 0.2095, "step": 66390 }, { "epoch": 2.75, "grad_norm": 0.3984375, "learning_rate": 0.0004897308171331144, "loss": 0.218, "step": 66400 }, { "epoch": 2.75, "grad_norm": 0.390625, "learning_rate": 0.0004897277404989914, "loss": 0.2146, "step": 66410 }, { "epoch": 2.75, "grad_norm": 2.109375, "learning_rate": 0.0004897246634137266, "loss": 0.1931, "step": 66420 }, { "epoch": 2.75, "grad_norm": 0.43359375, "learning_rate": 0.000489721585877326, "loss": 0.1828, "step": 66430 }, { "epoch": 2.75, "grad_norm": 1.09375, "learning_rate": 0.0004897185078897952, "loss": 0.222, "step": 66440 }, { "epoch": 2.75, "grad_norm": 0.36328125, "learning_rate": 0.00048971542945114, "loss": 0.2344, "step": 66450 }, { "epoch": 2.75, "grad_norm": 0.6015625, "learning_rate": 0.0004897123505613663, "loss": 0.1823, "step": 66460 }, { "epoch": 2.75, "grad_norm": 0.5078125, "learning_rate": 0.00048970927122048, "loss": 0.2224, "step": 66470 }, { "epoch": 2.75, "grad_norm": 0.83203125, "learning_rate": 0.0004897061914284865, "loss": 0.1823, "step": 66480 }, { "epoch": 2.75, "grad_norm": 0.47265625, "learning_rate": 0.0004897031111853919, "loss": 0.2261, "step": 66490 }, { "epoch": 2.75, "grad_norm": 0.80859375, "learning_rate": 0.0004897000304912019, "loss": 0.2442, "step": 66500 }, { "epoch": 2.75, "grad_norm": 2.03125, "learning_rate": 0.0004896969493459225, "loss": 0.2498, "step": 66510 }, { "epoch": 2.76, "grad_norm": 0.34765625, "learning_rate": 0.0004896938677495592, "loss": 0.2443, "step": 66520 }, { "epoch": 2.76, "grad_norm": 0.875, "learning_rate": 0.0004896907857021179, "loss": 0.2346, "step": 66530 }, { "epoch": 2.76, "grad_norm": 0.73828125, "learning_rate": 0.0004896877032036044, "loss": 0.2275, "step": 66540 }, { "epoch": 2.76, "grad_norm": 0.7734375, "learning_rate": 0.0004896846202540247, "loss": 0.1967, "step": 66550 }, { "epoch": 2.76, "grad_norm": 0.57421875, "learning_rate": 0.0004896815368533843, "loss": 0.2083, "step": 66560 }, { "epoch": 2.76, "grad_norm": 0.515625, "learning_rate": 0.0004896784530016891, "loss": 0.219, "step": 66570 }, { "epoch": 2.76, "grad_norm": 0.56640625, "learning_rate": 0.000489675368698945, "loss": 0.2243, "step": 66580 }, { "epoch": 2.76, "grad_norm": 0.6875, "learning_rate": 0.0004896722839451578, "loss": 0.2223, "step": 66590 }, { "epoch": 2.76, "grad_norm": 0.419921875, "learning_rate": 0.0004896691987403331, "loss": 0.1834, "step": 66600 }, { "epoch": 2.76, "grad_norm": 1.90625, "learning_rate": 0.0004896661130844769, "loss": 0.1928, "step": 66610 }, { "epoch": 2.76, "grad_norm": 2.390625, "learning_rate": 0.0004896630269775951, "loss": 0.2143, "step": 66620 }, { "epoch": 2.76, "grad_norm": 1.1171875, "learning_rate": 0.0004896599404196931, "loss": 0.2185, "step": 66630 }, { "epoch": 2.76, "grad_norm": 0.353515625, "learning_rate": 0.0004896568534107771, "loss": 0.2259, "step": 66640 }, { "epoch": 2.76, "grad_norm": 0.76953125, "learning_rate": 0.0004896537659508528, "loss": 0.2442, "step": 66650 }, { "epoch": 2.76, "grad_norm": 0.4765625, "learning_rate": 0.0004896506780399261, "loss": 0.2105, "step": 66660 }, { "epoch": 2.76, "grad_norm": 0.73828125, "learning_rate": 0.0004896475896780025, "loss": 0.2442, "step": 66670 }, { "epoch": 2.76, "grad_norm": 0.259765625, "learning_rate": 0.0004896445008650881, "loss": 0.2178, "step": 66680 }, { "epoch": 2.76, "grad_norm": 0.8125, "learning_rate": 0.0004896414116011886, "loss": 0.2163, "step": 66690 }, { "epoch": 2.76, "grad_norm": 0.59375, "learning_rate": 0.00048963832188631, "loss": 0.189, "step": 66700 }, { "epoch": 2.76, "grad_norm": 0.55859375, "learning_rate": 0.0004896352317204578, "loss": 0.2209, "step": 66710 }, { "epoch": 2.76, "grad_norm": 0.73046875, "learning_rate": 0.0004896321411036379, "loss": 0.2082, "step": 66720 }, { "epoch": 2.76, "grad_norm": 0.96484375, "learning_rate": 0.0004896290500358563, "loss": 0.2549, "step": 66730 }, { "epoch": 2.76, "grad_norm": 0.5859375, "learning_rate": 0.0004896259585171188, "loss": 0.1923, "step": 66740 }, { "epoch": 2.76, "grad_norm": 0.65234375, "learning_rate": 0.000489622866547431, "loss": 0.2185, "step": 66750 }, { "epoch": 2.77, "grad_norm": 0.5078125, "learning_rate": 0.0004896197741267989, "loss": 0.2551, "step": 66760 }, { "epoch": 2.77, "grad_norm": 0.71484375, "learning_rate": 0.0004896166812552281, "loss": 0.1724, "step": 66770 }, { "epoch": 2.77, "grad_norm": 0.98046875, "learning_rate": 0.0004896135879327248, "loss": 0.2074, "step": 66780 }, { "epoch": 2.77, "grad_norm": 0.490234375, "learning_rate": 0.0004896104941592945, "loss": 0.2355, "step": 66790 }, { "epoch": 2.77, "grad_norm": 0.43359375, "learning_rate": 0.0004896073999349431, "loss": 0.2075, "step": 66800 }, { "epoch": 2.77, "grad_norm": 0.60546875, "learning_rate": 0.0004896043052596765, "loss": 0.241, "step": 66810 }, { "epoch": 2.77, "grad_norm": 0.44140625, "learning_rate": 0.0004896012101335004, "loss": 0.1948, "step": 66820 }, { "epoch": 2.77, "grad_norm": 0.9609375, "learning_rate": 0.0004895981145564208, "loss": 0.1956, "step": 66830 }, { "epoch": 2.77, "grad_norm": 1.1171875, "learning_rate": 0.0004895950185284434, "loss": 0.2345, "step": 66840 }, { "epoch": 2.77, "grad_norm": 0.703125, "learning_rate": 0.000489591922049574, "loss": 0.2196, "step": 66850 }, { "epoch": 2.77, "grad_norm": 1.140625, "learning_rate": 0.0004895888251198186, "loss": 0.2562, "step": 66860 }, { "epoch": 2.77, "grad_norm": 2.109375, "learning_rate": 0.0004895857277391828, "loss": 0.215, "step": 66870 }, { "epoch": 2.77, "grad_norm": 0.2578125, "learning_rate": 0.0004895826299076725, "loss": 0.2026, "step": 66880 }, { "epoch": 2.77, "grad_norm": 0.384765625, "learning_rate": 0.0004895795316252937, "loss": 0.2068, "step": 66890 }, { "epoch": 2.77, "grad_norm": 0.279296875, "learning_rate": 0.0004895764328920519, "loss": 0.2188, "step": 66900 }, { "epoch": 2.77, "grad_norm": 0.56640625, "learning_rate": 0.0004895733337079532, "loss": 0.2281, "step": 66910 }, { "epoch": 2.77, "grad_norm": 0.94140625, "learning_rate": 0.0004895702340730034, "loss": 0.2336, "step": 66920 }, { "epoch": 2.77, "grad_norm": 1.1171875, "learning_rate": 0.0004895671339872083, "loss": 0.2217, "step": 66930 }, { "epoch": 2.77, "grad_norm": 0.0, "learning_rate": 0.0004895640334505736, "loss": 0.1799, "step": 66940 }, { "epoch": 2.77, "grad_norm": 0.50390625, "learning_rate": 0.0004895609324631054, "loss": 0.1533, "step": 66950 }, { "epoch": 2.77, "grad_norm": 0.6171875, "learning_rate": 0.0004895578310248095, "loss": 0.2478, "step": 66960 }, { "epoch": 2.77, "grad_norm": 1.375, "learning_rate": 0.0004895547291356914, "loss": 0.1967, "step": 66970 }, { "epoch": 2.77, "grad_norm": 2.09375, "learning_rate": 0.0004895516267957573, "loss": 0.1752, "step": 66980 }, { "epoch": 2.77, "grad_norm": 0.6875, "learning_rate": 0.0004895485240050128, "loss": 0.1919, "step": 66990 }, { "epoch": 2.78, "grad_norm": 1.1328125, "learning_rate": 0.0004895454207634639, "loss": 0.2074, "step": 67000 }, { "epoch": 2.78, "grad_norm": 0.68359375, "learning_rate": 0.0004895423170711164, "loss": 0.2216, "step": 67010 }, { "epoch": 2.78, "grad_norm": 1.6640625, "learning_rate": 0.0004895392129279761, "loss": 0.2734, "step": 67020 }, { "epoch": 2.78, "grad_norm": 0.60546875, "learning_rate": 0.0004895361083340489, "loss": 0.2055, "step": 67030 }, { "epoch": 2.78, "grad_norm": 0.43359375, "learning_rate": 0.0004895330032893406, "loss": 0.1994, "step": 67040 }, { "epoch": 2.78, "grad_norm": 1.078125, "learning_rate": 0.0004895298977938571, "loss": 0.2051, "step": 67050 }, { "epoch": 2.78, "grad_norm": 0.365234375, "learning_rate": 0.0004895267918476042, "loss": 0.2393, "step": 67060 }, { "epoch": 2.78, "grad_norm": 1.0703125, "learning_rate": 0.0004895236854505876, "loss": 0.2517, "step": 67070 }, { "epoch": 2.78, "grad_norm": 0.93359375, "learning_rate": 0.0004895205786028135, "loss": 0.2648, "step": 67080 }, { "epoch": 2.78, "grad_norm": 0.66015625, "learning_rate": 0.0004895174713042873, "loss": 0.2076, "step": 67090 }, { "epoch": 2.78, "grad_norm": 0.546875, "learning_rate": 0.0004895143635550153, "loss": 0.2602, "step": 67100 }, { "epoch": 2.78, "grad_norm": 0.6875, "learning_rate": 0.0004895112553550031, "loss": 0.2172, "step": 67110 }, { "epoch": 2.78, "grad_norm": 0.7890625, "learning_rate": 0.0004895081467042564, "loss": 0.2144, "step": 67120 }, { "epoch": 2.78, "grad_norm": 0.66796875, "learning_rate": 0.0004895050376027815, "loss": 0.235, "step": 67130 }, { "epoch": 2.78, "grad_norm": 0.80078125, "learning_rate": 0.0004895019280505838, "loss": 0.1556, "step": 67140 }, { "epoch": 2.78, "grad_norm": 0.9765625, "learning_rate": 0.0004894988180476694, "loss": 0.2253, "step": 67150 }, { "epoch": 2.78, "grad_norm": 0.462890625, "learning_rate": 0.0004894957075940441, "loss": 0.2965, "step": 67160 }, { "epoch": 2.78, "grad_norm": 0.53125, "learning_rate": 0.0004894925966897137, "loss": 0.1994, "step": 67170 }, { "epoch": 2.78, "grad_norm": 0.71484375, "learning_rate": 0.0004894894853346841, "loss": 0.2718, "step": 67180 }, { "epoch": 2.78, "grad_norm": 2.0625, "learning_rate": 0.0004894863735289611, "loss": 0.2231, "step": 67190 }, { "epoch": 2.78, "grad_norm": 1.0078125, "learning_rate": 0.0004894832612725508, "loss": 0.2546, "step": 67200 }, { "epoch": 2.78, "grad_norm": 0.0, "learning_rate": 0.0004894801485654587, "loss": 0.2446, "step": 67210 }, { "epoch": 2.78, "grad_norm": 1.5703125, "learning_rate": 0.0004894770354076908, "loss": 0.2493, "step": 67220 }, { "epoch": 2.78, "grad_norm": 0.6875, "learning_rate": 0.0004894739217992531, "loss": 0.1698, "step": 67230 }, { "epoch": 2.79, "grad_norm": 0.625, "learning_rate": 0.0004894708077401512, "loss": 0.2305, "step": 67240 }, { "epoch": 2.79, "grad_norm": 0.88671875, "learning_rate": 0.0004894676932303912, "loss": 0.1799, "step": 67250 }, { "epoch": 2.79, "grad_norm": 0.34765625, "learning_rate": 0.0004894645782699788, "loss": 0.275, "step": 67260 }, { "epoch": 2.79, "grad_norm": 0.66796875, "learning_rate": 0.00048946146285892, "loss": 0.2098, "step": 67270 }, { "epoch": 2.79, "grad_norm": 0.91015625, "learning_rate": 0.0004894583469972206, "loss": 0.1712, "step": 67280 }, { "epoch": 2.79, "grad_norm": 0.44140625, "learning_rate": 0.0004894552306848863, "loss": 0.2385, "step": 67290 }, { "epoch": 2.79, "grad_norm": 0.32421875, "learning_rate": 0.0004894521139219232, "loss": 0.2139, "step": 67300 }, { "epoch": 2.79, "grad_norm": 0.703125, "learning_rate": 0.0004894489967083371, "loss": 0.2432, "step": 67310 }, { "epoch": 2.79, "grad_norm": 0.9375, "learning_rate": 0.0004894458790441338, "loss": 0.2582, "step": 67320 }, { "epoch": 2.79, "grad_norm": 0.28515625, "learning_rate": 0.0004894427609293193, "loss": 0.198, "step": 67330 }, { "epoch": 2.79, "grad_norm": 0.2578125, "learning_rate": 0.0004894396423638994, "loss": 0.2052, "step": 67340 }, { "epoch": 2.79, "grad_norm": 0.431640625, "learning_rate": 0.0004894365233478798, "loss": 0.27, "step": 67350 }, { "epoch": 2.79, "grad_norm": 0.439453125, "learning_rate": 0.0004894334038812667, "loss": 0.2495, "step": 67360 }, { "epoch": 2.79, "grad_norm": 0.38671875, "learning_rate": 0.0004894302839640656, "loss": 0.2903, "step": 67370 }, { "epoch": 2.79, "grad_norm": 0.734375, "learning_rate": 0.0004894271635962827, "loss": 0.2246, "step": 67380 }, { "epoch": 2.79, "grad_norm": 0.61328125, "learning_rate": 0.0004894240427779237, "loss": 0.1843, "step": 67390 }, { "epoch": 2.79, "grad_norm": 0.96484375, "learning_rate": 0.0004894209215089945, "loss": 0.2121, "step": 67400 }, { "epoch": 2.79, "grad_norm": 0.84375, "learning_rate": 0.0004894177997895009, "loss": 0.2021, "step": 67410 }, { "epoch": 2.79, "grad_norm": 0.498046875, "learning_rate": 0.000489414677619449, "loss": 0.2598, "step": 67420 }, { "epoch": 2.79, "grad_norm": 1.3046875, "learning_rate": 0.0004894115549988444, "loss": 0.2266, "step": 67430 }, { "epoch": 2.79, "grad_norm": 0.7265625, "learning_rate": 0.0004894084319276933, "loss": 0.2198, "step": 67440 }, { "epoch": 2.79, "grad_norm": 0.37109375, "learning_rate": 0.0004894053084060012, "loss": 0.2329, "step": 67450 }, { "epoch": 2.79, "grad_norm": 0.484375, "learning_rate": 0.0004894021844337742, "loss": 0.1631, "step": 67460 }, { "epoch": 2.79, "grad_norm": 1.1953125, "learning_rate": 0.0004893990600110182, "loss": 0.2588, "step": 67470 }, { "epoch": 2.8, "grad_norm": 0.578125, "learning_rate": 0.0004893959351377389, "loss": 0.2114, "step": 67480 }, { "epoch": 2.8, "grad_norm": 0.44921875, "learning_rate": 0.0004893928098139424, "loss": 0.1634, "step": 67490 }, { "epoch": 2.8, "grad_norm": 0.392578125, "learning_rate": 0.0004893896840396345, "loss": 0.2434, "step": 67500 }, { "epoch": 2.8, "grad_norm": 0.76171875, "learning_rate": 0.0004893865578148211, "loss": 0.2413, "step": 67510 }, { "epoch": 2.8, "grad_norm": 1.3671875, "learning_rate": 0.0004893834311395079, "loss": 0.2334, "step": 67520 }, { "epoch": 2.8, "grad_norm": 0.57421875, "learning_rate": 0.000489380304013701, "loss": 0.2389, "step": 67530 }, { "epoch": 2.8, "grad_norm": 0.470703125, "learning_rate": 0.0004893771764374063, "loss": 0.221, "step": 67540 }, { "epoch": 2.8, "grad_norm": 0.458984375, "learning_rate": 0.0004893740484106296, "loss": 0.2291, "step": 67550 }, { "epoch": 2.8, "grad_norm": 0.6875, "learning_rate": 0.0004893709199333767, "loss": 0.2297, "step": 67560 }, { "epoch": 2.8, "grad_norm": 2.5625, "learning_rate": 0.0004893677910056535, "loss": 0.2034, "step": 67570 }, { "epoch": 2.8, "grad_norm": 2.125, "learning_rate": 0.0004893646616274662, "loss": 0.2321, "step": 67580 }, { "epoch": 2.8, "grad_norm": 1.7734375, "learning_rate": 0.0004893615317988203, "loss": 0.2003, "step": 67590 }, { "epoch": 2.8, "grad_norm": 0.5625, "learning_rate": 0.0004893584015197218, "loss": 0.1991, "step": 67600 }, { "epoch": 2.8, "grad_norm": 0.5703125, "learning_rate": 0.0004893552707901769, "loss": 0.2361, "step": 67610 }, { "epoch": 2.8, "grad_norm": 0.546875, "learning_rate": 0.000489352139610191, "loss": 0.2462, "step": 67620 }, { "epoch": 2.8, "grad_norm": 1.6796875, "learning_rate": 0.0004893490079797702, "loss": 0.1764, "step": 67630 }, { "epoch": 2.8, "grad_norm": 0.609375, "learning_rate": 0.0004893458758989205, "loss": 0.2177, "step": 67640 }, { "epoch": 2.8, "grad_norm": 0.51171875, "learning_rate": 0.0004893427433676477, "loss": 0.2256, "step": 67650 }, { "epoch": 2.8, "grad_norm": 0.357421875, "learning_rate": 0.0004893396103859578, "loss": 0.2207, "step": 67660 }, { "epoch": 2.8, "grad_norm": 1.15625, "learning_rate": 0.0004893364769538564, "loss": 0.2522, "step": 67670 }, { "epoch": 2.8, "grad_norm": 1.0, "learning_rate": 0.0004893333430713496, "loss": 0.2676, "step": 67680 }, { "epoch": 2.8, "grad_norm": 0.546875, "learning_rate": 0.0004893302087384435, "loss": 0.2146, "step": 67690 }, { "epoch": 2.8, "grad_norm": 0.2119140625, "learning_rate": 0.0004893270739551437, "loss": 0.1853, "step": 67700 }, { "epoch": 2.8, "grad_norm": 0.30859375, "learning_rate": 0.0004893239387214561, "loss": 0.2197, "step": 67710 }, { "epoch": 2.8, "grad_norm": 0.76171875, "learning_rate": 0.0004893208030373868, "loss": 0.2113, "step": 67720 }, { "epoch": 2.81, "grad_norm": 0.6875, "learning_rate": 0.0004893176669029416, "loss": 0.2232, "step": 67730 }, { "epoch": 2.81, "grad_norm": 0.65625, "learning_rate": 0.0004893145303181264, "loss": 0.2224, "step": 67740 }, { "epoch": 2.81, "grad_norm": 1.1796875, "learning_rate": 0.000489311393282947, "loss": 0.2048, "step": 67750 }, { "epoch": 2.81, "grad_norm": 1.3125, "learning_rate": 0.0004893082557974094, "loss": 0.2256, "step": 67760 }, { "epoch": 2.81, "grad_norm": 1.171875, "learning_rate": 0.0004893051178615196, "loss": 0.1869, "step": 67770 }, { "epoch": 2.81, "grad_norm": 0.5390625, "learning_rate": 0.0004893019794752834, "loss": 0.1727, "step": 67780 }, { "epoch": 2.81, "grad_norm": 0.3671875, "learning_rate": 0.0004892988406387066, "loss": 0.2418, "step": 67790 }, { "epoch": 2.81, "grad_norm": 1.34375, "learning_rate": 0.0004892957013517954, "loss": 0.2518, "step": 67800 }, { "epoch": 2.81, "grad_norm": 0.38671875, "learning_rate": 0.0004892925616145554, "loss": 0.1807, "step": 67810 }, { "epoch": 2.81, "grad_norm": 0.78125, "learning_rate": 0.0004892894214269927, "loss": 0.2147, "step": 67820 }, { "epoch": 2.81, "grad_norm": 0.62109375, "learning_rate": 0.0004892862807891131, "loss": 0.208, "step": 67830 }, { "epoch": 2.81, "grad_norm": 0.68359375, "learning_rate": 0.0004892831397009226, "loss": 0.2587, "step": 67840 }, { "epoch": 2.81, "grad_norm": 0.71875, "learning_rate": 0.000489279998162427, "loss": 0.2025, "step": 67850 }, { "epoch": 2.81, "grad_norm": 0.65625, "learning_rate": 0.0004892768561736324, "loss": 0.2146, "step": 67860 }, { "epoch": 2.81, "grad_norm": 2.515625, "learning_rate": 0.0004892737137345446, "loss": 0.1868, "step": 67870 }, { "epoch": 2.81, "grad_norm": 0.94921875, "learning_rate": 0.0004892705708451694, "loss": 0.2395, "step": 67880 }, { "epoch": 2.81, "grad_norm": 0.96875, "learning_rate": 0.0004892674275055128, "loss": 0.2198, "step": 67890 }, { "epoch": 2.81, "grad_norm": 0.47265625, "learning_rate": 0.0004892642837155809, "loss": 0.2247, "step": 67900 }, { "epoch": 2.81, "grad_norm": 0.76953125, "learning_rate": 0.0004892611394753793, "loss": 0.2274, "step": 67910 }, { "epoch": 2.81, "grad_norm": 0.5859375, "learning_rate": 0.0004892579947849142, "loss": 0.17, "step": 67920 }, { "epoch": 2.81, "grad_norm": 0.5, "learning_rate": 0.0004892548496441913, "loss": 0.1769, "step": 67930 }, { "epoch": 2.81, "grad_norm": 1.1328125, "learning_rate": 0.0004892517040532167, "loss": 0.1818, "step": 67940 }, { "epoch": 2.81, "grad_norm": 0.515625, "learning_rate": 0.0004892485580119962, "loss": 0.2297, "step": 67950 }, { "epoch": 2.81, "grad_norm": 0.5546875, "learning_rate": 0.0004892454115205357, "loss": 0.1929, "step": 67960 }, { "epoch": 2.82, "grad_norm": 1.1875, "learning_rate": 0.0004892422645788411, "loss": 0.1904, "step": 67970 }, { "epoch": 2.82, "grad_norm": 0.578125, "learning_rate": 0.0004892391171869185, "loss": 0.1924, "step": 67980 }, { "epoch": 2.82, "grad_norm": 0.59375, "learning_rate": 0.0004892359693447738, "loss": 0.2324, "step": 67990 }, { "epoch": 2.82, "grad_norm": 0.1904296875, "learning_rate": 0.0004892328210524127, "loss": 0.203, "step": 68000 }, { "epoch": 2.82, "grad_norm": 0.5, "learning_rate": 0.0004892296723098414, "loss": 0.1906, "step": 68010 }, { "epoch": 2.82, "grad_norm": 0.734375, "learning_rate": 0.0004892265231170656, "loss": 0.2553, "step": 68020 }, { "epoch": 2.82, "grad_norm": 0.33203125, "learning_rate": 0.0004892233734740913, "loss": 0.214, "step": 68030 }, { "epoch": 2.82, "grad_norm": 0.4140625, "learning_rate": 0.0004892202233809244, "loss": 0.1816, "step": 68040 }, { "epoch": 2.82, "grad_norm": 0.48046875, "learning_rate": 0.000489217072837571, "loss": 0.1917, "step": 68050 }, { "epoch": 2.82, "grad_norm": 1.1015625, "learning_rate": 0.0004892139218440368, "loss": 0.2178, "step": 68060 }, { "epoch": 2.82, "grad_norm": 0.77734375, "learning_rate": 0.0004892107704003279, "loss": 0.2183, "step": 68070 }, { "epoch": 2.82, "grad_norm": 0.52734375, "learning_rate": 0.0004892076185064501, "loss": 0.2621, "step": 68080 }, { "epoch": 2.82, "grad_norm": 0.36328125, "learning_rate": 0.0004892044661624095, "loss": 0.214, "step": 68090 }, { "epoch": 2.82, "grad_norm": 0.61328125, "learning_rate": 0.0004892013133682118, "loss": 0.1619, "step": 68100 }, { "epoch": 2.82, "grad_norm": 0.85546875, "learning_rate": 0.0004891981601238632, "loss": 0.1992, "step": 68110 }, { "epoch": 2.82, "grad_norm": 0.59765625, "learning_rate": 0.0004891950064293694, "loss": 0.2123, "step": 68120 }, { "epoch": 2.82, "grad_norm": 0.84375, "learning_rate": 0.0004891918522847364, "loss": 0.2003, "step": 68130 }, { "epoch": 2.82, "grad_norm": 0.400390625, "learning_rate": 0.0004891886976899702, "loss": 0.2831, "step": 68140 }, { "epoch": 2.82, "grad_norm": 1.171875, "learning_rate": 0.0004891855426450767, "loss": 0.2239, "step": 68150 }, { "epoch": 2.82, "grad_norm": 1.1328125, "learning_rate": 0.0004891823871500619, "loss": 0.1636, "step": 68160 }, { "epoch": 2.82, "grad_norm": 1.8359375, "learning_rate": 0.0004891792312049317, "loss": 0.179, "step": 68170 }, { "epoch": 2.82, "grad_norm": 0.8671875, "learning_rate": 0.0004891760748096919, "loss": 0.2148, "step": 68180 }, { "epoch": 2.82, "grad_norm": 0.52734375, "learning_rate": 0.0004891729179643486, "loss": 0.1855, "step": 68190 }, { "epoch": 2.82, "grad_norm": 0.42578125, "learning_rate": 0.0004891697606689077, "loss": 0.209, "step": 68200 }, { "epoch": 2.83, "grad_norm": 0.48046875, "learning_rate": 0.0004891666029233752, "loss": 0.1878, "step": 68210 }, { "epoch": 2.83, "grad_norm": 0.306640625, "learning_rate": 0.0004891634447277568, "loss": 0.1686, "step": 68220 }, { "epoch": 2.83, "grad_norm": 1.34375, "learning_rate": 0.0004891602860820587, "loss": 0.2763, "step": 68230 }, { "epoch": 2.83, "grad_norm": 1.046875, "learning_rate": 0.0004891571269862869, "loss": 0.2411, "step": 68240 }, { "epoch": 2.83, "grad_norm": 0.466796875, "learning_rate": 0.0004891539674404471, "loss": 0.2543, "step": 68250 }, { "epoch": 2.83, "grad_norm": 0.5859375, "learning_rate": 0.0004891508074445453, "loss": 0.2378, "step": 68260 }, { "epoch": 2.83, "grad_norm": 0.87109375, "learning_rate": 0.0004891476469985876, "loss": 0.1626, "step": 68270 }, { "epoch": 2.83, "grad_norm": 1.5, "learning_rate": 0.0004891444861025799, "loss": 0.2365, "step": 68280 }, { "epoch": 2.83, "grad_norm": 1.1953125, "learning_rate": 0.0004891413247565281, "loss": 0.2761, "step": 68290 }, { "epoch": 2.83, "grad_norm": 0.9296875, "learning_rate": 0.000489138162960438, "loss": 0.1765, "step": 68300 }, { "epoch": 2.83, "grad_norm": 0.90234375, "learning_rate": 0.0004891350007143158, "loss": 0.2318, "step": 68310 }, { "epoch": 2.83, "grad_norm": 1.4765625, "learning_rate": 0.0004891318380181673, "loss": 0.2124, "step": 68320 }, { "epoch": 2.83, "grad_norm": 1.578125, "learning_rate": 0.0004891286748719986, "loss": 0.1802, "step": 68330 }, { "epoch": 2.83, "grad_norm": 0.412109375, "learning_rate": 0.0004891255112758155, "loss": 0.1705, "step": 68340 }, { "epoch": 2.83, "grad_norm": 0.6015625, "learning_rate": 0.0004891223472296241, "loss": 0.2042, "step": 68350 }, { "epoch": 2.83, "grad_norm": 0.5546875, "learning_rate": 0.0004891191827334302, "loss": 0.182, "step": 68360 }, { "epoch": 2.83, "grad_norm": 0.875, "learning_rate": 0.0004891160177872398, "loss": 0.227, "step": 68370 }, { "epoch": 2.83, "grad_norm": 0.734375, "learning_rate": 0.0004891128523910589, "loss": 0.2051, "step": 68380 }, { "epoch": 2.83, "grad_norm": 0.61328125, "learning_rate": 0.0004891096865448934, "loss": 0.2375, "step": 68390 }, { "epoch": 2.83, "grad_norm": 0.84375, "learning_rate": 0.0004891065202487492, "loss": 0.2403, "step": 68400 }, { "epoch": 2.83, "grad_norm": 0.66796875, "learning_rate": 0.0004891033535026326, "loss": 0.2545, "step": 68410 }, { "epoch": 2.83, "grad_norm": 0.9765625, "learning_rate": 0.0004891001863065491, "loss": 0.2368, "step": 68420 }, { "epoch": 2.83, "grad_norm": 1.640625, "learning_rate": 0.000489097018660505, "loss": 0.2179, "step": 68430 }, { "epoch": 2.83, "grad_norm": 0.7109375, "learning_rate": 0.0004890938505645061, "loss": 0.194, "step": 68440 }, { "epoch": 2.84, "grad_norm": 0.765625, "learning_rate": 0.0004890906820185583, "loss": 0.2157, "step": 68450 }, { "epoch": 2.84, "grad_norm": 0.59375, "learning_rate": 0.0004890875130226678, "loss": 0.2058, "step": 68460 }, { "epoch": 2.84, "grad_norm": 0.41796875, "learning_rate": 0.0004890843435768402, "loss": 0.1829, "step": 68470 }, { "epoch": 2.84, "grad_norm": 0.71875, "learning_rate": 0.0004890811736810818, "loss": 0.228, "step": 68480 }, { "epoch": 2.84, "grad_norm": 0.4765625, "learning_rate": 0.0004890780033353986, "loss": 0.2274, "step": 68490 }, { "epoch": 2.84, "grad_norm": 0.2373046875, "learning_rate": 0.0004890748325397963, "loss": 0.2367, "step": 68500 }, { "epoch": 2.84, "grad_norm": 0.52734375, "learning_rate": 0.0004890716612942809, "loss": 0.1595, "step": 68510 }, { "epoch": 2.84, "grad_norm": 0.8984375, "learning_rate": 0.0004890684895988585, "loss": 0.2215, "step": 68520 }, { "epoch": 2.84, "grad_norm": 0.734375, "learning_rate": 0.0004890653174535351, "loss": 0.2041, "step": 68530 }, { "epoch": 2.84, "grad_norm": 1.515625, "learning_rate": 0.0004890621448583165, "loss": 0.2592, "step": 68540 }, { "epoch": 2.84, "grad_norm": 0.95703125, "learning_rate": 0.0004890589718132088, "loss": 0.2288, "step": 68550 }, { "epoch": 2.84, "grad_norm": 0.5546875, "learning_rate": 0.0004890557983182179, "loss": 0.216, "step": 68560 }, { "epoch": 2.84, "grad_norm": 0.4921875, "learning_rate": 0.0004890526243733498, "loss": 0.227, "step": 68570 }, { "epoch": 2.84, "grad_norm": 7.53125, "learning_rate": 0.0004890494499786106, "loss": 0.2312, "step": 68580 }, { "epoch": 2.84, "grad_norm": 0.2353515625, "learning_rate": 0.000489046275134006, "loss": 0.2242, "step": 68590 }, { "epoch": 2.84, "grad_norm": 0.447265625, "learning_rate": 0.0004890430998395422, "loss": 0.2177, "step": 68600 }, { "epoch": 2.84, "grad_norm": 0.53125, "learning_rate": 0.0004890399240952252, "loss": 0.194, "step": 68610 }, { "epoch": 2.84, "grad_norm": 0.3984375, "learning_rate": 0.0004890367479010607, "loss": 0.1968, "step": 68620 }, { "epoch": 2.84, "grad_norm": 0.64453125, "learning_rate": 0.0004890335712570548, "loss": 0.1897, "step": 68630 }, { "epoch": 2.84, "grad_norm": 0.5703125, "learning_rate": 0.0004890303941632137, "loss": 0.1784, "step": 68640 }, { "epoch": 2.84, "grad_norm": 0.57421875, "learning_rate": 0.0004890272166195432, "loss": 0.1885, "step": 68650 }, { "epoch": 2.84, "grad_norm": 0.63671875, "learning_rate": 0.0004890240386260492, "loss": 0.2017, "step": 68660 }, { "epoch": 2.84, "grad_norm": 0.50390625, "learning_rate": 0.0004890208601827378, "loss": 0.2232, "step": 68670 }, { "epoch": 2.84, "grad_norm": 0.314453125, "learning_rate": 0.000489017681289615, "loss": 0.2137, "step": 68680 }, { "epoch": 2.85, "grad_norm": 0.796875, "learning_rate": 0.0004890145019466868, "loss": 0.2161, "step": 68690 }, { "epoch": 2.85, "grad_norm": 0.310546875, "learning_rate": 0.0004890113221539589, "loss": 0.2025, "step": 68700 }, { "epoch": 2.85, "grad_norm": 0.263671875, "learning_rate": 0.0004890081419114377, "loss": 0.2465, "step": 68710 }, { "epoch": 2.85, "grad_norm": 0.318359375, "learning_rate": 0.0004890049612191288, "loss": 0.1884, "step": 68720 }, { "epoch": 2.85, "grad_norm": 1.015625, "learning_rate": 0.0004890017800770385, "loss": 0.1905, "step": 68730 }, { "epoch": 2.85, "grad_norm": 0.4140625, "learning_rate": 0.0004889985984851727, "loss": 0.2108, "step": 68740 }, { "epoch": 2.85, "grad_norm": 1.4375, "learning_rate": 0.0004889954164435373, "loss": 0.2093, "step": 68750 }, { "epoch": 2.85, "grad_norm": 0.87109375, "learning_rate": 0.0004889922339521383, "loss": 0.227, "step": 68760 }, { "epoch": 2.85, "grad_norm": 2.9375, "learning_rate": 0.0004889890510109818, "loss": 0.2147, "step": 68770 }, { "epoch": 2.85, "grad_norm": 0.859375, "learning_rate": 0.0004889858676200737, "loss": 0.2076, "step": 68780 }, { "epoch": 2.85, "grad_norm": 0.90625, "learning_rate": 0.0004889826837794199, "loss": 0.2304, "step": 68790 }, { "epoch": 2.85, "grad_norm": 0.435546875, "learning_rate": 0.0004889794994890267, "loss": 0.183, "step": 68800 }, { "epoch": 2.85, "grad_norm": 0.416015625, "learning_rate": 0.0004889763147488997, "loss": 0.1957, "step": 68810 }, { "epoch": 2.85, "grad_norm": 0.6171875, "learning_rate": 0.0004889731295590452, "loss": 0.219, "step": 68820 }, { "epoch": 2.85, "grad_norm": 0.166015625, "learning_rate": 0.000488969943919469, "loss": 0.1681, "step": 68830 }, { "epoch": 2.85, "grad_norm": 0.66796875, "learning_rate": 0.0004889667578301772, "loss": 0.2288, "step": 68840 }, { "epoch": 2.85, "grad_norm": 0.3984375, "learning_rate": 0.0004889635712911757, "loss": 0.1875, "step": 68850 }, { "epoch": 2.85, "grad_norm": 0.625, "learning_rate": 0.0004889603843024707, "loss": 0.1882, "step": 68860 }, { "epoch": 2.85, "grad_norm": 0.5390625, "learning_rate": 0.000488957196864068, "loss": 0.1518, "step": 68870 }, { "epoch": 2.85, "grad_norm": 0.0, "learning_rate": 0.0004889540089759736, "loss": 0.1911, "step": 68880 }, { "epoch": 2.85, "grad_norm": 0.71484375, "learning_rate": 0.0004889508206381936, "loss": 0.1897, "step": 68890 }, { "epoch": 2.85, "grad_norm": 0.43359375, "learning_rate": 0.0004889476318507339, "loss": 0.2294, "step": 68900 }, { "epoch": 2.85, "grad_norm": 0.65234375, "learning_rate": 0.0004889444426136007, "loss": 0.2391, "step": 68910 }, { "epoch": 2.85, "grad_norm": 0.244140625, "learning_rate": 0.0004889412529267998, "loss": 0.2365, "step": 68920 }, { "epoch": 2.86, "grad_norm": 0.40234375, "learning_rate": 0.0004889380627903372, "loss": 0.204, "step": 68930 }, { "epoch": 2.86, "grad_norm": 0.80078125, "learning_rate": 0.000488934872204219, "loss": 0.2408, "step": 68940 }, { "epoch": 2.86, "grad_norm": 0.9609375, "learning_rate": 0.0004889316811684512, "loss": 0.2081, "step": 68950 }, { "epoch": 2.86, "grad_norm": 0.5078125, "learning_rate": 0.0004889284896830398, "loss": 0.2437, "step": 68960 }, { "epoch": 2.86, "grad_norm": 0.48046875, "learning_rate": 0.0004889252977479908, "loss": 0.1905, "step": 68970 }, { "epoch": 2.86, "grad_norm": 0.337890625, "learning_rate": 0.0004889221053633101, "loss": 0.2215, "step": 68980 }, { "epoch": 2.86, "grad_norm": 0.96484375, "learning_rate": 0.0004889189125290039, "loss": 0.2104, "step": 68990 }, { "epoch": 2.86, "grad_norm": 0.359375, "learning_rate": 0.0004889157192450781, "loss": 0.2398, "step": 69000 }, { "epoch": 2.86, "grad_norm": 0.65625, "learning_rate": 0.0004889125255115387, "loss": 0.2115, "step": 69010 }, { "epoch": 2.86, "grad_norm": 0.451171875, "learning_rate": 0.0004889093313283916, "loss": 0.2283, "step": 69020 }, { "epoch": 2.86, "grad_norm": 1.25, "learning_rate": 0.0004889061366956431, "loss": 0.2359, "step": 69030 }, { "epoch": 2.86, "grad_norm": 0.5703125, "learning_rate": 0.0004889029416132991, "loss": 0.2518, "step": 69040 }, { "epoch": 2.86, "grad_norm": 1.0859375, "learning_rate": 0.0004888997460813654, "loss": 0.1923, "step": 69050 }, { "epoch": 2.86, "grad_norm": 0.6171875, "learning_rate": 0.0004888965500998482, "loss": 0.2106, "step": 69060 }, { "epoch": 2.86, "grad_norm": 1.328125, "learning_rate": 0.0004888933536687536, "loss": 0.21, "step": 69070 }, { "epoch": 2.86, "grad_norm": 0.97265625, "learning_rate": 0.0004888901567880874, "loss": 0.3109, "step": 69080 }, { "epoch": 2.86, "grad_norm": 0.6171875, "learning_rate": 0.0004888869594578557, "loss": 0.1756, "step": 69090 }, { "epoch": 2.86, "grad_norm": 0.6640625, "learning_rate": 0.0004888837616780647, "loss": 0.1933, "step": 69100 }, { "epoch": 2.86, "grad_norm": 0.28515625, "learning_rate": 0.0004888805634487201, "loss": 0.2544, "step": 69110 }, { "epoch": 2.86, "grad_norm": 0.40234375, "learning_rate": 0.0004888773647698281, "loss": 0.1659, "step": 69120 }, { "epoch": 2.86, "grad_norm": 0.54296875, "learning_rate": 0.0004888741656413947, "loss": 0.258, "step": 69130 }, { "epoch": 2.86, "grad_norm": 1.1328125, "learning_rate": 0.000488870966063426, "loss": 0.2102, "step": 69140 }, { "epoch": 2.86, "grad_norm": 0.50390625, "learning_rate": 0.0004888677660359279, "loss": 0.1627, "step": 69150 }, { "epoch": 2.86, "grad_norm": 0.703125, "learning_rate": 0.0004888645655589064, "loss": 0.2274, "step": 69160 }, { "epoch": 2.87, "grad_norm": 0.482421875, "learning_rate": 0.0004888613646323676, "loss": 0.1628, "step": 69170 }, { "epoch": 2.87, "grad_norm": 0.8359375, "learning_rate": 0.0004888581632563176, "loss": 0.2036, "step": 69180 }, { "epoch": 2.87, "grad_norm": 0.77734375, "learning_rate": 0.0004888549614307622, "loss": 0.2366, "step": 69190 }, { "epoch": 2.87, "grad_norm": 0.5546875, "learning_rate": 0.0004888517591557077, "loss": 0.1904, "step": 69200 }, { "epoch": 2.87, "grad_norm": 0.57421875, "learning_rate": 0.0004888485564311599, "loss": 0.2575, "step": 69210 }, { "epoch": 2.87, "grad_norm": 0.435546875, "learning_rate": 0.0004888453532571248, "loss": 0.208, "step": 69220 }, { "epoch": 2.87, "grad_norm": 0.5234375, "learning_rate": 0.0004888421496336087, "loss": 0.2761, "step": 69230 }, { "epoch": 2.87, "grad_norm": 0.56640625, "learning_rate": 0.0004888389455606174, "loss": 0.2117, "step": 69240 }, { "epoch": 2.87, "grad_norm": 0.75390625, "learning_rate": 0.000488835741038157, "loss": 0.1361, "step": 69250 }, { "epoch": 2.87, "grad_norm": 0.7734375, "learning_rate": 0.0004888325360662335, "loss": 0.2006, "step": 69260 }, { "epoch": 2.87, "grad_norm": 0.36328125, "learning_rate": 0.0004888293306448531, "loss": 0.248, "step": 69270 }, { "epoch": 2.87, "grad_norm": 0.8671875, "learning_rate": 0.0004888261247740216, "loss": 0.1909, "step": 69280 }, { "epoch": 2.87, "grad_norm": 0.953125, "learning_rate": 0.000488822918453745, "loss": 0.2021, "step": 69290 }, { "epoch": 2.87, "grad_norm": 0.4375, "learning_rate": 0.0004888197116840297, "loss": 0.134, "step": 69300 }, { "epoch": 2.87, "grad_norm": 1.171875, "learning_rate": 0.0004888165044648813, "loss": 0.2082, "step": 69310 }, { "epoch": 2.87, "grad_norm": 0.443359375, "learning_rate": 0.0004888132967963061, "loss": 0.2351, "step": 69320 }, { "epoch": 2.87, "grad_norm": 1.0078125, "learning_rate": 0.0004888100886783101, "loss": 0.2087, "step": 69330 }, { "epoch": 2.87, "grad_norm": 0.7734375, "learning_rate": 0.0004888068801108992, "loss": 0.2115, "step": 69340 }, { "epoch": 2.87, "grad_norm": 0.0, "learning_rate": 0.0004888036710940796, "loss": 0.1439, "step": 69350 }, { "epoch": 2.87, "grad_norm": 0.6015625, "learning_rate": 0.0004888004616278573, "loss": 0.1918, "step": 69360 }, { "epoch": 2.87, "grad_norm": 0.703125, "learning_rate": 0.0004887972517122383, "loss": 0.2171, "step": 69370 }, { "epoch": 2.87, "grad_norm": 0.58203125, "learning_rate": 0.0004887940413472287, "loss": 0.2009, "step": 69380 }, { "epoch": 2.87, "grad_norm": 0.419921875, "learning_rate": 0.0004887908305328345, "loss": 0.2285, "step": 69390 }, { "epoch": 2.87, "grad_norm": 0.41796875, "learning_rate": 0.0004887876192690617, "loss": 0.2352, "step": 69400 }, { "epoch": 2.87, "grad_norm": 0.8046875, "learning_rate": 0.0004887844075559163, "loss": 0.2323, "step": 69410 }, { "epoch": 2.88, "grad_norm": 0.423828125, "learning_rate": 0.0004887811953934046, "loss": 0.2372, "step": 69420 }, { "epoch": 2.88, "grad_norm": 0.263671875, "learning_rate": 0.0004887779827815324, "loss": 0.1873, "step": 69430 }, { "epoch": 2.88, "grad_norm": 0.57421875, "learning_rate": 0.0004887747697203058, "loss": 0.1851, "step": 69440 }, { "epoch": 2.88, "grad_norm": 0.5703125, "learning_rate": 0.0004887715562097309, "loss": 0.2637, "step": 69450 }, { "epoch": 2.88, "grad_norm": 0.275390625, "learning_rate": 0.0004887683422498137, "loss": 0.2426, "step": 69460 }, { "epoch": 2.88, "grad_norm": 0.28515625, "learning_rate": 0.0004887651278405602, "loss": 0.1983, "step": 69470 }, { "epoch": 2.88, "grad_norm": 0.8515625, "learning_rate": 0.0004887619129819767, "loss": 0.2012, "step": 69480 }, { "epoch": 2.88, "grad_norm": 0.458984375, "learning_rate": 0.0004887586976740689, "loss": 0.2082, "step": 69490 }, { "epoch": 2.88, "grad_norm": 0.412109375, "learning_rate": 0.000488755481916843, "loss": 0.278, "step": 69500 }, { "epoch": 2.88, "grad_norm": 0.70703125, "learning_rate": 0.0004887522657103053, "loss": 0.2461, "step": 69510 }, { "epoch": 2.88, "grad_norm": 1.28125, "learning_rate": 0.0004887490490544614, "loss": 0.1862, "step": 69520 }, { "epoch": 2.88, "grad_norm": 1.7265625, "learning_rate": 0.0004887458319493176, "loss": 0.2396, "step": 69530 }, { "epoch": 2.88, "grad_norm": 1.2109375, "learning_rate": 0.0004887426143948799, "loss": 0.2456, "step": 69540 }, { "epoch": 2.88, "grad_norm": 0.6171875, "learning_rate": 0.0004887393963911545, "loss": 0.1944, "step": 69550 }, { "epoch": 2.88, "grad_norm": 0.80078125, "learning_rate": 0.0004887361779381473, "loss": 0.2501, "step": 69560 }, { "epoch": 2.88, "grad_norm": 0.474609375, "learning_rate": 0.0004887329590358644, "loss": 0.2288, "step": 69570 }, { "epoch": 2.88, "grad_norm": 0.5625, "learning_rate": 0.0004887297396843118, "loss": 0.1828, "step": 69580 }, { "epoch": 2.88, "grad_norm": 0.88671875, "learning_rate": 0.0004887265198834956, "loss": 0.1823, "step": 69590 }, { "epoch": 2.88, "grad_norm": 1.265625, "learning_rate": 0.0004887232996334219, "loss": 0.2337, "step": 69600 }, { "epoch": 2.88, "grad_norm": 0.70703125, "learning_rate": 0.0004887200789340967, "loss": 0.2317, "step": 69610 }, { "epoch": 2.88, "grad_norm": 1.1484375, "learning_rate": 0.0004887168577855261, "loss": 0.1528, "step": 69620 }, { "epoch": 2.88, "grad_norm": 0.30078125, "learning_rate": 0.0004887136361877161, "loss": 0.1878, "step": 69630 }, { "epoch": 2.88, "grad_norm": 0.3671875, "learning_rate": 0.000488710414140673, "loss": 0.191, "step": 69640 }, { "epoch": 2.88, "grad_norm": 0.68359375, "learning_rate": 0.0004887071916444025, "loss": 0.2041, "step": 69650 }, { "epoch": 2.89, "grad_norm": 0.53515625, "learning_rate": 0.000488703968698911, "loss": 0.2112, "step": 69660 }, { "epoch": 2.89, "grad_norm": 0.53515625, "learning_rate": 0.0004887007453042043, "loss": 0.2104, "step": 69670 }, { "epoch": 2.89, "grad_norm": 0.62890625, "learning_rate": 0.0004886975214602885, "loss": 0.2015, "step": 69680 }, { "epoch": 2.89, "grad_norm": 1.28125, "learning_rate": 0.0004886942971671698, "loss": 0.2037, "step": 69690 }, { "epoch": 2.89, "grad_norm": 0.2734375, "learning_rate": 0.0004886910724248543, "loss": 0.189, "step": 69700 }, { "epoch": 2.89, "grad_norm": 0.6875, "learning_rate": 0.000488687847233348, "loss": 0.1797, "step": 69710 }, { "epoch": 2.89, "grad_norm": 0.98828125, "learning_rate": 0.0004886846215926568, "loss": 0.2894, "step": 69720 }, { "epoch": 2.89, "grad_norm": 1.40625, "learning_rate": 0.000488681395502787, "loss": 0.201, "step": 69730 }, { "epoch": 2.89, "grad_norm": 0.5078125, "learning_rate": 0.0004886781689637446, "loss": 0.178, "step": 69740 }, { "epoch": 2.89, "grad_norm": 0.63671875, "learning_rate": 0.0004886749419755357, "loss": 0.1899, "step": 69750 }, { "epoch": 2.89, "grad_norm": 0.703125, "learning_rate": 0.0004886717145381661, "loss": 0.2081, "step": 69760 }, { "epoch": 2.89, "grad_norm": 0.5390625, "learning_rate": 0.0004886684866516424, "loss": 0.2034, "step": 69770 }, { "epoch": 2.89, "grad_norm": 0.5390625, "learning_rate": 0.0004886652583159702, "loss": 0.2354, "step": 69780 }, { "epoch": 2.89, "grad_norm": 0.5390625, "learning_rate": 0.0004886620295311559, "loss": 0.2974, "step": 69790 }, { "epoch": 2.89, "grad_norm": 0.68359375, "learning_rate": 0.0004886588002972052, "loss": 0.2783, "step": 69800 }, { "epoch": 2.89, "grad_norm": 0.435546875, "learning_rate": 0.0004886555706141247, "loss": 0.1979, "step": 69810 }, { "epoch": 2.89, "grad_norm": 0.89453125, "learning_rate": 0.00048865234048192, "loss": 0.2396, "step": 69820 }, { "epoch": 2.89, "grad_norm": 0.88671875, "learning_rate": 0.0004886491099005974, "loss": 0.2489, "step": 69830 }, { "epoch": 2.89, "grad_norm": 0.50390625, "learning_rate": 0.000488645878870163, "loss": 0.2436, "step": 69840 }, { "epoch": 2.89, "grad_norm": 0.734375, "learning_rate": 0.0004886426473906228, "loss": 0.2184, "step": 69850 }, { "epoch": 2.89, "grad_norm": 0.7734375, "learning_rate": 0.0004886394154619829, "loss": 0.2036, "step": 69860 }, { "epoch": 2.89, "grad_norm": 0.91796875, "learning_rate": 0.0004886361830842495, "loss": 0.1367, "step": 69870 }, { "epoch": 2.89, "grad_norm": 0.65234375, "learning_rate": 0.0004886329502574284, "loss": 0.221, "step": 69880 }, { "epoch": 2.89, "grad_norm": 0.4453125, "learning_rate": 0.0004886297169815259, "loss": 0.1638, "step": 69890 }, { "epoch": 2.9, "grad_norm": 0.490234375, "learning_rate": 0.000488626483256548, "loss": 0.2141, "step": 69900 }, { "epoch": 2.9, "grad_norm": 0.66015625, "learning_rate": 0.0004886232490825009, "loss": 0.2184, "step": 69910 }, { "epoch": 2.9, "grad_norm": 0.79296875, "learning_rate": 0.0004886200144593907, "loss": 0.1929, "step": 69920 }, { "epoch": 2.9, "grad_norm": 0.54296875, "learning_rate": 0.0004886167793872233, "loss": 0.1589, "step": 69930 }, { "epoch": 2.9, "grad_norm": 0.8828125, "learning_rate": 0.0004886135438660049, "loss": 0.2278, "step": 69940 }, { "epoch": 2.9, "grad_norm": 0.5, "learning_rate": 0.0004886103078957416, "loss": 0.1706, "step": 69950 }, { "epoch": 2.9, "grad_norm": 1.1953125, "learning_rate": 0.0004886070714764393, "loss": 0.2465, "step": 69960 }, { "epoch": 2.9, "grad_norm": 0.73828125, "learning_rate": 0.0004886038346081045, "loss": 0.2132, "step": 69970 }, { "epoch": 2.9, "grad_norm": 0.54296875, "learning_rate": 0.0004886005972907429, "loss": 0.176, "step": 69980 }, { "epoch": 2.9, "grad_norm": 0.828125, "learning_rate": 0.0004885973595243608, "loss": 0.1886, "step": 69990 }, { "epoch": 2.9, "grad_norm": 0.70703125, "learning_rate": 0.0004885941213089642, "loss": 0.1764, "step": 70000 }, { "epoch": 2.9, "grad_norm": 1.3515625, "learning_rate": 0.0004885908826445593, "loss": 0.2407, "step": 70010 }, { "epoch": 2.9, "grad_norm": 0.609375, "learning_rate": 0.0004885876435311522, "loss": 0.1938, "step": 70020 }, { "epoch": 2.9, "grad_norm": 0.89453125, "learning_rate": 0.0004885844039687487, "loss": 0.265, "step": 70030 }, { "epoch": 2.9, "grad_norm": 0.69921875, "learning_rate": 0.0004885811639573553, "loss": 0.1964, "step": 70040 }, { "epoch": 2.9, "grad_norm": 0.42578125, "learning_rate": 0.0004885779234969778, "loss": 0.3107, "step": 70050 }, { "epoch": 2.9, "grad_norm": 3.21875, "learning_rate": 0.0004885746825876225, "loss": 0.2371, "step": 70060 }, { "epoch": 2.9, "grad_norm": 1.5078125, "learning_rate": 0.0004885714412292953, "loss": 0.2369, "step": 70070 }, { "epoch": 2.9, "grad_norm": 1.59375, "learning_rate": 0.0004885681994220024, "loss": 0.2101, "step": 70080 }, { "epoch": 2.9, "grad_norm": 0.921875, "learning_rate": 0.0004885649571657501, "loss": 0.2055, "step": 70090 }, { "epoch": 2.9, "grad_norm": 0.71484375, "learning_rate": 0.0004885617144605442, "loss": 0.265, "step": 70100 }, { "epoch": 2.9, "grad_norm": 0.8671875, "learning_rate": 0.0004885584713063909, "loss": 0.1961, "step": 70110 }, { "epoch": 2.9, "grad_norm": 0.2138671875, "learning_rate": 0.0004885552277032963, "loss": 0.2513, "step": 70120 }, { "epoch": 2.9, "grad_norm": 0.6328125, "learning_rate": 0.0004885519836512666, "loss": 0.2379, "step": 70130 }, { "epoch": 2.91, "grad_norm": 0.80078125, "learning_rate": 0.0004885487391503077, "loss": 0.2321, "step": 70140 }, { "epoch": 2.91, "grad_norm": 0.55859375, "learning_rate": 0.000488545494200426, "loss": 0.1907, "step": 70150 }, { "epoch": 2.91, "grad_norm": 0.640625, "learning_rate": 0.0004885422488016273, "loss": 0.2312, "step": 70160 }, { "epoch": 2.91, "grad_norm": 0.63671875, "learning_rate": 0.000488539002953918, "loss": 0.1992, "step": 70170 }, { "epoch": 2.91, "grad_norm": 0.64453125, "learning_rate": 0.0004885357566573039, "loss": 0.2353, "step": 70180 }, { "epoch": 2.91, "grad_norm": 0.6875, "learning_rate": 0.0004885325099117914, "loss": 0.2122, "step": 70190 }, { "epoch": 2.91, "grad_norm": 0.51171875, "learning_rate": 0.0004885292627173864, "loss": 0.261, "step": 70200 }, { "epoch": 2.91, "grad_norm": 0.625, "learning_rate": 0.000488526015074095, "loss": 0.2239, "step": 70210 }, { "epoch": 2.91, "grad_norm": 0.83203125, "learning_rate": 0.0004885227669819235, "loss": 0.2137, "step": 70220 }, { "epoch": 2.91, "grad_norm": 0.671875, "learning_rate": 0.000488519518440878, "loss": 0.2621, "step": 70230 }, { "epoch": 2.91, "grad_norm": 0.76171875, "learning_rate": 0.0004885162694509644, "loss": 0.239, "step": 70240 }, { "epoch": 2.91, "grad_norm": 1.4140625, "learning_rate": 0.0004885130200121889, "loss": 0.1989, "step": 70250 }, { "epoch": 2.91, "grad_norm": 0.486328125, "learning_rate": 0.0004885097701245578, "loss": 0.1843, "step": 70260 }, { "epoch": 2.91, "grad_norm": 0.75390625, "learning_rate": 0.0004885065197880769, "loss": 0.1588, "step": 70270 }, { "epoch": 2.91, "grad_norm": 1.1015625, "learning_rate": 0.0004885032690027526, "loss": 0.2245, "step": 70280 }, { "epoch": 2.91, "grad_norm": 0.380859375, "learning_rate": 0.0004885000177685909, "loss": 0.2062, "step": 70290 }, { "epoch": 2.91, "grad_norm": 0.333984375, "learning_rate": 0.0004884967660855979, "loss": 0.2119, "step": 70300 }, { "epoch": 2.91, "grad_norm": 0.333984375, "learning_rate": 0.0004884935139537797, "loss": 0.1902, "step": 70310 }, { "epoch": 2.91, "grad_norm": 3.203125, "learning_rate": 0.0004884902613731425, "loss": 0.2194, "step": 70320 }, { "epoch": 2.91, "grad_norm": 0.83203125, "learning_rate": 0.0004884870083436924, "loss": 0.2284, "step": 70330 }, { "epoch": 2.91, "grad_norm": 0.515625, "learning_rate": 0.0004884837548654355, "loss": 0.1885, "step": 70340 }, { "epoch": 2.91, "grad_norm": 1.0859375, "learning_rate": 0.0004884805009383779, "loss": 0.1983, "step": 70350 }, { "epoch": 2.91, "grad_norm": 0.84765625, "learning_rate": 0.0004884772465625258, "loss": 0.205, "step": 70360 }, { "epoch": 2.91, "grad_norm": 0.59765625, "learning_rate": 0.0004884739917378852, "loss": 0.2053, "step": 70370 }, { "epoch": 2.92, "grad_norm": 1.109375, "learning_rate": 0.0004884707364644624, "loss": 0.2269, "step": 70380 }, { "epoch": 2.92, "grad_norm": 0.6171875, "learning_rate": 0.0004884674807422633, "loss": 0.1974, "step": 70390 }, { "epoch": 2.92, "grad_norm": 0.16015625, "learning_rate": 0.0004884642245712943, "loss": 0.2495, "step": 70400 }, { "epoch": 2.92, "grad_norm": 0.63671875, "learning_rate": 0.0004884609679515613, "loss": 0.1745, "step": 70410 }, { "epoch": 2.92, "grad_norm": 0.6796875, "learning_rate": 0.0004884577108830705, "loss": 0.1893, "step": 70420 }, { "epoch": 2.92, "grad_norm": 0.2431640625, "learning_rate": 0.0004884544533658281, "loss": 0.2923, "step": 70430 }, { "epoch": 2.92, "grad_norm": 0.37890625, "learning_rate": 0.0004884511953998401, "loss": 0.1896, "step": 70440 }, { "epoch": 2.92, "grad_norm": 0.6328125, "learning_rate": 0.0004884479369851127, "loss": 0.169, "step": 70450 }, { "epoch": 2.92, "grad_norm": 0.33984375, "learning_rate": 0.000488444678121652, "loss": 0.1916, "step": 70460 }, { "epoch": 2.92, "grad_norm": 1.5078125, "learning_rate": 0.0004884414188094643, "loss": 0.2033, "step": 70470 }, { "epoch": 2.92, "grad_norm": 1.0859375, "learning_rate": 0.0004884381590485555, "loss": 0.2334, "step": 70480 }, { "epoch": 2.92, "grad_norm": 0.447265625, "learning_rate": 0.0004884348988389318, "loss": 0.2361, "step": 70490 }, { "epoch": 2.92, "grad_norm": 0.48046875, "learning_rate": 0.0004884316381805994, "loss": 0.2025, "step": 70500 }, { "epoch": 2.92, "grad_norm": 1.328125, "learning_rate": 0.0004884283770735645, "loss": 0.2322, "step": 70510 }, { "epoch": 2.92, "grad_norm": 2.140625, "learning_rate": 0.000488425115517833, "loss": 0.2072, "step": 70520 }, { "epoch": 2.92, "grad_norm": 0.400390625, "learning_rate": 0.0004884218535134113, "loss": 0.1674, "step": 70530 }, { "epoch": 2.92, "grad_norm": 0.87109375, "learning_rate": 0.0004884185910603054, "loss": 0.2209, "step": 70540 }, { "epoch": 2.92, "grad_norm": 0.640625, "learning_rate": 0.0004884153281585214, "loss": 0.2502, "step": 70550 }, { "epoch": 2.92, "grad_norm": 0.85546875, "learning_rate": 0.0004884120648080655, "loss": 0.2758, "step": 70560 }, { "epoch": 2.92, "grad_norm": 0.63671875, "learning_rate": 0.0004884088010089439, "loss": 0.2113, "step": 70570 }, { "epoch": 2.92, "grad_norm": 0.63671875, "learning_rate": 0.0004884055367611627, "loss": 0.2405, "step": 70580 }, { "epoch": 2.92, "grad_norm": 1.1953125, "learning_rate": 0.0004884022720647279, "loss": 0.2057, "step": 70590 }, { "epoch": 2.92, "grad_norm": 0.36328125, "learning_rate": 0.0004883990069196459, "loss": 0.1637, "step": 70600 }, { "epoch": 2.92, "grad_norm": 0.5703125, "learning_rate": 0.0004883957413259227, "loss": 0.2837, "step": 70610 }, { "epoch": 2.93, "grad_norm": 0.68359375, "learning_rate": 0.0004883924752835644, "loss": 0.2194, "step": 70620 }, { "epoch": 2.93, "grad_norm": 1.140625, "learning_rate": 0.0004883892087925772, "loss": 0.253, "step": 70630 }, { "epoch": 2.93, "grad_norm": 0.482421875, "learning_rate": 0.0004883859418529673, "loss": 0.2117, "step": 70640 }, { "epoch": 2.93, "grad_norm": 1.046875, "learning_rate": 0.0004883826744647408, "loss": 0.2425, "step": 70650 }, { "epoch": 2.93, "grad_norm": 0.734375, "learning_rate": 0.0004883794066279038, "loss": 0.2118, "step": 70660 }, { "epoch": 2.93, "grad_norm": 0.69140625, "learning_rate": 0.0004883761383424625, "loss": 0.1871, "step": 70670 }, { "epoch": 2.93, "grad_norm": 0.51953125, "learning_rate": 0.0004883728696084231, "loss": 0.2319, "step": 70680 }, { "epoch": 2.93, "grad_norm": 1.0234375, "learning_rate": 0.0004883696004257917, "loss": 0.1944, "step": 70690 }, { "epoch": 2.93, "grad_norm": 0.294921875, "learning_rate": 0.0004883663307945744, "loss": 0.1576, "step": 70700 }, { "epoch": 2.93, "grad_norm": 0.67578125, "learning_rate": 0.0004883630607147774, "loss": 0.2082, "step": 70710 }, { "epoch": 2.93, "grad_norm": 1.65625, "learning_rate": 0.0004883597901864069, "loss": 0.2155, "step": 70720 }, { "epoch": 2.93, "grad_norm": 0.9375, "learning_rate": 0.0004883565192094692, "loss": 0.2488, "step": 70730 }, { "epoch": 2.93, "grad_norm": 0.84765625, "learning_rate": 0.00048835324778397, "loss": 0.2035, "step": 70740 }, { "epoch": 2.93, "grad_norm": 0.59765625, "learning_rate": 0.0004883499759099158, "loss": 0.1541, "step": 70750 }, { "epoch": 2.93, "grad_norm": 0.369140625, "learning_rate": 0.0004883467035873127, "loss": 0.1718, "step": 70760 }, { "epoch": 2.93, "grad_norm": 0.81640625, "learning_rate": 0.0004883434308161668, "loss": 0.2228, "step": 70770 }, { "epoch": 2.93, "grad_norm": 0.0, "learning_rate": 0.0004883401575964843, "loss": 0.2292, "step": 70780 }, { "epoch": 2.93, "grad_norm": 0.7265625, "learning_rate": 0.0004883368839282714, "loss": 0.1967, "step": 70790 }, { "epoch": 2.93, "grad_norm": 0.5, "learning_rate": 0.0004883336098115343, "loss": 0.2399, "step": 70800 }, { "epoch": 2.93, "grad_norm": 0.8828125, "learning_rate": 0.000488330335246279, "loss": 0.1903, "step": 70810 }, { "epoch": 2.93, "grad_norm": 0.470703125, "learning_rate": 0.0004883270602325118, "loss": 0.2122, "step": 70820 }, { "epoch": 2.93, "grad_norm": 0.69140625, "learning_rate": 0.0004883237847702387, "loss": 0.2595, "step": 70830 }, { "epoch": 2.93, "grad_norm": 0.33203125, "learning_rate": 0.000488320508859466, "loss": 0.2184, "step": 70840 }, { "epoch": 2.93, "grad_norm": 0.73828125, "learning_rate": 0.0004883172325002, "loss": 0.2323, "step": 70850 }, { "epoch": 2.94, "grad_norm": 0.68359375, "learning_rate": 0.0004883139556924466, "loss": 0.2437, "step": 70860 }, { "epoch": 2.94, "grad_norm": 0.62109375, "learning_rate": 0.000488310678436212, "loss": 0.263, "step": 70870 }, { "epoch": 2.94, "grad_norm": 1.0546875, "learning_rate": 0.0004883074007315026, "loss": 0.1788, "step": 70880 }, { "epoch": 2.94, "grad_norm": 1.2109375, "learning_rate": 0.0004883041225783243, "loss": 0.2186, "step": 70890 }, { "epoch": 2.94, "grad_norm": 0.44140625, "learning_rate": 0.0004883008439766832, "loss": 0.2292, "step": 70900 }, { "epoch": 2.94, "grad_norm": 1.234375, "learning_rate": 0.0004882975649265859, "loss": 0.1891, "step": 70910 }, { "epoch": 2.94, "grad_norm": 0.67578125, "learning_rate": 0.0004882942854280383, "loss": 0.1882, "step": 70920 }, { "epoch": 2.94, "grad_norm": 0.353515625, "learning_rate": 0.0004882910054810466, "loss": 0.197, "step": 70930 }, { "epoch": 2.94, "grad_norm": 0.703125, "learning_rate": 0.0004882877250856168, "loss": 0.2776, "step": 70940 }, { "epoch": 2.94, "grad_norm": 1.0625, "learning_rate": 0.0004882844442417553, "loss": 0.2025, "step": 70950 }, { "epoch": 2.94, "grad_norm": 0.80078125, "learning_rate": 0.0004882811629494683, "loss": 0.236, "step": 70960 }, { "epoch": 2.94, "grad_norm": 0.6875, "learning_rate": 0.00048827788120876185, "loss": 0.1906, "step": 70970 }, { "epoch": 2.94, "grad_norm": 0.9375, "learning_rate": 0.0004882745990196421, "loss": 0.2398, "step": 70980 }, { "epoch": 2.94, "grad_norm": 0.828125, "learning_rate": 0.0004882713163821153, "loss": 0.1863, "step": 70990 }, { "epoch": 2.94, "grad_norm": 0.3828125, "learning_rate": 0.0004882680332961876, "loss": 0.1789, "step": 71000 }, { "epoch": 2.94, "grad_norm": 1.296875, "learning_rate": 0.0004882647497618652, "loss": 0.2164, "step": 71010 }, { "epoch": 2.94, "grad_norm": 1.7734375, "learning_rate": 0.0004882614657791543, "loss": 0.2398, "step": 71020 }, { "epoch": 2.94, "grad_norm": 0.365234375, "learning_rate": 0.0004882581813480611, "loss": 0.2221, "step": 71030 }, { "epoch": 2.94, "grad_norm": 0.1474609375, "learning_rate": 0.00048825489646859167, "loss": 0.2201, "step": 71040 }, { "epoch": 2.94, "grad_norm": 0.65625, "learning_rate": 0.00048825161114075225, "loss": 0.2051, "step": 71050 }, { "epoch": 2.94, "grad_norm": 0.42578125, "learning_rate": 0.000488248325364549, "loss": 0.2748, "step": 71060 }, { "epoch": 2.94, "grad_norm": 0.88671875, "learning_rate": 0.0004882450391399882, "loss": 0.2223, "step": 71070 }, { "epoch": 2.94, "grad_norm": 1.0390625, "learning_rate": 0.00048824175246707594, "loss": 0.2607, "step": 71080 }, { "epoch": 2.94, "grad_norm": 1.0625, "learning_rate": 0.0004882384653458184, "loss": 0.2646, "step": 71090 }, { "epoch": 2.94, "grad_norm": 1.21875, "learning_rate": 0.00048823517777622186, "loss": 0.2231, "step": 71100 }, { "epoch": 2.95, "grad_norm": 1.0234375, "learning_rate": 0.0004882318897582924, "loss": 0.2075, "step": 71110 }, { "epoch": 2.95, "grad_norm": 1.21875, "learning_rate": 0.0004882286012920362, "loss": 0.2393, "step": 71120 }, { "epoch": 2.95, "grad_norm": 0.69921875, "learning_rate": 0.0004882253123774595, "loss": 0.2536, "step": 71130 }, { "epoch": 2.95, "grad_norm": 1.953125, "learning_rate": 0.00048822202301456857, "loss": 0.234, "step": 71140 }, { "epoch": 2.95, "grad_norm": 0.890625, "learning_rate": 0.0004882187332033695, "loss": 0.2563, "step": 71150 }, { "epoch": 2.95, "grad_norm": 0.78515625, "learning_rate": 0.00048821544294386845, "loss": 0.2201, "step": 71160 }, { "epoch": 2.95, "grad_norm": 0.6953125, "learning_rate": 0.00048821215223607175, "loss": 0.1954, "step": 71170 }, { "epoch": 2.95, "grad_norm": 1.8203125, "learning_rate": 0.00048820886107998543, "loss": 0.2505, "step": 71180 }, { "epoch": 2.95, "grad_norm": 0.0, "learning_rate": 0.00048820556947561577, "loss": 0.2222, "step": 71190 }, { "epoch": 2.95, "grad_norm": 0.71875, "learning_rate": 0.00048820227742296896, "loss": 0.2207, "step": 71200 }, { "epoch": 2.95, "grad_norm": 1.125, "learning_rate": 0.00048819898492205116, "loss": 0.2234, "step": 71210 }, { "epoch": 2.95, "grad_norm": 0.65234375, "learning_rate": 0.0004881956919728686, "loss": 0.2822, "step": 71220 }, { "epoch": 2.95, "grad_norm": 1.140625, "learning_rate": 0.0004881923985754275, "loss": 0.2253, "step": 71230 }, { "epoch": 2.95, "grad_norm": 0.66015625, "learning_rate": 0.00048818910472973404, "loss": 0.2631, "step": 71240 }, { "epoch": 2.95, "grad_norm": 0.6953125, "learning_rate": 0.0004881858104357944, "loss": 0.2246, "step": 71250 }, { "epoch": 2.95, "grad_norm": 0.306640625, "learning_rate": 0.00048818251569361475, "loss": 0.1991, "step": 71260 }, { "epoch": 2.95, "grad_norm": 0.30078125, "learning_rate": 0.0004881792205032014, "loss": 0.2254, "step": 71270 }, { "epoch": 2.95, "grad_norm": 0.984375, "learning_rate": 0.0004881759248645604, "loss": 0.2035, "step": 71280 }, { "epoch": 2.95, "grad_norm": 1.046875, "learning_rate": 0.0004881726287776981, "loss": 0.1759, "step": 71290 }, { "epoch": 2.95, "grad_norm": 0.71875, "learning_rate": 0.00048816933224262056, "loss": 0.1803, "step": 71300 }, { "epoch": 2.95, "grad_norm": 1.2578125, "learning_rate": 0.0004881660352593341, "loss": 0.206, "step": 71310 }, { "epoch": 2.95, "grad_norm": 0.84375, "learning_rate": 0.0004881627378278448, "loss": 0.2679, "step": 71320 }, { "epoch": 2.95, "grad_norm": 0.400390625, "learning_rate": 0.000488159439948159, "loss": 0.1987, "step": 71330 }, { "epoch": 2.95, "grad_norm": 1.0859375, "learning_rate": 0.00048815614162028294, "loss": 0.2294, "step": 71340 }, { "epoch": 2.96, "grad_norm": 0.40234375, "learning_rate": 0.0004881528428442227, "loss": 0.2184, "step": 71350 }, { "epoch": 2.96, "grad_norm": 0.494140625, "learning_rate": 0.00048814954361998443, "loss": 0.1522, "step": 71360 }, { "epoch": 2.96, "grad_norm": 0.828125, "learning_rate": 0.00048814624394757455, "loss": 0.1758, "step": 71370 }, { "epoch": 2.96, "grad_norm": 0.640625, "learning_rate": 0.0004881429438269991, "loss": 0.2968, "step": 71380 }, { "epoch": 2.96, "grad_norm": 0.193359375, "learning_rate": 0.0004881396432582643, "loss": 0.2377, "step": 71390 }, { "epoch": 2.96, "grad_norm": 0.48828125, "learning_rate": 0.0004881363422413765, "loss": 0.2281, "step": 71400 }, { "epoch": 2.96, "grad_norm": 0.427734375, "learning_rate": 0.0004881330407763418, "loss": 0.2063, "step": 71410 }, { "epoch": 2.96, "grad_norm": 0.482421875, "learning_rate": 0.0004881297388631664, "loss": 0.1654, "step": 71420 }, { "epoch": 2.96, "grad_norm": 0.765625, "learning_rate": 0.0004881264365018566, "loss": 0.2251, "step": 71430 }, { "epoch": 2.96, "grad_norm": 0.62890625, "learning_rate": 0.0004881231336924185, "loss": 0.2569, "step": 71440 }, { "epoch": 2.96, "grad_norm": 0.423828125, "learning_rate": 0.00048811983043485843, "loss": 0.1904, "step": 71450 }, { "epoch": 2.96, "grad_norm": 0.67578125, "learning_rate": 0.0004881165267291825, "loss": 0.233, "step": 71460 }, { "epoch": 2.96, "grad_norm": 0.74609375, "learning_rate": 0.00048811322257539706, "loss": 0.2037, "step": 71470 }, { "epoch": 2.96, "grad_norm": 2.9375, "learning_rate": 0.00048810991797350816, "loss": 0.1803, "step": 71480 }, { "epoch": 2.96, "grad_norm": 2.296875, "learning_rate": 0.00048810661292352214, "loss": 0.1891, "step": 71490 }, { "epoch": 2.96, "grad_norm": 0.859375, "learning_rate": 0.0004881033074254452, "loss": 0.3033, "step": 71500 }, { "epoch": 2.96, "grad_norm": 0.55078125, "learning_rate": 0.0004881000014792836, "loss": 0.2606, "step": 71510 }, { "epoch": 2.96, "grad_norm": 0.67578125, "learning_rate": 0.0004880966950850434, "loss": 0.2012, "step": 71520 }, { "epoch": 2.96, "grad_norm": 0.76953125, "learning_rate": 0.000488093388242731, "loss": 0.1895, "step": 71530 }, { "epoch": 2.96, "grad_norm": 0.462890625, "learning_rate": 0.0004880900809523525, "loss": 0.1695, "step": 71540 }, { "epoch": 2.96, "grad_norm": 0.90234375, "learning_rate": 0.00048808677321391427, "loss": 0.1795, "step": 71550 }, { "epoch": 2.96, "grad_norm": 1.53125, "learning_rate": 0.0004880834650274224, "loss": 0.2306, "step": 71560 }, { "epoch": 2.96, "grad_norm": 0.294921875, "learning_rate": 0.0004880801563928832, "loss": 0.2433, "step": 71570 }, { "epoch": 2.96, "grad_norm": 1.0625, "learning_rate": 0.00048807684731030275, "loss": 0.2928, "step": 71580 }, { "epoch": 2.97, "grad_norm": 0.46875, "learning_rate": 0.00048807353777968743, "loss": 0.2403, "step": 71590 }, { "epoch": 2.97, "grad_norm": 0.3515625, "learning_rate": 0.0004880702278010435, "loss": 0.1902, "step": 71600 }, { "epoch": 2.97, "grad_norm": 0.55078125, "learning_rate": 0.00048806691737437704, "loss": 0.2623, "step": 71610 }, { "epoch": 2.97, "grad_norm": 1.4609375, "learning_rate": 0.00048806360649969433, "loss": 0.1771, "step": 71620 }, { "epoch": 2.97, "grad_norm": 0.63671875, "learning_rate": 0.00048806029517700167, "loss": 0.2745, "step": 71630 }, { "epoch": 2.97, "grad_norm": 0.6796875, "learning_rate": 0.00048805698340630523, "loss": 0.2442, "step": 71640 }, { "epoch": 2.97, "grad_norm": 0.484375, "learning_rate": 0.00048805367118761125, "loss": 0.1739, "step": 71650 }, { "epoch": 2.97, "grad_norm": 0.5859375, "learning_rate": 0.00048805035852092596, "loss": 0.2743, "step": 71660 }, { "epoch": 2.97, "grad_norm": 0.5078125, "learning_rate": 0.00048804704540625555, "loss": 0.2061, "step": 71670 }, { "epoch": 2.97, "grad_norm": 0.609375, "learning_rate": 0.0004880437318436064, "loss": 0.2508, "step": 71680 }, { "epoch": 2.97, "grad_norm": 0.90625, "learning_rate": 0.0004880404178329846, "loss": 0.195, "step": 71690 }, { "epoch": 2.97, "grad_norm": 0.52734375, "learning_rate": 0.0004880371033743964, "loss": 0.2486, "step": 71700 }, { "epoch": 2.97, "grad_norm": 0.59375, "learning_rate": 0.00048803378846784816, "loss": 0.2084, "step": 71710 }, { "epoch": 2.97, "grad_norm": 0.51953125, "learning_rate": 0.00048803047311334605, "loss": 0.1868, "step": 71720 }, { "epoch": 2.97, "grad_norm": 0.640625, "learning_rate": 0.0004880271573108963, "loss": 0.2051, "step": 71730 }, { "epoch": 2.97, "grad_norm": 0.43359375, "learning_rate": 0.000488023841060505, "loss": 0.1991, "step": 71740 }, { "epoch": 2.97, "grad_norm": 1.5234375, "learning_rate": 0.0004880205243621787, "loss": 0.2575, "step": 71750 }, { "epoch": 2.97, "grad_norm": 0.353515625, "learning_rate": 0.00048801720721592333, "loss": 0.2823, "step": 71760 }, { "epoch": 2.97, "grad_norm": 1.0625, "learning_rate": 0.00048801388962174544, "loss": 0.2158, "step": 71770 }, { "epoch": 2.97, "grad_norm": 1.03125, "learning_rate": 0.000488010571579651, "loss": 0.2444, "step": 71780 }, { "epoch": 2.97, "grad_norm": 0.369140625, "learning_rate": 0.00048800725308964645, "loss": 0.1964, "step": 71790 }, { "epoch": 2.97, "grad_norm": 0.478515625, "learning_rate": 0.0004880039341517379, "loss": 0.1923, "step": 71800 }, { "epoch": 2.97, "grad_norm": 0.73828125, "learning_rate": 0.0004880006147659316, "loss": 0.1948, "step": 71810 }, { "epoch": 2.97, "grad_norm": 0.62890625, "learning_rate": 0.0004879972949322339, "loss": 0.2379, "step": 71820 }, { "epoch": 2.98, "grad_norm": 0.8125, "learning_rate": 0.000487993974650651, "loss": 0.192, "step": 71830 }, { "epoch": 2.98, "grad_norm": 0.62890625, "learning_rate": 0.0004879906539211891, "loss": 0.2021, "step": 71840 }, { "epoch": 2.98, "grad_norm": 0.92578125, "learning_rate": 0.00048798733274385455, "loss": 0.2294, "step": 71850 }, { "epoch": 2.98, "grad_norm": 0.431640625, "learning_rate": 0.00048798401111865353, "loss": 0.2164, "step": 71860 }, { "epoch": 2.98, "grad_norm": 1.2734375, "learning_rate": 0.00048798068904559223, "loss": 0.1919, "step": 71870 }, { "epoch": 2.98, "grad_norm": 0.5625, "learning_rate": 0.00048797736652467704, "loss": 0.2509, "step": 71880 }, { "epoch": 2.98, "grad_norm": 0.5703125, "learning_rate": 0.00048797404355591415, "loss": 0.2571, "step": 71890 }, { "epoch": 2.98, "grad_norm": 1.015625, "learning_rate": 0.0004879707201393098, "loss": 0.2226, "step": 71900 }, { "epoch": 2.98, "grad_norm": 0.0, "learning_rate": 0.00048796739627487024, "loss": 0.1937, "step": 71910 }, { "epoch": 2.98, "grad_norm": 0.53125, "learning_rate": 0.00048796407196260175, "loss": 0.2193, "step": 71920 }, { "epoch": 2.98, "grad_norm": 1.0625, "learning_rate": 0.00048796074720251054, "loss": 0.2114, "step": 71930 }, { "epoch": 2.98, "grad_norm": 0.404296875, "learning_rate": 0.0004879574219946029, "loss": 0.2087, "step": 71940 }, { "epoch": 2.98, "grad_norm": 0.640625, "learning_rate": 0.00048795409633888514, "loss": 0.2018, "step": 71950 }, { "epoch": 2.98, "grad_norm": 0.65234375, "learning_rate": 0.0004879507702353635, "loss": 0.2279, "step": 71960 }, { "epoch": 2.98, "grad_norm": 0.41796875, "learning_rate": 0.00048794744368404406, "loss": 0.2306, "step": 71970 }, { "epoch": 2.98, "grad_norm": 0.56640625, "learning_rate": 0.0004879441166849334, "loss": 0.2042, "step": 71980 }, { "epoch": 2.98, "grad_norm": 0.58984375, "learning_rate": 0.0004879407892380375, "loss": 0.1775, "step": 71990 }, { "epoch": 2.98, "grad_norm": 0.6875, "learning_rate": 0.00048793746134336274, "loss": 0.2049, "step": 72000 }, { "epoch": 2.98, "grad_norm": 0.98046875, "learning_rate": 0.0004879341330009154, "loss": 0.2592, "step": 72010 }, { "epoch": 2.98, "grad_norm": 0.09912109375, "learning_rate": 0.0004879308042107017, "loss": 0.2273, "step": 72020 }, { "epoch": 2.98, "grad_norm": 0.1787109375, "learning_rate": 0.00048792747497272796, "loss": 0.2029, "step": 72030 }, { "epoch": 2.98, "grad_norm": 0.423828125, "learning_rate": 0.00048792414528700035, "loss": 0.2187, "step": 72040 }, { "epoch": 2.98, "grad_norm": 0.90625, "learning_rate": 0.00048792081515352517, "loss": 0.2199, "step": 72050 }, { "epoch": 2.98, "grad_norm": 0.353515625, "learning_rate": 0.0004879174845723088, "loss": 0.2342, "step": 72060 }, { "epoch": 2.99, "grad_norm": 1.03125, "learning_rate": 0.00048791415354335733, "loss": 0.2126, "step": 72070 }, { "epoch": 2.99, "grad_norm": 1.6015625, "learning_rate": 0.00048791082206667714, "loss": 0.1571, "step": 72080 }, { "epoch": 2.99, "grad_norm": 0.353515625, "learning_rate": 0.0004879074901422745, "loss": 0.2076, "step": 72090 }, { "epoch": 2.99, "grad_norm": 0.84375, "learning_rate": 0.00048790415777015566, "loss": 0.2469, "step": 72100 }, { "epoch": 2.99, "grad_norm": 0.361328125, "learning_rate": 0.0004879008249503269, "loss": 0.1783, "step": 72110 }, { "epoch": 2.99, "grad_norm": 0.3671875, "learning_rate": 0.0004878974916827944, "loss": 0.2376, "step": 72120 }, { "epoch": 2.99, "grad_norm": 0.546875, "learning_rate": 0.0004878941579675646, "loss": 0.2034, "step": 72130 }, { "epoch": 2.99, "grad_norm": 0.61328125, "learning_rate": 0.0004878908238046437, "loss": 0.2649, "step": 72140 }, { "epoch": 2.99, "grad_norm": 0.87890625, "learning_rate": 0.0004878874891940378, "loss": 0.1896, "step": 72150 }, { "epoch": 2.99, "grad_norm": 0.484375, "learning_rate": 0.0004878841541357535, "loss": 0.2109, "step": 72160 }, { "epoch": 2.99, "grad_norm": 0.94140625, "learning_rate": 0.0004878808186297968, "loss": 0.2062, "step": 72170 }, { "epoch": 2.99, "grad_norm": 0.57421875, "learning_rate": 0.00048787748267617415, "loss": 0.1673, "step": 72180 }, { "epoch": 2.99, "grad_norm": 0.68359375, "learning_rate": 0.0004878741462748918, "loss": 0.2011, "step": 72190 }, { "epoch": 2.99, "grad_norm": 0.275390625, "learning_rate": 0.00048787080942595595, "loss": 0.2357, "step": 72200 }, { "epoch": 2.99, "grad_norm": 0.75390625, "learning_rate": 0.0004878674721293729, "loss": 0.1926, "step": 72210 }, { "epoch": 2.99, "grad_norm": 0.8046875, "learning_rate": 0.00048786413438514897, "loss": 0.2105, "step": 72220 }, { "epoch": 2.99, "grad_norm": 0.51171875, "learning_rate": 0.0004878607961932905, "loss": 0.2121, "step": 72230 }, { "epoch": 2.99, "grad_norm": 0.69140625, "learning_rate": 0.0004878574575538036, "loss": 0.2099, "step": 72240 }, { "epoch": 2.99, "grad_norm": 0.478515625, "learning_rate": 0.0004878541184666947, "loss": 0.2236, "step": 72250 }, { "epoch": 2.99, "grad_norm": 0.53515625, "learning_rate": 0.00048785077893197, "loss": 0.2327, "step": 72260 }, { "epoch": 2.99, "grad_norm": 0.921875, "learning_rate": 0.00048784743894963583, "loss": 0.23, "step": 72270 }, { "epoch": 2.99, "grad_norm": 0.6015625, "learning_rate": 0.00048784409851969845, "loss": 0.2363, "step": 72280 }, { "epoch": 2.99, "grad_norm": 0.361328125, "learning_rate": 0.00048784075764216425, "loss": 0.2224, "step": 72290 }, { "epoch": 2.99, "grad_norm": 0.51171875, "learning_rate": 0.00048783741631703935, "loss": 0.2215, "step": 72300 }, { "epoch": 3.0, "grad_norm": 0.77734375, "learning_rate": 0.00048783407454433016, "loss": 0.268, "step": 72310 }, { "epoch": 3.0, "grad_norm": 0.91796875, "learning_rate": 0.00048783073232404285, "loss": 0.2132, "step": 72320 }, { "epoch": 3.0, "grad_norm": 0.5625, "learning_rate": 0.00048782738965618387, "loss": 0.2695, "step": 72330 }, { "epoch": 3.0, "grad_norm": 0.5703125, "learning_rate": 0.0004878240465407594, "loss": 0.2792, "step": 72340 }, { "epoch": 3.0, "grad_norm": 0.62890625, "learning_rate": 0.0004878207029777757, "loss": 0.2346, "step": 72350 }, { "epoch": 3.0, "grad_norm": 0.55078125, "learning_rate": 0.0004878173589672392, "loss": 0.2275, "step": 72360 }, { "epoch": 3.0, "grad_norm": 0.458984375, "learning_rate": 0.00048781401450915607, "loss": 0.2062, "step": 72370 }, { "epoch": 3.0, "grad_norm": 0.7265625, "learning_rate": 0.00048781066960353265, "loss": 0.2178, "step": 72380 }, { "epoch": 3.0, "grad_norm": 0.412109375, "learning_rate": 0.00048780732425037523, "loss": 0.1775, "step": 72390 }, { "epoch": 3.0, "grad_norm": 0.8359375, "learning_rate": 0.00048780397844969017, "loss": 0.2349, "step": 72400 }, { "epoch": 3.0, "grad_norm": 0.6015625, "learning_rate": 0.00048780063220148364, "loss": 0.1752, "step": 72410 }, { "epoch": 3.0, "grad_norm": 0.53125, "learning_rate": 0.000487797285505762, "loss": 0.181, "step": 72420 }, { "epoch": 3.0, "grad_norm": 0.73046875, "learning_rate": 0.0004877939383625315, "loss": 0.2107, "step": 72430 }, { "epoch": 3.0, "grad_norm": 0.640625, "learning_rate": 0.00048779059077179855, "loss": 0.1439, "step": 72440 }, { "epoch": 3.0, "grad_norm": 0.7578125, "learning_rate": 0.0004877872427335694, "loss": 0.2217, "step": 72450 }, { "epoch": 3.0, "grad_norm": 0.486328125, "learning_rate": 0.0004877838942478503, "loss": 0.1862, "step": 72460 }, { "epoch": 3.0, "grad_norm": 1.265625, "learning_rate": 0.00048778054531464765, "loss": 0.243, "step": 72470 }, { "epoch": 3.0, "grad_norm": 0.423828125, "learning_rate": 0.0004877771959339676, "loss": 0.1798, "step": 72480 }, { "epoch": 3.0, "grad_norm": 0.98046875, "learning_rate": 0.00048777384610581664, "loss": 0.217, "step": 72490 }, { "epoch": 3.0, "grad_norm": 0.625, "learning_rate": 0.00048777049583020095, "loss": 0.2105, "step": 72500 }, { "epoch": 3.0, "grad_norm": 0.60546875, "learning_rate": 0.00048776714510712684, "loss": 0.1668, "step": 72510 }, { "epoch": 3.0, "grad_norm": 0.54296875, "learning_rate": 0.00048776379393660065, "loss": 0.1755, "step": 72520 }, { "epoch": 3.0, "grad_norm": 0.43359375, "learning_rate": 0.00048776044231862867, "loss": 0.1925, "step": 72530 }, { "epoch": 3.0, "grad_norm": 0.75390625, "learning_rate": 0.0004877570902532172, "loss": 0.1652, "step": 72540 }, { "epoch": 3.01, "grad_norm": 0.416015625, "learning_rate": 0.0004877537377403726, "loss": 0.2013, "step": 72550 }, { "epoch": 3.01, "grad_norm": 1.21875, "learning_rate": 0.0004877503847801011, "loss": 0.2054, "step": 72560 }, { "epoch": 3.01, "grad_norm": 0.796875, "learning_rate": 0.00048774703137240903, "loss": 0.2, "step": 72570 }, { "epoch": 3.01, "grad_norm": 1.5078125, "learning_rate": 0.00048774367751730274, "loss": 0.166, "step": 72580 }, { "epoch": 3.01, "grad_norm": 0.41015625, "learning_rate": 0.00048774032321478855, "loss": 0.1647, "step": 72590 }, { "epoch": 3.01, "grad_norm": 0.52734375, "learning_rate": 0.0004877369684648728, "loss": 0.2036, "step": 72600 }, { "epoch": 3.01, "grad_norm": 0.84375, "learning_rate": 0.00048773361326756164, "loss": 0.1787, "step": 72610 }, { "epoch": 3.01, "grad_norm": 0.50390625, "learning_rate": 0.0004877302576228616, "loss": 0.2388, "step": 72620 }, { "epoch": 3.01, "grad_norm": 1.234375, "learning_rate": 0.0004877269015307788, "loss": 0.2288, "step": 72630 }, { "epoch": 3.01, "grad_norm": 0.48046875, "learning_rate": 0.00048772354499131967, "loss": 0.2394, "step": 72640 }, { "epoch": 3.01, "grad_norm": 0.67578125, "learning_rate": 0.0004877201880044905, "loss": 0.2308, "step": 72650 }, { "epoch": 3.01, "grad_norm": 4.09375, "learning_rate": 0.00048771683057029756, "loss": 0.1905, "step": 72660 }, { "epoch": 3.01, "grad_norm": 0.59765625, "learning_rate": 0.0004877134726887473, "loss": 0.2068, "step": 72670 }, { "epoch": 3.01, "grad_norm": 0.55078125, "learning_rate": 0.0004877101143598459, "loss": 0.2087, "step": 72680 }, { "epoch": 3.01, "grad_norm": 2.234375, "learning_rate": 0.0004877067555835998, "loss": 0.2214, "step": 72690 }, { "epoch": 3.01, "grad_norm": 0.53125, "learning_rate": 0.00048770339636001526, "loss": 0.2059, "step": 72700 }, { "epoch": 3.01, "grad_norm": 0.38671875, "learning_rate": 0.0004877000366890985, "loss": 0.2037, "step": 72710 }, { "epoch": 3.01, "grad_norm": 0.76953125, "learning_rate": 0.000487696676570856, "loss": 0.194, "step": 72720 }, { "epoch": 3.01, "grad_norm": 0.66015625, "learning_rate": 0.00048769331600529396, "loss": 0.2644, "step": 72730 }, { "epoch": 3.01, "grad_norm": 0.75, "learning_rate": 0.0004876899549924189, "loss": 0.2014, "step": 72740 }, { "epoch": 3.01, "grad_norm": 0.40625, "learning_rate": 0.0004876865935322369, "loss": 0.1983, "step": 72750 }, { "epoch": 3.01, "grad_norm": 0.546875, "learning_rate": 0.00048768323162475446, "loss": 0.2287, "step": 72760 }, { "epoch": 3.01, "grad_norm": 0.578125, "learning_rate": 0.0004876798692699778, "loss": 0.1679, "step": 72770 }, { "epoch": 3.01, "grad_norm": 1.2890625, "learning_rate": 0.00048767650646791333, "loss": 0.2739, "step": 72780 }, { "epoch": 3.01, "grad_norm": 0.486328125, "learning_rate": 0.0004876731432185673, "loss": 0.1546, "step": 72790 }, { "epoch": 3.02, "grad_norm": 0.6640625, "learning_rate": 0.0004876697795219461, "loss": 0.2504, "step": 72800 }, { "epoch": 3.02, "grad_norm": 0.486328125, "learning_rate": 0.00048766641537805605, "loss": 0.1978, "step": 72810 }, { "epoch": 3.02, "grad_norm": 0.546875, "learning_rate": 0.0004876630507869035, "loss": 0.188, "step": 72820 }, { "epoch": 3.02, "grad_norm": 1.1484375, "learning_rate": 0.0004876596857484947, "loss": 0.2741, "step": 72830 }, { "epoch": 3.02, "grad_norm": 0.6015625, "learning_rate": 0.00048765632026283605, "loss": 0.1905, "step": 72840 }, { "epoch": 3.02, "grad_norm": 0.203125, "learning_rate": 0.0004876529543299339, "loss": 0.2008, "step": 72850 }, { "epoch": 3.02, "grad_norm": 0.58203125, "learning_rate": 0.00048764958794979453, "loss": 0.196, "step": 72860 }, { "epoch": 3.02, "grad_norm": 0.369140625, "learning_rate": 0.0004876462211224243, "loss": 0.1902, "step": 72870 }, { "epoch": 3.02, "grad_norm": 0.515625, "learning_rate": 0.00048764285384782956, "loss": 0.2203, "step": 72880 }, { "epoch": 3.02, "grad_norm": 1.859375, "learning_rate": 0.0004876394861260166, "loss": 0.2204, "step": 72890 }, { "epoch": 3.02, "grad_norm": 0.6171875, "learning_rate": 0.00048763611795699184, "loss": 0.2486, "step": 72900 }, { "epoch": 3.02, "grad_norm": 0.625, "learning_rate": 0.00048763274934076153, "loss": 0.1858, "step": 72910 }, { "epoch": 3.02, "grad_norm": 0.484375, "learning_rate": 0.00048762938027733203, "loss": 0.2007, "step": 72920 }, { "epoch": 3.02, "grad_norm": 0.48046875, "learning_rate": 0.00048762601076670975, "loss": 0.1984, "step": 72930 }, { "epoch": 3.02, "grad_norm": 0.6875, "learning_rate": 0.00048762264080890095, "loss": 0.2159, "step": 72940 }, { "epoch": 3.02, "grad_norm": 0.3359375, "learning_rate": 0.000487619270403912, "loss": 0.1565, "step": 72950 }, { "epoch": 3.02, "grad_norm": 0.6328125, "learning_rate": 0.0004876158995517492, "loss": 0.2507, "step": 72960 }, { "epoch": 3.02, "grad_norm": 0.88671875, "learning_rate": 0.0004876125282524191, "loss": 0.2102, "step": 72970 }, { "epoch": 3.02, "grad_norm": 0.95703125, "learning_rate": 0.00048760915650592775, "loss": 0.2121, "step": 72980 }, { "epoch": 3.02, "grad_norm": 1.0, "learning_rate": 0.00048760578431228163, "loss": 0.2234, "step": 72990 }, { "epoch": 3.02, "grad_norm": 1.0234375, "learning_rate": 0.0004876024116714871, "loss": 0.1907, "step": 73000 }, { "epoch": 3.02, "grad_norm": 0.46875, "learning_rate": 0.0004875990385835505, "loss": 0.2353, "step": 73010 }, { "epoch": 3.02, "grad_norm": 0.85546875, "learning_rate": 0.00048759566504847816, "loss": 0.1463, "step": 73020 }, { "epoch": 3.02, "grad_norm": 1.1171875, "learning_rate": 0.0004875922910662764, "loss": 0.1872, "step": 73030 }, { "epoch": 3.03, "grad_norm": 1.1953125, "learning_rate": 0.0004875889166369516, "loss": 0.1915, "step": 73040 }, { "epoch": 3.03, "grad_norm": 1.0859375, "learning_rate": 0.00048758554176051017, "loss": 0.277, "step": 73050 }, { "epoch": 3.03, "grad_norm": 1.6640625, "learning_rate": 0.0004875821664369584, "loss": 0.1807, "step": 73060 }, { "epoch": 3.03, "grad_norm": 0.578125, "learning_rate": 0.0004875787906663027, "loss": 0.1922, "step": 73070 }, { "epoch": 3.03, "grad_norm": 1.21875, "learning_rate": 0.00048757541444854923, "loss": 0.1925, "step": 73080 }, { "epoch": 3.03, "grad_norm": 0.67578125, "learning_rate": 0.00048757203778370463, "loss": 0.1845, "step": 73090 }, { "epoch": 3.03, "grad_norm": 0.86328125, "learning_rate": 0.00048756866067177506, "loss": 0.1858, "step": 73100 }, { "epoch": 3.03, "grad_norm": 1.390625, "learning_rate": 0.0004875652831127669, "loss": 0.195, "step": 73110 }, { "epoch": 3.03, "grad_norm": 0.98046875, "learning_rate": 0.0004875619051066865, "loss": 0.2507, "step": 73120 }, { "epoch": 3.03, "grad_norm": 0.671875, "learning_rate": 0.00048755852665354035, "loss": 0.2051, "step": 73130 }, { "epoch": 3.03, "grad_norm": 1.1953125, "learning_rate": 0.00048755514775333466, "loss": 0.2253, "step": 73140 }, { "epoch": 3.03, "grad_norm": 0.859375, "learning_rate": 0.0004875517684060758, "loss": 0.2032, "step": 73150 }, { "epoch": 3.03, "grad_norm": 0.828125, "learning_rate": 0.0004875483886117702, "loss": 0.2175, "step": 73160 }, { "epoch": 3.03, "grad_norm": 0.94140625, "learning_rate": 0.0004875450083704242, "loss": 0.1815, "step": 73170 }, { "epoch": 3.03, "grad_norm": 0.8984375, "learning_rate": 0.0004875416276820441, "loss": 0.1723, "step": 73180 }, { "epoch": 3.03, "grad_norm": 0.390625, "learning_rate": 0.0004875382465466364, "loss": 0.2832, "step": 73190 }, { "epoch": 3.03, "grad_norm": 0.8671875, "learning_rate": 0.00048753486496420727, "loss": 0.1818, "step": 73200 }, { "epoch": 3.03, "grad_norm": 0.421875, "learning_rate": 0.00048753148293476323, "loss": 0.2538, "step": 73210 }, { "epoch": 3.03, "grad_norm": 0.470703125, "learning_rate": 0.00048752810045831055, "loss": 0.2147, "step": 73220 }, { "epoch": 3.03, "grad_norm": 0.76953125, "learning_rate": 0.00048752471753485567, "loss": 0.1986, "step": 73230 }, { "epoch": 3.03, "grad_norm": 0.431640625, "learning_rate": 0.0004875213341644049, "loss": 0.1704, "step": 73240 }, { "epoch": 3.03, "grad_norm": 0.9609375, "learning_rate": 0.00048751795034696467, "loss": 0.2443, "step": 73250 }, { "epoch": 3.03, "grad_norm": 0.53125, "learning_rate": 0.0004875145660825413, "loss": 0.1706, "step": 73260 }, { "epoch": 3.03, "grad_norm": 0.703125, "learning_rate": 0.00048751118137114113, "loss": 0.2349, "step": 73270 }, { "epoch": 3.04, "grad_norm": 0.578125, "learning_rate": 0.00048750779621277055, "loss": 0.2239, "step": 73280 }, { "epoch": 3.04, "grad_norm": 0.625, "learning_rate": 0.000487504410607436, "loss": 0.211, "step": 73290 }, { "epoch": 3.04, "grad_norm": 1.28125, "learning_rate": 0.0004875010245551438, "loss": 0.2575, "step": 73300 }, { "epoch": 3.04, "grad_norm": 0.2001953125, "learning_rate": 0.00048749763805590025, "loss": 0.1918, "step": 73310 }, { "epoch": 3.04, "grad_norm": 1.3359375, "learning_rate": 0.00048749425110971185, "loss": 0.2217, "step": 73320 }, { "epoch": 3.04, "grad_norm": 0.68359375, "learning_rate": 0.0004874908637165849, "loss": 0.1684, "step": 73330 }, { "epoch": 3.04, "grad_norm": 0.396484375, "learning_rate": 0.00048748747587652573, "loss": 0.2268, "step": 73340 }, { "epoch": 3.04, "grad_norm": 0.953125, "learning_rate": 0.00048748408758954087, "loss": 0.2751, "step": 73350 }, { "epoch": 3.04, "grad_norm": 0.439453125, "learning_rate": 0.0004874806988556366, "loss": 0.2484, "step": 73360 }, { "epoch": 3.04, "grad_norm": 0.48046875, "learning_rate": 0.0004874773096748193, "loss": 0.2622, "step": 73370 }, { "epoch": 3.04, "grad_norm": 1.2890625, "learning_rate": 0.00048747392004709535, "loss": 0.1791, "step": 73380 }, { "epoch": 3.04, "grad_norm": 0.84765625, "learning_rate": 0.00048747052997247107, "loss": 0.2235, "step": 73390 }, { "epoch": 3.04, "grad_norm": 0.49609375, "learning_rate": 0.0004874671394509529, "loss": 0.2666, "step": 73400 }, { "epoch": 3.04, "grad_norm": 0.55078125, "learning_rate": 0.00048746374848254725, "loss": 0.2475, "step": 73410 }, { "epoch": 3.04, "grad_norm": 0.79296875, "learning_rate": 0.0004874603570672604, "loss": 0.1935, "step": 73420 }, { "epoch": 3.04, "grad_norm": 0.51953125, "learning_rate": 0.00048745696520509894, "loss": 0.2355, "step": 73430 }, { "epoch": 3.04, "grad_norm": 4.28125, "learning_rate": 0.00048745357289606907, "loss": 0.2292, "step": 73440 }, { "epoch": 3.04, "grad_norm": 0.6953125, "learning_rate": 0.0004874501801401772, "loss": 0.2344, "step": 73450 }, { "epoch": 3.04, "grad_norm": 0.73828125, "learning_rate": 0.0004874467869374297, "loss": 0.1944, "step": 73460 }, { "epoch": 3.04, "grad_norm": 0.8203125, "learning_rate": 0.00048744339328783297, "loss": 0.2549, "step": 73470 }, { "epoch": 3.04, "grad_norm": 0.431640625, "learning_rate": 0.00048743999919139346, "loss": 0.2009, "step": 73480 }, { "epoch": 3.04, "grad_norm": 0.79296875, "learning_rate": 0.00048743660464811754, "loss": 0.2083, "step": 73490 }, { "epoch": 3.04, "grad_norm": 1.1328125, "learning_rate": 0.0004874332096580115, "loss": 0.2185, "step": 73500 }, { "epoch": 3.04, "grad_norm": 0.66796875, "learning_rate": 0.0004874298142210819, "loss": 0.2591, "step": 73510 }, { "epoch": 3.05, "grad_norm": 0.625, "learning_rate": 0.00048742641833733494, "loss": 0.1996, "step": 73520 }, { "epoch": 3.05, "grad_norm": 0.5546875, "learning_rate": 0.0004874230220067771, "loss": 0.2206, "step": 73530 }, { "epoch": 3.05, "grad_norm": 0.2216796875, "learning_rate": 0.00048741962522941485, "loss": 0.2299, "step": 73540 }, { "epoch": 3.05, "grad_norm": 0.6796875, "learning_rate": 0.00048741622800525444, "loss": 0.2073, "step": 73550 }, { "epoch": 3.05, "grad_norm": 1.71875, "learning_rate": 0.0004874128303343024, "loss": 0.2152, "step": 73560 }, { "epoch": 3.05, "grad_norm": 0.451171875, "learning_rate": 0.00048740943221656496, "loss": 0.2292, "step": 73570 }, { "epoch": 3.05, "grad_norm": 1.1875, "learning_rate": 0.0004874060336520486, "loss": 0.2768, "step": 73580 }, { "epoch": 3.05, "grad_norm": 0.80859375, "learning_rate": 0.0004874026346407598, "loss": 0.232, "step": 73590 }, { "epoch": 3.05, "grad_norm": 0.458984375, "learning_rate": 0.00048739923518270487, "loss": 0.2236, "step": 73600 }, { "epoch": 3.05, "grad_norm": 1.2265625, "learning_rate": 0.0004873958352778902, "loss": 0.2414, "step": 73610 }, { "epoch": 3.05, "grad_norm": 1.3203125, "learning_rate": 0.0004873924349263222, "loss": 0.2556, "step": 73620 }, { "epoch": 3.05, "grad_norm": 0.703125, "learning_rate": 0.0004873890341280073, "loss": 0.2733, "step": 73630 }, { "epoch": 3.05, "grad_norm": 0.404296875, "learning_rate": 0.0004873856328829518, "loss": 0.2222, "step": 73640 }, { "epoch": 3.05, "grad_norm": 1.015625, "learning_rate": 0.0004873822311911622, "loss": 0.1805, "step": 73650 }, { "epoch": 3.05, "grad_norm": 0.76953125, "learning_rate": 0.0004873788290526449, "loss": 0.202, "step": 73660 }, { "epoch": 3.05, "grad_norm": 1.015625, "learning_rate": 0.00048737542646740627, "loss": 0.2271, "step": 73670 }, { "epoch": 3.05, "grad_norm": 0.46875, "learning_rate": 0.0004873720234354527, "loss": 0.2459, "step": 73680 }, { "epoch": 3.05, "grad_norm": 0.87109375, "learning_rate": 0.00048736861995679067, "loss": 0.2104, "step": 73690 }, { "epoch": 3.05, "grad_norm": 0.369140625, "learning_rate": 0.0004873652160314265, "loss": 0.1958, "step": 73700 }, { "epoch": 3.05, "grad_norm": 0.416015625, "learning_rate": 0.0004873618116593667, "loss": 0.204, "step": 73710 }, { "epoch": 3.05, "grad_norm": 0.279296875, "learning_rate": 0.0004873584068406175, "loss": 0.1555, "step": 73720 }, { "epoch": 3.05, "grad_norm": 1.3203125, "learning_rate": 0.00048735500157518543, "loss": 0.2232, "step": 73730 }, { "epoch": 3.05, "grad_norm": 1.2890625, "learning_rate": 0.0004873515958630769, "loss": 0.1992, "step": 73740 }, { "epoch": 3.05, "grad_norm": 1.546875, "learning_rate": 0.0004873481897042983, "loss": 0.2826, "step": 73750 }, { "epoch": 3.06, "grad_norm": 0.6796875, "learning_rate": 0.000487344783098856, "loss": 0.1707, "step": 73760 }, { "epoch": 3.06, "grad_norm": 3.0625, "learning_rate": 0.0004873413760467564, "loss": 0.2739, "step": 73770 }, { "epoch": 3.06, "grad_norm": 0.474609375, "learning_rate": 0.0004873379685480061, "loss": 0.2207, "step": 73780 }, { "epoch": 3.06, "grad_norm": 0.212890625, "learning_rate": 0.0004873345606026113, "loss": 0.1909, "step": 73790 }, { "epoch": 3.06, "grad_norm": 1.234375, "learning_rate": 0.0004873311522105785, "loss": 0.1933, "step": 73800 }, { "epoch": 3.06, "grad_norm": 0.72265625, "learning_rate": 0.0004873277433719141, "loss": 0.1565, "step": 73810 }, { "epoch": 3.06, "grad_norm": 0.62109375, "learning_rate": 0.00048732433408662455, "loss": 0.2122, "step": 73820 }, { "epoch": 3.06, "grad_norm": 0.671875, "learning_rate": 0.0004873209243547162, "loss": 0.2397, "step": 73830 }, { "epoch": 3.06, "grad_norm": 0.2421875, "learning_rate": 0.00048731751417619546, "loss": 0.2042, "step": 73840 }, { "epoch": 3.06, "grad_norm": 0.81640625, "learning_rate": 0.0004873141035510688, "loss": 0.2229, "step": 73850 }, { "epoch": 3.06, "grad_norm": 0.47265625, "learning_rate": 0.00048731069247934266, "loss": 0.2532, "step": 73860 }, { "epoch": 3.06, "grad_norm": 1.1328125, "learning_rate": 0.0004873072809610234, "loss": 0.2278, "step": 73870 }, { "epoch": 3.06, "grad_norm": 0.7578125, "learning_rate": 0.0004873038689961175, "loss": 0.2484, "step": 73880 }, { "epoch": 3.06, "grad_norm": 0.6953125, "learning_rate": 0.0004873004565846313, "loss": 0.1948, "step": 73890 }, { "epoch": 3.06, "grad_norm": 0.470703125, "learning_rate": 0.0004872970437265713, "loss": 0.2021, "step": 73900 }, { "epoch": 3.06, "grad_norm": 0.703125, "learning_rate": 0.00048729363042194384, "loss": 0.237, "step": 73910 }, { "epoch": 3.06, "grad_norm": 0.455078125, "learning_rate": 0.0004872902166707555, "loss": 0.2537, "step": 73920 }, { "epoch": 3.06, "grad_norm": 0.267578125, "learning_rate": 0.0004872868024730125, "loss": 0.2726, "step": 73930 }, { "epoch": 3.06, "grad_norm": 0.78125, "learning_rate": 0.00048728338782872137, "loss": 0.2311, "step": 73940 }, { "epoch": 3.06, "grad_norm": 0.86328125, "learning_rate": 0.0004872799727378886, "loss": 0.1994, "step": 73950 }, { "epoch": 3.06, "grad_norm": 0.765625, "learning_rate": 0.00048727655720052045, "loss": 0.2452, "step": 73960 }, { "epoch": 3.06, "grad_norm": 0.6328125, "learning_rate": 0.00048727314121662345, "loss": 0.1759, "step": 73970 }, { "epoch": 3.06, "grad_norm": 0.6328125, "learning_rate": 0.0004872697247862041, "loss": 0.2141, "step": 73980 }, { "epoch": 3.06, "grad_norm": 0.1650390625, "learning_rate": 0.00048726630790926875, "loss": 0.1746, "step": 73990 }, { "epoch": 3.07, "grad_norm": 0.310546875, "learning_rate": 0.00048726289058582375, "loss": 0.1923, "step": 74000 }, { "epoch": 3.07, "grad_norm": 0.0022125244140625, "learning_rate": 0.0004872594728158757, "loss": 0.1961, "step": 74010 }, { "epoch": 3.07, "grad_norm": 0.7109375, "learning_rate": 0.00048725605459943086, "loss": 0.2439, "step": 74020 }, { "epoch": 3.07, "grad_norm": 0.76953125, "learning_rate": 0.00048725263593649577, "loss": 0.2458, "step": 74030 }, { "epoch": 3.07, "grad_norm": 0.326171875, "learning_rate": 0.00048724921682707684, "loss": 0.2663, "step": 74040 }, { "epoch": 3.07, "grad_norm": 0.79296875, "learning_rate": 0.0004872457972711806, "loss": 0.2116, "step": 74050 }, { "epoch": 3.07, "grad_norm": 0.57421875, "learning_rate": 0.0004872423772688133, "loss": 0.198, "step": 74060 }, { "epoch": 3.07, "grad_norm": 1.7890625, "learning_rate": 0.00048723895681998145, "loss": 0.1864, "step": 74070 }, { "epoch": 3.07, "grad_norm": 0.80859375, "learning_rate": 0.0004872355359246915, "loss": 0.237, "step": 74080 }, { "epoch": 3.07, "grad_norm": 0.6953125, "learning_rate": 0.00048723211458294994, "loss": 0.2472, "step": 74090 }, { "epoch": 3.07, "grad_norm": 0.7734375, "learning_rate": 0.0004872286927947631, "loss": 0.2114, "step": 74100 }, { "epoch": 3.07, "grad_norm": 0.2099609375, "learning_rate": 0.00048722527056013754, "loss": 0.2861, "step": 74110 }, { "epoch": 3.07, "grad_norm": 0.921875, "learning_rate": 0.00048722184787907964, "loss": 0.1873, "step": 74120 }, { "epoch": 3.07, "grad_norm": 1.1015625, "learning_rate": 0.0004872184247515958, "loss": 0.2359, "step": 74130 }, { "epoch": 3.07, "grad_norm": 0.63671875, "learning_rate": 0.0004872150011776925, "loss": 0.2526, "step": 74140 }, { "epoch": 3.07, "grad_norm": 1.140625, "learning_rate": 0.0004872115771573762, "loss": 0.2178, "step": 74150 }, { "epoch": 3.07, "grad_norm": 0.82421875, "learning_rate": 0.00048720815269065335, "loss": 0.2769, "step": 74160 }, { "epoch": 3.07, "grad_norm": 0.466796875, "learning_rate": 0.00048720472777753033, "loss": 0.2539, "step": 74170 }, { "epoch": 3.07, "grad_norm": 0.640625, "learning_rate": 0.00048720130241801363, "loss": 0.2595, "step": 74180 }, { "epoch": 3.07, "grad_norm": 0.435546875, "learning_rate": 0.00048719787661210976, "loss": 0.2066, "step": 74190 }, { "epoch": 3.07, "grad_norm": 1.328125, "learning_rate": 0.000487194450359825, "loss": 0.1813, "step": 74200 }, { "epoch": 3.07, "grad_norm": 0.345703125, "learning_rate": 0.00048719102366116595, "loss": 0.1825, "step": 74210 }, { "epoch": 3.07, "grad_norm": 0.515625, "learning_rate": 0.00048718759651613904, "loss": 0.21, "step": 74220 }, { "epoch": 3.07, "grad_norm": 0.546875, "learning_rate": 0.00048718416892475064, "loss": 0.2267, "step": 74230 }, { "epoch": 3.08, "grad_norm": 0.953125, "learning_rate": 0.00048718074088700727, "loss": 0.1799, "step": 74240 }, { "epoch": 3.08, "grad_norm": 2.0, "learning_rate": 0.00048717731240291543, "loss": 0.1784, "step": 74250 }, { "epoch": 3.08, "grad_norm": 1.15625, "learning_rate": 0.00048717388347248135, "loss": 0.2123, "step": 74260 }, { "epoch": 3.08, "grad_norm": 0.55078125, "learning_rate": 0.0004871704540957117, "loss": 0.2307, "step": 74270 }, { "epoch": 3.08, "grad_norm": 0.65234375, "learning_rate": 0.00048716702427261294, "loss": 0.2161, "step": 74280 }, { "epoch": 3.08, "grad_norm": 0.82421875, "learning_rate": 0.0004871635940031914, "loss": 0.2187, "step": 74290 }, { "epoch": 3.08, "grad_norm": 1.109375, "learning_rate": 0.00048716016328745354, "loss": 0.2815, "step": 74300 }, { "epoch": 3.08, "grad_norm": 0.9453125, "learning_rate": 0.0004871567321254059, "loss": 0.2447, "step": 74310 }, { "epoch": 3.08, "grad_norm": 0.62890625, "learning_rate": 0.0004871533005170549, "loss": 0.1874, "step": 74320 }, { "epoch": 3.08, "grad_norm": 0.73046875, "learning_rate": 0.00048714986846240706, "loss": 0.2047, "step": 74330 }, { "epoch": 3.08, "grad_norm": 1.015625, "learning_rate": 0.0004871464359614687, "loss": 0.1969, "step": 74340 }, { "epoch": 3.08, "grad_norm": 0.9140625, "learning_rate": 0.00048714300301424634, "loss": 0.2407, "step": 74350 }, { "epoch": 3.08, "grad_norm": 0.7109375, "learning_rate": 0.0004871395696207465, "loss": 0.1862, "step": 74360 }, { "epoch": 3.08, "grad_norm": 0.53515625, "learning_rate": 0.0004871361357809756, "loss": 0.2237, "step": 74370 }, { "epoch": 3.08, "grad_norm": 1.015625, "learning_rate": 0.0004871327014949401, "loss": 0.1938, "step": 74380 }, { "epoch": 3.08, "grad_norm": 0.67578125, "learning_rate": 0.00048712926676264647, "loss": 0.193, "step": 74390 }, { "epoch": 3.08, "grad_norm": 0.91015625, "learning_rate": 0.00048712583158410115, "loss": 0.1844, "step": 74400 }, { "epoch": 3.08, "grad_norm": 0.62109375, "learning_rate": 0.00048712239595931063, "loss": 0.2097, "step": 74410 }, { "epoch": 3.08, "grad_norm": 0.78515625, "learning_rate": 0.00048711895988828136, "loss": 0.2252, "step": 74420 }, { "epoch": 3.08, "grad_norm": 0.703125, "learning_rate": 0.00048711552337101985, "loss": 0.2165, "step": 74430 }, { "epoch": 3.08, "grad_norm": 0.6796875, "learning_rate": 0.00048711208640753244, "loss": 0.2026, "step": 74440 }, { "epoch": 3.08, "grad_norm": 0.296875, "learning_rate": 0.0004871086489978258, "loss": 0.1961, "step": 74450 }, { "epoch": 3.08, "grad_norm": 1.21875, "learning_rate": 0.0004871052111419062, "loss": 0.205, "step": 74460 }, { "epoch": 3.08, "grad_norm": 0.49609375, "learning_rate": 0.0004871017728397803, "loss": 0.221, "step": 74470 }, { "epoch": 3.08, "grad_norm": 0.69140625, "learning_rate": 0.00048709833409145436, "loss": 0.198, "step": 74480 }, { "epoch": 3.09, "grad_norm": 0.7421875, "learning_rate": 0.000487094894896935, "loss": 0.2249, "step": 74490 }, { "epoch": 3.09, "grad_norm": 0.30859375, "learning_rate": 0.00048709145525622865, "loss": 0.2476, "step": 74500 }, { "epoch": 3.09, "grad_norm": 0.26171875, "learning_rate": 0.0004870880151693418, "loss": 0.2306, "step": 74510 }, { "epoch": 3.09, "grad_norm": 0.9765625, "learning_rate": 0.00048708457463628093, "loss": 0.2395, "step": 74520 }, { "epoch": 3.09, "grad_norm": 0.67578125, "learning_rate": 0.00048708113365705243, "loss": 0.1723, "step": 74530 }, { "epoch": 3.09, "grad_norm": 0.41015625, "learning_rate": 0.00048707769223166287, "loss": 0.2081, "step": 74540 }, { "epoch": 3.09, "grad_norm": 0.77734375, "learning_rate": 0.0004870742503601187, "loss": 0.1828, "step": 74550 }, { "epoch": 3.09, "grad_norm": 0.85546875, "learning_rate": 0.0004870708080424264, "loss": 0.2185, "step": 74560 }, { "epoch": 3.09, "grad_norm": 1.4609375, "learning_rate": 0.0004870673652785924, "loss": 0.2814, "step": 74570 }, { "epoch": 3.09, "grad_norm": 0.26171875, "learning_rate": 0.00048706392206862326, "loss": 0.2318, "step": 74580 }, { "epoch": 3.09, "grad_norm": 1.046875, "learning_rate": 0.00048706047841252546, "loss": 0.1994, "step": 74590 }, { "epoch": 3.09, "grad_norm": 0.56640625, "learning_rate": 0.00048705703431030535, "loss": 0.2512, "step": 74600 }, { "epoch": 3.09, "grad_norm": 0.6171875, "learning_rate": 0.00048705358976196955, "loss": 0.2478, "step": 74610 }, { "epoch": 3.09, "grad_norm": 0.6796875, "learning_rate": 0.0004870501447675245, "loss": 0.1924, "step": 74620 }, { "epoch": 3.09, "grad_norm": 0.8046875, "learning_rate": 0.00048704669932697663, "loss": 0.2074, "step": 74630 }, { "epoch": 3.09, "grad_norm": 0.7109375, "learning_rate": 0.0004870432534403325, "loss": 0.2241, "step": 74640 }, { "epoch": 3.09, "grad_norm": 0.625, "learning_rate": 0.0004870398071075986, "loss": 0.1863, "step": 74650 }, { "epoch": 3.09, "grad_norm": 0.49609375, "learning_rate": 0.00048703636032878134, "loss": 0.1563, "step": 74660 }, { "epoch": 3.09, "grad_norm": 0.75, "learning_rate": 0.00048703291310388724, "loss": 0.3047, "step": 74670 }, { "epoch": 3.09, "grad_norm": 0.75, "learning_rate": 0.0004870294654329228, "loss": 0.201, "step": 74680 }, { "epoch": 3.09, "grad_norm": 0.6875, "learning_rate": 0.0004870260173158946, "loss": 0.193, "step": 74690 }, { "epoch": 3.09, "grad_norm": 0.66015625, "learning_rate": 0.00048702256875280894, "loss": 0.1976, "step": 74700 }, { "epoch": 3.09, "grad_norm": 0.486328125, "learning_rate": 0.00048701911974367244, "loss": 0.2685, "step": 74710 }, { "epoch": 3.09, "grad_norm": 0.5390625, "learning_rate": 0.00048701567028849147, "loss": 0.2463, "step": 74720 }, { "epoch": 3.1, "grad_norm": 0.68359375, "learning_rate": 0.00048701222038727266, "loss": 0.1852, "step": 74730 }, { "epoch": 3.1, "grad_norm": 1.921875, "learning_rate": 0.0004870087700400224, "loss": 0.2467, "step": 74740 }, { "epoch": 3.1, "grad_norm": 0.53125, "learning_rate": 0.0004870053192467473, "loss": 0.239, "step": 74750 }, { "epoch": 3.1, "grad_norm": 0.80859375, "learning_rate": 0.0004870018680074537, "loss": 0.176, "step": 74760 }, { "epoch": 3.1, "grad_norm": 0.53515625, "learning_rate": 0.00048699841632214824, "loss": 0.2479, "step": 74770 }, { "epoch": 3.1, "grad_norm": 0.75, "learning_rate": 0.0004869949641908374, "loss": 0.2113, "step": 74780 }, { "epoch": 3.1, "grad_norm": 0.3203125, "learning_rate": 0.00048699151161352757, "loss": 0.2088, "step": 74790 }, { "epoch": 3.1, "grad_norm": 1.265625, "learning_rate": 0.00048698805859022533, "loss": 0.2537, "step": 74800 }, { "epoch": 3.1, "grad_norm": 0.94921875, "learning_rate": 0.0004869846051209371, "loss": 0.2545, "step": 74810 }, { "epoch": 3.1, "grad_norm": 0.84375, "learning_rate": 0.0004869811512056695, "loss": 0.1687, "step": 74820 }, { "epoch": 3.1, "grad_norm": 0.71875, "learning_rate": 0.00048697769684442893, "loss": 0.2517, "step": 74830 }, { "epoch": 3.1, "grad_norm": 0.8984375, "learning_rate": 0.00048697424203722196, "loss": 0.2569, "step": 74840 }, { "epoch": 3.1, "grad_norm": 0.5234375, "learning_rate": 0.00048697078678405504, "loss": 0.2825, "step": 74850 }, { "epoch": 3.1, "grad_norm": 0.8984375, "learning_rate": 0.0004869673310849347, "loss": 0.2246, "step": 74860 }, { "epoch": 3.1, "grad_norm": 0.84765625, "learning_rate": 0.0004869638749398674, "loss": 0.2391, "step": 74870 }, { "epoch": 3.1, "grad_norm": 0.53515625, "learning_rate": 0.00048696041834885966, "loss": 0.2607, "step": 74880 }, { "epoch": 3.1, "grad_norm": 0.5703125, "learning_rate": 0.00048695696131191807, "loss": 0.2377, "step": 74890 }, { "epoch": 3.1, "grad_norm": 0.80859375, "learning_rate": 0.000486953503829049, "loss": 0.1982, "step": 74900 }, { "epoch": 3.1, "grad_norm": 0.5625, "learning_rate": 0.0004869500459002591, "loss": 0.1778, "step": 74910 }, { "epoch": 3.1, "grad_norm": 0.203125, "learning_rate": 0.0004869465875255548, "loss": 0.2137, "step": 74920 }, { "epoch": 3.1, "grad_norm": 0.48828125, "learning_rate": 0.0004869431287049425, "loss": 0.2208, "step": 74930 }, { "epoch": 3.1, "grad_norm": 0.6953125, "learning_rate": 0.00048693966943842893, "loss": 0.225, "step": 74940 }, { "epoch": 3.1, "grad_norm": 0.271484375, "learning_rate": 0.0004869362097260205, "loss": 0.2365, "step": 74950 }, { "epoch": 3.1, "grad_norm": 0.259765625, "learning_rate": 0.0004869327495677236, "loss": 0.2478, "step": 74960 }, { "epoch": 3.11, "grad_norm": 0.72265625, "learning_rate": 0.0004869292889635449, "loss": 0.2016, "step": 74970 }, { "epoch": 3.11, "grad_norm": 1.0546875, "learning_rate": 0.00048692582791349094, "loss": 0.2145, "step": 74980 }, { "epoch": 3.11, "grad_norm": 0.671875, "learning_rate": 0.00048692236641756804, "loss": 0.186, "step": 74990 }, { "epoch": 3.11, "grad_norm": 0.51171875, "learning_rate": 0.00048691890447578293, "loss": 0.2622, "step": 75000 }, { "epoch": 3.11, "grad_norm": 0.93359375, "learning_rate": 0.00048691544208814196, "loss": 0.1908, "step": 75010 }, { "epoch": 3.11, "grad_norm": 0.60546875, "learning_rate": 0.00048691197925465173, "loss": 0.1609, "step": 75020 }, { "epoch": 3.11, "grad_norm": 0.72265625, "learning_rate": 0.00048690851597531877, "loss": 0.2121, "step": 75030 }, { "epoch": 3.11, "grad_norm": 0.6015625, "learning_rate": 0.0004869050522501495, "loss": 0.2386, "step": 75040 }, { "epoch": 3.11, "grad_norm": 0.86328125, "learning_rate": 0.00048690158807915057, "loss": 0.2522, "step": 75050 }, { "epoch": 3.11, "grad_norm": 0.81640625, "learning_rate": 0.0004868981234623284, "loss": 0.2329, "step": 75060 }, { "epoch": 3.11, "grad_norm": 0.6640625, "learning_rate": 0.0004868946583996896, "loss": 0.2143, "step": 75070 }, { "epoch": 3.11, "grad_norm": 0.671875, "learning_rate": 0.00048689119289124056, "loss": 0.2056, "step": 75080 }, { "epoch": 3.11, "grad_norm": 0.62890625, "learning_rate": 0.0004868877269369879, "loss": 0.2274, "step": 75090 }, { "epoch": 3.11, "grad_norm": 1.1953125, "learning_rate": 0.00048688426053693814, "loss": 0.2926, "step": 75100 }, { "epoch": 3.11, "grad_norm": 0.97265625, "learning_rate": 0.0004868807936910978, "loss": 0.2211, "step": 75110 }, { "epoch": 3.11, "grad_norm": 1.1484375, "learning_rate": 0.00048687732639947335, "loss": 0.2214, "step": 75120 }, { "epoch": 3.11, "grad_norm": 0.5, "learning_rate": 0.0004868738586620713, "loss": 0.1772, "step": 75130 }, { "epoch": 3.11, "grad_norm": 0.9375, "learning_rate": 0.0004868703904788984, "loss": 0.2388, "step": 75140 }, { "epoch": 3.11, "grad_norm": 0.25, "learning_rate": 0.00048686692184996087, "loss": 0.1368, "step": 75150 }, { "epoch": 3.11, "grad_norm": 0.66015625, "learning_rate": 0.00048686345277526534, "loss": 0.1473, "step": 75160 }, { "epoch": 3.11, "grad_norm": 1.34375, "learning_rate": 0.00048685998325481846, "loss": 0.2187, "step": 75170 }, { "epoch": 3.11, "grad_norm": 0.69140625, "learning_rate": 0.00048685651328862664, "loss": 0.234, "step": 75180 }, { "epoch": 3.11, "grad_norm": 0.376953125, "learning_rate": 0.0004868530428766964, "loss": 0.2204, "step": 75190 }, { "epoch": 3.11, "grad_norm": 0.42578125, "learning_rate": 0.0004868495720190343, "loss": 0.2326, "step": 75200 }, { "epoch": 3.12, "grad_norm": 0.90234375, "learning_rate": 0.00048684610071564707, "loss": 0.2412, "step": 75210 }, { "epoch": 3.12, "grad_norm": 1.015625, "learning_rate": 0.0004868426289665409, "loss": 0.1761, "step": 75220 }, { "epoch": 3.12, "grad_norm": 0.70703125, "learning_rate": 0.0004868391567717225, "loss": 0.2289, "step": 75230 }, { "epoch": 3.12, "grad_norm": 0.80078125, "learning_rate": 0.00048683568413119833, "loss": 0.2197, "step": 75240 }, { "epoch": 3.12, "grad_norm": 0.380859375, "learning_rate": 0.00048683221104497504, "loss": 0.1994, "step": 75250 }, { "epoch": 3.12, "grad_norm": 0.002593994140625, "learning_rate": 0.0004868287375130591, "loss": 0.1754, "step": 75260 }, { "epoch": 3.12, "grad_norm": 0.8046875, "learning_rate": 0.00048682526353545705, "loss": 0.2145, "step": 75270 }, { "epoch": 3.12, "grad_norm": 0.298828125, "learning_rate": 0.00048682178911217547, "loss": 0.2454, "step": 75280 }, { "epoch": 3.12, "grad_norm": 0.73046875, "learning_rate": 0.0004868183142432208, "loss": 0.2335, "step": 75290 }, { "epoch": 3.12, "grad_norm": 0.97265625, "learning_rate": 0.00048681483892859957, "loss": 0.2151, "step": 75300 }, { "epoch": 3.12, "grad_norm": 0.220703125, "learning_rate": 0.0004868113631683185, "loss": 0.1634, "step": 75310 }, { "epoch": 3.12, "grad_norm": 0.75, "learning_rate": 0.000486807886962384, "loss": 0.2583, "step": 75320 }, { "epoch": 3.12, "grad_norm": 0.61328125, "learning_rate": 0.00048680441031080256, "loss": 0.1783, "step": 75330 }, { "epoch": 3.12, "grad_norm": 0.74609375, "learning_rate": 0.00048680093321358086, "loss": 0.2546, "step": 75340 }, { "epoch": 3.12, "grad_norm": 0.412109375, "learning_rate": 0.0004867974556707254, "loss": 0.2167, "step": 75350 }, { "epoch": 3.12, "grad_norm": 0.58203125, "learning_rate": 0.00048679397768224263, "loss": 0.1755, "step": 75360 }, { "epoch": 3.12, "grad_norm": 0.419921875, "learning_rate": 0.0004867904992481392, "loss": 0.1679, "step": 75370 }, { "epoch": 3.12, "grad_norm": 0.89453125, "learning_rate": 0.0004867870203684216, "loss": 0.2073, "step": 75380 }, { "epoch": 3.12, "grad_norm": 0.67578125, "learning_rate": 0.0004867835410430964, "loss": 0.2396, "step": 75390 }, { "epoch": 3.12, "grad_norm": 0.46875, "learning_rate": 0.00048678006127217025, "loss": 0.2361, "step": 75400 }, { "epoch": 3.12, "grad_norm": 0.58203125, "learning_rate": 0.00048677658105564947, "loss": 0.2466, "step": 75410 }, { "epoch": 3.12, "grad_norm": 0.6015625, "learning_rate": 0.0004867731003935408, "loss": 0.2032, "step": 75420 }, { "epoch": 3.12, "grad_norm": 1.6328125, "learning_rate": 0.0004867696192858507, "loss": 0.2151, "step": 75430 }, { "epoch": 3.12, "grad_norm": 0.9375, "learning_rate": 0.0004867661377325857, "loss": 0.2117, "step": 75440 }, { "epoch": 3.13, "grad_norm": 0.609375, "learning_rate": 0.00048676265573375247, "loss": 0.168, "step": 75450 }, { "epoch": 3.13, "grad_norm": 0.6640625, "learning_rate": 0.0004867591732893575, "loss": 0.2195, "step": 75460 }, { "epoch": 3.13, "grad_norm": 0.96484375, "learning_rate": 0.00048675569039940725, "loss": 0.155, "step": 75470 }, { "epoch": 3.13, "grad_norm": 0.88671875, "learning_rate": 0.00048675220706390836, "loss": 0.2309, "step": 75480 }, { "epoch": 3.13, "grad_norm": 0.578125, "learning_rate": 0.00048674872328286747, "loss": 0.2205, "step": 75490 }, { "epoch": 3.13, "grad_norm": 2.984375, "learning_rate": 0.000486745239056291, "loss": 0.2281, "step": 75500 }, { "epoch": 3.13, "grad_norm": 0.92578125, "learning_rate": 0.00048674175438418555, "loss": 0.2466, "step": 75510 }, { "epoch": 3.13, "grad_norm": 1.0703125, "learning_rate": 0.00048673826926655774, "loss": 0.2197, "step": 75520 }, { "epoch": 3.13, "grad_norm": 0.88671875, "learning_rate": 0.00048673478370341396, "loss": 0.1929, "step": 75530 }, { "epoch": 3.13, "grad_norm": 1.6640625, "learning_rate": 0.000486731297694761, "loss": 0.2061, "step": 75540 }, { "epoch": 3.13, "grad_norm": 0.67578125, "learning_rate": 0.00048672781124060524, "loss": 0.2053, "step": 75550 }, { "epoch": 3.13, "grad_norm": 0.29296875, "learning_rate": 0.00048672432434095325, "loss": 0.1714, "step": 75560 }, { "epoch": 3.13, "grad_norm": 0.1923828125, "learning_rate": 0.00048672083699581175, "loss": 0.182, "step": 75570 }, { "epoch": 3.13, "grad_norm": 0.455078125, "learning_rate": 0.0004867173492051872, "loss": 0.2254, "step": 75580 }, { "epoch": 3.13, "grad_norm": 0.451171875, "learning_rate": 0.0004867138609690861, "loss": 0.1265, "step": 75590 }, { "epoch": 3.13, "grad_norm": 0.412109375, "learning_rate": 0.00048671037228751506, "loss": 0.1455, "step": 75600 }, { "epoch": 3.13, "grad_norm": 0.78125, "learning_rate": 0.0004867068831604807, "loss": 0.2166, "step": 75610 }, { "epoch": 3.13, "grad_norm": 0.451171875, "learning_rate": 0.0004867033935879895, "loss": 0.1881, "step": 75620 }, { "epoch": 3.13, "grad_norm": 0.47265625, "learning_rate": 0.0004866999035700482, "loss": 0.252, "step": 75630 }, { "epoch": 3.13, "grad_norm": 0.0, "learning_rate": 0.0004866964131066632, "loss": 0.2433, "step": 75640 }, { "epoch": 3.13, "grad_norm": 0.6640625, "learning_rate": 0.00048669292219784104, "loss": 0.169, "step": 75650 }, { "epoch": 3.13, "grad_norm": 0.65234375, "learning_rate": 0.0004866894308435884, "loss": 0.2411, "step": 75660 }, { "epoch": 3.13, "grad_norm": 0.55859375, "learning_rate": 0.0004866859390439118, "loss": 0.1801, "step": 75670 }, { "epoch": 3.13, "grad_norm": 0.91796875, "learning_rate": 0.00048668244679881783, "loss": 0.2312, "step": 75680 }, { "epoch": 3.14, "grad_norm": 1.328125, "learning_rate": 0.00048667895410831307, "loss": 0.2379, "step": 75690 }, { "epoch": 3.14, "grad_norm": 0.4765625, "learning_rate": 0.00048667546097240407, "loss": 0.1836, "step": 75700 }, { "epoch": 3.14, "grad_norm": 0.9765625, "learning_rate": 0.00048667196739109745, "loss": 0.1853, "step": 75710 }, { "epoch": 3.14, "grad_norm": 0.671875, "learning_rate": 0.0004866684733643997, "loss": 0.1938, "step": 75720 }, { "epoch": 3.14, "grad_norm": 0.177734375, "learning_rate": 0.0004866649788923174, "loss": 0.1804, "step": 75730 }, { "epoch": 3.14, "grad_norm": 0.59765625, "learning_rate": 0.0004866614839748572, "loss": 0.242, "step": 75740 }, { "epoch": 3.14, "grad_norm": 1.1796875, "learning_rate": 0.0004866579886120257, "loss": 0.2473, "step": 75750 }, { "epoch": 3.14, "grad_norm": 0.609375, "learning_rate": 0.00048665449280382937, "loss": 0.2142, "step": 75760 }, { "epoch": 3.14, "grad_norm": 0.48828125, "learning_rate": 0.0004866509965502748, "loss": 0.1966, "step": 75770 }, { "epoch": 3.14, "grad_norm": 0.734375, "learning_rate": 0.0004866474998513687, "loss": 0.1611, "step": 75780 }, { "epoch": 3.14, "grad_norm": 0.69921875, "learning_rate": 0.0004866440027071175, "loss": 0.2353, "step": 75790 }, { "epoch": 3.14, "grad_norm": 0.294921875, "learning_rate": 0.00048664050511752787, "loss": 0.2218, "step": 75800 }, { "epoch": 3.14, "grad_norm": 0.79296875, "learning_rate": 0.00048663700708260636, "loss": 0.2121, "step": 75810 }, { "epoch": 3.14, "grad_norm": 0.51171875, "learning_rate": 0.00048663350860235954, "loss": 0.2174, "step": 75820 }, { "epoch": 3.14, "grad_norm": 0.9609375, "learning_rate": 0.00048663000967679403, "loss": 0.2006, "step": 75830 }, { "epoch": 3.14, "grad_norm": 0.6640625, "learning_rate": 0.00048662651030591643, "loss": 0.2396, "step": 75840 }, { "epoch": 3.14, "grad_norm": 0.7734375, "learning_rate": 0.0004866230104897332, "loss": 0.2294, "step": 75850 }, { "epoch": 3.14, "grad_norm": 0.435546875, "learning_rate": 0.00048661951022825107, "loss": 0.2376, "step": 75860 }, { "epoch": 3.14, "grad_norm": 0.84375, "learning_rate": 0.00048661600952147653, "loss": 0.245, "step": 75870 }, { "epoch": 3.14, "grad_norm": 0.33203125, "learning_rate": 0.00048661250836941626, "loss": 0.2192, "step": 75880 }, { "epoch": 3.14, "grad_norm": 0.625, "learning_rate": 0.00048660900677207687, "loss": 0.2599, "step": 75890 }, { "epoch": 3.14, "grad_norm": 0.58984375, "learning_rate": 0.00048660550472946475, "loss": 0.1729, "step": 75900 }, { "epoch": 3.14, "grad_norm": 1.1484375, "learning_rate": 0.0004866020022415867, "loss": 0.2844, "step": 75910 }, { "epoch": 3.14, "grad_norm": 0.78515625, "learning_rate": 0.0004865984993084492, "loss": 0.1972, "step": 75920 }, { "epoch": 3.15, "grad_norm": 0.5234375, "learning_rate": 0.0004865949959300589, "loss": 0.1966, "step": 75930 }, { "epoch": 3.15, "grad_norm": 0.671875, "learning_rate": 0.0004865914921064224, "loss": 0.1556, "step": 75940 }, { "epoch": 3.15, "grad_norm": 0.8984375, "learning_rate": 0.0004865879878375462, "loss": 0.2096, "step": 75950 }, { "epoch": 3.15, "grad_norm": 0.51171875, "learning_rate": 0.00048658448312343696, "loss": 0.1934, "step": 75960 }, { "epoch": 3.15, "grad_norm": 0.8203125, "learning_rate": 0.00048658097796410127, "loss": 0.2166, "step": 75970 }, { "epoch": 3.15, "grad_norm": 0.6171875, "learning_rate": 0.00048657747235954577, "loss": 0.1357, "step": 75980 }, { "epoch": 3.15, "grad_norm": 1.9765625, "learning_rate": 0.00048657396630977695, "loss": 0.198, "step": 75990 }, { "epoch": 3.15, "grad_norm": 2.265625, "learning_rate": 0.00048657045981480155, "loss": 0.212, "step": 76000 }, { "epoch": 3.15, "grad_norm": 0.515625, "learning_rate": 0.00048656695287462606, "loss": 0.2087, "step": 76010 }, { "epoch": 3.15, "grad_norm": 0.578125, "learning_rate": 0.000486563445489257, "loss": 0.2687, "step": 76020 }, { "epoch": 3.15, "grad_norm": 0.83984375, "learning_rate": 0.0004865599376587012, "loss": 0.2446, "step": 76030 }, { "epoch": 3.15, "grad_norm": 1.1171875, "learning_rate": 0.0004865564293829652, "loss": 0.2476, "step": 76040 }, { "epoch": 3.15, "grad_norm": 0.92578125, "learning_rate": 0.00048655292066205545, "loss": 0.1841, "step": 76050 }, { "epoch": 3.15, "grad_norm": 0.3046875, "learning_rate": 0.00048654941149597864, "loss": 0.2098, "step": 76060 }, { "epoch": 3.15, "grad_norm": 0.74609375, "learning_rate": 0.00048654590188474143, "loss": 0.2198, "step": 76070 }, { "epoch": 3.15, "grad_norm": 0.91015625, "learning_rate": 0.0004865423918283504, "loss": 0.2651, "step": 76080 }, { "epoch": 3.15, "grad_norm": 0.46484375, "learning_rate": 0.00048653888132681205, "loss": 0.2216, "step": 76090 }, { "epoch": 3.15, "grad_norm": 0.80859375, "learning_rate": 0.00048653537038013316, "loss": 0.2415, "step": 76100 }, { "epoch": 3.15, "grad_norm": 0.765625, "learning_rate": 0.0004865318589883202, "loss": 0.2485, "step": 76110 }, { "epoch": 3.15, "grad_norm": 0.84765625, "learning_rate": 0.0004865283471513798, "loss": 0.1855, "step": 76120 }, { "epoch": 3.15, "grad_norm": 0.53515625, "learning_rate": 0.00048652483486931865, "loss": 0.2288, "step": 76130 }, { "epoch": 3.15, "grad_norm": 0.82421875, "learning_rate": 0.00048652132214214327, "loss": 0.1923, "step": 76140 }, { "epoch": 3.15, "grad_norm": 0.70703125, "learning_rate": 0.0004865178089698603, "loss": 0.1532, "step": 76150 }, { "epoch": 3.15, "grad_norm": 0.4453125, "learning_rate": 0.0004865142953524764, "loss": 0.2525, "step": 76160 }, { "epoch": 3.15, "grad_norm": 0.158203125, "learning_rate": 0.00048651078128999803, "loss": 0.1827, "step": 76170 }, { "epoch": 3.16, "grad_norm": 0.357421875, "learning_rate": 0.00048650726678243204, "loss": 0.1746, "step": 76180 }, { "epoch": 3.16, "grad_norm": 0.640625, "learning_rate": 0.0004865037518297848, "loss": 0.2618, "step": 76190 }, { "epoch": 3.16, "grad_norm": 0.7734375, "learning_rate": 0.0004865002364320631, "loss": 0.239, "step": 76200 }, { "epoch": 3.16, "grad_norm": 0.6796875, "learning_rate": 0.0004864967205892735, "loss": 0.2202, "step": 76210 }, { "epoch": 3.16, "grad_norm": 0.58984375, "learning_rate": 0.00048649320430142263, "loss": 0.2463, "step": 76220 }, { "epoch": 3.16, "grad_norm": 0.875, "learning_rate": 0.00048648968756851704, "loss": 0.2251, "step": 76230 }, { "epoch": 3.16, "grad_norm": 0.6171875, "learning_rate": 0.0004864861703905634, "loss": 0.1798, "step": 76240 }, { "epoch": 3.16, "grad_norm": 0.703125, "learning_rate": 0.00048648265276756835, "loss": 0.1844, "step": 76250 }, { "epoch": 3.16, "grad_norm": 0.8125, "learning_rate": 0.00048647913469953847, "loss": 0.251, "step": 76260 }, { "epoch": 3.16, "grad_norm": 0.37109375, "learning_rate": 0.0004864756161864804, "loss": 0.2283, "step": 76270 }, { "epoch": 3.16, "grad_norm": 0.5390625, "learning_rate": 0.0004864720972284008, "loss": 0.2399, "step": 76280 }, { "epoch": 3.16, "grad_norm": 0.375, "learning_rate": 0.0004864685778253062, "loss": 0.2525, "step": 76290 }, { "epoch": 3.16, "grad_norm": 0.5703125, "learning_rate": 0.0004864650579772033, "loss": 0.2411, "step": 76300 }, { "epoch": 3.16, "grad_norm": 0.353515625, "learning_rate": 0.0004864615376840986, "loss": 0.1988, "step": 76310 }, { "epoch": 3.16, "grad_norm": 0.53125, "learning_rate": 0.00048645801694599896, "loss": 0.2547, "step": 76320 }, { "epoch": 3.16, "grad_norm": 0.703125, "learning_rate": 0.0004864544957629108, "loss": 0.238, "step": 76330 }, { "epoch": 3.16, "grad_norm": 1.0234375, "learning_rate": 0.00048645097413484086, "loss": 0.2317, "step": 76340 }, { "epoch": 3.16, "grad_norm": 1.0390625, "learning_rate": 0.00048644745206179564, "loss": 0.2216, "step": 76350 }, { "epoch": 3.16, "grad_norm": 0.349609375, "learning_rate": 0.00048644392954378193, "loss": 0.218, "step": 76360 }, { "epoch": 3.16, "grad_norm": 0.0, "learning_rate": 0.0004864404065808062, "loss": 0.1763, "step": 76370 }, { "epoch": 3.16, "grad_norm": 0.33984375, "learning_rate": 0.0004864368831728752, "loss": 0.2447, "step": 76380 }, { "epoch": 3.16, "grad_norm": 1.1484375, "learning_rate": 0.00048643335931999556, "loss": 0.1969, "step": 76390 }, { "epoch": 3.16, "grad_norm": 0.4765625, "learning_rate": 0.00048642983502217383, "loss": 0.1879, "step": 76400 }, { "epoch": 3.16, "grad_norm": 0.65625, "learning_rate": 0.0004864263102794166, "loss": 0.1922, "step": 76410 }, { "epoch": 3.17, "grad_norm": 0.82421875, "learning_rate": 0.0004864227850917307, "loss": 0.2029, "step": 76420 }, { "epoch": 3.17, "grad_norm": 1.03125, "learning_rate": 0.0004864192594591226, "loss": 0.2089, "step": 76430 }, { "epoch": 3.17, "grad_norm": 0.5234375, "learning_rate": 0.00048641573338159907, "loss": 0.2173, "step": 76440 }, { "epoch": 3.17, "grad_norm": 0.50390625, "learning_rate": 0.00048641220685916655, "loss": 0.256, "step": 76450 }, { "epoch": 3.17, "grad_norm": 0.53125, "learning_rate": 0.0004864086798918319, "loss": 0.2652, "step": 76460 }, { "epoch": 3.17, "grad_norm": 0.8203125, "learning_rate": 0.0004864051524796015, "loss": 0.1905, "step": 76470 }, { "epoch": 3.17, "grad_norm": 0.5078125, "learning_rate": 0.00048640162462248223, "loss": 0.2436, "step": 76480 }, { "epoch": 3.17, "grad_norm": 0.73828125, "learning_rate": 0.0004863980963204806, "loss": 0.2101, "step": 76490 }, { "epoch": 3.17, "grad_norm": 0.361328125, "learning_rate": 0.00048639456757360326, "loss": 0.231, "step": 76500 }, { "epoch": 3.17, "grad_norm": 0.77734375, "learning_rate": 0.00048639103838185696, "loss": 0.2016, "step": 76510 }, { "epoch": 3.17, "grad_norm": 0.416015625, "learning_rate": 0.00048638750874524815, "loss": 0.2291, "step": 76520 }, { "epoch": 3.17, "grad_norm": 0.62109375, "learning_rate": 0.00048638397866378365, "loss": 0.1993, "step": 76530 }, { "epoch": 3.17, "grad_norm": 0.80078125, "learning_rate": 0.00048638044813746997, "loss": 0.2237, "step": 76540 }, { "epoch": 3.17, "grad_norm": 0.30859375, "learning_rate": 0.0004863769171663139, "loss": 0.1843, "step": 76550 }, { "epoch": 3.17, "grad_norm": 0.51171875, "learning_rate": 0.00048637338575032194, "loss": 0.1589, "step": 76560 }, { "epoch": 3.17, "grad_norm": 0.85546875, "learning_rate": 0.0004863698538895008, "loss": 0.1967, "step": 76570 }, { "epoch": 3.17, "grad_norm": 0.70703125, "learning_rate": 0.00048636632158385705, "loss": 0.2564, "step": 76580 }, { "epoch": 3.17, "grad_norm": 0.4765625, "learning_rate": 0.00048636278883339753, "loss": 0.2277, "step": 76590 }, { "epoch": 3.17, "grad_norm": 0.74609375, "learning_rate": 0.0004863592556381287, "loss": 0.1709, "step": 76600 }, { "epoch": 3.17, "grad_norm": 0.8046875, "learning_rate": 0.00048635572199805726, "loss": 0.21, "step": 76610 }, { "epoch": 3.17, "grad_norm": 1.1953125, "learning_rate": 0.0004863521879131899, "loss": 0.1604, "step": 76620 }, { "epoch": 3.17, "grad_norm": 0.283203125, "learning_rate": 0.0004863486533835333, "loss": 0.1937, "step": 76630 }, { "epoch": 3.17, "grad_norm": 0.83203125, "learning_rate": 0.000486345118409094, "loss": 0.2028, "step": 76640 }, { "epoch": 3.17, "grad_norm": 1.015625, "learning_rate": 0.00048634158298987876, "loss": 0.2475, "step": 76650 }, { "epoch": 3.18, "grad_norm": 1.1953125, "learning_rate": 0.00048633804712589407, "loss": 0.1718, "step": 76660 }, { "epoch": 3.18, "grad_norm": 1.078125, "learning_rate": 0.0004863345108171468, "loss": 0.1373, "step": 76670 }, { "epoch": 3.18, "grad_norm": 0.71875, "learning_rate": 0.0004863309740636435, "loss": 0.2065, "step": 76680 }, { "epoch": 3.18, "grad_norm": 0.76953125, "learning_rate": 0.00048632743686539075, "loss": 0.2192, "step": 76690 }, { "epoch": 3.18, "grad_norm": 0.58203125, "learning_rate": 0.0004863238992223953, "loss": 0.2389, "step": 76700 }, { "epoch": 3.18, "grad_norm": 1.328125, "learning_rate": 0.0004863203611346639, "loss": 0.2186, "step": 76710 }, { "epoch": 3.18, "grad_norm": 0.88671875, "learning_rate": 0.000486316822602203, "loss": 0.2226, "step": 76720 }, { "epoch": 3.18, "grad_norm": 1.3515625, "learning_rate": 0.00048631328362501935, "loss": 0.2062, "step": 76730 }, { "epoch": 3.18, "grad_norm": 0.59765625, "learning_rate": 0.0004863097442031197, "loss": 0.2319, "step": 76740 }, { "epoch": 3.18, "grad_norm": 0.55078125, "learning_rate": 0.00048630620433651065, "loss": 0.218, "step": 76750 }, { "epoch": 3.18, "grad_norm": 0.76171875, "learning_rate": 0.00048630266402519874, "loss": 0.1936, "step": 76760 }, { "epoch": 3.18, "grad_norm": 1.0546875, "learning_rate": 0.0004862991232691908, "loss": 0.2427, "step": 76770 }, { "epoch": 3.18, "grad_norm": 0.796875, "learning_rate": 0.0004862955820684933, "loss": 0.1961, "step": 76780 }, { "epoch": 3.18, "grad_norm": 1.0234375, "learning_rate": 0.0004862920404231132, "loss": 0.2183, "step": 76790 }, { "epoch": 3.18, "grad_norm": 0.828125, "learning_rate": 0.0004862884983330569, "loss": 0.209, "step": 76800 }, { "epoch": 3.18, "grad_norm": 1.2421875, "learning_rate": 0.0004862849557983312, "loss": 0.2853, "step": 76810 }, { "epoch": 3.18, "grad_norm": 1.2890625, "learning_rate": 0.0004862814128189428, "loss": 0.2384, "step": 76820 }, { "epoch": 3.18, "grad_norm": 0.484375, "learning_rate": 0.00048627786939489816, "loss": 0.2078, "step": 76830 }, { "epoch": 3.18, "grad_norm": 0.490234375, "learning_rate": 0.00048627432552620416, "loss": 0.1949, "step": 76840 }, { "epoch": 3.18, "grad_norm": 0.2236328125, "learning_rate": 0.00048627078121286736, "loss": 0.2333, "step": 76850 }, { "epoch": 3.18, "grad_norm": 0.1962890625, "learning_rate": 0.00048626723645489454, "loss": 0.2505, "step": 76860 }, { "epoch": 3.18, "grad_norm": 0.9375, "learning_rate": 0.0004862636912522922, "loss": 0.2375, "step": 76870 }, { "epoch": 3.18, "grad_norm": 0.97265625, "learning_rate": 0.00048626014560506714, "loss": 0.2086, "step": 76880 }, { "epoch": 3.18, "grad_norm": 0.416015625, "learning_rate": 0.000486256599513226, "loss": 0.2056, "step": 76890 }, { "epoch": 3.19, "grad_norm": 0.474609375, "learning_rate": 0.00048625305297677544, "loss": 0.1589, "step": 76900 }, { "epoch": 3.19, "grad_norm": 0.578125, "learning_rate": 0.0004862495059957223, "loss": 0.23, "step": 76910 }, { "epoch": 3.19, "grad_norm": 1.46875, "learning_rate": 0.00048624595857007293, "loss": 0.2286, "step": 76920 }, { "epoch": 3.19, "grad_norm": 0.58203125, "learning_rate": 0.00048624241069983423, "loss": 0.1811, "step": 76930 }, { "epoch": 3.19, "grad_norm": 1.8984375, "learning_rate": 0.0004862388623850128, "loss": 0.2829, "step": 76940 }, { "epoch": 3.19, "grad_norm": 0.5859375, "learning_rate": 0.00048623531362561534, "loss": 0.209, "step": 76950 }, { "epoch": 3.19, "grad_norm": 1.078125, "learning_rate": 0.0004862317644216486, "loss": 0.2462, "step": 76960 }, { "epoch": 3.19, "grad_norm": 0.458984375, "learning_rate": 0.00048622821477311906, "loss": 0.214, "step": 76970 }, { "epoch": 3.19, "grad_norm": 0.765625, "learning_rate": 0.00048622466468003367, "loss": 0.223, "step": 76980 }, { "epoch": 3.19, "grad_norm": 1.1171875, "learning_rate": 0.0004862211141423989, "loss": 0.19, "step": 76990 }, { "epoch": 3.19, "grad_norm": 0.625, "learning_rate": 0.00048621756316022147, "loss": 0.2022, "step": 77000 }, { "epoch": 3.19, "grad_norm": 0.318359375, "learning_rate": 0.0004862140117335082, "loss": 0.1568, "step": 77010 }, { "epoch": 3.19, "grad_norm": 1.2421875, "learning_rate": 0.0004862104598622656, "loss": 0.2239, "step": 77020 }, { "epoch": 3.19, "grad_norm": 0.5859375, "learning_rate": 0.00048620690754650045, "loss": 0.2055, "step": 77030 }, { "epoch": 3.19, "grad_norm": 0.419921875, "learning_rate": 0.00048620335478621933, "loss": 0.2026, "step": 77040 }, { "epoch": 3.19, "grad_norm": 0.70703125, "learning_rate": 0.00048619980158142915, "loss": 0.2013, "step": 77050 }, { "epoch": 3.19, "grad_norm": 0.490234375, "learning_rate": 0.00048619624793213635, "loss": 0.2007, "step": 77060 }, { "epoch": 3.19, "grad_norm": 0.6015625, "learning_rate": 0.00048619269383834776, "loss": 0.2575, "step": 77070 }, { "epoch": 3.19, "grad_norm": 0.3203125, "learning_rate": 0.0004861891393000699, "loss": 0.2202, "step": 77080 }, { "epoch": 3.19, "grad_norm": 0.2392578125, "learning_rate": 0.00048618558431730974, "loss": 0.1805, "step": 77090 }, { "epoch": 3.19, "grad_norm": 1.828125, "learning_rate": 0.0004861820288900738, "loss": 0.1773, "step": 77100 }, { "epoch": 3.19, "grad_norm": 0.5390625, "learning_rate": 0.0004861784730183687, "loss": 0.26, "step": 77110 }, { "epoch": 3.19, "grad_norm": 0.5859375, "learning_rate": 0.00048617491670220126, "loss": 0.2319, "step": 77120 }, { "epoch": 3.19, "grad_norm": 1.0, "learning_rate": 0.00048617135994157815, "loss": 0.2319, "step": 77130 }, { "epoch": 3.2, "grad_norm": 0.62109375, "learning_rate": 0.0004861678027365061, "loss": 0.229, "step": 77140 }, { "epoch": 3.2, "grad_norm": 3.671875, "learning_rate": 0.00048616424508699164, "loss": 0.2383, "step": 77150 }, { "epoch": 3.2, "grad_norm": 0.63671875, "learning_rate": 0.00048616068699304163, "loss": 0.2235, "step": 77160 }, { "epoch": 3.2, "grad_norm": 0.59765625, "learning_rate": 0.0004861571284546626, "loss": 0.2354, "step": 77170 }, { "epoch": 3.2, "grad_norm": 0.83984375, "learning_rate": 0.0004861535694718615, "loss": 0.2078, "step": 77180 }, { "epoch": 3.2, "grad_norm": 1.1875, "learning_rate": 0.0004861500100446449, "loss": 0.2276, "step": 77190 }, { "epoch": 3.2, "grad_norm": 0.6171875, "learning_rate": 0.0004861464501730194, "loss": 0.1749, "step": 77200 }, { "epoch": 3.2, "grad_norm": 0.5, "learning_rate": 0.00048614288985699187, "loss": 0.206, "step": 77210 }, { "epoch": 3.2, "grad_norm": 0.2451171875, "learning_rate": 0.00048613932909656875, "loss": 0.2305, "step": 77220 }, { "epoch": 3.2, "grad_norm": 1.09375, "learning_rate": 0.0004861357678917571, "loss": 0.2306, "step": 77230 }, { "epoch": 3.2, "grad_norm": 0.412109375, "learning_rate": 0.0004861322062425633, "loss": 0.1811, "step": 77240 }, { "epoch": 3.2, "grad_norm": 0.7265625, "learning_rate": 0.0004861286441489943, "loss": 0.1609, "step": 77250 }, { "epoch": 3.2, "grad_norm": 0.7421875, "learning_rate": 0.00048612508161105663, "loss": 0.2148, "step": 77260 }, { "epoch": 3.2, "grad_norm": 0.6015625, "learning_rate": 0.0004861215186287571, "loss": 0.2357, "step": 77270 }, { "epoch": 3.2, "grad_norm": 0.76171875, "learning_rate": 0.0004861179552021023, "loss": 0.2614, "step": 77280 }, { "epoch": 3.2, "grad_norm": 0.5625, "learning_rate": 0.00048611439133109904, "loss": 0.2342, "step": 77290 }, { "epoch": 3.2, "grad_norm": 1.125, "learning_rate": 0.000486110827015754, "loss": 0.1917, "step": 77300 }, { "epoch": 3.2, "grad_norm": 0.26953125, "learning_rate": 0.00048610726225607384, "loss": 0.249, "step": 77310 }, { "epoch": 3.2, "grad_norm": 1.4921875, "learning_rate": 0.00048610369705206536, "loss": 0.1946, "step": 77320 }, { "epoch": 3.2, "grad_norm": 0.7265625, "learning_rate": 0.0004861001314037352, "loss": 0.1932, "step": 77330 }, { "epoch": 3.2, "grad_norm": 0.96875, "learning_rate": 0.0004860965653110901, "loss": 0.2193, "step": 77340 }, { "epoch": 3.2, "grad_norm": 0.4140625, "learning_rate": 0.0004860929987741368, "loss": 0.1851, "step": 77350 }, { "epoch": 3.2, "grad_norm": 0.6953125, "learning_rate": 0.0004860894317928819, "loss": 0.2093, "step": 77360 }, { "epoch": 3.2, "grad_norm": 0.52734375, "learning_rate": 0.00048608586436733214, "loss": 0.2564, "step": 77370 }, { "epoch": 3.21, "grad_norm": 0.6640625, "learning_rate": 0.00048608229649749435, "loss": 0.17, "step": 77380 }, { "epoch": 3.21, "grad_norm": 0.39453125, "learning_rate": 0.00048607872818337523, "loss": 0.2455, "step": 77390 }, { "epoch": 3.21, "grad_norm": 0.384765625, "learning_rate": 0.0004860751594249814, "loss": 0.1291, "step": 77400 }, { "epoch": 3.21, "grad_norm": 0.435546875, "learning_rate": 0.00048607159022231956, "loss": 0.2527, "step": 77410 }, { "epoch": 3.21, "grad_norm": 1.625, "learning_rate": 0.0004860680205753965, "loss": 0.2725, "step": 77420 }, { "epoch": 3.21, "grad_norm": 0.279296875, "learning_rate": 0.00048606445048421897, "loss": 0.2224, "step": 77430 }, { "epoch": 3.21, "grad_norm": 1.015625, "learning_rate": 0.00048606087994879354, "loss": 0.2558, "step": 77440 }, { "epoch": 3.21, "grad_norm": 0.44140625, "learning_rate": 0.00048605730896912716, "loss": 0.2822, "step": 77450 }, { "epoch": 3.21, "grad_norm": 0.279296875, "learning_rate": 0.0004860537375452263, "loss": 0.2012, "step": 77460 }, { "epoch": 3.21, "grad_norm": 0.84375, "learning_rate": 0.0004860501656770978, "loss": 0.2143, "step": 77470 }, { "epoch": 3.21, "grad_norm": 0.73046875, "learning_rate": 0.0004860465933647484, "loss": 0.2435, "step": 77480 }, { "epoch": 3.21, "grad_norm": 0.66796875, "learning_rate": 0.0004860430206081849, "loss": 0.2006, "step": 77490 }, { "epoch": 3.21, "grad_norm": 0.58984375, "learning_rate": 0.00048603944740741386, "loss": 0.1847, "step": 77500 }, { "epoch": 3.21, "grad_norm": 0.46875, "learning_rate": 0.00048603587376244207, "loss": 0.1794, "step": 77510 }, { "epoch": 3.21, "grad_norm": 0.62890625, "learning_rate": 0.0004860322996732762, "loss": 0.2273, "step": 77520 }, { "epoch": 3.21, "grad_norm": 0.53515625, "learning_rate": 0.0004860287251399231, "loss": 0.2161, "step": 77530 }, { "epoch": 3.21, "grad_norm": 1.234375, "learning_rate": 0.00048602515016238937, "loss": 0.1924, "step": 77540 }, { "epoch": 3.21, "grad_norm": 0.88671875, "learning_rate": 0.00048602157474068186, "loss": 0.2439, "step": 77550 }, { "epoch": 3.21, "grad_norm": 0.609375, "learning_rate": 0.0004860179988748072, "loss": 0.2406, "step": 77560 }, { "epoch": 3.21, "grad_norm": 0.9296875, "learning_rate": 0.0004860144225647721, "loss": 0.1417, "step": 77570 }, { "epoch": 3.21, "grad_norm": 0.4921875, "learning_rate": 0.0004860108458105834, "loss": 0.2011, "step": 77580 }, { "epoch": 3.21, "grad_norm": 0.61328125, "learning_rate": 0.0004860072686122478, "loss": 0.2183, "step": 77590 }, { "epoch": 3.21, "grad_norm": 1.25, "learning_rate": 0.00048600369096977206, "loss": 0.2161, "step": 77600 }, { "epoch": 3.21, "grad_norm": 0.66796875, "learning_rate": 0.0004860001128831627, "loss": 0.2267, "step": 77610 }, { "epoch": 3.22, "grad_norm": 0.431640625, "learning_rate": 0.00048599653435242677, "loss": 0.2215, "step": 77620 }, { "epoch": 3.22, "grad_norm": 2.21875, "learning_rate": 0.0004859929553775707, "loss": 0.2882, "step": 77630 }, { "epoch": 3.22, "grad_norm": 1.0234375, "learning_rate": 0.0004859893759586015, "loss": 0.2054, "step": 77640 }, { "epoch": 3.22, "grad_norm": 1.2890625, "learning_rate": 0.00048598579609552574, "loss": 0.1765, "step": 77650 }, { "epoch": 3.22, "grad_norm": 0.625, "learning_rate": 0.0004859822157883501, "loss": 0.2704, "step": 77660 }, { "epoch": 3.22, "grad_norm": 1.1953125, "learning_rate": 0.0004859786350370815, "loss": 0.2432, "step": 77670 }, { "epoch": 3.22, "grad_norm": 0.31640625, "learning_rate": 0.00048597505384172663, "loss": 0.212, "step": 77680 }, { "epoch": 3.22, "grad_norm": 1.2421875, "learning_rate": 0.00048597147220229216, "loss": 0.1732, "step": 77690 }, { "epoch": 3.22, "grad_norm": 0.2578125, "learning_rate": 0.00048596789011878485, "loss": 0.2366, "step": 77700 }, { "epoch": 3.22, "grad_norm": 1.453125, "learning_rate": 0.0004859643075912115, "loss": 0.2072, "step": 77710 }, { "epoch": 3.22, "grad_norm": 0.265625, "learning_rate": 0.0004859607246195787, "loss": 0.1939, "step": 77720 }, { "epoch": 3.22, "grad_norm": 0.52734375, "learning_rate": 0.00048595714120389333, "loss": 0.2075, "step": 77730 }, { "epoch": 3.22, "grad_norm": 1.3046875, "learning_rate": 0.00048595355734416216, "loss": 0.1787, "step": 77740 }, { "epoch": 3.22, "grad_norm": 0.51953125, "learning_rate": 0.00048594997304039186, "loss": 0.2224, "step": 77750 }, { "epoch": 3.22, "grad_norm": 0.625, "learning_rate": 0.0004859463882925892, "loss": 0.199, "step": 77760 }, { "epoch": 3.22, "grad_norm": 0.96484375, "learning_rate": 0.00048594280310076086, "loss": 0.2356, "step": 77770 }, { "epoch": 3.22, "grad_norm": 1.2734375, "learning_rate": 0.00048593921746491365, "loss": 0.238, "step": 77780 }, { "epoch": 3.22, "grad_norm": 0.423828125, "learning_rate": 0.0004859356313850544, "loss": 0.2551, "step": 77790 }, { "epoch": 3.22, "grad_norm": 1.15625, "learning_rate": 0.0004859320448611897, "loss": 0.246, "step": 77800 }, { "epoch": 3.22, "grad_norm": 0.61328125, "learning_rate": 0.00048592845789332634, "loss": 0.2294, "step": 77810 }, { "epoch": 3.22, "grad_norm": 0.65234375, "learning_rate": 0.0004859248704814712, "loss": 0.1923, "step": 77820 }, { "epoch": 3.22, "grad_norm": 1.2109375, "learning_rate": 0.00048592128262563085, "loss": 0.2261, "step": 77830 }, { "epoch": 3.22, "grad_norm": 0.84375, "learning_rate": 0.0004859176943258121, "loss": 0.2342, "step": 77840 }, { "epoch": 3.22, "grad_norm": 0.361328125, "learning_rate": 0.00048591410558202175, "loss": 0.2477, "step": 77850 }, { "epoch": 3.22, "grad_norm": 0.43359375, "learning_rate": 0.0004859105163942666, "loss": 0.2214, "step": 77860 }, { "epoch": 3.23, "grad_norm": 0.3046875, "learning_rate": 0.0004859069267625533, "loss": 0.1856, "step": 77870 }, { "epoch": 3.23, "grad_norm": 0.353515625, "learning_rate": 0.0004859033366868886, "loss": 0.1822, "step": 77880 }, { "epoch": 3.23, "grad_norm": 1.890625, "learning_rate": 0.0004858997461672793, "loss": 0.2302, "step": 77890 }, { "epoch": 3.23, "grad_norm": 0.390625, "learning_rate": 0.00048589615520373223, "loss": 0.2759, "step": 77900 }, { "epoch": 3.23, "grad_norm": 0.63671875, "learning_rate": 0.000485892563796254, "loss": 0.2143, "step": 77910 }, { "epoch": 3.23, "grad_norm": 0.8984375, "learning_rate": 0.00048588897194485145, "loss": 0.1389, "step": 77920 }, { "epoch": 3.23, "grad_norm": 0.462890625, "learning_rate": 0.0004858853796495313, "loss": 0.1495, "step": 77930 }, { "epoch": 3.23, "grad_norm": 0.36328125, "learning_rate": 0.0004858817869103004, "loss": 0.2156, "step": 77940 }, { "epoch": 3.23, "grad_norm": 0.67578125, "learning_rate": 0.0004858781937271654, "loss": 0.2142, "step": 77950 }, { "epoch": 3.23, "grad_norm": 0.62890625, "learning_rate": 0.0004858746001001331, "loss": 0.1995, "step": 77960 }, { "epoch": 3.23, "grad_norm": 0.625, "learning_rate": 0.0004858710060292103, "loss": 0.2224, "step": 77970 }, { "epoch": 3.23, "grad_norm": 1.7421875, "learning_rate": 0.0004858674115144038, "loss": 0.2189, "step": 77980 }, { "epoch": 3.23, "grad_norm": 0.54296875, "learning_rate": 0.0004858638165557202, "loss": 0.1984, "step": 77990 }, { "epoch": 3.23, "grad_norm": 0.427734375, "learning_rate": 0.00048586022115316643, "loss": 0.1877, "step": 78000 }, { "epoch": 3.23, "grad_norm": 0.51953125, "learning_rate": 0.00048585662530674926, "loss": 0.2569, "step": 78010 }, { "epoch": 3.23, "grad_norm": 0.62109375, "learning_rate": 0.00048585302901647527, "loss": 0.2619, "step": 78020 }, { "epoch": 3.23, "grad_norm": 0.62109375, "learning_rate": 0.00048584943228235136, "loss": 0.1908, "step": 78030 }, { "epoch": 3.23, "grad_norm": 0.8828125, "learning_rate": 0.00048584583510438435, "loss": 0.2171, "step": 78040 }, { "epoch": 3.23, "grad_norm": 0.2392578125, "learning_rate": 0.00048584223748258096, "loss": 0.2497, "step": 78050 }, { "epoch": 3.23, "grad_norm": 1.4375, "learning_rate": 0.0004858386394169478, "loss": 0.1769, "step": 78060 }, { "epoch": 3.23, "grad_norm": 0.85546875, "learning_rate": 0.00048583504090749197, "loss": 0.1748, "step": 78070 }, { "epoch": 3.23, "grad_norm": 0.361328125, "learning_rate": 0.00048583144195421996, "loss": 0.2148, "step": 78080 }, { "epoch": 3.23, "grad_norm": 0.5078125, "learning_rate": 0.0004858278425571387, "loss": 0.241, "step": 78090 }, { "epoch": 3.23, "grad_norm": 0.9921875, "learning_rate": 0.00048582424271625485, "loss": 0.2428, "step": 78100 }, { "epoch": 3.24, "grad_norm": 0.92578125, "learning_rate": 0.0004858206424315753, "loss": 0.223, "step": 78110 }, { "epoch": 3.24, "grad_norm": 0.58984375, "learning_rate": 0.0004858170417031067, "loss": 0.2546, "step": 78120 }, { "epoch": 3.24, "grad_norm": 1.6640625, "learning_rate": 0.00048581344053085597, "loss": 0.2122, "step": 78130 }, { "epoch": 3.24, "grad_norm": 1.0234375, "learning_rate": 0.00048580983891482977, "loss": 0.2198, "step": 78140 }, { "epoch": 3.24, "grad_norm": 0.6015625, "learning_rate": 0.0004858062368550349, "loss": 0.18, "step": 78150 }, { "epoch": 3.24, "grad_norm": 0.984375, "learning_rate": 0.0004858026343514782, "loss": 0.1672, "step": 78160 }, { "epoch": 3.24, "grad_norm": 0.39453125, "learning_rate": 0.00048579903140416635, "loss": 0.2259, "step": 78170 }, { "epoch": 3.24, "grad_norm": 0.6875, "learning_rate": 0.0004857954280131062, "loss": 0.197, "step": 78180 }, { "epoch": 3.24, "grad_norm": 0.6953125, "learning_rate": 0.0004857918241783046, "loss": 0.2422, "step": 78190 }, { "epoch": 3.24, "grad_norm": 1.4765625, "learning_rate": 0.00048578821989976817, "loss": 0.2131, "step": 78200 }, { "epoch": 3.24, "grad_norm": 0.96484375, "learning_rate": 0.0004857846151775038, "loss": 0.1934, "step": 78210 }, { "epoch": 3.24, "grad_norm": 0.3125, "learning_rate": 0.0004857810100115182, "loss": 0.2155, "step": 78220 }, { "epoch": 3.24, "grad_norm": 1.046875, "learning_rate": 0.0004857774044018183, "loss": 0.2569, "step": 78230 }, { "epoch": 3.24, "grad_norm": 0.42578125, "learning_rate": 0.00048577379834841065, "loss": 0.1716, "step": 78240 }, { "epoch": 3.24, "grad_norm": 0.365234375, "learning_rate": 0.00048577019185130227, "loss": 0.1956, "step": 78250 }, { "epoch": 3.24, "grad_norm": 0.47265625, "learning_rate": 0.00048576658491049986, "loss": 0.2411, "step": 78260 }, { "epoch": 3.24, "grad_norm": 0.62109375, "learning_rate": 0.00048576297752601016, "loss": 0.213, "step": 78270 }, { "epoch": 3.24, "grad_norm": 0.1318359375, "learning_rate": 0.00048575936969783994, "loss": 0.1448, "step": 78280 }, { "epoch": 3.24, "grad_norm": 0.27734375, "learning_rate": 0.00048575576142599613, "loss": 0.2381, "step": 78290 }, { "epoch": 3.24, "grad_norm": 0.353515625, "learning_rate": 0.00048575215271048547, "loss": 0.203, "step": 78300 }, { "epoch": 3.24, "grad_norm": 0.671875, "learning_rate": 0.00048574854355131457, "loss": 0.1983, "step": 78310 }, { "epoch": 3.24, "grad_norm": 0.56640625, "learning_rate": 0.0004857449339484905, "loss": 0.2723, "step": 78320 }, { "epoch": 3.24, "grad_norm": 0.6875, "learning_rate": 0.00048574132390201993, "loss": 0.2005, "step": 78330 }, { "epoch": 3.24, "grad_norm": 0.65625, "learning_rate": 0.0004857377134119096, "loss": 0.1988, "step": 78340 }, { "epoch": 3.25, "grad_norm": 0.3671875, "learning_rate": 0.00048573410247816634, "loss": 0.2034, "step": 78350 }, { "epoch": 3.25, "grad_norm": 0.87109375, "learning_rate": 0.000485730491100797, "loss": 0.2306, "step": 78360 }, { "epoch": 3.25, "grad_norm": 0.671875, "learning_rate": 0.0004857268792798083, "loss": 0.2202, "step": 78370 }, { "epoch": 3.25, "grad_norm": 1.2265625, "learning_rate": 0.000485723267015207, "loss": 0.2628, "step": 78380 }, { "epoch": 3.25, "grad_norm": 0.61328125, "learning_rate": 0.00048571965430700004, "loss": 0.26, "step": 78390 }, { "epoch": 3.25, "grad_norm": 0.59765625, "learning_rate": 0.0004857160411551942, "loss": 0.1827, "step": 78400 }, { "epoch": 3.25, "grad_norm": 0.828125, "learning_rate": 0.0004857124275597962, "loss": 0.1619, "step": 78410 }, { "epoch": 3.25, "grad_norm": 0.9453125, "learning_rate": 0.00048570881352081273, "loss": 0.2316, "step": 78420 }, { "epoch": 3.25, "grad_norm": 0.369140625, "learning_rate": 0.0004857051990382509, "loss": 0.2181, "step": 78430 }, { "epoch": 3.25, "grad_norm": 0.69140625, "learning_rate": 0.0004857015841121173, "loss": 0.2141, "step": 78440 }, { "epoch": 3.25, "grad_norm": 0.75, "learning_rate": 0.0004856979687424187, "loss": 0.2087, "step": 78450 }, { "epoch": 3.25, "grad_norm": 0.21484375, "learning_rate": 0.000485694352929162, "loss": 0.2128, "step": 78460 }, { "epoch": 3.25, "grad_norm": 0.8359375, "learning_rate": 0.000485690736672354, "loss": 0.2428, "step": 78470 }, { "epoch": 3.25, "grad_norm": 0.365234375, "learning_rate": 0.00048568711997200157, "loss": 0.2542, "step": 78480 }, { "epoch": 3.25, "grad_norm": 0.42578125, "learning_rate": 0.0004856835028281113, "loss": 0.209, "step": 78490 }, { "epoch": 3.25, "grad_norm": 0.796875, "learning_rate": 0.0004856798852406902, "loss": 0.2242, "step": 78500 }, { "epoch": 3.25, "grad_norm": 0.7265625, "learning_rate": 0.000485676267209745, "loss": 0.2272, "step": 78510 }, { "epoch": 3.25, "grad_norm": 0.470703125, "learning_rate": 0.0004856726487352825, "loss": 0.2504, "step": 78520 }, { "epoch": 3.25, "grad_norm": 0.890625, "learning_rate": 0.0004856690298173095, "loss": 0.1615, "step": 78530 }, { "epoch": 3.25, "grad_norm": 0.640625, "learning_rate": 0.0004856654104558328, "loss": 0.1694, "step": 78540 }, { "epoch": 3.25, "grad_norm": 0.640625, "learning_rate": 0.00048566179065085937, "loss": 0.1628, "step": 78550 }, { "epoch": 3.25, "grad_norm": 0.48046875, "learning_rate": 0.00048565817040239584, "loss": 0.1901, "step": 78560 }, { "epoch": 3.25, "grad_norm": 1.703125, "learning_rate": 0.0004856545497104491, "loss": 0.2717, "step": 78570 }, { "epoch": 3.25, "grad_norm": 0.55859375, "learning_rate": 0.0004856509285750259, "loss": 0.2312, "step": 78580 }, { "epoch": 3.26, "grad_norm": 0.353515625, "learning_rate": 0.0004856473069961331, "loss": 0.2135, "step": 78590 }, { "epoch": 3.26, "grad_norm": 0.578125, "learning_rate": 0.0004856436849737776, "loss": 0.2436, "step": 78600 }, { "epoch": 3.26, "grad_norm": 0.5234375, "learning_rate": 0.0004856400625079661, "loss": 0.2014, "step": 78610 }, { "epoch": 3.26, "grad_norm": 0.7890625, "learning_rate": 0.00048563643959870543, "loss": 0.1885, "step": 78620 }, { "epoch": 3.26, "grad_norm": 1.421875, "learning_rate": 0.00048563281624600243, "loss": 0.2315, "step": 78630 }, { "epoch": 3.26, "grad_norm": 0.65234375, "learning_rate": 0.0004856291924498639, "loss": 0.2224, "step": 78640 }, { "epoch": 3.26, "grad_norm": 0.578125, "learning_rate": 0.0004856255682102967, "loss": 0.16, "step": 78650 }, { "epoch": 3.26, "grad_norm": 1.4609375, "learning_rate": 0.0004856219435273076, "loss": 0.1815, "step": 78660 }, { "epoch": 3.26, "grad_norm": 1.1640625, "learning_rate": 0.0004856183184009034, "loss": 0.1733, "step": 78670 }, { "epoch": 3.26, "grad_norm": 0.52734375, "learning_rate": 0.00048561469283109104, "loss": 0.2664, "step": 78680 }, { "epoch": 3.26, "grad_norm": 1.125, "learning_rate": 0.00048561106681787724, "loss": 0.2349, "step": 78690 }, { "epoch": 3.26, "grad_norm": 0.388671875, "learning_rate": 0.0004856074403612689, "loss": 0.2233, "step": 78700 }, { "epoch": 3.26, "grad_norm": 0.56640625, "learning_rate": 0.0004856038134612728, "loss": 0.2047, "step": 78710 }, { "epoch": 3.26, "grad_norm": 1.1953125, "learning_rate": 0.0004856001861178957, "loss": 0.2391, "step": 78720 }, { "epoch": 3.26, "grad_norm": 0.84765625, "learning_rate": 0.0004855965583311446, "loss": 0.1945, "step": 78730 }, { "epoch": 3.26, "grad_norm": 0.96484375, "learning_rate": 0.0004855929301010261, "loss": 0.1717, "step": 78740 }, { "epoch": 3.26, "grad_norm": 0.47265625, "learning_rate": 0.0004855893014275472, "loss": 0.2346, "step": 78750 }, { "epoch": 3.26, "grad_norm": 0.3828125, "learning_rate": 0.0004855856723107147, "loss": 0.1896, "step": 78760 }, { "epoch": 3.26, "grad_norm": 0.423828125, "learning_rate": 0.0004855820427505354, "loss": 0.1525, "step": 78770 }, { "epoch": 3.26, "grad_norm": 0.5625, "learning_rate": 0.0004855784127470161, "loss": 0.217, "step": 78780 }, { "epoch": 3.26, "grad_norm": 0.392578125, "learning_rate": 0.00048557478230016373, "loss": 0.1683, "step": 78790 }, { "epoch": 3.26, "grad_norm": 0.65625, "learning_rate": 0.000485571151409985, "loss": 0.1955, "step": 78800 }, { "epoch": 3.26, "grad_norm": 0.306640625, "learning_rate": 0.0004855675200764868, "loss": 0.2077, "step": 78810 }, { "epoch": 3.26, "grad_norm": 1.0546875, "learning_rate": 0.000485563888299676, "loss": 0.2571, "step": 78820 }, { "epoch": 3.27, "grad_norm": 1.0546875, "learning_rate": 0.0004855602560795593, "loss": 0.1719, "step": 78830 }, { "epoch": 3.27, "grad_norm": 0.671875, "learning_rate": 0.0004855566234161438, "loss": 0.2495, "step": 78840 }, { "epoch": 3.27, "grad_norm": 0.94140625, "learning_rate": 0.0004855529903094361, "loss": 0.2241, "step": 78850 }, { "epoch": 3.27, "grad_norm": 0.609375, "learning_rate": 0.0004855493567594431, "loss": 0.208, "step": 78860 }, { "epoch": 3.27, "grad_norm": 0.34765625, "learning_rate": 0.00048554572276617166, "loss": 0.2114, "step": 78870 }, { "epoch": 3.27, "grad_norm": 0.66796875, "learning_rate": 0.00048554208832962854, "loss": 0.2046, "step": 78880 }, { "epoch": 3.27, "grad_norm": 0.921875, "learning_rate": 0.0004855384534498207, "loss": 0.2185, "step": 78890 }, { "epoch": 3.27, "grad_norm": 0.44140625, "learning_rate": 0.00048553481812675496, "loss": 0.2581, "step": 78900 }, { "epoch": 3.27, "grad_norm": 0.76953125, "learning_rate": 0.00048553118236043815, "loss": 0.2505, "step": 78910 }, { "epoch": 3.27, "grad_norm": 0.59765625, "learning_rate": 0.000485527546150877, "loss": 0.1838, "step": 78920 }, { "epoch": 3.27, "grad_norm": 0.5703125, "learning_rate": 0.00048552390949807843, "loss": 0.1977, "step": 78930 }, { "epoch": 3.27, "grad_norm": 0.80078125, "learning_rate": 0.00048552027240204946, "loss": 0.2398, "step": 78940 }, { "epoch": 3.27, "grad_norm": 0.2578125, "learning_rate": 0.00048551663486279663, "loss": 0.2645, "step": 78950 }, { "epoch": 3.27, "grad_norm": 0.98828125, "learning_rate": 0.0004855129968803269, "loss": 0.2275, "step": 78960 }, { "epoch": 3.27, "grad_norm": 0.65625, "learning_rate": 0.0004855093584546473, "loss": 0.2359, "step": 78970 }, { "epoch": 3.27, "grad_norm": 0.89453125, "learning_rate": 0.0004855057195857644, "loss": 0.231, "step": 78980 }, { "epoch": 3.27, "grad_norm": 0.447265625, "learning_rate": 0.0004855020802736851, "loss": 0.1818, "step": 78990 }, { "epoch": 3.27, "grad_norm": 0.80078125, "learning_rate": 0.00048549844051841646, "loss": 0.223, "step": 79000 }, { "epoch": 3.27, "grad_norm": 0.53515625, "learning_rate": 0.00048549480031996514, "loss": 0.2034, "step": 79010 }, { "epoch": 3.27, "grad_norm": 0.640625, "learning_rate": 0.000485491159678338, "loss": 0.2266, "step": 79020 }, { "epoch": 3.27, "grad_norm": 0.5703125, "learning_rate": 0.000485487518593542, "loss": 0.2516, "step": 79030 }, { "epoch": 3.27, "grad_norm": 0.625, "learning_rate": 0.00048548387706558383, "loss": 0.2518, "step": 79040 }, { "epoch": 3.27, "grad_norm": 0.53515625, "learning_rate": 0.00048548023509447047, "loss": 0.2241, "step": 79050 }, { "epoch": 3.27, "grad_norm": 0.427734375, "learning_rate": 0.0004854765926802088, "loss": 0.213, "step": 79060 }, { "epoch": 3.28, "grad_norm": 0.625, "learning_rate": 0.00048547294982280556, "loss": 0.1909, "step": 79070 }, { "epoch": 3.28, "grad_norm": 0.65625, "learning_rate": 0.0004854693065222676, "loss": 0.2231, "step": 79080 }, { "epoch": 3.28, "grad_norm": 0.796875, "learning_rate": 0.00048546566277860194, "loss": 0.2492, "step": 79090 }, { "epoch": 3.28, "grad_norm": 0.80859375, "learning_rate": 0.00048546201859181524, "loss": 0.2229, "step": 79100 }, { "epoch": 3.28, "grad_norm": 1.15625, "learning_rate": 0.0004854583739619145, "loss": 0.1583, "step": 79110 }, { "epoch": 3.28, "grad_norm": 0.77734375, "learning_rate": 0.0004854547288889065, "loss": 0.261, "step": 79120 }, { "epoch": 3.28, "grad_norm": 0.984375, "learning_rate": 0.0004854510833727982, "loss": 0.263, "step": 79130 }, { "epoch": 3.28, "grad_norm": 0.263671875, "learning_rate": 0.0004854474374135963, "loss": 0.256, "step": 79140 }, { "epoch": 3.28, "grad_norm": 0.310546875, "learning_rate": 0.0004854437910113078, "loss": 0.1909, "step": 79150 }, { "epoch": 3.28, "grad_norm": 0.55859375, "learning_rate": 0.0004854401441659395, "loss": 0.2165, "step": 79160 }, { "epoch": 3.28, "grad_norm": 0.609375, "learning_rate": 0.00048543649687749824, "loss": 0.1656, "step": 79170 }, { "epoch": 3.28, "grad_norm": 1.4921875, "learning_rate": 0.00048543284914599093, "loss": 0.216, "step": 79180 }, { "epoch": 3.28, "grad_norm": 0.74609375, "learning_rate": 0.0004854292009714245, "loss": 0.2089, "step": 79190 }, { "epoch": 3.28, "grad_norm": 0.4296875, "learning_rate": 0.0004854255523538057, "loss": 0.2056, "step": 79200 }, { "epoch": 3.28, "grad_norm": 0.2470703125, "learning_rate": 0.0004854219032931414, "loss": 0.1542, "step": 79210 }, { "epoch": 3.28, "grad_norm": 0.44921875, "learning_rate": 0.0004854182537894385, "loss": 0.187, "step": 79220 }, { "epoch": 3.28, "grad_norm": 0.90625, "learning_rate": 0.00048541460384270385, "loss": 0.1908, "step": 79230 }, { "epoch": 3.28, "grad_norm": 0.640625, "learning_rate": 0.0004854109534529444, "loss": 0.203, "step": 79240 }, { "epoch": 3.28, "grad_norm": 0.16015625, "learning_rate": 0.0004854073026201669, "loss": 0.1955, "step": 79250 }, { "epoch": 3.28, "grad_norm": 0.494140625, "learning_rate": 0.00048540365134437836, "loss": 0.1377, "step": 79260 }, { "epoch": 3.28, "grad_norm": 1.1171875, "learning_rate": 0.00048539999962558545, "loss": 0.2259, "step": 79270 }, { "epoch": 3.28, "grad_norm": 0.70703125, "learning_rate": 0.00048539634746379523, "loss": 0.2108, "step": 79280 }, { "epoch": 3.28, "grad_norm": 0.439453125, "learning_rate": 0.0004853926948590145, "loss": 0.2252, "step": 79290 }, { "epoch": 3.28, "grad_norm": 0.61328125, "learning_rate": 0.0004853890418112502, "loss": 0.235, "step": 79300 }, { "epoch": 3.29, "grad_norm": 0.7734375, "learning_rate": 0.00048538538832050906, "loss": 0.2411, "step": 79310 }, { "epoch": 3.29, "grad_norm": 0.83984375, "learning_rate": 0.00048538173438679807, "loss": 0.1972, "step": 79320 }, { "epoch": 3.29, "grad_norm": 0.87890625, "learning_rate": 0.0004853780800101241, "loss": 0.1916, "step": 79330 }, { "epoch": 3.29, "grad_norm": 0.859375, "learning_rate": 0.00048537442519049393, "loss": 0.2145, "step": 79340 }, { "epoch": 3.29, "grad_norm": 2.359375, "learning_rate": 0.0004853707699279145, "loss": 0.2438, "step": 79350 }, { "epoch": 3.29, "grad_norm": 0.88671875, "learning_rate": 0.0004853671142223928, "loss": 0.2156, "step": 79360 }, { "epoch": 3.29, "grad_norm": 1.453125, "learning_rate": 0.00048536345807393555, "loss": 0.2102, "step": 79370 }, { "epoch": 3.29, "grad_norm": 0.94140625, "learning_rate": 0.00048535980148254964, "loss": 0.2161, "step": 79380 }, { "epoch": 3.29, "grad_norm": 0.345703125, "learning_rate": 0.0004853561444482421, "loss": 0.2212, "step": 79390 }, { "epoch": 3.29, "grad_norm": 0.6875, "learning_rate": 0.00048535248697101964, "loss": 0.1743, "step": 79400 }, { "epoch": 3.29, "grad_norm": 0.87109375, "learning_rate": 0.0004853488290508893, "loss": 0.2544, "step": 79410 }, { "epoch": 3.29, "grad_norm": 0.515625, "learning_rate": 0.00048534517068785776, "loss": 0.2123, "step": 79420 }, { "epoch": 3.29, "grad_norm": 0.53515625, "learning_rate": 0.0004853415118819321, "loss": 0.2, "step": 79430 }, { "epoch": 3.29, "grad_norm": 0.69921875, "learning_rate": 0.00048533785263311913, "loss": 0.2331, "step": 79440 }, { "epoch": 3.29, "grad_norm": 0.59375, "learning_rate": 0.0004853341929414257, "loss": 0.1882, "step": 79450 }, { "epoch": 3.29, "grad_norm": 0.73828125, "learning_rate": 0.0004853305328068588, "loss": 0.2046, "step": 79460 }, { "epoch": 3.29, "grad_norm": 0.90625, "learning_rate": 0.0004853268722294252, "loss": 0.184, "step": 79470 }, { "epoch": 3.29, "grad_norm": 0.6796875, "learning_rate": 0.0004853232112091318, "loss": 0.2169, "step": 79480 }, { "epoch": 3.29, "grad_norm": 0.8359375, "learning_rate": 0.00048531954974598556, "loss": 0.2033, "step": 79490 }, { "epoch": 3.29, "grad_norm": 0.52734375, "learning_rate": 0.00048531588783999336, "loss": 0.2355, "step": 79500 }, { "epoch": 3.29, "grad_norm": 0.74609375, "learning_rate": 0.00048531222549116204, "loss": 0.2141, "step": 79510 }, { "epoch": 3.29, "grad_norm": 1.0625, "learning_rate": 0.00048530856269949865, "loss": 0.2255, "step": 79520 }, { "epoch": 3.29, "grad_norm": 0.80859375, "learning_rate": 0.0004853048994650098, "loss": 0.1707, "step": 79530 }, { "epoch": 3.29, "grad_norm": 0.83203125, "learning_rate": 0.00048530123578770256, "loss": 0.1946, "step": 79540 }, { "epoch": 3.29, "grad_norm": 1.21875, "learning_rate": 0.00048529757166758386, "loss": 0.2116, "step": 79550 }, { "epoch": 3.3, "grad_norm": 1.5, "learning_rate": 0.0004852939071046605, "loss": 0.2593, "step": 79560 }, { "epoch": 3.3, "grad_norm": 0.5546875, "learning_rate": 0.00048529024209893946, "loss": 0.2509, "step": 79570 }, { "epoch": 3.3, "grad_norm": 0.67578125, "learning_rate": 0.0004852865766504276, "loss": 0.2207, "step": 79580 }, { "epoch": 3.3, "grad_norm": 0.46484375, "learning_rate": 0.00048528291075913176, "loss": 0.194, "step": 79590 }, { "epoch": 3.3, "grad_norm": 0.6953125, "learning_rate": 0.0004852792444250589, "loss": 0.1882, "step": 79600 }, { "epoch": 3.3, "grad_norm": 0.373046875, "learning_rate": 0.00048527557764821595, "loss": 0.2375, "step": 79610 }, { "epoch": 3.3, "grad_norm": 0.291015625, "learning_rate": 0.00048527191042860974, "loss": 0.1758, "step": 79620 }, { "epoch": 3.3, "grad_norm": 0.59765625, "learning_rate": 0.00048526824276624717, "loss": 0.2226, "step": 79630 }, { "epoch": 3.3, "grad_norm": 0.2373046875, "learning_rate": 0.0004852645746611352, "loss": 0.2274, "step": 79640 }, { "epoch": 3.3, "grad_norm": 0.333984375, "learning_rate": 0.0004852609061132807, "loss": 0.1795, "step": 79650 }, { "epoch": 3.3, "grad_norm": 0.83203125, "learning_rate": 0.0004852572371226906, "loss": 0.2136, "step": 79660 }, { "epoch": 3.3, "grad_norm": 0.640625, "learning_rate": 0.0004852535676893718, "loss": 0.2388, "step": 79670 }, { "epoch": 3.3, "grad_norm": 0.58203125, "learning_rate": 0.00048524989781333116, "loss": 0.2692, "step": 79680 }, { "epoch": 3.3, "grad_norm": 0.322265625, "learning_rate": 0.0004852462274945756, "loss": 0.1745, "step": 79690 }, { "epoch": 3.3, "grad_norm": 1.0859375, "learning_rate": 0.0004852425567331121, "loss": 0.2251, "step": 79700 }, { "epoch": 3.3, "grad_norm": 0.875, "learning_rate": 0.00048523888552894746, "loss": 0.2251, "step": 79710 }, { "epoch": 3.3, "grad_norm": 0.51953125, "learning_rate": 0.0004852352138820887, "loss": 0.2339, "step": 79720 }, { "epoch": 3.3, "grad_norm": 0.396484375, "learning_rate": 0.0004852315417925426, "loss": 0.1958, "step": 79730 }, { "epoch": 3.3, "grad_norm": 1.1484375, "learning_rate": 0.0004852278692603162, "loss": 0.2371, "step": 79740 }, { "epoch": 3.3, "grad_norm": 0.359375, "learning_rate": 0.0004852241962854164, "loss": 0.1975, "step": 79750 }, { "epoch": 3.3, "grad_norm": 0.34375, "learning_rate": 0.0004852205228678499, "loss": 0.2036, "step": 79760 }, { "epoch": 3.3, "grad_norm": 1.4375, "learning_rate": 0.00048521684900762387, "loss": 0.1986, "step": 79770 }, { "epoch": 3.3, "grad_norm": 0.5078125, "learning_rate": 0.00048521317470474515, "loss": 0.1944, "step": 79780 }, { "epoch": 3.3, "grad_norm": 0.6015625, "learning_rate": 0.00048520949995922057, "loss": 0.227, "step": 79790 }, { "epoch": 3.31, "grad_norm": 1.453125, "learning_rate": 0.0004852058247710572, "loss": 0.2412, "step": 79800 }, { "epoch": 3.31, "grad_norm": 0.310546875, "learning_rate": 0.00048520214914026183, "loss": 0.1637, "step": 79810 }, { "epoch": 3.31, "grad_norm": 0.158203125, "learning_rate": 0.0004851984730668414, "loss": 0.1812, "step": 79820 }, { "epoch": 3.31, "grad_norm": 0.51171875, "learning_rate": 0.0004851947965508029, "loss": 0.1852, "step": 79830 }, { "epoch": 3.31, "grad_norm": 0.96484375, "learning_rate": 0.0004851911195921531, "loss": 0.238, "step": 79840 }, { "epoch": 3.31, "grad_norm": 0.71875, "learning_rate": 0.00048518744219089907, "loss": 0.2204, "step": 79850 }, { "epoch": 3.31, "grad_norm": 0.6171875, "learning_rate": 0.0004851837643470477, "loss": 0.2068, "step": 79860 }, { "epoch": 3.31, "grad_norm": 1.1796875, "learning_rate": 0.00048518008606060584, "loss": 0.2133, "step": 79870 }, { "epoch": 3.31, "grad_norm": 0.25, "learning_rate": 0.0004851764073315804, "loss": 0.1949, "step": 79880 }, { "epoch": 3.31, "grad_norm": 1.140625, "learning_rate": 0.0004851727281599784, "loss": 0.2277, "step": 79890 }, { "epoch": 3.31, "grad_norm": 0.6953125, "learning_rate": 0.0004851690485458068, "loss": 0.2091, "step": 79900 }, { "epoch": 3.31, "grad_norm": 0.66015625, "learning_rate": 0.00048516536848907234, "loss": 0.145, "step": 79910 }, { "epoch": 3.31, "grad_norm": 0.7421875, "learning_rate": 0.00048516168798978213, "loss": 0.2468, "step": 79920 }, { "epoch": 3.31, "grad_norm": 1.125, "learning_rate": 0.000485158007047943, "loss": 0.1951, "step": 79930 }, { "epoch": 3.31, "grad_norm": 0.68359375, "learning_rate": 0.0004851543256635619, "loss": 0.1996, "step": 79940 }, { "epoch": 3.31, "grad_norm": 0.70703125, "learning_rate": 0.0004851506438366457, "loss": 0.2609, "step": 79950 }, { "epoch": 3.31, "grad_norm": 0.71875, "learning_rate": 0.0004851469615672014, "loss": 0.1971, "step": 79960 }, { "epoch": 3.31, "grad_norm": 0.51953125, "learning_rate": 0.0004851432788552359, "loss": 0.1822, "step": 79970 }, { "epoch": 3.31, "grad_norm": 0.8828125, "learning_rate": 0.00048513959570075624, "loss": 0.202, "step": 79980 }, { "epoch": 3.31, "grad_norm": 0.419921875, "learning_rate": 0.0004851359121037692, "loss": 0.2025, "step": 79990 }, { "epoch": 3.31, "grad_norm": 0.86328125, "learning_rate": 0.0004851322280642817, "loss": 0.2275, "step": 80000 }, { "epoch": 3.31, "grad_norm": 0.5703125, "learning_rate": 0.0004851285435823008, "loss": 0.1865, "step": 80010 }, { "epoch": 3.31, "grad_norm": 0.91015625, "learning_rate": 0.00048512485865783334, "loss": 0.2306, "step": 80020 }, { "epoch": 3.31, "grad_norm": 0.6640625, "learning_rate": 0.00048512117329088634, "loss": 0.2225, "step": 80030 }, { "epoch": 3.32, "grad_norm": 0.326171875, "learning_rate": 0.0004851174874814667, "loss": 0.1954, "step": 80040 }, { "epoch": 3.32, "grad_norm": 0.734375, "learning_rate": 0.0004851138012295813, "loss": 0.2454, "step": 80050 }, { "epoch": 3.32, "grad_norm": 1.0546875, "learning_rate": 0.000485110114535237, "loss": 0.1822, "step": 80060 }, { "epoch": 3.32, "grad_norm": 0.51953125, "learning_rate": 0.00048510642739844103, "loss": 0.2153, "step": 80070 }, { "epoch": 3.32, "grad_norm": 0.46484375, "learning_rate": 0.00048510273981920005, "loss": 0.2862, "step": 80080 }, { "epoch": 3.32, "grad_norm": 0.66015625, "learning_rate": 0.0004850990517975211, "loss": 0.2013, "step": 80090 }, { "epoch": 3.32, "grad_norm": 0.9296875, "learning_rate": 0.0004850953633334112, "loss": 0.1968, "step": 80100 }, { "epoch": 3.32, "grad_norm": 1.0390625, "learning_rate": 0.0004850916744268772, "loss": 0.1972, "step": 80110 }, { "epoch": 3.32, "grad_norm": 0.279296875, "learning_rate": 0.000485087985077926, "loss": 0.1901, "step": 80120 }, { "epoch": 3.32, "grad_norm": 1.0859375, "learning_rate": 0.0004850842952865646, "loss": 0.233, "step": 80130 }, { "epoch": 3.32, "grad_norm": 0.890625, "learning_rate": 0.00048508060505280006, "loss": 0.1886, "step": 80140 }, { "epoch": 3.32, "grad_norm": 1.1875, "learning_rate": 0.0004850769143766391, "loss": 0.2491, "step": 80150 }, { "epoch": 3.32, "grad_norm": 0.7578125, "learning_rate": 0.00048507322325808876, "loss": 0.2318, "step": 80160 }, { "epoch": 3.32, "grad_norm": 0.81640625, "learning_rate": 0.00048506953169715606, "loss": 0.2077, "step": 80170 }, { "epoch": 3.32, "grad_norm": 0.70703125, "learning_rate": 0.0004850658396938479, "loss": 0.2028, "step": 80180 }, { "epoch": 3.32, "grad_norm": 0.263671875, "learning_rate": 0.0004850621472481711, "loss": 0.2148, "step": 80190 }, { "epoch": 3.32, "grad_norm": 1.0, "learning_rate": 0.00048505845436013287, "loss": 0.1926, "step": 80200 }, { "epoch": 3.32, "grad_norm": 0.291015625, "learning_rate": 0.0004850547610297399, "loss": 0.2083, "step": 80210 }, { "epoch": 3.32, "grad_norm": 0.72265625, "learning_rate": 0.0004850510672569993, "loss": 0.1658, "step": 80220 }, { "epoch": 3.32, "grad_norm": 1.734375, "learning_rate": 0.00048504737304191795, "loss": 0.199, "step": 80230 }, { "epoch": 3.32, "grad_norm": 0.75, "learning_rate": 0.00048504367838450287, "loss": 0.2044, "step": 80240 }, { "epoch": 3.32, "grad_norm": 0.90234375, "learning_rate": 0.00048503998328476095, "loss": 0.1678, "step": 80250 }, { "epoch": 3.32, "grad_norm": 0.62890625, "learning_rate": 0.00048503628774269913, "loss": 0.2161, "step": 80260 }, { "epoch": 3.32, "grad_norm": 0.73828125, "learning_rate": 0.00048503259175832446, "loss": 0.2066, "step": 80270 }, { "epoch": 3.33, "grad_norm": 0.390625, "learning_rate": 0.0004850288953316438, "loss": 0.239, "step": 80280 }, { "epoch": 3.33, "grad_norm": 0.6015625, "learning_rate": 0.00048502519846266415, "loss": 0.1934, "step": 80290 }, { "epoch": 3.33, "grad_norm": 2.015625, "learning_rate": 0.00048502150115139244, "loss": 0.1643, "step": 80300 }, { "epoch": 3.33, "grad_norm": 0.6640625, "learning_rate": 0.00048501780339783564, "loss": 0.1822, "step": 80310 }, { "epoch": 3.33, "grad_norm": 2.0625, "learning_rate": 0.00048501410520200073, "loss": 0.1235, "step": 80320 }, { "epoch": 3.33, "grad_norm": 0.76953125, "learning_rate": 0.00048501040656389466, "loss": 0.2259, "step": 80330 }, { "epoch": 3.33, "grad_norm": 0.48828125, "learning_rate": 0.00048500670748352436, "loss": 0.2378, "step": 80340 }, { "epoch": 3.33, "grad_norm": 0.828125, "learning_rate": 0.0004850030079608969, "loss": 0.2253, "step": 80350 }, { "epoch": 3.33, "grad_norm": 0.42578125, "learning_rate": 0.00048499930799601905, "loss": 0.1994, "step": 80360 }, { "epoch": 3.33, "grad_norm": 0.80078125, "learning_rate": 0.0004849956075888979, "loss": 0.2437, "step": 80370 }, { "epoch": 3.33, "grad_norm": 0.498046875, "learning_rate": 0.00048499190673954043, "loss": 0.2531, "step": 80380 }, { "epoch": 3.33, "grad_norm": 0.443359375, "learning_rate": 0.0004849882054479535, "loss": 0.2442, "step": 80390 }, { "epoch": 3.33, "grad_norm": 0.76171875, "learning_rate": 0.0004849845037141443, "loss": 0.2175, "step": 80400 }, { "epoch": 3.33, "grad_norm": 0.8046875, "learning_rate": 0.00048498080153811944, "loss": 0.1956, "step": 80410 }, { "epoch": 3.33, "grad_norm": 0.609375, "learning_rate": 0.00048497709891988616, "loss": 0.2279, "step": 80420 }, { "epoch": 3.33, "grad_norm": 0.54296875, "learning_rate": 0.0004849733958594514, "loss": 0.2383, "step": 80430 }, { "epoch": 3.33, "grad_norm": 0.6484375, "learning_rate": 0.00048496969235682207, "loss": 0.1448, "step": 80440 }, { "epoch": 3.33, "grad_norm": 0.466796875, "learning_rate": 0.00048496598841200515, "loss": 0.2412, "step": 80450 }, { "epoch": 3.33, "grad_norm": 0.75, "learning_rate": 0.00048496228402500764, "loss": 0.1841, "step": 80460 }, { "epoch": 3.33, "grad_norm": 0.8671875, "learning_rate": 0.00048495857919583643, "loss": 0.2608, "step": 80470 }, { "epoch": 3.33, "grad_norm": 0.291015625, "learning_rate": 0.00048495487392449853, "loss": 0.2494, "step": 80480 }, { "epoch": 3.33, "grad_norm": 0.60546875, "learning_rate": 0.000484951168211001, "loss": 0.1826, "step": 80490 }, { "epoch": 3.33, "grad_norm": 0.68359375, "learning_rate": 0.00048494746205535074, "loss": 0.1965, "step": 80500 }, { "epoch": 3.33, "grad_norm": 0.84375, "learning_rate": 0.0004849437554575547, "loss": 0.2384, "step": 80510 }, { "epoch": 3.34, "grad_norm": 0.4140625, "learning_rate": 0.00048494004841761985, "loss": 0.2581, "step": 80520 }, { "epoch": 3.34, "grad_norm": 0.890625, "learning_rate": 0.0004849363409355533, "loss": 0.2726, "step": 80530 }, { "epoch": 3.34, "grad_norm": 0.5390625, "learning_rate": 0.0004849326330113618, "loss": 0.2447, "step": 80540 }, { "epoch": 3.34, "grad_norm": 0.43359375, "learning_rate": 0.00048492892464505256, "loss": 0.1522, "step": 80550 }, { "epoch": 3.34, "grad_norm": 0.2451171875, "learning_rate": 0.0004849252158366324, "loss": 0.1735, "step": 80560 }, { "epoch": 3.34, "grad_norm": 1.875, "learning_rate": 0.0004849215065861083, "loss": 0.242, "step": 80570 }, { "epoch": 3.34, "grad_norm": 1.1171875, "learning_rate": 0.00048491779689348747, "loss": 0.1861, "step": 80580 }, { "epoch": 3.34, "grad_norm": 0.89453125, "learning_rate": 0.00048491408675877657, "loss": 0.1936, "step": 80590 }, { "epoch": 3.34, "grad_norm": 0.47265625, "learning_rate": 0.00048491037618198277, "loss": 0.2546, "step": 80600 }, { "epoch": 3.34, "grad_norm": 0.443359375, "learning_rate": 0.00048490666516311297, "loss": 0.2104, "step": 80610 }, { "epoch": 3.34, "grad_norm": 0.404296875, "learning_rate": 0.00048490295370217425, "loss": 0.1747, "step": 80620 }, { "epoch": 3.34, "grad_norm": 0.5546875, "learning_rate": 0.00048489924179917357, "loss": 0.2477, "step": 80630 }, { "epoch": 3.34, "grad_norm": 1.3671875, "learning_rate": 0.00048489552945411775, "loss": 0.2145, "step": 80640 }, { "epoch": 3.34, "grad_norm": 1.0234375, "learning_rate": 0.000484891816667014, "loss": 0.2651, "step": 80650 }, { "epoch": 3.34, "grad_norm": 1.1015625, "learning_rate": 0.0004848881034378692, "loss": 0.2008, "step": 80660 }, { "epoch": 3.34, "grad_norm": 0.80859375, "learning_rate": 0.0004848843897666904, "loss": 0.2624, "step": 80670 }, { "epoch": 3.34, "grad_norm": 0.48046875, "learning_rate": 0.0004848806756534845, "loss": 0.2182, "step": 80680 }, { "epoch": 3.34, "grad_norm": 0.72265625, "learning_rate": 0.0004848769610982585, "loss": 0.2114, "step": 80690 }, { "epoch": 3.34, "grad_norm": 0.75390625, "learning_rate": 0.0004848732461010195, "loss": 0.2468, "step": 80700 }, { "epoch": 3.34, "grad_norm": 0.69140625, "learning_rate": 0.00048486953066177437, "loss": 0.2043, "step": 80710 }, { "epoch": 3.34, "grad_norm": 0.5390625, "learning_rate": 0.00048486581478053017, "loss": 0.1799, "step": 80720 }, { "epoch": 3.34, "grad_norm": 1.25, "learning_rate": 0.0004848620984572938, "loss": 0.2587, "step": 80730 }, { "epoch": 3.34, "grad_norm": 0.82421875, "learning_rate": 0.00048485838169207244, "loss": 0.1566, "step": 80740 }, { "epoch": 3.34, "grad_norm": 0.609375, "learning_rate": 0.00048485466448487294, "loss": 0.2086, "step": 80750 }, { "epoch": 3.35, "grad_norm": 0.64453125, "learning_rate": 0.00048485094683570223, "loss": 0.1585, "step": 80760 }, { "epoch": 3.35, "grad_norm": 0.310546875, "learning_rate": 0.00048484722874456745, "loss": 0.2617, "step": 80770 }, { "epoch": 3.35, "grad_norm": 0.65625, "learning_rate": 0.00048484351021147555, "loss": 0.1949, "step": 80780 }, { "epoch": 3.35, "grad_norm": 0.48046875, "learning_rate": 0.00048483979123643356, "loss": 0.1921, "step": 80790 }, { "epoch": 3.35, "grad_norm": 0.0, "learning_rate": 0.00048483607181944844, "loss": 0.2364, "step": 80800 }, { "epoch": 3.35, "grad_norm": 0.5, "learning_rate": 0.0004848323519605271, "loss": 0.1975, "step": 80810 }, { "epoch": 3.35, "grad_norm": 0.609375, "learning_rate": 0.00048482863165967674, "loss": 0.2009, "step": 80820 }, { "epoch": 3.35, "grad_norm": 0.361328125, "learning_rate": 0.0004848249109169042, "loss": 0.2076, "step": 80830 }, { "epoch": 3.35, "grad_norm": 0.64453125, "learning_rate": 0.00048482118973221656, "loss": 0.2289, "step": 80840 }, { "epoch": 3.35, "grad_norm": 0.87890625, "learning_rate": 0.0004848174681056208, "loss": 0.2487, "step": 80850 }, { "epoch": 3.35, "grad_norm": 1.453125, "learning_rate": 0.0004848137460371239, "loss": 0.1855, "step": 80860 }, { "epoch": 3.35, "grad_norm": 0.63671875, "learning_rate": 0.00048481002352673293, "loss": 0.2189, "step": 80870 }, { "epoch": 3.35, "grad_norm": 0.6875, "learning_rate": 0.00048480630057445487, "loss": 0.256, "step": 80880 }, { "epoch": 3.35, "grad_norm": 0.73046875, "learning_rate": 0.0004848025771802966, "loss": 0.2304, "step": 80890 }, { "epoch": 3.35, "grad_norm": 0.0, "learning_rate": 0.0004847988533442653, "loss": 0.1646, "step": 80900 }, { "epoch": 3.35, "grad_norm": 0.34765625, "learning_rate": 0.0004847951290663679, "loss": 0.1998, "step": 80910 }, { "epoch": 3.35, "grad_norm": 1.171875, "learning_rate": 0.00048479140434661146, "loss": 0.1997, "step": 80920 }, { "epoch": 3.35, "grad_norm": 0.640625, "learning_rate": 0.0004847876791850029, "loss": 0.2622, "step": 80930 }, { "epoch": 3.35, "grad_norm": 0.69140625, "learning_rate": 0.00048478395358154936, "loss": 0.2303, "step": 80940 }, { "epoch": 3.35, "grad_norm": 0.48046875, "learning_rate": 0.0004847802275362577, "loss": 0.2048, "step": 80950 }, { "epoch": 3.35, "grad_norm": 0.6875, "learning_rate": 0.0004847765010491351, "loss": 0.2127, "step": 80960 }, { "epoch": 3.35, "grad_norm": 0.423828125, "learning_rate": 0.0004847727741201884, "loss": 0.1799, "step": 80970 }, { "epoch": 3.35, "grad_norm": 0.8671875, "learning_rate": 0.00048476904674942466, "loss": 0.2009, "step": 80980 }, { "epoch": 3.35, "grad_norm": 0.7421875, "learning_rate": 0.00048476531893685095, "loss": 0.2118, "step": 80990 }, { "epoch": 3.36, "grad_norm": 0.64453125, "learning_rate": 0.0004847615906824743, "loss": 0.2476, "step": 81000 }, { "epoch": 3.36, "grad_norm": 0.63671875, "learning_rate": 0.00048475786198630167, "loss": 0.208, "step": 81010 }, { "epoch": 3.36, "grad_norm": 0.91015625, "learning_rate": 0.00048475413284834013, "loss": 0.2489, "step": 81020 }, { "epoch": 3.36, "grad_norm": 0.64453125, "learning_rate": 0.0004847504032685966, "loss": 0.1978, "step": 81030 }, { "epoch": 3.36, "grad_norm": 0.46875, "learning_rate": 0.00048474667324707824, "loss": 0.1991, "step": 81040 }, { "epoch": 3.36, "grad_norm": 0.8671875, "learning_rate": 0.0004847429427837919, "loss": 0.1861, "step": 81050 }, { "epoch": 3.36, "grad_norm": 2.375, "learning_rate": 0.00048473921187874473, "loss": 0.1891, "step": 81060 }, { "epoch": 3.36, "grad_norm": 0.455078125, "learning_rate": 0.00048473548053194374, "loss": 0.1894, "step": 81070 }, { "epoch": 3.36, "grad_norm": 0.52734375, "learning_rate": 0.00048473174874339586, "loss": 0.1529, "step": 81080 }, { "epoch": 3.36, "grad_norm": 0.75390625, "learning_rate": 0.0004847280165131083, "loss": 0.2741, "step": 81090 }, { "epoch": 3.36, "grad_norm": 0.8984375, "learning_rate": 0.0004847242838410878, "loss": 0.2159, "step": 81100 }, { "epoch": 3.36, "grad_norm": 0.546875, "learning_rate": 0.00048472055072734167, "loss": 0.1978, "step": 81110 }, { "epoch": 3.36, "grad_norm": 0.30078125, "learning_rate": 0.00048471681717187677, "loss": 0.1949, "step": 81120 }, { "epoch": 3.36, "grad_norm": 0.490234375, "learning_rate": 0.00048471308317470015, "loss": 0.1898, "step": 81130 }, { "epoch": 3.36, "grad_norm": 0.7734375, "learning_rate": 0.00048470934873581887, "loss": 0.2808, "step": 81140 }, { "epoch": 3.36, "grad_norm": 0.470703125, "learning_rate": 0.0004847056138552399, "loss": 0.18, "step": 81150 }, { "epoch": 3.36, "grad_norm": 0.66796875, "learning_rate": 0.00048470187853297036, "loss": 0.1881, "step": 81160 }, { "epoch": 3.36, "grad_norm": 0.72265625, "learning_rate": 0.00048469814276901724, "loss": 0.2513, "step": 81170 }, { "epoch": 3.36, "grad_norm": 0.37109375, "learning_rate": 0.0004846944065633876, "loss": 0.1765, "step": 81180 }, { "epoch": 3.36, "grad_norm": 1.15625, "learning_rate": 0.00048469066991608834, "loss": 0.2562, "step": 81190 }, { "epoch": 3.36, "grad_norm": 1.0390625, "learning_rate": 0.0004846869328271266, "loss": 0.1877, "step": 81200 }, { "epoch": 3.36, "grad_norm": 0.359375, "learning_rate": 0.0004846831952965094, "loss": 0.2069, "step": 81210 }, { "epoch": 3.36, "grad_norm": 0.7265625, "learning_rate": 0.0004846794573242438, "loss": 0.2093, "step": 81220 }, { "epoch": 3.36, "grad_norm": 2.25, "learning_rate": 0.0004846757189103368, "loss": 0.1857, "step": 81230 }, { "epoch": 3.36, "grad_norm": 0.73046875, "learning_rate": 0.0004846719800547954, "loss": 0.2508, "step": 81240 }, { "epoch": 3.37, "grad_norm": 0.72265625, "learning_rate": 0.00048466824075762673, "loss": 0.2697, "step": 81250 }, { "epoch": 3.37, "grad_norm": 0.443359375, "learning_rate": 0.00048466450101883774, "loss": 0.1808, "step": 81260 }, { "epoch": 3.37, "grad_norm": 0.58984375, "learning_rate": 0.00048466076083843553, "loss": 0.1829, "step": 81270 }, { "epoch": 3.37, "grad_norm": 0.69921875, "learning_rate": 0.0004846570202164271, "loss": 0.2062, "step": 81280 }, { "epoch": 3.37, "grad_norm": 0.408203125, "learning_rate": 0.00048465327915281944, "loss": 0.2312, "step": 81290 }, { "epoch": 3.37, "grad_norm": 0.6875, "learning_rate": 0.0004846495376476197, "loss": 0.2887, "step": 81300 }, { "epoch": 3.37, "grad_norm": 0.439453125, "learning_rate": 0.0004846457957008349, "loss": 0.2267, "step": 81310 }, { "epoch": 3.37, "grad_norm": 0.609375, "learning_rate": 0.00048464205331247203, "loss": 0.2548, "step": 81320 }, { "epoch": 3.37, "grad_norm": 0.30859375, "learning_rate": 0.00048463831048253813, "loss": 0.2076, "step": 81330 }, { "epoch": 3.37, "grad_norm": 0.20703125, "learning_rate": 0.00048463456721104024, "loss": 0.2183, "step": 81340 }, { "epoch": 3.37, "grad_norm": 0.34765625, "learning_rate": 0.00048463082349798546, "loss": 0.2076, "step": 81350 }, { "epoch": 3.37, "grad_norm": 0.5859375, "learning_rate": 0.0004846270793433809, "loss": 0.2243, "step": 81360 }, { "epoch": 3.37, "grad_norm": 0.87890625, "learning_rate": 0.0004846233347472334, "loss": 0.1666, "step": 81370 }, { "epoch": 3.37, "grad_norm": 0.490234375, "learning_rate": 0.00048461958970955017, "loss": 0.2185, "step": 81380 }, { "epoch": 3.37, "grad_norm": 0.70703125, "learning_rate": 0.00048461584423033823, "loss": 0.2208, "step": 81390 }, { "epoch": 3.37, "grad_norm": 1.625, "learning_rate": 0.00048461209830960453, "loss": 0.1902, "step": 81400 }, { "epoch": 3.37, "grad_norm": 0.484375, "learning_rate": 0.0004846083519473563, "loss": 0.1952, "step": 81410 }, { "epoch": 3.37, "grad_norm": 1.4296875, "learning_rate": 0.0004846046051436004, "loss": 0.1743, "step": 81420 }, { "epoch": 3.37, "grad_norm": 0.80859375, "learning_rate": 0.00048460085789834396, "loss": 0.2405, "step": 81430 }, { "epoch": 3.37, "grad_norm": 0.69921875, "learning_rate": 0.0004845971102115941, "loss": 0.2338, "step": 81440 }, { "epoch": 3.37, "grad_norm": 0.0, "learning_rate": 0.0004845933620833578, "loss": 0.2281, "step": 81450 }, { "epoch": 3.37, "grad_norm": 0.55859375, "learning_rate": 0.0004845896135136421, "loss": 0.2013, "step": 81460 }, { "epoch": 3.37, "grad_norm": 0.68359375, "learning_rate": 0.0004845858645024541, "loss": 0.22, "step": 81470 }, { "epoch": 3.37, "grad_norm": 0.1689453125, "learning_rate": 0.0004845821150498008, "loss": 0.2123, "step": 81480 }, { "epoch": 3.38, "grad_norm": 0.40234375, "learning_rate": 0.0004845783651556893, "loss": 0.2117, "step": 81490 }, { "epoch": 3.38, "grad_norm": 0.578125, "learning_rate": 0.0004845746148201267, "loss": 0.222, "step": 81500 }, { "epoch": 3.38, "grad_norm": 0.330078125, "learning_rate": 0.00048457086404312, "loss": 0.2098, "step": 81510 }, { "epoch": 3.38, "grad_norm": 1.109375, "learning_rate": 0.00048456711282467623, "loss": 0.1947, "step": 81520 }, { "epoch": 3.38, "grad_norm": 0.58203125, "learning_rate": 0.00048456336116480247, "loss": 0.2399, "step": 81530 }, { "epoch": 3.38, "grad_norm": 0.51953125, "learning_rate": 0.00048455960906350587, "loss": 0.2588, "step": 81540 }, { "epoch": 3.38, "grad_norm": 0.6953125, "learning_rate": 0.0004845558565207934, "loss": 0.2047, "step": 81550 }, { "epoch": 3.38, "grad_norm": 1.1640625, "learning_rate": 0.0004845521035366721, "loss": 0.2558, "step": 81560 }, { "epoch": 3.38, "grad_norm": 0.478515625, "learning_rate": 0.00048454835011114907, "loss": 0.2019, "step": 81570 }, { "epoch": 3.38, "grad_norm": 1.171875, "learning_rate": 0.0004845445962442314, "loss": 0.1764, "step": 81580 }, { "epoch": 3.38, "grad_norm": 0.75, "learning_rate": 0.0004845408419359261, "loss": 0.2641, "step": 81590 }, { "epoch": 3.38, "grad_norm": 1.1953125, "learning_rate": 0.0004845370871862402, "loss": 0.299, "step": 81600 }, { "epoch": 3.38, "grad_norm": 0.38671875, "learning_rate": 0.00048453333199518096, "loss": 0.2141, "step": 81610 }, { "epoch": 3.38, "grad_norm": 0.197265625, "learning_rate": 0.0004845295763627553, "loss": 0.2005, "step": 81620 }, { "epoch": 3.38, "grad_norm": 0.451171875, "learning_rate": 0.00048452582028897024, "loss": 0.2278, "step": 81630 }, { "epoch": 3.38, "grad_norm": 0.9609375, "learning_rate": 0.00048452206377383293, "loss": 0.1831, "step": 81640 }, { "epoch": 3.38, "grad_norm": 0.5546875, "learning_rate": 0.0004845183068173504, "loss": 0.2113, "step": 81650 }, { "epoch": 3.38, "grad_norm": 1.03125, "learning_rate": 0.0004845145494195298, "loss": 0.2772, "step": 81660 }, { "epoch": 3.38, "grad_norm": 0.314453125, "learning_rate": 0.00048451079158037813, "loss": 0.2682, "step": 81670 }, { "epoch": 3.38, "grad_norm": 0.31640625, "learning_rate": 0.00048450703329990246, "loss": 0.1855, "step": 81680 }, { "epoch": 3.38, "grad_norm": 1.328125, "learning_rate": 0.0004845032745781099, "loss": 0.2667, "step": 81690 }, { "epoch": 3.38, "grad_norm": 0.86328125, "learning_rate": 0.0004844995154150075, "loss": 0.2768, "step": 81700 }, { "epoch": 3.38, "grad_norm": 0.71484375, "learning_rate": 0.0004844957558106023, "loss": 0.2328, "step": 81710 }, { "epoch": 3.38, "grad_norm": 0.609375, "learning_rate": 0.00048449199576490145, "loss": 0.1991, "step": 81720 }, { "epoch": 3.39, "grad_norm": 0.625, "learning_rate": 0.000484488235277912, "loss": 0.2357, "step": 81730 }, { "epoch": 3.39, "grad_norm": 0.640625, "learning_rate": 0.0004844844743496409, "loss": 0.1832, "step": 81740 }, { "epoch": 3.39, "grad_norm": 0.41015625, "learning_rate": 0.00048448071298009545, "loss": 0.2206, "step": 81750 }, { "epoch": 3.39, "grad_norm": 1.78125, "learning_rate": 0.00048447695116928257, "loss": 0.1918, "step": 81760 }, { "epoch": 3.39, "grad_norm": 0.890625, "learning_rate": 0.00048447318891720937, "loss": 0.2012, "step": 81770 }, { "epoch": 3.39, "grad_norm": 5.875, "learning_rate": 0.000484469426223883, "loss": 0.2267, "step": 81780 }, { "epoch": 3.39, "grad_norm": 1.09375, "learning_rate": 0.0004844656630893105, "loss": 0.2139, "step": 81790 }, { "epoch": 3.39, "grad_norm": 0.5703125, "learning_rate": 0.00048446189951349895, "loss": 0.1962, "step": 81800 }, { "epoch": 3.39, "grad_norm": 0.796875, "learning_rate": 0.00048445813549645537, "loss": 0.1989, "step": 81810 }, { "epoch": 3.39, "grad_norm": 0.4140625, "learning_rate": 0.0004844543710381869, "loss": 0.2126, "step": 81820 }, { "epoch": 3.39, "grad_norm": 0.6875, "learning_rate": 0.0004844506061387007, "loss": 0.2475, "step": 81830 }, { "epoch": 3.39, "grad_norm": 0.5390625, "learning_rate": 0.00048444684079800373, "loss": 0.2323, "step": 81840 }, { "epoch": 3.39, "grad_norm": 0.75390625, "learning_rate": 0.00048444307501610307, "loss": 0.2409, "step": 81850 }, { "epoch": 3.39, "grad_norm": 0.3359375, "learning_rate": 0.0004844393087930059, "loss": 0.1598, "step": 81860 }, { "epoch": 3.39, "grad_norm": 0.318359375, "learning_rate": 0.0004844355421287193, "loss": 0.2233, "step": 81870 }, { "epoch": 3.39, "grad_norm": 0.75390625, "learning_rate": 0.0004844317750232503, "loss": 0.2118, "step": 81880 }, { "epoch": 3.39, "grad_norm": 0.380859375, "learning_rate": 0.00048442800747660605, "loss": 0.2248, "step": 81890 }, { "epoch": 3.39, "grad_norm": 0.36328125, "learning_rate": 0.00048442423948879355, "loss": 0.1889, "step": 81900 }, { "epoch": 3.39, "grad_norm": 0.54296875, "learning_rate": 0.00048442047105982, "loss": 0.2174, "step": 81910 }, { "epoch": 3.39, "grad_norm": 0.5078125, "learning_rate": 0.0004844167021896924, "loss": 0.1987, "step": 81920 }, { "epoch": 3.39, "grad_norm": 1.453125, "learning_rate": 0.00048441293287841794, "loss": 0.1696, "step": 81930 }, { "epoch": 3.39, "grad_norm": 0.73828125, "learning_rate": 0.0004844091631260036, "loss": 0.2093, "step": 81940 }, { "epoch": 3.39, "grad_norm": 0.421875, "learning_rate": 0.0004844053929324565, "loss": 0.2151, "step": 81950 }, { "epoch": 3.39, "grad_norm": 0.98828125, "learning_rate": 0.00048440162229778386, "loss": 0.2365, "step": 81960 }, { "epoch": 3.4, "grad_norm": 0.71875, "learning_rate": 0.00048439785122199254, "loss": 0.2056, "step": 81970 }, { "epoch": 3.4, "grad_norm": 0.51953125, "learning_rate": 0.0004843940797050899, "loss": 0.1591, "step": 81980 }, { "epoch": 3.4, "grad_norm": 0.53515625, "learning_rate": 0.0004843903077470828, "loss": 0.1959, "step": 81990 }, { "epoch": 3.4, "grad_norm": 0.62890625, "learning_rate": 0.00048438653534797863, "loss": 0.2348, "step": 82000 }, { "epoch": 3.4, "grad_norm": 0.56640625, "learning_rate": 0.0004843827625077841, "loss": 0.1866, "step": 82010 }, { "epoch": 3.4, "grad_norm": 0.353515625, "learning_rate": 0.0004843789892265067, "loss": 0.2013, "step": 82020 }, { "epoch": 3.4, "grad_norm": 0.84765625, "learning_rate": 0.00048437521550415326, "loss": 0.2435, "step": 82030 }, { "epoch": 3.4, "grad_norm": 0.90625, "learning_rate": 0.000484371441340731, "loss": 0.249, "step": 82040 }, { "epoch": 3.4, "grad_norm": 0.95703125, "learning_rate": 0.000484367666736247, "loss": 0.2489, "step": 82050 }, { "epoch": 3.4, "grad_norm": 0.73046875, "learning_rate": 0.0004843638916907084, "loss": 0.2121, "step": 82060 }, { "epoch": 3.4, "grad_norm": 1.3671875, "learning_rate": 0.0004843601162041222, "loss": 0.2387, "step": 82070 }, { "epoch": 3.4, "grad_norm": 0.52734375, "learning_rate": 0.00048435634027649553, "loss": 0.2548, "step": 82080 }, { "epoch": 3.4, "grad_norm": 0.52734375, "learning_rate": 0.00048435256390783564, "loss": 0.2008, "step": 82090 }, { "epoch": 3.4, "grad_norm": 0.71875, "learning_rate": 0.00048434878709814947, "loss": 0.2112, "step": 82100 }, { "epoch": 3.4, "grad_norm": 1.6640625, "learning_rate": 0.0004843450098474442, "loss": 0.1585, "step": 82110 }, { "epoch": 3.4, "grad_norm": 0.46875, "learning_rate": 0.00048434123215572694, "loss": 0.257, "step": 82120 }, { "epoch": 3.4, "grad_norm": 0.435546875, "learning_rate": 0.0004843374540230048, "loss": 0.2353, "step": 82130 }, { "epoch": 3.4, "grad_norm": 1.34375, "learning_rate": 0.00048433367544928487, "loss": 0.1812, "step": 82140 }, { "epoch": 3.4, "grad_norm": 0.419921875, "learning_rate": 0.0004843298964345743, "loss": 0.1804, "step": 82150 }, { "epoch": 3.4, "grad_norm": 0.1689453125, "learning_rate": 0.00048432611697888007, "loss": 0.1743, "step": 82160 }, { "epoch": 3.4, "grad_norm": 0.470703125, "learning_rate": 0.00048432233708220945, "loss": 0.2361, "step": 82170 }, { "epoch": 3.4, "grad_norm": 0.52734375, "learning_rate": 0.00048431855674456947, "loss": 0.2171, "step": 82180 }, { "epoch": 3.4, "grad_norm": 0.44921875, "learning_rate": 0.00048431477596596733, "loss": 0.1734, "step": 82190 }, { "epoch": 3.4, "grad_norm": 0.61328125, "learning_rate": 0.00048431099474641003, "loss": 0.1663, "step": 82200 }, { "epoch": 3.41, "grad_norm": 1.8515625, "learning_rate": 0.00048430721308590483, "loss": 0.2035, "step": 82210 }, { "epoch": 3.41, "grad_norm": 0.53125, "learning_rate": 0.0004843034309844587, "loss": 0.217, "step": 82220 }, { "epoch": 3.41, "grad_norm": 1.7265625, "learning_rate": 0.0004842996484420788, "loss": 0.2112, "step": 82230 }, { "epoch": 3.41, "grad_norm": 0.5703125, "learning_rate": 0.00048429586545877223, "loss": 0.2534, "step": 82240 }, { "epoch": 3.41, "grad_norm": 0.83203125, "learning_rate": 0.00048429208203454625, "loss": 0.1979, "step": 82250 }, { "epoch": 3.41, "grad_norm": 0.345703125, "learning_rate": 0.00048428829816940777, "loss": 0.2188, "step": 82260 }, { "epoch": 3.41, "grad_norm": 0.53125, "learning_rate": 0.00048428451386336405, "loss": 0.2499, "step": 82270 }, { "epoch": 3.41, "grad_norm": 1.890625, "learning_rate": 0.0004842807291164222, "loss": 0.2187, "step": 82280 }, { "epoch": 3.41, "grad_norm": 0.73828125, "learning_rate": 0.0004842769439285893, "loss": 0.186, "step": 82290 }, { "epoch": 3.41, "grad_norm": 0.65234375, "learning_rate": 0.0004842731582998725, "loss": 0.1938, "step": 82300 }, { "epoch": 3.41, "grad_norm": 0.76171875, "learning_rate": 0.0004842693722302789, "loss": 0.225, "step": 82310 }, { "epoch": 3.41, "grad_norm": 1.2265625, "learning_rate": 0.0004842655857198156, "loss": 0.1351, "step": 82320 }, { "epoch": 3.41, "grad_norm": 0.5546875, "learning_rate": 0.00048426179876848987, "loss": 0.2329, "step": 82330 }, { "epoch": 3.41, "grad_norm": 1.203125, "learning_rate": 0.00048425801137630864, "loss": 0.1734, "step": 82340 }, { "epoch": 3.41, "grad_norm": 0.5625, "learning_rate": 0.0004842542235432792, "loss": 0.2323, "step": 82350 }, { "epoch": 3.41, "grad_norm": 0.390625, "learning_rate": 0.00048425043526940855, "loss": 0.2118, "step": 82360 }, { "epoch": 3.41, "grad_norm": 0.341796875, "learning_rate": 0.0004842466465547039, "loss": 0.1811, "step": 82370 }, { "epoch": 3.41, "grad_norm": 0.000408172607421875, "learning_rate": 0.00048424285739917235, "loss": 0.153, "step": 82380 }, { "epoch": 3.41, "grad_norm": 1.046875, "learning_rate": 0.00048423906780282105, "loss": 0.1814, "step": 82390 }, { "epoch": 3.41, "grad_norm": 0.8203125, "learning_rate": 0.0004842352777656571, "loss": 0.195, "step": 82400 }, { "epoch": 3.41, "grad_norm": 1.3671875, "learning_rate": 0.0004842314872876877, "loss": 0.2172, "step": 82410 }, { "epoch": 3.41, "grad_norm": 0.83984375, "learning_rate": 0.0004842276963689199, "loss": 0.2266, "step": 82420 }, { "epoch": 3.41, "grad_norm": 0.8359375, "learning_rate": 0.00048422390500936084, "loss": 0.2547, "step": 82430 }, { "epoch": 3.41, "grad_norm": 0.3671875, "learning_rate": 0.00048422011320901773, "loss": 0.237, "step": 82440 }, { "epoch": 3.42, "grad_norm": 1.75, "learning_rate": 0.0004842163209678977, "loss": 0.2039, "step": 82450 }, { "epoch": 3.42, "grad_norm": 0.330078125, "learning_rate": 0.00048421252828600774, "loss": 0.2122, "step": 82460 }, { "epoch": 3.42, "grad_norm": 0.58984375, "learning_rate": 0.0004842087351633552, "loss": 0.2418, "step": 82470 }, { "epoch": 3.42, "grad_norm": 0.91015625, "learning_rate": 0.00048420494159994706, "loss": 0.1942, "step": 82480 }, { "epoch": 3.42, "grad_norm": 0.6171875, "learning_rate": 0.0004842011475957905, "loss": 0.2176, "step": 82490 }, { "epoch": 3.42, "grad_norm": 0.5546875, "learning_rate": 0.0004841973531508927, "loss": 0.2044, "step": 82500 }, { "epoch": 3.42, "grad_norm": 1.0625, "learning_rate": 0.00048419355826526074, "loss": 0.2086, "step": 82510 }, { "epoch": 3.42, "grad_norm": 0.44921875, "learning_rate": 0.00048418976293890183, "loss": 0.2055, "step": 82520 }, { "epoch": 3.42, "grad_norm": 0.734375, "learning_rate": 0.0004841859671718231, "loss": 0.1984, "step": 82530 }, { "epoch": 3.42, "grad_norm": 0.62890625, "learning_rate": 0.0004841821709640316, "loss": 0.2435, "step": 82540 }, { "epoch": 3.42, "grad_norm": 0.341796875, "learning_rate": 0.00048417837431553455, "loss": 0.2455, "step": 82550 }, { "epoch": 3.42, "grad_norm": 0.41015625, "learning_rate": 0.00048417457722633913, "loss": 0.2361, "step": 82560 }, { "epoch": 3.42, "grad_norm": 1.4609375, "learning_rate": 0.0004841707796964524, "loss": 0.1743, "step": 82570 }, { "epoch": 3.42, "grad_norm": 0.259765625, "learning_rate": 0.0004841669817258816, "loss": 0.1748, "step": 82580 }, { "epoch": 3.42, "grad_norm": 0.87890625, "learning_rate": 0.00048416318331463384, "loss": 0.1942, "step": 82590 }, { "epoch": 3.42, "grad_norm": 0.78515625, "learning_rate": 0.0004841593844627162, "loss": 0.2281, "step": 82600 }, { "epoch": 3.42, "grad_norm": 0.455078125, "learning_rate": 0.00048415558517013593, "loss": 0.1645, "step": 82610 }, { "epoch": 3.42, "grad_norm": 0.73828125, "learning_rate": 0.0004841517854369001, "loss": 0.1857, "step": 82620 }, { "epoch": 3.42, "grad_norm": 0.5, "learning_rate": 0.00048414798526301585, "loss": 0.2487, "step": 82630 }, { "epoch": 3.42, "grad_norm": 1.015625, "learning_rate": 0.00048414418464849045, "loss": 0.2251, "step": 82640 }, { "epoch": 3.42, "grad_norm": 0.51953125, "learning_rate": 0.000484140383593331, "loss": 0.2285, "step": 82650 }, { "epoch": 3.42, "grad_norm": 0.6640625, "learning_rate": 0.0004841365820975446, "loss": 0.2503, "step": 82660 }, { "epoch": 3.42, "grad_norm": 0.259765625, "learning_rate": 0.00048413278016113846, "loss": 0.2404, "step": 82670 }, { "epoch": 3.42, "grad_norm": 0.59765625, "learning_rate": 0.0004841289777841197, "loss": 0.258, "step": 82680 }, { "epoch": 3.43, "grad_norm": 0.6640625, "learning_rate": 0.00048412517496649547, "loss": 0.2457, "step": 82690 }, { "epoch": 3.43, "grad_norm": 0.55078125, "learning_rate": 0.0004841213717082729, "loss": 0.1853, "step": 82700 }, { "epoch": 3.43, "grad_norm": 0.9453125, "learning_rate": 0.0004841175680094593, "loss": 0.2006, "step": 82710 }, { "epoch": 3.43, "grad_norm": 1.9375, "learning_rate": 0.0004841137638700616, "loss": 0.2435, "step": 82720 }, { "epoch": 3.43, "grad_norm": 0.6484375, "learning_rate": 0.0004841099592900872, "loss": 0.2535, "step": 82730 }, { "epoch": 3.43, "grad_norm": 0.419921875, "learning_rate": 0.0004841061542695431, "loss": 0.2113, "step": 82740 }, { "epoch": 3.43, "grad_norm": 0.80859375, "learning_rate": 0.0004841023488084364, "loss": 0.198, "step": 82750 }, { "epoch": 3.43, "grad_norm": 0.5703125, "learning_rate": 0.00048409854290677447, "loss": 0.1848, "step": 82760 }, { "epoch": 3.43, "grad_norm": 0.326171875, "learning_rate": 0.0004840947365645643, "loss": 0.2298, "step": 82770 }, { "epoch": 3.43, "grad_norm": 0.0, "learning_rate": 0.00048409092978181313, "loss": 0.2277, "step": 82780 }, { "epoch": 3.43, "grad_norm": 1.9921875, "learning_rate": 0.0004840871225585282, "loss": 0.2307, "step": 82790 }, { "epoch": 3.43, "grad_norm": 0.73046875, "learning_rate": 0.0004840833148947165, "loss": 0.205, "step": 82800 }, { "epoch": 3.43, "grad_norm": 0.953125, "learning_rate": 0.00048407950679038526, "loss": 0.1709, "step": 82810 }, { "epoch": 3.43, "grad_norm": 0.76171875, "learning_rate": 0.0004840756982455417, "loss": 0.175, "step": 82820 }, { "epoch": 3.43, "grad_norm": 0.51953125, "learning_rate": 0.00048407188926019297, "loss": 0.2222, "step": 82830 }, { "epoch": 3.43, "grad_norm": 0.90625, "learning_rate": 0.0004840680798343462, "loss": 0.1879, "step": 82840 }, { "epoch": 3.43, "grad_norm": 0.39453125, "learning_rate": 0.00048406426996800854, "loss": 0.17, "step": 82850 }, { "epoch": 3.43, "grad_norm": 0.76171875, "learning_rate": 0.0004840604596611873, "loss": 0.1944, "step": 82860 }, { "epoch": 3.43, "grad_norm": 0.494140625, "learning_rate": 0.00048405664891388944, "loss": 0.2199, "step": 82870 }, { "epoch": 3.43, "grad_norm": 0.8203125, "learning_rate": 0.0004840528377261223, "loss": 0.2358, "step": 82880 }, { "epoch": 3.43, "grad_norm": 0.291015625, "learning_rate": 0.000484049026097893, "loss": 0.22, "step": 82890 }, { "epoch": 3.43, "grad_norm": 0.57421875, "learning_rate": 0.00048404521402920875, "loss": 0.1884, "step": 82900 }, { "epoch": 3.43, "grad_norm": 0.408203125, "learning_rate": 0.00048404140152007657, "loss": 0.215, "step": 82910 }, { "epoch": 3.43, "grad_norm": 0.6328125, "learning_rate": 0.00048403758857050385, "loss": 0.2117, "step": 82920 }, { "epoch": 3.43, "grad_norm": 1.1171875, "learning_rate": 0.0004840337751804976, "loss": 0.2398, "step": 82930 }, { "epoch": 3.44, "grad_norm": 0.53125, "learning_rate": 0.0004840299613500651, "loss": 0.221, "step": 82940 }, { "epoch": 3.44, "grad_norm": 0.8359375, "learning_rate": 0.0004840261470792134, "loss": 0.2274, "step": 82950 }, { "epoch": 3.44, "grad_norm": 0.6796875, "learning_rate": 0.0004840223323679499, "loss": 0.2014, "step": 82960 }, { "epoch": 3.44, "grad_norm": 0.89453125, "learning_rate": 0.00048401851721628154, "loss": 0.2353, "step": 82970 }, { "epoch": 3.44, "grad_norm": 0.54296875, "learning_rate": 0.00048401470162421565, "loss": 0.1948, "step": 82980 }, { "epoch": 3.44, "grad_norm": 0.65234375, "learning_rate": 0.0004840108855917593, "loss": 0.1565, "step": 82990 }, { "epoch": 3.44, "grad_norm": 0.52734375, "learning_rate": 0.00048400706911891985, "loss": 0.1756, "step": 83000 }, { "epoch": 3.44, "grad_norm": 0.494140625, "learning_rate": 0.0004840032522057043, "loss": 0.2362, "step": 83010 }, { "epoch": 3.44, "grad_norm": 0.79296875, "learning_rate": 0.0004839994348521199, "loss": 0.2488, "step": 83020 }, { "epoch": 3.44, "grad_norm": 0.298828125, "learning_rate": 0.00048399561705817385, "loss": 0.2174, "step": 83030 }, { "epoch": 3.44, "grad_norm": 0.625, "learning_rate": 0.0004839917988238733, "loss": 0.2075, "step": 83040 }, { "epoch": 3.44, "grad_norm": 0.396484375, "learning_rate": 0.0004839879801492254, "loss": 0.2659, "step": 83050 }, { "epoch": 3.44, "grad_norm": 0.74609375, "learning_rate": 0.0004839841610342375, "loss": 0.1958, "step": 83060 }, { "epoch": 3.44, "grad_norm": 0.466796875, "learning_rate": 0.00048398034147891655, "loss": 0.2462, "step": 83070 }, { "epoch": 3.44, "grad_norm": 3.453125, "learning_rate": 0.00048397652148327, "loss": 0.1773, "step": 83080 }, { "epoch": 3.44, "grad_norm": 0.50390625, "learning_rate": 0.00048397270104730484, "loss": 0.1522, "step": 83090 }, { "epoch": 3.44, "grad_norm": 4.125, "learning_rate": 0.0004839688801710283, "loss": 0.2622, "step": 83100 }, { "epoch": 3.44, "grad_norm": 0.984375, "learning_rate": 0.0004839650588544476, "loss": 0.1975, "step": 83110 }, { "epoch": 3.44, "grad_norm": 0.80078125, "learning_rate": 0.00048396123709757, "loss": 0.1896, "step": 83120 }, { "epoch": 3.44, "grad_norm": 0.75, "learning_rate": 0.00048395741490040253, "loss": 0.1916, "step": 83130 }, { "epoch": 3.44, "grad_norm": 1.0859375, "learning_rate": 0.0004839535922629525, "loss": 0.2439, "step": 83140 }, { "epoch": 3.44, "grad_norm": 0.578125, "learning_rate": 0.000483949769185227, "loss": 0.2206, "step": 83150 }, { "epoch": 3.44, "grad_norm": 0.45703125, "learning_rate": 0.00048394594566723343, "loss": 0.216, "step": 83160 }, { "epoch": 3.44, "grad_norm": 0.7421875, "learning_rate": 0.0004839421217089788, "loss": 0.2498, "step": 83170 }, { "epoch": 3.45, "grad_norm": 0.86328125, "learning_rate": 0.00048393829731047035, "loss": 0.2252, "step": 83180 }, { "epoch": 3.45, "grad_norm": 0.7890625, "learning_rate": 0.0004839344724717153, "loss": 0.1922, "step": 83190 }, { "epoch": 3.45, "grad_norm": 0.82421875, "learning_rate": 0.00048393064719272084, "loss": 0.207, "step": 83200 }, { "epoch": 3.45, "grad_norm": 0.84765625, "learning_rate": 0.00048392682147349413, "loss": 0.2032, "step": 83210 }, { "epoch": 3.45, "grad_norm": 0.33984375, "learning_rate": 0.00048392299531404247, "loss": 0.224, "step": 83220 }, { "epoch": 3.45, "grad_norm": 0.2890625, "learning_rate": 0.0004839191687143729, "loss": 0.161, "step": 83230 }, { "epoch": 3.45, "grad_norm": 0.34765625, "learning_rate": 0.0004839153416744928, "loss": 0.2719, "step": 83240 }, { "epoch": 3.45, "grad_norm": 0.65234375, "learning_rate": 0.0004839115141944093, "loss": 0.2162, "step": 83250 }, { "epoch": 3.45, "grad_norm": 0.78125, "learning_rate": 0.0004839076862741295, "loss": 0.2139, "step": 83260 }, { "epoch": 3.45, "grad_norm": 0.61328125, "learning_rate": 0.0004839038579136608, "loss": 0.3009, "step": 83270 }, { "epoch": 3.45, "grad_norm": 0.3359375, "learning_rate": 0.00048390002911301023, "loss": 0.1809, "step": 83280 }, { "epoch": 3.45, "grad_norm": 0.404296875, "learning_rate": 0.00048389619987218504, "loss": 0.2269, "step": 83290 }, { "epoch": 3.45, "grad_norm": 0.296875, "learning_rate": 0.00048389237019119246, "loss": 0.2221, "step": 83300 }, { "epoch": 3.45, "grad_norm": 1.4296875, "learning_rate": 0.00048388854007003977, "loss": 0.17, "step": 83310 }, { "epoch": 3.45, "grad_norm": 0.5234375, "learning_rate": 0.00048388470950873406, "loss": 0.2111, "step": 83320 }, { "epoch": 3.45, "grad_norm": 0.6875, "learning_rate": 0.00048388087850728255, "loss": 0.2043, "step": 83330 }, { "epoch": 3.45, "grad_norm": 0.365234375, "learning_rate": 0.00048387704706569256, "loss": 0.2179, "step": 83340 }, { "epoch": 3.45, "grad_norm": 0.396484375, "learning_rate": 0.0004838732151839712, "loss": 0.2438, "step": 83350 }, { "epoch": 3.45, "grad_norm": 0.4375, "learning_rate": 0.0004838693828621257, "loss": 0.2005, "step": 83360 }, { "epoch": 3.45, "grad_norm": 1.296875, "learning_rate": 0.00048386555010016333, "loss": 0.2195, "step": 83370 }, { "epoch": 3.45, "grad_norm": 0.38671875, "learning_rate": 0.0004838617168980911, "loss": 0.196, "step": 83380 }, { "epoch": 3.45, "grad_norm": 0.2197265625, "learning_rate": 0.0004838578832559165, "loss": 0.1808, "step": 83390 }, { "epoch": 3.45, "grad_norm": 0.4140625, "learning_rate": 0.00048385404917364663, "loss": 0.1655, "step": 83400 }, { "epoch": 3.45, "grad_norm": 0.609375, "learning_rate": 0.0004838502146512886, "loss": 0.2114, "step": 83410 }, { "epoch": 3.46, "grad_norm": 0.5546875, "learning_rate": 0.0004838463796888498, "loss": 0.2489, "step": 83420 }, { "epoch": 3.46, "grad_norm": 0.59765625, "learning_rate": 0.00048384254428633736, "loss": 0.2313, "step": 83430 }, { "epoch": 3.46, "grad_norm": 0.5546875, "learning_rate": 0.00048383870844375846, "loss": 0.1864, "step": 83440 }, { "epoch": 3.46, "grad_norm": 0.578125, "learning_rate": 0.00048383487216112033, "loss": 0.2118, "step": 83450 }, { "epoch": 3.46, "grad_norm": 0.7890625, "learning_rate": 0.00048383103543843034, "loss": 0.2444, "step": 83460 }, { "epoch": 3.46, "grad_norm": 0.95703125, "learning_rate": 0.00048382719827569544, "loss": 0.2017, "step": 83470 }, { "epoch": 3.46, "grad_norm": 1.59375, "learning_rate": 0.0004838233606729231, "loss": 0.2311, "step": 83480 }, { "epoch": 3.46, "grad_norm": 0.83203125, "learning_rate": 0.00048381952263012047, "loss": 0.2236, "step": 83490 }, { "epoch": 3.46, "grad_norm": 1.078125, "learning_rate": 0.0004838156841472947, "loss": 0.2446, "step": 83500 }, { "epoch": 3.46, "grad_norm": 0.87109375, "learning_rate": 0.0004838118452244531, "loss": 0.3069, "step": 83510 }, { "epoch": 3.46, "grad_norm": 0.1943359375, "learning_rate": 0.0004838080058616028, "loss": 0.1853, "step": 83520 }, { "epoch": 3.46, "grad_norm": 1.7109375, "learning_rate": 0.0004838041660587511, "loss": 0.1547, "step": 83530 }, { "epoch": 3.46, "grad_norm": 0.59765625, "learning_rate": 0.0004838003258159053, "loss": 0.1884, "step": 83540 }, { "epoch": 3.46, "grad_norm": 0.412109375, "learning_rate": 0.0004837964851330724, "loss": 0.173, "step": 83550 }, { "epoch": 3.46, "grad_norm": 1.2734375, "learning_rate": 0.00048379264401025987, "loss": 0.2672, "step": 83560 }, { "epoch": 3.46, "grad_norm": 0.84765625, "learning_rate": 0.00048378880244747476, "loss": 0.2201, "step": 83570 }, { "epoch": 3.46, "grad_norm": 0.77734375, "learning_rate": 0.0004837849604447244, "loss": 0.2021, "step": 83580 }, { "epoch": 3.46, "grad_norm": 0.59765625, "learning_rate": 0.00048378111800201595, "loss": 0.2835, "step": 83590 }, { "epoch": 3.46, "grad_norm": 0.91796875, "learning_rate": 0.0004837772751193568, "loss": 0.2353, "step": 83600 }, { "epoch": 3.46, "grad_norm": 1.4375, "learning_rate": 0.0004837734317967539, "loss": 0.2099, "step": 83610 }, { "epoch": 3.46, "grad_norm": 0.59375, "learning_rate": 0.0004837695880342148, "loss": 0.2187, "step": 83620 }, { "epoch": 3.46, "grad_norm": 0.66015625, "learning_rate": 0.00048376574383174646, "loss": 0.2573, "step": 83630 }, { "epoch": 3.46, "grad_norm": 0.578125, "learning_rate": 0.0004837618991893563, "loss": 0.2779, "step": 83640 }, { "epoch": 3.46, "grad_norm": 0.248046875, "learning_rate": 0.00048375805410705145, "loss": 0.2057, "step": 83650 }, { "epoch": 3.47, "grad_norm": 0.68359375, "learning_rate": 0.0004837542085848392, "loss": 0.1996, "step": 83660 }, { "epoch": 3.47, "grad_norm": 0.3515625, "learning_rate": 0.0004837503626227268, "loss": 0.2053, "step": 83670 }, { "epoch": 3.47, "grad_norm": 1.1953125, "learning_rate": 0.0004837465162207214, "loss": 0.192, "step": 83680 }, { "epoch": 3.47, "grad_norm": 1.828125, "learning_rate": 0.00048374266937883036, "loss": 0.2185, "step": 83690 }, { "epoch": 3.47, "grad_norm": 0.46484375, "learning_rate": 0.0004837388220970609, "loss": 0.2369, "step": 83700 }, { "epoch": 3.47, "grad_norm": 0.58984375, "learning_rate": 0.0004837349743754201, "loss": 0.2424, "step": 83710 }, { "epoch": 3.47, "grad_norm": 0.416015625, "learning_rate": 0.0004837311262139154, "loss": 0.2576, "step": 83720 }, { "epoch": 3.47, "grad_norm": 1.046875, "learning_rate": 0.00048372727761255397, "loss": 0.2188, "step": 83730 }, { "epoch": 3.47, "grad_norm": 0.7578125, "learning_rate": 0.000483723428571343, "loss": 0.241, "step": 83740 }, { "epoch": 3.47, "grad_norm": 0.1474609375, "learning_rate": 0.00048371957909028974, "loss": 0.2342, "step": 83750 }, { "epoch": 3.47, "grad_norm": 0.66796875, "learning_rate": 0.0004837157291694015, "loss": 0.2328, "step": 83760 }, { "epoch": 3.47, "grad_norm": 0.78125, "learning_rate": 0.0004837118788086856, "loss": 0.1965, "step": 83770 }, { "epoch": 3.47, "grad_norm": 1.171875, "learning_rate": 0.00048370802800814903, "loss": 0.2285, "step": 83780 }, { "epoch": 3.47, "grad_norm": 1.6015625, "learning_rate": 0.0004837041767677993, "loss": 0.2253, "step": 83790 }, { "epoch": 3.47, "grad_norm": 0.5078125, "learning_rate": 0.0004837003250876435, "loss": 0.1864, "step": 83800 }, { "epoch": 3.47, "grad_norm": 0.64453125, "learning_rate": 0.0004836964729676889, "loss": 0.1916, "step": 83810 }, { "epoch": 3.47, "grad_norm": 0.9765625, "learning_rate": 0.0004836926204079428, "loss": 0.2106, "step": 83820 }, { "epoch": 3.47, "grad_norm": 0.640625, "learning_rate": 0.0004836887674084125, "loss": 0.2455, "step": 83830 }, { "epoch": 3.47, "grad_norm": 0.63671875, "learning_rate": 0.00048368491396910506, "loss": 0.2554, "step": 83840 }, { "epoch": 3.47, "grad_norm": 0.69140625, "learning_rate": 0.0004836810600900279, "loss": 0.1948, "step": 83850 }, { "epoch": 3.47, "grad_norm": 0.734375, "learning_rate": 0.00048367720577118824, "loss": 0.2347, "step": 83860 }, { "epoch": 3.47, "grad_norm": 0.51171875, "learning_rate": 0.0004836733510125933, "loss": 0.2236, "step": 83870 }, { "epoch": 3.47, "grad_norm": 0.71484375, "learning_rate": 0.0004836694958142503, "loss": 0.1952, "step": 83880 }, { "epoch": 3.47, "grad_norm": 0.88671875, "learning_rate": 0.0004836656401761667, "loss": 0.2218, "step": 83890 }, { "epoch": 3.48, "grad_norm": 0.5625, "learning_rate": 0.0004836617840983494, "loss": 0.224, "step": 83900 }, { "epoch": 3.48, "grad_norm": 1.0703125, "learning_rate": 0.00048365792758080597, "loss": 0.2163, "step": 83910 }, { "epoch": 3.48, "grad_norm": 0.859375, "learning_rate": 0.0004836540706235435, "loss": 0.2142, "step": 83920 }, { "epoch": 3.48, "grad_norm": 0.357421875, "learning_rate": 0.00048365021322656934, "loss": 0.1991, "step": 83930 }, { "epoch": 3.48, "grad_norm": 0.4375, "learning_rate": 0.00048364635538989065, "loss": 0.2036, "step": 83940 }, { "epoch": 3.48, "grad_norm": 0.3828125, "learning_rate": 0.0004836424971135148, "loss": 0.2664, "step": 83950 }, { "epoch": 3.48, "grad_norm": 0.72265625, "learning_rate": 0.00048363863839744905, "loss": 0.2682, "step": 83960 }, { "epoch": 3.48, "grad_norm": 0.85546875, "learning_rate": 0.0004836347792417005, "loss": 0.1756, "step": 83970 }, { "epoch": 3.48, "grad_norm": 1.90625, "learning_rate": 0.0004836309196462766, "loss": 0.2557, "step": 83980 }, { "epoch": 3.48, "grad_norm": 0.380859375, "learning_rate": 0.0004836270596111845, "loss": 0.1903, "step": 83990 }, { "epoch": 3.48, "grad_norm": 2.265625, "learning_rate": 0.00048362319913643155, "loss": 0.2324, "step": 84000 }, { "epoch": 3.48, "grad_norm": 0.5390625, "learning_rate": 0.00048361933822202495, "loss": 0.165, "step": 84010 }, { "epoch": 3.48, "grad_norm": 0.84375, "learning_rate": 0.000483615476867972, "loss": 0.202, "step": 84020 }, { "epoch": 3.48, "grad_norm": 0.330078125, "learning_rate": 0.00048361161507427987, "loss": 0.1942, "step": 84030 }, { "epoch": 3.48, "grad_norm": 0.53125, "learning_rate": 0.00048360775284095596, "loss": 0.2152, "step": 84040 }, { "epoch": 3.48, "grad_norm": 0.478515625, "learning_rate": 0.00048360389016800754, "loss": 0.2225, "step": 84050 }, { "epoch": 3.48, "grad_norm": 0.90625, "learning_rate": 0.00048360002705544175, "loss": 0.2653, "step": 84060 }, { "epoch": 3.48, "grad_norm": 0.7890625, "learning_rate": 0.000483596163503266, "loss": 0.2579, "step": 84070 }, { "epoch": 3.48, "grad_norm": 0.7421875, "learning_rate": 0.0004835922995114874, "loss": 0.2012, "step": 84080 }, { "epoch": 3.48, "grad_norm": 0.73046875, "learning_rate": 0.0004835884350801134, "loss": 0.1969, "step": 84090 }, { "epoch": 3.48, "grad_norm": 1.171875, "learning_rate": 0.0004835845702091512, "loss": 0.1906, "step": 84100 }, { "epoch": 3.48, "grad_norm": 1.1015625, "learning_rate": 0.00048358070489860807, "loss": 0.199, "step": 84110 }, { "epoch": 3.48, "grad_norm": 0.84375, "learning_rate": 0.0004835768391484912, "loss": 0.2365, "step": 84120 }, { "epoch": 3.48, "grad_norm": 0.72265625, "learning_rate": 0.00048357297295880796, "loss": 0.1946, "step": 84130 }, { "epoch": 3.49, "grad_norm": 1.2265625, "learning_rate": 0.00048356910632956564, "loss": 0.2372, "step": 84140 }, { "epoch": 3.49, "grad_norm": 0.76953125, "learning_rate": 0.0004835652392607715, "loss": 0.1938, "step": 84150 }, { "epoch": 3.49, "grad_norm": 1.546875, "learning_rate": 0.00048356137175243274, "loss": 0.2487, "step": 84160 }, { "epoch": 3.49, "grad_norm": 0.75, "learning_rate": 0.00048355750380455674, "loss": 0.2212, "step": 84170 }, { "epoch": 3.49, "grad_norm": 0.87109375, "learning_rate": 0.0004835536354171508, "loss": 0.2403, "step": 84180 }, { "epoch": 3.49, "grad_norm": 0.5625, "learning_rate": 0.0004835497665902221, "loss": 0.2392, "step": 84190 }, { "epoch": 3.49, "grad_norm": 0.26171875, "learning_rate": 0.0004835458973237779, "loss": 0.1639, "step": 84200 }, { "epoch": 3.49, "grad_norm": 0.58984375, "learning_rate": 0.0004835420276178256, "loss": 0.2565, "step": 84210 }, { "epoch": 3.49, "grad_norm": 0.9375, "learning_rate": 0.00048353815747237237, "loss": 0.2501, "step": 84220 }, { "epoch": 3.49, "grad_norm": 0.7578125, "learning_rate": 0.00048353428688742564, "loss": 0.2121, "step": 84230 }, { "epoch": 3.49, "grad_norm": 0.400390625, "learning_rate": 0.0004835304158629925, "loss": 0.1596, "step": 84240 }, { "epoch": 3.49, "grad_norm": 0.65234375, "learning_rate": 0.0004835265443990804, "loss": 0.2195, "step": 84250 }, { "epoch": 3.49, "grad_norm": 1.015625, "learning_rate": 0.0004835226724956966, "loss": 0.1555, "step": 84260 }, { "epoch": 3.49, "grad_norm": 0.6015625, "learning_rate": 0.0004835188001528483, "loss": 0.1846, "step": 84270 }, { "epoch": 3.49, "grad_norm": 0.490234375, "learning_rate": 0.0004835149273705428, "loss": 0.2287, "step": 84280 }, { "epoch": 3.49, "grad_norm": 0.76953125, "learning_rate": 0.0004835110541487875, "loss": 0.1753, "step": 84290 }, { "epoch": 3.49, "grad_norm": 0.431640625, "learning_rate": 0.0004835071804875895, "loss": 0.2302, "step": 84300 }, { "epoch": 3.49, "grad_norm": 0.77734375, "learning_rate": 0.00048350330638695626, "loss": 0.2028, "step": 84310 }, { "epoch": 3.49, "grad_norm": 0.283203125, "learning_rate": 0.000483499431846895, "loss": 0.2857, "step": 84320 }, { "epoch": 3.49, "grad_norm": 0.53125, "learning_rate": 0.0004834955568674131, "loss": 0.2383, "step": 84330 }, { "epoch": 3.49, "grad_norm": 0.671875, "learning_rate": 0.00048349168144851774, "loss": 0.1913, "step": 84340 }, { "epoch": 3.49, "grad_norm": 0.470703125, "learning_rate": 0.0004834878055902162, "loss": 0.1969, "step": 84350 }, { "epoch": 3.49, "grad_norm": 0.490234375, "learning_rate": 0.00048348392929251587, "loss": 0.1781, "step": 84360 }, { "epoch": 3.49, "grad_norm": 0.63671875, "learning_rate": 0.00048348005255542404, "loss": 0.1827, "step": 84370 }, { "epoch": 3.5, "grad_norm": 0.7890625, "learning_rate": 0.0004834761753789479, "loss": 0.2775, "step": 84380 }, { "epoch": 3.5, "grad_norm": 1.1875, "learning_rate": 0.0004834722977630948, "loss": 0.1558, "step": 84390 }, { "epoch": 3.5, "grad_norm": 0.7578125, "learning_rate": 0.0004834684197078721, "loss": 0.1807, "step": 84400 }, { "epoch": 3.5, "grad_norm": 0.9453125, "learning_rate": 0.000483464541213287, "loss": 0.1897, "step": 84410 }, { "epoch": 3.5, "grad_norm": 0.609375, "learning_rate": 0.0004834606622793469, "loss": 0.2198, "step": 84420 }, { "epoch": 3.5, "grad_norm": 0.6640625, "learning_rate": 0.000483456782906059, "loss": 0.1809, "step": 84430 }, { "epoch": 3.5, "grad_norm": 0.52734375, "learning_rate": 0.00048345290309343065, "loss": 0.2299, "step": 84440 }, { "epoch": 3.5, "grad_norm": 0.60546875, "learning_rate": 0.00048344902284146917, "loss": 0.234, "step": 84450 }, { "epoch": 3.5, "grad_norm": 0.73828125, "learning_rate": 0.0004834451421501818, "loss": 0.2304, "step": 84460 }, { "epoch": 3.5, "grad_norm": 0.61328125, "learning_rate": 0.00048344126101957594, "loss": 0.2153, "step": 84470 }, { "epoch": 3.5, "grad_norm": 1.4140625, "learning_rate": 0.00048343737944965884, "loss": 0.2297, "step": 84480 }, { "epoch": 3.5, "grad_norm": 0.546875, "learning_rate": 0.00048343349744043776, "loss": 0.2167, "step": 84490 }, { "epoch": 3.5, "grad_norm": 0.81640625, "learning_rate": 0.00048342961499192003, "loss": 0.2522, "step": 84500 }, { "epoch": 3.5, "grad_norm": 1.4609375, "learning_rate": 0.000483425732104113, "loss": 0.2318, "step": 84510 }, { "epoch": 3.5, "grad_norm": 0.8125, "learning_rate": 0.00048342184877702386, "loss": 0.2427, "step": 84520 }, { "epoch": 3.5, "grad_norm": 0.59765625, "learning_rate": 0.0004834179650106602, "loss": 0.2526, "step": 84530 }, { "epoch": 3.5, "grad_norm": 0.1796875, "learning_rate": 0.00048341408080502894, "loss": 0.1821, "step": 84540 }, { "epoch": 3.5, "grad_norm": 0.578125, "learning_rate": 0.0004834101961601377, "loss": 0.1957, "step": 84550 }, { "epoch": 3.5, "grad_norm": 1.3515625, "learning_rate": 0.00048340631107599366, "loss": 0.2197, "step": 84560 }, { "epoch": 3.5, "grad_norm": 0.55078125, "learning_rate": 0.0004834024255526042, "loss": 0.2128, "step": 84570 }, { "epoch": 3.5, "grad_norm": 0.75390625, "learning_rate": 0.0004833985395899765, "loss": 0.2401, "step": 84580 }, { "epoch": 3.5, "grad_norm": 1.0859375, "learning_rate": 0.0004833946531881179, "loss": 0.2082, "step": 84590 }, { "epoch": 3.5, "grad_norm": 0.375, "learning_rate": 0.0004833907663470359, "loss": 0.2511, "step": 84600 }, { "epoch": 3.5, "grad_norm": 0.48828125, "learning_rate": 0.0004833868790667376, "loss": 0.233, "step": 84610 }, { "epoch": 3.5, "grad_norm": 0.8984375, "learning_rate": 0.0004833829913472304, "loss": 0.2383, "step": 84620 }, { "epoch": 3.51, "grad_norm": 1.171875, "learning_rate": 0.0004833791031885216, "loss": 0.2466, "step": 84630 }, { "epoch": 3.51, "grad_norm": 0.5546875, "learning_rate": 0.00048337521459061863, "loss": 0.1708, "step": 84640 }, { "epoch": 3.51, "grad_norm": 0.65625, "learning_rate": 0.00048337132555352865, "loss": 0.2559, "step": 84650 }, { "epoch": 3.51, "grad_norm": 0.87109375, "learning_rate": 0.00048336743607725895, "loss": 0.1535, "step": 84660 }, { "epoch": 3.51, "grad_norm": 0.2119140625, "learning_rate": 0.0004833635461618171, "loss": 0.1927, "step": 84670 }, { "epoch": 3.51, "grad_norm": 0.640625, "learning_rate": 0.0004833596558072101, "loss": 0.2276, "step": 84680 }, { "epoch": 3.51, "grad_norm": 0.94140625, "learning_rate": 0.0004833557650134456, "loss": 0.1889, "step": 84690 }, { "epoch": 3.51, "grad_norm": 0.50390625, "learning_rate": 0.0004833518737805306, "loss": 0.2006, "step": 84700 }, { "epoch": 3.51, "grad_norm": 0.79296875, "learning_rate": 0.00048334798210847264, "loss": 0.2068, "step": 84710 }, { "epoch": 3.51, "grad_norm": 0.859375, "learning_rate": 0.00048334408999727897, "loss": 0.1845, "step": 84720 }, { "epoch": 3.51, "grad_norm": 1.1171875, "learning_rate": 0.0004833401974469569, "loss": 0.1661, "step": 84730 }, { "epoch": 3.51, "grad_norm": 1.4375, "learning_rate": 0.00048333630445751373, "loss": 0.1964, "step": 84740 }, { "epoch": 3.51, "grad_norm": 0.439453125, "learning_rate": 0.00048333241102895696, "loss": 0.2096, "step": 84750 }, { "epoch": 3.51, "grad_norm": 0.9296875, "learning_rate": 0.0004833285171612937, "loss": 0.2859, "step": 84760 }, { "epoch": 3.51, "grad_norm": 0.11572265625, "learning_rate": 0.0004833246228545314, "loss": 0.2331, "step": 84770 }, { "epoch": 3.51, "grad_norm": 0.5703125, "learning_rate": 0.00048332072810867733, "loss": 0.2552, "step": 84780 }, { "epoch": 3.51, "grad_norm": 0.90234375, "learning_rate": 0.0004833168329237389, "loss": 0.2087, "step": 84790 }, { "epoch": 3.51, "grad_norm": 1.203125, "learning_rate": 0.00048331293729972336, "loss": 0.2448, "step": 84800 }, { "epoch": 3.51, "grad_norm": 0.51171875, "learning_rate": 0.00048330904123663804, "loss": 0.208, "step": 84810 }, { "epoch": 3.51, "grad_norm": 0.625, "learning_rate": 0.00048330514473449035, "loss": 0.1949, "step": 84820 }, { "epoch": 3.51, "grad_norm": 0.96875, "learning_rate": 0.00048330124779328757, "loss": 0.203, "step": 84830 }, { "epoch": 3.51, "grad_norm": 0.546875, "learning_rate": 0.00048329735041303696, "loss": 0.2151, "step": 84840 }, { "epoch": 3.51, "grad_norm": 0.80859375, "learning_rate": 0.00048329345259374605, "loss": 0.2019, "step": 84850 }, { "epoch": 3.51, "grad_norm": 0.87109375, "learning_rate": 0.0004832895543354219, "loss": 0.2097, "step": 84860 }, { "epoch": 3.52, "grad_norm": 0.60546875, "learning_rate": 0.0004832856556380721, "loss": 0.1954, "step": 84870 }, { "epoch": 3.52, "grad_norm": 0.66015625, "learning_rate": 0.0004832817565017039, "loss": 0.1602, "step": 84880 }, { "epoch": 3.52, "grad_norm": 1.0546875, "learning_rate": 0.0004832778569263246, "loss": 0.2157, "step": 84890 }, { "epoch": 3.52, "grad_norm": 0.8984375, "learning_rate": 0.00048327395691194155, "loss": 0.227, "step": 84900 }, { "epoch": 3.52, "grad_norm": 0.55078125, "learning_rate": 0.00048327005645856216, "loss": 0.1924, "step": 84910 }, { "epoch": 3.52, "grad_norm": 0.5703125, "learning_rate": 0.00048326615556619365, "loss": 0.1731, "step": 84920 }, { "epoch": 3.52, "grad_norm": 0.72265625, "learning_rate": 0.00048326225423484345, "loss": 0.2289, "step": 84930 }, { "epoch": 3.52, "grad_norm": 0.546875, "learning_rate": 0.00048325835246451886, "loss": 0.1715, "step": 84940 }, { "epoch": 3.52, "grad_norm": 0.5, "learning_rate": 0.00048325445025522727, "loss": 0.1766, "step": 84950 }, { "epoch": 3.52, "grad_norm": 0.98046875, "learning_rate": 0.00048325054760697594, "loss": 0.2493, "step": 84960 }, { "epoch": 3.52, "grad_norm": 0.75390625, "learning_rate": 0.00048324664451977235, "loss": 0.1902, "step": 84970 }, { "epoch": 3.52, "grad_norm": 0.84765625, "learning_rate": 0.00048324274099362377, "loss": 0.173, "step": 84980 }, { "epoch": 3.52, "grad_norm": 0.65625, "learning_rate": 0.00048323883702853745, "loss": 0.2154, "step": 84990 }, { "epoch": 3.52, "grad_norm": 0.388671875, "learning_rate": 0.0004832349326245209, "loss": 0.2232, "step": 85000 }, { "epoch": 3.52, "grad_norm": 0.90625, "learning_rate": 0.0004832310277815813, "loss": 0.2199, "step": 85010 }, { "epoch": 3.52, "grad_norm": 1.2578125, "learning_rate": 0.0004832271224997262, "loss": 0.2364, "step": 85020 }, { "epoch": 3.52, "grad_norm": 0.609375, "learning_rate": 0.0004832232167789628, "loss": 0.221, "step": 85030 }, { "epoch": 3.52, "grad_norm": 0.65234375, "learning_rate": 0.00048321931061929847, "loss": 0.2217, "step": 85040 }, { "epoch": 3.52, "grad_norm": 0.91015625, "learning_rate": 0.0004832154040207406, "loss": 0.2487, "step": 85050 }, { "epoch": 3.52, "grad_norm": 0.734375, "learning_rate": 0.0004832114969832966, "loss": 0.2074, "step": 85060 }, { "epoch": 3.52, "grad_norm": 1.2890625, "learning_rate": 0.00048320758950697366, "loss": 0.2174, "step": 85070 }, { "epoch": 3.52, "grad_norm": 0.376953125, "learning_rate": 0.0004832036815917792, "loss": 0.1682, "step": 85080 }, { "epoch": 3.52, "grad_norm": 0.69921875, "learning_rate": 0.0004831997732377206, "loss": 0.2285, "step": 85090 }, { "epoch": 3.52, "grad_norm": 1.1328125, "learning_rate": 0.00048319586444480525, "loss": 0.2101, "step": 85100 }, { "epoch": 3.53, "grad_norm": 0.29296875, "learning_rate": 0.0004831919552130405, "loss": 0.2403, "step": 85110 }, { "epoch": 3.53, "grad_norm": 0.640625, "learning_rate": 0.0004831880455424336, "loss": 0.2085, "step": 85120 }, { "epoch": 3.53, "grad_norm": 0.50390625, "learning_rate": 0.000483184135432992, "loss": 0.2112, "step": 85130 }, { "epoch": 3.53, "grad_norm": 0.1748046875, "learning_rate": 0.0004831802248847231, "loss": 0.188, "step": 85140 }, { "epoch": 3.53, "grad_norm": 0.46875, "learning_rate": 0.0004831763138976341, "loss": 0.2405, "step": 85150 }, { "epoch": 3.53, "grad_norm": 0.271484375, "learning_rate": 0.00048317240247173256, "loss": 0.2155, "step": 85160 }, { "epoch": 3.53, "grad_norm": 0.703125, "learning_rate": 0.0004831684906070257, "loss": 0.2167, "step": 85170 }, { "epoch": 3.53, "grad_norm": 0.44140625, "learning_rate": 0.0004831645783035209, "loss": 0.2038, "step": 85180 }, { "epoch": 3.53, "grad_norm": 0.36328125, "learning_rate": 0.0004831606655612255, "loss": 0.173, "step": 85190 }, { "epoch": 3.53, "grad_norm": 1.1171875, "learning_rate": 0.000483156752380147, "loss": 0.2153, "step": 85200 }, { "epoch": 3.53, "grad_norm": 0.93359375, "learning_rate": 0.0004831528387602926, "loss": 0.2266, "step": 85210 }, { "epoch": 3.53, "grad_norm": 0.73046875, "learning_rate": 0.00048314892470166976, "loss": 0.1933, "step": 85220 }, { "epoch": 3.53, "grad_norm": 0.87890625, "learning_rate": 0.0004831450102042858, "loss": 0.1813, "step": 85230 }, { "epoch": 3.53, "grad_norm": 1.5234375, "learning_rate": 0.00048314109526814816, "loss": 0.2577, "step": 85240 }, { "epoch": 3.53, "grad_norm": 1.0078125, "learning_rate": 0.0004831371798932641, "loss": 0.256, "step": 85250 }, { "epoch": 3.53, "grad_norm": 0.71875, "learning_rate": 0.0004831332640796411, "loss": 0.2518, "step": 85260 }, { "epoch": 3.53, "grad_norm": 0.98828125, "learning_rate": 0.00048312934782728645, "loss": 0.1895, "step": 85270 }, { "epoch": 3.53, "grad_norm": 0.404296875, "learning_rate": 0.00048312543113620757, "loss": 0.246, "step": 85280 }, { "epoch": 3.53, "grad_norm": 0.67578125, "learning_rate": 0.00048312151400641175, "loss": 0.1993, "step": 85290 }, { "epoch": 3.53, "grad_norm": 0.4140625, "learning_rate": 0.0004831175964379064, "loss": 0.2289, "step": 85300 }, { "epoch": 3.53, "grad_norm": 0.875, "learning_rate": 0.00048311367843069896, "loss": 0.1599, "step": 85310 }, { "epoch": 3.53, "grad_norm": 0.6015625, "learning_rate": 0.0004831097599847968, "loss": 0.1975, "step": 85320 }, { "epoch": 3.53, "grad_norm": 0.52734375, "learning_rate": 0.0004831058411002072, "loss": 0.2598, "step": 85330 }, { "epoch": 3.53, "grad_norm": 0.890625, "learning_rate": 0.00048310192177693743, "loss": 0.1715, "step": 85340 }, { "epoch": 3.54, "grad_norm": 0.8359375, "learning_rate": 0.0004830980020149952, "loss": 0.1941, "step": 85350 }, { "epoch": 3.54, "grad_norm": 0.50390625, "learning_rate": 0.0004830940818143876, "loss": 0.1847, "step": 85360 }, { "epoch": 3.54, "grad_norm": 0.765625, "learning_rate": 0.0004830901611751222, "loss": 0.2444, "step": 85370 }, { "epoch": 3.54, "grad_norm": 0.54296875, "learning_rate": 0.00048308624009720625, "loss": 0.2276, "step": 85380 }, { "epoch": 3.54, "grad_norm": 0.37109375, "learning_rate": 0.00048308231858064724, "loss": 0.1728, "step": 85390 }, { "epoch": 3.54, "grad_norm": 1.515625, "learning_rate": 0.0004830783966254524, "loss": 0.2081, "step": 85400 }, { "epoch": 3.54, "grad_norm": 0.6875, "learning_rate": 0.0004830744742316291, "loss": 0.209, "step": 85410 }, { "epoch": 3.54, "grad_norm": 1.1328125, "learning_rate": 0.000483070551399185, "loss": 0.219, "step": 85420 }, { "epoch": 3.54, "grad_norm": 1.25, "learning_rate": 0.0004830666281281272, "loss": 0.241, "step": 85430 }, { "epoch": 3.54, "grad_norm": 0.54296875, "learning_rate": 0.0004830627044184631, "loss": 0.2186, "step": 85440 }, { "epoch": 3.54, "grad_norm": 1.4375, "learning_rate": 0.00048305878027020025, "loss": 0.239, "step": 85450 }, { "epoch": 3.54, "grad_norm": 0.240234375, "learning_rate": 0.00048305485568334597, "loss": 0.2408, "step": 85460 }, { "epoch": 3.54, "grad_norm": 0.76953125, "learning_rate": 0.0004830509306579075, "loss": 0.1667, "step": 85470 }, { "epoch": 3.54, "grad_norm": 1.6328125, "learning_rate": 0.0004830470051938924, "loss": 0.2046, "step": 85480 }, { "epoch": 3.54, "grad_norm": 0.83203125, "learning_rate": 0.00048304307929130806, "loss": 0.2299, "step": 85490 }, { "epoch": 3.54, "grad_norm": 0.37890625, "learning_rate": 0.00048303915295016175, "loss": 0.2163, "step": 85500 }, { "epoch": 3.54, "grad_norm": 0.40625, "learning_rate": 0.0004830352261704609, "loss": 0.1806, "step": 85510 }, { "epoch": 3.54, "grad_norm": 0.22265625, "learning_rate": 0.000483031298952213, "loss": 0.1969, "step": 85520 }, { "epoch": 3.54, "grad_norm": 0.341796875, "learning_rate": 0.00048302737129542536, "loss": 0.2095, "step": 85530 }, { "epoch": 3.54, "grad_norm": 0.73046875, "learning_rate": 0.00048302344320010526, "loss": 0.2265, "step": 85540 }, { "epoch": 3.54, "grad_norm": 0.9609375, "learning_rate": 0.0004830195146662603, "loss": 0.1746, "step": 85550 }, { "epoch": 3.54, "grad_norm": 0.77734375, "learning_rate": 0.0004830155856938978, "loss": 0.1587, "step": 85560 }, { "epoch": 3.54, "grad_norm": 1.0, "learning_rate": 0.0004830116562830251, "loss": 0.2723, "step": 85570 }, { "epoch": 3.54, "grad_norm": 0.2001953125, "learning_rate": 0.00048300772643364954, "loss": 0.2006, "step": 85580 }, { "epoch": 3.55, "grad_norm": 0.6875, "learning_rate": 0.0004830037961457787, "loss": 0.2248, "step": 85590 }, { "epoch": 3.55, "grad_norm": 0.69140625, "learning_rate": 0.00048299986541941984, "loss": 0.2116, "step": 85600 }, { "epoch": 3.55, "grad_norm": 0.73046875, "learning_rate": 0.00048299593425458044, "loss": 0.2102, "step": 85610 }, { "epoch": 3.55, "grad_norm": 0.88671875, "learning_rate": 0.0004829920026512678, "loss": 0.2138, "step": 85620 }, { "epoch": 3.55, "grad_norm": 1.1484375, "learning_rate": 0.0004829880706094894, "loss": 0.2041, "step": 85630 }, { "epoch": 3.55, "grad_norm": 0.97265625, "learning_rate": 0.00048298413812925255, "loss": 0.2139, "step": 85640 }, { "epoch": 3.55, "grad_norm": 0.73046875, "learning_rate": 0.00048298020521056484, "loss": 0.1757, "step": 85650 }, { "epoch": 3.55, "grad_norm": 0.416015625, "learning_rate": 0.00048297627185343346, "loss": 0.1747, "step": 85660 }, { "epoch": 3.55, "grad_norm": 0.64453125, "learning_rate": 0.0004829723380578659, "loss": 0.2227, "step": 85670 }, { "epoch": 3.55, "grad_norm": 1.328125, "learning_rate": 0.00048296840382386954, "loss": 0.1827, "step": 85680 }, { "epoch": 3.55, "grad_norm": 0.8203125, "learning_rate": 0.00048296446915145185, "loss": 0.1764, "step": 85690 }, { "epoch": 3.55, "grad_norm": 1.046875, "learning_rate": 0.0004829605340406201, "loss": 0.2169, "step": 85700 }, { "epoch": 3.55, "grad_norm": 0.6953125, "learning_rate": 0.00048295659849138193, "loss": 0.2197, "step": 85710 }, { "epoch": 3.55, "grad_norm": 0.41796875, "learning_rate": 0.0004829526625037445, "loss": 0.2171, "step": 85720 }, { "epoch": 3.55, "grad_norm": 0.7578125, "learning_rate": 0.00048294872607771534, "loss": 0.2082, "step": 85730 }, { "epoch": 3.55, "grad_norm": 0.30078125, "learning_rate": 0.00048294478921330177, "loss": 0.1819, "step": 85740 }, { "epoch": 3.55, "grad_norm": 0.8046875, "learning_rate": 0.0004829408519105113, "loss": 0.2408, "step": 85750 }, { "epoch": 3.55, "grad_norm": 0.3671875, "learning_rate": 0.0004829369141693514, "loss": 0.2582, "step": 85760 }, { "epoch": 3.55, "grad_norm": 1.0078125, "learning_rate": 0.00048293297598982926, "loss": 0.2329, "step": 85770 }, { "epoch": 3.55, "grad_norm": 0.7265625, "learning_rate": 0.0004829290373719525, "loss": 0.1816, "step": 85780 }, { "epoch": 3.55, "grad_norm": 0.6875, "learning_rate": 0.0004829250983157284, "loss": 0.2602, "step": 85790 }, { "epoch": 3.55, "grad_norm": 0.61328125, "learning_rate": 0.00048292115882116437, "loss": 0.2224, "step": 85800 }, { "epoch": 3.55, "grad_norm": 0.60546875, "learning_rate": 0.0004829172188882679, "loss": 0.1852, "step": 85810 }, { "epoch": 3.55, "grad_norm": 0.57421875, "learning_rate": 0.0004829132785170464, "loss": 0.1925, "step": 85820 }, { "epoch": 3.56, "grad_norm": 0.984375, "learning_rate": 0.00048290933770750726, "loss": 0.1955, "step": 85830 }, { "epoch": 3.56, "grad_norm": 0.796875, "learning_rate": 0.0004829053964596579, "loss": 0.2804, "step": 85840 }, { "epoch": 3.56, "grad_norm": 0.2177734375, "learning_rate": 0.0004829014547735058, "loss": 0.1906, "step": 85850 }, { "epoch": 3.56, "grad_norm": 0.58984375, "learning_rate": 0.0004828975126490582, "loss": 0.1975, "step": 85860 }, { "epoch": 3.56, "grad_norm": 0.78515625, "learning_rate": 0.00048289357008632274, "loss": 0.2239, "step": 85870 }, { "epoch": 3.56, "grad_norm": 0.53125, "learning_rate": 0.0004828896270853066, "loss": 0.253, "step": 85880 }, { "epoch": 3.56, "grad_norm": 1.0390625, "learning_rate": 0.0004828856836460174, "loss": 0.2135, "step": 85890 }, { "epoch": 3.56, "grad_norm": 0.53125, "learning_rate": 0.0004828817397684625, "loss": 0.1934, "step": 85900 }, { "epoch": 3.56, "grad_norm": 0.6015625, "learning_rate": 0.0004828777954526493, "loss": 0.2367, "step": 85910 }, { "epoch": 3.56, "grad_norm": 0.404296875, "learning_rate": 0.00048287385069858523, "loss": 0.2328, "step": 85920 }, { "epoch": 3.56, "grad_norm": 0.470703125, "learning_rate": 0.0004828699055062778, "loss": 0.169, "step": 85930 }, { "epoch": 3.56, "grad_norm": 0.73828125, "learning_rate": 0.0004828659598757343, "loss": 0.2882, "step": 85940 }, { "epoch": 3.56, "grad_norm": 0.62890625, "learning_rate": 0.0004828620138069622, "loss": 0.2222, "step": 85950 }, { "epoch": 3.56, "grad_norm": 0.2451171875, "learning_rate": 0.00048285806729996895, "loss": 0.1987, "step": 85960 }, { "epoch": 3.56, "grad_norm": 0.91015625, "learning_rate": 0.00048285412035476197, "loss": 0.15, "step": 85970 }, { "epoch": 3.56, "grad_norm": 1.234375, "learning_rate": 0.00048285017297134867, "loss": 0.2291, "step": 85980 }, { "epoch": 3.56, "grad_norm": 0.609375, "learning_rate": 0.00048284622514973647, "loss": 0.2431, "step": 85990 }, { "epoch": 3.56, "grad_norm": 0.9140625, "learning_rate": 0.0004828422768899329, "loss": 0.175, "step": 86000 }, { "epoch": 3.56, "grad_norm": 0.86328125, "learning_rate": 0.00048283832819194517, "loss": 0.2232, "step": 86010 }, { "epoch": 3.56, "grad_norm": 0.640625, "learning_rate": 0.0004828343790557809, "loss": 0.2527, "step": 86020 }, { "epoch": 3.56, "grad_norm": 0.2099609375, "learning_rate": 0.0004828304294814475, "loss": 0.1901, "step": 86030 }, { "epoch": 3.56, "grad_norm": 0.69921875, "learning_rate": 0.00048282647946895244, "loss": 0.1694, "step": 86040 }, { "epoch": 3.56, "grad_norm": 0.92578125, "learning_rate": 0.00048282252901830306, "loss": 0.215, "step": 86050 }, { "epoch": 3.56, "grad_norm": 0.93359375, "learning_rate": 0.0004828185781295067, "loss": 0.2004, "step": 86060 }, { "epoch": 3.57, "grad_norm": 0.396484375, "learning_rate": 0.00048281462680257103, "loss": 0.2133, "step": 86070 }, { "epoch": 3.57, "grad_norm": 1.0234375, "learning_rate": 0.00048281067503750335, "loss": 0.272, "step": 86080 }, { "epoch": 3.57, "grad_norm": 0.2734375, "learning_rate": 0.00048280672283431114, "loss": 0.221, "step": 86090 }, { "epoch": 3.57, "grad_norm": 0.828125, "learning_rate": 0.00048280277019300177, "loss": 0.226, "step": 86100 }, { "epoch": 3.57, "grad_norm": 0.81640625, "learning_rate": 0.00048279881711358274, "loss": 0.1692, "step": 86110 }, { "epoch": 3.57, "grad_norm": 1.953125, "learning_rate": 0.0004827948635960615, "loss": 0.2323, "step": 86120 }, { "epoch": 3.57, "grad_norm": 0.85546875, "learning_rate": 0.00048279090964044546, "loss": 0.1944, "step": 86130 }, { "epoch": 3.57, "grad_norm": 1.5234375, "learning_rate": 0.0004827869552467421, "loss": 0.2287, "step": 86140 }, { "epoch": 3.57, "grad_norm": 2.140625, "learning_rate": 0.00048278300041495884, "loss": 0.1631, "step": 86150 }, { "epoch": 3.57, "grad_norm": 0.84375, "learning_rate": 0.000482779045145103, "loss": 0.2171, "step": 86160 }, { "epoch": 3.57, "grad_norm": 0.984375, "learning_rate": 0.00048277508943718223, "loss": 0.1952, "step": 86170 }, { "epoch": 3.57, "grad_norm": 0.1884765625, "learning_rate": 0.0004827711332912038, "loss": 0.221, "step": 86180 }, { "epoch": 3.57, "grad_norm": 0.35546875, "learning_rate": 0.0004827671767071753, "loss": 0.2207, "step": 86190 }, { "epoch": 3.57, "grad_norm": 0.5859375, "learning_rate": 0.00048276321968510413, "loss": 0.2336, "step": 86200 }, { "epoch": 3.57, "grad_norm": 0.6015625, "learning_rate": 0.00048275926222499766, "loss": 0.1628, "step": 86210 }, { "epoch": 3.57, "grad_norm": 0.69140625, "learning_rate": 0.00048275530432686343, "loss": 0.25, "step": 86220 }, { "epoch": 3.57, "grad_norm": 0.67578125, "learning_rate": 0.00048275134599070886, "loss": 0.2002, "step": 86230 }, { "epoch": 3.57, "grad_norm": 0.3984375, "learning_rate": 0.00048274738721654133, "loss": 0.1775, "step": 86240 }, { "epoch": 3.57, "grad_norm": 0.9453125, "learning_rate": 0.00048274342800436843, "loss": 0.1621, "step": 86250 }, { "epoch": 3.57, "grad_norm": 0.79296875, "learning_rate": 0.0004827394683541975, "loss": 0.1983, "step": 86260 }, { "epoch": 3.57, "grad_norm": 0.84375, "learning_rate": 0.000482735508266036, "loss": 0.2411, "step": 86270 }, { "epoch": 3.57, "grad_norm": 0.447265625, "learning_rate": 0.00048273154773989144, "loss": 0.2462, "step": 86280 }, { "epoch": 3.57, "grad_norm": 0.3984375, "learning_rate": 0.0004827275867757712, "loss": 0.2015, "step": 86290 }, { "epoch": 3.57, "grad_norm": 0.984375, "learning_rate": 0.00048272362537368286, "loss": 0.1808, "step": 86300 }, { "epoch": 3.57, "grad_norm": 0.060302734375, "learning_rate": 0.00048271966353363374, "loss": 0.2049, "step": 86310 }, { "epoch": 3.58, "grad_norm": 0.384765625, "learning_rate": 0.00048271570125563124, "loss": 0.2399, "step": 86320 }, { "epoch": 3.58, "grad_norm": 0.859375, "learning_rate": 0.0004827117385396831, "loss": 0.2278, "step": 86330 }, { "epoch": 3.58, "grad_norm": 0.5078125, "learning_rate": 0.00048270777538579645, "loss": 0.2371, "step": 86340 }, { "epoch": 3.58, "grad_norm": 0.60546875, "learning_rate": 0.000482703811793979, "loss": 0.2318, "step": 86350 }, { "epoch": 3.58, "grad_norm": 1.0390625, "learning_rate": 0.00048269984776423805, "loss": 0.2312, "step": 86360 }, { "epoch": 3.58, "grad_norm": 0.2109375, "learning_rate": 0.00048269588329658117, "loss": 0.2178, "step": 86370 }, { "epoch": 3.58, "grad_norm": 0.63671875, "learning_rate": 0.0004826919183910157, "loss": 0.2006, "step": 86380 }, { "epoch": 3.58, "grad_norm": 0.703125, "learning_rate": 0.0004826879530475492, "loss": 0.1827, "step": 86390 }, { "epoch": 3.58, "grad_norm": 2.21875, "learning_rate": 0.00048268398726618913, "loss": 0.1519, "step": 86400 }, { "epoch": 3.58, "grad_norm": 0.1640625, "learning_rate": 0.00048268002104694287, "loss": 0.2491, "step": 86410 }, { "epoch": 3.58, "grad_norm": 0.82421875, "learning_rate": 0.000482676054389818, "loss": 0.2409, "step": 86420 }, { "epoch": 3.58, "grad_norm": 0.64453125, "learning_rate": 0.00048267208729482183, "loss": 0.2438, "step": 86430 }, { "epoch": 3.58, "grad_norm": 1.234375, "learning_rate": 0.000482668119761962, "loss": 0.1968, "step": 86440 }, { "epoch": 3.58, "grad_norm": 0.38671875, "learning_rate": 0.00048266415179124586, "loss": 0.2302, "step": 86450 }, { "epoch": 3.58, "grad_norm": 1.1015625, "learning_rate": 0.00048266018338268094, "loss": 0.2073, "step": 86460 }, { "epoch": 3.58, "grad_norm": 0.71875, "learning_rate": 0.00048265621453627463, "loss": 0.2428, "step": 86470 }, { "epoch": 3.58, "grad_norm": 1.4921875, "learning_rate": 0.0004826522452520344, "loss": 0.1949, "step": 86480 }, { "epoch": 3.58, "grad_norm": 0.5390625, "learning_rate": 0.00048264827552996795, "loss": 0.2472, "step": 86490 }, { "epoch": 3.58, "grad_norm": 0.96875, "learning_rate": 0.0004826443053700824, "loss": 0.2854, "step": 86500 }, { "epoch": 3.58, "grad_norm": 0.66015625, "learning_rate": 0.00048264033477238543, "loss": 0.2077, "step": 86510 }, { "epoch": 3.58, "grad_norm": 0.5234375, "learning_rate": 0.00048263636373688457, "loss": 0.2255, "step": 86520 }, { "epoch": 3.58, "grad_norm": 0.25390625, "learning_rate": 0.0004826323922635871, "loss": 0.2177, "step": 86530 }, { "epoch": 3.58, "grad_norm": 0.69140625, "learning_rate": 0.00048262842035250067, "loss": 0.1978, "step": 86540 }, { "epoch": 3.58, "grad_norm": 0.80078125, "learning_rate": 0.0004826244480036326, "loss": 0.1816, "step": 86550 }, { "epoch": 3.59, "grad_norm": 0.97265625, "learning_rate": 0.00048262047521699044, "loss": 0.2177, "step": 86560 }, { "epoch": 3.59, "grad_norm": 0.8125, "learning_rate": 0.0004826165019925817, "loss": 0.2508, "step": 86570 }, { "epoch": 3.59, "grad_norm": 0.8046875, "learning_rate": 0.0004826125283304138, "loss": 0.2028, "step": 86580 }, { "epoch": 3.59, "grad_norm": 0.61328125, "learning_rate": 0.0004826085542304942, "loss": 0.1791, "step": 86590 }, { "epoch": 3.59, "grad_norm": 0.76171875, "learning_rate": 0.0004826045796928305, "loss": 0.2037, "step": 86600 }, { "epoch": 3.59, "grad_norm": 0.494140625, "learning_rate": 0.00048260060471743007, "loss": 0.2281, "step": 86610 }, { "epoch": 3.59, "grad_norm": 0.8515625, "learning_rate": 0.0004825966293043004, "loss": 0.1995, "step": 86620 }, { "epoch": 3.59, "grad_norm": 0.59375, "learning_rate": 0.00048259265345344907, "loss": 0.2129, "step": 86630 }, { "epoch": 3.59, "grad_norm": 0.63671875, "learning_rate": 0.0004825886771648834, "loss": 0.1946, "step": 86640 }, { "epoch": 3.59, "grad_norm": 0.609375, "learning_rate": 0.000482584700438611, "loss": 0.2634, "step": 86650 }, { "epoch": 3.59, "grad_norm": 0.40625, "learning_rate": 0.0004825807232746393, "loss": 0.2142, "step": 86660 }, { "epoch": 3.59, "grad_norm": 0.408203125, "learning_rate": 0.00048257674567297575, "loss": 0.1609, "step": 86670 }, { "epoch": 3.59, "grad_norm": 0.80859375, "learning_rate": 0.00048257276763362795, "loss": 0.1813, "step": 86680 }, { "epoch": 3.59, "grad_norm": 0.7421875, "learning_rate": 0.0004825687891566033, "loss": 0.2108, "step": 86690 }, { "epoch": 3.59, "grad_norm": 0.9296875, "learning_rate": 0.00048256481024190926, "loss": 0.2157, "step": 86700 }, { "epoch": 3.59, "grad_norm": 3.140625, "learning_rate": 0.0004825608308895534, "loss": 0.2409, "step": 86710 }, { "epoch": 3.59, "grad_norm": 0.671875, "learning_rate": 0.0004825568510995432, "loss": 0.2072, "step": 86720 }, { "epoch": 3.59, "grad_norm": 1.40625, "learning_rate": 0.000482552870871886, "loss": 0.1971, "step": 86730 }, { "epoch": 3.59, "grad_norm": 0.8359375, "learning_rate": 0.00048254889020658956, "loss": 0.2653, "step": 86740 }, { "epoch": 3.59, "grad_norm": 0.9765625, "learning_rate": 0.00048254490910366117, "loss": 0.1656, "step": 86750 }, { "epoch": 3.59, "grad_norm": 0.5703125, "learning_rate": 0.00048254092756310835, "loss": 0.2833, "step": 86760 }, { "epoch": 3.59, "grad_norm": 0.87890625, "learning_rate": 0.0004825369455849387, "loss": 0.2193, "step": 86770 }, { "epoch": 3.59, "grad_norm": 0.72265625, "learning_rate": 0.00048253296316915954, "loss": 0.2151, "step": 86780 }, { "epoch": 3.59, "grad_norm": 1.6484375, "learning_rate": 0.0004825289803157784, "loss": 0.203, "step": 86790 }, { "epoch": 3.6, "grad_norm": 1.078125, "learning_rate": 0.0004825249970248029, "loss": 0.213, "step": 86800 }, { "epoch": 3.6, "grad_norm": 0.55078125, "learning_rate": 0.0004825210132962405, "loss": 0.2054, "step": 86810 }, { "epoch": 3.6, "grad_norm": 0.67578125, "learning_rate": 0.00048251702913009863, "loss": 0.176, "step": 86820 }, { "epoch": 3.6, "grad_norm": 0.83984375, "learning_rate": 0.00048251304452638476, "loss": 0.2075, "step": 86830 }, { "epoch": 3.6, "grad_norm": 0.25, "learning_rate": 0.0004825090594851066, "loss": 0.2447, "step": 86840 }, { "epoch": 3.6, "grad_norm": 0.92578125, "learning_rate": 0.0004825050740062714, "loss": 0.2222, "step": 86850 }, { "epoch": 3.6, "grad_norm": 0.71875, "learning_rate": 0.00048250108808988676, "loss": 0.2231, "step": 86860 }, { "epoch": 3.6, "grad_norm": 0.84375, "learning_rate": 0.0004824971017359602, "loss": 0.2118, "step": 86870 }, { "epoch": 3.6, "grad_norm": 0.7265625, "learning_rate": 0.00048249311494449914, "loss": 0.1517, "step": 86880 }, { "epoch": 3.6, "grad_norm": 0.67578125, "learning_rate": 0.00048248912771551124, "loss": 0.1902, "step": 86890 }, { "epoch": 3.6, "grad_norm": 0.6640625, "learning_rate": 0.00048248514004900386, "loss": 0.1506, "step": 86900 }, { "epoch": 3.6, "grad_norm": 0.5859375, "learning_rate": 0.0004824811519449845, "loss": 0.2241, "step": 86910 }, { "epoch": 3.6, "grad_norm": 0.283203125, "learning_rate": 0.00048247716340346083, "loss": 0.2001, "step": 86920 }, { "epoch": 3.6, "grad_norm": 1.0078125, "learning_rate": 0.00048247317442444014, "loss": 0.2389, "step": 86930 }, { "epoch": 3.6, "grad_norm": 0.63671875, "learning_rate": 0.0004824691850079301, "loss": 0.2451, "step": 86940 }, { "epoch": 3.6, "grad_norm": 0.65625, "learning_rate": 0.00048246519515393816, "loss": 0.1682, "step": 86950 }, { "epoch": 3.6, "grad_norm": 0.8828125, "learning_rate": 0.0004824612048624718, "loss": 0.1975, "step": 86960 }, { "epoch": 3.6, "grad_norm": 0.71484375, "learning_rate": 0.0004824572141335386, "loss": 0.1439, "step": 86970 }, { "epoch": 3.6, "grad_norm": 0.0, "learning_rate": 0.000482453222967146, "loss": 0.2542, "step": 86980 }, { "epoch": 3.6, "grad_norm": 1.4765625, "learning_rate": 0.00048244923136330154, "loss": 0.232, "step": 86990 }, { "epoch": 3.6, "grad_norm": 0.734375, "learning_rate": 0.00048244523932201275, "loss": 0.2047, "step": 87000 }, { "epoch": 3.6, "grad_norm": 0.439453125, "learning_rate": 0.00048244124684328703, "loss": 0.2678, "step": 87010 }, { "epoch": 3.6, "grad_norm": 0.44140625, "learning_rate": 0.0004824372539271321, "loss": 0.1885, "step": 87020 }, { "epoch": 3.6, "grad_norm": 0.8671875, "learning_rate": 0.0004824332605735553, "loss": 0.1718, "step": 87030 }, { "epoch": 3.61, "grad_norm": 0.39453125, "learning_rate": 0.00048242926678256423, "loss": 0.2047, "step": 87040 }, { "epoch": 3.61, "grad_norm": 0.765625, "learning_rate": 0.00048242527255416633, "loss": 0.1831, "step": 87050 }, { "epoch": 3.61, "grad_norm": 0.79296875, "learning_rate": 0.0004824212778883692, "loss": 0.1539, "step": 87060 }, { "epoch": 3.61, "grad_norm": 0.484375, "learning_rate": 0.0004824172827851804, "loss": 0.2059, "step": 87070 }, { "epoch": 3.61, "grad_norm": 0.4296875, "learning_rate": 0.0004824132872446073, "loss": 0.1782, "step": 87080 }, { "epoch": 3.61, "grad_norm": 0.177734375, "learning_rate": 0.00048240929126665744, "loss": 0.2059, "step": 87090 }, { "epoch": 3.61, "grad_norm": 0.80078125, "learning_rate": 0.0004824052948513384, "loss": 0.2415, "step": 87100 }, { "epoch": 3.61, "grad_norm": 0.88671875, "learning_rate": 0.0004824012979986578, "loss": 0.2559, "step": 87110 }, { "epoch": 3.61, "grad_norm": 0.87890625, "learning_rate": 0.00048239730070862296, "loss": 0.2342, "step": 87120 }, { "epoch": 3.61, "grad_norm": 0.56640625, "learning_rate": 0.0004823933029812415, "loss": 0.2548, "step": 87130 }, { "epoch": 3.61, "grad_norm": 1.515625, "learning_rate": 0.000482389304816521, "loss": 0.2462, "step": 87140 }, { "epoch": 3.61, "grad_norm": 0.65625, "learning_rate": 0.00048238530621446887, "loss": 0.1639, "step": 87150 }, { "epoch": 3.61, "grad_norm": 1.1953125, "learning_rate": 0.00048238130717509275, "loss": 0.2376, "step": 87160 }, { "epoch": 3.61, "grad_norm": 1.0078125, "learning_rate": 0.00048237730769840005, "loss": 0.1834, "step": 87170 }, { "epoch": 3.61, "grad_norm": 1.0390625, "learning_rate": 0.00048237330778439836, "loss": 0.2583, "step": 87180 }, { "epoch": 3.61, "grad_norm": 0.4765625, "learning_rate": 0.0004823693074330952, "loss": 0.1999, "step": 87190 }, { "epoch": 3.61, "grad_norm": 0.84765625, "learning_rate": 0.00048236530664449805, "loss": 0.196, "step": 87200 }, { "epoch": 3.61, "grad_norm": 1.8125, "learning_rate": 0.00048236130541861455, "loss": 0.2751, "step": 87210 }, { "epoch": 3.61, "grad_norm": 0.59765625, "learning_rate": 0.00048235730375545215, "loss": 0.1888, "step": 87220 }, { "epoch": 3.61, "grad_norm": 1.359375, "learning_rate": 0.00048235330165501844, "loss": 0.1903, "step": 87230 }, { "epoch": 3.61, "grad_norm": 1.1171875, "learning_rate": 0.00048234929911732084, "loss": 0.2009, "step": 87240 }, { "epoch": 3.61, "grad_norm": 1.15625, "learning_rate": 0.000482345296142367, "loss": 0.2521, "step": 87250 }, { "epoch": 3.61, "grad_norm": 1.078125, "learning_rate": 0.00048234129273016434, "loss": 0.2399, "step": 87260 }, { "epoch": 3.61, "grad_norm": 0.494140625, "learning_rate": 0.0004823372888807206, "loss": 0.2063, "step": 87270 }, { "epoch": 3.62, "grad_norm": 1.09375, "learning_rate": 0.00048233328459404304, "loss": 0.2773, "step": 87280 }, { "epoch": 3.62, "grad_norm": 0.318359375, "learning_rate": 0.00048232927987013936, "loss": 0.2159, "step": 87290 }, { "epoch": 3.62, "grad_norm": 1.0703125, "learning_rate": 0.00048232527470901706, "loss": 0.2563, "step": 87300 }, { "epoch": 3.62, "grad_norm": 0.62890625, "learning_rate": 0.0004823212691106837, "loss": 0.2508, "step": 87310 }, { "epoch": 3.62, "grad_norm": 0.86328125, "learning_rate": 0.0004823172630751468, "loss": 0.2094, "step": 87320 }, { "epoch": 3.62, "grad_norm": 0.376953125, "learning_rate": 0.00048231325660241385, "loss": 0.207, "step": 87330 }, { "epoch": 3.62, "grad_norm": 0.494140625, "learning_rate": 0.0004823092496924925, "loss": 0.1754, "step": 87340 }, { "epoch": 3.62, "grad_norm": 0.345703125, "learning_rate": 0.0004823052423453902, "loss": 0.2107, "step": 87350 }, { "epoch": 3.62, "grad_norm": 0.3671875, "learning_rate": 0.00048230123456111454, "loss": 0.2118, "step": 87360 }, { "epoch": 3.62, "grad_norm": 0.69140625, "learning_rate": 0.000482297226339673, "loss": 0.2112, "step": 87370 }, { "epoch": 3.62, "grad_norm": 0.1806640625, "learning_rate": 0.0004822932176810732, "loss": 0.1905, "step": 87380 }, { "epoch": 3.62, "grad_norm": 1.015625, "learning_rate": 0.0004822892085853227, "loss": 0.2427, "step": 87390 }, { "epoch": 3.62, "grad_norm": 0.90234375, "learning_rate": 0.0004822851990524289, "loss": 0.1758, "step": 87400 }, { "epoch": 3.62, "grad_norm": 0.6171875, "learning_rate": 0.00048228118908239946, "loss": 0.23, "step": 87410 }, { "epoch": 3.62, "grad_norm": 0.4765625, "learning_rate": 0.0004822771786752419, "loss": 0.197, "step": 87420 }, { "epoch": 3.62, "grad_norm": 0.671875, "learning_rate": 0.0004822731678309639, "loss": 0.1882, "step": 87430 }, { "epoch": 3.62, "grad_norm": 0.8203125, "learning_rate": 0.0004822691565495727, "loss": 0.2678, "step": 87440 }, { "epoch": 3.62, "grad_norm": 0.234375, "learning_rate": 0.0004822651448310761, "loss": 0.1988, "step": 87450 }, { "epoch": 3.62, "grad_norm": 0.427734375, "learning_rate": 0.0004822611326754816, "loss": 0.2124, "step": 87460 }, { "epoch": 3.62, "grad_norm": 0.451171875, "learning_rate": 0.0004822571200827968, "loss": 0.2231, "step": 87470 }, { "epoch": 3.62, "grad_norm": 0.65234375, "learning_rate": 0.00048225310705302907, "loss": 0.1863, "step": 87480 }, { "epoch": 3.62, "grad_norm": 0.87109375, "learning_rate": 0.0004822490935861861, "loss": 0.2481, "step": 87490 }, { "epoch": 3.62, "grad_norm": 0.62109375, "learning_rate": 0.0004822450796822754, "loss": 0.204, "step": 87500 }, { "epoch": 3.62, "grad_norm": 0.515625, "learning_rate": 0.0004822410653413046, "loss": 0.1982, "step": 87510 }, { "epoch": 3.63, "grad_norm": 0.388671875, "learning_rate": 0.00048223705056328115, "loss": 0.2335, "step": 87520 }, { "epoch": 3.63, "grad_norm": 0.578125, "learning_rate": 0.00048223303534821263, "loss": 0.219, "step": 87530 }, { "epoch": 3.63, "grad_norm": 0.435546875, "learning_rate": 0.00048222901969610665, "loss": 0.2356, "step": 87540 }, { "epoch": 3.63, "grad_norm": 2.21875, "learning_rate": 0.00048222500360697076, "loss": 0.2693, "step": 87550 }, { "epoch": 3.63, "grad_norm": 0.9609375, "learning_rate": 0.00048222098708081243, "loss": 0.2584, "step": 87560 }, { "epoch": 3.63, "grad_norm": 0.4375, "learning_rate": 0.0004822169701176393, "loss": 0.2608, "step": 87570 }, { "epoch": 3.63, "grad_norm": 0.82421875, "learning_rate": 0.0004822129527174589, "loss": 0.2005, "step": 87580 }, { "epoch": 3.63, "grad_norm": 1.1875, "learning_rate": 0.00048220893488027885, "loss": 0.2129, "step": 87590 }, { "epoch": 3.63, "grad_norm": 0.7578125, "learning_rate": 0.0004822049166061067, "loss": 0.1807, "step": 87600 }, { "epoch": 3.63, "grad_norm": 1.0, "learning_rate": 0.0004822008978949498, "loss": 0.1786, "step": 87610 }, { "epoch": 3.63, "grad_norm": 0.796875, "learning_rate": 0.000482196878746816, "loss": 0.158, "step": 87620 }, { "epoch": 3.63, "grad_norm": 0.68359375, "learning_rate": 0.00048219285916171274, "loss": 0.1514, "step": 87630 }, { "epoch": 3.63, "grad_norm": 0.78515625, "learning_rate": 0.0004821888391396476, "loss": 0.1951, "step": 87640 }, { "epoch": 3.63, "grad_norm": 1.1171875, "learning_rate": 0.00048218481868062815, "loss": 0.3203, "step": 87650 }, { "epoch": 3.63, "grad_norm": 0.61328125, "learning_rate": 0.000482180797784662, "loss": 0.1779, "step": 87660 }, { "epoch": 3.63, "grad_norm": 0.8828125, "learning_rate": 0.00048217677645175653, "loss": 0.2357, "step": 87670 }, { "epoch": 3.63, "grad_norm": 0.490234375, "learning_rate": 0.00048217275468191945, "loss": 0.1891, "step": 87680 }, { "epoch": 3.63, "grad_norm": 0.83984375, "learning_rate": 0.0004821687324751584, "loss": 0.2352, "step": 87690 }, { "epoch": 3.63, "grad_norm": 3.890625, "learning_rate": 0.0004821647098314808, "loss": 0.2131, "step": 87700 }, { "epoch": 3.63, "grad_norm": 0.625, "learning_rate": 0.0004821606867508943, "loss": 0.2104, "step": 87710 }, { "epoch": 3.63, "grad_norm": 0.734375, "learning_rate": 0.00048215666323340653, "loss": 0.2278, "step": 87720 }, { "epoch": 3.63, "grad_norm": 0.87890625, "learning_rate": 0.0004821526392790249, "loss": 0.1505, "step": 87730 }, { "epoch": 3.63, "grad_norm": 0.75390625, "learning_rate": 0.00048214861488775714, "loss": 0.1629, "step": 87740 }, { "epoch": 3.63, "grad_norm": 0.59765625, "learning_rate": 0.00048214459005961076, "loss": 0.1872, "step": 87750 }, { "epoch": 3.64, "grad_norm": 0.9921875, "learning_rate": 0.0004821405647945933, "loss": 0.1887, "step": 87760 }, { "epoch": 3.64, "grad_norm": 0.57421875, "learning_rate": 0.0004821365390927124, "loss": 0.1948, "step": 87770 }, { "epoch": 3.64, "grad_norm": 1.6171875, "learning_rate": 0.00048213251295397554, "loss": 0.2742, "step": 87780 }, { "epoch": 3.64, "grad_norm": 0.232421875, "learning_rate": 0.0004821284863783904, "loss": 0.1722, "step": 87790 }, { "epoch": 3.64, "grad_norm": 1.5625, "learning_rate": 0.0004821244593659645, "loss": 0.2026, "step": 87800 }, { "epoch": 3.64, "grad_norm": 0.95703125, "learning_rate": 0.0004821204319167054, "loss": 0.2027, "step": 87810 }, { "epoch": 3.64, "grad_norm": 1.1640625, "learning_rate": 0.00048211640403062074, "loss": 0.1859, "step": 87820 }, { "epoch": 3.64, "grad_norm": 0.69921875, "learning_rate": 0.0004821123757077181, "loss": 0.2399, "step": 87830 }, { "epoch": 3.64, "grad_norm": 0.76953125, "learning_rate": 0.00048210834694800497, "loss": 0.1898, "step": 87840 }, { "epoch": 3.64, "grad_norm": 0.478515625, "learning_rate": 0.0004821043177514891, "loss": 0.2066, "step": 87850 }, { "epoch": 3.64, "grad_norm": 0.56640625, "learning_rate": 0.0004821002881181779, "loss": 0.2368, "step": 87860 }, { "epoch": 3.64, "grad_norm": 0.96875, "learning_rate": 0.00048209625804807905, "loss": 0.2189, "step": 87870 }, { "epoch": 3.64, "grad_norm": 0.4296875, "learning_rate": 0.00048209222754120007, "loss": 0.2525, "step": 87880 }, { "epoch": 3.64, "grad_norm": 0.443359375, "learning_rate": 0.00048208819659754866, "loss": 0.2394, "step": 87890 }, { "epoch": 3.64, "grad_norm": 1.078125, "learning_rate": 0.00048208416521713227, "loss": 0.21, "step": 87900 }, { "epoch": 3.64, "grad_norm": 0.96484375, "learning_rate": 0.0004820801333999585, "loss": 0.196, "step": 87910 }, { "epoch": 3.64, "grad_norm": 0.27734375, "learning_rate": 0.0004820761011460351, "loss": 0.1237, "step": 87920 }, { "epoch": 3.64, "grad_norm": 0.71875, "learning_rate": 0.0004820720684553694, "loss": 0.2028, "step": 87930 }, { "epoch": 3.64, "grad_norm": 0.0, "learning_rate": 0.0004820680353279692, "loss": 0.182, "step": 87940 }, { "epoch": 3.64, "grad_norm": 2.0, "learning_rate": 0.00048206400176384204, "loss": 0.2195, "step": 87950 }, { "epoch": 3.64, "grad_norm": 0.984375, "learning_rate": 0.0004820599677629955, "loss": 0.1942, "step": 87960 }, { "epoch": 3.64, "grad_norm": 0.376953125, "learning_rate": 0.0004820559333254371, "loss": 0.1457, "step": 87970 }, { "epoch": 3.64, "grad_norm": 1.1484375, "learning_rate": 0.0004820518984511745, "loss": 0.2357, "step": 87980 }, { "epoch": 3.64, "grad_norm": 0.57421875, "learning_rate": 0.0004820478631402153, "loss": 0.2018, "step": 87990 }, { "epoch": 3.64, "grad_norm": 0.9609375, "learning_rate": 0.00048204382739256714, "loss": 0.2368, "step": 88000 }, { "epoch": 3.65, "grad_norm": 0.9375, "learning_rate": 0.0004820397912082375, "loss": 0.1984, "step": 88010 }, { "epoch": 3.65, "grad_norm": 1.5, "learning_rate": 0.000482035754587234, "loss": 0.2281, "step": 88020 }, { "epoch": 3.65, "grad_norm": 0.177734375, "learning_rate": 0.0004820317175295643, "loss": 0.1422, "step": 88030 }, { "epoch": 3.65, "grad_norm": 0.609375, "learning_rate": 0.000482027680035236, "loss": 0.2482, "step": 88040 }, { "epoch": 3.65, "grad_norm": 0.67578125, "learning_rate": 0.00048202364210425664, "loss": 0.255, "step": 88050 }, { "epoch": 3.65, "grad_norm": 0.6640625, "learning_rate": 0.0004820196037366338, "loss": 0.2054, "step": 88060 }, { "epoch": 3.65, "grad_norm": 1.1328125, "learning_rate": 0.0004820155649323752, "loss": 0.1982, "step": 88070 }, { "epoch": 3.65, "grad_norm": 0.361328125, "learning_rate": 0.0004820115256914883, "loss": 0.1696, "step": 88080 }, { "epoch": 3.65, "grad_norm": 0.45703125, "learning_rate": 0.0004820074860139808, "loss": 0.2124, "step": 88090 }, { "epoch": 3.65, "grad_norm": 0.6484375, "learning_rate": 0.0004820034458998602, "loss": 0.2227, "step": 88100 }, { "epoch": 3.65, "grad_norm": 0.5390625, "learning_rate": 0.00048199940534913424, "loss": 0.1908, "step": 88110 }, { "epoch": 3.65, "grad_norm": 0.703125, "learning_rate": 0.0004819953643618104, "loss": 0.1846, "step": 88120 }, { "epoch": 3.65, "grad_norm": 0.41015625, "learning_rate": 0.0004819913229378964, "loss": 0.2327, "step": 88130 }, { "epoch": 3.65, "grad_norm": 0.375, "learning_rate": 0.00048198728107739976, "loss": 0.2155, "step": 88140 }, { "epoch": 3.65, "grad_norm": 1.2109375, "learning_rate": 0.0004819832387803281, "loss": 0.2069, "step": 88150 }, { "epoch": 3.65, "grad_norm": 1.09375, "learning_rate": 0.00048197919604668903, "loss": 0.1803, "step": 88160 }, { "epoch": 3.65, "grad_norm": 0.98828125, "learning_rate": 0.00048197515287649016, "loss": 0.2307, "step": 88170 }, { "epoch": 3.65, "grad_norm": 0.7109375, "learning_rate": 0.0004819711092697391, "loss": 0.1751, "step": 88180 }, { "epoch": 3.65, "grad_norm": 0.734375, "learning_rate": 0.0004819670652264435, "loss": 0.21, "step": 88190 }, { "epoch": 3.65, "grad_norm": 1.03125, "learning_rate": 0.00048196302074661094, "loss": 0.2281, "step": 88200 }, { "epoch": 3.65, "grad_norm": 0.474609375, "learning_rate": 0.00048195897583024896, "loss": 0.2834, "step": 88210 }, { "epoch": 3.65, "grad_norm": 1.0703125, "learning_rate": 0.0004819549304773653, "loss": 0.2281, "step": 88220 }, { "epoch": 3.65, "grad_norm": 0.54296875, "learning_rate": 0.00048195088468796745, "loss": 0.2281, "step": 88230 }, { "epoch": 3.65, "grad_norm": 0.66015625, "learning_rate": 0.0004819468384620631, "loss": 0.2235, "step": 88240 }, { "epoch": 3.66, "grad_norm": 1.4296875, "learning_rate": 0.0004819427917996598, "loss": 0.2461, "step": 88250 }, { "epoch": 3.66, "grad_norm": 0.4296875, "learning_rate": 0.00048193874470076536, "loss": 0.1905, "step": 88260 }, { "epoch": 3.66, "grad_norm": 0.419921875, "learning_rate": 0.00048193469716538716, "loss": 0.2439, "step": 88270 }, { "epoch": 3.66, "grad_norm": 0.69140625, "learning_rate": 0.0004819306491935329, "loss": 0.1679, "step": 88280 }, { "epoch": 3.66, "grad_norm": 0.9765625, "learning_rate": 0.00048192660078521024, "loss": 0.2448, "step": 88290 }, { "epoch": 3.66, "grad_norm": 0.625, "learning_rate": 0.0004819225519404267, "loss": 0.1768, "step": 88300 }, { "epoch": 3.66, "grad_norm": 0.75390625, "learning_rate": 0.00048191850265919, "loss": 0.2381, "step": 88310 }, { "epoch": 3.66, "grad_norm": 0.5390625, "learning_rate": 0.0004819144529415077, "loss": 0.248, "step": 88320 }, { "epoch": 3.66, "grad_norm": 0.34765625, "learning_rate": 0.00048191040278738754, "loss": 0.2321, "step": 88330 }, { "epoch": 3.66, "grad_norm": 0.671875, "learning_rate": 0.000481906352196837, "loss": 0.1929, "step": 88340 }, { "epoch": 3.66, "grad_norm": 0.6640625, "learning_rate": 0.00048190230116986366, "loss": 0.2212, "step": 88350 }, { "epoch": 3.66, "grad_norm": 0.4453125, "learning_rate": 0.00048189824970647536, "loss": 0.1937, "step": 88360 }, { "epoch": 3.66, "grad_norm": 0.4609375, "learning_rate": 0.00048189419780667953, "loss": 0.2441, "step": 88370 }, { "epoch": 3.66, "grad_norm": 0.66015625, "learning_rate": 0.0004818901454704838, "loss": 0.2045, "step": 88380 }, { "epoch": 3.66, "grad_norm": 0.3984375, "learning_rate": 0.00048188609269789596, "loss": 0.2579, "step": 88390 }, { "epoch": 3.66, "grad_norm": 1.3671875, "learning_rate": 0.0004818820394889235, "loss": 0.1974, "step": 88400 }, { "epoch": 3.66, "grad_norm": 0.68359375, "learning_rate": 0.0004818779858435741, "loss": 0.2561, "step": 88410 }, { "epoch": 3.66, "grad_norm": 0.369140625, "learning_rate": 0.00048187393176185535, "loss": 0.1714, "step": 88420 }, { "epoch": 3.66, "grad_norm": 1.1171875, "learning_rate": 0.0004818698772437749, "loss": 0.2172, "step": 88430 }, { "epoch": 3.66, "grad_norm": 1.34375, "learning_rate": 0.0004818658222893404, "loss": 0.1839, "step": 88440 }, { "epoch": 3.66, "grad_norm": 0.55078125, "learning_rate": 0.00048186176689855945, "loss": 0.226, "step": 88450 }, { "epoch": 3.66, "grad_norm": 0.74609375, "learning_rate": 0.00048185771107143966, "loss": 0.2339, "step": 88460 }, { "epoch": 3.66, "grad_norm": 1.328125, "learning_rate": 0.0004818536548079887, "loss": 0.1904, "step": 88470 }, { "epoch": 3.66, "grad_norm": 1.15625, "learning_rate": 0.0004818495981082143, "loss": 0.2332, "step": 88480 }, { "epoch": 3.67, "grad_norm": 0.734375, "learning_rate": 0.0004818455409721239, "loss": 0.249, "step": 88490 }, { "epoch": 3.67, "grad_norm": 0.6875, "learning_rate": 0.00048184148339972525, "loss": 0.2028, "step": 88500 }, { "epoch": 3.67, "grad_norm": 0.67578125, "learning_rate": 0.00048183742539102594, "loss": 0.1625, "step": 88510 }, { "epoch": 3.67, "grad_norm": 0.91015625, "learning_rate": 0.00048183336694603365, "loss": 0.2142, "step": 88520 }, { "epoch": 3.67, "grad_norm": 0.6640625, "learning_rate": 0.000481829308064756, "loss": 0.24, "step": 88530 }, { "epoch": 3.67, "grad_norm": 0.9765625, "learning_rate": 0.0004818252487472007, "loss": 0.1782, "step": 88540 }, { "epoch": 3.67, "grad_norm": 0.69140625, "learning_rate": 0.0004818211889933752, "loss": 0.2378, "step": 88550 }, { "epoch": 3.67, "grad_norm": 0.546875, "learning_rate": 0.0004818171288032873, "loss": 0.2477, "step": 88560 }, { "epoch": 3.67, "grad_norm": 0.5390625, "learning_rate": 0.0004818130681769446, "loss": 0.221, "step": 88570 }, { "epoch": 3.67, "grad_norm": 0.28515625, "learning_rate": 0.00048180900711435477, "loss": 0.2664, "step": 88580 }, { "epoch": 3.67, "grad_norm": 0.79296875, "learning_rate": 0.00048180494561552535, "loss": 0.1819, "step": 88590 }, { "epoch": 3.67, "grad_norm": 0.69921875, "learning_rate": 0.00048180088368046415, "loss": 0.194, "step": 88600 }, { "epoch": 3.67, "grad_norm": 0.6640625, "learning_rate": 0.0004817968213091787, "loss": 0.2313, "step": 88610 }, { "epoch": 3.67, "grad_norm": 0.54296875, "learning_rate": 0.0004817927585016766, "loss": 0.206, "step": 88620 }, { "epoch": 3.67, "grad_norm": 0.69921875, "learning_rate": 0.0004817886952579656, "loss": 0.2444, "step": 88630 }, { "epoch": 3.67, "grad_norm": 0.6484375, "learning_rate": 0.0004817846315780533, "loss": 0.1916, "step": 88640 }, { "epoch": 3.67, "grad_norm": 1.0546875, "learning_rate": 0.0004817805674619473, "loss": 0.1957, "step": 88650 }, { "epoch": 3.67, "grad_norm": 0.427734375, "learning_rate": 0.00048177650290965533, "loss": 0.2372, "step": 88660 }, { "epoch": 3.67, "grad_norm": 0.43359375, "learning_rate": 0.0004817724379211851, "loss": 0.1536, "step": 88670 }, { "epoch": 3.67, "grad_norm": 0.58203125, "learning_rate": 0.0004817683724965442, "loss": 0.2428, "step": 88680 }, { "epoch": 3.67, "grad_norm": 0.57421875, "learning_rate": 0.0004817643066357401, "loss": 0.1657, "step": 88690 }, { "epoch": 3.67, "grad_norm": 0.875, "learning_rate": 0.00048176024033878064, "loss": 0.1538, "step": 88700 }, { "epoch": 3.67, "grad_norm": 0.3125, "learning_rate": 0.00048175617360567347, "loss": 0.2284, "step": 88710 }, { "epoch": 3.67, "grad_norm": 1.2109375, "learning_rate": 0.0004817521064364262, "loss": 0.2424, "step": 88720 }, { "epoch": 3.68, "grad_norm": 1.0078125, "learning_rate": 0.00048174803883104645, "loss": 0.178, "step": 88730 }, { "epoch": 3.68, "grad_norm": 1.1484375, "learning_rate": 0.00048174397078954204, "loss": 0.2524, "step": 88740 }, { "epoch": 3.68, "grad_norm": 0.62890625, "learning_rate": 0.0004817399023119204, "loss": 0.2043, "step": 88750 }, { "epoch": 3.68, "grad_norm": 0.3828125, "learning_rate": 0.0004817358333981893, "loss": 0.2001, "step": 88760 }, { "epoch": 3.68, "grad_norm": 0.5546875, "learning_rate": 0.0004817317640483564, "loss": 0.1965, "step": 88770 }, { "epoch": 3.68, "grad_norm": 0.66015625, "learning_rate": 0.00048172769426242933, "loss": 0.1786, "step": 88780 }, { "epoch": 3.68, "grad_norm": 0.46484375, "learning_rate": 0.0004817236240404158, "loss": 0.2466, "step": 88790 }, { "epoch": 3.68, "grad_norm": 1.046875, "learning_rate": 0.0004817195533823234, "loss": 0.2412, "step": 88800 }, { "epoch": 3.68, "grad_norm": 0.62109375, "learning_rate": 0.0004817154822881598, "loss": 0.2501, "step": 88810 }, { "epoch": 3.68, "grad_norm": 0.6953125, "learning_rate": 0.0004817114107579327, "loss": 0.1917, "step": 88820 }, { "epoch": 3.68, "grad_norm": 0.62109375, "learning_rate": 0.00048170733879164985, "loss": 0.2456, "step": 88830 }, { "epoch": 3.68, "grad_norm": 0.734375, "learning_rate": 0.0004817032663893187, "loss": 0.2186, "step": 88840 }, { "epoch": 3.68, "grad_norm": 1.7578125, "learning_rate": 0.00048169919355094706, "loss": 0.2017, "step": 88850 }, { "epoch": 3.68, "grad_norm": 0.859375, "learning_rate": 0.0004816951202765425, "loss": 0.2267, "step": 88860 }, { "epoch": 3.68, "grad_norm": 1.6875, "learning_rate": 0.00048169104656611286, "loss": 0.2419, "step": 88870 }, { "epoch": 3.68, "grad_norm": 0.462890625, "learning_rate": 0.0004816869724196656, "loss": 0.1949, "step": 88880 }, { "epoch": 3.68, "grad_norm": 1.0078125, "learning_rate": 0.00048168289783720854, "loss": 0.1948, "step": 88890 }, { "epoch": 3.68, "grad_norm": 0.345703125, "learning_rate": 0.0004816788228187492, "loss": 0.2037, "step": 88900 }, { "epoch": 3.68, "grad_norm": 0.3515625, "learning_rate": 0.0004816747473642954, "loss": 0.1973, "step": 88910 }, { "epoch": 3.68, "grad_norm": 0.3671875, "learning_rate": 0.0004816706714738547, "loss": 0.1965, "step": 88920 }, { "epoch": 3.68, "grad_norm": 0.69140625, "learning_rate": 0.00048166659514743487, "loss": 0.2195, "step": 88930 }, { "epoch": 3.68, "grad_norm": 0.345703125, "learning_rate": 0.0004816625183850435, "loss": 0.2393, "step": 88940 }, { "epoch": 3.68, "grad_norm": 0.92578125, "learning_rate": 0.00048165844118668835, "loss": 0.2519, "step": 88950 }, { "epoch": 3.68, "grad_norm": 0.51171875, "learning_rate": 0.00048165436355237693, "loss": 0.2468, "step": 88960 }, { "epoch": 3.69, "grad_norm": 0.6171875, "learning_rate": 0.00048165028548211703, "loss": 0.1653, "step": 88970 }, { "epoch": 3.69, "grad_norm": 1.5390625, "learning_rate": 0.00048164620697591635, "loss": 0.214, "step": 88980 }, { "epoch": 3.69, "grad_norm": 1.25, "learning_rate": 0.0004816421280337825, "loss": 0.2439, "step": 88990 }, { "epoch": 3.69, "grad_norm": 0.69921875, "learning_rate": 0.00048163804865572317, "loss": 0.2112, "step": 89000 }, { "epoch": 3.69, "grad_norm": 0.30078125, "learning_rate": 0.0004816339688417461, "loss": 0.1893, "step": 89010 }, { "epoch": 3.69, "grad_norm": 0.64453125, "learning_rate": 0.00048162988859185886, "loss": 0.2387, "step": 89020 }, { "epoch": 3.69, "grad_norm": 1.0234375, "learning_rate": 0.0004816258079060692, "loss": 0.25, "step": 89030 }, { "epoch": 3.69, "grad_norm": 0.73828125, "learning_rate": 0.00048162172678438474, "loss": 0.2639, "step": 89040 }, { "epoch": 3.69, "grad_norm": 0.64453125, "learning_rate": 0.00048161764522681327, "loss": 0.222, "step": 89050 }, { "epoch": 3.69, "grad_norm": 0.30078125, "learning_rate": 0.00048161356323336244, "loss": 0.2451, "step": 89060 }, { "epoch": 3.69, "grad_norm": 0.984375, "learning_rate": 0.00048160948080403984, "loss": 0.1632, "step": 89070 }, { "epoch": 3.69, "grad_norm": 0.93359375, "learning_rate": 0.00048160539793885317, "loss": 0.2195, "step": 89080 }, { "epoch": 3.69, "grad_norm": 0.6953125, "learning_rate": 0.00048160131463781023, "loss": 0.2091, "step": 89090 }, { "epoch": 3.69, "grad_norm": 0.3125, "learning_rate": 0.00048159723090091856, "loss": 0.2422, "step": 89100 }, { "epoch": 3.69, "grad_norm": 0.94140625, "learning_rate": 0.0004815931467281859, "loss": 0.2721, "step": 89110 }, { "epoch": 3.69, "grad_norm": 0.7109375, "learning_rate": 0.00048158906211962004, "loss": 0.2254, "step": 89120 }, { "epoch": 3.69, "grad_norm": 0.435546875, "learning_rate": 0.0004815849770752285, "loss": 0.2498, "step": 89130 }, { "epoch": 3.69, "grad_norm": 0.71484375, "learning_rate": 0.00048158089159501907, "loss": 0.1644, "step": 89140 }, { "epoch": 3.69, "grad_norm": 0.63671875, "learning_rate": 0.0004815768056789994, "loss": 0.219, "step": 89150 }, { "epoch": 3.69, "grad_norm": 0.81640625, "learning_rate": 0.0004815727193271772, "loss": 0.226, "step": 89160 }, { "epoch": 3.69, "grad_norm": 0.83984375, "learning_rate": 0.0004815686325395601, "loss": 0.1933, "step": 89170 }, { "epoch": 3.69, "grad_norm": 0.85546875, "learning_rate": 0.0004815645453161559, "loss": 0.2615, "step": 89180 }, { "epoch": 3.69, "grad_norm": 0.6484375, "learning_rate": 0.00048156045765697223, "loss": 0.2319, "step": 89190 }, { "epoch": 3.69, "grad_norm": 1.3671875, "learning_rate": 0.00048155636956201674, "loss": 0.2486, "step": 89200 }, { "epoch": 3.7, "grad_norm": 1.0546875, "learning_rate": 0.00048155228103129725, "loss": 0.192, "step": 89210 }, { "epoch": 3.7, "grad_norm": 0.21484375, "learning_rate": 0.0004815481920648213, "loss": 0.1901, "step": 89220 }, { "epoch": 3.7, "grad_norm": 0.8359375, "learning_rate": 0.00048154410266259667, "loss": 0.2396, "step": 89230 }, { "epoch": 3.7, "grad_norm": 0.0849609375, "learning_rate": 0.00048154001282463103, "loss": 0.1401, "step": 89240 }, { "epoch": 3.7, "grad_norm": 0.310546875, "learning_rate": 0.00048153592255093214, "loss": 0.2011, "step": 89250 }, { "epoch": 3.7, "grad_norm": 0.88671875, "learning_rate": 0.0004815318318415076, "loss": 0.2407, "step": 89260 }, { "epoch": 3.7, "grad_norm": 0.96484375, "learning_rate": 0.0004815277406963652, "loss": 0.234, "step": 89270 }, { "epoch": 3.7, "grad_norm": 0.5234375, "learning_rate": 0.0004815236491155126, "loss": 0.1769, "step": 89280 }, { "epoch": 3.7, "grad_norm": 0.267578125, "learning_rate": 0.00048151955709895743, "loss": 0.1418, "step": 89290 }, { "epoch": 3.7, "grad_norm": 0.67578125, "learning_rate": 0.00048151546464670746, "loss": 0.2431, "step": 89300 }, { "epoch": 3.7, "grad_norm": 0.484375, "learning_rate": 0.0004815113717587705, "loss": 0.169, "step": 89310 }, { "epoch": 3.7, "grad_norm": 0.796875, "learning_rate": 0.000481507278435154, "loss": 0.1969, "step": 89320 }, { "epoch": 3.7, "grad_norm": 0.94140625, "learning_rate": 0.00048150318467586584, "loss": 0.1898, "step": 89330 }, { "epoch": 3.7, "grad_norm": 0.60546875, "learning_rate": 0.00048149909048091376, "loss": 0.2131, "step": 89340 }, { "epoch": 3.7, "grad_norm": 0.462890625, "learning_rate": 0.00048149499585030533, "loss": 0.2418, "step": 89350 }, { "epoch": 3.7, "grad_norm": 0.35546875, "learning_rate": 0.0004814909007840483, "loss": 0.1763, "step": 89360 }, { "epoch": 3.7, "grad_norm": 0.451171875, "learning_rate": 0.0004814868052821504, "loss": 0.2272, "step": 89370 }, { "epoch": 3.7, "grad_norm": 0.515625, "learning_rate": 0.00048148270934461943, "loss": 0.1762, "step": 89380 }, { "epoch": 3.7, "grad_norm": 0.390625, "learning_rate": 0.0004814786129714629, "loss": 0.1826, "step": 89390 }, { "epoch": 3.7, "grad_norm": 0.0, "learning_rate": 0.0004814745161626886, "loss": 0.18, "step": 89400 }, { "epoch": 3.7, "grad_norm": 0.318359375, "learning_rate": 0.00048147041891830435, "loss": 0.1924, "step": 89410 }, { "epoch": 3.7, "grad_norm": 0.46875, "learning_rate": 0.0004814663212383177, "loss": 0.2742, "step": 89420 }, { "epoch": 3.7, "grad_norm": 0.392578125, "learning_rate": 0.0004814622231227365, "loss": 0.1847, "step": 89430 }, { "epoch": 3.7, "grad_norm": 1.1328125, "learning_rate": 0.0004814581245715682, "loss": 0.1781, "step": 89440 }, { "epoch": 3.71, "grad_norm": 0.2001953125, "learning_rate": 0.00048145402558482086, "loss": 0.2031, "step": 89450 }, { "epoch": 3.71, "grad_norm": 1.6875, "learning_rate": 0.00048144992616250206, "loss": 0.2572, "step": 89460 }, { "epoch": 3.71, "grad_norm": 1.0390625, "learning_rate": 0.0004814458263046194, "loss": 0.1996, "step": 89470 }, { "epoch": 3.71, "grad_norm": 0.64453125, "learning_rate": 0.0004814417260111808, "loss": 0.229, "step": 89480 }, { "epoch": 3.71, "grad_norm": 0.6015625, "learning_rate": 0.00048143762528219384, "loss": 0.1933, "step": 89490 }, { "epoch": 3.71, "grad_norm": 0.59765625, "learning_rate": 0.0004814335241176662, "loss": 0.2302, "step": 89500 }, { "epoch": 3.71, "grad_norm": 1.0390625, "learning_rate": 0.00048142942251760566, "loss": 0.2201, "step": 89510 }, { "epoch": 3.71, "grad_norm": 0.40625, "learning_rate": 0.00048142532048202, "loss": 0.2327, "step": 89520 }, { "epoch": 3.71, "grad_norm": 0.8515625, "learning_rate": 0.0004814212180109168, "loss": 0.2241, "step": 89530 }, { "epoch": 3.71, "grad_norm": 0.65625, "learning_rate": 0.00048141711510430395, "loss": 0.2359, "step": 89540 }, { "epoch": 3.71, "grad_norm": 0.51953125, "learning_rate": 0.00048141301176218897, "loss": 0.2054, "step": 89550 }, { "epoch": 3.71, "grad_norm": 1.2421875, "learning_rate": 0.00048140890798457984, "loss": 0.2075, "step": 89560 }, { "epoch": 3.71, "grad_norm": 0.625, "learning_rate": 0.000481404803771484, "loss": 0.2241, "step": 89570 }, { "epoch": 3.71, "grad_norm": 1.0390625, "learning_rate": 0.0004814006991229094, "loss": 0.2145, "step": 89580 }, { "epoch": 3.71, "grad_norm": 0.478515625, "learning_rate": 0.0004813965940388636, "loss": 0.2329, "step": 89590 }, { "epoch": 3.71, "grad_norm": 1.4609375, "learning_rate": 0.00048139248851935445, "loss": 0.2031, "step": 89600 }, { "epoch": 3.71, "grad_norm": 0.5234375, "learning_rate": 0.0004813883825643896, "loss": 0.2679, "step": 89610 }, { "epoch": 3.71, "grad_norm": 0.51953125, "learning_rate": 0.0004813842761739768, "loss": 0.2505, "step": 89620 }, { "epoch": 3.71, "grad_norm": 0.6015625, "learning_rate": 0.0004813801693481238, "loss": 0.2245, "step": 89630 }, { "epoch": 3.71, "grad_norm": 0.76953125, "learning_rate": 0.00048137606208683827, "loss": 0.2445, "step": 89640 }, { "epoch": 3.71, "grad_norm": 0.95703125, "learning_rate": 0.00048137195439012804, "loss": 0.2453, "step": 89650 }, { "epoch": 3.71, "grad_norm": 0.48828125, "learning_rate": 0.00048136784625800076, "loss": 0.2244, "step": 89660 }, { "epoch": 3.71, "grad_norm": 0.7578125, "learning_rate": 0.00048136373769046417, "loss": 0.233, "step": 89670 }, { "epoch": 3.71, "grad_norm": 0.55078125, "learning_rate": 0.00048135962868752597, "loss": 0.2189, "step": 89680 }, { "epoch": 3.71, "grad_norm": 0.58984375, "learning_rate": 0.000481355519249194, "loss": 0.1979, "step": 89690 }, { "epoch": 3.72, "grad_norm": 1.125, "learning_rate": 0.0004813514093754758, "loss": 0.1755, "step": 89700 }, { "epoch": 3.72, "grad_norm": 0.5859375, "learning_rate": 0.00048134729906637943, "loss": 0.2439, "step": 89710 }, { "epoch": 3.72, "grad_norm": 1.6640625, "learning_rate": 0.0004813431883219123, "loss": 0.203, "step": 89720 }, { "epoch": 3.72, "grad_norm": 0.68359375, "learning_rate": 0.0004813390771420822, "loss": 0.1568, "step": 89730 }, { "epoch": 3.72, "grad_norm": 0.5, "learning_rate": 0.00048133496552689713, "loss": 0.1614, "step": 89740 }, { "epoch": 3.72, "grad_norm": 0.0, "learning_rate": 0.00048133085347636453, "loss": 0.1593, "step": 89750 }, { "epoch": 3.72, "grad_norm": 0.734375, "learning_rate": 0.00048132674099049224, "loss": 0.3255, "step": 89760 }, { "epoch": 3.72, "grad_norm": 0.765625, "learning_rate": 0.00048132262806928805, "loss": 0.2138, "step": 89770 }, { "epoch": 3.72, "grad_norm": 0.451171875, "learning_rate": 0.0004813185147127596, "loss": 0.2178, "step": 89780 }, { "epoch": 3.72, "grad_norm": 0.59765625, "learning_rate": 0.00048131440092091473, "loss": 0.2188, "step": 89790 }, { "epoch": 3.72, "grad_norm": 0.5625, "learning_rate": 0.0004813102866937611, "loss": 0.2209, "step": 89800 }, { "epoch": 3.72, "grad_norm": 1.3125, "learning_rate": 0.0004813061720313065, "loss": 0.2162, "step": 89810 }, { "epoch": 3.72, "grad_norm": 0.478515625, "learning_rate": 0.00048130205693355864, "loss": 0.1811, "step": 89820 }, { "epoch": 3.72, "grad_norm": 1.2578125, "learning_rate": 0.00048129794140052535, "loss": 0.2069, "step": 89830 }, { "epoch": 3.72, "grad_norm": 1.1875, "learning_rate": 0.00048129382543221427, "loss": 0.2329, "step": 89840 }, { "epoch": 3.72, "grad_norm": 0.47265625, "learning_rate": 0.0004812897090286332, "loss": 0.1451, "step": 89850 }, { "epoch": 3.72, "grad_norm": 0.39453125, "learning_rate": 0.0004812855921897899, "loss": 0.2214, "step": 89860 }, { "epoch": 3.72, "grad_norm": 0.69140625, "learning_rate": 0.0004812814749156921, "loss": 0.2135, "step": 89870 }, { "epoch": 3.72, "grad_norm": 0.953125, "learning_rate": 0.00048127735720634746, "loss": 0.231, "step": 89880 }, { "epoch": 3.72, "grad_norm": 0.421875, "learning_rate": 0.00048127323906176387, "loss": 0.1876, "step": 89890 }, { "epoch": 3.72, "grad_norm": 1.03125, "learning_rate": 0.000481269120481949, "loss": 0.2076, "step": 89900 }, { "epoch": 3.72, "grad_norm": 0.8359375, "learning_rate": 0.00048126500146691067, "loss": 0.2117, "step": 89910 }, { "epoch": 3.72, "grad_norm": 1.1953125, "learning_rate": 0.0004812608820166565, "loss": 0.1968, "step": 89920 }, { "epoch": 3.72, "grad_norm": 0.66796875, "learning_rate": 0.00048125676213119436, "loss": 0.2203, "step": 89930 }, { "epoch": 3.73, "grad_norm": 0.46875, "learning_rate": 0.00048125264181053196, "loss": 0.1797, "step": 89940 }, { "epoch": 3.73, "grad_norm": 0.828125, "learning_rate": 0.0004812485210546771, "loss": 0.2434, "step": 89950 }, { "epoch": 3.73, "grad_norm": 0.5234375, "learning_rate": 0.00048124439986363746, "loss": 0.2284, "step": 89960 }, { "epoch": 3.73, "grad_norm": 0.337890625, "learning_rate": 0.00048124027823742075, "loss": 0.2054, "step": 89970 }, { "epoch": 3.73, "grad_norm": 0.3046875, "learning_rate": 0.00048123615617603495, "loss": 0.1896, "step": 89980 }, { "epoch": 3.73, "grad_norm": 0.95703125, "learning_rate": 0.00048123203367948764, "loss": 0.2427, "step": 89990 }, { "epoch": 3.73, "grad_norm": 1.09375, "learning_rate": 0.0004812279107477866, "loss": 0.1845, "step": 90000 }, { "epoch": 3.73, "grad_norm": 0.796875, "learning_rate": 0.0004812237873809396, "loss": 0.2277, "step": 90010 }, { "epoch": 3.73, "grad_norm": 0.3203125, "learning_rate": 0.00048121966357895433, "loss": 0.1677, "step": 90020 }, { "epoch": 3.73, "grad_norm": 0.98046875, "learning_rate": 0.0004812155393418387, "loss": 0.2861, "step": 90030 }, { "epoch": 3.73, "grad_norm": 0.484375, "learning_rate": 0.0004812114146696004, "loss": 0.1545, "step": 90040 }, { "epoch": 3.73, "grad_norm": 0.68359375, "learning_rate": 0.00048120728956224716, "loss": 0.2431, "step": 90050 }, { "epoch": 3.73, "grad_norm": 0.7421875, "learning_rate": 0.00048120316401978683, "loss": 0.2569, "step": 90060 }, { "epoch": 3.73, "grad_norm": 0.23046875, "learning_rate": 0.00048119903804222705, "loss": 0.1716, "step": 90070 }, { "epoch": 3.73, "grad_norm": 0.6171875, "learning_rate": 0.00048119491162957565, "loss": 0.2331, "step": 90080 }, { "epoch": 3.73, "grad_norm": 0.56640625, "learning_rate": 0.0004811907847818404, "loss": 0.2147, "step": 90090 }, { "epoch": 3.73, "grad_norm": 0.69921875, "learning_rate": 0.00048118665749902906, "loss": 0.2394, "step": 90100 }, { "epoch": 3.73, "grad_norm": 1.4921875, "learning_rate": 0.0004811825297811494, "loss": 0.2376, "step": 90110 }, { "epoch": 3.73, "grad_norm": 0.7578125, "learning_rate": 0.00048117840162820917, "loss": 0.189, "step": 90120 }, { "epoch": 3.73, "grad_norm": 0.59765625, "learning_rate": 0.0004811742730402162, "loss": 0.2658, "step": 90130 }, { "epoch": 3.73, "grad_norm": 0.625, "learning_rate": 0.0004811701440171782, "loss": 0.1851, "step": 90140 }, { "epoch": 3.73, "grad_norm": 1.0625, "learning_rate": 0.00048116601455910294, "loss": 0.226, "step": 90150 }, { "epoch": 3.73, "grad_norm": 0.361328125, "learning_rate": 0.0004811618846659982, "loss": 0.137, "step": 90160 }, { "epoch": 3.73, "grad_norm": 0.93359375, "learning_rate": 0.00048115775433787175, "loss": 0.2018, "step": 90170 }, { "epoch": 3.74, "grad_norm": 0.86328125, "learning_rate": 0.00048115362357473135, "loss": 0.2292, "step": 90180 }, { "epoch": 3.74, "grad_norm": 0.1787109375, "learning_rate": 0.00048114949237658476, "loss": 0.1834, "step": 90190 }, { "epoch": 3.74, "grad_norm": 0.76171875, "learning_rate": 0.0004811453607434399, "loss": 0.1983, "step": 90200 }, { "epoch": 3.74, "grad_norm": 0.90625, "learning_rate": 0.0004811412286753044, "loss": 0.1853, "step": 90210 }, { "epoch": 3.74, "grad_norm": 0.8828125, "learning_rate": 0.000481137096172186, "loss": 0.1692, "step": 90220 }, { "epoch": 3.74, "grad_norm": 0.220703125, "learning_rate": 0.0004811329632340926, "loss": 0.2442, "step": 90230 }, { "epoch": 3.74, "grad_norm": 0.65234375, "learning_rate": 0.0004811288298610319, "loss": 0.2206, "step": 90240 }, { "epoch": 3.74, "grad_norm": 0.69140625, "learning_rate": 0.00048112469605301176, "loss": 0.2357, "step": 90250 }, { "epoch": 3.74, "grad_norm": 0.57421875, "learning_rate": 0.0004811205618100398, "loss": 0.1822, "step": 90260 }, { "epoch": 3.74, "grad_norm": 0.0, "learning_rate": 0.000481116427132124, "loss": 0.2024, "step": 90270 }, { "epoch": 3.74, "grad_norm": 0.609375, "learning_rate": 0.000481112292019272, "loss": 0.2064, "step": 90280 }, { "epoch": 3.74, "grad_norm": 0.47265625, "learning_rate": 0.00048110815647149164, "loss": 0.1945, "step": 90290 }, { "epoch": 3.74, "grad_norm": 0.87890625, "learning_rate": 0.00048110402048879067, "loss": 0.2408, "step": 90300 }, { "epoch": 3.74, "grad_norm": 0.0, "learning_rate": 0.0004810998840711769, "loss": 0.2036, "step": 90310 }, { "epoch": 3.74, "grad_norm": 0.94921875, "learning_rate": 0.0004810957472186581, "loss": 0.2267, "step": 90320 }, { "epoch": 3.74, "grad_norm": 0.51953125, "learning_rate": 0.000481091609931242, "loss": 0.228, "step": 90330 }, { "epoch": 3.74, "grad_norm": 0.8046875, "learning_rate": 0.00048108747220893655, "loss": 0.2034, "step": 90340 }, { "epoch": 3.74, "grad_norm": 0.400390625, "learning_rate": 0.0004810833340517494, "loss": 0.2681, "step": 90350 }, { "epoch": 3.74, "grad_norm": 0.427734375, "learning_rate": 0.00048107919545968834, "loss": 0.2019, "step": 90360 }, { "epoch": 3.74, "grad_norm": 0.2412109375, "learning_rate": 0.0004810750564327612, "loss": 0.2654, "step": 90370 }, { "epoch": 3.74, "grad_norm": 1.046875, "learning_rate": 0.0004810709169709758, "loss": 0.1773, "step": 90380 }, { "epoch": 3.74, "grad_norm": 1.0859375, "learning_rate": 0.00048106677707433987, "loss": 0.1941, "step": 90390 }, { "epoch": 3.74, "grad_norm": 0.41015625, "learning_rate": 0.00048106263674286117, "loss": 0.2182, "step": 90400 }, { "epoch": 3.74, "grad_norm": 0.70703125, "learning_rate": 0.0004810584959765476, "loss": 0.2038, "step": 90410 }, { "epoch": 3.75, "grad_norm": 0.7421875, "learning_rate": 0.0004810543547754068, "loss": 0.2016, "step": 90420 }, { "epoch": 3.75, "grad_norm": 0.404296875, "learning_rate": 0.00048105021313944675, "loss": 0.2098, "step": 90430 }, { "epoch": 3.75, "grad_norm": 0.8828125, "learning_rate": 0.00048104607106867514, "loss": 0.181, "step": 90440 }, { "epoch": 3.75, "grad_norm": 0.53515625, "learning_rate": 0.0004810419285630997, "loss": 0.181, "step": 90450 }, { "epoch": 3.75, "grad_norm": 0.16796875, "learning_rate": 0.00048103778562272835, "loss": 0.2345, "step": 90460 }, { "epoch": 3.75, "grad_norm": 0.453125, "learning_rate": 0.00048103364224756883, "loss": 0.1571, "step": 90470 }, { "epoch": 3.75, "grad_norm": 0.625, "learning_rate": 0.000481029498437629, "loss": 0.1908, "step": 90480 }, { "epoch": 3.75, "grad_norm": 0.326171875, "learning_rate": 0.0004810253541929166, "loss": 0.2216, "step": 90490 }, { "epoch": 3.75, "grad_norm": 1.5078125, "learning_rate": 0.00048102120951343935, "loss": 0.1618, "step": 90500 }, { "epoch": 3.75, "grad_norm": 0.439453125, "learning_rate": 0.0004810170643992052, "loss": 0.1545, "step": 90510 }, { "epoch": 3.75, "grad_norm": 1.3828125, "learning_rate": 0.0004810129188502219, "loss": 0.1815, "step": 90520 }, { "epoch": 3.75, "grad_norm": 0.68359375, "learning_rate": 0.00048100877286649715, "loss": 0.2265, "step": 90530 }, { "epoch": 3.75, "grad_norm": 0.462890625, "learning_rate": 0.0004810046264480389, "loss": 0.2015, "step": 90540 }, { "epoch": 3.75, "grad_norm": 1.1796875, "learning_rate": 0.0004810004795948548, "loss": 0.1892, "step": 90550 }, { "epoch": 3.75, "grad_norm": 1.2109375, "learning_rate": 0.0004809963323069528, "loss": 0.2297, "step": 90560 }, { "epoch": 3.75, "grad_norm": 1.1328125, "learning_rate": 0.00048099218458434067, "loss": 0.285, "step": 90570 }, { "epoch": 3.75, "grad_norm": 0.828125, "learning_rate": 0.00048098803642702623, "loss": 0.2117, "step": 90580 }, { "epoch": 3.75, "grad_norm": 1.09375, "learning_rate": 0.0004809838878350172, "loss": 0.1851, "step": 90590 }, { "epoch": 3.75, "grad_norm": 0.2392578125, "learning_rate": 0.0004809797388083214, "loss": 0.25, "step": 90600 }, { "epoch": 3.75, "grad_norm": 1.59375, "learning_rate": 0.00048097558934694675, "loss": 0.181, "step": 90610 }, { "epoch": 3.75, "grad_norm": 0.7421875, "learning_rate": 0.0004809714394509009, "loss": 0.2002, "step": 90620 }, { "epoch": 3.75, "grad_norm": 0.53515625, "learning_rate": 0.0004809672891201918, "loss": 0.2398, "step": 90630 }, { "epoch": 3.75, "grad_norm": 0.45703125, "learning_rate": 0.0004809631383548272, "loss": 0.2145, "step": 90640 }, { "epoch": 3.75, "grad_norm": 0.361328125, "learning_rate": 0.0004809589871548149, "loss": 0.2067, "step": 90650 }, { "epoch": 3.76, "grad_norm": 0.5078125, "learning_rate": 0.00048095483552016273, "loss": 0.2345, "step": 90660 }, { "epoch": 3.76, "grad_norm": 0.7265625, "learning_rate": 0.0004809506834508786, "loss": 0.2059, "step": 90670 }, { "epoch": 3.76, "grad_norm": 2.359375, "learning_rate": 0.00048094653094697004, "loss": 0.2689, "step": 90680 }, { "epoch": 3.76, "grad_norm": 1.2265625, "learning_rate": 0.00048094237800844523, "loss": 0.1852, "step": 90690 }, { "epoch": 3.76, "grad_norm": 1.6328125, "learning_rate": 0.0004809382246353117, "loss": 0.215, "step": 90700 }, { "epoch": 3.76, "grad_norm": 1.140625, "learning_rate": 0.00048093407082757734, "loss": 0.2212, "step": 90710 }, { "epoch": 3.76, "grad_norm": 1.2265625, "learning_rate": 0.0004809299165852501, "loss": 0.2303, "step": 90720 }, { "epoch": 3.76, "grad_norm": 0.388671875, "learning_rate": 0.0004809257619083376, "loss": 0.2383, "step": 90730 }, { "epoch": 3.76, "grad_norm": 0.51171875, "learning_rate": 0.00048092160679684783, "loss": 0.1465, "step": 90740 }, { "epoch": 3.76, "grad_norm": 1.640625, "learning_rate": 0.00048091745125078845, "loss": 0.2389, "step": 90750 }, { "epoch": 3.76, "grad_norm": 1.25, "learning_rate": 0.00048091329527016744, "loss": 0.2372, "step": 90760 }, { "epoch": 3.76, "grad_norm": 0.65625, "learning_rate": 0.0004809091388549925, "loss": 0.1936, "step": 90770 }, { "epoch": 3.76, "grad_norm": 1.2890625, "learning_rate": 0.0004809049820052716, "loss": 0.245, "step": 90780 }, { "epoch": 3.76, "grad_norm": 0.50390625, "learning_rate": 0.0004809008247210123, "loss": 0.229, "step": 90790 }, { "epoch": 3.76, "grad_norm": 0.404296875, "learning_rate": 0.0004808966670022227, "loss": 0.1973, "step": 90800 }, { "epoch": 3.76, "grad_norm": 0.201171875, "learning_rate": 0.0004808925088489104, "loss": 0.1995, "step": 90810 }, { "epoch": 3.76, "grad_norm": 0.89453125, "learning_rate": 0.0004808883502610834, "loss": 0.2203, "step": 90820 }, { "epoch": 3.76, "grad_norm": 0.70703125, "learning_rate": 0.00048088419123874944, "loss": 0.2208, "step": 90830 }, { "epoch": 3.76, "grad_norm": 0.62109375, "learning_rate": 0.0004808800317819164, "loss": 0.1991, "step": 90840 }, { "epoch": 3.76, "grad_norm": 1.1796875, "learning_rate": 0.000480875871890592, "loss": 0.233, "step": 90850 }, { "epoch": 3.76, "grad_norm": 0.59375, "learning_rate": 0.00048087171156478427, "loss": 0.2794, "step": 90860 }, { "epoch": 3.76, "grad_norm": 0.609375, "learning_rate": 0.0004808675508045008, "loss": 0.2344, "step": 90870 }, { "epoch": 3.76, "grad_norm": 0.9375, "learning_rate": 0.00048086338960974957, "loss": 0.2399, "step": 90880 }, { "epoch": 3.76, "grad_norm": 0.45703125, "learning_rate": 0.0004808592279805384, "loss": 0.2298, "step": 90890 }, { "epoch": 3.77, "grad_norm": 0.1865234375, "learning_rate": 0.000480855065916875, "loss": 0.2039, "step": 90900 }, { "epoch": 3.77, "grad_norm": 0.404296875, "learning_rate": 0.00048085090341876737, "loss": 0.2067, "step": 90910 }, { "epoch": 3.77, "grad_norm": 1.0703125, "learning_rate": 0.0004808467404862232, "loss": 0.1964, "step": 90920 }, { "epoch": 3.77, "grad_norm": 0.474609375, "learning_rate": 0.0004808425771192505, "loss": 0.2177, "step": 90930 }, { "epoch": 3.77, "grad_norm": 0.66015625, "learning_rate": 0.0004808384133178569, "loss": 0.2023, "step": 90940 }, { "epoch": 3.77, "grad_norm": 0.58984375, "learning_rate": 0.0004808342490820504, "loss": 0.2376, "step": 90950 }, { "epoch": 3.77, "grad_norm": 0.77734375, "learning_rate": 0.0004808300844118388, "loss": 0.2055, "step": 90960 }, { "epoch": 3.77, "grad_norm": 1.3203125, "learning_rate": 0.00048082591930722983, "loss": 0.1893, "step": 90970 }, { "epoch": 3.77, "grad_norm": 0.7421875, "learning_rate": 0.00048082175376823146, "loss": 0.2799, "step": 90980 }, { "epoch": 3.77, "grad_norm": 0.609375, "learning_rate": 0.00048081758779485145, "loss": 0.2576, "step": 90990 }, { "epoch": 3.77, "grad_norm": 0.4140625, "learning_rate": 0.0004808134213870977, "loss": 0.1928, "step": 91000 }, { "epoch": 3.77, "grad_norm": 0.90234375, "learning_rate": 0.000480809254544978, "loss": 0.2699, "step": 91010 }, { "epoch": 3.77, "grad_norm": 0.58984375, "learning_rate": 0.00048080508726850015, "loss": 0.2487, "step": 91020 }, { "epoch": 3.77, "grad_norm": 0.515625, "learning_rate": 0.0004808009195576721, "loss": 0.2355, "step": 91030 }, { "epoch": 3.77, "grad_norm": 1.0, "learning_rate": 0.0004807967514125017, "loss": 0.2006, "step": 91040 }, { "epoch": 3.77, "grad_norm": 0.162109375, "learning_rate": 0.00048079258283299667, "loss": 0.1931, "step": 91050 }, { "epoch": 3.77, "grad_norm": 0.703125, "learning_rate": 0.00048078841381916487, "loss": 0.2018, "step": 91060 }, { "epoch": 3.77, "grad_norm": 0.6484375, "learning_rate": 0.0004807842443710143, "loss": 0.1952, "step": 91070 }, { "epoch": 3.77, "grad_norm": 0.33984375, "learning_rate": 0.00048078007448855263, "loss": 0.2079, "step": 91080 }, { "epoch": 3.77, "grad_norm": 0.65234375, "learning_rate": 0.00048077590417178786, "loss": 0.2663, "step": 91090 }, { "epoch": 3.77, "grad_norm": 1.171875, "learning_rate": 0.00048077173342072764, "loss": 0.2326, "step": 91100 }, { "epoch": 3.77, "grad_norm": 0.68359375, "learning_rate": 0.0004807675622353801, "loss": 0.1673, "step": 91110 }, { "epoch": 3.77, "grad_norm": 0.84375, "learning_rate": 0.0004807633906157528, "loss": 0.1556, "step": 91120 }, { "epoch": 3.77, "grad_norm": 1.1953125, "learning_rate": 0.00048075921856185376, "loss": 0.2016, "step": 91130 }, { "epoch": 3.78, "grad_norm": 0.50390625, "learning_rate": 0.0004807550460736908, "loss": 0.1799, "step": 91140 }, { "epoch": 3.78, "grad_norm": 1.625, "learning_rate": 0.0004807508731512718, "loss": 0.1731, "step": 91150 }, { "epoch": 3.78, "grad_norm": 0.8671875, "learning_rate": 0.00048074669979460453, "loss": 0.1698, "step": 91160 }, { "epoch": 3.78, "grad_norm": 0.474609375, "learning_rate": 0.00048074252600369686, "loss": 0.176, "step": 91170 }, { "epoch": 3.78, "grad_norm": 0.58984375, "learning_rate": 0.00048073835177855673, "loss": 0.1877, "step": 91180 }, { "epoch": 3.78, "grad_norm": 0.8984375, "learning_rate": 0.0004807341771191919, "loss": 0.1914, "step": 91190 }, { "epoch": 3.78, "grad_norm": 0.62890625, "learning_rate": 0.0004807300020256102, "loss": 0.2374, "step": 91200 }, { "epoch": 3.78, "grad_norm": 1.078125, "learning_rate": 0.0004807258264978197, "loss": 0.225, "step": 91210 }, { "epoch": 3.78, "grad_norm": 1.0625, "learning_rate": 0.00048072165053582804, "loss": 0.2214, "step": 91220 }, { "epoch": 3.78, "grad_norm": 0.640625, "learning_rate": 0.00048071747413964315, "loss": 0.264, "step": 91230 }, { "epoch": 3.78, "grad_norm": 0.388671875, "learning_rate": 0.00048071329730927287, "loss": 0.2253, "step": 91240 }, { "epoch": 3.78, "grad_norm": 0.54296875, "learning_rate": 0.0004807091200447251, "loss": 0.1937, "step": 91250 }, { "epoch": 3.78, "grad_norm": 0.671875, "learning_rate": 0.0004807049423460077, "loss": 0.2647, "step": 91260 }, { "epoch": 3.78, "grad_norm": 0.404296875, "learning_rate": 0.00048070076421312846, "loss": 0.2325, "step": 91270 }, { "epoch": 3.78, "grad_norm": 0.75, "learning_rate": 0.0004806965856460954, "loss": 0.2252, "step": 91280 }, { "epoch": 3.78, "grad_norm": 0.7109375, "learning_rate": 0.0004806924066449162, "loss": 0.196, "step": 91290 }, { "epoch": 3.78, "grad_norm": 0.3984375, "learning_rate": 0.0004806882272095988, "loss": 0.1979, "step": 91300 }, { "epoch": 3.78, "grad_norm": 1.1640625, "learning_rate": 0.00048068404734015105, "loss": 0.1881, "step": 91310 }, { "epoch": 3.78, "grad_norm": 0.828125, "learning_rate": 0.0004806798670365809, "loss": 0.2142, "step": 91320 }, { "epoch": 3.78, "grad_norm": 0.484375, "learning_rate": 0.00048067568629889615, "loss": 0.2271, "step": 91330 }, { "epoch": 3.78, "grad_norm": 0.640625, "learning_rate": 0.00048067150512710454, "loss": 0.197, "step": 91340 }, { "epoch": 3.78, "grad_norm": 0.73046875, "learning_rate": 0.0004806673235212142, "loss": 0.2671, "step": 91350 }, { "epoch": 3.78, "grad_norm": 0.546875, "learning_rate": 0.00048066314148123286, "loss": 0.243, "step": 91360 }, { "epoch": 3.78, "grad_norm": 0.474609375, "learning_rate": 0.0004806589590071684, "loss": 0.2897, "step": 91370 }, { "epoch": 3.78, "grad_norm": 0.671875, "learning_rate": 0.00048065477609902864, "loss": 0.2273, "step": 91380 }, { "epoch": 3.79, "grad_norm": 0.6796875, "learning_rate": 0.0004806505927568215, "loss": 0.2425, "step": 91390 }, { "epoch": 3.79, "grad_norm": 1.3203125, "learning_rate": 0.00048064640898055487, "loss": 0.2381, "step": 91400 }, { "epoch": 3.79, "grad_norm": 0.88671875, "learning_rate": 0.00048064222477023653, "loss": 0.2375, "step": 91410 }, { "epoch": 3.79, "grad_norm": 0.703125, "learning_rate": 0.00048063804012587455, "loss": 0.2383, "step": 91420 }, { "epoch": 3.79, "grad_norm": 0.61328125, "learning_rate": 0.0004806338550474766, "loss": 0.2274, "step": 91430 }, { "epoch": 3.79, "grad_norm": 0.65625, "learning_rate": 0.0004806296695350507, "loss": 0.239, "step": 91440 }, { "epoch": 3.79, "grad_norm": 0.482421875, "learning_rate": 0.0004806254835886047, "loss": 0.1616, "step": 91450 }, { "epoch": 3.79, "grad_norm": 0.8828125, "learning_rate": 0.0004806212972081464, "loss": 0.2353, "step": 91460 }, { "epoch": 3.79, "grad_norm": 1.0703125, "learning_rate": 0.00048061711039368375, "loss": 0.1752, "step": 91470 }, { "epoch": 3.79, "grad_norm": 0.65625, "learning_rate": 0.00048061292314522454, "loss": 0.2342, "step": 91480 }, { "epoch": 3.79, "grad_norm": 0.4609375, "learning_rate": 0.0004806087354627767, "loss": 0.2441, "step": 91490 }, { "epoch": 3.79, "grad_norm": 0.77734375, "learning_rate": 0.0004806045473463482, "loss": 0.2049, "step": 91500 }, { "epoch": 3.79, "grad_norm": 0.55859375, "learning_rate": 0.00048060035879594676, "loss": 0.2009, "step": 91510 }, { "epoch": 3.79, "grad_norm": 0.9375, "learning_rate": 0.00048059616981158045, "loss": 0.2022, "step": 91520 }, { "epoch": 3.79, "grad_norm": 0.875, "learning_rate": 0.000480591980393257, "loss": 0.2449, "step": 91530 }, { "epoch": 3.79, "grad_norm": 0.494140625, "learning_rate": 0.00048058779054098433, "loss": 0.1413, "step": 91540 }, { "epoch": 3.79, "grad_norm": 0.51171875, "learning_rate": 0.00048058360025477034, "loss": 0.202, "step": 91550 }, { "epoch": 3.79, "grad_norm": 1.9921875, "learning_rate": 0.000480579409534623, "loss": 0.2013, "step": 91560 }, { "epoch": 3.79, "grad_norm": 0.34765625, "learning_rate": 0.00048057521838055, "loss": 0.1891, "step": 91570 }, { "epoch": 3.79, "grad_norm": 0.671875, "learning_rate": 0.0004805710267925594, "loss": 0.2499, "step": 91580 }, { "epoch": 3.79, "grad_norm": 0.99609375, "learning_rate": 0.000480566834770659, "loss": 0.2193, "step": 91590 }, { "epoch": 3.79, "grad_norm": 0.77734375, "learning_rate": 0.0004805626423148567, "loss": 0.2063, "step": 91600 }, { "epoch": 3.79, "grad_norm": 0.58203125, "learning_rate": 0.0004805584494251604, "loss": 0.1933, "step": 91610 }, { "epoch": 3.79, "grad_norm": 0.5234375, "learning_rate": 0.00048055425610157805, "loss": 0.1794, "step": 91620 }, { "epoch": 3.8, "grad_norm": 0.03515625, "learning_rate": 0.00048055006234411744, "loss": 0.2285, "step": 91630 }, { "epoch": 3.8, "grad_norm": 1.03125, "learning_rate": 0.00048054586815278656, "loss": 0.2552, "step": 91640 }, { "epoch": 3.8, "grad_norm": 0.0, "learning_rate": 0.0004805416735275932, "loss": 0.21, "step": 91650 }, { "epoch": 3.8, "grad_norm": 0.52734375, "learning_rate": 0.00048053747846854534, "loss": 0.1555, "step": 91660 }, { "epoch": 3.8, "grad_norm": 0.65625, "learning_rate": 0.00048053328297565083, "loss": 0.2524, "step": 91670 }, { "epoch": 3.8, "grad_norm": 2.03125, "learning_rate": 0.0004805290870489176, "loss": 0.1937, "step": 91680 }, { "epoch": 3.8, "grad_norm": 0.55859375, "learning_rate": 0.0004805248906883535, "loss": 0.1495, "step": 91690 }, { "epoch": 3.8, "grad_norm": 1.2109375, "learning_rate": 0.00048052069389396644, "loss": 0.2001, "step": 91700 }, { "epoch": 3.8, "grad_norm": 0.6796875, "learning_rate": 0.00048051649666576435, "loss": 0.1219, "step": 91710 }, { "epoch": 3.8, "grad_norm": 0.546875, "learning_rate": 0.00048051229900375513, "loss": 0.2695, "step": 91720 }, { "epoch": 3.8, "grad_norm": 1.3203125, "learning_rate": 0.0004805081009079466, "loss": 0.1929, "step": 91730 }, { "epoch": 3.8, "grad_norm": 0.478515625, "learning_rate": 0.00048050390237834676, "loss": 0.2143, "step": 91740 }, { "epoch": 3.8, "grad_norm": 0.59375, "learning_rate": 0.0004804997034149634, "loss": 0.2081, "step": 91750 }, { "epoch": 3.8, "grad_norm": 1.1171875, "learning_rate": 0.0004804955040178046, "loss": 0.2531, "step": 91760 }, { "epoch": 3.8, "grad_norm": 0.96484375, "learning_rate": 0.00048049130418687804, "loss": 0.2407, "step": 91770 }, { "epoch": 3.8, "grad_norm": 1.5390625, "learning_rate": 0.0004804871039221918, "loss": 0.2512, "step": 91780 }, { "epoch": 3.8, "grad_norm": 0.55078125, "learning_rate": 0.0004804829032237537, "loss": 0.1919, "step": 91790 }, { "epoch": 3.8, "grad_norm": 0.578125, "learning_rate": 0.00048047870209157173, "loss": 0.2077, "step": 91800 }, { "epoch": 3.8, "grad_norm": 1.0, "learning_rate": 0.0004804745005256537, "loss": 0.2533, "step": 91810 }, { "epoch": 3.8, "grad_norm": 1.3984375, "learning_rate": 0.0004804702985260075, "loss": 0.2075, "step": 91820 }, { "epoch": 3.8, "grad_norm": 0.62890625, "learning_rate": 0.0004804660960926411, "loss": 0.1778, "step": 91830 }, { "epoch": 3.8, "grad_norm": 1.9609375, "learning_rate": 0.0004804618932255624, "loss": 0.2141, "step": 91840 }, { "epoch": 3.8, "grad_norm": 0.3203125, "learning_rate": 0.00048045768992477936, "loss": 0.2026, "step": 91850 }, { "epoch": 3.8, "grad_norm": 0.87109375, "learning_rate": 0.0004804534861902997, "loss": 0.27, "step": 91860 }, { "epoch": 3.81, "grad_norm": 0.87890625, "learning_rate": 0.00048044928202213154, "loss": 0.2054, "step": 91870 }, { "epoch": 3.81, "grad_norm": 0.578125, "learning_rate": 0.00048044507742028283, "loss": 0.2195, "step": 91880 }, { "epoch": 3.81, "grad_norm": 0.1337890625, "learning_rate": 0.0004804408723847613, "loss": 0.2179, "step": 91890 }, { "epoch": 3.81, "grad_norm": 1.3828125, "learning_rate": 0.00048043666691557484, "loss": 0.2024, "step": 91900 }, { "epoch": 3.81, "grad_norm": 0.69140625, "learning_rate": 0.0004804324610127315, "loss": 0.2222, "step": 91910 }, { "epoch": 3.81, "grad_norm": 0.62890625, "learning_rate": 0.00048042825467623917, "loss": 0.1599, "step": 91920 }, { "epoch": 3.81, "grad_norm": 0.61328125, "learning_rate": 0.0004804240479061058, "loss": 0.2068, "step": 91930 }, { "epoch": 3.81, "grad_norm": 0.98828125, "learning_rate": 0.00048041984070233923, "loss": 0.1952, "step": 91940 }, { "epoch": 3.81, "grad_norm": 0.64453125, "learning_rate": 0.0004804156330649474, "loss": 0.206, "step": 91950 }, { "epoch": 3.81, "grad_norm": 0.7890625, "learning_rate": 0.0004804114249939382, "loss": 0.1522, "step": 91960 }, { "epoch": 3.81, "grad_norm": 0.6015625, "learning_rate": 0.0004804072164893196, "loss": 0.1992, "step": 91970 }, { "epoch": 3.81, "grad_norm": 0.7109375, "learning_rate": 0.00048040300755109946, "loss": 0.1977, "step": 91980 }, { "epoch": 3.81, "grad_norm": 0.7421875, "learning_rate": 0.0004803987981792858, "loss": 0.2123, "step": 91990 }, { "epoch": 3.81, "grad_norm": 0.58203125, "learning_rate": 0.00048039458837388643, "loss": 0.1885, "step": 92000 }, { "epoch": 3.81, "grad_norm": 1.140625, "learning_rate": 0.00048039037813490937, "loss": 0.1966, "step": 92010 }, { "epoch": 3.81, "grad_norm": 1.3984375, "learning_rate": 0.0004803861674623625, "loss": 0.2115, "step": 92020 }, { "epoch": 3.81, "grad_norm": 0.34375, "learning_rate": 0.0004803819563562537, "loss": 0.2138, "step": 92030 }, { "epoch": 3.81, "grad_norm": 0.51171875, "learning_rate": 0.00048037774481659104, "loss": 0.2004, "step": 92040 }, { "epoch": 3.81, "grad_norm": 0.9453125, "learning_rate": 0.00048037353284338224, "loss": 0.2064, "step": 92050 }, { "epoch": 3.81, "grad_norm": 0.26953125, "learning_rate": 0.00048036932043663536, "loss": 0.2317, "step": 92060 }, { "epoch": 3.81, "grad_norm": 0.9765625, "learning_rate": 0.00048036510759635827, "loss": 0.2164, "step": 92070 }, { "epoch": 3.81, "grad_norm": 0.8046875, "learning_rate": 0.0004803608943225589, "loss": 0.1406, "step": 92080 }, { "epoch": 3.81, "grad_norm": 1.0390625, "learning_rate": 0.0004803566806152453, "loss": 0.2272, "step": 92090 }, { "epoch": 3.81, "grad_norm": 0.52734375, "learning_rate": 0.00048035246647442525, "loss": 0.2313, "step": 92100 }, { "epoch": 3.82, "grad_norm": 0.3515625, "learning_rate": 0.00048034825190010675, "loss": 0.2204, "step": 92110 }, { "epoch": 3.82, "grad_norm": 1.015625, "learning_rate": 0.00048034403689229766, "loss": 0.2399, "step": 92120 }, { "epoch": 3.82, "grad_norm": 0.8359375, "learning_rate": 0.00048033982145100605, "loss": 0.2131, "step": 92130 }, { "epoch": 3.82, "grad_norm": 1.0078125, "learning_rate": 0.00048033560557623974, "loss": 0.1901, "step": 92140 }, { "epoch": 3.82, "grad_norm": 0.734375, "learning_rate": 0.0004803313892680067, "loss": 0.2889, "step": 92150 }, { "epoch": 3.82, "grad_norm": 0.390625, "learning_rate": 0.00048032717252631486, "loss": 0.2183, "step": 92160 }, { "epoch": 3.82, "grad_norm": 0.66015625, "learning_rate": 0.0004803229553511721, "loss": 0.2715, "step": 92170 }, { "epoch": 3.82, "grad_norm": 0.421875, "learning_rate": 0.0004803187377425865, "loss": 0.2135, "step": 92180 }, { "epoch": 3.82, "grad_norm": 1.2265625, "learning_rate": 0.0004803145197005658, "loss": 0.2381, "step": 92190 }, { "epoch": 3.82, "grad_norm": 1.390625, "learning_rate": 0.0004803103012251181, "loss": 0.2344, "step": 92200 }, { "epoch": 3.82, "grad_norm": 0.51953125, "learning_rate": 0.00048030608231625126, "loss": 0.2201, "step": 92210 }, { "epoch": 3.82, "grad_norm": 0.515625, "learning_rate": 0.00048030186297397335, "loss": 0.2249, "step": 92220 }, { "epoch": 3.82, "grad_norm": 0.57421875, "learning_rate": 0.00048029764319829207, "loss": 0.226, "step": 92230 }, { "epoch": 3.82, "grad_norm": 0.765625, "learning_rate": 0.00048029342298921556, "loss": 0.3081, "step": 92240 }, { "epoch": 3.82, "grad_norm": 0.4609375, "learning_rate": 0.00048028920234675167, "loss": 0.1992, "step": 92250 }, { "epoch": 3.82, "grad_norm": 0.765625, "learning_rate": 0.0004802849812709085, "loss": 0.1949, "step": 92260 }, { "epoch": 3.82, "grad_norm": 0.62109375, "learning_rate": 0.00048028075976169375, "loss": 0.2358, "step": 92270 }, { "epoch": 3.82, "grad_norm": 0.828125, "learning_rate": 0.0004802765378191154, "loss": 0.1759, "step": 92280 }, { "epoch": 3.82, "grad_norm": 0.5859375, "learning_rate": 0.00048027231544318157, "loss": 0.2065, "step": 92290 }, { "epoch": 3.82, "grad_norm": 2.734375, "learning_rate": 0.0004802680926339001, "loss": 0.1797, "step": 92300 }, { "epoch": 3.82, "grad_norm": 0.88671875, "learning_rate": 0.000480263869391279, "loss": 0.1855, "step": 92310 }, { "epoch": 3.82, "grad_norm": 0.376953125, "learning_rate": 0.00048025964571532607, "loss": 0.2055, "step": 92320 }, { "epoch": 3.82, "grad_norm": 0.455078125, "learning_rate": 0.0004802554216060494, "loss": 0.2556, "step": 92330 }, { "epoch": 3.82, "grad_norm": 0.609375, "learning_rate": 0.00048025119706345687, "loss": 0.1499, "step": 92340 }, { "epoch": 3.83, "grad_norm": 0.8203125, "learning_rate": 0.00048024697208755643, "loss": 0.2268, "step": 92350 }, { "epoch": 3.83, "grad_norm": 1.890625, "learning_rate": 0.0004802427466783561, "loss": 0.2023, "step": 92360 }, { "epoch": 3.83, "grad_norm": 0.72265625, "learning_rate": 0.00048023852083586377, "loss": 0.2753, "step": 92370 }, { "epoch": 3.83, "grad_norm": 0.89453125, "learning_rate": 0.0004802342945600874, "loss": 0.226, "step": 92380 }, { "epoch": 3.83, "grad_norm": 0.80078125, "learning_rate": 0.00048023006785103496, "loss": 0.2508, "step": 92390 }, { "epoch": 3.83, "grad_norm": 0.58984375, "learning_rate": 0.00048022584070871443, "loss": 0.2184, "step": 92400 }, { "epoch": 3.83, "grad_norm": 0.0, "learning_rate": 0.0004802216131331337, "loss": 0.2519, "step": 92410 }, { "epoch": 3.83, "grad_norm": 0.64453125, "learning_rate": 0.0004802173851243007, "loss": 0.2244, "step": 92420 }, { "epoch": 3.83, "grad_norm": 0.427734375, "learning_rate": 0.0004802131566822235, "loss": 0.191, "step": 92430 }, { "epoch": 3.83, "grad_norm": 0.44140625, "learning_rate": 0.00048020892780691, "loss": 0.2137, "step": 92440 }, { "epoch": 3.83, "grad_norm": 0.75390625, "learning_rate": 0.0004802046984983681, "loss": 0.2166, "step": 92450 }, { "epoch": 3.83, "grad_norm": 1.953125, "learning_rate": 0.0004802004687566059, "loss": 0.2379, "step": 92460 }, { "epoch": 3.83, "grad_norm": 0.5625, "learning_rate": 0.0004801962385816313, "loss": 0.1984, "step": 92470 }, { "epoch": 3.83, "grad_norm": 0.671875, "learning_rate": 0.0004801920079734522, "loss": 0.2794, "step": 92480 }, { "epoch": 3.83, "grad_norm": 0.287109375, "learning_rate": 0.00048018777693207654, "loss": 0.2247, "step": 92490 }, { "epoch": 3.83, "grad_norm": 1.0234375, "learning_rate": 0.0004801835454575124, "loss": 0.2452, "step": 92500 }, { "epoch": 3.83, "grad_norm": 0.4921875, "learning_rate": 0.00048017931354976765, "loss": 0.2306, "step": 92510 }, { "epoch": 3.83, "grad_norm": 1.53125, "learning_rate": 0.00048017508120885036, "loss": 0.2411, "step": 92520 }, { "epoch": 3.83, "grad_norm": 0.369140625, "learning_rate": 0.00048017084843476833, "loss": 0.2657, "step": 92530 }, { "epoch": 3.83, "grad_norm": 0.287109375, "learning_rate": 0.0004801666152275297, "loss": 0.2008, "step": 92540 }, { "epoch": 3.83, "grad_norm": 0.87109375, "learning_rate": 0.0004801623815871423, "loss": 0.2029, "step": 92550 }, { "epoch": 3.83, "grad_norm": 0.69921875, "learning_rate": 0.0004801581475136142, "loss": 0.1908, "step": 92560 }, { "epoch": 3.83, "grad_norm": 1.5078125, "learning_rate": 0.0004801539130069532, "loss": 0.1476, "step": 92570 }, { "epoch": 3.83, "grad_norm": 0.74609375, "learning_rate": 0.00048014967806716755, "loss": 0.2475, "step": 92580 }, { "epoch": 3.84, "grad_norm": 0.1435546875, "learning_rate": 0.00048014544269426494, "loss": 0.1491, "step": 92590 }, { "epoch": 3.84, "grad_norm": 0.5078125, "learning_rate": 0.00048014120688825355, "loss": 0.1877, "step": 92600 }, { "epoch": 3.84, "grad_norm": 0.8984375, "learning_rate": 0.0004801369706491412, "loss": 0.1924, "step": 92610 }, { "epoch": 3.84, "grad_norm": 0.73046875, "learning_rate": 0.00048013273397693595, "loss": 0.2351, "step": 92620 }, { "epoch": 3.84, "grad_norm": 0.1142578125, "learning_rate": 0.0004801284968716458, "loss": 0.2011, "step": 92630 }, { "epoch": 3.84, "grad_norm": 0.36328125, "learning_rate": 0.00048012425933327866, "loss": 0.2398, "step": 92640 }, { "epoch": 3.84, "grad_norm": 0.7109375, "learning_rate": 0.0004801200213618424, "loss": 0.2021, "step": 92650 }, { "epoch": 3.84, "grad_norm": 0.70703125, "learning_rate": 0.00048011578295734515, "loss": 0.1872, "step": 92660 }, { "epoch": 3.84, "grad_norm": 0.6640625, "learning_rate": 0.00048011154411979493, "loss": 0.2094, "step": 92670 }, { "epoch": 3.84, "grad_norm": 2.03125, "learning_rate": 0.00048010730484919956, "loss": 0.2161, "step": 92680 }, { "epoch": 3.84, "grad_norm": 0.455078125, "learning_rate": 0.0004801030651455671, "loss": 0.21, "step": 92690 }, { "epoch": 3.84, "grad_norm": 0.47265625, "learning_rate": 0.0004800988250089056, "loss": 0.2317, "step": 92700 }, { "epoch": 3.84, "grad_norm": 0.63671875, "learning_rate": 0.00048009458443922285, "loss": 0.2147, "step": 92710 }, { "epoch": 3.84, "grad_norm": 0.7734375, "learning_rate": 0.000480090343436527, "loss": 0.2668, "step": 92720 }, { "epoch": 3.84, "grad_norm": 0.7890625, "learning_rate": 0.00048008610200082593, "loss": 0.217, "step": 92730 }, { "epoch": 3.84, "grad_norm": 0.294921875, "learning_rate": 0.0004800818601321277, "loss": 0.1825, "step": 92740 }, { "epoch": 3.84, "grad_norm": 0.9765625, "learning_rate": 0.0004800776178304402, "loss": 0.1882, "step": 92750 }, { "epoch": 3.84, "grad_norm": 0.49609375, "learning_rate": 0.0004800733750957715, "loss": 0.1986, "step": 92760 }, { "epoch": 3.84, "grad_norm": 0.66015625, "learning_rate": 0.0004800691319281296, "loss": 0.1935, "step": 92770 }, { "epoch": 3.84, "grad_norm": 0.671875, "learning_rate": 0.0004800648883275225, "loss": 0.1991, "step": 92780 }, { "epoch": 3.84, "grad_norm": 0.73828125, "learning_rate": 0.000480060644293958, "loss": 0.2363, "step": 92790 }, { "epoch": 3.84, "grad_norm": 1.5546875, "learning_rate": 0.0004800563998274442, "loss": 0.2162, "step": 92800 }, { "epoch": 3.84, "grad_norm": 0.79296875, "learning_rate": 0.00048005215492798913, "loss": 0.1946, "step": 92810 }, { "epoch": 3.84, "grad_norm": 1.078125, "learning_rate": 0.0004800479095956009, "loss": 0.2433, "step": 92820 }, { "epoch": 3.85, "grad_norm": 1.0078125, "learning_rate": 0.00048004366383028706, "loss": 0.2078, "step": 92830 }, { "epoch": 3.85, "grad_norm": 0.44921875, "learning_rate": 0.0004800394176320561, "loss": 0.2277, "step": 92840 }, { "epoch": 3.85, "grad_norm": 0.296875, "learning_rate": 0.00048003517100091576, "loss": 0.2184, "step": 92850 }, { "epoch": 3.85, "grad_norm": 0.32421875, "learning_rate": 0.00048003092393687405, "loss": 0.2352, "step": 92860 }, { "epoch": 3.85, "grad_norm": 1.296875, "learning_rate": 0.00048002667643993894, "loss": 0.2233, "step": 92870 }, { "epoch": 3.85, "grad_norm": 1.5234375, "learning_rate": 0.00048002242851011854, "loss": 0.2273, "step": 92880 }, { "epoch": 3.85, "grad_norm": 1.015625, "learning_rate": 0.0004800181801474207, "loss": 0.2207, "step": 92890 }, { "epoch": 3.85, "grad_norm": 0.279296875, "learning_rate": 0.00048001393135185355, "loss": 0.2661, "step": 92900 }, { "epoch": 3.85, "grad_norm": 0.734375, "learning_rate": 0.000480009682123425, "loss": 0.2188, "step": 92910 }, { "epoch": 3.85, "grad_norm": 0.7265625, "learning_rate": 0.000480005432462143, "loss": 0.2231, "step": 92920 }, { "epoch": 3.85, "grad_norm": 0.1982421875, "learning_rate": 0.00048000118236801574, "loss": 0.1828, "step": 92930 }, { "epoch": 3.85, "grad_norm": 1.9765625, "learning_rate": 0.000479996931841051, "loss": 0.2213, "step": 92940 }, { "epoch": 3.85, "grad_norm": 0.64453125, "learning_rate": 0.00047999268088125686, "loss": 0.2177, "step": 92950 }, { "epoch": 3.85, "grad_norm": 0.20703125, "learning_rate": 0.0004799884294886414, "loss": 0.2073, "step": 92960 }, { "epoch": 3.85, "grad_norm": 0.462890625, "learning_rate": 0.00047998417766321246, "loss": 0.205, "step": 92970 }, { "epoch": 3.85, "grad_norm": 0.57421875, "learning_rate": 0.00047997992540497824, "loss": 0.2127, "step": 92980 }, { "epoch": 3.85, "grad_norm": 0.50390625, "learning_rate": 0.0004799756727139466, "loss": 0.264, "step": 92990 }, { "epoch": 3.85, "grad_norm": 1.0390625, "learning_rate": 0.00047997141959012555, "loss": 0.2217, "step": 93000 }, { "epoch": 3.85, "grad_norm": 1.09375, "learning_rate": 0.00047996716603352307, "loss": 0.2117, "step": 93010 }, { "epoch": 3.85, "grad_norm": 0.93359375, "learning_rate": 0.0004799629120441473, "loss": 0.2431, "step": 93020 }, { "epoch": 3.85, "grad_norm": 0.73828125, "learning_rate": 0.00047995865762200617, "loss": 0.2372, "step": 93030 }, { "epoch": 3.85, "grad_norm": 0.7890625, "learning_rate": 0.0004799544027671077, "loss": 0.2326, "step": 93040 }, { "epoch": 3.85, "grad_norm": 0.1748046875, "learning_rate": 0.00047995014747945983, "loss": 0.2512, "step": 93050 }, { "epoch": 3.85, "grad_norm": 0.58984375, "learning_rate": 0.0004799458917590707, "loss": 0.1837, "step": 93060 }, { "epoch": 3.85, "grad_norm": 0.96484375, "learning_rate": 0.00047994163560594803, "loss": 0.1915, "step": 93070 }, { "epoch": 3.86, "grad_norm": 0.51171875, "learning_rate": 0.0004799373790201002, "loss": 0.2089, "step": 93080 }, { "epoch": 3.86, "grad_norm": 0.4609375, "learning_rate": 0.00047993312200153506, "loss": 0.2223, "step": 93090 }, { "epoch": 3.86, "grad_norm": 0.48828125, "learning_rate": 0.0004799288645502605, "loss": 0.2428, "step": 93100 }, { "epoch": 3.86, "grad_norm": 0.310546875, "learning_rate": 0.0004799246066662848, "loss": 0.2323, "step": 93110 }, { "epoch": 3.86, "grad_norm": 0.259765625, "learning_rate": 0.0004799203483496157, "loss": 0.2356, "step": 93120 }, { "epoch": 3.86, "grad_norm": 0.4609375, "learning_rate": 0.00047991608960026134, "loss": 0.2018, "step": 93130 }, { "epoch": 3.86, "grad_norm": 0.6640625, "learning_rate": 0.00047991183041822975, "loss": 0.1885, "step": 93140 }, { "epoch": 3.86, "grad_norm": 0.8515625, "learning_rate": 0.00047990757080352897, "loss": 0.1476, "step": 93150 }, { "epoch": 3.86, "grad_norm": 0.8828125, "learning_rate": 0.0004799033107561669, "loss": 0.2335, "step": 93160 }, { "epoch": 3.86, "grad_norm": 0.373046875, "learning_rate": 0.00047989905027615167, "loss": 0.2289, "step": 93170 }, { "epoch": 3.86, "grad_norm": 0.6171875, "learning_rate": 0.00047989478936349126, "loss": 0.2142, "step": 93180 }, { "epoch": 3.86, "grad_norm": 1.0234375, "learning_rate": 0.00047989052801819364, "loss": 0.2076, "step": 93190 }, { "epoch": 3.86, "grad_norm": 0.671875, "learning_rate": 0.00047988626624026687, "loss": 0.2248, "step": 93200 }, { "epoch": 3.86, "grad_norm": 0.66015625, "learning_rate": 0.00047988200402971905, "loss": 0.1858, "step": 93210 }, { "epoch": 3.86, "grad_norm": 0.0, "learning_rate": 0.000479877741386558, "loss": 0.1655, "step": 93220 }, { "epoch": 3.86, "grad_norm": 0.51171875, "learning_rate": 0.000479873478310792, "loss": 0.228, "step": 93230 }, { "epoch": 3.86, "grad_norm": 0.6015625, "learning_rate": 0.00047986921480242884, "loss": 0.1817, "step": 93240 }, { "epoch": 3.86, "grad_norm": 0.15625, "learning_rate": 0.0004798649508614767, "loss": 0.2103, "step": 93250 }, { "epoch": 3.86, "grad_norm": 0.82421875, "learning_rate": 0.00047986068648794356, "loss": 0.1628, "step": 93260 }, { "epoch": 3.86, "grad_norm": 0.310546875, "learning_rate": 0.0004798564216818374, "loss": 0.2155, "step": 93270 }, { "epoch": 3.86, "grad_norm": 1.375, "learning_rate": 0.00047985215644316627, "loss": 0.2557, "step": 93280 }, { "epoch": 3.86, "grad_norm": 0.94140625, "learning_rate": 0.0004798478907719382, "loss": 0.1992, "step": 93290 }, { "epoch": 3.86, "grad_norm": 0.6953125, "learning_rate": 0.00047984362466816125, "loss": 0.1969, "step": 93300 }, { "epoch": 3.86, "grad_norm": 0.435546875, "learning_rate": 0.0004798393581318433, "loss": 0.296, "step": 93310 }, { "epoch": 3.87, "grad_norm": 1.8203125, "learning_rate": 0.00047983509116299264, "loss": 0.1984, "step": 93320 }, { "epoch": 3.87, "grad_norm": 0.49609375, "learning_rate": 0.0004798308237616171, "loss": 0.1643, "step": 93330 }, { "epoch": 3.87, "grad_norm": 0.298828125, "learning_rate": 0.00047982655592772484, "loss": 0.2222, "step": 93340 }, { "epoch": 3.87, "grad_norm": 0.703125, "learning_rate": 0.0004798222876613237, "loss": 0.2176, "step": 93350 }, { "epoch": 3.87, "grad_norm": 0.6015625, "learning_rate": 0.00047981801896242193, "loss": 0.2266, "step": 93360 }, { "epoch": 3.87, "grad_norm": 0.365234375, "learning_rate": 0.00047981374983102745, "loss": 0.2057, "step": 93370 }, { "epoch": 3.87, "grad_norm": 0.87890625, "learning_rate": 0.00047980948026714825, "loss": 0.2252, "step": 93380 }, { "epoch": 3.87, "grad_norm": 0.486328125, "learning_rate": 0.0004798052102707925, "loss": 0.2391, "step": 93390 }, { "epoch": 3.87, "grad_norm": 1.1953125, "learning_rate": 0.00047980093984196815, "loss": 0.2326, "step": 93400 }, { "epoch": 3.87, "grad_norm": 0.6875, "learning_rate": 0.0004797966689806832, "loss": 0.2464, "step": 93410 }, { "epoch": 3.87, "grad_norm": 0.86328125, "learning_rate": 0.0004797923976869457, "loss": 0.1907, "step": 93420 }, { "epoch": 3.87, "grad_norm": 0.494140625, "learning_rate": 0.00047978812596076383, "loss": 0.1357, "step": 93430 }, { "epoch": 3.87, "grad_norm": 1.8671875, "learning_rate": 0.00047978385380214553, "loss": 0.2976, "step": 93440 }, { "epoch": 3.87, "grad_norm": 0.5546875, "learning_rate": 0.0004797795812110988, "loss": 0.1923, "step": 93450 }, { "epoch": 3.87, "grad_norm": 0.73046875, "learning_rate": 0.0004797753081876316, "loss": 0.2255, "step": 93460 }, { "epoch": 3.87, "grad_norm": 0.51171875, "learning_rate": 0.00047977103473175224, "loss": 0.186, "step": 93470 }, { "epoch": 3.87, "grad_norm": 0.478515625, "learning_rate": 0.0004797667608434685, "loss": 0.1452, "step": 93480 }, { "epoch": 3.87, "grad_norm": 0.93359375, "learning_rate": 0.00047976248652278853, "loss": 0.239, "step": 93490 }, { "epoch": 3.87, "grad_norm": 0.88671875, "learning_rate": 0.00047975821176972046, "loss": 0.2424, "step": 93500 }, { "epoch": 3.87, "grad_norm": 0.69140625, "learning_rate": 0.0004797539365842722, "loss": 0.1844, "step": 93510 }, { "epoch": 3.87, "grad_norm": 0.5234375, "learning_rate": 0.0004797496609664518, "loss": 0.2083, "step": 93520 }, { "epoch": 3.87, "grad_norm": 0.61328125, "learning_rate": 0.0004797453849162674, "loss": 0.2401, "step": 93530 }, { "epoch": 3.87, "grad_norm": 0.69140625, "learning_rate": 0.000479741108433727, "loss": 0.2085, "step": 93540 }, { "epoch": 3.87, "grad_norm": 0.56640625, "learning_rate": 0.0004797368315188386, "loss": 0.2105, "step": 93550 }, { "epoch": 3.88, "grad_norm": 0.84765625, "learning_rate": 0.0004797325541716103, "loss": 0.1706, "step": 93560 }, { "epoch": 3.88, "grad_norm": 0.2197265625, "learning_rate": 0.0004797282763920502, "loss": 0.1647, "step": 93570 }, { "epoch": 3.88, "grad_norm": 1.3046875, "learning_rate": 0.0004797239981801663, "loss": 0.1923, "step": 93580 }, { "epoch": 3.88, "grad_norm": 0.478515625, "learning_rate": 0.0004797197195359666, "loss": 0.2025, "step": 93590 }, { "epoch": 3.88, "grad_norm": 1.8125, "learning_rate": 0.00047971544045945913, "loss": 0.195, "step": 93600 }, { "epoch": 3.88, "grad_norm": 0.65234375, "learning_rate": 0.0004797111609506521, "loss": 0.2228, "step": 93610 }, { "epoch": 3.88, "grad_norm": 0.82421875, "learning_rate": 0.00047970688100955344, "loss": 0.149, "step": 93620 }, { "epoch": 3.88, "grad_norm": 0.345703125, "learning_rate": 0.00047970260063617126, "loss": 0.1841, "step": 93630 }, { "epoch": 3.88, "grad_norm": 0.5234375, "learning_rate": 0.0004796983198305136, "loss": 0.2262, "step": 93640 }, { "epoch": 3.88, "grad_norm": 1.09375, "learning_rate": 0.0004796940385925884, "loss": 0.1705, "step": 93650 }, { "epoch": 3.88, "grad_norm": 0.7890625, "learning_rate": 0.000479689756922404, "loss": 0.1382, "step": 93660 }, { "epoch": 3.88, "grad_norm": 1.65625, "learning_rate": 0.00047968547481996816, "loss": 0.2347, "step": 93670 }, { "epoch": 3.88, "grad_norm": 1.03125, "learning_rate": 0.00047968119228528907, "loss": 0.2359, "step": 93680 }, { "epoch": 3.88, "grad_norm": 0.1650390625, "learning_rate": 0.00047967690931837484, "loss": 0.252, "step": 93690 }, { "epoch": 3.88, "grad_norm": 1.40625, "learning_rate": 0.0004796726259192334, "loss": 0.3156, "step": 93700 }, { "epoch": 3.88, "grad_norm": 1.4765625, "learning_rate": 0.00047966834208787294, "loss": 0.2031, "step": 93710 }, { "epoch": 3.88, "grad_norm": 0.88671875, "learning_rate": 0.00047966405782430137, "loss": 0.1755, "step": 93720 }, { "epoch": 3.88, "grad_norm": 0.625, "learning_rate": 0.0004796597731285269, "loss": 0.1882, "step": 93730 }, { "epoch": 3.88, "grad_norm": 0.65625, "learning_rate": 0.0004796554880005576, "loss": 0.2042, "step": 93740 }, { "epoch": 3.88, "grad_norm": 0.1875, "learning_rate": 0.0004796512024404014, "loss": 0.2459, "step": 93750 }, { "epoch": 3.88, "grad_norm": 0.515625, "learning_rate": 0.0004796469164480664, "loss": 0.1968, "step": 93760 }, { "epoch": 3.88, "grad_norm": 0.83203125, "learning_rate": 0.0004796426300235608, "loss": 0.2006, "step": 93770 }, { "epoch": 3.88, "grad_norm": 0.7734375, "learning_rate": 0.0004796383431668925, "loss": 0.225, "step": 93780 }, { "epoch": 3.88, "grad_norm": 0.265625, "learning_rate": 0.00047963405587806964, "loss": 0.1946, "step": 93790 }, { "epoch": 3.89, "grad_norm": 0.37890625, "learning_rate": 0.00047962976815710035, "loss": 0.1848, "step": 93800 }, { "epoch": 3.89, "grad_norm": 0.279296875, "learning_rate": 0.0004796254800039925, "loss": 0.1854, "step": 93810 }, { "epoch": 3.89, "grad_norm": 0.71875, "learning_rate": 0.0004796211914187545, "loss": 0.2126, "step": 93820 }, { "epoch": 3.89, "grad_norm": 0.6796875, "learning_rate": 0.00047961690240139404, "loss": 0.2141, "step": 93830 }, { "epoch": 3.89, "grad_norm": 0.796875, "learning_rate": 0.00047961261295191945, "loss": 0.1651, "step": 93840 }, { "epoch": 3.89, "grad_norm": 0.63671875, "learning_rate": 0.0004796083230703386, "loss": 0.2445, "step": 93850 }, { "epoch": 3.89, "grad_norm": 0.69140625, "learning_rate": 0.00047960403275665986, "loss": 0.212, "step": 93860 }, { "epoch": 3.89, "grad_norm": 0.375, "learning_rate": 0.00047959974201089103, "loss": 0.2307, "step": 93870 }, { "epoch": 3.89, "grad_norm": 0.9453125, "learning_rate": 0.0004795954508330403, "loss": 0.2421, "step": 93880 }, { "epoch": 3.89, "grad_norm": 1.8046875, "learning_rate": 0.00047959115922311567, "loss": 0.1933, "step": 93890 }, { "epoch": 3.89, "grad_norm": 0.0, "learning_rate": 0.0004795868671811253, "loss": 0.2009, "step": 93900 }, { "epoch": 3.89, "grad_norm": 0.0, "learning_rate": 0.00047958257470707733, "loss": 0.1973, "step": 93910 }, { "epoch": 3.89, "grad_norm": 0.54296875, "learning_rate": 0.0004795782818009796, "loss": 0.1957, "step": 93920 }, { "epoch": 3.89, "grad_norm": 0.9765625, "learning_rate": 0.00047957398846284045, "loss": 0.2546, "step": 93930 }, { "epoch": 3.89, "grad_norm": 0.349609375, "learning_rate": 0.0004795696946926678, "loss": 0.2413, "step": 93940 }, { "epoch": 3.89, "grad_norm": 1.140625, "learning_rate": 0.00047956540049046983, "loss": 0.236, "step": 93950 }, { "epoch": 3.89, "grad_norm": 0.48828125, "learning_rate": 0.00047956110585625447, "loss": 0.2271, "step": 93960 }, { "epoch": 3.89, "grad_norm": 0.796875, "learning_rate": 0.00047955681079003, "loss": 0.1911, "step": 93970 }, { "epoch": 3.89, "grad_norm": 0.0, "learning_rate": 0.00047955251529180435, "loss": 0.1778, "step": 93980 }, { "epoch": 3.89, "grad_norm": 0.5625, "learning_rate": 0.00047954821936158564, "loss": 0.2207, "step": 93990 }, { "epoch": 3.89, "grad_norm": 2.890625, "learning_rate": 0.000479543922999382, "loss": 0.1507, "step": 94000 }, { "epoch": 3.89, "grad_norm": 0.63671875, "learning_rate": 0.0004795396262052014, "loss": 0.2273, "step": 94010 }, { "epoch": 3.89, "grad_norm": 0.90625, "learning_rate": 0.0004795353289790521, "loss": 0.233, "step": 94020 }, { "epoch": 3.89, "grad_norm": 0.921875, "learning_rate": 0.0004795310313209421, "loss": 0.2481, "step": 94030 }, { "epoch": 3.9, "grad_norm": 0.73828125, "learning_rate": 0.00047952673323087947, "loss": 0.1862, "step": 94040 }, { "epoch": 3.9, "grad_norm": 0.4453125, "learning_rate": 0.00047952243470887233, "loss": 0.1889, "step": 94050 }, { "epoch": 3.9, "grad_norm": 0.8125, "learning_rate": 0.00047951813575492874, "loss": 0.2258, "step": 94060 }, { "epoch": 3.9, "grad_norm": 0.671875, "learning_rate": 0.0004795138363690568, "loss": 0.2156, "step": 94070 }, { "epoch": 3.9, "grad_norm": 0.7421875, "learning_rate": 0.0004795095365512646, "loss": 0.2218, "step": 94080 }, { "epoch": 3.9, "grad_norm": 0.76171875, "learning_rate": 0.0004795052363015602, "loss": 0.2273, "step": 94090 }, { "epoch": 3.9, "grad_norm": 0.6015625, "learning_rate": 0.0004795009356199518, "loss": 0.2562, "step": 94100 }, { "epoch": 3.9, "grad_norm": 0.314453125, "learning_rate": 0.00047949663450644743, "loss": 0.208, "step": 94110 }, { "epoch": 3.9, "grad_norm": 0.625, "learning_rate": 0.0004794923329610551, "loss": 0.2059, "step": 94120 }, { "epoch": 3.9, "grad_norm": 0.5390625, "learning_rate": 0.000479488030983783, "loss": 0.1836, "step": 94130 }, { "epoch": 3.9, "grad_norm": 0.80859375, "learning_rate": 0.00047948372857463926, "loss": 0.1775, "step": 94140 }, { "epoch": 3.9, "grad_norm": 0.94140625, "learning_rate": 0.0004794794257336319, "loss": 0.2109, "step": 94150 }, { "epoch": 3.9, "grad_norm": 0.6484375, "learning_rate": 0.00047947512246076905, "loss": 0.2463, "step": 94160 }, { "epoch": 3.9, "grad_norm": 1.4765625, "learning_rate": 0.0004794708187560588, "loss": 0.2215, "step": 94170 }, { "epoch": 3.9, "grad_norm": 0.4140625, "learning_rate": 0.00047946651461950923, "loss": 0.2, "step": 94180 }, { "epoch": 3.9, "grad_norm": 0.546875, "learning_rate": 0.00047946221005112845, "loss": 0.2445, "step": 94190 }, { "epoch": 3.9, "grad_norm": 0.62890625, "learning_rate": 0.00047945790505092464, "loss": 0.222, "step": 94200 }, { "epoch": 3.9, "grad_norm": 0.625, "learning_rate": 0.00047945359961890576, "loss": 0.2055, "step": 94210 }, { "epoch": 3.9, "grad_norm": 0.9453125, "learning_rate": 0.00047944929375508, "loss": 0.2369, "step": 94220 }, { "epoch": 3.9, "grad_norm": 0.4140625, "learning_rate": 0.0004794449874594555, "loss": 0.2381, "step": 94230 }, { "epoch": 3.9, "grad_norm": 0.376953125, "learning_rate": 0.00047944068073204027, "loss": 0.1836, "step": 94240 }, { "epoch": 3.9, "grad_norm": 0.375, "learning_rate": 0.00047943637357284244, "loss": 0.2055, "step": 94250 }, { "epoch": 3.9, "grad_norm": 1.4375, "learning_rate": 0.00047943206598187015, "loss": 0.2166, "step": 94260 }, { "epoch": 3.9, "grad_norm": 1.109375, "learning_rate": 0.0004794277579591315, "loss": 0.2104, "step": 94270 }, { "epoch": 3.91, "grad_norm": 1.234375, "learning_rate": 0.00047942344950463456, "loss": 0.2353, "step": 94280 }, { "epoch": 3.91, "grad_norm": 0.69140625, "learning_rate": 0.0004794191406183874, "loss": 0.2344, "step": 94290 }, { "epoch": 3.91, "grad_norm": 0.83203125, "learning_rate": 0.0004794148313003983, "loss": 0.2139, "step": 94300 }, { "epoch": 3.91, "grad_norm": 0.53515625, "learning_rate": 0.0004794105215506752, "loss": 0.1673, "step": 94310 }, { "epoch": 3.91, "grad_norm": 1.03125, "learning_rate": 0.00047940621136922636, "loss": 0.1918, "step": 94320 }, { "epoch": 3.91, "grad_norm": 0.65625, "learning_rate": 0.0004794019007560597, "loss": 0.1871, "step": 94330 }, { "epoch": 3.91, "grad_norm": 0.494140625, "learning_rate": 0.00047939758971118354, "loss": 0.2221, "step": 94340 }, { "epoch": 3.91, "grad_norm": 0.875, "learning_rate": 0.00047939327823460576, "loss": 0.1668, "step": 94350 }, { "epoch": 3.91, "grad_norm": 0.71484375, "learning_rate": 0.00047938896632633473, "loss": 0.2783, "step": 94360 }, { "epoch": 3.91, "grad_norm": 0.3125, "learning_rate": 0.00047938465398637836, "loss": 0.2076, "step": 94370 }, { "epoch": 3.91, "grad_norm": 0.6171875, "learning_rate": 0.00047938034121474483, "loss": 0.2169, "step": 94380 }, { "epoch": 3.91, "grad_norm": 0.59765625, "learning_rate": 0.0004793760280114423, "loss": 0.1896, "step": 94390 }, { "epoch": 3.91, "grad_norm": 0.74609375, "learning_rate": 0.00047937171437647885, "loss": 0.2121, "step": 94400 }, { "epoch": 3.91, "grad_norm": 1.0546875, "learning_rate": 0.0004793674003098627, "loss": 0.2539, "step": 94410 }, { "epoch": 3.91, "grad_norm": 0.3671875, "learning_rate": 0.0004793630858116017, "loss": 0.2197, "step": 94420 }, { "epoch": 3.91, "grad_norm": 1.2578125, "learning_rate": 0.00047935877088170427, "loss": 0.1668, "step": 94430 }, { "epoch": 3.91, "grad_norm": 0.412109375, "learning_rate": 0.0004793544555201783, "loss": 0.2257, "step": 94440 }, { "epoch": 3.91, "grad_norm": 0.37890625, "learning_rate": 0.0004793501397270321, "loss": 0.2531, "step": 94450 }, { "epoch": 3.91, "grad_norm": 0.47265625, "learning_rate": 0.00047934582350227375, "loss": 0.2353, "step": 94460 }, { "epoch": 3.91, "grad_norm": 0.78515625, "learning_rate": 0.00047934150684591115, "loss": 0.205, "step": 94470 }, { "epoch": 3.91, "grad_norm": 0.9140625, "learning_rate": 0.0004793371897579527, "loss": 0.2574, "step": 94480 }, { "epoch": 3.91, "grad_norm": 0.74609375, "learning_rate": 0.0004793328722384065, "loss": 0.2102, "step": 94490 }, { "epoch": 3.91, "grad_norm": 0.55859375, "learning_rate": 0.0004793285542872805, "loss": 0.2137, "step": 94500 }, { "epoch": 3.91, "grad_norm": 0.6015625, "learning_rate": 0.00047932423590458297, "loss": 0.1959, "step": 94510 }, { "epoch": 3.92, "grad_norm": 0.287109375, "learning_rate": 0.00047931991709032195, "loss": 0.2566, "step": 94520 }, { "epoch": 3.92, "grad_norm": 1.0078125, "learning_rate": 0.00047931559784450563, "loss": 0.2623, "step": 94530 }, { "epoch": 3.92, "grad_norm": 0.55859375, "learning_rate": 0.00047931127816714216, "loss": 0.2681, "step": 94540 }, { "epoch": 3.92, "grad_norm": 0.6875, "learning_rate": 0.00047930695805823955, "loss": 0.2346, "step": 94550 }, { "epoch": 3.92, "grad_norm": 0.7265625, "learning_rate": 0.0004793026375178061, "loss": 0.2204, "step": 94560 }, { "epoch": 3.92, "grad_norm": 1.0234375, "learning_rate": 0.00047929831654584977, "loss": 0.2283, "step": 94570 }, { "epoch": 3.92, "grad_norm": 0.455078125, "learning_rate": 0.0004792939951423788, "loss": 0.1558, "step": 94580 }, { "epoch": 3.92, "grad_norm": 2.984375, "learning_rate": 0.0004792896733074013, "loss": 0.2243, "step": 94590 }, { "epoch": 3.92, "grad_norm": 0.37890625, "learning_rate": 0.0004792853510409254, "loss": 0.1984, "step": 94600 }, { "epoch": 3.92, "grad_norm": 1.25, "learning_rate": 0.00047928102834295924, "loss": 0.1868, "step": 94610 }, { "epoch": 3.92, "grad_norm": 0.87109375, "learning_rate": 0.0004792767052135109, "loss": 0.1949, "step": 94620 }, { "epoch": 3.92, "grad_norm": 0.8359375, "learning_rate": 0.0004792723816525886, "loss": 0.2499, "step": 94630 }, { "epoch": 3.92, "grad_norm": 0.7265625, "learning_rate": 0.00047926805766020043, "loss": 0.215, "step": 94640 }, { "epoch": 3.92, "grad_norm": 0.458984375, "learning_rate": 0.0004792637332363545, "loss": 0.1927, "step": 94650 }, { "epoch": 3.92, "grad_norm": 0.416015625, "learning_rate": 0.00047925940838105895, "loss": 0.1876, "step": 94660 }, { "epoch": 3.92, "grad_norm": 0.92578125, "learning_rate": 0.0004792550830943221, "loss": 0.2193, "step": 94670 }, { "epoch": 3.92, "grad_norm": 0.41015625, "learning_rate": 0.00047925075737615183, "loss": 0.2123, "step": 94680 }, { "epoch": 3.92, "grad_norm": 1.2734375, "learning_rate": 0.00047924643122655643, "loss": 0.1724, "step": 94690 }, { "epoch": 3.92, "grad_norm": 0.45703125, "learning_rate": 0.00047924210464554397, "loss": 0.1955, "step": 94700 }, { "epoch": 3.92, "grad_norm": 0.81640625, "learning_rate": 0.0004792377776331226, "loss": 0.2064, "step": 94710 }, { "epoch": 3.92, "grad_norm": 0.7421875, "learning_rate": 0.0004792334501893005, "loss": 0.2299, "step": 94720 }, { "epoch": 3.92, "grad_norm": 0.8125, "learning_rate": 0.0004792291223140859, "loss": 0.2001, "step": 94730 }, { "epoch": 3.92, "grad_norm": 0.373046875, "learning_rate": 0.00047922479400748676, "loss": 0.2401, "step": 94740 }, { "epoch": 3.92, "grad_norm": 0.6953125, "learning_rate": 0.0004792204652695114, "loss": 0.1966, "step": 94750 }, { "epoch": 3.92, "grad_norm": 0.36328125, "learning_rate": 0.00047921613610016773, "loss": 0.2004, "step": 94760 }, { "epoch": 3.93, "grad_norm": 0.5078125, "learning_rate": 0.00047921180649946417, "loss": 0.1987, "step": 94770 }, { "epoch": 3.93, "grad_norm": 0.58984375, "learning_rate": 0.00047920747646740865, "loss": 0.2246, "step": 94780 }, { "epoch": 3.93, "grad_norm": 1.1640625, "learning_rate": 0.0004792031460040095, "loss": 0.2764, "step": 94790 }, { "epoch": 3.93, "grad_norm": 0.37109375, "learning_rate": 0.00047919881510927463, "loss": 0.2536, "step": 94800 }, { "epoch": 3.93, "grad_norm": 0.9375, "learning_rate": 0.0004791944837832125, "loss": 0.2322, "step": 94810 }, { "epoch": 3.93, "grad_norm": 1.078125, "learning_rate": 0.00047919015202583105, "loss": 0.2299, "step": 94820 }, { "epoch": 3.93, "grad_norm": 0.408203125, "learning_rate": 0.00047918581983713847, "loss": 0.2057, "step": 94830 }, { "epoch": 3.93, "grad_norm": 0.83984375, "learning_rate": 0.00047918148721714295, "loss": 0.207, "step": 94840 }, { "epoch": 3.93, "grad_norm": 0.5234375, "learning_rate": 0.0004791771541658526, "loss": 0.2406, "step": 94850 }, { "epoch": 3.93, "grad_norm": 0.75, "learning_rate": 0.0004791728206832756, "loss": 0.2186, "step": 94860 }, { "epoch": 3.93, "grad_norm": 1.1171875, "learning_rate": 0.00047916848676942016, "loss": 0.2346, "step": 94870 }, { "epoch": 3.93, "grad_norm": 1.1953125, "learning_rate": 0.00047916415242429435, "loss": 0.212, "step": 94880 }, { "epoch": 3.93, "grad_norm": 1.0, "learning_rate": 0.0004791598176479063, "loss": 0.2217, "step": 94890 }, { "epoch": 3.93, "grad_norm": 1.2734375, "learning_rate": 0.0004791554824402642, "loss": 0.2064, "step": 94900 }, { "epoch": 3.93, "grad_norm": 0.38671875, "learning_rate": 0.00047915114680137627, "loss": 0.2713, "step": 94910 }, { "epoch": 3.93, "grad_norm": 0.240234375, "learning_rate": 0.00047914681073125064, "loss": 0.2294, "step": 94920 }, { "epoch": 3.93, "grad_norm": 1.1640625, "learning_rate": 0.0004791424742298955, "loss": 0.2238, "step": 94930 }, { "epoch": 3.93, "grad_norm": 0.9921875, "learning_rate": 0.0004791381372973189, "loss": 0.2292, "step": 94940 }, { "epoch": 3.93, "grad_norm": 1.4609375, "learning_rate": 0.0004791337999335291, "loss": 0.2823, "step": 94950 }, { "epoch": 3.93, "grad_norm": 0.78515625, "learning_rate": 0.00047912946213853427, "loss": 0.1927, "step": 94960 }, { "epoch": 3.93, "grad_norm": 0.5625, "learning_rate": 0.0004791251239123424, "loss": 0.2365, "step": 94970 }, { "epoch": 3.93, "grad_norm": 0.6796875, "learning_rate": 0.00047912078525496195, "loss": 0.1591, "step": 94980 }, { "epoch": 3.93, "grad_norm": 1.5546875, "learning_rate": 0.00047911644616640083, "loss": 0.2215, "step": 94990 }, { "epoch": 3.93, "grad_norm": 0.859375, "learning_rate": 0.0004791121066466673, "loss": 0.1557, "step": 95000 }, { "epoch": 3.94, "grad_norm": 0.453125, "learning_rate": 0.0004791077666957695, "loss": 0.1949, "step": 95010 }, { "epoch": 3.94, "grad_norm": 0.8046875, "learning_rate": 0.0004791034263137157, "loss": 0.2064, "step": 95020 }, { "epoch": 3.94, "grad_norm": 1.6796875, "learning_rate": 0.00047909908550051403, "loss": 0.1925, "step": 95030 }, { "epoch": 3.94, "grad_norm": 1.0234375, "learning_rate": 0.0004790947442561725, "loss": 0.1848, "step": 95040 }, { "epoch": 3.94, "grad_norm": 1.96875, "learning_rate": 0.00047909040258069946, "loss": 0.2188, "step": 95050 }, { "epoch": 3.94, "grad_norm": 0.5859375, "learning_rate": 0.0004790860604741031, "loss": 0.2788, "step": 95060 }, { "epoch": 3.94, "grad_norm": 0.6875, "learning_rate": 0.00047908171793639145, "loss": 0.2255, "step": 95070 }, { "epoch": 3.94, "grad_norm": 1.2734375, "learning_rate": 0.0004790773749675726, "loss": 0.2542, "step": 95080 }, { "epoch": 3.94, "grad_norm": 1.5390625, "learning_rate": 0.00047907303156765506, "loss": 0.1881, "step": 95090 }, { "epoch": 3.94, "grad_norm": 0.77734375, "learning_rate": 0.0004790686877366468, "loss": 0.1773, "step": 95100 }, { "epoch": 3.94, "grad_norm": 0.703125, "learning_rate": 0.00047906434347455595, "loss": 0.1991, "step": 95110 }, { "epoch": 3.94, "grad_norm": 1.4140625, "learning_rate": 0.0004790599987813907, "loss": 0.2248, "step": 95120 }, { "epoch": 3.94, "grad_norm": 1.296875, "learning_rate": 0.00047905565365715936, "loss": 0.2263, "step": 95130 }, { "epoch": 3.94, "grad_norm": 0.5, "learning_rate": 0.00047905130810186995, "loss": 0.2168, "step": 95140 }, { "epoch": 3.94, "grad_norm": 0.61328125, "learning_rate": 0.0004790469621155308, "loss": 0.2362, "step": 95150 }, { "epoch": 3.94, "grad_norm": 0.625, "learning_rate": 0.0004790426156981499, "loss": 0.2153, "step": 95160 }, { "epoch": 3.94, "grad_norm": 0.9375, "learning_rate": 0.00047903826884973554, "loss": 0.1941, "step": 95170 }, { "epoch": 3.94, "grad_norm": 0.81640625, "learning_rate": 0.000479033921570296, "loss": 0.2466, "step": 95180 }, { "epoch": 3.94, "grad_norm": 0.416015625, "learning_rate": 0.00047902957385983927, "loss": 0.2486, "step": 95190 }, { "epoch": 3.94, "grad_norm": 0.53125, "learning_rate": 0.0004790252257183736, "loss": 0.2543, "step": 95200 }, { "epoch": 3.94, "grad_norm": 0.7109375, "learning_rate": 0.00047902087714590726, "loss": 0.2503, "step": 95210 }, { "epoch": 3.94, "grad_norm": 0.984375, "learning_rate": 0.0004790165281424483, "loss": 0.1779, "step": 95220 }, { "epoch": 3.94, "grad_norm": 1.15625, "learning_rate": 0.00047901217870800495, "loss": 0.1904, "step": 95230 }, { "epoch": 3.94, "grad_norm": 0.84375, "learning_rate": 0.00047900782884258543, "loss": 0.1349, "step": 95240 }, { "epoch": 3.95, "grad_norm": 1.609375, "learning_rate": 0.000479003478546198, "loss": 0.3005, "step": 95250 }, { "epoch": 3.95, "grad_norm": 1.0234375, "learning_rate": 0.0004789991278188506, "loss": 0.2296, "step": 95260 }, { "epoch": 3.95, "grad_norm": 0.546875, "learning_rate": 0.0004789947766605517, "loss": 0.2726, "step": 95270 }, { "epoch": 3.95, "grad_norm": 0.6796875, "learning_rate": 0.0004789904250713093, "loss": 0.2494, "step": 95280 }, { "epoch": 3.95, "grad_norm": 0.291015625, "learning_rate": 0.00047898607305113164, "loss": 0.2182, "step": 95290 }, { "epoch": 3.95, "grad_norm": 0.55859375, "learning_rate": 0.0004789817206000269, "loss": 0.1665, "step": 95300 }, { "epoch": 3.95, "grad_norm": 0.4765625, "learning_rate": 0.0004789773677180033, "loss": 0.1808, "step": 95310 }, { "epoch": 3.95, "grad_norm": 1.515625, "learning_rate": 0.0004789730144050691, "loss": 0.2249, "step": 95320 }, { "epoch": 3.95, "grad_norm": 0.9296875, "learning_rate": 0.00047896866066123234, "loss": 0.2623, "step": 95330 }, { "epoch": 3.95, "grad_norm": 0.55859375, "learning_rate": 0.00047896430648650123, "loss": 0.2298, "step": 95340 }, { "epoch": 3.95, "grad_norm": 1.0703125, "learning_rate": 0.00047895995188088417, "loss": 0.1698, "step": 95350 }, { "epoch": 3.95, "grad_norm": 0.6484375, "learning_rate": 0.0004789555968443891, "loss": 0.1783, "step": 95360 }, { "epoch": 3.95, "grad_norm": 0.9140625, "learning_rate": 0.0004789512413770244, "loss": 0.2562, "step": 95370 }, { "epoch": 3.95, "grad_norm": 0.77734375, "learning_rate": 0.0004789468854787981, "loss": 0.2128, "step": 95380 }, { "epoch": 3.95, "grad_norm": 1.671875, "learning_rate": 0.00047894252914971845, "loss": 0.1672, "step": 95390 }, { "epoch": 3.95, "grad_norm": 0.404296875, "learning_rate": 0.00047893817238979383, "loss": 0.2229, "step": 95400 }, { "epoch": 3.95, "grad_norm": 0.546875, "learning_rate": 0.0004789338151990322, "loss": 0.1656, "step": 95410 }, { "epoch": 3.95, "grad_norm": 0.6640625, "learning_rate": 0.0004789294575774419, "loss": 0.1984, "step": 95420 }, { "epoch": 3.95, "grad_norm": 0.58984375, "learning_rate": 0.00047892509952503107, "loss": 0.2329, "step": 95430 }, { "epoch": 3.95, "grad_norm": 0.5078125, "learning_rate": 0.00047892074104180786, "loss": 0.2381, "step": 95440 }, { "epoch": 3.95, "grad_norm": 0.68359375, "learning_rate": 0.00047891638212778066, "loss": 0.227, "step": 95450 }, { "epoch": 3.95, "grad_norm": 1.1875, "learning_rate": 0.00047891202278295744, "loss": 0.2382, "step": 95460 }, { "epoch": 3.95, "grad_norm": 0.45703125, "learning_rate": 0.00047890766300734655, "loss": 0.2429, "step": 95470 }, { "epoch": 3.95, "grad_norm": 0.83984375, "learning_rate": 0.00047890330280095616, "loss": 0.1745, "step": 95480 }, { "epoch": 3.96, "grad_norm": 0.58984375, "learning_rate": 0.00047889894216379447, "loss": 0.1883, "step": 95490 }, { "epoch": 3.96, "grad_norm": 0.40234375, "learning_rate": 0.0004788945810958697, "loss": 0.1921, "step": 95500 }, { "epoch": 3.96, "grad_norm": 1.859375, "learning_rate": 0.00047889021959719003, "loss": 0.2077, "step": 95510 }, { "epoch": 3.96, "grad_norm": 0.33984375, "learning_rate": 0.0004788858576677637, "loss": 0.2218, "step": 95520 }, { "epoch": 3.96, "grad_norm": 1.5, "learning_rate": 0.0004788814953075989, "loss": 0.2093, "step": 95530 }, { "epoch": 3.96, "grad_norm": 1.5625, "learning_rate": 0.00047887713251670383, "loss": 0.2197, "step": 95540 }, { "epoch": 3.96, "grad_norm": 0.765625, "learning_rate": 0.0004788727692950867, "loss": 0.2021, "step": 95550 }, { "epoch": 3.96, "grad_norm": 1.0234375, "learning_rate": 0.0004788684056427558, "loss": 0.2042, "step": 95560 }, { "epoch": 3.96, "grad_norm": 0.35546875, "learning_rate": 0.00047886404155971917, "loss": 0.1604, "step": 95570 }, { "epoch": 3.96, "grad_norm": 0.455078125, "learning_rate": 0.0004788596770459852, "loss": 0.1723, "step": 95580 }, { "epoch": 3.96, "grad_norm": 0.61328125, "learning_rate": 0.0004788553121015621, "loss": 0.2323, "step": 95590 }, { "epoch": 3.96, "grad_norm": 0.609375, "learning_rate": 0.00047885094672645793, "loss": 0.2336, "step": 95600 }, { "epoch": 3.96, "grad_norm": 0.45703125, "learning_rate": 0.00047884658092068103, "loss": 0.2039, "step": 95610 }, { "epoch": 3.96, "grad_norm": 0.515625, "learning_rate": 0.0004788422146842395, "loss": 0.1875, "step": 95620 }, { "epoch": 3.96, "grad_norm": 1.40625, "learning_rate": 0.0004788378480171417, "loss": 0.1823, "step": 95630 }, { "epoch": 3.96, "grad_norm": 0.2470703125, "learning_rate": 0.0004788334809193958, "loss": 0.2119, "step": 95640 }, { "epoch": 3.96, "grad_norm": 0.81640625, "learning_rate": 0.00047882911339100997, "loss": 0.25, "step": 95650 }, { "epoch": 3.96, "grad_norm": 0.380859375, "learning_rate": 0.0004788247454319924, "loss": 0.243, "step": 95660 }, { "epoch": 3.96, "grad_norm": 0.1474609375, "learning_rate": 0.00047882037704235147, "loss": 0.2493, "step": 95670 }, { "epoch": 3.96, "grad_norm": 0.84375, "learning_rate": 0.0004788160082220953, "loss": 0.1804, "step": 95680 }, { "epoch": 3.96, "grad_norm": 0.484375, "learning_rate": 0.00047881163897123204, "loss": 0.2035, "step": 95690 }, { "epoch": 3.96, "grad_norm": 0.400390625, "learning_rate": 0.00047880726928977005, "loss": 0.1895, "step": 95700 }, { "epoch": 3.96, "grad_norm": 0.13671875, "learning_rate": 0.00047880289917771743, "loss": 0.1624, "step": 95710 }, { "epoch": 3.96, "grad_norm": 0.70703125, "learning_rate": 0.0004787985286350825, "loss": 0.2362, "step": 95720 }, { "epoch": 3.97, "grad_norm": 0.0, "learning_rate": 0.0004787941576618734, "loss": 0.2328, "step": 95730 }, { "epoch": 3.97, "grad_norm": 0.7578125, "learning_rate": 0.0004787897862580984, "loss": 0.245, "step": 95740 }, { "epoch": 3.97, "grad_norm": 0.54296875, "learning_rate": 0.00047878541442376583, "loss": 0.2419, "step": 95750 }, { "epoch": 3.97, "grad_norm": 0.06640625, "learning_rate": 0.00047878104215888376, "loss": 0.1764, "step": 95760 }, { "epoch": 3.97, "grad_norm": 1.734375, "learning_rate": 0.00047877666946346046, "loss": 0.2214, "step": 95770 }, { "epoch": 3.97, "grad_norm": 1.375, "learning_rate": 0.0004787722963375042, "loss": 0.2177, "step": 95780 }, { "epoch": 3.97, "grad_norm": 0.96484375, "learning_rate": 0.00047876792278102315, "loss": 0.2367, "step": 95790 }, { "epoch": 3.97, "grad_norm": 0.94921875, "learning_rate": 0.00047876354879402563, "loss": 0.2142, "step": 95800 }, { "epoch": 3.97, "grad_norm": 0.5703125, "learning_rate": 0.0004787591743765197, "loss": 0.2213, "step": 95810 }, { "epoch": 3.97, "grad_norm": 1.71875, "learning_rate": 0.0004787547995285139, "loss": 0.2259, "step": 95820 }, { "epoch": 3.97, "grad_norm": 0.6015625, "learning_rate": 0.0004787504242500161, "loss": 0.228, "step": 95830 }, { "epoch": 3.97, "grad_norm": 0.44140625, "learning_rate": 0.00047874604854103474, "loss": 0.1561, "step": 95840 }, { "epoch": 3.97, "grad_norm": 0.7890625, "learning_rate": 0.0004787416724015781, "loss": 0.1995, "step": 95850 }, { "epoch": 3.97, "grad_norm": 0.8671875, "learning_rate": 0.0004787372958316543, "loss": 0.2268, "step": 95860 }, { "epoch": 3.97, "grad_norm": 0.6953125, "learning_rate": 0.00047873291883127154, "loss": 0.2381, "step": 95870 }, { "epoch": 3.97, "grad_norm": 0.455078125, "learning_rate": 0.00047872854140043814, "loss": 0.2435, "step": 95880 }, { "epoch": 3.97, "grad_norm": 0.51953125, "learning_rate": 0.0004787241635391624, "loss": 0.1964, "step": 95890 }, { "epoch": 3.97, "grad_norm": 0.51171875, "learning_rate": 0.0004787197852474524, "loss": 0.2132, "step": 95900 }, { "epoch": 3.97, "grad_norm": 0.314453125, "learning_rate": 0.00047871540652531656, "loss": 0.2206, "step": 95910 }, { "epoch": 3.97, "grad_norm": 0.7265625, "learning_rate": 0.00047871102737276296, "loss": 0.2072, "step": 95920 }, { "epoch": 3.97, "grad_norm": 0.462890625, "learning_rate": 0.0004787066477897999, "loss": 0.1953, "step": 95930 }, { "epoch": 3.97, "grad_norm": 0.400390625, "learning_rate": 0.00047870226777643564, "loss": 0.1754, "step": 95940 }, { "epoch": 3.97, "grad_norm": 0.84375, "learning_rate": 0.0004786978873326784, "loss": 0.26, "step": 95950 }, { "epoch": 3.97, "grad_norm": 0.19921875, "learning_rate": 0.00047869350645853644, "loss": 0.2152, "step": 95960 }, { "epoch": 3.98, "grad_norm": 0.78515625, "learning_rate": 0.000478689125154018, "loss": 0.2301, "step": 95970 }, { "epoch": 3.98, "grad_norm": 0.70703125, "learning_rate": 0.0004786847434191314, "loss": 0.2174, "step": 95980 }, { "epoch": 3.98, "grad_norm": 0.70703125, "learning_rate": 0.0004786803612538847, "loss": 0.2033, "step": 95990 }, { "epoch": 3.98, "grad_norm": 0.6484375, "learning_rate": 0.00047867597865828627, "loss": 0.284, "step": 96000 }, { "epoch": 3.98, "grad_norm": 0.97265625, "learning_rate": 0.00047867159563234435, "loss": 0.2324, "step": 96010 }, { "epoch": 3.98, "grad_norm": 1.609375, "learning_rate": 0.00047866721217606725, "loss": 0.2123, "step": 96020 }, { "epoch": 3.98, "grad_norm": 1.3203125, "learning_rate": 0.00047866282828946304, "loss": 0.2034, "step": 96030 }, { "epoch": 3.98, "grad_norm": 0.61328125, "learning_rate": 0.0004786584439725401, "loss": 0.1649, "step": 96040 }, { "epoch": 3.98, "grad_norm": 0.61328125, "learning_rate": 0.0004786540592253067, "loss": 0.1973, "step": 96050 }, { "epoch": 3.98, "grad_norm": 0.62109375, "learning_rate": 0.0004786496740477711, "loss": 0.1433, "step": 96060 }, { "epoch": 3.98, "grad_norm": 3.078125, "learning_rate": 0.0004786452884399415, "loss": 0.1864, "step": 96070 }, { "epoch": 3.98, "grad_norm": 0.4609375, "learning_rate": 0.00047864090240182607, "loss": 0.2031, "step": 96080 }, { "epoch": 3.98, "grad_norm": 0.8828125, "learning_rate": 0.00047863651593343313, "loss": 0.1531, "step": 96090 }, { "epoch": 3.98, "grad_norm": 0.83984375, "learning_rate": 0.00047863212903477103, "loss": 0.2267, "step": 96100 }, { "epoch": 3.98, "grad_norm": 1.2421875, "learning_rate": 0.000478627741705848, "loss": 0.1987, "step": 96110 }, { "epoch": 3.98, "grad_norm": 0.73046875, "learning_rate": 0.0004786233539466722, "loss": 0.2287, "step": 96120 }, { "epoch": 3.98, "grad_norm": 0.68359375, "learning_rate": 0.00047861896575725194, "loss": 0.2399, "step": 96130 }, { "epoch": 3.98, "grad_norm": 1.71875, "learning_rate": 0.0004786145771375955, "loss": 0.2105, "step": 96140 }, { "epoch": 3.98, "grad_norm": 1.0859375, "learning_rate": 0.00047861018808771107, "loss": 0.2341, "step": 96150 }, { "epoch": 3.98, "grad_norm": 0.78125, "learning_rate": 0.00047860579860760697, "loss": 0.1793, "step": 96160 }, { "epoch": 3.98, "grad_norm": 0.83203125, "learning_rate": 0.0004786014086972914, "loss": 0.2035, "step": 96170 }, { "epoch": 3.98, "grad_norm": 1.0, "learning_rate": 0.00047859701835677274, "loss": 0.2291, "step": 96180 }, { "epoch": 3.98, "grad_norm": 0.63671875, "learning_rate": 0.00047859262758605914, "loss": 0.1686, "step": 96190 }, { "epoch": 3.98, "grad_norm": 0.73046875, "learning_rate": 0.0004785882363851589, "loss": 0.2107, "step": 96200 }, { "epoch": 3.99, "grad_norm": 0.95703125, "learning_rate": 0.0004785838447540803, "loss": 0.2253, "step": 96210 }, { "epoch": 3.99, "grad_norm": 1.0859375, "learning_rate": 0.00047857945269283154, "loss": 0.2093, "step": 96220 }, { "epoch": 3.99, "grad_norm": 0.59375, "learning_rate": 0.000478575060201421, "loss": 0.1932, "step": 96230 }, { "epoch": 3.99, "grad_norm": 0.796875, "learning_rate": 0.00047857066727985685, "loss": 0.1899, "step": 96240 }, { "epoch": 3.99, "grad_norm": 1.5546875, "learning_rate": 0.00047856627392814746, "loss": 0.2065, "step": 96250 }, { "epoch": 3.99, "grad_norm": 0.8984375, "learning_rate": 0.00047856188014630085, "loss": 0.1961, "step": 96260 }, { "epoch": 3.99, "grad_norm": 0.77734375, "learning_rate": 0.0004785574859343256, "loss": 0.2163, "step": 96270 }, { "epoch": 3.99, "grad_norm": 1.1796875, "learning_rate": 0.00047855309129222985, "loss": 0.287, "step": 96280 }, { "epoch": 3.99, "grad_norm": 0.51953125, "learning_rate": 0.0004785486962200218, "loss": 0.1968, "step": 96290 }, { "epoch": 3.99, "grad_norm": 1.265625, "learning_rate": 0.0004785443007177098, "loss": 0.2523, "step": 96300 }, { "epoch": 3.99, "grad_norm": 0.60546875, "learning_rate": 0.0004785399047853022, "loss": 0.2017, "step": 96310 }, { "epoch": 3.99, "grad_norm": 0.15234375, "learning_rate": 0.00047853550842280704, "loss": 0.2056, "step": 96320 }, { "epoch": 3.99, "grad_norm": 0.5078125, "learning_rate": 0.00047853111163023276, "loss": 0.2453, "step": 96330 }, { "epoch": 3.99, "grad_norm": 0.93359375, "learning_rate": 0.00047852671440758767, "loss": 0.1945, "step": 96340 }, { "epoch": 3.99, "grad_norm": 0.50390625, "learning_rate": 0.00047852231675488, "loss": 0.2028, "step": 96350 }, { "epoch": 3.99, "grad_norm": 1.015625, "learning_rate": 0.0004785179186721179, "loss": 0.2169, "step": 96360 }, { "epoch": 3.99, "grad_norm": 0.365234375, "learning_rate": 0.00047851352015930985, "loss": 0.2064, "step": 96370 }, { "epoch": 3.99, "grad_norm": 1.0, "learning_rate": 0.000478509121216464, "loss": 0.2126, "step": 96380 }, { "epoch": 3.99, "grad_norm": 0.63671875, "learning_rate": 0.00047850472184358863, "loss": 0.231, "step": 96390 }, { "epoch": 3.99, "grad_norm": 0.98828125, "learning_rate": 0.0004785003220406921, "loss": 0.1834, "step": 96400 }, { "epoch": 3.99, "grad_norm": 1.734375, "learning_rate": 0.00047849592180778267, "loss": 0.2247, "step": 96410 }, { "epoch": 3.99, "grad_norm": 0.703125, "learning_rate": 0.00047849152114486847, "loss": 0.2629, "step": 96420 }, { "epoch": 3.99, "grad_norm": 0.1796875, "learning_rate": 0.000478487120051958, "loss": 0.2012, "step": 96430 }, { "epoch": 3.99, "grad_norm": 0.447265625, "learning_rate": 0.00047848271852905946, "loss": 0.2, "step": 96440 }, { "epoch": 3.99, "grad_norm": 1.7265625, "learning_rate": 0.000478478316576181, "loss": 0.2459, "step": 96450 }, { "epoch": 4.0, "grad_norm": 1.421875, "learning_rate": 0.00047847391419333116, "loss": 0.2222, "step": 96460 }, { "epoch": 4.0, "grad_norm": 0.384765625, "learning_rate": 0.000478469511380518, "loss": 0.231, "step": 96470 }, { "epoch": 4.0, "grad_norm": 0.453125, "learning_rate": 0.0004784651081377499, "loss": 0.2128, "step": 96480 }, { "epoch": 4.0, "grad_norm": 1.453125, "learning_rate": 0.0004784607044650352, "loss": 0.2017, "step": 96490 }, { "epoch": 4.0, "grad_norm": 0.419921875, "learning_rate": 0.00047845630036238204, "loss": 0.1802, "step": 96500 }, { "epoch": 4.0, "grad_norm": 0.6640625, "learning_rate": 0.00047845189582979887, "loss": 0.1999, "step": 96510 }, { "epoch": 4.0, "grad_norm": 1.046875, "learning_rate": 0.0004784474908672938, "loss": 0.1914, "step": 96520 }, { "epoch": 4.0, "grad_norm": 0.9296875, "learning_rate": 0.00047844308547487533, "loss": 0.2273, "step": 96530 }, { "epoch": 4.0, "grad_norm": 0.10986328125, "learning_rate": 0.0004784386796525516, "loss": 0.2024, "step": 96540 }, { "epoch": 4.0, "grad_norm": 0.53125, "learning_rate": 0.000478434273400331, "loss": 0.1751, "step": 96550 }, { "epoch": 4.0, "grad_norm": 0.66015625, "learning_rate": 0.00047842986671822166, "loss": 0.2333, "step": 96560 }, { "epoch": 4.0, "grad_norm": 1.078125, "learning_rate": 0.000478425459606232, "loss": 0.2161, "step": 96570 }, { "epoch": 4.0, "grad_norm": 0.51171875, "learning_rate": 0.00047842105206437037, "loss": 0.1911, "step": 96580 }, { "epoch": 4.0, "grad_norm": 0.33203125, "learning_rate": 0.0004784166440926449, "loss": 0.1908, "step": 96590 }, { "epoch": 4.0, "grad_norm": 1.3828125, "learning_rate": 0.000478412235691064, "loss": 0.2142, "step": 96600 }, { "epoch": 4.0, "grad_norm": 0.435546875, "learning_rate": 0.00047840782685963597, "loss": 0.1829, "step": 96610 }, { "epoch": 4.0, "grad_norm": 0.37109375, "learning_rate": 0.00047840341759836905, "loss": 0.2797, "step": 96620 }, { "epoch": 4.0, "grad_norm": 2.125, "learning_rate": 0.0004783990079072716, "loss": 0.2295, "step": 96630 }, { "epoch": 4.0, "grad_norm": 0.89453125, "learning_rate": 0.0004783945977863519, "loss": 0.2062, "step": 96640 }, { "epoch": 4.0, "grad_norm": 0.46875, "learning_rate": 0.0004783901872356181, "loss": 0.2207, "step": 96650 }, { "epoch": 4.0, "grad_norm": 0.58984375, "learning_rate": 0.0004783857762550787, "loss": 0.1984, "step": 96660 }, { "epoch": 4.0, "grad_norm": 1.0703125, "learning_rate": 0.000478381364844742, "loss": 0.2526, "step": 96670 }, { "epoch": 4.0, "grad_norm": 1.234375, "learning_rate": 0.00047837695300461617, "loss": 0.2326, "step": 96680 }, { "epoch": 4.0, "grad_norm": 0.56640625, "learning_rate": 0.0004783725407347096, "loss": 0.1943, "step": 96690 }, { "epoch": 4.01, "grad_norm": 0.578125, "learning_rate": 0.00047836812803503056, "loss": 0.2207, "step": 96700 }, { "epoch": 4.01, "grad_norm": 0.0, "learning_rate": 0.0004783637149055874, "loss": 0.2109, "step": 96710 }, { "epoch": 4.01, "grad_norm": 0.490234375, "learning_rate": 0.00047835930134638825, "loss": 0.203, "step": 96720 }, { "epoch": 4.01, "grad_norm": 0.40625, "learning_rate": 0.0004783548873574417, "loss": 0.2231, "step": 96730 }, { "epoch": 4.01, "grad_norm": 0.65234375, "learning_rate": 0.00047835047293875587, "loss": 0.2538, "step": 96740 }, { "epoch": 4.01, "grad_norm": 0.546875, "learning_rate": 0.00047834605809033917, "loss": 0.189, "step": 96750 }, { "epoch": 4.01, "grad_norm": 0.515625, "learning_rate": 0.0004783416428121997, "loss": 0.2437, "step": 96760 }, { "epoch": 4.01, "grad_norm": 0.91796875, "learning_rate": 0.00047833722710434603, "loss": 0.1711, "step": 96770 }, { "epoch": 4.01, "grad_norm": 2.109375, "learning_rate": 0.0004783328109667864, "loss": 0.2331, "step": 96780 }, { "epoch": 4.01, "grad_norm": 0.9296875, "learning_rate": 0.000478328394399529, "loss": 0.1889, "step": 96790 }, { "epoch": 4.01, "grad_norm": 1.21875, "learning_rate": 0.0004783239774025822, "loss": 0.2284, "step": 96800 }, { "epoch": 4.01, "grad_norm": 0.5859375, "learning_rate": 0.00047831955997595433, "loss": 0.2032, "step": 96810 }, { "epoch": 4.01, "grad_norm": 0.52734375, "learning_rate": 0.00047831514211965376, "loss": 0.177, "step": 96820 }, { "epoch": 4.01, "grad_norm": 0.609375, "learning_rate": 0.0004783107238336888, "loss": 0.1412, "step": 96830 }, { "epoch": 4.01, "grad_norm": 1.5703125, "learning_rate": 0.0004783063051180676, "loss": 0.222, "step": 96840 }, { "epoch": 4.01, "grad_norm": 0.9296875, "learning_rate": 0.00047830188597279864, "loss": 0.2145, "step": 96850 }, { "epoch": 4.01, "grad_norm": 0.6015625, "learning_rate": 0.0004782974663978902, "loss": 0.2151, "step": 96860 }, { "epoch": 4.01, "grad_norm": 0.74609375, "learning_rate": 0.00047829304639335045, "loss": 0.2325, "step": 96870 }, { "epoch": 4.01, "grad_norm": 0.73828125, "learning_rate": 0.00047828862595918796, "loss": 0.2481, "step": 96880 }, { "epoch": 4.01, "grad_norm": 1.046875, "learning_rate": 0.0004782842050954109, "loss": 0.192, "step": 96890 }, { "epoch": 4.01, "grad_norm": 0.373046875, "learning_rate": 0.0004782797838020277, "loss": 0.2047, "step": 96900 }, { "epoch": 4.01, "grad_norm": 1.0859375, "learning_rate": 0.00047827536207904655, "loss": 0.2416, "step": 96910 }, { "epoch": 4.01, "grad_norm": 0.87890625, "learning_rate": 0.0004782709399264757, "loss": 0.1913, "step": 96920 }, { "epoch": 4.01, "grad_norm": 0.60546875, "learning_rate": 0.00047826651734432377, "loss": 0.2253, "step": 96930 }, { "epoch": 4.02, "grad_norm": 1.9765625, "learning_rate": 0.00047826209433259883, "loss": 0.1791, "step": 96940 }, { "epoch": 4.02, "grad_norm": 0.8828125, "learning_rate": 0.00047825767089130925, "loss": 0.2279, "step": 96950 }, { "epoch": 4.02, "grad_norm": 0.66015625, "learning_rate": 0.00047825324702046343, "loss": 0.236, "step": 96960 }, { "epoch": 4.02, "grad_norm": 0.83203125, "learning_rate": 0.00047824882272006966, "loss": 0.2325, "step": 96970 }, { "epoch": 4.02, "grad_norm": 0.244140625, "learning_rate": 0.0004782443979901362, "loss": 0.1491, "step": 96980 }, { "epoch": 4.02, "grad_norm": 0.77734375, "learning_rate": 0.0004782399728306714, "loss": 0.1987, "step": 96990 }, { "epoch": 4.02, "grad_norm": 0.89453125, "learning_rate": 0.0004782355472416837, "loss": 0.1927, "step": 97000 }, { "epoch": 4.02, "grad_norm": 0.59375, "learning_rate": 0.00047823112122318136, "loss": 0.2509, "step": 97010 }, { "epoch": 4.02, "grad_norm": 0.609375, "learning_rate": 0.0004782266947751726, "loss": 0.1943, "step": 97020 }, { "epoch": 4.02, "grad_norm": 0.84375, "learning_rate": 0.00047822226789766595, "loss": 0.2296, "step": 97030 }, { "epoch": 4.02, "grad_norm": 0.953125, "learning_rate": 0.00047821784059066957, "loss": 0.2173, "step": 97040 }, { "epoch": 4.02, "grad_norm": 1.375, "learning_rate": 0.0004782134128541919, "loss": 0.2148, "step": 97050 }, { "epoch": 4.02, "grad_norm": 1.9453125, "learning_rate": 0.0004782089846882412, "loss": 0.2037, "step": 97060 }, { "epoch": 4.02, "grad_norm": 0.75390625, "learning_rate": 0.00047820455609282584, "loss": 0.2199, "step": 97070 }, { "epoch": 4.02, "grad_norm": 1.015625, "learning_rate": 0.0004782001270679541, "loss": 0.2456, "step": 97080 }, { "epoch": 4.02, "grad_norm": 0.6171875, "learning_rate": 0.00047819569761363445, "loss": 0.2024, "step": 97090 }, { "epoch": 4.02, "grad_norm": 0.90234375, "learning_rate": 0.00047819126772987515, "loss": 0.199, "step": 97100 }, { "epoch": 4.02, "grad_norm": 0.55078125, "learning_rate": 0.00047818683741668446, "loss": 0.2201, "step": 97110 }, { "epoch": 4.02, "grad_norm": 0.474609375, "learning_rate": 0.0004781824066740709, "loss": 0.1958, "step": 97120 }, { "epoch": 4.02, "grad_norm": 1.078125, "learning_rate": 0.0004781779755020426, "loss": 0.2176, "step": 97130 }, { "epoch": 4.02, "grad_norm": 0.80078125, "learning_rate": 0.00047817354390060796, "loss": 0.2139, "step": 97140 }, { "epoch": 4.02, "grad_norm": 0.703125, "learning_rate": 0.00047816911186977535, "loss": 0.2028, "step": 97150 }, { "epoch": 4.02, "grad_norm": 0.44921875, "learning_rate": 0.00047816467940955314, "loss": 0.2074, "step": 97160 }, { "epoch": 4.02, "grad_norm": 0.265625, "learning_rate": 0.00047816024651994973, "loss": 0.1942, "step": 97170 }, { "epoch": 4.03, "grad_norm": 0.640625, "learning_rate": 0.0004781558132009732, "loss": 0.1953, "step": 97180 }, { "epoch": 4.03, "grad_norm": 0.73828125, "learning_rate": 0.0004781513794526322, "loss": 0.2174, "step": 97190 }, { "epoch": 4.03, "grad_norm": 0.40625, "learning_rate": 0.0004781469452749349, "loss": 0.2116, "step": 97200 }, { "epoch": 4.03, "grad_norm": 0.546875, "learning_rate": 0.00047814251066788973, "loss": 0.237, "step": 97210 }, { "epoch": 4.03, "grad_norm": 0.34765625, "learning_rate": 0.00047813807563150493, "loss": 0.202, "step": 97220 }, { "epoch": 4.03, "grad_norm": 5.53125, "learning_rate": 0.000478133640165789, "loss": 0.2095, "step": 97230 }, { "epoch": 4.03, "grad_norm": 0.76953125, "learning_rate": 0.0004781292042707501, "loss": 0.2215, "step": 97240 }, { "epoch": 4.03, "grad_norm": 0.26953125, "learning_rate": 0.0004781247679463967, "loss": 0.2324, "step": 97250 }, { "epoch": 4.03, "grad_norm": 0.69921875, "learning_rate": 0.00047812033119273714, "loss": 0.2055, "step": 97260 }, { "epoch": 4.03, "grad_norm": 0.4140625, "learning_rate": 0.0004781158940097797, "loss": 0.2336, "step": 97270 }, { "epoch": 4.03, "grad_norm": 0.80859375, "learning_rate": 0.00047811145639753286, "loss": 0.2487, "step": 97280 }, { "epoch": 4.03, "grad_norm": 1.078125, "learning_rate": 0.0004781070183560048, "loss": 0.2077, "step": 97290 }, { "epoch": 4.03, "grad_norm": 1.6640625, "learning_rate": 0.000478102579885204, "loss": 0.2353, "step": 97300 }, { "epoch": 4.03, "grad_norm": 0.59765625, "learning_rate": 0.0004780981409851388, "loss": 0.161, "step": 97310 }, { "epoch": 4.03, "grad_norm": 0.83203125, "learning_rate": 0.00047809370165581756, "loss": 0.1935, "step": 97320 }, { "epoch": 4.03, "grad_norm": 0.5390625, "learning_rate": 0.0004780892618972485, "loss": 0.2185, "step": 97330 }, { "epoch": 4.03, "grad_norm": 0.828125, "learning_rate": 0.0004780848217094402, "loss": 0.2318, "step": 97340 }, { "epoch": 4.03, "grad_norm": 1.828125, "learning_rate": 0.0004780803810924009, "loss": 0.2243, "step": 97350 }, { "epoch": 4.03, "grad_norm": 0.244140625, "learning_rate": 0.00047807594004613886, "loss": 0.1852, "step": 97360 }, { "epoch": 4.03, "grad_norm": 0.70703125, "learning_rate": 0.00047807149857066256, "loss": 0.2181, "step": 97370 }, { "epoch": 4.03, "grad_norm": 0.23828125, "learning_rate": 0.0004780670566659804, "loss": 0.1682, "step": 97380 }, { "epoch": 4.03, "grad_norm": 1.0390625, "learning_rate": 0.00047806261433210056, "loss": 0.2574, "step": 97390 }, { "epoch": 4.03, "grad_norm": 0.50390625, "learning_rate": 0.0004780581715690315, "loss": 0.2389, "step": 97400 }, { "epoch": 4.03, "grad_norm": 0.470703125, "learning_rate": 0.0004780537283767817, "loss": 0.1883, "step": 97410 }, { "epoch": 4.04, "grad_norm": 0.84765625, "learning_rate": 0.00047804928475535935, "loss": 0.2381, "step": 97420 }, { "epoch": 4.04, "grad_norm": 0.859375, "learning_rate": 0.00047804484070477295, "loss": 0.1944, "step": 97430 }, { "epoch": 4.04, "grad_norm": 0.357421875, "learning_rate": 0.0004780403962250307, "loss": 0.1935, "step": 97440 }, { "epoch": 4.04, "grad_norm": 0.9609375, "learning_rate": 0.00047803595131614107, "loss": 0.1853, "step": 97450 }, { "epoch": 4.04, "grad_norm": 0.5625, "learning_rate": 0.0004780315059781124, "loss": 0.2407, "step": 97460 }, { "epoch": 4.04, "grad_norm": 1.0546875, "learning_rate": 0.00047802706021095304, "loss": 0.1905, "step": 97470 }, { "epoch": 4.04, "grad_norm": 0.400390625, "learning_rate": 0.0004780226140146714, "loss": 0.2391, "step": 97480 }, { "epoch": 4.04, "grad_norm": 0.435546875, "learning_rate": 0.00047801816738927586, "loss": 0.1623, "step": 97490 }, { "epoch": 4.04, "grad_norm": 1.1796875, "learning_rate": 0.0004780137203347747, "loss": 0.1889, "step": 97500 }, { "epoch": 4.04, "grad_norm": 0.68359375, "learning_rate": 0.00047800927285117633, "loss": 0.2438, "step": 97510 }, { "epoch": 4.04, "grad_norm": 0.62109375, "learning_rate": 0.0004780048249384892, "loss": 0.2394, "step": 97520 }, { "epoch": 4.04, "grad_norm": 1.0546875, "learning_rate": 0.0004780003765967216, "loss": 0.1959, "step": 97530 }, { "epoch": 4.04, "grad_norm": 0.54296875, "learning_rate": 0.0004779959278258819, "loss": 0.2308, "step": 97540 }, { "epoch": 4.04, "grad_norm": 0.71875, "learning_rate": 0.00047799147862597846, "loss": 0.2497, "step": 97550 }, { "epoch": 4.04, "grad_norm": 0.56640625, "learning_rate": 0.0004779870289970196, "loss": 0.2, "step": 97560 }, { "epoch": 4.04, "grad_norm": 1.0625, "learning_rate": 0.0004779825789390139, "loss": 0.2212, "step": 97570 }, { "epoch": 4.04, "grad_norm": 1.09375, "learning_rate": 0.00047797812845196965, "loss": 0.2905, "step": 97580 }, { "epoch": 4.04, "grad_norm": 0.671875, "learning_rate": 0.00047797367753589504, "loss": 0.2257, "step": 97590 }, { "epoch": 4.04, "grad_norm": 0.734375, "learning_rate": 0.00047796922619079864, "loss": 0.221, "step": 97600 }, { "epoch": 4.04, "grad_norm": 0.4453125, "learning_rate": 0.00047796477441668886, "loss": 0.1796, "step": 97610 }, { "epoch": 4.04, "grad_norm": 0.3828125, "learning_rate": 0.0004779603222135739, "loss": 0.2128, "step": 97620 }, { "epoch": 4.04, "grad_norm": 0.546875, "learning_rate": 0.0004779558695814622, "loss": 0.2278, "step": 97630 }, { "epoch": 4.04, "grad_norm": 0.9453125, "learning_rate": 0.0004779514165203622, "loss": 0.1838, "step": 97640 }, { "epoch": 4.04, "grad_norm": 0.33203125, "learning_rate": 0.0004779469630302823, "loss": 0.1977, "step": 97650 }, { "epoch": 4.05, "grad_norm": 0.267578125, "learning_rate": 0.00047794250911123085, "loss": 0.1999, "step": 97660 }, { "epoch": 4.05, "grad_norm": 0.62890625, "learning_rate": 0.0004779380547632161, "loss": 0.2702, "step": 97670 }, { "epoch": 4.05, "grad_norm": 0.66015625, "learning_rate": 0.00047793359998624666, "loss": 0.1849, "step": 97680 }, { "epoch": 4.05, "grad_norm": 0.400390625, "learning_rate": 0.0004779291447803307, "loss": 0.2196, "step": 97690 }, { "epoch": 4.05, "grad_norm": 0.4375, "learning_rate": 0.00047792468914547675, "loss": 0.1774, "step": 97700 }, { "epoch": 4.05, "grad_norm": 0.73828125, "learning_rate": 0.00047792023308169317, "loss": 0.2102, "step": 97710 }, { "epoch": 4.05, "grad_norm": 0.81640625, "learning_rate": 0.0004779157765889883, "loss": 0.2061, "step": 97720 }, { "epoch": 4.05, "grad_norm": 0.61328125, "learning_rate": 0.00047791131966737056, "loss": 0.2098, "step": 97730 }, { "epoch": 4.05, "grad_norm": 0.44140625, "learning_rate": 0.0004779068623168483, "loss": 0.2172, "step": 97740 }, { "epoch": 4.05, "grad_norm": 0.439453125, "learning_rate": 0.00047790240453743, "loss": 0.2342, "step": 97750 }, { "epoch": 4.05, "grad_norm": 0.609375, "learning_rate": 0.00047789794632912397, "loss": 0.2332, "step": 97760 }, { "epoch": 4.05, "grad_norm": 0.53125, "learning_rate": 0.0004778934876919386, "loss": 0.1971, "step": 97770 }, { "epoch": 4.05, "grad_norm": 0.75390625, "learning_rate": 0.0004778890286258822, "loss": 0.2021, "step": 97780 }, { "epoch": 4.05, "grad_norm": 0.44140625, "learning_rate": 0.0004778845691309635, "loss": 0.2143, "step": 97790 }, { "epoch": 4.05, "grad_norm": 0.6953125, "learning_rate": 0.00047788010920719046, "loss": 0.2155, "step": 97800 }, { "epoch": 4.05, "grad_norm": 0.703125, "learning_rate": 0.0004778756488545717, "loss": 0.1767, "step": 97810 }, { "epoch": 4.05, "grad_norm": 0.8671875, "learning_rate": 0.00047787118807311556, "loss": 0.1833, "step": 97820 }, { "epoch": 4.05, "grad_norm": 0.5625, "learning_rate": 0.0004778667268628305, "loss": 0.233, "step": 97830 }, { "epoch": 4.05, "grad_norm": 0.7734375, "learning_rate": 0.0004778622652237248, "loss": 0.1706, "step": 97840 }, { "epoch": 4.05, "grad_norm": 1.1875, "learning_rate": 0.00047785780315580693, "loss": 0.2249, "step": 97850 }, { "epoch": 4.05, "grad_norm": 0.447265625, "learning_rate": 0.0004778533406590854, "loss": 0.2179, "step": 97860 }, { "epoch": 4.05, "grad_norm": 0.365234375, "learning_rate": 0.00047784887773356835, "loss": 0.2135, "step": 97870 }, { "epoch": 4.05, "grad_norm": 0.83203125, "learning_rate": 0.0004778444143792644, "loss": 0.2272, "step": 97880 }, { "epoch": 4.05, "grad_norm": 0.77734375, "learning_rate": 0.0004778399505961818, "loss": 0.2175, "step": 97890 }, { "epoch": 4.06, "grad_norm": 1.0703125, "learning_rate": 0.00047783548638432905, "loss": 0.2456, "step": 97900 }, { "epoch": 4.06, "grad_norm": 0.62109375, "learning_rate": 0.0004778310217437146, "loss": 0.1552, "step": 97910 }, { "epoch": 4.06, "grad_norm": 0.51953125, "learning_rate": 0.0004778265566743467, "loss": 0.2098, "step": 97920 }, { "epoch": 4.06, "grad_norm": 0.0, "learning_rate": 0.00047782209117623375, "loss": 0.2298, "step": 97930 }, { "epoch": 4.06, "grad_norm": 0.416015625, "learning_rate": 0.00047781762524938434, "loss": 0.1582, "step": 97940 }, { "epoch": 4.06, "grad_norm": 1.0, "learning_rate": 0.0004778131588938067, "loss": 0.2153, "step": 97950 }, { "epoch": 4.06, "grad_norm": 0.390625, "learning_rate": 0.00047780869210950933, "loss": 0.2007, "step": 97960 }, { "epoch": 4.06, "grad_norm": 0.77734375, "learning_rate": 0.0004778042248965006, "loss": 0.2359, "step": 97970 }, { "epoch": 4.06, "grad_norm": 0.734375, "learning_rate": 0.00047779975725478893, "loss": 0.1834, "step": 97980 }, { "epoch": 4.06, "grad_norm": 0.6328125, "learning_rate": 0.00047779528918438265, "loss": 0.2508, "step": 97990 }, { "epoch": 4.06, "grad_norm": 0.345703125, "learning_rate": 0.00047779082068529036, "loss": 0.2588, "step": 98000 }, { "epoch": 4.06, "grad_norm": 0.90234375, "learning_rate": 0.0004777863517575202, "loss": 0.2167, "step": 98010 }, { "epoch": 4.06, "grad_norm": 0.2890625, "learning_rate": 0.0004777818824010808, "loss": 0.2053, "step": 98020 }, { "epoch": 4.06, "grad_norm": 0.2578125, "learning_rate": 0.00047777741261598053, "loss": 0.1797, "step": 98030 }, { "epoch": 4.06, "grad_norm": 0.5078125, "learning_rate": 0.00047777294240222766, "loss": 0.1823, "step": 98040 }, { "epoch": 4.06, "grad_norm": 0.87890625, "learning_rate": 0.0004777684717598308, "loss": 0.2277, "step": 98050 }, { "epoch": 4.06, "grad_norm": 0.62890625, "learning_rate": 0.00047776400068879824, "loss": 0.2102, "step": 98060 }, { "epoch": 4.06, "grad_norm": 0.734375, "learning_rate": 0.00047775952918913844, "loss": 0.202, "step": 98070 }, { "epoch": 4.06, "grad_norm": 0.77734375, "learning_rate": 0.00047775505726085975, "loss": 0.1849, "step": 98080 }, { "epoch": 4.06, "grad_norm": 0.341796875, "learning_rate": 0.00047775058490397073, "loss": 0.1554, "step": 98090 }, { "epoch": 4.06, "grad_norm": 0.427734375, "learning_rate": 0.0004777461121184796, "loss": 0.2118, "step": 98100 }, { "epoch": 4.06, "grad_norm": 0.7421875, "learning_rate": 0.000477741638904395, "loss": 0.1611, "step": 98110 }, { "epoch": 4.06, "grad_norm": 0.76953125, "learning_rate": 0.00047773716526172515, "loss": 0.2492, "step": 98120 }, { "epoch": 4.06, "grad_norm": 0.44140625, "learning_rate": 0.0004777326911904786, "loss": 0.1691, "step": 98130 }, { "epoch": 4.06, "grad_norm": 0.296875, "learning_rate": 0.0004777282166906637, "loss": 0.1624, "step": 98140 }, { "epoch": 4.07, "grad_norm": 0.515625, "learning_rate": 0.00047772374176228885, "loss": 0.1728, "step": 98150 }, { "epoch": 4.07, "grad_norm": 1.515625, "learning_rate": 0.0004777192664053625, "loss": 0.1766, "step": 98160 }, { "epoch": 4.07, "grad_norm": 1.09375, "learning_rate": 0.0004777147906198931, "loss": 0.205, "step": 98170 }, { "epoch": 4.07, "grad_norm": 0.6171875, "learning_rate": 0.0004777103144058891, "loss": 0.1836, "step": 98180 }, { "epoch": 4.07, "grad_norm": 0.734375, "learning_rate": 0.00047770583776335883, "loss": 0.2449, "step": 98190 }, { "epoch": 4.07, "grad_norm": 0.875, "learning_rate": 0.0004777013606923108, "loss": 0.1952, "step": 98200 }, { "epoch": 4.07, "grad_norm": 0.55859375, "learning_rate": 0.0004776968831927534, "loss": 0.2371, "step": 98210 }, { "epoch": 4.07, "grad_norm": 1.71875, "learning_rate": 0.00047769240526469506, "loss": 0.2093, "step": 98220 }, { "epoch": 4.07, "grad_norm": 1.0234375, "learning_rate": 0.0004776879269081441, "loss": 0.1852, "step": 98230 }, { "epoch": 4.07, "grad_norm": 0.376953125, "learning_rate": 0.00047768344812310916, "loss": 0.2194, "step": 98240 }, { "epoch": 4.07, "grad_norm": 0.70703125, "learning_rate": 0.0004776789689095985, "loss": 0.2102, "step": 98250 }, { "epoch": 4.07, "grad_norm": 0.52734375, "learning_rate": 0.0004776744892676207, "loss": 0.2358, "step": 98260 }, { "epoch": 4.07, "grad_norm": 0.68359375, "learning_rate": 0.000477670009197184, "loss": 0.1893, "step": 98270 }, { "epoch": 4.07, "grad_norm": 0.85546875, "learning_rate": 0.00047766552869829695, "loss": 0.2384, "step": 98280 }, { "epoch": 4.07, "grad_norm": 1.03125, "learning_rate": 0.00047766104777096795, "loss": 0.1855, "step": 98290 }, { "epoch": 4.07, "grad_norm": 0.53515625, "learning_rate": 0.0004776565664152055, "loss": 0.1929, "step": 98300 }, { "epoch": 4.07, "grad_norm": 1.3984375, "learning_rate": 0.000477652084631018, "loss": 0.2421, "step": 98310 }, { "epoch": 4.07, "grad_norm": 1.1484375, "learning_rate": 0.00047764760241841375, "loss": 0.2324, "step": 98320 }, { "epoch": 4.07, "grad_norm": 0.765625, "learning_rate": 0.0004776431197774013, "loss": 0.2049, "step": 98330 }, { "epoch": 4.07, "grad_norm": 0.578125, "learning_rate": 0.0004776386367079892, "loss": 0.2218, "step": 98340 }, { "epoch": 4.07, "grad_norm": 1.3359375, "learning_rate": 0.00047763415321018564, "loss": 0.2227, "step": 98350 }, { "epoch": 4.07, "grad_norm": 0.3359375, "learning_rate": 0.0004776296692839993, "loss": 0.1827, "step": 98360 }, { "epoch": 4.07, "grad_norm": 0.39453125, "learning_rate": 0.00047762518492943843, "loss": 0.1935, "step": 98370 }, { "epoch": 4.07, "grad_norm": 0.7109375, "learning_rate": 0.0004776207001465116, "loss": 0.2001, "step": 98380 }, { "epoch": 4.08, "grad_norm": 0.81640625, "learning_rate": 0.0004776162149352271, "loss": 0.2527, "step": 98390 }, { "epoch": 4.08, "grad_norm": 0.4375, "learning_rate": 0.00047761172929559355, "loss": 0.2065, "step": 98400 }, { "epoch": 4.08, "grad_norm": 0.62890625, "learning_rate": 0.00047760724322761926, "loss": 0.2767, "step": 98410 }, { "epoch": 4.08, "grad_norm": 1.625, "learning_rate": 0.0004776027567313127, "loss": 0.2396, "step": 98420 }, { "epoch": 4.08, "grad_norm": 0.44921875, "learning_rate": 0.0004775982698066824, "loss": 0.2404, "step": 98430 }, { "epoch": 4.08, "grad_norm": 0.1943359375, "learning_rate": 0.0004775937824537367, "loss": 0.1997, "step": 98440 }, { "epoch": 4.08, "grad_norm": 0.345703125, "learning_rate": 0.00047758929467248406, "loss": 0.2197, "step": 98450 }, { "epoch": 4.08, "grad_norm": 0.431640625, "learning_rate": 0.00047758480646293303, "loss": 0.2125, "step": 98460 }, { "epoch": 4.08, "grad_norm": 0.427734375, "learning_rate": 0.0004775803178250919, "loss": 0.2115, "step": 98470 }, { "epoch": 4.08, "grad_norm": 0.984375, "learning_rate": 0.00047757582875896916, "loss": 0.1915, "step": 98480 }, { "epoch": 4.08, "grad_norm": 0.75390625, "learning_rate": 0.0004775713392645733, "loss": 0.205, "step": 98490 }, { "epoch": 4.08, "grad_norm": 0.79296875, "learning_rate": 0.00047756684934191273, "loss": 0.2076, "step": 98500 }, { "epoch": 4.08, "grad_norm": 0.8828125, "learning_rate": 0.00047756235899099606, "loss": 0.2049, "step": 98510 }, { "epoch": 4.08, "grad_norm": 0.361328125, "learning_rate": 0.00047755786821183146, "loss": 0.2497, "step": 98520 }, { "epoch": 4.08, "grad_norm": 0.71875, "learning_rate": 0.0004775533770044276, "loss": 0.2053, "step": 98530 }, { "epoch": 4.08, "grad_norm": 0.61328125, "learning_rate": 0.0004775488853687928, "loss": 0.2099, "step": 98540 }, { "epoch": 4.08, "grad_norm": 0.412109375, "learning_rate": 0.00047754439330493565, "loss": 0.2134, "step": 98550 }, { "epoch": 4.08, "grad_norm": 0.5546875, "learning_rate": 0.0004775399008128645, "loss": 0.23, "step": 98560 }, { "epoch": 4.08, "grad_norm": 0.80859375, "learning_rate": 0.0004775354078925878, "loss": 0.2541, "step": 98570 }, { "epoch": 4.08, "grad_norm": 1.7265625, "learning_rate": 0.00047753091454411404, "loss": 0.1736, "step": 98580 }, { "epoch": 4.08, "grad_norm": 0.8359375, "learning_rate": 0.0004775264207674517, "loss": 0.1767, "step": 98590 }, { "epoch": 4.08, "grad_norm": 0.83984375, "learning_rate": 0.0004775219265626092, "loss": 0.2089, "step": 98600 }, { "epoch": 4.08, "grad_norm": 0.94921875, "learning_rate": 0.00047751743192959496, "loss": 0.2386, "step": 98610 }, { "epoch": 4.08, "grad_norm": 0.35546875, "learning_rate": 0.0004775129368684175, "loss": 0.2132, "step": 98620 }, { "epoch": 4.09, "grad_norm": 0.67578125, "learning_rate": 0.00047750844137908527, "loss": 0.2314, "step": 98630 }, { "epoch": 4.09, "grad_norm": 0.5625, "learning_rate": 0.0004775039454616067, "loss": 0.2237, "step": 98640 }, { "epoch": 4.09, "grad_norm": 1.3203125, "learning_rate": 0.0004774994491159903, "loss": 0.2225, "step": 98650 }, { "epoch": 4.09, "grad_norm": 0.6796875, "learning_rate": 0.00047749495234224447, "loss": 0.2628, "step": 98660 }, { "epoch": 4.09, "grad_norm": 0.546875, "learning_rate": 0.0004774904551403778, "loss": 0.2181, "step": 98670 }, { "epoch": 4.09, "grad_norm": 0.65234375, "learning_rate": 0.0004774859575103986, "loss": 0.2358, "step": 98680 }, { "epoch": 4.09, "grad_norm": 0.85546875, "learning_rate": 0.00047748145945231536, "loss": 0.2194, "step": 98690 }, { "epoch": 4.09, "grad_norm": 0.8671875, "learning_rate": 0.00047747696096613664, "loss": 0.2412, "step": 98700 }, { "epoch": 4.09, "grad_norm": 1.5234375, "learning_rate": 0.0004774724620518708, "loss": 0.2859, "step": 98710 }, { "epoch": 4.09, "grad_norm": 0.53515625, "learning_rate": 0.00047746796270952644, "loss": 0.2264, "step": 98720 }, { "epoch": 4.09, "grad_norm": 0.451171875, "learning_rate": 0.0004774634629391118, "loss": 0.175, "step": 98730 }, { "epoch": 4.09, "grad_norm": 0.3125, "learning_rate": 0.0004774589627406356, "loss": 0.1645, "step": 98740 }, { "epoch": 4.09, "grad_norm": 1.8046875, "learning_rate": 0.00047745446211410614, "loss": 0.1823, "step": 98750 }, { "epoch": 4.09, "grad_norm": 0.6640625, "learning_rate": 0.000477449961059532, "loss": 0.1695, "step": 98760 }, { "epoch": 4.09, "grad_norm": 0.58203125, "learning_rate": 0.00047744545957692156, "loss": 0.1804, "step": 98770 }, { "epoch": 4.09, "grad_norm": 0.443359375, "learning_rate": 0.0004774409576662833, "loss": 0.23, "step": 98780 }, { "epoch": 4.09, "grad_norm": 1.0859375, "learning_rate": 0.0004774364553276257, "loss": 0.1947, "step": 98790 }, { "epoch": 4.09, "grad_norm": 0.671875, "learning_rate": 0.0004774319525609573, "loss": 0.1945, "step": 98800 }, { "epoch": 4.09, "grad_norm": 0.69140625, "learning_rate": 0.0004774274493662865, "loss": 0.2628, "step": 98810 }, { "epoch": 4.09, "grad_norm": 1.4140625, "learning_rate": 0.0004774229457436218, "loss": 0.1748, "step": 98820 }, { "epoch": 4.09, "grad_norm": 0.6171875, "learning_rate": 0.0004774184416929717, "loss": 0.2233, "step": 98830 }, { "epoch": 4.09, "grad_norm": 1.875, "learning_rate": 0.0004774139372143447, "loss": 0.1573, "step": 98840 }, { "epoch": 4.09, "grad_norm": 1.125, "learning_rate": 0.00047740943230774915, "loss": 0.211, "step": 98850 }, { "epoch": 4.09, "grad_norm": 0.59375, "learning_rate": 0.0004774049269731936, "loss": 0.1767, "step": 98860 }, { "epoch": 4.1, "grad_norm": 0.416015625, "learning_rate": 0.00047740042121068656, "loss": 0.208, "step": 98870 }, { "epoch": 4.1, "grad_norm": 0.27734375, "learning_rate": 0.0004773959150202365, "loss": 0.1998, "step": 98880 }, { "epoch": 4.1, "grad_norm": 0.416015625, "learning_rate": 0.00047739140840185186, "loss": 0.2031, "step": 98890 }, { "epoch": 4.1, "grad_norm": 0.62890625, "learning_rate": 0.0004773869013555412, "loss": 0.2301, "step": 98900 }, { "epoch": 4.1, "grad_norm": 0.69921875, "learning_rate": 0.00047738239388131286, "loss": 0.1786, "step": 98910 }, { "epoch": 4.1, "grad_norm": 0.9140625, "learning_rate": 0.00047737788597917546, "loss": 0.2367, "step": 98920 }, { "epoch": 4.1, "grad_norm": 0.94921875, "learning_rate": 0.00047737337764913745, "loss": 0.1912, "step": 98930 }, { "epoch": 4.1, "grad_norm": 1.203125, "learning_rate": 0.00047736886889120724, "loss": 0.2024, "step": 98940 }, { "epoch": 4.1, "grad_norm": 0.80078125, "learning_rate": 0.00047736435970539343, "loss": 0.2342, "step": 98950 }, { "epoch": 4.1, "grad_norm": 1.046875, "learning_rate": 0.00047735985009170445, "loss": 0.2302, "step": 98960 }, { "epoch": 4.1, "grad_norm": 1.2265625, "learning_rate": 0.00047735534005014874, "loss": 0.1795, "step": 98970 }, { "epoch": 4.1, "grad_norm": 0.62109375, "learning_rate": 0.0004773508295807348, "loss": 0.254, "step": 98980 }, { "epoch": 4.1, "grad_norm": 0.9453125, "learning_rate": 0.00047734631868347123, "loss": 0.2042, "step": 98990 }, { "epoch": 4.1, "grad_norm": 1.9140625, "learning_rate": 0.0004773418073583664, "loss": 0.2475, "step": 99000 }, { "epoch": 4.1, "grad_norm": 0.7109375, "learning_rate": 0.0004773372956054289, "loss": 0.1631, "step": 99010 }, { "epoch": 4.1, "grad_norm": 0.2041015625, "learning_rate": 0.0004773327834246671, "loss": 0.181, "step": 99020 }, { "epoch": 4.1, "grad_norm": 0.85546875, "learning_rate": 0.00047732827081608956, "loss": 0.1943, "step": 99030 }, { "epoch": 4.1, "grad_norm": 0.5859375, "learning_rate": 0.00047732375777970475, "loss": 0.2456, "step": 99040 }, { "epoch": 4.1, "grad_norm": 0.4140625, "learning_rate": 0.0004773192443155212, "loss": 0.2101, "step": 99050 }, { "epoch": 4.1, "grad_norm": 0.60546875, "learning_rate": 0.0004773147304235474, "loss": 0.2202, "step": 99060 }, { "epoch": 4.1, "grad_norm": 0.7734375, "learning_rate": 0.00047731021610379176, "loss": 0.2145, "step": 99070 }, { "epoch": 4.1, "grad_norm": 0.51953125, "learning_rate": 0.00047730570135626297, "loss": 0.2208, "step": 99080 }, { "epoch": 4.1, "grad_norm": 0.68359375, "learning_rate": 0.0004773011861809693, "loss": 0.1889, "step": 99090 }, { "epoch": 4.1, "grad_norm": 0.50390625, "learning_rate": 0.0004772966705779193, "loss": 0.1673, "step": 99100 }, { "epoch": 4.11, "grad_norm": 0.4296875, "learning_rate": 0.0004772921545471216, "loss": 0.2792, "step": 99110 }, { "epoch": 4.11, "grad_norm": 0.3671875, "learning_rate": 0.00047728763808858453, "loss": 0.2353, "step": 99120 }, { "epoch": 4.11, "grad_norm": 0.6796875, "learning_rate": 0.00047728312120231674, "loss": 0.2125, "step": 99130 }, { "epoch": 4.11, "grad_norm": 0.79296875, "learning_rate": 0.0004772786038883267, "loss": 0.2239, "step": 99140 }, { "epoch": 4.11, "grad_norm": 1.40625, "learning_rate": 0.0004772740861466227, "loss": 0.1912, "step": 99150 }, { "epoch": 4.11, "grad_norm": 0.6015625, "learning_rate": 0.00047726956797721355, "loss": 0.1997, "step": 99160 }, { "epoch": 4.11, "grad_norm": 0.50390625, "learning_rate": 0.00047726504938010763, "loss": 0.2067, "step": 99170 }, { "epoch": 4.11, "grad_norm": 0.640625, "learning_rate": 0.00047726053035531334, "loss": 0.2374, "step": 99180 }, { "epoch": 4.11, "grad_norm": 0.73828125, "learning_rate": 0.0004772560109028393, "loss": 0.2117, "step": 99190 }, { "epoch": 4.11, "grad_norm": 0.150390625, "learning_rate": 0.00047725149102269406, "loss": 0.2054, "step": 99200 }, { "epoch": 4.11, "grad_norm": 0.5625, "learning_rate": 0.000477246970714886, "loss": 0.1926, "step": 99210 }, { "epoch": 4.11, "grad_norm": 0.62109375, "learning_rate": 0.0004772424499794237, "loss": 0.2491, "step": 99220 }, { "epoch": 4.11, "grad_norm": 0.96484375, "learning_rate": 0.0004772379288163156, "loss": 0.2454, "step": 99230 }, { "epoch": 4.11, "grad_norm": 0.357421875, "learning_rate": 0.0004772334072255703, "loss": 0.2089, "step": 99240 }, { "epoch": 4.11, "grad_norm": 0.56640625, "learning_rate": 0.0004772288852071963, "loss": 0.216, "step": 99250 }, { "epoch": 4.11, "grad_norm": 2.1875, "learning_rate": 0.00047722436276120193, "loss": 0.2161, "step": 99260 }, { "epoch": 4.11, "grad_norm": 0.470703125, "learning_rate": 0.00047721983988759597, "loss": 0.2068, "step": 99270 }, { "epoch": 4.11, "grad_norm": 0.54296875, "learning_rate": 0.00047721531658638684, "loss": 0.1914, "step": 99280 }, { "epoch": 4.11, "grad_norm": 0.64453125, "learning_rate": 0.00047721079285758297, "loss": 0.218, "step": 99290 }, { "epoch": 4.11, "grad_norm": 0.21484375, "learning_rate": 0.00047720626870119295, "loss": 0.2193, "step": 99300 }, { "epoch": 4.11, "grad_norm": 0.875, "learning_rate": 0.0004772017441172252, "loss": 0.2121, "step": 99310 }, { "epoch": 4.11, "grad_norm": 0.29296875, "learning_rate": 0.00047719721910568835, "loss": 0.2194, "step": 99320 }, { "epoch": 4.11, "grad_norm": 0.29296875, "learning_rate": 0.0004771926936665909, "loss": 0.2284, "step": 99330 }, { "epoch": 4.11, "grad_norm": 0.00131988525390625, "learning_rate": 0.0004771881677999413, "loss": 0.1545, "step": 99340 }, { "epoch": 4.12, "grad_norm": 1.671875, "learning_rate": 0.0004771836415057481, "loss": 0.2294, "step": 99350 }, { "epoch": 4.12, "grad_norm": 0.326171875, "learning_rate": 0.0004771791147840198, "loss": 0.1794, "step": 99360 }, { "epoch": 4.12, "grad_norm": 1.5859375, "learning_rate": 0.000477174587634765, "loss": 0.2279, "step": 99370 }, { "epoch": 4.12, "grad_norm": 1.3203125, "learning_rate": 0.0004771700600579921, "loss": 0.2204, "step": 99380 }, { "epoch": 4.12, "grad_norm": 0.9609375, "learning_rate": 0.0004771655320537097, "loss": 0.2089, "step": 99390 }, { "epoch": 4.12, "grad_norm": 0.921875, "learning_rate": 0.0004771610036219263, "loss": 0.196, "step": 99400 }, { "epoch": 4.12, "grad_norm": 0.34375, "learning_rate": 0.00047715647476265043, "loss": 0.198, "step": 99410 }, { "epoch": 4.12, "grad_norm": 0.71484375, "learning_rate": 0.0004771519454758906, "loss": 0.1949, "step": 99420 }, { "epoch": 4.12, "grad_norm": 0.322265625, "learning_rate": 0.00047714741576165533, "loss": 0.2447, "step": 99430 }, { "epoch": 4.12, "grad_norm": 0.474609375, "learning_rate": 0.00047714288561995325, "loss": 0.1816, "step": 99440 }, { "epoch": 4.12, "grad_norm": 0.3359375, "learning_rate": 0.00047713835505079264, "loss": 0.1558, "step": 99450 }, { "epoch": 4.12, "grad_norm": 0.94140625, "learning_rate": 0.0004771338240541823, "loss": 0.1883, "step": 99460 }, { "epoch": 4.12, "grad_norm": 0.6484375, "learning_rate": 0.0004771292926301305, "loss": 0.1995, "step": 99470 }, { "epoch": 4.12, "grad_norm": 0.77734375, "learning_rate": 0.00047712476077864597, "loss": 0.2154, "step": 99480 }, { "epoch": 4.12, "grad_norm": 0.734375, "learning_rate": 0.0004771202284997372, "loss": 0.194, "step": 99490 }, { "epoch": 4.12, "grad_norm": 1.4453125, "learning_rate": 0.00047711569579341265, "loss": 0.2442, "step": 99500 }, { "epoch": 4.12, "grad_norm": 0.625, "learning_rate": 0.00047711116265968087, "loss": 0.182, "step": 99510 }, { "epoch": 4.12, "grad_norm": 0.77734375, "learning_rate": 0.0004771066290985504, "loss": 0.1697, "step": 99520 }, { "epoch": 4.12, "grad_norm": 4.125, "learning_rate": 0.00047710209511002985, "loss": 0.1968, "step": 99530 }, { "epoch": 4.12, "grad_norm": 0.5, "learning_rate": 0.0004770975606941276, "loss": 0.1806, "step": 99540 }, { "epoch": 4.12, "grad_norm": 0.85546875, "learning_rate": 0.0004770930258508523, "loss": 0.2216, "step": 99550 }, { "epoch": 4.12, "grad_norm": 0.9296875, "learning_rate": 0.0004770884905802125, "loss": 0.2182, "step": 99560 }, { "epoch": 4.12, "grad_norm": 1.5390625, "learning_rate": 0.00047708395488221664, "loss": 0.1752, "step": 99570 }, { "epoch": 4.12, "grad_norm": 0.3671875, "learning_rate": 0.0004770794187568733, "loss": 0.2256, "step": 99580 }, { "epoch": 4.13, "grad_norm": 0.98046875, "learning_rate": 0.0004770748822041909, "loss": 0.186, "step": 99590 }, { "epoch": 4.13, "grad_norm": 0.74609375, "learning_rate": 0.00047707034522417825, "loss": 0.1676, "step": 99600 }, { "epoch": 4.13, "grad_norm": 1.1875, "learning_rate": 0.00047706580781684373, "loss": 0.2254, "step": 99610 }, { "epoch": 4.13, "grad_norm": 0.328125, "learning_rate": 0.0004770612699821958, "loss": 0.2371, "step": 99620 }, { "epoch": 4.13, "grad_norm": 0.7265625, "learning_rate": 0.0004770567317202431, "loss": 0.2398, "step": 99630 }, { "epoch": 4.13, "grad_norm": 2.109375, "learning_rate": 0.00047705219303099424, "loss": 0.2369, "step": 99640 }, { "epoch": 4.13, "grad_norm": 0.7265625, "learning_rate": 0.0004770476539144576, "loss": 0.2037, "step": 99650 }, { "epoch": 4.13, "grad_norm": 0.34375, "learning_rate": 0.0004770431143706417, "loss": 0.1788, "step": 99660 }, { "epoch": 4.13, "grad_norm": 0.9765625, "learning_rate": 0.0004770385743995553, "loss": 0.2051, "step": 99670 }, { "epoch": 4.13, "grad_norm": 1.4296875, "learning_rate": 0.00047703403400120684, "loss": 0.212, "step": 99680 }, { "epoch": 4.13, "grad_norm": 0.333984375, "learning_rate": 0.00047702949317560486, "loss": 0.2033, "step": 99690 }, { "epoch": 4.13, "grad_norm": 0.97265625, "learning_rate": 0.00047702495192275773, "loss": 0.2038, "step": 99700 }, { "epoch": 4.13, "grad_norm": 0.578125, "learning_rate": 0.0004770204102426743, "loss": 0.2357, "step": 99710 }, { "epoch": 4.13, "grad_norm": 1.375, "learning_rate": 0.0004770158681353629, "loss": 0.1652, "step": 99720 }, { "epoch": 4.13, "grad_norm": 0.734375, "learning_rate": 0.0004770113256008322, "loss": 0.212, "step": 99730 }, { "epoch": 4.13, "grad_norm": 1.390625, "learning_rate": 0.0004770067826390907, "loss": 0.278, "step": 99740 }, { "epoch": 4.13, "grad_norm": 0.3515625, "learning_rate": 0.00047700223925014695, "loss": 0.1676, "step": 99750 }, { "epoch": 4.13, "grad_norm": 0.5078125, "learning_rate": 0.0004769976954340095, "loss": 0.2102, "step": 99760 }, { "epoch": 4.13, "grad_norm": 0.69140625, "learning_rate": 0.00047699315119068686, "loss": 0.2222, "step": 99770 }, { "epoch": 4.13, "grad_norm": 0.53125, "learning_rate": 0.0004769886065201876, "loss": 0.1822, "step": 99780 }, { "epoch": 4.13, "grad_norm": 0.921875, "learning_rate": 0.0004769840614225204, "loss": 0.205, "step": 99790 }, { "epoch": 4.13, "grad_norm": 0.5078125, "learning_rate": 0.00047697951589769364, "loss": 0.2792, "step": 99800 }, { "epoch": 4.13, "grad_norm": 0.78125, "learning_rate": 0.000476974969945716, "loss": 0.2261, "step": 99810 }, { "epoch": 4.13, "grad_norm": 0.7265625, "learning_rate": 0.0004769704235665959, "loss": 0.2063, "step": 99820 }, { "epoch": 4.13, "grad_norm": 0.62890625, "learning_rate": 0.000476965876760342, "loss": 0.1809, "step": 99830 }, { "epoch": 4.14, "grad_norm": 0.93359375, "learning_rate": 0.0004769613295269628, "loss": 0.2275, "step": 99840 }, { "epoch": 4.14, "grad_norm": 0.361328125, "learning_rate": 0.000476956781866467, "loss": 0.2138, "step": 99850 }, { "epoch": 4.14, "grad_norm": 0.7734375, "learning_rate": 0.0004769522337788629, "loss": 0.18, "step": 99860 }, { "epoch": 4.14, "grad_norm": 0.69140625, "learning_rate": 0.0004769476852641593, "loss": 0.2557, "step": 99870 }, { "epoch": 4.14, "grad_norm": 0.796875, "learning_rate": 0.00047694313632236466, "loss": 0.2348, "step": 99880 }, { "epoch": 4.14, "grad_norm": 0.7890625, "learning_rate": 0.0004769385869534875, "loss": 0.2076, "step": 99890 }, { "epoch": 4.14, "grad_norm": 0.64453125, "learning_rate": 0.00047693403715753647, "loss": 0.2312, "step": 99900 }, { "epoch": 4.14, "grad_norm": 0.494140625, "learning_rate": 0.00047692948693452006, "loss": 0.1606, "step": 99910 }, { "epoch": 4.14, "grad_norm": 1.0703125, "learning_rate": 0.0004769249362844469, "loss": 0.2722, "step": 99920 }, { "epoch": 4.14, "grad_norm": 0.5, "learning_rate": 0.00047692038520732544, "loss": 0.2333, "step": 99930 }, { "epoch": 4.14, "grad_norm": 0.96875, "learning_rate": 0.0004769158337031644, "loss": 0.1651, "step": 99940 }, { "epoch": 4.14, "grad_norm": 0.86328125, "learning_rate": 0.0004769112817719722, "loss": 0.2336, "step": 99950 }, { "epoch": 4.14, "grad_norm": 0.5234375, "learning_rate": 0.0004769067294137575, "loss": 0.2397, "step": 99960 }, { "epoch": 4.14, "grad_norm": 1.453125, "learning_rate": 0.0004769021766285289, "loss": 0.2469, "step": 99970 }, { "epoch": 4.14, "grad_norm": 0.76953125, "learning_rate": 0.00047689762341629477, "loss": 0.2406, "step": 99980 }, { "epoch": 4.14, "grad_norm": 0.75, "learning_rate": 0.00047689306977706394, "loss": 0.2232, "step": 99990 }, { "epoch": 4.14, "grad_norm": 0.859375, "learning_rate": 0.0004768885157108448, "loss": 0.159, "step": 100000 }, { "epoch": 4.14, "grad_norm": 0.57421875, "learning_rate": 0.000476883961217646, "loss": 0.2042, "step": 100010 }, { "epoch": 4.14, "grad_norm": 0.49609375, "learning_rate": 0.00047687940629747606, "loss": 0.2118, "step": 100020 }, { "epoch": 4.14, "grad_norm": 0.53515625, "learning_rate": 0.00047687485095034357, "loss": 0.2263, "step": 100030 }, { "epoch": 4.14, "grad_norm": 0.83984375, "learning_rate": 0.00047687029517625714, "loss": 0.2486, "step": 100040 }, { "epoch": 4.14, "grad_norm": 0.859375, "learning_rate": 0.00047686573897522534, "loss": 0.1759, "step": 100050 }, { "epoch": 4.14, "grad_norm": 1.953125, "learning_rate": 0.0004768611823472566, "loss": 0.1896, "step": 100060 }, { "epoch": 4.14, "grad_norm": 0.373046875, "learning_rate": 0.0004768566252923597, "loss": 0.2257, "step": 100070 }, { "epoch": 4.15, "grad_norm": 0.33984375, "learning_rate": 0.0004768520678105432, "loss": 0.2127, "step": 100080 }, { "epoch": 4.15, "grad_norm": 0.86328125, "learning_rate": 0.0004768475099018155, "loss": 0.1834, "step": 100090 }, { "epoch": 4.15, "grad_norm": 0.59765625, "learning_rate": 0.0004768429515661853, "loss": 0.1845, "step": 100100 }, { "epoch": 4.15, "grad_norm": 0.193359375, "learning_rate": 0.0004768383928036612, "loss": 0.1823, "step": 100110 }, { "epoch": 4.15, "grad_norm": 1.9765625, "learning_rate": 0.0004768338336142517, "loss": 0.2616, "step": 100120 }, { "epoch": 4.15, "grad_norm": 0.6484375, "learning_rate": 0.0004768292739979655, "loss": 0.2218, "step": 100130 }, { "epoch": 4.15, "grad_norm": 0.640625, "learning_rate": 0.000476824713954811, "loss": 0.2634, "step": 100140 }, { "epoch": 4.15, "grad_norm": 0.59375, "learning_rate": 0.0004768201534847969, "loss": 0.2311, "step": 100150 }, { "epoch": 4.15, "grad_norm": 0.640625, "learning_rate": 0.00047681559258793186, "loss": 0.1867, "step": 100160 }, { "epoch": 4.15, "grad_norm": 0.84375, "learning_rate": 0.00047681103126422434, "loss": 0.1654, "step": 100170 }, { "epoch": 4.15, "grad_norm": 0.74609375, "learning_rate": 0.0004768064695136829, "loss": 0.1952, "step": 100180 }, { "epoch": 4.15, "grad_norm": 0.71484375, "learning_rate": 0.0004768019073363161, "loss": 0.203, "step": 100190 }, { "epoch": 4.15, "grad_norm": 1.09375, "learning_rate": 0.00047679734473213276, "loss": 0.2211, "step": 100200 }, { "epoch": 4.15, "grad_norm": 0.7890625, "learning_rate": 0.00047679278170114123, "loss": 0.221, "step": 100210 }, { "epoch": 4.15, "grad_norm": 0.78125, "learning_rate": 0.00047678821824335017, "loss": 0.169, "step": 100220 }, { "epoch": 4.15, "grad_norm": 0.423828125, "learning_rate": 0.00047678365435876817, "loss": 0.1742, "step": 100230 }, { "epoch": 4.15, "grad_norm": 0.498046875, "learning_rate": 0.00047677909004740387, "loss": 0.1746, "step": 100240 }, { "epoch": 4.15, "grad_norm": 1.15625, "learning_rate": 0.00047677452530926577, "loss": 0.2117, "step": 100250 }, { "epoch": 4.15, "grad_norm": 0.80859375, "learning_rate": 0.0004767699601443626, "loss": 0.2249, "step": 100260 }, { "epoch": 4.15, "grad_norm": 0.515625, "learning_rate": 0.0004767653945527027, "loss": 0.2193, "step": 100270 }, { "epoch": 4.15, "grad_norm": 0.466796875, "learning_rate": 0.00047676082853429495, "loss": 0.217, "step": 100280 }, { "epoch": 4.15, "grad_norm": 0.361328125, "learning_rate": 0.00047675626208914775, "loss": 0.1734, "step": 100290 }, { "epoch": 4.15, "grad_norm": 0.494140625, "learning_rate": 0.00047675169521726974, "loss": 0.1899, "step": 100300 }, { "epoch": 4.15, "grad_norm": 0.51171875, "learning_rate": 0.00047674712791866955, "loss": 0.2451, "step": 100310 }, { "epoch": 4.16, "grad_norm": 0.75, "learning_rate": 0.00047674256019335573, "loss": 0.1837, "step": 100320 }, { "epoch": 4.16, "grad_norm": 0.75390625, "learning_rate": 0.00047673799204133696, "loss": 0.2066, "step": 100330 }, { "epoch": 4.16, "grad_norm": 1.328125, "learning_rate": 0.00047673342346262165, "loss": 0.2266, "step": 100340 }, { "epoch": 4.16, "grad_norm": 0.0, "learning_rate": 0.00047672885445721857, "loss": 0.2616, "step": 100350 }, { "epoch": 4.16, "grad_norm": 0.73828125, "learning_rate": 0.00047672428502513634, "loss": 0.1876, "step": 100360 }, { "epoch": 4.16, "grad_norm": 0.52734375, "learning_rate": 0.0004767197151663835, "loss": 0.1781, "step": 100370 }, { "epoch": 4.16, "grad_norm": 1.2734375, "learning_rate": 0.00047671514488096856, "loss": 0.1697, "step": 100380 }, { "epoch": 4.16, "grad_norm": 0.474609375, "learning_rate": 0.0004767105741689002, "loss": 0.2026, "step": 100390 }, { "epoch": 4.16, "grad_norm": 0.98046875, "learning_rate": 0.0004767060030301871, "loss": 0.2179, "step": 100400 }, { "epoch": 4.16, "grad_norm": 0.45703125, "learning_rate": 0.0004767014314648377, "loss": 0.1945, "step": 100410 }, { "epoch": 4.16, "grad_norm": 0.390625, "learning_rate": 0.0004766968594728607, "loss": 0.1963, "step": 100420 }, { "epoch": 4.16, "grad_norm": 0.28515625, "learning_rate": 0.0004766922870542647, "loss": 0.1758, "step": 100430 }, { "epoch": 4.16, "grad_norm": 0.53125, "learning_rate": 0.0004766877142090583, "loss": 0.1878, "step": 100440 }, { "epoch": 4.16, "grad_norm": 0.765625, "learning_rate": 0.0004766831409372502, "loss": 0.2117, "step": 100450 }, { "epoch": 4.16, "grad_norm": 2.96875, "learning_rate": 0.00047667856723884874, "loss": 0.2061, "step": 100460 }, { "epoch": 4.16, "grad_norm": 0.51953125, "learning_rate": 0.0004766739931138628, "loss": 0.2237, "step": 100470 }, { "epoch": 4.16, "grad_norm": 0.361328125, "learning_rate": 0.00047666941856230085, "loss": 0.2457, "step": 100480 }, { "epoch": 4.16, "grad_norm": 0.96875, "learning_rate": 0.00047666484358417155, "loss": 0.2149, "step": 100490 }, { "epoch": 4.16, "grad_norm": 0.81640625, "learning_rate": 0.0004766602681794835, "loss": 0.2365, "step": 100500 }, { "epoch": 4.16, "grad_norm": 1.0859375, "learning_rate": 0.00047665569234824523, "loss": 0.2322, "step": 100510 }, { "epoch": 4.16, "grad_norm": 0.455078125, "learning_rate": 0.0004766511160904655, "loss": 0.1812, "step": 100520 }, { "epoch": 4.16, "grad_norm": 0.55859375, "learning_rate": 0.00047664653940615283, "loss": 0.2092, "step": 100530 }, { "epoch": 4.16, "grad_norm": 0.515625, "learning_rate": 0.0004766419622953159, "loss": 0.2336, "step": 100540 }, { "epoch": 4.16, "grad_norm": 0.87109375, "learning_rate": 0.00047663738475796314, "loss": 0.2457, "step": 100550 }, { "epoch": 4.17, "grad_norm": 0.2138671875, "learning_rate": 0.00047663280679410337, "loss": 0.1825, "step": 100560 }, { "epoch": 4.17, "grad_norm": 1.0703125, "learning_rate": 0.0004766282284037452, "loss": 0.2051, "step": 100570 }, { "epoch": 4.17, "grad_norm": 0.79296875, "learning_rate": 0.0004766236495868971, "loss": 0.2273, "step": 100580 }, { "epoch": 4.17, "grad_norm": 0.1806640625, "learning_rate": 0.0004766190703435678, "loss": 0.2022, "step": 100590 }, { "epoch": 4.17, "grad_norm": 0.85546875, "learning_rate": 0.0004766144906737658, "loss": 0.1832, "step": 100600 }, { "epoch": 4.17, "grad_norm": 0.322265625, "learning_rate": 0.00047660991057749987, "loss": 0.2026, "step": 100610 }, { "epoch": 4.17, "grad_norm": 0.796875, "learning_rate": 0.0004766053300547786, "loss": 0.2452, "step": 100620 }, { "epoch": 4.17, "grad_norm": 0.5546875, "learning_rate": 0.0004766007491056105, "loss": 0.1704, "step": 100630 }, { "epoch": 4.17, "grad_norm": 0.73828125, "learning_rate": 0.00047659616773000423, "loss": 0.2128, "step": 100640 }, { "epoch": 4.17, "grad_norm": 0.78125, "learning_rate": 0.00047659158592796853, "loss": 0.2242, "step": 100650 }, { "epoch": 4.17, "grad_norm": 1.2578125, "learning_rate": 0.00047658700369951194, "loss": 0.2485, "step": 100660 }, { "epoch": 4.17, "grad_norm": 2.3125, "learning_rate": 0.000476582421044643, "loss": 0.1861, "step": 100670 }, { "epoch": 4.17, "grad_norm": 1.3359375, "learning_rate": 0.0004765778379633704, "loss": 0.1571, "step": 100680 }, { "epoch": 4.17, "grad_norm": 0.77734375, "learning_rate": 0.00047657325445570287, "loss": 0.1607, "step": 100690 }, { "epoch": 4.17, "grad_norm": 1.4453125, "learning_rate": 0.00047656867052164886, "loss": 0.1855, "step": 100700 }, { "epoch": 4.17, "grad_norm": 0.76171875, "learning_rate": 0.00047656408616121717, "loss": 0.1897, "step": 100710 }, { "epoch": 4.17, "grad_norm": 1.046875, "learning_rate": 0.00047655950137441626, "loss": 0.2328, "step": 100720 }, { "epoch": 4.17, "grad_norm": 0.69140625, "learning_rate": 0.0004765549161612549, "loss": 0.242, "step": 100730 }, { "epoch": 4.17, "grad_norm": 0.71875, "learning_rate": 0.00047655033052174155, "loss": 0.1689, "step": 100740 }, { "epoch": 4.17, "grad_norm": 0.65234375, "learning_rate": 0.00047654574445588505, "loss": 0.1744, "step": 100750 }, { "epoch": 4.17, "grad_norm": 0.71484375, "learning_rate": 0.0004765411579636939, "loss": 0.1991, "step": 100760 }, { "epoch": 4.17, "grad_norm": 0.470703125, "learning_rate": 0.00047653657104517665, "loss": 0.2136, "step": 100770 }, { "epoch": 4.17, "grad_norm": 0.40234375, "learning_rate": 0.0004765319837003422, "loss": 0.2195, "step": 100780 }, { "epoch": 4.17, "grad_norm": 1.0703125, "learning_rate": 0.0004765273959291989, "loss": 0.2236, "step": 100790 }, { "epoch": 4.18, "grad_norm": 1.1015625, "learning_rate": 0.0004765228077317556, "loss": 0.2047, "step": 100800 }, { "epoch": 4.18, "grad_norm": 0.2490234375, "learning_rate": 0.00047651821910802073, "loss": 0.1997, "step": 100810 }, { "epoch": 4.18, "grad_norm": 0.76171875, "learning_rate": 0.0004765136300580031, "loss": 0.2093, "step": 100820 }, { "epoch": 4.18, "grad_norm": 0.6328125, "learning_rate": 0.0004765090405817113, "loss": 0.2265, "step": 100830 }, { "epoch": 4.18, "grad_norm": 0.66015625, "learning_rate": 0.0004765044506791539, "loss": 0.2337, "step": 100840 }, { "epoch": 4.18, "grad_norm": 0.55859375, "learning_rate": 0.0004764998603503396, "loss": 0.2214, "step": 100850 }, { "epoch": 4.18, "grad_norm": 0.5703125, "learning_rate": 0.000476495269595277, "loss": 0.2135, "step": 100860 }, { "epoch": 4.18, "grad_norm": 0.76171875, "learning_rate": 0.00047649067841397483, "loss": 0.2207, "step": 100870 }, { "epoch": 4.18, "grad_norm": 1.5859375, "learning_rate": 0.00047648608680644157, "loss": 0.2299, "step": 100880 }, { "epoch": 4.18, "grad_norm": 0.6796875, "learning_rate": 0.000476481494772686, "loss": 0.2143, "step": 100890 }, { "epoch": 4.18, "grad_norm": 0.52734375, "learning_rate": 0.0004764769023127167, "loss": 0.2232, "step": 100900 }, { "epoch": 4.18, "grad_norm": 1.078125, "learning_rate": 0.0004764723094265424, "loss": 0.2781, "step": 100910 }, { "epoch": 4.18, "grad_norm": 0.2412109375, "learning_rate": 0.0004764677161141716, "loss": 0.2054, "step": 100920 }, { "epoch": 4.18, "grad_norm": 1.1328125, "learning_rate": 0.000476463122375613, "loss": 0.2636, "step": 100930 }, { "epoch": 4.18, "grad_norm": 0.6015625, "learning_rate": 0.00047645852821087535, "loss": 0.2493, "step": 100940 }, { "epoch": 4.18, "grad_norm": 0.671875, "learning_rate": 0.0004764539336199671, "loss": 0.2273, "step": 100950 }, { "epoch": 4.18, "grad_norm": 1.359375, "learning_rate": 0.00047644933860289707, "loss": 0.1895, "step": 100960 }, { "epoch": 4.18, "grad_norm": 0.76953125, "learning_rate": 0.0004764447431596738, "loss": 0.2148, "step": 100970 }, { "epoch": 4.18, "grad_norm": 1.328125, "learning_rate": 0.00047644014729030594, "loss": 0.1735, "step": 100980 }, { "epoch": 4.18, "grad_norm": 0.6171875, "learning_rate": 0.00047643555099480224, "loss": 0.1748, "step": 100990 }, { "epoch": 4.18, "grad_norm": 0.98046875, "learning_rate": 0.0004764309542731713, "loss": 0.1647, "step": 101000 }, { "epoch": 4.18, "grad_norm": 1.203125, "learning_rate": 0.0004764263571254217, "loss": 0.1791, "step": 101010 }, { "epoch": 4.18, "grad_norm": 1.84375, "learning_rate": 0.0004764217595515622, "loss": 0.2018, "step": 101020 }, { "epoch": 4.18, "grad_norm": 0.6796875, "learning_rate": 0.0004764171615516014, "loss": 0.2031, "step": 101030 }, { "epoch": 4.19, "grad_norm": 2.28125, "learning_rate": 0.00047641256312554793, "loss": 0.2054, "step": 101040 }, { "epoch": 4.19, "grad_norm": 1.3828125, "learning_rate": 0.00047640796427341045, "loss": 0.1998, "step": 101050 }, { "epoch": 4.19, "grad_norm": 0.86328125, "learning_rate": 0.00047640336499519767, "loss": 0.2065, "step": 101060 }, { "epoch": 4.19, "grad_norm": 0.90234375, "learning_rate": 0.00047639876529091815, "loss": 0.204, "step": 101070 }, { "epoch": 4.19, "grad_norm": 0.62109375, "learning_rate": 0.0004763941651605806, "loss": 0.1963, "step": 101080 }, { "epoch": 4.19, "grad_norm": 0.74609375, "learning_rate": 0.0004763895646041937, "loss": 0.1978, "step": 101090 }, { "epoch": 4.19, "grad_norm": 0.400390625, "learning_rate": 0.00047638496362176615, "loss": 0.2132, "step": 101100 }, { "epoch": 4.19, "grad_norm": 0.73828125, "learning_rate": 0.0004763803622133065, "loss": 0.2475, "step": 101110 }, { "epoch": 4.19, "grad_norm": 1.1796875, "learning_rate": 0.00047637576037882346, "loss": 0.1804, "step": 101120 }, { "epoch": 4.19, "grad_norm": 0.5859375, "learning_rate": 0.0004763711581183257, "loss": 0.2459, "step": 101130 }, { "epoch": 4.19, "grad_norm": 0.56640625, "learning_rate": 0.00047636655543182183, "loss": 0.238, "step": 101140 }, { "epoch": 4.19, "grad_norm": 0.53515625, "learning_rate": 0.00047636195231932053, "loss": 0.1858, "step": 101150 }, { "epoch": 4.19, "grad_norm": 0.59375, "learning_rate": 0.0004763573487808305, "loss": 0.2329, "step": 101160 }, { "epoch": 4.19, "grad_norm": 0.78515625, "learning_rate": 0.0004763527448163604, "loss": 0.2178, "step": 101170 }, { "epoch": 4.19, "grad_norm": 0.76171875, "learning_rate": 0.0004763481404259189, "loss": 0.2373, "step": 101180 }, { "epoch": 4.19, "grad_norm": 0.859375, "learning_rate": 0.0004763435356095146, "loss": 0.2139, "step": 101190 }, { "epoch": 4.19, "grad_norm": 0.6953125, "learning_rate": 0.00047633893036715623, "loss": 0.2269, "step": 101200 }, { "epoch": 4.19, "grad_norm": 3.609375, "learning_rate": 0.00047633432469885246, "loss": 0.188, "step": 101210 }, { "epoch": 4.19, "grad_norm": 0.828125, "learning_rate": 0.0004763297186046119, "loss": 0.1781, "step": 101220 }, { "epoch": 4.19, "grad_norm": 0.53125, "learning_rate": 0.0004763251120844433, "loss": 0.2369, "step": 101230 }, { "epoch": 4.19, "grad_norm": 0.640625, "learning_rate": 0.00047632050513835516, "loss": 0.2007, "step": 101240 }, { "epoch": 4.19, "grad_norm": 1.015625, "learning_rate": 0.0004763158977663564, "loss": 0.2405, "step": 101250 }, { "epoch": 4.19, "grad_norm": 0.1455078125, "learning_rate": 0.00047631128996845554, "loss": 0.1975, "step": 101260 }, { "epoch": 4.19, "grad_norm": 1.1953125, "learning_rate": 0.0004763066817446612, "loss": 0.2078, "step": 101270 }, { "epoch": 4.2, "grad_norm": 0.828125, "learning_rate": 0.00047630207309498217, "loss": 0.1325, "step": 101280 }, { "epoch": 4.2, "grad_norm": 0.50390625, "learning_rate": 0.0004762974640194271, "loss": 0.2187, "step": 101290 }, { "epoch": 4.2, "grad_norm": 0.70703125, "learning_rate": 0.0004762928545180046, "loss": 0.2215, "step": 101300 }, { "epoch": 4.2, "grad_norm": 0.59765625, "learning_rate": 0.0004762882445907234, "loss": 0.2006, "step": 101310 }, { "epoch": 4.2, "grad_norm": 1.53125, "learning_rate": 0.0004762836342375921, "loss": 0.1845, "step": 101320 }, { "epoch": 4.2, "grad_norm": 0.34765625, "learning_rate": 0.0004762790234586195, "loss": 0.2021, "step": 101330 }, { "epoch": 4.2, "grad_norm": 0.5234375, "learning_rate": 0.00047627441225381426, "loss": 0.2015, "step": 101340 }, { "epoch": 4.2, "grad_norm": 1.4375, "learning_rate": 0.0004762698006231849, "loss": 0.1946, "step": 101350 }, { "epoch": 4.2, "grad_norm": 0.74609375, "learning_rate": 0.00047626518856674026, "loss": 0.203, "step": 101360 }, { "epoch": 4.2, "grad_norm": 0.43359375, "learning_rate": 0.00047626057608448903, "loss": 0.1718, "step": 101370 }, { "epoch": 4.2, "grad_norm": 0.59375, "learning_rate": 0.0004762559631764398, "loss": 0.2212, "step": 101380 }, { "epoch": 4.2, "grad_norm": 0.99609375, "learning_rate": 0.0004762513498426012, "loss": 0.1894, "step": 101390 }, { "epoch": 4.2, "grad_norm": 0.875, "learning_rate": 0.0004762467360829821, "loss": 0.1702, "step": 101400 }, { "epoch": 4.2, "grad_norm": 1.4453125, "learning_rate": 0.00047624212189759096, "loss": 0.2324, "step": 101410 }, { "epoch": 4.2, "grad_norm": 0.427734375, "learning_rate": 0.00047623750728643667, "loss": 0.1478, "step": 101420 }, { "epoch": 4.2, "grad_norm": 0.76953125, "learning_rate": 0.00047623289224952783, "loss": 0.2033, "step": 101430 }, { "epoch": 4.2, "grad_norm": 0.166015625, "learning_rate": 0.0004762282767868731, "loss": 0.2358, "step": 101440 }, { "epoch": 4.2, "grad_norm": 0.326171875, "learning_rate": 0.00047622366089848114, "loss": 0.2326, "step": 101450 }, { "epoch": 4.2, "grad_norm": 0.53515625, "learning_rate": 0.00047621904458436073, "loss": 0.2342, "step": 101460 }, { "epoch": 4.2, "grad_norm": 0.4375, "learning_rate": 0.00047621442784452053, "loss": 0.1813, "step": 101470 }, { "epoch": 4.2, "grad_norm": 0.8515625, "learning_rate": 0.0004762098106789691, "loss": 0.2146, "step": 101480 }, { "epoch": 4.2, "grad_norm": 0.81640625, "learning_rate": 0.0004762051930877153, "loss": 0.205, "step": 101490 }, { "epoch": 4.2, "grad_norm": 0.55078125, "learning_rate": 0.0004762005750707677, "loss": 0.2142, "step": 101500 }, { "epoch": 4.2, "grad_norm": 0.796875, "learning_rate": 0.0004761959566281352, "loss": 0.1688, "step": 101510 }, { "epoch": 4.2, "grad_norm": 1.1640625, "learning_rate": 0.00047619133775982623, "loss": 0.1948, "step": 101520 }, { "epoch": 4.21, "grad_norm": 1.21875, "learning_rate": 0.0004761867184658496, "loss": 0.2292, "step": 101530 }, { "epoch": 4.21, "grad_norm": 0.5859375, "learning_rate": 0.00047618209874621397, "loss": 0.2271, "step": 101540 }, { "epoch": 4.21, "grad_norm": 1.0234375, "learning_rate": 0.0004761774786009281, "loss": 0.2179, "step": 101550 }, { "epoch": 4.21, "grad_norm": 1.1015625, "learning_rate": 0.0004761728580300006, "loss": 0.2363, "step": 101560 }, { "epoch": 4.21, "grad_norm": 1.1484375, "learning_rate": 0.0004761682370334402, "loss": 0.2191, "step": 101570 }, { "epoch": 4.21, "grad_norm": 0.83984375, "learning_rate": 0.00047616361561125564, "loss": 0.2696, "step": 101580 }, { "epoch": 4.21, "grad_norm": 0.60546875, "learning_rate": 0.00047615899376345555, "loss": 0.2032, "step": 101590 }, { "epoch": 4.21, "grad_norm": 0.87109375, "learning_rate": 0.0004761543714900487, "loss": 0.2165, "step": 101600 }, { "epoch": 4.21, "grad_norm": 0.734375, "learning_rate": 0.0004761497487910437, "loss": 0.2025, "step": 101610 }, { "epoch": 4.21, "grad_norm": 1.75, "learning_rate": 0.0004761451256664493, "loss": 0.2254, "step": 101620 }, { "epoch": 4.21, "grad_norm": 0.6015625, "learning_rate": 0.0004761405021162742, "loss": 0.249, "step": 101630 }, { "epoch": 4.21, "grad_norm": 0.45703125, "learning_rate": 0.00047613587814052707, "loss": 0.2421, "step": 101640 }, { "epoch": 4.21, "grad_norm": 0.306640625, "learning_rate": 0.00047613125373921673, "loss": 0.1916, "step": 101650 }, { "epoch": 4.21, "grad_norm": 0.76953125, "learning_rate": 0.0004761266289123517, "loss": 0.2566, "step": 101660 }, { "epoch": 4.21, "grad_norm": 1.0703125, "learning_rate": 0.0004761220036599408, "loss": 0.1973, "step": 101670 }, { "epoch": 4.21, "grad_norm": 0.765625, "learning_rate": 0.00047611737798199273, "loss": 0.2222, "step": 101680 }, { "epoch": 4.21, "grad_norm": 0.7890625, "learning_rate": 0.0004761127518785161, "loss": 0.2419, "step": 101690 }, { "epoch": 4.21, "grad_norm": 0.640625, "learning_rate": 0.00047610812534951976, "loss": 0.2222, "step": 101700 }, { "epoch": 4.21, "grad_norm": 0.90625, "learning_rate": 0.00047610349839501235, "loss": 0.1908, "step": 101710 }, { "epoch": 4.21, "grad_norm": 0.63671875, "learning_rate": 0.0004760988710150025, "loss": 0.2418, "step": 101720 }, { "epoch": 4.21, "grad_norm": 0.69921875, "learning_rate": 0.0004760942432094991, "loss": 0.1588, "step": 101730 }, { "epoch": 4.21, "grad_norm": 0.486328125, "learning_rate": 0.0004760896149785107, "loss": 0.1875, "step": 101740 }, { "epoch": 4.21, "grad_norm": 0.94921875, "learning_rate": 0.000476084986322046, "loss": 0.1959, "step": 101750 }, { "epoch": 4.21, "grad_norm": 0.490234375, "learning_rate": 0.00047608035724011383, "loss": 0.2313, "step": 101760 }, { "epoch": 4.22, "grad_norm": 0.90234375, "learning_rate": 0.00047607572773272284, "loss": 0.1891, "step": 101770 }, { "epoch": 4.22, "grad_norm": 1.015625, "learning_rate": 0.0004760710977998818, "loss": 0.2037, "step": 101780 }, { "epoch": 4.22, "grad_norm": 0.4296875, "learning_rate": 0.0004760664674415992, "loss": 0.2503, "step": 101790 }, { "epoch": 4.22, "grad_norm": 0.53515625, "learning_rate": 0.00047606183665788404, "loss": 0.217, "step": 101800 }, { "epoch": 4.22, "grad_norm": 0.66015625, "learning_rate": 0.000476057205448745, "loss": 0.1631, "step": 101810 }, { "epoch": 4.22, "grad_norm": 0.60546875, "learning_rate": 0.00047605257381419053, "loss": 0.2243, "step": 101820 }, { "epoch": 4.22, "grad_norm": 0.93359375, "learning_rate": 0.0004760479417542296, "loss": 0.2798, "step": 101830 }, { "epoch": 4.22, "grad_norm": 0.91015625, "learning_rate": 0.0004760433092688709, "loss": 0.2152, "step": 101840 }, { "epoch": 4.22, "grad_norm": 0.90625, "learning_rate": 0.00047603867635812307, "loss": 0.1739, "step": 101850 }, { "epoch": 4.22, "grad_norm": 1.0859375, "learning_rate": 0.0004760340430219949, "loss": 0.2143, "step": 101860 }, { "epoch": 4.22, "grad_norm": 0.337890625, "learning_rate": 0.000476029409260495, "loss": 0.1476, "step": 101870 }, { "epoch": 4.22, "grad_norm": 0.90234375, "learning_rate": 0.0004760247750736322, "loss": 0.2044, "step": 101880 }, { "epoch": 4.22, "grad_norm": 0.267578125, "learning_rate": 0.0004760201404614152, "loss": 0.2041, "step": 101890 }, { "epoch": 4.22, "grad_norm": 0.75390625, "learning_rate": 0.00047601550542385266, "loss": 0.1983, "step": 101900 }, { "epoch": 4.22, "grad_norm": 0.97265625, "learning_rate": 0.00047601086996095334, "loss": 0.2209, "step": 101910 }, { "epoch": 4.22, "grad_norm": 0.6484375, "learning_rate": 0.00047600623407272606, "loss": 0.1989, "step": 101920 }, { "epoch": 4.22, "grad_norm": 0.578125, "learning_rate": 0.0004760015977591793, "loss": 0.1865, "step": 101930 }, { "epoch": 4.22, "grad_norm": 0.2255859375, "learning_rate": 0.0004759969610203221, "loss": 0.2455, "step": 101940 }, { "epoch": 4.22, "grad_norm": 0.8984375, "learning_rate": 0.0004759923238561629, "loss": 0.2188, "step": 101950 }, { "epoch": 4.22, "grad_norm": 1.1484375, "learning_rate": 0.00047598768626671065, "loss": 0.2501, "step": 101960 }, { "epoch": 4.22, "grad_norm": 0.68359375, "learning_rate": 0.0004759830482519739, "loss": 0.2726, "step": 101970 }, { "epoch": 4.22, "grad_norm": 0.5703125, "learning_rate": 0.0004759784098119615, "loss": 0.1714, "step": 101980 }, { "epoch": 4.22, "grad_norm": 0.2119140625, "learning_rate": 0.0004759737709466822, "loss": 0.2031, "step": 101990 }, { "epoch": 4.22, "grad_norm": 0.3125, "learning_rate": 0.0004759691316561445, "loss": 0.1565, "step": 102000 }, { "epoch": 4.23, "grad_norm": 0.6171875, "learning_rate": 0.0004759644919403574, "loss": 0.1759, "step": 102010 }, { "epoch": 4.23, "grad_norm": 0.50390625, "learning_rate": 0.00047595985179932956, "loss": 0.178, "step": 102020 }, { "epoch": 4.23, "grad_norm": 0.6484375, "learning_rate": 0.00047595521123306963, "loss": 0.2102, "step": 102030 }, { "epoch": 4.23, "grad_norm": 0.8359375, "learning_rate": 0.00047595057024158637, "loss": 0.2199, "step": 102040 }, { "epoch": 4.23, "grad_norm": 0.7734375, "learning_rate": 0.00047594592882488855, "loss": 0.2437, "step": 102050 }, { "epoch": 4.23, "grad_norm": 0.34765625, "learning_rate": 0.0004759412869829849, "loss": 0.2094, "step": 102060 }, { "epoch": 4.23, "grad_norm": 0.443359375, "learning_rate": 0.00047593664471588424, "loss": 0.2344, "step": 102070 }, { "epoch": 4.23, "grad_norm": 0.9453125, "learning_rate": 0.0004759320020235951, "loss": 0.2154, "step": 102080 }, { "epoch": 4.23, "grad_norm": 0.82421875, "learning_rate": 0.00047592735890612635, "loss": 0.1842, "step": 102090 }, { "epoch": 4.23, "grad_norm": 0.92578125, "learning_rate": 0.00047592271536348675, "loss": 0.1807, "step": 102100 }, { "epoch": 4.23, "grad_norm": 0.53125, "learning_rate": 0.00047591807139568497, "loss": 0.1932, "step": 102110 }, { "epoch": 4.23, "grad_norm": 0.55078125, "learning_rate": 0.0004759134270027298, "loss": 0.2624, "step": 102120 }, { "epoch": 4.23, "grad_norm": 0.51171875, "learning_rate": 0.0004759087821846299, "loss": 0.2219, "step": 102130 }, { "epoch": 4.23, "grad_norm": 0.51953125, "learning_rate": 0.0004759041369413941, "loss": 0.2449, "step": 102140 }, { "epoch": 4.23, "grad_norm": 0.5234375, "learning_rate": 0.0004758994912730311, "loss": 0.182, "step": 102150 }, { "epoch": 4.23, "grad_norm": 0.546875, "learning_rate": 0.0004758948451795497, "loss": 0.2197, "step": 102160 }, { "epoch": 4.23, "grad_norm": 0.4140625, "learning_rate": 0.00047589019866095853, "loss": 0.1975, "step": 102170 }, { "epoch": 4.23, "grad_norm": 0.486328125, "learning_rate": 0.00047588555171726644, "loss": 0.2442, "step": 102180 }, { "epoch": 4.23, "grad_norm": 1.3203125, "learning_rate": 0.0004758809043484821, "loss": 0.2844, "step": 102190 }, { "epoch": 4.23, "grad_norm": 0.66015625, "learning_rate": 0.0004758762565546143, "loss": 0.2137, "step": 102200 }, { "epoch": 4.23, "grad_norm": 0.322265625, "learning_rate": 0.00047587160833567177, "loss": 0.2235, "step": 102210 }, { "epoch": 4.23, "grad_norm": 0.51171875, "learning_rate": 0.0004758669596916634, "loss": 0.2376, "step": 102220 }, { "epoch": 4.23, "grad_norm": 0.486328125, "learning_rate": 0.00047586231062259765, "loss": 0.2003, "step": 102230 }, { "epoch": 4.23, "grad_norm": 0.404296875, "learning_rate": 0.00047585766112848344, "loss": 0.1965, "step": 102240 }, { "epoch": 4.24, "grad_norm": 0.8828125, "learning_rate": 0.00047585301120932957, "loss": 0.2144, "step": 102250 }, { "epoch": 4.24, "grad_norm": 0.9453125, "learning_rate": 0.0004758483608651446, "loss": 0.2123, "step": 102260 }, { "epoch": 4.24, "grad_norm": 0.2353515625, "learning_rate": 0.0004758437100959375, "loss": 0.2106, "step": 102270 }, { "epoch": 4.24, "grad_norm": 1.34375, "learning_rate": 0.000475839058901717, "loss": 0.2223, "step": 102280 }, { "epoch": 4.24, "grad_norm": 0.54296875, "learning_rate": 0.0004758344072824916, "loss": 0.2163, "step": 102290 }, { "epoch": 4.24, "grad_norm": 1.265625, "learning_rate": 0.00047582975523827035, "loss": 0.1628, "step": 102300 }, { "epoch": 4.24, "grad_norm": 1.2109375, "learning_rate": 0.0004758251027690619, "loss": 0.2257, "step": 102310 }, { "epoch": 4.24, "grad_norm": 0.59375, "learning_rate": 0.00047582044987487494, "loss": 0.2391, "step": 102320 }, { "epoch": 4.24, "grad_norm": 0.484375, "learning_rate": 0.0004758157965557183, "loss": 0.1954, "step": 102330 }, { "epoch": 4.24, "grad_norm": 0.443359375, "learning_rate": 0.00047581114281160063, "loss": 0.1575, "step": 102340 }, { "epoch": 4.24, "grad_norm": 0.94140625, "learning_rate": 0.0004758064886425309, "loss": 0.1835, "step": 102350 }, { "epoch": 4.24, "grad_norm": 0.87890625, "learning_rate": 0.00047580183404851773, "loss": 0.2063, "step": 102360 }, { "epoch": 4.24, "grad_norm": 0.59375, "learning_rate": 0.0004757971790295699, "loss": 0.2265, "step": 102370 }, { "epoch": 4.24, "grad_norm": 1.3046875, "learning_rate": 0.000475792523585696, "loss": 0.1896, "step": 102380 }, { "epoch": 4.24, "grad_norm": 0.345703125, "learning_rate": 0.0004757878677169052, "loss": 0.1877, "step": 102390 }, { "epoch": 4.24, "grad_norm": 0.70703125, "learning_rate": 0.00047578321142320584, "loss": 0.1884, "step": 102400 }, { "epoch": 4.24, "grad_norm": 0.28125, "learning_rate": 0.0004757785547046069, "loss": 0.1709, "step": 102410 }, { "epoch": 4.24, "grad_norm": 1.0625, "learning_rate": 0.0004757738975611171, "loss": 0.1752, "step": 102420 }, { "epoch": 4.24, "grad_norm": 0.5703125, "learning_rate": 0.00047576923999274526, "loss": 0.2032, "step": 102430 }, { "epoch": 4.24, "grad_norm": 0.69140625, "learning_rate": 0.0004757645819995, "loss": 0.212, "step": 102440 }, { "epoch": 4.24, "grad_norm": 0.890625, "learning_rate": 0.0004757599235813903, "loss": 0.2625, "step": 102450 }, { "epoch": 4.24, "grad_norm": 1.03125, "learning_rate": 0.00047575526473842466, "loss": 0.236, "step": 102460 }, { "epoch": 4.24, "grad_norm": 0.609375, "learning_rate": 0.0004757506054706121, "loss": 0.1896, "step": 102470 }, { "epoch": 4.24, "grad_norm": 0.419921875, "learning_rate": 0.0004757459457779613, "loss": 0.2485, "step": 102480 }, { "epoch": 4.25, "grad_norm": 0.4921875, "learning_rate": 0.0004757412856604809, "loss": 0.1597, "step": 102490 }, { "epoch": 4.25, "grad_norm": 0.30859375, "learning_rate": 0.0004757366251181798, "loss": 0.188, "step": 102500 }, { "epoch": 4.25, "grad_norm": 0.51953125, "learning_rate": 0.00047573196415106684, "loss": 0.1422, "step": 102510 }, { "epoch": 4.25, "grad_norm": 0.7734375, "learning_rate": 0.00047572730275915066, "loss": 0.2401, "step": 102520 }, { "epoch": 4.25, "grad_norm": 0.66015625, "learning_rate": 0.00047572264094244, "loss": 0.1946, "step": 102530 }, { "epoch": 4.25, "grad_norm": 0.7578125, "learning_rate": 0.0004757179787009438, "loss": 0.159, "step": 102540 }, { "epoch": 4.25, "grad_norm": 1.1953125, "learning_rate": 0.0004757133160346707, "loss": 0.2301, "step": 102550 }, { "epoch": 4.25, "grad_norm": 0.61328125, "learning_rate": 0.00047570865294362954, "loss": 0.2527, "step": 102560 }, { "epoch": 4.25, "grad_norm": 0.7109375, "learning_rate": 0.000475703989427829, "loss": 0.2863, "step": 102570 }, { "epoch": 4.25, "grad_norm": 0.5625, "learning_rate": 0.000475699325487278, "loss": 0.2497, "step": 102580 }, { "epoch": 4.25, "grad_norm": 0.380859375, "learning_rate": 0.00047569466112198515, "loss": 0.2275, "step": 102590 }, { "epoch": 4.25, "grad_norm": 1.2265625, "learning_rate": 0.00047568999633195943, "loss": 0.2271, "step": 102600 }, { "epoch": 4.25, "grad_norm": 0.9609375, "learning_rate": 0.00047568533111720944, "loss": 0.254, "step": 102610 }, { "epoch": 4.25, "grad_norm": 0.275390625, "learning_rate": 0.00047568066547774406, "loss": 0.2085, "step": 102620 }, { "epoch": 4.25, "grad_norm": 0.74609375, "learning_rate": 0.00047567599941357203, "loss": 0.2276, "step": 102630 }, { "epoch": 4.25, "grad_norm": 0.94140625, "learning_rate": 0.0004756713329247021, "loss": 0.2037, "step": 102640 }, { "epoch": 4.25, "grad_norm": 0.7265625, "learning_rate": 0.0004756666660111432, "loss": 0.2044, "step": 102650 }, { "epoch": 4.25, "grad_norm": 1.1484375, "learning_rate": 0.0004756619986729039, "loss": 0.2574, "step": 102660 }, { "epoch": 4.25, "grad_norm": 0.75, "learning_rate": 0.00047565733090999314, "loss": 0.1947, "step": 102670 }, { "epoch": 4.25, "grad_norm": 2.296875, "learning_rate": 0.00047565266272241966, "loss": 0.2164, "step": 102680 }, { "epoch": 4.25, "grad_norm": 0.9453125, "learning_rate": 0.0004756479941101922, "loss": 0.2502, "step": 102690 }, { "epoch": 4.25, "grad_norm": 0.55078125, "learning_rate": 0.0004756433250733196, "loss": 0.2165, "step": 102700 }, { "epoch": 4.25, "grad_norm": 0.984375, "learning_rate": 0.0004756386556118106, "loss": 0.1829, "step": 102710 }, { "epoch": 4.25, "grad_norm": 0.84765625, "learning_rate": 0.0004756339857256741, "loss": 0.2738, "step": 102720 }, { "epoch": 4.26, "grad_norm": 0.6015625, "learning_rate": 0.0004756293154149187, "loss": 0.1844, "step": 102730 }, { "epoch": 4.26, "grad_norm": 0.83203125, "learning_rate": 0.00047562464467955335, "loss": 0.2229, "step": 102740 }, { "epoch": 4.26, "grad_norm": 1.09375, "learning_rate": 0.00047561997351958676, "loss": 0.1745, "step": 102750 }, { "epoch": 4.26, "grad_norm": 0.671875, "learning_rate": 0.0004756153019350278, "loss": 0.2415, "step": 102760 }, { "epoch": 4.26, "grad_norm": 0.66796875, "learning_rate": 0.0004756106299258851, "loss": 0.2595, "step": 102770 }, { "epoch": 4.26, "grad_norm": 0.126953125, "learning_rate": 0.0004756059574921677, "loss": 0.1906, "step": 102780 }, { "epoch": 4.26, "grad_norm": 0.56640625, "learning_rate": 0.00047560128463388415, "loss": 0.191, "step": 102790 }, { "epoch": 4.26, "grad_norm": 0.5625, "learning_rate": 0.00047559661135104336, "loss": 0.2405, "step": 102800 }, { "epoch": 4.26, "grad_norm": 0.8359375, "learning_rate": 0.00047559193764365416, "loss": 0.272, "step": 102810 }, { "epoch": 4.26, "grad_norm": 0.953125, "learning_rate": 0.0004755872635117252, "loss": 0.2145, "step": 102820 }, { "epoch": 4.26, "grad_norm": 0.28125, "learning_rate": 0.00047558258895526547, "loss": 0.2583, "step": 102830 }, { "epoch": 4.26, "grad_norm": 0.3359375, "learning_rate": 0.0004755779139742836, "loss": 0.1824, "step": 102840 }, { "epoch": 4.26, "grad_norm": 0.96875, "learning_rate": 0.0004755732385687885, "loss": 0.199, "step": 102850 }, { "epoch": 4.26, "grad_norm": 0.349609375, "learning_rate": 0.0004755685627387889, "loss": 0.1598, "step": 102860 }, { "epoch": 4.26, "grad_norm": 0.7265625, "learning_rate": 0.0004755638864842936, "loss": 0.2152, "step": 102870 }, { "epoch": 4.26, "grad_norm": 0.490234375, "learning_rate": 0.0004755592098053115, "loss": 0.2295, "step": 102880 }, { "epoch": 4.26, "grad_norm": 0.1337890625, "learning_rate": 0.0004755545327018512, "loss": 0.2453, "step": 102890 }, { "epoch": 4.26, "grad_norm": 0.6640625, "learning_rate": 0.00047554985517392177, "loss": 0.1519, "step": 102900 }, { "epoch": 4.26, "grad_norm": 0.45703125, "learning_rate": 0.0004755451772215318, "loss": 0.1679, "step": 102910 }, { "epoch": 4.26, "grad_norm": 1.65625, "learning_rate": 0.0004755404988446902, "loss": 0.2194, "step": 102920 }, { "epoch": 4.26, "grad_norm": 0.546875, "learning_rate": 0.00047553582004340565, "loss": 0.2258, "step": 102930 }, { "epoch": 4.26, "grad_norm": 0.6640625, "learning_rate": 0.00047553114081768717, "loss": 0.176, "step": 102940 }, { "epoch": 4.26, "grad_norm": 1.0078125, "learning_rate": 0.0004755264611675434, "loss": 0.2252, "step": 102950 }, { "epoch": 4.26, "grad_norm": 0.765625, "learning_rate": 0.0004755217810929831, "loss": 0.2784, "step": 102960 }, { "epoch": 4.27, "grad_norm": 0.7734375, "learning_rate": 0.0004755171005940152, "loss": 0.2309, "step": 102970 }, { "epoch": 4.27, "grad_norm": 1.8203125, "learning_rate": 0.00047551241967064853, "loss": 0.2029, "step": 102980 }, { "epoch": 4.27, "grad_norm": 0.625, "learning_rate": 0.00047550773832289177, "loss": 0.2865, "step": 102990 }, { "epoch": 4.27, "grad_norm": 0.98828125, "learning_rate": 0.0004755030565507539, "loss": 0.2701, "step": 103000 }, { "epoch": 4.27, "grad_norm": 0.94140625, "learning_rate": 0.0004754983743542435, "loss": 0.1983, "step": 103010 }, { "epoch": 4.27, "grad_norm": 0.48828125, "learning_rate": 0.00047549369173336954, "loss": 0.2205, "step": 103020 }, { "epoch": 4.27, "grad_norm": 0.4921875, "learning_rate": 0.0004754890086881408, "loss": 0.2251, "step": 103030 }, { "epoch": 4.27, "grad_norm": 0.51953125, "learning_rate": 0.0004754843252185662, "loss": 0.2316, "step": 103040 }, { "epoch": 4.27, "grad_norm": 0.361328125, "learning_rate": 0.00047547964132465436, "loss": 0.2308, "step": 103050 }, { "epoch": 4.27, "grad_norm": 1.1015625, "learning_rate": 0.0004754749570064142, "loss": 0.218, "step": 103060 }, { "epoch": 4.27, "grad_norm": 0.796875, "learning_rate": 0.00047547027226385455, "loss": 0.2619, "step": 103070 }, { "epoch": 4.27, "grad_norm": 1.1796875, "learning_rate": 0.00047546558709698415, "loss": 0.2122, "step": 103080 }, { "epoch": 4.27, "grad_norm": 1.140625, "learning_rate": 0.0004754609015058119, "loss": 0.2113, "step": 103090 }, { "epoch": 4.27, "grad_norm": 0.412109375, "learning_rate": 0.00047545621549034656, "loss": 0.2347, "step": 103100 }, { "epoch": 4.27, "grad_norm": 1.21875, "learning_rate": 0.00047545152905059695, "loss": 0.2078, "step": 103110 }, { "epoch": 4.27, "grad_norm": 1.5, "learning_rate": 0.0004754468421865719, "loss": 0.2262, "step": 103120 }, { "epoch": 4.27, "grad_norm": 0.9453125, "learning_rate": 0.0004754421548982803, "loss": 0.228, "step": 103130 }, { "epoch": 4.27, "grad_norm": 0.69921875, "learning_rate": 0.0004754374671857309, "loss": 0.1812, "step": 103140 }, { "epoch": 4.27, "grad_norm": 1.171875, "learning_rate": 0.00047543277904893243, "loss": 0.1583, "step": 103150 }, { "epoch": 4.27, "grad_norm": 0.1591796875, "learning_rate": 0.00047542809048789394, "loss": 0.1877, "step": 103160 }, { "epoch": 4.27, "grad_norm": 0.41796875, "learning_rate": 0.0004754234015026241, "loss": 0.185, "step": 103170 }, { "epoch": 4.27, "grad_norm": 0.66796875, "learning_rate": 0.00047541871209313177, "loss": 0.1998, "step": 103180 }, { "epoch": 4.27, "grad_norm": 0.51171875, "learning_rate": 0.0004754140222594257, "loss": 0.1987, "step": 103190 }, { "epoch": 4.27, "grad_norm": 0.88671875, "learning_rate": 0.0004754093320015148, "loss": 0.165, "step": 103200 }, { "epoch": 4.27, "grad_norm": 0.6484375, "learning_rate": 0.00047540464131940797, "loss": 0.2128, "step": 103210 }, { "epoch": 4.28, "grad_norm": 0.431640625, "learning_rate": 0.00047539995021311386, "loss": 0.2325, "step": 103220 }, { "epoch": 4.28, "grad_norm": 0.63671875, "learning_rate": 0.0004753952586826414, "loss": 0.2394, "step": 103230 }, { "epoch": 4.28, "grad_norm": 1.109375, "learning_rate": 0.0004753905667279994, "loss": 0.2569, "step": 103240 }, { "epoch": 4.28, "grad_norm": 0.671875, "learning_rate": 0.0004753858743491967, "loss": 0.1863, "step": 103250 }, { "epoch": 4.28, "grad_norm": 0.59375, "learning_rate": 0.00047538118154624216, "loss": 0.2399, "step": 103260 }, { "epoch": 4.28, "grad_norm": 0.375, "learning_rate": 0.0004753764883191445, "loss": 0.1791, "step": 103270 }, { "epoch": 4.28, "grad_norm": 0.255859375, "learning_rate": 0.0004753717946679127, "loss": 0.2132, "step": 103280 }, { "epoch": 4.28, "grad_norm": 0.62109375, "learning_rate": 0.0004753671005925555, "loss": 0.1949, "step": 103290 }, { "epoch": 4.28, "grad_norm": 0.23046875, "learning_rate": 0.00047536240609308175, "loss": 0.2222, "step": 103300 }, { "epoch": 4.28, "grad_norm": 0.337890625, "learning_rate": 0.0004753577111695003, "loss": 0.1819, "step": 103310 }, { "epoch": 4.28, "grad_norm": 0.66015625, "learning_rate": 0.00047535301582181996, "loss": 0.1941, "step": 103320 }, { "epoch": 4.28, "grad_norm": 1.0390625, "learning_rate": 0.00047534832005004956, "loss": 0.215, "step": 103330 }, { "epoch": 4.28, "grad_norm": 1.578125, "learning_rate": 0.00047534362385419793, "loss": 0.2658, "step": 103340 }, { "epoch": 4.28, "grad_norm": 0.76171875, "learning_rate": 0.000475338927234274, "loss": 0.2372, "step": 103350 }, { "epoch": 4.28, "grad_norm": 0.67578125, "learning_rate": 0.00047533423019028654, "loss": 0.251, "step": 103360 }, { "epoch": 4.28, "grad_norm": 0.875, "learning_rate": 0.0004753295327222444, "loss": 0.2191, "step": 103370 }, { "epoch": 4.28, "grad_norm": 0.58984375, "learning_rate": 0.0004753248348301564, "loss": 0.1464, "step": 103380 }, { "epoch": 4.28, "grad_norm": 0.29296875, "learning_rate": 0.0004753201365140314, "loss": 0.1775, "step": 103390 }, { "epoch": 4.28, "grad_norm": 0.80078125, "learning_rate": 0.0004753154377738782, "loss": 0.2016, "step": 103400 }, { "epoch": 4.28, "grad_norm": 0.53515625, "learning_rate": 0.0004753107386097057, "loss": 0.2666, "step": 103410 }, { "epoch": 4.28, "grad_norm": 1.1328125, "learning_rate": 0.0004753060390215227, "loss": 0.2088, "step": 103420 }, { "epoch": 4.28, "grad_norm": 0.349609375, "learning_rate": 0.0004753013390093381, "loss": 0.2652, "step": 103430 }, { "epoch": 4.28, "grad_norm": 0.73046875, "learning_rate": 0.0004752966385731607, "loss": 0.2114, "step": 103440 }, { "epoch": 4.28, "grad_norm": 0.06298828125, "learning_rate": 0.00047529193771299934, "loss": 0.2092, "step": 103450 }, { "epoch": 4.29, "grad_norm": 0.90234375, "learning_rate": 0.00047528723642886283, "loss": 0.2883, "step": 103460 }, { "epoch": 4.29, "grad_norm": 0.671875, "learning_rate": 0.00047528253472076023, "loss": 0.1615, "step": 103470 }, { "epoch": 4.29, "grad_norm": 0.232421875, "learning_rate": 0.00047527783258870005, "loss": 0.197, "step": 103480 }, { "epoch": 4.29, "grad_norm": 0.5390625, "learning_rate": 0.00047527313003269144, "loss": 0.2157, "step": 103490 }, { "epoch": 4.29, "grad_norm": 0.8125, "learning_rate": 0.0004752684270527431, "loss": 0.2789, "step": 103500 }, { "epoch": 4.29, "grad_norm": 0.59765625, "learning_rate": 0.0004752637236488639, "loss": 0.1741, "step": 103510 }, { "epoch": 4.29, "grad_norm": 0.625, "learning_rate": 0.00047525901982106266, "loss": 0.1888, "step": 103520 }, { "epoch": 4.29, "grad_norm": 0.953125, "learning_rate": 0.0004752543155693483, "loss": 0.2184, "step": 103530 }, { "epoch": 4.29, "grad_norm": 0.55078125, "learning_rate": 0.00047524961089372966, "loss": 0.1973, "step": 103540 }, { "epoch": 4.29, "grad_norm": 1.21875, "learning_rate": 0.00047524490579421556, "loss": 0.2688, "step": 103550 }, { "epoch": 4.29, "grad_norm": 0.62109375, "learning_rate": 0.0004752402002708149, "loss": 0.2479, "step": 103560 }, { "epoch": 4.29, "grad_norm": 0.37890625, "learning_rate": 0.00047523549432353644, "loss": 0.2482, "step": 103570 }, { "epoch": 4.29, "grad_norm": 0.470703125, "learning_rate": 0.00047523078795238914, "loss": 0.2192, "step": 103580 }, { "epoch": 4.29, "grad_norm": 0.60546875, "learning_rate": 0.00047522608115738185, "loss": 0.2379, "step": 103590 }, { "epoch": 4.29, "grad_norm": 0.63671875, "learning_rate": 0.0004752213739385234, "loss": 0.2558, "step": 103600 }, { "epoch": 4.29, "grad_norm": 0.6328125, "learning_rate": 0.0004752166662958226, "loss": 0.2558, "step": 103610 }, { "epoch": 4.29, "grad_norm": 0.85546875, "learning_rate": 0.00047521195822928836, "loss": 0.1942, "step": 103620 }, { "epoch": 4.29, "grad_norm": 1.1015625, "learning_rate": 0.0004752072497389295, "loss": 0.241, "step": 103630 }, { "epoch": 4.29, "grad_norm": 0.9765625, "learning_rate": 0.000475202540824755, "loss": 0.2407, "step": 103640 }, { "epoch": 4.29, "grad_norm": 1.15625, "learning_rate": 0.00047519783148677365, "loss": 0.2023, "step": 103650 }, { "epoch": 4.29, "grad_norm": 1.5234375, "learning_rate": 0.0004751931217249942, "loss": 0.1778, "step": 103660 }, { "epoch": 4.29, "grad_norm": 0.70703125, "learning_rate": 0.0004751884115394257, "loss": 0.1971, "step": 103670 }, { "epoch": 4.29, "grad_norm": 0.671875, "learning_rate": 0.0004751837009300769, "loss": 0.2495, "step": 103680 }, { "epoch": 4.29, "grad_norm": 0.65625, "learning_rate": 0.00047517898989695665, "loss": 0.2115, "step": 103690 }, { "epoch": 4.3, "grad_norm": 0.75, "learning_rate": 0.00047517427844007387, "loss": 0.1718, "step": 103700 }, { "epoch": 4.3, "grad_norm": 0.70703125, "learning_rate": 0.00047516956655943744, "loss": 0.2307, "step": 103710 }, { "epoch": 4.3, "grad_norm": 0.359375, "learning_rate": 0.0004751648542550562, "loss": 0.1788, "step": 103720 }, { "epoch": 4.3, "grad_norm": 1.21875, "learning_rate": 0.0004751601415269391, "loss": 0.2526, "step": 103730 }, { "epoch": 4.3, "grad_norm": 0.70703125, "learning_rate": 0.00047515542837509484, "loss": 0.245, "step": 103740 }, { "epoch": 4.3, "grad_norm": 0.5390625, "learning_rate": 0.00047515071479953237, "loss": 0.2438, "step": 103750 }, { "epoch": 4.3, "grad_norm": 0.76171875, "learning_rate": 0.0004751460008002606, "loss": 0.184, "step": 103760 }, { "epoch": 4.3, "grad_norm": 0.453125, "learning_rate": 0.00047514128637728836, "loss": 0.229, "step": 103770 }, { "epoch": 4.3, "grad_norm": 0.44921875, "learning_rate": 0.0004751365715306245, "loss": 0.2405, "step": 103780 }, { "epoch": 4.3, "grad_norm": 0.455078125, "learning_rate": 0.000475131856260278, "loss": 0.1876, "step": 103790 }, { "epoch": 4.3, "grad_norm": 0.625, "learning_rate": 0.00047512714056625763, "loss": 0.1821, "step": 103800 }, { "epoch": 4.3, "grad_norm": 0.828125, "learning_rate": 0.00047512242444857223, "loss": 0.1488, "step": 103810 }, { "epoch": 4.3, "grad_norm": 0.640625, "learning_rate": 0.00047511770790723077, "loss": 0.2516, "step": 103820 }, { "epoch": 4.3, "grad_norm": 0.67578125, "learning_rate": 0.00047511299094224214, "loss": 0.1969, "step": 103830 }, { "epoch": 4.3, "grad_norm": 0.5, "learning_rate": 0.00047510827355361505, "loss": 0.2368, "step": 103840 }, { "epoch": 4.3, "grad_norm": 0.62890625, "learning_rate": 0.0004751035557413587, "loss": 0.2319, "step": 103850 }, { "epoch": 4.3, "grad_norm": 1.046875, "learning_rate": 0.0004750988375054816, "loss": 0.2076, "step": 103860 }, { "epoch": 4.3, "grad_norm": 2.75, "learning_rate": 0.00047509411884599285, "loss": 0.2287, "step": 103870 }, { "epoch": 4.3, "grad_norm": 0.330078125, "learning_rate": 0.0004750893997629013, "loss": 0.2088, "step": 103880 }, { "epoch": 4.3, "grad_norm": 0.0, "learning_rate": 0.00047508468025621574, "loss": 0.1891, "step": 103890 }, { "epoch": 4.3, "grad_norm": 0.7421875, "learning_rate": 0.00047507996032594517, "loss": 0.209, "step": 103900 }, { "epoch": 4.3, "grad_norm": 1.0546875, "learning_rate": 0.00047507523997209844, "loss": 0.2057, "step": 103910 }, { "epoch": 4.3, "grad_norm": 0.640625, "learning_rate": 0.00047507051919468444, "loss": 0.2498, "step": 103920 }, { "epoch": 4.3, "grad_norm": 0.44921875, "learning_rate": 0.00047506579799371195, "loss": 0.2156, "step": 103930 }, { "epoch": 4.31, "grad_norm": 1.0546875, "learning_rate": 0.0004750610763691899, "loss": 0.2732, "step": 103940 }, { "epoch": 4.31, "grad_norm": 0.59375, "learning_rate": 0.0004750563543211273, "loss": 0.2446, "step": 103950 }, { "epoch": 4.31, "grad_norm": 0.71875, "learning_rate": 0.0004750516318495329, "loss": 0.2171, "step": 103960 }, { "epoch": 4.31, "grad_norm": 0.59765625, "learning_rate": 0.0004750469089544157, "loss": 0.1708, "step": 103970 }, { "epoch": 4.31, "grad_norm": 0.73828125, "learning_rate": 0.0004750421856357845, "loss": 0.1995, "step": 103980 }, { "epoch": 4.31, "grad_norm": 0.97265625, "learning_rate": 0.00047503746189364815, "loss": 0.2344, "step": 103990 }, { "epoch": 4.31, "grad_norm": 0.671875, "learning_rate": 0.00047503273772801557, "loss": 0.2257, "step": 104000 }, { "epoch": 4.31, "grad_norm": 1.578125, "learning_rate": 0.00047502801313889574, "loss": 0.2312, "step": 104010 }, { "epoch": 4.31, "grad_norm": 0.490234375, "learning_rate": 0.00047502328812629754, "loss": 0.2203, "step": 104020 }, { "epoch": 4.31, "grad_norm": 0.828125, "learning_rate": 0.00047501856269022976, "loss": 0.1992, "step": 104030 }, { "epoch": 4.31, "grad_norm": 0.63671875, "learning_rate": 0.0004750138368307013, "loss": 0.1871, "step": 104040 }, { "epoch": 4.31, "grad_norm": 0.55078125, "learning_rate": 0.0004750091105477212, "loss": 0.1839, "step": 104050 }, { "epoch": 4.31, "grad_norm": 0.9609375, "learning_rate": 0.00047500438384129816, "loss": 0.2164, "step": 104060 }, { "epoch": 4.31, "grad_norm": 0.265625, "learning_rate": 0.0004749996567114412, "loss": 0.2192, "step": 104070 }, { "epoch": 4.31, "grad_norm": 0.6796875, "learning_rate": 0.0004749949291581592, "loss": 0.2017, "step": 104080 }, { "epoch": 4.31, "grad_norm": 0.45703125, "learning_rate": 0.00047499020118146103, "loss": 0.2377, "step": 104090 }, { "epoch": 4.31, "grad_norm": 0.99609375, "learning_rate": 0.0004749854727813556, "loss": 0.2618, "step": 104100 }, { "epoch": 4.31, "grad_norm": 0.53515625, "learning_rate": 0.00047498074395785177, "loss": 0.2097, "step": 104110 }, { "epoch": 4.31, "grad_norm": 0.7578125, "learning_rate": 0.0004749760147109585, "loss": 0.2104, "step": 104120 }, { "epoch": 4.31, "grad_norm": 0.8828125, "learning_rate": 0.0004749712850406847, "loss": 0.2339, "step": 104130 }, { "epoch": 4.31, "grad_norm": 0.75, "learning_rate": 0.00047496655494703923, "loss": 0.2316, "step": 104140 }, { "epoch": 4.31, "grad_norm": 0.86328125, "learning_rate": 0.000474961824430031, "loss": 0.2183, "step": 104150 }, { "epoch": 4.31, "grad_norm": 0.4765625, "learning_rate": 0.0004749570934896689, "loss": 0.1697, "step": 104160 }, { "epoch": 4.31, "grad_norm": 0.474609375, "learning_rate": 0.00047495236212596176, "loss": 0.2147, "step": 104170 }, { "epoch": 4.32, "grad_norm": 0.515625, "learning_rate": 0.0004749476303389186, "loss": 0.2478, "step": 104180 }, { "epoch": 4.32, "grad_norm": 0.53515625, "learning_rate": 0.00047494289812854843, "loss": 0.2308, "step": 104190 }, { "epoch": 4.32, "grad_norm": 0.953125, "learning_rate": 0.0004749381654948599, "loss": 0.1952, "step": 104200 }, { "epoch": 4.32, "grad_norm": 0.291015625, "learning_rate": 0.000474933432437862, "loss": 0.2044, "step": 104210 }, { "epoch": 4.32, "grad_norm": 1.1875, "learning_rate": 0.00047492869895756376, "loss": 0.21, "step": 104220 }, { "epoch": 4.32, "grad_norm": 1.0859375, "learning_rate": 0.00047492396505397394, "loss": 0.1955, "step": 104230 }, { "epoch": 4.32, "grad_norm": 0.7421875, "learning_rate": 0.0004749192307271015, "loss": 0.2248, "step": 104240 }, { "epoch": 4.32, "grad_norm": 0.0, "learning_rate": 0.00047491449597695547, "loss": 0.2205, "step": 104250 }, { "epoch": 4.32, "grad_norm": 0.50390625, "learning_rate": 0.00047490976080354454, "loss": 0.1559, "step": 104260 }, { "epoch": 4.32, "grad_norm": 0.83984375, "learning_rate": 0.00047490502520687773, "loss": 0.2153, "step": 104270 }, { "epoch": 4.32, "grad_norm": 0.62890625, "learning_rate": 0.0004749002891869639, "loss": 0.1944, "step": 104280 }, { "epoch": 4.32, "grad_norm": 0.40625, "learning_rate": 0.0004748955527438121, "loss": 0.1711, "step": 104290 }, { "epoch": 4.32, "grad_norm": 0.26171875, "learning_rate": 0.0004748908158774312, "loss": 0.1978, "step": 104300 }, { "epoch": 4.32, "grad_norm": 0.263671875, "learning_rate": 0.00047488607858783003, "loss": 0.1914, "step": 104310 }, { "epoch": 4.32, "grad_norm": 0.0, "learning_rate": 0.00047488134087501747, "loss": 0.2515, "step": 104320 }, { "epoch": 4.32, "grad_norm": 0.43359375, "learning_rate": 0.0004748766027390026, "loss": 0.2446, "step": 104330 }, { "epoch": 4.32, "grad_norm": 1.7578125, "learning_rate": 0.0004748718641797942, "loss": 0.2196, "step": 104340 }, { "epoch": 4.32, "grad_norm": 1.765625, "learning_rate": 0.0004748671251974013, "loss": 0.1929, "step": 104350 }, { "epoch": 4.32, "grad_norm": 0.72265625, "learning_rate": 0.00047486238579183267, "loss": 0.1462, "step": 104360 }, { "epoch": 4.32, "grad_norm": 1.6484375, "learning_rate": 0.0004748576459630973, "loss": 0.244, "step": 104370 }, { "epoch": 4.32, "grad_norm": 0.60546875, "learning_rate": 0.0004748529057112042, "loss": 0.2119, "step": 104380 }, { "epoch": 4.32, "grad_norm": 0.9453125, "learning_rate": 0.00047484816503616224, "loss": 0.2017, "step": 104390 }, { "epoch": 4.32, "grad_norm": 0.59375, "learning_rate": 0.0004748434239379802, "loss": 0.2216, "step": 104400 }, { "epoch": 4.32, "grad_norm": 1.9140625, "learning_rate": 0.00047483868241666717, "loss": 0.2312, "step": 104410 }, { "epoch": 4.33, "grad_norm": 0.86328125, "learning_rate": 0.00047483394047223207, "loss": 0.1796, "step": 104420 }, { "epoch": 4.33, "grad_norm": 0.75390625, "learning_rate": 0.0004748291981046837, "loss": 0.2282, "step": 104430 }, { "epoch": 4.33, "grad_norm": 0.61328125, "learning_rate": 0.0004748244553140311, "loss": 0.198, "step": 104440 }, { "epoch": 4.33, "grad_norm": 1.3515625, "learning_rate": 0.00047481971210028317, "loss": 0.2258, "step": 104450 }, { "epoch": 4.33, "grad_norm": 0.2890625, "learning_rate": 0.0004748149684634488, "loss": 0.1607, "step": 104460 }, { "epoch": 4.33, "grad_norm": 0.45703125, "learning_rate": 0.0004748102244035369, "loss": 0.219, "step": 104470 }, { "epoch": 4.33, "grad_norm": 1.6171875, "learning_rate": 0.00047480547992055644, "loss": 0.2229, "step": 104480 }, { "epoch": 4.33, "grad_norm": 1.2578125, "learning_rate": 0.0004748007350145164, "loss": 0.2363, "step": 104490 }, { "epoch": 4.33, "grad_norm": 1.0703125, "learning_rate": 0.00047479598968542565, "loss": 0.2356, "step": 104500 }, { "epoch": 4.33, "grad_norm": 2.359375, "learning_rate": 0.00047479124393329307, "loss": 0.2168, "step": 104510 }, { "epoch": 4.33, "grad_norm": 0.44140625, "learning_rate": 0.0004747864977581277, "loss": 0.1996, "step": 104520 }, { "epoch": 4.33, "grad_norm": 0.59375, "learning_rate": 0.00047478175115993836, "loss": 0.2106, "step": 104530 }, { "epoch": 4.33, "grad_norm": 0.75390625, "learning_rate": 0.000474777004138734, "loss": 0.2036, "step": 104540 }, { "epoch": 4.33, "grad_norm": 0.6875, "learning_rate": 0.00047477225669452375, "loss": 0.2406, "step": 104550 }, { "epoch": 4.33, "grad_norm": 1.1484375, "learning_rate": 0.0004747675088273163, "loss": 0.2026, "step": 104560 }, { "epoch": 4.33, "grad_norm": 0.828125, "learning_rate": 0.00047476276053712063, "loss": 0.1517, "step": 104570 }, { "epoch": 4.33, "grad_norm": 0.78125, "learning_rate": 0.0004747580118239457, "loss": 0.205, "step": 104580 }, { "epoch": 4.33, "grad_norm": 0.376953125, "learning_rate": 0.0004747532626878006, "loss": 0.2147, "step": 104590 }, { "epoch": 4.33, "grad_norm": 0.92578125, "learning_rate": 0.00047474851312869404, "loss": 0.1667, "step": 104600 }, { "epoch": 4.33, "grad_norm": 0.45703125, "learning_rate": 0.00047474376314663504, "loss": 0.2254, "step": 104610 }, { "epoch": 4.33, "grad_norm": 0.578125, "learning_rate": 0.0004747390127416326, "loss": 0.184, "step": 104620 }, { "epoch": 4.33, "grad_norm": 0.86328125, "learning_rate": 0.0004747342619136955, "loss": 0.1654, "step": 104630 }, { "epoch": 4.33, "grad_norm": 0.70703125, "learning_rate": 0.0004747295106628329, "loss": 0.1949, "step": 104640 }, { "epoch": 4.33, "grad_norm": 0.63671875, "learning_rate": 0.0004747247589890536, "loss": 0.1756, "step": 104650 }, { "epoch": 4.34, "grad_norm": 0.55078125, "learning_rate": 0.00047472000689236654, "loss": 0.1966, "step": 104660 }, { "epoch": 4.34, "grad_norm": 0.6484375, "learning_rate": 0.0004747152543727807, "loss": 0.2338, "step": 104670 }, { "epoch": 4.34, "grad_norm": 0.96875, "learning_rate": 0.00047471050143030503, "loss": 0.2417, "step": 104680 }, { "epoch": 4.34, "grad_norm": 0.447265625, "learning_rate": 0.0004747057480649485, "loss": 0.1883, "step": 104690 }, { "epoch": 4.34, "grad_norm": 0.5, "learning_rate": 0.00047470099427672, "loss": 0.2364, "step": 104700 }, { "epoch": 4.34, "grad_norm": 0.4140625, "learning_rate": 0.0004746962400656285, "loss": 0.264, "step": 104710 }, { "epoch": 4.34, "grad_norm": 1.3828125, "learning_rate": 0.0004746914854316829, "loss": 0.1832, "step": 104720 }, { "epoch": 4.34, "grad_norm": 0.8125, "learning_rate": 0.0004746867303748922, "loss": 0.2352, "step": 104730 }, { "epoch": 4.34, "grad_norm": 1.328125, "learning_rate": 0.00047468197489526534, "loss": 0.2213, "step": 104740 }, { "epoch": 4.34, "grad_norm": 0.73046875, "learning_rate": 0.00047467721899281125, "loss": 0.2075, "step": 104750 }, { "epoch": 4.34, "grad_norm": 0.5625, "learning_rate": 0.0004746724626675389, "loss": 0.2043, "step": 104760 }, { "epoch": 4.34, "grad_norm": 0.271484375, "learning_rate": 0.0004746677059194573, "loss": 0.2027, "step": 104770 }, { "epoch": 4.34, "grad_norm": 1.4375, "learning_rate": 0.0004746629487485753, "loss": 0.1453, "step": 104780 }, { "epoch": 4.34, "grad_norm": 0.400390625, "learning_rate": 0.0004746581911549019, "loss": 0.271, "step": 104790 }, { "epoch": 4.34, "grad_norm": 1.8046875, "learning_rate": 0.000474653433138446, "loss": 0.247, "step": 104800 }, { "epoch": 4.34, "grad_norm": 0.328125, "learning_rate": 0.0004746486746992167, "loss": 0.1844, "step": 104810 }, { "epoch": 4.34, "grad_norm": 0.6953125, "learning_rate": 0.00047464391583722275, "loss": 0.1968, "step": 104820 }, { "epoch": 4.34, "grad_norm": 0.69921875, "learning_rate": 0.00047463915655247325, "loss": 0.1929, "step": 104830 }, { "epoch": 4.34, "grad_norm": 0.65625, "learning_rate": 0.0004746343968449771, "loss": 0.2398, "step": 104840 }, { "epoch": 4.34, "grad_norm": 0.412109375, "learning_rate": 0.00047462963671474333, "loss": 0.2661, "step": 104850 }, { "epoch": 4.34, "grad_norm": 0.48828125, "learning_rate": 0.0004746248761617808, "loss": 0.2602, "step": 104860 }, { "epoch": 4.34, "grad_norm": 0.671875, "learning_rate": 0.0004746201151860985, "loss": 0.2117, "step": 104870 }, { "epoch": 4.34, "grad_norm": 0.384765625, "learning_rate": 0.00047461535378770536, "loss": 0.2132, "step": 104880 }, { "epoch": 4.34, "grad_norm": 1.0234375, "learning_rate": 0.0004746105919666105, "loss": 0.1879, "step": 104890 }, { "epoch": 4.34, "grad_norm": 0.6875, "learning_rate": 0.0004746058297228227, "loss": 0.244, "step": 104900 }, { "epoch": 4.35, "grad_norm": 0.51171875, "learning_rate": 0.000474601067056351, "loss": 0.1979, "step": 104910 }, { "epoch": 4.35, "grad_norm": 0.65625, "learning_rate": 0.00047459630396720425, "loss": 0.1698, "step": 104920 }, { "epoch": 4.35, "grad_norm": 0.86328125, "learning_rate": 0.0004745915404553916, "loss": 0.1871, "step": 104930 }, { "epoch": 4.35, "grad_norm": 0.45703125, "learning_rate": 0.00047458677652092193, "loss": 0.2055, "step": 104940 }, { "epoch": 4.35, "grad_norm": 0.490234375, "learning_rate": 0.0004745820121638041, "loss": 0.1883, "step": 104950 }, { "epoch": 4.35, "grad_norm": 1.3984375, "learning_rate": 0.0004745772473840473, "loss": 0.1706, "step": 104960 }, { "epoch": 4.35, "grad_norm": 0.953125, "learning_rate": 0.00047457248218166036, "loss": 0.2203, "step": 104970 }, { "epoch": 4.35, "grad_norm": 0.0, "learning_rate": 0.0004745677165566522, "loss": 0.2505, "step": 104980 }, { "epoch": 4.35, "grad_norm": 0.423828125, "learning_rate": 0.0004745629505090319, "loss": 0.1988, "step": 104990 }, { "epoch": 4.35, "grad_norm": 0.40625, "learning_rate": 0.00047455818403880837, "loss": 0.175, "step": 105000 }, { "epoch": 4.35, "grad_norm": 0.92578125, "learning_rate": 0.00047455341714599056, "loss": 0.2253, "step": 105010 }, { "epoch": 4.35, "grad_norm": 0.275390625, "learning_rate": 0.00047454864983058744, "loss": 0.187, "step": 105020 }, { "epoch": 4.35, "grad_norm": 0.73046875, "learning_rate": 0.00047454388209260806, "loss": 0.196, "step": 105030 }, { "epoch": 4.35, "grad_norm": 1.046875, "learning_rate": 0.0004745391139320613, "loss": 0.1801, "step": 105040 }, { "epoch": 4.35, "grad_norm": 0.8125, "learning_rate": 0.0004745343453489562, "loss": 0.255, "step": 105050 }, { "epoch": 4.35, "grad_norm": 0.3671875, "learning_rate": 0.00047452957634330176, "loss": 0.1998, "step": 105060 }, { "epoch": 4.35, "grad_norm": 0.73046875, "learning_rate": 0.00047452480691510693, "loss": 0.2142, "step": 105070 }, { "epoch": 4.35, "grad_norm": 0.65234375, "learning_rate": 0.0004745200370643805, "loss": 0.2451, "step": 105080 }, { "epoch": 4.35, "grad_norm": 0.90625, "learning_rate": 0.0004745152667911318, "loss": 0.2275, "step": 105090 }, { "epoch": 4.35, "grad_norm": 0.6796875, "learning_rate": 0.00047451049609536946, "loss": 0.1888, "step": 105100 }, { "epoch": 4.35, "grad_norm": 1.1171875, "learning_rate": 0.00047450572497710267, "loss": 0.2329, "step": 105110 }, { "epoch": 4.35, "grad_norm": 0.4140625, "learning_rate": 0.0004745009534363404, "loss": 0.2164, "step": 105120 }, { "epoch": 4.35, "grad_norm": 0.193359375, "learning_rate": 0.00047449618147309147, "loss": 0.1994, "step": 105130 }, { "epoch": 4.35, "grad_norm": 1.078125, "learning_rate": 0.000474491409087365, "loss": 0.2498, "step": 105140 }, { "epoch": 4.36, "grad_norm": 0.68359375, "learning_rate": 0.00047448663627917, "loss": 0.2478, "step": 105150 }, { "epoch": 4.36, "grad_norm": 0.921875, "learning_rate": 0.0004744818630485153, "loss": 0.1854, "step": 105160 }, { "epoch": 4.36, "grad_norm": 0.765625, "learning_rate": 0.0004744770893954101, "loss": 0.2629, "step": 105170 }, { "epoch": 4.36, "grad_norm": 0.94921875, "learning_rate": 0.00047447231531986314, "loss": 0.2027, "step": 105180 }, { "epoch": 4.36, "grad_norm": 0.55078125, "learning_rate": 0.0004744675408218836, "loss": 0.1902, "step": 105190 }, { "epoch": 4.36, "grad_norm": 1.140625, "learning_rate": 0.00047446276590148035, "loss": 0.1768, "step": 105200 }, { "epoch": 4.36, "grad_norm": 0.4453125, "learning_rate": 0.00047445799055866245, "loss": 0.2226, "step": 105210 }, { "epoch": 4.36, "grad_norm": 0.443359375, "learning_rate": 0.00047445321479343885, "loss": 0.2527, "step": 105220 }, { "epoch": 4.36, "grad_norm": 0.41015625, "learning_rate": 0.0004744484386058185, "loss": 0.2212, "step": 105230 }, { "epoch": 4.36, "grad_norm": 0.5078125, "learning_rate": 0.00047444366199581045, "loss": 0.2089, "step": 105240 }, { "epoch": 4.36, "grad_norm": 0.890625, "learning_rate": 0.00047443888496342365, "loss": 0.2569, "step": 105250 }, { "epoch": 4.36, "grad_norm": 1.3046875, "learning_rate": 0.0004744341075086671, "loss": 0.213, "step": 105260 }, { "epoch": 4.36, "grad_norm": 0.9296875, "learning_rate": 0.0004744293296315498, "loss": 0.1995, "step": 105270 }, { "epoch": 4.36, "grad_norm": 0.546875, "learning_rate": 0.0004744245513320807, "loss": 0.1829, "step": 105280 }, { "epoch": 4.36, "grad_norm": 0.3359375, "learning_rate": 0.0004744197726102689, "loss": 0.1605, "step": 105290 }, { "epoch": 4.36, "grad_norm": 0.77734375, "learning_rate": 0.0004744149934661234, "loss": 0.2085, "step": 105300 }, { "epoch": 4.36, "grad_norm": 0.703125, "learning_rate": 0.000474410213899653, "loss": 0.1985, "step": 105310 }, { "epoch": 4.36, "grad_norm": 0.2197265625, "learning_rate": 0.00047440543391086683, "loss": 0.1915, "step": 105320 }, { "epoch": 4.36, "grad_norm": 1.1875, "learning_rate": 0.00047440065349977386, "loss": 0.2257, "step": 105330 }, { "epoch": 4.36, "grad_norm": 0.76171875, "learning_rate": 0.00047439587266638307, "loss": 0.2236, "step": 105340 }, { "epoch": 4.36, "grad_norm": 0.81640625, "learning_rate": 0.00047439109141070356, "loss": 0.1814, "step": 105350 }, { "epoch": 4.36, "grad_norm": 0.98828125, "learning_rate": 0.0004743863097327442, "loss": 0.179, "step": 105360 }, { "epoch": 4.36, "grad_norm": 0.462890625, "learning_rate": 0.000474381527632514, "loss": 0.181, "step": 105370 }, { "epoch": 4.36, "grad_norm": 0.453125, "learning_rate": 0.00047437674511002206, "loss": 0.1906, "step": 105380 }, { "epoch": 4.37, "grad_norm": 1.125, "learning_rate": 0.00047437196216527726, "loss": 0.2522, "step": 105390 }, { "epoch": 4.37, "grad_norm": 0.466796875, "learning_rate": 0.00047436717879828874, "loss": 0.2143, "step": 105400 }, { "epoch": 4.37, "grad_norm": 0.578125, "learning_rate": 0.00047436239500906537, "loss": 0.1683, "step": 105410 }, { "epoch": 4.37, "grad_norm": 0.8046875, "learning_rate": 0.00047435761079761616, "loss": 0.2136, "step": 105420 }, { "epoch": 4.37, "grad_norm": 0.75, "learning_rate": 0.00047435282616395023, "loss": 0.1957, "step": 105430 }, { "epoch": 4.37, "grad_norm": 0.9140625, "learning_rate": 0.0004743480411080765, "loss": 0.2102, "step": 105440 }, { "epoch": 4.37, "grad_norm": 0.60546875, "learning_rate": 0.00047434325563000394, "loss": 0.2476, "step": 105450 }, { "epoch": 4.37, "grad_norm": 0.66796875, "learning_rate": 0.0004743384697297416, "loss": 0.1829, "step": 105460 }, { "epoch": 4.37, "grad_norm": 0.54296875, "learning_rate": 0.0004743336834072985, "loss": 0.2192, "step": 105470 }, { "epoch": 4.37, "grad_norm": 1.328125, "learning_rate": 0.00047432889666268365, "loss": 0.1741, "step": 105480 }, { "epoch": 4.37, "grad_norm": 0.55859375, "learning_rate": 0.000474324109495906, "loss": 0.1587, "step": 105490 }, { "epoch": 4.37, "grad_norm": 0.49609375, "learning_rate": 0.0004743193219069746, "loss": 0.2415, "step": 105500 }, { "epoch": 4.37, "grad_norm": 0.455078125, "learning_rate": 0.0004743145338958985, "loss": 0.2352, "step": 105510 }, { "epoch": 4.37, "grad_norm": 1.1953125, "learning_rate": 0.00047430974546268666, "loss": 0.1747, "step": 105520 }, { "epoch": 4.37, "grad_norm": 0.890625, "learning_rate": 0.0004743049566073482, "loss": 0.2103, "step": 105530 }, { "epoch": 4.37, "grad_norm": 0.8046875, "learning_rate": 0.0004743001673298919, "loss": 0.2008, "step": 105540 }, { "epoch": 4.37, "grad_norm": 1.125, "learning_rate": 0.0004742953776303269, "loss": 0.1679, "step": 105550 }, { "epoch": 4.37, "grad_norm": 1.390625, "learning_rate": 0.00047429058750866226, "loss": 0.2435, "step": 105560 }, { "epoch": 4.37, "grad_norm": 0.8671875, "learning_rate": 0.000474285796964907, "loss": 0.1683, "step": 105570 }, { "epoch": 4.37, "grad_norm": 0.78515625, "learning_rate": 0.0004742810059990701, "loss": 0.2306, "step": 105580 }, { "epoch": 4.37, "grad_norm": 0.65625, "learning_rate": 0.0004742762146111604, "loss": 0.2008, "step": 105590 }, { "epoch": 4.37, "grad_norm": 0.60546875, "learning_rate": 0.0004742714228011873, "loss": 0.2029, "step": 105600 }, { "epoch": 4.37, "grad_norm": 0.51171875, "learning_rate": 0.00047426663056915954, "loss": 0.2036, "step": 105610 }, { "epoch": 4.37, "grad_norm": 0.85546875, "learning_rate": 0.00047426183791508613, "loss": 0.1575, "step": 105620 }, { "epoch": 4.38, "grad_norm": 1.359375, "learning_rate": 0.0004742570448389762, "loss": 0.1993, "step": 105630 }, { "epoch": 4.38, "grad_norm": 0.2275390625, "learning_rate": 0.0004742522513408387, "loss": 0.215, "step": 105640 }, { "epoch": 4.38, "grad_norm": 0.74609375, "learning_rate": 0.0004742474574206827, "loss": 0.1861, "step": 105650 }, { "epoch": 4.38, "grad_norm": 2.796875, "learning_rate": 0.0004742426630785172, "loss": 0.2094, "step": 105660 }, { "epoch": 4.38, "grad_norm": 1.3359375, "learning_rate": 0.0004742378683143512, "loss": 0.1938, "step": 105670 }, { "epoch": 4.38, "grad_norm": 0.3125, "learning_rate": 0.00047423307312819383, "loss": 0.1957, "step": 105680 }, { "epoch": 4.38, "grad_norm": 0.0, "learning_rate": 0.00047422827752005396, "loss": 0.156, "step": 105690 }, { "epoch": 4.38, "grad_norm": 0.65625, "learning_rate": 0.0004742234814899407, "loss": 0.2224, "step": 105700 }, { "epoch": 4.38, "grad_norm": 1.5234375, "learning_rate": 0.00047421868503786307, "loss": 0.2444, "step": 105710 }, { "epoch": 4.38, "grad_norm": 1.265625, "learning_rate": 0.0004742138881638301, "loss": 0.1802, "step": 105720 }, { "epoch": 4.38, "grad_norm": 0.55859375, "learning_rate": 0.0004742090908678508, "loss": 0.2608, "step": 105730 }, { "epoch": 4.38, "grad_norm": 0.67578125, "learning_rate": 0.00047420429314993417, "loss": 0.1886, "step": 105740 }, { "epoch": 4.38, "grad_norm": 0.41796875, "learning_rate": 0.0004741994950100893, "loss": 0.2283, "step": 105750 }, { "epoch": 4.38, "grad_norm": 0.37890625, "learning_rate": 0.0004741946964483253, "loss": 0.2044, "step": 105760 }, { "epoch": 4.38, "grad_norm": 1.3828125, "learning_rate": 0.0004741898974646509, "loss": 0.247, "step": 105770 }, { "epoch": 4.38, "grad_norm": 0.421875, "learning_rate": 0.0004741850980590754, "loss": 0.2794, "step": 105780 }, { "epoch": 4.38, "grad_norm": 0.62890625, "learning_rate": 0.0004741802982316078, "loss": 0.2363, "step": 105790 }, { "epoch": 4.38, "grad_norm": 0.625, "learning_rate": 0.000474175497982257, "loss": 0.1893, "step": 105800 }, { "epoch": 4.38, "grad_norm": 1.2890625, "learning_rate": 0.0004741706973110322, "loss": 0.2235, "step": 105810 }, { "epoch": 4.38, "grad_norm": 1.0, "learning_rate": 0.00047416589621794233, "loss": 0.2008, "step": 105820 }, { "epoch": 4.38, "grad_norm": 0.330078125, "learning_rate": 0.0004741610947029964, "loss": 0.2194, "step": 105830 }, { "epoch": 4.38, "grad_norm": 0.7890625, "learning_rate": 0.0004741562927662035, "loss": 0.1852, "step": 105840 }, { "epoch": 4.38, "grad_norm": 1.4296875, "learning_rate": 0.00047415149040757275, "loss": 0.1357, "step": 105850 }, { "epoch": 4.38, "grad_norm": 3.75, "learning_rate": 0.0004741466876271131, "loss": 0.2078, "step": 105860 }, { "epoch": 4.39, "grad_norm": 0.7109375, "learning_rate": 0.0004741418844248335, "loss": 0.2035, "step": 105870 }, { "epoch": 4.39, "grad_norm": 0.57421875, "learning_rate": 0.0004741370808007431, "loss": 0.2136, "step": 105880 }, { "epoch": 4.39, "grad_norm": 0.357421875, "learning_rate": 0.000474132276754851, "loss": 0.2437, "step": 105890 }, { "epoch": 4.39, "grad_norm": 0.52734375, "learning_rate": 0.0004741274722871661, "loss": 0.2156, "step": 105900 }, { "epoch": 4.39, "grad_norm": 0.5390625, "learning_rate": 0.0004741226673976975, "loss": 0.2043, "step": 105910 }, { "epoch": 4.39, "grad_norm": 0.390625, "learning_rate": 0.0004741178620864542, "loss": 0.1882, "step": 105920 }, { "epoch": 4.39, "grad_norm": 0.74609375, "learning_rate": 0.00047411305635344537, "loss": 0.1609, "step": 105930 }, { "epoch": 4.39, "grad_norm": 1.0234375, "learning_rate": 0.00047410825019867997, "loss": 0.2455, "step": 105940 }, { "epoch": 4.39, "grad_norm": 0.5546875, "learning_rate": 0.000474103443622167, "loss": 0.1891, "step": 105950 }, { "epoch": 4.39, "grad_norm": 0.52734375, "learning_rate": 0.00047409863662391564, "loss": 0.1841, "step": 105960 }, { "epoch": 4.39, "grad_norm": 1.234375, "learning_rate": 0.0004740938292039347, "loss": 0.1985, "step": 105970 }, { "epoch": 4.39, "grad_norm": 0.609375, "learning_rate": 0.0004740890213622334, "loss": 0.2205, "step": 105980 }, { "epoch": 4.39, "grad_norm": 1.296875, "learning_rate": 0.00047408421309882087, "loss": 0.2107, "step": 105990 }, { "epoch": 4.39, "grad_norm": 2.5, "learning_rate": 0.00047407940441370595, "loss": 0.2247, "step": 106000 }, { "epoch": 4.39, "grad_norm": 0.59375, "learning_rate": 0.00047407459530689785, "loss": 0.1863, "step": 106010 }, { "epoch": 4.39, "grad_norm": 0.318359375, "learning_rate": 0.00047406978577840554, "loss": 0.2319, "step": 106020 }, { "epoch": 4.39, "grad_norm": 0.66015625, "learning_rate": 0.00047406497582823805, "loss": 0.231, "step": 106030 }, { "epoch": 4.39, "grad_norm": 0.796875, "learning_rate": 0.0004740601654564045, "loss": 0.2539, "step": 106040 }, { "epoch": 4.39, "grad_norm": 0.361328125, "learning_rate": 0.0004740553546629139, "loss": 0.2312, "step": 106050 }, { "epoch": 4.39, "grad_norm": 0.62890625, "learning_rate": 0.00047405054344777534, "loss": 0.2625, "step": 106060 }, { "epoch": 4.39, "grad_norm": 0.45703125, "learning_rate": 0.00047404573181099785, "loss": 0.1775, "step": 106070 }, { "epoch": 4.39, "grad_norm": 1.0390625, "learning_rate": 0.0004740409197525905, "loss": 0.1408, "step": 106080 }, { "epoch": 4.39, "grad_norm": 0.0, "learning_rate": 0.0004740361072725623, "loss": 0.2081, "step": 106090 }, { "epoch": 4.39, "grad_norm": 0.33203125, "learning_rate": 0.0004740312943709224, "loss": 0.1605, "step": 106100 }, { "epoch": 4.4, "grad_norm": 0.75390625, "learning_rate": 0.0004740264810476798, "loss": 0.1909, "step": 106110 }, { "epoch": 4.4, "grad_norm": 0.828125, "learning_rate": 0.00047402166730284344, "loss": 0.2368, "step": 106120 }, { "epoch": 4.4, "grad_norm": 0.66796875, "learning_rate": 0.00047401685313642263, "loss": 0.1929, "step": 106130 }, { "epoch": 4.4, "grad_norm": 0.5, "learning_rate": 0.0004740120385484262, "loss": 0.1928, "step": 106140 }, { "epoch": 4.4, "grad_norm": 0.96484375, "learning_rate": 0.00047400722353886334, "loss": 0.2245, "step": 106150 }, { "epoch": 4.4, "grad_norm": 0.5703125, "learning_rate": 0.0004740024081077431, "loss": 0.1644, "step": 106160 }, { "epoch": 4.4, "grad_norm": 0.828125, "learning_rate": 0.00047399759225507445, "loss": 0.198, "step": 106170 }, { "epoch": 4.4, "grad_norm": 0.46875, "learning_rate": 0.00047399277598086654, "loss": 0.2131, "step": 106180 }, { "epoch": 4.4, "grad_norm": 0.75390625, "learning_rate": 0.0004739879592851285, "loss": 0.2018, "step": 106190 }, { "epoch": 4.4, "grad_norm": 0.625, "learning_rate": 0.0004739831421678692, "loss": 0.2099, "step": 106200 }, { "epoch": 4.4, "grad_norm": 2.453125, "learning_rate": 0.00047397832462909786, "loss": 0.2156, "step": 106210 }, { "epoch": 4.4, "grad_norm": 0.3203125, "learning_rate": 0.00047397350666882347, "loss": 0.2654, "step": 106220 }, { "epoch": 4.4, "grad_norm": 1.3828125, "learning_rate": 0.0004739686882870552, "loss": 0.2207, "step": 106230 }, { "epoch": 4.4, "grad_norm": 0.427734375, "learning_rate": 0.000473963869483802, "loss": 0.2476, "step": 106240 }, { "epoch": 4.4, "grad_norm": 1.2109375, "learning_rate": 0.000473959050259073, "loss": 0.2922, "step": 106250 }, { "epoch": 4.4, "grad_norm": 0.70703125, "learning_rate": 0.0004739542306128772, "loss": 0.2517, "step": 106260 }, { "epoch": 4.4, "grad_norm": 1.4921875, "learning_rate": 0.0004739494105452238, "loss": 0.1587, "step": 106270 }, { "epoch": 4.4, "grad_norm": 1.453125, "learning_rate": 0.00047394459005612176, "loss": 0.1764, "step": 106280 }, { "epoch": 4.4, "grad_norm": 0.302734375, "learning_rate": 0.00047393976914558017, "loss": 0.1751, "step": 106290 }, { "epoch": 4.4, "grad_norm": 0.48046875, "learning_rate": 0.0004739349478136081, "loss": 0.2027, "step": 106300 }, { "epoch": 4.4, "grad_norm": 0.85546875, "learning_rate": 0.0004739301260602147, "loss": 0.2041, "step": 106310 }, { "epoch": 4.4, "grad_norm": 0.56640625, "learning_rate": 0.000473925303885409, "loss": 0.2407, "step": 106320 }, { "epoch": 4.4, "grad_norm": 1.7265625, "learning_rate": 0.00047392048128919997, "loss": 0.2196, "step": 106330 }, { "epoch": 4.4, "grad_norm": 0.72265625, "learning_rate": 0.00047391565827159684, "loss": 0.2324, "step": 106340 }, { "epoch": 4.41, "grad_norm": 0.6875, "learning_rate": 0.0004739108348326086, "loss": 0.2242, "step": 106350 }, { "epoch": 4.41, "grad_norm": 0.921875, "learning_rate": 0.0004739060109722444, "loss": 0.1981, "step": 106360 }, { "epoch": 4.41, "grad_norm": 0.69921875, "learning_rate": 0.0004739011866905132, "loss": 0.1879, "step": 106370 }, { "epoch": 4.41, "grad_norm": 1.609375, "learning_rate": 0.00047389636198742416, "loss": 0.2335, "step": 106380 }, { "epoch": 4.41, "grad_norm": 1.6875, "learning_rate": 0.00047389153686298635, "loss": 0.2135, "step": 106390 }, { "epoch": 4.41, "grad_norm": 0.6015625, "learning_rate": 0.0004738867113172088, "loss": 0.1922, "step": 106400 }, { "epoch": 4.41, "grad_norm": 0.59765625, "learning_rate": 0.0004738818853501007, "loss": 0.1899, "step": 106410 }, { "epoch": 4.41, "grad_norm": 0.478515625, "learning_rate": 0.00047387705896167104, "loss": 0.2123, "step": 106420 }, { "epoch": 4.41, "grad_norm": 0.3359375, "learning_rate": 0.000473872232151929, "loss": 0.232, "step": 106430 }, { "epoch": 4.41, "grad_norm": 0.404296875, "learning_rate": 0.00047386740492088355, "loss": 0.22, "step": 106440 }, { "epoch": 4.41, "grad_norm": 0.328125, "learning_rate": 0.00047386257726854376, "loss": 0.2208, "step": 106450 }, { "epoch": 4.41, "grad_norm": 0.50390625, "learning_rate": 0.0004738577491949189, "loss": 0.2214, "step": 106460 }, { "epoch": 4.41, "grad_norm": 0.55078125, "learning_rate": 0.0004738529207000178, "loss": 0.1986, "step": 106470 }, { "epoch": 4.41, "grad_norm": 0.376953125, "learning_rate": 0.0004738480917838497, "loss": 0.182, "step": 106480 }, { "epoch": 4.41, "grad_norm": 1.9140625, "learning_rate": 0.00047384326244642375, "loss": 0.1924, "step": 106490 }, { "epoch": 4.41, "grad_norm": 0.33203125, "learning_rate": 0.00047383843268774883, "loss": 0.1686, "step": 106500 }, { "epoch": 4.41, "grad_norm": 1.03125, "learning_rate": 0.00047383360250783426, "loss": 0.1785, "step": 106510 }, { "epoch": 4.41, "grad_norm": 0.0, "learning_rate": 0.00047382877190668894, "loss": 0.2151, "step": 106520 }, { "epoch": 4.41, "grad_norm": 0.60546875, "learning_rate": 0.0004738239408843221, "loss": 0.2538, "step": 106530 }, { "epoch": 4.41, "grad_norm": 0.201171875, "learning_rate": 0.0004738191094407428, "loss": 0.2114, "step": 106540 }, { "epoch": 4.41, "grad_norm": 1.0390625, "learning_rate": 0.0004738142775759601, "loss": 0.1847, "step": 106550 }, { "epoch": 4.41, "grad_norm": 0.419921875, "learning_rate": 0.00047380944528998306, "loss": 0.2388, "step": 106560 }, { "epoch": 4.41, "grad_norm": 0.6328125, "learning_rate": 0.0004738046125828208, "loss": 0.1763, "step": 106570 }, { "epoch": 4.41, "grad_norm": 0.46875, "learning_rate": 0.0004737997794544824, "loss": 0.1727, "step": 106580 }, { "epoch": 4.41, "grad_norm": 0.65234375, "learning_rate": 0.00047379494590497705, "loss": 0.2172, "step": 106590 }, { "epoch": 4.42, "grad_norm": 0.373046875, "learning_rate": 0.00047379011193431374, "loss": 0.2337, "step": 106600 }, { "epoch": 4.42, "grad_norm": 0.408203125, "learning_rate": 0.0004737852775425017, "loss": 0.1998, "step": 106610 }, { "epoch": 4.42, "grad_norm": 0.8828125, "learning_rate": 0.0004737804427295498, "loss": 0.2529, "step": 106620 }, { "epoch": 4.42, "grad_norm": 0.59765625, "learning_rate": 0.00047377560749546735, "loss": 0.2144, "step": 106630 }, { "epoch": 4.42, "grad_norm": 0.421875, "learning_rate": 0.0004737707718402634, "loss": 0.2163, "step": 106640 }, { "epoch": 4.42, "grad_norm": 0.36328125, "learning_rate": 0.0004737659357639469, "loss": 0.2144, "step": 106650 }, { "epoch": 4.42, "grad_norm": 0.5859375, "learning_rate": 0.00047376109926652724, "loss": 0.2092, "step": 106660 }, { "epoch": 4.42, "grad_norm": 1.3984375, "learning_rate": 0.00047375626234801325, "loss": 0.1582, "step": 106670 }, { "epoch": 4.42, "grad_norm": 0.7734375, "learning_rate": 0.00047375142500841416, "loss": 0.2499, "step": 106680 }, { "epoch": 4.42, "grad_norm": 1.0078125, "learning_rate": 0.000473746587247739, "loss": 0.1956, "step": 106690 }, { "epoch": 4.42, "grad_norm": 1.625, "learning_rate": 0.00047374174906599696, "loss": 0.2242, "step": 106700 }, { "epoch": 4.42, "grad_norm": 0.6015625, "learning_rate": 0.0004737369104631972, "loss": 0.2595, "step": 106710 }, { "epoch": 4.42, "grad_norm": 1.1328125, "learning_rate": 0.0004737320714393486, "loss": 0.1665, "step": 106720 }, { "epoch": 4.42, "grad_norm": 0.51171875, "learning_rate": 0.00047372723199446044, "loss": 0.2044, "step": 106730 }, { "epoch": 4.42, "grad_norm": 1.0703125, "learning_rate": 0.00047372239212854187, "loss": 0.1812, "step": 106740 }, { "epoch": 4.42, "grad_norm": 1.359375, "learning_rate": 0.00047371755184160184, "loss": 0.2581, "step": 106750 }, { "epoch": 4.42, "grad_norm": 0.6796875, "learning_rate": 0.0004737127111336496, "loss": 0.2131, "step": 106760 }, { "epoch": 4.42, "grad_norm": 0.44140625, "learning_rate": 0.0004737078700046941, "loss": 0.2609, "step": 106770 }, { "epoch": 4.42, "grad_norm": 1.15625, "learning_rate": 0.0004737030284547446, "loss": 0.186, "step": 106780 }, { "epoch": 4.42, "grad_norm": 1.6484375, "learning_rate": 0.00047369818648381015, "loss": 0.2069, "step": 106790 }, { "epoch": 4.42, "grad_norm": 1.671875, "learning_rate": 0.0004736933440918999, "loss": 0.1582, "step": 106800 }, { "epoch": 4.42, "grad_norm": 0.94140625, "learning_rate": 0.0004736885012790229, "loss": 0.1705, "step": 106810 }, { "epoch": 4.42, "grad_norm": 0.54296875, "learning_rate": 0.0004736836580451883, "loss": 0.2199, "step": 106820 }, { "epoch": 4.42, "grad_norm": 0.50390625, "learning_rate": 0.00047367881439040527, "loss": 0.2343, "step": 106830 }, { "epoch": 4.43, "grad_norm": 0.5625, "learning_rate": 0.0004736739703146828, "loss": 0.2109, "step": 106840 }, { "epoch": 4.43, "grad_norm": 1.359375, "learning_rate": 0.00047366912581803006, "loss": 0.2389, "step": 106850 }, { "epoch": 4.43, "grad_norm": 0.890625, "learning_rate": 0.0004736642809004562, "loss": 0.2075, "step": 106860 }, { "epoch": 4.43, "grad_norm": 1.1171875, "learning_rate": 0.0004736594355619703, "loss": 0.2366, "step": 106870 }, { "epoch": 4.43, "grad_norm": 0.546875, "learning_rate": 0.00047365458980258156, "loss": 0.219, "step": 106880 }, { "epoch": 4.43, "grad_norm": 0.369140625, "learning_rate": 0.000473649743622299, "loss": 0.2576, "step": 106890 }, { "epoch": 4.43, "grad_norm": 0.828125, "learning_rate": 0.0004736448970211318, "loss": 0.241, "step": 106900 }, { "epoch": 4.43, "grad_norm": 1.0703125, "learning_rate": 0.000473640049999089, "loss": 0.2072, "step": 106910 }, { "epoch": 4.43, "grad_norm": 0.828125, "learning_rate": 0.0004736352025561798, "loss": 0.1734, "step": 106920 }, { "epoch": 4.43, "grad_norm": 0.27734375, "learning_rate": 0.00047363035469241336, "loss": 0.2159, "step": 106930 }, { "epoch": 4.43, "grad_norm": 1.0625, "learning_rate": 0.0004736255064077986, "loss": 0.1744, "step": 106940 }, { "epoch": 4.43, "grad_norm": 0.70703125, "learning_rate": 0.0004736206577023449, "loss": 0.191, "step": 106950 }, { "epoch": 4.43, "grad_norm": 1.1484375, "learning_rate": 0.00047361580857606123, "loss": 0.1961, "step": 106960 }, { "epoch": 4.43, "grad_norm": 0.6015625, "learning_rate": 0.0004736109590289568, "loss": 0.2205, "step": 106970 }, { "epoch": 4.43, "grad_norm": 0.462890625, "learning_rate": 0.00047360610906104064, "loss": 0.1741, "step": 106980 }, { "epoch": 4.43, "grad_norm": 0.49609375, "learning_rate": 0.00047360125867232193, "loss": 0.2354, "step": 106990 }, { "epoch": 4.43, "grad_norm": 0.1728515625, "learning_rate": 0.00047359640786280983, "loss": 0.2015, "step": 107000 }, { "epoch": 4.43, "grad_norm": 0.70703125, "learning_rate": 0.0004735915566325134, "loss": 0.2745, "step": 107010 }, { "epoch": 4.43, "grad_norm": 0.5234375, "learning_rate": 0.00047358670498144184, "loss": 0.2272, "step": 107020 }, { "epoch": 4.43, "grad_norm": 0.57421875, "learning_rate": 0.00047358185290960426, "loss": 0.1691, "step": 107030 }, { "epoch": 4.43, "grad_norm": 0.39453125, "learning_rate": 0.0004735770004170098, "loss": 0.193, "step": 107040 }, { "epoch": 4.43, "grad_norm": 0.625, "learning_rate": 0.0004735721475036675, "loss": 0.1823, "step": 107050 }, { "epoch": 4.43, "grad_norm": 0.4609375, "learning_rate": 0.00047356729416958657, "loss": 0.1352, "step": 107060 }, { "epoch": 4.43, "grad_norm": 1.1171875, "learning_rate": 0.0004735624404147761, "loss": 0.2249, "step": 107070 }, { "epoch": 4.44, "grad_norm": 0.5546875, "learning_rate": 0.0004735575862392454, "loss": 0.1949, "step": 107080 }, { "epoch": 4.44, "grad_norm": 0.87109375, "learning_rate": 0.00047355273164300337, "loss": 0.2218, "step": 107090 }, { "epoch": 4.44, "grad_norm": 0.1845703125, "learning_rate": 0.00047354787662605923, "loss": 0.1845, "step": 107100 }, { "epoch": 4.44, "grad_norm": 0.7578125, "learning_rate": 0.0004735430211884222, "loss": 0.2001, "step": 107110 }, { "epoch": 4.44, "grad_norm": 0.5859375, "learning_rate": 0.00047353816533010124, "loss": 0.2347, "step": 107120 }, { "epoch": 4.44, "grad_norm": 0.84765625, "learning_rate": 0.00047353330905110566, "loss": 0.2034, "step": 107130 }, { "epoch": 4.44, "grad_norm": 0.5546875, "learning_rate": 0.00047352845235144447, "loss": 0.1758, "step": 107140 }, { "epoch": 4.44, "grad_norm": 0.9453125, "learning_rate": 0.000473523595231127, "loss": 0.2183, "step": 107150 }, { "epoch": 4.44, "grad_norm": 0.6796875, "learning_rate": 0.00047351873769016213, "loss": 0.2315, "step": 107160 }, { "epoch": 4.44, "grad_norm": 0.52734375, "learning_rate": 0.0004735138797285592, "loss": 0.189, "step": 107170 }, { "epoch": 4.44, "grad_norm": 1.0234375, "learning_rate": 0.0004735090213463272, "loss": 0.2183, "step": 107180 }, { "epoch": 4.44, "grad_norm": 1.484375, "learning_rate": 0.0004735041625434755, "loss": 0.2688, "step": 107190 }, { "epoch": 4.44, "grad_norm": 0.62109375, "learning_rate": 0.000473499303320013, "loss": 0.2353, "step": 107200 }, { "epoch": 4.44, "grad_norm": 0.57421875, "learning_rate": 0.000473494443675949, "loss": 0.1869, "step": 107210 }, { "epoch": 4.44, "grad_norm": 0.58984375, "learning_rate": 0.0004734895836112926, "loss": 0.2329, "step": 107220 }, { "epoch": 4.44, "grad_norm": 0.734375, "learning_rate": 0.00047348472312605283, "loss": 0.1824, "step": 107230 }, { "epoch": 4.44, "grad_norm": 0.6328125, "learning_rate": 0.00047347986222023907, "loss": 0.1927, "step": 107240 }, { "epoch": 4.44, "grad_norm": 0.97265625, "learning_rate": 0.0004734750008938603, "loss": 0.2258, "step": 107250 }, { "epoch": 4.44, "grad_norm": 0.71484375, "learning_rate": 0.00047347013914692573, "loss": 0.2204, "step": 107260 }, { "epoch": 4.44, "grad_norm": 0.77734375, "learning_rate": 0.00047346527697944443, "loss": 0.2125, "step": 107270 }, { "epoch": 4.44, "grad_norm": 0.373046875, "learning_rate": 0.0004734604143914256, "loss": 0.1841, "step": 107280 }, { "epoch": 4.44, "grad_norm": 0.71875, "learning_rate": 0.00047345555138287855, "loss": 0.2349, "step": 107290 }, { "epoch": 4.44, "grad_norm": 0.51171875, "learning_rate": 0.0004734506879538121, "loss": 0.2136, "step": 107300 }, { "epoch": 4.44, "grad_norm": 0.419921875, "learning_rate": 0.0004734458241042357, "loss": 0.1802, "step": 107310 }, { "epoch": 4.45, "grad_norm": 0.75, "learning_rate": 0.0004734409598341584, "loss": 0.1747, "step": 107320 }, { "epoch": 4.45, "grad_norm": 1.0078125, "learning_rate": 0.0004734360951435893, "loss": 0.2047, "step": 107330 }, { "epoch": 4.45, "grad_norm": 0.6953125, "learning_rate": 0.0004734312300325376, "loss": 0.1771, "step": 107340 }, { "epoch": 4.45, "grad_norm": 1.21875, "learning_rate": 0.00047342636450101237, "loss": 0.2011, "step": 107350 }, { "epoch": 4.45, "grad_norm": 0.57421875, "learning_rate": 0.00047342149854902294, "loss": 0.1947, "step": 107360 }, { "epoch": 4.45, "grad_norm": 0.73828125, "learning_rate": 0.0004734166321765784, "loss": 0.2321, "step": 107370 }, { "epoch": 4.45, "grad_norm": 3.078125, "learning_rate": 0.00047341176538368793, "loss": 0.2156, "step": 107380 }, { "epoch": 4.45, "grad_norm": 0.6015625, "learning_rate": 0.0004734068981703605, "loss": 0.1932, "step": 107390 }, { "epoch": 4.45, "grad_norm": 0.79296875, "learning_rate": 0.00047340203053660546, "loss": 0.196, "step": 107400 }, { "epoch": 4.45, "grad_norm": 0.875, "learning_rate": 0.000473397162482432, "loss": 0.2012, "step": 107410 }, { "epoch": 4.45, "grad_norm": 0.7265625, "learning_rate": 0.0004733922940078491, "loss": 0.2013, "step": 107420 }, { "epoch": 4.45, "grad_norm": 0.41796875, "learning_rate": 0.00047338742511286616, "loss": 0.1913, "step": 107430 }, { "epoch": 4.45, "grad_norm": 0.71875, "learning_rate": 0.0004733825557974921, "loss": 0.2169, "step": 107440 }, { "epoch": 4.45, "grad_norm": 1.5625, "learning_rate": 0.0004733776860617362, "loss": 0.2045, "step": 107450 }, { "epoch": 4.45, "grad_norm": 0.38671875, "learning_rate": 0.0004733728159056077, "loss": 0.1779, "step": 107460 }, { "epoch": 4.45, "grad_norm": 0.46484375, "learning_rate": 0.0004733679453291156, "loss": 0.1712, "step": 107470 }, { "epoch": 4.45, "grad_norm": 0.92578125, "learning_rate": 0.0004733630743322691, "loss": 0.1719, "step": 107480 }, { "epoch": 4.45, "grad_norm": 0.69140625, "learning_rate": 0.00047335820291507754, "loss": 0.1588, "step": 107490 }, { "epoch": 4.45, "grad_norm": 0.7109375, "learning_rate": 0.0004733533310775499, "loss": 0.2357, "step": 107500 }, { "epoch": 4.45, "grad_norm": 0.7265625, "learning_rate": 0.00047334845881969546, "loss": 0.2472, "step": 107510 }, { "epoch": 4.45, "grad_norm": 0.5703125, "learning_rate": 0.00047334358614152327, "loss": 0.1836, "step": 107520 }, { "epoch": 4.45, "grad_norm": 0.201171875, "learning_rate": 0.0004733387130430426, "loss": 0.1837, "step": 107530 }, { "epoch": 4.45, "grad_norm": 0.515625, "learning_rate": 0.00047333383952426254, "loss": 0.2272, "step": 107540 }, { "epoch": 4.45, "grad_norm": 0.8828125, "learning_rate": 0.00047332896558519244, "loss": 0.2055, "step": 107550 }, { "epoch": 4.46, "grad_norm": 0.6484375, "learning_rate": 0.0004733240912258412, "loss": 0.1774, "step": 107560 }, { "epoch": 4.46, "grad_norm": 1.140625, "learning_rate": 0.00047331921644621825, "loss": 0.2429, "step": 107570 }, { "epoch": 4.46, "grad_norm": 0.6875, "learning_rate": 0.0004733143412463326, "loss": 0.194, "step": 107580 }, { "epoch": 4.46, "grad_norm": 0.42578125, "learning_rate": 0.00047330946562619346, "loss": 0.1905, "step": 107590 }, { "epoch": 4.46, "grad_norm": 0.54296875, "learning_rate": 0.00047330458958581004, "loss": 0.1302, "step": 107600 }, { "epoch": 4.46, "grad_norm": 1.0859375, "learning_rate": 0.0004732997131251915, "loss": 0.2132, "step": 107610 }, { "epoch": 4.46, "grad_norm": 0.63671875, "learning_rate": 0.000473294836244347, "loss": 0.2605, "step": 107620 }, { "epoch": 4.46, "grad_norm": 0.3359375, "learning_rate": 0.00047328995894328573, "loss": 0.1868, "step": 107630 }, { "epoch": 4.46, "grad_norm": 0.68359375, "learning_rate": 0.00047328508122201695, "loss": 0.2395, "step": 107640 }, { "epoch": 4.46, "grad_norm": 0.60546875, "learning_rate": 0.00047328020308054963, "loss": 0.24, "step": 107650 }, { "epoch": 4.46, "grad_norm": 0.474609375, "learning_rate": 0.0004732753245188932, "loss": 0.2209, "step": 107660 }, { "epoch": 4.46, "grad_norm": 0.671875, "learning_rate": 0.00047327044553705666, "loss": 0.2133, "step": 107670 }, { "epoch": 4.46, "grad_norm": 1.0078125, "learning_rate": 0.0004732655661350492, "loss": 0.1885, "step": 107680 }, { "epoch": 4.46, "grad_norm": 1.34375, "learning_rate": 0.00047326068631288015, "loss": 0.1837, "step": 107690 }, { "epoch": 4.46, "grad_norm": 0.546875, "learning_rate": 0.00047325580607055856, "loss": 0.2316, "step": 107700 }, { "epoch": 4.46, "grad_norm": 0.640625, "learning_rate": 0.00047325092540809367, "loss": 0.2419, "step": 107710 }, { "epoch": 4.46, "grad_norm": 0.435546875, "learning_rate": 0.0004732460443254947, "loss": 0.2152, "step": 107720 }, { "epoch": 4.46, "grad_norm": 0.78125, "learning_rate": 0.0004732411628227706, "loss": 0.2156, "step": 107730 }, { "epoch": 4.46, "grad_norm": 1.046875, "learning_rate": 0.00047323628089993085, "loss": 0.1927, "step": 107740 }, { "epoch": 4.46, "grad_norm": 0.7578125, "learning_rate": 0.0004732313985569846, "loss": 0.2099, "step": 107750 }, { "epoch": 4.46, "grad_norm": 0.5859375, "learning_rate": 0.00047322651579394086, "loss": 0.2506, "step": 107760 }, { "epoch": 4.46, "grad_norm": 1.484375, "learning_rate": 0.00047322163261080897, "loss": 0.2162, "step": 107770 }, { "epoch": 4.46, "grad_norm": 0.640625, "learning_rate": 0.00047321674900759807, "loss": 0.2356, "step": 107780 }, { "epoch": 4.46, "grad_norm": 0.703125, "learning_rate": 0.0004732118649843173, "loss": 0.2291, "step": 107790 }, { "epoch": 4.47, "grad_norm": 0.416015625, "learning_rate": 0.000473206980540976, "loss": 0.2057, "step": 107800 }, { "epoch": 4.47, "grad_norm": 0.49609375, "learning_rate": 0.0004732020956775832, "loss": 0.198, "step": 107810 }, { "epoch": 4.47, "grad_norm": 1.1640625, "learning_rate": 0.0004731972103941482, "loss": 0.2067, "step": 107820 }, { "epoch": 4.47, "grad_norm": 1.5625, "learning_rate": 0.00047319232469068015, "loss": 0.2017, "step": 107830 }, { "epoch": 4.47, "grad_norm": 0.56640625, "learning_rate": 0.0004731874385671883, "loss": 0.1574, "step": 107840 }, { "epoch": 4.47, "grad_norm": 0.8984375, "learning_rate": 0.00047318255202368165, "loss": 0.2279, "step": 107850 }, { "epoch": 4.47, "grad_norm": 1.0546875, "learning_rate": 0.00047317766506016956, "loss": 0.1787, "step": 107860 }, { "epoch": 4.47, "grad_norm": 0.81640625, "learning_rate": 0.00047317277767666135, "loss": 0.2409, "step": 107870 }, { "epoch": 4.47, "grad_norm": 1.2890625, "learning_rate": 0.00047316788987316596, "loss": 0.2276, "step": 107880 }, { "epoch": 4.47, "grad_norm": 1.09375, "learning_rate": 0.0004731630016496927, "loss": 0.1549, "step": 107890 }, { "epoch": 4.47, "grad_norm": 0.83984375, "learning_rate": 0.0004731581130062508, "loss": 0.2165, "step": 107900 }, { "epoch": 4.47, "grad_norm": 0.40625, "learning_rate": 0.00047315322394284943, "loss": 0.1882, "step": 107910 }, { "epoch": 4.47, "grad_norm": 0.625, "learning_rate": 0.0004731483344594978, "loss": 0.2847, "step": 107920 }, { "epoch": 4.47, "grad_norm": 0.478515625, "learning_rate": 0.0004731434445562051, "loss": 0.2304, "step": 107930 }, { "epoch": 4.47, "grad_norm": 1.734375, "learning_rate": 0.0004731385542329805, "loss": 0.2184, "step": 107940 }, { "epoch": 4.47, "grad_norm": 0.78125, "learning_rate": 0.0004731336634898332, "loss": 0.2016, "step": 107950 }, { "epoch": 4.47, "grad_norm": 1.3515625, "learning_rate": 0.00047312877232677254, "loss": 0.2405, "step": 107960 }, { "epoch": 4.47, "grad_norm": 1.421875, "learning_rate": 0.0004731238807438076, "loss": 0.2222, "step": 107970 }, { "epoch": 4.47, "grad_norm": 0.5703125, "learning_rate": 0.00047311898874094753, "loss": 0.2079, "step": 107980 }, { "epoch": 4.47, "grad_norm": 1.125, "learning_rate": 0.0004731140963182017, "loss": 0.2114, "step": 107990 }, { "epoch": 4.47, "grad_norm": 1.1328125, "learning_rate": 0.00047310920347557925, "loss": 0.2106, "step": 108000 }, { "epoch": 4.47, "grad_norm": 0.61328125, "learning_rate": 0.0004731043102130893, "loss": 0.2293, "step": 108010 }, { "epoch": 4.47, "grad_norm": 0.51171875, "learning_rate": 0.0004730994165307412, "loss": 0.1822, "step": 108020 }, { "epoch": 4.47, "grad_norm": 0.59375, "learning_rate": 0.00047309452242854413, "loss": 0.2317, "step": 108030 }, { "epoch": 4.48, "grad_norm": 1.1015625, "learning_rate": 0.0004730896279065071, "loss": 0.2541, "step": 108040 }, { "epoch": 4.48, "grad_norm": 0.58203125, "learning_rate": 0.0004730847329646396, "loss": 0.1732, "step": 108050 }, { "epoch": 4.48, "grad_norm": 0.61328125, "learning_rate": 0.0004730798376029507, "loss": 0.2166, "step": 108060 }, { "epoch": 4.48, "grad_norm": 0.8046875, "learning_rate": 0.0004730749418214497, "loss": 0.2283, "step": 108070 }, { "epoch": 4.48, "grad_norm": 0.8671875, "learning_rate": 0.00047307004562014565, "loss": 0.2492, "step": 108080 }, { "epoch": 4.48, "grad_norm": 0.40234375, "learning_rate": 0.0004730651489990479, "loss": 0.2405, "step": 108090 }, { "epoch": 4.48, "grad_norm": 0.291015625, "learning_rate": 0.0004730602519581656, "loss": 0.2101, "step": 108100 }, { "epoch": 4.48, "grad_norm": 0.74609375, "learning_rate": 0.000473055354497508, "loss": 0.2314, "step": 108110 }, { "epoch": 4.48, "grad_norm": 1.015625, "learning_rate": 0.00047305045661708435, "loss": 0.218, "step": 108120 }, { "epoch": 4.48, "grad_norm": 0.58984375, "learning_rate": 0.0004730455583169039, "loss": 0.1981, "step": 108130 }, { "epoch": 4.48, "grad_norm": 0.279296875, "learning_rate": 0.0004730406595969756, "loss": 0.2003, "step": 108140 }, { "epoch": 4.48, "grad_norm": 0.71875, "learning_rate": 0.000473035760457309, "loss": 0.1793, "step": 108150 }, { "epoch": 4.48, "grad_norm": 1.015625, "learning_rate": 0.0004730308608979132, "loss": 0.1818, "step": 108160 }, { "epoch": 4.48, "grad_norm": 0.82421875, "learning_rate": 0.00047302596091879735, "loss": 0.1967, "step": 108170 }, { "epoch": 4.48, "grad_norm": 0.64453125, "learning_rate": 0.0004730210605199707, "loss": 0.1821, "step": 108180 }, { "epoch": 4.48, "grad_norm": 0.84765625, "learning_rate": 0.0004730161597014426, "loss": 0.1469, "step": 108190 }, { "epoch": 4.48, "grad_norm": 0.88671875, "learning_rate": 0.0004730112584632221, "loss": 0.2338, "step": 108200 }, { "epoch": 4.48, "grad_norm": 0.640625, "learning_rate": 0.00047300635680531846, "loss": 0.2188, "step": 108210 }, { "epoch": 4.48, "grad_norm": 1.0234375, "learning_rate": 0.00047300145472774105, "loss": 0.2224, "step": 108220 }, { "epoch": 4.48, "grad_norm": 0.82421875, "learning_rate": 0.00047299655223049885, "loss": 0.2241, "step": 108230 }, { "epoch": 4.48, "grad_norm": 0.59375, "learning_rate": 0.0004729916493136013, "loss": 0.1873, "step": 108240 }, { "epoch": 4.48, "grad_norm": 0.59765625, "learning_rate": 0.0004729867459770575, "loss": 0.1908, "step": 108250 }, { "epoch": 4.48, "grad_norm": 1.5625, "learning_rate": 0.00047298184222087684, "loss": 0.2052, "step": 108260 }, { "epoch": 4.48, "grad_norm": 0.8984375, "learning_rate": 0.0004729769380450684, "loss": 0.2095, "step": 108270 }, { "epoch": 4.48, "grad_norm": 0.60546875, "learning_rate": 0.00047297203344964133, "loss": 0.2114, "step": 108280 }, { "epoch": 4.49, "grad_norm": 2.09375, "learning_rate": 0.000472967128434605, "loss": 0.2648, "step": 108290 }, { "epoch": 4.49, "grad_norm": 0.390625, "learning_rate": 0.00047296222299996865, "loss": 0.2266, "step": 108300 }, { "epoch": 4.49, "grad_norm": 0.1962890625, "learning_rate": 0.0004729573171457415, "loss": 0.2331, "step": 108310 }, { "epoch": 4.49, "grad_norm": 0.26171875, "learning_rate": 0.00047295241087193273, "loss": 0.2133, "step": 108320 }, { "epoch": 4.49, "grad_norm": 0.609375, "learning_rate": 0.0004729475041785516, "loss": 0.2877, "step": 108330 }, { "epoch": 4.49, "grad_norm": 0.57421875, "learning_rate": 0.0004729425970656073, "loss": 0.1948, "step": 108340 }, { "epoch": 4.49, "grad_norm": 0.392578125, "learning_rate": 0.00047293768953310915, "loss": 0.1632, "step": 108350 }, { "epoch": 4.49, "grad_norm": 0.55078125, "learning_rate": 0.0004729327815810663, "loss": 0.1796, "step": 108360 }, { "epoch": 4.49, "grad_norm": 0.8671875, "learning_rate": 0.00047292787320948816, "loss": 0.1858, "step": 108370 }, { "epoch": 4.49, "grad_norm": 0.53515625, "learning_rate": 0.0004729229644183837, "loss": 0.2235, "step": 108380 }, { "epoch": 4.49, "grad_norm": 1.4375, "learning_rate": 0.00047291805520776237, "loss": 0.1861, "step": 108390 }, { "epoch": 4.49, "grad_norm": 0.64453125, "learning_rate": 0.0004729131455776333, "loss": 0.2295, "step": 108400 }, { "epoch": 4.49, "grad_norm": 1.078125, "learning_rate": 0.00047290823552800576, "loss": 0.1975, "step": 108410 }, { "epoch": 4.49, "grad_norm": 0.71484375, "learning_rate": 0.000472903325058889, "loss": 0.224, "step": 108420 }, { "epoch": 4.49, "grad_norm": 0.65625, "learning_rate": 0.0004728984141702923, "loss": 0.2104, "step": 108430 }, { "epoch": 4.49, "grad_norm": 0.8359375, "learning_rate": 0.0004728935028622248, "loss": 0.2152, "step": 108440 }, { "epoch": 4.49, "grad_norm": 0.9453125, "learning_rate": 0.00047288859113469586, "loss": 0.2334, "step": 108450 }, { "epoch": 4.49, "grad_norm": 1.5390625, "learning_rate": 0.0004728836789877146, "loss": 0.2335, "step": 108460 }, { "epoch": 4.49, "grad_norm": 0.431640625, "learning_rate": 0.0004728787664212903, "loss": 0.1998, "step": 108470 }, { "epoch": 4.49, "grad_norm": 0.75390625, "learning_rate": 0.00047287385343543225, "loss": 0.1759, "step": 108480 }, { "epoch": 4.49, "grad_norm": 0.60546875, "learning_rate": 0.0004728689400301497, "loss": 0.2061, "step": 108490 }, { "epoch": 4.49, "grad_norm": 0.65234375, "learning_rate": 0.0004728640262054519, "loss": 0.2269, "step": 108500 }, { "epoch": 4.49, "grad_norm": 1.0859375, "learning_rate": 0.000472859111961348, "loss": 0.2444, "step": 108510 }, { "epoch": 4.49, "grad_norm": 1.2265625, "learning_rate": 0.0004728541972978474, "loss": 0.214, "step": 108520 }, { "epoch": 4.5, "grad_norm": 0.99609375, "learning_rate": 0.00047284928221495915, "loss": 0.1958, "step": 108530 }, { "epoch": 4.5, "grad_norm": 0.5703125, "learning_rate": 0.0004728443667126927, "loss": 0.2258, "step": 108540 }, { "epoch": 4.5, "grad_norm": 0.765625, "learning_rate": 0.0004728394507910572, "loss": 0.1945, "step": 108550 }, { "epoch": 4.5, "grad_norm": 0.87109375, "learning_rate": 0.00047283453445006194, "loss": 0.1994, "step": 108560 }, { "epoch": 4.5, "grad_norm": 0.90234375, "learning_rate": 0.00047282961768971613, "loss": 0.2127, "step": 108570 }, { "epoch": 4.5, "grad_norm": 0.5078125, "learning_rate": 0.00047282470051002905, "loss": 0.2602, "step": 108580 }, { "epoch": 4.5, "grad_norm": 0.6875, "learning_rate": 0.0004728197829110099, "loss": 0.1977, "step": 108590 }, { "epoch": 4.5, "grad_norm": 1.0546875, "learning_rate": 0.0004728148648926681, "loss": 0.2521, "step": 108600 }, { "epoch": 4.5, "grad_norm": 0.5859375, "learning_rate": 0.0004728099464550127, "loss": 0.225, "step": 108610 }, { "epoch": 4.5, "grad_norm": 1.0078125, "learning_rate": 0.000472805027598053, "loss": 0.1738, "step": 108620 }, { "epoch": 4.5, "grad_norm": 0.24609375, "learning_rate": 0.00047280010832179836, "loss": 0.2801, "step": 108630 }, { "epoch": 4.5, "grad_norm": 0.224609375, "learning_rate": 0.00047279518862625803, "loss": 0.252, "step": 108640 }, { "epoch": 4.5, "grad_norm": 0.4765625, "learning_rate": 0.0004727902685114411, "loss": 0.2397, "step": 108650 }, { "epoch": 4.5, "grad_norm": 0.439453125, "learning_rate": 0.000472785347977357, "loss": 0.1686, "step": 108660 }, { "epoch": 4.5, "grad_norm": 0.78125, "learning_rate": 0.0004727804270240149, "loss": 0.2474, "step": 108670 }, { "epoch": 4.5, "grad_norm": 0.4765625, "learning_rate": 0.00047277550565142415, "loss": 0.1862, "step": 108680 }, { "epoch": 4.5, "grad_norm": 0.0, "learning_rate": 0.00047277058385959393, "loss": 0.229, "step": 108690 }, { "epoch": 4.5, "grad_norm": 0.38671875, "learning_rate": 0.00047276566164853353, "loss": 0.1664, "step": 108700 }, { "epoch": 4.5, "grad_norm": 0.3671875, "learning_rate": 0.0004727607390182522, "loss": 0.1752, "step": 108710 }, { "epoch": 4.5, "grad_norm": 0.5, "learning_rate": 0.0004727558159687593, "loss": 0.2063, "step": 108720 }, { "epoch": 4.5, "grad_norm": 0.455078125, "learning_rate": 0.00047275089250006384, "loss": 0.2783, "step": 108730 }, { "epoch": 4.5, "grad_norm": 0.48046875, "learning_rate": 0.0004727459686121754, "loss": 0.2614, "step": 108740 }, { "epoch": 4.5, "grad_norm": 0.72265625, "learning_rate": 0.000472741044305103, "loss": 0.2427, "step": 108750 }, { "epoch": 4.5, "grad_norm": 0.76171875, "learning_rate": 0.0004727361195788561, "loss": 0.1623, "step": 108760 }, { "epoch": 4.51, "grad_norm": 0.53125, "learning_rate": 0.0004727311944334438, "loss": 0.1889, "step": 108770 }, { "epoch": 4.51, "grad_norm": 1.109375, "learning_rate": 0.0004727262688688755, "loss": 0.206, "step": 108780 }, { "epoch": 4.51, "grad_norm": 0.6015625, "learning_rate": 0.0004727213428851604, "loss": 0.1447, "step": 108790 }, { "epoch": 4.51, "grad_norm": 0.68359375, "learning_rate": 0.00047271641648230777, "loss": 0.2053, "step": 108800 }, { "epoch": 4.51, "grad_norm": 0.44921875, "learning_rate": 0.00047271148966032694, "loss": 0.2264, "step": 108810 }, { "epoch": 4.51, "grad_norm": 1.703125, "learning_rate": 0.0004727065624192271, "loss": 0.2033, "step": 108820 }, { "epoch": 4.51, "grad_norm": 0.62109375, "learning_rate": 0.00047270163475901755, "loss": 0.2053, "step": 108830 }, { "epoch": 4.51, "grad_norm": 0.6640625, "learning_rate": 0.0004726967066797076, "loss": 0.2523, "step": 108840 }, { "epoch": 4.51, "grad_norm": 0.54296875, "learning_rate": 0.00047269177818130647, "loss": 0.256, "step": 108850 }, { "epoch": 4.51, "grad_norm": 0.68359375, "learning_rate": 0.0004726868492638235, "loss": 0.227, "step": 108860 }, { "epoch": 4.51, "grad_norm": 0.88671875, "learning_rate": 0.0004726819199272679, "loss": 0.1991, "step": 108870 }, { "epoch": 4.51, "grad_norm": 0.44140625, "learning_rate": 0.000472676990171649, "loss": 0.1966, "step": 108880 }, { "epoch": 4.51, "grad_norm": 0.84375, "learning_rate": 0.00047267205999697596, "loss": 0.2116, "step": 108890 }, { "epoch": 4.51, "grad_norm": 0.0, "learning_rate": 0.0004726671294032583, "loss": 0.2464, "step": 108900 }, { "epoch": 4.51, "grad_norm": 0.390625, "learning_rate": 0.0004726621983905051, "loss": 0.1914, "step": 108910 }, { "epoch": 4.51, "grad_norm": 1.0078125, "learning_rate": 0.00047265726695872555, "loss": 0.2211, "step": 108920 }, { "epoch": 4.51, "grad_norm": 0.6640625, "learning_rate": 0.0004726523351079292, "loss": 0.2108, "step": 108930 }, { "epoch": 4.51, "grad_norm": 0.5546875, "learning_rate": 0.0004726474028381252, "loss": 0.2028, "step": 108940 }, { "epoch": 4.51, "grad_norm": 2.6875, "learning_rate": 0.0004726424701493228, "loss": 0.1716, "step": 108950 }, { "epoch": 4.51, "grad_norm": 0.5625, "learning_rate": 0.00047263753704153134, "loss": 0.17, "step": 108960 }, { "epoch": 4.51, "grad_norm": 1.0078125, "learning_rate": 0.00047263260351476, "loss": 0.1926, "step": 108970 }, { "epoch": 4.51, "grad_norm": 1.0703125, "learning_rate": 0.00047262766956901827, "loss": 0.1914, "step": 108980 }, { "epoch": 4.51, "grad_norm": 1.109375, "learning_rate": 0.0004726227352043152, "loss": 0.2159, "step": 108990 }, { "epoch": 4.51, "grad_norm": 0.5625, "learning_rate": 0.0004726178004206602, "loss": 0.2369, "step": 109000 }, { "epoch": 4.52, "grad_norm": 0.4921875, "learning_rate": 0.0004726128652180626, "loss": 0.1798, "step": 109010 }, { "epoch": 4.52, "grad_norm": 1.1171875, "learning_rate": 0.00047260792959653153, "loss": 0.2284, "step": 109020 }, { "epoch": 4.52, "grad_norm": 0.46484375, "learning_rate": 0.0004726029935560765, "loss": 0.2432, "step": 109030 }, { "epoch": 4.52, "grad_norm": 0.6484375, "learning_rate": 0.00047259805709670656, "loss": 0.2294, "step": 109040 }, { "epoch": 4.52, "grad_norm": 0.70703125, "learning_rate": 0.0004725931202184311, "loss": 0.2023, "step": 109050 }, { "epoch": 4.52, "grad_norm": 0.609375, "learning_rate": 0.0004725881829212596, "loss": 0.1994, "step": 109060 }, { "epoch": 4.52, "grad_norm": 0.59765625, "learning_rate": 0.00047258324520520103, "loss": 0.2379, "step": 109070 }, { "epoch": 4.52, "grad_norm": 1.109375, "learning_rate": 0.0004725783070702649, "loss": 0.2197, "step": 109080 }, { "epoch": 4.52, "grad_norm": 1.71875, "learning_rate": 0.00047257336851646035, "loss": 0.2809, "step": 109090 }, { "epoch": 4.52, "grad_norm": 0.8125, "learning_rate": 0.0004725684295437969, "loss": 0.1942, "step": 109100 }, { "epoch": 4.52, "grad_norm": 0.734375, "learning_rate": 0.00047256349015228357, "loss": 0.2468, "step": 109110 }, { "epoch": 4.52, "grad_norm": 0.439453125, "learning_rate": 0.0004725585503419298, "loss": 0.2409, "step": 109120 }, { "epoch": 4.52, "grad_norm": 0.60546875, "learning_rate": 0.0004725536101127449, "loss": 0.209, "step": 109130 }, { "epoch": 4.52, "grad_norm": 1.0703125, "learning_rate": 0.00047254866946473814, "loss": 0.2001, "step": 109140 }, { "epoch": 4.52, "grad_norm": 0.62109375, "learning_rate": 0.0004725437283979188, "loss": 0.2227, "step": 109150 }, { "epoch": 4.52, "grad_norm": 1.0078125, "learning_rate": 0.0004725387869122962, "loss": 0.2335, "step": 109160 }, { "epoch": 4.52, "grad_norm": 0.91015625, "learning_rate": 0.00047253384500787966, "loss": 0.1793, "step": 109170 }, { "epoch": 4.52, "grad_norm": 1.2265625, "learning_rate": 0.0004725289026846784, "loss": 0.242, "step": 109180 }, { "epoch": 4.52, "grad_norm": 0.53125, "learning_rate": 0.00047252395994270185, "loss": 0.2468, "step": 109190 }, { "epoch": 4.52, "grad_norm": 0.99609375, "learning_rate": 0.0004725190167819592, "loss": 0.1848, "step": 109200 }, { "epoch": 4.52, "grad_norm": 0.703125, "learning_rate": 0.00047251407320245977, "loss": 0.1741, "step": 109210 }, { "epoch": 4.52, "grad_norm": 0.54296875, "learning_rate": 0.00047250912920421295, "loss": 0.2635, "step": 109220 }, { "epoch": 4.52, "grad_norm": 0.388671875, "learning_rate": 0.0004725041847872279, "loss": 0.2147, "step": 109230 }, { "epoch": 4.52, "grad_norm": 0.6484375, "learning_rate": 0.00047249923995151404, "loss": 0.2054, "step": 109240 }, { "epoch": 4.53, "grad_norm": 1.2734375, "learning_rate": 0.00047249429469708066, "loss": 0.1754, "step": 109250 }, { "epoch": 4.53, "grad_norm": 0.326171875, "learning_rate": 0.000472489349023937, "loss": 0.2136, "step": 109260 }, { "epoch": 4.53, "grad_norm": 0.578125, "learning_rate": 0.00047248440293209243, "loss": 0.1829, "step": 109270 }, { "epoch": 4.53, "grad_norm": 1.09375, "learning_rate": 0.00047247945642155624, "loss": 0.2104, "step": 109280 }, { "epoch": 4.53, "grad_norm": 0.62109375, "learning_rate": 0.00047247450949233777, "loss": 0.2025, "step": 109290 }, { "epoch": 4.53, "grad_norm": 0.26171875, "learning_rate": 0.0004724695621444463, "loss": 0.2244, "step": 109300 }, { "epoch": 4.53, "grad_norm": 0.17578125, "learning_rate": 0.0004724646143778911, "loss": 0.2319, "step": 109310 }, { "epoch": 4.53, "grad_norm": 1.734375, "learning_rate": 0.0004724596661926815, "loss": 0.2609, "step": 109320 }, { "epoch": 4.53, "grad_norm": 0.66015625, "learning_rate": 0.0004724547175888269, "loss": 0.2106, "step": 109330 }, { "epoch": 4.53, "grad_norm": 0.376953125, "learning_rate": 0.0004724497685663365, "loss": 0.2535, "step": 109340 }, { "epoch": 4.53, "grad_norm": 0.984375, "learning_rate": 0.0004724448191252196, "loss": 0.2313, "step": 109350 }, { "epoch": 4.53, "grad_norm": 1.3125, "learning_rate": 0.00047243986926548566, "loss": 0.1889, "step": 109360 }, { "epoch": 4.53, "grad_norm": 2.5, "learning_rate": 0.00047243491898714385, "loss": 0.1809, "step": 109370 }, { "epoch": 4.53, "grad_norm": 0.84765625, "learning_rate": 0.00047242996829020356, "loss": 0.2009, "step": 109380 }, { "epoch": 4.53, "grad_norm": 0.8046875, "learning_rate": 0.0004724250171746741, "loss": 0.2065, "step": 109390 }, { "epoch": 4.53, "grad_norm": 0.11572265625, "learning_rate": 0.00047242006564056485, "loss": 0.135, "step": 109400 }, { "epoch": 4.53, "grad_norm": 0.671875, "learning_rate": 0.00047241511368788496, "loss": 0.2257, "step": 109410 }, { "epoch": 4.53, "grad_norm": 1.078125, "learning_rate": 0.0004724101613166439, "loss": 0.2391, "step": 109420 }, { "epoch": 4.53, "grad_norm": 0.54296875, "learning_rate": 0.00047240520852685086, "loss": 0.2204, "step": 109430 }, { "epoch": 4.53, "grad_norm": 0.35546875, "learning_rate": 0.00047240025531851534, "loss": 0.1876, "step": 109440 }, { "epoch": 4.53, "grad_norm": 0.70703125, "learning_rate": 0.00047239530169164646, "loss": 0.1782, "step": 109450 }, { "epoch": 4.53, "grad_norm": 0.9453125, "learning_rate": 0.00047239034764625374, "loss": 0.2711, "step": 109460 }, { "epoch": 4.53, "grad_norm": 0.6953125, "learning_rate": 0.0004723853931823463, "loss": 0.2296, "step": 109470 }, { "epoch": 4.53, "grad_norm": 1.09375, "learning_rate": 0.0004723804382999336, "loss": 0.1887, "step": 109480 }, { "epoch": 4.54, "grad_norm": 0.515625, "learning_rate": 0.0004723754829990249, "loss": 0.1661, "step": 109490 }, { "epoch": 4.54, "grad_norm": 0.87890625, "learning_rate": 0.00047237052727962963, "loss": 0.218, "step": 109500 }, { "epoch": 4.54, "grad_norm": 0.3828125, "learning_rate": 0.000472365571141757, "loss": 0.2138, "step": 109510 }, { "epoch": 4.54, "grad_norm": 0.40625, "learning_rate": 0.0004723606145854164, "loss": 0.1638, "step": 109520 }, { "epoch": 4.54, "grad_norm": 0.443359375, "learning_rate": 0.0004723556576106171, "loss": 0.1989, "step": 109530 }, { "epoch": 4.54, "grad_norm": 0.2158203125, "learning_rate": 0.0004723507002173685, "loss": 0.2244, "step": 109540 }, { "epoch": 4.54, "grad_norm": 0.65625, "learning_rate": 0.00047234574240567984, "loss": 0.2182, "step": 109550 }, { "epoch": 4.54, "grad_norm": 0.1533203125, "learning_rate": 0.0004723407841755606, "loss": 0.2095, "step": 109560 }, { "epoch": 4.54, "grad_norm": 0.8203125, "learning_rate": 0.00047233582552701995, "loss": 0.2103, "step": 109570 }, { "epoch": 4.54, "grad_norm": 1.7890625, "learning_rate": 0.0004723308664600673, "loss": 0.1819, "step": 109580 }, { "epoch": 4.54, "grad_norm": 0.72265625, "learning_rate": 0.0004723259069747119, "loss": 0.1933, "step": 109590 }, { "epoch": 4.54, "grad_norm": 0.3125, "learning_rate": 0.00047232094707096324, "loss": 0.2407, "step": 109600 }, { "epoch": 4.54, "grad_norm": 0.515625, "learning_rate": 0.0004723159867488306, "loss": 0.2792, "step": 109610 }, { "epoch": 4.54, "grad_norm": 0.37109375, "learning_rate": 0.0004723110260083232, "loss": 0.1967, "step": 109620 }, { "epoch": 4.54, "grad_norm": 0.96875, "learning_rate": 0.0004723060648494505, "loss": 0.2227, "step": 109630 }, { "epoch": 4.54, "grad_norm": 0.6953125, "learning_rate": 0.0004723011032722218, "loss": 0.1713, "step": 109640 }, { "epoch": 4.54, "grad_norm": 1.046875, "learning_rate": 0.00047229614127664634, "loss": 0.2369, "step": 109650 }, { "epoch": 4.54, "grad_norm": 0.404296875, "learning_rate": 0.0004722911788627336, "loss": 0.1765, "step": 109660 }, { "epoch": 4.54, "grad_norm": 0.40234375, "learning_rate": 0.0004722862160304929, "loss": 0.2618, "step": 109670 }, { "epoch": 4.54, "grad_norm": 1.2734375, "learning_rate": 0.0004722812527799335, "loss": 0.2504, "step": 109680 }, { "epoch": 4.54, "grad_norm": 0.60546875, "learning_rate": 0.0004722762891110648, "loss": 0.2028, "step": 109690 }, { "epoch": 4.54, "grad_norm": 0.47265625, "learning_rate": 0.00047227132502389616, "loss": 0.237, "step": 109700 }, { "epoch": 4.54, "grad_norm": 1.1640625, "learning_rate": 0.0004722663605184369, "loss": 0.2654, "step": 109710 }, { "epoch": 4.54, "grad_norm": 0.8203125, "learning_rate": 0.0004722613955946963, "loss": 0.2235, "step": 109720 }, { "epoch": 4.55, "grad_norm": 0.9375, "learning_rate": 0.00047225643025268374, "loss": 0.2176, "step": 109730 }, { "epoch": 4.55, "grad_norm": 0.271484375, "learning_rate": 0.00047225146449240865, "loss": 0.1674, "step": 109740 }, { "epoch": 4.55, "grad_norm": 2.15625, "learning_rate": 0.00047224649831388024, "loss": 0.2403, "step": 109750 }, { "epoch": 4.55, "grad_norm": 0.46875, "learning_rate": 0.00047224153171710793, "loss": 0.1807, "step": 109760 }, { "epoch": 4.55, "grad_norm": 0.62890625, "learning_rate": 0.00047223656470210106, "loss": 0.2302, "step": 109770 }, { "epoch": 4.55, "grad_norm": 2.4375, "learning_rate": 0.000472231597268869, "loss": 0.2094, "step": 109780 }, { "epoch": 4.55, "grad_norm": 0.69921875, "learning_rate": 0.0004722266294174211, "loss": 0.1867, "step": 109790 }, { "epoch": 4.55, "grad_norm": 0.462890625, "learning_rate": 0.0004722216611477666, "loss": 0.2077, "step": 109800 }, { "epoch": 4.55, "grad_norm": 0.61328125, "learning_rate": 0.000472216692459915, "loss": 0.1811, "step": 109810 }, { "epoch": 4.55, "grad_norm": 0.69921875, "learning_rate": 0.00047221172335387555, "loss": 0.2194, "step": 109820 }, { "epoch": 4.55, "grad_norm": 0.3828125, "learning_rate": 0.00047220675382965763, "loss": 0.2166, "step": 109830 }, { "epoch": 4.55, "grad_norm": 0.43359375, "learning_rate": 0.00047220178388727063, "loss": 0.2259, "step": 109840 }, { "epoch": 4.55, "grad_norm": 0.447265625, "learning_rate": 0.00047219681352672383, "loss": 0.2215, "step": 109850 }, { "epoch": 4.55, "grad_norm": 0.515625, "learning_rate": 0.00047219184274802664, "loss": 0.1699, "step": 109860 }, { "epoch": 4.55, "grad_norm": 1.0625, "learning_rate": 0.0004721868715511884, "loss": 0.1803, "step": 109870 }, { "epoch": 4.55, "grad_norm": 1.3828125, "learning_rate": 0.00047218189993621844, "loss": 0.2353, "step": 109880 }, { "epoch": 4.55, "grad_norm": 0.73046875, "learning_rate": 0.0004721769279031262, "loss": 0.221, "step": 109890 }, { "epoch": 4.55, "grad_norm": 0.80078125, "learning_rate": 0.00047217195545192094, "loss": 0.2077, "step": 109900 }, { "epoch": 4.55, "grad_norm": 1.6015625, "learning_rate": 0.00047216698258261205, "loss": 0.1969, "step": 109910 }, { "epoch": 4.55, "grad_norm": 0.5234375, "learning_rate": 0.0004721620092952088, "loss": 0.148, "step": 109920 }, { "epoch": 4.55, "grad_norm": 0.6015625, "learning_rate": 0.0004721570355897208, "loss": 0.1822, "step": 109930 }, { "epoch": 4.55, "grad_norm": 1.1796875, "learning_rate": 0.00047215206146615713, "loss": 0.2594, "step": 109940 }, { "epoch": 4.55, "grad_norm": 0.42578125, "learning_rate": 0.00047214708692452733, "loss": 0.2044, "step": 109950 }, { "epoch": 4.55, "grad_norm": 0.96484375, "learning_rate": 0.0004721421119648407, "loss": 0.2594, "step": 109960 }, { "epoch": 4.55, "grad_norm": 0.546875, "learning_rate": 0.00047213713658710656, "loss": 0.1864, "step": 109970 }, { "epoch": 4.56, "grad_norm": 1.3671875, "learning_rate": 0.00047213216079133435, "loss": 0.1633, "step": 109980 }, { "epoch": 4.56, "grad_norm": 1.2109375, "learning_rate": 0.0004721271845775334, "loss": 0.1945, "step": 109990 }, { "epoch": 4.56, "grad_norm": 0.99609375, "learning_rate": 0.0004721222079457131, "loss": 0.2264, "step": 110000 }, { "epoch": 4.56, "grad_norm": 0.91796875, "learning_rate": 0.00047211723089588274, "loss": 0.2041, "step": 110010 }, { "epoch": 4.56, "grad_norm": 0.2041015625, "learning_rate": 0.0004721122534280518, "loss": 0.243, "step": 110020 }, { "epoch": 4.56, "grad_norm": 0.33203125, "learning_rate": 0.00047210727554222953, "loss": 0.2168, "step": 110030 }, { "epoch": 4.56, "grad_norm": 0.7890625, "learning_rate": 0.00047210229723842535, "loss": 0.1822, "step": 110040 }, { "epoch": 4.56, "grad_norm": 0.353515625, "learning_rate": 0.00047209731851664865, "loss": 0.2156, "step": 110050 }, { "epoch": 4.56, "grad_norm": 0.2001953125, "learning_rate": 0.00047209233937690876, "loss": 0.1587, "step": 110060 }, { "epoch": 4.56, "grad_norm": 0.578125, "learning_rate": 0.0004720873598192151, "loss": 0.2284, "step": 110070 }, { "epoch": 4.56, "grad_norm": 0.69921875, "learning_rate": 0.000472082379843577, "loss": 0.2086, "step": 110080 }, { "epoch": 4.56, "grad_norm": 0.99609375, "learning_rate": 0.00047207739945000394, "loss": 0.2577, "step": 110090 }, { "epoch": 4.56, "grad_norm": 0.89453125, "learning_rate": 0.0004720724186385051, "loss": 0.2625, "step": 110100 }, { "epoch": 4.56, "grad_norm": 0.53125, "learning_rate": 0.0004720674374090899, "loss": 0.2929, "step": 110110 }, { "epoch": 4.56, "grad_norm": 0.91015625, "learning_rate": 0.0004720624557617679, "loss": 0.1859, "step": 110120 }, { "epoch": 4.56, "grad_norm": 0.58984375, "learning_rate": 0.0004720574736965482, "loss": 0.2123, "step": 110130 }, { "epoch": 4.56, "grad_norm": 0.5078125, "learning_rate": 0.0004720524912134404, "loss": 0.2034, "step": 110140 }, { "epoch": 4.56, "grad_norm": 1.015625, "learning_rate": 0.0004720475083124538, "loss": 0.2637, "step": 110150 }, { "epoch": 4.56, "grad_norm": 0.6953125, "learning_rate": 0.0004720425249935977, "loss": 0.272, "step": 110160 }, { "epoch": 4.56, "grad_norm": 0.51171875, "learning_rate": 0.00047203754125688157, "loss": 0.1859, "step": 110170 }, { "epoch": 4.56, "grad_norm": 0.79296875, "learning_rate": 0.0004720325571023148, "loss": 0.2315, "step": 110180 }, { "epoch": 4.56, "grad_norm": 0.490234375, "learning_rate": 0.0004720275725299067, "loss": 0.1986, "step": 110190 }, { "epoch": 4.56, "grad_norm": 1.1640625, "learning_rate": 0.0004720225875396667, "loss": 0.1776, "step": 110200 }, { "epoch": 4.56, "grad_norm": 0.52734375, "learning_rate": 0.00047201760213160416, "loss": 0.2277, "step": 110210 }, { "epoch": 4.57, "grad_norm": 0.69140625, "learning_rate": 0.00047201261630572846, "loss": 0.1763, "step": 110220 }, { "epoch": 4.57, "grad_norm": 0.82421875, "learning_rate": 0.00047200763006204907, "loss": 0.1643, "step": 110230 }, { "epoch": 4.57, "grad_norm": 0.57421875, "learning_rate": 0.0004720026434005752, "loss": 0.2142, "step": 110240 }, { "epoch": 4.57, "grad_norm": 0.6953125, "learning_rate": 0.00047199765632131635, "loss": 0.1759, "step": 110250 }, { "epoch": 4.57, "grad_norm": 0.515625, "learning_rate": 0.00047199266882428194, "loss": 0.2167, "step": 110260 }, { "epoch": 4.57, "grad_norm": 0.80078125, "learning_rate": 0.0004719876809094812, "loss": 0.2393, "step": 110270 }, { "epoch": 4.57, "grad_norm": 1.3125, "learning_rate": 0.00047198269257692373, "loss": 0.2073, "step": 110280 }, { "epoch": 4.57, "grad_norm": 0.7421875, "learning_rate": 0.0004719777038266187, "loss": 0.2292, "step": 110290 }, { "epoch": 4.57, "grad_norm": 0.59375, "learning_rate": 0.00047197271465857567, "loss": 0.2123, "step": 110300 }, { "epoch": 4.57, "grad_norm": 0.60546875, "learning_rate": 0.00047196772507280394, "loss": 0.2088, "step": 110310 }, { "epoch": 4.57, "grad_norm": 2.296875, "learning_rate": 0.00047196273506931285, "loss": 0.2251, "step": 110320 }, { "epoch": 4.57, "grad_norm": 0.474609375, "learning_rate": 0.000471957744648112, "loss": 0.2075, "step": 110330 }, { "epoch": 4.57, "grad_norm": 0.439453125, "learning_rate": 0.00047195275380921056, "loss": 0.2385, "step": 110340 }, { "epoch": 4.57, "grad_norm": 0.6484375, "learning_rate": 0.00047194776255261807, "loss": 0.2252, "step": 110350 }, { "epoch": 4.57, "grad_norm": 0.0, "learning_rate": 0.0004719427708783438, "loss": 0.1941, "step": 110360 }, { "epoch": 4.57, "grad_norm": 0.74609375, "learning_rate": 0.0004719377787863972, "loss": 0.2306, "step": 110370 }, { "epoch": 4.57, "grad_norm": 1.3203125, "learning_rate": 0.0004719327862767877, "loss": 0.2236, "step": 110380 }, { "epoch": 4.57, "grad_norm": 0.80078125, "learning_rate": 0.0004719277933495246, "loss": 0.2835, "step": 110390 }, { "epoch": 4.57, "grad_norm": 0.5234375, "learning_rate": 0.0004719228000046174, "loss": 0.2369, "step": 110400 }, { "epoch": 4.57, "grad_norm": 1.1328125, "learning_rate": 0.00047191780624207546, "loss": 0.2199, "step": 110410 }, { "epoch": 4.57, "grad_norm": 1.3125, "learning_rate": 0.00047191281206190823, "loss": 0.2367, "step": 110420 }, { "epoch": 4.57, "grad_norm": 0.68359375, "learning_rate": 0.000471907817464125, "loss": 0.2129, "step": 110430 }, { "epoch": 4.57, "grad_norm": 0.6875, "learning_rate": 0.0004719028224487352, "loss": 0.2022, "step": 110440 }, { "epoch": 4.57, "grad_norm": 0.337890625, "learning_rate": 0.0004718978270157483, "loss": 0.2693, "step": 110450 }, { "epoch": 4.58, "grad_norm": 1.359375, "learning_rate": 0.0004718928311651736, "loss": 0.1961, "step": 110460 }, { "epoch": 4.58, "grad_norm": 0.91796875, "learning_rate": 0.0004718878348970206, "loss": 0.1779, "step": 110470 }, { "epoch": 4.58, "grad_norm": 0.62890625, "learning_rate": 0.0004718828382112986, "loss": 0.2201, "step": 110480 }, { "epoch": 4.58, "grad_norm": 0.4296875, "learning_rate": 0.0004718778411080171, "loss": 0.2102, "step": 110490 }, { "epoch": 4.58, "grad_norm": 0.68359375, "learning_rate": 0.00047187284358718554, "loss": 0.2147, "step": 110500 }, { "epoch": 4.58, "grad_norm": 0.55078125, "learning_rate": 0.00047186784564881313, "loss": 0.1896, "step": 110510 }, { "epoch": 4.58, "grad_norm": 0.6796875, "learning_rate": 0.0004718628472929094, "loss": 0.197, "step": 110520 }, { "epoch": 4.58, "grad_norm": 0.416015625, "learning_rate": 0.0004718578485194838, "loss": 0.2194, "step": 110530 }, { "epoch": 4.58, "grad_norm": 0.55078125, "learning_rate": 0.0004718528493285457, "loss": 0.1882, "step": 110540 }, { "epoch": 4.58, "grad_norm": 0.412109375, "learning_rate": 0.0004718478497201044, "loss": 0.171, "step": 110550 }, { "epoch": 4.58, "grad_norm": 0.5546875, "learning_rate": 0.00047184284969416945, "loss": 0.2106, "step": 110560 }, { "epoch": 4.58, "grad_norm": 0.53125, "learning_rate": 0.00047183784925075025, "loss": 0.1931, "step": 110570 }, { "epoch": 4.58, "grad_norm": 0.4921875, "learning_rate": 0.0004718328483898562, "loss": 0.2442, "step": 110580 }, { "epoch": 4.58, "grad_norm": 0.2099609375, "learning_rate": 0.00047182784711149664, "loss": 0.1797, "step": 110590 }, { "epoch": 4.58, "grad_norm": 0.421875, "learning_rate": 0.000471822845415681, "loss": 0.2036, "step": 110600 }, { "epoch": 4.58, "grad_norm": 0.66015625, "learning_rate": 0.0004718178433024188, "loss": 0.1908, "step": 110610 }, { "epoch": 4.58, "grad_norm": 0.31640625, "learning_rate": 0.0004718128407717193, "loss": 0.2801, "step": 110620 }, { "epoch": 4.58, "grad_norm": 0.5546875, "learning_rate": 0.00047180783782359206, "loss": 0.2051, "step": 110630 }, { "epoch": 4.58, "grad_norm": 0.55078125, "learning_rate": 0.00047180283445804634, "loss": 0.1964, "step": 110640 }, { "epoch": 4.58, "grad_norm": 1.5546875, "learning_rate": 0.00047179783067509163, "loss": 0.2239, "step": 110650 }, { "epoch": 4.58, "grad_norm": 0.6484375, "learning_rate": 0.0004717928264747374, "loss": 0.1902, "step": 110660 }, { "epoch": 4.58, "grad_norm": 0.251953125, "learning_rate": 0.00047178782185699304, "loss": 0.191, "step": 110670 }, { "epoch": 4.58, "grad_norm": 0.96875, "learning_rate": 0.000471782816821868, "loss": 0.2228, "step": 110680 }, { "epoch": 4.58, "grad_norm": 0.86328125, "learning_rate": 0.0004717778113693715, "loss": 0.196, "step": 110690 }, { "epoch": 4.59, "grad_norm": 0.283203125, "learning_rate": 0.0004717728054995132, "loss": 0.2765, "step": 110700 }, { "epoch": 4.59, "grad_norm": 0.62109375, "learning_rate": 0.0004717677992123024, "loss": 0.2214, "step": 110710 }, { "epoch": 4.59, "grad_norm": 0.76171875, "learning_rate": 0.0004717627925077486, "loss": 0.241, "step": 110720 }, { "epoch": 4.59, "grad_norm": 0.451171875, "learning_rate": 0.0004717577853858611, "loss": 0.2168, "step": 110730 }, { "epoch": 4.59, "grad_norm": 1.65625, "learning_rate": 0.00047175277784664943, "loss": 0.2259, "step": 110740 }, { "epoch": 4.59, "grad_norm": 0.75, "learning_rate": 0.000471747769890123, "loss": 0.2045, "step": 110750 }, { "epoch": 4.59, "grad_norm": 1.03125, "learning_rate": 0.0004717427615162912, "loss": 0.1971, "step": 110760 }, { "epoch": 4.59, "grad_norm": 0.6640625, "learning_rate": 0.0004717377527251635, "loss": 0.2423, "step": 110770 }, { "epoch": 4.59, "grad_norm": 0.6875, "learning_rate": 0.00047173274351674924, "loss": 0.2139, "step": 110780 }, { "epoch": 4.59, "grad_norm": 0.2490234375, "learning_rate": 0.0004717277338910579, "loss": 0.2675, "step": 110790 }, { "epoch": 4.59, "grad_norm": 0.84375, "learning_rate": 0.00047172272384809897, "loss": 0.2239, "step": 110800 }, { "epoch": 4.59, "grad_norm": 0.921875, "learning_rate": 0.0004717177133878817, "loss": 0.2317, "step": 110810 }, { "epoch": 4.59, "grad_norm": 0.75, "learning_rate": 0.0004717127025104158, "loss": 0.1854, "step": 110820 }, { "epoch": 4.59, "grad_norm": 1.2578125, "learning_rate": 0.00047170769121571046, "loss": 0.2202, "step": 110830 }, { "epoch": 4.59, "grad_norm": 1.078125, "learning_rate": 0.00047170267950377514, "loss": 0.2084, "step": 110840 }, { "epoch": 4.59, "grad_norm": 0.84765625, "learning_rate": 0.0004716976673746194, "loss": 0.2259, "step": 110850 }, { "epoch": 4.59, "grad_norm": 0.265625, "learning_rate": 0.0004716926548282525, "loss": 0.2022, "step": 110860 }, { "epoch": 4.59, "grad_norm": 0.404296875, "learning_rate": 0.00047168764186468406, "loss": 0.2544, "step": 110870 }, { "epoch": 4.59, "grad_norm": 0.369140625, "learning_rate": 0.00047168262848392334, "loss": 0.1731, "step": 110880 }, { "epoch": 4.59, "grad_norm": 0.470703125, "learning_rate": 0.0004716776146859799, "loss": 0.2402, "step": 110890 }, { "epoch": 4.59, "grad_norm": 1.140625, "learning_rate": 0.00047167260047086316, "loss": 0.2097, "step": 110900 }, { "epoch": 4.59, "grad_norm": 0.60546875, "learning_rate": 0.00047166758583858245, "loss": 0.2144, "step": 110910 }, { "epoch": 4.59, "grad_norm": 0.921875, "learning_rate": 0.0004716625707891473, "loss": 0.2086, "step": 110920 }, { "epoch": 4.59, "grad_norm": 0.54296875, "learning_rate": 0.0004716575553225672, "loss": 0.1736, "step": 110930 }, { "epoch": 4.6, "grad_norm": 0.6328125, "learning_rate": 0.00047165253943885137, "loss": 0.2194, "step": 110940 }, { "epoch": 4.6, "grad_norm": 2.21875, "learning_rate": 0.00047164752313800953, "loss": 0.2146, "step": 110950 }, { "epoch": 4.6, "grad_norm": 0.609375, "learning_rate": 0.0004716425064200509, "loss": 0.2162, "step": 110960 }, { "epoch": 4.6, "grad_norm": 0.388671875, "learning_rate": 0.00047163748928498504, "loss": 0.1819, "step": 110970 }, { "epoch": 4.6, "grad_norm": 0.7109375, "learning_rate": 0.0004716324717328214, "loss": 0.236, "step": 110980 }, { "epoch": 4.6, "grad_norm": 0.5234375, "learning_rate": 0.0004716274537635694, "loss": 0.1911, "step": 110990 }, { "epoch": 4.6, "grad_norm": 0.54296875, "learning_rate": 0.0004716224353772384, "loss": 0.2181, "step": 111000 }, { "epoch": 4.6, "grad_norm": 1.703125, "learning_rate": 0.00047161741657383795, "loss": 0.2184, "step": 111010 }, { "epoch": 4.6, "grad_norm": 0.58984375, "learning_rate": 0.0004716123973533774, "loss": 0.194, "step": 111020 }, { "epoch": 4.6, "grad_norm": 0.640625, "learning_rate": 0.00047160737771586626, "loss": 0.2246, "step": 111030 }, { "epoch": 4.6, "grad_norm": 0.9765625, "learning_rate": 0.000471602357661314, "loss": 0.2448, "step": 111040 }, { "epoch": 4.6, "grad_norm": 0.94140625, "learning_rate": 0.00047159733718973005, "loss": 0.2038, "step": 111050 }, { "epoch": 4.6, "grad_norm": 1.078125, "learning_rate": 0.0004715923163011238, "loss": 0.2517, "step": 111060 }, { "epoch": 4.6, "grad_norm": 1.1640625, "learning_rate": 0.0004715872949955048, "loss": 0.2263, "step": 111070 }, { "epoch": 4.6, "grad_norm": 0.62890625, "learning_rate": 0.00047158227327288236, "loss": 0.2035, "step": 111080 }, { "epoch": 4.6, "grad_norm": 0.68359375, "learning_rate": 0.000471577251133266, "loss": 0.1628, "step": 111090 }, { "epoch": 4.6, "grad_norm": 0.80859375, "learning_rate": 0.00047157222857666526, "loss": 0.2466, "step": 111100 }, { "epoch": 4.6, "grad_norm": 0.3359375, "learning_rate": 0.0004715672056030895, "loss": 0.2377, "step": 111110 }, { "epoch": 4.6, "grad_norm": 0.53515625, "learning_rate": 0.0004715621822125482, "loss": 0.2069, "step": 111120 }, { "epoch": 4.6, "grad_norm": 0.462890625, "learning_rate": 0.00047155715840505074, "loss": 0.1996, "step": 111130 }, { "epoch": 4.6, "grad_norm": 0.61328125, "learning_rate": 0.00047155213418060665, "loss": 0.2178, "step": 111140 }, { "epoch": 4.6, "grad_norm": 0.5078125, "learning_rate": 0.0004715471095392254, "loss": 0.2034, "step": 111150 }, { "epoch": 4.6, "grad_norm": 0.2041015625, "learning_rate": 0.0004715420844809164, "loss": 0.2194, "step": 111160 }, { "epoch": 4.6, "grad_norm": 1.8125, "learning_rate": 0.0004715370590056891, "loss": 0.2158, "step": 111170 }, { "epoch": 4.61, "grad_norm": 2.125, "learning_rate": 0.000471532033113553, "loss": 0.2207, "step": 111180 }, { "epoch": 4.61, "grad_norm": 0.52734375, "learning_rate": 0.0004715270068045175, "loss": 0.2248, "step": 111190 }, { "epoch": 4.61, "grad_norm": 0.67578125, "learning_rate": 0.00047152198007859215, "loss": 0.25, "step": 111200 }, { "epoch": 4.61, "grad_norm": 1.2109375, "learning_rate": 0.00047151695293578633, "loss": 0.1763, "step": 111210 }, { "epoch": 4.61, "grad_norm": 0.765625, "learning_rate": 0.00047151192537610954, "loss": 0.2299, "step": 111220 }, { "epoch": 4.61, "grad_norm": 0.921875, "learning_rate": 0.0004715068973995713, "loss": 0.2257, "step": 111230 }, { "epoch": 4.61, "grad_norm": 0.81640625, "learning_rate": 0.00047150186900618085, "loss": 0.2144, "step": 111240 }, { "epoch": 4.61, "grad_norm": 0.921875, "learning_rate": 0.0004714968401959479, "loss": 0.2565, "step": 111250 }, { "epoch": 4.61, "grad_norm": 0.28515625, "learning_rate": 0.0004714918109688818, "loss": 0.189, "step": 111260 }, { "epoch": 4.61, "grad_norm": 1.1015625, "learning_rate": 0.00047148678132499203, "loss": 0.1999, "step": 111270 }, { "epoch": 4.61, "grad_norm": 0.56640625, "learning_rate": 0.000471481751264288, "loss": 0.18, "step": 111280 }, { "epoch": 4.61, "grad_norm": 0.52734375, "learning_rate": 0.0004714767207867793, "loss": 0.1659, "step": 111290 }, { "epoch": 4.61, "grad_norm": 0.5078125, "learning_rate": 0.00047147168989247534, "loss": 0.2416, "step": 111300 }, { "epoch": 4.61, "grad_norm": 0.2373046875, "learning_rate": 0.0004714666585813855, "loss": 0.2517, "step": 111310 }, { "epoch": 4.61, "grad_norm": 0.75, "learning_rate": 0.00047146162685351935, "loss": 0.2459, "step": 111320 }, { "epoch": 4.61, "grad_norm": 0.92578125, "learning_rate": 0.00047145659470888634, "loss": 0.2339, "step": 111330 }, { "epoch": 4.61, "grad_norm": 1.5078125, "learning_rate": 0.00047145156214749593, "loss": 0.1852, "step": 111340 }, { "epoch": 4.61, "grad_norm": 0.2265625, "learning_rate": 0.00047144652916935773, "loss": 0.1569, "step": 111350 }, { "epoch": 4.61, "grad_norm": 0.7109375, "learning_rate": 0.00047144149577448095, "loss": 0.2391, "step": 111360 }, { "epoch": 4.61, "grad_norm": 0.443359375, "learning_rate": 0.00047143646196287517, "loss": 0.208, "step": 111370 }, { "epoch": 4.61, "grad_norm": 0.98046875, "learning_rate": 0.00047143142773454984, "loss": 0.211, "step": 111380 }, { "epoch": 4.61, "grad_norm": 0.72265625, "learning_rate": 0.00047142639308951455, "loss": 0.1953, "step": 111390 }, { "epoch": 4.61, "grad_norm": 0.90625, "learning_rate": 0.00047142135802777873, "loss": 0.1728, "step": 111400 }, { "epoch": 4.61, "grad_norm": 0.72265625, "learning_rate": 0.00047141632254935176, "loss": 0.2452, "step": 111410 }, { "epoch": 4.62, "grad_norm": 0.44140625, "learning_rate": 0.0004714112866542433, "loss": 0.2181, "step": 111420 }, { "epoch": 4.62, "grad_norm": 0.76953125, "learning_rate": 0.00047140625034246253, "loss": 0.1924, "step": 111430 }, { "epoch": 4.62, "grad_norm": 0.93359375, "learning_rate": 0.0004714012136140192, "loss": 0.2094, "step": 111440 }, { "epoch": 4.62, "grad_norm": 0.8984375, "learning_rate": 0.00047139617646892276, "loss": 0.159, "step": 111450 }, { "epoch": 4.62, "grad_norm": 0.42578125, "learning_rate": 0.0004713911389071825, "loss": 0.2012, "step": 111460 }, { "epoch": 4.62, "grad_norm": 0.7890625, "learning_rate": 0.0004713861009288081, "loss": 0.2006, "step": 111470 }, { "epoch": 4.62, "grad_norm": 0.5546875, "learning_rate": 0.00047138106253380896, "loss": 0.2572, "step": 111480 }, { "epoch": 4.62, "grad_norm": 0.64453125, "learning_rate": 0.0004713760237221946, "loss": 0.1927, "step": 111490 }, { "epoch": 4.62, "grad_norm": 1.5546875, "learning_rate": 0.0004713709844939744, "loss": 0.1561, "step": 111500 }, { "epoch": 4.62, "grad_norm": 0.77734375, "learning_rate": 0.0004713659448491579, "loss": 0.2314, "step": 111510 }, { "epoch": 4.62, "grad_norm": 0.66015625, "learning_rate": 0.00047136090478775463, "loss": 0.2222, "step": 111520 }, { "epoch": 4.62, "grad_norm": 0.7578125, "learning_rate": 0.00047135586430977405, "loss": 0.188, "step": 111530 }, { "epoch": 4.62, "grad_norm": 0.828125, "learning_rate": 0.0004713508234152256, "loss": 0.2223, "step": 111540 }, { "epoch": 4.62, "grad_norm": 0.4375, "learning_rate": 0.00047134578210411885, "loss": 0.1907, "step": 111550 }, { "epoch": 4.62, "grad_norm": 0.79296875, "learning_rate": 0.00047134074037646326, "loss": 0.2349, "step": 111560 }, { "epoch": 4.62, "grad_norm": 0.69140625, "learning_rate": 0.0004713356982322683, "loss": 0.2221, "step": 111570 }, { "epoch": 4.62, "grad_norm": 1.0, "learning_rate": 0.0004713306556715434, "loss": 0.2033, "step": 111580 }, { "epoch": 4.62, "grad_norm": 0.765625, "learning_rate": 0.00047132561269429805, "loss": 0.2515, "step": 111590 }, { "epoch": 4.62, "grad_norm": 0.91796875, "learning_rate": 0.00047132056930054194, "loss": 0.1909, "step": 111600 }, { "epoch": 4.62, "grad_norm": 0.640625, "learning_rate": 0.00047131552549028435, "loss": 0.2457, "step": 111610 }, { "epoch": 4.62, "grad_norm": 0.58203125, "learning_rate": 0.0004713104812635349, "loss": 0.2166, "step": 111620 }, { "epoch": 4.62, "grad_norm": 0.90625, "learning_rate": 0.00047130543662030294, "loss": 0.1786, "step": 111630 }, { "epoch": 4.62, "grad_norm": 0.61328125, "learning_rate": 0.00047130039156059814, "loss": 0.2577, "step": 111640 }, { "epoch": 4.62, "grad_norm": 0.33203125, "learning_rate": 0.0004712953460844298, "loss": 0.185, "step": 111650 }, { "epoch": 4.62, "grad_norm": 0.6640625, "learning_rate": 0.0004712903001918076, "loss": 0.2459, "step": 111660 }, { "epoch": 4.63, "grad_norm": 1.015625, "learning_rate": 0.0004712852538827409, "loss": 0.2334, "step": 111670 }, { "epoch": 4.63, "grad_norm": 1.4453125, "learning_rate": 0.00047128020715723925, "loss": 0.1821, "step": 111680 }, { "epoch": 4.63, "grad_norm": 0.380859375, "learning_rate": 0.00047127516001531215, "loss": 0.2158, "step": 111690 }, { "epoch": 4.63, "grad_norm": 0.97265625, "learning_rate": 0.0004712701124569691, "loss": 0.2092, "step": 111700 }, { "epoch": 4.63, "grad_norm": 0.6875, "learning_rate": 0.0004712650644822196, "loss": 0.2528, "step": 111710 }, { "epoch": 4.63, "grad_norm": 0.578125, "learning_rate": 0.0004712600160910732, "loss": 0.242, "step": 111720 }, { "epoch": 4.63, "grad_norm": 0.48828125, "learning_rate": 0.00047125496728353923, "loss": 0.1966, "step": 111730 }, { "epoch": 4.63, "grad_norm": 0.921875, "learning_rate": 0.00047124991805962737, "loss": 0.1776, "step": 111740 }, { "epoch": 4.63, "grad_norm": 0.640625, "learning_rate": 0.000471244868419347, "loss": 0.2356, "step": 111750 }, { "epoch": 4.63, "grad_norm": 0.1416015625, "learning_rate": 0.0004712398183627078, "loss": 0.1873, "step": 111760 }, { "epoch": 4.63, "grad_norm": 0.9921875, "learning_rate": 0.0004712347678897191, "loss": 0.1855, "step": 111770 }, { "epoch": 4.63, "grad_norm": 0.5234375, "learning_rate": 0.0004712297170003904, "loss": 0.2388, "step": 111780 }, { "epoch": 4.63, "grad_norm": 0.7109375, "learning_rate": 0.0004712246656947313, "loss": 0.193, "step": 111790 }, { "epoch": 4.63, "grad_norm": 0.49609375, "learning_rate": 0.00047121961397275126, "loss": 0.2153, "step": 111800 }, { "epoch": 4.63, "grad_norm": 0.859375, "learning_rate": 0.0004712145618344598, "loss": 0.1474, "step": 111810 }, { "epoch": 4.63, "grad_norm": 1.1484375, "learning_rate": 0.00047120950927986643, "loss": 0.2067, "step": 111820 }, { "epoch": 4.63, "grad_norm": 2.3125, "learning_rate": 0.0004712044563089807, "loss": 0.2603, "step": 111830 }, { "epoch": 4.63, "grad_norm": 0.31640625, "learning_rate": 0.00047119940292181196, "loss": 0.2096, "step": 111840 }, { "epoch": 4.63, "grad_norm": 0.50390625, "learning_rate": 0.0004711943491183699, "loss": 0.1749, "step": 111850 }, { "epoch": 4.63, "grad_norm": 1.8359375, "learning_rate": 0.000471189294898664, "loss": 0.1953, "step": 111860 }, { "epoch": 4.63, "grad_norm": 1.390625, "learning_rate": 0.0004711842402627037, "loss": 0.1814, "step": 111870 }, { "epoch": 4.63, "grad_norm": 1.375, "learning_rate": 0.00047117918521049853, "loss": 0.1927, "step": 111880 }, { "epoch": 4.63, "grad_norm": 0.34375, "learning_rate": 0.00047117412974205796, "loss": 0.2689, "step": 111890 }, { "epoch": 4.63, "grad_norm": 0.609375, "learning_rate": 0.0004711690738573917, "loss": 0.2219, "step": 111900 }, { "epoch": 4.64, "grad_norm": 0.5859375, "learning_rate": 0.000471164017556509, "loss": 0.2403, "step": 111910 }, { "epoch": 4.64, "grad_norm": 0.65625, "learning_rate": 0.00047115896083941953, "loss": 0.1827, "step": 111920 }, { "epoch": 4.64, "grad_norm": 0.5390625, "learning_rate": 0.00047115390370613286, "loss": 0.215, "step": 111930 }, { "epoch": 4.64, "grad_norm": 1.53125, "learning_rate": 0.00047114884615665837, "loss": 0.2025, "step": 111940 }, { "epoch": 4.64, "grad_norm": 0.5625, "learning_rate": 0.0004711437881910056, "loss": 0.1813, "step": 111950 }, { "epoch": 4.64, "grad_norm": 1.6171875, "learning_rate": 0.00047113872980918413, "loss": 0.1859, "step": 111960 }, { "epoch": 4.64, "grad_norm": 0.4296875, "learning_rate": 0.0004711336710112035, "loss": 0.2472, "step": 111970 }, { "epoch": 4.64, "grad_norm": 1.0, "learning_rate": 0.0004711286117970731, "loss": 0.2048, "step": 111980 }, { "epoch": 4.64, "grad_norm": 0.458984375, "learning_rate": 0.00047112355216680256, "loss": 0.2224, "step": 111990 }, { "epoch": 4.64, "grad_norm": 0.11767578125, "learning_rate": 0.0004711184921204015, "loss": 0.1497, "step": 112000 }, { "epoch": 4.64, "grad_norm": 0.71484375, "learning_rate": 0.00047111343165787915, "loss": 0.2517, "step": 112010 }, { "epoch": 4.64, "grad_norm": 0.375, "learning_rate": 0.0004711083707792453, "loss": 0.1945, "step": 112020 }, { "epoch": 4.64, "grad_norm": 0.73828125, "learning_rate": 0.0004711033094845093, "loss": 0.2381, "step": 112030 }, { "epoch": 4.64, "grad_norm": 0.59765625, "learning_rate": 0.00047109824777368073, "loss": 0.213, "step": 112040 }, { "epoch": 4.64, "grad_norm": 0.97265625, "learning_rate": 0.0004710931856467692, "loss": 0.2261, "step": 112050 }, { "epoch": 4.64, "grad_norm": 1.8125, "learning_rate": 0.00047108812310378415, "loss": 0.2077, "step": 112060 }, { "epoch": 4.64, "grad_norm": 0.357421875, "learning_rate": 0.00047108306014473513, "loss": 0.1764, "step": 112070 }, { "epoch": 4.64, "grad_norm": 0.984375, "learning_rate": 0.0004710779967696317, "loss": 0.2366, "step": 112080 }, { "epoch": 4.64, "grad_norm": 0.5859375, "learning_rate": 0.0004710729329784833, "loss": 0.1648, "step": 112090 }, { "epoch": 4.64, "grad_norm": 0.6328125, "learning_rate": 0.00047106786877129946, "loss": 0.1952, "step": 112100 }, { "epoch": 4.64, "grad_norm": 1.2578125, "learning_rate": 0.00047106280414808987, "loss": 0.2549, "step": 112110 }, { "epoch": 4.64, "grad_norm": 0.34765625, "learning_rate": 0.00047105773910886394, "loss": 0.1733, "step": 112120 }, { "epoch": 4.64, "grad_norm": 0.63671875, "learning_rate": 0.0004710526736536312, "loss": 0.2503, "step": 112130 }, { "epoch": 4.64, "grad_norm": 0.609375, "learning_rate": 0.00047104760778240117, "loss": 0.2056, "step": 112140 }, { "epoch": 4.65, "grad_norm": 0.46484375, "learning_rate": 0.00047104254149518346, "loss": 0.2183, "step": 112150 }, { "epoch": 4.65, "grad_norm": 0.54296875, "learning_rate": 0.00047103747479198757, "loss": 0.1946, "step": 112160 }, { "epoch": 4.65, "grad_norm": 0.7578125, "learning_rate": 0.00047103240767282293, "loss": 0.2303, "step": 112170 }, { "epoch": 4.65, "grad_norm": 1.0546875, "learning_rate": 0.00047102734013769926, "loss": 0.231, "step": 112180 }, { "epoch": 4.65, "grad_norm": 0.953125, "learning_rate": 0.00047102227218662597, "loss": 0.242, "step": 112190 }, { "epoch": 4.65, "grad_norm": 0.96484375, "learning_rate": 0.00047101720381961267, "loss": 0.2311, "step": 112200 }, { "epoch": 4.65, "grad_norm": 0.0, "learning_rate": 0.00047101213503666884, "loss": 0.1934, "step": 112210 }, { "epoch": 4.65, "grad_norm": 0.453125, "learning_rate": 0.000471007065837804, "loss": 0.1399, "step": 112220 }, { "epoch": 4.65, "grad_norm": 0.271484375, "learning_rate": 0.00047100199622302776, "loss": 0.2219, "step": 112230 }, { "epoch": 4.65, "grad_norm": 1.828125, "learning_rate": 0.00047099692619234965, "loss": 0.2044, "step": 112240 }, { "epoch": 4.65, "grad_norm": 1.859375, "learning_rate": 0.0004709918557457791, "loss": 0.2547, "step": 112250 }, { "epoch": 4.65, "grad_norm": 0.2255859375, "learning_rate": 0.0004709867848833259, "loss": 0.1515, "step": 112260 }, { "epoch": 4.65, "grad_norm": 0.181640625, "learning_rate": 0.0004709817136049993, "loss": 0.221, "step": 112270 }, { "epoch": 4.65, "grad_norm": 0.8125, "learning_rate": 0.000470976641910809, "loss": 0.1873, "step": 112280 }, { "epoch": 4.65, "grad_norm": 1.078125, "learning_rate": 0.00047097156980076456, "loss": 0.2386, "step": 112290 }, { "epoch": 4.65, "grad_norm": 0.34765625, "learning_rate": 0.0004709664972748755, "loss": 0.1824, "step": 112300 }, { "epoch": 4.65, "grad_norm": 0.625, "learning_rate": 0.0004709614243331513, "loss": 0.2313, "step": 112310 }, { "epoch": 4.65, "grad_norm": 0.451171875, "learning_rate": 0.00047095635097560163, "loss": 0.2095, "step": 112320 }, { "epoch": 4.65, "grad_norm": 0.796875, "learning_rate": 0.0004709512772022358, "loss": 0.1904, "step": 112330 }, { "epoch": 4.65, "grad_norm": 0.59375, "learning_rate": 0.00047094620301306374, "loss": 0.2494, "step": 112340 }, { "epoch": 4.65, "grad_norm": 0.7734375, "learning_rate": 0.0004709411284080947, "loss": 0.184, "step": 112350 }, { "epoch": 4.65, "grad_norm": 0.64453125, "learning_rate": 0.00047093605338733837, "loss": 0.2315, "step": 112360 }, { "epoch": 4.65, "grad_norm": 0.5859375, "learning_rate": 0.00047093097795080415, "loss": 0.1988, "step": 112370 }, { "epoch": 4.65, "grad_norm": 0.87890625, "learning_rate": 0.0004709259020985017, "loss": 0.2253, "step": 112380 }, { "epoch": 4.66, "grad_norm": 0.77734375, "learning_rate": 0.0004709208258304406, "loss": 0.2073, "step": 112390 }, { "epoch": 4.66, "grad_norm": 0.62109375, "learning_rate": 0.0004709157491466304, "loss": 0.2892, "step": 112400 }, { "epoch": 4.66, "grad_norm": 0.2333984375, "learning_rate": 0.00047091067204708053, "loss": 0.1679, "step": 112410 }, { "epoch": 4.66, "grad_norm": 0.96875, "learning_rate": 0.0004709055945318007, "loss": 0.2, "step": 112420 }, { "epoch": 4.66, "grad_norm": 0.70703125, "learning_rate": 0.00047090051660080034, "loss": 0.1809, "step": 112430 }, { "epoch": 4.66, "grad_norm": 1.1796875, "learning_rate": 0.0004708954382540891, "loss": 0.1924, "step": 112440 }, { "epoch": 4.66, "grad_norm": 0.765625, "learning_rate": 0.0004708903594916765, "loss": 0.2077, "step": 112450 }, { "epoch": 4.66, "grad_norm": 0.57421875, "learning_rate": 0.00047088528031357215, "loss": 0.2037, "step": 112460 }, { "epoch": 4.66, "grad_norm": 1.2578125, "learning_rate": 0.00047088020071978543, "loss": 0.2103, "step": 112470 }, { "epoch": 4.66, "grad_norm": 0.384765625, "learning_rate": 0.0004708751207103261, "loss": 0.1823, "step": 112480 }, { "epoch": 4.66, "grad_norm": 1.1953125, "learning_rate": 0.00047087004028520364, "loss": 0.1869, "step": 112490 }, { "epoch": 4.66, "grad_norm": 0.9140625, "learning_rate": 0.00047086495944442764, "loss": 0.2712, "step": 112500 }, { "epoch": 4.66, "grad_norm": 0.6875, "learning_rate": 0.00047085987818800757, "loss": 0.1694, "step": 112510 }, { "epoch": 4.66, "grad_norm": 0.9375, "learning_rate": 0.00047085479651595306, "loss": 0.2001, "step": 112520 }, { "epoch": 4.66, "grad_norm": 0.703125, "learning_rate": 0.0004708497144282738, "loss": 0.242, "step": 112530 }, { "epoch": 4.66, "grad_norm": 0.6015625, "learning_rate": 0.00047084463192497913, "loss": 0.1745, "step": 112540 }, { "epoch": 4.66, "grad_norm": 0.5546875, "learning_rate": 0.00047083954900607874, "loss": 0.2353, "step": 112550 }, { "epoch": 4.66, "grad_norm": 1.171875, "learning_rate": 0.00047083446567158213, "loss": 0.1929, "step": 112560 }, { "epoch": 4.66, "grad_norm": 0.77734375, "learning_rate": 0.00047082938192149896, "loss": 0.2199, "step": 112570 }, { "epoch": 4.66, "grad_norm": 0.7421875, "learning_rate": 0.0004708242977558387, "loss": 0.2635, "step": 112580 }, { "epoch": 4.66, "grad_norm": 0.67578125, "learning_rate": 0.00047081921317461097, "loss": 0.2314, "step": 112590 }, { "epoch": 4.66, "grad_norm": 0.8671875, "learning_rate": 0.0004708141281778253, "loss": 0.1647, "step": 112600 }, { "epoch": 4.66, "grad_norm": 0.53515625, "learning_rate": 0.00047080904276549133, "loss": 0.2254, "step": 112610 }, { "epoch": 4.66, "grad_norm": 1.8984375, "learning_rate": 0.0004708039569376185, "loss": 0.2495, "step": 112620 }, { "epoch": 4.67, "grad_norm": 0.51953125, "learning_rate": 0.00047079887069421657, "loss": 0.2092, "step": 112630 }, { "epoch": 4.67, "grad_norm": 0.71484375, "learning_rate": 0.0004707937840352949, "loss": 0.1979, "step": 112640 }, { "epoch": 4.67, "grad_norm": 0.84375, "learning_rate": 0.0004707886969608633, "loss": 0.1916, "step": 112650 }, { "epoch": 4.67, "grad_norm": 1.0546875, "learning_rate": 0.00047078360947093114, "loss": 0.1927, "step": 112660 }, { "epoch": 4.67, "grad_norm": 0.87890625, "learning_rate": 0.0004707785215655081, "loss": 0.2109, "step": 112670 }, { "epoch": 4.67, "grad_norm": 0.890625, "learning_rate": 0.0004707734332446038, "loss": 0.2014, "step": 112680 }, { "epoch": 4.67, "grad_norm": 0.5390625, "learning_rate": 0.0004707683445082276, "loss": 0.2402, "step": 112690 }, { "epoch": 4.67, "grad_norm": 0.80078125, "learning_rate": 0.00047076325535638926, "loss": 0.2507, "step": 112700 }, { "epoch": 4.67, "grad_norm": 0.69140625, "learning_rate": 0.0004707581657890983, "loss": 0.2436, "step": 112710 }, { "epoch": 4.67, "grad_norm": 1.75, "learning_rate": 0.00047075307580636436, "loss": 0.2066, "step": 112720 }, { "epoch": 4.67, "grad_norm": 0.703125, "learning_rate": 0.000470747985408197, "loss": 0.2332, "step": 112730 }, { "epoch": 4.67, "grad_norm": 0.65625, "learning_rate": 0.0004707428945946056, "loss": 0.1986, "step": 112740 }, { "epoch": 4.67, "grad_norm": 0.72265625, "learning_rate": 0.00047073780336560005, "loss": 0.2471, "step": 112750 }, { "epoch": 4.67, "grad_norm": 1.5625, "learning_rate": 0.0004707327117211898, "loss": 0.2757, "step": 112760 }, { "epoch": 4.67, "grad_norm": 0.78515625, "learning_rate": 0.0004707276196613844, "loss": 0.1852, "step": 112770 }, { "epoch": 4.67, "grad_norm": 0.59375, "learning_rate": 0.0004707225271861935, "loss": 0.2102, "step": 112780 }, { "epoch": 4.67, "grad_norm": 0.5703125, "learning_rate": 0.00047071743429562653, "loss": 0.2286, "step": 112790 }, { "epoch": 4.67, "grad_norm": 1.0703125, "learning_rate": 0.0004707123409896932, "loss": 0.1926, "step": 112800 }, { "epoch": 4.67, "grad_norm": 0.8203125, "learning_rate": 0.0004707072472684032, "loss": 0.1979, "step": 112810 }, { "epoch": 4.67, "grad_norm": 0.337890625, "learning_rate": 0.0004707021531317659, "loss": 0.1895, "step": 112820 }, { "epoch": 4.67, "grad_norm": 0.40625, "learning_rate": 0.00047069705857979093, "loss": 0.2358, "step": 112830 }, { "epoch": 4.67, "grad_norm": 0.82421875, "learning_rate": 0.000470691963612488, "loss": 0.197, "step": 112840 }, { "epoch": 4.67, "grad_norm": 0.88671875, "learning_rate": 0.0004706868682298667, "loss": 0.2463, "step": 112850 }, { "epoch": 4.67, "grad_norm": 0.69140625, "learning_rate": 0.0004706817724319364, "loss": 0.2505, "step": 112860 }, { "epoch": 4.68, "grad_norm": 0.81640625, "learning_rate": 0.0004706766762187069, "loss": 0.2264, "step": 112870 }, { "epoch": 4.68, "grad_norm": 0.62890625, "learning_rate": 0.0004706715795901878, "loss": 0.2084, "step": 112880 }, { "epoch": 4.68, "grad_norm": 0.51171875, "learning_rate": 0.00047066648254638854, "loss": 0.2518, "step": 112890 }, { "epoch": 4.68, "grad_norm": 0.41015625, "learning_rate": 0.00047066138508731884, "loss": 0.1396, "step": 112900 }, { "epoch": 4.68, "grad_norm": 0.90625, "learning_rate": 0.00047065628721298816, "loss": 0.2601, "step": 112910 }, { "epoch": 4.68, "grad_norm": 0.404296875, "learning_rate": 0.0004706511889234062, "loss": 0.169, "step": 112920 }, { "epoch": 4.68, "grad_norm": 0.6875, "learning_rate": 0.0004706460902185825, "loss": 0.1772, "step": 112930 }, { "epoch": 4.68, "grad_norm": 0.92578125, "learning_rate": 0.00047064099109852674, "loss": 0.2158, "step": 112940 }, { "epoch": 4.68, "grad_norm": 1.8203125, "learning_rate": 0.0004706358915632485, "loss": 0.1815, "step": 112950 }, { "epoch": 4.68, "grad_norm": 0.765625, "learning_rate": 0.0004706307916127573, "loss": 0.2114, "step": 112960 }, { "epoch": 4.68, "grad_norm": 0.62890625, "learning_rate": 0.0004706256912470628, "loss": 0.2683, "step": 112970 }, { "epoch": 4.68, "grad_norm": 1.3359375, "learning_rate": 0.0004706205904661745, "loss": 0.1864, "step": 112980 }, { "epoch": 4.68, "grad_norm": 1.421875, "learning_rate": 0.0004706154892701021, "loss": 0.2409, "step": 112990 }, { "epoch": 4.68, "grad_norm": 0.80078125, "learning_rate": 0.0004706103876588552, "loss": 0.237, "step": 113000 }, { "epoch": 4.68, "grad_norm": 1.1171875, "learning_rate": 0.00047060528563244345, "loss": 0.2494, "step": 113010 }, { "epoch": 4.68, "grad_norm": 0.8125, "learning_rate": 0.00047060018319087626, "loss": 0.1467, "step": 113020 }, { "epoch": 4.68, "grad_norm": 0.71875, "learning_rate": 0.0004705950803341634, "loss": 0.2251, "step": 113030 }, { "epoch": 4.68, "grad_norm": 0.376953125, "learning_rate": 0.0004705899770623144, "loss": 0.2614, "step": 113040 }, { "epoch": 4.68, "grad_norm": 1.140625, "learning_rate": 0.00047058487337533887, "loss": 0.2373, "step": 113050 }, { "epoch": 4.68, "grad_norm": 0.4609375, "learning_rate": 0.0004705797692732464, "loss": 0.1954, "step": 113060 }, { "epoch": 4.68, "grad_norm": 0.90234375, "learning_rate": 0.00047057466475604673, "loss": 0.2323, "step": 113070 }, { "epoch": 4.68, "grad_norm": 0.2431640625, "learning_rate": 0.00047056955982374926, "loss": 0.2281, "step": 113080 }, { "epoch": 4.68, "grad_norm": 0.7421875, "learning_rate": 0.00047056445447636374, "loss": 0.2493, "step": 113090 }, { "epoch": 4.68, "grad_norm": 0.546875, "learning_rate": 0.0004705593487138998, "loss": 0.2345, "step": 113100 }, { "epoch": 4.69, "grad_norm": 0.50390625, "learning_rate": 0.0004705542425363669, "loss": 0.2203, "step": 113110 }, { "epoch": 4.69, "grad_norm": 1.453125, "learning_rate": 0.00047054913594377475, "loss": 0.2022, "step": 113120 }, { "epoch": 4.69, "grad_norm": 0.85546875, "learning_rate": 0.0004705440289361329, "loss": 0.2295, "step": 113130 }, { "epoch": 4.69, "grad_norm": 0.376953125, "learning_rate": 0.0004705389215134511, "loss": 0.2043, "step": 113140 }, { "epoch": 4.69, "grad_norm": 0.71484375, "learning_rate": 0.0004705338136757388, "loss": 0.2068, "step": 113150 }, { "epoch": 4.69, "grad_norm": 0.6015625, "learning_rate": 0.00047052870542300577, "loss": 0.2779, "step": 113160 }, { "epoch": 4.69, "grad_norm": 0.51171875, "learning_rate": 0.00047052359675526134, "loss": 0.2252, "step": 113170 }, { "epoch": 4.69, "grad_norm": 0.59375, "learning_rate": 0.0004705184876725155, "loss": 0.2107, "step": 113180 }, { "epoch": 4.69, "grad_norm": 0.5390625, "learning_rate": 0.00047051337817477756, "loss": 0.2366, "step": 113190 }, { "epoch": 4.69, "grad_norm": 0.34765625, "learning_rate": 0.00047050826826205733, "loss": 0.1925, "step": 113200 }, { "epoch": 4.69, "grad_norm": 0.2490234375, "learning_rate": 0.00047050315793436436, "loss": 0.1769, "step": 113210 }, { "epoch": 4.69, "grad_norm": 0.83203125, "learning_rate": 0.0004704980471917082, "loss": 0.218, "step": 113220 }, { "epoch": 4.69, "grad_norm": 0.45703125, "learning_rate": 0.0004704929360340986, "loss": 0.1971, "step": 113230 }, { "epoch": 4.69, "grad_norm": 3.234375, "learning_rate": 0.00047048782446154505, "loss": 0.2147, "step": 113240 }, { "epoch": 4.69, "grad_norm": 0.6953125, "learning_rate": 0.0004704827124740572, "loss": 0.2237, "step": 113250 }, { "epoch": 4.69, "grad_norm": 0.353515625, "learning_rate": 0.00047047760007164473, "loss": 0.1795, "step": 113260 }, { "epoch": 4.69, "grad_norm": 1.1796875, "learning_rate": 0.00047047248725431723, "loss": 0.241, "step": 113270 }, { "epoch": 4.69, "grad_norm": 0.9375, "learning_rate": 0.0004704673740220843, "loss": 0.2238, "step": 113280 }, { "epoch": 4.69, "grad_norm": 0.703125, "learning_rate": 0.00047046226037495564, "loss": 0.2072, "step": 113290 }, { "epoch": 4.69, "grad_norm": 0.318359375, "learning_rate": 0.0004704571463129408, "loss": 0.2089, "step": 113300 }, { "epoch": 4.69, "grad_norm": 1.59375, "learning_rate": 0.00047045203183604937, "loss": 0.2248, "step": 113310 }, { "epoch": 4.69, "grad_norm": 0.333984375, "learning_rate": 0.00047044691694429097, "loss": 0.1833, "step": 113320 }, { "epoch": 4.69, "grad_norm": 0.56640625, "learning_rate": 0.0004704418016376754, "loss": 0.198, "step": 113330 }, { "epoch": 4.69, "grad_norm": 0.59375, "learning_rate": 0.00047043668591621214, "loss": 0.2345, "step": 113340 }, { "epoch": 4.69, "grad_norm": 0.3125, "learning_rate": 0.0004704315697799108, "loss": 0.1937, "step": 113350 }, { "epoch": 4.7, "grad_norm": 0.0, "learning_rate": 0.0004704264532287811, "loss": 0.2046, "step": 113360 }, { "epoch": 4.7, "grad_norm": 0.6875, "learning_rate": 0.0004704213362628326, "loss": 0.2045, "step": 113370 }, { "epoch": 4.7, "grad_norm": 0.94140625, "learning_rate": 0.0004704162188820749, "loss": 0.2056, "step": 113380 }, { "epoch": 4.7, "grad_norm": 0.921875, "learning_rate": 0.0004704111010865177, "loss": 0.1544, "step": 113390 }, { "epoch": 4.7, "grad_norm": 0.37890625, "learning_rate": 0.00047040598287617066, "loss": 0.1823, "step": 113400 }, { "epoch": 4.7, "grad_norm": 0.490234375, "learning_rate": 0.0004704008642510433, "loss": 0.1757, "step": 113410 }, { "epoch": 4.7, "grad_norm": 0.52734375, "learning_rate": 0.00047039574521114537, "loss": 0.1845, "step": 113420 }, { "epoch": 4.7, "grad_norm": 0.91015625, "learning_rate": 0.0004703906257564864, "loss": 0.2077, "step": 113430 }, { "epoch": 4.7, "grad_norm": 0.390625, "learning_rate": 0.0004703855058870761, "loss": 0.1519, "step": 113440 }, { "epoch": 4.7, "grad_norm": 0.2734375, "learning_rate": 0.00047038038560292405, "loss": 0.1883, "step": 113450 }, { "epoch": 4.7, "grad_norm": 0.70703125, "learning_rate": 0.00047037526490403993, "loss": 0.2, "step": 113460 }, { "epoch": 4.7, "grad_norm": 0.5625, "learning_rate": 0.00047037014379043335, "loss": 0.2291, "step": 113470 }, { "epoch": 4.7, "grad_norm": 0.41015625, "learning_rate": 0.00047036502226211396, "loss": 0.2189, "step": 113480 }, { "epoch": 4.7, "grad_norm": 0.515625, "learning_rate": 0.0004703599003190914, "loss": 0.2217, "step": 113490 }, { "epoch": 4.7, "grad_norm": 0.91015625, "learning_rate": 0.0004703547779613753, "loss": 0.2021, "step": 113500 }, { "epoch": 4.7, "grad_norm": 0.35546875, "learning_rate": 0.00047034965518897535, "loss": 0.1875, "step": 113510 }, { "epoch": 4.7, "grad_norm": 0.40625, "learning_rate": 0.00047034453200190106, "loss": 0.227, "step": 113520 }, { "epoch": 4.7, "grad_norm": 0.80859375, "learning_rate": 0.0004703394084001622, "loss": 0.2387, "step": 113530 }, { "epoch": 4.7, "grad_norm": 0.96484375, "learning_rate": 0.0004703342843837684, "loss": 0.2203, "step": 113540 }, { "epoch": 4.7, "grad_norm": 1.15625, "learning_rate": 0.0004703291599527293, "loss": 0.1803, "step": 113550 }, { "epoch": 4.7, "grad_norm": 1.0234375, "learning_rate": 0.0004703240351070544, "loss": 0.1642, "step": 113560 }, { "epoch": 4.7, "grad_norm": 0.5859375, "learning_rate": 0.0004703189098467535, "loss": 0.1696, "step": 113570 }, { "epoch": 4.7, "grad_norm": 1.203125, "learning_rate": 0.0004703137841718362, "loss": 0.2215, "step": 113580 }, { "epoch": 4.7, "grad_norm": 0.78125, "learning_rate": 0.00047030865808231223, "loss": 0.2555, "step": 113590 }, { "epoch": 4.71, "grad_norm": 1.5859375, "learning_rate": 0.00047030353157819107, "loss": 0.2781, "step": 113600 }, { "epoch": 4.71, "grad_norm": 0.66015625, "learning_rate": 0.0004702984046594825, "loss": 0.2434, "step": 113610 }, { "epoch": 4.71, "grad_norm": 0.44921875, "learning_rate": 0.00047029327732619607, "loss": 0.1755, "step": 113620 }, { "epoch": 4.71, "grad_norm": 0.310546875, "learning_rate": 0.00047028814957834154, "loss": 0.1881, "step": 113630 }, { "epoch": 4.71, "grad_norm": 0.56640625, "learning_rate": 0.0004702830214159285, "loss": 0.2047, "step": 113640 }, { "epoch": 4.71, "grad_norm": 0.388671875, "learning_rate": 0.0004702778928389666, "loss": 0.2168, "step": 113650 }, { "epoch": 4.71, "grad_norm": 0.486328125, "learning_rate": 0.0004702727638474654, "loss": 0.2041, "step": 113660 }, { "epoch": 4.71, "grad_norm": 0.6328125, "learning_rate": 0.00047026763444143473, "loss": 0.2, "step": 113670 }, { "epoch": 4.71, "grad_norm": 1.1640625, "learning_rate": 0.00047026250462088413, "loss": 0.2583, "step": 113680 }, { "epoch": 4.71, "grad_norm": 0.412109375, "learning_rate": 0.00047025737438582336, "loss": 0.1978, "step": 113690 }, { "epoch": 4.71, "grad_norm": 0.6484375, "learning_rate": 0.0004702522437362619, "loss": 0.2277, "step": 113700 }, { "epoch": 4.71, "grad_norm": 0.62890625, "learning_rate": 0.00047024711267220953, "loss": 0.1936, "step": 113710 }, { "epoch": 4.71, "grad_norm": 0.5546875, "learning_rate": 0.0004702419811936759, "loss": 0.288, "step": 113720 }, { "epoch": 4.71, "grad_norm": 0.57421875, "learning_rate": 0.00047023684930067066, "loss": 0.1741, "step": 113730 }, { "epoch": 4.71, "grad_norm": 0.6328125, "learning_rate": 0.00047023171699320346, "loss": 0.1738, "step": 113740 }, { "epoch": 4.71, "grad_norm": 0.2099609375, "learning_rate": 0.0004702265842712839, "loss": 0.1381, "step": 113750 }, { "epoch": 4.71, "grad_norm": 0.333984375, "learning_rate": 0.00047022145113492173, "loss": 0.2252, "step": 113760 }, { "epoch": 4.71, "grad_norm": 0.73828125, "learning_rate": 0.0004702163175841265, "loss": 0.198, "step": 113770 }, { "epoch": 4.71, "grad_norm": 0.83203125, "learning_rate": 0.000470211183618908, "loss": 0.2186, "step": 113780 }, { "epoch": 4.71, "grad_norm": 1.3359375, "learning_rate": 0.00047020604923927583, "loss": 0.2053, "step": 113790 }, { "epoch": 4.71, "grad_norm": 1.03125, "learning_rate": 0.0004702009144452397, "loss": 0.2491, "step": 113800 }, { "epoch": 4.71, "grad_norm": 0.59765625, "learning_rate": 0.0004701957792368091, "loss": 0.2092, "step": 113810 }, { "epoch": 4.71, "grad_norm": 0.60546875, "learning_rate": 0.00047019064361399396, "loss": 0.2609, "step": 113820 }, { "epoch": 4.71, "grad_norm": 0.98046875, "learning_rate": 0.00047018550757680375, "loss": 0.1833, "step": 113830 }, { "epoch": 4.72, "grad_norm": 0.578125, "learning_rate": 0.00047018037112524816, "loss": 0.2394, "step": 113840 }, { "epoch": 4.72, "grad_norm": 0.431640625, "learning_rate": 0.00047017523425933695, "loss": 0.2077, "step": 113850 }, { "epoch": 4.72, "grad_norm": 0.5703125, "learning_rate": 0.00047017009697907967, "loss": 0.1968, "step": 113860 }, { "epoch": 4.72, "grad_norm": 0.59765625, "learning_rate": 0.0004701649592844861, "loss": 0.1914, "step": 113870 }, { "epoch": 4.72, "grad_norm": 0.439453125, "learning_rate": 0.00047015982117556575, "loss": 0.1844, "step": 113880 }, { "epoch": 4.72, "grad_norm": 1.3125, "learning_rate": 0.0004701546826523285, "loss": 0.2172, "step": 113890 }, { "epoch": 4.72, "grad_norm": 0.8359375, "learning_rate": 0.0004701495437147839, "loss": 0.2176, "step": 113900 }, { "epoch": 4.72, "grad_norm": 0.345703125, "learning_rate": 0.0004701444043629416, "loss": 0.1878, "step": 113910 }, { "epoch": 4.72, "grad_norm": 1.0546875, "learning_rate": 0.0004701392645968113, "loss": 0.2098, "step": 113920 }, { "epoch": 4.72, "grad_norm": 0.7265625, "learning_rate": 0.00047013412441640267, "loss": 0.1959, "step": 113930 }, { "epoch": 4.72, "grad_norm": 1.0546875, "learning_rate": 0.0004701289838217254, "loss": 0.2141, "step": 113940 }, { "epoch": 4.72, "grad_norm": 0.71875, "learning_rate": 0.0004701238428127892, "loss": 0.1864, "step": 113950 }, { "epoch": 4.72, "grad_norm": 0.53515625, "learning_rate": 0.0004701187013896037, "loss": 0.1879, "step": 113960 }, { "epoch": 4.72, "grad_norm": 0.3671875, "learning_rate": 0.0004701135595521785, "loss": 0.1545, "step": 113970 }, { "epoch": 4.72, "grad_norm": 0.9921875, "learning_rate": 0.0004701084173005234, "loss": 0.2791, "step": 113980 }, { "epoch": 4.72, "grad_norm": 0.71484375, "learning_rate": 0.000470103274634648, "loss": 0.2172, "step": 113990 }, { "epoch": 4.72, "grad_norm": 0.71484375, "learning_rate": 0.00047009813155456207, "loss": 0.1984, "step": 114000 }, { "epoch": 4.72, "grad_norm": 0.58203125, "learning_rate": 0.0004700929880602751, "loss": 0.1827, "step": 114010 }, { "epoch": 4.72, "grad_norm": 0.6875, "learning_rate": 0.0004700878441517971, "loss": 0.217, "step": 114020 }, { "epoch": 4.72, "grad_norm": 0.86328125, "learning_rate": 0.0004700826998291373, "loss": 0.1885, "step": 114030 }, { "epoch": 4.72, "grad_norm": 0.625, "learning_rate": 0.0004700775550923058, "loss": 0.1611, "step": 114040 }, { "epoch": 4.72, "grad_norm": 0.2294921875, "learning_rate": 0.00047007240994131205, "loss": 0.2048, "step": 114050 }, { "epoch": 4.72, "grad_norm": 0.60546875, "learning_rate": 0.00047006726437616577, "loss": 0.2616, "step": 114060 }, { "epoch": 4.72, "grad_norm": 0.466796875, "learning_rate": 0.00047006211839687676, "loss": 0.2202, "step": 114070 }, { "epoch": 4.73, "grad_norm": 0.671875, "learning_rate": 0.0004700569720034545, "loss": 0.1675, "step": 114080 }, { "epoch": 4.73, "grad_norm": 0.330078125, "learning_rate": 0.0004700518251959088, "loss": 0.2042, "step": 114090 }, { "epoch": 4.73, "grad_norm": 1.0390625, "learning_rate": 0.0004700466779742494, "loss": 0.2011, "step": 114100 }, { "epoch": 4.73, "grad_norm": 0.6640625, "learning_rate": 0.00047004153033848584, "loss": 0.1648, "step": 114110 }, { "epoch": 4.73, "grad_norm": 1.2109375, "learning_rate": 0.0004700363822886279, "loss": 0.1881, "step": 114120 }, { "epoch": 4.73, "grad_norm": 2.671875, "learning_rate": 0.0004700312338246852, "loss": 0.2267, "step": 114130 }, { "epoch": 4.73, "grad_norm": 0.466796875, "learning_rate": 0.00047002608494666755, "loss": 0.2426, "step": 114140 }, { "epoch": 4.73, "grad_norm": 0.640625, "learning_rate": 0.0004700209356545846, "loss": 0.2339, "step": 114150 }, { "epoch": 4.73, "grad_norm": 0.41796875, "learning_rate": 0.0004700157859484459, "loss": 0.197, "step": 114160 }, { "epoch": 4.73, "grad_norm": 0.56640625, "learning_rate": 0.00047001063582826133, "loss": 0.2303, "step": 114170 }, { "epoch": 4.73, "grad_norm": 0.259765625, "learning_rate": 0.00047000548529404053, "loss": 0.2006, "step": 114180 }, { "epoch": 4.73, "grad_norm": 1.0625, "learning_rate": 0.000470000334345793, "loss": 0.2575, "step": 114190 }, { "epoch": 4.73, "grad_norm": 1.390625, "learning_rate": 0.0004699951829835288, "loss": 0.2292, "step": 114200 }, { "epoch": 4.73, "grad_norm": 0.7734375, "learning_rate": 0.00046999003120725736, "loss": 0.2396, "step": 114210 }, { "epoch": 4.73, "grad_norm": 1.171875, "learning_rate": 0.0004699848790169884, "loss": 0.2143, "step": 114220 }, { "epoch": 4.73, "grad_norm": 0.41796875, "learning_rate": 0.0004699797264127317, "loss": 0.2293, "step": 114230 }, { "epoch": 4.73, "grad_norm": 0.96484375, "learning_rate": 0.0004699745733944968, "loss": 0.234, "step": 114240 }, { "epoch": 4.73, "grad_norm": 1.53125, "learning_rate": 0.00046996941996229366, "loss": 0.2432, "step": 114250 }, { "epoch": 4.73, "grad_norm": 0.6484375, "learning_rate": 0.00046996426611613175, "loss": 0.2016, "step": 114260 }, { "epoch": 4.73, "grad_norm": 0.359375, "learning_rate": 0.0004699591118560208, "loss": 0.1594, "step": 114270 }, { "epoch": 4.73, "grad_norm": 0.5234375, "learning_rate": 0.0004699539571819706, "loss": 0.2395, "step": 114280 }, { "epoch": 4.73, "grad_norm": 1.203125, "learning_rate": 0.0004699488020939908, "loss": 0.1551, "step": 114290 }, { "epoch": 4.73, "grad_norm": 1.046875, "learning_rate": 0.0004699436465920912, "loss": 0.2611, "step": 114300 }, { "epoch": 4.73, "grad_norm": 0.68359375, "learning_rate": 0.00046993849067628133, "loss": 0.2615, "step": 114310 }, { "epoch": 4.74, "grad_norm": 0.470703125, "learning_rate": 0.00046993333434657095, "loss": 0.1794, "step": 114320 }, { "epoch": 4.74, "grad_norm": 0.18359375, "learning_rate": 0.0004699281776029698, "loss": 0.1922, "step": 114330 }, { "epoch": 4.74, "grad_norm": 0.77734375, "learning_rate": 0.0004699230204454876, "loss": 0.2075, "step": 114340 }, { "epoch": 4.74, "grad_norm": 0.54296875, "learning_rate": 0.00046991786287413394, "loss": 0.1887, "step": 114350 }, { "epoch": 4.74, "grad_norm": 1.8515625, "learning_rate": 0.0004699127048889187, "loss": 0.2443, "step": 114360 }, { "epoch": 4.74, "grad_norm": 0.671875, "learning_rate": 0.00046990754648985146, "loss": 0.2185, "step": 114370 }, { "epoch": 4.74, "grad_norm": 0.3515625, "learning_rate": 0.00046990238767694205, "loss": 0.2061, "step": 114380 }, { "epoch": 4.74, "grad_norm": 1.59375, "learning_rate": 0.0004698972284502, "loss": 0.2163, "step": 114390 }, { "epoch": 4.74, "grad_norm": 0.9453125, "learning_rate": 0.0004698920688096351, "loss": 0.1291, "step": 114400 }, { "epoch": 4.74, "grad_norm": 1.0, "learning_rate": 0.0004698869087552571, "loss": 0.222, "step": 114410 }, { "epoch": 4.74, "grad_norm": 0.72265625, "learning_rate": 0.0004698817482870757, "loss": 0.2046, "step": 114420 }, { "epoch": 4.74, "grad_norm": 1.09375, "learning_rate": 0.00046987658740510057, "loss": 0.2007, "step": 114430 }, { "epoch": 4.74, "grad_norm": 0.37109375, "learning_rate": 0.0004698714261093415, "loss": 0.2678, "step": 114440 }, { "epoch": 4.74, "grad_norm": 1.0859375, "learning_rate": 0.0004698662643998081, "loss": 0.2102, "step": 114450 }, { "epoch": 4.74, "grad_norm": 1.1640625, "learning_rate": 0.00046986110227651014, "loss": 0.1866, "step": 114460 }, { "epoch": 4.74, "grad_norm": 1.0, "learning_rate": 0.00046985593973945735, "loss": 0.1831, "step": 114470 }, { "epoch": 4.74, "grad_norm": 0.75, "learning_rate": 0.0004698507767886594, "loss": 0.2123, "step": 114480 }, { "epoch": 4.74, "grad_norm": 0.5859375, "learning_rate": 0.00046984561342412604, "loss": 0.19, "step": 114490 }, { "epoch": 4.74, "grad_norm": 0.64453125, "learning_rate": 0.000469840449645867, "loss": 0.187, "step": 114500 }, { "epoch": 4.74, "grad_norm": 0.3984375, "learning_rate": 0.0004698352854538919, "loss": 0.2209, "step": 114510 }, { "epoch": 4.74, "grad_norm": 1.203125, "learning_rate": 0.00046983012084821064, "loss": 0.1937, "step": 114520 }, { "epoch": 4.74, "grad_norm": 0.3984375, "learning_rate": 0.0004698249558288328, "loss": 0.2259, "step": 114530 }, { "epoch": 4.74, "grad_norm": 1.1328125, "learning_rate": 0.00046981979039576805, "loss": 0.1887, "step": 114540 }, { "epoch": 4.74, "grad_norm": 0.6171875, "learning_rate": 0.00046981462454902625, "loss": 0.1885, "step": 114550 }, { "epoch": 4.75, "grad_norm": 0.68359375, "learning_rate": 0.0004698094582886171, "loss": 0.2073, "step": 114560 }, { "epoch": 4.75, "grad_norm": 0.5859375, "learning_rate": 0.0004698042916145502, "loss": 0.2232, "step": 114570 }, { "epoch": 4.75, "grad_norm": 0.498046875, "learning_rate": 0.00046979912452683537, "loss": 0.2193, "step": 114580 }, { "epoch": 4.75, "grad_norm": 1.453125, "learning_rate": 0.00046979395702548244, "loss": 0.224, "step": 114590 }, { "epoch": 4.75, "grad_norm": 0.79296875, "learning_rate": 0.0004697887891105009, "loss": 0.2045, "step": 114600 }, { "epoch": 4.75, "grad_norm": 1.015625, "learning_rate": 0.00046978362078190064, "loss": 0.177, "step": 114610 }, { "epoch": 4.75, "grad_norm": 0.59765625, "learning_rate": 0.0004697784520396914, "loss": 0.2473, "step": 114620 }, { "epoch": 4.75, "grad_norm": 0.42578125, "learning_rate": 0.00046977328288388276, "loss": 0.1825, "step": 114630 }, { "epoch": 4.75, "grad_norm": 0.49609375, "learning_rate": 0.00046976811331448455, "loss": 0.2774, "step": 114640 }, { "epoch": 4.75, "grad_norm": 0.74609375, "learning_rate": 0.0004697629433315065, "loss": 0.2189, "step": 114650 }, { "epoch": 4.75, "grad_norm": 0.96484375, "learning_rate": 0.0004697577729349583, "loss": 0.2008, "step": 114660 }, { "epoch": 4.75, "grad_norm": 0.70703125, "learning_rate": 0.0004697526021248497, "loss": 0.1857, "step": 114670 }, { "epoch": 4.75, "grad_norm": 0.7890625, "learning_rate": 0.0004697474309011905, "loss": 0.2087, "step": 114680 }, { "epoch": 4.75, "grad_norm": 0.765625, "learning_rate": 0.00046974225926399026, "loss": 0.2112, "step": 114690 }, { "epoch": 4.75, "grad_norm": 0.5625, "learning_rate": 0.00046973708721325893, "loss": 0.2237, "step": 114700 }, { "epoch": 4.75, "grad_norm": 0.57421875, "learning_rate": 0.00046973191474900607, "loss": 0.1584, "step": 114710 }, { "epoch": 4.75, "grad_norm": 1.375, "learning_rate": 0.0004697267418712415, "loss": 0.2884, "step": 114720 }, { "epoch": 4.75, "grad_norm": 0.78515625, "learning_rate": 0.0004697215685799749, "loss": 0.2236, "step": 114730 }, { "epoch": 4.75, "grad_norm": 1.6875, "learning_rate": 0.000469716394875216, "loss": 0.2398, "step": 114740 }, { "epoch": 4.75, "grad_norm": 0.73046875, "learning_rate": 0.0004697112207569747, "loss": 0.1867, "step": 114750 }, { "epoch": 4.75, "grad_norm": 0.421875, "learning_rate": 0.00046970604622526045, "loss": 0.2372, "step": 114760 }, { "epoch": 4.75, "grad_norm": 1.171875, "learning_rate": 0.00046970087128008327, "loss": 0.2246, "step": 114770 }, { "epoch": 4.75, "grad_norm": 2.609375, "learning_rate": 0.00046969569592145274, "loss": 0.2561, "step": 114780 }, { "epoch": 4.75, "grad_norm": 1.2734375, "learning_rate": 0.00046969052014937863, "loss": 0.2061, "step": 114790 }, { "epoch": 4.76, "grad_norm": 0.302734375, "learning_rate": 0.00046968534396387064, "loss": 0.1936, "step": 114800 }, { "epoch": 4.76, "grad_norm": 0.65234375, "learning_rate": 0.0004696801673649387, "loss": 0.2023, "step": 114810 }, { "epoch": 4.76, "grad_norm": 0.5546875, "learning_rate": 0.00046967499035259225, "loss": 0.2266, "step": 114820 }, { "epoch": 4.76, "grad_norm": 0.263671875, "learning_rate": 0.0004696698129268413, "loss": 0.1984, "step": 114830 }, { "epoch": 4.76, "grad_norm": 0.95703125, "learning_rate": 0.00046966463508769544, "loss": 0.2341, "step": 114840 }, { "epoch": 4.76, "grad_norm": 0.953125, "learning_rate": 0.0004696594568351644, "loss": 0.1965, "step": 114850 }, { "epoch": 4.76, "grad_norm": 1.2890625, "learning_rate": 0.00046965427816925804, "loss": 0.1745, "step": 114860 }, { "epoch": 4.76, "grad_norm": 0.7578125, "learning_rate": 0.0004696490990899861, "loss": 0.198, "step": 114870 }, { "epoch": 4.76, "grad_norm": 0.52734375, "learning_rate": 0.00046964391959735817, "loss": 0.1929, "step": 114880 }, { "epoch": 4.76, "grad_norm": 1.234375, "learning_rate": 0.00046963873969138413, "loss": 0.1902, "step": 114890 }, { "epoch": 4.76, "grad_norm": 1.4375, "learning_rate": 0.00046963355937207373, "loss": 0.2282, "step": 114900 }, { "epoch": 4.76, "grad_norm": 0.6640625, "learning_rate": 0.00046962837863943674, "loss": 0.155, "step": 114910 }, { "epoch": 4.76, "grad_norm": 0.7265625, "learning_rate": 0.0004696231974934828, "loss": 0.1862, "step": 114920 }, { "epoch": 4.76, "grad_norm": 0.58984375, "learning_rate": 0.0004696180159342217, "loss": 0.2122, "step": 114930 }, { "epoch": 4.76, "grad_norm": 0.83984375, "learning_rate": 0.0004696128339616632, "loss": 0.1779, "step": 114940 }, { "epoch": 4.76, "grad_norm": 1.1015625, "learning_rate": 0.00046960765157581715, "loss": 0.2215, "step": 114950 }, { "epoch": 4.76, "grad_norm": 0.89453125, "learning_rate": 0.00046960246877669314, "loss": 0.1492, "step": 114960 }, { "epoch": 4.76, "grad_norm": 0.65625, "learning_rate": 0.00046959728556430103, "loss": 0.1968, "step": 114970 }, { "epoch": 4.76, "grad_norm": 0.69140625, "learning_rate": 0.0004695921019386505, "loss": 0.2087, "step": 114980 }, { "epoch": 4.76, "grad_norm": 0.703125, "learning_rate": 0.00046958691789975146, "loss": 0.1977, "step": 114990 }, { "epoch": 4.76, "grad_norm": 0.96484375, "learning_rate": 0.00046958173344761346, "loss": 0.1996, "step": 115000 }, { "epoch": 4.76, "grad_norm": 1.6328125, "learning_rate": 0.00046957654858224634, "loss": 0.2285, "step": 115010 }, { "epoch": 4.76, "grad_norm": 0.34765625, "learning_rate": 0.0004695713633036599, "loss": 0.2158, "step": 115020 }, { "epoch": 4.76, "grad_norm": 0.5546875, "learning_rate": 0.0004695661776118639, "loss": 0.1649, "step": 115030 }, { "epoch": 4.76, "grad_norm": 1.4765625, "learning_rate": 0.000469560991506868, "loss": 0.228, "step": 115040 }, { "epoch": 4.77, "grad_norm": 1.2421875, "learning_rate": 0.0004695558049886821, "loss": 0.229, "step": 115050 }, { "epoch": 4.77, "grad_norm": 1.1328125, "learning_rate": 0.0004695506180573158, "loss": 0.187, "step": 115060 }, { "epoch": 4.77, "grad_norm": 0.71484375, "learning_rate": 0.000469545430712779, "loss": 0.2496, "step": 115070 }, { "epoch": 4.77, "grad_norm": 1.09375, "learning_rate": 0.00046954024295508135, "loss": 0.2229, "step": 115080 }, { "epoch": 4.77, "grad_norm": 0.6796875, "learning_rate": 0.00046953505478423274, "loss": 0.1689, "step": 115090 }, { "epoch": 4.77, "grad_norm": 0.65234375, "learning_rate": 0.0004695298662002429, "loss": 0.2516, "step": 115100 }, { "epoch": 4.77, "grad_norm": 0.62109375, "learning_rate": 0.0004695246772031214, "loss": 0.2247, "step": 115110 }, { "epoch": 4.77, "grad_norm": 0.64453125, "learning_rate": 0.00046951948779287825, "loss": 0.1792, "step": 115120 }, { "epoch": 4.77, "grad_norm": 0.306640625, "learning_rate": 0.00046951429796952316, "loss": 0.203, "step": 115130 }, { "epoch": 4.77, "grad_norm": 0.48828125, "learning_rate": 0.00046950910773306586, "loss": 0.2406, "step": 115140 }, { "epoch": 4.77, "grad_norm": 0.162109375, "learning_rate": 0.00046950391708351614, "loss": 0.2776, "step": 115150 }, { "epoch": 4.77, "grad_norm": 1.7734375, "learning_rate": 0.00046949872602088365, "loss": 0.2641, "step": 115160 }, { "epoch": 4.77, "grad_norm": 0.3984375, "learning_rate": 0.00046949353454517833, "loss": 0.1943, "step": 115170 }, { "epoch": 4.77, "grad_norm": 0.765625, "learning_rate": 0.0004694883426564099, "loss": 0.2224, "step": 115180 }, { "epoch": 4.77, "grad_norm": 0.46484375, "learning_rate": 0.00046948315035458813, "loss": 0.2548, "step": 115190 }, { "epoch": 4.77, "grad_norm": 0.435546875, "learning_rate": 0.0004694779576397227, "loss": 0.231, "step": 115200 }, { "epoch": 4.77, "grad_norm": 0.81640625, "learning_rate": 0.00046947276451182355, "loss": 0.255, "step": 115210 }, { "epoch": 4.77, "grad_norm": 0.5390625, "learning_rate": 0.0004694675709709003, "loss": 0.2264, "step": 115220 }, { "epoch": 4.77, "grad_norm": 0.248046875, "learning_rate": 0.00046946237701696276, "loss": 0.1444, "step": 115230 }, { "epoch": 4.77, "grad_norm": 0.70703125, "learning_rate": 0.00046945718265002076, "loss": 0.2224, "step": 115240 }, { "epoch": 4.77, "grad_norm": 0.59765625, "learning_rate": 0.00046945198787008404, "loss": 0.1625, "step": 115250 }, { "epoch": 4.77, "grad_norm": 0.625, "learning_rate": 0.00046944679267716234, "loss": 0.224, "step": 115260 }, { "epoch": 4.77, "grad_norm": 2.078125, "learning_rate": 0.00046944159707126555, "loss": 0.1537, "step": 115270 }, { "epoch": 4.77, "grad_norm": 0.45703125, "learning_rate": 0.00046943640105240325, "loss": 0.2133, "step": 115280 }, { "epoch": 4.78, "grad_norm": 0.88671875, "learning_rate": 0.0004694312046205855, "loss": 0.2442, "step": 115290 }, { "epoch": 4.78, "grad_norm": 0.37890625, "learning_rate": 0.00046942600777582176, "loss": 0.2682, "step": 115300 }, { "epoch": 4.78, "grad_norm": 0.53125, "learning_rate": 0.0004694208105181221, "loss": 0.2113, "step": 115310 }, { "epoch": 4.78, "grad_norm": 0.703125, "learning_rate": 0.0004694156128474961, "loss": 0.1845, "step": 115320 }, { "epoch": 4.78, "grad_norm": 0.33203125, "learning_rate": 0.0004694104147639536, "loss": 0.2009, "step": 115330 }, { "epoch": 4.78, "grad_norm": 0.68359375, "learning_rate": 0.00046940521626750444, "loss": 0.1794, "step": 115340 }, { "epoch": 4.78, "grad_norm": 0.0, "learning_rate": 0.00046940001735815834, "loss": 0.1429, "step": 115350 }, { "epoch": 4.78, "grad_norm": 0.67578125, "learning_rate": 0.0004693948180359251, "loss": 0.155, "step": 115360 }, { "epoch": 4.78, "grad_norm": 0.55078125, "learning_rate": 0.0004693896183008145, "loss": 0.1807, "step": 115370 }, { "epoch": 4.78, "grad_norm": 0.51171875, "learning_rate": 0.00046938441815283635, "loss": 0.2319, "step": 115380 }, { "epoch": 4.78, "grad_norm": 0.5390625, "learning_rate": 0.00046937921759200043, "loss": 0.2317, "step": 115390 }, { "epoch": 4.78, "grad_norm": 0.6171875, "learning_rate": 0.0004693740166183165, "loss": 0.1932, "step": 115400 }, { "epoch": 4.78, "grad_norm": 1.4453125, "learning_rate": 0.0004693688152317943, "loss": 0.1443, "step": 115410 }, { "epoch": 4.78, "grad_norm": 0.7578125, "learning_rate": 0.0004693636134324437, "loss": 0.2436, "step": 115420 }, { "epoch": 4.78, "grad_norm": 0.61328125, "learning_rate": 0.0004693584112202745, "loss": 0.262, "step": 115430 }, { "epoch": 4.78, "grad_norm": 0.73046875, "learning_rate": 0.0004693532085952965, "loss": 0.2206, "step": 115440 }, { "epoch": 4.78, "grad_norm": 0.5625, "learning_rate": 0.00046934800555751936, "loss": 0.2043, "step": 115450 }, { "epoch": 4.78, "grad_norm": 0.703125, "learning_rate": 0.000469342802106953, "loss": 0.1839, "step": 115460 }, { "epoch": 4.78, "grad_norm": 1.53125, "learning_rate": 0.0004693375982436072, "loss": 0.1163, "step": 115470 }, { "epoch": 4.78, "grad_norm": 0.9921875, "learning_rate": 0.0004693323939674917, "loss": 0.2352, "step": 115480 }, { "epoch": 4.78, "grad_norm": 0.41015625, "learning_rate": 0.0004693271892786163, "loss": 0.1939, "step": 115490 }, { "epoch": 4.78, "grad_norm": 0.8125, "learning_rate": 0.00046932198417699085, "loss": 0.1986, "step": 115500 }, { "epoch": 4.78, "grad_norm": 1.4765625, "learning_rate": 0.0004693167786626251, "loss": 0.2708, "step": 115510 }, { "epoch": 4.78, "grad_norm": 0.65234375, "learning_rate": 0.00046931157273552885, "loss": 0.2458, "step": 115520 }, { "epoch": 4.79, "grad_norm": 1.1171875, "learning_rate": 0.0004693063663957119, "loss": 0.1943, "step": 115530 }, { "epoch": 4.79, "grad_norm": 1.0703125, "learning_rate": 0.0004693011596431841, "loss": 0.1812, "step": 115540 }, { "epoch": 4.79, "grad_norm": 0.98046875, "learning_rate": 0.00046929595247795507, "loss": 0.1864, "step": 115550 }, { "epoch": 4.79, "grad_norm": 1.09375, "learning_rate": 0.00046929074490003486, "loss": 0.2393, "step": 115560 }, { "epoch": 4.79, "grad_norm": 0.8515625, "learning_rate": 0.00046928553690943315, "loss": 0.2235, "step": 115570 }, { "epoch": 4.79, "grad_norm": 0.80078125, "learning_rate": 0.0004692803285061597, "loss": 0.2188, "step": 115580 }, { "epoch": 4.79, "grad_norm": 0.6015625, "learning_rate": 0.00046927511969022443, "loss": 0.1561, "step": 115590 }, { "epoch": 4.79, "grad_norm": 0.62890625, "learning_rate": 0.00046926991046163693, "loss": 0.2063, "step": 115600 }, { "epoch": 4.79, "grad_norm": 1.25, "learning_rate": 0.00046926470082040724, "loss": 0.2052, "step": 115610 }, { "epoch": 4.79, "grad_norm": 0.0, "learning_rate": 0.000469259490766545, "loss": 0.224, "step": 115620 }, { "epoch": 4.79, "grad_norm": 0.8671875, "learning_rate": 0.00046925428030006013, "loss": 0.1249, "step": 115630 }, { "epoch": 4.79, "grad_norm": 0.94921875, "learning_rate": 0.00046924906942096234, "loss": 0.2412, "step": 115640 }, { "epoch": 4.79, "grad_norm": 0.4765625, "learning_rate": 0.0004692438581292615, "loss": 0.2189, "step": 115650 }, { "epoch": 4.79, "grad_norm": 0.359375, "learning_rate": 0.0004692386464249674, "loss": 0.1329, "step": 115660 }, { "epoch": 4.79, "grad_norm": 0.77734375, "learning_rate": 0.0004692334343080898, "loss": 0.233, "step": 115670 }, { "epoch": 4.79, "grad_norm": 1.8046875, "learning_rate": 0.00046922822177863856, "loss": 0.21, "step": 115680 }, { "epoch": 4.79, "grad_norm": 0.1845703125, "learning_rate": 0.0004692230088366235, "loss": 0.2583, "step": 115690 }, { "epoch": 4.79, "grad_norm": 0.6171875, "learning_rate": 0.00046921779548205443, "loss": 0.2189, "step": 115700 }, { "epoch": 4.79, "grad_norm": 0.7890625, "learning_rate": 0.00046921258171494113, "loss": 0.2177, "step": 115710 }, { "epoch": 4.79, "grad_norm": 1.015625, "learning_rate": 0.0004692073675352934, "loss": 0.1756, "step": 115720 }, { "epoch": 4.79, "grad_norm": 1.0390625, "learning_rate": 0.0004692021529431211, "loss": 0.1555, "step": 115730 }, { "epoch": 4.79, "grad_norm": 0.45703125, "learning_rate": 0.000469196937938434, "loss": 0.1829, "step": 115740 }, { "epoch": 4.79, "grad_norm": 0.57421875, "learning_rate": 0.000469191722521242, "loss": 0.1905, "step": 115750 }, { "epoch": 4.79, "grad_norm": 0.8203125, "learning_rate": 0.00046918650669155483, "loss": 0.2362, "step": 115760 }, { "epoch": 4.8, "grad_norm": 0.20703125, "learning_rate": 0.0004691812904493823, "loss": 0.1662, "step": 115770 }, { "epoch": 4.8, "grad_norm": 0.9375, "learning_rate": 0.0004691760737947342, "loss": 0.2397, "step": 115780 }, { "epoch": 4.8, "grad_norm": 0.6640625, "learning_rate": 0.00046917085672762047, "loss": 0.1814, "step": 115790 }, { "epoch": 4.8, "grad_norm": 0.9921875, "learning_rate": 0.00046916563924805077, "loss": 0.1837, "step": 115800 }, { "epoch": 4.8, "grad_norm": 0.80078125, "learning_rate": 0.0004691604213560351, "loss": 0.1655, "step": 115810 }, { "epoch": 4.8, "grad_norm": 0.365234375, "learning_rate": 0.00046915520305158316, "loss": 0.2239, "step": 115820 }, { "epoch": 4.8, "grad_norm": 0.390625, "learning_rate": 0.0004691499843347048, "loss": 0.2216, "step": 115830 }, { "epoch": 4.8, "grad_norm": 0.7265625, "learning_rate": 0.00046914476520540984, "loss": 0.2367, "step": 115840 }, { "epoch": 4.8, "grad_norm": 1.6171875, "learning_rate": 0.00046913954566370805, "loss": 0.1601, "step": 115850 }, { "epoch": 4.8, "grad_norm": 1.8359375, "learning_rate": 0.00046913432570960936, "loss": 0.2453, "step": 115860 }, { "epoch": 4.8, "grad_norm": 0.93359375, "learning_rate": 0.0004691291053431235, "loss": 0.2624, "step": 115870 }, { "epoch": 4.8, "grad_norm": 1.4765625, "learning_rate": 0.0004691238845642603, "loss": 0.209, "step": 115880 }, { "epoch": 4.8, "grad_norm": 1.375, "learning_rate": 0.0004691186633730296, "loss": 0.2132, "step": 115890 }, { "epoch": 4.8, "grad_norm": 0.90625, "learning_rate": 0.00046911344176944124, "loss": 0.2192, "step": 115900 }, { "epoch": 4.8, "grad_norm": 0.50390625, "learning_rate": 0.00046910821975350514, "loss": 0.188, "step": 115910 }, { "epoch": 4.8, "grad_norm": 0.44140625, "learning_rate": 0.00046910299732523097, "loss": 0.1965, "step": 115920 }, { "epoch": 4.8, "grad_norm": 1.890625, "learning_rate": 0.00046909777448462864, "loss": 0.2326, "step": 115930 }, { "epoch": 4.8, "grad_norm": 0.5546875, "learning_rate": 0.0004690925512317079, "loss": 0.1944, "step": 115940 }, { "epoch": 4.8, "grad_norm": 0.890625, "learning_rate": 0.00046908732756647875, "loss": 0.2214, "step": 115950 }, { "epoch": 4.8, "grad_norm": 1.078125, "learning_rate": 0.00046908210348895087, "loss": 0.2127, "step": 115960 }, { "epoch": 4.8, "grad_norm": 0.76953125, "learning_rate": 0.000469076878999134, "loss": 0.1791, "step": 115970 }, { "epoch": 4.8, "grad_norm": 0.478515625, "learning_rate": 0.00046907165409703825, "loss": 0.173, "step": 115980 }, { "epoch": 4.8, "grad_norm": 0.78515625, "learning_rate": 0.00046906642878267324, "loss": 0.2065, "step": 115990 }, { "epoch": 4.8, "grad_norm": 1.203125, "learning_rate": 0.0004690612030560489, "loss": 0.2454, "step": 116000 }, { "epoch": 4.81, "grad_norm": 1.0234375, "learning_rate": 0.00046905597691717505, "loss": 0.2645, "step": 116010 }, { "epoch": 4.81, "grad_norm": 0.96875, "learning_rate": 0.0004690507503660615, "loss": 0.1623, "step": 116020 }, { "epoch": 4.81, "grad_norm": 0.83984375, "learning_rate": 0.000469045523402718, "loss": 0.1835, "step": 116030 }, { "epoch": 4.81, "grad_norm": 0.72265625, "learning_rate": 0.0004690402960271546, "loss": 0.1913, "step": 116040 }, { "epoch": 4.81, "grad_norm": 1.1640625, "learning_rate": 0.0004690350682393809, "loss": 0.2363, "step": 116050 }, { "epoch": 4.81, "grad_norm": 0.55859375, "learning_rate": 0.00046902984003940694, "loss": 0.227, "step": 116060 }, { "epoch": 4.81, "grad_norm": 0.74609375, "learning_rate": 0.0004690246114272425, "loss": 0.2401, "step": 116070 }, { "epoch": 4.81, "grad_norm": 1.1484375, "learning_rate": 0.0004690193824028973, "loss": 0.2062, "step": 116080 }, { "epoch": 4.81, "grad_norm": 0.9296875, "learning_rate": 0.0004690141529663814, "loss": 0.1959, "step": 116090 }, { "epoch": 4.81, "grad_norm": 0.84765625, "learning_rate": 0.0004690089231177044, "loss": 0.1976, "step": 116100 }, { "epoch": 4.81, "grad_norm": 0.5859375, "learning_rate": 0.00046900369285687626, "loss": 0.2462, "step": 116110 }, { "epoch": 4.81, "grad_norm": 0.578125, "learning_rate": 0.0004689984621839069, "loss": 0.183, "step": 116120 }, { "epoch": 4.81, "grad_norm": 0.34375, "learning_rate": 0.000468993231098806, "loss": 0.1916, "step": 116130 }, { "epoch": 4.81, "grad_norm": 0.462890625, "learning_rate": 0.00046898799960158356, "loss": 0.2361, "step": 116140 }, { "epoch": 4.81, "grad_norm": 0.73046875, "learning_rate": 0.00046898276769224935, "loss": 0.2086, "step": 116150 }, { "epoch": 4.81, "grad_norm": 0.5078125, "learning_rate": 0.0004689775353708132, "loss": 0.2315, "step": 116160 }, { "epoch": 4.81, "grad_norm": 0.443359375, "learning_rate": 0.000468972302637285, "loss": 0.1945, "step": 116170 }, { "epoch": 4.81, "grad_norm": 0.515625, "learning_rate": 0.00046896706949167444, "loss": 0.2103, "step": 116180 }, { "epoch": 4.81, "grad_norm": 0.546875, "learning_rate": 0.0004689618359339917, "loss": 0.1855, "step": 116190 }, { "epoch": 4.81, "grad_norm": 0.7578125, "learning_rate": 0.00046895660196424627, "loss": 0.1907, "step": 116200 }, { "epoch": 4.81, "grad_norm": 0.25, "learning_rate": 0.00046895136758244826, "loss": 0.1999, "step": 116210 }, { "epoch": 4.81, "grad_norm": 0.50390625, "learning_rate": 0.00046894613278860735, "loss": 0.193, "step": 116220 }, { "epoch": 4.81, "grad_norm": 1.0859375, "learning_rate": 0.0004689408975827335, "loss": 0.1651, "step": 116230 }, { "epoch": 4.81, "grad_norm": 0.6796875, "learning_rate": 0.00046893566196483654, "loss": 0.225, "step": 116240 }, { "epoch": 4.82, "grad_norm": 0.419921875, "learning_rate": 0.00046893042593492627, "loss": 0.2231, "step": 116250 }, { "epoch": 4.82, "grad_norm": 2.390625, "learning_rate": 0.00046892518949301267, "loss": 0.2198, "step": 116260 }, { "epoch": 4.82, "grad_norm": 0.65234375, "learning_rate": 0.00046891995263910534, "loss": 0.2214, "step": 116270 }, { "epoch": 4.82, "grad_norm": 0.9921875, "learning_rate": 0.00046891471537321446, "loss": 0.2545, "step": 116280 }, { "epoch": 4.82, "grad_norm": 0.1435546875, "learning_rate": 0.00046890947769534963, "loss": 0.1759, "step": 116290 }, { "epoch": 4.82, "grad_norm": 0.99609375, "learning_rate": 0.0004689042396055209, "loss": 0.2123, "step": 116300 }, { "epoch": 4.82, "grad_norm": 0.44140625, "learning_rate": 0.000468899001103738, "loss": 0.1534, "step": 116310 }, { "epoch": 4.82, "grad_norm": 0.50390625, "learning_rate": 0.00046889376219001066, "loss": 0.2753, "step": 116320 }, { "epoch": 4.82, "grad_norm": 0.70703125, "learning_rate": 0.0004688885228643491, "loss": 0.1849, "step": 116330 }, { "epoch": 4.82, "grad_norm": 0.66015625, "learning_rate": 0.0004688832831267629, "loss": 0.2125, "step": 116340 }, { "epoch": 4.82, "grad_norm": 0.546875, "learning_rate": 0.000468878042977262, "loss": 0.1967, "step": 116350 }, { "epoch": 4.82, "grad_norm": 0.69140625, "learning_rate": 0.00046887280241585626, "loss": 0.1915, "step": 116360 }, { "epoch": 4.82, "grad_norm": 0.26171875, "learning_rate": 0.0004688675614425555, "loss": 0.1498, "step": 116370 }, { "epoch": 4.82, "grad_norm": 1.1875, "learning_rate": 0.0004688623200573697, "loss": 0.2082, "step": 116380 }, { "epoch": 4.82, "grad_norm": 0.87109375, "learning_rate": 0.0004688570782603086, "loss": 0.2029, "step": 116390 }, { "epoch": 4.82, "grad_norm": 0.7734375, "learning_rate": 0.00046885183605138215, "loss": 0.1935, "step": 116400 }, { "epoch": 4.82, "grad_norm": 0.37109375, "learning_rate": 0.00046884659343060015, "loss": 0.2514, "step": 116410 }, { "epoch": 4.82, "grad_norm": 0.609375, "learning_rate": 0.00046884135039797247, "loss": 0.2272, "step": 116420 }, { "epoch": 4.82, "grad_norm": 1.3125, "learning_rate": 0.0004688361069535091, "loss": 0.2003, "step": 116430 }, { "epoch": 4.82, "grad_norm": 1.03125, "learning_rate": 0.00046883086309721967, "loss": 0.2486, "step": 116440 }, { "epoch": 4.82, "grad_norm": 0.2294921875, "learning_rate": 0.00046882561882911433, "loss": 0.2606, "step": 116450 }, { "epoch": 4.82, "grad_norm": 0.8125, "learning_rate": 0.0004688203741492027, "loss": 0.1689, "step": 116460 }, { "epoch": 4.82, "grad_norm": 0.1748046875, "learning_rate": 0.0004688151290574948, "loss": 0.2801, "step": 116470 }, { "epoch": 4.82, "grad_norm": 0.3359375, "learning_rate": 0.00046880988355400046, "loss": 0.2497, "step": 116480 }, { "epoch": 4.83, "grad_norm": 0.8125, "learning_rate": 0.0004688046376387295, "loss": 0.2225, "step": 116490 }, { "epoch": 4.83, "grad_norm": 0.6328125, "learning_rate": 0.0004687993913116919, "loss": 0.1898, "step": 116500 }, { "epoch": 4.83, "grad_norm": 1.4453125, "learning_rate": 0.00046879414457289746, "loss": 0.2062, "step": 116510 }, { "epoch": 4.83, "grad_norm": 1.53125, "learning_rate": 0.0004687888974223561, "loss": 0.1822, "step": 116520 }, { "epoch": 4.83, "grad_norm": 0.8125, "learning_rate": 0.0004687836498600776, "loss": 0.2096, "step": 116530 }, { "epoch": 4.83, "grad_norm": 0.396484375, "learning_rate": 0.0004687784018860719, "loss": 0.2588, "step": 116540 }, { "epoch": 4.83, "grad_norm": 0.83984375, "learning_rate": 0.0004687731535003489, "loss": 0.1945, "step": 116550 }, { "epoch": 4.83, "grad_norm": 0.70703125, "learning_rate": 0.0004687679047029184, "loss": 0.1858, "step": 116560 }, { "epoch": 4.83, "grad_norm": 0.4296875, "learning_rate": 0.0004687626554937904, "loss": 0.2588, "step": 116570 }, { "epoch": 4.83, "grad_norm": 0.6171875, "learning_rate": 0.00046875740587297465, "loss": 0.2253, "step": 116580 }, { "epoch": 4.83, "grad_norm": 0.3046875, "learning_rate": 0.00046875215584048116, "loss": 0.2079, "step": 116590 }, { "epoch": 4.83, "grad_norm": 0.43359375, "learning_rate": 0.0004687469053963197, "loss": 0.2125, "step": 116600 }, { "epoch": 4.83, "grad_norm": 0.59375, "learning_rate": 0.00046874165454050015, "loss": 0.2045, "step": 116610 }, { "epoch": 4.83, "grad_norm": 1.4140625, "learning_rate": 0.00046873640327303246, "loss": 0.2327, "step": 116620 }, { "epoch": 4.83, "grad_norm": 0.000232696533203125, "learning_rate": 0.0004687311515939265, "loss": 0.1602, "step": 116630 }, { "epoch": 4.83, "grad_norm": 0.6015625, "learning_rate": 0.0004687258995031921, "loss": 0.2185, "step": 116640 }, { "epoch": 4.83, "grad_norm": 0.796875, "learning_rate": 0.0004687206470008392, "loss": 0.2137, "step": 116650 }, { "epoch": 4.83, "grad_norm": 0.328125, "learning_rate": 0.00046871539408687763, "loss": 0.189, "step": 116660 }, { "epoch": 4.83, "grad_norm": 1.171875, "learning_rate": 0.0004687101407613174, "loss": 0.2726, "step": 116670 }, { "epoch": 4.83, "grad_norm": 0.734375, "learning_rate": 0.0004687048870241681, "loss": 0.2035, "step": 116680 }, { "epoch": 4.83, "grad_norm": 0.64453125, "learning_rate": 0.00046869963287544004, "loss": 0.2324, "step": 116690 }, { "epoch": 4.83, "grad_norm": 0.5546875, "learning_rate": 0.0004686943783151428, "loss": 0.2003, "step": 116700 }, { "epoch": 4.83, "grad_norm": 0.375, "learning_rate": 0.0004686891233432863, "loss": 0.2573, "step": 116710 }, { "epoch": 4.83, "grad_norm": 0.60546875, "learning_rate": 0.00046868386795988063, "loss": 0.1533, "step": 116720 }, { "epoch": 4.83, "grad_norm": 1.2109375, "learning_rate": 0.00046867861216493543, "loss": 0.2371, "step": 116730 }, { "epoch": 4.84, "grad_norm": 0.54296875, "learning_rate": 0.00046867335595846075, "loss": 0.1879, "step": 116740 }, { "epoch": 4.84, "grad_norm": 0.7734375, "learning_rate": 0.00046866809934046635, "loss": 0.1605, "step": 116750 }, { "epoch": 4.84, "grad_norm": 1.03125, "learning_rate": 0.00046866284231096225, "loss": 0.2216, "step": 116760 }, { "epoch": 4.84, "grad_norm": 0.283203125, "learning_rate": 0.0004686575848699582, "loss": 0.2114, "step": 116770 }, { "epoch": 4.84, "grad_norm": 1.015625, "learning_rate": 0.00046865232701746433, "loss": 0.2416, "step": 116780 }, { "epoch": 4.84, "grad_norm": 1.5390625, "learning_rate": 0.0004686470687534904, "loss": 0.1996, "step": 116790 }, { "epoch": 4.84, "grad_norm": 0.52734375, "learning_rate": 0.0004686418100780462, "loss": 0.2099, "step": 116800 }, { "epoch": 4.84, "grad_norm": 1.0859375, "learning_rate": 0.0004686365509911418, "loss": 0.1615, "step": 116810 }, { "epoch": 4.84, "grad_norm": 1.0703125, "learning_rate": 0.00046863129149278695, "loss": 0.267, "step": 116820 }, { "epoch": 4.84, "grad_norm": 2.125, "learning_rate": 0.0004686260315829917, "loss": 0.205, "step": 116830 }, { "epoch": 4.84, "grad_norm": 0.408203125, "learning_rate": 0.0004686207712617658, "loss": 0.208, "step": 116840 }, { "epoch": 4.84, "grad_norm": 0.52734375, "learning_rate": 0.0004686155105291192, "loss": 0.2321, "step": 116850 }, { "epoch": 4.84, "grad_norm": 0.515625, "learning_rate": 0.0004686102493850619, "loss": 0.1836, "step": 116860 }, { "epoch": 4.84, "grad_norm": 0.48046875, "learning_rate": 0.0004686049878296037, "loss": 0.1349, "step": 116870 }, { "epoch": 4.84, "grad_norm": 0.75390625, "learning_rate": 0.00046859972586275444, "loss": 0.2654, "step": 116880 }, { "epoch": 4.84, "grad_norm": 0.67578125, "learning_rate": 0.00046859446348452416, "loss": 0.1644, "step": 116890 }, { "epoch": 4.84, "grad_norm": 0.61328125, "learning_rate": 0.0004685892006949227, "loss": 0.2292, "step": 116900 }, { "epoch": 4.84, "grad_norm": 1.5546875, "learning_rate": 0.00046858393749396, "loss": 0.1962, "step": 116910 }, { "epoch": 4.84, "grad_norm": 0.5625, "learning_rate": 0.000468578673881646, "loss": 0.2006, "step": 116920 }, { "epoch": 4.84, "grad_norm": 0.44921875, "learning_rate": 0.0004685734098579904, "loss": 0.1993, "step": 116930 }, { "epoch": 4.84, "grad_norm": 1.875, "learning_rate": 0.0004685681454230033, "loss": 0.2047, "step": 116940 }, { "epoch": 4.84, "grad_norm": 0.83203125, "learning_rate": 0.0004685628805766946, "loss": 0.2406, "step": 116950 }, { "epoch": 4.84, "grad_norm": 0.890625, "learning_rate": 0.00046855761531907405, "loss": 0.2007, "step": 116960 }, { "epoch": 4.84, "grad_norm": 0.30859375, "learning_rate": 0.0004685523496501518, "loss": 0.2058, "step": 116970 }, { "epoch": 4.85, "grad_norm": 0.88671875, "learning_rate": 0.00046854708356993747, "loss": 0.173, "step": 116980 }, { "epoch": 4.85, "grad_norm": 0.64453125, "learning_rate": 0.00046854181707844125, "loss": 0.161, "step": 116990 }, { "epoch": 4.85, "grad_norm": 0.62109375, "learning_rate": 0.00046853655017567295, "loss": 0.2251, "step": 117000 }, { "epoch": 4.85, "grad_norm": 0.73046875, "learning_rate": 0.00046853128286164237, "loss": 0.197, "step": 117010 }, { "epoch": 4.85, "grad_norm": 1.9765625, "learning_rate": 0.0004685260151363596, "loss": 0.2201, "step": 117020 }, { "epoch": 4.85, "grad_norm": 1.3515625, "learning_rate": 0.0004685207469998344, "loss": 0.2307, "step": 117030 }, { "epoch": 4.85, "grad_norm": 0.9296875, "learning_rate": 0.00046851547845207675, "loss": 0.2162, "step": 117040 }, { "epoch": 4.85, "grad_norm": 0.25, "learning_rate": 0.00046851020949309664, "loss": 0.2169, "step": 117050 }, { "epoch": 4.85, "grad_norm": 0.55859375, "learning_rate": 0.0004685049401229039, "loss": 0.1927, "step": 117060 }, { "epoch": 4.85, "grad_norm": 0.462890625, "learning_rate": 0.0004684996703415084, "loss": 0.1674, "step": 117070 }, { "epoch": 4.85, "grad_norm": 0.6953125, "learning_rate": 0.00046849440014892015, "loss": 0.2462, "step": 117080 }, { "epoch": 4.85, "grad_norm": 0.9296875, "learning_rate": 0.00046848912954514907, "loss": 0.2213, "step": 117090 }, { "epoch": 4.85, "grad_norm": 0.240234375, "learning_rate": 0.000468483858530205, "loss": 0.2058, "step": 117100 }, { "epoch": 4.85, "grad_norm": 0.318359375, "learning_rate": 0.00046847858710409785, "loss": 0.2181, "step": 117110 }, { "epoch": 4.85, "grad_norm": 0.48828125, "learning_rate": 0.00046847331526683766, "loss": 0.2409, "step": 117120 }, { "epoch": 4.85, "grad_norm": 1.0859375, "learning_rate": 0.0004684680430184343, "loss": 0.2381, "step": 117130 }, { "epoch": 4.85, "grad_norm": 0.1884765625, "learning_rate": 0.0004684627703588976, "loss": 0.2051, "step": 117140 }, { "epoch": 4.85, "grad_norm": 0.86328125, "learning_rate": 0.0004684574972882376, "loss": 0.2341, "step": 117150 }, { "epoch": 4.85, "grad_norm": 0.984375, "learning_rate": 0.00046845222380646425, "loss": 0.2506, "step": 117160 }, { "epoch": 4.85, "grad_norm": 0.5546875, "learning_rate": 0.00046844694991358737, "loss": 0.2456, "step": 117170 }, { "epoch": 4.85, "grad_norm": 1.078125, "learning_rate": 0.00046844167560961686, "loss": 0.2451, "step": 117180 }, { "epoch": 4.85, "grad_norm": 0.59765625, "learning_rate": 0.00046843640089456274, "loss": 0.2124, "step": 117190 }, { "epoch": 4.85, "grad_norm": 0.60546875, "learning_rate": 0.00046843112576843495, "loss": 0.2103, "step": 117200 }, { "epoch": 4.85, "grad_norm": 0.474609375, "learning_rate": 0.0004684258502312433, "loss": 0.273, "step": 117210 }, { "epoch": 4.86, "grad_norm": 0.76171875, "learning_rate": 0.0004684205742829978, "loss": 0.2175, "step": 117220 }, { "epoch": 4.86, "grad_norm": 0.6796875, "learning_rate": 0.0004684152979237084, "loss": 0.2492, "step": 117230 }, { "epoch": 4.86, "grad_norm": 1.28125, "learning_rate": 0.0004684100211533849, "loss": 0.2224, "step": 117240 }, { "epoch": 4.86, "grad_norm": 0.53515625, "learning_rate": 0.00046840474397203747, "loss": 0.1494, "step": 117250 }, { "epoch": 4.86, "grad_norm": 0.67578125, "learning_rate": 0.0004683994663796758, "loss": 0.1984, "step": 117260 }, { "epoch": 4.86, "grad_norm": 0.451171875, "learning_rate": 0.00046839418837631, "loss": 0.2502, "step": 117270 }, { "epoch": 4.86, "grad_norm": 0.6796875, "learning_rate": 0.00046838890996194984, "loss": 0.2161, "step": 117280 }, { "epoch": 4.86, "grad_norm": 0.384765625, "learning_rate": 0.00046838363113660533, "loss": 0.209, "step": 117290 }, { "epoch": 4.86, "grad_norm": 0.365234375, "learning_rate": 0.00046837835190028645, "loss": 0.2185, "step": 117300 }, { "epoch": 4.86, "grad_norm": 0.478515625, "learning_rate": 0.00046837307225300307, "loss": 0.1542, "step": 117310 }, { "epoch": 4.86, "grad_norm": 0.68359375, "learning_rate": 0.0004683677921947652, "loss": 0.2102, "step": 117320 }, { "epoch": 4.86, "grad_norm": 0.50390625, "learning_rate": 0.0004683625117255827, "loss": 0.2259, "step": 117330 }, { "epoch": 4.86, "grad_norm": 0.703125, "learning_rate": 0.00046835723084546555, "loss": 0.1975, "step": 117340 }, { "epoch": 4.86, "grad_norm": 0.828125, "learning_rate": 0.00046835194955442363, "loss": 0.2656, "step": 117350 }, { "epoch": 4.86, "grad_norm": 0.8125, "learning_rate": 0.0004683466678524669, "loss": 0.2312, "step": 117360 }, { "epoch": 4.86, "grad_norm": 0.5703125, "learning_rate": 0.0004683413857396054, "loss": 0.2416, "step": 117370 }, { "epoch": 4.86, "grad_norm": 0.61328125, "learning_rate": 0.000468336103215849, "loss": 0.2242, "step": 117380 }, { "epoch": 4.86, "grad_norm": 0.62109375, "learning_rate": 0.0004683308202812075, "loss": 0.1873, "step": 117390 }, { "epoch": 4.86, "grad_norm": 0.173828125, "learning_rate": 0.0004683255369356911, "loss": 0.2315, "step": 117400 }, { "epoch": 4.86, "grad_norm": 2.9375, "learning_rate": 0.0004683202531793096, "loss": 0.1931, "step": 117410 }, { "epoch": 4.86, "grad_norm": 1.2890625, "learning_rate": 0.00046831496901207295, "loss": 0.2293, "step": 117420 }, { "epoch": 4.86, "grad_norm": 0.8515625, "learning_rate": 0.00046830968443399107, "loss": 0.1655, "step": 117430 }, { "epoch": 4.86, "grad_norm": 1.15625, "learning_rate": 0.000468304399445074, "loss": 0.2289, "step": 117440 }, { "epoch": 4.86, "grad_norm": 0.39453125, "learning_rate": 0.00046829911404533154, "loss": 0.2183, "step": 117450 }, { "epoch": 4.87, "grad_norm": 0.69921875, "learning_rate": 0.00046829382823477373, "loss": 0.2047, "step": 117460 }, { "epoch": 4.87, "grad_norm": 1.921875, "learning_rate": 0.0004682885420134106, "loss": 0.2516, "step": 117470 }, { "epoch": 4.87, "grad_norm": 0.85546875, "learning_rate": 0.00046828325538125196, "loss": 0.1872, "step": 117480 }, { "epoch": 4.87, "grad_norm": 2.203125, "learning_rate": 0.00046827796833830783, "loss": 0.2676, "step": 117490 }, { "epoch": 4.87, "grad_norm": 0.52734375, "learning_rate": 0.00046827268088458807, "loss": 0.2004, "step": 117500 }, { "epoch": 4.87, "grad_norm": 0.7734375, "learning_rate": 0.00046826739302010276, "loss": 0.2045, "step": 117510 }, { "epoch": 4.87, "grad_norm": 1.390625, "learning_rate": 0.00046826210474486176, "loss": 0.2462, "step": 117520 }, { "epoch": 4.87, "grad_norm": 0.59375, "learning_rate": 0.00046825681605887505, "loss": 0.1647, "step": 117530 }, { "epoch": 4.87, "grad_norm": 0.5625, "learning_rate": 0.00046825152696215256, "loss": 0.2254, "step": 117540 }, { "epoch": 4.87, "grad_norm": 0.9375, "learning_rate": 0.0004682462374547043, "loss": 0.1497, "step": 117550 }, { "epoch": 4.87, "grad_norm": 0.73046875, "learning_rate": 0.0004682409475365401, "loss": 0.2386, "step": 117560 }, { "epoch": 4.87, "grad_norm": 0.341796875, "learning_rate": 0.0004682356572076701, "loss": 0.1929, "step": 117570 }, { "epoch": 4.87, "grad_norm": 0.43359375, "learning_rate": 0.0004682303664681041, "loss": 0.2043, "step": 117580 }, { "epoch": 4.87, "grad_norm": 1.5859375, "learning_rate": 0.00046822507531785217, "loss": 0.1322, "step": 117590 }, { "epoch": 4.87, "grad_norm": 1.46875, "learning_rate": 0.00046821978375692424, "loss": 0.2661, "step": 117600 }, { "epoch": 4.87, "grad_norm": 0.64453125, "learning_rate": 0.00046821449178533023, "loss": 0.2101, "step": 117610 }, { "epoch": 4.87, "grad_norm": 0.58984375, "learning_rate": 0.00046820919940308007, "loss": 0.2028, "step": 117620 }, { "epoch": 4.87, "grad_norm": 0.890625, "learning_rate": 0.0004682039066101837, "loss": 0.2004, "step": 117630 }, { "epoch": 4.87, "grad_norm": 1.328125, "learning_rate": 0.00046819861340665134, "loss": 0.1523, "step": 117640 }, { "epoch": 4.87, "grad_norm": 0.96484375, "learning_rate": 0.00046819331979249256, "loss": 0.1796, "step": 117650 }, { "epoch": 4.87, "grad_norm": 0.515625, "learning_rate": 0.0004681880257677176, "loss": 0.2223, "step": 117660 }, { "epoch": 4.87, "grad_norm": 0.484375, "learning_rate": 0.00046818273133233636, "loss": 0.2455, "step": 117670 }, { "epoch": 4.87, "grad_norm": 0.4296875, "learning_rate": 0.00046817743648635874, "loss": 0.2107, "step": 117680 }, { "epoch": 4.87, "grad_norm": 0.8671875, "learning_rate": 0.0004681721412297947, "loss": 0.1937, "step": 117690 }, { "epoch": 4.88, "grad_norm": 0.48828125, "learning_rate": 0.0004681668455626543, "loss": 0.1997, "step": 117700 }, { "epoch": 4.88, "grad_norm": 0.490234375, "learning_rate": 0.0004681615494849475, "loss": 0.2086, "step": 117710 }, { "epoch": 4.88, "grad_norm": 0.65234375, "learning_rate": 0.00046815625299668416, "loss": 0.1819, "step": 117720 }, { "epoch": 4.88, "grad_norm": 1.4921875, "learning_rate": 0.0004681509560978744, "loss": 0.1542, "step": 117730 }, { "epoch": 4.88, "grad_norm": 1.890625, "learning_rate": 0.000468145658788528, "loss": 0.2055, "step": 117740 }, { "epoch": 4.88, "grad_norm": 1.3359375, "learning_rate": 0.00046814036106865504, "loss": 0.204, "step": 117750 }, { "epoch": 4.88, "grad_norm": 1.140625, "learning_rate": 0.0004681350629382655, "loss": 0.1739, "step": 117760 }, { "epoch": 4.88, "grad_norm": 0.478515625, "learning_rate": 0.0004681297643973693, "loss": 0.221, "step": 117770 }, { "epoch": 4.88, "grad_norm": 0.58203125, "learning_rate": 0.0004681244654459765, "loss": 0.2028, "step": 117780 }, { "epoch": 4.88, "grad_norm": 0.306640625, "learning_rate": 0.000468119166084097, "loss": 0.2248, "step": 117790 }, { "epoch": 4.88, "grad_norm": 0.51171875, "learning_rate": 0.0004681138663117407, "loss": 0.2231, "step": 117800 }, { "epoch": 4.88, "grad_norm": 0.58203125, "learning_rate": 0.00046810856612891774, "loss": 0.1721, "step": 117810 }, { "epoch": 4.88, "grad_norm": 0.3828125, "learning_rate": 0.00046810326553563795, "loss": 0.201, "step": 117820 }, { "epoch": 4.88, "grad_norm": 2.0, "learning_rate": 0.0004680979645319114, "loss": 0.223, "step": 117830 }, { "epoch": 4.88, "grad_norm": 0.765625, "learning_rate": 0.00046809266311774813, "loss": 0.2353, "step": 117840 }, { "epoch": 4.88, "grad_norm": 1.328125, "learning_rate": 0.0004680873612931579, "loss": 0.2413, "step": 117850 }, { "epoch": 4.88, "grad_norm": 0.921875, "learning_rate": 0.0004680820590581508, "loss": 0.1733, "step": 117860 }, { "epoch": 4.88, "grad_norm": 0.63671875, "learning_rate": 0.0004680767564127369, "loss": 0.1926, "step": 117870 }, { "epoch": 4.88, "grad_norm": 0.474609375, "learning_rate": 0.000468071453356926, "loss": 0.1948, "step": 117880 }, { "epoch": 4.88, "grad_norm": 1.4140625, "learning_rate": 0.00046806614989072825, "loss": 0.2007, "step": 117890 }, { "epoch": 4.88, "grad_norm": 0.345703125, "learning_rate": 0.00046806084601415355, "loss": 0.237, "step": 117900 }, { "epoch": 4.88, "grad_norm": 0.90625, "learning_rate": 0.00046805554172721185, "loss": 0.1789, "step": 117910 }, { "epoch": 4.88, "grad_norm": 1.109375, "learning_rate": 0.00046805023702991324, "loss": 0.2168, "step": 117920 }, { "epoch": 4.88, "grad_norm": 1.1171875, "learning_rate": 0.0004680449319222675, "loss": 0.2196, "step": 117930 }, { "epoch": 4.89, "grad_norm": 0.298828125, "learning_rate": 0.0004680396264042849, "loss": 0.2354, "step": 117940 }, { "epoch": 4.89, "grad_norm": 0.1767578125, "learning_rate": 0.00046803432047597517, "loss": 0.2284, "step": 117950 }, { "epoch": 4.89, "grad_norm": 0.95703125, "learning_rate": 0.00046802901413734837, "loss": 0.1902, "step": 117960 }, { "epoch": 4.89, "grad_norm": 0.66796875, "learning_rate": 0.0004680237073884146, "loss": 0.1959, "step": 117970 }, { "epoch": 4.89, "grad_norm": 0.7109375, "learning_rate": 0.0004680184002291837, "loss": 0.2419, "step": 117980 }, { "epoch": 4.89, "grad_norm": 0.310546875, "learning_rate": 0.00046801309265966567, "loss": 0.2059, "step": 117990 }, { "epoch": 4.89, "grad_norm": 0.90234375, "learning_rate": 0.0004680077846798706, "loss": 0.1863, "step": 118000 }, { "epoch": 4.89, "grad_norm": 1.7734375, "learning_rate": 0.0004680024762898084, "loss": 0.2375, "step": 118010 }, { "epoch": 4.89, "grad_norm": 1.1640625, "learning_rate": 0.0004679971674894892, "loss": 0.2118, "step": 118020 }, { "epoch": 4.89, "grad_norm": 0.373046875, "learning_rate": 0.0004679918582789227, "loss": 0.2161, "step": 118030 }, { "epoch": 4.89, "grad_norm": 0.490234375, "learning_rate": 0.0004679865486581192, "loss": 0.2314, "step": 118040 }, { "epoch": 4.89, "grad_norm": 0.75, "learning_rate": 0.0004679812386270885, "loss": 0.1827, "step": 118050 }, { "epoch": 4.89, "grad_norm": 0.4296875, "learning_rate": 0.0004679759281858406, "loss": 0.2624, "step": 118060 }, { "epoch": 4.89, "grad_norm": 0.83203125, "learning_rate": 0.0004679706173343856, "loss": 0.2045, "step": 118070 }, { "epoch": 4.89, "grad_norm": 0.99609375, "learning_rate": 0.0004679653060727334, "loss": 0.1384, "step": 118080 }, { "epoch": 4.89, "grad_norm": 0.33984375, "learning_rate": 0.0004679599944008941, "loss": 0.1723, "step": 118090 }, { "epoch": 4.89, "grad_norm": 0.73828125, "learning_rate": 0.00046795468231887753, "loss": 0.1888, "step": 118100 }, { "epoch": 4.89, "grad_norm": 1.4921875, "learning_rate": 0.0004679493698266939, "loss": 0.2013, "step": 118110 }, { "epoch": 4.89, "grad_norm": 0.57421875, "learning_rate": 0.000467944056924353, "loss": 0.201, "step": 118120 }, { "epoch": 4.89, "grad_norm": 0.7734375, "learning_rate": 0.00046793874361186495, "loss": 0.2444, "step": 118130 }, { "epoch": 4.89, "grad_norm": 0.671875, "learning_rate": 0.0004679334298892397, "loss": 0.2172, "step": 118140 }, { "epoch": 4.89, "grad_norm": 0.61328125, "learning_rate": 0.0004679281157564873, "loss": 0.2192, "step": 118150 }, { "epoch": 4.89, "grad_norm": 0.388671875, "learning_rate": 0.00046792280121361776, "loss": 0.1676, "step": 118160 }, { "epoch": 4.89, "grad_norm": 0.67578125, "learning_rate": 0.00046791748626064095, "loss": 0.2006, "step": 118170 }, { "epoch": 4.9, "grad_norm": 0.197265625, "learning_rate": 0.000467912170897567, "loss": 0.1647, "step": 118180 }, { "epoch": 4.9, "grad_norm": 0.96875, "learning_rate": 0.0004679068551244059, "loss": 0.2298, "step": 118190 }, { "epoch": 4.9, "grad_norm": 0.98046875, "learning_rate": 0.0004679015389411676, "loss": 0.2065, "step": 118200 }, { "epoch": 4.9, "grad_norm": 0.69921875, "learning_rate": 0.00046789622234786216, "loss": 0.2267, "step": 118210 }, { "epoch": 4.9, "grad_norm": 0.625, "learning_rate": 0.0004678909053444995, "loss": 0.2177, "step": 118220 }, { "epoch": 4.9, "grad_norm": 0.5859375, "learning_rate": 0.0004678855879310898, "loss": 0.1908, "step": 118230 }, { "epoch": 4.9, "grad_norm": 0.56640625, "learning_rate": 0.0004678802701076429, "loss": 0.2157, "step": 118240 }, { "epoch": 4.9, "grad_norm": 0.57421875, "learning_rate": 0.00046787495187416886, "loss": 0.194, "step": 118250 }, { "epoch": 4.9, "grad_norm": 0.443359375, "learning_rate": 0.0004678696332306777, "loss": 0.2441, "step": 118260 }, { "epoch": 4.9, "grad_norm": 0.8359375, "learning_rate": 0.00046786431417717935, "loss": 0.1798, "step": 118270 }, { "epoch": 4.9, "grad_norm": 0.59765625, "learning_rate": 0.0004678589947136839, "loss": 0.2426, "step": 118280 }, { "epoch": 4.9, "grad_norm": 0.75390625, "learning_rate": 0.0004678536748402014, "loss": 0.2134, "step": 118290 }, { "epoch": 4.9, "grad_norm": 0.40234375, "learning_rate": 0.0004678483545567418, "loss": 0.1732, "step": 118300 }, { "epoch": 4.9, "grad_norm": 0.8828125, "learning_rate": 0.0004678430338633151, "loss": 0.2085, "step": 118310 }, { "epoch": 4.9, "grad_norm": 0.423828125, "learning_rate": 0.00046783771275993134, "loss": 0.1907, "step": 118320 }, { "epoch": 4.9, "grad_norm": 0.609375, "learning_rate": 0.0004678323912466006, "loss": 0.2145, "step": 118330 }, { "epoch": 4.9, "grad_norm": 1.140625, "learning_rate": 0.00046782706932333275, "loss": 0.2145, "step": 118340 }, { "epoch": 4.9, "grad_norm": 0.79296875, "learning_rate": 0.0004678217469901379, "loss": 0.2388, "step": 118350 }, { "epoch": 4.9, "grad_norm": 0.75390625, "learning_rate": 0.00046781642424702604, "loss": 0.2236, "step": 118360 }, { "epoch": 4.9, "grad_norm": 1.25, "learning_rate": 0.00046781110109400713, "loss": 0.2517, "step": 118370 }, { "epoch": 4.9, "grad_norm": 0.8671875, "learning_rate": 0.00046780577753109133, "loss": 0.2055, "step": 118380 }, { "epoch": 4.9, "grad_norm": 1.5234375, "learning_rate": 0.00046780045355828857, "loss": 0.2114, "step": 118390 }, { "epoch": 4.9, "grad_norm": 0.65625, "learning_rate": 0.00046779512917560887, "loss": 0.2159, "step": 118400 }, { "epoch": 4.9, "grad_norm": 1.0390625, "learning_rate": 0.0004677898043830623, "loss": 0.1995, "step": 118410 }, { "epoch": 4.9, "grad_norm": 0.486328125, "learning_rate": 0.0004677844791806587, "loss": 0.1779, "step": 118420 }, { "epoch": 4.91, "grad_norm": 0.25390625, "learning_rate": 0.0004677791535684084, "loss": 0.1878, "step": 118430 }, { "epoch": 4.91, "grad_norm": 0.46484375, "learning_rate": 0.0004677738275463211, "loss": 0.2354, "step": 118440 }, { "epoch": 4.91, "grad_norm": 1.265625, "learning_rate": 0.00046776850111440705, "loss": 0.1638, "step": 118450 }, { "epoch": 4.91, "grad_norm": 1.3359375, "learning_rate": 0.0004677631742726762, "loss": 0.2656, "step": 118460 }, { "epoch": 4.91, "grad_norm": 0.193359375, "learning_rate": 0.00046775784702113857, "loss": 0.2102, "step": 118470 }, { "epoch": 4.91, "grad_norm": 1.4453125, "learning_rate": 0.00046775251935980423, "loss": 0.1707, "step": 118480 }, { "epoch": 4.91, "grad_norm": 0.73046875, "learning_rate": 0.00046774719128868305, "loss": 0.1993, "step": 118490 }, { "epoch": 4.91, "grad_norm": 0.56640625, "learning_rate": 0.00046774186280778527, "loss": 0.2344, "step": 118500 }, { "epoch": 4.91, "grad_norm": 0.74609375, "learning_rate": 0.0004677365339171208, "loss": 0.1963, "step": 118510 }, { "epoch": 4.91, "grad_norm": 0.6875, "learning_rate": 0.0004677312046166996, "loss": 0.2048, "step": 118520 }, { "epoch": 4.91, "grad_norm": 0.85546875, "learning_rate": 0.0004677258749065319, "loss": 0.233, "step": 118530 }, { "epoch": 4.91, "grad_norm": 1.0, "learning_rate": 0.0004677205447866275, "loss": 0.2159, "step": 118540 }, { "epoch": 4.91, "grad_norm": 0.71484375, "learning_rate": 0.0004677152142569967, "loss": 0.193, "step": 118550 }, { "epoch": 4.91, "grad_norm": 0.90234375, "learning_rate": 0.0004677098833176493, "loss": 0.1871, "step": 118560 }, { "epoch": 4.91, "grad_norm": 0.443359375, "learning_rate": 0.0004677045519685954, "loss": 0.1479, "step": 118570 }, { "epoch": 4.91, "grad_norm": 0.66015625, "learning_rate": 0.000467699220209845, "loss": 0.2362, "step": 118580 }, { "epoch": 4.91, "grad_norm": 0.515625, "learning_rate": 0.00046769388804140824, "loss": 0.1941, "step": 118590 }, { "epoch": 4.91, "grad_norm": 0.6328125, "learning_rate": 0.0004676885554632951, "loss": 0.2182, "step": 118600 }, { "epoch": 4.91, "grad_norm": 1.1640625, "learning_rate": 0.00046768322247551554, "loss": 0.2287, "step": 118610 }, { "epoch": 4.91, "grad_norm": 0.56640625, "learning_rate": 0.0004676778890780797, "loss": 0.2282, "step": 118620 }, { "epoch": 4.91, "grad_norm": 0.70703125, "learning_rate": 0.0004676725552709976, "loss": 0.2381, "step": 118630 }, { "epoch": 4.91, "grad_norm": 1.015625, "learning_rate": 0.00046766722105427927, "loss": 0.1782, "step": 118640 }, { "epoch": 4.91, "grad_norm": 0.69921875, "learning_rate": 0.0004676618864279347, "loss": 0.1758, "step": 118650 }, { "epoch": 4.91, "grad_norm": 0.6171875, "learning_rate": 0.000467656551391974, "loss": 0.2189, "step": 118660 }, { "epoch": 4.92, "grad_norm": 0.2578125, "learning_rate": 0.00046765121594640716, "loss": 0.1629, "step": 118670 }, { "epoch": 4.92, "grad_norm": 1.1640625, "learning_rate": 0.0004676458800912442, "loss": 0.185, "step": 118680 }, { "epoch": 4.92, "grad_norm": 0.66796875, "learning_rate": 0.00046764054382649524, "loss": 0.217, "step": 118690 }, { "epoch": 4.92, "grad_norm": 0.470703125, "learning_rate": 0.00046763520715217025, "loss": 0.2438, "step": 118700 }, { "epoch": 4.92, "grad_norm": 1.0, "learning_rate": 0.00046762987006827936, "loss": 0.1968, "step": 118710 }, { "epoch": 4.92, "grad_norm": 0.99609375, "learning_rate": 0.0004676245325748325, "loss": 0.1314, "step": 118720 }, { "epoch": 4.92, "grad_norm": 0.62890625, "learning_rate": 0.0004676191946718398, "loss": 0.2413, "step": 118730 }, { "epoch": 4.92, "grad_norm": 0.98046875, "learning_rate": 0.0004676138563593112, "loss": 0.2127, "step": 118740 }, { "epoch": 4.92, "grad_norm": 0.71484375, "learning_rate": 0.00046760851763725695, "loss": 0.2022, "step": 118750 }, { "epoch": 4.92, "grad_norm": 1.421875, "learning_rate": 0.0004676031785056869, "loss": 0.2112, "step": 118760 }, { "epoch": 4.92, "grad_norm": 0.59375, "learning_rate": 0.0004675978389646111, "loss": 0.1543, "step": 118770 }, { "epoch": 4.92, "grad_norm": 0.5703125, "learning_rate": 0.0004675924990140398, "loss": 0.1924, "step": 118780 }, { "epoch": 4.92, "grad_norm": 0.388671875, "learning_rate": 0.00046758715865398285, "loss": 0.1987, "step": 118790 }, { "epoch": 4.92, "grad_norm": 0.7890625, "learning_rate": 0.00046758181788445033, "loss": 0.2298, "step": 118800 }, { "epoch": 4.92, "grad_norm": 0.78515625, "learning_rate": 0.00046757647670545233, "loss": 0.2497, "step": 118810 }, { "epoch": 4.92, "grad_norm": 1.125, "learning_rate": 0.000467571135116999, "loss": 0.2272, "step": 118820 }, { "epoch": 4.92, "grad_norm": 0.484375, "learning_rate": 0.0004675657931191002, "loss": 0.206, "step": 118830 }, { "epoch": 4.92, "grad_norm": 1.3984375, "learning_rate": 0.00046756045071176603, "loss": 0.1798, "step": 118840 }, { "epoch": 4.92, "grad_norm": 0.6171875, "learning_rate": 0.00046755510789500665, "loss": 0.2114, "step": 118850 }, { "epoch": 4.92, "grad_norm": 0.69140625, "learning_rate": 0.0004675497646688321, "loss": 0.1963, "step": 118860 }, { "epoch": 4.92, "grad_norm": 0.8515625, "learning_rate": 0.00046754442103325234, "loss": 0.2213, "step": 118870 }, { "epoch": 4.92, "grad_norm": 1.0390625, "learning_rate": 0.00046753907698827746, "loss": 0.2604, "step": 118880 }, { "epoch": 4.92, "grad_norm": 0.65625, "learning_rate": 0.0004675337325339175, "loss": 0.1917, "step": 118890 }, { "epoch": 4.92, "grad_norm": 0.5546875, "learning_rate": 0.00046752838767018256, "loss": 0.1847, "step": 118900 }, { "epoch": 4.93, "grad_norm": 0.57421875, "learning_rate": 0.00046752304239708277, "loss": 0.2427, "step": 118910 }, { "epoch": 4.93, "grad_norm": 0.498046875, "learning_rate": 0.000467517696714628, "loss": 0.2255, "step": 118920 }, { "epoch": 4.93, "grad_norm": 1.1484375, "learning_rate": 0.00046751235062282847, "loss": 0.1692, "step": 118930 }, { "epoch": 4.93, "grad_norm": 0.6328125, "learning_rate": 0.0004675070041216942, "loss": 0.249, "step": 118940 }, { "epoch": 4.93, "grad_norm": 0.51953125, "learning_rate": 0.0004675016572112352, "loss": 0.2362, "step": 118950 }, { "epoch": 4.93, "grad_norm": 0.0, "learning_rate": 0.0004674963098914616, "loss": 0.188, "step": 118960 }, { "epoch": 4.93, "grad_norm": 0.72265625, "learning_rate": 0.0004674909621623834, "loss": 0.1553, "step": 118970 }, { "epoch": 4.93, "grad_norm": 0.6015625, "learning_rate": 0.0004674856140240107, "loss": 0.2011, "step": 118980 }, { "epoch": 4.93, "grad_norm": 2.390625, "learning_rate": 0.00046748026547635356, "loss": 0.2105, "step": 118990 }, { "epoch": 4.93, "grad_norm": 4.875, "learning_rate": 0.0004674749165194221, "loss": 0.2692, "step": 119000 }, { "epoch": 4.93, "grad_norm": 0.7421875, "learning_rate": 0.0004674695671532262, "loss": 0.2017, "step": 119010 }, { "epoch": 4.93, "grad_norm": 1.0625, "learning_rate": 0.00046746421737777616, "loss": 0.2078, "step": 119020 }, { "epoch": 4.93, "grad_norm": 0.234375, "learning_rate": 0.00046745886719308196, "loss": 0.1501, "step": 119030 }, { "epoch": 4.93, "grad_norm": 0.578125, "learning_rate": 0.0004674535165991536, "loss": 0.1742, "step": 119040 }, { "epoch": 4.93, "grad_norm": 0.7265625, "learning_rate": 0.00046744816559600124, "loss": 0.2141, "step": 119050 }, { "epoch": 4.93, "grad_norm": 0.490234375, "learning_rate": 0.0004674428141836349, "loss": 0.236, "step": 119060 }, { "epoch": 4.93, "grad_norm": 0.546875, "learning_rate": 0.00046743746236206467, "loss": 0.1868, "step": 119070 }, { "epoch": 4.93, "grad_norm": 0.515625, "learning_rate": 0.00046743211013130066, "loss": 0.1917, "step": 119080 }, { "epoch": 4.93, "grad_norm": 1.140625, "learning_rate": 0.0004674267574913529, "loss": 0.2062, "step": 119090 }, { "epoch": 4.93, "grad_norm": 0.73828125, "learning_rate": 0.00046742140444223137, "loss": 0.2037, "step": 119100 }, { "epoch": 4.93, "grad_norm": 0.7890625, "learning_rate": 0.0004674160509839463, "loss": 0.1871, "step": 119110 }, { "epoch": 4.93, "grad_norm": 0.33984375, "learning_rate": 0.00046741069711650767, "loss": 0.2315, "step": 119120 }, { "epoch": 4.93, "grad_norm": 0.47265625, "learning_rate": 0.00046740534283992567, "loss": 0.2087, "step": 119130 }, { "epoch": 4.93, "grad_norm": 0.765625, "learning_rate": 0.00046739998815421017, "loss": 0.1726, "step": 119140 }, { "epoch": 4.94, "grad_norm": 0.5703125, "learning_rate": 0.00046739463305937146, "loss": 0.1926, "step": 119150 }, { "epoch": 4.94, "grad_norm": 0.91796875, "learning_rate": 0.0004673892775554195, "loss": 0.2438, "step": 119160 }, { "epoch": 4.94, "grad_norm": 1.125, "learning_rate": 0.0004673839216423643, "loss": 0.2302, "step": 119170 }, { "epoch": 4.94, "grad_norm": 0.0004634857177734375, "learning_rate": 0.00046737856532021616, "loss": 0.148, "step": 119180 }, { "epoch": 4.94, "grad_norm": 0.60546875, "learning_rate": 0.00046737320858898505, "loss": 0.1991, "step": 119190 }, { "epoch": 4.94, "grad_norm": 0.27734375, "learning_rate": 0.000467367851448681, "loss": 0.1914, "step": 119200 }, { "epoch": 4.94, "grad_norm": 0.5625, "learning_rate": 0.00046736249389931405, "loss": 0.2032, "step": 119210 }, { "epoch": 4.94, "grad_norm": 0.76953125, "learning_rate": 0.00046735713594089445, "loss": 0.2052, "step": 119220 }, { "epoch": 4.94, "grad_norm": 0.625, "learning_rate": 0.00046735177757343215, "loss": 0.2335, "step": 119230 }, { "epoch": 4.94, "grad_norm": 0.6953125, "learning_rate": 0.0004673464187969373, "loss": 0.1777, "step": 119240 }, { "epoch": 4.94, "grad_norm": 0.671875, "learning_rate": 0.00046734105961141994, "loss": 0.2043, "step": 119250 }, { "epoch": 4.94, "grad_norm": 0.4453125, "learning_rate": 0.0004673357000168902, "loss": 0.2313, "step": 119260 }, { "epoch": 4.94, "grad_norm": 0.84375, "learning_rate": 0.0004673303400133582, "loss": 0.2295, "step": 119270 }, { "epoch": 4.94, "grad_norm": 1.03125, "learning_rate": 0.00046732497960083385, "loss": 0.2293, "step": 119280 }, { "epoch": 4.94, "grad_norm": 0.515625, "learning_rate": 0.00046731961877932737, "loss": 0.19, "step": 119290 }, { "epoch": 4.94, "grad_norm": 0.71875, "learning_rate": 0.0004673142575488489, "loss": 0.231, "step": 119300 }, { "epoch": 4.94, "grad_norm": 0.703125, "learning_rate": 0.00046730889590940845, "loss": 0.235, "step": 119310 }, { "epoch": 4.94, "grad_norm": 1.15625, "learning_rate": 0.00046730353386101607, "loss": 0.21, "step": 119320 }, { "epoch": 4.94, "grad_norm": 0.470703125, "learning_rate": 0.00046729817140368204, "loss": 0.1776, "step": 119330 }, { "epoch": 4.94, "grad_norm": 0.87890625, "learning_rate": 0.0004672928085374162, "loss": 0.2692, "step": 119340 }, { "epoch": 4.94, "grad_norm": 0.458984375, "learning_rate": 0.00046728744526222877, "loss": 0.19, "step": 119350 }, { "epoch": 4.94, "grad_norm": 0.40625, "learning_rate": 0.00046728208157812985, "loss": 0.1623, "step": 119360 }, { "epoch": 4.94, "grad_norm": 0.6953125, "learning_rate": 0.00046727671748512955, "loss": 0.2465, "step": 119370 }, { "epoch": 4.94, "grad_norm": 1.2421875, "learning_rate": 0.00046727135298323786, "loss": 0.2212, "step": 119380 }, { "epoch": 4.95, "grad_norm": 0.78515625, "learning_rate": 0.000467265988072465, "loss": 0.1768, "step": 119390 }, { "epoch": 4.95, "grad_norm": 0.232421875, "learning_rate": 0.000467260622752821, "loss": 0.1905, "step": 119400 }, { "epoch": 4.95, "grad_norm": 0.67578125, "learning_rate": 0.00046725525702431597, "loss": 0.2511, "step": 119410 }, { "epoch": 4.95, "grad_norm": 0.7109375, "learning_rate": 0.00046724989088696, "loss": 0.2194, "step": 119420 }, { "epoch": 4.95, "grad_norm": 0.60546875, "learning_rate": 0.00046724452434076326, "loss": 0.2171, "step": 119430 }, { "epoch": 4.95, "grad_norm": 0.90625, "learning_rate": 0.0004672391573857357, "loss": 0.2031, "step": 119440 }, { "epoch": 4.95, "grad_norm": 0.8203125, "learning_rate": 0.00046723379002188754, "loss": 0.1259, "step": 119450 }, { "epoch": 4.95, "grad_norm": 0.65234375, "learning_rate": 0.0004672284222492288, "loss": 0.2505, "step": 119460 }, { "epoch": 4.95, "grad_norm": 0.83203125, "learning_rate": 0.0004672230540677697, "loss": 0.2099, "step": 119470 }, { "epoch": 4.95, "grad_norm": 0.640625, "learning_rate": 0.00046721768547752025, "loss": 0.1788, "step": 119480 }, { "epoch": 4.95, "grad_norm": 0.625, "learning_rate": 0.00046721231647849055, "loss": 0.2045, "step": 119490 }, { "epoch": 4.95, "grad_norm": 0.66796875, "learning_rate": 0.00046720694707069076, "loss": 0.1804, "step": 119500 }, { "epoch": 4.95, "grad_norm": 1.046875, "learning_rate": 0.00046720157725413093, "loss": 0.2526, "step": 119510 }, { "epoch": 4.95, "grad_norm": 1.3359375, "learning_rate": 0.00046719620702882113, "loss": 0.2729, "step": 119520 }, { "epoch": 4.95, "grad_norm": 0.6875, "learning_rate": 0.00046719083639477167, "loss": 0.2464, "step": 119530 }, { "epoch": 4.95, "grad_norm": 0.7734375, "learning_rate": 0.00046718546535199235, "loss": 0.217, "step": 119540 }, { "epoch": 4.95, "grad_norm": 0.5859375, "learning_rate": 0.00046718009390049354, "loss": 0.1941, "step": 119550 }, { "epoch": 4.95, "grad_norm": 0.6015625, "learning_rate": 0.0004671747220402852, "loss": 0.2238, "step": 119560 }, { "epoch": 4.95, "grad_norm": 0.36328125, "learning_rate": 0.0004671693497713775, "loss": 0.2069, "step": 119570 }, { "epoch": 4.95, "grad_norm": 0.74609375, "learning_rate": 0.00046716397709378055, "loss": 0.2041, "step": 119580 }, { "epoch": 4.95, "grad_norm": 0.73046875, "learning_rate": 0.0004671586040075045, "loss": 0.1852, "step": 119590 }, { "epoch": 4.95, "grad_norm": 0.59765625, "learning_rate": 0.00046715323051255934, "loss": 0.2246, "step": 119600 }, { "epoch": 4.95, "grad_norm": 0.37890625, "learning_rate": 0.0004671478566089552, "loss": 0.2382, "step": 119610 }, { "epoch": 4.95, "grad_norm": 1.3671875, "learning_rate": 0.0004671424822967023, "loss": 0.2053, "step": 119620 }, { "epoch": 4.96, "grad_norm": 0.5546875, "learning_rate": 0.00046713710757581074, "loss": 0.2478, "step": 119630 }, { "epoch": 4.96, "grad_norm": 0.435546875, "learning_rate": 0.0004671317324462906, "loss": 0.1985, "step": 119640 }, { "epoch": 4.96, "grad_norm": 0.94140625, "learning_rate": 0.00046712635690815185, "loss": 0.2049, "step": 119650 }, { "epoch": 4.96, "grad_norm": 0.81640625, "learning_rate": 0.0004671209809614049, "loss": 0.2086, "step": 119660 }, { "epoch": 4.96, "grad_norm": 1.03125, "learning_rate": 0.0004671156046060596, "loss": 0.2089, "step": 119670 }, { "epoch": 4.96, "grad_norm": 1.0, "learning_rate": 0.00046711022784212626, "loss": 0.2136, "step": 119680 }, { "epoch": 4.96, "grad_norm": 0.66015625, "learning_rate": 0.00046710485066961486, "loss": 0.2383, "step": 119690 }, { "epoch": 4.96, "grad_norm": 0.69140625, "learning_rate": 0.00046709947308853564, "loss": 0.1375, "step": 119700 }, { "epoch": 4.96, "grad_norm": 0.31640625, "learning_rate": 0.0004670940950988986, "loss": 0.1746, "step": 119710 }, { "epoch": 4.96, "grad_norm": 0.640625, "learning_rate": 0.0004670887167007139, "loss": 0.1984, "step": 119720 }, { "epoch": 4.96, "grad_norm": 0.6640625, "learning_rate": 0.00046708333789399173, "loss": 0.1965, "step": 119730 }, { "epoch": 4.96, "grad_norm": 1.3046875, "learning_rate": 0.0004670779586787421, "loss": 0.2305, "step": 119740 }, { "epoch": 4.96, "grad_norm": 0.279296875, "learning_rate": 0.00046707257905497526, "loss": 0.2104, "step": 119750 }, { "epoch": 4.96, "grad_norm": 0.55078125, "learning_rate": 0.00046706719902270125, "loss": 0.1979, "step": 119760 }, { "epoch": 4.96, "grad_norm": 0.306640625, "learning_rate": 0.0004670618185819302, "loss": 0.1989, "step": 119770 }, { "epoch": 4.96, "grad_norm": 1.0234375, "learning_rate": 0.00046705643773267225, "loss": 0.2098, "step": 119780 }, { "epoch": 4.96, "grad_norm": 0.81640625, "learning_rate": 0.00046705105647493753, "loss": 0.1724, "step": 119790 }, { "epoch": 4.96, "grad_norm": 0.6953125, "learning_rate": 0.00046704567480873615, "loss": 0.1859, "step": 119800 }, { "epoch": 4.96, "grad_norm": 1.28125, "learning_rate": 0.0004670402927340783, "loss": 0.2459, "step": 119810 }, { "epoch": 4.96, "grad_norm": 0.81640625, "learning_rate": 0.000467034910250974, "loss": 0.1854, "step": 119820 }, { "epoch": 4.96, "grad_norm": 0.181640625, "learning_rate": 0.00046702952735943346, "loss": 0.1654, "step": 119830 }, { "epoch": 4.96, "grad_norm": 1.71875, "learning_rate": 0.00046702414405946674, "loss": 0.2268, "step": 119840 }, { "epoch": 4.96, "grad_norm": 0.89453125, "learning_rate": 0.00046701876035108405, "loss": 0.169, "step": 119850 }, { "epoch": 4.96, "grad_norm": 1.078125, "learning_rate": 0.0004670133762342955, "loss": 0.2051, "step": 119860 }, { "epoch": 4.97, "grad_norm": 0.423828125, "learning_rate": 0.00046700799170911123, "loss": 0.202, "step": 119870 }, { "epoch": 4.97, "grad_norm": 0.6015625, "learning_rate": 0.00046700260677554127, "loss": 0.2125, "step": 119880 }, { "epoch": 4.97, "grad_norm": 0.75, "learning_rate": 0.00046699722143359593, "loss": 0.1807, "step": 119890 }, { "epoch": 4.97, "grad_norm": 0.83984375, "learning_rate": 0.00046699183568328526, "loss": 0.2171, "step": 119900 }, { "epoch": 4.97, "grad_norm": 0.88671875, "learning_rate": 0.0004669864495246193, "loss": 0.2318, "step": 119910 }, { "epoch": 4.97, "grad_norm": 0.49609375, "learning_rate": 0.00046698106295760836, "loss": 0.1742, "step": 119920 }, { "epoch": 4.97, "grad_norm": 0.6328125, "learning_rate": 0.0004669756759822624, "loss": 0.1854, "step": 119930 }, { "epoch": 4.97, "grad_norm": 1.2109375, "learning_rate": 0.00046697028859859173, "loss": 0.2247, "step": 119940 }, { "epoch": 4.97, "grad_norm": 0.3671875, "learning_rate": 0.0004669649008066064, "loss": 0.2185, "step": 119950 }, { "epoch": 4.97, "grad_norm": 0.431640625, "learning_rate": 0.0004669595126063165, "loss": 0.1947, "step": 119960 }, { "epoch": 4.97, "grad_norm": 1.28125, "learning_rate": 0.0004669541239977323, "loss": 0.1764, "step": 119970 }, { "epoch": 4.97, "grad_norm": 0.51953125, "learning_rate": 0.00046694873498086384, "loss": 0.228, "step": 119980 }, { "epoch": 4.97, "grad_norm": 0.435546875, "learning_rate": 0.0004669433455557213, "loss": 0.2079, "step": 119990 }, { "epoch": 4.97, "grad_norm": 0.90234375, "learning_rate": 0.00046693795572231477, "loss": 0.1636, "step": 120000 }, { "epoch": 4.97, "grad_norm": 0.32421875, "learning_rate": 0.0004669325654806544, "loss": 0.1951, "step": 120010 }, { "epoch": 4.97, "grad_norm": 0.73828125, "learning_rate": 0.0004669271748307505, "loss": 0.2226, "step": 120020 }, { "epoch": 4.97, "grad_norm": 1.03125, "learning_rate": 0.000466921783772613, "loss": 0.2044, "step": 120030 }, { "epoch": 4.97, "grad_norm": 0.50390625, "learning_rate": 0.00046691639230625215, "loss": 0.2333, "step": 120040 }, { "epoch": 4.97, "grad_norm": 2.265625, "learning_rate": 0.00046691100043167805, "loss": 0.2084, "step": 120050 }, { "epoch": 4.97, "grad_norm": 1.1953125, "learning_rate": 0.0004669056081489009, "loss": 0.2377, "step": 120060 }, { "epoch": 4.97, "grad_norm": 0.59765625, "learning_rate": 0.0004669002154579308, "loss": 0.29, "step": 120070 }, { "epoch": 4.97, "grad_norm": 0.392578125, "learning_rate": 0.0004668948223587779, "loss": 0.1755, "step": 120080 }, { "epoch": 4.97, "grad_norm": 0.26953125, "learning_rate": 0.0004668894288514524, "loss": 0.2263, "step": 120090 }, { "epoch": 4.97, "grad_norm": 0.435546875, "learning_rate": 0.0004668840349359644, "loss": 0.2052, "step": 120100 }, { "epoch": 4.97, "grad_norm": 0.6796875, "learning_rate": 0.0004668786406123241, "loss": 0.2532, "step": 120110 }, { "epoch": 4.98, "grad_norm": 0.9140625, "learning_rate": 0.0004668732458805416, "loss": 0.2061, "step": 120120 }, { "epoch": 4.98, "grad_norm": 0.87109375, "learning_rate": 0.000466867850740627, "loss": 0.1965, "step": 120130 }, { "epoch": 4.98, "grad_norm": 0.69140625, "learning_rate": 0.0004668624551925906, "loss": 0.2004, "step": 120140 }, { "epoch": 4.98, "grad_norm": 0.205078125, "learning_rate": 0.0004668570592364425, "loss": 0.2417, "step": 120150 }, { "epoch": 4.98, "grad_norm": 0.31640625, "learning_rate": 0.00046685166287219285, "loss": 0.1935, "step": 120160 }, { "epoch": 4.98, "grad_norm": 0.2080078125, "learning_rate": 0.0004668462660998517, "loss": 0.1748, "step": 120170 }, { "epoch": 4.98, "grad_norm": 0.828125, "learning_rate": 0.00046684086891942935, "loss": 0.2191, "step": 120180 }, { "epoch": 4.98, "grad_norm": 0.470703125, "learning_rate": 0.00046683547133093584, "loss": 0.2575, "step": 120190 }, { "epoch": 4.98, "grad_norm": 1.0078125, "learning_rate": 0.00046683007333438143, "loss": 0.2082, "step": 120200 }, { "epoch": 4.98, "grad_norm": 0.458984375, "learning_rate": 0.00046682467492977625, "loss": 0.2376, "step": 120210 }, { "epoch": 4.98, "grad_norm": 1.4375, "learning_rate": 0.0004668192761171305, "loss": 0.2537, "step": 120220 }, { "epoch": 4.98, "grad_norm": 0.63671875, "learning_rate": 0.00046681387689645416, "loss": 0.2194, "step": 120230 }, { "epoch": 4.98, "grad_norm": 0.78125, "learning_rate": 0.00046680847726775763, "loss": 0.2292, "step": 120240 }, { "epoch": 4.98, "grad_norm": 0.47265625, "learning_rate": 0.0004668030772310509, "loss": 0.1662, "step": 120250 }, { "epoch": 4.98, "grad_norm": 0.66796875, "learning_rate": 0.0004667976767863442, "loss": 0.1897, "step": 120260 }, { "epoch": 4.98, "grad_norm": 0.2060546875, "learning_rate": 0.00046679227593364773, "loss": 0.2263, "step": 120270 }, { "epoch": 4.98, "grad_norm": 0.0, "learning_rate": 0.00046678687467297154, "loss": 0.2444, "step": 120280 }, { "epoch": 4.98, "grad_norm": 1.0, "learning_rate": 0.00046678147300432585, "loss": 0.2121, "step": 120290 }, { "epoch": 4.98, "grad_norm": 0.86328125, "learning_rate": 0.0004667760709277209, "loss": 0.2257, "step": 120300 }, { "epoch": 4.98, "grad_norm": 1.21875, "learning_rate": 0.00046677066844316674, "loss": 0.2265, "step": 120310 }, { "epoch": 4.98, "grad_norm": 0.64453125, "learning_rate": 0.0004667652655506737, "loss": 0.2269, "step": 120320 }, { "epoch": 4.98, "grad_norm": 0.515625, "learning_rate": 0.0004667598622502517, "loss": 0.2166, "step": 120330 }, { "epoch": 4.98, "grad_norm": 0.68359375, "learning_rate": 0.00046675445854191114, "loss": 0.209, "step": 120340 }, { "epoch": 4.98, "grad_norm": 0.72265625, "learning_rate": 0.0004667490544256621, "loss": 0.2366, "step": 120350 }, { "epoch": 4.99, "grad_norm": 0.53515625, "learning_rate": 0.00046674364990151464, "loss": 0.2299, "step": 120360 }, { "epoch": 4.99, "grad_norm": 0.4609375, "learning_rate": 0.0004667382449694791, "loss": 0.1972, "step": 120370 }, { "epoch": 4.99, "grad_norm": 4.125, "learning_rate": 0.0004667328396295656, "loss": 0.2156, "step": 120380 }, { "epoch": 4.99, "grad_norm": 0.4453125, "learning_rate": 0.0004667274338817843, "loss": 0.1833, "step": 120390 }, { "epoch": 4.99, "grad_norm": 0.71484375, "learning_rate": 0.00046672202772614536, "loss": 0.1728, "step": 120400 }, { "epoch": 4.99, "grad_norm": 2.078125, "learning_rate": 0.000466716621162659, "loss": 0.2233, "step": 120410 }, { "epoch": 4.99, "grad_norm": 1.125, "learning_rate": 0.0004667112141913353, "loss": 0.2408, "step": 120420 }, { "epoch": 4.99, "grad_norm": 0.400390625, "learning_rate": 0.0004667058068121846, "loss": 0.2064, "step": 120430 }, { "epoch": 4.99, "grad_norm": 0.486328125, "learning_rate": 0.0004667003990252169, "loss": 0.1877, "step": 120440 }, { "epoch": 4.99, "grad_norm": 1.6484375, "learning_rate": 0.00046669499083044246, "loss": 0.2018, "step": 120450 }, { "epoch": 4.99, "grad_norm": 0.59765625, "learning_rate": 0.00046668958222787144, "loss": 0.1872, "step": 120460 }, { "epoch": 4.99, "grad_norm": 1.2265625, "learning_rate": 0.0004666841732175141, "loss": 0.2439, "step": 120470 }, { "epoch": 4.99, "grad_norm": 0.44921875, "learning_rate": 0.00046667876379938045, "loss": 0.2429, "step": 120480 }, { "epoch": 4.99, "grad_norm": 0.703125, "learning_rate": 0.00046667335397348076, "loss": 0.2325, "step": 120490 }, { "epoch": 4.99, "grad_norm": 0.25, "learning_rate": 0.0004666679437398253, "loss": 0.1812, "step": 120500 }, { "epoch": 4.99, "grad_norm": 0.462890625, "learning_rate": 0.0004666625330984241, "loss": 0.1836, "step": 120510 }, { "epoch": 4.99, "grad_norm": 2.25, "learning_rate": 0.00046665712204928744, "loss": 0.2656, "step": 120520 }, { "epoch": 4.99, "grad_norm": 1.28125, "learning_rate": 0.0004666517105924255, "loss": 0.2353, "step": 120530 }, { "epoch": 4.99, "grad_norm": 0.59375, "learning_rate": 0.00046664629872784837, "loss": 0.215, "step": 120540 }, { "epoch": 4.99, "grad_norm": 0.72265625, "learning_rate": 0.00046664088645556633, "loss": 0.1669, "step": 120550 }, { "epoch": 4.99, "grad_norm": 1.1015625, "learning_rate": 0.0004666354737755896, "loss": 0.1966, "step": 120560 }, { "epoch": 4.99, "grad_norm": 0.55859375, "learning_rate": 0.00046663006068792826, "loss": 0.2193, "step": 120570 }, { "epoch": 4.99, "grad_norm": 0.859375, "learning_rate": 0.00046662464719259244, "loss": 0.1603, "step": 120580 }, { "epoch": 4.99, "grad_norm": 0.4453125, "learning_rate": 0.00046661923328959254, "loss": 0.1765, "step": 120590 }, { "epoch": 5.0, "grad_norm": 2.390625, "learning_rate": 0.0004666138189789386, "loss": 0.2556, "step": 120600 }, { "epoch": 5.0, "grad_norm": 0.5546875, "learning_rate": 0.0004666084042606409, "loss": 0.1317, "step": 120610 }, { "epoch": 5.0, "grad_norm": 0.2451171875, "learning_rate": 0.00046660298913470957, "loss": 0.1899, "step": 120620 }, { "epoch": 5.0, "grad_norm": 0.55078125, "learning_rate": 0.0004665975736011547, "loss": 0.1869, "step": 120630 }, { "epoch": 5.0, "grad_norm": 0.76171875, "learning_rate": 0.0004665921576599867, "loss": 0.1985, "step": 120640 }, { "epoch": 5.0, "grad_norm": 1.125, "learning_rate": 0.0004665867413112156, "loss": 0.2366, "step": 120650 }, { "epoch": 5.0, "grad_norm": 0.703125, "learning_rate": 0.00046658132455485156, "loss": 0.1787, "step": 120660 }, { "epoch": 5.0, "grad_norm": 0.49609375, "learning_rate": 0.00046657590739090503, "loss": 0.1781, "step": 120670 }, { "epoch": 5.0, "grad_norm": 0.5625, "learning_rate": 0.0004665704898193859, "loss": 0.1872, "step": 120680 }, { "epoch": 5.0, "grad_norm": 0.45703125, "learning_rate": 0.00046656507184030454, "loss": 0.2261, "step": 120690 }, { "epoch": 5.0, "grad_norm": 0.625, "learning_rate": 0.0004665596534536711, "loss": 0.1959, "step": 120700 }, { "epoch": 5.0, "grad_norm": 0.1923828125, "learning_rate": 0.00046655423465949577, "loss": 0.188, "step": 120710 }, { "epoch": 5.0, "grad_norm": 0.65625, "learning_rate": 0.0004665488154577888, "loss": 0.2252, "step": 120720 }, { "epoch": 5.0, "grad_norm": 0.53515625, "learning_rate": 0.0004665433958485603, "loss": 0.1994, "step": 120730 }, { "epoch": 5.0, "grad_norm": 0.80078125, "learning_rate": 0.0004665379758318205, "loss": 0.1825, "step": 120740 }, { "epoch": 5.0, "grad_norm": 1.2734375, "learning_rate": 0.00046653255540757965, "loss": 0.1938, "step": 120750 }, { "epoch": 5.0, "grad_norm": 1.3515625, "learning_rate": 0.00046652713457584797, "loss": 0.1626, "step": 120760 }, { "epoch": 5.0, "grad_norm": 0.984375, "learning_rate": 0.00046652171333663544, "loss": 0.1977, "step": 120770 }, { "epoch": 5.0, "grad_norm": 0.5234375, "learning_rate": 0.00046651629168995256, "loss": 0.1632, "step": 120780 }, { "epoch": 5.0, "grad_norm": 0.400390625, "learning_rate": 0.00046651086963580933, "loss": 0.1447, "step": 120790 }, { "epoch": 5.0, "grad_norm": 0.2216796875, "learning_rate": 0.00046650544717421604, "loss": 0.1765, "step": 120800 }, { "epoch": 5.0, "grad_norm": 1.0859375, "learning_rate": 0.0004665000243051829, "loss": 0.2307, "step": 120810 }, { "epoch": 5.0, "grad_norm": 0.76171875, "learning_rate": 0.0004664946010287201, "loss": 0.2311, "step": 120820 }, { "epoch": 5.0, "grad_norm": 0.50390625, "learning_rate": 0.00046648917734483787, "loss": 0.1729, "step": 120830 }, { "epoch": 5.01, "grad_norm": 0.82421875, "learning_rate": 0.0004664837532535463, "loss": 0.2345, "step": 120840 }, { "epoch": 5.01, "grad_norm": 0.85546875, "learning_rate": 0.00046647832875485574, "loss": 0.224, "step": 120850 }, { "epoch": 5.01, "grad_norm": 0.85546875, "learning_rate": 0.00046647290384877637, "loss": 0.2309, "step": 120860 }, { "epoch": 5.01, "grad_norm": 0.57421875, "learning_rate": 0.00046646747853531834, "loss": 0.2506, "step": 120870 }, { "epoch": 5.01, "grad_norm": 0.419921875, "learning_rate": 0.0004664620528144919, "loss": 0.1849, "step": 120880 }, { "epoch": 5.01, "grad_norm": 1.0078125, "learning_rate": 0.00046645662668630725, "loss": 0.1615, "step": 120890 }, { "epoch": 5.01, "grad_norm": 0.51953125, "learning_rate": 0.0004664512001507746, "loss": 0.206, "step": 120900 }, { "epoch": 5.01, "grad_norm": 0.41015625, "learning_rate": 0.0004664457732079042, "loss": 0.2335, "step": 120910 }, { "epoch": 5.01, "grad_norm": 0.486328125, "learning_rate": 0.0004664403458577061, "loss": 0.212, "step": 120920 }, { "epoch": 5.01, "grad_norm": 0.86328125, "learning_rate": 0.00046643491810019076, "loss": 0.2294, "step": 120930 }, { "epoch": 5.01, "grad_norm": 1.2421875, "learning_rate": 0.0004664294899353683, "loss": 0.1667, "step": 120940 }, { "epoch": 5.01, "grad_norm": 0.490234375, "learning_rate": 0.00046642406136324885, "loss": 0.2102, "step": 120950 }, { "epoch": 5.01, "grad_norm": 0.72265625, "learning_rate": 0.00046641863238384263, "loss": 0.2137, "step": 120960 }, { "epoch": 5.01, "grad_norm": 0.7578125, "learning_rate": 0.00046641320299716004, "loss": 0.1813, "step": 120970 }, { "epoch": 5.01, "grad_norm": 0.482421875, "learning_rate": 0.00046640777320321116, "loss": 0.1878, "step": 120980 }, { "epoch": 5.01, "grad_norm": 0.5625, "learning_rate": 0.0004664023430020061, "loss": 0.182, "step": 120990 }, { "epoch": 5.01, "grad_norm": 0.9765625, "learning_rate": 0.0004663969123935553, "loss": 0.1721, "step": 121000 }, { "epoch": 5.01, "grad_norm": 1.1328125, "learning_rate": 0.0004663914813778689, "loss": 0.1487, "step": 121010 }, { "epoch": 5.01, "grad_norm": 1.140625, "learning_rate": 0.00046638604995495706, "loss": 0.2151, "step": 121020 }, { "epoch": 5.01, "grad_norm": 0.67578125, "learning_rate": 0.00046638061812483004, "loss": 0.2017, "step": 121030 }, { "epoch": 5.01, "grad_norm": 0.58203125, "learning_rate": 0.00046637518588749804, "loss": 0.2139, "step": 121040 }, { "epoch": 5.01, "grad_norm": 0.515625, "learning_rate": 0.0004663697532429713, "loss": 0.1804, "step": 121050 }, { "epoch": 5.01, "grad_norm": 1.0234375, "learning_rate": 0.00046636432019126015, "loss": 0.14, "step": 121060 }, { "epoch": 5.01, "grad_norm": 0.88671875, "learning_rate": 0.0004663588867323746, "loss": 0.1531, "step": 121070 }, { "epoch": 5.02, "grad_norm": 0.78125, "learning_rate": 0.00046635345286632503, "loss": 0.2093, "step": 121080 }, { "epoch": 5.02, "grad_norm": 0.671875, "learning_rate": 0.00046634801859312167, "loss": 0.264, "step": 121090 }, { "epoch": 5.02, "grad_norm": 1.15625, "learning_rate": 0.00046634258391277465, "loss": 0.2324, "step": 121100 }, { "epoch": 5.02, "grad_norm": 0.72265625, "learning_rate": 0.00046633714882529426, "loss": 0.154, "step": 121110 }, { "epoch": 5.02, "grad_norm": 0.6640625, "learning_rate": 0.0004663317133306907, "loss": 0.1995, "step": 121120 }, { "epoch": 5.02, "grad_norm": 0.40625, "learning_rate": 0.00046632627742897425, "loss": 0.2082, "step": 121130 }, { "epoch": 5.02, "grad_norm": 0.9765625, "learning_rate": 0.0004663208411201552, "loss": 0.1924, "step": 121140 }, { "epoch": 5.02, "grad_norm": 0.45703125, "learning_rate": 0.00046631540440424346, "loss": 0.2032, "step": 121150 }, { "epoch": 5.02, "grad_norm": 0.53515625, "learning_rate": 0.00046630996728124966, "loss": 0.1955, "step": 121160 }, { "epoch": 5.02, "grad_norm": 1.1015625, "learning_rate": 0.0004663045297511838, "loss": 0.1894, "step": 121170 }, { "epoch": 5.02, "grad_norm": 0.51953125, "learning_rate": 0.00046629909181405624, "loss": 0.2341, "step": 121180 }, { "epoch": 5.02, "grad_norm": 0.90234375, "learning_rate": 0.00046629365346987706, "loss": 0.1922, "step": 121190 }, { "epoch": 5.02, "grad_norm": 0.56640625, "learning_rate": 0.0004662882147186566, "loss": 0.2318, "step": 121200 }, { "epoch": 5.02, "grad_norm": 1.171875, "learning_rate": 0.0004662827755604051, "loss": 0.2451, "step": 121210 }, { "epoch": 5.02, "grad_norm": 0.88671875, "learning_rate": 0.0004662773359951328, "loss": 0.1811, "step": 121220 }, { "epoch": 5.02, "grad_norm": 0.73828125, "learning_rate": 0.00046627189602284987, "loss": 0.2059, "step": 121230 }, { "epoch": 5.02, "grad_norm": 0.640625, "learning_rate": 0.0004662664556435666, "loss": 0.2177, "step": 121240 }, { "epoch": 5.02, "grad_norm": 2.0625, "learning_rate": 0.0004662610148572932, "loss": 0.2587, "step": 121250 }, { "epoch": 5.02, "grad_norm": 0.91796875, "learning_rate": 0.00046625557366403994, "loss": 0.1885, "step": 121260 }, { "epoch": 5.02, "grad_norm": 1.9296875, "learning_rate": 0.00046625013206381705, "loss": 0.1966, "step": 121270 }, { "epoch": 5.02, "grad_norm": 1.5078125, "learning_rate": 0.0004662446900566347, "loss": 0.2202, "step": 121280 }, { "epoch": 5.02, "grad_norm": 0.5234375, "learning_rate": 0.00046623924764250326, "loss": 0.216, "step": 121290 }, { "epoch": 5.02, "grad_norm": 0.6484375, "learning_rate": 0.0004662338048214329, "loss": 0.2189, "step": 121300 }, { "epoch": 5.02, "grad_norm": 0.375, "learning_rate": 0.0004662283615934339, "loss": 0.217, "step": 121310 }, { "epoch": 5.03, "grad_norm": 1.09375, "learning_rate": 0.00046622291795851646, "loss": 0.2265, "step": 121320 }, { "epoch": 5.03, "grad_norm": 0.76171875, "learning_rate": 0.0004662174739166908, "loss": 0.245, "step": 121330 }, { "epoch": 5.03, "grad_norm": 0.455078125, "learning_rate": 0.0004662120294679672, "loss": 0.2073, "step": 121340 }, { "epoch": 5.03, "grad_norm": 0.29296875, "learning_rate": 0.00046620658461235596, "loss": 0.1524, "step": 121350 }, { "epoch": 5.03, "grad_norm": 0.314453125, "learning_rate": 0.0004662011393498672, "loss": 0.206, "step": 121360 }, { "epoch": 5.03, "grad_norm": 0.5546875, "learning_rate": 0.00046619569368051134, "loss": 0.2014, "step": 121370 }, { "epoch": 5.03, "grad_norm": 0.5234375, "learning_rate": 0.0004661902476042985, "loss": 0.2266, "step": 121380 }, { "epoch": 5.03, "grad_norm": 0.84375, "learning_rate": 0.000466184801121239, "loss": 0.2302, "step": 121390 }, { "epoch": 5.03, "grad_norm": 0.5234375, "learning_rate": 0.00046617935423134293, "loss": 0.2336, "step": 121400 }, { "epoch": 5.03, "grad_norm": 0.546875, "learning_rate": 0.0004661739069346207, "loss": 0.2105, "step": 121410 }, { "epoch": 5.03, "grad_norm": 0.65625, "learning_rate": 0.0004661684592310825, "loss": 0.1569, "step": 121420 }, { "epoch": 5.03, "grad_norm": 0.8125, "learning_rate": 0.00046616301112073873, "loss": 0.1696, "step": 121430 }, { "epoch": 5.03, "grad_norm": 0.1396484375, "learning_rate": 0.0004661575626035994, "loss": 0.1872, "step": 121440 }, { "epoch": 5.03, "grad_norm": 0.2490234375, "learning_rate": 0.0004661521136796749, "loss": 0.2681, "step": 121450 }, { "epoch": 5.03, "grad_norm": 0.326171875, "learning_rate": 0.00046614666434897554, "loss": 0.2162, "step": 121460 }, { "epoch": 5.03, "grad_norm": 0.98828125, "learning_rate": 0.00046614121461151145, "loss": 0.202, "step": 121470 }, { "epoch": 5.03, "grad_norm": 0.6640625, "learning_rate": 0.00046613576446729286, "loss": 0.1707, "step": 121480 }, { "epoch": 5.03, "grad_norm": 0.6875, "learning_rate": 0.0004661303139163302, "loss": 0.1897, "step": 121490 }, { "epoch": 5.03, "grad_norm": 1.40625, "learning_rate": 0.0004661248629586336, "loss": 0.1558, "step": 121500 }, { "epoch": 5.03, "grad_norm": 1.125, "learning_rate": 0.00046611941159421333, "loss": 0.2191, "step": 121510 }, { "epoch": 5.03, "grad_norm": 0.349609375, "learning_rate": 0.0004661139598230797, "loss": 0.1856, "step": 121520 }, { "epoch": 5.03, "grad_norm": 1.734375, "learning_rate": 0.0004661085076452429, "loss": 0.2339, "step": 121530 }, { "epoch": 5.03, "grad_norm": 0.427734375, "learning_rate": 0.00046610305506071325, "loss": 0.2039, "step": 121540 }, { "epoch": 5.03, "grad_norm": 0.341796875, "learning_rate": 0.00046609760206950103, "loss": 0.1532, "step": 121550 }, { "epoch": 5.03, "grad_norm": 0.50390625, "learning_rate": 0.00046609214867161645, "loss": 0.22, "step": 121560 }, { "epoch": 5.04, "grad_norm": 0.79296875, "learning_rate": 0.0004660866948670697, "loss": 0.1524, "step": 121570 }, { "epoch": 5.04, "grad_norm": 1.0625, "learning_rate": 0.0004660812406558712, "loss": 0.2568, "step": 121580 }, { "epoch": 5.04, "grad_norm": 0.66796875, "learning_rate": 0.00046607578603803104, "loss": 0.1989, "step": 121590 }, { "epoch": 5.04, "grad_norm": 0.6328125, "learning_rate": 0.0004660703310135597, "loss": 0.205, "step": 121600 }, { "epoch": 5.04, "grad_norm": 0.71875, "learning_rate": 0.00046606487558246724, "loss": 0.1838, "step": 121610 }, { "epoch": 5.04, "grad_norm": 0.53125, "learning_rate": 0.00046605941974476407, "loss": 0.2328, "step": 121620 }, { "epoch": 5.04, "grad_norm": 0.65234375, "learning_rate": 0.0004660539635004604, "loss": 0.2074, "step": 121630 }, { "epoch": 5.04, "grad_norm": 0.4765625, "learning_rate": 0.0004660485068495665, "loss": 0.2476, "step": 121640 }, { "epoch": 5.04, "grad_norm": 0.423828125, "learning_rate": 0.0004660430497920927, "loss": 0.158, "step": 121650 }, { "epoch": 5.04, "grad_norm": 0.349609375, "learning_rate": 0.0004660375923280491, "loss": 0.1619, "step": 121660 }, { "epoch": 5.04, "grad_norm": 0.68359375, "learning_rate": 0.0004660321344574461, "loss": 0.1779, "step": 121670 }, { "epoch": 5.04, "grad_norm": 1.6875, "learning_rate": 0.000466026676180294, "loss": 0.2354, "step": 121680 }, { "epoch": 5.04, "grad_norm": 0.3203125, "learning_rate": 0.00046602121749660303, "loss": 0.1931, "step": 121690 }, { "epoch": 5.04, "grad_norm": 0.369140625, "learning_rate": 0.00046601575840638345, "loss": 0.218, "step": 121700 }, { "epoch": 5.04, "grad_norm": 1.203125, "learning_rate": 0.0004660102989096455, "loss": 0.2223, "step": 121710 }, { "epoch": 5.04, "grad_norm": 0.6328125, "learning_rate": 0.00046600483900639957, "loss": 0.2138, "step": 121720 }, { "epoch": 5.04, "grad_norm": 0.66015625, "learning_rate": 0.0004659993786966558, "loss": 0.1769, "step": 121730 }, { "epoch": 5.04, "grad_norm": 0.50390625, "learning_rate": 0.0004659939179804245, "loss": 0.167, "step": 121740 }, { "epoch": 5.04, "grad_norm": 1.0, "learning_rate": 0.000465988456857716, "loss": 0.2725, "step": 121750 }, { "epoch": 5.04, "grad_norm": 1.5859375, "learning_rate": 0.0004659829953285406, "loss": 0.1993, "step": 121760 }, { "epoch": 5.04, "grad_norm": 1.3984375, "learning_rate": 0.00046597753339290847, "loss": 0.2104, "step": 121770 }, { "epoch": 5.04, "grad_norm": 0.8984375, "learning_rate": 0.0004659720710508299, "loss": 0.1964, "step": 121780 }, { "epoch": 5.04, "grad_norm": 0.68359375, "learning_rate": 0.0004659666083023153, "loss": 0.2153, "step": 121790 }, { "epoch": 5.04, "grad_norm": 0.7578125, "learning_rate": 0.00046596114514737484, "loss": 0.1868, "step": 121800 }, { "epoch": 5.05, "grad_norm": 0.80078125, "learning_rate": 0.00046595568158601885, "loss": 0.2031, "step": 121810 }, { "epoch": 5.05, "grad_norm": 0.6796875, "learning_rate": 0.0004659502176182576, "loss": 0.1936, "step": 121820 }, { "epoch": 5.05, "grad_norm": 0.06884765625, "learning_rate": 0.0004659447532441013, "loss": 0.1748, "step": 121830 }, { "epoch": 5.05, "grad_norm": 0.54296875, "learning_rate": 0.0004659392884635603, "loss": 0.2049, "step": 121840 }, { "epoch": 5.05, "grad_norm": 0.65234375, "learning_rate": 0.0004659338232766449, "loss": 0.1741, "step": 121850 }, { "epoch": 5.05, "grad_norm": 0.65625, "learning_rate": 0.0004659283576833654, "loss": 0.1955, "step": 121860 }, { "epoch": 5.05, "grad_norm": 0.8046875, "learning_rate": 0.00046592289168373197, "loss": 0.2223, "step": 121870 }, { "epoch": 5.05, "grad_norm": 1.3828125, "learning_rate": 0.00046591742527775506, "loss": 0.1882, "step": 121880 }, { "epoch": 5.05, "grad_norm": 1.046875, "learning_rate": 0.00046591195846544477, "loss": 0.1993, "step": 121890 }, { "epoch": 5.05, "grad_norm": 0.8046875, "learning_rate": 0.00046590649124681155, "loss": 0.2295, "step": 121900 }, { "epoch": 5.05, "grad_norm": 0.392578125, "learning_rate": 0.00046590102362186565, "loss": 0.2263, "step": 121910 }, { "epoch": 5.05, "grad_norm": 0.40234375, "learning_rate": 0.0004658955555906174, "loss": 0.2003, "step": 121920 }, { "epoch": 5.05, "grad_norm": 0.484375, "learning_rate": 0.0004658900871530769, "loss": 0.2198, "step": 121930 }, { "epoch": 5.05, "grad_norm": 1.3828125, "learning_rate": 0.00046588461830925456, "loss": 0.1799, "step": 121940 }, { "epoch": 5.05, "grad_norm": 0.98828125, "learning_rate": 0.00046587914905916073, "loss": 0.2693, "step": 121950 }, { "epoch": 5.05, "grad_norm": 0.81640625, "learning_rate": 0.00046587367940280566, "loss": 0.2159, "step": 121960 }, { "epoch": 5.05, "grad_norm": 0.6484375, "learning_rate": 0.00046586820934019967, "loss": 0.2307, "step": 121970 }, { "epoch": 5.05, "grad_norm": 0.404296875, "learning_rate": 0.000465862738871353, "loss": 0.1757, "step": 121980 }, { "epoch": 5.05, "grad_norm": 0.466796875, "learning_rate": 0.00046585726799627596, "loss": 0.2398, "step": 121990 }, { "epoch": 5.05, "grad_norm": 0.73828125, "learning_rate": 0.00046585179671497886, "loss": 0.2568, "step": 122000 }, { "epoch": 5.05, "grad_norm": 1.796875, "learning_rate": 0.00046584632502747196, "loss": 0.2081, "step": 122010 }, { "epoch": 5.05, "grad_norm": 1.9375, "learning_rate": 0.00046584085293376563, "loss": 0.2265, "step": 122020 }, { "epoch": 5.05, "grad_norm": 0.84765625, "learning_rate": 0.00046583538043387006, "loss": 0.212, "step": 122030 }, { "epoch": 5.05, "grad_norm": 0.88671875, "learning_rate": 0.0004658299075277957, "loss": 0.2394, "step": 122040 }, { "epoch": 5.06, "grad_norm": 1.1484375, "learning_rate": 0.0004658244342155527, "loss": 0.2233, "step": 122050 }, { "epoch": 5.06, "grad_norm": 0.271484375, "learning_rate": 0.00046581896049715144, "loss": 0.1909, "step": 122060 }, { "epoch": 5.06, "grad_norm": 0.83203125, "learning_rate": 0.0004658134863726022, "loss": 0.2325, "step": 122070 }, { "epoch": 5.06, "grad_norm": 0.41015625, "learning_rate": 0.0004658080118419153, "loss": 0.2293, "step": 122080 }, { "epoch": 5.06, "grad_norm": 0.75390625, "learning_rate": 0.00046580253690510097, "loss": 0.1997, "step": 122090 }, { "epoch": 5.06, "grad_norm": 0.83984375, "learning_rate": 0.0004657970615621696, "loss": 0.2328, "step": 122100 }, { "epoch": 5.06, "grad_norm": 0.423828125, "learning_rate": 0.0004657915858131315, "loss": 0.2137, "step": 122110 }, { "epoch": 5.06, "grad_norm": 0.306640625, "learning_rate": 0.00046578610965799697, "loss": 0.1958, "step": 122120 }, { "epoch": 5.06, "grad_norm": 0.734375, "learning_rate": 0.00046578063309677623, "loss": 0.1884, "step": 122130 }, { "epoch": 5.06, "grad_norm": 0.84765625, "learning_rate": 0.00046577515612947955, "loss": 0.1846, "step": 122140 }, { "epoch": 5.06, "grad_norm": 0.5625, "learning_rate": 0.00046576967875611744, "loss": 0.191, "step": 122150 }, { "epoch": 5.06, "grad_norm": 1.453125, "learning_rate": 0.0004657642009767001, "loss": 0.2601, "step": 122160 }, { "epoch": 5.06, "grad_norm": 2.09375, "learning_rate": 0.00046575872279123776, "loss": 0.2215, "step": 122170 }, { "epoch": 5.06, "grad_norm": 0.0, "learning_rate": 0.00046575324419974084, "loss": 0.2201, "step": 122180 }, { "epoch": 5.06, "grad_norm": 0.8984375, "learning_rate": 0.0004657477652022196, "loss": 0.1897, "step": 122190 }, { "epoch": 5.06, "grad_norm": 0.34375, "learning_rate": 0.0004657422857986844, "loss": 0.2109, "step": 122200 }, { "epoch": 5.06, "grad_norm": 0.890625, "learning_rate": 0.0004657368059891456, "loss": 0.2662, "step": 122210 }, { "epoch": 5.06, "grad_norm": 1.15625, "learning_rate": 0.00046573132577361326, "loss": 0.1819, "step": 122220 }, { "epoch": 5.06, "grad_norm": 0.396484375, "learning_rate": 0.00046572584515209794, "loss": 0.1925, "step": 122230 }, { "epoch": 5.06, "grad_norm": 0.80859375, "learning_rate": 0.0004657203641246099, "loss": 0.2046, "step": 122240 }, { "epoch": 5.06, "grad_norm": 0.703125, "learning_rate": 0.00046571488269115936, "loss": 0.1895, "step": 122250 }, { "epoch": 5.06, "grad_norm": 0.279296875, "learning_rate": 0.00046570940085175676, "loss": 0.2285, "step": 122260 }, { "epoch": 5.06, "grad_norm": 0.482421875, "learning_rate": 0.0004657039186064124, "loss": 0.2557, "step": 122270 }, { "epoch": 5.06, "grad_norm": 1.0625, "learning_rate": 0.0004656984359551365, "loss": 0.24, "step": 122280 }, { "epoch": 5.07, "grad_norm": 0.63671875, "learning_rate": 0.00046569295289793945, "loss": 0.1798, "step": 122290 }, { "epoch": 5.07, "grad_norm": 1.2734375, "learning_rate": 0.00046568746943483156, "loss": 0.213, "step": 122300 }, { "epoch": 5.07, "grad_norm": 1.234375, "learning_rate": 0.0004656819855658231, "loss": 0.241, "step": 122310 }, { "epoch": 5.07, "grad_norm": 0.58984375, "learning_rate": 0.0004656765012909245, "loss": 0.2394, "step": 122320 }, { "epoch": 5.07, "grad_norm": 0.7890625, "learning_rate": 0.000465671016610146, "loss": 0.1733, "step": 122330 }, { "epoch": 5.07, "grad_norm": 1.1171875, "learning_rate": 0.0004656655315234979, "loss": 0.2192, "step": 122340 }, { "epoch": 5.07, "grad_norm": 1.9453125, "learning_rate": 0.00046566004603099055, "loss": 0.2123, "step": 122350 }, { "epoch": 5.07, "grad_norm": 0.451171875, "learning_rate": 0.0004656545601326343, "loss": 0.2011, "step": 122360 }, { "epoch": 5.07, "grad_norm": 0.70703125, "learning_rate": 0.00046564907382843946, "loss": 0.2215, "step": 122370 }, { "epoch": 5.07, "grad_norm": 0.765625, "learning_rate": 0.00046564358711841634, "loss": 0.2106, "step": 122380 }, { "epoch": 5.07, "grad_norm": 0.4921875, "learning_rate": 0.00046563810000257533, "loss": 0.2267, "step": 122390 }, { "epoch": 5.07, "grad_norm": 0.94921875, "learning_rate": 0.0004656326124809267, "loss": 0.2131, "step": 122400 }, { "epoch": 5.07, "grad_norm": 0.76953125, "learning_rate": 0.0004656271245534808, "loss": 0.2102, "step": 122410 }, { "epoch": 5.07, "grad_norm": 0.6953125, "learning_rate": 0.0004656216362202478, "loss": 0.229, "step": 122420 }, { "epoch": 5.07, "grad_norm": 0.47265625, "learning_rate": 0.0004656161474812383, "loss": 0.1925, "step": 122430 }, { "epoch": 5.07, "grad_norm": 0.44921875, "learning_rate": 0.0004656106583364624, "loss": 0.182, "step": 122440 }, { "epoch": 5.07, "grad_norm": 0.47265625, "learning_rate": 0.0004656051687859306, "loss": 0.2461, "step": 122450 }, { "epoch": 5.07, "grad_norm": 0.6328125, "learning_rate": 0.00046559967882965316, "loss": 0.1862, "step": 122460 }, { "epoch": 5.07, "grad_norm": 1.1796875, "learning_rate": 0.00046559418846764037, "loss": 0.2143, "step": 122470 }, { "epoch": 5.07, "grad_norm": 0.96484375, "learning_rate": 0.00046558869769990264, "loss": 0.1619, "step": 122480 }, { "epoch": 5.07, "grad_norm": 0.73828125, "learning_rate": 0.0004655832065264502, "loss": 0.2357, "step": 122490 }, { "epoch": 5.07, "grad_norm": 0.7734375, "learning_rate": 0.0004655777149472935, "loss": 0.2115, "step": 122500 }, { "epoch": 5.07, "grad_norm": 0.24609375, "learning_rate": 0.00046557222296244276, "loss": 0.2042, "step": 122510 }, { "epoch": 5.07, "grad_norm": 1.1484375, "learning_rate": 0.00046556673057190846, "loss": 0.1711, "step": 122520 }, { "epoch": 5.08, "grad_norm": 0.55859375, "learning_rate": 0.0004655612377757008, "loss": 0.2142, "step": 122530 }, { "epoch": 5.08, "grad_norm": 0.625, "learning_rate": 0.00046555574457383015, "loss": 0.1867, "step": 122540 }, { "epoch": 5.08, "grad_norm": 0.54296875, "learning_rate": 0.0004655502509663069, "loss": 0.1989, "step": 122550 }, { "epoch": 5.08, "grad_norm": 0.5234375, "learning_rate": 0.00046554475695314136, "loss": 0.2054, "step": 122560 }, { "epoch": 5.08, "grad_norm": 1.4609375, "learning_rate": 0.0004655392625343439, "loss": 0.226, "step": 122570 }, { "epoch": 5.08, "grad_norm": 0.875, "learning_rate": 0.00046553376770992473, "loss": 0.1325, "step": 122580 }, { "epoch": 5.08, "grad_norm": 0.66796875, "learning_rate": 0.0004655282724798943, "loss": 0.2088, "step": 122590 }, { "epoch": 5.08, "grad_norm": 0.73046875, "learning_rate": 0.00046552277684426297, "loss": 0.2073, "step": 122600 }, { "epoch": 5.08, "grad_norm": 0.26953125, "learning_rate": 0.000465517280803041, "loss": 0.2378, "step": 122610 }, { "epoch": 5.08, "grad_norm": 0.40234375, "learning_rate": 0.00046551178435623886, "loss": 0.2089, "step": 122620 }, { "epoch": 5.08, "grad_norm": 0.55859375, "learning_rate": 0.0004655062875038668, "loss": 0.2625, "step": 122630 }, { "epoch": 5.08, "grad_norm": 0.5546875, "learning_rate": 0.0004655007902459352, "loss": 0.2126, "step": 122640 }, { "epoch": 5.08, "grad_norm": 0.5234375, "learning_rate": 0.00046549529258245426, "loss": 0.2407, "step": 122650 }, { "epoch": 5.08, "grad_norm": 0.9921875, "learning_rate": 0.00046548979451343456, "loss": 0.1837, "step": 122660 }, { "epoch": 5.08, "grad_norm": 0.71875, "learning_rate": 0.00046548429603888633, "loss": 0.2154, "step": 122670 }, { "epoch": 5.08, "grad_norm": 0.86328125, "learning_rate": 0.00046547879715881993, "loss": 0.1902, "step": 122680 }, { "epoch": 5.08, "grad_norm": 0.4140625, "learning_rate": 0.00046547329787324565, "loss": 0.1598, "step": 122690 }, { "epoch": 5.08, "grad_norm": 0.84375, "learning_rate": 0.00046546779818217395, "loss": 0.2369, "step": 122700 }, { "epoch": 5.08, "grad_norm": 0.75, "learning_rate": 0.00046546229808561515, "loss": 0.2212, "step": 122710 }, { "epoch": 5.08, "grad_norm": 0.80078125, "learning_rate": 0.0004654567975835795, "loss": 0.2113, "step": 122720 }, { "epoch": 5.08, "grad_norm": 0.1796875, "learning_rate": 0.0004654512966760774, "loss": 0.2098, "step": 122730 }, { "epoch": 5.08, "grad_norm": 0.64453125, "learning_rate": 0.00046544579536311926, "loss": 0.1881, "step": 122740 }, { "epoch": 5.08, "grad_norm": 0.224609375, "learning_rate": 0.0004654402936447154, "loss": 0.1782, "step": 122750 }, { "epoch": 5.08, "grad_norm": 0.5078125, "learning_rate": 0.0004654347915208762, "loss": 0.2336, "step": 122760 }, { "epoch": 5.09, "grad_norm": 0.466796875, "learning_rate": 0.00046542928899161195, "loss": 0.1638, "step": 122770 }, { "epoch": 5.09, "grad_norm": 0.70703125, "learning_rate": 0.0004654237860569331, "loss": 0.2127, "step": 122780 }, { "epoch": 5.09, "grad_norm": 0.466796875, "learning_rate": 0.0004654182827168499, "loss": 0.2531, "step": 122790 }, { "epoch": 5.09, "grad_norm": 0.2734375, "learning_rate": 0.0004654127789713728, "loss": 0.1886, "step": 122800 }, { "epoch": 5.09, "grad_norm": 0.671875, "learning_rate": 0.00046540727482051203, "loss": 0.1458, "step": 122810 }, { "epoch": 5.09, "grad_norm": 0.5234375, "learning_rate": 0.0004654017702642781, "loss": 0.1909, "step": 122820 }, { "epoch": 5.09, "grad_norm": 1.234375, "learning_rate": 0.00046539626530268133, "loss": 0.1794, "step": 122830 }, { "epoch": 5.09, "grad_norm": 0.85546875, "learning_rate": 0.00046539075993573195, "loss": 0.1976, "step": 122840 }, { "epoch": 5.09, "grad_norm": 0.8125, "learning_rate": 0.0004653852541634405, "loss": 0.2112, "step": 122850 }, { "epoch": 5.09, "grad_norm": 1.078125, "learning_rate": 0.0004653797479858172, "loss": 0.1868, "step": 122860 }, { "epoch": 5.09, "grad_norm": 0.72265625, "learning_rate": 0.00046537424140287247, "loss": 0.2186, "step": 122870 }, { "epoch": 5.09, "grad_norm": 0.56640625, "learning_rate": 0.00046536873441461676, "loss": 0.1761, "step": 122880 }, { "epoch": 5.09, "grad_norm": 0.66796875, "learning_rate": 0.00046536322702106026, "loss": 0.1785, "step": 122890 }, { "epoch": 5.09, "grad_norm": 0.56640625, "learning_rate": 0.0004653577192222135, "loss": 0.1697, "step": 122900 }, { "epoch": 5.09, "grad_norm": 0.94140625, "learning_rate": 0.0004653522110180867, "loss": 0.2322, "step": 122910 }, { "epoch": 5.09, "grad_norm": 1.328125, "learning_rate": 0.00046534670240869035, "loss": 0.2449, "step": 122920 }, { "epoch": 5.09, "grad_norm": 0.359375, "learning_rate": 0.0004653411933940347, "loss": 0.1869, "step": 122930 }, { "epoch": 5.09, "grad_norm": 0.318359375, "learning_rate": 0.00046533568397413025, "loss": 0.2086, "step": 122940 }, { "epoch": 5.09, "grad_norm": 0.7265625, "learning_rate": 0.00046533017414898724, "loss": 0.2052, "step": 122950 }, { "epoch": 5.09, "grad_norm": 0.921875, "learning_rate": 0.0004653246639186162, "loss": 0.2259, "step": 122960 }, { "epoch": 5.09, "grad_norm": 0.6796875, "learning_rate": 0.00046531915328302723, "loss": 0.2178, "step": 122970 }, { "epoch": 5.09, "grad_norm": 0.333984375, "learning_rate": 0.00046531364224223094, "loss": 0.2167, "step": 122980 }, { "epoch": 5.09, "grad_norm": 0.5, "learning_rate": 0.00046530813079623766, "loss": 0.1659, "step": 122990 }, { "epoch": 5.09, "grad_norm": 0.322265625, "learning_rate": 0.0004653026189450577, "loss": 0.2093, "step": 123000 }, { "epoch": 5.1, "grad_norm": 0.8203125, "learning_rate": 0.00046529710668870153, "loss": 0.1655, "step": 123010 }, { "epoch": 5.1, "grad_norm": 0.4609375, "learning_rate": 0.00046529159402717933, "loss": 0.1694, "step": 123020 }, { "epoch": 5.1, "grad_norm": 0.4609375, "learning_rate": 0.0004652860809605017, "loss": 0.1936, "step": 123030 }, { "epoch": 5.1, "grad_norm": 1.59375, "learning_rate": 0.00046528056748867887, "loss": 0.1704, "step": 123040 }, { "epoch": 5.1, "grad_norm": 2.25, "learning_rate": 0.0004652750536117213, "loss": 0.2272, "step": 123050 }, { "epoch": 5.1, "grad_norm": 0.484375, "learning_rate": 0.0004652695393296393, "loss": 0.2575, "step": 123060 }, { "epoch": 5.1, "grad_norm": 0.52734375, "learning_rate": 0.0004652640246424432, "loss": 0.1952, "step": 123070 }, { "epoch": 5.1, "grad_norm": 0.78125, "learning_rate": 0.0004652585095501436, "loss": 0.2625, "step": 123080 }, { "epoch": 5.1, "grad_norm": 1.0234375, "learning_rate": 0.0004652529940527506, "loss": 0.2118, "step": 123090 }, { "epoch": 5.1, "grad_norm": 0.73046875, "learning_rate": 0.0004652474781502748, "loss": 0.1834, "step": 123100 }, { "epoch": 5.1, "grad_norm": 5.1021575927734375e-05, "learning_rate": 0.0004652419618427264, "loss": 0.1581, "step": 123110 }, { "epoch": 5.1, "grad_norm": 0.36328125, "learning_rate": 0.000465236445130116, "loss": 0.2264, "step": 123120 }, { "epoch": 5.1, "grad_norm": 0.306640625, "learning_rate": 0.0004652309280124537, "loss": 0.2266, "step": 123130 }, { "epoch": 5.1, "grad_norm": 0.142578125, "learning_rate": 0.00046522541048975015, "loss": 0.1394, "step": 123140 }, { "epoch": 5.1, "grad_norm": 0.82421875, "learning_rate": 0.00046521989256201557, "loss": 0.1989, "step": 123150 }, { "epoch": 5.1, "grad_norm": 1.15625, "learning_rate": 0.0004652143742292604, "loss": 0.206, "step": 123160 }, { "epoch": 5.1, "grad_norm": 0.95703125, "learning_rate": 0.0004652088554914951, "loss": 0.1635, "step": 123170 }, { "epoch": 5.1, "grad_norm": 0.5234375, "learning_rate": 0.00046520333634872987, "loss": 0.2132, "step": 123180 }, { "epoch": 5.1, "grad_norm": 0.6640625, "learning_rate": 0.0004651978168009752, "loss": 0.1437, "step": 123190 }, { "epoch": 5.1, "grad_norm": 0.546875, "learning_rate": 0.0004651922968482415, "loss": 0.1886, "step": 123200 }, { "epoch": 5.1, "grad_norm": 1.5703125, "learning_rate": 0.0004651867764905392, "loss": 0.2065, "step": 123210 }, { "epoch": 5.1, "grad_norm": 0.375, "learning_rate": 0.00046518125572787855, "loss": 0.1612, "step": 123220 }, { "epoch": 5.1, "grad_norm": 0.71484375, "learning_rate": 0.00046517573456027005, "loss": 0.1685, "step": 123230 }, { "epoch": 5.1, "grad_norm": 1.0078125, "learning_rate": 0.0004651702129877241, "loss": 0.2463, "step": 123240 }, { "epoch": 5.1, "grad_norm": 0.8515625, "learning_rate": 0.0004651646910102509, "loss": 0.2555, "step": 123250 }, { "epoch": 5.11, "grad_norm": 0.58203125, "learning_rate": 0.0004651591686278611, "loss": 0.2205, "step": 123260 }, { "epoch": 5.11, "grad_norm": 0.34375, "learning_rate": 0.00046515364584056494, "loss": 0.2217, "step": 123270 }, { "epoch": 5.11, "grad_norm": 0.765625, "learning_rate": 0.00046514812264837293, "loss": 0.2214, "step": 123280 }, { "epoch": 5.11, "grad_norm": 0.7890625, "learning_rate": 0.0004651425990512953, "loss": 0.2048, "step": 123290 }, { "epoch": 5.11, "grad_norm": 0.6875, "learning_rate": 0.00046513707504934257, "loss": 0.1342, "step": 123300 }, { "epoch": 5.11, "grad_norm": 0.80859375, "learning_rate": 0.0004651315506425251, "loss": 0.2148, "step": 123310 }, { "epoch": 5.11, "grad_norm": 0.58203125, "learning_rate": 0.00046512602583085325, "loss": 0.1785, "step": 123320 }, { "epoch": 5.11, "grad_norm": 1.0234375, "learning_rate": 0.00046512050061433744, "loss": 0.1873, "step": 123330 }, { "epoch": 5.11, "grad_norm": 0.435546875, "learning_rate": 0.0004651149749929881, "loss": 0.2373, "step": 123340 }, { "epoch": 5.11, "grad_norm": 1.765625, "learning_rate": 0.0004651094489668156, "loss": 0.2116, "step": 123350 }, { "epoch": 5.11, "grad_norm": 0.2578125, "learning_rate": 0.0004651039225358304, "loss": 0.2176, "step": 123360 }, { "epoch": 5.11, "grad_norm": 0.8203125, "learning_rate": 0.0004650983957000428, "loss": 0.1899, "step": 123370 }, { "epoch": 5.11, "grad_norm": 0.59765625, "learning_rate": 0.0004650928684594633, "loss": 0.2412, "step": 123380 }, { "epoch": 5.11, "grad_norm": 0.6328125, "learning_rate": 0.0004650873408141022, "loss": 0.1831, "step": 123390 }, { "epoch": 5.11, "grad_norm": 1.7734375, "learning_rate": 0.00046508181276396995, "loss": 0.2454, "step": 123400 }, { "epoch": 5.11, "grad_norm": 0.71875, "learning_rate": 0.00046507628430907696, "loss": 0.2467, "step": 123410 }, { "epoch": 5.11, "grad_norm": 0.35546875, "learning_rate": 0.00046507075544943363, "loss": 0.2254, "step": 123420 }, { "epoch": 5.11, "grad_norm": 0.244140625, "learning_rate": 0.00046506522618505036, "loss": 0.2111, "step": 123430 }, { "epoch": 5.11, "grad_norm": 1.1953125, "learning_rate": 0.0004650596965159376, "loss": 0.2252, "step": 123440 }, { "epoch": 5.11, "grad_norm": 1.15625, "learning_rate": 0.0004650541664421056, "loss": 0.2073, "step": 123450 }, { "epoch": 5.11, "grad_norm": 0.8671875, "learning_rate": 0.000465048635963565, "loss": 0.2032, "step": 123460 }, { "epoch": 5.11, "grad_norm": 0.4765625, "learning_rate": 0.0004650431050803261, "loss": 0.2419, "step": 123470 }, { "epoch": 5.11, "grad_norm": 0.345703125, "learning_rate": 0.0004650375737923992, "loss": 0.1989, "step": 123480 }, { "epoch": 5.11, "grad_norm": 0.62109375, "learning_rate": 0.0004650320420997949, "loss": 0.2135, "step": 123490 }, { "epoch": 5.12, "grad_norm": 0.44921875, "learning_rate": 0.0004650265100025235, "loss": 0.2135, "step": 123500 }, { "epoch": 5.12, "grad_norm": 0.828125, "learning_rate": 0.00046502097750059535, "loss": 0.2459, "step": 123510 }, { "epoch": 5.12, "grad_norm": 0.765625, "learning_rate": 0.000465015444594021, "loss": 0.2066, "step": 123520 }, { "epoch": 5.12, "grad_norm": 0.73046875, "learning_rate": 0.00046500991128281083, "loss": 0.1596, "step": 123530 }, { "epoch": 5.12, "grad_norm": 0.6796875, "learning_rate": 0.0004650043775669752, "loss": 0.1804, "step": 123540 }, { "epoch": 5.12, "grad_norm": 0.435546875, "learning_rate": 0.00046499884344652453, "loss": 0.192, "step": 123550 }, { "epoch": 5.12, "grad_norm": 0.84765625, "learning_rate": 0.00046499330892146927, "loss": 0.2101, "step": 123560 }, { "epoch": 5.12, "grad_norm": 0.7265625, "learning_rate": 0.00046498777399181984, "loss": 0.1981, "step": 123570 }, { "epoch": 5.12, "grad_norm": 0.3671875, "learning_rate": 0.0004649822386575866, "loss": 0.1577, "step": 123580 }, { "epoch": 5.12, "grad_norm": 0.578125, "learning_rate": 0.00046497670291878, "loss": 0.2481, "step": 123590 }, { "epoch": 5.12, "grad_norm": 0.6953125, "learning_rate": 0.0004649711667754105, "loss": 0.2234, "step": 123600 }, { "epoch": 5.12, "grad_norm": 0.296875, "learning_rate": 0.0004649656302274885, "loss": 0.2033, "step": 123610 }, { "epoch": 5.12, "grad_norm": 0.337890625, "learning_rate": 0.00046496009327502436, "loss": 0.1991, "step": 123620 }, { "epoch": 5.12, "grad_norm": 0.484375, "learning_rate": 0.0004649545559180285, "loss": 0.2265, "step": 123630 }, { "epoch": 5.12, "grad_norm": 0.53125, "learning_rate": 0.0004649490181565115, "loss": 0.1844, "step": 123640 }, { "epoch": 5.12, "grad_norm": 0.48046875, "learning_rate": 0.00046494347999048355, "loss": 0.1866, "step": 123650 }, { "epoch": 5.12, "grad_norm": 0.7578125, "learning_rate": 0.00046493794141995513, "loss": 0.2209, "step": 123660 }, { "epoch": 5.12, "grad_norm": 0.40625, "learning_rate": 0.00046493240244493685, "loss": 0.2508, "step": 123670 }, { "epoch": 5.12, "grad_norm": 0.58984375, "learning_rate": 0.00046492686306543894, "loss": 0.2047, "step": 123680 }, { "epoch": 5.12, "grad_norm": 0.40625, "learning_rate": 0.00046492132328147193, "loss": 0.189, "step": 123690 }, { "epoch": 5.12, "grad_norm": 0.63671875, "learning_rate": 0.0004649157830930462, "loss": 0.2211, "step": 123700 }, { "epoch": 5.12, "grad_norm": 0.3984375, "learning_rate": 0.00046491024250017203, "loss": 0.2057, "step": 123710 }, { "epoch": 5.12, "grad_norm": 0.765625, "learning_rate": 0.0004649047015028601, "loss": 0.1935, "step": 123720 }, { "epoch": 5.12, "grad_norm": 0.416015625, "learning_rate": 0.0004648991601011208, "loss": 0.1472, "step": 123730 }, { "epoch": 5.13, "grad_norm": 0.5859375, "learning_rate": 0.00046489361829496435, "loss": 0.1673, "step": 123740 }, { "epoch": 5.13, "grad_norm": 0.68359375, "learning_rate": 0.0004648880760844014, "loss": 0.2347, "step": 123750 }, { "epoch": 5.13, "grad_norm": 2.4375, "learning_rate": 0.0004648825334694423, "loss": 0.2272, "step": 123760 }, { "epoch": 5.13, "grad_norm": 0.97265625, "learning_rate": 0.00046487699045009745, "loss": 0.2045, "step": 123770 }, { "epoch": 5.13, "grad_norm": 0.57421875, "learning_rate": 0.0004648714470263773, "loss": 0.1817, "step": 123780 }, { "epoch": 5.13, "grad_norm": 0.65234375, "learning_rate": 0.0004648659031982923, "loss": 0.1733, "step": 123790 }, { "epoch": 5.13, "grad_norm": 0.34375, "learning_rate": 0.00046486035896585286, "loss": 0.1638, "step": 123800 }, { "epoch": 5.13, "grad_norm": 0.69921875, "learning_rate": 0.00046485481432906946, "loss": 0.1894, "step": 123810 }, { "epoch": 5.13, "grad_norm": 0.5859375, "learning_rate": 0.00046484926928795243, "loss": 0.2234, "step": 123820 }, { "epoch": 5.13, "grad_norm": 0.87109375, "learning_rate": 0.0004648437238425124, "loss": 0.1439, "step": 123830 }, { "epoch": 5.13, "grad_norm": 1.3125, "learning_rate": 0.0004648381779927596, "loss": 0.1869, "step": 123840 }, { "epoch": 5.13, "grad_norm": 0.03564453125, "learning_rate": 0.0004648326317387046, "loss": 0.2323, "step": 123850 }, { "epoch": 5.13, "grad_norm": 0.6484375, "learning_rate": 0.00046482708508035776, "loss": 0.232, "step": 123860 }, { "epoch": 5.13, "grad_norm": 0.4296875, "learning_rate": 0.0004648215380177295, "loss": 0.1931, "step": 123870 }, { "epoch": 5.13, "grad_norm": 0.5703125, "learning_rate": 0.00046481599055083037, "loss": 0.1878, "step": 123880 }, { "epoch": 5.13, "grad_norm": 0.42578125, "learning_rate": 0.00046481044267967066, "loss": 0.2251, "step": 123890 }, { "epoch": 5.13, "grad_norm": 0.248046875, "learning_rate": 0.00046480489440426097, "loss": 0.2218, "step": 123900 }, { "epoch": 5.13, "grad_norm": 0.51171875, "learning_rate": 0.00046479934572461165, "loss": 0.2011, "step": 123910 }, { "epoch": 5.13, "grad_norm": 0.26171875, "learning_rate": 0.00046479379664073316, "loss": 0.2426, "step": 123920 }, { "epoch": 5.13, "grad_norm": 0.7578125, "learning_rate": 0.0004647882471526359, "loss": 0.2488, "step": 123930 }, { "epoch": 5.13, "grad_norm": 0.58203125, "learning_rate": 0.0004647826972603304, "loss": 0.2826, "step": 123940 }, { "epoch": 5.13, "grad_norm": 0.359375, "learning_rate": 0.00046477714696382705, "loss": 0.222, "step": 123950 }, { "epoch": 5.13, "grad_norm": 0.83203125, "learning_rate": 0.0004647715962631363, "loss": 0.2644, "step": 123960 }, { "epoch": 5.13, "grad_norm": 0.83984375, "learning_rate": 0.0004647660451582686, "loss": 0.2799, "step": 123970 }, { "epoch": 5.14, "grad_norm": 0.81640625, "learning_rate": 0.0004647604936492344, "loss": 0.2339, "step": 123980 }, { "epoch": 5.14, "grad_norm": 0.625, "learning_rate": 0.00046475494173604413, "loss": 0.2231, "step": 123990 }, { "epoch": 5.14, "grad_norm": 0.7734375, "learning_rate": 0.00046474938941870825, "loss": 0.2393, "step": 124000 }, { "epoch": 5.14, "grad_norm": 0.220703125, "learning_rate": 0.00046474383669723726, "loss": 0.228, "step": 124010 }, { "epoch": 5.14, "grad_norm": 0.2421875, "learning_rate": 0.0004647382835716415, "loss": 0.1849, "step": 124020 }, { "epoch": 5.14, "grad_norm": 0.89453125, "learning_rate": 0.0004647327300419315, "loss": 0.2207, "step": 124030 }, { "epoch": 5.14, "grad_norm": 0.8125, "learning_rate": 0.00046472717610811763, "loss": 0.2194, "step": 124040 }, { "epoch": 5.14, "grad_norm": 0.54296875, "learning_rate": 0.0004647216217702105, "loss": 0.2174, "step": 124050 }, { "epoch": 5.14, "grad_norm": 0.60546875, "learning_rate": 0.00046471606702822043, "loss": 0.2003, "step": 124060 }, { "epoch": 5.14, "grad_norm": 1.03125, "learning_rate": 0.0004647105118821579, "loss": 0.2089, "step": 124070 }, { "epoch": 5.14, "grad_norm": 0.546875, "learning_rate": 0.00046470495633203334, "loss": 0.2247, "step": 124080 }, { "epoch": 5.14, "grad_norm": 0.82421875, "learning_rate": 0.0004646994003778573, "loss": 0.1773, "step": 124090 }, { "epoch": 5.14, "grad_norm": 0.640625, "learning_rate": 0.0004646938440196402, "loss": 0.1836, "step": 124100 }, { "epoch": 5.14, "grad_norm": 0.6328125, "learning_rate": 0.00046468828725739245, "loss": 0.1718, "step": 124110 }, { "epoch": 5.14, "grad_norm": 1.484375, "learning_rate": 0.0004646827300911245, "loss": 0.1979, "step": 124120 }, { "epoch": 5.14, "grad_norm": 1.125, "learning_rate": 0.00046467717252084677, "loss": 0.2208, "step": 124130 }, { "epoch": 5.14, "grad_norm": 0.92578125, "learning_rate": 0.0004646716145465699, "loss": 0.2401, "step": 124140 }, { "epoch": 5.14, "grad_norm": 0.9296875, "learning_rate": 0.00046466605616830414, "loss": 0.2232, "step": 124150 }, { "epoch": 5.14, "grad_norm": 0.55078125, "learning_rate": 0.0004646604973860601, "loss": 0.2243, "step": 124160 }, { "epoch": 5.14, "grad_norm": 0.31640625, "learning_rate": 0.00046465493819984816, "loss": 0.1424, "step": 124170 }, { "epoch": 5.14, "grad_norm": 0.66015625, "learning_rate": 0.0004646493786096788, "loss": 0.249, "step": 124180 }, { "epoch": 5.14, "grad_norm": 1.859375, "learning_rate": 0.0004646438186155626, "loss": 0.2303, "step": 124190 }, { "epoch": 5.14, "grad_norm": 1.0703125, "learning_rate": 0.0004646382582175098, "loss": 0.2053, "step": 124200 }, { "epoch": 5.14, "grad_norm": 0.953125, "learning_rate": 0.00046463269741553096, "loss": 0.2759, "step": 124210 }, { "epoch": 5.15, "grad_norm": 0.765625, "learning_rate": 0.00046462713620963657, "loss": 0.2506, "step": 124220 }, { "epoch": 5.15, "grad_norm": 0.498046875, "learning_rate": 0.0004646215745998372, "loss": 0.2386, "step": 124230 }, { "epoch": 5.15, "grad_norm": 0.7734375, "learning_rate": 0.0004646160125861431, "loss": 0.215, "step": 124240 }, { "epoch": 5.15, "grad_norm": 0.462890625, "learning_rate": 0.00046461045016856487, "loss": 0.203, "step": 124250 }, { "epoch": 5.15, "grad_norm": 0.92578125, "learning_rate": 0.0004646048873471129, "loss": 0.2268, "step": 124260 }, { "epoch": 5.15, "grad_norm": 0.330078125, "learning_rate": 0.00046459932412179776, "loss": 0.219, "step": 124270 }, { "epoch": 5.15, "grad_norm": 0.51171875, "learning_rate": 0.00046459376049262993, "loss": 0.1666, "step": 124280 }, { "epoch": 5.15, "grad_norm": 0.32421875, "learning_rate": 0.00046458819645961967, "loss": 0.1756, "step": 124290 }, { "epoch": 5.15, "grad_norm": 0.8359375, "learning_rate": 0.0004645826320227777, "loss": 0.2005, "step": 124300 }, { "epoch": 5.15, "grad_norm": 0.671875, "learning_rate": 0.00046457706718211434, "loss": 0.2387, "step": 124310 }, { "epoch": 5.15, "grad_norm": 1.046875, "learning_rate": 0.0004645715019376402, "loss": 0.2249, "step": 124320 }, { "epoch": 5.15, "grad_norm": 0.474609375, "learning_rate": 0.00046456593628936555, "loss": 0.1952, "step": 124330 }, { "epoch": 5.15, "grad_norm": 1.453125, "learning_rate": 0.00046456037023730103, "loss": 0.2063, "step": 124340 }, { "epoch": 5.15, "grad_norm": 0.67578125, "learning_rate": 0.0004645548037814571, "loss": 0.2151, "step": 124350 }, { "epoch": 5.15, "grad_norm": 0.59765625, "learning_rate": 0.00046454923692184415, "loss": 0.1975, "step": 124360 }, { "epoch": 5.15, "grad_norm": 1.390625, "learning_rate": 0.0004645436696584727, "loss": 0.2754, "step": 124370 }, { "epoch": 5.15, "grad_norm": 0.84765625, "learning_rate": 0.00046453810199135326, "loss": 0.2582, "step": 124380 }, { "epoch": 5.15, "grad_norm": 0.86328125, "learning_rate": 0.0004645325339204963, "loss": 0.2468, "step": 124390 }, { "epoch": 5.15, "grad_norm": 0.578125, "learning_rate": 0.0004645269654459122, "loss": 0.2203, "step": 124400 }, { "epoch": 5.15, "grad_norm": 0.7421875, "learning_rate": 0.00046452139656761156, "loss": 0.27, "step": 124410 }, { "epoch": 5.15, "grad_norm": 0.54296875, "learning_rate": 0.0004645158272856048, "loss": 0.2193, "step": 124420 }, { "epoch": 5.15, "grad_norm": 0.9765625, "learning_rate": 0.0004645102575999025, "loss": 0.1477, "step": 124430 }, { "epoch": 5.15, "grad_norm": 0.83984375, "learning_rate": 0.000464504687510515, "loss": 0.1127, "step": 124440 }, { "epoch": 5.15, "grad_norm": 0.58984375, "learning_rate": 0.00046449911701745286, "loss": 0.2374, "step": 124450 }, { "epoch": 5.16, "grad_norm": 0.36328125, "learning_rate": 0.00046449354612072645, "loss": 0.1906, "step": 124460 }, { "epoch": 5.16, "grad_norm": 0.734375, "learning_rate": 0.00046448797482034643, "loss": 0.2344, "step": 124470 }, { "epoch": 5.16, "grad_norm": 0.73046875, "learning_rate": 0.00046448240311632326, "loss": 0.1851, "step": 124480 }, { "epoch": 5.16, "grad_norm": 0.48046875, "learning_rate": 0.0004644768310086673, "loss": 0.2087, "step": 124490 }, { "epoch": 5.16, "grad_norm": 0.32421875, "learning_rate": 0.0004644712584973891, "loss": 0.1854, "step": 124500 }, { "epoch": 5.16, "grad_norm": 0.87890625, "learning_rate": 0.0004644656855824992, "loss": 0.1953, "step": 124510 }, { "epoch": 5.16, "grad_norm": 0.71484375, "learning_rate": 0.00046446011226400797, "loss": 0.2147, "step": 124520 }, { "epoch": 5.16, "grad_norm": 0.470703125, "learning_rate": 0.00046445453854192606, "loss": 0.1495, "step": 124530 }, { "epoch": 5.16, "grad_norm": 0.55078125, "learning_rate": 0.0004644489644162638, "loss": 0.1836, "step": 124540 }, { "epoch": 5.16, "grad_norm": 1.125, "learning_rate": 0.0004644433898870317, "loss": 0.2181, "step": 124550 }, { "epoch": 5.16, "grad_norm": 0.5234375, "learning_rate": 0.00046443781495424043, "loss": 0.2702, "step": 124560 }, { "epoch": 5.16, "grad_norm": 0.4609375, "learning_rate": 0.00046443223961790026, "loss": 0.2116, "step": 124570 }, { "epoch": 5.16, "grad_norm": 1.8125, "learning_rate": 0.00046442666387802177, "loss": 0.229, "step": 124580 }, { "epoch": 5.16, "grad_norm": 0.59765625, "learning_rate": 0.00046442108773461554, "loss": 0.2035, "step": 124590 }, { "epoch": 5.16, "grad_norm": 1.2109375, "learning_rate": 0.00046441551118769187, "loss": 0.2041, "step": 124600 }, { "epoch": 5.16, "grad_norm": 0.56640625, "learning_rate": 0.00046440993423726144, "loss": 0.2541, "step": 124610 }, { "epoch": 5.16, "grad_norm": 0.55859375, "learning_rate": 0.0004644043568833346, "loss": 0.1907, "step": 124620 }, { "epoch": 5.16, "grad_norm": 0.369140625, "learning_rate": 0.00046439877912592196, "loss": 0.1386, "step": 124630 }, { "epoch": 5.16, "grad_norm": 2.5, "learning_rate": 0.000464393200965034, "loss": 0.1974, "step": 124640 }, { "epoch": 5.16, "grad_norm": 1.171875, "learning_rate": 0.0004643876224006811, "loss": 0.1534, "step": 124650 }, { "epoch": 5.16, "grad_norm": 0.33984375, "learning_rate": 0.00046438204343287394, "loss": 0.2168, "step": 124660 }, { "epoch": 5.16, "grad_norm": 2.296875, "learning_rate": 0.00046437646406162286, "loss": 0.2342, "step": 124670 }, { "epoch": 5.16, "grad_norm": 0.68359375, "learning_rate": 0.00046437088428693846, "loss": 0.1878, "step": 124680 }, { "epoch": 5.16, "grad_norm": 0.61328125, "learning_rate": 0.0004643653041088312, "loss": 0.1646, "step": 124690 }, { "epoch": 5.17, "grad_norm": 1.0078125, "learning_rate": 0.0004643597235273116, "loss": 0.1992, "step": 124700 }, { "epoch": 5.17, "grad_norm": 0.8515625, "learning_rate": 0.0004643541425423901, "loss": 0.1929, "step": 124710 }, { "epoch": 5.17, "grad_norm": 0.91796875, "learning_rate": 0.0004643485611540773, "loss": 0.1923, "step": 124720 }, { "epoch": 5.17, "grad_norm": 0.78125, "learning_rate": 0.0004643429793623837, "loss": 0.1926, "step": 124730 }, { "epoch": 5.17, "grad_norm": 0.5, "learning_rate": 0.00046433739716731964, "loss": 0.2037, "step": 124740 }, { "epoch": 5.17, "grad_norm": 0.84375, "learning_rate": 0.00046433181456889585, "loss": 0.2222, "step": 124750 }, { "epoch": 5.17, "grad_norm": 0.2197265625, "learning_rate": 0.0004643262315671226, "loss": 0.1932, "step": 124760 }, { "epoch": 5.17, "grad_norm": 0.466796875, "learning_rate": 0.00046432064816201066, "loss": 0.1969, "step": 124770 }, { "epoch": 5.17, "grad_norm": 0.72265625, "learning_rate": 0.00046431506435357035, "loss": 0.1124, "step": 124780 }, { "epoch": 5.17, "grad_norm": 0.462890625, "learning_rate": 0.00046430948014181226, "loss": 0.1243, "step": 124790 }, { "epoch": 5.17, "grad_norm": 0.578125, "learning_rate": 0.0004643038955267468, "loss": 0.1749, "step": 124800 }, { "epoch": 5.17, "grad_norm": 0.49609375, "learning_rate": 0.0004642983105083846, "loss": 0.2211, "step": 124810 }, { "epoch": 5.17, "grad_norm": 0.94921875, "learning_rate": 0.00046429272508673614, "loss": 0.1058, "step": 124820 }, { "epoch": 5.17, "grad_norm": 0.75390625, "learning_rate": 0.00046428713926181185, "loss": 0.2054, "step": 124830 }, { "epoch": 5.17, "grad_norm": 0.75390625, "learning_rate": 0.0004642815530336224, "loss": 0.138, "step": 124840 }, { "epoch": 5.17, "grad_norm": 0.37109375, "learning_rate": 0.00046427596640217817, "loss": 0.2129, "step": 124850 }, { "epoch": 5.17, "grad_norm": 0.58203125, "learning_rate": 0.0004642703793674897, "loss": 0.209, "step": 124860 }, { "epoch": 5.17, "grad_norm": 0.75, "learning_rate": 0.0004642647919295675, "loss": 0.1651, "step": 124870 }, { "epoch": 5.17, "grad_norm": 0.6328125, "learning_rate": 0.0004642592040884222, "loss": 0.2197, "step": 124880 }, { "epoch": 5.17, "grad_norm": 0.421875, "learning_rate": 0.00046425361584406403, "loss": 0.2225, "step": 124890 }, { "epoch": 5.17, "grad_norm": 0.828125, "learning_rate": 0.00046424802719650384, "loss": 0.206, "step": 124900 }, { "epoch": 5.17, "grad_norm": 0.65625, "learning_rate": 0.00046424243814575196, "loss": 0.2465, "step": 124910 }, { "epoch": 5.17, "grad_norm": 0.43359375, "learning_rate": 0.00046423684869181895, "loss": 0.2327, "step": 124920 }, { "epoch": 5.17, "grad_norm": 0.37890625, "learning_rate": 0.0004642312588347153, "loss": 0.1885, "step": 124930 }, { "epoch": 5.17, "grad_norm": 2.03125, "learning_rate": 0.0004642256685744516, "loss": 0.2323, "step": 124940 }, { "epoch": 5.18, "grad_norm": 0.66796875, "learning_rate": 0.00046422007791103824, "loss": 0.1923, "step": 124950 }, { "epoch": 5.18, "grad_norm": 0.306640625, "learning_rate": 0.00046421448684448585, "loss": 0.2235, "step": 124960 }, { "epoch": 5.18, "grad_norm": 0.5078125, "learning_rate": 0.00046420889537480503, "loss": 0.2125, "step": 124970 }, { "epoch": 5.18, "grad_norm": 0.333984375, "learning_rate": 0.00046420330350200613, "loss": 0.2288, "step": 124980 }, { "epoch": 5.18, "grad_norm": 1.1953125, "learning_rate": 0.0004641977112260998, "loss": 0.2492, "step": 124990 }, { "epoch": 5.18, "grad_norm": 0.828125, "learning_rate": 0.00046419211854709645, "loss": 0.2255, "step": 125000 }, { "epoch": 5.18, "grad_norm": 1.0703125, "learning_rate": 0.00046418652546500663, "loss": 0.2108, "step": 125010 }, { "epoch": 5.18, "grad_norm": 0.43359375, "learning_rate": 0.000464180931979841, "loss": 0.2099, "step": 125020 }, { "epoch": 5.18, "grad_norm": 0.53125, "learning_rate": 0.0004641753380916099, "loss": 0.2409, "step": 125030 }, { "epoch": 5.18, "grad_norm": 1.15625, "learning_rate": 0.00046416974380032397, "loss": 0.1905, "step": 125040 }, { "epoch": 5.18, "grad_norm": 0.609375, "learning_rate": 0.0004641641491059937, "loss": 0.1965, "step": 125050 }, { "epoch": 5.18, "grad_norm": 0.71484375, "learning_rate": 0.0004641585540086296, "loss": 0.2425, "step": 125060 }, { "epoch": 5.18, "grad_norm": 0.91015625, "learning_rate": 0.00046415295850824235, "loss": 0.2234, "step": 125070 }, { "epoch": 5.18, "grad_norm": 0.7265625, "learning_rate": 0.00046414736260484224, "loss": 0.2279, "step": 125080 }, { "epoch": 5.18, "grad_norm": 0.56640625, "learning_rate": 0.00046414176629843996, "loss": 0.1981, "step": 125090 }, { "epoch": 5.18, "grad_norm": 0.75, "learning_rate": 0.000464136169589046, "loss": 0.2038, "step": 125100 }, { "epoch": 5.18, "grad_norm": 0.5, "learning_rate": 0.0004641305724766709, "loss": 0.2179, "step": 125110 }, { "epoch": 5.18, "grad_norm": 0.51953125, "learning_rate": 0.00046412497496132516, "loss": 0.1876, "step": 125120 }, { "epoch": 5.18, "grad_norm": 0.66796875, "learning_rate": 0.00046411937704301934, "loss": 0.2141, "step": 125130 }, { "epoch": 5.18, "grad_norm": 0.76953125, "learning_rate": 0.00046411377872176404, "loss": 0.2533, "step": 125140 }, { "epoch": 5.18, "grad_norm": 0.53125, "learning_rate": 0.0004641081799975697, "loss": 0.2209, "step": 125150 }, { "epoch": 5.18, "grad_norm": 0.419921875, "learning_rate": 0.0004641025808704469, "loss": 0.1888, "step": 125160 }, { "epoch": 5.18, "grad_norm": 0.63671875, "learning_rate": 0.00046409698134040607, "loss": 0.1717, "step": 125170 }, { "epoch": 5.18, "grad_norm": 0.84375, "learning_rate": 0.0004640913814074579, "loss": 0.193, "step": 125180 }, { "epoch": 5.19, "grad_norm": 0.703125, "learning_rate": 0.0004640857810716129, "loss": 0.2279, "step": 125190 }, { "epoch": 5.19, "grad_norm": 0.375, "learning_rate": 0.00046408018033288156, "loss": 0.218, "step": 125200 }, { "epoch": 5.19, "grad_norm": 0.98828125, "learning_rate": 0.0004640745791912744, "loss": 0.1925, "step": 125210 }, { "epoch": 5.19, "grad_norm": 0.99609375, "learning_rate": 0.00046406897764680204, "loss": 0.2324, "step": 125220 }, { "epoch": 5.19, "grad_norm": 0.55859375, "learning_rate": 0.00046406337569947497, "loss": 0.2234, "step": 125230 }, { "epoch": 5.19, "grad_norm": 0.51953125, "learning_rate": 0.0004640577733493038, "loss": 0.2, "step": 125240 }, { "epoch": 5.19, "grad_norm": 0.482421875, "learning_rate": 0.000464052170596299, "loss": 0.1861, "step": 125250 }, { "epoch": 5.19, "grad_norm": 0.427734375, "learning_rate": 0.00046404656744047104, "loss": 0.2439, "step": 125260 }, { "epoch": 5.19, "grad_norm": 0.74609375, "learning_rate": 0.0004640409638818306, "loss": 0.2582, "step": 125270 }, { "epoch": 5.19, "grad_norm": 0.9375, "learning_rate": 0.00046403535992038826, "loss": 0.2124, "step": 125280 }, { "epoch": 5.19, "grad_norm": 1.0546875, "learning_rate": 0.00046402975555615434, "loss": 0.1827, "step": 125290 }, { "epoch": 5.19, "grad_norm": 0.71875, "learning_rate": 0.0004640241507891397, "loss": 0.2038, "step": 125300 }, { "epoch": 5.19, "grad_norm": 0.412109375, "learning_rate": 0.00046401854561935456, "loss": 0.2025, "step": 125310 }, { "epoch": 5.19, "grad_norm": 0.11767578125, "learning_rate": 0.00046401294004680974, "loss": 0.2508, "step": 125320 }, { "epoch": 5.19, "grad_norm": 0.58203125, "learning_rate": 0.0004640073340715156, "loss": 0.2293, "step": 125330 }, { "epoch": 5.19, "grad_norm": 0.73046875, "learning_rate": 0.00046400172769348286, "loss": 0.2302, "step": 125340 }, { "epoch": 5.19, "grad_norm": 0.8515625, "learning_rate": 0.00046399612091272194, "loss": 0.2131, "step": 125350 }, { "epoch": 5.19, "grad_norm": 0.74609375, "learning_rate": 0.0004639905137292434, "loss": 0.1948, "step": 125360 }, { "epoch": 5.19, "grad_norm": 0.82421875, "learning_rate": 0.0004639849061430579, "loss": 0.2467, "step": 125370 }, { "epoch": 5.19, "grad_norm": 0.34765625, "learning_rate": 0.0004639792981541758, "loss": 0.1704, "step": 125380 }, { "epoch": 5.19, "grad_norm": 1.1875, "learning_rate": 0.00046397368976260783, "loss": 0.2056, "step": 125390 }, { "epoch": 5.19, "grad_norm": 0.6796875, "learning_rate": 0.00046396808096836443, "loss": 0.1772, "step": 125400 }, { "epoch": 5.19, "grad_norm": 0.921875, "learning_rate": 0.0004639624717714563, "loss": 0.2107, "step": 125410 }, { "epoch": 5.19, "grad_norm": 1.0859375, "learning_rate": 0.00046395686217189384, "loss": 0.1893, "step": 125420 }, { "epoch": 5.2, "grad_norm": 0.51171875, "learning_rate": 0.0004639512521696877, "loss": 0.1995, "step": 125430 }, { "epoch": 5.2, "grad_norm": 0.77734375, "learning_rate": 0.00046394564176484846, "loss": 0.1912, "step": 125440 }, { "epoch": 5.2, "grad_norm": 0.357421875, "learning_rate": 0.00046394003095738654, "loss": 0.1684, "step": 125450 }, { "epoch": 5.2, "grad_norm": 0.75390625, "learning_rate": 0.0004639344197473126, "loss": 0.2508, "step": 125460 }, { "epoch": 5.2, "grad_norm": 0.45703125, "learning_rate": 0.00046392880813463723, "loss": 0.2444, "step": 125470 }, { "epoch": 5.2, "grad_norm": 0.984375, "learning_rate": 0.00046392319611937083, "loss": 0.2274, "step": 125480 }, { "epoch": 5.2, "grad_norm": 1.046875, "learning_rate": 0.0004639175837015242, "loss": 0.2236, "step": 125490 }, { "epoch": 5.2, "grad_norm": 1.4296875, "learning_rate": 0.0004639119708811078, "loss": 0.2084, "step": 125500 }, { "epoch": 5.2, "grad_norm": 0.65234375, "learning_rate": 0.00046390635765813205, "loss": 0.2063, "step": 125510 }, { "epoch": 5.2, "grad_norm": 0.89453125, "learning_rate": 0.00046390074403260766, "loss": 0.1679, "step": 125520 }, { "epoch": 5.2, "grad_norm": 0.66015625, "learning_rate": 0.0004638951300045452, "loss": 0.226, "step": 125530 }, { "epoch": 5.2, "grad_norm": 1.0703125, "learning_rate": 0.00046388951557395526, "loss": 0.2131, "step": 125540 }, { "epoch": 5.2, "grad_norm": 0.240234375, "learning_rate": 0.0004638839007408483, "loss": 0.19, "step": 125550 }, { "epoch": 5.2, "grad_norm": 0.921875, "learning_rate": 0.00046387828550523494, "loss": 0.2533, "step": 125560 }, { "epoch": 5.2, "grad_norm": 0.59375, "learning_rate": 0.00046387266986712574, "loss": 0.1952, "step": 125570 }, { "epoch": 5.2, "grad_norm": 1.0, "learning_rate": 0.00046386705382653125, "loss": 0.2584, "step": 125580 }, { "epoch": 5.2, "grad_norm": 1.0234375, "learning_rate": 0.0004638614373834621, "loss": 0.1902, "step": 125590 }, { "epoch": 5.2, "grad_norm": 0.578125, "learning_rate": 0.0004638558205379287, "loss": 0.1911, "step": 125600 }, { "epoch": 5.2, "grad_norm": 0.703125, "learning_rate": 0.00046385020328994187, "loss": 0.2754, "step": 125610 }, { "epoch": 5.2, "grad_norm": 0.33203125, "learning_rate": 0.000463844585639512, "loss": 0.1913, "step": 125620 }, { "epoch": 5.2, "grad_norm": 0.58984375, "learning_rate": 0.0004638389675866498, "loss": 0.2109, "step": 125630 }, { "epoch": 5.2, "grad_norm": 0.859375, "learning_rate": 0.00046383334913136567, "loss": 0.1788, "step": 125640 }, { "epoch": 5.2, "grad_norm": 0.46484375, "learning_rate": 0.00046382773027367025, "loss": 0.1963, "step": 125650 }, { "epoch": 5.2, "grad_norm": 0.90625, "learning_rate": 0.00046382211101357417, "loss": 0.1775, "step": 125660 }, { "epoch": 5.21, "grad_norm": 0.80078125, "learning_rate": 0.000463816491351088, "loss": 0.221, "step": 125670 }, { "epoch": 5.21, "grad_norm": 1.390625, "learning_rate": 0.00046381087128622225, "loss": 0.2091, "step": 125680 }, { "epoch": 5.21, "grad_norm": 1.0078125, "learning_rate": 0.0004638052508189875, "loss": 0.1881, "step": 125690 }, { "epoch": 5.21, "grad_norm": 0.58984375, "learning_rate": 0.0004637996299493944, "loss": 0.2023, "step": 125700 }, { "epoch": 5.21, "grad_norm": 0.5390625, "learning_rate": 0.00046379400867745346, "loss": 0.2154, "step": 125710 }, { "epoch": 5.21, "grad_norm": 0.2353515625, "learning_rate": 0.0004637883870031753, "loss": 0.2427, "step": 125720 }, { "epoch": 5.21, "grad_norm": 0.98828125, "learning_rate": 0.0004637827649265705, "loss": 0.2249, "step": 125730 }, { "epoch": 5.21, "grad_norm": 0.5859375, "learning_rate": 0.0004637771424476496, "loss": 0.1909, "step": 125740 }, { "epoch": 5.21, "grad_norm": 0.2177734375, "learning_rate": 0.0004637715195664232, "loss": 0.2093, "step": 125750 }, { "epoch": 5.21, "grad_norm": 1.0390625, "learning_rate": 0.0004637658962829019, "loss": 0.2086, "step": 125760 }, { "epoch": 5.21, "grad_norm": 0.8515625, "learning_rate": 0.00046376027259709623, "loss": 0.2514, "step": 125770 }, { "epoch": 5.21, "grad_norm": 0.9765625, "learning_rate": 0.0004637546485090168, "loss": 0.2199, "step": 125780 }, { "epoch": 5.21, "grad_norm": 1.0625, "learning_rate": 0.0004637490240186744, "loss": 0.181, "step": 125790 }, { "epoch": 5.21, "grad_norm": 0.453125, "learning_rate": 0.0004637433991260792, "loss": 0.2047, "step": 125800 }, { "epoch": 5.21, "grad_norm": 0.3828125, "learning_rate": 0.00046373777383124207, "loss": 0.2607, "step": 125810 }, { "epoch": 5.21, "grad_norm": 0.5546875, "learning_rate": 0.0004637321481341735, "loss": 0.191, "step": 125820 }, { "epoch": 5.21, "grad_norm": 0.73046875, "learning_rate": 0.0004637265220348842, "loss": 0.2239, "step": 125830 }, { "epoch": 5.21, "grad_norm": 0.66796875, "learning_rate": 0.0004637208955333846, "loss": 0.1748, "step": 125840 }, { "epoch": 5.21, "grad_norm": 0.57421875, "learning_rate": 0.0004637152686296853, "loss": 0.1754, "step": 125850 }, { "epoch": 5.21, "grad_norm": 0.49609375, "learning_rate": 0.000463709641323797, "loss": 0.1898, "step": 125860 }, { "epoch": 5.21, "grad_norm": 0.5078125, "learning_rate": 0.00046370401361573024, "loss": 0.1808, "step": 125870 }, { "epoch": 5.21, "grad_norm": 0.63671875, "learning_rate": 0.0004636983855054956, "loss": 0.2154, "step": 125880 }, { "epoch": 5.21, "grad_norm": 1.09375, "learning_rate": 0.00046369275699310365, "loss": 0.1626, "step": 125890 }, { "epoch": 5.21, "grad_norm": 0.5625, "learning_rate": 0.00046368712807856507, "loss": 0.2667, "step": 125900 }, { "epoch": 5.22, "grad_norm": 0.5625, "learning_rate": 0.00046368149876189037, "loss": 0.2142, "step": 125910 }, { "epoch": 5.22, "grad_norm": 0.62890625, "learning_rate": 0.00046367586904309013, "loss": 0.2254, "step": 125920 }, { "epoch": 5.22, "grad_norm": 0.322265625, "learning_rate": 0.000463670238922175, "loss": 0.2021, "step": 125930 }, { "epoch": 5.22, "grad_norm": 0.84765625, "learning_rate": 0.00046366460839915557, "loss": 0.2081, "step": 125940 }, { "epoch": 5.22, "grad_norm": 1.0, "learning_rate": 0.0004636589774740424, "loss": 0.2251, "step": 125950 }, { "epoch": 5.22, "grad_norm": 0.5, "learning_rate": 0.0004636533461468461, "loss": 0.1969, "step": 125960 }, { "epoch": 5.22, "grad_norm": 0.296875, "learning_rate": 0.0004636477144175773, "loss": 0.2164, "step": 125970 }, { "epoch": 5.22, "grad_norm": 0.703125, "learning_rate": 0.0004636420822862466, "loss": 0.2322, "step": 125980 }, { "epoch": 5.22, "grad_norm": 0.70703125, "learning_rate": 0.0004636364497528645, "loss": 0.263, "step": 125990 }, { "epoch": 5.22, "grad_norm": 0.71875, "learning_rate": 0.00046363081681744173, "loss": 0.2164, "step": 126000 }, { "epoch": 5.22, "grad_norm": 0.7421875, "learning_rate": 0.00046362518347998886, "loss": 0.2397, "step": 126010 }, { "epoch": 5.22, "grad_norm": 0.96875, "learning_rate": 0.00046361954974051637, "loss": 0.1947, "step": 126020 }, { "epoch": 5.22, "grad_norm": 0.9296875, "learning_rate": 0.000463613915599035, "loss": 0.2115, "step": 126030 }, { "epoch": 5.22, "grad_norm": 1.671875, "learning_rate": 0.0004636082810555553, "loss": 0.1711, "step": 126040 }, { "epoch": 5.22, "grad_norm": 0.6953125, "learning_rate": 0.0004636026461100879, "loss": 0.1712, "step": 126050 }, { "epoch": 5.22, "grad_norm": 1.890625, "learning_rate": 0.0004635970107626434, "loss": 0.1896, "step": 126060 }, { "epoch": 5.22, "grad_norm": 0.859375, "learning_rate": 0.00046359137501323234, "loss": 0.2165, "step": 126070 }, { "epoch": 5.22, "grad_norm": 0.6484375, "learning_rate": 0.0004635857388618654, "loss": 0.2224, "step": 126080 }, { "epoch": 5.22, "grad_norm": 0.765625, "learning_rate": 0.0004635801023085532, "loss": 0.1645, "step": 126090 }, { "epoch": 5.22, "grad_norm": 0.59375, "learning_rate": 0.00046357446535330625, "loss": 0.2181, "step": 126100 }, { "epoch": 5.22, "grad_norm": 0.462890625, "learning_rate": 0.00046356882799613534, "loss": 0.2156, "step": 126110 }, { "epoch": 5.22, "grad_norm": 0.330078125, "learning_rate": 0.00046356319023705084, "loss": 0.1976, "step": 126120 }, { "epoch": 5.22, "grad_norm": 0.2392578125, "learning_rate": 0.00046355755207606354, "loss": 0.1703, "step": 126130 }, { "epoch": 5.22, "grad_norm": 1.0625, "learning_rate": 0.0004635519135131839, "loss": 0.2423, "step": 126140 }, { "epoch": 5.23, "grad_norm": 2.1875, "learning_rate": 0.0004635462745484227, "loss": 0.1869, "step": 126150 }, { "epoch": 5.23, "grad_norm": 0.51171875, "learning_rate": 0.00046354063518179044, "loss": 0.2169, "step": 126160 }, { "epoch": 5.23, "grad_norm": 0.3203125, "learning_rate": 0.0004635349954132978, "loss": 0.2293, "step": 126170 }, { "epoch": 5.23, "grad_norm": 1.3359375, "learning_rate": 0.00046352935524295535, "loss": 0.175, "step": 126180 }, { "epoch": 5.23, "grad_norm": 0.9375, "learning_rate": 0.0004635237146707737, "loss": 0.2165, "step": 126190 }, { "epoch": 5.23, "grad_norm": 0.9375, "learning_rate": 0.0004635180736967635, "loss": 0.1311, "step": 126200 }, { "epoch": 5.23, "grad_norm": 0.6484375, "learning_rate": 0.0004635124323209353, "loss": 0.225, "step": 126210 }, { "epoch": 5.23, "grad_norm": 1.0, "learning_rate": 0.0004635067905432998, "loss": 0.1756, "step": 126220 }, { "epoch": 5.23, "grad_norm": 0.76171875, "learning_rate": 0.0004635011483638676, "loss": 0.2247, "step": 126230 }, { "epoch": 5.23, "grad_norm": 1.78125, "learning_rate": 0.0004634955057826492, "loss": 0.2158, "step": 126240 }, { "epoch": 5.23, "grad_norm": 1.0390625, "learning_rate": 0.0004634898627996554, "loss": 0.2414, "step": 126250 }, { "epoch": 5.23, "grad_norm": 0.384765625, "learning_rate": 0.0004634842194148967, "loss": 0.2023, "step": 126260 }, { "epoch": 5.23, "grad_norm": 0.63671875, "learning_rate": 0.00046347857562838374, "loss": 0.2233, "step": 126270 }, { "epoch": 5.23, "grad_norm": 0.466796875, "learning_rate": 0.00046347293144012716, "loss": 0.2131, "step": 126280 }, { "epoch": 5.23, "grad_norm": 0.162109375, "learning_rate": 0.0004634672868501376, "loss": 0.2312, "step": 126290 }, { "epoch": 5.23, "grad_norm": 0.6484375, "learning_rate": 0.00046346164185842565, "loss": 0.2405, "step": 126300 }, { "epoch": 5.23, "grad_norm": 0.18359375, "learning_rate": 0.0004634559964650019, "loss": 0.1948, "step": 126310 }, { "epoch": 5.23, "grad_norm": 0.33984375, "learning_rate": 0.00046345035066987705, "loss": 0.2388, "step": 126320 }, { "epoch": 5.23, "grad_norm": 0.4921875, "learning_rate": 0.00046344470447306173, "loss": 0.2394, "step": 126330 }, { "epoch": 5.23, "grad_norm": 1.109375, "learning_rate": 0.00046343905787456653, "loss": 0.2767, "step": 126340 }, { "epoch": 5.23, "grad_norm": 1.1015625, "learning_rate": 0.000463433410874402, "loss": 0.2343, "step": 126350 }, { "epoch": 5.23, "grad_norm": 0.474609375, "learning_rate": 0.00046342776347257887, "loss": 0.2502, "step": 126360 }, { "epoch": 5.23, "grad_norm": 1.703125, "learning_rate": 0.00046342211566910776, "loss": 0.1723, "step": 126370 }, { "epoch": 5.23, "grad_norm": 0.3203125, "learning_rate": 0.00046341646746399923, "loss": 0.1896, "step": 126380 }, { "epoch": 5.24, "grad_norm": 0.58203125, "learning_rate": 0.000463410818857264, "loss": 0.1898, "step": 126390 }, { "epoch": 5.24, "grad_norm": 1.5390625, "learning_rate": 0.00046340516984891256, "loss": 0.1387, "step": 126400 }, { "epoch": 5.24, "grad_norm": 0.625, "learning_rate": 0.00046339952043895574, "loss": 0.1858, "step": 126410 }, { "epoch": 5.24, "grad_norm": 0.52734375, "learning_rate": 0.0004633938706274041, "loss": 0.1832, "step": 126420 }, { "epoch": 5.24, "grad_norm": 1.8828125, "learning_rate": 0.0004633882204142681, "loss": 0.2311, "step": 126430 }, { "epoch": 5.24, "grad_norm": 0.69140625, "learning_rate": 0.00046338256979955864, "loss": 0.1915, "step": 126440 }, { "epoch": 5.24, "grad_norm": 1.171875, "learning_rate": 0.00046337691878328615, "loss": 0.2468, "step": 126450 }, { "epoch": 5.24, "grad_norm": 0.9921875, "learning_rate": 0.00046337126736546144, "loss": 0.1939, "step": 126460 }, { "epoch": 5.24, "grad_norm": 0.63671875, "learning_rate": 0.0004633656155460949, "loss": 0.194, "step": 126470 }, { "epoch": 5.24, "grad_norm": 1.0703125, "learning_rate": 0.00046335996332519744, "loss": 0.2073, "step": 126480 }, { "epoch": 5.24, "grad_norm": 0.375, "learning_rate": 0.0004633543107027795, "loss": 0.1957, "step": 126490 }, { "epoch": 5.24, "grad_norm": 1.1953125, "learning_rate": 0.0004633486576788518, "loss": 0.2257, "step": 126500 }, { "epoch": 5.24, "grad_norm": 0.8828125, "learning_rate": 0.000463343004253425, "loss": 0.2019, "step": 126510 }, { "epoch": 5.24, "grad_norm": 0.5078125, "learning_rate": 0.00046333735042650967, "loss": 0.176, "step": 126520 }, { "epoch": 5.24, "grad_norm": 0.421875, "learning_rate": 0.00046333169619811645, "loss": 0.201, "step": 126530 }, { "epoch": 5.24, "grad_norm": 1.1953125, "learning_rate": 0.00046332604156825607, "loss": 0.2304, "step": 126540 }, { "epoch": 5.24, "grad_norm": 0.29296875, "learning_rate": 0.00046332038653693915, "loss": 0.2219, "step": 126550 }, { "epoch": 5.24, "grad_norm": 0.37109375, "learning_rate": 0.0004633147311041762, "loss": 0.2038, "step": 126560 }, { "epoch": 5.24, "grad_norm": 0.9609375, "learning_rate": 0.000463309075269978, "loss": 0.2059, "step": 126570 }, { "epoch": 5.24, "grad_norm": 0.76953125, "learning_rate": 0.00046330341903435526, "loss": 0.2192, "step": 126580 }, { "epoch": 5.24, "grad_norm": 0.98046875, "learning_rate": 0.0004632977623973184, "loss": 0.2255, "step": 126590 }, { "epoch": 5.24, "grad_norm": 0.6015625, "learning_rate": 0.0004632921053588782, "loss": 0.1786, "step": 126600 }, { "epoch": 5.24, "grad_norm": 0.7421875, "learning_rate": 0.00046328644791904526, "loss": 0.1789, "step": 126610 }, { "epoch": 5.24, "grad_norm": 0.3984375, "learning_rate": 0.0004632807900778304, "loss": 0.2215, "step": 126620 }, { "epoch": 5.24, "grad_norm": 0.765625, "learning_rate": 0.0004632751318352441, "loss": 0.2619, "step": 126630 }, { "epoch": 5.25, "grad_norm": 0.96875, "learning_rate": 0.00046326947319129684, "loss": 0.2046, "step": 126640 }, { "epoch": 5.25, "grad_norm": 0.8125, "learning_rate": 0.0004632638141459996, "loss": 0.1796, "step": 126650 }, { "epoch": 5.25, "grad_norm": 0.82421875, "learning_rate": 0.00046325815469936294, "loss": 0.218, "step": 126660 }, { "epoch": 5.25, "grad_norm": 0.57421875, "learning_rate": 0.0004632524948513974, "loss": 0.2308, "step": 126670 }, { "epoch": 5.25, "grad_norm": 0.84765625, "learning_rate": 0.0004632468346021137, "loss": 0.2028, "step": 126680 }, { "epoch": 5.25, "grad_norm": 1.234375, "learning_rate": 0.00046324117395152244, "loss": 0.1826, "step": 126690 }, { "epoch": 5.25, "grad_norm": 0.5, "learning_rate": 0.0004632355128996344, "loss": 0.2404, "step": 126700 }, { "epoch": 5.25, "grad_norm": 0.7734375, "learning_rate": 0.0004632298514464601, "loss": 0.1723, "step": 126710 }, { "epoch": 5.25, "grad_norm": 0.53125, "learning_rate": 0.0004632241895920103, "loss": 0.1816, "step": 126720 }, { "epoch": 5.25, "grad_norm": 0.67578125, "learning_rate": 0.00046321852733629556, "loss": 0.1843, "step": 126730 }, { "epoch": 5.25, "grad_norm": 0.83203125, "learning_rate": 0.0004632128646793266, "loss": 0.2442, "step": 126740 }, { "epoch": 5.25, "grad_norm": 0.58203125, "learning_rate": 0.00046320720162111394, "loss": 0.165, "step": 126750 }, { "epoch": 5.25, "grad_norm": 0.64453125, "learning_rate": 0.0004632015381616685, "loss": 0.2186, "step": 126760 }, { "epoch": 5.25, "grad_norm": 0.41015625, "learning_rate": 0.00046319587430100075, "loss": 0.1294, "step": 126770 }, { "epoch": 5.25, "grad_norm": 0.451171875, "learning_rate": 0.00046319021003912134, "loss": 0.2167, "step": 126780 }, { "epoch": 5.25, "grad_norm": 0.451171875, "learning_rate": 0.00046318454537604104, "loss": 0.2226, "step": 126790 }, { "epoch": 5.25, "grad_norm": 0.384765625, "learning_rate": 0.0004631788803117704, "loss": 0.1842, "step": 126800 }, { "epoch": 5.25, "grad_norm": 1.1171875, "learning_rate": 0.00046317321484632014, "loss": 0.2069, "step": 126810 }, { "epoch": 5.25, "grad_norm": 1.078125, "learning_rate": 0.00046316754897970095, "loss": 0.2602, "step": 126820 }, { "epoch": 5.25, "grad_norm": 1.4140625, "learning_rate": 0.0004631618827119234, "loss": 0.2237, "step": 126830 }, { "epoch": 5.25, "grad_norm": 0.9453125, "learning_rate": 0.00046315621604299816, "loss": 0.2289, "step": 126840 }, { "epoch": 5.25, "grad_norm": 0.59375, "learning_rate": 0.0004631505489729361, "loss": 0.2499, "step": 126850 }, { "epoch": 5.25, "grad_norm": 0.796875, "learning_rate": 0.0004631448815017476, "loss": 0.196, "step": 126860 }, { "epoch": 5.25, "grad_norm": 1.2578125, "learning_rate": 0.00046313921362944345, "loss": 0.1909, "step": 126870 }, { "epoch": 5.26, "grad_norm": 0.625, "learning_rate": 0.0004631335453560344, "loss": 0.1832, "step": 126880 }, { "epoch": 5.26, "grad_norm": 0.546875, "learning_rate": 0.00046312787668153094, "loss": 0.2506, "step": 126890 }, { "epoch": 5.26, "grad_norm": 0.2265625, "learning_rate": 0.00046312220760594394, "loss": 0.1778, "step": 126900 }, { "epoch": 5.26, "grad_norm": 0.6171875, "learning_rate": 0.0004631165381292838, "loss": 0.2135, "step": 126910 }, { "epoch": 5.26, "grad_norm": 1.015625, "learning_rate": 0.00046311086825156144, "loss": 0.2729, "step": 126920 }, { "epoch": 5.26, "grad_norm": 0.54296875, "learning_rate": 0.0004631051979727875, "loss": 0.203, "step": 126930 }, { "epoch": 5.26, "grad_norm": 2.5, "learning_rate": 0.00046309952729297254, "loss": 0.2131, "step": 126940 }, { "epoch": 5.26, "grad_norm": 0.47265625, "learning_rate": 0.0004630938562121272, "loss": 0.3026, "step": 126950 }, { "epoch": 5.26, "grad_norm": 0.671875, "learning_rate": 0.0004630881847302624, "loss": 0.2168, "step": 126960 }, { "epoch": 5.26, "grad_norm": 0.388671875, "learning_rate": 0.00046308251284738855, "loss": 0.1945, "step": 126970 }, { "epoch": 5.26, "grad_norm": 0.90625, "learning_rate": 0.0004630768405635164, "loss": 0.2472, "step": 126980 }, { "epoch": 5.26, "grad_norm": 0.9140625, "learning_rate": 0.0004630711678786567, "loss": 0.2101, "step": 126990 }, { "epoch": 5.26, "grad_norm": 1.7109375, "learning_rate": 0.00046306549479282, "loss": 0.2476, "step": 127000 }, { "epoch": 5.26, "grad_norm": 0.486328125, "learning_rate": 0.00046305982130601716, "loss": 0.2312, "step": 127010 }, { "epoch": 5.26, "grad_norm": 0.93359375, "learning_rate": 0.00046305414741825866, "loss": 0.2249, "step": 127020 }, { "epoch": 5.26, "grad_norm": 0.50390625, "learning_rate": 0.00046304847312955526, "loss": 0.179, "step": 127030 }, { "epoch": 5.26, "grad_norm": 1.0078125, "learning_rate": 0.0004630427984399177, "loss": 0.2185, "step": 127040 }, { "epoch": 5.26, "grad_norm": 0.000141143798828125, "learning_rate": 0.00046303712334935657, "loss": 0.1818, "step": 127050 }, { "epoch": 5.26, "grad_norm": 0.79296875, "learning_rate": 0.00046303144785788254, "loss": 0.2109, "step": 127060 }, { "epoch": 5.26, "grad_norm": 0.60546875, "learning_rate": 0.0004630257719655064, "loss": 0.2266, "step": 127070 }, { "epoch": 5.26, "grad_norm": 0.5, "learning_rate": 0.0004630200956722387, "loss": 0.1917, "step": 127080 }, { "epoch": 5.26, "grad_norm": 0.349609375, "learning_rate": 0.00046301441897809027, "loss": 0.1914, "step": 127090 }, { "epoch": 5.26, "grad_norm": 1.46875, "learning_rate": 0.00046300874188307163, "loss": 0.2055, "step": 127100 }, { "epoch": 5.26, "grad_norm": 0.78125, "learning_rate": 0.0004630030643871935, "loss": 0.2101, "step": 127110 }, { "epoch": 5.27, "grad_norm": 0.51953125, "learning_rate": 0.0004629973864904667, "loss": 0.1741, "step": 127120 }, { "epoch": 5.27, "grad_norm": 1.796875, "learning_rate": 0.00046299170819290184, "loss": 0.2262, "step": 127130 }, { "epoch": 5.27, "grad_norm": 1.09375, "learning_rate": 0.0004629860294945095, "loss": 0.1868, "step": 127140 }, { "epoch": 5.27, "grad_norm": 0.734375, "learning_rate": 0.00046298035039530044, "loss": 0.1813, "step": 127150 }, { "epoch": 5.27, "grad_norm": 0.26953125, "learning_rate": 0.00046297467089528546, "loss": 0.1926, "step": 127160 }, { "epoch": 5.27, "grad_norm": 0.640625, "learning_rate": 0.0004629689909944751, "loss": 0.2039, "step": 127170 }, { "epoch": 5.27, "grad_norm": 0.55859375, "learning_rate": 0.00046296331069288005, "loss": 0.3192, "step": 127180 }, { "epoch": 5.27, "grad_norm": 0.44140625, "learning_rate": 0.0004629576299905111, "loss": 0.1992, "step": 127190 }, { "epoch": 5.27, "grad_norm": 0.5859375, "learning_rate": 0.0004629519488873789, "loss": 0.1839, "step": 127200 }, { "epoch": 5.27, "grad_norm": 0.486328125, "learning_rate": 0.0004629462673834941, "loss": 0.2168, "step": 127210 }, { "epoch": 5.27, "grad_norm": 0.71875, "learning_rate": 0.00046294058547886744, "loss": 0.2272, "step": 127220 }, { "epoch": 5.27, "grad_norm": 0.490234375, "learning_rate": 0.0004629349031735096, "loss": 0.2251, "step": 127230 }, { "epoch": 5.27, "grad_norm": 0.86328125, "learning_rate": 0.0004629292204674312, "loss": 0.2238, "step": 127240 }, { "epoch": 5.27, "grad_norm": 0.66015625, "learning_rate": 0.0004629235373606431, "loss": 0.1782, "step": 127250 }, { "epoch": 5.27, "grad_norm": 0.5390625, "learning_rate": 0.00046291785385315574, "loss": 0.2076, "step": 127260 }, { "epoch": 5.27, "grad_norm": 0.3359375, "learning_rate": 0.0004629121699449801, "loss": 0.1794, "step": 127270 }, { "epoch": 5.27, "grad_norm": 0.4453125, "learning_rate": 0.00046290648563612676, "loss": 0.2129, "step": 127280 }, { "epoch": 5.27, "grad_norm": 0.412109375, "learning_rate": 0.00046290080092660637, "loss": 0.1704, "step": 127290 }, { "epoch": 5.27, "grad_norm": 0.453125, "learning_rate": 0.00046289511581642964, "loss": 0.1781, "step": 127300 }, { "epoch": 5.27, "grad_norm": 1.75, "learning_rate": 0.00046288943030560736, "loss": 0.1931, "step": 127310 }, { "epoch": 5.27, "grad_norm": 0.5, "learning_rate": 0.0004628837443941501, "loss": 0.232, "step": 127320 }, { "epoch": 5.27, "grad_norm": 1.2578125, "learning_rate": 0.0004628780580820686, "loss": 0.1811, "step": 127330 }, { "epoch": 5.27, "grad_norm": 0.2236328125, "learning_rate": 0.00046287237136937364, "loss": 0.1694, "step": 127340 }, { "epoch": 5.27, "grad_norm": 0.451171875, "learning_rate": 0.00046286668425607594, "loss": 0.2077, "step": 127350 }, { "epoch": 5.28, "grad_norm": 1.15625, "learning_rate": 0.00046286099674218596, "loss": 0.204, "step": 127360 }, { "epoch": 5.28, "grad_norm": 0.9375, "learning_rate": 0.0004628553088277147, "loss": 0.269, "step": 127370 }, { "epoch": 5.28, "grad_norm": 0.82421875, "learning_rate": 0.00046284962051267264, "loss": 0.2171, "step": 127380 }, { "epoch": 5.28, "grad_norm": 1.0234375, "learning_rate": 0.0004628439317970706, "loss": 0.2365, "step": 127390 }, { "epoch": 5.28, "grad_norm": 0.7265625, "learning_rate": 0.00046283824268091934, "loss": 0.1926, "step": 127400 }, { "epoch": 5.28, "grad_norm": 0.77734375, "learning_rate": 0.00046283255316422947, "loss": 0.2006, "step": 127410 }, { "epoch": 5.28, "grad_norm": 1.59375, "learning_rate": 0.0004628268632470117, "loss": 0.1723, "step": 127420 }, { "epoch": 5.28, "grad_norm": 0.7734375, "learning_rate": 0.0004628211729292767, "loss": 0.192, "step": 127430 }, { "epoch": 5.28, "grad_norm": 1.21875, "learning_rate": 0.0004628154822110353, "loss": 0.2332, "step": 127440 }, { "epoch": 5.28, "grad_norm": 0.515625, "learning_rate": 0.0004628097910922982, "loss": 0.1922, "step": 127450 }, { "epoch": 5.28, "grad_norm": 0.365234375, "learning_rate": 0.0004628040995730759, "loss": 0.2112, "step": 127460 }, { "epoch": 5.28, "grad_norm": 0.2138671875, "learning_rate": 0.00046279840765337937, "loss": 0.2347, "step": 127470 }, { "epoch": 5.28, "grad_norm": 0.84765625, "learning_rate": 0.0004627927153332192, "loss": 0.2116, "step": 127480 }, { "epoch": 5.28, "grad_norm": 0.6015625, "learning_rate": 0.00046278702261260606, "loss": 0.2439, "step": 127490 }, { "epoch": 5.28, "grad_norm": 0.515625, "learning_rate": 0.00046278132949155085, "loss": 0.2273, "step": 127500 }, { "epoch": 5.28, "grad_norm": 0.9375, "learning_rate": 0.00046277563597006406, "loss": 0.2144, "step": 127510 }, { "epoch": 5.28, "grad_norm": 0.451171875, "learning_rate": 0.0004627699420481565, "loss": 0.2224, "step": 127520 }, { "epoch": 5.28, "grad_norm": 0.45703125, "learning_rate": 0.0004627642477258389, "loss": 0.2046, "step": 127530 }, { "epoch": 5.28, "grad_norm": 1.3828125, "learning_rate": 0.00046275855300312204, "loss": 0.2209, "step": 127540 }, { "epoch": 5.28, "grad_norm": 1.0, "learning_rate": 0.00046275285788001646, "loss": 0.228, "step": 127550 }, { "epoch": 5.28, "grad_norm": 0.9609375, "learning_rate": 0.000462747162356533, "loss": 0.2382, "step": 127560 }, { "epoch": 5.28, "grad_norm": 1.3515625, "learning_rate": 0.0004627414664326824, "loss": 0.1869, "step": 127570 }, { "epoch": 5.28, "grad_norm": 0.421875, "learning_rate": 0.0004627357701084753, "loss": 0.2711, "step": 127580 }, { "epoch": 5.28, "grad_norm": 0.765625, "learning_rate": 0.00046273007338392243, "loss": 0.1879, "step": 127590 }, { "epoch": 5.29, "grad_norm": 1.703125, "learning_rate": 0.0004627243762590346, "loss": 0.1574, "step": 127600 }, { "epoch": 5.29, "grad_norm": 0.16796875, "learning_rate": 0.0004627186787338224, "loss": 0.1457, "step": 127610 }, { "epoch": 5.29, "grad_norm": 0.70703125, "learning_rate": 0.0004627129808082966, "loss": 0.221, "step": 127620 }, { "epoch": 5.29, "grad_norm": 0.451171875, "learning_rate": 0.000462707282482468, "loss": 0.2447, "step": 127630 }, { "epoch": 5.29, "grad_norm": 0.498046875, "learning_rate": 0.0004627015837563473, "loss": 0.2106, "step": 127640 }, { "epoch": 5.29, "grad_norm": 1.015625, "learning_rate": 0.0004626958846299451, "loss": 0.26, "step": 127650 }, { "epoch": 5.29, "grad_norm": 0.703125, "learning_rate": 0.00046269018510327226, "loss": 0.1906, "step": 127660 }, { "epoch": 5.29, "grad_norm": 0.59375, "learning_rate": 0.0004626844851763394, "loss": 0.2656, "step": 127670 }, { "epoch": 5.29, "grad_norm": 0.515625, "learning_rate": 0.00046267878484915735, "loss": 0.1987, "step": 127680 }, { "epoch": 5.29, "grad_norm": 0.91015625, "learning_rate": 0.00046267308412173685, "loss": 0.2102, "step": 127690 }, { "epoch": 5.29, "grad_norm": 0.42578125, "learning_rate": 0.0004626673829940885, "loss": 0.1896, "step": 127700 }, { "epoch": 5.29, "grad_norm": 0.392578125, "learning_rate": 0.0004626616814662232, "loss": 0.203, "step": 127710 }, { "epoch": 5.29, "grad_norm": 0.47265625, "learning_rate": 0.00046265597953815146, "loss": 0.2171, "step": 127720 }, { "epoch": 5.29, "grad_norm": 0.81640625, "learning_rate": 0.00046265027720988417, "loss": 0.1849, "step": 127730 }, { "epoch": 5.29, "grad_norm": 0.041015625, "learning_rate": 0.00046264457448143205, "loss": 0.1763, "step": 127740 }, { "epoch": 5.29, "grad_norm": 0.90234375, "learning_rate": 0.0004626388713528058, "loss": 0.2171, "step": 127750 }, { "epoch": 5.29, "grad_norm": 1.3203125, "learning_rate": 0.0004626331678240161, "loss": 0.1707, "step": 127760 }, { "epoch": 5.29, "grad_norm": 0.197265625, "learning_rate": 0.00046262746389507384, "loss": 0.1711, "step": 127770 }, { "epoch": 5.29, "grad_norm": 1.265625, "learning_rate": 0.0004626217595659896, "loss": 0.2858, "step": 127780 }, { "epoch": 5.29, "grad_norm": 0.361328125, "learning_rate": 0.0004626160548367742, "loss": 0.2448, "step": 127790 }, { "epoch": 5.29, "grad_norm": 0.392578125, "learning_rate": 0.00046261034970743826, "loss": 0.1761, "step": 127800 }, { "epoch": 5.29, "grad_norm": 0.75, "learning_rate": 0.00046260464417799266, "loss": 0.2231, "step": 127810 }, { "epoch": 5.29, "grad_norm": 0.6484375, "learning_rate": 0.00046259893824844806, "loss": 0.2038, "step": 127820 }, { "epoch": 5.29, "grad_norm": 0.59765625, "learning_rate": 0.00046259323191881523, "loss": 0.2055, "step": 127830 }, { "epoch": 5.3, "grad_norm": 1.7734375, "learning_rate": 0.0004625875251891049, "loss": 0.2029, "step": 127840 }, { "epoch": 5.3, "grad_norm": 0.185546875, "learning_rate": 0.0004625818180593278, "loss": 0.2097, "step": 127850 }, { "epoch": 5.3, "grad_norm": 0.5859375, "learning_rate": 0.00046257611052949465, "loss": 0.2139, "step": 127860 }, { "epoch": 5.3, "grad_norm": 0.478515625, "learning_rate": 0.0004625704025996162, "loss": 0.2233, "step": 127870 }, { "epoch": 5.3, "grad_norm": 1.328125, "learning_rate": 0.0004625646942697033, "loss": 0.2305, "step": 127880 }, { "epoch": 5.3, "grad_norm": 0.828125, "learning_rate": 0.0004625589855397666, "loss": 0.233, "step": 127890 }, { "epoch": 5.3, "grad_norm": 0.796875, "learning_rate": 0.0004625532764098168, "loss": 0.181, "step": 127900 }, { "epoch": 5.3, "grad_norm": 0.890625, "learning_rate": 0.00046254756687986467, "loss": 0.1709, "step": 127910 }, { "epoch": 5.3, "grad_norm": 0.59375, "learning_rate": 0.000462541856949921, "loss": 0.169, "step": 127920 }, { "epoch": 5.3, "grad_norm": 0.61328125, "learning_rate": 0.0004625361466199964, "loss": 0.2227, "step": 127930 }, { "epoch": 5.3, "grad_norm": 0.65625, "learning_rate": 0.0004625304358901019, "loss": 0.2193, "step": 127940 }, { "epoch": 5.3, "grad_norm": 1.5078125, "learning_rate": 0.000462524724760248, "loss": 0.1874, "step": 127950 }, { "epoch": 5.3, "grad_norm": 0.8671875, "learning_rate": 0.0004625190132304455, "loss": 0.2301, "step": 127960 }, { "epoch": 5.3, "grad_norm": 0.80859375, "learning_rate": 0.00046251330130070515, "loss": 0.2482, "step": 127970 }, { "epoch": 5.3, "grad_norm": 0.3203125, "learning_rate": 0.0004625075889710377, "loss": 0.1928, "step": 127980 }, { "epoch": 5.3, "grad_norm": 0.447265625, "learning_rate": 0.000462501876241454, "loss": 0.1628, "step": 127990 }, { "epoch": 5.3, "grad_norm": 2.984375, "learning_rate": 0.0004624961631119647, "loss": 0.1917, "step": 128000 }, { "epoch": 5.3, "grad_norm": 0.54296875, "learning_rate": 0.0004624904495825805, "loss": 0.1556, "step": 128010 }, { "epoch": 5.3, "grad_norm": 1.0234375, "learning_rate": 0.0004624847356533123, "loss": 0.2249, "step": 128020 }, { "epoch": 5.3, "grad_norm": 0.78515625, "learning_rate": 0.00046247902132417074, "loss": 0.2153, "step": 128030 }, { "epoch": 5.3, "grad_norm": 0.83984375, "learning_rate": 0.0004624733065951666, "loss": 0.2027, "step": 128040 }, { "epoch": 5.3, "grad_norm": 0.984375, "learning_rate": 0.00046246759146631065, "loss": 0.2315, "step": 128050 }, { "epoch": 5.3, "grad_norm": 0.455078125, "learning_rate": 0.00046246187593761357, "loss": 0.1946, "step": 128060 }, { "epoch": 5.3, "grad_norm": 1.0078125, "learning_rate": 0.0004624561600090863, "loss": 0.218, "step": 128070 }, { "epoch": 5.31, "grad_norm": 0.36328125, "learning_rate": 0.0004624504436807394, "loss": 0.159, "step": 128080 }, { "epoch": 5.31, "grad_norm": 0.84375, "learning_rate": 0.00046244472695258375, "loss": 0.1639, "step": 128090 }, { "epoch": 5.31, "grad_norm": 0.234375, "learning_rate": 0.0004624390098246301, "loss": 0.1904, "step": 128100 }, { "epoch": 5.31, "grad_norm": 0.4453125, "learning_rate": 0.0004624332922968891, "loss": 0.2017, "step": 128110 }, { "epoch": 5.31, "grad_norm": 0.61328125, "learning_rate": 0.0004624275743693716, "loss": 0.2079, "step": 128120 }, { "epoch": 5.31, "grad_norm": 0.6484375, "learning_rate": 0.00046242185604208837, "loss": 0.1926, "step": 128130 }, { "epoch": 5.31, "grad_norm": 0.6640625, "learning_rate": 0.0004624161373150501, "loss": 0.2102, "step": 128140 }, { "epoch": 5.31, "grad_norm": 1.1875, "learning_rate": 0.00046241041818826766, "loss": 0.1555, "step": 128150 }, { "epoch": 5.31, "grad_norm": 0.5546875, "learning_rate": 0.00046240469866175174, "loss": 0.1652, "step": 128160 }, { "epoch": 5.31, "grad_norm": 0.69921875, "learning_rate": 0.0004623989787355131, "loss": 0.1766, "step": 128170 }, { "epoch": 5.31, "grad_norm": 0.60546875, "learning_rate": 0.0004623932584095625, "loss": 0.1758, "step": 128180 }, { "epoch": 5.31, "grad_norm": 0.7265625, "learning_rate": 0.0004623875376839107, "loss": 0.2048, "step": 128190 }, { "epoch": 5.31, "grad_norm": 0.3671875, "learning_rate": 0.0004623818165585686, "loss": 0.2171, "step": 128200 }, { "epoch": 5.31, "grad_norm": 0.92578125, "learning_rate": 0.0004623760950335467, "loss": 0.1991, "step": 128210 }, { "epoch": 5.31, "grad_norm": 0.625, "learning_rate": 0.000462370373108856, "loss": 0.1913, "step": 128220 }, { "epoch": 5.31, "grad_norm": 1.6171875, "learning_rate": 0.0004623646507845073, "loss": 0.1698, "step": 128230 }, { "epoch": 5.31, "grad_norm": 0.890625, "learning_rate": 0.0004623589280605111, "loss": 0.2173, "step": 128240 }, { "epoch": 5.31, "grad_norm": 0.486328125, "learning_rate": 0.0004623532049368784, "loss": 0.1996, "step": 128250 }, { "epoch": 5.31, "grad_norm": 0.53515625, "learning_rate": 0.00046234748141361996, "loss": 0.2039, "step": 128260 }, { "epoch": 5.31, "grad_norm": 0.48046875, "learning_rate": 0.00046234175749074635, "loss": 0.2101, "step": 128270 }, { "epoch": 5.31, "grad_norm": 1.4375, "learning_rate": 0.0004623360331682686, "loss": 0.1908, "step": 128280 }, { "epoch": 5.31, "grad_norm": 1.5625, "learning_rate": 0.0004623303084461973, "loss": 0.2395, "step": 128290 }, { "epoch": 5.31, "grad_norm": 0.73828125, "learning_rate": 0.0004623245833245433, "loss": 0.204, "step": 128300 }, { "epoch": 5.31, "grad_norm": 0.78515625, "learning_rate": 0.0004623188578033174, "loss": 0.1837, "step": 128310 }, { "epoch": 5.31, "grad_norm": 0.6328125, "learning_rate": 0.00046231313188253034, "loss": 0.2211, "step": 128320 }, { "epoch": 5.32, "grad_norm": 0.546875, "learning_rate": 0.00046230740556219284, "loss": 0.1564, "step": 128330 }, { "epoch": 5.32, "grad_norm": 0.49609375, "learning_rate": 0.0004623016788423158, "loss": 0.1948, "step": 128340 }, { "epoch": 5.32, "grad_norm": 0.91015625, "learning_rate": 0.00046229595172290984, "loss": 0.1744, "step": 128350 }, { "epoch": 5.32, "grad_norm": 0.30859375, "learning_rate": 0.0004622902242039859, "loss": 0.201, "step": 128360 }, { "epoch": 5.32, "grad_norm": 0.75, "learning_rate": 0.0004622844962855546, "loss": 0.2436, "step": 128370 }, { "epoch": 5.32, "grad_norm": 0.5703125, "learning_rate": 0.00046227876796762693, "loss": 0.1822, "step": 128380 }, { "epoch": 5.32, "grad_norm": 0.57421875, "learning_rate": 0.0004622730392502134, "loss": 0.2262, "step": 128390 }, { "epoch": 5.32, "grad_norm": 0.59765625, "learning_rate": 0.00046226731013332504, "loss": 0.2592, "step": 128400 }, { "epoch": 5.32, "grad_norm": 0.9921875, "learning_rate": 0.00046226158061697247, "loss": 0.1754, "step": 128410 }, { "epoch": 5.32, "grad_norm": 0.82421875, "learning_rate": 0.0004622558507011666, "loss": 0.2149, "step": 128420 }, { "epoch": 5.32, "grad_norm": 1.40625, "learning_rate": 0.00046225012038591807, "loss": 0.2334, "step": 128430 }, { "epoch": 5.32, "grad_norm": 1.0078125, "learning_rate": 0.0004622443896712377, "loss": 0.1976, "step": 128440 }, { "epoch": 5.32, "grad_norm": 0.8515625, "learning_rate": 0.00046223865855713636, "loss": 0.2439, "step": 128450 }, { "epoch": 5.32, "grad_norm": 0.435546875, "learning_rate": 0.00046223292704362484, "loss": 0.2001, "step": 128460 }, { "epoch": 5.32, "grad_norm": 0.609375, "learning_rate": 0.0004622271951307138, "loss": 0.2086, "step": 128470 }, { "epoch": 5.32, "grad_norm": 0.4765625, "learning_rate": 0.0004622214628184141, "loss": 0.1986, "step": 128480 }, { "epoch": 5.32, "grad_norm": 0.1943359375, "learning_rate": 0.00046221573010673655, "loss": 0.1772, "step": 128490 }, { "epoch": 5.32, "grad_norm": 0.1259765625, "learning_rate": 0.0004622099969956919, "loss": 0.2376, "step": 128500 }, { "epoch": 5.32, "grad_norm": 1.0234375, "learning_rate": 0.00046220426348529095, "loss": 0.1673, "step": 128510 }, { "epoch": 5.32, "grad_norm": 0.376953125, "learning_rate": 0.0004621985295755445, "loss": 0.1942, "step": 128520 }, { "epoch": 5.32, "grad_norm": 1.015625, "learning_rate": 0.0004621927952664633, "loss": 0.2042, "step": 128530 }, { "epoch": 5.32, "grad_norm": 1.4609375, "learning_rate": 0.00046218706055805827, "loss": 0.2447, "step": 128540 }, { "epoch": 5.32, "grad_norm": 0.80859375, "learning_rate": 0.00046218132545033995, "loss": 0.2204, "step": 128550 }, { "epoch": 5.32, "grad_norm": 0.1982421875, "learning_rate": 0.00046217558994331946, "loss": 0.2065, "step": 128560 }, { "epoch": 5.33, "grad_norm": 1.421875, "learning_rate": 0.0004621698540370073, "loss": 0.2149, "step": 128570 }, { "epoch": 5.33, "grad_norm": 0.66796875, "learning_rate": 0.00046216411773141445, "loss": 0.2163, "step": 128580 }, { "epoch": 5.33, "grad_norm": 1.484375, "learning_rate": 0.0004621583810265516, "loss": 0.241, "step": 128590 }, { "epoch": 5.33, "grad_norm": 0.96484375, "learning_rate": 0.0004621526439224296, "loss": 0.2561, "step": 128600 }, { "epoch": 5.33, "grad_norm": 1.2265625, "learning_rate": 0.0004621469064190592, "loss": 0.2288, "step": 128610 }, { "epoch": 5.33, "grad_norm": 0.33203125, "learning_rate": 0.00046214116851645134, "loss": 0.189, "step": 128620 }, { "epoch": 5.33, "grad_norm": 0.79296875, "learning_rate": 0.0004621354302146166, "loss": 0.2479, "step": 128630 }, { "epoch": 5.33, "grad_norm": 0.5625, "learning_rate": 0.00046212969151356595, "loss": 0.2659, "step": 128640 }, { "epoch": 5.33, "grad_norm": 0.478515625, "learning_rate": 0.0004621239524133101, "loss": 0.214, "step": 128650 }, { "epoch": 5.33, "grad_norm": 0.49609375, "learning_rate": 0.0004621182129138599, "loss": 0.2093, "step": 128660 }, { "epoch": 5.33, "grad_norm": 0.69140625, "learning_rate": 0.00046211247301522615, "loss": 0.2241, "step": 128670 }, { "epoch": 5.33, "grad_norm": 0.49609375, "learning_rate": 0.00046210673271741956, "loss": 0.2067, "step": 128680 }, { "epoch": 5.33, "grad_norm": 0.5078125, "learning_rate": 0.0004621009920204511, "loss": 0.1674, "step": 128690 }, { "epoch": 5.33, "grad_norm": 0.78125, "learning_rate": 0.0004620952509243314, "loss": 0.2117, "step": 128700 }, { "epoch": 5.33, "grad_norm": 0.47265625, "learning_rate": 0.00046208950942907136, "loss": 0.1988, "step": 128710 }, { "epoch": 5.33, "grad_norm": 0.6875, "learning_rate": 0.0004620837675346817, "loss": 0.2667, "step": 128720 }, { "epoch": 5.33, "grad_norm": 0.9140625, "learning_rate": 0.00046207802524117345, "loss": 0.2061, "step": 128730 }, { "epoch": 5.33, "grad_norm": 0.5625, "learning_rate": 0.00046207228254855715, "loss": 0.2094, "step": 128740 }, { "epoch": 5.33, "grad_norm": 1.2421875, "learning_rate": 0.00046206653945684374, "loss": 0.1592, "step": 128750 }, { "epoch": 5.33, "grad_norm": 0.52734375, "learning_rate": 0.000462060795966044, "loss": 0.2182, "step": 128760 }, { "epoch": 5.33, "grad_norm": 0.5546875, "learning_rate": 0.00046205505207616874, "loss": 0.2022, "step": 128770 }, { "epoch": 5.33, "grad_norm": 0.83984375, "learning_rate": 0.00046204930778722883, "loss": 0.1991, "step": 128780 }, { "epoch": 5.33, "grad_norm": 0.6953125, "learning_rate": 0.00046204356309923497, "loss": 0.1961, "step": 128790 }, { "epoch": 5.33, "grad_norm": 0.279296875, "learning_rate": 0.000462037818012198, "loss": 0.2121, "step": 128800 }, { "epoch": 5.34, "grad_norm": 0.5234375, "learning_rate": 0.0004620320725261288, "loss": 0.1856, "step": 128810 }, { "epoch": 5.34, "grad_norm": 1.25, "learning_rate": 0.0004620263266410381, "loss": 0.2374, "step": 128820 }, { "epoch": 5.34, "grad_norm": 0.396484375, "learning_rate": 0.00046202058035693677, "loss": 0.2321, "step": 128830 }, { "epoch": 5.34, "grad_norm": 0.69140625, "learning_rate": 0.00046201483367383554, "loss": 0.1902, "step": 128840 }, { "epoch": 5.34, "grad_norm": 0.96875, "learning_rate": 0.0004620090865917455, "loss": 0.2183, "step": 128850 }, { "epoch": 5.34, "grad_norm": 0.625, "learning_rate": 0.00046200333911067704, "loss": 0.1763, "step": 128860 }, { "epoch": 5.34, "grad_norm": 1.1875, "learning_rate": 0.00046199759123064123, "loss": 0.1793, "step": 128870 }, { "epoch": 5.34, "grad_norm": 0.73828125, "learning_rate": 0.0004619918429516489, "loss": 0.2057, "step": 128880 }, { "epoch": 5.34, "grad_norm": 0.302734375, "learning_rate": 0.00046198609427371075, "loss": 0.1565, "step": 128890 }, { "epoch": 5.34, "grad_norm": 1.1796875, "learning_rate": 0.0004619803451968377, "loss": 0.2003, "step": 128900 }, { "epoch": 5.34, "grad_norm": 0.26953125, "learning_rate": 0.0004619745957210405, "loss": 0.1509, "step": 128910 }, { "epoch": 5.34, "grad_norm": 0.5703125, "learning_rate": 0.00046196884584633004, "loss": 0.164, "step": 128920 }, { "epoch": 5.34, "grad_norm": 1.828125, "learning_rate": 0.0004619630955727171, "loss": 0.2489, "step": 128930 }, { "epoch": 5.34, "grad_norm": 0.796875, "learning_rate": 0.00046195734490021254, "loss": 0.254, "step": 128940 }, { "epoch": 5.34, "grad_norm": 0.93359375, "learning_rate": 0.00046195159382882714, "loss": 0.252, "step": 128950 }, { "epoch": 5.34, "grad_norm": 0.69140625, "learning_rate": 0.00046194584235857163, "loss": 0.2225, "step": 128960 }, { "epoch": 5.34, "grad_norm": 1.2578125, "learning_rate": 0.00046194009048945706, "loss": 0.198, "step": 128970 }, { "epoch": 5.34, "grad_norm": 0.703125, "learning_rate": 0.000461934338221494, "loss": 0.2139, "step": 128980 }, { "epoch": 5.34, "grad_norm": 0.5859375, "learning_rate": 0.0004619285855546935, "loss": 0.2321, "step": 128990 }, { "epoch": 5.34, "grad_norm": 0.87890625, "learning_rate": 0.00046192283248906626, "loss": 0.2587, "step": 129000 }, { "epoch": 5.34, "grad_norm": 0.8359375, "learning_rate": 0.0004619170790246231, "loss": 0.1752, "step": 129010 }, { "epoch": 5.34, "grad_norm": 1.0078125, "learning_rate": 0.0004619113251613749, "loss": 0.2364, "step": 129020 }, { "epoch": 5.34, "grad_norm": 0.2265625, "learning_rate": 0.0004619055708993325, "loss": 0.242, "step": 129030 }, { "epoch": 5.34, "grad_norm": 0.7265625, "learning_rate": 0.00046189981623850673, "loss": 0.2195, "step": 129040 }, { "epoch": 5.35, "grad_norm": 1.328125, "learning_rate": 0.0004618940611789083, "loss": 0.2189, "step": 129050 }, { "epoch": 5.35, "grad_norm": 2.34375, "learning_rate": 0.0004618883057205482, "loss": 0.2212, "step": 129060 }, { "epoch": 5.35, "grad_norm": 0.31640625, "learning_rate": 0.00046188254986343717, "loss": 0.2148, "step": 129070 }, { "epoch": 5.35, "grad_norm": 0.91015625, "learning_rate": 0.00046187679360758607, "loss": 0.2324, "step": 129080 }, { "epoch": 5.35, "grad_norm": 0.6953125, "learning_rate": 0.0004618710369530057, "loss": 0.1709, "step": 129090 }, { "epoch": 5.35, "grad_norm": 0.51953125, "learning_rate": 0.0004618652798997069, "loss": 0.1897, "step": 129100 }, { "epoch": 5.35, "grad_norm": 0.65625, "learning_rate": 0.00046185952244770056, "loss": 0.247, "step": 129110 }, { "epoch": 5.35, "grad_norm": 0.8125, "learning_rate": 0.00046185376459699745, "loss": 0.233, "step": 129120 }, { "epoch": 5.35, "grad_norm": 0.61328125, "learning_rate": 0.0004618480063476085, "loss": 0.2228, "step": 129130 }, { "epoch": 5.35, "grad_norm": 0.640625, "learning_rate": 0.00046184224769954443, "loss": 0.21, "step": 129140 }, { "epoch": 5.35, "grad_norm": 0.349609375, "learning_rate": 0.00046183648865281616, "loss": 0.189, "step": 129150 }, { "epoch": 5.35, "grad_norm": 0.98046875, "learning_rate": 0.0004618307292074344, "loss": 0.213, "step": 129160 }, { "epoch": 5.35, "grad_norm": 1.0078125, "learning_rate": 0.00046182496936341013, "loss": 0.244, "step": 129170 }, { "epoch": 5.35, "grad_norm": 2.640625, "learning_rate": 0.00046181920912075416, "loss": 0.1937, "step": 129180 }, { "epoch": 5.35, "grad_norm": 0.6015625, "learning_rate": 0.00046181344847947734, "loss": 0.2117, "step": 129190 }, { "epoch": 5.35, "grad_norm": 1.21875, "learning_rate": 0.00046180768743959045, "loss": 0.211, "step": 129200 }, { "epoch": 5.35, "grad_norm": 0.71484375, "learning_rate": 0.0004618019260011044, "loss": 0.2759, "step": 129210 }, { "epoch": 5.35, "grad_norm": 0.43359375, "learning_rate": 0.0004617961641640299, "loss": 0.1988, "step": 129220 }, { "epoch": 5.35, "grad_norm": 0.7109375, "learning_rate": 0.000461790401928378, "loss": 0.2061, "step": 129230 }, { "epoch": 5.35, "grad_norm": 0.6953125, "learning_rate": 0.00046178463929415935, "loss": 0.1989, "step": 129240 }, { "epoch": 5.35, "grad_norm": 0.390625, "learning_rate": 0.000461778876261385, "loss": 0.2088, "step": 129250 }, { "epoch": 5.35, "grad_norm": 0.345703125, "learning_rate": 0.00046177311283006556, "loss": 0.1817, "step": 129260 }, { "epoch": 5.35, "grad_norm": 0.32421875, "learning_rate": 0.00046176734900021206, "loss": 0.1951, "step": 129270 }, { "epoch": 5.35, "grad_norm": 0.98828125, "learning_rate": 0.0004617615847718352, "loss": 0.2283, "step": 129280 }, { "epoch": 5.36, "grad_norm": 0.64453125, "learning_rate": 0.000461755820144946, "loss": 0.2206, "step": 129290 }, { "epoch": 5.36, "grad_norm": 1.125, "learning_rate": 0.00046175005511955516, "loss": 0.2522, "step": 129300 }, { "epoch": 5.36, "grad_norm": 0.359375, "learning_rate": 0.00046174428969567363, "loss": 0.1741, "step": 129310 }, { "epoch": 5.36, "grad_norm": 0.5390625, "learning_rate": 0.0004617385238733122, "loss": 0.2149, "step": 129320 }, { "epoch": 5.36, "grad_norm": 0.8515625, "learning_rate": 0.0004617327576524817, "loss": 0.2311, "step": 129330 }, { "epoch": 5.36, "grad_norm": 0.62890625, "learning_rate": 0.000461726991033193, "loss": 0.1966, "step": 129340 }, { "epoch": 5.36, "grad_norm": 0.5078125, "learning_rate": 0.00046172122401545696, "loss": 0.2169, "step": 129350 }, { "epoch": 5.36, "grad_norm": 0.259765625, "learning_rate": 0.0004617154565992845, "loss": 0.1603, "step": 129360 }, { "epoch": 5.36, "grad_norm": 0.78125, "learning_rate": 0.00046170968878468633, "loss": 0.2613, "step": 129370 }, { "epoch": 5.36, "grad_norm": 0.83984375, "learning_rate": 0.00046170392057167346, "loss": 0.2346, "step": 129380 }, { "epoch": 5.36, "grad_norm": 0.94921875, "learning_rate": 0.0004616981519602567, "loss": 0.215, "step": 129390 }, { "epoch": 5.36, "grad_norm": 0.921875, "learning_rate": 0.0004616923829504468, "loss": 0.2129, "step": 129400 }, { "epoch": 5.36, "grad_norm": 0.50390625, "learning_rate": 0.0004616866135422547, "loss": 0.1675, "step": 129410 }, { "epoch": 5.36, "grad_norm": 0.8828125, "learning_rate": 0.00046168084373569135, "loss": 0.1869, "step": 129420 }, { "epoch": 5.36, "grad_norm": 0.5234375, "learning_rate": 0.00046167507353076745, "loss": 0.24, "step": 129430 }, { "epoch": 5.36, "grad_norm": 0.68359375, "learning_rate": 0.00046166930292749385, "loss": 0.1692, "step": 129440 }, { "epoch": 5.36, "grad_norm": 0.9609375, "learning_rate": 0.0004616635319258816, "loss": 0.2103, "step": 129450 }, { "epoch": 5.36, "grad_norm": 1.1875, "learning_rate": 0.00046165776052594146, "loss": 0.2224, "step": 129460 }, { "epoch": 5.36, "grad_norm": 0.5390625, "learning_rate": 0.00046165198872768413, "loss": 0.217, "step": 129470 }, { "epoch": 5.36, "grad_norm": 0.859375, "learning_rate": 0.0004616462165311207, "loss": 0.2157, "step": 129480 }, { "epoch": 5.36, "grad_norm": 0.39453125, "learning_rate": 0.0004616404439362619, "loss": 0.2152, "step": 129490 }, { "epoch": 5.36, "grad_norm": 0.70703125, "learning_rate": 0.0004616346709431187, "loss": 0.2284, "step": 129500 }, { "epoch": 5.36, "grad_norm": 0.97265625, "learning_rate": 0.0004616288975517019, "loss": 0.2258, "step": 129510 }, { "epoch": 5.36, "grad_norm": 0.67578125, "learning_rate": 0.0004616231237620223, "loss": 0.1997, "step": 129520 }, { "epoch": 5.37, "grad_norm": 1.09375, "learning_rate": 0.0004616173495740909, "loss": 0.2387, "step": 129530 }, { "epoch": 5.37, "grad_norm": 0.9765625, "learning_rate": 0.0004616115749879185, "loss": 0.2465, "step": 129540 }, { "epoch": 5.37, "grad_norm": 0.435546875, "learning_rate": 0.00046160580000351593, "loss": 0.1884, "step": 129550 }, { "epoch": 5.37, "grad_norm": 0.486328125, "learning_rate": 0.00046160002462089413, "loss": 0.1573, "step": 129560 }, { "epoch": 5.37, "grad_norm": 0.6328125, "learning_rate": 0.0004615942488400639, "loss": 0.2253, "step": 129570 }, { "epoch": 5.37, "grad_norm": 0.408203125, "learning_rate": 0.0004615884726610362, "loss": 0.2313, "step": 129580 }, { "epoch": 5.37, "grad_norm": 0.734375, "learning_rate": 0.00046158269608382175, "loss": 0.169, "step": 129590 }, { "epoch": 5.37, "grad_norm": 1.171875, "learning_rate": 0.00046157691910843156, "loss": 0.1819, "step": 129600 }, { "epoch": 5.37, "grad_norm": 1.0703125, "learning_rate": 0.0004615711417348765, "loss": 0.1835, "step": 129610 }, { "epoch": 5.37, "grad_norm": 0.91796875, "learning_rate": 0.0004615653639631674, "loss": 0.2458, "step": 129620 }, { "epoch": 5.37, "grad_norm": 0.54296875, "learning_rate": 0.00046155958579331506, "loss": 0.1984, "step": 129630 }, { "epoch": 5.37, "grad_norm": 0.88671875, "learning_rate": 0.0004615538072253305, "loss": 0.2299, "step": 129640 }, { "epoch": 5.37, "grad_norm": 1.4140625, "learning_rate": 0.0004615480282592245, "loss": 0.1609, "step": 129650 }, { "epoch": 5.37, "grad_norm": 1.5859375, "learning_rate": 0.0004615422488950079, "loss": 0.2564, "step": 129660 }, { "epoch": 5.37, "grad_norm": 0.59375, "learning_rate": 0.00046153646913269164, "loss": 0.2241, "step": 129670 }, { "epoch": 5.37, "grad_norm": 0.6015625, "learning_rate": 0.00046153068897228665, "loss": 0.2118, "step": 129680 }, { "epoch": 5.37, "grad_norm": 0.53125, "learning_rate": 0.00046152490841380376, "loss": 0.2175, "step": 129690 }, { "epoch": 5.37, "grad_norm": 0.68359375, "learning_rate": 0.00046151912745725376, "loss": 0.2236, "step": 129700 }, { "epoch": 5.37, "grad_norm": 0.416015625, "learning_rate": 0.0004615133461026476, "loss": 0.2146, "step": 129710 }, { "epoch": 5.37, "grad_norm": 0.421875, "learning_rate": 0.00046150756434999626, "loss": 0.2207, "step": 129720 }, { "epoch": 5.37, "grad_norm": 0.71484375, "learning_rate": 0.0004615017821993105, "loss": 0.2241, "step": 129730 }, { "epoch": 5.37, "grad_norm": 0.7421875, "learning_rate": 0.0004614959996506012, "loss": 0.2086, "step": 129740 }, { "epoch": 5.37, "grad_norm": 1.265625, "learning_rate": 0.00046149021670387923, "loss": 0.2429, "step": 129750 }, { "epoch": 5.37, "grad_norm": 0.94921875, "learning_rate": 0.00046148443335915554, "loss": 0.196, "step": 129760 }, { "epoch": 5.38, "grad_norm": 0.7265625, "learning_rate": 0.00046147864961644096, "loss": 0.2615, "step": 129770 }, { "epoch": 5.38, "grad_norm": 0.84765625, "learning_rate": 0.0004614728654757464, "loss": 0.1792, "step": 129780 }, { "epoch": 5.38, "grad_norm": 0.404296875, "learning_rate": 0.0004614670809370828, "loss": 0.1801, "step": 129790 }, { "epoch": 5.38, "grad_norm": 0.3984375, "learning_rate": 0.00046146129600046094, "loss": 0.2111, "step": 129800 }, { "epoch": 5.38, "grad_norm": 1.046875, "learning_rate": 0.00046145551066589184, "loss": 0.213, "step": 129810 }, { "epoch": 5.38, "grad_norm": 0.84765625, "learning_rate": 0.0004614497249333862, "loss": 0.2489, "step": 129820 }, { "epoch": 5.38, "grad_norm": 0.8984375, "learning_rate": 0.0004614439388029551, "loss": 0.2275, "step": 129830 }, { "epoch": 5.38, "grad_norm": 0.73046875, "learning_rate": 0.00046143815227460926, "loss": 0.2593, "step": 129840 }, { "epoch": 5.38, "grad_norm": 0.9453125, "learning_rate": 0.0004614323653483597, "loss": 0.1726, "step": 129850 }, { "epoch": 5.38, "grad_norm": 1.046875, "learning_rate": 0.00046142657802421727, "loss": 0.1961, "step": 129860 }, { "epoch": 5.38, "grad_norm": 1.6015625, "learning_rate": 0.0004614207903021928, "loss": 0.2276, "step": 129870 }, { "epoch": 5.38, "grad_norm": 0.427734375, "learning_rate": 0.00046141500218229727, "loss": 0.1944, "step": 129880 }, { "epoch": 5.38, "grad_norm": 0.859375, "learning_rate": 0.00046140921366454154, "loss": 0.1957, "step": 129890 }, { "epoch": 5.38, "grad_norm": 0.451171875, "learning_rate": 0.0004614034247489365, "loss": 0.1804, "step": 129900 }, { "epoch": 5.38, "grad_norm": 1.0703125, "learning_rate": 0.000461397635435493, "loss": 0.2329, "step": 129910 }, { "epoch": 5.38, "grad_norm": 0.287109375, "learning_rate": 0.000461391845724222, "loss": 0.1862, "step": 129920 }, { "epoch": 5.38, "grad_norm": 0.216796875, "learning_rate": 0.0004613860556151344, "loss": 0.2049, "step": 129930 }, { "epoch": 5.38, "grad_norm": 1.015625, "learning_rate": 0.00046138026510824103, "loss": 0.2027, "step": 129940 }, { "epoch": 5.38, "grad_norm": 0.375, "learning_rate": 0.0004613744742035529, "loss": 0.2362, "step": 129950 }, { "epoch": 5.38, "grad_norm": 3.59375, "learning_rate": 0.0004613686829010808, "loss": 0.2056, "step": 129960 }, { "epoch": 5.38, "grad_norm": 0.609375, "learning_rate": 0.0004613628912008356, "loss": 0.2409, "step": 129970 }, { "epoch": 5.38, "grad_norm": 0.3828125, "learning_rate": 0.0004613570991028284, "loss": 0.2104, "step": 129980 }, { "epoch": 5.38, "grad_norm": 0.35546875, "learning_rate": 0.00046135130660706985, "loss": 0.2034, "step": 129990 }, { "epoch": 5.38, "grad_norm": 0.61328125, "learning_rate": 0.000461345513713571, "loss": 0.2532, "step": 130000 }, { "epoch": 5.38, "grad_norm": 0.33203125, "learning_rate": 0.0004613397204223427, "loss": 0.2011, "step": 130010 }, { "epoch": 5.39, "grad_norm": 0.474609375, "learning_rate": 0.00046133392673339594, "loss": 0.1616, "step": 130020 }, { "epoch": 5.39, "grad_norm": 1.6328125, "learning_rate": 0.00046132813264674146, "loss": 0.2147, "step": 130030 }, { "epoch": 5.39, "grad_norm": 0.3828125, "learning_rate": 0.00046132233816239035, "loss": 0.2247, "step": 130040 }, { "epoch": 5.39, "grad_norm": 0.421875, "learning_rate": 0.00046131654328035335, "loss": 0.1933, "step": 130050 }, { "epoch": 5.39, "grad_norm": 0.65625, "learning_rate": 0.00046131074800064145, "loss": 0.2065, "step": 130060 }, { "epoch": 5.39, "grad_norm": 0.408203125, "learning_rate": 0.00046130495232326555, "loss": 0.1783, "step": 130070 }, { "epoch": 5.39, "grad_norm": 0.90625, "learning_rate": 0.0004612991562482365, "loss": 0.2508, "step": 130080 }, { "epoch": 5.39, "grad_norm": 1.3984375, "learning_rate": 0.0004612933597755653, "loss": 0.2387, "step": 130090 }, { "epoch": 5.39, "grad_norm": 0.5234375, "learning_rate": 0.0004612875629052629, "loss": 0.1701, "step": 130100 }, { "epoch": 5.39, "grad_norm": 0.84375, "learning_rate": 0.00046128176563733995, "loss": 0.1923, "step": 130110 }, { "epoch": 5.39, "grad_norm": 0.6640625, "learning_rate": 0.0004612759679718076, "loss": 0.1991, "step": 130120 }, { "epoch": 5.39, "grad_norm": 0.74609375, "learning_rate": 0.00046127016990867676, "loss": 0.267, "step": 130130 }, { "epoch": 5.39, "grad_norm": 0.671875, "learning_rate": 0.0004612643714479582, "loss": 0.2361, "step": 130140 }, { "epoch": 5.39, "grad_norm": 0.43359375, "learning_rate": 0.00046125857258966295, "loss": 0.2124, "step": 130150 }, { "epoch": 5.39, "grad_norm": 1.0234375, "learning_rate": 0.0004612527733338019, "loss": 0.1829, "step": 130160 }, { "epoch": 5.39, "grad_norm": 1.09375, "learning_rate": 0.0004612469736803859, "loss": 0.2302, "step": 130170 }, { "epoch": 5.39, "grad_norm": 0.70703125, "learning_rate": 0.0004612411736294259, "loss": 0.1696, "step": 130180 }, { "epoch": 5.39, "grad_norm": 0.435546875, "learning_rate": 0.0004612353731809328, "loss": 0.2221, "step": 130190 }, { "epoch": 5.39, "grad_norm": 0.51171875, "learning_rate": 0.0004612295723349176, "loss": 0.2024, "step": 130200 }, { "epoch": 5.39, "grad_norm": 0.57421875, "learning_rate": 0.0004612237710913911, "loss": 0.2249, "step": 130210 }, { "epoch": 5.39, "grad_norm": 1.3671875, "learning_rate": 0.00046121796945036437, "loss": 0.2086, "step": 130220 }, { "epoch": 5.39, "grad_norm": 3.078125, "learning_rate": 0.0004612121674118482, "loss": 0.2445, "step": 130230 }, { "epoch": 5.39, "grad_norm": 1.2421875, "learning_rate": 0.00046120636497585344, "loss": 0.2521, "step": 130240 }, { "epoch": 5.39, "grad_norm": 0.37890625, "learning_rate": 0.0004612005621423913, "loss": 0.2058, "step": 130250 }, { "epoch": 5.4, "grad_norm": 1.3515625, "learning_rate": 0.00046119475891147235, "loss": 0.2082, "step": 130260 }, { "epoch": 5.4, "grad_norm": 0.71484375, "learning_rate": 0.0004611889552831077, "loss": 0.1633, "step": 130270 }, { "epoch": 5.4, "grad_norm": 0.0, "learning_rate": 0.00046118315125730827, "loss": 0.1863, "step": 130280 }, { "epoch": 5.4, "grad_norm": 0.8125, "learning_rate": 0.0004611773468340849, "loss": 0.2206, "step": 130290 }, { "epoch": 5.4, "grad_norm": 1.71875, "learning_rate": 0.0004611715420134486, "loss": 0.1776, "step": 130300 }, { "epoch": 5.4, "grad_norm": 0.7734375, "learning_rate": 0.0004611657367954103, "loss": 0.2193, "step": 130310 }, { "epoch": 5.4, "grad_norm": 0.54296875, "learning_rate": 0.00046115993117998083, "loss": 0.2203, "step": 130320 }, { "epoch": 5.4, "grad_norm": 0.6171875, "learning_rate": 0.0004611541251671712, "loss": 0.1838, "step": 130330 }, { "epoch": 5.4, "grad_norm": 0.73046875, "learning_rate": 0.0004611483187569923, "loss": 0.1754, "step": 130340 }, { "epoch": 5.4, "grad_norm": 1.0390625, "learning_rate": 0.0004611425119494551, "loss": 0.1935, "step": 130350 }, { "epoch": 5.4, "grad_norm": 0.953125, "learning_rate": 0.0004611367047445705, "loss": 0.1583, "step": 130360 }, { "epoch": 5.4, "grad_norm": 0.55078125, "learning_rate": 0.00046113089714234937, "loss": 0.2109, "step": 130370 }, { "epoch": 5.4, "grad_norm": 0.0, "learning_rate": 0.00046112508914280273, "loss": 0.2406, "step": 130380 }, { "epoch": 5.4, "grad_norm": 1.1328125, "learning_rate": 0.00046111928074594143, "loss": 0.2242, "step": 130390 }, { "epoch": 5.4, "grad_norm": 0.50390625, "learning_rate": 0.00046111347195177646, "loss": 0.1855, "step": 130400 }, { "epoch": 5.4, "grad_norm": 0.65625, "learning_rate": 0.00046110766276031873, "loss": 0.2767, "step": 130410 }, { "epoch": 5.4, "grad_norm": 1.3046875, "learning_rate": 0.0004611018531715792, "loss": 0.1993, "step": 130420 }, { "epoch": 5.4, "grad_norm": 0.421875, "learning_rate": 0.00046109604318556876, "loss": 0.1646, "step": 130430 }, { "epoch": 5.4, "grad_norm": 0.8046875, "learning_rate": 0.0004610902328022983, "loss": 0.2002, "step": 130440 }, { "epoch": 5.4, "grad_norm": 1.0234375, "learning_rate": 0.00046108442202177897, "loss": 0.1686, "step": 130450 }, { "epoch": 5.4, "grad_norm": 0.921875, "learning_rate": 0.0004610786108440215, "loss": 0.2066, "step": 130460 }, { "epoch": 5.4, "grad_norm": 0.86328125, "learning_rate": 0.0004610727992690368, "loss": 0.2248, "step": 130470 }, { "epoch": 5.4, "grad_norm": 0.376953125, "learning_rate": 0.0004610669872968359, "loss": 0.1899, "step": 130480 }, { "epoch": 5.4, "grad_norm": 0.4453125, "learning_rate": 0.0004610611749274298, "loss": 0.1678, "step": 130490 }, { "epoch": 5.41, "grad_norm": 4.90625, "learning_rate": 0.0004610553621608293, "loss": 0.1344, "step": 130500 }, { "epoch": 5.41, "grad_norm": 0.890625, "learning_rate": 0.0004610495489970454, "loss": 0.1715, "step": 130510 }, { "epoch": 5.41, "grad_norm": 0.486328125, "learning_rate": 0.00046104373543608915, "loss": 0.2128, "step": 130520 }, { "epoch": 5.41, "grad_norm": 0.85546875, "learning_rate": 0.00046103792147797126, "loss": 0.2522, "step": 130530 }, { "epoch": 5.41, "grad_norm": 0.77734375, "learning_rate": 0.00046103210712270285, "loss": 0.1851, "step": 130540 }, { "epoch": 5.41, "grad_norm": 0.33203125, "learning_rate": 0.0004610262923702947, "loss": 0.1815, "step": 130550 }, { "epoch": 5.41, "grad_norm": 1.015625, "learning_rate": 0.000461020477220758, "loss": 0.215, "step": 130560 }, { "epoch": 5.41, "grad_norm": 1.375, "learning_rate": 0.00046101466167410346, "loss": 0.1971, "step": 130570 }, { "epoch": 5.41, "grad_norm": 1.40625, "learning_rate": 0.00046100884573034215, "loss": 0.1861, "step": 130580 }, { "epoch": 5.41, "grad_norm": 0.6796875, "learning_rate": 0.00046100302938948496, "loss": 0.1658, "step": 130590 }, { "epoch": 5.41, "grad_norm": 0.9453125, "learning_rate": 0.00046099721265154285, "loss": 0.2417, "step": 130600 }, { "epoch": 5.41, "grad_norm": 0.8828125, "learning_rate": 0.0004609913955165268, "loss": 0.2622, "step": 130610 }, { "epoch": 5.41, "grad_norm": 0.271484375, "learning_rate": 0.00046098557798444776, "loss": 0.1987, "step": 130620 }, { "epoch": 5.41, "grad_norm": 0.70703125, "learning_rate": 0.00046097976005531657, "loss": 0.1976, "step": 130630 }, { "epoch": 5.41, "grad_norm": 0.0, "learning_rate": 0.00046097394172914426, "loss": 0.2311, "step": 130640 }, { "epoch": 5.41, "grad_norm": 0.48046875, "learning_rate": 0.0004609681230059418, "loss": 0.2073, "step": 130650 }, { "epoch": 5.41, "grad_norm": 1.6953125, "learning_rate": 0.0004609623038857201, "loss": 0.2428, "step": 130660 }, { "epoch": 5.41, "grad_norm": 0.51953125, "learning_rate": 0.00046095648436849014, "loss": 0.2192, "step": 130670 }, { "epoch": 5.41, "grad_norm": 0.5859375, "learning_rate": 0.0004609506644542629, "loss": 0.2307, "step": 130680 }, { "epoch": 5.41, "grad_norm": 1.984375, "learning_rate": 0.00046094484414304927, "loss": 0.1659, "step": 130690 }, { "epoch": 5.41, "grad_norm": 1.1484375, "learning_rate": 0.00046093902343486017, "loss": 0.2082, "step": 130700 }, { "epoch": 5.41, "grad_norm": 1.140625, "learning_rate": 0.0004609332023297066, "loss": 0.2479, "step": 130710 }, { "epoch": 5.41, "grad_norm": 0.83203125, "learning_rate": 0.0004609273808275996, "loss": 0.1875, "step": 130720 }, { "epoch": 5.41, "grad_norm": 0.6484375, "learning_rate": 0.00046092155892855, "loss": 0.2338, "step": 130730 }, { "epoch": 5.42, "grad_norm": 0.3828125, "learning_rate": 0.0004609157366325688, "loss": 0.2075, "step": 130740 }, { "epoch": 5.42, "grad_norm": 0.9453125, "learning_rate": 0.00046090991393966696, "loss": 0.2275, "step": 130750 }, { "epoch": 5.42, "grad_norm": 1.1171875, "learning_rate": 0.00046090409084985546, "loss": 0.2403, "step": 130760 }, { "epoch": 5.42, "grad_norm": 0.62890625, "learning_rate": 0.0004608982673631453, "loss": 0.1991, "step": 130770 }, { "epoch": 5.42, "grad_norm": 0.279296875, "learning_rate": 0.00046089244347954727, "loss": 0.2082, "step": 130780 }, { "epoch": 5.42, "grad_norm": 0.84765625, "learning_rate": 0.0004608866191990725, "loss": 0.1776, "step": 130790 }, { "epoch": 5.42, "grad_norm": 1.1328125, "learning_rate": 0.0004608807945217318, "loss": 0.2329, "step": 130800 }, { "epoch": 5.42, "grad_norm": 0.2138671875, "learning_rate": 0.00046087496944753625, "loss": 0.2178, "step": 130810 }, { "epoch": 5.42, "grad_norm": 0.82421875, "learning_rate": 0.00046086914397649683, "loss": 0.1878, "step": 130820 }, { "epoch": 5.42, "grad_norm": 0.66796875, "learning_rate": 0.00046086331810862445, "loss": 0.2471, "step": 130830 }, { "epoch": 5.42, "grad_norm": 0.5078125, "learning_rate": 0.00046085749184393, "loss": 0.2128, "step": 130840 }, { "epoch": 5.42, "grad_norm": 0.6015625, "learning_rate": 0.00046085166518242463, "loss": 0.1552, "step": 130850 }, { "epoch": 5.42, "grad_norm": 0.9140625, "learning_rate": 0.00046084583812411913, "loss": 0.2162, "step": 130860 }, { "epoch": 5.42, "grad_norm": 0.1728515625, "learning_rate": 0.00046084001066902454, "loss": 0.2184, "step": 130870 }, { "epoch": 5.42, "grad_norm": 1.375, "learning_rate": 0.00046083418281715185, "loss": 0.1732, "step": 130880 }, { "epoch": 5.42, "grad_norm": 1.0, "learning_rate": 0.00046082835456851196, "loss": 0.2464, "step": 130890 }, { "epoch": 5.42, "grad_norm": 1.15625, "learning_rate": 0.00046082252592311593, "loss": 0.2315, "step": 130900 }, { "epoch": 5.42, "grad_norm": 0.431640625, "learning_rate": 0.00046081669688097454, "loss": 0.1442, "step": 130910 }, { "epoch": 5.42, "grad_norm": 1.03125, "learning_rate": 0.000460810867442099, "loss": 0.2533, "step": 130920 }, { "epoch": 5.42, "grad_norm": 0.5234375, "learning_rate": 0.00046080503760650017, "loss": 0.1897, "step": 130930 }, { "epoch": 5.42, "grad_norm": 0.61328125, "learning_rate": 0.000460799207374189, "loss": 0.1817, "step": 130940 }, { "epoch": 5.42, "grad_norm": 0.439453125, "learning_rate": 0.0004607933767451765, "loss": 0.2083, "step": 130950 }, { "epoch": 5.42, "grad_norm": 0.66015625, "learning_rate": 0.00046078754571947356, "loss": 0.15, "step": 130960 }, { "epoch": 5.42, "grad_norm": 0.94140625, "learning_rate": 0.0004607817142970913, "loss": 0.2252, "step": 130970 }, { "epoch": 5.43, "grad_norm": 1.0234375, "learning_rate": 0.0004607758824780406, "loss": 0.2331, "step": 130980 }, { "epoch": 5.43, "grad_norm": 1.8515625, "learning_rate": 0.00046077005026233243, "loss": 0.224, "step": 130990 }, { "epoch": 5.43, "grad_norm": 0.8515625, "learning_rate": 0.00046076421764997786, "loss": 0.2362, "step": 131000 }, { "epoch": 5.43, "grad_norm": 1.0703125, "learning_rate": 0.00046075838464098776, "loss": 0.1729, "step": 131010 }, { "epoch": 5.43, "grad_norm": 0.94921875, "learning_rate": 0.0004607525512353731, "loss": 0.2273, "step": 131020 }, { "epoch": 5.43, "grad_norm": 1.2734375, "learning_rate": 0.0004607467174331449, "loss": 0.2713, "step": 131030 }, { "epoch": 5.43, "grad_norm": 0.51953125, "learning_rate": 0.0004607408832343142, "loss": 0.1979, "step": 131040 }, { "epoch": 5.43, "grad_norm": 0.875, "learning_rate": 0.00046073504863889184, "loss": 0.1902, "step": 131050 }, { "epoch": 5.43, "grad_norm": 0.3828125, "learning_rate": 0.0004607292136468889, "loss": 0.2645, "step": 131060 }, { "epoch": 5.43, "grad_norm": 0.361328125, "learning_rate": 0.00046072337825831634, "loss": 0.1861, "step": 131070 }, { "epoch": 5.43, "grad_norm": 0.44921875, "learning_rate": 0.0004607175424731851, "loss": 0.2101, "step": 131080 }, { "epoch": 5.43, "grad_norm": 0.6796875, "learning_rate": 0.0004607117062915063, "loss": 0.1942, "step": 131090 }, { "epoch": 5.43, "grad_norm": 1.765625, "learning_rate": 0.00046070586971329075, "loss": 0.1709, "step": 131100 }, { "epoch": 5.43, "grad_norm": 0.302734375, "learning_rate": 0.0004607000327385495, "loss": 0.186, "step": 131110 }, { "epoch": 5.43, "grad_norm": 0.5625, "learning_rate": 0.00046069419536729357, "loss": 0.1885, "step": 131120 }, { "epoch": 5.43, "grad_norm": 0.87109375, "learning_rate": 0.00046068835759953386, "loss": 0.2074, "step": 131130 }, { "epoch": 5.43, "grad_norm": 0.72265625, "learning_rate": 0.0004606825194352815, "loss": 0.2434, "step": 131140 }, { "epoch": 5.43, "grad_norm": 0.494140625, "learning_rate": 0.00046067668087454737, "loss": 0.2162, "step": 131150 }, { "epoch": 5.43, "grad_norm": 0.302734375, "learning_rate": 0.0004606708419173424, "loss": 0.2398, "step": 131160 }, { "epoch": 5.43, "grad_norm": 0.5859375, "learning_rate": 0.00046066500256367775, "loss": 0.2043, "step": 131170 }, { "epoch": 5.43, "grad_norm": 0.333984375, "learning_rate": 0.0004606591628135643, "loss": 0.1801, "step": 131180 }, { "epoch": 5.43, "grad_norm": 1.1328125, "learning_rate": 0.00046065332266701306, "loss": 0.1945, "step": 131190 }, { "epoch": 5.43, "grad_norm": 0.306640625, "learning_rate": 0.00046064748212403493, "loss": 0.2014, "step": 131200 }, { "epoch": 5.43, "grad_norm": 0.77734375, "learning_rate": 0.00046064164118464116, "loss": 0.1827, "step": 131210 }, { "epoch": 5.44, "grad_norm": 0.51953125, "learning_rate": 0.0004606357998488424, "loss": 0.237, "step": 131220 }, { "epoch": 5.44, "grad_norm": 0.78515625, "learning_rate": 0.00046062995811664987, "loss": 0.2325, "step": 131230 }, { "epoch": 5.44, "grad_norm": 0.20703125, "learning_rate": 0.00046062411598807453, "loss": 0.2808, "step": 131240 }, { "epoch": 5.44, "grad_norm": 0.47265625, "learning_rate": 0.0004606182734631273, "loss": 0.139, "step": 131250 }, { "epoch": 5.44, "grad_norm": 0.2060546875, "learning_rate": 0.0004606124305418192, "loss": 0.2182, "step": 131260 }, { "epoch": 5.44, "grad_norm": 0.4296875, "learning_rate": 0.0004606065872241614, "loss": 0.1772, "step": 131270 }, { "epoch": 5.44, "grad_norm": 0.314453125, "learning_rate": 0.00046060074351016454, "loss": 0.1683, "step": 131280 }, { "epoch": 5.44, "grad_norm": 0.609375, "learning_rate": 0.00046059489939984, "loss": 0.1674, "step": 131290 }, { "epoch": 5.44, "grad_norm": 0.56640625, "learning_rate": 0.00046058905489319846, "loss": 0.2008, "step": 131300 }, { "epoch": 5.44, "grad_norm": 0.6875, "learning_rate": 0.0004605832099902512, "loss": 0.2421, "step": 131310 }, { "epoch": 5.44, "grad_norm": 0.8671875, "learning_rate": 0.000460577364691009, "loss": 0.1885, "step": 131320 }, { "epoch": 5.44, "grad_norm": 0.314453125, "learning_rate": 0.00046057151899548293, "loss": 0.2213, "step": 131330 }, { "epoch": 5.44, "grad_norm": 0.6484375, "learning_rate": 0.000460565672903684, "loss": 0.2218, "step": 131340 }, { "epoch": 5.44, "grad_norm": 0.48046875, "learning_rate": 0.0004605598264156232, "loss": 0.1896, "step": 131350 }, { "epoch": 5.44, "grad_norm": 0.9453125, "learning_rate": 0.00046055397953131163, "loss": 0.1925, "step": 131360 }, { "epoch": 5.44, "grad_norm": 0.396484375, "learning_rate": 0.0004605481322507602, "loss": 0.2557, "step": 131370 }, { "epoch": 5.44, "grad_norm": 0.75, "learning_rate": 0.0004605422845739798, "loss": 0.213, "step": 131380 }, { "epoch": 5.44, "grad_norm": 0.431640625, "learning_rate": 0.0004605364365009816, "loss": 0.2816, "step": 131390 }, { "epoch": 5.44, "grad_norm": 1.46875, "learning_rate": 0.00046053058803177666, "loss": 0.2139, "step": 131400 }, { "epoch": 5.44, "grad_norm": 0.306640625, "learning_rate": 0.0004605247391663758, "loss": 0.183, "step": 131410 }, { "epoch": 5.44, "grad_norm": 0.76953125, "learning_rate": 0.0004605188899047902, "loss": 0.1581, "step": 131420 }, { "epoch": 5.44, "grad_norm": 1.140625, "learning_rate": 0.00046051304024703066, "loss": 0.2269, "step": 131430 }, { "epoch": 5.44, "grad_norm": 1.265625, "learning_rate": 0.0004605071901931084, "loss": 0.1727, "step": 131440 }, { "epoch": 5.44, "grad_norm": 0.7890625, "learning_rate": 0.00046050133974303437, "loss": 0.2073, "step": 131450 }, { "epoch": 5.45, "grad_norm": 0.70703125, "learning_rate": 0.00046049548889681946, "loss": 0.1775, "step": 131460 }, { "epoch": 5.45, "grad_norm": 0.640625, "learning_rate": 0.0004604896376544748, "loss": 0.2095, "step": 131470 }, { "epoch": 5.45, "grad_norm": 0.44140625, "learning_rate": 0.0004604837860160114, "loss": 0.1849, "step": 131480 }, { "epoch": 5.45, "grad_norm": 0.65234375, "learning_rate": 0.00046047793398144025, "loss": 0.2238, "step": 131490 }, { "epoch": 5.45, "grad_norm": 1.640625, "learning_rate": 0.00046047208155077234, "loss": 0.2087, "step": 131500 }, { "epoch": 5.45, "grad_norm": 0.42578125, "learning_rate": 0.00046046622872401865, "loss": 0.1822, "step": 131510 }, { "epoch": 5.45, "grad_norm": 0.62109375, "learning_rate": 0.00046046037550119036, "loss": 0.2281, "step": 131520 }, { "epoch": 5.45, "grad_norm": 0.73828125, "learning_rate": 0.0004604545218822983, "loss": 0.2309, "step": 131530 }, { "epoch": 5.45, "grad_norm": 0.77734375, "learning_rate": 0.0004604486678673535, "loss": 0.242, "step": 131540 }, { "epoch": 5.45, "grad_norm": 0.7421875, "learning_rate": 0.00046044281345636713, "loss": 0.1873, "step": 131550 }, { "epoch": 5.45, "grad_norm": 1.4140625, "learning_rate": 0.0004604369586493501, "loss": 0.2426, "step": 131560 }, { "epoch": 5.45, "grad_norm": 0.40625, "learning_rate": 0.00046043110344631344, "loss": 0.2001, "step": 131570 }, { "epoch": 5.45, "grad_norm": 1.078125, "learning_rate": 0.00046042524784726814, "loss": 0.2109, "step": 131580 }, { "epoch": 5.45, "grad_norm": 0.765625, "learning_rate": 0.00046041939185222525, "loss": 0.2253, "step": 131590 }, { "epoch": 5.45, "grad_norm": 0.447265625, "learning_rate": 0.0004604135354611958, "loss": 0.1714, "step": 131600 }, { "epoch": 5.45, "grad_norm": 1.0, "learning_rate": 0.00046040767867419076, "loss": 0.2324, "step": 131610 }, { "epoch": 5.45, "grad_norm": 0.8046875, "learning_rate": 0.0004604018214912212, "loss": 0.1433, "step": 131620 }, { "epoch": 5.45, "grad_norm": 1.03125, "learning_rate": 0.0004603959639122982, "loss": 0.2178, "step": 131630 }, { "epoch": 5.45, "grad_norm": 0.486328125, "learning_rate": 0.00046039010593743263, "loss": 0.2102, "step": 131640 }, { "epoch": 5.45, "grad_norm": 0.5703125, "learning_rate": 0.00046038424756663564, "loss": 0.1672, "step": 131650 }, { "epoch": 5.45, "grad_norm": 1.171875, "learning_rate": 0.0004603783887999182, "loss": 0.2439, "step": 131660 }, { "epoch": 5.45, "grad_norm": 1.109375, "learning_rate": 0.0004603725296372914, "loss": 0.2713, "step": 131670 }, { "epoch": 5.45, "grad_norm": 0.65234375, "learning_rate": 0.00046036667007876616, "loss": 0.1853, "step": 131680 }, { "epoch": 5.45, "grad_norm": 0.353515625, "learning_rate": 0.0004603608101243536, "loss": 0.1844, "step": 131690 }, { "epoch": 5.45, "grad_norm": 0.7421875, "learning_rate": 0.0004603549497740647, "loss": 0.2294, "step": 131700 }, { "epoch": 5.46, "grad_norm": 0.5078125, "learning_rate": 0.0004603490890279105, "loss": 0.2015, "step": 131710 }, { "epoch": 5.46, "grad_norm": 1.453125, "learning_rate": 0.000460343227885902, "loss": 0.2048, "step": 131720 }, { "epoch": 5.46, "grad_norm": 1.0390625, "learning_rate": 0.0004603373663480503, "loss": 0.2185, "step": 131730 }, { "epoch": 5.46, "grad_norm": 0.84375, "learning_rate": 0.0004603315044143664, "loss": 0.2022, "step": 131740 }, { "epoch": 5.46, "grad_norm": 0.44921875, "learning_rate": 0.0004603256420848613, "loss": 0.2253, "step": 131750 }, { "epoch": 5.46, "grad_norm": 0.373046875, "learning_rate": 0.0004603197793595461, "loss": 0.1738, "step": 131760 }, { "epoch": 5.46, "grad_norm": 1.1484375, "learning_rate": 0.0004603139162384317, "loss": 0.2195, "step": 131770 }, { "epoch": 5.46, "grad_norm": 0.5859375, "learning_rate": 0.00046030805272152933, "loss": 0.2364, "step": 131780 }, { "epoch": 5.46, "grad_norm": 0.53125, "learning_rate": 0.00046030218880884985, "loss": 0.1487, "step": 131790 }, { "epoch": 5.46, "grad_norm": 0.65625, "learning_rate": 0.0004602963245004043, "loss": 0.2068, "step": 131800 }, { "epoch": 5.46, "grad_norm": 1.5078125, "learning_rate": 0.00046029045979620385, "loss": 0.1816, "step": 131810 }, { "epoch": 5.46, "grad_norm": 0.74609375, "learning_rate": 0.0004602845946962595, "loss": 0.2121, "step": 131820 }, { "epoch": 5.46, "grad_norm": 0.7421875, "learning_rate": 0.00046027872920058224, "loss": 0.165, "step": 131830 }, { "epoch": 5.46, "grad_norm": 1.46875, "learning_rate": 0.00046027286330918305, "loss": 0.2123, "step": 131840 }, { "epoch": 5.46, "grad_norm": 0.625, "learning_rate": 0.0004602669970220731, "loss": 0.1784, "step": 131850 }, { "epoch": 5.46, "grad_norm": 0.7421875, "learning_rate": 0.0004602611303392633, "loss": 0.2161, "step": 131860 }, { "epoch": 5.46, "grad_norm": 0.69921875, "learning_rate": 0.0004602552632607648, "loss": 0.1975, "step": 131870 }, { "epoch": 5.46, "grad_norm": 0.796875, "learning_rate": 0.00046024939578658865, "loss": 0.1918, "step": 131880 }, { "epoch": 5.46, "grad_norm": 1.5390625, "learning_rate": 0.0004602435279167458, "loss": 0.2346, "step": 131890 }, { "epoch": 5.46, "grad_norm": 0.6875, "learning_rate": 0.0004602376596512473, "loss": 0.2131, "step": 131900 }, { "epoch": 5.46, "grad_norm": 1.2265625, "learning_rate": 0.00046023179099010427, "loss": 0.251, "step": 131910 }, { "epoch": 5.46, "grad_norm": 0.5078125, "learning_rate": 0.0004602259219333277, "loss": 0.2397, "step": 131920 }, { "epoch": 5.46, "grad_norm": 0.77734375, "learning_rate": 0.0004602200524809287, "loss": 0.1728, "step": 131930 }, { "epoch": 5.46, "grad_norm": 1.1015625, "learning_rate": 0.00046021418263291814, "loss": 0.196, "step": 131940 }, { "epoch": 5.47, "grad_norm": 0.69140625, "learning_rate": 0.0004602083123893073, "loss": 0.1929, "step": 131950 }, { "epoch": 5.47, "grad_norm": 0.640625, "learning_rate": 0.0004602024417501071, "loss": 0.182, "step": 131960 }, { "epoch": 5.47, "grad_norm": 0.55078125, "learning_rate": 0.00046019657071532863, "loss": 0.2369, "step": 131970 }, { "epoch": 5.47, "grad_norm": 0.9765625, "learning_rate": 0.0004601906992849828, "loss": 0.153, "step": 131980 }, { "epoch": 5.47, "grad_norm": 0.640625, "learning_rate": 0.00046018482745908084, "loss": 0.1743, "step": 131990 }, { "epoch": 5.47, "grad_norm": 0.578125, "learning_rate": 0.0004601789552376338, "loss": 0.1832, "step": 132000 }, { "epoch": 5.47, "grad_norm": 1.2578125, "learning_rate": 0.00046017308262065253, "loss": 0.2485, "step": 132010 }, { "epoch": 5.47, "grad_norm": 1.6328125, "learning_rate": 0.00046016720960814826, "loss": 0.2025, "step": 132020 }, { "epoch": 5.47, "grad_norm": 0.65625, "learning_rate": 0.000460161336200132, "loss": 0.2349, "step": 132030 }, { "epoch": 5.47, "grad_norm": 0.8203125, "learning_rate": 0.0004601554623966149, "loss": 0.2127, "step": 132040 }, { "epoch": 5.47, "grad_norm": 0.70703125, "learning_rate": 0.00046014958819760784, "loss": 0.171, "step": 132050 }, { "epoch": 5.47, "grad_norm": 0.6640625, "learning_rate": 0.0004601437136031219, "loss": 0.2154, "step": 132060 }, { "epoch": 5.47, "grad_norm": 0.9921875, "learning_rate": 0.0004601378386131683, "loss": 0.2025, "step": 132070 }, { "epoch": 5.47, "grad_norm": 0.68359375, "learning_rate": 0.0004601319632277579, "loss": 0.1834, "step": 132080 }, { "epoch": 5.47, "grad_norm": 0.486328125, "learning_rate": 0.0004601260874469018, "loss": 0.1821, "step": 132090 }, { "epoch": 5.47, "grad_norm": 0.80078125, "learning_rate": 0.00046012021127061115, "loss": 0.2222, "step": 132100 }, { "epoch": 5.47, "grad_norm": 0.99609375, "learning_rate": 0.000460114334698897, "loss": 0.2201, "step": 132110 }, { "epoch": 5.47, "grad_norm": 0.9140625, "learning_rate": 0.0004601084577317702, "loss": 0.2196, "step": 132120 }, { "epoch": 5.47, "grad_norm": 0.1484375, "learning_rate": 0.00046010258036924213, "loss": 0.1957, "step": 132130 }, { "epoch": 5.47, "grad_norm": 0.8515625, "learning_rate": 0.0004600967026113236, "loss": 0.1858, "step": 132140 }, { "epoch": 5.47, "grad_norm": 0.640625, "learning_rate": 0.00046009082445802585, "loss": 0.1653, "step": 132150 }, { "epoch": 5.47, "grad_norm": 1.1953125, "learning_rate": 0.00046008494590935977, "loss": 0.2234, "step": 132160 }, { "epoch": 5.47, "grad_norm": 0.875, "learning_rate": 0.00046007906696533666, "loss": 0.1621, "step": 132170 }, { "epoch": 5.47, "grad_norm": 0.5234375, "learning_rate": 0.0004600731876259673, "loss": 0.1971, "step": 132180 }, { "epoch": 5.48, "grad_norm": 0.70703125, "learning_rate": 0.0004600673078912628, "loss": 0.19, "step": 132190 }, { "epoch": 5.48, "grad_norm": 0.75, "learning_rate": 0.00046006142776123447, "loss": 0.2117, "step": 132200 }, { "epoch": 5.48, "grad_norm": 0.69921875, "learning_rate": 0.0004600555472358932, "loss": 0.2296, "step": 132210 }, { "epoch": 5.48, "grad_norm": 1.0390625, "learning_rate": 0.00046004966631525, "loss": 0.208, "step": 132220 }, { "epoch": 5.48, "grad_norm": 0.7265625, "learning_rate": 0.0004600437849993161, "loss": 0.1961, "step": 132230 }, { "epoch": 5.48, "grad_norm": 0.2392578125, "learning_rate": 0.00046003790328810247, "loss": 0.2247, "step": 132240 }, { "epoch": 5.48, "grad_norm": 0.85546875, "learning_rate": 0.0004600320211816201, "loss": 0.2546, "step": 132250 }, { "epoch": 5.48, "grad_norm": 1.109375, "learning_rate": 0.00046002613867988023, "loss": 0.235, "step": 132260 }, { "epoch": 5.48, "grad_norm": 0.91015625, "learning_rate": 0.0004600202557828938, "loss": 0.2309, "step": 132270 }, { "epoch": 5.48, "grad_norm": 1.2890625, "learning_rate": 0.00046001437249067195, "loss": 0.2178, "step": 132280 }, { "epoch": 5.48, "grad_norm": 0.470703125, "learning_rate": 0.0004600084888032258, "loss": 0.2021, "step": 132290 }, { "epoch": 5.48, "grad_norm": 0.357421875, "learning_rate": 0.0004600026047205662, "loss": 0.2276, "step": 132300 }, { "epoch": 5.48, "grad_norm": 0.76953125, "learning_rate": 0.00045999672024270446, "loss": 0.216, "step": 132310 }, { "epoch": 5.48, "grad_norm": 2.53125, "learning_rate": 0.00045999083536965156, "loss": 0.2268, "step": 132320 }, { "epoch": 5.48, "grad_norm": 1.1796875, "learning_rate": 0.00045998495010141863, "loss": 0.2636, "step": 132330 }, { "epoch": 5.48, "grad_norm": 0.53125, "learning_rate": 0.00045997906443801667, "loss": 0.1565, "step": 132340 }, { "epoch": 5.48, "grad_norm": 0.671875, "learning_rate": 0.00045997317837945674, "loss": 0.2088, "step": 132350 }, { "epoch": 5.48, "grad_norm": 0.251953125, "learning_rate": 0.00045996729192575005, "loss": 0.2215, "step": 132360 }, { "epoch": 5.48, "grad_norm": 0.91796875, "learning_rate": 0.00045996140507690756, "loss": 0.1893, "step": 132370 }, { "epoch": 5.48, "grad_norm": 2.140625, "learning_rate": 0.00045995551783294033, "loss": 0.3131, "step": 132380 }, { "epoch": 5.48, "grad_norm": 0.0, "learning_rate": 0.0004599496301938595, "loss": 0.209, "step": 132390 }, { "epoch": 5.48, "grad_norm": 1.125, "learning_rate": 0.0004599437421596762, "loss": 0.2206, "step": 132400 }, { "epoch": 5.48, "grad_norm": 0.3515625, "learning_rate": 0.0004599378537304014, "loss": 0.231, "step": 132410 }, { "epoch": 5.48, "grad_norm": 0.89453125, "learning_rate": 0.0004599319649060463, "loss": 0.2032, "step": 132420 }, { "epoch": 5.49, "grad_norm": 1.5625, "learning_rate": 0.00045992607568662183, "loss": 0.2305, "step": 132430 }, { "epoch": 5.49, "grad_norm": 0.60546875, "learning_rate": 0.00045992018607213914, "loss": 0.1635, "step": 132440 }, { "epoch": 5.49, "grad_norm": 0.80078125, "learning_rate": 0.0004599142960626094, "loss": 0.1655, "step": 132450 }, { "epoch": 5.49, "grad_norm": 0.162109375, "learning_rate": 0.0004599084056580436, "loss": 0.1878, "step": 132460 }, { "epoch": 5.49, "grad_norm": 0.294921875, "learning_rate": 0.0004599025148584528, "loss": 0.1932, "step": 132470 }, { "epoch": 5.49, "grad_norm": 0.93359375, "learning_rate": 0.0004598966236638482, "loss": 0.2358, "step": 132480 }, { "epoch": 5.49, "grad_norm": 0.640625, "learning_rate": 0.0004598907320742408, "loss": 0.181, "step": 132490 }, { "epoch": 5.49, "grad_norm": 0.5390625, "learning_rate": 0.0004598848400896417, "loss": 0.2235, "step": 132500 }, { "epoch": 5.49, "grad_norm": 0.6796875, "learning_rate": 0.000459878947710062, "loss": 0.1702, "step": 132510 }, { "epoch": 5.49, "grad_norm": 1.0234375, "learning_rate": 0.0004598730549355128, "loss": 0.2628, "step": 132520 }, { "epoch": 5.49, "grad_norm": 0.48046875, "learning_rate": 0.0004598671617660052, "loss": 0.2186, "step": 132530 }, { "epoch": 5.49, "grad_norm": 0.78125, "learning_rate": 0.00045986126820155016, "loss": 0.1823, "step": 132540 }, { "epoch": 5.49, "grad_norm": 0.8046875, "learning_rate": 0.00045985537424215897, "loss": 0.2274, "step": 132550 }, { "epoch": 5.49, "grad_norm": 0.5625, "learning_rate": 0.00045984947988784265, "loss": 0.233, "step": 132560 }, { "epoch": 5.49, "grad_norm": 1.0546875, "learning_rate": 0.0004598435851386122, "loss": 0.2115, "step": 132570 }, { "epoch": 5.49, "grad_norm": 0.4453125, "learning_rate": 0.00045983768999447876, "loss": 0.1918, "step": 132580 }, { "epoch": 5.49, "grad_norm": 0.7578125, "learning_rate": 0.0004598317944554535, "loss": 0.2352, "step": 132590 }, { "epoch": 5.49, "grad_norm": 0.609375, "learning_rate": 0.00045982589852154744, "loss": 0.2382, "step": 132600 }, { "epoch": 5.49, "grad_norm": 0.412109375, "learning_rate": 0.00045982000219277173, "loss": 0.2302, "step": 132610 }, { "epoch": 5.49, "grad_norm": 1.1015625, "learning_rate": 0.00045981410546913745, "loss": 0.2354, "step": 132620 }, { "epoch": 5.49, "grad_norm": 1.109375, "learning_rate": 0.0004598082083506556, "loss": 0.1949, "step": 132630 }, { "epoch": 5.49, "grad_norm": 0.7890625, "learning_rate": 0.00045980231083733746, "loss": 0.192, "step": 132640 }, { "epoch": 5.49, "grad_norm": 0.48046875, "learning_rate": 0.00045979641292919393, "loss": 0.2222, "step": 132650 }, { "epoch": 5.49, "grad_norm": 2.375, "learning_rate": 0.00045979051462623633, "loss": 0.2425, "step": 132660 }, { "epoch": 5.5, "grad_norm": 1.078125, "learning_rate": 0.0004597846159284755, "loss": 0.1966, "step": 132670 }, { "epoch": 5.5, "grad_norm": 0.55859375, "learning_rate": 0.00045977871683592275, "loss": 0.2111, "step": 132680 }, { "epoch": 5.5, "grad_norm": 0.65625, "learning_rate": 0.0004597728173485891, "loss": 0.1532, "step": 132690 }, { "epoch": 5.5, "grad_norm": 0.546875, "learning_rate": 0.00045976691746648567, "loss": 0.2525, "step": 132700 }, { "epoch": 5.5, "grad_norm": 0.5078125, "learning_rate": 0.0004597610171896236, "loss": 0.1789, "step": 132710 }, { "epoch": 5.5, "grad_norm": 0.431640625, "learning_rate": 0.00045975511651801383, "loss": 0.2084, "step": 132720 }, { "epoch": 5.5, "grad_norm": 0.59375, "learning_rate": 0.0004597492154516677, "loss": 0.2186, "step": 132730 }, { "epoch": 5.5, "grad_norm": 0.51171875, "learning_rate": 0.0004597433139905961, "loss": 0.17, "step": 132740 }, { "epoch": 5.5, "grad_norm": 0.66796875, "learning_rate": 0.0004597374121348103, "loss": 0.2018, "step": 132750 }, { "epoch": 5.5, "grad_norm": 0.3984375, "learning_rate": 0.00045973150988432135, "loss": 0.1629, "step": 132760 }, { "epoch": 5.5, "grad_norm": 0.65625, "learning_rate": 0.00045972560723914023, "loss": 0.2305, "step": 132770 }, { "epoch": 5.5, "grad_norm": 0.1875, "learning_rate": 0.0004597197041992783, "loss": 0.2098, "step": 132780 }, { "epoch": 5.5, "grad_norm": 0.76171875, "learning_rate": 0.00045971380076474644, "loss": 0.1824, "step": 132790 }, { "epoch": 5.5, "grad_norm": 0.609375, "learning_rate": 0.00045970789693555595, "loss": 0.1791, "step": 132800 }, { "epoch": 5.5, "grad_norm": 0.97265625, "learning_rate": 0.00045970199271171776, "loss": 0.1881, "step": 132810 }, { "epoch": 5.5, "grad_norm": 0.341796875, "learning_rate": 0.0004596960880932431, "loss": 0.2121, "step": 132820 }, { "epoch": 5.5, "grad_norm": 0.8359375, "learning_rate": 0.000459690183080143, "loss": 0.1592, "step": 132830 }, { "epoch": 5.5, "grad_norm": 0.6171875, "learning_rate": 0.0004596842776724287, "loss": 0.2189, "step": 132840 }, { "epoch": 5.5, "grad_norm": 0.55078125, "learning_rate": 0.0004596783718701112, "loss": 0.1813, "step": 132850 }, { "epoch": 5.5, "grad_norm": 1.5625, "learning_rate": 0.00045967246567320166, "loss": 0.2638, "step": 132860 }, { "epoch": 5.5, "grad_norm": 0.8203125, "learning_rate": 0.00045966655908171117, "loss": 0.2251, "step": 132870 }, { "epoch": 5.5, "grad_norm": 0.55078125, "learning_rate": 0.0004596606520956509, "loss": 0.1976, "step": 132880 }, { "epoch": 5.5, "grad_norm": 0.2890625, "learning_rate": 0.0004596547447150318, "loss": 0.1722, "step": 132890 }, { "epoch": 5.5, "grad_norm": 0.21484375, "learning_rate": 0.00045964883693986523, "loss": 0.2164, "step": 132900 }, { "epoch": 5.51, "grad_norm": 0.72265625, "learning_rate": 0.00045964292877016214, "loss": 0.1811, "step": 132910 }, { "epoch": 5.51, "grad_norm": 0.33984375, "learning_rate": 0.00045963702020593365, "loss": 0.2649, "step": 132920 }, { "epoch": 5.51, "grad_norm": 0.66015625, "learning_rate": 0.00045963111124719104, "loss": 0.2055, "step": 132930 }, { "epoch": 5.51, "grad_norm": 0.60546875, "learning_rate": 0.0004596252018939453, "loss": 0.1784, "step": 132940 }, { "epoch": 5.51, "grad_norm": 2.09375, "learning_rate": 0.00045961929214620743, "loss": 0.1923, "step": 132950 }, { "epoch": 5.51, "grad_norm": 0.53125, "learning_rate": 0.0004596133820039888, "loss": 0.2232, "step": 132960 }, { "epoch": 5.51, "grad_norm": 0.8984375, "learning_rate": 0.0004596074714673005, "loss": 0.206, "step": 132970 }, { "epoch": 5.51, "grad_norm": 0.60546875, "learning_rate": 0.00045960156053615343, "loss": 0.1993, "step": 132980 }, { "epoch": 5.51, "grad_norm": 1.8359375, "learning_rate": 0.00045959564921055894, "loss": 0.1834, "step": 132990 }, { "epoch": 5.51, "grad_norm": 0.57421875, "learning_rate": 0.000459589737490528, "loss": 0.235, "step": 133000 }, { "epoch": 5.51, "grad_norm": 0.6171875, "learning_rate": 0.0004595838253760719, "loss": 0.2325, "step": 133010 }, { "epoch": 5.51, "grad_norm": 0.4375, "learning_rate": 0.0004595779128672016, "loss": 0.179, "step": 133020 }, { "epoch": 5.51, "grad_norm": 0.50390625, "learning_rate": 0.0004595719999639283, "loss": 0.2174, "step": 133030 }, { "epoch": 5.51, "grad_norm": 1.2578125, "learning_rate": 0.0004595660866662632, "loss": 0.1762, "step": 133040 }, { "epoch": 5.51, "grad_norm": 1.1015625, "learning_rate": 0.00045956017297421733, "loss": 0.229, "step": 133050 }, { "epoch": 5.51, "grad_norm": 1.0234375, "learning_rate": 0.0004595542588878018, "loss": 0.2048, "step": 133060 }, { "epoch": 5.51, "grad_norm": 0.7421875, "learning_rate": 0.0004595483444070278, "loss": 0.1893, "step": 133070 }, { "epoch": 5.51, "grad_norm": 0.8046875, "learning_rate": 0.00045954242953190653, "loss": 0.1785, "step": 133080 }, { "epoch": 5.51, "grad_norm": 0.453125, "learning_rate": 0.0004595365142624489, "loss": 0.2155, "step": 133090 }, { "epoch": 5.51, "grad_norm": 2.1875, "learning_rate": 0.0004595305985986662, "loss": 0.2586, "step": 133100 }, { "epoch": 5.51, "grad_norm": 0.369140625, "learning_rate": 0.00045952468254056964, "loss": 0.1774, "step": 133110 }, { "epoch": 5.51, "grad_norm": 0.66796875, "learning_rate": 0.0004595187660881702, "loss": 0.1908, "step": 133120 }, { "epoch": 5.51, "grad_norm": 0.94921875, "learning_rate": 0.0004595128492414791, "loss": 0.2015, "step": 133130 }, { "epoch": 5.51, "grad_norm": 0.89453125, "learning_rate": 0.0004595069320005073, "loss": 0.2332, "step": 133140 }, { "epoch": 5.52, "grad_norm": 1.4609375, "learning_rate": 0.00045950101436526626, "loss": 0.2079, "step": 133150 }, { "epoch": 5.52, "grad_norm": 4.84375, "learning_rate": 0.0004594950963357668, "loss": 0.2171, "step": 133160 }, { "epoch": 5.52, "grad_norm": 0.7421875, "learning_rate": 0.0004594891779120203, "loss": 0.2532, "step": 133170 }, { "epoch": 5.52, "grad_norm": 0.6875, "learning_rate": 0.00045948325909403773, "loss": 0.1809, "step": 133180 }, { "epoch": 5.52, "grad_norm": 0.640625, "learning_rate": 0.0004594773398818303, "loss": 0.2421, "step": 133190 }, { "epoch": 5.52, "grad_norm": 0.4453125, "learning_rate": 0.0004594714202754091, "loss": 0.1987, "step": 133200 }, { "epoch": 5.52, "grad_norm": 0.66015625, "learning_rate": 0.0004594655002747854, "loss": 0.2297, "step": 133210 }, { "epoch": 5.52, "grad_norm": 0.3125, "learning_rate": 0.00045945957987997017, "loss": 0.1732, "step": 133220 }, { "epoch": 5.52, "grad_norm": 0.41015625, "learning_rate": 0.00045945365909097463, "loss": 0.2269, "step": 133230 }, { "epoch": 5.52, "grad_norm": 0.546875, "learning_rate": 0.00045944773790781, "loss": 0.2027, "step": 133240 }, { "epoch": 5.52, "grad_norm": 0.59765625, "learning_rate": 0.00045944181633048725, "loss": 0.1732, "step": 133250 }, { "epoch": 5.52, "grad_norm": 0.52734375, "learning_rate": 0.0004594358943590177, "loss": 0.1862, "step": 133260 }, { "epoch": 5.52, "grad_norm": 0.41796875, "learning_rate": 0.0004594299719934124, "loss": 0.2255, "step": 133270 }, { "epoch": 5.52, "grad_norm": 0.76171875, "learning_rate": 0.0004594240492336824, "loss": 0.2171, "step": 133280 }, { "epoch": 5.52, "grad_norm": 0.4375, "learning_rate": 0.00045941812607983907, "loss": 0.2254, "step": 133290 }, { "epoch": 5.52, "grad_norm": 0.376953125, "learning_rate": 0.0004594122025318934, "loss": 0.2003, "step": 133300 }, { "epoch": 5.52, "grad_norm": 0.314453125, "learning_rate": 0.0004594062785898566, "loss": 0.2676, "step": 133310 }, { "epoch": 5.52, "grad_norm": 0.66015625, "learning_rate": 0.00045940035425373984, "loss": 0.1666, "step": 133320 }, { "epoch": 5.52, "grad_norm": 1.0234375, "learning_rate": 0.00045939442952355416, "loss": 0.1651, "step": 133330 }, { "epoch": 5.52, "grad_norm": 0.6015625, "learning_rate": 0.0004593885043993108, "loss": 0.2153, "step": 133340 }, { "epoch": 5.52, "grad_norm": 0.7890625, "learning_rate": 0.00045938257888102085, "loss": 0.2163, "step": 133350 }, { "epoch": 5.52, "grad_norm": 0.70703125, "learning_rate": 0.00045937665296869555, "loss": 0.192, "step": 133360 }, { "epoch": 5.52, "grad_norm": 0.380859375, "learning_rate": 0.000459370726662346, "loss": 0.1857, "step": 133370 }, { "epoch": 5.52, "grad_norm": 0.82421875, "learning_rate": 0.00045936479996198335, "loss": 0.2135, "step": 133380 }, { "epoch": 5.52, "grad_norm": 0.55078125, "learning_rate": 0.00045935887286761867, "loss": 0.2724, "step": 133390 }, { "epoch": 5.53, "grad_norm": 1.1953125, "learning_rate": 0.0004593529453792633, "loss": 0.2069, "step": 133400 }, { "epoch": 5.53, "grad_norm": 0.71484375, "learning_rate": 0.00045934701749692825, "loss": 0.2122, "step": 133410 }, { "epoch": 5.53, "grad_norm": 1.1171875, "learning_rate": 0.00045934108922062475, "loss": 0.2058, "step": 133420 }, { "epoch": 5.53, "grad_norm": 2.015625, "learning_rate": 0.0004593351605503639, "loss": 0.213, "step": 133430 }, { "epoch": 5.53, "grad_norm": 0.59765625, "learning_rate": 0.0004593292314861569, "loss": 0.209, "step": 133440 }, { "epoch": 5.53, "grad_norm": 0.6328125, "learning_rate": 0.0004593233020280149, "loss": 0.2442, "step": 133450 }, { "epoch": 5.53, "grad_norm": 1.2109375, "learning_rate": 0.000459317372175949, "loss": 0.2298, "step": 133460 }, { "epoch": 5.53, "grad_norm": 0.92578125, "learning_rate": 0.00045931144192997044, "loss": 0.2572, "step": 133470 }, { "epoch": 5.53, "grad_norm": 0.69140625, "learning_rate": 0.00045930551129009037, "loss": 0.2309, "step": 133480 }, { "epoch": 5.53, "grad_norm": 0.1953125, "learning_rate": 0.0004592995802563199, "loss": 0.1363, "step": 133490 }, { "epoch": 5.53, "grad_norm": 0.7265625, "learning_rate": 0.00045929364882867026, "loss": 0.1966, "step": 133500 }, { "epoch": 5.53, "grad_norm": 0.640625, "learning_rate": 0.00045928771700715255, "loss": 0.204, "step": 133510 }, { "epoch": 5.53, "grad_norm": 0.41015625, "learning_rate": 0.000459281784791778, "loss": 0.2323, "step": 133520 }, { "epoch": 5.53, "grad_norm": 1.2265625, "learning_rate": 0.00045927585218255763, "loss": 0.1735, "step": 133530 }, { "epoch": 5.53, "grad_norm": 0.380859375, "learning_rate": 0.0004592699191795028, "loss": 0.1521, "step": 133540 }, { "epoch": 5.53, "grad_norm": 0.578125, "learning_rate": 0.00045926398578262454, "loss": 0.2253, "step": 133550 }, { "epoch": 5.53, "grad_norm": 0.458984375, "learning_rate": 0.000459258051991934, "loss": 0.2286, "step": 133560 }, { "epoch": 5.53, "grad_norm": 0.59375, "learning_rate": 0.0004592521178074425, "loss": 0.2392, "step": 133570 }, { "epoch": 5.53, "grad_norm": 1.1171875, "learning_rate": 0.00045924618322916113, "loss": 0.2249, "step": 133580 }, { "epoch": 5.53, "grad_norm": 0.9921875, "learning_rate": 0.000459240248257101, "loss": 0.2276, "step": 133590 }, { "epoch": 5.53, "grad_norm": 0.859375, "learning_rate": 0.00045923431289127326, "loss": 0.1856, "step": 133600 }, { "epoch": 5.53, "grad_norm": 1.3671875, "learning_rate": 0.0004592283771316892, "loss": 0.238, "step": 133610 }, { "epoch": 5.53, "grad_norm": 0.498046875, "learning_rate": 0.00045922244097835996, "loss": 0.1481, "step": 133620 }, { "epoch": 5.53, "grad_norm": 0.546875, "learning_rate": 0.00045921650443129657, "loss": 0.2403, "step": 133630 }, { "epoch": 5.54, "grad_norm": 0.50390625, "learning_rate": 0.0004592105674905104, "loss": 0.2246, "step": 133640 }, { "epoch": 5.54, "grad_norm": 0.54296875, "learning_rate": 0.00045920463015601255, "loss": 0.1876, "step": 133650 }, { "epoch": 5.54, "grad_norm": 1.15625, "learning_rate": 0.0004591986924278141, "loss": 0.2526, "step": 133660 }, { "epoch": 5.54, "grad_norm": 0.6171875, "learning_rate": 0.00045919275430592635, "loss": 0.2175, "step": 133670 }, { "epoch": 5.54, "grad_norm": 1.3359375, "learning_rate": 0.0004591868157903605, "loss": 0.2181, "step": 133680 }, { "epoch": 5.54, "grad_norm": 0.37890625, "learning_rate": 0.00045918087688112756, "loss": 0.1761, "step": 133690 }, { "epoch": 5.54, "grad_norm": 0.55859375, "learning_rate": 0.0004591749375782388, "loss": 0.2214, "step": 133700 }, { "epoch": 5.54, "grad_norm": 0.90234375, "learning_rate": 0.0004591689978817054, "loss": 0.2188, "step": 133710 }, { "epoch": 5.54, "grad_norm": 0.9296875, "learning_rate": 0.00045916305779153854, "loss": 0.185, "step": 133720 }, { "epoch": 5.54, "grad_norm": 0.88671875, "learning_rate": 0.0004591571173077495, "loss": 0.2296, "step": 133730 }, { "epoch": 5.54, "grad_norm": 0.373046875, "learning_rate": 0.00045915117643034915, "loss": 0.2206, "step": 133740 }, { "epoch": 5.54, "grad_norm": 0.578125, "learning_rate": 0.00045914523515934897, "loss": 0.1885, "step": 133750 }, { "epoch": 5.54, "grad_norm": 0.439453125, "learning_rate": 0.00045913929349476, "loss": 0.2056, "step": 133760 }, { "epoch": 5.54, "grad_norm": 1.703125, "learning_rate": 0.00045913335143659355, "loss": 0.2528, "step": 133770 }, { "epoch": 5.54, "grad_norm": 0.5546875, "learning_rate": 0.0004591274089848607, "loss": 0.2106, "step": 133780 }, { "epoch": 5.54, "grad_norm": 0.298828125, "learning_rate": 0.00045912146613957263, "loss": 0.2187, "step": 133790 }, { "epoch": 5.54, "grad_norm": 0.8203125, "learning_rate": 0.00045911552290074057, "loss": 0.2013, "step": 133800 }, { "epoch": 5.54, "grad_norm": 0.9921875, "learning_rate": 0.0004591095792683756, "loss": 0.2131, "step": 133810 }, { "epoch": 5.54, "grad_norm": 0.59375, "learning_rate": 0.000459103635242489, "loss": 0.1538, "step": 133820 }, { "epoch": 5.54, "grad_norm": 0.72265625, "learning_rate": 0.00045909769082309204, "loss": 0.227, "step": 133830 }, { "epoch": 5.54, "grad_norm": 1.171875, "learning_rate": 0.00045909174601019574, "loss": 0.2353, "step": 133840 }, { "epoch": 5.54, "grad_norm": 2.328125, "learning_rate": 0.00045908580080381136, "loss": 0.1808, "step": 133850 }, { "epoch": 5.54, "grad_norm": 0.734375, "learning_rate": 0.00045907985520395007, "loss": 0.2287, "step": 133860 }, { "epoch": 5.54, "grad_norm": 0.9140625, "learning_rate": 0.0004590739092106231, "loss": 0.238, "step": 133870 }, { "epoch": 5.55, "grad_norm": 0.2119140625, "learning_rate": 0.00045906796282384156, "loss": 0.1944, "step": 133880 }, { "epoch": 5.55, "grad_norm": 0.25, "learning_rate": 0.00045906201604361673, "loss": 0.1896, "step": 133890 }, { "epoch": 5.55, "grad_norm": 3.109375, "learning_rate": 0.00045905606886995976, "loss": 0.2333, "step": 133900 }, { "epoch": 5.55, "grad_norm": 0.9296875, "learning_rate": 0.0004590501213028818, "loss": 0.1931, "step": 133910 }, { "epoch": 5.55, "grad_norm": 0.8984375, "learning_rate": 0.00045904417334239413, "loss": 0.1976, "step": 133920 }, { "epoch": 5.55, "grad_norm": 0.56640625, "learning_rate": 0.0004590382249885079, "loss": 0.1994, "step": 133930 }, { "epoch": 5.55, "grad_norm": 1.1015625, "learning_rate": 0.0004590322762412343, "loss": 0.1979, "step": 133940 }, { "epoch": 5.55, "grad_norm": 0.8203125, "learning_rate": 0.00045902632710058454, "loss": 0.2016, "step": 133950 }, { "epoch": 5.55, "grad_norm": 1.046875, "learning_rate": 0.0004590203775665698, "loss": 0.2153, "step": 133960 }, { "epoch": 5.55, "grad_norm": 0.73828125, "learning_rate": 0.0004590144276392013, "loss": 0.1832, "step": 133970 }, { "epoch": 5.55, "grad_norm": 0.66796875, "learning_rate": 0.00045900847731849013, "loss": 0.1886, "step": 133980 }, { "epoch": 5.55, "grad_norm": 0.6328125, "learning_rate": 0.0004590025266044477, "loss": 0.2072, "step": 133990 }, { "epoch": 5.55, "grad_norm": 0.546875, "learning_rate": 0.000458996575497085, "loss": 0.1666, "step": 134000 }, { "epoch": 5.55, "grad_norm": 0.65234375, "learning_rate": 0.00045899062399641335, "loss": 0.2226, "step": 134010 }, { "epoch": 5.55, "grad_norm": 0.5625, "learning_rate": 0.00045898467210244386, "loss": 0.1971, "step": 134020 }, { "epoch": 5.55, "grad_norm": 0.765625, "learning_rate": 0.0004589787198151879, "loss": 0.1953, "step": 134030 }, { "epoch": 5.55, "grad_norm": 1.015625, "learning_rate": 0.00045897276713465645, "loss": 0.2144, "step": 134040 }, { "epoch": 5.55, "grad_norm": 0.53125, "learning_rate": 0.00045896681406086087, "loss": 0.2246, "step": 134050 }, { "epoch": 5.55, "grad_norm": 0.55859375, "learning_rate": 0.0004589608605938123, "loss": 0.2214, "step": 134060 }, { "epoch": 5.55, "grad_norm": 0.578125, "learning_rate": 0.00045895490673352195, "loss": 0.1675, "step": 134070 }, { "epoch": 5.55, "grad_norm": 0.466796875, "learning_rate": 0.00045894895248000105, "loss": 0.1703, "step": 134080 }, { "epoch": 5.55, "grad_norm": 1.6015625, "learning_rate": 0.0004589429978332608, "loss": 0.1722, "step": 134090 }, { "epoch": 5.55, "grad_norm": 0.51953125, "learning_rate": 0.0004589370427933124, "loss": 0.1962, "step": 134100 }, { "epoch": 5.55, "grad_norm": 0.6328125, "learning_rate": 0.000458931087360167, "loss": 0.2311, "step": 134110 }, { "epoch": 5.56, "grad_norm": 0.34765625, "learning_rate": 0.00045892513153383585, "loss": 0.1697, "step": 134120 }, { "epoch": 5.56, "grad_norm": 0.5546875, "learning_rate": 0.0004589191753143301, "loss": 0.2164, "step": 134130 }, { "epoch": 5.56, "grad_norm": 0.796875, "learning_rate": 0.00045891321870166114, "loss": 0.1991, "step": 134140 }, { "epoch": 5.56, "grad_norm": 2.0, "learning_rate": 0.00045890726169584006, "loss": 0.2125, "step": 134150 }, { "epoch": 5.56, "grad_norm": 0.53515625, "learning_rate": 0.00045890130429687806, "loss": 0.1775, "step": 134160 }, { "epoch": 5.56, "grad_norm": 0.5546875, "learning_rate": 0.00045889534650478636, "loss": 0.1791, "step": 134170 }, { "epoch": 5.56, "grad_norm": 1.109375, "learning_rate": 0.0004588893883195762, "loss": 0.1985, "step": 134180 }, { "epoch": 5.56, "grad_norm": 0.86328125, "learning_rate": 0.0004588834297412587, "loss": 0.1864, "step": 134190 }, { "epoch": 5.56, "grad_norm": 1.0703125, "learning_rate": 0.0004588774707698452, "loss": 0.2324, "step": 134200 }, { "epoch": 5.56, "grad_norm": 1.109375, "learning_rate": 0.0004588715114053468, "loss": 0.2408, "step": 134210 }, { "epoch": 5.56, "grad_norm": 0.498046875, "learning_rate": 0.00045886555164777475, "loss": 0.1927, "step": 134220 }, { "epoch": 5.56, "grad_norm": 1.03125, "learning_rate": 0.00045885959149714043, "loss": 0.256, "step": 134230 }, { "epoch": 5.56, "grad_norm": 0.75390625, "learning_rate": 0.0004588536309534548, "loss": 0.1883, "step": 134240 }, { "epoch": 5.56, "grad_norm": 1.6953125, "learning_rate": 0.00045884767001672924, "loss": 0.1979, "step": 134250 }, { "epoch": 5.56, "grad_norm": 0.6328125, "learning_rate": 0.00045884170868697486, "loss": 0.1868, "step": 134260 }, { "epoch": 5.56, "grad_norm": 0.734375, "learning_rate": 0.000458835746964203, "loss": 0.1903, "step": 134270 }, { "epoch": 5.56, "grad_norm": 0.90234375, "learning_rate": 0.0004588297848484248, "loss": 0.2163, "step": 134280 }, { "epoch": 5.56, "grad_norm": 0.9609375, "learning_rate": 0.0004588238223396515, "loss": 0.211, "step": 134290 }, { "epoch": 5.56, "grad_norm": 0.70703125, "learning_rate": 0.00045881785943789426, "loss": 0.2209, "step": 134300 }, { "epoch": 5.56, "grad_norm": 1.34375, "learning_rate": 0.00045881189614316444, "loss": 0.1761, "step": 134310 }, { "epoch": 5.56, "grad_norm": 0.4765625, "learning_rate": 0.00045880593245547314, "loss": 0.1945, "step": 134320 }, { "epoch": 5.56, "grad_norm": 0.30078125, "learning_rate": 0.0004587999683748316, "loss": 0.226, "step": 134330 }, { "epoch": 5.56, "grad_norm": 0.4921875, "learning_rate": 0.0004587940039012511, "loss": 0.1762, "step": 134340 }, { "epoch": 5.56, "grad_norm": 0.828125, "learning_rate": 0.00045878803903474284, "loss": 0.1762, "step": 134350 }, { "epoch": 5.57, "grad_norm": 0.50390625, "learning_rate": 0.0004587820737753181, "loss": 0.209, "step": 134360 }, { "epoch": 5.57, "grad_norm": 0.6015625, "learning_rate": 0.00045877610812298787, "loss": 0.2532, "step": 134370 }, { "epoch": 5.57, "grad_norm": 0.6875, "learning_rate": 0.00045877014207776367, "loss": 0.2109, "step": 134380 }, { "epoch": 5.57, "grad_norm": 0.7578125, "learning_rate": 0.00045876417563965653, "loss": 0.2278, "step": 134390 }, { "epoch": 5.57, "grad_norm": 0.458984375, "learning_rate": 0.0004587582088086779, "loss": 0.2018, "step": 134400 }, { "epoch": 5.57, "grad_norm": 0.71875, "learning_rate": 0.00045875224158483876, "loss": 0.2274, "step": 134410 }, { "epoch": 5.57, "grad_norm": 2.046875, "learning_rate": 0.0004587462739681505, "loss": 0.2016, "step": 134420 }, { "epoch": 5.57, "grad_norm": 0.76953125, "learning_rate": 0.0004587403059586243, "loss": 0.2078, "step": 134430 }, { "epoch": 5.57, "grad_norm": 0.8125, "learning_rate": 0.0004587343375562713, "loss": 0.2027, "step": 134440 }, { "epoch": 5.57, "grad_norm": 0.578125, "learning_rate": 0.00045872836876110286, "loss": 0.1798, "step": 134450 }, { "epoch": 5.57, "grad_norm": 0.578125, "learning_rate": 0.0004587223995731302, "loss": 0.2238, "step": 134460 }, { "epoch": 5.57, "grad_norm": 1.2734375, "learning_rate": 0.0004587164299923645, "loss": 0.1525, "step": 134470 }, { "epoch": 5.57, "grad_norm": 0.369140625, "learning_rate": 0.000458710460018817, "loss": 0.1882, "step": 134480 }, { "epoch": 5.57, "grad_norm": 1.4375, "learning_rate": 0.00045870448965249893, "loss": 0.1998, "step": 134490 }, { "epoch": 5.57, "grad_norm": 1.1484375, "learning_rate": 0.0004586985188934216, "loss": 0.167, "step": 134500 }, { "epoch": 5.57, "grad_norm": 0.88671875, "learning_rate": 0.0004586925477415962, "loss": 0.24, "step": 134510 }, { "epoch": 5.57, "grad_norm": 0.5703125, "learning_rate": 0.0004586865761970339, "loss": 0.18, "step": 134520 }, { "epoch": 5.57, "grad_norm": 0.5859375, "learning_rate": 0.00045868060425974613, "loss": 0.2133, "step": 134530 }, { "epoch": 5.57, "grad_norm": 1.0546875, "learning_rate": 0.0004586746319297439, "loss": 0.1903, "step": 134540 }, { "epoch": 5.57, "grad_norm": 0.60546875, "learning_rate": 0.0004586686592070385, "loss": 0.1459, "step": 134550 }, { "epoch": 5.57, "grad_norm": 0.486328125, "learning_rate": 0.0004586626860916413, "loss": 0.1762, "step": 134560 }, { "epoch": 5.57, "grad_norm": 0.6328125, "learning_rate": 0.00045865671258356344, "loss": 0.216, "step": 134570 }, { "epoch": 5.57, "grad_norm": 0.58984375, "learning_rate": 0.00045865073868281615, "loss": 0.2448, "step": 134580 }, { "epoch": 5.57, "grad_norm": 0.458984375, "learning_rate": 0.00045864476438941073, "loss": 0.2045, "step": 134590 }, { "epoch": 5.58, "grad_norm": 0.328125, "learning_rate": 0.0004586387897033584, "loss": 0.2612, "step": 134600 }, { "epoch": 5.58, "grad_norm": 0.6640625, "learning_rate": 0.00045863281462467045, "loss": 0.1698, "step": 134610 }, { "epoch": 5.58, "grad_norm": 0.5625, "learning_rate": 0.00045862683915335803, "loss": 0.1989, "step": 134620 }, { "epoch": 5.58, "grad_norm": 0.6796875, "learning_rate": 0.00045862086328943237, "loss": 0.2784, "step": 134630 }, { "epoch": 5.58, "grad_norm": 0.58984375, "learning_rate": 0.00045861488703290485, "loss": 0.2035, "step": 134640 }, { "epoch": 5.58, "grad_norm": 0.58984375, "learning_rate": 0.0004586089103837866, "loss": 0.2116, "step": 134650 }, { "epoch": 5.58, "grad_norm": 0.859375, "learning_rate": 0.0004586029333420889, "loss": 0.2238, "step": 134660 }, { "epoch": 5.58, "grad_norm": 0.640625, "learning_rate": 0.00045859695590782303, "loss": 0.1823, "step": 134670 }, { "epoch": 5.58, "grad_norm": 0.5234375, "learning_rate": 0.00045859097808100024, "loss": 0.2044, "step": 134680 }, { "epoch": 5.58, "grad_norm": 0.56640625, "learning_rate": 0.0004585849998616317, "loss": 0.2441, "step": 134690 }, { "epoch": 5.58, "grad_norm": 1.15625, "learning_rate": 0.00045857902124972873, "loss": 0.2201, "step": 134700 }, { "epoch": 5.58, "grad_norm": 0.796875, "learning_rate": 0.0004585730422453026, "loss": 0.1925, "step": 134710 }, { "epoch": 5.58, "grad_norm": 1.015625, "learning_rate": 0.0004585670628483645, "loss": 0.1907, "step": 134720 }, { "epoch": 5.58, "grad_norm": 0.62890625, "learning_rate": 0.00045856108305892575, "loss": 0.1801, "step": 134730 }, { "epoch": 5.58, "grad_norm": 0.423828125, "learning_rate": 0.00045855510287699753, "loss": 0.1968, "step": 134740 }, { "epoch": 5.58, "grad_norm": 0.43359375, "learning_rate": 0.0004585491223025911, "loss": 0.206, "step": 134750 }, { "epoch": 5.58, "grad_norm": 0.77734375, "learning_rate": 0.00045854314133571776, "loss": 0.1585, "step": 134760 }, { "epoch": 5.58, "grad_norm": 0.275390625, "learning_rate": 0.00045853715997638876, "loss": 0.2002, "step": 134770 }, { "epoch": 5.58, "grad_norm": 0.5078125, "learning_rate": 0.0004585311782246154, "loss": 0.2074, "step": 134780 }, { "epoch": 5.58, "grad_norm": 0.6796875, "learning_rate": 0.0004585251960804088, "loss": 0.192, "step": 134790 }, { "epoch": 5.58, "grad_norm": 0.78515625, "learning_rate": 0.0004585192135437803, "loss": 0.2214, "step": 134800 }, { "epoch": 5.58, "grad_norm": 0.6484375, "learning_rate": 0.0004585132306147412, "loss": 0.1793, "step": 134810 }, { "epoch": 5.58, "grad_norm": 0.349609375, "learning_rate": 0.00045850724729330273, "loss": 0.1853, "step": 134820 }, { "epoch": 5.58, "grad_norm": 1.3828125, "learning_rate": 0.00045850126357947606, "loss": 0.2174, "step": 134830 }, { "epoch": 5.59, "grad_norm": 0.49609375, "learning_rate": 0.00045849527947327266, "loss": 0.1877, "step": 134840 }, { "epoch": 5.59, "grad_norm": 1.0234375, "learning_rate": 0.00045848929497470354, "loss": 0.1892, "step": 134850 }, { "epoch": 5.59, "grad_norm": 0.0, "learning_rate": 0.00045848331008378014, "loss": 0.2072, "step": 134860 }, { "epoch": 5.59, "grad_norm": 0.9296875, "learning_rate": 0.00045847732480051363, "loss": 0.2336, "step": 134870 }, { "epoch": 5.59, "grad_norm": 0.609375, "learning_rate": 0.00045847133912491533, "loss": 0.2297, "step": 134880 }, { "epoch": 5.59, "grad_norm": 0.640625, "learning_rate": 0.0004584653530569964, "loss": 0.2259, "step": 134890 }, { "epoch": 5.59, "grad_norm": 0.328125, "learning_rate": 0.00045845936659676833, "loss": 0.2045, "step": 134900 }, { "epoch": 5.59, "grad_norm": 0.7109375, "learning_rate": 0.0004584533797442422, "loss": 0.2502, "step": 134910 }, { "epoch": 5.59, "grad_norm": 0.7265625, "learning_rate": 0.0004584473924994293, "loss": 0.2953, "step": 134920 }, { "epoch": 5.59, "grad_norm": 0.56640625, "learning_rate": 0.0004584414048623409, "loss": 0.2581, "step": 134930 }, { "epoch": 5.59, "grad_norm": 0.78125, "learning_rate": 0.0004584354168329883, "loss": 0.192, "step": 134940 }, { "epoch": 5.59, "grad_norm": 0.90625, "learning_rate": 0.0004584294284113828, "loss": 0.2191, "step": 134950 }, { "epoch": 5.59, "grad_norm": 0.77734375, "learning_rate": 0.0004584234395975355, "loss": 0.1907, "step": 134960 }, { "epoch": 5.59, "grad_norm": 0.408203125, "learning_rate": 0.00045841745039145793, "loss": 0.1988, "step": 134970 }, { "epoch": 5.59, "grad_norm": 0.4453125, "learning_rate": 0.00045841146079316115, "loss": 0.2206, "step": 134980 }, { "epoch": 5.59, "grad_norm": 0.98046875, "learning_rate": 0.0004584054708026565, "loss": 0.194, "step": 134990 }, { "epoch": 5.59, "grad_norm": 0.68359375, "learning_rate": 0.0004583994804199554, "loss": 0.1853, "step": 135000 }, { "epoch": 5.59, "grad_norm": 0.97265625, "learning_rate": 0.00045839348964506877, "loss": 0.168, "step": 135010 }, { "epoch": 5.59, "grad_norm": 2.796875, "learning_rate": 0.0004583874984780082, "loss": 0.2112, "step": 135020 }, { "epoch": 5.59, "grad_norm": 0.67578125, "learning_rate": 0.0004583815069187849, "loss": 0.2071, "step": 135030 }, { "epoch": 5.59, "grad_norm": 0.9140625, "learning_rate": 0.0004583755149674101, "loss": 0.2048, "step": 135040 }, { "epoch": 5.59, "grad_norm": 1.1015625, "learning_rate": 0.00045836952262389504, "loss": 0.2275, "step": 135050 }, { "epoch": 5.59, "grad_norm": 0.66015625, "learning_rate": 0.00045836352988825105, "loss": 0.1963, "step": 135060 }, { "epoch": 5.59, "grad_norm": 1.59375, "learning_rate": 0.00045835753676048936, "loss": 0.1822, "step": 135070 }, { "epoch": 5.59, "grad_norm": 0.44921875, "learning_rate": 0.00045835154324062134, "loss": 0.2181, "step": 135080 }, { "epoch": 5.6, "grad_norm": 0.80859375, "learning_rate": 0.0004583455493286582, "loss": 0.2089, "step": 135090 }, { "epoch": 5.6, "grad_norm": 0.58203125, "learning_rate": 0.0004583395550246112, "loss": 0.2282, "step": 135100 }, { "epoch": 5.6, "grad_norm": 0.78125, "learning_rate": 0.0004583335603284917, "loss": 0.1593, "step": 135110 }, { "epoch": 5.6, "grad_norm": 0.70703125, "learning_rate": 0.000458327565240311, "loss": 0.194, "step": 135120 }, { "epoch": 5.6, "grad_norm": 0.212890625, "learning_rate": 0.00045832156976008024, "loss": 0.1764, "step": 135130 }, { "epoch": 5.6, "grad_norm": 0.67578125, "learning_rate": 0.00045831557388781075, "loss": 0.2245, "step": 135140 }, { "epoch": 5.6, "grad_norm": 0.99609375, "learning_rate": 0.00045830957762351386, "loss": 0.1929, "step": 135150 }, { "epoch": 5.6, "grad_norm": 1.1328125, "learning_rate": 0.0004583035809672009, "loss": 0.1935, "step": 135160 }, { "epoch": 5.6, "grad_norm": 0.43359375, "learning_rate": 0.0004582975839188831, "loss": 0.2275, "step": 135170 }, { "epoch": 5.6, "grad_norm": 0.6640625, "learning_rate": 0.0004582915864785717, "loss": 0.1882, "step": 135180 }, { "epoch": 5.6, "grad_norm": 1.109375, "learning_rate": 0.000458285588646278, "loss": 0.209, "step": 135190 }, { "epoch": 5.6, "grad_norm": 1.2890625, "learning_rate": 0.0004582795904220134, "loss": 0.2119, "step": 135200 }, { "epoch": 5.6, "grad_norm": 0.6171875, "learning_rate": 0.00045827359180578903, "loss": 0.2515, "step": 135210 }, { "epoch": 5.6, "grad_norm": 0.57421875, "learning_rate": 0.00045826759279761633, "loss": 0.1923, "step": 135220 }, { "epoch": 5.6, "grad_norm": 0.59375, "learning_rate": 0.0004582615933975064, "loss": 0.2232, "step": 135230 }, { "epoch": 5.6, "grad_norm": 0.9453125, "learning_rate": 0.00045825559360547074, "loss": 0.1945, "step": 135240 }, { "epoch": 5.6, "grad_norm": 0.70703125, "learning_rate": 0.0004582495934215205, "loss": 0.2355, "step": 135250 }, { "epoch": 5.6, "grad_norm": 1.1328125, "learning_rate": 0.000458243592845667, "loss": 0.258, "step": 135260 }, { "epoch": 5.6, "grad_norm": 0.58203125, "learning_rate": 0.0004582375918779215, "loss": 0.2047, "step": 135270 }, { "epoch": 5.6, "grad_norm": 0.78125, "learning_rate": 0.0004582315905182954, "loss": 0.182, "step": 135280 }, { "epoch": 5.6, "grad_norm": 0.578125, "learning_rate": 0.0004582255887668, "loss": 0.2189, "step": 135290 }, { "epoch": 5.6, "grad_norm": 1.3671875, "learning_rate": 0.00045821958662344643, "loss": 0.2334, "step": 135300 }, { "epoch": 5.6, "grad_norm": 1.203125, "learning_rate": 0.0004582135840882461, "loss": 0.2104, "step": 135310 }, { "epoch": 5.6, "grad_norm": 0.328125, "learning_rate": 0.00045820758116121033, "loss": 0.1874, "step": 135320 }, { "epoch": 5.61, "grad_norm": 0.9375, "learning_rate": 0.0004582015778423504, "loss": 0.2351, "step": 135330 }, { "epoch": 5.61, "grad_norm": 0.73828125, "learning_rate": 0.0004581955741316775, "loss": 0.2353, "step": 135340 }, { "epoch": 5.61, "grad_norm": 0.734375, "learning_rate": 0.00045818957002920305, "loss": 0.2088, "step": 135350 }, { "epoch": 5.61, "grad_norm": 0.31640625, "learning_rate": 0.00045818356553493835, "loss": 0.2506, "step": 135360 }, { "epoch": 5.61, "grad_norm": 1.0625, "learning_rate": 0.0004581775606488946, "loss": 0.192, "step": 135370 }, { "epoch": 5.61, "grad_norm": 0.490234375, "learning_rate": 0.0004581715553710831, "loss": 0.1722, "step": 135380 }, { "epoch": 5.61, "grad_norm": 0.388671875, "learning_rate": 0.0004581655497015154, "loss": 0.1746, "step": 135390 }, { "epoch": 5.61, "grad_norm": 0.93359375, "learning_rate": 0.00045815954364020244, "loss": 0.1741, "step": 135400 }, { "epoch": 5.61, "grad_norm": 0.376953125, "learning_rate": 0.0004581535371871558, "loss": 0.2912, "step": 135410 }, { "epoch": 5.61, "grad_norm": 0.7265625, "learning_rate": 0.00045814753034238665, "loss": 0.1962, "step": 135420 }, { "epoch": 5.61, "grad_norm": 0.263671875, "learning_rate": 0.0004581415231059063, "loss": 0.2266, "step": 135430 }, { "epoch": 5.61, "grad_norm": 0.0, "learning_rate": 0.00045813551547772613, "loss": 0.1531, "step": 135440 }, { "epoch": 5.61, "grad_norm": 0.66015625, "learning_rate": 0.0004581295074578574, "loss": 0.2065, "step": 135450 }, { "epoch": 5.61, "grad_norm": 0.5859375, "learning_rate": 0.0004581234990463114, "loss": 0.2486, "step": 135460 }, { "epoch": 5.61, "grad_norm": 1.140625, "learning_rate": 0.00045811749024309944, "loss": 0.1844, "step": 135470 }, { "epoch": 5.61, "grad_norm": 0.73828125, "learning_rate": 0.00045811148104823286, "loss": 0.2453, "step": 135480 }, { "epoch": 5.61, "grad_norm": 0.86328125, "learning_rate": 0.0004581054714617229, "loss": 0.2565, "step": 135490 }, { "epoch": 5.61, "grad_norm": 0.5703125, "learning_rate": 0.00045809946148358096, "loss": 0.2758, "step": 135500 }, { "epoch": 5.61, "grad_norm": 0.734375, "learning_rate": 0.0004580934511138183, "loss": 0.2284, "step": 135510 }, { "epoch": 5.61, "grad_norm": 0.96875, "learning_rate": 0.0004580874403524462, "loss": 0.163, "step": 135520 }, { "epoch": 5.61, "grad_norm": 0.54296875, "learning_rate": 0.00045808142919947603, "loss": 0.2029, "step": 135530 }, { "epoch": 5.61, "grad_norm": 1.328125, "learning_rate": 0.0004580754176549191, "loss": 0.1637, "step": 135540 }, { "epoch": 5.61, "grad_norm": 0.66796875, "learning_rate": 0.00045806940571878666, "loss": 0.195, "step": 135550 }, { "epoch": 5.61, "grad_norm": 1.3203125, "learning_rate": 0.0004580633933910901, "loss": 0.2308, "step": 135560 }, { "epoch": 5.62, "grad_norm": 0.2431640625, "learning_rate": 0.00045805738067184067, "loss": 0.1717, "step": 135570 }, { "epoch": 5.62, "grad_norm": 0.65234375, "learning_rate": 0.0004580513675610498, "loss": 0.2253, "step": 135580 }, { "epoch": 5.62, "grad_norm": 0.875, "learning_rate": 0.0004580453540587286, "loss": 0.2312, "step": 135590 }, { "epoch": 5.62, "grad_norm": 0.447265625, "learning_rate": 0.00045803934016488864, "loss": 0.1669, "step": 135600 }, { "epoch": 5.62, "grad_norm": 0.1865234375, "learning_rate": 0.00045803332587954107, "loss": 0.2113, "step": 135610 }, { "epoch": 5.62, "grad_norm": 0.8359375, "learning_rate": 0.0004580273112026972, "loss": 0.2581, "step": 135620 }, { "epoch": 5.62, "grad_norm": 1.265625, "learning_rate": 0.0004580212961343684, "loss": 0.2322, "step": 135630 }, { "epoch": 5.62, "grad_norm": 0.4765625, "learning_rate": 0.000458015280674566, "loss": 0.237, "step": 135640 }, { "epoch": 5.62, "grad_norm": 0.408203125, "learning_rate": 0.0004580092648233013, "loss": 0.2004, "step": 135650 }, { "epoch": 5.62, "grad_norm": 0.361328125, "learning_rate": 0.0004580032485805856, "loss": 0.1246, "step": 135660 }, { "epoch": 5.62, "grad_norm": 1.015625, "learning_rate": 0.0004579972319464304, "loss": 0.2001, "step": 135670 }, { "epoch": 5.62, "grad_norm": 1.03125, "learning_rate": 0.00045799121492084673, "loss": 0.2516, "step": 135680 }, { "epoch": 5.62, "grad_norm": 0.9296875, "learning_rate": 0.000457985197503846, "loss": 0.232, "step": 135690 }, { "epoch": 5.62, "grad_norm": 0.37109375, "learning_rate": 0.00045797917969543975, "loss": 0.2372, "step": 135700 }, { "epoch": 5.62, "grad_norm": 0.69921875, "learning_rate": 0.00045797316149563905, "loss": 0.2105, "step": 135710 }, { "epoch": 5.62, "grad_norm": 0.39453125, "learning_rate": 0.00045796714290445535, "loss": 0.2208, "step": 135720 }, { "epoch": 5.62, "grad_norm": 0.609375, "learning_rate": 0.0004579611239218999, "loss": 0.2175, "step": 135730 }, { "epoch": 5.62, "grad_norm": 2.078125, "learning_rate": 0.0004579551045479841, "loss": 0.2119, "step": 135740 }, { "epoch": 5.62, "grad_norm": 1.0703125, "learning_rate": 0.00045794908478271923, "loss": 0.2058, "step": 135750 }, { "epoch": 5.62, "grad_norm": 0.9140625, "learning_rate": 0.0004579430646261167, "loss": 0.1678, "step": 135760 }, { "epoch": 5.62, "grad_norm": 0.71875, "learning_rate": 0.00045793704407818777, "loss": 0.2603, "step": 135770 }, { "epoch": 5.62, "grad_norm": 0.51953125, "learning_rate": 0.00045793102313894374, "loss": 0.2191, "step": 135780 }, { "epoch": 5.62, "grad_norm": 0.76171875, "learning_rate": 0.00045792500180839597, "loss": 0.147, "step": 135790 }, { "epoch": 5.62, "grad_norm": 1.0078125, "learning_rate": 0.00045791898008655583, "loss": 0.29, "step": 135800 }, { "epoch": 5.63, "grad_norm": 0.60546875, "learning_rate": 0.00045791295797343456, "loss": 0.2268, "step": 135810 }, { "epoch": 5.63, "grad_norm": 0.51171875, "learning_rate": 0.0004579069354690436, "loss": 0.24, "step": 135820 }, { "epoch": 5.63, "grad_norm": 0.95703125, "learning_rate": 0.00045790091257339426, "loss": 0.2023, "step": 135830 }, { "epoch": 5.63, "grad_norm": 0.400390625, "learning_rate": 0.00045789488928649793, "loss": 0.1858, "step": 135840 }, { "epoch": 5.63, "grad_norm": 0.5390625, "learning_rate": 0.00045788886560836574, "loss": 0.2213, "step": 135850 }, { "epoch": 5.63, "grad_norm": 0.26953125, "learning_rate": 0.00045788284153900916, "loss": 0.2283, "step": 135860 }, { "epoch": 5.63, "grad_norm": 0.6640625, "learning_rate": 0.0004578768170784396, "loss": 0.1755, "step": 135870 }, { "epoch": 5.63, "grad_norm": 0.9140625, "learning_rate": 0.00045787079222666825, "loss": 0.2508, "step": 135880 }, { "epoch": 5.63, "grad_norm": 0.302734375, "learning_rate": 0.0004578647669837066, "loss": 0.166, "step": 135890 }, { "epoch": 5.63, "grad_norm": 0.76953125, "learning_rate": 0.0004578587413495658, "loss": 0.2535, "step": 135900 }, { "epoch": 5.63, "grad_norm": 0.90625, "learning_rate": 0.0004578527153242573, "loss": 0.2396, "step": 135910 }, { "epoch": 5.63, "grad_norm": 0.53125, "learning_rate": 0.0004578466889077925, "loss": 0.2277, "step": 135920 }, { "epoch": 5.63, "grad_norm": 0.43359375, "learning_rate": 0.0004578406621001827, "loss": 0.2405, "step": 135930 }, { "epoch": 5.63, "grad_norm": 1.0703125, "learning_rate": 0.00045783463490143916, "loss": 0.2085, "step": 135940 }, { "epoch": 5.63, "grad_norm": 0.50390625, "learning_rate": 0.0004578286073115733, "loss": 0.1511, "step": 135950 }, { "epoch": 5.63, "grad_norm": 0.57421875, "learning_rate": 0.00045782257933059644, "loss": 0.2217, "step": 135960 }, { "epoch": 5.63, "grad_norm": 1.5703125, "learning_rate": 0.0004578165509585199, "loss": 0.2137, "step": 135970 }, { "epoch": 5.63, "grad_norm": 0.7109375, "learning_rate": 0.0004578105221953551, "loss": 0.2115, "step": 135980 }, { "epoch": 5.63, "grad_norm": 0.482421875, "learning_rate": 0.00045780449304111333, "loss": 0.1816, "step": 135990 }, { "epoch": 5.63, "grad_norm": 1.859375, "learning_rate": 0.00045779846349580595, "loss": 0.2077, "step": 136000 }, { "epoch": 5.63, "grad_norm": 0.64453125, "learning_rate": 0.00045779243355944426, "loss": 0.2612, "step": 136010 }, { "epoch": 5.63, "grad_norm": 2.59375, "learning_rate": 0.0004577864032320397, "loss": 0.1897, "step": 136020 }, { "epoch": 5.63, "grad_norm": 0.57421875, "learning_rate": 0.0004577803725136035, "loss": 0.2819, "step": 136030 }, { "epoch": 5.63, "grad_norm": 0.8515625, "learning_rate": 0.0004577743414041472, "loss": 0.2281, "step": 136040 }, { "epoch": 5.64, "grad_norm": 0.380859375, "learning_rate": 0.0004577683099036819, "loss": 0.3223, "step": 136050 }, { "epoch": 5.64, "grad_norm": 0.376953125, "learning_rate": 0.00045776227801221916, "loss": 0.2437, "step": 136060 }, { "epoch": 5.64, "grad_norm": 0.875, "learning_rate": 0.00045775624572977016, "loss": 0.2218, "step": 136070 }, { "epoch": 5.64, "grad_norm": 0.88671875, "learning_rate": 0.0004577502130563465, "loss": 0.2028, "step": 136080 }, { "epoch": 5.64, "grad_norm": 0.6328125, "learning_rate": 0.0004577441799919592, "loss": 0.1601, "step": 136090 }, { "epoch": 5.64, "grad_norm": 0.26953125, "learning_rate": 0.0004577381465366199, "loss": 0.1983, "step": 136100 }, { "epoch": 5.64, "grad_norm": 0.84765625, "learning_rate": 0.0004577321126903398, "loss": 0.2272, "step": 136110 }, { "epoch": 5.64, "grad_norm": 0.71875, "learning_rate": 0.0004577260784531303, "loss": 0.1862, "step": 136120 }, { "epoch": 5.64, "grad_norm": 0.78515625, "learning_rate": 0.00045772004382500276, "loss": 0.2489, "step": 136130 }, { "epoch": 5.64, "grad_norm": 0.53125, "learning_rate": 0.00045771400880596845, "loss": 0.2238, "step": 136140 }, { "epoch": 5.64, "grad_norm": 0.80859375, "learning_rate": 0.0004577079733960389, "loss": 0.2052, "step": 136150 }, { "epoch": 5.64, "grad_norm": 1.8359375, "learning_rate": 0.0004577019375952254, "loss": 0.2642, "step": 136160 }, { "epoch": 5.64, "grad_norm": 0.67578125, "learning_rate": 0.0004576959014035392, "loss": 0.2182, "step": 136170 }, { "epoch": 5.64, "grad_norm": 0.5625, "learning_rate": 0.0004576898648209918, "loss": 0.2058, "step": 136180 }, { "epoch": 5.64, "grad_norm": 1.1015625, "learning_rate": 0.0004576838278475944, "loss": 0.1866, "step": 136190 }, { "epoch": 5.64, "grad_norm": 1.203125, "learning_rate": 0.0004576777904833586, "loss": 0.2168, "step": 136200 }, { "epoch": 5.64, "grad_norm": 1.0859375, "learning_rate": 0.0004576717527282956, "loss": 0.2033, "step": 136210 }, { "epoch": 5.64, "grad_norm": 0.55859375, "learning_rate": 0.0004576657145824168, "loss": 0.1974, "step": 136220 }, { "epoch": 5.64, "grad_norm": 0.703125, "learning_rate": 0.00045765967604573346, "loss": 0.2198, "step": 136230 }, { "epoch": 5.64, "grad_norm": 0.57421875, "learning_rate": 0.0004576536371182571, "loss": 0.1752, "step": 136240 }, { "epoch": 5.64, "grad_norm": 1.3515625, "learning_rate": 0.000457647597799999, "loss": 0.2386, "step": 136250 }, { "epoch": 5.64, "grad_norm": 1.015625, "learning_rate": 0.0004576415580909705, "loss": 0.2168, "step": 136260 }, { "epoch": 5.64, "grad_norm": 1.4296875, "learning_rate": 0.00045763551799118306, "loss": 0.2106, "step": 136270 }, { "epoch": 5.64, "grad_norm": 0.72265625, "learning_rate": 0.0004576294775006481, "loss": 0.2603, "step": 136280 }, { "epoch": 5.65, "grad_norm": 0.7109375, "learning_rate": 0.0004576234366193767, "loss": 0.1929, "step": 136290 }, { "epoch": 5.65, "grad_norm": 0.78125, "learning_rate": 0.00045761739534738054, "loss": 0.1986, "step": 136300 }, { "epoch": 5.65, "grad_norm": 0.8515625, "learning_rate": 0.0004576113536846708, "loss": 0.2183, "step": 136310 }, { "epoch": 5.65, "grad_norm": 0.61328125, "learning_rate": 0.00045760531163125897, "loss": 0.2039, "step": 136320 }, { "epoch": 5.65, "grad_norm": 0.384765625, "learning_rate": 0.00045759926918715634, "loss": 0.1635, "step": 136330 }, { "epoch": 5.65, "grad_norm": 0.765625, "learning_rate": 0.00045759322635237424, "loss": 0.2197, "step": 136340 }, { "epoch": 5.65, "grad_norm": 0.427734375, "learning_rate": 0.0004575871831269242, "loss": 0.2203, "step": 136350 }, { "epoch": 5.65, "grad_norm": 0.302734375, "learning_rate": 0.0004575811395108175, "loss": 0.1984, "step": 136360 }, { "epoch": 5.65, "grad_norm": 0.80078125, "learning_rate": 0.00045757509550406537, "loss": 0.198, "step": 136370 }, { "epoch": 5.65, "grad_norm": 0.38671875, "learning_rate": 0.00045756905110667944, "loss": 0.198, "step": 136380 }, { "epoch": 5.65, "grad_norm": 0.5546875, "learning_rate": 0.000457563006318671, "loss": 0.1936, "step": 136390 }, { "epoch": 5.65, "grad_norm": 0.61328125, "learning_rate": 0.0004575569611400513, "loss": 0.2173, "step": 136400 }, { "epoch": 5.65, "grad_norm": 0.41015625, "learning_rate": 0.0004575509155708319, "loss": 0.1802, "step": 136410 }, { "epoch": 5.65, "grad_norm": 0.92578125, "learning_rate": 0.00045754486961102403, "loss": 0.1857, "step": 136420 }, { "epoch": 5.65, "grad_norm": 1.4453125, "learning_rate": 0.0004575388232606392, "loss": 0.2073, "step": 136430 }, { "epoch": 5.65, "grad_norm": 0.4609375, "learning_rate": 0.0004575327765196887, "loss": 0.2216, "step": 136440 }, { "epoch": 5.65, "grad_norm": 1.78125, "learning_rate": 0.00045752672938818384, "loss": 0.181, "step": 136450 }, { "epoch": 5.65, "grad_norm": 0.44921875, "learning_rate": 0.00045752068186613614, "loss": 0.1567, "step": 136460 }, { "epoch": 5.65, "grad_norm": 1.7265625, "learning_rate": 0.000457514633953557, "loss": 0.1944, "step": 136470 }, { "epoch": 5.65, "grad_norm": 1.171875, "learning_rate": 0.0004575085856504576, "loss": 0.1506, "step": 136480 }, { "epoch": 5.65, "grad_norm": 0.77734375, "learning_rate": 0.0004575025369568495, "loss": 0.2401, "step": 136490 }, { "epoch": 5.65, "grad_norm": 0.76953125, "learning_rate": 0.00045749648787274405, "loss": 0.191, "step": 136500 }, { "epoch": 5.65, "grad_norm": 0.76953125, "learning_rate": 0.00045749043839815265, "loss": 0.2059, "step": 136510 }, { "epoch": 5.65, "grad_norm": 1.3046875, "learning_rate": 0.0004574843885330866, "loss": 0.2129, "step": 136520 }, { "epoch": 5.66, "grad_norm": 0.373046875, "learning_rate": 0.0004574783382775574, "loss": 0.1789, "step": 136530 }, { "epoch": 5.66, "grad_norm": 0.32421875, "learning_rate": 0.0004574722876315762, "loss": 0.1509, "step": 136540 }, { "epoch": 5.66, "grad_norm": 0.92578125, "learning_rate": 0.0004574662365951547, "loss": 0.2508, "step": 136550 }, { "epoch": 5.66, "grad_norm": 1.0859375, "learning_rate": 0.00045746018516830417, "loss": 0.2044, "step": 136560 }, { "epoch": 5.66, "grad_norm": 0.74609375, "learning_rate": 0.00045745413335103593, "loss": 0.2268, "step": 136570 }, { "epoch": 5.66, "grad_norm": 0.92578125, "learning_rate": 0.00045744808114336145, "loss": 0.2018, "step": 136580 }, { "epoch": 5.66, "grad_norm": 1.0625, "learning_rate": 0.00045744202854529207, "loss": 0.2152, "step": 136590 }, { "epoch": 5.66, "grad_norm": 0.3984375, "learning_rate": 0.00045743597555683916, "loss": 0.2279, "step": 136600 }, { "epoch": 5.66, "grad_norm": 0.71484375, "learning_rate": 0.00045742992217801415, "loss": 0.2234, "step": 136610 }, { "epoch": 5.66, "grad_norm": 0.302734375, "learning_rate": 0.00045742386840882845, "loss": 0.2043, "step": 136620 }, { "epoch": 5.66, "grad_norm": 0.306640625, "learning_rate": 0.0004574178142492934, "loss": 0.2249, "step": 136630 }, { "epoch": 5.66, "grad_norm": 2.875, "learning_rate": 0.00045741175969942047, "loss": 0.2119, "step": 136640 }, { "epoch": 5.66, "grad_norm": 0.87109375, "learning_rate": 0.00045740570475922095, "loss": 0.2322, "step": 136650 }, { "epoch": 5.66, "grad_norm": 0.294921875, "learning_rate": 0.00045739964942870633, "loss": 0.164, "step": 136660 }, { "epoch": 5.66, "grad_norm": 0.40625, "learning_rate": 0.000457393593707888, "loss": 0.1875, "step": 136670 }, { "epoch": 5.66, "grad_norm": 1.1171875, "learning_rate": 0.0004573875375967773, "loss": 0.2006, "step": 136680 }, { "epoch": 5.66, "grad_norm": 0.578125, "learning_rate": 0.00045738148109538557, "loss": 0.2064, "step": 136690 }, { "epoch": 5.66, "grad_norm": 0.4609375, "learning_rate": 0.00045737542420372433, "loss": 0.1778, "step": 136700 }, { "epoch": 5.66, "grad_norm": 0.56640625, "learning_rate": 0.00045736936692180496, "loss": 0.1917, "step": 136710 }, { "epoch": 5.66, "grad_norm": 0.5, "learning_rate": 0.0004573633092496389, "loss": 0.1509, "step": 136720 }, { "epoch": 5.66, "grad_norm": 1.0625, "learning_rate": 0.0004573572511872374, "loss": 0.1999, "step": 136730 }, { "epoch": 5.66, "grad_norm": 0.91015625, "learning_rate": 0.0004573511927346119, "loss": 0.1698, "step": 136740 }, { "epoch": 5.66, "grad_norm": 0.41015625, "learning_rate": 0.0004573451338917739, "loss": 0.1694, "step": 136750 }, { "epoch": 5.66, "grad_norm": 1.015625, "learning_rate": 0.00045733907465873484, "loss": 0.199, "step": 136760 }, { "epoch": 5.66, "grad_norm": 0.71875, "learning_rate": 0.00045733301503550597, "loss": 0.2295, "step": 136770 }, { "epoch": 5.67, "grad_norm": 0.7734375, "learning_rate": 0.00045732695502209865, "loss": 0.2381, "step": 136780 }, { "epoch": 5.67, "grad_norm": 0.76171875, "learning_rate": 0.0004573208946185245, "loss": 0.1952, "step": 136790 }, { "epoch": 5.67, "grad_norm": 1.875, "learning_rate": 0.00045731483382479475, "loss": 0.2091, "step": 136800 }, { "epoch": 5.67, "grad_norm": 0.6171875, "learning_rate": 0.000457308772640921, "loss": 0.1687, "step": 136810 }, { "epoch": 5.67, "grad_norm": 1.0, "learning_rate": 0.0004573027110669144, "loss": 0.1997, "step": 136820 }, { "epoch": 5.67, "grad_norm": 0.76953125, "learning_rate": 0.00045729664910278654, "loss": 0.1998, "step": 136830 }, { "epoch": 5.67, "grad_norm": 0.69921875, "learning_rate": 0.00045729058674854874, "loss": 0.1914, "step": 136840 }, { "epoch": 5.67, "grad_norm": 0.3984375, "learning_rate": 0.0004572845240042125, "loss": 0.1628, "step": 136850 }, { "epoch": 5.67, "grad_norm": 0.5390625, "learning_rate": 0.0004572784608697891, "loss": 0.1863, "step": 136860 }, { "epoch": 5.67, "grad_norm": 0.76953125, "learning_rate": 0.00045727239734529, "loss": 0.2581, "step": 136870 }, { "epoch": 5.67, "grad_norm": 1.234375, "learning_rate": 0.0004572663334307267, "loss": 0.2421, "step": 136880 }, { "epoch": 5.67, "grad_norm": 0.703125, "learning_rate": 0.0004572602691261105, "loss": 0.1945, "step": 136890 }, { "epoch": 5.67, "grad_norm": 0.79296875, "learning_rate": 0.0004572542044314529, "loss": 0.1838, "step": 136900 }, { "epoch": 5.67, "grad_norm": 0.7890625, "learning_rate": 0.00045724813934676524, "loss": 0.2294, "step": 136910 }, { "epoch": 5.67, "grad_norm": 0.671875, "learning_rate": 0.000457242073872059, "loss": 0.1664, "step": 136920 }, { "epoch": 5.67, "grad_norm": 0.458984375, "learning_rate": 0.00045723600800734557, "loss": 0.2165, "step": 136930 }, { "epoch": 5.67, "grad_norm": 0.275390625, "learning_rate": 0.0004572299417526363, "loss": 0.1899, "step": 136940 }, { "epoch": 5.67, "grad_norm": 0.44140625, "learning_rate": 0.00045722387510794264, "loss": 0.1693, "step": 136950 }, { "epoch": 5.67, "grad_norm": 0.48046875, "learning_rate": 0.00045721780807327604, "loss": 0.2051, "step": 136960 }, { "epoch": 5.67, "grad_norm": 0.31640625, "learning_rate": 0.0004572117406486479, "loss": 0.2419, "step": 136970 }, { "epoch": 5.67, "grad_norm": 0.515625, "learning_rate": 0.0004572056728340697, "loss": 0.2747, "step": 136980 }, { "epoch": 5.67, "grad_norm": 1.0234375, "learning_rate": 0.00045719960462955274, "loss": 0.2486, "step": 136990 }, { "epoch": 5.67, "grad_norm": 0.75, "learning_rate": 0.0004571935360351085, "loss": 0.2061, "step": 137000 }, { "epoch": 5.67, "grad_norm": 0.68359375, "learning_rate": 0.0004571874670507484, "loss": 0.2348, "step": 137010 }, { "epoch": 5.68, "grad_norm": 1.15625, "learning_rate": 0.00045718139767648395, "loss": 0.2183, "step": 137020 }, { "epoch": 5.68, "grad_norm": 0.494140625, "learning_rate": 0.0004571753279123264, "loss": 0.1893, "step": 137030 }, { "epoch": 5.68, "grad_norm": 0.55859375, "learning_rate": 0.00045716925775828726, "loss": 0.2423, "step": 137040 }, { "epoch": 5.68, "grad_norm": 0.439453125, "learning_rate": 0.000457163187214378, "loss": 0.1909, "step": 137050 }, { "epoch": 5.68, "grad_norm": 0.71484375, "learning_rate": 0.0004571571162806099, "loss": 0.2228, "step": 137060 }, { "epoch": 5.68, "grad_norm": 0.6484375, "learning_rate": 0.00045715104495699455, "loss": 0.2204, "step": 137070 }, { "epoch": 5.68, "grad_norm": 0.7890625, "learning_rate": 0.0004571449732435433, "loss": 0.2472, "step": 137080 }, { "epoch": 5.68, "grad_norm": 0.8125, "learning_rate": 0.00045713890114026755, "loss": 0.2591, "step": 137090 }, { "epoch": 5.68, "grad_norm": 0.5625, "learning_rate": 0.0004571328286471788, "loss": 0.2361, "step": 137100 }, { "epoch": 5.68, "grad_norm": 0.5078125, "learning_rate": 0.00045712675576428836, "loss": 0.1578, "step": 137110 }, { "epoch": 5.68, "grad_norm": 1.015625, "learning_rate": 0.00045712068249160776, "loss": 0.1411, "step": 137120 }, { "epoch": 5.68, "grad_norm": 0.427734375, "learning_rate": 0.00045711460882914845, "loss": 0.258, "step": 137130 }, { "epoch": 5.68, "grad_norm": 0.64453125, "learning_rate": 0.0004571085347769218, "loss": 0.2015, "step": 137140 }, { "epoch": 5.68, "grad_norm": 0.70703125, "learning_rate": 0.0004571024603349392, "loss": 0.2462, "step": 137150 }, { "epoch": 5.68, "grad_norm": 0.71484375, "learning_rate": 0.0004570963855032122, "loss": 0.2025, "step": 137160 }, { "epoch": 5.68, "grad_norm": 0.7421875, "learning_rate": 0.0004570903102817521, "loss": 0.2364, "step": 137170 }, { "epoch": 5.68, "grad_norm": 0.62109375, "learning_rate": 0.0004570842346705705, "loss": 0.2156, "step": 137180 }, { "epoch": 5.68, "grad_norm": 0.6171875, "learning_rate": 0.0004570781586696786, "loss": 0.2292, "step": 137190 }, { "epoch": 5.68, "grad_norm": 0.6796875, "learning_rate": 0.0004570720822790881, "loss": 0.2176, "step": 137200 }, { "epoch": 5.68, "grad_norm": 0.98828125, "learning_rate": 0.00045706600549881023, "loss": 0.1935, "step": 137210 }, { "epoch": 5.68, "grad_norm": 0.5078125, "learning_rate": 0.0004570599283288565, "loss": 0.2117, "step": 137220 }, { "epoch": 5.68, "grad_norm": 0.9609375, "learning_rate": 0.00045705385076923833, "loss": 0.1682, "step": 137230 }, { "epoch": 5.68, "grad_norm": 0.470703125, "learning_rate": 0.00045704777281996723, "loss": 0.2107, "step": 137240 }, { "epoch": 5.68, "grad_norm": 0.79296875, "learning_rate": 0.0004570416944810546, "loss": 0.1832, "step": 137250 }, { "epoch": 5.69, "grad_norm": 1.078125, "learning_rate": 0.00045703561575251175, "loss": 0.2362, "step": 137260 }, { "epoch": 5.69, "grad_norm": 2.40625, "learning_rate": 0.0004570295366343503, "loss": 0.1669, "step": 137270 }, { "epoch": 5.69, "grad_norm": 9.3125, "learning_rate": 0.0004570234571265817, "loss": 0.208, "step": 137280 }, { "epoch": 5.69, "grad_norm": 0.494140625, "learning_rate": 0.00045701737722921715, "loss": 0.2585, "step": 137290 }, { "epoch": 5.69, "grad_norm": 0.4296875, "learning_rate": 0.0004570112969422683, "loss": 0.2051, "step": 137300 }, { "epoch": 5.69, "grad_norm": 0.78515625, "learning_rate": 0.0004570052162657466, "loss": 0.1904, "step": 137310 }, { "epoch": 5.69, "grad_norm": 0.82421875, "learning_rate": 0.0004569991351996634, "loss": 0.2311, "step": 137320 }, { "epoch": 5.69, "grad_norm": 0.46875, "learning_rate": 0.00045699305374403016, "loss": 0.1975, "step": 137330 }, { "epoch": 5.69, "grad_norm": 1.1640625, "learning_rate": 0.0004569869718988584, "loss": 0.2457, "step": 137340 }, { "epoch": 5.69, "grad_norm": 0.69140625, "learning_rate": 0.00045698088966415946, "loss": 0.2283, "step": 137350 }, { "epoch": 5.69, "grad_norm": 0.859375, "learning_rate": 0.0004569748070399449, "loss": 0.2201, "step": 137360 }, { "epoch": 5.69, "grad_norm": 1.5703125, "learning_rate": 0.000456968724026226, "loss": 0.2475, "step": 137370 }, { "epoch": 5.69, "grad_norm": 0.62109375, "learning_rate": 0.00045696264062301443, "loss": 0.1951, "step": 137380 }, { "epoch": 5.69, "grad_norm": 0.287109375, "learning_rate": 0.00045695655683032144, "loss": 0.2205, "step": 137390 }, { "epoch": 5.69, "grad_norm": 0.52734375, "learning_rate": 0.0004569504726481586, "loss": 0.2293, "step": 137400 }, { "epoch": 5.69, "grad_norm": 1.0546875, "learning_rate": 0.00045694438807653725, "loss": 0.2403, "step": 137410 }, { "epoch": 5.69, "grad_norm": 0.515625, "learning_rate": 0.000456938303115469, "loss": 0.2139, "step": 137420 }, { "epoch": 5.69, "grad_norm": 0.73046875, "learning_rate": 0.0004569322177649652, "loss": 0.2258, "step": 137430 }, { "epoch": 5.69, "grad_norm": 1.046875, "learning_rate": 0.0004569261320250373, "loss": 0.1772, "step": 137440 }, { "epoch": 5.69, "grad_norm": 0.99609375, "learning_rate": 0.00045692004589569673, "loss": 0.1631, "step": 137450 }, { "epoch": 5.69, "grad_norm": 0.458984375, "learning_rate": 0.00045691395937695503, "loss": 0.2366, "step": 137460 }, { "epoch": 5.69, "grad_norm": 0.703125, "learning_rate": 0.0004569078724688236, "loss": 0.2532, "step": 137470 }, { "epoch": 5.69, "grad_norm": 0.96484375, "learning_rate": 0.0004569017851713139, "loss": 0.165, "step": 137480 }, { "epoch": 5.69, "grad_norm": 0.7109375, "learning_rate": 0.00045689569748443725, "loss": 0.2441, "step": 137490 }, { "epoch": 5.7, "grad_norm": 0.44140625, "learning_rate": 0.0004568896094082054, "loss": 0.2123, "step": 137500 }, { "epoch": 5.7, "grad_norm": 0.55859375, "learning_rate": 0.00045688352094262963, "loss": 0.1848, "step": 137510 }, { "epoch": 5.7, "grad_norm": 0.73046875, "learning_rate": 0.00045687743208772135, "loss": 0.2432, "step": 137520 }, { "epoch": 5.7, "grad_norm": 0.76953125, "learning_rate": 0.00045687134284349206, "loss": 0.1769, "step": 137530 }, { "epoch": 5.7, "grad_norm": 0.5390625, "learning_rate": 0.0004568652532099533, "loss": 0.1864, "step": 137540 }, { "epoch": 5.7, "grad_norm": 0.8828125, "learning_rate": 0.0004568591631871165, "loss": 0.2669, "step": 137550 }, { "epoch": 5.7, "grad_norm": 0.291015625, "learning_rate": 0.00045685307277499313, "loss": 0.2054, "step": 137560 }, { "epoch": 5.7, "grad_norm": 0.439453125, "learning_rate": 0.0004568469819735945, "loss": 0.1912, "step": 137570 }, { "epoch": 5.7, "grad_norm": 1.046875, "learning_rate": 0.00045684089078293225, "loss": 0.2089, "step": 137580 }, { "epoch": 5.7, "grad_norm": 0.69921875, "learning_rate": 0.0004568347992030177, "loss": 0.164, "step": 137590 }, { "epoch": 5.7, "grad_norm": 0.87109375, "learning_rate": 0.0004568287072338625, "loss": 0.1928, "step": 137600 }, { "epoch": 5.7, "grad_norm": 0.5703125, "learning_rate": 0.0004568226148754781, "loss": 0.2206, "step": 137610 }, { "epoch": 5.7, "grad_norm": 0.796875, "learning_rate": 0.00045681652212787563, "loss": 0.1853, "step": 137620 }, { "epoch": 5.7, "grad_norm": 0.6015625, "learning_rate": 0.00045681042899106694, "loss": 0.2116, "step": 137630 }, { "epoch": 5.7, "grad_norm": 1.390625, "learning_rate": 0.0004568043354650633, "loss": 0.2142, "step": 137640 }, { "epoch": 5.7, "grad_norm": 0.8125, "learning_rate": 0.00045679824154987625, "loss": 0.2037, "step": 137650 }, { "epoch": 5.7, "grad_norm": 0.69921875, "learning_rate": 0.00045679214724551735, "loss": 0.2376, "step": 137660 }, { "epoch": 5.7, "grad_norm": 1.015625, "learning_rate": 0.0004567860525519978, "loss": 0.2403, "step": 137670 }, { "epoch": 5.7, "grad_norm": 0.357421875, "learning_rate": 0.00045677995746932934, "loss": 0.2132, "step": 137680 }, { "epoch": 5.7, "grad_norm": 0.56640625, "learning_rate": 0.0004567738619975232, "loss": 0.189, "step": 137690 }, { "epoch": 5.7, "grad_norm": 0.640625, "learning_rate": 0.0004567677661365911, "loss": 0.2103, "step": 137700 }, { "epoch": 5.7, "grad_norm": 0.431640625, "learning_rate": 0.0004567616698865444, "loss": 0.2444, "step": 137710 }, { "epoch": 5.7, "grad_norm": 0.94921875, "learning_rate": 0.00045675557324739447, "loss": 0.1574, "step": 137720 }, { "epoch": 5.7, "grad_norm": 0.419921875, "learning_rate": 0.000456749476219153, "loss": 0.2003, "step": 137730 }, { "epoch": 5.71, "grad_norm": 0.796875, "learning_rate": 0.0004567433788018313, "loss": 0.218, "step": 137740 }, { "epoch": 5.71, "grad_norm": 0.80078125, "learning_rate": 0.0004567372809954408, "loss": 0.2321, "step": 137750 }, { "epoch": 5.71, "grad_norm": 0.400390625, "learning_rate": 0.0004567311827999932, "loss": 0.2218, "step": 137760 }, { "epoch": 5.71, "grad_norm": 0.64453125, "learning_rate": 0.00045672508421549976, "loss": 0.2166, "step": 137770 }, { "epoch": 5.71, "grad_norm": 1.375, "learning_rate": 0.000456718985241972, "loss": 0.2107, "step": 137780 }, { "epoch": 5.71, "grad_norm": 0.431640625, "learning_rate": 0.0004567128858794215, "loss": 0.2332, "step": 137790 }, { "epoch": 5.71, "grad_norm": 1.2421875, "learning_rate": 0.00045670678612785967, "loss": 0.2052, "step": 137800 }, { "epoch": 5.71, "grad_norm": 0.890625, "learning_rate": 0.00045670068598729793, "loss": 0.1495, "step": 137810 }, { "epoch": 5.71, "grad_norm": 0.8828125, "learning_rate": 0.0004566945854577479, "loss": 0.2371, "step": 137820 }, { "epoch": 5.71, "grad_norm": 0.63671875, "learning_rate": 0.0004566884845392209, "loss": 0.1865, "step": 137830 }, { "epoch": 5.71, "grad_norm": 0.41796875, "learning_rate": 0.0004566823832317285, "loss": 0.1739, "step": 137840 }, { "epoch": 5.71, "grad_norm": 1.078125, "learning_rate": 0.0004566762815352822, "loss": 0.2081, "step": 137850 }, { "epoch": 5.71, "grad_norm": 0.70703125, "learning_rate": 0.00045667017944989353, "loss": 0.22, "step": 137860 }, { "epoch": 5.71, "grad_norm": 0.6640625, "learning_rate": 0.00045666407697557383, "loss": 0.2302, "step": 137870 }, { "epoch": 5.71, "grad_norm": 1.2578125, "learning_rate": 0.00045665797411233465, "loss": 0.2196, "step": 137880 }, { "epoch": 5.71, "grad_norm": 0.298828125, "learning_rate": 0.0004566518708601875, "loss": 0.2396, "step": 137890 }, { "epoch": 5.71, "grad_norm": 0.83984375, "learning_rate": 0.00045664576721914384, "loss": 0.2073, "step": 137900 }, { "epoch": 5.71, "grad_norm": 0.9609375, "learning_rate": 0.00045663966318921514, "loss": 0.2203, "step": 137910 }, { "epoch": 5.71, "grad_norm": 0.55859375, "learning_rate": 0.0004566335587704129, "loss": 0.1693, "step": 137920 }, { "epoch": 5.71, "grad_norm": 0.921875, "learning_rate": 0.00045662745396274866, "loss": 0.1446, "step": 137930 }, { "epoch": 5.71, "grad_norm": 0.82421875, "learning_rate": 0.0004566213487662338, "loss": 0.2162, "step": 137940 }, { "epoch": 5.71, "grad_norm": 0.296875, "learning_rate": 0.00045661524318088, "loss": 0.2057, "step": 137950 }, { "epoch": 5.71, "grad_norm": 0.99609375, "learning_rate": 0.00045660913720669855, "loss": 0.164, "step": 137960 }, { "epoch": 5.71, "grad_norm": 0.421875, "learning_rate": 0.00045660303084370105, "loss": 0.2083, "step": 137970 }, { "epoch": 5.72, "grad_norm": 0.5703125, "learning_rate": 0.0004565969240918989, "loss": 0.2201, "step": 137980 }, { "epoch": 5.72, "grad_norm": 0.73828125, "learning_rate": 0.0004565908169513037, "loss": 0.2045, "step": 137990 }, { "epoch": 5.72, "grad_norm": 1.0078125, "learning_rate": 0.00045658470942192686, "loss": 0.2035, "step": 138000 }, { "epoch": 5.72, "grad_norm": 1.125, "learning_rate": 0.0004565786015037799, "loss": 0.1896, "step": 138010 }, { "epoch": 5.72, "grad_norm": 0.482421875, "learning_rate": 0.0004565724931968743, "loss": 0.2144, "step": 138020 }, { "epoch": 5.72, "grad_norm": 0.39453125, "learning_rate": 0.0004565663845012216, "loss": 0.2309, "step": 138030 }, { "epoch": 5.72, "grad_norm": 2.15625, "learning_rate": 0.00045656027541683333, "loss": 0.24, "step": 138040 }, { "epoch": 5.72, "grad_norm": 0.73828125, "learning_rate": 0.00045655416594372086, "loss": 0.2309, "step": 138050 }, { "epoch": 5.72, "grad_norm": 0.65234375, "learning_rate": 0.0004565480560818958, "loss": 0.2678, "step": 138060 }, { "epoch": 5.72, "grad_norm": 0.80078125, "learning_rate": 0.00045654194583136953, "loss": 0.2286, "step": 138070 }, { "epoch": 5.72, "grad_norm": 0.8359375, "learning_rate": 0.0004565358351921537, "loss": 0.2393, "step": 138080 }, { "epoch": 5.72, "grad_norm": 0.8984375, "learning_rate": 0.00045652972416425965, "loss": 0.2033, "step": 138090 }, { "epoch": 5.72, "grad_norm": 0.84765625, "learning_rate": 0.00045652361274769906, "loss": 0.2137, "step": 138100 }, { "epoch": 5.72, "grad_norm": 2.515625, "learning_rate": 0.0004565175009424832, "loss": 0.2374, "step": 138110 }, { "epoch": 5.72, "grad_norm": 0.326171875, "learning_rate": 0.00045651138874862383, "loss": 0.1714, "step": 138120 }, { "epoch": 5.72, "grad_norm": 1.0, "learning_rate": 0.00045650527616613234, "loss": 0.2274, "step": 138130 }, { "epoch": 5.72, "grad_norm": 1.046875, "learning_rate": 0.0004564991631950202, "loss": 0.1855, "step": 138140 }, { "epoch": 5.72, "grad_norm": 0.71484375, "learning_rate": 0.00045649304983529885, "loss": 0.2385, "step": 138150 }, { "epoch": 5.72, "grad_norm": 1.53125, "learning_rate": 0.00045648693608697994, "loss": 0.2153, "step": 138160 }, { "epoch": 5.72, "grad_norm": 1.0390625, "learning_rate": 0.0004564808219500749, "loss": 0.2133, "step": 138170 }, { "epoch": 5.72, "grad_norm": 0.78125, "learning_rate": 0.0004564747074245953, "loss": 0.2114, "step": 138180 }, { "epoch": 5.72, "grad_norm": 1.2890625, "learning_rate": 0.0004564685925105525, "loss": 0.2152, "step": 138190 }, { "epoch": 5.72, "grad_norm": 0.44921875, "learning_rate": 0.0004564624772079582, "loss": 0.1738, "step": 138200 }, { "epoch": 5.72, "grad_norm": 0.5859375, "learning_rate": 0.0004564563615168238, "loss": 0.1945, "step": 138210 }, { "epoch": 5.73, "grad_norm": 0.1923828125, "learning_rate": 0.00045645024543716073, "loss": 0.143, "step": 138220 }, { "epoch": 5.73, "grad_norm": 1.078125, "learning_rate": 0.0004564441289689807, "loss": 0.201, "step": 138230 }, { "epoch": 5.73, "grad_norm": 0.828125, "learning_rate": 0.0004564380121122951, "loss": 0.222, "step": 138240 }, { "epoch": 5.73, "grad_norm": 1.078125, "learning_rate": 0.00045643189486711545, "loss": 0.1744, "step": 138250 }, { "epoch": 5.73, "grad_norm": 0.498046875, "learning_rate": 0.00045642577723345326, "loss": 0.1908, "step": 138260 }, { "epoch": 5.73, "grad_norm": 0.4375, "learning_rate": 0.00045641965921132, "loss": 0.2021, "step": 138270 }, { "epoch": 5.73, "grad_norm": 0.64453125, "learning_rate": 0.0004564135408007273, "loss": 0.2396, "step": 138280 }, { "epoch": 5.73, "grad_norm": 2.296875, "learning_rate": 0.00045640742200168656, "loss": 0.218, "step": 138290 }, { "epoch": 5.73, "grad_norm": 0.146484375, "learning_rate": 0.0004564013028142093, "loss": 0.168, "step": 138300 }, { "epoch": 5.73, "grad_norm": 0.2451171875, "learning_rate": 0.0004563951832383072, "loss": 0.208, "step": 138310 }, { "epoch": 5.73, "grad_norm": 0.333984375, "learning_rate": 0.00045638906327399154, "loss": 0.1957, "step": 138320 }, { "epoch": 5.73, "grad_norm": 0.60546875, "learning_rate": 0.000456382942921274, "loss": 0.1876, "step": 138330 }, { "epoch": 5.73, "grad_norm": 0.76171875, "learning_rate": 0.00045637682218016605, "loss": 0.2091, "step": 138340 }, { "epoch": 5.73, "grad_norm": 0.72265625, "learning_rate": 0.00045637070105067926, "loss": 0.2178, "step": 138350 }, { "epoch": 5.73, "grad_norm": 0.58984375, "learning_rate": 0.00045636457953282506, "loss": 0.1745, "step": 138360 }, { "epoch": 5.73, "grad_norm": 0.8125, "learning_rate": 0.000456358457626615, "loss": 0.1934, "step": 138370 }, { "epoch": 5.73, "grad_norm": 0.80859375, "learning_rate": 0.00045635233533206055, "loss": 0.192, "step": 138380 }, { "epoch": 5.73, "grad_norm": 0.087890625, "learning_rate": 0.0004563462126491734, "loss": 0.1914, "step": 138390 }, { "epoch": 5.73, "grad_norm": 1.2265625, "learning_rate": 0.00045634008957796485, "loss": 0.2222, "step": 138400 }, { "epoch": 5.73, "grad_norm": 0.82421875, "learning_rate": 0.0004563339661184466, "loss": 0.219, "step": 138410 }, { "epoch": 5.73, "grad_norm": 1.21875, "learning_rate": 0.00045632784227063006, "loss": 0.2123, "step": 138420 }, { "epoch": 5.73, "grad_norm": 1.109375, "learning_rate": 0.00045632171803452684, "loss": 0.2114, "step": 138430 }, { "epoch": 5.73, "grad_norm": 0.39453125, "learning_rate": 0.0004563155934101484, "loss": 0.2085, "step": 138440 }, { "epoch": 5.73, "grad_norm": 1.0234375, "learning_rate": 0.0004563094683975063, "loss": 0.2227, "step": 138450 }, { "epoch": 5.73, "grad_norm": 0.41015625, "learning_rate": 0.00045630334299661215, "loss": 0.2013, "step": 138460 }, { "epoch": 5.74, "grad_norm": 0.64453125, "learning_rate": 0.0004562972172074773, "loss": 0.191, "step": 138470 }, { "epoch": 5.74, "grad_norm": 0.546875, "learning_rate": 0.00045629109103011335, "loss": 0.2925, "step": 138480 }, { "epoch": 5.74, "grad_norm": 0.455078125, "learning_rate": 0.0004562849644645319, "loss": 0.2209, "step": 138490 }, { "epoch": 5.74, "grad_norm": 1.2421875, "learning_rate": 0.0004562788375107444, "loss": 0.1754, "step": 138500 }, { "epoch": 5.74, "grad_norm": 0.81640625, "learning_rate": 0.00045627271016876236, "loss": 0.1651, "step": 138510 }, { "epoch": 5.74, "grad_norm": 1.5859375, "learning_rate": 0.00045626658243859745, "loss": 0.1663, "step": 138520 }, { "epoch": 5.74, "grad_norm": 0.22265625, "learning_rate": 0.000456260454320261, "loss": 0.2523, "step": 138530 }, { "epoch": 5.74, "grad_norm": 1.078125, "learning_rate": 0.0004562543258137647, "loss": 0.2283, "step": 138540 }, { "epoch": 5.74, "grad_norm": 0.6171875, "learning_rate": 0.0004562481969191201, "loss": 0.2082, "step": 138550 }, { "epoch": 5.74, "grad_norm": 0.6015625, "learning_rate": 0.0004562420676363386, "loss": 0.2032, "step": 138560 }, { "epoch": 5.74, "grad_norm": 0.90234375, "learning_rate": 0.0004562359379654317, "loss": 0.185, "step": 138570 }, { "epoch": 5.74, "grad_norm": 0.61328125, "learning_rate": 0.0004562298079064112, "loss": 0.3242, "step": 138580 }, { "epoch": 5.74, "grad_norm": 1.0390625, "learning_rate": 0.0004562236774592884, "loss": 0.1681, "step": 138590 }, { "epoch": 5.74, "grad_norm": 0.8515625, "learning_rate": 0.00045621754662407486, "loss": 0.1687, "step": 138600 }, { "epoch": 5.74, "grad_norm": 0.85546875, "learning_rate": 0.0004562114154007823, "loss": 0.1908, "step": 138610 }, { "epoch": 5.74, "grad_norm": 0.72265625, "learning_rate": 0.00045620528378942203, "loss": 0.2095, "step": 138620 }, { "epoch": 5.74, "grad_norm": 0.8671875, "learning_rate": 0.0004561991517900057, "loss": 0.1887, "step": 138630 }, { "epoch": 5.74, "grad_norm": 0.96484375, "learning_rate": 0.00045619301940254487, "loss": 0.191, "step": 138640 }, { "epoch": 5.74, "grad_norm": 0.52734375, "learning_rate": 0.000456186886627051, "loss": 0.1624, "step": 138650 }, { "epoch": 5.74, "grad_norm": 0.8125, "learning_rate": 0.00045618075346353564, "loss": 0.2021, "step": 138660 }, { "epoch": 5.74, "grad_norm": 0.890625, "learning_rate": 0.0004561746199120105, "loss": 0.2075, "step": 138670 }, { "epoch": 5.74, "grad_norm": 1.0546875, "learning_rate": 0.0004561684859724868, "loss": 0.194, "step": 138680 }, { "epoch": 5.74, "grad_norm": 0.61328125, "learning_rate": 0.00045616235164497646, "loss": 0.1967, "step": 138690 }, { "epoch": 5.74, "grad_norm": 0.470703125, "learning_rate": 0.00045615621692949074, "loss": 0.1425, "step": 138700 }, { "epoch": 5.75, "grad_norm": 1.3046875, "learning_rate": 0.0004561500818260413, "loss": 0.1961, "step": 138710 }, { "epoch": 5.75, "grad_norm": 0.400390625, "learning_rate": 0.0004561439463346396, "loss": 0.253, "step": 138720 }, { "epoch": 5.75, "grad_norm": 0.0, "learning_rate": 0.00045613781045529734, "loss": 0.1832, "step": 138730 }, { "epoch": 5.75, "grad_norm": 0.478515625, "learning_rate": 0.00045613167418802596, "loss": 0.2215, "step": 138740 }, { "epoch": 5.75, "grad_norm": 0.38671875, "learning_rate": 0.000456125537532837, "loss": 0.2408, "step": 138750 }, { "epoch": 5.75, "grad_norm": 1.0546875, "learning_rate": 0.0004561194004897422, "loss": 0.1994, "step": 138760 }, { "epoch": 5.75, "grad_norm": 0.953125, "learning_rate": 0.0004561132630587528, "loss": 0.2246, "step": 138770 }, { "epoch": 5.75, "grad_norm": 1.046875, "learning_rate": 0.00045610712523988045, "loss": 0.1857, "step": 138780 }, { "epoch": 5.75, "grad_norm": 0.4609375, "learning_rate": 0.00045610098703313684, "loss": 0.2306, "step": 138790 }, { "epoch": 5.75, "grad_norm": 0.76171875, "learning_rate": 0.00045609484843853345, "loss": 0.1764, "step": 138800 }, { "epoch": 5.75, "grad_norm": 1.0859375, "learning_rate": 0.0004560887094560817, "loss": 0.1662, "step": 138810 }, { "epoch": 5.75, "grad_norm": 0.30859375, "learning_rate": 0.00045608257008579337, "loss": 0.2198, "step": 138820 }, { "epoch": 5.75, "grad_norm": 0.9609375, "learning_rate": 0.0004560764303276798, "loss": 0.2134, "step": 138830 }, { "epoch": 5.75, "grad_norm": 0.54296875, "learning_rate": 0.00045607029018175275, "loss": 0.1445, "step": 138840 }, { "epoch": 5.75, "grad_norm": 1.0625, "learning_rate": 0.0004560641496480236, "loss": 0.1547, "step": 138850 }, { "epoch": 5.75, "grad_norm": 0.24609375, "learning_rate": 0.0004560580087265039, "loss": 0.2084, "step": 138860 }, { "epoch": 5.75, "grad_norm": 0.68359375, "learning_rate": 0.0004560518674172054, "loss": 0.2272, "step": 138870 }, { "epoch": 5.75, "grad_norm": 0.6953125, "learning_rate": 0.0004560457257201395, "loss": 0.2022, "step": 138880 }, { "epoch": 5.75, "grad_norm": 0.484375, "learning_rate": 0.00045603958363531785, "loss": 0.2492, "step": 138890 }, { "epoch": 5.75, "grad_norm": 0.390625, "learning_rate": 0.00045603344116275184, "loss": 0.1975, "step": 138900 }, { "epoch": 5.75, "grad_norm": 0.86328125, "learning_rate": 0.00045602729830245323, "loss": 0.1839, "step": 138910 }, { "epoch": 5.75, "grad_norm": 1.2265625, "learning_rate": 0.0004560211550544334, "loss": 0.2312, "step": 138920 }, { "epoch": 5.75, "grad_norm": 0.58984375, "learning_rate": 0.0004560150114187042, "loss": 0.2111, "step": 138930 }, { "epoch": 5.75, "grad_norm": 1.125, "learning_rate": 0.0004560088673952768, "loss": 0.1748, "step": 138940 }, { "epoch": 5.76, "grad_norm": 0.578125, "learning_rate": 0.000456002722984163, "loss": 0.1853, "step": 138950 }, { "epoch": 5.76, "grad_norm": 0.0966796875, "learning_rate": 0.0004559965781853744, "loss": 0.2777, "step": 138960 }, { "epoch": 5.76, "grad_norm": 0.52734375, "learning_rate": 0.0004559904329989224, "loss": 0.1896, "step": 138970 }, { "epoch": 5.76, "grad_norm": 0.92578125, "learning_rate": 0.0004559842874248187, "loss": 0.2379, "step": 138980 }, { "epoch": 5.76, "grad_norm": 0.90625, "learning_rate": 0.00045597814146307477, "loss": 0.2582, "step": 138990 }, { "epoch": 5.76, "grad_norm": 0.59765625, "learning_rate": 0.0004559719951137022, "loss": 0.2123, "step": 139000 }, { "epoch": 5.76, "grad_norm": 1.078125, "learning_rate": 0.0004559658483767127, "loss": 0.158, "step": 139010 }, { "epoch": 5.76, "grad_norm": 0.224609375, "learning_rate": 0.0004559597012521176, "loss": 0.2333, "step": 139020 }, { "epoch": 5.76, "grad_norm": 0.8359375, "learning_rate": 0.00045595355373992863, "loss": 0.1686, "step": 139030 }, { "epoch": 5.76, "grad_norm": 0.33984375, "learning_rate": 0.0004559474058401574, "loss": 0.2127, "step": 139040 }, { "epoch": 5.76, "grad_norm": 0.80859375, "learning_rate": 0.00045594125755281525, "loss": 0.1732, "step": 139050 }, { "epoch": 5.76, "grad_norm": 0.1767578125, "learning_rate": 0.0004559351088779139, "loss": 0.2601, "step": 139060 }, { "epoch": 5.76, "grad_norm": 0.435546875, "learning_rate": 0.000455928959815465, "loss": 0.2227, "step": 139070 }, { "epoch": 5.76, "grad_norm": 0.640625, "learning_rate": 0.00045592281036548, "loss": 0.2292, "step": 139080 }, { "epoch": 5.76, "grad_norm": 1.265625, "learning_rate": 0.0004559166605279705, "loss": 0.197, "step": 139090 }, { "epoch": 5.76, "grad_norm": 0.66796875, "learning_rate": 0.0004559105103029481, "loss": 0.1701, "step": 139100 }, { "epoch": 5.76, "grad_norm": 1.09375, "learning_rate": 0.00045590435969042434, "loss": 0.2198, "step": 139110 }, { "epoch": 5.76, "grad_norm": 0.94921875, "learning_rate": 0.00045589820869041076, "loss": 0.2108, "step": 139120 }, { "epoch": 5.76, "grad_norm": 0.96875, "learning_rate": 0.00045589205730291903, "loss": 0.2538, "step": 139130 }, { "epoch": 5.76, "grad_norm": 0.447265625, "learning_rate": 0.0004558859055279607, "loss": 0.229, "step": 139140 }, { "epoch": 5.76, "grad_norm": 0.494140625, "learning_rate": 0.0004558797533655473, "loss": 0.1303, "step": 139150 }, { "epoch": 5.76, "grad_norm": 0.82421875, "learning_rate": 0.0004558736008156905, "loss": 0.1949, "step": 139160 }, { "epoch": 5.76, "grad_norm": 1.3046875, "learning_rate": 0.00045586744787840173, "loss": 0.2325, "step": 139170 }, { "epoch": 5.76, "grad_norm": 1.0546875, "learning_rate": 0.00045586129455369275, "loss": 0.2191, "step": 139180 }, { "epoch": 5.77, "grad_norm": 0.90234375, "learning_rate": 0.00045585514084157497, "loss": 0.2076, "step": 139190 }, { "epoch": 5.77, "grad_norm": 0.67578125, "learning_rate": 0.0004558489867420601, "loss": 0.1637, "step": 139200 }, { "epoch": 5.77, "grad_norm": 0.66796875, "learning_rate": 0.0004558428322551597, "loss": 0.2068, "step": 139210 }, { "epoch": 5.77, "grad_norm": 1.1328125, "learning_rate": 0.0004558366773808852, "loss": 0.1999, "step": 139220 }, { "epoch": 5.77, "grad_norm": 0.80859375, "learning_rate": 0.00045583052211924834, "loss": 0.2219, "step": 139230 }, { "epoch": 5.77, "grad_norm": 1.046875, "learning_rate": 0.0004558243664702607, "loss": 0.1763, "step": 139240 }, { "epoch": 5.77, "grad_norm": 0.796875, "learning_rate": 0.00045581821043393377, "loss": 0.2209, "step": 139250 }, { "epoch": 5.77, "grad_norm": 1.2265625, "learning_rate": 0.00045581205401027926, "loss": 0.2312, "step": 139260 }, { "epoch": 5.77, "grad_norm": 0.6875, "learning_rate": 0.00045580589719930865, "loss": 0.152, "step": 139270 }, { "epoch": 5.77, "grad_norm": 0.412109375, "learning_rate": 0.0004557997400010336, "loss": 0.1766, "step": 139280 }, { "epoch": 5.77, "grad_norm": 0.3515625, "learning_rate": 0.00045579358241546565, "loss": 0.1753, "step": 139290 }, { "epoch": 5.77, "grad_norm": 1.0234375, "learning_rate": 0.00045578742444261634, "loss": 0.2072, "step": 139300 }, { "epoch": 5.77, "grad_norm": 0.99609375, "learning_rate": 0.00045578126608249744, "loss": 0.1883, "step": 139310 }, { "epoch": 5.77, "grad_norm": 0.62890625, "learning_rate": 0.0004557751073351203, "loss": 0.1847, "step": 139320 }, { "epoch": 5.77, "grad_norm": 0.298828125, "learning_rate": 0.0004557689482004967, "loss": 0.2834, "step": 139330 }, { "epoch": 5.77, "grad_norm": 0.66015625, "learning_rate": 0.0004557627886786382, "loss": 0.2044, "step": 139340 }, { "epoch": 5.77, "grad_norm": 0.58203125, "learning_rate": 0.00045575662876955626, "loss": 0.2069, "step": 139350 }, { "epoch": 5.77, "grad_norm": 0.7734375, "learning_rate": 0.0004557504684732626, "loss": 0.2169, "step": 139360 }, { "epoch": 5.77, "grad_norm": 1.2265625, "learning_rate": 0.0004557443077897688, "loss": 0.1911, "step": 139370 }, { "epoch": 5.77, "grad_norm": 1.0, "learning_rate": 0.00045573814671908644, "loss": 0.1865, "step": 139380 }, { "epoch": 5.77, "grad_norm": 0.5546875, "learning_rate": 0.00045573198526122706, "loss": 0.1945, "step": 139390 }, { "epoch": 5.77, "grad_norm": 0.59765625, "learning_rate": 0.0004557258234162023, "loss": 0.1724, "step": 139400 }, { "epoch": 5.77, "grad_norm": 0.66015625, "learning_rate": 0.00045571966118402376, "loss": 0.2349, "step": 139410 }, { "epoch": 5.77, "grad_norm": 0.416015625, "learning_rate": 0.0004557134985647031, "loss": 0.2197, "step": 139420 }, { "epoch": 5.78, "grad_norm": 1.5078125, "learning_rate": 0.0004557073355582518, "loss": 0.2076, "step": 139430 }, { "epoch": 5.78, "grad_norm": 0.609375, "learning_rate": 0.0004557011721646814, "loss": 0.2085, "step": 139440 }, { "epoch": 5.78, "grad_norm": 0.416015625, "learning_rate": 0.00045569500838400377, "loss": 0.2535, "step": 139450 }, { "epoch": 5.78, "grad_norm": 0.87890625, "learning_rate": 0.0004556888442162303, "loss": 0.1933, "step": 139460 }, { "epoch": 5.78, "grad_norm": 0.5390625, "learning_rate": 0.00045568267966137265, "loss": 0.2175, "step": 139470 }, { "epoch": 5.78, "grad_norm": 0.3125, "learning_rate": 0.0004556765147194424, "loss": 0.1446, "step": 139480 }, { "epoch": 5.78, "grad_norm": 0.427734375, "learning_rate": 0.0004556703493904512, "loss": 0.2221, "step": 139490 }, { "epoch": 5.78, "grad_norm": 0.8125, "learning_rate": 0.00045566418367441047, "loss": 0.2018, "step": 139500 }, { "epoch": 5.78, "grad_norm": 0.7265625, "learning_rate": 0.0004556580175713321, "loss": 0.2158, "step": 139510 }, { "epoch": 5.78, "grad_norm": 1.28125, "learning_rate": 0.00045565185108122747, "loss": 0.1825, "step": 139520 }, { "epoch": 5.78, "grad_norm": 0.6015625, "learning_rate": 0.00045564568420410827, "loss": 0.2586, "step": 139530 }, { "epoch": 5.78, "grad_norm": 0.453125, "learning_rate": 0.0004556395169399862, "loss": 0.222, "step": 139540 }, { "epoch": 5.78, "grad_norm": 0.8359375, "learning_rate": 0.00045563334928887267, "loss": 0.2419, "step": 139550 }, { "epoch": 5.78, "grad_norm": 0.66796875, "learning_rate": 0.0004556271812507794, "loss": 0.2003, "step": 139560 }, { "epoch": 5.78, "grad_norm": 0.80078125, "learning_rate": 0.000455621012825718, "loss": 0.1834, "step": 139570 }, { "epoch": 5.78, "grad_norm": 0.478515625, "learning_rate": 0.00045561484401370004, "loss": 0.1904, "step": 139580 }, { "epoch": 5.78, "grad_norm": 0.640625, "learning_rate": 0.0004556086748147371, "loss": 0.2326, "step": 139590 }, { "epoch": 5.78, "grad_norm": 0.44140625, "learning_rate": 0.00045560250522884096, "loss": 0.1891, "step": 139600 }, { "epoch": 5.78, "grad_norm": 0.9765625, "learning_rate": 0.000455596335256023, "loss": 0.2134, "step": 139610 }, { "epoch": 5.78, "grad_norm": 0.08984375, "learning_rate": 0.00045559016489629494, "loss": 0.1555, "step": 139620 }, { "epoch": 5.78, "grad_norm": 0.58984375, "learning_rate": 0.0004555839941496684, "loss": 0.2034, "step": 139630 }, { "epoch": 5.78, "grad_norm": 0.546875, "learning_rate": 0.0004555778230161551, "loss": 0.2295, "step": 139640 }, { "epoch": 5.78, "grad_norm": 0.62890625, "learning_rate": 0.0004555716514957664, "loss": 0.1799, "step": 139650 }, { "epoch": 5.78, "grad_norm": 0.6171875, "learning_rate": 0.00045556547958851414, "loss": 0.2126, "step": 139660 }, { "epoch": 5.79, "grad_norm": 0.41796875, "learning_rate": 0.00045555930729440983, "loss": 0.1318, "step": 139670 }, { "epoch": 5.79, "grad_norm": 0.427734375, "learning_rate": 0.00045555313461346505, "loss": 0.1499, "step": 139680 }, { "epoch": 5.79, "grad_norm": 0.5234375, "learning_rate": 0.0004555469615456915, "loss": 0.1686, "step": 139690 }, { "epoch": 5.79, "grad_norm": 0.79296875, "learning_rate": 0.0004555407880911008, "loss": 0.2015, "step": 139700 }, { "epoch": 5.79, "grad_norm": 0.439453125, "learning_rate": 0.00045553461424970444, "loss": 0.2136, "step": 139710 }, { "epoch": 5.79, "grad_norm": 1.2421875, "learning_rate": 0.0004555284400215142, "loss": 0.2013, "step": 139720 }, { "epoch": 5.79, "grad_norm": 1.3125, "learning_rate": 0.0004555222654065416, "loss": 0.1928, "step": 139730 }, { "epoch": 5.79, "grad_norm": 0.1640625, "learning_rate": 0.00045551609040479833, "loss": 0.2185, "step": 139740 }, { "epoch": 5.79, "grad_norm": 0.99609375, "learning_rate": 0.000455509915016296, "loss": 0.218, "step": 139750 }, { "epoch": 5.79, "grad_norm": 0.66015625, "learning_rate": 0.00045550373924104615, "loss": 0.2046, "step": 139760 }, { "epoch": 5.79, "grad_norm": 1.0546875, "learning_rate": 0.00045549756307906045, "loss": 0.2547, "step": 139770 }, { "epoch": 5.79, "grad_norm": 1.15625, "learning_rate": 0.00045549138653035057, "loss": 0.2524, "step": 139780 }, { "epoch": 5.79, "grad_norm": 0.255859375, "learning_rate": 0.000455485209594928, "loss": 0.1909, "step": 139790 }, { "epoch": 5.79, "grad_norm": 0.765625, "learning_rate": 0.00045547903227280455, "loss": 0.2243, "step": 139800 }, { "epoch": 5.79, "grad_norm": 0.4609375, "learning_rate": 0.00045547285456399173, "loss": 0.2276, "step": 139810 }, { "epoch": 5.79, "grad_norm": 0.70703125, "learning_rate": 0.00045546667646850114, "loss": 0.222, "step": 139820 }, { "epoch": 5.79, "grad_norm": 0.81640625, "learning_rate": 0.0004554604979863445, "loss": 0.1677, "step": 139830 }, { "epoch": 5.79, "grad_norm": 0.52734375, "learning_rate": 0.00045545431911753344, "loss": 0.1851, "step": 139840 }, { "epoch": 5.79, "grad_norm": 0.65625, "learning_rate": 0.00045544813986207944, "loss": 0.1944, "step": 139850 }, { "epoch": 5.79, "grad_norm": 0.6015625, "learning_rate": 0.0004554419602199943, "loss": 0.1979, "step": 139860 }, { "epoch": 5.79, "grad_norm": 0.8515625, "learning_rate": 0.0004554357801912895, "loss": 0.1819, "step": 139870 }, { "epoch": 5.79, "grad_norm": 0.26953125, "learning_rate": 0.0004554295997759768, "loss": 0.1913, "step": 139880 }, { "epoch": 5.79, "grad_norm": 0.96484375, "learning_rate": 0.0004554234189740678, "loss": 0.2286, "step": 139890 }, { "epoch": 5.79, "grad_norm": 0.423828125, "learning_rate": 0.00045541723778557406, "loss": 0.1651, "step": 139900 }, { "epoch": 5.8, "grad_norm": 0.7734375, "learning_rate": 0.00045541105621050724, "loss": 0.2193, "step": 139910 }, { "epoch": 5.8, "grad_norm": 1.2890625, "learning_rate": 0.00045540487424887904, "loss": 0.2191, "step": 139920 }, { "epoch": 5.8, "grad_norm": 0.384765625, "learning_rate": 0.00045539869190070105, "loss": 0.204, "step": 139930 }, { "epoch": 5.8, "grad_norm": 1.609375, "learning_rate": 0.0004553925091659849, "loss": 0.2213, "step": 139940 }, { "epoch": 5.8, "grad_norm": 0.259765625, "learning_rate": 0.0004553863260447422, "loss": 0.2057, "step": 139950 }, { "epoch": 5.8, "grad_norm": 0.5625, "learning_rate": 0.0004553801425369847, "loss": 0.2152, "step": 139960 }, { "epoch": 5.8, "grad_norm": 2.234375, "learning_rate": 0.00045537395864272387, "loss": 0.1793, "step": 139970 }, { "epoch": 5.8, "grad_norm": 0.94921875, "learning_rate": 0.0004553677743619714, "loss": 0.2012, "step": 139980 }, { "epoch": 5.8, "grad_norm": 0.640625, "learning_rate": 0.00045536158969473905, "loss": 0.1735, "step": 139990 }, { "epoch": 5.8, "grad_norm": 0.89453125, "learning_rate": 0.0004553554046410383, "loss": 0.2213, "step": 140000 }, { "epoch": 5.8, "grad_norm": 0.68359375, "learning_rate": 0.0004553492192008809, "loss": 0.2606, "step": 140010 }, { "epoch": 5.8, "grad_norm": 0.69140625, "learning_rate": 0.0004553430333742784, "loss": 0.2275, "step": 140020 }, { "epoch": 5.8, "grad_norm": 0.73828125, "learning_rate": 0.00045533684716124245, "loss": 0.1922, "step": 140030 }, { "epoch": 5.8, "grad_norm": 0.5859375, "learning_rate": 0.00045533066056178484, "loss": 0.2144, "step": 140040 }, { "epoch": 5.8, "grad_norm": 0.8359375, "learning_rate": 0.000455324473575917, "loss": 0.2384, "step": 140050 }, { "epoch": 5.8, "grad_norm": 1.109375, "learning_rate": 0.0004553182862036508, "loss": 0.2109, "step": 140060 }, { "epoch": 5.8, "grad_norm": 1.4921875, "learning_rate": 0.00045531209844499763, "loss": 0.1986, "step": 140070 }, { "epoch": 5.8, "grad_norm": 0.76171875, "learning_rate": 0.0004553059102999693, "loss": 0.1774, "step": 140080 }, { "epoch": 5.8, "grad_norm": 0.65625, "learning_rate": 0.0004552997217685774, "loss": 0.1882, "step": 140090 }, { "epoch": 5.8, "grad_norm": 0.765625, "learning_rate": 0.0004552935328508336, "loss": 0.2315, "step": 140100 }, { "epoch": 5.8, "grad_norm": 0.36328125, "learning_rate": 0.0004552873435467496, "loss": 0.1894, "step": 140110 }, { "epoch": 5.8, "grad_norm": 0.8359375, "learning_rate": 0.00045528115385633694, "loss": 0.206, "step": 140120 }, { "epoch": 5.8, "grad_norm": 0.74609375, "learning_rate": 0.0004552749637796073, "loss": 0.1977, "step": 140130 }, { "epoch": 5.8, "grad_norm": 0.470703125, "learning_rate": 0.0004552687733165724, "loss": 0.197, "step": 140140 }, { "epoch": 5.8, "grad_norm": 0.455078125, "learning_rate": 0.0004552625824672438, "loss": 0.1929, "step": 140150 }, { "epoch": 5.81, "grad_norm": 1.1953125, "learning_rate": 0.0004552563912316332, "loss": 0.1852, "step": 140160 }, { "epoch": 5.81, "grad_norm": 0.7578125, "learning_rate": 0.0004552501996097522, "loss": 0.1852, "step": 140170 }, { "epoch": 5.81, "grad_norm": 0.59765625, "learning_rate": 0.00045524400760161253, "loss": 0.2606, "step": 140180 }, { "epoch": 5.81, "grad_norm": 0.89453125, "learning_rate": 0.0004552378152072258, "loss": 0.2438, "step": 140190 }, { "epoch": 5.81, "grad_norm": 0.62109375, "learning_rate": 0.00045523162242660365, "loss": 0.2246, "step": 140200 }, { "epoch": 5.81, "grad_norm": 0.61328125, "learning_rate": 0.0004552254292597578, "loss": 0.2113, "step": 140210 }, { "epoch": 5.81, "grad_norm": 0.9140625, "learning_rate": 0.0004552192357066998, "loss": 0.2389, "step": 140220 }, { "epoch": 5.81, "grad_norm": 0.486328125, "learning_rate": 0.0004552130417674414, "loss": 0.1721, "step": 140230 }, { "epoch": 5.81, "grad_norm": 0.703125, "learning_rate": 0.0004552068474419941, "loss": 0.2249, "step": 140240 }, { "epoch": 5.81, "grad_norm": 0.1513671875, "learning_rate": 0.0004552006527303698, "loss": 0.2085, "step": 140250 }, { "epoch": 5.81, "grad_norm": 0.67578125, "learning_rate": 0.00045519445763258007, "loss": 0.1475, "step": 140260 }, { "epoch": 5.81, "grad_norm": 0.55078125, "learning_rate": 0.0004551882621486364, "loss": 0.2116, "step": 140270 }, { "epoch": 5.81, "grad_norm": 0.90625, "learning_rate": 0.0004551820662785506, "loss": 0.1976, "step": 140280 }, { "epoch": 5.81, "grad_norm": 0.61328125, "learning_rate": 0.0004551758700223344, "loss": 0.2249, "step": 140290 }, { "epoch": 5.81, "grad_norm": 1.2421875, "learning_rate": 0.0004551696733799993, "loss": 0.228, "step": 140300 }, { "epoch": 5.81, "grad_norm": 0.85546875, "learning_rate": 0.00045516347635155707, "loss": 0.1842, "step": 140310 }, { "epoch": 5.81, "grad_norm": 1.09375, "learning_rate": 0.0004551572789370193, "loss": 0.1835, "step": 140320 }, { "epoch": 5.81, "grad_norm": 0.5546875, "learning_rate": 0.00045515108113639767, "loss": 0.1739, "step": 140330 }, { "epoch": 5.81, "grad_norm": 0.54296875, "learning_rate": 0.0004551448829497039, "loss": 0.1866, "step": 140340 }, { "epoch": 5.81, "grad_norm": 0.439453125, "learning_rate": 0.0004551386843769496, "loss": 0.1783, "step": 140350 }, { "epoch": 5.81, "grad_norm": 0.58203125, "learning_rate": 0.00045513248541814645, "loss": 0.2171, "step": 140360 }, { "epoch": 5.81, "grad_norm": 0.2431640625, "learning_rate": 0.0004551262860733061, "loss": 0.1921, "step": 140370 }, { "epoch": 5.81, "grad_norm": 1.109375, "learning_rate": 0.00045512008634244024, "loss": 0.1939, "step": 140380 }, { "epoch": 5.81, "grad_norm": 0.59375, "learning_rate": 0.0004551138862255605, "loss": 0.2049, "step": 140390 }, { "epoch": 5.82, "grad_norm": 0.68359375, "learning_rate": 0.0004551076857226786, "loss": 0.1963, "step": 140400 }, { "epoch": 5.82, "grad_norm": 0.671875, "learning_rate": 0.0004551014848338062, "loss": 0.2356, "step": 140410 }, { "epoch": 5.82, "grad_norm": 0.455078125, "learning_rate": 0.00045509528355895494, "loss": 0.1903, "step": 140420 }, { "epoch": 5.82, "grad_norm": 0.431640625, "learning_rate": 0.00045508908189813654, "loss": 0.2014, "step": 140430 }, { "epoch": 5.82, "grad_norm": 1.046875, "learning_rate": 0.0004550828798513626, "loss": 0.2641, "step": 140440 }, { "epoch": 5.82, "grad_norm": 0.4375, "learning_rate": 0.00045507667741864484, "loss": 0.2085, "step": 140450 }, { "epoch": 5.82, "grad_norm": 0.8046875, "learning_rate": 0.0004550704745999949, "loss": 0.2255, "step": 140460 }, { "epoch": 5.82, "grad_norm": 0.5390625, "learning_rate": 0.0004550642713954245, "loss": 0.1971, "step": 140470 }, { "epoch": 5.82, "grad_norm": 0.71484375, "learning_rate": 0.0004550580678049453, "loss": 0.2237, "step": 140480 }, { "epoch": 5.82, "grad_norm": 0.71875, "learning_rate": 0.0004550518638285689, "loss": 0.1776, "step": 140490 }, { "epoch": 5.82, "grad_norm": 0.75390625, "learning_rate": 0.00045504565946630703, "loss": 0.2071, "step": 140500 }, { "epoch": 5.82, "grad_norm": 0.419921875, "learning_rate": 0.00045503945471817145, "loss": 0.2239, "step": 140510 }, { "epoch": 5.82, "grad_norm": 0.73046875, "learning_rate": 0.0004550332495841737, "loss": 0.2079, "step": 140520 }, { "epoch": 5.82, "grad_norm": 0.9609375, "learning_rate": 0.00045502704406432553, "loss": 0.221, "step": 140530 }, { "epoch": 5.82, "grad_norm": 1.171875, "learning_rate": 0.0004550208381586386, "loss": 0.223, "step": 140540 }, { "epoch": 5.82, "grad_norm": 0.78125, "learning_rate": 0.00045501463186712465, "loss": 0.1855, "step": 140550 }, { "epoch": 5.82, "grad_norm": 0.427734375, "learning_rate": 0.00045500842518979523, "loss": 0.209, "step": 140560 }, { "epoch": 5.82, "grad_norm": 0.9140625, "learning_rate": 0.0004550022181266621, "loss": 0.1833, "step": 140570 }, { "epoch": 5.82, "grad_norm": 1.8984375, "learning_rate": 0.000454996010677737, "loss": 0.211, "step": 140580 }, { "epoch": 5.82, "grad_norm": 0.5859375, "learning_rate": 0.00045498980284303147, "loss": 0.176, "step": 140590 }, { "epoch": 5.82, "grad_norm": 0.734375, "learning_rate": 0.0004549835946225573, "loss": 0.1766, "step": 140600 }, { "epoch": 5.82, "grad_norm": 0.66015625, "learning_rate": 0.0004549773860163261, "loss": 0.1857, "step": 140610 }, { "epoch": 5.82, "grad_norm": 0.65625, "learning_rate": 0.00045497117702434964, "loss": 0.1992, "step": 140620 }, { "epoch": 5.82, "grad_norm": 0.416015625, "learning_rate": 0.0004549649676466396, "loss": 0.1703, "step": 140630 }, { "epoch": 5.83, "grad_norm": 0.62890625, "learning_rate": 0.0004549587578832075, "loss": 0.1948, "step": 140640 }, { "epoch": 5.83, "grad_norm": 0.84375, "learning_rate": 0.00045495254773406525, "loss": 0.2507, "step": 140650 }, { "epoch": 5.83, "grad_norm": 0.90625, "learning_rate": 0.0004549463371992244, "loss": 0.2059, "step": 140660 }, { "epoch": 5.83, "grad_norm": 0.65234375, "learning_rate": 0.00045494012627869673, "loss": 0.2077, "step": 140670 }, { "epoch": 5.83, "grad_norm": 1.109375, "learning_rate": 0.0004549339149724938, "loss": 0.2021, "step": 140680 }, { "epoch": 5.83, "grad_norm": 0.68359375, "learning_rate": 0.00045492770328062746, "loss": 0.2139, "step": 140690 }, { "epoch": 5.83, "grad_norm": 1.0546875, "learning_rate": 0.0004549214912031092, "loss": 0.1808, "step": 140700 }, { "epoch": 5.83, "grad_norm": 0.291015625, "learning_rate": 0.0004549152787399509, "loss": 0.1945, "step": 140710 }, { "epoch": 5.83, "grad_norm": 0.6328125, "learning_rate": 0.0004549090658911642, "loss": 0.1736, "step": 140720 }, { "epoch": 5.83, "grad_norm": 0.400390625, "learning_rate": 0.00045490285265676066, "loss": 0.1825, "step": 140730 }, { "epoch": 5.83, "grad_norm": 0.90625, "learning_rate": 0.00045489663903675216, "loss": 0.2378, "step": 140740 }, { "epoch": 5.83, "grad_norm": 0.76171875, "learning_rate": 0.0004548904250311503, "loss": 0.2166, "step": 140750 }, { "epoch": 5.83, "grad_norm": 0.66015625, "learning_rate": 0.00045488421063996675, "loss": 0.2102, "step": 140760 }, { "epoch": 5.83, "grad_norm": 0.359375, "learning_rate": 0.0004548779958632133, "loss": 0.1974, "step": 140770 }, { "epoch": 5.83, "grad_norm": 0.828125, "learning_rate": 0.00045487178070090163, "loss": 0.2509, "step": 140780 }, { "epoch": 5.83, "grad_norm": 0.345703125, "learning_rate": 0.0004548655651530433, "loss": 0.1991, "step": 140790 }, { "epoch": 5.83, "grad_norm": 1.1015625, "learning_rate": 0.0004548593492196501, "loss": 0.1949, "step": 140800 }, { "epoch": 5.83, "grad_norm": 0.57421875, "learning_rate": 0.00045485313290073373, "loss": 0.1765, "step": 140810 }, { "epoch": 5.83, "grad_norm": 0.953125, "learning_rate": 0.0004548469161963059, "loss": 0.2021, "step": 140820 }, { "epoch": 5.83, "grad_norm": 1.2578125, "learning_rate": 0.0004548406991063784, "loss": 0.21, "step": 140830 }, { "epoch": 5.83, "grad_norm": 0.55078125, "learning_rate": 0.00045483448163096264, "loss": 0.195, "step": 140840 }, { "epoch": 5.83, "grad_norm": 0.25390625, "learning_rate": 0.00045482826377007057, "loss": 0.1832, "step": 140850 }, { "epoch": 5.83, "grad_norm": 0.50390625, "learning_rate": 0.0004548220455237139, "loss": 0.2033, "step": 140860 }, { "epoch": 5.83, "grad_norm": 0.78515625, "learning_rate": 0.0004548158268919042, "loss": 0.2046, "step": 140870 }, { "epoch": 5.84, "grad_norm": 0.796875, "learning_rate": 0.00045480960787465326, "loss": 0.1822, "step": 140880 }, { "epoch": 5.84, "grad_norm": 0.373046875, "learning_rate": 0.00045480338847197273, "loss": 0.2103, "step": 140890 }, { "epoch": 5.84, "grad_norm": 1.109375, "learning_rate": 0.0004547971686838743, "loss": 0.1835, "step": 140900 }, { "epoch": 5.84, "grad_norm": 0.69140625, "learning_rate": 0.00045479094851036986, "loss": 0.2394, "step": 140910 }, { "epoch": 5.84, "grad_norm": 0.67578125, "learning_rate": 0.0004547847279514708, "loss": 0.1928, "step": 140920 }, { "epoch": 5.84, "grad_norm": 0.5390625, "learning_rate": 0.00045477850700718903, "loss": 0.176, "step": 140930 }, { "epoch": 5.84, "grad_norm": 0.953125, "learning_rate": 0.0004547722856775363, "loss": 0.2063, "step": 140940 }, { "epoch": 5.84, "grad_norm": 0.53515625, "learning_rate": 0.00045476606396252424, "loss": 0.2255, "step": 140950 }, { "epoch": 5.84, "grad_norm": 0.4921875, "learning_rate": 0.0004547598418621645, "loss": 0.2243, "step": 140960 }, { "epoch": 5.84, "grad_norm": 1.5390625, "learning_rate": 0.00045475361937646886, "loss": 0.2048, "step": 140970 }, { "epoch": 5.84, "grad_norm": 1.15625, "learning_rate": 0.00045474739650544905, "loss": 0.1818, "step": 140980 }, { "epoch": 5.84, "grad_norm": 0.421875, "learning_rate": 0.00045474117324911676, "loss": 0.224, "step": 140990 }, { "epoch": 5.84, "grad_norm": 0.279296875, "learning_rate": 0.0004547349496074836, "loss": 0.1484, "step": 141000 }, { "epoch": 5.84, "grad_norm": 0.65625, "learning_rate": 0.00045472872558056145, "loss": 0.2431, "step": 141010 }, { "epoch": 5.84, "grad_norm": 0.8984375, "learning_rate": 0.000454722501168362, "loss": 0.1909, "step": 141020 }, { "epoch": 5.84, "grad_norm": 0.87109375, "learning_rate": 0.00045471627637089685, "loss": 0.296, "step": 141030 }, { "epoch": 5.84, "grad_norm": 0.703125, "learning_rate": 0.00045471005118817776, "loss": 0.174, "step": 141040 }, { "epoch": 5.84, "grad_norm": 1.3359375, "learning_rate": 0.00045470382562021644, "loss": 0.233, "step": 141050 }, { "epoch": 5.84, "grad_norm": 0.83203125, "learning_rate": 0.0004546975996670247, "loss": 0.2369, "step": 141060 }, { "epoch": 5.84, "grad_norm": 0.28515625, "learning_rate": 0.0004546913733286141, "loss": 0.1714, "step": 141070 }, { "epoch": 5.84, "grad_norm": 0.81640625, "learning_rate": 0.0004546851466049965, "loss": 0.2091, "step": 141080 }, { "epoch": 5.84, "grad_norm": 0.90234375, "learning_rate": 0.00045467891949618354, "loss": 0.2098, "step": 141090 }, { "epoch": 5.84, "grad_norm": 0.8359375, "learning_rate": 0.000454672692002187, "loss": 0.2449, "step": 141100 }, { "epoch": 5.84, "grad_norm": 1.359375, "learning_rate": 0.0004546664641230185, "loss": 0.2083, "step": 141110 }, { "epoch": 5.85, "grad_norm": 1.0078125, "learning_rate": 0.0004546602358586898, "loss": 0.2498, "step": 141120 }, { "epoch": 5.85, "grad_norm": 0.5390625, "learning_rate": 0.0004546540072092127, "loss": 0.2272, "step": 141130 }, { "epoch": 5.85, "grad_norm": 0.16796875, "learning_rate": 0.0004546477781745988, "loss": 0.197, "step": 141140 }, { "epoch": 5.85, "grad_norm": 0.62890625, "learning_rate": 0.0004546415487548599, "loss": 0.2142, "step": 141150 }, { "epoch": 5.85, "grad_norm": 0.54296875, "learning_rate": 0.00045463531895000774, "loss": 0.1707, "step": 141160 }, { "epoch": 5.85, "grad_norm": 0.69140625, "learning_rate": 0.000454629088760054, "loss": 0.1876, "step": 141170 }, { "epoch": 5.85, "grad_norm": 0.7109375, "learning_rate": 0.00045462285818501037, "loss": 0.1983, "step": 141180 }, { "epoch": 5.85, "grad_norm": 0.64453125, "learning_rate": 0.00045461662722488864, "loss": 0.2403, "step": 141190 }, { "epoch": 5.85, "grad_norm": 0.5234375, "learning_rate": 0.00045461039587970053, "loss": 0.2271, "step": 141200 }, { "epoch": 5.85, "grad_norm": 0.439453125, "learning_rate": 0.0004546041641494577, "loss": 0.2363, "step": 141210 }, { "epoch": 5.85, "grad_norm": 0.466796875, "learning_rate": 0.000454597932034172, "loss": 0.1892, "step": 141220 }, { "epoch": 5.85, "grad_norm": 1.125, "learning_rate": 0.00045459169953385506, "loss": 0.23, "step": 141230 }, { "epoch": 5.85, "grad_norm": 0.7734375, "learning_rate": 0.0004545854666485186, "loss": 0.2418, "step": 141240 }, { "epoch": 5.85, "grad_norm": 0.73828125, "learning_rate": 0.0004545792333781744, "loss": 0.23, "step": 141250 }, { "epoch": 5.85, "grad_norm": 0.54296875, "learning_rate": 0.0004545729997228343, "loss": 0.1702, "step": 141260 }, { "epoch": 5.85, "grad_norm": 0.22265625, "learning_rate": 0.00045456676568250974, "loss": 0.1997, "step": 141270 }, { "epoch": 5.85, "grad_norm": 0.82421875, "learning_rate": 0.0004545605312572127, "loss": 0.1362, "step": 141280 }, { "epoch": 5.85, "grad_norm": 1.2109375, "learning_rate": 0.0004545542964469548, "loss": 0.1822, "step": 141290 }, { "epoch": 5.85, "grad_norm": 0.8671875, "learning_rate": 0.0004545480612517478, "loss": 0.2369, "step": 141300 }, { "epoch": 5.85, "grad_norm": 0.875, "learning_rate": 0.00045454182567160345, "loss": 0.1773, "step": 141310 }, { "epoch": 5.85, "grad_norm": 0.98046875, "learning_rate": 0.0004545355897065335, "loss": 0.2238, "step": 141320 }, { "epoch": 5.85, "grad_norm": 0.60546875, "learning_rate": 0.0004545293533565496, "loss": 0.1843, "step": 141330 }, { "epoch": 5.85, "grad_norm": 0.66015625, "learning_rate": 0.0004545231166216636, "loss": 0.255, "step": 141340 }, { "epoch": 5.85, "grad_norm": 0.3515625, "learning_rate": 0.00045451687950188714, "loss": 0.2097, "step": 141350 }, { "epoch": 5.86, "grad_norm": 0.4609375, "learning_rate": 0.000454510641997232, "loss": 0.2234, "step": 141360 }, { "epoch": 5.86, "grad_norm": 0.46875, "learning_rate": 0.0004545044041077099, "loss": 0.205, "step": 141370 }, { "epoch": 5.86, "grad_norm": 1.0546875, "learning_rate": 0.00045449816583333265, "loss": 0.1979, "step": 141380 }, { "epoch": 5.86, "grad_norm": 0.609375, "learning_rate": 0.0004544919271741119, "loss": 0.2354, "step": 141390 }, { "epoch": 5.86, "grad_norm": 0.5859375, "learning_rate": 0.0004544856881300594, "loss": 0.2403, "step": 141400 }, { "epoch": 5.86, "grad_norm": 0.671875, "learning_rate": 0.00045447944870118703, "loss": 0.1577, "step": 141410 }, { "epoch": 5.86, "grad_norm": 0.64453125, "learning_rate": 0.0004544732088875063, "loss": 0.1696, "step": 141420 }, { "epoch": 5.86, "grad_norm": 1.2109375, "learning_rate": 0.00045446696868902916, "loss": 0.2131, "step": 141430 }, { "epoch": 5.86, "grad_norm": 0.4296875, "learning_rate": 0.0004544607281057672, "loss": 0.236, "step": 141440 }, { "epoch": 5.86, "grad_norm": 1.359375, "learning_rate": 0.0004544544871377323, "loss": 0.2464, "step": 141450 }, { "epoch": 5.86, "grad_norm": 0.30859375, "learning_rate": 0.00045444824578493606, "loss": 0.2456, "step": 141460 }, { "epoch": 5.86, "grad_norm": 0.30078125, "learning_rate": 0.00045444200404739035, "loss": 0.1533, "step": 141470 }, { "epoch": 5.86, "grad_norm": 2.828125, "learning_rate": 0.0004544357619251068, "loss": 0.1728, "step": 141480 }, { "epoch": 5.86, "grad_norm": 0.55078125, "learning_rate": 0.0004544295194180973, "loss": 0.1937, "step": 141490 }, { "epoch": 5.86, "grad_norm": 0.71875, "learning_rate": 0.0004544232765263735, "loss": 0.1932, "step": 141500 }, { "epoch": 5.86, "grad_norm": 1.0, "learning_rate": 0.0004544170332499471, "loss": 0.2124, "step": 141510 }, { "epoch": 5.86, "grad_norm": 0.1845703125, "learning_rate": 0.00045441078958882996, "loss": 0.1956, "step": 141520 }, { "epoch": 5.86, "grad_norm": 1.4921875, "learning_rate": 0.0004544045455430338, "loss": 0.2891, "step": 141530 }, { "epoch": 5.86, "grad_norm": 0.63671875, "learning_rate": 0.00045439830111257043, "loss": 0.2017, "step": 141540 }, { "epoch": 5.86, "grad_norm": 0.9765625, "learning_rate": 0.0004543920562974514, "loss": 0.2516, "step": 141550 }, { "epoch": 5.86, "grad_norm": 0.8125, "learning_rate": 0.00045438581109768865, "loss": 0.2391, "step": 141560 }, { "epoch": 5.86, "grad_norm": 0.79296875, "learning_rate": 0.0004543795655132939, "loss": 0.2107, "step": 141570 }, { "epoch": 5.86, "grad_norm": 0.26953125, "learning_rate": 0.00045437331954427884, "loss": 0.2053, "step": 141580 }, { "epoch": 5.86, "grad_norm": 0.7109375, "learning_rate": 0.0004543670731906553, "loss": 0.217, "step": 141590 }, { "epoch": 5.87, "grad_norm": 1.4296875, "learning_rate": 0.00045436082645243503, "loss": 0.2657, "step": 141600 }, { "epoch": 5.87, "grad_norm": 0.36328125, "learning_rate": 0.00045435457932962965, "loss": 0.214, "step": 141610 }, { "epoch": 5.87, "grad_norm": 0.9921875, "learning_rate": 0.0004543483318222511, "loss": 0.1939, "step": 141620 }, { "epoch": 5.87, "grad_norm": 0.7421875, "learning_rate": 0.00045434208393031105, "loss": 0.2084, "step": 141630 }, { "epoch": 5.87, "grad_norm": 0.72265625, "learning_rate": 0.0004543358356538212, "loss": 0.1973, "step": 141640 }, { "epoch": 5.87, "grad_norm": 0.5859375, "learning_rate": 0.00045432958699279346, "loss": 0.1994, "step": 141650 }, { "epoch": 5.87, "grad_norm": 1.0234375, "learning_rate": 0.00045432333794723944, "loss": 0.1978, "step": 141660 }, { "epoch": 5.87, "grad_norm": 0.6875, "learning_rate": 0.000454317088517171, "loss": 0.2665, "step": 141670 }, { "epoch": 5.87, "grad_norm": 0.62890625, "learning_rate": 0.0004543108387025998, "loss": 0.2555, "step": 141680 }, { "epoch": 5.87, "grad_norm": 0.55078125, "learning_rate": 0.0004543045885035377, "loss": 0.2066, "step": 141690 }, { "epoch": 5.87, "grad_norm": 0.98046875, "learning_rate": 0.0004542983379199965, "loss": 0.1539, "step": 141700 }, { "epoch": 5.87, "grad_norm": 0.4296875, "learning_rate": 0.0004542920869519879, "loss": 0.2178, "step": 141710 }, { "epoch": 5.87, "grad_norm": 0.68359375, "learning_rate": 0.0004542858355995235, "loss": 0.2334, "step": 141720 }, { "epoch": 5.87, "grad_norm": 0.453125, "learning_rate": 0.00045427958386261535, "loss": 0.2465, "step": 141730 }, { "epoch": 5.87, "grad_norm": 0.91796875, "learning_rate": 0.00045427333174127507, "loss": 0.2276, "step": 141740 }, { "epoch": 5.87, "grad_norm": 0.61328125, "learning_rate": 0.00045426707923551437, "loss": 0.2152, "step": 141750 }, { "epoch": 5.87, "grad_norm": 0.5703125, "learning_rate": 0.00045426082634534513, "loss": 0.1605, "step": 141760 }, { "epoch": 5.87, "grad_norm": 0.412109375, "learning_rate": 0.00045425457307077905, "loss": 0.1844, "step": 141770 }, { "epoch": 5.87, "grad_norm": 1.46875, "learning_rate": 0.0004542483194118279, "loss": 0.2034, "step": 141780 }, { "epoch": 5.87, "grad_norm": 0.859375, "learning_rate": 0.0004542420653685035, "loss": 0.2204, "step": 141790 }, { "epoch": 5.87, "grad_norm": 0.51171875, "learning_rate": 0.0004542358109408177, "loss": 0.1547, "step": 141800 }, { "epoch": 5.87, "grad_norm": 0.7265625, "learning_rate": 0.00045422955612878203, "loss": 0.1924, "step": 141810 }, { "epoch": 5.87, "grad_norm": 0.9921875, "learning_rate": 0.0004542233009324084, "loss": 0.2051, "step": 141820 }, { "epoch": 5.87, "grad_norm": 0.6640625, "learning_rate": 0.0004542170453517086, "loss": 0.1739, "step": 141830 }, { "epoch": 5.87, "grad_norm": 0.7109375, "learning_rate": 0.00045421078938669433, "loss": 0.1588, "step": 141840 }, { "epoch": 5.88, "grad_norm": 0.87890625, "learning_rate": 0.0004542045330373774, "loss": 0.2202, "step": 141850 }, { "epoch": 5.88, "grad_norm": 1.4375, "learning_rate": 0.00045419827630376964, "loss": 0.2615, "step": 141860 }, { "epoch": 5.88, "grad_norm": 0.90234375, "learning_rate": 0.0004541920191858828, "loss": 0.1951, "step": 141870 }, { "epoch": 5.88, "grad_norm": 0.70703125, "learning_rate": 0.0004541857616837286, "loss": 0.2012, "step": 141880 }, { "epoch": 5.88, "grad_norm": 0.96875, "learning_rate": 0.0004541795037973189, "loss": 0.1686, "step": 141890 }, { "epoch": 5.88, "grad_norm": 0.5625, "learning_rate": 0.0004541732455266653, "loss": 0.2054, "step": 141900 }, { "epoch": 5.88, "grad_norm": 0.53515625, "learning_rate": 0.00045416698687177976, "loss": 0.218, "step": 141910 }, { "epoch": 5.88, "grad_norm": 1.6484375, "learning_rate": 0.000454160727832674, "loss": 0.2512, "step": 141920 }, { "epoch": 5.88, "grad_norm": 0.69921875, "learning_rate": 0.0004541544684093598, "loss": 0.1907, "step": 141930 }, { "epoch": 5.88, "grad_norm": 0.466796875, "learning_rate": 0.000454148208601849, "loss": 0.221, "step": 141940 }, { "epoch": 5.88, "grad_norm": 0.46875, "learning_rate": 0.00045414194841015323, "loss": 0.1713, "step": 141950 }, { "epoch": 5.88, "grad_norm": 0.9375, "learning_rate": 0.00045413568783428436, "loss": 0.2024, "step": 141960 }, { "epoch": 5.88, "grad_norm": 1.0625, "learning_rate": 0.00045412942687425426, "loss": 0.2371, "step": 141970 }, { "epoch": 5.88, "grad_norm": 0.2470703125, "learning_rate": 0.0004541231655300745, "loss": 0.1794, "step": 141980 }, { "epoch": 5.88, "grad_norm": 0.578125, "learning_rate": 0.0004541169038017571, "loss": 0.2074, "step": 141990 }, { "epoch": 5.88, "grad_norm": 0.7265625, "learning_rate": 0.00045411064168931364, "loss": 0.2305, "step": 142000 }, { "epoch": 5.88, "grad_norm": 0.337890625, "learning_rate": 0.00045410437919275607, "loss": 0.1912, "step": 142010 }, { "epoch": 5.88, "grad_norm": 0.67578125, "learning_rate": 0.0004540981163120961, "loss": 0.2, "step": 142020 }, { "epoch": 5.88, "grad_norm": 0.82421875, "learning_rate": 0.0004540918530473454, "loss": 0.2052, "step": 142030 }, { "epoch": 5.88, "grad_norm": 1.203125, "learning_rate": 0.00045408558939851596, "loss": 0.2086, "step": 142040 }, { "epoch": 5.88, "grad_norm": 1.109375, "learning_rate": 0.00045407932536561946, "loss": 0.229, "step": 142050 }, { "epoch": 5.88, "grad_norm": 1.03125, "learning_rate": 0.00045407306094866776, "loss": 0.194, "step": 142060 }, { "epoch": 5.88, "grad_norm": 1.0, "learning_rate": 0.00045406679614767257, "loss": 0.2097, "step": 142070 }, { "epoch": 5.88, "grad_norm": 1.1328125, "learning_rate": 0.00045406053096264565, "loss": 0.1958, "step": 142080 }, { "epoch": 5.89, "grad_norm": 0.66015625, "learning_rate": 0.0004540542653935988, "loss": 0.1959, "step": 142090 }, { "epoch": 5.89, "grad_norm": 0.62109375, "learning_rate": 0.00045404799944054395, "loss": 0.2348, "step": 142100 }, { "epoch": 5.89, "grad_norm": 0.62109375, "learning_rate": 0.0004540417331034928, "loss": 0.1989, "step": 142110 }, { "epoch": 5.89, "grad_norm": 1.234375, "learning_rate": 0.00045403546638245716, "loss": 0.1999, "step": 142120 }, { "epoch": 5.89, "grad_norm": 0.486328125, "learning_rate": 0.00045402919927744887, "loss": 0.266, "step": 142130 }, { "epoch": 5.89, "grad_norm": 0.6328125, "learning_rate": 0.00045402293178847954, "loss": 0.2185, "step": 142140 }, { "epoch": 5.89, "grad_norm": 0.53125, "learning_rate": 0.0004540166639155611, "loss": 0.1997, "step": 142150 }, { "epoch": 5.89, "grad_norm": 0.85546875, "learning_rate": 0.00045401039565870537, "loss": 0.2059, "step": 142160 }, { "epoch": 5.89, "grad_norm": 0.7890625, "learning_rate": 0.0004540041270179241, "loss": 0.2118, "step": 142170 }, { "epoch": 5.89, "grad_norm": 1.4140625, "learning_rate": 0.0004539978579932291, "loss": 0.2229, "step": 142180 }, { "epoch": 5.89, "grad_norm": 1.328125, "learning_rate": 0.00045399158858463215, "loss": 0.1633, "step": 142190 }, { "epoch": 5.89, "grad_norm": 0.447265625, "learning_rate": 0.000453985318792145, "loss": 0.2428, "step": 142200 }, { "epoch": 5.89, "grad_norm": 2.421875, "learning_rate": 0.0004539790486157796, "loss": 0.241, "step": 142210 }, { "epoch": 5.89, "grad_norm": 0.91796875, "learning_rate": 0.0004539727780555476, "loss": 0.2349, "step": 142220 }, { "epoch": 5.89, "grad_norm": 1.109375, "learning_rate": 0.00045396650711146093, "loss": 0.1772, "step": 142230 }, { "epoch": 5.89, "grad_norm": 1.3515625, "learning_rate": 0.0004539602357835312, "loss": 0.1881, "step": 142240 }, { "epoch": 5.89, "grad_norm": 0.6328125, "learning_rate": 0.00045395396407177044, "loss": 0.2337, "step": 142250 }, { "epoch": 5.89, "grad_norm": 0.99609375, "learning_rate": 0.0004539476919761903, "loss": 0.223, "step": 142260 }, { "epoch": 5.89, "grad_norm": 0.9453125, "learning_rate": 0.0004539414194968026, "loss": 0.1788, "step": 142270 }, { "epoch": 5.89, "grad_norm": 0.87890625, "learning_rate": 0.00045393514663361924, "loss": 0.1965, "step": 142280 }, { "epoch": 5.89, "grad_norm": 0.60546875, "learning_rate": 0.0004539288733866519, "loss": 0.2336, "step": 142290 }, { "epoch": 5.89, "grad_norm": 0.9765625, "learning_rate": 0.0004539225997559124, "loss": 0.2025, "step": 142300 }, { "epoch": 5.89, "grad_norm": 0.404296875, "learning_rate": 0.00045391632574141275, "loss": 0.2125, "step": 142310 }, { "epoch": 5.89, "grad_norm": 0.79296875, "learning_rate": 0.0004539100513431645, "loss": 0.2268, "step": 142320 }, { "epoch": 5.9, "grad_norm": 0.296875, "learning_rate": 0.00045390377656117953, "loss": 0.2543, "step": 142330 }, { "epoch": 5.9, "grad_norm": 0.490234375, "learning_rate": 0.0004538975013954697, "loss": 0.2116, "step": 142340 }, { "epoch": 5.9, "grad_norm": 0.99609375, "learning_rate": 0.00045389122584604683, "loss": 0.1906, "step": 142350 }, { "epoch": 5.9, "grad_norm": 1.1484375, "learning_rate": 0.0004538849499129226, "loss": 0.2637, "step": 142360 }, { "epoch": 5.9, "grad_norm": 0.859375, "learning_rate": 0.00045387867359610897, "loss": 0.1842, "step": 142370 }, { "epoch": 5.9, "grad_norm": 0.83984375, "learning_rate": 0.00045387239689561763, "loss": 0.2054, "step": 142380 }, { "epoch": 5.9, "grad_norm": 0.388671875, "learning_rate": 0.0004538661198114606, "loss": 0.1621, "step": 142390 }, { "epoch": 5.9, "grad_norm": 0.1845703125, "learning_rate": 0.0004538598423436494, "loss": 0.2204, "step": 142400 }, { "epoch": 5.9, "grad_norm": 0.9296875, "learning_rate": 0.00045385356449219604, "loss": 0.2493, "step": 142410 }, { "epoch": 5.9, "grad_norm": 1.25, "learning_rate": 0.00045384728625711237, "loss": 0.1944, "step": 142420 }, { "epoch": 5.9, "grad_norm": 0.69140625, "learning_rate": 0.00045384100763841, "loss": 0.2307, "step": 142430 }, { "epoch": 5.9, "grad_norm": 0.96875, "learning_rate": 0.0004538347286361009, "loss": 0.219, "step": 142440 }, { "epoch": 5.9, "grad_norm": 0.734375, "learning_rate": 0.0004538284492501969, "loss": 0.2456, "step": 142450 }, { "epoch": 5.9, "grad_norm": 0.62109375, "learning_rate": 0.00045382216948070973, "loss": 0.2103, "step": 142460 }, { "epoch": 5.9, "grad_norm": 0.7734375, "learning_rate": 0.0004538158893276513, "loss": 0.2026, "step": 142470 }, { "epoch": 5.9, "grad_norm": 0.70703125, "learning_rate": 0.00045380960879103327, "loss": 0.247, "step": 142480 }, { "epoch": 5.9, "grad_norm": 0.5234375, "learning_rate": 0.00045380332787086763, "loss": 0.2112, "step": 142490 }, { "epoch": 5.9, "grad_norm": 0.498046875, "learning_rate": 0.0004537970465671661, "loss": 0.2303, "step": 142500 }, { "epoch": 5.9, "grad_norm": 0.59375, "learning_rate": 0.00045379076487994067, "loss": 0.2164, "step": 142510 }, { "epoch": 5.9, "grad_norm": 0.53515625, "learning_rate": 0.0004537844828092029, "loss": 0.1903, "step": 142520 }, { "epoch": 5.9, "grad_norm": 0.69140625, "learning_rate": 0.0004537782003549648, "loss": 0.2048, "step": 142530 }, { "epoch": 5.9, "grad_norm": 0.37890625, "learning_rate": 0.000453771917517238, "loss": 0.2428, "step": 142540 }, { "epoch": 5.9, "grad_norm": 1.3984375, "learning_rate": 0.00045376563429603455, "loss": 0.1883, "step": 142550 }, { "epoch": 5.9, "grad_norm": 1.6484375, "learning_rate": 0.0004537593506913662, "loss": 0.1975, "step": 142560 }, { "epoch": 5.91, "grad_norm": 0.298828125, "learning_rate": 0.0004537530667032448, "loss": 0.2267, "step": 142570 }, { "epoch": 5.91, "grad_norm": 1.6796875, "learning_rate": 0.00045374678233168206, "loss": 0.2259, "step": 142580 }, { "epoch": 5.91, "grad_norm": 0.58984375, "learning_rate": 0.00045374049757668987, "loss": 0.2381, "step": 142590 }, { "epoch": 5.91, "grad_norm": 0.63671875, "learning_rate": 0.0004537342124382801, "loss": 0.181, "step": 142600 }, { "epoch": 5.91, "grad_norm": 0.40234375, "learning_rate": 0.00045372792691646455, "loss": 0.1875, "step": 142610 }, { "epoch": 5.91, "grad_norm": 0.3515625, "learning_rate": 0.000453721641011255, "loss": 0.1469, "step": 142620 }, { "epoch": 5.91, "grad_norm": 0.91015625, "learning_rate": 0.00045371535472266334, "loss": 0.2038, "step": 142630 }, { "epoch": 5.91, "grad_norm": 0.62890625, "learning_rate": 0.00045370906805070147, "loss": 0.1996, "step": 142640 }, { "epoch": 5.91, "grad_norm": 1.09375, "learning_rate": 0.000453702780995381, "loss": 0.1957, "step": 142650 }, { "epoch": 5.91, "grad_norm": 0.50390625, "learning_rate": 0.00045369649355671396, "loss": 0.1904, "step": 142660 }, { "epoch": 5.91, "grad_norm": 0.67578125, "learning_rate": 0.0004536902057347121, "loss": 0.2175, "step": 142670 }, { "epoch": 5.91, "grad_norm": 0.5546875, "learning_rate": 0.00045368391752938724, "loss": 0.1643, "step": 142680 }, { "epoch": 5.91, "grad_norm": 0.921875, "learning_rate": 0.00045367762894075125, "loss": 0.2277, "step": 142690 }, { "epoch": 5.91, "grad_norm": 0.75390625, "learning_rate": 0.00045367133996881607, "loss": 0.2014, "step": 142700 }, { "epoch": 5.91, "grad_norm": 0.69140625, "learning_rate": 0.0004536650506135933, "loss": 0.1929, "step": 142710 }, { "epoch": 5.91, "grad_norm": 2.84375, "learning_rate": 0.00045365876087509493, "loss": 0.2045, "step": 142720 }, { "epoch": 5.91, "grad_norm": 1.4453125, "learning_rate": 0.0004536524707533327, "loss": 0.1778, "step": 142730 }, { "epoch": 5.91, "grad_norm": 1.25, "learning_rate": 0.0004536461802483186, "loss": 0.2075, "step": 142740 }, { "epoch": 5.91, "grad_norm": 0.64453125, "learning_rate": 0.0004536398893600644, "loss": 0.216, "step": 142750 }, { "epoch": 5.91, "grad_norm": 0.98828125, "learning_rate": 0.00045363359808858186, "loss": 0.1723, "step": 142760 }, { "epoch": 5.91, "grad_norm": 0.5546875, "learning_rate": 0.00045362730643388295, "loss": 0.1863, "step": 142770 }, { "epoch": 5.91, "grad_norm": 0.1328125, "learning_rate": 0.00045362101439597934, "loss": 0.1908, "step": 142780 }, { "epoch": 5.91, "grad_norm": 0.31640625, "learning_rate": 0.00045361472197488306, "loss": 0.2716, "step": 142790 }, { "epoch": 5.91, "grad_norm": 0.98046875, "learning_rate": 0.0004536084291706058, "loss": 0.231, "step": 142800 }, { "epoch": 5.92, "grad_norm": 0.9453125, "learning_rate": 0.00045360213598315946, "loss": 0.2043, "step": 142810 }, { "epoch": 5.92, "grad_norm": 0.2041015625, "learning_rate": 0.00045359584241255594, "loss": 0.209, "step": 142820 }, { "epoch": 5.92, "grad_norm": 0.58203125, "learning_rate": 0.000453589548458807, "loss": 0.1973, "step": 142830 }, { "epoch": 5.92, "grad_norm": 0.9375, "learning_rate": 0.00045358325412192454, "loss": 0.2664, "step": 142840 }, { "epoch": 5.92, "grad_norm": 0.515625, "learning_rate": 0.00045357695940192034, "loss": 0.264, "step": 142850 }, { "epoch": 5.92, "grad_norm": 0.73828125, "learning_rate": 0.0004535706642988063, "loss": 0.2004, "step": 142860 }, { "epoch": 5.92, "grad_norm": 0.404296875, "learning_rate": 0.0004535643688125943, "loss": 0.2074, "step": 142870 }, { "epoch": 5.92, "grad_norm": 0.443359375, "learning_rate": 0.0004535580729432961, "loss": 0.1781, "step": 142880 }, { "epoch": 5.92, "grad_norm": 0.7421875, "learning_rate": 0.00045355177669092355, "loss": 0.2067, "step": 142890 }, { "epoch": 5.92, "grad_norm": 0.392578125, "learning_rate": 0.00045354548005548855, "loss": 0.195, "step": 142900 }, { "epoch": 5.92, "grad_norm": 0.283203125, "learning_rate": 0.000453539183037003, "loss": 0.2007, "step": 142910 }, { "epoch": 5.92, "grad_norm": 0.4453125, "learning_rate": 0.00045353288563547867, "loss": 0.1622, "step": 142920 }, { "epoch": 5.92, "grad_norm": 0.8203125, "learning_rate": 0.0004535265878509274, "loss": 0.2296, "step": 142930 }, { "epoch": 5.92, "grad_norm": 0.953125, "learning_rate": 0.00045352028968336113, "loss": 0.1949, "step": 142940 }, { "epoch": 5.92, "grad_norm": 0.9375, "learning_rate": 0.00045351399113279157, "loss": 0.1999, "step": 142950 }, { "epoch": 5.92, "grad_norm": 0.64453125, "learning_rate": 0.00045350769219923073, "loss": 0.1613, "step": 142960 }, { "epoch": 5.92, "grad_norm": 0.3671875, "learning_rate": 0.00045350139288269034, "loss": 0.2016, "step": 142970 }, { "epoch": 5.92, "grad_norm": 0.47265625, "learning_rate": 0.00045349509318318225, "loss": 0.2124, "step": 142980 }, { "epoch": 5.92, "grad_norm": 0.7890625, "learning_rate": 0.00045348879310071845, "loss": 0.2191, "step": 142990 }, { "epoch": 5.92, "grad_norm": 0.671875, "learning_rate": 0.0004534824926353107, "loss": 0.1965, "step": 143000 }, { "epoch": 5.92, "grad_norm": 0.388671875, "learning_rate": 0.00045347619178697093, "loss": 0.1693, "step": 143010 }, { "epoch": 5.92, "grad_norm": 1.8046875, "learning_rate": 0.00045346989055571085, "loss": 0.2409, "step": 143020 }, { "epoch": 5.92, "grad_norm": 0.6484375, "learning_rate": 0.0004534635889415425, "loss": 0.2116, "step": 143030 }, { "epoch": 5.92, "grad_norm": 0.6796875, "learning_rate": 0.0004534572869444775, "loss": 0.2449, "step": 143040 }, { "epoch": 5.93, "grad_norm": 0.185546875, "learning_rate": 0.000453450984564528, "loss": 0.2342, "step": 143050 }, { "epoch": 5.93, "grad_norm": 0.63671875, "learning_rate": 0.00045344468180170565, "loss": 0.1783, "step": 143060 }, { "epoch": 5.93, "grad_norm": 2.609375, "learning_rate": 0.00045343837865602246, "loss": 0.2074, "step": 143070 }, { "epoch": 5.93, "grad_norm": 0.625, "learning_rate": 0.00045343207512749014, "loss": 0.1559, "step": 143080 }, { "epoch": 5.93, "grad_norm": 1.015625, "learning_rate": 0.0004534257712161206, "loss": 0.2014, "step": 143090 }, { "epoch": 5.93, "grad_norm": 1.09375, "learning_rate": 0.00045341946692192576, "loss": 0.204, "step": 143100 }, { "epoch": 5.93, "grad_norm": 1.2734375, "learning_rate": 0.0004534131622449175, "loss": 0.1675, "step": 143110 }, { "epoch": 5.93, "grad_norm": 1.15625, "learning_rate": 0.00045340685718510756, "loss": 0.2144, "step": 143120 }, { "epoch": 5.93, "grad_norm": 1.0703125, "learning_rate": 0.00045340055174250787, "loss": 0.2159, "step": 143130 }, { "epoch": 5.93, "grad_norm": 1.1171875, "learning_rate": 0.0004533942459171304, "loss": 0.183, "step": 143140 }, { "epoch": 5.93, "grad_norm": 0.55859375, "learning_rate": 0.0004533879397089868, "loss": 0.262, "step": 143150 }, { "epoch": 5.93, "grad_norm": 0.21875, "learning_rate": 0.00045338163311808914, "loss": 0.2667, "step": 143160 }, { "epoch": 5.93, "grad_norm": 0.71875, "learning_rate": 0.00045337532614444923, "loss": 0.2442, "step": 143170 }, { "epoch": 5.93, "grad_norm": 0.59375, "learning_rate": 0.0004533690187880789, "loss": 0.1769, "step": 143180 }, { "epoch": 5.93, "grad_norm": 0.46484375, "learning_rate": 0.00045336271104899, "loss": 0.2142, "step": 143190 }, { "epoch": 5.93, "grad_norm": 0.76171875, "learning_rate": 0.0004533564029271945, "loss": 0.2109, "step": 143200 }, { "epoch": 5.93, "grad_norm": 0.87890625, "learning_rate": 0.0004533500944227041, "loss": 0.1786, "step": 143210 }, { "epoch": 5.93, "grad_norm": 0.9296875, "learning_rate": 0.0004533437855355309, "loss": 0.2158, "step": 143220 }, { "epoch": 5.93, "grad_norm": 0.6796875, "learning_rate": 0.00045333747626568667, "loss": 0.2079, "step": 143230 }, { "epoch": 5.93, "grad_norm": 1.078125, "learning_rate": 0.00045333116661318317, "loss": 0.2131, "step": 143240 }, { "epoch": 5.93, "grad_norm": 0.6171875, "learning_rate": 0.0004533248565780324, "loss": 0.2381, "step": 143250 }, { "epoch": 5.93, "grad_norm": 1.1796875, "learning_rate": 0.00045331854616024623, "loss": 0.1542, "step": 143260 }, { "epoch": 5.93, "grad_norm": 0.859375, "learning_rate": 0.00045331223535983653, "loss": 0.2394, "step": 143270 }, { "epoch": 5.93, "grad_norm": 0.65625, "learning_rate": 0.0004533059241768151, "loss": 0.1848, "step": 143280 }, { "epoch": 5.94, "grad_norm": 0.67578125, "learning_rate": 0.0004532996126111939, "loss": 0.2263, "step": 143290 }, { "epoch": 5.94, "grad_norm": 1.0703125, "learning_rate": 0.0004532933006629848, "loss": 0.2233, "step": 143300 }, { "epoch": 5.94, "grad_norm": 0.55859375, "learning_rate": 0.0004532869883321997, "loss": 0.1892, "step": 143310 }, { "epoch": 5.94, "grad_norm": 1.0546875, "learning_rate": 0.00045328067561885033, "loss": 0.211, "step": 143320 }, { "epoch": 5.94, "grad_norm": 2.09375, "learning_rate": 0.0004532743625229487, "loss": 0.211, "step": 143330 }, { "epoch": 5.94, "grad_norm": 0.484375, "learning_rate": 0.0004532680490445068, "loss": 0.1895, "step": 143340 }, { "epoch": 5.94, "grad_norm": 1.65625, "learning_rate": 0.00045326173518353633, "loss": 0.2534, "step": 143350 }, { "epoch": 5.94, "grad_norm": 1.0078125, "learning_rate": 0.0004532554209400491, "loss": 0.1946, "step": 143360 }, { "epoch": 5.94, "grad_norm": 0.796875, "learning_rate": 0.00045324910631405725, "loss": 0.2381, "step": 143370 }, { "epoch": 5.94, "grad_norm": 1.9921875, "learning_rate": 0.0004532427913055725, "loss": 0.2174, "step": 143380 }, { "epoch": 5.94, "grad_norm": 0.796875, "learning_rate": 0.00045323647591460675, "loss": 0.2075, "step": 143390 }, { "epoch": 5.94, "grad_norm": 0.78515625, "learning_rate": 0.0004532301601411719, "loss": 0.2151, "step": 143400 }, { "epoch": 5.94, "grad_norm": 1.109375, "learning_rate": 0.0004532238439852798, "loss": 0.2078, "step": 143410 }, { "epoch": 5.94, "grad_norm": 0.703125, "learning_rate": 0.00045321752744694247, "loss": 0.174, "step": 143420 }, { "epoch": 5.94, "grad_norm": 0.48046875, "learning_rate": 0.00045321121052617166, "loss": 0.1975, "step": 143430 }, { "epoch": 5.94, "grad_norm": 1.15625, "learning_rate": 0.0004532048932229793, "loss": 0.1669, "step": 143440 }, { "epoch": 5.94, "grad_norm": 0.6484375, "learning_rate": 0.0004531985755373772, "loss": 0.1919, "step": 143450 }, { "epoch": 5.94, "grad_norm": 0.73046875, "learning_rate": 0.0004531922574693774, "loss": 0.1925, "step": 143460 }, { "epoch": 5.94, "grad_norm": 0.44140625, "learning_rate": 0.0004531859390189917, "loss": 0.2014, "step": 143470 }, { "epoch": 5.94, "grad_norm": 0.5546875, "learning_rate": 0.000453179620186232, "loss": 0.2345, "step": 143480 }, { "epoch": 5.94, "grad_norm": 0.412109375, "learning_rate": 0.0004531733009711102, "loss": 0.2191, "step": 143490 }, { "epoch": 5.94, "grad_norm": 0.81640625, "learning_rate": 0.0004531669813736382, "loss": 0.258, "step": 143500 }, { "epoch": 5.94, "grad_norm": 1.5390625, "learning_rate": 0.0004531606613938278, "loss": 0.2146, "step": 143510 }, { "epoch": 5.94, "grad_norm": 2.125, "learning_rate": 0.00045315434103169105, "loss": 0.2301, "step": 143520 }, { "epoch": 5.94, "grad_norm": 0.412109375, "learning_rate": 0.0004531480202872398, "loss": 0.2177, "step": 143530 }, { "epoch": 5.95, "grad_norm": 0.66015625, "learning_rate": 0.00045314169916048586, "loss": 0.2103, "step": 143540 }, { "epoch": 5.95, "grad_norm": 0.59765625, "learning_rate": 0.00045313537765144117, "loss": 0.24, "step": 143550 }, { "epoch": 5.95, "grad_norm": 0.85546875, "learning_rate": 0.0004531290557601177, "loss": 0.2001, "step": 143560 }, { "epoch": 5.95, "grad_norm": 1.375, "learning_rate": 0.00045312273348652724, "loss": 0.2262, "step": 143570 }, { "epoch": 5.95, "grad_norm": 1.0625, "learning_rate": 0.00045311641083068175, "loss": 0.2694, "step": 143580 }, { "epoch": 5.95, "grad_norm": 0.73828125, "learning_rate": 0.00045311008779259313, "loss": 0.1893, "step": 143590 }, { "epoch": 5.95, "grad_norm": 0.52734375, "learning_rate": 0.00045310376437227316, "loss": 0.2044, "step": 143600 }, { "epoch": 5.95, "grad_norm": 0.83984375, "learning_rate": 0.0004530974405697339, "loss": 0.2205, "step": 143610 }, { "epoch": 5.95, "grad_norm": 0.84765625, "learning_rate": 0.00045309111638498724, "loss": 0.2125, "step": 143620 }, { "epoch": 5.95, "grad_norm": 0.65625, "learning_rate": 0.00045308479181804497, "loss": 0.1856, "step": 143630 }, { "epoch": 5.95, "grad_norm": 1.8359375, "learning_rate": 0.000453078466868919, "loss": 0.2191, "step": 143640 }, { "epoch": 5.95, "grad_norm": 0.78125, "learning_rate": 0.0004530721415376213, "loss": 0.2513, "step": 143650 }, { "epoch": 5.95, "grad_norm": 0.97265625, "learning_rate": 0.0004530658158241639, "loss": 0.1925, "step": 143660 }, { "epoch": 5.95, "grad_norm": 0.4375, "learning_rate": 0.0004530594897285585, "loss": 0.2297, "step": 143670 }, { "epoch": 5.95, "grad_norm": 0.37890625, "learning_rate": 0.000453053163250817, "loss": 0.2197, "step": 143680 }, { "epoch": 5.95, "grad_norm": 0.5390625, "learning_rate": 0.0004530468363909514, "loss": 0.2028, "step": 143690 }, { "epoch": 5.95, "grad_norm": 0.5078125, "learning_rate": 0.0004530405091489736, "loss": 0.2105, "step": 143700 }, { "epoch": 5.95, "grad_norm": 1.625, "learning_rate": 0.0004530341815248955, "loss": 0.209, "step": 143710 }, { "epoch": 5.95, "grad_norm": 0.6015625, "learning_rate": 0.000453027853518729, "loss": 0.2123, "step": 143720 }, { "epoch": 5.95, "grad_norm": 0.8046875, "learning_rate": 0.000453021525130486, "loss": 0.2075, "step": 143730 }, { "epoch": 5.95, "grad_norm": 0.53515625, "learning_rate": 0.0004530151963601784, "loss": 0.204, "step": 143740 }, { "epoch": 5.95, "grad_norm": 0.9609375, "learning_rate": 0.0004530088672078181, "loss": 0.2023, "step": 143750 }, { "epoch": 5.95, "grad_norm": 1.53125, "learning_rate": 0.00045300253767341706, "loss": 0.2041, "step": 143760 }, { "epoch": 5.95, "grad_norm": 0.76171875, "learning_rate": 0.0004529962077569871, "loss": 0.2161, "step": 143770 }, { "epoch": 5.96, "grad_norm": 0.515625, "learning_rate": 0.0004529898774585403, "loss": 0.241, "step": 143780 }, { "epoch": 5.96, "grad_norm": 0.5546875, "learning_rate": 0.0004529835467780885, "loss": 0.2389, "step": 143790 }, { "epoch": 5.96, "grad_norm": 0.41796875, "learning_rate": 0.00045297721571564345, "loss": 0.214, "step": 143800 }, { "epoch": 5.96, "grad_norm": 1.140625, "learning_rate": 0.00045297088427121725, "loss": 0.2669, "step": 143810 }, { "epoch": 5.96, "grad_norm": 1.171875, "learning_rate": 0.0004529645524448218, "loss": 0.2183, "step": 143820 }, { "epoch": 5.96, "grad_norm": 0.68359375, "learning_rate": 0.000452958220236469, "loss": 0.1563, "step": 143830 }, { "epoch": 5.96, "grad_norm": 0.70703125, "learning_rate": 0.0004529518876461707, "loss": 0.2018, "step": 143840 }, { "epoch": 5.96, "grad_norm": 0.27734375, "learning_rate": 0.0004529455546739389, "loss": 0.2331, "step": 143850 }, { "epoch": 5.96, "grad_norm": 0.8828125, "learning_rate": 0.0004529392213197855, "loss": 0.2212, "step": 143860 }, { "epoch": 5.96, "grad_norm": 0.40234375, "learning_rate": 0.0004529328875837223, "loss": 0.2478, "step": 143870 }, { "epoch": 5.96, "grad_norm": 0.56640625, "learning_rate": 0.0004529265534657614, "loss": 0.2394, "step": 143880 }, { "epoch": 5.96, "grad_norm": 0.359375, "learning_rate": 0.0004529202189659146, "loss": 0.2276, "step": 143890 }, { "epoch": 5.96, "grad_norm": 0.8125, "learning_rate": 0.00045291388408419387, "loss": 0.2002, "step": 143900 }, { "epoch": 5.96, "grad_norm": 0.859375, "learning_rate": 0.00045290754882061114, "loss": 0.2303, "step": 143910 }, { "epoch": 5.96, "grad_norm": 0.58203125, "learning_rate": 0.0004529012131751783, "loss": 0.2074, "step": 143920 }, { "epoch": 5.96, "grad_norm": 0.455078125, "learning_rate": 0.00045289487714790733, "loss": 0.1695, "step": 143930 }, { "epoch": 5.96, "grad_norm": 0.625, "learning_rate": 0.00045288854073881015, "loss": 0.191, "step": 143940 }, { "epoch": 5.96, "grad_norm": 0.90625, "learning_rate": 0.00045288220394789855, "loss": 0.188, "step": 143950 }, { "epoch": 5.96, "grad_norm": 1.03125, "learning_rate": 0.00045287586677518455, "loss": 0.2027, "step": 143960 }, { "epoch": 5.96, "grad_norm": 0.75, "learning_rate": 0.0004528695292206801, "loss": 0.2251, "step": 143970 }, { "epoch": 5.96, "grad_norm": 1.296875, "learning_rate": 0.00045286319128439714, "loss": 0.2057, "step": 143980 }, { "epoch": 5.96, "grad_norm": 0.78125, "learning_rate": 0.00045285685296634747, "loss": 0.1851, "step": 143990 }, { "epoch": 5.96, "grad_norm": 1.03125, "learning_rate": 0.0004528505142665432, "loss": 0.2754, "step": 144000 }, { "epoch": 5.96, "grad_norm": 0.54296875, "learning_rate": 0.00045284417518499616, "loss": 0.202, "step": 144010 }, { "epoch": 5.97, "grad_norm": 0.55859375, "learning_rate": 0.0004528378357217182, "loss": 0.2056, "step": 144020 }, { "epoch": 5.97, "grad_norm": 1.375, "learning_rate": 0.00045283149587672147, "loss": 0.2075, "step": 144030 }, { "epoch": 5.97, "grad_norm": 0.79296875, "learning_rate": 0.0004528251556500177, "loss": 0.2065, "step": 144040 }, { "epoch": 5.97, "grad_norm": 0.294921875, "learning_rate": 0.00045281881504161885, "loss": 0.2037, "step": 144050 }, { "epoch": 5.97, "grad_norm": 0.92578125, "learning_rate": 0.000452812474051537, "loss": 0.2235, "step": 144060 }, { "epoch": 5.97, "grad_norm": 0.466796875, "learning_rate": 0.00045280613267978387, "loss": 0.2183, "step": 144070 }, { "epoch": 5.97, "grad_norm": 0.5859375, "learning_rate": 0.0004527997909263716, "loss": 0.2042, "step": 144080 }, { "epoch": 5.97, "grad_norm": 0.69140625, "learning_rate": 0.0004527934487913119, "loss": 0.2175, "step": 144090 }, { "epoch": 5.97, "grad_norm": 0.4765625, "learning_rate": 0.00045278710627461694, "loss": 0.1755, "step": 144100 }, { "epoch": 5.97, "grad_norm": 0.51171875, "learning_rate": 0.0004527807633762985, "loss": 0.1724, "step": 144110 }, { "epoch": 5.97, "grad_norm": 1.109375, "learning_rate": 0.0004527744200963685, "loss": 0.2236, "step": 144120 }, { "epoch": 5.97, "grad_norm": 1.4921875, "learning_rate": 0.0004527680764348391, "loss": 0.1753, "step": 144130 }, { "epoch": 5.97, "grad_norm": 0.5546875, "learning_rate": 0.0004527617323917219, "loss": 0.2236, "step": 144140 }, { "epoch": 5.97, "grad_norm": 0.7890625, "learning_rate": 0.00045275538796702916, "loss": 0.217, "step": 144150 }, { "epoch": 5.97, "grad_norm": 0.65625, "learning_rate": 0.0004527490431607726, "loss": 0.2231, "step": 144160 }, { "epoch": 5.97, "grad_norm": 0.3125, "learning_rate": 0.0004527426979729642, "loss": 0.2004, "step": 144170 }, { "epoch": 5.97, "grad_norm": 1.5078125, "learning_rate": 0.00045273635240361597, "loss": 0.2404, "step": 144180 }, { "epoch": 5.97, "grad_norm": 0.55078125, "learning_rate": 0.00045273000645273986, "loss": 0.2358, "step": 144190 }, { "epoch": 5.97, "grad_norm": 1.5625, "learning_rate": 0.0004527236601203477, "loss": 0.1779, "step": 144200 }, { "epoch": 5.97, "grad_norm": 0.34375, "learning_rate": 0.0004527173134064516, "loss": 0.2511, "step": 144210 }, { "epoch": 5.97, "grad_norm": 0.49609375, "learning_rate": 0.00045271096631106333, "loss": 0.2177, "step": 144220 }, { "epoch": 5.97, "grad_norm": 0.8828125, "learning_rate": 0.00045270461883419494, "loss": 0.1986, "step": 144230 }, { "epoch": 5.97, "grad_norm": 0.875, "learning_rate": 0.00045269827097585836, "loss": 0.1649, "step": 144240 }, { "epoch": 5.97, "grad_norm": 0.1982421875, "learning_rate": 0.00045269192273606553, "loss": 0.2095, "step": 144250 }, { "epoch": 5.98, "grad_norm": 1.3203125, "learning_rate": 0.00045268557411482836, "loss": 0.1583, "step": 144260 }, { "epoch": 5.98, "grad_norm": 0.9609375, "learning_rate": 0.00045267922511215883, "loss": 0.2661, "step": 144270 }, { "epoch": 5.98, "grad_norm": 0.48046875, "learning_rate": 0.0004526728757280689, "loss": 0.228, "step": 144280 }, { "epoch": 5.98, "grad_norm": 0.578125, "learning_rate": 0.0004526665259625705, "loss": 0.1962, "step": 144290 }, { "epoch": 5.98, "grad_norm": 0.671875, "learning_rate": 0.0004526601758156755, "loss": 0.2289, "step": 144300 }, { "epoch": 5.98, "grad_norm": 2.28125, "learning_rate": 0.00045265382528739606, "loss": 0.1956, "step": 144310 }, { "epoch": 5.98, "grad_norm": 0.59375, "learning_rate": 0.000452647474377744, "loss": 0.2663, "step": 144320 }, { "epoch": 5.98, "grad_norm": 0.322265625, "learning_rate": 0.00045264112308673123, "loss": 0.2108, "step": 144330 }, { "epoch": 5.98, "grad_norm": 0.353515625, "learning_rate": 0.0004526347714143697, "loss": 0.1956, "step": 144340 }, { "epoch": 5.98, "grad_norm": 0.80859375, "learning_rate": 0.0004526284193606715, "loss": 0.2008, "step": 144350 }, { "epoch": 5.98, "grad_norm": 0.73828125, "learning_rate": 0.00045262206692564847, "loss": 0.1956, "step": 144360 }, { "epoch": 5.98, "grad_norm": 1.0234375, "learning_rate": 0.00045261571410931255, "loss": 0.1887, "step": 144370 }, { "epoch": 5.98, "grad_norm": 0.671875, "learning_rate": 0.0004526093609116758, "loss": 0.1776, "step": 144380 }, { "epoch": 5.98, "grad_norm": 0.87890625, "learning_rate": 0.00045260300733275007, "loss": 0.2245, "step": 144390 }, { "epoch": 5.98, "grad_norm": 0.70703125, "learning_rate": 0.0004525966533725474, "loss": 0.2004, "step": 144400 }, { "epoch": 5.98, "grad_norm": 0.91015625, "learning_rate": 0.0004525902990310797, "loss": 0.161, "step": 144410 }, { "epoch": 5.98, "grad_norm": 0.953125, "learning_rate": 0.00045258394430835894, "loss": 0.2513, "step": 144420 }, { "epoch": 5.98, "grad_norm": 1.46875, "learning_rate": 0.00045257758920439704, "loss": 0.2275, "step": 144430 }, { "epoch": 5.98, "grad_norm": 0.37109375, "learning_rate": 0.00045257123371920596, "loss": 0.1924, "step": 144440 }, { "epoch": 5.98, "grad_norm": 1.734375, "learning_rate": 0.00045256487785279775, "loss": 0.2038, "step": 144450 }, { "epoch": 5.98, "grad_norm": 0.515625, "learning_rate": 0.00045255852160518427, "loss": 0.1745, "step": 144460 }, { "epoch": 5.98, "grad_norm": 0.4453125, "learning_rate": 0.0004525521649763776, "loss": 0.2124, "step": 144470 }, { "epoch": 5.98, "grad_norm": 1.953125, "learning_rate": 0.00045254580796638954, "loss": 0.2096, "step": 144480 }, { "epoch": 5.98, "grad_norm": 0.33984375, "learning_rate": 0.00045253945057523225, "loss": 0.1565, "step": 144490 }, { "epoch": 5.99, "grad_norm": 0.81640625, "learning_rate": 0.00045253309280291756, "loss": 0.1959, "step": 144500 }, { "epoch": 5.99, "grad_norm": 0.466796875, "learning_rate": 0.0004525267346494574, "loss": 0.2074, "step": 144510 }, { "epoch": 5.99, "grad_norm": 0.578125, "learning_rate": 0.00045252037611486385, "loss": 0.2066, "step": 144520 }, { "epoch": 5.99, "grad_norm": 0.53515625, "learning_rate": 0.00045251401719914873, "loss": 0.2028, "step": 144530 }, { "epoch": 5.99, "grad_norm": 0.90625, "learning_rate": 0.00045250765790232425, "loss": 0.2044, "step": 144540 }, { "epoch": 5.99, "grad_norm": 0.5390625, "learning_rate": 0.0004525012982244021, "loss": 0.1803, "step": 144550 }, { "epoch": 5.99, "grad_norm": 0.66796875, "learning_rate": 0.00045249493816539445, "loss": 0.2122, "step": 144560 }, { "epoch": 5.99, "grad_norm": 0.68359375, "learning_rate": 0.00045248857772531314, "loss": 0.1802, "step": 144570 }, { "epoch": 5.99, "grad_norm": 0.95703125, "learning_rate": 0.0004524822169041702, "loss": 0.1955, "step": 144580 }, { "epoch": 5.99, "grad_norm": 1.1171875, "learning_rate": 0.0004524758557019776, "loss": 0.2266, "step": 144590 }, { "epoch": 5.99, "grad_norm": 0.58203125, "learning_rate": 0.0004524694941187474, "loss": 0.1678, "step": 144600 }, { "epoch": 5.99, "grad_norm": 2.203125, "learning_rate": 0.0004524631321544913, "loss": 0.215, "step": 144610 }, { "epoch": 5.99, "grad_norm": 0.4140625, "learning_rate": 0.0004524567698092216, "loss": 0.183, "step": 144620 }, { "epoch": 5.99, "grad_norm": 0.51171875, "learning_rate": 0.00045245040708295005, "loss": 0.2199, "step": 144630 }, { "epoch": 5.99, "grad_norm": 0.640625, "learning_rate": 0.00045244404397568874, "loss": 0.2133, "step": 144640 }, { "epoch": 5.99, "grad_norm": 0.7265625, "learning_rate": 0.00045243768048744957, "loss": 0.2091, "step": 144650 }, { "epoch": 5.99, "grad_norm": 0.52734375, "learning_rate": 0.0004524313166182445, "loss": 0.2231, "step": 144660 }, { "epoch": 5.99, "grad_norm": 1.046875, "learning_rate": 0.0004524249523680857, "loss": 0.2118, "step": 144670 }, { "epoch": 5.99, "grad_norm": 0.94921875, "learning_rate": 0.00045241858773698484, "loss": 0.198, "step": 144680 }, { "epoch": 5.99, "grad_norm": 0.384765625, "learning_rate": 0.00045241222272495406, "loss": 0.1619, "step": 144690 }, { "epoch": 5.99, "grad_norm": 0.361328125, "learning_rate": 0.0004524058573320055, "loss": 0.2008, "step": 144700 }, { "epoch": 5.99, "grad_norm": 0.7578125, "learning_rate": 0.0004523994915581509, "loss": 0.227, "step": 144710 }, { "epoch": 5.99, "grad_norm": 0.70703125, "learning_rate": 0.0004523931254034022, "loss": 0.2396, "step": 144720 }, { "epoch": 5.99, "grad_norm": 0.7578125, "learning_rate": 0.00045238675886777156, "loss": 0.171, "step": 144730 }, { "epoch": 6.0, "grad_norm": 0.60546875, "learning_rate": 0.000452380391951271, "loss": 0.1965, "step": 144740 }, { "epoch": 6.0, "grad_norm": 0.70703125, "learning_rate": 0.00045237402465391223, "loss": 0.1961, "step": 144750 }, { "epoch": 6.0, "grad_norm": 1.0859375, "learning_rate": 0.00045236765697570747, "loss": 0.2172, "step": 144760 }, { "epoch": 6.0, "grad_norm": 0.58203125, "learning_rate": 0.00045236128891666867, "loss": 0.1862, "step": 144770 }, { "epoch": 6.0, "grad_norm": 1.0390625, "learning_rate": 0.00045235492047680776, "loss": 0.253, "step": 144780 }, { "epoch": 6.0, "grad_norm": 1.8359375, "learning_rate": 0.0004523485516561368, "loss": 0.221, "step": 144790 }, { "epoch": 6.0, "grad_norm": 0.7578125, "learning_rate": 0.00045234218245466764, "loss": 0.1805, "step": 144800 }, { "epoch": 6.0, "grad_norm": 1.5703125, "learning_rate": 0.00045233581287241234, "loss": 0.1864, "step": 144810 }, { "epoch": 6.0, "grad_norm": 0.40234375, "learning_rate": 0.0004523294429093829, "loss": 0.1978, "step": 144820 }, { "epoch": 6.0, "grad_norm": 0.66015625, "learning_rate": 0.0004523230725655913, "loss": 0.1748, "step": 144830 }, { "epoch": 6.0, "grad_norm": 0.94140625, "learning_rate": 0.0004523167018410495, "loss": 0.2275, "step": 144840 }, { "epoch": 6.0, "grad_norm": 0.26953125, "learning_rate": 0.00045231033073576954, "loss": 0.2668, "step": 144850 }, { "epoch": 6.0, "grad_norm": 0.65625, "learning_rate": 0.0004523039592497634, "loss": 0.1528, "step": 144860 }, { "epoch": 6.0, "grad_norm": 0.96484375, "learning_rate": 0.000452297587383043, "loss": 0.1482, "step": 144870 }, { "epoch": 6.0, "grad_norm": 1.7109375, "learning_rate": 0.0004522912151356205, "loss": 0.2558, "step": 144880 }, { "epoch": 6.0, "grad_norm": 0.83203125, "learning_rate": 0.00045228484250750767, "loss": 0.192, "step": 144890 }, { "epoch": 6.0, "grad_norm": 1.7578125, "learning_rate": 0.00045227846949871673, "loss": 0.1697, "step": 144900 }, { "epoch": 6.0, "grad_norm": 0.39453125, "learning_rate": 0.0004522720961092595, "loss": 0.1961, "step": 144910 }, { "epoch": 6.0, "grad_norm": 0.5078125, "learning_rate": 0.000452265722339148, "loss": 0.2096, "step": 144920 }, { "epoch": 6.0, "grad_norm": 0.8671875, "learning_rate": 0.0004522593481883942, "loss": 0.1812, "step": 144930 }, { "epoch": 6.0, "grad_norm": 0.7578125, "learning_rate": 0.00045225297365701026, "loss": 0.1799, "step": 144940 }, { "epoch": 6.0, "grad_norm": 0.61328125, "learning_rate": 0.00045224659874500795, "loss": 0.1827, "step": 144950 }, { "epoch": 6.0, "grad_norm": 0.28515625, "learning_rate": 0.00045224022345239945, "loss": 0.2156, "step": 144960 }, { "epoch": 6.0, "grad_norm": 1.1640625, "learning_rate": 0.00045223384777919674, "loss": 0.2106, "step": 144970 }, { "epoch": 6.01, "grad_norm": 0.77734375, "learning_rate": 0.0004522274717254117, "loss": 0.1504, "step": 144980 }, { "epoch": 6.01, "grad_norm": 0.78125, "learning_rate": 0.0004522210952910564, "loss": 0.2457, "step": 144990 }, { "epoch": 6.01, "grad_norm": 0.55078125, "learning_rate": 0.00045221471847614283, "loss": 0.1698, "step": 145000 }, { "epoch": 6.01, "grad_norm": 0.77734375, "learning_rate": 0.000452208341280683, "loss": 0.2101, "step": 145010 }, { "epoch": 6.01, "grad_norm": 0.54296875, "learning_rate": 0.00045220196370468897, "loss": 0.2413, "step": 145020 }, { "epoch": 6.01, "grad_norm": 0.796875, "learning_rate": 0.00045219558574817264, "loss": 0.1854, "step": 145030 }, { "epoch": 6.01, "grad_norm": 0.71875, "learning_rate": 0.000452189207411146, "loss": 0.2583, "step": 145040 }, { "epoch": 6.01, "grad_norm": 0.76171875, "learning_rate": 0.00045218282869362113, "loss": 0.2475, "step": 145050 }, { "epoch": 6.01, "grad_norm": 0.97265625, "learning_rate": 0.00045217644959561013, "loss": 0.2098, "step": 145060 }, { "epoch": 6.01, "grad_norm": 0.63671875, "learning_rate": 0.00045217007011712473, "loss": 0.1898, "step": 145070 }, { "epoch": 6.01, "grad_norm": 0.7109375, "learning_rate": 0.00045216369025817717, "loss": 0.2057, "step": 145080 }, { "epoch": 6.01, "grad_norm": 0.4765625, "learning_rate": 0.0004521573100187794, "loss": 0.2296, "step": 145090 }, { "epoch": 6.01, "grad_norm": 0.2001953125, "learning_rate": 0.0004521509293989433, "loss": 0.2046, "step": 145100 }, { "epoch": 6.01, "grad_norm": 0.7109375, "learning_rate": 0.00045214454839868104, "loss": 0.166, "step": 145110 }, { "epoch": 6.01, "grad_norm": 0.6796875, "learning_rate": 0.0004521381670180046, "loss": 0.2225, "step": 145120 }, { "epoch": 6.01, "grad_norm": 0.439453125, "learning_rate": 0.0004521317852569259, "loss": 0.2289, "step": 145130 }, { "epoch": 6.01, "grad_norm": 0.89453125, "learning_rate": 0.00045212540311545703, "loss": 0.2739, "step": 145140 }, { "epoch": 6.01, "grad_norm": 0.95703125, "learning_rate": 0.00045211902059361, "loss": 0.2, "step": 145150 }, { "epoch": 6.01, "grad_norm": 0.42578125, "learning_rate": 0.00045211263769139677, "loss": 0.1738, "step": 145160 }, { "epoch": 6.01, "grad_norm": 0.51953125, "learning_rate": 0.0004521062544088294, "loss": 0.2204, "step": 145170 }, { "epoch": 6.01, "grad_norm": 0.66796875, "learning_rate": 0.0004520998707459199, "loss": 0.2074, "step": 145180 }, { "epoch": 6.01, "grad_norm": 0.5390625, "learning_rate": 0.00045209348670268026, "loss": 0.2583, "step": 145190 }, { "epoch": 6.01, "grad_norm": 0.384765625, "learning_rate": 0.00045208710227912245, "loss": 0.1286, "step": 145200 }, { "epoch": 6.01, "grad_norm": 0.5234375, "learning_rate": 0.00045208071747525856, "loss": 0.2046, "step": 145210 }, { "epoch": 6.01, "grad_norm": 0.9765625, "learning_rate": 0.0004520743322911006, "loss": 0.1848, "step": 145220 }, { "epoch": 6.02, "grad_norm": 0.90625, "learning_rate": 0.0004520679467266606, "loss": 0.1738, "step": 145230 }, { "epoch": 6.02, "grad_norm": 0.388671875, "learning_rate": 0.00045206156078195047, "loss": 0.2169, "step": 145240 }, { "epoch": 6.02, "grad_norm": 0.58203125, "learning_rate": 0.0004520551744569824, "loss": 0.1755, "step": 145250 }, { "epoch": 6.02, "grad_norm": 0.5546875, "learning_rate": 0.00045204878775176825, "loss": 0.2216, "step": 145260 }, { "epoch": 6.02, "grad_norm": 0.84765625, "learning_rate": 0.00045204240066632016, "loss": 0.2381, "step": 145270 }, { "epoch": 6.02, "grad_norm": 0.67578125, "learning_rate": 0.00045203601320065, "loss": 0.2155, "step": 145280 }, { "epoch": 6.02, "grad_norm": 0.73046875, "learning_rate": 0.00045202962535477, "loss": 0.1933, "step": 145290 }, { "epoch": 6.02, "grad_norm": 0.298828125, "learning_rate": 0.00045202323712869197, "loss": 0.1926, "step": 145300 }, { "epoch": 6.02, "grad_norm": 1.828125, "learning_rate": 0.000452016848522428, "loss": 0.1605, "step": 145310 }, { "epoch": 6.02, "grad_norm": 0.76171875, "learning_rate": 0.0004520104595359902, "loss": 0.1939, "step": 145320 }, { "epoch": 6.02, "grad_norm": 0.7578125, "learning_rate": 0.00045200407016939047, "loss": 0.1695, "step": 145330 }, { "epoch": 6.02, "grad_norm": 0.408203125, "learning_rate": 0.000451997680422641, "loss": 0.1985, "step": 145340 }, { "epoch": 6.02, "grad_norm": 1.0703125, "learning_rate": 0.0004519912902957536, "loss": 0.2093, "step": 145350 }, { "epoch": 6.02, "grad_norm": 0.353515625, "learning_rate": 0.0004519848997887405, "loss": 0.2457, "step": 145360 }, { "epoch": 6.02, "grad_norm": 0.69140625, "learning_rate": 0.0004519785089016135, "loss": 0.2044, "step": 145370 }, { "epoch": 6.02, "grad_norm": 0.41796875, "learning_rate": 0.0004519721176343849, "loss": 0.1874, "step": 145380 }, { "epoch": 6.02, "grad_norm": 0.5859375, "learning_rate": 0.00045196572598706655, "loss": 0.1511, "step": 145390 }, { "epoch": 6.02, "grad_norm": 0.73828125, "learning_rate": 0.00045195933395967045, "loss": 0.18, "step": 145400 }, { "epoch": 6.02, "grad_norm": 0.5, "learning_rate": 0.0004519529415522087, "loss": 0.1799, "step": 145410 }, { "epoch": 6.02, "grad_norm": 1.015625, "learning_rate": 0.00045194654876469335, "loss": 0.2357, "step": 145420 }, { "epoch": 6.02, "grad_norm": 0.5859375, "learning_rate": 0.0004519401555971364, "loss": 0.2674, "step": 145430 }, { "epoch": 6.02, "grad_norm": 0.4609375, "learning_rate": 0.00045193376204954994, "loss": 0.2539, "step": 145440 }, { "epoch": 6.02, "grad_norm": 0.92578125, "learning_rate": 0.0004519273681219459, "loss": 0.2384, "step": 145450 }, { "epoch": 6.02, "grad_norm": 0.42578125, "learning_rate": 0.0004519209738143363, "loss": 0.2105, "step": 145460 }, { "epoch": 6.03, "grad_norm": 0.95703125, "learning_rate": 0.00045191457912673326, "loss": 0.1746, "step": 145470 }, { "epoch": 6.03, "grad_norm": 0.59765625, "learning_rate": 0.0004519081840591489, "loss": 0.2125, "step": 145480 }, { "epoch": 6.03, "grad_norm": 0.3671875, "learning_rate": 0.000451901788611595, "loss": 0.1828, "step": 145490 }, { "epoch": 6.03, "grad_norm": 0.421875, "learning_rate": 0.00045189539278408386, "loss": 0.193, "step": 145500 }, { "epoch": 6.03, "grad_norm": 0.279296875, "learning_rate": 0.00045188899657662727, "loss": 0.1993, "step": 145510 }, { "epoch": 6.03, "grad_norm": 0.70703125, "learning_rate": 0.00045188259998923746, "loss": 0.1773, "step": 145520 }, { "epoch": 6.03, "grad_norm": 0.42578125, "learning_rate": 0.0004518762030219264, "loss": 0.2174, "step": 145530 }, { "epoch": 6.03, "grad_norm": 0.515625, "learning_rate": 0.00045186980567470613, "loss": 0.1779, "step": 145540 }, { "epoch": 6.03, "grad_norm": 0.55078125, "learning_rate": 0.0004518634079475887, "loss": 0.2354, "step": 145550 }, { "epoch": 6.03, "grad_norm": 1.15625, "learning_rate": 0.0004518570098405861, "loss": 0.1774, "step": 145560 }, { "epoch": 6.03, "grad_norm": 0.44140625, "learning_rate": 0.0004518506113537104, "loss": 0.2192, "step": 145570 }, { "epoch": 6.03, "grad_norm": 0.83984375, "learning_rate": 0.0004518442124869737, "loss": 0.2021, "step": 145580 }, { "epoch": 6.03, "grad_norm": 0.8359375, "learning_rate": 0.0004518378132403879, "loss": 0.223, "step": 145590 }, { "epoch": 6.03, "grad_norm": 0.369140625, "learning_rate": 0.00045183141361396516, "loss": 0.2102, "step": 145600 }, { "epoch": 6.03, "grad_norm": 1.0390625, "learning_rate": 0.00045182501360771754, "loss": 0.2071, "step": 145610 }, { "epoch": 6.03, "grad_norm": 0.58203125, "learning_rate": 0.00045181861322165704, "loss": 0.1873, "step": 145620 }, { "epoch": 6.03, "grad_norm": 0.60546875, "learning_rate": 0.0004518122124557957, "loss": 0.2251, "step": 145630 }, { "epoch": 6.03, "grad_norm": 0.546875, "learning_rate": 0.00045180581131014553, "loss": 0.1827, "step": 145640 }, { "epoch": 6.03, "grad_norm": 1.421875, "learning_rate": 0.0004517994097847186, "loss": 0.2281, "step": 145650 }, { "epoch": 6.03, "grad_norm": 0.7890625, "learning_rate": 0.00045179300787952703, "loss": 0.2553, "step": 145660 }, { "epoch": 6.03, "grad_norm": 1.0625, "learning_rate": 0.0004517866055945828, "loss": 0.1943, "step": 145670 }, { "epoch": 6.03, "grad_norm": 0.515625, "learning_rate": 0.00045178020292989797, "loss": 0.1643, "step": 145680 }, { "epoch": 6.03, "grad_norm": 1.5703125, "learning_rate": 0.00045177379988548455, "loss": 0.236, "step": 145690 }, { "epoch": 6.03, "grad_norm": 0.671875, "learning_rate": 0.0004517673964613547, "loss": 0.1583, "step": 145700 }, { "epoch": 6.04, "grad_norm": 0.302734375, "learning_rate": 0.00045176099265752036, "loss": 0.223, "step": 145710 }, { "epoch": 6.04, "grad_norm": 0.50390625, "learning_rate": 0.0004517545884739936, "loss": 0.2277, "step": 145720 }, { "epoch": 6.04, "grad_norm": 0.83984375, "learning_rate": 0.0004517481839107865, "loss": 0.2201, "step": 145730 }, { "epoch": 6.04, "grad_norm": 0.76953125, "learning_rate": 0.00045174177896791114, "loss": 0.1861, "step": 145740 }, { "epoch": 6.04, "grad_norm": 0.9609375, "learning_rate": 0.0004517353736453795, "loss": 0.1996, "step": 145750 }, { "epoch": 6.04, "grad_norm": 0.828125, "learning_rate": 0.00045172896794320365, "loss": 0.1949, "step": 145760 }, { "epoch": 6.04, "grad_norm": 0.51953125, "learning_rate": 0.00045172256186139573, "loss": 0.1712, "step": 145770 }, { "epoch": 6.04, "grad_norm": 0.859375, "learning_rate": 0.0004517161553999677, "loss": 0.1879, "step": 145780 }, { "epoch": 6.04, "grad_norm": 0.8671875, "learning_rate": 0.00045170974855893165, "loss": 0.2586, "step": 145790 }, { "epoch": 6.04, "grad_norm": 0.921875, "learning_rate": 0.0004517033413382996, "loss": 0.272, "step": 145800 }, { "epoch": 6.04, "grad_norm": 0.65234375, "learning_rate": 0.0004516969337380837, "loss": 0.2032, "step": 145810 }, { "epoch": 6.04, "grad_norm": 1.1015625, "learning_rate": 0.00045169052575829593, "loss": 0.1794, "step": 145820 }, { "epoch": 6.04, "grad_norm": 0.6015625, "learning_rate": 0.0004516841173989484, "loss": 0.1701, "step": 145830 }, { "epoch": 6.04, "grad_norm": 1.125, "learning_rate": 0.0004516777086600531, "loss": 0.2191, "step": 145840 }, { "epoch": 6.04, "grad_norm": 0.67578125, "learning_rate": 0.0004516712995416222, "loss": 0.2333, "step": 145850 }, { "epoch": 6.04, "grad_norm": 0.51171875, "learning_rate": 0.0004516648900436676, "loss": 0.1968, "step": 145860 }, { "epoch": 6.04, "grad_norm": 0.81640625, "learning_rate": 0.00045165848016620147, "loss": 0.1755, "step": 145870 }, { "epoch": 6.04, "grad_norm": 0.90234375, "learning_rate": 0.00045165206990923593, "loss": 0.2107, "step": 145880 }, { "epoch": 6.04, "grad_norm": 0.59375, "learning_rate": 0.0004516456592727829, "loss": 0.2255, "step": 145890 }, { "epoch": 6.04, "grad_norm": 0.77734375, "learning_rate": 0.0004516392482568545, "loss": 0.2073, "step": 145900 }, { "epoch": 6.04, "grad_norm": 0.68359375, "learning_rate": 0.00045163283686146293, "loss": 0.1802, "step": 145910 }, { "epoch": 6.04, "grad_norm": 0.86328125, "learning_rate": 0.00045162642508662, "loss": 0.2322, "step": 145920 }, { "epoch": 6.04, "grad_norm": 0.4609375, "learning_rate": 0.000451620012932338, "loss": 0.2296, "step": 145930 }, { "epoch": 6.04, "grad_norm": 0.68359375, "learning_rate": 0.0004516136003986289, "loss": 0.2113, "step": 145940 }, { "epoch": 6.05, "grad_norm": 0.625, "learning_rate": 0.00045160718748550474, "loss": 0.2059, "step": 145950 }, { "epoch": 6.05, "grad_norm": 1.015625, "learning_rate": 0.0004516007741929776, "loss": 0.2351, "step": 145960 }, { "epoch": 6.05, "grad_norm": 1.0234375, "learning_rate": 0.0004515943605210596, "loss": 0.1591, "step": 145970 }, { "epoch": 6.05, "grad_norm": 0.51953125, "learning_rate": 0.00045158794646976285, "loss": 0.174, "step": 145980 }, { "epoch": 6.05, "grad_norm": 1.640625, "learning_rate": 0.0004515815320390993, "loss": 0.1879, "step": 145990 }, { "epoch": 6.05, "grad_norm": 0.58203125, "learning_rate": 0.00045157511722908106, "loss": 0.18, "step": 146000 }, { "epoch": 6.05, "grad_norm": 0.84765625, "learning_rate": 0.0004515687020397202, "loss": 0.2063, "step": 146010 }, { "epoch": 6.05, "grad_norm": 1.3203125, "learning_rate": 0.00045156228647102894, "loss": 0.2115, "step": 146020 }, { "epoch": 6.05, "grad_norm": 0.34375, "learning_rate": 0.00045155587052301904, "loss": 0.2286, "step": 146030 }, { "epoch": 6.05, "grad_norm": 0.65234375, "learning_rate": 0.0004515494541957029, "loss": 0.2078, "step": 146040 }, { "epoch": 6.05, "grad_norm": 0.84375, "learning_rate": 0.0004515430374890924, "loss": 0.2446, "step": 146050 }, { "epoch": 6.05, "grad_norm": 0.66796875, "learning_rate": 0.0004515366204031997, "loss": 0.2192, "step": 146060 }, { "epoch": 6.05, "grad_norm": 0.703125, "learning_rate": 0.00045153020293803677, "loss": 0.2499, "step": 146070 }, { "epoch": 6.05, "grad_norm": 0.447265625, "learning_rate": 0.0004515237850936158, "loss": 0.1918, "step": 146080 }, { "epoch": 6.05, "grad_norm": 0.63671875, "learning_rate": 0.00045151736686994883, "loss": 0.2151, "step": 146090 }, { "epoch": 6.05, "grad_norm": 1.078125, "learning_rate": 0.0004515109482670479, "loss": 0.195, "step": 146100 }, { "epoch": 6.05, "grad_norm": 1.0625, "learning_rate": 0.00045150452928492517, "loss": 0.1827, "step": 146110 }, { "epoch": 6.05, "grad_norm": 0.9765625, "learning_rate": 0.00045149810992359263, "loss": 0.1868, "step": 146120 }, { "epoch": 6.05, "grad_norm": 2.046875, "learning_rate": 0.00045149169018306244, "loss": 0.1836, "step": 146130 }, { "epoch": 6.05, "grad_norm": 0.359375, "learning_rate": 0.00045148527006334674, "loss": 0.1895, "step": 146140 }, { "epoch": 6.05, "grad_norm": 0.2451171875, "learning_rate": 0.0004514788495644574, "loss": 0.1674, "step": 146150 }, { "epoch": 6.05, "grad_norm": 1.0390625, "learning_rate": 0.0004514724286864067, "loss": 0.1766, "step": 146160 }, { "epoch": 6.05, "grad_norm": 1.1328125, "learning_rate": 0.00045146600742920664, "loss": 0.2255, "step": 146170 }, { "epoch": 6.05, "grad_norm": 0.66796875, "learning_rate": 0.0004514595857928692, "loss": 0.2209, "step": 146180 }, { "epoch": 6.06, "grad_norm": 0.4609375, "learning_rate": 0.00045145316377740666, "loss": 0.1878, "step": 146190 }, { "epoch": 6.06, "grad_norm": 1.125, "learning_rate": 0.00045144674138283104, "loss": 0.1615, "step": 146200 }, { "epoch": 6.06, "grad_norm": 0.8046875, "learning_rate": 0.00045144031860915436, "loss": 0.2061, "step": 146210 }, { "epoch": 6.06, "grad_norm": 0.318359375, "learning_rate": 0.0004514338954563888, "loss": 0.2011, "step": 146220 }, { "epoch": 6.06, "grad_norm": 0.50390625, "learning_rate": 0.0004514274719245464, "loss": 0.2233, "step": 146230 }, { "epoch": 6.06, "grad_norm": 0.4375, "learning_rate": 0.00045142104801363915, "loss": 0.1705, "step": 146240 }, { "epoch": 6.06, "grad_norm": 0.322265625, "learning_rate": 0.00045141462372367934, "loss": 0.1869, "step": 146250 }, { "epoch": 6.06, "grad_norm": 0.9453125, "learning_rate": 0.0004514081990546789, "loss": 0.1676, "step": 146260 }, { "epoch": 6.06, "grad_norm": 0.390625, "learning_rate": 0.00045140177400665004, "loss": 0.1785, "step": 146270 }, { "epoch": 6.06, "grad_norm": 0.6015625, "learning_rate": 0.0004513953485796048, "loss": 0.2183, "step": 146280 }, { "epoch": 6.06, "grad_norm": 1.203125, "learning_rate": 0.00045138892277355526, "loss": 0.2046, "step": 146290 }, { "epoch": 6.06, "grad_norm": 1.109375, "learning_rate": 0.00045138249658851343, "loss": 0.203, "step": 146300 }, { "epoch": 6.06, "grad_norm": 1.046875, "learning_rate": 0.00045137607002449155, "loss": 0.2277, "step": 146310 }, { "epoch": 6.06, "grad_norm": 1.1953125, "learning_rate": 0.00045136964308150165, "loss": 0.2329, "step": 146320 }, { "epoch": 6.06, "grad_norm": 0.80859375, "learning_rate": 0.0004513632157595559, "loss": 0.2226, "step": 146330 }, { "epoch": 6.06, "grad_norm": 0.7578125, "learning_rate": 0.00045135678805866615, "loss": 0.142, "step": 146340 }, { "epoch": 6.06, "grad_norm": 0.77734375, "learning_rate": 0.0004513503599788448, "loss": 0.1618, "step": 146350 }, { "epoch": 6.06, "grad_norm": 0.244140625, "learning_rate": 0.0004513439315201038, "loss": 0.2212, "step": 146360 }, { "epoch": 6.06, "grad_norm": 1.1875, "learning_rate": 0.00045133750268245524, "loss": 0.164, "step": 146370 }, { "epoch": 6.06, "grad_norm": 1.109375, "learning_rate": 0.00045133107346591116, "loss": 0.2637, "step": 146380 }, { "epoch": 6.06, "grad_norm": 0.36328125, "learning_rate": 0.0004513246438704839, "loss": 0.1945, "step": 146390 }, { "epoch": 6.06, "grad_norm": 0.33984375, "learning_rate": 0.0004513182138961853, "loss": 0.1894, "step": 146400 }, { "epoch": 6.06, "grad_norm": 0.58984375, "learning_rate": 0.0004513117835430276, "loss": 0.1848, "step": 146410 }, { "epoch": 6.06, "grad_norm": 2.421875, "learning_rate": 0.00045130535281102285, "loss": 0.2553, "step": 146420 }, { "epoch": 6.07, "grad_norm": 0.4765625, "learning_rate": 0.0004512989217001832, "loss": 0.2116, "step": 146430 }, { "epoch": 6.07, "grad_norm": 0.7578125, "learning_rate": 0.00045129249021052067, "loss": 0.2157, "step": 146440 }, { "epoch": 6.07, "grad_norm": 0.90625, "learning_rate": 0.0004512860583420474, "loss": 0.1851, "step": 146450 }, { "epoch": 6.07, "grad_norm": 0.86328125, "learning_rate": 0.0004512796260947756, "loss": 0.2072, "step": 146460 }, { "epoch": 6.07, "grad_norm": 1.03125, "learning_rate": 0.0004512731934687172, "loss": 0.2168, "step": 146470 }, { "epoch": 6.07, "grad_norm": 0.984375, "learning_rate": 0.0004512667604638844, "loss": 0.201, "step": 146480 }, { "epoch": 6.07, "grad_norm": 0.9140625, "learning_rate": 0.0004512603270802893, "loss": 0.2434, "step": 146490 }, { "epoch": 6.07, "grad_norm": 1.0625, "learning_rate": 0.00045125389331794396, "loss": 0.1938, "step": 146500 }, { "epoch": 6.07, "grad_norm": 0.91796875, "learning_rate": 0.00045124745917686054, "loss": 0.1728, "step": 146510 }, { "epoch": 6.07, "grad_norm": 0.80078125, "learning_rate": 0.0004512410246570512, "loss": 0.1961, "step": 146520 }, { "epoch": 6.07, "grad_norm": 1.6328125, "learning_rate": 0.00045123458975852794, "loss": 0.1879, "step": 146530 }, { "epoch": 6.07, "grad_norm": 0.71875, "learning_rate": 0.0004512281544813029, "loss": 0.1587, "step": 146540 }, { "epoch": 6.07, "grad_norm": 0.765625, "learning_rate": 0.00045122171882538823, "loss": 0.221, "step": 146550 }, { "epoch": 6.07, "grad_norm": 0.7265625, "learning_rate": 0.00045121528279079597, "loss": 0.1949, "step": 146560 }, { "epoch": 6.07, "grad_norm": 0.59765625, "learning_rate": 0.00045120884637753835, "loss": 0.1788, "step": 146570 }, { "epoch": 6.07, "grad_norm": 0.408203125, "learning_rate": 0.0004512024095856273, "loss": 0.2579, "step": 146580 }, { "epoch": 6.07, "grad_norm": 0.8984375, "learning_rate": 0.00045119597241507516, "loss": 0.2298, "step": 146590 }, { "epoch": 6.07, "grad_norm": 0.6015625, "learning_rate": 0.0004511895348658939, "loss": 0.2429, "step": 146600 }, { "epoch": 6.07, "grad_norm": 0.5078125, "learning_rate": 0.00045118309693809554, "loss": 0.1606, "step": 146610 }, { "epoch": 6.07, "grad_norm": 0.45703125, "learning_rate": 0.00045117665863169244, "loss": 0.2024, "step": 146620 }, { "epoch": 6.07, "grad_norm": 0.1962890625, "learning_rate": 0.0004511702199466966, "loss": 0.1439, "step": 146630 }, { "epoch": 6.07, "grad_norm": 0.498046875, "learning_rate": 0.0004511637808831201, "loss": 0.2264, "step": 146640 }, { "epoch": 6.07, "grad_norm": 0.0, "learning_rate": 0.0004511573414409751, "loss": 0.209, "step": 146650 }, { "epoch": 6.07, "grad_norm": 1.3984375, "learning_rate": 0.0004511509016202737, "loss": 0.2041, "step": 146660 }, { "epoch": 6.08, "grad_norm": 0.30078125, "learning_rate": 0.000451144461421028, "loss": 0.167, "step": 146670 }, { "epoch": 6.08, "grad_norm": 1.2109375, "learning_rate": 0.00045113802084325016, "loss": 0.2213, "step": 146680 }, { "epoch": 6.08, "grad_norm": 0.6796875, "learning_rate": 0.00045113157988695233, "loss": 0.2059, "step": 146690 }, { "epoch": 6.08, "grad_norm": 0.73828125, "learning_rate": 0.00045112513855214656, "loss": 0.1358, "step": 146700 }, { "epoch": 6.08, "grad_norm": 0.93359375, "learning_rate": 0.00045111869683884495, "loss": 0.2311, "step": 146710 }, { "epoch": 6.08, "grad_norm": 0.76953125, "learning_rate": 0.0004511122547470597, "loss": 0.1763, "step": 146720 }, { "epoch": 6.08, "grad_norm": 0.51953125, "learning_rate": 0.00045110581227680293, "loss": 0.161, "step": 146730 }, { "epoch": 6.08, "grad_norm": 0.60546875, "learning_rate": 0.0004510993694280867, "loss": 0.2181, "step": 146740 }, { "epoch": 6.08, "grad_norm": 0.71875, "learning_rate": 0.00045109292620092325, "loss": 0.2045, "step": 146750 }, { "epoch": 6.08, "grad_norm": 1.15625, "learning_rate": 0.00045108648259532455, "loss": 0.1924, "step": 146760 }, { "epoch": 6.08, "grad_norm": 1.5, "learning_rate": 0.0004510800386113028, "loss": 0.209, "step": 146770 }, { "epoch": 6.08, "grad_norm": 0.3359375, "learning_rate": 0.0004510735942488702, "loss": 0.1916, "step": 146780 }, { "epoch": 6.08, "grad_norm": 1.3515625, "learning_rate": 0.00045106714950803874, "loss": 0.2382, "step": 146790 }, { "epoch": 6.08, "grad_norm": 0.6640625, "learning_rate": 0.00045106070438882064, "loss": 0.1749, "step": 146800 }, { "epoch": 6.08, "grad_norm": 0.59765625, "learning_rate": 0.00045105425889122806, "loss": 0.1708, "step": 146810 }, { "epoch": 6.08, "grad_norm": 0.34375, "learning_rate": 0.000451047813015273, "loss": 0.2196, "step": 146820 }, { "epoch": 6.08, "grad_norm": 0.65234375, "learning_rate": 0.00045104136676096774, "loss": 0.2116, "step": 146830 }, { "epoch": 6.08, "grad_norm": 0.39453125, "learning_rate": 0.00045103492012832423, "loss": 0.1945, "step": 146840 }, { "epoch": 6.08, "grad_norm": 2.1875, "learning_rate": 0.00045102847311735483, "loss": 0.2201, "step": 146850 }, { "epoch": 6.08, "grad_norm": 0.65234375, "learning_rate": 0.0004510220257280715, "loss": 0.2064, "step": 146860 }, { "epoch": 6.08, "grad_norm": 1.328125, "learning_rate": 0.0004510155779604864, "loss": 0.2082, "step": 146870 }, { "epoch": 6.08, "grad_norm": 0.7265625, "learning_rate": 0.00045100912981461174, "loss": 0.2216, "step": 146880 }, { "epoch": 6.08, "grad_norm": 0.87109375, "learning_rate": 0.00045100268129045964, "loss": 0.2353, "step": 146890 }, { "epoch": 6.08, "grad_norm": 0.92578125, "learning_rate": 0.0004509962323880421, "loss": 0.156, "step": 146900 }, { "epoch": 6.08, "grad_norm": 0.58984375, "learning_rate": 0.0004509897831073714, "loss": 0.2228, "step": 146910 }, { "epoch": 6.09, "grad_norm": 0.79296875, "learning_rate": 0.00045098333344845967, "loss": 0.1932, "step": 146920 }, { "epoch": 6.09, "grad_norm": 0.6171875, "learning_rate": 0.00045097688341131894, "loss": 0.2028, "step": 146930 }, { "epoch": 6.09, "grad_norm": 0.328125, "learning_rate": 0.00045097043299596143, "loss": 0.1912, "step": 146940 }, { "epoch": 6.09, "grad_norm": 0.27734375, "learning_rate": 0.0004509639822023993, "loss": 0.194, "step": 146950 }, { "epoch": 6.09, "grad_norm": 0.54296875, "learning_rate": 0.0004509575310306446, "loss": 0.2248, "step": 146960 }, { "epoch": 6.09, "grad_norm": 0.5703125, "learning_rate": 0.0004509510794807096, "loss": 0.2501, "step": 146970 }, { "epoch": 6.09, "grad_norm": 0.6328125, "learning_rate": 0.00045094462755260634, "loss": 0.1792, "step": 146980 }, { "epoch": 6.09, "grad_norm": 0.6875, "learning_rate": 0.00045093817524634707, "loss": 0.1523, "step": 146990 }, { "epoch": 6.09, "grad_norm": 1.203125, "learning_rate": 0.0004509317225619437, "loss": 0.1988, "step": 147000 }, { "epoch": 6.09, "grad_norm": 0.65234375, "learning_rate": 0.00045092526949940865, "loss": 0.2343, "step": 147010 }, { "epoch": 6.09, "grad_norm": 0.6796875, "learning_rate": 0.0004509188160587539, "loss": 0.1703, "step": 147020 }, { "epoch": 6.09, "grad_norm": 0.59375, "learning_rate": 0.0004509123622399917, "loss": 0.2486, "step": 147030 }, { "epoch": 6.09, "grad_norm": 0.7890625, "learning_rate": 0.00045090590804313405, "loss": 0.1831, "step": 147040 }, { "epoch": 6.09, "grad_norm": 0.63671875, "learning_rate": 0.00045089945346819317, "loss": 0.2141, "step": 147050 }, { "epoch": 6.09, "grad_norm": 0.86328125, "learning_rate": 0.0004508929985151812, "loss": 0.2265, "step": 147060 }, { "epoch": 6.09, "grad_norm": 0.69140625, "learning_rate": 0.0004508865431841104, "loss": 0.2057, "step": 147070 }, { "epoch": 6.09, "grad_norm": 0.7890625, "learning_rate": 0.0004508800874749927, "loss": 0.1769, "step": 147080 }, { "epoch": 6.09, "grad_norm": 0.7578125, "learning_rate": 0.0004508736313878404, "loss": 0.1694, "step": 147090 }, { "epoch": 6.09, "grad_norm": 0.58984375, "learning_rate": 0.00045086717492266563, "loss": 0.1964, "step": 147100 }, { "epoch": 6.09, "grad_norm": 0.8046875, "learning_rate": 0.0004508607180794806, "loss": 0.1832, "step": 147110 }, { "epoch": 6.09, "grad_norm": 1.296875, "learning_rate": 0.0004508542608582973, "loss": 0.2065, "step": 147120 }, { "epoch": 6.09, "grad_norm": 0.7578125, "learning_rate": 0.000450847803259128, "loss": 0.2343, "step": 147130 }, { "epoch": 6.09, "grad_norm": 0.32421875, "learning_rate": 0.00045084134528198486, "loss": 0.2132, "step": 147140 }, { "epoch": 6.09, "grad_norm": 0.466796875, "learning_rate": 0.00045083488692688, "loss": 0.2288, "step": 147150 }, { "epoch": 6.1, "grad_norm": 0.388671875, "learning_rate": 0.0004508284281938255, "loss": 0.2086, "step": 147160 }, { "epoch": 6.1, "grad_norm": 0.44140625, "learning_rate": 0.00045082196908283363, "loss": 0.2024, "step": 147170 }, { "epoch": 6.1, "grad_norm": 2.0, "learning_rate": 0.00045081550959391646, "loss": 0.2009, "step": 147180 }, { "epoch": 6.1, "grad_norm": 0.5078125, "learning_rate": 0.00045080904972708626, "loss": 0.1738, "step": 147190 }, { "epoch": 6.1, "grad_norm": 0.79296875, "learning_rate": 0.0004508025894823551, "loss": 0.247, "step": 147200 }, { "epoch": 6.1, "grad_norm": 0.63671875, "learning_rate": 0.0004507961288597351, "loss": 0.2377, "step": 147210 }, { "epoch": 6.1, "grad_norm": 0.85546875, "learning_rate": 0.00045078966785923845, "loss": 0.1953, "step": 147220 }, { "epoch": 6.1, "grad_norm": 0.69921875, "learning_rate": 0.0004507832064808774, "loss": 0.2449, "step": 147230 }, { "epoch": 6.1, "grad_norm": 1.3515625, "learning_rate": 0.000450776744724664, "loss": 0.1654, "step": 147240 }, { "epoch": 6.1, "grad_norm": 0.80859375, "learning_rate": 0.0004507702825906105, "loss": 0.228, "step": 147250 }, { "epoch": 6.1, "grad_norm": 1.21875, "learning_rate": 0.00045076382007872896, "loss": 0.1511, "step": 147260 }, { "epoch": 6.1, "grad_norm": 0.69921875, "learning_rate": 0.00045075735718903153, "loss": 0.1935, "step": 147270 }, { "epoch": 6.1, "grad_norm": 0.412109375, "learning_rate": 0.0004507508939215306, "loss": 0.1922, "step": 147280 }, { "epoch": 6.1, "grad_norm": 0.57421875, "learning_rate": 0.000450744430276238, "loss": 0.2019, "step": 147290 }, { "epoch": 6.1, "grad_norm": 0.2294921875, "learning_rate": 0.0004507379662531662, "loss": 0.206, "step": 147300 }, { "epoch": 6.1, "grad_norm": 0.75390625, "learning_rate": 0.00045073150185232706, "loss": 0.186, "step": 147310 }, { "epoch": 6.1, "grad_norm": 0.484375, "learning_rate": 0.00045072503707373305, "loss": 0.2382, "step": 147320 }, { "epoch": 6.1, "grad_norm": 0.84375, "learning_rate": 0.00045071857191739616, "loss": 0.226, "step": 147330 }, { "epoch": 6.1, "grad_norm": 0.4140625, "learning_rate": 0.0004507121063833286, "loss": 0.1746, "step": 147340 }, { "epoch": 6.1, "grad_norm": 0.7890625, "learning_rate": 0.0004507056404715425, "loss": 0.1665, "step": 147350 }, { "epoch": 6.1, "grad_norm": 0.54296875, "learning_rate": 0.00045069917418205007, "loss": 0.2413, "step": 147360 }, { "epoch": 6.1, "grad_norm": 0.345703125, "learning_rate": 0.00045069270751486347, "loss": 0.222, "step": 147370 }, { "epoch": 6.1, "grad_norm": 1.5, "learning_rate": 0.0004506862404699949, "loss": 0.1822, "step": 147380 }, { "epoch": 6.1, "grad_norm": 0.74609375, "learning_rate": 0.00045067977304745645, "loss": 0.2626, "step": 147390 }, { "epoch": 6.11, "grad_norm": 1.078125, "learning_rate": 0.00045067330524726037, "loss": 0.2078, "step": 147400 }, { "epoch": 6.11, "grad_norm": 0.490234375, "learning_rate": 0.0004506668370694188, "loss": 0.1752, "step": 147410 }, { "epoch": 6.11, "grad_norm": 0.37890625, "learning_rate": 0.0004506603685139439, "loss": 0.2053, "step": 147420 }, { "epoch": 6.11, "grad_norm": 0.373046875, "learning_rate": 0.0004506538995808478, "loss": 0.2109, "step": 147430 }, { "epoch": 6.11, "grad_norm": 0.61328125, "learning_rate": 0.00045064743027014285, "loss": 0.2506, "step": 147440 }, { "epoch": 6.11, "grad_norm": 0.8359375, "learning_rate": 0.000450640960581841, "loss": 0.2065, "step": 147450 }, { "epoch": 6.11, "grad_norm": 0.94921875, "learning_rate": 0.0004506344905159546, "loss": 0.2131, "step": 147460 }, { "epoch": 6.11, "grad_norm": 0.56640625, "learning_rate": 0.0004506280200724957, "loss": 0.1806, "step": 147470 }, { "epoch": 6.11, "grad_norm": 0.83203125, "learning_rate": 0.00045062154925147656, "loss": 0.2074, "step": 147480 }, { "epoch": 6.11, "grad_norm": 0.470703125, "learning_rate": 0.00045061507805290937, "loss": 0.2024, "step": 147490 }, { "epoch": 6.11, "grad_norm": 1.109375, "learning_rate": 0.00045060860647680624, "loss": 0.2016, "step": 147500 }, { "epoch": 6.11, "grad_norm": 0.859375, "learning_rate": 0.0004506021345231793, "loss": 0.1863, "step": 147510 }, { "epoch": 6.11, "grad_norm": 0.55859375, "learning_rate": 0.0004505956621920409, "loss": 0.2425, "step": 147520 }, { "epoch": 6.11, "grad_norm": 0.17578125, "learning_rate": 0.0004505891894834031, "loss": 0.2081, "step": 147530 }, { "epoch": 6.11, "grad_norm": 0.76953125, "learning_rate": 0.0004505827163972781, "loss": 0.1794, "step": 147540 }, { "epoch": 6.11, "grad_norm": 0.8046875, "learning_rate": 0.0004505762429336781, "loss": 0.2116, "step": 147550 }, { "epoch": 6.11, "grad_norm": 0.71484375, "learning_rate": 0.0004505697690926153, "loss": 0.2255, "step": 147560 }, { "epoch": 6.11, "grad_norm": 0.0, "learning_rate": 0.00045056329487410177, "loss": 0.2028, "step": 147570 }, { "epoch": 6.11, "grad_norm": 2.015625, "learning_rate": 0.00045055682027814983, "loss": 0.2194, "step": 147580 }, { "epoch": 6.11, "grad_norm": 0.212890625, "learning_rate": 0.00045055034530477157, "loss": 0.1672, "step": 147590 }, { "epoch": 6.11, "grad_norm": 0.63671875, "learning_rate": 0.00045054386995397927, "loss": 0.2457, "step": 147600 }, { "epoch": 6.11, "grad_norm": 0.703125, "learning_rate": 0.0004505373942257851, "loss": 0.2049, "step": 147610 }, { "epoch": 6.11, "grad_norm": 1.625, "learning_rate": 0.00045053091812020116, "loss": 0.1573, "step": 147620 }, { "epoch": 6.11, "grad_norm": 1.578125, "learning_rate": 0.00045052444163723964, "loss": 0.2616, "step": 147630 }, { "epoch": 6.12, "grad_norm": 0.76171875, "learning_rate": 0.0004505179647769128, "loss": 0.1705, "step": 147640 }, { "epoch": 6.12, "grad_norm": 0.447265625, "learning_rate": 0.0004505114875392329, "loss": 0.2111, "step": 147650 }, { "epoch": 6.12, "grad_norm": 0.5703125, "learning_rate": 0.000450505009924212, "loss": 0.2262, "step": 147660 }, { "epoch": 6.12, "grad_norm": 0.44140625, "learning_rate": 0.0004504985319318622, "loss": 0.1722, "step": 147670 }, { "epoch": 6.12, "grad_norm": 1.5390625, "learning_rate": 0.0004504920535621959, "loss": 0.2367, "step": 147680 }, { "epoch": 6.12, "grad_norm": 0.78515625, "learning_rate": 0.00045048557481522524, "loss": 0.2184, "step": 147690 }, { "epoch": 6.12, "grad_norm": 0.462890625, "learning_rate": 0.00045047909569096236, "loss": 0.2037, "step": 147700 }, { "epoch": 6.12, "grad_norm": 0.81640625, "learning_rate": 0.00045047261618941936, "loss": 0.2326, "step": 147710 }, { "epoch": 6.12, "grad_norm": 1.0625, "learning_rate": 0.0004504661363106087, "loss": 0.1741, "step": 147720 }, { "epoch": 6.12, "grad_norm": 0.58984375, "learning_rate": 0.00045045965605454235, "loss": 0.2363, "step": 147730 }, { "epoch": 6.12, "grad_norm": 1.03125, "learning_rate": 0.00045045317542123257, "loss": 0.2016, "step": 147740 }, { "epoch": 6.12, "grad_norm": 0.482421875, "learning_rate": 0.0004504466944106916, "loss": 0.2139, "step": 147750 }, { "epoch": 6.12, "grad_norm": 0.5859375, "learning_rate": 0.0004504402130229316, "loss": 0.1834, "step": 147760 }, { "epoch": 6.12, "grad_norm": 0.62109375, "learning_rate": 0.0004504337312579647, "loss": 0.2173, "step": 147770 }, { "epoch": 6.12, "grad_norm": 0.97265625, "learning_rate": 0.0004504272491158032, "loss": 0.2528, "step": 147780 }, { "epoch": 6.12, "grad_norm": 0.5546875, "learning_rate": 0.00045042076659645926, "loss": 0.1975, "step": 147790 }, { "epoch": 6.12, "grad_norm": 0.71875, "learning_rate": 0.0004504142836999451, "loss": 0.2097, "step": 147800 }, { "epoch": 6.12, "grad_norm": 0.53515625, "learning_rate": 0.0004504078004262729, "loss": 0.1629, "step": 147810 }, { "epoch": 6.12, "grad_norm": 0.5078125, "learning_rate": 0.00045040131677545484, "loss": 0.2203, "step": 147820 }, { "epoch": 6.12, "grad_norm": 0.435546875, "learning_rate": 0.00045039483274750313, "loss": 0.1937, "step": 147830 }, { "epoch": 6.12, "grad_norm": 0.341796875, "learning_rate": 0.00045038834834243, "loss": 0.2281, "step": 147840 }, { "epoch": 6.12, "grad_norm": 0.87109375, "learning_rate": 0.0004503818635602477, "loss": 0.2138, "step": 147850 }, { "epoch": 6.12, "grad_norm": 0.62890625, "learning_rate": 0.0004503753784009683, "loss": 0.2326, "step": 147860 }, { "epoch": 6.12, "grad_norm": 0.91796875, "learning_rate": 0.00045036889286460406, "loss": 0.2078, "step": 147870 }, { "epoch": 6.13, "grad_norm": 0.7734375, "learning_rate": 0.00045036240695116726, "loss": 0.2287, "step": 147880 }, { "epoch": 6.13, "grad_norm": 0.765625, "learning_rate": 0.00045035592066067, "loss": 0.1844, "step": 147890 }, { "epoch": 6.13, "grad_norm": 0.416015625, "learning_rate": 0.00045034943399312454, "loss": 0.1806, "step": 147900 }, { "epoch": 6.13, "grad_norm": 0.60546875, "learning_rate": 0.0004503429469485432, "loss": 0.2097, "step": 147910 }, { "epoch": 6.13, "grad_norm": 0.7421875, "learning_rate": 0.0004503364595269379, "loss": 0.1441, "step": 147920 }, { "epoch": 6.13, "grad_norm": 0.7265625, "learning_rate": 0.0004503299717283211, "loss": 0.218, "step": 147930 }, { "epoch": 6.13, "grad_norm": 0.890625, "learning_rate": 0.0004503234835527049, "loss": 0.2218, "step": 147940 }, { "epoch": 6.13, "grad_norm": 1.125, "learning_rate": 0.00045031699500010155, "loss": 0.2079, "step": 147950 }, { "epoch": 6.13, "grad_norm": 0.345703125, "learning_rate": 0.00045031050607052326, "loss": 0.2087, "step": 147960 }, { "epoch": 6.13, "grad_norm": 0.6796875, "learning_rate": 0.0004503040167639822, "loss": 0.1567, "step": 147970 }, { "epoch": 6.13, "grad_norm": 0.55078125, "learning_rate": 0.0004502975270804906, "loss": 0.2003, "step": 147980 }, { "epoch": 6.13, "grad_norm": 0.5234375, "learning_rate": 0.0004502910370200607, "loss": 0.2071, "step": 147990 }, { "epoch": 6.13, "grad_norm": 0.5703125, "learning_rate": 0.00045028454658270473, "loss": 0.1915, "step": 148000 }, { "epoch": 6.13, "grad_norm": 1.1171875, "learning_rate": 0.0004502780557684348, "loss": 0.1965, "step": 148010 }, { "epoch": 6.13, "grad_norm": 0.6640625, "learning_rate": 0.0004502715645772633, "loss": 0.1972, "step": 148020 }, { "epoch": 6.13, "grad_norm": 0.3984375, "learning_rate": 0.0004502650730092023, "loss": 0.1811, "step": 148030 }, { "epoch": 6.13, "grad_norm": 1.078125, "learning_rate": 0.000450258581064264, "loss": 0.2526, "step": 148040 }, { "epoch": 6.13, "grad_norm": 0.5234375, "learning_rate": 0.00045025208874246075, "loss": 0.1639, "step": 148050 }, { "epoch": 6.13, "grad_norm": 0.6796875, "learning_rate": 0.00045024559604380464, "loss": 0.2595, "step": 148060 }, { "epoch": 6.13, "grad_norm": 0.79296875, "learning_rate": 0.000450239102968308, "loss": 0.2274, "step": 148070 }, { "epoch": 6.13, "grad_norm": 0.65234375, "learning_rate": 0.000450232609515983, "loss": 0.2289, "step": 148080 }, { "epoch": 6.13, "grad_norm": 0.2578125, "learning_rate": 0.0004502261156868418, "loss": 0.1981, "step": 148090 }, { "epoch": 6.13, "grad_norm": 1.140625, "learning_rate": 0.00045021962148089666, "loss": 0.1797, "step": 148100 }, { "epoch": 6.13, "grad_norm": 0.640625, "learning_rate": 0.0004502131268981599, "loss": 0.1691, "step": 148110 }, { "epoch": 6.14, "grad_norm": 0.421875, "learning_rate": 0.0004502066319386436, "loss": 0.2056, "step": 148120 }, { "epoch": 6.14, "grad_norm": 1.65625, "learning_rate": 0.00045020013660236006, "loss": 0.2052, "step": 148130 }, { "epoch": 6.14, "grad_norm": 0.73046875, "learning_rate": 0.0004501936408893215, "loss": 0.2523, "step": 148140 }, { "epoch": 6.14, "grad_norm": 0.5, "learning_rate": 0.00045018714479954004, "loss": 0.193, "step": 148150 }, { "epoch": 6.14, "grad_norm": 0.57421875, "learning_rate": 0.000450180648333028, "loss": 0.201, "step": 148160 }, { "epoch": 6.14, "grad_norm": 1.4765625, "learning_rate": 0.00045017415148979767, "loss": 0.1983, "step": 148170 }, { "epoch": 6.14, "grad_norm": 0.78515625, "learning_rate": 0.0004501676542698612, "loss": 0.2127, "step": 148180 }, { "epoch": 6.14, "grad_norm": 0.6328125, "learning_rate": 0.0004501611566732308, "loss": 0.2018, "step": 148190 }, { "epoch": 6.14, "grad_norm": 1.140625, "learning_rate": 0.0004501546586999187, "loss": 0.1963, "step": 148200 }, { "epoch": 6.14, "grad_norm": 0.3671875, "learning_rate": 0.0004501481603499371, "loss": 0.2085, "step": 148210 }, { "epoch": 6.14, "grad_norm": 0.50390625, "learning_rate": 0.0004501416616232984, "loss": 0.1512, "step": 148220 }, { "epoch": 6.14, "grad_norm": 0.5078125, "learning_rate": 0.00045013516252001464, "loss": 0.1924, "step": 148230 }, { "epoch": 6.14, "grad_norm": 0.8671875, "learning_rate": 0.0004501286630400981, "loss": 0.2402, "step": 148240 }, { "epoch": 6.14, "grad_norm": 0.5, "learning_rate": 0.000450122163183561, "loss": 0.238, "step": 148250 }, { "epoch": 6.14, "grad_norm": 0.9921875, "learning_rate": 0.0004501156629504157, "loss": 0.2108, "step": 148260 }, { "epoch": 6.14, "grad_norm": 0.498046875, "learning_rate": 0.00045010916234067424, "loss": 0.2448, "step": 148270 }, { "epoch": 6.14, "grad_norm": 0.921875, "learning_rate": 0.00045010266135434897, "loss": 0.2114, "step": 148280 }, { "epoch": 6.14, "grad_norm": 0.765625, "learning_rate": 0.00045009615999145214, "loss": 0.2154, "step": 148290 }, { "epoch": 6.14, "grad_norm": 0.8203125, "learning_rate": 0.0004500896582519959, "loss": 0.2113, "step": 148300 }, { "epoch": 6.14, "grad_norm": 0.474609375, "learning_rate": 0.0004500831561359926, "loss": 0.2018, "step": 148310 }, { "epoch": 6.14, "grad_norm": 0.5625, "learning_rate": 0.00045007665364345435, "loss": 0.1839, "step": 148320 }, { "epoch": 6.14, "grad_norm": 0.86328125, "learning_rate": 0.00045007015077439346, "loss": 0.1994, "step": 148330 }, { "epoch": 6.14, "grad_norm": 0.55078125, "learning_rate": 0.0004500636475288222, "loss": 0.2139, "step": 148340 }, { "epoch": 6.14, "grad_norm": 1.203125, "learning_rate": 0.0004500571439067527, "loss": 0.2179, "step": 148350 }, { "epoch": 6.15, "grad_norm": 0.0, "learning_rate": 0.00045005063990819725, "loss": 0.2079, "step": 148360 }, { "epoch": 6.15, "grad_norm": 1.234375, "learning_rate": 0.0004500441355331681, "loss": 0.1896, "step": 148370 }, { "epoch": 6.15, "grad_norm": 0.73046875, "learning_rate": 0.00045003763078167746, "loss": 0.2322, "step": 148380 }, { "epoch": 6.15, "grad_norm": 0.73046875, "learning_rate": 0.00045003112565373773, "loss": 0.215, "step": 148390 }, { "epoch": 6.15, "grad_norm": 0.416015625, "learning_rate": 0.00045002462014936083, "loss": 0.2037, "step": 148400 }, { "epoch": 6.15, "grad_norm": 0.75390625, "learning_rate": 0.00045001811426855935, "loss": 0.1801, "step": 148410 }, { "epoch": 6.15, "grad_norm": 0.578125, "learning_rate": 0.0004500116080113453, "loss": 0.2011, "step": 148420 }, { "epoch": 6.15, "grad_norm": 2.3125, "learning_rate": 0.0004500051013777311, "loss": 0.162, "step": 148430 }, { "epoch": 6.15, "grad_norm": 0.6328125, "learning_rate": 0.0004499985943677288, "loss": 0.242, "step": 148440 }, { "epoch": 6.15, "grad_norm": 0.8359375, "learning_rate": 0.00044999208698135086, "loss": 0.2214, "step": 148450 }, { "epoch": 6.15, "grad_norm": 0.72265625, "learning_rate": 0.0004499855792186093, "loss": 0.1997, "step": 148460 }, { "epoch": 6.15, "grad_norm": 1.1015625, "learning_rate": 0.0004499790710795165, "loss": 0.1515, "step": 148470 }, { "epoch": 6.15, "grad_norm": 1.2109375, "learning_rate": 0.0004499725625640847, "loss": 0.2126, "step": 148480 }, { "epoch": 6.15, "grad_norm": 1.453125, "learning_rate": 0.0004499660536723261, "loss": 0.2064, "step": 148490 }, { "epoch": 6.15, "grad_norm": 0.97265625, "learning_rate": 0.000449959544404253, "loss": 0.2425, "step": 148500 }, { "epoch": 6.15, "grad_norm": 2.0625, "learning_rate": 0.00044995303475987765, "loss": 0.2209, "step": 148510 }, { "epoch": 6.15, "grad_norm": 0.58984375, "learning_rate": 0.00044994652473921226, "loss": 0.1575, "step": 148520 }, { "epoch": 6.15, "grad_norm": 1.484375, "learning_rate": 0.0004499400143422691, "loss": 0.2126, "step": 148530 }, { "epoch": 6.15, "grad_norm": 1.46875, "learning_rate": 0.0004499335035690605, "loss": 0.276, "step": 148540 }, { "epoch": 6.15, "grad_norm": 0.83203125, "learning_rate": 0.00044992699241959856, "loss": 0.2298, "step": 148550 }, { "epoch": 6.15, "grad_norm": 1.625, "learning_rate": 0.0004499204808938956, "loss": 0.2053, "step": 148560 }, { "epoch": 6.15, "grad_norm": 0.9296875, "learning_rate": 0.0004499139689919639, "loss": 0.1732, "step": 148570 }, { "epoch": 6.15, "grad_norm": 0.76953125, "learning_rate": 0.00044990745671381573, "loss": 0.2549, "step": 148580 }, { "epoch": 6.15, "grad_norm": 0.8984375, "learning_rate": 0.00044990094405946324, "loss": 0.2059, "step": 148590 }, { "epoch": 6.15, "grad_norm": 0.8125, "learning_rate": 0.00044989443102891883, "loss": 0.1765, "step": 148600 }, { "epoch": 6.16, "grad_norm": 0.55078125, "learning_rate": 0.00044988791762219464, "loss": 0.1859, "step": 148610 }, { "epoch": 6.16, "grad_norm": 0.404296875, "learning_rate": 0.000449881403839303, "loss": 0.171, "step": 148620 }, { "epoch": 6.16, "grad_norm": 0.357421875, "learning_rate": 0.0004498748896802561, "loss": 0.2494, "step": 148630 }, { "epoch": 6.16, "grad_norm": 0.8203125, "learning_rate": 0.00044986837514506635, "loss": 0.2173, "step": 148640 }, { "epoch": 6.16, "grad_norm": 0.59375, "learning_rate": 0.00044986186023374586, "loss": 0.2223, "step": 148650 }, { "epoch": 6.16, "grad_norm": 1.0, "learning_rate": 0.00044985534494630685, "loss": 0.1938, "step": 148660 }, { "epoch": 6.16, "grad_norm": 0.2412109375, "learning_rate": 0.0004498488292827618, "loss": 0.2349, "step": 148670 }, { "epoch": 6.16, "grad_norm": 0.55078125, "learning_rate": 0.00044984231324312273, "loss": 0.1754, "step": 148680 }, { "epoch": 6.16, "grad_norm": 0.62890625, "learning_rate": 0.00044983579682740207, "loss": 0.1642, "step": 148690 }, { "epoch": 6.16, "grad_norm": 1.0390625, "learning_rate": 0.00044982928003561195, "loss": 0.2006, "step": 148700 }, { "epoch": 6.16, "grad_norm": 0.8046875, "learning_rate": 0.0004498227628677648, "loss": 0.2203, "step": 148710 }, { "epoch": 6.16, "grad_norm": 1.015625, "learning_rate": 0.00044981624532387267, "loss": 0.2443, "step": 148720 }, { "epoch": 6.16, "grad_norm": 0.51953125, "learning_rate": 0.000449809727403948, "loss": 0.2436, "step": 148730 }, { "epoch": 6.16, "grad_norm": 1.53125, "learning_rate": 0.000449803209108003, "loss": 0.2559, "step": 148740 }, { "epoch": 6.16, "grad_norm": 1.0546875, "learning_rate": 0.00044979669043605, "loss": 0.2694, "step": 148750 }, { "epoch": 6.16, "grad_norm": 0.73828125, "learning_rate": 0.00044979017138810113, "loss": 0.2072, "step": 148760 }, { "epoch": 6.16, "grad_norm": 0.50390625, "learning_rate": 0.00044978365196416884, "loss": 0.2046, "step": 148770 }, { "epoch": 6.16, "grad_norm": 1.3515625, "learning_rate": 0.00044977713216426515, "loss": 0.2192, "step": 148780 }, { "epoch": 6.16, "grad_norm": 0.89453125, "learning_rate": 0.0004497706119884025, "loss": 0.2412, "step": 148790 }, { "epoch": 6.16, "grad_norm": 0.68359375, "learning_rate": 0.0004497640914365931, "loss": 0.19, "step": 148800 }, { "epoch": 6.16, "grad_norm": 0.890625, "learning_rate": 0.0004497575705088494, "loss": 0.183, "step": 148810 }, { "epoch": 6.16, "grad_norm": 1.46875, "learning_rate": 0.0004497510492051834, "loss": 0.2381, "step": 148820 }, { "epoch": 6.16, "grad_norm": 0.28125, "learning_rate": 0.0004497445275256076, "loss": 0.1663, "step": 148830 }, { "epoch": 6.16, "grad_norm": 0.3515625, "learning_rate": 0.00044973800547013404, "loss": 0.219, "step": 148840 }, { "epoch": 6.17, "grad_norm": 0.6796875, "learning_rate": 0.0004497314830387752, "loss": 0.1791, "step": 148850 }, { "epoch": 6.17, "grad_norm": 0.80078125, "learning_rate": 0.0004497249602315433, "loss": 0.1978, "step": 148860 }, { "epoch": 6.17, "grad_norm": 0.11572265625, "learning_rate": 0.0004497184370484505, "loss": 0.1851, "step": 148870 }, { "epoch": 6.17, "grad_norm": 0.265625, "learning_rate": 0.00044971191348950923, "loss": 0.203, "step": 148880 }, { "epoch": 6.17, "grad_norm": 0.2109375, "learning_rate": 0.00044970538955473174, "loss": 0.1828, "step": 148890 }, { "epoch": 6.17, "grad_norm": 0.78125, "learning_rate": 0.00044969886524413025, "loss": 0.1747, "step": 148900 }, { "epoch": 6.17, "grad_norm": 0.384765625, "learning_rate": 0.000449692340557717, "loss": 0.2232, "step": 148910 }, { "epoch": 6.17, "grad_norm": 1.3984375, "learning_rate": 0.00044968581549550436, "loss": 0.2051, "step": 148920 }, { "epoch": 6.17, "grad_norm": 0.98828125, "learning_rate": 0.00044967929005750463, "loss": 0.217, "step": 148930 }, { "epoch": 6.17, "grad_norm": 0.5703125, "learning_rate": 0.00044967276424373, "loss": 0.2474, "step": 148940 }, { "epoch": 6.17, "grad_norm": 0.3515625, "learning_rate": 0.00044966623805419283, "loss": 0.165, "step": 148950 }, { "epoch": 6.17, "grad_norm": 0.6953125, "learning_rate": 0.00044965971148890527, "loss": 0.2385, "step": 148960 }, { "epoch": 6.17, "grad_norm": 0.0, "learning_rate": 0.0004496531845478798, "loss": 0.2195, "step": 148970 }, { "epoch": 6.17, "grad_norm": 1.4453125, "learning_rate": 0.0004496466572311285, "loss": 0.2734, "step": 148980 }, { "epoch": 6.17, "grad_norm": 0.306640625, "learning_rate": 0.0004496401295386638, "loss": 0.2376, "step": 148990 }, { "epoch": 6.17, "grad_norm": 0.73828125, "learning_rate": 0.00044963360147049793, "loss": 0.2008, "step": 149000 }, { "epoch": 6.17, "grad_norm": 0.55078125, "learning_rate": 0.00044962707302664324, "loss": 0.2096, "step": 149010 }, { "epoch": 6.17, "grad_norm": 0.484375, "learning_rate": 0.00044962054420711185, "loss": 0.1943, "step": 149020 }, { "epoch": 6.17, "grad_norm": 0.609375, "learning_rate": 0.0004496140150119162, "loss": 0.2216, "step": 149030 }, { "epoch": 6.17, "grad_norm": 0.25, "learning_rate": 0.00044960748544106856, "loss": 0.2002, "step": 149040 }, { "epoch": 6.17, "grad_norm": 0.91015625, "learning_rate": 0.00044960095549458117, "loss": 0.2189, "step": 149050 }, { "epoch": 6.17, "grad_norm": 0.578125, "learning_rate": 0.00044959442517246634, "loss": 0.2137, "step": 149060 }, { "epoch": 6.17, "grad_norm": 0.5, "learning_rate": 0.00044958789447473635, "loss": 0.2531, "step": 149070 }, { "epoch": 6.17, "grad_norm": 0.294921875, "learning_rate": 0.0004495813634014035, "loss": 0.211, "step": 149080 }, { "epoch": 6.18, "grad_norm": 0.21484375, "learning_rate": 0.00044957483195248005, "loss": 0.1658, "step": 149090 }, { "epoch": 6.18, "grad_norm": 0.41796875, "learning_rate": 0.0004495683001279783, "loss": 0.1827, "step": 149100 }, { "epoch": 6.18, "grad_norm": 0.70703125, "learning_rate": 0.0004495617679279107, "loss": 0.2045, "step": 149110 }, { "epoch": 6.18, "grad_norm": 2.71875, "learning_rate": 0.0004495552353522893, "loss": 0.2141, "step": 149120 }, { "epoch": 6.18, "grad_norm": 0.451171875, "learning_rate": 0.00044954870240112644, "loss": 0.2016, "step": 149130 }, { "epoch": 6.18, "grad_norm": 1.3828125, "learning_rate": 0.00044954216907443456, "loss": 0.1599, "step": 149140 }, { "epoch": 6.18, "grad_norm": 1.0, "learning_rate": 0.00044953563537222585, "loss": 0.2113, "step": 149150 }, { "epoch": 6.18, "grad_norm": 0.68359375, "learning_rate": 0.0004495291012945126, "loss": 0.2359, "step": 149160 }, { "epoch": 6.18, "grad_norm": 0.45703125, "learning_rate": 0.0004495225668413071, "loss": 0.2168, "step": 149170 }, { "epoch": 6.18, "grad_norm": 0.515625, "learning_rate": 0.00044951603201262173, "loss": 0.1872, "step": 149180 }, { "epoch": 6.18, "grad_norm": 0.34375, "learning_rate": 0.0004495094968084688, "loss": 0.1688, "step": 149190 }, { "epoch": 6.18, "grad_norm": 0.61328125, "learning_rate": 0.00044950296122886035, "loss": 0.2167, "step": 149200 }, { "epoch": 6.18, "grad_norm": 1.0, "learning_rate": 0.00044949642527380903, "loss": 0.1843, "step": 149210 }, { "epoch": 6.18, "grad_norm": 0.8984375, "learning_rate": 0.00044948988894332695, "loss": 0.2336, "step": 149220 }, { "epoch": 6.18, "grad_norm": 0.5546875, "learning_rate": 0.00044948335223742643, "loss": 0.1766, "step": 149230 }, { "epoch": 6.18, "grad_norm": 0.609375, "learning_rate": 0.0004494768151561197, "loss": 0.1688, "step": 149240 }, { "epoch": 6.18, "grad_norm": 0.83203125, "learning_rate": 0.0004494702776994193, "loss": 0.2094, "step": 149250 }, { "epoch": 6.18, "grad_norm": 0.890625, "learning_rate": 0.0004494637398673373, "loss": 0.2118, "step": 149260 }, { "epoch": 6.18, "grad_norm": 0.546875, "learning_rate": 0.00044945720165988606, "loss": 0.1921, "step": 149270 }, { "epoch": 6.18, "grad_norm": 0.4375, "learning_rate": 0.00044945066307707795, "loss": 0.1655, "step": 149280 }, { "epoch": 6.18, "grad_norm": 0.77734375, "learning_rate": 0.0004494441241189252, "loss": 0.1793, "step": 149290 }, { "epoch": 6.18, "grad_norm": 0.470703125, "learning_rate": 0.0004494375847854402, "loss": 0.1969, "step": 149300 }, { "epoch": 6.18, "grad_norm": 1.0078125, "learning_rate": 0.0004494310450766351, "loss": 0.1864, "step": 149310 }, { "epoch": 6.18, "grad_norm": 0.37890625, "learning_rate": 0.00044942450499252243, "loss": 0.1848, "step": 149320 }, { "epoch": 6.19, "grad_norm": 0.640625, "learning_rate": 0.00044941796453311426, "loss": 0.2255, "step": 149330 }, { "epoch": 6.19, "grad_norm": 0.380859375, "learning_rate": 0.00044941142369842306, "loss": 0.1799, "step": 149340 }, { "epoch": 6.19, "grad_norm": 0.6484375, "learning_rate": 0.00044940488248846114, "loss": 0.1835, "step": 149350 }, { "epoch": 6.19, "grad_norm": 0.478515625, "learning_rate": 0.00044939834090324075, "loss": 0.2181, "step": 149360 }, { "epoch": 6.19, "grad_norm": 0.96484375, "learning_rate": 0.00044939179894277413, "loss": 0.2384, "step": 149370 }, { "epoch": 6.19, "grad_norm": 0.7578125, "learning_rate": 0.0004493852566070738, "loss": 0.1819, "step": 149380 }, { "epoch": 6.19, "grad_norm": 1.09375, "learning_rate": 0.00044937871389615194, "loss": 0.1857, "step": 149390 }, { "epoch": 6.19, "grad_norm": 0.470703125, "learning_rate": 0.0004493721708100208, "loss": 0.2034, "step": 149400 }, { "epoch": 6.19, "grad_norm": 2.28125, "learning_rate": 0.0004493656273486928, "loss": 0.2274, "step": 149410 }, { "epoch": 6.19, "grad_norm": 0.56640625, "learning_rate": 0.00044935908351218017, "loss": 0.2297, "step": 149420 }, { "epoch": 6.19, "grad_norm": 1.984375, "learning_rate": 0.00044935253930049535, "loss": 0.1842, "step": 149430 }, { "epoch": 6.19, "grad_norm": 0.283203125, "learning_rate": 0.00044934599471365056, "loss": 0.1599, "step": 149440 }, { "epoch": 6.19, "grad_norm": 1.9140625, "learning_rate": 0.0004493394497516581, "loss": 0.197, "step": 149450 }, { "epoch": 6.19, "grad_norm": 1.25, "learning_rate": 0.00044933290441453034, "loss": 0.2319, "step": 149460 }, { "epoch": 6.19, "grad_norm": 0.47265625, "learning_rate": 0.0004493263587022796, "loss": 0.2096, "step": 149470 }, { "epoch": 6.19, "grad_norm": 0.93359375, "learning_rate": 0.00044931981261491817, "loss": 0.1939, "step": 149480 }, { "epoch": 6.19, "grad_norm": 1.1875, "learning_rate": 0.0004493132661524584, "loss": 0.193, "step": 149490 }, { "epoch": 6.19, "grad_norm": 1.09375, "learning_rate": 0.0004493067193149125, "loss": 0.2328, "step": 149500 }, { "epoch": 6.19, "grad_norm": 0.51953125, "learning_rate": 0.0004493001721022929, "loss": 0.2321, "step": 149510 }, { "epoch": 6.19, "grad_norm": 1.8359375, "learning_rate": 0.000449293624514612, "loss": 0.2116, "step": 149520 }, { "epoch": 6.19, "grad_norm": 0.81640625, "learning_rate": 0.00044928707655188195, "loss": 0.2278, "step": 149530 }, { "epoch": 6.19, "grad_norm": 0.51953125, "learning_rate": 0.0004492805282141151, "loss": 0.1852, "step": 149540 }, { "epoch": 6.19, "grad_norm": 0.412109375, "learning_rate": 0.0004492739795013239, "loss": 0.143, "step": 149550 }, { "epoch": 6.19, "grad_norm": 0.3984375, "learning_rate": 0.00044926743041352046, "loss": 0.2115, "step": 149560 }, { "epoch": 6.2, "grad_norm": 1.1796875, "learning_rate": 0.00044926088095071737, "loss": 0.1908, "step": 149570 }, { "epoch": 6.2, "grad_norm": 0.68359375, "learning_rate": 0.0004492543311129268, "loss": 0.221, "step": 149580 }, { "epoch": 6.2, "grad_norm": 0.9296875, "learning_rate": 0.00044924778090016105, "loss": 0.2466, "step": 149590 }, { "epoch": 6.2, "grad_norm": 2.03125, "learning_rate": 0.0004492412303124325, "loss": 0.23, "step": 149600 }, { "epoch": 6.2, "grad_norm": 0.482421875, "learning_rate": 0.0004492346793497535, "loss": 0.2115, "step": 149610 }, { "epoch": 6.2, "grad_norm": 0.3203125, "learning_rate": 0.00044922812801213635, "loss": 0.1849, "step": 149620 }, { "epoch": 6.2, "grad_norm": 0.828125, "learning_rate": 0.00044922157629959336, "loss": 0.1798, "step": 149630 }, { "epoch": 6.2, "grad_norm": 0.443359375, "learning_rate": 0.0004492150242121369, "loss": 0.1421, "step": 149640 }, { "epoch": 6.2, "grad_norm": 0.0, "learning_rate": 0.00044920847174977925, "loss": 0.1771, "step": 149650 }, { "epoch": 6.2, "grad_norm": 0.92578125, "learning_rate": 0.00044920191891253275, "loss": 0.1822, "step": 149660 }, { "epoch": 6.2, "grad_norm": 0.462890625, "learning_rate": 0.00044919536570040973, "loss": 0.1631, "step": 149670 }, { "epoch": 6.2, "grad_norm": 0.3828125, "learning_rate": 0.0004491888121134227, "loss": 0.1848, "step": 149680 }, { "epoch": 6.2, "grad_norm": 0.765625, "learning_rate": 0.0004491822581515837, "loss": 0.1954, "step": 149690 }, { "epoch": 6.2, "grad_norm": 1.2421875, "learning_rate": 0.00044917570381490524, "loss": 0.2259, "step": 149700 }, { "epoch": 6.2, "grad_norm": 0.462890625, "learning_rate": 0.00044916914910339957, "loss": 0.2031, "step": 149710 }, { "epoch": 6.2, "grad_norm": 0.474609375, "learning_rate": 0.0004491625940170791, "loss": 0.1561, "step": 149720 }, { "epoch": 6.2, "grad_norm": 1.1171875, "learning_rate": 0.00044915603855595615, "loss": 0.1851, "step": 149730 }, { "epoch": 6.2, "grad_norm": 0.890625, "learning_rate": 0.000449149482720043, "loss": 0.1721, "step": 149740 }, { "epoch": 6.2, "grad_norm": 0.46484375, "learning_rate": 0.00044914292650935207, "loss": 0.1529, "step": 149750 }, { "epoch": 6.2, "grad_norm": 1.578125, "learning_rate": 0.0004491363699238956, "loss": 0.1713, "step": 149760 }, { "epoch": 6.2, "grad_norm": 2.484375, "learning_rate": 0.0004491298129636861, "loss": 0.1826, "step": 149770 }, { "epoch": 6.2, "grad_norm": 1.4765625, "learning_rate": 0.00044912325562873574, "loss": 0.1962, "step": 149780 }, { "epoch": 6.2, "grad_norm": 0.98046875, "learning_rate": 0.0004491166979190569, "loss": 0.1835, "step": 149790 }, { "epoch": 6.2, "grad_norm": 0.94140625, "learning_rate": 0.00044911013983466193, "loss": 0.2303, "step": 149800 }, { "epoch": 6.21, "grad_norm": 0.8359375, "learning_rate": 0.0004491035813755632, "loss": 0.2617, "step": 149810 }, { "epoch": 6.21, "grad_norm": 0.546875, "learning_rate": 0.00044909702254177305, "loss": 0.184, "step": 149820 }, { "epoch": 6.21, "grad_norm": 0.89453125, "learning_rate": 0.0004490904633333037, "loss": 0.241, "step": 149830 }, { "epoch": 6.21, "grad_norm": 0.443359375, "learning_rate": 0.0004490839037501677, "loss": 0.2085, "step": 149840 }, { "epoch": 6.21, "grad_norm": 0.6171875, "learning_rate": 0.0004490773437923773, "loss": 0.197, "step": 149850 }, { "epoch": 6.21, "grad_norm": 0.9609375, "learning_rate": 0.0004490707834599448, "loss": 0.1759, "step": 149860 }, { "epoch": 6.21, "grad_norm": 0.796875, "learning_rate": 0.0004490642227528826, "loss": 0.1968, "step": 149870 }, { "epoch": 6.21, "grad_norm": 0.8203125, "learning_rate": 0.000449057661671203, "loss": 0.1868, "step": 149880 }, { "epoch": 6.21, "grad_norm": 0.46875, "learning_rate": 0.0004490511002149184, "loss": 0.1668, "step": 149890 }, { "epoch": 6.21, "grad_norm": 0.4921875, "learning_rate": 0.00044904453838404113, "loss": 0.199, "step": 149900 }, { "epoch": 6.21, "grad_norm": 0.36328125, "learning_rate": 0.00044903797617858355, "loss": 0.213, "step": 149910 }, { "epoch": 6.21, "grad_norm": 0.87109375, "learning_rate": 0.0004490314135985579, "loss": 0.2263, "step": 149920 }, { "epoch": 6.21, "grad_norm": 1.578125, "learning_rate": 0.00044902485064397677, "loss": 0.2432, "step": 149930 }, { "epoch": 6.21, "grad_norm": 0.59765625, "learning_rate": 0.0004490182873148523, "loss": 0.1481, "step": 149940 }, { "epoch": 6.21, "grad_norm": 1.015625, "learning_rate": 0.00044901172361119687, "loss": 0.209, "step": 149950 }, { "epoch": 6.21, "grad_norm": 0.486328125, "learning_rate": 0.00044900515953302285, "loss": 0.1906, "step": 149960 }, { "epoch": 6.21, "grad_norm": 1.59375, "learning_rate": 0.0004489985950803427, "loss": 0.1701, "step": 149970 }, { "epoch": 6.21, "grad_norm": 0.6328125, "learning_rate": 0.00044899203025316857, "loss": 0.2231, "step": 149980 }, { "epoch": 6.21, "grad_norm": 0.400390625, "learning_rate": 0.00044898546505151303, "loss": 0.1802, "step": 149990 }, { "epoch": 6.21, "grad_norm": 0.8515625, "learning_rate": 0.00044897889947538827, "loss": 0.2123, "step": 150000 }, { "epoch": 6.21, "grad_norm": 1.1171875, "learning_rate": 0.0004489723335248067, "loss": 0.2073, "step": 150010 }, { "epoch": 6.21, "grad_norm": 1.09375, "learning_rate": 0.00044896576719978076, "loss": 0.2543, "step": 150020 }, { "epoch": 6.21, "grad_norm": 1.453125, "learning_rate": 0.0004489592005003227, "loss": 0.2283, "step": 150030 }, { "epoch": 6.21, "grad_norm": 1.1640625, "learning_rate": 0.0004489526334264449, "loss": 0.1785, "step": 150040 }, { "epoch": 6.22, "grad_norm": 0.828125, "learning_rate": 0.0004489460659781597, "loss": 0.1677, "step": 150050 }, { "epoch": 6.22, "grad_norm": 0.50390625, "learning_rate": 0.0004489394981554795, "loss": 0.1914, "step": 150060 }, { "epoch": 6.22, "grad_norm": 0.6171875, "learning_rate": 0.00044893292995841673, "loss": 0.2257, "step": 150070 }, { "epoch": 6.22, "grad_norm": 0.546875, "learning_rate": 0.00044892636138698364, "loss": 0.2391, "step": 150080 }, { "epoch": 6.22, "grad_norm": 0.95703125, "learning_rate": 0.0004489197924411925, "loss": 0.2285, "step": 150090 }, { "epoch": 6.22, "grad_norm": 0.46875, "learning_rate": 0.0004489132231210559, "loss": 0.2337, "step": 150100 }, { "epoch": 6.22, "grad_norm": 0.71875, "learning_rate": 0.0004489066534265861, "loss": 0.222, "step": 150110 }, { "epoch": 6.22, "grad_norm": 0.7734375, "learning_rate": 0.0004489000833577954, "loss": 0.2121, "step": 150120 }, { "epoch": 6.22, "grad_norm": 0.41796875, "learning_rate": 0.00044889351291469624, "loss": 0.2094, "step": 150130 }, { "epoch": 6.22, "grad_norm": 0.98046875, "learning_rate": 0.000448886942097301, "loss": 0.1791, "step": 150140 }, { "epoch": 6.22, "grad_norm": 1.0390625, "learning_rate": 0.000448880370905622, "loss": 0.2398, "step": 150150 }, { "epoch": 6.22, "grad_norm": 1.2265625, "learning_rate": 0.0004488737993396716, "loss": 0.1814, "step": 150160 }, { "epoch": 6.22, "grad_norm": 0.8671875, "learning_rate": 0.00044886722739946225, "loss": 0.2434, "step": 150170 }, { "epoch": 6.22, "grad_norm": 0.58203125, "learning_rate": 0.0004488606550850062, "loss": 0.2157, "step": 150180 }, { "epoch": 6.22, "grad_norm": 0.447265625, "learning_rate": 0.00044885408239631584, "loss": 0.1681, "step": 150190 }, { "epoch": 6.22, "grad_norm": 0.328125, "learning_rate": 0.0004488475093334036, "loss": 0.1895, "step": 150200 }, { "epoch": 6.22, "grad_norm": 0.8125, "learning_rate": 0.00044884093589628185, "loss": 0.2053, "step": 150210 }, { "epoch": 6.22, "grad_norm": 0.51171875, "learning_rate": 0.0004488343620849629, "loss": 0.1977, "step": 150220 }, { "epoch": 6.22, "grad_norm": 1.1484375, "learning_rate": 0.00044882778789945917, "loss": 0.2179, "step": 150230 }, { "epoch": 6.22, "grad_norm": 0.7734375, "learning_rate": 0.000448821213339783, "loss": 0.2109, "step": 150240 }, { "epoch": 6.22, "grad_norm": 0.412109375, "learning_rate": 0.00044881463840594683, "loss": 0.1969, "step": 150250 }, { "epoch": 6.22, "grad_norm": 0.671875, "learning_rate": 0.0004488080630979629, "loss": 0.2253, "step": 150260 }, { "epoch": 6.22, "grad_norm": 1.0, "learning_rate": 0.0004488014874158437, "loss": 0.2053, "step": 150270 }, { "epoch": 6.22, "grad_norm": 0.73828125, "learning_rate": 0.0004487949113596016, "loss": 0.244, "step": 150280 }, { "epoch": 6.22, "grad_norm": 0.404296875, "learning_rate": 0.00044878833492924887, "loss": 0.1863, "step": 150290 }, { "epoch": 6.23, "grad_norm": 0.267578125, "learning_rate": 0.00044878175812479806, "loss": 0.2051, "step": 150300 }, { "epoch": 6.23, "grad_norm": 0.58203125, "learning_rate": 0.00044877518094626134, "loss": 0.1466, "step": 150310 }, { "epoch": 6.23, "grad_norm": 0.62890625, "learning_rate": 0.00044876860339365123, "loss": 0.2236, "step": 150320 }, { "epoch": 6.23, "grad_norm": 0.416015625, "learning_rate": 0.0004487620254669801, "loss": 0.1822, "step": 150330 }, { "epoch": 6.23, "grad_norm": 0.61328125, "learning_rate": 0.00044875544716626025, "loss": 0.2213, "step": 150340 }, { "epoch": 6.23, "grad_norm": 1.078125, "learning_rate": 0.0004487488684915042, "loss": 0.1752, "step": 150350 }, { "epoch": 6.23, "grad_norm": 0.7265625, "learning_rate": 0.0004487422894427242, "loss": 0.1715, "step": 150360 }, { "epoch": 6.23, "grad_norm": 0.0, "learning_rate": 0.0004487357100199327, "loss": 0.1678, "step": 150370 }, { "epoch": 6.23, "grad_norm": 2.0, "learning_rate": 0.000448729130223142, "loss": 0.2207, "step": 150380 }, { "epoch": 6.23, "grad_norm": 1.21875, "learning_rate": 0.0004487225500523646, "loss": 0.1894, "step": 150390 }, { "epoch": 6.23, "grad_norm": 0.8125, "learning_rate": 0.00044871596950761273, "loss": 0.2157, "step": 150400 }, { "epoch": 6.23, "grad_norm": 0.578125, "learning_rate": 0.000448709388588899, "loss": 0.2017, "step": 150410 }, { "epoch": 6.23, "grad_norm": 0.6875, "learning_rate": 0.00044870280729623555, "loss": 0.1972, "step": 150420 }, { "epoch": 6.23, "grad_norm": 1.015625, "learning_rate": 0.0004486962256296349, "loss": 0.2085, "step": 150430 }, { "epoch": 6.23, "grad_norm": 0.6171875, "learning_rate": 0.0004486896435891094, "loss": 0.1998, "step": 150440 }, { "epoch": 6.23, "grad_norm": 0.359375, "learning_rate": 0.0004486830611746715, "loss": 0.27, "step": 150450 }, { "epoch": 6.23, "grad_norm": 0.51171875, "learning_rate": 0.0004486764783863335, "loss": 0.2076, "step": 150460 }, { "epoch": 6.23, "grad_norm": 1.6484375, "learning_rate": 0.0004486698952241078, "loss": 0.203, "step": 150470 }, { "epoch": 6.23, "grad_norm": 0.71875, "learning_rate": 0.00044866331168800686, "loss": 0.2045, "step": 150480 }, { "epoch": 6.23, "grad_norm": 1.5859375, "learning_rate": 0.000448656727778043, "loss": 0.253, "step": 150490 }, { "epoch": 6.23, "grad_norm": 0.6484375, "learning_rate": 0.00044865014349422863, "loss": 0.2289, "step": 150500 }, { "epoch": 6.23, "grad_norm": 0.5546875, "learning_rate": 0.0004486435588365761, "loss": 0.2335, "step": 150510 }, { "epoch": 6.23, "grad_norm": 0.72265625, "learning_rate": 0.0004486369738050979, "loss": 0.2047, "step": 150520 }, { "epoch": 6.23, "grad_norm": 1.3515625, "learning_rate": 0.0004486303883998063, "loss": 0.1818, "step": 150530 }, { "epoch": 6.24, "grad_norm": 0.34765625, "learning_rate": 0.00044862380262071386, "loss": 0.235, "step": 150540 }, { "epoch": 6.24, "grad_norm": 0.70703125, "learning_rate": 0.00044861721646783276, "loss": 0.2171, "step": 150550 }, { "epoch": 6.24, "grad_norm": 0.796875, "learning_rate": 0.0004486106299411756, "loss": 0.221, "step": 150560 }, { "epoch": 6.24, "grad_norm": 0.671875, "learning_rate": 0.0004486040430407546, "loss": 0.2203, "step": 150570 }, { "epoch": 6.24, "grad_norm": 0.72265625, "learning_rate": 0.00044859745576658237, "loss": 0.2091, "step": 150580 }, { "epoch": 6.24, "grad_norm": 0.4375, "learning_rate": 0.00044859086811867104, "loss": 0.1903, "step": 150590 }, { "epoch": 6.24, "grad_norm": 0.671875, "learning_rate": 0.0004485842800970332, "loss": 0.1935, "step": 150600 }, { "epoch": 6.24, "grad_norm": 1.1875, "learning_rate": 0.00044857769170168115, "loss": 0.2245, "step": 150610 }, { "epoch": 6.24, "grad_norm": 0.953125, "learning_rate": 0.0004485711029326274, "loss": 0.2059, "step": 150620 }, { "epoch": 6.24, "grad_norm": 0.490234375, "learning_rate": 0.00044856451378988426, "loss": 0.1944, "step": 150630 }, { "epoch": 6.24, "grad_norm": 1.0234375, "learning_rate": 0.0004485579242734641, "loss": 0.2052, "step": 150640 }, { "epoch": 6.24, "grad_norm": 0.625, "learning_rate": 0.00044855133438337943, "loss": 0.151, "step": 150650 }, { "epoch": 6.24, "grad_norm": 3.0, "learning_rate": 0.0004485447441196425, "loss": 0.2046, "step": 150660 }, { "epoch": 6.24, "grad_norm": 1.0234375, "learning_rate": 0.0004485381534822659, "loss": 0.195, "step": 150670 }, { "epoch": 6.24, "grad_norm": 0.55859375, "learning_rate": 0.0004485315624712619, "loss": 0.1786, "step": 150680 }, { "epoch": 6.24, "grad_norm": 0.1513671875, "learning_rate": 0.0004485249710866429, "loss": 0.256, "step": 150690 }, { "epoch": 6.24, "grad_norm": 0.494140625, "learning_rate": 0.00044851837932842145, "loss": 0.2347, "step": 150700 }, { "epoch": 6.24, "grad_norm": 1.359375, "learning_rate": 0.0004485117871966098, "loss": 0.2208, "step": 150710 }, { "epoch": 6.24, "grad_norm": 0.5, "learning_rate": 0.0004485051946912203, "loss": 0.1675, "step": 150720 }, { "epoch": 6.24, "grad_norm": 0.828125, "learning_rate": 0.0004484986018122656, "loss": 0.203, "step": 150730 }, { "epoch": 6.24, "grad_norm": 0.6953125, "learning_rate": 0.0004484920085597579, "loss": 0.1956, "step": 150740 }, { "epoch": 6.24, "grad_norm": 0.0016632080078125, "learning_rate": 0.00044848541493370966, "loss": 0.166, "step": 150750 }, { "epoch": 6.24, "grad_norm": 0.0, "learning_rate": 0.0004484788209341333, "loss": 0.178, "step": 150760 }, { "epoch": 6.24, "grad_norm": 0.71484375, "learning_rate": 0.0004484722265610413, "loss": 0.2365, "step": 150770 }, { "epoch": 6.25, "grad_norm": 0.408203125, "learning_rate": 0.00044846563181444597, "loss": 0.2221, "step": 150780 }, { "epoch": 6.25, "grad_norm": 0.7421875, "learning_rate": 0.00044845903669435975, "loss": 0.2248, "step": 150790 }, { "epoch": 6.25, "grad_norm": 0.71875, "learning_rate": 0.00044845244120079504, "loss": 0.2142, "step": 150800 }, { "epoch": 6.25, "grad_norm": 0.33203125, "learning_rate": 0.0004484458453337643, "loss": 0.1947, "step": 150810 }, { "epoch": 6.25, "grad_norm": 1.3984375, "learning_rate": 0.0004484392490932799, "loss": 0.1771, "step": 150820 }, { "epoch": 6.25, "grad_norm": 0.5234375, "learning_rate": 0.0004484326524793542, "loss": 0.2175, "step": 150830 }, { "epoch": 6.25, "grad_norm": 0.66796875, "learning_rate": 0.00044842605549199975, "loss": 0.2316, "step": 150840 }, { "epoch": 6.25, "grad_norm": 0.87109375, "learning_rate": 0.00044841945813122884, "loss": 0.2206, "step": 150850 }, { "epoch": 6.25, "grad_norm": 0.73828125, "learning_rate": 0.000448412860397054, "loss": 0.2015, "step": 150860 }, { "epoch": 6.25, "grad_norm": 0.66796875, "learning_rate": 0.00044840626228948754, "loss": 0.2206, "step": 150870 }, { "epoch": 6.25, "grad_norm": 0.3671875, "learning_rate": 0.0004483996638085419, "loss": 0.1905, "step": 150880 }, { "epoch": 6.25, "grad_norm": 0.462890625, "learning_rate": 0.0004483930649542296, "loss": 0.2125, "step": 150890 }, { "epoch": 6.25, "grad_norm": 0.75390625, "learning_rate": 0.00044838646572656295, "loss": 0.2412, "step": 150900 }, { "epoch": 6.25, "grad_norm": 0.64453125, "learning_rate": 0.00044837986612555436, "loss": 0.2246, "step": 150910 }, { "epoch": 6.25, "grad_norm": 1.4375, "learning_rate": 0.00044837326615121626, "loss": 0.2109, "step": 150920 }, { "epoch": 6.25, "grad_norm": 0.4765625, "learning_rate": 0.0004483666658035611, "loss": 0.1893, "step": 150930 }, { "epoch": 6.25, "grad_norm": 0.8046875, "learning_rate": 0.0004483600650826014, "loss": 0.2106, "step": 150940 }, { "epoch": 6.25, "grad_norm": 0.462890625, "learning_rate": 0.00044835346398834945, "loss": 0.2346, "step": 150950 }, { "epoch": 6.25, "grad_norm": 0.59375, "learning_rate": 0.0004483468625208176, "loss": 0.1935, "step": 150960 }, { "epoch": 6.25, "grad_norm": 1.359375, "learning_rate": 0.0004483402606800184, "loss": 0.2191, "step": 150970 }, { "epoch": 6.25, "grad_norm": 0.44140625, "learning_rate": 0.0004483336584659643, "loss": 0.1991, "step": 150980 }, { "epoch": 6.25, "grad_norm": 0.439453125, "learning_rate": 0.0004483270558786676, "loss": 0.1852, "step": 150990 }, { "epoch": 6.25, "grad_norm": 0.4765625, "learning_rate": 0.0004483204529181409, "loss": 0.2044, "step": 151000 }, { "epoch": 6.25, "grad_norm": 0.984375, "learning_rate": 0.0004483138495843965, "loss": 0.1672, "step": 151010 }, { "epoch": 6.26, "grad_norm": 0.6640625, "learning_rate": 0.0004483072458774468, "loss": 0.2276, "step": 151020 }, { "epoch": 6.26, "grad_norm": 0.53515625, "learning_rate": 0.0004483006417973043, "loss": 0.1782, "step": 151030 }, { "epoch": 6.26, "grad_norm": 0.5703125, "learning_rate": 0.00044829403734398144, "loss": 0.2444, "step": 151040 }, { "epoch": 6.26, "grad_norm": 1.46875, "learning_rate": 0.0004482874325174906, "loss": 0.2087, "step": 151050 }, { "epoch": 6.26, "grad_norm": 0.3671875, "learning_rate": 0.0004482808273178442, "loss": 0.2433, "step": 151060 }, { "epoch": 6.26, "grad_norm": 0.5078125, "learning_rate": 0.00044827422174505474, "loss": 0.1968, "step": 151070 }, { "epoch": 6.26, "grad_norm": 0.90625, "learning_rate": 0.0004482676157991346, "loss": 0.2177, "step": 151080 }, { "epoch": 6.26, "grad_norm": 0.625, "learning_rate": 0.0004482610094800961, "loss": 0.2373, "step": 151090 }, { "epoch": 6.26, "grad_norm": 0.451171875, "learning_rate": 0.00044825440278795195, "loss": 0.2194, "step": 151100 }, { "epoch": 6.26, "grad_norm": 0.76171875, "learning_rate": 0.00044824779572271437, "loss": 0.2069, "step": 151110 }, { "epoch": 6.26, "grad_norm": 2.328125, "learning_rate": 0.00044824118828439577, "loss": 0.2173, "step": 151120 }, { "epoch": 6.26, "grad_norm": 0.76171875, "learning_rate": 0.00044823458047300874, "loss": 0.1965, "step": 151130 }, { "epoch": 6.26, "grad_norm": 0.52734375, "learning_rate": 0.0004482279722885656, "loss": 0.202, "step": 151140 }, { "epoch": 6.26, "grad_norm": 0.5546875, "learning_rate": 0.00044822136373107884, "loss": 0.2482, "step": 151150 }, { "epoch": 6.26, "grad_norm": 0.6796875, "learning_rate": 0.000448214754800561, "loss": 0.1803, "step": 151160 }, { "epoch": 6.26, "grad_norm": 0.734375, "learning_rate": 0.0004482081454970242, "loss": 0.2256, "step": 151170 }, { "epoch": 6.26, "grad_norm": 1.2890625, "learning_rate": 0.00044820153582048115, "loss": 0.2481, "step": 151180 }, { "epoch": 6.26, "grad_norm": 0.84375, "learning_rate": 0.00044819492577094424, "loss": 0.2086, "step": 151190 }, { "epoch": 6.26, "grad_norm": 0.4140625, "learning_rate": 0.00044818831534842584, "loss": 0.174, "step": 151200 }, { "epoch": 6.26, "grad_norm": 0.357421875, "learning_rate": 0.0004481817045529385, "loss": 0.2132, "step": 151210 }, { "epoch": 6.26, "grad_norm": 0.92578125, "learning_rate": 0.0004481750933844945, "loss": 0.2157, "step": 151220 }, { "epoch": 6.26, "grad_norm": 0.58203125, "learning_rate": 0.0004481684818431064, "loss": 0.2395, "step": 151230 }, { "epoch": 6.26, "grad_norm": 3.140625, "learning_rate": 0.0004481618699287867, "loss": 0.2322, "step": 151240 }, { "epoch": 6.26, "grad_norm": 0.6796875, "learning_rate": 0.00044815525764154774, "loss": 0.2035, "step": 151250 }, { "epoch": 6.27, "grad_norm": 0.85546875, "learning_rate": 0.0004481486449814019, "loss": 0.2066, "step": 151260 }, { "epoch": 6.27, "grad_norm": 1.03125, "learning_rate": 0.00044814203194836177, "loss": 0.2372, "step": 151270 }, { "epoch": 6.27, "grad_norm": 0.52734375, "learning_rate": 0.0004481354185424397, "loss": 0.2028, "step": 151280 }, { "epoch": 6.27, "grad_norm": 0.1337890625, "learning_rate": 0.0004481288047636482, "loss": 0.1921, "step": 151290 }, { "epoch": 6.27, "grad_norm": 0.6171875, "learning_rate": 0.00044812219061199967, "loss": 0.1819, "step": 151300 }, { "epoch": 6.27, "grad_norm": 0.82421875, "learning_rate": 0.0004481155760875066, "loss": 0.2251, "step": 151310 }, { "epoch": 6.27, "grad_norm": 0.64453125, "learning_rate": 0.0004481089611901814, "loss": 0.2528, "step": 151320 }, { "epoch": 6.27, "grad_norm": 0.7265625, "learning_rate": 0.0004481023459200365, "loss": 0.2287, "step": 151330 }, { "epoch": 6.27, "grad_norm": 1.078125, "learning_rate": 0.0004480957302770844, "loss": 0.2066, "step": 151340 }, { "epoch": 6.27, "grad_norm": 0.98828125, "learning_rate": 0.00044808911426133746, "loss": 0.1932, "step": 151350 }, { "epoch": 6.27, "grad_norm": 0.7109375, "learning_rate": 0.0004480824978728083, "loss": 0.1821, "step": 151360 }, { "epoch": 6.27, "grad_norm": 0.443359375, "learning_rate": 0.0004480758811115092, "loss": 0.1965, "step": 151370 }, { "epoch": 6.27, "grad_norm": 1.0625, "learning_rate": 0.00044806926397745274, "loss": 0.222, "step": 151380 }, { "epoch": 6.27, "grad_norm": 0.62109375, "learning_rate": 0.0004480626464706513, "loss": 0.2071, "step": 151390 }, { "epoch": 6.27, "grad_norm": 0.66796875, "learning_rate": 0.00044805602859111733, "loss": 0.2221, "step": 151400 }, { "epoch": 6.27, "grad_norm": 0.9140625, "learning_rate": 0.0004480494103388633, "loss": 0.2125, "step": 151410 }, { "epoch": 6.27, "grad_norm": 0.46484375, "learning_rate": 0.00044804279171390174, "loss": 0.1873, "step": 151420 }, { "epoch": 6.27, "grad_norm": 0.515625, "learning_rate": 0.0004480361727162449, "loss": 0.2082, "step": 151430 }, { "epoch": 6.27, "grad_norm": 0.453125, "learning_rate": 0.00044802955334590546, "loss": 0.1665, "step": 151440 }, { "epoch": 6.27, "grad_norm": 1.5859375, "learning_rate": 0.00044802293360289574, "loss": 0.2425, "step": 151450 }, { "epoch": 6.27, "grad_norm": 0.494140625, "learning_rate": 0.0004480163134872283, "loss": 0.1804, "step": 151460 }, { "epoch": 6.27, "grad_norm": 1.375, "learning_rate": 0.0004480096929989155, "loss": 0.1988, "step": 151470 }, { "epoch": 6.27, "grad_norm": 0.0, "learning_rate": 0.00044800307213796985, "loss": 0.2481, "step": 151480 }, { "epoch": 6.27, "grad_norm": 0.0, "learning_rate": 0.00044799645090440375, "loss": 0.2219, "step": 151490 }, { "epoch": 6.28, "grad_norm": 0.76171875, "learning_rate": 0.00044798982929822984, "loss": 0.1684, "step": 151500 }, { "epoch": 6.28, "grad_norm": 0.62109375, "learning_rate": 0.00044798320731946037, "loss": 0.2068, "step": 151510 }, { "epoch": 6.28, "grad_norm": 0.0, "learning_rate": 0.0004479765849681079, "loss": 0.2306, "step": 151520 }, { "epoch": 6.28, "grad_norm": 0.69921875, "learning_rate": 0.0004479699622441848, "loss": 0.2529, "step": 151530 }, { "epoch": 6.28, "grad_norm": 0.57421875, "learning_rate": 0.0004479633391477037, "loss": 0.2169, "step": 151540 }, { "epoch": 6.28, "grad_norm": 0.890625, "learning_rate": 0.00044795671567867697, "loss": 0.2042, "step": 151550 }, { "epoch": 6.28, "grad_norm": 0.55859375, "learning_rate": 0.00044795009183711703, "loss": 0.2359, "step": 151560 }, { "epoch": 6.28, "grad_norm": 0.73828125, "learning_rate": 0.00044794346762303645, "loss": 0.1882, "step": 151570 }, { "epoch": 6.28, "grad_norm": 0.51953125, "learning_rate": 0.00044793684303644763, "loss": 0.2217, "step": 151580 }, { "epoch": 6.28, "grad_norm": 1.328125, "learning_rate": 0.000447930218077363, "loss": 0.1918, "step": 151590 }, { "epoch": 6.28, "grad_norm": 1.09375, "learning_rate": 0.0004479235927457951, "loss": 0.1478, "step": 151600 }, { "epoch": 6.28, "grad_norm": 0.9375, "learning_rate": 0.0004479169670417564, "loss": 0.1782, "step": 151610 }, { "epoch": 6.28, "grad_norm": 0.515625, "learning_rate": 0.00044791034096525927, "loss": 0.2065, "step": 151620 }, { "epoch": 6.28, "grad_norm": 1.296875, "learning_rate": 0.00044790371451631627, "loss": 0.2354, "step": 151630 }, { "epoch": 6.28, "grad_norm": 0.50390625, "learning_rate": 0.0004478970876949399, "loss": 0.2055, "step": 151640 }, { "epoch": 6.28, "grad_norm": 1.0, "learning_rate": 0.0004478904605011426, "loss": 0.1576, "step": 151650 }, { "epoch": 6.28, "grad_norm": 0.59765625, "learning_rate": 0.00044788383293493673, "loss": 0.1925, "step": 151660 }, { "epoch": 6.28, "grad_norm": 1.0703125, "learning_rate": 0.00044787720499633497, "loss": 0.2204, "step": 151670 }, { "epoch": 6.28, "grad_norm": 1.203125, "learning_rate": 0.0004478705766853496, "loss": 0.1956, "step": 151680 }, { "epoch": 6.28, "grad_norm": 1.203125, "learning_rate": 0.0004478639480019932, "loss": 0.2101, "step": 151690 }, { "epoch": 6.28, "grad_norm": 1.1015625, "learning_rate": 0.00044785731894627826, "loss": 0.2091, "step": 151700 }, { "epoch": 6.28, "grad_norm": 0.5703125, "learning_rate": 0.0004478506895182171, "loss": 0.1688, "step": 151710 }, { "epoch": 6.28, "grad_norm": 0.796875, "learning_rate": 0.0004478440597178224, "loss": 0.1761, "step": 151720 }, { "epoch": 6.28, "grad_norm": 0.765625, "learning_rate": 0.0004478374295451065, "loss": 0.2197, "step": 151730 }, { "epoch": 6.29, "grad_norm": 1.15625, "learning_rate": 0.00044783079900008194, "loss": 0.2229, "step": 151740 }, { "epoch": 6.29, "grad_norm": 0.61328125, "learning_rate": 0.0004478241680827612, "loss": 0.1909, "step": 151750 }, { "epoch": 6.29, "grad_norm": 0.875, "learning_rate": 0.0004478175367931567, "loss": 0.192, "step": 151760 }, { "epoch": 6.29, "grad_norm": 0.408203125, "learning_rate": 0.000447810905131281, "loss": 0.1789, "step": 151770 }, { "epoch": 6.29, "grad_norm": 0.494140625, "learning_rate": 0.00044780427309714646, "loss": 0.1794, "step": 151780 }, { "epoch": 6.29, "grad_norm": 0.0, "learning_rate": 0.0004477976406907657, "loss": 0.2009, "step": 151790 }, { "epoch": 6.29, "grad_norm": 0.546875, "learning_rate": 0.0004477910079121511, "loss": 0.1768, "step": 151800 }, { "epoch": 6.29, "grad_norm": 0.8046875, "learning_rate": 0.0004477843747613153, "loss": 0.1982, "step": 151810 }, { "epoch": 6.29, "grad_norm": 0.5703125, "learning_rate": 0.00044777774123827053, "loss": 0.2249, "step": 151820 }, { "epoch": 6.29, "grad_norm": 2.03125, "learning_rate": 0.00044777110734302945, "loss": 0.2337, "step": 151830 }, { "epoch": 6.29, "grad_norm": 0.59765625, "learning_rate": 0.00044776447307560453, "loss": 0.1868, "step": 151840 }, { "epoch": 6.29, "grad_norm": 1.2421875, "learning_rate": 0.0004477578384360082, "loss": 0.2038, "step": 151850 }, { "epoch": 6.29, "grad_norm": 0.6484375, "learning_rate": 0.000447751203424253, "loss": 0.1949, "step": 151860 }, { "epoch": 6.29, "grad_norm": 0.5390625, "learning_rate": 0.00044774456804035133, "loss": 0.1894, "step": 151870 }, { "epoch": 6.29, "grad_norm": 1.234375, "learning_rate": 0.0004477379322843158, "loss": 0.1823, "step": 151880 }, { "epoch": 6.29, "grad_norm": 0.59375, "learning_rate": 0.00044773129615615883, "loss": 0.2172, "step": 151890 }, { "epoch": 6.29, "grad_norm": 0.57421875, "learning_rate": 0.00044772465965589294, "loss": 0.2071, "step": 151900 }, { "epoch": 6.29, "grad_norm": 0.5234375, "learning_rate": 0.00044771802278353054, "loss": 0.186, "step": 151910 }, { "epoch": 6.29, "grad_norm": 1.0546875, "learning_rate": 0.0004477113855390842, "loss": 0.2282, "step": 151920 }, { "epoch": 6.29, "grad_norm": 0.4140625, "learning_rate": 0.0004477047479225663, "loss": 0.1975, "step": 151930 }, { "epoch": 6.29, "grad_norm": 1.8515625, "learning_rate": 0.0004476981099339895, "loss": 0.1843, "step": 151940 }, { "epoch": 6.29, "grad_norm": 0.435546875, "learning_rate": 0.0004476914715733662, "loss": 0.2241, "step": 151950 }, { "epoch": 6.29, "grad_norm": 1.125, "learning_rate": 0.00044768483284070895, "loss": 0.1495, "step": 151960 }, { "epoch": 6.29, "grad_norm": 0.2353515625, "learning_rate": 0.0004476781937360301, "loss": 0.2125, "step": 151970 }, { "epoch": 6.29, "grad_norm": 0.828125, "learning_rate": 0.00044767155425934227, "loss": 0.186, "step": 151980 }, { "epoch": 6.3, "grad_norm": 0.69921875, "learning_rate": 0.00044766491441065794, "loss": 0.1468, "step": 151990 }, { "epoch": 6.3, "grad_norm": 0.66015625, "learning_rate": 0.00044765827418998954, "loss": 0.2213, "step": 152000 }, { "epoch": 6.3, "grad_norm": 0.455078125, "learning_rate": 0.0004476516335973497, "loss": 0.2045, "step": 152010 }, { "epoch": 6.3, "grad_norm": 0.4765625, "learning_rate": 0.0004476449926327507, "loss": 0.1973, "step": 152020 }, { "epoch": 6.3, "grad_norm": 2.0, "learning_rate": 0.0004476383512962052, "loss": 0.1972, "step": 152030 }, { "epoch": 6.3, "grad_norm": 0.65625, "learning_rate": 0.0004476317095877258, "loss": 0.2515, "step": 152040 }, { "epoch": 6.3, "grad_norm": 0.6171875, "learning_rate": 0.0004476250675073248, "loss": 0.2004, "step": 152050 }, { "epoch": 6.3, "grad_norm": 0.7578125, "learning_rate": 0.00044761842505501473, "loss": 0.224, "step": 152060 }, { "epoch": 6.3, "grad_norm": 1.3046875, "learning_rate": 0.0004476117822308081, "loss": 0.213, "step": 152070 }, { "epoch": 6.3, "grad_norm": 0.7109375, "learning_rate": 0.00044760513903471745, "loss": 0.2297, "step": 152080 }, { "epoch": 6.3, "grad_norm": 2.265625, "learning_rate": 0.0004475984954667553, "loss": 0.2457, "step": 152090 }, { "epoch": 6.3, "grad_norm": 0.5859375, "learning_rate": 0.0004475918515269341, "loss": 0.1795, "step": 152100 }, { "epoch": 6.3, "grad_norm": 0.4921875, "learning_rate": 0.0004475852072152664, "loss": 0.1838, "step": 152110 }, { "epoch": 6.3, "grad_norm": 0.462890625, "learning_rate": 0.0004475785625317647, "loss": 0.2003, "step": 152120 }, { "epoch": 6.3, "grad_norm": 0.435546875, "learning_rate": 0.0004475719174764414, "loss": 0.2045, "step": 152130 }, { "epoch": 6.3, "grad_norm": 0.64453125, "learning_rate": 0.00044756527204930905, "loss": 0.2094, "step": 152140 }, { "epoch": 6.3, "grad_norm": 0.640625, "learning_rate": 0.00044755862625038036, "loss": 0.2216, "step": 152150 }, { "epoch": 6.3, "grad_norm": 1.203125, "learning_rate": 0.00044755198007966757, "loss": 0.1368, "step": 152160 }, { "epoch": 6.3, "grad_norm": 1.0703125, "learning_rate": 0.0004475453335371833, "loss": 0.2213, "step": 152170 }, { "epoch": 6.3, "grad_norm": 0.70703125, "learning_rate": 0.00044753868662294, "loss": 0.209, "step": 152180 }, { "epoch": 6.3, "grad_norm": 1.296875, "learning_rate": 0.0004475320393369503, "loss": 0.1751, "step": 152190 }, { "epoch": 6.3, "grad_norm": 0.58984375, "learning_rate": 0.0004475253916792266, "loss": 0.1946, "step": 152200 }, { "epoch": 6.3, "grad_norm": 0.458984375, "learning_rate": 0.0004475187436497814, "loss": 0.226, "step": 152210 }, { "epoch": 6.3, "grad_norm": 0.7265625, "learning_rate": 0.00044751209524862735, "loss": 0.1933, "step": 152220 }, { "epoch": 6.31, "grad_norm": 0.3515625, "learning_rate": 0.0004475054464757767, "loss": 0.1801, "step": 152230 }, { "epoch": 6.31, "grad_norm": 0.73046875, "learning_rate": 0.0004474987973312423, "loss": 0.1642, "step": 152240 }, { "epoch": 6.31, "grad_norm": 0.88671875, "learning_rate": 0.00044749214781503643, "loss": 0.2292, "step": 152250 }, { "epoch": 6.31, "grad_norm": 0.75, "learning_rate": 0.0004474854979271717, "loss": 0.1755, "step": 152260 }, { "epoch": 6.31, "grad_norm": 0.330078125, "learning_rate": 0.0004474788476676606, "loss": 0.1557, "step": 152270 }, { "epoch": 6.31, "grad_norm": 0.671875, "learning_rate": 0.00044747219703651555, "loss": 0.1783, "step": 152280 }, { "epoch": 6.31, "grad_norm": 0.80859375, "learning_rate": 0.0004474655460337492, "loss": 0.1665, "step": 152290 }, { "epoch": 6.31, "grad_norm": 0.33203125, "learning_rate": 0.000447458894659374, "loss": 0.1837, "step": 152300 }, { "epoch": 6.31, "grad_norm": 0.5703125, "learning_rate": 0.0004474522429134025, "loss": 0.1971, "step": 152310 }, { "epoch": 6.31, "grad_norm": 0.87890625, "learning_rate": 0.00044744559079584723, "loss": 0.1608, "step": 152320 }, { "epoch": 6.31, "grad_norm": 0.376953125, "learning_rate": 0.0004474389383067206, "loss": 0.1447, "step": 152330 }, { "epoch": 6.31, "grad_norm": 0.515625, "learning_rate": 0.00044743228544603524, "loss": 0.223, "step": 152340 }, { "epoch": 6.31, "grad_norm": 0.40234375, "learning_rate": 0.00044742563221380365, "loss": 0.155, "step": 152350 }, { "epoch": 6.31, "grad_norm": 0.69140625, "learning_rate": 0.0004474189786100384, "loss": 0.1923, "step": 152360 }, { "epoch": 6.31, "grad_norm": 0.53515625, "learning_rate": 0.0004474123246347519, "loss": 0.2206, "step": 152370 }, { "epoch": 6.31, "grad_norm": 0.828125, "learning_rate": 0.00044740567028795665, "loss": 0.1785, "step": 152380 }, { "epoch": 6.31, "grad_norm": 0.6953125, "learning_rate": 0.0004473990155696653, "loss": 0.2406, "step": 152390 }, { "epoch": 6.31, "grad_norm": 0.4921875, "learning_rate": 0.0004473923604798904, "loss": 0.2173, "step": 152400 }, { "epoch": 6.31, "grad_norm": 0.5703125, "learning_rate": 0.00044738570501864434, "loss": 0.2276, "step": 152410 }, { "epoch": 6.31, "grad_norm": 0.7109375, "learning_rate": 0.00044737904918593964, "loss": 0.2164, "step": 152420 }, { "epoch": 6.31, "grad_norm": 0.5078125, "learning_rate": 0.000447372392981789, "loss": 0.2466, "step": 152430 }, { "epoch": 6.31, "grad_norm": 0.78515625, "learning_rate": 0.00044736573640620476, "loss": 0.2208, "step": 152440 }, { "epoch": 6.31, "grad_norm": 0.7578125, "learning_rate": 0.0004473590794591995, "loss": 0.1849, "step": 152450 }, { "epoch": 6.31, "grad_norm": 0.453125, "learning_rate": 0.00044735242214078574, "loss": 0.2017, "step": 152460 }, { "epoch": 6.32, "grad_norm": 1.234375, "learning_rate": 0.00044734576445097613, "loss": 0.2249, "step": 152470 }, { "epoch": 6.32, "grad_norm": 0.7578125, "learning_rate": 0.0004473391063897831, "loss": 0.2026, "step": 152480 }, { "epoch": 6.32, "grad_norm": 0.87890625, "learning_rate": 0.0004473324479572191, "loss": 0.1573, "step": 152490 }, { "epoch": 6.32, "grad_norm": 0.75, "learning_rate": 0.0004473257891532968, "loss": 0.2193, "step": 152500 }, { "epoch": 6.32, "grad_norm": 0.703125, "learning_rate": 0.00044731912997802867, "loss": 0.1875, "step": 152510 }, { "epoch": 6.32, "grad_norm": 0.875, "learning_rate": 0.0004473124704314272, "loss": 0.1946, "step": 152520 }, { "epoch": 6.32, "grad_norm": 1.5, "learning_rate": 0.000447305810513505, "loss": 0.2048, "step": 152530 }, { "epoch": 6.32, "grad_norm": 0.6328125, "learning_rate": 0.0004472991502242746, "loss": 0.2146, "step": 152540 }, { "epoch": 6.32, "grad_norm": 0.734375, "learning_rate": 0.00044729248956374844, "loss": 0.2449, "step": 152550 }, { "epoch": 6.32, "grad_norm": 0.52734375, "learning_rate": 0.00044728582853193914, "loss": 0.1776, "step": 152560 }, { "epoch": 6.32, "grad_norm": 1.796875, "learning_rate": 0.0004472791671288593, "loss": 0.2177, "step": 152570 }, { "epoch": 6.32, "grad_norm": 1.1796875, "learning_rate": 0.0004472725053545212, "loss": 0.2195, "step": 152580 }, { "epoch": 6.32, "grad_norm": 0.55078125, "learning_rate": 0.0004472658432089377, "loss": 0.1815, "step": 152590 }, { "epoch": 6.32, "grad_norm": 1.28125, "learning_rate": 0.00044725918069212113, "loss": 0.1649, "step": 152600 }, { "epoch": 6.32, "grad_norm": 1.03125, "learning_rate": 0.00044725251780408415, "loss": 0.2286, "step": 152610 }, { "epoch": 6.32, "grad_norm": 0.67578125, "learning_rate": 0.0004472458545448391, "loss": 0.2023, "step": 152620 }, { "epoch": 6.32, "grad_norm": 0.72265625, "learning_rate": 0.0004472391909143988, "loss": 0.1837, "step": 152630 }, { "epoch": 6.32, "grad_norm": 0.875, "learning_rate": 0.00044723252691277555, "loss": 0.1946, "step": 152640 }, { "epoch": 6.32, "grad_norm": 1.2265625, "learning_rate": 0.00044722586253998197, "loss": 0.189, "step": 152650 }, { "epoch": 6.32, "grad_norm": 0.2275390625, "learning_rate": 0.0004472191977960307, "loss": 0.2011, "step": 152660 }, { "epoch": 6.32, "grad_norm": 0.384765625, "learning_rate": 0.00044721253268093414, "loss": 0.2712, "step": 152670 }, { "epoch": 6.32, "grad_norm": 1.65625, "learning_rate": 0.00044720586719470493, "loss": 0.1893, "step": 152680 }, { "epoch": 6.32, "grad_norm": 0.953125, "learning_rate": 0.00044719920133735555, "loss": 0.2144, "step": 152690 }, { "epoch": 6.32, "grad_norm": 0.51171875, "learning_rate": 0.00044719253510889855, "loss": 0.193, "step": 152700 }, { "epoch": 6.33, "grad_norm": 0.5078125, "learning_rate": 0.0004471858685093465, "loss": 0.215, "step": 152710 }, { "epoch": 6.33, "grad_norm": 1.015625, "learning_rate": 0.00044717920153871196, "loss": 0.1945, "step": 152720 }, { "epoch": 6.33, "grad_norm": 0.1943359375, "learning_rate": 0.00044717253419700745, "loss": 0.1996, "step": 152730 }, { "epoch": 6.33, "grad_norm": 0.859375, "learning_rate": 0.0004471658664842455, "loss": 0.2364, "step": 152740 }, { "epoch": 6.33, "grad_norm": 0.484375, "learning_rate": 0.0004471591984004387, "loss": 0.2218, "step": 152750 }, { "epoch": 6.33, "grad_norm": 0.494140625, "learning_rate": 0.00044715252994559953, "loss": 0.1998, "step": 152760 }, { "epoch": 6.33, "grad_norm": 0.7265625, "learning_rate": 0.00044714586111974065, "loss": 0.2081, "step": 152770 }, { "epoch": 6.33, "grad_norm": 0.404296875, "learning_rate": 0.00044713919192287455, "loss": 0.233, "step": 152780 }, { "epoch": 6.33, "grad_norm": 0.28515625, "learning_rate": 0.0004471325223550138, "loss": 0.1615, "step": 152790 }, { "epoch": 6.33, "grad_norm": 0.482421875, "learning_rate": 0.0004471258524161709, "loss": 0.2051, "step": 152800 }, { "epoch": 6.33, "grad_norm": 0.56640625, "learning_rate": 0.00044711918210635837, "loss": 0.1966, "step": 152810 }, { "epoch": 6.33, "grad_norm": 0.380859375, "learning_rate": 0.0004471125114255889, "loss": 0.1933, "step": 152820 }, { "epoch": 6.33, "grad_norm": 0.78515625, "learning_rate": 0.0004471058403738749, "loss": 0.2135, "step": 152830 }, { "epoch": 6.33, "grad_norm": 0.59375, "learning_rate": 0.00044709916895122916, "loss": 0.1841, "step": 152840 }, { "epoch": 6.33, "grad_norm": 0.515625, "learning_rate": 0.00044709249715766395, "loss": 0.1886, "step": 152850 }, { "epoch": 6.33, "grad_norm": 1.0625, "learning_rate": 0.00044708582499319193, "loss": 0.201, "step": 152860 }, { "epoch": 6.33, "grad_norm": 0.69140625, "learning_rate": 0.00044707915245782574, "loss": 0.2541, "step": 152870 }, { "epoch": 6.33, "grad_norm": 0.83984375, "learning_rate": 0.0004470724795515778, "loss": 0.2947, "step": 152880 }, { "epoch": 6.33, "grad_norm": 0.92578125, "learning_rate": 0.0004470658062744607, "loss": 0.26, "step": 152890 }, { "epoch": 6.33, "grad_norm": 0.578125, "learning_rate": 0.00044705913262648713, "loss": 0.2188, "step": 152900 }, { "epoch": 6.33, "grad_norm": 0.55859375, "learning_rate": 0.0004470524586076695, "loss": 0.1955, "step": 152910 }, { "epoch": 6.33, "grad_norm": 0.64453125, "learning_rate": 0.00044704578421802046, "loss": 0.216, "step": 152920 }, { "epoch": 6.33, "grad_norm": 0.9140625, "learning_rate": 0.0004470391094575525, "loss": 0.1748, "step": 152930 }, { "epoch": 6.33, "grad_norm": 0.490234375, "learning_rate": 0.0004470324343262782, "loss": 0.2114, "step": 152940 }, { "epoch": 6.34, "grad_norm": 0.64453125, "learning_rate": 0.00044702575882421016, "loss": 0.1845, "step": 152950 }, { "epoch": 6.34, "grad_norm": 0.55078125, "learning_rate": 0.00044701908295136095, "loss": 0.167, "step": 152960 }, { "epoch": 6.34, "grad_norm": 1.5078125, "learning_rate": 0.000447012406707743, "loss": 0.2088, "step": 152970 }, { "epoch": 6.34, "grad_norm": 0.427734375, "learning_rate": 0.0004470057300933691, "loss": 0.1939, "step": 152980 }, { "epoch": 6.34, "grad_norm": 0.69921875, "learning_rate": 0.00044699905310825164, "loss": 0.216, "step": 152990 }, { "epoch": 6.34, "grad_norm": 0.498046875, "learning_rate": 0.00044699237575240326, "loss": 0.1721, "step": 153000 }, { "epoch": 6.34, "grad_norm": 0.85546875, "learning_rate": 0.00044698569802583646, "loss": 0.2129, "step": 153010 }, { "epoch": 6.34, "grad_norm": 1.109375, "learning_rate": 0.00044697901992856386, "loss": 0.2057, "step": 153020 }, { "epoch": 6.34, "grad_norm": 0.1787109375, "learning_rate": 0.00044697234146059804, "loss": 0.2286, "step": 153030 }, { "epoch": 6.34, "grad_norm": 0.0, "learning_rate": 0.00044696566262195146, "loss": 0.2118, "step": 153040 }, { "epoch": 6.34, "grad_norm": 0.9921875, "learning_rate": 0.00044695898341263687, "loss": 0.1993, "step": 153050 }, { "epoch": 6.34, "grad_norm": 0.8671875, "learning_rate": 0.00044695230383266673, "loss": 0.1999, "step": 153060 }, { "epoch": 6.34, "grad_norm": 0.5625, "learning_rate": 0.00044694562388205363, "loss": 0.2062, "step": 153070 }, { "epoch": 6.34, "grad_norm": 0.5234375, "learning_rate": 0.00044693894356081013, "loss": 0.213, "step": 153080 }, { "epoch": 6.34, "grad_norm": 0.8984375, "learning_rate": 0.0004469322628689487, "loss": 0.2359, "step": 153090 }, { "epoch": 6.34, "grad_norm": 0.89453125, "learning_rate": 0.00044692558180648215, "loss": 0.148, "step": 153100 }, { "epoch": 6.34, "grad_norm": 0.53515625, "learning_rate": 0.0004469189003734229, "loss": 0.1762, "step": 153110 }, { "epoch": 6.34, "grad_norm": 1.4140625, "learning_rate": 0.00044691221856978347, "loss": 0.2247, "step": 153120 }, { "epoch": 6.34, "grad_norm": 1.0234375, "learning_rate": 0.0004469055363955766, "loss": 0.207, "step": 153130 }, { "epoch": 6.34, "grad_norm": 0.83203125, "learning_rate": 0.00044689885385081475, "loss": 0.2181, "step": 153140 }, { "epoch": 6.34, "grad_norm": 0.6484375, "learning_rate": 0.0004468921709355105, "loss": 0.186, "step": 153150 }, { "epoch": 6.34, "grad_norm": 0.41796875, "learning_rate": 0.00044688548764967645, "loss": 0.1725, "step": 153160 }, { "epoch": 6.34, "grad_norm": 0.3828125, "learning_rate": 0.00044687880399332515, "loss": 0.1546, "step": 153170 }, { "epoch": 6.34, "grad_norm": 1.3046875, "learning_rate": 0.0004468721199664693, "loss": 0.2076, "step": 153180 }, { "epoch": 6.35, "grad_norm": 1.25, "learning_rate": 0.0004468654355691213, "loss": 0.2621, "step": 153190 }, { "epoch": 6.35, "grad_norm": 0.57421875, "learning_rate": 0.0004468587508012938, "loss": 0.1679, "step": 153200 }, { "epoch": 6.35, "grad_norm": 0.41015625, "learning_rate": 0.0004468520656629994, "loss": 0.2031, "step": 153210 }, { "epoch": 6.35, "grad_norm": 1.171875, "learning_rate": 0.00044684538015425066, "loss": 0.2199, "step": 153220 }, { "epoch": 6.35, "grad_norm": 0.50390625, "learning_rate": 0.0004468386942750602, "loss": 0.208, "step": 153230 }, { "epoch": 6.35, "grad_norm": 0.51953125, "learning_rate": 0.00044683200802544054, "loss": 0.1993, "step": 153240 }, { "epoch": 6.35, "grad_norm": 1.9609375, "learning_rate": 0.00044682532140540433, "loss": 0.2408, "step": 153250 }, { "epoch": 6.35, "grad_norm": 0.361328125, "learning_rate": 0.0004468186344149641, "loss": 0.2132, "step": 153260 }, { "epoch": 6.35, "grad_norm": 0.43359375, "learning_rate": 0.00044681194705413247, "loss": 0.2532, "step": 153270 }, { "epoch": 6.35, "grad_norm": 0.6484375, "learning_rate": 0.00044680525932292207, "loss": 0.2348, "step": 153280 }, { "epoch": 6.35, "grad_norm": 0.390625, "learning_rate": 0.00044679857122134533, "loss": 0.2108, "step": 153290 }, { "epoch": 6.35, "grad_norm": 0.2099609375, "learning_rate": 0.00044679188274941495, "loss": 0.1584, "step": 153300 }, { "epoch": 6.35, "grad_norm": 1.15625, "learning_rate": 0.00044678519390714343, "loss": 0.1982, "step": 153310 }, { "epoch": 6.35, "grad_norm": 0.203125, "learning_rate": 0.00044677850469454364, "loss": 0.216, "step": 153320 }, { "epoch": 6.35, "grad_norm": 0.4453125, "learning_rate": 0.00044677181511162777, "loss": 0.193, "step": 153330 }, { "epoch": 6.35, "grad_norm": 0.73828125, "learning_rate": 0.0004467651251584086, "loss": 0.1393, "step": 153340 }, { "epoch": 6.35, "grad_norm": 0.453125, "learning_rate": 0.00044675843483489873, "loss": 0.1792, "step": 153350 }, { "epoch": 6.35, "grad_norm": 1.78125, "learning_rate": 0.0004467517441411109, "loss": 0.2177, "step": 153360 }, { "epoch": 6.35, "grad_norm": 0.62109375, "learning_rate": 0.0004467450530770573, "loss": 0.1739, "step": 153370 }, { "epoch": 6.35, "grad_norm": 1.140625, "learning_rate": 0.0004467383616427508, "loss": 0.1989, "step": 153380 }, { "epoch": 6.35, "grad_norm": 1.5, "learning_rate": 0.00044673166983820407, "loss": 0.2041, "step": 153390 }, { "epoch": 6.35, "grad_norm": 0.6171875, "learning_rate": 0.00044672497766342956, "loss": 0.228, "step": 153400 }, { "epoch": 6.35, "grad_norm": 0.52734375, "learning_rate": 0.0004467182851184398, "loss": 0.2265, "step": 153410 }, { "epoch": 6.35, "grad_norm": 0.482421875, "learning_rate": 0.0004467115922032475, "loss": 0.201, "step": 153420 }, { "epoch": 6.36, "grad_norm": 1.1640625, "learning_rate": 0.00044670489891786524, "loss": 0.2207, "step": 153430 }, { "epoch": 6.36, "grad_norm": 0.353515625, "learning_rate": 0.00044669820526230567, "loss": 0.226, "step": 153440 }, { "epoch": 6.36, "grad_norm": 0.859375, "learning_rate": 0.00044669151123658126, "loss": 0.222, "step": 153450 }, { "epoch": 6.36, "grad_norm": 0.828125, "learning_rate": 0.0004466848168407046, "loss": 0.1672, "step": 153460 }, { "epoch": 6.36, "grad_norm": 1.65625, "learning_rate": 0.0004466781220746884, "loss": 0.2157, "step": 153470 }, { "epoch": 6.36, "grad_norm": 0.67578125, "learning_rate": 0.0004466714269385452, "loss": 0.1933, "step": 153480 }, { "epoch": 6.36, "grad_norm": 1.5, "learning_rate": 0.0004466647314322877, "loss": 0.1872, "step": 153490 }, { "epoch": 6.36, "grad_norm": 0.75390625, "learning_rate": 0.0004466580355559283, "loss": 0.2174, "step": 153500 }, { "epoch": 6.36, "grad_norm": 0.75390625, "learning_rate": 0.00044665133930947977, "loss": 0.1804, "step": 153510 }, { "epoch": 6.36, "grad_norm": 0.494140625, "learning_rate": 0.0004466446426929547, "loss": 0.1931, "step": 153520 }, { "epoch": 6.36, "grad_norm": 0.796875, "learning_rate": 0.00044663794570636565, "loss": 0.2047, "step": 153530 }, { "epoch": 6.36, "grad_norm": 0.376953125, "learning_rate": 0.00044663124834972513, "loss": 0.2312, "step": 153540 }, { "epoch": 6.36, "grad_norm": 2.296875, "learning_rate": 0.00044662455062304587, "loss": 0.2207, "step": 153550 }, { "epoch": 6.36, "grad_norm": 1.1171875, "learning_rate": 0.00044661785252634044, "loss": 0.1931, "step": 153560 }, { "epoch": 6.36, "grad_norm": 1.25, "learning_rate": 0.0004466111540596215, "loss": 0.2121, "step": 153570 }, { "epoch": 6.36, "grad_norm": 0.53125, "learning_rate": 0.0004466044552229015, "loss": 0.2034, "step": 153580 }, { "epoch": 6.36, "grad_norm": 0.427734375, "learning_rate": 0.0004465977560161932, "loss": 0.1631, "step": 153590 }, { "epoch": 6.36, "grad_norm": 0.9609375, "learning_rate": 0.00044659105643950913, "loss": 0.193, "step": 153600 }, { "epoch": 6.36, "grad_norm": 0.7421875, "learning_rate": 0.000446584356492862, "loss": 0.1808, "step": 153610 }, { "epoch": 6.36, "grad_norm": 0.92578125, "learning_rate": 0.00044657765617626423, "loss": 0.2146, "step": 153620 }, { "epoch": 6.36, "grad_norm": 0.6875, "learning_rate": 0.0004465709554897286, "loss": 0.2095, "step": 153630 }, { "epoch": 6.36, "grad_norm": 0.671875, "learning_rate": 0.00044656425443326763, "loss": 0.1964, "step": 153640 }, { "epoch": 6.36, "grad_norm": 0.80859375, "learning_rate": 0.00044655755300689406, "loss": 0.204, "step": 153650 }, { "epoch": 6.36, "grad_norm": 1.828125, "learning_rate": 0.00044655085121062026, "loss": 0.2215, "step": 153660 }, { "epoch": 6.36, "grad_norm": 1.109375, "learning_rate": 0.00044654414904445907, "loss": 0.2583, "step": 153670 }, { "epoch": 6.37, "grad_norm": 0.75390625, "learning_rate": 0.000446537446508423, "loss": 0.2013, "step": 153680 }, { "epoch": 6.37, "grad_norm": 0.0, "learning_rate": 0.00044653074360252466, "loss": 0.199, "step": 153690 }, { "epoch": 6.37, "grad_norm": 0.640625, "learning_rate": 0.00044652404032677676, "loss": 0.1834, "step": 153700 }, { "epoch": 6.37, "grad_norm": 0.15625, "learning_rate": 0.0004465173366811918, "loss": 0.234, "step": 153710 }, { "epoch": 6.37, "grad_norm": 0.62890625, "learning_rate": 0.00044651063266578234, "loss": 0.2081, "step": 153720 }, { "epoch": 6.37, "grad_norm": 0.251953125, "learning_rate": 0.0004465039282805612, "loss": 0.2048, "step": 153730 }, { "epoch": 6.37, "grad_norm": 0.5546875, "learning_rate": 0.0004464972235255408, "loss": 0.2637, "step": 153740 }, { "epoch": 6.37, "grad_norm": 0.828125, "learning_rate": 0.000446490518400734, "loss": 0.2147, "step": 153750 }, { "epoch": 6.37, "grad_norm": 0.5859375, "learning_rate": 0.0004464838129061531, "loss": 0.1716, "step": 153760 }, { "epoch": 6.37, "grad_norm": 0.58203125, "learning_rate": 0.0004464771070418109, "loss": 0.1735, "step": 153770 }, { "epoch": 6.37, "grad_norm": 0.875, "learning_rate": 0.00044647040080772005, "loss": 0.1909, "step": 153780 }, { "epoch": 6.37, "grad_norm": 1.7578125, "learning_rate": 0.0004464636942038931, "loss": 0.2078, "step": 153790 }, { "epoch": 6.37, "grad_norm": 4.3125, "learning_rate": 0.0004464569872303428, "loss": 0.1855, "step": 153800 }, { "epoch": 6.37, "grad_norm": 0.84375, "learning_rate": 0.00044645027988708154, "loss": 0.1924, "step": 153810 }, { "epoch": 6.37, "grad_norm": 0.578125, "learning_rate": 0.00044644357217412214, "loss": 0.1983, "step": 153820 }, { "epoch": 6.37, "grad_norm": 0.197265625, "learning_rate": 0.0004464368640914771, "loss": 0.1827, "step": 153830 }, { "epoch": 6.37, "grad_norm": 0.8125, "learning_rate": 0.0004464301556391591, "loss": 0.2254, "step": 153840 }, { "epoch": 6.37, "grad_norm": 0.72265625, "learning_rate": 0.00044642344681718074, "loss": 0.1767, "step": 153850 }, { "epoch": 6.37, "grad_norm": 0.53125, "learning_rate": 0.0004464167376255547, "loss": 0.2504, "step": 153860 }, { "epoch": 6.37, "grad_norm": 1.546875, "learning_rate": 0.0004464100280642935, "loss": 0.2086, "step": 153870 }, { "epoch": 6.37, "grad_norm": 0.82421875, "learning_rate": 0.00044640331813340997, "loss": 0.1967, "step": 153880 }, { "epoch": 6.37, "grad_norm": 1.8671875, "learning_rate": 0.0004463966078329166, "loss": 0.2161, "step": 153890 }, { "epoch": 6.37, "grad_norm": 0.357421875, "learning_rate": 0.0004463898971628259, "loss": 0.2454, "step": 153900 }, { "epoch": 6.37, "grad_norm": 0.83203125, "learning_rate": 0.00044638318612315065, "loss": 0.2235, "step": 153910 }, { "epoch": 6.38, "grad_norm": 0.64453125, "learning_rate": 0.0004463764747139035, "loss": 0.2127, "step": 153920 }, { "epoch": 6.38, "grad_norm": 0.1689453125, "learning_rate": 0.000446369762935097, "loss": 0.222, "step": 153930 }, { "epoch": 6.38, "grad_norm": 0.77734375, "learning_rate": 0.00044636305078674386, "loss": 0.2378, "step": 153940 }, { "epoch": 6.38, "grad_norm": 0.5703125, "learning_rate": 0.00044635633826885663, "loss": 0.1584, "step": 153950 }, { "epoch": 6.38, "grad_norm": 0.447265625, "learning_rate": 0.000446349625381448, "loss": 0.246, "step": 153960 }, { "epoch": 6.38, "grad_norm": 2.109375, "learning_rate": 0.00044634291212453054, "loss": 0.2031, "step": 153970 }, { "epoch": 6.38, "grad_norm": 0.6484375, "learning_rate": 0.0004463361984981169, "loss": 0.2445, "step": 153980 }, { "epoch": 6.38, "grad_norm": 0.859375, "learning_rate": 0.00044632948450221984, "loss": 0.2399, "step": 153990 }, { "epoch": 6.38, "grad_norm": 0.474609375, "learning_rate": 0.00044632277013685183, "loss": 0.2447, "step": 154000 }, { "epoch": 6.38, "grad_norm": 0.609375, "learning_rate": 0.00044631605540202556, "loss": 0.193, "step": 154010 }, { "epoch": 6.38, "grad_norm": 0.6875, "learning_rate": 0.00044630934029775367, "loss": 0.2441, "step": 154020 }, { "epoch": 6.38, "grad_norm": 0.796875, "learning_rate": 0.0004463026248240488, "loss": 0.1503, "step": 154030 }, { "epoch": 6.38, "grad_norm": 0.357421875, "learning_rate": 0.00044629590898092366, "loss": 0.1482, "step": 154040 }, { "epoch": 6.38, "grad_norm": 0.458984375, "learning_rate": 0.00044628919276839076, "loss": 0.205, "step": 154050 }, { "epoch": 6.38, "grad_norm": 0.65234375, "learning_rate": 0.00044628247618646276, "loss": 0.1957, "step": 154060 }, { "epoch": 6.38, "grad_norm": 0.8203125, "learning_rate": 0.00044627575923515233, "loss": 0.2322, "step": 154070 }, { "epoch": 6.38, "grad_norm": 0.3125, "learning_rate": 0.0004462690419144722, "loss": 0.1691, "step": 154080 }, { "epoch": 6.38, "grad_norm": 0.6640625, "learning_rate": 0.0004462623242244349, "loss": 0.2143, "step": 154090 }, { "epoch": 6.38, "grad_norm": 0.80078125, "learning_rate": 0.00044625560616505305, "loss": 0.1335, "step": 154100 }, { "epoch": 6.38, "grad_norm": 0.6328125, "learning_rate": 0.00044624888773633935, "loss": 0.1769, "step": 154110 }, { "epoch": 6.38, "grad_norm": 0.55859375, "learning_rate": 0.0004462421689383065, "loss": 0.2441, "step": 154120 }, { "epoch": 6.38, "grad_norm": 1.5859375, "learning_rate": 0.00044623544977096707, "loss": 0.2208, "step": 154130 }, { "epoch": 6.38, "grad_norm": 0.71875, "learning_rate": 0.0004462287302343337, "loss": 0.1486, "step": 154140 }, { "epoch": 6.38, "grad_norm": 0.373046875, "learning_rate": 0.000446222010328419, "loss": 0.2741, "step": 154150 }, { "epoch": 6.39, "grad_norm": 1.0859375, "learning_rate": 0.00044621529005323574, "loss": 0.213, "step": 154160 }, { "epoch": 6.39, "grad_norm": 0.59765625, "learning_rate": 0.00044620856940879645, "loss": 0.2147, "step": 154170 }, { "epoch": 6.39, "grad_norm": 1.2265625, "learning_rate": 0.00044620184839511384, "loss": 0.1912, "step": 154180 }, { "epoch": 6.39, "grad_norm": 0.341796875, "learning_rate": 0.00044619512701220054, "loss": 0.2302, "step": 154190 }, { "epoch": 6.39, "grad_norm": 0.48828125, "learning_rate": 0.0004461884052600692, "loss": 0.1955, "step": 154200 }, { "epoch": 6.39, "grad_norm": 0.345703125, "learning_rate": 0.0004461816831387324, "loss": 0.1922, "step": 154210 }, { "epoch": 6.39, "grad_norm": 0.455078125, "learning_rate": 0.00044617496064820294, "loss": 0.2251, "step": 154220 }, { "epoch": 6.39, "grad_norm": 0.46875, "learning_rate": 0.00044616823778849336, "loss": 0.2142, "step": 154230 }, { "epoch": 6.39, "grad_norm": 0.94921875, "learning_rate": 0.0004461615145596164, "loss": 0.2365, "step": 154240 }, { "epoch": 6.39, "grad_norm": 0.60546875, "learning_rate": 0.0004461547909615846, "loss": 0.2243, "step": 154250 }, { "epoch": 6.39, "grad_norm": 0.4296875, "learning_rate": 0.00044614806699441067, "loss": 0.2255, "step": 154260 }, { "epoch": 6.39, "grad_norm": 0.82421875, "learning_rate": 0.00044614134265810723, "loss": 0.2121, "step": 154270 }, { "epoch": 6.39, "grad_norm": 1.1171875, "learning_rate": 0.000446134617952687, "loss": 0.2288, "step": 154280 }, { "epoch": 6.39, "grad_norm": 0.9453125, "learning_rate": 0.00044612789287816257, "loss": 0.1971, "step": 154290 }, { "epoch": 6.39, "grad_norm": 0.328125, "learning_rate": 0.00044612116743454665, "loss": 0.2107, "step": 154300 }, { "epoch": 6.39, "grad_norm": 0.54296875, "learning_rate": 0.0004461144416218519, "loss": 0.2168, "step": 154310 }, { "epoch": 6.39, "grad_norm": 0.5703125, "learning_rate": 0.00044610771544009085, "loss": 0.2174, "step": 154320 }, { "epoch": 6.39, "grad_norm": 0.52734375, "learning_rate": 0.0004461009888892764, "loss": 0.1862, "step": 154330 }, { "epoch": 6.39, "grad_norm": 0.91796875, "learning_rate": 0.00044609426196942094, "loss": 0.2227, "step": 154340 }, { "epoch": 6.39, "grad_norm": 0.66796875, "learning_rate": 0.0004460875346805373, "loss": 0.237, "step": 154350 }, { "epoch": 6.39, "grad_norm": 0.419921875, "learning_rate": 0.0004460808070226381, "loss": 0.1858, "step": 154360 }, { "epoch": 6.39, "grad_norm": 0.447265625, "learning_rate": 0.00044607407899573603, "loss": 0.2095, "step": 154370 }, { "epoch": 6.39, "grad_norm": 0.5234375, "learning_rate": 0.0004460673505998437, "loss": 0.1917, "step": 154380 }, { "epoch": 6.39, "grad_norm": 1.1328125, "learning_rate": 0.0004460606218349738, "loss": 0.2101, "step": 154390 }, { "epoch": 6.4, "grad_norm": 1.3046875, "learning_rate": 0.00044605389270113894, "loss": 0.2197, "step": 154400 }, { "epoch": 6.4, "grad_norm": 0.466796875, "learning_rate": 0.00044604716319835184, "loss": 0.257, "step": 154410 }, { "epoch": 6.4, "grad_norm": 1.046875, "learning_rate": 0.00044604043332662516, "loss": 0.1689, "step": 154420 }, { "epoch": 6.4, "grad_norm": 1.0, "learning_rate": 0.0004460337030859715, "loss": 0.1624, "step": 154430 }, { "epoch": 6.4, "grad_norm": 0.412109375, "learning_rate": 0.0004460269724764037, "loss": 0.2149, "step": 154440 }, { "epoch": 6.4, "grad_norm": 0.828125, "learning_rate": 0.00044602024149793416, "loss": 0.1936, "step": 154450 }, { "epoch": 6.4, "grad_norm": 0.75, "learning_rate": 0.0004460135101505758, "loss": 0.1764, "step": 154460 }, { "epoch": 6.4, "grad_norm": 0.53125, "learning_rate": 0.0004460067784343411, "loss": 0.1824, "step": 154470 }, { "epoch": 6.4, "grad_norm": 1.7109375, "learning_rate": 0.00044600004634924296, "loss": 0.1992, "step": 154480 }, { "epoch": 6.4, "grad_norm": 0.84375, "learning_rate": 0.00044599331389529376, "loss": 0.2309, "step": 154490 }, { "epoch": 6.4, "grad_norm": 1.125, "learning_rate": 0.00044598658107250635, "loss": 0.1911, "step": 154500 }, { "epoch": 6.4, "grad_norm": 0.369140625, "learning_rate": 0.00044597984788089336, "loss": 0.2111, "step": 154510 }, { "epoch": 6.4, "grad_norm": 0.7421875, "learning_rate": 0.0004459731143204675, "loss": 0.1743, "step": 154520 }, { "epoch": 6.4, "grad_norm": 0.486328125, "learning_rate": 0.0004459663803912413, "loss": 0.245, "step": 154530 }, { "epoch": 6.4, "grad_norm": 0.41015625, "learning_rate": 0.0004459596460932276, "loss": 0.2195, "step": 154540 }, { "epoch": 6.4, "grad_norm": 0.68359375, "learning_rate": 0.000445952911426439, "loss": 0.2287, "step": 154550 }, { "epoch": 6.4, "grad_norm": 0.7578125, "learning_rate": 0.0004459461763908882, "loss": 0.2477, "step": 154560 }, { "epoch": 6.4, "grad_norm": 2.109375, "learning_rate": 0.00044593944098658786, "loss": 0.1628, "step": 154570 }, { "epoch": 6.4, "grad_norm": 0.318359375, "learning_rate": 0.0004459327052135506, "loss": 0.2003, "step": 154580 }, { "epoch": 6.4, "grad_norm": 0.390625, "learning_rate": 0.0004459259690717892, "loss": 0.2057, "step": 154590 }, { "epoch": 6.4, "grad_norm": 0.40234375, "learning_rate": 0.00044591923256131626, "loss": 0.2261, "step": 154600 }, { "epoch": 6.4, "grad_norm": 0.65234375, "learning_rate": 0.0004459124956821445, "loss": 0.1928, "step": 154610 }, { "epoch": 6.4, "grad_norm": 0.51953125, "learning_rate": 0.00044590575843428653, "loss": 0.2059, "step": 154620 }, { "epoch": 6.4, "grad_norm": 0.408203125, "learning_rate": 0.0004458990208177551, "loss": 0.1805, "step": 154630 }, { "epoch": 6.41, "grad_norm": 0.91796875, "learning_rate": 0.00044589228283256296, "loss": 0.1908, "step": 154640 }, { "epoch": 6.41, "grad_norm": 0.66796875, "learning_rate": 0.0004458855444787226, "loss": 0.2023, "step": 154650 }, { "epoch": 6.41, "grad_norm": 0.427734375, "learning_rate": 0.0004458788057562468, "loss": 0.2064, "step": 154660 }, { "epoch": 6.41, "grad_norm": 0.91015625, "learning_rate": 0.0004458720666651482, "loss": 0.2758, "step": 154670 }, { "epoch": 6.41, "grad_norm": 1.8046875, "learning_rate": 0.00044586532720543965, "loss": 0.2095, "step": 154680 }, { "epoch": 6.41, "grad_norm": 0.58984375, "learning_rate": 0.0004458585873771336, "loss": 0.1251, "step": 154690 }, { "epoch": 6.41, "grad_norm": 1.0390625, "learning_rate": 0.00044585184718024293, "loss": 0.1506, "step": 154700 }, { "epoch": 6.41, "grad_norm": 0.890625, "learning_rate": 0.00044584510661478015, "loss": 0.2311, "step": 154710 }, { "epoch": 6.41, "grad_norm": 0.62890625, "learning_rate": 0.0004458383656807581, "loss": 0.2114, "step": 154720 }, { "epoch": 6.41, "grad_norm": 1.25, "learning_rate": 0.00044583162437818934, "loss": 0.1784, "step": 154730 }, { "epoch": 6.41, "grad_norm": 1.1875, "learning_rate": 0.00044582488270708667, "loss": 0.2257, "step": 154740 }, { "epoch": 6.41, "grad_norm": 0.71875, "learning_rate": 0.0004458181406674626, "loss": 0.1962, "step": 154750 }, { "epoch": 6.41, "grad_norm": 0.2314453125, "learning_rate": 0.00044581139825933003, "loss": 0.2021, "step": 154760 }, { "epoch": 6.41, "grad_norm": 1.421875, "learning_rate": 0.00044580465548270154, "loss": 0.1545, "step": 154770 }, { "epoch": 6.41, "grad_norm": 0.4921875, "learning_rate": 0.0004457979123375898, "loss": 0.1689, "step": 154780 }, { "epoch": 6.41, "grad_norm": 1.09375, "learning_rate": 0.00044579116882400763, "loss": 0.2534, "step": 154790 }, { "epoch": 6.41, "grad_norm": 0.7421875, "learning_rate": 0.0004457844249419676, "loss": 0.1742, "step": 154800 }, { "epoch": 6.41, "grad_norm": 0.404296875, "learning_rate": 0.00044577768069148236, "loss": 0.2021, "step": 154810 }, { "epoch": 6.41, "grad_norm": 0.40234375, "learning_rate": 0.00044577093607256463, "loss": 0.2448, "step": 154820 }, { "epoch": 6.41, "grad_norm": 0.61328125, "learning_rate": 0.00044576419108522725, "loss": 0.2518, "step": 154830 }, { "epoch": 6.41, "grad_norm": 2.0625, "learning_rate": 0.00044575744572948275, "loss": 0.1987, "step": 154840 }, { "epoch": 6.41, "grad_norm": 0.40234375, "learning_rate": 0.00044575070000534386, "loss": 0.2341, "step": 154850 }, { "epoch": 6.41, "grad_norm": 0.546875, "learning_rate": 0.0004457439539128233, "loss": 0.1973, "step": 154860 }, { "epoch": 6.41, "grad_norm": 1.09375, "learning_rate": 0.0004457372074519338, "loss": 0.1911, "step": 154870 }, { "epoch": 6.42, "grad_norm": 0.58984375, "learning_rate": 0.00044573046062268797, "loss": 0.1856, "step": 154880 }, { "epoch": 6.42, "grad_norm": 0.83203125, "learning_rate": 0.00044572371342509854, "loss": 0.1991, "step": 154890 }, { "epoch": 6.42, "grad_norm": 0.640625, "learning_rate": 0.0004457169658591782, "loss": 0.1766, "step": 154900 }, { "epoch": 6.42, "grad_norm": 0.2890625, "learning_rate": 0.0004457102179249397, "loss": 0.1987, "step": 154910 }, { "epoch": 6.42, "grad_norm": 0.984375, "learning_rate": 0.00044570346962239575, "loss": 0.2123, "step": 154920 }, { "epoch": 6.42, "grad_norm": 0.134765625, "learning_rate": 0.000445696720951559, "loss": 0.1995, "step": 154930 }, { "epoch": 6.42, "grad_norm": 2.109375, "learning_rate": 0.0004456899719124421, "loss": 0.2275, "step": 154940 }, { "epoch": 6.42, "grad_norm": 0.34765625, "learning_rate": 0.0004456832225050578, "loss": 0.1539, "step": 154950 }, { "epoch": 6.42, "grad_norm": 0.55859375, "learning_rate": 0.00044567647272941886, "loss": 0.2273, "step": 154960 }, { "epoch": 6.42, "grad_norm": 1.0390625, "learning_rate": 0.0004456697225855379, "loss": 0.1919, "step": 154970 }, { "epoch": 6.42, "grad_norm": 0.66796875, "learning_rate": 0.00044566297207342766, "loss": 0.2422, "step": 154980 }, { "epoch": 6.42, "grad_norm": 0.60546875, "learning_rate": 0.0004456562211931008, "loss": 0.181, "step": 154990 }, { "epoch": 6.42, "grad_norm": 0.9921875, "learning_rate": 0.00044564946994457013, "loss": 0.2042, "step": 155000 }, { "epoch": 6.42, "grad_norm": 1.21875, "learning_rate": 0.0004456427183278482, "loss": 0.2494, "step": 155010 }, { "epoch": 6.42, "grad_norm": 1.109375, "learning_rate": 0.0004456359663429479, "loss": 0.2042, "step": 155020 }, { "epoch": 6.42, "grad_norm": 0.55859375, "learning_rate": 0.00044562921398988176, "loss": 0.2492, "step": 155030 }, { "epoch": 6.42, "grad_norm": 0.50390625, "learning_rate": 0.00044562246126866254, "loss": 0.1761, "step": 155040 }, { "epoch": 6.42, "grad_norm": 0.46484375, "learning_rate": 0.0004456157081793031, "loss": 0.2255, "step": 155050 }, { "epoch": 6.42, "grad_norm": 0.97265625, "learning_rate": 0.0004456089547218159, "loss": 0.1774, "step": 155060 }, { "epoch": 6.42, "grad_norm": 0.44921875, "learning_rate": 0.00044560220089621386, "loss": 0.1883, "step": 155070 }, { "epoch": 6.42, "grad_norm": 0.67578125, "learning_rate": 0.00044559544670250953, "loss": 0.164, "step": 155080 }, { "epoch": 6.42, "grad_norm": 0.41015625, "learning_rate": 0.00044558869214071574, "loss": 0.1931, "step": 155090 }, { "epoch": 6.42, "grad_norm": 1.2265625, "learning_rate": 0.00044558193721084513, "loss": 0.1692, "step": 155100 }, { "epoch": 6.42, "grad_norm": 0.1728515625, "learning_rate": 0.0004455751819129104, "loss": 0.1984, "step": 155110 }, { "epoch": 6.43, "grad_norm": 0.5234375, "learning_rate": 0.0004455684262469244, "loss": 0.2106, "step": 155120 }, { "epoch": 6.43, "grad_norm": 0.7890625, "learning_rate": 0.00044556167021289964, "loss": 0.2189, "step": 155130 }, { "epoch": 6.43, "grad_norm": 0.890625, "learning_rate": 0.000445554913810849, "loss": 0.2269, "step": 155140 }, { "epoch": 6.43, "grad_norm": 1.296875, "learning_rate": 0.0004455481570407851, "loss": 0.2228, "step": 155150 }, { "epoch": 6.43, "grad_norm": 0.6484375, "learning_rate": 0.0004455413999027207, "loss": 0.2103, "step": 155160 }, { "epoch": 6.43, "grad_norm": 1.03125, "learning_rate": 0.00044553464239666845, "loss": 0.219, "step": 155170 }, { "epoch": 6.43, "grad_norm": 0.7734375, "learning_rate": 0.00044552788452264114, "loss": 0.2709, "step": 155180 }, { "epoch": 6.43, "grad_norm": 0.474609375, "learning_rate": 0.0004455211262806515, "loss": 0.2106, "step": 155190 }, { "epoch": 6.43, "grad_norm": 0.578125, "learning_rate": 0.00044551436767071217, "loss": 0.2049, "step": 155200 }, { "epoch": 6.43, "grad_norm": 0.92578125, "learning_rate": 0.00044550760869283603, "loss": 0.2118, "step": 155210 }, { "epoch": 6.43, "grad_norm": 0.609375, "learning_rate": 0.0004455008493470356, "loss": 0.2062, "step": 155220 }, { "epoch": 6.43, "grad_norm": 1.171875, "learning_rate": 0.0004454940896333236, "loss": 0.229, "step": 155230 }, { "epoch": 6.43, "grad_norm": 0.859375, "learning_rate": 0.000445487329551713, "loss": 0.2066, "step": 155240 }, { "epoch": 6.43, "grad_norm": 0.255859375, "learning_rate": 0.00044548056910221625, "loss": 0.183, "step": 155250 }, { "epoch": 6.43, "grad_norm": 1.3046875, "learning_rate": 0.00044547380828484617, "loss": 0.2321, "step": 155260 }, { "epoch": 6.43, "grad_norm": 0.5859375, "learning_rate": 0.00044546704709961556, "loss": 0.1822, "step": 155270 }, { "epoch": 6.43, "grad_norm": 0.8359375, "learning_rate": 0.00044546028554653705, "loss": 0.2105, "step": 155280 }, { "epoch": 6.43, "grad_norm": 0.7109375, "learning_rate": 0.00044545352362562334, "loss": 0.2394, "step": 155290 }, { "epoch": 6.43, "grad_norm": 0.330078125, "learning_rate": 0.00044544676133688726, "loss": 0.1851, "step": 155300 }, { "epoch": 6.43, "grad_norm": 1.34375, "learning_rate": 0.00044543999868034144, "loss": 0.238, "step": 155310 }, { "epoch": 6.43, "grad_norm": 0.2333984375, "learning_rate": 0.0004454332356559987, "loss": 0.1847, "step": 155320 }, { "epoch": 6.43, "grad_norm": 0.78125, "learning_rate": 0.0004454264722638717, "loss": 0.1876, "step": 155330 }, { "epoch": 6.43, "grad_norm": 0.546875, "learning_rate": 0.00044541970850397317, "loss": 0.2084, "step": 155340 }, { "epoch": 6.43, "grad_norm": 0.28515625, "learning_rate": 0.0004454129443763159, "loss": 0.1959, "step": 155350 }, { "epoch": 6.43, "grad_norm": 0.67578125, "learning_rate": 0.0004454061798809125, "loss": 0.242, "step": 155360 }, { "epoch": 6.44, "grad_norm": 0.97265625, "learning_rate": 0.0004453994150177758, "loss": 0.2103, "step": 155370 }, { "epoch": 6.44, "grad_norm": 1.3828125, "learning_rate": 0.0004453926497869185, "loss": 0.2224, "step": 155380 }, { "epoch": 6.44, "grad_norm": 0.46875, "learning_rate": 0.00044538588418835336, "loss": 0.1405, "step": 155390 }, { "epoch": 6.44, "grad_norm": 0.78125, "learning_rate": 0.0004453791182220931, "loss": 0.2285, "step": 155400 }, { "epoch": 6.44, "grad_norm": 1.59375, "learning_rate": 0.0004453723518881504, "loss": 0.1985, "step": 155410 }, { "epoch": 6.44, "grad_norm": 1.0625, "learning_rate": 0.00044536558518653804, "loss": 0.1969, "step": 155420 }, { "epoch": 6.44, "grad_norm": 1.3515625, "learning_rate": 0.0004453588181172687, "loss": 0.2393, "step": 155430 }, { "epoch": 6.44, "grad_norm": 0.40234375, "learning_rate": 0.00044535205068035524, "loss": 0.2209, "step": 155440 }, { "epoch": 6.44, "grad_norm": 1.0234375, "learning_rate": 0.0004453452828758102, "loss": 0.1682, "step": 155450 }, { "epoch": 6.44, "grad_norm": 0.734375, "learning_rate": 0.0004453385147036465, "loss": 0.1674, "step": 155460 }, { "epoch": 6.44, "grad_norm": 0.78515625, "learning_rate": 0.0004453317461638768, "loss": 0.1858, "step": 155470 }, { "epoch": 6.44, "grad_norm": 0.87890625, "learning_rate": 0.0004453249772565139, "loss": 0.2182, "step": 155480 }, { "epoch": 6.44, "grad_norm": 0.89453125, "learning_rate": 0.00044531820798157055, "loss": 0.1718, "step": 155490 }, { "epoch": 6.44, "grad_norm": 0.921875, "learning_rate": 0.00044531143833905927, "loss": 0.2385, "step": 155500 }, { "epoch": 6.44, "grad_norm": 1.546875, "learning_rate": 0.00044530466832899305, "loss": 0.1798, "step": 155510 }, { "epoch": 6.44, "grad_norm": 0.9296875, "learning_rate": 0.00044529789795138446, "loss": 0.2237, "step": 155520 }, { "epoch": 6.44, "grad_norm": 2.25, "learning_rate": 0.00044529112720624635, "loss": 0.2099, "step": 155530 }, { "epoch": 6.44, "grad_norm": 0.412109375, "learning_rate": 0.0004452843560935914, "loss": 0.174, "step": 155540 }, { "epoch": 6.44, "grad_norm": 0.859375, "learning_rate": 0.0004452775846134325, "loss": 0.175, "step": 155550 }, { "epoch": 6.44, "grad_norm": 0.859375, "learning_rate": 0.0004452708127657822, "loss": 0.2236, "step": 155560 }, { "epoch": 6.44, "grad_norm": 1.1015625, "learning_rate": 0.0004452640405506533, "loss": 0.2405, "step": 155570 }, { "epoch": 6.44, "grad_norm": 0.3515625, "learning_rate": 0.00044525726796805856, "loss": 0.206, "step": 155580 }, { "epoch": 6.44, "grad_norm": 0.62109375, "learning_rate": 0.00044525049501801075, "loss": 0.2506, "step": 155590 }, { "epoch": 6.44, "grad_norm": 0.4296875, "learning_rate": 0.0004452437217005226, "loss": 0.2203, "step": 155600 }, { "epoch": 6.45, "grad_norm": 0.4765625, "learning_rate": 0.0004452369480156068, "loss": 0.1667, "step": 155610 }, { "epoch": 6.45, "grad_norm": 0.71875, "learning_rate": 0.00044523017396327615, "loss": 0.1939, "step": 155620 }, { "epoch": 6.45, "grad_norm": 0.349609375, "learning_rate": 0.0004452233995435434, "loss": 0.195, "step": 155630 }, { "epoch": 6.45, "grad_norm": 0.58984375, "learning_rate": 0.00044521662475642136, "loss": 0.1864, "step": 155640 }, { "epoch": 6.45, "grad_norm": 0.69921875, "learning_rate": 0.0004452098496019227, "loss": 0.1776, "step": 155650 }, { "epoch": 6.45, "grad_norm": 0.7265625, "learning_rate": 0.0004452030740800601, "loss": 0.1859, "step": 155660 }, { "epoch": 6.45, "grad_norm": 0.39453125, "learning_rate": 0.0004451962981908465, "loss": 0.1476, "step": 155670 }, { "epoch": 6.45, "grad_norm": 1.0234375, "learning_rate": 0.0004451895219342945, "loss": 0.2056, "step": 155680 }, { "epoch": 6.45, "grad_norm": 0.48046875, "learning_rate": 0.0004451827453104169, "loss": 0.2062, "step": 155690 }, { "epoch": 6.45, "grad_norm": 1.015625, "learning_rate": 0.0004451759683192264, "loss": 0.1924, "step": 155700 }, { "epoch": 6.45, "grad_norm": 0.6796875, "learning_rate": 0.00044516919096073586, "loss": 0.2132, "step": 155710 }, { "epoch": 6.45, "grad_norm": 0.5703125, "learning_rate": 0.000445162413234958, "loss": 0.2109, "step": 155720 }, { "epoch": 6.45, "grad_norm": 0.578125, "learning_rate": 0.0004451556351419055, "loss": 0.1871, "step": 155730 }, { "epoch": 6.45, "grad_norm": 1.765625, "learning_rate": 0.0004451488566815912, "loss": 0.1943, "step": 155740 }, { "epoch": 6.45, "grad_norm": 0.4296875, "learning_rate": 0.00044514207785402785, "loss": 0.2119, "step": 155750 }, { "epoch": 6.45, "grad_norm": 0.859375, "learning_rate": 0.00044513529865922814, "loss": 0.1669, "step": 155760 }, { "epoch": 6.45, "grad_norm": 0.59765625, "learning_rate": 0.0004451285190972049, "loss": 0.1482, "step": 155770 }, { "epoch": 6.45, "grad_norm": 0.625, "learning_rate": 0.00044512173916797085, "loss": 0.2124, "step": 155780 }, { "epoch": 6.45, "grad_norm": 0.5, "learning_rate": 0.00044511495887153874, "loss": 0.2193, "step": 155790 }, { "epoch": 6.45, "grad_norm": 0.443359375, "learning_rate": 0.0004451081782079214, "loss": 0.1931, "step": 155800 }, { "epoch": 6.45, "grad_norm": 0.353515625, "learning_rate": 0.00044510139717713145, "loss": 0.1973, "step": 155810 }, { "epoch": 6.45, "grad_norm": 1.640625, "learning_rate": 0.00044509461577918176, "loss": 0.2029, "step": 155820 }, { "epoch": 6.45, "grad_norm": 1.125, "learning_rate": 0.00044508783401408516, "loss": 0.1853, "step": 155830 }, { "epoch": 6.45, "grad_norm": 0.181640625, "learning_rate": 0.00044508105188185423, "loss": 0.2004, "step": 155840 }, { "epoch": 6.46, "grad_norm": 1.15625, "learning_rate": 0.0004450742693825018, "loss": 0.2191, "step": 155850 }, { "epoch": 6.46, "grad_norm": 0.703125, "learning_rate": 0.0004450674865160408, "loss": 0.2083, "step": 155860 }, { "epoch": 6.46, "grad_norm": 1.0859375, "learning_rate": 0.00044506070328248373, "loss": 0.1848, "step": 155870 }, { "epoch": 6.46, "grad_norm": 0.625, "learning_rate": 0.00044505391968184353, "loss": 0.1877, "step": 155880 }, { "epoch": 6.46, "grad_norm": 1.125, "learning_rate": 0.000445047135714133, "loss": 0.2597, "step": 155890 }, { "epoch": 6.46, "grad_norm": 0.59765625, "learning_rate": 0.00044504035137936474, "loss": 0.2332, "step": 155900 }, { "epoch": 6.46, "grad_norm": 2.203125, "learning_rate": 0.00044503356667755157, "loss": 0.1763, "step": 155910 }, { "epoch": 6.46, "grad_norm": 0.5703125, "learning_rate": 0.0004450267816087063, "loss": 0.2401, "step": 155920 }, { "epoch": 6.46, "grad_norm": 0.85546875, "learning_rate": 0.0004450199961728417, "loss": 0.2084, "step": 155930 }, { "epoch": 6.46, "grad_norm": 0.7890625, "learning_rate": 0.00044501321036997054, "loss": 0.1908, "step": 155940 }, { "epoch": 6.46, "grad_norm": 1.2421875, "learning_rate": 0.00044500642420010556, "loss": 0.1631, "step": 155950 }, { "epoch": 6.46, "grad_norm": 0.39453125, "learning_rate": 0.00044499963766325956, "loss": 0.2547, "step": 155960 }, { "epoch": 6.46, "grad_norm": 0.53515625, "learning_rate": 0.00044499285075944527, "loss": 0.2119, "step": 155970 }, { "epoch": 6.46, "grad_norm": 0.455078125, "learning_rate": 0.00044498606348867555, "loss": 0.2189, "step": 155980 }, { "epoch": 6.46, "grad_norm": 0.408203125, "learning_rate": 0.0004449792758509631, "loss": 0.1899, "step": 155990 }, { "epoch": 6.46, "grad_norm": 0.92578125, "learning_rate": 0.0004449724878463207, "loss": 0.193, "step": 156000 }, { "epoch": 6.46, "grad_norm": 0.59375, "learning_rate": 0.00044496569947476117, "loss": 0.2181, "step": 156010 }, { "epoch": 6.46, "grad_norm": 1.9296875, "learning_rate": 0.00044495891073629716, "loss": 0.1668, "step": 156020 }, { "epoch": 6.46, "grad_norm": 0.236328125, "learning_rate": 0.0004449521216309416, "loss": 0.1301, "step": 156030 }, { "epoch": 6.46, "grad_norm": 0.859375, "learning_rate": 0.0004449453321587072, "loss": 0.2623, "step": 156040 }, { "epoch": 6.46, "grad_norm": 0.8359375, "learning_rate": 0.00044493854231960673, "loss": 0.2312, "step": 156050 }, { "epoch": 6.46, "grad_norm": 0.9296875, "learning_rate": 0.000444931752113653, "loss": 0.2768, "step": 156060 }, { "epoch": 6.46, "grad_norm": 0.71484375, "learning_rate": 0.00044492496154085873, "loss": 0.2153, "step": 156070 }, { "epoch": 6.46, "grad_norm": 0.71484375, "learning_rate": 0.0004449181706012367, "loss": 0.2031, "step": 156080 }, { "epoch": 6.47, "grad_norm": 0.64453125, "learning_rate": 0.00044491137929479976, "loss": 0.1768, "step": 156090 }, { "epoch": 6.47, "grad_norm": 0.6328125, "learning_rate": 0.0004449045876215606, "loss": 0.2219, "step": 156100 }, { "epoch": 6.47, "grad_norm": 0.447265625, "learning_rate": 0.0004448977955815321, "loss": 0.1654, "step": 156110 }, { "epoch": 6.47, "grad_norm": 0.75, "learning_rate": 0.000444891003174727, "loss": 0.2246, "step": 156120 }, { "epoch": 6.47, "grad_norm": 0.88671875, "learning_rate": 0.00044488421040115813, "loss": 0.2173, "step": 156130 }, { "epoch": 6.47, "grad_norm": 0.9296875, "learning_rate": 0.0004448774172608381, "loss": 0.2745, "step": 156140 }, { "epoch": 6.47, "grad_norm": 0.86328125, "learning_rate": 0.00044487062375377995, "loss": 0.1925, "step": 156150 }, { "epoch": 6.47, "grad_norm": 0.65625, "learning_rate": 0.00044486382987999625, "loss": 0.2279, "step": 156160 }, { "epoch": 6.47, "grad_norm": 1.3125, "learning_rate": 0.00044485703563949987, "loss": 0.1849, "step": 156170 }, { "epoch": 6.47, "grad_norm": 0.484375, "learning_rate": 0.0004448502410323036, "loss": 0.1797, "step": 156180 }, { "epoch": 6.47, "grad_norm": 1.1640625, "learning_rate": 0.0004448434460584202, "loss": 0.1603, "step": 156190 }, { "epoch": 6.47, "grad_norm": 1.03125, "learning_rate": 0.0004448366507178625, "loss": 0.2049, "step": 156200 }, { "epoch": 6.47, "grad_norm": 0.546875, "learning_rate": 0.0004448298550106433, "loss": 0.2077, "step": 156210 }, { "epoch": 6.47, "grad_norm": 0.99609375, "learning_rate": 0.0004448230589367753, "loss": 0.2026, "step": 156220 }, { "epoch": 6.47, "grad_norm": 0.4609375, "learning_rate": 0.0004448162624962713, "loss": 0.176, "step": 156230 }, { "epoch": 6.47, "grad_norm": 1.3515625, "learning_rate": 0.0004448094656891442, "loss": 0.2185, "step": 156240 }, { "epoch": 6.47, "grad_norm": 0.328125, "learning_rate": 0.0004448026685154067, "loss": 0.1621, "step": 156250 }, { "epoch": 6.47, "grad_norm": 0.1689453125, "learning_rate": 0.00044479587097507163, "loss": 0.2041, "step": 156260 }, { "epoch": 6.47, "grad_norm": 1.1171875, "learning_rate": 0.00044478907306815175, "loss": 0.2424, "step": 156270 }, { "epoch": 6.47, "grad_norm": 0.337890625, "learning_rate": 0.0004447822747946599, "loss": 0.2387, "step": 156280 }, { "epoch": 6.47, "grad_norm": 0.26171875, "learning_rate": 0.00044477547615460886, "loss": 0.1346, "step": 156290 }, { "epoch": 6.47, "grad_norm": 1.03125, "learning_rate": 0.00044476867714801134, "loss": 0.2287, "step": 156300 }, { "epoch": 6.47, "grad_norm": 0.54296875, "learning_rate": 0.0004447618777748802, "loss": 0.1983, "step": 156310 }, { "epoch": 6.47, "grad_norm": 1.078125, "learning_rate": 0.0004447550780352283, "loss": 0.2115, "step": 156320 }, { "epoch": 6.48, "grad_norm": 0.828125, "learning_rate": 0.00044474827792906834, "loss": 0.198, "step": 156330 }, { "epoch": 6.48, "grad_norm": 0.439453125, "learning_rate": 0.0004447414774564131, "loss": 0.1725, "step": 156340 }, { "epoch": 6.48, "grad_norm": 0.41015625, "learning_rate": 0.00044473467661727557, "loss": 0.2134, "step": 156350 }, { "epoch": 6.48, "grad_norm": 0.94921875, "learning_rate": 0.00044472787541166835, "loss": 0.2244, "step": 156360 }, { "epoch": 6.48, "grad_norm": 0.6171875, "learning_rate": 0.00044472107383960427, "loss": 0.2151, "step": 156370 }, { "epoch": 6.48, "grad_norm": 0.640625, "learning_rate": 0.0004447142719010961, "loss": 0.2153, "step": 156380 }, { "epoch": 6.48, "grad_norm": 0.8046875, "learning_rate": 0.0004447074695961568, "loss": 0.2183, "step": 156390 }, { "epoch": 6.48, "grad_norm": 0.890625, "learning_rate": 0.00044470066692479905, "loss": 0.2174, "step": 156400 }, { "epoch": 6.48, "grad_norm": 0.5078125, "learning_rate": 0.00044469386388703566, "loss": 0.2252, "step": 156410 }, { "epoch": 6.48, "grad_norm": 0.64453125, "learning_rate": 0.00044468706048287937, "loss": 0.1599, "step": 156420 }, { "epoch": 6.48, "grad_norm": 0.462890625, "learning_rate": 0.00044468025671234315, "loss": 0.1215, "step": 156430 }, { "epoch": 6.48, "grad_norm": 1.0234375, "learning_rate": 0.00044467345257543967, "loss": 0.19, "step": 156440 }, { "epoch": 6.48, "grad_norm": 0.61328125, "learning_rate": 0.00044466664807218183, "loss": 0.2391, "step": 156450 }, { "epoch": 6.48, "grad_norm": 0.46484375, "learning_rate": 0.0004446598432025823, "loss": 0.2048, "step": 156460 }, { "epoch": 6.48, "grad_norm": 0.3046875, "learning_rate": 0.000444653037966654, "loss": 0.2016, "step": 156470 }, { "epoch": 6.48, "grad_norm": 0.73828125, "learning_rate": 0.0004446462323644097, "loss": 0.2237, "step": 156480 }, { "epoch": 6.48, "grad_norm": 0.57421875, "learning_rate": 0.0004446394263958622, "loss": 0.2352, "step": 156490 }, { "epoch": 6.48, "grad_norm": 0.359375, "learning_rate": 0.0004446326200610243, "loss": 0.2472, "step": 156500 }, { "epoch": 6.48, "grad_norm": 1.1796875, "learning_rate": 0.0004446258133599089, "loss": 0.1921, "step": 156510 }, { "epoch": 6.48, "grad_norm": 0.87890625, "learning_rate": 0.0004446190062925287, "loss": 0.1613, "step": 156520 }, { "epoch": 6.48, "grad_norm": 1.90625, "learning_rate": 0.00044461219885889657, "loss": 0.1967, "step": 156530 }, { "epoch": 6.48, "grad_norm": 0.48046875, "learning_rate": 0.00044460539105902527, "loss": 0.2233, "step": 156540 }, { "epoch": 6.48, "grad_norm": 0.38671875, "learning_rate": 0.00044459858289292765, "loss": 0.1412, "step": 156550 }, { "epoch": 6.48, "grad_norm": 0.47265625, "learning_rate": 0.00044459177436061647, "loss": 0.189, "step": 156560 }, { "epoch": 6.49, "grad_norm": 0.4609375, "learning_rate": 0.0004445849654621046, "loss": 0.1668, "step": 156570 }, { "epoch": 6.49, "grad_norm": 0.74609375, "learning_rate": 0.00044457815619740494, "loss": 0.1524, "step": 156580 }, { "epoch": 6.49, "grad_norm": 0.57421875, "learning_rate": 0.0004445713465665301, "loss": 0.2226, "step": 156590 }, { "epoch": 6.49, "grad_norm": 0.68359375, "learning_rate": 0.00044456453656949304, "loss": 0.198, "step": 156600 }, { "epoch": 6.49, "grad_norm": 0.625, "learning_rate": 0.00044455772620630645, "loss": 0.1403, "step": 156610 }, { "epoch": 6.49, "grad_norm": 1.28125, "learning_rate": 0.00044455091547698333, "loss": 0.2057, "step": 156620 }, { "epoch": 6.49, "grad_norm": 0.83203125, "learning_rate": 0.00044454410438153635, "loss": 0.1755, "step": 156630 }, { "epoch": 6.49, "grad_norm": 0.6484375, "learning_rate": 0.00044453729291997837, "loss": 0.1563, "step": 156640 }, { "epoch": 6.49, "grad_norm": 1.234375, "learning_rate": 0.00044453048109232223, "loss": 0.1638, "step": 156650 }, { "epoch": 6.49, "grad_norm": 0.40234375, "learning_rate": 0.0004445236688985808, "loss": 0.2125, "step": 156660 }, { "epoch": 6.49, "grad_norm": 0.4296875, "learning_rate": 0.0004445168563387667, "loss": 0.2561, "step": 156670 }, { "epoch": 6.49, "grad_norm": 0.0, "learning_rate": 0.0004445100434128929, "loss": 0.2083, "step": 156680 }, { "epoch": 6.49, "grad_norm": 0.7890625, "learning_rate": 0.00044450323012097225, "loss": 0.1889, "step": 156690 }, { "epoch": 6.49, "grad_norm": 0.7109375, "learning_rate": 0.0004444964164630175, "loss": 0.21, "step": 156700 }, { "epoch": 6.49, "grad_norm": 0.921875, "learning_rate": 0.0004444896024390416, "loss": 0.2174, "step": 156710 }, { "epoch": 6.49, "grad_norm": 0.6796875, "learning_rate": 0.00044448278804905717, "loss": 0.1929, "step": 156720 }, { "epoch": 6.49, "grad_norm": 0.36328125, "learning_rate": 0.0004444759732930771, "loss": 0.1695, "step": 156730 }, { "epoch": 6.49, "grad_norm": 0.85546875, "learning_rate": 0.00044446915817111435, "loss": 0.2394, "step": 156740 }, { "epoch": 6.49, "grad_norm": 0.82421875, "learning_rate": 0.00044446234268318165, "loss": 0.1988, "step": 156750 }, { "epoch": 6.49, "grad_norm": 0.69921875, "learning_rate": 0.0004444555268292917, "loss": 0.2264, "step": 156760 }, { "epoch": 6.49, "grad_norm": 0.71875, "learning_rate": 0.00044444871060945757, "loss": 0.2291, "step": 156770 }, { "epoch": 6.49, "grad_norm": 1.0, "learning_rate": 0.0004444418940236919, "loss": 0.1762, "step": 156780 }, { "epoch": 6.49, "grad_norm": 0.578125, "learning_rate": 0.0004444350770720076, "loss": 0.2188, "step": 156790 }, { "epoch": 6.49, "grad_norm": 0.4140625, "learning_rate": 0.0004444282597544175, "loss": 0.2216, "step": 156800 }, { "epoch": 6.5, "grad_norm": 0.796875, "learning_rate": 0.0004444214420709344, "loss": 0.2085, "step": 156810 }, { "epoch": 6.5, "grad_norm": 0.6640625, "learning_rate": 0.0004444146240215711, "loss": 0.1881, "step": 156820 }, { "epoch": 6.5, "grad_norm": 0.40234375, "learning_rate": 0.00044440780560634055, "loss": 0.1581, "step": 156830 }, { "epoch": 6.5, "grad_norm": 0.69140625, "learning_rate": 0.0004444009868252554, "loss": 0.2258, "step": 156840 }, { "epoch": 6.5, "grad_norm": 0.79296875, "learning_rate": 0.0004443941676783286, "loss": 0.2169, "step": 156850 }, { "epoch": 6.5, "grad_norm": 0.52734375, "learning_rate": 0.00044438734816557306, "loss": 0.2448, "step": 156860 }, { "epoch": 6.5, "grad_norm": 0.490234375, "learning_rate": 0.00044438052828700146, "loss": 0.2144, "step": 156870 }, { "epoch": 6.5, "grad_norm": 0.75390625, "learning_rate": 0.0004443737080426268, "loss": 0.1967, "step": 156880 }, { "epoch": 6.5, "grad_norm": 0.71484375, "learning_rate": 0.0004443668874324617, "loss": 0.2305, "step": 156890 }, { "epoch": 6.5, "grad_norm": 0.40625, "learning_rate": 0.0004443600664565191, "loss": 0.2164, "step": 156900 }, { "epoch": 6.5, "grad_norm": 0.8671875, "learning_rate": 0.0004443532451148119, "loss": 0.1776, "step": 156910 }, { "epoch": 6.5, "grad_norm": 2.609375, "learning_rate": 0.0004443464234073529, "loss": 0.2342, "step": 156920 }, { "epoch": 6.5, "grad_norm": 0.78515625, "learning_rate": 0.00044433960133415486, "loss": 0.1994, "step": 156930 }, { "epoch": 6.5, "grad_norm": 0.5390625, "learning_rate": 0.0004443327788952307, "loss": 0.2228, "step": 156940 }, { "epoch": 6.5, "grad_norm": 1.2734375, "learning_rate": 0.0004443259560905932, "loss": 0.2157, "step": 156950 }, { "epoch": 6.5, "grad_norm": 0.337890625, "learning_rate": 0.0004443191329202553, "loss": 0.1703, "step": 156960 }, { "epoch": 6.5, "grad_norm": 0.66796875, "learning_rate": 0.00044431230938422975, "loss": 0.2235, "step": 156970 }, { "epoch": 6.5, "grad_norm": 2.09375, "learning_rate": 0.0004443054854825294, "loss": 0.1879, "step": 156980 }, { "epoch": 6.5, "grad_norm": 1.0234375, "learning_rate": 0.00044429866121516707, "loss": 0.2217, "step": 156990 }, { "epoch": 6.5, "grad_norm": 0.6953125, "learning_rate": 0.0004442918365821558, "loss": 0.2284, "step": 157000 }, { "epoch": 6.5, "grad_norm": 0.52734375, "learning_rate": 0.00044428501158350807, "loss": 0.1975, "step": 157010 }, { "epoch": 6.5, "grad_norm": 0.408203125, "learning_rate": 0.00044427818621923707, "loss": 0.2262, "step": 157020 }, { "epoch": 6.5, "grad_norm": 0.6875, "learning_rate": 0.0004442713604893555, "loss": 0.2118, "step": 157030 }, { "epoch": 6.5, "grad_norm": 0.72265625, "learning_rate": 0.00044426453439387614, "loss": 0.1871, "step": 157040 }, { "epoch": 6.5, "grad_norm": 0.640625, "learning_rate": 0.00044425770793281197, "loss": 0.1787, "step": 157050 }, { "epoch": 6.51, "grad_norm": 0.34765625, "learning_rate": 0.0004442508811061757, "loss": 0.1613, "step": 157060 }, { "epoch": 6.51, "grad_norm": 0.7265625, "learning_rate": 0.00044424405391398027, "loss": 0.2045, "step": 157070 }, { "epoch": 6.51, "grad_norm": 0.78125, "learning_rate": 0.00044423722635623854, "loss": 0.1968, "step": 157080 }, { "epoch": 6.51, "grad_norm": 0.88671875, "learning_rate": 0.0004442303984329633, "loss": 0.2474, "step": 157090 }, { "epoch": 6.51, "grad_norm": 0.64453125, "learning_rate": 0.00044422357014416744, "loss": 0.2557, "step": 157100 }, { "epoch": 6.51, "grad_norm": 0.609375, "learning_rate": 0.0004442167414898638, "loss": 0.1799, "step": 157110 }, { "epoch": 6.51, "grad_norm": 0.76171875, "learning_rate": 0.0004442099124700652, "loss": 0.1982, "step": 157120 }, { "epoch": 6.51, "grad_norm": 0.88671875, "learning_rate": 0.0004442030830847845, "loss": 0.2036, "step": 157130 }, { "epoch": 6.51, "grad_norm": 1.0390625, "learning_rate": 0.00044419625333403463, "loss": 0.2101, "step": 157140 }, { "epoch": 6.51, "grad_norm": 0.458984375, "learning_rate": 0.0004441894232178284, "loss": 0.2092, "step": 157150 }, { "epoch": 6.51, "grad_norm": 0.63671875, "learning_rate": 0.00044418259273617855, "loss": 0.1534, "step": 157160 }, { "epoch": 6.51, "grad_norm": 0.828125, "learning_rate": 0.00044417576188909814, "loss": 0.1815, "step": 157170 }, { "epoch": 6.51, "grad_norm": 0.60546875, "learning_rate": 0.0004441689306765998, "loss": 0.22, "step": 157180 }, { "epoch": 6.51, "grad_norm": 0.5390625, "learning_rate": 0.0004441620990986966, "loss": 0.1978, "step": 157190 }, { "epoch": 6.51, "grad_norm": 0.5859375, "learning_rate": 0.0004441552671554012, "loss": 0.2653, "step": 157200 }, { "epoch": 6.51, "grad_norm": 0.828125, "learning_rate": 0.0004441484348467266, "loss": 0.1877, "step": 157210 }, { "epoch": 6.51, "grad_norm": 1.203125, "learning_rate": 0.0004441416021726857, "loss": 0.1963, "step": 157220 }, { "epoch": 6.51, "grad_norm": 0.40234375, "learning_rate": 0.0004441347691332912, "loss": 0.219, "step": 157230 }, { "epoch": 6.51, "grad_norm": 0.9609375, "learning_rate": 0.00044412793572855605, "loss": 0.2199, "step": 157240 }, { "epoch": 6.51, "grad_norm": 0.609375, "learning_rate": 0.000444121101958493, "loss": 0.2087, "step": 157250 }, { "epoch": 6.51, "grad_norm": 0.95703125, "learning_rate": 0.00044411426782311504, "loss": 0.1934, "step": 157260 }, { "epoch": 6.51, "grad_norm": 0.455078125, "learning_rate": 0.000444107433322435, "loss": 0.1492, "step": 157270 }, { "epoch": 6.51, "grad_norm": 0.470703125, "learning_rate": 0.00044410059845646576, "loss": 0.2037, "step": 157280 }, { "epoch": 6.51, "grad_norm": 0.9453125, "learning_rate": 0.00044409376322522017, "loss": 0.2411, "step": 157290 }, { "epoch": 6.52, "grad_norm": 0.41796875, "learning_rate": 0.00044408692762871104, "loss": 0.1795, "step": 157300 }, { "epoch": 6.52, "grad_norm": 1.8984375, "learning_rate": 0.0004440800916669513, "loss": 0.2023, "step": 157310 }, { "epoch": 6.52, "grad_norm": 0.546875, "learning_rate": 0.0004440732553399537, "loss": 0.2007, "step": 157320 }, { "epoch": 6.52, "grad_norm": 0.95703125, "learning_rate": 0.0004440664186477313, "loss": 0.1835, "step": 157330 }, { "epoch": 6.52, "grad_norm": 0.53125, "learning_rate": 0.0004440595815902968, "loss": 0.2261, "step": 157340 }, { "epoch": 6.52, "grad_norm": 0.6484375, "learning_rate": 0.00044405274416766316, "loss": 0.1728, "step": 157350 }, { "epoch": 6.52, "grad_norm": 0.76171875, "learning_rate": 0.00044404590637984315, "loss": 0.2045, "step": 157360 }, { "epoch": 6.52, "grad_norm": 0.640625, "learning_rate": 0.00044403906822684976, "loss": 0.1568, "step": 157370 }, { "epoch": 6.52, "grad_norm": 0.7734375, "learning_rate": 0.0004440322297086957, "loss": 0.1599, "step": 157380 }, { "epoch": 6.52, "grad_norm": 1.3359375, "learning_rate": 0.00044402539082539404, "loss": 0.224, "step": 157390 }, { "epoch": 6.52, "grad_norm": 0.388671875, "learning_rate": 0.0004440185515769576, "loss": 0.212, "step": 157400 }, { "epoch": 6.52, "grad_norm": 0.734375, "learning_rate": 0.00044401171196339906, "loss": 0.2136, "step": 157410 }, { "epoch": 6.52, "grad_norm": 0.62890625, "learning_rate": 0.0004440048719847315, "loss": 0.2316, "step": 157420 }, { "epoch": 6.52, "grad_norm": 0.62109375, "learning_rate": 0.00044399803164096776, "loss": 0.1642, "step": 157430 }, { "epoch": 6.52, "grad_norm": 1.0234375, "learning_rate": 0.0004439911909321206, "loss": 0.199, "step": 157440 }, { "epoch": 6.52, "grad_norm": 0.52734375, "learning_rate": 0.00044398434985820303, "loss": 0.2067, "step": 157450 }, { "epoch": 6.52, "grad_norm": 0.365234375, "learning_rate": 0.0004439775084192278, "loss": 0.1551, "step": 157460 }, { "epoch": 6.52, "grad_norm": 1.59375, "learning_rate": 0.0004439706666152079, "loss": 0.2614, "step": 157470 }, { "epoch": 6.52, "grad_norm": 1.0703125, "learning_rate": 0.00044396382444615606, "loss": 0.2009, "step": 157480 }, { "epoch": 6.52, "grad_norm": 0.298828125, "learning_rate": 0.00044395698191208535, "loss": 0.1795, "step": 157490 }, { "epoch": 6.52, "grad_norm": 0.80859375, "learning_rate": 0.0004439501390130085, "loss": 0.2199, "step": 157500 }, { "epoch": 6.52, "grad_norm": 0.86328125, "learning_rate": 0.00044394329574893844, "loss": 0.2321, "step": 157510 }, { "epoch": 6.52, "grad_norm": 0.421875, "learning_rate": 0.000443936452119888, "loss": 0.1796, "step": 157520 }, { "epoch": 6.52, "grad_norm": 0.431640625, "learning_rate": 0.0004439296081258701, "loss": 0.2178, "step": 157530 }, { "epoch": 6.53, "grad_norm": 0.8828125, "learning_rate": 0.00044392276376689773, "loss": 0.1966, "step": 157540 }, { "epoch": 6.53, "grad_norm": 1.1015625, "learning_rate": 0.00044391591904298356, "loss": 0.2164, "step": 157550 }, { "epoch": 6.53, "grad_norm": 0.578125, "learning_rate": 0.0004439090739541406, "loss": 0.174, "step": 157560 }, { "epoch": 6.53, "grad_norm": 1.203125, "learning_rate": 0.00044390222850038174, "loss": 0.2126, "step": 157570 }, { "epoch": 6.53, "grad_norm": 0.87890625, "learning_rate": 0.0004438953826817198, "loss": 0.1856, "step": 157580 }, { "epoch": 6.53, "grad_norm": 0.5390625, "learning_rate": 0.0004438885364981677, "loss": 0.1598, "step": 157590 }, { "epoch": 6.53, "grad_norm": 0.5, "learning_rate": 0.00044388168994973827, "loss": 0.2389, "step": 157600 }, { "epoch": 6.53, "grad_norm": 1.0859375, "learning_rate": 0.00044387484303644444, "loss": 0.2364, "step": 157610 }, { "epoch": 6.53, "grad_norm": 0.7421875, "learning_rate": 0.0004438679957582992, "loss": 0.2406, "step": 157620 }, { "epoch": 6.53, "grad_norm": 0.458984375, "learning_rate": 0.0004438611481153152, "loss": 0.1723, "step": 157630 }, { "epoch": 6.53, "grad_norm": 1.4375, "learning_rate": 0.0004438543001075055, "loss": 0.1816, "step": 157640 }, { "epoch": 6.53, "grad_norm": 1.609375, "learning_rate": 0.00044384745173488293, "loss": 0.1884, "step": 157650 }, { "epoch": 6.53, "grad_norm": 1.3515625, "learning_rate": 0.0004438406029974604, "loss": 0.2247, "step": 157660 }, { "epoch": 6.53, "grad_norm": 0.8203125, "learning_rate": 0.0004438337538952508, "loss": 0.2003, "step": 157670 }, { "epoch": 6.53, "grad_norm": 1.1328125, "learning_rate": 0.00044382690442826695, "loss": 0.1858, "step": 157680 }, { "epoch": 6.53, "grad_norm": 0.3828125, "learning_rate": 0.00044382005459652185, "loss": 0.2003, "step": 157690 }, { "epoch": 6.53, "grad_norm": 0.578125, "learning_rate": 0.0004438132044000283, "loss": 0.2334, "step": 157700 }, { "epoch": 6.53, "grad_norm": 1.3125, "learning_rate": 0.0004438063538387993, "loss": 0.2126, "step": 157710 }, { "epoch": 6.53, "grad_norm": 0.921875, "learning_rate": 0.00044379950291284763, "loss": 0.1639, "step": 157720 }, { "epoch": 6.53, "grad_norm": 0.21484375, "learning_rate": 0.0004437926516221862, "loss": 0.163, "step": 157730 }, { "epoch": 6.53, "grad_norm": 0.89453125, "learning_rate": 0.00044378579996682795, "loss": 0.2041, "step": 157740 }, { "epoch": 6.53, "grad_norm": 0.5703125, "learning_rate": 0.00044377894794678577, "loss": 0.2244, "step": 157750 }, { "epoch": 6.53, "grad_norm": 0.73046875, "learning_rate": 0.0004437720955620725, "loss": 0.1841, "step": 157760 }, { "epoch": 6.53, "grad_norm": 0.6640625, "learning_rate": 0.0004437652428127011, "loss": 0.1732, "step": 157770 }, { "epoch": 6.54, "grad_norm": 0.66796875, "learning_rate": 0.00044375838969868444, "loss": 0.2053, "step": 157780 }, { "epoch": 6.54, "grad_norm": 0.38671875, "learning_rate": 0.0004437515362200354, "loss": 0.1877, "step": 157790 }, { "epoch": 6.54, "grad_norm": 1.515625, "learning_rate": 0.0004437446823767669, "loss": 0.192, "step": 157800 }, { "epoch": 6.54, "grad_norm": 0.94921875, "learning_rate": 0.0004437378281688918, "loss": 0.2113, "step": 157810 }, { "epoch": 6.54, "grad_norm": 0.7265625, "learning_rate": 0.0004437309735964231, "loss": 0.1711, "step": 157820 }, { "epoch": 6.54, "grad_norm": 0.361328125, "learning_rate": 0.00044372411865937356, "loss": 0.1849, "step": 157830 }, { "epoch": 6.54, "grad_norm": 0.95703125, "learning_rate": 0.00044371726335775617, "loss": 0.2292, "step": 157840 }, { "epoch": 6.54, "grad_norm": 0.98046875, "learning_rate": 0.00044371040769158377, "loss": 0.2446, "step": 157850 }, { "epoch": 6.54, "grad_norm": 0.515625, "learning_rate": 0.0004437035516608694, "loss": 0.1641, "step": 157860 }, { "epoch": 6.54, "grad_norm": 1.0625, "learning_rate": 0.00044369669526562577, "loss": 0.2831, "step": 157870 }, { "epoch": 6.54, "grad_norm": 0.7890625, "learning_rate": 0.00044368983850586596, "loss": 0.2173, "step": 157880 }, { "epoch": 6.54, "grad_norm": 1.0078125, "learning_rate": 0.0004436829813816027, "loss": 0.2006, "step": 157890 }, { "epoch": 6.54, "grad_norm": 0.53515625, "learning_rate": 0.00044367612389284905, "loss": 0.1874, "step": 157900 }, { "epoch": 6.54, "grad_norm": 0.5234375, "learning_rate": 0.00044366926603961785, "loss": 0.2119, "step": 157910 }, { "epoch": 6.54, "grad_norm": 0.95703125, "learning_rate": 0.00044366240782192195, "loss": 0.1655, "step": 157920 }, { "epoch": 6.54, "grad_norm": 0.6484375, "learning_rate": 0.00044365554923977437, "loss": 0.1734, "step": 157930 }, { "epoch": 6.54, "grad_norm": 0.52734375, "learning_rate": 0.0004436486902931879, "loss": 0.2267, "step": 157940 }, { "epoch": 6.54, "grad_norm": 0.9765625, "learning_rate": 0.0004436418309821756, "loss": 0.1765, "step": 157950 }, { "epoch": 6.54, "grad_norm": 0.4453125, "learning_rate": 0.00044363497130675024, "loss": 0.2023, "step": 157960 }, { "epoch": 6.54, "grad_norm": 0.6875, "learning_rate": 0.0004436281112669247, "loss": 0.1909, "step": 157970 }, { "epoch": 6.54, "grad_norm": 1.4453125, "learning_rate": 0.00044362125086271206, "loss": 0.2214, "step": 157980 }, { "epoch": 6.54, "grad_norm": 0.54296875, "learning_rate": 0.0004436143900941251, "loss": 0.1884, "step": 157990 }, { "epoch": 6.54, "grad_norm": 0.7890625, "learning_rate": 0.0004436075289611768, "loss": 0.2153, "step": 158000 }, { "epoch": 6.54, "grad_norm": 0.357421875, "learning_rate": 0.00044360066746388, "loss": 0.1818, "step": 158010 }, { "epoch": 6.55, "grad_norm": 0.41796875, "learning_rate": 0.00044359380560224766, "loss": 0.2346, "step": 158020 }, { "epoch": 6.55, "grad_norm": 0.67578125, "learning_rate": 0.0004435869433762927, "loss": 0.1714, "step": 158030 }, { "epoch": 6.55, "grad_norm": 1.8828125, "learning_rate": 0.00044358008078602794, "loss": 0.233, "step": 158040 }, { "epoch": 6.55, "grad_norm": 0.61328125, "learning_rate": 0.00044357321783146646, "loss": 0.2037, "step": 158050 }, { "epoch": 6.55, "grad_norm": 0.59765625, "learning_rate": 0.0004435663545126211, "loss": 0.1593, "step": 158060 }, { "epoch": 6.55, "grad_norm": 0.5, "learning_rate": 0.00044355949082950475, "loss": 0.2248, "step": 158070 }, { "epoch": 6.55, "grad_norm": 0.419921875, "learning_rate": 0.00044355262678213026, "loss": 0.2842, "step": 158080 }, { "epoch": 6.55, "grad_norm": 0.80078125, "learning_rate": 0.0004435457623705107, "loss": 0.2216, "step": 158090 }, { "epoch": 6.55, "grad_norm": 0.375, "learning_rate": 0.0004435388975946589, "loss": 0.1752, "step": 158100 }, { "epoch": 6.55, "grad_norm": 0.703125, "learning_rate": 0.00044353203245458785, "loss": 0.2228, "step": 158110 }, { "epoch": 6.55, "grad_norm": 0.71875, "learning_rate": 0.0004435251669503103, "loss": 0.1865, "step": 158120 }, { "epoch": 6.55, "grad_norm": 0.6796875, "learning_rate": 0.00044351830108183937, "loss": 0.2248, "step": 158130 }, { "epoch": 6.55, "grad_norm": 0.384765625, "learning_rate": 0.00044351143484918786, "loss": 0.1893, "step": 158140 }, { "epoch": 6.55, "grad_norm": 0.625, "learning_rate": 0.0004435045682523687, "loss": 0.2102, "step": 158150 }, { "epoch": 6.55, "grad_norm": 0.578125, "learning_rate": 0.00044349770129139496, "loss": 0.1842, "step": 158160 }, { "epoch": 6.55, "grad_norm": 0.76171875, "learning_rate": 0.00044349083396627933, "loss": 0.178, "step": 158170 }, { "epoch": 6.55, "grad_norm": 0.57421875, "learning_rate": 0.0004434839662770349, "loss": 0.2051, "step": 158180 }, { "epoch": 6.55, "grad_norm": 1.3046875, "learning_rate": 0.00044347709822367454, "loss": 0.1826, "step": 158190 }, { "epoch": 6.55, "grad_norm": 1.4296875, "learning_rate": 0.0004434702298062111, "loss": 0.1929, "step": 158200 }, { "epoch": 6.55, "grad_norm": 0.45703125, "learning_rate": 0.00044346336102465766, "loss": 0.2315, "step": 158210 }, { "epoch": 6.55, "grad_norm": 0.57421875, "learning_rate": 0.00044345649187902704, "loss": 0.1781, "step": 158220 }, { "epoch": 6.55, "grad_norm": 1.328125, "learning_rate": 0.0004434496223693322, "loss": 0.2006, "step": 158230 }, { "epoch": 6.55, "grad_norm": 0.423828125, "learning_rate": 0.000443442752495586, "loss": 0.1847, "step": 158240 }, { "epoch": 6.55, "grad_norm": 0.8046875, "learning_rate": 0.00044343588225780154, "loss": 0.1776, "step": 158250 }, { "epoch": 6.56, "grad_norm": 0.6171875, "learning_rate": 0.0004434290116559916, "loss": 0.2465, "step": 158260 }, { "epoch": 6.56, "grad_norm": 0.7265625, "learning_rate": 0.00044342214069016905, "loss": 0.1992, "step": 158270 }, { "epoch": 6.56, "grad_norm": 1.4375, "learning_rate": 0.000443415269360347, "loss": 0.186, "step": 158280 }, { "epoch": 6.56, "grad_norm": 0.78515625, "learning_rate": 0.0004434083976665383, "loss": 0.1997, "step": 158290 }, { "epoch": 6.56, "grad_norm": 0.625, "learning_rate": 0.0004434015256087559, "loss": 0.237, "step": 158300 }, { "epoch": 6.56, "grad_norm": 0.6875, "learning_rate": 0.0004433946531870128, "loss": 0.2453, "step": 158310 }, { "epoch": 6.56, "grad_norm": 0.142578125, "learning_rate": 0.00044338778040132164, "loss": 0.2228, "step": 158320 }, { "epoch": 6.56, "grad_norm": 1.8046875, "learning_rate": 0.00044338090725169565, "loss": 0.2435, "step": 158330 }, { "epoch": 6.56, "grad_norm": 1.0625, "learning_rate": 0.00044337403373814776, "loss": 0.2009, "step": 158340 }, { "epoch": 6.56, "grad_norm": 0.84375, "learning_rate": 0.0004433671598606908, "loss": 0.1661, "step": 158350 }, { "epoch": 6.56, "grad_norm": 1.015625, "learning_rate": 0.0004433602856193376, "loss": 0.2079, "step": 158360 }, { "epoch": 6.56, "grad_norm": 0.69140625, "learning_rate": 0.0004433534110141013, "loss": 0.2127, "step": 158370 }, { "epoch": 6.56, "grad_norm": 0.271484375, "learning_rate": 0.00044334653604499475, "loss": 0.2405, "step": 158380 }, { "epoch": 6.56, "grad_norm": 0.6640625, "learning_rate": 0.000443339660712031, "loss": 0.2045, "step": 158390 }, { "epoch": 6.56, "grad_norm": 0.6640625, "learning_rate": 0.0004433327850152228, "loss": 0.2183, "step": 158400 }, { "epoch": 6.56, "grad_norm": 0.55859375, "learning_rate": 0.00044332590895458315, "loss": 0.1842, "step": 158410 }, { "epoch": 6.56, "grad_norm": 0.70703125, "learning_rate": 0.000443319032530125, "loss": 0.2473, "step": 158420 }, { "epoch": 6.56, "grad_norm": 0.49609375, "learning_rate": 0.0004433121557418614, "loss": 0.1922, "step": 158430 }, { "epoch": 6.56, "grad_norm": 0.5, "learning_rate": 0.00044330527858980515, "loss": 0.2105, "step": 158440 }, { "epoch": 6.56, "grad_norm": 0.72265625, "learning_rate": 0.00044329840107396924, "loss": 0.1869, "step": 158450 }, { "epoch": 6.56, "grad_norm": 0.490234375, "learning_rate": 0.0004432915231943666, "loss": 0.2301, "step": 158460 }, { "epoch": 6.56, "grad_norm": 1.5, "learning_rate": 0.0004432846449510102, "loss": 0.2392, "step": 158470 }, { "epoch": 6.56, "grad_norm": 0.51171875, "learning_rate": 0.000443277766343913, "loss": 0.2393, "step": 158480 }, { "epoch": 6.56, "grad_norm": 0.33203125, "learning_rate": 0.00044327088737308783, "loss": 0.1861, "step": 158490 }, { "epoch": 6.57, "grad_norm": 0.55859375, "learning_rate": 0.0004432640080385478, "loss": 0.1878, "step": 158500 }, { "epoch": 6.57, "grad_norm": 0.357421875, "learning_rate": 0.0004432571283403058, "loss": 0.2011, "step": 158510 }, { "epoch": 6.57, "grad_norm": 0.5390625, "learning_rate": 0.00044325024827837465, "loss": 0.2189, "step": 158520 }, { "epoch": 6.57, "grad_norm": 0.58984375, "learning_rate": 0.0004432433678527675, "loss": 0.2287, "step": 158530 }, { "epoch": 6.57, "grad_norm": 0.21875, "learning_rate": 0.0004432364870634972, "loss": 0.1968, "step": 158540 }, { "epoch": 6.57, "grad_norm": 1.203125, "learning_rate": 0.00044322960591057666, "loss": 0.1985, "step": 158550 }, { "epoch": 6.57, "grad_norm": 0.5703125, "learning_rate": 0.00044322272439401884, "loss": 0.21, "step": 158560 }, { "epoch": 6.57, "grad_norm": 0.390625, "learning_rate": 0.00044321584251383675, "loss": 0.1949, "step": 158570 }, { "epoch": 6.57, "grad_norm": 0.359375, "learning_rate": 0.00044320896027004333, "loss": 0.1532, "step": 158580 }, { "epoch": 6.57, "grad_norm": 0.73046875, "learning_rate": 0.0004432020776626515, "loss": 0.1903, "step": 158590 }, { "epoch": 6.57, "grad_norm": 0.3515625, "learning_rate": 0.0004431951946916742, "loss": 0.2115, "step": 158600 }, { "epoch": 6.57, "grad_norm": 0.921875, "learning_rate": 0.00044318831135712445, "loss": 0.2309, "step": 158610 }, { "epoch": 6.57, "grad_norm": 0.8125, "learning_rate": 0.0004431814276590151, "loss": 0.2113, "step": 158620 }, { "epoch": 6.57, "grad_norm": 0.1689453125, "learning_rate": 0.0004431745435973592, "loss": 0.1924, "step": 158630 }, { "epoch": 6.57, "grad_norm": 0.609375, "learning_rate": 0.00044316765917216966, "loss": 0.2367, "step": 158640 }, { "epoch": 6.57, "grad_norm": 0.94921875, "learning_rate": 0.0004431607743834595, "loss": 0.1821, "step": 158650 }, { "epoch": 6.57, "grad_norm": 0.25390625, "learning_rate": 0.00044315388923124154, "loss": 0.1519, "step": 158660 }, { "epoch": 6.57, "grad_norm": 0.68359375, "learning_rate": 0.0004431470037155288, "loss": 0.247, "step": 158670 }, { "epoch": 6.57, "grad_norm": 0.2294921875, "learning_rate": 0.00044314011783633435, "loss": 0.1739, "step": 158680 }, { "epoch": 6.57, "grad_norm": 0.765625, "learning_rate": 0.000443133231593671, "loss": 0.2153, "step": 158690 }, { "epoch": 6.57, "grad_norm": 0.546875, "learning_rate": 0.0004431263449875518, "loss": 0.2264, "step": 158700 }, { "epoch": 6.57, "grad_norm": 0.8046875, "learning_rate": 0.0004431194580179897, "loss": 0.1966, "step": 158710 }, { "epoch": 6.57, "grad_norm": 0.40625, "learning_rate": 0.00044311257068499753, "loss": 0.1882, "step": 158720 }, { "epoch": 6.57, "grad_norm": 0.546875, "learning_rate": 0.00044310568298858844, "loss": 0.2079, "step": 158730 }, { "epoch": 6.57, "grad_norm": 0.99609375, "learning_rate": 0.0004430987949287753, "loss": 0.1938, "step": 158740 }, { "epoch": 6.58, "grad_norm": 0.69921875, "learning_rate": 0.0004430919065055711, "loss": 0.2137, "step": 158750 }, { "epoch": 6.58, "grad_norm": 1.6484375, "learning_rate": 0.00044308501771898873, "loss": 0.1959, "step": 158760 }, { "epoch": 6.58, "grad_norm": 0.482421875, "learning_rate": 0.0004430781285690412, "loss": 0.14, "step": 158770 }, { "epoch": 6.58, "grad_norm": 0.98046875, "learning_rate": 0.0004430712390557415, "loss": 0.1904, "step": 158780 }, { "epoch": 6.58, "grad_norm": 0.8828125, "learning_rate": 0.00044306434917910264, "loss": 0.1708, "step": 158790 }, { "epoch": 6.58, "grad_norm": 0.55078125, "learning_rate": 0.00044305745893913746, "loss": 0.2292, "step": 158800 }, { "epoch": 6.58, "grad_norm": 0.75390625, "learning_rate": 0.000443050568335859, "loss": 0.2356, "step": 158810 }, { "epoch": 6.58, "grad_norm": 0.90625, "learning_rate": 0.00044304367736928025, "loss": 0.2138, "step": 158820 }, { "epoch": 6.58, "grad_norm": 0.58984375, "learning_rate": 0.0004430367860394141, "loss": 0.1758, "step": 158830 }, { "epoch": 6.58, "grad_norm": 0.67578125, "learning_rate": 0.0004430298943462736, "loss": 0.2498, "step": 158840 }, { "epoch": 6.58, "grad_norm": 0.96875, "learning_rate": 0.0004430230022898717, "loss": 0.2254, "step": 158850 }, { "epoch": 6.58, "grad_norm": 1.0546875, "learning_rate": 0.0004430161098702213, "loss": 0.1787, "step": 158860 }, { "epoch": 6.58, "grad_norm": 0.98828125, "learning_rate": 0.0004430092170873354, "loss": 0.1687, "step": 158870 }, { "epoch": 6.58, "grad_norm": 0.6171875, "learning_rate": 0.0004430023239412271, "loss": 0.1387, "step": 158880 }, { "epoch": 6.58, "grad_norm": 0.4921875, "learning_rate": 0.0004429954304319092, "loss": 0.2586, "step": 158890 }, { "epoch": 6.58, "grad_norm": 0.578125, "learning_rate": 0.00044298853655939474, "loss": 0.1841, "step": 158900 }, { "epoch": 6.58, "grad_norm": 0.376953125, "learning_rate": 0.00044298164232369673, "loss": 0.1526, "step": 158910 }, { "epoch": 6.58, "grad_norm": 0.7890625, "learning_rate": 0.0004429747477248281, "loss": 0.2, "step": 158920 }, { "epoch": 6.58, "grad_norm": 0.466796875, "learning_rate": 0.00044296785276280184, "loss": 0.2224, "step": 158930 }, { "epoch": 6.58, "grad_norm": 0.82421875, "learning_rate": 0.00044296095743763086, "loss": 0.1761, "step": 158940 }, { "epoch": 6.58, "grad_norm": 1.0, "learning_rate": 0.00044295406174932827, "loss": 0.2099, "step": 158950 }, { "epoch": 6.58, "grad_norm": 0.466796875, "learning_rate": 0.00044294716569790696, "loss": 0.2307, "step": 158960 }, { "epoch": 6.58, "grad_norm": 0.6015625, "learning_rate": 0.0004429402692833799, "loss": 0.1771, "step": 158970 }, { "epoch": 6.58, "grad_norm": 0.44921875, "learning_rate": 0.00044293337250576, "loss": 0.2365, "step": 158980 }, { "epoch": 6.59, "grad_norm": 0.69921875, "learning_rate": 0.0004429264753650605, "loss": 0.2277, "step": 158990 }, { "epoch": 6.59, "grad_norm": 0.5703125, "learning_rate": 0.0004429195778612941, "loss": 0.217, "step": 159000 }, { "epoch": 6.59, "grad_norm": 0.53125, "learning_rate": 0.00044291267999447393, "loss": 0.1751, "step": 159010 }, { "epoch": 6.59, "grad_norm": 0.87109375, "learning_rate": 0.00044290578176461295, "loss": 0.1936, "step": 159020 }, { "epoch": 6.59, "grad_norm": 0.57421875, "learning_rate": 0.0004428988831717241, "loss": 0.2197, "step": 159030 }, { "epoch": 6.59, "grad_norm": 0.62109375, "learning_rate": 0.00044289198421582033, "loss": 0.2248, "step": 159040 }, { "epoch": 6.59, "grad_norm": 1.59375, "learning_rate": 0.0004428850848969147, "loss": 0.16, "step": 159050 }, { "epoch": 6.59, "grad_norm": 0.51953125, "learning_rate": 0.0004428781852150202, "loss": 0.2043, "step": 159060 }, { "epoch": 6.59, "grad_norm": 0.5390625, "learning_rate": 0.0004428712851701498, "loss": 0.2055, "step": 159070 }, { "epoch": 6.59, "grad_norm": 0.76953125, "learning_rate": 0.00044286438476231636, "loss": 0.203, "step": 159080 }, { "epoch": 6.59, "grad_norm": 0.50390625, "learning_rate": 0.00044285748399153316, "loss": 0.1524, "step": 159090 }, { "epoch": 6.59, "grad_norm": 0.71484375, "learning_rate": 0.00044285058285781285, "loss": 0.1924, "step": 159100 }, { "epoch": 6.59, "grad_norm": 0.80859375, "learning_rate": 0.0004428436813611686, "loss": 0.2365, "step": 159110 }, { "epoch": 6.59, "grad_norm": 1.0625, "learning_rate": 0.0004428367795016134, "loss": 0.2038, "step": 159120 }, { "epoch": 6.59, "grad_norm": 0.9453125, "learning_rate": 0.0004428298772791602, "loss": 0.2199, "step": 159130 }, { "epoch": 6.59, "grad_norm": 0.56640625, "learning_rate": 0.000442822974693822, "loss": 0.1903, "step": 159140 }, { "epoch": 6.59, "grad_norm": 0.6875, "learning_rate": 0.0004428160717456118, "loss": 0.1761, "step": 159150 }, { "epoch": 6.59, "grad_norm": 0.390625, "learning_rate": 0.00044280916843454247, "loss": 0.2051, "step": 159160 }, { "epoch": 6.59, "grad_norm": 0.484375, "learning_rate": 0.00044280226476062717, "loss": 0.2056, "step": 159170 }, { "epoch": 6.59, "grad_norm": 0.51953125, "learning_rate": 0.0004427953607238789, "loss": 0.2301, "step": 159180 }, { "epoch": 6.59, "grad_norm": 0.419921875, "learning_rate": 0.00044278845632431054, "loss": 0.1816, "step": 159190 }, { "epoch": 6.59, "grad_norm": 0.8125, "learning_rate": 0.00044278155156193507, "loss": 0.211, "step": 159200 }, { "epoch": 6.59, "grad_norm": 0.51171875, "learning_rate": 0.00044277464643676555, "loss": 0.2561, "step": 159210 }, { "epoch": 6.59, "grad_norm": 0.8671875, "learning_rate": 0.000442767740948815, "loss": 0.2033, "step": 159220 }, { "epoch": 6.6, "grad_norm": 0.5078125, "learning_rate": 0.0004427608350980964, "loss": 0.2046, "step": 159230 }, { "epoch": 6.6, "grad_norm": 0.486328125, "learning_rate": 0.00044275392888462264, "loss": 0.2391, "step": 159240 }, { "epoch": 6.6, "grad_norm": 0.28515625, "learning_rate": 0.00044274702230840687, "loss": 0.1882, "step": 159250 }, { "epoch": 6.6, "grad_norm": 0.546875, "learning_rate": 0.00044274011536946204, "loss": 0.2317, "step": 159260 }, { "epoch": 6.6, "grad_norm": 1.09375, "learning_rate": 0.00044273320806780106, "loss": 0.1245, "step": 159270 }, { "epoch": 6.6, "grad_norm": 0.66796875, "learning_rate": 0.000442726300403437, "loss": 0.2565, "step": 159280 }, { "epoch": 6.6, "grad_norm": 0.640625, "learning_rate": 0.00044271939237638285, "loss": 0.223, "step": 159290 }, { "epoch": 6.6, "grad_norm": 0.396484375, "learning_rate": 0.00044271248398665164, "loss": 0.2274, "step": 159300 }, { "epoch": 6.6, "grad_norm": 1.03125, "learning_rate": 0.0004427055752342564, "loss": 0.2238, "step": 159310 }, { "epoch": 6.6, "grad_norm": 1.703125, "learning_rate": 0.0004426986661192101, "loss": 0.1907, "step": 159320 }, { "epoch": 6.6, "grad_norm": 0.8203125, "learning_rate": 0.0004426917566415256, "loss": 0.1746, "step": 159330 }, { "epoch": 6.6, "grad_norm": 0.72265625, "learning_rate": 0.000442684846801216, "loss": 0.2317, "step": 159340 }, { "epoch": 6.6, "grad_norm": 0.306640625, "learning_rate": 0.00044267793659829446, "loss": 0.2176, "step": 159350 }, { "epoch": 6.6, "grad_norm": 0.6796875, "learning_rate": 0.00044267102603277376, "loss": 0.2543, "step": 159360 }, { "epoch": 6.6, "grad_norm": 1.1328125, "learning_rate": 0.000442664115104667, "loss": 0.165, "step": 159370 }, { "epoch": 6.6, "grad_norm": 0.5625, "learning_rate": 0.0004426572038139872, "loss": 0.1659, "step": 159380 }, { "epoch": 6.6, "grad_norm": 2.046875, "learning_rate": 0.00044265029216074736, "loss": 0.2384, "step": 159390 }, { "epoch": 6.6, "grad_norm": 0.5390625, "learning_rate": 0.00044264338014496054, "loss": 0.2052, "step": 159400 }, { "epoch": 6.6, "grad_norm": 1.2265625, "learning_rate": 0.00044263646776663955, "loss": 0.16, "step": 159410 }, { "epoch": 6.6, "grad_norm": 0.984375, "learning_rate": 0.0004426295550257976, "loss": 0.2113, "step": 159420 }, { "epoch": 6.6, "grad_norm": 0.546875, "learning_rate": 0.0004426226419224476, "loss": 0.1874, "step": 159430 }, { "epoch": 6.6, "grad_norm": 0.796875, "learning_rate": 0.00044261572845660265, "loss": 0.191, "step": 159440 }, { "epoch": 6.6, "grad_norm": 0.46484375, "learning_rate": 0.00044260881462827563, "loss": 0.1852, "step": 159450 }, { "epoch": 6.6, "grad_norm": 0.94921875, "learning_rate": 0.00044260190043747966, "loss": 0.2006, "step": 159460 }, { "epoch": 6.61, "grad_norm": 0.412109375, "learning_rate": 0.0004425949858842277, "loss": 0.2137, "step": 159470 }, { "epoch": 6.61, "grad_norm": 0.875, "learning_rate": 0.0004425880709685328, "loss": 0.1888, "step": 159480 }, { "epoch": 6.61, "grad_norm": 0.74609375, "learning_rate": 0.00044258115569040794, "loss": 0.1933, "step": 159490 }, { "epoch": 6.61, "grad_norm": 0.59765625, "learning_rate": 0.00044257424004986605, "loss": 0.1954, "step": 159500 }, { "epoch": 6.61, "grad_norm": 0.333984375, "learning_rate": 0.00044256732404692037, "loss": 0.2257, "step": 159510 }, { "epoch": 6.61, "grad_norm": 0.79296875, "learning_rate": 0.00044256040768158374, "loss": 0.2225, "step": 159520 }, { "epoch": 6.61, "grad_norm": 0.26953125, "learning_rate": 0.0004425534909538692, "loss": 0.1848, "step": 159530 }, { "epoch": 6.61, "grad_norm": 0.703125, "learning_rate": 0.00044254657386378975, "loss": 0.2149, "step": 159540 }, { "epoch": 6.61, "grad_norm": 0.8125, "learning_rate": 0.0004425396564113585, "loss": 0.2098, "step": 159550 }, { "epoch": 6.61, "grad_norm": 0.51953125, "learning_rate": 0.0004425327385965884, "loss": 0.174, "step": 159560 }, { "epoch": 6.61, "grad_norm": 0.81640625, "learning_rate": 0.0004425258204194924, "loss": 0.1996, "step": 159570 }, { "epoch": 6.61, "grad_norm": 0.51171875, "learning_rate": 0.0004425189018800837, "loss": 0.1516, "step": 159580 }, { "epoch": 6.61, "grad_norm": 0.98046875, "learning_rate": 0.0004425119829783752, "loss": 0.1596, "step": 159590 }, { "epoch": 6.61, "grad_norm": 0.7734375, "learning_rate": 0.00044250506371437996, "loss": 0.1857, "step": 159600 }, { "epoch": 6.61, "grad_norm": 0.57421875, "learning_rate": 0.00044249814408811085, "loss": 0.2033, "step": 159610 }, { "epoch": 6.61, "grad_norm": 0.40234375, "learning_rate": 0.0004424912240995811, "loss": 0.2086, "step": 159620 }, { "epoch": 6.61, "grad_norm": 0.78125, "learning_rate": 0.00044248430374880366, "loss": 0.232, "step": 159630 }, { "epoch": 6.61, "grad_norm": 0.6796875, "learning_rate": 0.00044247738303579155, "loss": 0.2624, "step": 159640 }, { "epoch": 6.61, "grad_norm": 0.93359375, "learning_rate": 0.00044247046196055784, "loss": 0.2061, "step": 159650 }, { "epoch": 6.61, "grad_norm": 0.365234375, "learning_rate": 0.00044246354052311545, "loss": 0.2086, "step": 159660 }, { "epoch": 6.61, "grad_norm": 0.640625, "learning_rate": 0.00044245661872347743, "loss": 0.2174, "step": 159670 }, { "epoch": 6.61, "grad_norm": 0.416015625, "learning_rate": 0.0004424496965616569, "loss": 0.147, "step": 159680 }, { "epoch": 6.61, "grad_norm": 0.7265625, "learning_rate": 0.00044244277403766673, "loss": 0.2037, "step": 159690 }, { "epoch": 6.61, "grad_norm": 0.361328125, "learning_rate": 0.00044243585115152017, "loss": 0.2179, "step": 159700 }, { "epoch": 6.62, "grad_norm": 0.96875, "learning_rate": 0.0004424289279032301, "loss": 0.2512, "step": 159710 }, { "epoch": 6.62, "grad_norm": 0.66796875, "learning_rate": 0.0004424220042928094, "loss": 0.1983, "step": 159720 }, { "epoch": 6.62, "grad_norm": 0.5390625, "learning_rate": 0.00044241508032027147, "loss": 0.1822, "step": 159730 }, { "epoch": 6.62, "grad_norm": 0.380859375, "learning_rate": 0.0004424081559856291, "loss": 0.1916, "step": 159740 }, { "epoch": 6.62, "grad_norm": 0.5703125, "learning_rate": 0.0004424012312888953, "loss": 0.2049, "step": 159750 }, { "epoch": 6.62, "grad_norm": 0.71484375, "learning_rate": 0.00044239430623008315, "loss": 0.1878, "step": 159760 }, { "epoch": 6.62, "grad_norm": 0.75390625, "learning_rate": 0.0004423873808092057, "loss": 0.2013, "step": 159770 }, { "epoch": 6.62, "grad_norm": 0.287109375, "learning_rate": 0.000442380455026276, "loss": 0.1784, "step": 159780 }, { "epoch": 6.62, "grad_norm": 1.515625, "learning_rate": 0.0004423735288813071, "loss": 0.2154, "step": 159790 }, { "epoch": 6.62, "grad_norm": 1.15625, "learning_rate": 0.0004423666023743119, "loss": 0.2352, "step": 159800 }, { "epoch": 6.62, "grad_norm": 0.875, "learning_rate": 0.0004423596755053036, "loss": 0.2422, "step": 159810 }, { "epoch": 6.62, "grad_norm": 0.8046875, "learning_rate": 0.00044235274827429514, "loss": 0.1705, "step": 159820 }, { "epoch": 6.62, "grad_norm": 0.2041015625, "learning_rate": 0.0004423458206812996, "loss": 0.2022, "step": 159830 }, { "epoch": 6.62, "grad_norm": 0.9765625, "learning_rate": 0.00044233889272633, "loss": 0.2019, "step": 159840 }, { "epoch": 6.62, "grad_norm": 0.56640625, "learning_rate": 0.00044233196440939925, "loss": 0.2169, "step": 159850 }, { "epoch": 6.62, "grad_norm": 0.671875, "learning_rate": 0.0004423250357305206, "loss": 0.2333, "step": 159860 }, { "epoch": 6.62, "grad_norm": 0.5859375, "learning_rate": 0.00044231810668970707, "loss": 0.1942, "step": 159870 }, { "epoch": 6.62, "grad_norm": 0.4609375, "learning_rate": 0.0004423111772869716, "loss": 0.1912, "step": 159880 }, { "epoch": 6.62, "grad_norm": 0.66796875, "learning_rate": 0.0004423042475223272, "loss": 0.1942, "step": 159890 }, { "epoch": 6.62, "grad_norm": 0.421875, "learning_rate": 0.00044229731739578703, "loss": 0.1599, "step": 159900 }, { "epoch": 6.62, "grad_norm": 0.98046875, "learning_rate": 0.00044229038690736413, "loss": 0.2075, "step": 159910 }, { "epoch": 6.62, "grad_norm": 0.85546875, "learning_rate": 0.00044228345605707136, "loss": 0.1596, "step": 159920 }, { "epoch": 6.62, "grad_norm": 0.8046875, "learning_rate": 0.000442276524844922, "loss": 0.2201, "step": 159930 }, { "epoch": 6.62, "grad_norm": 0.78515625, "learning_rate": 0.0004422695932709289, "loss": 0.226, "step": 159940 }, { "epoch": 6.63, "grad_norm": 0.90625, "learning_rate": 0.0004422626613351052, "loss": 0.2842, "step": 159950 }, { "epoch": 6.63, "grad_norm": 0.6796875, "learning_rate": 0.000442255729037464, "loss": 0.2453, "step": 159960 }, { "epoch": 6.63, "grad_norm": 0.59765625, "learning_rate": 0.0004422487963780182, "loss": 0.2224, "step": 159970 }, { "epoch": 6.63, "grad_norm": 0.84765625, "learning_rate": 0.00044224186335678093, "loss": 0.1978, "step": 159980 }, { "epoch": 6.63, "grad_norm": 1.6953125, "learning_rate": 0.0004422349299737653, "loss": 0.2436, "step": 159990 }, { "epoch": 6.63, "grad_norm": 0.9609375, "learning_rate": 0.00044222799622898424, "loss": 0.1963, "step": 160000 }, { "epoch": 6.63, "grad_norm": 0.361328125, "learning_rate": 0.0004422210621224508, "loss": 0.1643, "step": 160010 }, { "epoch": 6.63, "grad_norm": 0.29296875, "learning_rate": 0.0004422141276541782, "loss": 0.1956, "step": 160020 }, { "epoch": 6.63, "grad_norm": 0.9296875, "learning_rate": 0.00044220719282417933, "loss": 0.2224, "step": 160030 }, { "epoch": 6.63, "grad_norm": 0.458984375, "learning_rate": 0.0004422002576324673, "loss": 0.2148, "step": 160040 }, { "epoch": 6.63, "grad_norm": 0.80859375, "learning_rate": 0.00044219332207905506, "loss": 0.1913, "step": 160050 }, { "epoch": 6.63, "grad_norm": 0.9375, "learning_rate": 0.0004421863861639558, "loss": 0.1958, "step": 160060 }, { "epoch": 6.63, "grad_norm": 0.6875, "learning_rate": 0.00044217944988718253, "loss": 0.2255, "step": 160070 }, { "epoch": 6.63, "grad_norm": 0.4921875, "learning_rate": 0.00044217251324874825, "loss": 0.1669, "step": 160080 }, { "epoch": 6.63, "grad_norm": 1.265625, "learning_rate": 0.00044216557624866603, "loss": 0.194, "step": 160090 }, { "epoch": 6.63, "grad_norm": 0.57421875, "learning_rate": 0.00044215863888694894, "loss": 0.2193, "step": 160100 }, { "epoch": 6.63, "grad_norm": 0.4375, "learning_rate": 0.00044215170116361014, "loss": 0.2037, "step": 160110 }, { "epoch": 6.63, "grad_norm": 0.671875, "learning_rate": 0.00044214476307866254, "loss": 0.1855, "step": 160120 }, { "epoch": 6.63, "grad_norm": 0.240234375, "learning_rate": 0.0004421378246321192, "loss": 0.1563, "step": 160130 }, { "epoch": 6.63, "grad_norm": 1.109375, "learning_rate": 0.0004421308858239933, "loss": 0.2698, "step": 160140 }, { "epoch": 6.63, "grad_norm": 0.55859375, "learning_rate": 0.0004421239466542978, "loss": 0.2313, "step": 160150 }, { "epoch": 6.63, "grad_norm": 0.609375, "learning_rate": 0.00044211700712304576, "loss": 0.1995, "step": 160160 }, { "epoch": 6.63, "grad_norm": 0.44140625, "learning_rate": 0.0004421100672302503, "loss": 0.1881, "step": 160170 }, { "epoch": 6.63, "grad_norm": 0.82421875, "learning_rate": 0.00044210312697592436, "loss": 0.2222, "step": 160180 }, { "epoch": 6.64, "grad_norm": 0.85546875, "learning_rate": 0.00044209618636008107, "loss": 0.1837, "step": 160190 }, { "epoch": 6.64, "grad_norm": 0.66015625, "learning_rate": 0.0004420892453827336, "loss": 0.1295, "step": 160200 }, { "epoch": 6.64, "grad_norm": 2.78125, "learning_rate": 0.0004420823040438948, "loss": 0.2801, "step": 160210 }, { "epoch": 6.64, "grad_norm": 0.376953125, "learning_rate": 0.000442075362343578, "loss": 0.227, "step": 160220 }, { "epoch": 6.64, "grad_norm": 0.890625, "learning_rate": 0.000442068420281796, "loss": 0.1865, "step": 160230 }, { "epoch": 6.64, "grad_norm": 0.80859375, "learning_rate": 0.000442061477858562, "loss": 0.1942, "step": 160240 }, { "epoch": 6.64, "grad_norm": 0.0, "learning_rate": 0.00044205453507388905, "loss": 0.2555, "step": 160250 }, { "epoch": 6.64, "grad_norm": 0.60546875, "learning_rate": 0.0004420475919277902, "loss": 0.205, "step": 160260 }, { "epoch": 6.64, "grad_norm": 0.92578125, "learning_rate": 0.0004420406484202785, "loss": 0.2043, "step": 160270 }, { "epoch": 6.64, "grad_norm": 0.8359375, "learning_rate": 0.00044203370455136705, "loss": 0.1812, "step": 160280 }, { "epoch": 6.64, "grad_norm": 0.65625, "learning_rate": 0.00044202676032106895, "loss": 0.1813, "step": 160290 }, { "epoch": 6.64, "grad_norm": 0.33984375, "learning_rate": 0.00044201981572939715, "loss": 0.1979, "step": 160300 }, { "epoch": 6.64, "grad_norm": 0.515625, "learning_rate": 0.0004420128707763649, "loss": 0.1509, "step": 160310 }, { "epoch": 6.64, "grad_norm": 0.54296875, "learning_rate": 0.000442005925461985, "loss": 0.1951, "step": 160320 }, { "epoch": 6.64, "grad_norm": 1.5, "learning_rate": 0.0004419989797862708, "loss": 0.2002, "step": 160330 }, { "epoch": 6.64, "grad_norm": 0.84765625, "learning_rate": 0.0004419920337492352, "loss": 0.1872, "step": 160340 }, { "epoch": 6.64, "grad_norm": 0.578125, "learning_rate": 0.0004419850873508914, "loss": 0.2449, "step": 160350 }, { "epoch": 6.64, "grad_norm": 0.51171875, "learning_rate": 0.0004419781405912523, "loss": 0.24, "step": 160360 }, { "epoch": 6.64, "grad_norm": 0.48828125, "learning_rate": 0.00044197119347033116, "loss": 0.2409, "step": 160370 }, { "epoch": 6.64, "grad_norm": 0.0, "learning_rate": 0.00044196424598814087, "loss": 0.2273, "step": 160380 }, { "epoch": 6.64, "grad_norm": 1.15625, "learning_rate": 0.0004419572981446948, "loss": 0.2208, "step": 160390 }, { "epoch": 6.64, "grad_norm": 0.69921875, "learning_rate": 0.0004419503499400056, "loss": 0.1714, "step": 160400 }, { "epoch": 6.64, "grad_norm": 0.65234375, "learning_rate": 0.00044194340137408667, "loss": 0.2112, "step": 160410 }, { "epoch": 6.64, "grad_norm": 0.79296875, "learning_rate": 0.000441936452446951, "loss": 0.189, "step": 160420 }, { "epoch": 6.64, "grad_norm": 0.97265625, "learning_rate": 0.0004419295031586116, "loss": 0.2448, "step": 160430 }, { "epoch": 6.65, "grad_norm": 1.8203125, "learning_rate": 0.0004419225535090816, "loss": 0.1976, "step": 160440 }, { "epoch": 6.65, "grad_norm": 0.40625, "learning_rate": 0.00044191560349837413, "loss": 0.1961, "step": 160450 }, { "epoch": 6.65, "grad_norm": 0.8828125, "learning_rate": 0.0004419086531265022, "loss": 0.2101, "step": 160460 }, { "epoch": 6.65, "grad_norm": 0.5234375, "learning_rate": 0.00044190170239347885, "loss": 0.1758, "step": 160470 }, { "epoch": 6.65, "grad_norm": 0.87109375, "learning_rate": 0.0004418947512993173, "loss": 0.2103, "step": 160480 }, { "epoch": 6.65, "grad_norm": 0.51171875, "learning_rate": 0.00044188779984403055, "loss": 0.2009, "step": 160490 }, { "epoch": 6.65, "grad_norm": 0.515625, "learning_rate": 0.00044188084802763164, "loss": 0.155, "step": 160500 }, { "epoch": 6.65, "grad_norm": 0.80078125, "learning_rate": 0.00044187389585013373, "loss": 0.226, "step": 160510 }, { "epoch": 6.65, "grad_norm": 0.6328125, "learning_rate": 0.00044186694331154987, "loss": 0.1407, "step": 160520 }, { "epoch": 6.65, "grad_norm": 0.443359375, "learning_rate": 0.0004418599904118931, "loss": 0.2032, "step": 160530 }, { "epoch": 6.65, "grad_norm": 1.203125, "learning_rate": 0.00044185303715117653, "loss": 0.2187, "step": 160540 }, { "epoch": 6.65, "grad_norm": 0.6640625, "learning_rate": 0.00044184608352941334, "loss": 0.2064, "step": 160550 }, { "epoch": 6.65, "grad_norm": 1.0078125, "learning_rate": 0.00044183912954661646, "loss": 0.1666, "step": 160560 }, { "epoch": 6.65, "grad_norm": 0.71484375, "learning_rate": 0.0004418321752027991, "loss": 0.204, "step": 160570 }, { "epoch": 6.65, "grad_norm": 2.15625, "learning_rate": 0.00044182522049797434, "loss": 0.1737, "step": 160580 }, { "epoch": 6.65, "grad_norm": 0.60546875, "learning_rate": 0.0004418182654321552, "loss": 0.1795, "step": 160590 }, { "epoch": 6.65, "grad_norm": 0.8203125, "learning_rate": 0.0004418113100053548, "loss": 0.2264, "step": 160600 }, { "epoch": 6.65, "grad_norm": 0.0, "learning_rate": 0.00044180435421758616, "loss": 0.1832, "step": 160610 }, { "epoch": 6.65, "grad_norm": 1.0703125, "learning_rate": 0.00044179739806886255, "loss": 0.1938, "step": 160620 }, { "epoch": 6.65, "grad_norm": 0.2109375, "learning_rate": 0.0004417904415591969, "loss": 0.1918, "step": 160630 }, { "epoch": 6.65, "grad_norm": 0.58984375, "learning_rate": 0.0004417834846886023, "loss": 0.1746, "step": 160640 }, { "epoch": 6.65, "grad_norm": 0.80859375, "learning_rate": 0.00044177652745709195, "loss": 0.2319, "step": 160650 }, { "epoch": 6.65, "grad_norm": 0.5859375, "learning_rate": 0.0004417695698646789, "loss": 0.2751, "step": 160660 }, { "epoch": 6.65, "grad_norm": 0.1455078125, "learning_rate": 0.0004417626119113762, "loss": 0.1731, "step": 160670 }, { "epoch": 6.66, "grad_norm": 1.3125, "learning_rate": 0.00044175565359719693, "loss": 0.1954, "step": 160680 }, { "epoch": 6.66, "grad_norm": 0.462890625, "learning_rate": 0.0004417486949221543, "loss": 0.2076, "step": 160690 }, { "epoch": 6.66, "grad_norm": 0.71484375, "learning_rate": 0.0004417417358862613, "loss": 0.2222, "step": 160700 }, { "epoch": 6.66, "grad_norm": 1.0234375, "learning_rate": 0.0004417347764895311, "loss": 0.2498, "step": 160710 }, { "epoch": 6.66, "grad_norm": 1.40625, "learning_rate": 0.0004417278167319767, "loss": 0.1895, "step": 160720 }, { "epoch": 6.66, "grad_norm": 0.470703125, "learning_rate": 0.00044172085661361125, "loss": 0.2361, "step": 160730 }, { "epoch": 6.66, "grad_norm": 0.59765625, "learning_rate": 0.0004417138961344479, "loss": 0.1821, "step": 160740 }, { "epoch": 6.66, "grad_norm": 0.87109375, "learning_rate": 0.00044170693529449966, "loss": 0.1717, "step": 160750 }, { "epoch": 6.66, "grad_norm": 0.765625, "learning_rate": 0.0004416999740937797, "loss": 0.211, "step": 160760 }, { "epoch": 6.66, "grad_norm": 0.578125, "learning_rate": 0.00044169301253230104, "loss": 0.2291, "step": 160770 }, { "epoch": 6.66, "grad_norm": 0.427734375, "learning_rate": 0.0004416860506100769, "loss": 0.2049, "step": 160780 }, { "epoch": 6.66, "grad_norm": 0.515625, "learning_rate": 0.0004416790883271203, "loss": 0.2206, "step": 160790 }, { "epoch": 6.66, "grad_norm": 1.3046875, "learning_rate": 0.0004416721256834443, "loss": 0.2477, "step": 160800 }, { "epoch": 6.66, "grad_norm": 0.515625, "learning_rate": 0.0004416651626790621, "loss": 0.199, "step": 160810 }, { "epoch": 6.66, "grad_norm": 0.84765625, "learning_rate": 0.00044165819931398675, "loss": 0.1971, "step": 160820 }, { "epoch": 6.66, "grad_norm": 0.6875, "learning_rate": 0.00044165123558823137, "loss": 0.2123, "step": 160830 }, { "epoch": 6.66, "grad_norm": 0.380859375, "learning_rate": 0.0004416442715018091, "loss": 0.199, "step": 160840 }, { "epoch": 6.66, "grad_norm": 3.453125, "learning_rate": 0.0004416373070547329, "loss": 0.1978, "step": 160850 }, { "epoch": 6.66, "grad_norm": 0.5546875, "learning_rate": 0.00044163034224701614, "loss": 0.1989, "step": 160860 }, { "epoch": 6.66, "grad_norm": 1.3046875, "learning_rate": 0.00044162337707867166, "loss": 0.1994, "step": 160870 }, { "epoch": 6.66, "grad_norm": 0.47265625, "learning_rate": 0.0004416164115497127, "loss": 0.2318, "step": 160880 }, { "epoch": 6.66, "grad_norm": 0.76953125, "learning_rate": 0.00044160944566015237, "loss": 0.2162, "step": 160890 }, { "epoch": 6.66, "grad_norm": 0.224609375, "learning_rate": 0.00044160247941000375, "loss": 0.2287, "step": 160900 }, { "epoch": 6.66, "grad_norm": 0.67578125, "learning_rate": 0.0004415955127992799, "loss": 0.2418, "step": 160910 }, { "epoch": 6.67, "grad_norm": 0.73046875, "learning_rate": 0.000441588545827994, "loss": 0.1794, "step": 160920 }, { "epoch": 6.67, "grad_norm": 0.75, "learning_rate": 0.0004415815784961592, "loss": 0.1744, "step": 160930 }, { "epoch": 6.67, "grad_norm": 0.8671875, "learning_rate": 0.0004415746108037886, "loss": 0.1793, "step": 160940 }, { "epoch": 6.67, "grad_norm": 1.1015625, "learning_rate": 0.0004415676427508951, "loss": 0.1715, "step": 160950 }, { "epoch": 6.67, "grad_norm": 0.8671875, "learning_rate": 0.0004415606743374921, "loss": 0.1559, "step": 160960 }, { "epoch": 6.67, "grad_norm": 0.7265625, "learning_rate": 0.00044155370556359265, "loss": 0.2175, "step": 160970 }, { "epoch": 6.67, "grad_norm": 0.73828125, "learning_rate": 0.00044154673642920973, "loss": 0.202, "step": 160980 }, { "epoch": 6.67, "grad_norm": 0.83984375, "learning_rate": 0.0004415397669343565, "loss": 0.1869, "step": 160990 }, { "epoch": 6.67, "grad_norm": 1.75, "learning_rate": 0.0004415327970790462, "loss": 0.2116, "step": 161000 }, { "epoch": 6.67, "grad_norm": 0.466796875, "learning_rate": 0.00044152582686329177, "loss": 0.1924, "step": 161010 }, { "epoch": 6.67, "grad_norm": 1.2109375, "learning_rate": 0.00044151885628710655, "loss": 0.2374, "step": 161020 }, { "epoch": 6.67, "grad_norm": 0.92578125, "learning_rate": 0.0004415118853505035, "loss": 0.1638, "step": 161030 }, { "epoch": 6.67, "grad_norm": 0.455078125, "learning_rate": 0.0004415049140534956, "loss": 0.2104, "step": 161040 }, { "epoch": 6.67, "grad_norm": 0.69921875, "learning_rate": 0.00044149794239609633, "loss": 0.1685, "step": 161050 }, { "epoch": 6.67, "grad_norm": 1.1484375, "learning_rate": 0.0004414909703783185, "loss": 0.1769, "step": 161060 }, { "epoch": 6.67, "grad_norm": 0.84375, "learning_rate": 0.00044148399800017534, "loss": 0.2525, "step": 161070 }, { "epoch": 6.67, "grad_norm": 0.60546875, "learning_rate": 0.00044147702526167997, "loss": 0.1312, "step": 161080 }, { "epoch": 6.67, "grad_norm": 0.7421875, "learning_rate": 0.00044147005216284556, "loss": 0.192, "step": 161090 }, { "epoch": 6.67, "grad_norm": 0.9140625, "learning_rate": 0.0004414630787036852, "loss": 0.2106, "step": 161100 }, { "epoch": 6.67, "grad_norm": 0.6875, "learning_rate": 0.00044145610488421195, "loss": 0.2419, "step": 161110 }, { "epoch": 6.67, "grad_norm": 0.91015625, "learning_rate": 0.00044144913070443903, "loss": 0.2362, "step": 161120 }, { "epoch": 6.67, "grad_norm": 0.400390625, "learning_rate": 0.00044144215616437956, "loss": 0.2432, "step": 161130 }, { "epoch": 6.67, "grad_norm": 1.03125, "learning_rate": 0.00044143518126404653, "loss": 0.1312, "step": 161140 }, { "epoch": 6.67, "grad_norm": 0.404296875, "learning_rate": 0.00044142820600345323, "loss": 0.2079, "step": 161150 }, { "epoch": 6.68, "grad_norm": 1.875, "learning_rate": 0.0004414212303826127, "loss": 0.1821, "step": 161160 }, { "epoch": 6.68, "grad_norm": 0.54296875, "learning_rate": 0.00044141425440153803, "loss": 0.2096, "step": 161170 }, { "epoch": 6.68, "grad_norm": 0.51953125, "learning_rate": 0.0004414072780602425, "loss": 0.236, "step": 161180 }, { "epoch": 6.68, "grad_norm": 2.234375, "learning_rate": 0.0004414003013587391, "loss": 0.2258, "step": 161190 }, { "epoch": 6.68, "grad_norm": 0.73828125, "learning_rate": 0.00044139332429704093, "loss": 0.2072, "step": 161200 }, { "epoch": 6.68, "grad_norm": 0.73046875, "learning_rate": 0.00044138634687516134, "loss": 0.3223, "step": 161210 }, { "epoch": 6.68, "grad_norm": 0.78515625, "learning_rate": 0.00044137936909311316, "loss": 0.1863, "step": 161220 }, { "epoch": 6.68, "grad_norm": 1.328125, "learning_rate": 0.00044137239095090976, "loss": 0.1715, "step": 161230 }, { "epoch": 6.68, "grad_norm": 0.6484375, "learning_rate": 0.0004413654124485642, "loss": 0.209, "step": 161240 }, { "epoch": 6.68, "grad_norm": 0.32421875, "learning_rate": 0.00044135843358608954, "loss": 0.2068, "step": 161250 }, { "epoch": 6.68, "grad_norm": 0.423828125, "learning_rate": 0.000441351454363499, "loss": 0.201, "step": 161260 }, { "epoch": 6.68, "grad_norm": 0.7265625, "learning_rate": 0.00044134447478080574, "loss": 0.1717, "step": 161270 }, { "epoch": 6.68, "grad_norm": 0.79296875, "learning_rate": 0.00044133749483802275, "loss": 0.1677, "step": 161280 }, { "epoch": 6.68, "grad_norm": 0.56640625, "learning_rate": 0.0004413305145351633, "loss": 0.1769, "step": 161290 }, { "epoch": 6.68, "grad_norm": 0.72265625, "learning_rate": 0.00044132353387224055, "loss": 0.2011, "step": 161300 }, { "epoch": 6.68, "grad_norm": 0.55859375, "learning_rate": 0.00044131655284926753, "loss": 0.2095, "step": 161310 }, { "epoch": 6.68, "grad_norm": 0.88671875, "learning_rate": 0.00044130957146625737, "loss": 0.1794, "step": 161320 }, { "epoch": 6.68, "grad_norm": 0.51953125, "learning_rate": 0.00044130258972322324, "loss": 0.1667, "step": 161330 }, { "epoch": 6.68, "grad_norm": 0.76953125, "learning_rate": 0.00044129560762017837, "loss": 0.2297, "step": 161340 }, { "epoch": 6.68, "grad_norm": 0.73046875, "learning_rate": 0.00044128862515713577, "loss": 0.1958, "step": 161350 }, { "epoch": 6.68, "grad_norm": 0.5546875, "learning_rate": 0.00044128164233410866, "loss": 0.2601, "step": 161360 }, { "epoch": 6.68, "grad_norm": 0.92578125, "learning_rate": 0.0004412746591511101, "loss": 0.2048, "step": 161370 }, { "epoch": 6.68, "grad_norm": 0.396484375, "learning_rate": 0.0004412676756081534, "loss": 0.2021, "step": 161380 }, { "epoch": 6.68, "grad_norm": 1.09375, "learning_rate": 0.0004412606917052515, "loss": 0.2027, "step": 161390 }, { "epoch": 6.69, "grad_norm": 1.4140625, "learning_rate": 0.00044125370744241765, "loss": 0.1938, "step": 161400 }, { "epoch": 6.69, "grad_norm": 1.7109375, "learning_rate": 0.000441246722819665, "loss": 0.2313, "step": 161410 }, { "epoch": 6.69, "grad_norm": 0.5625, "learning_rate": 0.00044123973783700667, "loss": 0.2132, "step": 161420 }, { "epoch": 6.69, "grad_norm": 1.1484375, "learning_rate": 0.0004412327524944558, "loss": 0.2144, "step": 161430 }, { "epoch": 6.69, "grad_norm": 0.6875, "learning_rate": 0.00044122576679202555, "loss": 0.2359, "step": 161440 }, { "epoch": 6.69, "grad_norm": 1.8671875, "learning_rate": 0.000441218780729729, "loss": 0.2737, "step": 161450 }, { "epoch": 6.69, "grad_norm": 0.53515625, "learning_rate": 0.0004412117943075794, "loss": 0.2225, "step": 161460 }, { "epoch": 6.69, "grad_norm": 0.42578125, "learning_rate": 0.0004412048075255899, "loss": 0.1877, "step": 161470 }, { "epoch": 6.69, "grad_norm": 0.431640625, "learning_rate": 0.00044119782038377355, "loss": 0.1816, "step": 161480 }, { "epoch": 6.69, "grad_norm": 0.6953125, "learning_rate": 0.0004411908328821435, "loss": 0.2197, "step": 161490 }, { "epoch": 6.69, "grad_norm": 0.5, "learning_rate": 0.00044118384502071306, "loss": 0.2, "step": 161500 }, { "epoch": 6.69, "grad_norm": 0.5234375, "learning_rate": 0.00044117685679949517, "loss": 0.1828, "step": 161510 }, { "epoch": 6.69, "grad_norm": 0.65625, "learning_rate": 0.00044116986821850316, "loss": 0.1377, "step": 161520 }, { "epoch": 6.69, "grad_norm": 1.578125, "learning_rate": 0.00044116287927775, "loss": 0.214, "step": 161530 }, { "epoch": 6.69, "grad_norm": 1.8671875, "learning_rate": 0.00044115588997724906, "loss": 0.2383, "step": 161540 }, { "epoch": 6.69, "grad_norm": 0.462890625, "learning_rate": 0.0004411489003170133, "loss": 0.266, "step": 161550 }, { "epoch": 6.69, "grad_norm": 0.416015625, "learning_rate": 0.000441141910297056, "loss": 0.1909, "step": 161560 }, { "epoch": 6.69, "grad_norm": 0.96484375, "learning_rate": 0.0004411349199173902, "loss": 0.1838, "step": 161570 }, { "epoch": 6.69, "grad_norm": 1.03125, "learning_rate": 0.0004411279291780292, "loss": 0.2009, "step": 161580 }, { "epoch": 6.69, "grad_norm": 0.275390625, "learning_rate": 0.000441120938078986, "loss": 0.2427, "step": 161590 }, { "epoch": 6.69, "grad_norm": 0.77734375, "learning_rate": 0.00044111394662027395, "loss": 0.1858, "step": 161600 }, { "epoch": 6.69, "grad_norm": 1.7421875, "learning_rate": 0.00044110695480190597, "loss": 0.2209, "step": 161610 }, { "epoch": 6.69, "grad_norm": 0.5390625, "learning_rate": 0.0004410999626238954, "loss": 0.1949, "step": 161620 }, { "epoch": 6.69, "grad_norm": 0.390625, "learning_rate": 0.00044109297008625533, "loss": 0.173, "step": 161630 }, { "epoch": 6.7, "grad_norm": 1.21875, "learning_rate": 0.0004410859771889989, "loss": 0.2396, "step": 161640 }, { "epoch": 6.7, "grad_norm": 0.70703125, "learning_rate": 0.0004410789839321393, "loss": 0.1877, "step": 161650 }, { "epoch": 6.7, "grad_norm": 0.466796875, "learning_rate": 0.0004410719903156897, "loss": 0.1911, "step": 161660 }, { "epoch": 6.7, "grad_norm": 0.78515625, "learning_rate": 0.00044106499633966324, "loss": 0.1791, "step": 161670 }, { "epoch": 6.7, "grad_norm": 0.57421875, "learning_rate": 0.00044105800200407315, "loss": 0.1777, "step": 161680 }, { "epoch": 6.7, "grad_norm": 1.2265625, "learning_rate": 0.00044105100730893246, "loss": 0.2022, "step": 161690 }, { "epoch": 6.7, "grad_norm": 0.2041015625, "learning_rate": 0.00044104401225425444, "loss": 0.2203, "step": 161700 }, { "epoch": 6.7, "grad_norm": 1.90625, "learning_rate": 0.0004410370168400522, "loss": 0.2262, "step": 161710 }, { "epoch": 6.7, "grad_norm": 1.0390625, "learning_rate": 0.00044103002106633896, "loss": 0.1704, "step": 161720 }, { "epoch": 6.7, "grad_norm": 0.484375, "learning_rate": 0.0004410230249331278, "loss": 0.2037, "step": 161730 }, { "epoch": 6.7, "grad_norm": 1.2734375, "learning_rate": 0.000441016028440432, "loss": 0.2142, "step": 161740 }, { "epoch": 6.7, "grad_norm": 0.609375, "learning_rate": 0.0004410090315882646, "loss": 0.1626, "step": 161750 }, { "epoch": 6.7, "grad_norm": 0.703125, "learning_rate": 0.00044100203437663887, "loss": 0.2213, "step": 161760 }, { "epoch": 6.7, "grad_norm": 0.98828125, "learning_rate": 0.00044099503680556783, "loss": 0.2336, "step": 161770 }, { "epoch": 6.7, "grad_norm": 1.2890625, "learning_rate": 0.0004409880388750649, "loss": 0.2363, "step": 161780 }, { "epoch": 6.7, "grad_norm": 0.56640625, "learning_rate": 0.0004409810405851431, "loss": 0.189, "step": 161790 }, { "epoch": 6.7, "grad_norm": 0.322265625, "learning_rate": 0.0004409740419358155, "loss": 0.2011, "step": 161800 }, { "epoch": 6.7, "grad_norm": 0.62890625, "learning_rate": 0.0004409670429270954, "loss": 0.2259, "step": 161810 }, { "epoch": 6.7, "grad_norm": 0.87890625, "learning_rate": 0.00044096004355899604, "loss": 0.2393, "step": 161820 }, { "epoch": 6.7, "grad_norm": 1.0625, "learning_rate": 0.0004409530438315305, "loss": 0.219, "step": 161830 }, { "epoch": 6.7, "grad_norm": 0.74609375, "learning_rate": 0.0004409460437447118, "loss": 0.1696, "step": 161840 }, { "epoch": 6.7, "grad_norm": 0.53515625, "learning_rate": 0.0004409390432985534, "loss": 0.2388, "step": 161850 }, { "epoch": 6.7, "grad_norm": 0.41015625, "learning_rate": 0.0004409320424930683, "loss": 0.1623, "step": 161860 }, { "epoch": 6.7, "grad_norm": 0.59375, "learning_rate": 0.00044092504132826974, "loss": 0.2186, "step": 161870 }, { "epoch": 6.71, "grad_norm": 0.482421875, "learning_rate": 0.00044091803980417075, "loss": 0.2373, "step": 161880 }, { "epoch": 6.71, "grad_norm": 0.5703125, "learning_rate": 0.0004409110379207848, "loss": 0.1743, "step": 161890 }, { "epoch": 6.71, "grad_norm": 1.1171875, "learning_rate": 0.00044090403567812483, "loss": 0.2232, "step": 161900 }, { "epoch": 6.71, "grad_norm": 0.3046875, "learning_rate": 0.00044089703307620404, "loss": 0.1741, "step": 161910 }, { "epoch": 6.71, "grad_norm": 0.4375, "learning_rate": 0.0004408900301150357, "loss": 0.2137, "step": 161920 }, { "epoch": 6.71, "grad_norm": 0.85546875, "learning_rate": 0.0004408830267946329, "loss": 0.1677, "step": 161930 }, { "epoch": 6.71, "grad_norm": 0.5859375, "learning_rate": 0.00044087602311500886, "loss": 0.1551, "step": 161940 }, { "epoch": 6.71, "grad_norm": 0.41015625, "learning_rate": 0.00044086901907617676, "loss": 0.2087, "step": 161950 }, { "epoch": 6.71, "grad_norm": 0.8046875, "learning_rate": 0.0004408620146781498, "loss": 0.2187, "step": 161960 }, { "epoch": 6.71, "grad_norm": 0.671875, "learning_rate": 0.0004408550099209411, "loss": 0.181, "step": 161970 }, { "epoch": 6.71, "grad_norm": 0.76171875, "learning_rate": 0.00044084800480456395, "loss": 0.2012, "step": 161980 }, { "epoch": 6.71, "grad_norm": 0.734375, "learning_rate": 0.00044084099932903143, "loss": 0.2112, "step": 161990 }, { "epoch": 6.71, "grad_norm": 0.9140625, "learning_rate": 0.00044083399349435673, "loss": 0.1884, "step": 162000 }, { "epoch": 6.71, "grad_norm": 1.1015625, "learning_rate": 0.00044082698730055306, "loss": 0.2168, "step": 162010 }, { "epoch": 6.71, "grad_norm": 0.37890625, "learning_rate": 0.0004408199807476336, "loss": 0.1992, "step": 162020 }, { "epoch": 6.71, "grad_norm": 0.61328125, "learning_rate": 0.0004408129738356116, "loss": 0.2297, "step": 162030 }, { "epoch": 6.71, "grad_norm": 0.53125, "learning_rate": 0.00044080596656450016, "loss": 0.2053, "step": 162040 }, { "epoch": 6.71, "grad_norm": 0.2578125, "learning_rate": 0.00044079895893431255, "loss": 0.2142, "step": 162050 }, { "epoch": 6.71, "grad_norm": 0.5, "learning_rate": 0.00044079195094506185, "loss": 0.2048, "step": 162060 }, { "epoch": 6.71, "grad_norm": 0.67578125, "learning_rate": 0.0004407849425967613, "loss": 0.1862, "step": 162070 }, { "epoch": 6.71, "grad_norm": 0.51953125, "learning_rate": 0.0004407779338894241, "loss": 0.1956, "step": 162080 }, { "epoch": 6.71, "grad_norm": 1.6328125, "learning_rate": 0.00044077092482306334, "loss": 0.2659, "step": 162090 }, { "epoch": 6.71, "grad_norm": 1.0078125, "learning_rate": 0.00044076391539769243, "loss": 0.1632, "step": 162100 }, { "epoch": 6.71, "grad_norm": 0.380859375, "learning_rate": 0.0004407569056133244, "loss": 0.2243, "step": 162110 }, { "epoch": 6.71, "grad_norm": 0.98828125, "learning_rate": 0.00044074989546997247, "loss": 0.1706, "step": 162120 }, { "epoch": 6.72, "grad_norm": 0.265625, "learning_rate": 0.0004407428849676498, "loss": 0.1589, "step": 162130 }, { "epoch": 6.72, "grad_norm": 0.8984375, "learning_rate": 0.0004407358741063697, "loss": 0.1814, "step": 162140 }, { "epoch": 6.72, "grad_norm": 0.51171875, "learning_rate": 0.0004407288628861452, "loss": 0.1683, "step": 162150 }, { "epoch": 6.72, "grad_norm": 1.78125, "learning_rate": 0.0004407218513069896, "loss": 0.1421, "step": 162160 }, { "epoch": 6.72, "grad_norm": 0.625, "learning_rate": 0.0004407148393689161, "loss": 0.2183, "step": 162170 }, { "epoch": 6.72, "grad_norm": 1.3125, "learning_rate": 0.0004407078270719378, "loss": 0.2384, "step": 162180 }, { "epoch": 6.72, "grad_norm": 1.296875, "learning_rate": 0.0004407008144160681, "loss": 0.211, "step": 162190 }, { "epoch": 6.72, "grad_norm": 0.6171875, "learning_rate": 0.0004406938014013199, "loss": 0.2237, "step": 162200 }, { "epoch": 6.72, "grad_norm": 0.28515625, "learning_rate": 0.00044068678802770665, "loss": 0.1453, "step": 162210 }, { "epoch": 6.72, "grad_norm": 0.7890625, "learning_rate": 0.0004406797742952414, "loss": 0.2438, "step": 162220 }, { "epoch": 6.72, "grad_norm": 1.2421875, "learning_rate": 0.00044067276020393753, "loss": 0.2199, "step": 162230 }, { "epoch": 6.72, "grad_norm": 0.625, "learning_rate": 0.000440665745753808, "loss": 0.252, "step": 162240 }, { "epoch": 6.72, "grad_norm": 0.59765625, "learning_rate": 0.0004406587309448661, "loss": 0.1718, "step": 162250 }, { "epoch": 6.72, "grad_norm": 1.1640625, "learning_rate": 0.00044065171577712515, "loss": 0.2455, "step": 162260 }, { "epoch": 6.72, "grad_norm": 0.82421875, "learning_rate": 0.0004406447002505982, "loss": 0.1672, "step": 162270 }, { "epoch": 6.72, "grad_norm": 1.046875, "learning_rate": 0.0004406376843652986, "loss": 0.2188, "step": 162280 }, { "epoch": 6.72, "grad_norm": 1.203125, "learning_rate": 0.00044063066812123934, "loss": 0.2032, "step": 162290 }, { "epoch": 6.72, "grad_norm": 0.5390625, "learning_rate": 0.0004406236515184339, "loss": 0.1995, "step": 162300 }, { "epoch": 6.72, "grad_norm": 0.7421875, "learning_rate": 0.0004406166345568952, "loss": 0.1778, "step": 162310 }, { "epoch": 6.72, "grad_norm": 0.6171875, "learning_rate": 0.0004406096172366366, "loss": 0.1405, "step": 162320 }, { "epoch": 6.72, "grad_norm": 0.5546875, "learning_rate": 0.00044060259955767137, "loss": 0.2062, "step": 162330 }, { "epoch": 6.72, "grad_norm": 1.15625, "learning_rate": 0.00044059558152001254, "loss": 0.2081, "step": 162340 }, { "epoch": 6.72, "grad_norm": 1.0078125, "learning_rate": 0.00044058856312367346, "loss": 0.2229, "step": 162350 }, { "epoch": 6.72, "grad_norm": 1.109375, "learning_rate": 0.0004405815443686673, "loss": 0.2147, "step": 162360 }, { "epoch": 6.73, "grad_norm": 1.078125, "learning_rate": 0.00044057452525500713, "loss": 0.2334, "step": 162370 }, { "epoch": 6.73, "grad_norm": 0.57421875, "learning_rate": 0.0004405675057827064, "loss": 0.2421, "step": 162380 }, { "epoch": 6.73, "grad_norm": 0.7421875, "learning_rate": 0.00044056048595177816, "loss": 0.2001, "step": 162390 }, { "epoch": 6.73, "grad_norm": 0.4296875, "learning_rate": 0.00044055346576223563, "loss": 0.1687, "step": 162400 }, { "epoch": 6.73, "grad_norm": 0.41015625, "learning_rate": 0.00044054644521409215, "loss": 0.2377, "step": 162410 }, { "epoch": 6.73, "grad_norm": 0.81640625, "learning_rate": 0.00044053942430736083, "loss": 0.2129, "step": 162420 }, { "epoch": 6.73, "grad_norm": 1.0703125, "learning_rate": 0.0004405324030420548, "loss": 0.1477, "step": 162430 }, { "epoch": 6.73, "grad_norm": 0.52734375, "learning_rate": 0.00044052538141818744, "loss": 0.2258, "step": 162440 }, { "epoch": 6.73, "grad_norm": 1.2734375, "learning_rate": 0.0004405183594357718, "loss": 0.2041, "step": 162450 }, { "epoch": 6.73, "grad_norm": 0.5703125, "learning_rate": 0.0004405113370948213, "loss": 0.257, "step": 162460 }, { "epoch": 6.73, "grad_norm": 0.259765625, "learning_rate": 0.000440504314395349, "loss": 0.1769, "step": 162470 }, { "epoch": 6.73, "grad_norm": 0.6953125, "learning_rate": 0.00044049729133736816, "loss": 0.2185, "step": 162480 }, { "epoch": 6.73, "grad_norm": 0.79296875, "learning_rate": 0.00044049026792089194, "loss": 0.1988, "step": 162490 }, { "epoch": 6.73, "grad_norm": 0.2734375, "learning_rate": 0.0004404832441459336, "loss": 0.1776, "step": 162500 }, { "epoch": 6.73, "grad_norm": 0.75, "learning_rate": 0.0004404762200125064, "loss": 0.2036, "step": 162510 }, { "epoch": 6.73, "grad_norm": 0.83203125, "learning_rate": 0.00044046919552062354, "loss": 0.1867, "step": 162520 }, { "epoch": 6.73, "grad_norm": 0.5078125, "learning_rate": 0.0004404621706702981, "loss": 0.1895, "step": 162530 }, { "epoch": 6.73, "grad_norm": 0.482421875, "learning_rate": 0.00044045514546154366, "loss": 0.2014, "step": 162540 }, { "epoch": 6.73, "grad_norm": 1.109375, "learning_rate": 0.000440448119894373, "loss": 0.1818, "step": 162550 }, { "epoch": 6.73, "grad_norm": 1.203125, "learning_rate": 0.0004404410939687996, "loss": 0.2454, "step": 162560 }, { "epoch": 6.73, "grad_norm": 0.52734375, "learning_rate": 0.00044043406768483665, "loss": 0.2194, "step": 162570 }, { "epoch": 6.73, "grad_norm": 0.5859375, "learning_rate": 0.0004404270410424974, "loss": 0.2012, "step": 162580 }, { "epoch": 6.73, "grad_norm": 0.17578125, "learning_rate": 0.00044042001404179487, "loss": 0.2086, "step": 162590 }, { "epoch": 6.73, "grad_norm": 1.25, "learning_rate": 0.00044041298668274256, "loss": 0.2112, "step": 162600 }, { "epoch": 6.74, "grad_norm": 0.6640625, "learning_rate": 0.0004404059589653536, "loss": 0.2308, "step": 162610 }, { "epoch": 6.74, "grad_norm": 0.51953125, "learning_rate": 0.00044039893088964114, "loss": 0.1835, "step": 162620 }, { "epoch": 6.74, "grad_norm": 1.6015625, "learning_rate": 0.00044039190245561836, "loss": 0.2787, "step": 162630 }, { "epoch": 6.74, "grad_norm": 0.416015625, "learning_rate": 0.0004403848736632987, "loss": 0.2102, "step": 162640 }, { "epoch": 6.74, "grad_norm": 0.455078125, "learning_rate": 0.0004403778445126952, "loss": 0.196, "step": 162650 }, { "epoch": 6.74, "grad_norm": 0.68359375, "learning_rate": 0.00044037081500382125, "loss": 0.1967, "step": 162660 }, { "epoch": 6.74, "grad_norm": 0.5078125, "learning_rate": 0.00044036378513668986, "loss": 0.2053, "step": 162670 }, { "epoch": 6.74, "grad_norm": 0.7109375, "learning_rate": 0.00044035675491131443, "loss": 0.2241, "step": 162680 }, { "epoch": 6.74, "grad_norm": 0.55859375, "learning_rate": 0.0004403497243277082, "loss": 0.1654, "step": 162690 }, { "epoch": 6.74, "grad_norm": 0.7265625, "learning_rate": 0.0004403426933858843, "loss": 0.236, "step": 162700 }, { "epoch": 6.74, "grad_norm": 0.26171875, "learning_rate": 0.000440335662085856, "loss": 0.1957, "step": 162710 }, { "epoch": 6.74, "grad_norm": 0.78515625, "learning_rate": 0.00044032863042763656, "loss": 0.2019, "step": 162720 }, { "epoch": 6.74, "grad_norm": 0.8203125, "learning_rate": 0.0004403215984112392, "loss": 0.1838, "step": 162730 }, { "epoch": 6.74, "grad_norm": 0.91015625, "learning_rate": 0.0004403145660366771, "loss": 0.2222, "step": 162740 }, { "epoch": 6.74, "grad_norm": 1.9140625, "learning_rate": 0.00044030753330396357, "loss": 0.2147, "step": 162750 }, { "epoch": 6.74, "grad_norm": 2.625, "learning_rate": 0.0004403005002131118, "loss": 0.1816, "step": 162760 }, { "epoch": 6.74, "grad_norm": 0.57421875, "learning_rate": 0.00044029346676413507, "loss": 0.1956, "step": 162770 }, { "epoch": 6.74, "grad_norm": 1.0234375, "learning_rate": 0.00044028643295704654, "loss": 0.2514, "step": 162780 }, { "epoch": 6.74, "grad_norm": 1.2109375, "learning_rate": 0.0004402793987918595, "loss": 0.2044, "step": 162790 }, { "epoch": 6.74, "grad_norm": 0.84765625, "learning_rate": 0.0004402723642685872, "loss": 0.2293, "step": 162800 }, { "epoch": 6.74, "grad_norm": 0.9609375, "learning_rate": 0.00044026532938724286, "loss": 0.2001, "step": 162810 }, { "epoch": 6.74, "grad_norm": 1.53125, "learning_rate": 0.00044025829414783967, "loss": 0.2614, "step": 162820 }, { "epoch": 6.74, "grad_norm": 0.8671875, "learning_rate": 0.000440251258550391, "loss": 0.1953, "step": 162830 }, { "epoch": 6.74, "grad_norm": 1.2109375, "learning_rate": 0.0004402442225949099, "loss": 0.2252, "step": 162840 }, { "epoch": 6.75, "grad_norm": 1.3984375, "learning_rate": 0.00044023718628140985, "loss": 0.1941, "step": 162850 }, { "epoch": 6.75, "grad_norm": 0.466796875, "learning_rate": 0.0004402301496099039, "loss": 0.2423, "step": 162860 }, { "epoch": 6.75, "grad_norm": 0.73828125, "learning_rate": 0.00044022311258040534, "loss": 0.1638, "step": 162870 }, { "epoch": 6.75, "grad_norm": 0.53125, "learning_rate": 0.0004402160751929274, "loss": 0.1942, "step": 162880 }, { "epoch": 6.75, "grad_norm": 0.66015625, "learning_rate": 0.0004402090374474834, "loss": 0.1466, "step": 162890 }, { "epoch": 6.75, "grad_norm": 0.8125, "learning_rate": 0.00044020199934408647, "loss": 0.2304, "step": 162900 }, { "epoch": 6.75, "grad_norm": 1.6328125, "learning_rate": 0.00044019496088275, "loss": 0.2246, "step": 162910 }, { "epoch": 6.75, "grad_norm": 1.0859375, "learning_rate": 0.00044018792206348707, "loss": 0.1817, "step": 162920 }, { "epoch": 6.75, "grad_norm": 0.41015625, "learning_rate": 0.000440180882886311, "loss": 0.222, "step": 162930 }, { "epoch": 6.75, "grad_norm": 1.1484375, "learning_rate": 0.0004401738433512351, "loss": 0.2596, "step": 162940 }, { "epoch": 6.75, "grad_norm": 0.85546875, "learning_rate": 0.0004401668034582726, "loss": 0.1939, "step": 162950 }, { "epoch": 6.75, "grad_norm": 0.34375, "learning_rate": 0.0004401597632074367, "loss": 0.2047, "step": 162960 }, { "epoch": 6.75, "grad_norm": 0.458984375, "learning_rate": 0.0004401527225987406, "loss": 0.1834, "step": 162970 }, { "epoch": 6.75, "grad_norm": 0.1611328125, "learning_rate": 0.0004401456816321976, "loss": 0.2019, "step": 162980 }, { "epoch": 6.75, "grad_norm": 0.78125, "learning_rate": 0.000440138640307821, "loss": 0.2144, "step": 162990 }, { "epoch": 6.75, "grad_norm": 1.2734375, "learning_rate": 0.000440131598625624, "loss": 0.2417, "step": 163000 }, { "epoch": 6.75, "grad_norm": 0.5, "learning_rate": 0.0004401245565856199, "loss": 0.1717, "step": 163010 }, { "epoch": 6.75, "grad_norm": 0.2890625, "learning_rate": 0.0004401175141878219, "loss": 0.1737, "step": 163020 }, { "epoch": 6.75, "grad_norm": 1.03125, "learning_rate": 0.00044011047143224325, "loss": 0.2383, "step": 163030 }, { "epoch": 6.75, "grad_norm": 0.3828125, "learning_rate": 0.0004401034283188973, "loss": 0.2064, "step": 163040 }, { "epoch": 6.75, "grad_norm": 0.50390625, "learning_rate": 0.00044009638484779714, "loss": 0.1477, "step": 163050 }, { "epoch": 6.75, "grad_norm": 0.875, "learning_rate": 0.0004400893410189561, "loss": 0.2007, "step": 163060 }, { "epoch": 6.75, "grad_norm": 0.357421875, "learning_rate": 0.0004400822968323875, "loss": 0.1864, "step": 163070 }, { "epoch": 6.75, "grad_norm": 0.578125, "learning_rate": 0.00044007525228810453, "loss": 0.1967, "step": 163080 }, { "epoch": 6.76, "grad_norm": 1.3046875, "learning_rate": 0.00044006820738612043, "loss": 0.2202, "step": 163090 }, { "epoch": 6.76, "grad_norm": 0.83984375, "learning_rate": 0.00044006116212644855, "loss": 0.1988, "step": 163100 }, { "epoch": 6.76, "grad_norm": 0.66796875, "learning_rate": 0.000440054116509102, "loss": 0.2292, "step": 163110 }, { "epoch": 6.76, "grad_norm": 0.59375, "learning_rate": 0.0004400470705340942, "loss": 0.2222, "step": 163120 }, { "epoch": 6.76, "grad_norm": 0.53125, "learning_rate": 0.0004400400242014383, "loss": 0.2113, "step": 163130 }, { "epoch": 6.76, "grad_norm": 0.9453125, "learning_rate": 0.00044003297751114763, "loss": 0.2203, "step": 163140 }, { "epoch": 6.76, "grad_norm": 0.56640625, "learning_rate": 0.0004400259304632354, "loss": 0.2345, "step": 163150 }, { "epoch": 6.76, "grad_norm": 0.453125, "learning_rate": 0.00044001888305771487, "loss": 0.1738, "step": 163160 }, { "epoch": 6.76, "grad_norm": 0.97265625, "learning_rate": 0.00044001183529459936, "loss": 0.2042, "step": 163170 }, { "epoch": 6.76, "grad_norm": 0.86328125, "learning_rate": 0.00044000478717390215, "loss": 0.2121, "step": 163180 }, { "epoch": 6.76, "grad_norm": 0.359375, "learning_rate": 0.0004399977386956363, "loss": 0.2017, "step": 163190 }, { "epoch": 6.76, "grad_norm": 0.92578125, "learning_rate": 0.00043999068985981534, "loss": 0.1866, "step": 163200 }, { "epoch": 6.76, "grad_norm": 1.3984375, "learning_rate": 0.00043998364066645236, "loss": 0.2332, "step": 163210 }, { "epoch": 6.76, "grad_norm": 0.9453125, "learning_rate": 0.0004399765911155607, "loss": 0.1846, "step": 163220 }, { "epoch": 6.76, "grad_norm": 0.890625, "learning_rate": 0.0004399695412071536, "loss": 0.1803, "step": 163230 }, { "epoch": 6.76, "grad_norm": 1.5390625, "learning_rate": 0.00043996249094124443, "loss": 0.2439, "step": 163240 }, { "epoch": 6.76, "grad_norm": 0.5390625, "learning_rate": 0.00043995544031784627, "loss": 0.1915, "step": 163250 }, { "epoch": 6.76, "grad_norm": 0.625, "learning_rate": 0.0004399483893369725, "loss": 0.173, "step": 163260 }, { "epoch": 6.76, "grad_norm": 0.275390625, "learning_rate": 0.00043994133799863636, "loss": 0.2015, "step": 163270 }, { "epoch": 6.76, "grad_norm": 0.890625, "learning_rate": 0.00043993428630285117, "loss": 0.2079, "step": 163280 }, { "epoch": 6.76, "grad_norm": 1.015625, "learning_rate": 0.00043992723424963017, "loss": 0.1901, "step": 163290 }, { "epoch": 6.76, "grad_norm": 0.63671875, "learning_rate": 0.00043992018183898663, "loss": 0.1629, "step": 163300 }, { "epoch": 6.76, "grad_norm": 0.455078125, "learning_rate": 0.00043991312907093374, "loss": 0.2481, "step": 163310 }, { "epoch": 6.76, "grad_norm": 0.8203125, "learning_rate": 0.0004399060759454849, "loss": 0.1929, "step": 163320 }, { "epoch": 6.77, "grad_norm": 2.734375, "learning_rate": 0.0004398990224626533, "loss": 0.1658, "step": 163330 }, { "epoch": 6.77, "grad_norm": 0.91015625, "learning_rate": 0.00043989196862245235, "loss": 0.2573, "step": 163340 }, { "epoch": 6.77, "grad_norm": 0.875, "learning_rate": 0.00043988491442489516, "loss": 0.1975, "step": 163350 }, { "epoch": 6.77, "grad_norm": 0.74609375, "learning_rate": 0.00043987785986999506, "loss": 0.2262, "step": 163360 }, { "epoch": 6.77, "grad_norm": 0.9140625, "learning_rate": 0.00043987080495776535, "loss": 0.1979, "step": 163370 }, { "epoch": 6.77, "grad_norm": 1.921875, "learning_rate": 0.00043986374968821925, "loss": 0.201, "step": 163380 }, { "epoch": 6.77, "grad_norm": 0.73828125, "learning_rate": 0.0004398566940613701, "loss": 0.2137, "step": 163390 }, { "epoch": 6.77, "grad_norm": 0.984375, "learning_rate": 0.0004398496380772311, "loss": 0.1717, "step": 163400 }, { "epoch": 6.77, "grad_norm": 0.53515625, "learning_rate": 0.0004398425817358157, "loss": 0.1543, "step": 163410 }, { "epoch": 6.77, "grad_norm": 0.671875, "learning_rate": 0.000439835525037137, "loss": 0.2109, "step": 163420 }, { "epoch": 6.77, "grad_norm": 0.8515625, "learning_rate": 0.00043982846798120833, "loss": 0.2134, "step": 163430 }, { "epoch": 6.77, "grad_norm": 1.4140625, "learning_rate": 0.00043982141056804296, "loss": 0.2406, "step": 163440 }, { "epoch": 6.77, "grad_norm": 0.7734375, "learning_rate": 0.0004398143527976542, "loss": 0.2386, "step": 163450 }, { "epoch": 6.77, "grad_norm": 1.28125, "learning_rate": 0.0004398072946700554, "loss": 0.2193, "step": 163460 }, { "epoch": 6.77, "grad_norm": 0.55078125, "learning_rate": 0.0004398002361852598, "loss": 0.2153, "step": 163470 }, { "epoch": 6.77, "grad_norm": 0.703125, "learning_rate": 0.0004397931773432805, "loss": 0.172, "step": 163480 }, { "epoch": 6.77, "grad_norm": 0.55859375, "learning_rate": 0.00043978611814413104, "loss": 0.1628, "step": 163490 }, { "epoch": 6.77, "grad_norm": 0.76171875, "learning_rate": 0.0004397790585878246, "loss": 0.2593, "step": 163500 }, { "epoch": 6.77, "grad_norm": 0.765625, "learning_rate": 0.0004397719986743744, "loss": 0.187, "step": 163510 }, { "epoch": 6.77, "grad_norm": 0.8828125, "learning_rate": 0.00043976493840379384, "loss": 0.2078, "step": 163520 }, { "epoch": 6.77, "grad_norm": 0.578125, "learning_rate": 0.00043975787777609615, "loss": 0.187, "step": 163530 }, { "epoch": 6.77, "grad_norm": 1.921875, "learning_rate": 0.00043975081679129463, "loss": 0.2655, "step": 163540 }, { "epoch": 6.77, "grad_norm": 0.75390625, "learning_rate": 0.0004397437554494026, "loss": 0.1852, "step": 163550 }, { "epoch": 6.77, "grad_norm": 0.671875, "learning_rate": 0.0004397366937504332, "loss": 0.1512, "step": 163560 }, { "epoch": 6.78, "grad_norm": 0.98046875, "learning_rate": 0.00043972963169439994, "loss": 0.1846, "step": 163570 }, { "epoch": 6.78, "grad_norm": 1.21875, "learning_rate": 0.0004397225692813159, "loss": 0.2045, "step": 163580 }, { "epoch": 6.78, "grad_norm": 0.421875, "learning_rate": 0.0004397155065111946, "loss": 0.1684, "step": 163590 }, { "epoch": 6.78, "grad_norm": 0.68359375, "learning_rate": 0.0004397084433840491, "loss": 0.1609, "step": 163600 }, { "epoch": 6.78, "grad_norm": 0.734375, "learning_rate": 0.00043970137989989287, "loss": 0.2003, "step": 163610 }, { "epoch": 6.78, "grad_norm": 1.015625, "learning_rate": 0.0004396943160587391, "loss": 0.2383, "step": 163620 }, { "epoch": 6.78, "grad_norm": 1.0859375, "learning_rate": 0.0004396872518606011, "loss": 0.2085, "step": 163630 }, { "epoch": 6.78, "grad_norm": 0.7890625, "learning_rate": 0.0004396801873054922, "loss": 0.2184, "step": 163640 }, { "epoch": 6.78, "grad_norm": 1.546875, "learning_rate": 0.00043967312239342565, "loss": 0.2131, "step": 163650 }, { "epoch": 6.78, "grad_norm": 0.82421875, "learning_rate": 0.0004396660571244148, "loss": 0.2023, "step": 163660 }, { "epoch": 6.78, "grad_norm": 0.56640625, "learning_rate": 0.0004396589914984729, "loss": 0.2136, "step": 163670 }, { "epoch": 6.78, "grad_norm": 0.51171875, "learning_rate": 0.0004396519255156133, "loss": 0.1747, "step": 163680 }, { "epoch": 6.78, "grad_norm": 0.2119140625, "learning_rate": 0.00043964485917584917, "loss": 0.1905, "step": 163690 }, { "epoch": 6.78, "grad_norm": 0.66015625, "learning_rate": 0.00043963779247919397, "loss": 0.1728, "step": 163700 }, { "epoch": 6.78, "grad_norm": 0.4921875, "learning_rate": 0.00043963072542566083, "loss": 0.2515, "step": 163710 }, { "epoch": 6.78, "grad_norm": 0.50390625, "learning_rate": 0.00043962365801526314, "loss": 0.1779, "step": 163720 }, { "epoch": 6.78, "grad_norm": 0.294921875, "learning_rate": 0.0004396165902480143, "loss": 0.2418, "step": 163730 }, { "epoch": 6.78, "grad_norm": 1.21875, "learning_rate": 0.00043960952212392747, "loss": 0.195, "step": 163740 }, { "epoch": 6.78, "grad_norm": 0.455078125, "learning_rate": 0.00043960245364301604, "loss": 0.1838, "step": 163750 }, { "epoch": 6.78, "grad_norm": 0.69921875, "learning_rate": 0.0004395953848052932, "loss": 0.2534, "step": 163760 }, { "epoch": 6.78, "grad_norm": 3.0625, "learning_rate": 0.00043958831561077236, "loss": 0.2398, "step": 163770 }, { "epoch": 6.78, "grad_norm": 0.87890625, "learning_rate": 0.00043958124605946683, "loss": 0.2025, "step": 163780 }, { "epoch": 6.78, "grad_norm": 1.390625, "learning_rate": 0.00043957417615138973, "loss": 0.1786, "step": 163790 }, { "epoch": 6.78, "grad_norm": 0.97265625, "learning_rate": 0.0004395671058865546, "loss": 0.2233, "step": 163800 }, { "epoch": 6.78, "grad_norm": 0.28515625, "learning_rate": 0.0004395600352649746, "loss": 0.18, "step": 163810 }, { "epoch": 6.79, "grad_norm": 0.5859375, "learning_rate": 0.0004395529642866631, "loss": 0.1776, "step": 163820 }, { "epoch": 6.79, "grad_norm": 0.93359375, "learning_rate": 0.0004395458929516335, "loss": 0.1391, "step": 163830 }, { "epoch": 6.79, "grad_norm": 0.46875, "learning_rate": 0.00043953882125989885, "loss": 0.2422, "step": 163840 }, { "epoch": 6.79, "grad_norm": 0.451171875, "learning_rate": 0.00043953174921147265, "loss": 0.1466, "step": 163850 }, { "epoch": 6.79, "grad_norm": 0.6171875, "learning_rate": 0.0004395246768063682, "loss": 0.1863, "step": 163860 }, { "epoch": 6.79, "grad_norm": 0.7421875, "learning_rate": 0.00043951760404459874, "loss": 0.1707, "step": 163870 }, { "epoch": 6.79, "grad_norm": 1.421875, "learning_rate": 0.00043951053092617764, "loss": 0.2218, "step": 163880 }, { "epoch": 6.79, "grad_norm": 1.2890625, "learning_rate": 0.0004395034574511182, "loss": 0.2281, "step": 163890 }, { "epoch": 6.79, "grad_norm": 1.0859375, "learning_rate": 0.0004394963836194337, "loss": 0.2164, "step": 163900 }, { "epoch": 6.79, "grad_norm": 1.1171875, "learning_rate": 0.0004394893094311374, "loss": 0.2047, "step": 163910 }, { "epoch": 6.79, "grad_norm": 0.35546875, "learning_rate": 0.00043948223488624277, "loss": 0.2312, "step": 163920 }, { "epoch": 6.79, "grad_norm": 0.63671875, "learning_rate": 0.000439475159984763, "loss": 0.2296, "step": 163930 }, { "epoch": 6.79, "grad_norm": 1.0703125, "learning_rate": 0.0004394680847267115, "loss": 0.2125, "step": 163940 }, { "epoch": 6.79, "grad_norm": 0.357421875, "learning_rate": 0.0004394610091121014, "loss": 0.1713, "step": 163950 }, { "epoch": 6.79, "grad_norm": 0.345703125, "learning_rate": 0.00043945393314094626, "loss": 0.1677, "step": 163960 }, { "epoch": 6.79, "grad_norm": 0.73828125, "learning_rate": 0.00043944685681325925, "loss": 0.2557, "step": 163970 }, { "epoch": 6.79, "grad_norm": 0.63671875, "learning_rate": 0.0004394397801290537, "loss": 0.2219, "step": 163980 }, { "epoch": 6.79, "grad_norm": 0.373046875, "learning_rate": 0.00043943270308834293, "loss": 0.1847, "step": 163990 }, { "epoch": 6.79, "grad_norm": 0.64453125, "learning_rate": 0.0004394256256911403, "loss": 0.2168, "step": 164000 }, { "epoch": 6.79, "grad_norm": 0.80859375, "learning_rate": 0.00043941854793745904, "loss": 0.1999, "step": 164010 }, { "epoch": 6.79, "grad_norm": 0.44921875, "learning_rate": 0.0004394114698273126, "loss": 0.1758, "step": 164020 }, { "epoch": 6.79, "grad_norm": 0.427734375, "learning_rate": 0.0004394043913607142, "loss": 0.1943, "step": 164030 }, { "epoch": 6.79, "grad_norm": 0.34375, "learning_rate": 0.00043939731253767717, "loss": 0.1877, "step": 164040 }, { "epoch": 6.79, "grad_norm": 0.416015625, "learning_rate": 0.0004393902333582148, "loss": 0.2258, "step": 164050 }, { "epoch": 6.8, "grad_norm": 0.482421875, "learning_rate": 0.0004393831538223406, "loss": 0.165, "step": 164060 }, { "epoch": 6.8, "grad_norm": 0.55859375, "learning_rate": 0.0004393760739300677, "loss": 0.1837, "step": 164070 }, { "epoch": 6.8, "grad_norm": 0.291015625, "learning_rate": 0.00043936899368140944, "loss": 0.1905, "step": 164080 }, { "epoch": 6.8, "grad_norm": 1.6171875, "learning_rate": 0.0004393619130763792, "loss": 0.17, "step": 164090 }, { "epoch": 6.8, "grad_norm": 0.474609375, "learning_rate": 0.0004393548321149903, "loss": 0.1737, "step": 164100 }, { "epoch": 6.8, "grad_norm": 0.5, "learning_rate": 0.0004393477507972561, "loss": 0.2169, "step": 164110 }, { "epoch": 6.8, "grad_norm": 0.66015625, "learning_rate": 0.0004393406691231898, "loss": 0.2142, "step": 164120 }, { "epoch": 6.8, "grad_norm": 0.4609375, "learning_rate": 0.0004393335870928048, "loss": 0.1723, "step": 164130 }, { "epoch": 6.8, "grad_norm": 0.5546875, "learning_rate": 0.0004393265047061145, "loss": 0.2635, "step": 164140 }, { "epoch": 6.8, "grad_norm": 0.53125, "learning_rate": 0.0004393194219631321, "loss": 0.1246, "step": 164150 }, { "epoch": 6.8, "grad_norm": 0.88671875, "learning_rate": 0.00043931233886387107, "loss": 0.2095, "step": 164160 }, { "epoch": 6.8, "grad_norm": 0.6640625, "learning_rate": 0.00043930525540834467, "loss": 0.2058, "step": 164170 }, { "epoch": 6.8, "grad_norm": 1.0703125, "learning_rate": 0.00043929817159656613, "loss": 0.1847, "step": 164180 }, { "epoch": 6.8, "grad_norm": 0.83984375, "learning_rate": 0.00043929108742854896, "loss": 0.1986, "step": 164190 }, { "epoch": 6.8, "grad_norm": 0.6171875, "learning_rate": 0.0004392840029043063, "loss": 0.2024, "step": 164200 }, { "epoch": 6.8, "grad_norm": 1.0234375, "learning_rate": 0.00043927691802385165, "loss": 0.19, "step": 164210 }, { "epoch": 6.8, "grad_norm": 0.349609375, "learning_rate": 0.00043926983278719836, "loss": 0.1699, "step": 164220 }, { "epoch": 6.8, "grad_norm": 0.59765625, "learning_rate": 0.00043926274719435955, "loss": 0.1742, "step": 164230 }, { "epoch": 6.8, "grad_norm": 0.75390625, "learning_rate": 0.0004392556612453488, "loss": 0.1421, "step": 164240 }, { "epoch": 6.8, "grad_norm": 1.609375, "learning_rate": 0.0004392485749401792, "loss": 0.1859, "step": 164250 }, { "epoch": 6.8, "grad_norm": 0.95703125, "learning_rate": 0.0004392414882788643, "loss": 0.2178, "step": 164260 }, { "epoch": 6.8, "grad_norm": 0.94921875, "learning_rate": 0.0004392344012614174, "loss": 0.2228, "step": 164270 }, { "epoch": 6.8, "grad_norm": 0.322265625, "learning_rate": 0.00043922731388785173, "loss": 0.16, "step": 164280 }, { "epoch": 6.8, "grad_norm": 0.9140625, "learning_rate": 0.0004392202261581807, "loss": 0.2159, "step": 164290 }, { "epoch": 6.81, "grad_norm": 0.74609375, "learning_rate": 0.0004392131380724177, "loss": 0.1982, "step": 164300 }, { "epoch": 6.81, "grad_norm": 0.6171875, "learning_rate": 0.0004392060496305759, "loss": 0.1816, "step": 164310 }, { "epoch": 6.81, "grad_norm": 0.80859375, "learning_rate": 0.0004391989608326688, "loss": 0.2052, "step": 164320 }, { "epoch": 6.81, "grad_norm": 0.380859375, "learning_rate": 0.00043919187167870964, "loss": 0.1634, "step": 164330 }, { "epoch": 6.81, "grad_norm": 0.60546875, "learning_rate": 0.0004391847821687119, "loss": 0.2298, "step": 164340 }, { "epoch": 6.81, "grad_norm": 1.078125, "learning_rate": 0.00043917769230268877, "loss": 0.2419, "step": 164350 }, { "epoch": 6.81, "grad_norm": 0.90234375, "learning_rate": 0.0004391706020806537, "loss": 0.187, "step": 164360 }, { "epoch": 6.81, "grad_norm": 1.109375, "learning_rate": 0.0004391635115026199, "loss": 0.236, "step": 164370 }, { "epoch": 6.81, "grad_norm": 0.162109375, "learning_rate": 0.0004391564205686009, "loss": 0.1151, "step": 164380 }, { "epoch": 6.81, "grad_norm": 0.83203125, "learning_rate": 0.00043914932927860986, "loss": 0.2122, "step": 164390 }, { "epoch": 6.81, "grad_norm": 0.953125, "learning_rate": 0.00043914223763266024, "loss": 0.1719, "step": 164400 }, { "epoch": 6.81, "grad_norm": 1.3203125, "learning_rate": 0.0004391351456307654, "loss": 0.1445, "step": 164410 }, { "epoch": 6.81, "grad_norm": 1.4375, "learning_rate": 0.0004391280532729386, "loss": 0.2117, "step": 164420 }, { "epoch": 6.81, "grad_norm": 0.345703125, "learning_rate": 0.0004391209605591932, "loss": 0.2197, "step": 164430 }, { "epoch": 6.81, "grad_norm": 0.65234375, "learning_rate": 0.00043911386748954264, "loss": 0.232, "step": 164440 }, { "epoch": 6.81, "grad_norm": 0.79296875, "learning_rate": 0.0004391067740640001, "loss": 0.1531, "step": 164450 }, { "epoch": 6.81, "grad_norm": 0.8984375, "learning_rate": 0.00043909968028257916, "loss": 0.2124, "step": 164460 }, { "epoch": 6.81, "grad_norm": 1.9140625, "learning_rate": 0.000439092586145293, "loss": 0.2198, "step": 164470 }, { "epoch": 6.81, "grad_norm": 0.81640625, "learning_rate": 0.00043908549165215494, "loss": 0.2027, "step": 164480 }, { "epoch": 6.81, "grad_norm": 0.9375, "learning_rate": 0.00043907839680317845, "loss": 0.1788, "step": 164490 }, { "epoch": 6.81, "grad_norm": 1.6015625, "learning_rate": 0.00043907130159837683, "loss": 0.2268, "step": 164500 }, { "epoch": 6.81, "grad_norm": 0.287109375, "learning_rate": 0.00043906420603776353, "loss": 0.1917, "step": 164510 }, { "epoch": 6.81, "grad_norm": 0.60546875, "learning_rate": 0.0004390571101213517, "loss": 0.2237, "step": 164520 }, { "epoch": 6.81, "grad_norm": 0.82421875, "learning_rate": 0.0004390500138491548, "loss": 0.1972, "step": 164530 }, { "epoch": 6.82, "grad_norm": 1.40625, "learning_rate": 0.00043904291722118627, "loss": 0.2021, "step": 164540 }, { "epoch": 6.82, "grad_norm": 0.6171875, "learning_rate": 0.00043903582023745933, "loss": 0.2224, "step": 164550 }, { "epoch": 6.82, "grad_norm": 0.5703125, "learning_rate": 0.00043902872289798745, "loss": 0.1665, "step": 164560 }, { "epoch": 6.82, "grad_norm": 1.484375, "learning_rate": 0.00043902162520278385, "loss": 0.1874, "step": 164570 }, { "epoch": 6.82, "grad_norm": 0.439453125, "learning_rate": 0.000439014527151862, "loss": 0.1762, "step": 164580 }, { "epoch": 6.82, "grad_norm": 0.7890625, "learning_rate": 0.00043900742874523527, "loss": 0.2465, "step": 164590 }, { "epoch": 6.82, "grad_norm": 0.51171875, "learning_rate": 0.00043900032998291685, "loss": 0.209, "step": 164600 }, { "epoch": 6.82, "grad_norm": 1.0859375, "learning_rate": 0.00043899323086492027, "loss": 0.1663, "step": 164610 }, { "epoch": 6.82, "grad_norm": 0.75390625, "learning_rate": 0.00043898613139125886, "loss": 0.225, "step": 164620 }, { "epoch": 6.82, "grad_norm": 0.66796875, "learning_rate": 0.00043897903156194597, "loss": 0.2307, "step": 164630 }, { "epoch": 6.82, "grad_norm": 0.78515625, "learning_rate": 0.000438971931376995, "loss": 0.2128, "step": 164640 }, { "epoch": 6.82, "grad_norm": 1.3359375, "learning_rate": 0.00043896483083641916, "loss": 0.2013, "step": 164650 }, { "epoch": 6.82, "grad_norm": 0.3046875, "learning_rate": 0.000438957729940232, "loss": 0.1915, "step": 164660 }, { "epoch": 6.82, "grad_norm": 0.39453125, "learning_rate": 0.0004389506286884467, "loss": 0.2, "step": 164670 }, { "epoch": 6.82, "grad_norm": 0.97265625, "learning_rate": 0.00043894352708107677, "loss": 0.2286, "step": 164680 }, { "epoch": 6.82, "grad_norm": 0.37890625, "learning_rate": 0.0004389364251181355, "loss": 0.2185, "step": 164690 }, { "epoch": 6.82, "grad_norm": 0.92578125, "learning_rate": 0.0004389293227996363, "loss": 0.2504, "step": 164700 }, { "epoch": 6.82, "grad_norm": 1.6171875, "learning_rate": 0.00043892222012559257, "loss": 0.2055, "step": 164710 }, { "epoch": 6.82, "grad_norm": 1.0546875, "learning_rate": 0.00043891511709601757, "loss": 0.1708, "step": 164720 }, { "epoch": 6.82, "grad_norm": 0.73046875, "learning_rate": 0.0004389080137109247, "loss": 0.1925, "step": 164730 }, { "epoch": 6.82, "grad_norm": 0.7265625, "learning_rate": 0.0004389009099703274, "loss": 0.2101, "step": 164740 }, { "epoch": 6.82, "grad_norm": 1.7578125, "learning_rate": 0.000438893805874239, "loss": 0.2106, "step": 164750 }, { "epoch": 6.82, "grad_norm": 0.376953125, "learning_rate": 0.00043888670142267286, "loss": 0.1527, "step": 164760 }, { "epoch": 6.82, "grad_norm": 0.52734375, "learning_rate": 0.00043887959661564224, "loss": 0.1704, "step": 164770 }, { "epoch": 6.83, "grad_norm": 1.8828125, "learning_rate": 0.00043887249145316065, "loss": 0.1887, "step": 164780 }, { "epoch": 6.83, "grad_norm": 0.66796875, "learning_rate": 0.0004388653859352415, "loss": 0.2237, "step": 164790 }, { "epoch": 6.83, "grad_norm": 0.41015625, "learning_rate": 0.0004388582800618981, "loss": 0.2178, "step": 164800 }, { "epoch": 6.83, "grad_norm": 0.55078125, "learning_rate": 0.0004388511738331437, "loss": 0.2173, "step": 164810 }, { "epoch": 6.83, "grad_norm": 1.7734375, "learning_rate": 0.00043884406724899183, "loss": 0.1847, "step": 164820 }, { "epoch": 6.83, "grad_norm": 0.470703125, "learning_rate": 0.0004388369603094559, "loss": 0.1924, "step": 164830 }, { "epoch": 6.83, "grad_norm": 1.0078125, "learning_rate": 0.00043882985301454914, "loss": 0.1997, "step": 164840 }, { "epoch": 6.83, "grad_norm": 0.609375, "learning_rate": 0.00043882274536428495, "loss": 0.1805, "step": 164850 }, { "epoch": 6.83, "grad_norm": 0.8359375, "learning_rate": 0.0004388156373586768, "loss": 0.2388, "step": 164860 }, { "epoch": 6.83, "grad_norm": 0.56640625, "learning_rate": 0.000438808528997738, "loss": 0.1916, "step": 164870 }, { "epoch": 6.83, "grad_norm": 0.72265625, "learning_rate": 0.000438801420281482, "loss": 0.1769, "step": 164880 }, { "epoch": 6.83, "grad_norm": 0.83984375, "learning_rate": 0.000438794311209922, "loss": 0.2295, "step": 164890 }, { "epoch": 6.83, "grad_norm": 0.34375, "learning_rate": 0.00043878720178307153, "loss": 0.2226, "step": 164900 }, { "epoch": 6.83, "grad_norm": 1.2421875, "learning_rate": 0.0004387800920009439, "loss": 0.2229, "step": 164910 }, { "epoch": 6.83, "grad_norm": 1.265625, "learning_rate": 0.00043877298186355263, "loss": 0.2116, "step": 164920 }, { "epoch": 6.83, "grad_norm": 0.7109375, "learning_rate": 0.00043876587137091095, "loss": 0.179, "step": 164930 }, { "epoch": 6.83, "grad_norm": 0.73828125, "learning_rate": 0.0004387587605230322, "loss": 0.2277, "step": 164940 }, { "epoch": 6.83, "grad_norm": 0.8203125, "learning_rate": 0.00043875164931992994, "loss": 0.1977, "step": 164950 }, { "epoch": 6.83, "grad_norm": 0.87109375, "learning_rate": 0.00043874453776161743, "loss": 0.2373, "step": 164960 }, { "epoch": 6.83, "grad_norm": 0.9453125, "learning_rate": 0.0004387374258481081, "loss": 0.1725, "step": 164970 }, { "epoch": 6.83, "grad_norm": 0.890625, "learning_rate": 0.0004387303135794153, "loss": 0.2434, "step": 164980 }, { "epoch": 6.83, "grad_norm": 0.453125, "learning_rate": 0.0004387232009555524, "loss": 0.1856, "step": 164990 }, { "epoch": 6.83, "grad_norm": 0.78125, "learning_rate": 0.0004387160879765328, "loss": 0.1634, "step": 165000 }, { "epoch": 6.83, "grad_norm": 1.1328125, "learning_rate": 0.00043870897464237, "loss": 0.1696, "step": 165010 }, { "epoch": 6.84, "grad_norm": 0.6640625, "learning_rate": 0.0004387018609530773, "loss": 0.2212, "step": 165020 }, { "epoch": 6.84, "grad_norm": 0.60546875, "learning_rate": 0.000438694746908668, "loss": 0.1779, "step": 165030 }, { "epoch": 6.84, "grad_norm": 0.73828125, "learning_rate": 0.00043868763250915556, "loss": 0.2238, "step": 165040 }, { "epoch": 6.84, "grad_norm": 0.68359375, "learning_rate": 0.0004386805177545534, "loss": 0.1985, "step": 165050 }, { "epoch": 6.84, "grad_norm": 0.6328125, "learning_rate": 0.0004386734026448749, "loss": 0.2221, "step": 165060 }, { "epoch": 6.84, "grad_norm": 0.78515625, "learning_rate": 0.0004386662871801334, "loss": 0.1806, "step": 165070 }, { "epoch": 6.84, "grad_norm": 0.65625, "learning_rate": 0.0004386591713603423, "loss": 0.217, "step": 165080 }, { "epoch": 6.84, "grad_norm": 0.65625, "learning_rate": 0.0004386520551855151, "loss": 0.1998, "step": 165090 }, { "epoch": 6.84, "grad_norm": 0.9453125, "learning_rate": 0.000438644938655665, "loss": 0.205, "step": 165100 }, { "epoch": 6.84, "grad_norm": 1.375, "learning_rate": 0.00043863782177080556, "loss": 0.2371, "step": 165110 }, { "epoch": 6.84, "grad_norm": 0.283203125, "learning_rate": 0.0004386307045309501, "loss": 0.2027, "step": 165120 }, { "epoch": 6.84, "grad_norm": 0.3125, "learning_rate": 0.00043862358693611204, "loss": 0.2407, "step": 165130 }, { "epoch": 6.84, "grad_norm": 0.478515625, "learning_rate": 0.00043861646898630467, "loss": 0.2084, "step": 165140 }, { "epoch": 6.84, "grad_norm": 1.8046875, "learning_rate": 0.00043860935068154155, "loss": 0.2332, "step": 165150 }, { "epoch": 6.84, "grad_norm": 0.5, "learning_rate": 0.000438602232021836, "loss": 0.1941, "step": 165160 }, { "epoch": 6.84, "grad_norm": 0.0, "learning_rate": 0.0004385951130072014, "loss": 0.148, "step": 165170 }, { "epoch": 6.84, "grad_norm": 0.390625, "learning_rate": 0.0004385879936376512, "loss": 0.2542, "step": 165180 }, { "epoch": 6.84, "grad_norm": 0.80859375, "learning_rate": 0.00043858087391319874, "loss": 0.2528, "step": 165190 }, { "epoch": 6.84, "grad_norm": 0.466796875, "learning_rate": 0.00043857375383385747, "loss": 0.2388, "step": 165200 }, { "epoch": 6.84, "grad_norm": 1.8359375, "learning_rate": 0.00043856663339964067, "loss": 0.2329, "step": 165210 }, { "epoch": 6.84, "grad_norm": 0.57421875, "learning_rate": 0.00043855951261056195, "loss": 0.2485, "step": 165220 }, { "epoch": 6.84, "grad_norm": 0.859375, "learning_rate": 0.00043855239146663443, "loss": 0.2125, "step": 165230 }, { "epoch": 6.84, "grad_norm": 0.7421875, "learning_rate": 0.0004385452699678718, "loss": 0.209, "step": 165240 }, { "epoch": 6.84, "grad_norm": 0.59375, "learning_rate": 0.00043853814811428726, "loss": 0.2354, "step": 165250 }, { "epoch": 6.85, "grad_norm": 1.1328125, "learning_rate": 0.0004385310259058943, "loss": 0.2616, "step": 165260 }, { "epoch": 6.85, "grad_norm": 0.69921875, "learning_rate": 0.00043852390334270637, "loss": 0.2047, "step": 165270 }, { "epoch": 6.85, "grad_norm": 0.546875, "learning_rate": 0.00043851678042473673, "loss": 0.1727, "step": 165280 }, { "epoch": 6.85, "grad_norm": 0.72265625, "learning_rate": 0.00043850965715199895, "loss": 0.1822, "step": 165290 }, { "epoch": 6.85, "grad_norm": 0.5546875, "learning_rate": 0.0004385025335245063, "loss": 0.1836, "step": 165300 }, { "epoch": 6.85, "grad_norm": 0.65234375, "learning_rate": 0.0004384954095422723, "loss": 0.1968, "step": 165310 }, { "epoch": 6.85, "grad_norm": 0.4609375, "learning_rate": 0.0004384882852053103, "loss": 0.189, "step": 165320 }, { "epoch": 6.85, "grad_norm": 1.015625, "learning_rate": 0.0004384811605136336, "loss": 0.1956, "step": 165330 }, { "epoch": 6.85, "grad_norm": 0.85546875, "learning_rate": 0.0004384740354672557, "loss": 0.2088, "step": 165340 }, { "epoch": 6.85, "grad_norm": 0.59375, "learning_rate": 0.0004384669100661901, "loss": 0.201, "step": 165350 }, { "epoch": 6.85, "grad_norm": 0.80078125, "learning_rate": 0.00043845978431045007, "loss": 0.1726, "step": 165360 }, { "epoch": 6.85, "grad_norm": 1.25, "learning_rate": 0.00043845265820004913, "loss": 0.191, "step": 165370 }, { "epoch": 6.85, "grad_norm": 1.09375, "learning_rate": 0.0004384455317350007, "loss": 0.2134, "step": 165380 }, { "epoch": 6.85, "grad_norm": 0.54296875, "learning_rate": 0.000438438404915318, "loss": 0.2078, "step": 165390 }, { "epoch": 6.85, "grad_norm": 0.81640625, "learning_rate": 0.0004384312777410147, "loss": 0.1313, "step": 165400 }, { "epoch": 6.85, "grad_norm": 0.474609375, "learning_rate": 0.0004384241502121039, "loss": 0.1952, "step": 165410 }, { "epoch": 6.85, "grad_norm": 0.5546875, "learning_rate": 0.0004384170223285994, "loss": 0.1927, "step": 165420 }, { "epoch": 6.85, "grad_norm": 0.498046875, "learning_rate": 0.00043840989409051425, "loss": 0.1879, "step": 165430 }, { "epoch": 6.85, "grad_norm": 0.419921875, "learning_rate": 0.00043840276549786205, "loss": 0.2189, "step": 165440 }, { "epoch": 6.85, "grad_norm": 0.400390625, "learning_rate": 0.00043839563655065627, "loss": 0.2142, "step": 165450 }, { "epoch": 6.85, "grad_norm": 0.388671875, "learning_rate": 0.0004383885072489102, "loss": 0.1575, "step": 165460 }, { "epoch": 6.85, "grad_norm": 0.50390625, "learning_rate": 0.0004383813775926373, "loss": 0.1857, "step": 165470 }, { "epoch": 6.85, "grad_norm": 0.5078125, "learning_rate": 0.000438374247581851, "loss": 0.2204, "step": 165480 }, { "epoch": 6.85, "grad_norm": 2.328125, "learning_rate": 0.00043836711721656474, "loss": 0.2559, "step": 165490 }, { "epoch": 6.85, "grad_norm": 0.9765625, "learning_rate": 0.0004383599864967919, "loss": 0.1896, "step": 165500 }, { "epoch": 6.86, "grad_norm": 0.86328125, "learning_rate": 0.0004383528554225459, "loss": 0.209, "step": 165510 }, { "epoch": 6.86, "grad_norm": 0.7578125, "learning_rate": 0.0004383457239938401, "loss": 0.1635, "step": 165520 }, { "epoch": 6.86, "grad_norm": 2.109375, "learning_rate": 0.00043833859221068807, "loss": 0.2118, "step": 165530 }, { "epoch": 6.86, "grad_norm": 1.5546875, "learning_rate": 0.00043833146007310307, "loss": 0.1899, "step": 165540 }, { "epoch": 6.86, "grad_norm": 0.3359375, "learning_rate": 0.0004383243275810986, "loss": 0.1988, "step": 165550 }, { "epoch": 6.86, "grad_norm": 0.7578125, "learning_rate": 0.00043831719473468825, "loss": 0.1822, "step": 165560 }, { "epoch": 6.86, "grad_norm": 0.498046875, "learning_rate": 0.0004383100615338851, "loss": 0.2204, "step": 165570 }, { "epoch": 6.86, "grad_norm": 0.67578125, "learning_rate": 0.00043830292797870284, "loss": 0.1558, "step": 165580 }, { "epoch": 6.86, "grad_norm": 0.97265625, "learning_rate": 0.0004382957940691548, "loss": 0.2565, "step": 165590 }, { "epoch": 6.86, "grad_norm": 0.640625, "learning_rate": 0.0004382886598052543, "loss": 0.1623, "step": 165600 }, { "epoch": 6.86, "grad_norm": 0.82421875, "learning_rate": 0.000438281525187015, "loss": 0.2479, "step": 165610 }, { "epoch": 6.86, "grad_norm": 0.54296875, "learning_rate": 0.00043827439021445016, "loss": 0.1663, "step": 165620 }, { "epoch": 6.86, "grad_norm": 0.400390625, "learning_rate": 0.00043826725488757323, "loss": 0.2339, "step": 165630 }, { "epoch": 6.86, "grad_norm": 0.91796875, "learning_rate": 0.0004382601192063976, "loss": 0.1856, "step": 165640 }, { "epoch": 6.86, "grad_norm": 0.625, "learning_rate": 0.00043825298317093695, "loss": 0.2559, "step": 165650 }, { "epoch": 6.86, "grad_norm": 0.80078125, "learning_rate": 0.0004382458467812044, "loss": 0.1872, "step": 165660 }, { "epoch": 6.86, "grad_norm": 0.65625, "learning_rate": 0.0004382387100372135, "loss": 0.1871, "step": 165670 }, { "epoch": 6.86, "grad_norm": 0.8359375, "learning_rate": 0.00043823157293897765, "loss": 0.2174, "step": 165680 }, { "epoch": 6.86, "grad_norm": 0.85546875, "learning_rate": 0.0004382244354865103, "loss": 0.1886, "step": 165690 }, { "epoch": 6.86, "grad_norm": 0.310546875, "learning_rate": 0.00043821729767982493, "loss": 0.1789, "step": 165700 }, { "epoch": 6.86, "grad_norm": 1.0625, "learning_rate": 0.00043821015951893487, "loss": 0.1643, "step": 165710 }, { "epoch": 6.86, "grad_norm": 2.796875, "learning_rate": 0.0004382030210038537, "loss": 0.2283, "step": 165720 }, { "epoch": 6.86, "grad_norm": 0.59375, "learning_rate": 0.0004381958821345947, "loss": 0.1957, "step": 165730 }, { "epoch": 6.86, "grad_norm": 0.4296875, "learning_rate": 0.0004381887429111714, "loss": 0.2035, "step": 165740 }, { "epoch": 6.87, "grad_norm": 1.984375, "learning_rate": 0.0004381816033335972, "loss": 0.1852, "step": 165750 }, { "epoch": 6.87, "grad_norm": 0.61328125, "learning_rate": 0.0004381744634018856, "loss": 0.22, "step": 165760 }, { "epoch": 6.87, "grad_norm": 0.8203125, "learning_rate": 0.0004381673231160499, "loss": 0.2237, "step": 165770 }, { "epoch": 6.87, "grad_norm": 1.0703125, "learning_rate": 0.00043816018247610366, "loss": 0.1744, "step": 165780 }, { "epoch": 6.87, "grad_norm": 0.56640625, "learning_rate": 0.00043815304148206024, "loss": 0.223, "step": 165790 }, { "epoch": 6.87, "grad_norm": 0.7265625, "learning_rate": 0.0004381459001339332, "loss": 0.159, "step": 165800 }, { "epoch": 6.87, "grad_norm": 0.84765625, "learning_rate": 0.00043813875843173584, "loss": 0.201, "step": 165810 }, { "epoch": 6.87, "grad_norm": 1.484375, "learning_rate": 0.00043813161637548166, "loss": 0.2478, "step": 165820 }, { "epoch": 6.87, "grad_norm": 0.466796875, "learning_rate": 0.00043812447396518407, "loss": 0.2494, "step": 165830 }, { "epoch": 6.87, "grad_norm": 0.408203125, "learning_rate": 0.00043811733120085653, "loss": 0.2147, "step": 165840 }, { "epoch": 6.87, "grad_norm": 0.361328125, "learning_rate": 0.0004381101880825126, "loss": 0.2256, "step": 165850 }, { "epoch": 6.87, "grad_norm": 1.453125, "learning_rate": 0.00043810304461016546, "loss": 0.2021, "step": 165860 }, { "epoch": 6.87, "grad_norm": 0.671875, "learning_rate": 0.00043809590078382877, "loss": 0.2621, "step": 165870 }, { "epoch": 6.87, "grad_norm": 0.5546875, "learning_rate": 0.0004380887566035159, "loss": 0.2219, "step": 165880 }, { "epoch": 6.87, "grad_norm": 0.73828125, "learning_rate": 0.0004380816120692403, "loss": 0.211, "step": 165890 }, { "epoch": 6.87, "grad_norm": 0.78515625, "learning_rate": 0.0004380744671810154, "loss": 0.2399, "step": 165900 }, { "epoch": 6.87, "grad_norm": 0.8515625, "learning_rate": 0.0004380673219388547, "loss": 0.1596, "step": 165910 }, { "epoch": 6.87, "grad_norm": 0.96484375, "learning_rate": 0.00043806017634277165, "loss": 0.1826, "step": 165920 }, { "epoch": 6.87, "grad_norm": 0.55078125, "learning_rate": 0.00043805303039277954, "loss": 0.1658, "step": 165930 }, { "epoch": 6.87, "grad_norm": 1.203125, "learning_rate": 0.000438045884088892, "loss": 0.1923, "step": 165940 }, { "epoch": 6.87, "grad_norm": 0.875, "learning_rate": 0.00043803873743112243, "loss": 0.2176, "step": 165950 }, { "epoch": 6.87, "grad_norm": 0.412109375, "learning_rate": 0.0004380315904194842, "loss": 0.2008, "step": 165960 }, { "epoch": 6.87, "grad_norm": 0.3671875, "learning_rate": 0.0004380244430539909, "loss": 0.234, "step": 165970 }, { "epoch": 6.87, "grad_norm": 2.578125, "learning_rate": 0.00043801729533465584, "loss": 0.2256, "step": 165980 }, { "epoch": 6.88, "grad_norm": 0.5, "learning_rate": 0.00043801014726149257, "loss": 0.1925, "step": 165990 }, { "epoch": 6.88, "grad_norm": 1.1953125, "learning_rate": 0.00043800299883451445, "loss": 0.241, "step": 166000 }, { "epoch": 6.88, "grad_norm": 0.77734375, "learning_rate": 0.0004379958500537351, "loss": 0.2574, "step": 166010 }, { "epoch": 6.88, "grad_norm": 0.875, "learning_rate": 0.00043798870091916775, "loss": 0.1822, "step": 166020 }, { "epoch": 6.88, "grad_norm": 1.296875, "learning_rate": 0.00043798155143082607, "loss": 0.1991, "step": 166030 }, { "epoch": 6.88, "grad_norm": 0.79296875, "learning_rate": 0.0004379744015887233, "loss": 0.2165, "step": 166040 }, { "epoch": 6.88, "grad_norm": 1.3984375, "learning_rate": 0.0004379672513928731, "loss": 0.2139, "step": 166050 }, { "epoch": 6.88, "grad_norm": 0.546875, "learning_rate": 0.00043796010084328873, "loss": 0.1649, "step": 166060 }, { "epoch": 6.88, "grad_norm": 0.80859375, "learning_rate": 0.00043795294993998385, "loss": 0.2413, "step": 166070 }, { "epoch": 6.88, "grad_norm": 0.671875, "learning_rate": 0.0004379457986829718, "loss": 0.2051, "step": 166080 }, { "epoch": 6.88, "grad_norm": 2.453125, "learning_rate": 0.000437938647072266, "loss": 0.2616, "step": 166090 }, { "epoch": 6.88, "grad_norm": 0.20703125, "learning_rate": 0.00043793149510788, "loss": 0.1691, "step": 166100 }, { "epoch": 6.88, "grad_norm": 0.2578125, "learning_rate": 0.0004379243427898272, "loss": 0.2379, "step": 166110 }, { "epoch": 6.88, "grad_norm": 0.384765625, "learning_rate": 0.0004379171901181211, "loss": 0.1848, "step": 166120 }, { "epoch": 6.88, "grad_norm": 0.46484375, "learning_rate": 0.0004379100370927751, "loss": 0.2186, "step": 166130 }, { "epoch": 6.88, "grad_norm": 0.515625, "learning_rate": 0.00043790288371380276, "loss": 0.1613, "step": 166140 }, { "epoch": 6.88, "grad_norm": 1.234375, "learning_rate": 0.00043789572998121745, "loss": 0.1769, "step": 166150 }, { "epoch": 6.88, "grad_norm": 0.4609375, "learning_rate": 0.0004378885758950327, "loss": 0.2132, "step": 166160 }, { "epoch": 6.88, "grad_norm": 5.125, "learning_rate": 0.0004378814214552619, "loss": 0.1786, "step": 166170 }, { "epoch": 6.88, "grad_norm": 0.734375, "learning_rate": 0.00043787426666191856, "loss": 0.2339, "step": 166180 }, { "epoch": 6.88, "grad_norm": 0.765625, "learning_rate": 0.0004378671115150162, "loss": 0.2138, "step": 166190 }, { "epoch": 6.88, "grad_norm": 0.421875, "learning_rate": 0.0004378599560145682, "loss": 0.1899, "step": 166200 }, { "epoch": 6.88, "grad_norm": 0.6953125, "learning_rate": 0.000437852800160588, "loss": 0.1977, "step": 166210 }, { "epoch": 6.88, "grad_norm": 0.60546875, "learning_rate": 0.0004378456439530891, "loss": 0.2682, "step": 166220 }, { "epoch": 6.89, "grad_norm": 0.68359375, "learning_rate": 0.00043783848739208504, "loss": 0.2134, "step": 166230 }, { "epoch": 6.89, "grad_norm": 0.56640625, "learning_rate": 0.0004378313304775892, "loss": 0.1988, "step": 166240 }, { "epoch": 6.89, "grad_norm": 0.3125, "learning_rate": 0.00043782417320961507, "loss": 0.2083, "step": 166250 }, { "epoch": 6.89, "grad_norm": 0.25390625, "learning_rate": 0.0004378170155881762, "loss": 0.1569, "step": 166260 }, { "epoch": 6.89, "grad_norm": 1.0859375, "learning_rate": 0.00043780985761328594, "loss": 0.1929, "step": 166270 }, { "epoch": 6.89, "grad_norm": 0.59375, "learning_rate": 0.0004378026992849578, "loss": 0.2099, "step": 166280 }, { "epoch": 6.89, "grad_norm": 1.234375, "learning_rate": 0.0004377955406032053, "loss": 0.1926, "step": 166290 }, { "epoch": 6.89, "grad_norm": 0.53125, "learning_rate": 0.00043778838156804186, "loss": 0.1768, "step": 166300 }, { "epoch": 6.89, "grad_norm": 0.423828125, "learning_rate": 0.0004377812221794809, "loss": 0.2527, "step": 166310 }, { "epoch": 6.89, "grad_norm": 0.220703125, "learning_rate": 0.00043777406243753603, "loss": 0.2092, "step": 166320 }, { "epoch": 6.89, "grad_norm": 0.6328125, "learning_rate": 0.0004377669023422206, "loss": 0.237, "step": 166330 }, { "epoch": 6.89, "grad_norm": 0.9140625, "learning_rate": 0.00043775974189354824, "loss": 0.2208, "step": 166340 }, { "epoch": 6.89, "grad_norm": 0.69140625, "learning_rate": 0.0004377525810915323, "loss": 0.1783, "step": 166350 }, { "epoch": 6.89, "grad_norm": 0.0, "learning_rate": 0.00043774541993618616, "loss": 0.2444, "step": 166360 }, { "epoch": 6.89, "grad_norm": 0.3203125, "learning_rate": 0.0004377382584275235, "loss": 0.1478, "step": 166370 }, { "epoch": 6.89, "grad_norm": 0.62890625, "learning_rate": 0.00043773109656555765, "loss": 0.2238, "step": 166380 }, { "epoch": 6.89, "grad_norm": 0.8515625, "learning_rate": 0.0004377239343503022, "loss": 0.2019, "step": 166390 }, { "epoch": 6.89, "grad_norm": 0.486328125, "learning_rate": 0.0004377167717817706, "loss": 0.2187, "step": 166400 }, { "epoch": 6.89, "grad_norm": 0.45703125, "learning_rate": 0.0004377096088599763, "loss": 0.1828, "step": 166410 }, { "epoch": 6.89, "grad_norm": 0.1904296875, "learning_rate": 0.00043770244558493277, "loss": 0.2059, "step": 166420 }, { "epoch": 6.89, "grad_norm": 0.28515625, "learning_rate": 0.0004376952819566535, "loss": 0.2403, "step": 166430 }, { "epoch": 6.89, "grad_norm": 0.63671875, "learning_rate": 0.00043768811797515193, "loss": 0.2642, "step": 166440 }, { "epoch": 6.89, "grad_norm": 1.0234375, "learning_rate": 0.0004376809536404417, "loss": 0.2108, "step": 166450 }, { "epoch": 6.89, "grad_norm": 0.3203125, "learning_rate": 0.00043767378895253614, "loss": 0.1627, "step": 166460 }, { "epoch": 6.9, "grad_norm": 0.53125, "learning_rate": 0.00043766662391144873, "loss": 0.1887, "step": 166470 }, { "epoch": 6.9, "grad_norm": 0.515625, "learning_rate": 0.00043765945851719304, "loss": 0.2147, "step": 166480 }, { "epoch": 6.9, "grad_norm": 0.62109375, "learning_rate": 0.0004376522927697825, "loss": 0.153, "step": 166490 }, { "epoch": 6.9, "grad_norm": 1.234375, "learning_rate": 0.0004376451266692306, "loss": 0.2144, "step": 166500 }, { "epoch": 6.9, "grad_norm": 1.078125, "learning_rate": 0.0004376379602155509, "loss": 0.2004, "step": 166510 }, { "epoch": 6.9, "grad_norm": 1.53125, "learning_rate": 0.00043763079340875677, "loss": 0.2247, "step": 166520 }, { "epoch": 6.9, "grad_norm": 0.74609375, "learning_rate": 0.00043762362624886174, "loss": 0.2087, "step": 166530 }, { "epoch": 6.9, "grad_norm": 0.37109375, "learning_rate": 0.00043761645873587935, "loss": 0.165, "step": 166540 }, { "epoch": 6.9, "grad_norm": 0.6015625, "learning_rate": 0.00043760929086982306, "loss": 0.259, "step": 166550 }, { "epoch": 6.9, "grad_norm": 0.51953125, "learning_rate": 0.00043760212265070626, "loss": 0.2345, "step": 166560 }, { "epoch": 6.9, "grad_norm": 1.5703125, "learning_rate": 0.00043759495407854254, "loss": 0.193, "step": 166570 }, { "epoch": 6.9, "grad_norm": 0.47265625, "learning_rate": 0.0004375877851533454, "loss": 0.162, "step": 166580 }, { "epoch": 6.9, "grad_norm": 0.58984375, "learning_rate": 0.0004375806158751283, "loss": 0.2265, "step": 166590 }, { "epoch": 6.9, "grad_norm": 1.3125, "learning_rate": 0.00043757344624390475, "loss": 0.2068, "step": 166600 }, { "epoch": 6.9, "grad_norm": 0.66015625, "learning_rate": 0.00043756627625968827, "loss": 0.25, "step": 166610 }, { "epoch": 6.9, "grad_norm": 0.796875, "learning_rate": 0.00043755910592249226, "loss": 0.1993, "step": 166620 }, { "epoch": 6.9, "grad_norm": 0.78515625, "learning_rate": 0.00043755193523233027, "loss": 0.1613, "step": 166630 }, { "epoch": 6.9, "grad_norm": 0.1689453125, "learning_rate": 0.0004375447641892158, "loss": 0.2136, "step": 166640 }, { "epoch": 6.9, "grad_norm": 0.423828125, "learning_rate": 0.00043753759279316237, "loss": 0.1894, "step": 166650 }, { "epoch": 6.9, "grad_norm": 0.462890625, "learning_rate": 0.0004375304210441834, "loss": 0.198, "step": 166660 }, { "epoch": 6.9, "grad_norm": 1.28125, "learning_rate": 0.00043752324894229244, "loss": 0.2122, "step": 166670 }, { "epoch": 6.9, "grad_norm": 1.1328125, "learning_rate": 0.0004375160764875029, "loss": 0.1805, "step": 166680 }, { "epoch": 6.9, "grad_norm": 1.53125, "learning_rate": 0.0004375089036798285, "loss": 0.234, "step": 166690 }, { "epoch": 6.9, "grad_norm": 0.40625, "learning_rate": 0.00043750173051928257, "loss": 0.2001, "step": 166700 }, { "epoch": 6.91, "grad_norm": 0.70703125, "learning_rate": 0.00043749455700587846, "loss": 0.2474, "step": 166710 }, { "epoch": 6.91, "grad_norm": 0.26171875, "learning_rate": 0.00043748738313963, "loss": 0.2356, "step": 166720 }, { "epoch": 6.91, "grad_norm": 0.7265625, "learning_rate": 0.00043748020892055053, "loss": 0.2477, "step": 166730 }, { "epoch": 6.91, "grad_norm": 0.76953125, "learning_rate": 0.0004374730343486535, "loss": 0.2709, "step": 166740 }, { "epoch": 6.91, "grad_norm": 0.55078125, "learning_rate": 0.00043746585942395247, "loss": 0.1439, "step": 166750 }, { "epoch": 6.91, "grad_norm": 0.9609375, "learning_rate": 0.00043745868414646094, "loss": 0.2148, "step": 166760 }, { "epoch": 6.91, "grad_norm": 0.88671875, "learning_rate": 0.0004374515085161924, "loss": 0.1874, "step": 166770 }, { "epoch": 6.91, "grad_norm": 1.03125, "learning_rate": 0.0004374443325331604, "loss": 0.2418, "step": 166780 }, { "epoch": 6.91, "grad_norm": 1.1171875, "learning_rate": 0.00043743715619737843, "loss": 0.209, "step": 166790 }, { "epoch": 6.91, "grad_norm": 0.71875, "learning_rate": 0.00043742997950885996, "loss": 0.1892, "step": 166800 }, { "epoch": 6.91, "grad_norm": 0.349609375, "learning_rate": 0.0004374228024676185, "loss": 0.2012, "step": 166810 }, { "epoch": 6.91, "grad_norm": 0.8984375, "learning_rate": 0.0004374156250736675, "loss": 0.1873, "step": 166820 }, { "epoch": 6.91, "grad_norm": 0.412109375, "learning_rate": 0.00043740844732702056, "loss": 0.2175, "step": 166830 }, { "epoch": 6.91, "grad_norm": 0.640625, "learning_rate": 0.0004374012692276912, "loss": 0.2099, "step": 166840 }, { "epoch": 6.91, "grad_norm": 0.283203125, "learning_rate": 0.0004373940907756929, "loss": 0.2147, "step": 166850 }, { "epoch": 6.91, "grad_norm": 0.498046875, "learning_rate": 0.0004373869119710391, "loss": 0.1659, "step": 166860 }, { "epoch": 6.91, "grad_norm": 1.4765625, "learning_rate": 0.00043737973281374344, "loss": 0.2216, "step": 166870 }, { "epoch": 6.91, "grad_norm": 0.2099609375, "learning_rate": 0.0004373725533038193, "loss": 0.2076, "step": 166880 }, { "epoch": 6.91, "grad_norm": 0.64453125, "learning_rate": 0.00043736537344128023, "loss": 0.2274, "step": 166890 }, { "epoch": 6.91, "grad_norm": 0.419921875, "learning_rate": 0.0004373581932261398, "loss": 0.2487, "step": 166900 }, { "epoch": 6.91, "grad_norm": 0.349609375, "learning_rate": 0.0004373510126584115, "loss": 0.2328, "step": 166910 }, { "epoch": 6.91, "grad_norm": 0.77734375, "learning_rate": 0.0004373438317381088, "loss": 0.1934, "step": 166920 }, { "epoch": 6.91, "grad_norm": 0.703125, "learning_rate": 0.0004373366504652453, "loss": 0.2154, "step": 166930 }, { "epoch": 6.91, "grad_norm": 1.4296875, "learning_rate": 0.0004373294688398344, "loss": 0.2048, "step": 166940 }, { "epoch": 6.92, "grad_norm": 0.373046875, "learning_rate": 0.0004373222868618897, "loss": 0.18, "step": 166950 }, { "epoch": 6.92, "grad_norm": 0.376953125, "learning_rate": 0.0004373151045314246, "loss": 0.1683, "step": 166960 }, { "epoch": 6.92, "grad_norm": 2.484375, "learning_rate": 0.0004373079218484528, "loss": 0.2376, "step": 166970 }, { "epoch": 6.92, "grad_norm": 0.5078125, "learning_rate": 0.00043730073881298757, "loss": 0.208, "step": 166980 }, { "epoch": 6.92, "grad_norm": 0.29296875, "learning_rate": 0.0004372935554250427, "loss": 0.2238, "step": 166990 }, { "epoch": 6.92, "grad_norm": 1.703125, "learning_rate": 0.0004372863716846316, "loss": 0.1962, "step": 167000 }, { "epoch": 6.92, "grad_norm": 0.55859375, "learning_rate": 0.00043727918759176775, "loss": 0.2044, "step": 167010 }, { "epoch": 6.92, "grad_norm": 0.52734375, "learning_rate": 0.00043727200314646464, "loss": 0.2153, "step": 167020 }, { "epoch": 6.92, "grad_norm": 0.91796875, "learning_rate": 0.00043726481834873587, "loss": 0.1967, "step": 167030 }, { "epoch": 6.92, "grad_norm": 0.78125, "learning_rate": 0.00043725763319859504, "loss": 0.2141, "step": 167040 }, { "epoch": 6.92, "grad_norm": 0.50390625, "learning_rate": 0.0004372504476960555, "loss": 0.1748, "step": 167050 }, { "epoch": 6.92, "grad_norm": 0.68359375, "learning_rate": 0.0004372432618411308, "loss": 0.1924, "step": 167060 }, { "epoch": 6.92, "grad_norm": 0.373046875, "learning_rate": 0.0004372360756338345, "loss": 0.2029, "step": 167070 }, { "epoch": 6.92, "grad_norm": 0.8828125, "learning_rate": 0.0004372288890741801, "loss": 0.1707, "step": 167080 }, { "epoch": 6.92, "grad_norm": 0.7578125, "learning_rate": 0.00043722170216218127, "loss": 0.1722, "step": 167090 }, { "epoch": 6.92, "grad_norm": 0.4140625, "learning_rate": 0.0004372145148978513, "loss": 0.2264, "step": 167100 }, { "epoch": 6.92, "grad_norm": 0.58984375, "learning_rate": 0.0004372073272812038, "loss": 0.2155, "step": 167110 }, { "epoch": 6.92, "grad_norm": 0.57421875, "learning_rate": 0.00043720013931225244, "loss": 0.1982, "step": 167120 }, { "epoch": 6.92, "grad_norm": 1.046875, "learning_rate": 0.00043719295099101064, "loss": 0.2152, "step": 167130 }, { "epoch": 6.92, "grad_norm": 1.359375, "learning_rate": 0.00043718576231749183, "loss": 0.1956, "step": 167140 }, { "epoch": 6.92, "grad_norm": 0.8203125, "learning_rate": 0.00043717857329170966, "loss": 0.1698, "step": 167150 }, { "epoch": 6.92, "grad_norm": 0.74609375, "learning_rate": 0.00043717138391367763, "loss": 0.1943, "step": 167160 }, { "epoch": 6.92, "grad_norm": 0.5234375, "learning_rate": 0.00043716419418340935, "loss": 0.1927, "step": 167170 }, { "epoch": 6.92, "grad_norm": 1.546875, "learning_rate": 0.0004371570041009182, "loss": 0.1748, "step": 167180 }, { "epoch": 6.92, "grad_norm": 0.7421875, "learning_rate": 0.00043714981366621775, "loss": 0.1776, "step": 167190 }, { "epoch": 6.93, "grad_norm": 0.87109375, "learning_rate": 0.00043714262287932154, "loss": 0.1764, "step": 167200 }, { "epoch": 6.93, "grad_norm": 0.2099609375, "learning_rate": 0.0004371354317402432, "loss": 0.1992, "step": 167210 }, { "epoch": 6.93, "grad_norm": 0.72265625, "learning_rate": 0.00043712824024899616, "loss": 0.2323, "step": 167220 }, { "epoch": 6.93, "grad_norm": 0.67578125, "learning_rate": 0.000437121048405594, "loss": 0.1756, "step": 167230 }, { "epoch": 6.93, "grad_norm": 0.6015625, "learning_rate": 0.00043711385621005017, "loss": 0.1588, "step": 167240 }, { "epoch": 6.93, "grad_norm": 1.984375, "learning_rate": 0.00043710666366237835, "loss": 0.2068, "step": 167250 }, { "epoch": 6.93, "grad_norm": 0.640625, "learning_rate": 0.0004370994707625919, "loss": 0.1908, "step": 167260 }, { "epoch": 6.93, "grad_norm": 0.72265625, "learning_rate": 0.00043709227751070456, "loss": 0.2465, "step": 167270 }, { "epoch": 6.93, "grad_norm": 1.015625, "learning_rate": 0.00043708508390672963, "loss": 0.2194, "step": 167280 }, { "epoch": 6.93, "grad_norm": 1.0, "learning_rate": 0.0004370778899506809, "loss": 0.216, "step": 167290 }, { "epoch": 6.93, "grad_norm": 1.046875, "learning_rate": 0.0004370706956425717, "loss": 0.217, "step": 167300 }, { "epoch": 6.93, "grad_norm": 0.67578125, "learning_rate": 0.0004370635009824157, "loss": 0.2429, "step": 167310 }, { "epoch": 6.93, "grad_norm": 0.77734375, "learning_rate": 0.0004370563059702264, "loss": 0.2228, "step": 167320 }, { "epoch": 6.93, "grad_norm": 1.1640625, "learning_rate": 0.00043704911060601725, "loss": 0.2323, "step": 167330 }, { "epoch": 6.93, "grad_norm": 0.61328125, "learning_rate": 0.000437041914889802, "loss": 0.2132, "step": 167340 }, { "epoch": 6.93, "grad_norm": 1.8125, "learning_rate": 0.000437034718821594, "loss": 0.2076, "step": 167350 }, { "epoch": 6.93, "grad_norm": 0.279296875, "learning_rate": 0.00043702752240140684, "loss": 0.1805, "step": 167360 }, { "epoch": 6.93, "grad_norm": 1.9765625, "learning_rate": 0.00043702032562925407, "loss": 0.1969, "step": 167370 }, { "epoch": 6.93, "grad_norm": 2.25, "learning_rate": 0.00043701312850514927, "loss": 0.2099, "step": 167380 }, { "epoch": 6.93, "grad_norm": 0.91015625, "learning_rate": 0.00043700593102910605, "loss": 0.2526, "step": 167390 }, { "epoch": 6.93, "grad_norm": 0.80859375, "learning_rate": 0.0004369987332011377, "loss": 0.1817, "step": 167400 }, { "epoch": 6.93, "grad_norm": 0.6796875, "learning_rate": 0.000436991535021258, "loss": 0.2068, "step": 167410 }, { "epoch": 6.93, "grad_norm": 0.90234375, "learning_rate": 0.00043698433648948045, "loss": 0.1572, "step": 167420 }, { "epoch": 6.93, "grad_norm": 0.83984375, "learning_rate": 0.0004369771376058185, "loss": 0.2034, "step": 167430 }, { "epoch": 6.94, "grad_norm": 0.330078125, "learning_rate": 0.0004369699383702859, "loss": 0.1901, "step": 167440 }, { "epoch": 6.94, "grad_norm": 0.453125, "learning_rate": 0.00043696273878289597, "loss": 0.1952, "step": 167450 }, { "epoch": 6.94, "grad_norm": 0.94140625, "learning_rate": 0.00043695553884366234, "loss": 0.1921, "step": 167460 }, { "epoch": 6.94, "grad_norm": 0.68359375, "learning_rate": 0.0004369483385525986, "loss": 0.185, "step": 167470 }, { "epoch": 6.94, "grad_norm": 0.49609375, "learning_rate": 0.00043694113790971825, "loss": 0.2455, "step": 167480 }, { "epoch": 6.94, "grad_norm": 0.0, "learning_rate": 0.0004369339369150349, "loss": 0.1962, "step": 167490 }, { "epoch": 6.94, "grad_norm": 0.85546875, "learning_rate": 0.0004369267355685621, "loss": 0.2715, "step": 167500 }, { "epoch": 6.94, "grad_norm": 0.68359375, "learning_rate": 0.00043691953387031327, "loss": 0.2284, "step": 167510 }, { "epoch": 6.94, "grad_norm": 0.458984375, "learning_rate": 0.0004369123318203021, "loss": 0.2288, "step": 167520 }, { "epoch": 6.94, "grad_norm": 0.734375, "learning_rate": 0.0004369051294185422, "loss": 0.1736, "step": 167530 }, { "epoch": 6.94, "grad_norm": 0.470703125, "learning_rate": 0.0004368979266650469, "loss": 0.2305, "step": 167540 }, { "epoch": 6.94, "grad_norm": 0.80859375, "learning_rate": 0.00043689072355982994, "loss": 0.2367, "step": 167550 }, { "epoch": 6.94, "grad_norm": 0.87109375, "learning_rate": 0.00043688352010290486, "loss": 0.2412, "step": 167560 }, { "epoch": 6.94, "grad_norm": 0.392578125, "learning_rate": 0.00043687631629428514, "loss": 0.2226, "step": 167570 }, { "epoch": 6.94, "grad_norm": 0.310546875, "learning_rate": 0.0004368691121339844, "loss": 0.1872, "step": 167580 }, { "epoch": 6.94, "grad_norm": 1.0546875, "learning_rate": 0.00043686190762201615, "loss": 0.2231, "step": 167590 }, { "epoch": 6.94, "grad_norm": 0.57421875, "learning_rate": 0.00043685470275839393, "loss": 0.2054, "step": 167600 }, { "epoch": 6.94, "grad_norm": 0.4375, "learning_rate": 0.00043684749754313134, "loss": 0.2221, "step": 167610 }, { "epoch": 6.94, "grad_norm": 1.3125, "learning_rate": 0.00043684029197624204, "loss": 0.2506, "step": 167620 }, { "epoch": 6.94, "grad_norm": 0.76171875, "learning_rate": 0.0004368330860577394, "loss": 0.2438, "step": 167630 }, { "epoch": 6.94, "grad_norm": 1.1328125, "learning_rate": 0.0004368258797876371, "loss": 0.2541, "step": 167640 }, { "epoch": 6.94, "grad_norm": 1.125, "learning_rate": 0.00043681867316594866, "loss": 0.2292, "step": 167650 }, { "epoch": 6.94, "grad_norm": 0.87890625, "learning_rate": 0.00043681146619268765, "loss": 0.1596, "step": 167660 }, { "epoch": 6.94, "grad_norm": 0.38671875, "learning_rate": 0.0004368042588678676, "loss": 0.2001, "step": 167670 }, { "epoch": 6.95, "grad_norm": 0.640625, "learning_rate": 0.00043679705119150215, "loss": 0.273, "step": 167680 }, { "epoch": 6.95, "grad_norm": 0.9296875, "learning_rate": 0.00043678984316360484, "loss": 0.2104, "step": 167690 }, { "epoch": 6.95, "grad_norm": 1.125, "learning_rate": 0.00043678263478418917, "loss": 0.2552, "step": 167700 }, { "epoch": 6.95, "grad_norm": 0.2890625, "learning_rate": 0.0004367754260532688, "loss": 0.2176, "step": 167710 }, { "epoch": 6.95, "grad_norm": 1.359375, "learning_rate": 0.0004367682169708572, "loss": 0.2097, "step": 167720 }, { "epoch": 6.95, "grad_norm": 0.439453125, "learning_rate": 0.00043676100753696804, "loss": 0.1992, "step": 167730 }, { "epoch": 6.95, "grad_norm": 0.8359375, "learning_rate": 0.0004367537977516148, "loss": 0.185, "step": 167740 }, { "epoch": 6.95, "grad_norm": 1.2421875, "learning_rate": 0.000436746587614811, "loss": 0.1849, "step": 167750 }, { "epoch": 6.95, "grad_norm": 0.703125, "learning_rate": 0.00043673937712657043, "loss": 0.2189, "step": 167760 }, { "epoch": 6.95, "grad_norm": 0.58984375, "learning_rate": 0.0004367321662869065, "loss": 0.182, "step": 167770 }, { "epoch": 6.95, "grad_norm": 0.703125, "learning_rate": 0.0004367249550958327, "loss": 0.2052, "step": 167780 }, { "epoch": 6.95, "grad_norm": 1.09375, "learning_rate": 0.00043671774355336275, "loss": 0.2242, "step": 167790 }, { "epoch": 6.95, "grad_norm": 1.28125, "learning_rate": 0.00043671053165951013, "loss": 0.2036, "step": 167800 }, { "epoch": 6.95, "grad_norm": 0.640625, "learning_rate": 0.0004367033194142885, "loss": 0.1531, "step": 167810 }, { "epoch": 6.95, "grad_norm": 0.6328125, "learning_rate": 0.00043669610681771145, "loss": 0.1888, "step": 167820 }, { "epoch": 6.95, "grad_norm": 0.474609375, "learning_rate": 0.0004366888938697924, "loss": 0.205, "step": 167830 }, { "epoch": 6.95, "grad_norm": 1.0, "learning_rate": 0.00043668168057054503, "loss": 0.2135, "step": 167840 }, { "epoch": 6.95, "grad_norm": 0.96484375, "learning_rate": 0.0004366744669199829, "loss": 0.1773, "step": 167850 }, { "epoch": 6.95, "grad_norm": 0.85546875, "learning_rate": 0.0004366672529181196, "loss": 0.215, "step": 167860 }, { "epoch": 6.95, "grad_norm": 0.57421875, "learning_rate": 0.0004366600385649686, "loss": 0.2417, "step": 167870 }, { "epoch": 6.95, "grad_norm": 0.76953125, "learning_rate": 0.00043665282386054374, "loss": 0.1788, "step": 167880 }, { "epoch": 6.95, "grad_norm": 1.15625, "learning_rate": 0.00043664560880485824, "loss": 0.2235, "step": 167890 }, { "epoch": 6.95, "grad_norm": 0.65234375, "learning_rate": 0.000436638393397926, "loss": 0.2325, "step": 167900 }, { "epoch": 6.95, "grad_norm": 0.4375, "learning_rate": 0.00043663117763976037, "loss": 0.1897, "step": 167910 }, { "epoch": 6.96, "grad_norm": 0.46484375, "learning_rate": 0.000436623961530375, "loss": 0.1593, "step": 167920 }, { "epoch": 6.96, "grad_norm": 0.361328125, "learning_rate": 0.00043661674506978356, "loss": 0.1918, "step": 167930 }, { "epoch": 6.96, "grad_norm": 0.80078125, "learning_rate": 0.00043660952825799954, "loss": 0.2321, "step": 167940 }, { "epoch": 6.96, "grad_norm": 1.25, "learning_rate": 0.0004366023110950365, "loss": 0.223, "step": 167950 }, { "epoch": 6.96, "grad_norm": 1.296875, "learning_rate": 0.0004365950935809081, "loss": 0.1825, "step": 167960 }, { "epoch": 6.96, "grad_norm": 0.51171875, "learning_rate": 0.0004365878757156279, "loss": 0.21, "step": 167970 }, { "epoch": 6.96, "grad_norm": 1.546875, "learning_rate": 0.0004365806574992094, "loss": 0.2085, "step": 167980 }, { "epoch": 6.96, "grad_norm": 0.345703125, "learning_rate": 0.00043657343893166635, "loss": 0.1951, "step": 167990 }, { "epoch": 6.96, "grad_norm": 0.625, "learning_rate": 0.00043656622001301214, "loss": 0.2259, "step": 168000 }, { "epoch": 6.96, "grad_norm": 0.53515625, "learning_rate": 0.0004365590007432605, "loss": 0.1657, "step": 168010 }, { "epoch": 6.96, "grad_norm": 0.369140625, "learning_rate": 0.000436551781122425, "loss": 0.2273, "step": 168020 }, { "epoch": 6.96, "grad_norm": 0.3125, "learning_rate": 0.0004365445611505191, "loss": 0.2021, "step": 168030 }, { "epoch": 6.96, "grad_norm": 0.392578125, "learning_rate": 0.0004365373408275566, "loss": 0.2027, "step": 168040 }, { "epoch": 6.96, "grad_norm": 0.427734375, "learning_rate": 0.00043653012015355087, "loss": 0.1715, "step": 168050 }, { "epoch": 6.96, "grad_norm": 0.5234375, "learning_rate": 0.0004365228991285156, "loss": 0.257, "step": 168060 }, { "epoch": 6.96, "grad_norm": 1.203125, "learning_rate": 0.0004365156777524645, "loss": 0.248, "step": 168070 }, { "epoch": 6.96, "grad_norm": 0.65234375, "learning_rate": 0.0004365084560254109, "loss": 0.2309, "step": 168080 }, { "epoch": 6.96, "grad_norm": 1.2578125, "learning_rate": 0.0004365012339473686, "loss": 0.1346, "step": 168090 }, { "epoch": 6.96, "grad_norm": 1.1640625, "learning_rate": 0.00043649401151835105, "loss": 0.1866, "step": 168100 }, { "epoch": 6.96, "grad_norm": 0.6171875, "learning_rate": 0.00043648678873837196, "loss": 0.173, "step": 168110 }, { "epoch": 6.96, "grad_norm": 0.9609375, "learning_rate": 0.00043647956560744487, "loss": 0.2029, "step": 168120 }, { "epoch": 6.96, "grad_norm": 1.03125, "learning_rate": 0.0004364723421255833, "loss": 0.214, "step": 168130 }, { "epoch": 6.96, "grad_norm": 1.1796875, "learning_rate": 0.00043646511829280104, "loss": 0.2418, "step": 168140 }, { "epoch": 6.96, "grad_norm": 0.478515625, "learning_rate": 0.0004364578941091115, "loss": 0.2066, "step": 168150 }, { "epoch": 6.97, "grad_norm": 0.490234375, "learning_rate": 0.0004364506695745283, "loss": 0.1792, "step": 168160 }, { "epoch": 6.97, "grad_norm": 0.51953125, "learning_rate": 0.00043644344468906515, "loss": 0.1955, "step": 168170 }, { "epoch": 6.97, "grad_norm": 0.58984375, "learning_rate": 0.0004364362194527356, "loss": 0.2189, "step": 168180 }, { "epoch": 6.97, "grad_norm": 0.58203125, "learning_rate": 0.0004364289938655531, "loss": 0.2317, "step": 168190 }, { "epoch": 6.97, "grad_norm": 0.7578125, "learning_rate": 0.00043642176792753145, "loss": 0.2128, "step": 168200 }, { "epoch": 6.97, "grad_norm": 0.80078125, "learning_rate": 0.00043641454163868414, "loss": 0.1968, "step": 168210 }, { "epoch": 6.97, "grad_norm": 0.703125, "learning_rate": 0.00043640731499902484, "loss": 0.1312, "step": 168220 }, { "epoch": 6.97, "grad_norm": 0.55859375, "learning_rate": 0.00043640008800856705, "loss": 0.2275, "step": 168230 }, { "epoch": 6.97, "grad_norm": 0.41796875, "learning_rate": 0.00043639286066732443, "loss": 0.2142, "step": 168240 }, { "epoch": 6.97, "grad_norm": 0.76171875, "learning_rate": 0.0004363856329753106, "loss": 0.1652, "step": 168250 }, { "epoch": 6.97, "grad_norm": 0.390625, "learning_rate": 0.0004363784049325391, "loss": 0.2392, "step": 168260 }, { "epoch": 6.97, "grad_norm": 0.6640625, "learning_rate": 0.0004363711765390236, "loss": 0.171, "step": 168270 }, { "epoch": 6.97, "grad_norm": 0.73828125, "learning_rate": 0.0004363639477947776, "loss": 0.1825, "step": 168280 }, { "epoch": 6.97, "grad_norm": 0.58984375, "learning_rate": 0.0004363567186998149, "loss": 0.2419, "step": 168290 }, { "epoch": 6.97, "grad_norm": 0.77734375, "learning_rate": 0.00043634948925414885, "loss": 0.1956, "step": 168300 }, { "epoch": 6.97, "grad_norm": 0.3203125, "learning_rate": 0.00043634225945779324, "loss": 0.2287, "step": 168310 }, { "epoch": 6.97, "grad_norm": 0.640625, "learning_rate": 0.0004363350293107617, "loss": 0.1752, "step": 168320 }, { "epoch": 6.97, "grad_norm": 0.9375, "learning_rate": 0.00043632779881306775, "loss": 0.2241, "step": 168330 }, { "epoch": 6.97, "grad_norm": 0.91015625, "learning_rate": 0.00043632056796472486, "loss": 0.2162, "step": 168340 }, { "epoch": 6.97, "grad_norm": 0.92578125, "learning_rate": 0.00043631333676574693, "loss": 0.2209, "step": 168350 }, { "epoch": 6.97, "grad_norm": 0.8046875, "learning_rate": 0.00043630610521614734, "loss": 0.1966, "step": 168360 }, { "epoch": 6.97, "grad_norm": 0.7890625, "learning_rate": 0.00043629887331593975, "loss": 0.1604, "step": 168370 }, { "epoch": 6.97, "grad_norm": 0.62890625, "learning_rate": 0.0004362916410651379, "loss": 0.1708, "step": 168380 }, { "epoch": 6.97, "grad_norm": 0.421875, "learning_rate": 0.00043628440846375517, "loss": 0.2107, "step": 168390 }, { "epoch": 6.98, "grad_norm": 1.9453125, "learning_rate": 0.0004362771755118054, "loss": 0.2193, "step": 168400 }, { "epoch": 6.98, "grad_norm": 0.4609375, "learning_rate": 0.00043626994220930204, "loss": 0.2341, "step": 168410 }, { "epoch": 6.98, "grad_norm": 0.77734375, "learning_rate": 0.00043626270855625884, "loss": 0.2158, "step": 168420 }, { "epoch": 6.98, "grad_norm": 1.1484375, "learning_rate": 0.00043625547455268933, "loss": 0.2051, "step": 168430 }, { "epoch": 6.98, "grad_norm": 0.1962890625, "learning_rate": 0.000436248240198607, "loss": 0.2024, "step": 168440 }, { "epoch": 6.98, "grad_norm": 0.546875, "learning_rate": 0.0004362410054940258, "loss": 0.2327, "step": 168450 }, { "epoch": 6.98, "grad_norm": 0.1923828125, "learning_rate": 0.00043623377043895895, "loss": 0.1923, "step": 168460 }, { "epoch": 6.98, "grad_norm": 0.69921875, "learning_rate": 0.00043622653503342035, "loss": 0.1638, "step": 168470 }, { "epoch": 6.98, "grad_norm": 0.74609375, "learning_rate": 0.00043621929927742354, "loss": 0.1544, "step": 168480 }, { "epoch": 6.98, "grad_norm": 0.38671875, "learning_rate": 0.00043621206317098207, "loss": 0.2362, "step": 168490 }, { "epoch": 6.98, "grad_norm": 0.76171875, "learning_rate": 0.00043620482671410965, "loss": 0.2167, "step": 168500 }, { "epoch": 6.98, "grad_norm": 1.09375, "learning_rate": 0.0004361975899068198, "loss": 0.2628, "step": 168510 }, { "epoch": 6.98, "grad_norm": 3.109375, "learning_rate": 0.0004361903527491262, "loss": 0.1997, "step": 168520 }, { "epoch": 6.98, "grad_norm": 0.30859375, "learning_rate": 0.0004361831152410425, "loss": 0.2308, "step": 168530 }, { "epoch": 6.98, "grad_norm": 0.7421875, "learning_rate": 0.00043617587738258224, "loss": 0.2531, "step": 168540 }, { "epoch": 6.98, "grad_norm": 1.40625, "learning_rate": 0.00043616863917375913, "loss": 0.1965, "step": 168550 }, { "epoch": 6.98, "grad_norm": 0.80078125, "learning_rate": 0.00043616140061458677, "loss": 0.2366, "step": 168560 }, { "epoch": 6.98, "grad_norm": 0.66015625, "learning_rate": 0.0004361541617050787, "loss": 0.2022, "step": 168570 }, { "epoch": 6.98, "grad_norm": 0.71484375, "learning_rate": 0.0004361469224452486, "loss": 0.2031, "step": 168580 }, { "epoch": 6.98, "grad_norm": 0.44921875, "learning_rate": 0.00043613968283511006, "loss": 0.2626, "step": 168590 }, { "epoch": 6.98, "grad_norm": 0.6015625, "learning_rate": 0.00043613244287467686, "loss": 0.2007, "step": 168600 }, { "epoch": 6.98, "grad_norm": 1.125, "learning_rate": 0.00043612520256396245, "loss": 0.1931, "step": 168610 }, { "epoch": 6.98, "grad_norm": 0.80859375, "learning_rate": 0.00043611796190298047, "loss": 0.2072, "step": 168620 }, { "epoch": 6.98, "grad_norm": 1.6484375, "learning_rate": 0.0004361107208917446, "loss": 0.2285, "step": 168630 }, { "epoch": 6.99, "grad_norm": 0.423828125, "learning_rate": 0.00043610347953026843, "loss": 0.1604, "step": 168640 }, { "epoch": 6.99, "grad_norm": 0.83203125, "learning_rate": 0.00043609623781856564, "loss": 0.2156, "step": 168650 }, { "epoch": 6.99, "grad_norm": 0.62890625, "learning_rate": 0.0004360889957566498, "loss": 0.1765, "step": 168660 }, { "epoch": 6.99, "grad_norm": 0.61328125, "learning_rate": 0.0004360817533445346, "loss": 0.178, "step": 168670 }, { "epoch": 6.99, "grad_norm": 1.3125, "learning_rate": 0.00043607451058223366, "loss": 0.2466, "step": 168680 }, { "epoch": 6.99, "grad_norm": 0.55078125, "learning_rate": 0.00043606726746976053, "loss": 0.2031, "step": 168690 }, { "epoch": 6.99, "grad_norm": 0.61328125, "learning_rate": 0.00043606002400712896, "loss": 0.2632, "step": 168700 }, { "epoch": 6.99, "grad_norm": 0.75, "learning_rate": 0.00043605278019435246, "loss": 0.2416, "step": 168710 }, { "epoch": 6.99, "grad_norm": 0.4375, "learning_rate": 0.0004360455360314447, "loss": 0.2353, "step": 168720 }, { "epoch": 6.99, "grad_norm": 0.431640625, "learning_rate": 0.0004360382915184194, "loss": 0.1901, "step": 168730 }, { "epoch": 6.99, "grad_norm": 0.7421875, "learning_rate": 0.00043603104665529, "loss": 0.2017, "step": 168740 }, { "epoch": 6.99, "grad_norm": 1.0546875, "learning_rate": 0.00043602380144207043, "loss": 0.2235, "step": 168750 }, { "epoch": 6.99, "grad_norm": 0.8359375, "learning_rate": 0.000436016555878774, "loss": 0.1853, "step": 168760 }, { "epoch": 6.99, "grad_norm": 0.5703125, "learning_rate": 0.00043600930996541467, "loss": 0.2288, "step": 168770 }, { "epoch": 6.99, "grad_norm": 0.51171875, "learning_rate": 0.0004360020637020058, "loss": 0.2394, "step": 168780 }, { "epoch": 6.99, "grad_norm": 0.70703125, "learning_rate": 0.00043599481708856113, "loss": 0.2021, "step": 168790 }, { "epoch": 6.99, "grad_norm": 0.83203125, "learning_rate": 0.00043598757012509425, "loss": 0.2258, "step": 168800 }, { "epoch": 6.99, "grad_norm": 0.59765625, "learning_rate": 0.000435980322811619, "loss": 0.2229, "step": 168810 }, { "epoch": 6.99, "grad_norm": 1.0625, "learning_rate": 0.00043597307514814875, "loss": 0.1941, "step": 168820 }, { "epoch": 6.99, "grad_norm": 0.87109375, "learning_rate": 0.00043596582713469723, "loss": 0.217, "step": 168830 }, { "epoch": 6.99, "grad_norm": 0.5390625, "learning_rate": 0.0004359585787712782, "loss": 0.1767, "step": 168840 }, { "epoch": 6.99, "grad_norm": 0.52734375, "learning_rate": 0.00043595133005790523, "loss": 0.1419, "step": 168850 }, { "epoch": 6.99, "grad_norm": 0.96484375, "learning_rate": 0.0004359440809945918, "loss": 0.1329, "step": 168860 }, { "epoch": 6.99, "grad_norm": 1.3046875, "learning_rate": 0.0004359368315813518, "loss": 0.1853, "step": 168870 }, { "epoch": 6.99, "grad_norm": 1.671875, "learning_rate": 0.00043592958181819865, "loss": 0.2236, "step": 168880 }, { "epoch": 7.0, "grad_norm": 0.2314453125, "learning_rate": 0.0004359223317051462, "loss": 0.2161, "step": 168890 }, { "epoch": 7.0, "grad_norm": 0.40625, "learning_rate": 0.00043591508124220805, "loss": 0.1471, "step": 168900 }, { "epoch": 7.0, "grad_norm": 1.0546875, "learning_rate": 0.0004359078304293977, "loss": 0.1561, "step": 168910 }, { "epoch": 7.0, "grad_norm": 0.3515625, "learning_rate": 0.0004359005792667289, "loss": 0.2504, "step": 168920 }, { "epoch": 7.0, "grad_norm": 1.0703125, "learning_rate": 0.0004358933277542153, "loss": 0.2147, "step": 168930 }, { "epoch": 7.0, "grad_norm": 0.7109375, "learning_rate": 0.00043588607589187055, "loss": 0.2467, "step": 168940 }, { "epoch": 7.0, "grad_norm": 0.5625, "learning_rate": 0.0004358788236797082, "loss": 0.2081, "step": 168950 }, { "epoch": 7.0, "grad_norm": 2.21875, "learning_rate": 0.0004358715711177421, "loss": 0.2514, "step": 168960 }, { "epoch": 7.0, "grad_norm": 0.158203125, "learning_rate": 0.0004358643182059857, "loss": 0.1524, "step": 168970 }, { "epoch": 7.0, "grad_norm": 0.609375, "learning_rate": 0.00043585706494445275, "loss": 0.2248, "step": 168980 }, { "epoch": 7.0, "grad_norm": 0.66796875, "learning_rate": 0.00043584981133315683, "loss": 0.2344, "step": 168990 }, { "epoch": 7.0, "grad_norm": 0.66796875, "learning_rate": 0.00043584255737211166, "loss": 0.1869, "step": 169000 }, { "epoch": 7.0, "grad_norm": 0.75390625, "learning_rate": 0.0004358353030613309, "loss": 0.2043, "step": 169010 }, { "epoch": 7.0, "grad_norm": 0.6328125, "learning_rate": 0.00043582804840082813, "loss": 0.1911, "step": 169020 }, { "epoch": 7.0, "grad_norm": 0.8828125, "learning_rate": 0.0004358207933906171, "loss": 0.2083, "step": 169030 }, { "epoch": 7.0, "grad_norm": 0.302734375, "learning_rate": 0.00043581353803071136, "loss": 0.2291, "step": 169040 }, { "epoch": 7.0, "grad_norm": 0.185546875, "learning_rate": 0.00043580628232112455, "loss": 0.2559, "step": 169050 }, { "epoch": 7.0, "grad_norm": 0.33203125, "learning_rate": 0.0004357990262618704, "loss": 0.1638, "step": 169060 }, { "epoch": 7.0, "grad_norm": 0.439453125, "learning_rate": 0.00043579176985296267, "loss": 0.2216, "step": 169070 }, { "epoch": 7.0, "grad_norm": 0.322265625, "learning_rate": 0.00043578451309441476, "loss": 0.2102, "step": 169080 }, { "epoch": 7.0, "grad_norm": 0.85546875, "learning_rate": 0.0004357772559862405, "loss": 0.215, "step": 169090 }, { "epoch": 7.0, "grad_norm": 0.796875, "learning_rate": 0.0004357699985284535, "loss": 0.2094, "step": 169100 }, { "epoch": 7.0, "grad_norm": 1.0, "learning_rate": 0.00043576274072106746, "loss": 0.1783, "step": 169110 }, { "epoch": 7.0, "grad_norm": 0.462890625, "learning_rate": 0.00043575548256409596, "loss": 0.1491, "step": 169120 }, { "epoch": 7.01, "grad_norm": 1.6953125, "learning_rate": 0.00043574822405755275, "loss": 0.2171, "step": 169130 }, { "epoch": 7.01, "grad_norm": 0.76953125, "learning_rate": 0.0004357409652014514, "loss": 0.2583, "step": 169140 }, { "epoch": 7.01, "grad_norm": 0.6640625, "learning_rate": 0.00043573370599580565, "loss": 0.1804, "step": 169150 }, { "epoch": 7.01, "grad_norm": 1.078125, "learning_rate": 0.0004357264464406291, "loss": 0.2085, "step": 169160 }, { "epoch": 7.01, "grad_norm": 0.47265625, "learning_rate": 0.0004357191865359354, "loss": 0.2034, "step": 169170 }, { "epoch": 7.01, "grad_norm": 0.259765625, "learning_rate": 0.0004357119262817383, "loss": 0.1766, "step": 169180 }, { "epoch": 7.01, "grad_norm": 1.0625, "learning_rate": 0.00043570466567805134, "loss": 0.1465, "step": 169190 }, { "epoch": 7.01, "grad_norm": 0.40625, "learning_rate": 0.00043569740472488834, "loss": 0.2185, "step": 169200 }, { "epoch": 7.01, "grad_norm": 0.765625, "learning_rate": 0.00043569014342226286, "loss": 0.2336, "step": 169210 }, { "epoch": 7.01, "grad_norm": 0.61328125, "learning_rate": 0.00043568288177018856, "loss": 0.2121, "step": 169220 }, { "epoch": 7.01, "grad_norm": 0.61328125, "learning_rate": 0.0004356756197686791, "loss": 0.2053, "step": 169230 }, { "epoch": 7.01, "grad_norm": 0.765625, "learning_rate": 0.00043566835741774823, "loss": 0.2319, "step": 169240 }, { "epoch": 7.01, "grad_norm": 1.2890625, "learning_rate": 0.00043566109471740954, "loss": 0.1609, "step": 169250 }, { "epoch": 7.01, "grad_norm": 0.59765625, "learning_rate": 0.0004356538316676767, "loss": 0.1897, "step": 169260 }, { "epoch": 7.01, "grad_norm": 0.322265625, "learning_rate": 0.00043564656826856343, "loss": 0.1825, "step": 169270 }, { "epoch": 7.01, "grad_norm": 0.96875, "learning_rate": 0.00043563930452008334, "loss": 0.258, "step": 169280 }, { "epoch": 7.01, "grad_norm": 0.78125, "learning_rate": 0.0004356320404222501, "loss": 0.1766, "step": 169290 }, { "epoch": 7.01, "grad_norm": 1.4140625, "learning_rate": 0.00043562477597507745, "loss": 0.1797, "step": 169300 }, { "epoch": 7.01, "grad_norm": 0.94921875, "learning_rate": 0.000435617511178579, "loss": 0.233, "step": 169310 }, { "epoch": 7.01, "grad_norm": 0.169921875, "learning_rate": 0.00043561024603276844, "loss": 0.2032, "step": 169320 }, { "epoch": 7.01, "grad_norm": 1.09375, "learning_rate": 0.0004356029805376595, "loss": 0.1748, "step": 169330 }, { "epoch": 7.01, "grad_norm": 0.3046875, "learning_rate": 0.00043559571469326577, "loss": 0.1841, "step": 169340 }, { "epoch": 7.01, "grad_norm": 0.400390625, "learning_rate": 0.0004355884484996009, "loss": 0.2126, "step": 169350 }, { "epoch": 7.01, "grad_norm": 0.671875, "learning_rate": 0.0004355811819566786, "loss": 0.2202, "step": 169360 }, { "epoch": 7.02, "grad_norm": 1.1015625, "learning_rate": 0.0004355739150645126, "loss": 0.2165, "step": 169370 }, { "epoch": 7.02, "grad_norm": 0.74609375, "learning_rate": 0.0004355666478231165, "loss": 0.2326, "step": 169380 }, { "epoch": 7.02, "grad_norm": 0.55859375, "learning_rate": 0.0004355593802325041, "loss": 0.2158, "step": 169390 }, { "epoch": 7.02, "grad_norm": 0.546875, "learning_rate": 0.0004355521122926889, "loss": 0.1802, "step": 169400 }, { "epoch": 7.02, "grad_norm": 0.5859375, "learning_rate": 0.00043554484400368465, "loss": 0.1999, "step": 169410 }, { "epoch": 7.02, "grad_norm": 0.546875, "learning_rate": 0.0004355375753655051, "loss": 0.156, "step": 169420 }, { "epoch": 7.02, "grad_norm": 0.369140625, "learning_rate": 0.00043553030637816383, "loss": 0.1775, "step": 169430 }, { "epoch": 7.02, "grad_norm": 1.0078125, "learning_rate": 0.0004355230370416746, "loss": 0.1793, "step": 169440 }, { "epoch": 7.02, "grad_norm": 0.0, "learning_rate": 0.000435515767356051, "loss": 0.2395, "step": 169450 }, { "epoch": 7.02, "grad_norm": 0.96484375, "learning_rate": 0.0004355084973213068, "loss": 0.2002, "step": 169460 }, { "epoch": 7.02, "grad_norm": 0.44921875, "learning_rate": 0.0004355012269374555, "loss": 0.2032, "step": 169470 }, { "epoch": 7.02, "grad_norm": 0.6484375, "learning_rate": 0.00043549395620451115, "loss": 0.1762, "step": 169480 }, { "epoch": 7.02, "grad_norm": 0.58984375, "learning_rate": 0.0004354866851224871, "loss": 0.2134, "step": 169490 }, { "epoch": 7.02, "grad_norm": 0.9765625, "learning_rate": 0.0004354794136913971, "loss": 0.1981, "step": 169500 }, { "epoch": 7.02, "grad_norm": 0.73046875, "learning_rate": 0.0004354721419112549, "loss": 0.1886, "step": 169510 }, { "epoch": 7.02, "grad_norm": 0.2890625, "learning_rate": 0.0004354648697820742, "loss": 0.2107, "step": 169520 }, { "epoch": 7.02, "grad_norm": 0.92578125, "learning_rate": 0.00043545759730386857, "loss": 0.2279, "step": 169530 }, { "epoch": 7.02, "grad_norm": 0.6328125, "learning_rate": 0.00043545032447665186, "loss": 0.1862, "step": 169540 }, { "epoch": 7.02, "grad_norm": 1.75, "learning_rate": 0.0004354430513004376, "loss": 0.2311, "step": 169550 }, { "epoch": 7.02, "grad_norm": 0.81640625, "learning_rate": 0.0004354357777752396, "loss": 0.1678, "step": 169560 }, { "epoch": 7.02, "grad_norm": 0.63671875, "learning_rate": 0.00043542850390107144, "loss": 0.2253, "step": 169570 }, { "epoch": 7.02, "grad_norm": 0.11083984375, "learning_rate": 0.0004354212296779468, "loss": 0.1781, "step": 169580 }, { "epoch": 7.02, "grad_norm": 0.703125, "learning_rate": 0.00043541395510587953, "loss": 0.2155, "step": 169590 }, { "epoch": 7.02, "grad_norm": 1.484375, "learning_rate": 0.00043540668018488324, "loss": 0.1886, "step": 169600 }, { "epoch": 7.03, "grad_norm": 0.5234375, "learning_rate": 0.00043539940491497156, "loss": 0.2362, "step": 169610 }, { "epoch": 7.03, "grad_norm": 1.109375, "learning_rate": 0.0004353921292961582, "loss": 0.2119, "step": 169620 }, { "epoch": 7.03, "grad_norm": 0.93359375, "learning_rate": 0.00043538485332845686, "loss": 0.2053, "step": 169630 }, { "epoch": 7.03, "grad_norm": 0.76953125, "learning_rate": 0.0004353775770118813, "loss": 0.2167, "step": 169640 }, { "epoch": 7.03, "grad_norm": 0.8125, "learning_rate": 0.0004353703003464451, "loss": 0.1533, "step": 169650 }, { "epoch": 7.03, "grad_norm": 0.71484375, "learning_rate": 0.000435363023332162, "loss": 0.2185, "step": 169660 }, { "epoch": 7.03, "grad_norm": 0.58984375, "learning_rate": 0.0004353557459690458, "loss": 0.2259, "step": 169670 }, { "epoch": 7.03, "grad_norm": 0.5859375, "learning_rate": 0.00043534846825711007, "loss": 0.1809, "step": 169680 }, { "epoch": 7.03, "grad_norm": 0.10009765625, "learning_rate": 0.00043534119019636853, "loss": 0.1844, "step": 169690 }, { "epoch": 7.03, "grad_norm": 0.9609375, "learning_rate": 0.0004353339117868349, "loss": 0.2217, "step": 169700 }, { "epoch": 7.03, "grad_norm": 0.47265625, "learning_rate": 0.0004353266330285228, "loss": 0.1579, "step": 169710 }, { "epoch": 7.03, "grad_norm": 0.47265625, "learning_rate": 0.00043531935392144606, "loss": 0.2004, "step": 169720 }, { "epoch": 7.03, "grad_norm": 1.328125, "learning_rate": 0.0004353120744656183, "loss": 0.1402, "step": 169730 }, { "epoch": 7.03, "grad_norm": 0.7109375, "learning_rate": 0.0004353047946610532, "loss": 0.1735, "step": 169740 }, { "epoch": 7.03, "grad_norm": 0.74609375, "learning_rate": 0.0004352975145077644, "loss": 0.2191, "step": 169750 }, { "epoch": 7.03, "grad_norm": 0.875, "learning_rate": 0.00043529023400576584, "loss": 0.233, "step": 169760 }, { "epoch": 7.03, "grad_norm": 0.62109375, "learning_rate": 0.00043528295315507104, "loss": 0.2011, "step": 169770 }, { "epoch": 7.03, "grad_norm": 1.328125, "learning_rate": 0.00043527567195569367, "loss": 0.1829, "step": 169780 }, { "epoch": 7.03, "grad_norm": 0.67578125, "learning_rate": 0.00043526839040764756, "loss": 0.1734, "step": 169790 }, { "epoch": 7.03, "grad_norm": 0.263671875, "learning_rate": 0.00043526110851094626, "loss": 0.1467, "step": 169800 }, { "epoch": 7.03, "grad_norm": 0.69140625, "learning_rate": 0.0004352538262656036, "loss": 0.1989, "step": 169810 }, { "epoch": 7.03, "grad_norm": 0.5234375, "learning_rate": 0.0004352465436716333, "loss": 0.1969, "step": 169820 }, { "epoch": 7.03, "grad_norm": 0.59765625, "learning_rate": 0.00043523926072904894, "loss": 0.1948, "step": 169830 }, { "epoch": 7.03, "grad_norm": 0.62109375, "learning_rate": 0.0004352319774378644, "loss": 0.2131, "step": 169840 }, { "epoch": 7.04, "grad_norm": 0.73046875, "learning_rate": 0.00043522469379809315, "loss": 0.2136, "step": 169850 }, { "epoch": 7.04, "grad_norm": 0.8203125, "learning_rate": 0.00043521740980974913, "loss": 0.2002, "step": 169860 }, { "epoch": 7.04, "grad_norm": 0.54296875, "learning_rate": 0.0004352101254728459, "loss": 0.2017, "step": 169870 }, { "epoch": 7.04, "grad_norm": 0.88671875, "learning_rate": 0.0004352028407873972, "loss": 0.1922, "step": 169880 }, { "epoch": 7.04, "grad_norm": 0.53125, "learning_rate": 0.00043519555575341674, "loss": 0.2361, "step": 169890 }, { "epoch": 7.04, "grad_norm": 0.57421875, "learning_rate": 0.0004351882703709183, "loss": 0.1976, "step": 169900 }, { "epoch": 7.04, "grad_norm": 0.5390625, "learning_rate": 0.0004351809846399155, "loss": 0.1837, "step": 169910 }, { "epoch": 7.04, "grad_norm": 0.73828125, "learning_rate": 0.0004351736985604221, "loss": 0.2361, "step": 169920 }, { "epoch": 7.04, "grad_norm": 0.439453125, "learning_rate": 0.00043516641213245177, "loss": 0.2259, "step": 169930 }, { "epoch": 7.04, "grad_norm": 0.49609375, "learning_rate": 0.0004351591253560183, "loss": 0.233, "step": 169940 }, { "epoch": 7.04, "grad_norm": 0.7578125, "learning_rate": 0.0004351518382311353, "loss": 0.1561, "step": 169950 }, { "epoch": 7.04, "grad_norm": 0.6875, "learning_rate": 0.00043514455075781657, "loss": 0.2105, "step": 169960 }, { "epoch": 7.04, "grad_norm": 0.78125, "learning_rate": 0.00043513726293607574, "loss": 0.2041, "step": 169970 }, { "epoch": 7.04, "grad_norm": 0.58203125, "learning_rate": 0.00043512997476592664, "loss": 0.1651, "step": 169980 }, { "epoch": 7.04, "grad_norm": 0.4140625, "learning_rate": 0.00043512268624738284, "loss": 0.1928, "step": 169990 }, { "epoch": 7.04, "grad_norm": 0.67578125, "learning_rate": 0.0004351153973804582, "loss": 0.1849, "step": 170000 }, { "epoch": 7.04, "grad_norm": 0.609375, "learning_rate": 0.00043510810816516635, "loss": 0.1931, "step": 170010 }, { "epoch": 7.04, "grad_norm": 0.90234375, "learning_rate": 0.00043510081860152105, "loss": 0.1968, "step": 170020 }, { "epoch": 7.04, "grad_norm": 0.67578125, "learning_rate": 0.000435093528689536, "loss": 0.2402, "step": 170030 }, { "epoch": 7.04, "grad_norm": 0.6171875, "learning_rate": 0.0004350862384292249, "loss": 0.2186, "step": 170040 }, { "epoch": 7.04, "grad_norm": 0.58984375, "learning_rate": 0.0004350789478206015, "loss": 0.1864, "step": 170050 }, { "epoch": 7.04, "grad_norm": 0.67578125, "learning_rate": 0.0004350716568636794, "loss": 0.1974, "step": 170060 }, { "epoch": 7.04, "grad_norm": 0.61328125, "learning_rate": 0.00043506436555847263, "loss": 0.1943, "step": 170070 }, { "epoch": 7.04, "grad_norm": 0.98828125, "learning_rate": 0.00043505707390499457, "loss": 0.2309, "step": 170080 }, { "epoch": 7.05, "grad_norm": 0.87890625, "learning_rate": 0.0004350497819032591, "loss": 0.2022, "step": 170090 }, { "epoch": 7.05, "grad_norm": 0.4609375, "learning_rate": 0.00043504248955328, "loss": 0.1877, "step": 170100 }, { "epoch": 7.05, "grad_norm": 0.96484375, "learning_rate": 0.0004350351968550708, "loss": 0.161, "step": 170110 }, { "epoch": 7.05, "grad_norm": 0.66796875, "learning_rate": 0.00043502790380864543, "loss": 0.1628, "step": 170120 }, { "epoch": 7.05, "grad_norm": 1.8828125, "learning_rate": 0.00043502061041401745, "loss": 0.2034, "step": 170130 }, { "epoch": 7.05, "grad_norm": 0.384765625, "learning_rate": 0.0004350133166712007, "loss": 0.1843, "step": 170140 }, { "epoch": 7.05, "grad_norm": 1.25, "learning_rate": 0.0004350060225802089, "loss": 0.218, "step": 170150 }, { "epoch": 7.05, "grad_norm": 0.65234375, "learning_rate": 0.00043499872814105567, "loss": 0.1965, "step": 170160 }, { "epoch": 7.05, "grad_norm": 0.59765625, "learning_rate": 0.0004349914333537549, "loss": 0.263, "step": 170170 }, { "epoch": 7.05, "grad_norm": 0.6640625, "learning_rate": 0.00043498413821832017, "loss": 0.2237, "step": 170180 }, { "epoch": 7.05, "grad_norm": 0.51953125, "learning_rate": 0.0004349768427347653, "loss": 0.2122, "step": 170190 }, { "epoch": 7.05, "grad_norm": 0.4140625, "learning_rate": 0.00043496954690310397, "loss": 0.1817, "step": 170200 }, { "epoch": 7.05, "grad_norm": 0.2353515625, "learning_rate": 0.00043496225072334996, "loss": 0.1719, "step": 170210 }, { "epoch": 7.05, "grad_norm": 0.62109375, "learning_rate": 0.0004349549541955169, "loss": 0.2468, "step": 170220 }, { "epoch": 7.05, "grad_norm": 1.1953125, "learning_rate": 0.00043494765731961864, "loss": 0.1976, "step": 170230 }, { "epoch": 7.05, "grad_norm": 1.6015625, "learning_rate": 0.00043494036009566887, "loss": 0.1767, "step": 170240 }, { "epoch": 7.05, "grad_norm": 0.439453125, "learning_rate": 0.0004349330625236813, "loss": 0.1929, "step": 170250 }, { "epoch": 7.05, "grad_norm": 0.451171875, "learning_rate": 0.00043492576460366967, "loss": 0.2199, "step": 170260 }, { "epoch": 7.05, "grad_norm": 0.5703125, "learning_rate": 0.0004349184663356477, "loss": 0.21, "step": 170270 }, { "epoch": 7.05, "grad_norm": 0.75, "learning_rate": 0.0004349111677196292, "loss": 0.2441, "step": 170280 }, { "epoch": 7.05, "grad_norm": 0.4453125, "learning_rate": 0.0004349038687556278, "loss": 0.2089, "step": 170290 }, { "epoch": 7.05, "grad_norm": 2.078125, "learning_rate": 0.0004348965694436573, "loss": 0.1836, "step": 170300 }, { "epoch": 7.05, "grad_norm": 0.4296875, "learning_rate": 0.0004348892697837314, "loss": 0.208, "step": 170310 }, { "epoch": 7.05, "grad_norm": 0.314453125, "learning_rate": 0.0004348819697758639, "loss": 0.1849, "step": 170320 }, { "epoch": 7.06, "grad_norm": 0.80859375, "learning_rate": 0.0004348746694200686, "loss": 0.186, "step": 170330 }, { "epoch": 7.06, "grad_norm": 0.71875, "learning_rate": 0.000434867368716359, "loss": 0.2599, "step": 170340 }, { "epoch": 7.06, "grad_norm": 0.216796875, "learning_rate": 0.00043486006766474893, "loss": 0.1596, "step": 170350 }, { "epoch": 7.06, "grad_norm": 0.7421875, "learning_rate": 0.00043485276626525226, "loss": 0.2047, "step": 170360 }, { "epoch": 7.06, "grad_norm": 0.6953125, "learning_rate": 0.00043484546451788266, "loss": 0.1994, "step": 170370 }, { "epoch": 7.06, "grad_norm": 0.90234375, "learning_rate": 0.00043483816242265383, "loss": 0.1886, "step": 170380 }, { "epoch": 7.06, "grad_norm": 1.6640625, "learning_rate": 0.00043483085997957956, "loss": 0.2595, "step": 170390 }, { "epoch": 7.06, "grad_norm": 0.55078125, "learning_rate": 0.00043482355718867354, "loss": 0.2435, "step": 170400 }, { "epoch": 7.06, "grad_norm": 0.59765625, "learning_rate": 0.00043481625404994957, "loss": 0.2033, "step": 170410 }, { "epoch": 7.06, "grad_norm": 0.41796875, "learning_rate": 0.0004348089505634214, "loss": 0.222, "step": 170420 }, { "epoch": 7.06, "grad_norm": 0.515625, "learning_rate": 0.0004348016467291027, "loss": 0.175, "step": 170430 }, { "epoch": 7.06, "grad_norm": 0.98828125, "learning_rate": 0.0004347943425470072, "loss": 0.1568, "step": 170440 }, { "epoch": 7.06, "grad_norm": 1.3984375, "learning_rate": 0.0004347870380171488, "loss": 0.1445, "step": 170450 }, { "epoch": 7.06, "grad_norm": 0.63671875, "learning_rate": 0.0004347797331395411, "loss": 0.2744, "step": 170460 }, { "epoch": 7.06, "grad_norm": 0.64453125, "learning_rate": 0.00043477242791419794, "loss": 0.2655, "step": 170470 }, { "epoch": 7.06, "grad_norm": 0.6484375, "learning_rate": 0.000434765122341133, "loss": 0.195, "step": 170480 }, { "epoch": 7.06, "grad_norm": 0.8046875, "learning_rate": 0.00043475781642036, "loss": 0.1834, "step": 170490 }, { "epoch": 7.06, "grad_norm": 0.4140625, "learning_rate": 0.0004347505101518928, "loss": 0.1991, "step": 170500 }, { "epoch": 7.06, "grad_norm": 0.671875, "learning_rate": 0.00043474320353574503, "loss": 0.1927, "step": 170510 }, { "epoch": 7.06, "grad_norm": 0.703125, "learning_rate": 0.0004347358965719306, "loss": 0.2103, "step": 170520 }, { "epoch": 7.06, "grad_norm": 0.49609375, "learning_rate": 0.0004347285892604631, "loss": 0.2304, "step": 170530 }, { "epoch": 7.06, "grad_norm": 1.8125, "learning_rate": 0.0004347212816013564, "loss": 0.1943, "step": 170540 }, { "epoch": 7.06, "grad_norm": 1.3671875, "learning_rate": 0.00043471397359462407, "loss": 0.2138, "step": 170550 }, { "epoch": 7.06, "grad_norm": 1.375, "learning_rate": 0.0004347066652402801, "loss": 0.1893, "step": 170560 }, { "epoch": 7.06, "grad_norm": 0.625, "learning_rate": 0.00043469935653833817, "loss": 0.2492, "step": 170570 }, { "epoch": 7.07, "grad_norm": 1.0, "learning_rate": 0.00043469204748881185, "loss": 0.2263, "step": 170580 }, { "epoch": 7.07, "grad_norm": 0.578125, "learning_rate": 0.00043468473809171515, "loss": 0.1607, "step": 170590 }, { "epoch": 7.07, "grad_norm": 0.546875, "learning_rate": 0.0004346774283470617, "loss": 0.1933, "step": 170600 }, { "epoch": 7.07, "grad_norm": 0.96484375, "learning_rate": 0.0004346701182548652, "loss": 0.2467, "step": 170610 }, { "epoch": 7.07, "grad_norm": 0.70703125, "learning_rate": 0.00043466280781513954, "loss": 0.2447, "step": 170620 }, { "epoch": 7.07, "grad_norm": 0.9140625, "learning_rate": 0.00043465549702789843, "loss": 0.1429, "step": 170630 }, { "epoch": 7.07, "grad_norm": 0.380859375, "learning_rate": 0.0004346481858931556, "loss": 0.21, "step": 170640 }, { "epoch": 7.07, "grad_norm": 0.2578125, "learning_rate": 0.0004346408744109249, "loss": 0.2371, "step": 170650 }, { "epoch": 7.07, "grad_norm": 0.76953125, "learning_rate": 0.00043463356258121986, "loss": 0.1933, "step": 170660 }, { "epoch": 7.07, "grad_norm": 0.7578125, "learning_rate": 0.0004346262504040545, "loss": 0.2353, "step": 170670 }, { "epoch": 7.07, "grad_norm": 0.8046875, "learning_rate": 0.00043461893787944246, "loss": 0.2137, "step": 170680 }, { "epoch": 7.07, "grad_norm": 0.58984375, "learning_rate": 0.0004346116250073975, "loss": 0.1933, "step": 170690 }, { "epoch": 7.07, "grad_norm": 0.6640625, "learning_rate": 0.0004346043117879334, "loss": 0.1902, "step": 170700 }, { "epoch": 7.07, "grad_norm": 0.8984375, "learning_rate": 0.00043459699822106395, "loss": 0.1502, "step": 170710 }, { "epoch": 7.07, "grad_norm": 0.42578125, "learning_rate": 0.0004345896843068029, "loss": 0.1708, "step": 170720 }, { "epoch": 7.07, "grad_norm": 0.5, "learning_rate": 0.0004345823700451639, "loss": 0.2009, "step": 170730 }, { "epoch": 7.07, "grad_norm": 1.53125, "learning_rate": 0.0004345750554361609, "loss": 0.1835, "step": 170740 }, { "epoch": 7.07, "grad_norm": 0.4765625, "learning_rate": 0.00043456774047980753, "loss": 0.1949, "step": 170750 }, { "epoch": 7.07, "grad_norm": 0.65234375, "learning_rate": 0.0004345604251761176, "loss": 0.1666, "step": 170760 }, { "epoch": 7.07, "grad_norm": 1.0078125, "learning_rate": 0.000434553109525105, "loss": 0.2272, "step": 170770 }, { "epoch": 7.07, "grad_norm": 0.515625, "learning_rate": 0.00043454579352678323, "loss": 0.1836, "step": 170780 }, { "epoch": 7.07, "grad_norm": 0.71875, "learning_rate": 0.0004345384771811662, "loss": 0.1823, "step": 170790 }, { "epoch": 7.07, "grad_norm": 0.8125, "learning_rate": 0.0004345311604882678, "loss": 0.1803, "step": 170800 }, { "epoch": 7.07, "grad_norm": 1.0078125, "learning_rate": 0.00043452384344810165, "loss": 0.2293, "step": 170810 }, { "epoch": 7.08, "grad_norm": 0.828125, "learning_rate": 0.0004345165260606815, "loss": 0.1959, "step": 170820 }, { "epoch": 7.08, "grad_norm": 0.0, "learning_rate": 0.0004345092083260212, "loss": 0.175, "step": 170830 }, { "epoch": 7.08, "grad_norm": 0.6953125, "learning_rate": 0.0004345018902441345, "loss": 0.2244, "step": 170840 }, { "epoch": 7.08, "grad_norm": 0.99609375, "learning_rate": 0.00043449457181503516, "loss": 0.2443, "step": 170850 }, { "epoch": 7.08, "grad_norm": 1.1328125, "learning_rate": 0.000434487253038737, "loss": 0.2298, "step": 170860 }, { "epoch": 7.08, "grad_norm": 0.92578125, "learning_rate": 0.0004344799339152537, "loss": 0.1868, "step": 170870 }, { "epoch": 7.08, "grad_norm": 1.3984375, "learning_rate": 0.0004344726144445991, "loss": 0.217, "step": 170880 }, { "epoch": 7.08, "grad_norm": 0.5078125, "learning_rate": 0.000434465294626787, "loss": 0.2025, "step": 170890 }, { "epoch": 7.08, "grad_norm": 2.984375, "learning_rate": 0.0004344579744618311, "loss": 0.1948, "step": 170900 }, { "epoch": 7.08, "grad_norm": 0.8671875, "learning_rate": 0.0004344506539497451, "loss": 0.2268, "step": 170910 }, { "epoch": 7.08, "grad_norm": 0.9140625, "learning_rate": 0.00043444333309054306, "loss": 0.2381, "step": 170920 }, { "epoch": 7.08, "grad_norm": 1.15625, "learning_rate": 0.00043443601188423856, "loss": 0.2284, "step": 170930 }, { "epoch": 7.08, "grad_norm": 1.0703125, "learning_rate": 0.0004344286903308454, "loss": 0.2124, "step": 170940 }, { "epoch": 7.08, "grad_norm": 0.59765625, "learning_rate": 0.0004344213684303773, "loss": 0.1741, "step": 170950 }, { "epoch": 7.08, "grad_norm": 0.45703125, "learning_rate": 0.0004344140461828481, "loss": 0.1737, "step": 170960 }, { "epoch": 7.08, "grad_norm": 0.60546875, "learning_rate": 0.00043440672358827163, "loss": 0.2098, "step": 170970 }, { "epoch": 7.08, "grad_norm": 0.65625, "learning_rate": 0.0004343994006466616, "loss": 0.2087, "step": 170980 }, { "epoch": 7.08, "grad_norm": 0.3671875, "learning_rate": 0.00043439207735803184, "loss": 0.1536, "step": 170990 }, { "epoch": 7.08, "grad_norm": 0.79296875, "learning_rate": 0.000434384753722396, "loss": 0.179, "step": 171000 }, { "epoch": 7.08, "grad_norm": 0.87890625, "learning_rate": 0.00043437742973976814, "loss": 0.2127, "step": 171010 }, { "epoch": 7.08, "grad_norm": 0.55859375, "learning_rate": 0.0004343701054101618, "loss": 0.2208, "step": 171020 }, { "epoch": 7.08, "grad_norm": 0.2275390625, "learning_rate": 0.0004343627807335907, "loss": 0.1962, "step": 171030 }, { "epoch": 7.08, "grad_norm": 1.8828125, "learning_rate": 0.0004343554557100689, "loss": 0.2284, "step": 171040 }, { "epoch": 7.08, "grad_norm": 0.59375, "learning_rate": 0.00043434813033961, "loss": 0.1813, "step": 171050 }, { "epoch": 7.09, "grad_norm": 0.6640625, "learning_rate": 0.0004343408046222278, "loss": 0.2895, "step": 171060 }, { "epoch": 7.09, "grad_norm": 0.58203125, "learning_rate": 0.00043433347855793613, "loss": 0.1952, "step": 171070 }, { "epoch": 7.09, "grad_norm": 0.0, "learning_rate": 0.00043432615214674883, "loss": 0.2261, "step": 171080 }, { "epoch": 7.09, "grad_norm": 0.3203125, "learning_rate": 0.0004343188253886795, "loss": 0.1887, "step": 171090 }, { "epoch": 7.09, "grad_norm": 0.361328125, "learning_rate": 0.00043431149828374217, "loss": 0.2263, "step": 171100 }, { "epoch": 7.09, "grad_norm": 0.65625, "learning_rate": 0.0004343041708319504, "loss": 0.1928, "step": 171110 }, { "epoch": 7.09, "grad_norm": 0.53125, "learning_rate": 0.0004342968430333181, "loss": 0.1834, "step": 171120 }, { "epoch": 7.09, "grad_norm": 0.7734375, "learning_rate": 0.00043428951488785905, "loss": 0.1815, "step": 171130 }, { "epoch": 7.09, "grad_norm": 1.390625, "learning_rate": 0.0004342821863955871, "loss": 0.1949, "step": 171140 }, { "epoch": 7.09, "grad_norm": 0.5859375, "learning_rate": 0.0004342748575565159, "loss": 0.1997, "step": 171150 }, { "epoch": 7.09, "grad_norm": 0.796875, "learning_rate": 0.0004342675283706593, "loss": 0.2171, "step": 171160 }, { "epoch": 7.09, "grad_norm": 0.392578125, "learning_rate": 0.0004342601988380311, "loss": 0.1794, "step": 171170 }, { "epoch": 7.09, "grad_norm": 0.6484375, "learning_rate": 0.00043425286895864526, "loss": 0.2096, "step": 171180 }, { "epoch": 7.09, "grad_norm": 0.87109375, "learning_rate": 0.0004342455387325153, "loss": 0.2169, "step": 171190 }, { "epoch": 7.09, "grad_norm": 0.59375, "learning_rate": 0.00043423820815965513, "loss": 0.1766, "step": 171200 }, { "epoch": 7.09, "grad_norm": 0.466796875, "learning_rate": 0.0004342308772400786, "loss": 0.1918, "step": 171210 }, { "epoch": 7.09, "grad_norm": 1.328125, "learning_rate": 0.00043422354597379945, "loss": 0.2078, "step": 171220 }, { "epoch": 7.09, "grad_norm": 1.90625, "learning_rate": 0.00043421621436083135, "loss": 0.183, "step": 171230 }, { "epoch": 7.09, "grad_norm": 0.609375, "learning_rate": 0.0004342088824011884, "loss": 0.2028, "step": 171240 }, { "epoch": 7.09, "grad_norm": 1.078125, "learning_rate": 0.00043420155009488414, "loss": 0.1794, "step": 171250 }, { "epoch": 7.09, "grad_norm": 0.52734375, "learning_rate": 0.00043419421744193246, "loss": 0.174, "step": 171260 }, { "epoch": 7.09, "grad_norm": 1.328125, "learning_rate": 0.00043418688444234714, "loss": 0.2182, "step": 171270 }, { "epoch": 7.09, "grad_norm": 0.484375, "learning_rate": 0.00043417955109614206, "loss": 0.156, "step": 171280 }, { "epoch": 7.09, "grad_norm": 0.828125, "learning_rate": 0.0004341722174033309, "loss": 0.2277, "step": 171290 }, { "epoch": 7.1, "grad_norm": 0.6953125, "learning_rate": 0.0004341648833639276, "loss": 0.2338, "step": 171300 }, { "epoch": 7.1, "grad_norm": 0.72265625, "learning_rate": 0.00043415754897794574, "loss": 0.1862, "step": 171310 }, { "epoch": 7.1, "grad_norm": 0.56640625, "learning_rate": 0.00043415021424539937, "loss": 0.1915, "step": 171320 }, { "epoch": 7.1, "grad_norm": 0.486328125, "learning_rate": 0.0004341428791663021, "loss": 0.2059, "step": 171330 }, { "epoch": 7.1, "grad_norm": 0.41796875, "learning_rate": 0.0004341355437406678, "loss": 0.2243, "step": 171340 }, { "epoch": 7.1, "grad_norm": 0.53125, "learning_rate": 0.0004341282079685104, "loss": 0.183, "step": 171350 }, { "epoch": 7.1, "grad_norm": 0.96875, "learning_rate": 0.0004341208718498435, "loss": 0.195, "step": 171360 }, { "epoch": 7.1, "grad_norm": 0.77734375, "learning_rate": 0.00043411353538468106, "loss": 0.1653, "step": 171370 }, { "epoch": 7.1, "grad_norm": 0.85546875, "learning_rate": 0.0004341061985730368, "loss": 0.1927, "step": 171380 }, { "epoch": 7.1, "grad_norm": 0.6640625, "learning_rate": 0.0004340988614149246, "loss": 0.2483, "step": 171390 }, { "epoch": 7.1, "grad_norm": 0.384765625, "learning_rate": 0.00043409152391035824, "loss": 0.2686, "step": 171400 }, { "epoch": 7.1, "grad_norm": 0.4921875, "learning_rate": 0.0004340841860593514, "loss": 0.2353, "step": 171410 }, { "epoch": 7.1, "grad_norm": 1.046875, "learning_rate": 0.0004340768478619181, "loss": 0.1824, "step": 171420 }, { "epoch": 7.1, "grad_norm": 1.125, "learning_rate": 0.00043406950931807196, "loss": 0.2357, "step": 171430 }, { "epoch": 7.1, "grad_norm": 0.54296875, "learning_rate": 0.00043406217042782705, "loss": 0.184, "step": 171440 }, { "epoch": 7.1, "grad_norm": 0.7734375, "learning_rate": 0.00043405483119119683, "loss": 0.1736, "step": 171450 }, { "epoch": 7.1, "grad_norm": 0.3984375, "learning_rate": 0.00043404749160819535, "loss": 0.2105, "step": 171460 }, { "epoch": 7.1, "grad_norm": 0.57421875, "learning_rate": 0.00043404015167883644, "loss": 0.2237, "step": 171470 }, { "epoch": 7.1, "grad_norm": 0.859375, "learning_rate": 0.00043403281140313375, "loss": 0.2047, "step": 171480 }, { "epoch": 7.1, "grad_norm": 0.36328125, "learning_rate": 0.0004340254707811012, "loss": 0.2277, "step": 171490 }, { "epoch": 7.1, "grad_norm": 0.8671875, "learning_rate": 0.0004340181298127526, "loss": 0.217, "step": 171500 }, { "epoch": 7.1, "grad_norm": 1.609375, "learning_rate": 0.00043401078849810175, "loss": 0.236, "step": 171510 }, { "epoch": 7.1, "grad_norm": 1.0859375, "learning_rate": 0.0004340034468371625, "loss": 0.222, "step": 171520 }, { "epoch": 7.1, "grad_norm": 0.92578125, "learning_rate": 0.0004339961048299486, "loss": 0.2042, "step": 171530 }, { "epoch": 7.11, "grad_norm": 1.3828125, "learning_rate": 0.0004339887624764739, "loss": 0.2124, "step": 171540 }, { "epoch": 7.11, "grad_norm": 1.1640625, "learning_rate": 0.0004339814197767522, "loss": 0.1649, "step": 171550 }, { "epoch": 7.11, "grad_norm": 0.76953125, "learning_rate": 0.00043397407673079734, "loss": 0.1897, "step": 171560 }, { "epoch": 7.11, "grad_norm": 0.30078125, "learning_rate": 0.00043396673333862326, "loss": 0.2091, "step": 171570 }, { "epoch": 7.11, "grad_norm": 0.99609375, "learning_rate": 0.0004339593896002435, "loss": 0.1932, "step": 171580 }, { "epoch": 7.11, "grad_norm": 0.6328125, "learning_rate": 0.0004339520455156721, "loss": 0.2216, "step": 171590 }, { "epoch": 7.11, "grad_norm": 0.8828125, "learning_rate": 0.0004339447010849228, "loss": 0.1838, "step": 171600 }, { "epoch": 7.11, "grad_norm": 0.419921875, "learning_rate": 0.0004339373563080095, "loss": 0.2025, "step": 171610 }, { "epoch": 7.11, "grad_norm": 0.365234375, "learning_rate": 0.0004339300111849458, "loss": 0.2516, "step": 171620 }, { "epoch": 7.11, "grad_norm": 1.1640625, "learning_rate": 0.00043392266571574576, "loss": 0.1863, "step": 171630 }, { "epoch": 7.11, "grad_norm": 0.34765625, "learning_rate": 0.0004339153199004232, "loss": 0.2472, "step": 171640 }, { "epoch": 7.11, "grad_norm": 0.69921875, "learning_rate": 0.0004339079737389918, "loss": 0.1954, "step": 171650 }, { "epoch": 7.11, "grad_norm": 0.94921875, "learning_rate": 0.00043390062723146547, "loss": 0.2021, "step": 171660 }, { "epoch": 7.11, "grad_norm": 0.6484375, "learning_rate": 0.000433893280377858, "loss": 0.1811, "step": 171670 }, { "epoch": 7.11, "grad_norm": 0.8046875, "learning_rate": 0.0004338859331781833, "loss": 0.2162, "step": 171680 }, { "epoch": 7.11, "grad_norm": 0.462890625, "learning_rate": 0.0004338785856324551, "loss": 0.1805, "step": 171690 }, { "epoch": 7.11, "grad_norm": 0.91015625, "learning_rate": 0.00043387123774068717, "loss": 0.1817, "step": 171700 }, { "epoch": 7.11, "grad_norm": 0.6796875, "learning_rate": 0.0004338638895028935, "loss": 0.1676, "step": 171710 }, { "epoch": 7.11, "grad_norm": 0.376953125, "learning_rate": 0.0004338565409190879, "loss": 0.2077, "step": 171720 }, { "epoch": 7.11, "grad_norm": 0.74609375, "learning_rate": 0.00043384919198928405, "loss": 0.1503, "step": 171730 }, { "epoch": 7.11, "grad_norm": 0.55859375, "learning_rate": 0.0004338418427134959, "loss": 0.1699, "step": 171740 }, { "epoch": 7.11, "grad_norm": 0.66796875, "learning_rate": 0.0004338344930917373, "loss": 0.197, "step": 171750 }, { "epoch": 7.11, "grad_norm": 0.83984375, "learning_rate": 0.00043382714312402193, "loss": 0.198, "step": 171760 }, { "epoch": 7.11, "grad_norm": 0.52734375, "learning_rate": 0.00043381979281036386, "loss": 0.1631, "step": 171770 }, { "epoch": 7.12, "grad_norm": 0.8125, "learning_rate": 0.00043381244215077677, "loss": 0.2317, "step": 171780 }, { "epoch": 7.12, "grad_norm": 0.71875, "learning_rate": 0.00043380509114527443, "loss": 0.1951, "step": 171790 }, { "epoch": 7.12, "grad_norm": 1.4453125, "learning_rate": 0.00043379773979387084, "loss": 0.1831, "step": 171800 }, { "epoch": 7.12, "grad_norm": 0.98828125, "learning_rate": 0.00043379038809657965, "loss": 0.2449, "step": 171810 }, { "epoch": 7.12, "grad_norm": 0.52734375, "learning_rate": 0.00043378303605341484, "loss": 0.2057, "step": 171820 }, { "epoch": 7.12, "grad_norm": 1.6796875, "learning_rate": 0.00043377568366439025, "loss": 0.1993, "step": 171830 }, { "epoch": 7.12, "grad_norm": 1.3984375, "learning_rate": 0.0004337683309295197, "loss": 0.1989, "step": 171840 }, { "epoch": 7.12, "grad_norm": 1.1015625, "learning_rate": 0.00043376097784881694, "loss": 0.2416, "step": 171850 }, { "epoch": 7.12, "grad_norm": 0.427734375, "learning_rate": 0.0004337536244222958, "loss": 0.1648, "step": 171860 }, { "epoch": 7.12, "grad_norm": 1.8984375, "learning_rate": 0.0004337462706499703, "loss": 0.1633, "step": 171870 }, { "epoch": 7.12, "grad_norm": 0.72265625, "learning_rate": 0.0004337389165318541, "loss": 0.1952, "step": 171880 }, { "epoch": 7.12, "grad_norm": 0.8515625, "learning_rate": 0.00043373156206796116, "loss": 0.1484, "step": 171890 }, { "epoch": 7.12, "grad_norm": 0.55078125, "learning_rate": 0.0004337242072583052, "loss": 0.1872, "step": 171900 }, { "epoch": 7.12, "grad_norm": 0.92578125, "learning_rate": 0.00043371685210290013, "loss": 0.2079, "step": 171910 }, { "epoch": 7.12, "grad_norm": 0.48828125, "learning_rate": 0.0004337094966017597, "loss": 0.213, "step": 171920 }, { "epoch": 7.12, "grad_norm": 1.2734375, "learning_rate": 0.000433702140754898, "loss": 0.1702, "step": 171930 }, { "epoch": 7.12, "grad_norm": 0.263671875, "learning_rate": 0.00043369478456232866, "loss": 0.2103, "step": 171940 }, { "epoch": 7.12, "grad_norm": 1.140625, "learning_rate": 0.00043368742802406545, "loss": 0.1912, "step": 171950 }, { "epoch": 7.12, "grad_norm": 0.012451171875, "learning_rate": 0.00043368007114012253, "loss": 0.1706, "step": 171960 }, { "epoch": 7.12, "grad_norm": 0.6015625, "learning_rate": 0.00043367271391051346, "loss": 0.2489, "step": 171970 }, { "epoch": 7.12, "grad_norm": 0.33984375, "learning_rate": 0.0004336653563352522, "loss": 0.2208, "step": 171980 }, { "epoch": 7.12, "grad_norm": 0.97265625, "learning_rate": 0.0004336579984143525, "loss": 0.2099, "step": 171990 }, { "epoch": 7.12, "grad_norm": 0.302734375, "learning_rate": 0.0004336506401478283, "loss": 0.2394, "step": 172000 }, { "epoch": 7.12, "grad_norm": 0.51953125, "learning_rate": 0.0004336432815356934, "loss": 0.2285, "step": 172010 }, { "epoch": 7.13, "grad_norm": 0.6640625, "learning_rate": 0.0004336359225779618, "loss": 0.2076, "step": 172020 }, { "epoch": 7.13, "grad_norm": 0.9921875, "learning_rate": 0.00043362856327464717, "loss": 0.2211, "step": 172030 }, { "epoch": 7.13, "grad_norm": 0.67578125, "learning_rate": 0.0004336212036257634, "loss": 0.2146, "step": 172040 }, { "epoch": 7.13, "grad_norm": 1.15625, "learning_rate": 0.0004336138436313243, "loss": 0.1684, "step": 172050 }, { "epoch": 7.13, "grad_norm": 0.65625, "learning_rate": 0.0004336064832913439, "loss": 0.1876, "step": 172060 }, { "epoch": 7.13, "grad_norm": 0.9453125, "learning_rate": 0.00043359912260583586, "loss": 0.1567, "step": 172070 }, { "epoch": 7.13, "grad_norm": 0.5546875, "learning_rate": 0.00043359176157481405, "loss": 0.2345, "step": 172080 }, { "epoch": 7.13, "grad_norm": 0.2353515625, "learning_rate": 0.0004335844001982924, "loss": 0.1641, "step": 172090 }, { "epoch": 7.13, "grad_norm": 0.8515625, "learning_rate": 0.00043357703847628475, "loss": 0.2365, "step": 172100 }, { "epoch": 7.13, "grad_norm": 0.451171875, "learning_rate": 0.00043356967640880497, "loss": 0.209, "step": 172110 }, { "epoch": 7.13, "grad_norm": 1.4140625, "learning_rate": 0.00043356231399586687, "loss": 0.2124, "step": 172120 }, { "epoch": 7.13, "grad_norm": 1.1796875, "learning_rate": 0.0004335549512374843, "loss": 0.2058, "step": 172130 }, { "epoch": 7.13, "grad_norm": 0.5234375, "learning_rate": 0.0004335475881336711, "loss": 0.2062, "step": 172140 }, { "epoch": 7.13, "grad_norm": 0.412109375, "learning_rate": 0.0004335402246844412, "loss": 0.167, "step": 172150 }, { "epoch": 7.13, "grad_norm": 0.96875, "learning_rate": 0.0004335328608898084, "loss": 0.2352, "step": 172160 }, { "epoch": 7.13, "grad_norm": 1.5859375, "learning_rate": 0.0004335254967497866, "loss": 0.2353, "step": 172170 }, { "epoch": 7.13, "grad_norm": 0.5859375, "learning_rate": 0.00043351813226438963, "loss": 0.1644, "step": 172180 }, { "epoch": 7.13, "grad_norm": 0.6875, "learning_rate": 0.0004335107674336313, "loss": 0.2008, "step": 172190 }, { "epoch": 7.13, "grad_norm": 0.87109375, "learning_rate": 0.0004335034022575256, "loss": 0.2302, "step": 172200 }, { "epoch": 7.13, "grad_norm": 1.171875, "learning_rate": 0.0004334960367360863, "loss": 0.2107, "step": 172210 }, { "epoch": 7.13, "grad_norm": 0.7265625, "learning_rate": 0.0004334886708693272, "loss": 0.2107, "step": 172220 }, { "epoch": 7.13, "grad_norm": 1.5234375, "learning_rate": 0.0004334813046572623, "loss": 0.1705, "step": 172230 }, { "epoch": 7.13, "grad_norm": 0.8359375, "learning_rate": 0.0004334739380999054, "loss": 0.167, "step": 172240 }, { "epoch": 7.13, "grad_norm": 1.015625, "learning_rate": 0.0004334665711972704, "loss": 0.1953, "step": 172250 }, { "epoch": 7.13, "grad_norm": 1.015625, "learning_rate": 0.00043345920394937103, "loss": 0.1698, "step": 172260 }, { "epoch": 7.14, "grad_norm": 0.9453125, "learning_rate": 0.0004334518363562213, "loss": 0.2039, "step": 172270 }, { "epoch": 7.14, "grad_norm": 0.328125, "learning_rate": 0.000433444468417835, "loss": 0.249, "step": 172280 }, { "epoch": 7.14, "grad_norm": 0.8828125, "learning_rate": 0.0004334371001342261, "loss": 0.1889, "step": 172290 }, { "epoch": 7.14, "grad_norm": 1.3046875, "learning_rate": 0.0004334297315054083, "loss": 0.2212, "step": 172300 }, { "epoch": 7.14, "grad_norm": 0.75, "learning_rate": 0.00043342236253139557, "loss": 0.1852, "step": 172310 }, { "epoch": 7.14, "grad_norm": 0.83203125, "learning_rate": 0.0004334149932122018, "loss": 0.2069, "step": 172320 }, { "epoch": 7.14, "grad_norm": 1.140625, "learning_rate": 0.0004334076235478408, "loss": 0.2024, "step": 172330 }, { "epoch": 7.14, "grad_norm": 0.90625, "learning_rate": 0.0004334002535383265, "loss": 0.2448, "step": 172340 }, { "epoch": 7.14, "grad_norm": 0.458984375, "learning_rate": 0.0004333928831836726, "loss": 0.1999, "step": 172350 }, { "epoch": 7.14, "grad_norm": 0.59765625, "learning_rate": 0.00043338551248389327, "loss": 0.2145, "step": 172360 }, { "epoch": 7.14, "grad_norm": 0.625, "learning_rate": 0.0004333781414390021, "loss": 0.2035, "step": 172370 }, { "epoch": 7.14, "grad_norm": 0.578125, "learning_rate": 0.00043337077004901303, "loss": 0.2078, "step": 172380 }, { "epoch": 7.14, "grad_norm": 0.80078125, "learning_rate": 0.0004333633983139401, "loss": 0.1867, "step": 172390 }, { "epoch": 7.14, "grad_norm": 0.9609375, "learning_rate": 0.000433356026233797, "loss": 0.23, "step": 172400 }, { "epoch": 7.14, "grad_norm": 0.64453125, "learning_rate": 0.00043334865380859766, "loss": 0.2088, "step": 172410 }, { "epoch": 7.14, "grad_norm": 0.40625, "learning_rate": 0.0004333412810383559, "loss": 0.1801, "step": 172420 }, { "epoch": 7.14, "grad_norm": 0.609375, "learning_rate": 0.00043333390792308566, "loss": 0.2108, "step": 172430 }, { "epoch": 7.14, "grad_norm": 1.015625, "learning_rate": 0.0004333265344628009, "loss": 0.1882, "step": 172440 }, { "epoch": 7.14, "grad_norm": 0.169921875, "learning_rate": 0.00043331916065751533, "loss": 0.1893, "step": 172450 }, { "epoch": 7.14, "grad_norm": 0.7734375, "learning_rate": 0.0004333117865072429, "loss": 0.196, "step": 172460 }, { "epoch": 7.14, "grad_norm": 0.60546875, "learning_rate": 0.00043330441201199746, "loss": 0.1982, "step": 172470 }, { "epoch": 7.14, "grad_norm": 1.2265625, "learning_rate": 0.0004332970371717929, "loss": 0.1747, "step": 172480 }, { "epoch": 7.14, "grad_norm": 0.83984375, "learning_rate": 0.0004332896619866432, "loss": 0.1899, "step": 172490 }, { "epoch": 7.14, "grad_norm": 0.6796875, "learning_rate": 0.0004332822864565621, "loss": 0.2498, "step": 172500 }, { "epoch": 7.15, "grad_norm": 1.703125, "learning_rate": 0.00043327491058156353, "loss": 0.196, "step": 172510 }, { "epoch": 7.15, "grad_norm": 1.046875, "learning_rate": 0.00043326753436166135, "loss": 0.187, "step": 172520 }, { "epoch": 7.15, "grad_norm": 0.2578125, "learning_rate": 0.0004332601577968695, "loss": 0.1751, "step": 172530 }, { "epoch": 7.15, "grad_norm": 0.453125, "learning_rate": 0.00043325278088720176, "loss": 0.1854, "step": 172540 }, { "epoch": 7.15, "grad_norm": 1.2578125, "learning_rate": 0.00043324540363267217, "loss": 0.2157, "step": 172550 }, { "epoch": 7.15, "grad_norm": 0.6875, "learning_rate": 0.0004332380260332944, "loss": 0.2119, "step": 172560 }, { "epoch": 7.15, "grad_norm": 0.8828125, "learning_rate": 0.00043323064808908256, "loss": 0.162, "step": 172570 }, { "epoch": 7.15, "grad_norm": 0.49609375, "learning_rate": 0.00043322326980005046, "loss": 0.2304, "step": 172580 }, { "epoch": 7.15, "grad_norm": 0.828125, "learning_rate": 0.0004332158911662119, "loss": 0.2304, "step": 172590 }, { "epoch": 7.15, "grad_norm": 0.60546875, "learning_rate": 0.00043320851218758074, "loss": 0.2003, "step": 172600 }, { "epoch": 7.15, "grad_norm": 0.5859375, "learning_rate": 0.00043320113286417103, "loss": 0.2, "step": 172610 }, { "epoch": 7.15, "grad_norm": 0.8359375, "learning_rate": 0.0004331937531959965, "loss": 0.2147, "step": 172620 }, { "epoch": 7.15, "grad_norm": 0.6484375, "learning_rate": 0.0004331863731830712, "loss": 0.2057, "step": 172630 }, { "epoch": 7.15, "grad_norm": 0.5625, "learning_rate": 0.00043317899282540885, "loss": 0.2231, "step": 172640 }, { "epoch": 7.15, "grad_norm": 1.0859375, "learning_rate": 0.0004331716121230235, "loss": 0.2055, "step": 172650 }, { "epoch": 7.15, "grad_norm": 0.50390625, "learning_rate": 0.00043316423107592883, "loss": 0.2262, "step": 172660 }, { "epoch": 7.15, "grad_norm": 0.4375, "learning_rate": 0.00043315684968413894, "loss": 0.1905, "step": 172670 }, { "epoch": 7.15, "grad_norm": 1.1484375, "learning_rate": 0.0004331494679476676, "loss": 0.258, "step": 172680 }, { "epoch": 7.15, "grad_norm": 3.484375, "learning_rate": 0.0004331420858665288, "loss": 0.2307, "step": 172690 }, { "epoch": 7.15, "grad_norm": 0.640625, "learning_rate": 0.00043313470344073633, "loss": 0.2146, "step": 172700 }, { "epoch": 7.15, "grad_norm": 1.25, "learning_rate": 0.00043312732067030417, "loss": 0.2089, "step": 172710 }, { "epoch": 7.15, "grad_norm": 0.66796875, "learning_rate": 0.00043311993755524615, "loss": 0.24, "step": 172720 }, { "epoch": 7.15, "grad_norm": 0.9296875, "learning_rate": 0.00043311255409557615, "loss": 0.1981, "step": 172730 }, { "epoch": 7.15, "grad_norm": 1.1640625, "learning_rate": 0.00043310517029130816, "loss": 0.2006, "step": 172740 }, { "epoch": 7.16, "grad_norm": 0.22265625, "learning_rate": 0.000433097786142456, "loss": 0.2014, "step": 172750 }, { "epoch": 7.16, "grad_norm": 0.4453125, "learning_rate": 0.0004330904016490335, "loss": 0.2112, "step": 172760 }, { "epoch": 7.16, "grad_norm": 0.296875, "learning_rate": 0.0004330830168110547, "loss": 0.1551, "step": 172770 }, { "epoch": 7.16, "grad_norm": 0.251953125, "learning_rate": 0.0004330756316285335, "loss": 0.2198, "step": 172780 }, { "epoch": 7.16, "grad_norm": 0.94140625, "learning_rate": 0.00043306824610148353, "loss": 0.2092, "step": 172790 }, { "epoch": 7.16, "grad_norm": 0.43359375, "learning_rate": 0.00043306086022991907, "loss": 0.199, "step": 172800 }, { "epoch": 7.16, "grad_norm": 0.390625, "learning_rate": 0.00043305347401385375, "loss": 0.2137, "step": 172810 }, { "epoch": 7.16, "grad_norm": 1.4921875, "learning_rate": 0.0004330460874533016, "loss": 0.206, "step": 172820 }, { "epoch": 7.16, "grad_norm": 0.7109375, "learning_rate": 0.00043303870054827646, "loss": 0.21, "step": 172830 }, { "epoch": 7.16, "grad_norm": 0.69140625, "learning_rate": 0.0004330313132987923, "loss": 0.1589, "step": 172840 }, { "epoch": 7.16, "grad_norm": 0.4765625, "learning_rate": 0.0004330239257048629, "loss": 0.2564, "step": 172850 }, { "epoch": 7.16, "grad_norm": 0.5859375, "learning_rate": 0.00043301653776650227, "loss": 0.2358, "step": 172860 }, { "epoch": 7.16, "grad_norm": 0.54296875, "learning_rate": 0.0004330091494837243, "loss": 0.2152, "step": 172870 }, { "epoch": 7.16, "grad_norm": 0.625, "learning_rate": 0.00043300176085654294, "loss": 0.1708, "step": 172880 }, { "epoch": 7.16, "grad_norm": 0.71484375, "learning_rate": 0.00043299437188497195, "loss": 0.213, "step": 172890 }, { "epoch": 7.16, "grad_norm": 0.94921875, "learning_rate": 0.0004329869825690252, "loss": 0.1882, "step": 172900 }, { "epoch": 7.16, "grad_norm": 0.333984375, "learning_rate": 0.0004329795929087169, "loss": 0.1772, "step": 172910 }, { "epoch": 7.16, "grad_norm": 0.83984375, "learning_rate": 0.0004329722029040607, "loss": 0.1845, "step": 172920 }, { "epoch": 7.16, "grad_norm": 0.8046875, "learning_rate": 0.0004329648125550706, "loss": 0.241, "step": 172930 }, { "epoch": 7.16, "grad_norm": 1.796875, "learning_rate": 0.00043295742186176035, "loss": 0.1581, "step": 172940 }, { "epoch": 7.16, "grad_norm": 0.89453125, "learning_rate": 0.00043295003082414416, "loss": 0.1818, "step": 172950 }, { "epoch": 7.16, "grad_norm": 0.61328125, "learning_rate": 0.00043294263944223567, "loss": 0.1768, "step": 172960 }, { "epoch": 7.16, "grad_norm": 1.59375, "learning_rate": 0.0004329352477160489, "loss": 0.2086, "step": 172970 }, { "epoch": 7.16, "grad_norm": 0.5078125, "learning_rate": 0.0004329278556455978, "loss": 0.1806, "step": 172980 }, { "epoch": 7.17, "grad_norm": 1.796875, "learning_rate": 0.0004329204632308962, "loss": 0.1362, "step": 172990 }, { "epoch": 7.17, "grad_norm": 1.171875, "learning_rate": 0.000432913070471958, "loss": 0.207, "step": 173000 }, { "epoch": 7.17, "grad_norm": 1.0859375, "learning_rate": 0.00043290567736879715, "loss": 0.2271, "step": 173010 }, { "epoch": 7.17, "grad_norm": 0.34375, "learning_rate": 0.00043289828392142763, "loss": 0.2394, "step": 173020 }, { "epoch": 7.17, "grad_norm": 0.73828125, "learning_rate": 0.0004328908901298633, "loss": 0.1802, "step": 173030 }, { "epoch": 7.17, "grad_norm": 1.3515625, "learning_rate": 0.000432883495994118, "loss": 0.1929, "step": 173040 }, { "epoch": 7.17, "grad_norm": 1.03125, "learning_rate": 0.00043287610151420574, "loss": 0.2319, "step": 173050 }, { "epoch": 7.17, "grad_norm": 0.97265625, "learning_rate": 0.00043286870669014037, "loss": 0.2163, "step": 173060 }, { "epoch": 7.17, "grad_norm": 0.84375, "learning_rate": 0.00043286131152193584, "loss": 0.1613, "step": 173070 }, { "epoch": 7.17, "grad_norm": 1.0546875, "learning_rate": 0.00043285391600960624, "loss": 0.2439, "step": 173080 }, { "epoch": 7.17, "grad_norm": 1.1015625, "learning_rate": 0.0004328465201531652, "loss": 0.2635, "step": 173090 }, { "epoch": 7.17, "grad_norm": 0.357421875, "learning_rate": 0.00043283912395262667, "loss": 0.2133, "step": 173100 }, { "epoch": 7.17, "grad_norm": 0.58984375, "learning_rate": 0.0004328317274080047, "loss": 0.1945, "step": 173110 }, { "epoch": 7.17, "grad_norm": 0.62109375, "learning_rate": 0.00043282433051931325, "loss": 0.2627, "step": 173120 }, { "epoch": 7.17, "grad_norm": 0.65234375, "learning_rate": 0.000432816933286566, "loss": 0.2176, "step": 173130 }, { "epoch": 7.17, "grad_norm": 0.8125, "learning_rate": 0.00043280953570977714, "loss": 0.2393, "step": 173140 }, { "epoch": 7.17, "grad_norm": 1.0, "learning_rate": 0.00043280213778896045, "loss": 0.2419, "step": 173150 }, { "epoch": 7.17, "grad_norm": 0.8203125, "learning_rate": 0.0004327947395241299, "loss": 0.2065, "step": 173160 }, { "epoch": 7.17, "grad_norm": 1.2578125, "learning_rate": 0.00043278734091529937, "loss": 0.1943, "step": 173170 }, { "epoch": 7.17, "grad_norm": 0.62890625, "learning_rate": 0.0004327799419624828, "loss": 0.1923, "step": 173180 }, { "epoch": 7.17, "grad_norm": 1.0234375, "learning_rate": 0.0004327725426656941, "loss": 0.202, "step": 173190 }, { "epoch": 7.17, "grad_norm": 0.83984375, "learning_rate": 0.00043276514302494724, "loss": 0.1966, "step": 173200 }, { "epoch": 7.17, "grad_norm": 0.40625, "learning_rate": 0.0004327577430402561, "loss": 0.1579, "step": 173210 }, { "epoch": 7.17, "grad_norm": 0.91015625, "learning_rate": 0.0004327503427116346, "loss": 0.2411, "step": 173220 }, { "epoch": 7.18, "grad_norm": 0.8515625, "learning_rate": 0.00043274294203909675, "loss": 0.2366, "step": 173230 }, { "epoch": 7.18, "grad_norm": 0.439453125, "learning_rate": 0.00043273554102265637, "loss": 0.1986, "step": 173240 }, { "epoch": 7.18, "grad_norm": 0.6640625, "learning_rate": 0.0004327281396623275, "loss": 0.2404, "step": 173250 }, { "epoch": 7.18, "grad_norm": 0.2060546875, "learning_rate": 0.00043272073795812396, "loss": 0.2761, "step": 173260 }, { "epoch": 7.18, "grad_norm": 1.21875, "learning_rate": 0.0004327133359100597, "loss": 0.1928, "step": 173270 }, { "epoch": 7.18, "grad_norm": 0.7734375, "learning_rate": 0.0004327059335181487, "loss": 0.2589, "step": 173280 }, { "epoch": 7.18, "grad_norm": 1.1796875, "learning_rate": 0.00043269853078240487, "loss": 0.1785, "step": 173290 }, { "epoch": 7.18, "grad_norm": 1.3828125, "learning_rate": 0.00043269112770284214, "loss": 0.2149, "step": 173300 }, { "epoch": 7.18, "grad_norm": 0.80859375, "learning_rate": 0.0004326837242794744, "loss": 0.2434, "step": 173310 }, { "epoch": 7.18, "grad_norm": 0.484375, "learning_rate": 0.00043267632051231564, "loss": 0.1737, "step": 173320 }, { "epoch": 7.18, "grad_norm": 0.73828125, "learning_rate": 0.0004326689164013798, "loss": 0.1711, "step": 173330 }, { "epoch": 7.18, "grad_norm": 1.03125, "learning_rate": 0.0004326615119466807, "loss": 0.2425, "step": 173340 }, { "epoch": 7.18, "grad_norm": 0.3671875, "learning_rate": 0.00043265410714823243, "loss": 0.1821, "step": 173350 }, { "epoch": 7.18, "grad_norm": 0.515625, "learning_rate": 0.00043264670200604885, "loss": 0.1802, "step": 173360 }, { "epoch": 7.18, "grad_norm": 0.1865234375, "learning_rate": 0.0004326392965201439, "loss": 0.2207, "step": 173370 }, { "epoch": 7.18, "grad_norm": 0.91796875, "learning_rate": 0.00043263189069053153, "loss": 0.1738, "step": 173380 }, { "epoch": 7.18, "grad_norm": 1.078125, "learning_rate": 0.00043262448451722565, "loss": 0.2434, "step": 173390 }, { "epoch": 7.18, "grad_norm": 0.4140625, "learning_rate": 0.0004326170780002402, "loss": 0.1739, "step": 173400 }, { "epoch": 7.18, "grad_norm": 0.90234375, "learning_rate": 0.00043260967113958914, "loss": 0.1852, "step": 173410 }, { "epoch": 7.18, "grad_norm": 0.00162506103515625, "learning_rate": 0.0004326022639352864, "loss": 0.2118, "step": 173420 }, { "epoch": 7.18, "grad_norm": 0.296875, "learning_rate": 0.00043259485638734587, "loss": 0.1839, "step": 173430 }, { "epoch": 7.18, "grad_norm": 0.9375, "learning_rate": 0.00043258744849578165, "loss": 0.1606, "step": 173440 }, { "epoch": 7.18, "grad_norm": 0.484375, "learning_rate": 0.00043258004026060747, "loss": 0.2408, "step": 173450 }, { "epoch": 7.18, "grad_norm": 0.390625, "learning_rate": 0.0004325726316818374, "loss": 0.1841, "step": 173460 }, { "epoch": 7.19, "grad_norm": 1.796875, "learning_rate": 0.0004325652227594854, "loss": 0.198, "step": 173470 }, { "epoch": 7.19, "grad_norm": 0.7265625, "learning_rate": 0.0004325578134935653, "loss": 0.1964, "step": 173480 }, { "epoch": 7.19, "grad_norm": 0.5546875, "learning_rate": 0.0004325504038840911, "loss": 0.1869, "step": 173490 }, { "epoch": 7.19, "grad_norm": 1.1015625, "learning_rate": 0.0004325429939310768, "loss": 0.1959, "step": 173500 }, { "epoch": 7.19, "grad_norm": 0.5546875, "learning_rate": 0.00043253558363453627, "loss": 0.1498, "step": 173510 }, { "epoch": 7.19, "grad_norm": 0.4921875, "learning_rate": 0.00043252817299448354, "loss": 0.1972, "step": 173520 }, { "epoch": 7.19, "grad_norm": 0.97265625, "learning_rate": 0.0004325207620109325, "loss": 0.1713, "step": 173530 }, { "epoch": 7.19, "grad_norm": 0.7109375, "learning_rate": 0.000432513350683897, "loss": 0.1855, "step": 173540 }, { "epoch": 7.19, "grad_norm": 1.0078125, "learning_rate": 0.00043250593901339115, "loss": 0.1994, "step": 173550 }, { "epoch": 7.19, "grad_norm": 0.447265625, "learning_rate": 0.0004324985269994288, "loss": 0.2116, "step": 173560 }, { "epoch": 7.19, "grad_norm": 0.83984375, "learning_rate": 0.00043249111464202397, "loss": 0.1969, "step": 173570 }, { "epoch": 7.19, "grad_norm": 0.73828125, "learning_rate": 0.00043248370194119056, "loss": 0.2643, "step": 173580 }, { "epoch": 7.19, "grad_norm": 0.2333984375, "learning_rate": 0.0004324762888969425, "loss": 0.2547, "step": 173590 }, { "epoch": 7.19, "grad_norm": 0.5234375, "learning_rate": 0.0004324688755092938, "loss": 0.2473, "step": 173600 }, { "epoch": 7.19, "grad_norm": 0.57421875, "learning_rate": 0.0004324614617782584, "loss": 0.2184, "step": 173610 }, { "epoch": 7.19, "grad_norm": 1.2734375, "learning_rate": 0.00043245404770385023, "loss": 0.2071, "step": 173620 }, { "epoch": 7.19, "grad_norm": 1.1796875, "learning_rate": 0.00043244663328608315, "loss": 0.2369, "step": 173630 }, { "epoch": 7.19, "grad_norm": 0.49609375, "learning_rate": 0.0004324392185249713, "loss": 0.2243, "step": 173640 }, { "epoch": 7.19, "grad_norm": 1.3359375, "learning_rate": 0.00043243180342052856, "loss": 0.1992, "step": 173650 }, { "epoch": 7.19, "grad_norm": 0.5703125, "learning_rate": 0.00043242438797276876, "loss": 0.222, "step": 173660 }, { "epoch": 7.19, "grad_norm": 0.921875, "learning_rate": 0.00043241697218170604, "loss": 0.1524, "step": 173670 }, { "epoch": 7.19, "grad_norm": 1.0, "learning_rate": 0.00043240955604735435, "loss": 0.1748, "step": 173680 }, { "epoch": 7.19, "grad_norm": 0.9140625, "learning_rate": 0.0004324021395697274, "loss": 0.1553, "step": 173690 }, { "epoch": 7.19, "grad_norm": 0.55859375, "learning_rate": 0.00043239472274883946, "loss": 0.252, "step": 173700 }, { "epoch": 7.2, "grad_norm": 0.59765625, "learning_rate": 0.0004323873055847043, "loss": 0.2632, "step": 173710 }, { "epoch": 7.2, "grad_norm": 0.384765625, "learning_rate": 0.000432379888077336, "loss": 0.1919, "step": 173720 }, { "epoch": 7.2, "grad_norm": 0.87890625, "learning_rate": 0.0004323724702267483, "loss": 0.1975, "step": 173730 }, { "epoch": 7.2, "grad_norm": 1.0625, "learning_rate": 0.0004323650520329554, "loss": 0.2028, "step": 173740 }, { "epoch": 7.2, "grad_norm": 0.69140625, "learning_rate": 0.00043235763349597114, "loss": 0.2073, "step": 173750 }, { "epoch": 7.2, "grad_norm": 0.81640625, "learning_rate": 0.00043235021461580946, "loss": 0.1845, "step": 173760 }, { "epoch": 7.2, "grad_norm": 0.640625, "learning_rate": 0.00043234279539248446, "loss": 0.2239, "step": 173770 }, { "epoch": 7.2, "grad_norm": 1.078125, "learning_rate": 0.00043233537582601, "loss": 0.2441, "step": 173780 }, { "epoch": 7.2, "grad_norm": 0.875, "learning_rate": 0.0004323279559164, "loss": 0.2138, "step": 173790 }, { "epoch": 7.2, "grad_norm": 0.6328125, "learning_rate": 0.0004323205356636685, "loss": 0.2157, "step": 173800 }, { "epoch": 7.2, "grad_norm": 1.1015625, "learning_rate": 0.00043231311506782943, "loss": 0.2575, "step": 173810 }, { "epoch": 7.2, "grad_norm": 0.5859375, "learning_rate": 0.00043230569412889674, "loss": 0.1644, "step": 173820 }, { "epoch": 7.2, "grad_norm": 0.95703125, "learning_rate": 0.0004322982728468845, "loss": 0.1918, "step": 173830 }, { "epoch": 7.2, "grad_norm": 0.78515625, "learning_rate": 0.0004322908512218066, "loss": 0.2329, "step": 173840 }, { "epoch": 7.2, "grad_norm": 0.447265625, "learning_rate": 0.0004322834292536769, "loss": 0.2299, "step": 173850 }, { "epoch": 7.2, "grad_norm": 0.66796875, "learning_rate": 0.00043227600694250955, "loss": 0.203, "step": 173860 }, { "epoch": 7.2, "grad_norm": 0.6171875, "learning_rate": 0.00043226858428831844, "loss": 0.2041, "step": 173870 }, { "epoch": 7.2, "grad_norm": 1.203125, "learning_rate": 0.00043226116129111753, "loss": 0.2452, "step": 173880 }, { "epoch": 7.2, "grad_norm": 0.59765625, "learning_rate": 0.00043225373795092076, "loss": 0.193, "step": 173890 }, { "epoch": 7.2, "grad_norm": 0.984375, "learning_rate": 0.0004322463142677422, "loss": 0.1981, "step": 173900 }, { "epoch": 7.2, "grad_norm": 0.94921875, "learning_rate": 0.0004322388902415957, "loss": 0.2432, "step": 173910 }, { "epoch": 7.2, "grad_norm": 0.76171875, "learning_rate": 0.0004322314658724953, "loss": 0.193, "step": 173920 }, { "epoch": 7.2, "grad_norm": 0.734375, "learning_rate": 0.00043222404116045497, "loss": 0.1726, "step": 173930 }, { "epoch": 7.2, "grad_norm": 0.53515625, "learning_rate": 0.0004322166161054887, "loss": 0.2491, "step": 173940 }, { "epoch": 7.2, "grad_norm": 0.48828125, "learning_rate": 0.0004322091907076103, "loss": 0.2229, "step": 173950 }, { "epoch": 7.21, "grad_norm": 0.53515625, "learning_rate": 0.0004322017649668341, "loss": 0.2127, "step": 173960 }, { "epoch": 7.21, "grad_norm": 0.390625, "learning_rate": 0.0004321943388831737, "loss": 0.2295, "step": 173970 }, { "epoch": 7.21, "grad_norm": 0.458984375, "learning_rate": 0.00043218691245664327, "loss": 0.2198, "step": 173980 }, { "epoch": 7.21, "grad_norm": 0.83203125, "learning_rate": 0.0004321794856872567, "loss": 0.2168, "step": 173990 }, { "epoch": 7.21, "grad_norm": 0.66796875, "learning_rate": 0.0004321720585750281, "loss": 0.2417, "step": 174000 }, { "epoch": 7.21, "grad_norm": 0.96484375, "learning_rate": 0.00043216463111997135, "loss": 0.2044, "step": 174010 }, { "epoch": 7.21, "grad_norm": 0.6796875, "learning_rate": 0.0004321572033221004, "loss": 0.2227, "step": 174020 }, { "epoch": 7.21, "grad_norm": 0.828125, "learning_rate": 0.0004321497751814293, "loss": 0.2314, "step": 174030 }, { "epoch": 7.21, "grad_norm": 0.98828125, "learning_rate": 0.0004321423466979719, "loss": 0.2061, "step": 174040 }, { "epoch": 7.21, "grad_norm": 0.40234375, "learning_rate": 0.00043213491787174235, "loss": 0.2665, "step": 174050 }, { "epoch": 7.21, "grad_norm": 0.75390625, "learning_rate": 0.0004321274887027545, "loss": 0.1865, "step": 174060 }, { "epoch": 7.21, "grad_norm": 0.50390625, "learning_rate": 0.0004321200591910225, "loss": 0.1852, "step": 174070 }, { "epoch": 7.21, "grad_norm": 0.482421875, "learning_rate": 0.0004321126293365601, "loss": 0.2037, "step": 174080 }, { "epoch": 7.21, "grad_norm": 0.7890625, "learning_rate": 0.0004321051991393815, "loss": 0.2456, "step": 174090 }, { "epoch": 7.21, "grad_norm": 0.6796875, "learning_rate": 0.00043209776859950044, "loss": 0.3129, "step": 174100 }, { "epoch": 7.21, "grad_norm": 0.62890625, "learning_rate": 0.0004320903377169311, "loss": 0.1557, "step": 174110 }, { "epoch": 7.21, "grad_norm": 0.7578125, "learning_rate": 0.00043208290649168746, "loss": 0.1916, "step": 174120 }, { "epoch": 7.21, "grad_norm": 0.396484375, "learning_rate": 0.00043207547492378343, "loss": 0.1797, "step": 174130 }, { "epoch": 7.21, "grad_norm": 0.7109375, "learning_rate": 0.000432068043013233, "loss": 0.1485, "step": 174140 }, { "epoch": 7.21, "grad_norm": 0.7890625, "learning_rate": 0.00043206061076005023, "loss": 0.195, "step": 174150 }, { "epoch": 7.21, "grad_norm": 1.0390625, "learning_rate": 0.00043205317816424893, "loss": 0.2358, "step": 174160 }, { "epoch": 7.21, "grad_norm": 0.6328125, "learning_rate": 0.0004320457452258433, "loss": 0.1715, "step": 174170 }, { "epoch": 7.21, "grad_norm": 0.5, "learning_rate": 0.00043203831194484727, "loss": 0.1435, "step": 174180 }, { "epoch": 7.21, "grad_norm": 1.046875, "learning_rate": 0.00043203087832127473, "loss": 0.2186, "step": 174190 }, { "epoch": 7.22, "grad_norm": 0.388671875, "learning_rate": 0.0004320234443551397, "loss": 0.1849, "step": 174200 }, { "epoch": 7.22, "grad_norm": 0.73046875, "learning_rate": 0.0004320160100464563, "loss": 0.2358, "step": 174210 }, { "epoch": 7.22, "grad_norm": 0.291015625, "learning_rate": 0.0004320085753952384, "loss": 0.2006, "step": 174220 }, { "epoch": 7.22, "grad_norm": 0.2578125, "learning_rate": 0.00043200114040149994, "loss": 0.1767, "step": 174230 }, { "epoch": 7.22, "grad_norm": 0.58984375, "learning_rate": 0.00043199370506525513, "loss": 0.2093, "step": 174240 }, { "epoch": 7.22, "grad_norm": 1.1015625, "learning_rate": 0.00043198626938651774, "loss": 0.1837, "step": 174250 }, { "epoch": 7.22, "grad_norm": 0.671875, "learning_rate": 0.00043197883336530186, "loss": 0.1613, "step": 174260 }, { "epoch": 7.22, "grad_norm": 0.53125, "learning_rate": 0.0004319713970016214, "loss": 0.2824, "step": 174270 }, { "epoch": 7.22, "grad_norm": 0.62890625, "learning_rate": 0.0004319639602954905, "loss": 0.1589, "step": 174280 }, { "epoch": 7.22, "grad_norm": 0.36328125, "learning_rate": 0.00043195652324692305, "loss": 0.2281, "step": 174290 }, { "epoch": 7.22, "grad_norm": 0.82421875, "learning_rate": 0.0004319490858559331, "loss": 0.2087, "step": 174300 }, { "epoch": 7.22, "grad_norm": 0.458984375, "learning_rate": 0.00043194164812253457, "loss": 0.1995, "step": 174310 }, { "epoch": 7.22, "grad_norm": 0.84765625, "learning_rate": 0.0004319342100467415, "loss": 0.1978, "step": 174320 }, { "epoch": 7.22, "grad_norm": 0.8515625, "learning_rate": 0.0004319267716285679, "loss": 0.1561, "step": 174330 }, { "epoch": 7.22, "grad_norm": 0.703125, "learning_rate": 0.0004319193328680278, "loss": 0.2034, "step": 174340 }, { "epoch": 7.22, "grad_norm": 1.4453125, "learning_rate": 0.0004319118937651352, "loss": 0.2085, "step": 174350 }, { "epoch": 7.22, "grad_norm": 0.84375, "learning_rate": 0.000431904454319904, "loss": 0.1614, "step": 174360 }, { "epoch": 7.22, "grad_norm": 0.53125, "learning_rate": 0.0004318970145323482, "loss": 0.1903, "step": 174370 }, { "epoch": 7.22, "grad_norm": 0.353515625, "learning_rate": 0.00043188957440248193, "loss": 0.2041, "step": 174380 }, { "epoch": 7.22, "grad_norm": 0.609375, "learning_rate": 0.00043188213393031917, "loss": 0.1486, "step": 174390 }, { "epoch": 7.22, "grad_norm": 0.369140625, "learning_rate": 0.0004318746931158738, "loss": 0.2069, "step": 174400 }, { "epoch": 7.22, "grad_norm": 0.353515625, "learning_rate": 0.0004318672519591599, "loss": 0.2216, "step": 174410 }, { "epoch": 7.22, "grad_norm": 0.734375, "learning_rate": 0.0004318598104601915, "loss": 0.2092, "step": 174420 }, { "epoch": 7.22, "grad_norm": 0.3359375, "learning_rate": 0.0004318523686189826, "loss": 0.2062, "step": 174430 }, { "epoch": 7.23, "grad_norm": 1.0703125, "learning_rate": 0.00043184492643554717, "loss": 0.2518, "step": 174440 }, { "epoch": 7.23, "grad_norm": 0.470703125, "learning_rate": 0.0004318374839098992, "loss": 0.203, "step": 174450 }, { "epoch": 7.23, "grad_norm": 0.73828125, "learning_rate": 0.0004318300410420527, "loss": 0.1842, "step": 174460 }, { "epoch": 7.23, "grad_norm": 0.255859375, "learning_rate": 0.00043182259783202173, "loss": 0.2013, "step": 174470 }, { "epoch": 7.23, "grad_norm": 1.78125, "learning_rate": 0.0004318151542798203, "loss": 0.2063, "step": 174480 }, { "epoch": 7.23, "grad_norm": 0.86328125, "learning_rate": 0.00043180771038546235, "loss": 0.2103, "step": 174490 }, { "epoch": 7.23, "grad_norm": 0.7578125, "learning_rate": 0.0004318002661489619, "loss": 0.199, "step": 174500 }, { "epoch": 7.23, "grad_norm": 0.1953125, "learning_rate": 0.000431792821570333, "loss": 0.1928, "step": 174510 }, { "epoch": 7.23, "grad_norm": 1.1171875, "learning_rate": 0.0004317853766495897, "loss": 0.1817, "step": 174520 }, { "epoch": 7.23, "grad_norm": 0.890625, "learning_rate": 0.0004317779313867459, "loss": 0.195, "step": 174530 }, { "epoch": 7.23, "grad_norm": 1.28125, "learning_rate": 0.00043177048578181565, "loss": 0.2041, "step": 174540 }, { "epoch": 7.23, "grad_norm": 1.2421875, "learning_rate": 0.00043176303983481296, "loss": 0.2424, "step": 174550 }, { "epoch": 7.23, "grad_norm": 0.453125, "learning_rate": 0.0004317555935457519, "loss": 0.2436, "step": 174560 }, { "epoch": 7.23, "grad_norm": 1.25, "learning_rate": 0.00043174814691464644, "loss": 0.2207, "step": 174570 }, { "epoch": 7.23, "grad_norm": 0.353515625, "learning_rate": 0.0004317406999415106, "loss": 0.237, "step": 174580 }, { "epoch": 7.23, "grad_norm": 0.80078125, "learning_rate": 0.0004317332526263583, "loss": 0.211, "step": 174590 }, { "epoch": 7.23, "grad_norm": 0.376953125, "learning_rate": 0.00043172580496920375, "loss": 0.1598, "step": 174600 }, { "epoch": 7.23, "grad_norm": 1.0625, "learning_rate": 0.0004317183569700608, "loss": 0.2094, "step": 174610 }, { "epoch": 7.23, "grad_norm": 1.3671875, "learning_rate": 0.0004317109086289436, "loss": 0.2036, "step": 174620 }, { "epoch": 7.23, "grad_norm": 0.8046875, "learning_rate": 0.00043170345994586603, "loss": 0.256, "step": 174630 }, { "epoch": 7.23, "grad_norm": 1.0703125, "learning_rate": 0.0004316960109208422, "loss": 0.2265, "step": 174640 }, { "epoch": 7.23, "grad_norm": 0.671875, "learning_rate": 0.000431688561553886, "loss": 0.1447, "step": 174650 }, { "epoch": 7.23, "grad_norm": 0.546875, "learning_rate": 0.00043168111184501166, "loss": 0.2164, "step": 174660 }, { "epoch": 7.23, "grad_norm": 0.53125, "learning_rate": 0.00043167366179423306, "loss": 0.1951, "step": 174670 }, { "epoch": 7.24, "grad_norm": 1.421875, "learning_rate": 0.00043166621140156426, "loss": 0.1898, "step": 174680 }, { "epoch": 7.24, "grad_norm": 0.3515625, "learning_rate": 0.00043165876066701925, "loss": 0.1552, "step": 174690 }, { "epoch": 7.24, "grad_norm": 1.28125, "learning_rate": 0.00043165130959061206, "loss": 0.1624, "step": 174700 }, { "epoch": 7.24, "grad_norm": 0.486328125, "learning_rate": 0.00043164385817235675, "loss": 0.2143, "step": 174710 }, { "epoch": 7.24, "grad_norm": 1.046875, "learning_rate": 0.00043163640641226734, "loss": 0.1808, "step": 174720 }, { "epoch": 7.24, "grad_norm": 0.37109375, "learning_rate": 0.00043162895431035777, "loss": 0.2211, "step": 174730 }, { "epoch": 7.24, "grad_norm": 1.3203125, "learning_rate": 0.00043162150186664214, "loss": 0.1945, "step": 174740 }, { "epoch": 7.24, "grad_norm": 0.291015625, "learning_rate": 0.0004316140490811344, "loss": 0.2096, "step": 174750 }, { "epoch": 7.24, "grad_norm": 0.40234375, "learning_rate": 0.00043160659595384873, "loss": 0.2715, "step": 174760 }, { "epoch": 7.24, "grad_norm": 0.41015625, "learning_rate": 0.00043159914248479904, "loss": 0.2701, "step": 174770 }, { "epoch": 7.24, "grad_norm": 0.52734375, "learning_rate": 0.00043159168867399933, "loss": 0.206, "step": 174780 }, { "epoch": 7.24, "grad_norm": 0.73046875, "learning_rate": 0.00043158423452146366, "loss": 0.1825, "step": 174790 }, { "epoch": 7.24, "grad_norm": 0.76953125, "learning_rate": 0.0004315767800272061, "loss": 0.2175, "step": 174800 }, { "epoch": 7.24, "grad_norm": 0.177734375, "learning_rate": 0.0004315693251912407, "loss": 0.2175, "step": 174810 }, { "epoch": 7.24, "grad_norm": 1.046875, "learning_rate": 0.00043156187001358137, "loss": 0.1883, "step": 174820 }, { "epoch": 7.24, "grad_norm": 0.392578125, "learning_rate": 0.00043155441449424227, "loss": 0.1993, "step": 174830 }, { "epoch": 7.24, "grad_norm": 0.73046875, "learning_rate": 0.0004315469586332373, "loss": 0.2031, "step": 174840 }, { "epoch": 7.24, "grad_norm": 0.97265625, "learning_rate": 0.0004315395024305806, "loss": 0.1795, "step": 174850 }, { "epoch": 7.24, "grad_norm": 0.90625, "learning_rate": 0.00043153204588628613, "loss": 0.1842, "step": 174860 }, { "epoch": 7.24, "grad_norm": 1.1796875, "learning_rate": 0.0004315245890003679, "loss": 0.2057, "step": 174870 }, { "epoch": 7.24, "grad_norm": 1.265625, "learning_rate": 0.0004315171317728401, "loss": 0.2196, "step": 174880 }, { "epoch": 7.24, "grad_norm": 1.140625, "learning_rate": 0.0004315096742037167, "loss": 0.1407, "step": 174890 }, { "epoch": 7.24, "grad_norm": 0.67578125, "learning_rate": 0.00043150221629301155, "loss": 0.1896, "step": 174900 }, { "epoch": 7.24, "grad_norm": 0.671875, "learning_rate": 0.0004314947580407389, "loss": 0.1816, "step": 174910 }, { "epoch": 7.25, "grad_norm": 0.625, "learning_rate": 0.0004314872994469128, "loss": 0.1792, "step": 174920 }, { "epoch": 7.25, "grad_norm": 0.87890625, "learning_rate": 0.0004314798405115471, "loss": 0.1812, "step": 174930 }, { "epoch": 7.25, "grad_norm": 0.24609375, "learning_rate": 0.00043147238123465595, "loss": 0.1652, "step": 174940 }, { "epoch": 7.25, "grad_norm": 0.44921875, "learning_rate": 0.0004314649216162533, "loss": 0.214, "step": 174950 }, { "epoch": 7.25, "grad_norm": 0.51953125, "learning_rate": 0.0004314574616563534, "loss": 0.1627, "step": 174960 }, { "epoch": 7.25, "grad_norm": 2.09375, "learning_rate": 0.0004314500013549701, "loss": 0.2346, "step": 174970 }, { "epoch": 7.25, "grad_norm": 0.5546875, "learning_rate": 0.00043144254071211753, "loss": 0.193, "step": 174980 }, { "epoch": 7.25, "grad_norm": 0.87109375, "learning_rate": 0.0004314350797278096, "loss": 0.2197, "step": 174990 }, { "epoch": 7.25, "grad_norm": 1.265625, "learning_rate": 0.00043142761840206053, "loss": 0.1877, "step": 175000 }, { "epoch": 7.25, "grad_norm": 0.66796875, "learning_rate": 0.0004314201567348842, "loss": 0.2098, "step": 175010 }, { "epoch": 7.25, "grad_norm": 0.408203125, "learning_rate": 0.00043141269472629477, "loss": 0.1615, "step": 175020 }, { "epoch": 7.25, "grad_norm": 0.6328125, "learning_rate": 0.00043140523237630623, "loss": 0.2145, "step": 175030 }, { "epoch": 7.25, "grad_norm": 1.2265625, "learning_rate": 0.0004313977696849327, "loss": 0.2328, "step": 175040 }, { "epoch": 7.25, "grad_norm": 0.578125, "learning_rate": 0.00043139030665218805, "loss": 0.1624, "step": 175050 }, { "epoch": 7.25, "grad_norm": 0.408203125, "learning_rate": 0.00043138284327808653, "loss": 0.2033, "step": 175060 }, { "epoch": 7.25, "grad_norm": 1.0078125, "learning_rate": 0.000431375379562642, "loss": 0.191, "step": 175070 }, { "epoch": 7.25, "grad_norm": 0.478515625, "learning_rate": 0.00043136791550586864, "loss": 0.199, "step": 175080 }, { "epoch": 7.25, "grad_norm": 0.453125, "learning_rate": 0.00043136045110778046, "loss": 0.2432, "step": 175090 }, { "epoch": 7.25, "grad_norm": 0.52734375, "learning_rate": 0.00043135298636839145, "loss": 0.1674, "step": 175100 }, { "epoch": 7.25, "grad_norm": 0.59375, "learning_rate": 0.0004313455212877157, "loss": 0.1963, "step": 175110 }, { "epoch": 7.25, "grad_norm": 1.078125, "learning_rate": 0.0004313380558657673, "loss": 0.1761, "step": 175120 }, { "epoch": 7.25, "grad_norm": 0.91796875, "learning_rate": 0.00043133059010256025, "loss": 0.1998, "step": 175130 }, { "epoch": 7.25, "grad_norm": 0.5859375, "learning_rate": 0.0004313231239981086, "loss": 0.2076, "step": 175140 }, { "epoch": 7.25, "grad_norm": 1.453125, "learning_rate": 0.0004313156575524264, "loss": 0.1739, "step": 175150 }, { "epoch": 7.26, "grad_norm": 1.546875, "learning_rate": 0.00043130819076552776, "loss": 0.2002, "step": 175160 }, { "epoch": 7.26, "grad_norm": 0.34375, "learning_rate": 0.0004313007236374267, "loss": 0.1583, "step": 175170 }, { "epoch": 7.26, "grad_norm": 0.5390625, "learning_rate": 0.0004312932561681372, "loss": 0.2089, "step": 175180 }, { "epoch": 7.26, "grad_norm": 0.67578125, "learning_rate": 0.0004312857883576734, "loss": 0.2048, "step": 175190 }, { "epoch": 7.26, "grad_norm": 0.65625, "learning_rate": 0.0004312783202060493, "loss": 0.2202, "step": 175200 }, { "epoch": 7.26, "grad_norm": 0.78125, "learning_rate": 0.00043127085171327893, "loss": 0.2299, "step": 175210 }, { "epoch": 7.26, "grad_norm": 0.3359375, "learning_rate": 0.0004312633828793765, "loss": 0.2487, "step": 175220 }, { "epoch": 7.26, "grad_norm": 0.47265625, "learning_rate": 0.0004312559137043559, "loss": 0.1912, "step": 175230 }, { "epoch": 7.26, "grad_norm": 0.2275390625, "learning_rate": 0.0004312484441882313, "loss": 0.2175, "step": 175240 }, { "epoch": 7.26, "grad_norm": 1.0390625, "learning_rate": 0.00043124097433101664, "loss": 0.2102, "step": 175250 }, { "epoch": 7.26, "grad_norm": 0.71484375, "learning_rate": 0.000431233504132726, "loss": 0.1737, "step": 175260 }, { "epoch": 7.26, "grad_norm": 0.953125, "learning_rate": 0.0004312260335933735, "loss": 0.2455, "step": 175270 }, { "epoch": 7.26, "grad_norm": 0.376953125, "learning_rate": 0.0004312185627129733, "loss": 0.1652, "step": 175280 }, { "epoch": 7.26, "grad_norm": 0.9921875, "learning_rate": 0.00043121109149153925, "loss": 0.2123, "step": 175290 }, { "epoch": 7.26, "grad_norm": 0.7578125, "learning_rate": 0.0004312036199290854, "loss": 0.1873, "step": 175300 }, { "epoch": 7.26, "grad_norm": 0.76953125, "learning_rate": 0.00043119614802562605, "loss": 0.247, "step": 175310 }, { "epoch": 7.26, "grad_norm": 0.376953125, "learning_rate": 0.00043118867578117504, "loss": 0.2048, "step": 175320 }, { "epoch": 7.26, "grad_norm": 0.62109375, "learning_rate": 0.00043118120319574657, "loss": 0.1863, "step": 175330 }, { "epoch": 7.26, "grad_norm": 0.92578125, "learning_rate": 0.00043117373026935456, "loss": 0.1949, "step": 175340 }, { "epoch": 7.26, "grad_norm": 0.578125, "learning_rate": 0.00043116625700201323, "loss": 0.232, "step": 175350 }, { "epoch": 7.26, "grad_norm": 3.046875, "learning_rate": 0.00043115878339373655, "loss": 0.1997, "step": 175360 }, { "epoch": 7.26, "grad_norm": 0.392578125, "learning_rate": 0.0004311513094445386, "loss": 0.2088, "step": 175370 }, { "epoch": 7.26, "grad_norm": 0.703125, "learning_rate": 0.0004311438351544334, "loss": 0.1854, "step": 175380 }, { "epoch": 7.26, "grad_norm": 0.5703125, "learning_rate": 0.0004311363605234351, "loss": 0.1814, "step": 175390 }, { "epoch": 7.27, "grad_norm": 0.7265625, "learning_rate": 0.0004311288855515577, "loss": 0.1978, "step": 175400 }, { "epoch": 7.27, "grad_norm": 0.318359375, "learning_rate": 0.0004311214102388153, "loss": 0.1878, "step": 175410 }, { "epoch": 7.27, "grad_norm": 0.5859375, "learning_rate": 0.00043111393458522204, "loss": 0.2102, "step": 175420 }, { "epoch": 7.27, "grad_norm": 0.80078125, "learning_rate": 0.0004311064585907919, "loss": 0.1851, "step": 175430 }, { "epoch": 7.27, "grad_norm": 2.625, "learning_rate": 0.0004310989822555389, "loss": 0.1719, "step": 175440 }, { "epoch": 7.27, "grad_norm": 0.546875, "learning_rate": 0.0004310915055794772, "loss": 0.193, "step": 175450 }, { "epoch": 7.27, "grad_norm": 0.8125, "learning_rate": 0.00043108402856262084, "loss": 0.2438, "step": 175460 }, { "epoch": 7.27, "grad_norm": 1.140625, "learning_rate": 0.0004310765512049839, "loss": 0.2135, "step": 175470 }, { "epoch": 7.27, "grad_norm": 0.59375, "learning_rate": 0.0004310690735065804, "loss": 0.2492, "step": 175480 }, { "epoch": 7.27, "grad_norm": 0.341796875, "learning_rate": 0.0004310615954674245, "loss": 0.1998, "step": 175490 }, { "epoch": 7.27, "grad_norm": 0.61328125, "learning_rate": 0.00043105411708753026, "loss": 0.2077, "step": 175500 }, { "epoch": 7.27, "grad_norm": 0.78125, "learning_rate": 0.0004310466383669117, "loss": 0.215, "step": 175510 }, { "epoch": 7.27, "grad_norm": 1.0234375, "learning_rate": 0.0004310391593055829, "loss": 0.2003, "step": 175520 }, { "epoch": 7.27, "grad_norm": 0.72265625, "learning_rate": 0.00043103167990355795, "loss": 0.2219, "step": 175530 }, { "epoch": 7.27, "grad_norm": 1.9609375, "learning_rate": 0.00043102420016085096, "loss": 0.1531, "step": 175540 }, { "epoch": 7.27, "grad_norm": 0.671875, "learning_rate": 0.00043101672007747594, "loss": 0.1883, "step": 175550 }, { "epoch": 7.27, "grad_norm": 0.921875, "learning_rate": 0.0004310092396534471, "loss": 0.2787, "step": 175560 }, { "epoch": 7.27, "grad_norm": 1.3046875, "learning_rate": 0.0004310017588887783, "loss": 0.1909, "step": 175570 }, { "epoch": 7.27, "grad_norm": 0.69921875, "learning_rate": 0.00043099427778348374, "loss": 0.189, "step": 175580 }, { "epoch": 7.27, "grad_norm": 0.5, "learning_rate": 0.00043098679633757754, "loss": 0.223, "step": 175590 }, { "epoch": 7.27, "grad_norm": 0.416015625, "learning_rate": 0.0004309793145510737, "loss": 0.1673, "step": 175600 }, { "epoch": 7.27, "grad_norm": 0.60546875, "learning_rate": 0.0004309718324239864, "loss": 0.1657, "step": 175610 }, { "epoch": 7.27, "grad_norm": 1.265625, "learning_rate": 0.0004309643499563296, "loss": 0.1725, "step": 175620 }, { "epoch": 7.27, "grad_norm": 0.62890625, "learning_rate": 0.00043095686714811744, "loss": 0.2324, "step": 175630 }, { "epoch": 7.27, "grad_norm": 0.466796875, "learning_rate": 0.000430949383999364, "loss": 0.164, "step": 175640 }, { "epoch": 7.28, "grad_norm": 0.8984375, "learning_rate": 0.00043094190051008334, "loss": 0.2372, "step": 175650 }, { "epoch": 7.28, "grad_norm": 0.57421875, "learning_rate": 0.0004309344166802896, "loss": 0.1753, "step": 175660 }, { "epoch": 7.28, "grad_norm": 0.8828125, "learning_rate": 0.00043092693250999683, "loss": 0.2608, "step": 175670 }, { "epoch": 7.28, "grad_norm": 0.47265625, "learning_rate": 0.00043091944799921914, "loss": 0.1477, "step": 175680 }, { "epoch": 7.28, "grad_norm": 0.42578125, "learning_rate": 0.0004309119631479704, "loss": 0.1952, "step": 175690 }, { "epoch": 7.28, "grad_norm": 0.4921875, "learning_rate": 0.0004309044779562651, "loss": 0.1375, "step": 175700 }, { "epoch": 7.28, "grad_norm": 0.625, "learning_rate": 0.00043089699242411706, "loss": 0.2203, "step": 175710 }, { "epoch": 7.28, "grad_norm": 0.875, "learning_rate": 0.00043088950655154035, "loss": 0.1675, "step": 175720 }, { "epoch": 7.28, "grad_norm": 0.7578125, "learning_rate": 0.00043088202033854915, "loss": 0.2309, "step": 175730 }, { "epoch": 7.28, "grad_norm": 0.96484375, "learning_rate": 0.00043087453378515755, "loss": 0.1994, "step": 175740 }, { "epoch": 7.28, "grad_norm": 0.58203125, "learning_rate": 0.00043086704689137965, "loss": 0.2007, "step": 175750 }, { "epoch": 7.28, "grad_norm": 0.78125, "learning_rate": 0.0004308595596572294, "loss": 0.2056, "step": 175760 }, { "epoch": 7.28, "grad_norm": 0.68359375, "learning_rate": 0.00043085207208272105, "loss": 0.2592, "step": 175770 }, { "epoch": 7.28, "grad_norm": 0.97265625, "learning_rate": 0.0004308445841678686, "loss": 0.2011, "step": 175780 }, { "epoch": 7.28, "grad_norm": 1.453125, "learning_rate": 0.00043083709591268615, "loss": 0.2389, "step": 175790 }, { "epoch": 7.28, "grad_norm": 1.265625, "learning_rate": 0.0004308296073171879, "loss": 0.2246, "step": 175800 }, { "epoch": 7.28, "grad_norm": 2.34375, "learning_rate": 0.00043082211838138773, "loss": 0.226, "step": 175810 }, { "epoch": 7.28, "grad_norm": 1.0546875, "learning_rate": 0.0004308146291053, "loss": 0.1872, "step": 175820 }, { "epoch": 7.28, "grad_norm": 0.462890625, "learning_rate": 0.0004308071394889386, "loss": 0.1489, "step": 175830 }, { "epoch": 7.28, "grad_norm": 0.62890625, "learning_rate": 0.00043079964953231763, "loss": 0.209, "step": 175840 }, { "epoch": 7.28, "grad_norm": 0.7890625, "learning_rate": 0.00043079215923545137, "loss": 0.2171, "step": 175850 }, { "epoch": 7.28, "grad_norm": 0.4296875, "learning_rate": 0.00043078466859835374, "loss": 0.2348, "step": 175860 }, { "epoch": 7.28, "grad_norm": 1.3671875, "learning_rate": 0.00043077717762103883, "loss": 0.2348, "step": 175870 }, { "epoch": 7.28, "grad_norm": 0.83984375, "learning_rate": 0.00043076968630352086, "loss": 0.2196, "step": 175880 }, { "epoch": 7.29, "grad_norm": 0.4921875, "learning_rate": 0.0004307621946458138, "loss": 0.2053, "step": 175890 }, { "epoch": 7.29, "grad_norm": 1.140625, "learning_rate": 0.0004307547026479318, "loss": 0.2287, "step": 175900 }, { "epoch": 7.29, "grad_norm": 0.69140625, "learning_rate": 0.00043074721030988907, "loss": 0.1811, "step": 175910 }, { "epoch": 7.29, "grad_norm": 0.71875, "learning_rate": 0.0004307397176316996, "loss": 0.2272, "step": 175920 }, { "epoch": 7.29, "grad_norm": 0.578125, "learning_rate": 0.0004307322246133774, "loss": 0.245, "step": 175930 }, { "epoch": 7.29, "grad_norm": 0.58984375, "learning_rate": 0.0004307247312549367, "loss": 0.2209, "step": 175940 }, { "epoch": 7.29, "grad_norm": 0.625, "learning_rate": 0.00043071723755639166, "loss": 0.1626, "step": 175950 }, { "epoch": 7.29, "grad_norm": 0.50390625, "learning_rate": 0.0004307097435177562, "loss": 0.2245, "step": 175960 }, { "epoch": 7.29, "grad_norm": 1.6171875, "learning_rate": 0.00043070224913904456, "loss": 0.2151, "step": 175970 }, { "epoch": 7.29, "grad_norm": 1.203125, "learning_rate": 0.0004306947544202708, "loss": 0.1756, "step": 175980 }, { "epoch": 7.29, "grad_norm": 0.67578125, "learning_rate": 0.00043068725936144906, "loss": 0.2138, "step": 175990 }, { "epoch": 7.29, "grad_norm": 0.703125, "learning_rate": 0.00043067976396259333, "loss": 0.2501, "step": 176000 }, { "epoch": 7.29, "grad_norm": 0.65625, "learning_rate": 0.00043067226822371787, "loss": 0.1814, "step": 176010 }, { "epoch": 7.29, "grad_norm": 0.64453125, "learning_rate": 0.0004306647721448366, "loss": 0.1694, "step": 176020 }, { "epoch": 7.29, "grad_norm": 0.5625, "learning_rate": 0.00043065727572596385, "loss": 0.1597, "step": 176030 }, { "epoch": 7.29, "grad_norm": 1.328125, "learning_rate": 0.0004306497789671136, "loss": 0.2517, "step": 176040 }, { "epoch": 7.29, "grad_norm": 0.55859375, "learning_rate": 0.0004306422818682999, "loss": 0.1765, "step": 176050 }, { "epoch": 7.29, "grad_norm": 0.72265625, "learning_rate": 0.000430634784429537, "loss": 0.2434, "step": 176060 }, { "epoch": 7.29, "grad_norm": 0.46484375, "learning_rate": 0.00043062728665083897, "loss": 0.2016, "step": 176070 }, { "epoch": 7.29, "grad_norm": 0.32421875, "learning_rate": 0.00043061978853221986, "loss": 0.2343, "step": 176080 }, { "epoch": 7.29, "grad_norm": 0.640625, "learning_rate": 0.0004306122900736938, "loss": 0.226, "step": 176090 }, { "epoch": 7.29, "grad_norm": 0.51171875, "learning_rate": 0.0004306047912752749, "loss": 0.1349, "step": 176100 }, { "epoch": 7.29, "grad_norm": 0.66015625, "learning_rate": 0.00043059729213697727, "loss": 0.2053, "step": 176110 }, { "epoch": 7.29, "grad_norm": 0.80078125, "learning_rate": 0.00043058979265881515, "loss": 0.2125, "step": 176120 }, { "epoch": 7.3, "grad_norm": 0.625, "learning_rate": 0.00043058229284080243, "loss": 0.1539, "step": 176130 }, { "epoch": 7.3, "grad_norm": 0.9296875, "learning_rate": 0.00043057479268295343, "loss": 0.1978, "step": 176140 }, { "epoch": 7.3, "grad_norm": 0.72265625, "learning_rate": 0.00043056729218528207, "loss": 0.2142, "step": 176150 }, { "epoch": 7.3, "grad_norm": 0.8046875, "learning_rate": 0.00043055979134780267, "loss": 0.176, "step": 176160 }, { "epoch": 7.3, "grad_norm": 0.3125, "learning_rate": 0.0004305522901705291, "loss": 0.2236, "step": 176170 }, { "epoch": 7.3, "grad_norm": 0.609375, "learning_rate": 0.00043054478865347574, "loss": 0.1734, "step": 176180 }, { "epoch": 7.3, "grad_norm": 1.125, "learning_rate": 0.00043053728679665656, "loss": 0.2183, "step": 176190 }, { "epoch": 7.3, "grad_norm": 0.71484375, "learning_rate": 0.00043052978460008566, "loss": 0.2513, "step": 176200 }, { "epoch": 7.3, "grad_norm": 1.265625, "learning_rate": 0.00043052228206377725, "loss": 0.2082, "step": 176210 }, { "epoch": 7.3, "grad_norm": 0.400390625, "learning_rate": 0.0004305147791877454, "loss": 0.2114, "step": 176220 }, { "epoch": 7.3, "grad_norm": 0.61328125, "learning_rate": 0.00043050727597200415, "loss": 0.1879, "step": 176230 }, { "epoch": 7.3, "grad_norm": 0.291015625, "learning_rate": 0.0004304997724165678, "loss": 0.1899, "step": 176240 }, { "epoch": 7.3, "grad_norm": 0.609375, "learning_rate": 0.0004304922685214503, "loss": 0.1766, "step": 176250 }, { "epoch": 7.3, "grad_norm": 0.7421875, "learning_rate": 0.0004304847642866659, "loss": 0.1593, "step": 176260 }, { "epoch": 7.3, "grad_norm": 0.80078125, "learning_rate": 0.00043047725971222863, "loss": 0.2212, "step": 176270 }, { "epoch": 7.3, "grad_norm": 1.1328125, "learning_rate": 0.00043046975479815264, "loss": 0.2273, "step": 176280 }, { "epoch": 7.3, "grad_norm": 0.53125, "learning_rate": 0.0004304622495444521, "loss": 0.236, "step": 176290 }, { "epoch": 7.3, "grad_norm": 0.376953125, "learning_rate": 0.000430454743951141, "loss": 0.2307, "step": 176300 }, { "epoch": 7.3, "grad_norm": 1.84375, "learning_rate": 0.00043044723801823374, "loss": 0.1974, "step": 176310 }, { "epoch": 7.3, "grad_norm": 0.87109375, "learning_rate": 0.0004304397317457441, "loss": 0.1987, "step": 176320 }, { "epoch": 7.3, "grad_norm": 1.0625, "learning_rate": 0.0004304322251336864, "loss": 0.2146, "step": 176330 }, { "epoch": 7.3, "grad_norm": 0.439453125, "learning_rate": 0.00043042471818207473, "loss": 0.2323, "step": 176340 }, { "epoch": 7.3, "grad_norm": 1.3203125, "learning_rate": 0.0004304172108909233, "loss": 0.2191, "step": 176350 }, { "epoch": 7.3, "grad_norm": 0.55078125, "learning_rate": 0.0004304097032602461, "loss": 0.2369, "step": 176360 }, { "epoch": 7.31, "grad_norm": 1.125, "learning_rate": 0.0004304021952900574, "loss": 0.2184, "step": 176370 }, { "epoch": 7.31, "grad_norm": 0.6015625, "learning_rate": 0.0004303946869803711, "loss": 0.2028, "step": 176380 }, { "epoch": 7.31, "grad_norm": 0.84765625, "learning_rate": 0.0004303871783312016, "loss": 0.1714, "step": 176390 }, { "epoch": 7.31, "grad_norm": 1.046875, "learning_rate": 0.0004303796693425629, "loss": 0.1925, "step": 176400 }, { "epoch": 7.31, "grad_norm": 0.53125, "learning_rate": 0.0004303721600144691, "loss": 0.1829, "step": 176410 }, { "epoch": 7.31, "grad_norm": 1.375, "learning_rate": 0.0004303646503469344, "loss": 0.2273, "step": 176420 }, { "epoch": 7.31, "grad_norm": 1.265625, "learning_rate": 0.00043035714033997285, "loss": 0.2138, "step": 176430 }, { "epoch": 7.31, "grad_norm": 0.400390625, "learning_rate": 0.00043034962999359875, "loss": 0.2122, "step": 176440 }, { "epoch": 7.31, "grad_norm": 0.92578125, "learning_rate": 0.000430342119307826, "loss": 0.1821, "step": 176450 }, { "epoch": 7.31, "grad_norm": 1.1640625, "learning_rate": 0.00043033460828266896, "loss": 0.2102, "step": 176460 }, { "epoch": 7.31, "grad_norm": 0.50390625, "learning_rate": 0.00043032709691814163, "loss": 0.1934, "step": 176470 }, { "epoch": 7.31, "grad_norm": 1.03125, "learning_rate": 0.00043031958521425815, "loss": 0.2045, "step": 176480 }, { "epoch": 7.31, "grad_norm": 0.74609375, "learning_rate": 0.00043031207317103273, "loss": 0.1973, "step": 176490 }, { "epoch": 7.31, "grad_norm": 0.5703125, "learning_rate": 0.0004303045607884794, "loss": 0.187, "step": 176500 }, { "epoch": 7.31, "grad_norm": 0.62890625, "learning_rate": 0.0004302970480666124, "loss": 0.2193, "step": 176510 }, { "epoch": 7.31, "grad_norm": 0.640625, "learning_rate": 0.00043028953500544575, "loss": 0.2134, "step": 176520 }, { "epoch": 7.31, "grad_norm": 0.828125, "learning_rate": 0.00043028202160499377, "loss": 0.1964, "step": 176530 }, { "epoch": 7.31, "grad_norm": 1.25, "learning_rate": 0.0004302745078652704, "loss": 0.2042, "step": 176540 }, { "epoch": 7.31, "grad_norm": 0.99609375, "learning_rate": 0.00043026699378628997, "loss": 0.2083, "step": 176550 }, { "epoch": 7.31, "grad_norm": 0.3515625, "learning_rate": 0.0004302594793680665, "loss": 0.1938, "step": 176560 }, { "epoch": 7.31, "grad_norm": 0.87890625, "learning_rate": 0.00043025196461061413, "loss": 0.1832, "step": 176570 }, { "epoch": 7.31, "grad_norm": 0.376953125, "learning_rate": 0.00043024444951394704, "loss": 0.2251, "step": 176580 }, { "epoch": 7.31, "grad_norm": 0.5234375, "learning_rate": 0.0004302369340780794, "loss": 0.215, "step": 176590 }, { "epoch": 7.31, "grad_norm": 0.6328125, "learning_rate": 0.0004302294183030252, "loss": 0.1612, "step": 176600 }, { "epoch": 7.32, "grad_norm": 0.3828125, "learning_rate": 0.0004302219021887988, "loss": 0.2005, "step": 176610 }, { "epoch": 7.32, "grad_norm": 0.470703125, "learning_rate": 0.0004302143857354142, "loss": 0.1948, "step": 176620 }, { "epoch": 7.32, "grad_norm": 1.0625, "learning_rate": 0.0004302068689428856, "loss": 0.1944, "step": 176630 }, { "epoch": 7.32, "grad_norm": 2.125, "learning_rate": 0.00043019935181122716, "loss": 0.2081, "step": 176640 }, { "epoch": 7.32, "grad_norm": 0.6953125, "learning_rate": 0.00043019183434045297, "loss": 0.1902, "step": 176650 }, { "epoch": 7.32, "grad_norm": 0.8359375, "learning_rate": 0.0004301843165305772, "loss": 0.2264, "step": 176660 }, { "epoch": 7.32, "grad_norm": 0.8828125, "learning_rate": 0.00043017679838161407, "loss": 0.2213, "step": 176670 }, { "epoch": 7.32, "grad_norm": 0.7578125, "learning_rate": 0.00043016927989357755, "loss": 0.196, "step": 176680 }, { "epoch": 7.32, "grad_norm": 1.2265625, "learning_rate": 0.00043016176106648195, "loss": 0.1906, "step": 176690 }, { "epoch": 7.32, "grad_norm": 1.4609375, "learning_rate": 0.00043015424190034137, "loss": 0.2234, "step": 176700 }, { "epoch": 7.32, "grad_norm": 0.0012054443359375, "learning_rate": 0.00043014672239517, "loss": 0.132, "step": 176710 }, { "epoch": 7.32, "grad_norm": 0.796875, "learning_rate": 0.0004301392025509819, "loss": 0.2325, "step": 176720 }, { "epoch": 7.32, "grad_norm": 0.6640625, "learning_rate": 0.00043013168236779133, "loss": 0.1902, "step": 176730 }, { "epoch": 7.32, "grad_norm": 0.8984375, "learning_rate": 0.0004301241618456123, "loss": 0.1944, "step": 176740 }, { "epoch": 7.32, "grad_norm": 0.1845703125, "learning_rate": 0.0004301166409844591, "loss": 0.2404, "step": 176750 }, { "epoch": 7.32, "grad_norm": 0.119140625, "learning_rate": 0.0004301091197843459, "loss": 0.1749, "step": 176760 }, { "epoch": 7.32, "grad_norm": 0.78515625, "learning_rate": 0.0004301015982452867, "loss": 0.1974, "step": 176770 }, { "epoch": 7.32, "grad_norm": 0.87109375, "learning_rate": 0.0004300940763672957, "loss": 0.1915, "step": 176780 }, { "epoch": 7.32, "grad_norm": 0.6328125, "learning_rate": 0.00043008655415038724, "loss": 0.1922, "step": 176790 }, { "epoch": 7.32, "grad_norm": 0.98046875, "learning_rate": 0.00043007903159457525, "loss": 0.1751, "step": 176800 }, { "epoch": 7.32, "grad_norm": 1.2734375, "learning_rate": 0.0004300715086998739, "loss": 0.2095, "step": 176810 }, { "epoch": 7.32, "grad_norm": 0.21875, "learning_rate": 0.00043006398546629755, "loss": 0.2183, "step": 176820 }, { "epoch": 7.32, "grad_norm": 0.71875, "learning_rate": 0.0004300564618938602, "loss": 0.1722, "step": 176830 }, { "epoch": 7.32, "grad_norm": 0.75, "learning_rate": 0.00043004893798257593, "loss": 0.195, "step": 176840 }, { "epoch": 7.33, "grad_norm": 0.82421875, "learning_rate": 0.0004300414137324591, "loss": 0.2567, "step": 176850 }, { "epoch": 7.33, "grad_norm": 0.51953125, "learning_rate": 0.0004300338891435237, "loss": 0.1738, "step": 176860 }, { "epoch": 7.33, "grad_norm": 0.38671875, "learning_rate": 0.000430026364215784, "loss": 0.1984, "step": 176870 }, { "epoch": 7.33, "grad_norm": 0.46875, "learning_rate": 0.0004300188389492542, "loss": 0.1745, "step": 176880 }, { "epoch": 7.33, "grad_norm": 0.66796875, "learning_rate": 0.0004300113133439483, "loss": 0.2076, "step": 176890 }, { "epoch": 7.33, "grad_norm": 0.63671875, "learning_rate": 0.0004300037873998806, "loss": 0.2292, "step": 176900 }, { "epoch": 7.33, "grad_norm": 0.796875, "learning_rate": 0.0004299962611170651, "loss": 0.2015, "step": 176910 }, { "epoch": 7.33, "grad_norm": 1.21875, "learning_rate": 0.00042998873449551613, "loss": 0.2359, "step": 176920 }, { "epoch": 7.33, "grad_norm": 0.625, "learning_rate": 0.00042998120753524785, "loss": 0.1966, "step": 176930 }, { "epoch": 7.33, "grad_norm": 0.81640625, "learning_rate": 0.0004299736802362743, "loss": 0.152, "step": 176940 }, { "epoch": 7.33, "grad_norm": 0.5390625, "learning_rate": 0.0004299661525986098, "loss": 0.151, "step": 176950 }, { "epoch": 7.33, "grad_norm": 0.7109375, "learning_rate": 0.0004299586246222684, "loss": 0.2236, "step": 176960 }, { "epoch": 7.33, "grad_norm": 0.8125, "learning_rate": 0.0004299510963072643, "loss": 0.1141, "step": 176970 }, { "epoch": 7.33, "grad_norm": 0.8984375, "learning_rate": 0.00042994356765361163, "loss": 0.222, "step": 176980 }, { "epoch": 7.33, "grad_norm": 0.474609375, "learning_rate": 0.00042993603866132464, "loss": 0.1786, "step": 176990 }, { "epoch": 7.33, "grad_norm": 0.39453125, "learning_rate": 0.00042992850933041746, "loss": 0.1996, "step": 177000 }, { "epoch": 7.33, "grad_norm": 0.58203125, "learning_rate": 0.00042992097966090424, "loss": 0.1846, "step": 177010 }, { "epoch": 7.33, "grad_norm": 0.6640625, "learning_rate": 0.0004299134496527992, "loss": 0.2283, "step": 177020 }, { "epoch": 7.33, "grad_norm": 0.7421875, "learning_rate": 0.00042990591930611643, "loss": 0.1886, "step": 177030 }, { "epoch": 7.33, "grad_norm": 0.63671875, "learning_rate": 0.00042989838862087014, "loss": 0.1205, "step": 177040 }, { "epoch": 7.33, "grad_norm": 0.5234375, "learning_rate": 0.00042989085759707456, "loss": 0.1823, "step": 177050 }, { "epoch": 7.33, "grad_norm": 0.59375, "learning_rate": 0.0004298833262347438, "loss": 0.2019, "step": 177060 }, { "epoch": 7.33, "grad_norm": 1.8828125, "learning_rate": 0.00042987579453389205, "loss": 0.2228, "step": 177070 }, { "epoch": 7.33, "grad_norm": 0.89453125, "learning_rate": 0.0004298682624945334, "loss": 0.1935, "step": 177080 }, { "epoch": 7.34, "grad_norm": 0.6953125, "learning_rate": 0.0004298607301166822, "loss": 0.2488, "step": 177090 }, { "epoch": 7.34, "grad_norm": 0.5234375, "learning_rate": 0.00042985319740035247, "loss": 0.1819, "step": 177100 }, { "epoch": 7.34, "grad_norm": 0.62109375, "learning_rate": 0.0004298456643455585, "loss": 0.2029, "step": 177110 }, { "epoch": 7.34, "grad_norm": 0.54296875, "learning_rate": 0.0004298381309523144, "loss": 0.2118, "step": 177120 }, { "epoch": 7.34, "grad_norm": 1.2421875, "learning_rate": 0.00042983059722063435, "loss": 0.2005, "step": 177130 }, { "epoch": 7.34, "grad_norm": 1.0078125, "learning_rate": 0.0004298230631505325, "loss": 0.208, "step": 177140 }, { "epoch": 7.34, "grad_norm": 0.4609375, "learning_rate": 0.0004298155287420231, "loss": 0.2013, "step": 177150 }, { "epoch": 7.34, "grad_norm": 0.41015625, "learning_rate": 0.00042980799399512025, "loss": 0.2173, "step": 177160 }, { "epoch": 7.34, "grad_norm": 0.76953125, "learning_rate": 0.00042980045890983823, "loss": 0.2235, "step": 177170 }, { "epoch": 7.34, "grad_norm": 1.1796875, "learning_rate": 0.00042979292348619113, "loss": 0.1869, "step": 177180 }, { "epoch": 7.34, "grad_norm": 0.828125, "learning_rate": 0.00042978538772419316, "loss": 0.2765, "step": 177190 }, { "epoch": 7.34, "grad_norm": 0.28515625, "learning_rate": 0.0004297778516238585, "loss": 0.2393, "step": 177200 }, { "epoch": 7.34, "grad_norm": 0.81640625, "learning_rate": 0.0004297703151852014, "loss": 0.207, "step": 177210 }, { "epoch": 7.34, "grad_norm": 0.70703125, "learning_rate": 0.00042976277840823596, "loss": 0.191, "step": 177220 }, { "epoch": 7.34, "grad_norm": 0.859375, "learning_rate": 0.0004297552412929764, "loss": 0.2254, "step": 177230 }, { "epoch": 7.34, "grad_norm": 0.67578125, "learning_rate": 0.0004297477038394368, "loss": 0.237, "step": 177240 }, { "epoch": 7.34, "grad_norm": 0.69140625, "learning_rate": 0.00042974016604763144, "loss": 0.1683, "step": 177250 }, { "epoch": 7.34, "grad_norm": 0.53125, "learning_rate": 0.0004297326279175746, "loss": 0.2277, "step": 177260 }, { "epoch": 7.34, "grad_norm": 0.765625, "learning_rate": 0.0004297250894492803, "loss": 0.1567, "step": 177270 }, { "epoch": 7.34, "grad_norm": 0.32421875, "learning_rate": 0.0004297175506427628, "loss": 0.1954, "step": 177280 }, { "epoch": 7.34, "grad_norm": 1.0859375, "learning_rate": 0.0004297100114980362, "loss": 0.2325, "step": 177290 }, { "epoch": 7.34, "grad_norm": 0.2294921875, "learning_rate": 0.00042970247201511494, "loss": 0.2125, "step": 177300 }, { "epoch": 7.34, "grad_norm": 0.65625, "learning_rate": 0.0004296949321940129, "loss": 0.2086, "step": 177310 }, { "epoch": 7.34, "grad_norm": 1.328125, "learning_rate": 0.00042968739203474445, "loss": 0.1625, "step": 177320 }, { "epoch": 7.34, "grad_norm": 0.470703125, "learning_rate": 0.0004296798515373237, "loss": 0.1756, "step": 177330 }, { "epoch": 7.35, "grad_norm": 1.0703125, "learning_rate": 0.00042967231070176486, "loss": 0.2234, "step": 177340 }, { "epoch": 7.35, "grad_norm": 0.93359375, "learning_rate": 0.0004296647695280822, "loss": 0.2511, "step": 177350 }, { "epoch": 7.35, "grad_norm": 0.87890625, "learning_rate": 0.00042965722801628983, "loss": 0.1935, "step": 177360 }, { "epoch": 7.35, "grad_norm": 0.5234375, "learning_rate": 0.0004296496861664019, "loss": 0.224, "step": 177370 }, { "epoch": 7.35, "grad_norm": 0.63671875, "learning_rate": 0.0004296421439784327, "loss": 0.1642, "step": 177380 }, { "epoch": 7.35, "grad_norm": 0.421875, "learning_rate": 0.00042963460145239643, "loss": 0.1832, "step": 177390 }, { "epoch": 7.35, "grad_norm": 1.4296875, "learning_rate": 0.0004296270585883072, "loss": 0.2044, "step": 177400 }, { "epoch": 7.35, "grad_norm": 0.333984375, "learning_rate": 0.00042961951538617916, "loss": 0.1907, "step": 177410 }, { "epoch": 7.35, "grad_norm": 0.796875, "learning_rate": 0.00042961197184602666, "loss": 0.2078, "step": 177420 }, { "epoch": 7.35, "grad_norm": 0.8125, "learning_rate": 0.00042960442796786394, "loss": 0.2515, "step": 177430 }, { "epoch": 7.35, "grad_norm": 0.8125, "learning_rate": 0.0004295968837517049, "loss": 0.1965, "step": 177440 }, { "epoch": 7.35, "grad_norm": 1.140625, "learning_rate": 0.000429589339197564, "loss": 0.2285, "step": 177450 }, { "epoch": 7.35, "grad_norm": 0.828125, "learning_rate": 0.00042958179430545535, "loss": 0.2252, "step": 177460 }, { "epoch": 7.35, "grad_norm": 1.8125, "learning_rate": 0.0004295742490753932, "loss": 0.193, "step": 177470 }, { "epoch": 7.35, "grad_norm": 0.52734375, "learning_rate": 0.00042956670350739165, "loss": 0.1992, "step": 177480 }, { "epoch": 7.35, "grad_norm": 0.69921875, "learning_rate": 0.0004295591576014649, "loss": 0.1523, "step": 177490 }, { "epoch": 7.35, "grad_norm": 0.3984375, "learning_rate": 0.0004295516113576273, "loss": 0.1825, "step": 177500 }, { "epoch": 7.35, "grad_norm": 1.046875, "learning_rate": 0.00042954406477589295, "loss": 0.2087, "step": 177510 }, { "epoch": 7.35, "grad_norm": 0.7109375, "learning_rate": 0.000429536517856276, "loss": 0.185, "step": 177520 }, { "epoch": 7.35, "grad_norm": 0.30078125, "learning_rate": 0.00042952897059879083, "loss": 0.2602, "step": 177530 }, { "epoch": 7.35, "grad_norm": 0.640625, "learning_rate": 0.0004295214230034514, "loss": 0.173, "step": 177540 }, { "epoch": 7.35, "grad_norm": 0.25390625, "learning_rate": 0.0004295138750702721, "loss": 0.1846, "step": 177550 }, { "epoch": 7.35, "grad_norm": 2.046875, "learning_rate": 0.000429506326799267, "loss": 0.1767, "step": 177560 }, { "epoch": 7.35, "grad_norm": 0.73828125, "learning_rate": 0.00042949877819045045, "loss": 0.2098, "step": 177570 }, { "epoch": 7.36, "grad_norm": 0.455078125, "learning_rate": 0.0004294912292438366, "loss": 0.1839, "step": 177580 }, { "epoch": 7.36, "grad_norm": 0.423828125, "learning_rate": 0.0004294836799594396, "loss": 0.1926, "step": 177590 }, { "epoch": 7.36, "grad_norm": 0.78515625, "learning_rate": 0.0004294761303372737, "loss": 0.2103, "step": 177600 }, { "epoch": 7.36, "grad_norm": 0.75, "learning_rate": 0.0004294685803773531, "loss": 0.1814, "step": 177610 }, { "epoch": 7.36, "grad_norm": 0.60546875, "learning_rate": 0.00042946103007969206, "loss": 0.2371, "step": 177620 }, { "epoch": 7.36, "grad_norm": 0.5625, "learning_rate": 0.0004294534794443047, "loss": 0.2406, "step": 177630 }, { "epoch": 7.36, "grad_norm": 0.9296875, "learning_rate": 0.00042944592847120533, "loss": 0.2293, "step": 177640 }, { "epoch": 7.36, "grad_norm": 0.984375, "learning_rate": 0.000429438377160408, "loss": 0.2061, "step": 177650 }, { "epoch": 7.36, "grad_norm": 0.515625, "learning_rate": 0.00042943082551192706, "loss": 0.2039, "step": 177660 }, { "epoch": 7.36, "grad_norm": 0.52734375, "learning_rate": 0.0004294232735257767, "loss": 0.2711, "step": 177670 }, { "epoch": 7.36, "grad_norm": 0.47265625, "learning_rate": 0.00042941572120197113, "loss": 0.2434, "step": 177680 }, { "epoch": 7.36, "grad_norm": 0.609375, "learning_rate": 0.0004294081685405246, "loss": 0.1856, "step": 177690 }, { "epoch": 7.36, "grad_norm": 0.0, "learning_rate": 0.00042940061554145117, "loss": 0.2047, "step": 177700 }, { "epoch": 7.36, "grad_norm": 0.95703125, "learning_rate": 0.00042939306220476513, "loss": 0.2269, "step": 177710 }, { "epoch": 7.36, "grad_norm": 1.3671875, "learning_rate": 0.0004293855085304808, "loss": 0.183, "step": 177720 }, { "epoch": 7.36, "grad_norm": 0.64453125, "learning_rate": 0.00042937795451861225, "loss": 0.1804, "step": 177730 }, { "epoch": 7.36, "grad_norm": 1.7109375, "learning_rate": 0.00042937040016917383, "loss": 0.2416, "step": 177740 }, { "epoch": 7.36, "grad_norm": 0.7421875, "learning_rate": 0.00042936284548217963, "loss": 0.196, "step": 177750 }, { "epoch": 7.36, "grad_norm": 0.8984375, "learning_rate": 0.00042935529045764396, "loss": 0.1828, "step": 177760 }, { "epoch": 7.36, "grad_norm": 0.44921875, "learning_rate": 0.000429347735095581, "loss": 0.189, "step": 177770 }, { "epoch": 7.36, "grad_norm": 0.921875, "learning_rate": 0.00042934017939600495, "loss": 0.2014, "step": 177780 }, { "epoch": 7.36, "grad_norm": 0.32421875, "learning_rate": 0.0004293326233589301, "loss": 0.1514, "step": 177790 }, { "epoch": 7.36, "grad_norm": 0.78125, "learning_rate": 0.00042932506698437057, "loss": 0.2286, "step": 177800 }, { "epoch": 7.36, "grad_norm": 0.396484375, "learning_rate": 0.0004293175102723406, "loss": 0.2165, "step": 177810 }, { "epoch": 7.37, "grad_norm": 0.7734375, "learning_rate": 0.0004293099532228545, "loss": 0.2092, "step": 177820 }, { "epoch": 7.37, "grad_norm": 1.3046875, "learning_rate": 0.00042930239583592643, "loss": 0.2093, "step": 177830 }, { "epoch": 7.37, "grad_norm": 0.94140625, "learning_rate": 0.00042929483811157053, "loss": 0.2208, "step": 177840 }, { "epoch": 7.37, "grad_norm": 0.52734375, "learning_rate": 0.0004292872800498012, "loss": 0.2162, "step": 177850 }, { "epoch": 7.37, "grad_norm": 0.7578125, "learning_rate": 0.00042927972165063256, "loss": 0.2686, "step": 177860 }, { "epoch": 7.37, "grad_norm": 1.0078125, "learning_rate": 0.00042927216291407884, "loss": 0.1617, "step": 177870 }, { "epoch": 7.37, "grad_norm": 1.921875, "learning_rate": 0.00042926460384015426, "loss": 0.2461, "step": 177880 }, { "epoch": 7.37, "grad_norm": 0.91796875, "learning_rate": 0.000429257044428873, "loss": 0.2058, "step": 177890 }, { "epoch": 7.37, "grad_norm": 1.2578125, "learning_rate": 0.0004292494846802494, "loss": 0.2556, "step": 177900 }, { "epoch": 7.37, "grad_norm": 1.3828125, "learning_rate": 0.0004292419245942977, "loss": 0.2146, "step": 177910 }, { "epoch": 7.37, "grad_norm": 0.9453125, "learning_rate": 0.0004292343641710319, "loss": 0.1858, "step": 177920 }, { "epoch": 7.37, "grad_norm": 0.46875, "learning_rate": 0.0004292268034104665, "loss": 0.1771, "step": 177930 }, { "epoch": 7.37, "grad_norm": 0.09423828125, "learning_rate": 0.00042921924231261555, "loss": 0.2196, "step": 177940 }, { "epoch": 7.37, "grad_norm": 1.125, "learning_rate": 0.0004292116808774934, "loss": 0.2175, "step": 177950 }, { "epoch": 7.37, "grad_norm": 0.9765625, "learning_rate": 0.00042920411910511415, "loss": 0.2248, "step": 177960 }, { "epoch": 7.37, "grad_norm": 0.80859375, "learning_rate": 0.0004291965569954921, "loss": 0.2376, "step": 177970 }, { "epoch": 7.37, "grad_norm": 0.921875, "learning_rate": 0.0004291889945486415, "loss": 0.1868, "step": 177980 }, { "epoch": 7.37, "grad_norm": 1.9609375, "learning_rate": 0.00042918143176457656, "loss": 0.2168, "step": 177990 }, { "epoch": 7.37, "grad_norm": 0.703125, "learning_rate": 0.00042917386864331156, "loss": 0.1565, "step": 178000 }, { "epoch": 7.37, "grad_norm": 0.71484375, "learning_rate": 0.00042916630518486065, "loss": 0.1848, "step": 178010 }, { "epoch": 7.37, "grad_norm": 0.60546875, "learning_rate": 0.0004291587413892381, "loss": 0.21, "step": 178020 }, { "epoch": 7.37, "grad_norm": 0.6796875, "learning_rate": 0.00042915117725645817, "loss": 0.2075, "step": 178030 }, { "epoch": 7.37, "grad_norm": 0.56640625, "learning_rate": 0.000429143612786535, "loss": 0.1766, "step": 178040 }, { "epoch": 7.37, "grad_norm": 0.625, "learning_rate": 0.000429136047979483, "loss": 0.2278, "step": 178050 }, { "epoch": 7.38, "grad_norm": 0.93359375, "learning_rate": 0.00042912848283531625, "loss": 0.2, "step": 178060 }, { "epoch": 7.38, "grad_norm": 0.92578125, "learning_rate": 0.00042912091735404907, "loss": 0.1788, "step": 178070 }, { "epoch": 7.38, "grad_norm": 0.7578125, "learning_rate": 0.00042911335153569564, "loss": 0.2223, "step": 178080 }, { "epoch": 7.38, "grad_norm": 0.5390625, "learning_rate": 0.00042910578538027025, "loss": 0.1886, "step": 178090 }, { "epoch": 7.38, "grad_norm": 0.68359375, "learning_rate": 0.00042909821888778706, "loss": 0.1865, "step": 178100 }, { "epoch": 7.38, "grad_norm": 0.5859375, "learning_rate": 0.0004290906520582604, "loss": 0.2406, "step": 178110 }, { "epoch": 7.38, "grad_norm": 0.73046875, "learning_rate": 0.0004290830848917044, "loss": 0.1732, "step": 178120 }, { "epoch": 7.38, "grad_norm": 0.625, "learning_rate": 0.00042907551738813343, "loss": 0.1857, "step": 178130 }, { "epoch": 7.38, "grad_norm": 1.2578125, "learning_rate": 0.0004290679495475617, "loss": 0.1922, "step": 178140 }, { "epoch": 7.38, "grad_norm": 0.99609375, "learning_rate": 0.0004290603813700035, "loss": 0.2299, "step": 178150 }, { "epoch": 7.38, "grad_norm": 0.43359375, "learning_rate": 0.00042905281285547284, "loss": 0.208, "step": 178160 }, { "epoch": 7.38, "grad_norm": 1.5390625, "learning_rate": 0.00042904524400398414, "loss": 0.2341, "step": 178170 }, { "epoch": 7.38, "grad_norm": 0.765625, "learning_rate": 0.00042903767481555175, "loss": 0.1804, "step": 178180 }, { "epoch": 7.38, "grad_norm": 0.33203125, "learning_rate": 0.0004290301052901897, "loss": 0.2114, "step": 178190 }, { "epoch": 7.38, "grad_norm": 0.376953125, "learning_rate": 0.0004290225354279124, "loss": 0.1852, "step": 178200 }, { "epoch": 7.38, "grad_norm": 0.2001953125, "learning_rate": 0.0004290149652287339, "loss": 0.2019, "step": 178210 }, { "epoch": 7.38, "grad_norm": 0.8671875, "learning_rate": 0.0004290073946926686, "loss": 0.1854, "step": 178220 }, { "epoch": 7.38, "grad_norm": 0.50390625, "learning_rate": 0.0004289998238197308, "loss": 0.2165, "step": 178230 }, { "epoch": 7.38, "grad_norm": 1.5078125, "learning_rate": 0.0004289922526099346, "loss": 0.2256, "step": 178240 }, { "epoch": 7.38, "grad_norm": 0.412109375, "learning_rate": 0.0004289846810632943, "loss": 0.1885, "step": 178250 }, { "epoch": 7.38, "grad_norm": 0.6796875, "learning_rate": 0.0004289771091798241, "loss": 0.2062, "step": 178260 }, { "epoch": 7.38, "grad_norm": 0.62890625, "learning_rate": 0.00042896953695953843, "loss": 0.1986, "step": 178270 }, { "epoch": 7.38, "grad_norm": 3.484375, "learning_rate": 0.0004289619644024514, "loss": 0.2573, "step": 178280 }, { "epoch": 7.38, "grad_norm": 1.15625, "learning_rate": 0.0004289543915085773, "loss": 0.1613, "step": 178290 }, { "epoch": 7.39, "grad_norm": 0.73828125, "learning_rate": 0.00042894681827793026, "loss": 0.2024, "step": 178300 }, { "epoch": 7.39, "grad_norm": 0.5703125, "learning_rate": 0.0004289392447105247, "loss": 0.1958, "step": 178310 }, { "epoch": 7.39, "grad_norm": 0.33203125, "learning_rate": 0.0004289316708063748, "loss": 0.24, "step": 178320 }, { "epoch": 7.39, "grad_norm": 0.7421875, "learning_rate": 0.0004289240965654948, "loss": 0.1812, "step": 178330 }, { "epoch": 7.39, "grad_norm": 0.4453125, "learning_rate": 0.000428916521987899, "loss": 0.2238, "step": 178340 }, { "epoch": 7.39, "grad_norm": 0.34375, "learning_rate": 0.0004289089470736016, "loss": 0.1593, "step": 178350 }, { "epoch": 7.39, "grad_norm": 1.015625, "learning_rate": 0.0004289013718226169, "loss": 0.1878, "step": 178360 }, { "epoch": 7.39, "grad_norm": 1.0703125, "learning_rate": 0.0004288937962349591, "loss": 0.2424, "step": 178370 }, { "epoch": 7.39, "grad_norm": 1.0390625, "learning_rate": 0.0004288862203106425, "loss": 0.2598, "step": 178380 }, { "epoch": 7.39, "grad_norm": 0.5546875, "learning_rate": 0.0004288786440496814, "loss": 0.2202, "step": 178390 }, { "epoch": 7.39, "grad_norm": 0.6171875, "learning_rate": 0.00042887106745208997, "loss": 0.2228, "step": 178400 }, { "epoch": 7.39, "grad_norm": 1.703125, "learning_rate": 0.00042886349051788253, "loss": 0.182, "step": 178410 }, { "epoch": 7.39, "grad_norm": 1.09375, "learning_rate": 0.0004288559132470733, "loss": 0.1852, "step": 178420 }, { "epoch": 7.39, "grad_norm": 0.69140625, "learning_rate": 0.00042884833563967655, "loss": 0.2255, "step": 178430 }, { "epoch": 7.39, "grad_norm": 1.109375, "learning_rate": 0.0004288407576957066, "loss": 0.1987, "step": 178440 }, { "epoch": 7.39, "grad_norm": 0.03857421875, "learning_rate": 0.00042883317941517764, "loss": 0.1679, "step": 178450 }, { "epoch": 7.39, "grad_norm": 1.6640625, "learning_rate": 0.00042882560079810384, "loss": 0.2033, "step": 178460 }, { "epoch": 7.39, "grad_norm": 2.28125, "learning_rate": 0.0004288180218444997, "loss": 0.2227, "step": 178470 }, { "epoch": 7.39, "grad_norm": 0.423828125, "learning_rate": 0.0004288104425543793, "loss": 0.1868, "step": 178480 }, { "epoch": 7.39, "grad_norm": 0.515625, "learning_rate": 0.00042880286292775697, "loss": 0.2145, "step": 178490 }, { "epoch": 7.39, "grad_norm": 0.8125, "learning_rate": 0.000428795282964647, "loss": 0.1913, "step": 178500 }, { "epoch": 7.39, "grad_norm": 0.326171875, "learning_rate": 0.00042878770266506353, "loss": 0.2139, "step": 178510 }, { "epoch": 7.39, "grad_norm": 0.94140625, "learning_rate": 0.000428780122029021, "loss": 0.1938, "step": 178520 }, { "epoch": 7.39, "grad_norm": 1.203125, "learning_rate": 0.0004287725410565335, "loss": 0.1988, "step": 178530 }, { "epoch": 7.4, "grad_norm": 0.890625, "learning_rate": 0.0004287649597476154, "loss": 0.2136, "step": 178540 }, { "epoch": 7.4, "grad_norm": 0.2099609375, "learning_rate": 0.00042875737810228097, "loss": 0.229, "step": 178550 }, { "epoch": 7.4, "grad_norm": 0.73046875, "learning_rate": 0.0004287497961205445, "loss": 0.1621, "step": 178560 }, { "epoch": 7.4, "grad_norm": 0.640625, "learning_rate": 0.0004287422138024202, "loss": 0.1579, "step": 178570 }, { "epoch": 7.4, "grad_norm": 0.65625, "learning_rate": 0.00042873463114792234, "loss": 0.1755, "step": 178580 }, { "epoch": 7.4, "grad_norm": 0.359375, "learning_rate": 0.0004287270481570652, "loss": 0.1977, "step": 178590 }, { "epoch": 7.4, "grad_norm": 1.3515625, "learning_rate": 0.000428719464829863, "loss": 0.2277, "step": 178600 }, { "epoch": 7.4, "grad_norm": 0.5546875, "learning_rate": 0.0004287118811663302, "loss": 0.1857, "step": 178610 }, { "epoch": 7.4, "grad_norm": 0.388671875, "learning_rate": 0.00042870429716648084, "loss": 0.229, "step": 178620 }, { "epoch": 7.4, "grad_norm": 0.62890625, "learning_rate": 0.00042869671283032927, "loss": 0.1868, "step": 178630 }, { "epoch": 7.4, "grad_norm": 2.359375, "learning_rate": 0.00042868912815788985, "loss": 0.2226, "step": 178640 }, { "epoch": 7.4, "grad_norm": 0.5859375, "learning_rate": 0.00042868154314917677, "loss": 0.1904, "step": 178650 }, { "epoch": 7.4, "grad_norm": 1.25, "learning_rate": 0.0004286739578042043, "loss": 0.2323, "step": 178660 }, { "epoch": 7.4, "grad_norm": 1.2890625, "learning_rate": 0.00042866637212298674, "loss": 0.2011, "step": 178670 }, { "epoch": 7.4, "grad_norm": 0.65625, "learning_rate": 0.0004286587861055384, "loss": 0.2225, "step": 178680 }, { "epoch": 7.4, "grad_norm": 2.515625, "learning_rate": 0.0004286511997518735, "loss": 0.2089, "step": 178690 }, { "epoch": 7.4, "grad_norm": 0.2578125, "learning_rate": 0.00042864361306200636, "loss": 0.1918, "step": 178700 }, { "epoch": 7.4, "grad_norm": 0.2294921875, "learning_rate": 0.0004286360260359512, "loss": 0.2416, "step": 178710 }, { "epoch": 7.4, "grad_norm": 0.84765625, "learning_rate": 0.00042862843867372236, "loss": 0.2364, "step": 178720 }, { "epoch": 7.4, "grad_norm": 1.421875, "learning_rate": 0.000428620850975334, "loss": 0.2069, "step": 178730 }, { "epoch": 7.4, "grad_norm": 1.46875, "learning_rate": 0.0004286132629408006, "loss": 0.165, "step": 178740 }, { "epoch": 7.4, "grad_norm": 1.109375, "learning_rate": 0.0004286056745701363, "loss": 0.1845, "step": 178750 }, { "epoch": 7.4, "grad_norm": 0.43359375, "learning_rate": 0.00042859808586335536, "loss": 0.2462, "step": 178760 }, { "epoch": 7.4, "grad_norm": 0.890625, "learning_rate": 0.00042859049682047215, "loss": 0.217, "step": 178770 }, { "epoch": 7.41, "grad_norm": 0.54296875, "learning_rate": 0.00042858290744150084, "loss": 0.1748, "step": 178780 }, { "epoch": 7.41, "grad_norm": 0.92578125, "learning_rate": 0.00042857531772645587, "loss": 0.215, "step": 178790 }, { "epoch": 7.41, "grad_norm": 0.734375, "learning_rate": 0.00042856772767535143, "loss": 0.2381, "step": 178800 }, { "epoch": 7.41, "grad_norm": 0.71875, "learning_rate": 0.0004285601372882018, "loss": 0.1777, "step": 178810 }, { "epoch": 7.41, "grad_norm": 0.50390625, "learning_rate": 0.0004285525465650212, "loss": 0.2031, "step": 178820 }, { "epoch": 7.41, "grad_norm": 0.6640625, "learning_rate": 0.0004285449555058241, "loss": 0.1931, "step": 178830 }, { "epoch": 7.41, "grad_norm": 1.0078125, "learning_rate": 0.0004285373641106246, "loss": 0.2018, "step": 178840 }, { "epoch": 7.41, "grad_norm": 0.953125, "learning_rate": 0.000428529772379437, "loss": 0.1635, "step": 178850 }, { "epoch": 7.41, "grad_norm": 0.212890625, "learning_rate": 0.0004285221803122758, "loss": 0.2354, "step": 178860 }, { "epoch": 7.41, "grad_norm": 0.96875, "learning_rate": 0.00042851458790915507, "loss": 0.2193, "step": 178870 }, { "epoch": 7.41, "grad_norm": 0.71875, "learning_rate": 0.0004285069951700892, "loss": 0.2144, "step": 178880 }, { "epoch": 7.41, "grad_norm": 0.765625, "learning_rate": 0.00042849940209509235, "loss": 0.2257, "step": 178890 }, { "epoch": 7.41, "grad_norm": 1.109375, "learning_rate": 0.00042849180868417895, "loss": 0.1937, "step": 178900 }, { "epoch": 7.41, "grad_norm": 0.36328125, "learning_rate": 0.0004284842149373633, "loss": 0.213, "step": 178910 }, { "epoch": 7.41, "grad_norm": 0.8828125, "learning_rate": 0.0004284766208546596, "loss": 0.1813, "step": 178920 }, { "epoch": 7.41, "grad_norm": 0.7109375, "learning_rate": 0.0004284690264360821, "loss": 0.2121, "step": 178930 }, { "epoch": 7.41, "grad_norm": 0.63671875, "learning_rate": 0.00042846143168164517, "loss": 0.2563, "step": 178940 }, { "epoch": 7.41, "grad_norm": 0.462890625, "learning_rate": 0.0004284538365913632, "loss": 0.1784, "step": 178950 }, { "epoch": 7.41, "grad_norm": 0.255859375, "learning_rate": 0.00042844624116525034, "loss": 0.2137, "step": 178960 }, { "epoch": 7.41, "grad_norm": 0.80078125, "learning_rate": 0.0004284386454033209, "loss": 0.1814, "step": 178970 }, { "epoch": 7.41, "grad_norm": 0.7734375, "learning_rate": 0.0004284310493055892, "loss": 0.1491, "step": 178980 }, { "epoch": 7.41, "grad_norm": 0.69140625, "learning_rate": 0.0004284234528720696, "loss": 0.2358, "step": 178990 }, { "epoch": 7.41, "grad_norm": 0.3828125, "learning_rate": 0.0004284158561027762, "loss": 0.1951, "step": 179000 }, { "epoch": 7.41, "grad_norm": 0.859375, "learning_rate": 0.0004284082589977235, "loss": 0.2331, "step": 179010 }, { "epoch": 7.41, "grad_norm": 0.8046875, "learning_rate": 0.00042840066155692573, "loss": 0.1747, "step": 179020 }, { "epoch": 7.42, "grad_norm": 0.310546875, "learning_rate": 0.0004283930637803972, "loss": 0.1842, "step": 179030 }, { "epoch": 7.42, "grad_norm": 0.462890625, "learning_rate": 0.0004283854656681522, "loss": 0.2319, "step": 179040 }, { "epoch": 7.42, "grad_norm": 0.392578125, "learning_rate": 0.00042837786722020496, "loss": 0.2297, "step": 179050 }, { "epoch": 7.42, "grad_norm": 0.396484375, "learning_rate": 0.00042837026843656987, "loss": 0.1898, "step": 179060 }, { "epoch": 7.42, "grad_norm": 1.28125, "learning_rate": 0.0004283626693172612, "loss": 0.1658, "step": 179070 }, { "epoch": 7.42, "grad_norm": 0.197265625, "learning_rate": 0.0004283550698622932, "loss": 0.1734, "step": 179080 }, { "epoch": 7.42, "grad_norm": 0.75390625, "learning_rate": 0.00042834747007168024, "loss": 0.249, "step": 179090 }, { "epoch": 7.42, "grad_norm": 1.140625, "learning_rate": 0.0004283398699454366, "loss": 0.2092, "step": 179100 }, { "epoch": 7.42, "grad_norm": 0.46875, "learning_rate": 0.00042833226948357664, "loss": 0.2277, "step": 179110 }, { "epoch": 7.42, "grad_norm": 1.7734375, "learning_rate": 0.0004283246686861145, "loss": 0.1991, "step": 179120 }, { "epoch": 7.42, "grad_norm": 0.451171875, "learning_rate": 0.00042831706755306465, "loss": 0.1653, "step": 179130 }, { "epoch": 7.42, "grad_norm": 0.451171875, "learning_rate": 0.00042830946608444137, "loss": 0.1899, "step": 179140 }, { "epoch": 7.42, "grad_norm": 0.8203125, "learning_rate": 0.0004283018642802589, "loss": 0.2087, "step": 179150 }, { "epoch": 7.42, "grad_norm": 0.7421875, "learning_rate": 0.0004282942621405316, "loss": 0.1937, "step": 179160 }, { "epoch": 7.42, "grad_norm": 0.7578125, "learning_rate": 0.0004282866596652737, "loss": 0.1394, "step": 179170 }, { "epoch": 7.42, "grad_norm": 0.81640625, "learning_rate": 0.00042827905685449957, "loss": 0.2416, "step": 179180 }, { "epoch": 7.42, "grad_norm": 0.4453125, "learning_rate": 0.0004282714537082236, "loss": 0.168, "step": 179190 }, { "epoch": 7.42, "grad_norm": 0.255859375, "learning_rate": 0.00042826385022645984, "loss": 0.1312, "step": 179200 }, { "epoch": 7.42, "grad_norm": 0.92578125, "learning_rate": 0.0004282562464092229, "loss": 0.182, "step": 179210 }, { "epoch": 7.42, "grad_norm": 0.57421875, "learning_rate": 0.00042824864225652693, "loss": 0.239, "step": 179220 }, { "epoch": 7.42, "grad_norm": 0.296875, "learning_rate": 0.00042824103776838623, "loss": 0.2234, "step": 179230 }, { "epoch": 7.42, "grad_norm": 1.3359375, "learning_rate": 0.00042823343294481523, "loss": 0.2063, "step": 179240 }, { "epoch": 7.42, "grad_norm": 0.51953125, "learning_rate": 0.000428225827785828, "loss": 0.2952, "step": 179250 }, { "epoch": 7.42, "grad_norm": 0.97265625, "learning_rate": 0.0004282182222914391, "loss": 0.1795, "step": 179260 }, { "epoch": 7.43, "grad_norm": 0.9921875, "learning_rate": 0.00042821061646166274, "loss": 0.1748, "step": 179270 }, { "epoch": 7.43, "grad_norm": 0.60546875, "learning_rate": 0.0004282030102965133, "loss": 0.19, "step": 179280 }, { "epoch": 7.43, "grad_norm": 1.640625, "learning_rate": 0.00042819540379600496, "loss": 0.2194, "step": 179290 }, { "epoch": 7.43, "grad_norm": 0.353515625, "learning_rate": 0.0004281877969601522, "loss": 0.2827, "step": 179300 }, { "epoch": 7.43, "grad_norm": 0.439453125, "learning_rate": 0.00042818018978896916, "loss": 0.2475, "step": 179310 }, { "epoch": 7.43, "grad_norm": 0.91015625, "learning_rate": 0.0004281725822824702, "loss": 0.2039, "step": 179320 }, { "epoch": 7.43, "grad_norm": 0.99609375, "learning_rate": 0.00042816497444066984, "loss": 0.1586, "step": 179330 }, { "epoch": 7.43, "grad_norm": 0.50390625, "learning_rate": 0.00042815736626358215, "loss": 0.2418, "step": 179340 }, { "epoch": 7.43, "grad_norm": 2.484375, "learning_rate": 0.00042814975775122153, "loss": 0.1776, "step": 179350 }, { "epoch": 7.43, "grad_norm": 0.796875, "learning_rate": 0.00042814214890360235, "loss": 0.2032, "step": 179360 }, { "epoch": 7.43, "grad_norm": 0.6953125, "learning_rate": 0.00042813453972073885, "loss": 0.1675, "step": 179370 }, { "epoch": 7.43, "grad_norm": 0.2333984375, "learning_rate": 0.00042812693020264535, "loss": 0.2203, "step": 179380 }, { "epoch": 7.43, "grad_norm": 1.421875, "learning_rate": 0.0004281193203493363, "loss": 0.218, "step": 179390 }, { "epoch": 7.43, "grad_norm": 0.66015625, "learning_rate": 0.0004281117101608258, "loss": 0.1903, "step": 179400 }, { "epoch": 7.43, "grad_norm": 0.3984375, "learning_rate": 0.00042810409963712836, "loss": 0.1536, "step": 179410 }, { "epoch": 7.43, "grad_norm": 0.44140625, "learning_rate": 0.00042809648877825825, "loss": 0.156, "step": 179420 }, { "epoch": 7.43, "grad_norm": 0.322265625, "learning_rate": 0.00042808887758422976, "loss": 0.2174, "step": 179430 }, { "epoch": 7.43, "grad_norm": 0.625, "learning_rate": 0.00042808126605505724, "loss": 0.2145, "step": 179440 }, { "epoch": 7.43, "grad_norm": 0.0, "learning_rate": 0.00042807365419075507, "loss": 0.1951, "step": 179450 }, { "epoch": 7.43, "grad_norm": 0.5, "learning_rate": 0.00042806604199133744, "loss": 0.2128, "step": 179460 }, { "epoch": 7.43, "grad_norm": 0.84375, "learning_rate": 0.0004280584294568187, "loss": 0.2261, "step": 179470 }, { "epoch": 7.43, "grad_norm": 0.34375, "learning_rate": 0.00042805081658721334, "loss": 0.2102, "step": 179480 }, { "epoch": 7.43, "grad_norm": 0.625, "learning_rate": 0.0004280432033825355, "loss": 0.1717, "step": 179490 }, { "epoch": 7.43, "grad_norm": 1.1484375, "learning_rate": 0.0004280355898427996, "loss": 0.1872, "step": 179500 }, { "epoch": 7.44, "grad_norm": 0.80078125, "learning_rate": 0.00042802797596802, "loss": 0.2275, "step": 179510 }, { "epoch": 7.44, "grad_norm": 0.54296875, "learning_rate": 0.00042802036175821083, "loss": 0.1673, "step": 179520 }, { "epoch": 7.44, "grad_norm": 1.109375, "learning_rate": 0.00042801274721338667, "loss": 0.1588, "step": 179530 }, { "epoch": 7.44, "grad_norm": 0.23046875, "learning_rate": 0.00042800513233356177, "loss": 0.1477, "step": 179540 }, { "epoch": 7.44, "grad_norm": 0.578125, "learning_rate": 0.00042799751711875033, "loss": 0.2399, "step": 179550 }, { "epoch": 7.44, "grad_norm": 0.98046875, "learning_rate": 0.0004279899015689669, "loss": 0.2054, "step": 179560 }, { "epoch": 7.44, "grad_norm": 0.5078125, "learning_rate": 0.0004279822856842256, "loss": 0.161, "step": 179570 }, { "epoch": 7.44, "grad_norm": 0.9375, "learning_rate": 0.0004279746694645409, "loss": 0.2312, "step": 179580 }, { "epoch": 7.44, "grad_norm": 0.625, "learning_rate": 0.0004279670529099271, "loss": 0.2123, "step": 179590 }, { "epoch": 7.44, "grad_norm": 0.7890625, "learning_rate": 0.00042795943602039855, "loss": 0.1828, "step": 179600 }, { "epoch": 7.44, "grad_norm": 0.453125, "learning_rate": 0.00042795181879596943, "loss": 0.2394, "step": 179610 }, { "epoch": 7.44, "grad_norm": 0.84765625, "learning_rate": 0.00042794420123665433, "loss": 0.1313, "step": 179620 }, { "epoch": 7.44, "grad_norm": 0.57421875, "learning_rate": 0.00042793658334246745, "loss": 0.1931, "step": 179630 }, { "epoch": 7.44, "grad_norm": 0.337890625, "learning_rate": 0.0004279289651134231, "loss": 0.2081, "step": 179640 }, { "epoch": 7.44, "grad_norm": 0.6875, "learning_rate": 0.0004279213465495357, "loss": 0.2302, "step": 179650 }, { "epoch": 7.44, "grad_norm": 0.71875, "learning_rate": 0.00042791372765081946, "loss": 0.2283, "step": 179660 }, { "epoch": 7.44, "grad_norm": 0.87109375, "learning_rate": 0.0004279061084172889, "loss": 0.237, "step": 179670 }, { "epoch": 7.44, "grad_norm": 0.94921875, "learning_rate": 0.00042789848884895824, "loss": 0.1709, "step": 179680 }, { "epoch": 7.44, "grad_norm": 1.625, "learning_rate": 0.0004278908689458417, "loss": 0.1892, "step": 179690 }, { "epoch": 7.44, "grad_norm": 1.1640625, "learning_rate": 0.0004278832487079539, "loss": 0.2078, "step": 179700 }, { "epoch": 7.44, "grad_norm": 0.9609375, "learning_rate": 0.000427875628135309, "loss": 0.2058, "step": 179710 }, { "epoch": 7.44, "grad_norm": 0.52734375, "learning_rate": 0.0004278680072279213, "loss": 0.1875, "step": 179720 }, { "epoch": 7.44, "grad_norm": 0.56640625, "learning_rate": 0.0004278603859858053, "loss": 0.2008, "step": 179730 }, { "epoch": 7.44, "grad_norm": 0.53515625, "learning_rate": 0.00042785276440897524, "loss": 0.2229, "step": 179740 }, { "epoch": 7.45, "grad_norm": 0.70703125, "learning_rate": 0.0004278451424974455, "loss": 0.2199, "step": 179750 }, { "epoch": 7.45, "grad_norm": 0.47265625, "learning_rate": 0.00042783752025123036, "loss": 0.2318, "step": 179760 }, { "epoch": 7.45, "grad_norm": 0.8671875, "learning_rate": 0.0004278298976703443, "loss": 0.2341, "step": 179770 }, { "epoch": 7.45, "grad_norm": 0.384765625, "learning_rate": 0.0004278222747548015, "loss": 0.2134, "step": 179780 }, { "epoch": 7.45, "grad_norm": 0.63671875, "learning_rate": 0.0004278146515046164, "loss": 0.2245, "step": 179790 }, { "epoch": 7.45, "grad_norm": 0.953125, "learning_rate": 0.0004278070279198033, "loss": 0.1928, "step": 179800 }, { "epoch": 7.45, "grad_norm": 1.0078125, "learning_rate": 0.00042779940400037664, "loss": 0.1802, "step": 179810 }, { "epoch": 7.45, "grad_norm": 0.369140625, "learning_rate": 0.0004277917797463506, "loss": 0.1814, "step": 179820 }, { "epoch": 7.45, "grad_norm": 0.8984375, "learning_rate": 0.00042778415515773975, "loss": 0.2054, "step": 179830 }, { "epoch": 7.45, "grad_norm": 1.0703125, "learning_rate": 0.0004277765302345583, "loss": 0.2247, "step": 179840 }, { "epoch": 7.45, "grad_norm": 0.435546875, "learning_rate": 0.0004277689049768205, "loss": 0.2008, "step": 179850 }, { "epoch": 7.45, "grad_norm": 0.85546875, "learning_rate": 0.00042776127938454103, "loss": 0.2002, "step": 179860 }, { "epoch": 7.45, "grad_norm": 0.77734375, "learning_rate": 0.0004277536534577339, "loss": 0.1624, "step": 179870 }, { "epoch": 7.45, "grad_norm": 1.4921875, "learning_rate": 0.00042774602719641353, "loss": 0.1839, "step": 179880 }, { "epoch": 7.45, "grad_norm": 1.1328125, "learning_rate": 0.00042773840060059446, "loss": 0.1624, "step": 179890 }, { "epoch": 7.45, "grad_norm": 0.97265625, "learning_rate": 0.0004277307736702908, "loss": 0.2493, "step": 179900 }, { "epoch": 7.45, "grad_norm": 0.796875, "learning_rate": 0.000427723146405517, "loss": 0.1809, "step": 179910 }, { "epoch": 7.45, "grad_norm": 0.6484375, "learning_rate": 0.00042771551880628754, "loss": 0.223, "step": 179920 }, { "epoch": 7.45, "grad_norm": 1.078125, "learning_rate": 0.00042770789087261666, "loss": 0.2232, "step": 179930 }, { "epoch": 7.45, "grad_norm": 1.0390625, "learning_rate": 0.0004277002626045187, "loss": 0.2022, "step": 179940 }, { "epoch": 7.45, "grad_norm": 0.4140625, "learning_rate": 0.000427692634002008, "loss": 0.209, "step": 179950 }, { "epoch": 7.45, "grad_norm": 0.5625, "learning_rate": 0.0004276850050650989, "loss": 0.2397, "step": 179960 }, { "epoch": 7.45, "grad_norm": 0.412109375, "learning_rate": 0.00042767737579380596, "loss": 0.2339, "step": 179970 }, { "epoch": 7.45, "grad_norm": 1.21875, "learning_rate": 0.00042766974618814327, "loss": 0.2032, "step": 179980 }, { "epoch": 7.46, "grad_norm": 0.80078125, "learning_rate": 0.00042766211624812537, "loss": 0.1439, "step": 179990 }, { "epoch": 7.46, "grad_norm": 0.703125, "learning_rate": 0.0004276544859737665, "loss": 0.2486, "step": 180000 }, { "epoch": 7.46, "grad_norm": 0.37890625, "learning_rate": 0.00042764685536508107, "loss": 0.2027, "step": 180010 }, { "epoch": 7.46, "grad_norm": 0.6328125, "learning_rate": 0.00042763922442208346, "loss": 0.2054, "step": 180020 }, { "epoch": 7.46, "grad_norm": 0.373046875, "learning_rate": 0.00042763159314478805, "loss": 0.1994, "step": 180030 }, { "epoch": 7.46, "grad_norm": 0.470703125, "learning_rate": 0.00042762396153320914, "loss": 0.1786, "step": 180040 }, { "epoch": 7.46, "grad_norm": 0.44921875, "learning_rate": 0.00042761632958736106, "loss": 0.2061, "step": 180050 }, { "epoch": 7.46, "grad_norm": 1.1015625, "learning_rate": 0.0004276086973072583, "loss": 0.1953, "step": 180060 }, { "epoch": 7.46, "grad_norm": 0.84765625, "learning_rate": 0.00042760106469291514, "loss": 0.1854, "step": 180070 }, { "epoch": 7.46, "grad_norm": 0.59765625, "learning_rate": 0.00042759343174434593, "loss": 0.1808, "step": 180080 }, { "epoch": 7.46, "grad_norm": 1.0, "learning_rate": 0.0004275857984615651, "loss": 0.2264, "step": 180090 }, { "epoch": 7.46, "grad_norm": 0.640625, "learning_rate": 0.00042757816484458695, "loss": 0.1904, "step": 180100 }, { "epoch": 7.46, "grad_norm": 1.8203125, "learning_rate": 0.0004275705308934259, "loss": 0.2077, "step": 180110 }, { "epoch": 7.46, "grad_norm": 0.7265625, "learning_rate": 0.00042756289660809624, "loss": 0.2125, "step": 180120 }, { "epoch": 7.46, "grad_norm": 0.765625, "learning_rate": 0.00042755526198861237, "loss": 0.1918, "step": 180130 }, { "epoch": 7.46, "grad_norm": 0.53515625, "learning_rate": 0.00042754762703498873, "loss": 0.2383, "step": 180140 }, { "epoch": 7.46, "grad_norm": 0.65234375, "learning_rate": 0.0004275399917472396, "loss": 0.1914, "step": 180150 }, { "epoch": 7.46, "grad_norm": 1.234375, "learning_rate": 0.0004275323561253794, "loss": 0.2018, "step": 180160 }, { "epoch": 7.46, "grad_norm": 0.56640625, "learning_rate": 0.00042752472016942245, "loss": 0.1992, "step": 180170 }, { "epoch": 7.46, "grad_norm": 0.7265625, "learning_rate": 0.00042751708387938313, "loss": 0.2595, "step": 180180 }, { "epoch": 7.46, "grad_norm": 0.78515625, "learning_rate": 0.00042750944725527585, "loss": 0.1495, "step": 180190 }, { "epoch": 7.46, "grad_norm": 0.65625, "learning_rate": 0.000427501810297115, "loss": 0.2079, "step": 180200 }, { "epoch": 7.46, "grad_norm": 0.51171875, "learning_rate": 0.00042749417300491486, "loss": 0.1135, "step": 180210 }, { "epoch": 7.46, "grad_norm": 0.306640625, "learning_rate": 0.0004274865353786899, "loss": 0.1992, "step": 180220 }, { "epoch": 7.47, "grad_norm": 0.462890625, "learning_rate": 0.0004274788974184544, "loss": 0.2253, "step": 180230 }, { "epoch": 7.47, "grad_norm": 2.09375, "learning_rate": 0.0004274712591242228, "loss": 0.1864, "step": 180240 }, { "epoch": 7.47, "grad_norm": 0.35546875, "learning_rate": 0.00042746362049600944, "loss": 0.1677, "step": 180250 }, { "epoch": 7.47, "grad_norm": 1.3515625, "learning_rate": 0.0004274559815338287, "loss": 0.2097, "step": 180260 }, { "epoch": 7.47, "grad_norm": 0.7421875, "learning_rate": 0.00042744834223769503, "loss": 0.2324, "step": 180270 }, { "epoch": 7.47, "grad_norm": 1.2734375, "learning_rate": 0.0004274407026076227, "loss": 0.2471, "step": 180280 }, { "epoch": 7.47, "grad_norm": 1.0234375, "learning_rate": 0.00042743306264362614, "loss": 0.2164, "step": 180290 }, { "epoch": 7.47, "grad_norm": 0.86328125, "learning_rate": 0.0004274254223457197, "loss": 0.2082, "step": 180300 }, { "epoch": 7.47, "grad_norm": 0.87890625, "learning_rate": 0.00042741778171391775, "loss": 0.2405, "step": 180310 }, { "epoch": 7.47, "grad_norm": 0.23828125, "learning_rate": 0.0004274101407482348, "loss": 0.2347, "step": 180320 }, { "epoch": 7.47, "grad_norm": 0.0, "learning_rate": 0.00042740249944868506, "loss": 0.1925, "step": 180330 }, { "epoch": 7.47, "grad_norm": 2.3125, "learning_rate": 0.000427394857815283, "loss": 0.1835, "step": 180340 }, { "epoch": 7.47, "grad_norm": 0.71875, "learning_rate": 0.00042738721584804283, "loss": 0.2195, "step": 180350 }, { "epoch": 7.47, "grad_norm": 0.65234375, "learning_rate": 0.0004273795735469792, "loss": 0.1385, "step": 180360 }, { "epoch": 7.47, "grad_norm": 0.86328125, "learning_rate": 0.00042737193091210636, "loss": 0.2145, "step": 180370 }, { "epoch": 7.47, "grad_norm": 0.6875, "learning_rate": 0.00042736428794343874, "loss": 0.2323, "step": 180380 }, { "epoch": 7.47, "grad_norm": 0.8515625, "learning_rate": 0.00042735664464099065, "loss": 0.2156, "step": 180390 }, { "epoch": 7.47, "grad_norm": 0.416015625, "learning_rate": 0.0004273490010047765, "loss": 0.1908, "step": 180400 }, { "epoch": 7.47, "grad_norm": 0.90625, "learning_rate": 0.0004273413570348107, "loss": 0.1718, "step": 180410 }, { "epoch": 7.47, "grad_norm": 0.58203125, "learning_rate": 0.00042733371273110754, "loss": 0.2006, "step": 180420 }, { "epoch": 7.47, "grad_norm": 1.0859375, "learning_rate": 0.0004273260680936816, "loss": 0.2659, "step": 180430 }, { "epoch": 7.47, "grad_norm": 1.046875, "learning_rate": 0.0004273184231225471, "loss": 0.1893, "step": 180440 }, { "epoch": 7.47, "grad_norm": 0.69921875, "learning_rate": 0.0004273107778177184, "loss": 0.2313, "step": 180450 }, { "epoch": 7.47, "grad_norm": 0.79296875, "learning_rate": 0.00042730313217921004, "loss": 0.1889, "step": 180460 }, { "epoch": 7.48, "grad_norm": 1.0078125, "learning_rate": 0.00042729548620703634, "loss": 0.2201, "step": 180470 }, { "epoch": 7.48, "grad_norm": 1.1953125, "learning_rate": 0.00042728783990121167, "loss": 0.1999, "step": 180480 }, { "epoch": 7.48, "grad_norm": 0.46875, "learning_rate": 0.0004272801932617504, "loss": 0.2361, "step": 180490 }, { "epoch": 7.48, "grad_norm": 1.453125, "learning_rate": 0.00042727254628866694, "loss": 0.2144, "step": 180500 }, { "epoch": 7.48, "grad_norm": 0.9609375, "learning_rate": 0.0004272648989819758, "loss": 0.2566, "step": 180510 }, { "epoch": 7.48, "grad_norm": 0.359375, "learning_rate": 0.0004272572513416911, "loss": 0.1873, "step": 180520 }, { "epoch": 7.48, "grad_norm": 0.3359375, "learning_rate": 0.0004272496033678276, "loss": 0.1687, "step": 180530 }, { "epoch": 7.48, "grad_norm": 0.263671875, "learning_rate": 0.00042724195506039933, "loss": 0.176, "step": 180540 }, { "epoch": 7.48, "grad_norm": 0.6171875, "learning_rate": 0.0004272343064194209, "loss": 0.2051, "step": 180550 }, { "epoch": 7.48, "grad_norm": 0.5546875, "learning_rate": 0.0004272266574449066, "loss": 0.1648, "step": 180560 }, { "epoch": 7.48, "grad_norm": 0.578125, "learning_rate": 0.00042721900813687085, "loss": 0.2354, "step": 180570 }, { "epoch": 7.48, "grad_norm": 2.765625, "learning_rate": 0.00042721135849532824, "loss": 0.1989, "step": 180580 }, { "epoch": 7.48, "grad_norm": 0.8359375, "learning_rate": 0.00042720370852029275, "loss": 0.1637, "step": 180590 }, { "epoch": 7.48, "grad_norm": 0.796875, "learning_rate": 0.0004271960582117792, "loss": 0.1766, "step": 180600 }, { "epoch": 7.48, "grad_norm": 0.703125, "learning_rate": 0.0004271884075698017, "loss": 0.2216, "step": 180610 }, { "epoch": 7.48, "grad_norm": 0.94921875, "learning_rate": 0.0004271807565943748, "loss": 0.1778, "step": 180620 }, { "epoch": 7.48, "grad_norm": 0.220703125, "learning_rate": 0.0004271731052855128, "loss": 0.2182, "step": 180630 }, { "epoch": 7.48, "grad_norm": 0.3984375, "learning_rate": 0.0004271654536432302, "loss": 0.1751, "step": 180640 }, { "epoch": 7.48, "grad_norm": 0.439453125, "learning_rate": 0.0004271578016675414, "loss": 0.1874, "step": 180650 }, { "epoch": 7.48, "grad_norm": 0.79296875, "learning_rate": 0.00042715014935846066, "loss": 0.2081, "step": 180660 }, { "epoch": 7.48, "grad_norm": 0.447265625, "learning_rate": 0.00042714249671600246, "loss": 0.2401, "step": 180670 }, { "epoch": 7.48, "grad_norm": 0.796875, "learning_rate": 0.0004271348437401813, "loss": 0.2506, "step": 180680 }, { "epoch": 7.48, "grad_norm": 0.5703125, "learning_rate": 0.0004271271904310114, "loss": 0.2202, "step": 180690 }, { "epoch": 7.48, "grad_norm": 0.5, "learning_rate": 0.0004271195367885073, "loss": 0.2359, "step": 180700 }, { "epoch": 7.48, "grad_norm": 1.015625, "learning_rate": 0.0004271118828126833, "loss": 0.2286, "step": 180710 }, { "epoch": 7.49, "grad_norm": 0.62109375, "learning_rate": 0.00042710422850355395, "loss": 0.1559, "step": 180720 }, { "epoch": 7.49, "grad_norm": 1.921875, "learning_rate": 0.00042709657386113354, "loss": 0.1744, "step": 180730 }, { "epoch": 7.49, "grad_norm": 0.59375, "learning_rate": 0.0004270889188854365, "loss": 0.2212, "step": 180740 }, { "epoch": 7.49, "grad_norm": 1.0703125, "learning_rate": 0.00042708126357647727, "loss": 0.2251, "step": 180750 }, { "epoch": 7.49, "grad_norm": 0.84765625, "learning_rate": 0.0004270736079342702, "loss": 0.2239, "step": 180760 }, { "epoch": 7.49, "grad_norm": 1.0859375, "learning_rate": 0.0004270659519588297, "loss": 0.1874, "step": 180770 }, { "epoch": 7.49, "grad_norm": 0.6796875, "learning_rate": 0.00042705829565017016, "loss": 0.2028, "step": 180780 }, { "epoch": 7.49, "grad_norm": 0.921875, "learning_rate": 0.0004270506390083061, "loss": 0.1876, "step": 180790 }, { "epoch": 7.49, "grad_norm": 0.65234375, "learning_rate": 0.0004270429820332518, "loss": 0.2404, "step": 180800 }, { "epoch": 7.49, "grad_norm": 0.51953125, "learning_rate": 0.00042703532472502175, "loss": 0.2067, "step": 180810 }, { "epoch": 7.49, "grad_norm": 0.74609375, "learning_rate": 0.0004270276670836304, "loss": 0.207, "step": 180820 }, { "epoch": 7.49, "grad_norm": 0.384765625, "learning_rate": 0.000427020009109092, "loss": 0.1983, "step": 180830 }, { "epoch": 7.49, "grad_norm": 1.125, "learning_rate": 0.0004270123508014211, "loss": 0.1807, "step": 180840 }, { "epoch": 7.49, "grad_norm": 0.73046875, "learning_rate": 0.00042700469216063207, "loss": 0.199, "step": 180850 }, { "epoch": 7.49, "grad_norm": 0.78125, "learning_rate": 0.00042699703318673936, "loss": 0.1777, "step": 180860 }, { "epoch": 7.49, "grad_norm": 0.97265625, "learning_rate": 0.00042698937387975735, "loss": 0.2405, "step": 180870 }, { "epoch": 7.49, "grad_norm": 1.0234375, "learning_rate": 0.00042698171423970037, "loss": 0.1932, "step": 180880 }, { "epoch": 7.49, "grad_norm": 1.2734375, "learning_rate": 0.000426974054266583, "loss": 0.2054, "step": 180890 }, { "epoch": 7.49, "grad_norm": 0.6875, "learning_rate": 0.0004269663939604196, "loss": 0.261, "step": 180900 }, { "epoch": 7.49, "grad_norm": 0.7421875, "learning_rate": 0.00042695873332122445, "loss": 0.2115, "step": 180910 }, { "epoch": 7.49, "grad_norm": 0.220703125, "learning_rate": 0.0004269510723490121, "loss": 0.236, "step": 180920 }, { "epoch": 7.49, "grad_norm": 1.1171875, "learning_rate": 0.000426943411043797, "loss": 0.2072, "step": 180930 }, { "epoch": 7.49, "grad_norm": 0.9921875, "learning_rate": 0.00042693574940559344, "loss": 0.1948, "step": 180940 }, { "epoch": 7.49, "grad_norm": 0.28515625, "learning_rate": 0.0004269280874344159, "loss": 0.1724, "step": 180950 }, { "epoch": 7.5, "grad_norm": 0.70703125, "learning_rate": 0.00042692042513027887, "loss": 0.2164, "step": 180960 }, { "epoch": 7.5, "grad_norm": 0.8671875, "learning_rate": 0.00042691276249319667, "loss": 0.2007, "step": 180970 }, { "epoch": 7.5, "grad_norm": 0.609375, "learning_rate": 0.0004269050995231838, "loss": 0.1676, "step": 180980 }, { "epoch": 7.5, "grad_norm": 0.6171875, "learning_rate": 0.00042689743622025455, "loss": 0.1824, "step": 180990 }, { "epoch": 7.5, "grad_norm": 0.66015625, "learning_rate": 0.00042688977258442353, "loss": 0.1609, "step": 181000 }, { "epoch": 7.5, "grad_norm": 0.51953125, "learning_rate": 0.000426882108615705, "loss": 0.1669, "step": 181010 }, { "epoch": 7.5, "grad_norm": 0.267578125, "learning_rate": 0.00042687444431411343, "loss": 0.2152, "step": 181020 }, { "epoch": 7.5, "grad_norm": 0.796875, "learning_rate": 0.0004268667796796633, "loss": 0.1871, "step": 181030 }, { "epoch": 7.5, "grad_norm": 0.5859375, "learning_rate": 0.0004268591147123689, "loss": 0.2326, "step": 181040 }, { "epoch": 7.5, "grad_norm": 0.515625, "learning_rate": 0.00042685144941224484, "loss": 0.2371, "step": 181050 }, { "epoch": 7.5, "grad_norm": 0.0, "learning_rate": 0.00042684378377930545, "loss": 0.164, "step": 181060 }, { "epoch": 7.5, "grad_norm": 0.8359375, "learning_rate": 0.0004268361178135651, "loss": 0.2249, "step": 181070 }, { "epoch": 7.5, "grad_norm": 0.734375, "learning_rate": 0.00042682845151503833, "loss": 0.1919, "step": 181080 }, { "epoch": 7.5, "grad_norm": 0.6484375, "learning_rate": 0.0004268207848837395, "loss": 0.2277, "step": 181090 }, { "epoch": 7.5, "grad_norm": 0.61328125, "learning_rate": 0.00042681311791968303, "loss": 0.1811, "step": 181100 }, { "epoch": 7.5, "grad_norm": 0.357421875, "learning_rate": 0.0004268054506228834, "loss": 0.1807, "step": 181110 }, { "epoch": 7.5, "grad_norm": 0.671875, "learning_rate": 0.0004267977829933549, "loss": 0.179, "step": 181120 }, { "epoch": 7.5, "grad_norm": 0.76953125, "learning_rate": 0.0004267901150311122, "loss": 0.2039, "step": 181130 }, { "epoch": 7.5, "grad_norm": 0.72265625, "learning_rate": 0.0004267824467361695, "loss": 0.2201, "step": 181140 }, { "epoch": 7.5, "grad_norm": 0.703125, "learning_rate": 0.00042677477810854135, "loss": 0.2389, "step": 181150 }, { "epoch": 7.5, "grad_norm": 0.486328125, "learning_rate": 0.00042676710914824224, "loss": 0.2052, "step": 181160 }, { "epoch": 7.5, "grad_norm": 0.388671875, "learning_rate": 0.00042675943985528644, "loss": 0.2109, "step": 181170 }, { "epoch": 7.5, "grad_norm": 0.396484375, "learning_rate": 0.00042675177022968847, "loss": 0.2052, "step": 181180 }, { "epoch": 7.5, "grad_norm": 0.443359375, "learning_rate": 0.0004267441002714627, "loss": 0.2077, "step": 181190 }, { "epoch": 7.51, "grad_norm": 0.5390625, "learning_rate": 0.0004267364299806237, "loss": 0.1804, "step": 181200 }, { "epoch": 7.51, "grad_norm": 0.88671875, "learning_rate": 0.0004267287593571858, "loss": 0.2103, "step": 181210 }, { "epoch": 7.51, "grad_norm": 0.33984375, "learning_rate": 0.0004267210884011635, "loss": 0.1958, "step": 181220 }, { "epoch": 7.51, "grad_norm": 0.3828125, "learning_rate": 0.0004267134171125711, "loss": 0.1368, "step": 181230 }, { "epoch": 7.51, "grad_norm": 0.625, "learning_rate": 0.0004267057454914232, "loss": 0.2053, "step": 181240 }, { "epoch": 7.51, "grad_norm": 0.7421875, "learning_rate": 0.00042669807353773416, "loss": 0.2298, "step": 181250 }, { "epoch": 7.51, "grad_norm": 1.15625, "learning_rate": 0.00042669040125151847, "loss": 0.1757, "step": 181260 }, { "epoch": 7.51, "grad_norm": 0.578125, "learning_rate": 0.00042668272863279045, "loss": 0.204, "step": 181270 }, { "epoch": 7.51, "grad_norm": 1.3828125, "learning_rate": 0.0004266750556815646, "loss": 0.2085, "step": 181280 }, { "epoch": 7.51, "grad_norm": 0.7421875, "learning_rate": 0.00042666738239785545, "loss": 0.264, "step": 181290 }, { "epoch": 7.51, "grad_norm": 1.234375, "learning_rate": 0.00042665970878167736, "loss": 0.2049, "step": 181300 }, { "epoch": 7.51, "grad_norm": 1.0546875, "learning_rate": 0.0004266520348330447, "loss": 0.1409, "step": 181310 }, { "epoch": 7.51, "grad_norm": 1.4765625, "learning_rate": 0.00042664436055197207, "loss": 0.2106, "step": 181320 }, { "epoch": 7.51, "grad_norm": 0.71484375, "learning_rate": 0.0004266366859384738, "loss": 0.1686, "step": 181330 }, { "epoch": 7.51, "grad_norm": 0.7109375, "learning_rate": 0.0004266290109925644, "loss": 0.2467, "step": 181340 }, { "epoch": 7.51, "grad_norm": 0.73828125, "learning_rate": 0.0004266213357142582, "loss": 0.2639, "step": 181350 }, { "epoch": 7.51, "grad_norm": 1.296875, "learning_rate": 0.00042661366010356974, "loss": 0.1542, "step": 181360 }, { "epoch": 7.51, "grad_norm": 0.458984375, "learning_rate": 0.0004266059841605135, "loss": 0.1211, "step": 181370 }, { "epoch": 7.51, "grad_norm": 0.4375, "learning_rate": 0.0004265983078851038, "loss": 0.2651, "step": 181380 }, { "epoch": 7.51, "grad_norm": 1.0234375, "learning_rate": 0.0004265906312773552, "loss": 0.2435, "step": 181390 }, { "epoch": 7.51, "grad_norm": 0.5703125, "learning_rate": 0.0004265829543372821, "loss": 0.196, "step": 181400 }, { "epoch": 7.51, "grad_norm": 0.55078125, "learning_rate": 0.00042657527706489897, "loss": 0.2009, "step": 181410 }, { "epoch": 7.51, "grad_norm": 0.408203125, "learning_rate": 0.00042656759946022016, "loss": 0.1608, "step": 181420 }, { "epoch": 7.51, "grad_norm": 0.9609375, "learning_rate": 0.0004265599215232603, "loss": 0.1847, "step": 181430 }, { "epoch": 7.52, "grad_norm": 1.015625, "learning_rate": 0.00042655224325403363, "loss": 0.2012, "step": 181440 }, { "epoch": 7.52, "grad_norm": 0.8359375, "learning_rate": 0.00042654456465255475, "loss": 0.1725, "step": 181450 }, { "epoch": 7.52, "grad_norm": 0.345703125, "learning_rate": 0.0004265368857188381, "loss": 0.1996, "step": 181460 }, { "epoch": 7.52, "grad_norm": 0.65234375, "learning_rate": 0.000426529206452898, "loss": 0.1832, "step": 181470 }, { "epoch": 7.52, "grad_norm": 0.7421875, "learning_rate": 0.0004265215268547491, "loss": 0.1908, "step": 181480 }, { "epoch": 7.52, "grad_norm": 0.80859375, "learning_rate": 0.00042651384692440566, "loss": 0.1847, "step": 181490 }, { "epoch": 7.52, "grad_norm": 0.478515625, "learning_rate": 0.00042650616666188226, "loss": 0.1921, "step": 181500 }, { "epoch": 7.52, "grad_norm": 0.484375, "learning_rate": 0.0004264984860671933, "loss": 0.2274, "step": 181510 }, { "epoch": 7.52, "grad_norm": 0.94140625, "learning_rate": 0.00042649080514035333, "loss": 0.198, "step": 181520 }, { "epoch": 7.52, "grad_norm": 0.5078125, "learning_rate": 0.0004264831238813767, "loss": 0.1474, "step": 181530 }, { "epoch": 7.52, "grad_norm": 0.9453125, "learning_rate": 0.0004264754422902778, "loss": 0.2471, "step": 181540 }, { "epoch": 7.52, "grad_norm": 0.75, "learning_rate": 0.00042646776036707126, "loss": 0.2253, "step": 181550 }, { "epoch": 7.52, "grad_norm": 0.91796875, "learning_rate": 0.0004264600781117714, "loss": 0.1716, "step": 181560 }, { "epoch": 7.52, "grad_norm": 3.34375, "learning_rate": 0.00042645239552439277, "loss": 0.1846, "step": 181570 }, { "epoch": 7.52, "grad_norm": 0.9921875, "learning_rate": 0.00042644471260494976, "loss": 0.1656, "step": 181580 }, { "epoch": 7.52, "grad_norm": 0.6171875, "learning_rate": 0.00042643702935345684, "loss": 0.2535, "step": 181590 }, { "epoch": 7.52, "grad_norm": 0.28515625, "learning_rate": 0.00042642934576992846, "loss": 0.2122, "step": 181600 }, { "epoch": 7.52, "grad_norm": 0.61328125, "learning_rate": 0.0004264216618543792, "loss": 0.1842, "step": 181610 }, { "epoch": 7.52, "grad_norm": 0.7734375, "learning_rate": 0.00042641397760682335, "loss": 0.163, "step": 181620 }, { "epoch": 7.52, "grad_norm": 1.234375, "learning_rate": 0.0004264062930272755, "loss": 0.1904, "step": 181630 }, { "epoch": 7.52, "grad_norm": 1.71875, "learning_rate": 0.00042639860811575006, "loss": 0.2249, "step": 181640 }, { "epoch": 7.52, "grad_norm": 0.82421875, "learning_rate": 0.0004263909228722614, "loss": 0.2116, "step": 181650 }, { "epoch": 7.52, "grad_norm": 0.703125, "learning_rate": 0.00042638323729682413, "loss": 0.1961, "step": 181660 }, { "epoch": 7.52, "grad_norm": 0.6796875, "learning_rate": 0.00042637555138945265, "loss": 0.2157, "step": 181670 }, { "epoch": 7.53, "grad_norm": 0.369140625, "learning_rate": 0.00042636786515016145, "loss": 0.1902, "step": 181680 }, { "epoch": 7.53, "grad_norm": 0.64453125, "learning_rate": 0.0004263601785789649, "loss": 0.1788, "step": 181690 }, { "epoch": 7.53, "grad_norm": 1.71875, "learning_rate": 0.00042635249167587765, "loss": 0.1812, "step": 181700 }, { "epoch": 7.53, "grad_norm": 0.94140625, "learning_rate": 0.000426344804440914, "loss": 0.2491, "step": 181710 }, { "epoch": 7.53, "grad_norm": 0.478515625, "learning_rate": 0.00042633711687408847, "loss": 0.2069, "step": 181720 }, { "epoch": 7.53, "grad_norm": 0.490234375, "learning_rate": 0.0004263294289754155, "loss": 0.2334, "step": 181730 }, { "epoch": 7.53, "grad_norm": 0.458984375, "learning_rate": 0.00042632174074490965, "loss": 0.2259, "step": 181740 }, { "epoch": 7.53, "grad_norm": 0.96875, "learning_rate": 0.00042631405218258536, "loss": 0.2461, "step": 181750 }, { "epoch": 7.53, "grad_norm": 0.71484375, "learning_rate": 0.0004263063632884569, "loss": 0.2209, "step": 181760 }, { "epoch": 7.53, "grad_norm": 1.875, "learning_rate": 0.00042629867406253905, "loss": 0.1968, "step": 181770 }, { "epoch": 7.53, "grad_norm": 0.416015625, "learning_rate": 0.00042629098450484604, "loss": 0.1735, "step": 181780 }, { "epoch": 7.53, "grad_norm": 0.7890625, "learning_rate": 0.0004262832946153925, "loss": 0.222, "step": 181790 }, { "epoch": 7.53, "grad_norm": 0.6796875, "learning_rate": 0.00042627560439419284, "loss": 0.2182, "step": 181800 }, { "epoch": 7.53, "grad_norm": 0.8046875, "learning_rate": 0.0004262679138412615, "loss": 0.1302, "step": 181810 }, { "epoch": 7.53, "grad_norm": 1.109375, "learning_rate": 0.00042626022295661294, "loss": 0.205, "step": 181820 }, { "epoch": 7.53, "grad_norm": 0.98828125, "learning_rate": 0.0004262525317402617, "loss": 0.1869, "step": 181830 }, { "epoch": 7.53, "grad_norm": 0.6328125, "learning_rate": 0.0004262448401922223, "loss": 0.1823, "step": 181840 }, { "epoch": 7.53, "grad_norm": 1.21875, "learning_rate": 0.0004262371483125091, "loss": 0.2143, "step": 181850 }, { "epoch": 7.53, "grad_norm": 0.73046875, "learning_rate": 0.0004262294561011366, "loss": 0.2012, "step": 181860 }, { "epoch": 7.53, "grad_norm": 0.76171875, "learning_rate": 0.0004262217635581193, "loss": 0.2015, "step": 181870 }, { "epoch": 7.53, "grad_norm": 1.5234375, "learning_rate": 0.00042621407068347167, "loss": 0.2221, "step": 181880 }, { "epoch": 7.53, "grad_norm": 0.55859375, "learning_rate": 0.0004262063774772082, "loss": 0.173, "step": 181890 }, { "epoch": 7.53, "grad_norm": 0.58984375, "learning_rate": 0.00042619868393934334, "loss": 0.2308, "step": 181900 }, { "epoch": 7.53, "grad_norm": 0.7109375, "learning_rate": 0.0004261909900698916, "loss": 0.1793, "step": 181910 }, { "epoch": 7.54, "grad_norm": 0.99609375, "learning_rate": 0.00042618329586886745, "loss": 0.184, "step": 181920 }, { "epoch": 7.54, "grad_norm": 0.8203125, "learning_rate": 0.00042617560133628534, "loss": 0.2091, "step": 181930 }, { "epoch": 7.54, "grad_norm": 0.6171875, "learning_rate": 0.0004261679064721598, "loss": 0.2157, "step": 181940 }, { "epoch": 7.54, "grad_norm": 0.77734375, "learning_rate": 0.00042616021127650527, "loss": 0.2157, "step": 181950 }, { "epoch": 7.54, "grad_norm": 0.93359375, "learning_rate": 0.0004261525157493362, "loss": 0.1974, "step": 181960 }, { "epoch": 7.54, "grad_norm": 0.59765625, "learning_rate": 0.00042614481989066723, "loss": 0.2345, "step": 181970 }, { "epoch": 7.54, "grad_norm": 0.76171875, "learning_rate": 0.00042613712370051266, "loss": 0.1834, "step": 181980 }, { "epoch": 7.54, "grad_norm": 0.828125, "learning_rate": 0.000426129427178887, "loss": 0.2128, "step": 181990 }, { "epoch": 7.54, "grad_norm": 0.640625, "learning_rate": 0.0004261217303258049, "loss": 0.1737, "step": 182000 }, { "epoch": 7.54, "grad_norm": 0.48046875, "learning_rate": 0.00042611403314128063, "loss": 0.2156, "step": 182010 }, { "epoch": 7.54, "grad_norm": 0.9375, "learning_rate": 0.0004261063356253288, "loss": 0.2091, "step": 182020 }, { "epoch": 7.54, "grad_norm": 0.8984375, "learning_rate": 0.0004260986377779639, "loss": 0.1886, "step": 182030 }, { "epoch": 7.54, "grad_norm": 1.078125, "learning_rate": 0.00042609093959920027, "loss": 0.184, "step": 182040 }, { "epoch": 7.54, "grad_norm": 1.1796875, "learning_rate": 0.0004260832410890526, "loss": 0.213, "step": 182050 }, { "epoch": 7.54, "grad_norm": 0.7421875, "learning_rate": 0.0004260755422475353, "loss": 0.1719, "step": 182060 }, { "epoch": 7.54, "grad_norm": 0.76953125, "learning_rate": 0.00042606784307466273, "loss": 0.1862, "step": 182070 }, { "epoch": 7.54, "grad_norm": 0.96484375, "learning_rate": 0.0004260601435704496, "loss": 0.1704, "step": 182080 }, { "epoch": 7.54, "grad_norm": 0.72265625, "learning_rate": 0.00042605244373491026, "loss": 0.2313, "step": 182090 }, { "epoch": 7.54, "grad_norm": 0.609375, "learning_rate": 0.0004260447435680592, "loss": 0.1728, "step": 182100 }, { "epoch": 7.54, "grad_norm": 0.91015625, "learning_rate": 0.00042603704306991096, "loss": 0.1731, "step": 182110 }, { "epoch": 7.54, "grad_norm": 1.015625, "learning_rate": 0.00042602934224048007, "loss": 0.1642, "step": 182120 }, { "epoch": 7.54, "grad_norm": 1.078125, "learning_rate": 0.0004260216410797809, "loss": 0.1997, "step": 182130 }, { "epoch": 7.54, "grad_norm": 0.33203125, "learning_rate": 0.0004260139395878281, "loss": 0.2151, "step": 182140 }, { "epoch": 7.54, "grad_norm": 0.671875, "learning_rate": 0.000426006237764636, "loss": 0.2193, "step": 182150 }, { "epoch": 7.55, "grad_norm": 0.8515625, "learning_rate": 0.0004259985356102192, "loss": 0.2009, "step": 182160 }, { "epoch": 7.55, "grad_norm": 1.6484375, "learning_rate": 0.0004259908331245921, "loss": 0.2441, "step": 182170 }, { "epoch": 7.55, "grad_norm": 1.2734375, "learning_rate": 0.0004259831303077693, "loss": 0.193, "step": 182180 }, { "epoch": 7.55, "grad_norm": 1.4140625, "learning_rate": 0.00042597542715976523, "loss": 0.2215, "step": 182190 }, { "epoch": 7.55, "grad_norm": 0.5546875, "learning_rate": 0.0004259677236805944, "loss": 0.243, "step": 182200 }, { "epoch": 7.55, "grad_norm": 0.5234375, "learning_rate": 0.0004259600198702714, "loss": 0.1987, "step": 182210 }, { "epoch": 7.55, "grad_norm": 1.5, "learning_rate": 0.0004259523157288106, "loss": 0.1872, "step": 182220 }, { "epoch": 7.55, "grad_norm": 0.6015625, "learning_rate": 0.0004259446112562265, "loss": 0.2255, "step": 182230 }, { "epoch": 7.55, "grad_norm": 1.1796875, "learning_rate": 0.0004259369064525337, "loss": 0.1959, "step": 182240 }, { "epoch": 7.55, "grad_norm": 0.83984375, "learning_rate": 0.0004259292013177466, "loss": 0.1937, "step": 182250 }, { "epoch": 7.55, "grad_norm": 1.3125, "learning_rate": 0.0004259214958518798, "loss": 0.2089, "step": 182260 }, { "epoch": 7.55, "grad_norm": 0.5625, "learning_rate": 0.00042591379005494766, "loss": 0.2137, "step": 182270 }, { "epoch": 7.55, "grad_norm": 0.83203125, "learning_rate": 0.00042590608392696486, "loss": 0.173, "step": 182280 }, { "epoch": 7.55, "grad_norm": 0.6171875, "learning_rate": 0.00042589837746794576, "loss": 0.2001, "step": 182290 }, { "epoch": 7.55, "grad_norm": 0.5546875, "learning_rate": 0.00042589067067790487, "loss": 0.2439, "step": 182300 }, { "epoch": 7.55, "grad_norm": 0.291015625, "learning_rate": 0.0004258829635568568, "loss": 0.257, "step": 182310 }, { "epoch": 7.55, "grad_norm": 0.90625, "learning_rate": 0.00042587525610481594, "loss": 0.2472, "step": 182320 }, { "epoch": 7.55, "grad_norm": 0.765625, "learning_rate": 0.00042586754832179684, "loss": 0.1983, "step": 182330 }, { "epoch": 7.55, "grad_norm": 0.490234375, "learning_rate": 0.000425859840207814, "loss": 0.1853, "step": 182340 }, { "epoch": 7.55, "grad_norm": 0.2138671875, "learning_rate": 0.000425852131762882, "loss": 0.2145, "step": 182350 }, { "epoch": 7.55, "grad_norm": 0.60546875, "learning_rate": 0.0004258444229870152, "loss": 0.1769, "step": 182360 }, { "epoch": 7.55, "grad_norm": 0.64453125, "learning_rate": 0.0004258367138802283, "loss": 0.1734, "step": 182370 }, { "epoch": 7.55, "grad_norm": 0.2431640625, "learning_rate": 0.00042582900444253555, "loss": 0.2284, "step": 182380 }, { "epoch": 7.55, "grad_norm": 0.38671875, "learning_rate": 0.00042582129467395164, "loss": 0.2242, "step": 182390 }, { "epoch": 7.55, "grad_norm": 1.59375, "learning_rate": 0.0004258135845744911, "loss": 0.1941, "step": 182400 }, { "epoch": 7.56, "grad_norm": 0.71484375, "learning_rate": 0.0004258058741441683, "loss": 0.2355, "step": 182410 }, { "epoch": 7.56, "grad_norm": 0.431640625, "learning_rate": 0.0004257981633829979, "loss": 0.1344, "step": 182420 }, { "epoch": 7.56, "grad_norm": 0.474609375, "learning_rate": 0.0004257904522909943, "loss": 0.197, "step": 182430 }, { "epoch": 7.56, "grad_norm": 1.3046875, "learning_rate": 0.000425782740868172, "loss": 0.1913, "step": 182440 }, { "epoch": 7.56, "grad_norm": 1.5703125, "learning_rate": 0.00042577502911454566, "loss": 0.1992, "step": 182450 }, { "epoch": 7.56, "grad_norm": 0.50390625, "learning_rate": 0.0004257673170301297, "loss": 0.1982, "step": 182460 }, { "epoch": 7.56, "grad_norm": 0.7109375, "learning_rate": 0.0004257596046149386, "loss": 0.1446, "step": 182470 }, { "epoch": 7.56, "grad_norm": 1.3203125, "learning_rate": 0.0004257518918689869, "loss": 0.1648, "step": 182480 }, { "epoch": 7.56, "grad_norm": 0.87890625, "learning_rate": 0.0004257441787922891, "loss": 0.193, "step": 182490 }, { "epoch": 7.56, "grad_norm": 0.80859375, "learning_rate": 0.0004257364653848598, "loss": 0.2503, "step": 182500 }, { "epoch": 7.56, "grad_norm": 0.87109375, "learning_rate": 0.0004257287516467134, "loss": 0.182, "step": 182510 }, { "epoch": 7.56, "grad_norm": 0.76171875, "learning_rate": 0.00042572103757786443, "loss": 0.2372, "step": 182520 }, { "epoch": 7.56, "grad_norm": 0.23046875, "learning_rate": 0.00042571332317832756, "loss": 0.2322, "step": 182530 }, { "epoch": 7.56, "grad_norm": 1.9765625, "learning_rate": 0.00042570560844811705, "loss": 0.2343, "step": 182540 }, { "epoch": 7.56, "grad_norm": 0.90234375, "learning_rate": 0.00042569789338724764, "loss": 0.2086, "step": 182550 }, { "epoch": 7.56, "grad_norm": 0.435546875, "learning_rate": 0.00042569017799573376, "loss": 0.1448, "step": 182560 }, { "epoch": 7.56, "grad_norm": 1.828125, "learning_rate": 0.00042568246227358994, "loss": 0.2161, "step": 182570 }, { "epoch": 7.56, "grad_norm": 0.41796875, "learning_rate": 0.0004256747462208307, "loss": 0.1745, "step": 182580 }, { "epoch": 7.56, "grad_norm": 1.8515625, "learning_rate": 0.0004256670298374705, "loss": 0.233, "step": 182590 }, { "epoch": 7.56, "grad_norm": 0.58984375, "learning_rate": 0.000425659313123524, "loss": 0.2217, "step": 182600 }, { "epoch": 7.56, "grad_norm": 0.96875, "learning_rate": 0.00042565159607900563, "loss": 0.1913, "step": 182610 }, { "epoch": 7.56, "grad_norm": 0.8515625, "learning_rate": 0.0004256438787039299, "loss": 0.199, "step": 182620 }, { "epoch": 7.56, "grad_norm": 1.109375, "learning_rate": 0.0004256361609983114, "loss": 0.1998, "step": 182630 }, { "epoch": 7.56, "grad_norm": 0.80078125, "learning_rate": 0.0004256284429621645, "loss": 0.2201, "step": 182640 }, { "epoch": 7.57, "grad_norm": 0.345703125, "learning_rate": 0.0004256207245955039, "loss": 0.2402, "step": 182650 }, { "epoch": 7.57, "grad_norm": 0.765625, "learning_rate": 0.0004256130058983442, "loss": 0.1696, "step": 182660 }, { "epoch": 7.57, "grad_norm": 0.330078125, "learning_rate": 0.0004256052868706996, "loss": 0.2006, "step": 182670 }, { "epoch": 7.57, "grad_norm": 0.3515625, "learning_rate": 0.00042559756751258483, "loss": 0.1947, "step": 182680 }, { "epoch": 7.57, "grad_norm": 0.419921875, "learning_rate": 0.0004255898478240145, "loss": 0.2172, "step": 182690 }, { "epoch": 7.57, "grad_norm": 0.62109375, "learning_rate": 0.00042558212780500294, "loss": 0.1842, "step": 182700 }, { "epoch": 7.57, "grad_norm": 0.93359375, "learning_rate": 0.00042557440745556485, "loss": 0.2106, "step": 182710 }, { "epoch": 7.57, "grad_norm": 0.6171875, "learning_rate": 0.00042556668677571464, "loss": 0.1823, "step": 182720 }, { "epoch": 7.57, "grad_norm": 1.2890625, "learning_rate": 0.00042555896576546693, "loss": 0.182, "step": 182730 }, { "epoch": 7.57, "grad_norm": 0.57421875, "learning_rate": 0.00042555124442483614, "loss": 0.1694, "step": 182740 }, { "epoch": 7.57, "grad_norm": 0.9375, "learning_rate": 0.0004255435227538369, "loss": 0.2124, "step": 182750 }, { "epoch": 7.57, "grad_norm": 1.5859375, "learning_rate": 0.0004255358007524838, "loss": 0.223, "step": 182760 }, { "epoch": 7.57, "grad_norm": 0.46484375, "learning_rate": 0.0004255280784207911, "loss": 0.17, "step": 182770 }, { "epoch": 7.57, "grad_norm": 1.0, "learning_rate": 0.00042552035575877366, "loss": 0.1673, "step": 182780 }, { "epoch": 7.57, "grad_norm": 0.6015625, "learning_rate": 0.0004255126327664458, "loss": 0.178, "step": 182790 }, { "epoch": 7.57, "grad_norm": 0.64453125, "learning_rate": 0.00042550490944382206, "loss": 0.1717, "step": 182800 }, { "epoch": 7.57, "grad_norm": 0.435546875, "learning_rate": 0.0004254971857909171, "loss": 0.1814, "step": 182810 }, { "epoch": 7.57, "grad_norm": 0.5, "learning_rate": 0.0004254894618077455, "loss": 0.2017, "step": 182820 }, { "epoch": 7.57, "grad_norm": 0.51171875, "learning_rate": 0.0004254817374943215, "loss": 0.1957, "step": 182830 }, { "epoch": 7.57, "grad_norm": 2.09375, "learning_rate": 0.0004254740128506599, "loss": 0.2228, "step": 182840 }, { "epoch": 7.57, "grad_norm": 0.48046875, "learning_rate": 0.00042546628787677515, "loss": 0.1796, "step": 182850 }, { "epoch": 7.57, "grad_norm": 1.25, "learning_rate": 0.0004254585625726818, "loss": 0.2443, "step": 182860 }, { "epoch": 7.57, "grad_norm": 1.140625, "learning_rate": 0.00042545083693839436, "loss": 0.2325, "step": 182870 }, { "epoch": 7.57, "grad_norm": 0.5703125, "learning_rate": 0.0004254431109739274, "loss": 0.212, "step": 182880 }, { "epoch": 7.58, "grad_norm": 0.72265625, "learning_rate": 0.00042543538467929547, "loss": 0.2457, "step": 182890 }, { "epoch": 7.58, "grad_norm": 1.421875, "learning_rate": 0.0004254276580545131, "loss": 0.1799, "step": 182900 }, { "epoch": 7.58, "grad_norm": 0.5703125, "learning_rate": 0.0004254199310995948, "loss": 0.202, "step": 182910 }, { "epoch": 7.58, "grad_norm": 0.0, "learning_rate": 0.00042541220381455514, "loss": 0.1707, "step": 182920 }, { "epoch": 7.58, "grad_norm": 0.62109375, "learning_rate": 0.0004254044761994087, "loss": 0.1908, "step": 182930 }, { "epoch": 7.58, "grad_norm": 0.341796875, "learning_rate": 0.0004253967482541699, "loss": 0.1894, "step": 182940 }, { "epoch": 7.58, "grad_norm": 0.69140625, "learning_rate": 0.0004253890199788534, "loss": 0.2058, "step": 182950 }, { "epoch": 7.58, "grad_norm": 0.72265625, "learning_rate": 0.0004253812913734737, "loss": 0.1851, "step": 182960 }, { "epoch": 7.58, "grad_norm": 0.8671875, "learning_rate": 0.00042537356243804535, "loss": 0.205, "step": 182970 }, { "epoch": 7.58, "grad_norm": 0.6328125, "learning_rate": 0.0004253658331725829, "loss": 0.144, "step": 182980 }, { "epoch": 7.58, "grad_norm": 0.58203125, "learning_rate": 0.00042535810357710086, "loss": 0.2198, "step": 182990 }, { "epoch": 7.58, "grad_norm": 0.92578125, "learning_rate": 0.00042535037365161384, "loss": 0.2091, "step": 183000 }, { "epoch": 7.58, "grad_norm": 0.72265625, "learning_rate": 0.00042534264339613633, "loss": 0.1775, "step": 183010 }, { "epoch": 7.58, "grad_norm": 0.76171875, "learning_rate": 0.0004253349128106829, "loss": 0.2334, "step": 183020 }, { "epoch": 7.58, "grad_norm": 0.8359375, "learning_rate": 0.0004253271818952681, "loss": 0.1891, "step": 183030 }, { "epoch": 7.58, "grad_norm": 1.4609375, "learning_rate": 0.0004253194506499065, "loss": 0.1936, "step": 183040 }, { "epoch": 7.58, "grad_norm": 0.8203125, "learning_rate": 0.0004253117190746126, "loss": 0.2947, "step": 183050 }, { "epoch": 7.58, "grad_norm": 0.62890625, "learning_rate": 0.000425303987169401, "loss": 0.1832, "step": 183060 }, { "epoch": 7.58, "grad_norm": 1.0625, "learning_rate": 0.0004252962549342863, "loss": 0.1754, "step": 183070 }, { "epoch": 7.58, "grad_norm": 0.85546875, "learning_rate": 0.0004252885223692828, "loss": 0.1993, "step": 183080 }, { "epoch": 7.58, "grad_norm": 0.478515625, "learning_rate": 0.0004252807894744053, "loss": 0.2186, "step": 183090 }, { "epoch": 7.58, "grad_norm": 0.5078125, "learning_rate": 0.0004252730562496684, "loss": 0.2016, "step": 183100 }, { "epoch": 7.58, "grad_norm": 1.09375, "learning_rate": 0.00042526532269508645, "loss": 0.2259, "step": 183110 }, { "epoch": 7.58, "grad_norm": 0.546875, "learning_rate": 0.000425257588810674, "loss": 0.1678, "step": 183120 }, { "epoch": 7.59, "grad_norm": 0.80859375, "learning_rate": 0.00042524985459644585, "loss": 0.2164, "step": 183130 }, { "epoch": 7.59, "grad_norm": 1.03125, "learning_rate": 0.00042524212005241624, "loss": 0.2158, "step": 183140 }, { "epoch": 7.59, "grad_norm": 0.65234375, "learning_rate": 0.00042523438517860004, "loss": 0.2297, "step": 183150 }, { "epoch": 7.59, "grad_norm": 0.890625, "learning_rate": 0.0004252266499750116, "loss": 0.2236, "step": 183160 }, { "epoch": 7.59, "grad_norm": 0.80859375, "learning_rate": 0.0004252189144416655, "loss": 0.235, "step": 183170 }, { "epoch": 7.59, "grad_norm": 0.953125, "learning_rate": 0.0004252111785785763, "loss": 0.2316, "step": 183180 }, { "epoch": 7.59, "grad_norm": 0.32421875, "learning_rate": 0.00042520344238575864, "loss": 0.2175, "step": 183190 }, { "epoch": 7.59, "grad_norm": 0.953125, "learning_rate": 0.000425195705863227, "loss": 0.2252, "step": 183200 }, { "epoch": 7.59, "grad_norm": 1.0859375, "learning_rate": 0.00042518796901099595, "loss": 0.21, "step": 183210 }, { "epoch": 7.59, "grad_norm": 0.94140625, "learning_rate": 0.00042518023182908007, "loss": 0.2451, "step": 183220 }, { "epoch": 7.59, "grad_norm": 0.275390625, "learning_rate": 0.0004251724943174939, "loss": 0.1905, "step": 183230 }, { "epoch": 7.59, "grad_norm": 0.796875, "learning_rate": 0.0004251647564762521, "loss": 0.2029, "step": 183240 }, { "epoch": 7.59, "grad_norm": 0.84765625, "learning_rate": 0.000425157018305369, "loss": 0.2325, "step": 183250 }, { "epoch": 7.59, "grad_norm": 0.77734375, "learning_rate": 0.0004251492798048594, "loss": 0.2198, "step": 183260 }, { "epoch": 7.59, "grad_norm": 0.5234375, "learning_rate": 0.0004251415409747378, "loss": 0.2147, "step": 183270 }, { "epoch": 7.59, "grad_norm": 0.62109375, "learning_rate": 0.0004251338018150186, "loss": 0.233, "step": 183280 }, { "epoch": 7.59, "grad_norm": 0.69921875, "learning_rate": 0.0004251260623257166, "loss": 0.2371, "step": 183290 }, { "epoch": 7.59, "grad_norm": 0.5703125, "learning_rate": 0.00042511832250684625, "loss": 0.2055, "step": 183300 }, { "epoch": 7.59, "grad_norm": 0.0, "learning_rate": 0.00042511058235842215, "loss": 0.1773, "step": 183310 }, { "epoch": 7.59, "grad_norm": 0.59765625, "learning_rate": 0.0004251028418804588, "loss": 0.2317, "step": 183320 }, { "epoch": 7.59, "grad_norm": 0.921875, "learning_rate": 0.0004250951010729708, "loss": 0.1374, "step": 183330 }, { "epoch": 7.59, "grad_norm": 0.95703125, "learning_rate": 0.00042508735993597273, "loss": 0.1607, "step": 183340 }, { "epoch": 7.59, "grad_norm": 0.59375, "learning_rate": 0.00042507961846947916, "loss": 0.1673, "step": 183350 }, { "epoch": 7.59, "grad_norm": 1.0859375, "learning_rate": 0.00042507187667350474, "loss": 0.2673, "step": 183360 }, { "epoch": 7.6, "grad_norm": 0.5078125, "learning_rate": 0.0004250641345480639, "loss": 0.1449, "step": 183370 }, { "epoch": 7.6, "grad_norm": 1.078125, "learning_rate": 0.0004250563920931712, "loss": 0.246, "step": 183380 }, { "epoch": 7.6, "grad_norm": 0.412109375, "learning_rate": 0.00042504864930884137, "loss": 0.1985, "step": 183390 }, { "epoch": 7.6, "grad_norm": 0.78515625, "learning_rate": 0.0004250409061950888, "loss": 0.2172, "step": 183400 }, { "epoch": 7.6, "grad_norm": 1.421875, "learning_rate": 0.0004250331627519282, "loss": 0.1675, "step": 183410 }, { "epoch": 7.6, "grad_norm": 0.8203125, "learning_rate": 0.0004250254189793741, "loss": 0.197, "step": 183420 }, { "epoch": 7.6, "grad_norm": 1.4609375, "learning_rate": 0.000425017674877441, "loss": 0.2919, "step": 183430 }, { "epoch": 7.6, "grad_norm": 0.80859375, "learning_rate": 0.0004250099304461436, "loss": 0.217, "step": 183440 }, { "epoch": 7.6, "grad_norm": 0.89453125, "learning_rate": 0.00042500218568549645, "loss": 0.1809, "step": 183450 }, { "epoch": 7.6, "grad_norm": 0.400390625, "learning_rate": 0.00042499444059551395, "loss": 0.1797, "step": 183460 }, { "epoch": 7.6, "grad_norm": 0.62890625, "learning_rate": 0.0004249866951762109, "loss": 0.2079, "step": 183470 }, { "epoch": 7.6, "grad_norm": 0.66796875, "learning_rate": 0.00042497894942760176, "loss": 0.1328, "step": 183480 }, { "epoch": 7.6, "grad_norm": 0.734375, "learning_rate": 0.0004249712033497012, "loss": 0.2063, "step": 183490 }, { "epoch": 7.6, "grad_norm": 0.73828125, "learning_rate": 0.0004249634569425237, "loss": 0.2178, "step": 183500 }, { "epoch": 7.6, "grad_norm": 0.6328125, "learning_rate": 0.0004249557102060838, "loss": 0.1885, "step": 183510 }, { "epoch": 7.6, "grad_norm": 0.98828125, "learning_rate": 0.00042494796314039617, "loss": 0.2449, "step": 183520 }, { "epoch": 7.6, "grad_norm": 0.9140625, "learning_rate": 0.0004249402157454754, "loss": 0.1592, "step": 183530 }, { "epoch": 7.6, "grad_norm": 0.2275390625, "learning_rate": 0.00042493246802133603, "loss": 0.2251, "step": 183540 }, { "epoch": 7.6, "grad_norm": 1.3125, "learning_rate": 0.00042492471996799264, "loss": 0.2332, "step": 183550 }, { "epoch": 7.6, "grad_norm": 1.078125, "learning_rate": 0.00042491697158545977, "loss": 0.2109, "step": 183560 }, { "epoch": 7.6, "grad_norm": 1.109375, "learning_rate": 0.0004249092228737521, "loss": 0.2252, "step": 183570 }, { "epoch": 7.6, "grad_norm": 0.96484375, "learning_rate": 0.00042490147383288427, "loss": 0.2229, "step": 183580 }, { "epoch": 7.6, "grad_norm": 0.314453125, "learning_rate": 0.0004248937244628706, "loss": 0.2003, "step": 183590 }, { "epoch": 7.6, "grad_norm": 0.87109375, "learning_rate": 0.0004248859747637258, "loss": 0.216, "step": 183600 }, { "epoch": 7.61, "grad_norm": 1.375, "learning_rate": 0.00042487822473546457, "loss": 0.1661, "step": 183610 }, { "epoch": 7.61, "grad_norm": 0.66015625, "learning_rate": 0.0004248704743781014, "loss": 0.2086, "step": 183620 }, { "epoch": 7.61, "grad_norm": 3.765625, "learning_rate": 0.0004248627236916509, "loss": 0.2213, "step": 183630 }, { "epoch": 7.61, "grad_norm": 0.494140625, "learning_rate": 0.00042485497267612753, "loss": 0.1652, "step": 183640 }, { "epoch": 7.61, "grad_norm": 0.7421875, "learning_rate": 0.0004248472213315461, "loss": 0.1538, "step": 183650 }, { "epoch": 7.61, "grad_norm": 0.98828125, "learning_rate": 0.0004248394696579211, "loss": 0.1927, "step": 183660 }, { "epoch": 7.61, "grad_norm": 1.390625, "learning_rate": 0.00042483171765526694, "loss": 0.1968, "step": 183670 }, { "epoch": 7.61, "grad_norm": 0.546875, "learning_rate": 0.0004248239653235985, "loss": 0.2471, "step": 183680 }, { "epoch": 7.61, "grad_norm": 1.0546875, "learning_rate": 0.00042481621266293024, "loss": 0.1697, "step": 183690 }, { "epoch": 7.61, "grad_norm": 0.5, "learning_rate": 0.0004248084596732767, "loss": 0.2146, "step": 183700 }, { "epoch": 7.61, "grad_norm": 1.15625, "learning_rate": 0.00042480070635465253, "loss": 0.1784, "step": 183710 }, { "epoch": 7.61, "grad_norm": 0.3984375, "learning_rate": 0.0004247929527070723, "loss": 0.198, "step": 183720 }, { "epoch": 7.61, "grad_norm": 0.90625, "learning_rate": 0.0004247851987305506, "loss": 0.2378, "step": 183730 }, { "epoch": 7.61, "grad_norm": 0.66796875, "learning_rate": 0.00042477744442510207, "loss": 0.2269, "step": 183740 }, { "epoch": 7.61, "grad_norm": 0.68359375, "learning_rate": 0.0004247696897907412, "loss": 0.1885, "step": 183750 }, { "epoch": 7.61, "grad_norm": 0.57421875, "learning_rate": 0.0004247619348274827, "loss": 0.1773, "step": 183760 }, { "epoch": 7.61, "grad_norm": 1.59375, "learning_rate": 0.00042475417953534114, "loss": 0.197, "step": 183770 }, { "epoch": 7.61, "grad_norm": 1.03125, "learning_rate": 0.000424746423914331, "loss": 0.2204, "step": 183780 }, { "epoch": 7.61, "grad_norm": 3.0, "learning_rate": 0.000424738667964467, "loss": 0.2329, "step": 183790 }, { "epoch": 7.61, "grad_norm": 0.47265625, "learning_rate": 0.00042473091168576373, "loss": 0.2295, "step": 183800 }, { "epoch": 7.61, "grad_norm": 0.6484375, "learning_rate": 0.0004247231550782357, "loss": 0.209, "step": 183810 }, { "epoch": 7.61, "grad_norm": 0.419921875, "learning_rate": 0.0004247153981418976, "loss": 0.1874, "step": 183820 }, { "epoch": 7.61, "grad_norm": 0.5234375, "learning_rate": 0.00042470764087676395, "loss": 0.203, "step": 183830 }, { "epoch": 7.61, "grad_norm": 0.859375, "learning_rate": 0.00042469988328284943, "loss": 0.1651, "step": 183840 }, { "epoch": 7.62, "grad_norm": 0.439453125, "learning_rate": 0.00042469212536016854, "loss": 0.2281, "step": 183850 }, { "epoch": 7.62, "grad_norm": 0.86328125, "learning_rate": 0.00042468436710873603, "loss": 0.1761, "step": 183860 }, { "epoch": 7.62, "grad_norm": 0.373046875, "learning_rate": 0.0004246766085285663, "loss": 0.1656, "step": 183870 }, { "epoch": 7.62, "grad_norm": 0.9453125, "learning_rate": 0.0004246688496196741, "loss": 0.1907, "step": 183880 }, { "epoch": 7.62, "grad_norm": 0.703125, "learning_rate": 0.000424661090382074, "loss": 0.1854, "step": 183890 }, { "epoch": 7.62, "grad_norm": 0.98046875, "learning_rate": 0.0004246533308157806, "loss": 0.1968, "step": 183900 }, { "epoch": 7.62, "grad_norm": 1.40625, "learning_rate": 0.0004246455709208085, "loss": 0.2008, "step": 183910 }, { "epoch": 7.62, "grad_norm": 0.82421875, "learning_rate": 0.0004246378106971722, "loss": 0.2142, "step": 183920 }, { "epoch": 7.62, "grad_norm": 0.75, "learning_rate": 0.00042463005014488646, "loss": 0.1708, "step": 183930 }, { "epoch": 7.62, "grad_norm": 0.435546875, "learning_rate": 0.00042462228926396585, "loss": 0.23, "step": 183940 }, { "epoch": 7.62, "grad_norm": 1.1796875, "learning_rate": 0.00042461452805442497, "loss": 0.229, "step": 183950 }, { "epoch": 7.62, "grad_norm": 0.828125, "learning_rate": 0.00042460676651627835, "loss": 0.2129, "step": 183960 }, { "epoch": 7.62, "grad_norm": 4.28125, "learning_rate": 0.00042459900464954063, "loss": 0.2351, "step": 183970 }, { "epoch": 7.62, "grad_norm": 0.98046875, "learning_rate": 0.0004245912424542265, "loss": 0.2123, "step": 183980 }, { "epoch": 7.62, "grad_norm": 0.671875, "learning_rate": 0.00042458347993035053, "loss": 0.1259, "step": 183990 }, { "epoch": 7.62, "grad_norm": 0.65234375, "learning_rate": 0.0004245757170779272, "loss": 0.1903, "step": 184000 }, { "epoch": 7.62, "grad_norm": 0.8203125, "learning_rate": 0.0004245679538969713, "loss": 0.1954, "step": 184010 }, { "epoch": 7.62, "grad_norm": 1.0859375, "learning_rate": 0.0004245601903874973, "loss": 0.1791, "step": 184020 }, { "epoch": 7.62, "grad_norm": 0.5703125, "learning_rate": 0.00042455242654951996, "loss": 0.1622, "step": 184030 }, { "epoch": 7.62, "grad_norm": 0.546875, "learning_rate": 0.00042454466238305374, "loss": 0.2256, "step": 184040 }, { "epoch": 7.62, "grad_norm": 0.53125, "learning_rate": 0.00042453689788811335, "loss": 0.212, "step": 184050 }, { "epoch": 7.62, "grad_norm": 0.8515625, "learning_rate": 0.0004245291330647133, "loss": 0.1846, "step": 184060 }, { "epoch": 7.62, "grad_norm": 0.515625, "learning_rate": 0.0004245213679128683, "loss": 0.1804, "step": 184070 }, { "epoch": 7.62, "grad_norm": 1.078125, "learning_rate": 0.00042451360243259297, "loss": 0.1931, "step": 184080 }, { "epoch": 7.62, "grad_norm": 1.1171875, "learning_rate": 0.00042450583662390187, "loss": 0.2146, "step": 184090 }, { "epoch": 7.63, "grad_norm": 0.0, "learning_rate": 0.0004244980704868097, "loss": 0.206, "step": 184100 }, { "epoch": 7.63, "grad_norm": 0.44140625, "learning_rate": 0.0004244903040213308, "loss": 0.1776, "step": 184110 }, { "epoch": 7.63, "grad_norm": 0.58984375, "learning_rate": 0.0004244825372274802, "loss": 0.1772, "step": 184120 }, { "epoch": 7.63, "grad_norm": 0.90234375, "learning_rate": 0.00042447477010527224, "loss": 0.2251, "step": 184130 }, { "epoch": 7.63, "grad_norm": 0.56640625, "learning_rate": 0.0004244670026547216, "loss": 0.1665, "step": 184140 }, { "epoch": 7.63, "grad_norm": 0.5625, "learning_rate": 0.00042445923487584287, "loss": 0.1781, "step": 184150 }, { "epoch": 7.63, "grad_norm": 1.1171875, "learning_rate": 0.00042445146676865075, "loss": 0.2095, "step": 184160 }, { "epoch": 7.63, "grad_norm": 2.046875, "learning_rate": 0.0004244436983331598, "loss": 0.2027, "step": 184170 }, { "epoch": 7.63, "grad_norm": 0.828125, "learning_rate": 0.0004244359295693846, "loss": 0.2509, "step": 184180 }, { "epoch": 7.63, "grad_norm": 0.546875, "learning_rate": 0.00042442816047733983, "loss": 0.1865, "step": 184190 }, { "epoch": 7.63, "grad_norm": 0.62890625, "learning_rate": 0.0004244203910570401, "loss": 0.178, "step": 184200 }, { "epoch": 7.63, "grad_norm": 0.71875, "learning_rate": 0.00042441262130850013, "loss": 0.2149, "step": 184210 }, { "epoch": 7.63, "grad_norm": 0.53125, "learning_rate": 0.0004244048512317343, "loss": 0.1818, "step": 184220 }, { "epoch": 7.63, "grad_norm": 2.46875, "learning_rate": 0.0004243970808267574, "loss": 0.1838, "step": 184230 }, { "epoch": 7.63, "grad_norm": 0.34375, "learning_rate": 0.00042438931009358406, "loss": 0.1651, "step": 184240 }, { "epoch": 7.63, "grad_norm": 0.8984375, "learning_rate": 0.00042438153903222894, "loss": 0.2036, "step": 184250 }, { "epoch": 7.63, "grad_norm": 1.6015625, "learning_rate": 0.00042437376764270653, "loss": 0.1802, "step": 184260 }, { "epoch": 7.63, "grad_norm": 0.6328125, "learning_rate": 0.00042436599592503147, "loss": 0.2219, "step": 184270 }, { "epoch": 7.63, "grad_norm": 1.015625, "learning_rate": 0.0004243582238792185, "loss": 0.1891, "step": 184280 }, { "epoch": 7.63, "grad_norm": 1.2109375, "learning_rate": 0.00042435045150528215, "loss": 0.1672, "step": 184290 }, { "epoch": 7.63, "grad_norm": 0.6328125, "learning_rate": 0.0004243426788032371, "loss": 0.2536, "step": 184300 }, { "epoch": 7.63, "grad_norm": 1.5234375, "learning_rate": 0.00042433490577309797, "loss": 0.1591, "step": 184310 }, { "epoch": 7.63, "grad_norm": 0.42578125, "learning_rate": 0.0004243271324148793, "loss": 0.1771, "step": 184320 }, { "epoch": 7.63, "grad_norm": 0.63671875, "learning_rate": 0.0004243193587285959, "loss": 0.2344, "step": 184330 }, { "epoch": 7.64, "grad_norm": 0.5859375, "learning_rate": 0.0004243115847142622, "loss": 0.1672, "step": 184340 }, { "epoch": 7.64, "grad_norm": 1.578125, "learning_rate": 0.00042430381037189297, "loss": 0.2234, "step": 184350 }, { "epoch": 7.64, "grad_norm": 0.0164794921875, "learning_rate": 0.0004242960357015028, "loss": 0.2023, "step": 184360 }, { "epoch": 7.64, "grad_norm": 0.71484375, "learning_rate": 0.00042428826070310633, "loss": 0.1749, "step": 184370 }, { "epoch": 7.64, "grad_norm": 0.55078125, "learning_rate": 0.0004242804853767181, "loss": 0.2023, "step": 184380 }, { "epoch": 7.64, "grad_norm": 0.427734375, "learning_rate": 0.0004242727097223529, "loss": 0.1615, "step": 184390 }, { "epoch": 7.64, "grad_norm": 1.1015625, "learning_rate": 0.0004242649337400252, "loss": 0.1906, "step": 184400 }, { "epoch": 7.64, "grad_norm": 1.140625, "learning_rate": 0.0004242571574297497, "loss": 0.2524, "step": 184410 }, { "epoch": 7.64, "grad_norm": 0.79296875, "learning_rate": 0.00042424938079154117, "loss": 0.2598, "step": 184420 }, { "epoch": 7.64, "grad_norm": 0.703125, "learning_rate": 0.000424241603825414, "loss": 0.2269, "step": 184430 }, { "epoch": 7.64, "grad_norm": 0.7734375, "learning_rate": 0.00042423382653138304, "loss": 0.2097, "step": 184440 }, { "epoch": 7.64, "grad_norm": 0.87890625, "learning_rate": 0.0004242260489094628, "loss": 0.2164, "step": 184450 }, { "epoch": 7.64, "grad_norm": 0.671875, "learning_rate": 0.0004242182709596679, "loss": 0.1923, "step": 184460 }, { "epoch": 7.64, "grad_norm": 0.9140625, "learning_rate": 0.00042421049268201314, "loss": 0.2612, "step": 184470 }, { "epoch": 7.64, "grad_norm": 1.2109375, "learning_rate": 0.0004242027140765129, "loss": 0.2145, "step": 184480 }, { "epoch": 7.64, "grad_norm": 1.0, "learning_rate": 0.0004241949351431821, "loss": 0.2053, "step": 184490 }, { "epoch": 7.64, "grad_norm": 0.46875, "learning_rate": 0.00042418715588203517, "loss": 0.1974, "step": 184500 }, { "epoch": 7.64, "grad_norm": 1.171875, "learning_rate": 0.00042417937629308676, "loss": 0.2451, "step": 184510 }, { "epoch": 7.64, "grad_norm": 1.6796875, "learning_rate": 0.0004241715963763517, "loss": 0.2012, "step": 184520 }, { "epoch": 7.64, "grad_norm": 1.0546875, "learning_rate": 0.0004241638161318445, "loss": 0.2059, "step": 184530 }, { "epoch": 7.64, "grad_norm": 0.435546875, "learning_rate": 0.0004241560355595797, "loss": 0.1897, "step": 184540 }, { "epoch": 7.64, "grad_norm": 0.95703125, "learning_rate": 0.0004241482546595722, "loss": 0.1785, "step": 184550 }, { "epoch": 7.64, "grad_norm": 0.46484375, "learning_rate": 0.00042414047343183637, "loss": 0.1835, "step": 184560 }, { "epoch": 7.64, "grad_norm": 0.921875, "learning_rate": 0.000424132691876387, "loss": 0.1732, "step": 184570 }, { "epoch": 7.65, "grad_norm": 0.48828125, "learning_rate": 0.0004241249099932387, "loss": 0.201, "step": 184580 }, { "epoch": 7.65, "grad_norm": 0.55859375, "learning_rate": 0.00042411712778240614, "loss": 0.2655, "step": 184590 }, { "epoch": 7.65, "grad_norm": 1.0234375, "learning_rate": 0.000424109345243904, "loss": 0.2404, "step": 184600 }, { "epoch": 7.65, "grad_norm": 0.58984375, "learning_rate": 0.00042410156237774677, "loss": 0.2467, "step": 184610 }, { "epoch": 7.65, "grad_norm": 0.578125, "learning_rate": 0.00042409377918394925, "loss": 0.2098, "step": 184620 }, { "epoch": 7.65, "grad_norm": 1.2578125, "learning_rate": 0.00042408599566252605, "loss": 0.1872, "step": 184630 }, { "epoch": 7.65, "grad_norm": 0.640625, "learning_rate": 0.00042407821181349176, "loss": 0.2222, "step": 184640 }, { "epoch": 7.65, "grad_norm": 1.0625, "learning_rate": 0.0004240704276368611, "loss": 0.1948, "step": 184650 }, { "epoch": 7.65, "grad_norm": 0.58203125, "learning_rate": 0.00042406264313264876, "loss": 0.2054, "step": 184660 }, { "epoch": 7.65, "grad_norm": 0.84375, "learning_rate": 0.0004240548583008692, "loss": 0.2085, "step": 184670 }, { "epoch": 7.65, "grad_norm": 0.94140625, "learning_rate": 0.00042404707314153736, "loss": 0.2334, "step": 184680 }, { "epoch": 7.65, "grad_norm": 0.6171875, "learning_rate": 0.0004240392876546676, "loss": 0.2259, "step": 184690 }, { "epoch": 7.65, "grad_norm": 0.251953125, "learning_rate": 0.00042403150184027475, "loss": 0.1794, "step": 184700 }, { "epoch": 7.65, "grad_norm": 0.51171875, "learning_rate": 0.00042402371569837333, "loss": 0.2114, "step": 184710 }, { "epoch": 7.65, "grad_norm": 0.3515625, "learning_rate": 0.0004240159292289781, "loss": 0.1893, "step": 184720 }, { "epoch": 7.65, "grad_norm": 0.90234375, "learning_rate": 0.00042400814243210373, "loss": 0.1847, "step": 184730 }, { "epoch": 7.65, "grad_norm": 0.79296875, "learning_rate": 0.00042400035530776477, "loss": 0.2248, "step": 184740 }, { "epoch": 7.65, "grad_norm": 0.50390625, "learning_rate": 0.00042399256785597593, "loss": 0.1529, "step": 184750 }, { "epoch": 7.65, "grad_norm": 0.85546875, "learning_rate": 0.0004239847800767519, "loss": 0.1713, "step": 184760 }, { "epoch": 7.65, "grad_norm": 0.64453125, "learning_rate": 0.0004239769919701073, "loss": 0.2495, "step": 184770 }, { "epoch": 7.65, "grad_norm": 0.314453125, "learning_rate": 0.00042396920353605683, "loss": 0.1724, "step": 184780 }, { "epoch": 7.65, "grad_norm": 1.3046875, "learning_rate": 0.00042396141477461503, "loss": 0.1638, "step": 184790 }, { "epoch": 7.65, "grad_norm": 0.79296875, "learning_rate": 0.00042395362568579666, "loss": 0.166, "step": 184800 }, { "epoch": 7.65, "grad_norm": 0.5625, "learning_rate": 0.00042394583626961633, "loss": 0.1687, "step": 184810 }, { "epoch": 7.66, "grad_norm": 0.4765625, "learning_rate": 0.0004239380465260888, "loss": 0.2116, "step": 184820 }, { "epoch": 7.66, "grad_norm": 0.8359375, "learning_rate": 0.00042393025645522857, "loss": 0.2352, "step": 184830 }, { "epoch": 7.66, "grad_norm": 0.6328125, "learning_rate": 0.00042392246605705045, "loss": 0.1477, "step": 184840 }, { "epoch": 7.66, "grad_norm": 0.68359375, "learning_rate": 0.00042391467533156894, "loss": 0.2501, "step": 184850 }, { "epoch": 7.66, "grad_norm": 0.396484375, "learning_rate": 0.0004239068842787989, "loss": 0.2147, "step": 184860 }, { "epoch": 7.66, "grad_norm": 0.45703125, "learning_rate": 0.00042389909289875476, "loss": 0.2105, "step": 184870 }, { "epoch": 7.66, "grad_norm": 0.859375, "learning_rate": 0.00042389130119145136, "loss": 0.2013, "step": 184880 }, { "epoch": 7.66, "grad_norm": 0.7890625, "learning_rate": 0.0004238835091569034, "loss": 0.1645, "step": 184890 }, { "epoch": 7.66, "grad_norm": 0.62890625, "learning_rate": 0.00042387571679512536, "loss": 0.2095, "step": 184900 }, { "epoch": 7.66, "grad_norm": 1.6171875, "learning_rate": 0.00042386792410613203, "loss": 0.2148, "step": 184910 }, { "epoch": 7.66, "grad_norm": 0.408203125, "learning_rate": 0.00042386013108993803, "loss": 0.222, "step": 184920 }, { "epoch": 7.66, "grad_norm": 0.84375, "learning_rate": 0.000423852337746558, "loss": 0.1574, "step": 184930 }, { "epoch": 7.66, "grad_norm": 0.482421875, "learning_rate": 0.0004238445440760067, "loss": 0.2161, "step": 184940 }, { "epoch": 7.66, "grad_norm": 1.1328125, "learning_rate": 0.0004238367500782987, "loss": 0.194, "step": 184950 }, { "epoch": 7.66, "grad_norm": 0.80078125, "learning_rate": 0.0004238289557534487, "loss": 0.227, "step": 184960 }, { "epoch": 7.66, "grad_norm": 2.421875, "learning_rate": 0.00042382116110147147, "loss": 0.1806, "step": 184970 }, { "epoch": 7.66, "grad_norm": 0.8203125, "learning_rate": 0.00042381336612238153, "loss": 0.1974, "step": 184980 }, { "epoch": 7.66, "grad_norm": 2.09375, "learning_rate": 0.00042380557081619355, "loss": 0.1656, "step": 184990 }, { "epoch": 7.66, "grad_norm": 0.15625, "learning_rate": 0.0004237977751829223, "loss": 0.2178, "step": 185000 }, { "epoch": 7.66, "grad_norm": 0.490234375, "learning_rate": 0.00042378997922258246, "loss": 0.258, "step": 185010 }, { "epoch": 7.66, "grad_norm": 1.5703125, "learning_rate": 0.0004237821829351886, "loss": 0.2538, "step": 185020 }, { "epoch": 7.66, "grad_norm": 0.58984375, "learning_rate": 0.00042377438632075536, "loss": 0.1876, "step": 185030 }, { "epoch": 7.66, "grad_norm": 0.98828125, "learning_rate": 0.0004237665893792976, "loss": 0.2006, "step": 185040 }, { "epoch": 7.66, "grad_norm": 0.96875, "learning_rate": 0.0004237587921108299, "loss": 0.1999, "step": 185050 }, { "epoch": 7.67, "grad_norm": 1.2109375, "learning_rate": 0.0004237509945153668, "loss": 0.2151, "step": 185060 }, { "epoch": 7.67, "grad_norm": 0.64453125, "learning_rate": 0.00042374319659292317, "loss": 0.1819, "step": 185070 }, { "epoch": 7.67, "grad_norm": 0.52734375, "learning_rate": 0.0004237353983435136, "loss": 0.2127, "step": 185080 }, { "epoch": 7.67, "grad_norm": 1.671875, "learning_rate": 0.0004237275997671528, "loss": 0.2398, "step": 185090 }, { "epoch": 7.67, "grad_norm": 0.8515625, "learning_rate": 0.0004237198008638554, "loss": 0.2218, "step": 185100 }, { "epoch": 7.67, "grad_norm": 0.95703125, "learning_rate": 0.00042371200163363607, "loss": 0.243, "step": 185110 }, { "epoch": 7.67, "grad_norm": 0.57421875, "learning_rate": 0.0004237042020765095, "loss": 0.2207, "step": 185120 }, { "epoch": 7.67, "grad_norm": 0.5390625, "learning_rate": 0.0004236964021924904, "loss": 0.2112, "step": 185130 }, { "epoch": 7.67, "grad_norm": 0.765625, "learning_rate": 0.0004236886019815934, "loss": 0.1883, "step": 185140 }, { "epoch": 7.67, "grad_norm": 1.21875, "learning_rate": 0.00042368080144383324, "loss": 0.1616, "step": 185150 }, { "epoch": 7.67, "grad_norm": 0.6953125, "learning_rate": 0.0004236730005792246, "loss": 0.1935, "step": 185160 }, { "epoch": 7.67, "grad_norm": 1.203125, "learning_rate": 0.00042366519938778215, "loss": 0.2184, "step": 185170 }, { "epoch": 7.67, "grad_norm": 0.00020885467529296875, "learning_rate": 0.00042365739786952045, "loss": 0.1977, "step": 185180 }, { "epoch": 7.67, "grad_norm": 1.4609375, "learning_rate": 0.0004236495960244543, "loss": 0.2078, "step": 185190 }, { "epoch": 7.67, "grad_norm": 0.57421875, "learning_rate": 0.0004236417938525984, "loss": 0.2117, "step": 185200 }, { "epoch": 7.67, "grad_norm": 0.486328125, "learning_rate": 0.0004236339913539674, "loss": 0.1858, "step": 185210 }, { "epoch": 7.67, "grad_norm": 0.99609375, "learning_rate": 0.0004236261885285759, "loss": 0.2402, "step": 185220 }, { "epoch": 7.67, "grad_norm": 1.0625, "learning_rate": 0.00042361838537643883, "loss": 0.1745, "step": 185230 }, { "epoch": 7.67, "grad_norm": 0.9453125, "learning_rate": 0.00042361058189757054, "loss": 0.2253, "step": 185240 }, { "epoch": 7.67, "grad_norm": 0.220703125, "learning_rate": 0.000423602778091986, "loss": 0.2039, "step": 185250 }, { "epoch": 7.67, "grad_norm": 0.72265625, "learning_rate": 0.0004235949739596997, "loss": 0.2002, "step": 185260 }, { "epoch": 7.67, "grad_norm": 0.392578125, "learning_rate": 0.0004235871695007264, "loss": 0.2111, "step": 185270 }, { "epoch": 7.67, "grad_norm": 0.93359375, "learning_rate": 0.0004235793647150808, "loss": 0.2076, "step": 185280 }, { "epoch": 7.67, "grad_norm": 0.83203125, "learning_rate": 0.00042357155960277766, "loss": 0.1844, "step": 185290 }, { "epoch": 7.68, "grad_norm": 0.609375, "learning_rate": 0.00042356375416383155, "loss": 0.1455, "step": 185300 }, { "epoch": 7.68, "grad_norm": 1.0625, "learning_rate": 0.00042355594839825716, "loss": 0.1686, "step": 185310 }, { "epoch": 7.68, "grad_norm": 0.54296875, "learning_rate": 0.0004235481423060692, "loss": 0.2217, "step": 185320 }, { "epoch": 7.68, "grad_norm": 0.61328125, "learning_rate": 0.0004235403358872825, "loss": 0.2257, "step": 185330 }, { "epoch": 7.68, "grad_norm": 0.6953125, "learning_rate": 0.00042353252914191156, "loss": 0.2169, "step": 185340 }, { "epoch": 7.68, "grad_norm": 0.345703125, "learning_rate": 0.0004235247220699711, "loss": 0.143, "step": 185350 }, { "epoch": 7.68, "grad_norm": 0.546875, "learning_rate": 0.00042351691467147585, "loss": 0.1945, "step": 185360 }, { "epoch": 7.68, "grad_norm": 1.2578125, "learning_rate": 0.0004235091069464405, "loss": 0.2213, "step": 185370 }, { "epoch": 7.68, "grad_norm": 0.3046875, "learning_rate": 0.00042350129889487983, "loss": 0.2232, "step": 185380 }, { "epoch": 7.68, "grad_norm": 0.90625, "learning_rate": 0.0004234934905168084, "loss": 0.1813, "step": 185390 }, { "epoch": 7.68, "grad_norm": 0.953125, "learning_rate": 0.0004234856818122409, "loss": 0.1799, "step": 185400 }, { "epoch": 7.68, "grad_norm": 0.6328125, "learning_rate": 0.0004234778727811922, "loss": 0.241, "step": 185410 }, { "epoch": 7.68, "grad_norm": 1.15625, "learning_rate": 0.0004234700634236768, "loss": 0.2014, "step": 185420 }, { "epoch": 7.68, "grad_norm": 0.859375, "learning_rate": 0.00042346225373970947, "loss": 0.2093, "step": 185430 }, { "epoch": 7.68, "grad_norm": 0.74609375, "learning_rate": 0.00042345444372930496, "loss": 0.1548, "step": 185440 }, { "epoch": 7.68, "grad_norm": 0.53125, "learning_rate": 0.0004234466333924779, "loss": 0.1763, "step": 185450 }, { "epoch": 7.68, "grad_norm": 0.765625, "learning_rate": 0.000423438822729243, "loss": 0.2107, "step": 185460 }, { "epoch": 7.68, "grad_norm": 0.4609375, "learning_rate": 0.00042343101173961497, "loss": 0.1521, "step": 185470 }, { "epoch": 7.68, "grad_norm": 1.125, "learning_rate": 0.0004234232004236085, "loss": 0.2205, "step": 185480 }, { "epoch": 7.68, "grad_norm": 0.453125, "learning_rate": 0.00042341538878123833, "loss": 0.1418, "step": 185490 }, { "epoch": 7.68, "grad_norm": 0.1728515625, "learning_rate": 0.000423407576812519, "loss": 0.1665, "step": 185500 }, { "epoch": 7.68, "grad_norm": 0.349609375, "learning_rate": 0.0004233997645174654, "loss": 0.1909, "step": 185510 }, { "epoch": 7.68, "grad_norm": 0.353515625, "learning_rate": 0.00042339195189609224, "loss": 0.1995, "step": 185520 }, { "epoch": 7.68, "grad_norm": 0.404296875, "learning_rate": 0.0004233841389484141, "loss": 0.2115, "step": 185530 }, { "epoch": 7.69, "grad_norm": 1.09375, "learning_rate": 0.0004233763256744458, "loss": 0.2027, "step": 185540 }, { "epoch": 7.69, "grad_norm": 0.70703125, "learning_rate": 0.0004233685120742019, "loss": 0.1788, "step": 185550 }, { "epoch": 7.69, "grad_norm": 0.72265625, "learning_rate": 0.00042336069814769716, "loss": 0.2224, "step": 185560 }, { "epoch": 7.69, "grad_norm": 0.30078125, "learning_rate": 0.0004233528838949464, "loss": 0.2217, "step": 185570 }, { "epoch": 7.69, "grad_norm": 0.84375, "learning_rate": 0.00042334506931596415, "loss": 0.175, "step": 185580 }, { "epoch": 7.69, "grad_norm": 0.953125, "learning_rate": 0.0004233372544107652, "loss": 0.2133, "step": 185590 }, { "epoch": 7.69, "grad_norm": 0.859375, "learning_rate": 0.00042332943917936433, "loss": 0.1927, "step": 185600 }, { "epoch": 7.69, "grad_norm": 0.66015625, "learning_rate": 0.0004233216236217761, "loss": 0.2171, "step": 185610 }, { "epoch": 7.69, "grad_norm": 1.140625, "learning_rate": 0.0004233138077380153, "loss": 0.1814, "step": 185620 }, { "epoch": 7.69, "grad_norm": 0.5859375, "learning_rate": 0.0004233059915280967, "loss": 0.2018, "step": 185630 }, { "epoch": 7.69, "grad_norm": 0.953125, "learning_rate": 0.00042329817499203487, "loss": 0.1898, "step": 185640 }, { "epoch": 7.69, "grad_norm": 2.28125, "learning_rate": 0.0004232903581298446, "loss": 0.1929, "step": 185650 }, { "epoch": 7.69, "grad_norm": 1.0859375, "learning_rate": 0.0004232825409415406, "loss": 0.2164, "step": 185660 }, { "epoch": 7.69, "grad_norm": 1.2421875, "learning_rate": 0.0004232747234271376, "loss": 0.2183, "step": 185670 }, { "epoch": 7.69, "grad_norm": 0.640625, "learning_rate": 0.00042326690558665026, "loss": 0.1996, "step": 185680 }, { "epoch": 7.69, "grad_norm": 0.7265625, "learning_rate": 0.0004232590874200933, "loss": 0.2091, "step": 185690 }, { "epoch": 7.69, "grad_norm": 1.3515625, "learning_rate": 0.00042325126892748144, "loss": 0.1931, "step": 185700 }, { "epoch": 7.69, "grad_norm": 1.078125, "learning_rate": 0.00042324345010882935, "loss": 0.1964, "step": 185710 }, { "epoch": 7.69, "grad_norm": 0.58984375, "learning_rate": 0.00042323563096415196, "loss": 0.1653, "step": 185720 }, { "epoch": 7.69, "grad_norm": 0.69140625, "learning_rate": 0.00042322781149346366, "loss": 0.2271, "step": 185730 }, { "epoch": 7.69, "grad_norm": 0.7734375, "learning_rate": 0.0004232199916967794, "loss": 0.2046, "step": 185740 }, { "epoch": 7.69, "grad_norm": 0.427734375, "learning_rate": 0.00042321217157411387, "loss": 0.1849, "step": 185750 }, { "epoch": 7.69, "grad_norm": 0.58984375, "learning_rate": 0.0004232043511254816, "loss": 0.198, "step": 185760 }, { "epoch": 7.69, "grad_norm": 1.0703125, "learning_rate": 0.00042319653035089756, "loss": 0.201, "step": 185770 }, { "epoch": 7.69, "grad_norm": 0.5625, "learning_rate": 0.0004231887092503763, "loss": 0.1817, "step": 185780 }, { "epoch": 7.7, "grad_norm": 0.56640625, "learning_rate": 0.0004231808878239326, "loss": 0.1802, "step": 185790 }, { "epoch": 7.7, "grad_norm": 0.63671875, "learning_rate": 0.00042317306607158125, "loss": 0.2254, "step": 185800 }, { "epoch": 7.7, "grad_norm": 0.49609375, "learning_rate": 0.0004231652439933368, "loss": 0.2388, "step": 185810 }, { "epoch": 7.7, "grad_norm": 1.171875, "learning_rate": 0.00042315742158921413, "loss": 0.2066, "step": 185820 }, { "epoch": 7.7, "grad_norm": 1.03125, "learning_rate": 0.0004231495988592278, "loss": 0.2058, "step": 185830 }, { "epoch": 7.7, "grad_norm": 0.357421875, "learning_rate": 0.00042314177580339264, "loss": 0.1716, "step": 185840 }, { "epoch": 7.7, "grad_norm": 0.5, "learning_rate": 0.0004231339524217234, "loss": 0.2269, "step": 185850 }, { "epoch": 7.7, "grad_norm": 0.59375, "learning_rate": 0.0004231261287142347, "loss": 0.1698, "step": 185860 }, { "epoch": 7.7, "grad_norm": 1.046875, "learning_rate": 0.0004231183046809414, "loss": 0.1794, "step": 185870 }, { "epoch": 7.7, "grad_norm": 0.63671875, "learning_rate": 0.0004231104803218581, "loss": 0.1887, "step": 185880 }, { "epoch": 7.7, "grad_norm": 0.494140625, "learning_rate": 0.00042310265563699957, "loss": 0.2548, "step": 185890 }, { "epoch": 7.7, "grad_norm": 1.0390625, "learning_rate": 0.00042309483062638055, "loss": 0.2241, "step": 185900 }, { "epoch": 7.7, "grad_norm": 0.73046875, "learning_rate": 0.00042308700529001576, "loss": 0.1967, "step": 185910 }, { "epoch": 7.7, "grad_norm": 0.8671875, "learning_rate": 0.0004230791796279199, "loss": 0.2048, "step": 185920 }, { "epoch": 7.7, "grad_norm": 0.71875, "learning_rate": 0.0004230713536401077, "loss": 0.2451, "step": 185930 }, { "epoch": 7.7, "grad_norm": 0.671875, "learning_rate": 0.00042306352732659396, "loss": 0.1819, "step": 185940 }, { "epoch": 7.7, "grad_norm": 0.87890625, "learning_rate": 0.0004230557006873933, "loss": 0.1793, "step": 185950 }, { "epoch": 7.7, "grad_norm": 1.2421875, "learning_rate": 0.0004230478737225205, "loss": 0.2236, "step": 185960 }, { "epoch": 7.7, "grad_norm": 0.625, "learning_rate": 0.0004230400464319903, "loss": 0.1894, "step": 185970 }, { "epoch": 7.7, "grad_norm": 0.4375, "learning_rate": 0.00042303221881581746, "loss": 0.1736, "step": 185980 }, { "epoch": 7.7, "grad_norm": 0.58984375, "learning_rate": 0.0004230243908740166, "loss": 0.1797, "step": 185990 }, { "epoch": 7.7, "grad_norm": 0.34765625, "learning_rate": 0.00042301656260660254, "loss": 0.1914, "step": 186000 }, { "epoch": 7.7, "grad_norm": 0.42578125, "learning_rate": 0.0004230087340135901, "loss": 0.1568, "step": 186010 }, { "epoch": 7.7, "grad_norm": 0.74609375, "learning_rate": 0.0004230009050949938, "loss": 0.1853, "step": 186020 }, { "epoch": 7.71, "grad_norm": 0.90625, "learning_rate": 0.0004229930758508285, "loss": 0.2368, "step": 186030 }, { "epoch": 7.71, "grad_norm": 0.5703125, "learning_rate": 0.00042298524628110886, "loss": 0.2113, "step": 186040 }, { "epoch": 7.71, "grad_norm": 0.734375, "learning_rate": 0.0004229774163858497, "loss": 0.1727, "step": 186050 }, { "epoch": 7.71, "grad_norm": 0.41015625, "learning_rate": 0.00042296958616506574, "loss": 0.2448, "step": 186060 }, { "epoch": 7.71, "grad_norm": 0.9140625, "learning_rate": 0.0004229617556187717, "loss": 0.2338, "step": 186070 }, { "epoch": 7.71, "grad_norm": 0.734375, "learning_rate": 0.0004229539247469822, "loss": 0.18, "step": 186080 }, { "epoch": 7.71, "grad_norm": 0.89453125, "learning_rate": 0.0004229460935497123, "loss": 0.2221, "step": 186090 }, { "epoch": 7.71, "grad_norm": 0.5859375, "learning_rate": 0.00042293826202697643, "loss": 0.2206, "step": 186100 }, { "epoch": 7.71, "grad_norm": 1.234375, "learning_rate": 0.0004229304301787894, "loss": 0.2161, "step": 186110 }, { "epoch": 7.71, "grad_norm": 1.140625, "learning_rate": 0.00042292259800516595, "loss": 0.2118, "step": 186120 }, { "epoch": 7.71, "grad_norm": 0.5390625, "learning_rate": 0.00042291476550612093, "loss": 0.1956, "step": 186130 }, { "epoch": 7.71, "grad_norm": 0.435546875, "learning_rate": 0.00042290693268166895, "loss": 0.2341, "step": 186140 }, { "epoch": 7.71, "grad_norm": 0.68359375, "learning_rate": 0.0004228990995318248, "loss": 0.1952, "step": 186150 }, { "epoch": 7.71, "grad_norm": 0.435546875, "learning_rate": 0.00042289126605660324, "loss": 0.2512, "step": 186160 }, { "epoch": 7.71, "grad_norm": 0.796875, "learning_rate": 0.000422883432256019, "loss": 0.1592, "step": 186170 }, { "epoch": 7.71, "grad_norm": 0.4765625, "learning_rate": 0.0004228755981300867, "loss": 0.1951, "step": 186180 }, { "epoch": 7.71, "grad_norm": 0.0693359375, "learning_rate": 0.0004228677636788213, "loss": 0.1894, "step": 186190 }, { "epoch": 7.71, "grad_norm": 1.1484375, "learning_rate": 0.00042285992890223745, "loss": 0.2339, "step": 186200 }, { "epoch": 7.71, "grad_norm": 0.5234375, "learning_rate": 0.00042285209380034985, "loss": 0.24, "step": 186210 }, { "epoch": 7.71, "grad_norm": 0.365234375, "learning_rate": 0.0004228442583731732, "loss": 0.2463, "step": 186220 }, { "epoch": 7.71, "grad_norm": 0.5625, "learning_rate": 0.00042283642262072244, "loss": 0.1849, "step": 186230 }, { "epoch": 7.71, "grad_norm": 1.0859375, "learning_rate": 0.0004228285865430121, "loss": 0.241, "step": 186240 }, { "epoch": 7.71, "grad_norm": 0.81640625, "learning_rate": 0.0004228207501400571, "loss": 0.2694, "step": 186250 }, { "epoch": 7.71, "grad_norm": 0.298828125, "learning_rate": 0.000422812913411872, "loss": 0.2239, "step": 186260 }, { "epoch": 7.72, "grad_norm": 0.76171875, "learning_rate": 0.00042280507635847174, "loss": 0.2119, "step": 186270 }, { "epoch": 7.72, "grad_norm": 0.48046875, "learning_rate": 0.000422797238979871, "loss": 0.1741, "step": 186280 }, { "epoch": 7.72, "grad_norm": 0.6875, "learning_rate": 0.0004227894012760845, "loss": 0.2412, "step": 186290 }, { "epoch": 7.72, "grad_norm": 0.9921875, "learning_rate": 0.000422781563247127, "loss": 0.1974, "step": 186300 }, { "epoch": 7.72, "grad_norm": 0.7578125, "learning_rate": 0.0004227737248930133, "loss": 0.1795, "step": 186310 }, { "epoch": 7.72, "grad_norm": 0.60546875, "learning_rate": 0.00042276588621375805, "loss": 0.1889, "step": 186320 }, { "epoch": 7.72, "grad_norm": 1.1328125, "learning_rate": 0.0004227580472093761, "loss": 0.2305, "step": 186330 }, { "epoch": 7.72, "grad_norm": 0.435546875, "learning_rate": 0.0004227502078798821, "loss": 0.2232, "step": 186340 }, { "epoch": 7.72, "grad_norm": 1.3203125, "learning_rate": 0.00042274236822529096, "loss": 0.2295, "step": 186350 }, { "epoch": 7.72, "grad_norm": 0.55078125, "learning_rate": 0.0004227345282456173, "loss": 0.2083, "step": 186360 }, { "epoch": 7.72, "grad_norm": 0.78125, "learning_rate": 0.0004227266879408759, "loss": 0.2034, "step": 186370 }, { "epoch": 7.72, "grad_norm": 1.2734375, "learning_rate": 0.0004227188473110815, "loss": 0.224, "step": 186380 }, { "epoch": 7.72, "grad_norm": 0.484375, "learning_rate": 0.00042271100635624885, "loss": 0.1924, "step": 186390 }, { "epoch": 7.72, "grad_norm": 0.92578125, "learning_rate": 0.00042270316507639284, "loss": 0.1669, "step": 186400 }, { "epoch": 7.72, "grad_norm": 1.1640625, "learning_rate": 0.00042269532347152805, "loss": 0.1507, "step": 186410 }, { "epoch": 7.72, "grad_norm": 0.79296875, "learning_rate": 0.0004226874815416694, "loss": 0.2059, "step": 186420 }, { "epoch": 7.72, "grad_norm": 0.392578125, "learning_rate": 0.0004226796392868314, "loss": 0.2227, "step": 186430 }, { "epoch": 7.72, "grad_norm": 0.5, "learning_rate": 0.0004226717967070291, "loss": 0.214, "step": 186440 }, { "epoch": 7.72, "grad_norm": 0.61328125, "learning_rate": 0.00042266395380227706, "loss": 0.2178, "step": 186450 }, { "epoch": 7.72, "grad_norm": 1.328125, "learning_rate": 0.00042265611057259013, "loss": 0.2159, "step": 186460 }, { "epoch": 7.72, "grad_norm": 0.953125, "learning_rate": 0.00042264826701798305, "loss": 0.2181, "step": 186470 }, { "epoch": 7.72, "grad_norm": 0.9453125, "learning_rate": 0.00042264042313847056, "loss": 0.1769, "step": 186480 }, { "epoch": 7.72, "grad_norm": 0.46484375, "learning_rate": 0.0004226325789340675, "loss": 0.1647, "step": 186490 }, { "epoch": 7.72, "grad_norm": 0.69921875, "learning_rate": 0.00042262473440478847, "loss": 0.2133, "step": 186500 }, { "epoch": 7.73, "grad_norm": 0.8046875, "learning_rate": 0.00042261688955064837, "loss": 0.2279, "step": 186510 }, { "epoch": 7.73, "grad_norm": 0.8359375, "learning_rate": 0.0004226090443716619, "loss": 0.2156, "step": 186520 }, { "epoch": 7.73, "grad_norm": 0.46484375, "learning_rate": 0.000422601198867844, "loss": 0.2141, "step": 186530 }, { "epoch": 7.73, "grad_norm": 0.7265625, "learning_rate": 0.0004225933530392091, "loss": 0.2017, "step": 186540 }, { "epoch": 7.73, "grad_norm": 0.5, "learning_rate": 0.00042258550688577224, "loss": 0.2076, "step": 186550 }, { "epoch": 7.73, "grad_norm": 1.046875, "learning_rate": 0.0004225776604075481, "loss": 0.2522, "step": 186560 }, { "epoch": 7.73, "grad_norm": 0.6796875, "learning_rate": 0.0004225698136045514, "loss": 0.2083, "step": 186570 }, { "epoch": 7.73, "grad_norm": 1.171875, "learning_rate": 0.0004225619664767969, "loss": 0.2047, "step": 186580 }, { "epoch": 7.73, "grad_norm": 0.64453125, "learning_rate": 0.00042255411902429954, "loss": 0.2082, "step": 186590 }, { "epoch": 7.73, "grad_norm": 1.6953125, "learning_rate": 0.0004225462712470738, "loss": 0.1499, "step": 186600 }, { "epoch": 7.73, "grad_norm": 0.91796875, "learning_rate": 0.00042253842314513473, "loss": 0.2031, "step": 186610 }, { "epoch": 7.73, "grad_norm": 0.51953125, "learning_rate": 0.000422530574718497, "loss": 0.2136, "step": 186620 }, { "epoch": 7.73, "grad_norm": 1.4609375, "learning_rate": 0.0004225227259671752, "loss": 0.2354, "step": 186630 }, { "epoch": 7.73, "grad_norm": 0.59375, "learning_rate": 0.0004225148768911844, "loss": 0.232, "step": 186640 }, { "epoch": 7.73, "grad_norm": 0.625, "learning_rate": 0.0004225070274905393, "loss": 0.218, "step": 186650 }, { "epoch": 7.73, "grad_norm": 0.75, "learning_rate": 0.0004224991777652544, "loss": 0.2135, "step": 186660 }, { "epoch": 7.73, "grad_norm": 0.96875, "learning_rate": 0.0004224913277153448, "loss": 0.1721, "step": 186670 }, { "epoch": 7.73, "grad_norm": 0.9609375, "learning_rate": 0.00042248347734082515, "loss": 0.1748, "step": 186680 }, { "epoch": 7.73, "grad_norm": 0.98046875, "learning_rate": 0.00042247562664171027, "loss": 0.182, "step": 186690 }, { "epoch": 7.73, "grad_norm": 0.78125, "learning_rate": 0.00042246777561801474, "loss": 0.2432, "step": 186700 }, { "epoch": 7.73, "grad_norm": 0.51171875, "learning_rate": 0.00042245992426975354, "loss": 0.1936, "step": 186710 }, { "epoch": 7.73, "grad_norm": 1.46875, "learning_rate": 0.00042245207259694145, "loss": 0.2131, "step": 186720 }, { "epoch": 7.73, "grad_norm": 0.65234375, "learning_rate": 0.00042244422059959307, "loss": 0.2072, "step": 186730 }, { "epoch": 7.73, "grad_norm": 0.310546875, "learning_rate": 0.0004224363682777234, "loss": 0.2076, "step": 186740 }, { "epoch": 7.74, "grad_norm": 0.640625, "learning_rate": 0.0004224285156313471, "loss": 0.188, "step": 186750 }, { "epoch": 7.74, "grad_norm": 0.375, "learning_rate": 0.0004224206626604789, "loss": 0.2016, "step": 186760 }, { "epoch": 7.74, "grad_norm": 0.84375, "learning_rate": 0.0004224128093651337, "loss": 0.1754, "step": 186770 }, { "epoch": 7.74, "grad_norm": 0.373046875, "learning_rate": 0.0004224049557453261, "loss": 0.2106, "step": 186780 }, { "epoch": 7.74, "grad_norm": 0.7734375, "learning_rate": 0.0004223971018010711, "loss": 0.1587, "step": 186790 }, { "epoch": 7.74, "grad_norm": 0.96875, "learning_rate": 0.0004223892475323833, "loss": 0.2254, "step": 186800 }, { "epoch": 7.74, "grad_norm": 0.3828125, "learning_rate": 0.00042238139293927757, "loss": 0.1664, "step": 186810 }, { "epoch": 7.74, "grad_norm": 0.5390625, "learning_rate": 0.0004223735380217687, "loss": 0.1643, "step": 186820 }, { "epoch": 7.74, "grad_norm": 0.61328125, "learning_rate": 0.00042236568277987145, "loss": 0.2405, "step": 186830 }, { "epoch": 7.74, "grad_norm": 1.0234375, "learning_rate": 0.00042235782721360056, "loss": 0.2515, "step": 186840 }, { "epoch": 7.74, "grad_norm": 0.640625, "learning_rate": 0.0004223499713229709, "loss": 0.196, "step": 186850 }, { "epoch": 7.74, "grad_norm": 0.63671875, "learning_rate": 0.0004223421151079972, "loss": 0.1547, "step": 186860 }, { "epoch": 7.74, "grad_norm": 0.494140625, "learning_rate": 0.0004223342585686942, "loss": 0.1885, "step": 186870 }, { "epoch": 7.74, "grad_norm": 0.88671875, "learning_rate": 0.00042232640170507675, "loss": 0.221, "step": 186880 }, { "epoch": 7.74, "grad_norm": 0.97265625, "learning_rate": 0.0004223185445171597, "loss": 0.1823, "step": 186890 }, { "epoch": 7.74, "grad_norm": 0.90234375, "learning_rate": 0.00042231068700495766, "loss": 0.2281, "step": 186900 }, { "epoch": 7.74, "grad_norm": 0.625, "learning_rate": 0.0004223028291684855, "loss": 0.1785, "step": 186910 }, { "epoch": 7.74, "grad_norm": 0.72265625, "learning_rate": 0.00042229497100775806, "loss": 0.1818, "step": 186920 }, { "epoch": 7.74, "grad_norm": 0.85546875, "learning_rate": 0.00042228711252279016, "loss": 0.2154, "step": 186930 }, { "epoch": 7.74, "grad_norm": 1.09375, "learning_rate": 0.0004222792537135964, "loss": 0.2092, "step": 186940 }, { "epoch": 7.74, "grad_norm": 0.50390625, "learning_rate": 0.0004222713945801918, "loss": 0.228, "step": 186950 }, { "epoch": 7.74, "grad_norm": 0.83984375, "learning_rate": 0.00042226353512259097, "loss": 0.2107, "step": 186960 }, { "epoch": 7.74, "grad_norm": 0.478515625, "learning_rate": 0.0004222556753408088, "loss": 0.2065, "step": 186970 }, { "epoch": 7.74, "grad_norm": 0.83984375, "learning_rate": 0.00042224781523486003, "loss": 0.1984, "step": 186980 }, { "epoch": 7.75, "grad_norm": 0.6171875, "learning_rate": 0.00042223995480475954, "loss": 0.1951, "step": 186990 }, { "epoch": 7.75, "grad_norm": 1.03125, "learning_rate": 0.0004222320940505219, "loss": 0.1909, "step": 187000 }, { "epoch": 7.75, "grad_norm": 0.953125, "learning_rate": 0.00042222423297216225, "loss": 0.239, "step": 187010 }, { "epoch": 7.75, "grad_norm": 0.3984375, "learning_rate": 0.00042221637156969504, "loss": 0.1551, "step": 187020 }, { "epoch": 7.75, "grad_norm": 0.35546875, "learning_rate": 0.0004222085098431353, "loss": 0.2113, "step": 187030 }, { "epoch": 7.75, "grad_norm": 0.453125, "learning_rate": 0.0004222006477924977, "loss": 0.2283, "step": 187040 }, { "epoch": 7.75, "grad_norm": 0.671875, "learning_rate": 0.0004221927854177971, "loss": 0.1644, "step": 187050 }, { "epoch": 7.75, "grad_norm": 0.6640625, "learning_rate": 0.0004221849227190483, "loss": 0.174, "step": 187060 }, { "epoch": 7.75, "grad_norm": 1.5703125, "learning_rate": 0.0004221770596962661, "loss": 0.213, "step": 187070 }, { "epoch": 7.75, "grad_norm": 0.8125, "learning_rate": 0.0004221691963494651, "loss": 0.1979, "step": 187080 }, { "epoch": 7.75, "grad_norm": 0.58203125, "learning_rate": 0.0004221613326786604, "loss": 0.1462, "step": 187090 }, { "epoch": 7.75, "grad_norm": 1.109375, "learning_rate": 0.0004221534686838666, "loss": 0.1764, "step": 187100 }, { "epoch": 7.75, "grad_norm": 0.6171875, "learning_rate": 0.0004221456043650986, "loss": 0.2059, "step": 187110 }, { "epoch": 7.75, "grad_norm": 0.84375, "learning_rate": 0.0004221377397223712, "loss": 0.211, "step": 187120 }, { "epoch": 7.75, "grad_norm": 0.81640625, "learning_rate": 0.00042212987475569907, "loss": 0.1804, "step": 187130 }, { "epoch": 7.75, "grad_norm": 0.67578125, "learning_rate": 0.0004221220094650972, "loss": 0.2361, "step": 187140 }, { "epoch": 7.75, "grad_norm": 1.1953125, "learning_rate": 0.00042211414385058023, "loss": 0.1811, "step": 187150 }, { "epoch": 7.75, "grad_norm": 0.458984375, "learning_rate": 0.00042210627791216306, "loss": 0.2065, "step": 187160 }, { "epoch": 7.75, "grad_norm": 0.6484375, "learning_rate": 0.00042209841164986045, "loss": 0.1709, "step": 187170 }, { "epoch": 7.75, "grad_norm": 0.75, "learning_rate": 0.00042209054506368724, "loss": 0.1789, "step": 187180 }, { "epoch": 7.75, "grad_norm": 0.80859375, "learning_rate": 0.0004220826781536582, "loss": 0.2262, "step": 187190 }, { "epoch": 7.75, "grad_norm": 1.7421875, "learning_rate": 0.0004220748109197881, "loss": 0.1844, "step": 187200 }, { "epoch": 7.75, "grad_norm": 0.1728515625, "learning_rate": 0.00042206694336209186, "loss": 0.251, "step": 187210 }, { "epoch": 7.75, "grad_norm": 0.71484375, "learning_rate": 0.0004220590754805841, "loss": 0.1635, "step": 187220 }, { "epoch": 7.76, "grad_norm": 0.59375, "learning_rate": 0.0004220512072752798, "loss": 0.2318, "step": 187230 }, { "epoch": 7.76, "grad_norm": 0.80859375, "learning_rate": 0.00042204333874619373, "loss": 0.1731, "step": 187240 }, { "epoch": 7.76, "grad_norm": 0.671875, "learning_rate": 0.0004220354698933407, "loss": 0.1779, "step": 187250 }, { "epoch": 7.76, "grad_norm": 0.54296875, "learning_rate": 0.0004220276007167354, "loss": 0.1582, "step": 187260 }, { "epoch": 7.76, "grad_norm": 0.6484375, "learning_rate": 0.0004220197312163928, "loss": 0.1844, "step": 187270 }, { "epoch": 7.76, "grad_norm": 0.65625, "learning_rate": 0.0004220118613923276, "loss": 0.1589, "step": 187280 }, { "epoch": 7.76, "grad_norm": 0.74609375, "learning_rate": 0.00042200399124455476, "loss": 0.1835, "step": 187290 }, { "epoch": 7.76, "grad_norm": 0.6171875, "learning_rate": 0.0004219961207730889, "loss": 0.1859, "step": 187300 }, { "epoch": 7.76, "grad_norm": 1.578125, "learning_rate": 0.00042198824997794496, "loss": 0.2253, "step": 187310 }, { "epoch": 7.76, "grad_norm": 1.1171875, "learning_rate": 0.00042198037885913766, "loss": 0.1591, "step": 187320 }, { "epoch": 7.76, "grad_norm": 0.578125, "learning_rate": 0.0004219725074166818, "loss": 0.203, "step": 187330 }, { "epoch": 7.76, "grad_norm": 0.9140625, "learning_rate": 0.0004219646356505923, "loss": 0.1886, "step": 187340 }, { "epoch": 7.76, "grad_norm": 0.6640625, "learning_rate": 0.000421956763560884, "loss": 0.1792, "step": 187350 }, { "epoch": 7.76, "grad_norm": 0.95703125, "learning_rate": 0.00042194889114757165, "loss": 0.2534, "step": 187360 }, { "epoch": 7.76, "grad_norm": 2.5625, "learning_rate": 0.00042194101841066997, "loss": 0.2219, "step": 187370 }, { "epoch": 7.76, "grad_norm": 0.423828125, "learning_rate": 0.0004219331453501939, "loss": 0.1965, "step": 187380 }, { "epoch": 7.76, "grad_norm": 0.271484375, "learning_rate": 0.00042192527196615817, "loss": 0.2274, "step": 187390 }, { "epoch": 7.76, "grad_norm": 0.62109375, "learning_rate": 0.00042191739825857765, "loss": 0.2447, "step": 187400 }, { "epoch": 7.76, "grad_norm": 0.890625, "learning_rate": 0.0004219095242274672, "loss": 0.2195, "step": 187410 }, { "epoch": 7.76, "grad_norm": 0.435546875, "learning_rate": 0.00042190164987284155, "loss": 0.1649, "step": 187420 }, { "epoch": 7.76, "grad_norm": 0.6953125, "learning_rate": 0.0004218937751947156, "loss": 0.2333, "step": 187430 }, { "epoch": 7.76, "grad_norm": 1.4765625, "learning_rate": 0.0004218859001931041, "loss": 0.1982, "step": 187440 }, { "epoch": 7.76, "grad_norm": 0.66796875, "learning_rate": 0.0004218780248680219, "loss": 0.1909, "step": 187450 }, { "epoch": 7.76, "grad_norm": 0.66015625, "learning_rate": 0.00042187014921948385, "loss": 0.1928, "step": 187460 }, { "epoch": 7.76, "grad_norm": 0.71875, "learning_rate": 0.0004218622732475047, "loss": 0.1898, "step": 187470 }, { "epoch": 7.77, "grad_norm": 0.58984375, "learning_rate": 0.00042185439695209933, "loss": 0.183, "step": 187480 }, { "epoch": 7.77, "grad_norm": 0.625, "learning_rate": 0.0004218465203332825, "loss": 0.1898, "step": 187490 }, { "epoch": 7.77, "grad_norm": 0.65625, "learning_rate": 0.00042183864339106916, "loss": 0.1724, "step": 187500 }, { "epoch": 7.77, "grad_norm": 1.15625, "learning_rate": 0.000421830766125474, "loss": 0.2252, "step": 187510 }, { "epoch": 7.77, "grad_norm": 0.96875, "learning_rate": 0.00042182288853651186, "loss": 0.217, "step": 187520 }, { "epoch": 7.77, "grad_norm": 0.5625, "learning_rate": 0.0004218150106241977, "loss": 0.2523, "step": 187530 }, { "epoch": 7.77, "grad_norm": 0.56640625, "learning_rate": 0.00042180713238854615, "loss": 0.251, "step": 187540 }, { "epoch": 7.77, "grad_norm": 0.66015625, "learning_rate": 0.00042179925382957215, "loss": 0.1868, "step": 187550 }, { "epoch": 7.77, "grad_norm": 0.80078125, "learning_rate": 0.0004217913749472905, "loss": 0.1779, "step": 187560 }, { "epoch": 7.77, "grad_norm": 1.203125, "learning_rate": 0.000421783495741716, "loss": 0.1774, "step": 187570 }, { "epoch": 7.77, "grad_norm": 1.2421875, "learning_rate": 0.00042177561621286366, "loss": 0.215, "step": 187580 }, { "epoch": 7.77, "grad_norm": 0.9296875, "learning_rate": 0.00042176773636074807, "loss": 0.1913, "step": 187590 }, { "epoch": 7.77, "grad_norm": 0.61328125, "learning_rate": 0.0004217598561853841, "loss": 0.1555, "step": 187600 }, { "epoch": 7.77, "grad_norm": 1.3125, "learning_rate": 0.0004217519756867867, "loss": 0.1957, "step": 187610 }, { "epoch": 7.77, "grad_norm": 0.73828125, "learning_rate": 0.00042174409486497063, "loss": 0.219, "step": 187620 }, { "epoch": 7.77, "grad_norm": 0.52734375, "learning_rate": 0.0004217362137199506, "loss": 0.1731, "step": 187630 }, { "epoch": 7.77, "grad_norm": 1.3671875, "learning_rate": 0.0004217283322517417, "loss": 0.2312, "step": 187640 }, { "epoch": 7.77, "grad_norm": 1.28125, "learning_rate": 0.00042172045046035857, "loss": 0.2451, "step": 187650 }, { "epoch": 7.77, "grad_norm": 0.5859375, "learning_rate": 0.00042171256834581613, "loss": 0.1737, "step": 187660 }, { "epoch": 7.77, "grad_norm": 0.59765625, "learning_rate": 0.00042170468590812914, "loss": 0.2278, "step": 187670 }, { "epoch": 7.77, "grad_norm": 1.2109375, "learning_rate": 0.0004216968031473125, "loss": 0.1996, "step": 187680 }, { "epoch": 7.77, "grad_norm": 0.8984375, "learning_rate": 0.000421688920063381, "loss": 0.2394, "step": 187690 }, { "epoch": 7.77, "grad_norm": 0.455078125, "learning_rate": 0.0004216810366563495, "loss": 0.2203, "step": 187700 }, { "epoch": 7.77, "grad_norm": 0.94921875, "learning_rate": 0.00042167315292623283, "loss": 0.1934, "step": 187710 }, { "epoch": 7.78, "grad_norm": 1.0078125, "learning_rate": 0.0004216652688730458, "loss": 0.2527, "step": 187720 }, { "epoch": 7.78, "grad_norm": 0.58203125, "learning_rate": 0.00042165738449680335, "loss": 0.2089, "step": 187730 }, { "epoch": 7.78, "grad_norm": 0.67578125, "learning_rate": 0.00042164949979752017, "loss": 0.2324, "step": 187740 }, { "epoch": 7.78, "grad_norm": 0.48828125, "learning_rate": 0.0004216416147752112, "loss": 0.2624, "step": 187750 }, { "epoch": 7.78, "grad_norm": 0.796875, "learning_rate": 0.0004216337294298912, "loss": 0.1986, "step": 187760 }, { "epoch": 7.78, "grad_norm": 1.5625, "learning_rate": 0.0004216258437615751, "loss": 0.2585, "step": 187770 }, { "epoch": 7.78, "grad_norm": 0.7265625, "learning_rate": 0.0004216179577702777, "loss": 0.2224, "step": 187780 }, { "epoch": 7.78, "grad_norm": 0.51171875, "learning_rate": 0.00042161007145601385, "loss": 0.1872, "step": 187790 }, { "epoch": 7.78, "grad_norm": 0.6796875, "learning_rate": 0.0004216021848187983, "loss": 0.2094, "step": 187800 }, { "epoch": 7.78, "grad_norm": 0.77734375, "learning_rate": 0.000421594297858646, "loss": 0.1848, "step": 187810 }, { "epoch": 7.78, "grad_norm": 0.51171875, "learning_rate": 0.0004215864105755718, "loss": 0.2127, "step": 187820 }, { "epoch": 7.78, "grad_norm": 1.515625, "learning_rate": 0.00042157852296959047, "loss": 0.2288, "step": 187830 }, { "epoch": 7.78, "grad_norm": 0.49609375, "learning_rate": 0.0004215706350407169, "loss": 0.2156, "step": 187840 }, { "epoch": 7.78, "grad_norm": 3.921875, "learning_rate": 0.00042156274678896595, "loss": 0.219, "step": 187850 }, { "epoch": 7.78, "grad_norm": 0.80078125, "learning_rate": 0.0004215548582143524, "loss": 0.2284, "step": 187860 }, { "epoch": 7.78, "grad_norm": 1.046875, "learning_rate": 0.00042154696931689115, "loss": 0.1699, "step": 187870 }, { "epoch": 7.78, "grad_norm": 1.1171875, "learning_rate": 0.000421539080096597, "loss": 0.2227, "step": 187880 }, { "epoch": 7.78, "grad_norm": 0.68359375, "learning_rate": 0.0004215311905534849, "loss": 0.1456, "step": 187890 }, { "epoch": 7.78, "grad_norm": 0.9609375, "learning_rate": 0.00042152330068756955, "loss": 0.1826, "step": 187900 }, { "epoch": 7.78, "grad_norm": 0.42578125, "learning_rate": 0.0004215154104988659, "loss": 0.223, "step": 187910 }, { "epoch": 7.78, "grad_norm": 0.388671875, "learning_rate": 0.0004215075199873887, "loss": 0.1619, "step": 187920 }, { "epoch": 7.78, "grad_norm": 0.51953125, "learning_rate": 0.000421499629153153, "loss": 0.2008, "step": 187930 }, { "epoch": 7.78, "grad_norm": 0.4296875, "learning_rate": 0.00042149173799617347, "loss": 0.2235, "step": 187940 }, { "epoch": 7.78, "grad_norm": 0.76953125, "learning_rate": 0.000421483846516465, "loss": 0.2366, "step": 187950 }, { "epoch": 7.79, "grad_norm": 0.62890625, "learning_rate": 0.00042147595471404245, "loss": 0.149, "step": 187960 }, { "epoch": 7.79, "grad_norm": 1.015625, "learning_rate": 0.00042146806258892063, "loss": 0.203, "step": 187970 }, { "epoch": 7.79, "grad_norm": 1.65625, "learning_rate": 0.0004214601701411145, "loss": 0.1721, "step": 187980 }, { "epoch": 7.79, "grad_norm": 1.21875, "learning_rate": 0.0004214522773706387, "loss": 0.2233, "step": 187990 }, { "epoch": 7.79, "grad_norm": 1.046875, "learning_rate": 0.0004214443842775084, "loss": 0.2063, "step": 188000 }, { "epoch": 7.79, "grad_norm": 0.984375, "learning_rate": 0.00042143649086173827, "loss": 0.1968, "step": 188010 }, { "epoch": 7.79, "grad_norm": 0.6015625, "learning_rate": 0.00042142859712334307, "loss": 0.2092, "step": 188020 }, { "epoch": 7.79, "grad_norm": 0.62109375, "learning_rate": 0.0004214207030623378, "loss": 0.1954, "step": 188030 }, { "epoch": 7.79, "grad_norm": 0.53125, "learning_rate": 0.00042141280867873733, "loss": 0.2349, "step": 188040 }, { "epoch": 7.79, "grad_norm": 0.70703125, "learning_rate": 0.00042140491397255644, "loss": 0.2242, "step": 188050 }, { "epoch": 7.79, "grad_norm": 1.1640625, "learning_rate": 0.00042139701894381, "loss": 0.1754, "step": 188060 }, { "epoch": 7.79, "grad_norm": 0.85546875, "learning_rate": 0.0004213891235925129, "loss": 0.2397, "step": 188070 }, { "epoch": 7.79, "grad_norm": 0.0, "learning_rate": 0.00042138122791867994, "loss": 0.1634, "step": 188080 }, { "epoch": 7.79, "grad_norm": 0.40234375, "learning_rate": 0.00042137333192232606, "loss": 0.1857, "step": 188090 }, { "epoch": 7.79, "grad_norm": 0.74609375, "learning_rate": 0.0004213654356034661, "loss": 0.216, "step": 188100 }, { "epoch": 7.79, "grad_norm": 0.86328125, "learning_rate": 0.0004213575389621148, "loss": 0.2096, "step": 188110 }, { "epoch": 7.79, "grad_norm": 0.734375, "learning_rate": 0.00042134964199828717, "loss": 0.2011, "step": 188120 }, { "epoch": 7.79, "grad_norm": 0.87890625, "learning_rate": 0.00042134174471199806, "loss": 0.1997, "step": 188130 }, { "epoch": 7.79, "grad_norm": 0.70703125, "learning_rate": 0.0004213338471032623, "loss": 0.1996, "step": 188140 }, { "epoch": 7.79, "grad_norm": 0.35546875, "learning_rate": 0.00042132594917209466, "loss": 0.231, "step": 188150 }, { "epoch": 7.79, "grad_norm": 0.51953125, "learning_rate": 0.00042131805091851003, "loss": 0.1981, "step": 188160 }, { "epoch": 7.79, "grad_norm": 0.78515625, "learning_rate": 0.00042131015234252346, "loss": 0.1996, "step": 188170 }, { "epoch": 7.79, "grad_norm": 0.453125, "learning_rate": 0.00042130225344414965, "loss": 0.2072, "step": 188180 }, { "epoch": 7.79, "grad_norm": 0.890625, "learning_rate": 0.0004212943542234035, "loss": 0.208, "step": 188190 }, { "epoch": 7.8, "grad_norm": 0.84765625, "learning_rate": 0.00042128645468029986, "loss": 0.2333, "step": 188200 }, { "epoch": 7.8, "grad_norm": 0.458984375, "learning_rate": 0.0004212785548148536, "loss": 0.2483, "step": 188210 }, { "epoch": 7.8, "grad_norm": 0.9609375, "learning_rate": 0.00042127065462707966, "loss": 0.2352, "step": 188220 }, { "epoch": 7.8, "grad_norm": 0.45703125, "learning_rate": 0.0004212627541169927, "loss": 0.2156, "step": 188230 }, { "epoch": 7.8, "grad_norm": 0.54296875, "learning_rate": 0.0004212548532846079, "loss": 0.2282, "step": 188240 }, { "epoch": 7.8, "grad_norm": 0.9765625, "learning_rate": 0.0004212469521299398, "loss": 0.2788, "step": 188250 }, { "epoch": 7.8, "grad_norm": 0.72265625, "learning_rate": 0.00042123905065300356, "loss": 0.2196, "step": 188260 }, { "epoch": 7.8, "grad_norm": 1.0625, "learning_rate": 0.00042123114885381387, "loss": 0.2479, "step": 188270 }, { "epoch": 7.8, "grad_norm": 0.55859375, "learning_rate": 0.00042122324673238564, "loss": 0.1997, "step": 188280 }, { "epoch": 7.8, "grad_norm": 2.078125, "learning_rate": 0.00042121534428873376, "loss": 0.1956, "step": 188290 }, { "epoch": 7.8, "grad_norm": 0.49609375, "learning_rate": 0.00042120744152287316, "loss": 0.2572, "step": 188300 }, { "epoch": 7.8, "grad_norm": 1.2421875, "learning_rate": 0.0004211995384348185, "loss": 0.1838, "step": 188310 }, { "epoch": 7.8, "grad_norm": 0.67578125, "learning_rate": 0.0004211916350245849, "loss": 0.1783, "step": 188320 }, { "epoch": 7.8, "grad_norm": 0.546875, "learning_rate": 0.00042118373129218707, "loss": 0.2292, "step": 188330 }, { "epoch": 7.8, "grad_norm": 1.109375, "learning_rate": 0.00042117582723764, "loss": 0.2465, "step": 188340 }, { "epoch": 7.8, "grad_norm": 0.6875, "learning_rate": 0.0004211679228609585, "loss": 0.2447, "step": 188350 }, { "epoch": 7.8, "grad_norm": 0.416015625, "learning_rate": 0.00042116001816215745, "loss": 0.1692, "step": 188360 }, { "epoch": 7.8, "grad_norm": 0.486328125, "learning_rate": 0.0004211521131412517, "loss": 0.1967, "step": 188370 }, { "epoch": 7.8, "grad_norm": 0.58984375, "learning_rate": 0.0004211442077982562, "loss": 0.206, "step": 188380 }, { "epoch": 7.8, "grad_norm": 0.30078125, "learning_rate": 0.0004211363021331857, "loss": 0.179, "step": 188390 }, { "epoch": 7.8, "grad_norm": 1.046875, "learning_rate": 0.00042112839614605525, "loss": 0.2135, "step": 188400 }, { "epoch": 7.8, "grad_norm": 0.58984375, "learning_rate": 0.0004211204898368796, "loss": 0.178, "step": 188410 }, { "epoch": 7.8, "grad_norm": 0.84765625, "learning_rate": 0.0004211125832056737, "loss": 0.1911, "step": 188420 }, { "epoch": 7.8, "grad_norm": 0.421875, "learning_rate": 0.0004211046762524523, "loss": 0.2311, "step": 188430 }, { "epoch": 7.81, "grad_norm": 0.5234375, "learning_rate": 0.0004210967689772305, "loss": 0.2062, "step": 188440 }, { "epoch": 7.81, "grad_norm": 1.2421875, "learning_rate": 0.000421088861380023, "loss": 0.2439, "step": 188450 }, { "epoch": 7.81, "grad_norm": 1.0390625, "learning_rate": 0.00042108095346084473, "loss": 0.2339, "step": 188460 }, { "epoch": 7.81, "grad_norm": 1.265625, "learning_rate": 0.00042107304521971057, "loss": 0.1713, "step": 188470 }, { "epoch": 7.81, "grad_norm": 0.427734375, "learning_rate": 0.0004210651366566355, "loss": 0.2135, "step": 188480 }, { "epoch": 7.81, "grad_norm": 0.345703125, "learning_rate": 0.00042105722777163425, "loss": 0.2365, "step": 188490 }, { "epoch": 7.81, "grad_norm": 0.79296875, "learning_rate": 0.00042104931856472175, "loss": 0.2106, "step": 188500 }, { "epoch": 7.81, "grad_norm": 0.625, "learning_rate": 0.0004210414090359129, "loss": 0.2376, "step": 188510 }, { "epoch": 7.81, "grad_norm": 0.64453125, "learning_rate": 0.0004210334991852226, "loss": 0.1861, "step": 188520 }, { "epoch": 7.81, "grad_norm": 0.333984375, "learning_rate": 0.0004210255890126658, "loss": 0.2469, "step": 188530 }, { "epoch": 7.81, "grad_norm": 0.478515625, "learning_rate": 0.00042101767851825724, "loss": 0.2053, "step": 188540 }, { "epoch": 7.81, "grad_norm": 0.60546875, "learning_rate": 0.0004210097677020119, "loss": 0.1647, "step": 188550 }, { "epoch": 7.81, "grad_norm": 1.4921875, "learning_rate": 0.0004210018565639446, "loss": 0.2033, "step": 188560 }, { "epoch": 7.81, "grad_norm": 0.5703125, "learning_rate": 0.0004209939451040703, "loss": 0.1644, "step": 188570 }, { "epoch": 7.81, "grad_norm": 0.875, "learning_rate": 0.0004209860333224039, "loss": 0.2093, "step": 188580 }, { "epoch": 7.81, "grad_norm": 0.6171875, "learning_rate": 0.0004209781212189602, "loss": 0.1807, "step": 188590 }, { "epoch": 7.81, "grad_norm": 2.265625, "learning_rate": 0.00042097020879375415, "loss": 0.1897, "step": 188600 }, { "epoch": 7.81, "grad_norm": 0.8984375, "learning_rate": 0.0004209622960468007, "loss": 0.213, "step": 188610 }, { "epoch": 7.81, "grad_norm": 0.546875, "learning_rate": 0.0004209543829781145, "loss": 0.1753, "step": 188620 }, { "epoch": 7.81, "grad_norm": 1.3125, "learning_rate": 0.0004209464695877108, "loss": 0.2342, "step": 188630 }, { "epoch": 7.81, "grad_norm": 0.5546875, "learning_rate": 0.0004209385558756042, "loss": 0.1997, "step": 188640 }, { "epoch": 7.81, "grad_norm": 0.59765625, "learning_rate": 0.00042093064184180976, "loss": 0.2051, "step": 188650 }, { "epoch": 7.81, "grad_norm": 0.431640625, "learning_rate": 0.0004209227274863422, "loss": 0.2351, "step": 188660 }, { "epoch": 7.81, "grad_norm": 0.57421875, "learning_rate": 0.0004209148128092166, "loss": 0.2199, "step": 188670 }, { "epoch": 7.82, "grad_norm": 0.56640625, "learning_rate": 0.00042090689781044776, "loss": 0.209, "step": 188680 }, { "epoch": 7.82, "grad_norm": 0.89453125, "learning_rate": 0.0004208989824900507, "loss": 0.199, "step": 188690 }, { "epoch": 7.82, "grad_norm": 1.53125, "learning_rate": 0.00042089106684804013, "loss": 0.224, "step": 188700 }, { "epoch": 7.82, "grad_norm": 0.6640625, "learning_rate": 0.00042088315088443097, "loss": 0.2296, "step": 188710 }, { "epoch": 7.82, "grad_norm": 0.46875, "learning_rate": 0.0004208752345992383, "loss": 0.2078, "step": 188720 }, { "epoch": 7.82, "grad_norm": 0.640625, "learning_rate": 0.0004208673179924768, "loss": 0.1563, "step": 188730 }, { "epoch": 7.82, "grad_norm": 0.984375, "learning_rate": 0.0004208594010641614, "loss": 0.111, "step": 188740 }, { "epoch": 7.82, "grad_norm": 0.64453125, "learning_rate": 0.00042085148381430717, "loss": 0.1951, "step": 188750 }, { "epoch": 7.82, "grad_norm": 1.0234375, "learning_rate": 0.00042084356624292886, "loss": 0.2462, "step": 188760 }, { "epoch": 7.82, "grad_norm": 0.7734375, "learning_rate": 0.00042083564835004145, "loss": 0.2252, "step": 188770 }, { "epoch": 7.82, "grad_norm": 0.890625, "learning_rate": 0.00042082773013565975, "loss": 0.1935, "step": 188780 }, { "epoch": 7.82, "grad_norm": 0.859375, "learning_rate": 0.0004208198115997987, "loss": 0.2368, "step": 188790 }, { "epoch": 7.82, "grad_norm": 0.53515625, "learning_rate": 0.0004208118927424732, "loss": 0.1958, "step": 188800 }, { "epoch": 7.82, "grad_norm": 0.5859375, "learning_rate": 0.0004208039735636983, "loss": 0.2221, "step": 188810 }, { "epoch": 7.82, "grad_norm": 0.36328125, "learning_rate": 0.0004207960540634886, "loss": 0.2073, "step": 188820 }, { "epoch": 7.82, "grad_norm": 1.0546875, "learning_rate": 0.0004207881342418592, "loss": 0.2737, "step": 188830 }, { "epoch": 7.82, "grad_norm": 0.52734375, "learning_rate": 0.00042078021409882503, "loss": 0.2446, "step": 188840 }, { "epoch": 7.82, "grad_norm": 1.3515625, "learning_rate": 0.00042077229363440095, "loss": 0.2136, "step": 188850 }, { "epoch": 7.82, "grad_norm": 1.2734375, "learning_rate": 0.00042076437284860183, "loss": 0.1542, "step": 188860 }, { "epoch": 7.82, "grad_norm": 0.56640625, "learning_rate": 0.00042075645174144253, "loss": 0.1732, "step": 188870 }, { "epoch": 7.82, "grad_norm": 0.66796875, "learning_rate": 0.0004207485303129381, "loss": 0.1871, "step": 188880 }, { "epoch": 7.82, "grad_norm": 0.703125, "learning_rate": 0.00042074060856310335, "loss": 0.2786, "step": 188890 }, { "epoch": 7.82, "grad_norm": 0.58984375, "learning_rate": 0.0004207326864919533, "loss": 0.1812, "step": 188900 }, { "epoch": 7.82, "grad_norm": 0.390625, "learning_rate": 0.00042072476409950267, "loss": 0.2171, "step": 188910 }, { "epoch": 7.83, "grad_norm": 1.03125, "learning_rate": 0.00042071684138576647, "loss": 0.1989, "step": 188920 }, { "epoch": 7.83, "grad_norm": 0.447265625, "learning_rate": 0.00042070891835075964, "loss": 0.2317, "step": 188930 }, { "epoch": 7.83, "grad_norm": 1.234375, "learning_rate": 0.0004207009949944971, "loss": 0.2105, "step": 188940 }, { "epoch": 7.83, "grad_norm": 0.734375, "learning_rate": 0.00042069307131699363, "loss": 0.1815, "step": 188950 }, { "epoch": 7.83, "grad_norm": 0.86328125, "learning_rate": 0.0004206851473182644, "loss": 0.1834, "step": 188960 }, { "epoch": 7.83, "grad_norm": 1.65625, "learning_rate": 0.000420677222998324, "loss": 0.1796, "step": 188970 }, { "epoch": 7.83, "grad_norm": 0.56640625, "learning_rate": 0.00042066929835718746, "loss": 0.2258, "step": 188980 }, { "epoch": 7.83, "grad_norm": 0.439453125, "learning_rate": 0.0004206613733948699, "loss": 0.2429, "step": 188990 }, { "epoch": 7.83, "grad_norm": 0.9140625, "learning_rate": 0.00042065344811138594, "loss": 0.1943, "step": 189000 }, { "epoch": 7.83, "grad_norm": 0.828125, "learning_rate": 0.0004206455225067506, "loss": 0.2094, "step": 189010 }, { "epoch": 7.83, "grad_norm": 2.734375, "learning_rate": 0.00042063759658097886, "loss": 0.2239, "step": 189020 }, { "epoch": 7.83, "grad_norm": 0.5234375, "learning_rate": 0.00042062967033408566, "loss": 0.2098, "step": 189030 }, { "epoch": 7.83, "grad_norm": 0.91796875, "learning_rate": 0.00042062174376608576, "loss": 0.2177, "step": 189040 }, { "epoch": 7.83, "grad_norm": 2.140625, "learning_rate": 0.0004206138168769942, "loss": 0.2475, "step": 189050 }, { "epoch": 7.83, "grad_norm": 1.53125, "learning_rate": 0.0004206058896668259, "loss": 0.211, "step": 189060 }, { "epoch": 7.83, "grad_norm": 0.431640625, "learning_rate": 0.0004205979621355956, "loss": 0.1813, "step": 189070 }, { "epoch": 7.83, "grad_norm": 0.49609375, "learning_rate": 0.00042059003428331857, "loss": 0.2064, "step": 189080 }, { "epoch": 7.83, "grad_norm": 0.59765625, "learning_rate": 0.00042058210611000936, "loss": 0.2343, "step": 189090 }, { "epoch": 7.83, "grad_norm": 0.875, "learning_rate": 0.00042057417761568307, "loss": 0.2204, "step": 189100 }, { "epoch": 7.83, "grad_norm": 1.1953125, "learning_rate": 0.00042056624880035465, "loss": 0.2256, "step": 189110 }, { "epoch": 7.83, "grad_norm": 0.9296875, "learning_rate": 0.0004205583196640389, "loss": 0.162, "step": 189120 }, { "epoch": 7.83, "grad_norm": 0.51171875, "learning_rate": 0.00042055039020675087, "loss": 0.1318, "step": 189130 }, { "epoch": 7.83, "grad_norm": 0.88671875, "learning_rate": 0.0004205424604285054, "loss": 0.1736, "step": 189140 }, { "epoch": 7.83, "grad_norm": 0.59765625, "learning_rate": 0.0004205345303293174, "loss": 0.1828, "step": 189150 }, { "epoch": 7.83, "grad_norm": 0.828125, "learning_rate": 0.00042052659990920186, "loss": 0.1647, "step": 189160 }, { "epoch": 7.84, "grad_norm": 1.2578125, "learning_rate": 0.00042051866916817366, "loss": 0.2172, "step": 189170 }, { "epoch": 7.84, "grad_norm": 1.15625, "learning_rate": 0.0004205107381062478, "loss": 0.1978, "step": 189180 }, { "epoch": 7.84, "grad_norm": 1.203125, "learning_rate": 0.0004205028067234391, "loss": 0.1996, "step": 189190 }, { "epoch": 7.84, "grad_norm": 0.515625, "learning_rate": 0.00042049487501976253, "loss": 0.2285, "step": 189200 }, { "epoch": 7.84, "grad_norm": 0.388671875, "learning_rate": 0.00042048694299523303, "loss": 0.232, "step": 189210 }, { "epoch": 7.84, "grad_norm": 0.458984375, "learning_rate": 0.0004204790106498655, "loss": 0.2189, "step": 189220 }, { "epoch": 7.84, "grad_norm": 1.2734375, "learning_rate": 0.00042047107798367486, "loss": 0.2073, "step": 189230 }, { "epoch": 7.84, "grad_norm": 0.875, "learning_rate": 0.0004204631449966761, "loss": 0.2695, "step": 189240 }, { "epoch": 7.84, "grad_norm": 0.53515625, "learning_rate": 0.0004204552116888841, "loss": 0.177, "step": 189250 }, { "epoch": 7.84, "grad_norm": 0.53125, "learning_rate": 0.0004204472780603138, "loss": 0.2052, "step": 189260 }, { "epoch": 7.84, "grad_norm": 0.625, "learning_rate": 0.0004204393441109801, "loss": 0.217, "step": 189270 }, { "epoch": 7.84, "grad_norm": 0.62890625, "learning_rate": 0.000420431409840898, "loss": 0.1708, "step": 189280 }, { "epoch": 7.84, "grad_norm": 0.53515625, "learning_rate": 0.0004204234752500824, "loss": 0.1953, "step": 189290 }, { "epoch": 7.84, "grad_norm": 0.87890625, "learning_rate": 0.0004204155403385481, "loss": 0.1923, "step": 189300 }, { "epoch": 7.84, "grad_norm": 0.0, "learning_rate": 0.0004204076051063103, "loss": 0.1422, "step": 189310 }, { "epoch": 7.84, "grad_norm": 0.609375, "learning_rate": 0.0004203996695533838, "loss": 0.2476, "step": 189320 }, { "epoch": 7.84, "grad_norm": 0.421875, "learning_rate": 0.0004203917336797834, "loss": 0.2458, "step": 189330 }, { "epoch": 7.84, "grad_norm": 0.796875, "learning_rate": 0.00042038379748552426, "loss": 0.197, "step": 189340 }, { "epoch": 7.84, "grad_norm": 0.80859375, "learning_rate": 0.0004203758609706212, "loss": 0.203, "step": 189350 }, { "epoch": 7.84, "grad_norm": 0.40234375, "learning_rate": 0.0004203679241350892, "loss": 0.1832, "step": 189360 }, { "epoch": 7.84, "grad_norm": 0.6953125, "learning_rate": 0.00042035998697894305, "loss": 0.1887, "step": 189370 }, { "epoch": 7.84, "grad_norm": 0.953125, "learning_rate": 0.00042035204950219784, "loss": 0.2224, "step": 189380 }, { "epoch": 7.84, "grad_norm": 0.5703125, "learning_rate": 0.0004203441117048685, "loss": 0.2162, "step": 189390 }, { "epoch": 7.84, "grad_norm": 0.640625, "learning_rate": 0.00042033617358696993, "loss": 0.2049, "step": 189400 }, { "epoch": 7.85, "grad_norm": 2.203125, "learning_rate": 0.00042032823514851717, "loss": 0.1924, "step": 189410 }, { "epoch": 7.85, "grad_norm": 0.97265625, "learning_rate": 0.00042032029638952497, "loss": 0.1861, "step": 189420 }, { "epoch": 7.85, "grad_norm": 0.388671875, "learning_rate": 0.0004203123573100083, "loss": 0.1613, "step": 189430 }, { "epoch": 7.85, "grad_norm": 1.15625, "learning_rate": 0.00042030441790998226, "loss": 0.2218, "step": 189440 }, { "epoch": 7.85, "grad_norm": 1.0390625, "learning_rate": 0.00042029647818946173, "loss": 0.1712, "step": 189450 }, { "epoch": 7.85, "grad_norm": 0.412109375, "learning_rate": 0.00042028853814846145, "loss": 0.2446, "step": 189460 }, { "epoch": 7.85, "grad_norm": 0.54296875, "learning_rate": 0.0004202805977869967, "loss": 0.2158, "step": 189470 }, { "epoch": 7.85, "grad_norm": 0.31640625, "learning_rate": 0.0004202726571050822, "loss": 0.1896, "step": 189480 }, { "epoch": 7.85, "grad_norm": 1.1171875, "learning_rate": 0.00042026471610273294, "loss": 0.2173, "step": 189490 }, { "epoch": 7.85, "grad_norm": 0.98046875, "learning_rate": 0.0004202567747799638, "loss": 0.2102, "step": 189500 }, { "epoch": 7.85, "grad_norm": 0.58984375, "learning_rate": 0.00042024883313678994, "loss": 0.1803, "step": 189510 }, { "epoch": 7.85, "grad_norm": 0.54296875, "learning_rate": 0.00042024089117322604, "loss": 0.2023, "step": 189520 }, { "epoch": 7.85, "grad_norm": 1.03125, "learning_rate": 0.0004202329488892873, "loss": 0.21, "step": 189530 }, { "epoch": 7.85, "grad_norm": 0.8046875, "learning_rate": 0.0004202250062849884, "loss": 0.2549, "step": 189540 }, { "epoch": 7.85, "grad_norm": 0.859375, "learning_rate": 0.0004202170633603445, "loss": 0.2001, "step": 189550 }, { "epoch": 7.85, "grad_norm": 0.56640625, "learning_rate": 0.0004202091201153704, "loss": 0.1922, "step": 189560 }, { "epoch": 7.85, "grad_norm": 0.54296875, "learning_rate": 0.0004202011765500812, "loss": 0.1852, "step": 189570 }, { "epoch": 7.85, "grad_norm": 0.46875, "learning_rate": 0.0004201932326644917, "loss": 0.2038, "step": 189580 }, { "epoch": 7.85, "grad_norm": 0.70703125, "learning_rate": 0.0004201852884586169, "loss": 0.22, "step": 189590 }, { "epoch": 7.85, "grad_norm": 1.1953125, "learning_rate": 0.00042017734393247184, "loss": 0.1816, "step": 189600 }, { "epoch": 7.85, "grad_norm": 1.4609375, "learning_rate": 0.0004201693990860714, "loss": 0.2005, "step": 189610 }, { "epoch": 7.85, "grad_norm": 0.478515625, "learning_rate": 0.0004201614539194304, "loss": 0.1966, "step": 189620 }, { "epoch": 7.85, "grad_norm": 0.56640625, "learning_rate": 0.00042015350843256405, "loss": 0.2322, "step": 189630 }, { "epoch": 7.85, "grad_norm": 0.296875, "learning_rate": 0.00042014556262548713, "loss": 0.1739, "step": 189640 }, { "epoch": 7.86, "grad_norm": 1.09375, "learning_rate": 0.0004201376164982146, "loss": 0.2136, "step": 189650 }, { "epoch": 7.86, "grad_norm": 0.96875, "learning_rate": 0.0004201296700507615, "loss": 0.1891, "step": 189660 }, { "epoch": 7.86, "grad_norm": 1.0078125, "learning_rate": 0.00042012172328314277, "loss": 0.2396, "step": 189670 }, { "epoch": 7.86, "grad_norm": 1.234375, "learning_rate": 0.0004201137761953733, "loss": 0.217, "step": 189680 }, { "epoch": 7.86, "grad_norm": 1.4296875, "learning_rate": 0.00042010582878746806, "loss": 0.1921, "step": 189690 }, { "epoch": 7.86, "grad_norm": 0.498046875, "learning_rate": 0.0004200978810594419, "loss": 0.1925, "step": 189700 }, { "epoch": 7.86, "grad_norm": 0.96484375, "learning_rate": 0.00042008993301131004, "loss": 0.2041, "step": 189710 }, { "epoch": 7.86, "grad_norm": 0.89453125, "learning_rate": 0.00042008198464308727, "loss": 0.1913, "step": 189720 }, { "epoch": 7.86, "grad_norm": 0.45703125, "learning_rate": 0.00042007403595478856, "loss": 0.2038, "step": 189730 }, { "epoch": 7.86, "grad_norm": 0.76171875, "learning_rate": 0.00042006608694642887, "loss": 0.1833, "step": 189740 }, { "epoch": 7.86, "grad_norm": 1.2890625, "learning_rate": 0.0004200581376180232, "loss": 0.1866, "step": 189750 }, { "epoch": 7.86, "grad_norm": 1.703125, "learning_rate": 0.0004200501879695865, "loss": 0.256, "step": 189760 }, { "epoch": 7.86, "grad_norm": 0.7578125, "learning_rate": 0.00042004223800113364, "loss": 0.1806, "step": 189770 }, { "epoch": 7.86, "grad_norm": 0.50390625, "learning_rate": 0.0004200342877126797, "loss": 0.1717, "step": 189780 }, { "epoch": 7.86, "grad_norm": 1.0234375, "learning_rate": 0.00042002633710423954, "loss": 0.205, "step": 189790 }, { "epoch": 7.86, "grad_norm": 0.169921875, "learning_rate": 0.0004200183861758282, "loss": 0.2258, "step": 189800 }, { "epoch": 7.86, "grad_norm": 0.486328125, "learning_rate": 0.0004200104349274607, "loss": 0.1883, "step": 189810 }, { "epoch": 7.86, "grad_norm": 0.8359375, "learning_rate": 0.0004200024833591518, "loss": 0.1852, "step": 189820 }, { "epoch": 7.86, "grad_norm": 0.8515625, "learning_rate": 0.0004199945314709167, "loss": 0.1916, "step": 189830 }, { "epoch": 7.86, "grad_norm": 0.78125, "learning_rate": 0.0004199865792627702, "loss": 0.2139, "step": 189840 }, { "epoch": 7.86, "grad_norm": 1.46875, "learning_rate": 0.0004199786267347272, "loss": 0.2176, "step": 189850 }, { "epoch": 7.86, "grad_norm": 0.796875, "learning_rate": 0.00041997067388680295, "loss": 0.2332, "step": 189860 }, { "epoch": 7.86, "grad_norm": 1.09375, "learning_rate": 0.0004199627207190122, "loss": 0.1863, "step": 189870 }, { "epoch": 7.86, "grad_norm": 1.140625, "learning_rate": 0.00041995476723136996, "loss": 0.2091, "step": 189880 }, { "epoch": 7.87, "grad_norm": 0.2041015625, "learning_rate": 0.0004199468134238911, "loss": 0.1961, "step": 189890 }, { "epoch": 7.87, "grad_norm": 0.9453125, "learning_rate": 0.0004199388592965908, "loss": 0.1859, "step": 189900 }, { "epoch": 7.87, "grad_norm": 0.7265625, "learning_rate": 0.00041993090484948386, "loss": 0.1487, "step": 189910 }, { "epoch": 7.87, "grad_norm": 0.5078125, "learning_rate": 0.0004199229500825854, "loss": 0.1965, "step": 189920 }, { "epoch": 7.87, "grad_norm": 0.392578125, "learning_rate": 0.0004199149949959102, "loss": 0.189, "step": 189930 }, { "epoch": 7.87, "grad_norm": 0.4765625, "learning_rate": 0.0004199070395894734, "loss": 0.2095, "step": 189940 }, { "epoch": 7.87, "grad_norm": 0.44140625, "learning_rate": 0.00041989908386328985, "loss": 0.2088, "step": 189950 }, { "epoch": 7.87, "grad_norm": 0.546875, "learning_rate": 0.0004198911278173746, "loss": 0.1887, "step": 189960 }, { "epoch": 7.87, "grad_norm": 0.75390625, "learning_rate": 0.0004198831714517426, "loss": 0.1844, "step": 189970 }, { "epoch": 7.87, "grad_norm": 1.3125, "learning_rate": 0.00041987521476640876, "loss": 0.2246, "step": 189980 }, { "epoch": 7.87, "grad_norm": 0.67578125, "learning_rate": 0.0004198672577613881, "loss": 0.2075, "step": 189990 }, { "epoch": 7.87, "grad_norm": 0.369140625, "learning_rate": 0.0004198593004366957, "loss": 0.1689, "step": 190000 }, { "epoch": 7.87, "grad_norm": 0.70703125, "learning_rate": 0.0004198513427923464, "loss": 0.217, "step": 190010 }, { "epoch": 7.87, "grad_norm": 0.1845703125, "learning_rate": 0.0004198433848283552, "loss": 0.1892, "step": 190020 }, { "epoch": 7.87, "grad_norm": 0.921875, "learning_rate": 0.00041983542654473716, "loss": 0.1796, "step": 190030 }, { "epoch": 7.87, "grad_norm": 0.478515625, "learning_rate": 0.00041982746794150705, "loss": 0.1759, "step": 190040 }, { "epoch": 7.87, "grad_norm": 0.6640625, "learning_rate": 0.0004198195090186801, "loss": 0.1934, "step": 190050 }, { "epoch": 7.87, "grad_norm": 0.6015625, "learning_rate": 0.00041981154977627114, "loss": 0.1751, "step": 190060 }, { "epoch": 7.87, "grad_norm": 0.90234375, "learning_rate": 0.00041980359021429514, "loss": 0.2213, "step": 190070 }, { "epoch": 7.87, "grad_norm": 0.9921875, "learning_rate": 0.0004197956303327672, "loss": 0.2154, "step": 190080 }, { "epoch": 7.87, "grad_norm": 0.97265625, "learning_rate": 0.00041978767013170216, "loss": 0.215, "step": 190090 }, { "epoch": 7.87, "grad_norm": 0.2275390625, "learning_rate": 0.0004197797096111151, "loss": 0.2241, "step": 190100 }, { "epoch": 7.87, "grad_norm": 0.458984375, "learning_rate": 0.000419771748771021, "loss": 0.2162, "step": 190110 }, { "epoch": 7.87, "grad_norm": 0.87109375, "learning_rate": 0.0004197637876114347, "loss": 0.2679, "step": 190120 }, { "epoch": 7.88, "grad_norm": 2.40625, "learning_rate": 0.0004197558261323713, "loss": 0.1835, "step": 190130 }, { "epoch": 7.88, "grad_norm": 1.1328125, "learning_rate": 0.0004197478643338458, "loss": 0.203, "step": 190140 }, { "epoch": 7.88, "grad_norm": 0.68359375, "learning_rate": 0.0004197399022158731, "loss": 0.1648, "step": 190150 }, { "epoch": 7.88, "grad_norm": 0.953125, "learning_rate": 0.0004197319397784683, "loss": 0.1911, "step": 190160 }, { "epoch": 7.88, "grad_norm": 0.6015625, "learning_rate": 0.0004197239770216463, "loss": 0.2303, "step": 190170 }, { "epoch": 7.88, "grad_norm": 1.28125, "learning_rate": 0.0004197160139454221, "loss": 0.1831, "step": 190180 }, { "epoch": 7.88, "grad_norm": 0.296875, "learning_rate": 0.00041970805054981073, "loss": 0.195, "step": 190190 }, { "epoch": 7.88, "grad_norm": 0.76171875, "learning_rate": 0.0004197000868348271, "loss": 0.2068, "step": 190200 }, { "epoch": 7.88, "grad_norm": 0.212890625, "learning_rate": 0.00041969212280048624, "loss": 0.1908, "step": 190210 }, { "epoch": 7.88, "grad_norm": 0.6015625, "learning_rate": 0.00041968415844680307, "loss": 0.1983, "step": 190220 }, { "epoch": 7.88, "grad_norm": 1.390625, "learning_rate": 0.00041967619377379276, "loss": 0.2087, "step": 190230 }, { "epoch": 7.88, "grad_norm": 0.6328125, "learning_rate": 0.00041966822878147005, "loss": 0.2413, "step": 190240 }, { "epoch": 7.88, "grad_norm": 0.85546875, "learning_rate": 0.0004196602634698501, "loss": 0.1855, "step": 190250 }, { "epoch": 7.88, "grad_norm": 0.796875, "learning_rate": 0.00041965229783894785, "loss": 0.1773, "step": 190260 }, { "epoch": 7.88, "grad_norm": 0.51953125, "learning_rate": 0.0004196443318887784, "loss": 0.2323, "step": 190270 }, { "epoch": 7.88, "grad_norm": 0.578125, "learning_rate": 0.00041963636561935655, "loss": 0.1742, "step": 190280 }, { "epoch": 7.88, "grad_norm": 0.3671875, "learning_rate": 0.0004196283990306974, "loss": 0.2052, "step": 190290 }, { "epoch": 7.88, "grad_norm": 0.80078125, "learning_rate": 0.0004196204321228159, "loss": 0.1884, "step": 190300 }, { "epoch": 7.88, "grad_norm": 1.34375, "learning_rate": 0.00041961246489572704, "loss": 0.1787, "step": 190310 }, { "epoch": 7.88, "grad_norm": 0.90625, "learning_rate": 0.0004196044973494458, "loss": 0.1576, "step": 190320 }, { "epoch": 7.88, "grad_norm": 0.5, "learning_rate": 0.0004195965294839873, "loss": 0.2089, "step": 190330 }, { "epoch": 7.88, "grad_norm": 0.7265625, "learning_rate": 0.0004195885612993664, "loss": 0.2101, "step": 190340 }, { "epoch": 7.88, "grad_norm": 0.369140625, "learning_rate": 0.00041958059279559816, "loss": 0.2228, "step": 190350 }, { "epoch": 7.88, "grad_norm": 0.81640625, "learning_rate": 0.00041957262397269757, "loss": 0.269, "step": 190360 }, { "epoch": 7.89, "grad_norm": 0.58984375, "learning_rate": 0.0004195646548306796, "loss": 0.2131, "step": 190370 }, { "epoch": 7.89, "grad_norm": 0.828125, "learning_rate": 0.00041955668536955925, "loss": 0.1832, "step": 190380 }, { "epoch": 7.89, "grad_norm": 0.859375, "learning_rate": 0.0004195487155893516, "loss": 0.2274, "step": 190390 }, { "epoch": 7.89, "grad_norm": 0.404296875, "learning_rate": 0.00041954074549007146, "loss": 0.2074, "step": 190400 }, { "epoch": 7.89, "grad_norm": 0.65234375, "learning_rate": 0.00041953277507173403, "loss": 0.1988, "step": 190410 }, { "epoch": 7.89, "grad_norm": 0.51953125, "learning_rate": 0.0004195248043343542, "loss": 0.1591, "step": 190420 }, { "epoch": 7.89, "grad_norm": 0.421875, "learning_rate": 0.000419516833277947, "loss": 0.164, "step": 190430 }, { "epoch": 7.89, "grad_norm": 0.41796875, "learning_rate": 0.00041950886190252745, "loss": 0.2142, "step": 190440 }, { "epoch": 7.89, "grad_norm": 0.490234375, "learning_rate": 0.0004195008902081106, "loss": 0.1316, "step": 190450 }, { "epoch": 7.89, "grad_norm": 2.703125, "learning_rate": 0.0004194929181947111, "loss": 0.1979, "step": 190460 }, { "epoch": 7.89, "grad_norm": 0.98046875, "learning_rate": 0.00041948494586234447, "loss": 0.2864, "step": 190470 }, { "epoch": 7.89, "grad_norm": 0.52734375, "learning_rate": 0.0004194769732110254, "loss": 0.1579, "step": 190480 }, { "epoch": 7.89, "grad_norm": 1.0546875, "learning_rate": 0.00041946900024076905, "loss": 0.2155, "step": 190490 }, { "epoch": 7.89, "grad_norm": 0.30859375, "learning_rate": 0.00041946102695159025, "loss": 0.2073, "step": 190500 }, { "epoch": 7.89, "grad_norm": 0.546875, "learning_rate": 0.0004194530533435041, "loss": 0.2001, "step": 190510 }, { "epoch": 7.89, "grad_norm": 0.703125, "learning_rate": 0.00041944507941652566, "loss": 0.1827, "step": 190520 }, { "epoch": 7.89, "grad_norm": 0.8828125, "learning_rate": 0.0004194371051706698, "loss": 0.1822, "step": 190530 }, { "epoch": 7.89, "grad_norm": 0.859375, "learning_rate": 0.00041942913060595164, "loss": 0.2223, "step": 190540 }, { "epoch": 7.89, "grad_norm": 1.9296875, "learning_rate": 0.0004194211557223862, "loss": 0.1737, "step": 190550 }, { "epoch": 7.89, "grad_norm": 0.78125, "learning_rate": 0.00041941318051998843, "loss": 0.2702, "step": 190560 }, { "epoch": 7.89, "grad_norm": 0.294921875, "learning_rate": 0.0004194052049987733, "loss": 0.1848, "step": 190570 }, { "epoch": 7.89, "grad_norm": 0.52734375, "learning_rate": 0.0004193972291587559, "loss": 0.2486, "step": 190580 }, { "epoch": 7.89, "grad_norm": 0.81640625, "learning_rate": 0.0004193892529999512, "loss": 0.1927, "step": 190590 }, { "epoch": 7.89, "grad_norm": 0.8125, "learning_rate": 0.00041938127652237425, "loss": 0.2196, "step": 190600 }, { "epoch": 7.9, "grad_norm": 0.58984375, "learning_rate": 0.00041937329972604, "loss": 0.2479, "step": 190610 }, { "epoch": 7.9, "grad_norm": 1.078125, "learning_rate": 0.0004193653226109635, "loss": 0.1918, "step": 190620 }, { "epoch": 7.9, "grad_norm": 0.6640625, "learning_rate": 0.0004193573451771597, "loss": 0.2382, "step": 190630 }, { "epoch": 7.9, "grad_norm": 0.85546875, "learning_rate": 0.0004193493674246437, "loss": 0.2106, "step": 190640 }, { "epoch": 7.9, "grad_norm": 0.62890625, "learning_rate": 0.0004193413893534305, "loss": 0.2269, "step": 190650 }, { "epoch": 7.9, "grad_norm": 0.298828125, "learning_rate": 0.0004193334109635351, "loss": 0.2106, "step": 190660 }, { "epoch": 7.9, "grad_norm": 0.419921875, "learning_rate": 0.00041932543225497244, "loss": 0.1911, "step": 190670 }, { "epoch": 7.9, "grad_norm": 0.83203125, "learning_rate": 0.00041931745322775773, "loss": 0.1814, "step": 190680 }, { "epoch": 7.9, "grad_norm": 0.63671875, "learning_rate": 0.0004193094738819057, "loss": 0.1826, "step": 190690 }, { "epoch": 7.9, "grad_norm": 0.63671875, "learning_rate": 0.00041930149421743164, "loss": 0.2107, "step": 190700 }, { "epoch": 7.9, "grad_norm": 0.72265625, "learning_rate": 0.0004192935142343504, "loss": 0.1961, "step": 190710 }, { "epoch": 7.9, "grad_norm": 1.0, "learning_rate": 0.000419285533932677, "loss": 0.2262, "step": 190720 }, { "epoch": 7.9, "grad_norm": 0.220703125, "learning_rate": 0.0004192775533124266, "loss": 0.2242, "step": 190730 }, { "epoch": 7.9, "grad_norm": 0.6640625, "learning_rate": 0.00041926957237361405, "loss": 0.2396, "step": 190740 }, { "epoch": 7.9, "grad_norm": 1.15625, "learning_rate": 0.0004192615911162545, "loss": 0.1813, "step": 190750 }, { "epoch": 7.9, "grad_norm": 0.34375, "learning_rate": 0.00041925360954036297, "loss": 0.1999, "step": 190760 }, { "epoch": 7.9, "grad_norm": 1.0859375, "learning_rate": 0.0004192456276459543, "loss": 0.2079, "step": 190770 }, { "epoch": 7.9, "grad_norm": 1.25, "learning_rate": 0.0004192376454330437, "loss": 0.1945, "step": 190780 }, { "epoch": 7.9, "grad_norm": 0.73046875, "learning_rate": 0.00041922966290164614, "loss": 0.2174, "step": 190790 }, { "epoch": 7.9, "grad_norm": 1.1640625, "learning_rate": 0.0004192216800517766, "loss": 0.1838, "step": 190800 }, { "epoch": 7.9, "grad_norm": 0.68359375, "learning_rate": 0.00041921369688345013, "loss": 0.201, "step": 190810 }, { "epoch": 7.9, "grad_norm": 0.8515625, "learning_rate": 0.0004192057133966818, "loss": 0.1919, "step": 190820 }, { "epoch": 7.9, "grad_norm": 0.55078125, "learning_rate": 0.0004191977295914865, "loss": 0.2296, "step": 190830 }, { "epoch": 7.9, "grad_norm": 1.1015625, "learning_rate": 0.00041918974546787947, "loss": 0.201, "step": 190840 }, { "epoch": 7.9, "grad_norm": 1.484375, "learning_rate": 0.0004191817610258755, "loss": 0.2111, "step": 190850 }, { "epoch": 7.91, "grad_norm": 1.125, "learning_rate": 0.00041917377626548983, "loss": 0.1746, "step": 190860 }, { "epoch": 7.91, "grad_norm": 0.4921875, "learning_rate": 0.00041916579118673726, "loss": 0.1831, "step": 190870 }, { "epoch": 7.91, "grad_norm": 0.263671875, "learning_rate": 0.000419157805789633, "loss": 0.1979, "step": 190880 }, { "epoch": 7.91, "grad_norm": 0.4296875, "learning_rate": 0.000419149820074192, "loss": 0.1915, "step": 190890 }, { "epoch": 7.91, "grad_norm": 0.8125, "learning_rate": 0.00041914183404042927, "loss": 0.1926, "step": 190900 }, { "epoch": 7.91, "grad_norm": 2.90625, "learning_rate": 0.00041913384768835996, "loss": 0.199, "step": 190910 }, { "epoch": 7.91, "grad_norm": 1.1171875, "learning_rate": 0.000419125861017999, "loss": 0.2421, "step": 190920 }, { "epoch": 7.91, "grad_norm": 0.65234375, "learning_rate": 0.0004191178740293614, "loss": 0.2035, "step": 190930 }, { "epoch": 7.91, "grad_norm": 0.51171875, "learning_rate": 0.00041910988672246225, "loss": 0.1902, "step": 190940 }, { "epoch": 7.91, "grad_norm": 0.51953125, "learning_rate": 0.0004191018990973166, "loss": 0.1495, "step": 190950 }, { "epoch": 7.91, "grad_norm": 0.58203125, "learning_rate": 0.0004190939111539393, "loss": 0.2041, "step": 190960 }, { "epoch": 7.91, "grad_norm": 1.125, "learning_rate": 0.0004190859228923456, "loss": 0.1581, "step": 190970 }, { "epoch": 7.91, "grad_norm": 0.921875, "learning_rate": 0.0004190779343125504, "loss": 0.2482, "step": 190980 }, { "epoch": 7.91, "grad_norm": 0.51171875, "learning_rate": 0.0004190699454145689, "loss": 0.2382, "step": 190990 }, { "epoch": 7.91, "grad_norm": 0.921875, "learning_rate": 0.00041906195619841594, "loss": 0.1871, "step": 191000 }, { "epoch": 7.91, "grad_norm": 0.435546875, "learning_rate": 0.0004190539666641067, "loss": 0.1768, "step": 191010 }, { "epoch": 7.91, "grad_norm": 0.27734375, "learning_rate": 0.0004190459768116561, "loss": 0.2078, "step": 191020 }, { "epoch": 7.91, "grad_norm": 0.41015625, "learning_rate": 0.00041903798664107915, "loss": 0.1986, "step": 191030 }, { "epoch": 7.91, "grad_norm": 0.380859375, "learning_rate": 0.0004190299961523911, "loss": 0.1867, "step": 191040 }, { "epoch": 7.91, "grad_norm": 1.0703125, "learning_rate": 0.0004190220053456068, "loss": 0.1879, "step": 191050 }, { "epoch": 7.91, "grad_norm": 0.48046875, "learning_rate": 0.00041901401422074133, "loss": 0.1675, "step": 191060 }, { "epoch": 7.91, "grad_norm": 1.328125, "learning_rate": 0.00041900602277780973, "loss": 0.1978, "step": 191070 }, { "epoch": 7.91, "grad_norm": 1.21875, "learning_rate": 0.00041899803101682703, "loss": 0.2328, "step": 191080 }, { "epoch": 7.91, "grad_norm": 0.62890625, "learning_rate": 0.0004189900389378083, "loss": 0.1927, "step": 191090 }, { "epoch": 7.92, "grad_norm": 0.470703125, "learning_rate": 0.00041898204654076867, "loss": 0.2165, "step": 191100 }, { "epoch": 7.92, "grad_norm": 0.3984375, "learning_rate": 0.0004189740538257229, "loss": 0.2376, "step": 191110 }, { "epoch": 7.92, "grad_norm": 1.15625, "learning_rate": 0.00041896606079268637, "loss": 0.164, "step": 191120 }, { "epoch": 7.92, "grad_norm": 1.6015625, "learning_rate": 0.00041895806744167386, "loss": 0.2521, "step": 191130 }, { "epoch": 7.92, "grad_norm": 0.58984375, "learning_rate": 0.0004189500737727005, "loss": 0.2214, "step": 191140 }, { "epoch": 7.92, "grad_norm": 0.64453125, "learning_rate": 0.0004189420797857814, "loss": 0.2204, "step": 191150 }, { "epoch": 7.92, "grad_norm": 0.88671875, "learning_rate": 0.00041893408548093156, "loss": 0.2433, "step": 191160 }, { "epoch": 7.92, "grad_norm": 0.8515625, "learning_rate": 0.00041892609085816596, "loss": 0.2074, "step": 191170 }, { "epoch": 7.92, "grad_norm": 0.80078125, "learning_rate": 0.00041891809591749975, "loss": 0.2262, "step": 191180 }, { "epoch": 7.92, "grad_norm": 0.53125, "learning_rate": 0.00041891010065894785, "loss": 0.2014, "step": 191190 }, { "epoch": 7.92, "grad_norm": 0.470703125, "learning_rate": 0.00041890210508252544, "loss": 0.225, "step": 191200 }, { "epoch": 7.92, "grad_norm": 0.40234375, "learning_rate": 0.00041889410918824753, "loss": 0.2026, "step": 191210 }, { "epoch": 7.92, "grad_norm": 1.265625, "learning_rate": 0.0004188861129761291, "loss": 0.1806, "step": 191220 }, { "epoch": 7.92, "grad_norm": 0.28515625, "learning_rate": 0.0004188781164461853, "loss": 0.1835, "step": 191230 }, { "epoch": 7.92, "grad_norm": 1.4609375, "learning_rate": 0.00041887011959843105, "loss": 0.2236, "step": 191240 }, { "epoch": 7.92, "grad_norm": 0.44140625, "learning_rate": 0.00041886212243288153, "loss": 0.1469, "step": 191250 }, { "epoch": 7.92, "grad_norm": 0.85546875, "learning_rate": 0.0004188541249495517, "loss": 0.185, "step": 191260 }, { "epoch": 7.92, "grad_norm": 0.828125, "learning_rate": 0.0004188461271484566, "loss": 0.1891, "step": 191270 }, { "epoch": 7.92, "grad_norm": 0.73828125, "learning_rate": 0.0004188381290296114, "loss": 0.2361, "step": 191280 }, { "epoch": 7.92, "grad_norm": 0.76171875, "learning_rate": 0.00041883013059303104, "loss": 0.181, "step": 191290 }, { "epoch": 7.92, "grad_norm": 1.109375, "learning_rate": 0.00041882213183873064, "loss": 0.1771, "step": 191300 }, { "epoch": 7.92, "grad_norm": 1.25, "learning_rate": 0.0004188141327667252, "loss": 0.2119, "step": 191310 }, { "epoch": 7.92, "grad_norm": 0.7109375, "learning_rate": 0.00041880613337702977, "loss": 0.1855, "step": 191320 }, { "epoch": 7.92, "grad_norm": 0.5234375, "learning_rate": 0.00041879813366965945, "loss": 0.1699, "step": 191330 }, { "epoch": 7.93, "grad_norm": 1.1640625, "learning_rate": 0.00041879013364462927, "loss": 0.2044, "step": 191340 }, { "epoch": 7.93, "grad_norm": 2.15625, "learning_rate": 0.00041878213330195425, "loss": 0.1976, "step": 191350 }, { "epoch": 7.93, "grad_norm": 0.765625, "learning_rate": 0.0004187741326416495, "loss": 0.2252, "step": 191360 }, { "epoch": 7.93, "grad_norm": 1.0, "learning_rate": 0.00041876613166373004, "loss": 0.2416, "step": 191370 }, { "epoch": 7.93, "grad_norm": 0.32421875, "learning_rate": 0.000418758130368211, "loss": 0.146, "step": 191380 }, { "epoch": 7.93, "grad_norm": 0.7109375, "learning_rate": 0.00041875012875510734, "loss": 0.1756, "step": 191390 }, { "epoch": 7.93, "grad_norm": 0.77734375, "learning_rate": 0.0004187421268244342, "loss": 0.2243, "step": 191400 }, { "epoch": 7.93, "grad_norm": 0.55078125, "learning_rate": 0.0004187341245762066, "loss": 0.1921, "step": 191410 }, { "epoch": 7.93, "grad_norm": 0.765625, "learning_rate": 0.0004187261220104396, "loss": 0.2433, "step": 191420 }, { "epoch": 7.93, "grad_norm": 0.5703125, "learning_rate": 0.0004187181191271482, "loss": 0.2161, "step": 191430 }, { "epoch": 7.93, "grad_norm": 0.4296875, "learning_rate": 0.00041871011592634755, "loss": 0.2104, "step": 191440 }, { "epoch": 7.93, "grad_norm": 0.828125, "learning_rate": 0.00041870211240805266, "loss": 0.2269, "step": 191450 }, { "epoch": 7.93, "grad_norm": 0.72265625, "learning_rate": 0.0004186941085722786, "loss": 0.254, "step": 191460 }, { "epoch": 7.93, "grad_norm": 0.54296875, "learning_rate": 0.00041868610441904056, "loss": 0.1876, "step": 191470 }, { "epoch": 7.93, "grad_norm": 1.5, "learning_rate": 0.0004186780999483534, "loss": 0.164, "step": 191480 }, { "epoch": 7.93, "grad_norm": 0.1962890625, "learning_rate": 0.00041867009516023223, "loss": 0.2153, "step": 191490 }, { "epoch": 7.93, "grad_norm": 1.1484375, "learning_rate": 0.00041866209005469226, "loss": 0.2193, "step": 191500 }, { "epoch": 7.93, "grad_norm": 0.7109375, "learning_rate": 0.00041865408463174835, "loss": 0.1539, "step": 191510 }, { "epoch": 7.93, "grad_norm": 0.8359375, "learning_rate": 0.0004186460788914157, "loss": 0.1747, "step": 191520 }, { "epoch": 7.93, "grad_norm": 0.359375, "learning_rate": 0.00041863807283370937, "loss": 0.1837, "step": 191530 }, { "epoch": 7.93, "grad_norm": 0.57421875, "learning_rate": 0.00041863006645864444, "loss": 0.1672, "step": 191540 }, { "epoch": 7.93, "grad_norm": 0.4609375, "learning_rate": 0.00041862205976623586, "loss": 0.2033, "step": 191550 }, { "epoch": 7.93, "grad_norm": 1.1640625, "learning_rate": 0.00041861405275649876, "loss": 0.1988, "step": 191560 }, { "epoch": 7.93, "grad_norm": 0.7578125, "learning_rate": 0.00041860604542944826, "loss": 0.208, "step": 191570 }, { "epoch": 7.94, "grad_norm": 0.54296875, "learning_rate": 0.0004185980377850994, "loss": 0.1693, "step": 191580 }, { "epoch": 7.94, "grad_norm": 0.6171875, "learning_rate": 0.00041859002982346714, "loss": 0.2366, "step": 191590 }, { "epoch": 7.94, "grad_norm": 0.66796875, "learning_rate": 0.00041858202154456683, "loss": 0.249, "step": 191600 }, { "epoch": 7.94, "grad_norm": 0.51171875, "learning_rate": 0.00041857401294841324, "loss": 0.1616, "step": 191610 }, { "epoch": 7.94, "grad_norm": 1.703125, "learning_rate": 0.0004185660040350216, "loss": 0.2067, "step": 191620 }, { "epoch": 7.94, "grad_norm": 1.109375, "learning_rate": 0.00041855799480440694, "loss": 0.1639, "step": 191630 }, { "epoch": 7.94, "grad_norm": 0.77734375, "learning_rate": 0.00041854998525658427, "loss": 0.171, "step": 191640 }, { "epoch": 7.94, "grad_norm": 0.453125, "learning_rate": 0.0004185419753915688, "loss": 0.1832, "step": 191650 }, { "epoch": 7.94, "grad_norm": 0.8828125, "learning_rate": 0.00041853396520937555, "loss": 0.1951, "step": 191660 }, { "epoch": 7.94, "grad_norm": 0.55078125, "learning_rate": 0.00041852595471001953, "loss": 0.2177, "step": 191670 }, { "epoch": 7.94, "grad_norm": 0.50390625, "learning_rate": 0.0004185179438935159, "loss": 0.2013, "step": 191680 }, { "epoch": 7.94, "grad_norm": 0.5234375, "learning_rate": 0.0004185099327598797, "loss": 0.258, "step": 191690 }, { "epoch": 7.94, "grad_norm": 0.498046875, "learning_rate": 0.00041850192130912594, "loss": 0.1821, "step": 191700 }, { "epoch": 7.94, "grad_norm": 1.328125, "learning_rate": 0.0004184939095412698, "loss": 0.2399, "step": 191710 }, { "epoch": 7.94, "grad_norm": 1.609375, "learning_rate": 0.0004184858974563264, "loss": 0.2082, "step": 191720 }, { "epoch": 7.94, "grad_norm": 0.87109375, "learning_rate": 0.0004184778850543106, "loss": 0.1597, "step": 191730 }, { "epoch": 7.94, "grad_norm": 0.5625, "learning_rate": 0.00041846987233523766, "loss": 0.1806, "step": 191740 }, { "epoch": 7.94, "grad_norm": 0.90234375, "learning_rate": 0.0004184618592991226, "loss": 0.2155, "step": 191750 }, { "epoch": 7.94, "grad_norm": 1.2578125, "learning_rate": 0.0004184538459459806, "loss": 0.218, "step": 191760 }, { "epoch": 7.94, "grad_norm": 1.453125, "learning_rate": 0.0004184458322758266, "loss": 0.1696, "step": 191770 }, { "epoch": 7.94, "grad_norm": 0.326171875, "learning_rate": 0.00041843781828867566, "loss": 0.2115, "step": 191780 }, { "epoch": 7.94, "grad_norm": 0.7578125, "learning_rate": 0.000418429803984543, "loss": 0.196, "step": 191790 }, { "epoch": 7.94, "grad_norm": 0.5390625, "learning_rate": 0.0004184217893634437, "loss": 0.193, "step": 191800 }, { "epoch": 7.94, "grad_norm": 1.5078125, "learning_rate": 0.0004184137744253927, "loss": 0.1948, "step": 191810 }, { "epoch": 7.95, "grad_norm": 0.40234375, "learning_rate": 0.00041840575917040515, "loss": 0.1837, "step": 191820 }, { "epoch": 7.95, "grad_norm": 0.96484375, "learning_rate": 0.0004183977435984962, "loss": 0.2227, "step": 191830 }, { "epoch": 7.95, "grad_norm": 0.7109375, "learning_rate": 0.0004183897277096809, "loss": 0.1926, "step": 191840 }, { "epoch": 7.95, "grad_norm": 0.416015625, "learning_rate": 0.0004183817115039742, "loss": 0.1631, "step": 191850 }, { "epoch": 7.95, "grad_norm": 0.2373046875, "learning_rate": 0.00041837369498139143, "loss": 0.216, "step": 191860 }, { "epoch": 7.95, "grad_norm": 0.296875, "learning_rate": 0.00041836567814194746, "loss": 0.1546, "step": 191870 }, { "epoch": 7.95, "grad_norm": 0.6953125, "learning_rate": 0.00041835766098565754, "loss": 0.2658, "step": 191880 }, { "epoch": 7.95, "grad_norm": 0.71484375, "learning_rate": 0.0004183496435125367, "loss": 0.2032, "step": 191890 }, { "epoch": 7.95, "grad_norm": 0.69921875, "learning_rate": 0.00041834162572259995, "loss": 0.1889, "step": 191900 }, { "epoch": 7.95, "grad_norm": 0.5390625, "learning_rate": 0.00041833360761586236, "loss": 0.2201, "step": 191910 }, { "epoch": 7.95, "grad_norm": 0.734375, "learning_rate": 0.0004183255891923392, "loss": 0.2633, "step": 191920 }, { "epoch": 7.95, "grad_norm": 0.58984375, "learning_rate": 0.0004183175704520455, "loss": 0.216, "step": 191930 }, { "epoch": 7.95, "grad_norm": 0.75, "learning_rate": 0.0004183095513949963, "loss": 0.2203, "step": 191940 }, { "epoch": 7.95, "grad_norm": 0.53125, "learning_rate": 0.0004183015320212066, "loss": 0.191, "step": 191950 }, { "epoch": 7.95, "grad_norm": 1.28125, "learning_rate": 0.0004182935123306917, "loss": 0.2076, "step": 191960 }, { "epoch": 7.95, "grad_norm": 0.5703125, "learning_rate": 0.00041828549232346646, "loss": 0.1701, "step": 191970 }, { "epoch": 7.95, "grad_norm": 0.8359375, "learning_rate": 0.0004182774719995462, "loss": 0.2028, "step": 191980 }, { "epoch": 7.95, "grad_norm": 0.62109375, "learning_rate": 0.0004182694513589459, "loss": 0.1824, "step": 191990 }, { "epoch": 7.95, "grad_norm": 0.84375, "learning_rate": 0.0004182614304016806, "loss": 0.2164, "step": 192000 }, { "epoch": 7.95, "grad_norm": 0.2275390625, "learning_rate": 0.00041825340912776557, "loss": 0.1746, "step": 192010 }, { "epoch": 7.95, "grad_norm": 1.21875, "learning_rate": 0.0004182453875372157, "loss": 0.235, "step": 192020 }, { "epoch": 7.95, "grad_norm": 0.62109375, "learning_rate": 0.00041823736563004616, "loss": 0.1766, "step": 192030 }, { "epoch": 7.95, "grad_norm": 1.359375, "learning_rate": 0.00041822934340627217, "loss": 0.2235, "step": 192040 }, { "epoch": 7.95, "grad_norm": 0.796875, "learning_rate": 0.0004182213208659086, "loss": 0.2378, "step": 192050 }, { "epoch": 7.96, "grad_norm": 0.357421875, "learning_rate": 0.0004182132980089708, "loss": 0.1959, "step": 192060 }, { "epoch": 7.96, "grad_norm": 0.4609375, "learning_rate": 0.00041820527483547366, "loss": 0.1936, "step": 192070 }, { "epoch": 7.96, "grad_norm": 1.0390625, "learning_rate": 0.0004181972513454323, "loss": 0.2154, "step": 192080 }, { "epoch": 7.96, "grad_norm": 0.73046875, "learning_rate": 0.000418189227538862, "loss": 0.1732, "step": 192090 }, { "epoch": 7.96, "grad_norm": 0.4296875, "learning_rate": 0.0004181812034157777, "loss": 0.172, "step": 192100 }, { "epoch": 7.96, "grad_norm": 0.83203125, "learning_rate": 0.00041817317897619447, "loss": 0.2558, "step": 192110 }, { "epoch": 7.96, "grad_norm": 0.6640625, "learning_rate": 0.00041816515422012757, "loss": 0.2587, "step": 192120 }, { "epoch": 7.96, "grad_norm": 0.2333984375, "learning_rate": 0.000418157129147592, "loss": 0.199, "step": 192130 }, { "epoch": 7.96, "grad_norm": 0.81640625, "learning_rate": 0.0004181491037586028, "loss": 0.1896, "step": 192140 }, { "epoch": 7.96, "grad_norm": 0.1494140625, "learning_rate": 0.0004181410780531752, "loss": 0.2129, "step": 192150 }, { "epoch": 7.96, "grad_norm": 0.6640625, "learning_rate": 0.00041813305203132424, "loss": 0.1872, "step": 192160 }, { "epoch": 7.96, "grad_norm": 0.89453125, "learning_rate": 0.000418125025693065, "loss": 0.1552, "step": 192170 }, { "epoch": 7.96, "grad_norm": 0.6875, "learning_rate": 0.00041811699903841266, "loss": 0.187, "step": 192180 }, { "epoch": 7.96, "grad_norm": 0.6640625, "learning_rate": 0.00041810897206738225, "loss": 0.2053, "step": 192190 }, { "epoch": 7.96, "grad_norm": 1.0234375, "learning_rate": 0.00041810094477998897, "loss": 0.166, "step": 192200 }, { "epoch": 7.96, "grad_norm": 1.7578125, "learning_rate": 0.00041809291717624777, "loss": 0.1862, "step": 192210 }, { "epoch": 7.96, "grad_norm": 0.68359375, "learning_rate": 0.00041808488925617395, "loss": 0.1904, "step": 192220 }, { "epoch": 7.96, "grad_norm": 0.427734375, "learning_rate": 0.00041807686101978246, "loss": 0.2326, "step": 192230 }, { "epoch": 7.96, "grad_norm": 1.2265625, "learning_rate": 0.0004180688324670885, "loss": 0.1963, "step": 192240 }, { "epoch": 7.96, "grad_norm": 0.453125, "learning_rate": 0.0004180608035981071, "loss": 0.1801, "step": 192250 }, { "epoch": 7.96, "grad_norm": 0.361328125, "learning_rate": 0.0004180527744128534, "loss": 0.1925, "step": 192260 }, { "epoch": 7.96, "grad_norm": 1.2734375, "learning_rate": 0.0004180447449113426, "loss": 0.2057, "step": 192270 }, { "epoch": 7.96, "grad_norm": 0.51953125, "learning_rate": 0.00041803671509358975, "loss": 0.186, "step": 192280 }, { "epoch": 7.96, "grad_norm": 1.09375, "learning_rate": 0.0004180286849596099, "loss": 0.1662, "step": 192290 }, { "epoch": 7.97, "grad_norm": 3.0625, "learning_rate": 0.00041802065450941825, "loss": 0.2118, "step": 192300 }, { "epoch": 7.97, "grad_norm": 0.5234375, "learning_rate": 0.0004180126237430298, "loss": 0.1827, "step": 192310 }, { "epoch": 7.97, "grad_norm": 0.9296875, "learning_rate": 0.0004180045926604598, "loss": 0.2421, "step": 192320 }, { "epoch": 7.97, "grad_norm": 0.55859375, "learning_rate": 0.00041799656126172326, "loss": 0.1759, "step": 192330 }, { "epoch": 7.97, "grad_norm": 1.1015625, "learning_rate": 0.00041798852954683536, "loss": 0.241, "step": 192340 }, { "epoch": 7.97, "grad_norm": 0.828125, "learning_rate": 0.0004179804975158112, "loss": 0.2204, "step": 192350 }, { "epoch": 7.97, "grad_norm": 0.65234375, "learning_rate": 0.00041797246516866586, "loss": 0.2007, "step": 192360 }, { "epoch": 7.97, "grad_norm": 0.466796875, "learning_rate": 0.0004179644325054145, "loss": 0.2007, "step": 192370 }, { "epoch": 7.97, "grad_norm": 0.66015625, "learning_rate": 0.00041795639952607225, "loss": 0.1831, "step": 192380 }, { "epoch": 7.97, "grad_norm": 0.46875, "learning_rate": 0.0004179483662306541, "loss": 0.2588, "step": 192390 }, { "epoch": 7.97, "grad_norm": 1.25, "learning_rate": 0.0004179403326191753, "loss": 0.2323, "step": 192400 }, { "epoch": 7.97, "grad_norm": 0.4765625, "learning_rate": 0.00041793229869165093, "loss": 0.2522, "step": 192410 }, { "epoch": 7.97, "grad_norm": 0.62109375, "learning_rate": 0.0004179242644480961, "loss": 0.1905, "step": 192420 }, { "epoch": 7.97, "grad_norm": 0.45703125, "learning_rate": 0.00041791622988852597, "loss": 0.1816, "step": 192430 }, { "epoch": 7.97, "grad_norm": 1.4921875, "learning_rate": 0.00041790819501295564, "loss": 0.1632, "step": 192440 }, { "epoch": 7.97, "grad_norm": 0.546875, "learning_rate": 0.00041790015982140017, "loss": 0.1863, "step": 192450 }, { "epoch": 7.97, "grad_norm": 0.47265625, "learning_rate": 0.0004178921243138748, "loss": 0.1674, "step": 192460 }, { "epoch": 7.97, "grad_norm": 0.7578125, "learning_rate": 0.0004178840884903945, "loss": 0.1766, "step": 192470 }, { "epoch": 7.97, "grad_norm": 0.8515625, "learning_rate": 0.0004178760523509745, "loss": 0.1838, "step": 192480 }, { "epoch": 7.97, "grad_norm": 0.6875, "learning_rate": 0.0004178680158956299, "loss": 0.1904, "step": 192490 }, { "epoch": 7.97, "grad_norm": 0.6015625, "learning_rate": 0.00041785997912437584, "loss": 0.1751, "step": 192500 }, { "epoch": 7.97, "grad_norm": 1.65625, "learning_rate": 0.0004178519420372274, "loss": 0.1918, "step": 192510 }, { "epoch": 7.97, "grad_norm": 0.7265625, "learning_rate": 0.00041784390463419973, "loss": 0.1474, "step": 192520 }, { "epoch": 7.97, "grad_norm": 0.5, "learning_rate": 0.00041783586691530807, "loss": 0.1898, "step": 192530 }, { "epoch": 7.97, "grad_norm": 0.53125, "learning_rate": 0.00041782782888056733, "loss": 0.2115, "step": 192540 }, { "epoch": 7.98, "grad_norm": 1.1484375, "learning_rate": 0.00041781979052999275, "loss": 0.1813, "step": 192550 }, { "epoch": 7.98, "grad_norm": 0.67578125, "learning_rate": 0.00041781175186359946, "loss": 0.2451, "step": 192560 }, { "epoch": 7.98, "grad_norm": 0.87109375, "learning_rate": 0.0004178037128814025, "loss": 0.2411, "step": 192570 }, { "epoch": 7.98, "grad_norm": 0.60546875, "learning_rate": 0.00041779567358341717, "loss": 0.1991, "step": 192580 }, { "epoch": 7.98, "grad_norm": 0.462890625, "learning_rate": 0.0004177876339696585, "loss": 0.2264, "step": 192590 }, { "epoch": 7.98, "grad_norm": 0.8359375, "learning_rate": 0.0004177795940401415, "loss": 0.1929, "step": 192600 }, { "epoch": 7.98, "grad_norm": 0.396484375, "learning_rate": 0.0004177715537948816, "loss": 0.1702, "step": 192610 }, { "epoch": 7.98, "grad_norm": 0.82421875, "learning_rate": 0.0004177635132338936, "loss": 0.2153, "step": 192620 }, { "epoch": 7.98, "grad_norm": 0.93359375, "learning_rate": 0.0004177554723571929, "loss": 0.1919, "step": 192630 }, { "epoch": 7.98, "grad_norm": 0.8515625, "learning_rate": 0.00041774743116479446, "loss": 0.1881, "step": 192640 }, { "epoch": 7.98, "grad_norm": 0.82421875, "learning_rate": 0.00041773938965671356, "loss": 0.1973, "step": 192650 }, { "epoch": 7.98, "grad_norm": 0.4609375, "learning_rate": 0.0004177313478329651, "loss": 0.1812, "step": 192660 }, { "epoch": 7.98, "grad_norm": 1.21875, "learning_rate": 0.00041772330569356443, "loss": 0.2334, "step": 192670 }, { "epoch": 7.98, "grad_norm": 0.9765625, "learning_rate": 0.0004177152632385266, "loss": 0.2126, "step": 192680 }, { "epoch": 7.98, "grad_norm": 0.69921875, "learning_rate": 0.00041770722046786675, "loss": 0.2443, "step": 192690 }, { "epoch": 7.98, "grad_norm": 0.65625, "learning_rate": 0.0004176991773816, "loss": 0.2481, "step": 192700 }, { "epoch": 7.98, "grad_norm": 1.359375, "learning_rate": 0.00041769113397974156, "loss": 0.1789, "step": 192710 }, { "epoch": 7.98, "grad_norm": 1.1484375, "learning_rate": 0.0004176830902623065, "loss": 0.2678, "step": 192720 }, { "epoch": 7.98, "grad_norm": 0.55859375, "learning_rate": 0.00041767504622931, "loss": 0.2315, "step": 192730 }, { "epoch": 7.98, "grad_norm": 0.439453125, "learning_rate": 0.0004176670018807671, "loss": 0.1996, "step": 192740 }, { "epoch": 7.98, "grad_norm": 0.80078125, "learning_rate": 0.000417658957216693, "loss": 0.2154, "step": 192750 }, { "epoch": 7.98, "grad_norm": 0.5703125, "learning_rate": 0.00041765091223710296, "loss": 0.2143, "step": 192760 }, { "epoch": 7.98, "grad_norm": 1.21875, "learning_rate": 0.0004176428669420119, "loss": 0.2121, "step": 192770 }, { "epoch": 7.98, "grad_norm": 0.6328125, "learning_rate": 0.00041763482133143516, "loss": 0.2192, "step": 192780 }, { "epoch": 7.99, "grad_norm": 0.85546875, "learning_rate": 0.0004176267754053877, "loss": 0.1811, "step": 192790 }, { "epoch": 7.99, "grad_norm": 0.828125, "learning_rate": 0.0004176187291638848, "loss": 0.2041, "step": 192800 }, { "epoch": 7.99, "grad_norm": 0.90234375, "learning_rate": 0.00041761068260694147, "loss": 0.1974, "step": 192810 }, { "epoch": 7.99, "grad_norm": 0.515625, "learning_rate": 0.00041760263573457305, "loss": 0.2134, "step": 192820 }, { "epoch": 7.99, "grad_norm": 0.6640625, "learning_rate": 0.00041759458854679455, "loss": 0.153, "step": 192830 }, { "epoch": 7.99, "grad_norm": 0.404296875, "learning_rate": 0.0004175865410436211, "loss": 0.2015, "step": 192840 }, { "epoch": 7.99, "grad_norm": 0.54296875, "learning_rate": 0.00041757849322506793, "loss": 0.2041, "step": 192850 }, { "epoch": 7.99, "grad_norm": 0.6953125, "learning_rate": 0.00041757044509115005, "loss": 0.2355, "step": 192860 }, { "epoch": 7.99, "grad_norm": 0.52734375, "learning_rate": 0.0004175623966418827, "loss": 0.2236, "step": 192870 }, { "epoch": 7.99, "grad_norm": 0.97265625, "learning_rate": 0.00041755434787728107, "loss": 0.1613, "step": 192880 }, { "epoch": 7.99, "grad_norm": 0.3828125, "learning_rate": 0.00041754629879736015, "loss": 0.1834, "step": 192890 }, { "epoch": 7.99, "grad_norm": 0.89453125, "learning_rate": 0.00041753824940213535, "loss": 0.1789, "step": 192900 }, { "epoch": 7.99, "grad_norm": 0.93359375, "learning_rate": 0.0004175301996916215, "loss": 0.2096, "step": 192910 }, { "epoch": 7.99, "grad_norm": 0.330078125, "learning_rate": 0.000417522149665834, "loss": 0.2103, "step": 192920 }, { "epoch": 7.99, "grad_norm": 1.125, "learning_rate": 0.0004175140993247879, "loss": 0.2354, "step": 192930 }, { "epoch": 7.99, "grad_norm": 0.322265625, "learning_rate": 0.0004175060486684984, "loss": 0.2248, "step": 192940 }, { "epoch": 7.99, "grad_norm": 1.15625, "learning_rate": 0.0004174979976969805, "loss": 0.2445, "step": 192950 }, { "epoch": 7.99, "grad_norm": 0.60546875, "learning_rate": 0.0004174899464102495, "loss": 0.2036, "step": 192960 }, { "epoch": 7.99, "grad_norm": 0.65625, "learning_rate": 0.0004174818948083204, "loss": 0.2133, "step": 192970 }, { "epoch": 7.99, "grad_norm": 0.458984375, "learning_rate": 0.00041747384289120867, "loss": 0.181, "step": 192980 }, { "epoch": 7.99, "grad_norm": 2.5625, "learning_rate": 0.00041746579065892917, "loss": 0.2051, "step": 192990 }, { "epoch": 7.99, "grad_norm": 0.77734375, "learning_rate": 0.00041745773811149715, "loss": 0.1873, "step": 193000 }, { "epoch": 7.99, "grad_norm": 0.828125, "learning_rate": 0.00041744968524892766, "loss": 0.1764, "step": 193010 }, { "epoch": 7.99, "grad_norm": 0.91796875, "learning_rate": 0.00041744163207123596, "loss": 0.1604, "step": 193020 }, { "epoch": 8.0, "grad_norm": 0.66796875, "learning_rate": 0.0004174335785784372, "loss": 0.207, "step": 193030 }, { "epoch": 8.0, "grad_norm": 0.75390625, "learning_rate": 0.00041742552477054665, "loss": 0.2188, "step": 193040 }, { "epoch": 8.0, "grad_norm": 0.890625, "learning_rate": 0.00041741747064757924, "loss": 0.2048, "step": 193050 }, { "epoch": 8.0, "grad_norm": 0.578125, "learning_rate": 0.0004174094162095502, "loss": 0.2015, "step": 193060 }, { "epoch": 8.0, "grad_norm": 0.5703125, "learning_rate": 0.0004174013614564748, "loss": 0.2668, "step": 193070 }, { "epoch": 8.0, "grad_norm": 0.57421875, "learning_rate": 0.000417393306388368, "loss": 0.2057, "step": 193080 }, { "epoch": 8.0, "grad_norm": 1.0859375, "learning_rate": 0.0004173852510052453, "loss": 0.2113, "step": 193090 }, { "epoch": 8.0, "grad_norm": 0.75390625, "learning_rate": 0.00041737719530712136, "loss": 0.2036, "step": 193100 }, { "epoch": 8.0, "grad_norm": 0.88671875, "learning_rate": 0.00041736913929401177, "loss": 0.2028, "step": 193110 }, { "epoch": 8.0, "grad_norm": 0.609375, "learning_rate": 0.00041736108296593153, "loss": 0.2643, "step": 193120 }, { "epoch": 8.0, "grad_norm": 0.6015625, "learning_rate": 0.0004173530263228957, "loss": 0.2127, "step": 193130 }, { "epoch": 8.0, "grad_norm": 0.421875, "learning_rate": 0.0004173449693649197, "loss": 0.187, "step": 193140 }, { "epoch": 8.0, "grad_norm": 0.6640625, "learning_rate": 0.0004173369120920185, "loss": 0.2359, "step": 193150 }, { "epoch": 8.0, "grad_norm": 0.59375, "learning_rate": 0.0004173288545042072, "loss": 0.1854, "step": 193160 }, { "epoch": 8.0, "grad_norm": 0.859375, "learning_rate": 0.0004173207966015011, "loss": 0.2468, "step": 193170 }, { "epoch": 8.0, "grad_norm": 0.4609375, "learning_rate": 0.00041731273838391535, "loss": 0.2067, "step": 193180 }, { "epoch": 8.0, "grad_norm": 0.890625, "learning_rate": 0.0004173046798514651, "loss": 0.2165, "step": 193190 }, { "epoch": 8.0, "grad_norm": 0.5390625, "learning_rate": 0.0004172966210041656, "loss": 0.2312, "step": 193200 }, { "epoch": 8.0, "grad_norm": 0.6875, "learning_rate": 0.0004172885618420318, "loss": 0.2238, "step": 193210 }, { "epoch": 8.0, "grad_norm": 0.54296875, "learning_rate": 0.0004172805023650791, "loss": 0.1927, "step": 193220 }, { "epoch": 8.0, "grad_norm": 0.55859375, "learning_rate": 0.00041727244257332244, "loss": 0.2126, "step": 193230 }, { "epoch": 8.0, "grad_norm": 0.63671875, "learning_rate": 0.0004172643824667772, "loss": 0.2064, "step": 193240 }, { "epoch": 8.0, "grad_norm": 1.5859375, "learning_rate": 0.0004172563220454584, "loss": 0.1857, "step": 193250 }, { "epoch": 8.0, "grad_norm": 0.7265625, "learning_rate": 0.0004172482613093812, "loss": 0.1556, "step": 193260 }, { "epoch": 8.01, "grad_norm": 1.140625, "learning_rate": 0.00041724020025856096, "loss": 0.1888, "step": 193270 }, { "epoch": 8.01, "grad_norm": 0.796875, "learning_rate": 0.0004172321388930127, "loss": 0.24, "step": 193280 }, { "epoch": 8.01, "grad_norm": 0.83984375, "learning_rate": 0.00041722407721275155, "loss": 0.1881, "step": 193290 }, { "epoch": 8.01, "grad_norm": 2.140625, "learning_rate": 0.00041721601521779283, "loss": 0.1963, "step": 193300 }, { "epoch": 8.01, "grad_norm": 0.421875, "learning_rate": 0.00041720795290815153, "loss": 0.1818, "step": 193310 }, { "epoch": 8.01, "grad_norm": 1.046875, "learning_rate": 0.000417199890283843, "loss": 0.246, "step": 193320 }, { "epoch": 8.01, "grad_norm": 1.3125, "learning_rate": 0.0004171918273448823, "loss": 0.1562, "step": 193330 }, { "epoch": 8.01, "grad_norm": 0.66796875, "learning_rate": 0.00041718376409128466, "loss": 0.2384, "step": 193340 }, { "epoch": 8.01, "grad_norm": 0.58203125, "learning_rate": 0.0004171757005230652, "loss": 0.2262, "step": 193350 }, { "epoch": 8.01, "grad_norm": 0.6953125, "learning_rate": 0.00041716763664023914, "loss": 0.161, "step": 193360 }, { "epoch": 8.01, "grad_norm": 1.1640625, "learning_rate": 0.00041715957244282167, "loss": 0.1988, "step": 193370 }, { "epoch": 8.01, "grad_norm": 0.6796875, "learning_rate": 0.0004171515079308279, "loss": 0.1365, "step": 193380 }, { "epoch": 8.01, "grad_norm": 1.296875, "learning_rate": 0.000417143443104273, "loss": 0.1955, "step": 193390 }, { "epoch": 8.01, "grad_norm": 0.65234375, "learning_rate": 0.00041713537796317227, "loss": 0.1842, "step": 193400 }, { "epoch": 8.01, "grad_norm": 0.859375, "learning_rate": 0.0004171273125075408, "loss": 0.1738, "step": 193410 }, { "epoch": 8.01, "grad_norm": 0.64453125, "learning_rate": 0.0004171192467373937, "loss": 0.2511, "step": 193420 }, { "epoch": 8.01, "grad_norm": 0.578125, "learning_rate": 0.0004171111806527463, "loss": 0.2037, "step": 193430 }, { "epoch": 8.01, "grad_norm": 0.77734375, "learning_rate": 0.0004171031142536136, "loss": 0.169, "step": 193440 }, { "epoch": 8.01, "grad_norm": 0.5546875, "learning_rate": 0.000417095047540011, "loss": 0.1991, "step": 193450 }, { "epoch": 8.01, "grad_norm": 0.89453125, "learning_rate": 0.00041708698051195353, "loss": 0.131, "step": 193460 }, { "epoch": 8.01, "grad_norm": 0.91015625, "learning_rate": 0.0004170789131694564, "loss": 0.1863, "step": 193470 }, { "epoch": 8.01, "grad_norm": 2.125, "learning_rate": 0.00041707084551253484, "loss": 0.1906, "step": 193480 }, { "epoch": 8.01, "grad_norm": 0.359375, "learning_rate": 0.0004170627775412039, "loss": 0.1871, "step": 193490 }, { "epoch": 8.01, "grad_norm": 0.65625, "learning_rate": 0.00041705470925547893, "loss": 0.1407, "step": 193500 }, { "epoch": 8.02, "grad_norm": 1.0703125, "learning_rate": 0.000417046640655375, "loss": 0.2045, "step": 193510 }, { "epoch": 8.02, "grad_norm": 0.890625, "learning_rate": 0.0004170385717409074, "loss": 0.2389, "step": 193520 }, { "epoch": 8.02, "grad_norm": 2.203125, "learning_rate": 0.00041703050251209117, "loss": 0.2203, "step": 193530 }, { "epoch": 8.02, "grad_norm": 0.45703125, "learning_rate": 0.0004170224329689416, "loss": 0.2127, "step": 193540 }, { "epoch": 8.02, "grad_norm": 0.5703125, "learning_rate": 0.00041701436311147383, "loss": 0.2229, "step": 193550 }, { "epoch": 8.02, "grad_norm": 0.70703125, "learning_rate": 0.0004170062929397031, "loss": 0.2313, "step": 193560 }, { "epoch": 8.02, "grad_norm": 0.365234375, "learning_rate": 0.0004169982224536445, "loss": 0.2048, "step": 193570 }, { "epoch": 8.02, "grad_norm": 0.369140625, "learning_rate": 0.00041699015165331334, "loss": 0.192, "step": 193580 }, { "epoch": 8.02, "grad_norm": 0.4921875, "learning_rate": 0.00041698208053872476, "loss": 0.2018, "step": 193590 }, { "epoch": 8.02, "grad_norm": 0.84765625, "learning_rate": 0.00041697400910989385, "loss": 0.2113, "step": 193600 }, { "epoch": 8.02, "grad_norm": 0.6171875, "learning_rate": 0.00041696593736683594, "loss": 0.1982, "step": 193610 }, { "epoch": 8.02, "grad_norm": 0.7734375, "learning_rate": 0.00041695786530956627, "loss": 0.2225, "step": 193620 }, { "epoch": 8.02, "grad_norm": 1.3515625, "learning_rate": 0.00041694979293809974, "loss": 0.215, "step": 193630 }, { "epoch": 8.02, "grad_norm": 0.64453125, "learning_rate": 0.0004169417202524518, "loss": 0.192, "step": 193640 }, { "epoch": 8.02, "grad_norm": 1.34375, "learning_rate": 0.0004169336472526376, "loss": 0.1857, "step": 193650 }, { "epoch": 8.02, "grad_norm": 1.421875, "learning_rate": 0.00041692557393867226, "loss": 0.1406, "step": 193660 }, { "epoch": 8.02, "grad_norm": 0.7578125, "learning_rate": 0.00041691750031057107, "loss": 0.155, "step": 193670 }, { "epoch": 8.02, "grad_norm": 2.265625, "learning_rate": 0.0004169094263683492, "loss": 0.1897, "step": 193680 }, { "epoch": 8.02, "grad_norm": 1.15625, "learning_rate": 0.00041690135211202173, "loss": 0.1931, "step": 193690 }, { "epoch": 8.02, "grad_norm": 0.87890625, "learning_rate": 0.000416893277541604, "loss": 0.2089, "step": 193700 }, { "epoch": 8.02, "grad_norm": 0.4296875, "learning_rate": 0.00041688520265711107, "loss": 0.1879, "step": 193710 }, { "epoch": 8.02, "grad_norm": 0.5390625, "learning_rate": 0.00041687712745855826, "loss": 0.1859, "step": 193720 }, { "epoch": 8.02, "grad_norm": 0.87109375, "learning_rate": 0.0004168690519459607, "loss": 0.2122, "step": 193730 }, { "epoch": 8.02, "grad_norm": 1.0703125, "learning_rate": 0.0004168609761193336, "loss": 0.2748, "step": 193740 }, { "epoch": 8.03, "grad_norm": 0.625, "learning_rate": 0.0004168528999786922, "loss": 0.2295, "step": 193750 }, { "epoch": 8.03, "grad_norm": 0.875, "learning_rate": 0.0004168448235240516, "loss": 0.1853, "step": 193760 }, { "epoch": 8.03, "grad_norm": 1.65625, "learning_rate": 0.00041683674675542715, "loss": 0.1771, "step": 193770 }, { "epoch": 8.03, "grad_norm": 0.6875, "learning_rate": 0.00041682866967283384, "loss": 0.2357, "step": 193780 }, { "epoch": 8.03, "grad_norm": 0.4765625, "learning_rate": 0.0004168205922762871, "loss": 0.2009, "step": 193790 }, { "epoch": 8.03, "grad_norm": 0.51171875, "learning_rate": 0.000416812514565802, "loss": 0.2196, "step": 193800 }, { "epoch": 8.03, "grad_norm": 1.875, "learning_rate": 0.00041680443654139364, "loss": 0.204, "step": 193810 }, { "epoch": 8.03, "grad_norm": 1.1328125, "learning_rate": 0.0004167963582030775, "loss": 0.1861, "step": 193820 }, { "epoch": 8.03, "grad_norm": 0.96875, "learning_rate": 0.00041678827955086864, "loss": 0.1458, "step": 193830 }, { "epoch": 8.03, "grad_norm": 0.43359375, "learning_rate": 0.00041678020058478213, "loss": 0.1836, "step": 193840 }, { "epoch": 8.03, "grad_norm": 1.03125, "learning_rate": 0.00041677212130483335, "loss": 0.1897, "step": 193850 }, { "epoch": 8.03, "grad_norm": 0.62109375, "learning_rate": 0.0004167640417110374, "loss": 0.1498, "step": 193860 }, { "epoch": 8.03, "grad_norm": 0.58984375, "learning_rate": 0.0004167559618034096, "loss": 0.2051, "step": 193870 }, { "epoch": 8.03, "grad_norm": 0.73046875, "learning_rate": 0.00041674788158196506, "loss": 0.2347, "step": 193880 }, { "epoch": 8.03, "grad_norm": 0.6875, "learning_rate": 0.000416739801046719, "loss": 0.2748, "step": 193890 }, { "epoch": 8.03, "grad_norm": 0.9296875, "learning_rate": 0.0004167317201976867, "loss": 0.1871, "step": 193900 }, { "epoch": 8.03, "grad_norm": 0.0, "learning_rate": 0.0004167236390348832, "loss": 0.186, "step": 193910 }, { "epoch": 8.03, "grad_norm": 0.6875, "learning_rate": 0.0004167155575583239, "loss": 0.2057, "step": 193920 }, { "epoch": 8.03, "grad_norm": 1.0703125, "learning_rate": 0.00041670747576802393, "loss": 0.1869, "step": 193930 }, { "epoch": 8.03, "grad_norm": 1.234375, "learning_rate": 0.00041669939366399846, "loss": 0.1795, "step": 193940 }, { "epoch": 8.03, "grad_norm": 1.6484375, "learning_rate": 0.0004166913112462627, "loss": 0.2078, "step": 193950 }, { "epoch": 8.03, "grad_norm": 0.9296875, "learning_rate": 0.000416683228514832, "loss": 0.2203, "step": 193960 }, { "epoch": 8.03, "grad_norm": 0.392578125, "learning_rate": 0.00041667514546972137, "loss": 0.2289, "step": 193970 }, { "epoch": 8.03, "grad_norm": 0.46875, "learning_rate": 0.0004166670621109461, "loss": 0.2356, "step": 193980 }, { "epoch": 8.04, "grad_norm": 1.0859375, "learning_rate": 0.0004166589784385215, "loss": 0.1703, "step": 193990 }, { "epoch": 8.04, "grad_norm": 0.494140625, "learning_rate": 0.00041665089445246263, "loss": 0.1523, "step": 194000 }, { "epoch": 8.04, "grad_norm": 2.34375, "learning_rate": 0.0004166428101527848, "loss": 0.2329, "step": 194010 }, { "epoch": 8.04, "grad_norm": 0.4375, "learning_rate": 0.0004166347255395032, "loss": 0.2211, "step": 194020 }, { "epoch": 8.04, "grad_norm": 0.392578125, "learning_rate": 0.00041662664061263305, "loss": 0.1665, "step": 194030 }, { "epoch": 8.04, "grad_norm": 0.6484375, "learning_rate": 0.0004166185553721896, "loss": 0.1871, "step": 194040 }, { "epoch": 8.04, "grad_norm": 0.80078125, "learning_rate": 0.00041661046981818797, "loss": 0.2536, "step": 194050 }, { "epoch": 8.04, "grad_norm": 0.98046875, "learning_rate": 0.00041660238395064343, "loss": 0.1861, "step": 194060 }, { "epoch": 8.04, "grad_norm": 0.79296875, "learning_rate": 0.00041659429776957116, "loss": 0.2129, "step": 194070 }, { "epoch": 8.04, "grad_norm": 0.7578125, "learning_rate": 0.0004165862112749864, "loss": 0.1924, "step": 194080 }, { "epoch": 8.04, "grad_norm": 0.2578125, "learning_rate": 0.00041657812446690447, "loss": 0.1726, "step": 194090 }, { "epoch": 8.04, "grad_norm": 0.75, "learning_rate": 0.0004165700373453404, "loss": 0.1964, "step": 194100 }, { "epoch": 8.04, "grad_norm": 0.7421875, "learning_rate": 0.0004165619499103096, "loss": 0.2235, "step": 194110 }, { "epoch": 8.04, "grad_norm": 0.2294921875, "learning_rate": 0.00041655386216182713, "loss": 0.1571, "step": 194120 }, { "epoch": 8.04, "grad_norm": 0.5625, "learning_rate": 0.00041654577409990835, "loss": 0.2309, "step": 194130 }, { "epoch": 8.04, "grad_norm": 0.75, "learning_rate": 0.0004165376857245684, "loss": 0.1991, "step": 194140 }, { "epoch": 8.04, "grad_norm": 0.8359375, "learning_rate": 0.00041652959703582243, "loss": 0.2221, "step": 194150 }, { "epoch": 8.04, "grad_norm": 0.453125, "learning_rate": 0.0004165215080336858, "loss": 0.2197, "step": 194160 }, { "epoch": 8.04, "grad_norm": 0.80078125, "learning_rate": 0.00041651341871817366, "loss": 0.2578, "step": 194170 }, { "epoch": 8.04, "grad_norm": 0.7421875, "learning_rate": 0.00041650532908930123, "loss": 0.1997, "step": 194180 }, { "epoch": 8.04, "grad_norm": 0.66796875, "learning_rate": 0.0004164972391470838, "loss": 0.1673, "step": 194190 }, { "epoch": 8.04, "grad_norm": 0.78515625, "learning_rate": 0.0004164891488915365, "loss": 0.1776, "step": 194200 }, { "epoch": 8.04, "grad_norm": 0.91796875, "learning_rate": 0.0004164810583226746, "loss": 0.1979, "step": 194210 }, { "epoch": 8.04, "grad_norm": 0.578125, "learning_rate": 0.00041647296744051337, "loss": 0.218, "step": 194220 }, { "epoch": 8.04, "grad_norm": 1.078125, "learning_rate": 0.00041646487624506795, "loss": 0.2187, "step": 194230 }, { "epoch": 8.05, "grad_norm": 0.78515625, "learning_rate": 0.00041645678473635364, "loss": 0.1754, "step": 194240 }, { "epoch": 8.05, "grad_norm": 1.3125, "learning_rate": 0.0004164486929143856, "loss": 0.1597, "step": 194250 }, { "epoch": 8.05, "grad_norm": 0.474609375, "learning_rate": 0.00041644060077917914, "loss": 0.1824, "step": 194260 }, { "epoch": 8.05, "grad_norm": 0.9921875, "learning_rate": 0.0004164325083307494, "loss": 0.225, "step": 194270 }, { "epoch": 8.05, "grad_norm": 0.9296875, "learning_rate": 0.0004164244155691117, "loss": 0.2169, "step": 194280 }, { "epoch": 8.05, "grad_norm": 1.625, "learning_rate": 0.0004164163224942812, "loss": 0.2325, "step": 194290 }, { "epoch": 8.05, "grad_norm": 0.498046875, "learning_rate": 0.0004164082291062732, "loss": 0.1932, "step": 194300 }, { "epoch": 8.05, "grad_norm": 0.466796875, "learning_rate": 0.0004164001354051028, "loss": 0.2053, "step": 194310 }, { "epoch": 8.05, "grad_norm": 1.390625, "learning_rate": 0.00041639204139078535, "loss": 0.2255, "step": 194320 }, { "epoch": 8.05, "grad_norm": 1.21875, "learning_rate": 0.00041638394706333605, "loss": 0.2767, "step": 194330 }, { "epoch": 8.05, "grad_norm": 0.259765625, "learning_rate": 0.0004163758524227701, "loss": 0.204, "step": 194340 }, { "epoch": 8.05, "grad_norm": 0.80859375, "learning_rate": 0.0004163677574691028, "loss": 0.1805, "step": 194350 }, { "epoch": 8.05, "grad_norm": 0.62109375, "learning_rate": 0.0004163596622023493, "loss": 0.2045, "step": 194360 }, { "epoch": 8.05, "grad_norm": 0.5, "learning_rate": 0.0004163515666225249, "loss": 0.2174, "step": 194370 }, { "epoch": 8.05, "grad_norm": 0.875, "learning_rate": 0.0004163434707296449, "loss": 0.1782, "step": 194380 }, { "epoch": 8.05, "grad_norm": 0.69140625, "learning_rate": 0.00041633537452372436, "loss": 0.2014, "step": 194390 }, { "epoch": 8.05, "grad_norm": 0.8359375, "learning_rate": 0.0004163272780047787, "loss": 0.1668, "step": 194400 }, { "epoch": 8.05, "grad_norm": 0.49609375, "learning_rate": 0.0004163191811728229, "loss": 0.2011, "step": 194410 }, { "epoch": 8.05, "grad_norm": 0.34375, "learning_rate": 0.0004163110840278725, "loss": 0.1922, "step": 194420 }, { "epoch": 8.05, "grad_norm": 1.21875, "learning_rate": 0.00041630298656994256, "loss": 0.1771, "step": 194430 }, { "epoch": 8.05, "grad_norm": 1.171875, "learning_rate": 0.0004162948887990483, "loss": 0.1878, "step": 194440 }, { "epoch": 8.05, "grad_norm": 1.359375, "learning_rate": 0.00041628679071520515, "loss": 0.1757, "step": 194450 }, { "epoch": 8.05, "grad_norm": 1.921875, "learning_rate": 0.00041627869231842813, "loss": 0.2103, "step": 194460 }, { "epoch": 8.05, "grad_norm": 0.404296875, "learning_rate": 0.00041627059360873253, "loss": 0.2172, "step": 194470 }, { "epoch": 8.06, "grad_norm": 0.337890625, "learning_rate": 0.00041626249458613375, "loss": 0.2032, "step": 194480 }, { "epoch": 8.06, "grad_norm": 0.263671875, "learning_rate": 0.0004162543952506468, "loss": 0.1884, "step": 194490 }, { "epoch": 8.06, "grad_norm": 1.5078125, "learning_rate": 0.00041624629560228706, "loss": 0.2081, "step": 194500 }, { "epoch": 8.06, "grad_norm": 1.078125, "learning_rate": 0.0004162381956410698, "loss": 0.2042, "step": 194510 }, { "epoch": 8.06, "grad_norm": 1.078125, "learning_rate": 0.0004162300953670101, "loss": 0.2524, "step": 194520 }, { "epoch": 8.06, "grad_norm": 0.6953125, "learning_rate": 0.0004162219947801234, "loss": 0.1802, "step": 194530 }, { "epoch": 8.06, "grad_norm": 0.6015625, "learning_rate": 0.0004162138938804248, "loss": 0.1657, "step": 194540 }, { "epoch": 8.06, "grad_norm": 1.25, "learning_rate": 0.00041620579266792967, "loss": 0.1814, "step": 194550 }, { "epoch": 8.06, "grad_norm": 0.73828125, "learning_rate": 0.00041619769114265313, "loss": 0.1708, "step": 194560 }, { "epoch": 8.06, "grad_norm": 1.1953125, "learning_rate": 0.0004161895893046105, "loss": 0.175, "step": 194570 }, { "epoch": 8.06, "grad_norm": 0.2890625, "learning_rate": 0.00041618148715381697, "loss": 0.242, "step": 194580 }, { "epoch": 8.06, "grad_norm": 2.78125, "learning_rate": 0.0004161733846902879, "loss": 0.1881, "step": 194590 }, { "epoch": 8.06, "grad_norm": 2.015625, "learning_rate": 0.00041616528191403847, "loss": 0.2078, "step": 194600 }, { "epoch": 8.06, "grad_norm": 0.60546875, "learning_rate": 0.00041615717882508387, "loss": 0.1998, "step": 194610 }, { "epoch": 8.06, "grad_norm": 0.177734375, "learning_rate": 0.0004161490754234394, "loss": 0.2234, "step": 194620 }, { "epoch": 8.06, "grad_norm": 0.96875, "learning_rate": 0.00041614097170912035, "loss": 0.2426, "step": 194630 }, { "epoch": 8.06, "grad_norm": 0.427734375, "learning_rate": 0.00041613286768214195, "loss": 0.1811, "step": 194640 }, { "epoch": 8.06, "grad_norm": 1.03125, "learning_rate": 0.0004161247633425194, "loss": 0.1981, "step": 194650 }, { "epoch": 8.06, "grad_norm": 0.412109375, "learning_rate": 0.0004161166586902679, "loss": 0.1949, "step": 194660 }, { "epoch": 8.06, "grad_norm": 0.427734375, "learning_rate": 0.0004161085537254029, "loss": 0.202, "step": 194670 }, { "epoch": 8.06, "grad_norm": 0.392578125, "learning_rate": 0.00041610044844793946, "loss": 0.1622, "step": 194680 }, { "epoch": 8.06, "grad_norm": 0.96875, "learning_rate": 0.000416092342857893, "loss": 0.232, "step": 194690 }, { "epoch": 8.06, "grad_norm": 0.8203125, "learning_rate": 0.00041608423695527864, "loss": 0.1795, "step": 194700 }, { "epoch": 8.06, "grad_norm": 0.69921875, "learning_rate": 0.00041607613074011175, "loss": 0.2069, "step": 194710 }, { "epoch": 8.07, "grad_norm": 0.7109375, "learning_rate": 0.0004160680242124075, "loss": 0.2229, "step": 194720 }, { "epoch": 8.07, "grad_norm": 1.0859375, "learning_rate": 0.0004160599173721811, "loss": 0.2312, "step": 194730 }, { "epoch": 8.07, "grad_norm": 0.46875, "learning_rate": 0.0004160518102194478, "loss": 0.1967, "step": 194740 }, { "epoch": 8.07, "grad_norm": 1.0703125, "learning_rate": 0.0004160437027542231, "loss": 0.2344, "step": 194750 }, { "epoch": 8.07, "grad_norm": 0.67578125, "learning_rate": 0.00041603559497652197, "loss": 0.2207, "step": 194760 }, { "epoch": 8.07, "grad_norm": 0.72265625, "learning_rate": 0.0004160274868863598, "loss": 0.1589, "step": 194770 }, { "epoch": 8.07, "grad_norm": 2.234375, "learning_rate": 0.00041601937848375183, "loss": 0.1958, "step": 194780 }, { "epoch": 8.07, "grad_norm": 0.3203125, "learning_rate": 0.0004160112697687133, "loss": 0.1846, "step": 194790 }, { "epoch": 8.07, "grad_norm": 0.7265625, "learning_rate": 0.00041600316074125957, "loss": 0.1683, "step": 194800 }, { "epoch": 8.07, "grad_norm": 1.2421875, "learning_rate": 0.00041599505140140576, "loss": 0.2107, "step": 194810 }, { "epoch": 8.07, "grad_norm": 0.5625, "learning_rate": 0.00041598694174916717, "loss": 0.2375, "step": 194820 }, { "epoch": 8.07, "grad_norm": 0.45703125, "learning_rate": 0.0004159788317845591, "loss": 0.2421, "step": 194830 }, { "epoch": 8.07, "grad_norm": 0.51171875, "learning_rate": 0.0004159707215075969, "loss": 0.2128, "step": 194840 }, { "epoch": 8.07, "grad_norm": 0.87890625, "learning_rate": 0.0004159626109182956, "loss": 0.2642, "step": 194850 }, { "epoch": 8.07, "grad_norm": 0.4921875, "learning_rate": 0.00041595450001667055, "loss": 0.2229, "step": 194860 }, { "epoch": 8.07, "grad_norm": 0.40234375, "learning_rate": 0.0004159463888027371, "loss": 0.1909, "step": 194870 }, { "epoch": 8.07, "grad_norm": 0.4375, "learning_rate": 0.0004159382772765105, "loss": 0.1282, "step": 194880 }, { "epoch": 8.07, "grad_norm": 0.96875, "learning_rate": 0.0004159301654380059, "loss": 0.1995, "step": 194890 }, { "epoch": 8.07, "grad_norm": 0.67578125, "learning_rate": 0.00041592205328723876, "loss": 0.2134, "step": 194900 }, { "epoch": 8.07, "grad_norm": 1.5, "learning_rate": 0.0004159139408242242, "loss": 0.1842, "step": 194910 }, { "epoch": 8.07, "grad_norm": 0.6796875, "learning_rate": 0.00041590582804897747, "loss": 0.1973, "step": 194920 }, { "epoch": 8.07, "grad_norm": 0.5625, "learning_rate": 0.00041589771496151396, "loss": 0.2153, "step": 194930 }, { "epoch": 8.07, "grad_norm": 0.78515625, "learning_rate": 0.0004158896015618488, "loss": 0.2142, "step": 194940 }, { "epoch": 8.07, "grad_norm": 0.314453125, "learning_rate": 0.0004158814878499974, "loss": 0.2249, "step": 194950 }, { "epoch": 8.08, "grad_norm": 0.23046875, "learning_rate": 0.0004158733738259748, "loss": 0.1368, "step": 194960 }, { "epoch": 8.08, "grad_norm": 0.50390625, "learning_rate": 0.0004158652594897966, "loss": 0.1703, "step": 194970 }, { "epoch": 8.08, "grad_norm": 0.76953125, "learning_rate": 0.00041585714484147784, "loss": 0.2108, "step": 194980 }, { "epoch": 8.08, "grad_norm": 0.9140625, "learning_rate": 0.0004158490298810338, "loss": 0.1696, "step": 194990 }, { "epoch": 8.08, "grad_norm": 1.3828125, "learning_rate": 0.00041584091460847983, "loss": 0.2315, "step": 195000 }, { "epoch": 8.08, "grad_norm": 0.474609375, "learning_rate": 0.0004158327990238312, "loss": 0.2335, "step": 195010 }, { "epoch": 8.08, "grad_norm": 0.66015625, "learning_rate": 0.00041582468312710303, "loss": 0.2295, "step": 195020 }, { "epoch": 8.08, "grad_norm": 0.33984375, "learning_rate": 0.0004158165669183108, "loss": 0.2082, "step": 195030 }, { "epoch": 8.08, "grad_norm": 1.296875, "learning_rate": 0.00041580845039746974, "loss": 0.2086, "step": 195040 }, { "epoch": 8.08, "grad_norm": 0.48046875, "learning_rate": 0.000415800333564595, "loss": 0.1892, "step": 195050 }, { "epoch": 8.08, "grad_norm": 0.89453125, "learning_rate": 0.000415792216419702, "loss": 0.1746, "step": 195060 }, { "epoch": 8.08, "grad_norm": 0.7265625, "learning_rate": 0.0004157840989628059, "loss": 0.2037, "step": 195070 }, { "epoch": 8.08, "grad_norm": 0.58984375, "learning_rate": 0.0004157759811939221, "loss": 0.2092, "step": 195080 }, { "epoch": 8.08, "grad_norm": 0.91796875, "learning_rate": 0.0004157678631130657, "loss": 0.2134, "step": 195090 }, { "epoch": 8.08, "grad_norm": 1.4609375, "learning_rate": 0.00041575974472025213, "loss": 0.1977, "step": 195100 }, { "epoch": 8.08, "grad_norm": 0.61328125, "learning_rate": 0.00041575162601549667, "loss": 0.1823, "step": 195110 }, { "epoch": 8.08, "grad_norm": 1.2421875, "learning_rate": 0.00041574350699881445, "loss": 0.2396, "step": 195120 }, { "epoch": 8.08, "grad_norm": 0.63671875, "learning_rate": 0.00041573538767022093, "loss": 0.2048, "step": 195130 }, { "epoch": 8.08, "grad_norm": 0.875, "learning_rate": 0.00041572726802973134, "loss": 0.2027, "step": 195140 }, { "epoch": 8.08, "grad_norm": 0.62890625, "learning_rate": 0.00041571914807736086, "loss": 0.2142, "step": 195150 }, { "epoch": 8.08, "grad_norm": 0.578125, "learning_rate": 0.0004157110278131249, "loss": 0.2157, "step": 195160 }, { "epoch": 8.08, "grad_norm": 0.87890625, "learning_rate": 0.0004157029072370386, "loss": 0.206, "step": 195170 }, { "epoch": 8.08, "grad_norm": 0.69921875, "learning_rate": 0.0004156947863491173, "loss": 0.2244, "step": 195180 }, { "epoch": 8.08, "grad_norm": 0.38671875, "learning_rate": 0.0004156866651493764, "loss": 0.2073, "step": 195190 }, { "epoch": 8.09, "grad_norm": 1.4296875, "learning_rate": 0.0004156785436378311, "loss": 0.2328, "step": 195200 }, { "epoch": 8.09, "grad_norm": 0.67578125, "learning_rate": 0.0004156704218144966, "loss": 0.1571, "step": 195210 }, { "epoch": 8.09, "grad_norm": 1.0234375, "learning_rate": 0.0004156622996793883, "loss": 0.2414, "step": 195220 }, { "epoch": 8.09, "grad_norm": 0.72265625, "learning_rate": 0.0004156541772325214, "loss": 0.1793, "step": 195230 }, { "epoch": 8.09, "grad_norm": 0.9140625, "learning_rate": 0.00041564605447391126, "loss": 0.1753, "step": 195240 }, { "epoch": 8.09, "grad_norm": 1.1171875, "learning_rate": 0.0004156379314035731, "loss": 0.1457, "step": 195250 }, { "epoch": 8.09, "grad_norm": 2.5, "learning_rate": 0.0004156298080215223, "loss": 0.1664, "step": 195260 }, { "epoch": 8.09, "grad_norm": 1.0546875, "learning_rate": 0.00041562168432777405, "loss": 0.1923, "step": 195270 }, { "epoch": 8.09, "grad_norm": 0.97265625, "learning_rate": 0.00041561356032234365, "loss": 0.1594, "step": 195280 }, { "epoch": 8.09, "grad_norm": 0.443359375, "learning_rate": 0.0004156054360052465, "loss": 0.1369, "step": 195290 }, { "epoch": 8.09, "grad_norm": 0.671875, "learning_rate": 0.0004155973113764977, "loss": 0.2097, "step": 195300 }, { "epoch": 8.09, "grad_norm": 0.74609375, "learning_rate": 0.00041558918643611276, "loss": 0.2275, "step": 195310 }, { "epoch": 8.09, "grad_norm": 0.89453125, "learning_rate": 0.00041558106118410676, "loss": 0.1926, "step": 195320 }, { "epoch": 8.09, "grad_norm": 1.046875, "learning_rate": 0.0004155729356204951, "loss": 0.2146, "step": 195330 }, { "epoch": 8.09, "grad_norm": 0.71875, "learning_rate": 0.0004155648097452931, "loss": 0.222, "step": 195340 }, { "epoch": 8.09, "grad_norm": 0.765625, "learning_rate": 0.000415556683558516, "loss": 0.2406, "step": 195350 }, { "epoch": 8.09, "grad_norm": 1.1796875, "learning_rate": 0.00041554855706017903, "loss": 0.2059, "step": 195360 }, { "epoch": 8.09, "grad_norm": 0.703125, "learning_rate": 0.00041554043025029757, "loss": 0.1635, "step": 195370 }, { "epoch": 8.09, "grad_norm": 0.80078125, "learning_rate": 0.0004155323031288869, "loss": 0.1915, "step": 195380 }, { "epoch": 8.09, "grad_norm": 1.0390625, "learning_rate": 0.00041552417569596235, "loss": 0.2744, "step": 195390 }, { "epoch": 8.09, "grad_norm": 1.4140625, "learning_rate": 0.00041551604795153917, "loss": 0.2137, "step": 195400 }, { "epoch": 8.09, "grad_norm": 2.09375, "learning_rate": 0.0004155079198956326, "loss": 0.257, "step": 195410 }, { "epoch": 8.09, "grad_norm": 0.79296875, "learning_rate": 0.00041549979152825814, "loss": 0.1937, "step": 195420 }, { "epoch": 8.09, "grad_norm": 0.61328125, "learning_rate": 0.00041549166284943087, "loss": 0.2102, "step": 195430 }, { "epoch": 8.1, "grad_norm": 0.62109375, "learning_rate": 0.0004154835338591661, "loss": 0.1941, "step": 195440 }, { "epoch": 8.1, "grad_norm": 1.3671875, "learning_rate": 0.0004154754045574792, "loss": 0.1768, "step": 195450 }, { "epoch": 8.1, "grad_norm": 0.546875, "learning_rate": 0.0004154672749443855, "loss": 0.1729, "step": 195460 }, { "epoch": 8.1, "grad_norm": 1.3203125, "learning_rate": 0.0004154591450199003, "loss": 0.2326, "step": 195470 }, { "epoch": 8.1, "grad_norm": 0.77734375, "learning_rate": 0.0004154510147840388, "loss": 0.1502, "step": 195480 }, { "epoch": 8.1, "grad_norm": 0.71875, "learning_rate": 0.00041544288423681633, "loss": 0.1682, "step": 195490 }, { "epoch": 8.1, "grad_norm": 0.984375, "learning_rate": 0.0004154347533782483, "loss": 0.2043, "step": 195500 }, { "epoch": 8.1, "grad_norm": 1.2109375, "learning_rate": 0.0004154266222083499, "loss": 0.2393, "step": 195510 }, { "epoch": 8.1, "grad_norm": 1.5859375, "learning_rate": 0.00041541849072713647, "loss": 0.1656, "step": 195520 }, { "epoch": 8.1, "grad_norm": 0.859375, "learning_rate": 0.0004154103589346232, "loss": 0.2225, "step": 195530 }, { "epoch": 8.1, "grad_norm": 0.451171875, "learning_rate": 0.0004154022268308256, "loss": 0.1754, "step": 195540 }, { "epoch": 8.1, "grad_norm": 0.77734375, "learning_rate": 0.0004153940944157589, "loss": 0.1865, "step": 195550 }, { "epoch": 8.1, "grad_norm": 0.87890625, "learning_rate": 0.0004153859616894383, "loss": 0.1271, "step": 195560 }, { "epoch": 8.1, "grad_norm": 1.0, "learning_rate": 0.0004153778286518792, "loss": 0.1835, "step": 195570 }, { "epoch": 8.1, "grad_norm": 0.000507354736328125, "learning_rate": 0.00041536969530309685, "loss": 0.177, "step": 195580 }, { "epoch": 8.1, "grad_norm": 0.84375, "learning_rate": 0.0004153615616431067, "loss": 0.2004, "step": 195590 }, { "epoch": 8.1, "grad_norm": 1.21875, "learning_rate": 0.00041535342767192384, "loss": 0.2501, "step": 195600 }, { "epoch": 8.1, "grad_norm": 0.76171875, "learning_rate": 0.00041534529338956374, "loss": 0.2355, "step": 195610 }, { "epoch": 8.1, "grad_norm": 0.91015625, "learning_rate": 0.00041533715879604164, "loss": 0.2165, "step": 195620 }, { "epoch": 8.1, "grad_norm": 1.7421875, "learning_rate": 0.00041532902389137286, "loss": 0.1932, "step": 195630 }, { "epoch": 8.1, "grad_norm": 1.2890625, "learning_rate": 0.0004153208886755727, "loss": 0.2398, "step": 195640 }, { "epoch": 8.1, "grad_norm": 0.59765625, "learning_rate": 0.0004153127531486565, "loss": 0.2295, "step": 195650 }, { "epoch": 8.1, "grad_norm": 0.494140625, "learning_rate": 0.00041530461731063953, "loss": 0.2365, "step": 195660 }, { "epoch": 8.1, "grad_norm": 1.0390625, "learning_rate": 0.00041529648116153716, "loss": 0.1935, "step": 195670 }, { "epoch": 8.11, "grad_norm": 0.79296875, "learning_rate": 0.00041528834470136466, "loss": 0.2107, "step": 195680 }, { "epoch": 8.11, "grad_norm": 1.0, "learning_rate": 0.0004152802079301373, "loss": 0.2143, "step": 195690 }, { "epoch": 8.11, "grad_norm": 0.28125, "learning_rate": 0.00041527207084787043, "loss": 0.2317, "step": 195700 }, { "epoch": 8.11, "grad_norm": 1.046875, "learning_rate": 0.00041526393345457945, "loss": 0.1865, "step": 195710 }, { "epoch": 8.11, "grad_norm": 0.42578125, "learning_rate": 0.0004152557957502795, "loss": 0.2166, "step": 195720 }, { "epoch": 8.11, "grad_norm": 0.97265625, "learning_rate": 0.0004152476577349861, "loss": 0.2129, "step": 195730 }, { "epoch": 8.11, "grad_norm": 0.78125, "learning_rate": 0.00041523951940871436, "loss": 0.1585, "step": 195740 }, { "epoch": 8.11, "grad_norm": 0.8359375, "learning_rate": 0.00041523138077147973, "loss": 0.2063, "step": 195750 }, { "epoch": 8.11, "grad_norm": 0.52734375, "learning_rate": 0.00041522324182329744, "loss": 0.2104, "step": 195760 }, { "epoch": 8.11, "grad_norm": 0.5625, "learning_rate": 0.0004152151025641829, "loss": 0.2039, "step": 195770 }, { "epoch": 8.11, "grad_norm": 0.2080078125, "learning_rate": 0.0004152069629941513, "loss": 0.1849, "step": 195780 }, { "epoch": 8.11, "grad_norm": 0.58984375, "learning_rate": 0.0004151988231132181, "loss": 0.1526, "step": 195790 }, { "epoch": 8.11, "grad_norm": 0.5703125, "learning_rate": 0.0004151906829213986, "loss": 0.206, "step": 195800 }, { "epoch": 8.11, "grad_norm": 0.73828125, "learning_rate": 0.000415182542418708, "loss": 0.2432, "step": 195810 }, { "epoch": 8.11, "grad_norm": 0.50390625, "learning_rate": 0.00041517440160516176, "loss": 0.2051, "step": 195820 }, { "epoch": 8.11, "grad_norm": 1.03125, "learning_rate": 0.0004151662604807751, "loss": 0.1754, "step": 195830 }, { "epoch": 8.11, "grad_norm": 1.125, "learning_rate": 0.00041515811904556334, "loss": 0.2155, "step": 195840 }, { "epoch": 8.11, "grad_norm": 0.55078125, "learning_rate": 0.0004151499772995418, "loss": 0.1969, "step": 195850 }, { "epoch": 8.11, "grad_norm": 0.08056640625, "learning_rate": 0.00041514183524272595, "loss": 0.1636, "step": 195860 }, { "epoch": 8.11, "grad_norm": 0.68359375, "learning_rate": 0.00041513369287513095, "loss": 0.1921, "step": 195870 }, { "epoch": 8.11, "grad_norm": 0.94921875, "learning_rate": 0.0004151255501967722, "loss": 0.1557, "step": 195880 }, { "epoch": 8.11, "grad_norm": 1.0859375, "learning_rate": 0.000415117407207665, "loss": 0.2808, "step": 195890 }, { "epoch": 8.11, "grad_norm": 0.4375, "learning_rate": 0.00041510926390782467, "loss": 0.1685, "step": 195900 }, { "epoch": 8.11, "grad_norm": 0.70703125, "learning_rate": 0.0004151011202972664, "loss": 0.1572, "step": 195910 }, { "epoch": 8.11, "grad_norm": 0.55859375, "learning_rate": 0.00041509297637600586, "loss": 0.2082, "step": 195920 }, { "epoch": 8.12, "grad_norm": 0.5859375, "learning_rate": 0.000415084832144058, "loss": 0.1701, "step": 195930 }, { "epoch": 8.12, "grad_norm": 0.5703125, "learning_rate": 0.00041507668760143836, "loss": 0.2067, "step": 195940 }, { "epoch": 8.12, "grad_norm": 1.234375, "learning_rate": 0.0004150685427481623, "loss": 0.2599, "step": 195950 }, { "epoch": 8.12, "grad_norm": 1.0078125, "learning_rate": 0.00041506039758424496, "loss": 0.2048, "step": 195960 }, { "epoch": 8.12, "grad_norm": 0.67578125, "learning_rate": 0.0004150522521097019, "loss": 0.1604, "step": 195970 }, { "epoch": 8.12, "grad_norm": 0.67578125, "learning_rate": 0.0004150441063245482, "loss": 0.1998, "step": 195980 }, { "epoch": 8.12, "grad_norm": 0.458984375, "learning_rate": 0.0004150359602287994, "loss": 0.2078, "step": 195990 }, { "epoch": 8.12, "grad_norm": 0.76171875, "learning_rate": 0.00041502781382247067, "loss": 0.2249, "step": 196000 }, { "epoch": 8.12, "grad_norm": 1.96875, "learning_rate": 0.0004150196671055774, "loss": 0.2146, "step": 196010 }, { "epoch": 8.12, "grad_norm": 0.63671875, "learning_rate": 0.00041501152007813503, "loss": 0.2526, "step": 196020 }, { "epoch": 8.12, "grad_norm": 0.71875, "learning_rate": 0.0004150033727401588, "loss": 0.2569, "step": 196030 }, { "epoch": 8.12, "grad_norm": 1.0234375, "learning_rate": 0.000414995225091664, "loss": 0.1891, "step": 196040 }, { "epoch": 8.12, "grad_norm": 0.69140625, "learning_rate": 0.000414987077132666, "loss": 0.1912, "step": 196050 }, { "epoch": 8.12, "grad_norm": 0.7578125, "learning_rate": 0.0004149789288631801, "loss": 0.1906, "step": 196060 }, { "epoch": 8.12, "grad_norm": 0.80078125, "learning_rate": 0.0004149707802832218, "loss": 0.216, "step": 196070 }, { "epoch": 8.12, "grad_norm": 1.234375, "learning_rate": 0.0004149626313928062, "loss": 0.2378, "step": 196080 }, { "epoch": 8.12, "grad_norm": 0.859375, "learning_rate": 0.00041495448219194874, "loss": 0.1941, "step": 196090 }, { "epoch": 8.12, "grad_norm": 0.6953125, "learning_rate": 0.0004149463326806648, "loss": 0.2482, "step": 196100 }, { "epoch": 8.12, "grad_norm": 0.796875, "learning_rate": 0.00041493818285896967, "loss": 0.1472, "step": 196110 }, { "epoch": 8.12, "grad_norm": 0.5546875, "learning_rate": 0.0004149300327268787, "loss": 0.2039, "step": 196120 }, { "epoch": 8.12, "grad_norm": 1.46875, "learning_rate": 0.0004149218822844072, "loss": 0.1763, "step": 196130 }, { "epoch": 8.12, "grad_norm": 1.8984375, "learning_rate": 0.0004149137315315705, "loss": 0.2315, "step": 196140 }, { "epoch": 8.12, "grad_norm": 0.337890625, "learning_rate": 0.000414905580468384, "loss": 0.2126, "step": 196150 }, { "epoch": 8.12, "grad_norm": 1.4453125, "learning_rate": 0.000414897429094863, "loss": 0.1942, "step": 196160 }, { "epoch": 8.13, "grad_norm": 0.69140625, "learning_rate": 0.0004148892774110228, "loss": 0.2567, "step": 196170 }, { "epoch": 8.13, "grad_norm": 1.2890625, "learning_rate": 0.00041488112541687884, "loss": 0.2111, "step": 196180 }, { "epoch": 8.13, "grad_norm": 1.2421875, "learning_rate": 0.0004148729731124464, "loss": 0.1883, "step": 196190 }, { "epoch": 8.13, "grad_norm": 0.482421875, "learning_rate": 0.0004148648204977408, "loss": 0.1783, "step": 196200 }, { "epoch": 8.13, "grad_norm": 0.490234375, "learning_rate": 0.0004148566675727775, "loss": 0.231, "step": 196210 }, { "epoch": 8.13, "grad_norm": 0.828125, "learning_rate": 0.00041484851433757166, "loss": 0.261, "step": 196220 }, { "epoch": 8.13, "grad_norm": 0.65234375, "learning_rate": 0.00041484036079213877, "loss": 0.2234, "step": 196230 }, { "epoch": 8.13, "grad_norm": 1.34375, "learning_rate": 0.00041483220693649416, "loss": 0.2073, "step": 196240 }, { "epoch": 8.13, "grad_norm": 0.427734375, "learning_rate": 0.000414824052770653, "loss": 0.2049, "step": 196250 }, { "epoch": 8.13, "grad_norm": 0.53515625, "learning_rate": 0.00041481589829463086, "loss": 0.2107, "step": 196260 }, { "epoch": 8.13, "grad_norm": 0.1982421875, "learning_rate": 0.00041480774350844297, "loss": 0.1694, "step": 196270 }, { "epoch": 8.13, "grad_norm": 1.03125, "learning_rate": 0.0004147995884121047, "loss": 0.2183, "step": 196280 }, { "epoch": 8.13, "grad_norm": 0.9921875, "learning_rate": 0.0004147914330056315, "loss": 0.2139, "step": 196290 }, { "epoch": 8.13, "grad_norm": 0.80078125, "learning_rate": 0.0004147832772890385, "loss": 0.232, "step": 196300 }, { "epoch": 8.13, "grad_norm": 0.9765625, "learning_rate": 0.0004147751212623413, "loss": 0.2073, "step": 196310 }, { "epoch": 8.13, "grad_norm": 0.408203125, "learning_rate": 0.000414766964925555, "loss": 0.1817, "step": 196320 }, { "epoch": 8.13, "grad_norm": 0.8828125, "learning_rate": 0.00041475880827869505, "loss": 0.2319, "step": 196330 }, { "epoch": 8.13, "grad_norm": 0.205078125, "learning_rate": 0.0004147506513217769, "loss": 0.2056, "step": 196340 }, { "epoch": 8.13, "grad_norm": 1.953125, "learning_rate": 0.0004147424940548158, "loss": 0.2016, "step": 196350 }, { "epoch": 8.13, "grad_norm": 0.53125, "learning_rate": 0.0004147343364778271, "loss": 0.1979, "step": 196360 }, { "epoch": 8.13, "grad_norm": 1.609375, "learning_rate": 0.0004147261785908262, "loss": 0.2177, "step": 196370 }, { "epoch": 8.13, "grad_norm": 0.625, "learning_rate": 0.0004147180203938283, "loss": 0.2045, "step": 196380 }, { "epoch": 8.13, "grad_norm": 0.53515625, "learning_rate": 0.00041470986188684903, "loss": 0.1959, "step": 196390 }, { "epoch": 8.13, "grad_norm": 0.67578125, "learning_rate": 0.0004147017030699034, "loss": 0.1971, "step": 196400 }, { "epoch": 8.14, "grad_norm": 0.271484375, "learning_rate": 0.00041469354394300706, "loss": 0.1655, "step": 196410 }, { "epoch": 8.14, "grad_norm": 0.85546875, "learning_rate": 0.0004146853845061753, "loss": 0.1991, "step": 196420 }, { "epoch": 8.14, "grad_norm": 0.66796875, "learning_rate": 0.0004146772247594234, "loss": 0.1935, "step": 196430 }, { "epoch": 8.14, "grad_norm": 0.9140625, "learning_rate": 0.0004146690647027668, "loss": 0.2162, "step": 196440 }, { "epoch": 8.14, "grad_norm": 0.9765625, "learning_rate": 0.0004146609043362207, "loss": 0.2151, "step": 196450 }, { "epoch": 8.14, "grad_norm": 1.59375, "learning_rate": 0.00041465274365980057, "loss": 0.2095, "step": 196460 }, { "epoch": 8.14, "grad_norm": 1.3125, "learning_rate": 0.0004146445826735218, "loss": 0.1703, "step": 196470 }, { "epoch": 8.14, "grad_norm": 0.828125, "learning_rate": 0.0004146364213773996, "loss": 0.174, "step": 196480 }, { "epoch": 8.14, "grad_norm": 0.97265625, "learning_rate": 0.0004146282597714496, "loss": 0.2413, "step": 196490 }, { "epoch": 8.14, "grad_norm": 2.171875, "learning_rate": 0.0004146200978556869, "loss": 0.2063, "step": 196500 }, { "epoch": 8.14, "grad_norm": 0.453125, "learning_rate": 0.000414611935630127, "loss": 0.2111, "step": 196510 }, { "epoch": 8.14, "grad_norm": 0.8671875, "learning_rate": 0.00041460377309478517, "loss": 0.1523, "step": 196520 }, { "epoch": 8.14, "grad_norm": 1.0078125, "learning_rate": 0.0004145956102496768, "loss": 0.1671, "step": 196530 }, { "epoch": 8.14, "grad_norm": 1.8046875, "learning_rate": 0.00041458744709481734, "loss": 0.2131, "step": 196540 }, { "epoch": 8.14, "grad_norm": 1.1015625, "learning_rate": 0.00041457928363022205, "loss": 0.1895, "step": 196550 }, { "epoch": 8.14, "grad_norm": 0.470703125, "learning_rate": 0.00041457111985590637, "loss": 0.1819, "step": 196560 }, { "epoch": 8.14, "grad_norm": 2.015625, "learning_rate": 0.0004145629557718855, "loss": 0.2092, "step": 196570 }, { "epoch": 8.14, "grad_norm": 0.1943359375, "learning_rate": 0.000414554791378175, "loss": 0.1695, "step": 196580 }, { "epoch": 8.14, "grad_norm": 1.3671875, "learning_rate": 0.0004145466266747901, "loss": 0.1801, "step": 196590 }, { "epoch": 8.14, "grad_norm": 0.341796875, "learning_rate": 0.00041453846166174626, "loss": 0.1957, "step": 196600 }, { "epoch": 8.14, "grad_norm": 0.56640625, "learning_rate": 0.00041453029633905874, "loss": 0.1925, "step": 196610 }, { "epoch": 8.14, "grad_norm": 0.38671875, "learning_rate": 0.0004145221307067431, "loss": 0.2089, "step": 196620 }, { "epoch": 8.14, "grad_norm": 0.98828125, "learning_rate": 0.00041451396476481446, "loss": 0.1556, "step": 196630 }, { "epoch": 8.14, "grad_norm": 0.78125, "learning_rate": 0.0004145057985132883, "loss": 0.2267, "step": 196640 }, { "epoch": 8.15, "grad_norm": 0.042236328125, "learning_rate": 0.00041449763195218004, "loss": 0.1482, "step": 196650 }, { "epoch": 8.15, "grad_norm": 0.6015625, "learning_rate": 0.000414489465081505, "loss": 0.1895, "step": 196660 }, { "epoch": 8.15, "grad_norm": 0.84765625, "learning_rate": 0.00041448129790127853, "loss": 0.1689, "step": 196670 }, { "epoch": 8.15, "grad_norm": 0.69921875, "learning_rate": 0.000414473130411516, "loss": 0.2038, "step": 196680 }, { "epoch": 8.15, "grad_norm": 0.97265625, "learning_rate": 0.0004144649626122329, "loss": 0.1823, "step": 196690 }, { "epoch": 8.15, "grad_norm": 0.62109375, "learning_rate": 0.00041445679450344443, "loss": 0.1613, "step": 196700 }, { "epoch": 8.15, "grad_norm": 0.76171875, "learning_rate": 0.00041444862608516596, "loss": 0.2023, "step": 196710 }, { "epoch": 8.15, "grad_norm": 0.71875, "learning_rate": 0.00041444045735741303, "loss": 0.178, "step": 196720 }, { "epoch": 8.15, "grad_norm": 0.57421875, "learning_rate": 0.0004144322883202009, "loss": 0.186, "step": 196730 }, { "epoch": 8.15, "grad_norm": 0.58203125, "learning_rate": 0.000414424118973545, "loss": 0.1938, "step": 196740 }, { "epoch": 8.15, "grad_norm": 0.71875, "learning_rate": 0.00041441594931746064, "loss": 0.1595, "step": 196750 }, { "epoch": 8.15, "grad_norm": 0.5234375, "learning_rate": 0.00041440777935196316, "loss": 0.2186, "step": 196760 }, { "epoch": 8.15, "grad_norm": 0.828125, "learning_rate": 0.000414399609077068, "loss": 0.2381, "step": 196770 }, { "epoch": 8.15, "grad_norm": 1.0703125, "learning_rate": 0.0004143914384927906, "loss": 0.1884, "step": 196780 }, { "epoch": 8.15, "grad_norm": 0.8046875, "learning_rate": 0.00041438326759914623, "loss": 0.1724, "step": 196790 }, { "epoch": 8.15, "grad_norm": 0.478515625, "learning_rate": 0.00041437509639615026, "loss": 0.1848, "step": 196800 }, { "epoch": 8.15, "grad_norm": 0.466796875, "learning_rate": 0.00041436692488381824, "loss": 0.1572, "step": 196810 }, { "epoch": 8.15, "grad_norm": 1.2265625, "learning_rate": 0.0004143587530621653, "loss": 0.1567, "step": 196820 }, { "epoch": 8.15, "grad_norm": 0.703125, "learning_rate": 0.0004143505809312069, "loss": 0.2161, "step": 196830 }, { "epoch": 8.15, "grad_norm": 0.8046875, "learning_rate": 0.0004143424084909586, "loss": 0.1704, "step": 196840 }, { "epoch": 8.15, "grad_norm": 0.6796875, "learning_rate": 0.00041433423574143547, "loss": 0.1952, "step": 196850 }, { "epoch": 8.15, "grad_norm": 0.6015625, "learning_rate": 0.00041432606268265314, "loss": 0.2483, "step": 196860 }, { "epoch": 8.15, "grad_norm": 0.404296875, "learning_rate": 0.0004143178893146269, "loss": 0.1448, "step": 196870 }, { "epoch": 8.15, "grad_norm": 0.6484375, "learning_rate": 0.0004143097156373722, "loss": 0.1624, "step": 196880 }, { "epoch": 8.16, "grad_norm": 0.8046875, "learning_rate": 0.00041430154165090425, "loss": 0.2051, "step": 196890 }, { "epoch": 8.16, "grad_norm": 0.859375, "learning_rate": 0.0004142933673552386, "loss": 0.2096, "step": 196900 }, { "epoch": 8.16, "grad_norm": 0.466796875, "learning_rate": 0.00041428519275039054, "loss": 0.1923, "step": 196910 }, { "epoch": 8.16, "grad_norm": 3.1875, "learning_rate": 0.0004142770178363756, "loss": 0.2154, "step": 196920 }, { "epoch": 8.16, "grad_norm": 0.5390625, "learning_rate": 0.0004142688426132089, "loss": 0.2177, "step": 196930 }, { "epoch": 8.16, "grad_norm": 0.66015625, "learning_rate": 0.00041426066708090604, "loss": 0.1958, "step": 196940 }, { "epoch": 8.16, "grad_norm": 0.703125, "learning_rate": 0.0004142524912394824, "loss": 0.1546, "step": 196950 }, { "epoch": 8.16, "grad_norm": 0.353515625, "learning_rate": 0.0004142443150889532, "loss": 0.2034, "step": 196960 }, { "epoch": 8.16, "grad_norm": 0.61328125, "learning_rate": 0.0004142361386293341, "loss": 0.1807, "step": 196970 }, { "epoch": 8.16, "grad_norm": 0.73046875, "learning_rate": 0.00041422796186064016, "loss": 0.2167, "step": 196980 }, { "epoch": 8.16, "grad_norm": 0.369140625, "learning_rate": 0.000414219784782887, "loss": 0.2728, "step": 196990 }, { "epoch": 8.16, "grad_norm": 0.5, "learning_rate": 0.0004142116073960899, "loss": 0.1895, "step": 197000 }, { "epoch": 8.16, "grad_norm": 1.0390625, "learning_rate": 0.00041420342970026433, "loss": 0.2146, "step": 197010 }, { "epoch": 8.16, "grad_norm": 0.890625, "learning_rate": 0.0004141952516954256, "loss": 0.1762, "step": 197020 }, { "epoch": 8.16, "grad_norm": 0.8125, "learning_rate": 0.00041418707338158916, "loss": 0.1725, "step": 197030 }, { "epoch": 8.16, "grad_norm": 0.62109375, "learning_rate": 0.0004141788947587704, "loss": 0.1913, "step": 197040 }, { "epoch": 8.16, "grad_norm": 1.75, "learning_rate": 0.00041417071582698466, "loss": 0.1981, "step": 197050 }, { "epoch": 8.16, "grad_norm": 2.46875, "learning_rate": 0.00041416253658624737, "loss": 0.2273, "step": 197060 }, { "epoch": 8.16, "grad_norm": 0.373046875, "learning_rate": 0.00041415435703657394, "loss": 0.2004, "step": 197070 }, { "epoch": 8.16, "grad_norm": 1.375, "learning_rate": 0.00041414617717797975, "loss": 0.2538, "step": 197080 }, { "epoch": 8.16, "grad_norm": 0.64453125, "learning_rate": 0.00041413799701048006, "loss": 0.1833, "step": 197090 }, { "epoch": 8.16, "grad_norm": 0.87109375, "learning_rate": 0.00041412981653409056, "loss": 0.1936, "step": 197100 }, { "epoch": 8.16, "grad_norm": 1.03125, "learning_rate": 0.00041412163574882635, "loss": 0.2024, "step": 197110 }, { "epoch": 8.16, "grad_norm": 0.69140625, "learning_rate": 0.00041411345465470296, "loss": 0.2051, "step": 197120 }, { "epoch": 8.17, "grad_norm": 0.578125, "learning_rate": 0.00041410527325173575, "loss": 0.1816, "step": 197130 }, { "epoch": 8.17, "grad_norm": 0.73046875, "learning_rate": 0.00041409709153994017, "loss": 0.2111, "step": 197140 }, { "epoch": 8.17, "grad_norm": 0.578125, "learning_rate": 0.00041408890951933166, "loss": 0.1749, "step": 197150 }, { "epoch": 8.17, "grad_norm": 0.478515625, "learning_rate": 0.0004140807271899254, "loss": 0.1911, "step": 197160 }, { "epoch": 8.17, "grad_norm": 0.6171875, "learning_rate": 0.00041407254455173707, "loss": 0.1694, "step": 197170 }, { "epoch": 8.17, "grad_norm": 0.765625, "learning_rate": 0.0004140643616047818, "loss": 0.1752, "step": 197180 }, { "epoch": 8.17, "grad_norm": 0.9296875, "learning_rate": 0.00041405617834907514, "loss": 0.21, "step": 197190 }, { "epoch": 8.17, "grad_norm": 0.55859375, "learning_rate": 0.00041404799478463256, "loss": 0.1894, "step": 197200 }, { "epoch": 8.17, "grad_norm": 0.8515625, "learning_rate": 0.0004140398109114693, "loss": 0.233, "step": 197210 }, { "epoch": 8.17, "grad_norm": 0.7734375, "learning_rate": 0.0004140316267296008, "loss": 0.216, "step": 197220 }, { "epoch": 8.17, "grad_norm": 0.5859375, "learning_rate": 0.0004140234422390426, "loss": 0.1997, "step": 197230 }, { "epoch": 8.17, "grad_norm": 0.4609375, "learning_rate": 0.0004140152574398099, "loss": 0.2076, "step": 197240 }, { "epoch": 8.17, "grad_norm": 0.58203125, "learning_rate": 0.00041400707233191827, "loss": 0.2488, "step": 197250 }, { "epoch": 8.17, "grad_norm": 0.546875, "learning_rate": 0.000413998886915383, "loss": 0.2534, "step": 197260 }, { "epoch": 8.17, "grad_norm": 0.423828125, "learning_rate": 0.00041399070119021953, "loss": 0.1961, "step": 197270 }, { "epoch": 8.17, "grad_norm": 0.6875, "learning_rate": 0.00041398251515644324, "loss": 0.2252, "step": 197280 }, { "epoch": 8.17, "grad_norm": 0.75, "learning_rate": 0.00041397432881406967, "loss": 0.2079, "step": 197290 }, { "epoch": 8.17, "grad_norm": 0.408203125, "learning_rate": 0.000413966142163114, "loss": 0.1687, "step": 197300 }, { "epoch": 8.17, "grad_norm": 0.84375, "learning_rate": 0.0004139579552035918, "loss": 0.223, "step": 197310 }, { "epoch": 8.17, "grad_norm": 0.494140625, "learning_rate": 0.0004139497679355185, "loss": 0.1891, "step": 197320 }, { "epoch": 8.17, "grad_norm": 0.62109375, "learning_rate": 0.0004139415803589094, "loss": 0.1888, "step": 197330 }, { "epoch": 8.17, "grad_norm": 0.6796875, "learning_rate": 0.00041393339247377994, "loss": 0.2057, "step": 197340 }, { "epoch": 8.17, "grad_norm": 0.6015625, "learning_rate": 0.00041392520428014547, "loss": 0.2193, "step": 197350 }, { "epoch": 8.17, "grad_norm": 0.74609375, "learning_rate": 0.00041391701577802155, "loss": 0.2181, "step": 197360 }, { "epoch": 8.18, "grad_norm": 1.1953125, "learning_rate": 0.0004139088269674235, "loss": 0.2078, "step": 197370 }, { "epoch": 8.18, "grad_norm": 0.7890625, "learning_rate": 0.0004139006378483667, "loss": 0.2108, "step": 197380 }, { "epoch": 8.18, "grad_norm": 0.921875, "learning_rate": 0.0004138924484208667, "loss": 0.1981, "step": 197390 }, { "epoch": 8.18, "grad_norm": 1.109375, "learning_rate": 0.0004138842586849387, "loss": 0.1968, "step": 197400 }, { "epoch": 8.18, "grad_norm": 0.625, "learning_rate": 0.00041387606864059823, "loss": 0.1828, "step": 197410 }, { "epoch": 8.18, "grad_norm": 0.70703125, "learning_rate": 0.0004138678782878608, "loss": 0.1988, "step": 197420 }, { "epoch": 8.18, "grad_norm": 1.1875, "learning_rate": 0.00041385968762674166, "loss": 0.208, "step": 197430 }, { "epoch": 8.18, "grad_norm": 1.359375, "learning_rate": 0.0004138514966572562, "loss": 0.1583, "step": 197440 }, { "epoch": 8.18, "grad_norm": 0.9765625, "learning_rate": 0.00041384330537941994, "loss": 0.1723, "step": 197450 }, { "epoch": 8.18, "grad_norm": 2.40625, "learning_rate": 0.00041383511379324835, "loss": 0.2631, "step": 197460 }, { "epoch": 8.18, "grad_norm": 0.94921875, "learning_rate": 0.00041382692189875683, "loss": 0.1939, "step": 197470 }, { "epoch": 8.18, "grad_norm": 0.83203125, "learning_rate": 0.0004138187296959606, "loss": 0.1501, "step": 197480 }, { "epoch": 8.18, "grad_norm": 0.6484375, "learning_rate": 0.0004138105371848753, "loss": 0.2162, "step": 197490 }, { "epoch": 8.18, "grad_norm": 0.9609375, "learning_rate": 0.00041380234436551614, "loss": 0.251, "step": 197500 }, { "epoch": 8.18, "grad_norm": 0.56640625, "learning_rate": 0.00041379415123789877, "loss": 0.2084, "step": 197510 }, { "epoch": 8.18, "grad_norm": 1.40625, "learning_rate": 0.00041378595780203843, "loss": 0.2502, "step": 197520 }, { "epoch": 8.18, "grad_norm": 0.74609375, "learning_rate": 0.00041377776405795064, "loss": 0.2145, "step": 197530 }, { "epoch": 8.18, "grad_norm": 0.365234375, "learning_rate": 0.00041376957000565073, "loss": 0.1806, "step": 197540 }, { "epoch": 8.18, "grad_norm": 0.66015625, "learning_rate": 0.00041376137564515426, "loss": 0.2053, "step": 197550 }, { "epoch": 8.18, "grad_norm": 0.91796875, "learning_rate": 0.00041375318097647653, "loss": 0.22, "step": 197560 }, { "epoch": 8.18, "grad_norm": 1.078125, "learning_rate": 0.00041374498599963303, "loss": 0.1881, "step": 197570 }, { "epoch": 8.18, "grad_norm": 0.69140625, "learning_rate": 0.0004137367907146391, "loss": 0.2034, "step": 197580 }, { "epoch": 8.18, "grad_norm": 0.322265625, "learning_rate": 0.0004137285951215102, "loss": 0.1633, "step": 197590 }, { "epoch": 8.18, "grad_norm": 1.640625, "learning_rate": 0.00041372039922026185, "loss": 0.2066, "step": 197600 }, { "epoch": 8.18, "grad_norm": 0.26953125, "learning_rate": 0.0004137122030109093, "loss": 0.187, "step": 197610 }, { "epoch": 8.19, "grad_norm": 1.2109375, "learning_rate": 0.00041370400649346807, "loss": 0.1869, "step": 197620 }, { "epoch": 8.19, "grad_norm": 1.1484375, "learning_rate": 0.0004136958096679536, "loss": 0.128, "step": 197630 }, { "epoch": 8.19, "grad_norm": 0.5234375, "learning_rate": 0.00041368761253438133, "loss": 0.2432, "step": 197640 }, { "epoch": 8.19, "grad_norm": 0.48828125, "learning_rate": 0.0004136794150927666, "loss": 0.2284, "step": 197650 }, { "epoch": 8.19, "grad_norm": 1.3359375, "learning_rate": 0.0004136712173431249, "loss": 0.2422, "step": 197660 }, { "epoch": 8.19, "grad_norm": 0.70703125, "learning_rate": 0.0004136630192854717, "loss": 0.1879, "step": 197670 }, { "epoch": 8.19, "grad_norm": 0.419921875, "learning_rate": 0.0004136548209198223, "loss": 0.2181, "step": 197680 }, { "epoch": 8.19, "grad_norm": 0.453125, "learning_rate": 0.0004136466222461922, "loss": 0.1582, "step": 197690 }, { "epoch": 8.19, "grad_norm": 1.671875, "learning_rate": 0.00041363842326459685, "loss": 0.2614, "step": 197700 }, { "epoch": 8.19, "grad_norm": 0.796875, "learning_rate": 0.0004136302239750516, "loss": 0.2801, "step": 197710 }, { "epoch": 8.19, "grad_norm": 0.61328125, "learning_rate": 0.000413622024377572, "loss": 0.1902, "step": 197720 }, { "epoch": 8.19, "grad_norm": 0.6953125, "learning_rate": 0.00041361382447217343, "loss": 0.2008, "step": 197730 }, { "epoch": 8.19, "grad_norm": 0.8671875, "learning_rate": 0.0004136056242588713, "loss": 0.2243, "step": 197740 }, { "epoch": 8.19, "grad_norm": 0.8125, "learning_rate": 0.00041359742373768104, "loss": 0.1737, "step": 197750 }, { "epoch": 8.19, "grad_norm": 0.55078125, "learning_rate": 0.0004135892229086181, "loss": 0.1804, "step": 197760 }, { "epoch": 8.19, "grad_norm": 0.7109375, "learning_rate": 0.00041358102177169794, "loss": 0.1958, "step": 197770 }, { "epoch": 8.19, "grad_norm": 0.5234375, "learning_rate": 0.00041357282032693595, "loss": 0.1987, "step": 197780 }, { "epoch": 8.19, "grad_norm": 0.82421875, "learning_rate": 0.00041356461857434756, "loss": 0.2558, "step": 197790 }, { "epoch": 8.19, "grad_norm": 0.51171875, "learning_rate": 0.0004135564165139482, "loss": 0.1859, "step": 197800 }, { "epoch": 8.19, "grad_norm": 0.41015625, "learning_rate": 0.0004135482141457534, "loss": 0.178, "step": 197810 }, { "epoch": 8.19, "grad_norm": 0.92578125, "learning_rate": 0.0004135400114697784, "loss": 0.2045, "step": 197820 }, { "epoch": 8.19, "grad_norm": 1.859375, "learning_rate": 0.0004135318084860389, "loss": 0.2191, "step": 197830 }, { "epoch": 8.19, "grad_norm": 0.53125, "learning_rate": 0.00041352360519455013, "loss": 0.1903, "step": 197840 }, { "epoch": 8.19, "grad_norm": 1.6484375, "learning_rate": 0.0004135154015953275, "loss": 0.236, "step": 197850 }, { "epoch": 8.2, "grad_norm": 0.73828125, "learning_rate": 0.00041350719768838673, "loss": 0.2044, "step": 197860 }, { "epoch": 8.2, "grad_norm": 0.5, "learning_rate": 0.00041349899347374293, "loss": 0.173, "step": 197870 }, { "epoch": 8.2, "grad_norm": 1.0078125, "learning_rate": 0.0004134907889514117, "loss": 0.192, "step": 197880 }, { "epoch": 8.2, "grad_norm": 0.9765625, "learning_rate": 0.00041348258412140853, "loss": 0.2437, "step": 197890 }, { "epoch": 8.2, "grad_norm": 0.5546875, "learning_rate": 0.0004134743789837487, "loss": 0.1921, "step": 197900 }, { "epoch": 8.2, "grad_norm": 2.453125, "learning_rate": 0.00041346617353844786, "loss": 0.2221, "step": 197910 }, { "epoch": 8.2, "grad_norm": 0.70703125, "learning_rate": 0.00041345796778552126, "loss": 0.2053, "step": 197920 }, { "epoch": 8.2, "grad_norm": 0.66796875, "learning_rate": 0.0004134497617249844, "loss": 0.2394, "step": 197930 }, { "epoch": 8.2, "grad_norm": 0.328125, "learning_rate": 0.0004134415553568528, "loss": 0.1668, "step": 197940 }, { "epoch": 8.2, "grad_norm": 0.375, "learning_rate": 0.0004134333486811418, "loss": 0.1704, "step": 197950 }, { "epoch": 8.2, "grad_norm": 1.359375, "learning_rate": 0.0004134251416978668, "loss": 0.2072, "step": 197960 }, { "epoch": 8.2, "grad_norm": 1.7578125, "learning_rate": 0.00041341693440704355, "loss": 0.2209, "step": 197970 }, { "epoch": 8.2, "grad_norm": 0.57421875, "learning_rate": 0.00041340872680868706, "loss": 0.222, "step": 197980 }, { "epoch": 8.2, "grad_norm": 0.875, "learning_rate": 0.00041340051890281315, "loss": 0.2159, "step": 197990 }, { "epoch": 8.2, "grad_norm": 0.341796875, "learning_rate": 0.00041339231068943696, "loss": 0.2018, "step": 198000 }, { "epoch": 8.2, "grad_norm": 0.40234375, "learning_rate": 0.0004133841021685742, "loss": 0.2018, "step": 198010 }, { "epoch": 8.2, "grad_norm": 0.58984375, "learning_rate": 0.00041337589334024017, "loss": 0.2021, "step": 198020 }, { "epoch": 8.2, "grad_norm": 0.66796875, "learning_rate": 0.00041336768420445035, "loss": 0.1871, "step": 198030 }, { "epoch": 8.2, "grad_norm": 0.8671875, "learning_rate": 0.0004133594747612202, "loss": 0.1687, "step": 198040 }, { "epoch": 8.2, "grad_norm": 0.609375, "learning_rate": 0.0004133512650105652, "loss": 0.2187, "step": 198050 }, { "epoch": 8.2, "grad_norm": 0.92578125, "learning_rate": 0.0004133430549525007, "loss": 0.1678, "step": 198060 }, { "epoch": 8.2, "grad_norm": 1.0234375, "learning_rate": 0.0004133348445870422, "loss": 0.2061, "step": 198070 }, { "epoch": 8.2, "grad_norm": 0.92578125, "learning_rate": 0.00041332663391420514, "loss": 0.1957, "step": 198080 }, { "epoch": 8.2, "grad_norm": 1.296875, "learning_rate": 0.00041331842293400507, "loss": 0.2441, "step": 198090 }, { "epoch": 8.21, "grad_norm": 0.2890625, "learning_rate": 0.0004133102116464573, "loss": 0.2228, "step": 198100 }, { "epoch": 8.21, "grad_norm": 0.80078125, "learning_rate": 0.00041330200005157744, "loss": 0.1898, "step": 198110 }, { "epoch": 8.21, "grad_norm": 0.58984375, "learning_rate": 0.0004132937881493808, "loss": 0.2239, "step": 198120 }, { "epoch": 8.21, "grad_norm": 0.93359375, "learning_rate": 0.0004132855759398828, "loss": 0.1924, "step": 198130 }, { "epoch": 8.21, "grad_norm": 1.09375, "learning_rate": 0.00041327736342309905, "loss": 0.2187, "step": 198140 }, { "epoch": 8.21, "grad_norm": 0.255859375, "learning_rate": 0.000413269150599045, "loss": 0.1801, "step": 198150 }, { "epoch": 8.21, "grad_norm": 0.2578125, "learning_rate": 0.00041326093746773596, "loss": 0.1768, "step": 198160 }, { "epoch": 8.21, "grad_norm": 0.81640625, "learning_rate": 0.00041325272402918754, "loss": 0.1679, "step": 198170 }, { "epoch": 8.21, "grad_norm": 0.8125, "learning_rate": 0.000413244510283415, "loss": 0.1932, "step": 198180 }, { "epoch": 8.21, "grad_norm": 0.765625, "learning_rate": 0.000413236296230434, "loss": 0.2165, "step": 198190 }, { "epoch": 8.21, "grad_norm": 0.6796875, "learning_rate": 0.00041322808187025994, "loss": 0.2155, "step": 198200 }, { "epoch": 8.21, "grad_norm": 1.0546875, "learning_rate": 0.00041321986720290816, "loss": 0.1966, "step": 198210 }, { "epoch": 8.21, "grad_norm": 0.73828125, "learning_rate": 0.0004132116522283943, "loss": 0.2366, "step": 198220 }, { "epoch": 8.21, "grad_norm": 1.015625, "learning_rate": 0.0004132034369467338, "loss": 0.1997, "step": 198230 }, { "epoch": 8.21, "grad_norm": 1.03125, "learning_rate": 0.0004131952213579419, "loss": 0.1831, "step": 198240 }, { "epoch": 8.21, "grad_norm": 0.55078125, "learning_rate": 0.0004131870054620343, "loss": 0.2482, "step": 198250 }, { "epoch": 8.21, "grad_norm": 0.625, "learning_rate": 0.0004131787892590264, "loss": 0.1967, "step": 198260 }, { "epoch": 8.21, "grad_norm": 1.0, "learning_rate": 0.0004131705727489336, "loss": 0.2158, "step": 198270 }, { "epoch": 8.21, "grad_norm": 1.9609375, "learning_rate": 0.00041316235593177143, "loss": 0.2122, "step": 198280 }, { "epoch": 8.21, "grad_norm": 1.0703125, "learning_rate": 0.00041315413880755523, "loss": 0.2306, "step": 198290 }, { "epoch": 8.21, "grad_norm": 1.5078125, "learning_rate": 0.0004131459213763007, "loss": 0.2558, "step": 198300 }, { "epoch": 8.21, "grad_norm": 0.6015625, "learning_rate": 0.0004131377036380231, "loss": 0.2298, "step": 198310 }, { "epoch": 8.21, "grad_norm": 0.9453125, "learning_rate": 0.00041312948559273793, "loss": 0.2149, "step": 198320 }, { "epoch": 8.21, "grad_norm": 0.6953125, "learning_rate": 0.00041312126724046074, "loss": 0.1966, "step": 198330 }, { "epoch": 8.22, "grad_norm": 0.6171875, "learning_rate": 0.00041311304858120693, "loss": 0.2089, "step": 198340 }, { "epoch": 8.22, "grad_norm": 1.6875, "learning_rate": 0.00041310482961499196, "loss": 0.1666, "step": 198350 }, { "epoch": 8.22, "grad_norm": 2.546875, "learning_rate": 0.00041309661034183133, "loss": 0.1877, "step": 198360 }, { "epoch": 8.22, "grad_norm": 0.80859375, "learning_rate": 0.00041308839076174044, "loss": 0.2323, "step": 198370 }, { "epoch": 8.22, "grad_norm": 0.99609375, "learning_rate": 0.00041308017087473485, "loss": 0.2078, "step": 198380 }, { "epoch": 8.22, "grad_norm": 0.87109375, "learning_rate": 0.00041307195068083005, "loss": 0.173, "step": 198390 }, { "epoch": 8.22, "grad_norm": 2.328125, "learning_rate": 0.00041306373018004135, "loss": 0.1522, "step": 198400 }, { "epoch": 8.22, "grad_norm": 2.03125, "learning_rate": 0.00041305550937238434, "loss": 0.1781, "step": 198410 }, { "epoch": 8.22, "grad_norm": 0.66015625, "learning_rate": 0.00041304728825787454, "loss": 0.1779, "step": 198420 }, { "epoch": 8.22, "grad_norm": 0.890625, "learning_rate": 0.0004130390668365273, "loss": 0.2013, "step": 198430 }, { "epoch": 8.22, "grad_norm": 0.85546875, "learning_rate": 0.0004130308451083581, "loss": 0.1935, "step": 198440 }, { "epoch": 8.22, "grad_norm": 0.439453125, "learning_rate": 0.0004130226230733825, "loss": 0.1789, "step": 198450 }, { "epoch": 8.22, "grad_norm": 0.8984375, "learning_rate": 0.0004130144007316159, "loss": 0.185, "step": 198460 }, { "epoch": 8.22, "grad_norm": 1.171875, "learning_rate": 0.00041300617808307384, "loss": 0.2155, "step": 198470 }, { "epoch": 8.22, "grad_norm": 2.546875, "learning_rate": 0.00041299795512777176, "loss": 0.2111, "step": 198480 }, { "epoch": 8.22, "grad_norm": 0.65234375, "learning_rate": 0.00041298973186572514, "loss": 0.228, "step": 198490 }, { "epoch": 8.22, "grad_norm": 0.5390625, "learning_rate": 0.00041298150829694943, "loss": 0.2085, "step": 198500 }, { "epoch": 8.22, "grad_norm": 0.3828125, "learning_rate": 0.0004129732844214601, "loss": 0.198, "step": 198510 }, { "epoch": 8.22, "grad_norm": 0.4375, "learning_rate": 0.0004129650602392727, "loss": 0.2399, "step": 198520 }, { "epoch": 8.22, "grad_norm": 0.60546875, "learning_rate": 0.00041295683575040255, "loss": 0.1776, "step": 198530 }, { "epoch": 8.22, "grad_norm": 0.9921875, "learning_rate": 0.00041294861095486537, "loss": 0.1805, "step": 198540 }, { "epoch": 8.22, "grad_norm": 1.6953125, "learning_rate": 0.00041294038585267635, "loss": 0.1847, "step": 198550 }, { "epoch": 8.22, "grad_norm": 0.7578125, "learning_rate": 0.0004129321604438512, "loss": 0.1674, "step": 198560 }, { "epoch": 8.22, "grad_norm": 0.5078125, "learning_rate": 0.0004129239347284054, "loss": 0.1828, "step": 198570 }, { "epoch": 8.23, "grad_norm": 0.765625, "learning_rate": 0.0004129157087063542, "loss": 0.2086, "step": 198580 }, { "epoch": 8.23, "grad_norm": 0.3671875, "learning_rate": 0.00041290748237771336, "loss": 0.2405, "step": 198590 }, { "epoch": 8.23, "grad_norm": 0.55859375, "learning_rate": 0.00041289925574249813, "loss": 0.2366, "step": 198600 }, { "epoch": 8.23, "grad_norm": 0.21875, "learning_rate": 0.00041289102880072415, "loss": 0.1652, "step": 198610 }, { "epoch": 8.23, "grad_norm": 0.8203125, "learning_rate": 0.0004128828015524068, "loss": 0.2126, "step": 198620 }, { "epoch": 8.23, "grad_norm": 0.57421875, "learning_rate": 0.00041287457399756164, "loss": 0.1877, "step": 198630 }, { "epoch": 8.23, "grad_norm": 0.2392578125, "learning_rate": 0.00041286634613620413, "loss": 0.1482, "step": 198640 }, { "epoch": 8.23, "grad_norm": 0.609375, "learning_rate": 0.0004128581179683497, "loss": 0.2298, "step": 198650 }, { "epoch": 8.23, "grad_norm": 0.294921875, "learning_rate": 0.00041284988949401394, "loss": 0.1587, "step": 198660 }, { "epoch": 8.23, "grad_norm": 0.7890625, "learning_rate": 0.0004128416607132123, "loss": 0.1894, "step": 198670 }, { "epoch": 8.23, "grad_norm": 0.9140625, "learning_rate": 0.0004128334316259601, "loss": 0.2061, "step": 198680 }, { "epoch": 8.23, "grad_norm": 0.4375, "learning_rate": 0.00041282520223227305, "loss": 0.1632, "step": 198690 }, { "epoch": 8.23, "grad_norm": 0.486328125, "learning_rate": 0.00041281697253216655, "loss": 0.1773, "step": 198700 }, { "epoch": 8.23, "grad_norm": 0.203125, "learning_rate": 0.0004128087425256561, "loss": 0.2757, "step": 198710 }, { "epoch": 8.23, "grad_norm": 0.61328125, "learning_rate": 0.0004128005122127572, "loss": 0.1916, "step": 198720 }, { "epoch": 8.23, "grad_norm": 0.427734375, "learning_rate": 0.0004127922815934853, "loss": 0.2007, "step": 198730 }, { "epoch": 8.23, "grad_norm": 0.322265625, "learning_rate": 0.00041278405066785584, "loss": 0.1704, "step": 198740 }, { "epoch": 8.23, "grad_norm": 0.49609375, "learning_rate": 0.0004127758194358844, "loss": 0.1741, "step": 198750 }, { "epoch": 8.23, "grad_norm": 0.349609375, "learning_rate": 0.0004127675878975865, "loss": 0.185, "step": 198760 }, { "epoch": 8.23, "grad_norm": 1.3125, "learning_rate": 0.00041275935605297753, "loss": 0.2239, "step": 198770 }, { "epoch": 8.23, "grad_norm": 0.703125, "learning_rate": 0.0004127511239020731, "loss": 0.1425, "step": 198780 }, { "epoch": 8.23, "grad_norm": 0.6015625, "learning_rate": 0.0004127428914448885, "loss": 0.1827, "step": 198790 }, { "epoch": 8.23, "grad_norm": 1.4296875, "learning_rate": 0.0004127346586814395, "loss": 0.2152, "step": 198800 }, { "epoch": 8.23, "grad_norm": 0.74609375, "learning_rate": 0.0004127264256117414, "loss": 0.1466, "step": 198810 }, { "epoch": 8.24, "grad_norm": 0.58203125, "learning_rate": 0.0004127181922358097, "loss": 0.1837, "step": 198820 }, { "epoch": 8.24, "grad_norm": 0.93359375, "learning_rate": 0.00041270995855366, "loss": 0.2121, "step": 198830 }, { "epoch": 8.24, "grad_norm": 0.5, "learning_rate": 0.0004127017245653076, "loss": 0.2029, "step": 198840 }, { "epoch": 8.24, "grad_norm": 0.859375, "learning_rate": 0.00041269349027076827, "loss": 0.1782, "step": 198850 }, { "epoch": 8.24, "grad_norm": 0.71875, "learning_rate": 0.00041268525567005727, "loss": 0.1546, "step": 198860 }, { "epoch": 8.24, "grad_norm": 0.73828125, "learning_rate": 0.00041267702076319023, "loss": 0.1922, "step": 198870 }, { "epoch": 8.24, "grad_norm": 0.9765625, "learning_rate": 0.0004126687855501826, "loss": 0.1955, "step": 198880 }, { "epoch": 8.24, "grad_norm": 0.734375, "learning_rate": 0.00041266055003104986, "loss": 0.2182, "step": 198890 }, { "epoch": 8.24, "grad_norm": 0.61328125, "learning_rate": 0.00041265231420580754, "loss": 0.213, "step": 198900 }, { "epoch": 8.24, "grad_norm": 0.58203125, "learning_rate": 0.00041264407807447114, "loss": 0.1965, "step": 198910 }, { "epoch": 8.24, "grad_norm": 0.95703125, "learning_rate": 0.0004126358416370561, "loss": 0.1662, "step": 198920 }, { "epoch": 8.24, "grad_norm": 0.68359375, "learning_rate": 0.00041262760489357804, "loss": 0.2244, "step": 198930 }, { "epoch": 8.24, "grad_norm": 0.48828125, "learning_rate": 0.00041261936784405234, "loss": 0.2062, "step": 198940 }, { "epoch": 8.24, "grad_norm": 1.28125, "learning_rate": 0.0004126111304884946, "loss": 0.179, "step": 198950 }, { "epoch": 8.24, "grad_norm": 1.90625, "learning_rate": 0.00041260289282692023, "loss": 0.2247, "step": 198960 }, { "epoch": 8.24, "grad_norm": 2.078125, "learning_rate": 0.00041259465485934477, "loss": 0.211, "step": 198970 }, { "epoch": 8.24, "grad_norm": 0.625, "learning_rate": 0.00041258641658578377, "loss": 0.2254, "step": 198980 }, { "epoch": 8.24, "grad_norm": 1.1484375, "learning_rate": 0.0004125781780062527, "loss": 0.2078, "step": 198990 }, { "epoch": 8.24, "grad_norm": 0.49609375, "learning_rate": 0.000412569939120767, "loss": 0.2526, "step": 199000 }, { "epoch": 8.24, "grad_norm": 0.8125, "learning_rate": 0.0004125616999293422, "loss": 0.1809, "step": 199010 }, { "epoch": 8.24, "grad_norm": 0.1611328125, "learning_rate": 0.00041255346043199393, "loss": 0.1997, "step": 199020 }, { "epoch": 8.24, "grad_norm": 0.365234375, "learning_rate": 0.00041254522062873756, "loss": 0.2236, "step": 199030 }, { "epoch": 8.24, "grad_norm": 0.73828125, "learning_rate": 0.00041253698051958867, "loss": 0.2162, "step": 199040 }, { "epoch": 8.24, "grad_norm": 0.494140625, "learning_rate": 0.00041252874010456266, "loss": 0.2078, "step": 199050 }, { "epoch": 8.25, "grad_norm": 0.765625, "learning_rate": 0.0004125204993836752, "loss": 0.1787, "step": 199060 }, { "epoch": 8.25, "grad_norm": 0.267578125, "learning_rate": 0.0004125122583569417, "loss": 0.1949, "step": 199070 }, { "epoch": 8.25, "grad_norm": 0.53125, "learning_rate": 0.00041250401702437763, "loss": 0.2412, "step": 199080 }, { "epoch": 8.25, "grad_norm": 1.09375, "learning_rate": 0.0004124957753859986, "loss": 0.2375, "step": 199090 }, { "epoch": 8.25, "grad_norm": 0.484375, "learning_rate": 0.00041248753344181997, "loss": 0.2136, "step": 199100 }, { "epoch": 8.25, "grad_norm": 0.93359375, "learning_rate": 0.00041247929119185746, "loss": 0.2469, "step": 199110 }, { "epoch": 8.25, "grad_norm": 0.62890625, "learning_rate": 0.0004124710486361264, "loss": 0.2394, "step": 199120 }, { "epoch": 8.25, "grad_norm": 0.6328125, "learning_rate": 0.0004124628057746424, "loss": 0.1535, "step": 199130 }, { "epoch": 8.25, "grad_norm": 0.73828125, "learning_rate": 0.00041245456260742097, "loss": 0.2225, "step": 199140 }, { "epoch": 8.25, "grad_norm": 0.83984375, "learning_rate": 0.0004124463191344776, "loss": 0.2271, "step": 199150 }, { "epoch": 8.25, "grad_norm": 1.3359375, "learning_rate": 0.00041243807535582775, "loss": 0.2227, "step": 199160 }, { "epoch": 8.25, "grad_norm": 1.0859375, "learning_rate": 0.000412429831271487, "loss": 0.2091, "step": 199170 }, { "epoch": 8.25, "grad_norm": 0.4609375, "learning_rate": 0.0004124215868814708, "loss": 0.2405, "step": 199180 }, { "epoch": 8.25, "grad_norm": 0.8046875, "learning_rate": 0.0004124133421857949, "loss": 0.17, "step": 199190 }, { "epoch": 8.25, "grad_norm": 1.21875, "learning_rate": 0.0004124050971844744, "loss": 0.1808, "step": 199200 }, { "epoch": 8.25, "grad_norm": 0.8046875, "learning_rate": 0.0004123968518775251, "loss": 0.1733, "step": 199210 }, { "epoch": 8.25, "grad_norm": 1.0234375, "learning_rate": 0.00041238860626496257, "loss": 0.168, "step": 199220 }, { "epoch": 8.25, "grad_norm": 0.98828125, "learning_rate": 0.00041238036034680215, "loss": 0.1868, "step": 199230 }, { "epoch": 8.25, "grad_norm": 0.73828125, "learning_rate": 0.0004123721141230594, "loss": 0.198, "step": 199240 }, { "epoch": 8.25, "grad_norm": 0.6640625, "learning_rate": 0.0004123638675937499, "loss": 0.2257, "step": 199250 }, { "epoch": 8.25, "grad_norm": 0.53125, "learning_rate": 0.0004123556207588891, "loss": 0.2154, "step": 199260 }, { "epoch": 8.25, "grad_norm": 1.046875, "learning_rate": 0.0004123473736184926, "loss": 0.2079, "step": 199270 }, { "epoch": 8.25, "grad_norm": 1.078125, "learning_rate": 0.0004123391261725759, "loss": 0.1716, "step": 199280 }, { "epoch": 8.25, "grad_norm": 1.3125, "learning_rate": 0.0004123308784211544, "loss": 0.2484, "step": 199290 }, { "epoch": 8.25, "grad_norm": 1.65625, "learning_rate": 0.0004123226303642438, "loss": 0.1817, "step": 199300 }, { "epoch": 8.26, "grad_norm": 0.51171875, "learning_rate": 0.0004123143820018595, "loss": 0.2284, "step": 199310 }, { "epoch": 8.26, "grad_norm": 0.578125, "learning_rate": 0.00041230613333401715, "loss": 0.176, "step": 199320 }, { "epoch": 8.26, "grad_norm": 0.671875, "learning_rate": 0.00041229788436073205, "loss": 0.1473, "step": 199330 }, { "epoch": 8.26, "grad_norm": 1.0859375, "learning_rate": 0.0004122896350820199, "loss": 0.2062, "step": 199340 }, { "epoch": 8.26, "grad_norm": 0.671875, "learning_rate": 0.0004122813854978962, "loss": 0.2039, "step": 199350 }, { "epoch": 8.26, "grad_norm": 0.53515625, "learning_rate": 0.00041227313560837647, "loss": 0.1959, "step": 199360 }, { "epoch": 8.26, "grad_norm": 1.078125, "learning_rate": 0.0004122648854134762, "loss": 0.1874, "step": 199370 }, { "epoch": 8.26, "grad_norm": 0.70703125, "learning_rate": 0.000412256634913211, "loss": 0.1976, "step": 199380 }, { "epoch": 8.26, "grad_norm": 1.1328125, "learning_rate": 0.00041224838410759625, "loss": 0.2159, "step": 199390 }, { "epoch": 8.26, "grad_norm": 0.62890625, "learning_rate": 0.0004122401329966476, "loss": 0.2167, "step": 199400 }, { "epoch": 8.26, "grad_norm": 0.2099609375, "learning_rate": 0.0004122318815803805, "loss": 0.1892, "step": 199410 }, { "epoch": 8.26, "grad_norm": 1.4921875, "learning_rate": 0.0004122236298588106, "loss": 0.1837, "step": 199420 }, { "epoch": 8.26, "grad_norm": 0.416015625, "learning_rate": 0.0004122153778319533, "loss": 0.2339, "step": 199430 }, { "epoch": 8.26, "grad_norm": 1.015625, "learning_rate": 0.0004122071254998241, "loss": 0.1512, "step": 199440 }, { "epoch": 8.26, "grad_norm": 1.7578125, "learning_rate": 0.00041219887286243875, "loss": 0.1794, "step": 199450 }, { "epoch": 8.26, "grad_norm": 0.98046875, "learning_rate": 0.00041219061991981253, "loss": 0.2144, "step": 199460 }, { "epoch": 8.26, "grad_norm": 0.80859375, "learning_rate": 0.00041218236667196116, "loss": 0.1907, "step": 199470 }, { "epoch": 8.26, "grad_norm": 0.453125, "learning_rate": 0.00041217411311890006, "loss": 0.1151, "step": 199480 }, { "epoch": 8.26, "grad_norm": 0.77734375, "learning_rate": 0.00041216585926064477, "loss": 0.1778, "step": 199490 }, { "epoch": 8.26, "grad_norm": 0.341796875, "learning_rate": 0.00041215760509721085, "loss": 0.2362, "step": 199500 }, { "epoch": 8.26, "grad_norm": 0.87109375, "learning_rate": 0.00041214935062861387, "loss": 0.2303, "step": 199510 }, { "epoch": 8.26, "grad_norm": 0.453125, "learning_rate": 0.00041214109585486927, "loss": 0.1397, "step": 199520 }, { "epoch": 8.26, "grad_norm": 2.390625, "learning_rate": 0.00041213284077599267, "loss": 0.2019, "step": 199530 }, { "epoch": 8.26, "grad_norm": 1.6015625, "learning_rate": 0.00041212458539199956, "loss": 0.2074, "step": 199540 }, { "epoch": 8.27, "grad_norm": 0.466796875, "learning_rate": 0.00041211632970290546, "loss": 0.1586, "step": 199550 }, { "epoch": 8.27, "grad_norm": 0.89453125, "learning_rate": 0.0004121080737087259, "loss": 0.2459, "step": 199560 }, { "epoch": 8.27, "grad_norm": 1.171875, "learning_rate": 0.00041209981740947656, "loss": 0.1822, "step": 199570 }, { "epoch": 8.27, "grad_norm": 0.8359375, "learning_rate": 0.00041209156080517287, "loss": 0.213, "step": 199580 }, { "epoch": 8.27, "grad_norm": 0.890625, "learning_rate": 0.00041208330389583026, "loss": 0.1909, "step": 199590 }, { "epoch": 8.27, "grad_norm": 0.69921875, "learning_rate": 0.00041207504668146444, "loss": 0.1896, "step": 199600 }, { "epoch": 8.27, "grad_norm": 0.984375, "learning_rate": 0.00041206678916209087, "loss": 0.2122, "step": 199610 }, { "epoch": 8.27, "grad_norm": 0.75, "learning_rate": 0.00041205853133772515, "loss": 0.2306, "step": 199620 }, { "epoch": 8.27, "grad_norm": 1.0703125, "learning_rate": 0.00041205027320838274, "loss": 0.212, "step": 199630 }, { "epoch": 8.27, "grad_norm": 0.3984375, "learning_rate": 0.0004120420147740792, "loss": 0.1722, "step": 199640 }, { "epoch": 8.27, "grad_norm": 0.33203125, "learning_rate": 0.0004120337560348301, "loss": 0.2193, "step": 199650 }, { "epoch": 8.27, "grad_norm": 1.15625, "learning_rate": 0.00041202549699065094, "loss": 0.2271, "step": 199660 }, { "epoch": 8.27, "grad_norm": 1.2109375, "learning_rate": 0.0004120172376415574, "loss": 0.2523, "step": 199670 }, { "epoch": 8.27, "grad_norm": 0.53515625, "learning_rate": 0.00041200897798756474, "loss": 0.193, "step": 199680 }, { "epoch": 8.27, "grad_norm": 0.53515625, "learning_rate": 0.00041200071802868885, "loss": 0.2045, "step": 199690 }, { "epoch": 8.27, "grad_norm": 1.3671875, "learning_rate": 0.00041199245776494506, "loss": 0.2101, "step": 199700 }, { "epoch": 8.27, "grad_norm": 0.5234375, "learning_rate": 0.0004119841971963489, "loss": 0.1997, "step": 199710 }, { "epoch": 8.27, "grad_norm": 0.8671875, "learning_rate": 0.000411975936322916, "loss": 0.1833, "step": 199720 }, { "epoch": 8.27, "grad_norm": 0.62890625, "learning_rate": 0.0004119676751446618, "loss": 0.2227, "step": 199730 }, { "epoch": 8.27, "grad_norm": 0.3671875, "learning_rate": 0.00041195941366160203, "loss": 0.1794, "step": 199740 }, { "epoch": 8.27, "grad_norm": 0.69921875, "learning_rate": 0.0004119511518737522, "loss": 0.1724, "step": 199750 }, { "epoch": 8.27, "grad_norm": 0.734375, "learning_rate": 0.0004119428897811276, "loss": 0.2469, "step": 199760 }, { "epoch": 8.27, "grad_norm": 1.4375, "learning_rate": 0.0004119346273837441, "loss": 0.181, "step": 199770 }, { "epoch": 8.27, "grad_norm": 1.078125, "learning_rate": 0.00041192636468161717, "loss": 0.1815, "step": 199780 }, { "epoch": 8.28, "grad_norm": 1.015625, "learning_rate": 0.0004119181016747622, "loss": 0.1672, "step": 199790 }, { "epoch": 8.28, "grad_norm": 0.75390625, "learning_rate": 0.00041190983836319485, "loss": 0.2223, "step": 199800 }, { "epoch": 8.28, "grad_norm": 0.88671875, "learning_rate": 0.0004119015747469307, "loss": 0.2215, "step": 199810 }, { "epoch": 8.28, "grad_norm": 0.80078125, "learning_rate": 0.0004118933108259853, "loss": 0.2045, "step": 199820 }, { "epoch": 8.28, "grad_norm": 0.890625, "learning_rate": 0.0004118850466003742, "loss": 0.1782, "step": 199830 }, { "epoch": 8.28, "grad_norm": 0.64453125, "learning_rate": 0.0004118767820701128, "loss": 0.2112, "step": 199840 }, { "epoch": 8.28, "grad_norm": 0.76953125, "learning_rate": 0.0004118685172352168, "loss": 0.2417, "step": 199850 }, { "epoch": 8.28, "grad_norm": 0.6328125, "learning_rate": 0.0004118602520957018, "loss": 0.1927, "step": 199860 }, { "epoch": 8.28, "grad_norm": 0.490234375, "learning_rate": 0.00041185198665158327, "loss": 0.2159, "step": 199870 }, { "epoch": 8.28, "grad_norm": 0.52734375, "learning_rate": 0.00041184372090287674, "loss": 0.1884, "step": 199880 }, { "epoch": 8.28, "grad_norm": 1.0703125, "learning_rate": 0.0004118354548495978, "loss": 0.178, "step": 199890 }, { "epoch": 8.28, "grad_norm": 0.640625, "learning_rate": 0.0004118271884917621, "loss": 0.1902, "step": 199900 }, { "epoch": 8.28, "grad_norm": 1.6171875, "learning_rate": 0.00041181892182938506, "loss": 0.2138, "step": 199910 }, { "epoch": 8.28, "grad_norm": 0.66796875, "learning_rate": 0.00041181065486248225, "loss": 0.2117, "step": 199920 }, { "epoch": 8.28, "grad_norm": 0.59765625, "learning_rate": 0.0004118023875910692, "loss": 0.2277, "step": 199930 }, { "epoch": 8.28, "grad_norm": 0.59765625, "learning_rate": 0.0004117941200151617, "loss": 0.2303, "step": 199940 }, { "epoch": 8.28, "grad_norm": 0.380859375, "learning_rate": 0.00041178585213477503, "loss": 0.209, "step": 199950 }, { "epoch": 8.28, "grad_norm": 1.4921875, "learning_rate": 0.0004117775839499249, "loss": 0.1865, "step": 199960 }, { "epoch": 8.28, "grad_norm": 1.15625, "learning_rate": 0.0004117693154606268, "loss": 0.169, "step": 199970 }, { "epoch": 8.28, "grad_norm": 0.494140625, "learning_rate": 0.00041176104666689633, "loss": 0.1928, "step": 199980 }, { "epoch": 8.28, "grad_norm": 1.03125, "learning_rate": 0.000411752777568749, "loss": 0.2174, "step": 199990 }, { "epoch": 8.28, "grad_norm": 1.0078125, "learning_rate": 0.00041174450816620044, "loss": 0.2326, "step": 200000 }, { "epoch": 8.28, "eval_runtime": 2823.9495, "eval_samples_per_second": 34.196, "eval_steps_per_second": 8.549, "step": 200000 }, { "epoch": 8.28, "grad_norm": 1.2421875, "learning_rate": 0.0004117362384592662, "loss": 0.2248, "step": 200010 }, { "epoch": 8.28, "grad_norm": 1.046875, "learning_rate": 0.0004117279684479618, "loss": 0.2123, "step": 200020 }, { "epoch": 8.29, "grad_norm": 0.59375, "learning_rate": 0.0004117196981323029, "loss": 0.1849, "step": 200030 }, { "epoch": 8.29, "grad_norm": 0.66796875, "learning_rate": 0.00041171142751230495, "loss": 0.1814, "step": 200040 }, { "epoch": 8.29, "grad_norm": 0.60546875, "learning_rate": 0.00041170315658798354, "loss": 0.1958, "step": 200050 }, { "epoch": 8.29, "grad_norm": 1.015625, "learning_rate": 0.0004116948853593542, "loss": 0.2149, "step": 200060 }, { "epoch": 8.29, "grad_norm": 0.6171875, "learning_rate": 0.0004116866138264326, "loss": 0.1799, "step": 200070 }, { "epoch": 8.29, "grad_norm": 0.5625, "learning_rate": 0.00041167834198923425, "loss": 0.1772, "step": 200080 }, { "epoch": 8.29, "grad_norm": 0.65625, "learning_rate": 0.0004116700698477748, "loss": 0.2188, "step": 200090 }, { "epoch": 8.29, "grad_norm": 0.71484375, "learning_rate": 0.00041166179740206963, "loss": 0.1964, "step": 200100 }, { "epoch": 8.29, "grad_norm": 0.2578125, "learning_rate": 0.00041165352465213445, "loss": 0.1631, "step": 200110 }, { "epoch": 8.29, "grad_norm": 0.30078125, "learning_rate": 0.0004116452515979848, "loss": 0.192, "step": 200120 }, { "epoch": 8.29, "grad_norm": 1.1953125, "learning_rate": 0.0004116369782396362, "loss": 0.2323, "step": 200130 }, { "epoch": 8.29, "grad_norm": 0.89453125, "learning_rate": 0.00041162870457710427, "loss": 0.2209, "step": 200140 }, { "epoch": 8.29, "grad_norm": 1.0625, "learning_rate": 0.0004116204306104046, "loss": 0.1911, "step": 200150 }, { "epoch": 8.29, "grad_norm": 0.578125, "learning_rate": 0.00041161215633955274, "loss": 0.2114, "step": 200160 }, { "epoch": 8.29, "grad_norm": 0.546875, "learning_rate": 0.0004116038817645643, "loss": 0.208, "step": 200170 }, { "epoch": 8.29, "grad_norm": 0.9453125, "learning_rate": 0.0004115956068854547, "loss": 0.1927, "step": 200180 }, { "epoch": 8.29, "grad_norm": 0.42578125, "learning_rate": 0.00041158733170223967, "loss": 0.1788, "step": 200190 }, { "epoch": 8.29, "grad_norm": 0.52734375, "learning_rate": 0.0004115790562149347, "loss": 0.2159, "step": 200200 }, { "epoch": 8.29, "grad_norm": 0.375, "learning_rate": 0.00041157078042355547, "loss": 0.1863, "step": 200210 }, { "epoch": 8.29, "grad_norm": 0.84375, "learning_rate": 0.0004115625043281174, "loss": 0.1751, "step": 200220 }, { "epoch": 8.29, "grad_norm": 0.953125, "learning_rate": 0.00041155422792863615, "loss": 0.1691, "step": 200230 }, { "epoch": 8.29, "grad_norm": 1.0078125, "learning_rate": 0.0004115459512251274, "loss": 0.2071, "step": 200240 }, { "epoch": 8.29, "grad_norm": 0.388671875, "learning_rate": 0.00041153767421760645, "loss": 0.2037, "step": 200250 }, { "epoch": 8.29, "grad_norm": 0.294921875, "learning_rate": 0.00041152939690608915, "loss": 0.2489, "step": 200260 }, { "epoch": 8.3, "grad_norm": 0.90234375, "learning_rate": 0.000411521119290591, "loss": 0.2125, "step": 200270 }, { "epoch": 8.3, "grad_norm": 0.921875, "learning_rate": 0.00041151284137112745, "loss": 0.18, "step": 200280 }, { "epoch": 8.3, "grad_norm": 0.5234375, "learning_rate": 0.0004115045631477142, "loss": 0.1459, "step": 200290 }, { "epoch": 8.3, "grad_norm": 1.0390625, "learning_rate": 0.0004114962846203668, "loss": 0.2174, "step": 200300 }, { "epoch": 8.3, "grad_norm": 0.2001953125, "learning_rate": 0.00041148800578910086, "loss": 0.1665, "step": 200310 }, { "epoch": 8.3, "grad_norm": 0.375, "learning_rate": 0.000411479726653932, "loss": 0.1825, "step": 200320 }, { "epoch": 8.3, "grad_norm": 1.1640625, "learning_rate": 0.0004114714472148756, "loss": 0.1434, "step": 200330 }, { "epoch": 8.3, "grad_norm": 0.51171875, "learning_rate": 0.00041146316747194743, "loss": 0.2116, "step": 200340 }, { "epoch": 8.3, "grad_norm": 0.4375, "learning_rate": 0.00041145488742516304, "loss": 0.1945, "step": 200350 }, { "epoch": 8.3, "grad_norm": 1.0390625, "learning_rate": 0.000411446607074538, "loss": 0.1842, "step": 200360 }, { "epoch": 8.3, "grad_norm": 1.109375, "learning_rate": 0.00041143832642008776, "loss": 0.1961, "step": 200370 }, { "epoch": 8.3, "grad_norm": 1.84375, "learning_rate": 0.00041143004546182816, "loss": 0.1792, "step": 200380 }, { "epoch": 8.3, "grad_norm": 0.890625, "learning_rate": 0.00041142176419977456, "loss": 0.1692, "step": 200390 }, { "epoch": 8.3, "grad_norm": 0.578125, "learning_rate": 0.0004114134826339427, "loss": 0.2225, "step": 200400 }, { "epoch": 8.3, "grad_norm": 0.703125, "learning_rate": 0.000411405200764348, "loss": 0.1816, "step": 200410 }, { "epoch": 8.3, "grad_norm": 1.0703125, "learning_rate": 0.00041139691859100623, "loss": 0.2137, "step": 200420 }, { "epoch": 8.3, "grad_norm": 0.77734375, "learning_rate": 0.00041138863611393285, "loss": 0.1942, "step": 200430 }, { "epoch": 8.3, "grad_norm": 1.5, "learning_rate": 0.0004113803533331435, "loss": 0.2117, "step": 200440 }, { "epoch": 8.3, "grad_norm": 0.0, "learning_rate": 0.00041137207024865375, "loss": 0.1776, "step": 200450 }, { "epoch": 8.3, "grad_norm": 1.796875, "learning_rate": 0.0004113637868604792, "loss": 0.1772, "step": 200460 }, { "epoch": 8.3, "grad_norm": 0.59375, "learning_rate": 0.00041135550316863547, "loss": 0.2087, "step": 200470 }, { "epoch": 8.3, "grad_norm": 0.62890625, "learning_rate": 0.000411347219173138, "loss": 0.179, "step": 200480 }, { "epoch": 8.3, "grad_norm": 0.74609375, "learning_rate": 0.0004113389348740025, "loss": 0.2193, "step": 200490 }, { "epoch": 8.3, "grad_norm": 0.392578125, "learning_rate": 0.0004113306502712447, "loss": 0.2371, "step": 200500 }, { "epoch": 8.31, "grad_norm": 0.99609375, "learning_rate": 0.0004113223653648799, "loss": 0.18, "step": 200510 }, { "epoch": 8.31, "grad_norm": 0.65234375, "learning_rate": 0.00041131408015492385, "loss": 0.1765, "step": 200520 }, { "epoch": 8.31, "grad_norm": 0.8203125, "learning_rate": 0.0004113057946413921, "loss": 0.2339, "step": 200530 }, { "epoch": 8.31, "grad_norm": 0.6328125, "learning_rate": 0.0004112975088243003, "loss": 0.1883, "step": 200540 }, { "epoch": 8.31, "grad_norm": 0.953125, "learning_rate": 0.000411289222703664, "loss": 0.232, "step": 200550 }, { "epoch": 8.31, "grad_norm": 0.58203125, "learning_rate": 0.0004112809362794988, "loss": 0.2102, "step": 200560 }, { "epoch": 8.31, "grad_norm": 0.5078125, "learning_rate": 0.00041127264955182033, "loss": 0.1833, "step": 200570 }, { "epoch": 8.31, "grad_norm": 0.55078125, "learning_rate": 0.00041126436252064414, "loss": 0.2298, "step": 200580 }, { "epoch": 8.31, "grad_norm": 0.359375, "learning_rate": 0.00041125607518598584, "loss": 0.1817, "step": 200590 }, { "epoch": 8.31, "grad_norm": 0.44140625, "learning_rate": 0.00041124778754786097, "loss": 0.198, "step": 200600 }, { "epoch": 8.31, "grad_norm": 0.55078125, "learning_rate": 0.00041123949960628515, "loss": 0.2107, "step": 200610 }, { "epoch": 8.31, "grad_norm": 0.62890625, "learning_rate": 0.00041123121136127404, "loss": 0.2169, "step": 200620 }, { "epoch": 8.31, "grad_norm": 0.58984375, "learning_rate": 0.0004112229228128432, "loss": 0.2149, "step": 200630 }, { "epoch": 8.31, "grad_norm": 0.69921875, "learning_rate": 0.00041121463396100833, "loss": 0.205, "step": 200640 }, { "epoch": 8.31, "grad_norm": 1.390625, "learning_rate": 0.0004112063448057848, "loss": 0.161, "step": 200650 }, { "epoch": 8.31, "grad_norm": 1.125, "learning_rate": 0.0004111980553471884, "loss": 0.221, "step": 200660 }, { "epoch": 8.31, "grad_norm": 1.015625, "learning_rate": 0.0004111897655852347, "loss": 0.2419, "step": 200670 }, { "epoch": 8.31, "grad_norm": 0.83984375, "learning_rate": 0.00041118147551993917, "loss": 0.2062, "step": 200680 }, { "epoch": 8.31, "grad_norm": 0.98828125, "learning_rate": 0.0004111731851513176, "loss": 0.2336, "step": 200690 }, { "epoch": 8.31, "grad_norm": 0.87109375, "learning_rate": 0.0004111648944793854, "loss": 0.2183, "step": 200700 }, { "epoch": 8.31, "grad_norm": 0.8828125, "learning_rate": 0.00041115660350415836, "loss": 0.1933, "step": 200710 }, { "epoch": 8.31, "grad_norm": 0.88671875, "learning_rate": 0.000411148312225652, "loss": 0.1838, "step": 200720 }, { "epoch": 8.31, "grad_norm": 0.6015625, "learning_rate": 0.0004111400206438818, "loss": 0.1783, "step": 200730 }, { "epoch": 8.31, "grad_norm": 1.1171875, "learning_rate": 0.0004111317287588636, "loss": 0.1483, "step": 200740 }, { "epoch": 8.32, "grad_norm": 1.515625, "learning_rate": 0.00041112343657061283, "loss": 0.2428, "step": 200750 }, { "epoch": 8.32, "grad_norm": 0.431640625, "learning_rate": 0.0004111151440791452, "loss": 0.2189, "step": 200760 }, { "epoch": 8.32, "grad_norm": 1.3359375, "learning_rate": 0.0004111068512844763, "loss": 0.1983, "step": 200770 }, { "epoch": 8.32, "grad_norm": 0.71875, "learning_rate": 0.00041109855818662164, "loss": 0.2128, "step": 200780 }, { "epoch": 8.32, "grad_norm": 1.0546875, "learning_rate": 0.00041109026478559685, "loss": 0.2075, "step": 200790 }, { "epoch": 8.32, "grad_norm": 0.69921875, "learning_rate": 0.00041108197108141763, "loss": 0.1907, "step": 200800 }, { "epoch": 8.32, "grad_norm": 0.341796875, "learning_rate": 0.00041107367707409956, "loss": 0.254, "step": 200810 }, { "epoch": 8.32, "grad_norm": 1.0546875, "learning_rate": 0.00041106538276365823, "loss": 0.1646, "step": 200820 }, { "epoch": 8.32, "grad_norm": 0.8359375, "learning_rate": 0.00041105708815010917, "loss": 0.1818, "step": 200830 }, { "epoch": 8.32, "grad_norm": 0.62109375, "learning_rate": 0.0004110487932334681, "loss": 0.2606, "step": 200840 }, { "epoch": 8.32, "grad_norm": 0.56640625, "learning_rate": 0.0004110404980137507, "loss": 0.2093, "step": 200850 }, { "epoch": 8.32, "grad_norm": 0.55859375, "learning_rate": 0.00041103220249097226, "loss": 0.1738, "step": 200860 }, { "epoch": 8.32, "grad_norm": 0.5546875, "learning_rate": 0.0004110239066651488, "loss": 0.2086, "step": 200870 }, { "epoch": 8.32, "grad_norm": 1.15625, "learning_rate": 0.0004110156105362956, "loss": 0.197, "step": 200880 }, { "epoch": 8.32, "grad_norm": 1.015625, "learning_rate": 0.0004110073141044284, "loss": 0.1854, "step": 200890 }, { "epoch": 8.32, "grad_norm": 0.828125, "learning_rate": 0.000410999017369563, "loss": 0.2029, "step": 200900 }, { "epoch": 8.32, "grad_norm": 0.48046875, "learning_rate": 0.00041099072033171465, "loss": 0.1389, "step": 200910 }, { "epoch": 8.32, "grad_norm": 0.9140625, "learning_rate": 0.0004109824229908992, "loss": 0.2365, "step": 200920 }, { "epoch": 8.32, "grad_norm": 0.6953125, "learning_rate": 0.0004109741253471323, "loss": 0.2112, "step": 200930 }, { "epoch": 8.32, "grad_norm": 0.62109375, "learning_rate": 0.0004109658274004294, "loss": 0.204, "step": 200940 }, { "epoch": 8.32, "grad_norm": 0.875, "learning_rate": 0.0004109575291508062, "loss": 0.1526, "step": 200950 }, { "epoch": 8.32, "grad_norm": 0.5703125, "learning_rate": 0.0004109492305982783, "loss": 0.1949, "step": 200960 }, { "epoch": 8.32, "grad_norm": 1.953125, "learning_rate": 0.00041094093174286135, "loss": 0.1599, "step": 200970 }, { "epoch": 8.32, "grad_norm": 0.7421875, "learning_rate": 0.0004109326325845709, "loss": 0.2214, "step": 200980 }, { "epoch": 8.32, "grad_norm": 0.6484375, "learning_rate": 0.0004109243331234227, "loss": 0.1737, "step": 200990 }, { "epoch": 8.33, "grad_norm": 0.4140625, "learning_rate": 0.0004109160333594323, "loss": 0.1984, "step": 201000 }, { "epoch": 8.33, "grad_norm": 1.53125, "learning_rate": 0.00041090773329261523, "loss": 0.2379, "step": 201010 }, { "epoch": 8.33, "grad_norm": 0.703125, "learning_rate": 0.00041089943292298717, "loss": 0.1832, "step": 201020 }, { "epoch": 8.33, "grad_norm": 0.55078125, "learning_rate": 0.0004108911322505638, "loss": 0.1462, "step": 201030 }, { "epoch": 8.33, "grad_norm": 0.92578125, "learning_rate": 0.0004108828312753606, "loss": 0.1905, "step": 201040 }, { "epoch": 8.33, "grad_norm": 0.49609375, "learning_rate": 0.0004108745299973934, "loss": 0.218, "step": 201050 }, { "epoch": 8.33, "grad_norm": 0.57421875, "learning_rate": 0.0004108662284166777, "loss": 0.1459, "step": 201060 }, { "epoch": 8.33, "grad_norm": 6.375, "learning_rate": 0.0004108579265332291, "loss": 0.2154, "step": 201070 }, { "epoch": 8.33, "grad_norm": 1.609375, "learning_rate": 0.0004108496243470632, "loss": 0.1896, "step": 201080 }, { "epoch": 8.33, "grad_norm": 0.61328125, "learning_rate": 0.0004108413218581958, "loss": 0.1971, "step": 201090 }, { "epoch": 8.33, "grad_norm": 0.6015625, "learning_rate": 0.00041083301906664235, "loss": 0.1903, "step": 201100 }, { "epoch": 8.33, "grad_norm": 0.80859375, "learning_rate": 0.0004108247159724185, "loss": 0.1844, "step": 201110 }, { "epoch": 8.33, "grad_norm": 0.8203125, "learning_rate": 0.00041081641257553986, "loss": 0.1493, "step": 201120 }, { "epoch": 8.33, "grad_norm": 1.2421875, "learning_rate": 0.00041080810887602216, "loss": 0.2177, "step": 201130 }, { "epoch": 8.33, "grad_norm": 0.5703125, "learning_rate": 0.00041079980487388097, "loss": 0.2208, "step": 201140 }, { "epoch": 8.33, "grad_norm": 0.58203125, "learning_rate": 0.0004107915005691319, "loss": 0.201, "step": 201150 }, { "epoch": 8.33, "grad_norm": 1.0859375, "learning_rate": 0.0004107831959617906, "loss": 0.2118, "step": 201160 }, { "epoch": 8.33, "grad_norm": 1.28125, "learning_rate": 0.0004107748910518727, "loss": 0.2059, "step": 201170 }, { "epoch": 8.33, "grad_norm": 0.55859375, "learning_rate": 0.0004107665858393937, "loss": 0.2337, "step": 201180 }, { "epoch": 8.33, "grad_norm": 1.03125, "learning_rate": 0.0004107582803243695, "loss": 0.267, "step": 201190 }, { "epoch": 8.33, "grad_norm": 0.06298828125, "learning_rate": 0.0004107499745068155, "loss": 0.1614, "step": 201200 }, { "epoch": 8.33, "grad_norm": 1.078125, "learning_rate": 0.00041074166838674743, "loss": 0.1992, "step": 201210 }, { "epoch": 8.33, "grad_norm": 1.0390625, "learning_rate": 0.0004107333619641809, "loss": 0.2226, "step": 201220 }, { "epoch": 8.33, "grad_norm": 0.302734375, "learning_rate": 0.0004107250552391315, "loss": 0.1834, "step": 201230 }, { "epoch": 8.34, "grad_norm": 0.69921875, "learning_rate": 0.000410716748211615, "loss": 0.227, "step": 201240 }, { "epoch": 8.34, "grad_norm": 1.1953125, "learning_rate": 0.0004107084408816468, "loss": 0.2018, "step": 201250 }, { "epoch": 8.34, "grad_norm": 1.0, "learning_rate": 0.00041070013324924276, "loss": 0.2087, "step": 201260 }, { "epoch": 8.34, "grad_norm": 1.140625, "learning_rate": 0.0004106918253144184, "loss": 0.2347, "step": 201270 }, { "epoch": 8.34, "grad_norm": 0.734375, "learning_rate": 0.00041068351707718935, "loss": 0.2243, "step": 201280 }, { "epoch": 8.34, "grad_norm": 1.0546875, "learning_rate": 0.0004106752085375713, "loss": 0.2044, "step": 201290 }, { "epoch": 8.34, "grad_norm": 1.390625, "learning_rate": 0.00041066689969557993, "loss": 0.1994, "step": 201300 }, { "epoch": 8.34, "grad_norm": 0.7734375, "learning_rate": 0.00041065859055123074, "loss": 0.193, "step": 201310 }, { "epoch": 8.34, "grad_norm": 0.59375, "learning_rate": 0.00041065028110453937, "loss": 0.16, "step": 201320 }, { "epoch": 8.34, "grad_norm": 0.9296875, "learning_rate": 0.0004106419713555216, "loss": 0.1794, "step": 201330 }, { "epoch": 8.34, "grad_norm": 0.72265625, "learning_rate": 0.000410633661304193, "loss": 0.2002, "step": 201340 }, { "epoch": 8.34, "grad_norm": 0.609375, "learning_rate": 0.0004106253509505692, "loss": 0.1816, "step": 201350 }, { "epoch": 8.34, "grad_norm": 0.8046875, "learning_rate": 0.0004106170402946657, "loss": 0.198, "step": 201360 }, { "epoch": 8.34, "grad_norm": 1.8203125, "learning_rate": 0.0004106087293364984, "loss": 0.2056, "step": 201370 }, { "epoch": 8.34, "grad_norm": 0.828125, "learning_rate": 0.0004106004180760828, "loss": 0.2227, "step": 201380 }, { "epoch": 8.34, "grad_norm": 0.92578125, "learning_rate": 0.0004105921065134346, "loss": 0.1828, "step": 201390 }, { "epoch": 8.34, "grad_norm": 1.5703125, "learning_rate": 0.00041058379464856934, "loss": 0.2158, "step": 201400 }, { "epoch": 8.34, "grad_norm": 1.125, "learning_rate": 0.00041057548248150266, "loss": 0.229, "step": 201410 }, { "epoch": 8.34, "grad_norm": 1.3828125, "learning_rate": 0.0004105671700122504, "loss": 0.1901, "step": 201420 }, { "epoch": 8.34, "grad_norm": 0.443359375, "learning_rate": 0.00041055885724082796, "loss": 0.1585, "step": 201430 }, { "epoch": 8.34, "grad_norm": 0.92578125, "learning_rate": 0.0004105505441672511, "loss": 0.2136, "step": 201440 }, { "epoch": 8.34, "grad_norm": 1.0546875, "learning_rate": 0.00041054223079153553, "loss": 0.2395, "step": 201450 }, { "epoch": 8.34, "grad_norm": 0.88671875, "learning_rate": 0.0004105339171136967, "loss": 0.1836, "step": 201460 }, { "epoch": 8.34, "grad_norm": 0.498046875, "learning_rate": 0.0004105256031337505, "loss": 0.1442, "step": 201470 }, { "epoch": 8.35, "grad_norm": 0.984375, "learning_rate": 0.00041051728885171235, "loss": 0.2529, "step": 201480 }, { "epoch": 8.35, "grad_norm": 0.6015625, "learning_rate": 0.00041050897426759804, "loss": 0.2237, "step": 201490 }, { "epoch": 8.35, "grad_norm": 0.78125, "learning_rate": 0.0004105006593814232, "loss": 0.184, "step": 201500 }, { "epoch": 8.35, "grad_norm": 1.0234375, "learning_rate": 0.0004104923441932034, "loss": 0.1606, "step": 201510 }, { "epoch": 8.35, "grad_norm": 0.486328125, "learning_rate": 0.00041048402870295437, "loss": 0.2106, "step": 201520 }, { "epoch": 8.35, "grad_norm": 1.0390625, "learning_rate": 0.00041047571291069173, "loss": 0.1885, "step": 201530 }, { "epoch": 8.35, "grad_norm": 0.419921875, "learning_rate": 0.0004104673968164311, "loss": 0.186, "step": 201540 }, { "epoch": 8.35, "grad_norm": 1.4453125, "learning_rate": 0.0004104590804201882, "loss": 0.1883, "step": 201550 }, { "epoch": 8.35, "grad_norm": 1.078125, "learning_rate": 0.00041045076372197865, "loss": 0.2219, "step": 201560 }, { "epoch": 8.35, "grad_norm": 0.71484375, "learning_rate": 0.00041044244672181797, "loss": 0.2281, "step": 201570 }, { "epoch": 8.35, "grad_norm": 0.8359375, "learning_rate": 0.00041043412941972206, "loss": 0.2257, "step": 201580 }, { "epoch": 8.35, "grad_norm": 0.515625, "learning_rate": 0.00041042581181570636, "loss": 0.2439, "step": 201590 }, { "epoch": 8.35, "grad_norm": 0.40625, "learning_rate": 0.00041041749390978666, "loss": 0.1851, "step": 201600 }, { "epoch": 8.35, "grad_norm": 0.6640625, "learning_rate": 0.00041040917570197855, "loss": 0.1996, "step": 201610 }, { "epoch": 8.35, "grad_norm": 0.447265625, "learning_rate": 0.0004104008571922977, "loss": 0.2257, "step": 201620 }, { "epoch": 8.35, "grad_norm": 1.0078125, "learning_rate": 0.0004103925383807597, "loss": 0.1967, "step": 201630 }, { "epoch": 8.35, "grad_norm": 0.275390625, "learning_rate": 0.0004103842192673804, "loss": 0.216, "step": 201640 }, { "epoch": 8.35, "grad_norm": 0.37109375, "learning_rate": 0.00041037589985217524, "loss": 0.1863, "step": 201650 }, { "epoch": 8.35, "grad_norm": 0.4375, "learning_rate": 0.00041036758013515993, "loss": 0.1626, "step": 201660 }, { "epoch": 8.35, "grad_norm": 0.447265625, "learning_rate": 0.0004103592601163502, "loss": 0.1887, "step": 201670 }, { "epoch": 8.35, "grad_norm": 1.8828125, "learning_rate": 0.0004103509397957616, "loss": 0.2099, "step": 201680 }, { "epoch": 8.35, "grad_norm": 0.8984375, "learning_rate": 0.00041034261917340987, "loss": 0.1801, "step": 201690 }, { "epoch": 8.35, "grad_norm": 1.65625, "learning_rate": 0.00041033429824931066, "loss": 0.198, "step": 201700 }, { "epoch": 8.35, "grad_norm": 0.462890625, "learning_rate": 0.0004103259770234796, "loss": 0.1924, "step": 201710 }, { "epoch": 8.36, "grad_norm": 0.53125, "learning_rate": 0.0004103176554959324, "loss": 0.2194, "step": 201720 }, { "epoch": 8.36, "grad_norm": 0.87109375, "learning_rate": 0.00041030933366668465, "loss": 0.1794, "step": 201730 }, { "epoch": 8.36, "grad_norm": 0.6953125, "learning_rate": 0.0004103010115357521, "loss": 0.1899, "step": 201740 }, { "epoch": 8.36, "grad_norm": 0.6328125, "learning_rate": 0.0004102926891031503, "loss": 0.1875, "step": 201750 }, { "epoch": 8.36, "grad_norm": 1.2578125, "learning_rate": 0.000410284366368895, "loss": 0.2, "step": 201760 }, { "epoch": 8.36, "grad_norm": 1.234375, "learning_rate": 0.0004102760433330018, "loss": 0.1696, "step": 201770 }, { "epoch": 8.36, "grad_norm": 0.6015625, "learning_rate": 0.0004102677199954864, "loss": 0.1628, "step": 201780 }, { "epoch": 8.36, "grad_norm": 1.21875, "learning_rate": 0.00041025939635636445, "loss": 0.127, "step": 201790 }, { "epoch": 8.36, "grad_norm": 3.015625, "learning_rate": 0.00041025107241565164, "loss": 0.2257, "step": 201800 }, { "epoch": 8.36, "grad_norm": 0.369140625, "learning_rate": 0.0004102427481733636, "loss": 0.1432, "step": 201810 }, { "epoch": 8.36, "grad_norm": 1.2265625, "learning_rate": 0.00041023442362951606, "loss": 0.1666, "step": 201820 }, { "epoch": 8.36, "grad_norm": 1.0390625, "learning_rate": 0.00041022609878412457, "loss": 0.1976, "step": 201830 }, { "epoch": 8.36, "grad_norm": 0.5625, "learning_rate": 0.0004102177736372049, "loss": 0.2176, "step": 201840 }, { "epoch": 8.36, "grad_norm": 0.34375, "learning_rate": 0.0004102094481887727, "loss": 0.1939, "step": 201850 }, { "epoch": 8.36, "grad_norm": 0.9453125, "learning_rate": 0.0004102011224388437, "loss": 0.1967, "step": 201860 }, { "epoch": 8.36, "grad_norm": 0.52734375, "learning_rate": 0.00041019279638743335, "loss": 0.2412, "step": 201870 }, { "epoch": 8.36, "grad_norm": 0.68359375, "learning_rate": 0.00041018447003455746, "loss": 0.2059, "step": 201880 }, { "epoch": 8.36, "grad_norm": 0.5625, "learning_rate": 0.00041017614338023173, "loss": 0.1988, "step": 201890 }, { "epoch": 8.36, "grad_norm": 0.5, "learning_rate": 0.00041016781642447183, "loss": 0.1762, "step": 201900 }, { "epoch": 8.36, "grad_norm": 0.953125, "learning_rate": 0.00041015948916729337, "loss": 0.2454, "step": 201910 }, { "epoch": 8.36, "grad_norm": 0.86328125, "learning_rate": 0.000410151161608712, "loss": 0.2442, "step": 201920 }, { "epoch": 8.36, "grad_norm": 0.8515625, "learning_rate": 0.0004101428337487435, "loss": 0.239, "step": 201930 }, { "epoch": 8.36, "grad_norm": 0.80859375, "learning_rate": 0.00041013450558740353, "loss": 0.1967, "step": 201940 }, { "epoch": 8.36, "grad_norm": 0.9296875, "learning_rate": 0.0004101261771247076, "loss": 0.2092, "step": 201950 }, { "epoch": 8.37, "grad_norm": 0.6171875, "learning_rate": 0.0004101178483606716, "loss": 0.2101, "step": 201960 }, { "epoch": 8.37, "grad_norm": 0.7578125, "learning_rate": 0.000410109519295311, "loss": 0.1802, "step": 201970 }, { "epoch": 8.37, "grad_norm": 0.84375, "learning_rate": 0.00041010118992864167, "loss": 0.1629, "step": 201980 }, { "epoch": 8.37, "grad_norm": 0.8671875, "learning_rate": 0.000410092860260679, "loss": 0.2049, "step": 201990 }, { "epoch": 8.37, "grad_norm": 0.8046875, "learning_rate": 0.0004100845302914391, "loss": 0.2, "step": 202000 }, { "epoch": 8.37, "grad_norm": 1.0, "learning_rate": 0.00041007620002093725, "loss": 0.2468, "step": 202010 }, { "epoch": 8.37, "grad_norm": 0.8515625, "learning_rate": 0.00041006786944918936, "loss": 0.1872, "step": 202020 }, { "epoch": 8.37, "grad_norm": 0.3984375, "learning_rate": 0.000410059538576211, "loss": 0.194, "step": 202030 }, { "epoch": 8.37, "grad_norm": 0.64453125, "learning_rate": 0.00041005120740201785, "loss": 0.2017, "step": 202040 }, { "epoch": 8.37, "grad_norm": 0.55859375, "learning_rate": 0.0004100428759266256, "loss": 0.2054, "step": 202050 }, { "epoch": 8.37, "grad_norm": 0.95703125, "learning_rate": 0.00041003454415005004, "loss": 0.2427, "step": 202060 }, { "epoch": 8.37, "grad_norm": 1.03125, "learning_rate": 0.0004100262120723066, "loss": 0.2035, "step": 202070 }, { "epoch": 8.37, "grad_norm": 0.48046875, "learning_rate": 0.0004100178796934112, "loss": 0.181, "step": 202080 }, { "epoch": 8.37, "grad_norm": 0.79296875, "learning_rate": 0.00041000954701337944, "loss": 0.2003, "step": 202090 }, { "epoch": 8.37, "grad_norm": 0.431640625, "learning_rate": 0.000410001214032227, "loss": 0.1906, "step": 202100 }, { "epoch": 8.37, "grad_norm": 0.447265625, "learning_rate": 0.00040999288074996957, "loss": 0.1274, "step": 202110 }, { "epoch": 8.37, "grad_norm": 0.5625, "learning_rate": 0.0004099845471666227, "loss": 0.218, "step": 202120 }, { "epoch": 8.37, "grad_norm": 1.140625, "learning_rate": 0.0004099762132822022, "loss": 0.2174, "step": 202130 }, { "epoch": 8.37, "grad_norm": 0.5625, "learning_rate": 0.0004099678790967238, "loss": 0.2185, "step": 202140 }, { "epoch": 8.37, "grad_norm": 0.58203125, "learning_rate": 0.0004099595446102032, "loss": 0.2398, "step": 202150 }, { "epoch": 8.37, "grad_norm": 0.9296875, "learning_rate": 0.0004099512098226559, "loss": 0.2306, "step": 202160 }, { "epoch": 8.37, "grad_norm": 0.94140625, "learning_rate": 0.00040994287473409776, "loss": 0.2077, "step": 202170 }, { "epoch": 8.37, "grad_norm": 0.384765625, "learning_rate": 0.0004099345393445444, "loss": 0.2202, "step": 202180 }, { "epoch": 8.37, "grad_norm": 1.0078125, "learning_rate": 0.0004099262036540115, "loss": 0.178, "step": 202190 }, { "epoch": 8.38, "grad_norm": 0.6640625, "learning_rate": 0.0004099178676625147, "loss": 0.2265, "step": 202200 }, { "epoch": 8.38, "grad_norm": 0.515625, "learning_rate": 0.0004099095313700698, "loss": 0.2003, "step": 202210 }, { "epoch": 8.38, "grad_norm": 0.283203125, "learning_rate": 0.0004099011947766924, "loss": 0.2064, "step": 202220 }, { "epoch": 8.38, "grad_norm": 0.796875, "learning_rate": 0.0004098928578823983, "loss": 0.1628, "step": 202230 }, { "epoch": 8.38, "grad_norm": 0.482421875, "learning_rate": 0.000409884520687203, "loss": 0.215, "step": 202240 }, { "epoch": 8.38, "grad_norm": 1.2890625, "learning_rate": 0.0004098761831911223, "loss": 0.1434, "step": 202250 }, { "epoch": 8.38, "grad_norm": 0.7109375, "learning_rate": 0.000409867845394172, "loss": 0.2183, "step": 202260 }, { "epoch": 8.38, "grad_norm": 0.5234375, "learning_rate": 0.00040985950729636756, "loss": 0.204, "step": 202270 }, { "epoch": 8.38, "grad_norm": 0.25, "learning_rate": 0.00040985116889772483, "loss": 0.2387, "step": 202280 }, { "epoch": 8.38, "grad_norm": 0.65234375, "learning_rate": 0.00040984283019825953, "loss": 0.1543, "step": 202290 }, { "epoch": 8.38, "grad_norm": 0.41796875, "learning_rate": 0.0004098344911979872, "loss": 0.2174, "step": 202300 }, { "epoch": 8.38, "grad_norm": 0.65234375, "learning_rate": 0.0004098261518969236, "loss": 0.1868, "step": 202310 }, { "epoch": 8.38, "grad_norm": 0.89453125, "learning_rate": 0.00040981781229508453, "loss": 0.1849, "step": 202320 }, { "epoch": 8.38, "grad_norm": 0.12109375, "learning_rate": 0.00040980947239248555, "loss": 0.2093, "step": 202330 }, { "epoch": 8.38, "grad_norm": 0.6015625, "learning_rate": 0.0004098011321891424, "loss": 0.1922, "step": 202340 }, { "epoch": 8.38, "grad_norm": 2.09375, "learning_rate": 0.00040979279168507077, "loss": 0.2488, "step": 202350 }, { "epoch": 8.38, "grad_norm": 2.015625, "learning_rate": 0.0004097844508802864, "loss": 0.1543, "step": 202360 }, { "epoch": 8.38, "grad_norm": 0.859375, "learning_rate": 0.0004097761097748049, "loss": 0.1955, "step": 202370 }, { "epoch": 8.38, "grad_norm": 1.0546875, "learning_rate": 0.00040976776836864205, "loss": 0.232, "step": 202380 }, { "epoch": 8.38, "grad_norm": 0.57421875, "learning_rate": 0.0004097594266618135, "loss": 0.179, "step": 202390 }, { "epoch": 8.38, "grad_norm": 0.96484375, "learning_rate": 0.00040975108465433495, "loss": 0.2141, "step": 202400 }, { "epoch": 8.38, "grad_norm": 0.46484375, "learning_rate": 0.0004097427423462221, "loss": 0.2293, "step": 202410 }, { "epoch": 8.38, "grad_norm": 0.8515625, "learning_rate": 0.0004097343997374907, "loss": 0.1951, "step": 202420 }, { "epoch": 8.38, "grad_norm": 0.5546875, "learning_rate": 0.0004097260568281564, "loss": 0.1638, "step": 202430 }, { "epoch": 8.39, "grad_norm": 0.48828125, "learning_rate": 0.0004097177136182349, "loss": 0.2237, "step": 202440 }, { "epoch": 8.39, "grad_norm": 0.2060546875, "learning_rate": 0.0004097093701077419, "loss": 0.1897, "step": 202450 }, { "epoch": 8.39, "grad_norm": 0.7265625, "learning_rate": 0.0004097010262966931, "loss": 0.2073, "step": 202460 }, { "epoch": 8.39, "grad_norm": 0.8359375, "learning_rate": 0.00040969268218510427, "loss": 0.2077, "step": 202470 }, { "epoch": 8.39, "grad_norm": 0.5703125, "learning_rate": 0.00040968433777299095, "loss": 0.2335, "step": 202480 }, { "epoch": 8.39, "grad_norm": 0.8046875, "learning_rate": 0.00040967599306036904, "loss": 0.2172, "step": 202490 }, { "epoch": 8.39, "grad_norm": 2.359375, "learning_rate": 0.0004096676480472542, "loss": 0.2022, "step": 202500 }, { "epoch": 8.39, "grad_norm": 1.046875, "learning_rate": 0.000409659302733662, "loss": 0.1854, "step": 202510 }, { "epoch": 8.39, "grad_norm": 3.40625, "learning_rate": 0.0004096509571196083, "loss": 0.2582, "step": 202520 }, { "epoch": 8.39, "grad_norm": 0.8046875, "learning_rate": 0.0004096426112051087, "loss": 0.2213, "step": 202530 }, { "epoch": 8.39, "grad_norm": 0.6953125, "learning_rate": 0.00040963426499017897, "loss": 0.1788, "step": 202540 }, { "epoch": 8.39, "grad_norm": 0.75390625, "learning_rate": 0.00040962591847483476, "loss": 0.2123, "step": 202550 }, { "epoch": 8.39, "grad_norm": 0.1875, "learning_rate": 0.0004096175716590918, "loss": 0.1871, "step": 202560 }, { "epoch": 8.39, "grad_norm": 0.69140625, "learning_rate": 0.00040960922454296583, "loss": 0.2457, "step": 202570 }, { "epoch": 8.39, "grad_norm": 0.7421875, "learning_rate": 0.0004096008771264726, "loss": 0.2171, "step": 202580 }, { "epoch": 8.39, "grad_norm": 1.1171875, "learning_rate": 0.00040959252940962765, "loss": 0.1832, "step": 202590 }, { "epoch": 8.39, "grad_norm": 0.81640625, "learning_rate": 0.0004095841813924468, "loss": 0.1756, "step": 202600 }, { "epoch": 8.39, "grad_norm": 0.59765625, "learning_rate": 0.00040957583307494585, "loss": 0.1496, "step": 202610 }, { "epoch": 8.39, "grad_norm": 0.625, "learning_rate": 0.0004095674844571403, "loss": 0.2468, "step": 202620 }, { "epoch": 8.39, "grad_norm": 0.703125, "learning_rate": 0.00040955913553904606, "loss": 0.1875, "step": 202630 }, { "epoch": 8.39, "grad_norm": 1.6171875, "learning_rate": 0.00040955078632067866, "loss": 0.1846, "step": 202640 }, { "epoch": 8.39, "grad_norm": 0.90625, "learning_rate": 0.00040954243680205404, "loss": 0.2357, "step": 202650 }, { "epoch": 8.39, "grad_norm": 1.703125, "learning_rate": 0.0004095340869831877, "loss": 0.2275, "step": 202660 }, { "epoch": 8.39, "grad_norm": 1.40625, "learning_rate": 0.00040952573686409545, "loss": 0.1582, "step": 202670 }, { "epoch": 8.39, "grad_norm": 0.296875, "learning_rate": 0.000409517386444793, "loss": 0.239, "step": 202680 }, { "epoch": 8.4, "grad_norm": 0.63671875, "learning_rate": 0.00040950903572529605, "loss": 0.1834, "step": 202690 }, { "epoch": 8.4, "grad_norm": 0.6953125, "learning_rate": 0.00040950068470562036, "loss": 0.2615, "step": 202700 }, { "epoch": 8.4, "grad_norm": 0.37890625, "learning_rate": 0.00040949233338578156, "loss": 0.1777, "step": 202710 }, { "epoch": 8.4, "grad_norm": 0.59765625, "learning_rate": 0.0004094839817657954, "loss": 0.195, "step": 202720 }, { "epoch": 8.4, "grad_norm": 1.8203125, "learning_rate": 0.00040947562984567764, "loss": 0.2365, "step": 202730 }, { "epoch": 8.4, "grad_norm": 0.9296875, "learning_rate": 0.000409467277625444, "loss": 0.2268, "step": 202740 }, { "epoch": 8.4, "grad_norm": 0.828125, "learning_rate": 0.00040945892510511005, "loss": 0.2409, "step": 202750 }, { "epoch": 8.4, "grad_norm": 0.5, "learning_rate": 0.00040945057228469175, "loss": 0.194, "step": 202760 }, { "epoch": 8.4, "grad_norm": 0.72265625, "learning_rate": 0.0004094422191642046, "loss": 0.2192, "step": 202770 }, { "epoch": 8.4, "grad_norm": 0.79296875, "learning_rate": 0.0004094338657436645, "loss": 0.1879, "step": 202780 }, { "epoch": 8.4, "grad_norm": 1.3984375, "learning_rate": 0.00040942551202308695, "loss": 0.2479, "step": 202790 }, { "epoch": 8.4, "grad_norm": 0.5625, "learning_rate": 0.0004094171580024879, "loss": 0.1949, "step": 202800 }, { "epoch": 8.4, "grad_norm": 1.2109375, "learning_rate": 0.0004094088036818829, "loss": 0.1884, "step": 202810 }, { "epoch": 8.4, "grad_norm": 0.39453125, "learning_rate": 0.0004094004490612879, "loss": 0.192, "step": 202820 }, { "epoch": 8.4, "grad_norm": 0.3359375, "learning_rate": 0.0004093920941407183, "loss": 0.1957, "step": 202830 }, { "epoch": 8.4, "grad_norm": 0.50390625, "learning_rate": 0.0004093837389201901, "loss": 0.1914, "step": 202840 }, { "epoch": 8.4, "grad_norm": 0.99609375, "learning_rate": 0.0004093753833997189, "loss": 0.2466, "step": 202850 }, { "epoch": 8.4, "grad_norm": 0.6484375, "learning_rate": 0.00040936702757932045, "loss": 0.2217, "step": 202860 }, { "epoch": 8.4, "grad_norm": 0.66796875, "learning_rate": 0.0004093586714590104, "loss": 0.1571, "step": 202870 }, { "epoch": 8.4, "grad_norm": 0.52734375, "learning_rate": 0.00040935031503880456, "loss": 0.1638, "step": 202880 }, { "epoch": 8.4, "grad_norm": 0.5, "learning_rate": 0.0004093419583187187, "loss": 0.1434, "step": 202890 }, { "epoch": 8.4, "grad_norm": 0.0, "learning_rate": 0.0004093336012987684, "loss": 0.1902, "step": 202900 }, { "epoch": 8.4, "grad_norm": 0.84375, "learning_rate": 0.00040932524397896945, "loss": 0.2566, "step": 202910 }, { "epoch": 8.4, "grad_norm": 1.1328125, "learning_rate": 0.0004093168863593377, "loss": 0.2317, "step": 202920 }, { "epoch": 8.41, "grad_norm": 0.703125, "learning_rate": 0.0004093085284398887, "loss": 0.1775, "step": 202930 }, { "epoch": 8.41, "grad_norm": 0.90625, "learning_rate": 0.00040930017022063823, "loss": 0.1792, "step": 202940 }, { "epoch": 8.41, "grad_norm": 0.93359375, "learning_rate": 0.00040929181170160214, "loss": 0.2558, "step": 202950 }, { "epoch": 8.41, "grad_norm": 0.60546875, "learning_rate": 0.000409283452882796, "loss": 0.2535, "step": 202960 }, { "epoch": 8.41, "grad_norm": 0.640625, "learning_rate": 0.0004092750937642356, "loss": 0.2085, "step": 202970 }, { "epoch": 8.41, "grad_norm": 0.9765625, "learning_rate": 0.00040926673434593663, "loss": 0.1932, "step": 202980 }, { "epoch": 8.41, "grad_norm": 0.890625, "learning_rate": 0.00040925837462791497, "loss": 0.2185, "step": 202990 }, { "epoch": 8.41, "grad_norm": 1.140625, "learning_rate": 0.00040925001461018616, "loss": 0.2045, "step": 203000 }, { "epoch": 8.41, "grad_norm": 0.220703125, "learning_rate": 0.0004092416542927661, "loss": 0.1938, "step": 203010 }, { "epoch": 8.41, "grad_norm": 0.234375, "learning_rate": 0.00040923329367567043, "loss": 0.2384, "step": 203020 }, { "epoch": 8.41, "grad_norm": 1.1328125, "learning_rate": 0.00040922493275891484, "loss": 0.2321, "step": 203030 }, { "epoch": 8.41, "grad_norm": 0.84765625, "learning_rate": 0.00040921657154251515, "loss": 0.2048, "step": 203040 }, { "epoch": 8.41, "grad_norm": 2.171875, "learning_rate": 0.0004092082100264871, "loss": 0.2429, "step": 203050 }, { "epoch": 8.41, "grad_norm": 1.03125, "learning_rate": 0.00040919984821084635, "loss": 0.1979, "step": 203060 }, { "epoch": 8.41, "grad_norm": 0.59765625, "learning_rate": 0.00040919148609560864, "loss": 0.2117, "step": 203070 }, { "epoch": 8.41, "grad_norm": 0.96875, "learning_rate": 0.0004091831236807898, "loss": 0.2234, "step": 203080 }, { "epoch": 8.41, "grad_norm": 0.39453125, "learning_rate": 0.0004091747609664055, "loss": 0.2055, "step": 203090 }, { "epoch": 8.41, "grad_norm": 0.765625, "learning_rate": 0.0004091663979524715, "loss": 0.1834, "step": 203100 }, { "epoch": 8.41, "grad_norm": 0.81640625, "learning_rate": 0.0004091580346390035, "loss": 0.1978, "step": 203110 }, { "epoch": 8.41, "grad_norm": 0.49609375, "learning_rate": 0.0004091496710260173, "loss": 0.1815, "step": 203120 }, { "epoch": 8.41, "grad_norm": 0.97265625, "learning_rate": 0.0004091413071135286, "loss": 0.2287, "step": 203130 }, { "epoch": 8.41, "grad_norm": 0.6328125, "learning_rate": 0.0004091329429015531, "loss": 0.2429, "step": 203140 }, { "epoch": 8.41, "grad_norm": 0.5078125, "learning_rate": 0.00040912457839010666, "loss": 0.219, "step": 203150 }, { "epoch": 8.41, "grad_norm": 2.453125, "learning_rate": 0.00040911621357920494, "loss": 0.2009, "step": 203160 }, { "epoch": 8.42, "grad_norm": 1.0703125, "learning_rate": 0.0004091078484688636, "loss": 0.1986, "step": 203170 }, { "epoch": 8.42, "grad_norm": 1.28125, "learning_rate": 0.00040909948305909853, "loss": 0.2185, "step": 203180 }, { "epoch": 8.42, "grad_norm": 0.26171875, "learning_rate": 0.00040909111734992545, "loss": 0.2011, "step": 203190 }, { "epoch": 8.42, "grad_norm": 0.88671875, "learning_rate": 0.00040908275134135996, "loss": 0.2394, "step": 203200 }, { "epoch": 8.42, "grad_norm": 0.427734375, "learning_rate": 0.00040907438503341796, "loss": 0.217, "step": 203210 }, { "epoch": 8.42, "grad_norm": 0.0, "learning_rate": 0.00040906601842611526, "loss": 0.1933, "step": 203220 }, { "epoch": 8.42, "grad_norm": 0.76953125, "learning_rate": 0.0004090576515194674, "loss": 0.2217, "step": 203230 }, { "epoch": 8.42, "grad_norm": 0.78125, "learning_rate": 0.00040904928431349016, "loss": 0.1638, "step": 203240 }, { "epoch": 8.42, "grad_norm": 1.3203125, "learning_rate": 0.00040904091680819945, "loss": 0.2147, "step": 203250 }, { "epoch": 8.42, "grad_norm": 1.0078125, "learning_rate": 0.00040903254900361084, "loss": 0.2449, "step": 203260 }, { "epoch": 8.42, "grad_norm": 0.73046875, "learning_rate": 0.00040902418089974014, "loss": 0.2372, "step": 203270 }, { "epoch": 8.42, "grad_norm": 0.375, "learning_rate": 0.00040901581249660313, "loss": 0.2397, "step": 203280 }, { "epoch": 8.42, "grad_norm": 0.2080078125, "learning_rate": 0.0004090074437942155, "loss": 0.1945, "step": 203290 }, { "epoch": 8.42, "grad_norm": 0.859375, "learning_rate": 0.00040899907479259313, "loss": 0.1606, "step": 203300 }, { "epoch": 8.42, "grad_norm": 0.2333984375, "learning_rate": 0.0004089907054917515, "loss": 0.2244, "step": 203310 }, { "epoch": 8.42, "grad_norm": 0.431640625, "learning_rate": 0.0004089823358917067, "loss": 0.2223, "step": 203320 }, { "epoch": 8.42, "grad_norm": 0.859375, "learning_rate": 0.00040897396599247425, "loss": 0.1991, "step": 203330 }, { "epoch": 8.42, "grad_norm": 0.90234375, "learning_rate": 0.00040896559579407, "loss": 0.1796, "step": 203340 }, { "epoch": 8.42, "grad_norm": 0.353515625, "learning_rate": 0.0004089572252965096, "loss": 0.2149, "step": 203350 }, { "epoch": 8.42, "grad_norm": 0.57421875, "learning_rate": 0.00040894885449980887, "loss": 0.2258, "step": 203360 }, { "epoch": 8.42, "grad_norm": 2.03125, "learning_rate": 0.0004089404834039836, "loss": 0.1786, "step": 203370 }, { "epoch": 8.42, "grad_norm": 0.65625, "learning_rate": 0.00040893211200904953, "loss": 0.25, "step": 203380 }, { "epoch": 8.42, "grad_norm": 0.2421875, "learning_rate": 0.00040892374031502234, "loss": 0.1839, "step": 203390 }, { "epoch": 8.42, "grad_norm": 0.859375, "learning_rate": 0.00040891536832191776, "loss": 0.1919, "step": 203400 }, { "epoch": 8.43, "grad_norm": 0.84375, "learning_rate": 0.0004089069960297518, "loss": 0.1899, "step": 203410 }, { "epoch": 8.43, "grad_norm": 0.7109375, "learning_rate": 0.00040889862343853993, "loss": 0.176, "step": 203420 }, { "epoch": 8.43, "grad_norm": 0.453125, "learning_rate": 0.000408890250548298, "loss": 0.1855, "step": 203430 }, { "epoch": 8.43, "grad_norm": 0.62890625, "learning_rate": 0.0004088818773590418, "loss": 0.1904, "step": 203440 }, { "epoch": 8.43, "grad_norm": 0.283203125, "learning_rate": 0.000408873503870787, "loss": 0.1711, "step": 203450 }, { "epoch": 8.43, "grad_norm": 0.68359375, "learning_rate": 0.0004088651300835495, "loss": 0.1681, "step": 203460 }, { "epoch": 8.43, "grad_norm": 0.279296875, "learning_rate": 0.000408856755997345, "loss": 0.2237, "step": 203470 }, { "epoch": 8.43, "grad_norm": 0.875, "learning_rate": 0.0004088483816121892, "loss": 0.1966, "step": 203480 }, { "epoch": 8.43, "grad_norm": 0.5546875, "learning_rate": 0.00040884000692809785, "loss": 0.1845, "step": 203490 }, { "epoch": 8.43, "grad_norm": 0.8828125, "learning_rate": 0.0004088316319450869, "loss": 0.1541, "step": 203500 }, { "epoch": 8.43, "grad_norm": 0.2373046875, "learning_rate": 0.0004088232566631719, "loss": 0.1568, "step": 203510 }, { "epoch": 8.43, "grad_norm": 0.9609375, "learning_rate": 0.00040881488108236865, "loss": 0.204, "step": 203520 }, { "epoch": 8.43, "grad_norm": 0.45703125, "learning_rate": 0.0004088065052026929, "loss": 0.2013, "step": 203530 }, { "epoch": 8.43, "grad_norm": 0.76953125, "learning_rate": 0.0004087981290241606, "loss": 0.1787, "step": 203540 }, { "epoch": 8.43, "grad_norm": 0.95703125, "learning_rate": 0.00040878975254678726, "loss": 0.1802, "step": 203550 }, { "epoch": 8.43, "grad_norm": 0.6953125, "learning_rate": 0.00040878137577058873, "loss": 0.1897, "step": 203560 }, { "epoch": 8.43, "grad_norm": 1.234375, "learning_rate": 0.00040877299869558083, "loss": 0.1685, "step": 203570 }, { "epoch": 8.43, "grad_norm": 0.63671875, "learning_rate": 0.0004087646213217794, "loss": 0.2523, "step": 203580 }, { "epoch": 8.43, "grad_norm": 0.94140625, "learning_rate": 0.00040875624364919997, "loss": 0.2163, "step": 203590 }, { "epoch": 8.43, "grad_norm": 0.72265625, "learning_rate": 0.0004087478656778585, "loss": 0.2203, "step": 203600 }, { "epoch": 8.43, "grad_norm": 0.70703125, "learning_rate": 0.00040873948740777064, "loss": 0.2456, "step": 203610 }, { "epoch": 8.43, "grad_norm": 1.0625, "learning_rate": 0.0004087311088389522, "loss": 0.2333, "step": 203620 }, { "epoch": 8.43, "grad_norm": 0.47265625, "learning_rate": 0.000408722729971419, "loss": 0.1624, "step": 203630 }, { "epoch": 8.43, "grad_norm": 0.65234375, "learning_rate": 0.00040871435080518673, "loss": 0.1795, "step": 203640 }, { "epoch": 8.44, "grad_norm": 0.439453125, "learning_rate": 0.0004087059713402712, "loss": 0.2101, "step": 203650 }, { "epoch": 8.44, "grad_norm": 0.38671875, "learning_rate": 0.00040869759157668816, "loss": 0.1574, "step": 203660 }, { "epoch": 8.44, "grad_norm": 0.58984375, "learning_rate": 0.0004086892115144534, "loss": 0.1993, "step": 203670 }, { "epoch": 8.44, "grad_norm": 0.52734375, "learning_rate": 0.0004086808311535827, "loss": 0.1744, "step": 203680 }, { "epoch": 8.44, "grad_norm": 0.1787109375, "learning_rate": 0.00040867245049409185, "loss": 0.1903, "step": 203690 }, { "epoch": 8.44, "grad_norm": 1.7578125, "learning_rate": 0.0004086640695359966, "loss": 0.1563, "step": 203700 }, { "epoch": 8.44, "grad_norm": 0.359375, "learning_rate": 0.00040865568827931257, "loss": 0.2022, "step": 203710 }, { "epoch": 8.44, "grad_norm": 0.68359375, "learning_rate": 0.0004086473067240557, "loss": 0.2382, "step": 203720 }, { "epoch": 8.44, "grad_norm": 0.4453125, "learning_rate": 0.00040863892487024177, "loss": 0.187, "step": 203730 }, { "epoch": 8.44, "grad_norm": 0.8203125, "learning_rate": 0.0004086305427178865, "loss": 0.2151, "step": 203740 }, { "epoch": 8.44, "grad_norm": 0.15625, "learning_rate": 0.0004086221602670057, "loss": 0.1478, "step": 203750 }, { "epoch": 8.44, "grad_norm": 0.58984375, "learning_rate": 0.0004086137775176152, "loss": 0.215, "step": 203760 }, { "epoch": 8.44, "grad_norm": 0.447265625, "learning_rate": 0.00040860539446973057, "loss": 0.1783, "step": 203770 }, { "epoch": 8.44, "grad_norm": 0.330078125, "learning_rate": 0.00040859701112336776, "loss": 0.1232, "step": 203780 }, { "epoch": 8.44, "grad_norm": 0.98828125, "learning_rate": 0.00040858862747854247, "loss": 0.2426, "step": 203790 }, { "epoch": 8.44, "grad_norm": 0.6015625, "learning_rate": 0.0004085802435352706, "loss": 0.1875, "step": 203800 }, { "epoch": 8.44, "grad_norm": 0.734375, "learning_rate": 0.00040857185929356777, "loss": 0.1959, "step": 203810 }, { "epoch": 8.44, "grad_norm": 0.28515625, "learning_rate": 0.0004085634747534498, "loss": 0.1887, "step": 203820 }, { "epoch": 8.44, "grad_norm": 1.1328125, "learning_rate": 0.00040855508991493255, "loss": 0.2158, "step": 203830 }, { "epoch": 8.44, "grad_norm": 0.72265625, "learning_rate": 0.00040854670477803175, "loss": 0.226, "step": 203840 }, { "epoch": 8.44, "grad_norm": 0.828125, "learning_rate": 0.0004085383193427631, "loss": 0.2122, "step": 203850 }, { "epoch": 8.44, "grad_norm": 0.80859375, "learning_rate": 0.00040852993360914256, "loss": 0.2243, "step": 203860 }, { "epoch": 8.44, "grad_norm": 0.71875, "learning_rate": 0.0004085215475771857, "loss": 0.1797, "step": 203870 }, { "epoch": 8.44, "grad_norm": 0.63671875, "learning_rate": 0.00040851316124690845, "loss": 0.2077, "step": 203880 }, { "epoch": 8.45, "grad_norm": 0.4921875, "learning_rate": 0.0004085047746183266, "loss": 0.1763, "step": 203890 }, { "epoch": 8.45, "grad_norm": 0.33984375, "learning_rate": 0.0004084963876914558, "loss": 0.1864, "step": 203900 }, { "epoch": 8.45, "grad_norm": 0.86328125, "learning_rate": 0.000408488000466312, "loss": 0.224, "step": 203910 }, { "epoch": 8.45, "grad_norm": 0.51171875, "learning_rate": 0.0004084796129429108, "loss": 0.1597, "step": 203920 }, { "epoch": 8.45, "grad_norm": 0.90625, "learning_rate": 0.00040847122512126813, "loss": 0.1803, "step": 203930 }, { "epoch": 8.45, "grad_norm": 0.6953125, "learning_rate": 0.00040846283700139973, "loss": 0.1785, "step": 203940 }, { "epoch": 8.45, "grad_norm": 0.54296875, "learning_rate": 0.00040845444858332136, "loss": 0.1785, "step": 203950 }, { "epoch": 8.45, "grad_norm": 0.96875, "learning_rate": 0.00040844605986704884, "loss": 0.196, "step": 203960 }, { "epoch": 8.45, "grad_norm": 0.59375, "learning_rate": 0.00040843767085259797, "loss": 0.2205, "step": 203970 }, { "epoch": 8.45, "grad_norm": 0.30859375, "learning_rate": 0.0004084292815399845, "loss": 0.2097, "step": 203980 }, { "epoch": 8.45, "grad_norm": 0.72265625, "learning_rate": 0.0004084208919292242, "loss": 0.232, "step": 203990 }, { "epoch": 8.45, "grad_norm": 1.1796875, "learning_rate": 0.0004084125020203329, "loss": 0.2275, "step": 204000 }, { "epoch": 8.45, "grad_norm": 0.55859375, "learning_rate": 0.0004084041118133264, "loss": 0.1671, "step": 204010 }, { "epoch": 8.45, "grad_norm": 0.94921875, "learning_rate": 0.00040839572130822044, "loss": 0.2418, "step": 204020 }, { "epoch": 8.45, "grad_norm": 0.640625, "learning_rate": 0.00040838733050503086, "loss": 0.1233, "step": 204030 }, { "epoch": 8.45, "grad_norm": 0.1728515625, "learning_rate": 0.00040837893940377345, "loss": 0.2118, "step": 204040 }, { "epoch": 8.45, "grad_norm": 0.91015625, "learning_rate": 0.00040837054800446393, "loss": 0.2699, "step": 204050 }, { "epoch": 8.45, "grad_norm": 0.73046875, "learning_rate": 0.0004083621563071182, "loss": 0.1698, "step": 204060 }, { "epoch": 8.45, "grad_norm": 0.70703125, "learning_rate": 0.000408353764311752, "loss": 0.2238, "step": 204070 }, { "epoch": 8.45, "grad_norm": 0.443359375, "learning_rate": 0.000408345372018381, "loss": 0.1949, "step": 204080 }, { "epoch": 8.45, "grad_norm": 1.53125, "learning_rate": 0.00040833697942702123, "loss": 0.2558, "step": 204090 }, { "epoch": 8.45, "grad_norm": 1.109375, "learning_rate": 0.00040832858653768833, "loss": 0.1914, "step": 204100 }, { "epoch": 8.45, "grad_norm": 1.2109375, "learning_rate": 0.00040832019335039813, "loss": 0.2002, "step": 204110 }, { "epoch": 8.45, "grad_norm": 0.6875, "learning_rate": 0.0004083117998651664, "loss": 0.2278, "step": 204120 }, { "epoch": 8.46, "grad_norm": 0.58984375, "learning_rate": 0.000408303406082009, "loss": 0.1614, "step": 204130 }, { "epoch": 8.46, "grad_norm": 0.87109375, "learning_rate": 0.00040829501200094167, "loss": 0.209, "step": 204140 }, { "epoch": 8.46, "grad_norm": 0.77734375, "learning_rate": 0.00040828661762198014, "loss": 0.255, "step": 204150 }, { "epoch": 8.46, "grad_norm": 0.7578125, "learning_rate": 0.0004082782229451404, "loss": 0.2339, "step": 204160 }, { "epoch": 8.46, "grad_norm": 0.78125, "learning_rate": 0.0004082698279704381, "loss": 0.211, "step": 204170 }, { "epoch": 8.46, "grad_norm": 0.59375, "learning_rate": 0.00040826143269788906, "loss": 0.1908, "step": 204180 }, { "epoch": 8.46, "grad_norm": 0.50390625, "learning_rate": 0.00040825303712750904, "loss": 0.2606, "step": 204190 }, { "epoch": 8.46, "grad_norm": 0.8203125, "learning_rate": 0.00040824464125931403, "loss": 0.252, "step": 204200 }, { "epoch": 8.46, "grad_norm": 0.67578125, "learning_rate": 0.0004082362450933196, "loss": 0.2241, "step": 204210 }, { "epoch": 8.46, "grad_norm": 0.6875, "learning_rate": 0.00040822784862954163, "loss": 0.2163, "step": 204220 }, { "epoch": 8.46, "grad_norm": 1.3515625, "learning_rate": 0.000408219451867996, "loss": 0.1842, "step": 204230 }, { "epoch": 8.46, "grad_norm": 0.69140625, "learning_rate": 0.0004082110548086984, "loss": 0.2119, "step": 204240 }, { "epoch": 8.46, "grad_norm": 0.267578125, "learning_rate": 0.00040820265745166476, "loss": 0.16, "step": 204250 }, { "epoch": 8.46, "grad_norm": 0.671875, "learning_rate": 0.0004081942597969107, "loss": 0.1749, "step": 204260 }, { "epoch": 8.46, "grad_norm": 0.69921875, "learning_rate": 0.0004081858618444522, "loss": 0.2266, "step": 204270 }, { "epoch": 8.46, "grad_norm": 0.47265625, "learning_rate": 0.00040817746359430495, "loss": 0.2103, "step": 204280 }, { "epoch": 8.46, "grad_norm": 0.71484375, "learning_rate": 0.0004081690650464848, "loss": 0.2027, "step": 204290 }, { "epoch": 8.46, "grad_norm": 1.2109375, "learning_rate": 0.0004081606662010076, "loss": 0.2339, "step": 204300 }, { "epoch": 8.46, "grad_norm": 0.84375, "learning_rate": 0.00040815226705788905, "loss": 0.1602, "step": 204310 }, { "epoch": 8.46, "grad_norm": 0.81640625, "learning_rate": 0.000408143867617145, "loss": 0.1857, "step": 204320 }, { "epoch": 8.46, "grad_norm": 0.46484375, "learning_rate": 0.0004081354678787913, "loss": 0.1984, "step": 204330 }, { "epoch": 8.46, "grad_norm": 1.046875, "learning_rate": 0.00040812706784284375, "loss": 0.1931, "step": 204340 }, { "epoch": 8.46, "grad_norm": 0.73828125, "learning_rate": 0.00040811866750931807, "loss": 0.1444, "step": 204350 }, { "epoch": 8.46, "grad_norm": 0.462890625, "learning_rate": 0.0004081102668782302, "loss": 0.2292, "step": 204360 }, { "epoch": 8.46, "grad_norm": 1.0078125, "learning_rate": 0.00040810186594959584, "loss": 0.2067, "step": 204370 }, { "epoch": 8.47, "grad_norm": 1.0, "learning_rate": 0.0004080934647234309, "loss": 0.2024, "step": 204380 }, { "epoch": 8.47, "grad_norm": 0.482421875, "learning_rate": 0.000408085063199751, "loss": 0.1898, "step": 204390 }, { "epoch": 8.47, "grad_norm": 0.35546875, "learning_rate": 0.0004080766613785722, "loss": 0.212, "step": 204400 }, { "epoch": 8.47, "grad_norm": 0.58984375, "learning_rate": 0.0004080682592599102, "loss": 0.2086, "step": 204410 }, { "epoch": 8.47, "grad_norm": 2.125, "learning_rate": 0.0004080598568437807, "loss": 0.2284, "step": 204420 }, { "epoch": 8.47, "grad_norm": 1.40625, "learning_rate": 0.00040805145413019974, "loss": 0.1959, "step": 204430 }, { "epoch": 8.47, "grad_norm": 0.9609375, "learning_rate": 0.00040804305111918297, "loss": 0.2101, "step": 204440 }, { "epoch": 8.47, "grad_norm": 0.625, "learning_rate": 0.00040803464781074617, "loss": 0.2232, "step": 204450 }, { "epoch": 8.47, "grad_norm": 0.2099609375, "learning_rate": 0.0004080262442049053, "loss": 0.1812, "step": 204460 }, { "epoch": 8.47, "grad_norm": 0.734375, "learning_rate": 0.00040801784030167616, "loss": 0.1837, "step": 204470 }, { "epoch": 8.47, "grad_norm": 0.498046875, "learning_rate": 0.00040800943610107444, "loss": 0.1712, "step": 204480 }, { "epoch": 8.47, "grad_norm": 0.84765625, "learning_rate": 0.00040800103160311597, "loss": 0.1835, "step": 204490 }, { "epoch": 8.47, "grad_norm": 1.390625, "learning_rate": 0.0004079926268078167, "loss": 0.2221, "step": 204500 }, { "epoch": 8.47, "grad_norm": 0.703125, "learning_rate": 0.00040798422171519234, "loss": 0.2037, "step": 204510 }, { "epoch": 8.47, "grad_norm": 0.154296875, "learning_rate": 0.00040797581632525884, "loss": 0.1771, "step": 204520 }, { "epoch": 8.47, "grad_norm": 0.462890625, "learning_rate": 0.0004079674106380318, "loss": 0.2199, "step": 204530 }, { "epoch": 8.47, "grad_norm": 1.7109375, "learning_rate": 0.0004079590046535271, "loss": 0.1935, "step": 204540 }, { "epoch": 8.47, "grad_norm": 1.5625, "learning_rate": 0.0004079505983717607, "loss": 0.2136, "step": 204550 }, { "epoch": 8.47, "grad_norm": 0.54296875, "learning_rate": 0.0004079421917927483, "loss": 0.226, "step": 204560 }, { "epoch": 8.47, "grad_norm": 1.234375, "learning_rate": 0.0004079337849165058, "loss": 0.2091, "step": 204570 }, { "epoch": 8.47, "grad_norm": 0.8046875, "learning_rate": 0.0004079253777430489, "loss": 0.213, "step": 204580 }, { "epoch": 8.47, "grad_norm": 0.4375, "learning_rate": 0.00040791697027239355, "loss": 0.1853, "step": 204590 }, { "epoch": 8.47, "grad_norm": 0.9765625, "learning_rate": 0.0004079085625045555, "loss": 0.1762, "step": 204600 }, { "epoch": 8.47, "grad_norm": 0.75, "learning_rate": 0.00040790015443955055, "loss": 0.2464, "step": 204610 }, { "epoch": 8.48, "grad_norm": 0.609375, "learning_rate": 0.00040789174607739465, "loss": 0.2061, "step": 204620 }, { "epoch": 8.48, "grad_norm": 0.2373046875, "learning_rate": 0.00040788333741810344, "loss": 0.2096, "step": 204630 }, { "epoch": 8.48, "grad_norm": 0.388671875, "learning_rate": 0.0004078749284616929, "loss": 0.1898, "step": 204640 }, { "epoch": 8.48, "grad_norm": 1.3203125, "learning_rate": 0.00040786651920817873, "loss": 0.2097, "step": 204650 }, { "epoch": 8.48, "grad_norm": 0.79296875, "learning_rate": 0.00040785810965757684, "loss": 0.1899, "step": 204660 }, { "epoch": 8.48, "grad_norm": 0.4921875, "learning_rate": 0.0004078496998099031, "loss": 0.2398, "step": 204670 }, { "epoch": 8.48, "grad_norm": 0.53125, "learning_rate": 0.00040784128966517324, "loss": 0.2076, "step": 204680 }, { "epoch": 8.48, "grad_norm": 0.44140625, "learning_rate": 0.0004078328792234031, "loss": 0.1617, "step": 204690 }, { "epoch": 8.48, "grad_norm": 1.109375, "learning_rate": 0.0004078244684846085, "loss": 0.2133, "step": 204700 }, { "epoch": 8.48, "grad_norm": 1.03125, "learning_rate": 0.00040781605744880534, "loss": 0.2223, "step": 204710 }, { "epoch": 8.48, "grad_norm": 1.6328125, "learning_rate": 0.0004078076461160094, "loss": 0.1931, "step": 204720 }, { "epoch": 8.48, "grad_norm": 0.8984375, "learning_rate": 0.00040779923448623645, "loss": 0.2206, "step": 204730 }, { "epoch": 8.48, "grad_norm": 0.69921875, "learning_rate": 0.0004077908225595025, "loss": 0.1972, "step": 204740 }, { "epoch": 8.48, "grad_norm": 0.8984375, "learning_rate": 0.0004077824103358232, "loss": 0.2476, "step": 204750 }, { "epoch": 8.48, "grad_norm": 0.6484375, "learning_rate": 0.0004077739978152144, "loss": 0.2178, "step": 204760 }, { "epoch": 8.48, "grad_norm": 0.578125, "learning_rate": 0.000407765584997692, "loss": 0.2035, "step": 204770 }, { "epoch": 8.48, "grad_norm": 0.78515625, "learning_rate": 0.0004077571718832719, "loss": 0.2245, "step": 204780 }, { "epoch": 8.48, "grad_norm": 0.29296875, "learning_rate": 0.0004077487584719697, "loss": 0.2207, "step": 204790 }, { "epoch": 8.48, "grad_norm": 0.75390625, "learning_rate": 0.0004077403447638014, "loss": 0.194, "step": 204800 }, { "epoch": 8.48, "grad_norm": 0.3203125, "learning_rate": 0.00040773193075878286, "loss": 0.1664, "step": 204810 }, { "epoch": 8.48, "grad_norm": 0.63671875, "learning_rate": 0.0004077235164569298, "loss": 0.2129, "step": 204820 }, { "epoch": 8.48, "grad_norm": 1.0625, "learning_rate": 0.00040771510185825816, "loss": 0.2096, "step": 204830 }, { "epoch": 8.48, "grad_norm": 0.80078125, "learning_rate": 0.00040770668696278367, "loss": 0.1797, "step": 204840 }, { "epoch": 8.48, "grad_norm": 0.5625, "learning_rate": 0.00040769827177052233, "loss": 0.1881, "step": 204850 }, { "epoch": 8.49, "grad_norm": 0.478515625, "learning_rate": 0.0004076898562814898, "loss": 0.2197, "step": 204860 }, { "epoch": 8.49, "grad_norm": 0.609375, "learning_rate": 0.000407681440495702, "loss": 0.1639, "step": 204870 }, { "epoch": 8.49, "grad_norm": 0.41796875, "learning_rate": 0.00040767302441317477, "loss": 0.2054, "step": 204880 }, { "epoch": 8.49, "grad_norm": 0.8359375, "learning_rate": 0.0004076646080339239, "loss": 0.2326, "step": 204890 }, { "epoch": 8.49, "grad_norm": 0.5546875, "learning_rate": 0.0004076561913579653, "loss": 0.2086, "step": 204900 }, { "epoch": 8.49, "grad_norm": 0.27734375, "learning_rate": 0.00040764777438531474, "loss": 0.1918, "step": 204910 }, { "epoch": 8.49, "grad_norm": 1.2578125, "learning_rate": 0.0004076393571159881, "loss": 0.1939, "step": 204920 }, { "epoch": 8.49, "grad_norm": 0.75, "learning_rate": 0.0004076309395500013, "loss": 0.194, "step": 204930 }, { "epoch": 8.49, "grad_norm": 0.8203125, "learning_rate": 0.00040762252168737, "loss": 0.1885, "step": 204940 }, { "epoch": 8.49, "grad_norm": 0.953125, "learning_rate": 0.0004076141035281101, "loss": 0.2139, "step": 204950 }, { "epoch": 8.49, "grad_norm": 0.71875, "learning_rate": 0.0004076056850722376, "loss": 0.1813, "step": 204960 }, { "epoch": 8.49, "grad_norm": 0.70703125, "learning_rate": 0.0004075972663197681, "loss": 0.1698, "step": 204970 }, { "epoch": 8.49, "grad_norm": 0.62109375, "learning_rate": 0.0004075888472707177, "loss": 0.2236, "step": 204980 }, { "epoch": 8.49, "grad_norm": 0.62109375, "learning_rate": 0.00040758042792510203, "loss": 0.2217, "step": 204990 }, { "epoch": 8.49, "grad_norm": 0.486328125, "learning_rate": 0.000407572008282937, "loss": 0.2352, "step": 205000 }, { "epoch": 8.49, "grad_norm": 0.92578125, "learning_rate": 0.0004075635883442385, "loss": 0.2056, "step": 205010 }, { "epoch": 8.49, "grad_norm": 1.21875, "learning_rate": 0.00040755516810902235, "loss": 0.2119, "step": 205020 }, { "epoch": 8.49, "grad_norm": 1.046875, "learning_rate": 0.0004075467475773043, "loss": 0.2119, "step": 205030 }, { "epoch": 8.49, "grad_norm": 1.640625, "learning_rate": 0.0004075383267491004, "loss": 0.2155, "step": 205040 }, { "epoch": 8.49, "grad_norm": 1.046875, "learning_rate": 0.00040752990562442626, "loss": 0.2241, "step": 205050 }, { "epoch": 8.49, "grad_norm": 0.48828125, "learning_rate": 0.00040752148420329794, "loss": 0.1795, "step": 205060 }, { "epoch": 8.49, "grad_norm": 0.291015625, "learning_rate": 0.0004075130624857312, "loss": 0.1934, "step": 205070 }, { "epoch": 8.49, "grad_norm": 0.85546875, "learning_rate": 0.0004075046404717418, "loss": 0.154, "step": 205080 }, { "epoch": 8.49, "grad_norm": 1.03125, "learning_rate": 0.0004074962181613458, "loss": 0.2421, "step": 205090 }, { "epoch": 8.5, "grad_norm": 0.546875, "learning_rate": 0.00040748779555455886, "loss": 0.1795, "step": 205100 }, { "epoch": 8.5, "grad_norm": 0.51171875, "learning_rate": 0.0004074793726513969, "loss": 0.2186, "step": 205110 }, { "epoch": 8.5, "grad_norm": 0.68359375, "learning_rate": 0.00040747094945187574, "loss": 0.2137, "step": 205120 }, { "epoch": 8.5, "grad_norm": 0.875, "learning_rate": 0.0004074625259560113, "loss": 0.1969, "step": 205130 }, { "epoch": 8.5, "grad_norm": 0.8359375, "learning_rate": 0.0004074541021638194, "loss": 0.2073, "step": 205140 }, { "epoch": 8.5, "grad_norm": 0.6328125, "learning_rate": 0.0004074456780753158, "loss": 0.1954, "step": 205150 }, { "epoch": 8.5, "grad_norm": 0.71875, "learning_rate": 0.0004074372536905165, "loss": 0.1819, "step": 205160 }, { "epoch": 8.5, "grad_norm": 1.40625, "learning_rate": 0.00040742882900943737, "loss": 0.2242, "step": 205170 }, { "epoch": 8.5, "grad_norm": 0.828125, "learning_rate": 0.000407420404032094, "loss": 0.1952, "step": 205180 }, { "epoch": 8.5, "grad_norm": 0.34765625, "learning_rate": 0.00040741197875850257, "loss": 0.2246, "step": 205190 }, { "epoch": 8.5, "grad_norm": 1.5859375, "learning_rate": 0.0004074035531886787, "loss": 0.1914, "step": 205200 }, { "epoch": 8.5, "grad_norm": 0.4375, "learning_rate": 0.0004073951273226384, "loss": 0.1458, "step": 205210 }, { "epoch": 8.5, "grad_norm": 0.63671875, "learning_rate": 0.0004073867011603975, "loss": 0.1451, "step": 205220 }, { "epoch": 8.5, "grad_norm": 0.80078125, "learning_rate": 0.00040737827470197174, "loss": 0.2109, "step": 205230 }, { "epoch": 8.5, "grad_norm": 1.09375, "learning_rate": 0.00040736984794737706, "loss": 0.213, "step": 205240 }, { "epoch": 8.5, "grad_norm": 1.0078125, "learning_rate": 0.0004073614208966294, "loss": 0.2241, "step": 205250 }, { "epoch": 8.5, "grad_norm": 0.333984375, "learning_rate": 0.0004073529935497445, "loss": 0.2165, "step": 205260 }, { "epoch": 8.5, "grad_norm": 0.58203125, "learning_rate": 0.0004073445659067383, "loss": 0.1564, "step": 205270 }, { "epoch": 8.5, "grad_norm": 0.87109375, "learning_rate": 0.0004073361379676266, "loss": 0.1876, "step": 205280 }, { "epoch": 8.5, "grad_norm": 0.45703125, "learning_rate": 0.00040732770973242525, "loss": 0.1925, "step": 205290 }, { "epoch": 8.5, "grad_norm": 0.64453125, "learning_rate": 0.0004073192812011502, "loss": 0.1722, "step": 205300 }, { "epoch": 8.5, "grad_norm": 0.6796875, "learning_rate": 0.0004073108523738172, "loss": 0.2274, "step": 205310 }, { "epoch": 8.5, "grad_norm": 1.0078125, "learning_rate": 0.00040730242325044214, "loss": 0.2182, "step": 205320 }, { "epoch": 8.5, "grad_norm": 0.51953125, "learning_rate": 0.00040729399383104093, "loss": 0.1528, "step": 205330 }, { "epoch": 8.51, "grad_norm": 0.232421875, "learning_rate": 0.00040728556411562944, "loss": 0.1895, "step": 205340 }, { "epoch": 8.51, "grad_norm": 0.83984375, "learning_rate": 0.0004072771341042235, "loss": 0.1709, "step": 205350 }, { "epoch": 8.51, "grad_norm": 1.65625, "learning_rate": 0.00040726870379683897, "loss": 0.2147, "step": 205360 }, { "epoch": 8.51, "grad_norm": 0.388671875, "learning_rate": 0.0004072602731934917, "loss": 0.1776, "step": 205370 }, { "epoch": 8.51, "grad_norm": 2.3125, "learning_rate": 0.0004072518422941975, "loss": 0.2006, "step": 205380 }, { "epoch": 8.51, "grad_norm": 1.0703125, "learning_rate": 0.00040724341109897247, "loss": 0.2422, "step": 205390 }, { "epoch": 8.51, "grad_norm": 1.109375, "learning_rate": 0.0004072349796078322, "loss": 0.1969, "step": 205400 }, { "epoch": 8.51, "grad_norm": 0.5, "learning_rate": 0.0004072265478207928, "loss": 0.2071, "step": 205410 }, { "epoch": 8.51, "grad_norm": 0.890625, "learning_rate": 0.0004072181157378699, "loss": 0.1998, "step": 205420 }, { "epoch": 8.51, "grad_norm": 0.30078125, "learning_rate": 0.0004072096833590796, "loss": 0.2381, "step": 205430 }, { "epoch": 8.51, "grad_norm": 1.484375, "learning_rate": 0.0004072012506844376, "loss": 0.2084, "step": 205440 }, { "epoch": 8.51, "grad_norm": 0.5546875, "learning_rate": 0.00040719281771395976, "loss": 0.202, "step": 205450 }, { "epoch": 8.51, "grad_norm": 0.6640625, "learning_rate": 0.00040718438444766207, "loss": 0.2235, "step": 205460 }, { "epoch": 8.51, "grad_norm": 0.98828125, "learning_rate": 0.00040717595088556036, "loss": 0.201, "step": 205470 }, { "epoch": 8.51, "grad_norm": 0.640625, "learning_rate": 0.0004071675170276704, "loss": 0.2133, "step": 205480 }, { "epoch": 8.51, "grad_norm": 0.482421875, "learning_rate": 0.00040715908287400826, "loss": 0.167, "step": 205490 }, { "epoch": 8.51, "grad_norm": 0.578125, "learning_rate": 0.0004071506484245896, "loss": 0.2197, "step": 205500 }, { "epoch": 8.51, "grad_norm": 0.68359375, "learning_rate": 0.00040714221367943047, "loss": 0.1648, "step": 205510 }, { "epoch": 8.51, "grad_norm": 1.8203125, "learning_rate": 0.0004071337786385466, "loss": 0.194, "step": 205520 }, { "epoch": 8.51, "grad_norm": 0.412109375, "learning_rate": 0.00040712534330195397, "loss": 0.1818, "step": 205530 }, { "epoch": 8.51, "grad_norm": 0.890625, "learning_rate": 0.0004071169076696684, "loss": 0.1978, "step": 205540 }, { "epoch": 8.51, "grad_norm": 0.6484375, "learning_rate": 0.00040710847174170575, "loss": 0.1986, "step": 205550 }, { "epoch": 8.51, "grad_norm": 0.490234375, "learning_rate": 0.00040710003551808194, "loss": 0.2266, "step": 205560 }, { "epoch": 8.51, "grad_norm": 0.392578125, "learning_rate": 0.0004070915989988129, "loss": 0.2403, "step": 205570 }, { "epoch": 8.52, "grad_norm": 0.478515625, "learning_rate": 0.00040708316218391433, "loss": 0.2577, "step": 205580 }, { "epoch": 8.52, "grad_norm": 1.7109375, "learning_rate": 0.00040707472507340225, "loss": 0.2084, "step": 205590 }, { "epoch": 8.52, "grad_norm": 0.5078125, "learning_rate": 0.0004070662876672925, "loss": 0.2318, "step": 205600 }, { "epoch": 8.52, "grad_norm": 2.5625, "learning_rate": 0.00040705784996560094, "loss": 0.1984, "step": 205610 }, { "epoch": 8.52, "grad_norm": 1.0546875, "learning_rate": 0.00040704941196834354, "loss": 0.1989, "step": 205620 }, { "epoch": 8.52, "grad_norm": 1.671875, "learning_rate": 0.00040704097367553604, "loss": 0.2463, "step": 205630 }, { "epoch": 8.52, "grad_norm": 1.1484375, "learning_rate": 0.0004070325350871944, "loss": 0.2287, "step": 205640 }, { "epoch": 8.52, "grad_norm": 0.76171875, "learning_rate": 0.0004070240962033345, "loss": 0.1167, "step": 205650 }, { "epoch": 8.52, "grad_norm": 0.5625, "learning_rate": 0.00040701565702397217, "loss": 0.1587, "step": 205660 }, { "epoch": 8.52, "grad_norm": 0.55859375, "learning_rate": 0.0004070072175491234, "loss": 0.2244, "step": 205670 }, { "epoch": 8.52, "grad_norm": 0.458984375, "learning_rate": 0.0004069987777788039, "loss": 0.2215, "step": 205680 }, { "epoch": 8.52, "grad_norm": 0.76953125, "learning_rate": 0.0004069903377130297, "loss": 0.2094, "step": 205690 }, { "epoch": 8.52, "grad_norm": 0.55859375, "learning_rate": 0.00040698189735181666, "loss": 0.24, "step": 205700 }, { "epoch": 8.52, "grad_norm": 0.62109375, "learning_rate": 0.0004069734566951806, "loss": 0.2183, "step": 205710 }, { "epoch": 8.52, "grad_norm": 0.3125, "learning_rate": 0.0004069650157431375, "loss": 0.2059, "step": 205720 }, { "epoch": 8.52, "grad_norm": 0.16796875, "learning_rate": 0.0004069565744957032, "loss": 0.1986, "step": 205730 }, { "epoch": 8.52, "grad_norm": 0.99609375, "learning_rate": 0.00040694813295289356, "loss": 0.162, "step": 205740 }, { "epoch": 8.52, "grad_norm": 0.94140625, "learning_rate": 0.00040693969111472446, "loss": 0.1788, "step": 205750 }, { "epoch": 8.52, "grad_norm": 0.6328125, "learning_rate": 0.0004069312489812118, "loss": 0.1871, "step": 205760 }, { "epoch": 8.52, "grad_norm": 0.57421875, "learning_rate": 0.00040692280655237154, "loss": 0.2196, "step": 205770 }, { "epoch": 8.52, "grad_norm": 0.7734375, "learning_rate": 0.0004069143638282194, "loss": 0.1976, "step": 205780 }, { "epoch": 8.52, "grad_norm": 0.71875, "learning_rate": 0.0004069059208087714, "loss": 0.225, "step": 205790 }, { "epoch": 8.52, "grad_norm": 0.68359375, "learning_rate": 0.00040689747749404347, "loss": 0.2054, "step": 205800 }, { "epoch": 8.52, "grad_norm": 0.515625, "learning_rate": 0.0004068890338840514, "loss": 0.192, "step": 205810 }, { "epoch": 8.53, "grad_norm": 0.6875, "learning_rate": 0.0004068805899788111, "loss": 0.21, "step": 205820 }, { "epoch": 8.53, "grad_norm": 0.58203125, "learning_rate": 0.00040687214577833853, "loss": 0.2221, "step": 205830 }, { "epoch": 8.53, "grad_norm": 0.23828125, "learning_rate": 0.0004068637012826494, "loss": 0.1959, "step": 205840 }, { "epoch": 8.53, "grad_norm": 0.58984375, "learning_rate": 0.0004068552564917598, "loss": 0.201, "step": 205850 }, { "epoch": 8.53, "grad_norm": 0.890625, "learning_rate": 0.0004068468114056856, "loss": 0.2553, "step": 205860 }, { "epoch": 8.53, "grad_norm": 0.89453125, "learning_rate": 0.0004068383660244425, "loss": 0.1994, "step": 205870 }, { "epoch": 8.53, "grad_norm": 3.0625, "learning_rate": 0.0004068299203480467, "loss": 0.2261, "step": 205880 }, { "epoch": 8.53, "grad_norm": 0.6484375, "learning_rate": 0.00040682147437651374, "loss": 0.2335, "step": 205890 }, { "epoch": 8.53, "grad_norm": 1.03125, "learning_rate": 0.0004068130281098598, "loss": 0.1715, "step": 205900 }, { "epoch": 8.53, "grad_norm": 2.765625, "learning_rate": 0.0004068045815481007, "loss": 0.1641, "step": 205910 }, { "epoch": 8.53, "grad_norm": 0.59765625, "learning_rate": 0.00040679613469125225, "loss": 0.2375, "step": 205920 }, { "epoch": 8.53, "grad_norm": 0.62109375, "learning_rate": 0.00040678768753933046, "loss": 0.1749, "step": 205930 }, { "epoch": 8.53, "grad_norm": 0.8359375, "learning_rate": 0.00040677924009235113, "loss": 0.1836, "step": 205940 }, { "epoch": 8.53, "grad_norm": 0.6484375, "learning_rate": 0.0004067707923503302, "loss": 0.1907, "step": 205950 }, { "epoch": 8.53, "grad_norm": 0.62890625, "learning_rate": 0.00040676234431328363, "loss": 0.2155, "step": 205960 }, { "epoch": 8.53, "grad_norm": 0.87890625, "learning_rate": 0.00040675389598122724, "loss": 0.2006, "step": 205970 }, { "epoch": 8.53, "grad_norm": 0.96875, "learning_rate": 0.00040674544735417695, "loss": 0.1632, "step": 205980 }, { "epoch": 8.53, "grad_norm": 0.6328125, "learning_rate": 0.00040673699843214863, "loss": 0.1443, "step": 205990 }, { "epoch": 8.53, "grad_norm": 1.46875, "learning_rate": 0.00040672854921515813, "loss": 0.175, "step": 206000 }, { "epoch": 8.53, "grad_norm": 0.73828125, "learning_rate": 0.00040672009970322155, "loss": 0.1595, "step": 206010 }, { "epoch": 8.53, "grad_norm": 1.5234375, "learning_rate": 0.0004067116498963546, "loss": 0.2077, "step": 206020 }, { "epoch": 8.53, "grad_norm": 2.578125, "learning_rate": 0.00040670319979457326, "loss": 0.2092, "step": 206030 }, { "epoch": 8.53, "grad_norm": 0.62109375, "learning_rate": 0.00040669474939789344, "loss": 0.2078, "step": 206040 }, { "epoch": 8.53, "grad_norm": 0.83203125, "learning_rate": 0.000406686298706331, "loss": 0.1886, "step": 206050 }, { "epoch": 8.53, "grad_norm": 0.71875, "learning_rate": 0.0004066778477199019, "loss": 0.204, "step": 206060 }, { "epoch": 8.54, "grad_norm": 0.54296875, "learning_rate": 0.00040666939643862197, "loss": 0.177, "step": 206070 }, { "epoch": 8.54, "grad_norm": 1.1484375, "learning_rate": 0.0004066609448625072, "loss": 0.2114, "step": 206080 }, { "epoch": 8.54, "grad_norm": 1.0234375, "learning_rate": 0.0004066524929915734, "loss": 0.1952, "step": 206090 }, { "epoch": 8.54, "grad_norm": 0.4453125, "learning_rate": 0.0004066440408258365, "loss": 0.2096, "step": 206100 }, { "epoch": 8.54, "grad_norm": 0.80859375, "learning_rate": 0.0004066355883653125, "loss": 0.2015, "step": 206110 }, { "epoch": 8.54, "grad_norm": 1.09375, "learning_rate": 0.00040662713561001726, "loss": 0.2477, "step": 206120 }, { "epoch": 8.54, "grad_norm": 0.5234375, "learning_rate": 0.0004066186825599666, "loss": 0.2152, "step": 206130 }, { "epoch": 8.54, "grad_norm": 0.60546875, "learning_rate": 0.00040661022921517653, "loss": 0.2115, "step": 206140 }, { "epoch": 8.54, "grad_norm": 0.51171875, "learning_rate": 0.00040660177557566286, "loss": 0.1895, "step": 206150 }, { "epoch": 8.54, "grad_norm": 0.9609375, "learning_rate": 0.0004065933216414416, "loss": 0.1901, "step": 206160 }, { "epoch": 8.54, "grad_norm": 0.921875, "learning_rate": 0.00040658486741252867, "loss": 0.1875, "step": 206170 }, { "epoch": 8.54, "grad_norm": 0.57421875, "learning_rate": 0.00040657641288893985, "loss": 0.1611, "step": 206180 }, { "epoch": 8.54, "grad_norm": 0.6015625, "learning_rate": 0.0004065679580706911, "loss": 0.2307, "step": 206190 }, { "epoch": 8.54, "grad_norm": 0.7109375, "learning_rate": 0.00040655950295779853, "loss": 0.1997, "step": 206200 }, { "epoch": 8.54, "grad_norm": 0.5234375, "learning_rate": 0.00040655104755027774, "loss": 0.1577, "step": 206210 }, { "epoch": 8.54, "grad_norm": 0.421875, "learning_rate": 0.0004065425918481448, "loss": 0.2074, "step": 206220 }, { "epoch": 8.54, "grad_norm": 1.2421875, "learning_rate": 0.00040653413585141563, "loss": 0.217, "step": 206230 }, { "epoch": 8.54, "grad_norm": 0.625, "learning_rate": 0.00040652567956010613, "loss": 0.2236, "step": 206240 }, { "epoch": 8.54, "grad_norm": 0.765625, "learning_rate": 0.0004065172229742322, "loss": 0.2222, "step": 206250 }, { "epoch": 8.54, "grad_norm": 1.390625, "learning_rate": 0.00040650876609380973, "loss": 0.2216, "step": 206260 }, { "epoch": 8.54, "grad_norm": 1.0859375, "learning_rate": 0.0004065003089188547, "loss": 0.2232, "step": 206270 }, { "epoch": 8.54, "grad_norm": 0.51171875, "learning_rate": 0.00040649185144938293, "loss": 0.2563, "step": 206280 }, { "epoch": 8.54, "grad_norm": 0.84375, "learning_rate": 0.00040648339368541046, "loss": 0.1726, "step": 206290 }, { "epoch": 8.54, "grad_norm": 0.9375, "learning_rate": 0.0004064749356269531, "loss": 0.2328, "step": 206300 }, { "epoch": 8.55, "grad_norm": 0.5390625, "learning_rate": 0.00040646647727402685, "loss": 0.1808, "step": 206310 }, { "epoch": 8.55, "grad_norm": 0.95703125, "learning_rate": 0.00040645801862664754, "loss": 0.2403, "step": 206320 }, { "epoch": 8.55, "grad_norm": 0.8125, "learning_rate": 0.0004064495596848312, "loss": 0.1802, "step": 206330 }, { "epoch": 8.55, "grad_norm": 0.1650390625, "learning_rate": 0.0004064411004485936, "loss": 0.1742, "step": 206340 }, { "epoch": 8.55, "grad_norm": 0.55859375, "learning_rate": 0.00040643264091795085, "loss": 0.1842, "step": 206350 }, { "epoch": 8.55, "grad_norm": 0.361328125, "learning_rate": 0.00040642418109291867, "loss": 0.243, "step": 206360 }, { "epoch": 8.55, "grad_norm": 1.0234375, "learning_rate": 0.0004064157209735131, "loss": 0.1946, "step": 206370 }, { "epoch": 8.55, "grad_norm": 0.6796875, "learning_rate": 0.00040640726055975006, "loss": 0.2597, "step": 206380 }, { "epoch": 8.55, "grad_norm": 2.28125, "learning_rate": 0.0004063987998516454, "loss": 0.1754, "step": 206390 }, { "epoch": 8.55, "grad_norm": 0.70703125, "learning_rate": 0.0004063903388492151, "loss": 0.201, "step": 206400 }, { "epoch": 8.55, "grad_norm": 1.8125, "learning_rate": 0.0004063818775524751, "loss": 0.2545, "step": 206410 }, { "epoch": 8.55, "grad_norm": 0.953125, "learning_rate": 0.0004063734159614413, "loss": 0.2285, "step": 206420 }, { "epoch": 8.55, "grad_norm": 0.49609375, "learning_rate": 0.0004063649540761296, "loss": 0.1899, "step": 206430 }, { "epoch": 8.55, "grad_norm": 1.6328125, "learning_rate": 0.00040635649189655596, "loss": 0.2237, "step": 206440 }, { "epoch": 8.55, "grad_norm": 0.369140625, "learning_rate": 0.0004063480294227362, "loss": 0.1816, "step": 206450 }, { "epoch": 8.55, "grad_norm": 0.5859375, "learning_rate": 0.0004063395666546864, "loss": 0.1747, "step": 206460 }, { "epoch": 8.55, "grad_norm": 1.109375, "learning_rate": 0.0004063311035924224, "loss": 0.2281, "step": 206470 }, { "epoch": 8.55, "grad_norm": 0.92578125, "learning_rate": 0.0004063226402359602, "loss": 0.1809, "step": 206480 }, { "epoch": 8.55, "grad_norm": 0.458984375, "learning_rate": 0.00040631417658531566, "loss": 0.2221, "step": 206490 }, { "epoch": 8.55, "grad_norm": 0.71484375, "learning_rate": 0.00040630571264050464, "loss": 0.1642, "step": 206500 }, { "epoch": 8.55, "grad_norm": 0.625, "learning_rate": 0.0004062972484015433, "loss": 0.1699, "step": 206510 }, { "epoch": 8.55, "grad_norm": 1.4375, "learning_rate": 0.00040628878386844724, "loss": 0.1983, "step": 206520 }, { "epoch": 8.55, "grad_norm": 0.36328125, "learning_rate": 0.0004062803190412327, "loss": 0.1713, "step": 206530 }, { "epoch": 8.55, "grad_norm": 0.67578125, "learning_rate": 0.0004062718539199154, "loss": 0.1713, "step": 206540 }, { "epoch": 8.56, "grad_norm": 0.455078125, "learning_rate": 0.0004062633885045114, "loss": 0.2251, "step": 206550 }, { "epoch": 8.56, "grad_norm": 1.4921875, "learning_rate": 0.0004062549227950365, "loss": 0.1609, "step": 206560 }, { "epoch": 8.56, "grad_norm": 1.5546875, "learning_rate": 0.0004062464567915067, "loss": 0.1871, "step": 206570 }, { "epoch": 8.56, "grad_norm": 0.1806640625, "learning_rate": 0.00040623799049393806, "loss": 0.1901, "step": 206580 }, { "epoch": 8.56, "grad_norm": 0.66015625, "learning_rate": 0.00040622952390234634, "loss": 0.2098, "step": 206590 }, { "epoch": 8.56, "grad_norm": 0.5546875, "learning_rate": 0.0004062210570167475, "loss": 0.2429, "step": 206600 }, { "epoch": 8.56, "grad_norm": 0.9765625, "learning_rate": 0.0004062125898371576, "loss": 0.2098, "step": 206610 }, { "epoch": 8.56, "grad_norm": 0.8046875, "learning_rate": 0.00040620412236359236, "loss": 0.1762, "step": 206620 }, { "epoch": 8.56, "grad_norm": 0.49609375, "learning_rate": 0.00040619565459606787, "loss": 0.221, "step": 206630 }, { "epoch": 8.56, "grad_norm": 1.09375, "learning_rate": 0.0004061871865346, "loss": 0.1886, "step": 206640 }, { "epoch": 8.56, "grad_norm": 0.6875, "learning_rate": 0.00040617871817920473, "loss": 0.2021, "step": 206650 }, { "epoch": 8.56, "grad_norm": 0.66796875, "learning_rate": 0.000406170249529898, "loss": 0.1998, "step": 206660 }, { "epoch": 8.56, "grad_norm": 0.88671875, "learning_rate": 0.00040616178058669575, "loss": 0.1779, "step": 206670 }, { "epoch": 8.56, "grad_norm": 0.9375, "learning_rate": 0.0004061533113496138, "loss": 0.21, "step": 206680 }, { "epoch": 8.56, "grad_norm": 0.1845703125, "learning_rate": 0.00040614484181866826, "loss": 0.231, "step": 206690 }, { "epoch": 8.56, "grad_norm": 0.88671875, "learning_rate": 0.0004061363719938749, "loss": 0.2365, "step": 206700 }, { "epoch": 8.56, "grad_norm": 0.90234375, "learning_rate": 0.0004061279018752498, "loss": 0.2432, "step": 206710 }, { "epoch": 8.56, "grad_norm": 0.78125, "learning_rate": 0.00040611943146280893, "loss": 0.2202, "step": 206720 }, { "epoch": 8.56, "grad_norm": 0.0, "learning_rate": 0.00040611096075656805, "loss": 0.1894, "step": 206730 }, { "epoch": 8.56, "grad_norm": 0.4453125, "learning_rate": 0.0004061024897565433, "loss": 0.2168, "step": 206740 }, { "epoch": 8.56, "grad_norm": 0.333984375, "learning_rate": 0.0004060940184627504, "loss": 0.1694, "step": 206750 }, { "epoch": 8.56, "grad_norm": 0.9609375, "learning_rate": 0.0004060855468752055, "loss": 0.2494, "step": 206760 }, { "epoch": 8.56, "grad_norm": 0.58203125, "learning_rate": 0.00040607707499392445, "loss": 0.2029, "step": 206770 }, { "epoch": 8.56, "grad_norm": 0.53515625, "learning_rate": 0.00040606860281892313, "loss": 0.1951, "step": 206780 }, { "epoch": 8.57, "grad_norm": 0.85546875, "learning_rate": 0.0004060601303502176, "loss": 0.1796, "step": 206790 }, { "epoch": 8.57, "grad_norm": 0.63671875, "learning_rate": 0.00040605165758782376, "loss": 0.229, "step": 206800 }, { "epoch": 8.57, "grad_norm": 0.84765625, "learning_rate": 0.00040604318453175747, "loss": 0.1497, "step": 206810 }, { "epoch": 8.57, "grad_norm": 0.46875, "learning_rate": 0.00040603471118203486, "loss": 0.1941, "step": 206820 }, { "epoch": 8.57, "grad_norm": 0.6796875, "learning_rate": 0.00040602623753867176, "loss": 0.1792, "step": 206830 }, { "epoch": 8.57, "grad_norm": 0.63671875, "learning_rate": 0.0004060177636016841, "loss": 0.1994, "step": 206840 }, { "epoch": 8.57, "grad_norm": 1.5546875, "learning_rate": 0.00040600928937108795, "loss": 0.183, "step": 206850 }, { "epoch": 8.57, "grad_norm": 0.84375, "learning_rate": 0.00040600081484689905, "loss": 0.2167, "step": 206860 }, { "epoch": 8.57, "grad_norm": 1.296875, "learning_rate": 0.00040599234002913347, "loss": 0.1803, "step": 206870 }, { "epoch": 8.57, "grad_norm": 0.59765625, "learning_rate": 0.00040598386491780725, "loss": 0.1748, "step": 206880 }, { "epoch": 8.57, "grad_norm": 0.447265625, "learning_rate": 0.0004059753895129361, "loss": 0.2077, "step": 206890 }, { "epoch": 8.57, "grad_norm": 0.6484375, "learning_rate": 0.0004059669138145362, "loss": 0.183, "step": 206900 }, { "epoch": 8.57, "grad_norm": 0.81640625, "learning_rate": 0.0004059584378226234, "loss": 0.2074, "step": 206910 }, { "epoch": 8.57, "grad_norm": 0.69140625, "learning_rate": 0.0004059499615372137, "loss": 0.1633, "step": 206920 }, { "epoch": 8.57, "grad_norm": 1.03125, "learning_rate": 0.000405941484958323, "loss": 0.2156, "step": 206930 }, { "epoch": 8.57, "grad_norm": 0.8515625, "learning_rate": 0.0004059330080859672, "loss": 0.1945, "step": 206940 }, { "epoch": 8.57, "grad_norm": 0.81640625, "learning_rate": 0.0004059245309201623, "loss": 0.2089, "step": 206950 }, { "epoch": 8.57, "grad_norm": 0.66015625, "learning_rate": 0.0004059160534609244, "loss": 0.1541, "step": 206960 }, { "epoch": 8.57, "grad_norm": 0.43359375, "learning_rate": 0.00040590757570826925, "loss": 0.1944, "step": 206970 }, { "epoch": 8.57, "grad_norm": 0.984375, "learning_rate": 0.0004058990976622129, "loss": 0.2709, "step": 206980 }, { "epoch": 8.57, "grad_norm": 0.69921875, "learning_rate": 0.0004058906193227713, "loss": 0.2016, "step": 206990 }, { "epoch": 8.57, "grad_norm": 0.4453125, "learning_rate": 0.0004058821406899603, "loss": 0.2069, "step": 207000 }, { "epoch": 8.57, "grad_norm": 0.373046875, "learning_rate": 0.000405873661763796, "loss": 0.2642, "step": 207010 }, { "epoch": 8.57, "grad_norm": 0.62890625, "learning_rate": 0.0004058651825442943, "loss": 0.1763, "step": 207020 }, { "epoch": 8.58, "grad_norm": 1.265625, "learning_rate": 0.00040585670303147116, "loss": 0.2306, "step": 207030 }, { "epoch": 8.58, "grad_norm": 1.265625, "learning_rate": 0.00040584822322534256, "loss": 0.1736, "step": 207040 }, { "epoch": 8.58, "grad_norm": 1.859375, "learning_rate": 0.0004058397431259244, "loss": 0.2416, "step": 207050 }, { "epoch": 8.58, "grad_norm": 0.8359375, "learning_rate": 0.0004058312627332327, "loss": 0.1605, "step": 207060 }, { "epoch": 8.58, "grad_norm": 0.59375, "learning_rate": 0.00040582278204728334, "loss": 0.1994, "step": 207070 }, { "epoch": 8.58, "grad_norm": 1.421875, "learning_rate": 0.0004058143010680924, "loss": 0.2254, "step": 207080 }, { "epoch": 8.58, "grad_norm": 0.99609375, "learning_rate": 0.00040580581979567576, "loss": 0.2561, "step": 207090 }, { "epoch": 8.58, "grad_norm": 0.70703125, "learning_rate": 0.00040579733823004933, "loss": 0.2167, "step": 207100 }, { "epoch": 8.58, "grad_norm": 0.166015625, "learning_rate": 0.0004057888563712292, "loss": 0.1332, "step": 207110 }, { "epoch": 8.58, "grad_norm": 0.474609375, "learning_rate": 0.0004057803742192312, "loss": 0.2517, "step": 207120 }, { "epoch": 8.58, "grad_norm": 0.640625, "learning_rate": 0.00040577189177407145, "loss": 0.163, "step": 207130 }, { "epoch": 8.58, "grad_norm": 1.0625, "learning_rate": 0.00040576340903576583, "loss": 0.2026, "step": 207140 }, { "epoch": 8.58, "grad_norm": 0.8671875, "learning_rate": 0.00040575492600433016, "loss": 0.2415, "step": 207150 }, { "epoch": 8.58, "grad_norm": 0.7578125, "learning_rate": 0.0004057464426797806, "loss": 0.1899, "step": 207160 }, { "epoch": 8.58, "grad_norm": 0.3359375, "learning_rate": 0.0004057379590621331, "loss": 0.1769, "step": 207170 }, { "epoch": 8.58, "grad_norm": 0.640625, "learning_rate": 0.0004057294751514036, "loss": 0.1916, "step": 207180 }, { "epoch": 8.58, "grad_norm": 0.76171875, "learning_rate": 0.00040572099094760797, "loss": 0.1384, "step": 207190 }, { "epoch": 8.58, "grad_norm": 2.4375, "learning_rate": 0.00040571250645076226, "loss": 0.1739, "step": 207200 }, { "epoch": 8.58, "grad_norm": 1.09375, "learning_rate": 0.00040570402166088246, "loss": 0.2537, "step": 207210 }, { "epoch": 8.58, "grad_norm": 0.78125, "learning_rate": 0.0004056955365779845, "loss": 0.1777, "step": 207220 }, { "epoch": 8.58, "grad_norm": 0.494140625, "learning_rate": 0.00040568705120208425, "loss": 0.23, "step": 207230 }, { "epoch": 8.58, "grad_norm": 1.03125, "learning_rate": 0.00040567856553319794, "loss": 0.1773, "step": 207240 }, { "epoch": 8.58, "grad_norm": 1.2578125, "learning_rate": 0.0004056700795713413, "loss": 0.2376, "step": 207250 }, { "epoch": 8.58, "grad_norm": 1.0234375, "learning_rate": 0.00040566159331653043, "loss": 0.2118, "step": 207260 }, { "epoch": 8.59, "grad_norm": 0.435546875, "learning_rate": 0.0004056531067687812, "loss": 0.2382, "step": 207270 }, { "epoch": 8.59, "grad_norm": 0.703125, "learning_rate": 0.00040564461992810964, "loss": 0.1607, "step": 207280 }, { "epoch": 8.59, "grad_norm": 0.96484375, "learning_rate": 0.00040563613279453174, "loss": 0.1977, "step": 207290 }, { "epoch": 8.59, "grad_norm": 1.0078125, "learning_rate": 0.00040562764536806344, "loss": 0.1746, "step": 207300 }, { "epoch": 8.59, "grad_norm": 0.796875, "learning_rate": 0.0004056191576487207, "loss": 0.1977, "step": 207310 }, { "epoch": 8.59, "grad_norm": 1.2578125, "learning_rate": 0.0004056106696365195, "loss": 0.169, "step": 207320 }, { "epoch": 8.59, "grad_norm": 0.80859375, "learning_rate": 0.00040560218133147585, "loss": 0.193, "step": 207330 }, { "epoch": 8.59, "grad_norm": 0.6328125, "learning_rate": 0.0004055936927336057, "loss": 0.1944, "step": 207340 }, { "epoch": 8.59, "grad_norm": 0.59375, "learning_rate": 0.00040558520384292504, "loss": 0.2211, "step": 207350 }, { "epoch": 8.59, "grad_norm": 0.58203125, "learning_rate": 0.0004055767146594498, "loss": 0.1609, "step": 207360 }, { "epoch": 8.59, "grad_norm": 2.0, "learning_rate": 0.00040556822518319596, "loss": 0.2599, "step": 207370 }, { "epoch": 8.59, "grad_norm": 0.6484375, "learning_rate": 0.0004055597354141796, "loss": 0.1696, "step": 207380 }, { "epoch": 8.59, "grad_norm": 0.55078125, "learning_rate": 0.0004055512453524165, "loss": 0.2126, "step": 207390 }, { "epoch": 8.59, "grad_norm": 0.5859375, "learning_rate": 0.0004055427549979228, "loss": 0.2031, "step": 207400 }, { "epoch": 8.59, "grad_norm": 1.1953125, "learning_rate": 0.0004055342643507145, "loss": 0.1966, "step": 207410 }, { "epoch": 8.59, "grad_norm": 1.4609375, "learning_rate": 0.0004055257734108074, "loss": 0.1955, "step": 207420 }, { "epoch": 8.59, "grad_norm": 0.0, "learning_rate": 0.0004055172821782177, "loss": 0.1837, "step": 207430 }, { "epoch": 8.59, "grad_norm": 0.55078125, "learning_rate": 0.0004055087906529612, "loss": 0.1998, "step": 207440 }, { "epoch": 8.59, "grad_norm": 0.58984375, "learning_rate": 0.000405500298835054, "loss": 0.2387, "step": 207450 }, { "epoch": 8.59, "grad_norm": 0.71484375, "learning_rate": 0.00040549180672451196, "loss": 0.2164, "step": 207460 }, { "epoch": 8.59, "grad_norm": 0.734375, "learning_rate": 0.0004054833143213512, "loss": 0.2037, "step": 207470 }, { "epoch": 8.59, "grad_norm": 0.216796875, "learning_rate": 0.0004054748216255876, "loss": 0.1778, "step": 207480 }, { "epoch": 8.59, "grad_norm": 0.6484375, "learning_rate": 0.0004054663286372372, "loss": 0.1955, "step": 207490 }, { "epoch": 8.59, "grad_norm": 0.72265625, "learning_rate": 0.000405457835356316, "loss": 0.2016, "step": 207500 }, { "epoch": 8.6, "grad_norm": 1.3203125, "learning_rate": 0.0004054493417828399, "loss": 0.1663, "step": 207510 }, { "epoch": 8.6, "grad_norm": 0.26171875, "learning_rate": 0.00040544084791682483, "loss": 0.1785, "step": 207520 }, { "epoch": 8.6, "grad_norm": 1.1640625, "learning_rate": 0.000405432353758287, "loss": 0.203, "step": 207530 }, { "epoch": 8.6, "grad_norm": 0.66796875, "learning_rate": 0.00040542385930724214, "loss": 0.192, "step": 207540 }, { "epoch": 8.6, "grad_norm": 0.59765625, "learning_rate": 0.00040541536456370643, "loss": 0.2055, "step": 207550 }, { "epoch": 8.6, "grad_norm": 1.0625, "learning_rate": 0.0004054068695276958, "loss": 0.2054, "step": 207560 }, { "epoch": 8.6, "grad_norm": 0.72265625, "learning_rate": 0.0004053983741992262, "loss": 0.221, "step": 207570 }, { "epoch": 8.6, "grad_norm": 1.2734375, "learning_rate": 0.00040538987857831365, "loss": 0.2552, "step": 207580 }, { "epoch": 8.6, "grad_norm": 0.384765625, "learning_rate": 0.00040538138266497416, "loss": 0.2179, "step": 207590 }, { "epoch": 8.6, "grad_norm": 1.2265625, "learning_rate": 0.00040537288645922366, "loss": 0.2491, "step": 207600 }, { "epoch": 8.6, "grad_norm": 0.267578125, "learning_rate": 0.0004053643899610782, "loss": 0.2084, "step": 207610 }, { "epoch": 8.6, "grad_norm": 0.92578125, "learning_rate": 0.00040535589317055367, "loss": 0.1719, "step": 207620 }, { "epoch": 8.6, "grad_norm": 0.8671875, "learning_rate": 0.0004053473960876662, "loss": 0.1729, "step": 207630 }, { "epoch": 8.6, "grad_norm": 1.0234375, "learning_rate": 0.00040533889871243166, "loss": 0.1896, "step": 207640 }, { "epoch": 8.6, "grad_norm": 2.03125, "learning_rate": 0.00040533040104486607, "loss": 0.2034, "step": 207650 }, { "epoch": 8.6, "grad_norm": 2.875, "learning_rate": 0.00040532190308498553, "loss": 0.1808, "step": 207660 }, { "epoch": 8.6, "grad_norm": 0.65625, "learning_rate": 0.0004053134048328058, "loss": 0.1848, "step": 207670 }, { "epoch": 8.6, "grad_norm": 1.328125, "learning_rate": 0.0004053049062883431, "loss": 0.1926, "step": 207680 }, { "epoch": 8.6, "grad_norm": 0.4140625, "learning_rate": 0.00040529640745161333, "loss": 0.2159, "step": 207690 }, { "epoch": 8.6, "grad_norm": 0.87109375, "learning_rate": 0.0004052879083226325, "loss": 0.2518, "step": 207700 }, { "epoch": 8.6, "grad_norm": 1.1796875, "learning_rate": 0.00040527940890141656, "loss": 0.1688, "step": 207710 }, { "epoch": 8.6, "grad_norm": 0.294921875, "learning_rate": 0.0004052709091879816, "loss": 0.1937, "step": 207720 }, { "epoch": 8.6, "grad_norm": 1.0, "learning_rate": 0.00040526240918234354, "loss": 0.1743, "step": 207730 }, { "epoch": 8.6, "grad_norm": 0.68359375, "learning_rate": 0.00040525390888451843, "loss": 0.1843, "step": 207740 }, { "epoch": 8.6, "grad_norm": 0.466796875, "learning_rate": 0.0004052454082945221, "loss": 0.2111, "step": 207750 }, { "epoch": 8.61, "grad_norm": 0.62109375, "learning_rate": 0.00040523690741237073, "loss": 0.2576, "step": 207760 }, { "epoch": 8.61, "grad_norm": 1.265625, "learning_rate": 0.00040522840623808033, "loss": 0.2053, "step": 207770 }, { "epoch": 8.61, "grad_norm": 0.65234375, "learning_rate": 0.00040521990477166683, "loss": 0.1981, "step": 207780 }, { "epoch": 8.61, "grad_norm": 0.357421875, "learning_rate": 0.0004052114030131462, "loss": 0.2037, "step": 207790 }, { "epoch": 8.61, "grad_norm": 0.388671875, "learning_rate": 0.0004052029009625344, "loss": 0.221, "step": 207800 }, { "epoch": 8.61, "grad_norm": 0.5703125, "learning_rate": 0.00040519439861984763, "loss": 0.2062, "step": 207810 }, { "epoch": 8.61, "grad_norm": 1.9296875, "learning_rate": 0.00040518589598510167, "loss": 0.2508, "step": 207820 }, { "epoch": 8.61, "grad_norm": 1.609375, "learning_rate": 0.00040517739305831257, "loss": 0.2102, "step": 207830 }, { "epoch": 8.61, "grad_norm": 0.89453125, "learning_rate": 0.0004051688898394965, "loss": 0.1756, "step": 207840 }, { "epoch": 8.61, "grad_norm": 0.37890625, "learning_rate": 0.0004051603863286693, "loss": 0.196, "step": 207850 }, { "epoch": 8.61, "grad_norm": 0.515625, "learning_rate": 0.000405151882525847, "loss": 0.2235, "step": 207860 }, { "epoch": 8.61, "grad_norm": 1.0546875, "learning_rate": 0.0004051433784310455, "loss": 0.1584, "step": 207870 }, { "epoch": 8.61, "grad_norm": 0.625, "learning_rate": 0.00040513487404428104, "loss": 0.2274, "step": 207880 }, { "epoch": 8.61, "grad_norm": 0.71484375, "learning_rate": 0.00040512636936556955, "loss": 0.2079, "step": 207890 }, { "epoch": 8.61, "grad_norm": 1.046875, "learning_rate": 0.0004051178643949269, "loss": 0.2144, "step": 207900 }, { "epoch": 8.61, "grad_norm": 0.60546875, "learning_rate": 0.00040510935913236915, "loss": 0.2104, "step": 207910 }, { "epoch": 8.61, "grad_norm": 0.6015625, "learning_rate": 0.0004051008535779124, "loss": 0.187, "step": 207920 }, { "epoch": 8.61, "grad_norm": 0.875, "learning_rate": 0.0004050923477315726, "loss": 0.1748, "step": 207930 }, { "epoch": 8.61, "grad_norm": 0.81640625, "learning_rate": 0.0004050838415933657, "loss": 0.1778, "step": 207940 }, { "epoch": 8.61, "grad_norm": 0.5, "learning_rate": 0.0004050753351633077, "loss": 0.2464, "step": 207950 }, { "epoch": 8.61, "grad_norm": 0.83984375, "learning_rate": 0.00040506682844141485, "loss": 0.205, "step": 207960 }, { "epoch": 8.61, "grad_norm": 0.53515625, "learning_rate": 0.00040505832142770284, "loss": 0.1886, "step": 207970 }, { "epoch": 8.61, "grad_norm": 0.63671875, "learning_rate": 0.0004050498141221879, "loss": 0.2088, "step": 207980 }, { "epoch": 8.61, "grad_norm": 0.6953125, "learning_rate": 0.0004050413065248858, "loss": 0.1999, "step": 207990 }, { "epoch": 8.62, "grad_norm": 0.83203125, "learning_rate": 0.0004050327986358128, "loss": 0.2029, "step": 208000 }, { "epoch": 8.62, "grad_norm": 0.46484375, "learning_rate": 0.00040502429045498486, "loss": 0.155, "step": 208010 }, { "epoch": 8.62, "grad_norm": 0.8125, "learning_rate": 0.0004050157819824179, "loss": 0.2516, "step": 208020 }, { "epoch": 8.62, "grad_norm": 0.44140625, "learning_rate": 0.000405007273218128, "loss": 0.221, "step": 208030 }, { "epoch": 8.62, "grad_norm": 0.70703125, "learning_rate": 0.0004049987641621311, "loss": 0.1828, "step": 208040 }, { "epoch": 8.62, "grad_norm": 1.34375, "learning_rate": 0.0004049902548144433, "loss": 0.2107, "step": 208050 }, { "epoch": 8.62, "grad_norm": 0.8046875, "learning_rate": 0.00040498174517508055, "loss": 0.1829, "step": 208060 }, { "epoch": 8.62, "grad_norm": 1.0703125, "learning_rate": 0.0004049732352440589, "loss": 0.1609, "step": 208070 }, { "epoch": 8.62, "grad_norm": 0.6640625, "learning_rate": 0.00040496472502139444, "loss": 0.1947, "step": 208080 }, { "epoch": 8.62, "grad_norm": 0.76171875, "learning_rate": 0.00040495621450710304, "loss": 0.1753, "step": 208090 }, { "epoch": 8.62, "grad_norm": 0.37109375, "learning_rate": 0.0004049477037012008, "loss": 0.1935, "step": 208100 }, { "epoch": 8.62, "grad_norm": 0.36328125, "learning_rate": 0.00040493919260370365, "loss": 0.1789, "step": 208110 }, { "epoch": 8.62, "grad_norm": 0.4921875, "learning_rate": 0.00040493068121462774, "loss": 0.1814, "step": 208120 }, { "epoch": 8.62, "grad_norm": 0.439453125, "learning_rate": 0.00040492216953398895, "loss": 0.1839, "step": 208130 }, { "epoch": 8.62, "grad_norm": 0.73046875, "learning_rate": 0.0004049136575618034, "loss": 0.204, "step": 208140 }, { "epoch": 8.62, "grad_norm": 0.77734375, "learning_rate": 0.0004049051452980871, "loss": 0.2137, "step": 208150 }, { "epoch": 8.62, "grad_norm": 0.373046875, "learning_rate": 0.000404896632742856, "loss": 0.2202, "step": 208160 }, { "epoch": 8.62, "grad_norm": 0.50390625, "learning_rate": 0.0004048881198961262, "loss": 0.1714, "step": 208170 }, { "epoch": 8.62, "grad_norm": 1.703125, "learning_rate": 0.0004048796067579137, "loss": 0.1857, "step": 208180 }, { "epoch": 8.62, "grad_norm": 0.5078125, "learning_rate": 0.00040487109332823447, "loss": 0.1608, "step": 208190 }, { "epoch": 8.62, "grad_norm": 1.0390625, "learning_rate": 0.0004048625796071046, "loss": 0.2077, "step": 208200 }, { "epoch": 8.62, "grad_norm": 0.80859375, "learning_rate": 0.00040485406559454006, "loss": 0.1795, "step": 208210 }, { "epoch": 8.62, "grad_norm": 0.7109375, "learning_rate": 0.00040484555129055686, "loss": 0.2242, "step": 208220 }, { "epoch": 8.62, "grad_norm": 0.734375, "learning_rate": 0.00040483703669517106, "loss": 0.253, "step": 208230 }, { "epoch": 8.63, "grad_norm": 1.8671875, "learning_rate": 0.00040482852180839866, "loss": 0.2184, "step": 208240 }, { "epoch": 8.63, "grad_norm": 0.5625, "learning_rate": 0.0004048200066302558, "loss": 0.1583, "step": 208250 }, { "epoch": 8.63, "grad_norm": 0.58984375, "learning_rate": 0.0004048114911607583, "loss": 0.1983, "step": 208260 }, { "epoch": 8.63, "grad_norm": 0.75390625, "learning_rate": 0.0004048029753999224, "loss": 0.1619, "step": 208270 }, { "epoch": 8.63, "grad_norm": 0.7421875, "learning_rate": 0.0004047944593477639, "loss": 0.2313, "step": 208280 }, { "epoch": 8.63, "grad_norm": 1.78125, "learning_rate": 0.00040478594300429906, "loss": 0.185, "step": 208290 }, { "epoch": 8.63, "grad_norm": 0.859375, "learning_rate": 0.0004047774263695437, "loss": 0.1884, "step": 208300 }, { "epoch": 8.63, "grad_norm": 1.125, "learning_rate": 0.00040476890944351394, "loss": 0.1981, "step": 208310 }, { "epoch": 8.63, "grad_norm": 0.60546875, "learning_rate": 0.0004047603922262259, "loss": 0.2112, "step": 208320 }, { "epoch": 8.63, "grad_norm": 0.79296875, "learning_rate": 0.0004047518747176955, "loss": 0.2292, "step": 208330 }, { "epoch": 8.63, "grad_norm": 0.96484375, "learning_rate": 0.0004047433569179387, "loss": 0.1807, "step": 208340 }, { "epoch": 8.63, "grad_norm": 0.349609375, "learning_rate": 0.0004047348388269716, "loss": 0.1937, "step": 208350 }, { "epoch": 8.63, "grad_norm": 0.5859375, "learning_rate": 0.00040472632044481034, "loss": 0.1488, "step": 208360 }, { "epoch": 8.63, "grad_norm": 0.36328125, "learning_rate": 0.00040471780177147085, "loss": 0.2141, "step": 208370 }, { "epoch": 8.63, "grad_norm": 0.45703125, "learning_rate": 0.00040470928280696904, "loss": 0.1867, "step": 208380 }, { "epoch": 8.63, "grad_norm": 0.74609375, "learning_rate": 0.0004047007635513212, "loss": 0.2422, "step": 208390 }, { "epoch": 8.63, "grad_norm": 0.5234375, "learning_rate": 0.0004046922440045432, "loss": 0.1928, "step": 208400 }, { "epoch": 8.63, "grad_norm": 0.90234375, "learning_rate": 0.000404683724166651, "loss": 0.1746, "step": 208410 }, { "epoch": 8.63, "grad_norm": 1.203125, "learning_rate": 0.0004046752040376609, "loss": 0.1736, "step": 208420 }, { "epoch": 8.63, "grad_norm": 1.4375, "learning_rate": 0.0004046666836175886, "loss": 0.2468, "step": 208430 }, { "epoch": 8.63, "grad_norm": 0.412109375, "learning_rate": 0.0004046581629064504, "loss": 0.2046, "step": 208440 }, { "epoch": 8.63, "grad_norm": 0.435546875, "learning_rate": 0.00040464964190426226, "loss": 0.2225, "step": 208450 }, { "epoch": 8.63, "grad_norm": 0.640625, "learning_rate": 0.00040464112061104016, "loss": 0.1628, "step": 208460 }, { "epoch": 8.63, "grad_norm": 0.83203125, "learning_rate": 0.00040463259902680015, "loss": 0.2034, "step": 208470 }, { "epoch": 8.64, "grad_norm": 0.431640625, "learning_rate": 0.00040462407715155833, "loss": 0.216, "step": 208480 }, { "epoch": 8.64, "grad_norm": 0.333984375, "learning_rate": 0.00040461555498533063, "loss": 0.2431, "step": 208490 }, { "epoch": 8.64, "grad_norm": 0.296875, "learning_rate": 0.00040460703252813326, "loss": 0.1564, "step": 208500 }, { "epoch": 8.64, "grad_norm": 0.66796875, "learning_rate": 0.00040459850977998205, "loss": 0.2425, "step": 208510 }, { "epoch": 8.64, "grad_norm": 1.1015625, "learning_rate": 0.0004045899867408932, "loss": 0.2092, "step": 208520 }, { "epoch": 8.64, "grad_norm": 0.91015625, "learning_rate": 0.0004045814634108827, "loss": 0.1817, "step": 208530 }, { "epoch": 8.64, "grad_norm": 0.302734375, "learning_rate": 0.0004045729397899665, "loss": 0.196, "step": 208540 }, { "epoch": 8.64, "grad_norm": 0.796875, "learning_rate": 0.0004045644158781608, "loss": 0.1953, "step": 208550 }, { "epoch": 8.64, "grad_norm": 0.6953125, "learning_rate": 0.00040455589167548146, "loss": 0.2037, "step": 208560 }, { "epoch": 8.64, "grad_norm": 0.81640625, "learning_rate": 0.00040454736718194474, "loss": 0.1946, "step": 208570 }, { "epoch": 8.64, "grad_norm": 0.63671875, "learning_rate": 0.00040453884239756655, "loss": 0.2053, "step": 208580 }, { "epoch": 8.64, "grad_norm": 0.2158203125, "learning_rate": 0.0004045303173223629, "loss": 0.2057, "step": 208590 }, { "epoch": 8.64, "grad_norm": 0.91015625, "learning_rate": 0.00040452179195634986, "loss": 0.1899, "step": 208600 }, { "epoch": 8.64, "grad_norm": 0.80859375, "learning_rate": 0.00040451326629954357, "loss": 0.1673, "step": 208610 }, { "epoch": 8.64, "grad_norm": 0.435546875, "learning_rate": 0.00040450474035195994, "loss": 0.1542, "step": 208620 }, { "epoch": 8.64, "grad_norm": 0.4453125, "learning_rate": 0.00040449621411361503, "loss": 0.1669, "step": 208630 }, { "epoch": 8.64, "grad_norm": 1.25, "learning_rate": 0.00040448768758452505, "loss": 0.1726, "step": 208640 }, { "epoch": 8.64, "grad_norm": 1.015625, "learning_rate": 0.0004044791607647058, "loss": 0.1897, "step": 208650 }, { "epoch": 8.64, "grad_norm": 0.47265625, "learning_rate": 0.0004044706336541736, "loss": 0.1874, "step": 208660 }, { "epoch": 8.64, "grad_norm": 0.93359375, "learning_rate": 0.00040446210625294423, "loss": 0.2182, "step": 208670 }, { "epoch": 8.64, "grad_norm": 1.078125, "learning_rate": 0.00040445357856103386, "loss": 0.1724, "step": 208680 }, { "epoch": 8.64, "grad_norm": 1.1484375, "learning_rate": 0.0004044450505784586, "loss": 0.2357, "step": 208690 }, { "epoch": 8.64, "grad_norm": 0.1484375, "learning_rate": 0.0004044365223052344, "loss": 0.1826, "step": 208700 }, { "epoch": 8.64, "grad_norm": 0.82421875, "learning_rate": 0.00040442799374137734, "loss": 0.2217, "step": 208710 }, { "epoch": 8.65, "grad_norm": 0.3203125, "learning_rate": 0.00040441946488690343, "loss": 0.187, "step": 208720 }, { "epoch": 8.65, "grad_norm": 0.34765625, "learning_rate": 0.00040441093574182887, "loss": 0.1637, "step": 208730 }, { "epoch": 8.65, "grad_norm": 0.58984375, "learning_rate": 0.00040440240630616953, "loss": 0.2137, "step": 208740 }, { "epoch": 8.65, "grad_norm": 0.578125, "learning_rate": 0.0004043938765799415, "loss": 0.226, "step": 208750 }, { "epoch": 8.65, "grad_norm": 0.72265625, "learning_rate": 0.0004043853465631609, "loss": 0.1979, "step": 208760 }, { "epoch": 8.65, "grad_norm": 0.546875, "learning_rate": 0.00040437681625584375, "loss": 0.1733, "step": 208770 }, { "epoch": 8.65, "grad_norm": 0.5859375, "learning_rate": 0.0004043682856580062, "loss": 0.16, "step": 208780 }, { "epoch": 8.65, "grad_norm": 0.796875, "learning_rate": 0.0004043597547696641, "loss": 0.2165, "step": 208790 }, { "epoch": 8.65, "grad_norm": 0.431640625, "learning_rate": 0.0004043512235908336, "loss": 0.2788, "step": 208800 }, { "epoch": 8.65, "grad_norm": 0.9375, "learning_rate": 0.00040434269212153085, "loss": 0.2174, "step": 208810 }, { "epoch": 8.65, "grad_norm": 0.1767578125, "learning_rate": 0.0004043341603617718, "loss": 0.1435, "step": 208820 }, { "epoch": 8.65, "grad_norm": 0.6640625, "learning_rate": 0.00040432562831157245, "loss": 0.1627, "step": 208830 }, { "epoch": 8.65, "grad_norm": 1.8359375, "learning_rate": 0.00040431709597094903, "loss": 0.2298, "step": 208840 }, { "epoch": 8.65, "grad_norm": 1.09375, "learning_rate": 0.0004043085633399174, "loss": 0.2388, "step": 208850 }, { "epoch": 8.65, "grad_norm": 1.3125, "learning_rate": 0.0004043000304184938, "loss": 0.2204, "step": 208860 }, { "epoch": 8.65, "grad_norm": 0.69140625, "learning_rate": 0.00040429149720669415, "loss": 0.2412, "step": 208870 }, { "epoch": 8.65, "grad_norm": 0.62109375, "learning_rate": 0.0004042829637045346, "loss": 0.2302, "step": 208880 }, { "epoch": 8.65, "grad_norm": 0.72265625, "learning_rate": 0.0004042744299120312, "loss": 0.1815, "step": 208890 }, { "epoch": 8.65, "grad_norm": 0.59375, "learning_rate": 0.00040426589582919994, "loss": 0.2655, "step": 208900 }, { "epoch": 8.65, "grad_norm": 0.205078125, "learning_rate": 0.00040425736145605686, "loss": 0.2022, "step": 208910 }, { "epoch": 8.65, "grad_norm": 0.4609375, "learning_rate": 0.0004042488267926182, "loss": 0.1633, "step": 208920 }, { "epoch": 8.65, "grad_norm": 0.70703125, "learning_rate": 0.0004042402918388999, "loss": 0.1708, "step": 208930 }, { "epoch": 8.65, "grad_norm": 0.458984375, "learning_rate": 0.00040423175659491797, "loss": 0.2098, "step": 208940 }, { "epoch": 8.65, "grad_norm": 0.75, "learning_rate": 0.0004042232210606885, "loss": 0.1957, "step": 208950 }, { "epoch": 8.66, "grad_norm": 0.52734375, "learning_rate": 0.00040421468523622764, "loss": 0.1866, "step": 208960 }, { "epoch": 8.66, "grad_norm": 0.275390625, "learning_rate": 0.0004042061491215514, "loss": 0.2003, "step": 208970 }, { "epoch": 8.66, "grad_norm": 0.006500244140625, "learning_rate": 0.00040419761271667576, "loss": 0.1905, "step": 208980 }, { "epoch": 8.66, "grad_norm": 0.7890625, "learning_rate": 0.00040418907602161696, "loss": 0.2667, "step": 208990 }, { "epoch": 8.66, "grad_norm": 1.03125, "learning_rate": 0.00040418053903639096, "loss": 0.2024, "step": 209000 }, { "epoch": 8.66, "grad_norm": 0.87109375, "learning_rate": 0.00040417200176101376, "loss": 0.188, "step": 209010 }, { "epoch": 8.66, "grad_norm": 0.54296875, "learning_rate": 0.0004041634641955016, "loss": 0.2149, "step": 209020 }, { "epoch": 8.66, "grad_norm": 1.859375, "learning_rate": 0.00040415492633987036, "loss": 0.2359, "step": 209030 }, { "epoch": 8.66, "grad_norm": 0.86328125, "learning_rate": 0.0004041463881941363, "loss": 0.2518, "step": 209040 }, { "epoch": 8.66, "grad_norm": 0.61328125, "learning_rate": 0.00040413784975831536, "loss": 0.1908, "step": 209050 }, { "epoch": 8.66, "grad_norm": 0.421875, "learning_rate": 0.00040412931103242354, "loss": 0.2046, "step": 209060 }, { "epoch": 8.66, "grad_norm": 1.078125, "learning_rate": 0.00040412077201647706, "loss": 0.1802, "step": 209070 }, { "epoch": 8.66, "grad_norm": 0.6328125, "learning_rate": 0.0004041122327104919, "loss": 0.2513, "step": 209080 }, { "epoch": 8.66, "grad_norm": 1.234375, "learning_rate": 0.00040410369311448427, "loss": 0.1722, "step": 209090 }, { "epoch": 8.66, "grad_norm": 1.296875, "learning_rate": 0.00040409515322847, "loss": 0.2282, "step": 209100 }, { "epoch": 8.66, "grad_norm": 0.4375, "learning_rate": 0.00040408661305246535, "loss": 0.1839, "step": 209110 }, { "epoch": 8.66, "grad_norm": 0.828125, "learning_rate": 0.00040407807258648634, "loss": 0.1691, "step": 209120 }, { "epoch": 8.66, "grad_norm": 0.90625, "learning_rate": 0.00040406953183054907, "loss": 0.2174, "step": 209130 }, { "epoch": 8.66, "grad_norm": 0.8203125, "learning_rate": 0.00040406099078466945, "loss": 0.2036, "step": 209140 }, { "epoch": 8.66, "grad_norm": 0.43359375, "learning_rate": 0.0004040524494488638, "loss": 0.1911, "step": 209150 }, { "epoch": 8.66, "grad_norm": 0.37890625, "learning_rate": 0.000404043907823148, "loss": 0.262, "step": 209160 }, { "epoch": 8.66, "grad_norm": 0.37890625, "learning_rate": 0.00040403536590753834, "loss": 0.1666, "step": 209170 }, { "epoch": 8.66, "grad_norm": 0.7109375, "learning_rate": 0.00040402682370205056, "loss": 0.1992, "step": 209180 }, { "epoch": 8.66, "grad_norm": 0.62890625, "learning_rate": 0.000404018281206701, "loss": 0.1958, "step": 209190 }, { "epoch": 8.67, "grad_norm": 0.78125, "learning_rate": 0.0004040097384215058, "loss": 0.2089, "step": 209200 }, { "epoch": 8.67, "grad_norm": 0.66015625, "learning_rate": 0.00040400119534648077, "loss": 0.2155, "step": 209210 }, { "epoch": 8.67, "grad_norm": 0.45703125, "learning_rate": 0.00040399265198164217, "loss": 0.2005, "step": 209220 }, { "epoch": 8.67, "grad_norm": 0.37890625, "learning_rate": 0.000403984108327006, "loss": 0.1322, "step": 209230 }, { "epoch": 8.67, "grad_norm": 1.8515625, "learning_rate": 0.00040397556438258835, "loss": 0.2356, "step": 209240 }, { "epoch": 8.67, "grad_norm": 0.8671875, "learning_rate": 0.0004039670201484054, "loss": 0.2372, "step": 209250 }, { "epoch": 8.67, "grad_norm": 0.81640625, "learning_rate": 0.00040395847562447307, "loss": 0.2271, "step": 209260 }, { "epoch": 8.67, "grad_norm": 0.48046875, "learning_rate": 0.0004039499308108075, "loss": 0.2338, "step": 209270 }, { "epoch": 8.67, "grad_norm": 0.80859375, "learning_rate": 0.0004039413857074248, "loss": 0.1777, "step": 209280 }, { "epoch": 8.67, "grad_norm": 1.6875, "learning_rate": 0.0004039328403143411, "loss": 0.1926, "step": 209290 }, { "epoch": 8.67, "grad_norm": 0.71875, "learning_rate": 0.00040392429463157234, "loss": 0.1851, "step": 209300 }, { "epoch": 8.67, "grad_norm": 1.4921875, "learning_rate": 0.0004039157486591347, "loss": 0.1917, "step": 209310 }, { "epoch": 8.67, "grad_norm": 0.91015625, "learning_rate": 0.0004039072023970443, "loss": 0.2118, "step": 209320 }, { "epoch": 8.67, "grad_norm": 0.0, "learning_rate": 0.00040389865584531716, "loss": 0.1796, "step": 209330 }, { "epoch": 8.67, "grad_norm": 0.8046875, "learning_rate": 0.0004038901090039693, "loss": 0.2138, "step": 209340 }, { "epoch": 8.67, "grad_norm": 0.58984375, "learning_rate": 0.00040388156187301684, "loss": 0.1951, "step": 209350 }, { "epoch": 8.67, "grad_norm": 0.55078125, "learning_rate": 0.000403873014452476, "loss": 0.1856, "step": 209360 }, { "epoch": 8.67, "grad_norm": 0.80859375, "learning_rate": 0.0004038644667423628, "loss": 0.1707, "step": 209370 }, { "epoch": 8.67, "grad_norm": 0.7734375, "learning_rate": 0.00040385591874269314, "loss": 0.2015, "step": 209380 }, { "epoch": 8.67, "grad_norm": 0.5078125, "learning_rate": 0.00040384737045348336, "loss": 0.1607, "step": 209390 }, { "epoch": 8.67, "grad_norm": 0.91015625, "learning_rate": 0.0004038388218747494, "loss": 0.2007, "step": 209400 }, { "epoch": 8.67, "grad_norm": 2.171875, "learning_rate": 0.0004038302730065074, "loss": 0.1896, "step": 209410 }, { "epoch": 8.67, "grad_norm": 0.46875, "learning_rate": 0.00040382172384877346, "loss": 0.1603, "step": 209420 }, { "epoch": 8.67, "grad_norm": 0.8828125, "learning_rate": 0.0004038131744015636, "loss": 0.2058, "step": 209430 }, { "epoch": 8.67, "grad_norm": 0.6640625, "learning_rate": 0.000403804624664894, "loss": 0.2113, "step": 209440 }, { "epoch": 8.68, "grad_norm": 0.44921875, "learning_rate": 0.0004037960746387807, "loss": 0.2043, "step": 209450 }, { "epoch": 8.68, "grad_norm": 0.515625, "learning_rate": 0.0004037875243232398, "loss": 0.2517, "step": 209460 }, { "epoch": 8.68, "grad_norm": 1.265625, "learning_rate": 0.00040377897371828735, "loss": 0.2055, "step": 209470 }, { "epoch": 8.68, "grad_norm": 0.1484375, "learning_rate": 0.0004037704228239395, "loss": 0.2231, "step": 209480 }, { "epoch": 8.68, "grad_norm": 0.7578125, "learning_rate": 0.0004037618716402123, "loss": 0.2211, "step": 209490 }, { "epoch": 8.68, "grad_norm": 0.376953125, "learning_rate": 0.00040375332016712195, "loss": 0.2092, "step": 209500 }, { "epoch": 8.68, "grad_norm": 1.6953125, "learning_rate": 0.0004037447684046843, "loss": 0.2345, "step": 209510 }, { "epoch": 8.68, "grad_norm": 0.8515625, "learning_rate": 0.00040373621635291575, "loss": 0.2353, "step": 209520 }, { "epoch": 8.68, "grad_norm": 0.2119140625, "learning_rate": 0.00040372766401183216, "loss": 0.2202, "step": 209530 }, { "epoch": 8.68, "grad_norm": 1.5625, "learning_rate": 0.0004037191113814498, "loss": 0.1882, "step": 209540 }, { "epoch": 8.68, "grad_norm": 0.185546875, "learning_rate": 0.00040371055846178457, "loss": 0.2161, "step": 209550 }, { "epoch": 8.68, "grad_norm": 0.98046875, "learning_rate": 0.0004037020052528527, "loss": 0.2133, "step": 209560 }, { "epoch": 8.68, "grad_norm": 0.458984375, "learning_rate": 0.00040369345175467024, "loss": 0.2153, "step": 209570 }, { "epoch": 8.68, "grad_norm": 0.796875, "learning_rate": 0.0004036848979672533, "loss": 0.2158, "step": 209580 }, { "epoch": 8.68, "grad_norm": 0.64453125, "learning_rate": 0.000403676343890618, "loss": 0.1865, "step": 209590 }, { "epoch": 8.68, "grad_norm": 1.0625, "learning_rate": 0.00040366778952478043, "loss": 0.197, "step": 209600 }, { "epoch": 8.68, "grad_norm": 1.3203125, "learning_rate": 0.0004036592348697566, "loss": 0.1992, "step": 209610 }, { "epoch": 8.68, "grad_norm": 0.50390625, "learning_rate": 0.0004036506799255628, "loss": 0.2089, "step": 209620 }, { "epoch": 8.68, "grad_norm": 1.515625, "learning_rate": 0.00040364212469221495, "loss": 0.1769, "step": 209630 }, { "epoch": 8.68, "grad_norm": 1.046875, "learning_rate": 0.0004036335691697292, "loss": 0.221, "step": 209640 }, { "epoch": 8.68, "grad_norm": 0.76171875, "learning_rate": 0.00040362501335812175, "loss": 0.148, "step": 209650 }, { "epoch": 8.68, "grad_norm": 0.60546875, "learning_rate": 0.00040361645725740846, "loss": 0.1874, "step": 209660 }, { "epoch": 8.68, "grad_norm": 0.84765625, "learning_rate": 0.00040360790086760567, "loss": 0.2297, "step": 209670 }, { "epoch": 8.68, "grad_norm": 0.462890625, "learning_rate": 0.0004035993441887295, "loss": 0.2456, "step": 209680 }, { "epoch": 8.69, "grad_norm": 0.53125, "learning_rate": 0.00040359078722079577, "loss": 0.2283, "step": 209690 }, { "epoch": 8.69, "grad_norm": 0.298828125, "learning_rate": 0.00040358222996382087, "loss": 0.2173, "step": 209700 }, { "epoch": 8.69, "grad_norm": 0.703125, "learning_rate": 0.00040357367241782076, "loss": 0.1941, "step": 209710 }, { "epoch": 8.69, "grad_norm": 1.0234375, "learning_rate": 0.00040356511458281165, "loss": 0.248, "step": 209720 }, { "epoch": 8.69, "grad_norm": 0.482421875, "learning_rate": 0.0004035565564588095, "loss": 0.1952, "step": 209730 }, { "epoch": 8.69, "grad_norm": 0.79296875, "learning_rate": 0.0004035479980458305, "loss": 0.2213, "step": 209740 }, { "epoch": 8.69, "grad_norm": 0.478515625, "learning_rate": 0.0004035394393438908, "loss": 0.1901, "step": 209750 }, { "epoch": 8.69, "grad_norm": 0.287109375, "learning_rate": 0.00040353088035300646, "loss": 0.1869, "step": 209760 }, { "epoch": 8.69, "grad_norm": 0.5, "learning_rate": 0.0004035223210731935, "loss": 0.172, "step": 209770 }, { "epoch": 8.69, "grad_norm": 1.0234375, "learning_rate": 0.0004035137615044682, "loss": 0.2691, "step": 209780 }, { "epoch": 8.69, "grad_norm": 1.21875, "learning_rate": 0.0004035052016468465, "loss": 0.1305, "step": 209790 }, { "epoch": 8.69, "grad_norm": 0.2890625, "learning_rate": 0.0004034966415003446, "loss": 0.173, "step": 209800 }, { "epoch": 8.69, "grad_norm": 0.263671875, "learning_rate": 0.0004034880810649787, "loss": 0.1747, "step": 209810 }, { "epoch": 8.69, "grad_norm": 0.5859375, "learning_rate": 0.0004034795203407646, "loss": 0.2245, "step": 209820 }, { "epoch": 8.69, "grad_norm": 0.56640625, "learning_rate": 0.00040347095932771884, "loss": 0.2022, "step": 209830 }, { "epoch": 8.69, "grad_norm": 0.91796875, "learning_rate": 0.0004034623980258572, "loss": 0.2388, "step": 209840 }, { "epoch": 8.69, "grad_norm": 1.6328125, "learning_rate": 0.0004034538364351959, "loss": 0.166, "step": 209850 }, { "epoch": 8.69, "grad_norm": 1.3984375, "learning_rate": 0.00040344527455575107, "loss": 0.1952, "step": 209860 }, { "epoch": 8.69, "grad_norm": 0.44140625, "learning_rate": 0.0004034367123875388, "loss": 0.1689, "step": 209870 }, { "epoch": 8.69, "grad_norm": 0.353515625, "learning_rate": 0.00040342814993057517, "loss": 0.1996, "step": 209880 }, { "epoch": 8.69, "grad_norm": 0.75, "learning_rate": 0.0004034195871848764, "loss": 0.1344, "step": 209890 }, { "epoch": 8.69, "grad_norm": 0.6640625, "learning_rate": 0.0004034110241504585, "loss": 0.2009, "step": 209900 }, { "epoch": 8.69, "grad_norm": 0.44921875, "learning_rate": 0.0004034024608273377, "loss": 0.2226, "step": 209910 }, { "epoch": 8.69, "grad_norm": 0.59375, "learning_rate": 0.0004033938972155299, "loss": 0.2084, "step": 209920 }, { "epoch": 8.7, "grad_norm": 0.474609375, "learning_rate": 0.00040338533331505137, "loss": 0.2353, "step": 209930 }, { "epoch": 8.7, "grad_norm": 0.8828125, "learning_rate": 0.0004033767691259183, "loss": 0.1947, "step": 209940 }, { "epoch": 8.7, "grad_norm": 0.5859375, "learning_rate": 0.0004033682046481466, "loss": 0.2169, "step": 209950 }, { "epoch": 8.7, "grad_norm": 0.328125, "learning_rate": 0.0004033596398817526, "loss": 0.1704, "step": 209960 }, { "epoch": 8.7, "grad_norm": 0.54296875, "learning_rate": 0.00040335107482675225, "loss": 0.2082, "step": 209970 }, { "epoch": 8.7, "grad_norm": 0.67578125, "learning_rate": 0.00040334250948316177, "loss": 0.2046, "step": 209980 }, { "epoch": 8.7, "grad_norm": 0.81640625, "learning_rate": 0.0004033339438509973, "loss": 0.207, "step": 209990 }, { "epoch": 8.7, "grad_norm": 1.3671875, "learning_rate": 0.0004033253779302748, "loss": 0.2241, "step": 210000 }, { "epoch": 8.7, "grad_norm": 0.59375, "learning_rate": 0.00040331681172101053, "loss": 0.1825, "step": 210010 }, { "epoch": 8.7, "grad_norm": 0.0, "learning_rate": 0.00040330824522322064, "loss": 0.2452, "step": 210020 }, { "epoch": 8.7, "grad_norm": 0.26171875, "learning_rate": 0.0004032996784369211, "loss": 0.2041, "step": 210030 }, { "epoch": 8.7, "grad_norm": 3.28125, "learning_rate": 0.00040329111136212814, "loss": 0.2061, "step": 210040 }, { "epoch": 8.7, "grad_norm": 0.4765625, "learning_rate": 0.00040328254399885793, "loss": 0.2416, "step": 210050 }, { "epoch": 8.7, "grad_norm": 0.76953125, "learning_rate": 0.00040327397634712646, "loss": 0.1605, "step": 210060 }, { "epoch": 8.7, "grad_norm": 0.5390625, "learning_rate": 0.0004032654084069499, "loss": 0.2337, "step": 210070 }, { "epoch": 8.7, "grad_norm": 0.275390625, "learning_rate": 0.0004032568401783445, "loss": 0.1339, "step": 210080 }, { "epoch": 8.7, "grad_norm": 0.74609375, "learning_rate": 0.0004032482716613262, "loss": 0.2111, "step": 210090 }, { "epoch": 8.7, "grad_norm": 0.458984375, "learning_rate": 0.0004032397028559112, "loss": 0.133, "step": 210100 }, { "epoch": 8.7, "grad_norm": 2.453125, "learning_rate": 0.00040323113376211563, "loss": 0.2201, "step": 210110 }, { "epoch": 8.7, "grad_norm": 0.8671875, "learning_rate": 0.00040322256437995565, "loss": 0.2167, "step": 210120 }, { "epoch": 8.7, "grad_norm": 1.046875, "learning_rate": 0.00040321399470944733, "loss": 0.1806, "step": 210130 }, { "epoch": 8.7, "grad_norm": 0.8046875, "learning_rate": 0.0004032054247506067, "loss": 0.2369, "step": 210140 }, { "epoch": 8.7, "grad_norm": 0.478515625, "learning_rate": 0.0004031968545034502, "loss": 0.212, "step": 210150 }, { "epoch": 8.7, "grad_norm": 0.62890625, "learning_rate": 0.00040318828396799367, "loss": 0.2035, "step": 210160 }, { "epoch": 8.71, "grad_norm": 1.6953125, "learning_rate": 0.0004031797131442533, "loss": 0.2157, "step": 210170 }, { "epoch": 8.71, "grad_norm": 0.7265625, "learning_rate": 0.00040317114203224536, "loss": 0.2208, "step": 210180 }, { "epoch": 8.71, "grad_norm": 0.490234375, "learning_rate": 0.00040316257063198577, "loss": 0.2034, "step": 210190 }, { "epoch": 8.71, "grad_norm": 0.1796875, "learning_rate": 0.0004031539989434908, "loss": 0.1864, "step": 210200 }, { "epoch": 8.71, "grad_norm": 1.15625, "learning_rate": 0.0004031454269667766, "loss": 0.1824, "step": 210210 }, { "epoch": 8.71, "grad_norm": 0.5625, "learning_rate": 0.0004031368547018591, "loss": 0.2283, "step": 210220 }, { "epoch": 8.71, "grad_norm": 2.109375, "learning_rate": 0.00040312828214875466, "loss": 0.1725, "step": 210230 }, { "epoch": 8.71, "grad_norm": 0.7890625, "learning_rate": 0.0004031197093074793, "loss": 0.1978, "step": 210240 }, { "epoch": 8.71, "grad_norm": 0.9296875, "learning_rate": 0.0004031111361780493, "loss": 0.2282, "step": 210250 }, { "epoch": 8.71, "grad_norm": 0.9375, "learning_rate": 0.00040310256276048054, "loss": 0.1724, "step": 210260 }, { "epoch": 8.71, "grad_norm": 0.484375, "learning_rate": 0.00040309398905478933, "loss": 0.2832, "step": 210270 }, { "epoch": 8.71, "grad_norm": 0.1904296875, "learning_rate": 0.00040308541506099184, "loss": 0.1896, "step": 210280 }, { "epoch": 8.71, "grad_norm": 0.6796875, "learning_rate": 0.000403076840779104, "loss": 0.1759, "step": 210290 }, { "epoch": 8.71, "grad_norm": 1.09375, "learning_rate": 0.00040306826620914216, "loss": 0.2034, "step": 210300 }, { "epoch": 8.71, "grad_norm": 0.75, "learning_rate": 0.00040305969135112234, "loss": 0.1662, "step": 210310 }, { "epoch": 8.71, "grad_norm": 1.5, "learning_rate": 0.00040305111620506074, "loss": 0.1812, "step": 210320 }, { "epoch": 8.71, "grad_norm": 0.384765625, "learning_rate": 0.00040304254077097346, "loss": 0.2231, "step": 210330 }, { "epoch": 8.71, "grad_norm": 0.478515625, "learning_rate": 0.0004030339650488766, "loss": 0.1769, "step": 210340 }, { "epoch": 8.71, "grad_norm": 0.66796875, "learning_rate": 0.00040302538903878637, "loss": 0.1758, "step": 210350 }, { "epoch": 8.71, "grad_norm": 2.3125, "learning_rate": 0.00040301681274071893, "loss": 0.1953, "step": 210360 }, { "epoch": 8.71, "grad_norm": 1.1875, "learning_rate": 0.0004030082361546903, "loss": 0.2295, "step": 210370 }, { "epoch": 8.71, "grad_norm": 0.51171875, "learning_rate": 0.00040299965928071674, "loss": 0.2036, "step": 210380 }, { "epoch": 8.71, "grad_norm": 0.50390625, "learning_rate": 0.00040299108211881436, "loss": 0.2027, "step": 210390 }, { "epoch": 8.71, "grad_norm": 0.6015625, "learning_rate": 0.0004029825046689992, "loss": 0.2077, "step": 210400 }, { "epoch": 8.72, "grad_norm": 0.78125, "learning_rate": 0.0004029739269312876, "loss": 0.1889, "step": 210410 }, { "epoch": 8.72, "grad_norm": 0.61328125, "learning_rate": 0.0004029653489056956, "loss": 0.195, "step": 210420 }, { "epoch": 8.72, "grad_norm": 0.84765625, "learning_rate": 0.00040295677059223916, "loss": 0.206, "step": 210430 }, { "epoch": 8.72, "grad_norm": 0.34765625, "learning_rate": 0.00040294819199093474, "loss": 0.2526, "step": 210440 }, { "epoch": 8.72, "grad_norm": 1.3359375, "learning_rate": 0.0004029396131017983, "loss": 0.1999, "step": 210450 }, { "epoch": 8.72, "grad_norm": 0.70703125, "learning_rate": 0.00040293103392484604, "loss": 0.25, "step": 210460 }, { "epoch": 8.72, "grad_norm": 1.0, "learning_rate": 0.00040292245446009407, "loss": 0.1952, "step": 210470 }, { "epoch": 8.72, "grad_norm": 0.52734375, "learning_rate": 0.0004029138747075586, "loss": 0.1804, "step": 210480 }, { "epoch": 8.72, "grad_norm": 0.51171875, "learning_rate": 0.00040290529466725566, "loss": 0.1981, "step": 210490 }, { "epoch": 8.72, "grad_norm": 0.359375, "learning_rate": 0.0004028967143392015, "loss": 0.21, "step": 210500 }, { "epoch": 8.72, "grad_norm": 1.234375, "learning_rate": 0.00040288813372341224, "loss": 0.181, "step": 210510 }, { "epoch": 8.72, "grad_norm": 0.490234375, "learning_rate": 0.00040287955281990403, "loss": 0.264, "step": 210520 }, { "epoch": 8.72, "grad_norm": 0.5625, "learning_rate": 0.00040287097162869297, "loss": 0.1518, "step": 210530 }, { "epoch": 8.72, "grad_norm": 0.435546875, "learning_rate": 0.0004028623901497953, "loss": 0.1595, "step": 210540 }, { "epoch": 8.72, "grad_norm": 0.67578125, "learning_rate": 0.0004028538083832271, "loss": 0.2258, "step": 210550 }, { "epoch": 8.72, "grad_norm": 0.84765625, "learning_rate": 0.00040284522632900457, "loss": 0.1826, "step": 210560 }, { "epoch": 8.72, "grad_norm": 0.6953125, "learning_rate": 0.0004028366439871438, "loss": 0.212, "step": 210570 }, { "epoch": 8.72, "grad_norm": 0.4140625, "learning_rate": 0.0004028280613576609, "loss": 0.1799, "step": 210580 }, { "epoch": 8.72, "grad_norm": 0.6640625, "learning_rate": 0.0004028194784405722, "loss": 0.2029, "step": 210590 }, { "epoch": 8.72, "grad_norm": 0.57421875, "learning_rate": 0.00040281089523589364, "loss": 0.2372, "step": 210600 }, { "epoch": 8.72, "grad_norm": 0.5546875, "learning_rate": 0.00040280231174364155, "loss": 0.1617, "step": 210610 }, { "epoch": 8.72, "grad_norm": 0.51953125, "learning_rate": 0.0004027937279638321, "loss": 0.2222, "step": 210620 }, { "epoch": 8.72, "grad_norm": 0.4453125, "learning_rate": 0.00040278514389648117, "loss": 0.1991, "step": 210630 }, { "epoch": 8.72, "grad_norm": 0.609375, "learning_rate": 0.0004027765595416052, "loss": 0.2064, "step": 210640 }, { "epoch": 8.73, "grad_norm": 0.69140625, "learning_rate": 0.0004027679748992202, "loss": 0.2572, "step": 210650 }, { "epoch": 8.73, "grad_norm": 0.458984375, "learning_rate": 0.0004027593899693424, "loss": 0.1636, "step": 210660 }, { "epoch": 8.73, "grad_norm": 0.62109375, "learning_rate": 0.000402750804751988, "loss": 0.2107, "step": 210670 }, { "epoch": 8.73, "grad_norm": 0.32421875, "learning_rate": 0.000402742219247173, "loss": 0.2028, "step": 210680 }, { "epoch": 8.73, "grad_norm": 0.69140625, "learning_rate": 0.00040273363345491364, "loss": 0.2194, "step": 210690 }, { "epoch": 8.73, "grad_norm": 0.54296875, "learning_rate": 0.0004027250473752261, "loss": 0.1513, "step": 210700 }, { "epoch": 8.73, "grad_norm": 0.8671875, "learning_rate": 0.00040271646100812643, "loss": 0.2146, "step": 210710 }, { "epoch": 8.73, "grad_norm": 0.6796875, "learning_rate": 0.000402707874353631, "loss": 0.1879, "step": 210720 }, { "epoch": 8.73, "grad_norm": 0.76171875, "learning_rate": 0.00040269928741175576, "loss": 0.2045, "step": 210730 }, { "epoch": 8.73, "grad_norm": 0.58203125, "learning_rate": 0.00040269070018251693, "loss": 0.1714, "step": 210740 }, { "epoch": 8.73, "grad_norm": 0.35546875, "learning_rate": 0.0004026821126659307, "loss": 0.2292, "step": 210750 }, { "epoch": 8.73, "grad_norm": 1.4765625, "learning_rate": 0.00040267352486201327, "loss": 0.2061, "step": 210760 }, { "epoch": 8.73, "grad_norm": 1.078125, "learning_rate": 0.00040266493677078074, "loss": 0.2124, "step": 210770 }, { "epoch": 8.73, "grad_norm": 0.48046875, "learning_rate": 0.0004026563483922493, "loss": 0.2201, "step": 210780 }, { "epoch": 8.73, "grad_norm": 0.921875, "learning_rate": 0.0004026477597264351, "loss": 0.1647, "step": 210790 }, { "epoch": 8.73, "grad_norm": 0.58203125, "learning_rate": 0.0004026391707733542, "loss": 0.1535, "step": 210800 }, { "epoch": 8.73, "grad_norm": 0.419921875, "learning_rate": 0.00040263058153302303, "loss": 0.1723, "step": 210810 }, { "epoch": 8.73, "grad_norm": 0.875, "learning_rate": 0.00040262199200545747, "loss": 0.1886, "step": 210820 }, { "epoch": 8.73, "grad_norm": 0.76953125, "learning_rate": 0.00040261340219067385, "loss": 0.1375, "step": 210830 }, { "epoch": 8.73, "grad_norm": 0.51171875, "learning_rate": 0.00040260481208868827, "loss": 0.1653, "step": 210840 }, { "epoch": 8.73, "grad_norm": 1.0625, "learning_rate": 0.0004025962216995169, "loss": 0.2241, "step": 210850 }, { "epoch": 8.73, "grad_norm": 1.90625, "learning_rate": 0.0004025876310231759, "loss": 0.1969, "step": 210860 }, { "epoch": 8.73, "grad_norm": 0.87890625, "learning_rate": 0.0004025790400596815, "loss": 0.2038, "step": 210870 }, { "epoch": 8.73, "grad_norm": 0.62890625, "learning_rate": 0.0004025704488090498, "loss": 0.1841, "step": 210880 }, { "epoch": 8.74, "grad_norm": 0.55859375, "learning_rate": 0.00040256185727129704, "loss": 0.1932, "step": 210890 }, { "epoch": 8.74, "grad_norm": 0.0022735595703125, "learning_rate": 0.0004025532654464393, "loss": 0.187, "step": 210900 }, { "epoch": 8.74, "grad_norm": 0.80078125, "learning_rate": 0.0004025446733344928, "loss": 0.2036, "step": 210910 }, { "epoch": 8.74, "grad_norm": 0.96875, "learning_rate": 0.00040253608093547365, "loss": 0.2391, "step": 210920 }, { "epoch": 8.74, "grad_norm": 0.578125, "learning_rate": 0.0004025274882493981, "loss": 0.1989, "step": 210930 }, { "epoch": 8.74, "grad_norm": 0.5703125, "learning_rate": 0.0004025188952762824, "loss": 0.1847, "step": 210940 }, { "epoch": 8.74, "grad_norm": 1.375, "learning_rate": 0.0004025103020161425, "loss": 0.171, "step": 210950 }, { "epoch": 8.74, "grad_norm": 0.8203125, "learning_rate": 0.00040250170846899467, "loss": 0.2167, "step": 210960 }, { "epoch": 8.74, "grad_norm": 0.640625, "learning_rate": 0.0004024931146348551, "loss": 0.1827, "step": 210970 }, { "epoch": 8.74, "grad_norm": 0.69921875, "learning_rate": 0.00040248452051373996, "loss": 0.2081, "step": 210980 }, { "epoch": 8.74, "grad_norm": 0.53515625, "learning_rate": 0.0004024759261056654, "loss": 0.2187, "step": 210990 }, { "epoch": 8.74, "grad_norm": 0.65234375, "learning_rate": 0.00040246733141064767, "loss": 0.186, "step": 211000 }, { "epoch": 8.74, "grad_norm": 1.125, "learning_rate": 0.0004024587364287029, "loss": 0.2681, "step": 211010 }, { "epoch": 8.74, "grad_norm": 1.078125, "learning_rate": 0.0004024501411598472, "loss": 0.1776, "step": 211020 }, { "epoch": 8.74, "grad_norm": 0.80859375, "learning_rate": 0.00040244154560409675, "loss": 0.2003, "step": 211030 }, { "epoch": 8.74, "grad_norm": 1.1640625, "learning_rate": 0.00040243294976146786, "loss": 0.2116, "step": 211040 }, { "epoch": 8.74, "grad_norm": 1.2734375, "learning_rate": 0.0004024243536319766, "loss": 0.2403, "step": 211050 }, { "epoch": 8.74, "grad_norm": 0.55078125, "learning_rate": 0.0004024157572156392, "loss": 0.1981, "step": 211060 }, { "epoch": 8.74, "grad_norm": 0.703125, "learning_rate": 0.0004024071605124717, "loss": 0.1945, "step": 211070 }, { "epoch": 8.74, "grad_norm": 0.462890625, "learning_rate": 0.0004023985635224905, "loss": 0.1785, "step": 211080 }, { "epoch": 8.74, "grad_norm": 0.51171875, "learning_rate": 0.00040238996624571155, "loss": 0.1934, "step": 211090 }, { "epoch": 8.74, "grad_norm": 0.390625, "learning_rate": 0.00040238136868215126, "loss": 0.1954, "step": 211100 }, { "epoch": 8.74, "grad_norm": 1.9609375, "learning_rate": 0.0004023727708318256, "loss": 0.2305, "step": 211110 }, { "epoch": 8.74, "grad_norm": 0.359375, "learning_rate": 0.0004023641726947509, "loss": 0.1837, "step": 211120 }, { "epoch": 8.74, "grad_norm": 0.84765625, "learning_rate": 0.00040235557427094316, "loss": 0.165, "step": 211130 }, { "epoch": 8.75, "grad_norm": 0.69140625, "learning_rate": 0.00040234697556041884, "loss": 0.1657, "step": 211140 }, { "epoch": 8.75, "grad_norm": 0.88671875, "learning_rate": 0.0004023383765631939, "loss": 0.2071, "step": 211150 }, { "epoch": 8.75, "grad_norm": 0.5390625, "learning_rate": 0.00040232977727928455, "loss": 0.2323, "step": 211160 }, { "epoch": 8.75, "grad_norm": 1.2578125, "learning_rate": 0.00040232117770870704, "loss": 0.2182, "step": 211170 }, { "epoch": 8.75, "grad_norm": 0.9375, "learning_rate": 0.00040231257785147754, "loss": 0.1528, "step": 211180 }, { "epoch": 8.75, "grad_norm": 0.60546875, "learning_rate": 0.0004023039777076122, "loss": 0.1793, "step": 211190 }, { "epoch": 8.75, "grad_norm": 0.63671875, "learning_rate": 0.00040229537727712723, "loss": 0.1994, "step": 211200 }, { "epoch": 8.75, "grad_norm": 0.5625, "learning_rate": 0.0004022867765600388, "loss": 0.2012, "step": 211210 }, { "epoch": 8.75, "grad_norm": 1.46875, "learning_rate": 0.00040227817555636307, "loss": 0.2073, "step": 211220 }, { "epoch": 8.75, "grad_norm": 0.55078125, "learning_rate": 0.00040226957426611634, "loss": 0.197, "step": 211230 }, { "epoch": 8.75, "grad_norm": 0.50390625, "learning_rate": 0.0004022609726893146, "loss": 0.1651, "step": 211240 }, { "epoch": 8.75, "grad_norm": 1.640625, "learning_rate": 0.0004022523708259742, "loss": 0.2669, "step": 211250 }, { "epoch": 8.75, "grad_norm": 0.41015625, "learning_rate": 0.00040224376867611136, "loss": 0.1884, "step": 211260 }, { "epoch": 8.75, "grad_norm": 0.79296875, "learning_rate": 0.0004022351662397421, "loss": 0.1998, "step": 211270 }, { "epoch": 8.75, "grad_norm": 0.1513671875, "learning_rate": 0.00040222656351688273, "loss": 0.1902, "step": 211280 }, { "epoch": 8.75, "grad_norm": 0.90625, "learning_rate": 0.0004022179605075494, "loss": 0.1958, "step": 211290 }, { "epoch": 8.75, "grad_norm": 0.56640625, "learning_rate": 0.0004022093572117583, "loss": 0.1798, "step": 211300 }, { "epoch": 8.75, "grad_norm": 0.58203125, "learning_rate": 0.0004022007536295256, "loss": 0.1835, "step": 211310 }, { "epoch": 8.75, "grad_norm": 1.0078125, "learning_rate": 0.0004021921497608676, "loss": 0.1846, "step": 211320 }, { "epoch": 8.75, "grad_norm": 0.0, "learning_rate": 0.00040218354560580036, "loss": 0.1926, "step": 211330 }, { "epoch": 8.75, "grad_norm": 0.90234375, "learning_rate": 0.0004021749411643401, "loss": 0.1393, "step": 211340 }, { "epoch": 8.75, "grad_norm": 0.890625, "learning_rate": 0.00040216633643650303, "loss": 0.2574, "step": 211350 }, { "epoch": 8.75, "grad_norm": 0.48046875, "learning_rate": 0.00040215773142230536, "loss": 0.157, "step": 211360 }, { "epoch": 8.75, "grad_norm": 0.400390625, "learning_rate": 0.0004021491261217633, "loss": 0.1965, "step": 211370 }, { "epoch": 8.76, "grad_norm": 0.91796875, "learning_rate": 0.000402140520534893, "loss": 0.1905, "step": 211380 }, { "epoch": 8.76, "grad_norm": 0.6875, "learning_rate": 0.0004021319146617107, "loss": 0.1972, "step": 211390 }, { "epoch": 8.76, "grad_norm": 0.5, "learning_rate": 0.00040212330850223255, "loss": 0.1649, "step": 211400 }, { "epoch": 8.76, "grad_norm": 1.03125, "learning_rate": 0.0004021147020564747, "loss": 0.2033, "step": 211410 }, { "epoch": 8.76, "grad_norm": 0.53515625, "learning_rate": 0.0004021060953244534, "loss": 0.16, "step": 211420 }, { "epoch": 8.76, "grad_norm": 0.5859375, "learning_rate": 0.00040209748830618495, "loss": 0.215, "step": 211430 }, { "epoch": 8.76, "grad_norm": 0.7734375, "learning_rate": 0.0004020888810016854, "loss": 0.189, "step": 211440 }, { "epoch": 8.76, "grad_norm": 0.8359375, "learning_rate": 0.00040208027341097103, "loss": 0.2438, "step": 211450 }, { "epoch": 8.76, "grad_norm": 0.8671875, "learning_rate": 0.000402071665534058, "loss": 0.1582, "step": 211460 }, { "epoch": 8.76, "grad_norm": 0.46484375, "learning_rate": 0.0004020630573709625, "loss": 0.1923, "step": 211470 }, { "epoch": 8.76, "grad_norm": 0.3828125, "learning_rate": 0.00040205444892170075, "loss": 0.1702, "step": 211480 }, { "epoch": 8.76, "grad_norm": 0.419921875, "learning_rate": 0.000402045840186289, "loss": 0.221, "step": 211490 }, { "epoch": 8.76, "grad_norm": 0.75390625, "learning_rate": 0.00040203723116474327, "loss": 0.1907, "step": 211500 }, { "epoch": 8.76, "grad_norm": 0.765625, "learning_rate": 0.00040202862185708, "loss": 0.1956, "step": 211510 }, { "epoch": 8.76, "grad_norm": 1.1875, "learning_rate": 0.00040202001226331526, "loss": 0.2808, "step": 211520 }, { "epoch": 8.76, "grad_norm": 1.21875, "learning_rate": 0.0004020114023834652, "loss": 0.1901, "step": 211530 }, { "epoch": 8.76, "grad_norm": 0.62109375, "learning_rate": 0.0004020027922175462, "loss": 0.2138, "step": 211540 }, { "epoch": 8.76, "grad_norm": 0.3671875, "learning_rate": 0.00040199418176557435, "loss": 0.1438, "step": 211550 }, { "epoch": 8.76, "grad_norm": 0.396484375, "learning_rate": 0.00040198557102756583, "loss": 0.1624, "step": 211560 }, { "epoch": 8.76, "grad_norm": 0.671875, "learning_rate": 0.0004019769600035369, "loss": 0.2217, "step": 211570 }, { "epoch": 8.76, "grad_norm": 1.2265625, "learning_rate": 0.0004019683486935037, "loss": 0.2052, "step": 211580 }, { "epoch": 8.76, "grad_norm": 1.640625, "learning_rate": 0.00040195973709748246, "loss": 0.2314, "step": 211590 }, { "epoch": 8.76, "grad_norm": 0.5, "learning_rate": 0.0004019511252154895, "loss": 0.2235, "step": 211600 }, { "epoch": 8.76, "grad_norm": 0.82421875, "learning_rate": 0.0004019425130475409, "loss": 0.2038, "step": 211610 }, { "epoch": 8.77, "grad_norm": 0.5546875, "learning_rate": 0.00040193390059365286, "loss": 0.233, "step": 211620 }, { "epoch": 8.77, "grad_norm": 0.232421875, "learning_rate": 0.00040192528785384165, "loss": 0.1997, "step": 211630 }, { "epoch": 8.77, "grad_norm": 1.09375, "learning_rate": 0.00040191667482812344, "loss": 0.2133, "step": 211640 }, { "epoch": 8.77, "grad_norm": 0.6171875, "learning_rate": 0.0004019080615165145, "loss": 0.2001, "step": 211650 }, { "epoch": 8.77, "grad_norm": 0.43359375, "learning_rate": 0.00040189944791903085, "loss": 0.1773, "step": 211660 }, { "epoch": 8.77, "grad_norm": 0.2890625, "learning_rate": 0.00040189083403568905, "loss": 0.2404, "step": 211670 }, { "epoch": 8.77, "grad_norm": 0.6640625, "learning_rate": 0.000401882219866505, "loss": 0.1993, "step": 211680 }, { "epoch": 8.77, "grad_norm": 1.0859375, "learning_rate": 0.000401873605411495, "loss": 0.1964, "step": 211690 }, { "epoch": 8.77, "grad_norm": 0.66015625, "learning_rate": 0.00040186499067067525, "loss": 0.2239, "step": 211700 }, { "epoch": 8.77, "grad_norm": 0.3984375, "learning_rate": 0.0004018563756440621, "loss": 0.2136, "step": 211710 }, { "epoch": 8.77, "grad_norm": 0.54296875, "learning_rate": 0.0004018477603316716, "loss": 0.2232, "step": 211720 }, { "epoch": 8.77, "grad_norm": 0.77734375, "learning_rate": 0.00040183914473351995, "loss": 0.238, "step": 211730 }, { "epoch": 8.77, "grad_norm": 0.5703125, "learning_rate": 0.00040183052884962343, "loss": 0.1559, "step": 211740 }, { "epoch": 8.77, "grad_norm": 0.86328125, "learning_rate": 0.0004018219126799983, "loss": 0.2158, "step": 211750 }, { "epoch": 8.77, "grad_norm": 0.77734375, "learning_rate": 0.00040181329622466074, "loss": 0.1768, "step": 211760 }, { "epoch": 8.77, "grad_norm": 1.0390625, "learning_rate": 0.00040180467948362693, "loss": 0.1781, "step": 211770 }, { "epoch": 8.77, "grad_norm": 0.7890625, "learning_rate": 0.0004017960624569131, "loss": 0.1689, "step": 211780 }, { "epoch": 8.77, "grad_norm": 1.5625, "learning_rate": 0.00040178744514453546, "loss": 0.1753, "step": 211790 }, { "epoch": 8.77, "grad_norm": 1.4453125, "learning_rate": 0.0004017788275465103, "loss": 0.167, "step": 211800 }, { "epoch": 8.77, "grad_norm": 0.78125, "learning_rate": 0.0004017702096628537, "loss": 0.1713, "step": 211810 }, { "epoch": 8.77, "grad_norm": 1.078125, "learning_rate": 0.00040176159149358197, "loss": 0.2272, "step": 211820 }, { "epoch": 8.77, "grad_norm": 2.5, "learning_rate": 0.0004017529730387114, "loss": 0.2221, "step": 211830 }, { "epoch": 8.77, "grad_norm": 0.66015625, "learning_rate": 0.00040174435429825804, "loss": 0.1565, "step": 211840 }, { "epoch": 8.77, "grad_norm": 0.875, "learning_rate": 0.0004017357352722382, "loss": 0.1747, "step": 211850 }, { "epoch": 8.78, "grad_norm": 0.45703125, "learning_rate": 0.00040172711596066815, "loss": 0.16, "step": 211860 }, { "epoch": 8.78, "grad_norm": 1.390625, "learning_rate": 0.00040171849636356393, "loss": 0.1927, "step": 211870 }, { "epoch": 8.78, "grad_norm": 0.30078125, "learning_rate": 0.00040170987648094206, "loss": 0.1493, "step": 211880 }, { "epoch": 8.78, "grad_norm": 2.0625, "learning_rate": 0.0004017012563128184, "loss": 0.2403, "step": 211890 }, { "epoch": 8.78, "grad_norm": 0.84375, "learning_rate": 0.00040169263585920946, "loss": 0.1232, "step": 211900 }, { "epoch": 8.78, "grad_norm": 0.87890625, "learning_rate": 0.0004016840151201313, "loss": 0.215, "step": 211910 }, { "epoch": 8.78, "grad_norm": 0.56640625, "learning_rate": 0.0004016753940956003, "loss": 0.1657, "step": 211920 }, { "epoch": 8.78, "grad_norm": 0.8671875, "learning_rate": 0.00040166677278563254, "loss": 0.2171, "step": 211930 }, { "epoch": 8.78, "grad_norm": 1.0, "learning_rate": 0.0004016581511902443, "loss": 0.2235, "step": 211940 }, { "epoch": 8.78, "grad_norm": 1.8046875, "learning_rate": 0.00040164952930945175, "loss": 0.182, "step": 211950 }, { "epoch": 8.78, "grad_norm": 1.0625, "learning_rate": 0.0004016409071432712, "loss": 0.1959, "step": 211960 }, { "epoch": 8.78, "grad_norm": 0.27734375, "learning_rate": 0.0004016322846917188, "loss": 0.1923, "step": 211970 }, { "epoch": 8.78, "grad_norm": 0.427734375, "learning_rate": 0.0004016236619548109, "loss": 0.2466, "step": 211980 }, { "epoch": 8.78, "grad_norm": 0.443359375, "learning_rate": 0.0004016150389325636, "loss": 0.2209, "step": 211990 }, { "epoch": 8.78, "grad_norm": 2.1875, "learning_rate": 0.0004016064156249931, "loss": 0.2528, "step": 212000 }, { "epoch": 8.78, "grad_norm": 0.828125, "learning_rate": 0.0004015977920321158, "loss": 0.206, "step": 212010 }, { "epoch": 8.78, "grad_norm": 0.5625, "learning_rate": 0.00040158916815394774, "loss": 0.2038, "step": 212020 }, { "epoch": 8.78, "grad_norm": 0.51171875, "learning_rate": 0.0004015805439905053, "loss": 0.1909, "step": 212030 }, { "epoch": 8.78, "grad_norm": 0.0, "learning_rate": 0.00040157191954180466, "loss": 0.1799, "step": 212040 }, { "epoch": 8.78, "grad_norm": 0.99609375, "learning_rate": 0.00040156329480786195, "loss": 0.2309, "step": 212050 }, { "epoch": 8.78, "grad_norm": 1.015625, "learning_rate": 0.00040155466978869357, "loss": 0.1731, "step": 212060 }, { "epoch": 8.78, "grad_norm": 0.6328125, "learning_rate": 0.00040154604448431567, "loss": 0.1966, "step": 212070 }, { "epoch": 8.78, "grad_norm": 0.67578125, "learning_rate": 0.00040153741889474445, "loss": 0.1726, "step": 212080 }, { "epoch": 8.78, "grad_norm": 0.4140625, "learning_rate": 0.00040152879301999614, "loss": 0.1882, "step": 212090 }, { "epoch": 8.79, "grad_norm": 1.7109375, "learning_rate": 0.00040152016686008704, "loss": 0.2054, "step": 212100 }, { "epoch": 8.79, "grad_norm": 0.59375, "learning_rate": 0.0004015115404150334, "loss": 0.1888, "step": 212110 }, { "epoch": 8.79, "grad_norm": 0.7421875, "learning_rate": 0.00040150291368485134, "loss": 0.2293, "step": 212120 }, { "epoch": 8.79, "grad_norm": 1.28125, "learning_rate": 0.00040149428666955714, "loss": 0.2449, "step": 212130 }, { "epoch": 8.79, "grad_norm": 0.9296875, "learning_rate": 0.00040148565936916705, "loss": 0.1876, "step": 212140 }, { "epoch": 8.79, "grad_norm": 1.3671875, "learning_rate": 0.00040147703178369733, "loss": 0.2168, "step": 212150 }, { "epoch": 8.79, "grad_norm": 0.78125, "learning_rate": 0.0004014684039131642, "loss": 0.1629, "step": 212160 }, { "epoch": 8.79, "grad_norm": 0.86328125, "learning_rate": 0.00040145977575758394, "loss": 0.2101, "step": 212170 }, { "epoch": 8.79, "grad_norm": 0.734375, "learning_rate": 0.00040145114731697263, "loss": 0.1726, "step": 212180 }, { "epoch": 8.79, "grad_norm": 0.5, "learning_rate": 0.0004014425185913467, "loss": 0.1861, "step": 212190 }, { "epoch": 8.79, "grad_norm": 0.46875, "learning_rate": 0.0004014338895807223, "loss": 0.2055, "step": 212200 }, { "epoch": 8.79, "grad_norm": 0.515625, "learning_rate": 0.00040142526028511563, "loss": 0.2141, "step": 212210 }, { "epoch": 8.79, "grad_norm": 1.1171875, "learning_rate": 0.000401416630704543, "loss": 0.1527, "step": 212220 }, { "epoch": 8.79, "grad_norm": 0.51953125, "learning_rate": 0.0004014080008390206, "loss": 0.2209, "step": 212230 }, { "epoch": 8.79, "grad_norm": 0.671875, "learning_rate": 0.0004013993706885647, "loss": 0.2059, "step": 212240 }, { "epoch": 8.79, "grad_norm": 0.4296875, "learning_rate": 0.00040139074025319154, "loss": 0.2129, "step": 212250 }, { "epoch": 8.79, "grad_norm": 0.6953125, "learning_rate": 0.00040138210953291734, "loss": 0.1804, "step": 212260 }, { "epoch": 8.79, "grad_norm": 0.6015625, "learning_rate": 0.0004013734785277584, "loss": 0.2181, "step": 212270 }, { "epoch": 8.79, "grad_norm": 0.82421875, "learning_rate": 0.0004013648472377309, "loss": 0.1574, "step": 212280 }, { "epoch": 8.79, "grad_norm": 0.73828125, "learning_rate": 0.0004013562156628511, "loss": 0.1671, "step": 212290 }, { "epoch": 8.79, "grad_norm": 0.58984375, "learning_rate": 0.0004013475838031353, "loss": 0.1991, "step": 212300 }, { "epoch": 8.79, "grad_norm": 0.83984375, "learning_rate": 0.0004013389516585996, "loss": 0.2031, "step": 212310 }, { "epoch": 8.79, "grad_norm": 0.875, "learning_rate": 0.0004013303192292604, "loss": 0.1813, "step": 212320 }, { "epoch": 8.79, "grad_norm": 0.53515625, "learning_rate": 0.00040132168651513386, "loss": 0.223, "step": 212330 }, { "epoch": 8.8, "grad_norm": 0.52734375, "learning_rate": 0.0004013130535162362, "loss": 0.2192, "step": 212340 }, { "epoch": 8.8, "grad_norm": 0.76953125, "learning_rate": 0.0004013044202325837, "loss": 0.2197, "step": 212350 }, { "epoch": 8.8, "grad_norm": 0.0, "learning_rate": 0.00040129578666419274, "loss": 0.208, "step": 212360 }, { "epoch": 8.8, "grad_norm": 0.0296630859375, "learning_rate": 0.0004012871528110794, "loss": 0.2312, "step": 212370 }, { "epoch": 8.8, "grad_norm": 0.0, "learning_rate": 0.00040127851867325997, "loss": 0.2008, "step": 212380 }, { "epoch": 8.8, "grad_norm": 1.53125, "learning_rate": 0.00040126988425075074, "loss": 0.1427, "step": 212390 }, { "epoch": 8.8, "grad_norm": 0.6171875, "learning_rate": 0.00040126124954356786, "loss": 0.2439, "step": 212400 }, { "epoch": 8.8, "grad_norm": 0.408203125, "learning_rate": 0.0004012526145517277, "loss": 0.1957, "step": 212410 }, { "epoch": 8.8, "grad_norm": 0.54296875, "learning_rate": 0.00040124397927524636, "loss": 0.1737, "step": 212420 }, { "epoch": 8.8, "grad_norm": 0.95703125, "learning_rate": 0.0004012353437141402, "loss": 0.1967, "step": 212430 }, { "epoch": 8.8, "grad_norm": 0.28125, "learning_rate": 0.0004012267078684255, "loss": 0.1783, "step": 212440 }, { "epoch": 8.8, "grad_norm": 0.384765625, "learning_rate": 0.00040121807173811854, "loss": 0.2233, "step": 212450 }, { "epoch": 8.8, "grad_norm": 0.7890625, "learning_rate": 0.0004012094353232354, "loss": 0.1746, "step": 212460 }, { "epoch": 8.8, "grad_norm": 0.59765625, "learning_rate": 0.0004012007986237924, "loss": 0.1921, "step": 212470 }, { "epoch": 8.8, "grad_norm": 0.84375, "learning_rate": 0.00040119216163980595, "loss": 0.1749, "step": 212480 }, { "epoch": 8.8, "grad_norm": 0.478515625, "learning_rate": 0.00040118352437129215, "loss": 0.2189, "step": 212490 }, { "epoch": 8.8, "grad_norm": 1.5390625, "learning_rate": 0.00040117488681826717, "loss": 0.2163, "step": 212500 }, { "epoch": 8.8, "grad_norm": 0.7421875, "learning_rate": 0.0004011662489807474, "loss": 0.2368, "step": 212510 }, { "epoch": 8.8, "grad_norm": 0.427734375, "learning_rate": 0.00040115761085874913, "loss": 0.1998, "step": 212520 }, { "epoch": 8.8, "grad_norm": 0.89453125, "learning_rate": 0.0004011489724522886, "loss": 0.2385, "step": 212530 }, { "epoch": 8.8, "grad_norm": 0.79296875, "learning_rate": 0.000401140333761382, "loss": 0.1867, "step": 212540 }, { "epoch": 8.8, "grad_norm": 0.89453125, "learning_rate": 0.0004011316947860456, "loss": 0.2222, "step": 212550 }, { "epoch": 8.8, "grad_norm": 0.6875, "learning_rate": 0.00040112305552629567, "loss": 0.2239, "step": 212560 }, { "epoch": 8.8, "grad_norm": 0.291015625, "learning_rate": 0.0004011144159821484, "loss": 0.1591, "step": 212570 }, { "epoch": 8.81, "grad_norm": 0.6015625, "learning_rate": 0.00040110577615362023, "loss": 0.2189, "step": 212580 }, { "epoch": 8.81, "grad_norm": 0.8125, "learning_rate": 0.0004010971360407273, "loss": 0.2343, "step": 212590 }, { "epoch": 8.81, "grad_norm": 0.58984375, "learning_rate": 0.0004010884956434858, "loss": 0.1918, "step": 212600 }, { "epoch": 8.81, "grad_norm": 1.1015625, "learning_rate": 0.0004010798549619121, "loss": 0.1948, "step": 212610 }, { "epoch": 8.81, "grad_norm": 0.48828125, "learning_rate": 0.0004010712139960224, "loss": 0.1796, "step": 212620 }, { "epoch": 8.81, "grad_norm": 0.5703125, "learning_rate": 0.000401062572745833, "loss": 0.1546, "step": 212630 }, { "epoch": 8.81, "grad_norm": 1.0546875, "learning_rate": 0.00040105393121136026, "loss": 0.2275, "step": 212640 }, { "epoch": 8.81, "grad_norm": 0.3984375, "learning_rate": 0.0004010452893926202, "loss": 0.1925, "step": 212650 }, { "epoch": 8.81, "grad_norm": 1.0234375, "learning_rate": 0.0004010366472896293, "loss": 0.2385, "step": 212660 }, { "epoch": 8.81, "grad_norm": 1.125, "learning_rate": 0.0004010280049024037, "loss": 0.2027, "step": 212670 }, { "epoch": 8.81, "grad_norm": 0.70703125, "learning_rate": 0.0004010193622309597, "loss": 0.2307, "step": 212680 }, { "epoch": 8.81, "grad_norm": 0.3359375, "learning_rate": 0.00040101071927531356, "loss": 0.1815, "step": 212690 }, { "epoch": 8.81, "grad_norm": 0.53515625, "learning_rate": 0.0004010020760354816, "loss": 0.2188, "step": 212700 }, { "epoch": 8.81, "grad_norm": 0.671875, "learning_rate": 0.00040099343251148, "loss": 0.1757, "step": 212710 }, { "epoch": 8.81, "grad_norm": 0.84375, "learning_rate": 0.0004009847887033251, "loss": 0.2333, "step": 212720 }, { "epoch": 8.81, "grad_norm": 0.53125, "learning_rate": 0.00040097614461103305, "loss": 0.2385, "step": 212730 }, { "epoch": 8.81, "grad_norm": 0.365234375, "learning_rate": 0.0004009675002346203, "loss": 0.2246, "step": 212740 }, { "epoch": 8.81, "grad_norm": 0.2060546875, "learning_rate": 0.000400958855574103, "loss": 0.1418, "step": 212750 }, { "epoch": 8.81, "grad_norm": 0.87890625, "learning_rate": 0.0004009502106294974, "loss": 0.1784, "step": 212760 }, { "epoch": 8.81, "grad_norm": 0.52734375, "learning_rate": 0.00040094156540081984, "loss": 0.2144, "step": 212770 }, { "epoch": 8.81, "grad_norm": 0.62890625, "learning_rate": 0.0004009329198880865, "loss": 0.1713, "step": 212780 }, { "epoch": 8.81, "grad_norm": 0.91796875, "learning_rate": 0.00040092427409131376, "loss": 0.2052, "step": 212790 }, { "epoch": 8.81, "grad_norm": 0.75390625, "learning_rate": 0.00040091562801051784, "loss": 0.2322, "step": 212800 }, { "epoch": 8.81, "grad_norm": 1.2265625, "learning_rate": 0.00040090698164571494, "loss": 0.2322, "step": 212810 }, { "epoch": 8.81, "grad_norm": 0.7109375, "learning_rate": 0.00040089833499692143, "loss": 0.1815, "step": 212820 }, { "epoch": 8.82, "grad_norm": 0.578125, "learning_rate": 0.0004008896880641536, "loss": 0.1712, "step": 212830 }, { "epoch": 8.82, "grad_norm": 0.34765625, "learning_rate": 0.0004008810408474276, "loss": 0.1985, "step": 212840 }, { "epoch": 8.82, "grad_norm": 1.3203125, "learning_rate": 0.00040087239334675977, "loss": 0.1849, "step": 212850 }, { "epoch": 8.82, "grad_norm": 0.83984375, "learning_rate": 0.0004008637455621664, "loss": 0.1964, "step": 212860 }, { "epoch": 8.82, "grad_norm": 0.7265625, "learning_rate": 0.0004008550974936638, "loss": 0.2078, "step": 212870 }, { "epoch": 8.82, "grad_norm": 1.203125, "learning_rate": 0.00040084644914126824, "loss": 0.1705, "step": 212880 }, { "epoch": 8.82, "grad_norm": 1.09375, "learning_rate": 0.0004008378005049958, "loss": 0.1327, "step": 212890 }, { "epoch": 8.82, "grad_norm": 0.81640625, "learning_rate": 0.000400829151584863, "loss": 0.1954, "step": 212900 }, { "epoch": 8.82, "grad_norm": 0.6640625, "learning_rate": 0.000400820502380886, "loss": 0.1519, "step": 212910 }, { "epoch": 8.82, "grad_norm": 0.55859375, "learning_rate": 0.00040081185289308113, "loss": 0.1682, "step": 212920 }, { "epoch": 8.82, "grad_norm": 1.078125, "learning_rate": 0.00040080320312146467, "loss": 0.249, "step": 212930 }, { "epoch": 8.82, "grad_norm": 0.458984375, "learning_rate": 0.0004007945530660528, "loss": 0.2246, "step": 212940 }, { "epoch": 8.82, "grad_norm": 0.73828125, "learning_rate": 0.00040078590272686187, "loss": 0.1983, "step": 212950 }, { "epoch": 8.82, "grad_norm": 1.1484375, "learning_rate": 0.0004007772521039082, "loss": 0.1929, "step": 212960 }, { "epoch": 8.82, "grad_norm": 0.91015625, "learning_rate": 0.000400768601197208, "loss": 0.1981, "step": 212970 }, { "epoch": 8.82, "grad_norm": 0.84765625, "learning_rate": 0.00040075995000677755, "loss": 0.195, "step": 212980 }, { "epoch": 8.82, "grad_norm": 1.40625, "learning_rate": 0.00040075129853263316, "loss": 0.1959, "step": 212990 }, { "epoch": 8.82, "grad_norm": 0.455078125, "learning_rate": 0.00040074264677479116, "loss": 0.2611, "step": 213000 }, { "epoch": 8.82, "grad_norm": 0.94921875, "learning_rate": 0.0004007339947332678, "loss": 0.2187, "step": 213010 }, { "epoch": 8.82, "grad_norm": 0.373046875, "learning_rate": 0.0004007253424080792, "loss": 0.1869, "step": 213020 }, { "epoch": 8.82, "grad_norm": 1.078125, "learning_rate": 0.0004007166897992419, "loss": 0.2124, "step": 213030 }, { "epoch": 8.82, "grad_norm": 0.81640625, "learning_rate": 0.000400708036906772, "loss": 0.2194, "step": 213040 }, { "epoch": 8.82, "grad_norm": 0.439453125, "learning_rate": 0.00040069938373068586, "loss": 0.1596, "step": 213050 }, { "epoch": 8.82, "grad_norm": 0.76171875, "learning_rate": 0.0004006907302709998, "loss": 0.1921, "step": 213060 }, { "epoch": 8.83, "grad_norm": 0.494140625, "learning_rate": 0.00040068207652773003, "loss": 0.1876, "step": 213070 }, { "epoch": 8.83, "grad_norm": 0.1572265625, "learning_rate": 0.0004006734225008929, "loss": 0.1282, "step": 213080 }, { "epoch": 8.83, "grad_norm": 1.3046875, "learning_rate": 0.00040066476819050455, "loss": 0.2053, "step": 213090 }, { "epoch": 8.83, "grad_norm": 1.1875, "learning_rate": 0.0004006561135965815, "loss": 0.1877, "step": 213100 }, { "epoch": 8.83, "grad_norm": 0.462890625, "learning_rate": 0.0004006474587191399, "loss": 0.1905, "step": 213110 }, { "epoch": 8.83, "grad_norm": 0.53515625, "learning_rate": 0.00040063880355819593, "loss": 0.1871, "step": 213120 }, { "epoch": 8.83, "grad_norm": 2.46875, "learning_rate": 0.00040063014811376606, "loss": 0.2421, "step": 213130 }, { "epoch": 8.83, "grad_norm": 0.9765625, "learning_rate": 0.00040062149238586666, "loss": 0.1955, "step": 213140 }, { "epoch": 8.83, "grad_norm": 1.515625, "learning_rate": 0.00040061283637451376, "loss": 0.1866, "step": 213150 }, { "epoch": 8.83, "grad_norm": 0.6484375, "learning_rate": 0.0004006041800797237, "loss": 0.2222, "step": 213160 }, { "epoch": 8.83, "grad_norm": 0.66015625, "learning_rate": 0.000400595523501513, "loss": 0.1882, "step": 213170 }, { "epoch": 8.83, "grad_norm": 0.44140625, "learning_rate": 0.0004005868666398976, "loss": 0.171, "step": 213180 }, { "epoch": 8.83, "grad_norm": 1.4765625, "learning_rate": 0.0004005782094948941, "loss": 0.1829, "step": 213190 }, { "epoch": 8.83, "grad_norm": 2.328125, "learning_rate": 0.00040056955206651856, "loss": 0.2254, "step": 213200 }, { "epoch": 8.83, "grad_norm": 1.2578125, "learning_rate": 0.0004005608943547875, "loss": 0.1913, "step": 213210 }, { "epoch": 8.83, "grad_norm": 1.390625, "learning_rate": 0.0004005522363597171, "loss": 0.2175, "step": 213220 }, { "epoch": 8.83, "grad_norm": 0.197265625, "learning_rate": 0.0004005435780813236, "loss": 0.2105, "step": 213230 }, { "epoch": 8.83, "grad_norm": 1.375, "learning_rate": 0.0004005349195196234, "loss": 0.2239, "step": 213240 }, { "epoch": 8.83, "grad_norm": 1.3828125, "learning_rate": 0.00040052626067463266, "loss": 0.1879, "step": 213250 }, { "epoch": 8.83, "grad_norm": 0.3984375, "learning_rate": 0.00040051760154636776, "loss": 0.1653, "step": 213260 }, { "epoch": 8.83, "grad_norm": 0.41796875, "learning_rate": 0.000400508942134845, "loss": 0.1911, "step": 213270 }, { "epoch": 8.83, "grad_norm": 0.578125, "learning_rate": 0.0004005002824400807, "loss": 0.1404, "step": 213280 }, { "epoch": 8.83, "grad_norm": 0.9765625, "learning_rate": 0.0004004916224620911, "loss": 0.1624, "step": 213290 }, { "epoch": 8.83, "grad_norm": 0.404296875, "learning_rate": 0.00040048296220089255, "loss": 0.1641, "step": 213300 }, { "epoch": 8.84, "grad_norm": 1.1796875, "learning_rate": 0.0004004743016565012, "loss": 0.1886, "step": 213310 }, { "epoch": 8.84, "grad_norm": 0.640625, "learning_rate": 0.0004004656408289336, "loss": 0.1798, "step": 213320 }, { "epoch": 8.84, "grad_norm": 0.63671875, "learning_rate": 0.0004004569797182058, "loss": 0.1738, "step": 213330 }, { "epoch": 8.84, "grad_norm": 0.62890625, "learning_rate": 0.0004004483183243343, "loss": 0.1715, "step": 213340 }, { "epoch": 8.84, "grad_norm": 0.5625, "learning_rate": 0.0004004396566473353, "loss": 0.1794, "step": 213350 }, { "epoch": 8.84, "grad_norm": 0.9140625, "learning_rate": 0.0004004309946872251, "loss": 0.1977, "step": 213360 }, { "epoch": 8.84, "grad_norm": 0.7265625, "learning_rate": 0.00040042233244402006, "loss": 0.2054, "step": 213370 }, { "epoch": 8.84, "grad_norm": 0.9375, "learning_rate": 0.0004004136699177363, "loss": 0.1521, "step": 213380 }, { "epoch": 8.84, "grad_norm": 1.25, "learning_rate": 0.0004004050071083903, "loss": 0.1971, "step": 213390 }, { "epoch": 8.84, "grad_norm": 0.91015625, "learning_rate": 0.00040039634401599843, "loss": 0.1399, "step": 213400 }, { "epoch": 8.84, "grad_norm": 0.33203125, "learning_rate": 0.00040038768064057676, "loss": 0.2016, "step": 213410 }, { "epoch": 8.84, "grad_norm": 1.0625, "learning_rate": 0.00040037901698214184, "loss": 0.2371, "step": 213420 }, { "epoch": 8.84, "grad_norm": 0.5546875, "learning_rate": 0.0004003703530407098, "loss": 0.2059, "step": 213430 }, { "epoch": 8.84, "grad_norm": 0.466796875, "learning_rate": 0.0004003616888162969, "loss": 0.2103, "step": 213440 }, { "epoch": 8.84, "grad_norm": 0.5234375, "learning_rate": 0.0004003530243089196, "loss": 0.2134, "step": 213450 }, { "epoch": 8.84, "grad_norm": 0.61328125, "learning_rate": 0.0004003443595185942, "loss": 0.2009, "step": 213460 }, { "epoch": 8.84, "grad_norm": 1.6015625, "learning_rate": 0.0004003356944453369, "loss": 0.1633, "step": 213470 }, { "epoch": 8.84, "grad_norm": 0.9921875, "learning_rate": 0.0004003270290891641, "loss": 0.1801, "step": 213480 }, { "epoch": 8.84, "grad_norm": 1.5390625, "learning_rate": 0.000400318363450092, "loss": 0.1905, "step": 213490 }, { "epoch": 8.84, "grad_norm": 0.283203125, "learning_rate": 0.0004003096975281369, "loss": 0.204, "step": 213500 }, { "epoch": 8.84, "grad_norm": 0.93359375, "learning_rate": 0.0004003010313233154, "loss": 0.1746, "step": 213510 }, { "epoch": 8.84, "grad_norm": 1.09375, "learning_rate": 0.0004002923648356435, "loss": 0.2062, "step": 213520 }, { "epoch": 8.84, "grad_norm": 0.66015625, "learning_rate": 0.0004002836980651375, "loss": 0.1843, "step": 213530 }, { "epoch": 8.84, "grad_norm": 0.3359375, "learning_rate": 0.00040027503101181384, "loss": 0.2149, "step": 213540 }, { "epoch": 8.85, "grad_norm": 0.765625, "learning_rate": 0.00040026636367568893, "loss": 0.1752, "step": 213550 }, { "epoch": 8.85, "grad_norm": 0.6875, "learning_rate": 0.00040025769605677884, "loss": 0.1923, "step": 213560 }, { "epoch": 8.85, "grad_norm": 0.65234375, "learning_rate": 0.0004002490281551, "loss": 0.1815, "step": 213570 }, { "epoch": 8.85, "grad_norm": 0.828125, "learning_rate": 0.0004002403599706688, "loss": 0.1952, "step": 213580 }, { "epoch": 8.85, "grad_norm": 0.64453125, "learning_rate": 0.00040023169150350135, "loss": 0.2016, "step": 213590 }, { "epoch": 8.85, "grad_norm": 0.58203125, "learning_rate": 0.0004002230227536141, "loss": 0.1804, "step": 213600 }, { "epoch": 8.85, "grad_norm": 0.63671875, "learning_rate": 0.00040021435372102337, "loss": 0.287, "step": 213610 }, { "epoch": 8.85, "grad_norm": 1.8515625, "learning_rate": 0.00040020568440574545, "loss": 0.1914, "step": 213620 }, { "epoch": 8.85, "grad_norm": 1.1015625, "learning_rate": 0.0004001970148077967, "loss": 0.1512, "step": 213630 }, { "epoch": 8.85, "grad_norm": 0.95703125, "learning_rate": 0.00040018834492719336, "loss": 0.2406, "step": 213640 }, { "epoch": 8.85, "grad_norm": 0.75390625, "learning_rate": 0.00040017967476395165, "loss": 0.2551, "step": 213650 }, { "epoch": 8.85, "grad_norm": 0.0, "learning_rate": 0.00040017100431808815, "loss": 0.2352, "step": 213660 }, { "epoch": 8.85, "grad_norm": 1.0625, "learning_rate": 0.000400162333589619, "loss": 0.2349, "step": 213670 }, { "epoch": 8.85, "grad_norm": 0.400390625, "learning_rate": 0.0004001536625785606, "loss": 0.2182, "step": 213680 }, { "epoch": 8.85, "grad_norm": 0.41796875, "learning_rate": 0.00040014499128492915, "loss": 0.2367, "step": 213690 }, { "epoch": 8.85, "grad_norm": 0.98046875, "learning_rate": 0.000400136319708741, "loss": 0.2228, "step": 213700 }, { "epoch": 8.85, "grad_norm": 0.7265625, "learning_rate": 0.00040012764785001264, "loss": 0.1621, "step": 213710 }, { "epoch": 8.85, "grad_norm": 0.87890625, "learning_rate": 0.00040011897570876024, "loss": 0.142, "step": 213720 }, { "epoch": 8.85, "grad_norm": 1.703125, "learning_rate": 0.00040011030328500005, "loss": 0.2122, "step": 213730 }, { "epoch": 8.85, "grad_norm": 1.3046875, "learning_rate": 0.00040010163057874854, "loss": 0.1833, "step": 213740 }, { "epoch": 8.85, "grad_norm": 0.36328125, "learning_rate": 0.00040009295759002193, "loss": 0.2142, "step": 213750 }, { "epoch": 8.85, "grad_norm": 0.8984375, "learning_rate": 0.0004000842843188366, "loss": 0.1798, "step": 213760 }, { "epoch": 8.85, "grad_norm": 0.5703125, "learning_rate": 0.0004000756107652089, "loss": 0.2445, "step": 213770 }, { "epoch": 8.85, "grad_norm": 0.515625, "learning_rate": 0.00040006693692915506, "loss": 0.1944, "step": 213780 }, { "epoch": 8.86, "grad_norm": 0.458984375, "learning_rate": 0.00040005826281069146, "loss": 0.2036, "step": 213790 }, { "epoch": 8.86, "grad_norm": 0.50390625, "learning_rate": 0.0004000495884098344, "loss": 0.2163, "step": 213800 }, { "epoch": 8.86, "grad_norm": 0.451171875, "learning_rate": 0.00040004091372660024, "loss": 0.1977, "step": 213810 }, { "epoch": 8.86, "grad_norm": 0.224609375, "learning_rate": 0.00040003223876100525, "loss": 0.1966, "step": 213820 }, { "epoch": 8.86, "grad_norm": 1.2890625, "learning_rate": 0.00040002356351306586, "loss": 0.1975, "step": 213830 }, { "epoch": 8.86, "grad_norm": 0.5078125, "learning_rate": 0.00040001488798279826, "loss": 0.1506, "step": 213840 }, { "epoch": 8.86, "grad_norm": 0.578125, "learning_rate": 0.00040000621217021895, "loss": 0.1434, "step": 213850 }, { "epoch": 8.86, "grad_norm": 0.578125, "learning_rate": 0.00039999753607534397, "loss": 0.2242, "step": 213860 }, { "epoch": 8.86, "grad_norm": 0.98828125, "learning_rate": 0.0003999888596981899, "loss": 0.2012, "step": 213870 }, { "epoch": 8.86, "grad_norm": 0.74609375, "learning_rate": 0.000399980183038773, "loss": 0.2274, "step": 213880 }, { "epoch": 8.86, "grad_norm": 0.734375, "learning_rate": 0.00039997150609710964, "loss": 0.2064, "step": 213890 }, { "epoch": 8.86, "grad_norm": 0.55078125, "learning_rate": 0.00039996282887321607, "loss": 0.2055, "step": 213900 }, { "epoch": 8.86, "grad_norm": 1.0859375, "learning_rate": 0.0003999541513671087, "loss": 0.1959, "step": 213910 }, { "epoch": 8.86, "grad_norm": 1.40625, "learning_rate": 0.00039994547357880374, "loss": 0.1892, "step": 213920 }, { "epoch": 8.86, "grad_norm": 0.294921875, "learning_rate": 0.00039993679550831757, "loss": 0.1702, "step": 213930 }, { "epoch": 8.86, "grad_norm": 0.6328125, "learning_rate": 0.0003999281171556666, "loss": 0.223, "step": 213940 }, { "epoch": 8.86, "grad_norm": 1.1796875, "learning_rate": 0.000399919438520867, "loss": 0.1916, "step": 213950 }, { "epoch": 8.86, "grad_norm": 0.875, "learning_rate": 0.0003999107596039353, "loss": 0.2119, "step": 213960 }, { "epoch": 8.86, "grad_norm": 0.7109375, "learning_rate": 0.0003999020804048877, "loss": 0.18, "step": 213970 }, { "epoch": 8.86, "grad_norm": 0.20703125, "learning_rate": 0.0003998934009237407, "loss": 0.1412, "step": 213980 }, { "epoch": 8.86, "grad_norm": 0.62109375, "learning_rate": 0.00039988472116051036, "loss": 0.2067, "step": 213990 }, { "epoch": 8.86, "grad_norm": 0.62890625, "learning_rate": 0.00039987604111521325, "loss": 0.1918, "step": 214000 }, { "epoch": 8.86, "grad_norm": 0.859375, "learning_rate": 0.00039986736078786555, "loss": 0.1979, "step": 214010 }, { "epoch": 8.86, "grad_norm": 0.796875, "learning_rate": 0.00039985868017848364, "loss": 0.204, "step": 214020 }, { "epoch": 8.87, "grad_norm": 0.6875, "learning_rate": 0.000399849999287084, "loss": 0.1893, "step": 214030 }, { "epoch": 8.87, "grad_norm": 0.5859375, "learning_rate": 0.00039984131811368276, "loss": 0.1653, "step": 214040 }, { "epoch": 8.87, "grad_norm": 0.419921875, "learning_rate": 0.00039983263665829626, "loss": 0.1882, "step": 214050 }, { "epoch": 8.87, "grad_norm": 0.59765625, "learning_rate": 0.00039982395492094104, "loss": 0.1874, "step": 214060 }, { "epoch": 8.87, "grad_norm": 1.03125, "learning_rate": 0.00039981527290163335, "loss": 0.2001, "step": 214070 }, { "epoch": 8.87, "grad_norm": 0.67578125, "learning_rate": 0.0003998065906003894, "loss": 0.2251, "step": 214080 }, { "epoch": 8.87, "grad_norm": 0.26953125, "learning_rate": 0.0003997979080172256, "loss": 0.1533, "step": 214090 }, { "epoch": 8.87, "grad_norm": 0.55078125, "learning_rate": 0.00039978922515215837, "loss": 0.1866, "step": 214100 }, { "epoch": 8.87, "grad_norm": 0.5234375, "learning_rate": 0.000399780542005204, "loss": 0.1982, "step": 214110 }, { "epoch": 8.87, "grad_norm": 0.451171875, "learning_rate": 0.00039977185857637877, "loss": 0.225, "step": 214120 }, { "epoch": 8.87, "grad_norm": 0.76953125, "learning_rate": 0.00039976317486569915, "loss": 0.1842, "step": 214130 }, { "epoch": 8.87, "grad_norm": 0.302734375, "learning_rate": 0.0003997544908731814, "loss": 0.1725, "step": 214140 }, { "epoch": 8.87, "grad_norm": 0.490234375, "learning_rate": 0.0003997458065988419, "loss": 0.2049, "step": 214150 }, { "epoch": 8.87, "grad_norm": 0.375, "learning_rate": 0.00039973712204269686, "loss": 0.2324, "step": 214160 }, { "epoch": 8.87, "grad_norm": 0.58984375, "learning_rate": 0.0003997284372047627, "loss": 0.2354, "step": 214170 }, { "epoch": 8.87, "grad_norm": 0.703125, "learning_rate": 0.00039971975208505595, "loss": 0.2204, "step": 214180 }, { "epoch": 8.87, "grad_norm": 0.388671875, "learning_rate": 0.0003997110666835927, "loss": 0.2166, "step": 214190 }, { "epoch": 8.87, "grad_norm": 0.703125, "learning_rate": 0.0003997023810003894, "loss": 0.2085, "step": 214200 }, { "epoch": 8.87, "grad_norm": 1.03125, "learning_rate": 0.00039969369503546235, "loss": 0.1838, "step": 214210 }, { "epoch": 8.87, "grad_norm": 1.109375, "learning_rate": 0.000399685008788828, "loss": 0.1857, "step": 214220 }, { "epoch": 8.87, "grad_norm": 0.35546875, "learning_rate": 0.0003996763222605026, "loss": 0.182, "step": 214230 }, { "epoch": 8.87, "grad_norm": 1.4375, "learning_rate": 0.00039966763545050253, "loss": 0.1838, "step": 214240 }, { "epoch": 8.87, "grad_norm": 1.0234375, "learning_rate": 0.0003996589483588441, "loss": 0.202, "step": 214250 }, { "epoch": 8.87, "grad_norm": 1.6796875, "learning_rate": 0.00039965026098554376, "loss": 0.2168, "step": 214260 }, { "epoch": 8.88, "grad_norm": 0.8046875, "learning_rate": 0.00039964157333061774, "loss": 0.2308, "step": 214270 }, { "epoch": 8.88, "grad_norm": 0.8671875, "learning_rate": 0.00039963288539408246, "loss": 0.1758, "step": 214280 }, { "epoch": 8.88, "grad_norm": 0.38671875, "learning_rate": 0.00039962419717595423, "loss": 0.2087, "step": 214290 }, { "epoch": 8.88, "grad_norm": 1.0859375, "learning_rate": 0.0003996155086762494, "loss": 0.2147, "step": 214300 }, { "epoch": 8.88, "grad_norm": 0.474609375, "learning_rate": 0.00039960681989498437, "loss": 0.2379, "step": 214310 }, { "epoch": 8.88, "grad_norm": 1.015625, "learning_rate": 0.0003995981308321754, "loss": 0.1778, "step": 214320 }, { "epoch": 8.88, "grad_norm": 0.275390625, "learning_rate": 0.000399589441487839, "loss": 0.1276, "step": 214330 }, { "epoch": 8.88, "grad_norm": 1.28125, "learning_rate": 0.0003995807518619914, "loss": 0.1841, "step": 214340 }, { "epoch": 8.88, "grad_norm": 0.9375, "learning_rate": 0.00039957206195464893, "loss": 0.1665, "step": 214350 }, { "epoch": 8.88, "grad_norm": 1.109375, "learning_rate": 0.000399563371765828, "loss": 0.2347, "step": 214360 }, { "epoch": 8.88, "grad_norm": 0.734375, "learning_rate": 0.00039955468129554503, "loss": 0.1934, "step": 214370 }, { "epoch": 8.88, "grad_norm": 0.71875, "learning_rate": 0.00039954599054381625, "loss": 0.1886, "step": 214380 }, { "epoch": 8.88, "grad_norm": 0.515625, "learning_rate": 0.0003995372995106581, "loss": 0.2024, "step": 214390 }, { "epoch": 8.88, "grad_norm": 0.5546875, "learning_rate": 0.0003995286081960868, "loss": 0.2614, "step": 214400 }, { "epoch": 8.88, "grad_norm": 0.71484375, "learning_rate": 0.00039951991660011887, "loss": 0.2114, "step": 214410 }, { "epoch": 8.88, "grad_norm": 0.466796875, "learning_rate": 0.0003995112247227706, "loss": 0.2325, "step": 214420 }, { "epoch": 8.88, "grad_norm": 0.88671875, "learning_rate": 0.0003995025325640583, "loss": 0.1538, "step": 214430 }, { "epoch": 8.88, "grad_norm": 0.59765625, "learning_rate": 0.0003994938401239985, "loss": 0.1974, "step": 214440 }, { "epoch": 8.88, "grad_norm": 0.75, "learning_rate": 0.00039948514740260736, "loss": 0.2037, "step": 214450 }, { "epoch": 8.88, "grad_norm": 1.0, "learning_rate": 0.00039947645439990134, "loss": 0.2065, "step": 214460 }, { "epoch": 8.88, "grad_norm": 0.212890625, "learning_rate": 0.0003994677611158968, "loss": 0.2019, "step": 214470 }, { "epoch": 8.88, "grad_norm": 0.69140625, "learning_rate": 0.00039945906755061003, "loss": 0.2542, "step": 214480 }, { "epoch": 8.88, "grad_norm": 0.98828125, "learning_rate": 0.0003994503737040574, "loss": 0.2058, "step": 214490 }, { "epoch": 8.88, "grad_norm": 0.70703125, "learning_rate": 0.00039944167957625535, "loss": 0.1821, "step": 214500 }, { "epoch": 8.88, "grad_norm": 0.482421875, "learning_rate": 0.0003994329851672202, "loss": 0.1441, "step": 214510 }, { "epoch": 8.89, "grad_norm": 0.52734375, "learning_rate": 0.00039942429047696833, "loss": 0.2019, "step": 214520 }, { "epoch": 8.89, "grad_norm": 0.42578125, "learning_rate": 0.000399415595505516, "loss": 0.2411, "step": 214530 }, { "epoch": 8.89, "grad_norm": 0.5078125, "learning_rate": 0.0003994069002528797, "loss": 0.1996, "step": 214540 }, { "epoch": 8.89, "grad_norm": 1.046875, "learning_rate": 0.00039939820471907587, "loss": 0.1588, "step": 214550 }, { "epoch": 8.89, "grad_norm": 0.68359375, "learning_rate": 0.0003993895089041206, "loss": 0.2362, "step": 214560 }, { "epoch": 8.89, "grad_norm": 0.88671875, "learning_rate": 0.0003993808128080304, "loss": 0.2125, "step": 214570 }, { "epoch": 8.89, "grad_norm": 0.82421875, "learning_rate": 0.00039937211643082174, "loss": 0.1936, "step": 214580 }, { "epoch": 8.89, "grad_norm": 1.9453125, "learning_rate": 0.00039936341977251077, "loss": 0.1768, "step": 214590 }, { "epoch": 8.89, "grad_norm": 0.6640625, "learning_rate": 0.0003993547228331141, "loss": 0.1789, "step": 214600 }, { "epoch": 8.89, "grad_norm": 1.1484375, "learning_rate": 0.0003993460256126479, "loss": 0.1864, "step": 214610 }, { "epoch": 8.89, "grad_norm": 0.859375, "learning_rate": 0.0003993373281111286, "loss": 0.2098, "step": 214620 }, { "epoch": 8.89, "grad_norm": 0.466796875, "learning_rate": 0.00039932863032857266, "loss": 0.2092, "step": 214630 }, { "epoch": 8.89, "grad_norm": 0.89453125, "learning_rate": 0.00039931993226499624, "loss": 0.2378, "step": 214640 }, { "epoch": 8.89, "grad_norm": 0.80859375, "learning_rate": 0.00039931123392041593, "loss": 0.2619, "step": 214650 }, { "epoch": 8.89, "grad_norm": 1.0234375, "learning_rate": 0.0003993025352948479, "loss": 0.2255, "step": 214660 }, { "epoch": 8.89, "grad_norm": 0.65625, "learning_rate": 0.00039929383638830876, "loss": 0.1743, "step": 214670 }, { "epoch": 8.89, "grad_norm": 4.46875, "learning_rate": 0.0003992851372008146, "loss": 0.2145, "step": 214680 }, { "epoch": 8.89, "grad_norm": 0.390625, "learning_rate": 0.00039927643773238203, "loss": 0.2004, "step": 214690 }, { "epoch": 8.89, "grad_norm": 0.671875, "learning_rate": 0.0003992677379830273, "loss": 0.1683, "step": 214700 }, { "epoch": 8.89, "grad_norm": 0.98828125, "learning_rate": 0.00039925903795276686, "loss": 0.2105, "step": 214710 }, { "epoch": 8.89, "grad_norm": 1.4140625, "learning_rate": 0.0003992503376416169, "loss": 0.2267, "step": 214720 }, { "epoch": 8.89, "grad_norm": 2.0625, "learning_rate": 0.00039924163704959406, "loss": 0.1944, "step": 214730 }, { "epoch": 8.89, "grad_norm": 0.96875, "learning_rate": 0.00039923293617671445, "loss": 0.2321, "step": 214740 }, { "epoch": 8.89, "grad_norm": 1.0234375, "learning_rate": 0.00039922423502299466, "loss": 0.2123, "step": 214750 }, { "epoch": 8.9, "grad_norm": 0.640625, "learning_rate": 0.0003992155335884509, "loss": 0.2378, "step": 214760 }, { "epoch": 8.9, "grad_norm": 0.77734375, "learning_rate": 0.0003992068318730997, "loss": 0.2302, "step": 214770 }, { "epoch": 8.9, "grad_norm": 0.546875, "learning_rate": 0.00039919812987695725, "loss": 0.1537, "step": 214780 }, { "epoch": 8.9, "grad_norm": 0.6640625, "learning_rate": 0.00039918942760004016, "loss": 0.2259, "step": 214790 }, { "epoch": 8.9, "grad_norm": 0.498046875, "learning_rate": 0.0003991807250423646, "loss": 0.1654, "step": 214800 }, { "epoch": 8.9, "grad_norm": 0.8515625, "learning_rate": 0.00039917202220394706, "loss": 0.1659, "step": 214810 }, { "epoch": 8.9, "grad_norm": 0.51171875, "learning_rate": 0.00039916331908480384, "loss": 0.1732, "step": 214820 }, { "epoch": 8.9, "grad_norm": 0.5703125, "learning_rate": 0.00039915461568495135, "loss": 0.1874, "step": 214830 }, { "epoch": 8.9, "grad_norm": 0.30078125, "learning_rate": 0.0003991459120044061, "loss": 0.1829, "step": 214840 }, { "epoch": 8.9, "grad_norm": 1.3125, "learning_rate": 0.0003991372080431842, "loss": 0.2451, "step": 214850 }, { "epoch": 8.9, "grad_norm": 0.68359375, "learning_rate": 0.0003991285038013023, "loss": 0.2203, "step": 214860 }, { "epoch": 8.9, "grad_norm": 0.8359375, "learning_rate": 0.0003991197992787766, "loss": 0.1954, "step": 214870 }, { "epoch": 8.9, "grad_norm": 0.69140625, "learning_rate": 0.00039911109447562357, "loss": 0.1964, "step": 214880 }, { "epoch": 8.9, "grad_norm": 0.5703125, "learning_rate": 0.00039910238939185955, "loss": 0.2176, "step": 214890 }, { "epoch": 8.9, "grad_norm": 1.6171875, "learning_rate": 0.0003990936840275009, "loss": 0.1533, "step": 214900 }, { "epoch": 8.9, "grad_norm": 1.0390625, "learning_rate": 0.00039908497838256406, "loss": 0.1785, "step": 214910 }, { "epoch": 8.9, "grad_norm": 1.546875, "learning_rate": 0.0003990762724570655, "loss": 0.1893, "step": 214920 }, { "epoch": 8.9, "grad_norm": 0.671875, "learning_rate": 0.0003990675662510213, "loss": 0.1772, "step": 214930 }, { "epoch": 8.9, "grad_norm": 0.306640625, "learning_rate": 0.0003990588597644481, "loss": 0.199, "step": 214940 }, { "epoch": 8.9, "grad_norm": 0.263671875, "learning_rate": 0.0003990501529973623, "loss": 0.2466, "step": 214950 }, { "epoch": 8.9, "grad_norm": 0.5390625, "learning_rate": 0.00039904144594978013, "loss": 0.1754, "step": 214960 }, { "epoch": 8.9, "grad_norm": 0.8125, "learning_rate": 0.0003990327386217181, "loss": 0.1551, "step": 214970 }, { "epoch": 8.9, "grad_norm": 0.64453125, "learning_rate": 0.00039902403101319255, "loss": 0.1923, "step": 214980 }, { "epoch": 8.9, "grad_norm": 1.515625, "learning_rate": 0.00039901532312421983, "loss": 0.1894, "step": 214990 }, { "epoch": 8.91, "grad_norm": 0.70703125, "learning_rate": 0.0003990066149548164, "loss": 0.1868, "step": 215000 }, { "epoch": 8.91, "grad_norm": 0.9921875, "learning_rate": 0.00039899790650499856, "loss": 0.1632, "step": 215010 }, { "epoch": 8.91, "grad_norm": 1.4140625, "learning_rate": 0.0003989891977747828, "loss": 0.1839, "step": 215020 }, { "epoch": 8.91, "grad_norm": 0.7265625, "learning_rate": 0.0003989804887641855, "loss": 0.198, "step": 215030 }, { "epoch": 8.91, "grad_norm": 0.44140625, "learning_rate": 0.0003989717794732229, "loss": 0.1813, "step": 215040 }, { "epoch": 8.91, "grad_norm": 0.78125, "learning_rate": 0.0003989630699019116, "loss": 0.2094, "step": 215050 }, { "epoch": 8.91, "grad_norm": 0.96875, "learning_rate": 0.00039895436005026784, "loss": 0.2118, "step": 215060 }, { "epoch": 8.91, "grad_norm": 0.6953125, "learning_rate": 0.0003989456499183081, "loss": 0.2204, "step": 215070 }, { "epoch": 8.91, "grad_norm": 0.99609375, "learning_rate": 0.0003989369395060487, "loss": 0.1879, "step": 215080 }, { "epoch": 8.91, "grad_norm": 0.5078125, "learning_rate": 0.000398928228813506, "loss": 0.226, "step": 215090 }, { "epoch": 8.91, "grad_norm": 0.96484375, "learning_rate": 0.0003989195178406965, "loss": 0.2411, "step": 215100 }, { "epoch": 8.91, "grad_norm": 0.5234375, "learning_rate": 0.00039891080658763657, "loss": 0.1885, "step": 215110 }, { "epoch": 8.91, "grad_norm": 0.8984375, "learning_rate": 0.0003989020950543426, "loss": 0.1758, "step": 215120 }, { "epoch": 8.91, "grad_norm": 0.58203125, "learning_rate": 0.00039889338324083093, "loss": 0.2078, "step": 215130 }, { "epoch": 8.91, "grad_norm": 0.89453125, "learning_rate": 0.00039888467114711804, "loss": 0.1899, "step": 215140 }, { "epoch": 8.91, "grad_norm": 0.85546875, "learning_rate": 0.00039887595877322025, "loss": 0.1715, "step": 215150 }, { "epoch": 8.91, "grad_norm": 0.6640625, "learning_rate": 0.00039886724611915393, "loss": 0.1635, "step": 215160 }, { "epoch": 8.91, "grad_norm": 0.28515625, "learning_rate": 0.0003988585331849356, "loss": 0.1866, "step": 215170 }, { "epoch": 8.91, "grad_norm": 0.6875, "learning_rate": 0.00039884981997058156, "loss": 0.2322, "step": 215180 }, { "epoch": 8.91, "grad_norm": 0.462890625, "learning_rate": 0.00039884110647610824, "loss": 0.1743, "step": 215190 }, { "epoch": 8.91, "grad_norm": 0.8046875, "learning_rate": 0.00039883239270153205, "loss": 0.2268, "step": 215200 }, { "epoch": 8.91, "grad_norm": 0.53515625, "learning_rate": 0.0003988236786468693, "loss": 0.2185, "step": 215210 }, { "epoch": 8.91, "grad_norm": 0.498046875, "learning_rate": 0.0003988149643121365, "loss": 0.2384, "step": 215220 }, { "epoch": 8.91, "grad_norm": 0.66015625, "learning_rate": 0.00039880624969735, "loss": 0.2069, "step": 215230 }, { "epoch": 8.92, "grad_norm": 0.333984375, "learning_rate": 0.0003987975348025262, "loss": 0.229, "step": 215240 }, { "epoch": 8.92, "grad_norm": 0.55859375, "learning_rate": 0.00039878881962768155, "loss": 0.1938, "step": 215250 }, { "epoch": 8.92, "grad_norm": 0.5, "learning_rate": 0.0003987801041728324, "loss": 0.1911, "step": 215260 }, { "epoch": 8.92, "grad_norm": 0.46484375, "learning_rate": 0.0003987713884379951, "loss": 0.1744, "step": 215270 }, { "epoch": 8.92, "grad_norm": 1.0703125, "learning_rate": 0.0003987626724231862, "loss": 0.1997, "step": 215280 }, { "epoch": 8.92, "grad_norm": 0.8515625, "learning_rate": 0.00039875395612842205, "loss": 0.1967, "step": 215290 }, { "epoch": 8.92, "grad_norm": 1.1796875, "learning_rate": 0.00039874523955371887, "loss": 0.1693, "step": 215300 }, { "epoch": 8.92, "grad_norm": 0.3203125, "learning_rate": 0.00039873652269909333, "loss": 0.1968, "step": 215310 }, { "epoch": 8.92, "grad_norm": 0.396484375, "learning_rate": 0.00039872780556456165, "loss": 0.1232, "step": 215320 }, { "epoch": 8.92, "grad_norm": 0.2490234375, "learning_rate": 0.0003987190881501403, "loss": 0.1911, "step": 215330 }, { "epoch": 8.92, "grad_norm": 0.95703125, "learning_rate": 0.00039871037045584567, "loss": 0.1886, "step": 215340 }, { "epoch": 8.92, "grad_norm": 0.34375, "learning_rate": 0.0003987016524816943, "loss": 0.1674, "step": 215350 }, { "epoch": 8.92, "grad_norm": 0.6953125, "learning_rate": 0.0003986929342277024, "loss": 0.2256, "step": 215360 }, { "epoch": 8.92, "grad_norm": 0.45703125, "learning_rate": 0.0003986842156938865, "loss": 0.1823, "step": 215370 }, { "epoch": 8.92, "grad_norm": 0.296875, "learning_rate": 0.0003986754968802628, "loss": 0.2426, "step": 215380 }, { "epoch": 8.92, "grad_norm": 1.3828125, "learning_rate": 0.000398666777786848, "loss": 0.1434, "step": 215390 }, { "epoch": 8.92, "grad_norm": 0.83203125, "learning_rate": 0.0003986580584136584, "loss": 0.1983, "step": 215400 }, { "epoch": 8.92, "grad_norm": 0.56640625, "learning_rate": 0.00039864933876071034, "loss": 0.1792, "step": 215410 }, { "epoch": 8.92, "grad_norm": 0.494140625, "learning_rate": 0.00039864061882802026, "loss": 0.2179, "step": 215420 }, { "epoch": 8.92, "grad_norm": 0.6875, "learning_rate": 0.00039863189861560466, "loss": 0.2445, "step": 215430 }, { "epoch": 8.92, "grad_norm": 1.125, "learning_rate": 0.0003986231781234798, "loss": 0.2128, "step": 215440 }, { "epoch": 8.92, "grad_norm": 0.51171875, "learning_rate": 0.00039861445735166223, "loss": 0.1991, "step": 215450 }, { "epoch": 8.92, "grad_norm": 0.80859375, "learning_rate": 0.00039860573630016825, "loss": 0.1715, "step": 215460 }, { "epoch": 8.92, "grad_norm": 0.61328125, "learning_rate": 0.0003985970149690144, "loss": 0.2066, "step": 215470 }, { "epoch": 8.93, "grad_norm": 0.94921875, "learning_rate": 0.00039858829335821696, "loss": 0.2, "step": 215480 }, { "epoch": 8.93, "grad_norm": 1.1796875, "learning_rate": 0.0003985795714677924, "loss": 0.1456, "step": 215490 }, { "epoch": 8.93, "grad_norm": 0.75390625, "learning_rate": 0.0003985708492977571, "loss": 0.178, "step": 215500 }, { "epoch": 8.93, "grad_norm": 0.59765625, "learning_rate": 0.0003985621268481275, "loss": 0.2709, "step": 215510 }, { "epoch": 8.93, "grad_norm": 0.6796875, "learning_rate": 0.00039855340411892014, "loss": 0.2361, "step": 215520 }, { "epoch": 8.93, "grad_norm": 1.0078125, "learning_rate": 0.0003985446811101512, "loss": 0.2305, "step": 215530 }, { "epoch": 8.93, "grad_norm": 1.359375, "learning_rate": 0.00039853595782183724, "loss": 0.1907, "step": 215540 }, { "epoch": 8.93, "grad_norm": 0.447265625, "learning_rate": 0.00039852723425399476, "loss": 0.1738, "step": 215550 }, { "epoch": 8.93, "grad_norm": 0.515625, "learning_rate": 0.0003985185104066399, "loss": 0.1761, "step": 215560 }, { "epoch": 8.93, "grad_norm": 0.6640625, "learning_rate": 0.0003985097862797893, "loss": 0.1916, "step": 215570 }, { "epoch": 8.93, "grad_norm": 0.72265625, "learning_rate": 0.00039850106187345937, "loss": 0.2029, "step": 215580 }, { "epoch": 8.93, "grad_norm": 0.63671875, "learning_rate": 0.00039849233718766637, "loss": 0.2046, "step": 215590 }, { "epoch": 8.93, "grad_norm": 0.8984375, "learning_rate": 0.00039848361222242693, "loss": 0.1884, "step": 215600 }, { "epoch": 8.93, "grad_norm": 1.3359375, "learning_rate": 0.00039847488697775725, "loss": 0.213, "step": 215610 }, { "epoch": 8.93, "grad_norm": 0.82421875, "learning_rate": 0.000398466161453674, "loss": 0.2239, "step": 215620 }, { "epoch": 8.93, "grad_norm": 0.5546875, "learning_rate": 0.00039845743565019345, "loss": 0.2039, "step": 215630 }, { "epoch": 8.93, "grad_norm": 0.76171875, "learning_rate": 0.0003984487095673319, "loss": 0.1458, "step": 215640 }, { "epoch": 8.93, "grad_norm": 0.29296875, "learning_rate": 0.000398439983205106, "loss": 0.136, "step": 215650 }, { "epoch": 8.93, "grad_norm": 0.96484375, "learning_rate": 0.0003984312565635321, "loss": 0.1705, "step": 215660 }, { "epoch": 8.93, "grad_norm": 1.046875, "learning_rate": 0.0003984225296426266, "loss": 0.2092, "step": 215670 }, { "epoch": 8.93, "grad_norm": 1.28125, "learning_rate": 0.00039841380244240595, "loss": 0.1968, "step": 215680 }, { "epoch": 8.93, "grad_norm": 0.55859375, "learning_rate": 0.0003984050749628865, "loss": 0.1877, "step": 215690 }, { "epoch": 8.93, "grad_norm": 2.015625, "learning_rate": 0.00039839634720408474, "loss": 0.1858, "step": 215700 }, { "epoch": 8.93, "grad_norm": 0.80078125, "learning_rate": 0.00039838761916601706, "loss": 0.1965, "step": 215710 }, { "epoch": 8.94, "grad_norm": 1.5078125, "learning_rate": 0.00039837889084869994, "loss": 0.2094, "step": 215720 }, { "epoch": 8.94, "grad_norm": 0.88671875, "learning_rate": 0.0003983701622521498, "loss": 0.1847, "step": 215730 }, { "epoch": 8.94, "grad_norm": 0.4765625, "learning_rate": 0.000398361433376383, "loss": 0.1798, "step": 215740 }, { "epoch": 8.94, "grad_norm": 0.95703125, "learning_rate": 0.00039835270422141603, "loss": 0.2314, "step": 215750 }, { "epoch": 8.94, "grad_norm": 1.0078125, "learning_rate": 0.00039834397478726523, "loss": 0.1933, "step": 215760 }, { "epoch": 8.94, "grad_norm": 1.0859375, "learning_rate": 0.00039833524507394707, "loss": 0.2377, "step": 215770 }, { "epoch": 8.94, "grad_norm": 0.59375, "learning_rate": 0.00039832651508147813, "loss": 0.2392, "step": 215780 }, { "epoch": 8.94, "grad_norm": 0.24609375, "learning_rate": 0.00039831778480987456, "loss": 0.1399, "step": 215790 }, { "epoch": 8.94, "grad_norm": 0.5234375, "learning_rate": 0.00039830905425915306, "loss": 0.2317, "step": 215800 }, { "epoch": 8.94, "grad_norm": 0.34375, "learning_rate": 0.00039830032342932985, "loss": 0.2155, "step": 215810 }, { "epoch": 8.94, "grad_norm": 1.015625, "learning_rate": 0.00039829159232042144, "loss": 0.1984, "step": 215820 }, { "epoch": 8.94, "grad_norm": 0.796875, "learning_rate": 0.0003982828609324444, "loss": 0.1829, "step": 215830 }, { "epoch": 8.94, "grad_norm": 0.00390625, "learning_rate": 0.0003982741292654149, "loss": 0.1822, "step": 215840 }, { "epoch": 8.94, "grad_norm": 0.87109375, "learning_rate": 0.00039826539731934954, "loss": 0.2003, "step": 215850 }, { "epoch": 8.94, "grad_norm": 0.416015625, "learning_rate": 0.00039825666509426465, "loss": 0.1804, "step": 215860 }, { "epoch": 8.94, "grad_norm": 0.0, "learning_rate": 0.00039824793259017675, "loss": 0.2059, "step": 215870 }, { "epoch": 8.94, "grad_norm": 0.408203125, "learning_rate": 0.00039823919980710233, "loss": 0.1815, "step": 215880 }, { "epoch": 8.94, "grad_norm": 0.58984375, "learning_rate": 0.0003982304667450577, "loss": 0.1604, "step": 215890 }, { "epoch": 8.94, "grad_norm": 0.92578125, "learning_rate": 0.0003982217334040593, "loss": 0.1814, "step": 215900 }, { "epoch": 8.94, "grad_norm": 0.66796875, "learning_rate": 0.00039821299978412366, "loss": 0.1741, "step": 215910 }, { "epoch": 8.94, "grad_norm": 0.80078125, "learning_rate": 0.00039820426588526713, "loss": 0.1852, "step": 215920 }, { "epoch": 8.94, "grad_norm": 0.64453125, "learning_rate": 0.0003981955317075062, "loss": 0.2165, "step": 215930 }, { "epoch": 8.94, "grad_norm": 0.640625, "learning_rate": 0.00039818679725085726, "loss": 0.187, "step": 215940 }, { "epoch": 8.94, "grad_norm": 0.69140625, "learning_rate": 0.00039817806251533683, "loss": 0.1869, "step": 215950 }, { "epoch": 8.95, "grad_norm": 1.0078125, "learning_rate": 0.0003981693275009612, "loss": 0.2033, "step": 215960 }, { "epoch": 8.95, "grad_norm": 0.8828125, "learning_rate": 0.000398160592207747, "loss": 0.1996, "step": 215970 }, { "epoch": 8.95, "grad_norm": 1.1015625, "learning_rate": 0.00039815185663571046, "loss": 0.2083, "step": 215980 }, { "epoch": 8.95, "grad_norm": 1.3125, "learning_rate": 0.00039814312078486816, "loss": 0.182, "step": 215990 }, { "epoch": 8.95, "grad_norm": 0.875, "learning_rate": 0.00039813438465523656, "loss": 0.2179, "step": 216000 }, { "epoch": 8.95, "grad_norm": 0.69921875, "learning_rate": 0.00039812564824683196, "loss": 0.2127, "step": 216010 }, { "epoch": 8.95, "grad_norm": 1.0234375, "learning_rate": 0.0003981169115596709, "loss": 0.199, "step": 216020 }, { "epoch": 8.95, "grad_norm": 0.0, "learning_rate": 0.0003981081745937698, "loss": 0.2209, "step": 216030 }, { "epoch": 8.95, "grad_norm": 0.68359375, "learning_rate": 0.0003980994373491452, "loss": 0.2164, "step": 216040 }, { "epoch": 8.95, "grad_norm": 1.2421875, "learning_rate": 0.0003980906998258134, "loss": 0.1792, "step": 216050 }, { "epoch": 8.95, "grad_norm": 0.83203125, "learning_rate": 0.00039808196202379087, "loss": 0.179, "step": 216060 }, { "epoch": 8.95, "grad_norm": 0.61328125, "learning_rate": 0.0003980732239430941, "loss": 0.2132, "step": 216070 }, { "epoch": 8.95, "grad_norm": 0.82421875, "learning_rate": 0.0003980644855837395, "loss": 0.1748, "step": 216080 }, { "epoch": 8.95, "grad_norm": 0.58203125, "learning_rate": 0.00039805574694574356, "loss": 0.1568, "step": 216090 }, { "epoch": 8.95, "grad_norm": 1.453125, "learning_rate": 0.0003980470080291226, "loss": 0.2007, "step": 216100 }, { "epoch": 8.95, "grad_norm": 1.09375, "learning_rate": 0.00039803826883389327, "loss": 0.1794, "step": 216110 }, { "epoch": 8.95, "grad_norm": 0.57421875, "learning_rate": 0.0003980295293600719, "loss": 0.2081, "step": 216120 }, { "epoch": 8.95, "grad_norm": 0.59375, "learning_rate": 0.00039802078960767483, "loss": 0.2017, "step": 216130 }, { "epoch": 8.95, "grad_norm": 2.8125, "learning_rate": 0.0003980120495767187, "loss": 0.1944, "step": 216140 }, { "epoch": 8.95, "grad_norm": 1.40625, "learning_rate": 0.00039800330926721987, "loss": 0.2325, "step": 216150 }, { "epoch": 8.95, "grad_norm": 1.3515625, "learning_rate": 0.0003979945686791948, "loss": 0.1931, "step": 216160 }, { "epoch": 8.95, "grad_norm": 1.0, "learning_rate": 0.0003979858278126599, "loss": 0.178, "step": 216170 }, { "epoch": 8.95, "grad_norm": 4.625, "learning_rate": 0.00039797708666763177, "loss": 0.2305, "step": 216180 }, { "epoch": 8.95, "grad_norm": 0.58984375, "learning_rate": 0.0003979683452441266, "loss": 0.1755, "step": 216190 }, { "epoch": 8.95, "grad_norm": 0.75, "learning_rate": 0.00039795960354216105, "loss": 0.2235, "step": 216200 }, { "epoch": 8.96, "grad_norm": 0.453125, "learning_rate": 0.0003979508615617515, "loss": 0.2072, "step": 216210 }, { "epoch": 8.96, "grad_norm": 0.4375, "learning_rate": 0.00039794211930291437, "loss": 0.1633, "step": 216220 }, { "epoch": 8.96, "grad_norm": 0.78515625, "learning_rate": 0.0003979333767656662, "loss": 0.2, "step": 216230 }, { "epoch": 8.96, "grad_norm": 1.0, "learning_rate": 0.0003979246339500233, "loss": 0.2209, "step": 216240 }, { "epoch": 8.96, "grad_norm": 0.75390625, "learning_rate": 0.00039791589085600234, "loss": 0.1986, "step": 216250 }, { "epoch": 8.96, "grad_norm": 1.0546875, "learning_rate": 0.0003979071474836196, "loss": 0.1711, "step": 216260 }, { "epoch": 8.96, "grad_norm": 0.53515625, "learning_rate": 0.00039789840383289154, "loss": 0.1973, "step": 216270 }, { "epoch": 8.96, "grad_norm": 0.275390625, "learning_rate": 0.00039788965990383477, "loss": 0.1822, "step": 216280 }, { "epoch": 8.96, "grad_norm": 0.47265625, "learning_rate": 0.0003978809156964655, "loss": 0.2343, "step": 216290 }, { "epoch": 8.96, "grad_norm": 0.55859375, "learning_rate": 0.0003978721712108003, "loss": 0.1733, "step": 216300 }, { "epoch": 8.96, "grad_norm": 0.578125, "learning_rate": 0.00039786342644685573, "loss": 0.193, "step": 216310 }, { "epoch": 8.96, "grad_norm": 0.228515625, "learning_rate": 0.0003978546814046481, "loss": 0.1634, "step": 216320 }, { "epoch": 8.96, "grad_norm": 0.5234375, "learning_rate": 0.000397845936084194, "loss": 0.1493, "step": 216330 }, { "epoch": 8.96, "grad_norm": 0.8203125, "learning_rate": 0.0003978371904855098, "loss": 0.2233, "step": 216340 }, { "epoch": 8.96, "grad_norm": 0.578125, "learning_rate": 0.0003978284446086119, "loss": 0.1826, "step": 216350 }, { "epoch": 8.96, "grad_norm": 0.9453125, "learning_rate": 0.0003978196984535169, "loss": 0.236, "step": 216360 }, { "epoch": 8.96, "grad_norm": 0.333984375, "learning_rate": 0.00039781095202024114, "loss": 0.1784, "step": 216370 }, { "epoch": 8.96, "grad_norm": 0.9140625, "learning_rate": 0.00039780220530880115, "loss": 0.2051, "step": 216380 }, { "epoch": 8.96, "grad_norm": 0.1982421875, "learning_rate": 0.00039779345831921344, "loss": 0.1807, "step": 216390 }, { "epoch": 8.96, "grad_norm": 1.171875, "learning_rate": 0.00039778471105149425, "loss": 0.2058, "step": 216400 }, { "epoch": 8.96, "grad_norm": 0.5625, "learning_rate": 0.00039777596350566035, "loss": 0.2397, "step": 216410 }, { "epoch": 8.96, "grad_norm": 1.4609375, "learning_rate": 0.000397767215681728, "loss": 0.2173, "step": 216420 }, { "epoch": 8.96, "grad_norm": 1.015625, "learning_rate": 0.00039775846757971367, "loss": 0.2335, "step": 216430 }, { "epoch": 8.96, "grad_norm": 0.8125, "learning_rate": 0.00039774971919963385, "loss": 0.2769, "step": 216440 }, { "epoch": 8.97, "grad_norm": 0.5078125, "learning_rate": 0.000397740970541505, "loss": 0.2105, "step": 216450 }, { "epoch": 8.97, "grad_norm": 0.43359375, "learning_rate": 0.0003977322216053437, "loss": 0.2381, "step": 216460 }, { "epoch": 8.97, "grad_norm": 0.83984375, "learning_rate": 0.0003977234723911662, "loss": 0.1657, "step": 216470 }, { "epoch": 8.97, "grad_norm": 0.55859375, "learning_rate": 0.0003977147228989891, "loss": 0.2499, "step": 216480 }, { "epoch": 8.97, "grad_norm": 0.64453125, "learning_rate": 0.00039770597312882897, "loss": 0.2015, "step": 216490 }, { "epoch": 8.97, "grad_norm": 1.0625, "learning_rate": 0.00039769722308070205, "loss": 0.2275, "step": 216500 }, { "epoch": 8.97, "grad_norm": 1.15625, "learning_rate": 0.00039768847275462493, "loss": 0.2201, "step": 216510 }, { "epoch": 8.97, "grad_norm": 0.82421875, "learning_rate": 0.000397679722150614, "loss": 0.2532, "step": 216520 }, { "epoch": 8.97, "grad_norm": 1.359375, "learning_rate": 0.0003976709712686858, "loss": 0.1683, "step": 216530 }, { "epoch": 8.97, "grad_norm": 0.84375, "learning_rate": 0.0003976622201088568, "loss": 0.1966, "step": 216540 }, { "epoch": 8.97, "grad_norm": 0.6484375, "learning_rate": 0.0003976534686711435, "loss": 0.1644, "step": 216550 }, { "epoch": 8.97, "grad_norm": 1.0390625, "learning_rate": 0.0003976447169555622, "loss": 0.1994, "step": 216560 }, { "epoch": 8.97, "grad_norm": 0.7265625, "learning_rate": 0.00039763596496212954, "loss": 0.191, "step": 216570 }, { "epoch": 8.97, "grad_norm": 0.65625, "learning_rate": 0.00039762721269086195, "loss": 0.265, "step": 216580 }, { "epoch": 8.97, "grad_norm": 0.671875, "learning_rate": 0.0003976184601417759, "loss": 0.1834, "step": 216590 }, { "epoch": 8.97, "grad_norm": 0.80078125, "learning_rate": 0.0003976097073148878, "loss": 0.1756, "step": 216600 }, { "epoch": 8.97, "grad_norm": 0.390625, "learning_rate": 0.00039760095421021417, "loss": 0.1915, "step": 216610 }, { "epoch": 8.97, "grad_norm": 0.76171875, "learning_rate": 0.0003975922008277715, "loss": 0.188, "step": 216620 }, { "epoch": 8.97, "grad_norm": 1.46875, "learning_rate": 0.0003975834471675763, "loss": 0.2033, "step": 216630 }, { "epoch": 8.97, "grad_norm": 0.87890625, "learning_rate": 0.00039757469322964495, "loss": 0.2415, "step": 216640 }, { "epoch": 8.97, "grad_norm": 0.83984375, "learning_rate": 0.00039756593901399395, "loss": 0.2084, "step": 216650 }, { "epoch": 8.97, "grad_norm": 2.125, "learning_rate": 0.0003975571845206398, "loss": 0.1774, "step": 216660 }, { "epoch": 8.97, "grad_norm": 0.58984375, "learning_rate": 0.00039754842974959903, "loss": 0.1767, "step": 216670 }, { "epoch": 8.97, "grad_norm": 0.326171875, "learning_rate": 0.00039753967470088796, "loss": 0.201, "step": 216680 }, { "epoch": 8.98, "grad_norm": 0.453125, "learning_rate": 0.0003975309193745231, "loss": 0.2375, "step": 216690 }, { "epoch": 8.98, "grad_norm": 0.50390625, "learning_rate": 0.00039752216377052116, "loss": 0.2234, "step": 216700 }, { "epoch": 8.98, "grad_norm": 0.88671875, "learning_rate": 0.00039751340788889833, "loss": 0.2154, "step": 216710 }, { "epoch": 8.98, "grad_norm": 0.0, "learning_rate": 0.00039750465172967123, "loss": 0.1795, "step": 216720 }, { "epoch": 8.98, "grad_norm": 0.98828125, "learning_rate": 0.0003974958952928562, "loss": 0.1937, "step": 216730 }, { "epoch": 8.98, "grad_norm": 0.64453125, "learning_rate": 0.00039748713857846996, "loss": 0.2079, "step": 216740 }, { "epoch": 8.98, "grad_norm": 0.80859375, "learning_rate": 0.0003974783815865288, "loss": 0.2026, "step": 216750 }, { "epoch": 8.98, "grad_norm": 0.72265625, "learning_rate": 0.00039746962431704924, "loss": 0.2207, "step": 216760 }, { "epoch": 8.98, "grad_norm": 0.984375, "learning_rate": 0.0003974608667700478, "loss": 0.2222, "step": 216770 }, { "epoch": 8.98, "grad_norm": 0.6953125, "learning_rate": 0.0003974521089455409, "loss": 0.1833, "step": 216780 }, { "epoch": 8.98, "grad_norm": 0.6328125, "learning_rate": 0.00039744335084354506, "loss": 0.1671, "step": 216790 }, { "epoch": 8.98, "grad_norm": 0.8125, "learning_rate": 0.00039743459246407677, "loss": 0.2187, "step": 216800 }, { "epoch": 8.98, "grad_norm": 0.64453125, "learning_rate": 0.00039742583380715247, "loss": 0.2206, "step": 216810 }, { "epoch": 8.98, "grad_norm": 0.5703125, "learning_rate": 0.0003974170748727887, "loss": 0.2002, "step": 216820 }, { "epoch": 8.98, "grad_norm": 0.361328125, "learning_rate": 0.0003974083156610019, "loss": 0.2351, "step": 216830 }, { "epoch": 8.98, "grad_norm": 0.404296875, "learning_rate": 0.0003973995561718087, "loss": 0.1944, "step": 216840 }, { "epoch": 8.98, "grad_norm": 1.1796875, "learning_rate": 0.00039739079640522526, "loss": 0.2113, "step": 216850 }, { "epoch": 8.98, "grad_norm": 1.53125, "learning_rate": 0.0003973820363612684, "loss": 0.1946, "step": 216860 }, { "epoch": 8.98, "grad_norm": 0.60546875, "learning_rate": 0.0003973732760399543, "loss": 0.181, "step": 216870 }, { "epoch": 8.98, "grad_norm": 0.6953125, "learning_rate": 0.0003973645154412997, "loss": 0.1724, "step": 216880 }, { "epoch": 8.98, "grad_norm": 0.890625, "learning_rate": 0.00039735575456532104, "loss": 0.2497, "step": 216890 }, { "epoch": 8.98, "grad_norm": 0.98828125, "learning_rate": 0.00039734699341203474, "loss": 0.2345, "step": 216900 }, { "epoch": 8.98, "grad_norm": 0.9609375, "learning_rate": 0.0003973382319814572, "loss": 0.1982, "step": 216910 }, { "epoch": 8.98, "grad_norm": 0.65234375, "learning_rate": 0.00039732947027360514, "loss": 0.1529, "step": 216920 }, { "epoch": 8.99, "grad_norm": 0.38671875, "learning_rate": 0.00039732070828849486, "loss": 0.2014, "step": 216930 }, { "epoch": 8.99, "grad_norm": 1.2421875, "learning_rate": 0.000397311946026143, "loss": 0.2228, "step": 216940 }, { "epoch": 8.99, "grad_norm": 0.76171875, "learning_rate": 0.0003973031834865659, "loss": 0.2154, "step": 216950 }, { "epoch": 8.99, "grad_norm": 0.4765625, "learning_rate": 0.0003972944206697802, "loss": 0.2057, "step": 216960 }, { "epoch": 8.99, "grad_norm": 0.6953125, "learning_rate": 0.0003972856575758023, "loss": 0.1933, "step": 216970 }, { "epoch": 8.99, "grad_norm": 0.5703125, "learning_rate": 0.00039727689420464854, "loss": 0.2275, "step": 216980 }, { "epoch": 8.99, "grad_norm": 0.89453125, "learning_rate": 0.0003972681305563357, "loss": 0.2, "step": 216990 }, { "epoch": 8.99, "grad_norm": 0.310546875, "learning_rate": 0.0003972593666308801, "loss": 0.2272, "step": 217000 }, { "epoch": 8.99, "grad_norm": 1.015625, "learning_rate": 0.00039725060242829825, "loss": 0.1769, "step": 217010 }, { "epoch": 8.99, "grad_norm": 0.98046875, "learning_rate": 0.00039724183794860677, "loss": 0.2041, "step": 217020 }, { "epoch": 8.99, "grad_norm": 0.53125, "learning_rate": 0.00039723307319182194, "loss": 0.2171, "step": 217030 }, { "epoch": 8.99, "grad_norm": 0.625, "learning_rate": 0.0003972243081579604, "loss": 0.2154, "step": 217040 }, { "epoch": 8.99, "grad_norm": 0.73828125, "learning_rate": 0.00039721554284703867, "loss": 0.2591, "step": 217050 }, { "epoch": 8.99, "grad_norm": 0.94140625, "learning_rate": 0.0003972067772590732, "loss": 0.2102, "step": 217060 }, { "epoch": 8.99, "grad_norm": 0.875, "learning_rate": 0.00039719801139408037, "loss": 0.2362, "step": 217070 }, { "epoch": 8.99, "grad_norm": 2.359375, "learning_rate": 0.0003971892452520768, "loss": 0.2233, "step": 217080 }, { "epoch": 8.99, "grad_norm": 0.79296875, "learning_rate": 0.00039718047883307894, "loss": 0.2195, "step": 217090 }, { "epoch": 8.99, "grad_norm": 1.1171875, "learning_rate": 0.0003971717121371034, "loss": 0.213, "step": 217100 }, { "epoch": 8.99, "grad_norm": 0.52734375, "learning_rate": 0.0003971629451641665, "loss": 0.1836, "step": 217110 }, { "epoch": 8.99, "grad_norm": 0.921875, "learning_rate": 0.0003971541779142849, "loss": 0.1897, "step": 217120 }, { "epoch": 8.99, "grad_norm": 0.85546875, "learning_rate": 0.000397145410387475, "loss": 0.2332, "step": 217130 }, { "epoch": 8.99, "grad_norm": 0.41796875, "learning_rate": 0.0003971366425837534, "loss": 0.172, "step": 217140 }, { "epoch": 8.99, "grad_norm": 0.2353515625, "learning_rate": 0.00039712787450313646, "loss": 0.1631, "step": 217150 }, { "epoch": 8.99, "grad_norm": 0.8203125, "learning_rate": 0.00039711910614564076, "loss": 0.1616, "step": 217160 }, { "epoch": 9.0, "grad_norm": 0.5234375, "learning_rate": 0.0003971103375112828, "loss": 0.1746, "step": 217170 }, { "epoch": 9.0, "grad_norm": 0.85546875, "learning_rate": 0.000397101568600079, "loss": 0.2046, "step": 217180 }, { "epoch": 9.0, "grad_norm": 0.89453125, "learning_rate": 0.00039709279941204604, "loss": 0.166, "step": 217190 }, { "epoch": 9.0, "grad_norm": 0.9453125, "learning_rate": 0.00039708402994720023, "loss": 0.2039, "step": 217200 }, { "epoch": 9.0, "grad_norm": 0.8515625, "learning_rate": 0.00039707526020555815, "loss": 0.2011, "step": 217210 }, { "epoch": 9.0, "grad_norm": 0.46875, "learning_rate": 0.0003970664901871364, "loss": 0.19, "step": 217220 }, { "epoch": 9.0, "grad_norm": 0.25, "learning_rate": 0.00039705771989195137, "loss": 0.1996, "step": 217230 }, { "epoch": 9.0, "grad_norm": 1.640625, "learning_rate": 0.00039704894932001956, "loss": 0.1757, "step": 217240 }, { "epoch": 9.0, "grad_norm": 1.984375, "learning_rate": 0.0003970401784713575, "loss": 0.2151, "step": 217250 }, { "epoch": 9.0, "grad_norm": 0.80078125, "learning_rate": 0.00039703140734598176, "loss": 0.2195, "step": 217260 }, { "epoch": 9.0, "grad_norm": 0.921875, "learning_rate": 0.0003970226359439088, "loss": 0.2235, "step": 217270 }, { "epoch": 9.0, "grad_norm": 0.85546875, "learning_rate": 0.00039701386426515504, "loss": 0.179, "step": 217280 }, { "epoch": 9.0, "grad_norm": 0.8515625, "learning_rate": 0.00039700509230973703, "loss": 0.2005, "step": 217290 }, { "epoch": 9.0, "grad_norm": 0.4609375, "learning_rate": 0.0003969963200776714, "loss": 0.2044, "step": 217300 }, { "epoch": 9.0, "grad_norm": 0.416015625, "learning_rate": 0.0003969875475689746, "loss": 0.1624, "step": 217310 }, { "epoch": 9.0, "grad_norm": 0.7265625, "learning_rate": 0.00039697877478366293, "loss": 0.2243, "step": 217320 }, { "epoch": 9.0, "grad_norm": 1.6796875, "learning_rate": 0.0003969700017217533, "loss": 0.2086, "step": 217330 }, { "epoch": 9.0, "grad_norm": 0.26171875, "learning_rate": 0.0003969612283832619, "loss": 0.1875, "step": 217340 }, { "epoch": 9.0, "grad_norm": 0.81640625, "learning_rate": 0.0003969524547682053, "loss": 0.2134, "step": 217350 }, { "epoch": 9.0, "grad_norm": 0.0, "learning_rate": 0.00039694368087660013, "loss": 0.2171, "step": 217360 }, { "epoch": 9.0, "grad_norm": 0.275390625, "learning_rate": 0.00039693490670846275, "loss": 0.1811, "step": 217370 }, { "epoch": 9.0, "grad_norm": 1.1015625, "learning_rate": 0.0003969261322638098, "loss": 0.1959, "step": 217380 }, { "epoch": 9.0, "grad_norm": 0.68359375, "learning_rate": 0.00039691735754265775, "loss": 0.1879, "step": 217390 }, { "epoch": 9.0, "grad_norm": 0.765625, "learning_rate": 0.000396908582545023, "loss": 0.224, "step": 217400 }, { "epoch": 9.01, "grad_norm": 0.60546875, "learning_rate": 0.00039689980727092224, "loss": 0.1905, "step": 217410 }, { "epoch": 9.01, "grad_norm": 0.58984375, "learning_rate": 0.0003968910317203719, "loss": 0.1866, "step": 217420 }, { "epoch": 9.01, "grad_norm": 0.7265625, "learning_rate": 0.00039688225589338844, "loss": 0.122, "step": 217430 }, { "epoch": 9.01, "grad_norm": 1.1015625, "learning_rate": 0.00039687347978998856, "loss": 0.1927, "step": 217440 }, { "epoch": 9.01, "grad_norm": 0.640625, "learning_rate": 0.0003968647034101885, "loss": 0.1948, "step": 217450 }, { "epoch": 9.01, "grad_norm": 0.84765625, "learning_rate": 0.0003968559267540051, "loss": 0.2053, "step": 217460 }, { "epoch": 9.01, "grad_norm": 0.7578125, "learning_rate": 0.00039684714982145454, "loss": 0.1809, "step": 217470 }, { "epoch": 9.01, "grad_norm": 1.671875, "learning_rate": 0.00039683837261255355, "loss": 0.2216, "step": 217480 }, { "epoch": 9.01, "grad_norm": 0.703125, "learning_rate": 0.00039682959512731865, "loss": 0.1631, "step": 217490 }, { "epoch": 9.01, "grad_norm": 1.2890625, "learning_rate": 0.00039682081736576626, "loss": 0.2103, "step": 217500 }, { "epoch": 9.01, "grad_norm": 1.015625, "learning_rate": 0.00039681203932791296, "loss": 0.1628, "step": 217510 }, { "epoch": 9.01, "grad_norm": 0.89453125, "learning_rate": 0.0003968032610137753, "loss": 0.1731, "step": 217520 }, { "epoch": 9.01, "grad_norm": 0.8203125, "learning_rate": 0.0003967944824233697, "loss": 0.2201, "step": 217530 }, { "epoch": 9.01, "grad_norm": 0.62890625, "learning_rate": 0.0003967857035567127, "loss": 0.2197, "step": 217540 }, { "epoch": 9.01, "grad_norm": 1.3046875, "learning_rate": 0.000396776924413821, "loss": 0.1495, "step": 217550 }, { "epoch": 9.01, "grad_norm": 0.92578125, "learning_rate": 0.0003967681449947109, "loss": 0.2047, "step": 217560 }, { "epoch": 9.01, "grad_norm": 0.69921875, "learning_rate": 0.0003967593652993989, "loss": 0.1862, "step": 217570 }, { "epoch": 9.01, "grad_norm": 0.58984375, "learning_rate": 0.0003967505853279018, "loss": 0.2126, "step": 217580 }, { "epoch": 9.01, "grad_norm": 0.474609375, "learning_rate": 0.0003967418050802358, "loss": 0.1385, "step": 217590 }, { "epoch": 9.01, "grad_norm": 0.859375, "learning_rate": 0.0003967330245564177, "loss": 0.1739, "step": 217600 }, { "epoch": 9.01, "grad_norm": 0.8359375, "learning_rate": 0.0003967242437564638, "loss": 0.2125, "step": 217610 }, { "epoch": 9.01, "grad_norm": 0.859375, "learning_rate": 0.0003967154626803907, "loss": 0.2429, "step": 217620 }, { "epoch": 9.01, "grad_norm": 2.515625, "learning_rate": 0.000396706681328215, "loss": 0.1837, "step": 217630 }, { "epoch": 9.01, "grad_norm": 0.361328125, "learning_rate": 0.00039669789969995316, "loss": 0.1632, "step": 217640 }, { "epoch": 9.02, "grad_norm": 0.4921875, "learning_rate": 0.00039668911779562176, "loss": 0.2188, "step": 217650 }, { "epoch": 9.02, "grad_norm": 0.65625, "learning_rate": 0.0003966803356152372, "loss": 0.1722, "step": 217660 }, { "epoch": 9.02, "grad_norm": 0.99609375, "learning_rate": 0.00039667155315881614, "loss": 0.1743, "step": 217670 }, { "epoch": 9.02, "grad_norm": 0.94140625, "learning_rate": 0.000396662770426375, "loss": 0.2225, "step": 217680 }, { "epoch": 9.02, "grad_norm": 0.6875, "learning_rate": 0.0003966539874179304, "loss": 0.1921, "step": 217690 }, { "epoch": 9.02, "grad_norm": 1.7890625, "learning_rate": 0.00039664520413349884, "loss": 0.2052, "step": 217700 }, { "epoch": 9.02, "grad_norm": 0.5625, "learning_rate": 0.00039663642057309687, "loss": 0.1512, "step": 217710 }, { "epoch": 9.02, "grad_norm": 0.51953125, "learning_rate": 0.00039662763673674095, "loss": 0.1422, "step": 217720 }, { "epoch": 9.02, "grad_norm": 0.76953125, "learning_rate": 0.0003966188526244476, "loss": 0.239, "step": 217730 }, { "epoch": 9.02, "grad_norm": 0.99609375, "learning_rate": 0.0003966100682362335, "loss": 0.1567, "step": 217740 }, { "epoch": 9.02, "grad_norm": 1.03125, "learning_rate": 0.00039660128357211507, "loss": 0.1641, "step": 217750 }, { "epoch": 9.02, "grad_norm": 0.318359375, "learning_rate": 0.0003965924986321088, "loss": 0.2294, "step": 217760 }, { "epoch": 9.02, "grad_norm": 0.6796875, "learning_rate": 0.00039658371341623136, "loss": 0.1812, "step": 217770 }, { "epoch": 9.02, "grad_norm": 2.609375, "learning_rate": 0.00039657492792449914, "loss": 0.1691, "step": 217780 }, { "epoch": 9.02, "grad_norm": 0.7265625, "learning_rate": 0.0003965661421569287, "loss": 0.168, "step": 217790 }, { "epoch": 9.02, "grad_norm": 0.609375, "learning_rate": 0.00039655735611353674, "loss": 0.1698, "step": 217800 }, { "epoch": 9.02, "grad_norm": 0.0, "learning_rate": 0.0003965485697943395, "loss": 0.231, "step": 217810 }, { "epoch": 9.02, "grad_norm": 1.1640625, "learning_rate": 0.0003965397831993538, "loss": 0.1868, "step": 217820 }, { "epoch": 9.02, "grad_norm": 0.76171875, "learning_rate": 0.00039653099632859604, "loss": 0.2219, "step": 217830 }, { "epoch": 9.02, "grad_norm": 0.76953125, "learning_rate": 0.00039652220918208264, "loss": 0.1965, "step": 217840 }, { "epoch": 9.02, "grad_norm": 2.71875, "learning_rate": 0.00039651342175983043, "loss": 0.2002, "step": 217850 }, { "epoch": 9.02, "grad_norm": 1.0078125, "learning_rate": 0.00039650463406185564, "loss": 0.1705, "step": 217860 }, { "epoch": 9.02, "grad_norm": 1.0234375, "learning_rate": 0.000396495846088175, "loss": 0.1491, "step": 217870 }, { "epoch": 9.02, "grad_norm": 0.7734375, "learning_rate": 0.00039648705783880514, "loss": 0.219, "step": 217880 }, { "epoch": 9.02, "grad_norm": 0.51953125, "learning_rate": 0.00039647826931376223, "loss": 0.1787, "step": 217890 }, { "epoch": 9.03, "grad_norm": 0.7109375, "learning_rate": 0.0003964694805130632, "loss": 0.2108, "step": 217900 }, { "epoch": 9.03, "grad_norm": 0.6171875, "learning_rate": 0.00039646069143672435, "loss": 0.1063, "step": 217910 }, { "epoch": 9.03, "grad_norm": 0.59375, "learning_rate": 0.00039645190208476233, "loss": 0.1444, "step": 217920 }, { "epoch": 9.03, "grad_norm": 0.31640625, "learning_rate": 0.00039644311245719363, "loss": 0.1659, "step": 217930 }, { "epoch": 9.03, "grad_norm": 0.494140625, "learning_rate": 0.0003964343225540348, "loss": 0.1364, "step": 217940 }, { "epoch": 9.03, "grad_norm": 0.376953125, "learning_rate": 0.00039642553237530237, "loss": 0.1973, "step": 217950 }, { "epoch": 9.03, "grad_norm": 0.76171875, "learning_rate": 0.000396416741921013, "loss": 0.1978, "step": 217960 }, { "epoch": 9.03, "grad_norm": 0.703125, "learning_rate": 0.000396407951191183, "loss": 0.1782, "step": 217970 }, { "epoch": 9.03, "grad_norm": 0.63671875, "learning_rate": 0.0003963991601858291, "loss": 0.2286, "step": 217980 }, { "epoch": 9.03, "grad_norm": 1.578125, "learning_rate": 0.0003963903689049678, "loss": 0.2382, "step": 217990 }, { "epoch": 9.03, "grad_norm": 0.9921875, "learning_rate": 0.00039638157734861565, "loss": 0.1794, "step": 218000 }, { "epoch": 9.03, "grad_norm": 0.81640625, "learning_rate": 0.0003963727855167891, "loss": 0.1642, "step": 218010 }, { "epoch": 9.03, "grad_norm": 0.78125, "learning_rate": 0.0003963639934095049, "loss": 0.2376, "step": 218020 }, { "epoch": 9.03, "grad_norm": 1.0546875, "learning_rate": 0.00039635520102677935, "loss": 0.2027, "step": 218030 }, { "epoch": 9.03, "grad_norm": 0.49609375, "learning_rate": 0.0003963464083686292, "loss": 0.1511, "step": 218040 }, { "epoch": 9.03, "grad_norm": 0.494140625, "learning_rate": 0.0003963376154350709, "loss": 0.1864, "step": 218050 }, { "epoch": 9.03, "grad_norm": 0.953125, "learning_rate": 0.000396328822226121, "loss": 0.2096, "step": 218060 }, { "epoch": 9.03, "grad_norm": 0.89453125, "learning_rate": 0.0003963200287417961, "loss": 0.1626, "step": 218070 }, { "epoch": 9.03, "grad_norm": 2.34375, "learning_rate": 0.0003963112349821126, "loss": 0.2189, "step": 218080 }, { "epoch": 9.03, "grad_norm": 0.470703125, "learning_rate": 0.00039630244094708724, "loss": 0.2036, "step": 218090 }, { "epoch": 9.03, "grad_norm": 0.51171875, "learning_rate": 0.0003962936466367364, "loss": 0.1768, "step": 218100 }, { "epoch": 9.03, "grad_norm": 0.765625, "learning_rate": 0.0003962848520510768, "loss": 0.1848, "step": 218110 }, { "epoch": 9.03, "grad_norm": 0.51171875, "learning_rate": 0.00039627605719012496, "loss": 0.1621, "step": 218120 }, { "epoch": 9.03, "grad_norm": 0.57421875, "learning_rate": 0.0003962672620538972, "loss": 0.2032, "step": 218130 }, { "epoch": 9.04, "grad_norm": 0.45703125, "learning_rate": 0.0003962584666424104, "loss": 0.2105, "step": 218140 }, { "epoch": 9.04, "grad_norm": 0.8203125, "learning_rate": 0.00039624967095568093, "loss": 0.2046, "step": 218150 }, { "epoch": 9.04, "grad_norm": 0.8359375, "learning_rate": 0.0003962408749937253, "loss": 0.1917, "step": 218160 }, { "epoch": 9.04, "grad_norm": 1.25, "learning_rate": 0.0003962320787565602, "loss": 0.2061, "step": 218170 }, { "epoch": 9.04, "grad_norm": 0.8203125, "learning_rate": 0.0003962232822442021, "loss": 0.2448, "step": 218180 }, { "epoch": 9.04, "grad_norm": 1.4453125, "learning_rate": 0.0003962144854566676, "loss": 0.1589, "step": 218190 }, { "epoch": 9.04, "grad_norm": 1.5859375, "learning_rate": 0.0003962056883939732, "loss": 0.2046, "step": 218200 }, { "epoch": 9.04, "grad_norm": 0.8671875, "learning_rate": 0.0003961968910561354, "loss": 0.2205, "step": 218210 }, { "epoch": 9.04, "grad_norm": 0.82421875, "learning_rate": 0.000396188093443171, "loss": 0.2202, "step": 218220 }, { "epoch": 9.04, "grad_norm": 0.27734375, "learning_rate": 0.0003961792955550964, "loss": 0.1781, "step": 218230 }, { "epoch": 9.04, "grad_norm": 0.4140625, "learning_rate": 0.000396170497391928, "loss": 0.1746, "step": 218240 }, { "epoch": 9.04, "grad_norm": 1.34375, "learning_rate": 0.0003961616989536826, "loss": 0.166, "step": 218250 }, { "epoch": 9.04, "grad_norm": 0.5078125, "learning_rate": 0.00039615290024037664, "loss": 0.2289, "step": 218260 }, { "epoch": 9.04, "grad_norm": 0.77734375, "learning_rate": 0.0003961441012520267, "loss": 0.188, "step": 218270 }, { "epoch": 9.04, "grad_norm": 0.9375, "learning_rate": 0.00039613530198864946, "loss": 0.1932, "step": 218280 }, { "epoch": 9.04, "grad_norm": 1.1484375, "learning_rate": 0.00039612650245026115, "loss": 0.1805, "step": 218290 }, { "epoch": 9.04, "grad_norm": 0.765625, "learning_rate": 0.0003961177026368787, "loss": 0.1843, "step": 218300 }, { "epoch": 9.04, "grad_norm": 0.49609375, "learning_rate": 0.0003961089025485185, "loss": 0.1956, "step": 218310 }, { "epoch": 9.04, "grad_norm": 0.5, "learning_rate": 0.0003961001021851971, "loss": 0.1927, "step": 218320 }, { "epoch": 9.04, "grad_norm": 0.54296875, "learning_rate": 0.0003960913015469311, "loss": 0.2218, "step": 218330 }, { "epoch": 9.04, "grad_norm": 1.0703125, "learning_rate": 0.00039608250063373696, "loss": 0.1816, "step": 218340 }, { "epoch": 9.04, "grad_norm": 0.34765625, "learning_rate": 0.00039607369944563145, "loss": 0.1897, "step": 218350 }, { "epoch": 9.04, "grad_norm": 0.7265625, "learning_rate": 0.00039606489798263097, "loss": 0.2, "step": 218360 }, { "epoch": 9.04, "grad_norm": 0.7421875, "learning_rate": 0.000396056096244752, "loss": 0.2316, "step": 218370 }, { "epoch": 9.05, "grad_norm": 0.6796875, "learning_rate": 0.00039604729423201143, "loss": 0.2121, "step": 218380 }, { "epoch": 9.05, "grad_norm": 0.65625, "learning_rate": 0.00039603849194442555, "loss": 0.1533, "step": 218390 }, { "epoch": 9.05, "grad_norm": 1.53125, "learning_rate": 0.000396029689382011, "loss": 0.2159, "step": 218400 }, { "epoch": 9.05, "grad_norm": 0.828125, "learning_rate": 0.0003960208865447843, "loss": 0.1948, "step": 218410 }, { "epoch": 9.05, "grad_norm": 0.734375, "learning_rate": 0.00039601208343276206, "loss": 0.2178, "step": 218420 }, { "epoch": 9.05, "grad_norm": 0.8671875, "learning_rate": 0.00039600328004596095, "loss": 0.194, "step": 218430 }, { "epoch": 9.05, "grad_norm": 1.4609375, "learning_rate": 0.0003959944763843973, "loss": 0.2249, "step": 218440 }, { "epoch": 9.05, "grad_norm": 0.98046875, "learning_rate": 0.0003959856724480879, "loss": 0.1635, "step": 218450 }, { "epoch": 9.05, "grad_norm": 0.40625, "learning_rate": 0.0003959768682370492, "loss": 0.1817, "step": 218460 }, { "epoch": 9.05, "grad_norm": 0.59765625, "learning_rate": 0.00039596806375129777, "loss": 0.2409, "step": 218470 }, { "epoch": 9.05, "grad_norm": 1.2890625, "learning_rate": 0.0003959592589908503, "loss": 0.1799, "step": 218480 }, { "epoch": 9.05, "grad_norm": 0.59765625, "learning_rate": 0.00039595045395572317, "loss": 0.2098, "step": 218490 }, { "epoch": 9.05, "grad_norm": 0.61328125, "learning_rate": 0.00039594164864593305, "loss": 0.1954, "step": 218500 }, { "epoch": 9.05, "grad_norm": 0.150390625, "learning_rate": 0.0003959328430614966, "loss": 0.168, "step": 218510 }, { "epoch": 9.05, "grad_norm": 0.4375, "learning_rate": 0.0003959240372024302, "loss": 0.2196, "step": 218520 }, { "epoch": 9.05, "grad_norm": 0.67578125, "learning_rate": 0.00039591523106875055, "loss": 0.2119, "step": 218530 }, { "epoch": 9.05, "grad_norm": 0.75390625, "learning_rate": 0.0003959064246604742, "loss": 0.2237, "step": 218540 }, { "epoch": 9.05, "grad_norm": 0.458984375, "learning_rate": 0.0003958976179776177, "loss": 0.193, "step": 218550 }, { "epoch": 9.05, "grad_norm": 0.56640625, "learning_rate": 0.00039588881102019767, "loss": 0.2231, "step": 218560 }, { "epoch": 9.05, "grad_norm": 0.9921875, "learning_rate": 0.0003958800037882306, "loss": 0.1917, "step": 218570 }, { "epoch": 9.05, "grad_norm": 1.765625, "learning_rate": 0.00039587119628173317, "loss": 0.2063, "step": 218580 }, { "epoch": 9.05, "grad_norm": 1.1171875, "learning_rate": 0.00039586238850072196, "loss": 0.1981, "step": 218590 }, { "epoch": 9.05, "grad_norm": 0.44140625, "learning_rate": 0.00039585358044521337, "loss": 0.2499, "step": 218600 }, { "epoch": 9.05, "grad_norm": 0.498046875, "learning_rate": 0.00039584477211522415, "loss": 0.1968, "step": 218610 }, { "epoch": 9.06, "grad_norm": 0.61328125, "learning_rate": 0.00039583596351077077, "loss": 0.2045, "step": 218620 }, { "epoch": 9.06, "grad_norm": 0.84765625, "learning_rate": 0.00039582715463186993, "loss": 0.2096, "step": 218630 }, { "epoch": 9.06, "grad_norm": 1.1953125, "learning_rate": 0.0003958183454785381, "loss": 0.194, "step": 218640 }, { "epoch": 9.06, "grad_norm": 0.4609375, "learning_rate": 0.00039580953605079187, "loss": 0.1467, "step": 218650 }, { "epoch": 9.06, "grad_norm": 0.8828125, "learning_rate": 0.0003958007263486478, "loss": 0.1515, "step": 218660 }, { "epoch": 9.06, "grad_norm": 0.89453125, "learning_rate": 0.00039579191637212265, "loss": 0.1594, "step": 218670 }, { "epoch": 9.06, "grad_norm": 0.83203125, "learning_rate": 0.0003957831061212327, "loss": 0.1948, "step": 218680 }, { "epoch": 9.06, "grad_norm": 1.0390625, "learning_rate": 0.0003957742955959948, "loss": 0.256, "step": 218690 }, { "epoch": 9.06, "grad_norm": 0.96875, "learning_rate": 0.0003957654847964254, "loss": 0.2194, "step": 218700 }, { "epoch": 9.06, "grad_norm": 0.625, "learning_rate": 0.000395756673722541, "loss": 0.2294, "step": 218710 }, { "epoch": 9.06, "grad_norm": 0.439453125, "learning_rate": 0.0003957478623743584, "loss": 0.2117, "step": 218720 }, { "epoch": 9.06, "grad_norm": 1.2421875, "learning_rate": 0.000395739050751894, "loss": 0.2125, "step": 218730 }, { "epoch": 9.06, "grad_norm": 1.078125, "learning_rate": 0.00039573023885516444, "loss": 0.2012, "step": 218740 }, { "epoch": 9.06, "grad_norm": 0.7109375, "learning_rate": 0.00039572142668418633, "loss": 0.1742, "step": 218750 }, { "epoch": 9.06, "grad_norm": 0.4921875, "learning_rate": 0.0003957126142389762, "loss": 0.2241, "step": 218760 }, { "epoch": 9.06, "grad_norm": 0.197265625, "learning_rate": 0.0003957038015195508, "loss": 0.2028, "step": 218770 }, { "epoch": 9.06, "grad_norm": 1.5078125, "learning_rate": 0.00039569498852592646, "loss": 0.1959, "step": 218780 }, { "epoch": 9.06, "grad_norm": 1.1640625, "learning_rate": 0.00039568617525811983, "loss": 0.1828, "step": 218790 }, { "epoch": 9.06, "grad_norm": 1.0859375, "learning_rate": 0.00039567736171614763, "loss": 0.1942, "step": 218800 }, { "epoch": 9.06, "grad_norm": 1.0234375, "learning_rate": 0.00039566854790002635, "loss": 0.2085, "step": 218810 }, { "epoch": 9.06, "grad_norm": 0.71875, "learning_rate": 0.0003956597338097726, "loss": 0.1601, "step": 218820 }, { "epoch": 9.06, "grad_norm": 0.85546875, "learning_rate": 0.000395650919445403, "loss": 0.1702, "step": 218830 }, { "epoch": 9.06, "grad_norm": 1.4453125, "learning_rate": 0.000395642104806934, "loss": 0.1135, "step": 218840 }, { "epoch": 9.06, "grad_norm": 0.6796875, "learning_rate": 0.0003956332898943824, "loss": 0.2046, "step": 218850 }, { "epoch": 9.07, "grad_norm": 0.55859375, "learning_rate": 0.00039562447470776465, "loss": 0.1386, "step": 218860 }, { "epoch": 9.07, "grad_norm": 0.87109375, "learning_rate": 0.00039561565924709733, "loss": 0.2027, "step": 218870 }, { "epoch": 9.07, "grad_norm": 1.7734375, "learning_rate": 0.0003956068435123971, "loss": 0.151, "step": 218880 }, { "epoch": 9.07, "grad_norm": 0.66796875, "learning_rate": 0.0003955980275036804, "loss": 0.1848, "step": 218890 }, { "epoch": 9.07, "grad_norm": 0.62109375, "learning_rate": 0.000395589211220964, "loss": 0.2227, "step": 218900 }, { "epoch": 9.07, "grad_norm": 0.765625, "learning_rate": 0.0003955803946642645, "loss": 0.2416, "step": 218910 }, { "epoch": 9.07, "grad_norm": 1.5625, "learning_rate": 0.00039557157783359835, "loss": 0.1756, "step": 218920 }, { "epoch": 9.07, "grad_norm": 0.55859375, "learning_rate": 0.0003955627607289822, "loss": 0.1873, "step": 218930 }, { "epoch": 9.07, "grad_norm": 2.859375, "learning_rate": 0.0003955539433504327, "loss": 0.2405, "step": 218940 }, { "epoch": 9.07, "grad_norm": 0.498046875, "learning_rate": 0.00039554512569796643, "loss": 0.2201, "step": 218950 }, { "epoch": 9.07, "grad_norm": 0.5859375, "learning_rate": 0.00039553630777159986, "loss": 0.2164, "step": 218960 }, { "epoch": 9.07, "grad_norm": 0.8046875, "learning_rate": 0.0003955274895713497, "loss": 0.2411, "step": 218970 }, { "epoch": 9.07, "grad_norm": 1.6953125, "learning_rate": 0.0003955186710972326, "loss": 0.2464, "step": 218980 }, { "epoch": 9.07, "grad_norm": 0.71875, "learning_rate": 0.00039550985234926495, "loss": 0.2154, "step": 218990 }, { "epoch": 9.07, "grad_norm": 0.65234375, "learning_rate": 0.00039550103332746354, "loss": 0.1816, "step": 219000 }, { "epoch": 9.07, "grad_norm": 0.703125, "learning_rate": 0.00039549221403184493, "loss": 0.1555, "step": 219010 }, { "epoch": 9.07, "grad_norm": 0.50390625, "learning_rate": 0.00039548339446242564, "loss": 0.2191, "step": 219020 }, { "epoch": 9.07, "grad_norm": 2.109375, "learning_rate": 0.0003954745746192223, "loss": 0.2228, "step": 219030 }, { "epoch": 9.07, "grad_norm": 0.416015625, "learning_rate": 0.0003954657545022516, "loss": 0.2493, "step": 219040 }, { "epoch": 9.07, "grad_norm": 0.5390625, "learning_rate": 0.00039545693411152996, "loss": 0.2252, "step": 219050 }, { "epoch": 9.07, "grad_norm": 0.640625, "learning_rate": 0.0003954481134470741, "loss": 0.1516, "step": 219060 }, { "epoch": 9.07, "grad_norm": 1.1484375, "learning_rate": 0.00039543929250890065, "loss": 0.1934, "step": 219070 }, { "epoch": 9.07, "grad_norm": 0.99609375, "learning_rate": 0.0003954304712970261, "loss": 0.1925, "step": 219080 }, { "epoch": 9.07, "grad_norm": 0.94140625, "learning_rate": 0.00039542164981146713, "loss": 0.2058, "step": 219090 }, { "epoch": 9.08, "grad_norm": 0.64453125, "learning_rate": 0.0003954128280522403, "loss": 0.1909, "step": 219100 }, { "epoch": 9.08, "grad_norm": 1.6328125, "learning_rate": 0.0003954040060193623, "loss": 0.1937, "step": 219110 }, { "epoch": 9.08, "grad_norm": 0.921875, "learning_rate": 0.0003953951837128496, "loss": 0.2166, "step": 219120 }, { "epoch": 9.08, "grad_norm": 1.453125, "learning_rate": 0.00039538636113271885, "loss": 0.2365, "step": 219130 }, { "epoch": 9.08, "grad_norm": 0.96484375, "learning_rate": 0.0003953775382789867, "loss": 0.1777, "step": 219140 }, { "epoch": 9.08, "grad_norm": 0.64453125, "learning_rate": 0.0003953687151516697, "loss": 0.1974, "step": 219150 }, { "epoch": 9.08, "grad_norm": 0.734375, "learning_rate": 0.0003953598917507845, "loss": 0.1854, "step": 219160 }, { "epoch": 9.08, "grad_norm": 0.52734375, "learning_rate": 0.0003953510680763477, "loss": 0.2193, "step": 219170 }, { "epoch": 9.08, "grad_norm": 0.298828125, "learning_rate": 0.00039534224412837585, "loss": 0.2032, "step": 219180 }, { "epoch": 9.08, "grad_norm": 1.9453125, "learning_rate": 0.0003953334199068857, "loss": 0.1647, "step": 219190 }, { "epoch": 9.08, "grad_norm": 1.375, "learning_rate": 0.0003953245954118936, "loss": 0.2278, "step": 219200 }, { "epoch": 9.08, "grad_norm": 1.1015625, "learning_rate": 0.0003953157706434163, "loss": 0.2464, "step": 219210 }, { "epoch": 9.08, "grad_norm": 0.271484375, "learning_rate": 0.0003953069456014705, "loss": 0.1655, "step": 219220 }, { "epoch": 9.08, "grad_norm": 0.326171875, "learning_rate": 0.0003952981202860727, "loss": 0.1901, "step": 219230 }, { "epoch": 9.08, "grad_norm": 0.53515625, "learning_rate": 0.0003952892946972395, "loss": 0.1781, "step": 219240 }, { "epoch": 9.08, "grad_norm": 0.921875, "learning_rate": 0.00039528046883498757, "loss": 0.1568, "step": 219250 }, { "epoch": 9.08, "grad_norm": 1.0546875, "learning_rate": 0.00039527164269933347, "loss": 0.1915, "step": 219260 }, { "epoch": 9.08, "grad_norm": 1.5703125, "learning_rate": 0.00039526281629029383, "loss": 0.2119, "step": 219270 }, { "epoch": 9.08, "grad_norm": 0.55859375, "learning_rate": 0.00039525398960788525, "loss": 0.1788, "step": 219280 }, { "epoch": 9.08, "grad_norm": 0.72265625, "learning_rate": 0.0003952451626521244, "loss": 0.2788, "step": 219290 }, { "epoch": 9.08, "grad_norm": 0.318359375, "learning_rate": 0.0003952363354230277, "loss": 0.1607, "step": 219300 }, { "epoch": 9.08, "grad_norm": 1.4609375, "learning_rate": 0.00039522750792061204, "loss": 0.1784, "step": 219310 }, { "epoch": 9.08, "grad_norm": 0.57421875, "learning_rate": 0.0003952186801448939, "loss": 0.2291, "step": 219320 }, { "epoch": 9.08, "grad_norm": 0.84375, "learning_rate": 0.00039520985209588977, "loss": 0.1893, "step": 219330 }, { "epoch": 9.09, "grad_norm": 0.1943359375, "learning_rate": 0.00039520102377361646, "loss": 0.1752, "step": 219340 }, { "epoch": 9.09, "grad_norm": 1.0078125, "learning_rate": 0.00039519219517809053, "loss": 0.2352, "step": 219350 }, { "epoch": 9.09, "grad_norm": 0.796875, "learning_rate": 0.0003951833663093285, "loss": 0.2246, "step": 219360 }, { "epoch": 9.09, "grad_norm": 1.3359375, "learning_rate": 0.0003951745371673471, "loss": 0.2551, "step": 219370 }, { "epoch": 9.09, "grad_norm": 0.640625, "learning_rate": 0.00039516570775216287, "loss": 0.1612, "step": 219380 }, { "epoch": 9.09, "grad_norm": 0.84765625, "learning_rate": 0.00039515687806379243, "loss": 0.2214, "step": 219390 }, { "epoch": 9.09, "grad_norm": 0.93359375, "learning_rate": 0.0003951480481022525, "loss": 0.1643, "step": 219400 }, { "epoch": 9.09, "grad_norm": 0.5703125, "learning_rate": 0.0003951392178675596, "loss": 0.2185, "step": 219410 }, { "epoch": 9.09, "grad_norm": 0.6875, "learning_rate": 0.0003951303873597303, "loss": 0.1387, "step": 219420 }, { "epoch": 9.09, "grad_norm": 0.89453125, "learning_rate": 0.00039512155657878134, "loss": 0.1792, "step": 219430 }, { "epoch": 9.09, "grad_norm": 0.419921875, "learning_rate": 0.0003951127255247292, "loss": 0.1814, "step": 219440 }, { "epoch": 9.09, "grad_norm": 0.93359375, "learning_rate": 0.00039510389419759065, "loss": 0.1947, "step": 219450 }, { "epoch": 9.09, "grad_norm": 0.625, "learning_rate": 0.0003950950625973823, "loss": 0.1952, "step": 219460 }, { "epoch": 9.09, "grad_norm": 0.24609375, "learning_rate": 0.0003950862307241206, "loss": 0.1516, "step": 219470 }, { "epoch": 9.09, "grad_norm": 0.796875, "learning_rate": 0.0003950773985778223, "loss": 0.229, "step": 219480 }, { "epoch": 9.09, "grad_norm": 1.21875, "learning_rate": 0.0003950685661585041, "loss": 0.2064, "step": 219490 }, { "epoch": 9.09, "grad_norm": 0.71484375, "learning_rate": 0.0003950597334661824, "loss": 0.187, "step": 219500 }, { "epoch": 9.09, "grad_norm": 1.6484375, "learning_rate": 0.000395050900500874, "loss": 0.2147, "step": 219510 }, { "epoch": 9.09, "grad_norm": 0.87890625, "learning_rate": 0.0003950420672625955, "loss": 0.2309, "step": 219520 }, { "epoch": 9.09, "grad_norm": 0.59375, "learning_rate": 0.00039503323375136346, "loss": 0.1468, "step": 219530 }, { "epoch": 9.09, "grad_norm": 0.97265625, "learning_rate": 0.0003950243999671945, "loss": 0.2437, "step": 219540 }, { "epoch": 9.09, "grad_norm": 0.90625, "learning_rate": 0.00039501556591010537, "loss": 0.172, "step": 219550 }, { "epoch": 9.09, "grad_norm": 0.87109375, "learning_rate": 0.00039500673158011256, "loss": 0.1765, "step": 219560 }, { "epoch": 9.09, "grad_norm": 0.89453125, "learning_rate": 0.00039499789697723263, "loss": 0.1609, "step": 219570 }, { "epoch": 9.09, "grad_norm": 0.91796875, "learning_rate": 0.00039498906210148253, "loss": 0.2222, "step": 219580 }, { "epoch": 9.1, "grad_norm": 0.470703125, "learning_rate": 0.00039498022695287854, "loss": 0.1693, "step": 219590 }, { "epoch": 9.1, "grad_norm": 1.3046875, "learning_rate": 0.00039497139153143746, "loss": 0.1448, "step": 219600 }, { "epoch": 9.1, "grad_norm": 1.171875, "learning_rate": 0.0003949625558371759, "loss": 0.2161, "step": 219610 }, { "epoch": 9.1, "grad_norm": 0.0, "learning_rate": 0.0003949537198701104, "loss": 0.1958, "step": 219620 }, { "epoch": 9.1, "grad_norm": 0.96484375, "learning_rate": 0.00039494488363025766, "loss": 0.1417, "step": 219630 }, { "epoch": 9.1, "grad_norm": 0.7109375, "learning_rate": 0.0003949360471176343, "loss": 0.2206, "step": 219640 }, { "epoch": 9.1, "grad_norm": 0.85546875, "learning_rate": 0.000394927210332257, "loss": 0.1999, "step": 219650 }, { "epoch": 9.1, "grad_norm": 0.859375, "learning_rate": 0.0003949183732741423, "loss": 0.1907, "step": 219660 }, { "epoch": 9.1, "grad_norm": 0.470703125, "learning_rate": 0.0003949095359433069, "loss": 0.2141, "step": 219670 }, { "epoch": 9.1, "grad_norm": 0.8828125, "learning_rate": 0.00039490069833976736, "loss": 0.2265, "step": 219680 }, { "epoch": 9.1, "grad_norm": 0.39453125, "learning_rate": 0.0003948918604635404, "loss": 0.1605, "step": 219690 }, { "epoch": 9.1, "grad_norm": 0.7109375, "learning_rate": 0.00039488302231464255, "loss": 0.1907, "step": 219700 }, { "epoch": 9.1, "grad_norm": 1.203125, "learning_rate": 0.0003948741838930906, "loss": 0.2027, "step": 219710 }, { "epoch": 9.1, "grad_norm": 0.65625, "learning_rate": 0.000394865345198901, "loss": 0.1733, "step": 219720 }, { "epoch": 9.1, "grad_norm": 1.546875, "learning_rate": 0.00039485650623209044, "loss": 0.1982, "step": 219730 }, { "epoch": 9.1, "grad_norm": 0.306640625, "learning_rate": 0.00039484766699267565, "loss": 0.2104, "step": 219740 }, { "epoch": 9.1, "grad_norm": 0.9140625, "learning_rate": 0.0003948388274806731, "loss": 0.178, "step": 219750 }, { "epoch": 9.1, "grad_norm": 0.52734375, "learning_rate": 0.00039482998769609963, "loss": 0.1803, "step": 219760 }, { "epoch": 9.1, "grad_norm": 0.71875, "learning_rate": 0.0003948211476389717, "loss": 0.2019, "step": 219770 }, { "epoch": 9.1, "grad_norm": 0.390625, "learning_rate": 0.000394812307309306, "loss": 0.1283, "step": 219780 }, { "epoch": 9.1, "grad_norm": 0.330078125, "learning_rate": 0.0003948034667071192, "loss": 0.178, "step": 219790 }, { "epoch": 9.1, "grad_norm": 0.515625, "learning_rate": 0.000394794625832428, "loss": 0.1888, "step": 219800 }, { "epoch": 9.1, "grad_norm": 2.53125, "learning_rate": 0.0003947857846852488, "loss": 0.1864, "step": 219810 }, { "epoch": 9.1, "grad_norm": 0.57421875, "learning_rate": 0.00039477694326559845, "loss": 0.187, "step": 219820 }, { "epoch": 9.11, "grad_norm": 0.9609375, "learning_rate": 0.0003947681015734935, "loss": 0.2146, "step": 219830 }, { "epoch": 9.11, "grad_norm": 0.84765625, "learning_rate": 0.00039475925960895066, "loss": 0.1969, "step": 219840 }, { "epoch": 9.11, "grad_norm": 1.5234375, "learning_rate": 0.0003947504173719865, "loss": 0.1938, "step": 219850 }, { "epoch": 9.11, "grad_norm": 0.224609375, "learning_rate": 0.00039474157486261773, "loss": 0.2191, "step": 219860 }, { "epoch": 9.11, "grad_norm": 0.48046875, "learning_rate": 0.0003947327320808609, "loss": 0.1968, "step": 219870 }, { "epoch": 9.11, "grad_norm": 0.94140625, "learning_rate": 0.00039472388902673275, "loss": 0.2425, "step": 219880 }, { "epoch": 9.11, "grad_norm": 0.79296875, "learning_rate": 0.00039471504570024975, "loss": 0.1957, "step": 219890 }, { "epoch": 9.11, "grad_norm": 0.255859375, "learning_rate": 0.00039470620210142884, "loss": 0.2008, "step": 219900 }, { "epoch": 9.11, "grad_norm": 0.74609375, "learning_rate": 0.0003946973582302863, "loss": 0.173, "step": 219910 }, { "epoch": 9.11, "grad_norm": 0.734375, "learning_rate": 0.00039468851408683904, "loss": 0.1803, "step": 219920 }, { "epoch": 9.11, "grad_norm": 0.515625, "learning_rate": 0.00039467966967110367, "loss": 0.1602, "step": 219930 }, { "epoch": 9.11, "grad_norm": 0.89453125, "learning_rate": 0.0003946708249830967, "loss": 0.2097, "step": 219940 }, { "epoch": 9.11, "grad_norm": 1.2109375, "learning_rate": 0.00039466198002283495, "loss": 0.2217, "step": 219950 }, { "epoch": 9.11, "grad_norm": 0.63671875, "learning_rate": 0.0003946531347903349, "loss": 0.1595, "step": 219960 }, { "epoch": 9.11, "grad_norm": 0.8125, "learning_rate": 0.0003946442892856133, "loss": 0.2092, "step": 219970 }, { "epoch": 9.11, "grad_norm": 0.8359375, "learning_rate": 0.00039463544350868676, "loss": 0.271, "step": 219980 }, { "epoch": 9.11, "grad_norm": 0.64453125, "learning_rate": 0.00039462659745957197, "loss": 0.1924, "step": 219990 }, { "epoch": 9.11, "grad_norm": 0.82421875, "learning_rate": 0.00039461775113828546, "loss": 0.2267, "step": 220000 }, { "epoch": 9.11, "grad_norm": 0.263671875, "learning_rate": 0.00039460890454484404, "loss": 0.1908, "step": 220010 }, { "epoch": 9.11, "grad_norm": 0.8515625, "learning_rate": 0.00039460005767926425, "loss": 0.17, "step": 220020 }, { "epoch": 9.11, "grad_norm": 0.423828125, "learning_rate": 0.00039459121054156277, "loss": 0.1791, "step": 220030 }, { "epoch": 9.11, "grad_norm": 1.7734375, "learning_rate": 0.0003945823631317562, "loss": 0.1991, "step": 220040 }, { "epoch": 9.11, "grad_norm": 1.9453125, "learning_rate": 0.0003945735154498613, "loss": 0.2291, "step": 220050 }, { "epoch": 9.11, "grad_norm": 0.63671875, "learning_rate": 0.0003945646674958947, "loss": 0.1642, "step": 220060 }, { "epoch": 9.12, "grad_norm": 0.65234375, "learning_rate": 0.00039455581926987296, "loss": 0.2619, "step": 220070 }, { "epoch": 9.12, "grad_norm": 0.515625, "learning_rate": 0.0003945469707718127, "loss": 0.1911, "step": 220080 }, { "epoch": 9.12, "grad_norm": 0.39453125, "learning_rate": 0.00039453812200173076, "loss": 0.2224, "step": 220090 }, { "epoch": 9.12, "grad_norm": 0.8984375, "learning_rate": 0.00039452927295964357, "loss": 0.1674, "step": 220100 }, { "epoch": 9.12, "grad_norm": 0.78125, "learning_rate": 0.00039452042364556805, "loss": 0.1583, "step": 220110 }, { "epoch": 9.12, "grad_norm": 0.81640625, "learning_rate": 0.0003945115740595205, "loss": 0.1977, "step": 220120 }, { "epoch": 9.12, "grad_norm": 0.59375, "learning_rate": 0.00039450272420151795, "loss": 0.1669, "step": 220130 }, { "epoch": 9.12, "grad_norm": 0.337890625, "learning_rate": 0.00039449387407157683, "loss": 0.1772, "step": 220140 }, { "epoch": 9.12, "grad_norm": 0.427734375, "learning_rate": 0.00039448502366971384, "loss": 0.1801, "step": 220150 }, { "epoch": 9.12, "grad_norm": 0.671875, "learning_rate": 0.00039447617299594563, "loss": 0.2199, "step": 220160 }, { "epoch": 9.12, "grad_norm": 0.5703125, "learning_rate": 0.0003944673220502888, "loss": 0.184, "step": 220170 }, { "epoch": 9.12, "grad_norm": 0.78515625, "learning_rate": 0.0003944584708327602, "loss": 0.197, "step": 220180 }, { "epoch": 9.12, "grad_norm": 0.390625, "learning_rate": 0.0003944496193433763, "loss": 0.1505, "step": 220190 }, { "epoch": 9.12, "grad_norm": 1.0625, "learning_rate": 0.00039444076758215373, "loss": 0.2051, "step": 220200 }, { "epoch": 9.12, "grad_norm": 1.28125, "learning_rate": 0.00039443191554910934, "loss": 0.1615, "step": 220210 }, { "epoch": 9.12, "grad_norm": 0.6328125, "learning_rate": 0.0003944230632442597, "loss": 0.1937, "step": 220220 }, { "epoch": 9.12, "grad_norm": 0.86328125, "learning_rate": 0.00039441421066762136, "loss": 0.1592, "step": 220230 }, { "epoch": 9.12, "grad_norm": 0.77734375, "learning_rate": 0.0003944053578192111, "loss": 0.1805, "step": 220240 }, { "epoch": 9.12, "grad_norm": 0.62890625, "learning_rate": 0.00039439650469904556, "loss": 0.1589, "step": 220250 }, { "epoch": 9.12, "grad_norm": 1.4921875, "learning_rate": 0.00039438765130714136, "loss": 0.1876, "step": 220260 }, { "epoch": 9.12, "grad_norm": 0.9921875, "learning_rate": 0.0003943787976435153, "loss": 0.1779, "step": 220270 }, { "epoch": 9.12, "grad_norm": 2.828125, "learning_rate": 0.0003943699437081838, "loss": 0.2587, "step": 220280 }, { "epoch": 9.12, "grad_norm": 0.984375, "learning_rate": 0.0003943610895011638, "loss": 0.2382, "step": 220290 }, { "epoch": 9.12, "grad_norm": 0.48046875, "learning_rate": 0.0003943522350224717, "loss": 0.2016, "step": 220300 }, { "epoch": 9.13, "grad_norm": 0.61328125, "learning_rate": 0.00039434338027212435, "loss": 0.2391, "step": 220310 }, { "epoch": 9.13, "grad_norm": 0.796875, "learning_rate": 0.0003943345252501383, "loss": 0.196, "step": 220320 }, { "epoch": 9.13, "grad_norm": 0.84375, "learning_rate": 0.0003943256699565303, "loss": 0.1967, "step": 220330 }, { "epoch": 9.13, "grad_norm": 0.421875, "learning_rate": 0.00039431681439131694, "loss": 0.2247, "step": 220340 }, { "epoch": 9.13, "grad_norm": 0.86328125, "learning_rate": 0.00039430795855451494, "loss": 0.2008, "step": 220350 }, { "epoch": 9.13, "grad_norm": 0.98046875, "learning_rate": 0.00039429910244614085, "loss": 0.2081, "step": 220360 }, { "epoch": 9.13, "grad_norm": 0.359375, "learning_rate": 0.00039429024606621156, "loss": 0.164, "step": 220370 }, { "epoch": 9.13, "grad_norm": 0.84765625, "learning_rate": 0.0003942813894147436, "loss": 0.2113, "step": 220380 }, { "epoch": 9.13, "grad_norm": 0.625, "learning_rate": 0.0003942725324917536, "loss": 0.2358, "step": 220390 }, { "epoch": 9.13, "grad_norm": 0.5078125, "learning_rate": 0.00039426367529725833, "loss": 0.201, "step": 220400 }, { "epoch": 9.13, "grad_norm": 0.41015625, "learning_rate": 0.00039425481783127435, "loss": 0.2374, "step": 220410 }, { "epoch": 9.13, "grad_norm": 1.9453125, "learning_rate": 0.0003942459600938184, "loss": 0.1853, "step": 220420 }, { "epoch": 9.13, "grad_norm": 0.416015625, "learning_rate": 0.0003942371020849072, "loss": 0.2989, "step": 220430 }, { "epoch": 9.13, "grad_norm": 1.0546875, "learning_rate": 0.0003942282438045571, "loss": 0.1879, "step": 220440 }, { "epoch": 9.13, "grad_norm": 0.478515625, "learning_rate": 0.00039421938525278525, "loss": 0.2146, "step": 220450 }, { "epoch": 9.13, "grad_norm": 0.640625, "learning_rate": 0.000394210526429608, "loss": 0.2066, "step": 220460 }, { "epoch": 9.13, "grad_norm": 0.5078125, "learning_rate": 0.0003942016673350422, "loss": 0.2005, "step": 220470 }, { "epoch": 9.13, "grad_norm": 0.99609375, "learning_rate": 0.0003941928079691044, "loss": 0.1379, "step": 220480 }, { "epoch": 9.13, "grad_norm": 3.109375, "learning_rate": 0.00039418394833181124, "loss": 0.2101, "step": 220490 }, { "epoch": 9.13, "grad_norm": 0.373046875, "learning_rate": 0.00039417508842317956, "loss": 0.2042, "step": 220500 }, { "epoch": 9.13, "grad_norm": 1.296875, "learning_rate": 0.00039416622824322585, "loss": 0.2062, "step": 220510 }, { "epoch": 9.13, "grad_norm": 0.6015625, "learning_rate": 0.00039415736779196687, "loss": 0.2621, "step": 220520 }, { "epoch": 9.13, "grad_norm": 1.0625, "learning_rate": 0.0003941485070694194, "loss": 0.1887, "step": 220530 }, { "epoch": 9.13, "grad_norm": 1.453125, "learning_rate": 0.00039413964607559987, "loss": 0.1658, "step": 220540 }, { "epoch": 9.14, "grad_norm": 0.58984375, "learning_rate": 0.0003941307848105252, "loss": 0.1927, "step": 220550 }, { "epoch": 9.14, "grad_norm": 0.78515625, "learning_rate": 0.0003941219232742119, "loss": 0.2321, "step": 220560 }, { "epoch": 9.14, "grad_norm": 1.2109375, "learning_rate": 0.0003941130614666767, "loss": 0.1992, "step": 220570 }, { "epoch": 9.14, "grad_norm": 0.73046875, "learning_rate": 0.0003941041993879363, "loss": 0.1898, "step": 220580 }, { "epoch": 9.14, "grad_norm": 1.1796875, "learning_rate": 0.0003940953370380073, "loss": 0.2119, "step": 220590 }, { "epoch": 9.14, "grad_norm": 1.1328125, "learning_rate": 0.00039408647441690646, "loss": 0.1968, "step": 220600 }, { "epoch": 9.14, "grad_norm": 1.0703125, "learning_rate": 0.00039407761152465047, "loss": 0.1925, "step": 220610 }, { "epoch": 9.14, "grad_norm": 1.6484375, "learning_rate": 0.000394068748361256, "loss": 0.2369, "step": 220620 }, { "epoch": 9.14, "grad_norm": 0.9921875, "learning_rate": 0.00039405988492673973, "loss": 0.2713, "step": 220630 }, { "epoch": 9.14, "grad_norm": 0.46875, "learning_rate": 0.00039405102122111826, "loss": 0.202, "step": 220640 }, { "epoch": 9.14, "grad_norm": 0.271484375, "learning_rate": 0.0003940421572444083, "loss": 0.2049, "step": 220650 }, { "epoch": 9.14, "grad_norm": 0.62109375, "learning_rate": 0.0003940332929966266, "loss": 0.2787, "step": 220660 }, { "epoch": 9.14, "grad_norm": 0.26953125, "learning_rate": 0.0003940244284777897, "loss": 0.2151, "step": 220670 }, { "epoch": 9.14, "grad_norm": 0.388671875, "learning_rate": 0.0003940155636879145, "loss": 0.228, "step": 220680 }, { "epoch": 9.14, "grad_norm": 0.83984375, "learning_rate": 0.0003940066986270175, "loss": 0.1985, "step": 220690 }, { "epoch": 9.14, "grad_norm": 0.59765625, "learning_rate": 0.0003939978332951154, "loss": 0.2101, "step": 220700 }, { "epoch": 9.14, "grad_norm": 0.83203125, "learning_rate": 0.000393988967692225, "loss": 0.2187, "step": 220710 }, { "epoch": 9.14, "grad_norm": 0.3203125, "learning_rate": 0.00039398010181836287, "loss": 0.195, "step": 220720 }, { "epoch": 9.14, "grad_norm": 0.439453125, "learning_rate": 0.00039397123567354574, "loss": 0.1982, "step": 220730 }, { "epoch": 9.14, "grad_norm": 0.64453125, "learning_rate": 0.0003939623692577904, "loss": 0.1669, "step": 220740 }, { "epoch": 9.14, "grad_norm": 0.62109375, "learning_rate": 0.0003939535025711133, "loss": 0.192, "step": 220750 }, { "epoch": 9.14, "grad_norm": 0.69921875, "learning_rate": 0.00039394463561353134, "loss": 0.2103, "step": 220760 }, { "epoch": 9.14, "grad_norm": 1.234375, "learning_rate": 0.0003939357683850611, "loss": 0.2536, "step": 220770 }, { "epoch": 9.14, "grad_norm": 0.447265625, "learning_rate": 0.0003939269008857193, "loss": 0.1849, "step": 220780 }, { "epoch": 9.15, "grad_norm": 0.5703125, "learning_rate": 0.0003939180331155225, "loss": 0.1868, "step": 220790 }, { "epoch": 9.15, "grad_norm": 1.3671875, "learning_rate": 0.0003939091650744876, "loss": 0.2205, "step": 220800 }, { "epoch": 9.15, "grad_norm": 0.27734375, "learning_rate": 0.00039390029676263116, "loss": 0.2057, "step": 220810 }, { "epoch": 9.15, "grad_norm": 0.6328125, "learning_rate": 0.00039389142817996994, "loss": 0.1731, "step": 220820 }, { "epoch": 9.15, "grad_norm": 0.61328125, "learning_rate": 0.0003938825593265205, "loss": 0.168, "step": 220830 }, { "epoch": 9.15, "grad_norm": 0.90234375, "learning_rate": 0.00039387369020229976, "loss": 0.2057, "step": 220840 }, { "epoch": 9.15, "grad_norm": 0.5546875, "learning_rate": 0.00039386482080732424, "loss": 0.2347, "step": 220850 }, { "epoch": 9.15, "grad_norm": 1.1875, "learning_rate": 0.00039385595114161054, "loss": 0.2033, "step": 220860 }, { "epoch": 9.15, "grad_norm": 0.54296875, "learning_rate": 0.00039384708120517557, "loss": 0.2154, "step": 220870 }, { "epoch": 9.15, "grad_norm": 1.2265625, "learning_rate": 0.0003938382109980359, "loss": 0.2194, "step": 220880 }, { "epoch": 9.15, "grad_norm": 0.48828125, "learning_rate": 0.0003938293405202082, "loss": 0.2283, "step": 220890 }, { "epoch": 9.15, "grad_norm": 0.71875, "learning_rate": 0.0003938204697717094, "loss": 0.2068, "step": 220900 }, { "epoch": 9.15, "grad_norm": 0.294921875, "learning_rate": 0.0003938115987525558, "loss": 0.2066, "step": 220910 }, { "epoch": 9.15, "grad_norm": 0.4140625, "learning_rate": 0.00039380272746276446, "loss": 0.1863, "step": 220920 }, { "epoch": 9.15, "grad_norm": 1.5, "learning_rate": 0.00039379385590235184, "loss": 0.2226, "step": 220930 }, { "epoch": 9.15, "grad_norm": 0.5546875, "learning_rate": 0.00039378498407133467, "loss": 0.1675, "step": 220940 }, { "epoch": 9.15, "grad_norm": 1.390625, "learning_rate": 0.0003937761119697298, "loss": 0.2243, "step": 220950 }, { "epoch": 9.15, "grad_norm": 0.60546875, "learning_rate": 0.00039376723959755366, "loss": 0.2195, "step": 220960 }, { "epoch": 9.15, "grad_norm": 1.0234375, "learning_rate": 0.00039375836695482326, "loss": 0.2275, "step": 220970 }, { "epoch": 9.15, "grad_norm": 0.67578125, "learning_rate": 0.00039374949404155505, "loss": 0.163, "step": 220980 }, { "epoch": 9.15, "grad_norm": 0.703125, "learning_rate": 0.00039374062085776587, "loss": 0.2085, "step": 220990 }, { "epoch": 9.15, "grad_norm": 0.400390625, "learning_rate": 0.0003937317474034723, "loss": 0.2102, "step": 221000 }, { "epoch": 9.15, "grad_norm": 0.89453125, "learning_rate": 0.0003937228736786911, "loss": 0.1918, "step": 221010 }, { "epoch": 9.15, "grad_norm": 0.74609375, "learning_rate": 0.000393713999683439, "loss": 0.2273, "step": 221020 }, { "epoch": 9.16, "grad_norm": 0.494140625, "learning_rate": 0.00039370512541773273, "loss": 0.1604, "step": 221030 }, { "epoch": 9.16, "grad_norm": 0.5625, "learning_rate": 0.00039369625088158885, "loss": 0.1671, "step": 221040 }, { "epoch": 9.16, "grad_norm": 0.65625, "learning_rate": 0.00039368737607502414, "loss": 0.19, "step": 221050 }, { "epoch": 9.16, "grad_norm": 0.515625, "learning_rate": 0.00039367850099805533, "loss": 0.1702, "step": 221060 }, { "epoch": 9.16, "grad_norm": 0.69140625, "learning_rate": 0.00039366962565069914, "loss": 0.2346, "step": 221070 }, { "epoch": 9.16, "grad_norm": 0.921875, "learning_rate": 0.0003936607500329722, "loss": 0.1645, "step": 221080 }, { "epoch": 9.16, "grad_norm": 1.0234375, "learning_rate": 0.00039365187414489125, "loss": 0.1646, "step": 221090 }, { "epoch": 9.16, "grad_norm": 0.5703125, "learning_rate": 0.00039364299798647296, "loss": 0.2209, "step": 221100 }, { "epoch": 9.16, "grad_norm": 2.421875, "learning_rate": 0.00039363412155773406, "loss": 0.2072, "step": 221110 }, { "epoch": 9.16, "grad_norm": 1.3671875, "learning_rate": 0.0003936252448586912, "loss": 0.1852, "step": 221120 }, { "epoch": 9.16, "grad_norm": 0.53125, "learning_rate": 0.0003936163678893613, "loss": 0.2245, "step": 221130 }, { "epoch": 9.16, "grad_norm": 0.74609375, "learning_rate": 0.00039360749064976076, "loss": 0.1596, "step": 221140 }, { "epoch": 9.16, "grad_norm": 1.2734375, "learning_rate": 0.0003935986131399065, "loss": 0.2084, "step": 221150 }, { "epoch": 9.16, "grad_norm": 1.2890625, "learning_rate": 0.0003935897353598151, "loss": 0.2261, "step": 221160 }, { "epoch": 9.16, "grad_norm": 0.73046875, "learning_rate": 0.00039358085730950337, "loss": 0.2327, "step": 221170 }, { "epoch": 9.16, "grad_norm": 0.63671875, "learning_rate": 0.000393571978988988, "loss": 0.187, "step": 221180 }, { "epoch": 9.16, "grad_norm": 0.9453125, "learning_rate": 0.00039356310039828565, "loss": 0.1877, "step": 221190 }, { "epoch": 9.16, "grad_norm": 0.62890625, "learning_rate": 0.000393554221537413, "loss": 0.1775, "step": 221200 }, { "epoch": 9.16, "grad_norm": 1.203125, "learning_rate": 0.00039354534240638685, "loss": 0.216, "step": 221210 }, { "epoch": 9.16, "grad_norm": 0.91015625, "learning_rate": 0.00039353646300522384, "loss": 0.2233, "step": 221220 }, { "epoch": 9.16, "grad_norm": 1.6953125, "learning_rate": 0.00039352758333394074, "loss": 0.2206, "step": 221230 }, { "epoch": 9.16, "grad_norm": 0.5390625, "learning_rate": 0.00039351870339255425, "loss": 0.2265, "step": 221240 }, { "epoch": 9.16, "grad_norm": 1.2578125, "learning_rate": 0.000393509823181081, "loss": 0.2028, "step": 221250 }, { "epoch": 9.16, "grad_norm": 0.38671875, "learning_rate": 0.00039350094269953776, "loss": 0.2749, "step": 221260 }, { "epoch": 9.16, "grad_norm": 0.796875, "learning_rate": 0.00039349206194794125, "loss": 0.1958, "step": 221270 }, { "epoch": 9.17, "grad_norm": 0.546875, "learning_rate": 0.0003934831809263082, "loss": 0.177, "step": 221280 }, { "epoch": 9.17, "grad_norm": 0.375, "learning_rate": 0.0003934742996346553, "loss": 0.1901, "step": 221290 }, { "epoch": 9.17, "grad_norm": 0.71875, "learning_rate": 0.00039346541807299925, "loss": 0.1814, "step": 221300 }, { "epoch": 9.17, "grad_norm": 0.466796875, "learning_rate": 0.0003934565362413568, "loss": 0.195, "step": 221310 }, { "epoch": 9.17, "grad_norm": 0.00160980224609375, "learning_rate": 0.0003934476541397446, "loss": 0.1871, "step": 221320 }, { "epoch": 9.17, "grad_norm": 0.63671875, "learning_rate": 0.00039343877176817944, "loss": 0.1607, "step": 221330 }, { "epoch": 9.17, "grad_norm": 1.78125, "learning_rate": 0.000393429889126678, "loss": 0.1534, "step": 221340 }, { "epoch": 9.17, "grad_norm": 0.291015625, "learning_rate": 0.0003934210062152569, "loss": 0.1684, "step": 221350 }, { "epoch": 9.17, "grad_norm": 0.431640625, "learning_rate": 0.000393412123033933, "loss": 0.1863, "step": 221360 }, { "epoch": 9.17, "grad_norm": 0.921875, "learning_rate": 0.0003934032395827231, "loss": 0.2013, "step": 221370 }, { "epoch": 9.17, "grad_norm": 0.7109375, "learning_rate": 0.00039339435586164363, "loss": 0.1743, "step": 221380 }, { "epoch": 9.17, "grad_norm": 1.1953125, "learning_rate": 0.0003933854718707116, "loss": 0.1846, "step": 221390 }, { "epoch": 9.17, "grad_norm": 1.234375, "learning_rate": 0.00039337658760994353, "loss": 0.2266, "step": 221400 }, { "epoch": 9.17, "grad_norm": 1.6875, "learning_rate": 0.0003933677030793562, "loss": 0.2526, "step": 221410 }, { "epoch": 9.17, "grad_norm": 1.203125, "learning_rate": 0.00039335881827896635, "loss": 0.2567, "step": 221420 }, { "epoch": 9.17, "grad_norm": 0.88671875, "learning_rate": 0.00039334993320879066, "loss": 0.1871, "step": 221430 }, { "epoch": 9.17, "grad_norm": 0.53125, "learning_rate": 0.00039334104786884584, "loss": 0.2027, "step": 221440 }, { "epoch": 9.17, "grad_norm": 1.7109375, "learning_rate": 0.00039333216225914875, "loss": 0.1628, "step": 221450 }, { "epoch": 9.17, "grad_norm": 2.15625, "learning_rate": 0.00039332327637971594, "loss": 0.2019, "step": 221460 }, { "epoch": 9.17, "grad_norm": 0.67578125, "learning_rate": 0.00039331439023056425, "loss": 0.2255, "step": 221470 }, { "epoch": 9.17, "grad_norm": 0.353515625, "learning_rate": 0.0003933055038117103, "loss": 0.1938, "step": 221480 }, { "epoch": 9.17, "grad_norm": 0.98046875, "learning_rate": 0.0003932966171231709, "loss": 0.1835, "step": 221490 }, { "epoch": 9.17, "grad_norm": 0.419921875, "learning_rate": 0.00039328773016496276, "loss": 0.1817, "step": 221500 }, { "epoch": 9.17, "grad_norm": 0.447265625, "learning_rate": 0.0003932788429371025, "loss": 0.226, "step": 221510 }, { "epoch": 9.18, "grad_norm": 0.0, "learning_rate": 0.000393269955439607, "loss": 0.1967, "step": 221520 }, { "epoch": 9.18, "grad_norm": 0.8984375, "learning_rate": 0.00039326106767249294, "loss": 0.1987, "step": 221530 }, { "epoch": 9.18, "grad_norm": 0.84375, "learning_rate": 0.000393252179635777, "loss": 0.2295, "step": 221540 }, { "epoch": 9.18, "grad_norm": 0.7109375, "learning_rate": 0.0003932432913294759, "loss": 0.2266, "step": 221550 }, { "epoch": 9.18, "grad_norm": 0.859375, "learning_rate": 0.00039323440275360635, "loss": 0.1898, "step": 221560 }, { "epoch": 9.18, "grad_norm": 0.3359375, "learning_rate": 0.00039322551390818524, "loss": 0.1978, "step": 221570 }, { "epoch": 9.18, "grad_norm": 1.4921875, "learning_rate": 0.0003932166247932292, "loss": 0.1754, "step": 221580 }, { "epoch": 9.18, "grad_norm": 0.474609375, "learning_rate": 0.00039320773540875474, "loss": 0.183, "step": 221590 }, { "epoch": 9.18, "grad_norm": 0.431640625, "learning_rate": 0.0003931988457547789, "loss": 0.2376, "step": 221600 }, { "epoch": 9.18, "grad_norm": 0.4921875, "learning_rate": 0.00039318995583131834, "loss": 0.1481, "step": 221610 }, { "epoch": 9.18, "grad_norm": 0.80078125, "learning_rate": 0.00039318106563838973, "loss": 0.2372, "step": 221620 }, { "epoch": 9.18, "grad_norm": 0.474609375, "learning_rate": 0.0003931721751760098, "loss": 0.2051, "step": 221630 }, { "epoch": 9.18, "grad_norm": 0.484375, "learning_rate": 0.0003931632844441952, "loss": 0.1452, "step": 221640 }, { "epoch": 9.18, "grad_norm": 1.140625, "learning_rate": 0.0003931543934429629, "loss": 0.2429, "step": 221650 }, { "epoch": 9.18, "grad_norm": 0.330078125, "learning_rate": 0.00039314550217232945, "loss": 0.2171, "step": 221660 }, { "epoch": 9.18, "grad_norm": 0.66796875, "learning_rate": 0.0003931366106323116, "loss": 0.2295, "step": 221670 }, { "epoch": 9.18, "grad_norm": 0.515625, "learning_rate": 0.0003931277188229262, "loss": 0.1372, "step": 221680 }, { "epoch": 9.18, "grad_norm": 0.921875, "learning_rate": 0.00039311882674418974, "loss": 0.144, "step": 221690 }, { "epoch": 9.18, "grad_norm": 0.75, "learning_rate": 0.0003931099343961192, "loss": 0.1784, "step": 221700 }, { "epoch": 9.18, "grad_norm": 1.28125, "learning_rate": 0.00039310104177873117, "loss": 0.2008, "step": 221710 }, { "epoch": 9.18, "grad_norm": 0.70703125, "learning_rate": 0.00039309214889204246, "loss": 0.1629, "step": 221720 }, { "epoch": 9.18, "grad_norm": 0.416015625, "learning_rate": 0.00039308325573606976, "loss": 0.1518, "step": 221730 }, { "epoch": 9.18, "grad_norm": 0.8046875, "learning_rate": 0.0003930743623108299, "loss": 0.1847, "step": 221740 }, { "epoch": 9.18, "grad_norm": 1.09375, "learning_rate": 0.0003930654686163395, "loss": 0.1858, "step": 221750 }, { "epoch": 9.19, "grad_norm": 0.515625, "learning_rate": 0.0003930565746526153, "loss": 0.1785, "step": 221760 }, { "epoch": 9.19, "grad_norm": 0.58984375, "learning_rate": 0.0003930476804196741, "loss": 0.1761, "step": 221770 }, { "epoch": 9.19, "grad_norm": 0.703125, "learning_rate": 0.0003930387859175326, "loss": 0.1763, "step": 221780 }, { "epoch": 9.19, "grad_norm": 0.6328125, "learning_rate": 0.0003930298911462076, "loss": 0.1907, "step": 221790 }, { "epoch": 9.19, "grad_norm": 0.73828125, "learning_rate": 0.00039302099610571565, "loss": 0.1863, "step": 221800 }, { "epoch": 9.19, "grad_norm": 0.69921875, "learning_rate": 0.00039301210079607373, "loss": 0.1756, "step": 221810 }, { "epoch": 9.19, "grad_norm": 1.046875, "learning_rate": 0.0003930032052172985, "loss": 0.143, "step": 221820 }, { "epoch": 9.19, "grad_norm": 1.9140625, "learning_rate": 0.0003929943093694067, "loss": 0.1781, "step": 221830 }, { "epoch": 9.19, "grad_norm": 1.65625, "learning_rate": 0.000392985413252415, "loss": 0.2149, "step": 221840 }, { "epoch": 9.19, "grad_norm": 0.953125, "learning_rate": 0.00039297651686634016, "loss": 0.1844, "step": 221850 }, { "epoch": 9.19, "grad_norm": 0.65234375, "learning_rate": 0.000392967620211199, "loss": 0.1767, "step": 221860 }, { "epoch": 9.19, "grad_norm": 0.478515625, "learning_rate": 0.00039295872328700825, "loss": 0.21, "step": 221870 }, { "epoch": 9.19, "grad_norm": 0.6875, "learning_rate": 0.0003929498260937846, "loss": 0.1628, "step": 221880 }, { "epoch": 9.19, "grad_norm": 0.984375, "learning_rate": 0.0003929409286315448, "loss": 0.1878, "step": 221890 }, { "epoch": 9.19, "grad_norm": 1.3359375, "learning_rate": 0.0003929320309003056, "loss": 0.2189, "step": 221900 }, { "epoch": 9.19, "grad_norm": 0.61328125, "learning_rate": 0.0003929231329000837, "loss": 0.2647, "step": 221910 }, { "epoch": 9.19, "grad_norm": 0.5625, "learning_rate": 0.00039291423463089605, "loss": 0.222, "step": 221920 }, { "epoch": 9.19, "grad_norm": 0.41796875, "learning_rate": 0.0003929053360927591, "loss": 0.1982, "step": 221930 }, { "epoch": 9.19, "grad_norm": 0.96484375, "learning_rate": 0.00039289643728568986, "loss": 0.1767, "step": 221940 }, { "epoch": 9.19, "grad_norm": 0.2001953125, "learning_rate": 0.0003928875382097049, "loss": 0.1978, "step": 221950 }, { "epoch": 9.19, "grad_norm": 0.5546875, "learning_rate": 0.000392878638864821, "loss": 0.1752, "step": 221960 }, { "epoch": 9.19, "grad_norm": 0.48828125, "learning_rate": 0.00039286973925105497, "loss": 0.2015, "step": 221970 }, { "epoch": 9.19, "grad_norm": 0.58203125, "learning_rate": 0.0003928608393684235, "loss": 0.1438, "step": 221980 }, { "epoch": 9.19, "grad_norm": 0.76171875, "learning_rate": 0.00039285193921694336, "loss": 0.2215, "step": 221990 }, { "epoch": 9.2, "grad_norm": 0.71875, "learning_rate": 0.0003928430387966313, "loss": 0.1644, "step": 222000 }, { "epoch": 9.2, "grad_norm": 0.98828125, "learning_rate": 0.000392834138107504, "loss": 0.2382, "step": 222010 }, { "epoch": 9.2, "grad_norm": 0.43359375, "learning_rate": 0.0003928252371495784, "loss": 0.157, "step": 222020 }, { "epoch": 9.2, "grad_norm": 0.94921875, "learning_rate": 0.00039281633592287105, "loss": 0.1584, "step": 222030 }, { "epoch": 9.2, "grad_norm": 0.93359375, "learning_rate": 0.00039280743442739874, "loss": 0.1606, "step": 222040 }, { "epoch": 9.2, "grad_norm": 0.5390625, "learning_rate": 0.00039279853266317835, "loss": 0.2199, "step": 222050 }, { "epoch": 9.2, "grad_norm": 0.47265625, "learning_rate": 0.00039278963063022653, "loss": 0.2082, "step": 222060 }, { "epoch": 9.2, "grad_norm": 0.80859375, "learning_rate": 0.00039278072832856, "loss": 0.2089, "step": 222070 }, { "epoch": 9.2, "grad_norm": 0.1943359375, "learning_rate": 0.0003927718257581956, "loss": 0.1826, "step": 222080 }, { "epoch": 9.2, "grad_norm": 0.48828125, "learning_rate": 0.00039276292291915007, "loss": 0.1837, "step": 222090 }, { "epoch": 9.2, "grad_norm": 1.1640625, "learning_rate": 0.00039275401981144, "loss": 0.2169, "step": 222100 }, { "epoch": 9.2, "grad_norm": 2.3125, "learning_rate": 0.0003927451164350824, "loss": 0.207, "step": 222110 }, { "epoch": 9.2, "grad_norm": 1.1640625, "learning_rate": 0.00039273621279009384, "loss": 0.2065, "step": 222120 }, { "epoch": 9.2, "grad_norm": 0.78125, "learning_rate": 0.0003927273088764912, "loss": 0.1935, "step": 222130 }, { "epoch": 9.2, "grad_norm": 0.6328125, "learning_rate": 0.00039271840469429117, "loss": 0.1534, "step": 222140 }, { "epoch": 9.2, "grad_norm": 0.83984375, "learning_rate": 0.0003927095002435105, "loss": 0.2423, "step": 222150 }, { "epoch": 9.2, "grad_norm": 0.6875, "learning_rate": 0.000392700595524166, "loss": 0.1853, "step": 222160 }, { "epoch": 9.2, "grad_norm": 1.0546875, "learning_rate": 0.0003926916905362743, "loss": 0.1561, "step": 222170 }, { "epoch": 9.2, "grad_norm": 1.5625, "learning_rate": 0.00039268278527985237, "loss": 0.1664, "step": 222180 }, { "epoch": 9.2, "grad_norm": 0.6953125, "learning_rate": 0.0003926738797549167, "loss": 0.2005, "step": 222190 }, { "epoch": 9.2, "grad_norm": 0.6484375, "learning_rate": 0.00039266497396148425, "loss": 0.1945, "step": 222200 }, { "epoch": 9.2, "grad_norm": 1.8671875, "learning_rate": 0.0003926560678995718, "loss": 0.2052, "step": 222210 }, { "epoch": 9.2, "grad_norm": 0.98046875, "learning_rate": 0.000392647161569196, "loss": 0.1727, "step": 222220 }, { "epoch": 9.2, "grad_norm": 1.0078125, "learning_rate": 0.0003926382549703736, "loss": 0.1989, "step": 222230 }, { "epoch": 9.21, "grad_norm": 0.84375, "learning_rate": 0.0003926293481031214, "loss": 0.1895, "step": 222240 }, { "epoch": 9.21, "grad_norm": 0.89453125, "learning_rate": 0.0003926204409674562, "loss": 0.257, "step": 222250 }, { "epoch": 9.21, "grad_norm": 1.28125, "learning_rate": 0.0003926115335633948, "loss": 0.2007, "step": 222260 }, { "epoch": 9.21, "grad_norm": 0.91796875, "learning_rate": 0.00039260262589095374, "loss": 0.2331, "step": 222270 }, { "epoch": 9.21, "grad_norm": 0.859375, "learning_rate": 0.00039259371795015007, "loss": 0.1844, "step": 222280 }, { "epoch": 9.21, "grad_norm": 0.546875, "learning_rate": 0.0003925848097410004, "loss": 0.1764, "step": 222290 }, { "epoch": 9.21, "grad_norm": 1.1640625, "learning_rate": 0.0003925759012635215, "loss": 0.1692, "step": 222300 }, { "epoch": 9.21, "grad_norm": 0.625, "learning_rate": 0.0003925669925177302, "loss": 0.2219, "step": 222310 }, { "epoch": 9.21, "grad_norm": 1.0078125, "learning_rate": 0.00039255808350364306, "loss": 0.2292, "step": 222320 }, { "epoch": 9.21, "grad_norm": 0.86328125, "learning_rate": 0.0003925491742212772, "loss": 0.2088, "step": 222330 }, { "epoch": 9.21, "grad_norm": 0.69140625, "learning_rate": 0.0003925402646706491, "loss": 0.2093, "step": 222340 }, { "epoch": 9.21, "grad_norm": 0.64453125, "learning_rate": 0.00039253135485177557, "loss": 0.1804, "step": 222350 }, { "epoch": 9.21, "grad_norm": 2.3125, "learning_rate": 0.00039252244476467346, "loss": 0.2171, "step": 222360 }, { "epoch": 9.21, "grad_norm": 1.078125, "learning_rate": 0.00039251353440935947, "loss": 0.1972, "step": 222370 }, { "epoch": 9.21, "grad_norm": 0.2177734375, "learning_rate": 0.00039250462378585046, "loss": 0.1602, "step": 222380 }, { "epoch": 9.21, "grad_norm": 0.71875, "learning_rate": 0.00039249571289416306, "loss": 0.199, "step": 222390 }, { "epoch": 9.21, "grad_norm": 0.83984375, "learning_rate": 0.0003924868017343142, "loss": 0.207, "step": 222400 }, { "epoch": 9.21, "grad_norm": 1.15625, "learning_rate": 0.00039247789030632055, "loss": 0.2096, "step": 222410 }, { "epoch": 9.21, "grad_norm": 0.91796875, "learning_rate": 0.0003924689786101989, "loss": 0.2077, "step": 222420 }, { "epoch": 9.21, "grad_norm": 0.484375, "learning_rate": 0.00039246006664596595, "loss": 0.2214, "step": 222430 }, { "epoch": 9.21, "grad_norm": 1.1171875, "learning_rate": 0.00039245115441363855, "loss": 0.2112, "step": 222440 }, { "epoch": 9.21, "grad_norm": 0.78515625, "learning_rate": 0.00039244224191323354, "loss": 0.2598, "step": 222450 }, { "epoch": 9.21, "grad_norm": 0.640625, "learning_rate": 0.00039243332914476757, "loss": 0.1598, "step": 222460 }, { "epoch": 9.21, "grad_norm": 0.70703125, "learning_rate": 0.0003924244161082574, "loss": 0.2316, "step": 222470 }, { "epoch": 9.22, "grad_norm": 0.21875, "learning_rate": 0.00039241550280371995, "loss": 0.1818, "step": 222480 }, { "epoch": 9.22, "grad_norm": 0.71484375, "learning_rate": 0.0003924065892311719, "loss": 0.1912, "step": 222490 }, { "epoch": 9.22, "grad_norm": 1.9453125, "learning_rate": 0.00039239767539063, "loss": 0.2131, "step": 222500 }, { "epoch": 9.22, "grad_norm": 0.5078125, "learning_rate": 0.000392388761282111, "loss": 0.2011, "step": 222510 }, { "epoch": 9.22, "grad_norm": 0.373046875, "learning_rate": 0.00039237984690563177, "loss": 0.2355, "step": 222520 }, { "epoch": 9.22, "grad_norm": 0.33984375, "learning_rate": 0.000392370932261209, "loss": 0.1773, "step": 222530 }, { "epoch": 9.22, "grad_norm": 0.51171875, "learning_rate": 0.00039236201734885965, "loss": 0.2235, "step": 222540 }, { "epoch": 9.22, "grad_norm": 0.76171875, "learning_rate": 0.0003923531021686002, "loss": 0.2132, "step": 222550 }, { "epoch": 9.22, "grad_norm": 0.2431640625, "learning_rate": 0.0003923441867204477, "loss": 0.1814, "step": 222560 }, { "epoch": 9.22, "grad_norm": 0.8125, "learning_rate": 0.0003923352710044187, "loss": 0.2196, "step": 222570 }, { "epoch": 9.22, "grad_norm": 0.74609375, "learning_rate": 0.00039232635502053014, "loss": 0.1988, "step": 222580 }, { "epoch": 9.22, "grad_norm": 0.5703125, "learning_rate": 0.0003923174387687988, "loss": 0.1909, "step": 222590 }, { "epoch": 9.22, "grad_norm": 0.25, "learning_rate": 0.0003923085222492414, "loss": 0.1545, "step": 222600 }, { "epoch": 9.22, "grad_norm": 1.03125, "learning_rate": 0.00039229960546187473, "loss": 0.233, "step": 222610 }, { "epoch": 9.22, "grad_norm": 1.859375, "learning_rate": 0.0003922906884067155, "loss": 0.2529, "step": 222620 }, { "epoch": 9.22, "grad_norm": 2.03125, "learning_rate": 0.0003922817710837806, "loss": 0.2009, "step": 222630 }, { "epoch": 9.22, "grad_norm": 0.6953125, "learning_rate": 0.00039227285349308674, "loss": 0.2036, "step": 222640 }, { "epoch": 9.22, "grad_norm": 1.8984375, "learning_rate": 0.0003922639356346508, "loss": 0.176, "step": 222650 }, { "epoch": 9.22, "grad_norm": 0.625, "learning_rate": 0.00039225501750848944, "loss": 0.1978, "step": 222660 }, { "epoch": 9.22, "grad_norm": 0.86328125, "learning_rate": 0.0003922460991146195, "loss": 0.2034, "step": 222670 }, { "epoch": 9.22, "grad_norm": 0.640625, "learning_rate": 0.00039223718045305786, "loss": 0.193, "step": 222680 }, { "epoch": 9.22, "grad_norm": 0.2470703125, "learning_rate": 0.0003922282615238211, "loss": 0.1912, "step": 222690 }, { "epoch": 9.22, "grad_norm": 0.58984375, "learning_rate": 0.00039221934232692616, "loss": 0.2066, "step": 222700 }, { "epoch": 9.22, "grad_norm": 1.0859375, "learning_rate": 0.00039221042286238973, "loss": 0.1986, "step": 222710 }, { "epoch": 9.23, "grad_norm": 1.09375, "learning_rate": 0.0003922015031302286, "loss": 0.2285, "step": 222720 }, { "epoch": 9.23, "grad_norm": 0.59765625, "learning_rate": 0.00039219258313045975, "loss": 0.2019, "step": 222730 }, { "epoch": 9.23, "grad_norm": 0.671875, "learning_rate": 0.0003921836628630997, "loss": 0.1658, "step": 222740 }, { "epoch": 9.23, "grad_norm": 0.427734375, "learning_rate": 0.0003921747423281654, "loss": 0.1863, "step": 222750 }, { "epoch": 9.23, "grad_norm": 2.75, "learning_rate": 0.0003921658215256736, "loss": 0.2091, "step": 222760 }, { "epoch": 9.23, "grad_norm": 0.68359375, "learning_rate": 0.000392156900455641, "loss": 0.1908, "step": 222770 }, { "epoch": 9.23, "grad_norm": 1.0390625, "learning_rate": 0.00039214797911808454, "loss": 0.2645, "step": 222780 }, { "epoch": 9.23, "grad_norm": 1.3671875, "learning_rate": 0.0003921390575130209, "loss": 0.2033, "step": 222790 }, { "epoch": 9.23, "grad_norm": 0.67578125, "learning_rate": 0.0003921301356404669, "loss": 0.2189, "step": 222800 }, { "epoch": 9.23, "grad_norm": 1.34375, "learning_rate": 0.0003921212135004394, "loss": 0.2161, "step": 222810 }, { "epoch": 9.23, "grad_norm": 0.59765625, "learning_rate": 0.000392112291092955, "loss": 0.2192, "step": 222820 }, { "epoch": 9.23, "grad_norm": 0.53125, "learning_rate": 0.0003921033684180307, "loss": 0.2173, "step": 222830 }, { "epoch": 9.23, "grad_norm": 0.76171875, "learning_rate": 0.0003920944454756832, "loss": 0.187, "step": 222840 }, { "epoch": 9.23, "grad_norm": 0.4765625, "learning_rate": 0.0003920855222659293, "loss": 0.1969, "step": 222850 }, { "epoch": 9.23, "grad_norm": 0.462890625, "learning_rate": 0.00039207659878878574, "loss": 0.1712, "step": 222860 }, { "epoch": 9.23, "grad_norm": 0.150390625, "learning_rate": 0.0003920676750442694, "loss": 0.1932, "step": 222870 }, { "epoch": 9.23, "grad_norm": 0.62890625, "learning_rate": 0.00039205875103239706, "loss": 0.1726, "step": 222880 }, { "epoch": 9.23, "grad_norm": 1.2421875, "learning_rate": 0.00039204982675318544, "loss": 0.1537, "step": 222890 }, { "epoch": 9.23, "grad_norm": 0.85546875, "learning_rate": 0.0003920409022066515, "loss": 0.1678, "step": 222900 }, { "epoch": 9.23, "grad_norm": 0.62109375, "learning_rate": 0.0003920319773928118, "loss": 0.2206, "step": 222910 }, { "epoch": 9.23, "grad_norm": 0.953125, "learning_rate": 0.00039202305231168333, "loss": 0.2109, "step": 222920 }, { "epoch": 9.23, "grad_norm": 0.84375, "learning_rate": 0.00039201412696328276, "loss": 0.195, "step": 222930 }, { "epoch": 9.23, "grad_norm": 0.828125, "learning_rate": 0.00039200520134762695, "loss": 0.1994, "step": 222940 }, { "epoch": 9.23, "grad_norm": 0.4453125, "learning_rate": 0.00039199627546473265, "loss": 0.162, "step": 222950 }, { "epoch": 9.23, "grad_norm": 0.6953125, "learning_rate": 0.0003919873493146168, "loss": 0.2095, "step": 222960 }, { "epoch": 9.24, "grad_norm": 0.58984375, "learning_rate": 0.00039197842289729603, "loss": 0.1563, "step": 222970 }, { "epoch": 9.24, "grad_norm": 0.765625, "learning_rate": 0.00039196949621278716, "loss": 0.1606, "step": 222980 }, { "epoch": 9.24, "grad_norm": 0.392578125, "learning_rate": 0.00039196056926110707, "loss": 0.1915, "step": 222990 }, { "epoch": 9.24, "grad_norm": 0.90625, "learning_rate": 0.0003919516420422725, "loss": 0.2602, "step": 223000 }, { "epoch": 9.24, "grad_norm": 0.6640625, "learning_rate": 0.0003919427145563003, "loss": 0.1901, "step": 223010 }, { "epoch": 9.24, "grad_norm": 0.671875, "learning_rate": 0.00039193378680320724, "loss": 0.1794, "step": 223020 }, { "epoch": 9.24, "grad_norm": 1.1171875, "learning_rate": 0.0003919248587830101, "loss": 0.1812, "step": 223030 }, { "epoch": 9.24, "grad_norm": 0.466796875, "learning_rate": 0.0003919159304957257, "loss": 0.2225, "step": 223040 }, { "epoch": 9.24, "grad_norm": 0.84765625, "learning_rate": 0.0003919070019413709, "loss": 0.2206, "step": 223050 }, { "epoch": 9.24, "grad_norm": 0.478515625, "learning_rate": 0.0003918980731199624, "loss": 0.2476, "step": 223060 }, { "epoch": 9.24, "grad_norm": 0.87890625, "learning_rate": 0.000391889144031517, "loss": 0.1912, "step": 223070 }, { "epoch": 9.24, "grad_norm": 0.5234375, "learning_rate": 0.0003918802146760516, "loss": 0.2306, "step": 223080 }, { "epoch": 9.24, "grad_norm": 0.462890625, "learning_rate": 0.00039187128505358297, "loss": 0.1767, "step": 223090 }, { "epoch": 9.24, "grad_norm": 0.90234375, "learning_rate": 0.00039186235516412796, "loss": 0.168, "step": 223100 }, { "epoch": 9.24, "grad_norm": 0.41015625, "learning_rate": 0.00039185342500770313, "loss": 0.2126, "step": 223110 }, { "epoch": 9.24, "grad_norm": 0.9296875, "learning_rate": 0.0003918444945843257, "loss": 0.212, "step": 223120 }, { "epoch": 9.24, "grad_norm": 0.5, "learning_rate": 0.0003918355638940121, "loss": 0.1858, "step": 223130 }, { "epoch": 9.24, "grad_norm": 0.490234375, "learning_rate": 0.00039182663293677933, "loss": 0.2019, "step": 223140 }, { "epoch": 9.24, "grad_norm": 0.70703125, "learning_rate": 0.0003918177017126442, "loss": 0.2223, "step": 223150 }, { "epoch": 9.24, "grad_norm": 0.40625, "learning_rate": 0.00039180877022162335, "loss": 0.2257, "step": 223160 }, { "epoch": 9.24, "grad_norm": 1.203125, "learning_rate": 0.00039179983846373385, "loss": 0.2012, "step": 223170 }, { "epoch": 9.24, "grad_norm": 0.390625, "learning_rate": 0.00039179090643899227, "loss": 0.1586, "step": 223180 }, { "epoch": 9.24, "grad_norm": 1.1875, "learning_rate": 0.0003917819741474156, "loss": 0.2115, "step": 223190 }, { "epoch": 9.24, "grad_norm": 1.703125, "learning_rate": 0.0003917730415890205, "loss": 0.2038, "step": 223200 }, { "epoch": 9.25, "grad_norm": 1.9921875, "learning_rate": 0.00039176410876382386, "loss": 0.2331, "step": 223210 }, { "epoch": 9.25, "grad_norm": 1.203125, "learning_rate": 0.00039175517567184245, "loss": 0.2069, "step": 223220 }, { "epoch": 9.25, "grad_norm": 0.7109375, "learning_rate": 0.00039174624231309315, "loss": 0.2267, "step": 223230 }, { "epoch": 9.25, "grad_norm": 1.84375, "learning_rate": 0.00039173730868759265, "loss": 0.2008, "step": 223240 }, { "epoch": 9.25, "grad_norm": 0.2890625, "learning_rate": 0.00039172837479535793, "loss": 0.1923, "step": 223250 }, { "epoch": 9.25, "grad_norm": 0.326171875, "learning_rate": 0.00039171944063640573, "loss": 0.1934, "step": 223260 }, { "epoch": 9.25, "grad_norm": 0.357421875, "learning_rate": 0.00039171050621075276, "loss": 0.188, "step": 223270 }, { "epoch": 9.25, "grad_norm": 0.30078125, "learning_rate": 0.000391701571518416, "loss": 0.2177, "step": 223280 }, { "epoch": 9.25, "grad_norm": 0.66796875, "learning_rate": 0.00039169263655941213, "loss": 0.1848, "step": 223290 }, { "epoch": 9.25, "grad_norm": 0.400390625, "learning_rate": 0.00039168370133375803, "loss": 0.2239, "step": 223300 }, { "epoch": 9.25, "grad_norm": 0.404296875, "learning_rate": 0.00039167476584147054, "loss": 0.1857, "step": 223310 }, { "epoch": 9.25, "grad_norm": 0.470703125, "learning_rate": 0.00039166583008256635, "loss": 0.1917, "step": 223320 }, { "epoch": 9.25, "grad_norm": 1.359375, "learning_rate": 0.0003916568940570624, "loss": 0.1879, "step": 223330 }, { "epoch": 9.25, "grad_norm": 0.8671875, "learning_rate": 0.0003916479577649755, "loss": 0.205, "step": 223340 }, { "epoch": 9.25, "grad_norm": 0.7421875, "learning_rate": 0.00039163902120632245, "loss": 0.1742, "step": 223350 }, { "epoch": 9.25, "grad_norm": 0.369140625, "learning_rate": 0.0003916300843811201, "loss": 0.176, "step": 223360 }, { "epoch": 9.25, "grad_norm": 1.5234375, "learning_rate": 0.00039162114728938515, "loss": 0.2456, "step": 223370 }, { "epoch": 9.25, "grad_norm": 0.72265625, "learning_rate": 0.00039161220993113455, "loss": 0.1875, "step": 223380 }, { "epoch": 9.25, "grad_norm": 1.25, "learning_rate": 0.00039160327230638504, "loss": 0.2063, "step": 223390 }, { "epoch": 9.25, "grad_norm": 0.4921875, "learning_rate": 0.0003915943344151534, "loss": 0.1938, "step": 223400 }, { "epoch": 9.25, "grad_norm": 1.734375, "learning_rate": 0.0003915853962574566, "loss": 0.1915, "step": 223410 }, { "epoch": 9.25, "grad_norm": 0.97265625, "learning_rate": 0.0003915764578333113, "loss": 0.2019, "step": 223420 }, { "epoch": 9.25, "grad_norm": 1.5078125, "learning_rate": 0.0003915675191427345, "loss": 0.2466, "step": 223430 }, { "epoch": 9.25, "grad_norm": 0.76953125, "learning_rate": 0.00039155858018574284, "loss": 0.2377, "step": 223440 }, { "epoch": 9.26, "grad_norm": 0.9140625, "learning_rate": 0.0003915496409623532, "loss": 0.1667, "step": 223450 }, { "epoch": 9.26, "grad_norm": 0.77734375, "learning_rate": 0.0003915407014725825, "loss": 0.1656, "step": 223460 }, { "epoch": 9.26, "grad_norm": 1.7890625, "learning_rate": 0.00039153176171644743, "loss": 0.2329, "step": 223470 }, { "epoch": 9.26, "grad_norm": 0.546875, "learning_rate": 0.00039152282169396493, "loss": 0.1846, "step": 223480 }, { "epoch": 9.26, "grad_norm": 0.490234375, "learning_rate": 0.00039151388140515163, "loss": 0.1347, "step": 223490 }, { "epoch": 9.26, "grad_norm": 0.88671875, "learning_rate": 0.00039150494085002466, "loss": 0.2263, "step": 223500 }, { "epoch": 9.26, "grad_norm": 0.83984375, "learning_rate": 0.0003914960000286005, "loss": 0.1794, "step": 223510 }, { "epoch": 9.26, "grad_norm": 1.1953125, "learning_rate": 0.0003914870589408963, "loss": 0.1948, "step": 223520 }, { "epoch": 9.26, "grad_norm": 0.71875, "learning_rate": 0.00039147811758692865, "loss": 0.1887, "step": 223530 }, { "epoch": 9.26, "grad_norm": 0.92578125, "learning_rate": 0.0003914691759667145, "loss": 0.1574, "step": 223540 }, { "epoch": 9.26, "grad_norm": 0.6484375, "learning_rate": 0.0003914602340802705, "loss": 0.1454, "step": 223550 }, { "epoch": 9.26, "grad_norm": 0.88671875, "learning_rate": 0.0003914512919276138, "loss": 0.2248, "step": 223560 }, { "epoch": 9.26, "grad_norm": 0.69921875, "learning_rate": 0.000391442349508761, "loss": 0.231, "step": 223570 }, { "epoch": 9.26, "grad_norm": 1.3515625, "learning_rate": 0.00039143340682372895, "loss": 0.2345, "step": 223580 }, { "epoch": 9.26, "grad_norm": 1.125, "learning_rate": 0.0003914244638725345, "loss": 0.2353, "step": 223590 }, { "epoch": 9.26, "grad_norm": 0.94921875, "learning_rate": 0.00039141552065519446, "loss": 0.1746, "step": 223600 }, { "epoch": 9.26, "grad_norm": 0.62890625, "learning_rate": 0.00039140657717172574, "loss": 0.1676, "step": 223610 }, { "epoch": 9.26, "grad_norm": 0.30078125, "learning_rate": 0.00039139763342214506, "loss": 0.2506, "step": 223620 }, { "epoch": 9.26, "grad_norm": 1.03125, "learning_rate": 0.0003913886894064693, "loss": 0.2277, "step": 223630 }, { "epoch": 9.26, "grad_norm": 1.2265625, "learning_rate": 0.0003913797451247153, "loss": 0.2299, "step": 223640 }, { "epoch": 9.26, "grad_norm": 1.09375, "learning_rate": 0.00039137080057689995, "loss": 0.1811, "step": 223650 }, { "epoch": 9.26, "grad_norm": 0.984375, "learning_rate": 0.00039136185576304, "loss": 0.1854, "step": 223660 }, { "epoch": 9.26, "grad_norm": 0.53125, "learning_rate": 0.00039135291068315224, "loss": 0.2121, "step": 223670 }, { "epoch": 9.26, "grad_norm": 0.470703125, "learning_rate": 0.0003913439653372536, "loss": 0.2107, "step": 223680 }, { "epoch": 9.27, "grad_norm": 0.79296875, "learning_rate": 0.0003913350197253609, "loss": 0.2148, "step": 223690 }, { "epoch": 9.27, "grad_norm": 0.8828125, "learning_rate": 0.00039132607384749095, "loss": 0.2162, "step": 223700 }, { "epoch": 9.27, "grad_norm": 1.0625, "learning_rate": 0.00039131712770366055, "loss": 0.187, "step": 223710 }, { "epoch": 9.27, "grad_norm": 0.376953125, "learning_rate": 0.0003913081812938866, "loss": 0.2615, "step": 223720 }, { "epoch": 9.27, "grad_norm": 0.734375, "learning_rate": 0.00039129923461818594, "loss": 0.2217, "step": 223730 }, { "epoch": 9.27, "grad_norm": 1.2421875, "learning_rate": 0.00039129028767657535, "loss": 0.1865, "step": 223740 }, { "epoch": 9.27, "grad_norm": 0.68359375, "learning_rate": 0.0003912813404690717, "loss": 0.1875, "step": 223750 }, { "epoch": 9.27, "grad_norm": 0.80859375, "learning_rate": 0.00039127239299569183, "loss": 0.2134, "step": 223760 }, { "epoch": 9.27, "grad_norm": 0.59375, "learning_rate": 0.00039126344525645257, "loss": 0.2419, "step": 223770 }, { "epoch": 9.27, "grad_norm": 0.98828125, "learning_rate": 0.0003912544972513707, "loss": 0.1875, "step": 223780 }, { "epoch": 9.27, "grad_norm": 0.87109375, "learning_rate": 0.0003912455489804633, "loss": 0.1864, "step": 223790 }, { "epoch": 9.27, "grad_norm": 0.6328125, "learning_rate": 0.00039123660044374685, "loss": 0.207, "step": 223800 }, { "epoch": 9.27, "grad_norm": 0.84765625, "learning_rate": 0.0003912276516412385, "loss": 0.2041, "step": 223810 }, { "epoch": 9.27, "grad_norm": 0.427734375, "learning_rate": 0.0003912187025729548, "loss": 0.1985, "step": 223820 }, { "epoch": 9.27, "grad_norm": 0.44921875, "learning_rate": 0.00039120975323891287, "loss": 0.1902, "step": 223830 }, { "epoch": 9.27, "grad_norm": 0.59765625, "learning_rate": 0.0003912008036391294, "loss": 0.1926, "step": 223840 }, { "epoch": 9.27, "grad_norm": 1.4453125, "learning_rate": 0.00039119185377362134, "loss": 0.1625, "step": 223850 }, { "epoch": 9.27, "grad_norm": 0.416015625, "learning_rate": 0.0003911829036424054, "loss": 0.2897, "step": 223860 }, { "epoch": 9.27, "grad_norm": 1.1171875, "learning_rate": 0.0003911739532454984, "loss": 0.2252, "step": 223870 }, { "epoch": 9.27, "grad_norm": 0.640625, "learning_rate": 0.0003911650025829174, "loss": 0.2093, "step": 223880 }, { "epoch": 9.27, "grad_norm": 1.1484375, "learning_rate": 0.00039115605165467905, "loss": 0.2145, "step": 223890 }, { "epoch": 9.27, "grad_norm": 0.6875, "learning_rate": 0.00039114710046080027, "loss": 0.2004, "step": 223900 }, { "epoch": 9.27, "grad_norm": 0.9609375, "learning_rate": 0.00039113814900129787, "loss": 0.1687, "step": 223910 }, { "epoch": 9.27, "grad_norm": 0.66015625, "learning_rate": 0.0003911291972761887, "loss": 0.2133, "step": 223920 }, { "epoch": 9.28, "grad_norm": 1.03125, "learning_rate": 0.00039112024528548963, "loss": 0.2056, "step": 223930 }, { "epoch": 9.28, "grad_norm": 0.703125, "learning_rate": 0.00039111129302921757, "loss": 0.2104, "step": 223940 }, { "epoch": 9.28, "grad_norm": 0.31640625, "learning_rate": 0.00039110234050738915, "loss": 0.2161, "step": 223950 }, { "epoch": 9.28, "grad_norm": 0.828125, "learning_rate": 0.00039109338772002147, "loss": 0.1869, "step": 223960 }, { "epoch": 9.28, "grad_norm": 0.65625, "learning_rate": 0.00039108443466713127, "loss": 0.2229, "step": 223970 }, { "epoch": 9.28, "grad_norm": 1.0390625, "learning_rate": 0.0003910754813487354, "loss": 0.1656, "step": 223980 }, { "epoch": 9.28, "grad_norm": 1.1640625, "learning_rate": 0.0003910665277648506, "loss": 0.2582, "step": 223990 }, { "epoch": 9.28, "grad_norm": 1.1953125, "learning_rate": 0.00039105757391549395, "loss": 0.2155, "step": 224000 }, { "epoch": 9.28, "grad_norm": 1.90625, "learning_rate": 0.0003910486198006822, "loss": 0.187, "step": 224010 }, { "epoch": 9.28, "grad_norm": 0.36328125, "learning_rate": 0.0003910396654204321, "loss": 0.1556, "step": 224020 }, { "epoch": 9.28, "grad_norm": 0.5546875, "learning_rate": 0.0003910307107747606, "loss": 0.2209, "step": 224030 }, { "epoch": 9.28, "grad_norm": 0.6640625, "learning_rate": 0.00039102175586368455, "loss": 0.1948, "step": 224040 }, { "epoch": 9.28, "grad_norm": 0.90625, "learning_rate": 0.0003910128006872208, "loss": 0.1754, "step": 224050 }, { "epoch": 9.28, "grad_norm": 0.326171875, "learning_rate": 0.0003910038452453861, "loss": 0.1707, "step": 224060 }, { "epoch": 9.28, "grad_norm": 0.671875, "learning_rate": 0.00039099488953819753, "loss": 0.1685, "step": 224070 }, { "epoch": 9.28, "grad_norm": 0.765625, "learning_rate": 0.00039098593356567165, "loss": 0.2439, "step": 224080 }, { "epoch": 9.28, "grad_norm": 0.8359375, "learning_rate": 0.00039097697732782557, "loss": 0.1824, "step": 224090 }, { "epoch": 9.28, "grad_norm": 0.2431640625, "learning_rate": 0.00039096802082467597, "loss": 0.1594, "step": 224100 }, { "epoch": 9.28, "grad_norm": 0.94140625, "learning_rate": 0.0003909590640562398, "loss": 0.1934, "step": 224110 }, { "epoch": 9.28, "grad_norm": 2.609375, "learning_rate": 0.000390950107022534, "loss": 0.1916, "step": 224120 }, { "epoch": 9.28, "grad_norm": 4.125, "learning_rate": 0.0003909411497235752, "loss": 0.1764, "step": 224130 }, { "epoch": 9.28, "grad_norm": 0.29296875, "learning_rate": 0.0003909321921593804, "loss": 0.186, "step": 224140 }, { "epoch": 9.28, "grad_norm": 0.96875, "learning_rate": 0.00039092323432996645, "loss": 0.2185, "step": 224150 }, { "epoch": 9.28, "grad_norm": 0.63671875, "learning_rate": 0.0003909142762353502, "loss": 0.2012, "step": 224160 }, { "epoch": 9.29, "grad_norm": 1.4765625, "learning_rate": 0.0003909053178755485, "loss": 0.206, "step": 224170 }, { "epoch": 9.29, "grad_norm": 0.796875, "learning_rate": 0.0003908963592505782, "loss": 0.2066, "step": 224180 }, { "epoch": 9.29, "grad_norm": 0.76171875, "learning_rate": 0.00039088740036045613, "loss": 0.2182, "step": 224190 }, { "epoch": 9.29, "grad_norm": 1.0703125, "learning_rate": 0.0003908784412051992, "loss": 0.2273, "step": 224200 }, { "epoch": 9.29, "grad_norm": 1.1796875, "learning_rate": 0.0003908694817848243, "loss": 0.1751, "step": 224210 }, { "epoch": 9.29, "grad_norm": 0.5703125, "learning_rate": 0.0003908605220993483, "loss": 0.2026, "step": 224220 }, { "epoch": 9.29, "grad_norm": 0.255859375, "learning_rate": 0.0003908515621487879, "loss": 0.1637, "step": 224230 }, { "epoch": 9.29, "grad_norm": 0.361328125, "learning_rate": 0.0003908426019331601, "loss": 0.2334, "step": 224240 }, { "epoch": 9.29, "grad_norm": 4.875, "learning_rate": 0.0003908336414524817, "loss": 0.1902, "step": 224250 }, { "epoch": 9.29, "grad_norm": 1.2890625, "learning_rate": 0.00039082468070676966, "loss": 0.1708, "step": 224260 }, { "epoch": 9.29, "grad_norm": 0.5703125, "learning_rate": 0.0003908157196960408, "loss": 0.1813, "step": 224270 }, { "epoch": 9.29, "grad_norm": 1.421875, "learning_rate": 0.0003908067584203119, "loss": 0.2174, "step": 224280 }, { "epoch": 9.29, "grad_norm": 0.41796875, "learning_rate": 0.00039079779687959987, "loss": 0.1668, "step": 224290 }, { "epoch": 9.29, "grad_norm": 0.60546875, "learning_rate": 0.0003907888350739216, "loss": 0.1557, "step": 224300 }, { "epoch": 9.29, "grad_norm": 0.328125, "learning_rate": 0.000390779873003294, "loss": 0.1791, "step": 224310 }, { "epoch": 9.29, "grad_norm": 0.66015625, "learning_rate": 0.00039077091066773385, "loss": 0.2661, "step": 224320 }, { "epoch": 9.29, "grad_norm": 1.09375, "learning_rate": 0.00039076194806725805, "loss": 0.2188, "step": 224330 }, { "epoch": 9.29, "grad_norm": 1.5234375, "learning_rate": 0.0003907529852018834, "loss": 0.1941, "step": 224340 }, { "epoch": 9.29, "grad_norm": 0.390625, "learning_rate": 0.00039074402207162697, "loss": 0.2133, "step": 224350 }, { "epoch": 9.29, "grad_norm": 0.59765625, "learning_rate": 0.0003907350586765054, "loss": 0.2066, "step": 224360 }, { "epoch": 9.29, "grad_norm": 0.49609375, "learning_rate": 0.00039072609501653564, "loss": 0.2219, "step": 224370 }, { "epoch": 9.29, "grad_norm": 0.49609375, "learning_rate": 0.00039071713109173464, "loss": 0.2021, "step": 224380 }, { "epoch": 9.29, "grad_norm": 0.3203125, "learning_rate": 0.00039070816690211905, "loss": 0.1733, "step": 224390 }, { "epoch": 9.29, "grad_norm": 1.65625, "learning_rate": 0.00039069920244770605, "loss": 0.182, "step": 224400 }, { "epoch": 9.3, "grad_norm": 0.8203125, "learning_rate": 0.00039069023772851224, "loss": 0.1938, "step": 224410 }, { "epoch": 9.3, "grad_norm": 0.96875, "learning_rate": 0.0003906812727445547, "loss": 0.1838, "step": 224420 }, { "epoch": 9.3, "grad_norm": 0.69921875, "learning_rate": 0.0003906723074958501, "loss": 0.1686, "step": 224430 }, { "epoch": 9.3, "grad_norm": 0.8671875, "learning_rate": 0.00039066334198241545, "loss": 0.1703, "step": 224440 }, { "epoch": 9.3, "grad_norm": 0.9375, "learning_rate": 0.00039065437620426754, "loss": 0.2063, "step": 224450 }, { "epoch": 9.3, "grad_norm": 0.73046875, "learning_rate": 0.0003906454101614233, "loss": 0.1933, "step": 224460 }, { "epoch": 9.3, "grad_norm": 0.494140625, "learning_rate": 0.0003906364438538996, "loss": 0.1958, "step": 224470 }, { "epoch": 9.3, "grad_norm": 0.83203125, "learning_rate": 0.00039062747728171334, "loss": 0.2464, "step": 224480 }, { "epoch": 9.3, "grad_norm": 0.4921875, "learning_rate": 0.00039061851044488126, "loss": 0.1621, "step": 224490 }, { "epoch": 9.3, "grad_norm": 0.66796875, "learning_rate": 0.0003906095433434204, "loss": 0.1459, "step": 224500 }, { "epoch": 9.3, "grad_norm": 0.396484375, "learning_rate": 0.00039060057597734753, "loss": 0.224, "step": 224510 }, { "epoch": 9.3, "grad_norm": 2.34375, "learning_rate": 0.00039059160834667954, "loss": 0.1972, "step": 224520 }, { "epoch": 9.3, "grad_norm": 0.59765625, "learning_rate": 0.0003905826404514334, "loss": 0.1603, "step": 224530 }, { "epoch": 9.3, "grad_norm": 0.91015625, "learning_rate": 0.00039057367229162587, "loss": 0.1689, "step": 224540 }, { "epoch": 9.3, "grad_norm": 0.73828125, "learning_rate": 0.00039056470386727386, "loss": 0.2039, "step": 224550 }, { "epoch": 9.3, "grad_norm": 0.546875, "learning_rate": 0.00039055573517839424, "loss": 0.2113, "step": 224560 }, { "epoch": 9.3, "grad_norm": 0.65234375, "learning_rate": 0.00039054676622500393, "loss": 0.1938, "step": 224570 }, { "epoch": 9.3, "grad_norm": 0.275390625, "learning_rate": 0.0003905377970071198, "loss": 0.2183, "step": 224580 }, { "epoch": 9.3, "grad_norm": 0.51171875, "learning_rate": 0.0003905288275247587, "loss": 0.2262, "step": 224590 }, { "epoch": 9.3, "grad_norm": 0.88671875, "learning_rate": 0.0003905198577779375, "loss": 0.208, "step": 224600 }, { "epoch": 9.3, "grad_norm": 2.40625, "learning_rate": 0.0003905108877666732, "loss": 0.1795, "step": 224610 }, { "epoch": 9.3, "grad_norm": 1.21875, "learning_rate": 0.0003905019174909825, "loss": 0.2248, "step": 224620 }, { "epoch": 9.3, "grad_norm": 1.4375, "learning_rate": 0.00039049294695088233, "loss": 0.1721, "step": 224630 }, { "epoch": 9.3, "grad_norm": 1.390625, "learning_rate": 0.00039048397614638964, "loss": 0.2417, "step": 224640 }, { "epoch": 9.3, "grad_norm": 0.3515625, "learning_rate": 0.0003904750050775213, "loss": 0.2373, "step": 224650 }, { "epoch": 9.31, "grad_norm": 1.40625, "learning_rate": 0.0003904660337442941, "loss": 0.2156, "step": 224660 }, { "epoch": 9.31, "grad_norm": 0.76171875, "learning_rate": 0.0003904570621467251, "loss": 0.1865, "step": 224670 }, { "epoch": 9.31, "grad_norm": 1.1328125, "learning_rate": 0.00039044809028483104, "loss": 0.27, "step": 224680 }, { "epoch": 9.31, "grad_norm": 0.703125, "learning_rate": 0.0003904391181586288, "loss": 0.1901, "step": 224690 }, { "epoch": 9.31, "grad_norm": 0.306640625, "learning_rate": 0.0003904301457681354, "loss": 0.1623, "step": 224700 }, { "epoch": 9.31, "grad_norm": 0.8125, "learning_rate": 0.0003904211731133675, "loss": 0.1985, "step": 224710 }, { "epoch": 9.31, "grad_norm": 1.1796875, "learning_rate": 0.0003904122001943422, "loss": 0.2042, "step": 224720 }, { "epoch": 9.31, "grad_norm": 0.734375, "learning_rate": 0.0003904032270110762, "loss": 0.1953, "step": 224730 }, { "epoch": 9.31, "grad_norm": 0.326171875, "learning_rate": 0.00039039425356358665, "loss": 0.2077, "step": 224740 }, { "epoch": 9.31, "grad_norm": 0.75390625, "learning_rate": 0.00039038527985189023, "loss": 0.1702, "step": 224750 }, { "epoch": 9.31, "grad_norm": 0.984375, "learning_rate": 0.00039037630587600384, "loss": 0.1839, "step": 224760 }, { "epoch": 9.31, "grad_norm": 0.251953125, "learning_rate": 0.0003903673316359444, "loss": 0.1756, "step": 224770 }, { "epoch": 9.31, "grad_norm": 0.53125, "learning_rate": 0.0003903583571317288, "loss": 0.2135, "step": 224780 }, { "epoch": 9.31, "grad_norm": 0.3515625, "learning_rate": 0.00039034938236337394, "loss": 0.19, "step": 224790 }, { "epoch": 9.31, "grad_norm": 0.25390625, "learning_rate": 0.00039034040733089664, "loss": 0.195, "step": 224800 }, { "epoch": 9.31, "grad_norm": 0.875, "learning_rate": 0.00039033143203431387, "loss": 0.189, "step": 224810 }, { "epoch": 9.31, "grad_norm": 1.2265625, "learning_rate": 0.00039032245647364254, "loss": 0.1403, "step": 224820 }, { "epoch": 9.31, "grad_norm": 0.6015625, "learning_rate": 0.0003903134806488995, "loss": 0.2144, "step": 224830 }, { "epoch": 9.31, "grad_norm": 0.55859375, "learning_rate": 0.00039030450456010166, "loss": 0.1487, "step": 224840 }, { "epoch": 9.31, "grad_norm": 0.490234375, "learning_rate": 0.0003902955282072659, "loss": 0.1581, "step": 224850 }, { "epoch": 9.31, "grad_norm": 1.765625, "learning_rate": 0.00039028655159040903, "loss": 0.2377, "step": 224860 }, { "epoch": 9.31, "grad_norm": 0.9765625, "learning_rate": 0.00039027757470954804, "loss": 0.1969, "step": 224870 }, { "epoch": 9.31, "grad_norm": 0.90625, "learning_rate": 0.00039026859756469986, "loss": 0.2511, "step": 224880 }, { "epoch": 9.31, "grad_norm": 1.3203125, "learning_rate": 0.00039025962015588124, "loss": 0.1947, "step": 224890 }, { "epoch": 9.32, "grad_norm": 0.439453125, "learning_rate": 0.0003902506424831093, "loss": 0.22, "step": 224900 }, { "epoch": 9.32, "grad_norm": 0.65234375, "learning_rate": 0.00039024166454640065, "loss": 0.1632, "step": 224910 }, { "epoch": 9.32, "grad_norm": 0.6953125, "learning_rate": 0.0003902326863457724, "loss": 0.2155, "step": 224920 }, { "epoch": 9.32, "grad_norm": 0.83984375, "learning_rate": 0.0003902237078812414, "loss": 0.1635, "step": 224930 }, { "epoch": 9.32, "grad_norm": 0.734375, "learning_rate": 0.0003902147291528244, "loss": 0.1488, "step": 224940 }, { "epoch": 9.32, "grad_norm": 1.1015625, "learning_rate": 0.0003902057501605386, "loss": 0.1565, "step": 224950 }, { "epoch": 9.32, "grad_norm": 0.455078125, "learning_rate": 0.00039019677090440063, "loss": 0.2006, "step": 224960 }, { "epoch": 9.32, "grad_norm": 1.3828125, "learning_rate": 0.00039018779138442746, "loss": 0.1804, "step": 224970 }, { "epoch": 9.32, "grad_norm": 1.1015625, "learning_rate": 0.00039017881160063607, "loss": 0.1855, "step": 224980 }, { "epoch": 9.32, "grad_norm": 0.7421875, "learning_rate": 0.0003901698315530432, "loss": 0.2293, "step": 224990 }, { "epoch": 9.32, "grad_norm": 0.8515625, "learning_rate": 0.0003901608512416659, "loss": 0.2103, "step": 225000 }, { "epoch": 9.32, "grad_norm": 2.78125, "learning_rate": 0.000390151870666521, "loss": 0.1997, "step": 225010 }, { "epoch": 9.32, "grad_norm": 0.7109375, "learning_rate": 0.0003901428898276255, "loss": 0.2208, "step": 225020 }, { "epoch": 9.32, "grad_norm": 0.5703125, "learning_rate": 0.0003901339087249961, "loss": 0.2679, "step": 225030 }, { "epoch": 9.32, "grad_norm": 0.439453125, "learning_rate": 0.0003901249273586498, "loss": 0.2072, "step": 225040 }, { "epoch": 9.32, "grad_norm": 0.62109375, "learning_rate": 0.0003901159457286036, "loss": 0.198, "step": 225050 }, { "epoch": 9.32, "grad_norm": 0.8671875, "learning_rate": 0.0003901069638348743, "loss": 0.2006, "step": 225060 }, { "epoch": 9.32, "grad_norm": 1.484375, "learning_rate": 0.0003900979816774788, "loss": 0.2369, "step": 225070 }, { "epoch": 9.32, "grad_norm": 0.890625, "learning_rate": 0.000390088999256434, "loss": 0.1732, "step": 225080 }, { "epoch": 9.32, "grad_norm": 0.6015625, "learning_rate": 0.00039008001657175694, "loss": 0.1974, "step": 225090 }, { "epoch": 9.32, "grad_norm": 1.0390625, "learning_rate": 0.0003900710336234644, "loss": 0.2892, "step": 225100 }, { "epoch": 9.32, "grad_norm": 0.494140625, "learning_rate": 0.0003900620504115733, "loss": 0.2081, "step": 225110 }, { "epoch": 9.32, "grad_norm": 0.609375, "learning_rate": 0.00039005306693610045, "loss": 0.171, "step": 225120 }, { "epoch": 9.32, "grad_norm": 0.55859375, "learning_rate": 0.00039004408319706294, "loss": 0.1565, "step": 225130 }, { "epoch": 9.33, "grad_norm": 1.8984375, "learning_rate": 0.00039003509919447756, "loss": 0.195, "step": 225140 }, { "epoch": 9.33, "grad_norm": 0.99609375, "learning_rate": 0.0003900261149283612, "loss": 0.2109, "step": 225150 }, { "epoch": 9.33, "grad_norm": 0.99609375, "learning_rate": 0.00039001713039873087, "loss": 0.1889, "step": 225160 }, { "epoch": 9.33, "grad_norm": 3.140625, "learning_rate": 0.0003900081456056034, "loss": 0.1951, "step": 225170 }, { "epoch": 9.33, "grad_norm": 0.4453125, "learning_rate": 0.00038999916054899573, "loss": 0.1687, "step": 225180 }, { "epoch": 9.33, "grad_norm": 1.015625, "learning_rate": 0.0003899901752289248, "loss": 0.2558, "step": 225190 }, { "epoch": 9.33, "grad_norm": 0.62890625, "learning_rate": 0.0003899811896454074, "loss": 0.1866, "step": 225200 }, { "epoch": 9.33, "grad_norm": 1.03125, "learning_rate": 0.00038997220379846057, "loss": 0.1953, "step": 225210 }, { "epoch": 9.33, "grad_norm": 0.94140625, "learning_rate": 0.00038996321768810117, "loss": 0.1963, "step": 225220 }, { "epoch": 9.33, "grad_norm": 0.66796875, "learning_rate": 0.00038995423131434607, "loss": 0.2623, "step": 225230 }, { "epoch": 9.33, "grad_norm": 0.81640625, "learning_rate": 0.0003899452446772122, "loss": 0.2181, "step": 225240 }, { "epoch": 9.33, "grad_norm": 0.5546875, "learning_rate": 0.00038993625777671655, "loss": 0.2039, "step": 225250 }, { "epoch": 9.33, "grad_norm": 0.91796875, "learning_rate": 0.0003899272706128759, "loss": 0.2231, "step": 225260 }, { "epoch": 9.33, "grad_norm": 0.80859375, "learning_rate": 0.0003899182831857073, "loss": 0.1782, "step": 225270 }, { "epoch": 9.33, "grad_norm": 0.65234375, "learning_rate": 0.0003899092954952276, "loss": 0.2104, "step": 225280 }, { "epoch": 9.33, "grad_norm": 0.6640625, "learning_rate": 0.0003899003075414537, "loss": 0.2347, "step": 225290 }, { "epoch": 9.33, "grad_norm": 0.4140625, "learning_rate": 0.0003898913193244026, "loss": 0.1967, "step": 225300 }, { "epoch": 9.33, "grad_norm": 1.6328125, "learning_rate": 0.00038988233084409103, "loss": 0.1807, "step": 225310 }, { "epoch": 9.33, "grad_norm": 0.9140625, "learning_rate": 0.0003898733421005361, "loss": 0.1956, "step": 225320 }, { "epoch": 9.33, "grad_norm": 0.515625, "learning_rate": 0.0003898643530937546, "loss": 0.1721, "step": 225330 }, { "epoch": 9.33, "grad_norm": 0.59375, "learning_rate": 0.00038985536382376353, "loss": 0.1818, "step": 225340 }, { "epoch": 9.33, "grad_norm": 0.466796875, "learning_rate": 0.0003898463742905797, "loss": 0.1854, "step": 225350 }, { "epoch": 9.33, "grad_norm": 0.51171875, "learning_rate": 0.0003898373844942202, "loss": 0.1701, "step": 225360 }, { "epoch": 9.33, "grad_norm": 0.77734375, "learning_rate": 0.00038982839443470175, "loss": 0.2601, "step": 225370 }, { "epoch": 9.34, "grad_norm": 0.224609375, "learning_rate": 0.0003898194041120414, "loss": 0.193, "step": 225380 }, { "epoch": 9.34, "grad_norm": 1.2578125, "learning_rate": 0.000389810413526256, "loss": 0.1623, "step": 225390 }, { "epoch": 9.34, "grad_norm": 0.7265625, "learning_rate": 0.0003898014226773625, "loss": 0.2151, "step": 225400 }, { "epoch": 9.34, "grad_norm": 0.0003509521484375, "learning_rate": 0.0003897924315653779, "loss": 0.1575, "step": 225410 }, { "epoch": 9.34, "grad_norm": 0.76171875, "learning_rate": 0.000389783440190319, "loss": 0.2145, "step": 225420 }, { "epoch": 9.34, "grad_norm": 2.328125, "learning_rate": 0.00038977444855220277, "loss": 0.2661, "step": 225430 }, { "epoch": 9.34, "grad_norm": 0.84375, "learning_rate": 0.00038976545665104606, "loss": 0.1949, "step": 225440 }, { "epoch": 9.34, "grad_norm": 0.96484375, "learning_rate": 0.0003897564644868659, "loss": 0.231, "step": 225450 }, { "epoch": 9.34, "grad_norm": 0.7109375, "learning_rate": 0.0003897474720596792, "loss": 0.1136, "step": 225460 }, { "epoch": 9.34, "grad_norm": 0.8125, "learning_rate": 0.0003897384793695028, "loss": 0.1798, "step": 225470 }, { "epoch": 9.34, "grad_norm": 1.8046875, "learning_rate": 0.00038972948641635366, "loss": 0.1735, "step": 225480 }, { "epoch": 9.34, "grad_norm": 0.65625, "learning_rate": 0.00038972049320024874, "loss": 0.1646, "step": 225490 }, { "epoch": 9.34, "grad_norm": 1.171875, "learning_rate": 0.0003897114997212049, "loss": 0.1678, "step": 225500 }, { "epoch": 9.34, "grad_norm": 2.84375, "learning_rate": 0.00038970250597923916, "loss": 0.2144, "step": 225510 }, { "epoch": 9.34, "grad_norm": 0.95703125, "learning_rate": 0.00038969351197436843, "loss": 0.2365, "step": 225520 }, { "epoch": 9.34, "grad_norm": 0.59765625, "learning_rate": 0.0003896845177066095, "loss": 0.2268, "step": 225530 }, { "epoch": 9.34, "grad_norm": 0.447265625, "learning_rate": 0.00038967552317597945, "loss": 0.1809, "step": 225540 }, { "epoch": 9.34, "grad_norm": 0.78515625, "learning_rate": 0.0003896665283824951, "loss": 0.1956, "step": 225550 }, { "epoch": 9.34, "grad_norm": 0.431640625, "learning_rate": 0.0003896575333261735, "loss": 0.2119, "step": 225560 }, { "epoch": 9.34, "grad_norm": 1.5234375, "learning_rate": 0.00038964853800703143, "loss": 0.2092, "step": 225570 }, { "epoch": 9.34, "grad_norm": 0.384765625, "learning_rate": 0.0003896395424250859, "loss": 0.2087, "step": 225580 }, { "epoch": 9.34, "grad_norm": 0.546875, "learning_rate": 0.0003896305465803539, "loss": 0.217, "step": 225590 }, { "epoch": 9.34, "grad_norm": 1.0234375, "learning_rate": 0.0003896215504728522, "loss": 0.2158, "step": 225600 }, { "epoch": 9.34, "grad_norm": 1.0078125, "learning_rate": 0.0003896125541025979, "loss": 0.2008, "step": 225610 }, { "epoch": 9.35, "grad_norm": 1.0078125, "learning_rate": 0.0003896035574696078, "loss": 0.2243, "step": 225620 }, { "epoch": 9.35, "grad_norm": 0.671875, "learning_rate": 0.0003895945605738989, "loss": 0.1865, "step": 225630 }, { "epoch": 9.35, "grad_norm": 0.9453125, "learning_rate": 0.0003895855634154881, "loss": 0.236, "step": 225640 }, { "epoch": 9.35, "grad_norm": 0.2041015625, "learning_rate": 0.00038957656599439234, "loss": 0.2186, "step": 225650 }, { "epoch": 9.35, "grad_norm": 0.6171875, "learning_rate": 0.00038956756831062854, "loss": 0.1987, "step": 225660 }, { "epoch": 9.35, "grad_norm": 0.77734375, "learning_rate": 0.00038955857036421373, "loss": 0.1818, "step": 225670 }, { "epoch": 9.35, "grad_norm": 0.72265625, "learning_rate": 0.0003895495721551647, "loss": 0.2086, "step": 225680 }, { "epoch": 9.35, "grad_norm": 3.140625, "learning_rate": 0.0003895405736834985, "loss": 0.1772, "step": 225690 }, { "epoch": 9.35, "grad_norm": 0.5625, "learning_rate": 0.00038953157494923187, "loss": 0.1947, "step": 225700 }, { "epoch": 9.35, "grad_norm": 0.51953125, "learning_rate": 0.000389522575952382, "loss": 0.1505, "step": 225710 }, { "epoch": 9.35, "grad_norm": 1.28125, "learning_rate": 0.0003895135766929657, "loss": 0.1641, "step": 225720 }, { "epoch": 9.35, "grad_norm": 0.439453125, "learning_rate": 0.00038950457717099984, "loss": 0.2155, "step": 225730 }, { "epoch": 9.35, "grad_norm": 0.435546875, "learning_rate": 0.0003894955773865015, "loss": 0.1702, "step": 225740 }, { "epoch": 9.35, "grad_norm": 0.8359375, "learning_rate": 0.00038948657733948756, "loss": 0.1713, "step": 225750 }, { "epoch": 9.35, "grad_norm": 0.65625, "learning_rate": 0.0003894775770299749, "loss": 0.198, "step": 225760 }, { "epoch": 9.35, "grad_norm": 0.345703125, "learning_rate": 0.0003894685764579806, "loss": 0.2046, "step": 225770 }, { "epoch": 9.35, "grad_norm": 0.7734375, "learning_rate": 0.00038945957562352144, "loss": 0.2254, "step": 225780 }, { "epoch": 9.35, "grad_norm": 0.6171875, "learning_rate": 0.0003894505745266144, "loss": 0.1755, "step": 225790 }, { "epoch": 9.35, "grad_norm": 1.3984375, "learning_rate": 0.0003894415731672764, "loss": 0.2043, "step": 225800 }, { "epoch": 9.35, "grad_norm": 0.859375, "learning_rate": 0.0003894325715455245, "loss": 0.1531, "step": 225810 }, { "epoch": 9.35, "grad_norm": 0.55078125, "learning_rate": 0.0003894235696613755, "loss": 0.214, "step": 225820 }, { "epoch": 9.35, "grad_norm": 0.7109375, "learning_rate": 0.0003894145675148464, "loss": 0.2111, "step": 225830 }, { "epoch": 9.35, "grad_norm": 0.75, "learning_rate": 0.00038940556510595417, "loss": 0.223, "step": 225840 }, { "epoch": 9.35, "grad_norm": 0.002685546875, "learning_rate": 0.0003893965624347157, "loss": 0.2108, "step": 225850 }, { "epoch": 9.36, "grad_norm": 0.75, "learning_rate": 0.0003893875595011479, "loss": 0.2086, "step": 225860 }, { "epoch": 9.36, "grad_norm": 1.5703125, "learning_rate": 0.00038937855630526787, "loss": 0.1828, "step": 225870 }, { "epoch": 9.36, "grad_norm": 0.60546875, "learning_rate": 0.0003893695528470924, "loss": 0.2206, "step": 225880 }, { "epoch": 9.36, "grad_norm": 0.53515625, "learning_rate": 0.0003893605491266385, "loss": 0.1744, "step": 225890 }, { "epoch": 9.36, "grad_norm": 0.8125, "learning_rate": 0.0003893515451439231, "loss": 0.1391, "step": 225900 }, { "epoch": 9.36, "grad_norm": 1.1640625, "learning_rate": 0.00038934254089896303, "loss": 0.2388, "step": 225910 }, { "epoch": 9.36, "grad_norm": 1.140625, "learning_rate": 0.0003893335363917755, "loss": 0.1542, "step": 225920 }, { "epoch": 9.36, "grad_norm": 0.97265625, "learning_rate": 0.0003893245316223772, "loss": 0.2219, "step": 225930 }, { "epoch": 9.36, "grad_norm": 0.7265625, "learning_rate": 0.00038931552659078524, "loss": 0.1379, "step": 225940 }, { "epoch": 9.36, "grad_norm": 1.0546875, "learning_rate": 0.0003893065212970165, "loss": 0.2353, "step": 225950 }, { "epoch": 9.36, "grad_norm": 1.015625, "learning_rate": 0.0003892975157410879, "loss": 0.2064, "step": 225960 }, { "epoch": 9.36, "grad_norm": 1.0625, "learning_rate": 0.00038928850992301647, "loss": 0.2116, "step": 225970 }, { "epoch": 9.36, "grad_norm": 0.51953125, "learning_rate": 0.00038927950384281905, "loss": 0.1774, "step": 225980 }, { "epoch": 9.36, "grad_norm": 1.03125, "learning_rate": 0.00038927049750051257, "loss": 0.2696, "step": 225990 }, { "epoch": 9.36, "grad_norm": 0.6484375, "learning_rate": 0.0003892614908961142, "loss": 0.1936, "step": 226000 }, { "epoch": 9.36, "grad_norm": 0.609375, "learning_rate": 0.00038925248402964073, "loss": 0.16, "step": 226010 }, { "epoch": 9.36, "grad_norm": 3.125, "learning_rate": 0.00038924347690110904, "loss": 0.2275, "step": 226020 }, { "epoch": 9.36, "grad_norm": 1.015625, "learning_rate": 0.0003892344695105362, "loss": 0.2518, "step": 226030 }, { "epoch": 9.36, "grad_norm": 0.83984375, "learning_rate": 0.00038922546185793914, "loss": 0.1897, "step": 226040 }, { "epoch": 9.36, "grad_norm": 1.0234375, "learning_rate": 0.0003892164539433348, "loss": 0.1706, "step": 226050 }, { "epoch": 9.36, "grad_norm": 0.625, "learning_rate": 0.0003892074457667401, "loss": 0.1965, "step": 226060 }, { "epoch": 9.36, "grad_norm": 0.68359375, "learning_rate": 0.000389198437328172, "loss": 0.2044, "step": 226070 }, { "epoch": 9.36, "grad_norm": 0.0, "learning_rate": 0.0003891894286276475, "loss": 0.2179, "step": 226080 }, { "epoch": 9.36, "grad_norm": 0.89453125, "learning_rate": 0.00038918041966518356, "loss": 0.2113, "step": 226090 }, { "epoch": 9.37, "grad_norm": 2.109375, "learning_rate": 0.0003891714104407971, "loss": 0.2334, "step": 226100 }, { "epoch": 9.37, "grad_norm": 0.59375, "learning_rate": 0.0003891624009545051, "loss": 0.1505, "step": 226110 }, { "epoch": 9.37, "grad_norm": 1.453125, "learning_rate": 0.0003891533912063243, "loss": 0.2078, "step": 226120 }, { "epoch": 9.37, "grad_norm": 0.76953125, "learning_rate": 0.00038914438119627204, "loss": 0.1985, "step": 226130 }, { "epoch": 9.37, "grad_norm": 1.046875, "learning_rate": 0.00038913537092436504, "loss": 0.226, "step": 226140 }, { "epoch": 9.37, "grad_norm": 0.5859375, "learning_rate": 0.00038912636039062023, "loss": 0.2344, "step": 226150 }, { "epoch": 9.37, "grad_norm": 0.19140625, "learning_rate": 0.00038911734959505475, "loss": 0.1792, "step": 226160 }, { "epoch": 9.37, "grad_norm": 0.498046875, "learning_rate": 0.00038910833853768534, "loss": 0.2183, "step": 226170 }, { "epoch": 9.37, "grad_norm": 0.59375, "learning_rate": 0.0003890993272185291, "loss": 0.2111, "step": 226180 }, { "epoch": 9.37, "grad_norm": 0.859375, "learning_rate": 0.00038909031563760294, "loss": 0.1864, "step": 226190 }, { "epoch": 9.37, "grad_norm": 0.17578125, "learning_rate": 0.0003890813037949238, "loss": 0.2025, "step": 226200 }, { "epoch": 9.37, "grad_norm": 0.61328125, "learning_rate": 0.00038907229169050873, "loss": 0.1673, "step": 226210 }, { "epoch": 9.37, "grad_norm": 1.1953125, "learning_rate": 0.0003890632793243746, "loss": 0.1817, "step": 226220 }, { "epoch": 9.37, "grad_norm": 0.78125, "learning_rate": 0.00038905426669653834, "loss": 0.1984, "step": 226230 }, { "epoch": 9.37, "grad_norm": 0.90234375, "learning_rate": 0.00038904525380701704, "loss": 0.1767, "step": 226240 }, { "epoch": 9.37, "grad_norm": 0.462890625, "learning_rate": 0.00038903624065582756, "loss": 0.1572, "step": 226250 }, { "epoch": 9.37, "grad_norm": 2.34375, "learning_rate": 0.00038902722724298687, "loss": 0.183, "step": 226260 }, { "epoch": 9.37, "grad_norm": 0.57421875, "learning_rate": 0.000389018213568512, "loss": 0.1599, "step": 226270 }, { "epoch": 9.37, "grad_norm": 0.73046875, "learning_rate": 0.0003890091996324198, "loss": 0.248, "step": 226280 }, { "epoch": 9.37, "grad_norm": 0.8515625, "learning_rate": 0.0003890001854347274, "loss": 0.1971, "step": 226290 }, { "epoch": 9.37, "grad_norm": 0.87109375, "learning_rate": 0.00038899117097545156, "loss": 0.2016, "step": 226300 }, { "epoch": 9.37, "grad_norm": 0.490234375, "learning_rate": 0.0003889821562546093, "loss": 0.2039, "step": 226310 }, { "epoch": 9.37, "grad_norm": 0.451171875, "learning_rate": 0.0003889731412722178, "loss": 0.1701, "step": 226320 }, { "epoch": 9.37, "grad_norm": 0.48828125, "learning_rate": 0.00038896412602829376, "loss": 0.2338, "step": 226330 }, { "epoch": 9.37, "grad_norm": 0.28515625, "learning_rate": 0.00038895511052285423, "loss": 0.1798, "step": 226340 }, { "epoch": 9.38, "grad_norm": 0.8125, "learning_rate": 0.00038894609475591626, "loss": 0.1868, "step": 226350 }, { "epoch": 9.38, "grad_norm": 0.283203125, "learning_rate": 0.00038893707872749664, "loss": 0.1486, "step": 226360 }, { "epoch": 9.38, "grad_norm": 0.734375, "learning_rate": 0.00038892806243761245, "loss": 0.2052, "step": 226370 }, { "epoch": 9.38, "grad_norm": 0.2431640625, "learning_rate": 0.00038891904588628066, "loss": 0.2065, "step": 226380 }, { "epoch": 9.38, "grad_norm": 0.9375, "learning_rate": 0.00038891002907351835, "loss": 0.2102, "step": 226390 }, { "epoch": 9.38, "grad_norm": 0.8203125, "learning_rate": 0.00038890101199934226, "loss": 0.1964, "step": 226400 }, { "epoch": 9.38, "grad_norm": 0.7421875, "learning_rate": 0.00038889199466376947, "loss": 0.1919, "step": 226410 }, { "epoch": 9.38, "grad_norm": 0.89453125, "learning_rate": 0.00038888297706681695, "loss": 0.2425, "step": 226420 }, { "epoch": 9.38, "grad_norm": 0.443359375, "learning_rate": 0.0003888739592085017, "loss": 0.1556, "step": 226430 }, { "epoch": 9.38, "grad_norm": 0.625, "learning_rate": 0.00038886494108884064, "loss": 0.1656, "step": 226440 }, { "epoch": 9.38, "grad_norm": 0.74609375, "learning_rate": 0.00038885592270785073, "loss": 0.191, "step": 226450 }, { "epoch": 9.38, "grad_norm": 0.87109375, "learning_rate": 0.00038884690406554897, "loss": 0.2137, "step": 226460 }, { "epoch": 9.38, "grad_norm": 0.703125, "learning_rate": 0.00038883788516195237, "loss": 0.2221, "step": 226470 }, { "epoch": 9.38, "grad_norm": 0.734375, "learning_rate": 0.00038882886599707784, "loss": 0.21, "step": 226480 }, { "epoch": 9.38, "grad_norm": 0.58984375, "learning_rate": 0.0003888198465709424, "loss": 0.1629, "step": 226490 }, { "epoch": 9.38, "grad_norm": 0.53515625, "learning_rate": 0.00038881082688356294, "loss": 0.1927, "step": 226500 }, { "epoch": 9.38, "grad_norm": 0.36328125, "learning_rate": 0.00038880180693495655, "loss": 0.1839, "step": 226510 }, { "epoch": 9.38, "grad_norm": 0.69921875, "learning_rate": 0.0003887927867251401, "loss": 0.188, "step": 226520 }, { "epoch": 9.38, "grad_norm": 0.34375, "learning_rate": 0.0003887837662541307, "loss": 0.1949, "step": 226530 }, { "epoch": 9.38, "grad_norm": 1.4765625, "learning_rate": 0.00038877474552194516, "loss": 0.1962, "step": 226540 }, { "epoch": 9.38, "grad_norm": 0.796875, "learning_rate": 0.0003887657245286005, "loss": 0.2363, "step": 226550 }, { "epoch": 9.38, "grad_norm": 0.8359375, "learning_rate": 0.00038875670327411383, "loss": 0.2068, "step": 226560 }, { "epoch": 9.38, "grad_norm": 0.296875, "learning_rate": 0.00038874768175850197, "loss": 0.1936, "step": 226570 }, { "epoch": 9.38, "grad_norm": 0.984375, "learning_rate": 0.00038873865998178197, "loss": 0.2322, "step": 226580 }, { "epoch": 9.39, "grad_norm": 0.5625, "learning_rate": 0.00038872963794397075, "loss": 0.2211, "step": 226590 }, { "epoch": 9.39, "grad_norm": 0.51953125, "learning_rate": 0.0003887206156450854, "loss": 0.1943, "step": 226600 }, { "epoch": 9.39, "grad_norm": 0.625, "learning_rate": 0.00038871159308514287, "loss": 0.2054, "step": 226610 }, { "epoch": 9.39, "grad_norm": 0.5, "learning_rate": 0.00038870257026416, "loss": 0.1885, "step": 226620 }, { "epoch": 9.39, "grad_norm": 0.47265625, "learning_rate": 0.00038869354718215393, "loss": 0.1612, "step": 226630 }, { "epoch": 9.39, "grad_norm": 0.275390625, "learning_rate": 0.00038868452383914153, "loss": 0.1545, "step": 226640 }, { "epoch": 9.39, "grad_norm": 1.2421875, "learning_rate": 0.0003886755002351399, "loss": 0.1877, "step": 226650 }, { "epoch": 9.39, "grad_norm": 0.73046875, "learning_rate": 0.00038866647637016594, "loss": 0.1986, "step": 226660 }, { "epoch": 9.39, "grad_norm": 0.83984375, "learning_rate": 0.0003886574522442366, "loss": 0.1684, "step": 226670 }, { "epoch": 9.39, "grad_norm": 0.65625, "learning_rate": 0.00038864842785736896, "loss": 0.2069, "step": 226680 }, { "epoch": 9.39, "grad_norm": 0.51953125, "learning_rate": 0.0003886394032095799, "loss": 0.194, "step": 226690 }, { "epoch": 9.39, "grad_norm": 1.484375, "learning_rate": 0.0003886303783008865, "loss": 0.2446, "step": 226700 }, { "epoch": 9.39, "grad_norm": 1.0859375, "learning_rate": 0.0003886213531313057, "loss": 0.2127, "step": 226710 }, { "epoch": 9.39, "grad_norm": 0.69921875, "learning_rate": 0.00038861232770085445, "loss": 0.1511, "step": 226720 }, { "epoch": 9.39, "grad_norm": 1.3828125, "learning_rate": 0.0003886033020095498, "loss": 0.2227, "step": 226730 }, { "epoch": 9.39, "grad_norm": 0.55078125, "learning_rate": 0.00038859427605740873, "loss": 0.1896, "step": 226740 }, { "epoch": 9.39, "grad_norm": 1.953125, "learning_rate": 0.0003885852498444481, "loss": 0.1769, "step": 226750 }, { "epoch": 9.39, "grad_norm": 1.046875, "learning_rate": 0.00038857622337068503, "loss": 0.2306, "step": 226760 }, { "epoch": 9.39, "grad_norm": 0.703125, "learning_rate": 0.0003885671966361366, "loss": 0.2103, "step": 226770 }, { "epoch": 9.39, "grad_norm": 0.69921875, "learning_rate": 0.00038855816964081946, "loss": 0.1946, "step": 226780 }, { "epoch": 9.39, "grad_norm": 0.365234375, "learning_rate": 0.00038854914238475093, "loss": 0.1787, "step": 226790 }, { "epoch": 9.39, "grad_norm": 1.3828125, "learning_rate": 0.0003885401148679479, "loss": 0.1967, "step": 226800 }, { "epoch": 9.39, "grad_norm": 0.3984375, "learning_rate": 0.0003885310870904273, "loss": 0.1778, "step": 226810 }, { "epoch": 9.39, "grad_norm": 0.2421875, "learning_rate": 0.0003885220590522062, "loss": 0.1857, "step": 226820 }, { "epoch": 9.4, "grad_norm": 1.7734375, "learning_rate": 0.00038851303075330143, "loss": 0.1656, "step": 226830 }, { "epoch": 9.4, "grad_norm": 0.73046875, "learning_rate": 0.0003885040021937302, "loss": 0.2039, "step": 226840 }, { "epoch": 9.4, "grad_norm": 0.703125, "learning_rate": 0.00038849497337350937, "loss": 0.1933, "step": 226850 }, { "epoch": 9.4, "grad_norm": 0.98828125, "learning_rate": 0.00038848594429265583, "loss": 0.2153, "step": 226860 }, { "epoch": 9.4, "grad_norm": 0.875, "learning_rate": 0.0003884769149511869, "loss": 0.1788, "step": 226870 }, { "epoch": 9.4, "grad_norm": 1.234375, "learning_rate": 0.00038846788534911925, "loss": 0.2031, "step": 226880 }, { "epoch": 9.4, "grad_norm": 2.078125, "learning_rate": 0.00038845885548646997, "loss": 0.2462, "step": 226890 }, { "epoch": 9.4, "grad_norm": 0.51171875, "learning_rate": 0.0003884498253632561, "loss": 0.2016, "step": 226900 }, { "epoch": 9.4, "grad_norm": 1.0, "learning_rate": 0.0003884407949794947, "loss": 0.2192, "step": 226910 }, { "epoch": 9.4, "grad_norm": 0.4609375, "learning_rate": 0.00038843176433520256, "loss": 0.1832, "step": 226920 }, { "epoch": 9.4, "grad_norm": 0.96484375, "learning_rate": 0.0003884227334303969, "loss": 0.1812, "step": 226930 }, { "epoch": 9.4, "grad_norm": 0.90625, "learning_rate": 0.00038841370226509454, "loss": 0.2474, "step": 226940 }, { "epoch": 9.4, "grad_norm": 1.1015625, "learning_rate": 0.0003884046708393125, "loss": 0.2508, "step": 226950 }, { "epoch": 9.4, "grad_norm": 0.5859375, "learning_rate": 0.00038839563915306786, "loss": 0.2159, "step": 226960 }, { "epoch": 9.4, "grad_norm": 1.3125, "learning_rate": 0.0003883866072063776, "loss": 0.2183, "step": 226970 }, { "epoch": 9.4, "grad_norm": 0.458984375, "learning_rate": 0.00038837757499925863, "loss": 0.2301, "step": 226980 }, { "epoch": 9.4, "grad_norm": 1.4296875, "learning_rate": 0.00038836854253172803, "loss": 0.168, "step": 226990 }, { "epoch": 9.4, "grad_norm": 1.0234375, "learning_rate": 0.0003883595098038028, "loss": 0.1978, "step": 227000 }, { "epoch": 9.4, "grad_norm": 0.83984375, "learning_rate": 0.0003883504768154999, "loss": 0.1423, "step": 227010 }, { "epoch": 9.4, "grad_norm": 1.4609375, "learning_rate": 0.0003883414435668363, "loss": 0.2507, "step": 227020 }, { "epoch": 9.4, "grad_norm": 0.7109375, "learning_rate": 0.00038833241005782913, "loss": 0.1554, "step": 227030 }, { "epoch": 9.4, "grad_norm": 0.53515625, "learning_rate": 0.00038832337628849525, "loss": 0.2473, "step": 227040 }, { "epoch": 9.4, "grad_norm": 1.1328125, "learning_rate": 0.00038831434225885175, "loss": 0.2, "step": 227050 }, { "epoch": 9.4, "grad_norm": 0.34765625, "learning_rate": 0.00038830530796891554, "loss": 0.1924, "step": 227060 }, { "epoch": 9.41, "grad_norm": 1.09375, "learning_rate": 0.00038829627341870364, "loss": 0.17, "step": 227070 }, { "epoch": 9.41, "grad_norm": 1.484375, "learning_rate": 0.00038828723860823323, "loss": 0.1792, "step": 227080 }, { "epoch": 9.41, "grad_norm": 1.765625, "learning_rate": 0.0003882782035375211, "loss": 0.1917, "step": 227090 }, { "epoch": 9.41, "grad_norm": 0.56640625, "learning_rate": 0.0003882691682065843, "loss": 0.2051, "step": 227100 }, { "epoch": 9.41, "grad_norm": 1.109375, "learning_rate": 0.00038826013261543987, "loss": 0.1741, "step": 227110 }, { "epoch": 9.41, "grad_norm": 0.45703125, "learning_rate": 0.00038825109676410475, "loss": 0.1457, "step": 227120 }, { "epoch": 9.41, "grad_norm": 0.6875, "learning_rate": 0.0003882420606525961, "loss": 0.1971, "step": 227130 }, { "epoch": 9.41, "grad_norm": 1.3515625, "learning_rate": 0.0003882330242809307, "loss": 0.1673, "step": 227140 }, { "epoch": 9.41, "grad_norm": 2.46875, "learning_rate": 0.0003882239876491258, "loss": 0.1951, "step": 227150 }, { "epoch": 9.41, "grad_norm": 0.7109375, "learning_rate": 0.0003882149507571982, "loss": 0.1934, "step": 227160 }, { "epoch": 9.41, "grad_norm": 0.62109375, "learning_rate": 0.000388205913605165, "loss": 0.1427, "step": 227170 }, { "epoch": 9.41, "grad_norm": 1.109375, "learning_rate": 0.00038819687619304324, "loss": 0.2112, "step": 227180 }, { "epoch": 9.41, "grad_norm": 0.87109375, "learning_rate": 0.0003881878385208499, "loss": 0.141, "step": 227190 }, { "epoch": 9.41, "grad_norm": 1.5, "learning_rate": 0.0003881788005886019, "loss": 0.1606, "step": 227200 }, { "epoch": 9.41, "grad_norm": 0.1142578125, "learning_rate": 0.00038816976239631633, "loss": 0.1687, "step": 227210 }, { "epoch": 9.41, "grad_norm": 1.5703125, "learning_rate": 0.0003881607239440101, "loss": 0.2085, "step": 227220 }, { "epoch": 9.41, "grad_norm": 1.5625, "learning_rate": 0.00038815168523170053, "loss": 0.2029, "step": 227230 }, { "epoch": 9.41, "grad_norm": 2.46875, "learning_rate": 0.00038814264625940424, "loss": 0.1941, "step": 227240 }, { "epoch": 9.41, "grad_norm": 0.66015625, "learning_rate": 0.00038813360702713844, "loss": 0.1687, "step": 227250 }, { "epoch": 9.41, "grad_norm": 0.3125, "learning_rate": 0.0003881245675349202, "loss": 0.1931, "step": 227260 }, { "epoch": 9.41, "grad_norm": 0.58984375, "learning_rate": 0.00038811552778276635, "loss": 0.2105, "step": 227270 }, { "epoch": 9.41, "grad_norm": 0.58203125, "learning_rate": 0.000388106487770694, "loss": 0.1661, "step": 227280 }, { "epoch": 9.41, "grad_norm": 0.91796875, "learning_rate": 0.0003880974474987201, "loss": 0.1996, "step": 227290 }, { "epoch": 9.41, "grad_norm": 1.015625, "learning_rate": 0.00038808840696686177, "loss": 0.2437, "step": 227300 }, { "epoch": 9.42, "grad_norm": 0.9921875, "learning_rate": 0.000388079366175136, "loss": 0.152, "step": 227310 }, { "epoch": 9.42, "grad_norm": 0.76953125, "learning_rate": 0.00038807032512355976, "loss": 0.1419, "step": 227320 }, { "epoch": 9.42, "grad_norm": 0.7734375, "learning_rate": 0.0003880612838121501, "loss": 0.1468, "step": 227330 }, { "epoch": 9.42, "grad_norm": 0.478515625, "learning_rate": 0.00038805224224092394, "loss": 0.1735, "step": 227340 }, { "epoch": 9.42, "grad_norm": 1.0, "learning_rate": 0.0003880432004098984, "loss": 0.1632, "step": 227350 }, { "epoch": 9.42, "grad_norm": 0.416015625, "learning_rate": 0.0003880341583190905, "loss": 0.2055, "step": 227360 }, { "epoch": 9.42, "grad_norm": 0.44140625, "learning_rate": 0.0003880251159685172, "loss": 0.1744, "step": 227370 }, { "epoch": 9.42, "grad_norm": 0.73046875, "learning_rate": 0.00038801607335819553, "loss": 0.2041, "step": 227380 }, { "epoch": 9.42, "grad_norm": 0.76953125, "learning_rate": 0.0003880070304881425, "loss": 0.2447, "step": 227390 }, { "epoch": 9.42, "grad_norm": 0.267578125, "learning_rate": 0.00038799798735837524, "loss": 0.1875, "step": 227400 }, { "epoch": 9.42, "grad_norm": 1.328125, "learning_rate": 0.0003879889439689105, "loss": 0.1989, "step": 227410 }, { "epoch": 9.42, "grad_norm": 0.7890625, "learning_rate": 0.0003879799003197656, "loss": 0.1907, "step": 227420 }, { "epoch": 9.42, "grad_norm": 0.470703125, "learning_rate": 0.00038797085641095736, "loss": 0.1927, "step": 227430 }, { "epoch": 9.42, "grad_norm": 0.60546875, "learning_rate": 0.00038796181224250295, "loss": 0.2074, "step": 227440 }, { "epoch": 9.42, "grad_norm": 1.0390625, "learning_rate": 0.00038795276781441925, "loss": 0.1753, "step": 227450 }, { "epoch": 9.42, "grad_norm": 0.2734375, "learning_rate": 0.00038794372312672335, "loss": 0.2136, "step": 227460 }, { "epoch": 9.42, "grad_norm": 0.86328125, "learning_rate": 0.0003879346781794322, "loss": 0.1928, "step": 227470 }, { "epoch": 9.42, "grad_norm": 0.38671875, "learning_rate": 0.00038792563297256294, "loss": 0.2412, "step": 227480 }, { "epoch": 9.42, "grad_norm": 0.298828125, "learning_rate": 0.0003879165875061326, "loss": 0.1819, "step": 227490 }, { "epoch": 9.42, "grad_norm": 0.609375, "learning_rate": 0.00038790754178015795, "loss": 0.2186, "step": 227500 }, { "epoch": 9.42, "grad_norm": 1.015625, "learning_rate": 0.0003878984957946563, "loss": 0.192, "step": 227510 }, { "epoch": 9.42, "grad_norm": 0.9453125, "learning_rate": 0.0003878894495496447, "loss": 0.1668, "step": 227520 }, { "epoch": 9.42, "grad_norm": 0.75, "learning_rate": 0.00038788040304513986, "loss": 0.221, "step": 227530 }, { "epoch": 9.42, "grad_norm": 0.515625, "learning_rate": 0.00038787135628115906, "loss": 0.2187, "step": 227540 }, { "epoch": 9.43, "grad_norm": 0.98828125, "learning_rate": 0.00038786230925771925, "loss": 0.2289, "step": 227550 }, { "epoch": 9.43, "grad_norm": 0.185546875, "learning_rate": 0.0003878532619748375, "loss": 0.1641, "step": 227560 }, { "epoch": 9.43, "grad_norm": 0.6640625, "learning_rate": 0.00038784421443253074, "loss": 0.156, "step": 227570 }, { "epoch": 9.43, "grad_norm": 0.251953125, "learning_rate": 0.0003878351666308161, "loss": 0.1778, "step": 227580 }, { "epoch": 9.43, "grad_norm": 1.1484375, "learning_rate": 0.0003878261185697105, "loss": 0.2694, "step": 227590 }, { "epoch": 9.43, "grad_norm": 0.62890625, "learning_rate": 0.00038781707024923106, "loss": 0.179, "step": 227600 }, { "epoch": 9.43, "grad_norm": 0.447265625, "learning_rate": 0.00038780802166939486, "loss": 0.1882, "step": 227610 }, { "epoch": 9.43, "grad_norm": 1.25, "learning_rate": 0.0003877989728302187, "loss": 0.1349, "step": 227620 }, { "epoch": 9.43, "grad_norm": 1.3671875, "learning_rate": 0.00038778992373171984, "loss": 0.232, "step": 227630 }, { "epoch": 9.43, "grad_norm": 0.72265625, "learning_rate": 0.00038778087437391524, "loss": 0.1799, "step": 227640 }, { "epoch": 9.43, "grad_norm": 1.0859375, "learning_rate": 0.00038777182475682194, "loss": 0.2207, "step": 227650 }, { "epoch": 9.43, "grad_norm": 0.64453125, "learning_rate": 0.0003877627748804569, "loss": 0.1549, "step": 227660 }, { "epoch": 9.43, "grad_norm": 0.859375, "learning_rate": 0.00038775372474483713, "loss": 0.1892, "step": 227670 }, { "epoch": 9.43, "grad_norm": 1.421875, "learning_rate": 0.00038774467434997975, "loss": 0.2164, "step": 227680 }, { "epoch": 9.43, "grad_norm": 0.138671875, "learning_rate": 0.0003877356236959019, "loss": 0.2192, "step": 227690 }, { "epoch": 9.43, "grad_norm": 0.228515625, "learning_rate": 0.0003877265727826203, "loss": 0.1876, "step": 227700 }, { "epoch": 9.43, "grad_norm": 0.361328125, "learning_rate": 0.0003877175216101523, "loss": 0.2066, "step": 227710 }, { "epoch": 9.43, "grad_norm": 0.8671875, "learning_rate": 0.0003877084701785147, "loss": 0.2386, "step": 227720 }, { "epoch": 9.43, "grad_norm": 1.0, "learning_rate": 0.00038769941848772465, "loss": 0.1794, "step": 227730 }, { "epoch": 9.43, "grad_norm": 0.79296875, "learning_rate": 0.0003876903665377992, "loss": 0.2025, "step": 227740 }, { "epoch": 9.43, "grad_norm": 0.921875, "learning_rate": 0.0003876813143287553, "loss": 0.1683, "step": 227750 }, { "epoch": 9.43, "grad_norm": 0.640625, "learning_rate": 0.0003876722618606101, "loss": 0.2347, "step": 227760 }, { "epoch": 9.43, "grad_norm": 0.98828125, "learning_rate": 0.0003876632091333805, "loss": 0.201, "step": 227770 }, { "epoch": 9.43, "grad_norm": 1.5234375, "learning_rate": 0.0003876541561470837, "loss": 0.2103, "step": 227780 }, { "epoch": 9.44, "grad_norm": 0.5625, "learning_rate": 0.00038764510290173655, "loss": 0.206, "step": 227790 }, { "epoch": 9.44, "grad_norm": 0.328125, "learning_rate": 0.0003876360493973562, "loss": 0.1823, "step": 227800 }, { "epoch": 9.44, "grad_norm": 0.88671875, "learning_rate": 0.0003876269956339597, "loss": 0.2011, "step": 227810 }, { "epoch": 9.44, "grad_norm": 0.55859375, "learning_rate": 0.00038761794161156404, "loss": 0.1789, "step": 227820 }, { "epoch": 9.44, "grad_norm": 1.734375, "learning_rate": 0.0003876088873301863, "loss": 0.1782, "step": 227830 }, { "epoch": 9.44, "grad_norm": 0.87109375, "learning_rate": 0.00038759983278984346, "loss": 0.1644, "step": 227840 }, { "epoch": 9.44, "grad_norm": 0.82421875, "learning_rate": 0.0003875907779905526, "loss": 0.1963, "step": 227850 }, { "epoch": 9.44, "grad_norm": 1.21875, "learning_rate": 0.0003875817229323307, "loss": 0.2065, "step": 227860 }, { "epoch": 9.44, "grad_norm": 0.75390625, "learning_rate": 0.000387572667615195, "loss": 0.2441, "step": 227870 }, { "epoch": 9.44, "grad_norm": 1.3359375, "learning_rate": 0.0003875636120391623, "loss": 0.2195, "step": 227880 }, { "epoch": 9.44, "grad_norm": 0.97265625, "learning_rate": 0.00038755455620424973, "loss": 0.2273, "step": 227890 }, { "epoch": 9.44, "grad_norm": 0.890625, "learning_rate": 0.00038754550011047434, "loss": 0.2074, "step": 227900 }, { "epoch": 9.44, "grad_norm": 0.5390625, "learning_rate": 0.00038753644375785323, "loss": 0.2161, "step": 227910 }, { "epoch": 9.44, "grad_norm": 0.73828125, "learning_rate": 0.00038752738714640335, "loss": 0.1888, "step": 227920 }, { "epoch": 9.44, "grad_norm": 0.60546875, "learning_rate": 0.00038751833027614183, "loss": 0.1554, "step": 227930 }, { "epoch": 9.44, "grad_norm": 0.5859375, "learning_rate": 0.00038750927314708554, "loss": 0.2593, "step": 227940 }, { "epoch": 9.44, "grad_norm": 0.5703125, "learning_rate": 0.00038750021575925176, "loss": 0.1856, "step": 227950 }, { "epoch": 9.44, "grad_norm": 0.94921875, "learning_rate": 0.00038749115811265736, "loss": 0.1919, "step": 227960 }, { "epoch": 9.44, "grad_norm": 1.2421875, "learning_rate": 0.00038748210020731947, "loss": 0.2282, "step": 227970 }, { "epoch": 9.44, "grad_norm": 0.6015625, "learning_rate": 0.00038747304204325505, "loss": 0.1802, "step": 227980 }, { "epoch": 9.44, "grad_norm": 0.67578125, "learning_rate": 0.0003874639836204813, "loss": 0.1856, "step": 227990 }, { "epoch": 9.44, "grad_norm": 0.5390625, "learning_rate": 0.0003874549249390152, "loss": 0.2033, "step": 228000 }, { "epoch": 9.44, "grad_norm": 0.4296875, "learning_rate": 0.00038744586599887373, "loss": 0.1806, "step": 228010 }, { "epoch": 9.44, "grad_norm": 0.150390625, "learning_rate": 0.00038743680680007397, "loss": 0.2589, "step": 228020 }, { "epoch": 9.44, "grad_norm": 0.95703125, "learning_rate": 0.00038742774734263296, "loss": 0.2048, "step": 228030 }, { "epoch": 9.45, "grad_norm": 0.515625, "learning_rate": 0.0003874186876265678, "loss": 0.17, "step": 228040 }, { "epoch": 9.45, "grad_norm": 2.390625, "learning_rate": 0.0003874096276518956, "loss": 0.2099, "step": 228050 }, { "epoch": 9.45, "grad_norm": 0.498046875, "learning_rate": 0.00038740056741863317, "loss": 0.2069, "step": 228060 }, { "epoch": 9.45, "grad_norm": 0.828125, "learning_rate": 0.00038739150692679774, "loss": 0.189, "step": 228070 }, { "epoch": 9.45, "grad_norm": 0.984375, "learning_rate": 0.0003873824461764064, "loss": 0.2274, "step": 228080 }, { "epoch": 9.45, "grad_norm": 1.0234375, "learning_rate": 0.0003873733851674761, "loss": 0.2727, "step": 228090 }, { "epoch": 9.45, "grad_norm": 0.34765625, "learning_rate": 0.0003873643239000239, "loss": 0.2132, "step": 228100 }, { "epoch": 9.45, "grad_norm": 0.58203125, "learning_rate": 0.0003873552623740669, "loss": 0.195, "step": 228110 }, { "epoch": 9.45, "grad_norm": 0.73828125, "learning_rate": 0.0003873462005896221, "loss": 0.226, "step": 228120 }, { "epoch": 9.45, "grad_norm": 1.140625, "learning_rate": 0.0003873371385467067, "loss": 0.2214, "step": 228130 }, { "epoch": 9.45, "grad_norm": 1.2109375, "learning_rate": 0.00038732807624533745, "loss": 0.2657, "step": 228140 }, { "epoch": 9.45, "grad_norm": 0.3359375, "learning_rate": 0.00038731901368553167, "loss": 0.1868, "step": 228150 }, { "epoch": 9.45, "grad_norm": 1.5859375, "learning_rate": 0.00038730995086730635, "loss": 0.2112, "step": 228160 }, { "epoch": 9.45, "grad_norm": 0.4140625, "learning_rate": 0.00038730088779067857, "loss": 0.195, "step": 228170 }, { "epoch": 9.45, "grad_norm": 0.478515625, "learning_rate": 0.00038729182445566536, "loss": 0.1859, "step": 228180 }, { "epoch": 9.45, "grad_norm": 0.6953125, "learning_rate": 0.00038728276086228363, "loss": 0.1872, "step": 228190 }, { "epoch": 9.45, "grad_norm": 0.333984375, "learning_rate": 0.0003872736970105507, "loss": 0.2295, "step": 228200 }, { "epoch": 9.45, "grad_norm": 0.27734375, "learning_rate": 0.0003872646329004834, "loss": 0.1764, "step": 228210 }, { "epoch": 9.45, "grad_norm": 2.40625, "learning_rate": 0.0003872555685320989, "loss": 0.1339, "step": 228220 }, { "epoch": 9.45, "grad_norm": 0.8515625, "learning_rate": 0.0003872465039054143, "loss": 0.1892, "step": 228230 }, { "epoch": 9.45, "grad_norm": 0.71875, "learning_rate": 0.0003872374390204466, "loss": 0.2146, "step": 228240 }, { "epoch": 9.45, "grad_norm": 0.63671875, "learning_rate": 0.0003872283738772128, "loss": 0.1883, "step": 228250 }, { "epoch": 9.45, "grad_norm": 0.578125, "learning_rate": 0.00038721930847573005, "loss": 0.175, "step": 228260 }, { "epoch": 9.45, "grad_norm": 1.125, "learning_rate": 0.00038721024281601537, "loss": 0.2107, "step": 228270 }, { "epoch": 9.46, "grad_norm": 0.1640625, "learning_rate": 0.00038720117689808586, "loss": 0.1818, "step": 228280 }, { "epoch": 9.46, "grad_norm": 0.82421875, "learning_rate": 0.0003871921107219586, "loss": 0.2258, "step": 228290 }, { "epoch": 9.46, "grad_norm": 1.21875, "learning_rate": 0.0003871830442876505, "loss": 0.1866, "step": 228300 }, { "epoch": 9.46, "grad_norm": 0.9140625, "learning_rate": 0.00038717397759517873, "loss": 0.2052, "step": 228310 }, { "epoch": 9.46, "grad_norm": 0.65234375, "learning_rate": 0.00038716491064456036, "loss": 0.1975, "step": 228320 }, { "epoch": 9.46, "grad_norm": 0.58203125, "learning_rate": 0.0003871558434358125, "loss": 0.2761, "step": 228330 }, { "epoch": 9.46, "grad_norm": 0.65625, "learning_rate": 0.0003871467759689521, "loss": 0.1865, "step": 228340 }, { "epoch": 9.46, "grad_norm": 0.287109375, "learning_rate": 0.0003871377082439963, "loss": 0.1993, "step": 228350 }, { "epoch": 9.46, "grad_norm": 1.1875, "learning_rate": 0.0003871286402609622, "loss": 0.2035, "step": 228360 }, { "epoch": 9.46, "grad_norm": 0.59765625, "learning_rate": 0.0003871195720198667, "loss": 0.1987, "step": 228370 }, { "epoch": 9.46, "grad_norm": 0.6796875, "learning_rate": 0.000387110503520727, "loss": 0.1956, "step": 228380 }, { "epoch": 9.46, "grad_norm": 0.796875, "learning_rate": 0.0003871014347635602, "loss": 0.188, "step": 228390 }, { "epoch": 9.46, "grad_norm": 0.0, "learning_rate": 0.00038709236574838325, "loss": 0.2152, "step": 228400 }, { "epoch": 9.46, "grad_norm": 0.0001239776611328125, "learning_rate": 0.00038708329647521323, "loss": 0.2518, "step": 228410 }, { "epoch": 9.46, "grad_norm": 0.52734375, "learning_rate": 0.0003870742269440674, "loss": 0.2043, "step": 228420 }, { "epoch": 9.46, "grad_norm": 0.734375, "learning_rate": 0.0003870651571549625, "loss": 0.1936, "step": 228430 }, { "epoch": 9.46, "grad_norm": 0.76953125, "learning_rate": 0.0003870560871079159, "loss": 0.141, "step": 228440 }, { "epoch": 9.46, "grad_norm": 1.46875, "learning_rate": 0.0003870470168029445, "loss": 0.194, "step": 228450 }, { "epoch": 9.46, "grad_norm": 0.6953125, "learning_rate": 0.0003870379462400654, "loss": 0.164, "step": 228460 }, { "epoch": 9.46, "grad_norm": 0.427734375, "learning_rate": 0.00038702887541929574, "loss": 0.2097, "step": 228470 }, { "epoch": 9.46, "grad_norm": 1.6953125, "learning_rate": 0.0003870198043406524, "loss": 0.1919, "step": 228480 }, { "epoch": 9.46, "grad_norm": 0.83203125, "learning_rate": 0.00038701073300415273, "loss": 0.1683, "step": 228490 }, { "epoch": 9.46, "grad_norm": 1.1328125, "learning_rate": 0.0003870016614098136, "loss": 0.203, "step": 228500 }, { "epoch": 9.46, "grad_norm": 0.63671875, "learning_rate": 0.00038699258955765217, "loss": 0.1791, "step": 228510 }, { "epoch": 9.47, "grad_norm": 1.609375, "learning_rate": 0.0003869835174476854, "loss": 0.1762, "step": 228520 }, { "epoch": 9.47, "grad_norm": 0.51953125, "learning_rate": 0.00038697444507993054, "loss": 0.2249, "step": 228530 }, { "epoch": 9.47, "grad_norm": 1.53125, "learning_rate": 0.0003869653724544045, "loss": 0.2072, "step": 228540 }, { "epoch": 9.47, "grad_norm": 0.890625, "learning_rate": 0.00038695629957112446, "loss": 0.1807, "step": 228550 }, { "epoch": 9.47, "grad_norm": 0.8515625, "learning_rate": 0.0003869472264301074, "loss": 0.2175, "step": 228560 }, { "epoch": 9.47, "grad_norm": 1.0625, "learning_rate": 0.00038693815303137054, "loss": 0.2513, "step": 228570 }, { "epoch": 9.47, "grad_norm": 1.203125, "learning_rate": 0.00038692907937493083, "loss": 0.1874, "step": 228580 }, { "epoch": 9.47, "grad_norm": 0.80859375, "learning_rate": 0.0003869200054608053, "loss": 0.1702, "step": 228590 }, { "epoch": 9.47, "grad_norm": 0.6796875, "learning_rate": 0.0003869109312890112, "loss": 0.1935, "step": 228600 }, { "epoch": 9.47, "grad_norm": 1.03125, "learning_rate": 0.0003869018568595655, "loss": 0.2305, "step": 228610 }, { "epoch": 9.47, "grad_norm": 0.8359375, "learning_rate": 0.00038689278217248526, "loss": 0.2166, "step": 228620 }, { "epoch": 9.47, "grad_norm": 1.5078125, "learning_rate": 0.00038688370722778754, "loss": 0.1775, "step": 228630 }, { "epoch": 9.47, "grad_norm": 0.98828125, "learning_rate": 0.00038687463202548956, "loss": 0.185, "step": 228640 }, { "epoch": 9.47, "grad_norm": 1.6171875, "learning_rate": 0.0003868655565656083, "loss": 0.24, "step": 228650 }, { "epoch": 9.47, "grad_norm": 0.53125, "learning_rate": 0.0003868564808481607, "loss": 0.2083, "step": 228660 }, { "epoch": 9.47, "grad_norm": 1.0234375, "learning_rate": 0.0003868474048731642, "loss": 0.1659, "step": 228670 }, { "epoch": 9.47, "grad_norm": 2.109375, "learning_rate": 0.0003868383286406355, "loss": 0.2401, "step": 228680 }, { "epoch": 9.47, "grad_norm": 1.515625, "learning_rate": 0.0003868292521505919, "loss": 0.1739, "step": 228690 }, { "epoch": 9.47, "grad_norm": 1.296875, "learning_rate": 0.00038682017540305044, "loss": 0.2259, "step": 228700 }, { "epoch": 9.47, "grad_norm": 1.2578125, "learning_rate": 0.00038681109839802815, "loss": 0.1857, "step": 228710 }, { "epoch": 9.47, "grad_norm": 0.78515625, "learning_rate": 0.00038680202113554216, "loss": 0.1892, "step": 228720 }, { "epoch": 9.47, "grad_norm": 0.0004520416259765625, "learning_rate": 0.0003867929436156096, "loss": 0.1957, "step": 228730 }, { "epoch": 9.47, "grad_norm": 0.5859375, "learning_rate": 0.0003867838658382473, "loss": 0.1829, "step": 228740 }, { "epoch": 9.47, "grad_norm": 0.87109375, "learning_rate": 0.00038677478780347273, "loss": 0.2009, "step": 228750 }, { "epoch": 9.48, "grad_norm": 1.3984375, "learning_rate": 0.00038676570951130275, "loss": 0.2012, "step": 228760 }, { "epoch": 9.48, "grad_norm": 0.87109375, "learning_rate": 0.0003867566309617544, "loss": 0.2436, "step": 228770 }, { "epoch": 9.48, "grad_norm": 0.87890625, "learning_rate": 0.0003867475521548449, "loss": 0.2384, "step": 228780 }, { "epoch": 9.48, "grad_norm": 0.81640625, "learning_rate": 0.0003867384730905913, "loss": 0.2243, "step": 228790 }, { "epoch": 9.48, "grad_norm": 0.96484375, "learning_rate": 0.00038672939376901055, "loss": 0.2352, "step": 228800 }, { "epoch": 9.48, "grad_norm": 0.66015625, "learning_rate": 0.00038672031419011995, "loss": 0.1955, "step": 228810 }, { "epoch": 9.48, "grad_norm": 0.87109375, "learning_rate": 0.0003867112343539364, "loss": 0.1654, "step": 228820 }, { "epoch": 9.48, "grad_norm": 0.58984375, "learning_rate": 0.0003867021542604771, "loss": 0.1806, "step": 228830 }, { "epoch": 9.48, "grad_norm": 0.69921875, "learning_rate": 0.00038669307390975914, "loss": 0.213, "step": 228840 }, { "epoch": 9.48, "grad_norm": 1.0625, "learning_rate": 0.0003866839933017996, "loss": 0.2134, "step": 228850 }, { "epoch": 9.48, "grad_norm": 1.5234375, "learning_rate": 0.00038667491243661546, "loss": 0.1961, "step": 228860 }, { "epoch": 9.48, "grad_norm": 0.77734375, "learning_rate": 0.00038666583131422396, "loss": 0.1508, "step": 228870 }, { "epoch": 9.48, "grad_norm": 0.2021484375, "learning_rate": 0.00038665674993464214, "loss": 0.1998, "step": 228880 }, { "epoch": 9.48, "grad_norm": 0.67578125, "learning_rate": 0.00038664766829788703, "loss": 0.151, "step": 228890 }, { "epoch": 9.48, "grad_norm": 0.478515625, "learning_rate": 0.00038663858640397575, "loss": 0.1838, "step": 228900 }, { "epoch": 9.48, "grad_norm": 0.275390625, "learning_rate": 0.0003866295042529254, "loss": 0.1823, "step": 228910 }, { "epoch": 9.48, "grad_norm": 0.75390625, "learning_rate": 0.0003866204218447531, "loss": 0.2016, "step": 228920 }, { "epoch": 9.48, "grad_norm": 0.61328125, "learning_rate": 0.000386611339179476, "loss": 0.1928, "step": 228930 }, { "epoch": 9.48, "grad_norm": 0.90625, "learning_rate": 0.00038660225625711096, "loss": 0.2004, "step": 228940 }, { "epoch": 9.48, "grad_norm": 0.875, "learning_rate": 0.0003865931730776753, "loss": 0.2266, "step": 228950 }, { "epoch": 9.48, "grad_norm": 0.5390625, "learning_rate": 0.00038658408964118606, "loss": 0.2089, "step": 228960 }, { "epoch": 9.48, "grad_norm": 0.59375, "learning_rate": 0.0003865750059476604, "loss": 0.1852, "step": 228970 }, { "epoch": 9.48, "grad_norm": 1.2421875, "learning_rate": 0.00038656592199711517, "loss": 0.1642, "step": 228980 }, { "epoch": 9.48, "grad_norm": 0.70703125, "learning_rate": 0.0003865568377895676, "loss": 0.2234, "step": 228990 }, { "epoch": 9.49, "grad_norm": 0.353515625, "learning_rate": 0.00038654775332503495, "loss": 0.1695, "step": 229000 }, { "epoch": 9.49, "grad_norm": 1.5234375, "learning_rate": 0.0003865386686035341, "loss": 0.1996, "step": 229010 }, { "epoch": 9.49, "grad_norm": 0.353515625, "learning_rate": 0.0003865295836250823, "loss": 0.2024, "step": 229020 }, { "epoch": 9.49, "grad_norm": 0.65625, "learning_rate": 0.00038652049838969643, "loss": 0.1748, "step": 229030 }, { "epoch": 9.49, "grad_norm": 0.73828125, "learning_rate": 0.00038651141289739377, "loss": 0.1883, "step": 229040 }, { "epoch": 9.49, "grad_norm": 0.421875, "learning_rate": 0.00038650232714819145, "loss": 0.1878, "step": 229050 }, { "epoch": 9.49, "grad_norm": 0.75390625, "learning_rate": 0.00038649324114210635, "loss": 0.1876, "step": 229060 }, { "epoch": 9.49, "grad_norm": 0.84765625, "learning_rate": 0.00038648415487915587, "loss": 0.1486, "step": 229070 }, { "epoch": 9.49, "grad_norm": 0.515625, "learning_rate": 0.00038647506835935686, "loss": 0.1997, "step": 229080 }, { "epoch": 9.49, "grad_norm": 1.6328125, "learning_rate": 0.0003864659815827265, "loss": 0.1986, "step": 229090 }, { "epoch": 9.49, "grad_norm": 0.330078125, "learning_rate": 0.00038645689454928195, "loss": 0.1996, "step": 229100 }, { "epoch": 9.49, "grad_norm": 0.8984375, "learning_rate": 0.0003864478072590402, "loss": 0.1844, "step": 229110 }, { "epoch": 9.49, "grad_norm": 0.8203125, "learning_rate": 0.0003864387197120185, "loss": 0.2151, "step": 229120 }, { "epoch": 9.49, "grad_norm": 0.455078125, "learning_rate": 0.00038642963190823384, "loss": 0.2091, "step": 229130 }, { "epoch": 9.49, "grad_norm": 0.73046875, "learning_rate": 0.00038642054384770326, "loss": 0.2139, "step": 229140 }, { "epoch": 9.49, "grad_norm": 0.31640625, "learning_rate": 0.000386411455530444, "loss": 0.1634, "step": 229150 }, { "epoch": 9.49, "grad_norm": 0.90625, "learning_rate": 0.0003864023669564731, "loss": 0.1399, "step": 229160 }, { "epoch": 9.49, "grad_norm": 1.1875, "learning_rate": 0.0003863932781258077, "loss": 0.2166, "step": 229170 }, { "epoch": 9.49, "grad_norm": 0.5625, "learning_rate": 0.00038638418903846493, "loss": 0.2383, "step": 229180 }, { "epoch": 9.49, "grad_norm": 0.5390625, "learning_rate": 0.0003863750996944617, "loss": 0.2042, "step": 229190 }, { "epoch": 9.49, "grad_norm": 0.796875, "learning_rate": 0.00038636601009381545, "loss": 0.2226, "step": 229200 }, { "epoch": 9.49, "grad_norm": 1.3125, "learning_rate": 0.000386356920236543, "loss": 0.2207, "step": 229210 }, { "epoch": 9.49, "grad_norm": 1.2890625, "learning_rate": 0.0003863478301226615, "loss": 0.1788, "step": 229220 }, { "epoch": 9.49, "grad_norm": 1.109375, "learning_rate": 0.0003863387397521881, "loss": 0.1763, "step": 229230 }, { "epoch": 9.5, "grad_norm": 1.3828125, "learning_rate": 0.00038632964912514, "loss": 0.1616, "step": 229240 }, { "epoch": 9.5, "grad_norm": 0.93359375, "learning_rate": 0.0003863205582415342, "loss": 0.2237, "step": 229250 }, { "epoch": 9.5, "grad_norm": 1.2734375, "learning_rate": 0.00038631146710138786, "loss": 0.2318, "step": 229260 }, { "epoch": 9.5, "grad_norm": 0.59375, "learning_rate": 0.0003863023757047179, "loss": 0.1787, "step": 229270 }, { "epoch": 9.5, "grad_norm": 0.328125, "learning_rate": 0.0003862932840515418, "loss": 0.1736, "step": 229280 }, { "epoch": 9.5, "grad_norm": 0.55859375, "learning_rate": 0.00038628419214187633, "loss": 0.2375, "step": 229290 }, { "epoch": 9.5, "grad_norm": 0.82421875, "learning_rate": 0.0003862750999757388, "loss": 0.2115, "step": 229300 }, { "epoch": 9.5, "grad_norm": 0.94921875, "learning_rate": 0.0003862660075531462, "loss": 0.1865, "step": 229310 }, { "epoch": 9.5, "grad_norm": 0.55859375, "learning_rate": 0.00038625691487411567, "loss": 0.2054, "step": 229320 }, { "epoch": 9.5, "grad_norm": 0.33984375, "learning_rate": 0.00038624782193866436, "loss": 0.2058, "step": 229330 }, { "epoch": 9.5, "grad_norm": 0.049560546875, "learning_rate": 0.00038623872874680944, "loss": 0.1687, "step": 229340 }, { "epoch": 9.5, "grad_norm": 1.3046875, "learning_rate": 0.0003862296352985678, "loss": 0.2167, "step": 229350 }, { "epoch": 9.5, "grad_norm": 0.7890625, "learning_rate": 0.00038622054159395686, "loss": 0.2375, "step": 229360 }, { "epoch": 9.5, "grad_norm": 0.427734375, "learning_rate": 0.0003862114476329934, "loss": 0.1791, "step": 229370 }, { "epoch": 9.5, "grad_norm": 0.314453125, "learning_rate": 0.00038620235341569486, "loss": 0.1758, "step": 229380 }, { "epoch": 9.5, "grad_norm": 0.5234375, "learning_rate": 0.00038619325894207813, "loss": 0.2087, "step": 229390 }, { "epoch": 9.5, "grad_norm": 0.73828125, "learning_rate": 0.0003861841642121604, "loss": 0.1947, "step": 229400 }, { "epoch": 9.5, "grad_norm": 0.91796875, "learning_rate": 0.0003861750692259588, "loss": 0.2129, "step": 229410 }, { "epoch": 9.5, "grad_norm": 0.671875, "learning_rate": 0.0003861659739834903, "loss": 0.2184, "step": 229420 }, { "epoch": 9.5, "grad_norm": 0.0, "learning_rate": 0.0003861568784847723, "loss": 0.1652, "step": 229430 }, { "epoch": 9.5, "grad_norm": 0.5390625, "learning_rate": 0.0003861477827298217, "loss": 0.2422, "step": 229440 }, { "epoch": 9.5, "grad_norm": 0.46875, "learning_rate": 0.0003861386867186557, "loss": 0.1876, "step": 229450 }, { "epoch": 9.5, "grad_norm": 1.4140625, "learning_rate": 0.00038612959045129135, "loss": 0.1308, "step": 229460 }, { "epoch": 9.5, "grad_norm": 0.88671875, "learning_rate": 0.00038612049392774587, "loss": 0.2102, "step": 229470 }, { "epoch": 9.51, "grad_norm": 1.140625, "learning_rate": 0.0003861113971480362, "loss": 0.1895, "step": 229480 }, { "epoch": 9.51, "grad_norm": 1.609375, "learning_rate": 0.0003861023001121797, "loss": 0.1768, "step": 229490 }, { "epoch": 9.51, "grad_norm": 2.140625, "learning_rate": 0.0003860932028201933, "loss": 0.1849, "step": 229500 }, { "epoch": 9.51, "grad_norm": 1.625, "learning_rate": 0.00038608410527209424, "loss": 0.1902, "step": 229510 }, { "epoch": 9.51, "grad_norm": 0.7734375, "learning_rate": 0.0003860750074678996, "loss": 0.1789, "step": 229520 }, { "epoch": 9.51, "grad_norm": 1.5390625, "learning_rate": 0.0003860659094076264, "loss": 0.203, "step": 229530 }, { "epoch": 9.51, "grad_norm": 1.7109375, "learning_rate": 0.00038605681109129197, "loss": 0.1848, "step": 229540 }, { "epoch": 9.51, "grad_norm": 0.95703125, "learning_rate": 0.0003860477125189133, "loss": 0.1857, "step": 229550 }, { "epoch": 9.51, "grad_norm": 0.68359375, "learning_rate": 0.0003860386136905075, "loss": 0.2611, "step": 229560 }, { "epoch": 9.51, "grad_norm": 0.455078125, "learning_rate": 0.0003860295146060917, "loss": 0.2028, "step": 229570 }, { "epoch": 9.51, "grad_norm": 0.3125, "learning_rate": 0.000386020415265683, "loss": 0.1917, "step": 229580 }, { "epoch": 9.51, "grad_norm": 1.1171875, "learning_rate": 0.0003860113156692986, "loss": 0.1733, "step": 229590 }, { "epoch": 9.51, "grad_norm": 0.478515625, "learning_rate": 0.0003860022158169556, "loss": 0.2049, "step": 229600 }, { "epoch": 9.51, "grad_norm": 0.52734375, "learning_rate": 0.0003859931157086711, "loss": 0.1987, "step": 229610 }, { "epoch": 9.51, "grad_norm": 1.296875, "learning_rate": 0.0003859840153444623, "loss": 0.2493, "step": 229620 }, { "epoch": 9.51, "grad_norm": 0.734375, "learning_rate": 0.0003859749147243462, "loss": 0.1982, "step": 229630 }, { "epoch": 9.51, "grad_norm": 0.8125, "learning_rate": 0.00038596581384834006, "loss": 0.146, "step": 229640 }, { "epoch": 9.51, "grad_norm": 1.78125, "learning_rate": 0.00038595671271646095, "loss": 0.1824, "step": 229650 }, { "epoch": 9.51, "grad_norm": 0.96484375, "learning_rate": 0.0003859476113287259, "loss": 0.2002, "step": 229660 }, { "epoch": 9.51, "grad_norm": 0.6796875, "learning_rate": 0.0003859385096851522, "loss": 0.171, "step": 229670 }, { "epoch": 9.51, "grad_norm": 1.484375, "learning_rate": 0.0003859294077857569, "loss": 0.2304, "step": 229680 }, { "epoch": 9.51, "grad_norm": 1.09375, "learning_rate": 0.00038592030563055715, "loss": 0.1899, "step": 229690 }, { "epoch": 9.51, "grad_norm": 0.96484375, "learning_rate": 0.00038591120321957, "loss": 0.1754, "step": 229700 }, { "epoch": 9.51, "grad_norm": 0.61328125, "learning_rate": 0.0003859021005528126, "loss": 0.2158, "step": 229710 }, { "epoch": 9.51, "grad_norm": 0.9140625, "learning_rate": 0.0003858929976303023, "loss": 0.2229, "step": 229720 }, { "epoch": 9.52, "grad_norm": 0.79296875, "learning_rate": 0.0003858838944520559, "loss": 0.1981, "step": 229730 }, { "epoch": 9.52, "grad_norm": 0.83984375, "learning_rate": 0.0003858747910180907, "loss": 0.1781, "step": 229740 }, { "epoch": 9.52, "grad_norm": 0.578125, "learning_rate": 0.0003858656873284239, "loss": 0.1813, "step": 229750 }, { "epoch": 9.52, "grad_norm": 0.37109375, "learning_rate": 0.0003858565833830725, "loss": 0.2015, "step": 229760 }, { "epoch": 9.52, "grad_norm": 0.69921875, "learning_rate": 0.00038584747918205366, "loss": 0.1823, "step": 229770 }, { "epoch": 9.52, "grad_norm": 0.58203125, "learning_rate": 0.0003858383747253845, "loss": 0.225, "step": 229780 }, { "epoch": 9.52, "grad_norm": 0.69140625, "learning_rate": 0.0003858292700130823, "loss": 0.219, "step": 229790 }, { "epoch": 9.52, "grad_norm": 0.64453125, "learning_rate": 0.000385820165045164, "loss": 0.2188, "step": 229800 }, { "epoch": 9.52, "grad_norm": 0.72265625, "learning_rate": 0.0003858110598216469, "loss": 0.2271, "step": 229810 }, { "epoch": 9.52, "grad_norm": 0.76953125, "learning_rate": 0.0003858019543425479, "loss": 0.2227, "step": 229820 }, { "epoch": 9.52, "grad_norm": 0.74609375, "learning_rate": 0.00038579284860788436, "loss": 0.2061, "step": 229830 }, { "epoch": 9.52, "grad_norm": 0.84765625, "learning_rate": 0.00038578374261767336, "loss": 0.1998, "step": 229840 }, { "epoch": 9.52, "grad_norm": 0.83984375, "learning_rate": 0.00038577463637193206, "loss": 0.2068, "step": 229850 }, { "epoch": 9.52, "grad_norm": 1.0546875, "learning_rate": 0.00038576552987067754, "loss": 0.223, "step": 229860 }, { "epoch": 9.52, "grad_norm": 0.47265625, "learning_rate": 0.0003857564231139269, "loss": 0.2038, "step": 229870 }, { "epoch": 9.52, "grad_norm": 2.546875, "learning_rate": 0.00038574731610169737, "loss": 0.1804, "step": 229880 }, { "epoch": 9.52, "grad_norm": 2.765625, "learning_rate": 0.0003857382088340061, "loss": 0.2341, "step": 229890 }, { "epoch": 9.52, "grad_norm": 0.466796875, "learning_rate": 0.00038572910131087003, "loss": 0.1576, "step": 229900 }, { "epoch": 9.52, "grad_norm": 1.3359375, "learning_rate": 0.0003857199935323066, "loss": 0.2217, "step": 229910 }, { "epoch": 9.52, "grad_norm": 0.76953125, "learning_rate": 0.00038571088549833275, "loss": 0.1965, "step": 229920 }, { "epoch": 9.52, "grad_norm": 0.609375, "learning_rate": 0.0003857017772089656, "loss": 0.1875, "step": 229930 }, { "epoch": 9.52, "grad_norm": 1.1171875, "learning_rate": 0.0003856926686642225, "loss": 0.1927, "step": 229940 }, { "epoch": 9.52, "grad_norm": 0.83203125, "learning_rate": 0.0003856835598641203, "loss": 0.2284, "step": 229950 }, { "epoch": 9.52, "grad_norm": 0.486328125, "learning_rate": 0.0003856744508086764, "loss": 0.1977, "step": 229960 }, { "epoch": 9.53, "grad_norm": 1.109375, "learning_rate": 0.0003856653414979078, "loss": 0.1938, "step": 229970 }, { "epoch": 9.53, "grad_norm": 0.73046875, "learning_rate": 0.00038565623193183166, "loss": 0.1807, "step": 229980 }, { "epoch": 9.53, "grad_norm": 0.6015625, "learning_rate": 0.00038564712211046516, "loss": 0.1756, "step": 229990 }, { "epoch": 9.53, "grad_norm": 0.419921875, "learning_rate": 0.00038563801203382543, "loss": 0.179, "step": 230000 }, { "epoch": 9.53, "grad_norm": 0.74609375, "learning_rate": 0.0003856289017019296, "loss": 0.2734, "step": 230010 }, { "epoch": 9.53, "grad_norm": 0.9765625, "learning_rate": 0.00038561979111479484, "loss": 0.2219, "step": 230020 }, { "epoch": 9.53, "grad_norm": 0.76171875, "learning_rate": 0.00038561068027243825, "loss": 0.1959, "step": 230030 }, { "epoch": 9.53, "grad_norm": 0.796875, "learning_rate": 0.000385601569174877, "loss": 0.1693, "step": 230040 }, { "epoch": 9.53, "grad_norm": 0.458984375, "learning_rate": 0.0003855924578221283, "loss": 0.2272, "step": 230050 }, { "epoch": 9.53, "grad_norm": 0.6015625, "learning_rate": 0.00038558334621420915, "loss": 0.2143, "step": 230060 }, { "epoch": 9.53, "grad_norm": 0.73828125, "learning_rate": 0.0003855742343511369, "loss": 0.2055, "step": 230070 }, { "epoch": 9.53, "grad_norm": 0.9296875, "learning_rate": 0.00038556512223292846, "loss": 0.1535, "step": 230080 }, { "epoch": 9.53, "grad_norm": 0.5078125, "learning_rate": 0.00038555600985960115, "loss": 0.1338, "step": 230090 }, { "epoch": 9.53, "grad_norm": 1.2421875, "learning_rate": 0.0003855468972311721, "loss": 0.2189, "step": 230100 }, { "epoch": 9.53, "grad_norm": 1.2265625, "learning_rate": 0.00038553778434765836, "loss": 0.2116, "step": 230110 }, { "epoch": 9.53, "grad_norm": 0.451171875, "learning_rate": 0.0003855286712090772, "loss": 0.2034, "step": 230120 }, { "epoch": 9.53, "grad_norm": 1.2890625, "learning_rate": 0.00038551955781544566, "loss": 0.1731, "step": 230130 }, { "epoch": 9.53, "grad_norm": 0.34375, "learning_rate": 0.00038551044416678103, "loss": 0.2012, "step": 230140 }, { "epoch": 9.53, "grad_norm": 0.9921875, "learning_rate": 0.0003855013302631003, "loss": 0.2467, "step": 230150 }, { "epoch": 9.53, "grad_norm": 0.59765625, "learning_rate": 0.00038549221610442074, "loss": 0.2314, "step": 230160 }, { "epoch": 9.53, "grad_norm": 1.109375, "learning_rate": 0.0003854831016907595, "loss": 0.1895, "step": 230170 }, { "epoch": 9.53, "grad_norm": 0.99609375, "learning_rate": 0.0003854739870221336, "loss": 0.164, "step": 230180 }, { "epoch": 9.53, "grad_norm": 0.62109375, "learning_rate": 0.0003854648720985603, "loss": 0.2015, "step": 230190 }, { "epoch": 9.53, "grad_norm": 0.86328125, "learning_rate": 0.00038545575692005684, "loss": 0.2157, "step": 230200 }, { "epoch": 9.54, "grad_norm": 0.640625, "learning_rate": 0.00038544664148664024, "loss": 0.2421, "step": 230210 }, { "epoch": 9.54, "grad_norm": 0.546875, "learning_rate": 0.00038543752579832765, "loss": 0.1767, "step": 230220 }, { "epoch": 9.54, "grad_norm": 1.203125, "learning_rate": 0.00038542840985513626, "loss": 0.2461, "step": 230230 }, { "epoch": 9.54, "grad_norm": 1.078125, "learning_rate": 0.0003854192936570832, "loss": 0.2194, "step": 230240 }, { "epoch": 9.54, "grad_norm": 1.9765625, "learning_rate": 0.0003854101772041857, "loss": 0.2339, "step": 230250 }, { "epoch": 9.54, "grad_norm": 0.609375, "learning_rate": 0.0003854010604964608, "loss": 0.1771, "step": 230260 }, { "epoch": 9.54, "grad_norm": 0.76953125, "learning_rate": 0.0003853919435339258, "loss": 0.1818, "step": 230270 }, { "epoch": 9.54, "grad_norm": 1.171875, "learning_rate": 0.0003853828263165978, "loss": 0.213, "step": 230280 }, { "epoch": 9.54, "grad_norm": 1.3515625, "learning_rate": 0.00038537370884449395, "loss": 0.1935, "step": 230290 }, { "epoch": 9.54, "grad_norm": 0.390625, "learning_rate": 0.0003853645911176313, "loss": 0.1679, "step": 230300 }, { "epoch": 9.54, "grad_norm": 1.4375, "learning_rate": 0.00038535547313602725, "loss": 0.2386, "step": 230310 }, { "epoch": 9.54, "grad_norm": 0.62890625, "learning_rate": 0.0003853463548996987, "loss": 0.2121, "step": 230320 }, { "epoch": 9.54, "grad_norm": 0.0, "learning_rate": 0.00038533723640866296, "loss": 0.207, "step": 230330 }, { "epoch": 9.54, "grad_norm": 0.73828125, "learning_rate": 0.0003853281176629371, "loss": 0.1795, "step": 230340 }, { "epoch": 9.54, "grad_norm": 0.734375, "learning_rate": 0.00038531899866253846, "loss": 0.1706, "step": 230350 }, { "epoch": 9.54, "grad_norm": 0.73828125, "learning_rate": 0.00038530987940748403, "loss": 0.2105, "step": 230360 }, { "epoch": 9.54, "grad_norm": 0.6640625, "learning_rate": 0.00038530075989779095, "loss": 0.2331, "step": 230370 }, { "epoch": 9.54, "grad_norm": 0.671875, "learning_rate": 0.0003852916401334765, "loss": 0.1974, "step": 230380 }, { "epoch": 9.54, "grad_norm": 0.57421875, "learning_rate": 0.0003852825201145578, "loss": 0.1688, "step": 230390 }, { "epoch": 9.54, "grad_norm": 1.1171875, "learning_rate": 0.00038527339984105203, "loss": 0.2076, "step": 230400 }, { "epoch": 9.54, "grad_norm": 0.765625, "learning_rate": 0.0003852642793129763, "loss": 0.2168, "step": 230410 }, { "epoch": 9.54, "grad_norm": 0.466796875, "learning_rate": 0.00038525515853034774, "loss": 0.167, "step": 230420 }, { "epoch": 9.54, "grad_norm": 0.8984375, "learning_rate": 0.00038524603749318364, "loss": 0.1925, "step": 230430 }, { "epoch": 9.54, "grad_norm": 1.0703125, "learning_rate": 0.0003852369162015011, "loss": 0.2153, "step": 230440 }, { "epoch": 9.55, "grad_norm": 0.8828125, "learning_rate": 0.00038522779465531733, "loss": 0.2025, "step": 230450 }, { "epoch": 9.55, "grad_norm": 0.65234375, "learning_rate": 0.0003852186728546494, "loss": 0.1833, "step": 230460 }, { "epoch": 9.55, "grad_norm": 0.69921875, "learning_rate": 0.0003852095507995146, "loss": 0.1822, "step": 230470 }, { "epoch": 9.55, "grad_norm": 1.2265625, "learning_rate": 0.00038520042848993, "loss": 0.1612, "step": 230480 }, { "epoch": 9.55, "grad_norm": 1.0, "learning_rate": 0.0003851913059259127, "loss": 0.1606, "step": 230490 }, { "epoch": 9.55, "grad_norm": 0.76953125, "learning_rate": 0.00038518218310748005, "loss": 0.175, "step": 230500 }, { "epoch": 9.55, "grad_norm": 0.94921875, "learning_rate": 0.0003851730600346491, "loss": 0.1958, "step": 230510 }, { "epoch": 9.55, "grad_norm": 0.765625, "learning_rate": 0.0003851639367074371, "loss": 0.1662, "step": 230520 }, { "epoch": 9.55, "grad_norm": 0.46875, "learning_rate": 0.0003851548131258611, "loss": 0.2121, "step": 230530 }, { "epoch": 9.55, "grad_norm": 0.52734375, "learning_rate": 0.00038514568928993837, "loss": 0.1599, "step": 230540 }, { "epoch": 9.55, "grad_norm": 0.88671875, "learning_rate": 0.000385136565199686, "loss": 0.1992, "step": 230550 }, { "epoch": 9.55, "grad_norm": 0.36328125, "learning_rate": 0.0003851274408551213, "loss": 0.1558, "step": 230560 }, { "epoch": 9.55, "grad_norm": 1.0703125, "learning_rate": 0.00038511831625626125, "loss": 0.2101, "step": 230570 }, { "epoch": 9.55, "grad_norm": 0.4921875, "learning_rate": 0.0003851091914031232, "loss": 0.1959, "step": 230580 }, { "epoch": 9.55, "grad_norm": 0.90625, "learning_rate": 0.0003851000662957242, "loss": 0.192, "step": 230590 }, { "epoch": 9.55, "grad_norm": 0.875, "learning_rate": 0.0003850909409340815, "loss": 0.1837, "step": 230600 }, { "epoch": 9.55, "grad_norm": 0.98046875, "learning_rate": 0.0003850818153182122, "loss": 0.201, "step": 230610 }, { "epoch": 9.55, "grad_norm": 1.0, "learning_rate": 0.00038507268944813354, "loss": 0.2334, "step": 230620 }, { "epoch": 9.55, "grad_norm": 0.2734375, "learning_rate": 0.0003850635633238626, "loss": 0.182, "step": 230630 }, { "epoch": 9.55, "grad_norm": 0.86328125, "learning_rate": 0.00038505443694541666, "loss": 0.2047, "step": 230640 }, { "epoch": 9.55, "grad_norm": 0.474609375, "learning_rate": 0.00038504531031281286, "loss": 0.1334, "step": 230650 }, { "epoch": 9.55, "grad_norm": 0.3828125, "learning_rate": 0.0003850361834260684, "loss": 0.1877, "step": 230660 }, { "epoch": 9.55, "grad_norm": 0.77734375, "learning_rate": 0.00038502705628520043, "loss": 0.1693, "step": 230670 }, { "epoch": 9.55, "grad_norm": 0.146484375, "learning_rate": 0.000385017928890226, "loss": 0.265, "step": 230680 }, { "epoch": 9.56, "grad_norm": 0.578125, "learning_rate": 0.00038500880124116255, "loss": 0.2138, "step": 230690 }, { "epoch": 9.56, "grad_norm": 0.734375, "learning_rate": 0.0003849996733380271, "loss": 0.1936, "step": 230700 }, { "epoch": 9.56, "grad_norm": 1.1953125, "learning_rate": 0.00038499054518083677, "loss": 0.1747, "step": 230710 }, { "epoch": 9.56, "grad_norm": 0.62109375, "learning_rate": 0.00038498141676960885, "loss": 0.1265, "step": 230720 }, { "epoch": 9.56, "grad_norm": 0.5703125, "learning_rate": 0.0003849722881043605, "loss": 0.2221, "step": 230730 }, { "epoch": 9.56, "grad_norm": 0.84765625, "learning_rate": 0.00038496315918510883, "loss": 0.1953, "step": 230740 }, { "epoch": 9.56, "grad_norm": 1.2109375, "learning_rate": 0.00038495403001187115, "loss": 0.1748, "step": 230750 }, { "epoch": 9.56, "grad_norm": 0.443359375, "learning_rate": 0.00038494490058466437, "loss": 0.1871, "step": 230760 }, { "epoch": 9.56, "grad_norm": 1.8125, "learning_rate": 0.00038493577090350606, "loss": 0.1749, "step": 230770 }, { "epoch": 9.56, "grad_norm": 0.63671875, "learning_rate": 0.0003849266409684132, "loss": 0.2039, "step": 230780 }, { "epoch": 9.56, "grad_norm": 0.7421875, "learning_rate": 0.00038491751077940285, "loss": 0.21, "step": 230790 }, { "epoch": 9.56, "grad_norm": 1.296875, "learning_rate": 0.0003849083803364924, "loss": 0.1546, "step": 230800 }, { "epoch": 9.56, "grad_norm": 0.64453125, "learning_rate": 0.0003848992496396989, "loss": 0.2021, "step": 230810 }, { "epoch": 9.56, "grad_norm": 0.67578125, "learning_rate": 0.00038489011868903954, "loss": 0.2215, "step": 230820 }, { "epoch": 9.56, "grad_norm": 0.72265625, "learning_rate": 0.0003848809874845317, "loss": 0.2056, "step": 230830 }, { "epoch": 9.56, "grad_norm": 1.1171875, "learning_rate": 0.0003848718560261922, "loss": 0.1971, "step": 230840 }, { "epoch": 9.56, "grad_norm": 0.71484375, "learning_rate": 0.00038486272431403857, "loss": 0.2177, "step": 230850 }, { "epoch": 9.56, "grad_norm": 0.181640625, "learning_rate": 0.00038485359234808784, "loss": 0.1946, "step": 230860 }, { "epoch": 9.56, "grad_norm": 0.53515625, "learning_rate": 0.0003848444601283572, "loss": 0.1559, "step": 230870 }, { "epoch": 9.56, "grad_norm": 2.203125, "learning_rate": 0.0003848353276548639, "loss": 0.21, "step": 230880 }, { "epoch": 9.56, "grad_norm": 0.494140625, "learning_rate": 0.000384826194927625, "loss": 0.2138, "step": 230890 }, { "epoch": 9.56, "grad_norm": 0.8203125, "learning_rate": 0.0003848170619466578, "loss": 0.2093, "step": 230900 }, { "epoch": 9.56, "grad_norm": 0.64453125, "learning_rate": 0.0003848079287119795, "loss": 0.2064, "step": 230910 }, { "epoch": 9.56, "grad_norm": 0.6875, "learning_rate": 0.00038479879522360707, "loss": 0.1572, "step": 230920 }, { "epoch": 9.57, "grad_norm": 0.515625, "learning_rate": 0.000384789661481558, "loss": 0.1908, "step": 230930 }, { "epoch": 9.57, "grad_norm": 0.65625, "learning_rate": 0.0003847805274858493, "loss": 0.1525, "step": 230940 }, { "epoch": 9.57, "grad_norm": 0.296875, "learning_rate": 0.00038477139323649817, "loss": 0.1787, "step": 230950 }, { "epoch": 9.57, "grad_norm": 0.81640625, "learning_rate": 0.0003847622587335219, "loss": 0.2233, "step": 230960 }, { "epoch": 9.57, "grad_norm": 0.609375, "learning_rate": 0.00038475312397693765, "loss": 0.1814, "step": 230970 }, { "epoch": 9.57, "grad_norm": 0.53125, "learning_rate": 0.00038474398896676243, "loss": 0.1876, "step": 230980 }, { "epoch": 9.57, "grad_norm": 0.75390625, "learning_rate": 0.0003847348537030137, "loss": 0.1989, "step": 230990 }, { "epoch": 9.57, "grad_norm": 0.55859375, "learning_rate": 0.00038472571818570843, "loss": 0.1939, "step": 231000 }, { "epoch": 9.57, "grad_norm": 0.458984375, "learning_rate": 0.00038471658241486396, "loss": 0.1857, "step": 231010 }, { "epoch": 9.57, "grad_norm": 0.98828125, "learning_rate": 0.0003847074463904974, "loss": 0.2112, "step": 231020 }, { "epoch": 9.57, "grad_norm": 1.40625, "learning_rate": 0.000384698310112626, "loss": 0.1814, "step": 231030 }, { "epoch": 9.57, "grad_norm": 0.80859375, "learning_rate": 0.000384689173581267, "loss": 0.2047, "step": 231040 }, { "epoch": 9.57, "grad_norm": 1.515625, "learning_rate": 0.00038468003679643735, "loss": 0.2165, "step": 231050 }, { "epoch": 9.57, "grad_norm": 0.6953125, "learning_rate": 0.0003846708997581546, "loss": 0.1771, "step": 231060 }, { "epoch": 9.57, "grad_norm": 0.388671875, "learning_rate": 0.0003846617624664357, "loss": 0.185, "step": 231070 }, { "epoch": 9.57, "grad_norm": 1.0078125, "learning_rate": 0.0003846526249212978, "loss": 0.1772, "step": 231080 }, { "epoch": 9.57, "grad_norm": 1.53125, "learning_rate": 0.0003846434871227583, "loss": 0.1897, "step": 231090 }, { "epoch": 9.57, "grad_norm": 1.03125, "learning_rate": 0.00038463434907083427, "loss": 0.1817, "step": 231100 }, { "epoch": 9.57, "grad_norm": 2.09375, "learning_rate": 0.0003846252107655429, "loss": 0.1859, "step": 231110 }, { "epoch": 9.57, "grad_norm": 0.67578125, "learning_rate": 0.0003846160722069015, "loss": 0.2259, "step": 231120 }, { "epoch": 9.57, "grad_norm": 0.64453125, "learning_rate": 0.0003846069333949271, "loss": 0.1671, "step": 231130 }, { "epoch": 9.57, "grad_norm": 0.59375, "learning_rate": 0.00038459779432963704, "loss": 0.1747, "step": 231140 }, { "epoch": 9.57, "grad_norm": 0.97265625, "learning_rate": 0.00038458865501104844, "loss": 0.1911, "step": 231150 }, { "epoch": 9.57, "grad_norm": 1.3125, "learning_rate": 0.00038457951543917855, "loss": 0.28, "step": 231160 }, { "epoch": 9.58, "grad_norm": 0.3671875, "learning_rate": 0.00038457037561404463, "loss": 0.2188, "step": 231170 }, { "epoch": 9.58, "grad_norm": 0.609375, "learning_rate": 0.0003845612355356637, "loss": 0.2012, "step": 231180 }, { "epoch": 9.58, "grad_norm": 0.5703125, "learning_rate": 0.000384552095204053, "loss": 0.1771, "step": 231190 }, { "epoch": 9.58, "grad_norm": 1.0, "learning_rate": 0.0003845429546192299, "loss": 0.2097, "step": 231200 }, { "epoch": 9.58, "grad_norm": 0.52734375, "learning_rate": 0.0003845338137812114, "loss": 0.2336, "step": 231210 }, { "epoch": 9.58, "grad_norm": 0.8359375, "learning_rate": 0.0003845246726900148, "loss": 0.1955, "step": 231220 }, { "epoch": 9.58, "grad_norm": 0.86328125, "learning_rate": 0.0003845155313456573, "loss": 0.1764, "step": 231230 }, { "epoch": 9.58, "grad_norm": 1.15625, "learning_rate": 0.0003845063897481562, "loss": 0.2359, "step": 231240 }, { "epoch": 9.58, "grad_norm": 0.6875, "learning_rate": 0.0003844972478975285, "loss": 0.1806, "step": 231250 }, { "epoch": 9.58, "grad_norm": 0.431640625, "learning_rate": 0.0003844881057937916, "loss": 0.1937, "step": 231260 }, { "epoch": 9.58, "grad_norm": 1.1875, "learning_rate": 0.0003844789634369624, "loss": 0.178, "step": 231270 }, { "epoch": 9.58, "grad_norm": 1.1484375, "learning_rate": 0.0003844698208270585, "loss": 0.1935, "step": 231280 }, { "epoch": 9.58, "grad_norm": 0.59375, "learning_rate": 0.0003844606779640969, "loss": 0.2448, "step": 231290 }, { "epoch": 9.58, "grad_norm": 0.423828125, "learning_rate": 0.00038445153484809475, "loss": 0.1857, "step": 231300 }, { "epoch": 9.58, "grad_norm": 2.390625, "learning_rate": 0.00038444239147906935, "loss": 0.2093, "step": 231310 }, { "epoch": 9.58, "grad_norm": 0.44140625, "learning_rate": 0.00038443324785703795, "loss": 0.2087, "step": 231320 }, { "epoch": 9.58, "grad_norm": 0.859375, "learning_rate": 0.0003844241039820177, "loss": 0.1921, "step": 231330 }, { "epoch": 9.58, "grad_norm": 0.6171875, "learning_rate": 0.0003844149598540257, "loss": 0.1788, "step": 231340 }, { "epoch": 9.58, "grad_norm": 0.9140625, "learning_rate": 0.0003844058154730793, "loss": 0.2011, "step": 231350 }, { "epoch": 9.58, "grad_norm": 1.515625, "learning_rate": 0.00038439667083919573, "loss": 0.1977, "step": 231360 }, { "epoch": 9.58, "grad_norm": 1.1953125, "learning_rate": 0.0003843875259523921, "loss": 0.2361, "step": 231370 }, { "epoch": 9.58, "grad_norm": 1.0078125, "learning_rate": 0.00038437838081268564, "loss": 0.2083, "step": 231380 }, { "epoch": 9.58, "grad_norm": 0.51953125, "learning_rate": 0.00038436923542009354, "loss": 0.1783, "step": 231390 }, { "epoch": 9.58, "grad_norm": 1.0, "learning_rate": 0.0003843600897746331, "loss": 0.1704, "step": 231400 }, { "epoch": 9.58, "grad_norm": 0.58984375, "learning_rate": 0.00038435094387632155, "loss": 0.2086, "step": 231410 }, { "epoch": 9.59, "grad_norm": 0.349609375, "learning_rate": 0.00038434179772517597, "loss": 0.2004, "step": 231420 }, { "epoch": 9.59, "grad_norm": 0.65625, "learning_rate": 0.0003843326513212136, "loss": 0.1845, "step": 231430 }, { "epoch": 9.59, "grad_norm": 0.9453125, "learning_rate": 0.0003843235046644517, "loss": 0.2033, "step": 231440 }, { "epoch": 9.59, "grad_norm": 1.9375, "learning_rate": 0.0003843143577549075, "loss": 0.2109, "step": 231450 }, { "epoch": 9.59, "grad_norm": 4.5625, "learning_rate": 0.00038430521059259814, "loss": 0.1941, "step": 231460 }, { "epoch": 9.59, "grad_norm": 0.31640625, "learning_rate": 0.00038429606317754095, "loss": 0.2432, "step": 231470 }, { "epoch": 9.59, "grad_norm": 0.390625, "learning_rate": 0.00038428691550975304, "loss": 0.2009, "step": 231480 }, { "epoch": 9.59, "grad_norm": 0.578125, "learning_rate": 0.00038427776758925163, "loss": 0.1896, "step": 231490 }, { "epoch": 9.59, "grad_norm": 0.6796875, "learning_rate": 0.00038426861941605393, "loss": 0.2073, "step": 231500 }, { "epoch": 9.59, "grad_norm": 1.078125, "learning_rate": 0.0003842594709901773, "loss": 0.1801, "step": 231510 }, { "epoch": 9.59, "grad_norm": 0.7890625, "learning_rate": 0.00038425032231163874, "loss": 0.2021, "step": 231520 }, { "epoch": 9.59, "grad_norm": 0.66796875, "learning_rate": 0.0003842411733804556, "loss": 0.2181, "step": 231530 }, { "epoch": 9.59, "grad_norm": 1.625, "learning_rate": 0.00038423202419664515, "loss": 0.146, "step": 231540 }, { "epoch": 9.59, "grad_norm": 0.5, "learning_rate": 0.0003842228747602244, "loss": 0.2266, "step": 231550 }, { "epoch": 9.59, "grad_norm": 1.5390625, "learning_rate": 0.00038421372507121076, "loss": 0.2087, "step": 231560 }, { "epoch": 9.59, "grad_norm": 0.2490234375, "learning_rate": 0.00038420457512962136, "loss": 0.1901, "step": 231570 }, { "epoch": 9.59, "grad_norm": 0.68359375, "learning_rate": 0.0003841954249354735, "loss": 0.1591, "step": 231580 }, { "epoch": 9.59, "grad_norm": 0.55078125, "learning_rate": 0.0003841862744887843, "loss": 0.1881, "step": 231590 }, { "epoch": 9.59, "grad_norm": 0.81640625, "learning_rate": 0.000384177123789571, "loss": 0.1701, "step": 231600 }, { "epoch": 9.59, "grad_norm": 1.015625, "learning_rate": 0.0003841679728378509, "loss": 0.1679, "step": 231610 }, { "epoch": 9.59, "grad_norm": 0.86328125, "learning_rate": 0.0003841588216336411, "loss": 0.2264, "step": 231620 }, { "epoch": 9.59, "grad_norm": 0.48828125, "learning_rate": 0.0003841496701769589, "loss": 0.2052, "step": 231630 }, { "epoch": 9.59, "grad_norm": 0.30078125, "learning_rate": 0.0003841405184678215, "loss": 0.2094, "step": 231640 }, { "epoch": 9.59, "grad_norm": 0.640625, "learning_rate": 0.00038413136650624615, "loss": 0.1904, "step": 231650 }, { "epoch": 9.6, "grad_norm": 1.765625, "learning_rate": 0.00038412221429225016, "loss": 0.1429, "step": 231660 }, { "epoch": 9.6, "grad_norm": 0.85546875, "learning_rate": 0.0003841130618258505, "loss": 0.1902, "step": 231670 }, { "epoch": 9.6, "grad_norm": 0.70703125, "learning_rate": 0.00038410390910706454, "loss": 0.1959, "step": 231680 }, { "epoch": 9.6, "grad_norm": 0.6953125, "learning_rate": 0.00038409475613590955, "loss": 0.1911, "step": 231690 }, { "epoch": 9.6, "grad_norm": 0.306640625, "learning_rate": 0.00038408560291240267, "loss": 0.1708, "step": 231700 }, { "epoch": 9.6, "grad_norm": 0.7734375, "learning_rate": 0.0003840764494365613, "loss": 0.1733, "step": 231710 }, { "epoch": 9.6, "grad_norm": 1.734375, "learning_rate": 0.00038406729570840235, "loss": 0.1833, "step": 231720 }, { "epoch": 9.6, "grad_norm": 1.0859375, "learning_rate": 0.00038405814172794335, "loss": 0.1762, "step": 231730 }, { "epoch": 9.6, "grad_norm": 1.7109375, "learning_rate": 0.0003840489874952014, "loss": 0.1922, "step": 231740 }, { "epoch": 9.6, "grad_norm": 0.74609375, "learning_rate": 0.0003840398330101936, "loss": 0.2138, "step": 231750 }, { "epoch": 9.6, "grad_norm": 1.3125, "learning_rate": 0.0003840306782729375, "loss": 0.2356, "step": 231760 }, { "epoch": 9.6, "grad_norm": 0.81640625, "learning_rate": 0.00038402152328344995, "loss": 0.2071, "step": 231770 }, { "epoch": 9.6, "grad_norm": 0.453125, "learning_rate": 0.0003840123680417485, "loss": 0.1652, "step": 231780 }, { "epoch": 9.6, "grad_norm": 0.98828125, "learning_rate": 0.0003840032125478502, "loss": 0.2006, "step": 231790 }, { "epoch": 9.6, "grad_norm": 1.3359375, "learning_rate": 0.00038399405680177235, "loss": 0.2402, "step": 231800 }, { "epoch": 9.6, "grad_norm": 0.6875, "learning_rate": 0.0003839849008035321, "loss": 0.2002, "step": 231810 }, { "epoch": 9.6, "grad_norm": 0.703125, "learning_rate": 0.0003839757445531469, "loss": 0.1911, "step": 231820 }, { "epoch": 9.6, "grad_norm": 1.1640625, "learning_rate": 0.0003839665880506336, "loss": 0.2321, "step": 231830 }, { "epoch": 9.6, "grad_norm": 0.58984375, "learning_rate": 0.00038395743129600974, "loss": 0.1883, "step": 231840 }, { "epoch": 9.6, "grad_norm": 0.78125, "learning_rate": 0.00038394827428929245, "loss": 0.2024, "step": 231850 }, { "epoch": 9.6, "grad_norm": 0.74609375, "learning_rate": 0.000383939117030499, "loss": 0.191, "step": 231860 }, { "epoch": 9.6, "grad_norm": 0.369140625, "learning_rate": 0.0003839299595196466, "loss": 0.175, "step": 231870 }, { "epoch": 9.6, "grad_norm": 1.828125, "learning_rate": 0.0003839208017567524, "loss": 0.1898, "step": 231880 }, { "epoch": 9.6, "grad_norm": 0.5234375, "learning_rate": 0.0003839116437418338, "loss": 0.1525, "step": 231890 }, { "epoch": 9.61, "grad_norm": 0.87890625, "learning_rate": 0.0003839024854749079, "loss": 0.2206, "step": 231900 }, { "epoch": 9.61, "grad_norm": 0.59765625, "learning_rate": 0.000383893326955992, "loss": 0.2124, "step": 231910 }, { "epoch": 9.61, "grad_norm": 0.384765625, "learning_rate": 0.0003838841681851033, "loss": 0.2995, "step": 231920 }, { "epoch": 9.61, "grad_norm": 0.7109375, "learning_rate": 0.0003838750091622591, "loss": 0.1994, "step": 231930 }, { "epoch": 9.61, "grad_norm": 0.73828125, "learning_rate": 0.00038386584988747653, "loss": 0.2199, "step": 231940 }, { "epoch": 9.61, "grad_norm": 1.1640625, "learning_rate": 0.00038385669036077296, "loss": 0.2126, "step": 231950 }, { "epoch": 9.61, "grad_norm": 0.5078125, "learning_rate": 0.0003838475305821655, "loss": 0.2074, "step": 231960 }, { "epoch": 9.61, "grad_norm": 0.37890625, "learning_rate": 0.0003838383705516715, "loss": 0.1626, "step": 231970 }, { "epoch": 9.61, "grad_norm": 0.6796875, "learning_rate": 0.00038382921026930807, "loss": 0.209, "step": 231980 }, { "epoch": 9.61, "grad_norm": 1.765625, "learning_rate": 0.0003838200497350925, "loss": 0.1757, "step": 231990 }, { "epoch": 9.61, "grad_norm": 0.87890625, "learning_rate": 0.00038381088894904214, "loss": 0.2536, "step": 232000 }, { "epoch": 9.61, "grad_norm": 0.46484375, "learning_rate": 0.00038380172791117407, "loss": 0.2073, "step": 232010 }, { "epoch": 9.61, "grad_norm": 0.8828125, "learning_rate": 0.00038379256662150556, "loss": 0.1882, "step": 232020 }, { "epoch": 9.61, "grad_norm": 1.328125, "learning_rate": 0.000383783405080054, "loss": 0.2167, "step": 232030 }, { "epoch": 9.61, "grad_norm": 0.94921875, "learning_rate": 0.0003837742432868364, "loss": 0.227, "step": 232040 }, { "epoch": 9.61, "grad_norm": 1.1328125, "learning_rate": 0.0003837650812418702, "loss": 0.2013, "step": 232050 }, { "epoch": 9.61, "grad_norm": 0.9140625, "learning_rate": 0.00038375591894517256, "loss": 0.2336, "step": 232060 }, { "epoch": 9.61, "grad_norm": 1.0703125, "learning_rate": 0.00038374675639676073, "loss": 0.1703, "step": 232070 }, { "epoch": 9.61, "grad_norm": 0.51953125, "learning_rate": 0.00038373759359665196, "loss": 0.1626, "step": 232080 }, { "epoch": 9.61, "grad_norm": 1.03125, "learning_rate": 0.0003837284305448634, "loss": 0.2498, "step": 232090 }, { "epoch": 9.61, "grad_norm": 0.484375, "learning_rate": 0.00038371926724141235, "loss": 0.2005, "step": 232100 }, { "epoch": 9.61, "grad_norm": 0.61328125, "learning_rate": 0.00038371010368631626, "loss": 0.1776, "step": 232110 }, { "epoch": 9.61, "grad_norm": 0.4609375, "learning_rate": 0.00038370093987959196, "loss": 0.2119, "step": 232120 }, { "epoch": 9.61, "grad_norm": 0.83203125, "learning_rate": 0.00038369177582125714, "loss": 0.1316, "step": 232130 }, { "epoch": 9.62, "grad_norm": 0.921875, "learning_rate": 0.00038368261151132876, "loss": 0.1809, "step": 232140 }, { "epoch": 9.62, "grad_norm": 0.2265625, "learning_rate": 0.0003836734469498241, "loss": 0.2517, "step": 232150 }, { "epoch": 9.62, "grad_norm": 1.09375, "learning_rate": 0.00038366428213676055, "loss": 0.2113, "step": 232160 }, { "epoch": 9.62, "grad_norm": 0.8515625, "learning_rate": 0.0003836551170721552, "loss": 0.2388, "step": 232170 }, { "epoch": 9.62, "grad_norm": 0.97265625, "learning_rate": 0.0003836459517560253, "loss": 0.2174, "step": 232180 }, { "epoch": 9.62, "grad_norm": 0.82421875, "learning_rate": 0.00038363678618838823, "loss": 0.1646, "step": 232190 }, { "epoch": 9.62, "grad_norm": 0.6953125, "learning_rate": 0.000383627620369261, "loss": 0.1556, "step": 232200 }, { "epoch": 9.62, "grad_norm": 0.84765625, "learning_rate": 0.00038361845429866126, "loss": 0.2072, "step": 232210 }, { "epoch": 9.62, "grad_norm": 1.1171875, "learning_rate": 0.00038360928797660587, "loss": 0.1939, "step": 232220 }, { "epoch": 9.62, "grad_norm": 0.314453125, "learning_rate": 0.0003836001214031122, "loss": 0.199, "step": 232230 }, { "epoch": 9.62, "grad_norm": 0.546875, "learning_rate": 0.00038359095457819763, "loss": 0.2, "step": 232240 }, { "epoch": 9.62, "grad_norm": 1.640625, "learning_rate": 0.00038358178750187927, "loss": 0.2014, "step": 232250 }, { "epoch": 9.62, "grad_norm": 0.640625, "learning_rate": 0.0003835726201741744, "loss": 0.2337, "step": 232260 }, { "epoch": 9.62, "grad_norm": 2.203125, "learning_rate": 0.00038356345259510035, "loss": 0.2035, "step": 232270 }, { "epoch": 9.62, "grad_norm": 1.1953125, "learning_rate": 0.00038355428476467417, "loss": 0.231, "step": 232280 }, { "epoch": 9.62, "grad_norm": 3.703125, "learning_rate": 0.0003835451166829134, "loss": 0.2111, "step": 232290 }, { "epoch": 9.62, "grad_norm": 0.625, "learning_rate": 0.0003835359483498351, "loss": 0.191, "step": 232300 }, { "epoch": 9.62, "grad_norm": 1.09375, "learning_rate": 0.00038352677976545644, "loss": 0.2657, "step": 232310 }, { "epoch": 9.62, "grad_norm": 0.88671875, "learning_rate": 0.000383517610929795, "loss": 0.1504, "step": 232320 }, { "epoch": 9.62, "grad_norm": 0.53125, "learning_rate": 0.00038350844184286765, "loss": 0.2151, "step": 232330 }, { "epoch": 9.62, "grad_norm": 0.625, "learning_rate": 0.0003834992725046919, "loss": 0.222, "step": 232340 }, { "epoch": 9.62, "grad_norm": 1.0703125, "learning_rate": 0.00038349010291528497, "loss": 0.2129, "step": 232350 }, { "epoch": 9.62, "grad_norm": 1.65625, "learning_rate": 0.00038348093307466404, "loss": 0.2353, "step": 232360 }, { "epoch": 9.62, "grad_norm": 0.53515625, "learning_rate": 0.0003834717629828465, "loss": 0.1883, "step": 232370 }, { "epoch": 9.63, "grad_norm": 0.98046875, "learning_rate": 0.0003834625926398494, "loss": 0.1753, "step": 232380 }, { "epoch": 9.63, "grad_norm": 0.427734375, "learning_rate": 0.00038345342204569013, "loss": 0.21, "step": 232390 }, { "epoch": 9.63, "grad_norm": 0.61328125, "learning_rate": 0.00038344425120038594, "loss": 0.1753, "step": 232400 }, { "epoch": 9.63, "grad_norm": 0.9921875, "learning_rate": 0.00038343508010395413, "loss": 0.1989, "step": 232410 }, { "epoch": 9.63, "grad_norm": 0.2392578125, "learning_rate": 0.0003834259087564119, "loss": 0.1762, "step": 232420 }, { "epoch": 9.63, "grad_norm": 1.09375, "learning_rate": 0.0003834167371577765, "loss": 0.1762, "step": 232430 }, { "epoch": 9.63, "grad_norm": 0.79296875, "learning_rate": 0.00038340756530806517, "loss": 0.1819, "step": 232440 }, { "epoch": 9.63, "grad_norm": 0.953125, "learning_rate": 0.00038339839320729525, "loss": 0.1764, "step": 232450 }, { "epoch": 9.63, "grad_norm": 1.78125, "learning_rate": 0.0003833892208554839, "loss": 0.2085, "step": 232460 }, { "epoch": 9.63, "grad_norm": 0.6640625, "learning_rate": 0.0003833800482526485, "loss": 0.1702, "step": 232470 }, { "epoch": 9.63, "grad_norm": 0.671875, "learning_rate": 0.00038337087539880635, "loss": 0.1489, "step": 232480 }, { "epoch": 9.63, "grad_norm": 0.6171875, "learning_rate": 0.0003833617022939744, "loss": 0.1977, "step": 232490 }, { "epoch": 9.63, "grad_norm": 0.3046875, "learning_rate": 0.0003833525289381703, "loss": 0.1538, "step": 232500 }, { "epoch": 9.63, "grad_norm": 0.56640625, "learning_rate": 0.00038334335533141107, "loss": 0.1835, "step": 232510 }, { "epoch": 9.63, "grad_norm": 1.1953125, "learning_rate": 0.000383334181473714, "loss": 0.2651, "step": 232520 }, { "epoch": 9.63, "grad_norm": 0.98046875, "learning_rate": 0.0003833250073650965, "loss": 0.1747, "step": 232530 }, { "epoch": 9.63, "grad_norm": 1.078125, "learning_rate": 0.0003833158330055757, "loss": 0.1922, "step": 232540 }, { "epoch": 9.63, "grad_norm": 0.7265625, "learning_rate": 0.0003833066583951689, "loss": 0.147, "step": 232550 }, { "epoch": 9.63, "grad_norm": 0.65625, "learning_rate": 0.00038329748353389337, "loss": 0.1105, "step": 232560 }, { "epoch": 9.63, "grad_norm": 0.734375, "learning_rate": 0.00038328830842176634, "loss": 0.2017, "step": 232570 }, { "epoch": 9.63, "grad_norm": 1.9765625, "learning_rate": 0.00038327913305880506, "loss": 0.1717, "step": 232580 }, { "epoch": 9.63, "grad_norm": 0.96484375, "learning_rate": 0.00038326995744502695, "loss": 0.158, "step": 232590 }, { "epoch": 9.63, "grad_norm": 0.9140625, "learning_rate": 0.0003832607815804491, "loss": 0.1481, "step": 232600 }, { "epoch": 9.63, "grad_norm": 1.015625, "learning_rate": 0.00038325160546508885, "loss": 0.2109, "step": 232610 }, { "epoch": 9.64, "grad_norm": 0.9453125, "learning_rate": 0.00038324242909896356, "loss": 0.214, "step": 232620 }, { "epoch": 9.64, "grad_norm": 0.88671875, "learning_rate": 0.00038323325248209027, "loss": 0.2136, "step": 232630 }, { "epoch": 9.64, "grad_norm": 0.9296875, "learning_rate": 0.0003832240756144865, "loss": 0.2075, "step": 232640 }, { "epoch": 9.64, "grad_norm": 0.89453125, "learning_rate": 0.00038321489849616944, "loss": 0.1877, "step": 232650 }, { "epoch": 9.64, "grad_norm": 0.0, "learning_rate": 0.00038320572112715616, "loss": 0.2125, "step": 232660 }, { "epoch": 9.64, "grad_norm": 0.6953125, "learning_rate": 0.0003831965435074642, "loss": 0.1587, "step": 232670 }, { "epoch": 9.64, "grad_norm": 0.796875, "learning_rate": 0.0003831873656371107, "loss": 0.2101, "step": 232680 }, { "epoch": 9.64, "grad_norm": 0.76953125, "learning_rate": 0.000383178187516113, "loss": 0.2032, "step": 232690 }, { "epoch": 9.64, "grad_norm": 0.0, "learning_rate": 0.0003831690091444884, "loss": 0.2448, "step": 232700 }, { "epoch": 9.64, "grad_norm": 0.94921875, "learning_rate": 0.00038315983052225394, "loss": 0.1763, "step": 232710 }, { "epoch": 9.64, "grad_norm": 1.5390625, "learning_rate": 0.0003831506516494272, "loss": 0.2013, "step": 232720 }, { "epoch": 9.64, "grad_norm": 0.44140625, "learning_rate": 0.00038314147252602524, "loss": 0.1921, "step": 232730 }, { "epoch": 9.64, "grad_norm": 0.703125, "learning_rate": 0.0003831322931520654, "loss": 0.1474, "step": 232740 }, { "epoch": 9.64, "grad_norm": 0.84375, "learning_rate": 0.000383123113527565, "loss": 0.1461, "step": 232750 }, { "epoch": 9.64, "grad_norm": 0.5, "learning_rate": 0.0003831139336525412, "loss": 0.2373, "step": 232760 }, { "epoch": 9.64, "grad_norm": 0.64453125, "learning_rate": 0.00038310475352701146, "loss": 0.2075, "step": 232770 }, { "epoch": 9.64, "grad_norm": 1.6171875, "learning_rate": 0.0003830955731509929, "loss": 0.2033, "step": 232780 }, { "epoch": 9.64, "grad_norm": 0.390625, "learning_rate": 0.00038308639252450285, "loss": 0.2021, "step": 232790 }, { "epoch": 9.64, "grad_norm": 0.52734375, "learning_rate": 0.0003830772116475586, "loss": 0.2174, "step": 232800 }, { "epoch": 9.64, "grad_norm": 1.8515625, "learning_rate": 0.00038306803052017734, "loss": 0.1998, "step": 232810 }, { "epoch": 9.64, "grad_norm": 1.5078125, "learning_rate": 0.00038305884914237645, "loss": 0.2241, "step": 232820 }, { "epoch": 9.64, "grad_norm": 0.69921875, "learning_rate": 0.00038304966751417317, "loss": 0.2071, "step": 232830 }, { "epoch": 9.64, "grad_norm": 0.96875, "learning_rate": 0.0003830404856355848, "loss": 0.196, "step": 232840 }, { "epoch": 9.64, "grad_norm": 1.234375, "learning_rate": 0.0003830313035066286, "loss": 0.2187, "step": 232850 }, { "epoch": 9.65, "grad_norm": 0.890625, "learning_rate": 0.00038302212112732183, "loss": 0.2484, "step": 232860 }, { "epoch": 9.65, "grad_norm": 1.2265625, "learning_rate": 0.0003830129384976818, "loss": 0.2283, "step": 232870 }, { "epoch": 9.65, "grad_norm": 0.455078125, "learning_rate": 0.0003830037556177258, "loss": 0.1846, "step": 232880 }, { "epoch": 9.65, "grad_norm": 1.0, "learning_rate": 0.00038299457248747117, "loss": 0.1775, "step": 232890 }, { "epoch": 9.65, "grad_norm": 1.796875, "learning_rate": 0.000382985389106935, "loss": 0.2154, "step": 232900 }, { "epoch": 9.65, "grad_norm": 1.1875, "learning_rate": 0.0003829762054761347, "loss": 0.2298, "step": 232910 }, { "epoch": 9.65, "grad_norm": 0.345703125, "learning_rate": 0.00038296702159508754, "loss": 0.2038, "step": 232920 }, { "epoch": 9.65, "grad_norm": 0.87109375, "learning_rate": 0.00038295783746381093, "loss": 0.195, "step": 232930 }, { "epoch": 9.65, "grad_norm": 1.1171875, "learning_rate": 0.0003829486530823219, "loss": 0.1745, "step": 232940 }, { "epoch": 9.65, "grad_norm": 0.4765625, "learning_rate": 0.0003829394684506379, "loss": 0.2093, "step": 232950 }, { "epoch": 9.65, "grad_norm": 0.64453125, "learning_rate": 0.0003829302835687761, "loss": 0.2012, "step": 232960 }, { "epoch": 9.65, "grad_norm": 0.263671875, "learning_rate": 0.00038292109843675407, "loss": 0.1734, "step": 232970 }, { "epoch": 9.65, "grad_norm": 0.8203125, "learning_rate": 0.00038291191305458874, "loss": 0.1951, "step": 232980 }, { "epoch": 9.65, "grad_norm": 0.7109375, "learning_rate": 0.0003829027274222975, "loss": 0.2065, "step": 232990 }, { "epoch": 9.65, "grad_norm": 0.5859375, "learning_rate": 0.00038289354153989776, "loss": 0.2034, "step": 233000 }, { "epoch": 9.65, "grad_norm": 0.59375, "learning_rate": 0.0003828843554074067, "loss": 0.226, "step": 233010 }, { "epoch": 9.65, "grad_norm": 0.640625, "learning_rate": 0.00038287516902484167, "loss": 0.174, "step": 233020 }, { "epoch": 9.65, "grad_norm": 0.46484375, "learning_rate": 0.0003828659823922198, "loss": 0.2004, "step": 233030 }, { "epoch": 9.65, "grad_norm": 1.6640625, "learning_rate": 0.0003828567955095586, "loss": 0.2017, "step": 233040 }, { "epoch": 9.65, "grad_norm": 0.4609375, "learning_rate": 0.00038284760837687526, "loss": 0.1815, "step": 233050 }, { "epoch": 9.65, "grad_norm": 0.439453125, "learning_rate": 0.0003828384209941871, "loss": 0.1861, "step": 233060 }, { "epoch": 9.65, "grad_norm": 0.77734375, "learning_rate": 0.00038282923336151127, "loss": 0.2248, "step": 233070 }, { "epoch": 9.65, "grad_norm": 0.72265625, "learning_rate": 0.00038282004547886516, "loss": 0.2234, "step": 233080 }, { "epoch": 9.65, "grad_norm": 0.78515625, "learning_rate": 0.00038281085734626617, "loss": 0.1547, "step": 233090 }, { "epoch": 9.65, "grad_norm": 0.60546875, "learning_rate": 0.00038280166896373147, "loss": 0.2226, "step": 233100 }, { "epoch": 9.66, "grad_norm": 0.8125, "learning_rate": 0.00038279248033127835, "loss": 0.1639, "step": 233110 }, { "epoch": 9.66, "grad_norm": 0.87109375, "learning_rate": 0.0003827832914489241, "loss": 0.2218, "step": 233120 }, { "epoch": 9.66, "grad_norm": 0.546875, "learning_rate": 0.00038277410231668605, "loss": 0.2431, "step": 233130 }, { "epoch": 9.66, "grad_norm": 1.0859375, "learning_rate": 0.0003827649129345815, "loss": 0.2283, "step": 233140 }, { "epoch": 9.66, "grad_norm": 0.65625, "learning_rate": 0.00038275572330262773, "loss": 0.242, "step": 233150 }, { "epoch": 9.66, "grad_norm": 0.60546875, "learning_rate": 0.000382746533420842, "loss": 0.1891, "step": 233160 }, { "epoch": 9.66, "grad_norm": 1.2578125, "learning_rate": 0.0003827373432892416, "loss": 0.1984, "step": 233170 }, { "epoch": 9.66, "grad_norm": 2.265625, "learning_rate": 0.0003827281529078439, "loss": 0.1763, "step": 233180 }, { "epoch": 9.66, "grad_norm": 0.625, "learning_rate": 0.00038271896227666616, "loss": 0.1735, "step": 233190 }, { "epoch": 9.66, "grad_norm": 0.859375, "learning_rate": 0.00038270977139572563, "loss": 0.1785, "step": 233200 }, { "epoch": 9.66, "grad_norm": 0.703125, "learning_rate": 0.00038270058026503966, "loss": 0.2119, "step": 233210 }, { "epoch": 9.66, "grad_norm": 1.4921875, "learning_rate": 0.0003826913888846255, "loss": 0.211, "step": 233220 }, { "epoch": 9.66, "grad_norm": 0.8125, "learning_rate": 0.00038268219725450046, "loss": 0.2123, "step": 233230 }, { "epoch": 9.66, "grad_norm": 0.75, "learning_rate": 0.0003826730053746819, "loss": 0.2008, "step": 233240 }, { "epoch": 9.66, "grad_norm": 0.53515625, "learning_rate": 0.00038266381324518706, "loss": 0.1892, "step": 233250 }, { "epoch": 9.66, "grad_norm": 0.51953125, "learning_rate": 0.0003826546208660333, "loss": 0.2115, "step": 233260 }, { "epoch": 9.66, "grad_norm": 0.61328125, "learning_rate": 0.0003826454282372378, "loss": 0.2281, "step": 233270 }, { "epoch": 9.66, "grad_norm": 0.828125, "learning_rate": 0.000382636235358818, "loss": 0.2206, "step": 233280 }, { "epoch": 9.66, "grad_norm": 0.7421875, "learning_rate": 0.00038262704223079104, "loss": 0.1976, "step": 233290 }, { "epoch": 9.66, "grad_norm": 0.423828125, "learning_rate": 0.0003826178488531743, "loss": 0.1947, "step": 233300 }, { "epoch": 9.66, "grad_norm": 0.7578125, "learning_rate": 0.0003826086552259852, "loss": 0.1918, "step": 233310 }, { "epoch": 9.66, "grad_norm": 0.7578125, "learning_rate": 0.00038259946134924083, "loss": 0.1701, "step": 233320 }, { "epoch": 9.66, "grad_norm": 1.1796875, "learning_rate": 0.00038259026722295864, "loss": 0.2241, "step": 233330 }, { "epoch": 9.66, "grad_norm": 1.109375, "learning_rate": 0.00038258107284715594, "loss": 0.2055, "step": 233340 }, { "epoch": 9.67, "grad_norm": 0.478515625, "learning_rate": 0.00038257187822184985, "loss": 0.1793, "step": 233350 }, { "epoch": 9.67, "grad_norm": 0.416015625, "learning_rate": 0.00038256268334705784, "loss": 0.2169, "step": 233360 }, { "epoch": 9.67, "grad_norm": 0.8203125, "learning_rate": 0.00038255348822279725, "loss": 0.1706, "step": 233370 }, { "epoch": 9.67, "grad_norm": 1.2734375, "learning_rate": 0.0003825442928490852, "loss": 0.1631, "step": 233380 }, { "epoch": 9.67, "grad_norm": 0.671875, "learning_rate": 0.0003825350972259392, "loss": 0.1679, "step": 233390 }, { "epoch": 9.67, "grad_norm": 0.47265625, "learning_rate": 0.0003825259013533764, "loss": 0.2081, "step": 233400 }, { "epoch": 9.67, "grad_norm": 1.0625, "learning_rate": 0.00038251670523141424, "loss": 0.2621, "step": 233410 }, { "epoch": 9.67, "grad_norm": 0.69140625, "learning_rate": 0.0003825075088600699, "loss": 0.1838, "step": 233420 }, { "epoch": 9.67, "grad_norm": 0.53515625, "learning_rate": 0.0003824983122393607, "loss": 0.1861, "step": 233430 }, { "epoch": 9.67, "grad_norm": 0.8125, "learning_rate": 0.000382489115369304, "loss": 0.2177, "step": 233440 }, { "epoch": 9.67, "grad_norm": 0.41796875, "learning_rate": 0.0003824799182499171, "loss": 0.2178, "step": 233450 }, { "epoch": 9.67, "grad_norm": 0.443359375, "learning_rate": 0.0003824707208812174, "loss": 0.1604, "step": 233460 }, { "epoch": 9.67, "grad_norm": 1.546875, "learning_rate": 0.00038246152326322204, "loss": 0.2458, "step": 233470 }, { "epoch": 9.67, "grad_norm": 0.6640625, "learning_rate": 0.00038245232539594834, "loss": 0.2526, "step": 233480 }, { "epoch": 9.67, "grad_norm": 0.7890625, "learning_rate": 0.00038244312727941373, "loss": 0.2225, "step": 233490 }, { "epoch": 9.67, "grad_norm": 0.1953125, "learning_rate": 0.00038243392891363547, "loss": 0.1841, "step": 233500 }, { "epoch": 9.67, "grad_norm": 0.72265625, "learning_rate": 0.0003824247302986308, "loss": 0.1961, "step": 233510 }, { "epoch": 9.67, "grad_norm": 1.359375, "learning_rate": 0.0003824155314344171, "loss": 0.174, "step": 233520 }, { "epoch": 9.67, "grad_norm": 0.4765625, "learning_rate": 0.00038240633232101165, "loss": 0.1124, "step": 233530 }, { "epoch": 9.67, "grad_norm": 0.6796875, "learning_rate": 0.00038239713295843184, "loss": 0.1953, "step": 233540 }, { "epoch": 9.67, "grad_norm": 0.94921875, "learning_rate": 0.0003823879333466949, "loss": 0.1719, "step": 233550 }, { "epoch": 9.67, "grad_norm": 0.84765625, "learning_rate": 0.0003823787334858182, "loss": 0.1899, "step": 233560 }, { "epoch": 9.67, "grad_norm": 0.546875, "learning_rate": 0.0003823695333758189, "loss": 0.1924, "step": 233570 }, { "epoch": 9.67, "grad_norm": 0.416015625, "learning_rate": 0.00038236033301671456, "loss": 0.1975, "step": 233580 }, { "epoch": 9.68, "grad_norm": 0.4765625, "learning_rate": 0.00038235113240852226, "loss": 0.1712, "step": 233590 }, { "epoch": 9.68, "grad_norm": 1.0, "learning_rate": 0.00038234193155125954, "loss": 0.2369, "step": 233600 }, { "epoch": 9.68, "grad_norm": 1.4296875, "learning_rate": 0.00038233273044494356, "loss": 0.2308, "step": 233610 }, { "epoch": 9.68, "grad_norm": 0.265625, "learning_rate": 0.0003823235290895917, "loss": 0.2157, "step": 233620 }, { "epoch": 9.68, "grad_norm": 0.6875, "learning_rate": 0.00038231432748522115, "loss": 0.1355, "step": 233630 }, { "epoch": 9.68, "grad_norm": 1.359375, "learning_rate": 0.00038230512563184937, "loss": 0.1829, "step": 233640 }, { "epoch": 9.68, "grad_norm": 0.73046875, "learning_rate": 0.0003822959235294936, "loss": 0.2015, "step": 233650 }, { "epoch": 9.68, "grad_norm": 0.72265625, "learning_rate": 0.0003822867211781713, "loss": 0.1728, "step": 233660 }, { "epoch": 9.68, "grad_norm": 1.0078125, "learning_rate": 0.0003822775185778996, "loss": 0.1922, "step": 233670 }, { "epoch": 9.68, "grad_norm": 0.7109375, "learning_rate": 0.0003822683157286959, "loss": 0.1529, "step": 233680 }, { "epoch": 9.68, "grad_norm": 0.63671875, "learning_rate": 0.0003822591126305774, "loss": 0.1895, "step": 233690 }, { "epoch": 9.68, "grad_norm": 0.984375, "learning_rate": 0.0003822499092835617, "loss": 0.1733, "step": 233700 }, { "epoch": 9.68, "grad_norm": 1.4140625, "learning_rate": 0.00038224070568766596, "loss": 0.1736, "step": 233710 }, { "epoch": 9.68, "grad_norm": 0.953125, "learning_rate": 0.00038223150184290737, "loss": 0.2141, "step": 233720 }, { "epoch": 9.68, "grad_norm": 0.74609375, "learning_rate": 0.00038222229774930344, "loss": 0.1567, "step": 233730 }, { "epoch": 9.68, "grad_norm": 0.283203125, "learning_rate": 0.0003822130934068715, "loss": 0.2165, "step": 233740 }, { "epoch": 9.68, "grad_norm": 1.2890625, "learning_rate": 0.00038220388881562864, "loss": 0.2182, "step": 233750 }, { "epoch": 9.68, "grad_norm": 0.8203125, "learning_rate": 0.00038219468397559245, "loss": 0.2053, "step": 233760 }, { "epoch": 9.68, "grad_norm": 0.703125, "learning_rate": 0.00038218547888678, "loss": 0.2134, "step": 233770 }, { "epoch": 9.68, "grad_norm": 0.8515625, "learning_rate": 0.0003821762735492089, "loss": 0.2011, "step": 233780 }, { "epoch": 9.68, "grad_norm": 1.4765625, "learning_rate": 0.00038216706796289637, "loss": 0.1913, "step": 233790 }, { "epoch": 9.68, "grad_norm": 0.9375, "learning_rate": 0.00038215786212785955, "loss": 0.2462, "step": 233800 }, { "epoch": 9.68, "grad_norm": 0.58203125, "learning_rate": 0.0003821486560441159, "loss": 0.1953, "step": 233810 }, { "epoch": 9.68, "grad_norm": 1.03125, "learning_rate": 0.0003821394497116829, "loss": 0.211, "step": 233820 }, { "epoch": 9.69, "grad_norm": 0.625, "learning_rate": 0.0003821302431305775, "loss": 0.1964, "step": 233830 }, { "epoch": 9.69, "grad_norm": 0.9140625, "learning_rate": 0.00038212103630081746, "loss": 0.2074, "step": 233840 }, { "epoch": 9.69, "grad_norm": 0.09716796875, "learning_rate": 0.0003821118292224198, "loss": 0.1659, "step": 233850 }, { "epoch": 9.69, "grad_norm": 0.310546875, "learning_rate": 0.000382102621895402, "loss": 0.1766, "step": 233860 }, { "epoch": 9.69, "grad_norm": 0.23828125, "learning_rate": 0.0003820934143197813, "loss": 0.1721, "step": 233870 }, { "epoch": 9.69, "grad_norm": 0.478515625, "learning_rate": 0.00038208420649557496, "loss": 0.2388, "step": 233880 }, { "epoch": 9.69, "grad_norm": 0.87109375, "learning_rate": 0.0003820749984228006, "loss": 0.1701, "step": 233890 }, { "epoch": 9.69, "grad_norm": 0.5234375, "learning_rate": 0.00038206579010147525, "loss": 0.2055, "step": 233900 }, { "epoch": 9.69, "grad_norm": 1.15625, "learning_rate": 0.0003820565815316163, "loss": 0.1939, "step": 233910 }, { "epoch": 9.69, "grad_norm": 0.271484375, "learning_rate": 0.00038204737271324123, "loss": 0.2628, "step": 233920 }, { "epoch": 9.69, "grad_norm": 0.578125, "learning_rate": 0.00038203816364636725, "loss": 0.2173, "step": 233930 }, { "epoch": 9.69, "grad_norm": 0.01220703125, "learning_rate": 0.0003820289543310116, "loss": 0.2436, "step": 233940 }, { "epoch": 9.69, "grad_norm": 1.0546875, "learning_rate": 0.0003820197447671918, "loss": 0.1562, "step": 233950 }, { "epoch": 9.69, "grad_norm": 0.8046875, "learning_rate": 0.00038201053495492504, "loss": 0.1991, "step": 233960 }, { "epoch": 9.69, "grad_norm": 0.484375, "learning_rate": 0.0003820013248942288, "loss": 0.1788, "step": 233970 }, { "epoch": 9.69, "grad_norm": 0.48828125, "learning_rate": 0.0003819921145851203, "loss": 0.1496, "step": 233980 }, { "epoch": 9.69, "grad_norm": 0.7890625, "learning_rate": 0.0003819829040276168, "loss": 0.1593, "step": 233990 }, { "epoch": 9.69, "grad_norm": 1.2265625, "learning_rate": 0.0003819736932217358, "loss": 0.1646, "step": 234000 }, { "epoch": 9.69, "grad_norm": 0.97265625, "learning_rate": 0.00038196448216749456, "loss": 0.1752, "step": 234010 }, { "epoch": 9.69, "grad_norm": 0.94921875, "learning_rate": 0.00038195527086491043, "loss": 0.2437, "step": 234020 }, { "epoch": 9.69, "grad_norm": 0.76171875, "learning_rate": 0.00038194605931400074, "loss": 0.1755, "step": 234030 }, { "epoch": 9.69, "grad_norm": 0.76953125, "learning_rate": 0.00038193684751478273, "loss": 0.1719, "step": 234040 }, { "epoch": 9.69, "grad_norm": 0.482421875, "learning_rate": 0.0003819276354672739, "loss": 0.2112, "step": 234050 }, { "epoch": 9.69, "grad_norm": 0.9296875, "learning_rate": 0.0003819184231714915, "loss": 0.1951, "step": 234060 }, { "epoch": 9.7, "grad_norm": 0.328125, "learning_rate": 0.00038190921062745286, "loss": 0.2609, "step": 234070 }, { "epoch": 9.7, "grad_norm": 1.734375, "learning_rate": 0.0003818999978351754, "loss": 0.1917, "step": 234080 }, { "epoch": 9.7, "grad_norm": 0.7109375, "learning_rate": 0.0003818907847946762, "loss": 0.2182, "step": 234090 }, { "epoch": 9.7, "grad_norm": 1.328125, "learning_rate": 0.0003818815715059729, "loss": 0.1966, "step": 234100 }, { "epoch": 9.7, "grad_norm": 1.1484375, "learning_rate": 0.0003818723579690828, "loss": 0.1565, "step": 234110 }, { "epoch": 9.7, "grad_norm": 0.0, "learning_rate": 0.00038186314418402304, "loss": 0.2163, "step": 234120 }, { "epoch": 9.7, "grad_norm": 0.734375, "learning_rate": 0.00038185393015081117, "loss": 0.1873, "step": 234130 }, { "epoch": 9.7, "grad_norm": 0.6015625, "learning_rate": 0.00038184471586946435, "loss": 0.1369, "step": 234140 }, { "epoch": 9.7, "grad_norm": 0.5859375, "learning_rate": 0.00038183550134000013, "loss": 0.1804, "step": 234150 }, { "epoch": 9.7, "grad_norm": 0.9140625, "learning_rate": 0.00038182628656243566, "loss": 0.1728, "step": 234160 }, { "epoch": 9.7, "grad_norm": 1.515625, "learning_rate": 0.00038181707153678833, "loss": 0.1832, "step": 234170 }, { "epoch": 9.7, "grad_norm": 0.95703125, "learning_rate": 0.0003818078562630756, "loss": 0.1722, "step": 234180 }, { "epoch": 9.7, "grad_norm": 0.71484375, "learning_rate": 0.0003817986407413146, "loss": 0.2181, "step": 234190 }, { "epoch": 9.7, "grad_norm": 0.52734375, "learning_rate": 0.0003817894249715228, "loss": 0.1812, "step": 234200 }, { "epoch": 9.7, "grad_norm": 0.6796875, "learning_rate": 0.0003817802089537177, "loss": 0.1943, "step": 234210 }, { "epoch": 9.7, "grad_norm": 0.9609375, "learning_rate": 0.0003817709926879163, "loss": 0.1955, "step": 234220 }, { "epoch": 9.7, "grad_norm": 0.255859375, "learning_rate": 0.0003817617761741362, "loss": 0.2231, "step": 234230 }, { "epoch": 9.7, "grad_norm": 0.353515625, "learning_rate": 0.0003817525594123947, "loss": 0.196, "step": 234240 }, { "epoch": 9.7, "grad_norm": 0.4609375, "learning_rate": 0.00038174334240270903, "loss": 0.1893, "step": 234250 }, { "epoch": 9.7, "grad_norm": 0.66015625, "learning_rate": 0.00038173412514509665, "loss": 0.1922, "step": 234260 }, { "epoch": 9.7, "grad_norm": 3.015625, "learning_rate": 0.00038172490763957483, "loss": 0.2179, "step": 234270 }, { "epoch": 9.7, "grad_norm": 0.63671875, "learning_rate": 0.00038171568988616103, "loss": 0.2057, "step": 234280 }, { "epoch": 9.7, "grad_norm": 0.66015625, "learning_rate": 0.00038170647188487253, "loss": 0.2127, "step": 234290 }, { "epoch": 9.7, "grad_norm": 0.94140625, "learning_rate": 0.0003816972536357266, "loss": 0.2602, "step": 234300 }, { "epoch": 9.71, "grad_norm": 2.125, "learning_rate": 0.00038168803513874075, "loss": 0.1862, "step": 234310 }, { "epoch": 9.71, "grad_norm": 0.484375, "learning_rate": 0.0003816788163939322, "loss": 0.2242, "step": 234320 }, { "epoch": 9.71, "grad_norm": 1.0703125, "learning_rate": 0.0003816695974013183, "loss": 0.2035, "step": 234330 }, { "epoch": 9.71, "grad_norm": 0.86328125, "learning_rate": 0.00038166037816091646, "loss": 0.1822, "step": 234340 }, { "epoch": 9.71, "grad_norm": 1.6953125, "learning_rate": 0.000381651158672744, "loss": 0.211, "step": 234350 }, { "epoch": 9.71, "grad_norm": 0.408203125, "learning_rate": 0.00038164193893681827, "loss": 0.2136, "step": 234360 }, { "epoch": 9.71, "grad_norm": 0.546875, "learning_rate": 0.00038163271895315663, "loss": 0.1994, "step": 234370 }, { "epoch": 9.71, "grad_norm": 0.455078125, "learning_rate": 0.0003816234987217765, "loss": 0.2097, "step": 234380 }, { "epoch": 9.71, "grad_norm": 0.515625, "learning_rate": 0.00038161427824269506, "loss": 0.1652, "step": 234390 }, { "epoch": 9.71, "grad_norm": 0.4609375, "learning_rate": 0.00038160505751592977, "loss": 0.2158, "step": 234400 }, { "epoch": 9.71, "grad_norm": 0.2734375, "learning_rate": 0.0003815958365414981, "loss": 0.2033, "step": 234410 }, { "epoch": 9.71, "grad_norm": 0.95703125, "learning_rate": 0.0003815866153194171, "loss": 0.1849, "step": 234420 }, { "epoch": 9.71, "grad_norm": 0.56640625, "learning_rate": 0.0003815773938497044, "loss": 0.2012, "step": 234430 }, { "epoch": 9.71, "grad_norm": 0.71484375, "learning_rate": 0.00038156817213237724, "loss": 0.1775, "step": 234440 }, { "epoch": 9.71, "grad_norm": 1.546875, "learning_rate": 0.000381558950167453, "loss": 0.204, "step": 234450 }, { "epoch": 9.71, "grad_norm": 1.8125, "learning_rate": 0.00038154972795494905, "loss": 0.1895, "step": 234460 }, { "epoch": 9.71, "grad_norm": 0.0283203125, "learning_rate": 0.00038154050549488264, "loss": 0.1525, "step": 234470 }, { "epoch": 9.71, "grad_norm": 0.76171875, "learning_rate": 0.00038153128278727125, "loss": 0.1837, "step": 234480 }, { "epoch": 9.71, "grad_norm": 1.015625, "learning_rate": 0.00038152205983213217, "loss": 0.171, "step": 234490 }, { "epoch": 9.71, "grad_norm": 1.15625, "learning_rate": 0.0003815128366294828, "loss": 0.2157, "step": 234500 }, { "epoch": 9.71, "grad_norm": 0.94140625, "learning_rate": 0.00038150361317934047, "loss": 0.2328, "step": 234510 }, { "epoch": 9.71, "grad_norm": 1.125, "learning_rate": 0.0003814943894817225, "loss": 0.2552, "step": 234520 }, { "epoch": 9.71, "grad_norm": 0.4921875, "learning_rate": 0.0003814851655366464, "loss": 0.2405, "step": 234530 }, { "epoch": 9.71, "grad_norm": 0.69921875, "learning_rate": 0.00038147594134412926, "loss": 0.2146, "step": 234540 }, { "epoch": 9.72, "grad_norm": 1.546875, "learning_rate": 0.00038146671690418864, "loss": 0.1704, "step": 234550 }, { "epoch": 9.72, "grad_norm": 1.8515625, "learning_rate": 0.0003814574922168419, "loss": 0.1982, "step": 234560 }, { "epoch": 9.72, "grad_norm": 0.8125, "learning_rate": 0.00038144826728210633, "loss": 0.1947, "step": 234570 }, { "epoch": 9.72, "grad_norm": 1.125, "learning_rate": 0.00038143904209999934, "loss": 0.2532, "step": 234580 }, { "epoch": 9.72, "grad_norm": 0.7265625, "learning_rate": 0.00038142981667053835, "loss": 0.2523, "step": 234590 }, { "epoch": 9.72, "grad_norm": 0.8984375, "learning_rate": 0.0003814205909937405, "loss": 0.1713, "step": 234600 }, { "epoch": 9.72, "grad_norm": 0.98046875, "learning_rate": 0.0003814113650696233, "loss": 0.1703, "step": 234610 }, { "epoch": 9.72, "grad_norm": 0.640625, "learning_rate": 0.00038140213889820417, "loss": 0.1856, "step": 234620 }, { "epoch": 9.72, "grad_norm": 0.40625, "learning_rate": 0.00038139291247950036, "loss": 0.2106, "step": 234630 }, { "epoch": 9.72, "grad_norm": 0.66015625, "learning_rate": 0.00038138368581352934, "loss": 0.1808, "step": 234640 }, { "epoch": 9.72, "grad_norm": 0.5546875, "learning_rate": 0.00038137445890030835, "loss": 0.2039, "step": 234650 }, { "epoch": 9.72, "grad_norm": 0.7109375, "learning_rate": 0.00038136523173985484, "loss": 0.1486, "step": 234660 }, { "epoch": 9.72, "grad_norm": 0.578125, "learning_rate": 0.0003813560043321861, "loss": 0.1933, "step": 234670 }, { "epoch": 9.72, "grad_norm": 0.41796875, "learning_rate": 0.0003813467766773196, "loss": 0.1796, "step": 234680 }, { "epoch": 9.72, "grad_norm": 0.875, "learning_rate": 0.0003813375487752726, "loss": 0.2328, "step": 234690 }, { "epoch": 9.72, "grad_norm": 1.4453125, "learning_rate": 0.0003813283206260626, "loss": 0.1691, "step": 234700 }, { "epoch": 9.72, "grad_norm": 0.578125, "learning_rate": 0.00038131909222970686, "loss": 0.1869, "step": 234710 }, { "epoch": 9.72, "grad_norm": 0.640625, "learning_rate": 0.00038130986358622264, "loss": 0.2123, "step": 234720 }, { "epoch": 9.72, "grad_norm": 0.32421875, "learning_rate": 0.00038130063469562765, "loss": 0.187, "step": 234730 }, { "epoch": 9.72, "grad_norm": 2.3125, "learning_rate": 0.00038129140555793887, "loss": 0.1914, "step": 234740 }, { "epoch": 9.72, "grad_norm": 0.69140625, "learning_rate": 0.00038128217617317396, "loss": 0.2049, "step": 234750 }, { "epoch": 9.72, "grad_norm": 0.78515625, "learning_rate": 0.00038127294654135007, "loss": 0.2138, "step": 234760 }, { "epoch": 9.72, "grad_norm": 1.109375, "learning_rate": 0.00038126371666248474, "loss": 0.2351, "step": 234770 }, { "epoch": 9.72, "grad_norm": 0.3671875, "learning_rate": 0.0003812544865365953, "loss": 0.2181, "step": 234780 }, { "epoch": 9.72, "grad_norm": 0.5078125, "learning_rate": 0.000381245256163699, "loss": 0.1775, "step": 234790 }, { "epoch": 9.73, "grad_norm": 0.326171875, "learning_rate": 0.0003812360255438133, "loss": 0.2067, "step": 234800 }, { "epoch": 9.73, "grad_norm": 0.484375, "learning_rate": 0.0003812267946769557, "loss": 0.2067, "step": 234810 }, { "epoch": 9.73, "grad_norm": 1.2890625, "learning_rate": 0.0003812175635631433, "loss": 0.2212, "step": 234820 }, { "epoch": 9.73, "grad_norm": 0.42578125, "learning_rate": 0.0003812083322023937, "loss": 0.1988, "step": 234830 }, { "epoch": 9.73, "grad_norm": 0.004638671875, "learning_rate": 0.00038119910059472415, "loss": 0.139, "step": 234840 }, { "epoch": 9.73, "grad_norm": 1.34375, "learning_rate": 0.00038118986874015207, "loss": 0.2136, "step": 234850 }, { "epoch": 9.73, "grad_norm": 2.328125, "learning_rate": 0.00038118063663869484, "loss": 0.1823, "step": 234860 }, { "epoch": 9.73, "grad_norm": 2.015625, "learning_rate": 0.00038117140429036976, "loss": 0.2296, "step": 234870 }, { "epoch": 9.73, "grad_norm": 0.90234375, "learning_rate": 0.00038116217169519435, "loss": 0.1945, "step": 234880 }, { "epoch": 9.73, "grad_norm": 0.69921875, "learning_rate": 0.0003811529388531859, "loss": 0.1959, "step": 234890 }, { "epoch": 9.73, "grad_norm": 0.328125, "learning_rate": 0.0003811437057643617, "loss": 0.2214, "step": 234900 }, { "epoch": 9.73, "grad_norm": 0.7421875, "learning_rate": 0.0003811344724287393, "loss": 0.1687, "step": 234910 }, { "epoch": 9.73, "grad_norm": 0.337890625, "learning_rate": 0.0003811252388463359, "loss": 0.1858, "step": 234920 }, { "epoch": 9.73, "grad_norm": 0.4921875, "learning_rate": 0.000381116005017169, "loss": 0.2075, "step": 234930 }, { "epoch": 9.73, "grad_norm": 1.09375, "learning_rate": 0.0003811067709412559, "loss": 0.1537, "step": 234940 }, { "epoch": 9.73, "grad_norm": 1.421875, "learning_rate": 0.000381097536618614, "loss": 0.2052, "step": 234950 }, { "epoch": 9.73, "grad_norm": 1.3046875, "learning_rate": 0.00038108830204926084, "loss": 0.1733, "step": 234960 }, { "epoch": 9.73, "grad_norm": 1.21875, "learning_rate": 0.0003810790672332135, "loss": 0.1645, "step": 234970 }, { "epoch": 9.73, "grad_norm": 0.306640625, "learning_rate": 0.0003810698321704895, "loss": 0.1624, "step": 234980 }, { "epoch": 9.73, "grad_norm": 1.3515625, "learning_rate": 0.0003810605968611064, "loss": 0.1658, "step": 234990 }, { "epoch": 9.73, "grad_norm": 1.4140625, "learning_rate": 0.0003810513613050812, "loss": 0.1922, "step": 235000 }, { "epoch": 9.73, "grad_norm": 1.21875, "learning_rate": 0.0003810421255024317, "loss": 0.2554, "step": 235010 }, { "epoch": 9.73, "grad_norm": 0.91796875, "learning_rate": 0.0003810328894531749, "loss": 0.1863, "step": 235020 }, { "epoch": 9.73, "grad_norm": 0.60546875, "learning_rate": 0.0003810236531573284, "loss": 0.2077, "step": 235030 }, { "epoch": 9.74, "grad_norm": 0.8359375, "learning_rate": 0.00038101441661490956, "loss": 0.1563, "step": 235040 }, { "epoch": 9.74, "grad_norm": 0.427734375, "learning_rate": 0.00038100517982593573, "loss": 0.1917, "step": 235050 }, { "epoch": 9.74, "grad_norm": 0.609375, "learning_rate": 0.00038099594279042426, "loss": 0.1734, "step": 235060 }, { "epoch": 9.74, "grad_norm": 0.78515625, "learning_rate": 0.00038098670550839263, "loss": 0.1583, "step": 235070 }, { "epoch": 9.74, "grad_norm": 1.0078125, "learning_rate": 0.00038097746797985816, "loss": 0.1339, "step": 235080 }, { "epoch": 9.74, "grad_norm": 0.458984375, "learning_rate": 0.00038096823020483824, "loss": 0.1929, "step": 235090 }, { "epoch": 9.74, "grad_norm": 1.71875, "learning_rate": 0.00038095899218335027, "loss": 0.1886, "step": 235100 }, { "epoch": 9.74, "grad_norm": 1.0859375, "learning_rate": 0.00038094975391541155, "loss": 0.1662, "step": 235110 }, { "epoch": 9.74, "grad_norm": 0.421875, "learning_rate": 0.0003809405154010396, "loss": 0.209, "step": 235120 }, { "epoch": 9.74, "grad_norm": 1.4765625, "learning_rate": 0.0003809312766402516, "loss": 0.2073, "step": 235130 }, { "epoch": 9.74, "grad_norm": 0.98046875, "learning_rate": 0.0003809220376330652, "loss": 0.2279, "step": 235140 }, { "epoch": 9.74, "grad_norm": 0.51171875, "learning_rate": 0.00038091279837949773, "loss": 0.1869, "step": 235150 }, { "epoch": 9.74, "grad_norm": 0.63671875, "learning_rate": 0.0003809035588795664, "loss": 0.1525, "step": 235160 }, { "epoch": 9.74, "grad_norm": 0.7421875, "learning_rate": 0.00038089431913328875, "loss": 0.195, "step": 235170 }, { "epoch": 9.74, "grad_norm": 1.1953125, "learning_rate": 0.00038088507914068215, "loss": 0.1842, "step": 235180 }, { "epoch": 9.74, "grad_norm": 1.359375, "learning_rate": 0.00038087583890176384, "loss": 0.2085, "step": 235190 }, { "epoch": 9.74, "grad_norm": 1.2578125, "learning_rate": 0.00038086659841655144, "loss": 0.1958, "step": 235200 }, { "epoch": 9.74, "grad_norm": 0.84765625, "learning_rate": 0.00038085735768506225, "loss": 0.2446, "step": 235210 }, { "epoch": 9.74, "grad_norm": 1.4765625, "learning_rate": 0.0003808481167073136, "loss": 0.1618, "step": 235220 }, { "epoch": 9.74, "grad_norm": 1.5625, "learning_rate": 0.000380838875483323, "loss": 0.2073, "step": 235230 }, { "epoch": 9.74, "grad_norm": 0.91015625, "learning_rate": 0.00038082963401310765, "loss": 0.1988, "step": 235240 }, { "epoch": 9.74, "grad_norm": 0.51953125, "learning_rate": 0.00038082039229668513, "loss": 0.1933, "step": 235250 }, { "epoch": 9.74, "grad_norm": 0.625, "learning_rate": 0.00038081115033407277, "loss": 0.2081, "step": 235260 }, { "epoch": 9.74, "grad_norm": 0.5703125, "learning_rate": 0.00038080190812528786, "loss": 0.1751, "step": 235270 }, { "epoch": 9.75, "grad_norm": 1.7734375, "learning_rate": 0.00038079266567034797, "loss": 0.2231, "step": 235280 }, { "epoch": 9.75, "grad_norm": 0.61328125, "learning_rate": 0.0003807834229692704, "loss": 0.2798, "step": 235290 }, { "epoch": 9.75, "grad_norm": 0.78125, "learning_rate": 0.0003807741800220725, "loss": 0.1687, "step": 235300 }, { "epoch": 9.75, "grad_norm": 0.66015625, "learning_rate": 0.0003807649368287718, "loss": 0.1541, "step": 235310 }, { "epoch": 9.75, "grad_norm": 0.74609375, "learning_rate": 0.00038075569338938555, "loss": 0.2463, "step": 235320 }, { "epoch": 9.75, "grad_norm": 0.58984375, "learning_rate": 0.0003807464497039312, "loss": 0.2401, "step": 235330 }, { "epoch": 9.75, "grad_norm": 0.81640625, "learning_rate": 0.0003807372057724262, "loss": 0.2261, "step": 235340 }, { "epoch": 9.75, "grad_norm": 0.66796875, "learning_rate": 0.00038072796159488784, "loss": 0.164, "step": 235350 }, { "epoch": 9.75, "grad_norm": 0.52734375, "learning_rate": 0.00038071871717133365, "loss": 0.2237, "step": 235360 }, { "epoch": 9.75, "grad_norm": 0.8671875, "learning_rate": 0.0003807094725017809, "loss": 0.2242, "step": 235370 }, { "epoch": 9.75, "grad_norm": 0.8359375, "learning_rate": 0.00038070022758624703, "loss": 0.2706, "step": 235380 }, { "epoch": 9.75, "grad_norm": 0.6328125, "learning_rate": 0.0003806909824247494, "loss": 0.1953, "step": 235390 }, { "epoch": 9.75, "grad_norm": 0.390625, "learning_rate": 0.0003806817370173056, "loss": 0.2086, "step": 235400 }, { "epoch": 9.75, "grad_norm": 0.83984375, "learning_rate": 0.0003806724913639327, "loss": 0.2122, "step": 235410 }, { "epoch": 9.75, "grad_norm": 0.58203125, "learning_rate": 0.00038066324546464844, "loss": 0.2127, "step": 235420 }, { "epoch": 9.75, "grad_norm": 1.3828125, "learning_rate": 0.00038065399931946995, "loss": 0.1937, "step": 235430 }, { "epoch": 9.75, "grad_norm": 0.53125, "learning_rate": 0.00038064475292841484, "loss": 0.1922, "step": 235440 }, { "epoch": 9.75, "grad_norm": 0.99609375, "learning_rate": 0.00038063550629150036, "loss": 0.2171, "step": 235450 }, { "epoch": 9.75, "grad_norm": 1.046875, "learning_rate": 0.0003806262594087439, "loss": 0.2128, "step": 235460 }, { "epoch": 9.75, "grad_norm": 0.58984375, "learning_rate": 0.00038061701228016303, "loss": 0.2353, "step": 235470 }, { "epoch": 9.75, "grad_norm": 0.71875, "learning_rate": 0.000380607764905775, "loss": 0.1953, "step": 235480 }, { "epoch": 9.75, "grad_norm": 0.8359375, "learning_rate": 0.00038059851728559723, "loss": 0.213, "step": 235490 }, { "epoch": 9.75, "grad_norm": 0.53125, "learning_rate": 0.0003805892694196472, "loss": 0.1959, "step": 235500 }, { "epoch": 9.75, "grad_norm": 0.83984375, "learning_rate": 0.0003805800213079423, "loss": 0.2464, "step": 235510 }, { "epoch": 9.76, "grad_norm": 0.71875, "learning_rate": 0.00038057077295049993, "loss": 0.2156, "step": 235520 }, { "epoch": 9.76, "grad_norm": 0.42578125, "learning_rate": 0.0003805615243473374, "loss": 0.1877, "step": 235530 }, { "epoch": 9.76, "grad_norm": 0.71484375, "learning_rate": 0.00038055227549847216, "loss": 0.1904, "step": 235540 }, { "epoch": 9.76, "grad_norm": 0.42578125, "learning_rate": 0.0003805430264039217, "loss": 0.1854, "step": 235550 }, { "epoch": 9.76, "grad_norm": 0.80078125, "learning_rate": 0.00038053377706370327, "loss": 0.2214, "step": 235560 }, { "epoch": 9.76, "grad_norm": 0.62890625, "learning_rate": 0.0003805245274778344, "loss": 0.2401, "step": 235570 }, { "epoch": 9.76, "grad_norm": 0.9140625, "learning_rate": 0.0003805152776463324, "loss": 0.1757, "step": 235580 }, { "epoch": 9.76, "grad_norm": 0.890625, "learning_rate": 0.0003805060275692149, "loss": 0.1828, "step": 235590 }, { "epoch": 9.76, "grad_norm": 0.765625, "learning_rate": 0.0003804967772464991, "loss": 0.1686, "step": 235600 }, { "epoch": 9.76, "grad_norm": 0.6796875, "learning_rate": 0.0003804875266782024, "loss": 0.1369, "step": 235610 }, { "epoch": 9.76, "grad_norm": 1.1015625, "learning_rate": 0.0003804782758643423, "loss": 0.1863, "step": 235620 }, { "epoch": 9.76, "grad_norm": 0.416015625, "learning_rate": 0.0003804690248049362, "loss": 0.1772, "step": 235630 }, { "epoch": 9.76, "grad_norm": 0.4921875, "learning_rate": 0.00038045977350000145, "loss": 0.1658, "step": 235640 }, { "epoch": 9.76, "grad_norm": 0.96875, "learning_rate": 0.00038045052194955547, "loss": 0.1998, "step": 235650 }, { "epoch": 9.76, "grad_norm": 0.91796875, "learning_rate": 0.00038044127015361576, "loss": 0.2063, "step": 235660 }, { "epoch": 9.76, "grad_norm": 1.203125, "learning_rate": 0.0003804320181121996, "loss": 0.1767, "step": 235670 }, { "epoch": 9.76, "grad_norm": 1.421875, "learning_rate": 0.00038042276582532454, "loss": 0.1983, "step": 235680 }, { "epoch": 9.76, "grad_norm": 0.68359375, "learning_rate": 0.0003804135132930079, "loss": 0.1991, "step": 235690 }, { "epoch": 9.76, "grad_norm": 0.84375, "learning_rate": 0.00038040426051526706, "loss": 0.2206, "step": 235700 }, { "epoch": 9.76, "grad_norm": 0.87109375, "learning_rate": 0.0003803950074921195, "loss": 0.1735, "step": 235710 }, { "epoch": 9.76, "grad_norm": 0.2138671875, "learning_rate": 0.0003803857542235826, "loss": 0.187, "step": 235720 }, { "epoch": 9.76, "grad_norm": 0.341796875, "learning_rate": 0.0003803765007096739, "loss": 0.1719, "step": 235730 }, { "epoch": 9.76, "grad_norm": 0.5546875, "learning_rate": 0.0003803672469504106, "loss": 0.2127, "step": 235740 }, { "epoch": 9.76, "grad_norm": 0.76953125, "learning_rate": 0.0003803579929458102, "loss": 0.1976, "step": 235750 }, { "epoch": 9.77, "grad_norm": 0.373046875, "learning_rate": 0.00038034873869589023, "loss": 0.2109, "step": 235760 }, { "epoch": 9.77, "grad_norm": 0.8046875, "learning_rate": 0.000380339484200668, "loss": 0.2141, "step": 235770 }, { "epoch": 9.77, "grad_norm": 0.75390625, "learning_rate": 0.00038033022946016085, "loss": 0.1823, "step": 235780 }, { "epoch": 9.77, "grad_norm": 0.73828125, "learning_rate": 0.0003803209744743863, "loss": 0.1889, "step": 235790 }, { "epoch": 9.77, "grad_norm": 1.953125, "learning_rate": 0.00038031171924336185, "loss": 0.2668, "step": 235800 }, { "epoch": 9.77, "grad_norm": 0.6796875, "learning_rate": 0.00038030246376710475, "loss": 0.1566, "step": 235810 }, { "epoch": 9.77, "grad_norm": 0.90625, "learning_rate": 0.00038029320804563245, "loss": 0.2969, "step": 235820 }, { "epoch": 9.77, "grad_norm": 1.3828125, "learning_rate": 0.0003802839520789624, "loss": 0.1608, "step": 235830 }, { "epoch": 9.77, "grad_norm": 0.7734375, "learning_rate": 0.0003802746958671121, "loss": 0.2009, "step": 235840 }, { "epoch": 9.77, "grad_norm": 1.328125, "learning_rate": 0.0003802654394100988, "loss": 0.1944, "step": 235850 }, { "epoch": 9.77, "grad_norm": 1.6875, "learning_rate": 0.0003802561827079401, "loss": 0.1844, "step": 235860 }, { "epoch": 9.77, "grad_norm": 0.765625, "learning_rate": 0.0003802469257606533, "loss": 0.1779, "step": 235870 }, { "epoch": 9.77, "grad_norm": 1.796875, "learning_rate": 0.00038023766856825584, "loss": 0.2125, "step": 235880 }, { "epoch": 9.77, "grad_norm": 1.7421875, "learning_rate": 0.00038022841113076515, "loss": 0.219, "step": 235890 }, { "epoch": 9.77, "grad_norm": 0.74609375, "learning_rate": 0.0003802191534481987, "loss": 0.2397, "step": 235900 }, { "epoch": 9.77, "grad_norm": 0.6328125, "learning_rate": 0.00038020989552057377, "loss": 0.2552, "step": 235910 }, { "epoch": 9.77, "grad_norm": 0.470703125, "learning_rate": 0.0003802006373479079, "loss": 0.2063, "step": 235920 }, { "epoch": 9.77, "grad_norm": 0.65625, "learning_rate": 0.0003801913789302186, "loss": 0.2219, "step": 235930 }, { "epoch": 9.77, "grad_norm": 0.72265625, "learning_rate": 0.00038018212026752306, "loss": 0.1789, "step": 235940 }, { "epoch": 9.77, "grad_norm": 0.77734375, "learning_rate": 0.0003801728613598389, "loss": 0.2193, "step": 235950 }, { "epoch": 9.77, "grad_norm": 1.1015625, "learning_rate": 0.00038016360220718336, "loss": 0.1992, "step": 235960 }, { "epoch": 9.77, "grad_norm": 0.59375, "learning_rate": 0.0003801543428095742, "loss": 0.2095, "step": 235970 }, { "epoch": 9.77, "grad_norm": 0.515625, "learning_rate": 0.00038014508316702847, "loss": 0.1858, "step": 235980 }, { "epoch": 9.77, "grad_norm": 0.96875, "learning_rate": 0.0003801358232795637, "loss": 0.1797, "step": 235990 }, { "epoch": 9.78, "grad_norm": 0.60546875, "learning_rate": 0.0003801265631471974, "loss": 0.19, "step": 236000 }, { "epoch": 9.78, "grad_norm": 1.0078125, "learning_rate": 0.00038011730276994705, "loss": 0.2076, "step": 236010 }, { "epoch": 9.78, "grad_norm": 0.59765625, "learning_rate": 0.0003801080421478299, "loss": 0.2298, "step": 236020 }, { "epoch": 9.78, "grad_norm": 0.59375, "learning_rate": 0.0003800987812808635, "loss": 0.1866, "step": 236030 }, { "epoch": 9.78, "grad_norm": 0.5546875, "learning_rate": 0.00038008952016906517, "loss": 0.1815, "step": 236040 }, { "epoch": 9.78, "grad_norm": 0.37109375, "learning_rate": 0.00038008025881245257, "loss": 0.1563, "step": 236050 }, { "epoch": 9.78, "grad_norm": 0.7265625, "learning_rate": 0.0003800709972110429, "loss": 0.1717, "step": 236060 }, { "epoch": 9.78, "grad_norm": 0.80859375, "learning_rate": 0.00038006173536485354, "loss": 0.1736, "step": 236070 }, { "epoch": 9.78, "grad_norm": 0.69140625, "learning_rate": 0.00038005247327390214, "loss": 0.2169, "step": 236080 }, { "epoch": 9.78, "grad_norm": 0.2470703125, "learning_rate": 0.000380043210938206, "loss": 0.1805, "step": 236090 }, { "epoch": 9.78, "grad_norm": 0.97265625, "learning_rate": 0.0003800339483577827, "loss": 0.2317, "step": 236100 }, { "epoch": 9.78, "grad_norm": 0.490234375, "learning_rate": 0.0003800246855326494, "loss": 0.1676, "step": 236110 }, { "epoch": 9.78, "grad_norm": 1.8515625, "learning_rate": 0.0003800154224628237, "loss": 0.1685, "step": 236120 }, { "epoch": 9.78, "grad_norm": 1.9140625, "learning_rate": 0.0003800061591483231, "loss": 0.215, "step": 236130 }, { "epoch": 9.78, "grad_norm": 0.72265625, "learning_rate": 0.00037999689558916496, "loss": 0.1948, "step": 236140 }, { "epoch": 9.78, "grad_norm": 0.259765625, "learning_rate": 0.0003799876317853666, "loss": 0.1768, "step": 236150 }, { "epoch": 9.78, "grad_norm": 0.78515625, "learning_rate": 0.0003799783677369456, "loss": 0.1625, "step": 236160 }, { "epoch": 9.78, "grad_norm": 0.37109375, "learning_rate": 0.00037996910344391936, "loss": 0.2015, "step": 236170 }, { "epoch": 9.78, "grad_norm": 0.197265625, "learning_rate": 0.00037995983890630523, "loss": 0.1851, "step": 236180 }, { "epoch": 9.78, "grad_norm": 1.1015625, "learning_rate": 0.0003799505741241208, "loss": 0.1921, "step": 236190 }, { "epoch": 9.78, "grad_norm": 0.4765625, "learning_rate": 0.00037994130909738344, "loss": 0.1832, "step": 236200 }, { "epoch": 9.78, "grad_norm": 0.76953125, "learning_rate": 0.00037993204382611047, "loss": 0.2732, "step": 236210 }, { "epoch": 9.78, "grad_norm": 0.423828125, "learning_rate": 0.0003799227783103195, "loss": 0.1822, "step": 236220 }, { "epoch": 9.78, "grad_norm": 1.703125, "learning_rate": 0.0003799135125500279, "loss": 0.1918, "step": 236230 }, { "epoch": 9.79, "grad_norm": 1.265625, "learning_rate": 0.0003799042465452531, "loss": 0.1831, "step": 236240 }, { "epoch": 9.79, "grad_norm": 1.3125, "learning_rate": 0.00037989498029601253, "loss": 0.2179, "step": 236250 }, { "epoch": 9.79, "grad_norm": 1.375, "learning_rate": 0.00037988571380232356, "loss": 0.1839, "step": 236260 }, { "epoch": 9.79, "grad_norm": 0.4140625, "learning_rate": 0.00037987644706420377, "loss": 0.1781, "step": 236270 }, { "epoch": 9.79, "grad_norm": 0.63671875, "learning_rate": 0.00037986718008167054, "loss": 0.1886, "step": 236280 }, { "epoch": 9.79, "grad_norm": 0.37890625, "learning_rate": 0.0003798579128547412, "loss": 0.2099, "step": 236290 }, { "epoch": 9.79, "grad_norm": 0.64453125, "learning_rate": 0.00037984864538343345, "loss": 0.2119, "step": 236300 }, { "epoch": 9.79, "grad_norm": 0.326171875, "learning_rate": 0.0003798393776677645, "loss": 0.1844, "step": 236310 }, { "epoch": 9.79, "grad_norm": 1.1875, "learning_rate": 0.00037983010970775184, "loss": 0.1845, "step": 236320 }, { "epoch": 9.79, "grad_norm": 0.9296875, "learning_rate": 0.000379820841503413, "loss": 0.2101, "step": 236330 }, { "epoch": 9.79, "grad_norm": 0.41015625, "learning_rate": 0.00037981157305476523, "loss": 0.19, "step": 236340 }, { "epoch": 9.79, "grad_norm": 1.2109375, "learning_rate": 0.0003798023043618262, "loss": 0.1282, "step": 236350 }, { "epoch": 9.79, "grad_norm": 0.69921875, "learning_rate": 0.00037979303542461324, "loss": 0.1601, "step": 236360 }, { "epoch": 9.79, "grad_norm": 0.77734375, "learning_rate": 0.0003797837662431438, "loss": 0.2049, "step": 236370 }, { "epoch": 9.79, "grad_norm": 0.90625, "learning_rate": 0.0003797744968174354, "loss": 0.1692, "step": 236380 }, { "epoch": 9.79, "grad_norm": 0.73828125, "learning_rate": 0.0003797652271475053, "loss": 0.1858, "step": 236390 }, { "epoch": 9.79, "grad_norm": 1.2421875, "learning_rate": 0.00037975595723337103, "loss": 0.2294, "step": 236400 }, { "epoch": 9.79, "grad_norm": 0.2431640625, "learning_rate": 0.0003797466870750501, "loss": 0.1693, "step": 236410 }, { "epoch": 9.79, "grad_norm": 0.55859375, "learning_rate": 0.0003797374166725599, "loss": 0.2393, "step": 236420 }, { "epoch": 9.79, "grad_norm": 0.76953125, "learning_rate": 0.000379728146025918, "loss": 0.1467, "step": 236430 }, { "epoch": 9.79, "grad_norm": 1.171875, "learning_rate": 0.0003797188751351416, "loss": 0.2113, "step": 236440 }, { "epoch": 9.79, "grad_norm": 0.87109375, "learning_rate": 0.00037970960400024836, "loss": 0.1923, "step": 236450 }, { "epoch": 9.79, "grad_norm": 0.51171875, "learning_rate": 0.0003797003326212557, "loss": 0.1701, "step": 236460 }, { "epoch": 9.79, "grad_norm": 0.95703125, "learning_rate": 0.0003796910609981809, "loss": 0.184, "step": 236470 }, { "epoch": 9.79, "grad_norm": 0.88671875, "learning_rate": 0.00037968178913104164, "loss": 0.194, "step": 236480 }, { "epoch": 9.8, "grad_norm": 0.421875, "learning_rate": 0.0003796725170198552, "loss": 0.14, "step": 236490 }, { "epoch": 9.8, "grad_norm": 1.7265625, "learning_rate": 0.0003796632446646391, "loss": 0.1632, "step": 236500 }, { "epoch": 9.8, "grad_norm": 1.15625, "learning_rate": 0.00037965397206541073, "loss": 0.2148, "step": 236510 }, { "epoch": 9.8, "grad_norm": 0.984375, "learning_rate": 0.0003796446992221877, "loss": 0.1992, "step": 236520 }, { "epoch": 9.8, "grad_norm": 0.265625, "learning_rate": 0.00037963542613498723, "loss": 0.2098, "step": 236530 }, { "epoch": 9.8, "grad_norm": 0.3828125, "learning_rate": 0.000379626152803827, "loss": 0.1813, "step": 236540 }, { "epoch": 9.8, "grad_norm": 0.84375, "learning_rate": 0.00037961687922872416, "loss": 0.2135, "step": 236550 }, { "epoch": 9.8, "grad_norm": 0.51953125, "learning_rate": 0.0003796076054096965, "loss": 0.2166, "step": 236560 }, { "epoch": 9.8, "grad_norm": 2.8125, "learning_rate": 0.00037959833134676137, "loss": 0.1948, "step": 236570 }, { "epoch": 9.8, "grad_norm": 0.578125, "learning_rate": 0.00037958905703993603, "loss": 0.2168, "step": 236580 }, { "epoch": 9.8, "grad_norm": 0.9296875, "learning_rate": 0.0003795797824892382, "loss": 0.1679, "step": 236590 }, { "epoch": 9.8, "grad_norm": 0.47265625, "learning_rate": 0.00037957050769468515, "loss": 0.1907, "step": 236600 }, { "epoch": 9.8, "grad_norm": 1.703125, "learning_rate": 0.0003795612326562944, "loss": 0.1406, "step": 236610 }, { "epoch": 9.8, "grad_norm": 0.6015625, "learning_rate": 0.00037955195737408344, "loss": 0.1539, "step": 236620 }, { "epoch": 9.8, "grad_norm": 0.625, "learning_rate": 0.00037954268184806956, "loss": 0.1915, "step": 236630 }, { "epoch": 9.8, "grad_norm": 0.8203125, "learning_rate": 0.0003795334060782705, "loss": 0.1686, "step": 236640 }, { "epoch": 9.8, "grad_norm": 0.55859375, "learning_rate": 0.00037952413006470353, "loss": 0.2012, "step": 236650 }, { "epoch": 9.8, "grad_norm": 0.765625, "learning_rate": 0.000379514853807386, "loss": 0.1911, "step": 236660 }, { "epoch": 9.8, "grad_norm": 0.78515625, "learning_rate": 0.0003795055773063357, "loss": 0.2298, "step": 236670 }, { "epoch": 9.8, "grad_norm": 0.578125, "learning_rate": 0.0003794963005615698, "loss": 0.209, "step": 236680 }, { "epoch": 9.8, "grad_norm": 0.333984375, "learning_rate": 0.0003794870235731058, "loss": 0.1464, "step": 236690 }, { "epoch": 9.8, "grad_norm": 1.2109375, "learning_rate": 0.0003794777463409612, "loss": 0.2158, "step": 236700 }, { "epoch": 9.8, "grad_norm": 0.83984375, "learning_rate": 0.0003794684688651535, "loss": 0.1985, "step": 236710 }, { "epoch": 9.8, "grad_norm": 0.6171875, "learning_rate": 0.00037945919114570014, "loss": 0.2107, "step": 236720 }, { "epoch": 9.81, "grad_norm": 1.0625, "learning_rate": 0.00037944991318261857, "loss": 0.209, "step": 236730 }, { "epoch": 9.81, "grad_norm": 1.1796875, "learning_rate": 0.00037944063497592615, "loss": 0.1944, "step": 236740 }, { "epoch": 9.81, "grad_norm": 0.97265625, "learning_rate": 0.0003794313565256406, "loss": 0.1596, "step": 236750 }, { "epoch": 9.81, "grad_norm": 0.255859375, "learning_rate": 0.0003794220778317791, "loss": 0.1841, "step": 236760 }, { "epoch": 9.81, "grad_norm": 0.6796875, "learning_rate": 0.0003794127988943593, "loss": 0.1909, "step": 236770 }, { "epoch": 9.81, "grad_norm": 1.0078125, "learning_rate": 0.00037940351971339846, "loss": 0.1762, "step": 236780 }, { "epoch": 9.81, "grad_norm": 1.015625, "learning_rate": 0.0003793942402889142, "loss": 0.1831, "step": 236790 }, { "epoch": 9.81, "grad_norm": 0.28515625, "learning_rate": 0.0003793849606209241, "loss": 0.1508, "step": 236800 }, { "epoch": 9.81, "grad_norm": 0.98046875, "learning_rate": 0.0003793756807094454, "loss": 0.1985, "step": 236810 }, { "epoch": 9.81, "grad_norm": 0.3828125, "learning_rate": 0.00037936640055449556, "loss": 0.2009, "step": 236820 }, { "epoch": 9.81, "grad_norm": 0.8828125, "learning_rate": 0.0003793571201560922, "loss": 0.2281, "step": 236830 }, { "epoch": 9.81, "grad_norm": 0.6484375, "learning_rate": 0.00037934783951425264, "loss": 0.2049, "step": 236840 }, { "epoch": 9.81, "grad_norm": 0.8828125, "learning_rate": 0.00037933855862899447, "loss": 0.235, "step": 236850 }, { "epoch": 9.81, "grad_norm": 1.8125, "learning_rate": 0.0003793292775003352, "loss": 0.2091, "step": 236860 }, { "epoch": 9.81, "grad_norm": 0.62109375, "learning_rate": 0.00037931999612829197, "loss": 0.1956, "step": 236870 }, { "epoch": 9.81, "grad_norm": 0.71875, "learning_rate": 0.00037931071451288267, "loss": 0.2211, "step": 236880 }, { "epoch": 9.81, "grad_norm": 1.1015625, "learning_rate": 0.0003793014326541244, "loss": 0.2141, "step": 236890 }, { "epoch": 9.81, "grad_norm": 0.5078125, "learning_rate": 0.0003792921505520349, "loss": 0.1654, "step": 236900 }, { "epoch": 9.81, "grad_norm": 1.03125, "learning_rate": 0.0003792828682066316, "loss": 0.2008, "step": 236910 }, { "epoch": 9.81, "grad_norm": 0.423828125, "learning_rate": 0.0003792735856179318, "loss": 0.1891, "step": 236920 }, { "epoch": 9.81, "grad_norm": 0.48828125, "learning_rate": 0.00037926430278595314, "loss": 0.1763, "step": 236930 }, { "epoch": 9.81, "grad_norm": 0.8125, "learning_rate": 0.000379255019710713, "loss": 0.2354, "step": 236940 }, { "epoch": 9.81, "grad_norm": 1.3203125, "learning_rate": 0.00037924573639222883, "loss": 0.1577, "step": 236950 }, { "epoch": 9.81, "grad_norm": 0.65234375, "learning_rate": 0.00037923645283051817, "loss": 0.2565, "step": 236960 }, { "epoch": 9.82, "grad_norm": 1.5546875, "learning_rate": 0.0003792271690255984, "loss": 0.212, "step": 236970 }, { "epoch": 9.82, "grad_norm": 1.1015625, "learning_rate": 0.0003792178849774871, "loss": 0.1887, "step": 236980 }, { "epoch": 9.82, "grad_norm": 0.921875, "learning_rate": 0.00037920860068620175, "loss": 0.189, "step": 236990 }, { "epoch": 9.82, "grad_norm": 0.65234375, "learning_rate": 0.0003791993161517597, "loss": 0.1813, "step": 237000 }, { "epoch": 9.82, "grad_norm": 1.40625, "learning_rate": 0.0003791900313741785, "loss": 0.1908, "step": 237010 }, { "epoch": 9.82, "grad_norm": 0.578125, "learning_rate": 0.00037918074635347567, "loss": 0.1947, "step": 237020 }, { "epoch": 9.82, "grad_norm": 1.3359375, "learning_rate": 0.00037917146108966854, "loss": 0.1901, "step": 237030 }, { "epoch": 9.82, "grad_norm": 1.40625, "learning_rate": 0.0003791621755827748, "loss": 0.2305, "step": 237040 }, { "epoch": 9.82, "grad_norm": 0.73046875, "learning_rate": 0.0003791528898328116, "loss": 0.1918, "step": 237050 }, { "epoch": 9.82, "grad_norm": 1.4921875, "learning_rate": 0.0003791436038397967, "loss": 0.201, "step": 237060 }, { "epoch": 9.82, "grad_norm": 2.265625, "learning_rate": 0.00037913431760374755, "loss": 0.2027, "step": 237070 }, { "epoch": 9.82, "grad_norm": 0.400390625, "learning_rate": 0.0003791250311246815, "loss": 0.1624, "step": 237080 }, { "epoch": 9.82, "grad_norm": 0.5625, "learning_rate": 0.0003791157444026161, "loss": 0.2039, "step": 237090 }, { "epoch": 9.82, "grad_norm": 0.81640625, "learning_rate": 0.00037910645743756877, "loss": 0.1923, "step": 237100 }, { "epoch": 9.82, "grad_norm": 0.443359375, "learning_rate": 0.000379097170229557, "loss": 0.1938, "step": 237110 }, { "epoch": 9.82, "grad_norm": 0.44921875, "learning_rate": 0.00037908788277859844, "loss": 0.1665, "step": 237120 }, { "epoch": 9.82, "grad_norm": 0.458984375, "learning_rate": 0.00037907859508471033, "loss": 0.1843, "step": 237130 }, { "epoch": 9.82, "grad_norm": 1.1328125, "learning_rate": 0.0003790693071479103, "loss": 0.1606, "step": 237140 }, { "epoch": 9.82, "grad_norm": 1.2265625, "learning_rate": 0.00037906001896821574, "loss": 0.2226, "step": 237150 }, { "epoch": 9.82, "grad_norm": 0.55078125, "learning_rate": 0.0003790507305456441, "loss": 0.2147, "step": 237160 }, { "epoch": 9.82, "grad_norm": 0.359375, "learning_rate": 0.00037904144188021297, "loss": 0.2116, "step": 237170 }, { "epoch": 9.82, "grad_norm": 0.875, "learning_rate": 0.0003790321529719398, "loss": 0.2335, "step": 237180 }, { "epoch": 9.82, "grad_norm": 0.6015625, "learning_rate": 0.000379022863820842, "loss": 0.2133, "step": 237190 }, { "epoch": 9.82, "grad_norm": 1.53125, "learning_rate": 0.00037901357442693715, "loss": 0.1624, "step": 237200 }, { "epoch": 9.83, "grad_norm": 1.0546875, "learning_rate": 0.0003790042847902427, "loss": 0.2098, "step": 237210 }, { "epoch": 9.83, "grad_norm": 0.50390625, "learning_rate": 0.00037899499491077606, "loss": 0.2068, "step": 237220 }, { "epoch": 9.83, "grad_norm": 0.5703125, "learning_rate": 0.0003789857047885548, "loss": 0.1766, "step": 237230 }, { "epoch": 9.83, "grad_norm": 0.875, "learning_rate": 0.0003789764144235964, "loss": 0.1785, "step": 237240 }, { "epoch": 9.83, "grad_norm": 1.1328125, "learning_rate": 0.0003789671238159183, "loss": 0.2479, "step": 237250 }, { "epoch": 9.83, "grad_norm": 0.6875, "learning_rate": 0.000378957832965538, "loss": 0.2046, "step": 237260 }, { "epoch": 9.83, "grad_norm": 0.66015625, "learning_rate": 0.00037894854187247294, "loss": 0.1974, "step": 237270 }, { "epoch": 9.83, "grad_norm": 0.66015625, "learning_rate": 0.0003789392505367407, "loss": 0.202, "step": 237280 }, { "epoch": 9.83, "grad_norm": 1.5234375, "learning_rate": 0.0003789299589583587, "loss": 0.214, "step": 237290 }, { "epoch": 9.83, "grad_norm": 0.95703125, "learning_rate": 0.00037892066713734443, "loss": 0.2595, "step": 237300 }, { "epoch": 9.83, "grad_norm": 1.34375, "learning_rate": 0.0003789113750737154, "loss": 0.1624, "step": 237310 }, { "epoch": 9.83, "grad_norm": 0.486328125, "learning_rate": 0.0003789020827674891, "loss": 0.2039, "step": 237320 }, { "epoch": 9.83, "grad_norm": 0.76171875, "learning_rate": 0.000378892790218683, "loss": 0.1886, "step": 237330 }, { "epoch": 9.83, "grad_norm": 0.68359375, "learning_rate": 0.00037888349742731456, "loss": 0.1701, "step": 237340 }, { "epoch": 9.83, "grad_norm": 0.76953125, "learning_rate": 0.00037887420439340126, "loss": 0.2258, "step": 237350 }, { "epoch": 9.83, "grad_norm": 1.0625, "learning_rate": 0.00037886491111696076, "loss": 0.2082, "step": 237360 }, { "epoch": 9.83, "grad_norm": 0.220703125, "learning_rate": 0.00037885561759801027, "loss": 0.1999, "step": 237370 }, { "epoch": 9.83, "grad_norm": 0.6328125, "learning_rate": 0.0003788463238365675, "loss": 0.2159, "step": 237380 }, { "epoch": 9.83, "grad_norm": 1.3046875, "learning_rate": 0.0003788370298326498, "loss": 0.1874, "step": 237390 }, { "epoch": 9.83, "grad_norm": 0.5390625, "learning_rate": 0.0003788277355862748, "loss": 0.1941, "step": 237400 }, { "epoch": 9.83, "grad_norm": 0.6953125, "learning_rate": 0.0003788184410974599, "loss": 0.1888, "step": 237410 }, { "epoch": 9.83, "grad_norm": 0.46875, "learning_rate": 0.00037880914636622255, "loss": 0.1913, "step": 237420 }, { "epoch": 9.83, "grad_norm": 0.625, "learning_rate": 0.00037879985139258033, "loss": 0.2132, "step": 237430 }, { "epoch": 9.83, "grad_norm": 0.32421875, "learning_rate": 0.00037879055617655067, "loss": 0.2197, "step": 237440 }, { "epoch": 9.84, "grad_norm": 1.078125, "learning_rate": 0.00037878126071815113, "loss": 0.1921, "step": 237450 }, { "epoch": 9.84, "grad_norm": 1.0, "learning_rate": 0.0003787719650173992, "loss": 0.2246, "step": 237460 }, { "epoch": 9.84, "grad_norm": 0.8125, "learning_rate": 0.0003787626690743122, "loss": 0.1844, "step": 237470 }, { "epoch": 9.84, "grad_norm": 0.5, "learning_rate": 0.00037875337288890786, "loss": 0.1699, "step": 237480 }, { "epoch": 9.84, "grad_norm": 1.75, "learning_rate": 0.0003787440764612036, "loss": 0.1813, "step": 237490 }, { "epoch": 9.84, "grad_norm": 0.84375, "learning_rate": 0.0003787347797912168, "loss": 0.2265, "step": 237500 }, { "epoch": 9.84, "grad_norm": 0.796875, "learning_rate": 0.0003787254828789651, "loss": 0.1987, "step": 237510 }, { "epoch": 9.84, "grad_norm": 0.30859375, "learning_rate": 0.000378716185724466, "loss": 0.1486, "step": 237520 }, { "epoch": 9.84, "grad_norm": 0.54296875, "learning_rate": 0.0003787068883277369, "loss": 0.2025, "step": 237530 }, { "epoch": 9.84, "grad_norm": 0.44140625, "learning_rate": 0.00037869759068879534, "loss": 0.2307, "step": 237540 }, { "epoch": 9.84, "grad_norm": 0.7265625, "learning_rate": 0.0003786882928076588, "loss": 0.1612, "step": 237550 }, { "epoch": 9.84, "grad_norm": 1.78125, "learning_rate": 0.00037867899468434473, "loss": 0.2332, "step": 237560 }, { "epoch": 9.84, "grad_norm": 1.46875, "learning_rate": 0.0003786696963188708, "loss": 0.2113, "step": 237570 }, { "epoch": 9.84, "grad_norm": 0.72265625, "learning_rate": 0.00037866039771125435, "loss": 0.168, "step": 237580 }, { "epoch": 9.84, "grad_norm": 0.66015625, "learning_rate": 0.00037865109886151287, "loss": 0.1862, "step": 237590 }, { "epoch": 9.84, "grad_norm": 0.337890625, "learning_rate": 0.00037864179976966394, "loss": 0.1905, "step": 237600 }, { "epoch": 9.84, "grad_norm": 0.609375, "learning_rate": 0.0003786325004357251, "loss": 0.1718, "step": 237610 }, { "epoch": 9.84, "grad_norm": 1.125, "learning_rate": 0.00037862320085971376, "loss": 0.1556, "step": 237620 }, { "epoch": 9.84, "grad_norm": 0.87109375, "learning_rate": 0.0003786139010416474, "loss": 0.2199, "step": 237630 }, { "epoch": 9.84, "grad_norm": 0.640625, "learning_rate": 0.0003786046009815436, "loss": 0.2019, "step": 237640 }, { "epoch": 9.84, "grad_norm": 0.60546875, "learning_rate": 0.00037859530067941986, "loss": 0.1816, "step": 237650 }, { "epoch": 9.84, "grad_norm": 0.84765625, "learning_rate": 0.0003785860001352936, "loss": 0.1993, "step": 237660 }, { "epoch": 9.84, "grad_norm": 0.87109375, "learning_rate": 0.0003785766993491824, "loss": 0.1485, "step": 237670 }, { "epoch": 9.84, "grad_norm": 0.287109375, "learning_rate": 0.00037856739832110365, "loss": 0.2006, "step": 237680 }, { "epoch": 9.85, "grad_norm": 0.609375, "learning_rate": 0.00037855809705107505, "loss": 0.2253, "step": 237690 }, { "epoch": 9.85, "grad_norm": 0.625, "learning_rate": 0.000378548795539114, "loss": 0.1736, "step": 237700 }, { "epoch": 9.85, "grad_norm": 0.89453125, "learning_rate": 0.0003785394937852379, "loss": 0.1897, "step": 237710 }, { "epoch": 9.85, "grad_norm": 1.015625, "learning_rate": 0.0003785301917894645, "loss": 0.2169, "step": 237720 }, { "epoch": 9.85, "grad_norm": 0.5546875, "learning_rate": 0.000378520889551811, "loss": 0.174, "step": 237730 }, { "epoch": 9.85, "grad_norm": 0.4609375, "learning_rate": 0.0003785115870722952, "loss": 0.222, "step": 237740 }, { "epoch": 9.85, "grad_norm": 0.9140625, "learning_rate": 0.00037850228435093436, "loss": 0.2014, "step": 237750 }, { "epoch": 9.85, "grad_norm": 0.921875, "learning_rate": 0.0003784929813877461, "loss": 0.1877, "step": 237760 }, { "epoch": 9.85, "grad_norm": 0.205078125, "learning_rate": 0.000378483678182748, "loss": 0.1612, "step": 237770 }, { "epoch": 9.85, "grad_norm": 0.7109375, "learning_rate": 0.0003784743747359575, "loss": 0.1986, "step": 237780 }, { "epoch": 9.85, "grad_norm": 1.015625, "learning_rate": 0.000378465071047392, "loss": 0.1928, "step": 237790 }, { "epoch": 9.85, "grad_norm": 0.78125, "learning_rate": 0.0003784557671170692, "loss": 0.2133, "step": 237800 }, { "epoch": 9.85, "grad_norm": 0.314453125, "learning_rate": 0.0003784464629450065, "loss": 0.2285, "step": 237810 }, { "epoch": 9.85, "grad_norm": 0.64453125, "learning_rate": 0.0003784371585312215, "loss": 0.1615, "step": 237820 }, { "epoch": 9.85, "grad_norm": 0.76953125, "learning_rate": 0.00037842785387573154, "loss": 0.1947, "step": 237830 }, { "epoch": 9.85, "grad_norm": 0.66796875, "learning_rate": 0.00037841854897855423, "loss": 0.2315, "step": 237840 }, { "epoch": 9.85, "grad_norm": 0.671875, "learning_rate": 0.0003784092438397071, "loss": 0.2001, "step": 237850 }, { "epoch": 9.85, "grad_norm": 0.890625, "learning_rate": 0.00037839993845920755, "loss": 0.173, "step": 237860 }, { "epoch": 9.85, "grad_norm": 0.625, "learning_rate": 0.0003783906328370733, "loss": 0.1762, "step": 237870 }, { "epoch": 9.85, "grad_norm": 1.03125, "learning_rate": 0.00037838132697332174, "loss": 0.1892, "step": 237880 }, { "epoch": 9.85, "grad_norm": 0.6953125, "learning_rate": 0.0003783720208679703, "loss": 0.2301, "step": 237890 }, { "epoch": 9.85, "grad_norm": 0.8671875, "learning_rate": 0.00037836271452103666, "loss": 0.1704, "step": 237900 }, { "epoch": 9.85, "grad_norm": 0.7890625, "learning_rate": 0.00037835340793253823, "loss": 0.2212, "step": 237910 }, { "epoch": 9.85, "grad_norm": 0.515625, "learning_rate": 0.00037834410110249256, "loss": 0.1932, "step": 237920 }, { "epoch": 9.86, "grad_norm": 0.361328125, "learning_rate": 0.0003783347940309172, "loss": 0.198, "step": 237930 }, { "epoch": 9.86, "grad_norm": 1.0859375, "learning_rate": 0.00037832548671782945, "loss": 0.1836, "step": 237940 }, { "epoch": 9.86, "grad_norm": 0.921875, "learning_rate": 0.00037831617916324713, "loss": 0.1883, "step": 237950 }, { "epoch": 9.86, "grad_norm": 0.66015625, "learning_rate": 0.0003783068713671876, "loss": 0.1966, "step": 237960 }, { "epoch": 9.86, "grad_norm": 1.109375, "learning_rate": 0.0003782975633296684, "loss": 0.1859, "step": 237970 }, { "epoch": 9.86, "grad_norm": 0.44921875, "learning_rate": 0.000378288255050707, "loss": 0.2137, "step": 237980 }, { "epoch": 9.86, "grad_norm": 0.6796875, "learning_rate": 0.00037827894653032104, "loss": 0.1912, "step": 237990 }, { "epoch": 9.86, "grad_norm": 0.7109375, "learning_rate": 0.00037826963776852786, "loss": 0.204, "step": 238000 }, { "epoch": 9.86, "grad_norm": 0.6484375, "learning_rate": 0.0003782603287653451, "loss": 0.2065, "step": 238010 }, { "epoch": 9.86, "grad_norm": 0.859375, "learning_rate": 0.0003782510195207902, "loss": 0.2351, "step": 238020 }, { "epoch": 9.86, "grad_norm": 0.84375, "learning_rate": 0.00037824171003488085, "loss": 0.1724, "step": 238030 }, { "epoch": 9.86, "grad_norm": 0.48046875, "learning_rate": 0.0003782324003076344, "loss": 0.1828, "step": 238040 }, { "epoch": 9.86, "grad_norm": 0.58984375, "learning_rate": 0.0003782230903390684, "loss": 0.1895, "step": 238050 }, { "epoch": 9.86, "grad_norm": 0.57421875, "learning_rate": 0.00037821378012920035, "loss": 0.1635, "step": 238060 }, { "epoch": 9.86, "grad_norm": 0.62890625, "learning_rate": 0.0003782044696780479, "loss": 0.1961, "step": 238070 }, { "epoch": 9.86, "grad_norm": 1.234375, "learning_rate": 0.00037819515898562843, "loss": 0.216, "step": 238080 }, { "epoch": 9.86, "grad_norm": 0.8125, "learning_rate": 0.00037818584805195956, "loss": 0.1786, "step": 238090 }, { "epoch": 9.86, "grad_norm": 0.73828125, "learning_rate": 0.00037817653687705874, "loss": 0.2402, "step": 238100 }, { "epoch": 9.86, "grad_norm": 1.3046875, "learning_rate": 0.00037816722546094353, "loss": 0.2195, "step": 238110 }, { "epoch": 9.86, "grad_norm": 0.6796875, "learning_rate": 0.0003781579138036314, "loss": 0.2041, "step": 238120 }, { "epoch": 9.86, "grad_norm": 0.484375, "learning_rate": 0.00037814860190513996, "loss": 0.2096, "step": 238130 }, { "epoch": 9.86, "grad_norm": 1.265625, "learning_rate": 0.0003781392897654867, "loss": 0.1886, "step": 238140 }, { "epoch": 9.86, "grad_norm": 0.609375, "learning_rate": 0.000378129977384689, "loss": 0.1894, "step": 238150 }, { "epoch": 9.86, "grad_norm": 1.546875, "learning_rate": 0.00037812066476276476, "loss": 0.1789, "step": 238160 }, { "epoch": 9.86, "grad_norm": 0.75, "learning_rate": 0.00037811135189973113, "loss": 0.2054, "step": 238170 }, { "epoch": 9.87, "grad_norm": 0.55078125, "learning_rate": 0.00037810203879560573, "loss": 0.1626, "step": 238180 }, { "epoch": 9.87, "grad_norm": 0.47265625, "learning_rate": 0.0003780927254504062, "loss": 0.2394, "step": 238190 }, { "epoch": 9.87, "grad_norm": 0.671875, "learning_rate": 0.00037808341186414997, "loss": 0.1667, "step": 238200 }, { "epoch": 9.87, "grad_norm": 1.3984375, "learning_rate": 0.00037807409803685457, "loss": 0.1679, "step": 238210 }, { "epoch": 9.87, "grad_norm": 0.37109375, "learning_rate": 0.0003780647839685376, "loss": 0.1805, "step": 238220 }, { "epoch": 9.87, "grad_norm": 0.462890625, "learning_rate": 0.00037805546965921644, "loss": 0.159, "step": 238230 }, { "epoch": 9.87, "grad_norm": 0.81640625, "learning_rate": 0.00037804615510890884, "loss": 0.165, "step": 238240 }, { "epoch": 9.87, "grad_norm": 0.7734375, "learning_rate": 0.00037803684031763215, "loss": 0.2406, "step": 238250 }, { "epoch": 9.87, "grad_norm": 0.62890625, "learning_rate": 0.00037802752528540384, "loss": 0.1851, "step": 238260 }, { "epoch": 9.87, "grad_norm": 0.55859375, "learning_rate": 0.00037801821001224174, "loss": 0.189, "step": 238270 }, { "epoch": 9.87, "grad_norm": 1.046875, "learning_rate": 0.00037800889449816314, "loss": 0.2412, "step": 238280 }, { "epoch": 9.87, "grad_norm": 0.515625, "learning_rate": 0.00037799957874318555, "loss": 0.2173, "step": 238290 }, { "epoch": 9.87, "grad_norm": 0.5234375, "learning_rate": 0.00037799026274732664, "loss": 0.1635, "step": 238300 }, { "epoch": 9.87, "grad_norm": 0.96484375, "learning_rate": 0.00037798094651060376, "loss": 0.1831, "step": 238310 }, { "epoch": 9.87, "grad_norm": 0.490234375, "learning_rate": 0.0003779716300330347, "loss": 0.2022, "step": 238320 }, { "epoch": 9.87, "grad_norm": 0.71875, "learning_rate": 0.0003779623133146369, "loss": 0.1913, "step": 238330 }, { "epoch": 9.87, "grad_norm": 1.1484375, "learning_rate": 0.00037795299635542766, "loss": 0.1636, "step": 238340 }, { "epoch": 9.87, "grad_norm": 1.84375, "learning_rate": 0.0003779436791554248, "loss": 0.1881, "step": 238350 }, { "epoch": 9.87, "grad_norm": 0.875, "learning_rate": 0.0003779343617146458, "loss": 0.1704, "step": 238360 }, { "epoch": 9.87, "grad_norm": 1.5, "learning_rate": 0.00037792504403310804, "loss": 0.2077, "step": 238370 }, { "epoch": 9.87, "grad_norm": 3.234375, "learning_rate": 0.0003779157261108293, "loss": 0.1945, "step": 238380 }, { "epoch": 9.87, "grad_norm": 0.421875, "learning_rate": 0.00037790640794782684, "loss": 0.2239, "step": 238390 }, { "epoch": 9.87, "grad_norm": 0.45703125, "learning_rate": 0.0003778970895441184, "loss": 0.2154, "step": 238400 }, { "epoch": 9.87, "grad_norm": 0.82421875, "learning_rate": 0.0003778877708997214, "loss": 0.2145, "step": 238410 }, { "epoch": 9.88, "grad_norm": 0.48046875, "learning_rate": 0.0003778784520146534, "loss": 0.2227, "step": 238420 }, { "epoch": 9.88, "grad_norm": 0.64453125, "learning_rate": 0.0003778691328889321, "loss": 0.2091, "step": 238430 }, { "epoch": 9.88, "grad_norm": 0.4921875, "learning_rate": 0.0003778598135225748, "loss": 0.1951, "step": 238440 }, { "epoch": 9.88, "grad_norm": 1.1015625, "learning_rate": 0.00037785049391559917, "loss": 0.2482, "step": 238450 }, { "epoch": 9.88, "grad_norm": 0.91015625, "learning_rate": 0.00037784117406802276, "loss": 0.2702, "step": 238460 }, { "epoch": 9.88, "grad_norm": 1.5078125, "learning_rate": 0.000377831853979863, "loss": 0.1622, "step": 238470 }, { "epoch": 9.88, "grad_norm": 0.53125, "learning_rate": 0.0003778225336511375, "loss": 0.1776, "step": 238480 }, { "epoch": 9.88, "grad_norm": 1.203125, "learning_rate": 0.0003778132130818638, "loss": 0.2047, "step": 238490 }, { "epoch": 9.88, "grad_norm": 1.203125, "learning_rate": 0.0003778038922720594, "loss": 0.2295, "step": 238500 }, { "epoch": 9.88, "grad_norm": 0.431640625, "learning_rate": 0.000377794571221742, "loss": 0.1926, "step": 238510 }, { "epoch": 9.88, "grad_norm": 1.703125, "learning_rate": 0.0003777852499309289, "loss": 0.1893, "step": 238520 }, { "epoch": 9.88, "grad_norm": 0.546875, "learning_rate": 0.0003777759283996378, "loss": 0.1509, "step": 238530 }, { "epoch": 9.88, "grad_norm": 1.1796875, "learning_rate": 0.00037776660662788614, "loss": 0.1329, "step": 238540 }, { "epoch": 9.88, "grad_norm": 1.65625, "learning_rate": 0.0003777572846156916, "loss": 0.2402, "step": 238550 }, { "epoch": 9.88, "grad_norm": 0.26171875, "learning_rate": 0.0003777479623630716, "loss": 0.2006, "step": 238560 }, { "epoch": 9.88, "grad_norm": 1.921875, "learning_rate": 0.00037773863987004374, "loss": 0.1809, "step": 238570 }, { "epoch": 9.88, "grad_norm": 0.52734375, "learning_rate": 0.0003777293171366256, "loss": 0.1607, "step": 238580 }, { "epoch": 9.88, "grad_norm": 0.298828125, "learning_rate": 0.0003777199941628347, "loss": 0.1993, "step": 238590 }, { "epoch": 9.88, "grad_norm": 1.2265625, "learning_rate": 0.0003777106709486885, "loss": 0.1873, "step": 238600 }, { "epoch": 9.88, "grad_norm": 1.1015625, "learning_rate": 0.0003777013474942046, "loss": 0.2038, "step": 238610 }, { "epoch": 9.88, "grad_norm": 1.015625, "learning_rate": 0.0003776920237994006, "loss": 0.2228, "step": 238620 }, { "epoch": 9.88, "grad_norm": 0.32421875, "learning_rate": 0.0003776826998642939, "loss": 0.2127, "step": 238630 }, { "epoch": 9.88, "grad_norm": 1.40625, "learning_rate": 0.0003776733756889023, "loss": 0.1829, "step": 238640 }, { "epoch": 9.88, "grad_norm": 1.25, "learning_rate": 0.0003776640512732431, "loss": 0.1677, "step": 238650 }, { "epoch": 9.89, "grad_norm": 0.7109375, "learning_rate": 0.00037765472661733396, "loss": 0.2532, "step": 238660 }, { "epoch": 9.89, "grad_norm": 0.59765625, "learning_rate": 0.00037764540172119246, "loss": 0.2037, "step": 238670 }, { "epoch": 9.89, "grad_norm": 1.390625, "learning_rate": 0.000377636076584836, "loss": 0.2412, "step": 238680 }, { "epoch": 9.89, "grad_norm": 0.65625, "learning_rate": 0.00037762675120828226, "loss": 0.2326, "step": 238690 }, { "epoch": 9.89, "grad_norm": 0.63671875, "learning_rate": 0.0003776174255915488, "loss": 0.2335, "step": 238700 }, { "epoch": 9.89, "grad_norm": 0.44921875, "learning_rate": 0.000377608099734653, "loss": 0.2307, "step": 238710 }, { "epoch": 9.89, "grad_norm": 0.5390625, "learning_rate": 0.0003775987736376127, "loss": 0.2254, "step": 238720 }, { "epoch": 9.89, "grad_norm": 0.2373046875, "learning_rate": 0.0003775894473004453, "loss": 0.1423, "step": 238730 }, { "epoch": 9.89, "grad_norm": 1.4453125, "learning_rate": 0.0003775801207231682, "loss": 0.2219, "step": 238740 }, { "epoch": 9.89, "grad_norm": 3.53125, "learning_rate": 0.0003775707939057992, "loss": 0.2243, "step": 238750 }, { "epoch": 9.89, "grad_norm": 0.85546875, "learning_rate": 0.0003775614668483557, "loss": 0.1646, "step": 238760 }, { "epoch": 9.89, "grad_norm": 0.6328125, "learning_rate": 0.00037755213955085527, "loss": 0.2153, "step": 238770 }, { "epoch": 9.89, "grad_norm": 0.75, "learning_rate": 0.00037754281201331555, "loss": 0.1731, "step": 238780 }, { "epoch": 9.89, "grad_norm": 1.015625, "learning_rate": 0.00037753348423575397, "loss": 0.1972, "step": 238790 }, { "epoch": 9.89, "grad_norm": 0.482421875, "learning_rate": 0.00037752415621818814, "loss": 0.2361, "step": 238800 }, { "epoch": 9.89, "grad_norm": 1.0703125, "learning_rate": 0.0003775148279606356, "loss": 0.2102, "step": 238810 }, { "epoch": 9.89, "grad_norm": 0.99609375, "learning_rate": 0.00037750549946311403, "loss": 0.2754, "step": 238820 }, { "epoch": 9.89, "grad_norm": 0.5703125, "learning_rate": 0.0003774961707256408, "loss": 0.1821, "step": 238830 }, { "epoch": 9.89, "grad_norm": 0.60546875, "learning_rate": 0.00037748684174823357, "loss": 0.1923, "step": 238840 }, { "epoch": 9.89, "grad_norm": 0.0, "learning_rate": 0.0003774775125309099, "loss": 0.1793, "step": 238850 }, { "epoch": 9.89, "grad_norm": 0.359375, "learning_rate": 0.00037746818307368723, "loss": 0.2177, "step": 238860 }, { "epoch": 9.89, "grad_norm": 0.8515625, "learning_rate": 0.00037745885337658327, "loss": 0.2176, "step": 238870 }, { "epoch": 9.89, "grad_norm": 0.4453125, "learning_rate": 0.0003774495234396155, "loss": 0.1815, "step": 238880 }, { "epoch": 9.89, "grad_norm": 1.3359375, "learning_rate": 0.0003774401932628015, "loss": 0.1553, "step": 238890 }, { "epoch": 9.9, "grad_norm": 0.921875, "learning_rate": 0.0003774308628461588, "loss": 0.1965, "step": 238900 }, { "epoch": 9.9, "grad_norm": 0.9765625, "learning_rate": 0.000377421532189705, "loss": 0.1937, "step": 238910 }, { "epoch": 9.9, "grad_norm": 0.8125, "learning_rate": 0.00037741220129345764, "loss": 0.181, "step": 238920 }, { "epoch": 9.9, "grad_norm": 1.1796875, "learning_rate": 0.0003774028701574342, "loss": 0.2291, "step": 238930 }, { "epoch": 9.9, "grad_norm": 0.9609375, "learning_rate": 0.0003773935387816524, "loss": 0.1975, "step": 238940 }, { "epoch": 9.9, "grad_norm": 0.72265625, "learning_rate": 0.00037738420716612966, "loss": 0.1626, "step": 238950 }, { "epoch": 9.9, "grad_norm": 0.796875, "learning_rate": 0.00037737487531088365, "loss": 0.1766, "step": 238960 }, { "epoch": 9.9, "grad_norm": 2.984375, "learning_rate": 0.0003773655432159319, "loss": 0.2622, "step": 238970 }, { "epoch": 9.9, "grad_norm": 0.55859375, "learning_rate": 0.0003773562108812919, "loss": 0.1714, "step": 238980 }, { "epoch": 9.9, "grad_norm": 0.390625, "learning_rate": 0.0003773468783069812, "loss": 0.2215, "step": 238990 }, { "epoch": 9.9, "grad_norm": 0.5625, "learning_rate": 0.00037733754549301756, "loss": 0.2184, "step": 239000 }, { "epoch": 9.9, "grad_norm": 0.40234375, "learning_rate": 0.0003773282124394184, "loss": 0.2195, "step": 239010 }, { "epoch": 9.9, "grad_norm": 0.21484375, "learning_rate": 0.0003773188791462012, "loss": 0.1766, "step": 239020 }, { "epoch": 9.9, "grad_norm": 0.3203125, "learning_rate": 0.0003773095456133837, "loss": 0.188, "step": 239030 }, { "epoch": 9.9, "grad_norm": 0.65625, "learning_rate": 0.0003773002118409833, "loss": 0.2297, "step": 239040 }, { "epoch": 9.9, "grad_norm": 0.75390625, "learning_rate": 0.0003772908778290177, "loss": 0.2317, "step": 239050 }, { "epoch": 9.9, "grad_norm": 2.046875, "learning_rate": 0.00037728154357750444, "loss": 0.206, "step": 239060 }, { "epoch": 9.9, "grad_norm": 0.52734375, "learning_rate": 0.00037727220908646104, "loss": 0.1932, "step": 239070 }, { "epoch": 9.9, "grad_norm": 1.03125, "learning_rate": 0.0003772628743559051, "loss": 0.2466, "step": 239080 }, { "epoch": 9.9, "grad_norm": 1.453125, "learning_rate": 0.00037725353938585415, "loss": 0.2237, "step": 239090 }, { "epoch": 9.9, "grad_norm": 0.63671875, "learning_rate": 0.00037724420417632574, "loss": 0.196, "step": 239100 }, { "epoch": 9.9, "grad_norm": 0.384765625, "learning_rate": 0.00037723486872733757, "loss": 0.1852, "step": 239110 }, { "epoch": 9.9, "grad_norm": 0.65234375, "learning_rate": 0.00037722553303890703, "loss": 0.2056, "step": 239120 }, { "epoch": 9.9, "grad_norm": 0.1337890625, "learning_rate": 0.00037721619711105186, "loss": 0.1834, "step": 239130 }, { "epoch": 9.91, "grad_norm": 1.3671875, "learning_rate": 0.0003772068609437895, "loss": 0.2506, "step": 239140 }, { "epoch": 9.91, "grad_norm": 0.80859375, "learning_rate": 0.0003771975245371376, "loss": 0.1975, "step": 239150 }, { "epoch": 9.91, "grad_norm": 0.82421875, "learning_rate": 0.0003771881878911136, "loss": 0.1847, "step": 239160 }, { "epoch": 9.91, "grad_norm": 0.7109375, "learning_rate": 0.0003771788510057352, "loss": 0.1986, "step": 239170 }, { "epoch": 9.91, "grad_norm": 0.28515625, "learning_rate": 0.00037716951388102005, "loss": 0.2028, "step": 239180 }, { "epoch": 9.91, "grad_norm": 0.66796875, "learning_rate": 0.0003771601765169855, "loss": 0.1663, "step": 239190 }, { "epoch": 9.91, "grad_norm": 0.73046875, "learning_rate": 0.00037715083891364925, "loss": 0.1985, "step": 239200 }, { "epoch": 9.91, "grad_norm": 0.458984375, "learning_rate": 0.0003771415010710289, "loss": 0.176, "step": 239210 }, { "epoch": 9.91, "grad_norm": 0.5, "learning_rate": 0.0003771321629891419, "loss": 0.1446, "step": 239220 }, { "epoch": 9.91, "grad_norm": 1.0859375, "learning_rate": 0.00037712282466800595, "loss": 0.1805, "step": 239230 }, { "epoch": 9.91, "grad_norm": 1.0390625, "learning_rate": 0.00037711348610763856, "loss": 0.2583, "step": 239240 }, { "epoch": 9.91, "grad_norm": 0.6796875, "learning_rate": 0.00037710414730805726, "loss": 0.181, "step": 239250 }, { "epoch": 9.91, "grad_norm": 0.48828125, "learning_rate": 0.00037709480826927976, "loss": 0.2058, "step": 239260 }, { "epoch": 9.91, "grad_norm": 1.3359375, "learning_rate": 0.0003770854689913235, "loss": 0.2035, "step": 239270 }, { "epoch": 9.91, "grad_norm": 0.40234375, "learning_rate": 0.0003770761294742061, "loss": 0.2611, "step": 239280 }, { "epoch": 9.91, "grad_norm": 0.60546875, "learning_rate": 0.0003770667897179453, "loss": 0.181, "step": 239290 }, { "epoch": 9.91, "grad_norm": 0.90234375, "learning_rate": 0.00037705744972255827, "loss": 0.233, "step": 239300 }, { "epoch": 9.91, "grad_norm": 0.41015625, "learning_rate": 0.00037704810948806304, "loss": 0.187, "step": 239310 }, { "epoch": 9.91, "grad_norm": 1.15625, "learning_rate": 0.000377038769014477, "loss": 0.2111, "step": 239320 }, { "epoch": 9.91, "grad_norm": 0.55859375, "learning_rate": 0.00037702942830181754, "loss": 0.1936, "step": 239330 }, { "epoch": 9.91, "grad_norm": 0.68359375, "learning_rate": 0.00037702008735010257, "loss": 0.186, "step": 239340 }, { "epoch": 9.91, "grad_norm": 1.2265625, "learning_rate": 0.0003770107461593494, "loss": 0.2188, "step": 239350 }, { "epoch": 9.91, "grad_norm": 0.5625, "learning_rate": 0.00037700140472957577, "loss": 0.2268, "step": 239360 }, { "epoch": 9.91, "grad_norm": 0.466796875, "learning_rate": 0.0003769920630607993, "loss": 0.1851, "step": 239370 }, { "epoch": 9.92, "grad_norm": 0.498046875, "learning_rate": 0.00037698272115303734, "loss": 0.1782, "step": 239380 }, { "epoch": 9.92, "grad_norm": 0.8515625, "learning_rate": 0.0003769733790063077, "loss": 0.2121, "step": 239390 }, { "epoch": 9.92, "grad_norm": 0.65234375, "learning_rate": 0.00037696403662062784, "loss": 0.2397, "step": 239400 }, { "epoch": 9.92, "grad_norm": 0.55859375, "learning_rate": 0.0003769546939960153, "loss": 0.2027, "step": 239410 }, { "epoch": 9.92, "grad_norm": 0.95703125, "learning_rate": 0.00037694535113248794, "loss": 0.2357, "step": 239420 }, { "epoch": 9.92, "grad_norm": 1.3125, "learning_rate": 0.000376936008030063, "loss": 0.211, "step": 239430 }, { "epoch": 9.92, "grad_norm": 0.65625, "learning_rate": 0.00037692666468875817, "loss": 0.1971, "step": 239440 }, { "epoch": 9.92, "grad_norm": 0.86328125, "learning_rate": 0.00037691732110859116, "loss": 0.195, "step": 239450 }, { "epoch": 9.92, "grad_norm": 0.69921875, "learning_rate": 0.0003769079772895794, "loss": 0.1441, "step": 239460 }, { "epoch": 9.92, "grad_norm": 1.0390625, "learning_rate": 0.00037689863323174056, "loss": 0.2012, "step": 239470 }, { "epoch": 9.92, "grad_norm": 1.0546875, "learning_rate": 0.0003768892889350921, "loss": 0.1896, "step": 239480 }, { "epoch": 9.92, "grad_norm": 0.69921875, "learning_rate": 0.0003768799443996518, "loss": 0.2132, "step": 239490 }, { "epoch": 9.92, "grad_norm": 0.69140625, "learning_rate": 0.0003768705996254371, "loss": 0.195, "step": 239500 }, { "epoch": 9.92, "grad_norm": 1.0859375, "learning_rate": 0.0003768612546124657, "loss": 0.2089, "step": 239510 }, { "epoch": 9.92, "grad_norm": 0.8515625, "learning_rate": 0.0003768519093607551, "loss": 0.2208, "step": 239520 }, { "epoch": 9.92, "grad_norm": 0.4609375, "learning_rate": 0.0003768425638703229, "loss": 0.2188, "step": 239530 }, { "epoch": 9.92, "grad_norm": 0.9765625, "learning_rate": 0.0003768332181411866, "loss": 0.1935, "step": 239540 }, { "epoch": 9.92, "grad_norm": 0.416015625, "learning_rate": 0.000376823872173364, "loss": 0.189, "step": 239550 }, { "epoch": 9.92, "grad_norm": 0.69140625, "learning_rate": 0.0003768145259668725, "loss": 0.2148, "step": 239560 }, { "epoch": 9.92, "grad_norm": 0.447265625, "learning_rate": 0.00037680517952172975, "loss": 0.2572, "step": 239570 }, { "epoch": 9.92, "grad_norm": 0.5390625, "learning_rate": 0.00037679583283795336, "loss": 0.1712, "step": 239580 }, { "epoch": 9.92, "grad_norm": 0.87890625, "learning_rate": 0.000376786485915561, "loss": 0.1643, "step": 239590 }, { "epoch": 9.92, "grad_norm": 0.66796875, "learning_rate": 0.00037677713875456997, "loss": 0.2031, "step": 239600 }, { "epoch": 9.92, "grad_norm": 0.66015625, "learning_rate": 0.00037676779135499826, "loss": 0.2206, "step": 239610 }, { "epoch": 9.93, "grad_norm": 0.6875, "learning_rate": 0.0003767584437168631, "loss": 0.2052, "step": 239620 }, { "epoch": 9.93, "grad_norm": 0.921875, "learning_rate": 0.00037674909584018237, "loss": 0.2221, "step": 239630 }, { "epoch": 9.93, "grad_norm": 0.5234375, "learning_rate": 0.0003767397477249734, "loss": 0.2231, "step": 239640 }, { "epoch": 9.93, "grad_norm": 1.0078125, "learning_rate": 0.000376730399371254, "loss": 0.2331, "step": 239650 }, { "epoch": 9.93, "grad_norm": 0.7890625, "learning_rate": 0.00037672105077904167, "loss": 0.1697, "step": 239660 }, { "epoch": 9.93, "grad_norm": 0.375, "learning_rate": 0.000376711701948354, "loss": 0.2041, "step": 239670 }, { "epoch": 9.93, "grad_norm": 1.2421875, "learning_rate": 0.0003767023528792085, "loss": 0.229, "step": 239680 }, { "epoch": 9.93, "grad_norm": 1.0703125, "learning_rate": 0.000376693003571623, "loss": 0.2324, "step": 239690 }, { "epoch": 9.93, "grad_norm": 0.00732421875, "learning_rate": 0.0003766836540256148, "loss": 0.1834, "step": 239700 }, { "epoch": 9.93, "grad_norm": 0.63671875, "learning_rate": 0.00037667430424120175, "loss": 0.1526, "step": 239710 }, { "epoch": 9.93, "grad_norm": 0.74609375, "learning_rate": 0.0003766649542184013, "loss": 0.2097, "step": 239720 }, { "epoch": 9.93, "grad_norm": 0.64453125, "learning_rate": 0.00037665560395723106, "loss": 0.1865, "step": 239730 }, { "epoch": 9.93, "grad_norm": 0.515625, "learning_rate": 0.00037664625345770877, "loss": 0.1679, "step": 239740 }, { "epoch": 9.93, "grad_norm": 0.55859375, "learning_rate": 0.00037663690271985185, "loss": 0.2098, "step": 239750 }, { "epoch": 9.93, "grad_norm": 1.375, "learning_rate": 0.0003766275517436779, "loss": 0.2157, "step": 239760 }, { "epoch": 9.93, "grad_norm": 1.7109375, "learning_rate": 0.0003766182005292046, "loss": 0.1698, "step": 239770 }, { "epoch": 9.93, "grad_norm": 0.921875, "learning_rate": 0.0003766088490764495, "loss": 0.1971, "step": 239780 }, { "epoch": 9.93, "grad_norm": 0.48828125, "learning_rate": 0.00037659949738543024, "loss": 0.1774, "step": 239790 }, { "epoch": 9.93, "grad_norm": 0.78515625, "learning_rate": 0.0003765901454561644, "loss": 0.2173, "step": 239800 }, { "epoch": 9.93, "grad_norm": 0.984375, "learning_rate": 0.00037658079328866957, "loss": 0.2503, "step": 239810 }, { "epoch": 9.93, "grad_norm": 1.5234375, "learning_rate": 0.0003765714408829634, "loss": 0.2061, "step": 239820 }, { "epoch": 9.93, "grad_norm": 0.3125, "learning_rate": 0.0003765620882390634, "loss": 0.2594, "step": 239830 }, { "epoch": 9.93, "grad_norm": 0.6171875, "learning_rate": 0.0003765527353569872, "loss": 0.2161, "step": 239840 }, { "epoch": 9.93, "grad_norm": 0.578125, "learning_rate": 0.0003765433822367525, "loss": 0.1613, "step": 239850 }, { "epoch": 9.93, "grad_norm": 0.55859375, "learning_rate": 0.0003765340288783767, "loss": 0.196, "step": 239860 }, { "epoch": 9.94, "grad_norm": 0.83984375, "learning_rate": 0.0003765246752818776, "loss": 0.2344, "step": 239870 }, { "epoch": 9.94, "grad_norm": 1.1953125, "learning_rate": 0.0003765153214472727, "loss": 0.1909, "step": 239880 }, { "epoch": 9.94, "grad_norm": 0.89453125, "learning_rate": 0.00037650596737457965, "loss": 0.2352, "step": 239890 }, { "epoch": 9.94, "grad_norm": 1.015625, "learning_rate": 0.00037649661306381606, "loss": 0.1948, "step": 239900 }, { "epoch": 9.94, "grad_norm": 0.76953125, "learning_rate": 0.00037648725851499945, "loss": 0.1841, "step": 239910 }, { "epoch": 9.94, "grad_norm": 0.69140625, "learning_rate": 0.0003764779037281475, "loss": 0.2168, "step": 239920 }, { "epoch": 9.94, "grad_norm": 2.125, "learning_rate": 0.0003764685487032777, "loss": 0.215, "step": 239930 }, { "epoch": 9.94, "grad_norm": 2.890625, "learning_rate": 0.0003764591934404079, "loss": 0.2272, "step": 239940 }, { "epoch": 9.94, "grad_norm": 0.345703125, "learning_rate": 0.0003764498379395555, "loss": 0.1679, "step": 239950 }, { "epoch": 9.94, "grad_norm": 0.76171875, "learning_rate": 0.00037644048220073813, "loss": 0.1565, "step": 239960 }, { "epoch": 9.94, "grad_norm": 0.5859375, "learning_rate": 0.0003764311262239735, "loss": 0.1476, "step": 239970 }, { "epoch": 9.94, "grad_norm": 1.125, "learning_rate": 0.0003764217700092791, "loss": 0.1718, "step": 239980 }, { "epoch": 9.94, "grad_norm": 0.8515625, "learning_rate": 0.0003764124135566725, "loss": 0.2323, "step": 239990 }, { "epoch": 9.94, "grad_norm": 0.10595703125, "learning_rate": 0.0003764030568661715, "loss": 0.2202, "step": 240000 }, { "epoch": 9.94, "grad_norm": 0.478515625, "learning_rate": 0.00037639369993779353, "loss": 0.2207, "step": 240010 }, { "epoch": 9.94, "grad_norm": 0.294921875, "learning_rate": 0.0003763843427715563, "loss": 0.1644, "step": 240020 }, { "epoch": 9.94, "grad_norm": 1.5390625, "learning_rate": 0.0003763749853674774, "loss": 0.1996, "step": 240030 }, { "epoch": 9.94, "grad_norm": 0.81640625, "learning_rate": 0.00037636562772557435, "loss": 0.1976, "step": 240040 }, { "epoch": 9.94, "grad_norm": 0.55859375, "learning_rate": 0.0003763562698458649, "loss": 0.1931, "step": 240050 }, { "epoch": 9.94, "grad_norm": 1.328125, "learning_rate": 0.0003763469117283666, "loss": 0.2063, "step": 240060 }, { "epoch": 9.94, "grad_norm": 1.109375, "learning_rate": 0.00037633755337309705, "loss": 0.1829, "step": 240070 }, { "epoch": 9.94, "grad_norm": 0.333984375, "learning_rate": 0.00037632819478007384, "loss": 0.2005, "step": 240080 }, { "epoch": 9.94, "grad_norm": 0.7890625, "learning_rate": 0.00037631883594931465, "loss": 0.2077, "step": 240090 }, { "epoch": 9.94, "grad_norm": 0.4140625, "learning_rate": 0.00037630947688083703, "loss": 0.2317, "step": 240100 }, { "epoch": 9.95, "grad_norm": 0.5078125, "learning_rate": 0.00037630011757465865, "loss": 0.1864, "step": 240110 }, { "epoch": 9.95, "grad_norm": 0.333984375, "learning_rate": 0.00037629075803079703, "loss": 0.1822, "step": 240120 }, { "epoch": 9.95, "grad_norm": 0.0, "learning_rate": 0.0003762813982492699, "loss": 0.2039, "step": 240130 }, { "epoch": 9.95, "grad_norm": 0.447265625, "learning_rate": 0.0003762720382300947, "loss": 0.2347, "step": 240140 }, { "epoch": 9.95, "grad_norm": 0.66796875, "learning_rate": 0.00037626267797328925, "loss": 0.1452, "step": 240150 }, { "epoch": 9.95, "grad_norm": 0.78515625, "learning_rate": 0.00037625331747887116, "loss": 0.1792, "step": 240160 }, { "epoch": 9.95, "grad_norm": 0.6640625, "learning_rate": 0.00037624395674685785, "loss": 0.2051, "step": 240170 }, { "epoch": 9.95, "grad_norm": 0.63671875, "learning_rate": 0.000376234595777267, "loss": 0.2141, "step": 240180 }, { "epoch": 9.95, "grad_norm": 0.66015625, "learning_rate": 0.00037622523457011637, "loss": 0.2024, "step": 240190 }, { "epoch": 9.95, "grad_norm": 0.66015625, "learning_rate": 0.00037621587312542347, "loss": 0.1749, "step": 240200 }, { "epoch": 9.95, "grad_norm": 0.6171875, "learning_rate": 0.00037620651144320584, "loss": 0.1749, "step": 240210 }, { "epoch": 9.95, "grad_norm": 1.1484375, "learning_rate": 0.00037619714952348126, "loss": 0.215, "step": 240220 }, { "epoch": 9.95, "grad_norm": 1.7265625, "learning_rate": 0.00037618778736626723, "loss": 0.1977, "step": 240230 }, { "epoch": 9.95, "grad_norm": 0.1806640625, "learning_rate": 0.0003761784249715815, "loss": 0.1742, "step": 240240 }, { "epoch": 9.95, "grad_norm": 0.60546875, "learning_rate": 0.00037616906233944153, "loss": 0.1676, "step": 240250 }, { "epoch": 9.95, "grad_norm": 0.55859375, "learning_rate": 0.00037615969946986505, "loss": 0.1501, "step": 240260 }, { "epoch": 9.95, "grad_norm": 0.94140625, "learning_rate": 0.00037615033636286953, "loss": 0.2202, "step": 240270 }, { "epoch": 9.95, "grad_norm": 0.146484375, "learning_rate": 0.00037614097301847283, "loss": 0.1994, "step": 240280 }, { "epoch": 9.95, "grad_norm": 0.71484375, "learning_rate": 0.00037613160943669244, "loss": 0.1985, "step": 240290 }, { "epoch": 9.95, "grad_norm": 0.396484375, "learning_rate": 0.00037612224561754594, "loss": 0.1736, "step": 240300 }, { "epoch": 9.95, "grad_norm": 0.56640625, "learning_rate": 0.00037611288156105094, "loss": 0.1943, "step": 240310 }, { "epoch": 9.95, "grad_norm": 0.65625, "learning_rate": 0.0003761035172672252, "loss": 0.1995, "step": 240320 }, { "epoch": 9.95, "grad_norm": 0.9765625, "learning_rate": 0.0003760941527360862, "loss": 0.1618, "step": 240330 }, { "epoch": 9.95, "grad_norm": 0.8671875, "learning_rate": 0.0003760847879676517, "loss": 0.2137, "step": 240340 }, { "epoch": 9.96, "grad_norm": 0.8984375, "learning_rate": 0.0003760754229619391, "loss": 0.212, "step": 240350 }, { "epoch": 9.96, "grad_norm": 0.83984375, "learning_rate": 0.0003760660577189663, "loss": 0.1845, "step": 240360 }, { "epoch": 9.96, "grad_norm": 0.73828125, "learning_rate": 0.00037605669223875074, "loss": 0.2105, "step": 240370 }, { "epoch": 9.96, "grad_norm": 0.58984375, "learning_rate": 0.0003760473265213101, "loss": 0.1757, "step": 240380 }, { "epoch": 9.96, "grad_norm": 1.0390625, "learning_rate": 0.000376037960566662, "loss": 0.1769, "step": 240390 }, { "epoch": 9.96, "grad_norm": 1.015625, "learning_rate": 0.00037602859437482414, "loss": 0.2149, "step": 240400 }, { "epoch": 9.96, "grad_norm": 0.92578125, "learning_rate": 0.000376019227945814, "loss": 0.1809, "step": 240410 }, { "epoch": 9.96, "grad_norm": 0.62890625, "learning_rate": 0.00037600986127964934, "loss": 0.1546, "step": 240420 }, { "epoch": 9.96, "grad_norm": 0.66796875, "learning_rate": 0.00037600049437634767, "loss": 0.1845, "step": 240430 }, { "epoch": 9.96, "grad_norm": 1.09375, "learning_rate": 0.00037599112723592665, "loss": 0.221, "step": 240440 }, { "epoch": 9.96, "grad_norm": 0.40234375, "learning_rate": 0.000375981759858404, "loss": 0.2057, "step": 240450 }, { "epoch": 9.96, "grad_norm": 0.93359375, "learning_rate": 0.00037597239224379725, "loss": 0.1705, "step": 240460 }, { "epoch": 9.96, "grad_norm": 0.703125, "learning_rate": 0.00037596302439212405, "loss": 0.2079, "step": 240470 }, { "epoch": 9.96, "grad_norm": 0.5234375, "learning_rate": 0.0003759536563034021, "loss": 0.1883, "step": 240480 }, { "epoch": 9.96, "grad_norm": 0.40234375, "learning_rate": 0.0003759442879776489, "loss": 0.1987, "step": 240490 }, { "epoch": 9.96, "grad_norm": 0.96875, "learning_rate": 0.00037593491941488225, "loss": 0.1781, "step": 240500 }, { "epoch": 9.96, "grad_norm": 0.357421875, "learning_rate": 0.00037592555061511957, "loss": 0.1918, "step": 240510 }, { "epoch": 9.96, "grad_norm": 0.61328125, "learning_rate": 0.0003759161815783787, "loss": 0.21, "step": 240520 }, { "epoch": 9.96, "grad_norm": 0.63671875, "learning_rate": 0.0003759068123046771, "loss": 0.178, "step": 240530 }, { "epoch": 9.96, "grad_norm": 1.046875, "learning_rate": 0.0003758974427940325, "loss": 0.2582, "step": 240540 }, { "epoch": 9.96, "grad_norm": 0.94140625, "learning_rate": 0.00037588807304646244, "loss": 0.1565, "step": 240550 }, { "epoch": 9.96, "grad_norm": 0.89453125, "learning_rate": 0.00037587870306198467, "loss": 0.1737, "step": 240560 }, { "epoch": 9.96, "grad_norm": 0.72265625, "learning_rate": 0.00037586933284061686, "loss": 0.2066, "step": 240570 }, { "epoch": 9.96, "grad_norm": 0.44921875, "learning_rate": 0.0003758599623823765, "loss": 0.1778, "step": 240580 }, { "epoch": 9.97, "grad_norm": 0.59375, "learning_rate": 0.0003758505916872813, "loss": 0.1867, "step": 240590 }, { "epoch": 9.97, "grad_norm": 1.1171875, "learning_rate": 0.00037584122075534886, "loss": 0.1987, "step": 240600 }, { "epoch": 9.97, "grad_norm": 0.427734375, "learning_rate": 0.0003758318495865968, "loss": 0.1931, "step": 240610 }, { "epoch": 9.97, "grad_norm": 0.365234375, "learning_rate": 0.0003758224781810428, "loss": 0.177, "step": 240620 }, { "epoch": 9.97, "grad_norm": 1.75, "learning_rate": 0.00037581310653870447, "loss": 0.2217, "step": 240630 }, { "epoch": 9.97, "grad_norm": 1.0, "learning_rate": 0.0003758037346595995, "loss": 0.1692, "step": 240640 }, { "epoch": 9.97, "grad_norm": 0.6640625, "learning_rate": 0.0003757943625437455, "loss": 0.1904, "step": 240650 }, { "epoch": 9.97, "grad_norm": 0.6875, "learning_rate": 0.00037578499019116007, "loss": 0.1785, "step": 240660 }, { "epoch": 9.97, "grad_norm": 0.66015625, "learning_rate": 0.00037577561760186083, "loss": 0.2368, "step": 240670 }, { "epoch": 9.97, "grad_norm": 0.52734375, "learning_rate": 0.0003757662447758655, "loss": 0.2024, "step": 240680 }, { "epoch": 9.97, "grad_norm": 0.91796875, "learning_rate": 0.0003757568717131917, "loss": 0.1751, "step": 240690 }, { "epoch": 9.97, "grad_norm": 0.95703125, "learning_rate": 0.000375747498413857, "loss": 0.1844, "step": 240700 }, { "epoch": 9.97, "grad_norm": 0.65625, "learning_rate": 0.00037573812487787917, "loss": 0.1993, "step": 240710 }, { "epoch": 9.97, "grad_norm": 0.8359375, "learning_rate": 0.0003757287511052757, "loss": 0.2338, "step": 240720 }, { "epoch": 9.97, "grad_norm": 0.8671875, "learning_rate": 0.0003757193770960643, "loss": 0.2083, "step": 240730 }, { "epoch": 9.97, "grad_norm": 0.333984375, "learning_rate": 0.00037571000285026263, "loss": 0.2007, "step": 240740 }, { "epoch": 9.97, "grad_norm": 0.9609375, "learning_rate": 0.00037570062836788835, "loss": 0.1812, "step": 240750 }, { "epoch": 9.97, "grad_norm": 0.29296875, "learning_rate": 0.000375691253648959, "loss": 0.2272, "step": 240760 }, { "epoch": 9.97, "grad_norm": 0.61328125, "learning_rate": 0.00037568187869349226, "loss": 0.2029, "step": 240770 }, { "epoch": 9.97, "grad_norm": 1.03125, "learning_rate": 0.00037567250350150586, "loss": 0.1515, "step": 240780 }, { "epoch": 9.97, "grad_norm": 0.6953125, "learning_rate": 0.0003756631280730173, "loss": 0.1986, "step": 240790 }, { "epoch": 9.97, "grad_norm": 0.5703125, "learning_rate": 0.00037565375240804433, "loss": 0.1783, "step": 240800 }, { "epoch": 9.97, "grad_norm": 1.6796875, "learning_rate": 0.00037564437650660466, "loss": 0.166, "step": 240810 }, { "epoch": 9.97, "grad_norm": 0.67578125, "learning_rate": 0.00037563500036871567, "loss": 0.1794, "step": 240820 }, { "epoch": 9.98, "grad_norm": 0.70703125, "learning_rate": 0.00037562562399439536, "loss": 0.1965, "step": 240830 }, { "epoch": 9.98, "grad_norm": 1.2421875, "learning_rate": 0.00037561624738366114, "loss": 0.1804, "step": 240840 }, { "epoch": 9.98, "grad_norm": 0.431640625, "learning_rate": 0.0003756068705365306, "loss": 0.1896, "step": 240850 }, { "epoch": 9.98, "grad_norm": 0.85546875, "learning_rate": 0.00037559749345302164, "loss": 0.191, "step": 240860 }, { "epoch": 9.98, "grad_norm": 0.5703125, "learning_rate": 0.0003755881161331517, "loss": 0.1795, "step": 240870 }, { "epoch": 9.98, "grad_norm": 0.5703125, "learning_rate": 0.0003755787385769385, "loss": 0.2225, "step": 240880 }, { "epoch": 9.98, "grad_norm": 1.0390625, "learning_rate": 0.0003755693607843996, "loss": 0.2116, "step": 240890 }, { "epoch": 9.98, "grad_norm": 0.55859375, "learning_rate": 0.00037555998275555275, "loss": 0.1931, "step": 240900 }, { "epoch": 9.98, "grad_norm": 0.45703125, "learning_rate": 0.00037555060449041567, "loss": 0.2225, "step": 240910 }, { "epoch": 9.98, "grad_norm": 0.5546875, "learning_rate": 0.00037554122598900584, "loss": 0.2185, "step": 240920 }, { "epoch": 9.98, "grad_norm": 0.8828125, "learning_rate": 0.00037553184725134094, "loss": 0.1738, "step": 240930 }, { "epoch": 9.98, "grad_norm": 1.234375, "learning_rate": 0.00037552246827743873, "loss": 0.1735, "step": 240940 }, { "epoch": 9.98, "grad_norm": 0.38671875, "learning_rate": 0.00037551308906731674, "loss": 0.2615, "step": 240950 }, { "epoch": 9.98, "grad_norm": 1.125, "learning_rate": 0.0003755037096209927, "loss": 0.2118, "step": 240960 }, { "epoch": 9.98, "grad_norm": 0.65625, "learning_rate": 0.00037549432993848434, "loss": 0.1859, "step": 240970 }, { "epoch": 9.98, "grad_norm": 1.6640625, "learning_rate": 0.00037548495001980895, "loss": 0.2083, "step": 240980 }, { "epoch": 9.98, "grad_norm": 1.125, "learning_rate": 0.00037547556986498463, "loss": 0.2163, "step": 240990 }, { "epoch": 9.98, "grad_norm": 2.140625, "learning_rate": 0.0003754661894740288, "loss": 0.1884, "step": 241000 }, { "epoch": 9.98, "grad_norm": 1.0234375, "learning_rate": 0.0003754568088469591, "loss": 0.2071, "step": 241010 }, { "epoch": 9.98, "grad_norm": 1.1875, "learning_rate": 0.00037544742798379326, "loss": 0.1536, "step": 241020 }, { "epoch": 9.98, "grad_norm": 1.8359375, "learning_rate": 0.00037543804688454894, "loss": 0.1694, "step": 241030 }, { "epoch": 9.98, "grad_norm": 1.328125, "learning_rate": 0.0003754286655492437, "loss": 0.1701, "step": 241040 }, { "epoch": 9.98, "grad_norm": 0.64453125, "learning_rate": 0.0003754192839778954, "loss": 0.2163, "step": 241050 }, { "epoch": 9.98, "grad_norm": 0.50390625, "learning_rate": 0.00037540990217052134, "loss": 0.1796, "step": 241060 }, { "epoch": 9.99, "grad_norm": 1.171875, "learning_rate": 0.00037540052012713956, "loss": 0.198, "step": 241070 }, { "epoch": 9.99, "grad_norm": 0.7734375, "learning_rate": 0.0003753911378477675, "loss": 0.2127, "step": 241080 }, { "epoch": 9.99, "grad_norm": 0.546875, "learning_rate": 0.00037538175533242274, "loss": 0.1964, "step": 241090 }, { "epoch": 9.99, "grad_norm": 0.5859375, "learning_rate": 0.00037537237258112324, "loss": 0.2588, "step": 241100 }, { "epoch": 9.99, "grad_norm": 0.69921875, "learning_rate": 0.0003753629895938864, "loss": 0.2043, "step": 241110 }, { "epoch": 9.99, "grad_norm": 0.431640625, "learning_rate": 0.00037535360637072987, "loss": 0.1972, "step": 241120 }, { "epoch": 9.99, "grad_norm": 0.4921875, "learning_rate": 0.0003753442229116715, "loss": 0.2326, "step": 241130 }, { "epoch": 9.99, "grad_norm": 1.5703125, "learning_rate": 0.0003753348392167287, "loss": 0.1884, "step": 241140 }, { "epoch": 9.99, "grad_norm": 1.171875, "learning_rate": 0.00037532545528591945, "loss": 0.2789, "step": 241150 }, { "epoch": 9.99, "grad_norm": 0.64453125, "learning_rate": 0.00037531607111926114, "loss": 0.1784, "step": 241160 }, { "epoch": 9.99, "grad_norm": 0.5234375, "learning_rate": 0.0003753066867167715, "loss": 0.1502, "step": 241170 }, { "epoch": 9.99, "grad_norm": 0.0, "learning_rate": 0.0003752973020784682, "loss": 0.2215, "step": 241180 }, { "epoch": 9.99, "grad_norm": 0.40625, "learning_rate": 0.00037528791720436896, "loss": 0.2479, "step": 241190 }, { "epoch": 9.99, "grad_norm": 1.03125, "learning_rate": 0.00037527853209449135, "loss": 0.1611, "step": 241200 }, { "epoch": 9.99, "grad_norm": 0.90234375, "learning_rate": 0.0003752691467488531, "loss": 0.251, "step": 241210 }, { "epoch": 9.99, "grad_norm": 1.8125, "learning_rate": 0.00037525976116747174, "loss": 0.2066, "step": 241220 }, { "epoch": 9.99, "grad_norm": 0.62109375, "learning_rate": 0.0003752503753503652, "loss": 0.2145, "step": 241230 }, { "epoch": 9.99, "grad_norm": 0.8203125, "learning_rate": 0.0003752409892975509, "loss": 0.1815, "step": 241240 }, { "epoch": 9.99, "grad_norm": 0.423828125, "learning_rate": 0.00037523160300904657, "loss": 0.1361, "step": 241250 }, { "epoch": 9.99, "grad_norm": 0.69921875, "learning_rate": 0.00037522221648486994, "loss": 0.1711, "step": 241260 }, { "epoch": 9.99, "grad_norm": 0.63671875, "learning_rate": 0.00037521282972503855, "loss": 0.1749, "step": 241270 }, { "epoch": 9.99, "grad_norm": 1.0, "learning_rate": 0.0003752034427295702, "loss": 0.2631, "step": 241280 }, { "epoch": 9.99, "grad_norm": 1.0390625, "learning_rate": 0.0003751940554984825, "loss": 0.1567, "step": 241290 }, { "epoch": 9.99, "grad_norm": 0.3671875, "learning_rate": 0.00037518466803179295, "loss": 0.1664, "step": 241300 }, { "epoch": 10.0, "grad_norm": 0.60546875, "learning_rate": 0.0003751752803295195, "loss": 0.1916, "step": 241310 }, { "epoch": 10.0, "grad_norm": 0.5625, "learning_rate": 0.00037516589239167976, "loss": 0.1971, "step": 241320 }, { "epoch": 10.0, "grad_norm": 0.65234375, "learning_rate": 0.00037515650421829117, "loss": 0.2227, "step": 241330 }, { "epoch": 10.0, "grad_norm": 0.0, "learning_rate": 0.00037514711580937174, "loss": 0.2301, "step": 241340 }, { "epoch": 10.0, "grad_norm": 0.625, "learning_rate": 0.00037513772716493883, "loss": 0.1795, "step": 241350 }, { "epoch": 10.0, "grad_norm": 0.443359375, "learning_rate": 0.00037512833828501026, "loss": 0.2143, "step": 241360 }, { "epoch": 10.0, "grad_norm": 1.0546875, "learning_rate": 0.00037511894916960367, "loss": 0.2047, "step": 241370 }, { "epoch": 10.0, "grad_norm": 0.578125, "learning_rate": 0.0003751095598187366, "loss": 0.1884, "step": 241380 }, { "epoch": 10.0, "grad_norm": 0.53515625, "learning_rate": 0.000375100170232427, "loss": 0.2009, "step": 241390 }, { "epoch": 10.0, "grad_norm": 0.56640625, "learning_rate": 0.0003750907804106924, "loss": 0.2216, "step": 241400 }, { "epoch": 10.0, "grad_norm": 1.1171875, "learning_rate": 0.00037508139035355036, "loss": 0.2359, "step": 241410 }, { "epoch": 10.0, "grad_norm": 0.474609375, "learning_rate": 0.00037507200006101885, "loss": 0.1836, "step": 241420 }, { "epoch": 10.0, "grad_norm": 0.87890625, "learning_rate": 0.0003750626095331151, "loss": 0.201, "step": 241430 }, { "epoch": 10.0, "grad_norm": 1.3671875, "learning_rate": 0.00037505321876985713, "loss": 0.2095, "step": 241440 }, { "epoch": 10.0, "grad_norm": 0.5234375, "learning_rate": 0.0003750438277712625, "loss": 0.1954, "step": 241450 }, { "epoch": 10.0, "grad_norm": 0.498046875, "learning_rate": 0.0003750344365373489, "loss": 0.2054, "step": 241460 }, { "epoch": 10.0, "grad_norm": 0.94921875, "learning_rate": 0.00037502504506813403, "loss": 0.1914, "step": 241470 }, { "epoch": 10.0, "grad_norm": 2.09375, "learning_rate": 0.0003750156533636354, "loss": 0.1443, "step": 241480 }, { "epoch": 10.0, "grad_norm": 1.203125, "learning_rate": 0.00037500626142387093, "loss": 0.1577, "step": 241490 }, { "epoch": 10.0, "grad_norm": 1.0625, "learning_rate": 0.00037499686924885817, "loss": 0.169, "step": 241500 }, { "epoch": 10.0, "grad_norm": 0.279296875, "learning_rate": 0.0003749874768386148, "loss": 0.1251, "step": 241510 }, { "epoch": 10.0, "grad_norm": 1.3203125, "learning_rate": 0.00037497808419315845, "loss": 0.2339, "step": 241520 }, { "epoch": 10.0, "grad_norm": 0.765625, "learning_rate": 0.00037496869131250685, "loss": 0.2122, "step": 241530 }, { "epoch": 10.0, "grad_norm": 0.9921875, "learning_rate": 0.00037495929819667765, "loss": 0.238, "step": 241540 }, { "epoch": 10.0, "grad_norm": 1.0703125, "learning_rate": 0.0003749499048456886, "loss": 0.2373, "step": 241550 }, { "epoch": 10.01, "grad_norm": 0.88671875, "learning_rate": 0.00037494051125955724, "loss": 0.2453, "step": 241560 }, { "epoch": 10.01, "grad_norm": 0.92578125, "learning_rate": 0.0003749311174383014, "loss": 0.1923, "step": 241570 }, { "epoch": 10.01, "grad_norm": 0.0, "learning_rate": 0.0003749217233819386, "loss": 0.1599, "step": 241580 }, { "epoch": 10.01, "grad_norm": 0.66015625, "learning_rate": 0.00037491232909048677, "loss": 0.2493, "step": 241590 }, { "epoch": 10.01, "grad_norm": 0.33984375, "learning_rate": 0.0003749029345639633, "loss": 0.244, "step": 241600 }, { "epoch": 10.01, "grad_norm": 0.6875, "learning_rate": 0.00037489353980238606, "loss": 0.2056, "step": 241610 }, { "epoch": 10.01, "grad_norm": 1.3984375, "learning_rate": 0.0003748841448057726, "loss": 0.192, "step": 241620 }, { "epoch": 10.01, "grad_norm": 0.88671875, "learning_rate": 0.00037487474957414074, "loss": 0.1477, "step": 241630 }, { "epoch": 10.01, "grad_norm": 0.734375, "learning_rate": 0.00037486535410750807, "loss": 0.2138, "step": 241640 }, { "epoch": 10.01, "grad_norm": 0.78125, "learning_rate": 0.0003748559584058922, "loss": 0.1817, "step": 241650 }, { "epoch": 10.01, "grad_norm": 0.78125, "learning_rate": 0.000374846562469311, "loss": 0.1616, "step": 241660 }, { "epoch": 10.01, "grad_norm": 0.6796875, "learning_rate": 0.000374837166297782, "loss": 0.1699, "step": 241670 }, { "epoch": 10.01, "grad_norm": 1.125, "learning_rate": 0.00037482776989132294, "loss": 0.2342, "step": 241680 }, { "epoch": 10.01, "grad_norm": 1.453125, "learning_rate": 0.00037481837324995147, "loss": 0.1946, "step": 241690 }, { "epoch": 10.01, "grad_norm": 1.0, "learning_rate": 0.00037480897637368537, "loss": 0.2049, "step": 241700 }, { "epoch": 10.01, "grad_norm": 0.69921875, "learning_rate": 0.00037479957926254227, "loss": 0.1842, "step": 241710 }, { "epoch": 10.01, "grad_norm": 0.427734375, "learning_rate": 0.0003747901819165398, "loss": 0.1917, "step": 241720 }, { "epoch": 10.01, "grad_norm": 1.3203125, "learning_rate": 0.00037478078433569563, "loss": 0.1356, "step": 241730 }, { "epoch": 10.01, "grad_norm": 1.1640625, "learning_rate": 0.00037477138652002755, "loss": 0.1691, "step": 241740 }, { "epoch": 10.01, "grad_norm": 0.96875, "learning_rate": 0.0003747619884695532, "loss": 0.2296, "step": 241750 }, { "epoch": 10.01, "grad_norm": 0.9609375, "learning_rate": 0.0003747525901842903, "loss": 0.1752, "step": 241760 }, { "epoch": 10.01, "grad_norm": 2.765625, "learning_rate": 0.00037474319166425643, "loss": 0.2015, "step": 241770 }, { "epoch": 10.01, "grad_norm": 1.0703125, "learning_rate": 0.00037473379290946934, "loss": 0.2167, "step": 241780 }, { "epoch": 10.01, "grad_norm": 0.447265625, "learning_rate": 0.00037472439391994674, "loss": 0.1617, "step": 241790 }, { "epoch": 10.02, "grad_norm": 1.1484375, "learning_rate": 0.00037471499469570634, "loss": 0.1734, "step": 241800 }, { "epoch": 10.02, "grad_norm": 1.9609375, "learning_rate": 0.0003747055952367657, "loss": 0.1843, "step": 241810 }, { "epoch": 10.02, "grad_norm": 0.69140625, "learning_rate": 0.00037469619554314265, "loss": 0.2108, "step": 241820 }, { "epoch": 10.02, "grad_norm": 1.0546875, "learning_rate": 0.00037468679561485487, "loss": 0.2011, "step": 241830 }, { "epoch": 10.02, "grad_norm": 0.5859375, "learning_rate": 0.00037467739545192, "loss": 0.1769, "step": 241840 }, { "epoch": 10.02, "grad_norm": 0.91015625, "learning_rate": 0.00037466799505435567, "loss": 0.228, "step": 241850 }, { "epoch": 10.02, "grad_norm": 1.078125, "learning_rate": 0.00037465859442217965, "loss": 0.2195, "step": 241860 }, { "epoch": 10.02, "grad_norm": 1.5078125, "learning_rate": 0.0003746491935554096, "loss": 0.2212, "step": 241870 }, { "epoch": 10.02, "grad_norm": 0.57421875, "learning_rate": 0.0003746397924540633, "loss": 0.1972, "step": 241880 }, { "epoch": 10.02, "grad_norm": 0.69921875, "learning_rate": 0.00037463039111815835, "loss": 0.1939, "step": 241890 }, { "epoch": 10.02, "grad_norm": 0.52734375, "learning_rate": 0.0003746209895477124, "loss": 0.1977, "step": 241900 }, { "epoch": 10.02, "grad_norm": 0.318359375, "learning_rate": 0.0003746115877427432, "loss": 0.1954, "step": 241910 }, { "epoch": 10.02, "grad_norm": 1.484375, "learning_rate": 0.00037460218570326855, "loss": 0.2015, "step": 241920 }, { "epoch": 10.02, "grad_norm": 0.328125, "learning_rate": 0.00037459278342930595, "loss": 0.213, "step": 241930 }, { "epoch": 10.02, "grad_norm": 0.76953125, "learning_rate": 0.00037458338092087324, "loss": 0.1818, "step": 241940 }, { "epoch": 10.02, "grad_norm": 0.65625, "learning_rate": 0.00037457397817798805, "loss": 0.1395, "step": 241950 }, { "epoch": 10.02, "grad_norm": 0.67578125, "learning_rate": 0.0003745645752006681, "loss": 0.1606, "step": 241960 }, { "epoch": 10.02, "grad_norm": 0.76953125, "learning_rate": 0.00037455517198893107, "loss": 0.1995, "step": 241970 }, { "epoch": 10.02, "grad_norm": 2.203125, "learning_rate": 0.00037454576854279467, "loss": 0.2114, "step": 241980 }, { "epoch": 10.02, "grad_norm": 0.458984375, "learning_rate": 0.0003745363648622765, "loss": 0.1956, "step": 241990 }, { "epoch": 10.02, "grad_norm": 0.64453125, "learning_rate": 0.0003745269609473944, "loss": 0.2362, "step": 242000 }, { "epoch": 10.02, "grad_norm": 1.1328125, "learning_rate": 0.000374517556798166, "loss": 0.2286, "step": 242010 }, { "epoch": 10.02, "grad_norm": 0.7890625, "learning_rate": 0.000374508152414609, "loss": 0.1789, "step": 242020 }, { "epoch": 10.02, "grad_norm": 0.345703125, "learning_rate": 0.0003744987477967411, "loss": 0.1379, "step": 242030 }, { "epoch": 10.03, "grad_norm": 0.765625, "learning_rate": 0.00037448934294458005, "loss": 0.2635, "step": 242040 }, { "epoch": 10.03, "grad_norm": 1.7578125, "learning_rate": 0.00037447993785814346, "loss": 0.2089, "step": 242050 }, { "epoch": 10.03, "grad_norm": 0.83984375, "learning_rate": 0.00037447053253744905, "loss": 0.1508, "step": 242060 }, { "epoch": 10.03, "grad_norm": 0.37890625, "learning_rate": 0.00037446112698251454, "loss": 0.1935, "step": 242070 }, { "epoch": 10.03, "grad_norm": 0.57421875, "learning_rate": 0.0003744517211933576, "loss": 0.2184, "step": 242080 }, { "epoch": 10.03, "grad_norm": 0.703125, "learning_rate": 0.0003744423151699961, "loss": 0.1953, "step": 242090 }, { "epoch": 10.03, "grad_norm": 0.330078125, "learning_rate": 0.00037443290891244754, "loss": 0.2006, "step": 242100 }, { "epoch": 10.03, "grad_norm": 0.859375, "learning_rate": 0.0003744235024207296, "loss": 0.1966, "step": 242110 }, { "epoch": 10.03, "grad_norm": 0.8671875, "learning_rate": 0.0003744140956948602, "loss": 0.188, "step": 242120 }, { "epoch": 10.03, "grad_norm": 0.72265625, "learning_rate": 0.00037440468873485675, "loss": 0.1667, "step": 242130 }, { "epoch": 10.03, "grad_norm": 1.0859375, "learning_rate": 0.0003743952815407372, "loss": 0.1769, "step": 242140 }, { "epoch": 10.03, "grad_norm": 0.353515625, "learning_rate": 0.0003743858741125191, "loss": 0.1945, "step": 242150 }, { "epoch": 10.03, "grad_norm": 1.2421875, "learning_rate": 0.0003743764664502203, "loss": 0.211, "step": 242160 }, { "epoch": 10.03, "grad_norm": 0.9296875, "learning_rate": 0.00037436705855385845, "loss": 0.1702, "step": 242170 }, { "epoch": 10.03, "grad_norm": 1.1171875, "learning_rate": 0.0003743576504234512, "loss": 0.1444, "step": 242180 }, { "epoch": 10.03, "grad_norm": 0.52734375, "learning_rate": 0.0003743482420590162, "loss": 0.2049, "step": 242190 }, { "epoch": 10.03, "grad_norm": 0.64453125, "learning_rate": 0.0003743388334605713, "loss": 0.1727, "step": 242200 }, { "epoch": 10.03, "grad_norm": 0.70703125, "learning_rate": 0.000374329424628134, "loss": 0.1574, "step": 242210 }, { "epoch": 10.03, "grad_norm": 0.5703125, "learning_rate": 0.00037432001556172236, "loss": 0.1837, "step": 242220 }, { "epoch": 10.03, "grad_norm": 0.71484375, "learning_rate": 0.00037431060626135377, "loss": 0.201, "step": 242230 }, { "epoch": 10.03, "grad_norm": 0.85546875, "learning_rate": 0.0003743011967270461, "loss": 0.2118, "step": 242240 }, { "epoch": 10.03, "grad_norm": 0.5859375, "learning_rate": 0.0003742917869588169, "loss": 0.1874, "step": 242250 }, { "epoch": 10.03, "grad_norm": 2.1875, "learning_rate": 0.0003742823769566841, "loss": 0.2207, "step": 242260 }, { "epoch": 10.03, "grad_norm": 0.353515625, "learning_rate": 0.00037427296672066525, "loss": 0.1446, "step": 242270 }, { "epoch": 10.04, "grad_norm": 0.671875, "learning_rate": 0.00037426355625077814, "loss": 0.252, "step": 242280 }, { "epoch": 10.04, "grad_norm": 0.58203125, "learning_rate": 0.00037425414554704027, "loss": 0.2123, "step": 242290 }, { "epoch": 10.04, "grad_norm": 0.49609375, "learning_rate": 0.0003742447346094697, "loss": 0.1869, "step": 242300 }, { "epoch": 10.04, "grad_norm": 0.431640625, "learning_rate": 0.0003742353234380839, "loss": 0.1956, "step": 242310 }, { "epoch": 10.04, "grad_norm": 1.5703125, "learning_rate": 0.0003742259120329006, "loss": 0.176, "step": 242320 }, { "epoch": 10.04, "grad_norm": 0.91796875, "learning_rate": 0.00037421650039393757, "loss": 0.1845, "step": 242330 }, { "epoch": 10.04, "grad_norm": 0.5234375, "learning_rate": 0.00037420708852121243, "loss": 0.1354, "step": 242340 }, { "epoch": 10.04, "grad_norm": 0.8203125, "learning_rate": 0.00037419767641474306, "loss": 0.1596, "step": 242350 }, { "epoch": 10.04, "grad_norm": 1.1640625, "learning_rate": 0.0003741882640745471, "loss": 0.1914, "step": 242360 }, { "epoch": 10.04, "grad_norm": 0.96484375, "learning_rate": 0.0003741788515006421, "loss": 0.212, "step": 242370 }, { "epoch": 10.04, "grad_norm": 0.408203125, "learning_rate": 0.000374169438693046, "loss": 0.2254, "step": 242380 }, { "epoch": 10.04, "grad_norm": 0.4140625, "learning_rate": 0.00037416002565177645, "loss": 0.2388, "step": 242390 }, { "epoch": 10.04, "grad_norm": 0.77734375, "learning_rate": 0.0003741506123768511, "loss": 0.2435, "step": 242400 }, { "epoch": 10.04, "grad_norm": 0.388671875, "learning_rate": 0.00037414119886828776, "loss": 0.2255, "step": 242410 }, { "epoch": 10.04, "grad_norm": 2.609375, "learning_rate": 0.000374131785126104, "loss": 0.1963, "step": 242420 }, { "epoch": 10.04, "grad_norm": 0.7734375, "learning_rate": 0.0003741223711503177, "loss": 0.2412, "step": 242430 }, { "epoch": 10.04, "grad_norm": 0.0615234375, "learning_rate": 0.00037411295694094647, "loss": 0.208, "step": 242440 }, { "epoch": 10.04, "grad_norm": 1.2890625, "learning_rate": 0.00037410354249800803, "loss": 0.1807, "step": 242450 }, { "epoch": 10.04, "grad_norm": 1.375, "learning_rate": 0.0003740941278215202, "loss": 0.1718, "step": 242460 }, { "epoch": 10.04, "grad_norm": 1.0078125, "learning_rate": 0.0003740847129115005, "loss": 0.1941, "step": 242470 }, { "epoch": 10.04, "grad_norm": 0.478515625, "learning_rate": 0.0003740752977679669, "loss": 0.1822, "step": 242480 }, { "epoch": 10.04, "grad_norm": 0.423828125, "learning_rate": 0.000374065882390937, "loss": 0.1834, "step": 242490 }, { "epoch": 10.04, "grad_norm": 0.41015625, "learning_rate": 0.0003740564667804284, "loss": 0.2183, "step": 242500 }, { "epoch": 10.04, "grad_norm": 0.62109375, "learning_rate": 0.00037404705093645895, "loss": 0.2186, "step": 242510 }, { "epoch": 10.05, "grad_norm": 2.34375, "learning_rate": 0.0003740376348590464, "loss": 0.1742, "step": 242520 }, { "epoch": 10.05, "grad_norm": 0.349609375, "learning_rate": 0.00037402821854820833, "loss": 0.2101, "step": 242530 }, { "epoch": 10.05, "grad_norm": 0.5078125, "learning_rate": 0.00037401880200396264, "loss": 0.2409, "step": 242540 }, { "epoch": 10.05, "grad_norm": 0.65625, "learning_rate": 0.0003740093852263269, "loss": 0.2113, "step": 242550 }, { "epoch": 10.05, "grad_norm": 1.1640625, "learning_rate": 0.0003739999682153189, "loss": 0.1867, "step": 242560 }, { "epoch": 10.05, "grad_norm": 1.5234375, "learning_rate": 0.00037399055097095635, "loss": 0.2145, "step": 242570 }, { "epoch": 10.05, "grad_norm": 0.427734375, "learning_rate": 0.000373981133493257, "loss": 0.2002, "step": 242580 }, { "epoch": 10.05, "grad_norm": 0.8828125, "learning_rate": 0.00037397171578223856, "loss": 0.1851, "step": 242590 }, { "epoch": 10.05, "grad_norm": 0.81640625, "learning_rate": 0.0003739622978379187, "loss": 0.1783, "step": 242600 }, { "epoch": 10.05, "grad_norm": 0.71875, "learning_rate": 0.0003739528796603151, "loss": 0.2017, "step": 242610 }, { "epoch": 10.05, "grad_norm": 1.0078125, "learning_rate": 0.00037394346124944566, "loss": 0.1777, "step": 242620 }, { "epoch": 10.05, "grad_norm": 1.21875, "learning_rate": 0.00037393404260532804, "loss": 0.2251, "step": 242630 }, { "epoch": 10.05, "grad_norm": 0.7734375, "learning_rate": 0.0003739246237279799, "loss": 0.235, "step": 242640 }, { "epoch": 10.05, "grad_norm": 0.65234375, "learning_rate": 0.00037391520461741904, "loss": 0.2453, "step": 242650 }, { "epoch": 10.05, "grad_norm": 0.0, "learning_rate": 0.00037390578527366305, "loss": 0.1773, "step": 242660 }, { "epoch": 10.05, "grad_norm": 0.640625, "learning_rate": 0.0003738963656967298, "loss": 0.201, "step": 242670 }, { "epoch": 10.05, "grad_norm": 1.3125, "learning_rate": 0.000373886945886637, "loss": 0.2621, "step": 242680 }, { "epoch": 10.05, "grad_norm": 0.55078125, "learning_rate": 0.0003738775258434023, "loss": 0.2294, "step": 242690 }, { "epoch": 10.05, "grad_norm": 1.203125, "learning_rate": 0.0003738681055670435, "loss": 0.233, "step": 242700 }, { "epoch": 10.05, "grad_norm": 0.412109375, "learning_rate": 0.0003738586850575783, "loss": 0.1573, "step": 242710 }, { "epoch": 10.05, "grad_norm": 1.140625, "learning_rate": 0.0003738492643150244, "loss": 0.1847, "step": 242720 }, { "epoch": 10.05, "grad_norm": 0.69140625, "learning_rate": 0.00037383984333939967, "loss": 0.2199, "step": 242730 }, { "epoch": 10.05, "grad_norm": 0.78125, "learning_rate": 0.0003738304221307216, "loss": 0.1905, "step": 242740 }, { "epoch": 10.05, "grad_norm": 0.8359375, "learning_rate": 0.00037382100068900813, "loss": 0.2098, "step": 242750 }, { "epoch": 10.06, "grad_norm": 1.171875, "learning_rate": 0.0003738115790142769, "loss": 0.1748, "step": 242760 }, { "epoch": 10.06, "grad_norm": 1.4921875, "learning_rate": 0.00037380215710654556, "loss": 0.235, "step": 242770 }, { "epoch": 10.06, "grad_norm": 1.46875, "learning_rate": 0.000373792734965832, "loss": 0.1801, "step": 242780 }, { "epoch": 10.06, "grad_norm": 0.91796875, "learning_rate": 0.0003737833125921539, "loss": 0.1864, "step": 242790 }, { "epoch": 10.06, "grad_norm": 0.66015625, "learning_rate": 0.0003737738899855289, "loss": 0.2163, "step": 242800 }, { "epoch": 10.06, "grad_norm": 0.57421875, "learning_rate": 0.0003737644671459749, "loss": 0.1867, "step": 242810 }, { "epoch": 10.06, "grad_norm": 0.82421875, "learning_rate": 0.00037375504407350947, "loss": 0.2065, "step": 242820 }, { "epoch": 10.06, "grad_norm": 0.34765625, "learning_rate": 0.00037374562076815046, "loss": 0.2124, "step": 242830 }, { "epoch": 10.06, "grad_norm": 0.70703125, "learning_rate": 0.00037373619722991545, "loss": 0.2, "step": 242840 }, { "epoch": 10.06, "grad_norm": 1.265625, "learning_rate": 0.0003737267734588224, "loss": 0.1561, "step": 242850 }, { "epoch": 10.06, "grad_norm": 0.451171875, "learning_rate": 0.0003737173494548889, "loss": 0.2189, "step": 242860 }, { "epoch": 10.06, "grad_norm": 1.03125, "learning_rate": 0.00037370792521813266, "loss": 0.199, "step": 242870 }, { "epoch": 10.06, "grad_norm": 2.15625, "learning_rate": 0.0003736985007485715, "loss": 0.1804, "step": 242880 }, { "epoch": 10.06, "grad_norm": 0.80078125, "learning_rate": 0.0003736890760462231, "loss": 0.2097, "step": 242890 }, { "epoch": 10.06, "grad_norm": 0.7890625, "learning_rate": 0.00037367965111110527, "loss": 0.2181, "step": 242900 }, { "epoch": 10.06, "grad_norm": 0.498046875, "learning_rate": 0.00037367022594323563, "loss": 0.1983, "step": 242910 }, { "epoch": 10.06, "grad_norm": 0.94921875, "learning_rate": 0.000373660800542632, "loss": 0.2336, "step": 242920 }, { "epoch": 10.06, "grad_norm": 1.2734375, "learning_rate": 0.0003736513749093121, "loss": 0.2276, "step": 242930 }, { "epoch": 10.06, "grad_norm": 1.21875, "learning_rate": 0.0003736419490432937, "loss": 0.1833, "step": 242940 }, { "epoch": 10.06, "grad_norm": 0.1279296875, "learning_rate": 0.00037363252294459446, "loss": 0.172, "step": 242950 }, { "epoch": 10.06, "grad_norm": 0.369140625, "learning_rate": 0.00037362309661323213, "loss": 0.1734, "step": 242960 }, { "epoch": 10.06, "grad_norm": 0.71875, "learning_rate": 0.0003736136700492245, "loss": 0.1693, "step": 242970 }, { "epoch": 10.06, "grad_norm": 0.734375, "learning_rate": 0.00037360424325258933, "loss": 0.1917, "step": 242980 }, { "epoch": 10.06, "grad_norm": 1.171875, "learning_rate": 0.0003735948162233443, "loss": 0.1905, "step": 242990 }, { "epoch": 10.07, "grad_norm": 0.90234375, "learning_rate": 0.0003735853889615072, "loss": 0.2104, "step": 243000 }, { "epoch": 10.07, "grad_norm": 1.1875, "learning_rate": 0.00037357596146709573, "loss": 0.1746, "step": 243010 }, { "epoch": 10.07, "grad_norm": 0.83203125, "learning_rate": 0.0003735665337401277, "loss": 0.2062, "step": 243020 }, { "epoch": 10.07, "grad_norm": 0.98828125, "learning_rate": 0.00037355710578062076, "loss": 0.2077, "step": 243030 }, { "epoch": 10.07, "grad_norm": 0.032470703125, "learning_rate": 0.00037354767758859266, "loss": 0.1726, "step": 243040 }, { "epoch": 10.07, "grad_norm": 1.2578125, "learning_rate": 0.00037353824916406116, "loss": 0.1739, "step": 243050 }, { "epoch": 10.07, "grad_norm": 0.59765625, "learning_rate": 0.000373528820507044, "loss": 0.1311, "step": 243060 }, { "epoch": 10.07, "grad_norm": 0.76171875, "learning_rate": 0.00037351939161755896, "loss": 0.2289, "step": 243070 }, { "epoch": 10.07, "grad_norm": 0.21484375, "learning_rate": 0.0003735099624956238, "loss": 0.1728, "step": 243080 }, { "epoch": 10.07, "grad_norm": 2.078125, "learning_rate": 0.0003735005331412562, "loss": 0.2262, "step": 243090 }, { "epoch": 10.07, "grad_norm": 0.73046875, "learning_rate": 0.00037349110355447396, "loss": 0.1832, "step": 243100 }, { "epoch": 10.07, "grad_norm": 1.7109375, "learning_rate": 0.00037348167373529476, "loss": 0.2352, "step": 243110 }, { "epoch": 10.07, "grad_norm": 1.140625, "learning_rate": 0.00037347224368373646, "loss": 0.1949, "step": 243120 }, { "epoch": 10.07, "grad_norm": 0.93359375, "learning_rate": 0.00037346281339981665, "loss": 0.1867, "step": 243130 }, { "epoch": 10.07, "grad_norm": 0.82421875, "learning_rate": 0.0003734533828835532, "loss": 0.2202, "step": 243140 }, { "epoch": 10.07, "grad_norm": 0.66015625, "learning_rate": 0.00037344395213496386, "loss": 0.1856, "step": 243150 }, { "epoch": 10.07, "grad_norm": 0.5390625, "learning_rate": 0.0003734345211540662, "loss": 0.1844, "step": 243160 }, { "epoch": 10.07, "grad_norm": 1.0859375, "learning_rate": 0.0003734250899408782, "loss": 0.211, "step": 243170 }, { "epoch": 10.07, "grad_norm": 1.484375, "learning_rate": 0.00037341565849541747, "loss": 0.1921, "step": 243180 }, { "epoch": 10.07, "grad_norm": 0.6015625, "learning_rate": 0.0003734062268177019, "loss": 0.2445, "step": 243190 }, { "epoch": 10.07, "grad_norm": 0.6484375, "learning_rate": 0.00037339679490774896, "loss": 0.2028, "step": 243200 }, { "epoch": 10.07, "grad_norm": 1.3125, "learning_rate": 0.0003733873627655767, "loss": 0.2173, "step": 243210 }, { "epoch": 10.07, "grad_norm": 0.96484375, "learning_rate": 0.0003733779303912027, "loss": 0.1926, "step": 243220 }, { "epoch": 10.07, "grad_norm": 0.84375, "learning_rate": 0.0003733684977846448, "loss": 0.1775, "step": 243230 }, { "epoch": 10.07, "grad_norm": 1.8828125, "learning_rate": 0.00037335906494592063, "loss": 0.1934, "step": 243240 }, { "epoch": 10.08, "grad_norm": 1.7265625, "learning_rate": 0.0003733496318750481, "loss": 0.1885, "step": 243250 }, { "epoch": 10.08, "grad_norm": 0.62109375, "learning_rate": 0.0003733401985720448, "loss": 0.2287, "step": 243260 }, { "epoch": 10.08, "grad_norm": 0.84765625, "learning_rate": 0.0003733307650369287, "loss": 0.1741, "step": 243270 }, { "epoch": 10.08, "grad_norm": 1.34375, "learning_rate": 0.00037332133126971736, "loss": 0.1982, "step": 243280 }, { "epoch": 10.08, "grad_norm": 0.2314453125, "learning_rate": 0.00037331189727042856, "loss": 0.2299, "step": 243290 }, { "epoch": 10.08, "grad_norm": 0.82421875, "learning_rate": 0.0003733024630390801, "loss": 0.2026, "step": 243300 }, { "epoch": 10.08, "grad_norm": 0.4921875, "learning_rate": 0.0003732930285756897, "loss": 0.208, "step": 243310 }, { "epoch": 10.08, "grad_norm": 1.0078125, "learning_rate": 0.0003732835938802752, "loss": 0.2192, "step": 243320 }, { "epoch": 10.08, "grad_norm": 1.0859375, "learning_rate": 0.00037327415895285423, "loss": 0.1932, "step": 243330 }, { "epoch": 10.08, "grad_norm": 1.359375, "learning_rate": 0.0003732647237934446, "loss": 0.2156, "step": 243340 }, { "epoch": 10.08, "grad_norm": 0.92578125, "learning_rate": 0.00037325528840206413, "loss": 0.2271, "step": 243350 }, { "epoch": 10.08, "grad_norm": 0.78515625, "learning_rate": 0.00037324585277873047, "loss": 0.1785, "step": 243360 }, { "epoch": 10.08, "grad_norm": 0.765625, "learning_rate": 0.00037323641692346146, "loss": 0.2093, "step": 243370 }, { "epoch": 10.08, "grad_norm": 0.59765625, "learning_rate": 0.00037322698083627477, "loss": 0.1884, "step": 243380 }, { "epoch": 10.08, "grad_norm": 0.5625, "learning_rate": 0.0003732175445171882, "loss": 0.2416, "step": 243390 }, { "epoch": 10.08, "grad_norm": 0.53515625, "learning_rate": 0.0003732081079662196, "loss": 0.2188, "step": 243400 }, { "epoch": 10.08, "grad_norm": 0.69140625, "learning_rate": 0.0003731986711833866, "loss": 0.2001, "step": 243410 }, { "epoch": 10.08, "grad_norm": 1.5703125, "learning_rate": 0.000373189234168707, "loss": 0.2187, "step": 243420 }, { "epoch": 10.08, "grad_norm": 0.62109375, "learning_rate": 0.0003731797969221986, "loss": 0.1361, "step": 243430 }, { "epoch": 10.08, "grad_norm": 0.1943359375, "learning_rate": 0.0003731703594438791, "loss": 0.1832, "step": 243440 }, { "epoch": 10.08, "grad_norm": 0.73828125, "learning_rate": 0.0003731609217337663, "loss": 0.1858, "step": 243450 }, { "epoch": 10.08, "grad_norm": 0.65625, "learning_rate": 0.0003731514837918779, "loss": 0.2133, "step": 243460 }, { "epoch": 10.08, "grad_norm": 1.1796875, "learning_rate": 0.00037314204561823166, "loss": 0.1708, "step": 243470 }, { "epoch": 10.08, "grad_norm": 0.3984375, "learning_rate": 0.0003731326072128455, "loss": 0.2081, "step": 243480 }, { "epoch": 10.09, "grad_norm": 0.80078125, "learning_rate": 0.000373123168575737, "loss": 0.1687, "step": 243490 }, { "epoch": 10.09, "grad_norm": 0.8515625, "learning_rate": 0.000373113729706924, "loss": 0.1846, "step": 243500 }, { "epoch": 10.09, "grad_norm": 1.5390625, "learning_rate": 0.00037310429060642437, "loss": 0.1903, "step": 243510 }, { "epoch": 10.09, "grad_norm": 0.6015625, "learning_rate": 0.0003730948512742556, "loss": 0.2033, "step": 243520 }, { "epoch": 10.09, "grad_norm": 0.216796875, "learning_rate": 0.00037308541171043576, "loss": 0.1839, "step": 243530 }, { "epoch": 10.09, "grad_norm": 0.58984375, "learning_rate": 0.0003730759719149823, "loss": 0.2117, "step": 243540 }, { "epoch": 10.09, "grad_norm": 0.6484375, "learning_rate": 0.0003730665318879133, "loss": 0.2196, "step": 243550 }, { "epoch": 10.09, "grad_norm": 0.271484375, "learning_rate": 0.00037305709162924633, "loss": 0.1867, "step": 243560 }, { "epoch": 10.09, "grad_norm": 0.78515625, "learning_rate": 0.0003730476511389992, "loss": 0.1478, "step": 243570 }, { "epoch": 10.09, "grad_norm": 2.046875, "learning_rate": 0.00037303821041718966, "loss": 0.2453, "step": 243580 }, { "epoch": 10.09, "grad_norm": 0.7109375, "learning_rate": 0.0003730287694638355, "loss": 0.1841, "step": 243590 }, { "epoch": 10.09, "grad_norm": 0.55859375, "learning_rate": 0.00037301932827895446, "loss": 0.2046, "step": 243600 }, { "epoch": 10.09, "grad_norm": 0.443359375, "learning_rate": 0.0003730098868625644, "loss": 0.2232, "step": 243610 }, { "epoch": 10.09, "grad_norm": 0.90234375, "learning_rate": 0.0003730004452146829, "loss": 0.2097, "step": 243620 }, { "epoch": 10.09, "grad_norm": 0.8984375, "learning_rate": 0.0003729910033353279, "loss": 0.2136, "step": 243630 }, { "epoch": 10.09, "grad_norm": 0.64453125, "learning_rate": 0.0003729815612245172, "loss": 0.1237, "step": 243640 }, { "epoch": 10.09, "grad_norm": 1.8046875, "learning_rate": 0.00037297211888226835, "loss": 0.1682, "step": 243650 }, { "epoch": 10.09, "grad_norm": 1.125, "learning_rate": 0.00037296267630859936, "loss": 0.213, "step": 243660 }, { "epoch": 10.09, "grad_norm": 0.546875, "learning_rate": 0.0003729532335035279, "loss": 0.1798, "step": 243670 }, { "epoch": 10.09, "grad_norm": 1.1796875, "learning_rate": 0.0003729437904670716, "loss": 0.1902, "step": 243680 }, { "epoch": 10.09, "grad_norm": 1.0390625, "learning_rate": 0.00037293434719924847, "loss": 0.2251, "step": 243690 }, { "epoch": 10.09, "grad_norm": 0.73046875, "learning_rate": 0.00037292490370007615, "loss": 0.1822, "step": 243700 }, { "epoch": 10.09, "grad_norm": 1.0703125, "learning_rate": 0.00037291545996957244, "loss": 0.1763, "step": 243710 }, { "epoch": 10.09, "grad_norm": 0.671875, "learning_rate": 0.00037290601600775507, "loss": 0.2811, "step": 243720 }, { "epoch": 10.1, "grad_norm": 0.8046875, "learning_rate": 0.00037289657181464186, "loss": 0.185, "step": 243730 }, { "epoch": 10.1, "grad_norm": 0.7265625, "learning_rate": 0.00037288712739025066, "loss": 0.1745, "step": 243740 }, { "epoch": 10.1, "grad_norm": 0.8125, "learning_rate": 0.0003728776827345991, "loss": 0.172, "step": 243750 }, { "epoch": 10.1, "grad_norm": 0.69921875, "learning_rate": 0.00037286823784770496, "loss": 0.21, "step": 243760 }, { "epoch": 10.1, "grad_norm": 0.69921875, "learning_rate": 0.00037285879272958613, "loss": 0.2112, "step": 243770 }, { "epoch": 10.1, "grad_norm": 0.28515625, "learning_rate": 0.0003728493473802603, "loss": 0.1951, "step": 243780 }, { "epoch": 10.1, "grad_norm": 0.859375, "learning_rate": 0.00037283990179974526, "loss": 0.2566, "step": 243790 }, { "epoch": 10.1, "grad_norm": 0.6484375, "learning_rate": 0.00037283045598805883, "loss": 0.2086, "step": 243800 }, { "epoch": 10.1, "grad_norm": 1.8359375, "learning_rate": 0.0003728210099452186, "loss": 0.1948, "step": 243810 }, { "epoch": 10.1, "grad_norm": 0.703125, "learning_rate": 0.0003728115636712427, "loss": 0.1953, "step": 243820 }, { "epoch": 10.1, "grad_norm": 0.546875, "learning_rate": 0.0003728021171661485, "loss": 0.2247, "step": 243830 }, { "epoch": 10.1, "grad_norm": 0.35546875, "learning_rate": 0.0003727926704299541, "loss": 0.1881, "step": 243840 }, { "epoch": 10.1, "grad_norm": 0.796875, "learning_rate": 0.00037278322346267725, "loss": 0.2006, "step": 243850 }, { "epoch": 10.1, "grad_norm": 0.51171875, "learning_rate": 0.00037277377626433543, "loss": 0.1957, "step": 243860 }, { "epoch": 10.1, "grad_norm": 1.203125, "learning_rate": 0.0003727643288349467, "loss": 0.1935, "step": 243870 }, { "epoch": 10.1, "grad_norm": 0.66015625, "learning_rate": 0.00037275488117452884, "loss": 0.1394, "step": 243880 }, { "epoch": 10.1, "grad_norm": 0.8984375, "learning_rate": 0.0003727454332830994, "loss": 0.1713, "step": 243890 }, { "epoch": 10.1, "grad_norm": 0.84765625, "learning_rate": 0.0003727359851606764, "loss": 0.2333, "step": 243900 }, { "epoch": 10.1, "grad_norm": 0.2265625, "learning_rate": 0.00037272653680727754, "loss": 0.1898, "step": 243910 }, { "epoch": 10.1, "grad_norm": 0.40234375, "learning_rate": 0.00037271708822292056, "loss": 0.1922, "step": 243920 }, { "epoch": 10.1, "grad_norm": 0.84375, "learning_rate": 0.0003727076394076233, "loss": 0.2076, "step": 243930 }, { "epoch": 10.1, "grad_norm": 0.78515625, "learning_rate": 0.00037269819036140354, "loss": 0.1661, "step": 243940 }, { "epoch": 10.1, "grad_norm": 0.75390625, "learning_rate": 0.000372688741084279, "loss": 0.2075, "step": 243950 }, { "epoch": 10.1, "grad_norm": 1.1171875, "learning_rate": 0.00037267929157626747, "loss": 0.2055, "step": 243960 }, { "epoch": 10.11, "grad_norm": 1.5078125, "learning_rate": 0.0003726698418373867, "loss": 0.1798, "step": 243970 }, { "epoch": 10.11, "grad_norm": 0.5546875, "learning_rate": 0.00037266039186765467, "loss": 0.2351, "step": 243980 }, { "epoch": 10.11, "grad_norm": 0.94921875, "learning_rate": 0.00037265094166708896, "loss": 0.2699, "step": 243990 }, { "epoch": 10.11, "grad_norm": 0.82421875, "learning_rate": 0.00037264149123570744, "loss": 0.1925, "step": 244000 }, { "epoch": 10.11, "grad_norm": 0.4609375, "learning_rate": 0.0003726320405735279, "loss": 0.2204, "step": 244010 }, { "epoch": 10.11, "grad_norm": 0.79296875, "learning_rate": 0.00037262258968056807, "loss": 0.181, "step": 244020 }, { "epoch": 10.11, "grad_norm": 1.4375, "learning_rate": 0.00037261313855684573, "loss": 0.1759, "step": 244030 }, { "epoch": 10.11, "grad_norm": 0.69140625, "learning_rate": 0.0003726036872023788, "loss": 0.1306, "step": 244040 }, { "epoch": 10.11, "grad_norm": 0.8515625, "learning_rate": 0.00037259423561718485, "loss": 0.1862, "step": 244050 }, { "epoch": 10.11, "grad_norm": 0.58984375, "learning_rate": 0.00037258478380128193, "loss": 0.1985, "step": 244060 }, { "epoch": 10.11, "grad_norm": 0.64453125, "learning_rate": 0.0003725753317546876, "loss": 0.2227, "step": 244070 }, { "epoch": 10.11, "grad_norm": 1.046875, "learning_rate": 0.0003725658794774197, "loss": 0.1914, "step": 244080 }, { "epoch": 10.11, "grad_norm": 0.70703125, "learning_rate": 0.00037255642696949623, "loss": 0.188, "step": 244090 }, { "epoch": 10.11, "grad_norm": 0.73046875, "learning_rate": 0.0003725469742309346, "loss": 0.1948, "step": 244100 }, { "epoch": 10.11, "grad_norm": 1.328125, "learning_rate": 0.0003725375212617529, "loss": 0.1699, "step": 244110 }, { "epoch": 10.11, "grad_norm": 0.65234375, "learning_rate": 0.0003725280680619688, "loss": 0.1742, "step": 244120 }, { "epoch": 10.11, "grad_norm": 0.5703125, "learning_rate": 0.00037251861463159997, "loss": 0.2232, "step": 244130 }, { "epoch": 10.11, "grad_norm": 0.4453125, "learning_rate": 0.00037250916097066457, "loss": 0.2069, "step": 244140 }, { "epoch": 10.11, "grad_norm": 0.8671875, "learning_rate": 0.00037249970707918, "loss": 0.183, "step": 244150 }, { "epoch": 10.11, "grad_norm": 0.6015625, "learning_rate": 0.0003724902529571643, "loss": 0.2327, "step": 244160 }, { "epoch": 10.11, "grad_norm": 0.4609375, "learning_rate": 0.0003724807986046351, "loss": 0.2006, "step": 244170 }, { "epoch": 10.11, "grad_norm": 0.71484375, "learning_rate": 0.00037247134402161033, "loss": 0.1641, "step": 244180 }, { "epoch": 10.11, "grad_norm": 0.322265625, "learning_rate": 0.00037246188920810763, "loss": 0.1949, "step": 244190 }, { "epoch": 10.11, "grad_norm": 0.69140625, "learning_rate": 0.00037245243416414496, "loss": 0.157, "step": 244200 }, { "epoch": 10.12, "grad_norm": 0.7890625, "learning_rate": 0.00037244297888973995, "loss": 0.1447, "step": 244210 }, { "epoch": 10.12, "grad_norm": 1.8046875, "learning_rate": 0.00037243352338491053, "loss": 0.2341, "step": 244220 }, { "epoch": 10.12, "grad_norm": 0.0, "learning_rate": 0.00037242406764967443, "loss": 0.2026, "step": 244230 }, { "epoch": 10.12, "grad_norm": 1.2109375, "learning_rate": 0.0003724146116840495, "loss": 0.1934, "step": 244240 }, { "epoch": 10.12, "grad_norm": 1.203125, "learning_rate": 0.0003724051554880535, "loss": 0.2378, "step": 244250 }, { "epoch": 10.12, "grad_norm": 0.578125, "learning_rate": 0.0003723956990617041, "loss": 0.1864, "step": 244260 }, { "epoch": 10.12, "grad_norm": 1.015625, "learning_rate": 0.0003723862424050193, "loss": 0.2022, "step": 244270 }, { "epoch": 10.12, "grad_norm": 0.9921875, "learning_rate": 0.0003723767855180168, "loss": 0.2287, "step": 244280 }, { "epoch": 10.12, "grad_norm": 0.5703125, "learning_rate": 0.00037236732840071436, "loss": 0.2011, "step": 244290 }, { "epoch": 10.12, "grad_norm": 1.1484375, "learning_rate": 0.0003723578710531299, "loss": 0.1882, "step": 244300 }, { "epoch": 10.12, "grad_norm": 0.578125, "learning_rate": 0.0003723484134752811, "loss": 0.164, "step": 244310 }, { "epoch": 10.12, "grad_norm": 0.0, "learning_rate": 0.0003723389556671858, "loss": 0.1915, "step": 244320 }, { "epoch": 10.12, "grad_norm": 1.46875, "learning_rate": 0.0003723294976288618, "loss": 0.1826, "step": 244330 }, { "epoch": 10.12, "grad_norm": 1.375, "learning_rate": 0.0003723200393603269, "loss": 0.2159, "step": 244340 }, { "epoch": 10.12, "grad_norm": 1.5703125, "learning_rate": 0.0003723105808615989, "loss": 0.2381, "step": 244350 }, { "epoch": 10.12, "grad_norm": 0.67578125, "learning_rate": 0.0003723011221326955, "loss": 0.2136, "step": 244360 }, { "epoch": 10.12, "grad_norm": 0.87890625, "learning_rate": 0.00037229166317363473, "loss": 0.1834, "step": 244370 }, { "epoch": 10.12, "grad_norm": 0.52734375, "learning_rate": 0.00037228220398443425, "loss": 0.2125, "step": 244380 }, { "epoch": 10.12, "grad_norm": 1.3671875, "learning_rate": 0.0003722727445651118, "loss": 0.2095, "step": 244390 }, { "epoch": 10.12, "grad_norm": 0.90625, "learning_rate": 0.00037226328491568524, "loss": 0.2041, "step": 244400 }, { "epoch": 10.12, "grad_norm": 1.25, "learning_rate": 0.00037225382503617237, "loss": 0.2076, "step": 244410 }, { "epoch": 10.12, "grad_norm": 0.93359375, "learning_rate": 0.0003722443649265911, "loss": 0.219, "step": 244420 }, { "epoch": 10.12, "grad_norm": 0.890625, "learning_rate": 0.0003722349045869591, "loss": 0.1837, "step": 244430 }, { "epoch": 10.12, "grad_norm": 1.2734375, "learning_rate": 0.00037222544401729413, "loss": 0.2032, "step": 244440 }, { "epoch": 10.13, "grad_norm": 0.51953125, "learning_rate": 0.0003722159832176141, "loss": 0.2178, "step": 244450 }, { "epoch": 10.13, "grad_norm": 1.046875, "learning_rate": 0.00037220652218793686, "loss": 0.1916, "step": 244460 }, { "epoch": 10.13, "grad_norm": 0.0, "learning_rate": 0.0003721970609282801, "loss": 0.2004, "step": 244470 }, { "epoch": 10.13, "grad_norm": 0.59375, "learning_rate": 0.0003721875994386617, "loss": 0.2075, "step": 244480 }, { "epoch": 10.13, "grad_norm": 0.306640625, "learning_rate": 0.0003721781377190994, "loss": 0.2092, "step": 244490 }, { "epoch": 10.13, "grad_norm": 1.8046875, "learning_rate": 0.000372168675769611, "loss": 0.2375, "step": 244500 }, { "epoch": 10.13, "grad_norm": 0.8046875, "learning_rate": 0.0003721592135902144, "loss": 0.1646, "step": 244510 }, { "epoch": 10.13, "grad_norm": 0.65625, "learning_rate": 0.00037214975118092736, "loss": 0.2026, "step": 244520 }, { "epoch": 10.13, "grad_norm": 1.4921875, "learning_rate": 0.0003721402885417676, "loss": 0.2235, "step": 244530 }, { "epoch": 10.13, "grad_norm": 0.453125, "learning_rate": 0.0003721308256727531, "loss": 0.1951, "step": 244540 }, { "epoch": 10.13, "grad_norm": 1.2578125, "learning_rate": 0.0003721213625739016, "loss": 0.2264, "step": 244550 }, { "epoch": 10.13, "grad_norm": 0.50390625, "learning_rate": 0.0003721118992452308, "loss": 0.2038, "step": 244560 }, { "epoch": 10.13, "grad_norm": 1.1171875, "learning_rate": 0.0003721024356867586, "loss": 0.1775, "step": 244570 }, { "epoch": 10.13, "grad_norm": 0.90234375, "learning_rate": 0.00037209297189850284, "loss": 0.222, "step": 244580 }, { "epoch": 10.13, "grad_norm": 0.9453125, "learning_rate": 0.00037208350788048126, "loss": 0.2118, "step": 244590 }, { "epoch": 10.13, "grad_norm": 0.7734375, "learning_rate": 0.00037207404363271177, "loss": 0.1859, "step": 244600 }, { "epoch": 10.13, "grad_norm": 0.6875, "learning_rate": 0.000372064579155212, "loss": 0.1791, "step": 244610 }, { "epoch": 10.13, "grad_norm": 0.59375, "learning_rate": 0.0003720551144479999, "loss": 0.1721, "step": 244620 }, { "epoch": 10.13, "grad_norm": 0.0, "learning_rate": 0.00037204564951109334, "loss": 0.1953, "step": 244630 }, { "epoch": 10.13, "grad_norm": 1.2109375, "learning_rate": 0.00037203618434451, "loss": 0.1948, "step": 244640 }, { "epoch": 10.13, "grad_norm": 0.31640625, "learning_rate": 0.0003720267189482677, "loss": 0.1805, "step": 244650 }, { "epoch": 10.13, "grad_norm": 0.494140625, "learning_rate": 0.0003720172533223843, "loss": 0.2456, "step": 244660 }, { "epoch": 10.13, "grad_norm": 0.9296875, "learning_rate": 0.00037200778746687767, "loss": 0.2253, "step": 244670 }, { "epoch": 10.13, "grad_norm": 0.337890625, "learning_rate": 0.00037199832138176556, "loss": 0.2022, "step": 244680 }, { "epoch": 10.14, "grad_norm": 0.58203125, "learning_rate": 0.0003719888550670657, "loss": 0.2344, "step": 244690 }, { "epoch": 10.14, "grad_norm": 1.109375, "learning_rate": 0.000371979388522796, "loss": 0.1631, "step": 244700 }, { "epoch": 10.14, "grad_norm": 0.75, "learning_rate": 0.0003719699217489744, "loss": 0.1941, "step": 244710 }, { "epoch": 10.14, "grad_norm": 1.0078125, "learning_rate": 0.0003719604547456184, "loss": 0.2037, "step": 244720 }, { "epoch": 10.14, "grad_norm": 0.90625, "learning_rate": 0.00037195098751274607, "loss": 0.2001, "step": 244730 }, { "epoch": 10.14, "grad_norm": 1.3046875, "learning_rate": 0.0003719415200503752, "loss": 0.1473, "step": 244740 }, { "epoch": 10.14, "grad_norm": 0.83203125, "learning_rate": 0.0003719320523585235, "loss": 0.1884, "step": 244750 }, { "epoch": 10.14, "grad_norm": 3.109375, "learning_rate": 0.0003719225844372088, "loss": 0.2049, "step": 244760 }, { "epoch": 10.14, "grad_norm": 1.171875, "learning_rate": 0.00037191311628644903, "loss": 0.1823, "step": 244770 }, { "epoch": 10.14, "grad_norm": 0.59375, "learning_rate": 0.000371903647906262, "loss": 0.2655, "step": 244780 }, { "epoch": 10.14, "grad_norm": 0.466796875, "learning_rate": 0.00037189417929666535, "loss": 0.1986, "step": 244790 }, { "epoch": 10.14, "grad_norm": 0.62109375, "learning_rate": 0.00037188471045767707, "loss": 0.2228, "step": 244800 }, { "epoch": 10.14, "grad_norm": 0.99609375, "learning_rate": 0.0003718752413893149, "loss": 0.2101, "step": 244810 }, { "epoch": 10.14, "grad_norm": 0.78125, "learning_rate": 0.00037186577209159674, "loss": 0.2111, "step": 244820 }, { "epoch": 10.14, "grad_norm": 0.404296875, "learning_rate": 0.00037185630256454026, "loss": 0.2146, "step": 244830 }, { "epoch": 10.14, "grad_norm": 0.55859375, "learning_rate": 0.0003718468328081635, "loss": 0.1837, "step": 244840 }, { "epoch": 10.14, "grad_norm": 1.15625, "learning_rate": 0.0003718373628224841, "loss": 0.197, "step": 244850 }, { "epoch": 10.14, "grad_norm": 0.2294921875, "learning_rate": 0.00037182789260751994, "loss": 0.1678, "step": 244860 }, { "epoch": 10.14, "grad_norm": 0.90234375, "learning_rate": 0.00037181842216328886, "loss": 0.2103, "step": 244870 }, { "epoch": 10.14, "grad_norm": 1.0625, "learning_rate": 0.00037180895148980866, "loss": 0.1782, "step": 244880 }, { "epoch": 10.14, "grad_norm": 1.328125, "learning_rate": 0.0003717994805870971, "loss": 0.2068, "step": 244890 }, { "epoch": 10.14, "grad_norm": 0.515625, "learning_rate": 0.00037179000945517215, "loss": 0.1532, "step": 244900 }, { "epoch": 10.14, "grad_norm": 1.0, "learning_rate": 0.00037178053809405157, "loss": 0.216, "step": 244910 }, { "epoch": 10.14, "grad_norm": 1.140625, "learning_rate": 0.00037177106650375315, "loss": 0.2201, "step": 244920 }, { "epoch": 10.14, "grad_norm": 0.5859375, "learning_rate": 0.0003717615946842947, "loss": 0.2025, "step": 244930 }, { "epoch": 10.15, "grad_norm": 0.220703125, "learning_rate": 0.00037175212263569407, "loss": 0.1362, "step": 244940 }, { "epoch": 10.15, "grad_norm": 0.91015625, "learning_rate": 0.0003717426503579692, "loss": 0.1551, "step": 244950 }, { "epoch": 10.15, "grad_norm": 0.85546875, "learning_rate": 0.0003717331778511377, "loss": 0.2104, "step": 244960 }, { "epoch": 10.15, "grad_norm": 0.9765625, "learning_rate": 0.0003717237051152175, "loss": 0.1718, "step": 244970 }, { "epoch": 10.15, "grad_norm": 0.408203125, "learning_rate": 0.00037171423215022644, "loss": 0.2075, "step": 244980 }, { "epoch": 10.15, "grad_norm": 1.4609375, "learning_rate": 0.00037170475895618234, "loss": 0.2076, "step": 244990 }, { "epoch": 10.15, "grad_norm": 1.265625, "learning_rate": 0.00037169528553310303, "loss": 0.1686, "step": 245000 }, { "epoch": 10.15, "grad_norm": 0.41796875, "learning_rate": 0.0003716858118810064, "loss": 0.1787, "step": 245010 }, { "epoch": 10.15, "grad_norm": 0.78125, "learning_rate": 0.00037167633799991016, "loss": 0.1404, "step": 245020 }, { "epoch": 10.15, "grad_norm": 0.4609375, "learning_rate": 0.0003716668638898322, "loss": 0.1952, "step": 245030 }, { "epoch": 10.15, "grad_norm": 0.79296875, "learning_rate": 0.0003716573895507903, "loss": 0.2105, "step": 245040 }, { "epoch": 10.15, "grad_norm": 1.0625, "learning_rate": 0.00037164791498280243, "loss": 0.2498, "step": 245050 }, { "epoch": 10.15, "grad_norm": 0.65234375, "learning_rate": 0.0003716384401858862, "loss": 0.1393, "step": 245060 }, { "epoch": 10.15, "grad_norm": 0.76171875, "learning_rate": 0.0003716289651600596, "loss": 0.2269, "step": 245070 }, { "epoch": 10.15, "grad_norm": 0.86328125, "learning_rate": 0.00037161948990534053, "loss": 0.1972, "step": 245080 }, { "epoch": 10.15, "grad_norm": 0.51171875, "learning_rate": 0.0003716100144217466, "loss": 0.2115, "step": 245090 }, { "epoch": 10.15, "grad_norm": 0.44140625, "learning_rate": 0.0003716005387092958, "loss": 0.1806, "step": 245100 }, { "epoch": 10.15, "grad_norm": 0.71875, "learning_rate": 0.0003715910627680059, "loss": 0.2039, "step": 245110 }, { "epoch": 10.15, "grad_norm": 0.8671875, "learning_rate": 0.0003715815865978947, "loss": 0.1671, "step": 245120 }, { "epoch": 10.15, "grad_norm": 0.53515625, "learning_rate": 0.0003715721101989802, "loss": 0.1543, "step": 245130 }, { "epoch": 10.15, "grad_norm": 0.9375, "learning_rate": 0.00037156263357128, "loss": 0.1741, "step": 245140 }, { "epoch": 10.15, "grad_norm": 0.703125, "learning_rate": 0.0003715531567148121, "loss": 0.2244, "step": 245150 }, { "epoch": 10.15, "grad_norm": 0.50390625, "learning_rate": 0.00037154367962959436, "loss": 0.1538, "step": 245160 }, { "epoch": 10.15, "grad_norm": 1.59375, "learning_rate": 0.0003715342023156444, "loss": 0.1718, "step": 245170 }, { "epoch": 10.16, "grad_norm": 1.7578125, "learning_rate": 0.00037152472477298027, "loss": 0.1795, "step": 245180 }, { "epoch": 10.16, "grad_norm": 0.796875, "learning_rate": 0.0003715152470016198, "loss": 0.2027, "step": 245190 }, { "epoch": 10.16, "grad_norm": 0.78125, "learning_rate": 0.0003715057690015806, "loss": 0.148, "step": 245200 }, { "epoch": 10.16, "grad_norm": 1.328125, "learning_rate": 0.00037149629077288084, "loss": 0.1716, "step": 245210 }, { "epoch": 10.16, "grad_norm": 0.94140625, "learning_rate": 0.00037148681231553805, "loss": 0.2361, "step": 245220 }, { "epoch": 10.16, "grad_norm": 0.68359375, "learning_rate": 0.00037147733362957025, "loss": 0.163, "step": 245230 }, { "epoch": 10.16, "grad_norm": 0.953125, "learning_rate": 0.00037146785471499523, "loss": 0.2439, "step": 245240 }, { "epoch": 10.16, "grad_norm": 0.57421875, "learning_rate": 0.00037145837557183076, "loss": 0.1831, "step": 245250 }, { "epoch": 10.16, "grad_norm": 1.4921875, "learning_rate": 0.0003714488962000948, "loss": 0.1958, "step": 245260 }, { "epoch": 10.16, "grad_norm": 2.0625, "learning_rate": 0.00037143941659980516, "loss": 0.2079, "step": 245270 }, { "epoch": 10.16, "grad_norm": 0.6328125, "learning_rate": 0.0003714299367709796, "loss": 0.2436, "step": 245280 }, { "epoch": 10.16, "grad_norm": 0.341796875, "learning_rate": 0.00037142045671363606, "loss": 0.1434, "step": 245290 }, { "epoch": 10.16, "grad_norm": 0.79296875, "learning_rate": 0.0003714109764277923, "loss": 0.2556, "step": 245300 }, { "epoch": 10.16, "grad_norm": 1.109375, "learning_rate": 0.00037140149591346615, "loss": 0.2116, "step": 245310 }, { "epoch": 10.16, "grad_norm": 0.5, "learning_rate": 0.0003713920151706756, "loss": 0.1817, "step": 245320 }, { "epoch": 10.16, "grad_norm": 0.69140625, "learning_rate": 0.00037138253419943823, "loss": 0.1734, "step": 245330 }, { "epoch": 10.16, "grad_norm": 0.91796875, "learning_rate": 0.0003713730529997722, "loss": 0.2233, "step": 245340 }, { "epoch": 10.16, "grad_norm": 1.171875, "learning_rate": 0.0003713635715716952, "loss": 0.1821, "step": 245350 }, { "epoch": 10.16, "grad_norm": 0.5390625, "learning_rate": 0.0003713540899152249, "loss": 0.21, "step": 245360 }, { "epoch": 10.16, "grad_norm": 0.8046875, "learning_rate": 0.0003713446080303794, "loss": 0.1954, "step": 245370 }, { "epoch": 10.16, "grad_norm": 0.6640625, "learning_rate": 0.0003713351259171765, "loss": 0.1903, "step": 245380 }, { "epoch": 10.16, "grad_norm": 0.4296875, "learning_rate": 0.0003713256435756339, "loss": 0.2109, "step": 245390 }, { "epoch": 10.16, "grad_norm": 0.828125, "learning_rate": 0.00037131616100576965, "loss": 0.2105, "step": 245400 }, { "epoch": 10.16, "grad_norm": 0.59765625, "learning_rate": 0.00037130667820760137, "loss": 0.1785, "step": 245410 }, { "epoch": 10.17, "grad_norm": 0.78125, "learning_rate": 0.0003712971951811471, "loss": 0.1986, "step": 245420 }, { "epoch": 10.17, "grad_norm": 0.2021484375, "learning_rate": 0.00037128771192642464, "loss": 0.1899, "step": 245430 }, { "epoch": 10.17, "grad_norm": 0.5625, "learning_rate": 0.0003712782284434517, "loss": 0.1563, "step": 245440 }, { "epoch": 10.17, "grad_norm": 1.2890625, "learning_rate": 0.0003712687447322463, "loss": 0.1943, "step": 245450 }, { "epoch": 10.17, "grad_norm": 0.734375, "learning_rate": 0.0003712592607928262, "loss": 0.1476, "step": 245460 }, { "epoch": 10.17, "grad_norm": 0.65625, "learning_rate": 0.0003712497766252093, "loss": 0.2446, "step": 245470 }, { "epoch": 10.17, "grad_norm": 0.93359375, "learning_rate": 0.00037124029222941336, "loss": 0.1524, "step": 245480 }, { "epoch": 10.17, "grad_norm": 1.4765625, "learning_rate": 0.00037123080760545626, "loss": 0.1787, "step": 245490 }, { "epoch": 10.17, "grad_norm": 1.140625, "learning_rate": 0.00037122132275335597, "loss": 0.1871, "step": 245500 }, { "epoch": 10.17, "grad_norm": 0.5859375, "learning_rate": 0.0003712118376731302, "loss": 0.1932, "step": 245510 }, { "epoch": 10.17, "grad_norm": 0.55859375, "learning_rate": 0.0003712023523647968, "loss": 0.1744, "step": 245520 }, { "epoch": 10.17, "grad_norm": 0.8515625, "learning_rate": 0.00037119286682837373, "loss": 0.2444, "step": 245530 }, { "epoch": 10.17, "grad_norm": 0.54296875, "learning_rate": 0.00037118338106387867, "loss": 0.2003, "step": 245540 }, { "epoch": 10.17, "grad_norm": 0.8984375, "learning_rate": 0.00037117389507132964, "loss": 0.17, "step": 245550 }, { "epoch": 10.17, "grad_norm": 0.55078125, "learning_rate": 0.00037116440885074444, "loss": 0.1812, "step": 245560 }, { "epoch": 10.17, "grad_norm": 0.8046875, "learning_rate": 0.00037115492240214084, "loss": 0.2366, "step": 245570 }, { "epoch": 10.17, "grad_norm": 0.69921875, "learning_rate": 0.00037114543572553684, "loss": 0.2137, "step": 245580 }, { "epoch": 10.17, "grad_norm": 0.7890625, "learning_rate": 0.0003711359488209501, "loss": 0.2133, "step": 245590 }, { "epoch": 10.17, "grad_norm": 0.400390625, "learning_rate": 0.0003711264616883987, "loss": 0.1526, "step": 245600 }, { "epoch": 10.17, "grad_norm": 0.7421875, "learning_rate": 0.0003711169743279004, "loss": 0.1853, "step": 245610 }, { "epoch": 10.17, "grad_norm": 0.96875, "learning_rate": 0.00037110748673947297, "loss": 0.1916, "step": 245620 }, { "epoch": 10.17, "grad_norm": 0.53125, "learning_rate": 0.00037109799892313434, "loss": 0.2128, "step": 245630 }, { "epoch": 10.17, "grad_norm": 0.66796875, "learning_rate": 0.0003710885108789024, "loss": 0.2365, "step": 245640 }, { "epoch": 10.17, "grad_norm": 1.2421875, "learning_rate": 0.0003710790226067948, "loss": 0.1966, "step": 245650 }, { "epoch": 10.18, "grad_norm": 0.50390625, "learning_rate": 0.00037106953410682975, "loss": 0.145, "step": 245660 }, { "epoch": 10.18, "grad_norm": 0.984375, "learning_rate": 0.00037106004537902474, "loss": 0.1489, "step": 245670 }, { "epoch": 10.18, "grad_norm": 0.490234375, "learning_rate": 0.0003710505564233979, "loss": 0.1642, "step": 245680 }, { "epoch": 10.18, "grad_norm": 0.5859375, "learning_rate": 0.000371041067239967, "loss": 0.1574, "step": 245690 }, { "epoch": 10.18, "grad_norm": 0.98828125, "learning_rate": 0.0003710315778287499, "loss": 0.1938, "step": 245700 }, { "epoch": 10.18, "grad_norm": 1.0625, "learning_rate": 0.00037102208818976434, "loss": 0.187, "step": 245710 }, { "epoch": 10.18, "grad_norm": 0.44140625, "learning_rate": 0.0003710125983230284, "loss": 0.2549, "step": 245720 }, { "epoch": 10.18, "grad_norm": 1.0390625, "learning_rate": 0.00037100310822855964, "loss": 0.1753, "step": 245730 }, { "epoch": 10.18, "grad_norm": 0.73046875, "learning_rate": 0.00037099361790637626, "loss": 0.2136, "step": 245740 }, { "epoch": 10.18, "grad_norm": 0.412109375, "learning_rate": 0.0003709841273564959, "loss": 0.1999, "step": 245750 }, { "epoch": 10.18, "grad_norm": 0.859375, "learning_rate": 0.00037097463657893647, "loss": 0.2353, "step": 245760 }, { "epoch": 10.18, "grad_norm": 0.66796875, "learning_rate": 0.00037096514557371585, "loss": 0.1801, "step": 245770 }, { "epoch": 10.18, "grad_norm": 0.57421875, "learning_rate": 0.00037095565434085186, "loss": 0.2023, "step": 245780 }, { "epoch": 10.18, "grad_norm": 1.171875, "learning_rate": 0.00037094616288036245, "loss": 0.1737, "step": 245790 }, { "epoch": 10.18, "grad_norm": 0.68359375, "learning_rate": 0.0003709366711922654, "loss": 0.2102, "step": 245800 }, { "epoch": 10.18, "grad_norm": 0.9609375, "learning_rate": 0.0003709271792765786, "loss": 0.1902, "step": 245810 }, { "epoch": 10.18, "grad_norm": 1.1640625, "learning_rate": 0.00037091768713331993, "loss": 0.228, "step": 245820 }, { "epoch": 10.18, "grad_norm": 0.9296875, "learning_rate": 0.00037090819476250715, "loss": 0.22, "step": 245830 }, { "epoch": 10.18, "grad_norm": 0.9921875, "learning_rate": 0.00037089870216415824, "loss": 0.2121, "step": 245840 }, { "epoch": 10.18, "grad_norm": 0.65625, "learning_rate": 0.00037088920933829107, "loss": 0.2292, "step": 245850 }, { "epoch": 10.18, "grad_norm": 0.388671875, "learning_rate": 0.0003708797162849235, "loss": 0.2076, "step": 245860 }, { "epoch": 10.18, "grad_norm": 0.453125, "learning_rate": 0.0003708702230040732, "loss": 0.1783, "step": 245870 }, { "epoch": 10.18, "grad_norm": 0.9765625, "learning_rate": 0.00037086072949575827, "loss": 0.1939, "step": 245880 }, { "epoch": 10.18, "grad_norm": 0.546875, "learning_rate": 0.00037085123575999654, "loss": 0.1994, "step": 245890 }, { "epoch": 10.19, "grad_norm": 1.28125, "learning_rate": 0.0003708417417968058, "loss": 0.2529, "step": 245900 }, { "epoch": 10.19, "grad_norm": 0.671875, "learning_rate": 0.0003708322476062039, "loss": 0.1609, "step": 245910 }, { "epoch": 10.19, "grad_norm": 0.890625, "learning_rate": 0.00037082275318820883, "loss": 0.2107, "step": 245920 }, { "epoch": 10.19, "grad_norm": 0.2890625, "learning_rate": 0.0003708132585428384, "loss": 0.1894, "step": 245930 }, { "epoch": 10.19, "grad_norm": 0.38671875, "learning_rate": 0.0003708037636701105, "loss": 0.2156, "step": 245940 }, { "epoch": 10.19, "grad_norm": 0.609375, "learning_rate": 0.00037079426857004286, "loss": 0.2556, "step": 245950 }, { "epoch": 10.19, "grad_norm": 0.78515625, "learning_rate": 0.0003707847732426535, "loss": 0.1965, "step": 245960 }, { "epoch": 10.19, "grad_norm": 1.328125, "learning_rate": 0.00037077527768796014, "loss": 0.1922, "step": 245970 }, { "epoch": 10.19, "grad_norm": 0.9140625, "learning_rate": 0.00037076578190598094, "loss": 0.1865, "step": 245980 }, { "epoch": 10.19, "grad_norm": 0.890625, "learning_rate": 0.00037075628589673345, "loss": 0.2039, "step": 245990 }, { "epoch": 10.19, "grad_norm": 0.6875, "learning_rate": 0.00037074678966023567, "loss": 0.1967, "step": 246000 }, { "epoch": 10.19, "grad_norm": 1.359375, "learning_rate": 0.0003707372931965055, "loss": 0.2179, "step": 246010 }, { "epoch": 10.19, "grad_norm": 0.314453125, "learning_rate": 0.00037072779650556084, "loss": 0.2196, "step": 246020 }, { "epoch": 10.19, "grad_norm": 1.28125, "learning_rate": 0.0003707182995874194, "loss": 0.2267, "step": 246030 }, { "epoch": 10.19, "grad_norm": 0.82421875, "learning_rate": 0.00037070880244209923, "loss": 0.2321, "step": 246040 }, { "epoch": 10.19, "grad_norm": 0.83203125, "learning_rate": 0.00037069930506961806, "loss": 0.2124, "step": 246050 }, { "epoch": 10.19, "grad_norm": 0.60546875, "learning_rate": 0.0003706898074699939, "loss": 0.1917, "step": 246060 }, { "epoch": 10.19, "grad_norm": 0.9609375, "learning_rate": 0.00037068030964324455, "loss": 0.2136, "step": 246070 }, { "epoch": 10.19, "grad_norm": 0.78515625, "learning_rate": 0.00037067081158938784, "loss": 0.2177, "step": 246080 }, { "epoch": 10.19, "grad_norm": 0.74609375, "learning_rate": 0.0003706613133084418, "loss": 0.143, "step": 246090 }, { "epoch": 10.19, "grad_norm": 0.259765625, "learning_rate": 0.00037065181480042407, "loss": 0.2121, "step": 246100 }, { "epoch": 10.19, "grad_norm": 0.54296875, "learning_rate": 0.0003706423160653528, "loss": 0.1818, "step": 246110 }, { "epoch": 10.19, "grad_norm": 0.85546875, "learning_rate": 0.0003706328171032456, "loss": 0.1879, "step": 246120 }, { "epoch": 10.19, "grad_norm": 0.9453125, "learning_rate": 0.0003706233179141205, "loss": 0.1975, "step": 246130 }, { "epoch": 10.2, "grad_norm": 0.51953125, "learning_rate": 0.00037061381849799524, "loss": 0.1925, "step": 246140 }, { "epoch": 10.2, "grad_norm": 0.48828125, "learning_rate": 0.000370604318854888, "loss": 0.1722, "step": 246150 }, { "epoch": 10.2, "grad_norm": 0.28515625, "learning_rate": 0.0003705948189848163, "loss": 0.2503, "step": 246160 }, { "epoch": 10.2, "grad_norm": 0.82421875, "learning_rate": 0.0003705853188877982, "loss": 0.1919, "step": 246170 }, { "epoch": 10.2, "grad_norm": 0.53125, "learning_rate": 0.00037057581856385157, "loss": 0.2407, "step": 246180 }, { "epoch": 10.2, "grad_norm": 0.9375, "learning_rate": 0.0003705663180129943, "loss": 0.1741, "step": 246190 }, { "epoch": 10.2, "grad_norm": 1.0703125, "learning_rate": 0.00037055681723524423, "loss": 0.2119, "step": 246200 }, { "epoch": 10.2, "grad_norm": 0.9375, "learning_rate": 0.0003705473162306192, "loss": 0.1749, "step": 246210 }, { "epoch": 10.2, "grad_norm": 0.890625, "learning_rate": 0.0003705378149991372, "loss": 0.2051, "step": 246220 }, { "epoch": 10.2, "grad_norm": 0.9609375, "learning_rate": 0.00037052831354081605, "loss": 0.1706, "step": 246230 }, { "epoch": 10.2, "grad_norm": 0.8828125, "learning_rate": 0.0003705188118556736, "loss": 0.2, "step": 246240 }, { "epoch": 10.2, "grad_norm": 1.203125, "learning_rate": 0.0003705093099437278, "loss": 0.1813, "step": 246250 }, { "epoch": 10.2, "grad_norm": 0.7265625, "learning_rate": 0.0003704998078049964, "loss": 0.2692, "step": 246260 }, { "epoch": 10.2, "grad_norm": 0.671875, "learning_rate": 0.0003704903054394975, "loss": 0.1479, "step": 246270 }, { "epoch": 10.2, "grad_norm": 0.4609375, "learning_rate": 0.0003704808028472487, "loss": 0.1973, "step": 246280 }, { "epoch": 10.2, "grad_norm": 0.4765625, "learning_rate": 0.00037047130002826816, "loss": 0.179, "step": 246290 }, { "epoch": 10.2, "grad_norm": 0.70703125, "learning_rate": 0.0003704617969825736, "loss": 0.1502, "step": 246300 }, { "epoch": 10.2, "grad_norm": 0.91015625, "learning_rate": 0.000370452293710183, "loss": 0.2074, "step": 246310 }, { "epoch": 10.2, "grad_norm": 1.125, "learning_rate": 0.00037044279021111417, "loss": 0.2083, "step": 246320 }, { "epoch": 10.2, "grad_norm": 0.94140625, "learning_rate": 0.00037043328648538493, "loss": 0.212, "step": 246330 }, { "epoch": 10.2, "grad_norm": 0.87109375, "learning_rate": 0.00037042378253301336, "loss": 0.2366, "step": 246340 }, { "epoch": 10.2, "grad_norm": 0.6640625, "learning_rate": 0.00037041427835401714, "loss": 0.2515, "step": 246350 }, { "epoch": 10.2, "grad_norm": 0.71875, "learning_rate": 0.0003704047739484143, "loss": 0.2477, "step": 246360 }, { "epoch": 10.2, "grad_norm": 0.83203125, "learning_rate": 0.0003703952693162227, "loss": 0.1764, "step": 246370 }, { "epoch": 10.21, "grad_norm": 0.4921875, "learning_rate": 0.0003703857644574602, "loss": 0.1817, "step": 246380 }, { "epoch": 10.21, "grad_norm": 0.51171875, "learning_rate": 0.0003703762593721447, "loss": 0.1555, "step": 246390 }, { "epoch": 10.21, "grad_norm": 0.455078125, "learning_rate": 0.0003703667540602941, "loss": 0.1946, "step": 246400 }, { "epoch": 10.21, "grad_norm": 0.75390625, "learning_rate": 0.00037035724852192616, "loss": 0.1614, "step": 246410 }, { "epoch": 10.21, "grad_norm": 0.6875, "learning_rate": 0.00037034774275705897, "loss": 0.2216, "step": 246420 }, { "epoch": 10.21, "grad_norm": 1.140625, "learning_rate": 0.00037033823676571023, "loss": 0.2103, "step": 246430 }, { "epoch": 10.21, "grad_norm": 0.796875, "learning_rate": 0.00037032873054789806, "loss": 0.164, "step": 246440 }, { "epoch": 10.21, "grad_norm": 0.60546875, "learning_rate": 0.0003703192241036401, "loss": 0.2036, "step": 246450 }, { "epoch": 10.21, "grad_norm": 1.15625, "learning_rate": 0.0003703097174329544, "loss": 0.215, "step": 246460 }, { "epoch": 10.21, "grad_norm": 0.55078125, "learning_rate": 0.0003703002105358588, "loss": 0.1799, "step": 246470 }, { "epoch": 10.21, "grad_norm": 0.4296875, "learning_rate": 0.0003702907034123712, "loss": 0.1771, "step": 246480 }, { "epoch": 10.21, "grad_norm": 0.53125, "learning_rate": 0.00037028119606250946, "loss": 0.2024, "step": 246490 }, { "epoch": 10.21, "grad_norm": 2.078125, "learning_rate": 0.00037027168848629156, "loss": 0.1778, "step": 246500 }, { "epoch": 10.21, "grad_norm": 0.640625, "learning_rate": 0.0003702621806837352, "loss": 0.2037, "step": 246510 }, { "epoch": 10.21, "grad_norm": 0.6015625, "learning_rate": 0.0003702526726548586, "loss": 0.208, "step": 246520 }, { "epoch": 10.21, "grad_norm": 0.66796875, "learning_rate": 0.00037024316439967933, "loss": 0.2032, "step": 246530 }, { "epoch": 10.21, "grad_norm": 0.76953125, "learning_rate": 0.0003702336559182154, "loss": 0.215, "step": 246540 }, { "epoch": 10.21, "grad_norm": 0.7734375, "learning_rate": 0.0003702241472104848, "loss": 0.1742, "step": 246550 }, { "epoch": 10.21, "grad_norm": 0.62890625, "learning_rate": 0.0003702146382765052, "loss": 0.1702, "step": 246560 }, { "epoch": 10.21, "grad_norm": 0.609375, "learning_rate": 0.0003702051291162948, "loss": 0.2137, "step": 246570 }, { "epoch": 10.21, "grad_norm": 1.7421875, "learning_rate": 0.0003701956197298712, "loss": 0.2218, "step": 246580 }, { "epoch": 10.21, "grad_norm": 0.7734375, "learning_rate": 0.00037018611011725244, "loss": 0.241, "step": 246590 }, { "epoch": 10.21, "grad_norm": 1.25, "learning_rate": 0.00037017660027845645, "loss": 0.2208, "step": 246600 }, { "epoch": 10.21, "grad_norm": 0.57421875, "learning_rate": 0.000370167090213501, "loss": 0.1862, "step": 246610 }, { "epoch": 10.21, "grad_norm": 0.69921875, "learning_rate": 0.0003701575799224042, "loss": 0.2271, "step": 246620 }, { "epoch": 10.22, "grad_norm": 0.6796875, "learning_rate": 0.0003701480694051837, "loss": 0.1612, "step": 246630 }, { "epoch": 10.22, "grad_norm": 0.578125, "learning_rate": 0.0003701385586618575, "loss": 0.1682, "step": 246640 }, { "epoch": 10.22, "grad_norm": 1.265625, "learning_rate": 0.0003701290476924436, "loss": 0.1371, "step": 246650 }, { "epoch": 10.22, "grad_norm": 0.96875, "learning_rate": 0.00037011953649695973, "loss": 0.1977, "step": 246660 }, { "epoch": 10.22, "grad_norm": 0.6015625, "learning_rate": 0.0003701100250754238, "loss": 0.161, "step": 246670 }, { "epoch": 10.22, "grad_norm": 0.90625, "learning_rate": 0.00037010051342785394, "loss": 0.2067, "step": 246680 }, { "epoch": 10.22, "grad_norm": 0.5, "learning_rate": 0.00037009100155426775, "loss": 0.1843, "step": 246690 }, { "epoch": 10.22, "grad_norm": 0.92578125, "learning_rate": 0.00037008148945468335, "loss": 0.1813, "step": 246700 }, { "epoch": 10.22, "grad_norm": 0.9609375, "learning_rate": 0.00037007197712911853, "loss": 0.2168, "step": 246710 }, { "epoch": 10.22, "grad_norm": 0.62890625, "learning_rate": 0.00037006246457759113, "loss": 0.1959, "step": 246720 }, { "epoch": 10.22, "grad_norm": 1.046875, "learning_rate": 0.00037005295180011924, "loss": 0.2428, "step": 246730 }, { "epoch": 10.22, "grad_norm": 2.796875, "learning_rate": 0.00037004343879672066, "loss": 0.1845, "step": 246740 }, { "epoch": 10.22, "grad_norm": 1.1796875, "learning_rate": 0.0003700339255674132, "loss": 0.1942, "step": 246750 }, { "epoch": 10.22, "grad_norm": 1.4609375, "learning_rate": 0.00037002441211221495, "loss": 0.2114, "step": 246760 }, { "epoch": 10.22, "grad_norm": 0.73828125, "learning_rate": 0.0003700148984311436, "loss": 0.1551, "step": 246770 }, { "epoch": 10.22, "grad_norm": 0.76953125, "learning_rate": 0.00037000538452421734, "loss": 0.2235, "step": 246780 }, { "epoch": 10.22, "grad_norm": 0.462890625, "learning_rate": 0.0003699958703914538, "loss": 0.2489, "step": 246790 }, { "epoch": 10.22, "grad_norm": 0.435546875, "learning_rate": 0.000369986356032871, "loss": 0.1689, "step": 246800 }, { "epoch": 10.22, "grad_norm": 0.58203125, "learning_rate": 0.0003699768414484869, "loss": 0.2079, "step": 246810 }, { "epoch": 10.22, "grad_norm": 0.439453125, "learning_rate": 0.0003699673266383192, "loss": 0.1898, "step": 246820 }, { "epoch": 10.22, "grad_norm": 0.79296875, "learning_rate": 0.00036995781160238607, "loss": 0.2232, "step": 246830 }, { "epoch": 10.22, "grad_norm": 0.67578125, "learning_rate": 0.0003699482963407053, "loss": 0.196, "step": 246840 }, { "epoch": 10.22, "grad_norm": 1.1796875, "learning_rate": 0.00036993878085329467, "loss": 0.2191, "step": 246850 }, { "epoch": 10.22, "grad_norm": 0.37109375, "learning_rate": 0.0003699292651401723, "loss": 0.1922, "step": 246860 }, { "epoch": 10.23, "grad_norm": 0.859375, "learning_rate": 0.00036991974920135603, "loss": 0.194, "step": 246870 }, { "epoch": 10.23, "grad_norm": 1.1171875, "learning_rate": 0.00036991023303686366, "loss": 0.2633, "step": 246880 }, { "epoch": 10.23, "grad_norm": 1.203125, "learning_rate": 0.0003699007166467132, "loss": 0.2234, "step": 246890 }, { "epoch": 10.23, "grad_norm": 1.03125, "learning_rate": 0.00036989120003092256, "loss": 0.1927, "step": 246900 }, { "epoch": 10.23, "grad_norm": 0.83203125, "learning_rate": 0.00036988168318950965, "loss": 0.1988, "step": 246910 }, { "epoch": 10.23, "grad_norm": 0.68359375, "learning_rate": 0.0003698721661224924, "loss": 0.2049, "step": 246920 }, { "epoch": 10.23, "grad_norm": 0.9140625, "learning_rate": 0.0003698626488298885, "loss": 0.1787, "step": 246930 }, { "epoch": 10.23, "grad_norm": 0.83984375, "learning_rate": 0.0003698531313117162, "loss": 0.2558, "step": 246940 }, { "epoch": 10.23, "grad_norm": 1.5078125, "learning_rate": 0.00036984361356799325, "loss": 0.2137, "step": 246950 }, { "epoch": 10.23, "grad_norm": 0.80859375, "learning_rate": 0.0003698340955987374, "loss": 0.1688, "step": 246960 }, { "epoch": 10.23, "grad_norm": 0.453125, "learning_rate": 0.0003698245774039669, "loss": 0.1706, "step": 246970 }, { "epoch": 10.23, "grad_norm": 0.66796875, "learning_rate": 0.00036981505898369944, "loss": 0.1862, "step": 246980 }, { "epoch": 10.23, "grad_norm": 0.5546875, "learning_rate": 0.00036980554033795294, "loss": 0.1627, "step": 246990 }, { "epoch": 10.23, "grad_norm": 1.8671875, "learning_rate": 0.0003697960214667454, "loss": 0.2067, "step": 247000 }, { "epoch": 10.23, "grad_norm": 0.5546875, "learning_rate": 0.00036978650237009457, "loss": 0.2505, "step": 247010 }, { "epoch": 10.23, "grad_norm": 0.5390625, "learning_rate": 0.0003697769830480186, "loss": 0.2251, "step": 247020 }, { "epoch": 10.23, "grad_norm": 1.3984375, "learning_rate": 0.00036976746350053533, "loss": 0.2375, "step": 247030 }, { "epoch": 10.23, "grad_norm": 0.375, "learning_rate": 0.0003697579437276625, "loss": 0.158, "step": 247040 }, { "epoch": 10.23, "grad_norm": 1.1015625, "learning_rate": 0.0003697484237294182, "loss": 0.1932, "step": 247050 }, { "epoch": 10.23, "grad_norm": 0.640625, "learning_rate": 0.0003697389035058203, "loss": 0.2289, "step": 247060 }, { "epoch": 10.23, "grad_norm": 0.7890625, "learning_rate": 0.0003697293830568866, "loss": 0.2245, "step": 247070 }, { "epoch": 10.23, "grad_norm": 0.921875, "learning_rate": 0.0003697198623826353, "loss": 0.1807, "step": 247080 }, { "epoch": 10.23, "grad_norm": 0.8515625, "learning_rate": 0.0003697103414830841, "loss": 0.1941, "step": 247090 }, { "epoch": 10.23, "grad_norm": 0.9375, "learning_rate": 0.00036970082035825094, "loss": 0.1792, "step": 247100 }, { "epoch": 10.24, "grad_norm": 0.86328125, "learning_rate": 0.0003696912990081538, "loss": 0.1839, "step": 247110 }, { "epoch": 10.24, "grad_norm": 0.56640625, "learning_rate": 0.0003696817774328105, "loss": 0.1878, "step": 247120 }, { "epoch": 10.24, "grad_norm": 0.421875, "learning_rate": 0.0003696722556322391, "loss": 0.225, "step": 247130 }, { "epoch": 10.24, "grad_norm": 1.8359375, "learning_rate": 0.00036966273360645736, "loss": 0.1941, "step": 247140 }, { "epoch": 10.24, "grad_norm": 0.9921875, "learning_rate": 0.0003696532113554833, "loss": 0.1909, "step": 247150 }, { "epoch": 10.24, "grad_norm": 1.7109375, "learning_rate": 0.0003696436888793349, "loss": 0.2537, "step": 247160 }, { "epoch": 10.24, "grad_norm": 0.455078125, "learning_rate": 0.0003696341661780298, "loss": 0.1795, "step": 247170 }, { "epoch": 10.24, "grad_norm": 0.1640625, "learning_rate": 0.0003696246432515863, "loss": 0.1613, "step": 247180 }, { "epoch": 10.24, "grad_norm": 0.65234375, "learning_rate": 0.00036961512010002205, "loss": 0.1969, "step": 247190 }, { "epoch": 10.24, "grad_norm": 0.4921875, "learning_rate": 0.00036960559672335515, "loss": 0.1916, "step": 247200 }, { "epoch": 10.24, "grad_norm": 1.546875, "learning_rate": 0.00036959607312160337, "loss": 0.1973, "step": 247210 }, { "epoch": 10.24, "grad_norm": 1.3046875, "learning_rate": 0.00036958654929478477, "loss": 0.2068, "step": 247220 }, { "epoch": 10.24, "grad_norm": 1.03125, "learning_rate": 0.0003695770252429171, "loss": 0.2316, "step": 247230 }, { "epoch": 10.24, "grad_norm": 1.0390625, "learning_rate": 0.00036956750096601846, "loss": 0.1856, "step": 247240 }, { "epoch": 10.24, "grad_norm": 0.89453125, "learning_rate": 0.0003695579764641065, "loss": 0.1717, "step": 247250 }, { "epoch": 10.24, "grad_norm": 1.3671875, "learning_rate": 0.0003695484517371996, "loss": 0.1918, "step": 247260 }, { "epoch": 10.24, "grad_norm": 0.408203125, "learning_rate": 0.00036953892678531525, "loss": 0.2253, "step": 247270 }, { "epoch": 10.24, "grad_norm": 1.828125, "learning_rate": 0.0003695294016084716, "loss": 0.1591, "step": 247280 }, { "epoch": 10.24, "grad_norm": 0.6796875, "learning_rate": 0.0003695198762066866, "loss": 0.1991, "step": 247290 }, { "epoch": 10.24, "grad_norm": 0.86328125, "learning_rate": 0.000369510350579978, "loss": 0.2124, "step": 247300 }, { "epoch": 10.24, "grad_norm": 1.7734375, "learning_rate": 0.00036950082472836387, "loss": 0.183, "step": 247310 }, { "epoch": 10.24, "grad_norm": 0.75390625, "learning_rate": 0.0003694912986518621, "loss": 0.1745, "step": 247320 }, { "epoch": 10.24, "grad_norm": 1.0390625, "learning_rate": 0.0003694817723504906, "loss": 0.218, "step": 247330 }, { "epoch": 10.24, "grad_norm": 0.55859375, "learning_rate": 0.00036947224582426733, "loss": 0.2383, "step": 247340 }, { "epoch": 10.25, "grad_norm": 0.84375, "learning_rate": 0.00036946271907321014, "loss": 0.2175, "step": 247350 }, { "epoch": 10.25, "grad_norm": 0.88671875, "learning_rate": 0.000369453192097337, "loss": 0.1991, "step": 247360 }, { "epoch": 10.25, "grad_norm": 0.71484375, "learning_rate": 0.00036944366489666594, "loss": 0.1821, "step": 247370 }, { "epoch": 10.25, "grad_norm": 1.484375, "learning_rate": 0.0003694341374712147, "loss": 0.1812, "step": 247380 }, { "epoch": 10.25, "grad_norm": 1.1328125, "learning_rate": 0.0003694246098210014, "loss": 0.2035, "step": 247390 }, { "epoch": 10.25, "grad_norm": 1.1953125, "learning_rate": 0.00036941508194604386, "loss": 0.1765, "step": 247400 }, { "epoch": 10.25, "grad_norm": 0.98828125, "learning_rate": 0.00036940555384636, "loss": 0.1358, "step": 247410 }, { "epoch": 10.25, "grad_norm": 0.5625, "learning_rate": 0.0003693960255219678, "loss": 0.144, "step": 247420 }, { "epoch": 10.25, "grad_norm": 0.240234375, "learning_rate": 0.0003693864969728852, "loss": 0.1609, "step": 247430 }, { "epoch": 10.25, "grad_norm": 1.59375, "learning_rate": 0.00036937696819913006, "loss": 0.1561, "step": 247440 }, { "epoch": 10.25, "grad_norm": 1.8671875, "learning_rate": 0.00036936743920072034, "loss": 0.1629, "step": 247450 }, { "epoch": 10.25, "grad_norm": 0.77734375, "learning_rate": 0.0003693579099776741, "loss": 0.1796, "step": 247460 }, { "epoch": 10.25, "grad_norm": 0.92578125, "learning_rate": 0.00036934838053000906, "loss": 0.178, "step": 247470 }, { "epoch": 10.25, "grad_norm": 0.921875, "learning_rate": 0.0003693388508577433, "loss": 0.1679, "step": 247480 }, { "epoch": 10.25, "grad_norm": 0.4765625, "learning_rate": 0.0003693293209608947, "loss": 0.2435, "step": 247490 }, { "epoch": 10.25, "grad_norm": 0.65625, "learning_rate": 0.0003693197908394812, "loss": 0.2102, "step": 247500 }, { "epoch": 10.25, "grad_norm": 0.859375, "learning_rate": 0.0003693102604935207, "loss": 0.2293, "step": 247510 }, { "epoch": 10.25, "grad_norm": 0.79296875, "learning_rate": 0.0003693007299230313, "loss": 0.2153, "step": 247520 }, { "epoch": 10.25, "grad_norm": 0.451171875, "learning_rate": 0.0003692911991280307, "loss": 0.228, "step": 247530 }, { "epoch": 10.25, "grad_norm": 0.376953125, "learning_rate": 0.000369281668108537, "loss": 0.2134, "step": 247540 }, { "epoch": 10.25, "grad_norm": 0.8515625, "learning_rate": 0.000369272136864568, "loss": 0.1918, "step": 247550 }, { "epoch": 10.25, "grad_norm": 0.8671875, "learning_rate": 0.0003692626053961418, "loss": 0.1636, "step": 247560 }, { "epoch": 10.25, "grad_norm": 0.74609375, "learning_rate": 0.0003692530737032762, "loss": 0.2209, "step": 247570 }, { "epoch": 10.25, "grad_norm": 0.72265625, "learning_rate": 0.00036924354178598923, "loss": 0.1964, "step": 247580 }, { "epoch": 10.26, "grad_norm": 1.015625, "learning_rate": 0.0003692340096442988, "loss": 0.1628, "step": 247590 }, { "epoch": 10.26, "grad_norm": 1.1953125, "learning_rate": 0.0003692244772782228, "loss": 0.1642, "step": 247600 }, { "epoch": 10.26, "grad_norm": 0.91796875, "learning_rate": 0.0003692149446877792, "loss": 0.1775, "step": 247610 }, { "epoch": 10.26, "grad_norm": 0.82421875, "learning_rate": 0.000369205411872986, "loss": 0.2114, "step": 247620 }, { "epoch": 10.26, "grad_norm": 1.828125, "learning_rate": 0.00036919587883386105, "loss": 0.2277, "step": 247630 }, { "epoch": 10.26, "grad_norm": 0.53125, "learning_rate": 0.00036918634557042235, "loss": 0.2188, "step": 247640 }, { "epoch": 10.26, "grad_norm": 0.26171875, "learning_rate": 0.00036917681208268786, "loss": 0.1756, "step": 247650 }, { "epoch": 10.26, "grad_norm": 0.953125, "learning_rate": 0.00036916727837067536, "loss": 0.2293, "step": 247660 }, { "epoch": 10.26, "grad_norm": 0.2236328125, "learning_rate": 0.000369157744434403, "loss": 0.1791, "step": 247670 }, { "epoch": 10.26, "grad_norm": 0.828125, "learning_rate": 0.0003691482102738886, "loss": 0.2224, "step": 247680 }, { "epoch": 10.26, "grad_norm": 0.455078125, "learning_rate": 0.0003691386758891502, "loss": 0.1657, "step": 247690 }, { "epoch": 10.26, "grad_norm": 0.4140625, "learning_rate": 0.00036912914128020563, "loss": 0.1383, "step": 247700 }, { "epoch": 10.26, "grad_norm": 0.5234375, "learning_rate": 0.0003691196064470729, "loss": 0.2012, "step": 247710 }, { "epoch": 10.26, "grad_norm": 0.703125, "learning_rate": 0.0003691100713897699, "loss": 0.2392, "step": 247720 }, { "epoch": 10.26, "grad_norm": 0.35546875, "learning_rate": 0.0003691005361083146, "loss": 0.1463, "step": 247730 }, { "epoch": 10.26, "grad_norm": 1.40625, "learning_rate": 0.00036909100060272495, "loss": 0.2289, "step": 247740 }, { "epoch": 10.26, "grad_norm": 1.3125, "learning_rate": 0.000369081464873019, "loss": 0.1727, "step": 247750 }, { "epoch": 10.26, "grad_norm": 0.43359375, "learning_rate": 0.0003690719289192145, "loss": 0.1989, "step": 247760 }, { "epoch": 10.26, "grad_norm": 0.5859375, "learning_rate": 0.00036906239274132945, "loss": 0.1752, "step": 247770 }, { "epoch": 10.26, "grad_norm": 0.59765625, "learning_rate": 0.000369052856339382, "loss": 0.188, "step": 247780 }, { "epoch": 10.26, "grad_norm": 0.58984375, "learning_rate": 0.00036904331971338977, "loss": 0.1514, "step": 247790 }, { "epoch": 10.26, "grad_norm": 0.7578125, "learning_rate": 0.0003690337828633709, "loss": 0.1878, "step": 247800 }, { "epoch": 10.26, "grad_norm": 0.5859375, "learning_rate": 0.0003690242457893433, "loss": 0.1798, "step": 247810 }, { "epoch": 10.26, "grad_norm": 1.1640625, "learning_rate": 0.00036901470849132494, "loss": 0.2481, "step": 247820 }, { "epoch": 10.27, "grad_norm": 1.1328125, "learning_rate": 0.0003690051709693338, "loss": 0.1752, "step": 247830 }, { "epoch": 10.27, "grad_norm": 0.416015625, "learning_rate": 0.0003689956332233877, "loss": 0.2196, "step": 247840 }, { "epoch": 10.27, "grad_norm": 0.74609375, "learning_rate": 0.00036898609525350476, "loss": 0.1794, "step": 247850 }, { "epoch": 10.27, "grad_norm": 0.6875, "learning_rate": 0.0003689765570597028, "loss": 0.1692, "step": 247860 }, { "epoch": 10.27, "grad_norm": 0.9375, "learning_rate": 0.0003689670186419997, "loss": 0.1819, "step": 247870 }, { "epoch": 10.27, "grad_norm": 0.50390625, "learning_rate": 0.00036895748000041365, "loss": 0.1897, "step": 247880 }, { "epoch": 10.27, "grad_norm": 0.60546875, "learning_rate": 0.00036894794113496245, "loss": 0.2025, "step": 247890 }, { "epoch": 10.27, "grad_norm": 0.453125, "learning_rate": 0.00036893840204566404, "loss": 0.1563, "step": 247900 }, { "epoch": 10.27, "grad_norm": 1.8359375, "learning_rate": 0.00036892886273253643, "loss": 0.1906, "step": 247910 }, { "epoch": 10.27, "grad_norm": 0.337890625, "learning_rate": 0.0003689193231955975, "loss": 0.161, "step": 247920 }, { "epoch": 10.27, "grad_norm": 0.73828125, "learning_rate": 0.00036890978343486527, "loss": 0.2043, "step": 247930 }, { "epoch": 10.27, "grad_norm": 0.921875, "learning_rate": 0.0003689002434503577, "loss": 0.1872, "step": 247940 }, { "epoch": 10.27, "grad_norm": 0.08544921875, "learning_rate": 0.0003688907032420926, "loss": 0.1841, "step": 247950 }, { "epoch": 10.27, "grad_norm": 0.6171875, "learning_rate": 0.0003688811628100882, "loss": 0.1868, "step": 247960 }, { "epoch": 10.27, "grad_norm": 1.140625, "learning_rate": 0.0003688716221543622, "loss": 0.1666, "step": 247970 }, { "epoch": 10.27, "grad_norm": 0.8203125, "learning_rate": 0.0003688620812749326, "loss": 0.2212, "step": 247980 }, { "epoch": 10.27, "grad_norm": 1.3671875, "learning_rate": 0.0003688525401718175, "loss": 0.2063, "step": 247990 }, { "epoch": 10.27, "grad_norm": 0.515625, "learning_rate": 0.0003688429988450347, "loss": 0.2287, "step": 248000 }, { "epoch": 10.27, "grad_norm": 0.5859375, "learning_rate": 0.0003688334572946022, "loss": 0.1983, "step": 248010 }, { "epoch": 10.27, "grad_norm": 1.125, "learning_rate": 0.00036882391552053796, "loss": 0.2276, "step": 248020 }, { "epoch": 10.27, "grad_norm": 0.765625, "learning_rate": 0.00036881437352285996, "loss": 0.1607, "step": 248030 }, { "epoch": 10.27, "grad_norm": 0.94140625, "learning_rate": 0.00036880483130158614, "loss": 0.188, "step": 248040 }, { "epoch": 10.27, "grad_norm": 0.56640625, "learning_rate": 0.0003687952888567345, "loss": 0.2022, "step": 248050 }, { "epoch": 10.27, "grad_norm": 0.57421875, "learning_rate": 0.0003687857461883229, "loss": 0.184, "step": 248060 }, { "epoch": 10.28, "grad_norm": 1.1796875, "learning_rate": 0.0003687762032963694, "loss": 0.1993, "step": 248070 }, { "epoch": 10.28, "grad_norm": 0.70703125, "learning_rate": 0.00036876666018089186, "loss": 0.1951, "step": 248080 }, { "epoch": 10.28, "grad_norm": 1.2109375, "learning_rate": 0.0003687571168419083, "loss": 0.1657, "step": 248090 }, { "epoch": 10.28, "grad_norm": 0.486328125, "learning_rate": 0.00036874757327943667, "loss": 0.2237, "step": 248100 }, { "epoch": 10.28, "grad_norm": 0.2060546875, "learning_rate": 0.00036873802949349496, "loss": 0.2209, "step": 248110 }, { "epoch": 10.28, "grad_norm": 0.7421875, "learning_rate": 0.0003687284854841011, "loss": 0.1777, "step": 248120 }, { "epoch": 10.28, "grad_norm": 0.72265625, "learning_rate": 0.000368718941251273, "loss": 0.1774, "step": 248130 }, { "epoch": 10.28, "grad_norm": 1.359375, "learning_rate": 0.00036870939679502874, "loss": 0.2247, "step": 248140 }, { "epoch": 10.28, "grad_norm": 1.5625, "learning_rate": 0.00036869985211538623, "loss": 0.239, "step": 248150 }, { "epoch": 10.28, "grad_norm": 0.54296875, "learning_rate": 0.0003686903072123633, "loss": 0.1679, "step": 248160 }, { "epoch": 10.28, "grad_norm": 0.69921875, "learning_rate": 0.0003686807620859781, "loss": 0.1841, "step": 248170 }, { "epoch": 10.28, "grad_norm": 1.265625, "learning_rate": 0.0003686712167362485, "loss": 0.1896, "step": 248180 }, { "epoch": 10.28, "grad_norm": 0.81640625, "learning_rate": 0.00036866167116319257, "loss": 0.2016, "step": 248190 }, { "epoch": 10.28, "grad_norm": 1.3125, "learning_rate": 0.0003686521253668281, "loss": 0.2053, "step": 248200 }, { "epoch": 10.28, "grad_norm": 1.0234375, "learning_rate": 0.0003686425793471732, "loss": 0.2285, "step": 248210 }, { "epoch": 10.28, "grad_norm": 1.328125, "learning_rate": 0.00036863303310424574, "loss": 0.2114, "step": 248220 }, { "epoch": 10.28, "grad_norm": 0.7578125, "learning_rate": 0.0003686234866380638, "loss": 0.248, "step": 248230 }, { "epoch": 10.28, "grad_norm": 1.0234375, "learning_rate": 0.0003686139399486451, "loss": 0.2082, "step": 248240 }, { "epoch": 10.28, "grad_norm": 0.90625, "learning_rate": 0.0003686043930360079, "loss": 0.1964, "step": 248250 }, { "epoch": 10.28, "grad_norm": 0.73046875, "learning_rate": 0.00036859484590017, "loss": 0.2168, "step": 248260 }, { "epoch": 10.28, "grad_norm": 0.439453125, "learning_rate": 0.0003685852985411494, "loss": 0.1714, "step": 248270 }, { "epoch": 10.28, "grad_norm": 0.458984375, "learning_rate": 0.0003685757509589641, "loss": 0.205, "step": 248280 }, { "epoch": 10.28, "grad_norm": 0.7265625, "learning_rate": 0.0003685662031536321, "loss": 0.1558, "step": 248290 }, { "epoch": 10.28, "grad_norm": 0.6796875, "learning_rate": 0.0003685566551251712, "loss": 0.1771, "step": 248300 }, { "epoch": 10.28, "grad_norm": 1.0859375, "learning_rate": 0.0003685471068735996, "loss": 0.1881, "step": 248310 }, { "epoch": 10.29, "grad_norm": 0.64453125, "learning_rate": 0.000368537558398935, "loss": 0.1597, "step": 248320 }, { "epoch": 10.29, "grad_norm": 0.58203125, "learning_rate": 0.0003685280097011956, "loss": 0.1865, "step": 248330 }, { "epoch": 10.29, "grad_norm": 0.609375, "learning_rate": 0.0003685184607803993, "loss": 0.2171, "step": 248340 }, { "epoch": 10.29, "grad_norm": 0.921875, "learning_rate": 0.00036850891163656396, "loss": 0.1791, "step": 248350 }, { "epoch": 10.29, "grad_norm": 0.91796875, "learning_rate": 0.0003684993622697078, "loss": 0.2284, "step": 248360 }, { "epoch": 10.29, "grad_norm": 0.80078125, "learning_rate": 0.00036848981267984855, "loss": 0.158, "step": 248370 }, { "epoch": 10.29, "grad_norm": 0.458984375, "learning_rate": 0.0003684802628670043, "loss": 0.1734, "step": 248380 }, { "epoch": 10.29, "grad_norm": 1.203125, "learning_rate": 0.0003684707128311929, "loss": 0.1935, "step": 248390 }, { "epoch": 10.29, "grad_norm": 0.87109375, "learning_rate": 0.00036846116257243245, "loss": 0.1689, "step": 248400 }, { "epoch": 10.29, "grad_norm": 0.9921875, "learning_rate": 0.00036845161209074094, "loss": 0.2464, "step": 248410 }, { "epoch": 10.29, "grad_norm": 0.67578125, "learning_rate": 0.0003684420613861362, "loss": 0.2352, "step": 248420 }, { "epoch": 10.29, "grad_norm": 1.2421875, "learning_rate": 0.00036843251045863643, "loss": 0.1995, "step": 248430 }, { "epoch": 10.29, "grad_norm": 1.1796875, "learning_rate": 0.0003684229593082594, "loss": 0.2198, "step": 248440 }, { "epoch": 10.29, "grad_norm": 0.8203125, "learning_rate": 0.0003684134079350231, "loss": 0.2388, "step": 248450 }, { "epoch": 10.29, "grad_norm": 0.56640625, "learning_rate": 0.00036840385633894556, "loss": 0.1708, "step": 248460 }, { "epoch": 10.29, "grad_norm": 1.078125, "learning_rate": 0.00036839430452004484, "loss": 0.1994, "step": 248470 }, { "epoch": 10.29, "grad_norm": 0.5546875, "learning_rate": 0.0003683847524783387, "loss": 0.2112, "step": 248480 }, { "epoch": 10.29, "grad_norm": 1.3984375, "learning_rate": 0.0003683752002138453, "loss": 0.2041, "step": 248490 }, { "epoch": 10.29, "grad_norm": 1.4765625, "learning_rate": 0.00036836564772658255, "loss": 0.2191, "step": 248500 }, { "epoch": 10.29, "grad_norm": 0.98828125, "learning_rate": 0.0003683560950165684, "loss": 0.1656, "step": 248510 }, { "epoch": 10.29, "grad_norm": 0.68359375, "learning_rate": 0.000368346542083821, "loss": 0.1916, "step": 248520 }, { "epoch": 10.29, "grad_norm": 1.515625, "learning_rate": 0.00036833698892835796, "loss": 0.2124, "step": 248530 }, { "epoch": 10.29, "grad_norm": 0.345703125, "learning_rate": 0.0003683274355501977, "loss": 0.2135, "step": 248540 }, { "epoch": 10.29, "grad_norm": 1.0078125, "learning_rate": 0.00036831788194935795, "loss": 0.2288, "step": 248550 }, { "epoch": 10.3, "grad_norm": 0.84375, "learning_rate": 0.0003683083281258566, "loss": 0.1735, "step": 248560 }, { "epoch": 10.3, "grad_norm": 0.361328125, "learning_rate": 0.00036829877407971187, "loss": 0.2082, "step": 248570 }, { "epoch": 10.3, "grad_norm": 0.322265625, "learning_rate": 0.0003682892198109415, "loss": 0.1912, "step": 248580 }, { "epoch": 10.3, "grad_norm": 0.3984375, "learning_rate": 0.0003682796653195637, "loss": 0.185, "step": 248590 }, { "epoch": 10.3, "grad_norm": 0.59375, "learning_rate": 0.0003682701106055963, "loss": 0.1993, "step": 248600 }, { "epoch": 10.3, "grad_norm": 0.2265625, "learning_rate": 0.00036826055566905734, "loss": 0.1982, "step": 248610 }, { "epoch": 10.3, "grad_norm": 1.1015625, "learning_rate": 0.0003682510005099648, "loss": 0.2099, "step": 248620 }, { "epoch": 10.3, "grad_norm": 0.703125, "learning_rate": 0.00036824144512833664, "loss": 0.2232, "step": 248630 }, { "epoch": 10.3, "grad_norm": 0.6171875, "learning_rate": 0.00036823188952419086, "loss": 0.1825, "step": 248640 }, { "epoch": 10.3, "grad_norm": 0.76953125, "learning_rate": 0.0003682223336975454, "loss": 0.1872, "step": 248650 }, { "epoch": 10.3, "grad_norm": 1.03125, "learning_rate": 0.00036821277764841824, "loss": 0.187, "step": 248660 }, { "epoch": 10.3, "grad_norm": 1.0390625, "learning_rate": 0.0003682032213768275, "loss": 0.1241, "step": 248670 }, { "epoch": 10.3, "grad_norm": 0.890625, "learning_rate": 0.00036819366488279097, "loss": 0.1667, "step": 248680 }, { "epoch": 10.3, "grad_norm": 1.5078125, "learning_rate": 0.0003681841081663267, "loss": 0.1955, "step": 248690 }, { "epoch": 10.3, "grad_norm": 0.5234375, "learning_rate": 0.0003681745512274528, "loss": 0.1837, "step": 248700 }, { "epoch": 10.3, "grad_norm": 0.6953125, "learning_rate": 0.00036816499406618715, "loss": 0.2029, "step": 248710 }, { "epoch": 10.3, "grad_norm": 0.609375, "learning_rate": 0.0003681554366825477, "loss": 0.2227, "step": 248720 }, { "epoch": 10.3, "grad_norm": 0.81640625, "learning_rate": 0.0003681458790765525, "loss": 0.1854, "step": 248730 }, { "epoch": 10.3, "grad_norm": 1.03125, "learning_rate": 0.0003681363212482195, "loss": 0.2135, "step": 248740 }, { "epoch": 10.3, "grad_norm": 0.490234375, "learning_rate": 0.0003681267631975667, "loss": 0.1858, "step": 248750 }, { "epoch": 10.3, "grad_norm": 1.5, "learning_rate": 0.00036811720492461216, "loss": 0.1469, "step": 248760 }, { "epoch": 10.3, "grad_norm": 0.515625, "learning_rate": 0.0003681076464293737, "loss": 0.2245, "step": 248770 }, { "epoch": 10.3, "grad_norm": 2.296875, "learning_rate": 0.0003680980877118694, "loss": 0.1574, "step": 248780 }, { "epoch": 10.3, "grad_norm": 0.91015625, "learning_rate": 0.00036808852877211727, "loss": 0.2012, "step": 248790 }, { "epoch": 10.31, "grad_norm": 1.2109375, "learning_rate": 0.00036807896961013533, "loss": 0.214, "step": 248800 }, { "epoch": 10.31, "grad_norm": 0.400390625, "learning_rate": 0.0003680694102259415, "loss": 0.2352, "step": 248810 }, { "epoch": 10.31, "grad_norm": 0.427734375, "learning_rate": 0.00036805985061955375, "loss": 0.2136, "step": 248820 }, { "epoch": 10.31, "grad_norm": 1.0703125, "learning_rate": 0.0003680502907909901, "loss": 0.1872, "step": 248830 }, { "epoch": 10.31, "grad_norm": 0.6640625, "learning_rate": 0.0003680407307402686, "loss": 0.199, "step": 248840 }, { "epoch": 10.31, "grad_norm": 0.326171875, "learning_rate": 0.00036803117046740717, "loss": 0.2035, "step": 248850 }, { "epoch": 10.31, "grad_norm": 1.2421875, "learning_rate": 0.00036802160997242387, "loss": 0.1553, "step": 248860 }, { "epoch": 10.31, "grad_norm": 1.046875, "learning_rate": 0.0003680120492553366, "loss": 0.1654, "step": 248870 }, { "epoch": 10.31, "grad_norm": 0.5078125, "learning_rate": 0.0003680024883161634, "loss": 0.2114, "step": 248880 }, { "epoch": 10.31, "grad_norm": 1.1796875, "learning_rate": 0.00036799292715492226, "loss": 0.1871, "step": 248890 }, { "epoch": 10.31, "grad_norm": 0.76171875, "learning_rate": 0.0003679833657716312, "loss": 0.2291, "step": 248900 }, { "epoch": 10.31, "grad_norm": 0.92578125, "learning_rate": 0.00036797380416630815, "loss": 0.2138, "step": 248910 }, { "epoch": 10.31, "grad_norm": 0.9140625, "learning_rate": 0.0003679642423389712, "loss": 0.21, "step": 248920 }, { "epoch": 10.31, "grad_norm": 1.359375, "learning_rate": 0.0003679546802896382, "loss": 0.2076, "step": 248930 }, { "epoch": 10.31, "grad_norm": 0.6640625, "learning_rate": 0.0003679451180183273, "loss": 0.1587, "step": 248940 }, { "epoch": 10.31, "grad_norm": 0.640625, "learning_rate": 0.00036793555552505636, "loss": 0.2292, "step": 248950 }, { "epoch": 10.31, "grad_norm": 0.490234375, "learning_rate": 0.0003679259928098435, "loss": 0.1525, "step": 248960 }, { "epoch": 10.31, "grad_norm": 0.2197265625, "learning_rate": 0.00036791642987270664, "loss": 0.1938, "step": 248970 }, { "epoch": 10.31, "grad_norm": 0.96875, "learning_rate": 0.0003679068667136638, "loss": 0.2097, "step": 248980 }, { "epoch": 10.31, "grad_norm": 0.44921875, "learning_rate": 0.00036789730333273296, "loss": 0.1778, "step": 248990 }, { "epoch": 10.31, "grad_norm": 0.56640625, "learning_rate": 0.00036788773972993205, "loss": 0.1939, "step": 249000 }, { "epoch": 10.31, "grad_norm": 0.6484375, "learning_rate": 0.0003678781759052792, "loss": 0.2015, "step": 249010 }, { "epoch": 10.31, "grad_norm": 1.234375, "learning_rate": 0.00036786861185879245, "loss": 0.2232, "step": 249020 }, { "epoch": 10.31, "grad_norm": 0.6171875, "learning_rate": 0.0003678590475904896, "loss": 0.1747, "step": 249030 }, { "epoch": 10.32, "grad_norm": 0.388671875, "learning_rate": 0.0003678494831003888, "loss": 0.2121, "step": 249040 }, { "epoch": 10.32, "grad_norm": 0.62109375, "learning_rate": 0.00036783991838850794, "loss": 0.2241, "step": 249050 }, { "epoch": 10.32, "grad_norm": 0.765625, "learning_rate": 0.00036783035345486513, "loss": 0.1981, "step": 249060 }, { "epoch": 10.32, "grad_norm": 0.58984375, "learning_rate": 0.0003678207882994783, "loss": 0.1896, "step": 249070 }, { "epoch": 10.32, "grad_norm": 0.53515625, "learning_rate": 0.00036781122292236547, "loss": 0.1859, "step": 249080 }, { "epoch": 10.32, "grad_norm": 0.408203125, "learning_rate": 0.0003678016573235446, "loss": 0.1269, "step": 249090 }, { "epoch": 10.32, "grad_norm": 0.6796875, "learning_rate": 0.0003677920915030338, "loss": 0.1862, "step": 249100 }, { "epoch": 10.32, "grad_norm": 0.53125, "learning_rate": 0.00036778252546085103, "loss": 0.1925, "step": 249110 }, { "epoch": 10.32, "grad_norm": 0.8515625, "learning_rate": 0.0003677729591970141, "loss": 0.1745, "step": 249120 }, { "epoch": 10.32, "grad_norm": 0.77734375, "learning_rate": 0.00036776339271154136, "loss": 0.2001, "step": 249130 }, { "epoch": 10.32, "grad_norm": 0.87890625, "learning_rate": 0.00036775382600445056, "loss": 0.2404, "step": 249140 }, { "epoch": 10.32, "grad_norm": 0.796875, "learning_rate": 0.0003677442590757598, "loss": 0.1983, "step": 249150 }, { "epoch": 10.32, "grad_norm": 0.921875, "learning_rate": 0.00036773469192548705, "loss": 0.2043, "step": 249160 }, { "epoch": 10.32, "grad_norm": 0.98828125, "learning_rate": 0.0003677251245536503, "loss": 0.2202, "step": 249170 }, { "epoch": 10.32, "grad_norm": 0.96875, "learning_rate": 0.0003677155569602675, "loss": 0.245, "step": 249180 }, { "epoch": 10.32, "grad_norm": 0.953125, "learning_rate": 0.0003677059891453569, "loss": 0.1935, "step": 249190 }, { "epoch": 10.32, "grad_norm": 0.341796875, "learning_rate": 0.00036769642110893633, "loss": 0.1882, "step": 249200 }, { "epoch": 10.32, "grad_norm": 0.80859375, "learning_rate": 0.0003676868528510237, "loss": 0.2395, "step": 249210 }, { "epoch": 10.32, "grad_norm": 0.625, "learning_rate": 0.00036767728437163714, "loss": 0.2376, "step": 249220 }, { "epoch": 10.32, "grad_norm": 1.421875, "learning_rate": 0.0003676677156707947, "loss": 0.1856, "step": 249230 }, { "epoch": 10.32, "grad_norm": 0.365234375, "learning_rate": 0.00036765814674851426, "loss": 0.193, "step": 249240 }, { "epoch": 10.32, "grad_norm": 0.75, "learning_rate": 0.0003676485776048139, "loss": 0.1908, "step": 249250 }, { "epoch": 10.32, "grad_norm": 1.6796875, "learning_rate": 0.00036763900823971166, "loss": 0.2458, "step": 249260 }, { "epoch": 10.32, "grad_norm": 0.80859375, "learning_rate": 0.0003676294386532255, "loss": 0.1879, "step": 249270 }, { "epoch": 10.33, "grad_norm": 0.484375, "learning_rate": 0.0003676198688453735, "loss": 0.1615, "step": 249280 }, { "epoch": 10.33, "grad_norm": 0.53515625, "learning_rate": 0.00036761029881617345, "loss": 0.1682, "step": 249290 }, { "epoch": 10.33, "grad_norm": 1.046875, "learning_rate": 0.0003676007285656437, "loss": 0.2393, "step": 249300 }, { "epoch": 10.33, "grad_norm": 0.3671875, "learning_rate": 0.00036759115809380195, "loss": 0.2432, "step": 249310 }, { "epoch": 10.33, "grad_norm": 0.68359375, "learning_rate": 0.0003675815874006664, "loss": 0.1758, "step": 249320 }, { "epoch": 10.33, "grad_norm": 0.890625, "learning_rate": 0.00036757201648625494, "loss": 0.2022, "step": 249330 }, { "epoch": 10.33, "grad_norm": 0.81640625, "learning_rate": 0.0003675624453505857, "loss": 0.2008, "step": 249340 }, { "epoch": 10.33, "grad_norm": 0.72265625, "learning_rate": 0.00036755287399367663, "loss": 0.1828, "step": 249350 }, { "epoch": 10.33, "grad_norm": 0.71484375, "learning_rate": 0.00036754330241554575, "loss": 0.1756, "step": 249360 }, { "epoch": 10.33, "grad_norm": 0.62890625, "learning_rate": 0.000367533730616211, "loss": 0.2098, "step": 249370 }, { "epoch": 10.33, "grad_norm": 0.423828125, "learning_rate": 0.00036752415859569055, "loss": 0.1975, "step": 249380 }, { "epoch": 10.33, "grad_norm": 0.21484375, "learning_rate": 0.00036751458635400224, "loss": 0.2345, "step": 249390 }, { "epoch": 10.33, "grad_norm": 0.44140625, "learning_rate": 0.0003675050138911643, "loss": 0.1686, "step": 249400 }, { "epoch": 10.33, "grad_norm": 0.142578125, "learning_rate": 0.0003674954412071945, "loss": 0.2263, "step": 249410 }, { "epoch": 10.33, "grad_norm": 0.58984375, "learning_rate": 0.00036748586830211095, "loss": 0.1554, "step": 249420 }, { "epoch": 10.33, "grad_norm": 0.35546875, "learning_rate": 0.00036747629517593176, "loss": 0.1572, "step": 249430 }, { "epoch": 10.33, "grad_norm": 0.59375, "learning_rate": 0.0003674667218286748, "loss": 0.2125, "step": 249440 }, { "epoch": 10.33, "grad_norm": 2.875, "learning_rate": 0.0003674571482603582, "loss": 0.2072, "step": 249450 }, { "epoch": 10.33, "grad_norm": 0.51953125, "learning_rate": 0.000367447574471, "loss": 0.2054, "step": 249460 }, { "epoch": 10.33, "grad_norm": 0.9140625, "learning_rate": 0.00036743800046061797, "loss": 0.2417, "step": 249470 }, { "epoch": 10.33, "grad_norm": 0.75, "learning_rate": 0.00036742842622923045, "loss": 0.1691, "step": 249480 }, { "epoch": 10.33, "grad_norm": 0.251953125, "learning_rate": 0.0003674188517768553, "loss": 0.1625, "step": 249490 }, { "epoch": 10.33, "grad_norm": 0.71484375, "learning_rate": 0.00036740927710351046, "loss": 0.2016, "step": 249500 }, { "epoch": 10.33, "grad_norm": 0.65234375, "learning_rate": 0.00036739970220921414, "loss": 0.2248, "step": 249510 }, { "epoch": 10.34, "grad_norm": 1.28125, "learning_rate": 0.00036739012709398425, "loss": 0.2107, "step": 249520 }, { "epoch": 10.34, "grad_norm": 0.52734375, "learning_rate": 0.00036738055175783874, "loss": 0.1664, "step": 249530 }, { "epoch": 10.34, "grad_norm": 0.447265625, "learning_rate": 0.0003673709762007958, "loss": 0.1794, "step": 249540 }, { "epoch": 10.34, "grad_norm": 1.3046875, "learning_rate": 0.00036736140042287327, "loss": 0.1909, "step": 249550 }, { "epoch": 10.34, "grad_norm": 0.58203125, "learning_rate": 0.0003673518244240893, "loss": 0.2228, "step": 249560 }, { "epoch": 10.34, "grad_norm": 0.66796875, "learning_rate": 0.00036734224820446186, "loss": 0.1787, "step": 249570 }, { "epoch": 10.34, "grad_norm": 1.84375, "learning_rate": 0.00036733267176400895, "loss": 0.1806, "step": 249580 }, { "epoch": 10.34, "grad_norm": 0.80859375, "learning_rate": 0.0003673230951027488, "loss": 0.2032, "step": 249590 }, { "epoch": 10.34, "grad_norm": 0.99609375, "learning_rate": 0.000367313518220699, "loss": 0.1696, "step": 249600 }, { "epoch": 10.34, "grad_norm": 0.216796875, "learning_rate": 0.000367303941117878, "loss": 0.1888, "step": 249610 }, { "epoch": 10.34, "grad_norm": 0.296875, "learning_rate": 0.00036729436379430365, "loss": 0.2017, "step": 249620 }, { "epoch": 10.34, "grad_norm": 1.1640625, "learning_rate": 0.0003672847862499938, "loss": 0.1684, "step": 249630 }, { "epoch": 10.34, "grad_norm": 0.89453125, "learning_rate": 0.0003672752084849669, "loss": 0.1898, "step": 249640 }, { "epoch": 10.34, "grad_norm": 0.58203125, "learning_rate": 0.0003672656304992406, "loss": 0.1857, "step": 249650 }, { "epoch": 10.34, "grad_norm": 0.42578125, "learning_rate": 0.000367256052292833, "loss": 0.1443, "step": 249660 }, { "epoch": 10.34, "grad_norm": 1.0625, "learning_rate": 0.0003672464738657622, "loss": 0.2023, "step": 249670 }, { "epoch": 10.34, "grad_norm": 0.63671875, "learning_rate": 0.00036723689521804613, "loss": 0.1998, "step": 249680 }, { "epoch": 10.34, "grad_norm": 0.5625, "learning_rate": 0.000367227316349703, "loss": 0.1715, "step": 249690 }, { "epoch": 10.34, "grad_norm": 0.53515625, "learning_rate": 0.00036721773726075075, "loss": 0.1932, "step": 249700 }, { "epoch": 10.34, "grad_norm": 0.7109375, "learning_rate": 0.00036720815795120723, "loss": 0.1939, "step": 249710 }, { "epoch": 10.34, "grad_norm": 0.47265625, "learning_rate": 0.0003671985784210907, "loss": 0.1999, "step": 249720 }, { "epoch": 10.34, "grad_norm": 0.46484375, "learning_rate": 0.0003671889986704191, "loss": 0.1788, "step": 249730 }, { "epoch": 10.34, "grad_norm": 0.7265625, "learning_rate": 0.0003671794186992105, "loss": 0.1664, "step": 249740 }, { "epoch": 10.34, "grad_norm": 1.1796875, "learning_rate": 0.0003671698385074829, "loss": 0.2081, "step": 249750 }, { "epoch": 10.35, "grad_norm": 0.890625, "learning_rate": 0.00036716025809525415, "loss": 0.1778, "step": 249760 }, { "epoch": 10.35, "grad_norm": 1.3828125, "learning_rate": 0.0003671506774625426, "loss": 0.1596, "step": 249770 }, { "epoch": 10.35, "grad_norm": 0.6328125, "learning_rate": 0.0003671410966093661, "loss": 0.1602, "step": 249780 }, { "epoch": 10.35, "grad_norm": 1.0234375, "learning_rate": 0.00036713151553574265, "loss": 0.1689, "step": 249790 }, { "epoch": 10.35, "grad_norm": 0.52734375, "learning_rate": 0.0003671219342416904, "loss": 0.2366, "step": 249800 }, { "epoch": 10.35, "grad_norm": 1.5546875, "learning_rate": 0.0003671123527272273, "loss": 0.1971, "step": 249810 }, { "epoch": 10.35, "grad_norm": 0.59765625, "learning_rate": 0.00036710277099237136, "loss": 0.2146, "step": 249820 }, { "epoch": 10.35, "grad_norm": 0.498046875, "learning_rate": 0.00036709318903714074, "loss": 0.1188, "step": 249830 }, { "epoch": 10.35, "grad_norm": 0.5, "learning_rate": 0.0003670836068615533, "loss": 0.1863, "step": 249840 }, { "epoch": 10.35, "grad_norm": 0.7734375, "learning_rate": 0.00036707402446562717, "loss": 0.1731, "step": 249850 }, { "epoch": 10.35, "grad_norm": 0.55078125, "learning_rate": 0.0003670644418493804, "loss": 0.1773, "step": 249860 }, { "epoch": 10.35, "grad_norm": 1.0703125, "learning_rate": 0.00036705485901283093, "loss": 0.239, "step": 249870 }, { "epoch": 10.35, "grad_norm": 0.91015625, "learning_rate": 0.00036704527595599697, "loss": 0.2279, "step": 249880 }, { "epoch": 10.35, "grad_norm": 0.96875, "learning_rate": 0.0003670356926788964, "loss": 0.1539, "step": 249890 }, { "epoch": 10.35, "grad_norm": 0.75390625, "learning_rate": 0.0003670261091815472, "loss": 0.1963, "step": 249900 }, { "epoch": 10.35, "grad_norm": 0.328125, "learning_rate": 0.0003670165254639676, "loss": 0.16, "step": 249910 }, { "epoch": 10.35, "grad_norm": 0.81640625, "learning_rate": 0.0003670069415261754, "loss": 0.1993, "step": 249920 }, { "epoch": 10.35, "grad_norm": 1.1875, "learning_rate": 0.00036699735736818895, "loss": 0.1644, "step": 249930 }, { "epoch": 10.35, "grad_norm": 0.8984375, "learning_rate": 0.000366987772990026, "loss": 0.2142, "step": 249940 }, { "epoch": 10.35, "grad_norm": 0.734375, "learning_rate": 0.0003669781883917047, "loss": 0.2021, "step": 249950 }, { "epoch": 10.35, "grad_norm": 0.80859375, "learning_rate": 0.0003669686035732432, "loss": 0.1736, "step": 249960 }, { "epoch": 10.35, "grad_norm": 0.5234375, "learning_rate": 0.0003669590185346593, "loss": 0.2195, "step": 249970 }, { "epoch": 10.35, "grad_norm": 0.51953125, "learning_rate": 0.0003669494332759712, "loss": 0.2418, "step": 249980 }, { "epoch": 10.35, "grad_norm": 1.0, "learning_rate": 0.0003669398477971969, "loss": 0.1965, "step": 249990 }, { "epoch": 10.35, "grad_norm": 0.95703125, "learning_rate": 0.0003669302620983543, "loss": 0.2012, "step": 250000 }, { "epoch": 10.36, "grad_norm": 0.482421875, "learning_rate": 0.0003669206761794618, "loss": 0.2038, "step": 250010 }, { "epoch": 10.36, "grad_norm": 0.51953125, "learning_rate": 0.0003669110900405371, "loss": 0.1826, "step": 250020 }, { "epoch": 10.36, "grad_norm": 0.419921875, "learning_rate": 0.0003669015036815983, "loss": 0.2106, "step": 250030 }, { "epoch": 10.36, "grad_norm": 0.435546875, "learning_rate": 0.00036689191710266355, "loss": 0.1985, "step": 250040 }, { "epoch": 10.36, "grad_norm": 0.60546875, "learning_rate": 0.00036688233030375084, "loss": 0.2057, "step": 250050 }, { "epoch": 10.36, "grad_norm": 0.58203125, "learning_rate": 0.0003668727432848782, "loss": 0.1583, "step": 250060 }, { "epoch": 10.36, "grad_norm": 0.7265625, "learning_rate": 0.00036686315604606375, "loss": 0.1179, "step": 250070 }, { "epoch": 10.36, "grad_norm": 0.796875, "learning_rate": 0.0003668535685873253, "loss": 0.213, "step": 250080 }, { "epoch": 10.36, "grad_norm": 0.734375, "learning_rate": 0.0003668439809086812, "loss": 0.148, "step": 250090 }, { "epoch": 10.36, "grad_norm": 0.58203125, "learning_rate": 0.0003668343930101493, "loss": 0.1706, "step": 250100 }, { "epoch": 10.36, "grad_norm": 0.52734375, "learning_rate": 0.0003668248048917476, "loss": 0.2123, "step": 250110 }, { "epoch": 10.36, "grad_norm": 0.85546875, "learning_rate": 0.0003668152165534944, "loss": 0.1988, "step": 250120 }, { "epoch": 10.36, "grad_norm": 1.4140625, "learning_rate": 0.0003668056279954075, "loss": 0.2372, "step": 250130 }, { "epoch": 10.36, "grad_norm": 0.77734375, "learning_rate": 0.00036679603921750497, "loss": 0.2039, "step": 250140 }, { "epoch": 10.36, "grad_norm": 1.109375, "learning_rate": 0.0003667864502198049, "loss": 0.1986, "step": 250150 }, { "epoch": 10.36, "grad_norm": 0.90625, "learning_rate": 0.00036677686100232545, "loss": 0.191, "step": 250160 }, { "epoch": 10.36, "grad_norm": 0.56640625, "learning_rate": 0.00036676727156508454, "loss": 0.2661, "step": 250170 }, { "epoch": 10.36, "grad_norm": 1.1171875, "learning_rate": 0.0003667576819081002, "loss": 0.1973, "step": 250180 }, { "epoch": 10.36, "grad_norm": 1.046875, "learning_rate": 0.0003667480920313905, "loss": 0.2273, "step": 250190 }, { "epoch": 10.36, "grad_norm": 0.376953125, "learning_rate": 0.00036673850193497354, "loss": 0.1507, "step": 250200 }, { "epoch": 10.36, "grad_norm": 0.306640625, "learning_rate": 0.00036672891161886723, "loss": 0.1526, "step": 250210 }, { "epoch": 10.36, "grad_norm": 0.6875, "learning_rate": 0.0003667193210830898, "loss": 0.1979, "step": 250220 }, { "epoch": 10.36, "grad_norm": 1.1171875, "learning_rate": 0.0003667097303276592, "loss": 0.1657, "step": 250230 }, { "epoch": 10.36, "grad_norm": 1.046875, "learning_rate": 0.00036670013935259345, "loss": 0.2044, "step": 250240 }, { "epoch": 10.37, "grad_norm": 0.46875, "learning_rate": 0.0003666905481579107, "loss": 0.1864, "step": 250250 }, { "epoch": 10.37, "grad_norm": 1.1328125, "learning_rate": 0.0003666809567436289, "loss": 0.217, "step": 250260 }, { "epoch": 10.37, "grad_norm": 0.376953125, "learning_rate": 0.00036667136510976615, "loss": 0.191, "step": 250270 }, { "epoch": 10.37, "grad_norm": 0.78515625, "learning_rate": 0.00036666177325634053, "loss": 0.2022, "step": 250280 }, { "epoch": 10.37, "grad_norm": 0.84375, "learning_rate": 0.00036665218118337, "loss": 0.1953, "step": 250290 }, { "epoch": 10.37, "grad_norm": 2.453125, "learning_rate": 0.0003666425888908727, "loss": 0.1947, "step": 250300 }, { "epoch": 10.37, "grad_norm": 0.83203125, "learning_rate": 0.00036663299637886655, "loss": 0.2438, "step": 250310 }, { "epoch": 10.37, "grad_norm": 0.60546875, "learning_rate": 0.0003666234036473698, "loss": 0.2689, "step": 250320 }, { "epoch": 10.37, "grad_norm": 1.0625, "learning_rate": 0.00036661381069640037, "loss": 0.2285, "step": 250330 }, { "epoch": 10.37, "grad_norm": 0.361328125, "learning_rate": 0.00036660421752597626, "loss": 0.2229, "step": 250340 }, { "epoch": 10.37, "grad_norm": 1.3125, "learning_rate": 0.0003665946241361157, "loss": 0.2377, "step": 250350 }, { "epoch": 10.37, "grad_norm": 0.578125, "learning_rate": 0.00036658503052683665, "loss": 0.2119, "step": 250360 }, { "epoch": 10.37, "grad_norm": 0.86328125, "learning_rate": 0.0003665754366981572, "loss": 0.2042, "step": 250370 }, { "epoch": 10.37, "grad_norm": 0.82421875, "learning_rate": 0.0003665658426500953, "loss": 0.2112, "step": 250380 }, { "epoch": 10.37, "grad_norm": 0.73828125, "learning_rate": 0.00036655624838266907, "loss": 0.2072, "step": 250390 }, { "epoch": 10.37, "grad_norm": 0.62109375, "learning_rate": 0.0003665466538958965, "loss": 0.142, "step": 250400 }, { "epoch": 10.37, "grad_norm": 0.703125, "learning_rate": 0.00036653705918979586, "loss": 0.2455, "step": 250410 }, { "epoch": 10.37, "grad_norm": 0.46484375, "learning_rate": 0.000366527464264385, "loss": 0.1949, "step": 250420 }, { "epoch": 10.37, "grad_norm": 0.484375, "learning_rate": 0.000366517869119682, "loss": 0.199, "step": 250430 }, { "epoch": 10.37, "grad_norm": 0.78515625, "learning_rate": 0.00036650827375570494, "loss": 0.1711, "step": 250440 }, { "epoch": 10.37, "grad_norm": 0.91015625, "learning_rate": 0.00036649867817247194, "loss": 0.1869, "step": 250450 }, { "epoch": 10.37, "grad_norm": 1.953125, "learning_rate": 0.000366489082370001, "loss": 0.1704, "step": 250460 }, { "epoch": 10.37, "grad_norm": 1.203125, "learning_rate": 0.00036647948634831016, "loss": 0.1745, "step": 250470 }, { "epoch": 10.37, "grad_norm": 1.203125, "learning_rate": 0.00036646989010741753, "loss": 0.1791, "step": 250480 }, { "epoch": 10.38, "grad_norm": 0.8359375, "learning_rate": 0.00036646029364734113, "loss": 0.1985, "step": 250490 }, { "epoch": 10.38, "grad_norm": 0.9453125, "learning_rate": 0.0003664506969680991, "loss": 0.2019, "step": 250500 }, { "epoch": 10.38, "grad_norm": 1.0859375, "learning_rate": 0.00036644110006970937, "loss": 0.2019, "step": 250510 }, { "epoch": 10.38, "grad_norm": 0.703125, "learning_rate": 0.00036643150295219006, "loss": 0.21, "step": 250520 }, { "epoch": 10.38, "grad_norm": 1.1328125, "learning_rate": 0.0003664219056155593, "loss": 0.2057, "step": 250530 }, { "epoch": 10.38, "grad_norm": 1.5390625, "learning_rate": 0.00036641230805983506, "loss": 0.2471, "step": 250540 }, { "epoch": 10.38, "grad_norm": 1.1875, "learning_rate": 0.00036640271028503537, "loss": 0.1603, "step": 250550 }, { "epoch": 10.38, "grad_norm": 1.859375, "learning_rate": 0.0003663931122911784, "loss": 0.2086, "step": 250560 }, { "epoch": 10.38, "grad_norm": 0.72265625, "learning_rate": 0.0003663835140782821, "loss": 0.206, "step": 250570 }, { "epoch": 10.38, "grad_norm": 1.6640625, "learning_rate": 0.0003663739156463647, "loss": 0.2105, "step": 250580 }, { "epoch": 10.38, "grad_norm": 0.84375, "learning_rate": 0.0003663643169954441, "loss": 0.1208, "step": 250590 }, { "epoch": 10.38, "grad_norm": 0.376953125, "learning_rate": 0.0003663547181255384, "loss": 0.1851, "step": 250600 }, { "epoch": 10.38, "grad_norm": 0.95703125, "learning_rate": 0.0003663451190366657, "loss": 0.2153, "step": 250610 }, { "epoch": 10.38, "grad_norm": 0.5625, "learning_rate": 0.000366335519728844, "loss": 0.1896, "step": 250620 }, { "epoch": 10.38, "grad_norm": 1.2109375, "learning_rate": 0.00036632592020209153, "loss": 0.191, "step": 250630 }, { "epoch": 10.38, "grad_norm": 0.51171875, "learning_rate": 0.00036631632045642615, "loss": 0.1944, "step": 250640 }, { "epoch": 10.38, "grad_norm": 0.734375, "learning_rate": 0.0003663067204918661, "loss": 0.2348, "step": 250650 }, { "epoch": 10.38, "grad_norm": 0.7265625, "learning_rate": 0.0003662971203084293, "loss": 0.1528, "step": 250660 }, { "epoch": 10.38, "grad_norm": 1.5234375, "learning_rate": 0.0003662875199061339, "loss": 0.1882, "step": 250670 }, { "epoch": 10.38, "grad_norm": 0.41796875, "learning_rate": 0.00036627791928499793, "loss": 0.188, "step": 250680 }, { "epoch": 10.38, "grad_norm": 0.78515625, "learning_rate": 0.00036626831844503943, "loss": 0.1854, "step": 250690 }, { "epoch": 10.38, "grad_norm": 0.3515625, "learning_rate": 0.0003662587173862766, "loss": 0.2261, "step": 250700 }, { "epoch": 10.38, "grad_norm": 0.275390625, "learning_rate": 0.00036624911610872737, "loss": 0.1652, "step": 250710 }, { "epoch": 10.38, "grad_norm": 3.453125, "learning_rate": 0.0003662395146124099, "loss": 0.2694, "step": 250720 }, { "epoch": 10.39, "grad_norm": 0.62109375, "learning_rate": 0.0003662299128973421, "loss": 0.1484, "step": 250730 }, { "epoch": 10.39, "grad_norm": 0.59375, "learning_rate": 0.0003662203109635423, "loss": 0.2195, "step": 250740 }, { "epoch": 10.39, "grad_norm": 0.69921875, "learning_rate": 0.0003662107088110284, "loss": 0.2088, "step": 250750 }, { "epoch": 10.39, "grad_norm": 1.015625, "learning_rate": 0.0003662011064398184, "loss": 0.1431, "step": 250760 }, { "epoch": 10.39, "grad_norm": 0.6640625, "learning_rate": 0.0003661915038499305, "loss": 0.2218, "step": 250770 }, { "epoch": 10.39, "grad_norm": 0.380859375, "learning_rate": 0.00036618190104138276, "loss": 0.1489, "step": 250780 }, { "epoch": 10.39, "grad_norm": 0.375, "learning_rate": 0.0003661722980141933, "loss": 0.1938, "step": 250790 }, { "epoch": 10.39, "grad_norm": 1.6796875, "learning_rate": 0.00036616269476838004, "loss": 0.1494, "step": 250800 }, { "epoch": 10.39, "grad_norm": 1.4765625, "learning_rate": 0.0003661530913039611, "loss": 0.1965, "step": 250810 }, { "epoch": 10.39, "grad_norm": 2.515625, "learning_rate": 0.00036614348762095466, "loss": 0.2215, "step": 250820 }, { "epoch": 10.39, "grad_norm": 1.25, "learning_rate": 0.0003661338837193787, "loss": 0.1179, "step": 250830 }, { "epoch": 10.39, "grad_norm": 0.0, "learning_rate": 0.00036612427959925126, "loss": 0.1833, "step": 250840 }, { "epoch": 10.39, "grad_norm": 0.81640625, "learning_rate": 0.0003661146752605905, "loss": 0.2224, "step": 250850 }, { "epoch": 10.39, "grad_norm": 0.5625, "learning_rate": 0.00036610507070341446, "loss": 0.1669, "step": 250860 }, { "epoch": 10.39, "grad_norm": 0.5390625, "learning_rate": 0.00036609546592774125, "loss": 0.2053, "step": 250870 }, { "epoch": 10.39, "grad_norm": 0.490234375, "learning_rate": 0.00036608586093358884, "loss": 0.2221, "step": 250880 }, { "epoch": 10.39, "grad_norm": 0.15625, "learning_rate": 0.0003660762557209755, "loss": 0.1431, "step": 250890 }, { "epoch": 10.39, "grad_norm": 1.9765625, "learning_rate": 0.0003660666502899191, "loss": 0.1708, "step": 250900 }, { "epoch": 10.39, "grad_norm": 2.46875, "learning_rate": 0.0003660570446404377, "loss": 0.2015, "step": 250910 }, { "epoch": 10.39, "grad_norm": 1.2734375, "learning_rate": 0.00036604743877254964, "loss": 0.199, "step": 250920 }, { "epoch": 10.39, "grad_norm": 1.296875, "learning_rate": 0.0003660378326862728, "loss": 0.2197, "step": 250930 }, { "epoch": 10.39, "grad_norm": 2.0, "learning_rate": 0.0003660282263816252, "loss": 0.191, "step": 250940 }, { "epoch": 10.39, "grad_norm": 0.55078125, "learning_rate": 0.0003660186198586251, "loss": 0.1827, "step": 250950 }, { "epoch": 10.39, "grad_norm": 0.515625, "learning_rate": 0.00036600901311729044, "loss": 0.1751, "step": 250960 }, { "epoch": 10.4, "grad_norm": 0.75390625, "learning_rate": 0.0003659994061576393, "loss": 0.253, "step": 250970 }, { "epoch": 10.4, "grad_norm": 0.84765625, "learning_rate": 0.0003659897989796899, "loss": 0.2317, "step": 250980 }, { "epoch": 10.4, "grad_norm": 0.68359375, "learning_rate": 0.0003659801915834602, "loss": 0.194, "step": 250990 }, { "epoch": 10.4, "grad_norm": 0.70703125, "learning_rate": 0.0003659705839689683, "loss": 0.1828, "step": 251000 }, { "epoch": 10.4, "grad_norm": 0.7734375, "learning_rate": 0.00036596097613623226, "loss": 0.16, "step": 251010 }, { "epoch": 10.4, "grad_norm": 0.96875, "learning_rate": 0.0003659513680852702, "loss": 0.1975, "step": 251020 }, { "epoch": 10.4, "grad_norm": 0.75, "learning_rate": 0.0003659417598161002, "loss": 0.2029, "step": 251030 }, { "epoch": 10.4, "grad_norm": 1.25, "learning_rate": 0.0003659321513287403, "loss": 0.1425, "step": 251040 }, { "epoch": 10.4, "grad_norm": 0.33984375, "learning_rate": 0.0003659225426232086, "loss": 0.187, "step": 251050 }, { "epoch": 10.4, "grad_norm": 0.353515625, "learning_rate": 0.00036591293369952327, "loss": 0.2291, "step": 251060 }, { "epoch": 10.4, "grad_norm": 1.0078125, "learning_rate": 0.0003659033245577022, "loss": 0.1375, "step": 251070 }, { "epoch": 10.4, "grad_norm": 0.78515625, "learning_rate": 0.0003658937151977637, "loss": 0.1502, "step": 251080 }, { "epoch": 10.4, "grad_norm": 0.67578125, "learning_rate": 0.00036588410561972566, "loss": 0.1486, "step": 251090 }, { "epoch": 10.4, "grad_norm": 1.421875, "learning_rate": 0.0003658744958236063, "loss": 0.2039, "step": 251100 }, { "epoch": 10.4, "grad_norm": 1.3203125, "learning_rate": 0.0003658648858094237, "loss": 0.1694, "step": 251110 }, { "epoch": 10.4, "grad_norm": 0.38671875, "learning_rate": 0.0003658552755771957, "loss": 0.1933, "step": 251120 }, { "epoch": 10.4, "grad_norm": 0.83203125, "learning_rate": 0.0003658456651269408, "loss": 0.2828, "step": 251130 }, { "epoch": 10.4, "grad_norm": 1.4375, "learning_rate": 0.00036583605445867685, "loss": 0.2034, "step": 251140 }, { "epoch": 10.4, "grad_norm": 1.1171875, "learning_rate": 0.00036582644357242177, "loss": 0.2193, "step": 251150 }, { "epoch": 10.4, "grad_norm": 0.349609375, "learning_rate": 0.000365816832468194, "loss": 0.1368, "step": 251160 }, { "epoch": 10.4, "grad_norm": 0.4765625, "learning_rate": 0.0003658072211460114, "loss": 0.2013, "step": 251170 }, { "epoch": 10.4, "grad_norm": 0.2236328125, "learning_rate": 0.00036579760960589214, "loss": 0.2164, "step": 251180 }, { "epoch": 10.4, "grad_norm": 0.6171875, "learning_rate": 0.0003657879978478543, "loss": 0.2202, "step": 251190 }, { "epoch": 10.4, "grad_norm": 1.1015625, "learning_rate": 0.0003657783858719159, "loss": 0.1775, "step": 251200 }, { "epoch": 10.41, "grad_norm": 0.8515625, "learning_rate": 0.0003657687736780951, "loss": 0.1941, "step": 251210 }, { "epoch": 10.41, "grad_norm": 0.734375, "learning_rate": 0.00036575916126641, "loss": 0.2041, "step": 251220 }, { "epoch": 10.41, "grad_norm": 1.0234375, "learning_rate": 0.0003657495486368786, "loss": 0.2287, "step": 251230 }, { "epoch": 10.41, "grad_norm": 0.60546875, "learning_rate": 0.0003657399357895191, "loss": 0.2088, "step": 251240 }, { "epoch": 10.41, "grad_norm": 0.76171875, "learning_rate": 0.00036573032272434945, "loss": 0.1492, "step": 251250 }, { "epoch": 10.41, "grad_norm": 1.0078125, "learning_rate": 0.0003657207094413879, "loss": 0.1832, "step": 251260 }, { "epoch": 10.41, "grad_norm": 0.87109375, "learning_rate": 0.00036571109594065257, "loss": 0.1795, "step": 251270 }, { "epoch": 10.41, "grad_norm": 0.546875, "learning_rate": 0.00036570148222216125, "loss": 0.191, "step": 251280 }, { "epoch": 10.41, "grad_norm": 0.68359375, "learning_rate": 0.00036569186828593243, "loss": 0.1982, "step": 251290 }, { "epoch": 10.41, "grad_norm": 0.92578125, "learning_rate": 0.0003656822541319839, "loss": 0.197, "step": 251300 }, { "epoch": 10.41, "grad_norm": 1.8203125, "learning_rate": 0.00036567263976033384, "loss": 0.2386, "step": 251310 }, { "epoch": 10.41, "grad_norm": 0.9375, "learning_rate": 0.0003656630251710005, "loss": 0.2239, "step": 251320 }, { "epoch": 10.41, "grad_norm": 0.60546875, "learning_rate": 0.0003656534103640017, "loss": 0.2058, "step": 251330 }, { "epoch": 10.41, "grad_norm": 0.82421875, "learning_rate": 0.00036564379533935565, "loss": 0.1833, "step": 251340 }, { "epoch": 10.41, "grad_norm": 0.5625, "learning_rate": 0.00036563418009708063, "loss": 0.2033, "step": 251350 }, { "epoch": 10.41, "grad_norm": 0.58984375, "learning_rate": 0.0003656245646371944, "loss": 0.2243, "step": 251360 }, { "epoch": 10.41, "grad_norm": 1.28125, "learning_rate": 0.00036561494895971533, "loss": 0.1863, "step": 251370 }, { "epoch": 10.41, "grad_norm": 0.81640625, "learning_rate": 0.0003656053330646614, "loss": 0.1604, "step": 251380 }, { "epoch": 10.41, "grad_norm": 1.5078125, "learning_rate": 0.00036559571695205065, "loss": 0.1709, "step": 251390 }, { "epoch": 10.41, "grad_norm": 0.85546875, "learning_rate": 0.00036558610062190127, "loss": 0.2105, "step": 251400 }, { "epoch": 10.41, "grad_norm": 1.296875, "learning_rate": 0.0003655764840742313, "loss": 0.2034, "step": 251410 }, { "epoch": 10.41, "grad_norm": 0.8671875, "learning_rate": 0.000365566867309059, "loss": 0.1854, "step": 251420 }, { "epoch": 10.41, "grad_norm": 1.3046875, "learning_rate": 0.00036555725032640226, "loss": 0.1952, "step": 251430 }, { "epoch": 10.41, "grad_norm": 0.60546875, "learning_rate": 0.00036554763312627913, "loss": 0.2021, "step": 251440 }, { "epoch": 10.42, "grad_norm": 0.59765625, "learning_rate": 0.00036553801570870804, "loss": 0.179, "step": 251450 }, { "epoch": 10.42, "grad_norm": 0.5859375, "learning_rate": 0.00036552839807370676, "loss": 0.2208, "step": 251460 }, { "epoch": 10.42, "grad_norm": 0.96484375, "learning_rate": 0.00036551878022129356, "loss": 0.1807, "step": 251470 }, { "epoch": 10.42, "grad_norm": 1.3984375, "learning_rate": 0.00036550916215148644, "loss": 0.1876, "step": 251480 }, { "epoch": 10.42, "grad_norm": 1.1640625, "learning_rate": 0.0003654995438643036, "loss": 0.1709, "step": 251490 }, { "epoch": 10.42, "grad_norm": 0.921875, "learning_rate": 0.00036548992535976303, "loss": 0.2167, "step": 251500 }, { "epoch": 10.42, "grad_norm": 1.203125, "learning_rate": 0.000365480306637883, "loss": 0.205, "step": 251510 }, { "epoch": 10.42, "grad_norm": 1.03125, "learning_rate": 0.0003654706876986814, "loss": 0.1951, "step": 251520 }, { "epoch": 10.42, "grad_norm": 0.67578125, "learning_rate": 0.00036546106854217647, "loss": 0.1804, "step": 251530 }, { "epoch": 10.42, "grad_norm": 1.890625, "learning_rate": 0.0003654514491683863, "loss": 0.2156, "step": 251540 }, { "epoch": 10.42, "grad_norm": 0.6171875, "learning_rate": 0.0003654418295773289, "loss": 0.2361, "step": 251550 }, { "epoch": 10.42, "grad_norm": 0.5625, "learning_rate": 0.0003654322097690226, "loss": 0.2104, "step": 251560 }, { "epoch": 10.42, "grad_norm": 0.59765625, "learning_rate": 0.0003654225897434852, "loss": 0.2008, "step": 251570 }, { "epoch": 10.42, "grad_norm": 0.71875, "learning_rate": 0.000365412969500735, "loss": 0.218, "step": 251580 }, { "epoch": 10.42, "grad_norm": 0.29296875, "learning_rate": 0.00036540334904079007, "loss": 0.215, "step": 251590 }, { "epoch": 10.42, "grad_norm": 0.37109375, "learning_rate": 0.0003653937283636684, "loss": 0.2549, "step": 251600 }, { "epoch": 10.42, "grad_norm": 0.890625, "learning_rate": 0.0003653841074693883, "loss": 0.189, "step": 251610 }, { "epoch": 10.42, "grad_norm": 0.73828125, "learning_rate": 0.00036537448635796775, "loss": 0.1813, "step": 251620 }, { "epoch": 10.42, "grad_norm": 0.41015625, "learning_rate": 0.00036536486502942485, "loss": 0.1729, "step": 251630 }, { "epoch": 10.42, "grad_norm": 0.75, "learning_rate": 0.0003653552434837778, "loss": 0.1767, "step": 251640 }, { "epoch": 10.42, "grad_norm": 0.58203125, "learning_rate": 0.0003653456217210446, "loss": 0.2394, "step": 251650 }, { "epoch": 10.42, "grad_norm": 0.19921875, "learning_rate": 0.0003653359997412433, "loss": 0.139, "step": 251660 }, { "epoch": 10.42, "grad_norm": 0.0, "learning_rate": 0.00036532637754439225, "loss": 0.1845, "step": 251670 }, { "epoch": 10.42, "grad_norm": 0.6640625, "learning_rate": 0.0003653167551305093, "loss": 0.2111, "step": 251680 }, { "epoch": 10.42, "grad_norm": 1.015625, "learning_rate": 0.0003653071324996128, "loss": 0.2199, "step": 251690 }, { "epoch": 10.43, "grad_norm": 0.671875, "learning_rate": 0.0003652975096517206, "loss": 0.1665, "step": 251700 }, { "epoch": 10.43, "grad_norm": 1.015625, "learning_rate": 0.000365287886586851, "loss": 0.199, "step": 251710 }, { "epoch": 10.43, "grad_norm": 0.75390625, "learning_rate": 0.00036527826330502207, "loss": 0.2334, "step": 251720 }, { "epoch": 10.43, "grad_norm": 0.5703125, "learning_rate": 0.0003652686398062518, "loss": 0.1754, "step": 251730 }, { "epoch": 10.43, "grad_norm": 0.64453125, "learning_rate": 0.00036525901609055846, "loss": 0.225, "step": 251740 }, { "epoch": 10.43, "grad_norm": 0.953125, "learning_rate": 0.00036524939215796013, "loss": 0.1997, "step": 251750 }, { "epoch": 10.43, "grad_norm": 3.0, "learning_rate": 0.0003652397680084748, "loss": 0.2244, "step": 251760 }, { "epoch": 10.43, "grad_norm": 0.86328125, "learning_rate": 0.0003652301436421208, "loss": 0.2283, "step": 251770 }, { "epoch": 10.43, "grad_norm": 1.4296875, "learning_rate": 0.000365220519058916, "loss": 0.1747, "step": 251780 }, { "epoch": 10.43, "grad_norm": 2.21875, "learning_rate": 0.0003652108942588787, "loss": 0.2236, "step": 251790 }, { "epoch": 10.43, "grad_norm": 0.58203125, "learning_rate": 0.0003652012692420269, "loss": 0.1365, "step": 251800 }, { "epoch": 10.43, "grad_norm": 1.078125, "learning_rate": 0.0003651916440083788, "loss": 0.1819, "step": 251810 }, { "epoch": 10.43, "grad_norm": 2.546875, "learning_rate": 0.0003651820185579524, "loss": 0.2191, "step": 251820 }, { "epoch": 10.43, "grad_norm": 0.8125, "learning_rate": 0.00036517239289076585, "loss": 0.2075, "step": 251830 }, { "epoch": 10.43, "grad_norm": 0.60546875, "learning_rate": 0.00036516276700683737, "loss": 0.2445, "step": 251840 }, { "epoch": 10.43, "grad_norm": 0.94140625, "learning_rate": 0.000365153140906185, "loss": 0.2043, "step": 251850 }, { "epoch": 10.43, "grad_norm": 0.8984375, "learning_rate": 0.00036514351458882686, "loss": 0.2391, "step": 251860 }, { "epoch": 10.43, "grad_norm": 0.486328125, "learning_rate": 0.000365133888054781, "loss": 0.1951, "step": 251870 }, { "epoch": 10.43, "grad_norm": 0.64453125, "learning_rate": 0.0003651242613040656, "loss": 0.1949, "step": 251880 }, { "epoch": 10.43, "grad_norm": 0.74609375, "learning_rate": 0.0003651146343366988, "loss": 0.2097, "step": 251890 }, { "epoch": 10.43, "grad_norm": 1.171875, "learning_rate": 0.00036510500715269874, "loss": 0.1774, "step": 251900 }, { "epoch": 10.43, "grad_norm": 0.71875, "learning_rate": 0.0003650953797520834, "loss": 0.1751, "step": 251910 }, { "epoch": 10.43, "grad_norm": 0.69140625, "learning_rate": 0.000365085752134871, "loss": 0.182, "step": 251920 }, { "epoch": 10.43, "grad_norm": 1.3828125, "learning_rate": 0.0003650761243010797, "loss": 0.1927, "step": 251930 }, { "epoch": 10.44, "grad_norm": 0.8359375, "learning_rate": 0.0003650664962507275, "loss": 0.1981, "step": 251940 }, { "epoch": 10.44, "grad_norm": 0.62109375, "learning_rate": 0.00036505686798383265, "loss": 0.1909, "step": 251950 }, { "epoch": 10.44, "grad_norm": 1.3828125, "learning_rate": 0.0003650472395004131, "loss": 0.2098, "step": 251960 }, { "epoch": 10.44, "grad_norm": 1.0078125, "learning_rate": 0.0003650376108004872, "loss": 0.2173, "step": 251970 }, { "epoch": 10.44, "grad_norm": 0.828125, "learning_rate": 0.00036502798188407283, "loss": 0.194, "step": 251980 }, { "epoch": 10.44, "grad_norm": 0.9609375, "learning_rate": 0.0003650183527511882, "loss": 0.2559, "step": 251990 }, { "epoch": 10.44, "grad_norm": 0.0, "learning_rate": 0.00036500872340185155, "loss": 0.1704, "step": 252000 }, { "epoch": 10.44, "grad_norm": 1.75, "learning_rate": 0.0003649990938360808, "loss": 0.2076, "step": 252010 }, { "epoch": 10.44, "grad_norm": 0.94140625, "learning_rate": 0.00036498946405389435, "loss": 0.2251, "step": 252020 }, { "epoch": 10.44, "grad_norm": 0.4765625, "learning_rate": 0.00036497983405531, "loss": 0.2249, "step": 252030 }, { "epoch": 10.44, "grad_norm": 1.921875, "learning_rate": 0.000364970203840346, "loss": 0.1707, "step": 252040 }, { "epoch": 10.44, "grad_norm": 0.46875, "learning_rate": 0.00036496057340902057, "loss": 0.1813, "step": 252050 }, { "epoch": 10.44, "grad_norm": 0.4140625, "learning_rate": 0.0003649509427613517, "loss": 0.1804, "step": 252060 }, { "epoch": 10.44, "grad_norm": 0.76171875, "learning_rate": 0.0003649413118973576, "loss": 0.1678, "step": 252070 }, { "epoch": 10.44, "grad_norm": 0.578125, "learning_rate": 0.00036493168081705636, "loss": 0.1918, "step": 252080 }, { "epoch": 10.44, "grad_norm": 0.353515625, "learning_rate": 0.00036492204952046607, "loss": 0.2453, "step": 252090 }, { "epoch": 10.44, "grad_norm": 0.68359375, "learning_rate": 0.000364912418007605, "loss": 0.2289, "step": 252100 }, { "epoch": 10.44, "grad_norm": 0.65234375, "learning_rate": 0.00036490278627849106, "loss": 0.1985, "step": 252110 }, { "epoch": 10.44, "grad_norm": 1.578125, "learning_rate": 0.00036489315433314254, "loss": 0.2075, "step": 252120 }, { "epoch": 10.44, "grad_norm": 1.5703125, "learning_rate": 0.0003648835221715775, "loss": 0.1693, "step": 252130 }, { "epoch": 10.44, "grad_norm": 1.7734375, "learning_rate": 0.0003648738897938141, "loss": 0.2553, "step": 252140 }, { "epoch": 10.44, "grad_norm": 0.64453125, "learning_rate": 0.00036486425719987035, "loss": 0.1956, "step": 252150 }, { "epoch": 10.44, "grad_norm": 1.2421875, "learning_rate": 0.00036485462438976457, "loss": 0.2814, "step": 252160 }, { "epoch": 10.44, "grad_norm": 1.0625, "learning_rate": 0.00036484499136351474, "loss": 0.1532, "step": 252170 }, { "epoch": 10.45, "grad_norm": 0.56640625, "learning_rate": 0.0003648353581211391, "loss": 0.2507, "step": 252180 }, { "epoch": 10.45, "grad_norm": 1.3359375, "learning_rate": 0.0003648257246626556, "loss": 0.2205, "step": 252190 }, { "epoch": 10.45, "grad_norm": 0.61328125, "learning_rate": 0.0003648160909880826, "loss": 0.1852, "step": 252200 }, { "epoch": 10.45, "grad_norm": 0.7890625, "learning_rate": 0.0003648064570974381, "loss": 0.1953, "step": 252210 }, { "epoch": 10.45, "grad_norm": 0.3359375, "learning_rate": 0.0003647968229907402, "loss": 0.2004, "step": 252220 }, { "epoch": 10.45, "grad_norm": 0.7578125, "learning_rate": 0.0003647871886680071, "loss": 0.1809, "step": 252230 }, { "epoch": 10.45, "grad_norm": 0.7421875, "learning_rate": 0.0003647775541292569, "loss": 0.1938, "step": 252240 }, { "epoch": 10.45, "grad_norm": 0.4140625, "learning_rate": 0.00036476791937450773, "loss": 0.1692, "step": 252250 }, { "epoch": 10.45, "grad_norm": 1.390625, "learning_rate": 0.0003647582844037778, "loss": 0.1781, "step": 252260 }, { "epoch": 10.45, "grad_norm": 0.392578125, "learning_rate": 0.00036474864921708506, "loss": 0.2244, "step": 252270 }, { "epoch": 10.45, "grad_norm": 0.57421875, "learning_rate": 0.00036473901381444784, "loss": 0.2067, "step": 252280 }, { "epoch": 10.45, "grad_norm": 0.5546875, "learning_rate": 0.00036472937819588417, "loss": 0.2221, "step": 252290 }, { "epoch": 10.45, "grad_norm": 1.1875, "learning_rate": 0.00036471974236141215, "loss": 0.1781, "step": 252300 }, { "epoch": 10.45, "grad_norm": 0.609375, "learning_rate": 0.00036471010631105, "loss": 0.1765, "step": 252310 }, { "epoch": 10.45, "grad_norm": 1.5078125, "learning_rate": 0.0003647004700448158, "loss": 0.2304, "step": 252320 }, { "epoch": 10.45, "grad_norm": 0.7890625, "learning_rate": 0.0003646908335627277, "loss": 0.1893, "step": 252330 }, { "epoch": 10.45, "grad_norm": 0.7421875, "learning_rate": 0.0003646811968648039, "loss": 0.1863, "step": 252340 }, { "epoch": 10.45, "grad_norm": 0.50390625, "learning_rate": 0.0003646715599510624, "loss": 0.2033, "step": 252350 }, { "epoch": 10.45, "grad_norm": 0.6015625, "learning_rate": 0.00036466192282152147, "loss": 0.1803, "step": 252360 }, { "epoch": 10.45, "grad_norm": 1.1328125, "learning_rate": 0.00036465228547619914, "loss": 0.222, "step": 252370 }, { "epoch": 10.45, "grad_norm": 0.66796875, "learning_rate": 0.0003646426479151136, "loss": 0.189, "step": 252380 }, { "epoch": 10.45, "grad_norm": 0.85546875, "learning_rate": 0.00036463301013828296, "loss": 0.2191, "step": 252390 }, { "epoch": 10.45, "grad_norm": 0.7109375, "learning_rate": 0.0003646233721457254, "loss": 0.1991, "step": 252400 }, { "epoch": 10.45, "grad_norm": 0.8671875, "learning_rate": 0.000364613733937459, "loss": 0.1665, "step": 252410 }, { "epoch": 10.46, "grad_norm": 0.66796875, "learning_rate": 0.000364604095513502, "loss": 0.2012, "step": 252420 }, { "epoch": 10.46, "grad_norm": 0.83984375, "learning_rate": 0.0003645944568738724, "loss": 0.1808, "step": 252430 }, { "epoch": 10.46, "grad_norm": 0.63671875, "learning_rate": 0.00036458481801858846, "loss": 0.2274, "step": 252440 }, { "epoch": 10.46, "grad_norm": 0.546875, "learning_rate": 0.00036457517894766816, "loss": 0.1896, "step": 252450 }, { "epoch": 10.46, "grad_norm": 0.40234375, "learning_rate": 0.0003645655396611298, "loss": 0.2186, "step": 252460 }, { "epoch": 10.46, "grad_norm": 0.94140625, "learning_rate": 0.00036455590015899153, "loss": 0.2247, "step": 252470 }, { "epoch": 10.46, "grad_norm": 0.87890625, "learning_rate": 0.00036454626044127135, "loss": 0.2088, "step": 252480 }, { "epoch": 10.46, "grad_norm": 0.66015625, "learning_rate": 0.00036453662050798754, "loss": 0.2064, "step": 252490 }, { "epoch": 10.46, "grad_norm": 0.37890625, "learning_rate": 0.0003645269803591582, "loss": 0.2244, "step": 252500 }, { "epoch": 10.46, "grad_norm": 1.0234375, "learning_rate": 0.0003645173399948013, "loss": 0.1879, "step": 252510 }, { "epoch": 10.46, "grad_norm": 0.8984375, "learning_rate": 0.0003645076994149353, "loss": 0.1914, "step": 252520 }, { "epoch": 10.46, "grad_norm": 0.671875, "learning_rate": 0.0003644980586195781, "loss": 0.2014, "step": 252530 }, { "epoch": 10.46, "grad_norm": 1.8671875, "learning_rate": 0.0003644884176087479, "loss": 0.1746, "step": 252540 }, { "epoch": 10.46, "grad_norm": 0.396484375, "learning_rate": 0.000364478776382463, "loss": 0.2474, "step": 252550 }, { "epoch": 10.46, "grad_norm": 0.7109375, "learning_rate": 0.00036446913494074117, "loss": 0.1915, "step": 252560 }, { "epoch": 10.46, "grad_norm": 0.53515625, "learning_rate": 0.00036445949328360095, "loss": 0.2217, "step": 252570 }, { "epoch": 10.46, "grad_norm": 0.2412109375, "learning_rate": 0.00036444985141106034, "loss": 0.1765, "step": 252580 }, { "epoch": 10.46, "grad_norm": 0.53515625, "learning_rate": 0.0003644402093231374, "loss": 0.1833, "step": 252590 }, { "epoch": 10.46, "grad_norm": 0.3125, "learning_rate": 0.00036443056701985035, "loss": 0.193, "step": 252600 }, { "epoch": 10.46, "grad_norm": 0.6875, "learning_rate": 0.00036442092450121735, "loss": 0.1668, "step": 252610 }, { "epoch": 10.46, "grad_norm": 0.703125, "learning_rate": 0.0003644112817672565, "loss": 0.198, "step": 252620 }, { "epoch": 10.46, "grad_norm": 0.62109375, "learning_rate": 0.000364401638817986, "loss": 0.2103, "step": 252630 }, { "epoch": 10.46, "grad_norm": 2.71875, "learning_rate": 0.00036439199565342395, "loss": 0.1979, "step": 252640 }, { "epoch": 10.46, "grad_norm": 0.9296875, "learning_rate": 0.0003643823522735885, "loss": 0.2515, "step": 252650 }, { "epoch": 10.47, "grad_norm": 0.78125, "learning_rate": 0.0003643727086784979, "loss": 0.1711, "step": 252660 }, { "epoch": 10.47, "grad_norm": 0.87109375, "learning_rate": 0.0003643630648681701, "loss": 0.2317, "step": 252670 }, { "epoch": 10.47, "grad_norm": 1.6015625, "learning_rate": 0.0003643534208426235, "loss": 0.1947, "step": 252680 }, { "epoch": 10.47, "grad_norm": 0.56640625, "learning_rate": 0.000364343776601876, "loss": 0.1823, "step": 252690 }, { "epoch": 10.47, "grad_norm": 0.72265625, "learning_rate": 0.00036433413214594584, "loss": 0.1988, "step": 252700 }, { "epoch": 10.47, "grad_norm": 0.416015625, "learning_rate": 0.0003643244874748513, "loss": 0.1394, "step": 252710 }, { "epoch": 10.47, "grad_norm": 0.51953125, "learning_rate": 0.00036431484258861034, "loss": 0.1583, "step": 252720 }, { "epoch": 10.47, "grad_norm": 0.388671875, "learning_rate": 0.00036430519748724123, "loss": 0.2223, "step": 252730 }, { "epoch": 10.47, "grad_norm": 1.7734375, "learning_rate": 0.0003642955521707621, "loss": 0.2206, "step": 252740 }, { "epoch": 10.47, "grad_norm": 1.1015625, "learning_rate": 0.00036428590663919097, "loss": 0.1767, "step": 252750 }, { "epoch": 10.47, "grad_norm": 1.40625, "learning_rate": 0.00036427626089254625, "loss": 0.2165, "step": 252760 }, { "epoch": 10.47, "grad_norm": 0.50390625, "learning_rate": 0.00036426661493084585, "loss": 0.1831, "step": 252770 }, { "epoch": 10.47, "grad_norm": 0.427734375, "learning_rate": 0.00036425696875410805, "loss": 0.1759, "step": 252780 }, { "epoch": 10.47, "grad_norm": 1.15625, "learning_rate": 0.000364247322362351, "loss": 0.1555, "step": 252790 }, { "epoch": 10.47, "grad_norm": 0.6953125, "learning_rate": 0.00036423767575559276, "loss": 0.1848, "step": 252800 }, { "epoch": 10.47, "grad_norm": 0.46484375, "learning_rate": 0.00036422802893385165, "loss": 0.1539, "step": 252810 }, { "epoch": 10.47, "grad_norm": 0.640625, "learning_rate": 0.00036421838189714566, "loss": 0.1577, "step": 252820 }, { "epoch": 10.47, "grad_norm": 0.302734375, "learning_rate": 0.00036420873464549296, "loss": 0.2089, "step": 252830 }, { "epoch": 10.47, "grad_norm": 1.34375, "learning_rate": 0.0003641990871789119, "loss": 0.1965, "step": 252840 }, { "epoch": 10.47, "grad_norm": 0.80859375, "learning_rate": 0.00036418943949742034, "loss": 0.1937, "step": 252850 }, { "epoch": 10.47, "grad_norm": 0.439453125, "learning_rate": 0.00036417979160103663, "loss": 0.1778, "step": 252860 }, { "epoch": 10.47, "grad_norm": 1.1171875, "learning_rate": 0.00036417014348977896, "loss": 0.1986, "step": 252870 }, { "epoch": 10.47, "grad_norm": 0.765625, "learning_rate": 0.00036416049516366526, "loss": 0.2413, "step": 252880 }, { "epoch": 10.47, "grad_norm": 0.99609375, "learning_rate": 0.00036415084662271403, "loss": 0.1874, "step": 252890 }, { "epoch": 10.48, "grad_norm": 0.53515625, "learning_rate": 0.00036414119786694316, "loss": 0.2144, "step": 252900 }, { "epoch": 10.48, "grad_norm": 1.046875, "learning_rate": 0.0003641315488963708, "loss": 0.1881, "step": 252910 }, { "epoch": 10.48, "grad_norm": 0.91015625, "learning_rate": 0.0003641218997110153, "loss": 0.2054, "step": 252920 }, { "epoch": 10.48, "grad_norm": 0.6796875, "learning_rate": 0.0003641122503108946, "loss": 0.24, "step": 252930 }, { "epoch": 10.48, "grad_norm": 0.94140625, "learning_rate": 0.00036410260069602704, "loss": 0.1792, "step": 252940 }, { "epoch": 10.48, "grad_norm": 2.234375, "learning_rate": 0.0003640929508664307, "loss": 0.2583, "step": 252950 }, { "epoch": 10.48, "grad_norm": 2.28125, "learning_rate": 0.0003640833008221237, "loss": 0.1762, "step": 252960 }, { "epoch": 10.48, "grad_norm": 1.1796875, "learning_rate": 0.0003640736505631243, "loss": 0.1503, "step": 252970 }, { "epoch": 10.48, "grad_norm": 1.0, "learning_rate": 0.0003640640000894506, "loss": 0.2368, "step": 252980 }, { "epoch": 10.48, "grad_norm": 0.6875, "learning_rate": 0.00036405434940112077, "loss": 0.1859, "step": 252990 }, { "epoch": 10.48, "grad_norm": 1.0859375, "learning_rate": 0.000364044698498153, "loss": 0.251, "step": 253000 }, { "epoch": 10.48, "grad_norm": 0.91015625, "learning_rate": 0.00036403504738056537, "loss": 0.2043, "step": 253010 }, { "epoch": 10.48, "grad_norm": 0.64453125, "learning_rate": 0.0003640253960483761, "loss": 0.2224, "step": 253020 }, { "epoch": 10.48, "grad_norm": 0.546875, "learning_rate": 0.00036401574450160336, "loss": 0.1891, "step": 253030 }, { "epoch": 10.48, "grad_norm": 0.8125, "learning_rate": 0.0003640060927402652, "loss": 0.2247, "step": 253040 }, { "epoch": 10.48, "grad_norm": 1.171875, "learning_rate": 0.0003639964407643801, "loss": 0.1748, "step": 253050 }, { "epoch": 10.48, "grad_norm": 0.73828125, "learning_rate": 0.00036398678857396585, "loss": 0.2256, "step": 253060 }, { "epoch": 10.48, "grad_norm": 0.82421875, "learning_rate": 0.0003639771361690408, "loss": 0.2196, "step": 253070 }, { "epoch": 10.48, "grad_norm": 0.66015625, "learning_rate": 0.0003639674835496232, "loss": 0.1815, "step": 253080 }, { "epoch": 10.48, "grad_norm": 1.4375, "learning_rate": 0.00036395783071573094, "loss": 0.2118, "step": 253090 }, { "epoch": 10.48, "grad_norm": 0.5625, "learning_rate": 0.00036394817766738244, "loss": 0.1363, "step": 253100 }, { "epoch": 10.48, "grad_norm": 1.0078125, "learning_rate": 0.00036393852440459577, "loss": 0.175, "step": 253110 }, { "epoch": 10.48, "grad_norm": 2.046875, "learning_rate": 0.00036392887092738903, "loss": 0.1944, "step": 253120 }, { "epoch": 10.48, "grad_norm": 0.828125, "learning_rate": 0.00036391921723578056, "loss": 0.1714, "step": 253130 }, { "epoch": 10.49, "grad_norm": 1.09375, "learning_rate": 0.00036390956332978835, "loss": 0.1882, "step": 253140 }, { "epoch": 10.49, "grad_norm": 1.3359375, "learning_rate": 0.00036389990920943066, "loss": 0.1898, "step": 253150 }, { "epoch": 10.49, "grad_norm": 0.7734375, "learning_rate": 0.0003638902548747257, "loss": 0.2211, "step": 253160 }, { "epoch": 10.49, "grad_norm": 0.703125, "learning_rate": 0.0003638806003256915, "loss": 0.2279, "step": 253170 }, { "epoch": 10.49, "grad_norm": 0.6484375, "learning_rate": 0.00036387094556234633, "loss": 0.185, "step": 253180 }, { "epoch": 10.49, "grad_norm": 0.45703125, "learning_rate": 0.0003638612905847084, "loss": 0.2044, "step": 253190 }, { "epoch": 10.49, "grad_norm": 0.94921875, "learning_rate": 0.0003638516353927958, "loss": 0.2263, "step": 253200 }, { "epoch": 10.49, "grad_norm": 0.54296875, "learning_rate": 0.0003638419799866267, "loss": 0.2096, "step": 253210 }, { "epoch": 10.49, "grad_norm": 0.66796875, "learning_rate": 0.00036383232436621924, "loss": 0.2143, "step": 253220 }, { "epoch": 10.49, "grad_norm": 0.27734375, "learning_rate": 0.0003638226685315916, "loss": 0.1672, "step": 253230 }, { "epoch": 10.49, "grad_norm": 0.55078125, "learning_rate": 0.0003638130124827621, "loss": 0.201, "step": 253240 }, { "epoch": 10.49, "grad_norm": 0.84765625, "learning_rate": 0.0003638033562197488, "loss": 0.192, "step": 253250 }, { "epoch": 10.49, "grad_norm": 0.7421875, "learning_rate": 0.00036379369974256983, "loss": 0.154, "step": 253260 }, { "epoch": 10.49, "grad_norm": 0.458984375, "learning_rate": 0.00036378404305124337, "loss": 0.2134, "step": 253270 }, { "epoch": 10.49, "grad_norm": 0.7265625, "learning_rate": 0.0003637743861457877, "loss": 0.2011, "step": 253280 }, { "epoch": 10.49, "grad_norm": 1.3984375, "learning_rate": 0.0003637647290262209, "loss": 0.2311, "step": 253290 }, { "epoch": 10.49, "grad_norm": 0.40234375, "learning_rate": 0.0003637550716925611, "loss": 0.2124, "step": 253300 }, { "epoch": 10.49, "grad_norm": 0.9375, "learning_rate": 0.00036374541414482657, "loss": 0.2109, "step": 253310 }, { "epoch": 10.49, "grad_norm": 0.6484375, "learning_rate": 0.0003637357563830356, "loss": 0.2374, "step": 253320 }, { "epoch": 10.49, "grad_norm": 0.8828125, "learning_rate": 0.00036372609840720603, "loss": 0.2096, "step": 253330 }, { "epoch": 10.49, "grad_norm": 0.95703125, "learning_rate": 0.00036371644021735626, "loss": 0.2468, "step": 253340 }, { "epoch": 10.49, "grad_norm": 0.458984375, "learning_rate": 0.0003637067818135045, "loss": 0.1351, "step": 253350 }, { "epoch": 10.49, "grad_norm": 0.498046875, "learning_rate": 0.0003636971231956688, "loss": 0.2081, "step": 253360 }, { "epoch": 10.49, "grad_norm": 0.54296875, "learning_rate": 0.00036368746436386747, "loss": 0.1717, "step": 253370 }, { "epoch": 10.49, "grad_norm": 1.0390625, "learning_rate": 0.00036367780531811853, "loss": 0.2242, "step": 253380 }, { "epoch": 10.5, "grad_norm": 1.2578125, "learning_rate": 0.00036366814605844023, "loss": 0.1726, "step": 253390 }, { "epoch": 10.5, "grad_norm": 0.26171875, "learning_rate": 0.0003636584865848508, "loss": 0.1554, "step": 253400 }, { "epoch": 10.5, "grad_norm": 0.59765625, "learning_rate": 0.00036364882689736834, "loss": 0.251, "step": 253410 }, { "epoch": 10.5, "grad_norm": 1.2109375, "learning_rate": 0.00036363916699601106, "loss": 0.1713, "step": 253420 }, { "epoch": 10.5, "grad_norm": 0.625, "learning_rate": 0.0003636295068807972, "loss": 0.1772, "step": 253430 }, { "epoch": 10.5, "grad_norm": 1.15625, "learning_rate": 0.00036361984655174475, "loss": 0.2056, "step": 253440 }, { "epoch": 10.5, "grad_norm": 1.0546875, "learning_rate": 0.0003636101860088722, "loss": 0.1716, "step": 253450 }, { "epoch": 10.5, "grad_norm": 0.73828125, "learning_rate": 0.00036360052525219737, "loss": 0.2075, "step": 253460 }, { "epoch": 10.5, "grad_norm": 0.5859375, "learning_rate": 0.0003635908642817387, "loss": 0.2088, "step": 253470 }, { "epoch": 10.5, "grad_norm": 0.734375, "learning_rate": 0.0003635812030975143, "loss": 0.1617, "step": 253480 }, { "epoch": 10.5, "grad_norm": 0.349609375, "learning_rate": 0.00036357154169954235, "loss": 0.1114, "step": 253490 }, { "epoch": 10.5, "grad_norm": 0.33984375, "learning_rate": 0.00036356188008784097, "loss": 0.1954, "step": 253500 }, { "epoch": 10.5, "grad_norm": 0.68359375, "learning_rate": 0.0003635522182624285, "loss": 0.2369, "step": 253510 }, { "epoch": 10.5, "grad_norm": 1.359375, "learning_rate": 0.0003635425562233229, "loss": 0.1763, "step": 253520 }, { "epoch": 10.5, "grad_norm": 0.54296875, "learning_rate": 0.00036353289397054246, "loss": 0.2229, "step": 253530 }, { "epoch": 10.5, "grad_norm": 1.0546875, "learning_rate": 0.00036352323150410547, "loss": 0.2139, "step": 253540 }, { "epoch": 10.5, "grad_norm": 1.0546875, "learning_rate": 0.00036351356882402997, "loss": 0.1832, "step": 253550 }, { "epoch": 10.5, "grad_norm": 1.59375, "learning_rate": 0.0003635039059303342, "loss": 0.1716, "step": 253560 }, { "epoch": 10.5, "grad_norm": 0.79296875, "learning_rate": 0.0003634942428230364, "loss": 0.2184, "step": 253570 }, { "epoch": 10.5, "grad_norm": 0.263671875, "learning_rate": 0.0003634845795021546, "loss": 0.2056, "step": 253580 }, { "epoch": 10.5, "grad_norm": 1.203125, "learning_rate": 0.0003634749159677071, "loss": 0.1802, "step": 253590 }, { "epoch": 10.5, "grad_norm": 0.87109375, "learning_rate": 0.00036346525221971207, "loss": 0.2728, "step": 253600 }, { "epoch": 10.5, "grad_norm": 0.048583984375, "learning_rate": 0.00036345558825818765, "loss": 0.2032, "step": 253610 }, { "epoch": 10.5, "grad_norm": 0.9921875, "learning_rate": 0.0003634459240831521, "loss": 0.2065, "step": 253620 }, { "epoch": 10.51, "grad_norm": 0.63671875, "learning_rate": 0.0003634362596946236, "loss": 0.1703, "step": 253630 }, { "epoch": 10.51, "grad_norm": 0.62890625, "learning_rate": 0.0003634265950926203, "loss": 0.2401, "step": 253640 }, { "epoch": 10.51, "grad_norm": 1.0625, "learning_rate": 0.0003634169302771604, "loss": 0.2, "step": 253650 }, { "epoch": 10.51, "grad_norm": 0.9453125, "learning_rate": 0.00036340726524826205, "loss": 0.1726, "step": 253660 }, { "epoch": 10.51, "grad_norm": 1.1953125, "learning_rate": 0.0003633976000059435, "loss": 0.2003, "step": 253670 }, { "epoch": 10.51, "grad_norm": 0.68359375, "learning_rate": 0.00036338793455022286, "loss": 0.1995, "step": 253680 }, { "epoch": 10.51, "grad_norm": 0.58203125, "learning_rate": 0.0003633782688811184, "loss": 0.2135, "step": 253690 }, { "epoch": 10.51, "grad_norm": 1.5, "learning_rate": 0.00036336860299864834, "loss": 0.1869, "step": 253700 }, { "epoch": 10.51, "grad_norm": 1.0, "learning_rate": 0.00036335893690283075, "loss": 0.2285, "step": 253710 }, { "epoch": 10.51, "grad_norm": 0.80859375, "learning_rate": 0.0003633492705936839, "loss": 0.2136, "step": 253720 }, { "epoch": 10.51, "grad_norm": 1.203125, "learning_rate": 0.000363339604071226, "loss": 0.1653, "step": 253730 }, { "epoch": 10.51, "grad_norm": 0.5390625, "learning_rate": 0.0003633299373354751, "loss": 0.1603, "step": 253740 }, { "epoch": 10.51, "grad_norm": 0.671875, "learning_rate": 0.0003633202703864496, "loss": 0.2071, "step": 253750 }, { "epoch": 10.51, "grad_norm": 1.2734375, "learning_rate": 0.00036331060322416755, "loss": 0.2274, "step": 253760 }, { "epoch": 10.51, "grad_norm": 1.0234375, "learning_rate": 0.00036330093584864716, "loss": 0.199, "step": 253770 }, { "epoch": 10.51, "grad_norm": 0.875, "learning_rate": 0.0003632912682599067, "loss": 0.1943, "step": 253780 }, { "epoch": 10.51, "grad_norm": 0.59375, "learning_rate": 0.00036328160045796424, "loss": 0.1637, "step": 253790 }, { "epoch": 10.51, "grad_norm": 0.330078125, "learning_rate": 0.0003632719324428381, "loss": 0.1514, "step": 253800 }, { "epoch": 10.51, "grad_norm": 0.625, "learning_rate": 0.00036326226421454645, "loss": 0.1695, "step": 253810 }, { "epoch": 10.51, "grad_norm": 0.84375, "learning_rate": 0.00036325259577310733, "loss": 0.1741, "step": 253820 }, { "epoch": 10.51, "grad_norm": 0.7421875, "learning_rate": 0.0003632429271185391, "loss": 0.2139, "step": 253830 }, { "epoch": 10.51, "grad_norm": 0.88671875, "learning_rate": 0.00036323325825085996, "loss": 0.2089, "step": 253840 }, { "epoch": 10.51, "grad_norm": 0.59375, "learning_rate": 0.000363223589170088, "loss": 0.2147, "step": 253850 }, { "epoch": 10.51, "grad_norm": 0.482421875, "learning_rate": 0.0003632139198762415, "loss": 0.1915, "step": 253860 }, { "epoch": 10.52, "grad_norm": 0.87109375, "learning_rate": 0.00036320425036933865, "loss": 0.2103, "step": 253870 }, { "epoch": 10.52, "grad_norm": 0.7890625, "learning_rate": 0.0003631945806493976, "loss": 0.2553, "step": 253880 }, { "epoch": 10.52, "grad_norm": 1.7734375, "learning_rate": 0.00036318491071643663, "loss": 0.2317, "step": 253890 }, { "epoch": 10.52, "grad_norm": 0.5703125, "learning_rate": 0.00036317524057047373, "loss": 0.1757, "step": 253900 }, { "epoch": 10.52, "grad_norm": 0.7734375, "learning_rate": 0.0003631655702115274, "loss": 0.2338, "step": 253910 }, { "epoch": 10.52, "grad_norm": 0.5234375, "learning_rate": 0.0003631558996396156, "loss": 0.2177, "step": 253920 }, { "epoch": 10.52, "grad_norm": 0.7109375, "learning_rate": 0.0003631462288547566, "loss": 0.1702, "step": 253930 }, { "epoch": 10.52, "grad_norm": 0.392578125, "learning_rate": 0.00036313655785696875, "loss": 0.1676, "step": 253940 }, { "epoch": 10.52, "grad_norm": 0.84375, "learning_rate": 0.00036312688664627, "loss": 0.2005, "step": 253950 }, { "epoch": 10.52, "grad_norm": 0.96875, "learning_rate": 0.0003631172152226787, "loss": 0.2471, "step": 253960 }, { "epoch": 10.52, "grad_norm": 0.265625, "learning_rate": 0.0003631075435862131, "loss": 0.179, "step": 253970 }, { "epoch": 10.52, "grad_norm": 2.046875, "learning_rate": 0.0003630978717368911, "loss": 0.2291, "step": 253980 }, { "epoch": 10.52, "grad_norm": 1.84375, "learning_rate": 0.0003630881996747313, "loss": 0.2747, "step": 253990 }, { "epoch": 10.52, "grad_norm": 0.77734375, "learning_rate": 0.00036307852739975166, "loss": 0.228, "step": 254000 }, { "epoch": 10.52, "grad_norm": 0.55078125, "learning_rate": 0.00036306885491197045, "loss": 0.2039, "step": 254010 }, { "epoch": 10.52, "grad_norm": 0.94921875, "learning_rate": 0.0003630591822114059, "loss": 0.1634, "step": 254020 }, { "epoch": 10.52, "grad_norm": 1.265625, "learning_rate": 0.00036304950929807604, "loss": 0.2246, "step": 254030 }, { "epoch": 10.52, "grad_norm": 0.828125, "learning_rate": 0.00036303983617199943, "loss": 0.1494, "step": 254040 }, { "epoch": 10.52, "grad_norm": 0.55859375, "learning_rate": 0.00036303016283319395, "loss": 0.244, "step": 254050 }, { "epoch": 10.52, "grad_norm": 1.0859375, "learning_rate": 0.00036302048928167787, "loss": 0.1817, "step": 254060 }, { "epoch": 10.52, "grad_norm": 0.55078125, "learning_rate": 0.0003630108155174695, "loss": 0.2296, "step": 254070 }, { "epoch": 10.52, "grad_norm": 0.44921875, "learning_rate": 0.00036300114154058687, "loss": 0.1925, "step": 254080 }, { "epoch": 10.52, "grad_norm": 0.57421875, "learning_rate": 0.0003629914673510484, "loss": 0.2307, "step": 254090 }, { "epoch": 10.52, "grad_norm": 0.8359375, "learning_rate": 0.00036298179294887216, "loss": 0.1812, "step": 254100 }, { "epoch": 10.53, "grad_norm": 0.84765625, "learning_rate": 0.0003629721183340764, "loss": 0.2328, "step": 254110 }, { "epoch": 10.53, "grad_norm": 1.2890625, "learning_rate": 0.0003629624435066793, "loss": 0.1998, "step": 254120 }, { "epoch": 10.53, "grad_norm": 0.64453125, "learning_rate": 0.00036295276846669907, "loss": 0.2127, "step": 254130 }, { "epoch": 10.53, "grad_norm": 1.3984375, "learning_rate": 0.0003629430932141539, "loss": 0.2283, "step": 254140 }, { "epoch": 10.53, "grad_norm": 1.265625, "learning_rate": 0.00036293341774906214, "loss": 0.2075, "step": 254150 }, { "epoch": 10.53, "grad_norm": 0.69140625, "learning_rate": 0.0003629237420714418, "loss": 0.1953, "step": 254160 }, { "epoch": 10.53, "grad_norm": 0.94140625, "learning_rate": 0.0003629140661813112, "loss": 0.2093, "step": 254170 }, { "epoch": 10.53, "grad_norm": 0.984375, "learning_rate": 0.0003629043900786885, "loss": 0.1911, "step": 254180 }, { "epoch": 10.53, "grad_norm": 1.3125, "learning_rate": 0.00036289471376359185, "loss": 0.1825, "step": 254190 }, { "epoch": 10.53, "grad_norm": 0.234375, "learning_rate": 0.00036288503723603974, "loss": 0.1598, "step": 254200 }, { "epoch": 10.53, "grad_norm": 0.005096435546875, "learning_rate": 0.00036287536049605, "loss": 0.197, "step": 254210 }, { "epoch": 10.53, "grad_norm": 0.341796875, "learning_rate": 0.0003628656835436411, "loss": 0.1785, "step": 254220 }, { "epoch": 10.53, "grad_norm": 1.4140625, "learning_rate": 0.00036285600637883116, "loss": 0.1492, "step": 254230 }, { "epoch": 10.53, "grad_norm": 0.9453125, "learning_rate": 0.0003628463290016384, "loss": 0.1793, "step": 254240 }, { "epoch": 10.53, "grad_norm": 0.8828125, "learning_rate": 0.000362836651412081, "loss": 0.2202, "step": 254250 }, { "epoch": 10.53, "grad_norm": 1.1015625, "learning_rate": 0.0003628269736101773, "loss": 0.2238, "step": 254260 }, { "epoch": 10.53, "grad_norm": 0.5625, "learning_rate": 0.00036281729559594527, "loss": 0.1975, "step": 254270 }, { "epoch": 10.53, "grad_norm": 0.98828125, "learning_rate": 0.0003628076173694034, "loss": 0.2227, "step": 254280 }, { "epoch": 10.53, "grad_norm": 0.46484375, "learning_rate": 0.0003627979389305698, "loss": 0.1791, "step": 254290 }, { "epoch": 10.53, "grad_norm": 0.5078125, "learning_rate": 0.00036278826027946254, "loss": 0.2177, "step": 254300 }, { "epoch": 10.53, "grad_norm": 0.2890625, "learning_rate": 0.0003627785814161001, "loss": 0.1803, "step": 254310 }, { "epoch": 10.53, "grad_norm": 1.2265625, "learning_rate": 0.0003627689023405004, "loss": 0.2038, "step": 254320 }, { "epoch": 10.53, "grad_norm": 0.6328125, "learning_rate": 0.0003627592230526819, "loss": 0.1861, "step": 254330 }, { "epoch": 10.53, "grad_norm": 1.234375, "learning_rate": 0.0003627495435526627, "loss": 0.1616, "step": 254340 }, { "epoch": 10.54, "grad_norm": 1.2734375, "learning_rate": 0.00036273986384046097, "loss": 0.176, "step": 254350 }, { "epoch": 10.54, "grad_norm": 1.21875, "learning_rate": 0.0003627301839160951, "loss": 0.1972, "step": 254360 }, { "epoch": 10.54, "grad_norm": 1.1796875, "learning_rate": 0.0003627205037795831, "loss": 0.2334, "step": 254370 }, { "epoch": 10.54, "grad_norm": 0.8125, "learning_rate": 0.0003627108234309433, "loss": 0.1704, "step": 254380 }, { "epoch": 10.54, "grad_norm": 0.5625, "learning_rate": 0.00036270114287019397, "loss": 0.2221, "step": 254390 }, { "epoch": 10.54, "grad_norm": 0.8203125, "learning_rate": 0.0003626914620973532, "loss": 0.1703, "step": 254400 }, { "epoch": 10.54, "grad_norm": 0.5859375, "learning_rate": 0.0003626817811124393, "loss": 0.2079, "step": 254410 }, { "epoch": 10.54, "grad_norm": 1.375, "learning_rate": 0.00036267209991547046, "loss": 0.2152, "step": 254420 }, { "epoch": 10.54, "grad_norm": 0.41015625, "learning_rate": 0.0003626624185064648, "loss": 0.195, "step": 254430 }, { "epoch": 10.54, "grad_norm": 0.6015625, "learning_rate": 0.0003626527368854408, "loss": 0.1829, "step": 254440 }, { "epoch": 10.54, "grad_norm": 0.49609375, "learning_rate": 0.0003626430550524164, "loss": 0.2055, "step": 254450 }, { "epoch": 10.54, "grad_norm": 1.5625, "learning_rate": 0.00036263337300740996, "loss": 0.2303, "step": 254460 }, { "epoch": 10.54, "grad_norm": 0.55078125, "learning_rate": 0.00036262369075043966, "loss": 0.2107, "step": 254470 }, { "epoch": 10.54, "grad_norm": 0.60546875, "learning_rate": 0.0003626140082815238, "loss": 0.2166, "step": 254480 }, { "epoch": 10.54, "grad_norm": 1.0078125, "learning_rate": 0.00036260432560068046, "loss": 0.1835, "step": 254490 }, { "epoch": 10.54, "grad_norm": 1.2421875, "learning_rate": 0.000362594642707928, "loss": 0.1951, "step": 254500 }, { "epoch": 10.54, "grad_norm": 1.1484375, "learning_rate": 0.0003625849596032845, "loss": 0.2369, "step": 254510 }, { "epoch": 10.54, "grad_norm": 1.0390625, "learning_rate": 0.00036257527628676833, "loss": 0.2233, "step": 254520 }, { "epoch": 10.54, "grad_norm": 0.796875, "learning_rate": 0.0003625655927583976, "loss": 0.2457, "step": 254530 }, { "epoch": 10.54, "grad_norm": 0.31640625, "learning_rate": 0.0003625559090181906, "loss": 0.1839, "step": 254540 }, { "epoch": 10.54, "grad_norm": 0.85546875, "learning_rate": 0.0003625462250661656, "loss": 0.2156, "step": 254550 }, { "epoch": 10.54, "grad_norm": 0.91015625, "learning_rate": 0.0003625365409023407, "loss": 0.1892, "step": 254560 }, { "epoch": 10.54, "grad_norm": 0.91015625, "learning_rate": 0.0003625268565267342, "loss": 0.148, "step": 254570 }, { "epoch": 10.54, "grad_norm": 0.7265625, "learning_rate": 0.0003625171719393643, "loss": 0.2094, "step": 254580 }, { "epoch": 10.55, "grad_norm": 0.66796875, "learning_rate": 0.00036250748714024923, "loss": 0.2646, "step": 254590 }, { "epoch": 10.55, "grad_norm": 0.4765625, "learning_rate": 0.00036249780212940726, "loss": 0.1739, "step": 254600 }, { "epoch": 10.55, "grad_norm": 1.0390625, "learning_rate": 0.00036248811690685654, "loss": 0.1644, "step": 254610 }, { "epoch": 10.55, "grad_norm": 0.8984375, "learning_rate": 0.0003624784314726153, "loss": 0.2063, "step": 254620 }, { "epoch": 10.55, "grad_norm": 0.9765625, "learning_rate": 0.00036246874582670184, "loss": 0.1533, "step": 254630 }, { "epoch": 10.55, "grad_norm": 0.515625, "learning_rate": 0.0003624590599691343, "loss": 0.2499, "step": 254640 }, { "epoch": 10.55, "grad_norm": 1.859375, "learning_rate": 0.00036244937389993097, "loss": 0.1985, "step": 254650 }, { "epoch": 10.55, "grad_norm": 0.435546875, "learning_rate": 0.0003624396876191101, "loss": 0.197, "step": 254660 }, { "epoch": 10.55, "grad_norm": 1.5390625, "learning_rate": 0.00036243000112668984, "loss": 0.2383, "step": 254670 }, { "epoch": 10.55, "grad_norm": 0.484375, "learning_rate": 0.00036242031442268853, "loss": 0.2136, "step": 254680 }, { "epoch": 10.55, "grad_norm": 0.4375, "learning_rate": 0.00036241062750712425, "loss": 0.2076, "step": 254690 }, { "epoch": 10.55, "grad_norm": 1.3671875, "learning_rate": 0.0003624009403800153, "loss": 0.2293, "step": 254700 }, { "epoch": 10.55, "grad_norm": 0.8359375, "learning_rate": 0.00036239125304137994, "loss": 0.2051, "step": 254710 }, { "epoch": 10.55, "grad_norm": 1.15625, "learning_rate": 0.00036238156549123636, "loss": 0.1351, "step": 254720 }, { "epoch": 10.55, "grad_norm": 1.6875, "learning_rate": 0.00036237187772960287, "loss": 0.184, "step": 254730 }, { "epoch": 10.55, "grad_norm": 0.322265625, "learning_rate": 0.0003623621897564976, "loss": 0.155, "step": 254740 }, { "epoch": 10.55, "grad_norm": 0.6640625, "learning_rate": 0.00036235250157193885, "loss": 0.2066, "step": 254750 }, { "epoch": 10.55, "grad_norm": 1.078125, "learning_rate": 0.00036234281317594486, "loss": 0.1587, "step": 254760 }, { "epoch": 10.55, "grad_norm": 0.80078125, "learning_rate": 0.0003623331245685337, "loss": 0.1936, "step": 254770 }, { "epoch": 10.55, "grad_norm": 0.392578125, "learning_rate": 0.0003623234357497238, "loss": 0.1659, "step": 254780 }, { "epoch": 10.55, "grad_norm": 0.46875, "learning_rate": 0.0003623137467195334, "loss": 0.2138, "step": 254790 }, { "epoch": 10.55, "grad_norm": 1.3984375, "learning_rate": 0.0003623040574779806, "loss": 0.1965, "step": 254800 }, { "epoch": 10.55, "grad_norm": 0.46484375, "learning_rate": 0.00036229436802508365, "loss": 0.2107, "step": 254810 }, { "epoch": 10.55, "grad_norm": 0.83984375, "learning_rate": 0.00036228467836086085, "loss": 0.2529, "step": 254820 }, { "epoch": 10.56, "grad_norm": 0.423828125, "learning_rate": 0.0003622749884853304, "loss": 0.1867, "step": 254830 }, { "epoch": 10.56, "grad_norm": 0.83984375, "learning_rate": 0.0003622652983985106, "loss": 0.195, "step": 254840 }, { "epoch": 10.56, "grad_norm": 2.15625, "learning_rate": 0.0003622556081004196, "loss": 0.2268, "step": 254850 }, { "epoch": 10.56, "grad_norm": 0.55859375, "learning_rate": 0.0003622459175910757, "loss": 0.2474, "step": 254860 }, { "epoch": 10.56, "grad_norm": 0.875, "learning_rate": 0.000362236226870497, "loss": 0.266, "step": 254870 }, { "epoch": 10.56, "grad_norm": 0.828125, "learning_rate": 0.00036222653593870194, "loss": 0.2332, "step": 254880 }, { "epoch": 10.56, "grad_norm": 1.265625, "learning_rate": 0.0003622168447957086, "loss": 0.1629, "step": 254890 }, { "epoch": 10.56, "grad_norm": 1.953125, "learning_rate": 0.0003622071534415353, "loss": 0.212, "step": 254900 }, { "epoch": 10.56, "grad_norm": 1.0859375, "learning_rate": 0.0003621974618762003, "loss": 0.2378, "step": 254910 }, { "epoch": 10.56, "grad_norm": 0.53515625, "learning_rate": 0.0003621877700997217, "loss": 0.2183, "step": 254920 }, { "epoch": 10.56, "grad_norm": 1.0078125, "learning_rate": 0.00036217807811211796, "loss": 0.1818, "step": 254930 }, { "epoch": 10.56, "grad_norm": 0.40234375, "learning_rate": 0.0003621683859134071, "loss": 0.2418, "step": 254940 }, { "epoch": 10.56, "grad_norm": 0.482421875, "learning_rate": 0.0003621586935036074, "loss": 0.1966, "step": 254950 }, { "epoch": 10.56, "grad_norm": 0.5390625, "learning_rate": 0.0003621490008827372, "loss": 0.2006, "step": 254960 }, { "epoch": 10.56, "grad_norm": 1.125, "learning_rate": 0.0003621393080508148, "loss": 0.1878, "step": 254970 }, { "epoch": 10.56, "grad_norm": 0.6953125, "learning_rate": 0.0003621296150078582, "loss": 0.2085, "step": 254980 }, { "epoch": 10.56, "grad_norm": 1.0625, "learning_rate": 0.00036211992175388583, "loss": 0.2028, "step": 254990 }, { "epoch": 10.56, "grad_norm": 0.9140625, "learning_rate": 0.00036211022828891583, "loss": 0.2627, "step": 255000 }, { "epoch": 10.56, "grad_norm": 1.640625, "learning_rate": 0.00036210053461296655, "loss": 0.2156, "step": 255010 }, { "epoch": 10.56, "grad_norm": 0.7890625, "learning_rate": 0.0003620908407260561, "loss": 0.2632, "step": 255020 }, { "epoch": 10.56, "grad_norm": 0.58203125, "learning_rate": 0.00036208114662820284, "loss": 0.1798, "step": 255030 }, { "epoch": 10.56, "grad_norm": 0.90234375, "learning_rate": 0.00036207145231942497, "loss": 0.193, "step": 255040 }, { "epoch": 10.56, "grad_norm": 1.203125, "learning_rate": 0.0003620617577997407, "loss": 0.2453, "step": 255050 }, { "epoch": 10.56, "grad_norm": 0.609375, "learning_rate": 0.0003620520630691684, "loss": 0.2073, "step": 255060 }, { "epoch": 10.56, "grad_norm": 0.0, "learning_rate": 0.0003620423681277261, "loss": 0.2079, "step": 255070 }, { "epoch": 10.57, "grad_norm": 0.640625, "learning_rate": 0.0003620326729754322, "loss": 0.2213, "step": 255080 }, { "epoch": 10.57, "grad_norm": 1.265625, "learning_rate": 0.0003620229776123049, "loss": 0.1486, "step": 255090 }, { "epoch": 10.57, "grad_norm": 0.74609375, "learning_rate": 0.0003620132820383625, "loss": 0.2057, "step": 255100 }, { "epoch": 10.57, "grad_norm": 0.16796875, "learning_rate": 0.00036200358625362313, "loss": 0.2258, "step": 255110 }, { "epoch": 10.57, "grad_norm": 0.5546875, "learning_rate": 0.0003619938902581051, "loss": 0.2007, "step": 255120 }, { "epoch": 10.57, "grad_norm": 0.6640625, "learning_rate": 0.00036198419405182667, "loss": 0.1898, "step": 255130 }, { "epoch": 10.57, "grad_norm": 0.267578125, "learning_rate": 0.0003619744976348062, "loss": 0.2294, "step": 255140 }, { "epoch": 10.57, "grad_norm": 2.53125, "learning_rate": 0.0003619648010070616, "loss": 0.2244, "step": 255150 }, { "epoch": 10.57, "grad_norm": 0.640625, "learning_rate": 0.0003619551041686116, "loss": 0.1465, "step": 255160 }, { "epoch": 10.57, "grad_norm": 0.96484375, "learning_rate": 0.000361945407119474, "loss": 0.187, "step": 255170 }, { "epoch": 10.57, "grad_norm": 0.703125, "learning_rate": 0.0003619357098596673, "loss": 0.19, "step": 255180 }, { "epoch": 10.57, "grad_norm": 1.109375, "learning_rate": 0.00036192601238920966, "loss": 0.219, "step": 255190 }, { "epoch": 10.57, "grad_norm": 0.68359375, "learning_rate": 0.0003619163147081193, "loss": 0.2334, "step": 255200 }, { "epoch": 10.57, "grad_norm": 1.015625, "learning_rate": 0.00036190661681641457, "loss": 0.1918, "step": 255210 }, { "epoch": 10.57, "grad_norm": 1.28125, "learning_rate": 0.0003618969187141137, "loss": 0.2015, "step": 255220 }, { "epoch": 10.57, "grad_norm": 0.64453125, "learning_rate": 0.0003618872204012349, "loss": 0.216, "step": 255230 }, { "epoch": 10.57, "grad_norm": 0.408203125, "learning_rate": 0.0003618775218777964, "loss": 0.2183, "step": 255240 }, { "epoch": 10.57, "grad_norm": 2.0, "learning_rate": 0.00036186782314381655, "loss": 0.1649, "step": 255250 }, { "epoch": 10.57, "grad_norm": 0.38671875, "learning_rate": 0.00036185812419931344, "loss": 0.1983, "step": 255260 }, { "epoch": 10.57, "grad_norm": 0.69921875, "learning_rate": 0.0003618484250443055, "loss": 0.2145, "step": 255270 }, { "epoch": 10.57, "grad_norm": 0.58984375, "learning_rate": 0.0003618387256788109, "loss": 0.2534, "step": 255280 }, { "epoch": 10.57, "grad_norm": 0.60546875, "learning_rate": 0.00036182902610284784, "loss": 0.2738, "step": 255290 }, { "epoch": 10.57, "grad_norm": 0.99609375, "learning_rate": 0.00036181932631643463, "loss": 0.2097, "step": 255300 }, { "epoch": 10.57, "grad_norm": 0.98046875, "learning_rate": 0.00036180962631958955, "loss": 0.1905, "step": 255310 }, { "epoch": 10.58, "grad_norm": 0.796875, "learning_rate": 0.00036179992611233086, "loss": 0.1699, "step": 255320 }, { "epoch": 10.58, "grad_norm": 1.09375, "learning_rate": 0.0003617902256946768, "loss": 0.1837, "step": 255330 }, { "epoch": 10.58, "grad_norm": 0.62890625, "learning_rate": 0.0003617805250666455, "loss": 0.179, "step": 255340 }, { "epoch": 10.58, "grad_norm": 0.9609375, "learning_rate": 0.00036177082422825544, "loss": 0.167, "step": 255350 }, { "epoch": 10.58, "grad_norm": 0.99609375, "learning_rate": 0.0003617611231795247, "loss": 0.2226, "step": 255360 }, { "epoch": 10.58, "grad_norm": 0.54296875, "learning_rate": 0.0003617514219204716, "loss": 0.2122, "step": 255370 }, { "epoch": 10.58, "grad_norm": 0.859375, "learning_rate": 0.0003617417204511144, "loss": 0.1781, "step": 255380 }, { "epoch": 10.58, "grad_norm": 1.03125, "learning_rate": 0.00036173201877147133, "loss": 0.1748, "step": 255390 }, { "epoch": 10.58, "grad_norm": 0.78515625, "learning_rate": 0.00036172231688156074, "loss": 0.1856, "step": 255400 }, { "epoch": 10.58, "grad_norm": 0.6953125, "learning_rate": 0.00036171261478140074, "loss": 0.1733, "step": 255410 }, { "epoch": 10.58, "grad_norm": 1.734375, "learning_rate": 0.0003617029124710096, "loss": 0.1719, "step": 255420 }, { "epoch": 10.58, "grad_norm": 0.828125, "learning_rate": 0.00036169320995040576, "loss": 0.2087, "step": 255430 }, { "epoch": 10.58, "grad_norm": 1.1015625, "learning_rate": 0.0003616835072196073, "loss": 0.2289, "step": 255440 }, { "epoch": 10.58, "grad_norm": 0.412109375, "learning_rate": 0.00036167380427863254, "loss": 0.1814, "step": 255450 }, { "epoch": 10.58, "grad_norm": 0.46484375, "learning_rate": 0.0003616641011274998, "loss": 0.2035, "step": 255460 }, { "epoch": 10.58, "grad_norm": 0.78515625, "learning_rate": 0.0003616543977662272, "loss": 0.208, "step": 255470 }, { "epoch": 10.58, "grad_norm": 0.859375, "learning_rate": 0.0003616446941948331, "loss": 0.203, "step": 255480 }, { "epoch": 10.58, "grad_norm": 0.73046875, "learning_rate": 0.00036163499041333587, "loss": 0.191, "step": 255490 }, { "epoch": 10.58, "grad_norm": 0.81640625, "learning_rate": 0.00036162528642175347, "loss": 0.2417, "step": 255500 }, { "epoch": 10.58, "grad_norm": 0.6484375, "learning_rate": 0.00036161558222010446, "loss": 0.1769, "step": 255510 }, { "epoch": 10.58, "grad_norm": 1.3671875, "learning_rate": 0.00036160587780840694, "loss": 0.2063, "step": 255520 }, { "epoch": 10.58, "grad_norm": 0.7109375, "learning_rate": 0.00036159617318667913, "loss": 0.1813, "step": 255530 }, { "epoch": 10.58, "grad_norm": 0.76171875, "learning_rate": 0.00036158646835493947, "loss": 0.2269, "step": 255540 }, { "epoch": 10.58, "grad_norm": 0.984375, "learning_rate": 0.00036157676331320605, "loss": 0.159, "step": 255550 }, { "epoch": 10.59, "grad_norm": 1.171875, "learning_rate": 0.00036156705806149726, "loss": 0.1533, "step": 255560 }, { "epoch": 10.59, "grad_norm": 0.7890625, "learning_rate": 0.00036155735259983137, "loss": 0.2372, "step": 255570 }, { "epoch": 10.59, "grad_norm": 0.6796875, "learning_rate": 0.00036154764692822647, "loss": 0.2325, "step": 255580 }, { "epoch": 10.59, "grad_norm": 0.95703125, "learning_rate": 0.000361537941046701, "loss": 0.1968, "step": 255590 }, { "epoch": 10.59, "grad_norm": 0.455078125, "learning_rate": 0.0003615282349552732, "loss": 0.1972, "step": 255600 }, { "epoch": 10.59, "grad_norm": 0.474609375, "learning_rate": 0.00036151852865396123, "loss": 0.1777, "step": 255610 }, { "epoch": 10.59, "grad_norm": 0.98828125, "learning_rate": 0.0003615088221427835, "loss": 0.2106, "step": 255620 }, { "epoch": 10.59, "grad_norm": 0.9765625, "learning_rate": 0.0003614991154217582, "loss": 0.1872, "step": 255630 }, { "epoch": 10.59, "grad_norm": 0.3515625, "learning_rate": 0.00036148940849090364, "loss": 0.1671, "step": 255640 }, { "epoch": 10.59, "grad_norm": 0.263671875, "learning_rate": 0.000361479701350238, "loss": 0.1879, "step": 255650 }, { "epoch": 10.59, "grad_norm": 0.46484375, "learning_rate": 0.0003614699939997795, "loss": 0.1451, "step": 255660 }, { "epoch": 10.59, "grad_norm": 0.95703125, "learning_rate": 0.0003614602864395467, "loss": 0.1979, "step": 255670 }, { "epoch": 10.59, "grad_norm": 0.6875, "learning_rate": 0.00036145057866955756, "loss": 0.2071, "step": 255680 }, { "epoch": 10.59, "grad_norm": 0.46484375, "learning_rate": 0.0003614408706898305, "loss": 0.21, "step": 255690 }, { "epoch": 10.59, "grad_norm": 0.51953125, "learning_rate": 0.00036143116250038376, "loss": 0.1951, "step": 255700 }, { "epoch": 10.59, "grad_norm": 1.21875, "learning_rate": 0.00036142145410123557, "loss": 0.1842, "step": 255710 }, { "epoch": 10.59, "grad_norm": 1.2734375, "learning_rate": 0.0003614117454924043, "loss": 0.1906, "step": 255720 }, { "epoch": 10.59, "grad_norm": 0.5, "learning_rate": 0.00036140203667390814, "loss": 0.2, "step": 255730 }, { "epoch": 10.59, "grad_norm": 0.75390625, "learning_rate": 0.0003613923276457654, "loss": 0.1651, "step": 255740 }, { "epoch": 10.59, "grad_norm": 0.578125, "learning_rate": 0.00036138261840799424, "loss": 0.2222, "step": 255750 }, { "epoch": 10.59, "grad_norm": 0.78125, "learning_rate": 0.0003613729089606131, "loss": 0.185, "step": 255760 }, { "epoch": 10.59, "grad_norm": 0.69921875, "learning_rate": 0.00036136319930364007, "loss": 0.19, "step": 255770 }, { "epoch": 10.59, "grad_norm": 0.859375, "learning_rate": 0.0003613534894370937, "loss": 0.1642, "step": 255780 }, { "epoch": 10.59, "grad_norm": 0.498046875, "learning_rate": 0.0003613437793609919, "loss": 0.186, "step": 255790 }, { "epoch": 10.6, "grad_norm": 0.5078125, "learning_rate": 0.0003613340690753533, "loss": 0.1617, "step": 255800 }, { "epoch": 10.6, "grad_norm": 1.1484375, "learning_rate": 0.00036132435858019586, "loss": 0.237, "step": 255810 }, { "epoch": 10.6, "grad_norm": 0.0, "learning_rate": 0.00036131464787553805, "loss": 0.2003, "step": 255820 }, { "epoch": 10.6, "grad_norm": 0.8359375, "learning_rate": 0.0003613049369613982, "loss": 0.2235, "step": 255830 }, { "epoch": 10.6, "grad_norm": 0.208984375, "learning_rate": 0.0003612952258377943, "loss": 0.1632, "step": 255840 }, { "epoch": 10.6, "grad_norm": 0.64453125, "learning_rate": 0.0003612855145047449, "loss": 0.2032, "step": 255850 }, { "epoch": 10.6, "grad_norm": 1.234375, "learning_rate": 0.0003612758029622682, "loss": 0.1426, "step": 255860 }, { "epoch": 10.6, "grad_norm": 1.9765625, "learning_rate": 0.00036126609121038235, "loss": 0.2288, "step": 255870 }, { "epoch": 10.6, "grad_norm": 0.953125, "learning_rate": 0.0003612563792491059, "loss": 0.2031, "step": 255880 }, { "epoch": 10.6, "grad_norm": 1.15625, "learning_rate": 0.0003612466670784568, "loss": 0.1685, "step": 255890 }, { "epoch": 10.6, "grad_norm": 0.296875, "learning_rate": 0.0003612369546984535, "loss": 0.1933, "step": 255900 }, { "epoch": 10.6, "grad_norm": 0.578125, "learning_rate": 0.0003612272421091144, "loss": 0.1821, "step": 255910 }, { "epoch": 10.6, "grad_norm": 0.5390625, "learning_rate": 0.00036121752931045755, "loss": 0.2075, "step": 255920 }, { "epoch": 10.6, "grad_norm": 0.345703125, "learning_rate": 0.00036120781630250134, "loss": 0.1998, "step": 255930 }, { "epoch": 10.6, "grad_norm": 1.0625, "learning_rate": 0.00036119810308526403, "loss": 0.2092, "step": 255940 }, { "epoch": 10.6, "grad_norm": 1.171875, "learning_rate": 0.0003611883896587638, "loss": 0.2125, "step": 255950 }, { "epoch": 10.6, "grad_norm": 0.388671875, "learning_rate": 0.0003611786760230191, "loss": 0.1897, "step": 255960 }, { "epoch": 10.6, "grad_norm": 0.68359375, "learning_rate": 0.00036116896217804816, "loss": 0.1919, "step": 255970 }, { "epoch": 10.6, "grad_norm": 0.7890625, "learning_rate": 0.0003611592481238692, "loss": 0.1905, "step": 255980 }, { "epoch": 10.6, "grad_norm": 0.515625, "learning_rate": 0.00036114953386050063, "loss": 0.1932, "step": 255990 }, { "epoch": 10.6, "grad_norm": 0.251953125, "learning_rate": 0.0003611398193879606, "loss": 0.1795, "step": 256000 }, { "epoch": 10.6, "grad_norm": 1.984375, "learning_rate": 0.0003611301047062674, "loss": 0.2126, "step": 256010 }, { "epoch": 10.6, "grad_norm": 0.6484375, "learning_rate": 0.0003611203898154393, "loss": 0.1872, "step": 256020 }, { "epoch": 10.6, "grad_norm": 1.046875, "learning_rate": 0.0003611106747154947, "loss": 0.1884, "step": 256030 }, { "epoch": 10.61, "grad_norm": 0.79296875, "learning_rate": 0.00036110095940645185, "loss": 0.2322, "step": 256040 }, { "epoch": 10.61, "grad_norm": 0.90234375, "learning_rate": 0.0003610912438883289, "loss": 0.2154, "step": 256050 }, { "epoch": 10.61, "grad_norm": 0.734375, "learning_rate": 0.00036108152816114424, "loss": 0.1812, "step": 256060 }, { "epoch": 10.61, "grad_norm": 0.2470703125, "learning_rate": 0.0003610718122249162, "loss": 0.1944, "step": 256070 }, { "epoch": 10.61, "grad_norm": 0.6640625, "learning_rate": 0.00036106209607966294, "loss": 0.167, "step": 256080 }, { "epoch": 10.61, "grad_norm": 2.046875, "learning_rate": 0.0003610523797254028, "loss": 0.2467, "step": 256090 }, { "epoch": 10.61, "grad_norm": 0.56640625, "learning_rate": 0.0003610426631621541, "loss": 0.2105, "step": 256100 }, { "epoch": 10.61, "grad_norm": 0.546875, "learning_rate": 0.0003610329463899351, "loss": 0.1872, "step": 256110 }, { "epoch": 10.61, "grad_norm": 0.56640625, "learning_rate": 0.0003610232294087641, "loss": 0.1669, "step": 256120 }, { "epoch": 10.61, "grad_norm": 0.84375, "learning_rate": 0.00036101351221865934, "loss": 0.2002, "step": 256130 }, { "epoch": 10.61, "grad_norm": 1.390625, "learning_rate": 0.0003610037948196392, "loss": 0.1607, "step": 256140 }, { "epoch": 10.61, "grad_norm": 0.56640625, "learning_rate": 0.00036099407721172183, "loss": 0.1941, "step": 256150 }, { "epoch": 10.61, "grad_norm": 1.28125, "learning_rate": 0.00036098435939492567, "loss": 0.1711, "step": 256160 }, { "epoch": 10.61, "grad_norm": 1.359375, "learning_rate": 0.00036097464136926886, "loss": 0.1562, "step": 256170 }, { "epoch": 10.61, "grad_norm": 0.71875, "learning_rate": 0.0003609649231347698, "loss": 0.2329, "step": 256180 }, { "epoch": 10.61, "grad_norm": 0.396484375, "learning_rate": 0.0003609552046914467, "loss": 0.1664, "step": 256190 }, { "epoch": 10.61, "grad_norm": 1.2265625, "learning_rate": 0.00036094548603931795, "loss": 0.206, "step": 256200 }, { "epoch": 10.61, "grad_norm": 0.1904296875, "learning_rate": 0.0003609357671784017, "loss": 0.2011, "step": 256210 }, { "epoch": 10.61, "grad_norm": 0.55078125, "learning_rate": 0.00036092604810871636, "loss": 0.2033, "step": 256220 }, { "epoch": 10.61, "grad_norm": 0.82421875, "learning_rate": 0.00036091632883028014, "loss": 0.1861, "step": 256230 }, { "epoch": 10.61, "grad_norm": 0.515625, "learning_rate": 0.0003609066093431115, "loss": 0.192, "step": 256240 }, { "epoch": 10.61, "grad_norm": 0.36328125, "learning_rate": 0.0003608968896472284, "loss": 0.1644, "step": 256250 }, { "epoch": 10.61, "grad_norm": 0.6171875, "learning_rate": 0.00036088716974264946, "loss": 0.2378, "step": 256260 }, { "epoch": 10.61, "grad_norm": 0.84765625, "learning_rate": 0.0003608774496293928, "loss": 0.1971, "step": 256270 }, { "epoch": 10.62, "grad_norm": 0.640625, "learning_rate": 0.0003608677293074768, "loss": 0.1558, "step": 256280 }, { "epoch": 10.62, "grad_norm": 0.376953125, "learning_rate": 0.00036085800877691965, "loss": 0.1629, "step": 256290 }, { "epoch": 10.62, "grad_norm": 0.2451171875, "learning_rate": 0.00036084828803773975, "loss": 0.198, "step": 256300 }, { "epoch": 10.62, "grad_norm": 0.953125, "learning_rate": 0.0003608385670899552, "loss": 0.2475, "step": 256310 }, { "epoch": 10.62, "grad_norm": 0.90234375, "learning_rate": 0.0003608288459335846, "loss": 0.1786, "step": 256320 }, { "epoch": 10.62, "grad_norm": 2.453125, "learning_rate": 0.000360819124568646, "loss": 0.2066, "step": 256330 }, { "epoch": 10.62, "grad_norm": 1.234375, "learning_rate": 0.0003608094029951578, "loss": 0.1846, "step": 256340 }, { "epoch": 10.62, "grad_norm": 0.498046875, "learning_rate": 0.0003607996812131383, "loss": 0.1709, "step": 256350 }, { "epoch": 10.62, "grad_norm": 0.57421875, "learning_rate": 0.0003607899592226058, "loss": 0.1953, "step": 256360 }, { "epoch": 10.62, "grad_norm": 1.4296875, "learning_rate": 0.00036078023702357844, "loss": 0.2016, "step": 256370 }, { "epoch": 10.62, "grad_norm": 1.1796875, "learning_rate": 0.0003607705146160747, "loss": 0.1874, "step": 256380 }, { "epoch": 10.62, "grad_norm": 1.0, "learning_rate": 0.00036076079200011275, "loss": 0.2448, "step": 256390 }, { "epoch": 10.62, "grad_norm": 0.59765625, "learning_rate": 0.00036075106917571103, "loss": 0.205, "step": 256400 }, { "epoch": 10.62, "grad_norm": 0.80859375, "learning_rate": 0.00036074134614288777, "loss": 0.2475, "step": 256410 }, { "epoch": 10.62, "grad_norm": 0.74609375, "learning_rate": 0.00036073162290166117, "loss": 0.2013, "step": 256420 }, { "epoch": 10.62, "grad_norm": 1.53125, "learning_rate": 0.00036072189945204967, "loss": 0.2112, "step": 256430 }, { "epoch": 10.62, "grad_norm": 0.494140625, "learning_rate": 0.00036071217579407153, "loss": 0.212, "step": 256440 }, { "epoch": 10.62, "grad_norm": 0.70703125, "learning_rate": 0.000360702451927745, "loss": 0.153, "step": 256450 }, { "epoch": 10.62, "grad_norm": 0.318359375, "learning_rate": 0.0003606927278530884, "loss": 0.1814, "step": 256460 }, { "epoch": 10.62, "grad_norm": 0.86328125, "learning_rate": 0.00036068300357012005, "loss": 0.2063, "step": 256470 }, { "epoch": 10.62, "grad_norm": 0.85546875, "learning_rate": 0.00036067327907885827, "loss": 0.2167, "step": 256480 }, { "epoch": 10.62, "grad_norm": 0.62109375, "learning_rate": 0.0003606635543793213, "loss": 0.1808, "step": 256490 }, { "epoch": 10.62, "grad_norm": 1.359375, "learning_rate": 0.0003606538294715275, "loss": 0.1988, "step": 256500 }, { "epoch": 10.62, "grad_norm": 1.2890625, "learning_rate": 0.0003606441043554951, "loss": 0.2108, "step": 256510 }, { "epoch": 10.63, "grad_norm": 0.5078125, "learning_rate": 0.00036063437903124246, "loss": 0.164, "step": 256520 }, { "epoch": 10.63, "grad_norm": 0.4609375, "learning_rate": 0.00036062465349878794, "loss": 0.1977, "step": 256530 }, { "epoch": 10.63, "grad_norm": 0.55859375, "learning_rate": 0.00036061492775814964, "loss": 0.2085, "step": 256540 }, { "epoch": 10.63, "grad_norm": 0.330078125, "learning_rate": 0.0003606052018093461, "loss": 0.2228, "step": 256550 }, { "epoch": 10.63, "grad_norm": 0.443359375, "learning_rate": 0.0003605954756523954, "loss": 0.2169, "step": 256560 }, { "epoch": 10.63, "grad_norm": 0.62890625, "learning_rate": 0.000360585749287316, "loss": 0.222, "step": 256570 }, { "epoch": 10.63, "grad_norm": 0.91796875, "learning_rate": 0.00036057602271412625, "loss": 0.214, "step": 256580 }, { "epoch": 10.63, "grad_norm": 0.412109375, "learning_rate": 0.0003605662959328443, "loss": 0.1943, "step": 256590 }, { "epoch": 10.63, "grad_norm": 0.61328125, "learning_rate": 0.00036055656894348856, "loss": 0.203, "step": 256600 }, { "epoch": 10.63, "grad_norm": 0.89453125, "learning_rate": 0.0003605468417460772, "loss": 0.1669, "step": 256610 }, { "epoch": 10.63, "grad_norm": 0.4296875, "learning_rate": 0.00036053711434062873, "loss": 0.1937, "step": 256620 }, { "epoch": 10.63, "grad_norm": 0.9296875, "learning_rate": 0.0003605273867271613, "loss": 0.1926, "step": 256630 }, { "epoch": 10.63, "grad_norm": 0.62109375, "learning_rate": 0.0003605176589056932, "loss": 0.2521, "step": 256640 }, { "epoch": 10.63, "grad_norm": 1.1875, "learning_rate": 0.0003605079308762429, "loss": 0.2485, "step": 256650 }, { "epoch": 10.63, "grad_norm": 0.91796875, "learning_rate": 0.00036049820263882857, "loss": 0.2009, "step": 256660 }, { "epoch": 10.63, "grad_norm": 1.046875, "learning_rate": 0.00036048847419346857, "loss": 0.1861, "step": 256670 }, { "epoch": 10.63, "grad_norm": 1.015625, "learning_rate": 0.0003604787455401812, "loss": 0.2411, "step": 256680 }, { "epoch": 10.63, "grad_norm": 0.404296875, "learning_rate": 0.0003604690166789847, "loss": 0.1788, "step": 256690 }, { "epoch": 10.63, "grad_norm": 0.55859375, "learning_rate": 0.0003604592876098975, "loss": 0.1882, "step": 256700 }, { "epoch": 10.63, "grad_norm": 0.91796875, "learning_rate": 0.00036044955833293787, "loss": 0.1647, "step": 256710 }, { "epoch": 10.63, "grad_norm": 0.408203125, "learning_rate": 0.00036043982884812403, "loss": 0.1545, "step": 256720 }, { "epoch": 10.63, "grad_norm": 0.828125, "learning_rate": 0.0003604300991554744, "loss": 0.18, "step": 256730 }, { "epoch": 10.63, "grad_norm": 0.306640625, "learning_rate": 0.0003604203692550073, "loss": 0.1997, "step": 256740 }, { "epoch": 10.63, "grad_norm": 1.28125, "learning_rate": 0.0003604106391467409, "loss": 0.2091, "step": 256750 }, { "epoch": 10.63, "grad_norm": 1.6171875, "learning_rate": 0.0003604009088306936, "loss": 0.1986, "step": 256760 }, { "epoch": 10.64, "grad_norm": 0.439453125, "learning_rate": 0.0003603911783068838, "loss": 0.2011, "step": 256770 }, { "epoch": 10.64, "grad_norm": 0.8125, "learning_rate": 0.00036038144757532956, "loss": 0.1704, "step": 256780 }, { "epoch": 10.64, "grad_norm": 1.203125, "learning_rate": 0.0003603717166360495, "loss": 0.2412, "step": 256790 }, { "epoch": 10.64, "grad_norm": 0.53125, "learning_rate": 0.00036036198548906174, "loss": 0.181, "step": 256800 }, { "epoch": 10.64, "grad_norm": 0.5703125, "learning_rate": 0.0003603522541343847, "loss": 0.1836, "step": 256810 }, { "epoch": 10.64, "grad_norm": 0.287109375, "learning_rate": 0.0003603425225720366, "loss": 0.2224, "step": 256820 }, { "epoch": 10.64, "grad_norm": 0.61328125, "learning_rate": 0.0003603327908020357, "loss": 0.1781, "step": 256830 }, { "epoch": 10.64, "grad_norm": 0.5859375, "learning_rate": 0.0003603230588244005, "loss": 0.1821, "step": 256840 }, { "epoch": 10.64, "grad_norm": 0.8203125, "learning_rate": 0.00036031332663914923, "loss": 0.2056, "step": 256850 }, { "epoch": 10.64, "grad_norm": 0.9609375, "learning_rate": 0.0003603035942463001, "loss": 0.1988, "step": 256860 }, { "epoch": 10.64, "grad_norm": 1.3203125, "learning_rate": 0.0003602938616458716, "loss": 0.194, "step": 256870 }, { "epoch": 10.64, "grad_norm": 0.8984375, "learning_rate": 0.0003602841288378819, "loss": 0.2167, "step": 256880 }, { "epoch": 10.64, "grad_norm": 2.21875, "learning_rate": 0.00036027439582234945, "loss": 0.2129, "step": 256890 }, { "epoch": 10.64, "grad_norm": 1.0234375, "learning_rate": 0.0003602646625992925, "loss": 0.1881, "step": 256900 }, { "epoch": 10.64, "grad_norm": 0.91796875, "learning_rate": 0.0003602549291687293, "loss": 0.2159, "step": 256910 }, { "epoch": 10.64, "grad_norm": 0.462890625, "learning_rate": 0.00036024519553067827, "loss": 0.2014, "step": 256920 }, { "epoch": 10.64, "grad_norm": 0.5234375, "learning_rate": 0.00036023546168515764, "loss": 0.2095, "step": 256930 }, { "epoch": 10.64, "grad_norm": 1.265625, "learning_rate": 0.0003602257276321858, "loss": 0.1987, "step": 256940 }, { "epoch": 10.64, "grad_norm": 1.015625, "learning_rate": 0.0003602159933717811, "loss": 0.1702, "step": 256950 }, { "epoch": 10.64, "grad_norm": 0.99609375, "learning_rate": 0.0003602062589039617, "loss": 0.207, "step": 256960 }, { "epoch": 10.64, "grad_norm": 0.75390625, "learning_rate": 0.00036019652422874604, "loss": 0.2034, "step": 256970 }, { "epoch": 10.64, "grad_norm": 0.921875, "learning_rate": 0.0003601867893461525, "loss": 0.1662, "step": 256980 }, { "epoch": 10.64, "grad_norm": 0.7578125, "learning_rate": 0.0003601770542561993, "loss": 0.2065, "step": 256990 }, { "epoch": 10.64, "grad_norm": 0.953125, "learning_rate": 0.00036016731895890475, "loss": 0.1657, "step": 257000 }, { "epoch": 10.65, "grad_norm": 0.6484375, "learning_rate": 0.0003601575834542872, "loss": 0.1415, "step": 257010 }, { "epoch": 10.65, "grad_norm": 2.171875, "learning_rate": 0.000360147847742365, "loss": 0.2074, "step": 257020 }, { "epoch": 10.65, "grad_norm": 0.48046875, "learning_rate": 0.00036013811182315645, "loss": 0.1871, "step": 257030 }, { "epoch": 10.65, "grad_norm": 0.59375, "learning_rate": 0.0003601283756966798, "loss": 0.19, "step": 257040 }, { "epoch": 10.65, "grad_norm": 0.9765625, "learning_rate": 0.00036011863936295346, "loss": 0.1919, "step": 257050 }, { "epoch": 10.65, "grad_norm": 0.51171875, "learning_rate": 0.0003601089028219958, "loss": 0.2185, "step": 257060 }, { "epoch": 10.65, "grad_norm": 0.447265625, "learning_rate": 0.000360099166073825, "loss": 0.2324, "step": 257070 }, { "epoch": 10.65, "grad_norm": 0.462890625, "learning_rate": 0.00036008942911845953, "loss": 0.1848, "step": 257080 }, { "epoch": 10.65, "grad_norm": 1.15625, "learning_rate": 0.00036007969195591766, "loss": 0.2207, "step": 257090 }, { "epoch": 10.65, "grad_norm": 0.7734375, "learning_rate": 0.0003600699545862176, "loss": 0.1628, "step": 257100 }, { "epoch": 10.65, "grad_norm": 0.451171875, "learning_rate": 0.00036006021700937785, "loss": 0.2201, "step": 257110 }, { "epoch": 10.65, "grad_norm": 0.341796875, "learning_rate": 0.00036005047922541656, "loss": 0.1913, "step": 257120 }, { "epoch": 10.65, "grad_norm": 1.515625, "learning_rate": 0.0003600407412343523, "loss": 0.1953, "step": 257130 }, { "epoch": 10.65, "grad_norm": 0.000125885009765625, "learning_rate": 0.0003600310030362032, "loss": 0.2301, "step": 257140 }, { "epoch": 10.65, "grad_norm": 1.96875, "learning_rate": 0.0003600212646309875, "loss": 0.2098, "step": 257150 }, { "epoch": 10.65, "grad_norm": 1.3828125, "learning_rate": 0.0003600115260187238, "loss": 0.1814, "step": 257160 }, { "epoch": 10.65, "grad_norm": 1.078125, "learning_rate": 0.0003600017871994303, "loss": 0.2018, "step": 257170 }, { "epoch": 10.65, "grad_norm": 1.2265625, "learning_rate": 0.0003599920481731252, "loss": 0.1796, "step": 257180 }, { "epoch": 10.65, "grad_norm": 0.365234375, "learning_rate": 0.000359982308939827, "loss": 0.1947, "step": 257190 }, { "epoch": 10.65, "grad_norm": 0.56640625, "learning_rate": 0.000359972569499554, "loss": 0.1778, "step": 257200 }, { "epoch": 10.65, "grad_norm": 0.275390625, "learning_rate": 0.00035996282985232447, "loss": 0.1817, "step": 257210 }, { "epoch": 10.65, "grad_norm": 1.1953125, "learning_rate": 0.0003599530899981568, "loss": 0.1887, "step": 257220 }, { "epoch": 10.65, "grad_norm": 1.1328125, "learning_rate": 0.0003599433499370692, "loss": 0.2397, "step": 257230 }, { "epoch": 10.65, "grad_norm": 0.828125, "learning_rate": 0.00035993360966908025, "loss": 0.157, "step": 257240 }, { "epoch": 10.66, "grad_norm": 0.92578125, "learning_rate": 0.00035992386919420804, "loss": 0.2014, "step": 257250 }, { "epoch": 10.66, "grad_norm": 1.6875, "learning_rate": 0.0003599141285124709, "loss": 0.2073, "step": 257260 }, { "epoch": 10.66, "grad_norm": 0.79296875, "learning_rate": 0.00035990438762388734, "loss": 0.1888, "step": 257270 }, { "epoch": 10.66, "grad_norm": 0.8515625, "learning_rate": 0.00035989464652847554, "loss": 0.1755, "step": 257280 }, { "epoch": 10.66, "grad_norm": 0.62109375, "learning_rate": 0.0003598849052262539, "loss": 0.1885, "step": 257290 }, { "epoch": 10.66, "grad_norm": 0.9296875, "learning_rate": 0.0003598751637172408, "loss": 0.2391, "step": 257300 }, { "epoch": 10.66, "grad_norm": 0.546875, "learning_rate": 0.00035986542200145435, "loss": 0.2033, "step": 257310 }, { "epoch": 10.66, "grad_norm": 0.71875, "learning_rate": 0.0003598556800789132, "loss": 0.2253, "step": 257320 }, { "epoch": 10.66, "grad_norm": 0.45703125, "learning_rate": 0.0003598459379496354, "loss": 0.2078, "step": 257330 }, { "epoch": 10.66, "grad_norm": 1.046875, "learning_rate": 0.0003598361956136394, "loss": 0.1616, "step": 257340 }, { "epoch": 10.66, "grad_norm": 0.306640625, "learning_rate": 0.0003598264530709437, "loss": 0.2, "step": 257350 }, { "epoch": 10.66, "grad_norm": 1.453125, "learning_rate": 0.0003598167103215664, "loss": 0.2145, "step": 257360 }, { "epoch": 10.66, "grad_norm": 1.3984375, "learning_rate": 0.00035980696736552585, "loss": 0.178, "step": 257370 }, { "epoch": 10.66, "grad_norm": 0.53125, "learning_rate": 0.0003597972242028405, "loss": 0.1908, "step": 257380 }, { "epoch": 10.66, "grad_norm": 1.0390625, "learning_rate": 0.00035978748083352857, "loss": 0.1686, "step": 257390 }, { "epoch": 10.66, "grad_norm": 0.90625, "learning_rate": 0.0003597777372576085, "loss": 0.2105, "step": 257400 }, { "epoch": 10.66, "grad_norm": 0.89453125, "learning_rate": 0.00035976799347509856, "loss": 0.1609, "step": 257410 }, { "epoch": 10.66, "grad_norm": 0.404296875, "learning_rate": 0.0003597582494860172, "loss": 0.2029, "step": 257420 }, { "epoch": 10.66, "grad_norm": 0.62109375, "learning_rate": 0.0003597485052903826, "loss": 0.1915, "step": 257430 }, { "epoch": 10.66, "grad_norm": 0.404296875, "learning_rate": 0.0003597387608882132, "loss": 0.1608, "step": 257440 }, { "epoch": 10.66, "grad_norm": 0.3828125, "learning_rate": 0.00035972901627952724, "loss": 0.1973, "step": 257450 }, { "epoch": 10.66, "grad_norm": 0.62109375, "learning_rate": 0.0003597192714643432, "loss": 0.2281, "step": 257460 }, { "epoch": 10.66, "grad_norm": 1.25, "learning_rate": 0.00035970952644267916, "loss": 0.174, "step": 257470 }, { "epoch": 10.66, "grad_norm": 1.78125, "learning_rate": 0.0003596997812145539, "loss": 0.1689, "step": 257480 }, { "epoch": 10.67, "grad_norm": 0.515625, "learning_rate": 0.00035969003577998533, "loss": 0.1767, "step": 257490 }, { "epoch": 10.67, "grad_norm": 1.1796875, "learning_rate": 0.00035968029013899196, "loss": 0.1779, "step": 257500 }, { "epoch": 10.67, "grad_norm": 0.81640625, "learning_rate": 0.00035967054429159224, "loss": 0.246, "step": 257510 }, { "epoch": 10.67, "grad_norm": 1.125, "learning_rate": 0.0003596607982378043, "loss": 0.1859, "step": 257520 }, { "epoch": 10.67, "grad_norm": 1.890625, "learning_rate": 0.0003596510519776466, "loss": 0.2171, "step": 257530 }, { "epoch": 10.67, "grad_norm": 0.66015625, "learning_rate": 0.00035964130551113745, "loss": 0.243, "step": 257540 }, { "epoch": 10.67, "grad_norm": 0.96484375, "learning_rate": 0.0003596315588382952, "loss": 0.1958, "step": 257550 }, { "epoch": 10.67, "grad_norm": 0.984375, "learning_rate": 0.00035962181195913824, "loss": 0.2066, "step": 257560 }, { "epoch": 10.67, "grad_norm": 1.71875, "learning_rate": 0.0003596120648736848, "loss": 0.211, "step": 257570 }, { "epoch": 10.67, "grad_norm": 1.484375, "learning_rate": 0.0003596023175819534, "loss": 0.2035, "step": 257580 }, { "epoch": 10.67, "grad_norm": 0.640625, "learning_rate": 0.0003595925700839622, "loss": 0.2013, "step": 257590 }, { "epoch": 10.67, "grad_norm": 1.171875, "learning_rate": 0.00035958282237972964, "loss": 0.1704, "step": 257600 }, { "epoch": 10.67, "grad_norm": 0.5625, "learning_rate": 0.00035957307446927403, "loss": 0.176, "step": 257610 }, { "epoch": 10.67, "grad_norm": 0.57421875, "learning_rate": 0.0003595633263526137, "loss": 0.1847, "step": 257620 }, { "epoch": 10.67, "grad_norm": 0.5390625, "learning_rate": 0.00035955357802976704, "loss": 0.1993, "step": 257630 }, { "epoch": 10.67, "grad_norm": 1.1171875, "learning_rate": 0.00035954382950075236, "loss": 0.1478, "step": 257640 }, { "epoch": 10.67, "grad_norm": 1.0078125, "learning_rate": 0.00035953408076558807, "loss": 0.1758, "step": 257650 }, { "epoch": 10.67, "grad_norm": 0.74609375, "learning_rate": 0.00035952433182429244, "loss": 0.193, "step": 257660 }, { "epoch": 10.67, "grad_norm": 1.2734375, "learning_rate": 0.00035951458267688386, "loss": 0.1826, "step": 257670 }, { "epoch": 10.67, "grad_norm": 0.357421875, "learning_rate": 0.0003595048333233807, "loss": 0.212, "step": 257680 }, { "epoch": 10.67, "grad_norm": 0.50390625, "learning_rate": 0.00035949508376380117, "loss": 0.1928, "step": 257690 }, { "epoch": 10.67, "grad_norm": 1.6171875, "learning_rate": 0.00035948533399816375, "loss": 0.2461, "step": 257700 }, { "epoch": 10.67, "grad_norm": 1.75, "learning_rate": 0.0003594755840264868, "loss": 0.2141, "step": 257710 }, { "epoch": 10.67, "grad_norm": 1.9140625, "learning_rate": 0.00035946583384878863, "loss": 0.257, "step": 257720 }, { "epoch": 10.68, "grad_norm": 0.83203125, "learning_rate": 0.0003594560834650875, "loss": 0.1544, "step": 257730 }, { "epoch": 10.68, "grad_norm": 0.95703125, "learning_rate": 0.0003594463328754019, "loss": 0.138, "step": 257740 }, { "epoch": 10.68, "grad_norm": 2.296875, "learning_rate": 0.0003594365820797501, "loss": 0.1966, "step": 257750 }, { "epoch": 10.68, "grad_norm": 0.98828125, "learning_rate": 0.0003594268310781505, "loss": 0.2022, "step": 257760 }, { "epoch": 10.68, "grad_norm": 0.93359375, "learning_rate": 0.0003594170798706214, "loss": 0.1815, "step": 257770 }, { "epoch": 10.68, "grad_norm": 1.1640625, "learning_rate": 0.00035940732845718117, "loss": 0.1953, "step": 257780 }, { "epoch": 10.68, "grad_norm": 0.75390625, "learning_rate": 0.0003593975768378481, "loss": 0.2058, "step": 257790 }, { "epoch": 10.68, "grad_norm": 0.90234375, "learning_rate": 0.0003593878250126408, "loss": 0.1952, "step": 257800 }, { "epoch": 10.68, "grad_norm": 1.3828125, "learning_rate": 0.00035937807298157726, "loss": 0.254, "step": 257810 }, { "epoch": 10.68, "grad_norm": 0.4609375, "learning_rate": 0.00035936832074467604, "loss": 0.1905, "step": 257820 }, { "epoch": 10.68, "grad_norm": 0.7265625, "learning_rate": 0.0003593585683019554, "loss": 0.1925, "step": 257830 }, { "epoch": 10.68, "grad_norm": 0.6640625, "learning_rate": 0.0003593488156534338, "loss": 0.1586, "step": 257840 }, { "epoch": 10.68, "grad_norm": 1.109375, "learning_rate": 0.0003593390627991295, "loss": 0.2082, "step": 257850 }, { "epoch": 10.68, "grad_norm": 0.77734375, "learning_rate": 0.000359329309739061, "loss": 0.1592, "step": 257860 }, { "epoch": 10.68, "grad_norm": 0.4140625, "learning_rate": 0.0003593195564732464, "loss": 0.2389, "step": 257870 }, { "epoch": 10.68, "grad_norm": 1.125, "learning_rate": 0.0003593098030017043, "loss": 0.1994, "step": 257880 }, { "epoch": 10.68, "grad_norm": 0.625, "learning_rate": 0.00035930004932445295, "loss": 0.165, "step": 257890 }, { "epoch": 10.68, "grad_norm": 0.765625, "learning_rate": 0.0003592902954415107, "loss": 0.2485, "step": 257900 }, { "epoch": 10.68, "grad_norm": 0.61328125, "learning_rate": 0.00035928054135289585, "loss": 0.1956, "step": 257910 }, { "epoch": 10.68, "grad_norm": 1.484375, "learning_rate": 0.00035927078705862684, "loss": 0.2026, "step": 257920 }, { "epoch": 10.68, "grad_norm": 1.2734375, "learning_rate": 0.00035926103255872204, "loss": 0.2501, "step": 257930 }, { "epoch": 10.68, "grad_norm": 2.078125, "learning_rate": 0.00035925127785319975, "loss": 0.1942, "step": 257940 }, { "epoch": 10.68, "grad_norm": 1.9140625, "learning_rate": 0.0003592415229420784, "loss": 0.2099, "step": 257950 }, { "epoch": 10.68, "grad_norm": 1.0078125, "learning_rate": 0.00035923176782537625, "loss": 0.2194, "step": 257960 }, { "epoch": 10.69, "grad_norm": 0.609375, "learning_rate": 0.0003592220125031118, "loss": 0.2178, "step": 257970 }, { "epoch": 10.69, "grad_norm": 1.6328125, "learning_rate": 0.00035921225697530326, "loss": 0.1867, "step": 257980 }, { "epoch": 10.69, "grad_norm": 1.7578125, "learning_rate": 0.000359202501241969, "loss": 0.1504, "step": 257990 }, { "epoch": 10.69, "grad_norm": 0.349609375, "learning_rate": 0.0003591927453031275, "loss": 0.2508, "step": 258000 }, { "epoch": 10.69, "grad_norm": 1.0, "learning_rate": 0.000359182989158797, "loss": 0.1695, "step": 258010 }, { "epoch": 10.69, "grad_norm": 0.400390625, "learning_rate": 0.00035917323280899594, "loss": 0.204, "step": 258020 }, { "epoch": 10.69, "grad_norm": 1.6171875, "learning_rate": 0.00035916347625374257, "loss": 0.1923, "step": 258030 }, { "epoch": 10.69, "grad_norm": 0.5546875, "learning_rate": 0.00035915371949305543, "loss": 0.2079, "step": 258040 }, { "epoch": 10.69, "grad_norm": 0.81640625, "learning_rate": 0.0003591439625269527, "loss": 0.1767, "step": 258050 }, { "epoch": 10.69, "grad_norm": 0.0002155303955078125, "learning_rate": 0.0003591342053554529, "loss": 0.2135, "step": 258060 }, { "epoch": 10.69, "grad_norm": 0.55078125, "learning_rate": 0.0003591244479785742, "loss": 0.1676, "step": 258070 }, { "epoch": 10.69, "grad_norm": 0.5078125, "learning_rate": 0.00035911469039633516, "loss": 0.2305, "step": 258080 }, { "epoch": 10.69, "grad_norm": 0.96484375, "learning_rate": 0.000359104932608754, "loss": 0.2059, "step": 258090 }, { "epoch": 10.69, "grad_norm": 1.4140625, "learning_rate": 0.00035909517461584915, "loss": 0.242, "step": 258100 }, { "epoch": 10.69, "grad_norm": 0.65625, "learning_rate": 0.000359085416417639, "loss": 0.2224, "step": 258110 }, { "epoch": 10.69, "grad_norm": 0.87890625, "learning_rate": 0.0003590756580141418, "loss": 0.204, "step": 258120 }, { "epoch": 10.69, "grad_norm": 0.74609375, "learning_rate": 0.0003590658994053762, "loss": 0.1761, "step": 258130 }, { "epoch": 10.69, "grad_norm": 0.63671875, "learning_rate": 0.0003590561405913601, "loss": 0.2137, "step": 258140 }, { "epoch": 10.69, "grad_norm": 2.015625, "learning_rate": 0.00035904638157211225, "loss": 0.2, "step": 258150 }, { "epoch": 10.69, "grad_norm": 1.1875, "learning_rate": 0.00035903662234765087, "loss": 0.1915, "step": 258160 }, { "epoch": 10.69, "grad_norm": 0.52734375, "learning_rate": 0.00035902686291799425, "loss": 0.186, "step": 258170 }, { "epoch": 10.69, "grad_norm": 0.33984375, "learning_rate": 0.00035901710328316094, "loss": 0.1823, "step": 258180 }, { "epoch": 10.69, "grad_norm": 0.92578125, "learning_rate": 0.0003590073434431692, "loss": 0.2185, "step": 258190 }, { "epoch": 10.69, "grad_norm": 1.2421875, "learning_rate": 0.00035899758339803746, "loss": 0.1748, "step": 258200 }, { "epoch": 10.7, "grad_norm": 1.484375, "learning_rate": 0.000358987823147784, "loss": 0.2116, "step": 258210 }, { "epoch": 10.7, "grad_norm": 0.64453125, "learning_rate": 0.00035897806269242724, "loss": 0.1911, "step": 258220 }, { "epoch": 10.7, "grad_norm": 1.2109375, "learning_rate": 0.0003589683020319855, "loss": 0.1875, "step": 258230 }, { "epoch": 10.7, "grad_norm": 0.83984375, "learning_rate": 0.0003589585411664772, "loss": 0.2391, "step": 258240 }, { "epoch": 10.7, "grad_norm": 0.71875, "learning_rate": 0.00035894878009592063, "loss": 0.1682, "step": 258250 }, { "epoch": 10.7, "grad_norm": 1.0, "learning_rate": 0.0003589390188203343, "loss": 0.1881, "step": 258260 }, { "epoch": 10.7, "grad_norm": 0.35546875, "learning_rate": 0.00035892925733973645, "loss": 0.2295, "step": 258270 }, { "epoch": 10.7, "grad_norm": 1.203125, "learning_rate": 0.00035891949565414557, "loss": 0.1813, "step": 258280 }, { "epoch": 10.7, "grad_norm": 0.78515625, "learning_rate": 0.00035890973376357993, "loss": 0.2374, "step": 258290 }, { "epoch": 10.7, "grad_norm": 1.0390625, "learning_rate": 0.0003588999716680579, "loss": 0.197, "step": 258300 }, { "epoch": 10.7, "grad_norm": 0.6875, "learning_rate": 0.0003588902093675979, "loss": 0.2206, "step": 258310 }, { "epoch": 10.7, "grad_norm": 0.9296875, "learning_rate": 0.00035888044686221835, "loss": 0.1969, "step": 258320 }, { "epoch": 10.7, "grad_norm": 0.443359375, "learning_rate": 0.0003588706841519375, "loss": 0.1693, "step": 258330 }, { "epoch": 10.7, "grad_norm": 0.0, "learning_rate": 0.00035886092123677373, "loss": 0.2638, "step": 258340 }, { "epoch": 10.7, "grad_norm": 0.7109375, "learning_rate": 0.00035885115811674554, "loss": 0.2179, "step": 258350 }, { "epoch": 10.7, "grad_norm": 0.404296875, "learning_rate": 0.00035884139479187117, "loss": 0.136, "step": 258360 }, { "epoch": 10.7, "grad_norm": 1.0390625, "learning_rate": 0.0003588316312621691, "loss": 0.2429, "step": 258370 }, { "epoch": 10.7, "grad_norm": 1.8203125, "learning_rate": 0.00035882186752765757, "loss": 0.1814, "step": 258380 }, { "epoch": 10.7, "grad_norm": 0.7734375, "learning_rate": 0.00035881210358835514, "loss": 0.1875, "step": 258390 }, { "epoch": 10.7, "grad_norm": 1.546875, "learning_rate": 0.00035880233944428006, "loss": 0.1714, "step": 258400 }, { "epoch": 10.7, "grad_norm": 0.96875, "learning_rate": 0.0003587925750954507, "loss": 0.201, "step": 258410 }, { "epoch": 10.7, "grad_norm": 0.58203125, "learning_rate": 0.00035878281054188544, "loss": 0.1756, "step": 258420 }, { "epoch": 10.7, "grad_norm": 0.412109375, "learning_rate": 0.0003587730457836027, "loss": 0.1894, "step": 258430 }, { "epoch": 10.7, "grad_norm": 0.89453125, "learning_rate": 0.00035876328082062083, "loss": 0.1628, "step": 258440 }, { "epoch": 10.7, "grad_norm": 0.8125, "learning_rate": 0.0003587535156529583, "loss": 0.1677, "step": 258450 }, { "epoch": 10.71, "grad_norm": 0.494140625, "learning_rate": 0.0003587437502806332, "loss": 0.2003, "step": 258460 }, { "epoch": 10.71, "grad_norm": 0.412109375, "learning_rate": 0.0003587339847036643, "loss": 0.1829, "step": 258470 }, { "epoch": 10.71, "grad_norm": 0.63671875, "learning_rate": 0.0003587242189220697, "loss": 0.1754, "step": 258480 }, { "epoch": 10.71, "grad_norm": 0.87109375, "learning_rate": 0.00035871445293586784, "loss": 0.1964, "step": 258490 }, { "epoch": 10.71, "grad_norm": 0.5078125, "learning_rate": 0.00035870468674507715, "loss": 0.2303, "step": 258500 }, { "epoch": 10.71, "grad_norm": 0.74609375, "learning_rate": 0.00035869492034971604, "loss": 0.2117, "step": 258510 }, { "epoch": 10.71, "grad_norm": 0.625, "learning_rate": 0.0003586851537498027, "loss": 0.1926, "step": 258520 }, { "epoch": 10.71, "grad_norm": 0.71484375, "learning_rate": 0.0003586753869453557, "loss": 0.1983, "step": 258530 }, { "epoch": 10.71, "grad_norm": 0.7890625, "learning_rate": 0.0003586656199363933, "loss": 0.2023, "step": 258540 }, { "epoch": 10.71, "grad_norm": 0.8046875, "learning_rate": 0.0003586558527229341, "loss": 0.1997, "step": 258550 }, { "epoch": 10.71, "grad_norm": 0.341796875, "learning_rate": 0.0003586460853049962, "loss": 0.2024, "step": 258560 }, { "epoch": 10.71, "grad_norm": 1.1953125, "learning_rate": 0.00035863631768259807, "loss": 0.1845, "step": 258570 }, { "epoch": 10.71, "grad_norm": 0.458984375, "learning_rate": 0.0003586265498557582, "loss": 0.1571, "step": 258580 }, { "epoch": 10.71, "grad_norm": 1.0859375, "learning_rate": 0.00035861678182449487, "loss": 0.1909, "step": 258590 }, { "epoch": 10.71, "grad_norm": 1.8046875, "learning_rate": 0.00035860701358882645, "loss": 0.2129, "step": 258600 }, { "epoch": 10.71, "grad_norm": 1.5390625, "learning_rate": 0.00035859724514877145, "loss": 0.1703, "step": 258610 }, { "epoch": 10.71, "grad_norm": 0.87109375, "learning_rate": 0.000358587476504348, "loss": 0.1699, "step": 258620 }, { "epoch": 10.71, "grad_norm": 0.7109375, "learning_rate": 0.0003585777076555748, "loss": 0.1551, "step": 258630 }, { "epoch": 10.71, "grad_norm": 1.078125, "learning_rate": 0.00035856793860247006, "loss": 0.2036, "step": 258640 }, { "epoch": 10.71, "grad_norm": 0.6796875, "learning_rate": 0.00035855816934505206, "loss": 0.1885, "step": 258650 }, { "epoch": 10.71, "grad_norm": 0.416015625, "learning_rate": 0.0003585483998833395, "loss": 0.1857, "step": 258660 }, { "epoch": 10.71, "grad_norm": 0.50390625, "learning_rate": 0.00035853863021735046, "loss": 0.238, "step": 258670 }, { "epoch": 10.71, "grad_norm": 0.77734375, "learning_rate": 0.0003585288603471034, "loss": 0.2137, "step": 258680 }, { "epoch": 10.71, "grad_norm": 1.0234375, "learning_rate": 0.00035851909027261687, "loss": 0.1366, "step": 258690 }, { "epoch": 10.72, "grad_norm": 0.51171875, "learning_rate": 0.00035850931999390894, "loss": 0.2072, "step": 258700 }, { "epoch": 10.72, "grad_norm": 1.3515625, "learning_rate": 0.0003584995495109984, "loss": 0.2065, "step": 258710 }, { "epoch": 10.72, "grad_norm": 1.6796875, "learning_rate": 0.00035848977882390335, "loss": 0.1774, "step": 258720 }, { "epoch": 10.72, "grad_norm": 0.443359375, "learning_rate": 0.0003584800079326421, "loss": 0.2202, "step": 258730 }, { "epoch": 10.72, "grad_norm": 0.56640625, "learning_rate": 0.0003584702368372334, "loss": 0.1622, "step": 258740 }, { "epoch": 10.72, "grad_norm": 0.671875, "learning_rate": 0.00035846046553769537, "loss": 0.161, "step": 258750 }, { "epoch": 10.72, "grad_norm": 1.3984375, "learning_rate": 0.0003584506940340464, "loss": 0.1967, "step": 258760 }, { "epoch": 10.72, "grad_norm": 0.83984375, "learning_rate": 0.000358440922326305, "loss": 0.2162, "step": 258770 }, { "epoch": 10.72, "grad_norm": 0.1826171875, "learning_rate": 0.00035843115041448944, "loss": 0.2111, "step": 258780 }, { "epoch": 10.72, "grad_norm": 0.458984375, "learning_rate": 0.0003584213782986182, "loss": 0.1709, "step": 258790 }, { "epoch": 10.72, "grad_norm": 0.68359375, "learning_rate": 0.0003584116059787096, "loss": 0.2661, "step": 258800 }, { "epoch": 10.72, "grad_norm": 0.80859375, "learning_rate": 0.00035840183345478213, "loss": 0.1834, "step": 258810 }, { "epoch": 10.72, "grad_norm": 0.81640625, "learning_rate": 0.0003583920607268541, "loss": 0.1892, "step": 258820 }, { "epoch": 10.72, "grad_norm": 1.0625, "learning_rate": 0.0003583822877949439, "loss": 0.1688, "step": 258830 }, { "epoch": 10.72, "grad_norm": 0.640625, "learning_rate": 0.0003583725146590699, "loss": 0.1835, "step": 258840 }, { "epoch": 10.72, "grad_norm": 1.34375, "learning_rate": 0.0003583627413192505, "loss": 0.1938, "step": 258850 }, { "epoch": 10.72, "grad_norm": 0.7109375, "learning_rate": 0.0003583529677755042, "loss": 0.208, "step": 258860 }, { "epoch": 10.72, "grad_norm": 1.078125, "learning_rate": 0.0003583431940278493, "loss": 0.1949, "step": 258870 }, { "epoch": 10.72, "grad_norm": 0.453125, "learning_rate": 0.0003583334200763042, "loss": 0.166, "step": 258880 }, { "epoch": 10.72, "grad_norm": 0.8828125, "learning_rate": 0.0003583236459208873, "loss": 0.2489, "step": 258890 }, { "epoch": 10.72, "grad_norm": 0.671875, "learning_rate": 0.0003583138715616171, "loss": 0.22, "step": 258900 }, { "epoch": 10.72, "grad_norm": 0.62109375, "learning_rate": 0.00035830409699851175, "loss": 0.178, "step": 258910 }, { "epoch": 10.72, "grad_norm": 0.59765625, "learning_rate": 0.0003582943222315899, "loss": 0.1766, "step": 258920 }, { "epoch": 10.72, "grad_norm": 0.671875, "learning_rate": 0.00035828454726086965, "loss": 0.2399, "step": 258930 }, { "epoch": 10.73, "grad_norm": 1.1875, "learning_rate": 0.0003582747720863697, "loss": 0.2009, "step": 258940 }, { "epoch": 10.73, "grad_norm": 0.4609375, "learning_rate": 0.00035826499670810836, "loss": 0.1599, "step": 258950 }, { "epoch": 10.73, "grad_norm": 0.83203125, "learning_rate": 0.00035825522112610395, "loss": 0.2014, "step": 258960 }, { "epoch": 10.73, "grad_norm": 0.734375, "learning_rate": 0.00035824544534037486, "loss": 0.231, "step": 258970 }, { "epoch": 10.73, "grad_norm": 0.52734375, "learning_rate": 0.00035823566935093957, "loss": 0.2099, "step": 258980 }, { "epoch": 10.73, "grad_norm": 0.5703125, "learning_rate": 0.0003582258931578164, "loss": 0.2308, "step": 258990 }, { "epoch": 10.73, "grad_norm": 0.80859375, "learning_rate": 0.0003582161167610238, "loss": 0.1903, "step": 259000 }, { "epoch": 10.73, "grad_norm": 0.76171875, "learning_rate": 0.0003582063401605802, "loss": 0.1749, "step": 259010 }, { "epoch": 10.73, "grad_norm": 0.72265625, "learning_rate": 0.000358196563356504, "loss": 0.1973, "step": 259020 }, { "epoch": 10.73, "grad_norm": 0.53515625, "learning_rate": 0.00035818678634881346, "loss": 0.1609, "step": 259030 }, { "epoch": 10.73, "grad_norm": 1.0625, "learning_rate": 0.0003581770091375271, "loss": 0.213, "step": 259040 }, { "epoch": 10.73, "grad_norm": 0.53125, "learning_rate": 0.00035816723172266325, "loss": 0.2416, "step": 259050 }, { "epoch": 10.73, "grad_norm": 0.69921875, "learning_rate": 0.0003581574541042404, "loss": 0.1792, "step": 259060 }, { "epoch": 10.73, "grad_norm": 0.84375, "learning_rate": 0.00035814767628227683, "loss": 0.1981, "step": 259070 }, { "epoch": 10.73, "grad_norm": 1.9140625, "learning_rate": 0.0003581378982567911, "loss": 0.1708, "step": 259080 }, { "epoch": 10.73, "grad_norm": 2.09375, "learning_rate": 0.0003581281200278015, "loss": 0.234, "step": 259090 }, { "epoch": 10.73, "grad_norm": 0.91796875, "learning_rate": 0.0003581183415953264, "loss": 0.2193, "step": 259100 }, { "epoch": 10.73, "grad_norm": 0.154296875, "learning_rate": 0.0003581085629593844, "loss": 0.2074, "step": 259110 }, { "epoch": 10.73, "grad_norm": 0.73046875, "learning_rate": 0.00035809878411999365, "loss": 0.19, "step": 259120 }, { "epoch": 10.73, "grad_norm": 0.88671875, "learning_rate": 0.00035808900507717264, "loss": 0.2063, "step": 259130 }, { "epoch": 10.73, "grad_norm": 0.875, "learning_rate": 0.0003580792258309398, "loss": 0.1788, "step": 259140 }, { "epoch": 10.73, "grad_norm": 0.8125, "learning_rate": 0.00035806944638131356, "loss": 0.1825, "step": 259150 }, { "epoch": 10.73, "grad_norm": 0.83984375, "learning_rate": 0.0003580596667283123, "loss": 0.1576, "step": 259160 }, { "epoch": 10.73, "grad_norm": 1.4296875, "learning_rate": 0.0003580498868719544, "loss": 0.2106, "step": 259170 }, { "epoch": 10.74, "grad_norm": 1.1328125, "learning_rate": 0.0003580401068122583, "loss": 0.2237, "step": 259180 }, { "epoch": 10.74, "grad_norm": 0.546875, "learning_rate": 0.0003580303265492424, "loss": 0.2314, "step": 259190 }, { "epoch": 10.74, "grad_norm": 0.78125, "learning_rate": 0.00035802054608292505, "loss": 0.1887, "step": 259200 }, { "epoch": 10.74, "grad_norm": 0.7890625, "learning_rate": 0.00035801076541332465, "loss": 0.1515, "step": 259210 }, { "epoch": 10.74, "grad_norm": 1.078125, "learning_rate": 0.00035800098454045977, "loss": 0.1635, "step": 259220 }, { "epoch": 10.74, "grad_norm": 0.55859375, "learning_rate": 0.0003579912034643486, "loss": 0.2011, "step": 259230 }, { "epoch": 10.74, "grad_norm": 0.48046875, "learning_rate": 0.0003579814221850097, "loss": 0.2302, "step": 259240 }, { "epoch": 10.74, "grad_norm": 0.62109375, "learning_rate": 0.00035797164070246147, "loss": 0.1751, "step": 259250 }, { "epoch": 10.74, "grad_norm": 1.453125, "learning_rate": 0.0003579618590167222, "loss": 0.2412, "step": 259260 }, { "epoch": 10.74, "grad_norm": 0.75390625, "learning_rate": 0.00035795207712781035, "loss": 0.1881, "step": 259270 }, { "epoch": 10.74, "grad_norm": 0.79296875, "learning_rate": 0.00035794229503574446, "loss": 0.205, "step": 259280 }, { "epoch": 10.74, "grad_norm": 0.65234375, "learning_rate": 0.0003579325127405427, "loss": 0.2229, "step": 259290 }, { "epoch": 10.74, "grad_norm": 0.447265625, "learning_rate": 0.00035792273024222367, "loss": 0.1465, "step": 259300 }, { "epoch": 10.74, "grad_norm": 0.77734375, "learning_rate": 0.00035791294754080574, "loss": 0.2113, "step": 259310 }, { "epoch": 10.74, "grad_norm": 1.171875, "learning_rate": 0.00035790316463630726, "loss": 0.1964, "step": 259320 }, { "epoch": 10.74, "grad_norm": 0.5546875, "learning_rate": 0.00035789338152874666, "loss": 0.2328, "step": 259330 }, { "epoch": 10.74, "grad_norm": 0.1826171875, "learning_rate": 0.0003578835982181424, "loss": 0.161, "step": 259340 }, { "epoch": 10.74, "grad_norm": 0.859375, "learning_rate": 0.00035787381470451287, "loss": 0.1918, "step": 259350 }, { "epoch": 10.74, "grad_norm": 0.5625, "learning_rate": 0.00035786403098787644, "loss": 0.1817, "step": 259360 }, { "epoch": 10.74, "grad_norm": 0.1884765625, "learning_rate": 0.00035785424706825155, "loss": 0.2072, "step": 259370 }, { "epoch": 10.74, "grad_norm": 0.5859375, "learning_rate": 0.00035784446294565665, "loss": 0.1816, "step": 259380 }, { "epoch": 10.74, "grad_norm": 1.1171875, "learning_rate": 0.00035783467862011005, "loss": 0.236, "step": 259390 }, { "epoch": 10.74, "grad_norm": 1.0703125, "learning_rate": 0.00035782489409163034, "loss": 0.1462, "step": 259400 }, { "epoch": 10.74, "grad_norm": 0.83203125, "learning_rate": 0.0003578151093602357, "loss": 0.2145, "step": 259410 }, { "epoch": 10.75, "grad_norm": 0.9296875, "learning_rate": 0.0003578053244259447, "loss": 0.161, "step": 259420 }, { "epoch": 10.75, "grad_norm": 0.81640625, "learning_rate": 0.00035779553928877574, "loss": 0.1677, "step": 259430 }, { "epoch": 10.75, "grad_norm": 1.15625, "learning_rate": 0.0003577857539487473, "loss": 0.2272, "step": 259440 }, { "epoch": 10.75, "grad_norm": 0.34375, "learning_rate": 0.0003577759684058776, "loss": 0.1853, "step": 259450 }, { "epoch": 10.75, "grad_norm": 0.89453125, "learning_rate": 0.0003577661826601852, "loss": 0.1978, "step": 259460 }, { "epoch": 10.75, "grad_norm": 1.09375, "learning_rate": 0.0003577563967116885, "loss": 0.1928, "step": 259470 }, { "epoch": 10.75, "grad_norm": 0.9296875, "learning_rate": 0.0003577466105604058, "loss": 0.1832, "step": 259480 }, { "epoch": 10.75, "grad_norm": 0.59375, "learning_rate": 0.0003577368242063557, "loss": 0.1421, "step": 259490 }, { "epoch": 10.75, "grad_norm": 0.44140625, "learning_rate": 0.00035772703764955655, "loss": 0.1777, "step": 259500 }, { "epoch": 10.75, "grad_norm": 2.125, "learning_rate": 0.0003577172508900267, "loss": 0.2175, "step": 259510 }, { "epoch": 10.75, "grad_norm": 0.8203125, "learning_rate": 0.00035770746392778467, "loss": 0.2408, "step": 259520 }, { "epoch": 10.75, "grad_norm": 1.015625, "learning_rate": 0.0003576976767628488, "loss": 0.1737, "step": 259530 }, { "epoch": 10.75, "grad_norm": 0.91796875, "learning_rate": 0.0003576878893952375, "loss": 0.2085, "step": 259540 }, { "epoch": 10.75, "grad_norm": 1.2578125, "learning_rate": 0.00035767810182496925, "loss": 0.2276, "step": 259550 }, { "epoch": 10.75, "grad_norm": 1.5625, "learning_rate": 0.0003576683140520624, "loss": 0.1922, "step": 259560 }, { "epoch": 10.75, "grad_norm": 0.71875, "learning_rate": 0.0003576585260765355, "loss": 0.208, "step": 259570 }, { "epoch": 10.75, "grad_norm": 0.91015625, "learning_rate": 0.0003576487378984068, "loss": 0.2487, "step": 259580 }, { "epoch": 10.75, "grad_norm": 1.1640625, "learning_rate": 0.0003576389495176948, "loss": 0.1722, "step": 259590 }, { "epoch": 10.75, "grad_norm": 0.59765625, "learning_rate": 0.000357629160934418, "loss": 0.2234, "step": 259600 }, { "epoch": 10.75, "grad_norm": 0.2294921875, "learning_rate": 0.00035761937214859465, "loss": 0.1987, "step": 259610 }, { "epoch": 10.75, "grad_norm": 0.74609375, "learning_rate": 0.00035760958316024335, "loss": 0.2072, "step": 259620 }, { "epoch": 10.75, "grad_norm": 0.71484375, "learning_rate": 0.0003575997939693824, "loss": 0.232, "step": 259630 }, { "epoch": 10.75, "grad_norm": 0.9609375, "learning_rate": 0.00035759000457603017, "loss": 0.2069, "step": 259640 }, { "epoch": 10.75, "grad_norm": 0.859375, "learning_rate": 0.0003575802149802053, "loss": 0.2167, "step": 259650 }, { "epoch": 10.76, "grad_norm": 0.6484375, "learning_rate": 0.00035757042518192605, "loss": 0.1851, "step": 259660 }, { "epoch": 10.76, "grad_norm": 1.1640625, "learning_rate": 0.00035756063518121084, "loss": 0.1533, "step": 259670 }, { "epoch": 10.76, "grad_norm": 1.109375, "learning_rate": 0.00035755084497807817, "loss": 0.1924, "step": 259680 }, { "epoch": 10.76, "grad_norm": 0.67578125, "learning_rate": 0.0003575410545725463, "loss": 0.2126, "step": 259690 }, { "epoch": 10.76, "grad_norm": 0.7421875, "learning_rate": 0.00035753126396463397, "loss": 0.1978, "step": 259700 }, { "epoch": 10.76, "grad_norm": 1.25, "learning_rate": 0.0003575214731543593, "loss": 0.2135, "step": 259710 }, { "epoch": 10.76, "grad_norm": 0.040283203125, "learning_rate": 0.00035751168214174085, "loss": 0.2195, "step": 259720 }, { "epoch": 10.76, "grad_norm": 0.6484375, "learning_rate": 0.0003575018909267971, "loss": 0.1847, "step": 259730 }, { "epoch": 10.76, "grad_norm": 0.5078125, "learning_rate": 0.00035749209950954627, "loss": 0.1877, "step": 259740 }, { "epoch": 10.76, "grad_norm": 1.1328125, "learning_rate": 0.00035748230789000695, "loss": 0.2095, "step": 259750 }, { "epoch": 10.76, "grad_norm": 0.62890625, "learning_rate": 0.00035747251606819757, "loss": 0.1954, "step": 259760 }, { "epoch": 10.76, "grad_norm": 0.51171875, "learning_rate": 0.0003574627240441365, "loss": 0.2185, "step": 259770 }, { "epoch": 10.76, "grad_norm": 0.875, "learning_rate": 0.0003574529318178422, "loss": 0.2041, "step": 259780 }, { "epoch": 10.76, "grad_norm": 0.88671875, "learning_rate": 0.00035744313938933307, "loss": 0.1824, "step": 259790 }, { "epoch": 10.76, "grad_norm": 0.6640625, "learning_rate": 0.00035743334675862755, "loss": 0.1576, "step": 259800 }, { "epoch": 10.76, "grad_norm": 0.60546875, "learning_rate": 0.00035742355392574414, "loss": 0.1817, "step": 259810 }, { "epoch": 10.76, "grad_norm": 0.38671875, "learning_rate": 0.0003574137608907011, "loss": 0.1678, "step": 259820 }, { "epoch": 10.76, "grad_norm": 0.421875, "learning_rate": 0.000357403967653517, "loss": 0.205, "step": 259830 }, { "epoch": 10.76, "grad_norm": 0.859375, "learning_rate": 0.0003573941742142103, "loss": 0.2116, "step": 259840 }, { "epoch": 10.76, "grad_norm": 0.75390625, "learning_rate": 0.0003573843805727993, "loss": 0.1852, "step": 259850 }, { "epoch": 10.76, "grad_norm": 1.5546875, "learning_rate": 0.0003573745867293025, "loss": 0.2193, "step": 259860 }, { "epoch": 10.76, "grad_norm": 0.5234375, "learning_rate": 0.00035736479268373835, "loss": 0.1792, "step": 259870 }, { "epoch": 10.76, "grad_norm": 1.1640625, "learning_rate": 0.00035735499843612517, "loss": 0.2401, "step": 259880 }, { "epoch": 10.76, "grad_norm": 0.56640625, "learning_rate": 0.0003573452039864816, "loss": 0.1943, "step": 259890 }, { "epoch": 10.77, "grad_norm": 0.609375, "learning_rate": 0.00035733540933482577, "loss": 0.1836, "step": 259900 }, { "epoch": 10.77, "grad_norm": 0.6953125, "learning_rate": 0.0003573256144811765, "loss": 0.1773, "step": 259910 }, { "epoch": 10.77, "grad_norm": 0.76953125, "learning_rate": 0.0003573158194255519, "loss": 0.1782, "step": 259920 }, { "epoch": 10.77, "grad_norm": 1.515625, "learning_rate": 0.0003573060241679705, "loss": 0.2471, "step": 259930 }, { "epoch": 10.77, "grad_norm": 0.4609375, "learning_rate": 0.0003572962287084508, "loss": 0.1775, "step": 259940 }, { "epoch": 10.77, "grad_norm": 0.95703125, "learning_rate": 0.0003572864330470112, "loss": 0.1661, "step": 259950 }, { "epoch": 10.77, "grad_norm": 0.81640625, "learning_rate": 0.00035727663718367004, "loss": 0.1463, "step": 259960 }, { "epoch": 10.77, "grad_norm": 0.94921875, "learning_rate": 0.0003572668411184459, "loss": 0.154, "step": 259970 }, { "epoch": 10.77, "grad_norm": 2.234375, "learning_rate": 0.00035725704485135715, "loss": 0.1869, "step": 259980 }, { "epoch": 10.77, "grad_norm": 0.67578125, "learning_rate": 0.00035724724838242215, "loss": 0.1788, "step": 259990 }, { "epoch": 10.77, "grad_norm": 1.8125, "learning_rate": 0.00035723745171165955, "loss": 0.25, "step": 260000 }, { "epoch": 10.77, "grad_norm": 1.8046875, "learning_rate": 0.00035722765483908747, "loss": 0.2001, "step": 260010 }, { "epoch": 10.77, "grad_norm": 0.4375, "learning_rate": 0.0003572178577647247, "loss": 0.1739, "step": 260020 }, { "epoch": 10.77, "grad_norm": 0.8125, "learning_rate": 0.00035720806048858937, "loss": 0.1759, "step": 260030 }, { "epoch": 10.77, "grad_norm": 0.4765625, "learning_rate": 0.00035719826301070006, "loss": 0.2089, "step": 260040 }, { "epoch": 10.77, "grad_norm": 0.5859375, "learning_rate": 0.0003571884653310753, "loss": 0.2441, "step": 260050 }, { "epoch": 10.77, "grad_norm": 0.75390625, "learning_rate": 0.0003571786674497333, "loss": 0.1813, "step": 260060 }, { "epoch": 10.77, "grad_norm": 0.47265625, "learning_rate": 0.00035716886936669267, "loss": 0.1876, "step": 260070 }, { "epoch": 10.77, "grad_norm": 0.37109375, "learning_rate": 0.00035715907108197187, "loss": 0.1906, "step": 260080 }, { "epoch": 10.77, "grad_norm": 0.26953125, "learning_rate": 0.00035714927259558913, "loss": 0.1401, "step": 260090 }, { "epoch": 10.77, "grad_norm": 0.5234375, "learning_rate": 0.00035713947390756317, "loss": 0.1933, "step": 260100 }, { "epoch": 10.77, "grad_norm": 0.80859375, "learning_rate": 0.0003571296750179122, "loss": 0.2252, "step": 260110 }, { "epoch": 10.77, "grad_norm": 0.43359375, "learning_rate": 0.00035711987592665476, "loss": 0.2197, "step": 260120 }, { "epoch": 10.77, "grad_norm": 2.125, "learning_rate": 0.0003571100766338093, "loss": 0.2464, "step": 260130 }, { "epoch": 10.77, "grad_norm": 0.53515625, "learning_rate": 0.0003571002771393942, "loss": 0.2222, "step": 260140 }, { "epoch": 10.78, "grad_norm": 0.63671875, "learning_rate": 0.00035709047744342807, "loss": 0.1751, "step": 260150 }, { "epoch": 10.78, "grad_norm": 0.302734375, "learning_rate": 0.00035708067754592916, "loss": 0.1591, "step": 260160 }, { "epoch": 10.78, "grad_norm": 0.984375, "learning_rate": 0.00035707087744691593, "loss": 0.2126, "step": 260170 }, { "epoch": 10.78, "grad_norm": 1.4375, "learning_rate": 0.00035706107714640687, "loss": 0.2097, "step": 260180 }, { "epoch": 10.78, "grad_norm": 0.7265625, "learning_rate": 0.0003570512766444205, "loss": 0.2106, "step": 260190 }, { "epoch": 10.78, "grad_norm": 1.09375, "learning_rate": 0.0003570414759409751, "loss": 0.1931, "step": 260200 }, { "epoch": 10.78, "grad_norm": 0.5, "learning_rate": 0.00035703167503608926, "loss": 0.1809, "step": 260210 }, { "epoch": 10.78, "grad_norm": 0.42578125, "learning_rate": 0.0003570218739297813, "loss": 0.214, "step": 260220 }, { "epoch": 10.78, "grad_norm": 1.7890625, "learning_rate": 0.0003570120726220698, "loss": 0.261, "step": 260230 }, { "epoch": 10.78, "grad_norm": 0.42578125, "learning_rate": 0.00035700227111297314, "loss": 0.2616, "step": 260240 }, { "epoch": 10.78, "grad_norm": 0.484375, "learning_rate": 0.00035699246940250974, "loss": 0.1699, "step": 260250 }, { "epoch": 10.78, "grad_norm": 0.70703125, "learning_rate": 0.00035698266749069807, "loss": 0.1871, "step": 260260 }, { "epoch": 10.78, "grad_norm": 0.72265625, "learning_rate": 0.00035697286537755656, "loss": 0.1762, "step": 260270 }, { "epoch": 10.78, "grad_norm": 0.609375, "learning_rate": 0.00035696306306310364, "loss": 0.1511, "step": 260280 }, { "epoch": 10.78, "grad_norm": 0.98828125, "learning_rate": 0.0003569532605473579, "loss": 0.222, "step": 260290 }, { "epoch": 10.78, "grad_norm": 1.6640625, "learning_rate": 0.0003569434578303375, "loss": 0.2232, "step": 260300 }, { "epoch": 10.78, "grad_norm": 1.2578125, "learning_rate": 0.0003569336549120612, "loss": 0.1871, "step": 260310 }, { "epoch": 10.78, "grad_norm": 0.95703125, "learning_rate": 0.0003569238517925473, "loss": 0.1917, "step": 260320 }, { "epoch": 10.78, "grad_norm": 0.52734375, "learning_rate": 0.0003569140484718142, "loss": 0.1814, "step": 260330 }, { "epoch": 10.78, "grad_norm": 0.412109375, "learning_rate": 0.00035690424494988043, "loss": 0.2387, "step": 260340 }, { "epoch": 10.78, "grad_norm": 0.68359375, "learning_rate": 0.0003568944412267644, "loss": 0.1954, "step": 260350 }, { "epoch": 10.78, "grad_norm": 0.8515625, "learning_rate": 0.0003568846373024846, "loss": 0.2087, "step": 260360 }, { "epoch": 10.78, "grad_norm": 0.62109375, "learning_rate": 0.00035687483317705943, "loss": 0.1925, "step": 260370 }, { "epoch": 10.78, "grad_norm": 2.0625, "learning_rate": 0.00035686502885050737, "loss": 0.2355, "step": 260380 }, { "epoch": 10.79, "grad_norm": 1.03125, "learning_rate": 0.00035685522432284697, "loss": 0.1991, "step": 260390 }, { "epoch": 10.79, "grad_norm": 0.6953125, "learning_rate": 0.00035684541959409645, "loss": 0.2049, "step": 260400 }, { "epoch": 10.79, "grad_norm": 1.1328125, "learning_rate": 0.0003568356146642744, "loss": 0.2122, "step": 260410 }, { "epoch": 10.79, "grad_norm": 0.671875, "learning_rate": 0.0003568258095333993, "loss": 0.1755, "step": 260420 }, { "epoch": 10.79, "grad_norm": 0.69140625, "learning_rate": 0.00035681600420148956, "loss": 0.2051, "step": 260430 }, { "epoch": 10.79, "grad_norm": 0.435546875, "learning_rate": 0.00035680619866856363, "loss": 0.2074, "step": 260440 }, { "epoch": 10.79, "grad_norm": 0.83203125, "learning_rate": 0.00035679639293463995, "loss": 0.2045, "step": 260450 }, { "epoch": 10.79, "grad_norm": 0.177734375, "learning_rate": 0.000356786586999737, "loss": 0.2425, "step": 260460 }, { "epoch": 10.79, "grad_norm": 0.7890625, "learning_rate": 0.00035677678086387324, "loss": 0.2227, "step": 260470 }, { "epoch": 10.79, "grad_norm": 0.80078125, "learning_rate": 0.00035676697452706707, "loss": 0.2088, "step": 260480 }, { "epoch": 10.79, "grad_norm": 0.6484375, "learning_rate": 0.000356757167989337, "loss": 0.218, "step": 260490 }, { "epoch": 10.79, "grad_norm": 1.0078125, "learning_rate": 0.00035674736125070153, "loss": 0.2023, "step": 260500 }, { "epoch": 10.79, "grad_norm": 0.796875, "learning_rate": 0.00035673755431117897, "loss": 0.1757, "step": 260510 }, { "epoch": 10.79, "grad_norm": 0.921875, "learning_rate": 0.00035672774717078796, "loss": 0.2419, "step": 260520 }, { "epoch": 10.79, "grad_norm": 0.765625, "learning_rate": 0.0003567179398295467, "loss": 0.2811, "step": 260530 }, { "epoch": 10.79, "grad_norm": 0.64453125, "learning_rate": 0.0003567081322874739, "loss": 0.2336, "step": 260540 }, { "epoch": 10.79, "grad_norm": 0.466796875, "learning_rate": 0.00035669832454458795, "loss": 0.1939, "step": 260550 }, { "epoch": 10.79, "grad_norm": 0.43359375, "learning_rate": 0.0003566885166009072, "loss": 0.2066, "step": 260560 }, { "epoch": 10.79, "grad_norm": 0.62890625, "learning_rate": 0.0003566787084564502, "loss": 0.1703, "step": 260570 }, { "epoch": 10.79, "grad_norm": 1.8671875, "learning_rate": 0.0003566689001112354, "loss": 0.1764, "step": 260580 }, { "epoch": 10.79, "grad_norm": 0.61328125, "learning_rate": 0.0003566590915652813, "loss": 0.2101, "step": 260590 }, { "epoch": 10.79, "grad_norm": 1.0, "learning_rate": 0.00035664928281860627, "loss": 0.1952, "step": 260600 }, { "epoch": 10.79, "grad_norm": 1.4609375, "learning_rate": 0.0003566394738712288, "loss": 0.2338, "step": 260610 }, { "epoch": 10.79, "grad_norm": 1.1015625, "learning_rate": 0.0003566296647231673, "loss": 0.1932, "step": 260620 }, { "epoch": 10.8, "grad_norm": 0.74609375, "learning_rate": 0.0003566198553744404, "loss": 0.1862, "step": 260630 }, { "epoch": 10.8, "grad_norm": 0.625, "learning_rate": 0.0003566100458250664, "loss": 0.2035, "step": 260640 }, { "epoch": 10.8, "grad_norm": 0.8046875, "learning_rate": 0.00035660023607506373, "loss": 0.1998, "step": 260650 }, { "epoch": 10.8, "grad_norm": 0.734375, "learning_rate": 0.00035659042612445105, "loss": 0.2015, "step": 260660 }, { "epoch": 10.8, "grad_norm": 0.59765625, "learning_rate": 0.00035658061597324664, "loss": 0.2312, "step": 260670 }, { "epoch": 10.8, "grad_norm": 0.640625, "learning_rate": 0.00035657080562146907, "loss": 0.186, "step": 260680 }, { "epoch": 10.8, "grad_norm": 0.7421875, "learning_rate": 0.00035656099506913664, "loss": 0.1958, "step": 260690 }, { "epoch": 10.8, "grad_norm": 0.796875, "learning_rate": 0.00035655118431626796, "loss": 0.2276, "step": 260700 }, { "epoch": 10.8, "grad_norm": 0.8125, "learning_rate": 0.00035654137336288156, "loss": 0.1754, "step": 260710 }, { "epoch": 10.8, "grad_norm": 0.703125, "learning_rate": 0.00035653156220899577, "loss": 0.2182, "step": 260720 }, { "epoch": 10.8, "grad_norm": 1.8515625, "learning_rate": 0.000356521750854629, "loss": 0.1841, "step": 260730 }, { "epoch": 10.8, "grad_norm": 1.40625, "learning_rate": 0.0003565119392997998, "loss": 0.1792, "step": 260740 }, { "epoch": 10.8, "grad_norm": 0.64453125, "learning_rate": 0.0003565021275445267, "loss": 0.1608, "step": 260750 }, { "epoch": 10.8, "grad_norm": 0.62890625, "learning_rate": 0.0003564923155888281, "loss": 0.1716, "step": 260760 }, { "epoch": 10.8, "grad_norm": 0.3828125, "learning_rate": 0.0003564825034327224, "loss": 0.2174, "step": 260770 }, { "epoch": 10.8, "grad_norm": 0.515625, "learning_rate": 0.00035647269107622813, "loss": 0.2333, "step": 260780 }, { "epoch": 10.8, "grad_norm": 0.9140625, "learning_rate": 0.0003564628785193638, "loss": 0.1792, "step": 260790 }, { "epoch": 10.8, "grad_norm": 0.435546875, "learning_rate": 0.00035645306576214785, "loss": 0.1786, "step": 260800 }, { "epoch": 10.8, "grad_norm": 0.376953125, "learning_rate": 0.00035644325280459867, "loss": 0.2126, "step": 260810 }, { "epoch": 10.8, "grad_norm": 0.55859375, "learning_rate": 0.0003564334396467348, "loss": 0.2285, "step": 260820 }, { "epoch": 10.8, "grad_norm": 1.3046875, "learning_rate": 0.00035642362628857474, "loss": 0.1741, "step": 260830 }, { "epoch": 10.8, "grad_norm": 1.671875, "learning_rate": 0.00035641381273013686, "loss": 0.1982, "step": 260840 }, { "epoch": 10.8, "grad_norm": 0.9921875, "learning_rate": 0.0003564039989714397, "loss": 0.2082, "step": 260850 }, { "epoch": 10.8, "grad_norm": 1.0078125, "learning_rate": 0.00035639418501250163, "loss": 0.1766, "step": 260860 }, { "epoch": 10.81, "grad_norm": 0.0, "learning_rate": 0.00035638437085334126, "loss": 0.2151, "step": 260870 }, { "epoch": 10.81, "grad_norm": 0.7109375, "learning_rate": 0.000356374556493977, "loss": 0.2125, "step": 260880 }, { "epoch": 10.81, "grad_norm": 1.4921875, "learning_rate": 0.00035636474193442727, "loss": 0.164, "step": 260890 }, { "epoch": 10.81, "grad_norm": 0.4609375, "learning_rate": 0.0003563549271747106, "loss": 0.193, "step": 260900 }, { "epoch": 10.81, "grad_norm": 0.14453125, "learning_rate": 0.0003563451122148454, "loss": 0.1574, "step": 260910 }, { "epoch": 10.81, "grad_norm": 0.84765625, "learning_rate": 0.0003563352970548503, "loss": 0.1908, "step": 260920 }, { "epoch": 10.81, "grad_norm": 0.6328125, "learning_rate": 0.00035632548169474356, "loss": 0.2219, "step": 260930 }, { "epoch": 10.81, "grad_norm": 0.8203125, "learning_rate": 0.00035631566613454374, "loss": 0.2224, "step": 260940 }, { "epoch": 10.81, "grad_norm": 1.0390625, "learning_rate": 0.0003563058503742694, "loss": 0.1844, "step": 260950 }, { "epoch": 10.81, "grad_norm": 0.88671875, "learning_rate": 0.00035629603441393885, "loss": 0.1624, "step": 260960 }, { "epoch": 10.81, "grad_norm": 0.92578125, "learning_rate": 0.00035628621825357064, "loss": 0.1859, "step": 260970 }, { "epoch": 10.81, "grad_norm": 0.71484375, "learning_rate": 0.0003562764018931833, "loss": 0.2146, "step": 260980 }, { "epoch": 10.81, "grad_norm": 0.5078125, "learning_rate": 0.0003562665853327952, "loss": 0.1684, "step": 260990 }, { "epoch": 10.81, "grad_norm": 0.62890625, "learning_rate": 0.0003562567685724249, "loss": 0.1461, "step": 261000 }, { "epoch": 10.81, "grad_norm": 0.76953125, "learning_rate": 0.0003562469516120909, "loss": 0.1744, "step": 261010 }, { "epoch": 10.81, "grad_norm": 0.640625, "learning_rate": 0.00035623713445181147, "loss": 0.2038, "step": 261020 }, { "epoch": 10.81, "grad_norm": 1.8515625, "learning_rate": 0.0003562273170916053, "loss": 0.2084, "step": 261030 }, { "epoch": 10.81, "grad_norm": 1.5625, "learning_rate": 0.0003562174995314908, "loss": 0.1784, "step": 261040 }, { "epoch": 10.81, "grad_norm": 1.4765625, "learning_rate": 0.00035620768177148637, "loss": 0.227, "step": 261050 }, { "epoch": 10.81, "grad_norm": 1.0078125, "learning_rate": 0.00035619786381161056, "loss": 0.2013, "step": 261060 }, { "epoch": 10.81, "grad_norm": 0.69140625, "learning_rate": 0.0003561880456518819, "loss": 0.2074, "step": 261070 }, { "epoch": 10.81, "grad_norm": 1.21875, "learning_rate": 0.0003561782272923188, "loss": 0.1949, "step": 261080 }, { "epoch": 10.81, "grad_norm": 0.62890625, "learning_rate": 0.00035616840873293977, "loss": 0.2217, "step": 261090 }, { "epoch": 10.81, "grad_norm": 1.0, "learning_rate": 0.0003561585899737632, "loss": 0.1578, "step": 261100 }, { "epoch": 10.82, "grad_norm": 0.52734375, "learning_rate": 0.0003561487710148076, "loss": 0.1927, "step": 261110 }, { "epoch": 10.82, "grad_norm": 0.5390625, "learning_rate": 0.0003561389518560916, "loss": 0.1864, "step": 261120 }, { "epoch": 10.82, "grad_norm": 0.91796875, "learning_rate": 0.00035612913249763337, "loss": 0.2089, "step": 261130 }, { "epoch": 10.82, "grad_norm": 0.58984375, "learning_rate": 0.0003561193129394517, "loss": 0.2155, "step": 261140 }, { "epoch": 10.82, "grad_norm": 0.890625, "learning_rate": 0.00035610949318156494, "loss": 0.2076, "step": 261150 }, { "epoch": 10.82, "grad_norm": 0.5546875, "learning_rate": 0.0003560996732239915, "loss": 0.2634, "step": 261160 }, { "epoch": 10.82, "grad_norm": 0.9921875, "learning_rate": 0.00035608985306675, "loss": 0.248, "step": 261170 }, { "epoch": 10.82, "grad_norm": 0.75, "learning_rate": 0.00035608003270985885, "loss": 0.1652, "step": 261180 }, { "epoch": 10.82, "grad_norm": 0.416015625, "learning_rate": 0.0003560702121533365, "loss": 0.2027, "step": 261190 }, { "epoch": 10.82, "grad_norm": 0.81640625, "learning_rate": 0.00035606039139720154, "loss": 0.2173, "step": 261200 }, { "epoch": 10.82, "grad_norm": 0.439453125, "learning_rate": 0.0003560505704414723, "loss": 0.2233, "step": 261210 }, { "epoch": 10.82, "grad_norm": 1.1796875, "learning_rate": 0.00035604074928616736, "loss": 0.2106, "step": 261220 }, { "epoch": 10.82, "grad_norm": 1.9375, "learning_rate": 0.00035603092793130516, "loss": 0.1523, "step": 261230 }, { "epoch": 10.82, "grad_norm": 1.3203125, "learning_rate": 0.00035602110637690423, "loss": 0.2644, "step": 261240 }, { "epoch": 10.82, "grad_norm": 0.97265625, "learning_rate": 0.00035601128462298305, "loss": 0.2182, "step": 261250 }, { "epoch": 10.82, "grad_norm": 0.75390625, "learning_rate": 0.00035600146266956, "loss": 0.1948, "step": 261260 }, { "epoch": 10.82, "grad_norm": 0.6640625, "learning_rate": 0.0003559916405166537, "loss": 0.186, "step": 261270 }, { "epoch": 10.82, "grad_norm": 0.98046875, "learning_rate": 0.00035598181816428264, "loss": 0.1689, "step": 261280 }, { "epoch": 10.82, "grad_norm": 1.6171875, "learning_rate": 0.00035597199561246515, "loss": 0.2338, "step": 261290 }, { "epoch": 10.82, "grad_norm": 0.435546875, "learning_rate": 0.00035596217286121984, "loss": 0.1735, "step": 261300 }, { "epoch": 10.82, "grad_norm": 3.0625, "learning_rate": 0.00035595234991056513, "loss": 0.2047, "step": 261310 }, { "epoch": 10.82, "grad_norm": 0.73828125, "learning_rate": 0.0003559425267605196, "loss": 0.2175, "step": 261320 }, { "epoch": 10.82, "grad_norm": 0.6875, "learning_rate": 0.00035593270341110166, "loss": 0.1758, "step": 261330 }, { "epoch": 10.82, "grad_norm": 1.5078125, "learning_rate": 0.0003559228798623298, "loss": 0.174, "step": 261340 }, { "epoch": 10.83, "grad_norm": 1.125, "learning_rate": 0.00035591305611422255, "loss": 0.2279, "step": 261350 }, { "epoch": 10.83, "grad_norm": 0.73046875, "learning_rate": 0.00035590323216679834, "loss": 0.1803, "step": 261360 }, { "epoch": 10.83, "grad_norm": 0.7734375, "learning_rate": 0.00035589340802007566, "loss": 0.2274, "step": 261370 }, { "epoch": 10.83, "grad_norm": 0.69140625, "learning_rate": 0.00035588358367407303, "loss": 0.2024, "step": 261380 }, { "epoch": 10.83, "grad_norm": 0.81640625, "learning_rate": 0.00035587375912880895, "loss": 0.2136, "step": 261390 }, { "epoch": 10.83, "grad_norm": 0.96875, "learning_rate": 0.00035586393438430186, "loss": 0.1906, "step": 261400 }, { "epoch": 10.83, "grad_norm": 0.447265625, "learning_rate": 0.00035585410944057033, "loss": 0.2076, "step": 261410 }, { "epoch": 10.83, "grad_norm": 0.76953125, "learning_rate": 0.0003558442842976327, "loss": 0.1867, "step": 261420 }, { "epoch": 10.83, "grad_norm": 0.75390625, "learning_rate": 0.0003558344589555076, "loss": 0.1932, "step": 261430 }, { "epoch": 10.83, "grad_norm": 0.63671875, "learning_rate": 0.0003558246334142136, "loss": 0.223, "step": 261440 }, { "epoch": 10.83, "grad_norm": 1.2734375, "learning_rate": 0.0003558148076737689, "loss": 0.2029, "step": 261450 }, { "epoch": 10.83, "grad_norm": 0.86328125, "learning_rate": 0.00035580498173419224, "loss": 0.1951, "step": 261460 }, { "epoch": 10.83, "grad_norm": 0.34375, "learning_rate": 0.00035579515559550204, "loss": 0.2117, "step": 261470 }, { "epoch": 10.83, "grad_norm": 1.4765625, "learning_rate": 0.00035578532925771675, "loss": 0.1609, "step": 261480 }, { "epoch": 10.83, "grad_norm": 1.1953125, "learning_rate": 0.0003557755027208549, "loss": 0.1552, "step": 261490 }, { "epoch": 10.83, "grad_norm": 2.09375, "learning_rate": 0.00035576567598493493, "loss": 0.1855, "step": 261500 }, { "epoch": 10.83, "grad_norm": 0.62109375, "learning_rate": 0.0003557558490499755, "loss": 0.2196, "step": 261510 }, { "epoch": 10.83, "grad_norm": 0.97265625, "learning_rate": 0.00035574602191599494, "loss": 0.2628, "step": 261520 }, { "epoch": 10.83, "grad_norm": 0.6640625, "learning_rate": 0.00035573619458301165, "loss": 0.1914, "step": 261530 }, { "epoch": 10.83, "grad_norm": 0.6796875, "learning_rate": 0.00035572636705104445, "loss": 0.2196, "step": 261540 }, { "epoch": 10.83, "grad_norm": 0.78515625, "learning_rate": 0.00035571653932011147, "loss": 0.1846, "step": 261550 }, { "epoch": 10.83, "grad_norm": 0.73828125, "learning_rate": 0.00035570671139023146, "loss": 0.1639, "step": 261560 }, { "epoch": 10.83, "grad_norm": 0.275390625, "learning_rate": 0.0003556968832614229, "loss": 0.2213, "step": 261570 }, { "epoch": 10.83, "grad_norm": 0.5546875, "learning_rate": 0.0003556870549337041, "loss": 0.2112, "step": 261580 }, { "epoch": 10.84, "grad_norm": 0.69921875, "learning_rate": 0.0003556772264070938, "loss": 0.2319, "step": 261590 }, { "epoch": 10.84, "grad_norm": 1.109375, "learning_rate": 0.0003556673976816104, "loss": 0.2032, "step": 261600 }, { "epoch": 10.84, "grad_norm": 0.74609375, "learning_rate": 0.0003556575687572722, "loss": 0.1936, "step": 261610 }, { "epoch": 10.84, "grad_norm": 0.73046875, "learning_rate": 0.00035564773963409795, "loss": 0.2009, "step": 261620 }, { "epoch": 10.84, "grad_norm": 1.3515625, "learning_rate": 0.0003556379103121061, "loss": 0.2068, "step": 261630 }, { "epoch": 10.84, "grad_norm": 0.419921875, "learning_rate": 0.000355628080791315, "loss": 0.158, "step": 261640 }, { "epoch": 10.84, "grad_norm": 1.125, "learning_rate": 0.0003556182510717434, "loss": 0.2077, "step": 261650 }, { "epoch": 10.84, "grad_norm": 1.2578125, "learning_rate": 0.0003556084211534095, "loss": 0.1682, "step": 261660 }, { "epoch": 10.84, "grad_norm": 0.65625, "learning_rate": 0.00035559859103633206, "loss": 0.1706, "step": 261670 }, { "epoch": 10.84, "grad_norm": 0.232421875, "learning_rate": 0.00035558876072052954, "loss": 0.1857, "step": 261680 }, { "epoch": 10.84, "grad_norm": 0.478515625, "learning_rate": 0.00035557893020602023, "loss": 0.2055, "step": 261690 }, { "epoch": 10.84, "grad_norm": 2.125, "learning_rate": 0.0003555690994928229, "loss": 0.1955, "step": 261700 }, { "epoch": 10.84, "grad_norm": 0.66796875, "learning_rate": 0.00035555926858095584, "loss": 0.2371, "step": 261710 }, { "epoch": 10.84, "grad_norm": 0.8984375, "learning_rate": 0.0003555494374704377, "loss": 0.1419, "step": 261720 }, { "epoch": 10.84, "grad_norm": 0.65234375, "learning_rate": 0.0003555396061612869, "loss": 0.2371, "step": 261730 }, { "epoch": 10.84, "grad_norm": 0.66796875, "learning_rate": 0.0003555297746535218, "loss": 0.1427, "step": 261740 }, { "epoch": 10.84, "grad_norm": 1.0859375, "learning_rate": 0.00035551994294716127, "loss": 0.2019, "step": 261750 }, { "epoch": 10.84, "grad_norm": 0.45703125, "learning_rate": 0.0003555101110422235, "loss": 0.2035, "step": 261760 }, { "epoch": 10.84, "grad_norm": 1.0234375, "learning_rate": 0.0003555002789387271, "loss": 0.191, "step": 261770 }, { "epoch": 10.84, "grad_norm": 0.984375, "learning_rate": 0.00035549044663669066, "loss": 0.2066, "step": 261780 }, { "epoch": 10.84, "grad_norm": 0.63671875, "learning_rate": 0.00035548061413613255, "loss": 0.1887, "step": 261790 }, { "epoch": 10.84, "grad_norm": 1.25, "learning_rate": 0.00035547078143707123, "loss": 0.1672, "step": 261800 }, { "epoch": 10.84, "grad_norm": 0.87109375, "learning_rate": 0.0003554609485395254, "loss": 0.2316, "step": 261810 }, { "epoch": 10.84, "grad_norm": 0.8046875, "learning_rate": 0.0003554511154435133, "loss": 0.2272, "step": 261820 }, { "epoch": 10.84, "grad_norm": 1.0390625, "learning_rate": 0.00035544128214905376, "loss": 0.1876, "step": 261830 }, { "epoch": 10.85, "grad_norm": 1.765625, "learning_rate": 0.00035543144865616503, "loss": 0.1578, "step": 261840 }, { "epoch": 10.85, "grad_norm": 1.125, "learning_rate": 0.0003554216149648657, "loss": 0.202, "step": 261850 }, { "epoch": 10.85, "grad_norm": 0.59375, "learning_rate": 0.0003554117810751743, "loss": 0.2341, "step": 261860 }, { "epoch": 10.85, "grad_norm": 0.5390625, "learning_rate": 0.0003554019469871093, "loss": 0.2258, "step": 261870 }, { "epoch": 10.85, "grad_norm": 1.28125, "learning_rate": 0.0003553921127006892, "loss": 0.2055, "step": 261880 }, { "epoch": 10.85, "grad_norm": 0.68359375, "learning_rate": 0.00035538227821593256, "loss": 0.2278, "step": 261890 }, { "epoch": 10.85, "grad_norm": 0.60546875, "learning_rate": 0.0003553724435328578, "loss": 0.2127, "step": 261900 }, { "epoch": 10.85, "grad_norm": 0.5, "learning_rate": 0.00035536260865148356, "loss": 0.1948, "step": 261910 }, { "epoch": 10.85, "grad_norm": 0.51171875, "learning_rate": 0.0003553527735718282, "loss": 0.2209, "step": 261920 }, { "epoch": 10.85, "grad_norm": 0.65625, "learning_rate": 0.0003553429382939103, "loss": 0.1842, "step": 261930 }, { "epoch": 10.85, "grad_norm": 0.69921875, "learning_rate": 0.00035533310281774845, "loss": 0.2521, "step": 261940 }, { "epoch": 10.85, "grad_norm": 0.6953125, "learning_rate": 0.000355323267143361, "loss": 0.1806, "step": 261950 }, { "epoch": 10.85, "grad_norm": 0.482421875, "learning_rate": 0.00035531343127076655, "loss": 0.1767, "step": 261960 }, { "epoch": 10.85, "grad_norm": 0.99609375, "learning_rate": 0.00035530359519998355, "loss": 0.2122, "step": 261970 }, { "epoch": 10.85, "grad_norm": 0.283203125, "learning_rate": 0.0003552937589310306, "loss": 0.1908, "step": 261980 }, { "epoch": 10.85, "grad_norm": 1.0234375, "learning_rate": 0.00035528392246392623, "loss": 0.2086, "step": 261990 }, { "epoch": 10.85, "grad_norm": 0.9453125, "learning_rate": 0.00035527408579868883, "loss": 0.2022, "step": 262000 }, { "epoch": 10.85, "grad_norm": 0.6328125, "learning_rate": 0.0003552642489353369, "loss": 0.2042, "step": 262010 }, { "epoch": 10.85, "grad_norm": 1.140625, "learning_rate": 0.0003552544118738891, "loss": 0.2193, "step": 262020 }, { "epoch": 10.85, "grad_norm": 0.34765625, "learning_rate": 0.0003552445746143639, "loss": 0.1568, "step": 262030 }, { "epoch": 10.85, "grad_norm": 0.56640625, "learning_rate": 0.0003552347371567797, "loss": 0.1483, "step": 262040 }, { "epoch": 10.85, "grad_norm": 0.83984375, "learning_rate": 0.00035522489950115505, "loss": 0.1493, "step": 262050 }, { "epoch": 10.85, "grad_norm": 0.9921875, "learning_rate": 0.00035521506164750853, "loss": 0.1903, "step": 262060 }, { "epoch": 10.85, "grad_norm": 1.5, "learning_rate": 0.0003552052235958587, "loss": 0.1982, "step": 262070 }, { "epoch": 10.86, "grad_norm": 1.0703125, "learning_rate": 0.000355195385346224, "loss": 0.1892, "step": 262080 }, { "epoch": 10.86, "grad_norm": 0.9921875, "learning_rate": 0.00035518554689862285, "loss": 0.1915, "step": 262090 }, { "epoch": 10.86, "grad_norm": 0.8125, "learning_rate": 0.0003551757082530739, "loss": 0.2143, "step": 262100 }, { "epoch": 10.86, "grad_norm": 0.59765625, "learning_rate": 0.0003551658694095957, "loss": 0.1665, "step": 262110 }, { "epoch": 10.86, "grad_norm": 0.73046875, "learning_rate": 0.0003551560303682067, "loss": 0.1727, "step": 262120 }, { "epoch": 10.86, "grad_norm": 0.70703125, "learning_rate": 0.00035514619112892527, "loss": 0.2528, "step": 262130 }, { "epoch": 10.86, "grad_norm": 0.90234375, "learning_rate": 0.0003551363516917701, "loss": 0.2102, "step": 262140 }, { "epoch": 10.86, "grad_norm": 0.87890625, "learning_rate": 0.0003551265120567597, "loss": 0.1621, "step": 262150 }, { "epoch": 10.86, "grad_norm": 0.53125, "learning_rate": 0.0003551166722239126, "loss": 0.1974, "step": 262160 }, { "epoch": 10.86, "grad_norm": 0.75390625, "learning_rate": 0.00035510683219324725, "loss": 0.2276, "step": 262170 }, { "epoch": 10.86, "grad_norm": 1.859375, "learning_rate": 0.00035509699196478213, "loss": 0.1771, "step": 262180 }, { "epoch": 10.86, "grad_norm": 0.50390625, "learning_rate": 0.00035508715153853587, "loss": 0.1938, "step": 262190 }, { "epoch": 10.86, "grad_norm": 0.88671875, "learning_rate": 0.00035507731091452696, "loss": 0.2198, "step": 262200 }, { "epoch": 10.86, "grad_norm": 0.431640625, "learning_rate": 0.0003550674700927738, "loss": 0.1542, "step": 262210 }, { "epoch": 10.86, "grad_norm": 0.63671875, "learning_rate": 0.0003550576290732951, "loss": 0.1932, "step": 262220 }, { "epoch": 10.86, "grad_norm": 0.87109375, "learning_rate": 0.00035504778785610934, "loss": 0.1659, "step": 262230 }, { "epoch": 10.86, "grad_norm": 0.4140625, "learning_rate": 0.0003550379464412349, "loss": 0.2252, "step": 262240 }, { "epoch": 10.86, "grad_norm": 0.93359375, "learning_rate": 0.0003550281048286904, "loss": 0.1991, "step": 262250 }, { "epoch": 10.86, "grad_norm": 1.078125, "learning_rate": 0.0003550182630184943, "loss": 0.2401, "step": 262260 }, { "epoch": 10.86, "grad_norm": 1.265625, "learning_rate": 0.00035500842101066527, "loss": 0.1901, "step": 262270 }, { "epoch": 10.86, "grad_norm": 0.48828125, "learning_rate": 0.00035499857880522167, "loss": 0.2107, "step": 262280 }, { "epoch": 10.86, "grad_norm": 0.78125, "learning_rate": 0.0003549887364021821, "loss": 0.2046, "step": 262290 }, { "epoch": 10.86, "grad_norm": 0.76953125, "learning_rate": 0.0003549788938015651, "loss": 0.2014, "step": 262300 }, { "epoch": 10.86, "grad_norm": 1.2734375, "learning_rate": 0.0003549690510033891, "loss": 0.2322, "step": 262310 }, { "epoch": 10.87, "grad_norm": 0.53515625, "learning_rate": 0.0003549592080076727, "loss": 0.2264, "step": 262320 }, { "epoch": 10.87, "grad_norm": 1.421875, "learning_rate": 0.00035494936481443443, "loss": 0.1948, "step": 262330 }, { "epoch": 10.87, "grad_norm": 0.76171875, "learning_rate": 0.0003549395214236927, "loss": 0.1668, "step": 262340 }, { "epoch": 10.87, "grad_norm": 0.2216796875, "learning_rate": 0.0003549296778354663, "loss": 0.2007, "step": 262350 }, { "epoch": 10.87, "grad_norm": 0.466796875, "learning_rate": 0.0003549198340497734, "loss": 0.1974, "step": 262360 }, { "epoch": 10.87, "grad_norm": 0.52734375, "learning_rate": 0.0003549099900666328, "loss": 0.1848, "step": 262370 }, { "epoch": 10.87, "grad_norm": 0.76171875, "learning_rate": 0.00035490014588606284, "loss": 0.18, "step": 262380 }, { "epoch": 10.87, "grad_norm": 1.0234375, "learning_rate": 0.00035489030150808224, "loss": 0.1895, "step": 262390 }, { "epoch": 10.87, "grad_norm": 0.703125, "learning_rate": 0.00035488045693270935, "loss": 0.1943, "step": 262400 }, { "epoch": 10.87, "grad_norm": 2.234375, "learning_rate": 0.0003548706121599628, "loss": 0.2013, "step": 262410 }, { "epoch": 10.87, "grad_norm": 0.63671875, "learning_rate": 0.00035486076718986104, "loss": 0.2236, "step": 262420 }, { "epoch": 10.87, "grad_norm": 0.70703125, "learning_rate": 0.00035485092202242275, "loss": 0.2061, "step": 262430 }, { "epoch": 10.87, "grad_norm": 1.015625, "learning_rate": 0.0003548410766576662, "loss": 0.1738, "step": 262440 }, { "epoch": 10.87, "grad_norm": 0.7890625, "learning_rate": 0.00035483123109561016, "loss": 0.2223, "step": 262450 }, { "epoch": 10.87, "grad_norm": 0.32421875, "learning_rate": 0.000354821385336273, "loss": 0.1907, "step": 262460 }, { "epoch": 10.87, "grad_norm": 1.1953125, "learning_rate": 0.0003548115393796734, "loss": 0.2068, "step": 262470 }, { "epoch": 10.87, "grad_norm": 2.1875, "learning_rate": 0.0003548016932258298, "loss": 0.1777, "step": 262480 }, { "epoch": 10.87, "grad_norm": 0.42578125, "learning_rate": 0.0003547918468747606, "loss": 0.1559, "step": 262490 }, { "epoch": 10.87, "grad_norm": 0.4765625, "learning_rate": 0.00035478200032648455, "loss": 0.1936, "step": 262500 }, { "epoch": 10.87, "grad_norm": 0.294921875, "learning_rate": 0.00035477215358102013, "loss": 0.1767, "step": 262510 }, { "epoch": 10.87, "grad_norm": 1.015625, "learning_rate": 0.0003547623066383857, "loss": 0.1589, "step": 262520 }, { "epoch": 10.87, "grad_norm": 0.515625, "learning_rate": 0.0003547524594986001, "loss": 0.197, "step": 262530 }, { "epoch": 10.87, "grad_norm": 0.6796875, "learning_rate": 0.0003547426121616816, "loss": 0.1981, "step": 262540 }, { "epoch": 10.87, "grad_norm": 0.89453125, "learning_rate": 0.00035473276462764876, "loss": 0.2181, "step": 262550 }, { "epoch": 10.88, "grad_norm": 1.125, "learning_rate": 0.00035472291689652027, "loss": 0.2137, "step": 262560 }, { "epoch": 10.88, "grad_norm": 0.4375, "learning_rate": 0.00035471306896831445, "loss": 0.1758, "step": 262570 }, { "epoch": 10.88, "grad_norm": 1.2109375, "learning_rate": 0.00035470322084305, "loss": 0.2032, "step": 262580 }, { "epoch": 10.88, "grad_norm": 0.66015625, "learning_rate": 0.0003546933725207454, "loss": 0.1819, "step": 262590 }, { "epoch": 10.88, "grad_norm": 0.94140625, "learning_rate": 0.00035468352400141914, "loss": 0.2168, "step": 262600 }, { "epoch": 10.88, "grad_norm": 1.2109375, "learning_rate": 0.00035467367528508985, "loss": 0.1892, "step": 262610 }, { "epoch": 10.88, "grad_norm": 0.40625, "learning_rate": 0.000354663826371776, "loss": 0.1795, "step": 262620 }, { "epoch": 10.88, "grad_norm": 0.91796875, "learning_rate": 0.0003546539772614961, "loss": 0.1602, "step": 262630 }, { "epoch": 10.88, "grad_norm": 0.94140625, "learning_rate": 0.0003546441279542687, "loss": 0.2319, "step": 262640 }, { "epoch": 10.88, "grad_norm": 0.625, "learning_rate": 0.00035463427845011235, "loss": 0.2023, "step": 262650 }, { "epoch": 10.88, "grad_norm": 0.6953125, "learning_rate": 0.00035462442874904565, "loss": 0.2035, "step": 262660 }, { "epoch": 10.88, "grad_norm": 1.375, "learning_rate": 0.0003546145788510871, "loss": 0.206, "step": 262670 }, { "epoch": 10.88, "grad_norm": 0.46484375, "learning_rate": 0.00035460472875625506, "loss": 0.2379, "step": 262680 }, { "epoch": 10.88, "grad_norm": 1.0546875, "learning_rate": 0.0003545948784645684, "loss": 0.1534, "step": 262690 }, { "epoch": 10.88, "grad_norm": 0.96875, "learning_rate": 0.0003545850279760453, "loss": 0.1785, "step": 262700 }, { "epoch": 10.88, "grad_norm": 0.69921875, "learning_rate": 0.00035457517729070456, "loss": 0.197, "step": 262710 }, { "epoch": 10.88, "grad_norm": 1.0234375, "learning_rate": 0.00035456532640856466, "loss": 0.1782, "step": 262720 }, { "epoch": 10.88, "grad_norm": 0.328125, "learning_rate": 0.0003545554753296441, "loss": 0.1602, "step": 262730 }, { "epoch": 10.88, "grad_norm": 0.5390625, "learning_rate": 0.00035454562405396134, "loss": 0.1833, "step": 262740 }, { "epoch": 10.88, "grad_norm": 0.54296875, "learning_rate": 0.0003545357725815351, "loss": 0.1979, "step": 262750 }, { "epoch": 10.88, "grad_norm": 0.92578125, "learning_rate": 0.00035452592091238374, "loss": 0.2156, "step": 262760 }, { "epoch": 10.88, "grad_norm": 0.703125, "learning_rate": 0.000354516069046526, "loss": 0.2004, "step": 262770 }, { "epoch": 10.88, "grad_norm": 0.2197265625, "learning_rate": 0.00035450621698398025, "loss": 0.2269, "step": 262780 }, { "epoch": 10.88, "grad_norm": 0.53125, "learning_rate": 0.00035449636472476497, "loss": 0.2089, "step": 262790 }, { "epoch": 10.89, "grad_norm": 1.171875, "learning_rate": 0.000354486512268899, "loss": 0.1785, "step": 262800 }, { "epoch": 10.89, "grad_norm": 0.80859375, "learning_rate": 0.0003544766596164005, "loss": 0.1823, "step": 262810 }, { "epoch": 10.89, "grad_norm": 0.921875, "learning_rate": 0.00035446680676728845, "loss": 0.2119, "step": 262820 }, { "epoch": 10.89, "grad_norm": 0.5390625, "learning_rate": 0.000354456953721581, "loss": 0.1866, "step": 262830 }, { "epoch": 10.89, "grad_norm": 0.52734375, "learning_rate": 0.00035444710047929684, "loss": 0.1856, "step": 262840 }, { "epoch": 10.89, "grad_norm": 1.1015625, "learning_rate": 0.00035443724704045453, "loss": 0.1986, "step": 262850 }, { "epoch": 10.89, "grad_norm": 0.94140625, "learning_rate": 0.00035442739340507266, "loss": 0.1812, "step": 262860 }, { "epoch": 10.89, "grad_norm": 0.69140625, "learning_rate": 0.0003544175395731696, "loss": 0.1749, "step": 262870 }, { "epoch": 10.89, "grad_norm": 0.86328125, "learning_rate": 0.0003544076855447642, "loss": 0.1965, "step": 262880 }, { "epoch": 10.89, "grad_norm": 0.97265625, "learning_rate": 0.0003543978313198746, "loss": 0.1853, "step": 262890 }, { "epoch": 10.89, "grad_norm": 0.83984375, "learning_rate": 0.00035438797689851966, "loss": 0.169, "step": 262900 }, { "epoch": 10.89, "grad_norm": 0.90625, "learning_rate": 0.0003543781222807178, "loss": 0.2144, "step": 262910 }, { "epoch": 10.89, "grad_norm": 0.82421875, "learning_rate": 0.0003543682674664875, "loss": 0.1462, "step": 262920 }, { "epoch": 10.89, "grad_norm": 0.765625, "learning_rate": 0.0003543584124558476, "loss": 0.2695, "step": 262930 }, { "epoch": 10.89, "grad_norm": 0.80859375, "learning_rate": 0.0003543485572488163, "loss": 0.213, "step": 262940 }, { "epoch": 10.89, "grad_norm": 1.078125, "learning_rate": 0.0003543387018454123, "loss": 0.1694, "step": 262950 }, { "epoch": 10.89, "grad_norm": 1.3203125, "learning_rate": 0.00035432884624565417, "loss": 0.1752, "step": 262960 }, { "epoch": 10.89, "grad_norm": 0.88671875, "learning_rate": 0.00035431899044956026, "loss": 0.1946, "step": 262970 }, { "epoch": 10.89, "grad_norm": 0.859375, "learning_rate": 0.0003543091344571495, "loss": 0.2068, "step": 262980 }, { "epoch": 10.89, "grad_norm": 0.314453125, "learning_rate": 0.00035429927826844006, "loss": 0.1804, "step": 262990 }, { "epoch": 10.89, "grad_norm": 0.5390625, "learning_rate": 0.0003542894218834507, "loss": 0.174, "step": 263000 }, { "epoch": 10.89, "grad_norm": 0.447265625, "learning_rate": 0.0003542795653021999, "loss": 0.1908, "step": 263010 }, { "epoch": 10.89, "grad_norm": 0.52734375, "learning_rate": 0.0003542697085247062, "loss": 0.1828, "step": 263020 }, { "epoch": 10.89, "grad_norm": 0.97265625, "learning_rate": 0.00035425985155098814, "loss": 0.1737, "step": 263030 }, { "epoch": 10.9, "grad_norm": 1.1015625, "learning_rate": 0.0003542499943810644, "loss": 0.2187, "step": 263040 }, { "epoch": 10.9, "grad_norm": 0.5390625, "learning_rate": 0.00035424013701495333, "loss": 0.2187, "step": 263050 }, { "epoch": 10.9, "grad_norm": 0.53125, "learning_rate": 0.00035423027945267365, "loss": 0.1738, "step": 263060 }, { "epoch": 10.9, "grad_norm": 0.5703125, "learning_rate": 0.00035422042169424385, "loss": 0.1288, "step": 263070 }, { "epoch": 10.9, "grad_norm": 0.7890625, "learning_rate": 0.0003542105637396824, "loss": 0.2045, "step": 263080 }, { "epoch": 10.9, "grad_norm": 0.4921875, "learning_rate": 0.000354200705589008, "loss": 0.2018, "step": 263090 }, { "epoch": 10.9, "grad_norm": 0.69921875, "learning_rate": 0.00035419084724223903, "loss": 0.1925, "step": 263100 }, { "epoch": 10.9, "grad_norm": 1.25, "learning_rate": 0.00035418098869939417, "loss": 0.1846, "step": 263110 }, { "epoch": 10.9, "grad_norm": 1.21875, "learning_rate": 0.00035417112996049204, "loss": 0.2002, "step": 263120 }, { "epoch": 10.9, "grad_norm": 1.1171875, "learning_rate": 0.00035416127102555093, "loss": 0.1725, "step": 263130 }, { "epoch": 10.9, "grad_norm": 0.8671875, "learning_rate": 0.0003541514118945897, "loss": 0.1348, "step": 263140 }, { "epoch": 10.9, "grad_norm": 0.9140625, "learning_rate": 0.0003541415525676267, "loss": 0.2019, "step": 263150 }, { "epoch": 10.9, "grad_norm": 0.283203125, "learning_rate": 0.0003541316930446805, "loss": 0.1951, "step": 263160 }, { "epoch": 10.9, "grad_norm": 1.0625, "learning_rate": 0.0003541218333257698, "loss": 0.1417, "step": 263170 }, { "epoch": 10.9, "grad_norm": 0.408203125, "learning_rate": 0.000354111973410913, "loss": 0.1511, "step": 263180 }, { "epoch": 10.9, "grad_norm": 0.38671875, "learning_rate": 0.00035410211330012863, "loss": 0.2095, "step": 263190 }, { "epoch": 10.9, "grad_norm": 0.53125, "learning_rate": 0.0003540922529934355, "loss": 0.2165, "step": 263200 }, { "epoch": 10.9, "grad_norm": 0.7421875, "learning_rate": 0.00035408239249085186, "loss": 0.1773, "step": 263210 }, { "epoch": 10.9, "grad_norm": 0.55859375, "learning_rate": 0.0003540725317923964, "loss": 0.1698, "step": 263220 }, { "epoch": 10.9, "grad_norm": 1.8515625, "learning_rate": 0.00035406267089808767, "loss": 0.1847, "step": 263230 }, { "epoch": 10.9, "grad_norm": 0.5625, "learning_rate": 0.00035405280980794427, "loss": 0.2279, "step": 263240 }, { "epoch": 10.9, "grad_norm": 0.2470703125, "learning_rate": 0.00035404294852198475, "loss": 0.226, "step": 263250 }, { "epoch": 10.9, "grad_norm": 1.3046875, "learning_rate": 0.0003540330870402276, "loss": 0.2287, "step": 263260 }, { "epoch": 10.9, "grad_norm": 0.5078125, "learning_rate": 0.00035402322536269137, "loss": 0.212, "step": 263270 }, { "epoch": 10.91, "grad_norm": 0.765625, "learning_rate": 0.0003540133634893947, "loss": 0.174, "step": 263280 }, { "epoch": 10.91, "grad_norm": 0.75, "learning_rate": 0.00035400350142035617, "loss": 0.1985, "step": 263290 }, { "epoch": 10.91, "grad_norm": 0.478515625, "learning_rate": 0.0003539936391555942, "loss": 0.1396, "step": 263300 }, { "epoch": 10.91, "grad_norm": 1.125, "learning_rate": 0.0003539837766951275, "loss": 0.198, "step": 263310 }, { "epoch": 10.91, "grad_norm": 1.0, "learning_rate": 0.0003539739140389745, "loss": 0.2209, "step": 263320 }, { "epoch": 10.91, "grad_norm": 0.0, "learning_rate": 0.0003539640511871539, "loss": 0.1938, "step": 263330 }, { "epoch": 10.91, "grad_norm": 0.8671875, "learning_rate": 0.00035395418813968403, "loss": 0.2165, "step": 263340 }, { "epoch": 10.91, "grad_norm": 0.89453125, "learning_rate": 0.0003539443248965838, "loss": 0.1294, "step": 263350 }, { "epoch": 10.91, "grad_norm": 1.171875, "learning_rate": 0.00035393446145787146, "loss": 0.153, "step": 263360 }, { "epoch": 10.91, "grad_norm": 0.3828125, "learning_rate": 0.0003539245978235657, "loss": 0.1386, "step": 263370 }, { "epoch": 10.91, "grad_norm": 1.1875, "learning_rate": 0.00035391473399368505, "loss": 0.1968, "step": 263380 }, { "epoch": 10.91, "grad_norm": 0.302734375, "learning_rate": 0.0003539048699682481, "loss": 0.1813, "step": 263390 }, { "epoch": 10.91, "grad_norm": 1.703125, "learning_rate": 0.00035389500574727344, "loss": 0.1986, "step": 263400 }, { "epoch": 10.91, "grad_norm": 0.66796875, "learning_rate": 0.0003538851413307796, "loss": 0.2174, "step": 263410 }, { "epoch": 10.91, "grad_norm": 1.2109375, "learning_rate": 0.00035387527671878506, "loss": 0.1508, "step": 263420 }, { "epoch": 10.91, "grad_norm": 0.9296875, "learning_rate": 0.0003538654119113086, "loss": 0.2265, "step": 263430 }, { "epoch": 10.91, "grad_norm": 1.0390625, "learning_rate": 0.00035385554690836856, "loss": 0.1877, "step": 263440 }, { "epoch": 10.91, "grad_norm": 0.60546875, "learning_rate": 0.0003538456817099836, "loss": 0.2703, "step": 263450 }, { "epoch": 10.91, "grad_norm": 1.3359375, "learning_rate": 0.00035383581631617236, "loss": 0.2219, "step": 263460 }, { "epoch": 10.91, "grad_norm": 0.64453125, "learning_rate": 0.0003538259507269532, "loss": 0.1937, "step": 263470 }, { "epoch": 10.91, "grad_norm": 0.828125, "learning_rate": 0.0003538160849423449, "loss": 0.2365, "step": 263480 }, { "epoch": 10.91, "grad_norm": 0.44140625, "learning_rate": 0.00035380621896236595, "loss": 0.1965, "step": 263490 }, { "epoch": 10.91, "grad_norm": 0.921875, "learning_rate": 0.0003537963527870348, "loss": 0.1768, "step": 263500 }, { "epoch": 10.91, "grad_norm": 0.53125, "learning_rate": 0.00035378648641637027, "loss": 0.1979, "step": 263510 }, { "epoch": 10.91, "grad_norm": 0.96484375, "learning_rate": 0.00035377661985039067, "loss": 0.1547, "step": 263520 }, { "epoch": 10.92, "grad_norm": 0.87890625, "learning_rate": 0.00035376675308911476, "loss": 0.2202, "step": 263530 }, { "epoch": 10.92, "grad_norm": 0.65234375, "learning_rate": 0.00035375688613256104, "loss": 0.1971, "step": 263540 }, { "epoch": 10.92, "grad_norm": 0.53125, "learning_rate": 0.000353747018980748, "loss": 0.1542, "step": 263550 }, { "epoch": 10.92, "grad_norm": 0.87109375, "learning_rate": 0.0003537371516336942, "loss": 0.1832, "step": 263560 }, { "epoch": 10.92, "grad_norm": 0.5546875, "learning_rate": 0.00035372728409141844, "loss": 0.236, "step": 263570 }, { "epoch": 10.92, "grad_norm": 0.703125, "learning_rate": 0.000353717416353939, "loss": 0.2302, "step": 263580 }, { "epoch": 10.92, "grad_norm": 1.7265625, "learning_rate": 0.0003537075484212747, "loss": 0.1785, "step": 263590 }, { "epoch": 10.92, "grad_norm": 1.4921875, "learning_rate": 0.0003536976802934439, "loss": 0.1959, "step": 263600 }, { "epoch": 10.92, "grad_norm": 0.67578125, "learning_rate": 0.0003536878119704653, "loss": 0.195, "step": 263610 }, { "epoch": 10.92, "grad_norm": 2.015625, "learning_rate": 0.00035367794345235736, "loss": 0.2555, "step": 263620 }, { "epoch": 10.92, "grad_norm": 0.7265625, "learning_rate": 0.0003536680747391389, "loss": 0.1873, "step": 263630 }, { "epoch": 10.92, "grad_norm": 0.359375, "learning_rate": 0.0003536582058308282, "loss": 0.1908, "step": 263640 }, { "epoch": 10.92, "grad_norm": 0.6796875, "learning_rate": 0.00035364833672744397, "loss": 0.1718, "step": 263650 }, { "epoch": 10.92, "grad_norm": 0.9375, "learning_rate": 0.0003536384674290047, "loss": 0.1827, "step": 263660 }, { "epoch": 10.92, "grad_norm": 0.765625, "learning_rate": 0.00035362859793552914, "loss": 0.1813, "step": 263670 }, { "epoch": 10.92, "grad_norm": 0.7265625, "learning_rate": 0.00035361872824703574, "loss": 0.1819, "step": 263680 }, { "epoch": 10.92, "grad_norm": 0.498046875, "learning_rate": 0.000353608858363543, "loss": 0.2011, "step": 263690 }, { "epoch": 10.92, "grad_norm": 1.5546875, "learning_rate": 0.0003535989882850696, "loss": 0.2022, "step": 263700 }, { "epoch": 10.92, "grad_norm": 0.703125, "learning_rate": 0.0003535891180116342, "loss": 0.2902, "step": 263710 }, { "epoch": 10.92, "grad_norm": 0.8125, "learning_rate": 0.0003535792475432551, "loss": 0.2147, "step": 263720 }, { "epoch": 10.92, "grad_norm": 0.390625, "learning_rate": 0.00035356937687995116, "loss": 0.1912, "step": 263730 }, { "epoch": 10.92, "grad_norm": 1.15625, "learning_rate": 0.00035355950602174077, "loss": 0.1652, "step": 263740 }, { "epoch": 10.92, "grad_norm": 1.0078125, "learning_rate": 0.0003535496349686426, "loss": 0.1191, "step": 263750 }, { "epoch": 10.92, "grad_norm": 1.3671875, "learning_rate": 0.0003535397637206752, "loss": 0.1811, "step": 263760 }, { "epoch": 10.93, "grad_norm": 0.796875, "learning_rate": 0.0003535298922778571, "loss": 0.1857, "step": 263770 }, { "epoch": 10.93, "grad_norm": 0.8125, "learning_rate": 0.000353520020640207, "loss": 0.1589, "step": 263780 }, { "epoch": 10.93, "grad_norm": 0.70703125, "learning_rate": 0.0003535101488077434, "loss": 0.2015, "step": 263790 }, { "epoch": 10.93, "grad_norm": 0.71484375, "learning_rate": 0.00035350027678048484, "loss": 0.178, "step": 263800 }, { "epoch": 10.93, "grad_norm": 2.359375, "learning_rate": 0.0003534904045584499, "loss": 0.2141, "step": 263810 }, { "epoch": 10.93, "grad_norm": 0.99609375, "learning_rate": 0.0003534805321416573, "loss": 0.186, "step": 263820 }, { "epoch": 10.93, "grad_norm": 0.8671875, "learning_rate": 0.00035347065953012543, "loss": 0.1698, "step": 263830 }, { "epoch": 10.93, "grad_norm": 0.9296875, "learning_rate": 0.000353460786723873, "loss": 0.1966, "step": 263840 }, { "epoch": 10.93, "grad_norm": 0.447265625, "learning_rate": 0.0003534509137229185, "loss": 0.2365, "step": 263850 }, { "epoch": 10.93, "grad_norm": 0.9140625, "learning_rate": 0.00035344104052728055, "loss": 0.214, "step": 263860 }, { "epoch": 10.93, "grad_norm": 0.5546875, "learning_rate": 0.00035343116713697784, "loss": 0.2211, "step": 263870 }, { "epoch": 10.93, "grad_norm": 0.0, "learning_rate": 0.0003534212935520287, "loss": 0.2601, "step": 263880 }, { "epoch": 10.93, "grad_norm": 0.515625, "learning_rate": 0.0003534114197724519, "loss": 0.1752, "step": 263890 }, { "epoch": 10.93, "grad_norm": 0.51171875, "learning_rate": 0.000353401545798266, "loss": 0.2216, "step": 263900 }, { "epoch": 10.93, "grad_norm": 0.61328125, "learning_rate": 0.0003533916716294895, "loss": 0.1631, "step": 263910 }, { "epoch": 10.93, "grad_norm": 1.1640625, "learning_rate": 0.0003533817972661411, "loss": 0.2141, "step": 263920 }, { "epoch": 10.93, "grad_norm": 0.66015625, "learning_rate": 0.0003533719227082393, "loss": 0.1942, "step": 263930 }, { "epoch": 10.93, "grad_norm": 1.7421875, "learning_rate": 0.0003533620479558027, "loss": 0.2467, "step": 263940 }, { "epoch": 10.93, "grad_norm": 0.60546875, "learning_rate": 0.0003533521730088499, "loss": 0.1652, "step": 263950 }, { "epoch": 10.93, "grad_norm": 0.578125, "learning_rate": 0.00035334229786739945, "loss": 0.1692, "step": 263960 }, { "epoch": 10.93, "grad_norm": 0.61328125, "learning_rate": 0.00035333242253147003, "loss": 0.2112, "step": 263970 }, { "epoch": 10.93, "grad_norm": 0.72265625, "learning_rate": 0.00035332254700108, "loss": 0.1606, "step": 263980 }, { "epoch": 10.93, "grad_norm": 0.828125, "learning_rate": 0.00035331267127624823, "loss": 0.2161, "step": 263990 }, { "epoch": 10.93, "grad_norm": 0.55859375, "learning_rate": 0.00035330279535699316, "loss": 0.2129, "step": 264000 }, { "epoch": 10.94, "grad_norm": 0.458984375, "learning_rate": 0.00035329291924333337, "loss": 0.1693, "step": 264010 }, { "epoch": 10.94, "grad_norm": 0.9140625, "learning_rate": 0.0003532830429352874, "loss": 0.1677, "step": 264020 }, { "epoch": 10.94, "grad_norm": 0.41796875, "learning_rate": 0.00035327316643287397, "loss": 0.1782, "step": 264030 }, { "epoch": 10.94, "grad_norm": 0.5078125, "learning_rate": 0.0003532632897361115, "loss": 0.1798, "step": 264040 }, { "epoch": 10.94, "grad_norm": 0.734375, "learning_rate": 0.0003532534128450188, "loss": 0.2237, "step": 264050 }, { "epoch": 10.94, "grad_norm": 0.384765625, "learning_rate": 0.0003532435357596142, "loss": 0.2044, "step": 264060 }, { "epoch": 10.94, "grad_norm": 0.41796875, "learning_rate": 0.00035323365847991653, "loss": 0.1876, "step": 264070 }, { "epoch": 10.94, "grad_norm": 0.83203125, "learning_rate": 0.0003532237810059442, "loss": 0.1921, "step": 264080 }, { "epoch": 10.94, "grad_norm": 0.81640625, "learning_rate": 0.00035321390333771584, "loss": 0.203, "step": 264090 }, { "epoch": 10.94, "grad_norm": 0.443359375, "learning_rate": 0.0003532040254752501, "loss": 0.2254, "step": 264100 }, { "epoch": 10.94, "grad_norm": 0.7578125, "learning_rate": 0.00035319414741856556, "loss": 0.1904, "step": 264110 }, { "epoch": 10.94, "grad_norm": 0.458984375, "learning_rate": 0.00035318426916768064, "loss": 0.2125, "step": 264120 }, { "epoch": 10.94, "grad_norm": 0.74609375, "learning_rate": 0.0003531743907226142, "loss": 0.1908, "step": 264130 }, { "epoch": 10.94, "grad_norm": 0.95703125, "learning_rate": 0.0003531645120833847, "loss": 0.2035, "step": 264140 }, { "epoch": 10.94, "grad_norm": 1.0234375, "learning_rate": 0.00035315463325001064, "loss": 0.2146, "step": 264150 }, { "epoch": 10.94, "grad_norm": 1.28125, "learning_rate": 0.0003531447542225108, "loss": 0.1977, "step": 264160 }, { "epoch": 10.94, "grad_norm": 1.703125, "learning_rate": 0.00035313487500090356, "loss": 0.2126, "step": 264170 }, { "epoch": 10.94, "grad_norm": 0.58984375, "learning_rate": 0.0003531249955852077, "loss": 0.1797, "step": 264180 }, { "epoch": 10.94, "grad_norm": 1.296875, "learning_rate": 0.00035311511597544174, "loss": 0.1812, "step": 264190 }, { "epoch": 10.94, "grad_norm": 1.109375, "learning_rate": 0.00035310523617162424, "loss": 0.181, "step": 264200 }, { "epoch": 10.94, "grad_norm": 1.0078125, "learning_rate": 0.00035309535617377384, "loss": 0.1854, "step": 264210 }, { "epoch": 10.94, "grad_norm": 1.5390625, "learning_rate": 0.0003530854759819091, "loss": 0.2139, "step": 264220 }, { "epoch": 10.94, "grad_norm": 0.7890625, "learning_rate": 0.0003530755955960486, "loss": 0.2003, "step": 264230 }, { "epoch": 10.94, "grad_norm": 0.462890625, "learning_rate": 0.000353065715016211, "loss": 0.1531, "step": 264240 }, { "epoch": 10.95, "grad_norm": 0.77734375, "learning_rate": 0.00035305583424241485, "loss": 0.2232, "step": 264250 }, { "epoch": 10.95, "grad_norm": 1.0390625, "learning_rate": 0.0003530459532746788, "loss": 0.2303, "step": 264260 }, { "epoch": 10.95, "grad_norm": 0.51953125, "learning_rate": 0.0003530360721130213, "loss": 0.2003, "step": 264270 }, { "epoch": 10.95, "grad_norm": 0.166015625, "learning_rate": 0.00035302619075746103, "loss": 0.1465, "step": 264280 }, { "epoch": 10.95, "grad_norm": 0.92578125, "learning_rate": 0.0003530163092080167, "loss": 0.1707, "step": 264290 }, { "epoch": 10.95, "grad_norm": 1.2734375, "learning_rate": 0.0003530064274647067, "loss": 0.232, "step": 264300 }, { "epoch": 10.95, "grad_norm": 0.75390625, "learning_rate": 0.0003529965455275498, "loss": 0.1768, "step": 264310 }, { "epoch": 10.95, "grad_norm": 0.36328125, "learning_rate": 0.0003529866633965645, "loss": 0.1842, "step": 264320 }, { "epoch": 10.95, "grad_norm": 0.5234375, "learning_rate": 0.00035297678107176935, "loss": 0.2402, "step": 264330 }, { "epoch": 10.95, "grad_norm": 0.79296875, "learning_rate": 0.0003529668985531831, "loss": 0.1818, "step": 264340 }, { "epoch": 10.95, "grad_norm": 1.09375, "learning_rate": 0.0003529570158408243, "loss": 0.2199, "step": 264350 }, { "epoch": 10.95, "grad_norm": 0.9140625, "learning_rate": 0.00035294713293471136, "loss": 0.2311, "step": 264360 }, { "epoch": 10.95, "grad_norm": 0.64453125, "learning_rate": 0.0003529372498348632, "loss": 0.17, "step": 264370 }, { "epoch": 10.95, "grad_norm": 0.609375, "learning_rate": 0.0003529273665412982, "loss": 0.1949, "step": 264380 }, { "epoch": 10.95, "grad_norm": 0.33984375, "learning_rate": 0.000352917483054035, "loss": 0.1852, "step": 264390 }, { "epoch": 10.95, "grad_norm": 0.83984375, "learning_rate": 0.00035290759937309224, "loss": 0.2426, "step": 264400 }, { "epoch": 10.95, "grad_norm": 0.51953125, "learning_rate": 0.0003528977154984884, "loss": 0.1332, "step": 264410 }, { "epoch": 10.95, "grad_norm": 0.6796875, "learning_rate": 0.0003528878314302423, "loss": 0.2168, "step": 264420 }, { "epoch": 10.95, "grad_norm": 1.28125, "learning_rate": 0.0003528779471683723, "loss": 0.1708, "step": 264430 }, { "epoch": 10.95, "grad_norm": 0.95703125, "learning_rate": 0.0003528680627128971, "loss": 0.178, "step": 264440 }, { "epoch": 10.95, "grad_norm": 0.57421875, "learning_rate": 0.0003528581780638355, "loss": 0.1689, "step": 264450 }, { "epoch": 10.95, "grad_norm": 0.71484375, "learning_rate": 0.00035284829322120573, "loss": 0.1832, "step": 264460 }, { "epoch": 10.95, "grad_norm": 0.94140625, "learning_rate": 0.0003528384081850266, "loss": 0.2187, "step": 264470 }, { "epoch": 10.95, "grad_norm": 0.6640625, "learning_rate": 0.00035282852295531676, "loss": 0.2148, "step": 264480 }, { "epoch": 10.96, "grad_norm": 1.1640625, "learning_rate": 0.00035281863753209463, "loss": 0.2122, "step": 264490 }, { "epoch": 10.96, "grad_norm": 0.431640625, "learning_rate": 0.0003528087519153791, "loss": 0.185, "step": 264500 }, { "epoch": 10.96, "grad_norm": 0.93359375, "learning_rate": 0.0003527988661051885, "loss": 0.2408, "step": 264510 }, { "epoch": 10.96, "grad_norm": 0.92578125, "learning_rate": 0.0003527889801015415, "loss": 0.2385, "step": 264520 }, { "epoch": 10.96, "grad_norm": 0.9140625, "learning_rate": 0.00035277909390445686, "loss": 0.235, "step": 264530 }, { "epoch": 10.96, "grad_norm": 0.83984375, "learning_rate": 0.00035276920751395297, "loss": 0.1467, "step": 264540 }, { "epoch": 10.96, "grad_norm": 0.64453125, "learning_rate": 0.00035275932093004856, "loss": 0.2537, "step": 264550 }, { "epoch": 10.96, "grad_norm": 0.68359375, "learning_rate": 0.00035274943415276216, "loss": 0.2209, "step": 264560 }, { "epoch": 10.96, "grad_norm": 0.875, "learning_rate": 0.0003527395471821124, "loss": 0.2057, "step": 264570 }, { "epoch": 10.96, "grad_norm": 1.125, "learning_rate": 0.000352729660018118, "loss": 0.1322, "step": 264580 }, { "epoch": 10.96, "grad_norm": 1.9296875, "learning_rate": 0.00035271977266079747, "loss": 0.2609, "step": 264590 }, { "epoch": 10.96, "grad_norm": 1.53125, "learning_rate": 0.0003527098851101694, "loss": 0.181, "step": 264600 }, { "epoch": 10.96, "grad_norm": 0.73046875, "learning_rate": 0.0003526999973662524, "loss": 0.2104, "step": 264610 }, { "epoch": 10.96, "grad_norm": 2.265625, "learning_rate": 0.0003526901094290651, "loss": 0.214, "step": 264620 }, { "epoch": 10.96, "grad_norm": 0.3828125, "learning_rate": 0.0003526802212986261, "loss": 0.1904, "step": 264630 }, { "epoch": 10.96, "grad_norm": 0.30859375, "learning_rate": 0.00035267033297495406, "loss": 0.206, "step": 264640 }, { "epoch": 10.96, "grad_norm": 1.3515625, "learning_rate": 0.00035266044445806744, "loss": 0.2306, "step": 264650 }, { "epoch": 10.96, "grad_norm": 0.65625, "learning_rate": 0.00035265055574798505, "loss": 0.1982, "step": 264660 }, { "epoch": 10.96, "grad_norm": 0.53125, "learning_rate": 0.00035264066684472537, "loss": 0.2304, "step": 264670 }, { "epoch": 10.96, "grad_norm": 0.72265625, "learning_rate": 0.000352630777748307, "loss": 0.1851, "step": 264680 }, { "epoch": 10.96, "grad_norm": 0.77734375, "learning_rate": 0.0003526208884587486, "loss": 0.2128, "step": 264690 }, { "epoch": 10.96, "grad_norm": 1.015625, "learning_rate": 0.0003526109989760688, "loss": 0.205, "step": 264700 }, { "epoch": 10.96, "grad_norm": 0.69140625, "learning_rate": 0.0003526011093002862, "loss": 0.1882, "step": 264710 }, { "epoch": 10.96, "grad_norm": 1.2265625, "learning_rate": 0.0003525912194314193, "loss": 0.1765, "step": 264720 }, { "epoch": 10.97, "grad_norm": 0.91015625, "learning_rate": 0.00035258132936948685, "loss": 0.2055, "step": 264730 }, { "epoch": 10.97, "grad_norm": 0.80078125, "learning_rate": 0.0003525714391145075, "loss": 0.2019, "step": 264740 }, { "epoch": 10.97, "grad_norm": 0.83203125, "learning_rate": 0.0003525615486664997, "loss": 0.2139, "step": 264750 }, { "epoch": 10.97, "grad_norm": 0.8359375, "learning_rate": 0.00035255165802548213, "loss": 0.1823, "step": 264760 }, { "epoch": 10.97, "grad_norm": 0.546875, "learning_rate": 0.0003525417671914735, "loss": 0.2006, "step": 264770 }, { "epoch": 10.97, "grad_norm": 1.0625, "learning_rate": 0.00035253187616449225, "loss": 0.1379, "step": 264780 }, { "epoch": 10.97, "grad_norm": 0.400390625, "learning_rate": 0.0003525219849445571, "loss": 0.1593, "step": 264790 }, { "epoch": 10.97, "grad_norm": 0.474609375, "learning_rate": 0.00035251209353168655, "loss": 0.2512, "step": 264800 }, { "epoch": 10.97, "grad_norm": 0.3359375, "learning_rate": 0.00035250220192589943, "loss": 0.1332, "step": 264810 }, { "epoch": 10.97, "grad_norm": 0.70703125, "learning_rate": 0.00035249231012721426, "loss": 0.205, "step": 264820 }, { "epoch": 10.97, "grad_norm": 0.796875, "learning_rate": 0.0003524824181356496, "loss": 0.1702, "step": 264830 }, { "epoch": 10.97, "grad_norm": 0.28515625, "learning_rate": 0.00035247252595122403, "loss": 0.2153, "step": 264840 }, { "epoch": 10.97, "grad_norm": 0.61328125, "learning_rate": 0.0003524626335739564, "loss": 0.1935, "step": 264850 }, { "epoch": 10.97, "grad_norm": 0.396484375, "learning_rate": 0.00035245274100386494, "loss": 0.1473, "step": 264860 }, { "epoch": 10.97, "grad_norm": 0.8359375, "learning_rate": 0.00035244284824096863, "loss": 0.1501, "step": 264870 }, { "epoch": 10.97, "grad_norm": 0.921875, "learning_rate": 0.00035243295528528584, "loss": 0.2107, "step": 264880 }, { "epoch": 10.97, "grad_norm": 0.2333984375, "learning_rate": 0.0003524230621368353, "loss": 0.2323, "step": 264890 }, { "epoch": 10.97, "grad_norm": 0.34375, "learning_rate": 0.00035241316879563577, "loss": 0.2564, "step": 264900 }, { "epoch": 10.97, "grad_norm": 1.0390625, "learning_rate": 0.00035240327526170557, "loss": 0.166, "step": 264910 }, { "epoch": 10.97, "grad_norm": 0.66015625, "learning_rate": 0.0003523933815350635, "loss": 0.1841, "step": 264920 }, { "epoch": 10.97, "grad_norm": 0.95703125, "learning_rate": 0.0003523834876157281, "loss": 0.1925, "step": 264930 }, { "epoch": 10.97, "grad_norm": 0.859375, "learning_rate": 0.0003523735935037182, "loss": 0.2045, "step": 264940 }, { "epoch": 10.97, "grad_norm": 0.921875, "learning_rate": 0.0003523636991990521, "loss": 0.2233, "step": 264950 }, { "epoch": 10.97, "grad_norm": 1.03125, "learning_rate": 0.0003523538047017486, "loss": 0.1936, "step": 264960 }, { "epoch": 10.98, "grad_norm": 0.83984375, "learning_rate": 0.0003523439100118263, "loss": 0.1861, "step": 264970 }, { "epoch": 10.98, "grad_norm": 1.03125, "learning_rate": 0.00035233401512930383, "loss": 0.1755, "step": 264980 }, { "epoch": 10.98, "grad_norm": 0.54296875, "learning_rate": 0.00035232412005419976, "loss": 0.1998, "step": 264990 }, { "epoch": 10.98, "grad_norm": 1.1015625, "learning_rate": 0.00035231422478653276, "loss": 0.2056, "step": 265000 }, { "epoch": 10.98, "grad_norm": 0.6875, "learning_rate": 0.00035230432932632143, "loss": 0.2059, "step": 265010 }, { "epoch": 10.98, "grad_norm": 1.359375, "learning_rate": 0.00035229443367358447, "loss": 0.144, "step": 265020 }, { "epoch": 10.98, "grad_norm": 0.93359375, "learning_rate": 0.00035228453782834037, "loss": 0.1998, "step": 265030 }, { "epoch": 10.98, "grad_norm": 0.484375, "learning_rate": 0.0003522746417906078, "loss": 0.1686, "step": 265040 }, { "epoch": 10.98, "grad_norm": 1.5859375, "learning_rate": 0.0003522647455604054, "loss": 0.2454, "step": 265050 }, { "epoch": 10.98, "grad_norm": 0.91015625, "learning_rate": 0.00035225484913775184, "loss": 0.1971, "step": 265060 }, { "epoch": 10.98, "grad_norm": 1.03125, "learning_rate": 0.00035224495252266565, "loss": 0.186, "step": 265070 }, { "epoch": 10.98, "grad_norm": 2.265625, "learning_rate": 0.00035223505571516556, "loss": 0.1608, "step": 265080 }, { "epoch": 10.98, "grad_norm": 1.8203125, "learning_rate": 0.00035222515871527005, "loss": 0.1969, "step": 265090 }, { "epoch": 10.98, "grad_norm": 0.65234375, "learning_rate": 0.0003522152615229979, "loss": 0.209, "step": 265100 }, { "epoch": 10.98, "grad_norm": 0.232421875, "learning_rate": 0.0003522053641383677, "loss": 0.1735, "step": 265110 }, { "epoch": 10.98, "grad_norm": 0.55078125, "learning_rate": 0.0003521954665613979, "loss": 0.1865, "step": 265120 }, { "epoch": 10.98, "grad_norm": 0.7265625, "learning_rate": 0.0003521855687921074, "loss": 0.2147, "step": 265130 }, { "epoch": 10.98, "grad_norm": 0.75, "learning_rate": 0.0003521756708305146, "loss": 0.1972, "step": 265140 }, { "epoch": 10.98, "grad_norm": 0.80859375, "learning_rate": 0.0003521657726766383, "loss": 0.1371, "step": 265150 }, { "epoch": 10.98, "grad_norm": 1.15625, "learning_rate": 0.000352155874330497, "loss": 0.1828, "step": 265160 }, { "epoch": 10.98, "grad_norm": 0.0, "learning_rate": 0.0003521459757921094, "loss": 0.2027, "step": 265170 }, { "epoch": 10.98, "grad_norm": 0.51953125, "learning_rate": 0.0003521360770614941, "loss": 0.1939, "step": 265180 }, { "epoch": 10.98, "grad_norm": 0.380859375, "learning_rate": 0.00035212617813866975, "loss": 0.1852, "step": 265190 }, { "epoch": 10.98, "grad_norm": 1.21875, "learning_rate": 0.00035211627902365496, "loss": 0.2119, "step": 265200 }, { "epoch": 10.98, "grad_norm": 0.546875, "learning_rate": 0.0003521063797164683, "loss": 0.2218, "step": 265210 }, { "epoch": 10.99, "grad_norm": 0.921875, "learning_rate": 0.00035209648021712847, "loss": 0.1643, "step": 265220 }, { "epoch": 10.99, "grad_norm": 0.62109375, "learning_rate": 0.0003520865805256542, "loss": 0.2331, "step": 265230 }, { "epoch": 10.99, "grad_norm": 0.32421875, "learning_rate": 0.00035207668064206386, "loss": 0.2172, "step": 265240 }, { "epoch": 10.99, "grad_norm": 0.5234375, "learning_rate": 0.0003520667805663763, "loss": 0.169, "step": 265250 }, { "epoch": 10.99, "grad_norm": 1.1328125, "learning_rate": 0.0003520568802986101, "loss": 0.1819, "step": 265260 }, { "epoch": 10.99, "grad_norm": 1.1640625, "learning_rate": 0.0003520469798387839, "loss": 0.1888, "step": 265270 }, { "epoch": 10.99, "grad_norm": 1.3359375, "learning_rate": 0.0003520370791869162, "loss": 0.1875, "step": 265280 }, { "epoch": 10.99, "grad_norm": 0.6484375, "learning_rate": 0.0003520271783430258, "loss": 0.1551, "step": 265290 }, { "epoch": 10.99, "grad_norm": 0.875, "learning_rate": 0.0003520172773071313, "loss": 0.2674, "step": 265300 }, { "epoch": 10.99, "grad_norm": 0.74609375, "learning_rate": 0.00035200737607925134, "loss": 0.1658, "step": 265310 }, { "epoch": 10.99, "grad_norm": 0.8359375, "learning_rate": 0.0003519974746594044, "loss": 0.1347, "step": 265320 }, { "epoch": 10.99, "grad_norm": 0.734375, "learning_rate": 0.00035198757304760926, "loss": 0.1951, "step": 265330 }, { "epoch": 10.99, "grad_norm": 1.125, "learning_rate": 0.0003519776712438845, "loss": 0.2176, "step": 265340 }, { "epoch": 10.99, "grad_norm": 1.9296875, "learning_rate": 0.0003519677692482488, "loss": 0.2108, "step": 265350 }, { "epoch": 10.99, "grad_norm": 1.0625, "learning_rate": 0.0003519578670607208, "loss": 0.2099, "step": 265360 }, { "epoch": 10.99, "grad_norm": 0.322265625, "learning_rate": 0.00035194796468131906, "loss": 0.1826, "step": 265370 }, { "epoch": 10.99, "grad_norm": 0.640625, "learning_rate": 0.0003519380621100623, "loss": 0.2153, "step": 265380 }, { "epoch": 10.99, "grad_norm": 0.74609375, "learning_rate": 0.0003519281593469691, "loss": 0.1622, "step": 265390 }, { "epoch": 10.99, "grad_norm": 0.68359375, "learning_rate": 0.0003519182563920581, "loss": 0.2288, "step": 265400 }, { "epoch": 10.99, "grad_norm": 0.3515625, "learning_rate": 0.00035190835324534796, "loss": 0.2263, "step": 265410 }, { "epoch": 10.99, "grad_norm": 3.109375, "learning_rate": 0.0003518984499068573, "loss": 0.1895, "step": 265420 }, { "epoch": 10.99, "grad_norm": 2.34375, "learning_rate": 0.00035188854637660473, "loss": 0.1348, "step": 265430 }, { "epoch": 10.99, "grad_norm": 1.453125, "learning_rate": 0.000351878642654609, "loss": 0.2091, "step": 265440 }, { "epoch": 10.99, "grad_norm": 0.41796875, "learning_rate": 0.0003518687387408886, "loss": 0.1671, "step": 265450 }, { "epoch": 11.0, "grad_norm": 0.64453125, "learning_rate": 0.0003518588346354622, "loss": 0.1682, "step": 265460 }, { "epoch": 11.0, "grad_norm": 1.671875, "learning_rate": 0.0003518489303383486, "loss": 0.2119, "step": 265470 }, { "epoch": 11.0, "grad_norm": 1.109375, "learning_rate": 0.0003518390258495662, "loss": 0.22, "step": 265480 }, { "epoch": 11.0, "grad_norm": 2.40625, "learning_rate": 0.0003518291211691338, "loss": 0.2039, "step": 265490 }, { "epoch": 11.0, "grad_norm": 0.515625, "learning_rate": 0.0003518192162970699, "loss": 0.1981, "step": 265500 }, { "epoch": 11.0, "grad_norm": 0.50390625, "learning_rate": 0.00035180931123339335, "loss": 0.1771, "step": 265510 }, { "epoch": 11.0, "grad_norm": 0.83203125, "learning_rate": 0.00035179940597812267, "loss": 0.1771, "step": 265520 }, { "epoch": 11.0, "grad_norm": 0.83203125, "learning_rate": 0.0003517895005312764, "loss": 0.1486, "step": 265530 }, { "epoch": 11.0, "grad_norm": 0.55078125, "learning_rate": 0.0003517795948928734, "loss": 0.1846, "step": 265540 }, { "epoch": 11.0, "grad_norm": 0.6015625, "learning_rate": 0.00035176968906293213, "loss": 0.1766, "step": 265550 }, { "epoch": 11.0, "grad_norm": 0.451171875, "learning_rate": 0.00035175978304147123, "loss": 0.2014, "step": 265560 }, { "epoch": 11.0, "grad_norm": 0.5703125, "learning_rate": 0.0003517498768285095, "loss": 0.1637, "step": 265570 }, { "epoch": 11.0, "grad_norm": 0.9375, "learning_rate": 0.0003517399704240655, "loss": 0.225, "step": 265580 }, { "epoch": 11.0, "grad_norm": 0.4453125, "learning_rate": 0.00035173006382815773, "loss": 0.1653, "step": 265590 }, { "epoch": 11.0, "grad_norm": 1.171875, "learning_rate": 0.0003517201570408052, "loss": 0.2512, "step": 265600 }, { "epoch": 11.0, "grad_norm": 0.61328125, "learning_rate": 0.00035171025006202613, "loss": 0.194, "step": 265610 }, { "epoch": 11.0, "grad_norm": 1.359375, "learning_rate": 0.00035170034289183936, "loss": 0.1708, "step": 265620 }, { "epoch": 11.0, "grad_norm": 0.3203125, "learning_rate": 0.00035169043553026364, "loss": 0.1912, "step": 265630 }, { "epoch": 11.0, "grad_norm": 0.7734375, "learning_rate": 0.00035168052797731735, "loss": 0.1872, "step": 265640 }, { "epoch": 11.0, "grad_norm": 0.318359375, "learning_rate": 0.00035167062023301946, "loss": 0.1992, "step": 265650 }, { "epoch": 11.0, "grad_norm": 1.296875, "learning_rate": 0.00035166071229738834, "loss": 0.1745, "step": 265660 }, { "epoch": 11.0, "grad_norm": 0.6171875, "learning_rate": 0.0003516508041704427, "loss": 0.1863, "step": 265670 }, { "epoch": 11.0, "grad_norm": 1.703125, "learning_rate": 0.0003516408958522013, "loss": 0.1846, "step": 265680 }, { "epoch": 11.0, "grad_norm": 1.09375, "learning_rate": 0.0003516309873426827, "loss": 0.1863, "step": 265690 }, { "epoch": 11.01, "grad_norm": 1.671875, "learning_rate": 0.00035162107864190553, "loss": 0.1664, "step": 265700 }, { "epoch": 11.01, "grad_norm": 0.92578125, "learning_rate": 0.00035161116974988856, "loss": 0.1976, "step": 265710 }, { "epoch": 11.01, "grad_norm": 0.63671875, "learning_rate": 0.00035160126066665015, "loss": 0.1794, "step": 265720 }, { "epoch": 11.01, "grad_norm": 1.1953125, "learning_rate": 0.00035159135139220933, "loss": 0.1595, "step": 265730 }, { "epoch": 11.01, "grad_norm": 0.53515625, "learning_rate": 0.00035158144192658447, "loss": 0.1909, "step": 265740 }, { "epoch": 11.01, "grad_norm": 2.453125, "learning_rate": 0.0003515715322697943, "loss": 0.178, "step": 265750 }, { "epoch": 11.01, "grad_norm": 0.8515625, "learning_rate": 0.0003515616224218575, "loss": 0.1822, "step": 265760 }, { "epoch": 11.01, "grad_norm": 0.76953125, "learning_rate": 0.0003515517123827926, "loss": 0.1863, "step": 265770 }, { "epoch": 11.01, "grad_norm": 0.80078125, "learning_rate": 0.0003515418021526185, "loss": 0.2057, "step": 265780 }, { "epoch": 11.01, "grad_norm": 2.28125, "learning_rate": 0.0003515318917313536, "loss": 0.1788, "step": 265790 }, { "epoch": 11.01, "grad_norm": 0.349609375, "learning_rate": 0.0003515219811190167, "loss": 0.177, "step": 265800 }, { "epoch": 11.01, "grad_norm": 0.5625, "learning_rate": 0.0003515120703156264, "loss": 0.1956, "step": 265810 }, { "epoch": 11.01, "grad_norm": 1.3515625, "learning_rate": 0.0003515021593212012, "loss": 0.1559, "step": 265820 }, { "epoch": 11.01, "grad_norm": 0.70703125, "learning_rate": 0.00035149224813576006, "loss": 0.1819, "step": 265830 }, { "epoch": 11.01, "grad_norm": 0.48046875, "learning_rate": 0.0003514823367593214, "loss": 0.1665, "step": 265840 }, { "epoch": 11.01, "grad_norm": 1.03125, "learning_rate": 0.0003514724251919039, "loss": 0.2022, "step": 265850 }, { "epoch": 11.01, "grad_norm": 1.203125, "learning_rate": 0.00035146251343352636, "loss": 0.1689, "step": 265860 }, { "epoch": 11.01, "grad_norm": 2.046875, "learning_rate": 0.0003514526014842073, "loss": 0.2262, "step": 265870 }, { "epoch": 11.01, "grad_norm": 0.494140625, "learning_rate": 0.00035144268934396536, "loss": 0.1678, "step": 265880 }, { "epoch": 11.01, "grad_norm": 0.7578125, "learning_rate": 0.00035143277701281926, "loss": 0.1838, "step": 265890 }, { "epoch": 11.01, "grad_norm": 0.62109375, "learning_rate": 0.0003514228644907876, "loss": 0.2154, "step": 265900 }, { "epoch": 11.01, "grad_norm": 0.7421875, "learning_rate": 0.0003514129517778891, "loss": 0.2358, "step": 265910 }, { "epoch": 11.01, "grad_norm": 0.88671875, "learning_rate": 0.00035140303887414237, "loss": 0.185, "step": 265920 }, { "epoch": 11.01, "grad_norm": 0.80859375, "learning_rate": 0.00035139312577956594, "loss": 0.1851, "step": 265930 }, { "epoch": 11.02, "grad_norm": 0.66796875, "learning_rate": 0.0003513832124941788, "loss": 0.2001, "step": 265940 }, { "epoch": 11.02, "grad_norm": 0.9453125, "learning_rate": 0.0003513732990179993, "loss": 0.1895, "step": 265950 }, { "epoch": 11.02, "grad_norm": 0.74609375, "learning_rate": 0.00035136338535104615, "loss": 0.2138, "step": 265960 }, { "epoch": 11.02, "grad_norm": 0.82421875, "learning_rate": 0.0003513534714933382, "loss": 0.2156, "step": 265970 }, { "epoch": 11.02, "grad_norm": 0.80078125, "learning_rate": 0.0003513435574448939, "loss": 0.1779, "step": 265980 }, { "epoch": 11.02, "grad_norm": 0.703125, "learning_rate": 0.0003513336432057319, "loss": 0.2055, "step": 265990 }, { "epoch": 11.02, "grad_norm": 1.59375, "learning_rate": 0.00035132372877587104, "loss": 0.1835, "step": 266000 }, { "epoch": 11.02, "grad_norm": 0.466796875, "learning_rate": 0.0003513138141553297, "loss": 0.2003, "step": 266010 }, { "epoch": 11.02, "grad_norm": 0.70703125, "learning_rate": 0.00035130389934412686, "loss": 0.2063, "step": 266020 }, { "epoch": 11.02, "grad_norm": 0.341796875, "learning_rate": 0.000351293984342281, "loss": 0.181, "step": 266030 }, { "epoch": 11.02, "grad_norm": 1.03125, "learning_rate": 0.0003512840691498107, "loss": 0.1597, "step": 266040 }, { "epoch": 11.02, "grad_norm": 0.73828125, "learning_rate": 0.0003512741537667348, "loss": 0.1511, "step": 266050 }, { "epoch": 11.02, "grad_norm": 1.859375, "learning_rate": 0.0003512642381930718, "loss": 0.2074, "step": 266060 }, { "epoch": 11.02, "grad_norm": 0.84765625, "learning_rate": 0.0003512543224288406, "loss": 0.1696, "step": 266070 }, { "epoch": 11.02, "grad_norm": 0.9296875, "learning_rate": 0.0003512444064740596, "loss": 0.1793, "step": 266080 }, { "epoch": 11.02, "grad_norm": 0.26171875, "learning_rate": 0.0003512344903287475, "loss": 0.1923, "step": 266090 }, { "epoch": 11.02, "grad_norm": 1.546875, "learning_rate": 0.00035122457399292315, "loss": 0.2095, "step": 266100 }, { "epoch": 11.02, "grad_norm": 0.72265625, "learning_rate": 0.00035121465746660495, "loss": 0.1791, "step": 266110 }, { "epoch": 11.02, "grad_norm": 0.6875, "learning_rate": 0.0003512047407498118, "loss": 0.2063, "step": 266120 }, { "epoch": 11.02, "grad_norm": 0.87109375, "learning_rate": 0.00035119482384256224, "loss": 0.2205, "step": 266130 }, { "epoch": 11.02, "grad_norm": 0.32421875, "learning_rate": 0.0003511849067448749, "loss": 0.1693, "step": 266140 }, { "epoch": 11.02, "grad_norm": 1.5234375, "learning_rate": 0.0003511749894567685, "loss": 0.1753, "step": 266150 }, { "epoch": 11.02, "grad_norm": 0.361328125, "learning_rate": 0.00035116507197826175, "loss": 0.2035, "step": 266160 }, { "epoch": 11.02, "grad_norm": 0.52734375, "learning_rate": 0.0003511551543093732, "loss": 0.197, "step": 266170 }, { "epoch": 11.03, "grad_norm": 0.7421875, "learning_rate": 0.00035114523645012166, "loss": 0.1612, "step": 266180 }, { "epoch": 11.03, "grad_norm": 1.359375, "learning_rate": 0.0003511353184005256, "loss": 0.2522, "step": 266190 }, { "epoch": 11.03, "grad_norm": 1.125, "learning_rate": 0.00035112540016060385, "loss": 0.1851, "step": 266200 }, { "epoch": 11.03, "grad_norm": 0.7734375, "learning_rate": 0.000351115481730375, "loss": 0.2044, "step": 266210 }, { "epoch": 11.03, "grad_norm": 0.82421875, "learning_rate": 0.00035110556310985773, "loss": 0.1935, "step": 266220 }, { "epoch": 11.03, "grad_norm": 1.0, "learning_rate": 0.0003510956442990707, "loss": 0.2111, "step": 266230 }, { "epoch": 11.03, "grad_norm": 1.1171875, "learning_rate": 0.00035108572529803263, "loss": 0.1905, "step": 266240 }, { "epoch": 11.03, "grad_norm": 0.734375, "learning_rate": 0.0003510758061067621, "loss": 0.1821, "step": 266250 }, { "epoch": 11.03, "grad_norm": 1.0859375, "learning_rate": 0.00035106588672527785, "loss": 0.1759, "step": 266260 }, { "epoch": 11.03, "grad_norm": 0.640625, "learning_rate": 0.0003510559671535985, "loss": 0.1753, "step": 266270 }, { "epoch": 11.03, "grad_norm": 0.52734375, "learning_rate": 0.00035104604739174275, "loss": 0.1849, "step": 266280 }, { "epoch": 11.03, "grad_norm": 0.9140625, "learning_rate": 0.0003510361274397292, "loss": 0.1837, "step": 266290 }, { "epoch": 11.03, "grad_norm": 1.9453125, "learning_rate": 0.0003510262072975766, "loss": 0.2209, "step": 266300 }, { "epoch": 11.03, "grad_norm": 0.59765625, "learning_rate": 0.0003510162869653036, "loss": 0.1632, "step": 266310 }, { "epoch": 11.03, "grad_norm": 0.68359375, "learning_rate": 0.00035100636644292885, "loss": 0.1544, "step": 266320 }, { "epoch": 11.03, "grad_norm": 0.92578125, "learning_rate": 0.000350996445730471, "loss": 0.1928, "step": 266330 }, { "epoch": 11.03, "grad_norm": 0.7109375, "learning_rate": 0.0003509865248279488, "loss": 0.1878, "step": 266340 }, { "epoch": 11.03, "grad_norm": 0.66796875, "learning_rate": 0.0003509766037353809, "loss": 0.2056, "step": 266350 }, { "epoch": 11.03, "grad_norm": 1.75, "learning_rate": 0.00035096668245278577, "loss": 0.2095, "step": 266360 }, { "epoch": 11.03, "grad_norm": 0.86328125, "learning_rate": 0.00035095676098018244, "loss": 0.2002, "step": 266370 }, { "epoch": 11.03, "grad_norm": 1.5703125, "learning_rate": 0.00035094683931758926, "loss": 0.2074, "step": 266380 }, { "epoch": 11.03, "grad_norm": 0.7734375, "learning_rate": 0.00035093691746502506, "loss": 0.1952, "step": 266390 }, { "epoch": 11.03, "grad_norm": 0.54296875, "learning_rate": 0.0003509269954225085, "loss": 0.1565, "step": 266400 }, { "epoch": 11.03, "grad_norm": 0.6484375, "learning_rate": 0.00035091707319005814, "loss": 0.1783, "step": 266410 }, { "epoch": 11.04, "grad_norm": 1.1171875, "learning_rate": 0.0003509071507676929, "loss": 0.1733, "step": 266420 }, { "epoch": 11.04, "grad_norm": 2.421875, "learning_rate": 0.00035089722815543126, "loss": 0.2062, "step": 266430 }, { "epoch": 11.04, "grad_norm": 0.8828125, "learning_rate": 0.00035088730535329184, "loss": 0.1907, "step": 266440 }, { "epoch": 11.04, "grad_norm": 0.9453125, "learning_rate": 0.0003508773823612935, "loss": 0.2086, "step": 266450 }, { "epoch": 11.04, "grad_norm": 1.0234375, "learning_rate": 0.0003508674591794548, "loss": 0.1526, "step": 266460 }, { "epoch": 11.04, "grad_norm": 0.48046875, "learning_rate": 0.0003508575358077944, "loss": 0.1587, "step": 266470 }, { "epoch": 11.04, "grad_norm": 1.125, "learning_rate": 0.000350847612246331, "loss": 0.1775, "step": 266480 }, { "epoch": 11.04, "grad_norm": 0.380859375, "learning_rate": 0.0003508376884950833, "loss": 0.1594, "step": 266490 }, { "epoch": 11.04, "grad_norm": 1.65625, "learning_rate": 0.00035082776455407, "loss": 0.1856, "step": 266500 }, { "epoch": 11.04, "grad_norm": 1.2109375, "learning_rate": 0.00035081784042330973, "loss": 0.173, "step": 266510 }, { "epoch": 11.04, "grad_norm": 0.34375, "learning_rate": 0.00035080791610282117, "loss": 0.1795, "step": 266520 }, { "epoch": 11.04, "grad_norm": 0.6015625, "learning_rate": 0.0003507979915926229, "loss": 0.1542, "step": 266530 }, { "epoch": 11.04, "grad_norm": 0.671875, "learning_rate": 0.00035078806689273385, "loss": 0.256, "step": 266540 }, { "epoch": 11.04, "grad_norm": 1.5703125, "learning_rate": 0.00035077814200317245, "loss": 0.2025, "step": 266550 }, { "epoch": 11.04, "grad_norm": 0.640625, "learning_rate": 0.0003507682169239574, "loss": 0.1625, "step": 266560 }, { "epoch": 11.04, "grad_norm": 0.419921875, "learning_rate": 0.00035075829165510757, "loss": 0.271, "step": 266570 }, { "epoch": 11.04, "grad_norm": 1.3203125, "learning_rate": 0.00035074836619664154, "loss": 0.1561, "step": 266580 }, { "epoch": 11.04, "grad_norm": 1.28125, "learning_rate": 0.0003507384405485779, "loss": 0.2212, "step": 266590 }, { "epoch": 11.04, "grad_norm": 0.34375, "learning_rate": 0.00035072851471093535, "loss": 0.219, "step": 266600 }, { "epoch": 11.04, "grad_norm": 1.0234375, "learning_rate": 0.0003507185886837327, "loss": 0.2247, "step": 266610 }, { "epoch": 11.04, "grad_norm": 1.765625, "learning_rate": 0.0003507086624669885, "loss": 0.2081, "step": 266620 }, { "epoch": 11.04, "grad_norm": 1.0625, "learning_rate": 0.00035069873606072147, "loss": 0.1855, "step": 266630 }, { "epoch": 11.04, "grad_norm": 0.5390625, "learning_rate": 0.0003506888094649503, "loss": 0.2035, "step": 266640 }, { "epoch": 11.04, "grad_norm": 0.953125, "learning_rate": 0.0003506788826796936, "loss": 0.1713, "step": 266650 }, { "epoch": 11.05, "grad_norm": 0.37890625, "learning_rate": 0.0003506689557049702, "loss": 0.1629, "step": 266660 }, { "epoch": 11.05, "grad_norm": 0.0966796875, "learning_rate": 0.0003506590285407987, "loss": 0.1702, "step": 266670 }, { "epoch": 11.05, "grad_norm": 1.3125, "learning_rate": 0.0003506491011871978, "loss": 0.1723, "step": 266680 }, { "epoch": 11.05, "grad_norm": 0.486328125, "learning_rate": 0.0003506391736441861, "loss": 0.193, "step": 266690 }, { "epoch": 11.05, "grad_norm": 0.65234375, "learning_rate": 0.00035062924591178234, "loss": 0.2232, "step": 266700 }, { "epoch": 11.05, "grad_norm": 0.87109375, "learning_rate": 0.00035061931799000524, "loss": 0.1802, "step": 266710 }, { "epoch": 11.05, "grad_norm": 0.6796875, "learning_rate": 0.0003506093898788734, "loss": 0.214, "step": 266720 }, { "epoch": 11.05, "grad_norm": 0.625, "learning_rate": 0.0003505994615784056, "loss": 0.2485, "step": 266730 }, { "epoch": 11.05, "grad_norm": 0.0167236328125, "learning_rate": 0.00035058953308862045, "loss": 0.168, "step": 266740 }, { "epoch": 11.05, "grad_norm": 0.765625, "learning_rate": 0.00035057960440953675, "loss": 0.2022, "step": 266750 }, { "epoch": 11.05, "grad_norm": 1.484375, "learning_rate": 0.000350569675541173, "loss": 0.2036, "step": 266760 }, { "epoch": 11.05, "grad_norm": 0.76171875, "learning_rate": 0.00035055974648354795, "loss": 0.1679, "step": 266770 }, { "epoch": 11.05, "grad_norm": 1.9921875, "learning_rate": 0.0003505498172366804, "loss": 0.2079, "step": 266780 }, { "epoch": 11.05, "grad_norm": 0.890625, "learning_rate": 0.00035053988780058893, "loss": 0.183, "step": 266790 }, { "epoch": 11.05, "grad_norm": 2.90625, "learning_rate": 0.00035052995817529223, "loss": 0.224, "step": 266800 }, { "epoch": 11.05, "grad_norm": 0.400390625, "learning_rate": 0.000350520028360809, "loss": 0.1827, "step": 266810 }, { "epoch": 11.05, "grad_norm": 0.478515625, "learning_rate": 0.000350510098357158, "loss": 0.1483, "step": 266820 }, { "epoch": 11.05, "grad_norm": 0.60546875, "learning_rate": 0.00035050016816435783, "loss": 0.1683, "step": 266830 }, { "epoch": 11.05, "grad_norm": 0.890625, "learning_rate": 0.0003504902377824271, "loss": 0.1813, "step": 266840 }, { "epoch": 11.05, "grad_norm": 0.9453125, "learning_rate": 0.0003504803072113846, "loss": 0.1725, "step": 266850 }, { "epoch": 11.05, "grad_norm": 1.7421875, "learning_rate": 0.0003504703764512491, "loss": 0.2148, "step": 266860 }, { "epoch": 11.05, "grad_norm": 2.90625, "learning_rate": 0.00035046044550203915, "loss": 0.199, "step": 266870 }, { "epoch": 11.05, "grad_norm": 0.60546875, "learning_rate": 0.00035045051436377356, "loss": 0.2239, "step": 266880 }, { "epoch": 11.05, "grad_norm": 1.234375, "learning_rate": 0.00035044058303647087, "loss": 0.2077, "step": 266890 }, { "epoch": 11.05, "grad_norm": 1.6015625, "learning_rate": 0.0003504306515201499, "loss": 0.1875, "step": 266900 }, { "epoch": 11.06, "grad_norm": 0.59765625, "learning_rate": 0.0003504207198148293, "loss": 0.1695, "step": 266910 }, { "epoch": 11.06, "grad_norm": 0.498046875, "learning_rate": 0.0003504107879205277, "loss": 0.1993, "step": 266920 }, { "epoch": 11.06, "grad_norm": 0.98046875, "learning_rate": 0.0003504008558372639, "loss": 0.2351, "step": 266930 }, { "epoch": 11.06, "grad_norm": 1.46875, "learning_rate": 0.0003503909235650565, "loss": 0.214, "step": 266940 }, { "epoch": 11.06, "grad_norm": 1.46875, "learning_rate": 0.00035038099110392415, "loss": 0.1961, "step": 266950 }, { "epoch": 11.06, "grad_norm": 0.61328125, "learning_rate": 0.0003503710584538857, "loss": 0.1866, "step": 266960 }, { "epoch": 11.06, "grad_norm": 0.458984375, "learning_rate": 0.0003503611256149598, "loss": 0.2002, "step": 266970 }, { "epoch": 11.06, "grad_norm": 1.2421875, "learning_rate": 0.00035035119258716496, "loss": 0.1991, "step": 266980 }, { "epoch": 11.06, "grad_norm": 1.09375, "learning_rate": 0.0003503412593705201, "loss": 0.2412, "step": 266990 }, { "epoch": 11.06, "grad_norm": 0.26953125, "learning_rate": 0.0003503313259650438, "loss": 0.1959, "step": 267000 }, { "epoch": 11.06, "grad_norm": 0.5703125, "learning_rate": 0.0003503213923707548, "loss": 0.2248, "step": 267010 }, { "epoch": 11.06, "grad_norm": 0.74609375, "learning_rate": 0.00035031145858767176, "loss": 0.1729, "step": 267020 }, { "epoch": 11.06, "grad_norm": 1.296875, "learning_rate": 0.00035030152461581343, "loss": 0.1615, "step": 267030 }, { "epoch": 11.06, "grad_norm": 1.25, "learning_rate": 0.00035029159045519843, "loss": 0.2465, "step": 267040 }, { "epoch": 11.06, "grad_norm": 0.7734375, "learning_rate": 0.0003502816561058455, "loss": 0.2144, "step": 267050 }, { "epoch": 11.06, "grad_norm": 0.455078125, "learning_rate": 0.00035027172156777334, "loss": 0.1704, "step": 267060 }, { "epoch": 11.06, "grad_norm": 1.015625, "learning_rate": 0.0003502617868410006, "loss": 0.1612, "step": 267070 }, { "epoch": 11.06, "grad_norm": 0.875, "learning_rate": 0.00035025185192554594, "loss": 0.2028, "step": 267080 }, { "epoch": 11.06, "grad_norm": 1.1875, "learning_rate": 0.0003502419168214282, "loss": 0.1731, "step": 267090 }, { "epoch": 11.06, "grad_norm": 0.47265625, "learning_rate": 0.00035023198152866596, "loss": 0.1444, "step": 267100 }, { "epoch": 11.06, "grad_norm": 0.46875, "learning_rate": 0.000350222046047278, "loss": 0.168, "step": 267110 }, { "epoch": 11.06, "grad_norm": 1.5625, "learning_rate": 0.00035021211037728297, "loss": 0.2245, "step": 267120 }, { "epoch": 11.06, "grad_norm": 1.0703125, "learning_rate": 0.0003502021745186995, "loss": 0.2062, "step": 267130 }, { "epoch": 11.06, "grad_norm": 1.328125, "learning_rate": 0.0003501922384715464, "loss": 0.175, "step": 267140 }, { "epoch": 11.07, "grad_norm": 0.57421875, "learning_rate": 0.00035018230223584235, "loss": 0.1936, "step": 267150 }, { "epoch": 11.07, "grad_norm": 0.431640625, "learning_rate": 0.000350172365811606, "loss": 0.2359, "step": 267160 }, { "epoch": 11.07, "grad_norm": 0.79296875, "learning_rate": 0.0003501624291988561, "loss": 0.1977, "step": 267170 }, { "epoch": 11.07, "grad_norm": 1.03125, "learning_rate": 0.0003501524923976113, "loss": 0.2085, "step": 267180 }, { "epoch": 11.07, "grad_norm": 0.62890625, "learning_rate": 0.0003501425554078903, "loss": 0.1725, "step": 267190 }, { "epoch": 11.07, "grad_norm": 2.3125, "learning_rate": 0.0003501326182297119, "loss": 0.1789, "step": 267200 }, { "epoch": 11.07, "grad_norm": 0.5546875, "learning_rate": 0.00035012268086309466, "loss": 0.1985, "step": 267210 }, { "epoch": 11.07, "grad_norm": 0.9921875, "learning_rate": 0.00035011274330805744, "loss": 0.2024, "step": 267220 }, { "epoch": 11.07, "grad_norm": 0.30078125, "learning_rate": 0.0003501028055646188, "loss": 0.1693, "step": 267230 }, { "epoch": 11.07, "grad_norm": 0.84765625, "learning_rate": 0.00035009286763279735, "loss": 0.2004, "step": 267240 }, { "epoch": 11.07, "grad_norm": 0.95703125, "learning_rate": 0.0003500829295126121, "loss": 0.2352, "step": 267250 }, { "epoch": 11.07, "grad_norm": 0.69921875, "learning_rate": 0.00035007299120408153, "loss": 0.1824, "step": 267260 }, { "epoch": 11.07, "grad_norm": 0.859375, "learning_rate": 0.0003500630527072244, "loss": 0.1999, "step": 267270 }, { "epoch": 11.07, "grad_norm": 0.6796875, "learning_rate": 0.00035005311402205944, "loss": 0.2267, "step": 267280 }, { "epoch": 11.07, "grad_norm": 0.9296875, "learning_rate": 0.0003500431751486053, "loss": 0.2489, "step": 267290 }, { "epoch": 11.07, "grad_norm": 0.6953125, "learning_rate": 0.0003500332360868807, "loss": 0.2087, "step": 267300 }, { "epoch": 11.07, "grad_norm": 0.6328125, "learning_rate": 0.00035002329683690443, "loss": 0.216, "step": 267310 }, { "epoch": 11.07, "grad_norm": 0.6484375, "learning_rate": 0.000350013357398695, "loss": 0.1894, "step": 267320 }, { "epoch": 11.07, "grad_norm": 0.7734375, "learning_rate": 0.0003500034177722713, "loss": 0.1978, "step": 267330 }, { "epoch": 11.07, "grad_norm": 1.953125, "learning_rate": 0.0003499934779576519, "loss": 0.2123, "step": 267340 }, { "epoch": 11.07, "grad_norm": 0.62109375, "learning_rate": 0.00034998353795485563, "loss": 0.1747, "step": 267350 }, { "epoch": 11.07, "grad_norm": 1.2734375, "learning_rate": 0.00034997359776390115, "loss": 0.1704, "step": 267360 }, { "epoch": 11.07, "grad_norm": 0.6484375, "learning_rate": 0.00034996365738480714, "loss": 0.1515, "step": 267370 }, { "epoch": 11.07, "grad_norm": 1.0, "learning_rate": 0.0003499537168175923, "loss": 0.2251, "step": 267380 }, { "epoch": 11.08, "grad_norm": 0.625, "learning_rate": 0.0003499437760622754, "loss": 0.2373, "step": 267390 }, { "epoch": 11.08, "grad_norm": 1.2734375, "learning_rate": 0.000349933835118875, "loss": 0.2359, "step": 267400 }, { "epoch": 11.08, "grad_norm": 0.85546875, "learning_rate": 0.0003499238939874101, "loss": 0.2106, "step": 267410 }, { "epoch": 11.08, "grad_norm": 0.79296875, "learning_rate": 0.00034991395266789903, "loss": 0.209, "step": 267420 }, { "epoch": 11.08, "grad_norm": 0.8046875, "learning_rate": 0.0003499040111603608, "loss": 0.1685, "step": 267430 }, { "epoch": 11.08, "grad_norm": 1.0625, "learning_rate": 0.00034989406946481406, "loss": 0.2022, "step": 267440 }, { "epoch": 11.08, "grad_norm": 1.25, "learning_rate": 0.00034988412758127737, "loss": 0.1922, "step": 267450 }, { "epoch": 11.08, "grad_norm": 0.26953125, "learning_rate": 0.0003498741855097695, "loss": 0.1623, "step": 267460 }, { "epoch": 11.08, "grad_norm": 0.70703125, "learning_rate": 0.00034986424325030935, "loss": 0.2072, "step": 267470 }, { "epoch": 11.08, "grad_norm": 0.59375, "learning_rate": 0.0003498543008029153, "loss": 0.1688, "step": 267480 }, { "epoch": 11.08, "grad_norm": 0.6015625, "learning_rate": 0.0003498443581676064, "loss": 0.1512, "step": 267490 }, { "epoch": 11.08, "grad_norm": 0.96875, "learning_rate": 0.00034983441534440106, "loss": 0.1599, "step": 267500 }, { "epoch": 11.08, "grad_norm": 1.1328125, "learning_rate": 0.0003498244723333183, "loss": 0.2161, "step": 267510 }, { "epoch": 11.08, "grad_norm": 0.0002460479736328125, "learning_rate": 0.00034981452913437655, "loss": 0.1546, "step": 267520 }, { "epoch": 11.08, "grad_norm": 0.84765625, "learning_rate": 0.0003498045857475945, "loss": 0.179, "step": 267530 }, { "epoch": 11.08, "grad_norm": 0.298828125, "learning_rate": 0.0003497946421729913, "loss": 0.2043, "step": 267540 }, { "epoch": 11.08, "grad_norm": 0.625, "learning_rate": 0.00034978469841058523, "loss": 0.1873, "step": 267550 }, { "epoch": 11.08, "grad_norm": 0.640625, "learning_rate": 0.00034977475446039505, "loss": 0.1826, "step": 267560 }, { "epoch": 11.08, "grad_norm": 0.59375, "learning_rate": 0.00034976481032243965, "loss": 0.1548, "step": 267570 }, { "epoch": 11.08, "grad_norm": 0.7109375, "learning_rate": 0.0003497548659967376, "loss": 0.2038, "step": 267580 }, { "epoch": 11.08, "grad_norm": 0.81640625, "learning_rate": 0.00034974492148330766, "loss": 0.1864, "step": 267590 }, { "epoch": 11.08, "grad_norm": 0.68359375, "learning_rate": 0.00034973497678216864, "loss": 0.1676, "step": 267600 }, { "epoch": 11.08, "grad_norm": 0.80078125, "learning_rate": 0.00034972503189333905, "loss": 0.1712, "step": 267610 }, { "epoch": 11.08, "grad_norm": 0.51171875, "learning_rate": 0.0003497150868168378, "loss": 0.2033, "step": 267620 }, { "epoch": 11.09, "grad_norm": 2.40625, "learning_rate": 0.00034970514155268343, "loss": 0.1966, "step": 267630 }, { "epoch": 11.09, "grad_norm": 0.3203125, "learning_rate": 0.0003496951961008948, "loss": 0.193, "step": 267640 }, { "epoch": 11.09, "grad_norm": 2.625, "learning_rate": 0.0003496852504614907, "loss": 0.177, "step": 267650 }, { "epoch": 11.09, "grad_norm": 1.109375, "learning_rate": 0.00034967530463448954, "loss": 0.1827, "step": 267660 }, { "epoch": 11.09, "grad_norm": 0.63671875, "learning_rate": 0.00034966535861991033, "loss": 0.187, "step": 267670 }, { "epoch": 11.09, "grad_norm": 0.7421875, "learning_rate": 0.00034965541241777163, "loss": 0.1997, "step": 267680 }, { "epoch": 11.09, "grad_norm": 0.1962890625, "learning_rate": 0.00034964546602809214, "loss": 0.2083, "step": 267690 }, { "epoch": 11.09, "grad_norm": 0.90234375, "learning_rate": 0.00034963551945089077, "loss": 0.1968, "step": 267700 }, { "epoch": 11.09, "grad_norm": 0.337890625, "learning_rate": 0.0003496255726861861, "loss": 0.1871, "step": 267710 }, { "epoch": 11.09, "grad_norm": 0.515625, "learning_rate": 0.0003496156257339968, "loss": 0.162, "step": 267720 }, { "epoch": 11.09, "grad_norm": 1.1015625, "learning_rate": 0.0003496056785943417, "loss": 0.2484, "step": 267730 }, { "epoch": 11.09, "grad_norm": 0.703125, "learning_rate": 0.00034959573126723943, "loss": 0.1714, "step": 267740 }, { "epoch": 11.09, "grad_norm": 0.62109375, "learning_rate": 0.00034958578375270875, "loss": 0.2492, "step": 267750 }, { "epoch": 11.09, "grad_norm": 0.82421875, "learning_rate": 0.00034957583605076836, "loss": 0.174, "step": 267760 }, { "epoch": 11.09, "grad_norm": 1.1796875, "learning_rate": 0.00034956588816143706, "loss": 0.1835, "step": 267770 }, { "epoch": 11.09, "grad_norm": 1.1875, "learning_rate": 0.0003495559400847335, "loss": 0.1758, "step": 267780 }, { "epoch": 11.09, "grad_norm": 0.404296875, "learning_rate": 0.0003495459918206764, "loss": 0.1437, "step": 267790 }, { "epoch": 11.09, "grad_norm": 0.8125, "learning_rate": 0.00034953604336928446, "loss": 0.1733, "step": 267800 }, { "epoch": 11.09, "grad_norm": 1.34375, "learning_rate": 0.0003495260947305765, "loss": 0.177, "step": 267810 }, { "epoch": 11.09, "grad_norm": 0.9375, "learning_rate": 0.00034951614590457116, "loss": 0.2065, "step": 267820 }, { "epoch": 11.09, "grad_norm": 0.5078125, "learning_rate": 0.00034950619689128715, "loss": 0.1789, "step": 267830 }, { "epoch": 11.09, "grad_norm": 0.5859375, "learning_rate": 0.0003494962476907432, "loss": 0.1663, "step": 267840 }, { "epoch": 11.09, "grad_norm": 0.48046875, "learning_rate": 0.0003494862983029581, "loss": 0.1837, "step": 267850 }, { "epoch": 11.09, "grad_norm": 0.796875, "learning_rate": 0.00034947634872795053, "loss": 0.2082, "step": 267860 }, { "epoch": 11.1, "grad_norm": 1.1171875, "learning_rate": 0.00034946639896573923, "loss": 0.1797, "step": 267870 }, { "epoch": 11.1, "grad_norm": 0.50390625, "learning_rate": 0.0003494564490163429, "loss": 0.2355, "step": 267880 }, { "epoch": 11.1, "grad_norm": 0.69921875, "learning_rate": 0.00034944649887978026, "loss": 0.207, "step": 267890 }, { "epoch": 11.1, "grad_norm": 0.8359375, "learning_rate": 0.00034943654855607004, "loss": 0.171, "step": 267900 }, { "epoch": 11.1, "grad_norm": 0.7890625, "learning_rate": 0.00034942659804523097, "loss": 0.2101, "step": 267910 }, { "epoch": 11.1, "grad_norm": 1.0390625, "learning_rate": 0.00034941664734728175, "loss": 0.1771, "step": 267920 }, { "epoch": 11.1, "grad_norm": 0.6875, "learning_rate": 0.0003494066964622412, "loss": 0.2037, "step": 267930 }, { "epoch": 11.1, "grad_norm": 0.6875, "learning_rate": 0.000349396745390128, "loss": 0.1934, "step": 267940 }, { "epoch": 11.1, "grad_norm": 1.6171875, "learning_rate": 0.00034938679413096073, "loss": 0.2023, "step": 267950 }, { "epoch": 11.1, "grad_norm": 0.625, "learning_rate": 0.0003493768426847584, "loss": 0.2203, "step": 267960 }, { "epoch": 11.1, "grad_norm": 0.7109375, "learning_rate": 0.0003493668910515395, "loss": 0.1989, "step": 267970 }, { "epoch": 11.1, "grad_norm": 0.90234375, "learning_rate": 0.00034935693923132286, "loss": 0.1608, "step": 267980 }, { "epoch": 11.1, "grad_norm": 0.953125, "learning_rate": 0.00034934698722412715, "loss": 0.2011, "step": 267990 }, { "epoch": 11.1, "grad_norm": 0.921875, "learning_rate": 0.00034933703502997124, "loss": 0.2051, "step": 268000 }, { "epoch": 11.1, "grad_norm": 0.3515625, "learning_rate": 0.00034932708264887366, "loss": 0.1579, "step": 268010 }, { "epoch": 11.1, "grad_norm": 0.625, "learning_rate": 0.0003493171300808533, "loss": 0.219, "step": 268020 }, { "epoch": 11.1, "grad_norm": 0.99609375, "learning_rate": 0.00034930717732592877, "loss": 0.1734, "step": 268030 }, { "epoch": 11.1, "grad_norm": 0.96875, "learning_rate": 0.00034929722438411886, "loss": 0.1674, "step": 268040 }, { "epoch": 11.1, "grad_norm": 0.859375, "learning_rate": 0.00034928727125544236, "loss": 0.2017, "step": 268050 }, { "epoch": 11.1, "grad_norm": 1.5625, "learning_rate": 0.0003492773179399179, "loss": 0.2245, "step": 268060 }, { "epoch": 11.1, "grad_norm": 0.77734375, "learning_rate": 0.0003492673644375642, "loss": 0.1997, "step": 268070 }, { "epoch": 11.1, "grad_norm": 0.8515625, "learning_rate": 0.00034925741074840014, "loss": 0.1906, "step": 268080 }, { "epoch": 11.1, "grad_norm": 0.62109375, "learning_rate": 0.0003492474568724443, "loss": 0.2305, "step": 268090 }, { "epoch": 11.1, "grad_norm": 1.09375, "learning_rate": 0.0003492375028097155, "loss": 0.1831, "step": 268100 }, { "epoch": 11.11, "grad_norm": 0.51171875, "learning_rate": 0.00034922754856023236, "loss": 0.1938, "step": 268110 }, { "epoch": 11.11, "grad_norm": 1.421875, "learning_rate": 0.00034921759412401374, "loss": 0.2046, "step": 268120 }, { "epoch": 11.11, "grad_norm": 0.55078125, "learning_rate": 0.00034920763950107826, "loss": 0.249, "step": 268130 }, { "epoch": 11.11, "grad_norm": 1.234375, "learning_rate": 0.00034919768469144486, "loss": 0.2042, "step": 268140 }, { "epoch": 11.11, "grad_norm": 0.8046875, "learning_rate": 0.000349187729695132, "loss": 0.2288, "step": 268150 }, { "epoch": 11.11, "grad_norm": 1.6640625, "learning_rate": 0.0003491777745121586, "loss": 0.1953, "step": 268160 }, { "epoch": 11.11, "grad_norm": 0.3984375, "learning_rate": 0.00034916781914254324, "loss": 0.1755, "step": 268170 }, { "epoch": 11.11, "grad_norm": 0.640625, "learning_rate": 0.00034915786358630485, "loss": 0.1933, "step": 268180 }, { "epoch": 11.11, "grad_norm": 1.3125, "learning_rate": 0.00034914790784346215, "loss": 0.2249, "step": 268190 }, { "epoch": 11.11, "grad_norm": 0.52734375, "learning_rate": 0.0003491379519140336, "loss": 0.2024, "step": 268200 }, { "epoch": 11.11, "grad_norm": 0.6015625, "learning_rate": 0.00034912799579803823, "loss": 0.2087, "step": 268210 }, { "epoch": 11.11, "grad_norm": 1.578125, "learning_rate": 0.00034911803949549475, "loss": 0.2239, "step": 268220 }, { "epoch": 11.11, "grad_norm": 0.349609375, "learning_rate": 0.0003491080830064217, "loss": 0.2083, "step": 268230 }, { "epoch": 11.11, "grad_norm": 0.396484375, "learning_rate": 0.000349098126330838, "loss": 0.1884, "step": 268240 }, { "epoch": 11.11, "grad_norm": 1.0234375, "learning_rate": 0.0003490881694687623, "loss": 0.2039, "step": 268250 }, { "epoch": 11.11, "grad_norm": 1.046875, "learning_rate": 0.00034907821242021337, "loss": 0.2262, "step": 268260 }, { "epoch": 11.11, "grad_norm": 0.890625, "learning_rate": 0.00034906825518521, "loss": 0.2335, "step": 268270 }, { "epoch": 11.11, "grad_norm": 1.0, "learning_rate": 0.0003490582977637708, "loss": 0.179, "step": 268280 }, { "epoch": 11.11, "grad_norm": 1.265625, "learning_rate": 0.0003490483401559146, "loss": 0.2107, "step": 268290 }, { "epoch": 11.11, "grad_norm": 0.640625, "learning_rate": 0.0003490383823616602, "loss": 0.1949, "step": 268300 }, { "epoch": 11.11, "grad_norm": 0.85546875, "learning_rate": 0.0003490284243810262, "loss": 0.2022, "step": 268310 }, { "epoch": 11.11, "grad_norm": 0.578125, "learning_rate": 0.00034901846621403134, "loss": 0.2626, "step": 268320 }, { "epoch": 11.11, "grad_norm": 1.015625, "learning_rate": 0.00034900850786069446, "loss": 0.1489, "step": 268330 }, { "epoch": 11.11, "grad_norm": 0.7734375, "learning_rate": 0.00034899854932103424, "loss": 0.1603, "step": 268340 }, { "epoch": 11.12, "grad_norm": 0.44140625, "learning_rate": 0.0003489885905950695, "loss": 0.1892, "step": 268350 }, { "epoch": 11.12, "grad_norm": 0.85546875, "learning_rate": 0.0003489786316828189, "loss": 0.2036, "step": 268360 }, { "epoch": 11.12, "grad_norm": 2.265625, "learning_rate": 0.0003489686725843012, "loss": 0.1965, "step": 268370 }, { "epoch": 11.12, "grad_norm": 0.57421875, "learning_rate": 0.0003489587132995352, "loss": 0.2139, "step": 268380 }, { "epoch": 11.12, "grad_norm": 0.60546875, "learning_rate": 0.00034894875382853943, "loss": 0.1518, "step": 268390 }, { "epoch": 11.12, "grad_norm": 0.70703125, "learning_rate": 0.0003489387941713329, "loss": 0.2136, "step": 268400 }, { "epoch": 11.12, "grad_norm": 0.68359375, "learning_rate": 0.00034892883432793425, "loss": 0.2567, "step": 268410 }, { "epoch": 11.12, "grad_norm": 0.56640625, "learning_rate": 0.00034891887429836215, "loss": 0.2233, "step": 268420 }, { "epoch": 11.12, "grad_norm": 0.59375, "learning_rate": 0.0003489089140826355, "loss": 0.215, "step": 268430 }, { "epoch": 11.12, "grad_norm": 0.87890625, "learning_rate": 0.00034889895368077296, "loss": 0.1757, "step": 268440 }, { "epoch": 11.12, "grad_norm": 1.0390625, "learning_rate": 0.0003488889930927932, "loss": 0.1737, "step": 268450 }, { "epoch": 11.12, "grad_norm": 0.87109375, "learning_rate": 0.0003488790323187151, "loss": 0.1622, "step": 268460 }, { "epoch": 11.12, "grad_norm": 0.98046875, "learning_rate": 0.0003488690713585572, "loss": 0.2053, "step": 268470 }, { "epoch": 11.12, "grad_norm": 0.96875, "learning_rate": 0.0003488591102123386, "loss": 0.2176, "step": 268480 }, { "epoch": 11.12, "grad_norm": 1.921875, "learning_rate": 0.0003488491488800777, "loss": 0.1585, "step": 268490 }, { "epoch": 11.12, "grad_norm": 0.6640625, "learning_rate": 0.00034883918736179333, "loss": 0.24, "step": 268500 }, { "epoch": 11.12, "grad_norm": 1.1640625, "learning_rate": 0.0003488292256575044, "loss": 0.1426, "step": 268510 }, { "epoch": 11.12, "grad_norm": 1.1640625, "learning_rate": 0.0003488192637672294, "loss": 0.2016, "step": 268520 }, { "epoch": 11.12, "grad_norm": 0.9453125, "learning_rate": 0.00034880930169098735, "loss": 0.2115, "step": 268530 }, { "epoch": 11.12, "grad_norm": 0.95703125, "learning_rate": 0.0003487993394287968, "loss": 0.2023, "step": 268540 }, { "epoch": 11.12, "grad_norm": 1.0703125, "learning_rate": 0.0003487893769806766, "loss": 0.2068, "step": 268550 }, { "epoch": 11.12, "grad_norm": 1.2890625, "learning_rate": 0.00034877941434664544, "loss": 0.2113, "step": 268560 }, { "epoch": 11.12, "grad_norm": 0.318359375, "learning_rate": 0.00034876945152672207, "loss": 0.231, "step": 268570 }, { "epoch": 11.12, "grad_norm": 1.0234375, "learning_rate": 0.0003487594885209253, "loss": 0.1855, "step": 268580 }, { "epoch": 11.12, "grad_norm": 0.361328125, "learning_rate": 0.0003487495253292738, "loss": 0.223, "step": 268590 }, { "epoch": 11.13, "grad_norm": 0.77734375, "learning_rate": 0.00034873956195178636, "loss": 0.1867, "step": 268600 }, { "epoch": 11.13, "grad_norm": 1.6796875, "learning_rate": 0.00034872959838848176, "loss": 0.1808, "step": 268610 }, { "epoch": 11.13, "grad_norm": 0.54296875, "learning_rate": 0.00034871963463937875, "loss": 0.1492, "step": 268620 }, { "epoch": 11.13, "grad_norm": 0.51171875, "learning_rate": 0.00034870967070449595, "loss": 0.2218, "step": 268630 }, { "epoch": 11.13, "grad_norm": 1.46875, "learning_rate": 0.0003486997065838523, "loss": 0.1903, "step": 268640 }, { "epoch": 11.13, "grad_norm": 1.2109375, "learning_rate": 0.00034868974227746633, "loss": 0.2152, "step": 268650 }, { "epoch": 11.13, "grad_norm": 0.796875, "learning_rate": 0.000348679777785357, "loss": 0.1825, "step": 268660 }, { "epoch": 11.13, "grad_norm": 0.75, "learning_rate": 0.00034866981310754307, "loss": 0.1971, "step": 268670 }, { "epoch": 11.13, "grad_norm": 0.4453125, "learning_rate": 0.00034865984824404306, "loss": 0.1758, "step": 268680 }, { "epoch": 11.13, "grad_norm": 1.65625, "learning_rate": 0.000348649883194876, "loss": 0.209, "step": 268690 }, { "epoch": 11.13, "grad_norm": 0.59765625, "learning_rate": 0.0003486399179600605, "loss": 0.1208, "step": 268700 }, { "epoch": 11.13, "grad_norm": 0.5, "learning_rate": 0.0003486299525396152, "loss": 0.1597, "step": 268710 }, { "epoch": 11.13, "grad_norm": 0.4921875, "learning_rate": 0.0003486199869335591, "loss": 0.213, "step": 268720 }, { "epoch": 11.13, "grad_norm": 0.55859375, "learning_rate": 0.00034861002114191077, "loss": 0.1668, "step": 268730 }, { "epoch": 11.13, "grad_norm": 0.94140625, "learning_rate": 0.0003486000551646891, "loss": 0.1538, "step": 268740 }, { "epoch": 11.13, "grad_norm": 0.67578125, "learning_rate": 0.00034859008900191274, "loss": 0.1904, "step": 268750 }, { "epoch": 11.13, "grad_norm": 0.70703125, "learning_rate": 0.0003485801226536004, "loss": 0.1739, "step": 268760 }, { "epoch": 11.13, "grad_norm": 0.44140625, "learning_rate": 0.000348570156119771, "loss": 0.164, "step": 268770 }, { "epoch": 11.13, "grad_norm": 0.220703125, "learning_rate": 0.0003485601894004433, "loss": 0.1706, "step": 268780 }, { "epoch": 11.13, "grad_norm": 0.92578125, "learning_rate": 0.00034855022249563573, "loss": 0.1684, "step": 268790 }, { "epoch": 11.13, "grad_norm": 1.3203125, "learning_rate": 0.0003485402554053675, "loss": 0.2436, "step": 268800 }, { "epoch": 11.13, "grad_norm": 0.796875, "learning_rate": 0.000348530288129657, "loss": 0.1922, "step": 268810 }, { "epoch": 11.13, "grad_norm": 4.46875, "learning_rate": 0.00034852032066852326, "loss": 0.1758, "step": 268820 }, { "epoch": 11.13, "grad_norm": 0.96875, "learning_rate": 0.00034851035302198486, "loss": 0.1958, "step": 268830 }, { "epoch": 11.14, "grad_norm": 0.380859375, "learning_rate": 0.0003485003851900606, "loss": 0.1851, "step": 268840 }, { "epoch": 11.14, "grad_norm": 0.609375, "learning_rate": 0.0003484904171727692, "loss": 0.1798, "step": 268850 }, { "epoch": 11.14, "grad_norm": 0.6640625, "learning_rate": 0.0003484804489701296, "loss": 0.2079, "step": 268860 }, { "epoch": 11.14, "grad_norm": 0.474609375, "learning_rate": 0.00034847048058216036, "loss": 0.1649, "step": 268870 }, { "epoch": 11.14, "grad_norm": 0.79296875, "learning_rate": 0.0003484605120088803, "loss": 0.1831, "step": 268880 }, { "epoch": 11.14, "grad_norm": 0.49609375, "learning_rate": 0.0003484505432503082, "loss": 0.1959, "step": 268890 }, { "epoch": 11.14, "grad_norm": 1.625, "learning_rate": 0.0003484405743064628, "loss": 0.1753, "step": 268900 }, { "epoch": 11.14, "grad_norm": 0.59765625, "learning_rate": 0.0003484306051773629, "loss": 0.2171, "step": 268910 }, { "epoch": 11.14, "grad_norm": 0.859375, "learning_rate": 0.0003484206358630272, "loss": 0.1814, "step": 268920 }, { "epoch": 11.14, "grad_norm": 1.296875, "learning_rate": 0.0003484106663634745, "loss": 0.2098, "step": 268930 }, { "epoch": 11.14, "grad_norm": 0.66015625, "learning_rate": 0.0003484006966787236, "loss": 0.2143, "step": 268940 }, { "epoch": 11.14, "grad_norm": 0.79296875, "learning_rate": 0.0003483907268087931, "loss": 0.1788, "step": 268950 }, { "epoch": 11.14, "grad_norm": 1.8828125, "learning_rate": 0.00034838075675370204, "loss": 0.1923, "step": 268960 }, { "epoch": 11.14, "grad_norm": 0.52734375, "learning_rate": 0.0003483707865134689, "loss": 0.1921, "step": 268970 }, { "epoch": 11.14, "grad_norm": 0.6796875, "learning_rate": 0.0003483608160881126, "loss": 0.2213, "step": 268980 }, { "epoch": 11.14, "grad_norm": 1.0234375, "learning_rate": 0.0003483508454776519, "loss": 0.2306, "step": 268990 }, { "epoch": 11.14, "grad_norm": 0.44921875, "learning_rate": 0.00034834087468210543, "loss": 0.1806, "step": 269000 }, { "epoch": 11.14, "grad_norm": 0.765625, "learning_rate": 0.00034833090370149216, "loss": 0.1632, "step": 269010 }, { "epoch": 11.14, "grad_norm": 1.40625, "learning_rate": 0.00034832093253583067, "loss": 0.2007, "step": 269020 }, { "epoch": 11.14, "grad_norm": 0.703125, "learning_rate": 0.00034831096118513984, "loss": 0.1673, "step": 269030 }, { "epoch": 11.14, "grad_norm": 0.54296875, "learning_rate": 0.00034830098964943847, "loss": 0.2554, "step": 269040 }, { "epoch": 11.14, "grad_norm": 0.63671875, "learning_rate": 0.0003482910179287451, "loss": 0.2088, "step": 269050 }, { "epoch": 11.14, "grad_norm": 0.94921875, "learning_rate": 0.0003482810460230788, "loss": 0.2377, "step": 269060 }, { "epoch": 11.14, "grad_norm": 0.65234375, "learning_rate": 0.0003482710739324581, "loss": 0.2055, "step": 269070 }, { "epoch": 11.15, "grad_norm": 0.78125, "learning_rate": 0.0003482611016569018, "loss": 0.2109, "step": 269080 }, { "epoch": 11.15, "grad_norm": 0.63671875, "learning_rate": 0.00034825112919642883, "loss": 0.2163, "step": 269090 }, { "epoch": 11.15, "grad_norm": 0.6796875, "learning_rate": 0.00034824115655105776, "loss": 0.1804, "step": 269100 }, { "epoch": 11.15, "grad_norm": 0.6796875, "learning_rate": 0.00034823118372080745, "loss": 0.2547, "step": 269110 }, { "epoch": 11.15, "grad_norm": 1.9375, "learning_rate": 0.0003482212107056967, "loss": 0.1877, "step": 269120 }, { "epoch": 11.15, "grad_norm": 1.0234375, "learning_rate": 0.00034821123750574426, "loss": 0.1708, "step": 269130 }, { "epoch": 11.15, "grad_norm": 0.423828125, "learning_rate": 0.0003482012641209689, "loss": 0.2001, "step": 269140 }, { "epoch": 11.15, "grad_norm": 0.71484375, "learning_rate": 0.0003481912905513892, "loss": 0.1747, "step": 269150 }, { "epoch": 11.15, "grad_norm": 1.03125, "learning_rate": 0.00034818131679702425, "loss": 0.1636, "step": 269160 }, { "epoch": 11.15, "grad_norm": 0.92578125, "learning_rate": 0.00034817134285789267, "loss": 0.1862, "step": 269170 }, { "epoch": 11.15, "grad_norm": 0.57421875, "learning_rate": 0.0003481613687340131, "loss": 0.19, "step": 269180 }, { "epoch": 11.15, "grad_norm": 0.875, "learning_rate": 0.0003481513944254045, "loss": 0.2401, "step": 269190 }, { "epoch": 11.15, "grad_norm": 0.48828125, "learning_rate": 0.00034814141993208563, "loss": 0.1573, "step": 269200 }, { "epoch": 11.15, "grad_norm": 1.2734375, "learning_rate": 0.00034813144525407515, "loss": 0.2016, "step": 269210 }, { "epoch": 11.15, "grad_norm": 0.50390625, "learning_rate": 0.00034812147039139186, "loss": 0.1852, "step": 269220 }, { "epoch": 11.15, "grad_norm": 1.234375, "learning_rate": 0.00034811149534405463, "loss": 0.1568, "step": 269230 }, { "epoch": 11.15, "grad_norm": 0.9609375, "learning_rate": 0.00034810152011208206, "loss": 0.2078, "step": 269240 }, { "epoch": 11.15, "grad_norm": 0.6171875, "learning_rate": 0.00034809154469549316, "loss": 0.1834, "step": 269250 }, { "epoch": 11.15, "grad_norm": 0.62890625, "learning_rate": 0.0003480815690943065, "loss": 0.2332, "step": 269260 }, { "epoch": 11.15, "grad_norm": 1.1953125, "learning_rate": 0.0003480715933085409, "loss": 0.1842, "step": 269270 }, { "epoch": 11.15, "grad_norm": 0.94140625, "learning_rate": 0.0003480616173382151, "loss": 0.2328, "step": 269280 }, { "epoch": 11.15, "grad_norm": 1.203125, "learning_rate": 0.0003480516411833481, "loss": 0.2287, "step": 269290 }, { "epoch": 11.15, "grad_norm": 0.6640625, "learning_rate": 0.0003480416648439583, "loss": 0.236, "step": 269300 }, { "epoch": 11.15, "grad_norm": 0.7265625, "learning_rate": 0.00034803168832006485, "loss": 0.1855, "step": 269310 }, { "epoch": 11.16, "grad_norm": 1.2578125, "learning_rate": 0.00034802171161168614, "loss": 0.1918, "step": 269320 }, { "epoch": 11.16, "grad_norm": 0.265625, "learning_rate": 0.0003480117347188414, "loss": 0.1788, "step": 269330 }, { "epoch": 11.16, "grad_norm": 0.50390625, "learning_rate": 0.000348001757641549, "loss": 0.1772, "step": 269340 }, { "epoch": 11.16, "grad_norm": 0.9453125, "learning_rate": 0.00034799178037982795, "loss": 0.2064, "step": 269350 }, { "epoch": 11.16, "grad_norm": 2.640625, "learning_rate": 0.0003479818029336969, "loss": 0.1989, "step": 269360 }, { "epoch": 11.16, "grad_norm": 1.0625, "learning_rate": 0.00034797182530317463, "loss": 0.2036, "step": 269370 }, { "epoch": 11.16, "grad_norm": 0.81640625, "learning_rate": 0.00034796184748828006, "loss": 0.1865, "step": 269380 }, { "epoch": 11.16, "grad_norm": 1.3203125, "learning_rate": 0.00034795186948903187, "loss": 0.2056, "step": 269390 }, { "epoch": 11.16, "grad_norm": 2.734375, "learning_rate": 0.00034794189130544877, "loss": 0.2303, "step": 269400 }, { "epoch": 11.16, "grad_norm": 0.5546875, "learning_rate": 0.00034793191293754967, "loss": 0.1692, "step": 269410 }, { "epoch": 11.16, "grad_norm": 0.64453125, "learning_rate": 0.0003479219343853532, "loss": 0.1725, "step": 269420 }, { "epoch": 11.16, "grad_norm": 2.984375, "learning_rate": 0.00034791195564887835, "loss": 0.1697, "step": 269430 }, { "epoch": 11.16, "grad_norm": 0.78125, "learning_rate": 0.00034790197672814375, "loss": 0.1872, "step": 269440 }, { "epoch": 11.16, "grad_norm": 0.43359375, "learning_rate": 0.00034789199762316807, "loss": 0.1783, "step": 269450 }, { "epoch": 11.16, "grad_norm": 0.384765625, "learning_rate": 0.0003478820183339704, "loss": 0.1659, "step": 269460 }, { "epoch": 11.16, "grad_norm": 1.171875, "learning_rate": 0.0003478720388605692, "loss": 0.1486, "step": 269470 }, { "epoch": 11.16, "grad_norm": 0.6171875, "learning_rate": 0.00034786205920298344, "loss": 0.1624, "step": 269480 }, { "epoch": 11.16, "grad_norm": 0.80078125, "learning_rate": 0.0003478520793612319, "loss": 0.1576, "step": 269490 }, { "epoch": 11.16, "grad_norm": 0.68359375, "learning_rate": 0.00034784209933533326, "loss": 0.1572, "step": 269500 }, { "epoch": 11.16, "grad_norm": 0.478515625, "learning_rate": 0.00034783211912530637, "loss": 0.2117, "step": 269510 }, { "epoch": 11.16, "grad_norm": 0.875, "learning_rate": 0.00034782213873116996, "loss": 0.2269, "step": 269520 }, { "epoch": 11.16, "grad_norm": 1.0703125, "learning_rate": 0.00034781215815294286, "loss": 0.2363, "step": 269530 }, { "epoch": 11.16, "grad_norm": 0.4765625, "learning_rate": 0.000347802177390644, "loss": 0.172, "step": 269540 }, { "epoch": 11.16, "grad_norm": 0.671875, "learning_rate": 0.00034779219644429186, "loss": 0.2169, "step": 269550 }, { "epoch": 11.17, "grad_norm": 0.8046875, "learning_rate": 0.0003477822153139053, "loss": 0.1742, "step": 269560 }, { "epoch": 11.17, "grad_norm": 0.80078125, "learning_rate": 0.00034777223399950325, "loss": 0.2185, "step": 269570 }, { "epoch": 11.17, "grad_norm": 0.240234375, "learning_rate": 0.00034776225250110444, "loss": 0.2233, "step": 269580 }, { "epoch": 11.17, "grad_norm": 1.203125, "learning_rate": 0.0003477522708187276, "loss": 0.2167, "step": 269590 }, { "epoch": 11.17, "grad_norm": 0.87890625, "learning_rate": 0.00034774228895239153, "loss": 0.1879, "step": 269600 }, { "epoch": 11.17, "grad_norm": 0.80859375, "learning_rate": 0.000347732306902115, "loss": 0.205, "step": 269610 }, { "epoch": 11.17, "grad_norm": 1.921875, "learning_rate": 0.0003477223246679169, "loss": 0.1965, "step": 269620 }, { "epoch": 11.17, "grad_norm": 0.7890625, "learning_rate": 0.0003477123422498159, "loss": 0.1912, "step": 269630 }, { "epoch": 11.17, "grad_norm": 1.1796875, "learning_rate": 0.0003477023596478308, "loss": 0.2187, "step": 269640 }, { "epoch": 11.17, "grad_norm": 1.2734375, "learning_rate": 0.0003476923768619804, "loss": 0.1893, "step": 269650 }, { "epoch": 11.17, "grad_norm": 1.0546875, "learning_rate": 0.0003476823938922836, "loss": 0.2106, "step": 269660 }, { "epoch": 11.17, "grad_norm": 0.5546875, "learning_rate": 0.000347672410738759, "loss": 0.1826, "step": 269670 }, { "epoch": 11.17, "grad_norm": 1.2890625, "learning_rate": 0.0003476624274014255, "loss": 0.2059, "step": 269680 }, { "epoch": 11.17, "grad_norm": 0.59375, "learning_rate": 0.0003476524438803018, "loss": 0.2186, "step": 269690 }, { "epoch": 11.17, "grad_norm": 0.94921875, "learning_rate": 0.0003476424601754068, "loss": 0.1967, "step": 269700 }, { "epoch": 11.17, "grad_norm": 0.6484375, "learning_rate": 0.0003476324762867593, "loss": 0.1949, "step": 269710 }, { "epoch": 11.17, "grad_norm": 0.6875, "learning_rate": 0.00034762249221437794, "loss": 0.1609, "step": 269720 }, { "epoch": 11.17, "grad_norm": 0.66796875, "learning_rate": 0.0003476125079582815, "loss": 0.2179, "step": 269730 }, { "epoch": 11.17, "grad_norm": 0.2333984375, "learning_rate": 0.00034760252351848905, "loss": 0.1863, "step": 269740 }, { "epoch": 11.17, "grad_norm": 0.455078125, "learning_rate": 0.0003475925388950191, "loss": 0.2329, "step": 269750 }, { "epoch": 11.17, "grad_norm": 1.6484375, "learning_rate": 0.0003475825540878905, "loss": 0.1941, "step": 269760 }, { "epoch": 11.17, "grad_norm": 1.1171875, "learning_rate": 0.0003475725690971221, "loss": 0.2487, "step": 269770 }, { "epoch": 11.17, "grad_norm": 0.390625, "learning_rate": 0.00034756258392273264, "loss": 0.1939, "step": 269780 }, { "epoch": 11.17, "grad_norm": 1.3359375, "learning_rate": 0.00034755259856474095, "loss": 0.2115, "step": 269790 }, { "epoch": 11.18, "grad_norm": 2.828125, "learning_rate": 0.00034754261302316587, "loss": 0.188, "step": 269800 }, { "epoch": 11.18, "grad_norm": 1.15625, "learning_rate": 0.00034753262729802603, "loss": 0.209, "step": 269810 }, { "epoch": 11.18, "grad_norm": 0.380859375, "learning_rate": 0.0003475226413893404, "loss": 0.1575, "step": 269820 }, { "epoch": 11.18, "grad_norm": 0.265625, "learning_rate": 0.0003475126552971276, "loss": 0.1757, "step": 269830 }, { "epoch": 11.18, "grad_norm": 1.2265625, "learning_rate": 0.0003475026690214066, "loss": 0.1617, "step": 269840 }, { "epoch": 11.18, "grad_norm": 0.68359375, "learning_rate": 0.000347492682562196, "loss": 0.1732, "step": 269850 }, { "epoch": 11.18, "grad_norm": 0.6796875, "learning_rate": 0.00034748269591951475, "loss": 0.2449, "step": 269860 }, { "epoch": 11.18, "grad_norm": 3.109375, "learning_rate": 0.0003474727090933817, "loss": 0.1742, "step": 269870 }, { "epoch": 11.18, "grad_norm": 0.66796875, "learning_rate": 0.0003474627220838154, "loss": 0.1384, "step": 269880 }, { "epoch": 11.18, "grad_norm": 0.859375, "learning_rate": 0.00034745273489083487, "loss": 0.2248, "step": 269890 }, { "epoch": 11.18, "grad_norm": 1.1015625, "learning_rate": 0.0003474427475144588, "loss": 0.1624, "step": 269900 }, { "epoch": 11.18, "grad_norm": 0.8828125, "learning_rate": 0.00034743275995470594, "loss": 0.2122, "step": 269910 }, { "epoch": 11.18, "grad_norm": 0.69921875, "learning_rate": 0.0003474227722115952, "loss": 0.2238, "step": 269920 }, { "epoch": 11.18, "grad_norm": 0.53125, "learning_rate": 0.0003474127842851453, "loss": 0.2384, "step": 269930 }, { "epoch": 11.18, "grad_norm": 0.9609375, "learning_rate": 0.00034740279617537505, "loss": 0.1647, "step": 269940 }, { "epoch": 11.18, "grad_norm": 1.34375, "learning_rate": 0.00034739280788230334, "loss": 0.253, "step": 269950 }, { "epoch": 11.18, "grad_norm": 0.66015625, "learning_rate": 0.0003473828194059487, "loss": 0.219, "step": 269960 }, { "epoch": 11.18, "grad_norm": 0.80078125, "learning_rate": 0.0003473728307463303, "loss": 0.202, "step": 269970 }, { "epoch": 11.18, "grad_norm": 0.55859375, "learning_rate": 0.0003473628419034667, "loss": 0.2324, "step": 269980 }, { "epoch": 11.18, "grad_norm": 0.9453125, "learning_rate": 0.0003473528528773767, "loss": 0.1978, "step": 269990 }, { "epoch": 11.18, "grad_norm": 1.875, "learning_rate": 0.00034734286366807916, "loss": 0.1858, "step": 270000 }, { "epoch": 11.18, "grad_norm": 0.65234375, "learning_rate": 0.00034733287427559284, "loss": 0.2121, "step": 270010 }, { "epoch": 11.18, "grad_norm": 0.6015625, "learning_rate": 0.00034732288469993663, "loss": 0.2536, "step": 270020 }, { "epoch": 11.18, "grad_norm": 1.0234375, "learning_rate": 0.00034731289494112924, "loss": 0.1761, "step": 270030 }, { "epoch": 11.19, "grad_norm": 2.46875, "learning_rate": 0.0003473029049991894, "loss": 0.2766, "step": 270040 }, { "epoch": 11.19, "grad_norm": 0.470703125, "learning_rate": 0.0003472929148741361, "loss": 0.179, "step": 270050 }, { "epoch": 11.19, "grad_norm": 0.3203125, "learning_rate": 0.0003472829245659881, "loss": 0.1707, "step": 270060 }, { "epoch": 11.19, "grad_norm": 0.72265625, "learning_rate": 0.00034727293407476397, "loss": 0.2313, "step": 270070 }, { "epoch": 11.19, "grad_norm": 0.365234375, "learning_rate": 0.0003472629434004828, "loss": 0.2287, "step": 270080 }, { "epoch": 11.19, "grad_norm": 1.0078125, "learning_rate": 0.00034725295254316326, "loss": 0.1696, "step": 270090 }, { "epoch": 11.19, "grad_norm": 0.5546875, "learning_rate": 0.0003472429615028241, "loss": 0.1822, "step": 270100 }, { "epoch": 11.19, "grad_norm": 0.349609375, "learning_rate": 0.0003472329702794843, "loss": 0.196, "step": 270110 }, { "epoch": 11.19, "grad_norm": 1.7578125, "learning_rate": 0.0003472229788731624, "loss": 0.2102, "step": 270120 }, { "epoch": 11.19, "grad_norm": 0.80859375, "learning_rate": 0.00034721298728387744, "loss": 0.2393, "step": 270130 }, { "epoch": 11.19, "grad_norm": 0.9453125, "learning_rate": 0.0003472029955116482, "loss": 0.1605, "step": 270140 }, { "epoch": 11.19, "grad_norm": 0.6328125, "learning_rate": 0.0003471930035564933, "loss": 0.2204, "step": 270150 }, { "epoch": 11.19, "grad_norm": 1.828125, "learning_rate": 0.0003471830114184318, "loss": 0.2381, "step": 270160 }, { "epoch": 11.19, "grad_norm": 1.0546875, "learning_rate": 0.0003471730190974822, "loss": 0.1397, "step": 270170 }, { "epoch": 11.19, "grad_norm": 0.40234375, "learning_rate": 0.0003471630265936636, "loss": 0.217, "step": 270180 }, { "epoch": 11.19, "grad_norm": 0.46875, "learning_rate": 0.00034715303390699467, "loss": 0.1685, "step": 270190 }, { "epoch": 11.19, "grad_norm": 0.6796875, "learning_rate": 0.0003471430410374941, "loss": 0.1957, "step": 270200 }, { "epoch": 11.19, "grad_norm": 1.796875, "learning_rate": 0.000347133047985181, "loss": 0.2044, "step": 270210 }, { "epoch": 11.19, "grad_norm": 1.03125, "learning_rate": 0.0003471230547500739, "loss": 0.2036, "step": 270220 }, { "epoch": 11.19, "grad_norm": 0.65234375, "learning_rate": 0.0003471130613321916, "loss": 0.1842, "step": 270230 }, { "epoch": 11.19, "grad_norm": 0.55859375, "learning_rate": 0.00034710306773155314, "loss": 0.1693, "step": 270240 }, { "epoch": 11.19, "grad_norm": 0.66015625, "learning_rate": 0.00034709307394817715, "loss": 0.1681, "step": 270250 }, { "epoch": 11.19, "grad_norm": 1.8515625, "learning_rate": 0.00034708307998208246, "loss": 0.2486, "step": 270260 }, { "epoch": 11.19, "grad_norm": 0.189453125, "learning_rate": 0.00034707308583328795, "loss": 0.1726, "step": 270270 }, { "epoch": 11.19, "grad_norm": 0.5859375, "learning_rate": 0.0003470630915018123, "loss": 0.1542, "step": 270280 }, { "epoch": 11.2, "grad_norm": 1.28125, "learning_rate": 0.0003470530969876745, "loss": 0.1964, "step": 270290 }, { "epoch": 11.2, "grad_norm": 0.357421875, "learning_rate": 0.0003470431022908932, "loss": 0.1573, "step": 270300 }, { "epoch": 11.2, "grad_norm": 0.73046875, "learning_rate": 0.0003470331074114872, "loss": 0.198, "step": 270310 }, { "epoch": 11.2, "grad_norm": 0.54296875, "learning_rate": 0.0003470231123494755, "loss": 0.1673, "step": 270320 }, { "epoch": 11.2, "grad_norm": 0.33203125, "learning_rate": 0.00034701311710487665, "loss": 0.2061, "step": 270330 }, { "epoch": 11.2, "grad_norm": 1.234375, "learning_rate": 0.00034700312167770965, "loss": 0.1758, "step": 270340 }, { "epoch": 11.2, "grad_norm": 0.2158203125, "learning_rate": 0.00034699312606799326, "loss": 0.2004, "step": 270350 }, { "epoch": 11.2, "grad_norm": 1.34375, "learning_rate": 0.0003469831302757462, "loss": 0.1912, "step": 270360 }, { "epoch": 11.2, "grad_norm": 0.71484375, "learning_rate": 0.00034697313430098747, "loss": 0.1976, "step": 270370 }, { "epoch": 11.2, "grad_norm": 0.267578125, "learning_rate": 0.0003469631381437358, "loss": 0.1965, "step": 270380 }, { "epoch": 11.2, "grad_norm": 0.7734375, "learning_rate": 0.00034695314180400983, "loss": 0.1004, "step": 270390 }, { "epoch": 11.2, "grad_norm": 1.125, "learning_rate": 0.0003469431452818286, "loss": 0.2057, "step": 270400 }, { "epoch": 11.2, "grad_norm": 0.59375, "learning_rate": 0.00034693314857721087, "loss": 0.1698, "step": 270410 }, { "epoch": 11.2, "grad_norm": 0.82421875, "learning_rate": 0.00034692315169017535, "loss": 0.1931, "step": 270420 }, { "epoch": 11.2, "grad_norm": 0.486328125, "learning_rate": 0.00034691315462074103, "loss": 0.2146, "step": 270430 }, { "epoch": 11.2, "grad_norm": 0.0, "learning_rate": 0.00034690315736892645, "loss": 0.2275, "step": 270440 }, { "epoch": 11.2, "grad_norm": 1.1640625, "learning_rate": 0.00034689315993475075, "loss": 0.2615, "step": 270450 }, { "epoch": 11.2, "grad_norm": 0.427734375, "learning_rate": 0.0003468831623182325, "loss": 0.2235, "step": 270460 }, { "epoch": 11.2, "grad_norm": 0.89453125, "learning_rate": 0.0003468731645193906, "loss": 0.1954, "step": 270470 }, { "epoch": 11.2, "grad_norm": 1.4609375, "learning_rate": 0.00034686316653824396, "loss": 0.1902, "step": 270480 }, { "epoch": 11.2, "grad_norm": 1.0625, "learning_rate": 0.0003468531683748112, "loss": 0.1806, "step": 270490 }, { "epoch": 11.2, "grad_norm": 0.40234375, "learning_rate": 0.00034684317002911123, "loss": 0.1907, "step": 270500 }, { "epoch": 11.2, "grad_norm": 1.2421875, "learning_rate": 0.000346833171501163, "loss": 0.2521, "step": 270510 }, { "epoch": 11.2, "grad_norm": 0.345703125, "learning_rate": 0.0003468231727909851, "loss": 0.1882, "step": 270520 }, { "epoch": 11.21, "grad_norm": 2.34375, "learning_rate": 0.00034681317389859644, "loss": 0.2013, "step": 270530 }, { "epoch": 11.21, "grad_norm": 0.5625, "learning_rate": 0.00034680317482401585, "loss": 0.1724, "step": 270540 }, { "epoch": 11.21, "grad_norm": 0.369140625, "learning_rate": 0.00034679317556726214, "loss": 0.2107, "step": 270550 }, { "epoch": 11.21, "grad_norm": 0.51953125, "learning_rate": 0.0003467831761283542, "loss": 0.1955, "step": 270560 }, { "epoch": 11.21, "grad_norm": 0.4453125, "learning_rate": 0.00034677317650731065, "loss": 0.1662, "step": 270570 }, { "epoch": 11.21, "grad_norm": 0.69140625, "learning_rate": 0.0003467631767041505, "loss": 0.2168, "step": 270580 }, { "epoch": 11.21, "grad_norm": 1.515625, "learning_rate": 0.0003467531767188925, "loss": 0.2227, "step": 270590 }, { "epoch": 11.21, "grad_norm": 0.69921875, "learning_rate": 0.0003467431765515554, "loss": 0.2178, "step": 270600 }, { "epoch": 11.21, "grad_norm": 0.7421875, "learning_rate": 0.0003467331762021582, "loss": 0.1939, "step": 270610 }, { "epoch": 11.21, "grad_norm": 0.6640625, "learning_rate": 0.00034672317567071957, "loss": 0.2015, "step": 270620 }, { "epoch": 11.21, "grad_norm": 0.5234375, "learning_rate": 0.00034671317495725833, "loss": 0.1828, "step": 270630 }, { "epoch": 11.21, "grad_norm": 0.79296875, "learning_rate": 0.0003467031740617934, "loss": 0.2104, "step": 270640 }, { "epoch": 11.21, "grad_norm": 0.201171875, "learning_rate": 0.00034669317298434356, "loss": 0.202, "step": 270650 }, { "epoch": 11.21, "grad_norm": 0.45703125, "learning_rate": 0.00034668317172492755, "loss": 0.2001, "step": 270660 }, { "epoch": 11.21, "grad_norm": 0.80078125, "learning_rate": 0.0003466731702835642, "loss": 0.1833, "step": 270670 }, { "epoch": 11.21, "grad_norm": 0.828125, "learning_rate": 0.0003466631686602724, "loss": 0.1964, "step": 270680 }, { "epoch": 11.21, "grad_norm": 1.578125, "learning_rate": 0.0003466531668550711, "loss": 0.1411, "step": 270690 }, { "epoch": 11.21, "grad_norm": 0.181640625, "learning_rate": 0.00034664316486797886, "loss": 0.184, "step": 270700 }, { "epoch": 11.21, "grad_norm": 0.9765625, "learning_rate": 0.0003466331626990146, "loss": 0.1552, "step": 270710 }, { "epoch": 11.21, "grad_norm": 0.734375, "learning_rate": 0.00034662316034819724, "loss": 0.1845, "step": 270720 }, { "epoch": 11.21, "grad_norm": 0.78125, "learning_rate": 0.00034661315781554547, "loss": 0.2325, "step": 270730 }, { "epoch": 11.21, "grad_norm": 0.5546875, "learning_rate": 0.0003466031551010782, "loss": 0.1956, "step": 270740 }, { "epoch": 11.21, "grad_norm": 0.287109375, "learning_rate": 0.0003465931522048142, "loss": 0.1847, "step": 270750 }, { "epoch": 11.21, "grad_norm": 0.9453125, "learning_rate": 0.0003465831491267723, "loss": 0.2544, "step": 270760 }, { "epoch": 11.22, "grad_norm": 0.96484375, "learning_rate": 0.0003465731458669714, "loss": 0.1748, "step": 270770 }, { "epoch": 11.22, "grad_norm": 0.546875, "learning_rate": 0.00034656314242543026, "loss": 0.1652, "step": 270780 }, { "epoch": 11.22, "grad_norm": 0.66015625, "learning_rate": 0.00034655313880216766, "loss": 0.1655, "step": 270790 }, { "epoch": 11.22, "grad_norm": 0.828125, "learning_rate": 0.0003465431349972025, "loss": 0.2255, "step": 270800 }, { "epoch": 11.22, "grad_norm": 0.52734375, "learning_rate": 0.00034653313101055364, "loss": 0.2258, "step": 270810 }, { "epoch": 11.22, "grad_norm": 0.578125, "learning_rate": 0.0003465231268422398, "loss": 0.157, "step": 270820 }, { "epoch": 11.22, "grad_norm": 1.2265625, "learning_rate": 0.00034651312249227984, "loss": 0.2155, "step": 270830 }, { "epoch": 11.22, "grad_norm": 0.51171875, "learning_rate": 0.0003465031179606926, "loss": 0.1703, "step": 270840 }, { "epoch": 11.22, "grad_norm": 1.0234375, "learning_rate": 0.000346493113247497, "loss": 0.1898, "step": 270850 }, { "epoch": 11.22, "grad_norm": 0.60546875, "learning_rate": 0.00034648310835271175, "loss": 0.1755, "step": 270860 }, { "epoch": 11.22, "grad_norm": 0.625, "learning_rate": 0.00034647310327635565, "loss": 0.1725, "step": 270870 }, { "epoch": 11.22, "grad_norm": 0.859375, "learning_rate": 0.00034646309801844767, "loss": 0.2268, "step": 270880 }, { "epoch": 11.22, "grad_norm": 1.4140625, "learning_rate": 0.0003464530925790065, "loss": 0.2237, "step": 270890 }, { "epoch": 11.22, "grad_norm": 2.09375, "learning_rate": 0.00034644308695805105, "loss": 0.2052, "step": 270900 }, { "epoch": 11.22, "grad_norm": 0.625, "learning_rate": 0.00034643308115560014, "loss": 0.189, "step": 270910 }, { "epoch": 11.22, "grad_norm": 1.0703125, "learning_rate": 0.0003464230751716725, "loss": 0.2091, "step": 270920 }, { "epoch": 11.22, "grad_norm": 0.59375, "learning_rate": 0.0003464130690062872, "loss": 0.1444, "step": 270930 }, { "epoch": 11.22, "grad_norm": 0.89453125, "learning_rate": 0.00034640306265946275, "loss": 0.1659, "step": 270940 }, { "epoch": 11.22, "grad_norm": 0.87890625, "learning_rate": 0.00034639305613121823, "loss": 0.1892, "step": 270950 }, { "epoch": 11.22, "grad_norm": 1.09375, "learning_rate": 0.0003463830494215724, "loss": 0.1665, "step": 270960 }, { "epoch": 11.22, "grad_norm": 0.51953125, "learning_rate": 0.000346373042530544, "loss": 0.1909, "step": 270970 }, { "epoch": 11.22, "grad_norm": 1.6171875, "learning_rate": 0.00034636303545815207, "loss": 0.1426, "step": 270980 }, { "epoch": 11.22, "grad_norm": 1.0390625, "learning_rate": 0.00034635302820441524, "loss": 0.2498, "step": 270990 }, { "epoch": 11.22, "grad_norm": 0.95703125, "learning_rate": 0.00034634302076935237, "loss": 0.2247, "step": 271000 }, { "epoch": 11.23, "grad_norm": 0.94140625, "learning_rate": 0.0003463330131529824, "loss": 0.2097, "step": 271010 }, { "epoch": 11.23, "grad_norm": 0.52734375, "learning_rate": 0.00034632300535532415, "loss": 0.1645, "step": 271020 }, { "epoch": 11.23, "grad_norm": 0.83203125, "learning_rate": 0.00034631299737639625, "loss": 0.2185, "step": 271030 }, { "epoch": 11.23, "grad_norm": 0.44140625, "learning_rate": 0.00034630298921621783, "loss": 0.1807, "step": 271040 }, { "epoch": 11.23, "grad_norm": 0.69921875, "learning_rate": 0.00034629298087480754, "loss": 0.1922, "step": 271050 }, { "epoch": 11.23, "grad_norm": 0.341796875, "learning_rate": 0.00034628297235218423, "loss": 0.1986, "step": 271060 }, { "epoch": 11.23, "grad_norm": 0.435546875, "learning_rate": 0.0003462729636483668, "loss": 0.1683, "step": 271070 }, { "epoch": 11.23, "grad_norm": 0.6953125, "learning_rate": 0.00034626295476337406, "loss": 0.1814, "step": 271080 }, { "epoch": 11.23, "grad_norm": 0.91015625, "learning_rate": 0.00034625294569722476, "loss": 0.1791, "step": 271090 }, { "epoch": 11.23, "grad_norm": 0.62109375, "learning_rate": 0.0003462429364499379, "loss": 0.1837, "step": 271100 }, { "epoch": 11.23, "grad_norm": 0.8984375, "learning_rate": 0.0003462329270215322, "loss": 0.1819, "step": 271110 }, { "epoch": 11.23, "grad_norm": 1.296875, "learning_rate": 0.0003462229174120265, "loss": 0.1596, "step": 271120 }, { "epoch": 11.23, "grad_norm": 0.62890625, "learning_rate": 0.00034621290762143974, "loss": 0.2113, "step": 271130 }, { "epoch": 11.23, "grad_norm": 1.03125, "learning_rate": 0.0003462028976497906, "loss": 0.1585, "step": 271140 }, { "epoch": 11.23, "grad_norm": 0.50390625, "learning_rate": 0.000346192887497098, "loss": 0.2678, "step": 271150 }, { "epoch": 11.23, "grad_norm": 0.875, "learning_rate": 0.0003461828771633808, "loss": 0.2143, "step": 271160 }, { "epoch": 11.23, "grad_norm": 0.5859375, "learning_rate": 0.0003461728666486578, "loss": 0.1999, "step": 271170 }, { "epoch": 11.23, "grad_norm": 0.7890625, "learning_rate": 0.00034616285595294785, "loss": 0.2294, "step": 271180 }, { "epoch": 11.23, "grad_norm": 2.09375, "learning_rate": 0.0003461528450762698, "loss": 0.1531, "step": 271190 }, { "epoch": 11.23, "grad_norm": 0.55078125, "learning_rate": 0.0003461428340186424, "loss": 0.1784, "step": 271200 }, { "epoch": 11.23, "grad_norm": 1.7265625, "learning_rate": 0.0003461328227800847, "loss": 0.1795, "step": 271210 }, { "epoch": 11.23, "grad_norm": 0.349609375, "learning_rate": 0.0003461228113606152, "loss": 0.1426, "step": 271220 }, { "epoch": 11.23, "grad_norm": 1.5859375, "learning_rate": 0.0003461127997602532, "loss": 0.203, "step": 271230 }, { "epoch": 11.23, "grad_norm": 0.65234375, "learning_rate": 0.0003461027879790172, "loss": 0.193, "step": 271240 }, { "epoch": 11.24, "grad_norm": 0.8125, "learning_rate": 0.00034609277601692606, "loss": 0.1683, "step": 271250 }, { "epoch": 11.24, "grad_norm": 0.494140625, "learning_rate": 0.0003460827638739988, "loss": 0.1996, "step": 271260 }, { "epoch": 11.24, "grad_norm": 0.95703125, "learning_rate": 0.0003460727515502541, "loss": 0.1943, "step": 271270 }, { "epoch": 11.24, "grad_norm": 0.45703125, "learning_rate": 0.0003460627390457108, "loss": 0.2071, "step": 271280 }, { "epoch": 11.24, "grad_norm": 1.8125, "learning_rate": 0.00034605272636038787, "loss": 0.1719, "step": 271290 }, { "epoch": 11.24, "grad_norm": 1.8984375, "learning_rate": 0.0003460427134943041, "loss": 0.2153, "step": 271300 }, { "epoch": 11.24, "grad_norm": 0.466796875, "learning_rate": 0.00034603270044747825, "loss": 0.154, "step": 271310 }, { "epoch": 11.24, "grad_norm": 0.6328125, "learning_rate": 0.0003460226872199293, "loss": 0.232, "step": 271320 }, { "epoch": 11.24, "grad_norm": 1.265625, "learning_rate": 0.0003460126738116759, "loss": 0.1993, "step": 271330 }, { "epoch": 11.24, "grad_norm": 1.5546875, "learning_rate": 0.0003460026602227372, "loss": 0.1964, "step": 271340 }, { "epoch": 11.24, "grad_norm": 1.3203125, "learning_rate": 0.00034599264645313174, "loss": 0.1824, "step": 271350 }, { "epoch": 11.24, "grad_norm": 1.2109375, "learning_rate": 0.00034598263250287846, "loss": 0.2246, "step": 271360 }, { "epoch": 11.24, "grad_norm": 0.765625, "learning_rate": 0.0003459726183719963, "loss": 0.2142, "step": 271370 }, { "epoch": 11.24, "grad_norm": 0.79296875, "learning_rate": 0.000345962604060504, "loss": 0.2255, "step": 271380 }, { "epoch": 11.24, "grad_norm": 1.1953125, "learning_rate": 0.0003459525895684205, "loss": 0.206, "step": 271390 }, { "epoch": 11.24, "grad_norm": 1.6015625, "learning_rate": 0.00034594257489576453, "loss": 0.1547, "step": 271400 }, { "epoch": 11.24, "grad_norm": 0.68359375, "learning_rate": 0.000345932560042555, "loss": 0.1644, "step": 271410 }, { "epoch": 11.24, "grad_norm": 1.953125, "learning_rate": 0.00034592254500881084, "loss": 0.1939, "step": 271420 }, { "epoch": 11.24, "grad_norm": 0.69140625, "learning_rate": 0.0003459125297945506, "loss": 0.1736, "step": 271430 }, { "epoch": 11.24, "grad_norm": 1.375, "learning_rate": 0.0003459025143997935, "loss": 0.2106, "step": 271440 }, { "epoch": 11.24, "grad_norm": 0.765625, "learning_rate": 0.0003458924988245582, "loss": 0.2013, "step": 271450 }, { "epoch": 11.24, "grad_norm": 0.921875, "learning_rate": 0.00034588248306886355, "loss": 0.2122, "step": 271460 }, { "epoch": 11.24, "grad_norm": 0.58984375, "learning_rate": 0.00034587246713272844, "loss": 0.1715, "step": 271470 }, { "epoch": 11.24, "grad_norm": 0.73046875, "learning_rate": 0.00034586245101617173, "loss": 0.1431, "step": 271480 }, { "epoch": 11.25, "grad_norm": 1.03125, "learning_rate": 0.0003458524347192122, "loss": 0.199, "step": 271490 }, { "epoch": 11.25, "grad_norm": 0.9921875, "learning_rate": 0.0003458424182418688, "loss": 0.1847, "step": 271500 }, { "epoch": 11.25, "grad_norm": 1.3359375, "learning_rate": 0.0003458324015841602, "loss": 0.2136, "step": 271510 }, { "epoch": 11.25, "grad_norm": 0.62890625, "learning_rate": 0.00034582238474610544, "loss": 0.1826, "step": 271520 }, { "epoch": 11.25, "grad_norm": 1.4921875, "learning_rate": 0.0003458123677277233, "loss": 0.1898, "step": 271530 }, { "epoch": 11.25, "grad_norm": 0.6875, "learning_rate": 0.00034580235052903265, "loss": 0.1805, "step": 271540 }, { "epoch": 11.25, "grad_norm": 0.75390625, "learning_rate": 0.00034579233315005234, "loss": 0.1808, "step": 271550 }, { "epoch": 11.25, "grad_norm": 0.6796875, "learning_rate": 0.0003457823155908011, "loss": 0.191, "step": 271560 }, { "epoch": 11.25, "grad_norm": 0.93359375, "learning_rate": 0.00034577229785129793, "loss": 0.2403, "step": 271570 }, { "epoch": 11.25, "grad_norm": 0.75390625, "learning_rate": 0.00034576227993156166, "loss": 0.1993, "step": 271580 }, { "epoch": 11.25, "grad_norm": 1.1171875, "learning_rate": 0.0003457522618316111, "loss": 0.1882, "step": 271590 }, { "epoch": 11.25, "grad_norm": 0.90234375, "learning_rate": 0.00034574224355146514, "loss": 0.2142, "step": 271600 }, { "epoch": 11.25, "grad_norm": 0.19140625, "learning_rate": 0.0003457322250911426, "loss": 0.1774, "step": 271610 }, { "epoch": 11.25, "grad_norm": 0.80859375, "learning_rate": 0.00034572220645066236, "loss": 0.1846, "step": 271620 }, { "epoch": 11.25, "grad_norm": 0.59765625, "learning_rate": 0.0003457121876300433, "loss": 0.1439, "step": 271630 }, { "epoch": 11.25, "grad_norm": 0.408203125, "learning_rate": 0.0003457021686293042, "loss": 0.1654, "step": 271640 }, { "epoch": 11.25, "grad_norm": 0.4296875, "learning_rate": 0.00034569214944846395, "loss": 0.1981, "step": 271650 }, { "epoch": 11.25, "grad_norm": 0.875, "learning_rate": 0.0003456821300875415, "loss": 0.1933, "step": 271660 }, { "epoch": 11.25, "grad_norm": 1.3984375, "learning_rate": 0.00034567211054655546, "loss": 0.2054, "step": 271670 }, { "epoch": 11.25, "grad_norm": 1.0234375, "learning_rate": 0.0003456620908255249, "loss": 0.1758, "step": 271680 }, { "epoch": 11.25, "grad_norm": 0.8203125, "learning_rate": 0.00034565207092446867, "loss": 0.2015, "step": 271690 }, { "epoch": 11.25, "grad_norm": 0.87109375, "learning_rate": 0.0003456420508434055, "loss": 0.1862, "step": 271700 }, { "epoch": 11.25, "grad_norm": 0.7578125, "learning_rate": 0.00034563203058235437, "loss": 0.2167, "step": 271710 }, { "epoch": 11.25, "grad_norm": 0.2451171875, "learning_rate": 0.0003456220101413341, "loss": 0.1762, "step": 271720 }, { "epoch": 11.26, "grad_norm": 0.54296875, "learning_rate": 0.00034561198952036344, "loss": 0.1497, "step": 271730 }, { "epoch": 11.26, "grad_norm": 0.6328125, "learning_rate": 0.0003456019687194615, "loss": 0.1734, "step": 271740 }, { "epoch": 11.26, "grad_norm": 0.3046875, "learning_rate": 0.00034559194773864675, "loss": 0.1673, "step": 271750 }, { "epoch": 11.26, "grad_norm": 0.55078125, "learning_rate": 0.0003455819265779385, "loss": 0.1709, "step": 271760 }, { "epoch": 11.26, "grad_norm": 0.98046875, "learning_rate": 0.00034557190523735527, "loss": 0.1988, "step": 271770 }, { "epoch": 11.26, "grad_norm": 0.83203125, "learning_rate": 0.00034556188371691603, "loss": 0.1677, "step": 271780 }, { "epoch": 11.26, "grad_norm": 0.85546875, "learning_rate": 0.0003455518620166397, "loss": 0.1882, "step": 271790 }, { "epoch": 11.26, "grad_norm": 0.98828125, "learning_rate": 0.000345541840136545, "loss": 0.2124, "step": 271800 }, { "epoch": 11.26, "grad_norm": 0.76953125, "learning_rate": 0.00034553181807665096, "loss": 0.1862, "step": 271810 }, { "epoch": 11.26, "grad_norm": 0.72265625, "learning_rate": 0.0003455217958369764, "loss": 0.1886, "step": 271820 }, { "epoch": 11.26, "grad_norm": 0.98828125, "learning_rate": 0.00034551177341754, "loss": 0.2297, "step": 271830 }, { "epoch": 11.26, "grad_norm": 0.44921875, "learning_rate": 0.00034550175081836084, "loss": 0.1883, "step": 271840 }, { "epoch": 11.26, "grad_norm": 0.75390625, "learning_rate": 0.0003454917280394577, "loss": 0.2039, "step": 271850 }, { "epoch": 11.26, "grad_norm": 1.1015625, "learning_rate": 0.00034548170508084936, "loss": 0.2273, "step": 271860 }, { "epoch": 11.26, "grad_norm": 0.2734375, "learning_rate": 0.00034547168194255486, "loss": 0.233, "step": 271870 }, { "epoch": 11.26, "grad_norm": 1.28125, "learning_rate": 0.0003454616586245929, "loss": 0.1996, "step": 271880 }, { "epoch": 11.26, "grad_norm": 0.0, "learning_rate": 0.0003454516351269824, "loss": 0.2115, "step": 271890 }, { "epoch": 11.26, "grad_norm": 0.7265625, "learning_rate": 0.00034544161144974233, "loss": 0.1822, "step": 271900 }, { "epoch": 11.26, "grad_norm": 0.75, "learning_rate": 0.00034543158759289133, "loss": 0.2409, "step": 271910 }, { "epoch": 11.26, "grad_norm": 0.375, "learning_rate": 0.00034542156355644847, "loss": 0.1479, "step": 271920 }, { "epoch": 11.26, "grad_norm": 1.0546875, "learning_rate": 0.0003454115393404325, "loss": 0.2077, "step": 271930 }, { "epoch": 11.26, "grad_norm": 1.3828125, "learning_rate": 0.0003454015149448623, "loss": 0.2062, "step": 271940 }, { "epoch": 11.26, "grad_norm": 0.68359375, "learning_rate": 0.0003453914903697568, "loss": 0.2003, "step": 271950 }, { "epoch": 11.26, "grad_norm": 0.66015625, "learning_rate": 0.00034538146561513474, "loss": 0.2024, "step": 271960 }, { "epoch": 11.26, "grad_norm": 0.75390625, "learning_rate": 0.0003453714406810152, "loss": 0.2491, "step": 271970 }, { "epoch": 11.27, "grad_norm": 2.015625, "learning_rate": 0.00034536141556741677, "loss": 0.2161, "step": 271980 }, { "epoch": 11.27, "grad_norm": 0.62890625, "learning_rate": 0.0003453513902743585, "loss": 0.1973, "step": 271990 }, { "epoch": 11.27, "grad_norm": 1.296875, "learning_rate": 0.0003453413648018592, "loss": 0.2581, "step": 272000 }, { "epoch": 11.27, "grad_norm": 0.484375, "learning_rate": 0.00034533133914993783, "loss": 0.2068, "step": 272010 }, { "epoch": 11.27, "grad_norm": 0.69140625, "learning_rate": 0.0003453213133186131, "loss": 0.2118, "step": 272020 }, { "epoch": 11.27, "grad_norm": 1.0625, "learning_rate": 0.000345311287307904, "loss": 0.1606, "step": 272030 }, { "epoch": 11.27, "grad_norm": 1.2578125, "learning_rate": 0.00034530126111782924, "loss": 0.1765, "step": 272040 }, { "epoch": 11.27, "grad_norm": 0.75390625, "learning_rate": 0.00034529123474840784, "loss": 0.2003, "step": 272050 }, { "epoch": 11.27, "grad_norm": 0.447265625, "learning_rate": 0.0003452812081996587, "loss": 0.2179, "step": 272060 }, { "epoch": 11.27, "grad_norm": 0.63671875, "learning_rate": 0.0003452711814716005, "loss": 0.1856, "step": 272070 }, { "epoch": 11.27, "grad_norm": 0.68359375, "learning_rate": 0.0003452611545642524, "loss": 0.2, "step": 272080 }, { "epoch": 11.27, "grad_norm": 0.5546875, "learning_rate": 0.0003452511274776329, "loss": 0.2173, "step": 272090 }, { "epoch": 11.27, "grad_norm": 1.078125, "learning_rate": 0.0003452411002117612, "loss": 0.198, "step": 272100 }, { "epoch": 11.27, "grad_norm": 0.466796875, "learning_rate": 0.00034523107276665597, "loss": 0.1861, "step": 272110 }, { "epoch": 11.27, "grad_norm": 1.28125, "learning_rate": 0.00034522104514233615, "loss": 0.2071, "step": 272120 }, { "epoch": 11.27, "grad_norm": 0.353515625, "learning_rate": 0.00034521101733882064, "loss": 0.1769, "step": 272130 }, { "epoch": 11.27, "grad_norm": 0.78515625, "learning_rate": 0.00034520098935612826, "loss": 0.2076, "step": 272140 }, { "epoch": 11.27, "grad_norm": 2.625, "learning_rate": 0.0003451909611942779, "loss": 0.1891, "step": 272150 }, { "epoch": 11.27, "grad_norm": 0.46484375, "learning_rate": 0.00034518093285328843, "loss": 0.2051, "step": 272160 }, { "epoch": 11.27, "grad_norm": 0.392578125, "learning_rate": 0.0003451709043331787, "loss": 0.1764, "step": 272170 }, { "epoch": 11.27, "grad_norm": 0.60546875, "learning_rate": 0.00034516087563396763, "loss": 0.2088, "step": 272180 }, { "epoch": 11.27, "grad_norm": 0.8046875, "learning_rate": 0.00034515084675567406, "loss": 0.1706, "step": 272190 }, { "epoch": 11.27, "grad_norm": 1.875, "learning_rate": 0.00034514081769831685, "loss": 0.2442, "step": 272200 }, { "epoch": 11.27, "grad_norm": 0.625, "learning_rate": 0.000345130788461915, "loss": 0.2065, "step": 272210 }, { "epoch": 11.28, "grad_norm": 0.5234375, "learning_rate": 0.00034512075904648715, "loss": 0.1802, "step": 272220 }, { "epoch": 11.28, "grad_norm": 1.1875, "learning_rate": 0.00034511072945205234, "loss": 0.2134, "step": 272230 }, { "epoch": 11.28, "grad_norm": 0.6484375, "learning_rate": 0.0003451006996786295, "loss": 0.1492, "step": 272240 }, { "epoch": 11.28, "grad_norm": 0.9921875, "learning_rate": 0.00034509066972623724, "loss": 0.2002, "step": 272250 }, { "epoch": 11.28, "grad_norm": 0.50390625, "learning_rate": 0.00034508063959489473, "loss": 0.2049, "step": 272260 }, { "epoch": 11.28, "grad_norm": 1.0546875, "learning_rate": 0.0003450706092846207, "loss": 0.2093, "step": 272270 }, { "epoch": 11.28, "grad_norm": 0.5625, "learning_rate": 0.000345060578795434, "loss": 0.173, "step": 272280 }, { "epoch": 11.28, "grad_norm": 0.921875, "learning_rate": 0.0003450505481273537, "loss": 0.1801, "step": 272290 }, { "epoch": 11.28, "grad_norm": 0.6484375, "learning_rate": 0.00034504051728039844, "loss": 0.2305, "step": 272300 }, { "epoch": 11.28, "grad_norm": 2.296875, "learning_rate": 0.00034503048625458713, "loss": 0.139, "step": 272310 }, { "epoch": 11.28, "grad_norm": 0.61328125, "learning_rate": 0.00034502045504993873, "loss": 0.2555, "step": 272320 }, { "epoch": 11.28, "grad_norm": 0.578125, "learning_rate": 0.00034501042366647216, "loss": 0.1965, "step": 272330 }, { "epoch": 11.28, "grad_norm": 0.4140625, "learning_rate": 0.0003450003921042062, "loss": 0.1647, "step": 272340 }, { "epoch": 11.28, "grad_norm": 2.078125, "learning_rate": 0.00034499036036315977, "loss": 0.2597, "step": 272350 }, { "epoch": 11.28, "grad_norm": 0.84765625, "learning_rate": 0.0003449803284433517, "loss": 0.2039, "step": 272360 }, { "epoch": 11.28, "grad_norm": 0.4375, "learning_rate": 0.00034497029634480097, "loss": 0.1654, "step": 272370 }, { "epoch": 11.28, "grad_norm": 0.921875, "learning_rate": 0.0003449602640675264, "loss": 0.195, "step": 272380 }, { "epoch": 11.28, "grad_norm": 2.15625, "learning_rate": 0.0003449502316115468, "loss": 0.1866, "step": 272390 }, { "epoch": 11.28, "grad_norm": 1.1484375, "learning_rate": 0.00034494019897688116, "loss": 0.2276, "step": 272400 }, { "epoch": 11.28, "grad_norm": 0.83203125, "learning_rate": 0.0003449301661635483, "loss": 0.19, "step": 272410 }, { "epoch": 11.28, "grad_norm": 0.4296875, "learning_rate": 0.00034492013317156717, "loss": 0.1956, "step": 272420 }, { "epoch": 11.28, "grad_norm": 0.8671875, "learning_rate": 0.00034491010000095655, "loss": 0.1578, "step": 272430 }, { "epoch": 11.28, "grad_norm": 0.7421875, "learning_rate": 0.0003449000666517353, "loss": 0.1726, "step": 272440 }, { "epoch": 11.28, "grad_norm": 0.8046875, "learning_rate": 0.00034489003312392254, "loss": 0.1758, "step": 272450 }, { "epoch": 11.29, "grad_norm": 0.609375, "learning_rate": 0.0003448799994175369, "loss": 0.2073, "step": 272460 }, { "epoch": 11.29, "grad_norm": 0.9609375, "learning_rate": 0.0003448699655325974, "loss": 0.178, "step": 272470 }, { "epoch": 11.29, "grad_norm": 0.439453125, "learning_rate": 0.00034485993146912274, "loss": 0.2252, "step": 272480 }, { "epoch": 11.29, "grad_norm": 0.890625, "learning_rate": 0.00034484989722713205, "loss": 0.1593, "step": 272490 }, { "epoch": 11.29, "grad_norm": 0.73046875, "learning_rate": 0.00034483986280664405, "loss": 0.1634, "step": 272500 }, { "epoch": 11.29, "grad_norm": 0.5703125, "learning_rate": 0.0003448298282076777, "loss": 0.2135, "step": 272510 }, { "epoch": 11.29, "grad_norm": 0.671875, "learning_rate": 0.00034481979343025184, "loss": 0.1696, "step": 272520 }, { "epoch": 11.29, "grad_norm": 1.03125, "learning_rate": 0.00034480975847438536, "loss": 0.1996, "step": 272530 }, { "epoch": 11.29, "grad_norm": 0.486328125, "learning_rate": 0.00034479972334009715, "loss": 0.1726, "step": 272540 }, { "epoch": 11.29, "grad_norm": 0.83203125, "learning_rate": 0.00034478968802740607, "loss": 0.2041, "step": 272550 }, { "epoch": 11.29, "grad_norm": 0.83984375, "learning_rate": 0.00034477965253633104, "loss": 0.2159, "step": 272560 }, { "epoch": 11.29, "grad_norm": 0.85546875, "learning_rate": 0.000344769616866891, "loss": 0.2573, "step": 272570 }, { "epoch": 11.29, "grad_norm": 0.287109375, "learning_rate": 0.0003447595810191048, "loss": 0.2172, "step": 272580 }, { "epoch": 11.29, "grad_norm": 2.40625, "learning_rate": 0.00034474954499299115, "loss": 0.211, "step": 272590 }, { "epoch": 11.29, "grad_norm": 0.7578125, "learning_rate": 0.0003447395087885692, "loss": 0.1805, "step": 272600 }, { "epoch": 11.29, "grad_norm": 0.48828125, "learning_rate": 0.00034472947240585765, "loss": 0.1714, "step": 272610 }, { "epoch": 11.29, "grad_norm": 0.828125, "learning_rate": 0.0003447194358448756, "loss": 0.1938, "step": 272620 }, { "epoch": 11.29, "grad_norm": 0.7265625, "learning_rate": 0.0003447093991056417, "loss": 0.217, "step": 272630 }, { "epoch": 11.29, "grad_norm": 0.4375, "learning_rate": 0.0003446993621881749, "loss": 0.1774, "step": 272640 }, { "epoch": 11.29, "grad_norm": 0.671875, "learning_rate": 0.0003446893250924943, "loss": 0.2117, "step": 272650 }, { "epoch": 11.29, "grad_norm": 1.5625, "learning_rate": 0.00034467928781861846, "loss": 0.2577, "step": 272660 }, { "epoch": 11.29, "grad_norm": 0.4375, "learning_rate": 0.00034466925036656643, "loss": 0.223, "step": 272670 }, { "epoch": 11.29, "grad_norm": 0.59375, "learning_rate": 0.00034465921273635716, "loss": 0.183, "step": 272680 }, { "epoch": 11.29, "grad_norm": 0.55078125, "learning_rate": 0.0003446491749280094, "loss": 0.1503, "step": 272690 }, { "epoch": 11.3, "grad_norm": 0.78515625, "learning_rate": 0.0003446391369415422, "loss": 0.2099, "step": 272700 }, { "epoch": 11.3, "grad_norm": 1.046875, "learning_rate": 0.00034462909877697434, "loss": 0.2299, "step": 272710 }, { "epoch": 11.3, "grad_norm": 0.0, "learning_rate": 0.00034461906043432465, "loss": 0.255, "step": 272720 }, { "epoch": 11.3, "grad_norm": 1.1640625, "learning_rate": 0.0003446090219136122, "loss": 0.2, "step": 272730 }, { "epoch": 11.3, "grad_norm": 0.166015625, "learning_rate": 0.0003445989832148557, "loss": 0.1601, "step": 272740 }, { "epoch": 11.3, "grad_norm": 2.53125, "learning_rate": 0.00034458894433807427, "loss": 0.2134, "step": 272750 }, { "epoch": 11.3, "grad_norm": 1.1484375, "learning_rate": 0.00034457890528328655, "loss": 0.2396, "step": 272760 }, { "epoch": 11.3, "grad_norm": 1.171875, "learning_rate": 0.0003445688660505116, "loss": 0.1945, "step": 272770 }, { "epoch": 11.3, "grad_norm": 0.62890625, "learning_rate": 0.0003445588266397682, "loss": 0.276, "step": 272780 }, { "epoch": 11.3, "grad_norm": 0.85546875, "learning_rate": 0.0003445487870510754, "loss": 0.1995, "step": 272790 }, { "epoch": 11.3, "grad_norm": 0.62890625, "learning_rate": 0.0003445387472844519, "loss": 0.2198, "step": 272800 }, { "epoch": 11.3, "grad_norm": 1.1953125, "learning_rate": 0.0003445287073399167, "loss": 0.1531, "step": 272810 }, { "epoch": 11.3, "grad_norm": 1.15625, "learning_rate": 0.00034451866721748865, "loss": 0.1697, "step": 272820 }, { "epoch": 11.3, "grad_norm": 1.1015625, "learning_rate": 0.00034450862691718677, "loss": 0.2284, "step": 272830 }, { "epoch": 11.3, "grad_norm": 1.265625, "learning_rate": 0.00034449858643902976, "loss": 0.2005, "step": 272840 }, { "epoch": 11.3, "grad_norm": 0.0, "learning_rate": 0.00034448854578303666, "loss": 0.2019, "step": 272850 }, { "epoch": 11.3, "grad_norm": 0.6953125, "learning_rate": 0.00034447850494922637, "loss": 0.2326, "step": 272860 }, { "epoch": 11.3, "grad_norm": 0.77734375, "learning_rate": 0.00034446846393761766, "loss": 0.2486, "step": 272870 }, { "epoch": 11.3, "grad_norm": 0.5, "learning_rate": 0.00034445842274822956, "loss": 0.1904, "step": 272880 }, { "epoch": 11.3, "grad_norm": 0.67578125, "learning_rate": 0.0003444483813810809, "loss": 0.1763, "step": 272890 }, { "epoch": 11.3, "grad_norm": 1.3359375, "learning_rate": 0.00034443833983619057, "loss": 0.1389, "step": 272900 }, { "epoch": 11.3, "grad_norm": 0.6953125, "learning_rate": 0.0003444282981135775, "loss": 0.1946, "step": 272910 }, { "epoch": 11.3, "grad_norm": 1.125, "learning_rate": 0.0003444182562132605, "loss": 0.138, "step": 272920 }, { "epoch": 11.3, "grad_norm": 1.625, "learning_rate": 0.00034440821413525856, "loss": 0.1861, "step": 272930 }, { "epoch": 11.31, "grad_norm": 0.578125, "learning_rate": 0.0003443981718795907, "loss": 0.1946, "step": 272940 }, { "epoch": 11.31, "grad_norm": 0.93359375, "learning_rate": 0.00034438812944627547, "loss": 0.2402, "step": 272950 }, { "epoch": 11.31, "grad_norm": 0.205078125, "learning_rate": 0.00034437808683533206, "loss": 0.2554, "step": 272960 }, { "epoch": 11.31, "grad_norm": 0.474609375, "learning_rate": 0.00034436804404677936, "loss": 0.1993, "step": 272970 }, { "epoch": 11.31, "grad_norm": 0.35546875, "learning_rate": 0.000344358001080636, "loss": 0.2077, "step": 272980 }, { "epoch": 11.31, "grad_norm": 1.1875, "learning_rate": 0.00034434795793692125, "loss": 0.1919, "step": 272990 }, { "epoch": 11.31, "grad_norm": 0.90234375, "learning_rate": 0.0003443379146156538, "loss": 0.1908, "step": 273000 }, { "epoch": 11.31, "grad_norm": 0.92578125, "learning_rate": 0.0003443278711168525, "loss": 0.2387, "step": 273010 }, { "epoch": 11.31, "grad_norm": 0.95703125, "learning_rate": 0.0003443178274405364, "loss": 0.1732, "step": 273020 }, { "epoch": 11.31, "grad_norm": 0.7734375, "learning_rate": 0.00034430778358672434, "loss": 0.207, "step": 273030 }, { "epoch": 11.31, "grad_norm": 0.349609375, "learning_rate": 0.0003442977395554352, "loss": 0.1077, "step": 273040 }, { "epoch": 11.31, "grad_norm": 0.77734375, "learning_rate": 0.0003442876953466879, "loss": 0.18, "step": 273050 }, { "epoch": 11.31, "grad_norm": 1.125, "learning_rate": 0.0003442776509605013, "loss": 0.2243, "step": 273060 }, { "epoch": 11.31, "grad_norm": 0.7890625, "learning_rate": 0.00034426760639689444, "loss": 0.1914, "step": 273070 }, { "epoch": 11.31, "grad_norm": 0.984375, "learning_rate": 0.000344257561655886, "loss": 0.1793, "step": 273080 }, { "epoch": 11.31, "grad_norm": 0.5390625, "learning_rate": 0.0003442475167374951, "loss": 0.1555, "step": 273090 }, { "epoch": 11.31, "grad_norm": 0.40234375, "learning_rate": 0.0003442374716417406, "loss": 0.1984, "step": 273100 }, { "epoch": 11.31, "grad_norm": 0.72265625, "learning_rate": 0.00034422742636864114, "loss": 0.1573, "step": 273110 }, { "epoch": 11.31, "grad_norm": 0.6640625, "learning_rate": 0.00034421738091821604, "loss": 0.2162, "step": 273120 }, { "epoch": 11.31, "grad_norm": 0.59375, "learning_rate": 0.00034420733529048393, "loss": 0.1837, "step": 273130 }, { "epoch": 11.31, "grad_norm": 0.99609375, "learning_rate": 0.00034419728948546373, "loss": 0.1896, "step": 273140 }, { "epoch": 11.31, "grad_norm": 0.99609375, "learning_rate": 0.0003441872435031745, "loss": 0.2336, "step": 273150 }, { "epoch": 11.31, "grad_norm": 0.3125, "learning_rate": 0.00034417719734363507, "loss": 0.2051, "step": 273160 }, { "epoch": 11.31, "grad_norm": 0.578125, "learning_rate": 0.0003441671510068642, "loss": 0.1895, "step": 273170 }, { "epoch": 11.32, "grad_norm": 0.53125, "learning_rate": 0.0003441571044928811, "loss": 0.2843, "step": 273180 }, { "epoch": 11.32, "grad_norm": 1.2421875, "learning_rate": 0.0003441470578017043, "loss": 0.1639, "step": 273190 }, { "epoch": 11.32, "grad_norm": 0.453125, "learning_rate": 0.000344137010933353, "loss": 0.1674, "step": 273200 }, { "epoch": 11.32, "grad_norm": 0.9921875, "learning_rate": 0.00034412696388784605, "loss": 0.194, "step": 273210 }, { "epoch": 11.32, "grad_norm": 0.77734375, "learning_rate": 0.0003441169166652023, "loss": 0.1675, "step": 273220 }, { "epoch": 11.32, "grad_norm": 0.76953125, "learning_rate": 0.00034410686926544066, "loss": 0.2257, "step": 273230 }, { "epoch": 11.32, "grad_norm": 1.015625, "learning_rate": 0.00034409682168858003, "loss": 0.2172, "step": 273240 }, { "epoch": 11.32, "grad_norm": 0.73046875, "learning_rate": 0.0003440867739346393, "loss": 0.2241, "step": 273250 }, { "epoch": 11.32, "grad_norm": 0.6640625, "learning_rate": 0.0003440767260036376, "loss": 0.1832, "step": 273260 }, { "epoch": 11.32, "grad_norm": 0.7265625, "learning_rate": 0.0003440666778955934, "loss": 0.2292, "step": 273270 }, { "epoch": 11.32, "grad_norm": 0.51953125, "learning_rate": 0.00034405662961052607, "loss": 0.186, "step": 273280 }, { "epoch": 11.32, "grad_norm": 0.7578125, "learning_rate": 0.00034404658114845426, "loss": 0.2307, "step": 273290 }, { "epoch": 11.32, "grad_norm": 0.69921875, "learning_rate": 0.0003440365325093969, "loss": 0.2315, "step": 273300 }, { "epoch": 11.32, "grad_norm": 0.6640625, "learning_rate": 0.0003440264836933731, "loss": 0.2161, "step": 273310 }, { "epoch": 11.32, "grad_norm": 1.0859375, "learning_rate": 0.00034401643470040143, "loss": 0.1873, "step": 273320 }, { "epoch": 11.32, "grad_norm": 0.921875, "learning_rate": 0.0003440063855305011, "loss": 0.204, "step": 273330 }, { "epoch": 11.32, "grad_norm": 0.9453125, "learning_rate": 0.00034399633618369086, "loss": 0.179, "step": 273340 }, { "epoch": 11.32, "grad_norm": 0.373046875, "learning_rate": 0.0003439862866599896, "loss": 0.1983, "step": 273350 }, { "epoch": 11.32, "grad_norm": 0.875, "learning_rate": 0.0003439762369594164, "loss": 0.1884, "step": 273360 }, { "epoch": 11.32, "grad_norm": 0.96484375, "learning_rate": 0.00034396618708199, "loss": 0.249, "step": 273370 }, { "epoch": 11.32, "grad_norm": 0.392578125, "learning_rate": 0.0003439561370277294, "loss": 0.2037, "step": 273380 }, { "epoch": 11.32, "grad_norm": 0.0076904296875, "learning_rate": 0.00034394608679665355, "loss": 0.2044, "step": 273390 }, { "epoch": 11.32, "grad_norm": 0.8671875, "learning_rate": 0.0003439360363887813, "loss": 0.1911, "step": 273400 }, { "epoch": 11.32, "grad_norm": 1.2578125, "learning_rate": 0.0003439259858041315, "loss": 0.1861, "step": 273410 }, { "epoch": 11.33, "grad_norm": 0.76171875, "learning_rate": 0.00034391593504272324, "loss": 0.1735, "step": 273420 }, { "epoch": 11.33, "grad_norm": 0.6640625, "learning_rate": 0.0003439058841045752, "loss": 0.1721, "step": 273430 }, { "epoch": 11.33, "grad_norm": 1.1875, "learning_rate": 0.00034389583298970655, "loss": 0.2075, "step": 273440 }, { "epoch": 11.33, "grad_norm": 1.21875, "learning_rate": 0.000343885781698136, "loss": 0.2046, "step": 273450 }, { "epoch": 11.33, "grad_norm": 0.72265625, "learning_rate": 0.00034387573022988263, "loss": 0.1962, "step": 273460 }, { "epoch": 11.33, "grad_norm": 0.8671875, "learning_rate": 0.0003438656785849652, "loss": 0.173, "step": 273470 }, { "epoch": 11.33, "grad_norm": 0.6953125, "learning_rate": 0.0003438556267634027, "loss": 0.1966, "step": 273480 }, { "epoch": 11.33, "grad_norm": 0.58984375, "learning_rate": 0.00034384557476521406, "loss": 0.1748, "step": 273490 }, { "epoch": 11.33, "grad_norm": 0.7734375, "learning_rate": 0.00034383552259041817, "loss": 0.1882, "step": 273500 }, { "epoch": 11.33, "grad_norm": 0.58203125, "learning_rate": 0.000343825470239034, "loss": 0.1921, "step": 273510 }, { "epoch": 11.33, "grad_norm": 0.86328125, "learning_rate": 0.00034381541771108043, "loss": 0.2294, "step": 273520 }, { "epoch": 11.33, "grad_norm": 1.09375, "learning_rate": 0.0003438053650065763, "loss": 0.1871, "step": 273530 }, { "epoch": 11.33, "grad_norm": 0.5703125, "learning_rate": 0.00034379531212554073, "loss": 0.1822, "step": 273540 }, { "epoch": 11.33, "grad_norm": 1.109375, "learning_rate": 0.00034378525906799247, "loss": 0.2186, "step": 273550 }, { "epoch": 11.33, "grad_norm": 0.62109375, "learning_rate": 0.00034377520583395046, "loss": 0.1699, "step": 273560 }, { "epoch": 11.33, "grad_norm": 0.83203125, "learning_rate": 0.0003437651524234336, "loss": 0.1933, "step": 273570 }, { "epoch": 11.33, "grad_norm": 0.7578125, "learning_rate": 0.00034375509883646083, "loss": 0.1779, "step": 273580 }, { "epoch": 11.33, "grad_norm": 0.6796875, "learning_rate": 0.00034374504507305115, "loss": 0.2133, "step": 273590 }, { "epoch": 11.33, "grad_norm": 1.1796875, "learning_rate": 0.00034373499113322345, "loss": 0.167, "step": 273600 }, { "epoch": 11.33, "grad_norm": 1.0625, "learning_rate": 0.00034372493701699655, "loss": 0.2032, "step": 273610 }, { "epoch": 11.33, "grad_norm": 0.73828125, "learning_rate": 0.00034371488272438947, "loss": 0.2279, "step": 273620 }, { "epoch": 11.33, "grad_norm": 0.69921875, "learning_rate": 0.0003437048282554211, "loss": 0.1743, "step": 273630 }, { "epoch": 11.33, "grad_norm": 0.859375, "learning_rate": 0.00034369477361011034, "loss": 0.1968, "step": 273640 }, { "epoch": 11.33, "grad_norm": 0.85546875, "learning_rate": 0.0003436847187884762, "loss": 0.1886, "step": 273650 }, { "epoch": 11.33, "grad_norm": 0.54296875, "learning_rate": 0.0003436746637905375, "loss": 0.1998, "step": 273660 }, { "epoch": 11.34, "grad_norm": 0.88671875, "learning_rate": 0.0003436646086163131, "loss": 0.1906, "step": 273670 }, { "epoch": 11.34, "grad_norm": 2.90625, "learning_rate": 0.0003436545532658222, "loss": 0.2482, "step": 273680 }, { "epoch": 11.34, "grad_norm": 1.1640625, "learning_rate": 0.0003436444977390834, "loss": 0.2406, "step": 273690 }, { "epoch": 11.34, "grad_norm": 0.6953125, "learning_rate": 0.0003436344420361158, "loss": 0.2009, "step": 273700 }, { "epoch": 11.34, "grad_norm": 1.046875, "learning_rate": 0.0003436243861569383, "loss": 0.2302, "step": 273710 }, { "epoch": 11.34, "grad_norm": 0.87890625, "learning_rate": 0.00034361433010156984, "loss": 0.1952, "step": 273720 }, { "epoch": 11.34, "grad_norm": 0.734375, "learning_rate": 0.00034360427387002937, "loss": 0.1843, "step": 273730 }, { "epoch": 11.34, "grad_norm": 0.76171875, "learning_rate": 0.00034359421746233565, "loss": 0.2098, "step": 273740 }, { "epoch": 11.34, "grad_norm": 1.2890625, "learning_rate": 0.00034358416087850775, "loss": 0.1867, "step": 273750 }, { "epoch": 11.34, "grad_norm": 0.640625, "learning_rate": 0.00034357410411856466, "loss": 0.2097, "step": 273760 }, { "epoch": 11.34, "grad_norm": 0.640625, "learning_rate": 0.0003435640471825251, "loss": 0.2364, "step": 273770 }, { "epoch": 11.34, "grad_norm": 0.5625, "learning_rate": 0.00034355399007040817, "loss": 0.1849, "step": 273780 }, { "epoch": 11.34, "grad_norm": 0.4375, "learning_rate": 0.00034354393278223267, "loss": 0.2187, "step": 273790 }, { "epoch": 11.34, "grad_norm": 2.265625, "learning_rate": 0.0003435338753180176, "loss": 0.1931, "step": 273800 }, { "epoch": 11.34, "grad_norm": 0.2119140625, "learning_rate": 0.00034352381767778197, "loss": 0.1969, "step": 273810 }, { "epoch": 11.34, "grad_norm": 1.0546875, "learning_rate": 0.00034351375986154455, "loss": 0.2093, "step": 273820 }, { "epoch": 11.34, "grad_norm": 0.62109375, "learning_rate": 0.0003435037018693243, "loss": 0.2041, "step": 273830 }, { "epoch": 11.34, "grad_norm": 2.734375, "learning_rate": 0.00034349364370114024, "loss": 0.1701, "step": 273840 }, { "epoch": 11.34, "grad_norm": 0.9921875, "learning_rate": 0.00034348358535701124, "loss": 0.2007, "step": 273850 }, { "epoch": 11.34, "grad_norm": 0.7265625, "learning_rate": 0.00034347352683695623, "loss": 0.1701, "step": 273860 }, { "epoch": 11.34, "grad_norm": 0.82421875, "learning_rate": 0.00034346346814099403, "loss": 0.1872, "step": 273870 }, { "epoch": 11.34, "grad_norm": 1.140625, "learning_rate": 0.0003434534092691438, "loss": 0.1898, "step": 273880 }, { "epoch": 11.34, "grad_norm": 1.2265625, "learning_rate": 0.0003434433502214243, "loss": 0.1943, "step": 273890 }, { "epoch": 11.34, "grad_norm": 0.78125, "learning_rate": 0.00034343329099785453, "loss": 0.1784, "step": 273900 }, { "epoch": 11.35, "grad_norm": 1.5078125, "learning_rate": 0.00034342323159845334, "loss": 0.2271, "step": 273910 }, { "epoch": 11.35, "grad_norm": 0.71484375, "learning_rate": 0.0003434131720232397, "loss": 0.1873, "step": 273920 }, { "epoch": 11.35, "grad_norm": 1.71875, "learning_rate": 0.0003434031122722327, "loss": 0.2231, "step": 273930 }, { "epoch": 11.35, "grad_norm": 0.49609375, "learning_rate": 0.00034339305234545105, "loss": 0.2136, "step": 273940 }, { "epoch": 11.35, "grad_norm": 0.546875, "learning_rate": 0.0003433829922429137, "loss": 0.2121, "step": 273950 }, { "epoch": 11.35, "grad_norm": 1.2578125, "learning_rate": 0.0003433729319646397, "loss": 0.1884, "step": 273960 }, { "epoch": 11.35, "grad_norm": 0.77734375, "learning_rate": 0.00034336287151064793, "loss": 0.1891, "step": 273970 }, { "epoch": 11.35, "grad_norm": 0.86328125, "learning_rate": 0.0003433528108809573, "loss": 0.1435, "step": 273980 }, { "epoch": 11.35, "grad_norm": 1.796875, "learning_rate": 0.00034334275007558677, "loss": 0.1792, "step": 273990 }, { "epoch": 11.35, "grad_norm": 0.3125, "learning_rate": 0.0003433326890945553, "loss": 0.1899, "step": 274000 }, { "epoch": 11.35, "grad_norm": 0.9296875, "learning_rate": 0.0003433226279378817, "loss": 0.1943, "step": 274010 }, { "epoch": 11.35, "grad_norm": 0.86328125, "learning_rate": 0.00034331256660558506, "loss": 0.1795, "step": 274020 }, { "epoch": 11.35, "grad_norm": 0.66015625, "learning_rate": 0.00034330250509768425, "loss": 0.161, "step": 274030 }, { "epoch": 11.35, "grad_norm": 1.0234375, "learning_rate": 0.0003432924434141982, "loss": 0.1935, "step": 274040 }, { "epoch": 11.35, "grad_norm": 0.392578125, "learning_rate": 0.0003432823815551458, "loss": 0.1588, "step": 274050 }, { "epoch": 11.35, "grad_norm": 0.4765625, "learning_rate": 0.000343272319520546, "loss": 0.1833, "step": 274060 }, { "epoch": 11.35, "grad_norm": 1.0078125, "learning_rate": 0.00034326225731041787, "loss": 0.1801, "step": 274070 }, { "epoch": 11.35, "grad_norm": 0.71875, "learning_rate": 0.0003432521949247802, "loss": 0.1748, "step": 274080 }, { "epoch": 11.35, "grad_norm": 0.51953125, "learning_rate": 0.000343242132363652, "loss": 0.182, "step": 274090 }, { "epoch": 11.35, "grad_norm": 0.62890625, "learning_rate": 0.0003432320696270521, "loss": 0.1526, "step": 274100 }, { "epoch": 11.35, "grad_norm": 0.251953125, "learning_rate": 0.0003432220067149996, "loss": 0.2148, "step": 274110 }, { "epoch": 11.35, "grad_norm": 0.734375, "learning_rate": 0.00034321194362751326, "loss": 0.2376, "step": 274120 }, { "epoch": 11.35, "grad_norm": 1.2578125, "learning_rate": 0.00034320188036461217, "loss": 0.1549, "step": 274130 }, { "epoch": 11.35, "grad_norm": 0.6953125, "learning_rate": 0.00034319181692631524, "loss": 0.2042, "step": 274140 }, { "epoch": 11.36, "grad_norm": 0.451171875, "learning_rate": 0.0003431817533126413, "loss": 0.1582, "step": 274150 }, { "epoch": 11.36, "grad_norm": 0.98828125, "learning_rate": 0.0003431716895236094, "loss": 0.1454, "step": 274160 }, { "epoch": 11.36, "grad_norm": 1.3984375, "learning_rate": 0.00034316162555923845, "loss": 0.2306, "step": 274170 }, { "epoch": 11.36, "grad_norm": 1.1328125, "learning_rate": 0.00034315156141954733, "loss": 0.1735, "step": 274180 }, { "epoch": 11.36, "grad_norm": 1.546875, "learning_rate": 0.00034314149710455504, "loss": 0.2235, "step": 274190 }, { "epoch": 11.36, "grad_norm": 1.234375, "learning_rate": 0.00034313143261428054, "loss": 0.2049, "step": 274200 }, { "epoch": 11.36, "grad_norm": 0.33984375, "learning_rate": 0.0003431213679487427, "loss": 0.2014, "step": 274210 }, { "epoch": 11.36, "grad_norm": 0.9921875, "learning_rate": 0.0003431113031079606, "loss": 0.2137, "step": 274220 }, { "epoch": 11.36, "grad_norm": 1.40625, "learning_rate": 0.00034310123809195306, "loss": 0.203, "step": 274230 }, { "epoch": 11.36, "grad_norm": 2.296875, "learning_rate": 0.00034309117290073893, "loss": 0.2048, "step": 274240 }, { "epoch": 11.36, "grad_norm": 0.78515625, "learning_rate": 0.0003430811075343374, "loss": 0.14, "step": 274250 }, { "epoch": 11.36, "grad_norm": 0.75390625, "learning_rate": 0.0003430710419927672, "loss": 0.2051, "step": 274260 }, { "epoch": 11.36, "grad_norm": 1.2265625, "learning_rate": 0.0003430609762760474, "loss": 0.2041, "step": 274270 }, { "epoch": 11.36, "grad_norm": 1.140625, "learning_rate": 0.00034305091038419676, "loss": 0.1846, "step": 274280 }, { "epoch": 11.36, "grad_norm": 1.546875, "learning_rate": 0.0003430408443172345, "loss": 0.2442, "step": 274290 }, { "epoch": 11.36, "grad_norm": 0.95703125, "learning_rate": 0.0003430307780751794, "loss": 0.147, "step": 274300 }, { "epoch": 11.36, "grad_norm": 1.3046875, "learning_rate": 0.0003430207116580504, "loss": 0.2036, "step": 274310 }, { "epoch": 11.36, "grad_norm": 0.478515625, "learning_rate": 0.00034301064506586643, "loss": 0.2119, "step": 274320 }, { "epoch": 11.36, "grad_norm": 0.98046875, "learning_rate": 0.00034300057829864653, "loss": 0.2108, "step": 274330 }, { "epoch": 11.36, "grad_norm": 0.76953125, "learning_rate": 0.0003429905113564095, "loss": 0.1898, "step": 274340 }, { "epoch": 11.36, "grad_norm": 0.23046875, "learning_rate": 0.00034298044423917447, "loss": 0.1749, "step": 274350 }, { "epoch": 11.36, "grad_norm": 0.357421875, "learning_rate": 0.0003429703769469602, "loss": 0.2402, "step": 274360 }, { "epoch": 11.36, "grad_norm": 1.0078125, "learning_rate": 0.0003429603094797858, "loss": 0.2139, "step": 274370 }, { "epoch": 11.36, "grad_norm": 1.5859375, "learning_rate": 0.0003429502418376701, "loss": 0.2135, "step": 274380 }, { "epoch": 11.37, "grad_norm": 0.298828125, "learning_rate": 0.0003429401740206321, "loss": 0.2181, "step": 274390 }, { "epoch": 11.37, "grad_norm": 0.98046875, "learning_rate": 0.0003429301060286907, "loss": 0.1464, "step": 274400 }, { "epoch": 11.37, "grad_norm": 1.234375, "learning_rate": 0.00034292003786186493, "loss": 0.1889, "step": 274410 }, { "epoch": 11.37, "grad_norm": 1.1640625, "learning_rate": 0.0003429099695201736, "loss": 0.2612, "step": 274420 }, { "epoch": 11.37, "grad_norm": 0.38671875, "learning_rate": 0.00034289990100363577, "loss": 0.1767, "step": 274430 }, { "epoch": 11.37, "grad_norm": 1.2265625, "learning_rate": 0.00034288983231227035, "loss": 0.2035, "step": 274440 }, { "epoch": 11.37, "grad_norm": 2.359375, "learning_rate": 0.00034287976344609636, "loss": 0.215, "step": 274450 }, { "epoch": 11.37, "grad_norm": 0.7734375, "learning_rate": 0.00034286969440513267, "loss": 0.1433, "step": 274460 }, { "epoch": 11.37, "grad_norm": 2.1875, "learning_rate": 0.0003428596251893981, "loss": 0.2369, "step": 274470 }, { "epoch": 11.37, "grad_norm": 0.46875, "learning_rate": 0.00034284955579891194, "loss": 0.1907, "step": 274480 }, { "epoch": 11.37, "grad_norm": 0.828125, "learning_rate": 0.00034283948623369285, "loss": 0.2612, "step": 274490 }, { "epoch": 11.37, "grad_norm": 0.56640625, "learning_rate": 0.0003428294164937599, "loss": 0.1843, "step": 274500 }, { "epoch": 11.37, "grad_norm": 0.69140625, "learning_rate": 0.000342819346579132, "loss": 0.2197, "step": 274510 }, { "epoch": 11.37, "grad_norm": 0.4921875, "learning_rate": 0.00034280927648982807, "loss": 0.1839, "step": 274520 }, { "epoch": 11.37, "grad_norm": 0.30078125, "learning_rate": 0.00034279920622586713, "loss": 0.2184, "step": 274530 }, { "epoch": 11.37, "grad_norm": 2.203125, "learning_rate": 0.0003427891357872682, "loss": 0.2216, "step": 274540 }, { "epoch": 11.37, "grad_norm": 1.4765625, "learning_rate": 0.00034277906517405, "loss": 0.1675, "step": 274550 }, { "epoch": 11.37, "grad_norm": 0.212890625, "learning_rate": 0.00034276899438623164, "loss": 0.2305, "step": 274560 }, { "epoch": 11.37, "grad_norm": 1.2265625, "learning_rate": 0.00034275892342383216, "loss": 0.1909, "step": 274570 }, { "epoch": 11.37, "grad_norm": 1.125, "learning_rate": 0.00034274885228687023, "loss": 0.1819, "step": 274580 }, { "epoch": 11.37, "grad_norm": 0.7578125, "learning_rate": 0.00034273878097536514, "loss": 0.1893, "step": 274590 }, { "epoch": 11.37, "grad_norm": 0.78125, "learning_rate": 0.00034272870948933565, "loss": 0.1743, "step": 274600 }, { "epoch": 11.37, "grad_norm": 1.3671875, "learning_rate": 0.0003427186378288006, "loss": 0.1691, "step": 274610 }, { "epoch": 11.37, "grad_norm": 1.796875, "learning_rate": 0.0003427085659937793, "loss": 0.2145, "step": 274620 }, { "epoch": 11.38, "grad_norm": 0.2421875, "learning_rate": 0.0003426984939842903, "loss": 0.1717, "step": 274630 }, { "epoch": 11.38, "grad_norm": 1.125, "learning_rate": 0.00034268842180035285, "loss": 0.1472, "step": 274640 }, { "epoch": 11.38, "grad_norm": 0.6640625, "learning_rate": 0.00034267834944198583, "loss": 0.2264, "step": 274650 }, { "epoch": 11.38, "grad_norm": 1.203125, "learning_rate": 0.0003426682769092081, "loss": 0.1889, "step": 274660 }, { "epoch": 11.38, "grad_norm": 0.74609375, "learning_rate": 0.0003426582042020387, "loss": 0.1693, "step": 274670 }, { "epoch": 11.38, "grad_norm": 0.875, "learning_rate": 0.00034264813132049655, "loss": 0.187, "step": 274680 }, { "epoch": 11.38, "grad_norm": 0.73828125, "learning_rate": 0.0003426380582646006, "loss": 0.1457, "step": 274690 }, { "epoch": 11.38, "grad_norm": 0.82421875, "learning_rate": 0.0003426279850343699, "loss": 0.1807, "step": 274700 }, { "epoch": 11.38, "grad_norm": 0.71484375, "learning_rate": 0.00034261791162982326, "loss": 0.2061, "step": 274710 }, { "epoch": 11.38, "grad_norm": 0.4609375, "learning_rate": 0.0003426078380509798, "loss": 0.2076, "step": 274720 }, { "epoch": 11.38, "grad_norm": 0.6640625, "learning_rate": 0.0003425977642978584, "loss": 0.1642, "step": 274730 }, { "epoch": 11.38, "grad_norm": 0.2001953125, "learning_rate": 0.00034258769037047784, "loss": 0.1494, "step": 274740 }, { "epoch": 11.38, "grad_norm": 1.4765625, "learning_rate": 0.00034257761626885734, "loss": 0.1923, "step": 274750 }, { "epoch": 11.38, "grad_norm": 1.0625, "learning_rate": 0.00034256754199301585, "loss": 0.2404, "step": 274760 }, { "epoch": 11.38, "grad_norm": 1.6015625, "learning_rate": 0.0003425574675429721, "loss": 0.2236, "step": 274770 }, { "epoch": 11.38, "grad_norm": 1.2734375, "learning_rate": 0.0003425473929187453, "loss": 0.202, "step": 274780 }, { "epoch": 11.38, "grad_norm": 0.7890625, "learning_rate": 0.0003425373181203542, "loss": 0.2121, "step": 274790 }, { "epoch": 11.38, "grad_norm": 0.99609375, "learning_rate": 0.000342527243147818, "loss": 0.1879, "step": 274800 }, { "epoch": 11.38, "grad_norm": 0.53125, "learning_rate": 0.0003425171680011554, "loss": 0.2283, "step": 274810 }, { "epoch": 11.38, "grad_norm": 1.2265625, "learning_rate": 0.0003425070926803855, "loss": 0.2006, "step": 274820 }, { "epoch": 11.38, "grad_norm": 0.76171875, "learning_rate": 0.0003424970171855273, "loss": 0.2058, "step": 274830 }, { "epoch": 11.38, "grad_norm": 1.5625, "learning_rate": 0.00034248694151659963, "loss": 0.235, "step": 274840 }, { "epoch": 11.38, "grad_norm": 0.81640625, "learning_rate": 0.0003424768656736216, "loss": 0.1904, "step": 274850 }, { "epoch": 11.38, "grad_norm": 0.77734375, "learning_rate": 0.00034246678965661206, "loss": 0.1993, "step": 274860 }, { "epoch": 11.39, "grad_norm": 0.92578125, "learning_rate": 0.00034245671346558993, "loss": 0.1793, "step": 274870 }, { "epoch": 11.39, "grad_norm": 0.5, "learning_rate": 0.00034244663710057433, "loss": 0.2589, "step": 274880 }, { "epoch": 11.39, "grad_norm": 0.26171875, "learning_rate": 0.00034243656056158413, "loss": 0.2158, "step": 274890 }, { "epoch": 11.39, "grad_norm": 0.31640625, "learning_rate": 0.0003424264838486383, "loss": 0.2271, "step": 274900 }, { "epoch": 11.39, "grad_norm": 1.6328125, "learning_rate": 0.00034241640696175586, "loss": 0.2003, "step": 274910 }, { "epoch": 11.39, "grad_norm": 0.921875, "learning_rate": 0.00034240632990095563, "loss": 0.1982, "step": 274920 }, { "epoch": 11.39, "grad_norm": 0.765625, "learning_rate": 0.00034239625266625675, "loss": 0.1788, "step": 274930 }, { "epoch": 11.39, "grad_norm": 0.87109375, "learning_rate": 0.0003423861752576781, "loss": 0.2347, "step": 274940 }, { "epoch": 11.39, "grad_norm": 4.125, "learning_rate": 0.00034237609767523853, "loss": 0.2045, "step": 274950 }, { "epoch": 11.39, "grad_norm": 0.921875, "learning_rate": 0.0003423660199189572, "loss": 0.1673, "step": 274960 }, { "epoch": 11.39, "grad_norm": 1.2890625, "learning_rate": 0.000342355941988853, "loss": 0.2152, "step": 274970 }, { "epoch": 11.39, "grad_norm": 0.76171875, "learning_rate": 0.0003423458638849449, "loss": 0.1785, "step": 274980 }, { "epoch": 11.39, "grad_norm": 1.3515625, "learning_rate": 0.00034233578560725187, "loss": 0.2432, "step": 274990 }, { "epoch": 11.39, "grad_norm": 0.9140625, "learning_rate": 0.0003423257071557928, "loss": 0.158, "step": 275000 }, { "epoch": 11.39, "grad_norm": 0.35546875, "learning_rate": 0.0003423156285305868, "loss": 0.1683, "step": 275010 }, { "epoch": 11.39, "grad_norm": 0.8359375, "learning_rate": 0.00034230554973165267, "loss": 0.1864, "step": 275020 }, { "epoch": 11.39, "grad_norm": 0.32421875, "learning_rate": 0.00034229547075900947, "loss": 0.1719, "step": 275030 }, { "epoch": 11.39, "grad_norm": 1.25, "learning_rate": 0.00034228539161267627, "loss": 0.19, "step": 275040 }, { "epoch": 11.39, "grad_norm": 0.56640625, "learning_rate": 0.0003422753122926718, "loss": 0.2111, "step": 275050 }, { "epoch": 11.39, "grad_norm": 0.4140625, "learning_rate": 0.0003422652327990152, "loss": 0.2295, "step": 275060 }, { "epoch": 11.39, "grad_norm": 0.66796875, "learning_rate": 0.00034225515313172553, "loss": 0.1716, "step": 275070 }, { "epoch": 11.39, "grad_norm": 1.171875, "learning_rate": 0.00034224507329082146, "loss": 0.186, "step": 275080 }, { "epoch": 11.39, "grad_norm": 0.8984375, "learning_rate": 0.0003422349932763221, "loss": 0.1667, "step": 275090 }, { "epoch": 11.39, "grad_norm": 1.2265625, "learning_rate": 0.00034222491308824653, "loss": 0.2006, "step": 275100 }, { "epoch": 11.4, "grad_norm": 0.73046875, "learning_rate": 0.0003422148327266136, "loss": 0.1511, "step": 275110 }, { "epoch": 11.4, "grad_norm": 0.95703125, "learning_rate": 0.00034220475219144236, "loss": 0.2191, "step": 275120 }, { "epoch": 11.4, "grad_norm": 0.39453125, "learning_rate": 0.0003421946714827517, "loss": 0.2009, "step": 275130 }, { "epoch": 11.4, "grad_norm": 0.66015625, "learning_rate": 0.00034218459060056063, "loss": 0.1507, "step": 275140 }, { "epoch": 11.4, "grad_norm": 0.50390625, "learning_rate": 0.0003421745095448881, "loss": 0.1765, "step": 275150 }, { "epoch": 11.4, "grad_norm": 0.6640625, "learning_rate": 0.00034216442831575314, "loss": 0.1643, "step": 275160 }, { "epoch": 11.4, "grad_norm": 1.25, "learning_rate": 0.0003421543469131746, "loss": 0.1806, "step": 275170 }, { "epoch": 11.4, "grad_norm": 1.2109375, "learning_rate": 0.00034214426533717157, "loss": 0.2274, "step": 275180 }, { "epoch": 11.4, "grad_norm": 0.4609375, "learning_rate": 0.00034213418358776295, "loss": 0.2032, "step": 275190 }, { "epoch": 11.4, "grad_norm": 0.75390625, "learning_rate": 0.0003421241016649678, "loss": 0.1888, "step": 275200 }, { "epoch": 11.4, "grad_norm": 2.953125, "learning_rate": 0.00034211401956880504, "loss": 0.222, "step": 275210 }, { "epoch": 11.4, "grad_norm": 1.2421875, "learning_rate": 0.00034210393729929364, "loss": 0.1966, "step": 275220 }, { "epoch": 11.4, "grad_norm": 0.9453125, "learning_rate": 0.0003420938548564525, "loss": 0.172, "step": 275230 }, { "epoch": 11.4, "grad_norm": 0.8046875, "learning_rate": 0.0003420837722403008, "loss": 0.2349, "step": 275240 }, { "epoch": 11.4, "grad_norm": 0.6484375, "learning_rate": 0.00034207368945085733, "loss": 0.2293, "step": 275250 }, { "epoch": 11.4, "grad_norm": 1.4375, "learning_rate": 0.00034206360648814105, "loss": 0.2133, "step": 275260 }, { "epoch": 11.4, "grad_norm": 0.73046875, "learning_rate": 0.0003420535233521711, "loss": 0.1693, "step": 275270 }, { "epoch": 11.4, "grad_norm": 0.96875, "learning_rate": 0.00034204344004296636, "loss": 0.1931, "step": 275280 }, { "epoch": 11.4, "grad_norm": 6.125, "learning_rate": 0.00034203335656054573, "loss": 0.212, "step": 275290 }, { "epoch": 11.4, "grad_norm": 0.314453125, "learning_rate": 0.0003420232729049283, "loss": 0.2397, "step": 275300 }, { "epoch": 11.4, "grad_norm": 0.94921875, "learning_rate": 0.000342013189076133, "loss": 0.1836, "step": 275310 }, { "epoch": 11.4, "grad_norm": 1.15625, "learning_rate": 0.00034200310507417886, "loss": 0.1988, "step": 275320 }, { "epoch": 11.4, "grad_norm": 0.7578125, "learning_rate": 0.0003419930208990847, "loss": 0.2093, "step": 275330 }, { "epoch": 11.4, "grad_norm": 0.25390625, "learning_rate": 0.0003419829365508697, "loss": 0.1977, "step": 275340 }, { "epoch": 11.4, "grad_norm": 1.3359375, "learning_rate": 0.00034197285202955277, "loss": 0.1889, "step": 275350 }, { "epoch": 11.41, "grad_norm": 0.73046875, "learning_rate": 0.00034196276733515274, "loss": 0.1699, "step": 275360 }, { "epoch": 11.41, "grad_norm": 0.70703125, "learning_rate": 0.00034195268246768883, "loss": 0.2637, "step": 275370 }, { "epoch": 11.41, "grad_norm": 0.85546875, "learning_rate": 0.0003419425974271799, "loss": 0.232, "step": 275380 }, { "epoch": 11.41, "grad_norm": 0.58203125, "learning_rate": 0.00034193251221364485, "loss": 0.1745, "step": 275390 }, { "epoch": 11.41, "grad_norm": 1.0, "learning_rate": 0.00034192242682710283, "loss": 0.2244, "step": 275400 }, { "epoch": 11.41, "grad_norm": 0.38671875, "learning_rate": 0.0003419123412675727, "loss": 0.1448, "step": 275410 }, { "epoch": 11.41, "grad_norm": 0.69140625, "learning_rate": 0.00034190225553507346, "loss": 0.2018, "step": 275420 }, { "epoch": 11.41, "grad_norm": 0.2734375, "learning_rate": 0.0003418921696296241, "loss": 0.1508, "step": 275430 }, { "epoch": 11.41, "grad_norm": 0.435546875, "learning_rate": 0.00034188208355124356, "loss": 0.2082, "step": 275440 }, { "epoch": 11.41, "grad_norm": 0.181640625, "learning_rate": 0.00034187199729995095, "loss": 0.1791, "step": 275450 }, { "epoch": 11.41, "grad_norm": 1.0625, "learning_rate": 0.0003418619108757651, "loss": 0.1944, "step": 275460 }, { "epoch": 11.41, "grad_norm": 0.77734375, "learning_rate": 0.00034185182427870503, "loss": 0.2961, "step": 275470 }, { "epoch": 11.41, "grad_norm": 1.390625, "learning_rate": 0.00034184173750878983, "loss": 0.1785, "step": 275480 }, { "epoch": 11.41, "grad_norm": 0.5390625, "learning_rate": 0.00034183165056603834, "loss": 0.2118, "step": 275490 }, { "epoch": 11.41, "grad_norm": 0.83203125, "learning_rate": 0.0003418215634504696, "loss": 0.2113, "step": 275500 }, { "epoch": 11.41, "grad_norm": 0.455078125, "learning_rate": 0.0003418114761621026, "loss": 0.182, "step": 275510 }, { "epoch": 11.41, "grad_norm": 0.78125, "learning_rate": 0.00034180138870095627, "loss": 0.2186, "step": 275520 }, { "epoch": 11.41, "grad_norm": 0.80859375, "learning_rate": 0.00034179130106704974, "loss": 0.2065, "step": 275530 }, { "epoch": 11.41, "grad_norm": 0.8203125, "learning_rate": 0.00034178121326040184, "loss": 0.2171, "step": 275540 }, { "epoch": 11.41, "grad_norm": 1.3046875, "learning_rate": 0.0003417711252810316, "loss": 0.1916, "step": 275550 }, { "epoch": 11.41, "grad_norm": 0.96875, "learning_rate": 0.000341761037128958, "loss": 0.1805, "step": 275560 }, { "epoch": 11.41, "grad_norm": 0.84765625, "learning_rate": 0.0003417509488042001, "loss": 0.2319, "step": 275570 }, { "epoch": 11.41, "grad_norm": 0.3671875, "learning_rate": 0.0003417408603067768, "loss": 0.1738, "step": 275580 }, { "epoch": 11.41, "grad_norm": 0.5859375, "learning_rate": 0.00034173077163670706, "loss": 0.2214, "step": 275590 }, { "epoch": 11.42, "grad_norm": 0.53125, "learning_rate": 0.00034172068279400997, "loss": 0.1637, "step": 275600 }, { "epoch": 11.42, "grad_norm": 0.494140625, "learning_rate": 0.00034171059377870443, "loss": 0.1446, "step": 275610 }, { "epoch": 11.42, "grad_norm": 0.87109375, "learning_rate": 0.00034170050459080947, "loss": 0.2289, "step": 275620 }, { "epoch": 11.42, "grad_norm": 0.40234375, "learning_rate": 0.00034169041523034405, "loss": 0.2158, "step": 275630 }, { "epoch": 11.42, "grad_norm": 0.484375, "learning_rate": 0.00034168032569732715, "loss": 0.2083, "step": 275640 }, { "epoch": 11.42, "grad_norm": 1.03125, "learning_rate": 0.0003416702359917778, "loss": 0.1917, "step": 275650 }, { "epoch": 11.42, "grad_norm": 0.9453125, "learning_rate": 0.000341660146113715, "loss": 0.2002, "step": 275660 }, { "epoch": 11.42, "grad_norm": 0.75390625, "learning_rate": 0.0003416500560631577, "loss": 0.174, "step": 275670 }, { "epoch": 11.42, "grad_norm": 1.296875, "learning_rate": 0.00034163996584012486, "loss": 0.1563, "step": 275680 }, { "epoch": 11.42, "grad_norm": 0.263671875, "learning_rate": 0.00034162987544463554, "loss": 0.2039, "step": 275690 }, { "epoch": 11.42, "grad_norm": 0.2734375, "learning_rate": 0.00034161978487670864, "loss": 0.1753, "step": 275700 }, { "epoch": 11.42, "grad_norm": 1.2265625, "learning_rate": 0.0003416096941363632, "loss": 0.2196, "step": 275710 }, { "epoch": 11.42, "grad_norm": 1.015625, "learning_rate": 0.00034159960322361824, "loss": 0.1785, "step": 275720 }, { "epoch": 11.42, "grad_norm": 0.8046875, "learning_rate": 0.00034158951213849264, "loss": 0.1847, "step": 275730 }, { "epoch": 11.42, "grad_norm": 1.8046875, "learning_rate": 0.00034157942088100556, "loss": 0.1507, "step": 275740 }, { "epoch": 11.42, "grad_norm": 0.640625, "learning_rate": 0.0003415693294511759, "loss": 0.196, "step": 275750 }, { "epoch": 11.42, "grad_norm": 0.71875, "learning_rate": 0.0003415592378490226, "loss": 0.2126, "step": 275760 }, { "epoch": 11.42, "grad_norm": 1.2890625, "learning_rate": 0.0003415491460745648, "loss": 0.1791, "step": 275770 }, { "epoch": 11.42, "grad_norm": 0.70703125, "learning_rate": 0.00034153905412782123, "loss": 0.209, "step": 275780 }, { "epoch": 11.42, "grad_norm": 0.94921875, "learning_rate": 0.0003415289620088111, "loss": 0.1904, "step": 275790 }, { "epoch": 11.42, "grad_norm": 1.84375, "learning_rate": 0.0003415188697175534, "loss": 0.2187, "step": 275800 }, { "epoch": 11.42, "grad_norm": 0.76953125, "learning_rate": 0.000341508777254067, "loss": 0.1813, "step": 275810 }, { "epoch": 11.42, "grad_norm": 0.6640625, "learning_rate": 0.0003414986846183711, "loss": 0.1668, "step": 275820 }, { "epoch": 11.42, "grad_norm": 0.5859375, "learning_rate": 0.0003414885918104845, "loss": 0.1926, "step": 275830 }, { "epoch": 11.43, "grad_norm": 0.83984375, "learning_rate": 0.00034147849883042617, "loss": 0.1772, "step": 275840 }, { "epoch": 11.43, "grad_norm": 0.66796875, "learning_rate": 0.00034146840567821524, "loss": 0.2314, "step": 275850 }, { "epoch": 11.43, "grad_norm": 1.1875, "learning_rate": 0.0003414583123538706, "loss": 0.1761, "step": 275860 }, { "epoch": 11.43, "grad_norm": 0.9609375, "learning_rate": 0.0003414482188574114, "loss": 0.1948, "step": 275870 }, { "epoch": 11.43, "grad_norm": 1.2265625, "learning_rate": 0.0003414381251888564, "loss": 0.1333, "step": 275880 }, { "epoch": 11.43, "grad_norm": 0.7109375, "learning_rate": 0.0003414280313482248, "loss": 0.1911, "step": 275890 }, { "epoch": 11.43, "grad_norm": 0.5859375, "learning_rate": 0.0003414179373355356, "loss": 0.1968, "step": 275900 }, { "epoch": 11.43, "grad_norm": 2.109375, "learning_rate": 0.00034140784315080753, "loss": 0.1462, "step": 275910 }, { "epoch": 11.43, "grad_norm": 0.55078125, "learning_rate": 0.00034139774879405986, "loss": 0.1897, "step": 275920 }, { "epoch": 11.43, "grad_norm": 0.9375, "learning_rate": 0.00034138765426531154, "loss": 0.1778, "step": 275930 }, { "epoch": 11.43, "grad_norm": 0.47265625, "learning_rate": 0.0003413775595645814, "loss": 0.1745, "step": 275940 }, { "epoch": 11.43, "grad_norm": 0.23828125, "learning_rate": 0.0003413674646918887, "loss": 0.2092, "step": 275950 }, { "epoch": 11.43, "grad_norm": 1.203125, "learning_rate": 0.00034135736964725224, "loss": 0.1796, "step": 275960 }, { "epoch": 11.43, "grad_norm": 0.6875, "learning_rate": 0.00034134727443069105, "loss": 0.2003, "step": 275970 }, { "epoch": 11.43, "grad_norm": 2.03125, "learning_rate": 0.00034133717904222423, "loss": 0.207, "step": 275980 }, { "epoch": 11.43, "grad_norm": 0.89453125, "learning_rate": 0.00034132708348187066, "loss": 0.2767, "step": 275990 }, { "epoch": 11.43, "grad_norm": 0.71875, "learning_rate": 0.0003413169877496493, "loss": 0.1918, "step": 276000 }, { "epoch": 11.43, "grad_norm": 0.859375, "learning_rate": 0.0003413068918455794, "loss": 0.1789, "step": 276010 }, { "epoch": 11.43, "grad_norm": 0.9453125, "learning_rate": 0.0003412967957696796, "loss": 0.1965, "step": 276020 }, { "epoch": 11.43, "grad_norm": 0.30859375, "learning_rate": 0.0003412866995219692, "loss": 0.1734, "step": 276030 }, { "epoch": 11.43, "grad_norm": 1.21875, "learning_rate": 0.00034127660310246706, "loss": 0.1919, "step": 276040 }, { "epoch": 11.43, "grad_norm": 1.6015625, "learning_rate": 0.0003412665065111922, "loss": 0.2342, "step": 276050 }, { "epoch": 11.43, "grad_norm": 0.61328125, "learning_rate": 0.00034125640974816374, "loss": 0.242, "step": 276060 }, { "epoch": 11.43, "grad_norm": 0.6640625, "learning_rate": 0.0003412463128134004, "loss": 0.2178, "step": 276070 }, { "epoch": 11.44, "grad_norm": 1.6953125, "learning_rate": 0.0003412362157069215, "loss": 0.2265, "step": 276080 }, { "epoch": 11.44, "grad_norm": 0.9765625, "learning_rate": 0.00034122611842874584, "loss": 0.1581, "step": 276090 }, { "epoch": 11.44, "grad_norm": 0.482421875, "learning_rate": 0.0003412160209788924, "loss": 0.196, "step": 276100 }, { "epoch": 11.44, "grad_norm": 2.296875, "learning_rate": 0.0003412059233573804, "loss": 0.2121, "step": 276110 }, { "epoch": 11.44, "grad_norm": 0.953125, "learning_rate": 0.00034119582556422855, "loss": 0.2495, "step": 276120 }, { "epoch": 11.44, "grad_norm": 1.03125, "learning_rate": 0.0003411857275994561, "loss": 0.1945, "step": 276130 }, { "epoch": 11.44, "grad_norm": 0.45703125, "learning_rate": 0.000341175629463082, "loss": 0.1798, "step": 276140 }, { "epoch": 11.44, "grad_norm": 0.7421875, "learning_rate": 0.00034116553115512503, "loss": 0.1874, "step": 276150 }, { "epoch": 11.44, "grad_norm": 1.28125, "learning_rate": 0.00034115543267560454, "loss": 0.1948, "step": 276160 }, { "epoch": 11.44, "grad_norm": 0.76171875, "learning_rate": 0.0003411453340245394, "loss": 0.1835, "step": 276170 }, { "epoch": 11.44, "grad_norm": 0.83984375, "learning_rate": 0.00034113523520194847, "loss": 0.1967, "step": 276180 }, { "epoch": 11.44, "grad_norm": 0.43359375, "learning_rate": 0.00034112513620785094, "loss": 0.218, "step": 276190 }, { "epoch": 11.44, "grad_norm": 1.53125, "learning_rate": 0.0003411150370422657, "loss": 0.2025, "step": 276200 }, { "epoch": 11.44, "grad_norm": 1.2890625, "learning_rate": 0.00034110493770521177, "loss": 0.2508, "step": 276210 }, { "epoch": 11.44, "grad_norm": 1.1015625, "learning_rate": 0.0003410948381967083, "loss": 0.2074, "step": 276220 }, { "epoch": 11.44, "grad_norm": 0.54296875, "learning_rate": 0.00034108473851677406, "loss": 0.155, "step": 276230 }, { "epoch": 11.44, "grad_norm": 0.69140625, "learning_rate": 0.00034107463866542824, "loss": 0.2011, "step": 276240 }, { "epoch": 11.44, "grad_norm": 0.640625, "learning_rate": 0.00034106453864268983, "loss": 0.2202, "step": 276250 }, { "epoch": 11.44, "grad_norm": 0.486328125, "learning_rate": 0.0003410544384485777, "loss": 0.2094, "step": 276260 }, { "epoch": 11.44, "grad_norm": 0.828125, "learning_rate": 0.000341044338083111, "loss": 0.153, "step": 276270 }, { "epoch": 11.44, "grad_norm": 1.0703125, "learning_rate": 0.0003410342375463087, "loss": 0.2034, "step": 276280 }, { "epoch": 11.44, "grad_norm": 0.0986328125, "learning_rate": 0.0003410241368381897, "loss": 0.1984, "step": 276290 }, { "epoch": 11.44, "grad_norm": 1.234375, "learning_rate": 0.00034101403595877323, "loss": 0.1754, "step": 276300 }, { "epoch": 11.44, "grad_norm": 0.0, "learning_rate": 0.0003410039349080781, "loss": 0.1582, "step": 276310 }, { "epoch": 11.45, "grad_norm": 0.328125, "learning_rate": 0.0003409938336861235, "loss": 0.2029, "step": 276320 }, { "epoch": 11.45, "grad_norm": 0.875, "learning_rate": 0.00034098373229292817, "loss": 0.2107, "step": 276330 }, { "epoch": 11.45, "grad_norm": 0.77734375, "learning_rate": 0.00034097363072851136, "loss": 0.1819, "step": 276340 }, { "epoch": 11.45, "grad_norm": 1.5859375, "learning_rate": 0.00034096352899289203, "loss": 0.2397, "step": 276350 }, { "epoch": 11.45, "grad_norm": 0.58984375, "learning_rate": 0.0003409534270860891, "loss": 0.1878, "step": 276360 }, { "epoch": 11.45, "grad_norm": 0.66015625, "learning_rate": 0.0003409433250081216, "loss": 0.1799, "step": 276370 }, { "epoch": 11.45, "grad_norm": 1.1171875, "learning_rate": 0.00034093322275900875, "loss": 0.149, "step": 276380 }, { "epoch": 11.45, "grad_norm": 0.71875, "learning_rate": 0.00034092312033876925, "loss": 0.2232, "step": 276390 }, { "epoch": 11.45, "grad_norm": 0.201171875, "learning_rate": 0.0003409130177474223, "loss": 0.208, "step": 276400 }, { "epoch": 11.45, "grad_norm": 0.80078125, "learning_rate": 0.0003409029149849869, "loss": 0.1979, "step": 276410 }, { "epoch": 11.45, "grad_norm": 0.78125, "learning_rate": 0.00034089281205148197, "loss": 0.2011, "step": 276420 }, { "epoch": 11.45, "grad_norm": 0.515625, "learning_rate": 0.00034088270894692664, "loss": 0.1705, "step": 276430 }, { "epoch": 11.45, "grad_norm": 0.8984375, "learning_rate": 0.0003408726056713399, "loss": 0.199, "step": 276440 }, { "epoch": 11.45, "grad_norm": 0.9765625, "learning_rate": 0.0003408625022247406, "loss": 0.2156, "step": 276450 }, { "epoch": 11.45, "grad_norm": 0.470703125, "learning_rate": 0.000340852398607148, "loss": 0.1461, "step": 276460 }, { "epoch": 11.45, "grad_norm": 0.90234375, "learning_rate": 0.00034084229481858084, "loss": 0.1814, "step": 276470 }, { "epoch": 11.45, "grad_norm": 0.6171875, "learning_rate": 0.0003408321908590585, "loss": 0.221, "step": 276480 }, { "epoch": 11.45, "grad_norm": 0.953125, "learning_rate": 0.00034082208672859974, "loss": 0.2099, "step": 276490 }, { "epoch": 11.45, "grad_norm": 0.333984375, "learning_rate": 0.0003408119824272235, "loss": 0.1884, "step": 276500 }, { "epoch": 11.45, "grad_norm": 1.1953125, "learning_rate": 0.000340801877954949, "loss": 0.2009, "step": 276510 }, { "epoch": 11.45, "grad_norm": 0.341796875, "learning_rate": 0.0003407917733117952, "loss": 0.1868, "step": 276520 }, { "epoch": 11.45, "grad_norm": 0.58984375, "learning_rate": 0.000340781668497781, "loss": 0.1414, "step": 276530 }, { "epoch": 11.45, "grad_norm": 0.474609375, "learning_rate": 0.0003407715635129255, "loss": 0.1635, "step": 276540 }, { "epoch": 11.45, "grad_norm": 0.1533203125, "learning_rate": 0.0003407614583572478, "loss": 0.191, "step": 276550 }, { "epoch": 11.46, "grad_norm": 1.0703125, "learning_rate": 0.0003407513530307668, "loss": 0.1907, "step": 276560 }, { "epoch": 11.46, "grad_norm": 0.86328125, "learning_rate": 0.00034074124753350155, "loss": 0.1821, "step": 276570 }, { "epoch": 11.46, "grad_norm": 1.484375, "learning_rate": 0.0003407311418654711, "loss": 0.165, "step": 276580 }, { "epoch": 11.46, "grad_norm": 1.125, "learning_rate": 0.0003407210360266945, "loss": 0.2136, "step": 276590 }, { "epoch": 11.46, "grad_norm": 0.88671875, "learning_rate": 0.0003407109300171906, "loss": 0.2436, "step": 276600 }, { "epoch": 11.46, "grad_norm": 0.51953125, "learning_rate": 0.00034070082383697855, "loss": 0.1594, "step": 276610 }, { "epoch": 11.46, "grad_norm": 0.9296875, "learning_rate": 0.00034069071748607727, "loss": 0.1766, "step": 276620 }, { "epoch": 11.46, "grad_norm": 1.828125, "learning_rate": 0.000340680610964506, "loss": 0.1747, "step": 276630 }, { "epoch": 11.46, "grad_norm": 0.98828125, "learning_rate": 0.00034067050427228355, "loss": 0.1737, "step": 276640 }, { "epoch": 11.46, "grad_norm": 0.294921875, "learning_rate": 0.00034066039740942894, "loss": 0.2229, "step": 276650 }, { "epoch": 11.46, "grad_norm": 1.1328125, "learning_rate": 0.0003406502903759613, "loss": 0.182, "step": 276660 }, { "epoch": 11.46, "grad_norm": 1.5234375, "learning_rate": 0.00034064018317189965, "loss": 0.165, "step": 276670 }, { "epoch": 11.46, "grad_norm": 1.4453125, "learning_rate": 0.0003406300757972629, "loss": 0.1686, "step": 276680 }, { "epoch": 11.46, "grad_norm": 1.453125, "learning_rate": 0.0003406199682520702, "loss": 0.1927, "step": 276690 }, { "epoch": 11.46, "grad_norm": 1.671875, "learning_rate": 0.0003406098605363404, "loss": 0.2593, "step": 276700 }, { "epoch": 11.46, "grad_norm": 0.3203125, "learning_rate": 0.00034059975265009266, "loss": 0.2471, "step": 276710 }, { "epoch": 11.46, "grad_norm": 0.431640625, "learning_rate": 0.00034058964459334607, "loss": 0.2031, "step": 276720 }, { "epoch": 11.46, "grad_norm": 1.703125, "learning_rate": 0.00034057953636611947, "loss": 0.2118, "step": 276730 }, { "epoch": 11.46, "grad_norm": 0.9140625, "learning_rate": 0.000340569427968432, "loss": 0.2358, "step": 276740 }, { "epoch": 11.46, "grad_norm": 0.5390625, "learning_rate": 0.0003405593194003026, "loss": 0.1877, "step": 276750 }, { "epoch": 11.46, "grad_norm": 0.78515625, "learning_rate": 0.00034054921066175035, "loss": 0.1828, "step": 276760 }, { "epoch": 11.46, "grad_norm": 0.0, "learning_rate": 0.00034053910175279434, "loss": 0.1804, "step": 276770 }, { "epoch": 11.46, "grad_norm": 0.66796875, "learning_rate": 0.00034052899267345346, "loss": 0.1661, "step": 276780 }, { "epoch": 11.46, "grad_norm": 0.88671875, "learning_rate": 0.00034051888342374675, "loss": 0.1504, "step": 276790 }, { "epoch": 11.47, "grad_norm": 0.8828125, "learning_rate": 0.00034050877400369333, "loss": 0.1625, "step": 276800 }, { "epoch": 11.47, "grad_norm": 0.80859375, "learning_rate": 0.0003404986644133122, "loss": 0.1705, "step": 276810 }, { "epoch": 11.47, "grad_norm": 1.53125, "learning_rate": 0.0003404885546526223, "loss": 0.181, "step": 276820 }, { "epoch": 11.47, "grad_norm": 0.98046875, "learning_rate": 0.00034047844472164267, "loss": 0.1761, "step": 276830 }, { "epoch": 11.47, "grad_norm": 0.72265625, "learning_rate": 0.0003404683346203925, "loss": 0.2149, "step": 276840 }, { "epoch": 11.47, "grad_norm": 0.62109375, "learning_rate": 0.0003404582243488906, "loss": 0.138, "step": 276850 }, { "epoch": 11.47, "grad_norm": 1.2109375, "learning_rate": 0.00034044811390715615, "loss": 0.2273, "step": 276860 }, { "epoch": 11.47, "grad_norm": 0.53515625, "learning_rate": 0.000340438003295208, "loss": 0.1998, "step": 276870 }, { "epoch": 11.47, "grad_norm": 0.82421875, "learning_rate": 0.0003404278925130654, "loss": 0.1892, "step": 276880 }, { "epoch": 11.47, "grad_norm": 0.68359375, "learning_rate": 0.0003404177815607473, "loss": 0.1782, "step": 276890 }, { "epoch": 11.47, "grad_norm": 1.328125, "learning_rate": 0.00034040767043827264, "loss": 0.1698, "step": 276900 }, { "epoch": 11.47, "grad_norm": 1.375, "learning_rate": 0.00034039755914566053, "loss": 0.2485, "step": 276910 }, { "epoch": 11.47, "grad_norm": 1.203125, "learning_rate": 0.00034038744768292994, "loss": 0.1826, "step": 276920 }, { "epoch": 11.47, "grad_norm": 1.296875, "learning_rate": 0.00034037733605009996, "loss": 0.2201, "step": 276930 }, { "epoch": 11.47, "grad_norm": 1.65625, "learning_rate": 0.00034036722424718956, "loss": 0.2373, "step": 276940 }, { "epoch": 11.47, "grad_norm": 1.0625, "learning_rate": 0.0003403571122742178, "loss": 0.1912, "step": 276950 }, { "epoch": 11.47, "grad_norm": 0.62109375, "learning_rate": 0.00034034700013120376, "loss": 0.2142, "step": 276960 }, { "epoch": 11.47, "grad_norm": 0.2392578125, "learning_rate": 0.0003403368878181665, "loss": 0.1394, "step": 276970 }, { "epoch": 11.47, "grad_norm": 0.6953125, "learning_rate": 0.00034032677533512483, "loss": 0.1948, "step": 276980 }, { "epoch": 11.47, "grad_norm": 0.39453125, "learning_rate": 0.000340316662682098, "loss": 0.1928, "step": 276990 }, { "epoch": 11.47, "grad_norm": 1.203125, "learning_rate": 0.000340306549859105, "loss": 0.2003, "step": 277000 }, { "epoch": 11.47, "grad_norm": 0.63671875, "learning_rate": 0.00034029643686616474, "loss": 0.1779, "step": 277010 }, { "epoch": 11.47, "grad_norm": 1.40625, "learning_rate": 0.0003402863237032963, "loss": 0.1866, "step": 277020 }, { "epoch": 11.47, "grad_norm": 1.2265625, "learning_rate": 0.00034027621037051884, "loss": 0.2054, "step": 277030 }, { "epoch": 11.47, "grad_norm": 1.3671875, "learning_rate": 0.00034026609686785124, "loss": 0.1839, "step": 277040 }, { "epoch": 11.48, "grad_norm": 0.61328125, "learning_rate": 0.0003402559831953127, "loss": 0.2066, "step": 277050 }, { "epoch": 11.48, "grad_norm": 0.89453125, "learning_rate": 0.000340245869352922, "loss": 0.227, "step": 277060 }, { "epoch": 11.48, "grad_norm": 0.63671875, "learning_rate": 0.0003402357553406984, "loss": 0.2591, "step": 277070 }, { "epoch": 11.48, "grad_norm": 0.341796875, "learning_rate": 0.0003402256411586609, "loss": 0.1759, "step": 277080 }, { "epoch": 11.48, "grad_norm": 0.6328125, "learning_rate": 0.00034021552680682835, "loss": 0.2254, "step": 277090 }, { "epoch": 11.48, "grad_norm": 0.671875, "learning_rate": 0.00034020541228522007, "loss": 0.1863, "step": 277100 }, { "epoch": 11.48, "grad_norm": 3.828125, "learning_rate": 0.00034019529759385484, "loss": 0.1525, "step": 277110 }, { "epoch": 11.48, "grad_norm": 0.57421875, "learning_rate": 0.00034018518273275185, "loss": 0.2438, "step": 277120 }, { "epoch": 11.48, "grad_norm": 1.328125, "learning_rate": 0.00034017506770193007, "loss": 0.2153, "step": 277130 }, { "epoch": 11.48, "grad_norm": 0.59375, "learning_rate": 0.00034016495250140853, "loss": 0.2518, "step": 277140 }, { "epoch": 11.48, "grad_norm": 0.376953125, "learning_rate": 0.0003401548371312063, "loss": 0.1981, "step": 277150 }, { "epoch": 11.48, "grad_norm": 0.6328125, "learning_rate": 0.00034014472159134247, "loss": 0.1878, "step": 277160 }, { "epoch": 11.48, "grad_norm": 0.44921875, "learning_rate": 0.00034013460588183593, "loss": 0.1907, "step": 277170 }, { "epoch": 11.48, "grad_norm": 1.640625, "learning_rate": 0.00034012449000270586, "loss": 0.1673, "step": 277180 }, { "epoch": 11.48, "grad_norm": 1.9375, "learning_rate": 0.00034011437395397117, "loss": 0.2408, "step": 277190 }, { "epoch": 11.48, "grad_norm": 0.64453125, "learning_rate": 0.00034010425773565095, "loss": 0.1911, "step": 277200 }, { "epoch": 11.48, "grad_norm": 0.6171875, "learning_rate": 0.00034009414134776426, "loss": 0.1496, "step": 277210 }, { "epoch": 11.48, "grad_norm": 1.2734375, "learning_rate": 0.0003400840247903302, "loss": 0.1947, "step": 277220 }, { "epoch": 11.48, "grad_norm": 0.78125, "learning_rate": 0.00034007390806336764, "loss": 0.2154, "step": 277230 }, { "epoch": 11.48, "grad_norm": 1.59375, "learning_rate": 0.00034006379116689576, "loss": 0.1844, "step": 277240 }, { "epoch": 11.48, "grad_norm": 0.2734375, "learning_rate": 0.00034005367410093354, "loss": 0.2128, "step": 277250 }, { "epoch": 11.48, "grad_norm": 0.423828125, "learning_rate": 0.00034004355686550005, "loss": 0.1709, "step": 277260 }, { "epoch": 11.48, "grad_norm": 0.7265625, "learning_rate": 0.0003400334394606143, "loss": 0.1972, "step": 277270 }, { "epoch": 11.48, "grad_norm": 2.15625, "learning_rate": 0.00034002332188629524, "loss": 0.1835, "step": 277280 }, { "epoch": 11.49, "grad_norm": 0.59375, "learning_rate": 0.0003400132041425622, "loss": 0.219, "step": 277290 }, { "epoch": 11.49, "grad_norm": 1.0234375, "learning_rate": 0.0003400030862294338, "loss": 0.1824, "step": 277300 }, { "epoch": 11.49, "grad_norm": 2.015625, "learning_rate": 0.00033999296814692955, "loss": 0.2228, "step": 277310 }, { "epoch": 11.49, "grad_norm": 0.421875, "learning_rate": 0.0003399828498950682, "loss": 0.1599, "step": 277320 }, { "epoch": 11.49, "grad_norm": 0.82421875, "learning_rate": 0.00033997273147386864, "loss": 0.1931, "step": 277330 }, { "epoch": 11.49, "grad_norm": 1.1328125, "learning_rate": 0.0003399626128833503, "loss": 0.1632, "step": 277340 }, { "epoch": 11.49, "grad_norm": 0.28125, "learning_rate": 0.000339952494123532, "loss": 0.2417, "step": 277350 }, { "epoch": 11.49, "grad_norm": 0.3203125, "learning_rate": 0.00033994237519443285, "loss": 0.1672, "step": 277360 }, { "epoch": 11.49, "grad_norm": 1.2578125, "learning_rate": 0.00033993225609607185, "loss": 0.1881, "step": 277370 }, { "epoch": 11.49, "grad_norm": 0.73046875, "learning_rate": 0.000339922136828468, "loss": 0.217, "step": 277380 }, { "epoch": 11.49, "grad_norm": 0.359375, "learning_rate": 0.0003399120173916405, "loss": 0.216, "step": 277390 }, { "epoch": 11.49, "grad_norm": 0.5546875, "learning_rate": 0.00033990189778560816, "loss": 0.2215, "step": 277400 }, { "epoch": 11.49, "grad_norm": 0.65234375, "learning_rate": 0.0003398917780103902, "loss": 0.1924, "step": 277410 }, { "epoch": 11.49, "grad_norm": 0.462890625, "learning_rate": 0.0003398816580660057, "loss": 0.2179, "step": 277420 }, { "epoch": 11.49, "grad_norm": 0.5546875, "learning_rate": 0.0003398715379524736, "loss": 0.2413, "step": 277430 }, { "epoch": 11.49, "grad_norm": 1.078125, "learning_rate": 0.0003398614176698129, "loss": 0.1885, "step": 277440 }, { "epoch": 11.49, "grad_norm": 0.69921875, "learning_rate": 0.00033985129721804283, "loss": 0.2246, "step": 277450 }, { "epoch": 11.49, "grad_norm": 0.53515625, "learning_rate": 0.00033984117659718215, "loss": 0.1983, "step": 277460 }, { "epoch": 11.49, "grad_norm": 0.369140625, "learning_rate": 0.0003398310558072503, "loss": 0.1891, "step": 277470 }, { "epoch": 11.49, "grad_norm": 1.1953125, "learning_rate": 0.0003398209348482659, "loss": 0.2539, "step": 277480 }, { "epoch": 11.49, "grad_norm": 1.65625, "learning_rate": 0.0003398108137202483, "loss": 0.2406, "step": 277490 }, { "epoch": 11.49, "grad_norm": 1.0078125, "learning_rate": 0.00033980069242321644, "loss": 0.1973, "step": 277500 }, { "epoch": 11.49, "grad_norm": 0.76953125, "learning_rate": 0.0003397905709571894, "loss": 0.1475, "step": 277510 }, { "epoch": 11.49, "grad_norm": 0.640625, "learning_rate": 0.0003397804493221861, "loss": 0.183, "step": 277520 }, { "epoch": 11.5, "grad_norm": 0.365234375, "learning_rate": 0.00033977032751822585, "loss": 0.1471, "step": 277530 }, { "epoch": 11.5, "grad_norm": 0.765625, "learning_rate": 0.0003397602055453274, "loss": 0.1828, "step": 277540 }, { "epoch": 11.5, "grad_norm": 0.55078125, "learning_rate": 0.00033975008340351, "loss": 0.1739, "step": 277550 }, { "epoch": 11.5, "grad_norm": 0.462890625, "learning_rate": 0.00033973996109279257, "loss": 0.1467, "step": 277560 }, { "epoch": 11.5, "grad_norm": 0.92578125, "learning_rate": 0.0003397298386131943, "loss": 0.1853, "step": 277570 }, { "epoch": 11.5, "grad_norm": 1.6171875, "learning_rate": 0.00033971971596473416, "loss": 0.1983, "step": 277580 }, { "epoch": 11.5, "grad_norm": 0.65625, "learning_rate": 0.0003397095931474312, "loss": 0.2436, "step": 277590 }, { "epoch": 11.5, "grad_norm": 0.90625, "learning_rate": 0.0003396994701613044, "loss": 0.1353, "step": 277600 }, { "epoch": 11.5, "grad_norm": 0.359375, "learning_rate": 0.00033968934700637297, "loss": 0.2059, "step": 277610 }, { "epoch": 11.5, "grad_norm": 0.78125, "learning_rate": 0.0003396792236826558, "loss": 0.1742, "step": 277620 }, { "epoch": 11.5, "grad_norm": 0.71484375, "learning_rate": 0.00033966910019017204, "loss": 0.2026, "step": 277630 }, { "epoch": 11.5, "grad_norm": 0.98828125, "learning_rate": 0.0003396589765289407, "loss": 0.226, "step": 277640 }, { "epoch": 11.5, "grad_norm": 0.349609375, "learning_rate": 0.00033964885269898093, "loss": 0.1968, "step": 277650 }, { "epoch": 11.5, "grad_norm": 0.57421875, "learning_rate": 0.0003396387287003117, "loss": 0.2057, "step": 277660 }, { "epoch": 11.5, "grad_norm": 0.3984375, "learning_rate": 0.000339628604532952, "loss": 0.2136, "step": 277670 }, { "epoch": 11.5, "grad_norm": 0.62109375, "learning_rate": 0.00033961848019692094, "loss": 0.1785, "step": 277680 }, { "epoch": 11.5, "grad_norm": 0.40234375, "learning_rate": 0.00033960835569223763, "loss": 0.1775, "step": 277690 }, { "epoch": 11.5, "grad_norm": 0.86328125, "learning_rate": 0.000339598231018921, "loss": 0.2024, "step": 277700 }, { "epoch": 11.5, "grad_norm": 0.78515625, "learning_rate": 0.00033958810617699026, "loss": 0.176, "step": 277710 }, { "epoch": 11.5, "grad_norm": 0.859375, "learning_rate": 0.0003395779811664643, "loss": 0.1574, "step": 277720 }, { "epoch": 11.5, "grad_norm": 1.2734375, "learning_rate": 0.0003395678559873623, "loss": 0.2065, "step": 277730 }, { "epoch": 11.5, "grad_norm": 0.640625, "learning_rate": 0.0003395577306397033, "loss": 0.1861, "step": 277740 }, { "epoch": 11.5, "grad_norm": 0.921875, "learning_rate": 0.00033954760512350626, "loss": 0.2253, "step": 277750 }, { "epoch": 11.5, "grad_norm": 0.52734375, "learning_rate": 0.0003395374794387903, "loss": 0.1577, "step": 277760 }, { "epoch": 11.51, "grad_norm": 0.671875, "learning_rate": 0.0003395273535855745, "loss": 0.1767, "step": 277770 }, { "epoch": 11.51, "grad_norm": 0.703125, "learning_rate": 0.0003395172275638778, "loss": 0.2127, "step": 277780 }, { "epoch": 11.51, "grad_norm": 0.65625, "learning_rate": 0.0003395071013737195, "loss": 0.1757, "step": 277790 }, { "epoch": 11.51, "grad_norm": 0.59375, "learning_rate": 0.0003394969750151184, "loss": 0.1732, "step": 277800 }, { "epoch": 11.51, "grad_norm": 0.84375, "learning_rate": 0.00033948684848809365, "loss": 0.177, "step": 277810 }, { "epoch": 11.51, "grad_norm": 0.2890625, "learning_rate": 0.00033947672179266444, "loss": 0.1967, "step": 277820 }, { "epoch": 11.51, "grad_norm": 0.9921875, "learning_rate": 0.0003394665949288496, "loss": 0.2105, "step": 277830 }, { "epoch": 11.51, "grad_norm": 0.984375, "learning_rate": 0.0003394564678966682, "loss": 0.1732, "step": 277840 }, { "epoch": 11.51, "grad_norm": 2.09375, "learning_rate": 0.0003394463406961395, "loss": 0.2044, "step": 277850 }, { "epoch": 11.51, "grad_norm": 0.9140625, "learning_rate": 0.0003394362133272824, "loss": 0.2032, "step": 277860 }, { "epoch": 11.51, "grad_norm": 1.6953125, "learning_rate": 0.00033942608579011615, "loss": 0.2153, "step": 277870 }, { "epoch": 11.51, "grad_norm": 0.70703125, "learning_rate": 0.00033941595808465954, "loss": 0.2001, "step": 277880 }, { "epoch": 11.51, "grad_norm": 0.83984375, "learning_rate": 0.0003394058302109317, "loss": 0.2226, "step": 277890 }, { "epoch": 11.51, "grad_norm": 0.01123046875, "learning_rate": 0.0003393957021689519, "loss": 0.1863, "step": 277900 }, { "epoch": 11.51, "grad_norm": 2.578125, "learning_rate": 0.0003393855739587389, "loss": 0.2237, "step": 277910 }, { "epoch": 11.51, "grad_norm": 0.7109375, "learning_rate": 0.0003393754455803119, "loss": 0.145, "step": 277920 }, { "epoch": 11.51, "grad_norm": 0.7734375, "learning_rate": 0.00033936531703369, "loss": 0.1824, "step": 277930 }, { "epoch": 11.51, "grad_norm": 0.54296875, "learning_rate": 0.0003393551883188922, "loss": 0.1752, "step": 277940 }, { "epoch": 11.51, "grad_norm": 1.1015625, "learning_rate": 0.00033934505943593774, "loss": 0.2468, "step": 277950 }, { "epoch": 11.51, "grad_norm": 0.9140625, "learning_rate": 0.00033933493038484534, "loss": 0.1941, "step": 277960 }, { "epoch": 11.51, "grad_norm": 0.6171875, "learning_rate": 0.0003393248011656344, "loss": 0.2074, "step": 277970 }, { "epoch": 11.51, "grad_norm": 0.056884765625, "learning_rate": 0.00033931467177832373, "loss": 0.1805, "step": 277980 }, { "epoch": 11.51, "grad_norm": 0.63671875, "learning_rate": 0.0003393045422229324, "loss": 0.1744, "step": 277990 }, { "epoch": 11.51, "grad_norm": 1.4765625, "learning_rate": 0.0003392944124994798, "loss": 0.1537, "step": 278000 }, { "epoch": 11.52, "grad_norm": 0.8046875, "learning_rate": 0.0003392842826079846, "loss": 0.1867, "step": 278010 }, { "epoch": 11.52, "grad_norm": 0.7421875, "learning_rate": 0.0003392741525484661, "loss": 0.1591, "step": 278020 }, { "epoch": 11.52, "grad_norm": 1.15625, "learning_rate": 0.0003392640223209432, "loss": 0.1998, "step": 278030 }, { "epoch": 11.52, "grad_norm": 0.97265625, "learning_rate": 0.0003392538919254351, "loss": 0.1757, "step": 278040 }, { "epoch": 11.52, "grad_norm": 0.61328125, "learning_rate": 0.00033924376136196076, "loss": 0.2076, "step": 278050 }, { "epoch": 11.52, "grad_norm": 0.65625, "learning_rate": 0.00033923363063053936, "loss": 0.1983, "step": 278060 }, { "epoch": 11.52, "grad_norm": 1.1953125, "learning_rate": 0.0003392234997311899, "loss": 0.1781, "step": 278070 }, { "epoch": 11.52, "grad_norm": 0.419921875, "learning_rate": 0.0003392133686639314, "loss": 0.1892, "step": 278080 }, { "epoch": 11.52, "grad_norm": 0.95703125, "learning_rate": 0.00033920323742878307, "loss": 0.1922, "step": 278090 }, { "epoch": 11.52, "grad_norm": 0.99609375, "learning_rate": 0.0003391931060257638, "loss": 0.2082, "step": 278100 }, { "epoch": 11.52, "grad_norm": 1.1484375, "learning_rate": 0.00033918297445489275, "loss": 0.2019, "step": 278110 }, { "epoch": 11.52, "grad_norm": 0.9453125, "learning_rate": 0.000339172842716189, "loss": 0.1829, "step": 278120 }, { "epoch": 11.52, "grad_norm": 0.765625, "learning_rate": 0.00033916271080967154, "loss": 0.2081, "step": 278130 }, { "epoch": 11.52, "grad_norm": 0.490234375, "learning_rate": 0.00033915257873535956, "loss": 0.151, "step": 278140 }, { "epoch": 11.52, "grad_norm": 0.46875, "learning_rate": 0.00033914244649327195, "loss": 0.2334, "step": 278150 }, { "epoch": 11.52, "grad_norm": 0.5234375, "learning_rate": 0.000339132314083428, "loss": 0.1662, "step": 278160 }, { "epoch": 11.52, "grad_norm": 0.2578125, "learning_rate": 0.0003391221815058466, "loss": 0.1759, "step": 278170 }, { "epoch": 11.52, "grad_norm": 0.88671875, "learning_rate": 0.0003391120487605469, "loss": 0.1579, "step": 278180 }, { "epoch": 11.52, "grad_norm": 1.15625, "learning_rate": 0.00033910191584754794, "loss": 0.1931, "step": 278190 }, { "epoch": 11.52, "grad_norm": 0.408203125, "learning_rate": 0.0003390917827668688, "loss": 0.244, "step": 278200 }, { "epoch": 11.52, "grad_norm": 0.91796875, "learning_rate": 0.0003390816495185285, "loss": 0.1809, "step": 278210 }, { "epoch": 11.52, "grad_norm": 1.9296875, "learning_rate": 0.00033907151610254615, "loss": 0.2077, "step": 278220 }, { "epoch": 11.52, "grad_norm": 1.265625, "learning_rate": 0.00033906138251894093, "loss": 0.1859, "step": 278230 }, { "epoch": 11.52, "grad_norm": 0.50390625, "learning_rate": 0.0003390512487677317, "loss": 0.1994, "step": 278240 }, { "epoch": 11.53, "grad_norm": 0.515625, "learning_rate": 0.0003390411148489377, "loss": 0.2053, "step": 278250 }, { "epoch": 11.53, "grad_norm": 0.74609375, "learning_rate": 0.00033903098076257785, "loss": 0.2322, "step": 278260 }, { "epoch": 11.53, "grad_norm": 0.56640625, "learning_rate": 0.0003390208465086714, "loss": 0.1812, "step": 278270 }, { "epoch": 11.53, "grad_norm": 0.90625, "learning_rate": 0.0003390107120872373, "loss": 0.2104, "step": 278280 }, { "epoch": 11.53, "grad_norm": 0.8359375, "learning_rate": 0.00033900057749829474, "loss": 0.2159, "step": 278290 }, { "epoch": 11.53, "grad_norm": 0.4453125, "learning_rate": 0.0003389904427418626, "loss": 0.1906, "step": 278300 }, { "epoch": 11.53, "grad_norm": 0.79296875, "learning_rate": 0.00033898030781796004, "loss": 0.1655, "step": 278310 }, { "epoch": 11.53, "grad_norm": 0.48046875, "learning_rate": 0.00033897017272660625, "loss": 0.1873, "step": 278320 }, { "epoch": 11.53, "grad_norm": 0.41015625, "learning_rate": 0.0003389600374678201, "loss": 0.2207, "step": 278330 }, { "epoch": 11.53, "grad_norm": 0.609375, "learning_rate": 0.00033894990204162077, "loss": 0.204, "step": 278340 }, { "epoch": 11.53, "grad_norm": 0.71484375, "learning_rate": 0.0003389397664480274, "loss": 0.2214, "step": 278350 }, { "epoch": 11.53, "grad_norm": 0.68359375, "learning_rate": 0.00033892963068705897, "loss": 0.1694, "step": 278360 }, { "epoch": 11.53, "grad_norm": 0.7890625, "learning_rate": 0.00033891949475873455, "loss": 0.1814, "step": 278370 }, { "epoch": 11.53, "grad_norm": 0.62890625, "learning_rate": 0.0003389093586630733, "loss": 0.2552, "step": 278380 }, { "epoch": 11.53, "grad_norm": 0.80078125, "learning_rate": 0.0003388992224000942, "loss": 0.1742, "step": 278390 }, { "epoch": 11.53, "grad_norm": 0.6171875, "learning_rate": 0.0003388890859698163, "loss": 0.1749, "step": 278400 }, { "epoch": 11.53, "grad_norm": 1.34375, "learning_rate": 0.00033887894937225884, "loss": 0.1934, "step": 278410 }, { "epoch": 11.53, "grad_norm": 0.68359375, "learning_rate": 0.00033886881260744074, "loss": 0.2389, "step": 278420 }, { "epoch": 11.53, "grad_norm": 1.2890625, "learning_rate": 0.00033885867567538114, "loss": 0.1869, "step": 278430 }, { "epoch": 11.53, "grad_norm": 0.341796875, "learning_rate": 0.0003388485385760992, "loss": 0.2159, "step": 278440 }, { "epoch": 11.53, "grad_norm": 0.97265625, "learning_rate": 0.0003388384013096138, "loss": 0.2038, "step": 278450 }, { "epoch": 11.53, "grad_norm": 0.60546875, "learning_rate": 0.0003388282638759441, "loss": 0.1894, "step": 278460 }, { "epoch": 11.53, "grad_norm": 0.61328125, "learning_rate": 0.00033881812627510925, "loss": 0.1976, "step": 278470 }, { "epoch": 11.53, "grad_norm": 0.490234375, "learning_rate": 0.0003388079885071283, "loss": 0.2147, "step": 278480 }, { "epoch": 11.54, "grad_norm": 0.58203125, "learning_rate": 0.00033879785057202026, "loss": 0.1714, "step": 278490 }, { "epoch": 11.54, "grad_norm": 0.63671875, "learning_rate": 0.00033878771246980435, "loss": 0.2279, "step": 278500 }, { "epoch": 11.54, "grad_norm": 0.84375, "learning_rate": 0.00033877757420049944, "loss": 0.1907, "step": 278510 }, { "epoch": 11.54, "grad_norm": 0.7109375, "learning_rate": 0.0003387674357641248, "loss": 0.193, "step": 278520 }, { "epoch": 11.54, "grad_norm": 0.5625, "learning_rate": 0.00033875729716069934, "loss": 0.1931, "step": 278530 }, { "epoch": 11.54, "grad_norm": 0.435546875, "learning_rate": 0.0003387471583902423, "loss": 0.1966, "step": 278540 }, { "epoch": 11.54, "grad_norm": 0.69140625, "learning_rate": 0.0003387370194527727, "loss": 0.2299, "step": 278550 }, { "epoch": 11.54, "grad_norm": 1.25, "learning_rate": 0.0003387268803483096, "loss": 0.1917, "step": 278560 }, { "epoch": 11.54, "grad_norm": 1.390625, "learning_rate": 0.00033871674107687204, "loss": 0.1506, "step": 278570 }, { "epoch": 11.54, "grad_norm": 1.4296875, "learning_rate": 0.0003387066016384792, "loss": 0.1858, "step": 278580 }, { "epoch": 11.54, "grad_norm": 1.1953125, "learning_rate": 0.00033869646203315006, "loss": 0.2227, "step": 278590 }, { "epoch": 11.54, "grad_norm": 0.25390625, "learning_rate": 0.0003386863222609039, "loss": 0.2008, "step": 278600 }, { "epoch": 11.54, "grad_norm": 0.796875, "learning_rate": 0.0003386761823217595, "loss": 0.2327, "step": 278610 }, { "epoch": 11.54, "grad_norm": 0.6875, "learning_rate": 0.0003386660422157362, "loss": 0.1677, "step": 278620 }, { "epoch": 11.54, "grad_norm": 0.00531005859375, "learning_rate": 0.00033865590194285297, "loss": 0.1653, "step": 278630 }, { "epoch": 11.54, "grad_norm": 0.72265625, "learning_rate": 0.0003386457615031289, "loss": 0.2058, "step": 278640 }, { "epoch": 11.54, "grad_norm": 0.3515625, "learning_rate": 0.00033863562089658306, "loss": 0.126, "step": 278650 }, { "epoch": 11.54, "grad_norm": 0.76953125, "learning_rate": 0.0003386254801232346, "loss": 0.1626, "step": 278660 }, { "epoch": 11.54, "grad_norm": 0.240234375, "learning_rate": 0.00033861533918310246, "loss": 0.2332, "step": 278670 }, { "epoch": 11.54, "grad_norm": 0.78125, "learning_rate": 0.00033860519807620595, "loss": 0.1595, "step": 278680 }, { "epoch": 11.54, "grad_norm": 0.43359375, "learning_rate": 0.00033859505680256387, "loss": 0.1667, "step": 278690 }, { "epoch": 11.54, "grad_norm": 0.828125, "learning_rate": 0.0003385849153621956, "loss": 0.1779, "step": 278700 }, { "epoch": 11.54, "grad_norm": 1.1875, "learning_rate": 0.00033857477375512, "loss": 0.2207, "step": 278710 }, { "epoch": 11.54, "grad_norm": 0.85546875, "learning_rate": 0.0003385646319813562, "loss": 0.179, "step": 278720 }, { "epoch": 11.54, "grad_norm": 0.76953125, "learning_rate": 0.0003385544900409235, "loss": 0.1994, "step": 278730 }, { "epoch": 11.55, "grad_norm": 0.6640625, "learning_rate": 0.0003385443479338407, "loss": 0.1577, "step": 278740 }, { "epoch": 11.55, "grad_norm": 1.0625, "learning_rate": 0.000338534205660127, "loss": 0.1885, "step": 278750 }, { "epoch": 11.55, "grad_norm": 0.53515625, "learning_rate": 0.00033852406321980156, "loss": 0.1597, "step": 278760 }, { "epoch": 11.55, "grad_norm": 2.109375, "learning_rate": 0.0003385139206128832, "loss": 0.1381, "step": 278770 }, { "epoch": 11.55, "grad_norm": 0.37109375, "learning_rate": 0.0003385037778393914, "loss": 0.195, "step": 278780 }, { "epoch": 11.55, "grad_norm": 0.6484375, "learning_rate": 0.000338493634899345, "loss": 0.2072, "step": 278790 }, { "epoch": 11.55, "grad_norm": 1.1796875, "learning_rate": 0.0003384834917927631, "loss": 0.205, "step": 278800 }, { "epoch": 11.55, "grad_norm": 1.109375, "learning_rate": 0.00033847334851966484, "loss": 0.1957, "step": 278810 }, { "epoch": 11.55, "grad_norm": 0.78515625, "learning_rate": 0.00033846320508006925, "loss": 0.1387, "step": 278820 }, { "epoch": 11.55, "grad_norm": 1.53125, "learning_rate": 0.0003384530614739955, "loss": 0.2167, "step": 278830 }, { "epoch": 11.55, "grad_norm": 0.76171875, "learning_rate": 0.00033844291770146267, "loss": 0.1681, "step": 278840 }, { "epoch": 11.55, "grad_norm": 0.87890625, "learning_rate": 0.0003384327737624898, "loss": 0.1956, "step": 278850 }, { "epoch": 11.55, "grad_norm": 0.287109375, "learning_rate": 0.00033842262965709595, "loss": 0.173, "step": 278860 }, { "epoch": 11.55, "grad_norm": 1.0, "learning_rate": 0.0003384124853853003, "loss": 0.2005, "step": 278870 }, { "epoch": 11.55, "grad_norm": 1.6875, "learning_rate": 0.0003384023409471219, "loss": 0.2159, "step": 278880 }, { "epoch": 11.55, "grad_norm": 0.35546875, "learning_rate": 0.0003383921963425799, "loss": 0.1832, "step": 278890 }, { "epoch": 11.55, "grad_norm": 0.41796875, "learning_rate": 0.00033838205157169323, "loss": 0.2004, "step": 278900 }, { "epoch": 11.55, "grad_norm": 0.5546875, "learning_rate": 0.00033837190663448116, "loss": 0.2468, "step": 278910 }, { "epoch": 11.55, "grad_norm": 0.9140625, "learning_rate": 0.0003383617615309627, "loss": 0.2605, "step": 278920 }, { "epoch": 11.55, "grad_norm": 0.6484375, "learning_rate": 0.0003383516162611568, "loss": 0.2087, "step": 278930 }, { "epoch": 11.55, "grad_norm": 0.56640625, "learning_rate": 0.00033834147082508295, "loss": 0.227, "step": 278940 }, { "epoch": 11.55, "grad_norm": 0.2314453125, "learning_rate": 0.0003383313252227598, "loss": 0.1463, "step": 278950 }, { "epoch": 11.55, "grad_norm": 0.458984375, "learning_rate": 0.0003383211794542067, "loss": 0.1401, "step": 278960 }, { "epoch": 11.55, "grad_norm": 1.4296875, "learning_rate": 0.0003383110335194427, "loss": 0.2366, "step": 278970 }, { "epoch": 11.56, "grad_norm": 0.82421875, "learning_rate": 0.0003383008874184868, "loss": 0.2372, "step": 278980 }, { "epoch": 11.56, "grad_norm": 0.87890625, "learning_rate": 0.0003382907411513582, "loss": 0.1629, "step": 278990 }, { "epoch": 11.56, "grad_norm": 0.5859375, "learning_rate": 0.00033828059471807605, "loss": 0.2022, "step": 279000 }, { "epoch": 11.56, "grad_norm": 1.25, "learning_rate": 0.0003382704481186592, "loss": 0.1795, "step": 279010 }, { "epoch": 11.56, "grad_norm": 0.99609375, "learning_rate": 0.000338260301353127, "loss": 0.1869, "step": 279020 }, { "epoch": 11.56, "grad_norm": 0.1689453125, "learning_rate": 0.00033825015442149844, "loss": 0.2316, "step": 279030 }, { "epoch": 11.56, "grad_norm": 0.65625, "learning_rate": 0.0003382400073237926, "loss": 0.1635, "step": 279040 }, { "epoch": 11.56, "grad_norm": 0.482421875, "learning_rate": 0.00033822986006002857, "loss": 0.1997, "step": 279050 }, { "epoch": 11.56, "grad_norm": 1.15625, "learning_rate": 0.00033821971263022546, "loss": 0.2315, "step": 279060 }, { "epoch": 11.56, "grad_norm": 0.62109375, "learning_rate": 0.00033820956503440247, "loss": 0.175, "step": 279070 }, { "epoch": 11.56, "grad_norm": 0.451171875, "learning_rate": 0.0003381994172725786, "loss": 0.1933, "step": 279080 }, { "epoch": 11.56, "grad_norm": 0.58984375, "learning_rate": 0.0003381892693447728, "loss": 0.1817, "step": 279090 }, { "epoch": 11.56, "grad_norm": 1.9921875, "learning_rate": 0.0003381791212510045, "loss": 0.1572, "step": 279100 }, { "epoch": 11.56, "grad_norm": 1.0546875, "learning_rate": 0.0003381689729912925, "loss": 0.233, "step": 279110 }, { "epoch": 11.56, "grad_norm": 1.5546875, "learning_rate": 0.000338158824565656, "loss": 0.2316, "step": 279120 }, { "epoch": 11.56, "grad_norm": 0.6875, "learning_rate": 0.0003381486759741142, "loss": 0.161, "step": 279130 }, { "epoch": 11.56, "grad_norm": 0.66015625, "learning_rate": 0.00033813852721668603, "loss": 0.2333, "step": 279140 }, { "epoch": 11.56, "grad_norm": 0.7421875, "learning_rate": 0.00033812837829339074, "loss": 0.1982, "step": 279150 }, { "epoch": 11.56, "grad_norm": 0.4609375, "learning_rate": 0.0003381182292042473, "loss": 0.2228, "step": 279160 }, { "epoch": 11.56, "grad_norm": 0.609375, "learning_rate": 0.0003381080799492749, "loss": 0.1782, "step": 279170 }, { "epoch": 11.56, "grad_norm": 0.4140625, "learning_rate": 0.0003380979305284926, "loss": 0.1659, "step": 279180 }, { "epoch": 11.56, "grad_norm": 0.76171875, "learning_rate": 0.0003380877809419196, "loss": 0.194, "step": 279190 }, { "epoch": 11.56, "grad_norm": 0.7265625, "learning_rate": 0.0003380776311895748, "loss": 0.181, "step": 279200 }, { "epoch": 11.56, "grad_norm": 1.5234375, "learning_rate": 0.0003380674812714775, "loss": 0.2051, "step": 279210 }, { "epoch": 11.57, "grad_norm": 0.63671875, "learning_rate": 0.00033805733118764654, "loss": 0.2309, "step": 279220 }, { "epoch": 11.57, "grad_norm": 0.83203125, "learning_rate": 0.0003380471809381013, "loss": 0.1553, "step": 279230 }, { "epoch": 11.57, "grad_norm": 0.388671875, "learning_rate": 0.0003380370305228608, "loss": 0.2909, "step": 279240 }, { "epoch": 11.57, "grad_norm": 0.82421875, "learning_rate": 0.000338026879941944, "loss": 0.2, "step": 279250 }, { "epoch": 11.57, "grad_norm": 1.1015625, "learning_rate": 0.0003380167291953703, "loss": 0.137, "step": 279260 }, { "epoch": 11.57, "grad_norm": 0.7109375, "learning_rate": 0.0003380065782831585, "loss": 0.1714, "step": 279270 }, { "epoch": 11.57, "grad_norm": 0.66015625, "learning_rate": 0.0003379964272053279, "loss": 0.1838, "step": 279280 }, { "epoch": 11.57, "grad_norm": 0.427734375, "learning_rate": 0.00033798627596189746, "loss": 0.182, "step": 279290 }, { "epoch": 11.57, "grad_norm": 0.8203125, "learning_rate": 0.00033797612455288627, "loss": 0.1747, "step": 279300 }, { "epoch": 11.57, "grad_norm": 0.83984375, "learning_rate": 0.00033796597297831366, "loss": 0.1974, "step": 279310 }, { "epoch": 11.57, "grad_norm": 0.70703125, "learning_rate": 0.00033795582123819857, "loss": 0.2491, "step": 279320 }, { "epoch": 11.57, "grad_norm": 0.373046875, "learning_rate": 0.00033794566933256006, "loss": 0.2088, "step": 279330 }, { "epoch": 11.57, "grad_norm": 0.7109375, "learning_rate": 0.0003379355172614174, "loss": 0.2028, "step": 279340 }, { "epoch": 11.57, "grad_norm": 0.875, "learning_rate": 0.00033792536502478946, "loss": 0.1699, "step": 279350 }, { "epoch": 11.57, "grad_norm": 0.83984375, "learning_rate": 0.00033791521262269554, "loss": 0.1816, "step": 279360 }, { "epoch": 11.57, "grad_norm": 1.078125, "learning_rate": 0.00033790506005515464, "loss": 0.1796, "step": 279370 }, { "epoch": 11.57, "grad_norm": 0.291015625, "learning_rate": 0.00033789490732218596, "loss": 0.1381, "step": 279380 }, { "epoch": 11.57, "grad_norm": 0.0, "learning_rate": 0.00033788475442380863, "loss": 0.2084, "step": 279390 }, { "epoch": 11.57, "grad_norm": 0.65234375, "learning_rate": 0.00033787460136004157, "loss": 0.2067, "step": 279400 }, { "epoch": 11.57, "grad_norm": 0.466796875, "learning_rate": 0.000337864448130904, "loss": 0.1885, "step": 279410 }, { "epoch": 11.57, "grad_norm": 0.26171875, "learning_rate": 0.0003378542947364151, "loss": 0.1678, "step": 279420 }, { "epoch": 11.57, "grad_norm": 0.71484375, "learning_rate": 0.00033784414117659387, "loss": 0.1999, "step": 279430 }, { "epoch": 11.57, "grad_norm": 0.33203125, "learning_rate": 0.0003378339874514594, "loss": 0.2033, "step": 279440 }, { "epoch": 11.57, "grad_norm": 0.62890625, "learning_rate": 0.0003378238335610309, "loss": 0.1884, "step": 279450 }, { "epoch": 11.58, "grad_norm": 0.400390625, "learning_rate": 0.00033781367950532745, "loss": 0.1874, "step": 279460 }, { "epoch": 11.58, "grad_norm": 0.8046875, "learning_rate": 0.00033780352528436816, "loss": 0.2398, "step": 279470 }, { "epoch": 11.58, "grad_norm": 0.91796875, "learning_rate": 0.000337793370898172, "loss": 0.1879, "step": 279480 }, { "epoch": 11.58, "grad_norm": 1.65625, "learning_rate": 0.0003377832163467583, "loss": 0.1926, "step": 279490 }, { "epoch": 11.58, "grad_norm": 0.453125, "learning_rate": 0.00033777306163014606, "loss": 0.1825, "step": 279500 }, { "epoch": 11.58, "grad_norm": 0.85546875, "learning_rate": 0.0003377629067483544, "loss": 0.2423, "step": 279510 }, { "epoch": 11.58, "grad_norm": 0.64453125, "learning_rate": 0.0003377527517014023, "loss": 0.1759, "step": 279520 }, { "epoch": 11.58, "grad_norm": 0.7109375, "learning_rate": 0.00033774259648930915, "loss": 0.1491, "step": 279530 }, { "epoch": 11.58, "grad_norm": 0.236328125, "learning_rate": 0.00033773244111209383, "loss": 0.151, "step": 279540 }, { "epoch": 11.58, "grad_norm": 0.8515625, "learning_rate": 0.00033772228556977555, "loss": 0.1705, "step": 279550 }, { "epoch": 11.58, "grad_norm": 1.2421875, "learning_rate": 0.00033771212986237345, "loss": 0.1321, "step": 279560 }, { "epoch": 11.58, "grad_norm": 0.56640625, "learning_rate": 0.00033770197398990656, "loss": 0.1699, "step": 279570 }, { "epoch": 11.58, "grad_norm": 0.6328125, "learning_rate": 0.00033769181795239396, "loss": 0.1661, "step": 279580 }, { "epoch": 11.58, "grad_norm": 0.890625, "learning_rate": 0.00033768166174985494, "loss": 0.2059, "step": 279590 }, { "epoch": 11.58, "grad_norm": 3.8125, "learning_rate": 0.00033767150538230843, "loss": 0.1902, "step": 279600 }, { "epoch": 11.58, "grad_norm": 0.6953125, "learning_rate": 0.0003376613488497736, "loss": 0.2314, "step": 279610 }, { "epoch": 11.58, "grad_norm": 0.59765625, "learning_rate": 0.00033765119215226964, "loss": 0.1826, "step": 279620 }, { "epoch": 11.58, "grad_norm": 0.78125, "learning_rate": 0.00033764103528981563, "loss": 0.2182, "step": 279630 }, { "epoch": 11.58, "grad_norm": 0.8203125, "learning_rate": 0.00033763087826243056, "loss": 0.1797, "step": 279640 }, { "epoch": 11.58, "grad_norm": 1.078125, "learning_rate": 0.0003376207210701336, "loss": 0.1744, "step": 279650 }, { "epoch": 11.58, "grad_norm": 0.58984375, "learning_rate": 0.000337610563712944, "loss": 0.1884, "step": 279660 }, { "epoch": 11.58, "grad_norm": 1.3203125, "learning_rate": 0.00033760040619088086, "loss": 0.178, "step": 279670 }, { "epoch": 11.58, "grad_norm": 0.578125, "learning_rate": 0.0003375902485039631, "loss": 0.1759, "step": 279680 }, { "epoch": 11.58, "grad_norm": 0.54296875, "learning_rate": 0.00033758009065220994, "loss": 0.2152, "step": 279690 }, { "epoch": 11.59, "grad_norm": 0.5390625, "learning_rate": 0.0003375699326356405, "loss": 0.1783, "step": 279700 }, { "epoch": 11.59, "grad_norm": 0.67578125, "learning_rate": 0.000337559774454274, "loss": 0.2367, "step": 279710 }, { "epoch": 11.59, "grad_norm": 0.42578125, "learning_rate": 0.0003375496161081294, "loss": 0.162, "step": 279720 }, { "epoch": 11.59, "grad_norm": 0.82421875, "learning_rate": 0.0003375394575972259, "loss": 0.1748, "step": 279730 }, { "epoch": 11.59, "grad_norm": 0.9921875, "learning_rate": 0.0003375292989215825, "loss": 0.2114, "step": 279740 }, { "epoch": 11.59, "grad_norm": 0.93359375, "learning_rate": 0.00033751914008121853, "loss": 0.2216, "step": 279750 }, { "epoch": 11.59, "grad_norm": 1.1640625, "learning_rate": 0.00033750898107615297, "loss": 0.1739, "step": 279760 }, { "epoch": 11.59, "grad_norm": 1.7421875, "learning_rate": 0.0003374988219064049, "loss": 0.1987, "step": 279770 }, { "epoch": 11.59, "grad_norm": 0.65625, "learning_rate": 0.0003374886625719935, "loss": 0.1854, "step": 279780 }, { "epoch": 11.59, "grad_norm": 0.8515625, "learning_rate": 0.0003374785030729379, "loss": 0.2219, "step": 279790 }, { "epoch": 11.59, "grad_norm": 0.70703125, "learning_rate": 0.00033746834340925725, "loss": 0.1943, "step": 279800 }, { "epoch": 11.59, "grad_norm": 1.1328125, "learning_rate": 0.0003374581835809706, "loss": 0.1748, "step": 279810 }, { "epoch": 11.59, "grad_norm": 0.77734375, "learning_rate": 0.00033744802358809704, "loss": 0.2275, "step": 279820 }, { "epoch": 11.59, "grad_norm": 1.0078125, "learning_rate": 0.0003374378634306558, "loss": 0.2147, "step": 279830 }, { "epoch": 11.59, "grad_norm": 0.4296875, "learning_rate": 0.00033742770310866597, "loss": 0.1771, "step": 279840 }, { "epoch": 11.59, "grad_norm": 1.0078125, "learning_rate": 0.0003374175426221466, "loss": 0.1481, "step": 279850 }, { "epoch": 11.59, "grad_norm": 0.98828125, "learning_rate": 0.00033740738197111686, "loss": 0.1859, "step": 279860 }, { "epoch": 11.59, "grad_norm": 0.78125, "learning_rate": 0.00033739722115559587, "loss": 0.1872, "step": 279870 }, { "epoch": 11.59, "grad_norm": 0.89453125, "learning_rate": 0.0003373870601756027, "loss": 0.2032, "step": 279880 }, { "epoch": 11.59, "grad_norm": 0.47265625, "learning_rate": 0.0003373768990311566, "loss": 0.1963, "step": 279890 }, { "epoch": 11.59, "grad_norm": 0.6953125, "learning_rate": 0.00033736673772227657, "loss": 0.2068, "step": 279900 }, { "epoch": 11.59, "grad_norm": 1.6796875, "learning_rate": 0.0003373565762489818, "loss": 0.2207, "step": 279910 }, { "epoch": 11.59, "grad_norm": 0.75390625, "learning_rate": 0.00033734641461129135, "loss": 0.1692, "step": 279920 }, { "epoch": 11.59, "grad_norm": 0.388671875, "learning_rate": 0.00033733625280922444, "loss": 0.1882, "step": 279930 }, { "epoch": 11.6, "grad_norm": 0.58984375, "learning_rate": 0.00033732609084280007, "loss": 0.19, "step": 279940 }, { "epoch": 11.6, "grad_norm": 0.9765625, "learning_rate": 0.00033731592871203743, "loss": 0.1698, "step": 279950 }, { "epoch": 11.6, "grad_norm": 1.2734375, "learning_rate": 0.00033730576641695565, "loss": 0.1465, "step": 279960 }, { "epoch": 11.6, "grad_norm": 0.470703125, "learning_rate": 0.0003372956039575739, "loss": 0.2087, "step": 279970 }, { "epoch": 11.6, "grad_norm": 0.7890625, "learning_rate": 0.00033728544133391114, "loss": 0.1645, "step": 279980 }, { "epoch": 11.6, "grad_norm": 1.5234375, "learning_rate": 0.00033727527854598673, "loss": 0.2264, "step": 279990 }, { "epoch": 11.6, "grad_norm": 1.46875, "learning_rate": 0.00033726511559381957, "loss": 0.1963, "step": 280000 }, { "epoch": 11.6, "grad_norm": 0.85546875, "learning_rate": 0.000337254952477429, "loss": 0.1715, "step": 280010 }, { "epoch": 11.6, "grad_norm": 0.75390625, "learning_rate": 0.00033724478919683386, "loss": 0.2055, "step": 280020 }, { "epoch": 11.6, "grad_norm": 0.9453125, "learning_rate": 0.0003372346257520536, "loss": 0.2146, "step": 280030 }, { "epoch": 11.6, "grad_norm": 0.466796875, "learning_rate": 0.0003372244621431072, "loss": 0.2113, "step": 280040 }, { "epoch": 11.6, "grad_norm": 0.88671875, "learning_rate": 0.0003372142983700137, "loss": 0.2247, "step": 280050 }, { "epoch": 11.6, "grad_norm": 0.7734375, "learning_rate": 0.0003372041344327923, "loss": 0.2727, "step": 280060 }, { "epoch": 11.6, "grad_norm": 0.71484375, "learning_rate": 0.00033719397033146216, "loss": 0.216, "step": 280070 }, { "epoch": 11.6, "grad_norm": 1.2578125, "learning_rate": 0.0003371838060660424, "loss": 0.1768, "step": 280080 }, { "epoch": 11.6, "grad_norm": 0.59375, "learning_rate": 0.0003371736416365522, "loss": 0.2387, "step": 280090 }, { "epoch": 11.6, "grad_norm": 0.625, "learning_rate": 0.0003371634770430105, "loss": 0.2335, "step": 280100 }, { "epoch": 11.6, "grad_norm": 1.1953125, "learning_rate": 0.0003371533122854365, "loss": 0.2057, "step": 280110 }, { "epoch": 11.6, "grad_norm": 0.94140625, "learning_rate": 0.0003371431473638496, "loss": 0.201, "step": 280120 }, { "epoch": 11.6, "grad_norm": 1.3046875, "learning_rate": 0.0003371329822782685, "loss": 0.2448, "step": 280130 }, { "epoch": 11.6, "grad_norm": 1.0234375, "learning_rate": 0.00033712281702871267, "loss": 0.2007, "step": 280140 }, { "epoch": 11.6, "grad_norm": 0.9140625, "learning_rate": 0.00033711265161520107, "loss": 0.1599, "step": 280150 }, { "epoch": 11.6, "grad_norm": 0.8203125, "learning_rate": 0.0003371024860377528, "loss": 0.222, "step": 280160 }, { "epoch": 11.6, "grad_norm": 1.3984375, "learning_rate": 0.00033709232029638717, "loss": 0.1924, "step": 280170 }, { "epoch": 11.61, "grad_norm": 0.6171875, "learning_rate": 0.0003370821543911231, "loss": 0.1987, "step": 280180 }, { "epoch": 11.61, "grad_norm": 0.7890625, "learning_rate": 0.00033707198832197985, "loss": 0.2259, "step": 280190 }, { "epoch": 11.61, "grad_norm": 0.53125, "learning_rate": 0.0003370618220889766, "loss": 0.1898, "step": 280200 }, { "epoch": 11.61, "grad_norm": 0.55078125, "learning_rate": 0.00033705165569213226, "loss": 0.2182, "step": 280210 }, { "epoch": 11.61, "grad_norm": 0.703125, "learning_rate": 0.0003370414891314662, "loss": 0.1614, "step": 280220 }, { "epoch": 11.61, "grad_norm": 0.81640625, "learning_rate": 0.0003370313224069974, "loss": 0.2021, "step": 280230 }, { "epoch": 11.61, "grad_norm": 1.21875, "learning_rate": 0.00033702115551874514, "loss": 0.1774, "step": 280240 }, { "epoch": 11.61, "grad_norm": 0.6171875, "learning_rate": 0.0003370109884667284, "loss": 0.1723, "step": 280250 }, { "epoch": 11.61, "grad_norm": 1.125, "learning_rate": 0.0003370008212509664, "loss": 0.236, "step": 280260 }, { "epoch": 11.61, "grad_norm": 0.91015625, "learning_rate": 0.0003369906538714782, "loss": 0.2016, "step": 280270 }, { "epoch": 11.61, "grad_norm": 0.45703125, "learning_rate": 0.00033698048632828305, "loss": 0.1534, "step": 280280 }, { "epoch": 11.61, "grad_norm": 1.15625, "learning_rate": 0.00033697031862139996, "loss": 0.2028, "step": 280290 }, { "epoch": 11.61, "grad_norm": 0.5234375, "learning_rate": 0.00033696015075084814, "loss": 0.1935, "step": 280300 }, { "epoch": 11.61, "grad_norm": 0.6171875, "learning_rate": 0.0003369499827166467, "loss": 0.1829, "step": 280310 }, { "epoch": 11.61, "grad_norm": 0.94140625, "learning_rate": 0.0003369398145188148, "loss": 0.1915, "step": 280320 }, { "epoch": 11.61, "grad_norm": 0.78515625, "learning_rate": 0.00033692964615737156, "loss": 0.2058, "step": 280330 }, { "epoch": 11.61, "grad_norm": 0.9375, "learning_rate": 0.0003369194776323361, "loss": 0.2327, "step": 280340 }, { "epoch": 11.61, "grad_norm": 0.1962890625, "learning_rate": 0.00033690930894372757, "loss": 0.2566, "step": 280350 }, { "epoch": 11.61, "grad_norm": 1.171875, "learning_rate": 0.0003368991400915651, "loss": 0.2313, "step": 280360 }, { "epoch": 11.61, "grad_norm": 0.68359375, "learning_rate": 0.0003368889710758678, "loss": 0.175, "step": 280370 }, { "epoch": 11.61, "grad_norm": 0.5546875, "learning_rate": 0.00033687880189665496, "loss": 0.1847, "step": 280380 }, { "epoch": 11.61, "grad_norm": 1.0546875, "learning_rate": 0.0003368686325539455, "loss": 0.213, "step": 280390 }, { "epoch": 11.61, "grad_norm": 0.90234375, "learning_rate": 0.0003368584630477587, "loss": 0.1998, "step": 280400 }, { "epoch": 11.61, "grad_norm": 0.40234375, "learning_rate": 0.00033684829337811363, "loss": 0.1919, "step": 280410 }, { "epoch": 11.61, "grad_norm": 0.77734375, "learning_rate": 0.0003368381235450294, "loss": 0.1638, "step": 280420 }, { "epoch": 11.62, "grad_norm": 0.609375, "learning_rate": 0.00033682795354852523, "loss": 0.1853, "step": 280430 }, { "epoch": 11.62, "grad_norm": 0.671875, "learning_rate": 0.00033681778338862036, "loss": 0.1946, "step": 280440 }, { "epoch": 11.62, "grad_norm": 0.57421875, "learning_rate": 0.0003368076130653336, "loss": 0.1919, "step": 280450 }, { "epoch": 11.62, "grad_norm": 0.44921875, "learning_rate": 0.0003367974425786844, "loss": 0.2286, "step": 280460 }, { "epoch": 11.62, "grad_norm": 0.90625, "learning_rate": 0.00033678727192869176, "loss": 0.1623, "step": 280470 }, { "epoch": 11.62, "grad_norm": 0.54296875, "learning_rate": 0.00033677710111537477, "loss": 0.1721, "step": 280480 }, { "epoch": 11.62, "grad_norm": 0.671875, "learning_rate": 0.0003367669301387528, "loss": 0.1789, "step": 280490 }, { "epoch": 11.62, "grad_norm": 1.2109375, "learning_rate": 0.00033675675899884474, "loss": 0.2026, "step": 280500 }, { "epoch": 11.62, "grad_norm": 1.96875, "learning_rate": 0.00033674658769566987, "loss": 0.209, "step": 280510 }, { "epoch": 11.62, "grad_norm": 0.79296875, "learning_rate": 0.00033673641622924727, "loss": 0.1916, "step": 280520 }, { "epoch": 11.62, "grad_norm": 0.97265625, "learning_rate": 0.00033672624459959603, "loss": 0.1769, "step": 280530 }, { "epoch": 11.62, "grad_norm": 0.31640625, "learning_rate": 0.00033671607280673545, "loss": 0.1657, "step": 280540 }, { "epoch": 11.62, "grad_norm": 0.1572265625, "learning_rate": 0.0003367059008506846, "loss": 0.1838, "step": 280550 }, { "epoch": 11.62, "grad_norm": 0.6015625, "learning_rate": 0.0003366957287314625, "loss": 0.1963, "step": 280560 }, { "epoch": 11.62, "grad_norm": 0.546875, "learning_rate": 0.00033668555644908855, "loss": 0.1467, "step": 280570 }, { "epoch": 11.62, "grad_norm": 1.2890625, "learning_rate": 0.00033667538400358163, "loss": 0.2029, "step": 280580 }, { "epoch": 11.62, "grad_norm": 1.015625, "learning_rate": 0.00033666521139496104, "loss": 0.1818, "step": 280590 }, { "epoch": 11.62, "grad_norm": 0.62890625, "learning_rate": 0.0003366550386232459, "loss": 0.1983, "step": 280600 }, { "epoch": 11.62, "grad_norm": 1.0078125, "learning_rate": 0.00033664486568845526, "loss": 0.1516, "step": 280610 }, { "epoch": 11.62, "grad_norm": 0.64453125, "learning_rate": 0.0003366346925906084, "loss": 0.18, "step": 280620 }, { "epoch": 11.62, "grad_norm": 0.0, "learning_rate": 0.00033662451932972446, "loss": 0.1742, "step": 280630 }, { "epoch": 11.62, "grad_norm": 0.455078125, "learning_rate": 0.00033661434590582243, "loss": 0.1952, "step": 280640 }, { "epoch": 11.62, "grad_norm": 0.91796875, "learning_rate": 0.0003366041723189216, "loss": 0.2029, "step": 280650 }, { "epoch": 11.62, "grad_norm": 1.0859375, "learning_rate": 0.00033659399856904103, "loss": 0.1872, "step": 280660 }, { "epoch": 11.63, "grad_norm": 0.69921875, "learning_rate": 0.0003365838246562, "loss": 0.1765, "step": 280670 }, { "epoch": 11.63, "grad_norm": 0.60546875, "learning_rate": 0.00033657365058041745, "loss": 0.2032, "step": 280680 }, { "epoch": 11.63, "grad_norm": 1.671875, "learning_rate": 0.0003365634763417127, "loss": 0.2249, "step": 280690 }, { "epoch": 11.63, "grad_norm": 0.197265625, "learning_rate": 0.0003365533019401048, "loss": 0.2251, "step": 280700 }, { "epoch": 11.63, "grad_norm": 0.984375, "learning_rate": 0.000336543127375613, "loss": 0.198, "step": 280710 }, { "epoch": 11.63, "grad_norm": 1.734375, "learning_rate": 0.0003365329526482563, "loss": 0.2343, "step": 280720 }, { "epoch": 11.63, "grad_norm": 0.35546875, "learning_rate": 0.000336522777758054, "loss": 0.1892, "step": 280730 }, { "epoch": 11.63, "grad_norm": 0.9765625, "learning_rate": 0.00033651260270502507, "loss": 0.2118, "step": 280740 }, { "epoch": 11.63, "grad_norm": 0.58203125, "learning_rate": 0.00033650242748918893, "loss": 0.1785, "step": 280750 }, { "epoch": 11.63, "grad_norm": 0.63671875, "learning_rate": 0.0003364922521105644, "loss": 0.2545, "step": 280760 }, { "epoch": 11.63, "grad_norm": 0.96484375, "learning_rate": 0.00033648207656917085, "loss": 0.1753, "step": 280770 }, { "epoch": 11.63, "grad_norm": 1.3515625, "learning_rate": 0.00033647190086502746, "loss": 0.1668, "step": 280780 }, { "epoch": 11.63, "grad_norm": 0.40625, "learning_rate": 0.0003364617249981531, "loss": 0.2475, "step": 280790 }, { "epoch": 11.63, "grad_norm": 0.4765625, "learning_rate": 0.00033645154896856725, "loss": 0.2225, "step": 280800 }, { "epoch": 11.63, "grad_norm": 0.78515625, "learning_rate": 0.00033644137277628894, "loss": 0.1979, "step": 280810 }, { "epoch": 11.63, "grad_norm": 0.482421875, "learning_rate": 0.0003364311964213372, "loss": 0.1668, "step": 280820 }, { "epoch": 11.63, "grad_norm": 0.64453125, "learning_rate": 0.0003364210199037314, "loss": 0.1508, "step": 280830 }, { "epoch": 11.63, "grad_norm": 0.6875, "learning_rate": 0.0003364108432234905, "loss": 0.2472, "step": 280840 }, { "epoch": 11.63, "grad_norm": 1.6171875, "learning_rate": 0.0003364006663806337, "loss": 0.1868, "step": 280850 }, { "epoch": 11.63, "grad_norm": 0.859375, "learning_rate": 0.00033639048937518024, "loss": 0.2315, "step": 280860 }, { "epoch": 11.63, "grad_norm": 0.55859375, "learning_rate": 0.00033638031220714913, "loss": 0.2201, "step": 280870 }, { "epoch": 11.63, "grad_norm": 1.1953125, "learning_rate": 0.00033637013487655963, "loss": 0.1868, "step": 280880 }, { "epoch": 11.63, "grad_norm": 0.6328125, "learning_rate": 0.0003363599573834309, "loss": 0.1785, "step": 280890 }, { "epoch": 11.63, "grad_norm": 0.8359375, "learning_rate": 0.000336349779727782, "loss": 0.2064, "step": 280900 }, { "epoch": 11.64, "grad_norm": 1.109375, "learning_rate": 0.0003363396019096323, "loss": 0.1912, "step": 280910 }, { "epoch": 11.64, "grad_norm": 0.73828125, "learning_rate": 0.0003363294239290006, "loss": 0.2044, "step": 280920 }, { "epoch": 11.64, "grad_norm": 0.2080078125, "learning_rate": 0.00033631924578590635, "loss": 0.1914, "step": 280930 }, { "epoch": 11.64, "grad_norm": 0.734375, "learning_rate": 0.00033630906748036854, "loss": 0.2136, "step": 280940 }, { "epoch": 11.64, "grad_norm": 0.96484375, "learning_rate": 0.0003362988890124064, "loss": 0.2077, "step": 280950 }, { "epoch": 11.64, "grad_norm": 0.8828125, "learning_rate": 0.0003362887103820391, "loss": 0.1597, "step": 280960 }, { "epoch": 11.64, "grad_norm": 0.65234375, "learning_rate": 0.00033627853158928574, "loss": 0.2, "step": 280970 }, { "epoch": 11.64, "grad_norm": 0.515625, "learning_rate": 0.0003362683526341655, "loss": 0.1703, "step": 280980 }, { "epoch": 11.64, "grad_norm": 0.7265625, "learning_rate": 0.00033625817351669757, "loss": 0.1909, "step": 280990 }, { "epoch": 11.64, "grad_norm": 0.4765625, "learning_rate": 0.00033624799423690105, "loss": 0.198, "step": 281000 }, { "epoch": 11.64, "grad_norm": 0.65625, "learning_rate": 0.0003362378147947951, "loss": 0.2026, "step": 281010 }, { "epoch": 11.64, "grad_norm": 1.1875, "learning_rate": 0.00033622763519039895, "loss": 0.2191, "step": 281020 }, { "epoch": 11.64, "grad_norm": 0.83203125, "learning_rate": 0.00033621745542373164, "loss": 0.2296, "step": 281030 }, { "epoch": 11.64, "grad_norm": 0.640625, "learning_rate": 0.00033620727549481244, "loss": 0.2188, "step": 281040 }, { "epoch": 11.64, "grad_norm": 0.89453125, "learning_rate": 0.0003361970954036604, "loss": 0.19, "step": 281050 }, { "epoch": 11.64, "grad_norm": 0.43359375, "learning_rate": 0.0003361869151502948, "loss": 0.1849, "step": 281060 }, { "epoch": 11.64, "grad_norm": 0.3671875, "learning_rate": 0.0003361767347347347, "loss": 0.2338, "step": 281070 }, { "epoch": 11.64, "grad_norm": 0.9375, "learning_rate": 0.00033616655415699925, "loss": 0.1689, "step": 281080 }, { "epoch": 11.64, "grad_norm": 0.6171875, "learning_rate": 0.00033615637341710767, "loss": 0.1533, "step": 281090 }, { "epoch": 11.64, "grad_norm": 0.46875, "learning_rate": 0.0003361461925150792, "loss": 0.2027, "step": 281100 }, { "epoch": 11.64, "grad_norm": 1.3359375, "learning_rate": 0.0003361360114509328, "loss": 0.1974, "step": 281110 }, { "epoch": 11.64, "grad_norm": 0.515625, "learning_rate": 0.0003361258302246877, "loss": 0.1903, "step": 281120 }, { "epoch": 11.64, "grad_norm": 0.87109375, "learning_rate": 0.00033611564883636317, "loss": 0.2161, "step": 281130 }, { "epoch": 11.64, "grad_norm": 0.87109375, "learning_rate": 0.00033610546728597826, "loss": 0.233, "step": 281140 }, { "epoch": 11.65, "grad_norm": 1.7578125, "learning_rate": 0.0003360952855735522, "loss": 0.2034, "step": 281150 }, { "epoch": 11.65, "grad_norm": 0.609375, "learning_rate": 0.00033608510369910406, "loss": 0.205, "step": 281160 }, { "epoch": 11.65, "grad_norm": 0.51171875, "learning_rate": 0.000336074921662653, "loss": 0.1525, "step": 281170 }, { "epoch": 11.65, "grad_norm": 0.53125, "learning_rate": 0.0003360647394642183, "loss": 0.1922, "step": 281180 }, { "epoch": 11.65, "grad_norm": 1.59375, "learning_rate": 0.00033605455710381904, "loss": 0.1633, "step": 281190 }, { "epoch": 11.65, "grad_norm": 0.9140625, "learning_rate": 0.0003360443745814744, "loss": 0.1844, "step": 281200 }, { "epoch": 11.65, "grad_norm": 0.5703125, "learning_rate": 0.0003360341918972035, "loss": 0.1695, "step": 281210 }, { "epoch": 11.65, "grad_norm": 0.98828125, "learning_rate": 0.0003360240090510256, "loss": 0.1409, "step": 281220 }, { "epoch": 11.65, "grad_norm": 0.52734375, "learning_rate": 0.0003360138260429598, "loss": 0.1729, "step": 281230 }, { "epoch": 11.65, "grad_norm": 0.828125, "learning_rate": 0.00033600364287302524, "loss": 0.2033, "step": 281240 }, { "epoch": 11.65, "grad_norm": 0.64453125, "learning_rate": 0.00033599345954124114, "loss": 0.1777, "step": 281250 }, { "epoch": 11.65, "grad_norm": 0.369140625, "learning_rate": 0.00033598327604762664, "loss": 0.2022, "step": 281260 }, { "epoch": 11.65, "grad_norm": 2.109375, "learning_rate": 0.0003359730923922009, "loss": 0.1348, "step": 281270 }, { "epoch": 11.65, "grad_norm": 0.7421875, "learning_rate": 0.0003359629085749831, "loss": 0.2043, "step": 281280 }, { "epoch": 11.65, "grad_norm": 0.216796875, "learning_rate": 0.0003359527245959923, "loss": 0.1884, "step": 281290 }, { "epoch": 11.65, "grad_norm": 1.6328125, "learning_rate": 0.0003359425404552478, "loss": 0.1656, "step": 281300 }, { "epoch": 11.65, "grad_norm": 0.953125, "learning_rate": 0.00033593235615276873, "loss": 0.2103, "step": 281310 }, { "epoch": 11.65, "grad_norm": 0.625, "learning_rate": 0.0003359221716885743, "loss": 0.1848, "step": 281320 }, { "epoch": 11.65, "grad_norm": 0.248046875, "learning_rate": 0.0003359119870626835, "loss": 0.1987, "step": 281330 }, { "epoch": 11.65, "grad_norm": 0.8515625, "learning_rate": 0.0003359018022751157, "loss": 0.1699, "step": 281340 }, { "epoch": 11.65, "grad_norm": 0.5, "learning_rate": 0.00033589161732589, "loss": 0.1696, "step": 281350 }, { "epoch": 11.65, "grad_norm": 0.1728515625, "learning_rate": 0.00033588143221502555, "loss": 0.1984, "step": 281360 }, { "epoch": 11.65, "grad_norm": 0.8515625, "learning_rate": 0.00033587124694254146, "loss": 0.1829, "step": 281370 }, { "epoch": 11.65, "grad_norm": 0.69921875, "learning_rate": 0.000335861061508457, "loss": 0.23, "step": 281380 }, { "epoch": 11.66, "grad_norm": 0.78515625, "learning_rate": 0.00033585087591279125, "loss": 0.175, "step": 281390 }, { "epoch": 11.66, "grad_norm": 0.6015625, "learning_rate": 0.00033584069015556346, "loss": 0.1945, "step": 281400 }, { "epoch": 11.66, "grad_norm": 1.1015625, "learning_rate": 0.00033583050423679274, "loss": 0.1853, "step": 281410 }, { "epoch": 11.66, "grad_norm": 1.0859375, "learning_rate": 0.0003358203181564983, "loss": 0.1912, "step": 281420 }, { "epoch": 11.66, "grad_norm": 1.1796875, "learning_rate": 0.00033581013191469935, "loss": 0.1774, "step": 281430 }, { "epoch": 11.66, "grad_norm": 1.0390625, "learning_rate": 0.0003357999455114148, "loss": 0.1763, "step": 281440 }, { "epoch": 11.66, "grad_norm": 1.015625, "learning_rate": 0.0003357897589466642, "loss": 0.1853, "step": 281450 }, { "epoch": 11.66, "grad_norm": 1.546875, "learning_rate": 0.0003357795722204665, "loss": 0.2187, "step": 281460 }, { "epoch": 11.66, "grad_norm": 0.609375, "learning_rate": 0.00033576938533284086, "loss": 0.2067, "step": 281470 }, { "epoch": 11.66, "grad_norm": 1.3984375, "learning_rate": 0.00033575919828380656, "loss": 0.2503, "step": 281480 }, { "epoch": 11.66, "grad_norm": 1.109375, "learning_rate": 0.0003357490110733827, "loss": 0.2234, "step": 281490 }, { "epoch": 11.66, "grad_norm": 0.490234375, "learning_rate": 0.0003357388237015884, "loss": 0.189, "step": 281500 }, { "epoch": 11.66, "grad_norm": 1.1640625, "learning_rate": 0.000335728636168443, "loss": 0.2025, "step": 281510 }, { "epoch": 11.66, "grad_norm": 1.109375, "learning_rate": 0.0003357184484739654, "loss": 0.1389, "step": 281520 }, { "epoch": 11.66, "grad_norm": 0.69921875, "learning_rate": 0.0003357082606181751, "loss": 0.2044, "step": 281530 }, { "epoch": 11.66, "grad_norm": 0.2490234375, "learning_rate": 0.0003356980726010911, "loss": 0.2034, "step": 281540 }, { "epoch": 11.66, "grad_norm": 0.6875, "learning_rate": 0.00033568788442273246, "loss": 0.1675, "step": 281550 }, { "epoch": 11.66, "grad_norm": 0.74609375, "learning_rate": 0.0003356776960831186, "loss": 0.2142, "step": 281560 }, { "epoch": 11.66, "grad_norm": 0.482421875, "learning_rate": 0.0003356675075822685, "loss": 0.2025, "step": 281570 }, { "epoch": 11.66, "grad_norm": 0.83984375, "learning_rate": 0.0003356573189202014, "loss": 0.2101, "step": 281580 }, { "epoch": 11.66, "grad_norm": 0.484375, "learning_rate": 0.00033564713009693654, "loss": 0.2095, "step": 281590 }, { "epoch": 11.66, "grad_norm": 0.796875, "learning_rate": 0.00033563694111249295, "loss": 0.1716, "step": 281600 }, { "epoch": 11.66, "grad_norm": 1.40625, "learning_rate": 0.00033562675196688996, "loss": 0.1682, "step": 281610 }, { "epoch": 11.66, "grad_norm": 0.9921875, "learning_rate": 0.0003356165626601466, "loss": 0.2153, "step": 281620 }, { "epoch": 11.67, "grad_norm": 0.8515625, "learning_rate": 0.00033560637319228216, "loss": 0.207, "step": 281630 }, { "epoch": 11.67, "grad_norm": 0.828125, "learning_rate": 0.00033559618356331577, "loss": 0.2035, "step": 281640 }, { "epoch": 11.67, "grad_norm": 1.1484375, "learning_rate": 0.0003355859937732665, "loss": 0.2432, "step": 281650 }, { "epoch": 11.67, "grad_norm": 0.9765625, "learning_rate": 0.0003355758038221538, "loss": 0.2348, "step": 281660 }, { "epoch": 11.67, "grad_norm": 0.62109375, "learning_rate": 0.0003355656137099966, "loss": 0.1558, "step": 281670 }, { "epoch": 11.67, "grad_norm": 1.90625, "learning_rate": 0.0003355554234368141, "loss": 0.2055, "step": 281680 }, { "epoch": 11.67, "grad_norm": 0.384765625, "learning_rate": 0.00033554523300262563, "loss": 0.1837, "step": 281690 }, { "epoch": 11.67, "grad_norm": 0.93359375, "learning_rate": 0.00033553504240745025, "loss": 0.2156, "step": 281700 }, { "epoch": 11.67, "grad_norm": 0.90234375, "learning_rate": 0.00033552485165130706, "loss": 0.2243, "step": 281710 }, { "epoch": 11.67, "grad_norm": 0.515625, "learning_rate": 0.00033551466073421547, "loss": 0.1928, "step": 281720 }, { "epoch": 11.67, "grad_norm": 1.0546875, "learning_rate": 0.0003355044696561944, "loss": 0.2096, "step": 281730 }, { "epoch": 11.67, "grad_norm": 0.455078125, "learning_rate": 0.00033549427841726324, "loss": 0.2694, "step": 281740 }, { "epoch": 11.67, "grad_norm": 0.765625, "learning_rate": 0.00033548408701744104, "loss": 0.2472, "step": 281750 }, { "epoch": 11.67, "grad_norm": 1.2421875, "learning_rate": 0.00033547389545674703, "loss": 0.1854, "step": 281760 }, { "epoch": 11.67, "grad_norm": 0.3203125, "learning_rate": 0.0003354637037352004, "loss": 0.1645, "step": 281770 }, { "epoch": 11.67, "grad_norm": 0.8671875, "learning_rate": 0.00033545351185282037, "loss": 0.1593, "step": 281780 }, { "epoch": 11.67, "grad_norm": 1.5234375, "learning_rate": 0.00033544331980962593, "loss": 0.2094, "step": 281790 }, { "epoch": 11.67, "grad_norm": 0.373046875, "learning_rate": 0.0003354331276056365, "loss": 0.1405, "step": 281800 }, { "epoch": 11.67, "grad_norm": 1.6015625, "learning_rate": 0.00033542293524087095, "loss": 0.2497, "step": 281810 }, { "epoch": 11.67, "grad_norm": 0.80078125, "learning_rate": 0.0003354127427153489, "loss": 0.1635, "step": 281820 }, { "epoch": 11.67, "grad_norm": 0.6875, "learning_rate": 0.00033540255002908917, "loss": 0.1531, "step": 281830 }, { "epoch": 11.67, "grad_norm": 1.484375, "learning_rate": 0.0003353923571821111, "loss": 0.2178, "step": 281840 }, { "epoch": 11.67, "grad_norm": 1.3671875, "learning_rate": 0.0003353821641744338, "loss": 0.2204, "step": 281850 }, { "epoch": 11.67, "grad_norm": 0.93359375, "learning_rate": 0.00033537197100607654, "loss": 0.2161, "step": 281860 }, { "epoch": 11.68, "grad_norm": 0.859375, "learning_rate": 0.0003353617776770584, "loss": 0.1836, "step": 281870 }, { "epoch": 11.68, "grad_norm": 1.0390625, "learning_rate": 0.0003353515841873987, "loss": 0.212, "step": 281880 }, { "epoch": 11.68, "grad_norm": 1.0390625, "learning_rate": 0.0003353413905371164, "loss": 0.2248, "step": 281890 }, { "epoch": 11.68, "grad_norm": 0.625, "learning_rate": 0.0003353311967262309, "loss": 0.2106, "step": 281900 }, { "epoch": 11.68, "grad_norm": 0.87890625, "learning_rate": 0.0003353210027547613, "loss": 0.1804, "step": 281910 }, { "epoch": 11.68, "grad_norm": 0.5859375, "learning_rate": 0.0003353108086227268, "loss": 0.2319, "step": 281920 }, { "epoch": 11.68, "grad_norm": 0.6328125, "learning_rate": 0.0003353006143301465, "loss": 0.2317, "step": 281930 }, { "epoch": 11.68, "grad_norm": 2.09375, "learning_rate": 0.00033529041987703974, "loss": 0.1824, "step": 281940 }, { "epoch": 11.68, "grad_norm": 0.142578125, "learning_rate": 0.00033528022526342556, "loss": 0.196, "step": 281950 }, { "epoch": 11.68, "grad_norm": 0.609375, "learning_rate": 0.00033527003048932323, "loss": 0.206, "step": 281960 }, { "epoch": 11.68, "grad_norm": 0.9609375, "learning_rate": 0.0003352598355547519, "loss": 0.252, "step": 281970 }, { "epoch": 11.68, "grad_norm": 0.54296875, "learning_rate": 0.00033524964045973085, "loss": 0.1757, "step": 281980 }, { "epoch": 11.68, "grad_norm": 0.4375, "learning_rate": 0.0003352394452042791, "loss": 0.1702, "step": 281990 }, { "epoch": 11.68, "grad_norm": 0.89453125, "learning_rate": 0.00033522924978841585, "loss": 0.1761, "step": 282000 }, { "epoch": 11.68, "grad_norm": 1.703125, "learning_rate": 0.00033521905421216047, "loss": 0.2153, "step": 282010 }, { "epoch": 11.68, "grad_norm": 0.76171875, "learning_rate": 0.000335208858475532, "loss": 0.1984, "step": 282020 }, { "epoch": 11.68, "grad_norm": 1.0078125, "learning_rate": 0.00033519866257854963, "loss": 0.2265, "step": 282030 }, { "epoch": 11.68, "grad_norm": 1.7265625, "learning_rate": 0.0003351884665212327, "loss": 0.1916, "step": 282040 }, { "epoch": 11.68, "grad_norm": 0.23828125, "learning_rate": 0.0003351782703036001, "loss": 0.1766, "step": 282050 }, { "epoch": 11.68, "grad_norm": 0.41796875, "learning_rate": 0.0003351680739256713, "loss": 0.2087, "step": 282060 }, { "epoch": 11.68, "grad_norm": 1.359375, "learning_rate": 0.00033515787738746535, "loss": 0.217, "step": 282070 }, { "epoch": 11.68, "grad_norm": 0.578125, "learning_rate": 0.00033514768068900147, "loss": 0.2046, "step": 282080 }, { "epoch": 11.68, "grad_norm": 0.8984375, "learning_rate": 0.00033513748383029885, "loss": 0.2019, "step": 282090 }, { "epoch": 11.68, "grad_norm": 0.69921875, "learning_rate": 0.00033512728681137674, "loss": 0.2033, "step": 282100 }, { "epoch": 11.68, "grad_norm": 1.3125, "learning_rate": 0.00033511708963225416, "loss": 0.1482, "step": 282110 }, { "epoch": 11.69, "grad_norm": 0.58984375, "learning_rate": 0.0003351068922929506, "loss": 0.219, "step": 282120 }, { "epoch": 11.69, "grad_norm": 0.71484375, "learning_rate": 0.0003350966947934848, "loss": 0.2583, "step": 282130 }, { "epoch": 11.69, "grad_norm": 0.8046875, "learning_rate": 0.00033508649713387634, "loss": 0.1768, "step": 282140 }, { "epoch": 11.69, "grad_norm": 0.57421875, "learning_rate": 0.0003350762993141443, "loss": 0.2079, "step": 282150 }, { "epoch": 11.69, "grad_norm": 1.453125, "learning_rate": 0.00033506610133430783, "loss": 0.1946, "step": 282160 }, { "epoch": 11.69, "grad_norm": 0.77734375, "learning_rate": 0.00033505590319438615, "loss": 0.1772, "step": 282170 }, { "epoch": 11.69, "grad_norm": 0.67578125, "learning_rate": 0.00033504570489439845, "loss": 0.1932, "step": 282180 }, { "epoch": 11.69, "grad_norm": 0.390625, "learning_rate": 0.00033503550643436387, "loss": 0.1792, "step": 282190 }, { "epoch": 11.69, "grad_norm": 0.5625, "learning_rate": 0.0003350253078143017, "loss": 0.2067, "step": 282200 }, { "epoch": 11.69, "grad_norm": 0.61328125, "learning_rate": 0.00033501510903423106, "loss": 0.214, "step": 282210 }, { "epoch": 11.69, "grad_norm": 0.73828125, "learning_rate": 0.00033500491009417116, "loss": 0.1863, "step": 282220 }, { "epoch": 11.69, "grad_norm": 0.9140625, "learning_rate": 0.00033499471099414125, "loss": 0.1626, "step": 282230 }, { "epoch": 11.69, "grad_norm": 0.921875, "learning_rate": 0.0003349845117341604, "loss": 0.1439, "step": 282240 }, { "epoch": 11.69, "grad_norm": 1.1640625, "learning_rate": 0.0003349743123142479, "loss": 0.217, "step": 282250 }, { "epoch": 11.69, "grad_norm": 1.8828125, "learning_rate": 0.0003349641127344229, "loss": 0.2041, "step": 282260 }, { "epoch": 11.69, "grad_norm": 0.3203125, "learning_rate": 0.00033495391299470466, "loss": 0.1842, "step": 282270 }, { "epoch": 11.69, "grad_norm": 0.78125, "learning_rate": 0.00033494371309511235, "loss": 0.1587, "step": 282280 }, { "epoch": 11.69, "grad_norm": 0.72265625, "learning_rate": 0.0003349335130356651, "loss": 0.2379, "step": 282290 }, { "epoch": 11.69, "grad_norm": 0.5390625, "learning_rate": 0.0003349233128163822, "loss": 0.1964, "step": 282300 }, { "epoch": 11.69, "grad_norm": 0.435546875, "learning_rate": 0.00033491311243728273, "loss": 0.1711, "step": 282310 }, { "epoch": 11.69, "grad_norm": 1.09375, "learning_rate": 0.00033490291189838594, "loss": 0.2347, "step": 282320 }, { "epoch": 11.69, "grad_norm": 0.46484375, "learning_rate": 0.00033489271119971115, "loss": 0.1942, "step": 282330 }, { "epoch": 11.69, "grad_norm": 1.0546875, "learning_rate": 0.0003348825103412773, "loss": 0.2024, "step": 282340 }, { "epoch": 11.69, "grad_norm": 2.140625, "learning_rate": 0.00033487230932310386, "loss": 0.2336, "step": 282350 }, { "epoch": 11.7, "grad_norm": 1.578125, "learning_rate": 0.00033486210814520986, "loss": 0.233, "step": 282360 }, { "epoch": 11.7, "grad_norm": 0.5234375, "learning_rate": 0.00033485190680761445, "loss": 0.189, "step": 282370 }, { "epoch": 11.7, "grad_norm": 0.8046875, "learning_rate": 0.0003348417053103371, "loss": 0.1898, "step": 282380 }, { "epoch": 11.7, "grad_norm": 0.51953125, "learning_rate": 0.00033483150365339666, "loss": 0.1869, "step": 282390 }, { "epoch": 11.7, "grad_norm": 0.15625, "learning_rate": 0.00033482130183681255, "loss": 0.2063, "step": 282400 }, { "epoch": 11.7, "grad_norm": 1.5078125, "learning_rate": 0.00033481109986060385, "loss": 0.2179, "step": 282410 }, { "epoch": 11.7, "grad_norm": 0.419921875, "learning_rate": 0.0003348008977247899, "loss": 0.2132, "step": 282420 }, { "epoch": 11.7, "grad_norm": 0.7734375, "learning_rate": 0.00033479069542938976, "loss": 0.2219, "step": 282430 }, { "epoch": 11.7, "grad_norm": 0.84765625, "learning_rate": 0.00033478049297442275, "loss": 0.2157, "step": 282440 }, { "epoch": 11.7, "grad_norm": 2.984375, "learning_rate": 0.00033477029035990796, "loss": 0.2033, "step": 282450 }, { "epoch": 11.7, "grad_norm": 0.93359375, "learning_rate": 0.0003347600875858647, "loss": 0.2109, "step": 282460 }, { "epoch": 11.7, "grad_norm": 0.7734375, "learning_rate": 0.00033474988465231203, "loss": 0.2136, "step": 282470 }, { "epoch": 11.7, "grad_norm": 0.76171875, "learning_rate": 0.0003347396815592693, "loss": 0.1824, "step": 282480 }, { "epoch": 11.7, "grad_norm": 0.76171875, "learning_rate": 0.00033472947830675556, "loss": 0.2188, "step": 282490 }, { "epoch": 11.7, "grad_norm": 0.64453125, "learning_rate": 0.00033471927489479007, "loss": 0.1595, "step": 282500 }, { "epoch": 11.7, "grad_norm": 0.55859375, "learning_rate": 0.00033470907132339216, "loss": 0.2104, "step": 282510 }, { "epoch": 11.7, "grad_norm": 0.7109375, "learning_rate": 0.00033469886759258087, "loss": 0.1589, "step": 282520 }, { "epoch": 11.7, "grad_norm": 0.5390625, "learning_rate": 0.0003346886637023755, "loss": 0.1842, "step": 282530 }, { "epoch": 11.7, "grad_norm": 0.6484375, "learning_rate": 0.0003346784596527952, "loss": 0.2247, "step": 282540 }, { "epoch": 11.7, "grad_norm": 0.7109375, "learning_rate": 0.00033466825544385914, "loss": 0.1814, "step": 282550 }, { "epoch": 11.7, "grad_norm": 1.1875, "learning_rate": 0.00033465805107558657, "loss": 0.2225, "step": 282560 }, { "epoch": 11.7, "grad_norm": 4.75, "learning_rate": 0.0003346478465479967, "loss": 0.1947, "step": 282570 }, { "epoch": 11.7, "grad_norm": 1.328125, "learning_rate": 0.00033463764186110874, "loss": 0.2282, "step": 282580 }, { "epoch": 11.7, "grad_norm": 0.63671875, "learning_rate": 0.0003346274370149419, "loss": 0.2532, "step": 282590 }, { "epoch": 11.71, "grad_norm": 0.73046875, "learning_rate": 0.00033461723200951533, "loss": 0.1873, "step": 282600 }, { "epoch": 11.71, "grad_norm": 0.5234375, "learning_rate": 0.00033460702684484823, "loss": 0.193, "step": 282610 }, { "epoch": 11.71, "grad_norm": 0.73828125, "learning_rate": 0.00033459682152095987, "loss": 0.2278, "step": 282620 }, { "epoch": 11.71, "grad_norm": 1.03125, "learning_rate": 0.0003345866160378695, "loss": 0.1999, "step": 282630 }, { "epoch": 11.71, "grad_norm": 1.234375, "learning_rate": 0.00033457641039559614, "loss": 0.1134, "step": 282640 }, { "epoch": 11.71, "grad_norm": 1.203125, "learning_rate": 0.0003345662045941592, "loss": 0.1794, "step": 282650 }, { "epoch": 11.71, "grad_norm": 0.63671875, "learning_rate": 0.0003345559986335778, "loss": 0.1889, "step": 282660 }, { "epoch": 11.71, "grad_norm": 1.4296875, "learning_rate": 0.00033454579251387105, "loss": 0.1859, "step": 282670 }, { "epoch": 11.71, "grad_norm": 0.427734375, "learning_rate": 0.00033453558623505834, "loss": 0.1609, "step": 282680 }, { "epoch": 11.71, "grad_norm": 0.6484375, "learning_rate": 0.0003345253797971587, "loss": 0.179, "step": 282690 }, { "epoch": 11.71, "grad_norm": 0.75390625, "learning_rate": 0.00033451517320019146, "loss": 0.1866, "step": 282700 }, { "epoch": 11.71, "grad_norm": 0.82421875, "learning_rate": 0.00033450496644417583, "loss": 0.196, "step": 282710 }, { "epoch": 11.71, "grad_norm": 0.474609375, "learning_rate": 0.00033449475952913095, "loss": 0.2443, "step": 282720 }, { "epoch": 11.71, "grad_norm": 0.91796875, "learning_rate": 0.000334484552455076, "loss": 0.2047, "step": 282730 }, { "epoch": 11.71, "grad_norm": 0.6640625, "learning_rate": 0.00033447434522203035, "loss": 0.1944, "step": 282740 }, { "epoch": 11.71, "grad_norm": 1.21875, "learning_rate": 0.00033446413783001306, "loss": 0.1504, "step": 282750 }, { "epoch": 11.71, "grad_norm": 0.48828125, "learning_rate": 0.0003344539302790434, "loss": 0.2271, "step": 282760 }, { "epoch": 11.71, "grad_norm": 0.4765625, "learning_rate": 0.0003344437225691405, "loss": 0.2313, "step": 282770 }, { "epoch": 11.71, "grad_norm": 0.39453125, "learning_rate": 0.0003344335147003237, "loss": 0.1629, "step": 282780 }, { "epoch": 11.71, "grad_norm": 1.1875, "learning_rate": 0.0003344233066726121, "loss": 0.2054, "step": 282790 }, { "epoch": 11.71, "grad_norm": 0.58984375, "learning_rate": 0.000334413098486025, "loss": 0.1913, "step": 282800 }, { "epoch": 11.71, "grad_norm": 1.1015625, "learning_rate": 0.0003344028901405815, "loss": 0.2458, "step": 282810 }, { "epoch": 11.71, "grad_norm": 0.380859375, "learning_rate": 0.00033439268163630093, "loss": 0.1916, "step": 282820 }, { "epoch": 11.71, "grad_norm": 0.796875, "learning_rate": 0.0003343824729732024, "loss": 0.1631, "step": 282830 }, { "epoch": 11.72, "grad_norm": 0.359375, "learning_rate": 0.0003343722641513052, "loss": 0.1173, "step": 282840 }, { "epoch": 11.72, "grad_norm": 0.63671875, "learning_rate": 0.0003343620551706286, "loss": 0.1766, "step": 282850 }, { "epoch": 11.72, "grad_norm": 0.6796875, "learning_rate": 0.00033435184603119154, "loss": 0.144, "step": 282860 }, { "epoch": 11.72, "grad_norm": 0.83203125, "learning_rate": 0.0003343416367330135, "loss": 0.1419, "step": 282870 }, { "epoch": 11.72, "grad_norm": 0.400390625, "learning_rate": 0.00033433142727611363, "loss": 0.1553, "step": 282880 }, { "epoch": 11.72, "grad_norm": 0.44921875, "learning_rate": 0.00033432121766051114, "loss": 0.1654, "step": 282890 }, { "epoch": 11.72, "grad_norm": 0.37109375, "learning_rate": 0.00033431100788622515, "loss": 0.1701, "step": 282900 }, { "epoch": 11.72, "grad_norm": 0.765625, "learning_rate": 0.000334300797953275, "loss": 0.2004, "step": 282910 }, { "epoch": 11.72, "grad_norm": 0.419921875, "learning_rate": 0.00033429058786167985, "loss": 0.2071, "step": 282920 }, { "epoch": 11.72, "grad_norm": 0.5625, "learning_rate": 0.0003342803776114589, "loss": 0.242, "step": 282930 }, { "epoch": 11.72, "grad_norm": 1.203125, "learning_rate": 0.00033427016720263136, "loss": 0.1569, "step": 282940 }, { "epoch": 11.72, "grad_norm": 0.7421875, "learning_rate": 0.00033425995663521657, "loss": 0.1864, "step": 282950 }, { "epoch": 11.72, "grad_norm": 1.046875, "learning_rate": 0.0003342497459092335, "loss": 0.2184, "step": 282960 }, { "epoch": 11.72, "grad_norm": 0.8125, "learning_rate": 0.00033423953502470164, "loss": 0.2437, "step": 282970 }, { "epoch": 11.72, "grad_norm": 0.55859375, "learning_rate": 0.00033422932398164, "loss": 0.1885, "step": 282980 }, { "epoch": 11.72, "grad_norm": 0.9140625, "learning_rate": 0.0003342191127800678, "loss": 0.2011, "step": 282990 }, { "epoch": 11.72, "grad_norm": 0.93359375, "learning_rate": 0.0003342089014200044, "loss": 0.1988, "step": 283000 }, { "epoch": 11.72, "grad_norm": 0.7265625, "learning_rate": 0.000334198689901469, "loss": 0.2019, "step": 283010 }, { "epoch": 11.72, "grad_norm": 0.515625, "learning_rate": 0.0003341884782244806, "loss": 0.2319, "step": 283020 }, { "epoch": 11.72, "grad_norm": 0.279296875, "learning_rate": 0.00033417826638905873, "loss": 0.1636, "step": 283030 }, { "epoch": 11.72, "grad_norm": 0.380859375, "learning_rate": 0.00033416805439522235, "loss": 0.2299, "step": 283040 }, { "epoch": 11.72, "grad_norm": 0.6796875, "learning_rate": 0.00033415784224299086, "loss": 0.1831, "step": 283050 }, { "epoch": 11.72, "grad_norm": 0.359375, "learning_rate": 0.00033414762993238335, "loss": 0.2243, "step": 283060 }, { "epoch": 11.72, "grad_norm": 0.515625, "learning_rate": 0.00033413741746341906, "loss": 0.2379, "step": 283070 }, { "epoch": 11.73, "grad_norm": 1.0625, "learning_rate": 0.00033412720483611734, "loss": 0.1811, "step": 283080 }, { "epoch": 11.73, "grad_norm": 1.1171875, "learning_rate": 0.0003341169920504972, "loss": 0.1946, "step": 283090 }, { "epoch": 11.73, "grad_norm": 0.5, "learning_rate": 0.00033410677910657806, "loss": 0.1649, "step": 283100 }, { "epoch": 11.73, "grad_norm": 0.67578125, "learning_rate": 0.000334096566004379, "loss": 0.2035, "step": 283110 }, { "epoch": 11.73, "grad_norm": 0.4765625, "learning_rate": 0.0003340863527439192, "loss": 0.2154, "step": 283120 }, { "epoch": 11.73, "grad_norm": 0.2197265625, "learning_rate": 0.0003340761393252181, "loss": 0.1984, "step": 283130 }, { "epoch": 11.73, "grad_norm": 0.25, "learning_rate": 0.0003340659257482947, "loss": 0.1421, "step": 283140 }, { "epoch": 11.73, "grad_norm": 0.76171875, "learning_rate": 0.0003340557120131683, "loss": 0.2256, "step": 283150 }, { "epoch": 11.73, "grad_norm": 0.6875, "learning_rate": 0.00033404549811985823, "loss": 0.1523, "step": 283160 }, { "epoch": 11.73, "grad_norm": 1.140625, "learning_rate": 0.00033403528406838345, "loss": 0.1805, "step": 283170 }, { "epoch": 11.73, "grad_norm": 0.86328125, "learning_rate": 0.00033402506985876347, "loss": 0.1768, "step": 283180 }, { "epoch": 11.73, "grad_norm": 0.8359375, "learning_rate": 0.0003340148554910174, "loss": 0.186, "step": 283190 }, { "epoch": 11.73, "grad_norm": 1.0390625, "learning_rate": 0.0003340046409651643, "loss": 0.2308, "step": 283200 }, { "epoch": 11.73, "grad_norm": 0.89453125, "learning_rate": 0.0003339944262812237, "loss": 0.1808, "step": 283210 }, { "epoch": 11.73, "grad_norm": 1.1796875, "learning_rate": 0.00033398421143921456, "loss": 0.1572, "step": 283220 }, { "epoch": 11.73, "grad_norm": 0.53125, "learning_rate": 0.0003339739964391562, "loss": 0.2617, "step": 283230 }, { "epoch": 11.73, "grad_norm": 1.109375, "learning_rate": 0.0003339637812810679, "loss": 0.173, "step": 283240 }, { "epoch": 11.73, "grad_norm": 0.5, "learning_rate": 0.00033395356596496874, "loss": 0.1624, "step": 283250 }, { "epoch": 11.73, "grad_norm": 0.50390625, "learning_rate": 0.00033394335049087814, "loss": 0.1934, "step": 283260 }, { "epoch": 11.73, "grad_norm": 0.73046875, "learning_rate": 0.0003339331348588152, "loss": 0.2056, "step": 283270 }, { "epoch": 11.73, "grad_norm": 0.7109375, "learning_rate": 0.0003339229190687991, "loss": 0.1691, "step": 283280 }, { "epoch": 11.73, "grad_norm": 0.26953125, "learning_rate": 0.0003339127031208492, "loss": 0.196, "step": 283290 }, { "epoch": 11.73, "grad_norm": 1.0, "learning_rate": 0.0003339024870149846, "loss": 0.162, "step": 283300 }, { "epoch": 11.73, "grad_norm": 1.390625, "learning_rate": 0.00033389227075122463, "loss": 0.1751, "step": 283310 }, { "epoch": 11.74, "grad_norm": 0.671875, "learning_rate": 0.0003338820543295885, "loss": 0.1764, "step": 283320 }, { "epoch": 11.74, "grad_norm": 1.1796875, "learning_rate": 0.0003338718377500953, "loss": 0.2678, "step": 283330 }, { "epoch": 11.74, "grad_norm": 0.57421875, "learning_rate": 0.0003338616210127644, "loss": 0.1889, "step": 283340 }, { "epoch": 11.74, "grad_norm": 0.84765625, "learning_rate": 0.00033385140411761505, "loss": 0.2311, "step": 283350 }, { "epoch": 11.74, "grad_norm": 0.6171875, "learning_rate": 0.00033384118706466625, "loss": 0.2252, "step": 283360 }, { "epoch": 11.74, "grad_norm": 0.82421875, "learning_rate": 0.0003338309698539376, "loss": 0.244, "step": 283370 }, { "epoch": 11.74, "grad_norm": 0.40625, "learning_rate": 0.00033382075248544795, "loss": 0.1801, "step": 283380 }, { "epoch": 11.74, "grad_norm": 0.7890625, "learning_rate": 0.00033381053495921677, "loss": 0.2191, "step": 283390 }, { "epoch": 11.74, "grad_norm": 1.4375, "learning_rate": 0.00033380031727526323, "loss": 0.2016, "step": 283400 }, { "epoch": 11.74, "grad_norm": 0.50390625, "learning_rate": 0.00033379009943360646, "loss": 0.2164, "step": 283410 }, { "epoch": 11.74, "grad_norm": 0.578125, "learning_rate": 0.00033377988143426583, "loss": 0.1661, "step": 283420 }, { "epoch": 11.74, "grad_norm": 1.171875, "learning_rate": 0.00033376966327726057, "loss": 0.2103, "step": 283430 }, { "epoch": 11.74, "grad_norm": 0.734375, "learning_rate": 0.00033375944496260966, "loss": 0.1872, "step": 283440 }, { "epoch": 11.74, "grad_norm": 1.1640625, "learning_rate": 0.0003337492264903327, "loss": 0.1761, "step": 283450 }, { "epoch": 11.74, "grad_norm": 0.474609375, "learning_rate": 0.00033373900786044867, "loss": 0.1541, "step": 283460 }, { "epoch": 11.74, "grad_norm": 1.6328125, "learning_rate": 0.00033372878907297687, "loss": 0.1597, "step": 283470 }, { "epoch": 11.74, "grad_norm": 1.140625, "learning_rate": 0.0003337185701279366, "loss": 0.2027, "step": 283480 }, { "epoch": 11.74, "grad_norm": 1.34375, "learning_rate": 0.0003337083510253469, "loss": 0.1764, "step": 283490 }, { "epoch": 11.74, "grad_norm": 0.7265625, "learning_rate": 0.0003336981317652272, "loss": 0.1782, "step": 283500 }, { "epoch": 11.74, "grad_norm": 0.640625, "learning_rate": 0.0003336879123475967, "loss": 0.2069, "step": 283510 }, { "epoch": 11.74, "grad_norm": 1.0859375, "learning_rate": 0.00033367769277247453, "loss": 0.2148, "step": 283520 }, { "epoch": 11.74, "grad_norm": 0.984375, "learning_rate": 0.00033366747303988, "loss": 0.1411, "step": 283530 }, { "epoch": 11.74, "grad_norm": 0.6171875, "learning_rate": 0.00033365725314983227, "loss": 0.1917, "step": 283540 }, { "epoch": 11.74, "grad_norm": 0.9140625, "learning_rate": 0.0003336470331023507, "loss": 0.1459, "step": 283550 }, { "epoch": 11.75, "grad_norm": 1.2265625, "learning_rate": 0.0003336368128974544, "loss": 0.25, "step": 283560 }, { "epoch": 11.75, "grad_norm": 0.369140625, "learning_rate": 0.0003336265925351626, "loss": 0.1797, "step": 283570 }, { "epoch": 11.75, "grad_norm": 0.56640625, "learning_rate": 0.00033361637201549465, "loss": 0.1929, "step": 283580 }, { "epoch": 11.75, "grad_norm": 0.88671875, "learning_rate": 0.00033360615133846966, "loss": 0.213, "step": 283590 }, { "epoch": 11.75, "grad_norm": 0.3671875, "learning_rate": 0.000333595930504107, "loss": 0.2342, "step": 283600 }, { "epoch": 11.75, "grad_norm": 0.43359375, "learning_rate": 0.0003335857095124258, "loss": 0.207, "step": 283610 }, { "epoch": 11.75, "grad_norm": 0.6640625, "learning_rate": 0.0003335754883634453, "loss": 0.2073, "step": 283620 }, { "epoch": 11.75, "grad_norm": 0.71875, "learning_rate": 0.00033356526705718477, "loss": 0.1515, "step": 283630 }, { "epoch": 11.75, "grad_norm": 0.5390625, "learning_rate": 0.0003335550455936635, "loss": 0.2261, "step": 283640 }, { "epoch": 11.75, "grad_norm": 0.8515625, "learning_rate": 0.0003335448239729005, "loss": 0.1963, "step": 283650 }, { "epoch": 11.75, "grad_norm": 0.9453125, "learning_rate": 0.00033353460219491527, "loss": 0.1666, "step": 283660 }, { "epoch": 11.75, "grad_norm": 2.1875, "learning_rate": 0.00033352438025972695, "loss": 0.1981, "step": 283670 }, { "epoch": 11.75, "grad_norm": 0.65625, "learning_rate": 0.0003335141581673547, "loss": 0.1728, "step": 283680 }, { "epoch": 11.75, "grad_norm": 0.4375, "learning_rate": 0.00033350393591781793, "loss": 0.1516, "step": 283690 }, { "epoch": 11.75, "grad_norm": 1.5625, "learning_rate": 0.0003334937135111357, "loss": 0.2133, "step": 283700 }, { "epoch": 11.75, "grad_norm": 1.7265625, "learning_rate": 0.0003334834909473272, "loss": 0.2432, "step": 283710 }, { "epoch": 11.75, "grad_norm": 0.625, "learning_rate": 0.00033347326822641193, "loss": 0.1506, "step": 283720 }, { "epoch": 11.75, "grad_norm": 1.03125, "learning_rate": 0.000333463045348409, "loss": 0.2099, "step": 283730 }, { "epoch": 11.75, "grad_norm": 1.21875, "learning_rate": 0.00033345282231333757, "loss": 0.2041, "step": 283740 }, { "epoch": 11.75, "grad_norm": 0.5625, "learning_rate": 0.000333442599121217, "loss": 0.2361, "step": 283750 }, { "epoch": 11.75, "grad_norm": 0.7890625, "learning_rate": 0.0003334323757720664, "loss": 0.2014, "step": 283760 }, { "epoch": 11.75, "grad_norm": 1.03125, "learning_rate": 0.0003334221522659051, "loss": 0.2185, "step": 283770 }, { "epoch": 11.75, "grad_norm": 1.390625, "learning_rate": 0.00033341192860275235, "loss": 0.192, "step": 283780 }, { "epoch": 11.75, "grad_norm": 1.3359375, "learning_rate": 0.0003334017047826273, "loss": 0.2108, "step": 283790 }, { "epoch": 11.75, "grad_norm": 0.51171875, "learning_rate": 0.0003333914808055493, "loss": 0.2319, "step": 283800 }, { "epoch": 11.76, "grad_norm": 0.478515625, "learning_rate": 0.00033338125667153754, "loss": 0.2226, "step": 283810 }, { "epoch": 11.76, "grad_norm": 0.69140625, "learning_rate": 0.0003333710323806112, "loss": 0.1964, "step": 283820 }, { "epoch": 11.76, "grad_norm": 0.74609375, "learning_rate": 0.0003333608079327896, "loss": 0.2038, "step": 283830 }, { "epoch": 11.76, "grad_norm": 0.09765625, "learning_rate": 0.000333350583328092, "loss": 0.2144, "step": 283840 }, { "epoch": 11.76, "grad_norm": 1.609375, "learning_rate": 0.0003333403585665377, "loss": 0.1883, "step": 283850 }, { "epoch": 11.76, "grad_norm": 0.80859375, "learning_rate": 0.0003333301336481456, "loss": 0.2002, "step": 283860 }, { "epoch": 11.76, "grad_norm": 0.84765625, "learning_rate": 0.00033331990857293536, "loss": 0.1927, "step": 283870 }, { "epoch": 11.76, "grad_norm": 0.70703125, "learning_rate": 0.000333309683340926, "loss": 0.1591, "step": 283880 }, { "epoch": 11.76, "grad_norm": 0.7578125, "learning_rate": 0.0003332994579521368, "loss": 0.2033, "step": 283890 }, { "epoch": 11.76, "grad_norm": 2.484375, "learning_rate": 0.00033328923240658704, "loss": 0.2066, "step": 283900 }, { "epoch": 11.76, "grad_norm": 3.15625, "learning_rate": 0.00033327900670429594, "loss": 0.1957, "step": 283910 }, { "epoch": 11.76, "grad_norm": 0.50390625, "learning_rate": 0.0003332687808452827, "loss": 0.1999, "step": 283920 }, { "epoch": 11.76, "grad_norm": 0.78515625, "learning_rate": 0.0003332585548295667, "loss": 0.1773, "step": 283930 }, { "epoch": 11.76, "grad_norm": 0.73046875, "learning_rate": 0.00033324832865716704, "loss": 0.2282, "step": 283940 }, { "epoch": 11.76, "grad_norm": 0.91015625, "learning_rate": 0.00033323810232810305, "loss": 0.1679, "step": 283950 }, { "epoch": 11.76, "grad_norm": 0.9453125, "learning_rate": 0.00033322787584239385, "loss": 0.2002, "step": 283960 }, { "epoch": 11.76, "grad_norm": 1.0625, "learning_rate": 0.0003332176492000588, "loss": 0.2105, "step": 283970 }, { "epoch": 11.76, "grad_norm": 1.375, "learning_rate": 0.0003332074224011172, "loss": 0.2098, "step": 283980 }, { "epoch": 11.76, "grad_norm": 0.88671875, "learning_rate": 0.00033319719544558813, "loss": 0.2217, "step": 283990 }, { "epoch": 11.76, "grad_norm": 0.55859375, "learning_rate": 0.00033318696833349094, "loss": 0.197, "step": 284000 }, { "epoch": 11.76, "grad_norm": 0.1826171875, "learning_rate": 0.0003331767410648449, "loss": 0.1604, "step": 284010 }, { "epoch": 11.76, "grad_norm": 1.0625, "learning_rate": 0.00033316651363966924, "loss": 0.185, "step": 284020 }, { "epoch": 11.76, "grad_norm": 0.53515625, "learning_rate": 0.0003331562860579831, "loss": 0.2197, "step": 284030 }, { "epoch": 11.76, "grad_norm": 0.55078125, "learning_rate": 0.00033314605831980584, "loss": 0.2047, "step": 284040 }, { "epoch": 11.77, "grad_norm": 0.2001953125, "learning_rate": 0.0003331358304251567, "loss": 0.1831, "step": 284050 }, { "epoch": 11.77, "grad_norm": 0.80078125, "learning_rate": 0.00033312560237405486, "loss": 0.2187, "step": 284060 }, { "epoch": 11.77, "grad_norm": 0.56640625, "learning_rate": 0.00033311537416651964, "loss": 0.2019, "step": 284070 }, { "epoch": 11.77, "grad_norm": 0.92578125, "learning_rate": 0.00033310514580257026, "loss": 0.1833, "step": 284080 }, { "epoch": 11.77, "grad_norm": 0.65625, "learning_rate": 0.00033309491728222596, "loss": 0.1821, "step": 284090 }, { "epoch": 11.77, "grad_norm": 0.671875, "learning_rate": 0.000333084688605506, "loss": 0.2168, "step": 284100 }, { "epoch": 11.77, "grad_norm": 0.62109375, "learning_rate": 0.0003330744597724297, "loss": 0.2018, "step": 284110 }, { "epoch": 11.77, "grad_norm": 0.91015625, "learning_rate": 0.0003330642307830161, "loss": 0.2131, "step": 284120 }, { "epoch": 11.77, "grad_norm": 0.71875, "learning_rate": 0.00033305400163728465, "loss": 0.2, "step": 284130 }, { "epoch": 11.77, "grad_norm": 0.8359375, "learning_rate": 0.00033304377233525455, "loss": 0.2067, "step": 284140 }, { "epoch": 11.77, "grad_norm": 0.76953125, "learning_rate": 0.0003330335428769451, "loss": 0.235, "step": 284150 }, { "epoch": 11.77, "grad_norm": 0.50390625, "learning_rate": 0.00033302331326237546, "loss": 0.2111, "step": 284160 }, { "epoch": 11.77, "grad_norm": 1.15625, "learning_rate": 0.00033301308349156485, "loss": 0.1834, "step": 284170 }, { "epoch": 11.77, "grad_norm": 0.3984375, "learning_rate": 0.0003330028535645326, "loss": 0.2504, "step": 284180 }, { "epoch": 11.77, "grad_norm": 1.1484375, "learning_rate": 0.000332992623481298, "loss": 0.2102, "step": 284190 }, { "epoch": 11.77, "grad_norm": 1.0234375, "learning_rate": 0.0003329823932418802, "loss": 0.1625, "step": 284200 }, { "epoch": 11.77, "grad_norm": 1.21875, "learning_rate": 0.00033297216284629847, "loss": 0.1924, "step": 284210 }, { "epoch": 11.77, "grad_norm": 0.98828125, "learning_rate": 0.00033296193229457205, "loss": 0.1975, "step": 284220 }, { "epoch": 11.77, "grad_norm": 0.490234375, "learning_rate": 0.00033295170158672036, "loss": 0.1095, "step": 284230 }, { "epoch": 11.77, "grad_norm": 1.03125, "learning_rate": 0.0003329414707227625, "loss": 0.1456, "step": 284240 }, { "epoch": 11.77, "grad_norm": 1.640625, "learning_rate": 0.00033293123970271765, "loss": 0.1822, "step": 284250 }, { "epoch": 11.77, "grad_norm": 0.62890625, "learning_rate": 0.0003329210085266052, "loss": 0.2151, "step": 284260 }, { "epoch": 11.77, "grad_norm": 0.57421875, "learning_rate": 0.00033291077719444444, "loss": 0.1839, "step": 284270 }, { "epoch": 11.77, "grad_norm": 0.94921875, "learning_rate": 0.0003329005457062545, "loss": 0.1625, "step": 284280 }, { "epoch": 11.78, "grad_norm": 1.2890625, "learning_rate": 0.00033289031406205465, "loss": 0.1973, "step": 284290 }, { "epoch": 11.78, "grad_norm": 0.55859375, "learning_rate": 0.00033288008226186423, "loss": 0.1803, "step": 284300 }, { "epoch": 11.78, "grad_norm": 0.380859375, "learning_rate": 0.0003328698503057025, "loss": 0.1962, "step": 284310 }, { "epoch": 11.78, "grad_norm": 0.609375, "learning_rate": 0.00033285961819358857, "loss": 0.2261, "step": 284320 }, { "epoch": 11.78, "grad_norm": 0.9140625, "learning_rate": 0.0003328493859255418, "loss": 0.2173, "step": 284330 }, { "epoch": 11.78, "grad_norm": 0.85546875, "learning_rate": 0.00033283915350158143, "loss": 0.1737, "step": 284340 }, { "epoch": 11.78, "grad_norm": 0.65234375, "learning_rate": 0.00033282892092172675, "loss": 0.212, "step": 284350 }, { "epoch": 11.78, "grad_norm": 0.353515625, "learning_rate": 0.000332818688185997, "loss": 0.1559, "step": 284360 }, { "epoch": 11.78, "grad_norm": 0.51171875, "learning_rate": 0.0003328084552944114, "loss": 0.1521, "step": 284370 }, { "epoch": 11.78, "grad_norm": 0.87890625, "learning_rate": 0.0003327982222469892, "loss": 0.1782, "step": 284380 }, { "epoch": 11.78, "grad_norm": 0.69140625, "learning_rate": 0.00033278798904374976, "loss": 0.2074, "step": 284390 }, { "epoch": 11.78, "grad_norm": 0.5390625, "learning_rate": 0.00033277775568471216, "loss": 0.2141, "step": 284400 }, { "epoch": 11.78, "grad_norm": 0.87890625, "learning_rate": 0.0003327675221698958, "loss": 0.166, "step": 284410 }, { "epoch": 11.78, "grad_norm": 0.50390625, "learning_rate": 0.0003327572884993199, "loss": 0.1746, "step": 284420 }, { "epoch": 11.78, "grad_norm": 0.66796875, "learning_rate": 0.00033274705467300375, "loss": 0.2, "step": 284430 }, { "epoch": 11.78, "grad_norm": 0.640625, "learning_rate": 0.0003327368206909666, "loss": 0.1504, "step": 284440 }, { "epoch": 11.78, "grad_norm": 0.482421875, "learning_rate": 0.00033272658655322763, "loss": 0.1677, "step": 284450 }, { "epoch": 11.78, "grad_norm": 0.75, "learning_rate": 0.00033271635225980617, "loss": 0.2204, "step": 284460 }, { "epoch": 11.78, "grad_norm": 0.38671875, "learning_rate": 0.0003327061178107215, "loss": 0.2413, "step": 284470 }, { "epoch": 11.78, "grad_norm": 0.9296875, "learning_rate": 0.0003326958832059928, "loss": 0.1954, "step": 284480 }, { "epoch": 11.78, "grad_norm": 0.50390625, "learning_rate": 0.0003326856484456394, "loss": 0.2062, "step": 284490 }, { "epoch": 11.78, "grad_norm": 0.55078125, "learning_rate": 0.00033267541352968055, "loss": 0.1973, "step": 284500 }, { "epoch": 11.78, "grad_norm": 0.474609375, "learning_rate": 0.0003326651784581355, "loss": 0.1556, "step": 284510 }, { "epoch": 11.78, "grad_norm": 1.0, "learning_rate": 0.00033265494323102355, "loss": 0.1893, "step": 284520 }, { "epoch": 11.79, "grad_norm": 0.91796875, "learning_rate": 0.00033264470784836385, "loss": 0.1971, "step": 284530 }, { "epoch": 11.79, "grad_norm": 0.0, "learning_rate": 0.0003326344723101758, "loss": 0.2048, "step": 284540 }, { "epoch": 11.79, "grad_norm": 0.494140625, "learning_rate": 0.0003326242366164786, "loss": 0.1891, "step": 284550 }, { "epoch": 11.79, "grad_norm": 0.91015625, "learning_rate": 0.00033261400076729135, "loss": 0.1811, "step": 284560 }, { "epoch": 11.79, "grad_norm": 0.8828125, "learning_rate": 0.00033260376476263365, "loss": 0.2012, "step": 284570 }, { "epoch": 11.79, "grad_norm": 0.75, "learning_rate": 0.00033259352860252455, "loss": 0.1969, "step": 284580 }, { "epoch": 11.79, "grad_norm": 0.5859375, "learning_rate": 0.0003325832922869833, "loss": 0.1831, "step": 284590 }, { "epoch": 11.79, "grad_norm": 0.81640625, "learning_rate": 0.0003325730558160293, "loss": 0.1961, "step": 284600 }, { "epoch": 11.79, "grad_norm": 1.6640625, "learning_rate": 0.00033256281918968164, "loss": 0.2196, "step": 284610 }, { "epoch": 11.79, "grad_norm": 0.765625, "learning_rate": 0.00033255258240795965, "loss": 0.1666, "step": 284620 }, { "epoch": 11.79, "grad_norm": 0.58203125, "learning_rate": 0.0003325423454708827, "loss": 0.208, "step": 284630 }, { "epoch": 11.79, "grad_norm": 1.1171875, "learning_rate": 0.0003325321083784699, "loss": 0.2408, "step": 284640 }, { "epoch": 11.79, "grad_norm": 0.6015625, "learning_rate": 0.00033252187113074063, "loss": 0.2086, "step": 284650 }, { "epoch": 11.79, "grad_norm": 0.5234375, "learning_rate": 0.0003325116337277141, "loss": 0.1707, "step": 284660 }, { "epoch": 11.79, "grad_norm": 0.60546875, "learning_rate": 0.0003325013961694095, "loss": 0.1987, "step": 284670 }, { "epoch": 11.79, "grad_norm": 0.30859375, "learning_rate": 0.0003324911584558464, "loss": 0.197, "step": 284680 }, { "epoch": 11.79, "grad_norm": 0.53125, "learning_rate": 0.0003324809205870436, "loss": 0.2464, "step": 284690 }, { "epoch": 11.79, "grad_norm": 0.62109375, "learning_rate": 0.00033247068256302077, "loss": 0.2019, "step": 284700 }, { "epoch": 11.79, "grad_norm": 1.171875, "learning_rate": 0.00033246044438379704, "loss": 0.1434, "step": 284710 }, { "epoch": 11.79, "grad_norm": 1.0234375, "learning_rate": 0.0003324502060493915, "loss": 0.1907, "step": 284720 }, { "epoch": 11.79, "grad_norm": 0.62890625, "learning_rate": 0.00033243996755982373, "loss": 0.1933, "step": 284730 }, { "epoch": 11.79, "grad_norm": 1.0859375, "learning_rate": 0.00033242972891511276, "loss": 0.1885, "step": 284740 }, { "epoch": 11.79, "grad_norm": 0.412109375, "learning_rate": 0.00033241949011527793, "loss": 0.151, "step": 284750 }, { "epoch": 11.79, "grad_norm": 1.4453125, "learning_rate": 0.0003324092511603386, "loss": 0.1653, "step": 284760 }, { "epoch": 11.8, "grad_norm": 0.4453125, "learning_rate": 0.00033239901205031386, "loss": 0.1642, "step": 284770 }, { "epoch": 11.8, "grad_norm": 0.69921875, "learning_rate": 0.00033238877278522316, "loss": 0.172, "step": 284780 }, { "epoch": 11.8, "grad_norm": 0.58203125, "learning_rate": 0.00033237853336508575, "loss": 0.2095, "step": 284790 }, { "epoch": 11.8, "grad_norm": 0.58203125, "learning_rate": 0.00033236829378992065, "loss": 0.1511, "step": 284800 }, { "epoch": 11.8, "grad_norm": 0.96875, "learning_rate": 0.00033235805405974745, "loss": 0.1401, "step": 284810 }, { "epoch": 11.8, "grad_norm": 0.53125, "learning_rate": 0.00033234781417458526, "loss": 0.2176, "step": 284820 }, { "epoch": 11.8, "grad_norm": 0.97265625, "learning_rate": 0.0003323375741344533, "loss": 0.2452, "step": 284830 }, { "epoch": 11.8, "grad_norm": 2.453125, "learning_rate": 0.0003323273339393711, "loss": 0.2193, "step": 284840 }, { "epoch": 11.8, "grad_norm": 0.73828125, "learning_rate": 0.0003323170935893575, "loss": 0.1688, "step": 284850 }, { "epoch": 11.8, "grad_norm": 0.921875, "learning_rate": 0.0003323068530844322, "loss": 0.2298, "step": 284860 }, { "epoch": 11.8, "grad_norm": 0.287109375, "learning_rate": 0.00033229661242461427, "loss": 0.1494, "step": 284870 }, { "epoch": 11.8, "grad_norm": 3.171875, "learning_rate": 0.0003322863716099229, "loss": 0.2416, "step": 284880 }, { "epoch": 11.8, "grad_norm": 1.3984375, "learning_rate": 0.00033227613064037764, "loss": 0.1958, "step": 284890 }, { "epoch": 11.8, "grad_norm": 0.7109375, "learning_rate": 0.00033226588951599747, "loss": 0.2171, "step": 284900 }, { "epoch": 11.8, "grad_norm": 0.453125, "learning_rate": 0.00033225564823680174, "loss": 0.1708, "step": 284910 }, { "epoch": 11.8, "grad_norm": 0.6796875, "learning_rate": 0.0003322454068028099, "loss": 0.1945, "step": 284920 }, { "epoch": 11.8, "grad_norm": 0.51953125, "learning_rate": 0.0003322351652140409, "loss": 0.2008, "step": 284930 }, { "epoch": 11.8, "grad_norm": 0.29296875, "learning_rate": 0.00033222492347051436, "loss": 0.2073, "step": 284940 }, { "epoch": 11.8, "grad_norm": 1.265625, "learning_rate": 0.0003322146815722494, "loss": 0.212, "step": 284950 }, { "epoch": 11.8, "grad_norm": 0.87890625, "learning_rate": 0.0003322044395192652, "loss": 0.2305, "step": 284960 }, { "epoch": 11.8, "grad_norm": 0.44140625, "learning_rate": 0.00033219419731158115, "loss": 0.216, "step": 284970 }, { "epoch": 11.8, "grad_norm": 0.61328125, "learning_rate": 0.0003321839549492165, "loss": 0.1597, "step": 284980 }, { "epoch": 11.8, "grad_norm": 0.76171875, "learning_rate": 0.00033217371243219045, "loss": 0.2057, "step": 284990 }, { "epoch": 11.8, "grad_norm": 1.3671875, "learning_rate": 0.0003321634697605225, "loss": 0.2386, "step": 285000 }, { "epoch": 11.81, "grad_norm": 0.6328125, "learning_rate": 0.00033215322693423157, "loss": 0.1398, "step": 285010 }, { "epoch": 11.81, "grad_norm": 0.84765625, "learning_rate": 0.0003321429839533373, "loss": 0.1765, "step": 285020 }, { "epoch": 11.81, "grad_norm": 0.55078125, "learning_rate": 0.00033213274081785884, "loss": 0.133, "step": 285030 }, { "epoch": 11.81, "grad_norm": 0.498046875, "learning_rate": 0.00033212249752781524, "loss": 0.1955, "step": 285040 }, { "epoch": 11.81, "grad_norm": 0.98046875, "learning_rate": 0.00033211225408322616, "loss": 0.1632, "step": 285050 }, { "epoch": 11.81, "grad_norm": 0.8984375, "learning_rate": 0.00033210201048411057, "loss": 0.1784, "step": 285060 }, { "epoch": 11.81, "grad_norm": 0.28515625, "learning_rate": 0.0003320917667304879, "loss": 0.1728, "step": 285070 }, { "epoch": 11.81, "grad_norm": 0.64453125, "learning_rate": 0.0003320815228223774, "loss": 0.2132, "step": 285080 }, { "epoch": 11.81, "grad_norm": 0.8828125, "learning_rate": 0.00033207127875979826, "loss": 0.2335, "step": 285090 }, { "epoch": 11.81, "grad_norm": 1.5625, "learning_rate": 0.00033206103454276993, "loss": 0.1901, "step": 285100 }, { "epoch": 11.81, "grad_norm": 0.62890625, "learning_rate": 0.00033205079017131154, "loss": 0.1958, "step": 285110 }, { "epoch": 11.81, "grad_norm": 0.73046875, "learning_rate": 0.0003320405456454425, "loss": 0.1789, "step": 285120 }, { "epoch": 11.81, "grad_norm": 0.50390625, "learning_rate": 0.00033203030096518195, "loss": 0.1882, "step": 285130 }, { "epoch": 11.81, "grad_norm": 1.1796875, "learning_rate": 0.00033202005613054925, "loss": 0.2179, "step": 285140 }, { "epoch": 11.81, "grad_norm": 0.69921875, "learning_rate": 0.00033200981114156367, "loss": 0.1793, "step": 285150 }, { "epoch": 11.81, "grad_norm": 0.796875, "learning_rate": 0.0003319995659982445, "loss": 0.181, "step": 285160 }, { "epoch": 11.81, "grad_norm": 0.64453125, "learning_rate": 0.0003319893207006109, "loss": 0.1821, "step": 285170 }, { "epoch": 11.81, "grad_norm": 1.515625, "learning_rate": 0.0003319790752486824, "loss": 0.1768, "step": 285180 }, { "epoch": 11.81, "grad_norm": 0.96875, "learning_rate": 0.000331968829642478, "loss": 0.1992, "step": 285190 }, { "epoch": 11.81, "grad_norm": 1.359375, "learning_rate": 0.00033195858388201715, "loss": 0.1712, "step": 285200 }, { "epoch": 11.81, "grad_norm": 0.66015625, "learning_rate": 0.00033194833796731916, "loss": 0.17, "step": 285210 }, { "epoch": 11.81, "grad_norm": 0.042724609375, "learning_rate": 0.00033193809189840316, "loss": 0.1694, "step": 285220 }, { "epoch": 11.81, "grad_norm": 0.54296875, "learning_rate": 0.00033192784567528857, "loss": 0.1918, "step": 285230 }, { "epoch": 11.81, "grad_norm": 1.4609375, "learning_rate": 0.00033191759929799457, "loss": 0.177, "step": 285240 }, { "epoch": 11.82, "grad_norm": 0.96875, "learning_rate": 0.0003319073527665406, "loss": 0.2009, "step": 285250 }, { "epoch": 11.82, "grad_norm": 0.74609375, "learning_rate": 0.00033189710608094584, "loss": 0.2051, "step": 285260 }, { "epoch": 11.82, "grad_norm": 0.396484375, "learning_rate": 0.0003318868592412294, "loss": 0.1712, "step": 285270 }, { "epoch": 11.82, "grad_norm": 0.94140625, "learning_rate": 0.0003318766122474109, "loss": 0.1994, "step": 285280 }, { "epoch": 11.82, "grad_norm": 0.83203125, "learning_rate": 0.0003318663650995094, "loss": 0.2271, "step": 285290 }, { "epoch": 11.82, "grad_norm": 1.1171875, "learning_rate": 0.00033185611779754424, "loss": 0.1777, "step": 285300 }, { "epoch": 11.82, "grad_norm": 1.03125, "learning_rate": 0.0003318458703415347, "loss": 0.161, "step": 285310 }, { "epoch": 11.82, "grad_norm": 1.3046875, "learning_rate": 0.00033183562273150004, "loss": 0.2203, "step": 285320 }, { "epoch": 11.82, "grad_norm": 0.349609375, "learning_rate": 0.00033182537496745964, "loss": 0.1899, "step": 285330 }, { "epoch": 11.82, "grad_norm": 1.9921875, "learning_rate": 0.0003318151270494327, "loss": 0.1928, "step": 285340 }, { "epoch": 11.82, "grad_norm": 1.7265625, "learning_rate": 0.0003318048789774385, "loss": 0.1762, "step": 285350 }, { "epoch": 11.82, "grad_norm": 0.451171875, "learning_rate": 0.0003317946307514964, "loss": 0.2205, "step": 285360 }, { "epoch": 11.82, "grad_norm": 0.68359375, "learning_rate": 0.00033178438237162563, "loss": 0.1382, "step": 285370 }, { "epoch": 11.82, "grad_norm": 1.3203125, "learning_rate": 0.00033177413383784544, "loss": 0.2119, "step": 285380 }, { "epoch": 11.82, "grad_norm": 0.625, "learning_rate": 0.0003317638851501752, "loss": 0.2002, "step": 285390 }, { "epoch": 11.82, "grad_norm": 0.5859375, "learning_rate": 0.0003317536363086341, "loss": 0.2026, "step": 285400 }, { "epoch": 11.82, "grad_norm": 1.0546875, "learning_rate": 0.0003317433873132415, "loss": 0.2505, "step": 285410 }, { "epoch": 11.82, "grad_norm": 0.796875, "learning_rate": 0.0003317331381640168, "loss": 0.1881, "step": 285420 }, { "epoch": 11.82, "grad_norm": 0.462890625, "learning_rate": 0.000331722888860979, "loss": 0.1881, "step": 285430 }, { "epoch": 11.82, "grad_norm": 0.875, "learning_rate": 0.00033171263940414764, "loss": 0.1604, "step": 285440 }, { "epoch": 11.82, "grad_norm": 0.8671875, "learning_rate": 0.00033170238979354186, "loss": 0.2006, "step": 285450 }, { "epoch": 11.82, "grad_norm": 0.9765625, "learning_rate": 0.0003316921400291811, "loss": 0.2638, "step": 285460 }, { "epoch": 11.82, "grad_norm": 0.59375, "learning_rate": 0.0003316818901110844, "loss": 0.2076, "step": 285470 }, { "epoch": 11.82, "grad_norm": 0.296875, "learning_rate": 0.00033167164003927133, "loss": 0.1753, "step": 285480 }, { "epoch": 11.82, "grad_norm": 1.71875, "learning_rate": 0.000331661389813761, "loss": 0.2023, "step": 285490 }, { "epoch": 11.83, "grad_norm": 0.48046875, "learning_rate": 0.0003316511394345728, "loss": 0.1631, "step": 285500 }, { "epoch": 11.83, "grad_norm": 1.03125, "learning_rate": 0.00033164088890172595, "loss": 0.21, "step": 285510 }, { "epoch": 11.83, "grad_norm": 0.53515625, "learning_rate": 0.0003316306382152397, "loss": 0.1583, "step": 285520 }, { "epoch": 11.83, "grad_norm": 0.625, "learning_rate": 0.00033162038737513346, "loss": 0.1577, "step": 285530 }, { "epoch": 11.83, "grad_norm": 0.59375, "learning_rate": 0.00033161013638142653, "loss": 0.1998, "step": 285540 }, { "epoch": 11.83, "grad_norm": 0.41015625, "learning_rate": 0.000331599885234138, "loss": 0.159, "step": 285550 }, { "epoch": 11.83, "grad_norm": 1.3359375, "learning_rate": 0.00033158963393328735, "loss": 0.2071, "step": 285560 }, { "epoch": 11.83, "grad_norm": 1.1015625, "learning_rate": 0.00033157938247889386, "loss": 0.1518, "step": 285570 }, { "epoch": 11.83, "grad_norm": 1.09375, "learning_rate": 0.0003315691308709768, "loss": 0.1872, "step": 285580 }, { "epoch": 11.83, "grad_norm": 0.96484375, "learning_rate": 0.0003315588791095554, "loss": 0.1726, "step": 285590 }, { "epoch": 11.83, "grad_norm": 0.6953125, "learning_rate": 0.00033154862719464895, "loss": 0.1964, "step": 285600 }, { "epoch": 11.83, "grad_norm": 0.9453125, "learning_rate": 0.0003315383751262768, "loss": 0.1361, "step": 285610 }, { "epoch": 11.83, "grad_norm": 0.23828125, "learning_rate": 0.00033152812290445835, "loss": 0.1754, "step": 285620 }, { "epoch": 11.83, "grad_norm": 0.73828125, "learning_rate": 0.00033151787052921266, "loss": 0.198, "step": 285630 }, { "epoch": 11.83, "grad_norm": 1.3046875, "learning_rate": 0.0003315076180005592, "loss": 0.2194, "step": 285640 }, { "epoch": 11.83, "grad_norm": 0.5859375, "learning_rate": 0.0003314973653185172, "loss": 0.2044, "step": 285650 }, { "epoch": 11.83, "grad_norm": 0.69140625, "learning_rate": 0.00033148711248310594, "loss": 0.1794, "step": 285660 }, { "epoch": 11.83, "grad_norm": 1.3984375, "learning_rate": 0.00033147685949434476, "loss": 0.2319, "step": 285670 }, { "epoch": 11.83, "grad_norm": 0.62109375, "learning_rate": 0.0003314666063522529, "loss": 0.183, "step": 285680 }, { "epoch": 11.83, "grad_norm": 0.77734375, "learning_rate": 0.0003314563530568497, "loss": 0.1975, "step": 285690 }, { "epoch": 11.83, "grad_norm": 0.333984375, "learning_rate": 0.0003314460996081544, "loss": 0.174, "step": 285700 }, { "epoch": 11.83, "grad_norm": 2.65625, "learning_rate": 0.00033143584600618637, "loss": 0.215, "step": 285710 }, { "epoch": 11.83, "grad_norm": 0.78125, "learning_rate": 0.0003314255922509649, "loss": 0.1817, "step": 285720 }, { "epoch": 11.83, "grad_norm": 0.7265625, "learning_rate": 0.00033141533834250917, "loss": 0.2044, "step": 285730 }, { "epoch": 11.84, "grad_norm": 0.0, "learning_rate": 0.00033140508428083866, "loss": 0.187, "step": 285740 }, { "epoch": 11.84, "grad_norm": 0.384765625, "learning_rate": 0.0003313948300659726, "loss": 0.1449, "step": 285750 }, { "epoch": 11.84, "grad_norm": 0.4140625, "learning_rate": 0.00033138457569793013, "loss": 0.2491, "step": 285760 }, { "epoch": 11.84, "grad_norm": 0.53515625, "learning_rate": 0.0003313743211767307, "loss": 0.1748, "step": 285770 }, { "epoch": 11.84, "grad_norm": 0.28125, "learning_rate": 0.0003313640665023937, "loss": 0.2026, "step": 285780 }, { "epoch": 11.84, "grad_norm": 0.890625, "learning_rate": 0.0003313538116749382, "loss": 0.2035, "step": 285790 }, { "epoch": 11.84, "grad_norm": 2.8125, "learning_rate": 0.0003313435566943837, "loss": 0.2039, "step": 285800 }, { "epoch": 11.84, "grad_norm": 1.1484375, "learning_rate": 0.00033133330156074936, "loss": 0.162, "step": 285810 }, { "epoch": 11.84, "grad_norm": 1.9140625, "learning_rate": 0.00033132304627405453, "loss": 0.1993, "step": 285820 }, { "epoch": 11.84, "grad_norm": 1.109375, "learning_rate": 0.00033131279083431855, "loss": 0.1947, "step": 285830 }, { "epoch": 11.84, "grad_norm": 0.232421875, "learning_rate": 0.0003313025352415606, "loss": 0.2471, "step": 285840 }, { "epoch": 11.84, "grad_norm": 0.86328125, "learning_rate": 0.0003312922794958001, "loss": 0.2136, "step": 285850 }, { "epoch": 11.84, "grad_norm": 1.296875, "learning_rate": 0.0003312820235970564, "loss": 0.1598, "step": 285860 }, { "epoch": 11.84, "grad_norm": 1.03125, "learning_rate": 0.00033127176754534847, "loss": 0.2199, "step": 285870 }, { "epoch": 11.84, "grad_norm": 1.9296875, "learning_rate": 0.0003312615113406961, "loss": 0.1793, "step": 285880 }, { "epoch": 11.84, "grad_norm": 1.9921875, "learning_rate": 0.0003312512549831182, "loss": 0.216, "step": 285890 }, { "epoch": 11.84, "grad_norm": 0.671875, "learning_rate": 0.0003312409984726342, "loss": 0.243, "step": 285900 }, { "epoch": 11.84, "grad_norm": 1.4609375, "learning_rate": 0.00033123074180926355, "loss": 0.1679, "step": 285910 }, { "epoch": 11.84, "grad_norm": 2.0625, "learning_rate": 0.0003312204849930253, "loss": 0.1894, "step": 285920 }, { "epoch": 11.84, "grad_norm": 0.5078125, "learning_rate": 0.0003312102280239389, "loss": 0.1741, "step": 285930 }, { "epoch": 11.84, "grad_norm": 0.51953125, "learning_rate": 0.0003311999709020237, "loss": 0.1756, "step": 285940 }, { "epoch": 11.84, "grad_norm": 0.84375, "learning_rate": 0.00033118971362729876, "loss": 0.181, "step": 285950 }, { "epoch": 11.84, "grad_norm": 0.66796875, "learning_rate": 0.0003311794561997837, "loss": 0.2138, "step": 285960 }, { "epoch": 11.84, "grad_norm": 0.431640625, "learning_rate": 0.00033116919861949756, "loss": 0.171, "step": 285970 }, { "epoch": 11.85, "grad_norm": 1.046875, "learning_rate": 0.0003311589408864598, "loss": 0.1854, "step": 285980 }, { "epoch": 11.85, "grad_norm": 0.69140625, "learning_rate": 0.0003311486830006897, "loss": 0.2566, "step": 285990 }, { "epoch": 11.85, "grad_norm": 1.1171875, "learning_rate": 0.00033113842496220646, "loss": 0.211, "step": 286000 }, { "epoch": 11.85, "grad_norm": 0.60546875, "learning_rate": 0.0003311281667710296, "loss": 0.1915, "step": 286010 }, { "epoch": 11.85, "grad_norm": 0.83203125, "learning_rate": 0.0003311179084271782, "loss": 0.2002, "step": 286020 }, { "epoch": 11.85, "grad_norm": 1.140625, "learning_rate": 0.00033110764993067165, "loss": 0.1787, "step": 286030 }, { "epoch": 11.85, "grad_norm": 0.8671875, "learning_rate": 0.00033109739128152934, "loss": 0.1916, "step": 286040 }, { "epoch": 11.85, "grad_norm": 0.578125, "learning_rate": 0.0003310871324797704, "loss": 0.2008, "step": 286050 }, { "epoch": 11.85, "grad_norm": 0.64453125, "learning_rate": 0.00033107687352541425, "loss": 0.1537, "step": 286060 }, { "epoch": 11.85, "grad_norm": 0.9375, "learning_rate": 0.00033106661441848026, "loss": 0.1879, "step": 286070 }, { "epoch": 11.85, "grad_norm": 1.2890625, "learning_rate": 0.00033105635515898754, "loss": 0.2099, "step": 286080 }, { "epoch": 11.85, "grad_norm": 0.98828125, "learning_rate": 0.0003310460957469556, "loss": 0.2204, "step": 286090 }, { "epoch": 11.85, "grad_norm": 0.51953125, "learning_rate": 0.0003310358361824036, "loss": 0.1381, "step": 286100 }, { "epoch": 11.85, "grad_norm": 0.76171875, "learning_rate": 0.0003310255764653509, "loss": 0.2278, "step": 286110 }, { "epoch": 11.85, "grad_norm": 0.44140625, "learning_rate": 0.000331015316595817, "loss": 0.1611, "step": 286120 }, { "epoch": 11.85, "grad_norm": 0.91015625, "learning_rate": 0.0003310050565738208, "loss": 0.1843, "step": 286130 }, { "epoch": 11.85, "grad_norm": 0.5859375, "learning_rate": 0.0003309947963993819, "loss": 0.1345, "step": 286140 }, { "epoch": 11.85, "grad_norm": 1.5078125, "learning_rate": 0.00033098453607251953, "loss": 0.2503, "step": 286150 }, { "epoch": 11.85, "grad_norm": 0.6953125, "learning_rate": 0.000330974275593253, "loss": 0.1881, "step": 286160 }, { "epoch": 11.85, "grad_norm": 1.1015625, "learning_rate": 0.0003309640149616017, "loss": 0.1827, "step": 286170 }, { "epoch": 11.85, "grad_norm": 0.369140625, "learning_rate": 0.00033095375417758484, "loss": 0.1719, "step": 286180 }, { "epoch": 11.85, "grad_norm": 0.97265625, "learning_rate": 0.0003309434932412217, "loss": 0.1942, "step": 286190 }, { "epoch": 11.85, "grad_norm": 0.25390625, "learning_rate": 0.00033093323215253173, "loss": 0.215, "step": 286200 }, { "epoch": 11.85, "grad_norm": 0.291015625, "learning_rate": 0.0003309229709115341, "loss": 0.1885, "step": 286210 }, { "epoch": 11.86, "grad_norm": 1.09375, "learning_rate": 0.0003309127095182482, "loss": 0.1651, "step": 286220 }, { "epoch": 11.86, "grad_norm": 0.2138671875, "learning_rate": 0.00033090244797269333, "loss": 0.1876, "step": 286230 }, { "epoch": 11.86, "grad_norm": 0.435546875, "learning_rate": 0.0003308921862748887, "loss": 0.1655, "step": 286240 }, { "epoch": 11.86, "grad_norm": 0.48046875, "learning_rate": 0.0003308819244248538, "loss": 0.2157, "step": 286250 }, { "epoch": 11.86, "grad_norm": 0.265625, "learning_rate": 0.00033087166242260787, "loss": 0.1871, "step": 286260 }, { "epoch": 11.86, "grad_norm": 0.953125, "learning_rate": 0.0003308614002681701, "loss": 0.1997, "step": 286270 }, { "epoch": 11.86, "grad_norm": 0.90625, "learning_rate": 0.0003308511379615601, "loss": 0.2045, "step": 286280 }, { "epoch": 11.86, "grad_norm": 0.92578125, "learning_rate": 0.0003308408755027968, "loss": 0.2061, "step": 286290 }, { "epoch": 11.86, "grad_norm": 0.70703125, "learning_rate": 0.0003308306128918998, "loss": 0.1844, "step": 286300 }, { "epoch": 11.86, "grad_norm": 0.6484375, "learning_rate": 0.0003308203501288883, "loss": 0.2154, "step": 286310 }, { "epoch": 11.86, "grad_norm": 0.97265625, "learning_rate": 0.0003308100872137815, "loss": 0.205, "step": 286320 }, { "epoch": 11.86, "grad_norm": 0.61328125, "learning_rate": 0.00033079982414659906, "loss": 0.2179, "step": 286330 }, { "epoch": 11.86, "grad_norm": 0.9453125, "learning_rate": 0.0003307895609273599, "loss": 0.1948, "step": 286340 }, { "epoch": 11.86, "grad_norm": 1.6640625, "learning_rate": 0.0003307792975560836, "loss": 0.1682, "step": 286350 }, { "epoch": 11.86, "grad_norm": 1.15625, "learning_rate": 0.00033076903403278935, "loss": 0.1873, "step": 286360 }, { "epoch": 11.86, "grad_norm": 0.9296875, "learning_rate": 0.0003307587703574965, "loss": 0.1966, "step": 286370 }, { "epoch": 11.86, "grad_norm": 0.58984375, "learning_rate": 0.0003307485065302244, "loss": 0.2145, "step": 286380 }, { "epoch": 11.86, "grad_norm": 0.427734375, "learning_rate": 0.0003307382425509924, "loss": 0.1964, "step": 286390 }, { "epoch": 11.86, "grad_norm": 0.6015625, "learning_rate": 0.00033072797841981956, "loss": 0.2089, "step": 286400 }, { "epoch": 11.86, "grad_norm": 0.484375, "learning_rate": 0.0003307177141367255, "loss": 0.1539, "step": 286410 }, { "epoch": 11.86, "grad_norm": 0.6796875, "learning_rate": 0.0003307074497017294, "loss": 0.2044, "step": 286420 }, { "epoch": 11.86, "grad_norm": 2.609375, "learning_rate": 0.0003306971851148506, "loss": 0.1545, "step": 286430 }, { "epoch": 11.86, "grad_norm": 0.78125, "learning_rate": 0.0003306869203761084, "loss": 0.2203, "step": 286440 }, { "epoch": 11.86, "grad_norm": 0.6171875, "learning_rate": 0.00033067665548552216, "loss": 0.1929, "step": 286450 }, { "epoch": 11.87, "grad_norm": 0.59375, "learning_rate": 0.00033066639044311113, "loss": 0.1822, "step": 286460 }, { "epoch": 11.87, "grad_norm": 0.83984375, "learning_rate": 0.0003306561252488947, "loss": 0.169, "step": 286470 }, { "epoch": 11.87, "grad_norm": 0.7109375, "learning_rate": 0.00033064585990289207, "loss": 0.1633, "step": 286480 }, { "epoch": 11.87, "grad_norm": 0.384765625, "learning_rate": 0.00033063559440512275, "loss": 0.1358, "step": 286490 }, { "epoch": 11.87, "grad_norm": 1.0234375, "learning_rate": 0.00033062532875560594, "loss": 0.1964, "step": 286500 }, { "epoch": 11.87, "grad_norm": 0.86328125, "learning_rate": 0.00033061506295436083, "loss": 0.236, "step": 286510 }, { "epoch": 11.87, "grad_norm": 0.48828125, "learning_rate": 0.00033060479700140706, "loss": 0.1792, "step": 286520 }, { "epoch": 11.87, "grad_norm": 1.3515625, "learning_rate": 0.0003305945308967636, "loss": 0.2339, "step": 286530 }, { "epoch": 11.87, "grad_norm": 0.408203125, "learning_rate": 0.00033058426464045005, "loss": 0.2255, "step": 286540 }, { "epoch": 11.87, "grad_norm": 0.5546875, "learning_rate": 0.00033057399823248557, "loss": 0.2187, "step": 286550 }, { "epoch": 11.87, "grad_norm": 0.8671875, "learning_rate": 0.00033056373167288957, "loss": 0.1954, "step": 286560 }, { "epoch": 11.87, "grad_norm": 0.8984375, "learning_rate": 0.00033055346496168133, "loss": 0.2347, "step": 286570 }, { "epoch": 11.87, "grad_norm": 0.462890625, "learning_rate": 0.0003305431980988801, "loss": 0.186, "step": 286580 }, { "epoch": 11.87, "grad_norm": 0.7578125, "learning_rate": 0.0003305329310845053, "loss": 0.1636, "step": 286590 }, { "epoch": 11.87, "grad_norm": 0.5546875, "learning_rate": 0.0003305226639185763, "loss": 0.2043, "step": 286600 }, { "epoch": 11.87, "grad_norm": 1.4375, "learning_rate": 0.0003305123966011122, "loss": 0.211, "step": 286610 }, { "epoch": 11.87, "grad_norm": 0.8671875, "learning_rate": 0.0003305021291321325, "loss": 0.1821, "step": 286620 }, { "epoch": 11.87, "grad_norm": 0.953125, "learning_rate": 0.00033049186151165653, "loss": 0.1708, "step": 286630 }, { "epoch": 11.87, "grad_norm": 1.0078125, "learning_rate": 0.00033048159373970354, "loss": 0.1858, "step": 286640 }, { "epoch": 11.87, "grad_norm": 0.515625, "learning_rate": 0.000330471325816293, "loss": 0.1772, "step": 286650 }, { "epoch": 11.87, "grad_norm": 0.703125, "learning_rate": 0.000330461057741444, "loss": 0.1804, "step": 286660 }, { "epoch": 11.87, "grad_norm": 0.98046875, "learning_rate": 0.00033045078951517593, "loss": 0.1764, "step": 286670 }, { "epoch": 11.87, "grad_norm": 0.90234375, "learning_rate": 0.0003304405211375083, "loss": 0.2199, "step": 286680 }, { "epoch": 11.87, "grad_norm": 0.93359375, "learning_rate": 0.0003304302526084602, "loss": 0.164, "step": 286690 }, { "epoch": 11.88, "grad_norm": 0.8515625, "learning_rate": 0.00033041998392805105, "loss": 0.1615, "step": 286700 }, { "epoch": 11.88, "grad_norm": 1.125, "learning_rate": 0.0003304097150963002, "loss": 0.1798, "step": 286710 }, { "epoch": 11.88, "grad_norm": 1.109375, "learning_rate": 0.0003303994461132269, "loss": 0.1366, "step": 286720 }, { "epoch": 11.88, "grad_norm": 0.5546875, "learning_rate": 0.0003303891769788506, "loss": 0.1164, "step": 286730 }, { "epoch": 11.88, "grad_norm": 1.75, "learning_rate": 0.00033037890769319056, "loss": 0.2113, "step": 286740 }, { "epoch": 11.88, "grad_norm": 0.85546875, "learning_rate": 0.00033036863825626603, "loss": 0.2133, "step": 286750 }, { "epoch": 11.88, "grad_norm": 1.0546875, "learning_rate": 0.00033035836866809643, "loss": 0.2024, "step": 286760 }, { "epoch": 11.88, "grad_norm": 0.69921875, "learning_rate": 0.00033034809892870106, "loss": 0.1706, "step": 286770 }, { "epoch": 11.88, "grad_norm": 1.6328125, "learning_rate": 0.0003303378290380993, "loss": 0.1858, "step": 286780 }, { "epoch": 11.88, "grad_norm": 0.96875, "learning_rate": 0.0003303275589963103, "loss": 0.175, "step": 286790 }, { "epoch": 11.88, "grad_norm": 0.75390625, "learning_rate": 0.0003303172888033535, "loss": 0.2091, "step": 286800 }, { "epoch": 11.88, "grad_norm": 0.78515625, "learning_rate": 0.00033030701845924835, "loss": 0.1827, "step": 286810 }, { "epoch": 11.88, "grad_norm": 0.640625, "learning_rate": 0.000330296747964014, "loss": 0.2159, "step": 286820 }, { "epoch": 11.88, "grad_norm": 1.0234375, "learning_rate": 0.00033028647731766993, "loss": 0.1273, "step": 286830 }, { "epoch": 11.88, "grad_norm": 0.376953125, "learning_rate": 0.0003302762065202353, "loss": 0.211, "step": 286840 }, { "epoch": 11.88, "grad_norm": 0.6875, "learning_rate": 0.0003302659355717295, "loss": 0.1905, "step": 286850 }, { "epoch": 11.88, "grad_norm": 0.443359375, "learning_rate": 0.00033025566447217195, "loss": 0.1381, "step": 286860 }, { "epoch": 11.88, "grad_norm": 0.671875, "learning_rate": 0.0003302453932215819, "loss": 0.1654, "step": 286870 }, { "epoch": 11.88, "grad_norm": 1.0078125, "learning_rate": 0.0003302351218199786, "loss": 0.2006, "step": 286880 }, { "epoch": 11.88, "grad_norm": 0.734375, "learning_rate": 0.00033022485026738157, "loss": 0.1933, "step": 286890 }, { "epoch": 11.88, "grad_norm": 0.62109375, "learning_rate": 0.00033021457856380993, "loss": 0.1978, "step": 286900 }, { "epoch": 11.88, "grad_norm": 0.65625, "learning_rate": 0.00033020430670928315, "loss": 0.2022, "step": 286910 }, { "epoch": 11.88, "grad_norm": 0.6015625, "learning_rate": 0.0003301940347038205, "loss": 0.1728, "step": 286920 }, { "epoch": 11.88, "grad_norm": 0.73046875, "learning_rate": 0.00033018376254744143, "loss": 0.235, "step": 286930 }, { "epoch": 11.89, "grad_norm": 0.412109375, "learning_rate": 0.0003301734902401652, "loss": 0.158, "step": 286940 }, { "epoch": 11.89, "grad_norm": 1.1328125, "learning_rate": 0.000330163217782011, "loss": 0.1489, "step": 286950 }, { "epoch": 11.89, "grad_norm": 0.4609375, "learning_rate": 0.0003301529451729983, "loss": 0.185, "step": 286960 }, { "epoch": 11.89, "grad_norm": 0.357421875, "learning_rate": 0.00033014267241314644, "loss": 0.1585, "step": 286970 }, { "epoch": 11.89, "grad_norm": 0.375, "learning_rate": 0.00033013239950247474, "loss": 0.2215, "step": 286980 }, { "epoch": 11.89, "grad_norm": 1.71875, "learning_rate": 0.00033012212644100255, "loss": 0.2091, "step": 286990 }, { "epoch": 11.89, "grad_norm": 1.0390625, "learning_rate": 0.0003301118532287491, "loss": 0.1615, "step": 287000 }, { "epoch": 11.89, "grad_norm": 0.87109375, "learning_rate": 0.00033010157986573373, "loss": 0.2164, "step": 287010 }, { "epoch": 11.89, "grad_norm": 2.0625, "learning_rate": 0.00033009130635197604, "loss": 0.1977, "step": 287020 }, { "epoch": 11.89, "grad_norm": 0.65234375, "learning_rate": 0.000330081032687495, "loss": 0.1879, "step": 287030 }, { "epoch": 11.89, "grad_norm": 0.64453125, "learning_rate": 0.0003300707588723101, "loss": 0.1802, "step": 287040 }, { "epoch": 11.89, "grad_norm": 0.30078125, "learning_rate": 0.00033006048490644076, "loss": 0.1367, "step": 287050 }, { "epoch": 11.89, "grad_norm": 0.54296875, "learning_rate": 0.0003300502107899063, "loss": 0.195, "step": 287060 }, { "epoch": 11.89, "grad_norm": 0.486328125, "learning_rate": 0.0003300399365227258, "loss": 0.1782, "step": 287070 }, { "epoch": 11.89, "grad_norm": 0.54296875, "learning_rate": 0.0003300296621049189, "loss": 0.1958, "step": 287080 }, { "epoch": 11.89, "grad_norm": 2.640625, "learning_rate": 0.00033001938753650476, "loss": 0.1656, "step": 287090 }, { "epoch": 11.89, "grad_norm": 1.328125, "learning_rate": 0.0003300091128175028, "loss": 0.2264, "step": 287100 }, { "epoch": 11.89, "grad_norm": 0.61328125, "learning_rate": 0.0003299988379479324, "loss": 0.1903, "step": 287110 }, { "epoch": 11.89, "grad_norm": 0.55078125, "learning_rate": 0.00032998856292781273, "loss": 0.1973, "step": 287120 }, { "epoch": 11.89, "grad_norm": 0.4296875, "learning_rate": 0.00032997828775716324, "loss": 0.1927, "step": 287130 }, { "epoch": 11.89, "grad_norm": 0.60546875, "learning_rate": 0.00032996801243600327, "loss": 0.2287, "step": 287140 }, { "epoch": 11.89, "grad_norm": 0.462890625, "learning_rate": 0.00032995773696435215, "loss": 0.2163, "step": 287150 }, { "epoch": 11.89, "grad_norm": 1.3671875, "learning_rate": 0.0003299474613422292, "loss": 0.211, "step": 287160 }, { "epoch": 11.89, "grad_norm": 0.6640625, "learning_rate": 0.0003299371855696537, "loss": 0.171, "step": 287170 }, { "epoch": 11.89, "grad_norm": 0.84375, "learning_rate": 0.0003299269096466451, "loss": 0.1674, "step": 287180 }, { "epoch": 11.9, "grad_norm": 0.36328125, "learning_rate": 0.0003299166335732227, "loss": 0.2247, "step": 287190 }, { "epoch": 11.9, "grad_norm": 0.470703125, "learning_rate": 0.0003299063573494058, "loss": 0.1959, "step": 287200 }, { "epoch": 11.9, "grad_norm": 0.55078125, "learning_rate": 0.0003298960809752137, "loss": 0.1644, "step": 287210 }, { "epoch": 11.9, "grad_norm": 0.83984375, "learning_rate": 0.00032988580445066594, "loss": 0.1883, "step": 287220 }, { "epoch": 11.9, "grad_norm": 0.85546875, "learning_rate": 0.00032987552777578166, "loss": 0.2104, "step": 287230 }, { "epoch": 11.9, "grad_norm": 1.0, "learning_rate": 0.0003298652509505803, "loss": 0.2391, "step": 287240 }, { "epoch": 11.9, "grad_norm": 0.71875, "learning_rate": 0.00032985497397508106, "loss": 0.1539, "step": 287250 }, { "epoch": 11.9, "grad_norm": 0.2197265625, "learning_rate": 0.00032984469684930345, "loss": 0.1828, "step": 287260 }, { "epoch": 11.9, "grad_norm": 1.3359375, "learning_rate": 0.00032983441957326674, "loss": 0.2204, "step": 287270 }, { "epoch": 11.9, "grad_norm": 1.2109375, "learning_rate": 0.0003298241421469903, "loss": 0.1849, "step": 287280 }, { "epoch": 11.9, "grad_norm": 0.9140625, "learning_rate": 0.0003298138645704934, "loss": 0.1963, "step": 287290 }, { "epoch": 11.9, "grad_norm": 0.9921875, "learning_rate": 0.00032980358684379544, "loss": 0.2398, "step": 287300 }, { "epoch": 11.9, "grad_norm": 1.171875, "learning_rate": 0.00032979330896691574, "loss": 0.2341, "step": 287310 }, { "epoch": 11.9, "grad_norm": 0.244140625, "learning_rate": 0.0003297830309398737, "loss": 0.2078, "step": 287320 }, { "epoch": 11.9, "grad_norm": 0.59375, "learning_rate": 0.00032977275276268854, "loss": 0.1863, "step": 287330 }, { "epoch": 11.9, "grad_norm": 0.55859375, "learning_rate": 0.0003297624744353797, "loss": 0.2025, "step": 287340 }, { "epoch": 11.9, "grad_norm": 0.96484375, "learning_rate": 0.0003297521959579666, "loss": 0.1781, "step": 287350 }, { "epoch": 11.9, "grad_norm": 0.515625, "learning_rate": 0.0003297419173304683, "loss": 0.2248, "step": 287360 }, { "epoch": 11.9, "grad_norm": 2.390625, "learning_rate": 0.0003297316385529044, "loss": 0.208, "step": 287370 }, { "epoch": 11.9, "grad_norm": 1.21875, "learning_rate": 0.0003297213596252942, "loss": 0.2057, "step": 287380 }, { "epoch": 11.9, "grad_norm": 0.8125, "learning_rate": 0.0003297110805476569, "loss": 0.1902, "step": 287390 }, { "epoch": 11.9, "grad_norm": 0.5390625, "learning_rate": 0.0003297008013200121, "loss": 0.1848, "step": 287400 }, { "epoch": 11.9, "grad_norm": 0.61328125, "learning_rate": 0.0003296905219423789, "loss": 0.2201, "step": 287410 }, { "epoch": 11.9, "grad_norm": 1.0, "learning_rate": 0.0003296802424147768, "loss": 0.1187, "step": 287420 }, { "epoch": 11.91, "grad_norm": 1.5390625, "learning_rate": 0.0003296699627372251, "loss": 0.1253, "step": 287430 }, { "epoch": 11.91, "grad_norm": 0.427734375, "learning_rate": 0.00032965968290974313, "loss": 0.1831, "step": 287440 }, { "epoch": 11.91, "grad_norm": 0.1806640625, "learning_rate": 0.0003296494029323502, "loss": 0.1943, "step": 287450 }, { "epoch": 11.91, "grad_norm": 0.44140625, "learning_rate": 0.0003296391228050657, "loss": 0.2066, "step": 287460 }, { "epoch": 11.91, "grad_norm": 0.58203125, "learning_rate": 0.0003296288425279089, "loss": 0.2119, "step": 287470 }, { "epoch": 11.91, "grad_norm": 0.294921875, "learning_rate": 0.00032961856210089936, "loss": 0.2004, "step": 287480 }, { "epoch": 11.91, "grad_norm": 0.71484375, "learning_rate": 0.0003296082815240562, "loss": 0.1863, "step": 287490 }, { "epoch": 11.91, "grad_norm": 1.5234375, "learning_rate": 0.0003295980007973989, "loss": 0.1891, "step": 287500 }, { "epoch": 11.91, "grad_norm": 1.15625, "learning_rate": 0.0003295877199209467, "loss": 0.1701, "step": 287510 }, { "epoch": 11.91, "grad_norm": 0.75, "learning_rate": 0.000329577438894719, "loss": 0.1584, "step": 287520 }, { "epoch": 11.91, "grad_norm": 0.84375, "learning_rate": 0.0003295671577187352, "loss": 0.1886, "step": 287530 }, { "epoch": 11.91, "grad_norm": 1.21875, "learning_rate": 0.0003295568763930147, "loss": 0.1594, "step": 287540 }, { "epoch": 11.91, "grad_norm": 0.63671875, "learning_rate": 0.0003295465949175765, "loss": 0.2044, "step": 287550 }, { "epoch": 11.91, "grad_norm": 0.921875, "learning_rate": 0.0003295363132924404, "loss": 0.1737, "step": 287560 }, { "epoch": 11.91, "grad_norm": 1.4609375, "learning_rate": 0.0003295260315176255, "loss": 0.224, "step": 287570 }, { "epoch": 11.91, "grad_norm": 0.59375, "learning_rate": 0.0003295157495931512, "loss": 0.1869, "step": 287580 }, { "epoch": 11.91, "grad_norm": 0.62890625, "learning_rate": 0.00032950546751903684, "loss": 0.2188, "step": 287590 }, { "epoch": 11.91, "grad_norm": 0.60546875, "learning_rate": 0.00032949518529530163, "loss": 0.2022, "step": 287600 }, { "epoch": 11.91, "grad_norm": 0.06787109375, "learning_rate": 0.00032948490292196534, "loss": 0.1426, "step": 287610 }, { "epoch": 11.91, "grad_norm": 0.87890625, "learning_rate": 0.0003294746203990469, "loss": 0.1867, "step": 287620 }, { "epoch": 11.91, "grad_norm": 0.8203125, "learning_rate": 0.00032946433772656575, "loss": 0.1758, "step": 287630 }, { "epoch": 11.91, "grad_norm": 1.453125, "learning_rate": 0.0003294540549045414, "loss": 0.1608, "step": 287640 }, { "epoch": 11.91, "grad_norm": 0.69921875, "learning_rate": 0.000329443771932993, "loss": 0.1879, "step": 287650 }, { "epoch": 11.91, "grad_norm": 1.0703125, "learning_rate": 0.00032943348881194005, "loss": 0.1791, "step": 287660 }, { "epoch": 11.92, "grad_norm": 0.625, "learning_rate": 0.00032942320554140195, "loss": 0.1899, "step": 287670 }, { "epoch": 11.92, "grad_norm": 0.66015625, "learning_rate": 0.0003294129221213978, "loss": 0.2268, "step": 287680 }, { "epoch": 11.92, "grad_norm": 0.953125, "learning_rate": 0.0003294026385519472, "loss": 0.2123, "step": 287690 }, { "epoch": 11.92, "grad_norm": 0.73046875, "learning_rate": 0.00032939235483306947, "loss": 0.1847, "step": 287700 }, { "epoch": 11.92, "grad_norm": 0.41796875, "learning_rate": 0.0003293820709647837, "loss": 0.2122, "step": 287710 }, { "epoch": 11.92, "grad_norm": 0.90234375, "learning_rate": 0.0003293717869471096, "loss": 0.299, "step": 287720 }, { "epoch": 11.92, "grad_norm": 0.45703125, "learning_rate": 0.0003293615027800663, "loss": 0.2375, "step": 287730 }, { "epoch": 11.92, "grad_norm": 0.7265625, "learning_rate": 0.0003293512184636733, "loss": 0.1814, "step": 287740 }, { "epoch": 11.92, "grad_norm": 0.78125, "learning_rate": 0.00032934093399794985, "loss": 0.2015, "step": 287750 }, { "epoch": 11.92, "grad_norm": 0.03515625, "learning_rate": 0.00032933064938291525, "loss": 0.1643, "step": 287760 }, { "epoch": 11.92, "grad_norm": 0.96875, "learning_rate": 0.000329320364618589, "loss": 0.2225, "step": 287770 }, { "epoch": 11.92, "grad_norm": 0.484375, "learning_rate": 0.0003293100797049904, "loss": 0.1615, "step": 287780 }, { "epoch": 11.92, "grad_norm": 0.640625, "learning_rate": 0.00032929979464213873, "loss": 0.2223, "step": 287790 }, { "epoch": 11.92, "grad_norm": 0.953125, "learning_rate": 0.0003292895094300535, "loss": 0.179, "step": 287800 }, { "epoch": 11.92, "grad_norm": 0.5546875, "learning_rate": 0.0003292792240687539, "loss": 0.2031, "step": 287810 }, { "epoch": 11.92, "grad_norm": 1.4140625, "learning_rate": 0.0003292689385582594, "loss": 0.1677, "step": 287820 }, { "epoch": 11.92, "grad_norm": 0.6640625, "learning_rate": 0.0003292586528985894, "loss": 0.1798, "step": 287830 }, { "epoch": 11.92, "grad_norm": 0.640625, "learning_rate": 0.00032924836708976297, "loss": 0.1803, "step": 287840 }, { "epoch": 11.92, "grad_norm": 0.396484375, "learning_rate": 0.0003292380811317998, "loss": 0.2112, "step": 287850 }, { "epoch": 11.92, "grad_norm": 0.400390625, "learning_rate": 0.0003292277950247191, "loss": 0.1757, "step": 287860 }, { "epoch": 11.92, "grad_norm": 1.09375, "learning_rate": 0.0003292175087685403, "loss": 0.1913, "step": 287870 }, { "epoch": 11.92, "grad_norm": 1.6953125, "learning_rate": 0.00032920722236328266, "loss": 0.2451, "step": 287880 }, { "epoch": 11.92, "grad_norm": 1.0625, "learning_rate": 0.00032919693580896555, "loss": 0.2036, "step": 287890 }, { "epoch": 11.92, "grad_norm": 0.0, "learning_rate": 0.0003291866491056084, "loss": 0.2126, "step": 287900 }, { "epoch": 11.93, "grad_norm": 0.400390625, "learning_rate": 0.00032917636225323054, "loss": 0.1447, "step": 287910 }, { "epoch": 11.93, "grad_norm": 1.828125, "learning_rate": 0.00032916607525185123, "loss": 0.2427, "step": 287920 }, { "epoch": 11.93, "grad_norm": 0.42578125, "learning_rate": 0.00032915578810149, "loss": 0.1662, "step": 287930 }, { "epoch": 11.93, "grad_norm": 1.375, "learning_rate": 0.0003291455008021661, "loss": 0.2045, "step": 287940 }, { "epoch": 11.93, "grad_norm": 0.484375, "learning_rate": 0.0003291352133538989, "loss": 0.1719, "step": 287950 }, { "epoch": 11.93, "grad_norm": 0.431640625, "learning_rate": 0.0003291249257567078, "loss": 0.1923, "step": 287960 }, { "epoch": 11.93, "grad_norm": 0.58984375, "learning_rate": 0.00032911463801061215, "loss": 0.2312, "step": 287970 }, { "epoch": 11.93, "grad_norm": 0.84765625, "learning_rate": 0.00032910435011563124, "loss": 0.2062, "step": 287980 }, { "epoch": 11.93, "grad_norm": 0.443359375, "learning_rate": 0.00032909406207178455, "loss": 0.1981, "step": 287990 }, { "epoch": 11.93, "grad_norm": 0.400390625, "learning_rate": 0.00032908377387909127, "loss": 0.1814, "step": 288000 }, { "epoch": 11.93, "grad_norm": 0.54296875, "learning_rate": 0.000329073485537571, "loss": 0.1871, "step": 288010 }, { "epoch": 11.93, "grad_norm": 0.74609375, "learning_rate": 0.0003290631970472429, "loss": 0.1924, "step": 288020 }, { "epoch": 11.93, "grad_norm": 0.9375, "learning_rate": 0.00032905290840812637, "loss": 0.1642, "step": 288030 }, { "epoch": 11.93, "grad_norm": 0.75, "learning_rate": 0.00032904261962024084, "loss": 0.1905, "step": 288040 }, { "epoch": 11.93, "grad_norm": 0.71875, "learning_rate": 0.00032903233068360565, "loss": 0.1978, "step": 288050 }, { "epoch": 11.93, "grad_norm": 0.408203125, "learning_rate": 0.0003290220415982401, "loss": 0.2307, "step": 288060 }, { "epoch": 11.93, "grad_norm": 0.58203125, "learning_rate": 0.00032901175236416363, "loss": 0.2125, "step": 288070 }, { "epoch": 11.93, "grad_norm": 0.55859375, "learning_rate": 0.0003290014629813956, "loss": 0.2711, "step": 288080 }, { "epoch": 11.93, "grad_norm": 1.109375, "learning_rate": 0.00032899117344995535, "loss": 0.2168, "step": 288090 }, { "epoch": 11.93, "grad_norm": 0.70703125, "learning_rate": 0.00032898088376986224, "loss": 0.2188, "step": 288100 }, { "epoch": 11.93, "grad_norm": 0.333984375, "learning_rate": 0.0003289705939411356, "loss": 0.1641, "step": 288110 }, { "epoch": 11.93, "grad_norm": 0.41796875, "learning_rate": 0.00032896030396379483, "loss": 0.2206, "step": 288120 }, { "epoch": 11.93, "grad_norm": 1.1796875, "learning_rate": 0.00032895001383785935, "loss": 0.1881, "step": 288130 }, { "epoch": 11.93, "grad_norm": 0.30078125, "learning_rate": 0.0003289397235633484, "loss": 0.2116, "step": 288140 }, { "epoch": 11.94, "grad_norm": 0.5, "learning_rate": 0.00032892943314028145, "loss": 0.1869, "step": 288150 }, { "epoch": 11.94, "grad_norm": 1.8359375, "learning_rate": 0.00032891914256867784, "loss": 0.2092, "step": 288160 }, { "epoch": 11.94, "grad_norm": 0.7109375, "learning_rate": 0.0003289088518485569, "loss": 0.1874, "step": 288170 }, { "epoch": 11.94, "grad_norm": 0.94921875, "learning_rate": 0.000328898560979938, "loss": 0.2432, "step": 288180 }, { "epoch": 11.94, "grad_norm": 0.640625, "learning_rate": 0.0003288882699628406, "loss": 0.1985, "step": 288190 }, { "epoch": 11.94, "grad_norm": 0.84375, "learning_rate": 0.000328877978797284, "loss": 0.2476, "step": 288200 }, { "epoch": 11.94, "grad_norm": 1.203125, "learning_rate": 0.00032886768748328747, "loss": 0.2136, "step": 288210 }, { "epoch": 11.94, "grad_norm": 2.125, "learning_rate": 0.00032885739602087056, "loss": 0.2322, "step": 288220 }, { "epoch": 11.94, "grad_norm": 1.109375, "learning_rate": 0.0003288471044100525, "loss": 0.1502, "step": 288230 }, { "epoch": 11.94, "grad_norm": 0.5859375, "learning_rate": 0.0003288368126508527, "loss": 0.1761, "step": 288240 }, { "epoch": 11.94, "grad_norm": 0.6953125, "learning_rate": 0.00032882652074329056, "loss": 0.208, "step": 288250 }, { "epoch": 11.94, "grad_norm": 0.302734375, "learning_rate": 0.0003288162286873854, "loss": 0.2344, "step": 288260 }, { "epoch": 11.94, "grad_norm": 0.453125, "learning_rate": 0.0003288059364831566, "loss": 0.2153, "step": 288270 }, { "epoch": 11.94, "grad_norm": 1.015625, "learning_rate": 0.0003287956441306235, "loss": 0.1701, "step": 288280 }, { "epoch": 11.94, "grad_norm": 0.67578125, "learning_rate": 0.0003287853516298056, "loss": 0.1655, "step": 288290 }, { "epoch": 11.94, "grad_norm": 0.83984375, "learning_rate": 0.0003287750589807221, "loss": 0.2143, "step": 288300 }, { "epoch": 11.94, "grad_norm": 0.765625, "learning_rate": 0.0003287647661833925, "loss": 0.184, "step": 288310 }, { "epoch": 11.94, "grad_norm": 1.0078125, "learning_rate": 0.00032875447323783613, "loss": 0.197, "step": 288320 }, { "epoch": 11.94, "grad_norm": 0.3046875, "learning_rate": 0.0003287441801440723, "loss": 0.1477, "step": 288330 }, { "epoch": 11.94, "grad_norm": 0.6640625, "learning_rate": 0.0003287338869021205, "loss": 0.2026, "step": 288340 }, { "epoch": 11.94, "grad_norm": 0.765625, "learning_rate": 0.0003287235935119999, "loss": 0.1796, "step": 288350 }, { "epoch": 11.94, "grad_norm": 0.41015625, "learning_rate": 0.00032871329997373005, "loss": 0.2083, "step": 288360 }, { "epoch": 11.94, "grad_norm": 1.0859375, "learning_rate": 0.0003287030062873303, "loss": 0.1986, "step": 288370 }, { "epoch": 11.94, "grad_norm": 0.341796875, "learning_rate": 0.00032869271245282, "loss": 0.1893, "step": 288380 }, { "epoch": 11.95, "grad_norm": 0.76171875, "learning_rate": 0.0003286824184702185, "loss": 0.19, "step": 288390 }, { "epoch": 11.95, "grad_norm": 0.76171875, "learning_rate": 0.00032867212433954513, "loss": 0.2058, "step": 288400 }, { "epoch": 11.95, "grad_norm": 0.87890625, "learning_rate": 0.0003286618300608194, "loss": 0.2365, "step": 288410 }, { "epoch": 11.95, "grad_norm": 0.53125, "learning_rate": 0.0003286515356340606, "loss": 0.1935, "step": 288420 }, { "epoch": 11.95, "grad_norm": 1.1875, "learning_rate": 0.00032864124105928805, "loss": 0.1732, "step": 288430 }, { "epoch": 11.95, "grad_norm": 0.6640625, "learning_rate": 0.00032863094633652114, "loss": 0.2233, "step": 288440 }, { "epoch": 11.95, "grad_norm": 0.51953125, "learning_rate": 0.00032862065146577933, "loss": 0.1803, "step": 288450 }, { "epoch": 11.95, "grad_norm": 1.1796875, "learning_rate": 0.000328610356447082, "loss": 0.2491, "step": 288460 }, { "epoch": 11.95, "grad_norm": 1.171875, "learning_rate": 0.0003286000612804484, "loss": 0.1782, "step": 288470 }, { "epoch": 11.95, "grad_norm": 0.4765625, "learning_rate": 0.000328589765965898, "loss": 0.2006, "step": 288480 }, { "epoch": 11.95, "grad_norm": 0.6015625, "learning_rate": 0.0003285794705034501, "loss": 0.2009, "step": 288490 }, { "epoch": 11.95, "grad_norm": 0.61328125, "learning_rate": 0.0003285691748931242, "loss": 0.2087, "step": 288500 }, { "epoch": 11.95, "grad_norm": 0.80859375, "learning_rate": 0.00032855887913493955, "loss": 0.1858, "step": 288510 }, { "epoch": 11.95, "grad_norm": 0.66015625, "learning_rate": 0.0003285485832289156, "loss": 0.2156, "step": 288520 }, { "epoch": 11.95, "grad_norm": 1.8203125, "learning_rate": 0.0003285382871750716, "loss": 0.1837, "step": 288530 }, { "epoch": 11.95, "grad_norm": 0.396484375, "learning_rate": 0.0003285279909734272, "loss": 0.2495, "step": 288540 }, { "epoch": 11.95, "grad_norm": 0.8828125, "learning_rate": 0.00032851769462400145, "loss": 0.1734, "step": 288550 }, { "epoch": 11.95, "grad_norm": 0.73046875, "learning_rate": 0.000328507398126814, "loss": 0.1966, "step": 288560 }, { "epoch": 11.95, "grad_norm": 0.90625, "learning_rate": 0.00032849710148188395, "loss": 0.2494, "step": 288570 }, { "epoch": 11.95, "grad_norm": 1.4453125, "learning_rate": 0.00032848680468923095, "loss": 0.1708, "step": 288580 }, { "epoch": 11.95, "grad_norm": 0.53125, "learning_rate": 0.0003284765077488742, "loss": 0.1999, "step": 288590 }, { "epoch": 11.95, "grad_norm": 0.2470703125, "learning_rate": 0.00032846621066083315, "loss": 0.2396, "step": 288600 }, { "epoch": 11.95, "grad_norm": 0.52734375, "learning_rate": 0.0003284559134251272, "loss": 0.1785, "step": 288610 }, { "epoch": 11.95, "grad_norm": 0.6796875, "learning_rate": 0.0003284456160417757, "loss": 0.2532, "step": 288620 }, { "epoch": 11.96, "grad_norm": 1.9375, "learning_rate": 0.000328435318510798, "loss": 0.2156, "step": 288630 }, { "epoch": 11.96, "grad_norm": 1.0625, "learning_rate": 0.0003284250208322135, "loss": 0.2156, "step": 288640 }, { "epoch": 11.96, "grad_norm": 0.5859375, "learning_rate": 0.0003284147230060416, "loss": 0.1981, "step": 288650 }, { "epoch": 11.96, "grad_norm": 0.671875, "learning_rate": 0.0003284044250323016, "loss": 0.1992, "step": 288660 }, { "epoch": 11.96, "grad_norm": 0.2216796875, "learning_rate": 0.000328394126911013, "loss": 0.191, "step": 288670 }, { "epoch": 11.96, "grad_norm": 1.25, "learning_rate": 0.0003283838286421951, "loss": 0.1803, "step": 288680 }, { "epoch": 11.96, "grad_norm": 0.38671875, "learning_rate": 0.00032837353022586725, "loss": 0.21, "step": 288690 }, { "epoch": 11.96, "grad_norm": 0.7421875, "learning_rate": 0.0003283632316620489, "loss": 0.1779, "step": 288700 }, { "epoch": 11.96, "grad_norm": 0.65625, "learning_rate": 0.0003283529329507595, "loss": 0.1846, "step": 288710 }, { "epoch": 11.96, "grad_norm": 1.0703125, "learning_rate": 0.0003283426340920183, "loss": 0.1894, "step": 288720 }, { "epoch": 11.96, "grad_norm": 1.578125, "learning_rate": 0.0003283323350858446, "loss": 0.191, "step": 288730 }, { "epoch": 11.96, "grad_norm": 0.8046875, "learning_rate": 0.00032832203593225807, "loss": 0.1945, "step": 288740 }, { "epoch": 11.96, "grad_norm": 1.8203125, "learning_rate": 0.0003283117366312779, "loss": 0.198, "step": 288750 }, { "epoch": 11.96, "grad_norm": 0.79296875, "learning_rate": 0.00032830143718292335, "loss": 0.1799, "step": 288760 }, { "epoch": 11.96, "grad_norm": 2.65625, "learning_rate": 0.00032829113758721406, "loss": 0.2189, "step": 288770 }, { "epoch": 11.96, "grad_norm": 0.73828125, "learning_rate": 0.00032828083784416934, "loss": 0.2192, "step": 288780 }, { "epoch": 11.96, "grad_norm": 0.60546875, "learning_rate": 0.0003282705379538086, "loss": 0.2084, "step": 288790 }, { "epoch": 11.96, "grad_norm": 0.765625, "learning_rate": 0.000328260237916151, "loss": 0.1978, "step": 288800 }, { "epoch": 11.96, "grad_norm": 1.0390625, "learning_rate": 0.00032824993773121616, "loss": 0.2116, "step": 288810 }, { "epoch": 11.96, "grad_norm": 0.37890625, "learning_rate": 0.00032823963739902335, "loss": 0.2004, "step": 288820 }, { "epoch": 11.96, "grad_norm": 1.359375, "learning_rate": 0.0003282293369195919, "loss": 0.1721, "step": 288830 }, { "epoch": 11.96, "grad_norm": 0.75, "learning_rate": 0.00032821903629294145, "loss": 0.2055, "step": 288840 }, { "epoch": 11.96, "grad_norm": 0.51171875, "learning_rate": 0.0003282087355190912, "loss": 0.1758, "step": 288850 }, { "epoch": 11.96, "grad_norm": 1.5703125, "learning_rate": 0.00032819843459806053, "loss": 0.2052, "step": 288860 }, { "epoch": 11.96, "grad_norm": 1.0625, "learning_rate": 0.0003281881335298689, "loss": 0.179, "step": 288870 }, { "epoch": 11.97, "grad_norm": 1.0078125, "learning_rate": 0.00032817783231453557, "loss": 0.1955, "step": 288880 }, { "epoch": 11.97, "grad_norm": 0.66015625, "learning_rate": 0.00032816753095208, "loss": 0.1959, "step": 288890 }, { "epoch": 11.97, "grad_norm": 0.5859375, "learning_rate": 0.00032815722944252166, "loss": 0.184, "step": 288900 }, { "epoch": 11.97, "grad_norm": 0.53515625, "learning_rate": 0.0003281469277858797, "loss": 0.1879, "step": 288910 }, { "epoch": 11.97, "grad_norm": 0.361328125, "learning_rate": 0.0003281366259821738, "loss": 0.1835, "step": 288920 }, { "epoch": 11.97, "grad_norm": 0.6015625, "learning_rate": 0.00032812632403142315, "loss": 0.2241, "step": 288930 }, { "epoch": 11.97, "grad_norm": 0.51171875, "learning_rate": 0.0003281160219336472, "loss": 0.1788, "step": 288940 }, { "epoch": 11.97, "grad_norm": 1.1640625, "learning_rate": 0.0003281057196888654, "loss": 0.234, "step": 288950 }, { "epoch": 11.97, "grad_norm": 0.396484375, "learning_rate": 0.00032809541729709703, "loss": 0.2093, "step": 288960 }, { "epoch": 11.97, "grad_norm": 1.1171875, "learning_rate": 0.00032808511475836144, "loss": 0.23, "step": 288970 }, { "epoch": 11.97, "grad_norm": 0.484375, "learning_rate": 0.0003280748120726782, "loss": 0.1187, "step": 288980 }, { "epoch": 11.97, "grad_norm": 0.92578125, "learning_rate": 0.0003280645092400665, "loss": 0.2374, "step": 288990 }, { "epoch": 11.97, "grad_norm": 0.001983642578125, "learning_rate": 0.00032805420626054596, "loss": 0.1801, "step": 289000 }, { "epoch": 11.97, "grad_norm": 0.98828125, "learning_rate": 0.0003280439031341357, "loss": 0.2424, "step": 289010 }, { "epoch": 11.97, "grad_norm": 0.330078125, "learning_rate": 0.0003280335998608553, "loss": 0.2041, "step": 289020 }, { "epoch": 11.97, "grad_norm": 0.81640625, "learning_rate": 0.0003280232964407241, "loss": 0.1493, "step": 289030 }, { "epoch": 11.97, "grad_norm": 0.91796875, "learning_rate": 0.00032801299287376143, "loss": 0.2012, "step": 289040 }, { "epoch": 11.97, "grad_norm": 0.6640625, "learning_rate": 0.00032800268915998676, "loss": 0.2153, "step": 289050 }, { "epoch": 11.97, "grad_norm": 0.67578125, "learning_rate": 0.0003279923852994195, "loss": 0.2259, "step": 289060 }, { "epoch": 11.97, "grad_norm": 0.921875, "learning_rate": 0.0003279820812920789, "loss": 0.2455, "step": 289070 }, { "epoch": 11.97, "grad_norm": 1.0, "learning_rate": 0.00032797177713798453, "loss": 0.1669, "step": 289080 }, { "epoch": 11.97, "grad_norm": 0.58203125, "learning_rate": 0.00032796147283715565, "loss": 0.1853, "step": 289090 }, { "epoch": 11.97, "grad_norm": 1.1015625, "learning_rate": 0.0003279511683896117, "loss": 0.2256, "step": 289100 }, { "epoch": 11.97, "grad_norm": 0.734375, "learning_rate": 0.0003279408637953721, "loss": 0.1628, "step": 289110 }, { "epoch": 11.98, "grad_norm": 0.734375, "learning_rate": 0.0003279305590544562, "loss": 0.2171, "step": 289120 }, { "epoch": 11.98, "grad_norm": 0.69921875, "learning_rate": 0.00032792025416688344, "loss": 0.2554, "step": 289130 }, { "epoch": 11.98, "grad_norm": 0.640625, "learning_rate": 0.00032790994913267315, "loss": 0.1852, "step": 289140 }, { "epoch": 11.98, "grad_norm": 0.59375, "learning_rate": 0.00032789964395184463, "loss": 0.2048, "step": 289150 }, { "epoch": 11.98, "grad_norm": 0.0, "learning_rate": 0.00032788933862441756, "loss": 0.1341, "step": 289160 }, { "epoch": 11.98, "grad_norm": 0.515625, "learning_rate": 0.00032787903315041107, "loss": 0.2042, "step": 289170 }, { "epoch": 11.98, "grad_norm": 0.86328125, "learning_rate": 0.0003278687275298446, "loss": 0.2158, "step": 289180 }, { "epoch": 11.98, "grad_norm": 1.03125, "learning_rate": 0.0003278584217627378, "loss": 0.1711, "step": 289190 }, { "epoch": 11.98, "grad_norm": 0.625, "learning_rate": 0.00032784811584910965, "loss": 0.1957, "step": 289200 }, { "epoch": 11.98, "grad_norm": 0.8125, "learning_rate": 0.0003278378097889799, "loss": 0.2167, "step": 289210 }, { "epoch": 11.98, "grad_norm": 0.8984375, "learning_rate": 0.00032782750358236774, "loss": 0.212, "step": 289220 }, { "epoch": 11.98, "grad_norm": 0.328125, "learning_rate": 0.0003278171972292925, "loss": 0.1534, "step": 289230 }, { "epoch": 11.98, "grad_norm": 0.388671875, "learning_rate": 0.0003278068907297739, "loss": 0.1894, "step": 289240 }, { "epoch": 11.98, "grad_norm": 0.8046875, "learning_rate": 0.00032779658408383104, "loss": 0.2102, "step": 289250 }, { "epoch": 11.98, "grad_norm": 1.1171875, "learning_rate": 0.0003277862772914834, "loss": 0.2185, "step": 289260 }, { "epoch": 11.98, "grad_norm": 1.2265625, "learning_rate": 0.0003277759703527504, "loss": 0.1768, "step": 289270 }, { "epoch": 11.98, "grad_norm": 0.33984375, "learning_rate": 0.0003277656632676514, "loss": 0.2159, "step": 289280 }, { "epoch": 11.98, "grad_norm": 0.86328125, "learning_rate": 0.0003277553560362059, "loss": 0.2179, "step": 289290 }, { "epoch": 11.98, "grad_norm": 1.125, "learning_rate": 0.0003277450486584332, "loss": 0.2209, "step": 289300 }, { "epoch": 11.98, "grad_norm": 1.3046875, "learning_rate": 0.00032773474113435266, "loss": 0.212, "step": 289310 }, { "epoch": 11.98, "grad_norm": 0.8515625, "learning_rate": 0.0003277244334639837, "loss": 0.1947, "step": 289320 }, { "epoch": 11.98, "grad_norm": 0.73046875, "learning_rate": 0.0003277141256473458, "loss": 0.2114, "step": 289330 }, { "epoch": 11.98, "grad_norm": 1.0703125, "learning_rate": 0.0003277038176844583, "loss": 0.2131, "step": 289340 }, { "epoch": 11.98, "grad_norm": 0.78125, "learning_rate": 0.0003276935095753407, "loss": 0.1936, "step": 289350 }, { "epoch": 11.99, "grad_norm": 1.8671875, "learning_rate": 0.0003276832013200121, "loss": 0.2022, "step": 289360 }, { "epoch": 11.99, "grad_norm": 0.61328125, "learning_rate": 0.0003276728929184923, "loss": 0.162, "step": 289370 }, { "epoch": 11.99, "grad_norm": 0.85546875, "learning_rate": 0.00032766258437080044, "loss": 0.2139, "step": 289380 }, { "epoch": 11.99, "grad_norm": 0.53125, "learning_rate": 0.0003276522756769559, "loss": 0.2066, "step": 289390 }, { "epoch": 11.99, "grad_norm": 0.2294921875, "learning_rate": 0.00032764196683697825, "loss": 0.1697, "step": 289400 }, { "epoch": 11.99, "grad_norm": 0.7265625, "learning_rate": 0.0003276316578508868, "loss": 0.1943, "step": 289410 }, { "epoch": 11.99, "grad_norm": 0.85546875, "learning_rate": 0.000327621348718701, "loss": 0.1626, "step": 289420 }, { "epoch": 11.99, "grad_norm": 0.71875, "learning_rate": 0.0003276110394404401, "loss": 0.2262, "step": 289430 }, { "epoch": 11.99, "grad_norm": 0.703125, "learning_rate": 0.0003276007300161236, "loss": 0.2243, "step": 289440 }, { "epoch": 11.99, "grad_norm": 2.15625, "learning_rate": 0.000327590420445771, "loss": 0.2059, "step": 289450 }, { "epoch": 11.99, "grad_norm": 0.83984375, "learning_rate": 0.0003275801107294015, "loss": 0.1694, "step": 289460 }, { "epoch": 11.99, "grad_norm": 0.74609375, "learning_rate": 0.0003275698008670347, "loss": 0.1714, "step": 289470 }, { "epoch": 11.99, "grad_norm": 1.2890625, "learning_rate": 0.0003275594908586899, "loss": 0.2145, "step": 289480 }, { "epoch": 11.99, "grad_norm": 0.73828125, "learning_rate": 0.0003275491807043865, "loss": 0.2237, "step": 289490 }, { "epoch": 11.99, "grad_norm": 0.4609375, "learning_rate": 0.00032753887040414394, "loss": 0.2198, "step": 289500 }, { "epoch": 11.99, "grad_norm": 1.1484375, "learning_rate": 0.00032752855995798157, "loss": 0.1396, "step": 289510 }, { "epoch": 11.99, "grad_norm": 0.66796875, "learning_rate": 0.00032751824936591876, "loss": 0.2205, "step": 289520 }, { "epoch": 11.99, "grad_norm": 1.109375, "learning_rate": 0.00032750793862797514, "loss": 0.2431, "step": 289530 }, { "epoch": 11.99, "grad_norm": 0.90234375, "learning_rate": 0.0003274976277441698, "loss": 0.1657, "step": 289540 }, { "epoch": 11.99, "grad_norm": 1.1875, "learning_rate": 0.0003274873167145223, "loss": 0.2339, "step": 289550 }, { "epoch": 11.99, "grad_norm": 0.7890625, "learning_rate": 0.00032747700553905214, "loss": 0.2224, "step": 289560 }, { "epoch": 11.99, "grad_norm": 0.90234375, "learning_rate": 0.00032746669421777853, "loss": 0.1531, "step": 289570 }, { "epoch": 11.99, "grad_norm": 0.0, "learning_rate": 0.000327456382750721, "loss": 0.1603, "step": 289580 }, { "epoch": 11.99, "grad_norm": 2.546875, "learning_rate": 0.0003274460711378989, "loss": 0.1944, "step": 289590 }, { "epoch": 12.0, "grad_norm": 1.125, "learning_rate": 0.00032743575937933166, "loss": 0.2111, "step": 289600 }, { "epoch": 12.0, "grad_norm": 0.765625, "learning_rate": 0.0003274254474750388, "loss": 0.1722, "step": 289610 }, { "epoch": 12.0, "grad_norm": 0.76953125, "learning_rate": 0.00032741513542503946, "loss": 0.217, "step": 289620 }, { "epoch": 12.0, "grad_norm": 0.357421875, "learning_rate": 0.0003274048232293533, "loss": 0.1867, "step": 289630 }, { "epoch": 12.0, "grad_norm": 0.49609375, "learning_rate": 0.00032739451088799955, "loss": 0.1823, "step": 289640 }, { "epoch": 12.0, "grad_norm": 0.451171875, "learning_rate": 0.0003273841984009977, "loss": 0.1837, "step": 289650 }, { "epoch": 12.0, "grad_norm": 1.1875, "learning_rate": 0.0003273738857683671, "loss": 0.1709, "step": 289660 }, { "epoch": 12.0, "grad_norm": 0.44921875, "learning_rate": 0.0003273635729901273, "loss": 0.2498, "step": 289670 }, { "epoch": 12.0, "grad_norm": 0.5546875, "learning_rate": 0.00032735326006629755, "loss": 0.1871, "step": 289680 }, { "epoch": 12.0, "grad_norm": 2.15625, "learning_rate": 0.00032734294699689735, "loss": 0.1879, "step": 289690 }, { "epoch": 12.0, "grad_norm": 0.93359375, "learning_rate": 0.00032733263378194604, "loss": 0.2316, "step": 289700 }, { "epoch": 12.0, "grad_norm": 0.2890625, "learning_rate": 0.0003273223204214631, "loss": 0.2323, "step": 289710 }, { "epoch": 12.0, "grad_norm": 0.8359375, "learning_rate": 0.00032731200691546794, "loss": 0.2035, "step": 289720 }, { "epoch": 12.0, "grad_norm": 0.8515625, "learning_rate": 0.00032730169326397986, "loss": 0.1696, "step": 289730 }, { "epoch": 12.0, "grad_norm": 1.390625, "learning_rate": 0.00032729137946701836, "loss": 0.2162, "step": 289740 }, { "epoch": 12.0, "grad_norm": 0.53125, "learning_rate": 0.0003272810655246028, "loss": 0.1495, "step": 289750 }, { "epoch": 12.0, "grad_norm": 1.2890625, "learning_rate": 0.00032727075143675267, "loss": 0.1842, "step": 289760 }, { "epoch": 12.0, "grad_norm": 0.298828125, "learning_rate": 0.0003272604372034873, "loss": 0.1879, "step": 289770 }, { "epoch": 12.0, "grad_norm": 0.6875, "learning_rate": 0.0003272501228248261, "loss": 0.1568, "step": 289780 }, { "epoch": 12.0, "grad_norm": 0.625, "learning_rate": 0.0003272398083007886, "loss": 0.2204, "step": 289790 }, { "epoch": 12.0, "grad_norm": 0.9453125, "learning_rate": 0.00032722949363139407, "loss": 0.189, "step": 289800 }, { "epoch": 12.0, "grad_norm": 0.8828125, "learning_rate": 0.000327219178816662, "loss": 0.1772, "step": 289810 }, { "epoch": 12.0, "grad_norm": 1.109375, "learning_rate": 0.00032720886385661173, "loss": 0.1706, "step": 289820 }, { "epoch": 12.0, "grad_norm": 0.93359375, "learning_rate": 0.00032719854875126275, "loss": 0.2117, "step": 289830 }, { "epoch": 12.01, "grad_norm": 0.482421875, "learning_rate": 0.0003271882335006344, "loss": 0.2128, "step": 289840 }, { "epoch": 12.01, "grad_norm": 0.8984375, "learning_rate": 0.00032717791810474626, "loss": 0.2623, "step": 289850 }, { "epoch": 12.01, "grad_norm": 0.49609375, "learning_rate": 0.00032716760256361744, "loss": 0.1588, "step": 289860 }, { "epoch": 12.01, "grad_norm": 0.1015625, "learning_rate": 0.0003271572868772676, "loss": 0.2184, "step": 289870 }, { "epoch": 12.01, "grad_norm": 1.4140625, "learning_rate": 0.00032714697104571606, "loss": 0.1546, "step": 289880 }, { "epoch": 12.01, "grad_norm": 0.66796875, "learning_rate": 0.0003271366550689823, "loss": 0.1916, "step": 289890 }, { "epoch": 12.01, "grad_norm": 0.6875, "learning_rate": 0.00032712633894708563, "loss": 0.1817, "step": 289900 }, { "epoch": 12.01, "grad_norm": 0.63671875, "learning_rate": 0.0003271160226800455, "loss": 0.2249, "step": 289910 }, { "epoch": 12.01, "grad_norm": 0.51171875, "learning_rate": 0.00032710570626788137, "loss": 0.1914, "step": 289920 }, { "epoch": 12.01, "grad_norm": 0.6171875, "learning_rate": 0.0003270953897106127, "loss": 0.2321, "step": 289930 }, { "epoch": 12.01, "grad_norm": 0.5234375, "learning_rate": 0.0003270850730082588, "loss": 0.1961, "step": 289940 }, { "epoch": 12.01, "grad_norm": 0.6875, "learning_rate": 0.00032707475616083903, "loss": 0.1722, "step": 289950 }, { "epoch": 12.01, "grad_norm": 1.0546875, "learning_rate": 0.00032706443916837296, "loss": 0.1577, "step": 289960 }, { "epoch": 12.01, "grad_norm": 0.8515625, "learning_rate": 0.00032705412203088, "loss": 0.197, "step": 289970 }, { "epoch": 12.01, "grad_norm": 0.87109375, "learning_rate": 0.00032704380474837943, "loss": 0.1574, "step": 289980 }, { "epoch": 12.01, "grad_norm": 0.36328125, "learning_rate": 0.00032703348732089074, "loss": 0.1637, "step": 289990 }, { "epoch": 12.01, "grad_norm": 0.41796875, "learning_rate": 0.00032702316974843326, "loss": 0.1872, "step": 290000 }, { "epoch": 12.01, "grad_norm": 0.66796875, "learning_rate": 0.0003270128520310266, "loss": 0.183, "step": 290010 }, { "epoch": 12.01, "grad_norm": 0.8203125, "learning_rate": 0.0003270025341686901, "loss": 0.1924, "step": 290020 }, { "epoch": 12.01, "grad_norm": 1.171875, "learning_rate": 0.0003269922161614431, "loss": 0.1999, "step": 290030 }, { "epoch": 12.01, "grad_norm": 1.2578125, "learning_rate": 0.000326981898009305, "loss": 0.1836, "step": 290040 }, { "epoch": 12.01, "grad_norm": 1.0078125, "learning_rate": 0.00032697157971229543, "loss": 0.2015, "step": 290050 }, { "epoch": 12.01, "grad_norm": 1.0625, "learning_rate": 0.0003269612612704336, "loss": 0.2205, "step": 290060 }, { "epoch": 12.01, "grad_norm": 1.0859375, "learning_rate": 0.00032695094268373885, "loss": 0.1937, "step": 290070 }, { "epoch": 12.02, "grad_norm": 0.2314453125, "learning_rate": 0.0003269406239522309, "loss": 0.2064, "step": 290080 }, { "epoch": 12.02, "grad_norm": 0.55859375, "learning_rate": 0.0003269303050759289, "loss": 0.1983, "step": 290090 }, { "epoch": 12.02, "grad_norm": 0.8984375, "learning_rate": 0.00032691998605485247, "loss": 0.1682, "step": 290100 }, { "epoch": 12.02, "grad_norm": 0.60546875, "learning_rate": 0.00032690966688902085, "loss": 0.1904, "step": 290110 }, { "epoch": 12.02, "grad_norm": 0.54296875, "learning_rate": 0.00032689934757845356, "loss": 0.1556, "step": 290120 }, { "epoch": 12.02, "grad_norm": 0.98828125, "learning_rate": 0.0003268890281231701, "loss": 0.1831, "step": 290130 }, { "epoch": 12.02, "grad_norm": 2.265625, "learning_rate": 0.0003268787085231897, "loss": 0.2013, "step": 290140 }, { "epoch": 12.02, "grad_norm": 0.90625, "learning_rate": 0.00032686838877853184, "loss": 0.2034, "step": 290150 }, { "epoch": 12.02, "grad_norm": 0.79296875, "learning_rate": 0.00032685806888921597, "loss": 0.175, "step": 290160 }, { "epoch": 12.02, "grad_norm": 1.0703125, "learning_rate": 0.0003268477488552616, "loss": 0.2139, "step": 290170 }, { "epoch": 12.02, "grad_norm": 0.345703125, "learning_rate": 0.0003268374286766881, "loss": 0.1972, "step": 290180 }, { "epoch": 12.02, "grad_norm": 0.86328125, "learning_rate": 0.00032682710835351477, "loss": 0.1843, "step": 290190 }, { "epoch": 12.02, "grad_norm": 0.87109375, "learning_rate": 0.0003268167878857611, "loss": 0.1769, "step": 290200 }, { "epoch": 12.02, "grad_norm": 0.80859375, "learning_rate": 0.0003268064672734466, "loss": 0.1614, "step": 290210 }, { "epoch": 12.02, "grad_norm": 0.546875, "learning_rate": 0.0003267961465165905, "loss": 0.1803, "step": 290220 }, { "epoch": 12.02, "grad_norm": 1.625, "learning_rate": 0.0003267858256152125, "loss": 0.2128, "step": 290230 }, { "epoch": 12.02, "grad_norm": 0.390625, "learning_rate": 0.0003267755045693318, "loss": 0.1948, "step": 290240 }, { "epoch": 12.02, "grad_norm": 1.953125, "learning_rate": 0.0003267651833789679, "loss": 0.2207, "step": 290250 }, { "epoch": 12.02, "grad_norm": 0.40625, "learning_rate": 0.0003267548620441402, "loss": 0.1955, "step": 290260 }, { "epoch": 12.02, "grad_norm": 0.57421875, "learning_rate": 0.0003267445405648681, "loss": 0.1915, "step": 290270 }, { "epoch": 12.02, "grad_norm": 0.578125, "learning_rate": 0.00032673421894117115, "loss": 0.1367, "step": 290280 }, { "epoch": 12.02, "grad_norm": 1.1484375, "learning_rate": 0.0003267238971730686, "loss": 0.1862, "step": 290290 }, { "epoch": 12.02, "grad_norm": 0.62890625, "learning_rate": 0.00032671357526057996, "loss": 0.231, "step": 290300 }, { "epoch": 12.02, "grad_norm": 0.6640625, "learning_rate": 0.00032670325320372477, "loss": 0.2291, "step": 290310 }, { "epoch": 12.03, "grad_norm": 1.078125, "learning_rate": 0.00032669293100252226, "loss": 0.1923, "step": 290320 }, { "epoch": 12.03, "grad_norm": 0.56640625, "learning_rate": 0.00032668260865699195, "loss": 0.1825, "step": 290330 }, { "epoch": 12.03, "grad_norm": 0.44140625, "learning_rate": 0.00032667228616715325, "loss": 0.1686, "step": 290340 }, { "epoch": 12.03, "grad_norm": 1.03125, "learning_rate": 0.00032666196353302556, "loss": 0.2424, "step": 290350 }, { "epoch": 12.03, "grad_norm": 0.6171875, "learning_rate": 0.0003266516407546284, "loss": 0.2003, "step": 290360 }, { "epoch": 12.03, "grad_norm": 0.326171875, "learning_rate": 0.0003266413178319811, "loss": 0.2415, "step": 290370 }, { "epoch": 12.03, "grad_norm": 0.275390625, "learning_rate": 0.00032663099476510304, "loss": 0.1828, "step": 290380 }, { "epoch": 12.03, "grad_norm": 0.90234375, "learning_rate": 0.00032662067155401385, "loss": 0.2043, "step": 290390 }, { "epoch": 12.03, "grad_norm": 0.40625, "learning_rate": 0.00032661034819873283, "loss": 0.2032, "step": 290400 }, { "epoch": 12.03, "grad_norm": 0.59375, "learning_rate": 0.0003266000246992793, "loss": 0.2022, "step": 290410 }, { "epoch": 12.03, "grad_norm": 1.1640625, "learning_rate": 0.0003265897010556729, "loss": 0.22, "step": 290420 }, { "epoch": 12.03, "grad_norm": 0.8984375, "learning_rate": 0.0003265793772679328, "loss": 0.1909, "step": 290430 }, { "epoch": 12.03, "grad_norm": 1.140625, "learning_rate": 0.00032656905333607874, "loss": 0.2475, "step": 290440 }, { "epoch": 12.03, "grad_norm": 0.333984375, "learning_rate": 0.00032655872926012984, "loss": 0.2317, "step": 290450 }, { "epoch": 12.03, "grad_norm": 1.21875, "learning_rate": 0.0003265484050401058, "loss": 0.1987, "step": 290460 }, { "epoch": 12.03, "grad_norm": 0.70703125, "learning_rate": 0.000326538080676026, "loss": 0.2027, "step": 290470 }, { "epoch": 12.03, "grad_norm": 1.390625, "learning_rate": 0.00032652775616790966, "loss": 0.1823, "step": 290480 }, { "epoch": 12.03, "grad_norm": 1.7890625, "learning_rate": 0.00032651743151577636, "loss": 0.2311, "step": 290490 }, { "epoch": 12.03, "grad_norm": 2.0, "learning_rate": 0.00032650710671964557, "loss": 0.1831, "step": 290500 }, { "epoch": 12.03, "grad_norm": 1.0546875, "learning_rate": 0.0003264967817795366, "loss": 0.1475, "step": 290510 }, { "epoch": 12.03, "grad_norm": 0.80859375, "learning_rate": 0.00032648645669546895, "loss": 0.1818, "step": 290520 }, { "epoch": 12.03, "grad_norm": 0.8828125, "learning_rate": 0.0003264761314674621, "loss": 0.1684, "step": 290530 }, { "epoch": 12.03, "grad_norm": 0.43359375, "learning_rate": 0.0003264658060955354, "loss": 0.2159, "step": 290540 }, { "epoch": 12.03, "grad_norm": 0.0, "learning_rate": 0.00032645548057970837, "loss": 0.201, "step": 290550 }, { "epoch": 12.03, "grad_norm": 0.6328125, "learning_rate": 0.0003264451549200003, "loss": 0.2175, "step": 290560 }, { "epoch": 12.04, "grad_norm": 0.671875, "learning_rate": 0.00032643482911643074, "loss": 0.182, "step": 290570 }, { "epoch": 12.04, "grad_norm": 0.421875, "learning_rate": 0.0003264245031690191, "loss": 0.1945, "step": 290580 }, { "epoch": 12.04, "grad_norm": 0.82421875, "learning_rate": 0.00032641417707778475, "loss": 0.1983, "step": 290590 }, { "epoch": 12.04, "grad_norm": 2.3125, "learning_rate": 0.0003264038508427472, "loss": 0.1834, "step": 290600 }, { "epoch": 12.04, "grad_norm": 0.75390625, "learning_rate": 0.0003263935244639259, "loss": 0.1728, "step": 290610 }, { "epoch": 12.04, "grad_norm": 0.53515625, "learning_rate": 0.00032638319794134015, "loss": 0.1757, "step": 290620 }, { "epoch": 12.04, "grad_norm": 0.46875, "learning_rate": 0.00032637287127500956, "loss": 0.2147, "step": 290630 }, { "epoch": 12.04, "grad_norm": 0.8125, "learning_rate": 0.0003263625444649534, "loss": 0.2034, "step": 290640 }, { "epoch": 12.04, "grad_norm": 1.3046875, "learning_rate": 0.0003263522175111912, "loss": 0.2083, "step": 290650 }, { "epoch": 12.04, "grad_norm": 0.94921875, "learning_rate": 0.0003263418904137424, "loss": 0.2319, "step": 290660 }, { "epoch": 12.04, "grad_norm": 0.671875, "learning_rate": 0.00032633156317262633, "loss": 0.182, "step": 290670 }, { "epoch": 12.04, "grad_norm": 0.66015625, "learning_rate": 0.0003263212357878626, "loss": 0.1887, "step": 290680 }, { "epoch": 12.04, "grad_norm": 0.8203125, "learning_rate": 0.00032631090825947047, "loss": 0.1548, "step": 290690 }, { "epoch": 12.04, "grad_norm": 1.1640625, "learning_rate": 0.0003263005805874695, "loss": 0.2415, "step": 290700 }, { "epoch": 12.04, "grad_norm": 1.203125, "learning_rate": 0.0003262902527718791, "loss": 0.1944, "step": 290710 }, { "epoch": 12.04, "grad_norm": 0.7109375, "learning_rate": 0.0003262799248127186, "loss": 0.1859, "step": 290720 }, { "epoch": 12.04, "grad_norm": 0.91796875, "learning_rate": 0.00032626959671000754, "loss": 0.1539, "step": 290730 }, { "epoch": 12.04, "grad_norm": 1.6171875, "learning_rate": 0.0003262592684637654, "loss": 0.2004, "step": 290740 }, { "epoch": 12.04, "grad_norm": 0.78125, "learning_rate": 0.0003262489400740115, "loss": 0.2147, "step": 290750 }, { "epoch": 12.04, "grad_norm": 1.375, "learning_rate": 0.00032623861154076536, "loss": 0.1936, "step": 290760 }, { "epoch": 12.04, "grad_norm": 0.455078125, "learning_rate": 0.00032622828286404633, "loss": 0.1964, "step": 290770 }, { "epoch": 12.04, "grad_norm": 1.0078125, "learning_rate": 0.0003262179540438739, "loss": 0.2135, "step": 290780 }, { "epoch": 12.04, "grad_norm": 0.95703125, "learning_rate": 0.00032620762508026756, "loss": 0.2062, "step": 290790 }, { "epoch": 12.04, "grad_norm": 1.4296875, "learning_rate": 0.00032619729597324664, "loss": 0.2143, "step": 290800 }, { "epoch": 12.05, "grad_norm": 1.421875, "learning_rate": 0.00032618696672283076, "loss": 0.2066, "step": 290810 }, { "epoch": 12.05, "grad_norm": 0.34765625, "learning_rate": 0.00032617663732903917, "loss": 0.1634, "step": 290820 }, { "epoch": 12.05, "grad_norm": 0.369140625, "learning_rate": 0.00032616630779189133, "loss": 0.1785, "step": 290830 }, { "epoch": 12.05, "grad_norm": 0.291015625, "learning_rate": 0.00032615597811140677, "loss": 0.1648, "step": 290840 }, { "epoch": 12.05, "grad_norm": 0.40625, "learning_rate": 0.0003261456482876049, "loss": 0.2033, "step": 290850 }, { "epoch": 12.05, "grad_norm": 0.7578125, "learning_rate": 0.00032613531832050507, "loss": 0.1219, "step": 290860 }, { "epoch": 12.05, "grad_norm": 0.193359375, "learning_rate": 0.0003261249882101269, "loss": 0.1662, "step": 290870 }, { "epoch": 12.05, "grad_norm": 0.625, "learning_rate": 0.0003261146579564896, "loss": 0.1354, "step": 290880 }, { "epoch": 12.05, "grad_norm": 1.1328125, "learning_rate": 0.0003261043275596128, "loss": 0.1627, "step": 290890 }, { "epoch": 12.05, "grad_norm": 0.83984375, "learning_rate": 0.0003260939970195159, "loss": 0.1326, "step": 290900 }, { "epoch": 12.05, "grad_norm": 0.4765625, "learning_rate": 0.00032608366633621826, "loss": 0.1422, "step": 290910 }, { "epoch": 12.05, "grad_norm": 1.015625, "learning_rate": 0.0003260733355097394, "loss": 0.175, "step": 290920 }, { "epoch": 12.05, "grad_norm": 1.5234375, "learning_rate": 0.00032606300454009874, "loss": 0.2131, "step": 290930 }, { "epoch": 12.05, "grad_norm": 1.234375, "learning_rate": 0.0003260526734273157, "loss": 0.1923, "step": 290940 }, { "epoch": 12.05, "grad_norm": 0.373046875, "learning_rate": 0.0003260423421714098, "loss": 0.1594, "step": 290950 }, { "epoch": 12.05, "grad_norm": 1.2109375, "learning_rate": 0.00032603201077240036, "loss": 0.1577, "step": 290960 }, { "epoch": 12.05, "grad_norm": 0.9296875, "learning_rate": 0.00032602167923030694, "loss": 0.1758, "step": 290970 }, { "epoch": 12.05, "grad_norm": 0.59375, "learning_rate": 0.0003260113475451489, "loss": 0.201, "step": 290980 }, { "epoch": 12.05, "grad_norm": 0.5859375, "learning_rate": 0.0003260010157169457, "loss": 0.1983, "step": 290990 }, { "epoch": 12.05, "grad_norm": 4.78125, "learning_rate": 0.00032599068374571685, "loss": 0.2055, "step": 291000 }, { "epoch": 12.05, "grad_norm": 1.0859375, "learning_rate": 0.00032598035163148166, "loss": 0.2095, "step": 291010 }, { "epoch": 12.05, "grad_norm": 0.87890625, "learning_rate": 0.0003259700193742597, "loss": 0.2199, "step": 291020 }, { "epoch": 12.05, "grad_norm": 2.265625, "learning_rate": 0.0003259596869740704, "loss": 0.19, "step": 291030 }, { "epoch": 12.05, "grad_norm": 0.96484375, "learning_rate": 0.00032594935443093303, "loss": 0.2066, "step": 291040 }, { "epoch": 12.06, "grad_norm": 1.1015625, "learning_rate": 0.0003259390217448673, "loss": 0.2051, "step": 291050 }, { "epoch": 12.06, "grad_norm": 0.7421875, "learning_rate": 0.00032592868891589255, "loss": 0.2519, "step": 291060 }, { "epoch": 12.06, "grad_norm": 1.3046875, "learning_rate": 0.0003259183559440281, "loss": 0.2104, "step": 291070 }, { "epoch": 12.06, "grad_norm": 0.91796875, "learning_rate": 0.00032590802282929366, "loss": 0.1884, "step": 291080 }, { "epoch": 12.06, "grad_norm": 0.87109375, "learning_rate": 0.0003258976895717084, "loss": 0.2289, "step": 291090 }, { "epoch": 12.06, "grad_norm": 0.55859375, "learning_rate": 0.0003258873561712919, "loss": 0.1861, "step": 291100 }, { "epoch": 12.06, "grad_norm": 0.3046875, "learning_rate": 0.00032587702262806356, "loss": 0.1863, "step": 291110 }, { "epoch": 12.06, "grad_norm": 0.8828125, "learning_rate": 0.0003258666889420429, "loss": 0.2048, "step": 291120 }, { "epoch": 12.06, "grad_norm": 1.046875, "learning_rate": 0.0003258563551132494, "loss": 0.2259, "step": 291130 }, { "epoch": 12.06, "grad_norm": 0.92578125, "learning_rate": 0.0003258460211417023, "loss": 0.1812, "step": 291140 }, { "epoch": 12.06, "grad_norm": 0.79296875, "learning_rate": 0.00032583568702742124, "loss": 0.2362, "step": 291150 }, { "epoch": 12.06, "grad_norm": 0.859375, "learning_rate": 0.00032582535277042563, "loss": 0.2422, "step": 291160 }, { "epoch": 12.06, "grad_norm": 1.109375, "learning_rate": 0.00032581501837073486, "loss": 0.1745, "step": 291170 }, { "epoch": 12.06, "grad_norm": 0.87109375, "learning_rate": 0.0003258046838283684, "loss": 0.1584, "step": 291180 }, { "epoch": 12.06, "grad_norm": 0.2353515625, "learning_rate": 0.0003257943491433457, "loss": 0.1953, "step": 291190 }, { "epoch": 12.06, "grad_norm": 0.7265625, "learning_rate": 0.00032578401431568625, "loss": 0.2028, "step": 291200 }, { "epoch": 12.06, "grad_norm": 0.73046875, "learning_rate": 0.00032577367934540953, "loss": 0.1989, "step": 291210 }, { "epoch": 12.06, "grad_norm": 1.3671875, "learning_rate": 0.00032576334423253486, "loss": 0.2555, "step": 291220 }, { "epoch": 12.06, "grad_norm": 1.7109375, "learning_rate": 0.0003257530089770817, "loss": 0.2057, "step": 291230 }, { "epoch": 12.06, "grad_norm": 0.5, "learning_rate": 0.0003257426735790697, "loss": 0.1992, "step": 291240 }, { "epoch": 12.06, "grad_norm": 0.408203125, "learning_rate": 0.0003257323380385181, "loss": 0.2227, "step": 291250 }, { "epoch": 12.06, "grad_norm": 0.46875, "learning_rate": 0.0003257220023554464, "loss": 0.2202, "step": 291260 }, { "epoch": 12.06, "grad_norm": 0.49609375, "learning_rate": 0.00032571166652987406, "loss": 0.1935, "step": 291270 }, { "epoch": 12.06, "grad_norm": 0.6875, "learning_rate": 0.0003257013305618205, "loss": 0.1998, "step": 291280 }, { "epoch": 12.07, "grad_norm": 0.92578125, "learning_rate": 0.0003256909944513053, "loss": 0.2335, "step": 291290 }, { "epoch": 12.07, "grad_norm": 1.3828125, "learning_rate": 0.0003256806581983478, "loss": 0.1859, "step": 291300 }, { "epoch": 12.07, "grad_norm": 1.9765625, "learning_rate": 0.0003256703218029675, "loss": 0.1809, "step": 291310 }, { "epoch": 12.07, "grad_norm": 0.62109375, "learning_rate": 0.00032565998526518374, "loss": 0.1647, "step": 291320 }, { "epoch": 12.07, "grad_norm": 0.98828125, "learning_rate": 0.0003256496485850161, "loss": 0.1563, "step": 291330 }, { "epoch": 12.07, "grad_norm": 1.40625, "learning_rate": 0.000325639311762484, "loss": 0.2055, "step": 291340 }, { "epoch": 12.07, "grad_norm": 0.53125, "learning_rate": 0.0003256289747976069, "loss": 0.1698, "step": 291350 }, { "epoch": 12.07, "grad_norm": 0.80859375, "learning_rate": 0.00032561863769040424, "loss": 0.1987, "step": 291360 }, { "epoch": 12.07, "grad_norm": 0.64453125, "learning_rate": 0.00032560830044089543, "loss": 0.1888, "step": 291370 }, { "epoch": 12.07, "grad_norm": 1.84375, "learning_rate": 0.0003255979630491, "loss": 0.2115, "step": 291380 }, { "epoch": 12.07, "grad_norm": 0.91796875, "learning_rate": 0.00032558762551503735, "loss": 0.148, "step": 291390 }, { "epoch": 12.07, "grad_norm": 0.6171875, "learning_rate": 0.00032557728783872694, "loss": 0.1541, "step": 291400 }, { "epoch": 12.07, "grad_norm": 0.9921875, "learning_rate": 0.0003255669500201883, "loss": 0.2216, "step": 291410 }, { "epoch": 12.07, "grad_norm": 0.640625, "learning_rate": 0.00032555661205944074, "loss": 0.1601, "step": 291420 }, { "epoch": 12.07, "grad_norm": 0.89453125, "learning_rate": 0.0003255462739565038, "loss": 0.227, "step": 291430 }, { "epoch": 12.07, "grad_norm": 1.0, "learning_rate": 0.00032553593571139694, "loss": 0.154, "step": 291440 }, { "epoch": 12.07, "grad_norm": 1.7265625, "learning_rate": 0.00032552559732413965, "loss": 0.1776, "step": 291450 }, { "epoch": 12.07, "grad_norm": 0.2236328125, "learning_rate": 0.0003255152587947513, "loss": 0.1938, "step": 291460 }, { "epoch": 12.07, "grad_norm": 1.7421875, "learning_rate": 0.0003255049201232514, "loss": 0.1622, "step": 291470 }, { "epoch": 12.07, "grad_norm": 0.8671875, "learning_rate": 0.0003254945813096594, "loss": 0.1974, "step": 291480 }, { "epoch": 12.07, "grad_norm": 1.1953125, "learning_rate": 0.00032548424235399474, "loss": 0.2106, "step": 291490 }, { "epoch": 12.07, "grad_norm": 1.109375, "learning_rate": 0.0003254739032562769, "loss": 0.219, "step": 291500 }, { "epoch": 12.07, "grad_norm": 0.7734375, "learning_rate": 0.00032546356401652534, "loss": 0.2102, "step": 291510 }, { "epoch": 12.07, "grad_norm": 0.80859375, "learning_rate": 0.0003254532246347595, "loss": 0.2396, "step": 291520 }, { "epoch": 12.08, "grad_norm": 1.296875, "learning_rate": 0.0003254428851109987, "loss": 0.1902, "step": 291530 }, { "epoch": 12.08, "grad_norm": 2.28125, "learning_rate": 0.00032543254544526275, "loss": 0.2198, "step": 291540 }, { "epoch": 12.08, "grad_norm": 1.0234375, "learning_rate": 0.0003254222056375708, "loss": 0.1826, "step": 291550 }, { "epoch": 12.08, "grad_norm": 1.6484375, "learning_rate": 0.0003254118656879424, "loss": 0.1977, "step": 291560 }, { "epoch": 12.08, "grad_norm": 0.77734375, "learning_rate": 0.00032540152559639704, "loss": 0.2411, "step": 291570 }, { "epoch": 12.08, "grad_norm": 0.46484375, "learning_rate": 0.0003253911853629541, "loss": 0.139, "step": 291580 }, { "epoch": 12.08, "grad_norm": 0.578125, "learning_rate": 0.0003253808449876331, "loss": 0.1507, "step": 291590 }, { "epoch": 12.08, "grad_norm": 1.125, "learning_rate": 0.00032537050447045354, "loss": 0.1804, "step": 291600 }, { "epoch": 12.08, "grad_norm": 0.55078125, "learning_rate": 0.00032536016381143476, "loss": 0.1609, "step": 291610 }, { "epoch": 12.08, "grad_norm": 1.5859375, "learning_rate": 0.00032534982301059636, "loss": 0.1486, "step": 291620 }, { "epoch": 12.08, "grad_norm": 0.921875, "learning_rate": 0.0003253394820679577, "loss": 0.1888, "step": 291630 }, { "epoch": 12.08, "grad_norm": 0.8125, "learning_rate": 0.0003253291409835383, "loss": 0.2015, "step": 291640 }, { "epoch": 12.08, "grad_norm": 0.84375, "learning_rate": 0.00032531879975735757, "loss": 0.2152, "step": 291650 }, { "epoch": 12.08, "grad_norm": 0.55859375, "learning_rate": 0.000325308458389435, "loss": 0.1187, "step": 291660 }, { "epoch": 12.08, "grad_norm": 0.8671875, "learning_rate": 0.00032529811687979, "loss": 0.1946, "step": 291670 }, { "epoch": 12.08, "grad_norm": 0.6015625, "learning_rate": 0.0003252877752284421, "loss": 0.2077, "step": 291680 }, { "epoch": 12.08, "grad_norm": 0.8203125, "learning_rate": 0.00032527743343541073, "loss": 0.2012, "step": 291690 }, { "epoch": 12.08, "grad_norm": 1.15625, "learning_rate": 0.0003252670915007154, "loss": 0.1893, "step": 291700 }, { "epoch": 12.08, "grad_norm": 1.25, "learning_rate": 0.0003252567494243755, "loss": 0.22, "step": 291710 }, { "epoch": 12.08, "grad_norm": 0.423828125, "learning_rate": 0.00032524640720641053, "loss": 0.169, "step": 291720 }, { "epoch": 12.08, "grad_norm": 0.8203125, "learning_rate": 0.00032523606484684, "loss": 0.1983, "step": 291730 }, { "epoch": 12.08, "grad_norm": 0.59765625, "learning_rate": 0.00032522572234568316, "loss": 0.1779, "step": 291740 }, { "epoch": 12.08, "grad_norm": 1.1328125, "learning_rate": 0.0003252153797029598, "loss": 0.2356, "step": 291750 }, { "epoch": 12.08, "grad_norm": 0.76953125, "learning_rate": 0.00032520503691868915, "loss": 0.1837, "step": 291760 }, { "epoch": 12.09, "grad_norm": 0.92578125, "learning_rate": 0.0003251946939928907, "loss": 0.1501, "step": 291770 }, { "epoch": 12.09, "grad_norm": 0.7734375, "learning_rate": 0.00032518435092558407, "loss": 0.1901, "step": 291780 }, { "epoch": 12.09, "grad_norm": 1.015625, "learning_rate": 0.0003251740077167885, "loss": 0.2066, "step": 291790 }, { "epoch": 12.09, "grad_norm": 1.140625, "learning_rate": 0.0003251636643665236, "loss": 0.2295, "step": 291800 }, { "epoch": 12.09, "grad_norm": 0.70703125, "learning_rate": 0.0003251533208748089, "loss": 0.1771, "step": 291810 }, { "epoch": 12.09, "grad_norm": 0.77734375, "learning_rate": 0.00032514297724166357, "loss": 0.2416, "step": 291820 }, { "epoch": 12.09, "grad_norm": 0.419921875, "learning_rate": 0.00032513263346710744, "loss": 0.226, "step": 291830 }, { "epoch": 12.09, "grad_norm": 0.703125, "learning_rate": 0.00032512228955115975, "loss": 0.1951, "step": 291840 }, { "epoch": 12.09, "grad_norm": 0.8671875, "learning_rate": 0.00032511194549384, "loss": 0.1782, "step": 291850 }, { "epoch": 12.09, "grad_norm": 0.57421875, "learning_rate": 0.00032510160129516774, "loss": 0.1991, "step": 291860 }, { "epoch": 12.09, "grad_norm": 1.015625, "learning_rate": 0.0003250912569551623, "loss": 0.1589, "step": 291870 }, { "epoch": 12.09, "grad_norm": 1.2578125, "learning_rate": 0.0003250809124738433, "loss": 0.1694, "step": 291880 }, { "epoch": 12.09, "grad_norm": 1.375, "learning_rate": 0.0003250705678512301, "loss": 0.2277, "step": 291890 }, { "epoch": 12.09, "grad_norm": 0.89453125, "learning_rate": 0.0003250602230873421, "loss": 0.1875, "step": 291900 }, { "epoch": 12.09, "grad_norm": 0.6640625, "learning_rate": 0.00032504987818219905, "loss": 0.1868, "step": 291910 }, { "epoch": 12.09, "grad_norm": 0.8671875, "learning_rate": 0.0003250395331358201, "loss": 0.1831, "step": 291920 }, { "epoch": 12.09, "grad_norm": 0.6875, "learning_rate": 0.0003250291879482249, "loss": 0.19, "step": 291930 }, { "epoch": 12.09, "grad_norm": 0.9921875, "learning_rate": 0.00032501884261943294, "loss": 0.2254, "step": 291940 }, { "epoch": 12.09, "grad_norm": 0.83203125, "learning_rate": 0.00032500849714946347, "loss": 0.2149, "step": 291950 }, { "epoch": 12.09, "grad_norm": 2.484375, "learning_rate": 0.0003249981515383362, "loss": 0.1974, "step": 291960 }, { "epoch": 12.09, "grad_norm": 1.5625, "learning_rate": 0.0003249878057860705, "loss": 0.1553, "step": 291970 }, { "epoch": 12.09, "grad_norm": 0.54296875, "learning_rate": 0.0003249774598926858, "loss": 0.2016, "step": 291980 }, { "epoch": 12.09, "grad_norm": 1.1328125, "learning_rate": 0.0003249671138582017, "loss": 0.217, "step": 291990 }, { "epoch": 12.09, "grad_norm": 0.7578125, "learning_rate": 0.0003249567676826375, "loss": 0.22, "step": 292000 }, { "epoch": 12.1, "grad_norm": 1.0234375, "learning_rate": 0.00032494642136601283, "loss": 0.2204, "step": 292010 }, { "epoch": 12.1, "grad_norm": 0.55859375, "learning_rate": 0.0003249360749083471, "loss": 0.2064, "step": 292020 }, { "epoch": 12.1, "grad_norm": 0.2490234375, "learning_rate": 0.0003249257283096596, "loss": 0.1686, "step": 292030 }, { "epoch": 12.1, "grad_norm": 1.6640625, "learning_rate": 0.0003249153815699702, "loss": 0.2741, "step": 292040 }, { "epoch": 12.1, "grad_norm": 1.3984375, "learning_rate": 0.00032490503468929804, "loss": 0.2177, "step": 292050 }, { "epoch": 12.1, "grad_norm": 0.98046875, "learning_rate": 0.00032489468766766266, "loss": 0.2014, "step": 292060 }, { "epoch": 12.1, "grad_norm": 0.6796875, "learning_rate": 0.00032488434050508366, "loss": 0.1768, "step": 292070 }, { "epoch": 12.1, "grad_norm": 0.95703125, "learning_rate": 0.00032487399320158027, "loss": 0.1562, "step": 292080 }, { "epoch": 12.1, "grad_norm": 0.412109375, "learning_rate": 0.0003248636457571722, "loss": 0.2148, "step": 292090 }, { "epoch": 12.1, "grad_norm": 0.9375, "learning_rate": 0.0003248532981718789, "loss": 0.1754, "step": 292100 }, { "epoch": 12.1, "grad_norm": 2.265625, "learning_rate": 0.0003248429504457197, "loss": 0.2409, "step": 292110 }, { "epoch": 12.1, "grad_norm": 1.125, "learning_rate": 0.0003248326025787142, "loss": 0.2295, "step": 292120 }, { "epoch": 12.1, "grad_norm": 0.91796875, "learning_rate": 0.00032482225457088174, "loss": 0.1707, "step": 292130 }, { "epoch": 12.1, "grad_norm": 1.4453125, "learning_rate": 0.0003248119064222419, "loss": 0.1963, "step": 292140 }, { "epoch": 12.1, "grad_norm": 0.6875, "learning_rate": 0.0003248015581328142, "loss": 0.2409, "step": 292150 }, { "epoch": 12.1, "grad_norm": 0.5859375, "learning_rate": 0.00032479120970261796, "loss": 0.2116, "step": 292160 }, { "epoch": 12.1, "grad_norm": 0.71875, "learning_rate": 0.00032478086113167274, "loss": 0.1714, "step": 292170 }, { "epoch": 12.1, "grad_norm": 0.66796875, "learning_rate": 0.0003247705124199981, "loss": 0.1997, "step": 292180 }, { "epoch": 12.1, "grad_norm": 0.8359375, "learning_rate": 0.00032476016356761334, "loss": 0.1447, "step": 292190 }, { "epoch": 12.1, "grad_norm": 0.875, "learning_rate": 0.00032474981457453813, "loss": 0.1976, "step": 292200 }, { "epoch": 12.1, "grad_norm": 0.95703125, "learning_rate": 0.0003247394654407917, "loss": 0.2036, "step": 292210 }, { "epoch": 12.1, "grad_norm": 0.5078125, "learning_rate": 0.0003247291161663938, "loss": 0.1882, "step": 292220 }, { "epoch": 12.1, "grad_norm": 1.203125, "learning_rate": 0.00032471876675136367, "loss": 0.1717, "step": 292230 }, { "epoch": 12.1, "grad_norm": 1.0859375, "learning_rate": 0.000324708417195721, "loss": 0.196, "step": 292240 }, { "epoch": 12.1, "grad_norm": 0.416015625, "learning_rate": 0.00032469806749948506, "loss": 0.2127, "step": 292250 }, { "epoch": 12.11, "grad_norm": 0.95703125, "learning_rate": 0.0003246877176626755, "loss": 0.1658, "step": 292260 }, { "epoch": 12.11, "grad_norm": 0.8984375, "learning_rate": 0.0003246773676853116, "loss": 0.2056, "step": 292270 }, { "epoch": 12.11, "grad_norm": 0.5703125, "learning_rate": 0.0003246670175674131, "loss": 0.2118, "step": 292280 }, { "epoch": 12.11, "grad_norm": 0.984375, "learning_rate": 0.0003246566673089992, "loss": 0.2019, "step": 292290 }, { "epoch": 12.11, "grad_norm": 0.90625, "learning_rate": 0.00032464631691008964, "loss": 0.1994, "step": 292300 }, { "epoch": 12.11, "grad_norm": 1.6328125, "learning_rate": 0.00032463596637070377, "loss": 0.1943, "step": 292310 }, { "epoch": 12.11, "grad_norm": 1.078125, "learning_rate": 0.00032462561569086097, "loss": 0.2021, "step": 292320 }, { "epoch": 12.11, "grad_norm": 0.58203125, "learning_rate": 0.00032461526487058087, "loss": 0.1534, "step": 292330 }, { "epoch": 12.11, "grad_norm": 0.291015625, "learning_rate": 0.0003246049139098829, "loss": 0.1867, "step": 292340 }, { "epoch": 12.11, "grad_norm": 0.796875, "learning_rate": 0.00032459456280878654, "loss": 0.2279, "step": 292350 }, { "epoch": 12.11, "grad_norm": 0.92578125, "learning_rate": 0.00032458421156731124, "loss": 0.2833, "step": 292360 }, { "epoch": 12.11, "grad_norm": 0.80078125, "learning_rate": 0.0003245738601854766, "loss": 0.2424, "step": 292370 }, { "epoch": 12.11, "grad_norm": 0.93359375, "learning_rate": 0.00032456350866330184, "loss": 0.2116, "step": 292380 }, { "epoch": 12.11, "grad_norm": 0.55859375, "learning_rate": 0.00032455315700080677, "loss": 0.2074, "step": 292390 }, { "epoch": 12.11, "grad_norm": 0.87890625, "learning_rate": 0.0003245428051980107, "loss": 0.1803, "step": 292400 }, { "epoch": 12.11, "grad_norm": 1.5390625, "learning_rate": 0.00032453245325493303, "loss": 0.1921, "step": 292410 }, { "epoch": 12.11, "grad_norm": 0.6484375, "learning_rate": 0.0003245221011715934, "loss": 0.2196, "step": 292420 }, { "epoch": 12.11, "grad_norm": 0.703125, "learning_rate": 0.0003245117489480112, "loss": 0.2398, "step": 292430 }, { "epoch": 12.11, "grad_norm": 0.5625, "learning_rate": 0.00032450139658420595, "loss": 0.1475, "step": 292440 }, { "epoch": 12.11, "grad_norm": 0.625, "learning_rate": 0.00032449104408019706, "loss": 0.1926, "step": 292450 }, { "epoch": 12.11, "grad_norm": 0.51171875, "learning_rate": 0.00032448069143600414, "loss": 0.2192, "step": 292460 }, { "epoch": 12.11, "grad_norm": 0.6796875, "learning_rate": 0.0003244703386516466, "loss": 0.196, "step": 292470 }, { "epoch": 12.11, "grad_norm": 0.2890625, "learning_rate": 0.0003244599857271439, "loss": 0.1563, "step": 292480 }, { "epoch": 12.11, "grad_norm": 0.62109375, "learning_rate": 0.00032444963266251546, "loss": 0.202, "step": 292490 }, { "epoch": 12.12, "grad_norm": 0.7578125, "learning_rate": 0.00032443927945778096, "loss": 0.204, "step": 292500 }, { "epoch": 12.12, "grad_norm": 1.09375, "learning_rate": 0.00032442892611295974, "loss": 0.2069, "step": 292510 }, { "epoch": 12.12, "grad_norm": 0.8984375, "learning_rate": 0.0003244185726280714, "loss": 0.1927, "step": 292520 }, { "epoch": 12.12, "grad_norm": 0.8515625, "learning_rate": 0.0003244082190031352, "loss": 0.1949, "step": 292530 }, { "epoch": 12.12, "grad_norm": 0.4609375, "learning_rate": 0.00032439786523817086, "loss": 0.1972, "step": 292540 }, { "epoch": 12.12, "grad_norm": 0.7421875, "learning_rate": 0.00032438751133319777, "loss": 0.2314, "step": 292550 }, { "epoch": 12.12, "grad_norm": 1.3984375, "learning_rate": 0.00032437715728823543, "loss": 0.2045, "step": 292560 }, { "epoch": 12.12, "grad_norm": 0.53515625, "learning_rate": 0.00032436680310330324, "loss": 0.2498, "step": 292570 }, { "epoch": 12.12, "grad_norm": 1.078125, "learning_rate": 0.0003243564487784208, "loss": 0.1922, "step": 292580 }, { "epoch": 12.12, "grad_norm": 0.87890625, "learning_rate": 0.00032434609431360753, "loss": 0.2051, "step": 292590 }, { "epoch": 12.12, "grad_norm": 1.0234375, "learning_rate": 0.000324335739708883, "loss": 0.2095, "step": 292600 }, { "epoch": 12.12, "grad_norm": 0.482421875, "learning_rate": 0.00032432538496426656, "loss": 0.1913, "step": 292610 }, { "epoch": 12.12, "grad_norm": 0.5078125, "learning_rate": 0.0003243150300797778, "loss": 0.1796, "step": 292620 }, { "epoch": 12.12, "grad_norm": 1.265625, "learning_rate": 0.0003243046750554362, "loss": 0.1753, "step": 292630 }, { "epoch": 12.12, "grad_norm": 0.71875, "learning_rate": 0.00032429431989126125, "loss": 0.2067, "step": 292640 }, { "epoch": 12.12, "grad_norm": 0.93359375, "learning_rate": 0.0003242839645872724, "loss": 0.2151, "step": 292650 }, { "epoch": 12.12, "grad_norm": 1.40625, "learning_rate": 0.0003242736091434891, "loss": 0.2091, "step": 292660 }, { "epoch": 12.12, "grad_norm": 0.80078125, "learning_rate": 0.00032426325355993085, "loss": 0.1952, "step": 292670 }, { "epoch": 12.12, "grad_norm": 0.79296875, "learning_rate": 0.00032425289783661725, "loss": 0.1938, "step": 292680 }, { "epoch": 12.12, "grad_norm": 0.3984375, "learning_rate": 0.0003242425419735677, "loss": 0.1743, "step": 292690 }, { "epoch": 12.12, "grad_norm": 0.53515625, "learning_rate": 0.00032423218597080173, "loss": 0.2221, "step": 292700 }, { "epoch": 12.12, "grad_norm": 0.859375, "learning_rate": 0.00032422182982833883, "loss": 0.2241, "step": 292710 }, { "epoch": 12.12, "grad_norm": 0.796875, "learning_rate": 0.0003242114735461983, "loss": 0.1659, "step": 292720 }, { "epoch": 12.12, "grad_norm": 0.74609375, "learning_rate": 0.0003242011171244, "loss": 0.2074, "step": 292730 }, { "epoch": 12.13, "grad_norm": 1.78125, "learning_rate": 0.0003241907605629631, "loss": 0.1812, "step": 292740 }, { "epoch": 12.13, "grad_norm": 0.953125, "learning_rate": 0.00032418040386190716, "loss": 0.2064, "step": 292750 }, { "epoch": 12.13, "grad_norm": 0.88671875, "learning_rate": 0.00032417004702125184, "loss": 0.1705, "step": 292760 }, { "epoch": 12.13, "grad_norm": 0.61328125, "learning_rate": 0.00032415969004101643, "loss": 0.2439, "step": 292770 }, { "epoch": 12.13, "grad_norm": 0.86328125, "learning_rate": 0.00032414933292122046, "loss": 0.2109, "step": 292780 }, { "epoch": 12.13, "grad_norm": 0.62890625, "learning_rate": 0.0003241389756618835, "loss": 0.2091, "step": 292790 }, { "epoch": 12.13, "grad_norm": 0.984375, "learning_rate": 0.00032412861826302495, "loss": 0.1991, "step": 292800 }, { "epoch": 12.13, "grad_norm": 0.59765625, "learning_rate": 0.0003241182607246644, "loss": 0.1885, "step": 292810 }, { "epoch": 12.13, "grad_norm": 1.3515625, "learning_rate": 0.00032410790304682126, "loss": 0.1914, "step": 292820 }, { "epoch": 12.13, "grad_norm": 0.69140625, "learning_rate": 0.000324097545229515, "loss": 0.1754, "step": 292830 }, { "epoch": 12.13, "grad_norm": 0.66015625, "learning_rate": 0.0003240871872727652, "loss": 0.2044, "step": 292840 }, { "epoch": 12.13, "grad_norm": 0.5078125, "learning_rate": 0.0003240768291765914, "loss": 0.2069, "step": 292850 }, { "epoch": 12.13, "grad_norm": 0.58984375, "learning_rate": 0.00032406647094101294, "loss": 0.1866, "step": 292860 }, { "epoch": 12.13, "grad_norm": 1.2578125, "learning_rate": 0.00032405611256604934, "loss": 0.1995, "step": 292870 }, { "epoch": 12.13, "grad_norm": 1.0234375, "learning_rate": 0.00032404575405172017, "loss": 0.1866, "step": 292880 }, { "epoch": 12.13, "grad_norm": 0.85546875, "learning_rate": 0.0003240353953980449, "loss": 0.1962, "step": 292890 }, { "epoch": 12.13, "grad_norm": 0.73828125, "learning_rate": 0.000324025036605043, "loss": 0.2014, "step": 292900 }, { "epoch": 12.13, "grad_norm": 0.412109375, "learning_rate": 0.000324014677672734, "loss": 0.1785, "step": 292910 }, { "epoch": 12.13, "grad_norm": 0.73828125, "learning_rate": 0.0003240043186011372, "loss": 0.1765, "step": 292920 }, { "epoch": 12.13, "grad_norm": 0.79296875, "learning_rate": 0.0003239939593902725, "loss": 0.1865, "step": 292930 }, { "epoch": 12.13, "grad_norm": 0.447265625, "learning_rate": 0.000323983600040159, "loss": 0.1833, "step": 292940 }, { "epoch": 12.13, "grad_norm": 0.92578125, "learning_rate": 0.00032397324055081643, "loss": 0.1621, "step": 292950 }, { "epoch": 12.13, "grad_norm": 1.171875, "learning_rate": 0.00032396288092226424, "loss": 0.1728, "step": 292960 }, { "epoch": 12.13, "grad_norm": 0.60546875, "learning_rate": 0.00032395252115452185, "loss": 0.1706, "step": 292970 }, { "epoch": 12.14, "grad_norm": 0.77734375, "learning_rate": 0.00032394216124760877, "loss": 0.2432, "step": 292980 }, { "epoch": 12.14, "grad_norm": 0.35546875, "learning_rate": 0.0003239318012015446, "loss": 0.1629, "step": 292990 }, { "epoch": 12.14, "grad_norm": 0.8046875, "learning_rate": 0.0003239214410163487, "loss": 0.1744, "step": 293000 }, { "epoch": 12.14, "grad_norm": 0.671875, "learning_rate": 0.0003239110806920407, "loss": 0.1426, "step": 293010 }, { "epoch": 12.14, "grad_norm": 1.5, "learning_rate": 0.0003239007202286399, "loss": 0.1748, "step": 293020 }, { "epoch": 12.14, "grad_norm": 0.435546875, "learning_rate": 0.00032389035962616605, "loss": 0.1951, "step": 293030 }, { "epoch": 12.14, "grad_norm": 0.5234375, "learning_rate": 0.0003238799988846385, "loss": 0.2185, "step": 293040 }, { "epoch": 12.14, "grad_norm": 0.6953125, "learning_rate": 0.0003238696380040767, "loss": 0.2414, "step": 293050 }, { "epoch": 12.14, "grad_norm": 0.67578125, "learning_rate": 0.0003238592769845003, "loss": 0.1693, "step": 293060 }, { "epoch": 12.14, "grad_norm": 0.91015625, "learning_rate": 0.00032384891582592866, "loss": 0.2171, "step": 293070 }, { "epoch": 12.14, "grad_norm": 0.69140625, "learning_rate": 0.00032383855452838134, "loss": 0.2016, "step": 293080 }, { "epoch": 12.14, "grad_norm": 0.81640625, "learning_rate": 0.00032382819309187795, "loss": 0.2281, "step": 293090 }, { "epoch": 12.14, "grad_norm": 0.62890625, "learning_rate": 0.00032381783151643774, "loss": 0.1796, "step": 293100 }, { "epoch": 12.14, "grad_norm": 0.0, "learning_rate": 0.0003238074698020804, "loss": 0.2109, "step": 293110 }, { "epoch": 12.14, "grad_norm": 0.89453125, "learning_rate": 0.00032379710794882535, "loss": 0.2282, "step": 293120 }, { "epoch": 12.14, "grad_norm": 0.7578125, "learning_rate": 0.00032378674595669204, "loss": 0.2227, "step": 293130 }, { "epoch": 12.14, "grad_norm": 0.6796875, "learning_rate": 0.0003237763838257002, "loss": 0.1946, "step": 293140 }, { "epoch": 12.14, "grad_norm": 0.48828125, "learning_rate": 0.00032376602155586903, "loss": 0.1527, "step": 293150 }, { "epoch": 12.14, "grad_norm": 0.7890625, "learning_rate": 0.00032375565914721826, "loss": 0.1881, "step": 293160 }, { "epoch": 12.14, "grad_norm": 0.71875, "learning_rate": 0.00032374529659976733, "loss": 0.1628, "step": 293170 }, { "epoch": 12.14, "grad_norm": 0.6640625, "learning_rate": 0.00032373493391353565, "loss": 0.1762, "step": 293180 }, { "epoch": 12.14, "grad_norm": 0.8359375, "learning_rate": 0.0003237245710885427, "loss": 0.1832, "step": 293190 }, { "epoch": 12.14, "grad_norm": 0.7890625, "learning_rate": 0.00032371420812480825, "loss": 0.1684, "step": 293200 }, { "epoch": 12.14, "grad_norm": 0.9921875, "learning_rate": 0.0003237038450223515, "loss": 0.1617, "step": 293210 }, { "epoch": 12.15, "grad_norm": 1.6875, "learning_rate": 0.0003236934817811922, "loss": 0.1428, "step": 293220 }, { "epoch": 12.15, "grad_norm": 0.671875, "learning_rate": 0.00032368311840134955, "loss": 0.1521, "step": 293230 }, { "epoch": 12.15, "grad_norm": 0.56640625, "learning_rate": 0.0003236727548828433, "loss": 0.1396, "step": 293240 }, { "epoch": 12.15, "grad_norm": 2.515625, "learning_rate": 0.00032366239122569296, "loss": 0.1776, "step": 293250 }, { "epoch": 12.15, "grad_norm": 0.95703125, "learning_rate": 0.00032365202742991783, "loss": 0.184, "step": 293260 }, { "epoch": 12.15, "grad_norm": 0.546875, "learning_rate": 0.0003236416634955377, "loss": 0.1471, "step": 293270 }, { "epoch": 12.15, "grad_norm": 1.2265625, "learning_rate": 0.00032363129942257176, "loss": 0.1482, "step": 293280 }, { "epoch": 12.15, "grad_norm": 0.77734375, "learning_rate": 0.00032362093521103974, "loss": 0.1729, "step": 293290 }, { "epoch": 12.15, "grad_norm": 0.78515625, "learning_rate": 0.0003236105708609611, "loss": 0.1893, "step": 293300 }, { "epoch": 12.15, "grad_norm": 0.8359375, "learning_rate": 0.0003236002063723552, "loss": 0.1996, "step": 293310 }, { "epoch": 12.15, "grad_norm": 0.640625, "learning_rate": 0.00032358984174524175, "loss": 0.2144, "step": 293320 }, { "epoch": 12.15, "grad_norm": 1.734375, "learning_rate": 0.0003235794769796402, "loss": 0.2444, "step": 293330 }, { "epoch": 12.15, "grad_norm": 0.5859375, "learning_rate": 0.0003235691120755698, "loss": 0.2456, "step": 293340 }, { "epoch": 12.15, "grad_norm": 0.79296875, "learning_rate": 0.00032355874703305054, "loss": 0.1486, "step": 293350 }, { "epoch": 12.15, "grad_norm": 1.1328125, "learning_rate": 0.00032354838185210153, "loss": 0.1886, "step": 293360 }, { "epoch": 12.15, "grad_norm": 1.1484375, "learning_rate": 0.0003235380165327424, "loss": 0.1966, "step": 293370 }, { "epoch": 12.15, "grad_norm": 0.498046875, "learning_rate": 0.00032352765107499274, "loss": 0.2028, "step": 293380 }, { "epoch": 12.15, "grad_norm": 0.263671875, "learning_rate": 0.00032351728547887193, "loss": 0.178, "step": 293390 }, { "epoch": 12.15, "grad_norm": 2.0, "learning_rate": 0.00032350691974439955, "loss": 0.1999, "step": 293400 }, { "epoch": 12.15, "grad_norm": 0.236328125, "learning_rate": 0.0003234965538715951, "loss": 0.1536, "step": 293410 }, { "epoch": 12.15, "grad_norm": 0.84375, "learning_rate": 0.000323486187860478, "loss": 0.1549, "step": 293420 }, { "epoch": 12.15, "grad_norm": 1.328125, "learning_rate": 0.00032347582171106794, "loss": 0.2263, "step": 293430 }, { "epoch": 12.15, "grad_norm": 0.80859375, "learning_rate": 0.0003234654554233842, "loss": 0.2061, "step": 293440 }, { "epoch": 12.15, "grad_norm": 0.84765625, "learning_rate": 0.00032345508899744646, "loss": 0.2257, "step": 293450 }, { "epoch": 12.16, "grad_norm": 2.53125, "learning_rate": 0.0003234447224332742, "loss": 0.1884, "step": 293460 }, { "epoch": 12.16, "grad_norm": 0.3984375, "learning_rate": 0.00032343435573088685, "loss": 0.1771, "step": 293470 }, { "epoch": 12.16, "grad_norm": 1.046875, "learning_rate": 0.000323423988890304, "loss": 0.1553, "step": 293480 }, { "epoch": 12.16, "grad_norm": 0.89453125, "learning_rate": 0.0003234136219115452, "loss": 0.1545, "step": 293490 }, { "epoch": 12.16, "grad_norm": 1.0703125, "learning_rate": 0.0003234032547946297, "loss": 0.1696, "step": 293500 }, { "epoch": 12.16, "grad_norm": 0.546875, "learning_rate": 0.0003233928875395774, "loss": 0.2529, "step": 293510 }, { "epoch": 12.16, "grad_norm": 0.94140625, "learning_rate": 0.00032338252014640754, "loss": 0.171, "step": 293520 }, { "epoch": 12.16, "grad_norm": 1.4765625, "learning_rate": 0.0003233721526151397, "loss": 0.1892, "step": 293530 }, { "epoch": 12.16, "grad_norm": 0.498046875, "learning_rate": 0.00032336178494579346, "loss": 0.2069, "step": 293540 }, { "epoch": 12.16, "grad_norm": 0.81640625, "learning_rate": 0.0003233514171383881, "loss": 0.2299, "step": 293550 }, { "epoch": 12.16, "grad_norm": 0.462890625, "learning_rate": 0.00032334104919294344, "loss": 0.1512, "step": 293560 }, { "epoch": 12.16, "grad_norm": 1.0625, "learning_rate": 0.0003233306811094788, "loss": 0.2119, "step": 293570 }, { "epoch": 12.16, "grad_norm": 0.7265625, "learning_rate": 0.0003233203128880137, "loss": 0.1972, "step": 293580 }, { "epoch": 12.16, "grad_norm": 1.1953125, "learning_rate": 0.00032330994452856775, "loss": 0.2232, "step": 293590 }, { "epoch": 12.16, "grad_norm": 0.88671875, "learning_rate": 0.0003232995760311604, "loss": 0.2106, "step": 293600 }, { "epoch": 12.16, "grad_norm": 0.7734375, "learning_rate": 0.0003232892073958111, "loss": 0.2147, "step": 293610 }, { "epoch": 12.16, "grad_norm": 1.5078125, "learning_rate": 0.0003232788386225395, "loss": 0.2004, "step": 293620 }, { "epoch": 12.16, "grad_norm": 0.703125, "learning_rate": 0.00032326846971136495, "loss": 0.1981, "step": 293630 }, { "epoch": 12.16, "grad_norm": 0.53515625, "learning_rate": 0.00032325810066230714, "loss": 0.2247, "step": 293640 }, { "epoch": 12.16, "grad_norm": 0.267578125, "learning_rate": 0.0003232477314753855, "loss": 0.106, "step": 293650 }, { "epoch": 12.16, "grad_norm": 1.09375, "learning_rate": 0.0003232373621506194, "loss": 0.1898, "step": 293660 }, { "epoch": 12.16, "grad_norm": 0.80078125, "learning_rate": 0.0003232269926880287, "loss": 0.234, "step": 293670 }, { "epoch": 12.16, "grad_norm": 1.0625, "learning_rate": 0.00032321662308763257, "loss": 0.2124, "step": 293680 }, { "epoch": 12.16, "grad_norm": 1.65625, "learning_rate": 0.00032320625334945067, "loss": 0.1666, "step": 293690 }, { "epoch": 12.17, "grad_norm": 0.68359375, "learning_rate": 0.0003231958834735026, "loss": 0.1663, "step": 293700 }, { "epoch": 12.17, "grad_norm": 0.369140625, "learning_rate": 0.00032318551345980763, "loss": 0.1883, "step": 293710 }, { "epoch": 12.17, "grad_norm": 1.5, "learning_rate": 0.00032317514330838554, "loss": 0.2043, "step": 293720 }, { "epoch": 12.17, "grad_norm": 0.1640625, "learning_rate": 0.00032316477301925573, "loss": 0.2379, "step": 293730 }, { "epoch": 12.17, "grad_norm": 1.046875, "learning_rate": 0.0003231544025924376, "loss": 0.2062, "step": 293740 }, { "epoch": 12.17, "grad_norm": 0.494140625, "learning_rate": 0.00032314403202795096, "loss": 0.2012, "step": 293750 }, { "epoch": 12.17, "grad_norm": 0.6015625, "learning_rate": 0.000323133661325815, "loss": 0.1537, "step": 293760 }, { "epoch": 12.17, "grad_norm": 0.87109375, "learning_rate": 0.00032312329048604947, "loss": 0.2026, "step": 293770 }, { "epoch": 12.17, "grad_norm": 0.328125, "learning_rate": 0.0003231129195086738, "loss": 0.1915, "step": 293780 }, { "epoch": 12.17, "grad_norm": 1.4296875, "learning_rate": 0.0003231025483937075, "loss": 0.1825, "step": 293790 }, { "epoch": 12.17, "grad_norm": 0.79296875, "learning_rate": 0.00032309217714117014, "loss": 0.1892, "step": 293800 }, { "epoch": 12.17, "grad_norm": 0.27734375, "learning_rate": 0.0003230818057510811, "loss": 0.2128, "step": 293810 }, { "epoch": 12.17, "grad_norm": 0.51171875, "learning_rate": 0.00032307143422346, "loss": 0.2176, "step": 293820 }, { "epoch": 12.17, "grad_norm": 0.796875, "learning_rate": 0.00032306106255832644, "loss": 0.2179, "step": 293830 }, { "epoch": 12.17, "grad_norm": 0.87890625, "learning_rate": 0.00032305069075569984, "loss": 0.2228, "step": 293840 }, { "epoch": 12.17, "grad_norm": 0.58984375, "learning_rate": 0.00032304031881559965, "loss": 0.1406, "step": 293850 }, { "epoch": 12.17, "grad_norm": 0.5625, "learning_rate": 0.00032302994673804556, "loss": 0.1996, "step": 293860 }, { "epoch": 12.17, "grad_norm": 1.3515625, "learning_rate": 0.00032301957452305686, "loss": 0.1692, "step": 293870 }, { "epoch": 12.17, "grad_norm": 2.109375, "learning_rate": 0.00032300920217065334, "loss": 0.1753, "step": 293880 }, { "epoch": 12.17, "grad_norm": 0.53515625, "learning_rate": 0.00032299882968085436, "loss": 0.1769, "step": 293890 }, { "epoch": 12.17, "grad_norm": 0.71484375, "learning_rate": 0.00032298845705367943, "loss": 0.2062, "step": 293900 }, { "epoch": 12.17, "grad_norm": 0.431640625, "learning_rate": 0.00032297808428914817, "loss": 0.1973, "step": 293910 }, { "epoch": 12.17, "grad_norm": 0.88671875, "learning_rate": 0.00032296771138728, "loss": 0.2066, "step": 293920 }, { "epoch": 12.17, "grad_norm": 1.328125, "learning_rate": 0.0003229573383480944, "loss": 0.202, "step": 293930 }, { "epoch": 12.17, "grad_norm": 1.4609375, "learning_rate": 0.0003229469651716111, "loss": 0.2001, "step": 293940 }, { "epoch": 12.18, "grad_norm": 0.87890625, "learning_rate": 0.00032293659185784936, "loss": 0.1648, "step": 293950 }, { "epoch": 12.18, "grad_norm": 0.7578125, "learning_rate": 0.0003229262184068289, "loss": 0.2098, "step": 293960 }, { "epoch": 12.18, "grad_norm": 0.64453125, "learning_rate": 0.00032291584481856917, "loss": 0.1789, "step": 293970 }, { "epoch": 12.18, "grad_norm": 2.484375, "learning_rate": 0.0003229054710930896, "loss": 0.233, "step": 293980 }, { "epoch": 12.18, "grad_norm": 1.046875, "learning_rate": 0.00032289509723041, "loss": 0.2774, "step": 293990 }, { "epoch": 12.18, "grad_norm": 0.6484375, "learning_rate": 0.0003228847232305496, "loss": 0.1503, "step": 294000 }, { "epoch": 12.18, "grad_norm": 0.80859375, "learning_rate": 0.000322874349093528, "loss": 0.2278, "step": 294010 }, { "epoch": 12.18, "grad_norm": 0.58203125, "learning_rate": 0.00032286397481936477, "loss": 0.202, "step": 294020 }, { "epoch": 12.18, "grad_norm": 0.55859375, "learning_rate": 0.0003228536004080794, "loss": 0.2023, "step": 294030 }, { "epoch": 12.18, "grad_norm": 0.48046875, "learning_rate": 0.0003228432258596914, "loss": 0.1582, "step": 294040 }, { "epoch": 12.18, "grad_norm": 0.94921875, "learning_rate": 0.00032283285117422036, "loss": 0.1781, "step": 294050 }, { "epoch": 12.18, "grad_norm": 0.57421875, "learning_rate": 0.0003228224763516857, "loss": 0.2035, "step": 294060 }, { "epoch": 12.18, "grad_norm": 0.9140625, "learning_rate": 0.00032281210139210707, "loss": 0.1606, "step": 294070 }, { "epoch": 12.18, "grad_norm": 0.494140625, "learning_rate": 0.00032280172629550394, "loss": 0.1537, "step": 294080 }, { "epoch": 12.18, "grad_norm": 1.515625, "learning_rate": 0.00032279135106189576, "loss": 0.1701, "step": 294090 }, { "epoch": 12.18, "grad_norm": 0.84765625, "learning_rate": 0.0003227809756913021, "loss": 0.2429, "step": 294100 }, { "epoch": 12.18, "grad_norm": 0.9453125, "learning_rate": 0.00032277060018374255, "loss": 0.1829, "step": 294110 }, { "epoch": 12.18, "grad_norm": 0.6171875, "learning_rate": 0.00032276022453923666, "loss": 0.1934, "step": 294120 }, { "epoch": 12.18, "grad_norm": 0.515625, "learning_rate": 0.0003227498487578038, "loss": 0.224, "step": 294130 }, { "epoch": 12.18, "grad_norm": 0.55078125, "learning_rate": 0.00032273947283946355, "loss": 0.1851, "step": 294140 }, { "epoch": 12.18, "grad_norm": 1.65625, "learning_rate": 0.00032272909678423547, "loss": 0.1413, "step": 294150 }, { "epoch": 12.18, "grad_norm": 0.78515625, "learning_rate": 0.0003227187205921391, "loss": 0.2035, "step": 294160 }, { "epoch": 12.18, "grad_norm": 1.2421875, "learning_rate": 0.000322708344263194, "loss": 0.1891, "step": 294170 }, { "epoch": 12.18, "grad_norm": 1.0234375, "learning_rate": 0.00032269796779741967, "loss": 0.2861, "step": 294180 }, { "epoch": 12.19, "grad_norm": 0.9140625, "learning_rate": 0.0003226875911948355, "loss": 0.2232, "step": 294190 }, { "epoch": 12.19, "grad_norm": 0.99609375, "learning_rate": 0.00032267721445546126, "loss": 0.1568, "step": 294200 }, { "epoch": 12.19, "grad_norm": 3.09375, "learning_rate": 0.0003226668375793163, "loss": 0.1608, "step": 294210 }, { "epoch": 12.19, "grad_norm": 1.359375, "learning_rate": 0.0003226564605664201, "loss": 0.1892, "step": 294220 }, { "epoch": 12.19, "grad_norm": 0.462890625, "learning_rate": 0.0003226460834167924, "loss": 0.1772, "step": 294230 }, { "epoch": 12.19, "grad_norm": 0.69921875, "learning_rate": 0.0003226357061304526, "loss": 0.2063, "step": 294240 }, { "epoch": 12.19, "grad_norm": 0.72265625, "learning_rate": 0.0003226253287074202, "loss": 0.2236, "step": 294250 }, { "epoch": 12.19, "grad_norm": 2.25, "learning_rate": 0.00032261495114771483, "loss": 0.1962, "step": 294260 }, { "epoch": 12.19, "grad_norm": 0.458984375, "learning_rate": 0.00032260457345135594, "loss": 0.2046, "step": 294270 }, { "epoch": 12.19, "grad_norm": 1.15625, "learning_rate": 0.00032259419561836314, "loss": 0.1731, "step": 294280 }, { "epoch": 12.19, "grad_norm": 0.671875, "learning_rate": 0.0003225838176487558, "loss": 0.1757, "step": 294290 }, { "epoch": 12.19, "grad_norm": 0.54296875, "learning_rate": 0.00032257343954255366, "loss": 0.2353, "step": 294300 }, { "epoch": 12.19, "grad_norm": 1.1640625, "learning_rate": 0.000322563061299776, "loss": 0.2219, "step": 294310 }, { "epoch": 12.19, "grad_norm": 0.4921875, "learning_rate": 0.0003225526829204427, "loss": 0.2077, "step": 294320 }, { "epoch": 12.19, "grad_norm": 0.263671875, "learning_rate": 0.0003225423044045729, "loss": 0.142, "step": 294330 }, { "epoch": 12.19, "grad_norm": 0.37890625, "learning_rate": 0.00032253192575218637, "loss": 0.1868, "step": 294340 }, { "epoch": 12.19, "grad_norm": 0.796875, "learning_rate": 0.0003225215469633026, "loss": 0.1952, "step": 294350 }, { "epoch": 12.19, "grad_norm": 0.63671875, "learning_rate": 0.00032251116803794114, "loss": 0.2038, "step": 294360 }, { "epoch": 12.19, "grad_norm": 0.578125, "learning_rate": 0.00032250078897612145, "loss": 0.1941, "step": 294370 }, { "epoch": 12.19, "grad_norm": 2.90625, "learning_rate": 0.00032249040977786316, "loss": 0.2098, "step": 294380 }, { "epoch": 12.19, "grad_norm": 0.80078125, "learning_rate": 0.0003224800304431856, "loss": 0.2013, "step": 294390 }, { "epoch": 12.19, "grad_norm": 1.5625, "learning_rate": 0.00032246965097210866, "loss": 0.19, "step": 294400 }, { "epoch": 12.19, "grad_norm": 0.83203125, "learning_rate": 0.00032245927136465154, "loss": 0.1704, "step": 294410 }, { "epoch": 12.19, "grad_norm": 0.423828125, "learning_rate": 0.0003224488916208339, "loss": 0.1708, "step": 294420 }, { "epoch": 12.2, "grad_norm": 1.0859375, "learning_rate": 0.0003224385117406753, "loss": 0.2107, "step": 294430 }, { "epoch": 12.2, "grad_norm": 0.59765625, "learning_rate": 0.00032242813172419523, "loss": 0.1847, "step": 294440 }, { "epoch": 12.2, "grad_norm": 2.859375, "learning_rate": 0.0003224177515714132, "loss": 0.1614, "step": 294450 }, { "epoch": 12.2, "grad_norm": 1.390625, "learning_rate": 0.00032240737128234886, "loss": 0.1931, "step": 294460 }, { "epoch": 12.2, "grad_norm": 0.52734375, "learning_rate": 0.0003223969908570216, "loss": 0.1529, "step": 294470 }, { "epoch": 12.2, "grad_norm": 0.76171875, "learning_rate": 0.0003223866102954511, "loss": 0.192, "step": 294480 }, { "epoch": 12.2, "grad_norm": 0.79296875, "learning_rate": 0.0003223762295976568, "loss": 0.2078, "step": 294490 }, { "epoch": 12.2, "grad_norm": 1.5390625, "learning_rate": 0.0003223658487636582, "loss": 0.2663, "step": 294500 }, { "epoch": 12.2, "grad_norm": 0.3984375, "learning_rate": 0.0003223554677934749, "loss": 0.1685, "step": 294510 }, { "epoch": 12.2, "grad_norm": 1.2890625, "learning_rate": 0.00032234508668712637, "loss": 0.1659, "step": 294520 }, { "epoch": 12.2, "grad_norm": 0.64453125, "learning_rate": 0.00032233470544463226, "loss": 0.2257, "step": 294530 }, { "epoch": 12.2, "grad_norm": 1.28125, "learning_rate": 0.000322324324066012, "loss": 0.2529, "step": 294540 }, { "epoch": 12.2, "grad_norm": 0.6484375, "learning_rate": 0.00032231394255128525, "loss": 0.1585, "step": 294550 }, { "epoch": 12.2, "grad_norm": 0.62890625, "learning_rate": 0.00032230356090047144, "loss": 0.1759, "step": 294560 }, { "epoch": 12.2, "grad_norm": 0.53515625, "learning_rate": 0.00032229317911359014, "loss": 0.2088, "step": 294570 }, { "epoch": 12.2, "grad_norm": 0.7265625, "learning_rate": 0.0003222827971906609, "loss": 0.1649, "step": 294580 }, { "epoch": 12.2, "grad_norm": 0.734375, "learning_rate": 0.0003222724151317032, "loss": 0.2363, "step": 294590 }, { "epoch": 12.2, "grad_norm": 2.03125, "learning_rate": 0.0003222620329367366, "loss": 0.2241, "step": 294600 }, { "epoch": 12.2, "grad_norm": 2.046875, "learning_rate": 0.0003222516506057807, "loss": 0.1982, "step": 294610 }, { "epoch": 12.2, "grad_norm": 0.5546875, "learning_rate": 0.000322241268138855, "loss": 0.2152, "step": 294620 }, { "epoch": 12.2, "grad_norm": 0.765625, "learning_rate": 0.00032223088553597903, "loss": 0.1874, "step": 294630 }, { "epoch": 12.2, "grad_norm": 1.1796875, "learning_rate": 0.0003222205027971723, "loss": 0.1762, "step": 294640 }, { "epoch": 12.2, "grad_norm": 0.60546875, "learning_rate": 0.0003222101199224544, "loss": 0.1804, "step": 294650 }, { "epoch": 12.2, "grad_norm": 3.828125, "learning_rate": 0.0003221997369118449, "loss": 0.2024, "step": 294660 }, { "epoch": 12.21, "grad_norm": 1.1484375, "learning_rate": 0.0003221893537653632, "loss": 0.224, "step": 294670 }, { "epoch": 12.21, "grad_norm": 0.486328125, "learning_rate": 0.000322178970483029, "loss": 0.1623, "step": 294680 }, { "epoch": 12.21, "grad_norm": 0.85546875, "learning_rate": 0.0003221685870648618, "loss": 0.2067, "step": 294690 }, { "epoch": 12.21, "grad_norm": 1.28125, "learning_rate": 0.0003221582035108811, "loss": 0.1765, "step": 294700 }, { "epoch": 12.21, "grad_norm": 0.345703125, "learning_rate": 0.0003221478198211064, "loss": 0.1674, "step": 294710 }, { "epoch": 12.21, "grad_norm": 1.234375, "learning_rate": 0.00032213743599555727, "loss": 0.1695, "step": 294720 }, { "epoch": 12.21, "grad_norm": 1.078125, "learning_rate": 0.0003221270520342533, "loss": 0.1791, "step": 294730 }, { "epoch": 12.21, "grad_norm": 0.53125, "learning_rate": 0.0003221166679372141, "loss": 0.1611, "step": 294740 }, { "epoch": 12.21, "grad_norm": 0.78515625, "learning_rate": 0.000322106283704459, "loss": 0.1932, "step": 294750 }, { "epoch": 12.21, "grad_norm": 0.7421875, "learning_rate": 0.00032209589933600774, "loss": 0.1505, "step": 294760 }, { "epoch": 12.21, "grad_norm": 0.27734375, "learning_rate": 0.0003220855148318798, "loss": 0.1514, "step": 294770 }, { "epoch": 12.21, "grad_norm": 0.640625, "learning_rate": 0.0003220751301920946, "loss": 0.2053, "step": 294780 }, { "epoch": 12.21, "grad_norm": 0.80078125, "learning_rate": 0.00032206474541667185, "loss": 0.1705, "step": 294790 }, { "epoch": 12.21, "grad_norm": 0.7265625, "learning_rate": 0.00032205436050563105, "loss": 0.1682, "step": 294800 }, { "epoch": 12.21, "grad_norm": 1.890625, "learning_rate": 0.0003220439754589917, "loss": 0.2216, "step": 294810 }, { "epoch": 12.21, "grad_norm": 0.57421875, "learning_rate": 0.00032203359027677337, "loss": 0.1413, "step": 294820 }, { "epoch": 12.21, "grad_norm": 0.7578125, "learning_rate": 0.00032202320495899563, "loss": 0.1879, "step": 294830 }, { "epoch": 12.21, "grad_norm": 2.59375, "learning_rate": 0.00032201281950567794, "loss": 0.1535, "step": 294840 }, { "epoch": 12.21, "grad_norm": 1.3359375, "learning_rate": 0.00032200243391683996, "loss": 0.2178, "step": 294850 }, { "epoch": 12.21, "grad_norm": 0.75390625, "learning_rate": 0.0003219920481925011, "loss": 0.1471, "step": 294860 }, { "epoch": 12.21, "grad_norm": 1.390625, "learning_rate": 0.00032198166233268104, "loss": 0.1733, "step": 294870 }, { "epoch": 12.21, "grad_norm": 1.0625, "learning_rate": 0.00032197127633739925, "loss": 0.188, "step": 294880 }, { "epoch": 12.21, "grad_norm": 0.392578125, "learning_rate": 0.0003219608902066753, "loss": 0.2032, "step": 294890 }, { "epoch": 12.21, "grad_norm": 0.89453125, "learning_rate": 0.0003219505039405288, "loss": 0.189, "step": 294900 }, { "epoch": 12.22, "grad_norm": 1.28125, "learning_rate": 0.0003219401175389791, "loss": 0.1911, "step": 294910 }, { "epoch": 12.22, "grad_norm": 0.53125, "learning_rate": 0.0003219297310020459, "loss": 0.1914, "step": 294920 }, { "epoch": 12.22, "grad_norm": 0.3828125, "learning_rate": 0.00032191934432974873, "loss": 0.1489, "step": 294930 }, { "epoch": 12.22, "grad_norm": 0.984375, "learning_rate": 0.00032190895752210705, "loss": 0.1674, "step": 294940 }, { "epoch": 12.22, "grad_norm": 1.0546875, "learning_rate": 0.00032189857057914056, "loss": 0.1749, "step": 294950 }, { "epoch": 12.22, "grad_norm": 0.92578125, "learning_rate": 0.0003218881835008687, "loss": 0.2575, "step": 294960 }, { "epoch": 12.22, "grad_norm": 0.466796875, "learning_rate": 0.0003218777962873111, "loss": 0.199, "step": 294970 }, { "epoch": 12.22, "grad_norm": 0.388671875, "learning_rate": 0.0003218674089384872, "loss": 0.1891, "step": 294980 }, { "epoch": 12.22, "grad_norm": 1.2734375, "learning_rate": 0.0003218570214544165, "loss": 0.2233, "step": 294990 }, { "epoch": 12.22, "grad_norm": 0.7109375, "learning_rate": 0.0003218466338351188, "loss": 0.1704, "step": 295000 }, { "epoch": 12.22, "grad_norm": 0.90625, "learning_rate": 0.00032183624608061345, "loss": 0.1925, "step": 295010 }, { "epoch": 12.22, "grad_norm": 1.5, "learning_rate": 0.00032182585819092, "loss": 0.1905, "step": 295020 }, { "epoch": 12.22, "grad_norm": 0.921875, "learning_rate": 0.00032181547016605803, "loss": 0.1448, "step": 295030 }, { "epoch": 12.22, "grad_norm": 1.0546875, "learning_rate": 0.0003218050820060472, "loss": 0.191, "step": 295040 }, { "epoch": 12.22, "grad_norm": 1.0078125, "learning_rate": 0.00032179469371090684, "loss": 0.2455, "step": 295050 }, { "epoch": 12.22, "grad_norm": 0.55078125, "learning_rate": 0.00032178430528065675, "loss": 0.2349, "step": 295060 }, { "epoch": 12.22, "grad_norm": 0.5234375, "learning_rate": 0.0003217739167153162, "loss": 0.1872, "step": 295070 }, { "epoch": 12.22, "grad_norm": 0.9296875, "learning_rate": 0.000321763528014905, "loss": 0.1298, "step": 295080 }, { "epoch": 12.22, "grad_norm": 0.9609375, "learning_rate": 0.0003217531391794426, "loss": 0.2225, "step": 295090 }, { "epoch": 12.22, "grad_norm": 0.95703125, "learning_rate": 0.0003217427502089484, "loss": 0.2251, "step": 295100 }, { "epoch": 12.22, "grad_norm": 0.859375, "learning_rate": 0.0003217323611034422, "loss": 0.1722, "step": 295110 }, { "epoch": 12.22, "grad_norm": 0.671875, "learning_rate": 0.00032172197186294344, "loss": 0.1907, "step": 295120 }, { "epoch": 12.22, "grad_norm": 0.388671875, "learning_rate": 0.0003217115824874717, "loss": 0.1775, "step": 295130 }, { "epoch": 12.22, "grad_norm": 1.1640625, "learning_rate": 0.0003217011929770465, "loss": 0.19, "step": 295140 }, { "epoch": 12.23, "grad_norm": 1.2421875, "learning_rate": 0.00032169080333168727, "loss": 0.2261, "step": 295150 }, { "epoch": 12.23, "grad_norm": 0.37109375, "learning_rate": 0.0003216804135514138, "loss": 0.1655, "step": 295160 }, { "epoch": 12.23, "grad_norm": 1.203125, "learning_rate": 0.0003216700236362456, "loss": 0.1886, "step": 295170 }, { "epoch": 12.23, "grad_norm": 0.380859375, "learning_rate": 0.00032165963358620197, "loss": 0.2162, "step": 295180 }, { "epoch": 12.23, "grad_norm": 1.1484375, "learning_rate": 0.0003216492434013028, "loss": 0.1912, "step": 295190 }, { "epoch": 12.23, "grad_norm": 0.5703125, "learning_rate": 0.0003216388530815675, "loss": 0.1958, "step": 295200 }, { "epoch": 12.23, "grad_norm": 0.82421875, "learning_rate": 0.0003216284626270155, "loss": 0.1497, "step": 295210 }, { "epoch": 12.23, "grad_norm": 0.5625, "learning_rate": 0.0003216180720376666, "loss": 0.1812, "step": 295220 }, { "epoch": 12.23, "grad_norm": 1.109375, "learning_rate": 0.00032160768131354005, "loss": 0.1448, "step": 295230 }, { "epoch": 12.23, "grad_norm": 0.765625, "learning_rate": 0.0003215972904546557, "loss": 0.1868, "step": 295240 }, { "epoch": 12.23, "grad_norm": 2.296875, "learning_rate": 0.00032158689946103306, "loss": 0.2249, "step": 295250 }, { "epoch": 12.23, "grad_norm": 0.65625, "learning_rate": 0.00032157650833269144, "loss": 0.1619, "step": 295260 }, { "epoch": 12.23, "grad_norm": 1.203125, "learning_rate": 0.0003215661170696506, "loss": 0.1893, "step": 295270 }, { "epoch": 12.23, "grad_norm": 0.38671875, "learning_rate": 0.0003215557256719301, "loss": 0.202, "step": 295280 }, { "epoch": 12.23, "grad_norm": 1.8671875, "learning_rate": 0.0003215453341395494, "loss": 0.1897, "step": 295290 }, { "epoch": 12.23, "grad_norm": 0.76953125, "learning_rate": 0.0003215349424725282, "loss": 0.2018, "step": 295300 }, { "epoch": 12.23, "grad_norm": 0.71484375, "learning_rate": 0.0003215245506708858, "loss": 0.1565, "step": 295310 }, { "epoch": 12.23, "grad_norm": 0.80078125, "learning_rate": 0.0003215141587346421, "loss": 0.1905, "step": 295320 }, { "epoch": 12.23, "grad_norm": 0.8359375, "learning_rate": 0.00032150376666381636, "loss": 0.2019, "step": 295330 }, { "epoch": 12.23, "grad_norm": 1.1328125, "learning_rate": 0.0003214933744584283, "loss": 0.1513, "step": 295340 }, { "epoch": 12.23, "grad_norm": 0.9375, "learning_rate": 0.00032148298211849747, "loss": 0.2452, "step": 295350 }, { "epoch": 12.23, "grad_norm": 1.296875, "learning_rate": 0.00032147258964404334, "loss": 0.1813, "step": 295360 }, { "epoch": 12.23, "grad_norm": 0.82421875, "learning_rate": 0.0003214621970350855, "loss": 0.1963, "step": 295370 }, { "epoch": 12.23, "grad_norm": 0.447265625, "learning_rate": 0.00032145180429164354, "loss": 0.2218, "step": 295380 }, { "epoch": 12.24, "grad_norm": 0.84765625, "learning_rate": 0.00032144141141373696, "loss": 0.2385, "step": 295390 }, { "epoch": 12.24, "grad_norm": 1.1015625, "learning_rate": 0.00032143101840138546, "loss": 0.217, "step": 295400 }, { "epoch": 12.24, "grad_norm": 0.7109375, "learning_rate": 0.0003214206252546084, "loss": 0.2134, "step": 295410 }, { "epoch": 12.24, "grad_norm": 0.498046875, "learning_rate": 0.00032141023197342544, "loss": 0.2006, "step": 295420 }, { "epoch": 12.24, "grad_norm": 1.9921875, "learning_rate": 0.00032139983855785623, "loss": 0.1541, "step": 295430 }, { "epoch": 12.24, "grad_norm": 0.3515625, "learning_rate": 0.0003213894450079201, "loss": 0.1446, "step": 295440 }, { "epoch": 12.24, "grad_norm": 0.0, "learning_rate": 0.00032137905132363686, "loss": 0.1924, "step": 295450 }, { "epoch": 12.24, "grad_norm": 0.69921875, "learning_rate": 0.0003213686575050259, "loss": 0.2121, "step": 295460 }, { "epoch": 12.24, "grad_norm": 0.388671875, "learning_rate": 0.0003213582635521068, "loss": 0.2177, "step": 295470 }, { "epoch": 12.24, "grad_norm": 0.62109375, "learning_rate": 0.00032134786946489926, "loss": 0.1505, "step": 295480 }, { "epoch": 12.24, "grad_norm": 0.7109375, "learning_rate": 0.0003213374752434226, "loss": 0.2199, "step": 295490 }, { "epoch": 12.24, "grad_norm": 0.5390625, "learning_rate": 0.0003213270808876966, "loss": 0.2255, "step": 295500 }, { "epoch": 12.24, "grad_norm": 0.81640625, "learning_rate": 0.00032131668639774077, "loss": 0.1521, "step": 295510 }, { "epoch": 12.24, "grad_norm": 0.42578125, "learning_rate": 0.0003213062917735746, "loss": 0.1856, "step": 295520 }, { "epoch": 12.24, "grad_norm": 0.75390625, "learning_rate": 0.00032129589701521767, "loss": 0.1935, "step": 295530 }, { "epoch": 12.24, "grad_norm": 1.3125, "learning_rate": 0.00032128550212268955, "loss": 0.2388, "step": 295540 }, { "epoch": 12.24, "grad_norm": 0.90234375, "learning_rate": 0.0003212751070960098, "loss": 0.1884, "step": 295550 }, { "epoch": 12.24, "grad_norm": 0.80859375, "learning_rate": 0.00032126471193519806, "loss": 0.2247, "step": 295560 }, { "epoch": 12.24, "grad_norm": 0.328125, "learning_rate": 0.00032125431664027377, "loss": 0.1808, "step": 295570 }, { "epoch": 12.24, "grad_norm": 0.50390625, "learning_rate": 0.00032124392121125656, "loss": 0.1898, "step": 295580 }, { "epoch": 12.24, "grad_norm": 0.490234375, "learning_rate": 0.00032123352564816603, "loss": 0.1851, "step": 295590 }, { "epoch": 12.24, "grad_norm": 0.734375, "learning_rate": 0.00032122312995102166, "loss": 0.2157, "step": 295600 }, { "epoch": 12.24, "grad_norm": 0.578125, "learning_rate": 0.00032121273411984307, "loss": 0.1329, "step": 295610 }, { "epoch": 12.24, "grad_norm": 1.25, "learning_rate": 0.00032120233815464977, "loss": 0.2667, "step": 295620 }, { "epoch": 12.24, "grad_norm": 1.7578125, "learning_rate": 0.0003211919420554614, "loss": 0.1842, "step": 295630 }, { "epoch": 12.25, "grad_norm": 2.28125, "learning_rate": 0.0003211815458222975, "loss": 0.1699, "step": 295640 }, { "epoch": 12.25, "grad_norm": 0.56640625, "learning_rate": 0.00032117114945517754, "loss": 0.1898, "step": 295650 }, { "epoch": 12.25, "grad_norm": 0.578125, "learning_rate": 0.00032116075295412117, "loss": 0.2288, "step": 295660 }, { "epoch": 12.25, "grad_norm": 1.328125, "learning_rate": 0.00032115035631914804, "loss": 0.1661, "step": 295670 }, { "epoch": 12.25, "grad_norm": 1.1953125, "learning_rate": 0.0003211399595502776, "loss": 0.2003, "step": 295680 }, { "epoch": 12.25, "grad_norm": 1.1953125, "learning_rate": 0.00032112956264752934, "loss": 0.1962, "step": 295690 }, { "epoch": 12.25, "grad_norm": 0.5625, "learning_rate": 0.00032111916561092295, "loss": 0.1578, "step": 295700 }, { "epoch": 12.25, "grad_norm": 0.39453125, "learning_rate": 0.00032110876844047804, "loss": 0.2473, "step": 295710 }, { "epoch": 12.25, "grad_norm": 1.5859375, "learning_rate": 0.0003210983711362141, "loss": 0.198, "step": 295720 }, { "epoch": 12.25, "grad_norm": 0.3125, "learning_rate": 0.00032108797369815066, "loss": 0.1765, "step": 295730 }, { "epoch": 12.25, "grad_norm": 0.5234375, "learning_rate": 0.0003210775761263073, "loss": 0.182, "step": 295740 }, { "epoch": 12.25, "grad_norm": 2.296875, "learning_rate": 0.0003210671784207037, "loss": 0.2323, "step": 295750 }, { "epoch": 12.25, "grad_norm": 1.328125, "learning_rate": 0.0003210567805813593, "loss": 0.2042, "step": 295760 }, { "epoch": 12.25, "grad_norm": 0.462890625, "learning_rate": 0.00032104638260829375, "loss": 0.2108, "step": 295770 }, { "epoch": 12.25, "grad_norm": 0.8125, "learning_rate": 0.0003210359845015266, "loss": 0.1462, "step": 295780 }, { "epoch": 12.25, "grad_norm": 0.65625, "learning_rate": 0.0003210255862610773, "loss": 0.2193, "step": 295790 }, { "epoch": 12.25, "grad_norm": 3.46875, "learning_rate": 0.0003210151878869656, "loss": 0.1778, "step": 295800 }, { "epoch": 12.25, "grad_norm": 1.078125, "learning_rate": 0.000321004789379211, "loss": 0.2217, "step": 295810 }, { "epoch": 12.25, "grad_norm": 0.39453125, "learning_rate": 0.000320994390737833, "loss": 0.1654, "step": 295820 }, { "epoch": 12.25, "grad_norm": 0.55859375, "learning_rate": 0.00032098399196285116, "loss": 0.1628, "step": 295830 }, { "epoch": 12.25, "grad_norm": 0.75, "learning_rate": 0.00032097359305428526, "loss": 0.2176, "step": 295840 }, { "epoch": 12.25, "grad_norm": 1.125, "learning_rate": 0.0003209631940121546, "loss": 0.1936, "step": 295850 }, { "epoch": 12.25, "grad_norm": 0.40234375, "learning_rate": 0.0003209527948364789, "loss": 0.1864, "step": 295860 }, { "epoch": 12.25, "grad_norm": 0.53125, "learning_rate": 0.0003209423955272777, "loss": 0.1979, "step": 295870 }, { "epoch": 12.26, "grad_norm": 1.3515625, "learning_rate": 0.0003209319960845706, "loss": 0.1834, "step": 295880 }, { "epoch": 12.26, "grad_norm": 1.5390625, "learning_rate": 0.0003209215965083772, "loss": 0.2144, "step": 295890 }, { "epoch": 12.26, "grad_norm": 0.734375, "learning_rate": 0.0003209111967987169, "loss": 0.1835, "step": 295900 }, { "epoch": 12.26, "grad_norm": 1.265625, "learning_rate": 0.00032090079695560946, "loss": 0.1841, "step": 295910 }, { "epoch": 12.26, "grad_norm": 0.9921875, "learning_rate": 0.0003208903969790744, "loss": 0.2039, "step": 295920 }, { "epoch": 12.26, "grad_norm": 0.8984375, "learning_rate": 0.00032087999686913123, "loss": 0.2131, "step": 295930 }, { "epoch": 12.26, "grad_norm": 0.96484375, "learning_rate": 0.0003208695966257995, "loss": 0.1386, "step": 295940 }, { "epoch": 12.26, "grad_norm": 0.7578125, "learning_rate": 0.0003208591962490989, "loss": 0.2521, "step": 295950 }, { "epoch": 12.26, "grad_norm": 0.5703125, "learning_rate": 0.0003208487957390489, "loss": 0.2043, "step": 295960 }, { "epoch": 12.26, "grad_norm": 0.50390625, "learning_rate": 0.0003208383950956693, "loss": 0.1817, "step": 295970 }, { "epoch": 12.26, "grad_norm": 0.447265625, "learning_rate": 0.00032082799431897924, "loss": 0.1977, "step": 295980 }, { "epoch": 12.26, "grad_norm": 0.96484375, "learning_rate": 0.0003208175934089987, "loss": 0.2036, "step": 295990 }, { "epoch": 12.26, "grad_norm": 1.3046875, "learning_rate": 0.0003208071923657471, "loss": 0.2079, "step": 296000 }, { "epoch": 12.26, "grad_norm": 1.2109375, "learning_rate": 0.0003207967911892439, "loss": 0.2013, "step": 296010 }, { "epoch": 12.26, "grad_norm": 0.314453125, "learning_rate": 0.0003207863898795088, "loss": 0.1848, "step": 296020 }, { "epoch": 12.26, "grad_norm": 0.8515625, "learning_rate": 0.0003207759884365614, "loss": 0.1754, "step": 296030 }, { "epoch": 12.26, "grad_norm": 0.9609375, "learning_rate": 0.00032076558686042124, "loss": 0.1977, "step": 296040 }, { "epoch": 12.26, "grad_norm": 0.451171875, "learning_rate": 0.0003207551851511079, "loss": 0.2072, "step": 296050 }, { "epoch": 12.26, "grad_norm": 0.4765625, "learning_rate": 0.0003207447833086409, "loss": 0.2007, "step": 296060 }, { "epoch": 12.26, "grad_norm": 0.8203125, "learning_rate": 0.0003207343813330399, "loss": 0.2646, "step": 296070 }, { "epoch": 12.26, "grad_norm": 0.671875, "learning_rate": 0.0003207239792243245, "loss": 0.1612, "step": 296080 }, { "epoch": 12.26, "grad_norm": 0.474609375, "learning_rate": 0.00032071357698251404, "loss": 0.2363, "step": 296090 }, { "epoch": 12.26, "grad_norm": 1.3671875, "learning_rate": 0.00032070317460762836, "loss": 0.2291, "step": 296100 }, { "epoch": 12.26, "grad_norm": 0.71875, "learning_rate": 0.00032069277209968695, "loss": 0.1917, "step": 296110 }, { "epoch": 12.27, "grad_norm": 0.91015625, "learning_rate": 0.0003206823694587093, "loss": 0.2112, "step": 296120 }, { "epoch": 12.27, "grad_norm": 0.1416015625, "learning_rate": 0.0003206719666847152, "loss": 0.1826, "step": 296130 }, { "epoch": 12.27, "grad_norm": 0.7421875, "learning_rate": 0.00032066156377772397, "loss": 0.1834, "step": 296140 }, { "epoch": 12.27, "grad_norm": 0.49609375, "learning_rate": 0.00032065116073775533, "loss": 0.1827, "step": 296150 }, { "epoch": 12.27, "grad_norm": 0.77734375, "learning_rate": 0.0003206407575648289, "loss": 0.1693, "step": 296160 }, { "epoch": 12.27, "grad_norm": 2.765625, "learning_rate": 0.0003206303542589641, "loss": 0.1415, "step": 296170 }, { "epoch": 12.27, "grad_norm": 1.390625, "learning_rate": 0.0003206199508201807, "loss": 0.1866, "step": 296180 }, { "epoch": 12.27, "grad_norm": 0.9140625, "learning_rate": 0.0003206095472484981, "loss": 0.1659, "step": 296190 }, { "epoch": 12.27, "grad_norm": 0.59765625, "learning_rate": 0.000320599143543936, "loss": 0.2148, "step": 296200 }, { "epoch": 12.27, "grad_norm": 1.1328125, "learning_rate": 0.00032058873970651394, "loss": 0.206, "step": 296210 }, { "epoch": 12.27, "grad_norm": 0.4765625, "learning_rate": 0.00032057833573625143, "loss": 0.2248, "step": 296220 }, { "epoch": 12.27, "grad_norm": 1.1640625, "learning_rate": 0.0003205679316331682, "loss": 0.2119, "step": 296230 }, { "epoch": 12.27, "grad_norm": 0.94921875, "learning_rate": 0.00032055752739728374, "loss": 0.214, "step": 296240 }, { "epoch": 12.27, "grad_norm": 0.84375, "learning_rate": 0.0003205471230286175, "loss": 0.2101, "step": 296250 }, { "epoch": 12.27, "grad_norm": 0.515625, "learning_rate": 0.00032053671852718936, "loss": 0.1668, "step": 296260 }, { "epoch": 12.27, "grad_norm": 0.31640625, "learning_rate": 0.00032052631389301863, "loss": 0.1812, "step": 296270 }, { "epoch": 12.27, "grad_norm": 0.60546875, "learning_rate": 0.00032051590912612497, "loss": 0.148, "step": 296280 }, { "epoch": 12.27, "grad_norm": 0.73046875, "learning_rate": 0.0003205055042265281, "loss": 0.1674, "step": 296290 }, { "epoch": 12.27, "grad_norm": 3.375, "learning_rate": 0.00032049509919424734, "loss": 0.2299, "step": 296300 }, { "epoch": 12.27, "grad_norm": 0.56640625, "learning_rate": 0.00032048469402930256, "loss": 0.2445, "step": 296310 }, { "epoch": 12.27, "grad_norm": 1.09375, "learning_rate": 0.0003204742887317132, "loss": 0.1941, "step": 296320 }, { "epoch": 12.27, "grad_norm": 0.94921875, "learning_rate": 0.0003204638833014987, "loss": 0.1942, "step": 296330 }, { "epoch": 12.27, "grad_norm": 0.74609375, "learning_rate": 0.0003204534777386789, "loss": 0.2044, "step": 296340 }, { "epoch": 12.27, "grad_norm": 1.140625, "learning_rate": 0.00032044307204327316, "loss": 0.1615, "step": 296350 }, { "epoch": 12.28, "grad_norm": 0.392578125, "learning_rate": 0.00032043266621530124, "loss": 0.1521, "step": 296360 }, { "epoch": 12.28, "grad_norm": 0.392578125, "learning_rate": 0.0003204222602547827, "loss": 0.1294, "step": 296370 }, { "epoch": 12.28, "grad_norm": 0.314453125, "learning_rate": 0.0003204118541617369, "loss": 0.194, "step": 296380 }, { "epoch": 12.28, "grad_norm": 0.8125, "learning_rate": 0.00032040144793618375, "loss": 0.1571, "step": 296390 }, { "epoch": 12.28, "grad_norm": 1.1328125, "learning_rate": 0.0003203910415781426, "loss": 0.1528, "step": 296400 }, { "epoch": 12.28, "grad_norm": 1.203125, "learning_rate": 0.00032038063508763314, "loss": 0.1877, "step": 296410 }, { "epoch": 12.28, "grad_norm": 1.46875, "learning_rate": 0.00032037022846467496, "loss": 0.1835, "step": 296420 }, { "epoch": 12.28, "grad_norm": 1.09375, "learning_rate": 0.00032035982170928757, "loss": 0.1578, "step": 296430 }, { "epoch": 12.28, "grad_norm": 1.140625, "learning_rate": 0.0003203494148214906, "loss": 0.2195, "step": 296440 }, { "epoch": 12.28, "grad_norm": 1.0078125, "learning_rate": 0.00032033900780130365, "loss": 0.2002, "step": 296450 }, { "epoch": 12.28, "grad_norm": 1.3125, "learning_rate": 0.00032032860064874617, "loss": 0.1175, "step": 296460 }, { "epoch": 12.28, "grad_norm": 1.0625, "learning_rate": 0.000320318193363838, "loss": 0.2047, "step": 296470 }, { "epoch": 12.28, "grad_norm": 1.21875, "learning_rate": 0.00032030778594659853, "loss": 0.2029, "step": 296480 }, { "epoch": 12.28, "grad_norm": 1.0546875, "learning_rate": 0.00032029737839704735, "loss": 0.2086, "step": 296490 }, { "epoch": 12.28, "grad_norm": 1.3046875, "learning_rate": 0.00032028697071520427, "loss": 0.1813, "step": 296500 }, { "epoch": 12.28, "grad_norm": 0.97265625, "learning_rate": 0.0003202765629010885, "loss": 0.2045, "step": 296510 }, { "epoch": 12.28, "grad_norm": 0.330078125, "learning_rate": 0.0003202661549547199, "loss": 0.1759, "step": 296520 }, { "epoch": 12.28, "grad_norm": 0.98828125, "learning_rate": 0.000320255746876118, "loss": 0.169, "step": 296530 }, { "epoch": 12.28, "grad_norm": 0.4296875, "learning_rate": 0.0003202453386653024, "loss": 0.2134, "step": 296540 }, { "epoch": 12.28, "grad_norm": 0.84375, "learning_rate": 0.00032023493032229253, "loss": 0.17, "step": 296550 }, { "epoch": 12.28, "grad_norm": 1.0859375, "learning_rate": 0.00032022452184710825, "loss": 0.1759, "step": 296560 }, { "epoch": 12.28, "grad_norm": 0.38671875, "learning_rate": 0.0003202141132397689, "loss": 0.1619, "step": 296570 }, { "epoch": 12.28, "grad_norm": 0.380859375, "learning_rate": 0.0003202037045002943, "loss": 0.205, "step": 296580 }, { "epoch": 12.28, "grad_norm": 0.84765625, "learning_rate": 0.0003201932956287038, "loss": 0.1923, "step": 296590 }, { "epoch": 12.29, "grad_norm": 0.94921875, "learning_rate": 0.0003201828866250171, "loss": 0.1949, "step": 296600 }, { "epoch": 12.29, "grad_norm": 1.0234375, "learning_rate": 0.0003201724774892539, "loss": 0.2065, "step": 296610 }, { "epoch": 12.29, "grad_norm": 1.046875, "learning_rate": 0.0003201620682214335, "loss": 0.1249, "step": 296620 }, { "epoch": 12.29, "grad_norm": 0.828125, "learning_rate": 0.0003201516588215758, "loss": 0.2043, "step": 296630 }, { "epoch": 12.29, "grad_norm": 0.72265625, "learning_rate": 0.0003201412492897002, "loss": 0.1901, "step": 296640 }, { "epoch": 12.29, "grad_norm": 0.9375, "learning_rate": 0.00032013083962582634, "loss": 0.2247, "step": 296650 }, { "epoch": 12.29, "grad_norm": 0.82421875, "learning_rate": 0.0003201204298299739, "loss": 0.203, "step": 296660 }, { "epoch": 12.29, "grad_norm": 0.439453125, "learning_rate": 0.00032011001990216224, "loss": 0.1789, "step": 296670 }, { "epoch": 12.29, "grad_norm": 0.67578125, "learning_rate": 0.0003200996098424112, "loss": 0.1688, "step": 296680 }, { "epoch": 12.29, "grad_norm": 1.8203125, "learning_rate": 0.00032008919965074024, "loss": 0.164, "step": 296690 }, { "epoch": 12.29, "grad_norm": 0.94921875, "learning_rate": 0.0003200787893271689, "loss": 0.1652, "step": 296700 }, { "epoch": 12.29, "grad_norm": 0.9921875, "learning_rate": 0.0003200683788717169, "loss": 0.1899, "step": 296710 }, { "epoch": 12.29, "grad_norm": 1.1953125, "learning_rate": 0.00032005796828440383, "loss": 0.2319, "step": 296720 }, { "epoch": 12.29, "grad_norm": 0.12451171875, "learning_rate": 0.00032004755756524913, "loss": 0.1862, "step": 296730 }, { "epoch": 12.29, "grad_norm": 0.75390625, "learning_rate": 0.0003200371467142726, "loss": 0.1984, "step": 296740 }, { "epoch": 12.29, "grad_norm": 1.2890625, "learning_rate": 0.00032002673573149363, "loss": 0.1872, "step": 296750 }, { "epoch": 12.29, "grad_norm": 0.73828125, "learning_rate": 0.0003200163246169319, "loss": 0.1795, "step": 296760 }, { "epoch": 12.29, "grad_norm": 0.0, "learning_rate": 0.0003200059133706071, "loss": 0.1987, "step": 296770 }, { "epoch": 12.29, "grad_norm": 2.640625, "learning_rate": 0.00031999550199253863, "loss": 0.1911, "step": 296780 }, { "epoch": 12.29, "grad_norm": 0.91796875, "learning_rate": 0.00031998509048274625, "loss": 0.1359, "step": 296790 }, { "epoch": 12.29, "grad_norm": 0.40625, "learning_rate": 0.00031997467884124954, "loss": 0.2146, "step": 296800 }, { "epoch": 12.29, "grad_norm": 1.0234375, "learning_rate": 0.0003199642670680679, "loss": 0.1994, "step": 296810 }, { "epoch": 12.29, "grad_norm": 3.0625, "learning_rate": 0.0003199538551632212, "loss": 0.2296, "step": 296820 }, { "epoch": 12.29, "grad_norm": 0.546875, "learning_rate": 0.00031994344312672883, "loss": 0.1708, "step": 296830 }, { "epoch": 12.3, "grad_norm": 1.3046875, "learning_rate": 0.00031993303095861046, "loss": 0.2112, "step": 296840 }, { "epoch": 12.3, "grad_norm": 0.6171875, "learning_rate": 0.00031992261865888567, "loss": 0.2378, "step": 296850 }, { "epoch": 12.3, "grad_norm": 0.7890625, "learning_rate": 0.0003199122062275741, "loss": 0.2182, "step": 296860 }, { "epoch": 12.3, "grad_norm": 1.0625, "learning_rate": 0.0003199017936646953, "loss": 0.198, "step": 296870 }, { "epoch": 12.3, "grad_norm": 0.98828125, "learning_rate": 0.00031989138097026883, "loss": 0.1999, "step": 296880 }, { "epoch": 12.3, "grad_norm": 0.54296875, "learning_rate": 0.0003198809681443143, "loss": 0.2042, "step": 296890 }, { "epoch": 12.3, "grad_norm": 0.50390625, "learning_rate": 0.0003198705551868515, "loss": 0.1987, "step": 296900 }, { "epoch": 12.3, "grad_norm": 0.51953125, "learning_rate": 0.00031986014209789973, "loss": 0.1741, "step": 296910 }, { "epoch": 12.3, "grad_norm": 0.65234375, "learning_rate": 0.00031984972887747864, "loss": 0.2122, "step": 296920 }, { "epoch": 12.3, "grad_norm": 0.44921875, "learning_rate": 0.000319839315525608, "loss": 0.152, "step": 296930 }, { "epoch": 12.3, "grad_norm": 1.3984375, "learning_rate": 0.0003198289020423073, "loss": 0.181, "step": 296940 }, { "epoch": 12.3, "grad_norm": 0.8984375, "learning_rate": 0.0003198184884275962, "loss": 0.1727, "step": 296950 }, { "epoch": 12.3, "grad_norm": 0.73046875, "learning_rate": 0.0003198080746814942, "loss": 0.1935, "step": 296960 }, { "epoch": 12.3, "grad_norm": 1.0703125, "learning_rate": 0.00031979766080402096, "loss": 0.1478, "step": 296970 }, { "epoch": 12.3, "grad_norm": 1.875, "learning_rate": 0.0003197872467951961, "loss": 0.2009, "step": 296980 }, { "epoch": 12.3, "grad_norm": 0.76171875, "learning_rate": 0.00031977683265503896, "loss": 0.2257, "step": 296990 }, { "epoch": 12.3, "grad_norm": 0.68359375, "learning_rate": 0.0003197664183835696, "loss": 0.2266, "step": 297000 }, { "epoch": 12.3, "grad_norm": 0.61328125, "learning_rate": 0.0003197560039808073, "loss": 0.1782, "step": 297010 }, { "epoch": 12.3, "grad_norm": 2.515625, "learning_rate": 0.0003197455894467717, "loss": 0.1873, "step": 297020 }, { "epoch": 12.3, "grad_norm": 1.09375, "learning_rate": 0.0003197351747814825, "loss": 0.1806, "step": 297030 }, { "epoch": 12.3, "grad_norm": 0.65625, "learning_rate": 0.0003197247599849592, "loss": 0.1595, "step": 297040 }, { "epoch": 12.3, "grad_norm": 0.73046875, "learning_rate": 0.00031971434505722137, "loss": 0.2269, "step": 297050 }, { "epoch": 12.3, "grad_norm": 1.7734375, "learning_rate": 0.00031970392999828873, "loss": 0.1749, "step": 297060 }, { "epoch": 12.3, "grad_norm": 0.6875, "learning_rate": 0.00031969351480818075, "loss": 0.1796, "step": 297070 }, { "epoch": 12.31, "grad_norm": 1.0390625, "learning_rate": 0.0003196830994869172, "loss": 0.1919, "step": 297080 }, { "epoch": 12.31, "grad_norm": 1.9453125, "learning_rate": 0.00031967268403451757, "loss": 0.1654, "step": 297090 }, { "epoch": 12.31, "grad_norm": 0.56640625, "learning_rate": 0.00031966226845100146, "loss": 0.1749, "step": 297100 }, { "epoch": 12.31, "grad_norm": 0.2333984375, "learning_rate": 0.00031965185273638845, "loss": 0.1824, "step": 297110 }, { "epoch": 12.31, "grad_norm": 0.33203125, "learning_rate": 0.0003196414368906982, "loss": 0.1923, "step": 297120 }, { "epoch": 12.31, "grad_norm": 0.83203125, "learning_rate": 0.0003196310209139503, "loss": 0.1946, "step": 297130 }, { "epoch": 12.31, "grad_norm": 1.7421875, "learning_rate": 0.0003196206048061643, "loss": 0.195, "step": 297140 }, { "epoch": 12.31, "grad_norm": 0.78125, "learning_rate": 0.00031961018856735987, "loss": 0.1865, "step": 297150 }, { "epoch": 12.31, "grad_norm": 0.65234375, "learning_rate": 0.0003195997721975566, "loss": 0.1935, "step": 297160 }, { "epoch": 12.31, "grad_norm": 1.015625, "learning_rate": 0.00031958935569677407, "loss": 0.2393, "step": 297170 }, { "epoch": 12.31, "grad_norm": 0.7578125, "learning_rate": 0.00031957893906503184, "loss": 0.2197, "step": 297180 }, { "epoch": 12.31, "grad_norm": 0.58984375, "learning_rate": 0.00031956852230234954, "loss": 0.1973, "step": 297190 }, { "epoch": 12.31, "grad_norm": 1.8203125, "learning_rate": 0.00031955810540874684, "loss": 0.2072, "step": 297200 }, { "epoch": 12.31, "grad_norm": 0.68359375, "learning_rate": 0.0003195476883842433, "loss": 0.1668, "step": 297210 }, { "epoch": 12.31, "grad_norm": 1.125, "learning_rate": 0.00031953727122885855, "loss": 0.2296, "step": 297220 }, { "epoch": 12.31, "grad_norm": 0.96875, "learning_rate": 0.0003195268539426121, "loss": 0.1432, "step": 297230 }, { "epoch": 12.31, "grad_norm": 0.921875, "learning_rate": 0.0003195164365255237, "loss": 0.165, "step": 297240 }, { "epoch": 12.31, "grad_norm": 1.0703125, "learning_rate": 0.0003195060189776128, "loss": 0.1608, "step": 297250 }, { "epoch": 12.31, "grad_norm": 0.765625, "learning_rate": 0.0003194956012988991, "loss": 0.2128, "step": 297260 }, { "epoch": 12.31, "grad_norm": 1.4140625, "learning_rate": 0.00031948518348940216, "loss": 0.2076, "step": 297270 }, { "epoch": 12.31, "grad_norm": 1.0, "learning_rate": 0.0003194747655491417, "loss": 0.2379, "step": 297280 }, { "epoch": 12.31, "grad_norm": 0.83203125, "learning_rate": 0.00031946434747813713, "loss": 0.2119, "step": 297290 }, { "epoch": 12.31, "grad_norm": 0.96484375, "learning_rate": 0.0003194539292764082, "loss": 0.2411, "step": 297300 }, { "epoch": 12.31, "grad_norm": 1.75, "learning_rate": 0.00031944351094397445, "loss": 0.2085, "step": 297310 }, { "epoch": 12.31, "grad_norm": 0.3359375, "learning_rate": 0.0003194330924808556, "loss": 0.1918, "step": 297320 }, { "epoch": 12.32, "grad_norm": 0.97265625, "learning_rate": 0.00031942267388707107, "loss": 0.1902, "step": 297330 }, { "epoch": 12.32, "grad_norm": 1.4921875, "learning_rate": 0.0003194122551626406, "loss": 0.2069, "step": 297340 }, { "epoch": 12.32, "grad_norm": 0.90234375, "learning_rate": 0.00031940183630758376, "loss": 0.1929, "step": 297350 }, { "epoch": 12.32, "grad_norm": 0.90234375, "learning_rate": 0.00031939141732192016, "loss": 0.1315, "step": 297360 }, { "epoch": 12.32, "grad_norm": 0.1630859375, "learning_rate": 0.0003193809982056694, "loss": 0.1902, "step": 297370 }, { "epoch": 12.32, "grad_norm": 0.98828125, "learning_rate": 0.00031937057895885116, "loss": 0.1876, "step": 297380 }, { "epoch": 12.32, "grad_norm": 0.73828125, "learning_rate": 0.00031936015958148483, "loss": 0.1915, "step": 297390 }, { "epoch": 12.32, "grad_norm": 0.66015625, "learning_rate": 0.0003193497400735903, "loss": 0.227, "step": 297400 }, { "epoch": 12.32, "grad_norm": 0.87109375, "learning_rate": 0.00031933932043518704, "loss": 0.149, "step": 297410 }, { "epoch": 12.32, "grad_norm": 1.1640625, "learning_rate": 0.0003193289006662946, "loss": 0.1918, "step": 297420 }, { "epoch": 12.32, "grad_norm": 0.0, "learning_rate": 0.0003193184807669327, "loss": 0.1463, "step": 297430 }, { "epoch": 12.32, "grad_norm": 1.6328125, "learning_rate": 0.0003193080607371209, "loss": 0.1855, "step": 297440 }, { "epoch": 12.32, "grad_norm": 0.6640625, "learning_rate": 0.00031929764057687883, "loss": 0.1777, "step": 297450 }, { "epoch": 12.32, "grad_norm": 1.40625, "learning_rate": 0.000319287220286226, "loss": 0.1765, "step": 297460 }, { "epoch": 12.32, "grad_norm": 0.328125, "learning_rate": 0.00031927679986518224, "loss": 0.1853, "step": 297470 }, { "epoch": 12.32, "grad_norm": 0.6328125, "learning_rate": 0.0003192663793137669, "loss": 0.1799, "step": 297480 }, { "epoch": 12.32, "grad_norm": 1.2890625, "learning_rate": 0.0003192559586319998, "loss": 0.1795, "step": 297490 }, { "epoch": 12.32, "grad_norm": 0.400390625, "learning_rate": 0.00031924553781990044, "loss": 0.228, "step": 297500 }, { "epoch": 12.32, "grad_norm": 1.015625, "learning_rate": 0.0003192351168774884, "loss": 0.1946, "step": 297510 }, { "epoch": 12.32, "grad_norm": 0.81640625, "learning_rate": 0.0003192246958047835, "loss": 0.1646, "step": 297520 }, { "epoch": 12.32, "grad_norm": 0.765625, "learning_rate": 0.00031921427460180506, "loss": 0.2028, "step": 297530 }, { "epoch": 12.32, "grad_norm": 0.71484375, "learning_rate": 0.0003192038532685728, "loss": 0.2079, "step": 297540 }, { "epoch": 12.32, "grad_norm": 0.54296875, "learning_rate": 0.00031919343180510643, "loss": 0.2108, "step": 297550 }, { "epoch": 12.32, "grad_norm": 2.125, "learning_rate": 0.00031918301021142547, "loss": 0.2249, "step": 297560 }, { "epoch": 12.33, "grad_norm": 1.1328125, "learning_rate": 0.00031917258848754965, "loss": 0.2072, "step": 297570 }, { "epoch": 12.33, "grad_norm": 1.265625, "learning_rate": 0.0003191621666334984, "loss": 0.2038, "step": 297580 }, { "epoch": 12.33, "grad_norm": 0.74609375, "learning_rate": 0.00031915174464929144, "loss": 0.1355, "step": 297590 }, { "epoch": 12.33, "grad_norm": 1.3203125, "learning_rate": 0.0003191413225349484, "loss": 0.242, "step": 297600 }, { "epoch": 12.33, "grad_norm": 0.5546875, "learning_rate": 0.00031913090029048874, "loss": 0.2077, "step": 297610 }, { "epoch": 12.33, "grad_norm": 0.62109375, "learning_rate": 0.0003191204779159323, "loss": 0.2361, "step": 297620 }, { "epoch": 12.33, "grad_norm": 1.0, "learning_rate": 0.0003191100554112985, "loss": 0.2484, "step": 297630 }, { "epoch": 12.33, "grad_norm": 0.87890625, "learning_rate": 0.0003190996327766071, "loss": 0.2129, "step": 297640 }, { "epoch": 12.33, "grad_norm": 0.75390625, "learning_rate": 0.0003190892100118777, "loss": 0.2119, "step": 297650 }, { "epoch": 12.33, "grad_norm": 0.51171875, "learning_rate": 0.0003190787871171298, "loss": 0.1784, "step": 297660 }, { "epoch": 12.33, "grad_norm": 1.109375, "learning_rate": 0.000319068364092383, "loss": 0.2352, "step": 297670 }, { "epoch": 12.33, "grad_norm": 1.1875, "learning_rate": 0.00031905794093765714, "loss": 0.1915, "step": 297680 }, { "epoch": 12.33, "grad_norm": 0.68359375, "learning_rate": 0.00031904751765297163, "loss": 0.1917, "step": 297690 }, { "epoch": 12.33, "grad_norm": 0.921875, "learning_rate": 0.00031903709423834623, "loss": 0.1875, "step": 297700 }, { "epoch": 12.33, "grad_norm": 1.875, "learning_rate": 0.0003190266706938004, "loss": 0.1878, "step": 297710 }, { "epoch": 12.33, "grad_norm": 0.9453125, "learning_rate": 0.0003190162470193538, "loss": 0.1524, "step": 297720 }, { "epoch": 12.33, "grad_norm": 0.84375, "learning_rate": 0.0003190058232150261, "loss": 0.224, "step": 297730 }, { "epoch": 12.33, "grad_norm": 0.69921875, "learning_rate": 0.00031899539928083694, "loss": 0.2023, "step": 297740 }, { "epoch": 12.33, "grad_norm": 1.078125, "learning_rate": 0.0003189849752168059, "loss": 0.2213, "step": 297750 }, { "epoch": 12.33, "grad_norm": 0.47265625, "learning_rate": 0.00031897455102295255, "loss": 0.1685, "step": 297760 }, { "epoch": 12.33, "grad_norm": 1.359375, "learning_rate": 0.00031896412669929643, "loss": 0.2243, "step": 297770 }, { "epoch": 12.33, "grad_norm": 1.34375, "learning_rate": 0.0003189537022458574, "loss": 0.1627, "step": 297780 }, { "epoch": 12.33, "grad_norm": 0.921875, "learning_rate": 0.00031894327766265494, "loss": 0.195, "step": 297790 }, { "epoch": 12.33, "grad_norm": 0.92578125, "learning_rate": 0.00031893285294970865, "loss": 0.239, "step": 297800 }, { "epoch": 12.34, "grad_norm": 0.8125, "learning_rate": 0.0003189224281070382, "loss": 0.1736, "step": 297810 }, { "epoch": 12.34, "grad_norm": 0.88671875, "learning_rate": 0.00031891200313466313, "loss": 0.1601, "step": 297820 }, { "epoch": 12.34, "grad_norm": 0.51171875, "learning_rate": 0.00031890157803260324, "loss": 0.1644, "step": 297830 }, { "epoch": 12.34, "grad_norm": 0.578125, "learning_rate": 0.00031889115280087793, "loss": 0.1872, "step": 297840 }, { "epoch": 12.34, "grad_norm": 2.890625, "learning_rate": 0.00031888072743950686, "loss": 0.174, "step": 297850 }, { "epoch": 12.34, "grad_norm": 0.5, "learning_rate": 0.0003188703019485097, "loss": 0.1627, "step": 297860 }, { "epoch": 12.34, "grad_norm": 0.328125, "learning_rate": 0.0003188598763279062, "loss": 0.1771, "step": 297870 }, { "epoch": 12.34, "grad_norm": 3.234375, "learning_rate": 0.00031884945057771577, "loss": 0.1884, "step": 297880 }, { "epoch": 12.34, "grad_norm": 0.8046875, "learning_rate": 0.0003188390246979581, "loss": 0.1923, "step": 297890 }, { "epoch": 12.34, "grad_norm": 1.25, "learning_rate": 0.00031882859868865275, "loss": 0.2155, "step": 297900 }, { "epoch": 12.34, "grad_norm": 1.171875, "learning_rate": 0.0003188181725498196, "loss": 0.2088, "step": 297910 }, { "epoch": 12.34, "grad_norm": 1.484375, "learning_rate": 0.00031880774628147797, "loss": 0.1371, "step": 297920 }, { "epoch": 12.34, "grad_norm": 0.73828125, "learning_rate": 0.0003187973198836475, "loss": 0.1652, "step": 297930 }, { "epoch": 12.34, "grad_norm": 0.5859375, "learning_rate": 0.00031878689335634804, "loss": 0.1618, "step": 297940 }, { "epoch": 12.34, "grad_norm": 0.828125, "learning_rate": 0.00031877646669959905, "loss": 0.1802, "step": 297950 }, { "epoch": 12.34, "grad_norm": 0.61328125, "learning_rate": 0.0003187660399134201, "loss": 0.1802, "step": 297960 }, { "epoch": 12.34, "grad_norm": 0.9453125, "learning_rate": 0.00031875561299783104, "loss": 0.2019, "step": 297970 }, { "epoch": 12.34, "grad_norm": 0.5390625, "learning_rate": 0.0003187451859528512, "loss": 0.1999, "step": 297980 }, { "epoch": 12.34, "grad_norm": 0.734375, "learning_rate": 0.0003187347587785004, "loss": 0.2113, "step": 297990 }, { "epoch": 12.34, "grad_norm": 1.453125, "learning_rate": 0.00031872433147479823, "loss": 0.2267, "step": 298000 }, { "epoch": 12.34, "grad_norm": 0.8046875, "learning_rate": 0.00031871390404176423, "loss": 0.209, "step": 298010 }, { "epoch": 12.34, "grad_norm": 0.55078125, "learning_rate": 0.0003187034764794181, "loss": 0.2126, "step": 298020 }, { "epoch": 12.34, "grad_norm": 1.125, "learning_rate": 0.0003186930487877794, "loss": 0.2295, "step": 298030 }, { "epoch": 12.34, "grad_norm": 1.21875, "learning_rate": 0.0003186826209668679, "loss": 0.1643, "step": 298040 }, { "epoch": 12.35, "grad_norm": 0.41796875, "learning_rate": 0.00031867219301670314, "loss": 0.2296, "step": 298050 }, { "epoch": 12.35, "grad_norm": 0.357421875, "learning_rate": 0.0003186617649373046, "loss": 0.2005, "step": 298060 }, { "epoch": 12.35, "grad_norm": 1.28125, "learning_rate": 0.0003186513367286921, "loss": 0.1569, "step": 298070 }, { "epoch": 12.35, "grad_norm": 0.5390625, "learning_rate": 0.00031864090839088527, "loss": 0.1498, "step": 298080 }, { "epoch": 12.35, "grad_norm": 0.625, "learning_rate": 0.00031863047992390355, "loss": 0.2071, "step": 298090 }, { "epoch": 12.35, "grad_norm": 0.73828125, "learning_rate": 0.0003186200513277667, "loss": 0.1916, "step": 298100 }, { "epoch": 12.35, "grad_norm": 0.77734375, "learning_rate": 0.0003186096226024944, "loss": 0.1694, "step": 298110 }, { "epoch": 12.35, "grad_norm": 0.703125, "learning_rate": 0.0003185991937481061, "loss": 0.1858, "step": 298120 }, { "epoch": 12.35, "grad_norm": 1.4609375, "learning_rate": 0.0003185887647646216, "loss": 0.1696, "step": 298130 }, { "epoch": 12.35, "grad_norm": 0.2431640625, "learning_rate": 0.0003185783356520604, "loss": 0.1975, "step": 298140 }, { "epoch": 12.35, "grad_norm": 0.8671875, "learning_rate": 0.0003185679064104422, "loss": 0.224, "step": 298150 }, { "epoch": 12.35, "grad_norm": 1.4375, "learning_rate": 0.0003185574770397866, "loss": 0.2169, "step": 298160 }, { "epoch": 12.35, "grad_norm": 0.63671875, "learning_rate": 0.00031854704754011326, "loss": 0.217, "step": 298170 }, { "epoch": 12.35, "grad_norm": 0.67578125, "learning_rate": 0.0003185366179114418, "loss": 0.147, "step": 298180 }, { "epoch": 12.35, "grad_norm": 1.0625, "learning_rate": 0.0003185261881537918, "loss": 0.2082, "step": 298190 }, { "epoch": 12.35, "grad_norm": 0.53125, "learning_rate": 0.00031851575826718283, "loss": 0.1977, "step": 298200 }, { "epoch": 12.35, "grad_norm": 1.2734375, "learning_rate": 0.0003185053282516347, "loss": 0.24, "step": 298210 }, { "epoch": 12.35, "grad_norm": 1.7890625, "learning_rate": 0.00031849489810716687, "loss": 0.2134, "step": 298220 }, { "epoch": 12.35, "grad_norm": 1.078125, "learning_rate": 0.00031848446783379916, "loss": 0.1838, "step": 298230 }, { "epoch": 12.35, "grad_norm": 1.2109375, "learning_rate": 0.000318474037431551, "loss": 0.1736, "step": 298240 }, { "epoch": 12.35, "grad_norm": 0.83203125, "learning_rate": 0.000318463606900442, "loss": 0.2065, "step": 298250 }, { "epoch": 12.35, "grad_norm": 1.1015625, "learning_rate": 0.00031845317624049204, "loss": 0.2094, "step": 298260 }, { "epoch": 12.35, "grad_norm": 1.3984375, "learning_rate": 0.00031844274545172053, "loss": 0.261, "step": 298270 }, { "epoch": 12.35, "grad_norm": 1.8828125, "learning_rate": 0.0003184323145341471, "loss": 0.1974, "step": 298280 }, { "epoch": 12.36, "grad_norm": 0.5, "learning_rate": 0.00031842188348779154, "loss": 0.1443, "step": 298290 }, { "epoch": 12.36, "grad_norm": 1.34375, "learning_rate": 0.0003184114523126733, "loss": 0.2247, "step": 298300 }, { "epoch": 12.36, "grad_norm": 0.63671875, "learning_rate": 0.00031840102100881226, "loss": 0.1747, "step": 298310 }, { "epoch": 12.36, "grad_norm": 0.37890625, "learning_rate": 0.00031839058957622773, "loss": 0.1613, "step": 298320 }, { "epoch": 12.36, "grad_norm": 1.0078125, "learning_rate": 0.0003183801580149395, "loss": 0.2023, "step": 298330 }, { "epoch": 12.36, "grad_norm": 0.99609375, "learning_rate": 0.0003183697263249673, "loss": 0.1846, "step": 298340 }, { "epoch": 12.36, "grad_norm": 1.1171875, "learning_rate": 0.0003183592945063306, "loss": 0.2327, "step": 298350 }, { "epoch": 12.36, "grad_norm": 0.59375, "learning_rate": 0.00031834886255904907, "loss": 0.2053, "step": 298360 }, { "epoch": 12.36, "grad_norm": 0.890625, "learning_rate": 0.0003183384304831424, "loss": 0.1799, "step": 298370 }, { "epoch": 12.36, "grad_norm": 0.75, "learning_rate": 0.00031832799827863015, "loss": 0.2037, "step": 298380 }, { "epoch": 12.36, "grad_norm": 0.93359375, "learning_rate": 0.00031831756594553196, "loss": 0.1792, "step": 298390 }, { "epoch": 12.36, "grad_norm": 1.078125, "learning_rate": 0.00031830713348386755, "loss": 0.2063, "step": 298400 }, { "epoch": 12.36, "grad_norm": 0.66796875, "learning_rate": 0.0003182967008936564, "loss": 0.1487, "step": 298410 }, { "epoch": 12.36, "grad_norm": 1.765625, "learning_rate": 0.00031828626817491834, "loss": 0.2153, "step": 298420 }, { "epoch": 12.36, "grad_norm": 0.59765625, "learning_rate": 0.0003182758353276729, "loss": 0.1577, "step": 298430 }, { "epoch": 12.36, "grad_norm": 0.92578125, "learning_rate": 0.0003182654023519396, "loss": 0.191, "step": 298440 }, { "epoch": 12.36, "grad_norm": 0.466796875, "learning_rate": 0.0003182549692477383, "loss": 0.1916, "step": 298450 }, { "epoch": 12.36, "grad_norm": 0.40234375, "learning_rate": 0.00031824453601508846, "loss": 0.1848, "step": 298460 }, { "epoch": 12.36, "grad_norm": 0.67578125, "learning_rate": 0.00031823410265400983, "loss": 0.1913, "step": 298470 }, { "epoch": 12.36, "grad_norm": 0.875, "learning_rate": 0.0003182236691645219, "loss": 0.2311, "step": 298480 }, { "epoch": 12.36, "grad_norm": 0.77734375, "learning_rate": 0.00031821323554664444, "loss": 0.1707, "step": 298490 }, { "epoch": 12.36, "grad_norm": 0.578125, "learning_rate": 0.0003182028018003971, "loss": 0.1967, "step": 298500 }, { "epoch": 12.36, "grad_norm": 0.84765625, "learning_rate": 0.00031819236792579933, "loss": 0.1796, "step": 298510 }, { "epoch": 12.36, "grad_norm": 0.8125, "learning_rate": 0.0003181819339228709, "loss": 0.2072, "step": 298520 }, { "epoch": 12.37, "grad_norm": 0.7890625, "learning_rate": 0.0003181714997916315, "loss": 0.2235, "step": 298530 }, { "epoch": 12.37, "grad_norm": 1.1875, "learning_rate": 0.00031816106553210065, "loss": 0.2175, "step": 298540 }, { "epoch": 12.37, "grad_norm": 0.0, "learning_rate": 0.0003181506311442981, "loss": 0.1847, "step": 298550 }, { "epoch": 12.37, "grad_norm": 0.73828125, "learning_rate": 0.0003181401966282434, "loss": 0.1991, "step": 298560 }, { "epoch": 12.37, "grad_norm": 2.40625, "learning_rate": 0.00031812976198395613, "loss": 0.1957, "step": 298570 }, { "epoch": 12.37, "grad_norm": 0.640625, "learning_rate": 0.00031811932721145607, "loss": 0.2211, "step": 298580 }, { "epoch": 12.37, "grad_norm": 0.81640625, "learning_rate": 0.0003181088923107628, "loss": 0.2065, "step": 298590 }, { "epoch": 12.37, "grad_norm": 0.87890625, "learning_rate": 0.00031809845728189595, "loss": 0.174, "step": 298600 }, { "epoch": 12.37, "grad_norm": 1.171875, "learning_rate": 0.00031808802212487513, "loss": 0.155, "step": 298610 }, { "epoch": 12.37, "grad_norm": 1.140625, "learning_rate": 0.00031807758683972004, "loss": 0.2166, "step": 298620 }, { "epoch": 12.37, "grad_norm": 0.859375, "learning_rate": 0.00031806715142645027, "loss": 0.2079, "step": 298630 }, { "epoch": 12.37, "grad_norm": 0.43359375, "learning_rate": 0.00031805671588508547, "loss": 0.165, "step": 298640 }, { "epoch": 12.37, "grad_norm": 0.6875, "learning_rate": 0.0003180462802156453, "loss": 0.2093, "step": 298650 }, { "epoch": 12.37, "grad_norm": 0.494140625, "learning_rate": 0.0003180358444181493, "loss": 0.1851, "step": 298660 }, { "epoch": 12.37, "grad_norm": 0.453125, "learning_rate": 0.0003180254084926173, "loss": 0.235, "step": 298670 }, { "epoch": 12.37, "grad_norm": 0.5625, "learning_rate": 0.00031801497243906876, "loss": 0.1801, "step": 298680 }, { "epoch": 12.37, "grad_norm": 0.5234375, "learning_rate": 0.00031800453625752335, "loss": 0.1943, "step": 298690 }, { "epoch": 12.37, "grad_norm": 0.78515625, "learning_rate": 0.0003179940999480008, "loss": 0.1743, "step": 298700 }, { "epoch": 12.37, "grad_norm": 0.9609375, "learning_rate": 0.00031798366351052065, "loss": 0.2, "step": 298710 }, { "epoch": 12.37, "grad_norm": 1.0, "learning_rate": 0.0003179732269451027, "loss": 0.1605, "step": 298720 }, { "epoch": 12.37, "grad_norm": 1.3125, "learning_rate": 0.0003179627902517664, "loss": 0.1979, "step": 298730 }, { "epoch": 12.37, "grad_norm": 0.73828125, "learning_rate": 0.00031795235343053146, "loss": 0.2076, "step": 298740 }, { "epoch": 12.37, "grad_norm": 1.0546875, "learning_rate": 0.0003179419164814176, "loss": 0.161, "step": 298750 }, { "epoch": 12.37, "grad_norm": 0.78515625, "learning_rate": 0.0003179314794044443, "loss": 0.2239, "step": 298760 }, { "epoch": 12.38, "grad_norm": 0.68359375, "learning_rate": 0.0003179210421996313, "loss": 0.2024, "step": 298770 }, { "epoch": 12.38, "grad_norm": 1.421875, "learning_rate": 0.00031791060486699826, "loss": 0.2006, "step": 298780 }, { "epoch": 12.38, "grad_norm": 1.3046875, "learning_rate": 0.0003179001674065648, "loss": 0.2077, "step": 298790 }, { "epoch": 12.38, "grad_norm": 0.86328125, "learning_rate": 0.0003178897298183506, "loss": 0.1445, "step": 298800 }, { "epoch": 12.38, "grad_norm": 1.1953125, "learning_rate": 0.00031787929210237514, "loss": 0.2229, "step": 298810 }, { "epoch": 12.38, "grad_norm": 0.76953125, "learning_rate": 0.00031786885425865824, "loss": 0.2109, "step": 298820 }, { "epoch": 12.38, "grad_norm": 0.7421875, "learning_rate": 0.0003178584162872195, "loss": 0.1695, "step": 298830 }, { "epoch": 12.38, "grad_norm": 0.359375, "learning_rate": 0.0003178479781880786, "loss": 0.1605, "step": 298840 }, { "epoch": 12.38, "grad_norm": 1.125, "learning_rate": 0.0003178375399612551, "loss": 0.1591, "step": 298850 }, { "epoch": 12.38, "grad_norm": 0.578125, "learning_rate": 0.0003178271016067686, "loss": 0.1854, "step": 298860 }, { "epoch": 12.38, "grad_norm": 0.6484375, "learning_rate": 0.00031781666312463887, "loss": 0.1632, "step": 298870 }, { "epoch": 12.38, "grad_norm": 1.0546875, "learning_rate": 0.00031780622451488554, "loss": 0.1888, "step": 298880 }, { "epoch": 12.38, "grad_norm": 0.380859375, "learning_rate": 0.00031779578577752817, "loss": 0.1933, "step": 298890 }, { "epoch": 12.38, "grad_norm": 0.66015625, "learning_rate": 0.0003177853469125865, "loss": 0.1763, "step": 298900 }, { "epoch": 12.38, "grad_norm": 0.419921875, "learning_rate": 0.00031777490792008, "loss": 0.2189, "step": 298910 }, { "epoch": 12.38, "grad_norm": 0.6015625, "learning_rate": 0.0003177644688000286, "loss": 0.2091, "step": 298920 }, { "epoch": 12.38, "grad_norm": 1.0, "learning_rate": 0.00031775402955245175, "loss": 0.2084, "step": 298930 }, { "epoch": 12.38, "grad_norm": 1.1640625, "learning_rate": 0.0003177435901773691, "loss": 0.2013, "step": 298940 }, { "epoch": 12.38, "grad_norm": 0.46875, "learning_rate": 0.0003177331506748003, "loss": 0.1286, "step": 298950 }, { "epoch": 12.38, "grad_norm": 1.109375, "learning_rate": 0.00031772271104476516, "loss": 0.2174, "step": 298960 }, { "epoch": 12.38, "grad_norm": 0.8359375, "learning_rate": 0.00031771227128728305, "loss": 0.1855, "step": 298970 }, { "epoch": 12.38, "grad_norm": 0.734375, "learning_rate": 0.00031770183140237374, "loss": 0.2194, "step": 298980 }, { "epoch": 12.38, "grad_norm": 0.59375, "learning_rate": 0.00031769139139005697, "loss": 0.1842, "step": 298990 }, { "epoch": 12.38, "grad_norm": 1.359375, "learning_rate": 0.0003176809512503523, "loss": 0.1779, "step": 299000 }, { "epoch": 12.38, "grad_norm": 0.83984375, "learning_rate": 0.00031767051098327944, "loss": 0.1921, "step": 299010 }, { "epoch": 12.39, "grad_norm": 0.65234375, "learning_rate": 0.0003176600705888579, "loss": 0.1846, "step": 299020 }, { "epoch": 12.39, "grad_norm": 0.84375, "learning_rate": 0.00031764963006710746, "loss": 0.2252, "step": 299030 }, { "epoch": 12.39, "grad_norm": 0.8828125, "learning_rate": 0.0003176391894180477, "loss": 0.1481, "step": 299040 }, { "epoch": 12.39, "grad_norm": 0.55859375, "learning_rate": 0.0003176287486416983, "loss": 0.1865, "step": 299050 }, { "epoch": 12.39, "grad_norm": 0.90625, "learning_rate": 0.00031761830773807887, "loss": 0.1855, "step": 299060 }, { "epoch": 12.39, "grad_norm": 0.8984375, "learning_rate": 0.0003176078667072091, "loss": 0.1724, "step": 299070 }, { "epoch": 12.39, "grad_norm": 0.78125, "learning_rate": 0.0003175974255491086, "loss": 0.1653, "step": 299080 }, { "epoch": 12.39, "grad_norm": 0.84765625, "learning_rate": 0.00031758698426379717, "loss": 0.1538, "step": 299090 }, { "epoch": 12.39, "grad_norm": 1.25, "learning_rate": 0.0003175765428512942, "loss": 0.1827, "step": 299100 }, { "epoch": 12.39, "grad_norm": 0.9296875, "learning_rate": 0.00031756610131161955, "loss": 0.2149, "step": 299110 }, { "epoch": 12.39, "grad_norm": 0.8671875, "learning_rate": 0.00031755565964479275, "loss": 0.1693, "step": 299120 }, { "epoch": 12.39, "grad_norm": 0.921875, "learning_rate": 0.0003175452178508335, "loss": 0.1819, "step": 299130 }, { "epoch": 12.39, "grad_norm": 2.28125, "learning_rate": 0.0003175347759297615, "loss": 0.2166, "step": 299140 }, { "epoch": 12.39, "grad_norm": 0.8515625, "learning_rate": 0.00031752433388159627, "loss": 0.1616, "step": 299150 }, { "epoch": 12.39, "grad_norm": 1.078125, "learning_rate": 0.0003175138917063575, "loss": 0.1551, "step": 299160 }, { "epoch": 12.39, "grad_norm": 0.90234375, "learning_rate": 0.0003175034494040651, "loss": 0.2086, "step": 299170 }, { "epoch": 12.39, "grad_norm": 1.34375, "learning_rate": 0.00031749300697473824, "loss": 0.2413, "step": 299180 }, { "epoch": 12.39, "grad_norm": 0.703125, "learning_rate": 0.00031748256441839696, "loss": 0.1753, "step": 299190 }, { "epoch": 12.39, "grad_norm": 0.92578125, "learning_rate": 0.0003174721217350608, "loss": 0.1505, "step": 299200 }, { "epoch": 12.39, "grad_norm": 1.21875, "learning_rate": 0.00031746167892474927, "loss": 0.1672, "step": 299210 }, { "epoch": 12.39, "grad_norm": 0.484375, "learning_rate": 0.0003174512359874822, "loss": 0.1668, "step": 299220 }, { "epoch": 12.39, "grad_norm": 0.80859375, "learning_rate": 0.0003174407929232792, "loss": 0.1609, "step": 299230 }, { "epoch": 12.39, "grad_norm": 0.443359375, "learning_rate": 0.0003174303497321599, "loss": 0.1322, "step": 299240 }, { "epoch": 12.39, "grad_norm": 1.2734375, "learning_rate": 0.00031741990641414404, "loss": 0.1892, "step": 299250 }, { "epoch": 12.4, "grad_norm": 0.93359375, "learning_rate": 0.0003174094629692511, "loss": 0.2299, "step": 299260 }, { "epoch": 12.4, "grad_norm": 1.6328125, "learning_rate": 0.00031739901939750086, "loss": 0.209, "step": 299270 }, { "epoch": 12.4, "grad_norm": 0.42578125, "learning_rate": 0.000317388575698913, "loss": 0.1807, "step": 299280 }, { "epoch": 12.4, "grad_norm": 0.36328125, "learning_rate": 0.00031737813187350706, "loss": 0.1354, "step": 299290 }, { "epoch": 12.4, "grad_norm": 1.0234375, "learning_rate": 0.00031736768792130275, "loss": 0.1905, "step": 299300 }, { "epoch": 12.4, "grad_norm": 1.734375, "learning_rate": 0.00031735724384231975, "loss": 0.1947, "step": 299310 }, { "epoch": 12.4, "grad_norm": 0.5546875, "learning_rate": 0.00031734679963657764, "loss": 0.1815, "step": 299320 }, { "epoch": 12.4, "grad_norm": 1.0625, "learning_rate": 0.0003173363553040962, "loss": 0.2372, "step": 299330 }, { "epoch": 12.4, "grad_norm": 1.6875, "learning_rate": 0.0003173259108448949, "loss": 0.2044, "step": 299340 }, { "epoch": 12.4, "grad_norm": 0.345703125, "learning_rate": 0.00031731546625899363, "loss": 0.1748, "step": 299350 }, { "epoch": 12.4, "grad_norm": 1.4296875, "learning_rate": 0.00031730502154641194, "loss": 0.2018, "step": 299360 }, { "epoch": 12.4, "grad_norm": 0.74609375, "learning_rate": 0.0003172945767071693, "loss": 0.2132, "step": 299370 }, { "epoch": 12.4, "grad_norm": 0.5546875, "learning_rate": 0.0003172841317412857, "loss": 0.2433, "step": 299380 }, { "epoch": 12.4, "grad_norm": 0.69921875, "learning_rate": 0.0003172736866487806, "loss": 0.1823, "step": 299390 }, { "epoch": 12.4, "grad_norm": 0.72265625, "learning_rate": 0.0003172632414296736, "loss": 0.1754, "step": 299400 }, { "epoch": 12.4, "grad_norm": 1.015625, "learning_rate": 0.00031725279608398456, "loss": 0.2391, "step": 299410 }, { "epoch": 12.4, "grad_norm": 0.671875, "learning_rate": 0.0003172423506117329, "loss": 0.1987, "step": 299420 }, { "epoch": 12.4, "grad_norm": 0.81640625, "learning_rate": 0.00031723190501293846, "loss": 0.1853, "step": 299430 }, { "epoch": 12.4, "grad_norm": 0.86328125, "learning_rate": 0.0003172214592876209, "loss": 0.1711, "step": 299440 }, { "epoch": 12.4, "grad_norm": 1.078125, "learning_rate": 0.00031721101343579964, "loss": 0.1884, "step": 299450 }, { "epoch": 12.4, "grad_norm": 0.193359375, "learning_rate": 0.0003172005674574947, "loss": 0.1777, "step": 299460 }, { "epoch": 12.4, "grad_norm": 0.294921875, "learning_rate": 0.0003171901213527254, "loss": 0.2178, "step": 299470 }, { "epoch": 12.4, "grad_norm": 0.7421875, "learning_rate": 0.00031717967512151165, "loss": 0.1776, "step": 299480 }, { "epoch": 12.4, "grad_norm": 0.65234375, "learning_rate": 0.000317169228763873, "loss": 0.2059, "step": 299490 }, { "epoch": 12.41, "grad_norm": 1.1640625, "learning_rate": 0.00031715878227982897, "loss": 0.2225, "step": 299500 }, { "epoch": 12.41, "grad_norm": 0.4140625, "learning_rate": 0.00031714833566939956, "loss": 0.1495, "step": 299510 }, { "epoch": 12.41, "grad_norm": 0.4296875, "learning_rate": 0.0003171378889326042, "loss": 0.1875, "step": 299520 }, { "epoch": 12.41, "grad_norm": 0.81640625, "learning_rate": 0.0003171274420694624, "loss": 0.197, "step": 299530 }, { "epoch": 12.41, "grad_norm": 1.859375, "learning_rate": 0.0003171169950799942, "loss": 0.2602, "step": 299540 }, { "epoch": 12.41, "grad_norm": 0.52734375, "learning_rate": 0.000317106547964219, "loss": 0.1884, "step": 299550 }, { "epoch": 12.41, "grad_norm": 0.859375, "learning_rate": 0.0003170961007221565, "loss": 0.1431, "step": 299560 }, { "epoch": 12.41, "grad_norm": 0.9765625, "learning_rate": 0.00031708565335382644, "loss": 0.2523, "step": 299570 }, { "epoch": 12.41, "grad_norm": 1.109375, "learning_rate": 0.0003170752058592483, "loss": 0.2011, "step": 299580 }, { "epoch": 12.41, "grad_norm": 1.28125, "learning_rate": 0.00031706475823844207, "loss": 0.2241, "step": 299590 }, { "epoch": 12.41, "grad_norm": 0.4921875, "learning_rate": 0.0003170543104914271, "loss": 0.1846, "step": 299600 }, { "epoch": 12.41, "grad_norm": 0.48828125, "learning_rate": 0.0003170438626182231, "loss": 0.2029, "step": 299610 }, { "epoch": 12.41, "grad_norm": 0.53515625, "learning_rate": 0.00031703341461884987, "loss": 0.2021, "step": 299620 }, { "epoch": 12.41, "grad_norm": 1.0, "learning_rate": 0.00031702296649332697, "loss": 0.2112, "step": 299630 }, { "epoch": 12.41, "grad_norm": 1.5078125, "learning_rate": 0.0003170125182416741, "loss": 0.233, "step": 299640 }, { "epoch": 12.41, "grad_norm": 0.9375, "learning_rate": 0.00031700206986391087, "loss": 0.1719, "step": 299650 }, { "epoch": 12.41, "grad_norm": 0.515625, "learning_rate": 0.00031699162136005697, "loss": 0.1699, "step": 299660 }, { "epoch": 12.41, "grad_norm": 0.77734375, "learning_rate": 0.00031698117273013215, "loss": 0.1445, "step": 299670 }, { "epoch": 12.41, "grad_norm": 0.8671875, "learning_rate": 0.00031697072397415593, "loss": 0.1894, "step": 299680 }, { "epoch": 12.41, "grad_norm": 0.53125, "learning_rate": 0.00031696027509214804, "loss": 0.2417, "step": 299690 }, { "epoch": 12.41, "grad_norm": 0.890625, "learning_rate": 0.0003169498260841282, "loss": 0.1666, "step": 299700 }, { "epoch": 12.41, "grad_norm": 2.6875, "learning_rate": 0.000316939376950116, "loss": 0.1962, "step": 299710 }, { "epoch": 12.41, "grad_norm": 1.1015625, "learning_rate": 0.00031692892769013105, "loss": 0.2017, "step": 299720 }, { "epoch": 12.41, "grad_norm": 0.82421875, "learning_rate": 0.0003169184783041932, "loss": 0.182, "step": 299730 }, { "epoch": 12.42, "grad_norm": 1.4453125, "learning_rate": 0.00031690802879232185, "loss": 0.1608, "step": 299740 }, { "epoch": 12.42, "grad_norm": 1.28125, "learning_rate": 0.00031689757915453695, "loss": 0.164, "step": 299750 }, { "epoch": 12.42, "grad_norm": 0.51953125, "learning_rate": 0.00031688712939085796, "loss": 0.2463, "step": 299760 }, { "epoch": 12.42, "grad_norm": 0.61328125, "learning_rate": 0.0003168766795013046, "loss": 0.205, "step": 299770 }, { "epoch": 12.42, "grad_norm": 0.54296875, "learning_rate": 0.00031686622948589666, "loss": 0.1939, "step": 299780 }, { "epoch": 12.42, "grad_norm": 0.0, "learning_rate": 0.0003168557793446536, "loss": 0.1993, "step": 299790 }, { "epoch": 12.42, "grad_norm": 0.8515625, "learning_rate": 0.0003168453290775952, "loss": 0.2211, "step": 299800 }, { "epoch": 12.42, "grad_norm": 0.75, "learning_rate": 0.0003168348786847411, "loss": 0.2685, "step": 299810 }, { "epoch": 12.42, "grad_norm": 0.70703125, "learning_rate": 0.000316824428166111, "loss": 0.1826, "step": 299820 }, { "epoch": 12.42, "grad_norm": 0.9296875, "learning_rate": 0.0003168139775217246, "loss": 0.1478, "step": 299830 }, { "epoch": 12.42, "grad_norm": 0.7890625, "learning_rate": 0.0003168035267516014, "loss": 0.2061, "step": 299840 }, { "epoch": 12.42, "grad_norm": 0.388671875, "learning_rate": 0.00031679307585576123, "loss": 0.2057, "step": 299850 }, { "epoch": 12.42, "grad_norm": 0.44140625, "learning_rate": 0.0003167826248342237, "loss": 0.1681, "step": 299860 }, { "epoch": 12.42, "grad_norm": 0.55078125, "learning_rate": 0.0003167721736870085, "loss": 0.1459, "step": 299870 }, { "epoch": 12.42, "grad_norm": 1.1171875, "learning_rate": 0.0003167617224141353, "loss": 0.1813, "step": 299880 }, { "epoch": 12.42, "grad_norm": 0.63671875, "learning_rate": 0.0003167512710156236, "loss": 0.1967, "step": 299890 }, { "epoch": 12.42, "grad_norm": 0.62890625, "learning_rate": 0.00031674081949149336, "loss": 0.2142, "step": 299900 }, { "epoch": 12.42, "grad_norm": 0.98828125, "learning_rate": 0.00031673036784176414, "loss": 0.2242, "step": 299910 }, { "epoch": 12.42, "grad_norm": 0.71875, "learning_rate": 0.00031671991606645547, "loss": 0.1796, "step": 299920 }, { "epoch": 12.42, "grad_norm": 0.416015625, "learning_rate": 0.0003167094641655871, "loss": 0.2154, "step": 299930 }, { "epoch": 12.42, "grad_norm": 0.58984375, "learning_rate": 0.00031669901213917886, "loss": 0.204, "step": 299940 }, { "epoch": 12.42, "grad_norm": 1.2734375, "learning_rate": 0.00031668855998725023, "loss": 0.1972, "step": 299950 }, { "epoch": 12.42, "grad_norm": 0.271484375, "learning_rate": 0.00031667810770982086, "loss": 0.1709, "step": 299960 }, { "epoch": 12.42, "grad_norm": 0.37890625, "learning_rate": 0.0003166676553069105, "loss": 0.1701, "step": 299970 }, { "epoch": 12.43, "grad_norm": 1.109375, "learning_rate": 0.00031665720277853883, "loss": 0.2606, "step": 299980 }, { "epoch": 12.43, "grad_norm": 1.6484375, "learning_rate": 0.00031664675012472554, "loss": 0.1747, "step": 299990 }, { "epoch": 12.43, "grad_norm": 0.49609375, "learning_rate": 0.00031663629734549027, "loss": 0.1847, "step": 300000 }, { "epoch": 12.43, "grad_norm": 0.5390625, "learning_rate": 0.00031662584444085265, "loss": 0.2369, "step": 300010 }, { "epoch": 12.43, "grad_norm": 1.265625, "learning_rate": 0.0003166153914108324, "loss": 0.2065, "step": 300020 }, { "epoch": 12.43, "grad_norm": 0.7734375, "learning_rate": 0.00031660493825544914, "loss": 0.2021, "step": 300030 }, { "epoch": 12.43, "grad_norm": 1.2890625, "learning_rate": 0.00031659448497472266, "loss": 0.17, "step": 300040 }, { "epoch": 12.43, "grad_norm": 1.4921875, "learning_rate": 0.00031658403156867244, "loss": 0.2024, "step": 300050 }, { "epoch": 12.43, "grad_norm": 1.2109375, "learning_rate": 0.0003165735780373183, "loss": 0.1968, "step": 300060 }, { "epoch": 12.43, "grad_norm": 1.3515625, "learning_rate": 0.00031656312438068, "loss": 0.2044, "step": 300070 }, { "epoch": 12.43, "grad_norm": 0.416015625, "learning_rate": 0.000316552670598777, "loss": 0.2086, "step": 300080 }, { "epoch": 12.43, "grad_norm": 0.53125, "learning_rate": 0.00031654221669162895, "loss": 0.1649, "step": 300090 }, { "epoch": 12.43, "grad_norm": 1.375, "learning_rate": 0.0003165317626592558, "loss": 0.2184, "step": 300100 }, { "epoch": 12.43, "grad_norm": 0.43359375, "learning_rate": 0.000316521308501677, "loss": 0.1639, "step": 300110 }, { "epoch": 12.43, "grad_norm": 1.4296875, "learning_rate": 0.00031651085421891223, "loss": 0.2124, "step": 300120 }, { "epoch": 12.43, "grad_norm": 0.59765625, "learning_rate": 0.0003165003998109813, "loss": 0.1593, "step": 300130 }, { "epoch": 12.43, "grad_norm": 0.9609375, "learning_rate": 0.0003164899452779037, "loss": 0.1785, "step": 300140 }, { "epoch": 12.43, "grad_norm": 1.15625, "learning_rate": 0.00031647949061969926, "loss": 0.2024, "step": 300150 }, { "epoch": 12.43, "grad_norm": 1.3359375, "learning_rate": 0.0003164690358363876, "loss": 0.1242, "step": 300160 }, { "epoch": 12.43, "grad_norm": 0.97265625, "learning_rate": 0.0003164585809279884, "loss": 0.1843, "step": 300170 }, { "epoch": 12.43, "grad_norm": 0.546875, "learning_rate": 0.0003164481258945213, "loss": 0.1831, "step": 300180 }, { "epoch": 12.43, "grad_norm": 1.2109375, "learning_rate": 0.00031643767073600605, "loss": 0.1689, "step": 300190 }, { "epoch": 12.43, "grad_norm": 1.625, "learning_rate": 0.0003164272154524622, "loss": 0.1627, "step": 300200 }, { "epoch": 12.43, "grad_norm": 0.53125, "learning_rate": 0.00031641676004390956, "loss": 0.1754, "step": 300210 }, { "epoch": 12.44, "grad_norm": 0.6015625, "learning_rate": 0.0003164063045103678, "loss": 0.1595, "step": 300220 }, { "epoch": 12.44, "grad_norm": 1.7109375, "learning_rate": 0.00031639584885185645, "loss": 0.1349, "step": 300230 }, { "epoch": 12.44, "grad_norm": 0.77734375, "learning_rate": 0.0003163853930683953, "loss": 0.2197, "step": 300240 }, { "epoch": 12.44, "grad_norm": 1.0078125, "learning_rate": 0.000316374937160004, "loss": 0.1922, "step": 300250 }, { "epoch": 12.44, "grad_norm": 0.98046875, "learning_rate": 0.0003163644811267023, "loss": 0.2287, "step": 300260 }, { "epoch": 12.44, "grad_norm": 1.265625, "learning_rate": 0.00031635402496850987, "loss": 0.1936, "step": 300270 }, { "epoch": 12.44, "grad_norm": 1.359375, "learning_rate": 0.0003163435686854462, "loss": 0.1795, "step": 300280 }, { "epoch": 12.44, "grad_norm": 0.8203125, "learning_rate": 0.00031633311227753115, "loss": 0.1881, "step": 300290 }, { "epoch": 12.44, "grad_norm": 0.703125, "learning_rate": 0.0003163226557447842, "loss": 0.1989, "step": 300300 }, { "epoch": 12.44, "grad_norm": 0.9453125, "learning_rate": 0.00031631219908722535, "loss": 0.2591, "step": 300310 }, { "epoch": 12.44, "grad_norm": 0.5234375, "learning_rate": 0.00031630174230487415, "loss": 0.1642, "step": 300320 }, { "epoch": 12.44, "grad_norm": 2.296875, "learning_rate": 0.0003162912853977501, "loss": 0.1922, "step": 300330 }, { "epoch": 12.44, "grad_norm": 1.0625, "learning_rate": 0.000316280828365873, "loss": 0.1958, "step": 300340 }, { "epoch": 12.44, "grad_norm": 1.1875, "learning_rate": 0.00031627037120926266, "loss": 0.2042, "step": 300350 }, { "epoch": 12.44, "grad_norm": 0.984375, "learning_rate": 0.0003162599139279386, "loss": 0.1824, "step": 300360 }, { "epoch": 12.44, "grad_norm": 1.3828125, "learning_rate": 0.0003162494565219204, "loss": 0.2342, "step": 300370 }, { "epoch": 12.44, "grad_norm": 0.7265625, "learning_rate": 0.00031623899899122806, "loss": 0.1723, "step": 300380 }, { "epoch": 12.44, "grad_norm": 0.408203125, "learning_rate": 0.00031622854133588096, "loss": 0.2001, "step": 300390 }, { "epoch": 12.44, "grad_norm": 0.478515625, "learning_rate": 0.00031621808355589896, "loss": 0.2073, "step": 300400 }, { "epoch": 12.44, "grad_norm": 1.109375, "learning_rate": 0.0003162076256513017, "loss": 0.1807, "step": 300410 }, { "epoch": 12.44, "grad_norm": 0.703125, "learning_rate": 0.0003161971676221087, "loss": 0.2065, "step": 300420 }, { "epoch": 12.44, "grad_norm": 2.84375, "learning_rate": 0.00031618670946833996, "loss": 0.2032, "step": 300430 }, { "epoch": 12.44, "grad_norm": 0.5859375, "learning_rate": 0.0003161762511900148, "loss": 0.2098, "step": 300440 }, { "epoch": 12.44, "grad_norm": 1.6875, "learning_rate": 0.00031616579278715323, "loss": 0.2242, "step": 300450 }, { "epoch": 12.45, "grad_norm": 0.7421875, "learning_rate": 0.00031615533425977474, "loss": 0.1642, "step": 300460 }, { "epoch": 12.45, "grad_norm": 0.6328125, "learning_rate": 0.000316144875607899, "loss": 0.2193, "step": 300470 }, { "epoch": 12.45, "grad_norm": 1.2421875, "learning_rate": 0.0003161344168315458, "loss": 0.153, "step": 300480 }, { "epoch": 12.45, "grad_norm": 0.59375, "learning_rate": 0.00031612395793073476, "loss": 0.1979, "step": 300490 }, { "epoch": 12.45, "grad_norm": 0.984375, "learning_rate": 0.0003161134989054856, "loss": 0.2122, "step": 300500 }, { "epoch": 12.45, "grad_norm": 0.55078125, "learning_rate": 0.0003161030397558179, "loss": 0.191, "step": 300510 }, { "epoch": 12.45, "grad_norm": 0.625, "learning_rate": 0.0003160925804817515, "loss": 0.1282, "step": 300520 }, { "epoch": 12.45, "grad_norm": 0.447265625, "learning_rate": 0.00031608212108330595, "loss": 0.2359, "step": 300530 }, { "epoch": 12.45, "grad_norm": 0.0, "learning_rate": 0.000316071661560501, "loss": 0.1904, "step": 300540 }, { "epoch": 12.45, "grad_norm": 0.69921875, "learning_rate": 0.0003160612019133563, "loss": 0.2253, "step": 300550 }, { "epoch": 12.45, "grad_norm": 0.69140625, "learning_rate": 0.0003160507421418916, "loss": 0.1668, "step": 300560 }, { "epoch": 12.45, "grad_norm": 0.515625, "learning_rate": 0.0003160402822461265, "loss": 0.1948, "step": 300570 }, { "epoch": 12.45, "grad_norm": 0.53125, "learning_rate": 0.00031602982222608074, "loss": 0.2025, "step": 300580 }, { "epoch": 12.45, "grad_norm": 0.98046875, "learning_rate": 0.00031601936208177397, "loss": 0.2377, "step": 300590 }, { "epoch": 12.45, "grad_norm": 1.5625, "learning_rate": 0.0003160089018132258, "loss": 0.2078, "step": 300600 }, { "epoch": 12.45, "grad_norm": 0.5234375, "learning_rate": 0.0003159984414204562, "loss": 0.1918, "step": 300610 }, { "epoch": 12.45, "grad_norm": 1.1875, "learning_rate": 0.0003159879809034845, "loss": 0.1472, "step": 300620 }, { "epoch": 12.45, "grad_norm": 0.921875, "learning_rate": 0.0003159775202623306, "loss": 0.2028, "step": 300630 }, { "epoch": 12.45, "grad_norm": 0.77734375, "learning_rate": 0.0003159670594970142, "loss": 0.1804, "step": 300640 }, { "epoch": 12.45, "grad_norm": 0.9609375, "learning_rate": 0.00031595659860755474, "loss": 0.1996, "step": 300650 }, { "epoch": 12.45, "grad_norm": 1.3515625, "learning_rate": 0.0003159461375939723, "loss": 0.2432, "step": 300660 }, { "epoch": 12.45, "grad_norm": 0.51953125, "learning_rate": 0.00031593567645628616, "loss": 0.1689, "step": 300670 }, { "epoch": 12.45, "grad_norm": 1.1171875, "learning_rate": 0.00031592521519451634, "loss": 0.1508, "step": 300680 }, { "epoch": 12.45, "grad_norm": 0.6953125, "learning_rate": 0.00031591475380868234, "loss": 0.2232, "step": 300690 }, { "epoch": 12.45, "grad_norm": 0.63671875, "learning_rate": 0.00031590429229880386, "loss": 0.2227, "step": 300700 }, { "epoch": 12.46, "grad_norm": 0.703125, "learning_rate": 0.0003158938306649006, "loss": 0.1991, "step": 300710 }, { "epoch": 12.46, "grad_norm": 1.0234375, "learning_rate": 0.0003158833689069924, "loss": 0.2091, "step": 300720 }, { "epoch": 12.46, "grad_norm": 0.56640625, "learning_rate": 0.0003158729070250986, "loss": 0.1371, "step": 300730 }, { "epoch": 12.46, "grad_norm": 0.37890625, "learning_rate": 0.00031586244501923927, "loss": 0.2104, "step": 300740 }, { "epoch": 12.46, "grad_norm": 0.455078125, "learning_rate": 0.0003158519828894339, "loss": 0.2437, "step": 300750 }, { "epoch": 12.46, "grad_norm": 0.0, "learning_rate": 0.00031584152063570217, "loss": 0.1602, "step": 300760 }, { "epoch": 12.46, "grad_norm": 1.125, "learning_rate": 0.0003158310582580639, "loss": 0.2133, "step": 300770 }, { "epoch": 12.46, "grad_norm": 0.76953125, "learning_rate": 0.0003158205957565386, "loss": 0.2322, "step": 300780 }, { "epoch": 12.46, "grad_norm": 0.875, "learning_rate": 0.00031581013313114605, "loss": 0.2749, "step": 300790 }, { "epoch": 12.46, "grad_norm": 0.640625, "learning_rate": 0.000315799670381906, "loss": 0.2145, "step": 300800 }, { "epoch": 12.46, "grad_norm": 0.6953125, "learning_rate": 0.00031578920750883793, "loss": 0.1974, "step": 300810 }, { "epoch": 12.46, "grad_norm": 0.3046875, "learning_rate": 0.0003157787445119618, "loss": 0.1837, "step": 300820 }, { "epoch": 12.46, "grad_norm": 1.5703125, "learning_rate": 0.00031576828139129716, "loss": 0.1729, "step": 300830 }, { "epoch": 12.46, "grad_norm": 1.46875, "learning_rate": 0.0003157578181468637, "loss": 0.211, "step": 300840 }, { "epoch": 12.46, "grad_norm": 1.4609375, "learning_rate": 0.00031574735477868116, "loss": 0.1764, "step": 300850 }, { "epoch": 12.46, "grad_norm": 0.87890625, "learning_rate": 0.0003157368912867692, "loss": 0.1854, "step": 300860 }, { "epoch": 12.46, "grad_norm": 0.9609375, "learning_rate": 0.00031572642767114747, "loss": 0.2435, "step": 300870 }, { "epoch": 12.46, "grad_norm": 1.359375, "learning_rate": 0.0003157159639318357, "loss": 0.2422, "step": 300880 }, { "epoch": 12.46, "grad_norm": 1.34375, "learning_rate": 0.00031570550006885357, "loss": 0.1741, "step": 300890 }, { "epoch": 12.46, "grad_norm": 0.953125, "learning_rate": 0.0003156950360822208, "loss": 0.2065, "step": 300900 }, { "epoch": 12.46, "grad_norm": 0.62109375, "learning_rate": 0.0003156845719719571, "loss": 0.2567, "step": 300910 }, { "epoch": 12.46, "grad_norm": 0.6953125, "learning_rate": 0.0003156741077380821, "loss": 0.168, "step": 300920 }, { "epoch": 12.46, "grad_norm": 0.52734375, "learning_rate": 0.00031566364338061554, "loss": 0.2041, "step": 300930 }, { "epoch": 12.46, "grad_norm": 0.423828125, "learning_rate": 0.0003156531788995771, "loss": 0.1793, "step": 300940 }, { "epoch": 12.47, "grad_norm": 0.88671875, "learning_rate": 0.0003156427142949864, "loss": 0.1519, "step": 300950 }, { "epoch": 12.47, "grad_norm": 0.412109375, "learning_rate": 0.0003156322495668633, "loss": 0.1797, "step": 300960 }, { "epoch": 12.47, "grad_norm": 0.70703125, "learning_rate": 0.0003156217847152273, "loss": 0.1697, "step": 300970 }, { "epoch": 12.47, "grad_norm": 0.68359375, "learning_rate": 0.00031561131974009826, "loss": 0.1989, "step": 300980 }, { "epoch": 12.47, "grad_norm": 1.15625, "learning_rate": 0.0003156008546414958, "loss": 0.1891, "step": 300990 }, { "epoch": 12.47, "grad_norm": 1.03125, "learning_rate": 0.0003155903894194395, "loss": 0.2078, "step": 301000 }, { "epoch": 12.47, "grad_norm": 0.74609375, "learning_rate": 0.0003155799240739493, "loss": 0.1482, "step": 301010 }, { "epoch": 12.47, "grad_norm": 1.078125, "learning_rate": 0.0003155694586050447, "loss": 0.1857, "step": 301020 }, { "epoch": 12.47, "grad_norm": 0.75390625, "learning_rate": 0.0003155589930127455, "loss": 0.2068, "step": 301030 }, { "epoch": 12.47, "grad_norm": 1.6953125, "learning_rate": 0.00031554852729707137, "loss": 0.1833, "step": 301040 }, { "epoch": 12.47, "grad_norm": 0.828125, "learning_rate": 0.0003155380614580419, "loss": 0.2113, "step": 301050 }, { "epoch": 12.47, "grad_norm": 0.0, "learning_rate": 0.000315527595495677, "loss": 0.2075, "step": 301060 }, { "epoch": 12.47, "grad_norm": 0.68359375, "learning_rate": 0.00031551712940999614, "loss": 0.2437, "step": 301070 }, { "epoch": 12.47, "grad_norm": 0.8203125, "learning_rate": 0.0003155066632010192, "loss": 0.2157, "step": 301080 }, { "epoch": 12.47, "grad_norm": 0.453125, "learning_rate": 0.0003154961968687657, "loss": 0.2028, "step": 301090 }, { "epoch": 12.47, "grad_norm": 1.03125, "learning_rate": 0.0003154857304132555, "loss": 0.2228, "step": 301100 }, { "epoch": 12.47, "grad_norm": 1.0546875, "learning_rate": 0.0003154752638345082, "loss": 0.1971, "step": 301110 }, { "epoch": 12.47, "grad_norm": 0.62109375, "learning_rate": 0.00031546479713254357, "loss": 0.179, "step": 301120 }, { "epoch": 12.47, "grad_norm": 0.65234375, "learning_rate": 0.0003154543303073811, "loss": 0.1931, "step": 301130 }, { "epoch": 12.47, "grad_norm": 1.140625, "learning_rate": 0.00031544386335904083, "loss": 0.1818, "step": 301140 }, { "epoch": 12.47, "grad_norm": 1.78125, "learning_rate": 0.0003154333962875423, "loss": 0.156, "step": 301150 }, { "epoch": 12.47, "grad_norm": 0.67578125, "learning_rate": 0.00031542292909290503, "loss": 0.2229, "step": 301160 }, { "epoch": 12.47, "grad_norm": 1.1875, "learning_rate": 0.000315412461775149, "loss": 0.219, "step": 301170 }, { "epoch": 12.47, "grad_norm": 1.296875, "learning_rate": 0.0003154019943342937, "loss": 0.1976, "step": 301180 }, { "epoch": 12.48, "grad_norm": 0.7890625, "learning_rate": 0.00031539152677035893, "loss": 0.2081, "step": 301190 }, { "epoch": 12.48, "grad_norm": 0.68359375, "learning_rate": 0.00031538105908336434, "loss": 0.1764, "step": 301200 }, { "epoch": 12.48, "grad_norm": 0.55859375, "learning_rate": 0.00031537059127332965, "loss": 0.175, "step": 301210 }, { "epoch": 12.48, "grad_norm": 0.96484375, "learning_rate": 0.00031536012334027467, "loss": 0.1685, "step": 301220 }, { "epoch": 12.48, "grad_norm": 0.95703125, "learning_rate": 0.00031534965528421896, "loss": 0.1628, "step": 301230 }, { "epoch": 12.48, "grad_norm": 1.1171875, "learning_rate": 0.0003153391871051822, "loss": 0.1907, "step": 301240 }, { "epoch": 12.48, "grad_norm": 0.9765625, "learning_rate": 0.0003153287188031842, "loss": 0.1803, "step": 301250 }, { "epoch": 12.48, "grad_norm": 2.671875, "learning_rate": 0.00031531825037824454, "loss": 0.2517, "step": 301260 }, { "epoch": 12.48, "grad_norm": 0.93359375, "learning_rate": 0.00031530778183038306, "loss": 0.1319, "step": 301270 }, { "epoch": 12.48, "grad_norm": 1.0390625, "learning_rate": 0.0003152973131596193, "loss": 0.2232, "step": 301280 }, { "epoch": 12.48, "grad_norm": 1.21875, "learning_rate": 0.0003152868443659731, "loss": 0.1803, "step": 301290 }, { "epoch": 12.48, "grad_norm": 0.59375, "learning_rate": 0.0003152763754494642, "loss": 0.1767, "step": 301300 }, { "epoch": 12.48, "grad_norm": 0.7734375, "learning_rate": 0.0003152659064101121, "loss": 0.2071, "step": 301310 }, { "epoch": 12.48, "grad_norm": 0.7578125, "learning_rate": 0.0003152554372479366, "loss": 0.1849, "step": 301320 }, { "epoch": 12.48, "grad_norm": 0.357421875, "learning_rate": 0.00031524496796295746, "loss": 0.1827, "step": 301330 }, { "epoch": 12.48, "grad_norm": 1.015625, "learning_rate": 0.00031523449855519426, "loss": 0.1861, "step": 301340 }, { "epoch": 12.48, "grad_norm": 0.8515625, "learning_rate": 0.0003152240290246669, "loss": 0.1607, "step": 301350 }, { "epoch": 12.48, "grad_norm": 0.44921875, "learning_rate": 0.0003152135593713949, "loss": 0.1646, "step": 301360 }, { "epoch": 12.48, "grad_norm": 0.72265625, "learning_rate": 0.000315203089595398, "loss": 0.2336, "step": 301370 }, { "epoch": 12.48, "grad_norm": 1.9921875, "learning_rate": 0.000315192619696696, "loss": 0.1741, "step": 301380 }, { "epoch": 12.48, "grad_norm": 0.890625, "learning_rate": 0.00031518214967530845, "loss": 0.1989, "step": 301390 }, { "epoch": 12.48, "grad_norm": 0.7734375, "learning_rate": 0.0003151716795312552, "loss": 0.2047, "step": 301400 }, { "epoch": 12.48, "grad_norm": 0.80859375, "learning_rate": 0.0003151612092645558, "loss": 0.1743, "step": 301410 }, { "epoch": 12.48, "grad_norm": 0.5546875, "learning_rate": 0.0003151507388752301, "loss": 0.1788, "step": 301420 }, { "epoch": 12.49, "grad_norm": 2.015625, "learning_rate": 0.0003151402683632978, "loss": 0.14, "step": 301430 }, { "epoch": 12.49, "grad_norm": 0.73046875, "learning_rate": 0.00031512979772877847, "loss": 0.2188, "step": 301440 }, { "epoch": 12.49, "grad_norm": 1.0703125, "learning_rate": 0.0003151193269716919, "loss": 0.2508, "step": 301450 }, { "epoch": 12.49, "grad_norm": 0.9375, "learning_rate": 0.00031510885609205785, "loss": 0.1535, "step": 301460 }, { "epoch": 12.49, "grad_norm": 0.76953125, "learning_rate": 0.0003150983850898959, "loss": 0.1492, "step": 301470 }, { "epoch": 12.49, "grad_norm": 0.9375, "learning_rate": 0.0003150879139652258, "loss": 0.1488, "step": 301480 }, { "epoch": 12.49, "grad_norm": 0.34765625, "learning_rate": 0.0003150774427180673, "loss": 0.1487, "step": 301490 }, { "epoch": 12.49, "grad_norm": 0.80078125, "learning_rate": 0.00031506697134844007, "loss": 0.1668, "step": 301500 }, { "epoch": 12.49, "grad_norm": 0.58984375, "learning_rate": 0.0003150564998563639, "loss": 0.2035, "step": 301510 }, { "epoch": 12.49, "grad_norm": 0.451171875, "learning_rate": 0.0003150460282418584, "loss": 0.2401, "step": 301520 }, { "epoch": 12.49, "grad_norm": 1.21875, "learning_rate": 0.0003150355565049432, "loss": 0.2139, "step": 301530 }, { "epoch": 12.49, "grad_norm": 0.9609375, "learning_rate": 0.00031502508464563824, "loss": 0.1986, "step": 301540 }, { "epoch": 12.49, "grad_norm": 0.640625, "learning_rate": 0.00031501461266396304, "loss": 0.2145, "step": 301550 }, { "epoch": 12.49, "grad_norm": 2.546875, "learning_rate": 0.0003150041405599373, "loss": 0.1954, "step": 301560 }, { "epoch": 12.49, "grad_norm": 0.53125, "learning_rate": 0.0003149936683335808, "loss": 0.1693, "step": 301570 }, { "epoch": 12.49, "grad_norm": 0.244140625, "learning_rate": 0.0003149831959849133, "loss": 0.1916, "step": 301580 }, { "epoch": 12.49, "grad_norm": 0.796875, "learning_rate": 0.00031497272351395443, "loss": 0.1631, "step": 301590 }, { "epoch": 12.49, "grad_norm": 0.96875, "learning_rate": 0.0003149622509207239, "loss": 0.2242, "step": 301600 }, { "epoch": 12.49, "grad_norm": 0.8515625, "learning_rate": 0.0003149517782052414, "loss": 0.1611, "step": 301610 }, { "epoch": 12.49, "grad_norm": 1.0703125, "learning_rate": 0.0003149413053675267, "loss": 0.1545, "step": 301620 }, { "epoch": 12.49, "grad_norm": 1.21875, "learning_rate": 0.0003149308324075995, "loss": 0.2381, "step": 301630 }, { "epoch": 12.49, "grad_norm": 0.58984375, "learning_rate": 0.0003149203593254795, "loss": 0.1974, "step": 301640 }, { "epoch": 12.49, "grad_norm": 1.7109375, "learning_rate": 0.00031490988612118624, "loss": 0.2415, "step": 301650 }, { "epoch": 12.49, "grad_norm": 1.0390625, "learning_rate": 0.00031489941279473974, "loss": 0.1711, "step": 301660 }, { "epoch": 12.5, "grad_norm": 0.494140625, "learning_rate": 0.00031488893934615954, "loss": 0.2099, "step": 301670 }, { "epoch": 12.5, "grad_norm": 0.8203125, "learning_rate": 0.00031487846577546533, "loss": 0.2105, "step": 301680 }, { "epoch": 12.5, "grad_norm": 0.609375, "learning_rate": 0.0003148679920826768, "loss": 0.1658, "step": 301690 }, { "epoch": 12.5, "grad_norm": 1.625, "learning_rate": 0.00031485751826781374, "loss": 0.221, "step": 301700 }, { "epoch": 12.5, "grad_norm": 0.62890625, "learning_rate": 0.00031484704433089593, "loss": 0.1972, "step": 301710 }, { "epoch": 12.5, "grad_norm": 0.365234375, "learning_rate": 0.00031483657027194293, "loss": 0.2004, "step": 301720 }, { "epoch": 12.5, "grad_norm": 1.21875, "learning_rate": 0.00031482609609097446, "loss": 0.2032, "step": 301730 }, { "epoch": 12.5, "grad_norm": 0.439453125, "learning_rate": 0.00031481562178801027, "loss": 0.1564, "step": 301740 }, { "epoch": 12.5, "grad_norm": 0.67578125, "learning_rate": 0.0003148051473630701, "loss": 0.2511, "step": 301750 }, { "epoch": 12.5, "grad_norm": 0.87109375, "learning_rate": 0.0003147946728161737, "loss": 0.1831, "step": 301760 }, { "epoch": 12.5, "grad_norm": 1.0078125, "learning_rate": 0.00031478419814734074, "loss": 0.2038, "step": 301770 }, { "epoch": 12.5, "grad_norm": 0.5234375, "learning_rate": 0.00031477372335659084, "loss": 0.1819, "step": 301780 }, { "epoch": 12.5, "grad_norm": 0.99609375, "learning_rate": 0.00031476324844394377, "loss": 0.1491, "step": 301790 }, { "epoch": 12.5, "grad_norm": 1.0703125, "learning_rate": 0.0003147527734094193, "loss": 0.1807, "step": 301800 }, { "epoch": 12.5, "grad_norm": 2.109375, "learning_rate": 0.00031474229825303714, "loss": 0.2043, "step": 301810 }, { "epoch": 12.5, "grad_norm": 0.74609375, "learning_rate": 0.0003147318229748169, "loss": 0.1776, "step": 301820 }, { "epoch": 12.5, "grad_norm": 1.1328125, "learning_rate": 0.0003147213475747783, "loss": 0.2214, "step": 301830 }, { "epoch": 12.5, "grad_norm": 0.91015625, "learning_rate": 0.0003147108720529413, "loss": 0.2006, "step": 301840 }, { "epoch": 12.5, "grad_norm": 0.34375, "learning_rate": 0.0003147003964093253, "loss": 0.176, "step": 301850 }, { "epoch": 12.5, "grad_norm": 0.625, "learning_rate": 0.0003146899206439502, "loss": 0.1798, "step": 301860 }, { "epoch": 12.5, "grad_norm": 0.8828125, "learning_rate": 0.00031467944475683564, "loss": 0.1569, "step": 301870 }, { "epoch": 12.5, "grad_norm": 0.79296875, "learning_rate": 0.00031466896874800135, "loss": 0.1449, "step": 301880 }, { "epoch": 12.5, "grad_norm": 0.6484375, "learning_rate": 0.00031465849261746694, "loss": 0.2113, "step": 301890 }, { "epoch": 12.5, "grad_norm": 0.765625, "learning_rate": 0.0003146480163652523, "loss": 0.2075, "step": 301900 }, { "epoch": 12.51, "grad_norm": 0.73046875, "learning_rate": 0.0003146375399913771, "loss": 0.2197, "step": 301910 }, { "epoch": 12.51, "grad_norm": 2.046875, "learning_rate": 0.00031462706349586107, "loss": 0.2235, "step": 301920 }, { "epoch": 12.51, "grad_norm": 0.66015625, "learning_rate": 0.0003146165868787238, "loss": 0.1911, "step": 301930 }, { "epoch": 12.51, "grad_norm": 0.88671875, "learning_rate": 0.0003146061101399851, "loss": 0.1636, "step": 301940 }, { "epoch": 12.51, "grad_norm": 2.171875, "learning_rate": 0.0003145956332796648, "loss": 0.1611, "step": 301950 }, { "epoch": 12.51, "grad_norm": 0.71484375, "learning_rate": 0.00031458515629778226, "loss": 0.2309, "step": 301960 }, { "epoch": 12.51, "grad_norm": 0.73828125, "learning_rate": 0.0003145746791943577, "loss": 0.1522, "step": 301970 }, { "epoch": 12.51, "grad_norm": 1.171875, "learning_rate": 0.00031456420196941036, "loss": 0.1757, "step": 301980 }, { "epoch": 12.51, "grad_norm": 0.95703125, "learning_rate": 0.0003145537246229603, "loss": 0.1752, "step": 301990 }, { "epoch": 12.51, "grad_norm": 0.380859375, "learning_rate": 0.00031454324715502705, "loss": 0.1788, "step": 302000 }, { "epoch": 12.51, "grad_norm": 0.7578125, "learning_rate": 0.00031453276956563037, "loss": 0.2121, "step": 302010 }, { "epoch": 12.51, "grad_norm": 0.5625, "learning_rate": 0.00031452229185479, "loss": 0.1438, "step": 302020 }, { "epoch": 12.51, "grad_norm": 0.77734375, "learning_rate": 0.0003145118140225256, "loss": 0.187, "step": 302030 }, { "epoch": 12.51, "grad_norm": 0.376953125, "learning_rate": 0.0003145013360688569, "loss": 0.2208, "step": 302040 }, { "epoch": 12.51, "grad_norm": 1.1484375, "learning_rate": 0.0003144908579938038, "loss": 0.207, "step": 302050 }, { "epoch": 12.51, "grad_norm": 0.326171875, "learning_rate": 0.00031448037979738577, "loss": 0.1432, "step": 302060 }, { "epoch": 12.51, "grad_norm": 0.6640625, "learning_rate": 0.0003144699014796226, "loss": 0.2179, "step": 302070 }, { "epoch": 12.51, "grad_norm": 0.3359375, "learning_rate": 0.00031445942304053414, "loss": 0.1905, "step": 302080 }, { "epoch": 12.51, "grad_norm": 0.57421875, "learning_rate": 0.0003144489444801399, "loss": 0.1702, "step": 302090 }, { "epoch": 12.51, "grad_norm": 1.84375, "learning_rate": 0.0003144384657984598, "loss": 0.1827, "step": 302100 }, { "epoch": 12.51, "grad_norm": 1.3046875, "learning_rate": 0.00031442798699551347, "loss": 0.2232, "step": 302110 }, { "epoch": 12.51, "grad_norm": 1.09375, "learning_rate": 0.00031441750807132047, "loss": 0.2306, "step": 302120 }, { "epoch": 12.51, "grad_norm": 0.4375, "learning_rate": 0.0003144070290259008, "loss": 0.1931, "step": 302130 }, { "epoch": 12.51, "grad_norm": 0.5703125, "learning_rate": 0.0003143965498592741, "loss": 0.2204, "step": 302140 }, { "epoch": 12.52, "grad_norm": 1.4921875, "learning_rate": 0.00031438607057145997, "loss": 0.1727, "step": 302150 }, { "epoch": 12.52, "grad_norm": 0.91015625, "learning_rate": 0.0003143755911624782, "loss": 0.1861, "step": 302160 }, { "epoch": 12.52, "grad_norm": 1.71875, "learning_rate": 0.0003143651116323485, "loss": 0.189, "step": 302170 }, { "epoch": 12.52, "grad_norm": 0.42578125, "learning_rate": 0.0003143546319810907, "loss": 0.1959, "step": 302180 }, { "epoch": 12.52, "grad_norm": 0.953125, "learning_rate": 0.0003143441522087244, "loss": 0.1809, "step": 302190 }, { "epoch": 12.52, "grad_norm": 1.546875, "learning_rate": 0.00031433367231526935, "loss": 0.1823, "step": 302200 }, { "epoch": 12.52, "grad_norm": 0.859375, "learning_rate": 0.00031432319230074524, "loss": 0.1766, "step": 302210 }, { "epoch": 12.52, "grad_norm": 1.1640625, "learning_rate": 0.00031431271216517185, "loss": 0.1701, "step": 302220 }, { "epoch": 12.52, "grad_norm": 0.85546875, "learning_rate": 0.0003143022319085689, "loss": 0.1567, "step": 302230 }, { "epoch": 12.52, "grad_norm": 1.28125, "learning_rate": 0.00031429175153095614, "loss": 0.1901, "step": 302240 }, { "epoch": 12.52, "grad_norm": 0.494140625, "learning_rate": 0.0003142812710323531, "loss": 0.1801, "step": 302250 }, { "epoch": 12.52, "grad_norm": 0.58203125, "learning_rate": 0.0003142707904127798, "loss": 0.1731, "step": 302260 }, { "epoch": 12.52, "grad_norm": 0.796875, "learning_rate": 0.0003142603096722558, "loss": 0.1535, "step": 302270 }, { "epoch": 12.52, "grad_norm": 0.921875, "learning_rate": 0.00031424982881080065, "loss": 0.1777, "step": 302280 }, { "epoch": 12.52, "grad_norm": 1.1953125, "learning_rate": 0.0003142393478284345, "loss": 0.1497, "step": 302290 }, { "epoch": 12.52, "grad_norm": 1.0078125, "learning_rate": 0.0003142288667251767, "loss": 0.2027, "step": 302300 }, { "epoch": 12.52, "grad_norm": 0.5390625, "learning_rate": 0.00031421838550104716, "loss": 0.1888, "step": 302310 }, { "epoch": 12.52, "grad_norm": 0.275390625, "learning_rate": 0.00031420790415606555, "loss": 0.2273, "step": 302320 }, { "epoch": 12.52, "grad_norm": 0.306640625, "learning_rate": 0.0003141974226902515, "loss": 0.2192, "step": 302330 }, { "epoch": 12.52, "grad_norm": 0.59375, "learning_rate": 0.000314186941103625, "loss": 0.2215, "step": 302340 }, { "epoch": 12.52, "grad_norm": 0.89453125, "learning_rate": 0.00031417645939620553, "loss": 0.1933, "step": 302350 }, { "epoch": 12.52, "grad_norm": 1.2734375, "learning_rate": 0.00031416597756801285, "loss": 0.2272, "step": 302360 }, { "epoch": 12.52, "grad_norm": 0.609375, "learning_rate": 0.00031415549561906683, "loss": 0.213, "step": 302370 }, { "epoch": 12.52, "grad_norm": 2.296875, "learning_rate": 0.000314145013549387, "loss": 0.2242, "step": 302380 }, { "epoch": 12.52, "grad_norm": 0.294921875, "learning_rate": 0.0003141345313589932, "loss": 0.2062, "step": 302390 }, { "epoch": 12.53, "grad_norm": 1.109375, "learning_rate": 0.00031412404904790516, "loss": 0.156, "step": 302400 }, { "epoch": 12.53, "grad_norm": 0.5390625, "learning_rate": 0.00031411356661614255, "loss": 0.1912, "step": 302410 }, { "epoch": 12.53, "grad_norm": 1.0703125, "learning_rate": 0.00031410308406372515, "loss": 0.1807, "step": 302420 }, { "epoch": 12.53, "grad_norm": 0.578125, "learning_rate": 0.00031409260139067275, "loss": 0.1949, "step": 302430 }, { "epoch": 12.53, "grad_norm": 0.90625, "learning_rate": 0.00031408211859700486, "loss": 0.208, "step": 302440 }, { "epoch": 12.53, "grad_norm": 1.1640625, "learning_rate": 0.00031407163568274144, "loss": 0.1603, "step": 302450 }, { "epoch": 12.53, "grad_norm": 1.15625, "learning_rate": 0.0003140611526479021, "loss": 0.1848, "step": 302460 }, { "epoch": 12.53, "grad_norm": 1.2109375, "learning_rate": 0.00031405066949250653, "loss": 0.21, "step": 302470 }, { "epoch": 12.53, "grad_norm": 0.578125, "learning_rate": 0.00031404018621657457, "loss": 0.2495, "step": 302480 }, { "epoch": 12.53, "grad_norm": 1.0625, "learning_rate": 0.0003140297028201259, "loss": 0.2238, "step": 302490 }, { "epoch": 12.53, "grad_norm": 0.67578125, "learning_rate": 0.00031401921930318023, "loss": 0.2111, "step": 302500 }, { "epoch": 12.53, "grad_norm": 1.015625, "learning_rate": 0.00031400873566575727, "loss": 0.2041, "step": 302510 }, { "epoch": 12.53, "grad_norm": 0.63671875, "learning_rate": 0.00031399825190787684, "loss": 0.1422, "step": 302520 }, { "epoch": 12.53, "grad_norm": 0.8828125, "learning_rate": 0.0003139877680295586, "loss": 0.214, "step": 302530 }, { "epoch": 12.53, "grad_norm": 1.390625, "learning_rate": 0.00031397728403082225, "loss": 0.2034, "step": 302540 }, { "epoch": 12.53, "grad_norm": 0.1181640625, "learning_rate": 0.00031396679991168766, "loss": 0.1921, "step": 302550 }, { "epoch": 12.53, "grad_norm": 0.6484375, "learning_rate": 0.0003139563156721744, "loss": 0.19, "step": 302560 }, { "epoch": 12.53, "grad_norm": 1.171875, "learning_rate": 0.00031394583131230214, "loss": 0.1734, "step": 302570 }, { "epoch": 12.53, "grad_norm": 1.171875, "learning_rate": 0.00031393534683209093, "loss": 0.197, "step": 302580 }, { "epoch": 12.53, "grad_norm": 0.53125, "learning_rate": 0.00031392486223156017, "loss": 0.171, "step": 302590 }, { "epoch": 12.53, "grad_norm": 1.6875, "learning_rate": 0.00031391437751072975, "loss": 0.206, "step": 302600 }, { "epoch": 12.53, "grad_norm": 0.7265625, "learning_rate": 0.00031390389266961946, "loss": 0.1932, "step": 302610 }, { "epoch": 12.53, "grad_norm": 0.609375, "learning_rate": 0.0003138934077082488, "loss": 0.2052, "step": 302620 }, { "epoch": 12.53, "grad_norm": 0.63671875, "learning_rate": 0.00031388292262663777, "loss": 0.1545, "step": 302630 }, { "epoch": 12.54, "grad_norm": 0.98046875, "learning_rate": 0.00031387243742480594, "loss": 0.1631, "step": 302640 }, { "epoch": 12.54, "grad_norm": 0.84375, "learning_rate": 0.000313861952102773, "loss": 0.1557, "step": 302650 }, { "epoch": 12.54, "grad_norm": 0.87890625, "learning_rate": 0.00031385146666055886, "loss": 0.1939, "step": 302660 }, { "epoch": 12.54, "grad_norm": 0.81640625, "learning_rate": 0.0003138409810981831, "loss": 0.1945, "step": 302670 }, { "epoch": 12.54, "grad_norm": 1.0546875, "learning_rate": 0.00031383049541566555, "loss": 0.1863, "step": 302680 }, { "epoch": 12.54, "grad_norm": 0.8203125, "learning_rate": 0.00031382000961302595, "loss": 0.1771, "step": 302690 }, { "epoch": 12.54, "grad_norm": 0.5703125, "learning_rate": 0.00031380952369028386, "loss": 0.2194, "step": 302700 }, { "epoch": 12.54, "grad_norm": 0.6796875, "learning_rate": 0.00031379903764745923, "loss": 0.2265, "step": 302710 }, { "epoch": 12.54, "grad_norm": 1.2109375, "learning_rate": 0.0003137885514845716, "loss": 0.1695, "step": 302720 }, { "epoch": 12.54, "grad_norm": 1.03125, "learning_rate": 0.00031377806520164087, "loss": 0.2434, "step": 302730 }, { "epoch": 12.54, "grad_norm": 0.7109375, "learning_rate": 0.0003137675787986868, "loss": 0.2113, "step": 302740 }, { "epoch": 12.54, "grad_norm": 0.66015625, "learning_rate": 0.0003137570922757289, "loss": 0.1817, "step": 302750 }, { "epoch": 12.54, "grad_norm": 0.302734375, "learning_rate": 0.0003137466056327871, "loss": 0.1784, "step": 302760 }, { "epoch": 12.54, "grad_norm": 0.94921875, "learning_rate": 0.00031373611886988107, "loss": 0.1949, "step": 302770 }, { "epoch": 12.54, "grad_norm": 0.91796875, "learning_rate": 0.0003137256319870305, "loss": 0.2172, "step": 302780 }, { "epoch": 12.54, "grad_norm": 0.69140625, "learning_rate": 0.0003137151449842552, "loss": 0.1505, "step": 302790 }, { "epoch": 12.54, "grad_norm": 0.9453125, "learning_rate": 0.0003137046578615749, "loss": 0.2033, "step": 302800 }, { "epoch": 12.54, "grad_norm": 0.859375, "learning_rate": 0.00031369417061900926, "loss": 0.1555, "step": 302810 }, { "epoch": 12.54, "grad_norm": 0.85546875, "learning_rate": 0.00031368368325657815, "loss": 0.2175, "step": 302820 }, { "epoch": 12.54, "grad_norm": 0.75390625, "learning_rate": 0.00031367319577430117, "loss": 0.197, "step": 302830 }, { "epoch": 12.54, "grad_norm": 0.318359375, "learning_rate": 0.0003136627081721981, "loss": 0.1431, "step": 302840 }, { "epoch": 12.54, "grad_norm": 0.4765625, "learning_rate": 0.00031365222045028876, "loss": 0.1976, "step": 302850 }, { "epoch": 12.54, "grad_norm": 1.1484375, "learning_rate": 0.00031364173260859274, "loss": 0.1979, "step": 302860 }, { "epoch": 12.54, "grad_norm": 1.1328125, "learning_rate": 0.00031363124464712984, "loss": 0.1453, "step": 302870 }, { "epoch": 12.55, "grad_norm": 1.546875, "learning_rate": 0.0003136207565659198, "loss": 0.1956, "step": 302880 }, { "epoch": 12.55, "grad_norm": 1.1171875, "learning_rate": 0.0003136102683649824, "loss": 0.1945, "step": 302890 }, { "epoch": 12.55, "grad_norm": 0.71484375, "learning_rate": 0.0003135997800443374, "loss": 0.1604, "step": 302900 }, { "epoch": 12.55, "grad_norm": 0.8984375, "learning_rate": 0.0003135892916040045, "loss": 0.2, "step": 302910 }, { "epoch": 12.55, "grad_norm": 0.322265625, "learning_rate": 0.0003135788030440033, "loss": 0.1455, "step": 302920 }, { "epoch": 12.55, "grad_norm": 0.53125, "learning_rate": 0.00031356831436435374, "loss": 0.222, "step": 302930 }, { "epoch": 12.55, "grad_norm": 0.953125, "learning_rate": 0.00031355782556507546, "loss": 0.2932, "step": 302940 }, { "epoch": 12.55, "grad_norm": 0.55859375, "learning_rate": 0.00031354733664618815, "loss": 0.1819, "step": 302950 }, { "epoch": 12.55, "grad_norm": 0.55859375, "learning_rate": 0.0003135368476077117, "loss": 0.1618, "step": 302960 }, { "epoch": 12.55, "grad_norm": 1.0703125, "learning_rate": 0.0003135263584496657, "loss": 0.179, "step": 302970 }, { "epoch": 12.55, "grad_norm": 0.439453125, "learning_rate": 0.0003135158691720701, "loss": 0.1946, "step": 302980 }, { "epoch": 12.55, "grad_norm": 0.8046875, "learning_rate": 0.0003135053797749443, "loss": 0.1863, "step": 302990 }, { "epoch": 12.55, "grad_norm": 0.40234375, "learning_rate": 0.00031349489025830836, "loss": 0.1685, "step": 303000 }, { "epoch": 12.55, "grad_norm": 0.5234375, "learning_rate": 0.00031348440062218185, "loss": 0.1707, "step": 303010 }, { "epoch": 12.55, "grad_norm": 1.328125, "learning_rate": 0.0003134739108665846, "loss": 0.2094, "step": 303020 }, { "epoch": 12.55, "grad_norm": 0.6640625, "learning_rate": 0.0003134634209915362, "loss": 0.2222, "step": 303030 }, { "epoch": 12.55, "grad_norm": 0.78515625, "learning_rate": 0.00031345293099705655, "loss": 0.1907, "step": 303040 }, { "epoch": 12.55, "grad_norm": 0.79296875, "learning_rate": 0.0003134424408831653, "loss": 0.1751, "step": 303050 }, { "epoch": 12.55, "grad_norm": 0.65625, "learning_rate": 0.00031343195064988236, "loss": 0.184, "step": 303060 }, { "epoch": 12.55, "grad_norm": 0.72265625, "learning_rate": 0.00031342146029722723, "loss": 0.195, "step": 303070 }, { "epoch": 12.55, "grad_norm": 0.59375, "learning_rate": 0.0003134109698252198, "loss": 0.2474, "step": 303080 }, { "epoch": 12.55, "grad_norm": 1.0546875, "learning_rate": 0.0003134004792338797, "loss": 0.2638, "step": 303090 }, { "epoch": 12.55, "grad_norm": 0.625, "learning_rate": 0.0003133899885232268, "loss": 0.1659, "step": 303100 }, { "epoch": 12.55, "grad_norm": 1.25, "learning_rate": 0.00031337949769328076, "loss": 0.2265, "step": 303110 }, { "epoch": 12.56, "grad_norm": 0.78125, "learning_rate": 0.00031336900674406145, "loss": 0.2115, "step": 303120 }, { "epoch": 12.56, "grad_norm": 0.9140625, "learning_rate": 0.00031335851567558835, "loss": 0.1456, "step": 303130 }, { "epoch": 12.56, "grad_norm": 1.859375, "learning_rate": 0.00031334802448788147, "loss": 0.1772, "step": 303140 }, { "epoch": 12.56, "grad_norm": 0.0, "learning_rate": 0.0003133375331809605, "loss": 0.1569, "step": 303150 }, { "epoch": 12.56, "grad_norm": 0.59375, "learning_rate": 0.00031332704175484504, "loss": 0.1618, "step": 303160 }, { "epoch": 12.56, "grad_norm": 0.55859375, "learning_rate": 0.0003133165502095549, "loss": 0.1868, "step": 303170 }, { "epoch": 12.56, "grad_norm": 1.046875, "learning_rate": 0.0003133060585451099, "loss": 0.2158, "step": 303180 }, { "epoch": 12.56, "grad_norm": 0.91796875, "learning_rate": 0.00031329556676152974, "loss": 0.1926, "step": 303190 }, { "epoch": 12.56, "grad_norm": 0.7734375, "learning_rate": 0.0003132850748588342, "loss": 0.1978, "step": 303200 }, { "epoch": 12.56, "grad_norm": 0.1484375, "learning_rate": 0.0003132745828370429, "loss": 0.1836, "step": 303210 }, { "epoch": 12.56, "grad_norm": 0.427734375, "learning_rate": 0.00031326409069617567, "loss": 0.2192, "step": 303220 }, { "epoch": 12.56, "grad_norm": 0.28515625, "learning_rate": 0.00031325359843625237, "loss": 0.1964, "step": 303230 }, { "epoch": 12.56, "grad_norm": 0.9609375, "learning_rate": 0.0003132431060572925, "loss": 0.1567, "step": 303240 }, { "epoch": 12.56, "grad_norm": 0.91015625, "learning_rate": 0.00031323261355931597, "loss": 0.2082, "step": 303250 }, { "epoch": 12.56, "grad_norm": 1.2890625, "learning_rate": 0.0003132221209423425, "loss": 0.2509, "step": 303260 }, { "epoch": 12.56, "grad_norm": 1.234375, "learning_rate": 0.00031321162820639183, "loss": 0.1987, "step": 303270 }, { "epoch": 12.56, "grad_norm": 0.87890625, "learning_rate": 0.0003132011353514837, "loss": 0.1875, "step": 303280 }, { "epoch": 12.56, "grad_norm": 0.73046875, "learning_rate": 0.0003131906423776378, "loss": 0.1705, "step": 303290 }, { "epoch": 12.56, "grad_norm": 1.2109375, "learning_rate": 0.00031318014928487405, "loss": 0.1711, "step": 303300 }, { "epoch": 12.56, "grad_norm": 0.5703125, "learning_rate": 0.000313169656073212, "loss": 0.1868, "step": 303310 }, { "epoch": 12.56, "grad_norm": 1.078125, "learning_rate": 0.0003131591627426715, "loss": 0.2352, "step": 303320 }, { "epoch": 12.56, "grad_norm": 1.6015625, "learning_rate": 0.0003131486692932723, "loss": 0.21, "step": 303330 }, { "epoch": 12.56, "grad_norm": 0.58203125, "learning_rate": 0.0003131381757250341, "loss": 0.181, "step": 303340 }, { "epoch": 12.56, "grad_norm": 0.3828125, "learning_rate": 0.00031312768203797667, "loss": 0.1595, "step": 303350 }, { "epoch": 12.57, "grad_norm": 0.55859375, "learning_rate": 0.00031311718823211974, "loss": 0.2029, "step": 303360 }, { "epoch": 12.57, "grad_norm": 1.0078125, "learning_rate": 0.0003131066943074831, "loss": 0.196, "step": 303370 }, { "epoch": 12.57, "grad_norm": 0.83203125, "learning_rate": 0.00031309620026408647, "loss": 0.1928, "step": 303380 }, { "epoch": 12.57, "grad_norm": 1.2109375, "learning_rate": 0.00031308570610194964, "loss": 0.1796, "step": 303390 }, { "epoch": 12.57, "grad_norm": 0.2060546875, "learning_rate": 0.00031307521182109236, "loss": 0.1683, "step": 303400 }, { "epoch": 12.57, "grad_norm": 0.462890625, "learning_rate": 0.0003130647174215342, "loss": 0.2216, "step": 303410 }, { "epoch": 12.57, "grad_norm": 1.1484375, "learning_rate": 0.0003130542229032951, "loss": 0.1907, "step": 303420 }, { "epoch": 12.57, "grad_norm": 0.96484375, "learning_rate": 0.0003130437282663948, "loss": 0.1598, "step": 303430 }, { "epoch": 12.57, "grad_norm": 0.56640625, "learning_rate": 0.00031303323351085307, "loss": 0.2172, "step": 303440 }, { "epoch": 12.57, "grad_norm": 0.0, "learning_rate": 0.0003130227386366895, "loss": 0.1702, "step": 303450 }, { "epoch": 12.57, "grad_norm": 1.4375, "learning_rate": 0.000313012243643924, "loss": 0.2025, "step": 303460 }, { "epoch": 12.57, "grad_norm": 0.46484375, "learning_rate": 0.00031300174853257623, "loss": 0.2157, "step": 303470 }, { "epoch": 12.57, "grad_norm": 0.671875, "learning_rate": 0.00031299125330266594, "loss": 0.1968, "step": 303480 }, { "epoch": 12.57, "grad_norm": 0.298828125, "learning_rate": 0.000312980757954213, "loss": 0.1635, "step": 303490 }, { "epoch": 12.57, "grad_norm": 0.6015625, "learning_rate": 0.00031297026248723706, "loss": 0.1599, "step": 303500 }, { "epoch": 12.57, "grad_norm": 1.1015625, "learning_rate": 0.0003129597669017578, "loss": 0.1323, "step": 303510 }, { "epoch": 12.57, "grad_norm": 0.88671875, "learning_rate": 0.00031294927119779515, "loss": 0.1467, "step": 303520 }, { "epoch": 12.57, "grad_norm": 0.79296875, "learning_rate": 0.00031293877537536875, "loss": 0.2389, "step": 303530 }, { "epoch": 12.57, "grad_norm": 0.875, "learning_rate": 0.0003129282794344983, "loss": 0.2208, "step": 303540 }, { "epoch": 12.57, "grad_norm": 2.234375, "learning_rate": 0.0003129177833752037, "loss": 0.1576, "step": 303550 }, { "epoch": 12.57, "grad_norm": 1.2421875, "learning_rate": 0.00031290728719750457, "loss": 0.191, "step": 303560 }, { "epoch": 12.57, "grad_norm": 0.78515625, "learning_rate": 0.0003128967909014208, "loss": 0.182, "step": 303570 }, { "epoch": 12.57, "grad_norm": 0.1806640625, "learning_rate": 0.000312886294486972, "loss": 0.2243, "step": 303580 }, { "epoch": 12.57, "grad_norm": 0.7421875, "learning_rate": 0.000312875797954178, "loss": 0.217, "step": 303590 }, { "epoch": 12.58, "grad_norm": 0.67578125, "learning_rate": 0.0003128653013030586, "loss": 0.2064, "step": 303600 }, { "epoch": 12.58, "grad_norm": 0.87890625, "learning_rate": 0.00031285480453363334, "loss": 0.2106, "step": 303610 }, { "epoch": 12.58, "grad_norm": 1.0859375, "learning_rate": 0.00031284430764592225, "loss": 0.1808, "step": 303620 }, { "epoch": 12.58, "grad_norm": 0.85546875, "learning_rate": 0.00031283381063994497, "loss": 0.1717, "step": 303630 }, { "epoch": 12.58, "grad_norm": 0.494140625, "learning_rate": 0.00031282331351572114, "loss": 0.1835, "step": 303640 }, { "epoch": 12.58, "grad_norm": 1.3359375, "learning_rate": 0.00031281281627327075, "loss": 0.1734, "step": 303650 }, { "epoch": 12.58, "grad_norm": 0.58203125, "learning_rate": 0.00031280231891261327, "loss": 0.1495, "step": 303660 }, { "epoch": 12.58, "grad_norm": 1.296875, "learning_rate": 0.0003127918214337687, "loss": 0.194, "step": 303670 }, { "epoch": 12.58, "grad_norm": 1.03125, "learning_rate": 0.0003127813238367567, "loss": 0.1694, "step": 303680 }, { "epoch": 12.58, "grad_norm": 1.375, "learning_rate": 0.000312770826121597, "loss": 0.2071, "step": 303690 }, { "epoch": 12.58, "grad_norm": 0.94140625, "learning_rate": 0.00031276032828830944, "loss": 0.1964, "step": 303700 }, { "epoch": 12.58, "grad_norm": 0.3359375, "learning_rate": 0.0003127498303369137, "loss": 0.2198, "step": 303710 }, { "epoch": 12.58, "grad_norm": 0.4453125, "learning_rate": 0.00031273933226742953, "loss": 0.2116, "step": 303720 }, { "epoch": 12.58, "grad_norm": 1.9375, "learning_rate": 0.00031272883407987674, "loss": 0.2329, "step": 303730 }, { "epoch": 12.58, "grad_norm": 0.8046875, "learning_rate": 0.000312718335774275, "loss": 0.1757, "step": 303740 }, { "epoch": 12.58, "grad_norm": 0.640625, "learning_rate": 0.0003127078373506442, "loss": 0.1881, "step": 303750 }, { "epoch": 12.58, "grad_norm": 0.69140625, "learning_rate": 0.000312697338809004, "loss": 0.1499, "step": 303760 }, { "epoch": 12.58, "grad_norm": 0.609375, "learning_rate": 0.0003126868401493741, "loss": 0.1679, "step": 303770 }, { "epoch": 12.58, "grad_norm": 0.5625, "learning_rate": 0.0003126763413717744, "loss": 0.1611, "step": 303780 }, { "epoch": 12.58, "grad_norm": 0.53125, "learning_rate": 0.00031266584247622464, "loss": 0.2068, "step": 303790 }, { "epoch": 12.58, "grad_norm": 0.26171875, "learning_rate": 0.00031265534346274447, "loss": 0.1312, "step": 303800 }, { "epoch": 12.58, "grad_norm": 0.451171875, "learning_rate": 0.00031264484433135375, "loss": 0.1937, "step": 303810 }, { "epoch": 12.58, "grad_norm": 0.66015625, "learning_rate": 0.0003126343450820722, "loss": 0.1791, "step": 303820 }, { "epoch": 12.58, "grad_norm": 0.6640625, "learning_rate": 0.00031262384571491953, "loss": 0.1858, "step": 303830 }, { "epoch": 12.59, "grad_norm": 0.78515625, "learning_rate": 0.0003126133462299156, "loss": 0.2474, "step": 303840 }, { "epoch": 12.59, "grad_norm": 1.5390625, "learning_rate": 0.00031260284662708, "loss": 0.2019, "step": 303850 }, { "epoch": 12.59, "grad_norm": 0.55859375, "learning_rate": 0.0003125923469064327, "loss": 0.1868, "step": 303860 }, { "epoch": 12.59, "grad_norm": 0.69140625, "learning_rate": 0.0003125818470679934, "loss": 0.1628, "step": 303870 }, { "epoch": 12.59, "grad_norm": 0.87890625, "learning_rate": 0.0003125713471117817, "loss": 0.1817, "step": 303880 }, { "epoch": 12.59, "grad_norm": 0.7578125, "learning_rate": 0.00031256084703781763, "loss": 0.2076, "step": 303890 }, { "epoch": 12.59, "grad_norm": 0.67578125, "learning_rate": 0.0003125503468461207, "loss": 0.2234, "step": 303900 }, { "epoch": 12.59, "grad_norm": 0.63671875, "learning_rate": 0.00031253984653671076, "loss": 0.1954, "step": 303910 }, { "epoch": 12.59, "grad_norm": 0.70703125, "learning_rate": 0.0003125293461096077, "loss": 0.2152, "step": 303920 }, { "epoch": 12.59, "grad_norm": 0.6875, "learning_rate": 0.000312518845564831, "loss": 0.2066, "step": 303930 }, { "epoch": 12.59, "grad_norm": 0.62890625, "learning_rate": 0.0003125083449024007, "loss": 0.1819, "step": 303940 }, { "epoch": 12.59, "grad_norm": 0.43359375, "learning_rate": 0.00031249784412233644, "loss": 0.2291, "step": 303950 }, { "epoch": 12.59, "grad_norm": 0.73046875, "learning_rate": 0.00031248734322465786, "loss": 0.19, "step": 303960 }, { "epoch": 12.59, "grad_norm": 1.4921875, "learning_rate": 0.00031247684220938503, "loss": 0.17, "step": 303970 }, { "epoch": 12.59, "grad_norm": 0.6015625, "learning_rate": 0.0003124663410765374, "loss": 0.2076, "step": 303980 }, { "epoch": 12.59, "grad_norm": 0.98828125, "learning_rate": 0.0003124558398261349, "loss": 0.1903, "step": 303990 }, { "epoch": 12.59, "grad_norm": 1.234375, "learning_rate": 0.00031244533845819735, "loss": 0.1919, "step": 304000 }, { "epoch": 12.59, "grad_norm": 0.412109375, "learning_rate": 0.0003124348369727442, "loss": 0.1907, "step": 304010 }, { "epoch": 12.59, "grad_norm": 0.78125, "learning_rate": 0.00031242433536979563, "loss": 0.2565, "step": 304020 }, { "epoch": 12.59, "grad_norm": 0.46875, "learning_rate": 0.00031241383364937115, "loss": 0.1873, "step": 304030 }, { "epoch": 12.59, "grad_norm": 0.67578125, "learning_rate": 0.0003124033318114905, "loss": 0.1902, "step": 304040 }, { "epoch": 12.59, "grad_norm": 0.435546875, "learning_rate": 0.0003123928298561736, "loss": 0.1638, "step": 304050 }, { "epoch": 12.59, "grad_norm": 0.29296875, "learning_rate": 0.00031238232778344014, "loss": 0.1461, "step": 304060 }, { "epoch": 12.59, "grad_norm": 1.140625, "learning_rate": 0.0003123718255933098, "loss": 0.1117, "step": 304070 }, { "epoch": 12.59, "grad_norm": 1.1328125, "learning_rate": 0.0003123613232858025, "loss": 0.2152, "step": 304080 }, { "epoch": 12.6, "grad_norm": 1.4765625, "learning_rate": 0.0003123508208609378, "loss": 0.2117, "step": 304090 }, { "epoch": 12.6, "grad_norm": 0.5625, "learning_rate": 0.0003123403183187357, "loss": 0.226, "step": 304100 }, { "epoch": 12.6, "grad_norm": 0.29296875, "learning_rate": 0.00031232981565921587, "loss": 0.1616, "step": 304110 }, { "epoch": 12.6, "grad_norm": 1.359375, "learning_rate": 0.00031231931288239797, "loss": 0.1864, "step": 304120 }, { "epoch": 12.6, "grad_norm": 0.78125, "learning_rate": 0.000312308809988302, "loss": 0.1241, "step": 304130 }, { "epoch": 12.6, "grad_norm": 0.55078125, "learning_rate": 0.0003122983069769474, "loss": 0.1937, "step": 304140 }, { "epoch": 12.6, "grad_norm": 0.78515625, "learning_rate": 0.0003122878038483542, "loss": 0.1874, "step": 304150 }, { "epoch": 12.6, "grad_norm": 0.94921875, "learning_rate": 0.0003122773006025421, "loss": 0.1646, "step": 304160 }, { "epoch": 12.6, "grad_norm": 1.09375, "learning_rate": 0.0003122667972395307, "loss": 0.2244, "step": 304170 }, { "epoch": 12.6, "grad_norm": 0.2421875, "learning_rate": 0.0003122562937593401, "loss": 0.1823, "step": 304180 }, { "epoch": 12.6, "grad_norm": 1.0859375, "learning_rate": 0.00031224579016198977, "loss": 0.221, "step": 304190 }, { "epoch": 12.6, "grad_norm": 0.9921875, "learning_rate": 0.0003122352864474997, "loss": 0.2079, "step": 304200 }, { "epoch": 12.6, "grad_norm": 0.53515625, "learning_rate": 0.0003122247826158894, "loss": 0.2475, "step": 304210 }, { "epoch": 12.6, "grad_norm": 0.4453125, "learning_rate": 0.0003122142786671789, "loss": 0.1481, "step": 304220 }, { "epoch": 12.6, "grad_norm": 0.8828125, "learning_rate": 0.0003122037746013877, "loss": 0.1942, "step": 304230 }, { "epoch": 12.6, "grad_norm": 1.4140625, "learning_rate": 0.00031219327041853587, "loss": 0.181, "step": 304240 }, { "epoch": 12.6, "grad_norm": 0.515625, "learning_rate": 0.0003121827661186429, "loss": 0.211, "step": 304250 }, { "epoch": 12.6, "grad_norm": 2.015625, "learning_rate": 0.00031217226170172875, "loss": 0.2095, "step": 304260 }, { "epoch": 12.6, "grad_norm": 0.9296875, "learning_rate": 0.0003121617571678131, "loss": 0.1972, "step": 304270 }, { "epoch": 12.6, "grad_norm": 0.65234375, "learning_rate": 0.00031215125251691573, "loss": 0.2176, "step": 304280 }, { "epoch": 12.6, "grad_norm": 0.6328125, "learning_rate": 0.00031214074774905644, "loss": 0.1781, "step": 304290 }, { "epoch": 12.6, "grad_norm": 0.51953125, "learning_rate": 0.00031213024286425495, "loss": 0.1937, "step": 304300 }, { "epoch": 12.6, "grad_norm": 1.3359375, "learning_rate": 0.0003121197378625311, "loss": 0.2011, "step": 304310 }, { "epoch": 12.6, "grad_norm": 0.48828125, "learning_rate": 0.00031210923274390453, "loss": 0.1876, "step": 304320 }, { "epoch": 12.61, "grad_norm": 0.8515625, "learning_rate": 0.00031209872750839507, "loss": 0.1572, "step": 304330 }, { "epoch": 12.61, "grad_norm": 0.66796875, "learning_rate": 0.0003120882221560226, "loss": 0.2018, "step": 304340 }, { "epoch": 12.61, "grad_norm": 2.09375, "learning_rate": 0.0003120777166868068, "loss": 0.1951, "step": 304350 }, { "epoch": 12.61, "grad_norm": 0.423828125, "learning_rate": 0.00031206721110076737, "loss": 0.2323, "step": 304360 }, { "epoch": 12.61, "grad_norm": 0.875, "learning_rate": 0.00031205670539792427, "loss": 0.1824, "step": 304370 }, { "epoch": 12.61, "grad_norm": 1.375, "learning_rate": 0.00031204619957829705, "loss": 0.1899, "step": 304380 }, { "epoch": 12.61, "grad_norm": 0.84375, "learning_rate": 0.00031203569364190553, "loss": 0.1861, "step": 304390 }, { "epoch": 12.61, "grad_norm": 0.447265625, "learning_rate": 0.0003120251875887696, "loss": 0.2039, "step": 304400 }, { "epoch": 12.61, "grad_norm": 0.5078125, "learning_rate": 0.000312014681418909, "loss": 0.225, "step": 304410 }, { "epoch": 12.61, "grad_norm": 0.62109375, "learning_rate": 0.00031200417513234347, "loss": 0.1622, "step": 304420 }, { "epoch": 12.61, "grad_norm": 1.2578125, "learning_rate": 0.00031199366872909276, "loss": 0.202, "step": 304430 }, { "epoch": 12.61, "grad_norm": 0.1611328125, "learning_rate": 0.0003119831622091766, "loss": 0.1623, "step": 304440 }, { "epoch": 12.61, "grad_norm": 1.0078125, "learning_rate": 0.0003119726555726149, "loss": 0.1816, "step": 304450 }, { "epoch": 12.61, "grad_norm": 0.72265625, "learning_rate": 0.00031196214881942737, "loss": 0.2328, "step": 304460 }, { "epoch": 12.61, "grad_norm": 0.51171875, "learning_rate": 0.0003119516419496337, "loss": 0.2102, "step": 304470 }, { "epoch": 12.61, "grad_norm": 0.455078125, "learning_rate": 0.0003119411349632537, "loss": 0.1971, "step": 304480 }, { "epoch": 12.61, "grad_norm": 1.0078125, "learning_rate": 0.00031193062786030723, "loss": 0.2072, "step": 304490 }, { "epoch": 12.61, "grad_norm": 0.93359375, "learning_rate": 0.00031192012064081405, "loss": 0.1665, "step": 304500 }, { "epoch": 12.61, "grad_norm": 1.0546875, "learning_rate": 0.00031190961330479386, "loss": 0.2019, "step": 304510 }, { "epoch": 12.61, "grad_norm": 1.171875, "learning_rate": 0.00031189910585226644, "loss": 0.1804, "step": 304520 }, { "epoch": 12.61, "grad_norm": 1.2578125, "learning_rate": 0.0003118885982832516, "loss": 0.1963, "step": 304530 }, { "epoch": 12.61, "grad_norm": 0.88671875, "learning_rate": 0.00031187809059776906, "loss": 0.1891, "step": 304540 }, { "epoch": 12.61, "grad_norm": 0.07080078125, "learning_rate": 0.00031186758279583873, "loss": 0.1953, "step": 304550 }, { "epoch": 12.61, "grad_norm": 1.1796875, "learning_rate": 0.00031185707487748016, "loss": 0.1956, "step": 304560 }, { "epoch": 12.62, "grad_norm": 0.56640625, "learning_rate": 0.00031184656684271335, "loss": 0.1851, "step": 304570 }, { "epoch": 12.62, "grad_norm": 0.9296875, "learning_rate": 0.000311836058691558, "loss": 0.2137, "step": 304580 }, { "epoch": 12.62, "grad_norm": 0.50390625, "learning_rate": 0.0003118255504240338, "loss": 0.1743, "step": 304590 }, { "epoch": 12.62, "grad_norm": 0.57421875, "learning_rate": 0.0003118150420401606, "loss": 0.1436, "step": 304600 }, { "epoch": 12.62, "grad_norm": 0.55859375, "learning_rate": 0.00031180453353995814, "loss": 0.1743, "step": 304610 }, { "epoch": 12.62, "grad_norm": 1.796875, "learning_rate": 0.00031179402492344634, "loss": 0.2178, "step": 304620 }, { "epoch": 12.62, "grad_norm": 0.53515625, "learning_rate": 0.0003117835161906447, "loss": 0.1912, "step": 304630 }, { "epoch": 12.62, "grad_norm": 0.478515625, "learning_rate": 0.00031177300734157326, "loss": 0.2163, "step": 304640 }, { "epoch": 12.62, "grad_norm": 1.6640625, "learning_rate": 0.00031176249837625166, "loss": 0.1731, "step": 304650 }, { "epoch": 12.62, "grad_norm": 0.44140625, "learning_rate": 0.00031175198929469974, "loss": 0.2167, "step": 304660 }, { "epoch": 12.62, "grad_norm": 0.7265625, "learning_rate": 0.0003117414800969372, "loss": 0.173, "step": 304670 }, { "epoch": 12.62, "grad_norm": 1.234375, "learning_rate": 0.0003117309707829839, "loss": 0.182, "step": 304680 }, { "epoch": 12.62, "grad_norm": 1.0, "learning_rate": 0.00031172046135285954, "loss": 0.2072, "step": 304690 }, { "epoch": 12.62, "grad_norm": 0.35546875, "learning_rate": 0.000311709951806584, "loss": 0.1679, "step": 304700 }, { "epoch": 12.62, "grad_norm": 0.7421875, "learning_rate": 0.0003116994421441769, "loss": 0.1688, "step": 304710 }, { "epoch": 12.62, "grad_norm": 0.58203125, "learning_rate": 0.00031168893236565816, "loss": 0.191, "step": 304720 }, { "epoch": 12.62, "grad_norm": 0.57421875, "learning_rate": 0.0003116784224710475, "loss": 0.226, "step": 304730 }, { "epoch": 12.62, "grad_norm": 0.80078125, "learning_rate": 0.00031166791246036476, "loss": 0.163, "step": 304740 }, { "epoch": 12.62, "grad_norm": 1.09375, "learning_rate": 0.00031165740233362964, "loss": 0.2038, "step": 304750 }, { "epoch": 12.62, "grad_norm": 0.330078125, "learning_rate": 0.00031164689209086196, "loss": 0.1662, "step": 304760 }, { "epoch": 12.62, "grad_norm": 1.2734375, "learning_rate": 0.00031163638173208145, "loss": 0.1801, "step": 304770 }, { "epoch": 12.62, "grad_norm": 0.6796875, "learning_rate": 0.00031162587125730794, "loss": 0.1829, "step": 304780 }, { "epoch": 12.62, "grad_norm": 0.474609375, "learning_rate": 0.00031161536066656113, "loss": 0.1704, "step": 304790 }, { "epoch": 12.62, "grad_norm": 0.515625, "learning_rate": 0.000311604849959861, "loss": 0.2051, "step": 304800 }, { "epoch": 12.63, "grad_norm": 1.53125, "learning_rate": 0.00031159433913722714, "loss": 0.1625, "step": 304810 }, { "epoch": 12.63, "grad_norm": 0.3828125, "learning_rate": 0.00031158382819867933, "loss": 0.169, "step": 304820 }, { "epoch": 12.63, "grad_norm": 0.44921875, "learning_rate": 0.00031157331714423746, "loss": 0.1972, "step": 304830 }, { "epoch": 12.63, "grad_norm": 0.63671875, "learning_rate": 0.0003115628059739212, "loss": 0.195, "step": 304840 }, { "epoch": 12.63, "grad_norm": 2.546875, "learning_rate": 0.0003115522946877505, "loss": 0.1792, "step": 304850 }, { "epoch": 12.63, "grad_norm": 0.396484375, "learning_rate": 0.00031154178328574494, "loss": 0.1524, "step": 304860 }, { "epoch": 12.63, "grad_norm": 0.60546875, "learning_rate": 0.0003115312717679244, "loss": 0.1784, "step": 304870 }, { "epoch": 12.63, "grad_norm": 0.546875, "learning_rate": 0.0003115207601343087, "loss": 0.1831, "step": 304880 }, { "epoch": 12.63, "grad_norm": 1.09375, "learning_rate": 0.0003115102483849175, "loss": 0.2224, "step": 304890 }, { "epoch": 12.63, "grad_norm": 1.375, "learning_rate": 0.0003114997365197707, "loss": 0.1694, "step": 304900 }, { "epoch": 12.63, "grad_norm": 0.89453125, "learning_rate": 0.00031148922453888805, "loss": 0.1718, "step": 304910 }, { "epoch": 12.63, "grad_norm": 0.78125, "learning_rate": 0.00031147871244228926, "loss": 0.1915, "step": 304920 }, { "epoch": 12.63, "grad_norm": 0.921875, "learning_rate": 0.0003114682002299942, "loss": 0.2177, "step": 304930 }, { "epoch": 12.63, "grad_norm": 0.396484375, "learning_rate": 0.0003114576879020227, "loss": 0.1482, "step": 304940 }, { "epoch": 12.63, "grad_norm": 1.46875, "learning_rate": 0.00031144717545839436, "loss": 0.1982, "step": 304950 }, { "epoch": 12.63, "grad_norm": 0.7421875, "learning_rate": 0.00031143666289912915, "loss": 0.1824, "step": 304960 }, { "epoch": 12.63, "grad_norm": 0.98828125, "learning_rate": 0.0003114261502242467, "loss": 0.2271, "step": 304970 }, { "epoch": 12.63, "grad_norm": 1.609375, "learning_rate": 0.0003114156374337669, "loss": 0.1824, "step": 304980 }, { "epoch": 12.63, "grad_norm": 1.3515625, "learning_rate": 0.00031140512452770955, "loss": 0.1969, "step": 304990 }, { "epoch": 12.63, "grad_norm": 1.484375, "learning_rate": 0.0003113946115060943, "loss": 0.2466, "step": 305000 }, { "epoch": 12.63, "grad_norm": 1.0546875, "learning_rate": 0.0003113840983689411, "loss": 0.1849, "step": 305010 }, { "epoch": 12.63, "grad_norm": 0.56640625, "learning_rate": 0.0003113735851162696, "loss": 0.1863, "step": 305020 }, { "epoch": 12.63, "grad_norm": 0.59375, "learning_rate": 0.00031136307174809964, "loss": 0.165, "step": 305030 }, { "epoch": 12.63, "grad_norm": 1.140625, "learning_rate": 0.0003113525582644511, "loss": 0.1406, "step": 305040 }, { "epoch": 12.64, "grad_norm": 0.0, "learning_rate": 0.00031134204466534357, "loss": 0.2199, "step": 305050 }, { "epoch": 12.64, "grad_norm": 1.8203125, "learning_rate": 0.000311331530950797, "loss": 0.1627, "step": 305060 }, { "epoch": 12.64, "grad_norm": 0.859375, "learning_rate": 0.00031132101712083105, "loss": 0.2416, "step": 305070 }, { "epoch": 12.64, "grad_norm": 0.55078125, "learning_rate": 0.00031131050317546557, "loss": 0.2105, "step": 305080 }, { "epoch": 12.64, "grad_norm": 1.3046875, "learning_rate": 0.00031129998911472043, "loss": 0.1799, "step": 305090 }, { "epoch": 12.64, "grad_norm": 0.69140625, "learning_rate": 0.00031128947493861526, "loss": 0.1766, "step": 305100 }, { "epoch": 12.64, "grad_norm": 1.0703125, "learning_rate": 0.0003112789606471699, "loss": 0.2176, "step": 305110 }, { "epoch": 12.64, "grad_norm": 0.48828125, "learning_rate": 0.00031126844624040424, "loss": 0.1833, "step": 305120 }, { "epoch": 12.64, "grad_norm": 0.453125, "learning_rate": 0.0003112579317183379, "loss": 0.1947, "step": 305130 }, { "epoch": 12.64, "grad_norm": 0.83984375, "learning_rate": 0.00031124741708099074, "loss": 0.1861, "step": 305140 }, { "epoch": 12.64, "grad_norm": 0.55078125, "learning_rate": 0.00031123690232838264, "loss": 0.1593, "step": 305150 }, { "epoch": 12.64, "grad_norm": 0.921875, "learning_rate": 0.00031122638746053317, "loss": 0.1762, "step": 305160 }, { "epoch": 12.64, "grad_norm": 1.421875, "learning_rate": 0.00031121587247746234, "loss": 0.2195, "step": 305170 }, { "epoch": 12.64, "grad_norm": 0.80859375, "learning_rate": 0.0003112053573791899, "loss": 0.1967, "step": 305180 }, { "epoch": 12.64, "grad_norm": 1.140625, "learning_rate": 0.00031119484216573547, "loss": 0.19, "step": 305190 }, { "epoch": 12.64, "grad_norm": 0.625, "learning_rate": 0.00031118432683711905, "loss": 0.1805, "step": 305200 }, { "epoch": 12.64, "grad_norm": 1.0625, "learning_rate": 0.00031117381139336025, "loss": 0.2037, "step": 305210 }, { "epoch": 12.64, "grad_norm": 0.96484375, "learning_rate": 0.00031116329583447897, "loss": 0.1333, "step": 305220 }, { "epoch": 12.64, "grad_norm": 1.21875, "learning_rate": 0.0003111527801604951, "loss": 0.1827, "step": 305230 }, { "epoch": 12.64, "grad_norm": 0.80078125, "learning_rate": 0.0003111422643714281, "loss": 0.1479, "step": 305240 }, { "epoch": 12.64, "grad_norm": 0.7890625, "learning_rate": 0.0003111317484672981, "loss": 0.2213, "step": 305250 }, { "epoch": 12.64, "grad_norm": 0.64453125, "learning_rate": 0.0003111212324481247, "loss": 0.2039, "step": 305260 }, { "epoch": 12.64, "grad_norm": 2.5625, "learning_rate": 0.00031111071631392774, "loss": 0.2216, "step": 305270 }, { "epoch": 12.64, "grad_norm": 0.4921875, "learning_rate": 0.0003111002000647271, "loss": 0.1895, "step": 305280 }, { "epoch": 12.65, "grad_norm": 1.453125, "learning_rate": 0.0003110896837005423, "loss": 0.2152, "step": 305290 }, { "epoch": 12.65, "grad_norm": 0.51953125, "learning_rate": 0.0003110791672213934, "loss": 0.2316, "step": 305300 }, { "epoch": 12.65, "grad_norm": 0.80859375, "learning_rate": 0.00031106865062730015, "loss": 0.2393, "step": 305310 }, { "epoch": 12.65, "grad_norm": 0.91015625, "learning_rate": 0.0003110581339182822, "loss": 0.212, "step": 305320 }, { "epoch": 12.65, "grad_norm": 0.439453125, "learning_rate": 0.0003110476170943595, "loss": 0.2359, "step": 305330 }, { "epoch": 12.65, "grad_norm": 0.578125, "learning_rate": 0.00031103710015555183, "loss": 0.1943, "step": 305340 }, { "epoch": 12.65, "grad_norm": 0.56640625, "learning_rate": 0.0003110265831018788, "loss": 0.2096, "step": 305350 }, { "epoch": 12.65, "grad_norm": 0.34765625, "learning_rate": 0.00031101606593336045, "loss": 0.1658, "step": 305360 }, { "epoch": 12.65, "grad_norm": 1.0703125, "learning_rate": 0.0003110055486500164, "loss": 0.2168, "step": 305370 }, { "epoch": 12.65, "grad_norm": 0.578125, "learning_rate": 0.0003109950312518665, "loss": 0.1546, "step": 305380 }, { "epoch": 12.65, "grad_norm": 0.4921875, "learning_rate": 0.00031098451373893056, "loss": 0.1733, "step": 305390 }, { "epoch": 12.65, "grad_norm": 0.56640625, "learning_rate": 0.0003109739961112283, "loss": 0.246, "step": 305400 }, { "epoch": 12.65, "grad_norm": 0.0, "learning_rate": 0.00031096347836877963, "loss": 0.1944, "step": 305410 }, { "epoch": 12.65, "grad_norm": 0.8359375, "learning_rate": 0.0003109529605116042, "loss": 0.1814, "step": 305420 }, { "epoch": 12.65, "grad_norm": 1.0859375, "learning_rate": 0.0003109424425397219, "loss": 0.1731, "step": 305430 }, { "epoch": 12.65, "grad_norm": 0.6796875, "learning_rate": 0.0003109319244531526, "loss": 0.1725, "step": 305440 }, { "epoch": 12.65, "grad_norm": 1.1875, "learning_rate": 0.00031092140625191587, "loss": 0.2607, "step": 305450 }, { "epoch": 12.65, "grad_norm": 0.8046875, "learning_rate": 0.00031091088793603174, "loss": 0.1979, "step": 305460 }, { "epoch": 12.65, "grad_norm": 1.4453125, "learning_rate": 0.00031090036950551984, "loss": 0.219, "step": 305470 }, { "epoch": 12.65, "grad_norm": 0.59375, "learning_rate": 0.0003108898509604, "loss": 0.1925, "step": 305480 }, { "epoch": 12.65, "grad_norm": 0.86328125, "learning_rate": 0.000310879332300692, "loss": 0.2084, "step": 305490 }, { "epoch": 12.65, "grad_norm": 1.3203125, "learning_rate": 0.0003108688135264158, "loss": 0.2155, "step": 305500 }, { "epoch": 12.65, "grad_norm": 1.0546875, "learning_rate": 0.000310858294637591, "loss": 0.2493, "step": 305510 }, { "epoch": 12.65, "grad_norm": 0.416015625, "learning_rate": 0.0003108477756342375, "loss": 0.1765, "step": 305520 }, { "epoch": 12.66, "grad_norm": 0.671875, "learning_rate": 0.0003108372565163749, "loss": 0.1997, "step": 305530 }, { "epoch": 12.66, "grad_norm": 0.609375, "learning_rate": 0.0003108267372840233, "loss": 0.1782, "step": 305540 }, { "epoch": 12.66, "grad_norm": 0.9453125, "learning_rate": 0.0003108162179372023, "loss": 0.2406, "step": 305550 }, { "epoch": 12.66, "grad_norm": 1.2578125, "learning_rate": 0.00031080569847593176, "loss": 0.1749, "step": 305560 }, { "epoch": 12.66, "grad_norm": 0.65234375, "learning_rate": 0.00031079517890023154, "loss": 0.1999, "step": 305570 }, { "epoch": 12.66, "grad_norm": 0.59765625, "learning_rate": 0.00031078465921012126, "loss": 0.2087, "step": 305580 }, { "epoch": 12.66, "grad_norm": 1.6640625, "learning_rate": 0.0003107741394056208, "loss": 0.2356, "step": 305590 }, { "epoch": 12.66, "grad_norm": 0.8984375, "learning_rate": 0.00031076361948675007, "loss": 0.1581, "step": 305600 }, { "epoch": 12.66, "grad_norm": 1.3359375, "learning_rate": 0.0003107530994535286, "loss": 0.1991, "step": 305610 }, { "epoch": 12.66, "grad_norm": 0.71484375, "learning_rate": 0.00031074257930597654, "loss": 0.2124, "step": 305620 }, { "epoch": 12.66, "grad_norm": 0.5078125, "learning_rate": 0.00031073205904411344, "loss": 0.2227, "step": 305630 }, { "epoch": 12.66, "grad_norm": 1.1328125, "learning_rate": 0.00031072153866795916, "loss": 0.1984, "step": 305640 }, { "epoch": 12.66, "grad_norm": 0.408203125, "learning_rate": 0.00031071101817753356, "loss": 0.1793, "step": 305650 }, { "epoch": 12.66, "grad_norm": 0.890625, "learning_rate": 0.0003107004975728563, "loss": 0.2264, "step": 305660 }, { "epoch": 12.66, "grad_norm": 0.79296875, "learning_rate": 0.00031068997685394724, "loss": 0.213, "step": 305670 }, { "epoch": 12.66, "grad_norm": 0.5703125, "learning_rate": 0.00031067945602082626, "loss": 0.1942, "step": 305680 }, { "epoch": 12.66, "grad_norm": 0.98828125, "learning_rate": 0.00031066893507351303, "loss": 0.2215, "step": 305690 }, { "epoch": 12.66, "grad_norm": 0.6953125, "learning_rate": 0.00031065841401202747, "loss": 0.2406, "step": 305700 }, { "epoch": 12.66, "grad_norm": 0.275390625, "learning_rate": 0.00031064789283638934, "loss": 0.1701, "step": 305710 }, { "epoch": 12.66, "grad_norm": 0.71875, "learning_rate": 0.00031063737154661834, "loss": 0.165, "step": 305720 }, { "epoch": 12.66, "grad_norm": 0.78125, "learning_rate": 0.0003106268501427345, "loss": 0.2145, "step": 305730 }, { "epoch": 12.66, "grad_norm": 3.28125, "learning_rate": 0.00031061632862475734, "loss": 0.2171, "step": 305740 }, { "epoch": 12.66, "grad_norm": 0.953125, "learning_rate": 0.00031060580699270686, "loss": 0.1967, "step": 305750 }, { "epoch": 12.66, "grad_norm": 0.251953125, "learning_rate": 0.00031059528524660275, "loss": 0.1933, "step": 305760 }, { "epoch": 12.66, "grad_norm": 0.34765625, "learning_rate": 0.0003105847633864649, "loss": 0.172, "step": 305770 }, { "epoch": 12.67, "grad_norm": 0.70703125, "learning_rate": 0.0003105742414123131, "loss": 0.195, "step": 305780 }, { "epoch": 12.67, "grad_norm": 0.80078125, "learning_rate": 0.00031056371932416705, "loss": 0.2445, "step": 305790 }, { "epoch": 12.67, "grad_norm": 1.109375, "learning_rate": 0.00031055319712204664, "loss": 0.1827, "step": 305800 }, { "epoch": 12.67, "grad_norm": 0.5234375, "learning_rate": 0.0003105426748059717, "loss": 0.2182, "step": 305810 }, { "epoch": 12.67, "grad_norm": 1.0078125, "learning_rate": 0.0003105321523759619, "loss": 0.1874, "step": 305820 }, { "epoch": 12.67, "grad_norm": 1.3359375, "learning_rate": 0.0003105216298320372, "loss": 0.1794, "step": 305830 }, { "epoch": 12.67, "grad_norm": 0.34375, "learning_rate": 0.0003105111071742173, "loss": 0.1827, "step": 305840 }, { "epoch": 12.67, "grad_norm": 0.55859375, "learning_rate": 0.000310500584402522, "loss": 0.2051, "step": 305850 }, { "epoch": 12.67, "grad_norm": 0.85546875, "learning_rate": 0.00031049006151697117, "loss": 0.1448, "step": 305860 }, { "epoch": 12.67, "grad_norm": 0.37890625, "learning_rate": 0.0003104795385175846, "loss": 0.2061, "step": 305870 }, { "epoch": 12.67, "grad_norm": 0.71875, "learning_rate": 0.000310469015404382, "loss": 0.1974, "step": 305880 }, { "epoch": 12.67, "grad_norm": 0.4296875, "learning_rate": 0.0003104584921773833, "loss": 0.2129, "step": 305890 }, { "epoch": 12.67, "grad_norm": 0.5390625, "learning_rate": 0.0003104479688366082, "loss": 0.1494, "step": 305900 }, { "epoch": 12.67, "grad_norm": 0.51171875, "learning_rate": 0.0003104374453820766, "loss": 0.2174, "step": 305910 }, { "epoch": 12.67, "grad_norm": 0.3515625, "learning_rate": 0.0003104269218138082, "loss": 0.2071, "step": 305920 }, { "epoch": 12.67, "grad_norm": 2.078125, "learning_rate": 0.00031041639813182286, "loss": 0.1856, "step": 305930 }, { "epoch": 12.67, "grad_norm": 0.56640625, "learning_rate": 0.00031040587433614047, "loss": 0.1877, "step": 305940 }, { "epoch": 12.67, "grad_norm": 1.390625, "learning_rate": 0.0003103953504267807, "loss": 0.1852, "step": 305950 }, { "epoch": 12.67, "grad_norm": 0.39453125, "learning_rate": 0.0003103848264037634, "loss": 0.2242, "step": 305960 }, { "epoch": 12.67, "grad_norm": 0.50390625, "learning_rate": 0.00031037430226710835, "loss": 0.1622, "step": 305970 }, { "epoch": 12.67, "grad_norm": 0.96875, "learning_rate": 0.0003103637780168354, "loss": 0.2091, "step": 305980 }, { "epoch": 12.67, "grad_norm": 1.2109375, "learning_rate": 0.00031035325365296433, "loss": 0.2233, "step": 305990 }, { "epoch": 12.67, "grad_norm": 0.8671875, "learning_rate": 0.00031034272917551496, "loss": 0.1719, "step": 306000 }, { "epoch": 12.67, "grad_norm": 1.6484375, "learning_rate": 0.00031033220458450713, "loss": 0.2554, "step": 306010 }, { "epoch": 12.68, "grad_norm": 0.37890625, "learning_rate": 0.00031032167987996056, "loss": 0.1523, "step": 306020 }, { "epoch": 12.68, "grad_norm": 1.2890625, "learning_rate": 0.0003103111550618951, "loss": 0.2364, "step": 306030 }, { "epoch": 12.68, "grad_norm": 0.96875, "learning_rate": 0.0003103006301303306, "loss": 0.227, "step": 306040 }, { "epoch": 12.68, "grad_norm": 1.0390625, "learning_rate": 0.0003102901050852868, "loss": 0.2548, "step": 306050 }, { "epoch": 12.68, "grad_norm": 0.515625, "learning_rate": 0.00031027957992678357, "loss": 0.1837, "step": 306060 }, { "epoch": 12.68, "grad_norm": 0.6796875, "learning_rate": 0.0003102690546548406, "loss": 0.1887, "step": 306070 }, { "epoch": 12.68, "grad_norm": 0.63671875, "learning_rate": 0.00031025852926947785, "loss": 0.1775, "step": 306080 }, { "epoch": 12.68, "grad_norm": 0.8046875, "learning_rate": 0.00031024800377071497, "loss": 0.235, "step": 306090 }, { "epoch": 12.68, "grad_norm": 0.609375, "learning_rate": 0.000310237478158572, "loss": 0.2162, "step": 306100 }, { "epoch": 12.68, "grad_norm": 0.85546875, "learning_rate": 0.0003102269524330685, "loss": 0.1813, "step": 306110 }, { "epoch": 12.68, "grad_norm": 0.86328125, "learning_rate": 0.00031021642659422434, "loss": 0.1945, "step": 306120 }, { "epoch": 12.68, "grad_norm": 0.447265625, "learning_rate": 0.00031020590064205944, "loss": 0.1596, "step": 306130 }, { "epoch": 12.68, "grad_norm": 1.078125, "learning_rate": 0.0003101953745765935, "loss": 0.1506, "step": 306140 }, { "epoch": 12.68, "grad_norm": 0.294921875, "learning_rate": 0.0003101848483978464, "loss": 0.1329, "step": 306150 }, { "epoch": 12.68, "grad_norm": 0.72265625, "learning_rate": 0.0003101743221058379, "loss": 0.1661, "step": 306160 }, { "epoch": 12.68, "grad_norm": 0.51171875, "learning_rate": 0.0003101637957005878, "loss": 0.2087, "step": 306170 }, { "epoch": 12.68, "grad_norm": 0.3828125, "learning_rate": 0.00031015326918211597, "loss": 0.1868, "step": 306180 }, { "epoch": 12.68, "grad_norm": 0.72265625, "learning_rate": 0.00031014274255044215, "loss": 0.2204, "step": 306190 }, { "epoch": 12.68, "grad_norm": 1.0703125, "learning_rate": 0.0003101322158055862, "loss": 0.1883, "step": 306200 }, { "epoch": 12.68, "grad_norm": 0.40234375, "learning_rate": 0.00031012168894756795, "loss": 0.2033, "step": 306210 }, { "epoch": 12.68, "grad_norm": 0.63671875, "learning_rate": 0.0003101111619764071, "loss": 0.1979, "step": 306220 }, { "epoch": 12.68, "grad_norm": 0.66015625, "learning_rate": 0.0003101006348921236, "loss": 0.1919, "step": 306230 }, { "epoch": 12.68, "grad_norm": 0.6953125, "learning_rate": 0.00031009010769473714, "loss": 0.133, "step": 306240 }, { "epoch": 12.68, "grad_norm": 0.984375, "learning_rate": 0.00031007958038426766, "loss": 0.1497, "step": 306250 }, { "epoch": 12.69, "grad_norm": 0.59765625, "learning_rate": 0.0003100690529607348, "loss": 0.1689, "step": 306260 }, { "epoch": 12.69, "grad_norm": 2.125, "learning_rate": 0.0003100585254241586, "loss": 0.1673, "step": 306270 }, { "epoch": 12.69, "grad_norm": 0.703125, "learning_rate": 0.0003100479977745586, "loss": 0.2038, "step": 306280 }, { "epoch": 12.69, "grad_norm": 0.88671875, "learning_rate": 0.0003100374700119548, "loss": 0.1915, "step": 306290 }, { "epoch": 12.69, "grad_norm": 0.68359375, "learning_rate": 0.00031002694213636705, "loss": 0.1645, "step": 306300 }, { "epoch": 12.69, "grad_norm": 0.60546875, "learning_rate": 0.0003100164141478149, "loss": 0.2058, "step": 306310 }, { "epoch": 12.69, "grad_norm": 0.60546875, "learning_rate": 0.00031000588604631853, "loss": 0.1821, "step": 306320 }, { "epoch": 12.69, "grad_norm": 1.875, "learning_rate": 0.0003099953578318975, "loss": 0.1774, "step": 306330 }, { "epoch": 12.69, "grad_norm": 0.51953125, "learning_rate": 0.0003099848295045716, "loss": 0.2098, "step": 306340 }, { "epoch": 12.69, "grad_norm": 0.86328125, "learning_rate": 0.00030997430106436086, "loss": 0.1896, "step": 306350 }, { "epoch": 12.69, "grad_norm": 0.9296875, "learning_rate": 0.0003099637725112849, "loss": 0.1785, "step": 306360 }, { "epoch": 12.69, "grad_norm": 0.349609375, "learning_rate": 0.00030995324384536357, "loss": 0.1711, "step": 306370 }, { "epoch": 12.69, "grad_norm": 1.9453125, "learning_rate": 0.00030994271506661677, "loss": 0.2287, "step": 306380 }, { "epoch": 12.69, "grad_norm": 0.326171875, "learning_rate": 0.00030993218617506415, "loss": 0.1697, "step": 306390 }, { "epoch": 12.69, "grad_norm": 1.6328125, "learning_rate": 0.0003099216571707257, "loss": 0.1933, "step": 306400 }, { "epoch": 12.69, "grad_norm": 0.66015625, "learning_rate": 0.00030991112805362115, "loss": 0.2443, "step": 306410 }, { "epoch": 12.69, "grad_norm": 0.419921875, "learning_rate": 0.0003099005988237703, "loss": 0.1386, "step": 306420 }, { "epoch": 12.69, "grad_norm": 0.890625, "learning_rate": 0.0003098900694811931, "loss": 0.2735, "step": 306430 }, { "epoch": 12.69, "grad_norm": 0.384765625, "learning_rate": 0.0003098795400259092, "loss": 0.2038, "step": 306440 }, { "epoch": 12.69, "grad_norm": 0.71875, "learning_rate": 0.00030986901045793844, "loss": 0.1625, "step": 306450 }, { "epoch": 12.69, "grad_norm": 0.66796875, "learning_rate": 0.00030985848077730066, "loss": 0.1629, "step": 306460 }, { "epoch": 12.69, "grad_norm": 1.5234375, "learning_rate": 0.00030984795098401563, "loss": 0.2047, "step": 306470 }, { "epoch": 12.69, "grad_norm": 1.078125, "learning_rate": 0.00030983742107810336, "loss": 0.1761, "step": 306480 }, { "epoch": 12.69, "grad_norm": 0.8984375, "learning_rate": 0.00030982689105958346, "loss": 0.2123, "step": 306490 }, { "epoch": 12.7, "grad_norm": 0.50390625, "learning_rate": 0.0003098163609284758, "loss": 0.177, "step": 306500 }, { "epoch": 12.7, "grad_norm": 0.87109375, "learning_rate": 0.0003098058306848002, "loss": 0.1944, "step": 306510 }, { "epoch": 12.7, "grad_norm": 0.5625, "learning_rate": 0.0003097953003285765, "loss": 0.205, "step": 306520 }, { "epoch": 12.7, "grad_norm": 1.9296875, "learning_rate": 0.0003097847698598245, "loss": 0.1975, "step": 306530 }, { "epoch": 12.7, "grad_norm": 0.5859375, "learning_rate": 0.00030977423927856405, "loss": 0.1801, "step": 306540 }, { "epoch": 12.7, "grad_norm": 1.9375, "learning_rate": 0.0003097637085848149, "loss": 0.1915, "step": 306550 }, { "epoch": 12.7, "grad_norm": 0.66796875, "learning_rate": 0.000309753177778597, "loss": 0.2045, "step": 306560 }, { "epoch": 12.7, "grad_norm": 0.890625, "learning_rate": 0.00030974264685992994, "loss": 0.2183, "step": 306570 }, { "epoch": 12.7, "grad_norm": 1.125, "learning_rate": 0.00030973211582883373, "loss": 0.1954, "step": 306580 }, { "epoch": 12.7, "grad_norm": 0.71875, "learning_rate": 0.00030972158468532815, "loss": 0.2169, "step": 306590 }, { "epoch": 12.7, "grad_norm": 0.98046875, "learning_rate": 0.00030971105342943286, "loss": 0.2042, "step": 306600 }, { "epoch": 12.7, "grad_norm": 0.76171875, "learning_rate": 0.00030970052206116794, "loss": 0.1801, "step": 306610 }, { "epoch": 12.7, "grad_norm": 0.62890625, "learning_rate": 0.0003096899905805531, "loss": 0.1172, "step": 306620 }, { "epoch": 12.7, "grad_norm": 0.7109375, "learning_rate": 0.00030967945898760805, "loss": 0.1966, "step": 306630 }, { "epoch": 12.7, "grad_norm": 0.388671875, "learning_rate": 0.0003096689272823529, "loss": 0.1786, "step": 306640 }, { "epoch": 12.7, "grad_norm": 0.87109375, "learning_rate": 0.00030965839546480704, "loss": 0.1561, "step": 306650 }, { "epoch": 12.7, "grad_norm": 1.15625, "learning_rate": 0.00030964786353499066, "loss": 0.164, "step": 306660 }, { "epoch": 12.7, "grad_norm": 0.7578125, "learning_rate": 0.00030963733149292337, "loss": 0.2192, "step": 306670 }, { "epoch": 12.7, "grad_norm": 0.61328125, "learning_rate": 0.00030962679933862504, "loss": 0.2368, "step": 306680 }, { "epoch": 12.7, "grad_norm": 0.65234375, "learning_rate": 0.0003096162670721156, "loss": 0.2212, "step": 306690 }, { "epoch": 12.7, "grad_norm": 1.53125, "learning_rate": 0.0003096057346934148, "loss": 0.1836, "step": 306700 }, { "epoch": 12.7, "grad_norm": 1.1953125, "learning_rate": 0.00030959520220254233, "loss": 0.2043, "step": 306710 }, { "epoch": 12.7, "grad_norm": 0.63671875, "learning_rate": 0.0003095846695995182, "loss": 0.1778, "step": 306720 }, { "epoch": 12.7, "grad_norm": 1.0859375, "learning_rate": 0.00030957413688436215, "loss": 0.1975, "step": 306730 }, { "epoch": 12.71, "grad_norm": 0.408203125, "learning_rate": 0.000309563604057094, "loss": 0.2017, "step": 306740 }, { "epoch": 12.71, "grad_norm": 0.52734375, "learning_rate": 0.0003095530711177336, "loss": 0.2166, "step": 306750 }, { "epoch": 12.71, "grad_norm": 1.4453125, "learning_rate": 0.00030954253806630066, "loss": 0.1763, "step": 306760 }, { "epoch": 12.71, "grad_norm": 0.796875, "learning_rate": 0.0003095320049028152, "loss": 0.2352, "step": 306770 }, { "epoch": 12.71, "grad_norm": 0.7109375, "learning_rate": 0.00030952147162729694, "loss": 0.1944, "step": 306780 }, { "epoch": 12.71, "grad_norm": 0.66796875, "learning_rate": 0.0003095109382397656, "loss": 0.1873, "step": 306790 }, { "epoch": 12.71, "grad_norm": 0.34375, "learning_rate": 0.0003095004047402412, "loss": 0.2077, "step": 306800 }, { "epoch": 12.71, "grad_norm": 0.62109375, "learning_rate": 0.00030948987112874344, "loss": 0.2023, "step": 306810 }, { "epoch": 12.71, "grad_norm": 1.8046875, "learning_rate": 0.0003094793374052921, "loss": 0.2125, "step": 306820 }, { "epoch": 12.71, "grad_norm": 0.89453125, "learning_rate": 0.0003094688035699072, "loss": 0.1954, "step": 306830 }, { "epoch": 12.71, "grad_norm": 0.443359375, "learning_rate": 0.00030945826962260823, "loss": 0.19, "step": 306840 }, { "epoch": 12.71, "grad_norm": 0.5625, "learning_rate": 0.00030944773556341543, "loss": 0.1664, "step": 306850 }, { "epoch": 12.71, "grad_norm": 0.66015625, "learning_rate": 0.0003094372013923483, "loss": 0.212, "step": 306860 }, { "epoch": 12.71, "grad_norm": 1.3671875, "learning_rate": 0.0003094266671094268, "loss": 0.1705, "step": 306870 }, { "epoch": 12.71, "grad_norm": 0.96484375, "learning_rate": 0.0003094161327146707, "loss": 0.1773, "step": 306880 }, { "epoch": 12.71, "grad_norm": 0.84765625, "learning_rate": 0.00030940559820809993, "loss": 0.1629, "step": 306890 }, { "epoch": 12.71, "grad_norm": 2.1875, "learning_rate": 0.0003093950635897342, "loss": 0.2197, "step": 306900 }, { "epoch": 12.71, "grad_norm": 0.5625, "learning_rate": 0.0003093845288595934, "loss": 0.2119, "step": 306910 }, { "epoch": 12.71, "grad_norm": 1.2890625, "learning_rate": 0.00030937399401769725, "loss": 0.1594, "step": 306920 }, { "epoch": 12.71, "grad_norm": 0.6015625, "learning_rate": 0.00030936345906406573, "loss": 0.2087, "step": 306930 }, { "epoch": 12.71, "grad_norm": 0.8828125, "learning_rate": 0.0003093529239987185, "loss": 0.1996, "step": 306940 }, { "epoch": 12.71, "grad_norm": 0.73828125, "learning_rate": 0.00030934238882167553, "loss": 0.1614, "step": 306950 }, { "epoch": 12.71, "grad_norm": 0.86328125, "learning_rate": 0.0003093318535329567, "loss": 0.1807, "step": 306960 }, { "epoch": 12.71, "grad_norm": 0.66796875, "learning_rate": 0.0003093213181325816, "loss": 0.1798, "step": 306970 }, { "epoch": 12.72, "grad_norm": 0.2275390625, "learning_rate": 0.00030931078262057016, "loss": 0.156, "step": 306980 }, { "epoch": 12.72, "grad_norm": 0.96484375, "learning_rate": 0.00030930024699694236, "loss": 0.1972, "step": 306990 }, { "epoch": 12.72, "grad_norm": 1.15625, "learning_rate": 0.00030928971126171774, "loss": 0.2101, "step": 307000 }, { "epoch": 12.72, "grad_norm": 1.3125, "learning_rate": 0.0003092791754149164, "loss": 0.1827, "step": 307010 }, { "epoch": 12.72, "grad_norm": 0.7734375, "learning_rate": 0.00030926863945655804, "loss": 0.2019, "step": 307020 }, { "epoch": 12.72, "grad_norm": 0.95703125, "learning_rate": 0.0003092581033866625, "loss": 0.1688, "step": 307030 }, { "epoch": 12.72, "grad_norm": 1.046875, "learning_rate": 0.0003092475672052496, "loss": 0.1779, "step": 307040 }, { "epoch": 12.72, "grad_norm": 1.15625, "learning_rate": 0.00030923703091233917, "loss": 0.2373, "step": 307050 }, { "epoch": 12.72, "grad_norm": 0.58203125, "learning_rate": 0.00030922649450795104, "loss": 0.1759, "step": 307060 }, { "epoch": 12.72, "grad_norm": 0.73046875, "learning_rate": 0.000309215957992105, "loss": 0.2001, "step": 307070 }, { "epoch": 12.72, "grad_norm": 1.0234375, "learning_rate": 0.00030920542136482097, "loss": 0.2039, "step": 307080 }, { "epoch": 12.72, "grad_norm": 0.61328125, "learning_rate": 0.00030919488462611877, "loss": 0.1619, "step": 307090 }, { "epoch": 12.72, "grad_norm": 0.5234375, "learning_rate": 0.00030918434777601816, "loss": 0.188, "step": 307100 }, { "epoch": 12.72, "grad_norm": 1.34375, "learning_rate": 0.000309173810814539, "loss": 0.1494, "step": 307110 }, { "epoch": 12.72, "grad_norm": 0.72265625, "learning_rate": 0.0003091632737417012, "loss": 0.2154, "step": 307120 }, { "epoch": 12.72, "grad_norm": 1.0390625, "learning_rate": 0.0003091527365575244, "loss": 0.1784, "step": 307130 }, { "epoch": 12.72, "grad_norm": 1.2421875, "learning_rate": 0.00030914219926202847, "loss": 0.2488, "step": 307140 }, { "epoch": 12.72, "grad_norm": 0.67578125, "learning_rate": 0.0003091316618552334, "loss": 0.1526, "step": 307150 }, { "epoch": 12.72, "grad_norm": 1.0390625, "learning_rate": 0.0003091211243371589, "loss": 0.2122, "step": 307160 }, { "epoch": 12.72, "grad_norm": 0.45703125, "learning_rate": 0.0003091105867078249, "loss": 0.2023, "step": 307170 }, { "epoch": 12.72, "grad_norm": 0.84765625, "learning_rate": 0.0003091000489672511, "loss": 0.214, "step": 307180 }, { "epoch": 12.72, "grad_norm": 0.92578125, "learning_rate": 0.0003090895111154574, "loss": 0.1998, "step": 307190 }, { "epoch": 12.72, "grad_norm": 1.6875, "learning_rate": 0.0003090789731524636, "loss": 0.1997, "step": 307200 }, { "epoch": 12.72, "grad_norm": 0.65625, "learning_rate": 0.00030906843507828966, "loss": 0.1505, "step": 307210 }, { "epoch": 12.73, "grad_norm": 0.447265625, "learning_rate": 0.00030905789689295515, "loss": 0.1793, "step": 307220 }, { "epoch": 12.73, "grad_norm": 1.0546875, "learning_rate": 0.0003090473585964801, "loss": 0.1881, "step": 307230 }, { "epoch": 12.73, "grad_norm": 1.21875, "learning_rate": 0.0003090368201888843, "loss": 0.2464, "step": 307240 }, { "epoch": 12.73, "grad_norm": 0.70703125, "learning_rate": 0.00030902628167018767, "loss": 0.1847, "step": 307250 }, { "epoch": 12.73, "grad_norm": 1.1171875, "learning_rate": 0.00030901574304040985, "loss": 0.1978, "step": 307260 }, { "epoch": 12.73, "grad_norm": 1.3046875, "learning_rate": 0.0003090052042995708, "loss": 0.1743, "step": 307270 }, { "epoch": 12.73, "grad_norm": 1.3203125, "learning_rate": 0.0003089946654476903, "loss": 0.2059, "step": 307280 }, { "epoch": 12.73, "grad_norm": 2.5, "learning_rate": 0.0003089841264847883, "loss": 0.1338, "step": 307290 }, { "epoch": 12.73, "grad_norm": 0.7109375, "learning_rate": 0.0003089735874108845, "loss": 0.1853, "step": 307300 }, { "epoch": 12.73, "grad_norm": 0.58203125, "learning_rate": 0.00030896304822599876, "loss": 0.1985, "step": 307310 }, { "epoch": 12.73, "grad_norm": 0.474609375, "learning_rate": 0.00030895250893015093, "loss": 0.1483, "step": 307320 }, { "epoch": 12.73, "grad_norm": 0.78515625, "learning_rate": 0.00030894196952336087, "loss": 0.1965, "step": 307330 }, { "epoch": 12.73, "grad_norm": 0.76171875, "learning_rate": 0.00030893143000564836, "loss": 0.1929, "step": 307340 }, { "epoch": 12.73, "grad_norm": 0.94921875, "learning_rate": 0.00030892089037703324, "loss": 0.2641, "step": 307350 }, { "epoch": 12.73, "grad_norm": 1.7265625, "learning_rate": 0.0003089103506375354, "loss": 0.2028, "step": 307360 }, { "epoch": 12.73, "grad_norm": 0.70703125, "learning_rate": 0.00030889981078717466, "loss": 0.1324, "step": 307370 }, { "epoch": 12.73, "grad_norm": 0.2216796875, "learning_rate": 0.0003088892708259708, "loss": 0.1968, "step": 307380 }, { "epoch": 12.73, "grad_norm": 0.6796875, "learning_rate": 0.00030887873075394374, "loss": 0.1702, "step": 307390 }, { "epoch": 12.73, "grad_norm": 1.46875, "learning_rate": 0.0003088681905711132, "loss": 0.196, "step": 307400 }, { "epoch": 12.73, "grad_norm": 0.75390625, "learning_rate": 0.00030885765027749917, "loss": 0.2003, "step": 307410 }, { "epoch": 12.73, "grad_norm": 0.7421875, "learning_rate": 0.00030884710987312135, "loss": 0.2483, "step": 307420 }, { "epoch": 12.73, "grad_norm": 0.75390625, "learning_rate": 0.0003088365693579996, "loss": 0.1603, "step": 307430 }, { "epoch": 12.73, "grad_norm": 2.640625, "learning_rate": 0.0003088260287321538, "loss": 0.1985, "step": 307440 }, { "epoch": 12.73, "grad_norm": 1.0703125, "learning_rate": 0.00030881548799560377, "loss": 0.1907, "step": 307450 }, { "epoch": 12.73, "grad_norm": 0.8359375, "learning_rate": 0.00030880494714836937, "loss": 0.2083, "step": 307460 }, { "epoch": 12.74, "grad_norm": 0.64453125, "learning_rate": 0.0003087944061904704, "loss": 0.161, "step": 307470 }, { "epoch": 12.74, "grad_norm": 0.6171875, "learning_rate": 0.00030878386512192665, "loss": 0.2273, "step": 307480 }, { "epoch": 12.74, "grad_norm": 0.82421875, "learning_rate": 0.000308773323942758, "loss": 0.2758, "step": 307490 }, { "epoch": 12.74, "grad_norm": 0.8359375, "learning_rate": 0.0003087627826529844, "loss": 0.1783, "step": 307500 }, { "epoch": 12.74, "grad_norm": 0.361328125, "learning_rate": 0.0003087522412526256, "loss": 0.1659, "step": 307510 }, { "epoch": 12.74, "grad_norm": 0.66796875, "learning_rate": 0.0003087416997417013, "loss": 0.1886, "step": 307520 }, { "epoch": 12.74, "grad_norm": 0.265625, "learning_rate": 0.00030873115812023156, "loss": 0.1859, "step": 307530 }, { "epoch": 12.74, "grad_norm": 0.53515625, "learning_rate": 0.00030872061638823615, "loss": 0.1955, "step": 307540 }, { "epoch": 12.74, "grad_norm": 0.96875, "learning_rate": 0.0003087100745457348, "loss": 0.1909, "step": 307550 }, { "epoch": 12.74, "grad_norm": 0.78125, "learning_rate": 0.0003086995325927474, "loss": 0.2116, "step": 307560 }, { "epoch": 12.74, "grad_norm": 1.1484375, "learning_rate": 0.0003086889905292939, "loss": 0.2075, "step": 307570 }, { "epoch": 12.74, "grad_norm": 0.83984375, "learning_rate": 0.00030867844835539405, "loss": 0.2054, "step": 307580 }, { "epoch": 12.74, "grad_norm": 0.2578125, "learning_rate": 0.0003086679060710677, "loss": 0.1878, "step": 307590 }, { "epoch": 12.74, "grad_norm": 0.796875, "learning_rate": 0.00030865736367633465, "loss": 0.1835, "step": 307600 }, { "epoch": 12.74, "grad_norm": 1.2890625, "learning_rate": 0.0003086468211712148, "loss": 0.188, "step": 307610 }, { "epoch": 12.74, "grad_norm": 1.203125, "learning_rate": 0.0003086362785557279, "loss": 0.2044, "step": 307620 }, { "epoch": 12.74, "grad_norm": 0.49609375, "learning_rate": 0.00030862573582989396, "loss": 0.2275, "step": 307630 }, { "epoch": 12.74, "grad_norm": 1.375, "learning_rate": 0.0003086151929937327, "loss": 0.2075, "step": 307640 }, { "epoch": 12.74, "grad_norm": 0.859375, "learning_rate": 0.0003086046500472639, "loss": 0.1818, "step": 307650 }, { "epoch": 12.74, "grad_norm": 0.5625, "learning_rate": 0.0003085941069905076, "loss": 0.1975, "step": 307660 }, { "epoch": 12.74, "grad_norm": 0.6328125, "learning_rate": 0.0003085835638234834, "loss": 0.2314, "step": 307670 }, { "epoch": 12.74, "grad_norm": 0.828125, "learning_rate": 0.0003085730205462113, "loss": 0.1786, "step": 307680 }, { "epoch": 12.74, "grad_norm": 0.671875, "learning_rate": 0.0003085624771587111, "loss": 0.205, "step": 307690 }, { "epoch": 12.74, "grad_norm": 1.515625, "learning_rate": 0.00030855193366100263, "loss": 0.1909, "step": 307700 }, { "epoch": 12.75, "grad_norm": 0.6171875, "learning_rate": 0.00030854139005310577, "loss": 0.1856, "step": 307710 }, { "epoch": 12.75, "grad_norm": 1.5546875, "learning_rate": 0.00030853084633504035, "loss": 0.1695, "step": 307720 }, { "epoch": 12.75, "grad_norm": 0.76171875, "learning_rate": 0.0003085203025068262, "loss": 0.1721, "step": 307730 }, { "epoch": 12.75, "grad_norm": 0.392578125, "learning_rate": 0.00030850975856848314, "loss": 0.2022, "step": 307740 }, { "epoch": 12.75, "grad_norm": 0.0, "learning_rate": 0.00030849921452003097, "loss": 0.1886, "step": 307750 }, { "epoch": 12.75, "grad_norm": 0.55859375, "learning_rate": 0.0003084886703614896, "loss": 0.203, "step": 307760 }, { "epoch": 12.75, "grad_norm": 0.734375, "learning_rate": 0.000308478126092879, "loss": 0.1844, "step": 307770 }, { "epoch": 12.75, "grad_norm": 0.4765625, "learning_rate": 0.0003084675817142188, "loss": 0.2172, "step": 307780 }, { "epoch": 12.75, "grad_norm": 0.8515625, "learning_rate": 0.0003084570372255289, "loss": 0.1773, "step": 307790 }, { "epoch": 12.75, "grad_norm": 0.5, "learning_rate": 0.00030844649262682923, "loss": 0.2071, "step": 307800 }, { "epoch": 12.75, "grad_norm": 0.59765625, "learning_rate": 0.0003084359479181395, "loss": 0.2194, "step": 307810 }, { "epoch": 12.75, "grad_norm": 0.9921875, "learning_rate": 0.00030842540309947977, "loss": 0.2396, "step": 307820 }, { "epoch": 12.75, "grad_norm": 0.98046875, "learning_rate": 0.00030841485817086956, "loss": 0.2252, "step": 307830 }, { "epoch": 12.75, "grad_norm": 1.0078125, "learning_rate": 0.00030840431313232896, "loss": 0.1799, "step": 307840 }, { "epoch": 12.75, "grad_norm": 1.40625, "learning_rate": 0.00030839376798387773, "loss": 0.1991, "step": 307850 }, { "epoch": 12.75, "grad_norm": 1.6015625, "learning_rate": 0.00030838322272553584, "loss": 0.1905, "step": 307860 }, { "epoch": 12.75, "grad_norm": 0.8671875, "learning_rate": 0.00030837267735732296, "loss": 0.18, "step": 307870 }, { "epoch": 12.75, "grad_norm": 1.15625, "learning_rate": 0.000308362131879259, "loss": 0.1749, "step": 307880 }, { "epoch": 12.75, "grad_norm": 0.71484375, "learning_rate": 0.00030835158629136377, "loss": 0.2201, "step": 307890 }, { "epoch": 12.75, "grad_norm": 1.5703125, "learning_rate": 0.00030834104059365724, "loss": 0.1652, "step": 307900 }, { "epoch": 12.75, "grad_norm": 1.71875, "learning_rate": 0.0003083304947861591, "loss": 0.2183, "step": 307910 }, { "epoch": 12.75, "grad_norm": 1.171875, "learning_rate": 0.0003083199488688893, "loss": 0.2185, "step": 307920 }, { "epoch": 12.75, "grad_norm": 1.0703125, "learning_rate": 0.00030830940284186766, "loss": 0.1673, "step": 307930 }, { "epoch": 12.75, "grad_norm": 1.1328125, "learning_rate": 0.000308298856705114, "loss": 0.2472, "step": 307940 }, { "epoch": 12.76, "grad_norm": 0.6796875, "learning_rate": 0.00030828831045864825, "loss": 0.1571, "step": 307950 }, { "epoch": 12.76, "grad_norm": 0.87890625, "learning_rate": 0.00030827776410249013, "loss": 0.1981, "step": 307960 }, { "epoch": 12.76, "grad_norm": 1.671875, "learning_rate": 0.00030826721763665954, "loss": 0.2069, "step": 307970 }, { "epoch": 12.76, "grad_norm": 1.0234375, "learning_rate": 0.0003082566710611764, "loss": 0.2104, "step": 307980 }, { "epoch": 12.76, "grad_norm": 0.2060546875, "learning_rate": 0.0003082461243760604, "loss": 0.1867, "step": 307990 }, { "epoch": 12.76, "grad_norm": 1.375, "learning_rate": 0.0003082355775813316, "loss": 0.1632, "step": 308000 }, { "epoch": 12.76, "grad_norm": 0.765625, "learning_rate": 0.0003082250306770096, "loss": 0.1571, "step": 308010 }, { "epoch": 12.76, "grad_norm": 1.859375, "learning_rate": 0.0003082144836631145, "loss": 0.243, "step": 308020 }, { "epoch": 12.76, "grad_norm": 0.79296875, "learning_rate": 0.000308203936539666, "loss": 0.1657, "step": 308030 }, { "epoch": 12.76, "grad_norm": 0.765625, "learning_rate": 0.00030819338930668396, "loss": 0.1726, "step": 308040 }, { "epoch": 12.76, "grad_norm": 0.1953125, "learning_rate": 0.0003081828419641882, "loss": 0.1543, "step": 308050 }, { "epoch": 12.76, "grad_norm": 0.08544921875, "learning_rate": 0.00030817229451219876, "loss": 0.1415, "step": 308060 }, { "epoch": 12.76, "grad_norm": 1.046875, "learning_rate": 0.0003081617469507352, "loss": 0.1484, "step": 308070 }, { "epoch": 12.76, "grad_norm": 0.765625, "learning_rate": 0.0003081511992798175, "loss": 0.1876, "step": 308080 }, { "epoch": 12.76, "grad_norm": 0.58984375, "learning_rate": 0.00030814065149946564, "loss": 0.1945, "step": 308090 }, { "epoch": 12.76, "grad_norm": 1.3828125, "learning_rate": 0.00030813010360969926, "loss": 0.1896, "step": 308100 }, { "epoch": 12.76, "grad_norm": 0.5625, "learning_rate": 0.0003081195556105384, "loss": 0.2026, "step": 308110 }, { "epoch": 12.76, "grad_norm": 0.75390625, "learning_rate": 0.00030810900750200264, "loss": 0.1545, "step": 308120 }, { "epoch": 12.76, "grad_norm": 0.419921875, "learning_rate": 0.0003080984592841122, "loss": 0.1951, "step": 308130 }, { "epoch": 12.76, "grad_norm": 0.6640625, "learning_rate": 0.0003080879109568867, "loss": 0.2381, "step": 308140 }, { "epoch": 12.76, "grad_norm": 0.62109375, "learning_rate": 0.0003080773625203459, "loss": 0.2388, "step": 308150 }, { "epoch": 12.76, "grad_norm": 0.9765625, "learning_rate": 0.0003080668139745099, "loss": 0.1623, "step": 308160 }, { "epoch": 12.76, "grad_norm": 0.9140625, "learning_rate": 0.00030805626531939836, "loss": 0.1782, "step": 308170 }, { "epoch": 12.76, "grad_norm": 0.60546875, "learning_rate": 0.00030804571655503114, "loss": 0.1446, "step": 308180 }, { "epoch": 12.77, "grad_norm": 0.66015625, "learning_rate": 0.00030803516768142833, "loss": 0.1886, "step": 308190 }, { "epoch": 12.77, "grad_norm": 0.5703125, "learning_rate": 0.00030802461869860944, "loss": 0.2144, "step": 308200 }, { "epoch": 12.77, "grad_norm": 0.55078125, "learning_rate": 0.0003080140696065946, "loss": 0.1878, "step": 308210 }, { "epoch": 12.77, "grad_norm": 0.5546875, "learning_rate": 0.00030800352040540345, "loss": 0.1961, "step": 308220 }, { "epoch": 12.77, "grad_norm": 0.671875, "learning_rate": 0.0003079929710950559, "loss": 0.1875, "step": 308230 }, { "epoch": 12.77, "grad_norm": 0.58203125, "learning_rate": 0.000307982421675572, "loss": 0.2243, "step": 308240 }, { "epoch": 12.77, "grad_norm": 0.5546875, "learning_rate": 0.0003079718721469713, "loss": 0.2178, "step": 308250 }, { "epoch": 12.77, "grad_norm": 0.625, "learning_rate": 0.00030796132250927383, "loss": 0.2021, "step": 308260 }, { "epoch": 12.77, "grad_norm": 0.482421875, "learning_rate": 0.0003079507727624995, "loss": 0.1769, "step": 308270 }, { "epoch": 12.77, "grad_norm": 0.99609375, "learning_rate": 0.00030794022290666793, "loss": 0.1724, "step": 308280 }, { "epoch": 12.77, "grad_norm": 0.80078125, "learning_rate": 0.00030792967294179926, "loss": 0.1941, "step": 308290 }, { "epoch": 12.77, "grad_norm": 1.0, "learning_rate": 0.0003079191228679132, "loss": 0.1933, "step": 308300 }, { "epoch": 12.77, "grad_norm": 0.98828125, "learning_rate": 0.0003079085726850294, "loss": 0.1689, "step": 308310 }, { "epoch": 12.77, "grad_norm": 1.15625, "learning_rate": 0.0003078980223931682, "loss": 0.1983, "step": 308320 }, { "epoch": 12.77, "grad_norm": 0.734375, "learning_rate": 0.000307887471992349, "loss": 0.2176, "step": 308330 }, { "epoch": 12.77, "grad_norm": 0.6953125, "learning_rate": 0.00030787692148259184, "loss": 0.1689, "step": 308340 }, { "epoch": 12.77, "grad_norm": 0.78515625, "learning_rate": 0.0003078663708639166, "loss": 0.2142, "step": 308350 }, { "epoch": 12.77, "grad_norm": 0.84765625, "learning_rate": 0.00030785582013634306, "loss": 0.185, "step": 308360 }, { "epoch": 12.77, "grad_norm": 0.96484375, "learning_rate": 0.00030784526929989115, "loss": 0.2107, "step": 308370 }, { "epoch": 12.77, "grad_norm": 0.7265625, "learning_rate": 0.0003078347183545807, "loss": 0.1841, "step": 308380 }, { "epoch": 12.77, "grad_norm": 0.921875, "learning_rate": 0.0003078241673004315, "loss": 0.1734, "step": 308390 }, { "epoch": 12.77, "grad_norm": 0.7109375, "learning_rate": 0.0003078136161374636, "loss": 0.1732, "step": 308400 }, { "epoch": 12.77, "grad_norm": 0.68359375, "learning_rate": 0.0003078030648656965, "loss": 0.2025, "step": 308410 }, { "epoch": 12.77, "grad_norm": 0.5625, "learning_rate": 0.00030779251348515043, "loss": 0.1848, "step": 308420 }, { "epoch": 12.78, "grad_norm": 0.98828125, "learning_rate": 0.0003077819619958451, "loss": 0.1537, "step": 308430 }, { "epoch": 12.78, "grad_norm": 1.4921875, "learning_rate": 0.0003077714103978002, "loss": 0.2169, "step": 308440 }, { "epoch": 12.78, "grad_norm": 0.90625, "learning_rate": 0.00030776085869103594, "loss": 0.1818, "step": 308450 }, { "epoch": 12.78, "grad_norm": 0.50390625, "learning_rate": 0.00030775030687557186, "loss": 0.1741, "step": 308460 }, { "epoch": 12.78, "grad_norm": 0.384765625, "learning_rate": 0.000307739754951428, "loss": 0.1755, "step": 308470 }, { "epoch": 12.78, "grad_norm": 4.125, "learning_rate": 0.00030772920291862414, "loss": 0.2242, "step": 308480 }, { "epoch": 12.78, "grad_norm": 1.359375, "learning_rate": 0.00030771865077718015, "loss": 0.2017, "step": 308490 }, { "epoch": 12.78, "grad_norm": 0.6875, "learning_rate": 0.00030770809852711584, "loss": 0.2629, "step": 308500 }, { "epoch": 12.78, "grad_norm": 0.8203125, "learning_rate": 0.00030769754616845124, "loss": 0.203, "step": 308510 }, { "epoch": 12.78, "grad_norm": 1.015625, "learning_rate": 0.00030768699370120595, "loss": 0.183, "step": 308520 }, { "epoch": 12.78, "grad_norm": 1.1875, "learning_rate": 0.00030767644112540004, "loss": 0.213, "step": 308530 }, { "epoch": 12.78, "grad_norm": 1.6484375, "learning_rate": 0.0003076658884410533, "loss": 0.2147, "step": 308540 }, { "epoch": 12.78, "grad_norm": 0.298828125, "learning_rate": 0.0003076553356481856, "loss": 0.182, "step": 308550 }, { "epoch": 12.78, "grad_norm": 0.65234375, "learning_rate": 0.0003076447827468168, "loss": 0.2135, "step": 308560 }, { "epoch": 12.78, "grad_norm": 1.8671875, "learning_rate": 0.00030763422973696664, "loss": 0.2315, "step": 308570 }, { "epoch": 12.78, "grad_norm": 0.78125, "learning_rate": 0.0003076236766186551, "loss": 0.1839, "step": 308580 }, { "epoch": 12.78, "grad_norm": 0.9296875, "learning_rate": 0.0003076131233919021, "loss": 0.2236, "step": 308590 }, { "epoch": 12.78, "grad_norm": 0.4453125, "learning_rate": 0.00030760257005672744, "loss": 0.2029, "step": 308600 }, { "epoch": 12.78, "grad_norm": 0.51953125, "learning_rate": 0.00030759201661315094, "loss": 0.2062, "step": 308610 }, { "epoch": 12.78, "grad_norm": 0.96875, "learning_rate": 0.00030758146306119244, "loss": 0.2163, "step": 308620 }, { "epoch": 12.78, "grad_norm": 2.9375, "learning_rate": 0.00030757090940087184, "loss": 0.1714, "step": 308630 }, { "epoch": 12.78, "grad_norm": 0.98828125, "learning_rate": 0.0003075603556322091, "loss": 0.1936, "step": 308640 }, { "epoch": 12.78, "grad_norm": 1.796875, "learning_rate": 0.0003075498017552239, "loss": 0.1631, "step": 308650 }, { "epoch": 12.78, "grad_norm": 0.96484375, "learning_rate": 0.0003075392477699362, "loss": 0.204, "step": 308660 }, { "epoch": 12.79, "grad_norm": 0.51171875, "learning_rate": 0.0003075286936763658, "loss": 0.1925, "step": 308670 }, { "epoch": 12.79, "grad_norm": 0.40625, "learning_rate": 0.0003075181394745327, "loss": 0.1944, "step": 308680 }, { "epoch": 12.79, "grad_norm": 0.62890625, "learning_rate": 0.00030750758516445665, "loss": 0.1965, "step": 308690 }, { "epoch": 12.79, "grad_norm": 0.8671875, "learning_rate": 0.0003074970307461575, "loss": 0.1538, "step": 308700 }, { "epoch": 12.79, "grad_norm": 0.4375, "learning_rate": 0.00030748647621965517, "loss": 0.2246, "step": 308710 }, { "epoch": 12.79, "grad_norm": 0.94921875, "learning_rate": 0.00030747592158496956, "loss": 0.2053, "step": 308720 }, { "epoch": 12.79, "grad_norm": 1.3046875, "learning_rate": 0.0003074653668421204, "loss": 0.2183, "step": 308730 }, { "epoch": 12.79, "grad_norm": 0.890625, "learning_rate": 0.00030745481199112763, "loss": 0.172, "step": 308740 }, { "epoch": 12.79, "grad_norm": 1.53125, "learning_rate": 0.0003074442570320111, "loss": 0.2412, "step": 308750 }, { "epoch": 12.79, "grad_norm": 1.0703125, "learning_rate": 0.0003074337019647907, "loss": 0.1861, "step": 308760 }, { "epoch": 12.79, "grad_norm": 1.2265625, "learning_rate": 0.00030742314678948636, "loss": 0.1833, "step": 308770 }, { "epoch": 12.79, "grad_norm": 0.33984375, "learning_rate": 0.0003074125915061177, "loss": 0.1815, "step": 308780 }, { "epoch": 12.79, "grad_norm": 0.375, "learning_rate": 0.00030740203611470484, "loss": 0.1808, "step": 308790 }, { "epoch": 12.79, "grad_norm": 0.68359375, "learning_rate": 0.0003073914806152675, "loss": 0.1778, "step": 308800 }, { "epoch": 12.79, "grad_norm": 0.90625, "learning_rate": 0.00030738092500782565, "loss": 0.2025, "step": 308810 }, { "epoch": 12.79, "grad_norm": 0.482421875, "learning_rate": 0.000307370369292399, "loss": 0.2662, "step": 308820 }, { "epoch": 12.79, "grad_norm": 1.1484375, "learning_rate": 0.0003073598134690076, "loss": 0.1742, "step": 308830 }, { "epoch": 12.79, "grad_norm": 0.4453125, "learning_rate": 0.0003073492575376712, "loss": 0.1849, "step": 308840 }, { "epoch": 12.79, "grad_norm": 0.6328125, "learning_rate": 0.00030733870149840975, "loss": 0.1956, "step": 308850 }, { "epoch": 12.79, "grad_norm": 2.0625, "learning_rate": 0.000307328145351243, "loss": 0.1926, "step": 308860 }, { "epoch": 12.79, "grad_norm": 0.82421875, "learning_rate": 0.00030731758909619087, "loss": 0.202, "step": 308870 }, { "epoch": 12.79, "grad_norm": 0.026611328125, "learning_rate": 0.00030730703273327317, "loss": 0.1707, "step": 308880 }, { "epoch": 12.79, "grad_norm": 0.1279296875, "learning_rate": 0.0003072964762625099, "loss": 0.1697, "step": 308890 }, { "epoch": 12.79, "grad_norm": 0.8984375, "learning_rate": 0.00030728591968392084, "loss": 0.208, "step": 308900 }, { "epoch": 12.8, "grad_norm": 0.67578125, "learning_rate": 0.00030727536299752584, "loss": 0.1946, "step": 308910 }, { "epoch": 12.8, "grad_norm": 0.58203125, "learning_rate": 0.0003072648062033447, "loss": 0.1706, "step": 308920 }, { "epoch": 12.8, "grad_norm": 0.51171875, "learning_rate": 0.00030725424930139764, "loss": 0.1799, "step": 308930 }, { "epoch": 12.8, "grad_norm": 0.6640625, "learning_rate": 0.00030724369229170406, "loss": 0.1759, "step": 308940 }, { "epoch": 12.8, "grad_norm": 0.94921875, "learning_rate": 0.0003072331351742841, "loss": 0.2075, "step": 308950 }, { "epoch": 12.8, "grad_norm": 0.86328125, "learning_rate": 0.00030722257794915745, "loss": 0.197, "step": 308960 }, { "epoch": 12.8, "grad_norm": 0.474609375, "learning_rate": 0.00030721202061634426, "loss": 0.2091, "step": 308970 }, { "epoch": 12.8, "grad_norm": 0.0260009765625, "learning_rate": 0.0003072014631758641, "loss": 0.2467, "step": 308980 }, { "epoch": 12.8, "grad_norm": 1.0546875, "learning_rate": 0.000307190905627737, "loss": 0.2144, "step": 308990 }, { "epoch": 12.8, "grad_norm": 0.5625, "learning_rate": 0.0003071803479719828, "loss": 0.2159, "step": 309000 }, { "epoch": 12.8, "grad_norm": 0.4921875, "learning_rate": 0.00030716979020862134, "loss": 0.1815, "step": 309010 }, { "epoch": 12.8, "grad_norm": 0.484375, "learning_rate": 0.0003071592323376726, "loss": 0.1802, "step": 309020 }, { "epoch": 12.8, "grad_norm": 0.5703125, "learning_rate": 0.0003071486743591563, "loss": 0.1703, "step": 309030 }, { "epoch": 12.8, "grad_norm": 1.0078125, "learning_rate": 0.00030713811627309227, "loss": 0.2542, "step": 309040 }, { "epoch": 12.8, "grad_norm": 1.234375, "learning_rate": 0.0003071275580795006, "loss": 0.2305, "step": 309050 }, { "epoch": 12.8, "grad_norm": 0.216796875, "learning_rate": 0.00030711699977840104, "loss": 0.1777, "step": 309060 }, { "epoch": 12.8, "grad_norm": 0.68359375, "learning_rate": 0.00030710644136981333, "loss": 0.1723, "step": 309070 }, { "epoch": 12.8, "grad_norm": 1.15625, "learning_rate": 0.00030709588285375756, "loss": 0.2335, "step": 309080 }, { "epoch": 12.8, "grad_norm": 1.15625, "learning_rate": 0.0003070853242302535, "loss": 0.1867, "step": 309090 }, { "epoch": 12.8, "grad_norm": 0.96484375, "learning_rate": 0.000307074765499321, "loss": 0.111, "step": 309100 }, { "epoch": 12.8, "grad_norm": 0.4453125, "learning_rate": 0.00030706420666097995, "loss": 0.2129, "step": 309110 }, { "epoch": 12.8, "grad_norm": 0.462890625, "learning_rate": 0.00030705364771525025, "loss": 0.2141, "step": 309120 }, { "epoch": 12.8, "grad_norm": 0.9296875, "learning_rate": 0.00030704308866215174, "loss": 0.1673, "step": 309130 }, { "epoch": 12.8, "grad_norm": 1.09375, "learning_rate": 0.00030703252950170425, "loss": 0.1861, "step": 309140 }, { "epoch": 12.8, "grad_norm": 0.73046875, "learning_rate": 0.0003070219702339278, "loss": 0.1815, "step": 309150 }, { "epoch": 12.81, "grad_norm": 0.6953125, "learning_rate": 0.0003070114108588421, "loss": 0.2379, "step": 309160 }, { "epoch": 12.81, "grad_norm": 0.60546875, "learning_rate": 0.00030700085137646703, "loss": 0.1936, "step": 309170 }, { "epoch": 12.81, "grad_norm": 0.70703125, "learning_rate": 0.00030699029178682266, "loss": 0.1939, "step": 309180 }, { "epoch": 12.81, "grad_norm": 0.671875, "learning_rate": 0.0003069797320899286, "loss": 0.1593, "step": 309190 }, { "epoch": 12.81, "grad_norm": 1.7421875, "learning_rate": 0.0003069691722858048, "loss": 0.1833, "step": 309200 }, { "epoch": 12.81, "grad_norm": 0.68359375, "learning_rate": 0.0003069586123744713, "loss": 0.2337, "step": 309210 }, { "epoch": 12.81, "grad_norm": 0.5078125, "learning_rate": 0.0003069480523559477, "loss": 0.1459, "step": 309220 }, { "epoch": 12.81, "grad_norm": 0.498046875, "learning_rate": 0.00030693749223025413, "loss": 0.1853, "step": 309230 }, { "epoch": 12.81, "grad_norm": 0.62109375, "learning_rate": 0.0003069269319974103, "loss": 0.2345, "step": 309240 }, { "epoch": 12.81, "grad_norm": 0.9375, "learning_rate": 0.0003069163716574361, "loss": 0.1825, "step": 309250 }, { "epoch": 12.81, "grad_norm": 1.1015625, "learning_rate": 0.00030690581121035155, "loss": 0.2129, "step": 309260 }, { "epoch": 12.81, "grad_norm": 0.515625, "learning_rate": 0.00030689525065617627, "loss": 0.1394, "step": 309270 }, { "epoch": 12.81, "grad_norm": 0.6484375, "learning_rate": 0.00030688468999493033, "loss": 0.1494, "step": 309280 }, { "epoch": 12.81, "grad_norm": 0.58203125, "learning_rate": 0.0003068741292266335, "loss": 0.2088, "step": 309290 }, { "epoch": 12.81, "grad_norm": 1.203125, "learning_rate": 0.0003068635683513058, "loss": 0.1882, "step": 309300 }, { "epoch": 12.81, "grad_norm": 0.39453125, "learning_rate": 0.000306853007368967, "loss": 0.1796, "step": 309310 }, { "epoch": 12.81, "grad_norm": 0.7890625, "learning_rate": 0.0003068424462796369, "loss": 0.2178, "step": 309320 }, { "epoch": 12.81, "grad_norm": 0.71875, "learning_rate": 0.0003068318850833355, "loss": 0.2151, "step": 309330 }, { "epoch": 12.81, "grad_norm": 0.85546875, "learning_rate": 0.00030682132378008264, "loss": 0.1869, "step": 309340 }, { "epoch": 12.81, "grad_norm": 0.95703125, "learning_rate": 0.0003068107623698981, "loss": 0.2209, "step": 309350 }, { "epoch": 12.81, "grad_norm": 1.015625, "learning_rate": 0.0003068002008528019, "loss": 0.1934, "step": 309360 }, { "epoch": 12.81, "grad_norm": 1.0078125, "learning_rate": 0.0003067896392288139, "loss": 0.208, "step": 309370 }, { "epoch": 12.81, "grad_norm": 0.6640625, "learning_rate": 0.0003067790774979539, "loss": 0.2382, "step": 309380 }, { "epoch": 12.81, "grad_norm": 0.55078125, "learning_rate": 0.0003067685156602418, "loss": 0.1907, "step": 309390 }, { "epoch": 12.82, "grad_norm": 0.66796875, "learning_rate": 0.00030675795371569746, "loss": 0.1887, "step": 309400 }, { "epoch": 12.82, "grad_norm": 0.6796875, "learning_rate": 0.0003067473916643408, "loss": 0.1831, "step": 309410 }, { "epoch": 12.82, "grad_norm": 0.6328125, "learning_rate": 0.00030673682950619176, "loss": 0.1802, "step": 309420 }, { "epoch": 12.82, "grad_norm": 0.65234375, "learning_rate": 0.00030672626724126997, "loss": 0.2598, "step": 309430 }, { "epoch": 12.82, "grad_norm": 0.90625, "learning_rate": 0.00030671570486959563, "loss": 0.176, "step": 309440 }, { "epoch": 12.82, "grad_norm": 1.3359375, "learning_rate": 0.0003067051423911883, "loss": 0.1411, "step": 309450 }, { "epoch": 12.82, "grad_norm": 0.396484375, "learning_rate": 0.0003066945798060681, "loss": 0.2393, "step": 309460 }, { "epoch": 12.82, "grad_norm": 0.57421875, "learning_rate": 0.0003066840171142549, "loss": 0.1935, "step": 309470 }, { "epoch": 12.82, "grad_norm": 0.83984375, "learning_rate": 0.00030667345431576836, "loss": 0.1538, "step": 309480 }, { "epoch": 12.82, "grad_norm": 0.94140625, "learning_rate": 0.0003066628914106286, "loss": 0.1684, "step": 309490 }, { "epoch": 12.82, "grad_norm": 0.6484375, "learning_rate": 0.0003066523283988554, "loss": 0.2176, "step": 309500 }, { "epoch": 12.82, "grad_norm": 0.875, "learning_rate": 0.0003066417652804685, "loss": 0.1799, "step": 309510 }, { "epoch": 12.82, "grad_norm": 1.15625, "learning_rate": 0.00030663120205548804, "loss": 0.223, "step": 309520 }, { "epoch": 12.82, "grad_norm": 1.0625, "learning_rate": 0.00030662063872393373, "loss": 0.1595, "step": 309530 }, { "epoch": 12.82, "grad_norm": 1.1484375, "learning_rate": 0.0003066100752858255, "loss": 0.2208, "step": 309540 }, { "epoch": 12.82, "grad_norm": 0.80078125, "learning_rate": 0.0003065995117411833, "loss": 0.2059, "step": 309550 }, { "epoch": 12.82, "grad_norm": 0.53125, "learning_rate": 0.0003065889480900268, "loss": 0.2277, "step": 309560 }, { "epoch": 12.82, "grad_norm": 0.546875, "learning_rate": 0.00030657838433237604, "loss": 0.1805, "step": 309570 }, { "epoch": 12.82, "grad_norm": 1.1953125, "learning_rate": 0.000306567820468251, "loss": 0.1944, "step": 309580 }, { "epoch": 12.82, "grad_norm": 0.455078125, "learning_rate": 0.00030655725649767125, "loss": 0.211, "step": 309590 }, { "epoch": 12.82, "grad_norm": 0.83984375, "learning_rate": 0.000306546692420657, "loss": 0.1587, "step": 309600 }, { "epoch": 12.82, "grad_norm": 0.9609375, "learning_rate": 0.00030653612823722785, "loss": 0.1799, "step": 309610 }, { "epoch": 12.82, "grad_norm": 0.7890625, "learning_rate": 0.0003065255639474039, "loss": 0.196, "step": 309620 }, { "epoch": 12.82, "grad_norm": 1.40625, "learning_rate": 0.0003065149995512049, "loss": 0.1875, "step": 309630 }, { "epoch": 12.83, "grad_norm": 1.1796875, "learning_rate": 0.00030650443504865084, "loss": 0.2291, "step": 309640 }, { "epoch": 12.83, "grad_norm": 0.3515625, "learning_rate": 0.00030649387043976147, "loss": 0.1841, "step": 309650 }, { "epoch": 12.83, "grad_norm": 0.353515625, "learning_rate": 0.0003064833057245568, "loss": 0.1931, "step": 309660 }, { "epoch": 12.83, "grad_norm": 0.7734375, "learning_rate": 0.00030647274090305653, "loss": 0.1994, "step": 309670 }, { "epoch": 12.83, "grad_norm": 1.421875, "learning_rate": 0.00030646217597528076, "loss": 0.2011, "step": 309680 }, { "epoch": 12.83, "grad_norm": 0.7578125, "learning_rate": 0.00030645161094124926, "loss": 0.2145, "step": 309690 }, { "epoch": 12.83, "grad_norm": 1.5234375, "learning_rate": 0.0003064410458009819, "loss": 0.2563, "step": 309700 }, { "epoch": 12.83, "grad_norm": 0.55859375, "learning_rate": 0.00030643048055449865, "loss": 0.1721, "step": 309710 }, { "epoch": 12.83, "grad_norm": 0.9609375, "learning_rate": 0.0003064199152018192, "loss": 0.1403, "step": 309720 }, { "epoch": 12.83, "grad_norm": 1.0546875, "learning_rate": 0.00030640934974296365, "loss": 0.1938, "step": 309730 }, { "epoch": 12.83, "grad_norm": 1.1171875, "learning_rate": 0.00030639878417795184, "loss": 0.1497, "step": 309740 }, { "epoch": 12.83, "grad_norm": 0.474609375, "learning_rate": 0.00030638821850680347, "loss": 0.1841, "step": 309750 }, { "epoch": 12.83, "grad_norm": 2.28125, "learning_rate": 0.0003063776527295387, "loss": 0.1699, "step": 309760 }, { "epoch": 12.83, "grad_norm": 0.7265625, "learning_rate": 0.0003063670868461772, "loss": 0.1658, "step": 309770 }, { "epoch": 12.83, "grad_norm": 0.84765625, "learning_rate": 0.0003063565208567389, "loss": 0.1999, "step": 309780 }, { "epoch": 12.83, "grad_norm": 0.66015625, "learning_rate": 0.00030634595476124383, "loss": 0.176, "step": 309790 }, { "epoch": 12.83, "grad_norm": 1.625, "learning_rate": 0.00030633538855971167, "loss": 0.2126, "step": 309800 }, { "epoch": 12.83, "grad_norm": 1.0703125, "learning_rate": 0.0003063248222521624, "loss": 0.2265, "step": 309810 }, { "epoch": 12.83, "grad_norm": 1.3203125, "learning_rate": 0.000306314255838616, "loss": 0.1524, "step": 309820 }, { "epoch": 12.83, "grad_norm": 0.80859375, "learning_rate": 0.0003063036893190921, "loss": 0.1777, "step": 309830 }, { "epoch": 12.83, "grad_norm": 1.078125, "learning_rate": 0.0003062931226936108, "loss": 0.1762, "step": 309840 }, { "epoch": 12.83, "grad_norm": 0.65625, "learning_rate": 0.0003062825559621919, "loss": 0.19, "step": 309850 }, { "epoch": 12.83, "grad_norm": 0.7578125, "learning_rate": 0.0003062719891248553, "loss": 0.1798, "step": 309860 }, { "epoch": 12.83, "grad_norm": 0.9609375, "learning_rate": 0.000306261422181621, "loss": 0.2228, "step": 309870 }, { "epoch": 12.84, "grad_norm": 0.9375, "learning_rate": 0.0003062508551325087, "loss": 0.233, "step": 309880 }, { "epoch": 12.84, "grad_norm": 0.87109375, "learning_rate": 0.00030624028797753835, "loss": 0.1867, "step": 309890 }, { "epoch": 12.84, "grad_norm": 0.6640625, "learning_rate": 0.0003062297207167299, "loss": 0.1855, "step": 309900 }, { "epoch": 12.84, "grad_norm": 0.1435546875, "learning_rate": 0.00030621915335010306, "loss": 0.2006, "step": 309910 }, { "epoch": 12.84, "grad_norm": 1.25, "learning_rate": 0.000306208585877678, "loss": 0.1927, "step": 309920 }, { "epoch": 12.84, "grad_norm": 0.640625, "learning_rate": 0.0003061980182994744, "loss": 0.2074, "step": 309930 }, { "epoch": 12.84, "grad_norm": 0.34765625, "learning_rate": 0.00030618745061551216, "loss": 0.1208, "step": 309940 }, { "epoch": 12.84, "grad_norm": 0.734375, "learning_rate": 0.0003061768828258113, "loss": 0.1789, "step": 309950 }, { "epoch": 12.84, "grad_norm": 0.375, "learning_rate": 0.00030616631493039147, "loss": 0.1693, "step": 309960 }, { "epoch": 12.84, "grad_norm": 1.8828125, "learning_rate": 0.00030615574692927284, "loss": 0.1821, "step": 309970 }, { "epoch": 12.84, "grad_norm": 0.6953125, "learning_rate": 0.00030614517882247513, "loss": 0.1603, "step": 309980 }, { "epoch": 12.84, "grad_norm": 0.96484375, "learning_rate": 0.0003061346106100182, "loss": 0.162, "step": 309990 }, { "epoch": 12.84, "grad_norm": 1.40625, "learning_rate": 0.00030612404229192204, "loss": 0.2232, "step": 310000 }, { "epoch": 12.84, "grad_norm": 1.0625, "learning_rate": 0.0003061134738682065, "loss": 0.1751, "step": 310010 }, { "epoch": 12.84, "grad_norm": 0.95703125, "learning_rate": 0.0003061029053388914, "loss": 0.1845, "step": 310020 }, { "epoch": 12.84, "grad_norm": 0.85546875, "learning_rate": 0.00030609233670399677, "loss": 0.1724, "step": 310030 }, { "epoch": 12.84, "grad_norm": 1.4296875, "learning_rate": 0.0003060817679635423, "loss": 0.1984, "step": 310040 }, { "epoch": 12.84, "grad_norm": 0.81640625, "learning_rate": 0.0003060711991175481, "loss": 0.2183, "step": 310050 }, { "epoch": 12.84, "grad_norm": 0.7265625, "learning_rate": 0.0003060606301660339, "loss": 0.1943, "step": 310060 }, { "epoch": 12.84, "grad_norm": 1.1796875, "learning_rate": 0.0003060500611090197, "loss": 0.1532, "step": 310070 }, { "epoch": 12.84, "grad_norm": 0.94921875, "learning_rate": 0.0003060394919465254, "loss": 0.2076, "step": 310080 }, { "epoch": 12.84, "grad_norm": 0.8125, "learning_rate": 0.00030602892267857066, "loss": 0.1183, "step": 310090 }, { "epoch": 12.84, "grad_norm": 0.4609375, "learning_rate": 0.0003060183533051756, "loss": 0.1743, "step": 310100 }, { "epoch": 12.84, "grad_norm": 0.77734375, "learning_rate": 0.0003060077838263601, "loss": 0.2055, "step": 310110 }, { "epoch": 12.85, "grad_norm": 1.4296875, "learning_rate": 0.000305997214242144, "loss": 0.1914, "step": 310120 }, { "epoch": 12.85, "grad_norm": 1.0859375, "learning_rate": 0.0003059866445525472, "loss": 0.1696, "step": 310130 }, { "epoch": 12.85, "grad_norm": 0.71875, "learning_rate": 0.0003059760747575895, "loss": 0.1903, "step": 310140 }, { "epoch": 12.85, "grad_norm": 1.2421875, "learning_rate": 0.0003059655048572909, "loss": 0.2181, "step": 310150 }, { "epoch": 12.85, "grad_norm": 0.8203125, "learning_rate": 0.0003059549348516714, "loss": 0.1967, "step": 310160 }, { "epoch": 12.85, "grad_norm": 1.3515625, "learning_rate": 0.00030594436474075057, "loss": 0.173, "step": 310170 }, { "epoch": 12.85, "grad_norm": 1.0234375, "learning_rate": 0.0003059337945245485, "loss": 0.2025, "step": 310180 }, { "epoch": 12.85, "grad_norm": 0.58984375, "learning_rate": 0.0003059232242030852, "loss": 0.2199, "step": 310190 }, { "epoch": 12.85, "grad_norm": 0.486328125, "learning_rate": 0.0003059126537763803, "loss": 0.2013, "step": 310200 }, { "epoch": 12.85, "grad_norm": 1.2109375, "learning_rate": 0.0003059020832444539, "loss": 0.1521, "step": 310210 }, { "epoch": 12.85, "grad_norm": 0.9140625, "learning_rate": 0.00030589151260732576, "loss": 0.1849, "step": 310220 }, { "epoch": 12.85, "grad_norm": 0.81640625, "learning_rate": 0.00030588094186501585, "loss": 0.2206, "step": 310230 }, { "epoch": 12.85, "grad_norm": 0.75390625, "learning_rate": 0.00030587037101754413, "loss": 0.1812, "step": 310240 }, { "epoch": 12.85, "grad_norm": 1.34375, "learning_rate": 0.0003058598000649303, "loss": 0.2006, "step": 310250 }, { "epoch": 12.85, "grad_norm": 1.09375, "learning_rate": 0.00030584922900719436, "loss": 0.1716, "step": 310260 }, { "epoch": 12.85, "grad_norm": 1.015625, "learning_rate": 0.0003058386578443562, "loss": 0.1726, "step": 310270 }, { "epoch": 12.85, "grad_norm": 0.84765625, "learning_rate": 0.0003058280865764358, "loss": 0.1985, "step": 310280 }, { "epoch": 12.85, "grad_norm": 0.58984375, "learning_rate": 0.00030581751520345293, "loss": 0.1237, "step": 310290 }, { "epoch": 12.85, "grad_norm": 0.95703125, "learning_rate": 0.0003058069437254275, "loss": 0.2026, "step": 310300 }, { "epoch": 12.85, "grad_norm": 1.6484375, "learning_rate": 0.0003057963721423794, "loss": 0.1225, "step": 310310 }, { "epoch": 12.85, "grad_norm": 1.578125, "learning_rate": 0.00030578580045432857, "loss": 0.2499, "step": 310320 }, { "epoch": 12.85, "grad_norm": 0.373046875, "learning_rate": 0.00030577522866129497, "loss": 0.2103, "step": 310330 }, { "epoch": 12.85, "grad_norm": 1.1484375, "learning_rate": 0.00030576465676329835, "loss": 0.1622, "step": 310340 }, { "epoch": 12.85, "grad_norm": 0.9765625, "learning_rate": 0.00030575408476035866, "loss": 0.1891, "step": 310350 }, { "epoch": 12.86, "grad_norm": 0.46484375, "learning_rate": 0.0003057435126524958, "loss": 0.1635, "step": 310360 }, { "epoch": 12.86, "grad_norm": 1.296875, "learning_rate": 0.0003057329404397297, "loss": 0.2005, "step": 310370 }, { "epoch": 12.86, "grad_norm": 0.98046875, "learning_rate": 0.0003057223681220802, "loss": 0.185, "step": 310380 }, { "epoch": 12.86, "grad_norm": 1.265625, "learning_rate": 0.00030571179569956723, "loss": 0.163, "step": 310390 }, { "epoch": 12.86, "grad_norm": 0.66015625, "learning_rate": 0.00030570122317221066, "loss": 0.1835, "step": 310400 }, { "epoch": 12.86, "grad_norm": 1.4609375, "learning_rate": 0.0003056906505400305, "loss": 0.2005, "step": 310410 }, { "epoch": 12.86, "grad_norm": 0.6015625, "learning_rate": 0.0003056800778030464, "loss": 0.198, "step": 310420 }, { "epoch": 12.86, "grad_norm": 0.61328125, "learning_rate": 0.00030566950496127847, "loss": 0.1809, "step": 310430 }, { "epoch": 12.86, "grad_norm": 1.375, "learning_rate": 0.0003056589320147465, "loss": 0.1661, "step": 310440 }, { "epoch": 12.86, "grad_norm": 0.7734375, "learning_rate": 0.0003056483589634706, "loss": 0.21, "step": 310450 }, { "epoch": 12.86, "grad_norm": 0.6015625, "learning_rate": 0.00030563778580747035, "loss": 0.1949, "step": 310460 }, { "epoch": 12.86, "grad_norm": 1.171875, "learning_rate": 0.00030562721254676577, "loss": 0.2138, "step": 310470 }, { "epoch": 12.86, "grad_norm": 0.85546875, "learning_rate": 0.0003056166391813768, "loss": 0.2053, "step": 310480 }, { "epoch": 12.86, "grad_norm": 1.421875, "learning_rate": 0.0003056060657113234, "loss": 0.14, "step": 310490 }, { "epoch": 12.86, "grad_norm": 1.5546875, "learning_rate": 0.00030559549213662543, "loss": 0.1789, "step": 310500 }, { "epoch": 12.86, "grad_norm": 1.1015625, "learning_rate": 0.00030558491845730264, "loss": 0.1942, "step": 310510 }, { "epoch": 12.86, "grad_norm": 1.0078125, "learning_rate": 0.00030557434467337504, "loss": 0.2152, "step": 310520 }, { "epoch": 12.86, "grad_norm": 0.70703125, "learning_rate": 0.00030556377078486255, "loss": 0.1685, "step": 310530 }, { "epoch": 12.86, "grad_norm": 0.6953125, "learning_rate": 0.00030555319679178504, "loss": 0.173, "step": 310540 }, { "epoch": 12.86, "grad_norm": 0.859375, "learning_rate": 0.0003055426226941624, "loss": 0.2631, "step": 310550 }, { "epoch": 12.86, "grad_norm": 0.474609375, "learning_rate": 0.00030553204849201456, "loss": 0.1702, "step": 310560 }, { "epoch": 12.86, "grad_norm": 1.046875, "learning_rate": 0.0003055214741853614, "loss": 0.1916, "step": 310570 }, { "epoch": 12.86, "grad_norm": 1.09375, "learning_rate": 0.0003055108997742228, "loss": 0.2226, "step": 310580 }, { "epoch": 12.86, "grad_norm": 2.671875, "learning_rate": 0.0003055003252586187, "loss": 0.1892, "step": 310590 }, { "epoch": 12.87, "grad_norm": 0.671875, "learning_rate": 0.000305489750638569, "loss": 0.1794, "step": 310600 }, { "epoch": 12.87, "grad_norm": 0.70703125, "learning_rate": 0.0003054791759140935, "loss": 0.2322, "step": 310610 }, { "epoch": 12.87, "grad_norm": 0.302734375, "learning_rate": 0.0003054686010852123, "loss": 0.1641, "step": 310620 }, { "epoch": 12.87, "grad_norm": 0.1767578125, "learning_rate": 0.0003054580261519451, "loss": 0.218, "step": 310630 }, { "epoch": 12.87, "grad_norm": 0.83984375, "learning_rate": 0.0003054474511143119, "loss": 0.2126, "step": 310640 }, { "epoch": 12.87, "grad_norm": 0.216796875, "learning_rate": 0.00030543687597233267, "loss": 0.2037, "step": 310650 }, { "epoch": 12.87, "grad_norm": 0.7734375, "learning_rate": 0.00030542630072602706, "loss": 0.2059, "step": 310660 }, { "epoch": 12.87, "grad_norm": 0.32421875, "learning_rate": 0.0003054157253754153, "loss": 0.2751, "step": 310670 }, { "epoch": 12.87, "grad_norm": 1.078125, "learning_rate": 0.000305405149920517, "loss": 0.1945, "step": 310680 }, { "epoch": 12.87, "grad_norm": 1.15625, "learning_rate": 0.00030539457436135227, "loss": 0.1623, "step": 310690 }, { "epoch": 12.87, "grad_norm": 1.1796875, "learning_rate": 0.0003053839986979409, "loss": 0.1899, "step": 310700 }, { "epoch": 12.87, "grad_norm": 0.5234375, "learning_rate": 0.00030537342293030287, "loss": 0.1741, "step": 310710 }, { "epoch": 12.87, "grad_norm": 0.80859375, "learning_rate": 0.000305362847058458, "loss": 0.2391, "step": 310720 }, { "epoch": 12.87, "grad_norm": 0.470703125, "learning_rate": 0.00030535227108242625, "loss": 0.1931, "step": 310730 }, { "epoch": 12.87, "grad_norm": 0.1884765625, "learning_rate": 0.0003053416950022274, "loss": 0.2376, "step": 310740 }, { "epoch": 12.87, "grad_norm": 0.734375, "learning_rate": 0.00030533111881788163, "loss": 0.1985, "step": 310750 }, { "epoch": 12.87, "grad_norm": 0.546875, "learning_rate": 0.00030532054252940853, "loss": 0.1604, "step": 310760 }, { "epoch": 12.87, "grad_norm": 0.375, "learning_rate": 0.00030530996613682823, "loss": 0.1708, "step": 310770 }, { "epoch": 12.87, "grad_norm": 0.67578125, "learning_rate": 0.0003052993896401605, "loss": 0.1854, "step": 310780 }, { "epoch": 12.87, "grad_norm": 2.109375, "learning_rate": 0.00030528881303942536, "loss": 0.1734, "step": 310790 }, { "epoch": 12.87, "grad_norm": 0.89453125, "learning_rate": 0.00030527823633464253, "loss": 0.1656, "step": 310800 }, { "epoch": 12.87, "grad_norm": 1.53125, "learning_rate": 0.00030526765952583213, "loss": 0.2148, "step": 310810 }, { "epoch": 12.87, "grad_norm": 0.42578125, "learning_rate": 0.0003052570826130139, "loss": 0.2251, "step": 310820 }, { "epoch": 12.87, "grad_norm": 0.7109375, "learning_rate": 0.0003052465055962078, "loss": 0.192, "step": 310830 }, { "epoch": 12.87, "grad_norm": 0.74609375, "learning_rate": 0.0003052359284754338, "loss": 0.1723, "step": 310840 }, { "epoch": 12.88, "grad_norm": 1.2734375, "learning_rate": 0.00030522535125071174, "loss": 0.1749, "step": 310850 }, { "epoch": 12.88, "grad_norm": 0.73828125, "learning_rate": 0.0003052147739220615, "loss": 0.2187, "step": 310860 }, { "epoch": 12.88, "grad_norm": 2.4375, "learning_rate": 0.000305204196489503, "loss": 0.1789, "step": 310870 }, { "epoch": 12.88, "grad_norm": 0.76953125, "learning_rate": 0.00030519361895305617, "loss": 0.2476, "step": 310880 }, { "epoch": 12.88, "grad_norm": 0.73828125, "learning_rate": 0.000305183041312741, "loss": 0.2101, "step": 310890 }, { "epoch": 12.88, "grad_norm": 1.265625, "learning_rate": 0.00030517246356857717, "loss": 0.2208, "step": 310900 }, { "epoch": 12.88, "grad_norm": 1.40625, "learning_rate": 0.00030516188572058485, "loss": 0.1828, "step": 310910 }, { "epoch": 12.88, "grad_norm": 0.8671875, "learning_rate": 0.00030515130776878374, "loss": 0.2097, "step": 310920 }, { "epoch": 12.88, "grad_norm": 0.8125, "learning_rate": 0.00030514072971319386, "loss": 0.2073, "step": 310930 }, { "epoch": 12.88, "grad_norm": 0.59765625, "learning_rate": 0.00030513015155383505, "loss": 0.2041, "step": 310940 }, { "epoch": 12.88, "grad_norm": 0.62890625, "learning_rate": 0.0003051195732907272, "loss": 0.1796, "step": 310950 }, { "epoch": 12.88, "grad_norm": 0.625, "learning_rate": 0.0003051089949238904, "loss": 0.1667, "step": 310960 }, { "epoch": 12.88, "grad_norm": 0.84765625, "learning_rate": 0.0003050984164533444, "loss": 0.1715, "step": 310970 }, { "epoch": 12.88, "grad_norm": 0.67578125, "learning_rate": 0.000305087837879109, "loss": 0.1836, "step": 310980 }, { "epoch": 12.88, "grad_norm": 0.48828125, "learning_rate": 0.0003050772592012044, "loss": 0.1983, "step": 310990 }, { "epoch": 12.88, "grad_norm": 1.0703125, "learning_rate": 0.00030506668041965025, "loss": 0.2647, "step": 311000 }, { "epoch": 12.88, "grad_norm": 1.0859375, "learning_rate": 0.0003050561015344665, "loss": 0.1894, "step": 311010 }, { "epoch": 12.88, "grad_norm": 1.03125, "learning_rate": 0.0003050455225456732, "loss": 0.209, "step": 311020 }, { "epoch": 12.88, "grad_norm": 0.7734375, "learning_rate": 0.00030503494345329014, "loss": 0.2134, "step": 311030 }, { "epoch": 12.88, "grad_norm": 0.88671875, "learning_rate": 0.00030502436425733733, "loss": 0.1895, "step": 311040 }, { "epoch": 12.88, "grad_norm": 0.93359375, "learning_rate": 0.00030501378495783455, "loss": 0.2107, "step": 311050 }, { "epoch": 12.88, "grad_norm": 0.7421875, "learning_rate": 0.00030500320555480173, "loss": 0.1705, "step": 311060 }, { "epoch": 12.88, "grad_norm": 1.0, "learning_rate": 0.0003049926260482589, "loss": 0.1914, "step": 311070 }, { "epoch": 12.88, "grad_norm": 1.0703125, "learning_rate": 0.0003049820464382258, "loss": 0.192, "step": 311080 }, { "epoch": 12.89, "grad_norm": 1.015625, "learning_rate": 0.0003049714667247225, "loss": 0.253, "step": 311090 }, { "epoch": 12.89, "grad_norm": 0.60546875, "learning_rate": 0.0003049608869077688, "loss": 0.2231, "step": 311100 }, { "epoch": 12.89, "grad_norm": 0.6484375, "learning_rate": 0.0003049503069873846, "loss": 0.2285, "step": 311110 }, { "epoch": 12.89, "grad_norm": 0.97265625, "learning_rate": 0.00030493972696358997, "loss": 0.1891, "step": 311120 }, { "epoch": 12.89, "grad_norm": 0.58203125, "learning_rate": 0.0003049291468364046, "loss": 0.2129, "step": 311130 }, { "epoch": 12.89, "grad_norm": 0.58984375, "learning_rate": 0.00030491856660584855, "loss": 0.1784, "step": 311140 }, { "epoch": 12.89, "grad_norm": 0.69140625, "learning_rate": 0.0003049079862719417, "loss": 0.1919, "step": 311150 }, { "epoch": 12.89, "grad_norm": 1.140625, "learning_rate": 0.00030489740583470394, "loss": 0.2006, "step": 311160 }, { "epoch": 12.89, "grad_norm": 0.80859375, "learning_rate": 0.00030488682529415515, "loss": 0.2164, "step": 311170 }, { "epoch": 12.89, "grad_norm": 0.52734375, "learning_rate": 0.0003048762446503154, "loss": 0.2296, "step": 311180 }, { "epoch": 12.89, "grad_norm": 0.482421875, "learning_rate": 0.00030486566390320436, "loss": 0.1607, "step": 311190 }, { "epoch": 12.89, "grad_norm": 0.96875, "learning_rate": 0.00030485508305284214, "loss": 0.2241, "step": 311200 }, { "epoch": 12.89, "grad_norm": 0.5234375, "learning_rate": 0.00030484450209924854, "loss": 0.1784, "step": 311210 }, { "epoch": 12.89, "grad_norm": 0.59375, "learning_rate": 0.0003048339210424435, "loss": 0.2045, "step": 311220 }, { "epoch": 12.89, "grad_norm": 0.859375, "learning_rate": 0.000304823339882447, "loss": 0.1894, "step": 311230 }, { "epoch": 12.89, "grad_norm": 0.255859375, "learning_rate": 0.00030481275861927884, "loss": 0.2085, "step": 311240 }, { "epoch": 12.89, "grad_norm": 0.6640625, "learning_rate": 0.000304802177252959, "loss": 0.1632, "step": 311250 }, { "epoch": 12.89, "grad_norm": 2.078125, "learning_rate": 0.0003047915957835074, "loss": 0.1707, "step": 311260 }, { "epoch": 12.89, "grad_norm": 1.390625, "learning_rate": 0.0003047810142109439, "loss": 0.1894, "step": 311270 }, { "epoch": 12.89, "grad_norm": 0.8515625, "learning_rate": 0.00030477043253528845, "loss": 0.2237, "step": 311280 }, { "epoch": 12.89, "grad_norm": 0.87890625, "learning_rate": 0.000304759850756561, "loss": 0.1874, "step": 311290 }, { "epoch": 12.89, "grad_norm": 1.3828125, "learning_rate": 0.00030474926887478137, "loss": 0.246, "step": 311300 }, { "epoch": 12.89, "grad_norm": 2.359375, "learning_rate": 0.0003047386868899696, "loss": 0.2243, "step": 311310 }, { "epoch": 12.89, "grad_norm": 0.578125, "learning_rate": 0.00030472810480214546, "loss": 0.1988, "step": 311320 }, { "epoch": 12.9, "grad_norm": 1.1328125, "learning_rate": 0.00030471752261132897, "loss": 0.1781, "step": 311330 }, { "epoch": 12.9, "grad_norm": 0.6875, "learning_rate": 0.00030470694031754005, "loss": 0.1851, "step": 311340 }, { "epoch": 12.9, "grad_norm": 0.546875, "learning_rate": 0.0003046963579207985, "loss": 0.2187, "step": 311350 }, { "epoch": 12.9, "grad_norm": 0.6015625, "learning_rate": 0.00030468577542112435, "loss": 0.2316, "step": 311360 }, { "epoch": 12.9, "grad_norm": 0.73828125, "learning_rate": 0.00030467519281853747, "loss": 0.214, "step": 311370 }, { "epoch": 12.9, "grad_norm": 0.69921875, "learning_rate": 0.0003046646101130578, "loss": 0.201, "step": 311380 }, { "epoch": 12.9, "grad_norm": 1.53125, "learning_rate": 0.0003046540273047053, "loss": 0.1802, "step": 311390 }, { "epoch": 12.9, "grad_norm": 1.578125, "learning_rate": 0.00030464344439349965, "loss": 0.2461, "step": 311400 }, { "epoch": 12.9, "grad_norm": 1.2421875, "learning_rate": 0.0003046328613794611, "loss": 0.2515, "step": 311410 }, { "epoch": 12.9, "grad_norm": 0.75390625, "learning_rate": 0.00030462227826260924, "loss": 0.2108, "step": 311420 }, { "epoch": 12.9, "grad_norm": 0.79296875, "learning_rate": 0.00030461169504296425, "loss": 0.1738, "step": 311430 }, { "epoch": 12.9, "grad_norm": 0.7890625, "learning_rate": 0.000304601111720546, "loss": 0.1952, "step": 311440 }, { "epoch": 12.9, "grad_norm": 0.9140625, "learning_rate": 0.0003045905282953743, "loss": 0.1822, "step": 311450 }, { "epoch": 12.9, "grad_norm": 1.2109375, "learning_rate": 0.0003045799447674691, "loss": 0.1787, "step": 311460 }, { "epoch": 12.9, "grad_norm": 0.90625, "learning_rate": 0.00030456936113685044, "loss": 0.2526, "step": 311470 }, { "epoch": 12.9, "grad_norm": 0.53515625, "learning_rate": 0.000304558777403538, "loss": 0.2013, "step": 311480 }, { "epoch": 12.9, "grad_norm": 1.640625, "learning_rate": 0.0003045481935675519, "loss": 0.1721, "step": 311490 }, { "epoch": 12.9, "grad_norm": 2.09375, "learning_rate": 0.00030453760962891193, "loss": 0.2464, "step": 311500 }, { "epoch": 12.9, "grad_norm": 0.53125, "learning_rate": 0.0003045270255876381, "loss": 0.2425, "step": 311510 }, { "epoch": 12.9, "grad_norm": 0.79296875, "learning_rate": 0.0003045164414437503, "loss": 0.1963, "step": 311520 }, { "epoch": 12.9, "grad_norm": 0.431640625, "learning_rate": 0.00030450585719726846, "loss": 0.2227, "step": 311530 }, { "epoch": 12.9, "grad_norm": 1.4375, "learning_rate": 0.0003044952728482125, "loss": 0.1555, "step": 311540 }, { "epoch": 12.9, "grad_norm": 0.65625, "learning_rate": 0.0003044846883966023, "loss": 0.1969, "step": 311550 }, { "epoch": 12.9, "grad_norm": 0.921875, "learning_rate": 0.00030447410384245785, "loss": 0.2058, "step": 311560 }, { "epoch": 12.91, "grad_norm": 0.671875, "learning_rate": 0.00030446351918579894, "loss": 0.2019, "step": 311570 }, { "epoch": 12.91, "grad_norm": 0.8515625, "learning_rate": 0.0003044529344266456, "loss": 0.1912, "step": 311580 }, { "epoch": 12.91, "grad_norm": 0.8359375, "learning_rate": 0.0003044423495650177, "loss": 0.2369, "step": 311590 }, { "epoch": 12.91, "grad_norm": 1.078125, "learning_rate": 0.00030443176460093527, "loss": 0.1815, "step": 311600 }, { "epoch": 12.91, "grad_norm": 0.8671875, "learning_rate": 0.000304421179534418, "loss": 0.2175, "step": 311610 }, { "epoch": 12.91, "grad_norm": 1.25, "learning_rate": 0.000304410594365486, "loss": 0.2013, "step": 311620 }, { "epoch": 12.91, "grad_norm": 0.341796875, "learning_rate": 0.0003044000090941591, "loss": 0.1644, "step": 311630 }, { "epoch": 12.91, "grad_norm": 0.69140625, "learning_rate": 0.0003043894237204573, "loss": 0.2346, "step": 311640 }, { "epoch": 12.91, "grad_norm": 1.5234375, "learning_rate": 0.00030437883824440055, "loss": 0.2201, "step": 311650 }, { "epoch": 12.91, "grad_norm": 0.90234375, "learning_rate": 0.00030436825266600857, "loss": 0.1734, "step": 311660 }, { "epoch": 12.91, "grad_norm": 1.1875, "learning_rate": 0.00030435766698530146, "loss": 0.2146, "step": 311670 }, { "epoch": 12.91, "grad_norm": 0.65625, "learning_rate": 0.0003043470812022991, "loss": 0.1999, "step": 311680 }, { "epoch": 12.91, "grad_norm": 0.85546875, "learning_rate": 0.00030433649531702144, "loss": 0.2429, "step": 311690 }, { "epoch": 12.91, "grad_norm": 2.625, "learning_rate": 0.0003043259093294883, "loss": 0.2007, "step": 311700 }, { "epoch": 12.91, "grad_norm": 1.28125, "learning_rate": 0.0003043153232397197, "loss": 0.206, "step": 311710 }, { "epoch": 12.91, "grad_norm": 0.5078125, "learning_rate": 0.00030430473704773554, "loss": 0.1805, "step": 311720 }, { "epoch": 12.91, "grad_norm": 1.1953125, "learning_rate": 0.00030429415075355573, "loss": 0.1858, "step": 311730 }, { "epoch": 12.91, "grad_norm": 0.734375, "learning_rate": 0.00030428356435720016, "loss": 0.1808, "step": 311740 }, { "epoch": 12.91, "grad_norm": 0.84375, "learning_rate": 0.0003042729778586888, "loss": 0.2048, "step": 311750 }, { "epoch": 12.91, "grad_norm": 0.78125, "learning_rate": 0.00030426239125804156, "loss": 0.2371, "step": 311760 }, { "epoch": 12.91, "grad_norm": 0.90234375, "learning_rate": 0.00030425180455527837, "loss": 0.185, "step": 311770 }, { "epoch": 12.91, "grad_norm": 0.40234375, "learning_rate": 0.00030424121775041906, "loss": 0.2157, "step": 311780 }, { "epoch": 12.91, "grad_norm": 0.53515625, "learning_rate": 0.0003042306308434837, "loss": 0.2134, "step": 311790 }, { "epoch": 12.91, "grad_norm": 0.62890625, "learning_rate": 0.00030422004383449215, "loss": 0.1551, "step": 311800 }, { "epoch": 12.92, "grad_norm": 0.51171875, "learning_rate": 0.00030420945672346433, "loss": 0.1834, "step": 311810 }, { "epoch": 12.92, "grad_norm": 0.435546875, "learning_rate": 0.0003041988695104202, "loss": 0.2033, "step": 311820 }, { "epoch": 12.92, "grad_norm": 0.87890625, "learning_rate": 0.0003041882821953796, "loss": 0.1638, "step": 311830 }, { "epoch": 12.92, "grad_norm": 0.9765625, "learning_rate": 0.0003041776947783625, "loss": 0.2245, "step": 311840 }, { "epoch": 12.92, "grad_norm": 0.72265625, "learning_rate": 0.0003041671072593889, "loss": 0.2391, "step": 311850 }, { "epoch": 12.92, "grad_norm": 1.34375, "learning_rate": 0.00030415651963847855, "loss": 0.144, "step": 311860 }, { "epoch": 12.92, "grad_norm": 0.0, "learning_rate": 0.00030414593191565155, "loss": 0.1868, "step": 311870 }, { "epoch": 12.92, "grad_norm": 1.1953125, "learning_rate": 0.0003041353440909277, "loss": 0.2105, "step": 311880 }, { "epoch": 12.92, "grad_norm": 0.52734375, "learning_rate": 0.0003041247561643271, "loss": 0.1679, "step": 311890 }, { "epoch": 12.92, "grad_norm": 1.1484375, "learning_rate": 0.00030411416813586945, "loss": 0.2217, "step": 311900 }, { "epoch": 12.92, "grad_norm": 1.0234375, "learning_rate": 0.0003041035800055748, "loss": 0.2138, "step": 311910 }, { "epoch": 12.92, "grad_norm": 0.4609375, "learning_rate": 0.000304092991773463, "loss": 0.1744, "step": 311920 }, { "epoch": 12.92, "grad_norm": 0.419921875, "learning_rate": 0.0003040824034395541, "loss": 0.1526, "step": 311930 }, { "epoch": 12.92, "grad_norm": 1.2109375, "learning_rate": 0.00030407181500386794, "loss": 0.1611, "step": 311940 }, { "epoch": 12.92, "grad_norm": 1.2421875, "learning_rate": 0.0003040612264664245, "loss": 0.2273, "step": 311950 }, { "epoch": 12.92, "grad_norm": 0.76953125, "learning_rate": 0.0003040506378272436, "loss": 0.1617, "step": 311960 }, { "epoch": 12.92, "grad_norm": 1.21875, "learning_rate": 0.0003040400490863453, "loss": 0.164, "step": 311970 }, { "epoch": 12.92, "grad_norm": 1.515625, "learning_rate": 0.00030402946024374946, "loss": 0.1783, "step": 311980 }, { "epoch": 12.92, "grad_norm": 0.53125, "learning_rate": 0.0003040188712994759, "loss": 0.1888, "step": 311990 }, { "epoch": 12.92, "grad_norm": 0.62890625, "learning_rate": 0.0003040082822535448, "loss": 0.216, "step": 312000 }, { "epoch": 12.92, "grad_norm": 0.69140625, "learning_rate": 0.0003039976931059759, "loss": 0.1737, "step": 312010 }, { "epoch": 12.92, "grad_norm": 0.369140625, "learning_rate": 0.00030398710385678914, "loss": 0.1222, "step": 312020 }, { "epoch": 12.92, "grad_norm": 1.078125, "learning_rate": 0.0003039765145060045, "loss": 0.2165, "step": 312030 }, { "epoch": 12.92, "grad_norm": 0.369140625, "learning_rate": 0.0003039659250536418, "loss": 0.2193, "step": 312040 }, { "epoch": 12.93, "grad_norm": 0.390625, "learning_rate": 0.0003039553354997212, "loss": 0.1693, "step": 312050 }, { "epoch": 12.93, "grad_norm": 0.78125, "learning_rate": 0.00030394474584426244, "loss": 0.2096, "step": 312060 }, { "epoch": 12.93, "grad_norm": 0.5703125, "learning_rate": 0.0003039341560872855, "loss": 0.1685, "step": 312070 }, { "epoch": 12.93, "grad_norm": 0.77734375, "learning_rate": 0.0003039235662288103, "loss": 0.1558, "step": 312080 }, { "epoch": 12.93, "grad_norm": 0.498046875, "learning_rate": 0.00030391297626885676, "loss": 0.2156, "step": 312090 }, { "epoch": 12.93, "grad_norm": 0.318359375, "learning_rate": 0.0003039023862074448, "loss": 0.2002, "step": 312100 }, { "epoch": 12.93, "grad_norm": 0.9765625, "learning_rate": 0.00030389179604459437, "loss": 0.205, "step": 312110 }, { "epoch": 12.93, "grad_norm": 0.7890625, "learning_rate": 0.0003038812057803255, "loss": 0.186, "step": 312120 }, { "epoch": 12.93, "grad_norm": 0.71875, "learning_rate": 0.0003038706154146579, "loss": 0.1863, "step": 312130 }, { "epoch": 12.93, "grad_norm": 0.83203125, "learning_rate": 0.00030386002494761167, "loss": 0.1776, "step": 312140 }, { "epoch": 12.93, "grad_norm": 1.203125, "learning_rate": 0.0003038494343792066, "loss": 0.173, "step": 312150 }, { "epoch": 12.93, "grad_norm": 0.36328125, "learning_rate": 0.0003038388437094629, "loss": 0.1741, "step": 312160 }, { "epoch": 12.93, "grad_norm": 1.0625, "learning_rate": 0.0003038282529384002, "loss": 0.2046, "step": 312170 }, { "epoch": 12.93, "grad_norm": 0.953125, "learning_rate": 0.00030381766206603845, "loss": 0.1554, "step": 312180 }, { "epoch": 12.93, "grad_norm": 0.59375, "learning_rate": 0.0003038070710923978, "loss": 0.1712, "step": 312190 }, { "epoch": 12.93, "grad_norm": 0.75390625, "learning_rate": 0.00030379648001749803, "loss": 0.1631, "step": 312200 }, { "epoch": 12.93, "grad_norm": 1.046875, "learning_rate": 0.00030378588884135907, "loss": 0.1957, "step": 312210 }, { "epoch": 12.93, "grad_norm": 1.0703125, "learning_rate": 0.0003037752975640009, "loss": 0.1271, "step": 312220 }, { "epoch": 12.93, "grad_norm": 0.80078125, "learning_rate": 0.0003037647061854434, "loss": 0.1578, "step": 312230 }, { "epoch": 12.93, "grad_norm": 0.00038909912109375, "learning_rate": 0.0003037541147057065, "loss": 0.1925, "step": 312240 }, { "epoch": 12.93, "grad_norm": 0.59765625, "learning_rate": 0.00030374352312481027, "loss": 0.2081, "step": 312250 }, { "epoch": 12.93, "grad_norm": 0.671875, "learning_rate": 0.00030373293144277444, "loss": 0.19, "step": 312260 }, { "epoch": 12.93, "grad_norm": 0.61328125, "learning_rate": 0.00030372233965961904, "loss": 0.1754, "step": 312270 }, { "epoch": 12.93, "grad_norm": 1.7109375, "learning_rate": 0.00030371174777536406, "loss": 0.174, "step": 312280 }, { "epoch": 12.94, "grad_norm": 1.015625, "learning_rate": 0.0003037011557900293, "loss": 0.1494, "step": 312290 }, { "epoch": 12.94, "grad_norm": 1.3828125, "learning_rate": 0.0003036905637036348, "loss": 0.2108, "step": 312300 }, { "epoch": 12.94, "grad_norm": 2.34375, "learning_rate": 0.00030367997151620045, "loss": 0.1916, "step": 312310 }, { "epoch": 12.94, "grad_norm": 1.2109375, "learning_rate": 0.00030366937922774617, "loss": 0.1865, "step": 312320 }, { "epoch": 12.94, "grad_norm": 0.69921875, "learning_rate": 0.000303658786838292, "loss": 0.22, "step": 312330 }, { "epoch": 12.94, "grad_norm": 0.78515625, "learning_rate": 0.00030364819434785764, "loss": 0.1907, "step": 312340 }, { "epoch": 12.94, "grad_norm": 0.765625, "learning_rate": 0.00030363760175646334, "loss": 0.2043, "step": 312350 }, { "epoch": 12.94, "grad_norm": 0.30859375, "learning_rate": 0.00030362700906412873, "loss": 0.2055, "step": 312360 }, { "epoch": 12.94, "grad_norm": 0.8984375, "learning_rate": 0.00030361641627087393, "loss": 0.1751, "step": 312370 }, { "epoch": 12.94, "grad_norm": 0.490234375, "learning_rate": 0.0003036058233767189, "loss": 0.2017, "step": 312380 }, { "epoch": 12.94, "grad_norm": 1.2421875, "learning_rate": 0.0003035952303816833, "loss": 0.1992, "step": 312390 }, { "epoch": 12.94, "grad_norm": 0.490234375, "learning_rate": 0.00030358463728578743, "loss": 0.16, "step": 312400 }, { "epoch": 12.94, "grad_norm": 0.625, "learning_rate": 0.00030357404408905107, "loss": 0.1528, "step": 312410 }, { "epoch": 12.94, "grad_norm": 0.486328125, "learning_rate": 0.00030356345079149405, "loss": 0.2242, "step": 312420 }, { "epoch": 12.94, "grad_norm": 0.99609375, "learning_rate": 0.0003035528573931365, "loss": 0.1682, "step": 312430 }, { "epoch": 12.94, "grad_norm": 1.7265625, "learning_rate": 0.00030354226389399814, "loss": 0.2065, "step": 312440 }, { "epoch": 12.94, "grad_norm": 1.0625, "learning_rate": 0.00030353167029409904, "loss": 0.1732, "step": 312450 }, { "epoch": 12.94, "grad_norm": 1.59375, "learning_rate": 0.00030352107659345923, "loss": 0.2129, "step": 312460 }, { "epoch": 12.94, "grad_norm": 0.6015625, "learning_rate": 0.0003035104827920984, "loss": 0.1893, "step": 312470 }, { "epoch": 12.94, "grad_norm": 0.9296875, "learning_rate": 0.0003034998888900367, "loss": 0.1761, "step": 312480 }, { "epoch": 12.94, "grad_norm": 0.78125, "learning_rate": 0.00030348929488729405, "loss": 0.191, "step": 312490 }, { "epoch": 12.94, "grad_norm": 1.421875, "learning_rate": 0.00030347870078389016, "loss": 0.1637, "step": 312500 }, { "epoch": 12.94, "grad_norm": 1.140625, "learning_rate": 0.00030346810657984525, "loss": 0.2206, "step": 312510 }, { "epoch": 12.94, "grad_norm": 0.8203125, "learning_rate": 0.00030345751227517907, "loss": 0.1878, "step": 312520 }, { "epoch": 12.94, "grad_norm": 0.423828125, "learning_rate": 0.0003034469178699116, "loss": 0.1814, "step": 312530 }, { "epoch": 12.95, "grad_norm": 1.0859375, "learning_rate": 0.000303436323364063, "loss": 0.164, "step": 312540 }, { "epoch": 12.95, "grad_norm": 0.302734375, "learning_rate": 0.0003034257287576527, "loss": 0.2323, "step": 312550 }, { "epoch": 12.95, "grad_norm": 0.38671875, "learning_rate": 0.0003034151340507012, "loss": 0.1283, "step": 312560 }, { "epoch": 12.95, "grad_norm": 0.5703125, "learning_rate": 0.00030340453924322815, "loss": 0.1768, "step": 312570 }, { "epoch": 12.95, "grad_norm": 0.82421875, "learning_rate": 0.0003033939443352534, "loss": 0.1961, "step": 312580 }, { "epoch": 12.95, "grad_norm": 0.65234375, "learning_rate": 0.0003033833493267971, "loss": 0.1828, "step": 312590 }, { "epoch": 12.95, "grad_norm": 0.8203125, "learning_rate": 0.0003033727542178791, "loss": 0.168, "step": 312600 }, { "epoch": 12.95, "grad_norm": 1.4609375, "learning_rate": 0.0003033621590085193, "loss": 0.2253, "step": 312610 }, { "epoch": 12.95, "grad_norm": 1.03125, "learning_rate": 0.0003033515636987378, "loss": 0.2673, "step": 312620 }, { "epoch": 12.95, "grad_norm": 1.6796875, "learning_rate": 0.00030334096828855424, "loss": 0.1907, "step": 312630 }, { "epoch": 12.95, "grad_norm": 0.41796875, "learning_rate": 0.00030333037277798885, "loss": 0.191, "step": 312640 }, { "epoch": 12.95, "grad_norm": 1.6484375, "learning_rate": 0.0003033197771670615, "loss": 0.1647, "step": 312650 }, { "epoch": 12.95, "grad_norm": 0.7265625, "learning_rate": 0.00030330918145579194, "loss": 0.1593, "step": 312660 }, { "epoch": 12.95, "grad_norm": 0.515625, "learning_rate": 0.00030329858564420037, "loss": 0.1637, "step": 312670 }, { "epoch": 12.95, "grad_norm": 0.482421875, "learning_rate": 0.0003032879897323066, "loss": 0.2331, "step": 312680 }, { "epoch": 12.95, "grad_norm": 0.8515625, "learning_rate": 0.0003032773937201305, "loss": 0.208, "step": 312690 }, { "epoch": 12.95, "grad_norm": 0.36328125, "learning_rate": 0.00030326679760769225, "loss": 0.2139, "step": 312700 }, { "epoch": 12.95, "grad_norm": 1.0078125, "learning_rate": 0.00030325620139501146, "loss": 0.1762, "step": 312710 }, { "epoch": 12.95, "grad_norm": 1.421875, "learning_rate": 0.0003032456050821084, "loss": 0.1768, "step": 312720 }, { "epoch": 12.95, "grad_norm": 0.318359375, "learning_rate": 0.0003032350086690029, "loss": 0.2076, "step": 312730 }, { "epoch": 12.95, "grad_norm": 0.6015625, "learning_rate": 0.00030322441215571466, "loss": 0.1983, "step": 312740 }, { "epoch": 12.95, "grad_norm": 0.734375, "learning_rate": 0.000303213815542264, "loss": 0.1778, "step": 312750 }, { "epoch": 12.95, "grad_norm": 0.78125, "learning_rate": 0.0003032032188286706, "loss": 0.2172, "step": 312760 }, { "epoch": 12.95, "grad_norm": 0.1875, "learning_rate": 0.0003031926220149545, "loss": 0.2284, "step": 312770 }, { "epoch": 12.96, "grad_norm": 0.6796875, "learning_rate": 0.0003031820251011357, "loss": 0.1713, "step": 312780 }, { "epoch": 12.96, "grad_norm": 0.63671875, "learning_rate": 0.000303171428087234, "loss": 0.1718, "step": 312790 }, { "epoch": 12.96, "grad_norm": 0.83203125, "learning_rate": 0.00030316083097326943, "loss": 0.2251, "step": 312800 }, { "epoch": 12.96, "grad_norm": 0.388671875, "learning_rate": 0.00030315023375926186, "loss": 0.1914, "step": 312810 }, { "epoch": 12.96, "grad_norm": 1.3125, "learning_rate": 0.00030313963644523133, "loss": 0.2288, "step": 312820 }, { "epoch": 12.96, "grad_norm": 1.234375, "learning_rate": 0.00030312903903119783, "loss": 0.1471, "step": 312830 }, { "epoch": 12.96, "grad_norm": 0.859375, "learning_rate": 0.00030311844151718117, "loss": 0.2272, "step": 312840 }, { "epoch": 12.96, "grad_norm": 0.8984375, "learning_rate": 0.0003031078439032013, "loss": 0.1606, "step": 312850 }, { "epoch": 12.96, "grad_norm": 1.4375, "learning_rate": 0.0003030972461892782, "loss": 0.2044, "step": 312860 }, { "epoch": 12.96, "grad_norm": 0.70703125, "learning_rate": 0.0003030866483754318, "loss": 0.176, "step": 312870 }, { "epoch": 12.96, "grad_norm": 0.87890625, "learning_rate": 0.00030307605046168214, "loss": 0.2151, "step": 312880 }, { "epoch": 12.96, "grad_norm": 0.53515625, "learning_rate": 0.000303065452448049, "loss": 0.2068, "step": 312890 }, { "epoch": 12.96, "grad_norm": 0.388671875, "learning_rate": 0.00030305485433455246, "loss": 0.1483, "step": 312900 }, { "epoch": 12.96, "grad_norm": 1.609375, "learning_rate": 0.00030304425612121244, "loss": 0.1848, "step": 312910 }, { "epoch": 12.96, "grad_norm": 0.43359375, "learning_rate": 0.00030303365780804883, "loss": 0.1705, "step": 312920 }, { "epoch": 12.96, "grad_norm": 0.3671875, "learning_rate": 0.0003030230593950815, "loss": 0.2099, "step": 312930 }, { "epoch": 12.96, "grad_norm": 0.7890625, "learning_rate": 0.0003030124608823306, "loss": 0.2164, "step": 312940 }, { "epoch": 12.96, "grad_norm": 1.71875, "learning_rate": 0.00030300186226981594, "loss": 0.2178, "step": 312950 }, { "epoch": 12.96, "grad_norm": 1.1015625, "learning_rate": 0.0003029912635575576, "loss": 0.2025, "step": 312960 }, { "epoch": 12.96, "grad_norm": 0.72265625, "learning_rate": 0.0003029806647455753, "loss": 0.1606, "step": 312970 }, { "epoch": 12.96, "grad_norm": 0.671875, "learning_rate": 0.00030297006583388913, "loss": 0.2154, "step": 312980 }, { "epoch": 12.96, "grad_norm": 0.0, "learning_rate": 0.0003029594668225191, "loss": 0.1726, "step": 312990 }, { "epoch": 12.96, "grad_norm": 1.5078125, "learning_rate": 0.000302948867711485, "loss": 0.1751, "step": 313000 }, { "epoch": 12.96, "grad_norm": 2.421875, "learning_rate": 0.00030293826850080687, "loss": 0.2008, "step": 313010 }, { "epoch": 12.97, "grad_norm": 0.73046875, "learning_rate": 0.00030292766919050464, "loss": 0.1766, "step": 313020 }, { "epoch": 12.97, "grad_norm": 0.74609375, "learning_rate": 0.00030291706978059817, "loss": 0.155, "step": 313030 }, { "epoch": 12.97, "grad_norm": 0.6015625, "learning_rate": 0.0003029064702711076, "loss": 0.1466, "step": 313040 }, { "epoch": 12.97, "grad_norm": 0.86328125, "learning_rate": 0.0003028958706620528, "loss": 0.2417, "step": 313050 }, { "epoch": 12.97, "grad_norm": 0.88671875, "learning_rate": 0.0003028852709534536, "loss": 0.192, "step": 313060 }, { "epoch": 12.97, "grad_norm": 0.92578125, "learning_rate": 0.0003028746711453301, "loss": 0.1212, "step": 313070 }, { "epoch": 12.97, "grad_norm": 0.875, "learning_rate": 0.00030286407123770206, "loss": 0.2015, "step": 313080 }, { "epoch": 12.97, "grad_norm": 1.1015625, "learning_rate": 0.0003028534712305896, "loss": 0.2933, "step": 313090 }, { "epoch": 12.97, "grad_norm": 0.5078125, "learning_rate": 0.0003028428711240126, "loss": 0.185, "step": 313100 }, { "epoch": 12.97, "grad_norm": 2.21875, "learning_rate": 0.00030283227091799106, "loss": 0.2188, "step": 313110 }, { "epoch": 12.97, "grad_norm": 0.640625, "learning_rate": 0.00030282167061254487, "loss": 0.2219, "step": 313120 }, { "epoch": 12.97, "grad_norm": 1.2578125, "learning_rate": 0.00030281107020769404, "loss": 0.2082, "step": 313130 }, { "epoch": 12.97, "grad_norm": 0.640625, "learning_rate": 0.0003028004697034584, "loss": 0.2045, "step": 313140 }, { "epoch": 12.97, "grad_norm": 0.447265625, "learning_rate": 0.0003027898690998581, "loss": 0.2024, "step": 313150 }, { "epoch": 12.97, "grad_norm": 0.46484375, "learning_rate": 0.0003027792683969129, "loss": 0.1822, "step": 313160 }, { "epoch": 12.97, "grad_norm": 0.30078125, "learning_rate": 0.0003027686675946428, "loss": 0.2144, "step": 313170 }, { "epoch": 12.97, "grad_norm": 1.203125, "learning_rate": 0.0003027580666930677, "loss": 0.2144, "step": 313180 }, { "epoch": 12.97, "grad_norm": 1.2265625, "learning_rate": 0.0003027474656922077, "loss": 0.1289, "step": 313190 }, { "epoch": 12.97, "grad_norm": 0.859375, "learning_rate": 0.0003027368645920827, "loss": 0.1539, "step": 313200 }, { "epoch": 12.97, "grad_norm": 0.34765625, "learning_rate": 0.0003027262633927126, "loss": 0.1835, "step": 313210 }, { "epoch": 12.97, "grad_norm": 0.27734375, "learning_rate": 0.0003027156620941173, "loss": 0.2082, "step": 313220 }, { "epoch": 12.97, "grad_norm": 0.53125, "learning_rate": 0.0003027050606963168, "loss": 0.1818, "step": 313230 }, { "epoch": 12.97, "grad_norm": 0.984375, "learning_rate": 0.0003026944591993312, "loss": 0.1712, "step": 313240 }, { "epoch": 12.97, "grad_norm": 0.68359375, "learning_rate": 0.00030268385760318025, "loss": 0.2009, "step": 313250 }, { "epoch": 12.98, "grad_norm": 1.25, "learning_rate": 0.0003026732559078839, "loss": 0.2078, "step": 313260 }, { "epoch": 12.98, "grad_norm": 1.03125, "learning_rate": 0.0003026626541134622, "loss": 0.1748, "step": 313270 }, { "epoch": 12.98, "grad_norm": 1.2734375, "learning_rate": 0.0003026520522199352, "loss": 0.2129, "step": 313280 }, { "epoch": 12.98, "grad_norm": 0.71875, "learning_rate": 0.0003026414502273226, "loss": 0.1781, "step": 313290 }, { "epoch": 12.98, "grad_norm": 1.203125, "learning_rate": 0.00030263084813564453, "loss": 0.177, "step": 313300 }, { "epoch": 12.98, "grad_norm": 0.2109375, "learning_rate": 0.0003026202459449208, "loss": 0.1661, "step": 313310 }, { "epoch": 12.98, "grad_norm": 0.8359375, "learning_rate": 0.00030260964365517155, "loss": 0.2079, "step": 313320 }, { "epoch": 12.98, "grad_norm": 0.0, "learning_rate": 0.0003025990412664166, "loss": 0.1924, "step": 313330 }, { "epoch": 12.98, "grad_norm": 1.0859375, "learning_rate": 0.00030258843877867595, "loss": 0.1093, "step": 313340 }, { "epoch": 12.98, "grad_norm": 0.59375, "learning_rate": 0.00030257783619196945, "loss": 0.1538, "step": 313350 }, { "epoch": 12.98, "grad_norm": 1.234375, "learning_rate": 0.0003025672335063172, "loss": 0.1959, "step": 313360 }, { "epoch": 12.98, "grad_norm": 1.265625, "learning_rate": 0.0003025566307217391, "loss": 0.1959, "step": 313370 }, { "epoch": 12.98, "grad_norm": 0.78515625, "learning_rate": 0.00030254602783825514, "loss": 0.2191, "step": 313380 }, { "epoch": 12.98, "grad_norm": 0.7890625, "learning_rate": 0.00030253542485588516, "loss": 0.2154, "step": 313390 }, { "epoch": 12.98, "grad_norm": 1.4296875, "learning_rate": 0.00030252482177464923, "loss": 0.21, "step": 313400 }, { "epoch": 12.98, "grad_norm": 1.21875, "learning_rate": 0.00030251421859456724, "loss": 0.2101, "step": 313410 }, { "epoch": 12.98, "grad_norm": 0.6015625, "learning_rate": 0.00030250361531565913, "loss": 0.2238, "step": 313420 }, { "epoch": 12.98, "grad_norm": 0.5703125, "learning_rate": 0.00030249301193794486, "loss": 0.2593, "step": 313430 }, { "epoch": 12.98, "grad_norm": 0.45703125, "learning_rate": 0.0003024824084614445, "loss": 0.2205, "step": 313440 }, { "epoch": 12.98, "grad_norm": 0.9140625, "learning_rate": 0.0003024718048861779, "loss": 0.1705, "step": 313450 }, { "epoch": 12.98, "grad_norm": 1.2734375, "learning_rate": 0.000302461201212165, "loss": 0.1936, "step": 313460 }, { "epoch": 12.98, "grad_norm": 1.5234375, "learning_rate": 0.0003024505974394258, "loss": 0.1594, "step": 313470 }, { "epoch": 12.98, "grad_norm": 0.640625, "learning_rate": 0.0003024399935679802, "loss": 0.1917, "step": 313480 }, { "epoch": 12.98, "grad_norm": 0.67578125, "learning_rate": 0.0003024293895978483, "loss": 0.2127, "step": 313490 }, { "epoch": 12.99, "grad_norm": 0.6953125, "learning_rate": 0.00030241878552904984, "loss": 0.2074, "step": 313500 }, { "epoch": 12.99, "grad_norm": 0.41796875, "learning_rate": 0.00030240818136160493, "loss": 0.2056, "step": 313510 }, { "epoch": 12.99, "grad_norm": 1.046875, "learning_rate": 0.00030239757709553345, "loss": 0.1507, "step": 313520 }, { "epoch": 12.99, "grad_norm": 0.6015625, "learning_rate": 0.0003023869727308555, "loss": 0.2213, "step": 313530 }, { "epoch": 12.99, "grad_norm": 0.96484375, "learning_rate": 0.00030237636826759077, "loss": 0.1859, "step": 313540 }, { "epoch": 12.99, "grad_norm": 0.64453125, "learning_rate": 0.0003023657637057594, "loss": 0.1743, "step": 313550 }, { "epoch": 12.99, "grad_norm": 0.43359375, "learning_rate": 0.00030235515904538133, "loss": 0.1541, "step": 313560 }, { "epoch": 12.99, "grad_norm": 0.68359375, "learning_rate": 0.00030234455428647654, "loss": 0.2508, "step": 313570 }, { "epoch": 12.99, "grad_norm": 0.76953125, "learning_rate": 0.000302333949429065, "loss": 0.2188, "step": 313580 }, { "epoch": 12.99, "grad_norm": 0.5078125, "learning_rate": 0.0003023233444731666, "loss": 0.1864, "step": 313590 }, { "epoch": 12.99, "grad_norm": 0.55859375, "learning_rate": 0.0003023127394188012, "loss": 0.2019, "step": 313600 }, { "epoch": 12.99, "grad_norm": 0.83984375, "learning_rate": 0.000302302134265989, "loss": 0.1612, "step": 313610 }, { "epoch": 12.99, "grad_norm": 1.71875, "learning_rate": 0.00030229152901474975, "loss": 0.1945, "step": 313620 }, { "epoch": 12.99, "grad_norm": 1.046875, "learning_rate": 0.0003022809236651036, "loss": 0.195, "step": 313630 }, { "epoch": 12.99, "grad_norm": 0.87109375, "learning_rate": 0.0003022703182170703, "loss": 0.1676, "step": 313640 }, { "epoch": 12.99, "grad_norm": 0.416015625, "learning_rate": 0.00030225971267066985, "loss": 0.1933, "step": 313650 }, { "epoch": 12.99, "grad_norm": 1.4296875, "learning_rate": 0.0003022491070259224, "loss": 0.2228, "step": 313660 }, { "epoch": 12.99, "grad_norm": 0.7890625, "learning_rate": 0.00030223850128284767, "loss": 0.2003, "step": 313670 }, { "epoch": 12.99, "grad_norm": 1.1640625, "learning_rate": 0.0003022278954414658, "loss": 0.185, "step": 313680 }, { "epoch": 12.99, "grad_norm": 0.93359375, "learning_rate": 0.0003022172895017967, "loss": 0.1918, "step": 313690 }, { "epoch": 12.99, "grad_norm": 0.515625, "learning_rate": 0.0003022066834638602, "loss": 0.1785, "step": 313700 }, { "epoch": 12.99, "grad_norm": 1.0859375, "learning_rate": 0.0003021960773276764, "loss": 0.193, "step": 313710 }, { "epoch": 12.99, "grad_norm": 0.486328125, "learning_rate": 0.0003021854710932652, "loss": 0.1948, "step": 313720 }, { "epoch": 12.99, "grad_norm": 0.38671875, "learning_rate": 0.0003021748647606466, "loss": 0.1823, "step": 313730 }, { "epoch": 13.0, "grad_norm": 0.71484375, "learning_rate": 0.0003021642583298406, "loss": 0.2216, "step": 313740 }, { "epoch": 13.0, "grad_norm": 1.125, "learning_rate": 0.00030215365180086696, "loss": 0.1605, "step": 313750 }, { "epoch": 13.0, "grad_norm": 0.84765625, "learning_rate": 0.00030214304517374587, "loss": 0.189, "step": 313760 }, { "epoch": 13.0, "grad_norm": 0.482421875, "learning_rate": 0.00030213243844849717, "loss": 0.1976, "step": 313770 }, { "epoch": 13.0, "grad_norm": 0.62109375, "learning_rate": 0.00030212183162514085, "loss": 0.2097, "step": 313780 }, { "epoch": 13.0, "grad_norm": 0.76171875, "learning_rate": 0.0003021112247036969, "loss": 0.1814, "step": 313790 }, { "epoch": 13.0, "grad_norm": 2.375, "learning_rate": 0.00030210061768418524, "loss": 0.179, "step": 313800 }, { "epoch": 13.0, "grad_norm": 0.388671875, "learning_rate": 0.00030209001056662584, "loss": 0.157, "step": 313810 }, { "epoch": 13.0, "grad_norm": 0.62890625, "learning_rate": 0.0003020794033510387, "loss": 0.2022, "step": 313820 }, { "epoch": 13.0, "grad_norm": 0.2060546875, "learning_rate": 0.0003020687960374437, "loss": 0.1604, "step": 313830 }, { "epoch": 13.0, "grad_norm": 0.384765625, "learning_rate": 0.0003020581886258608, "loss": 0.1918, "step": 313840 }, { "epoch": 13.0, "grad_norm": 0.32421875, "learning_rate": 0.00030204758111631013, "loss": 0.1788, "step": 313850 }, { "epoch": 13.0, "grad_norm": 0.5859375, "learning_rate": 0.0003020369735088114, "loss": 0.2173, "step": 313860 }, { "epoch": 13.0, "grad_norm": 0.7265625, "learning_rate": 0.0003020263658033848, "loss": 0.1736, "step": 313870 }, { "epoch": 13.0, "grad_norm": 0.5546875, "learning_rate": 0.0003020157580000502, "loss": 0.2014, "step": 313880 }, { "epoch": 13.0, "grad_norm": 1.484375, "learning_rate": 0.00030200515009882757, "loss": 0.1585, "step": 313890 }, { "epoch": 13.0, "grad_norm": 2.09375, "learning_rate": 0.0003019945420997369, "loss": 0.1597, "step": 313900 }, { "epoch": 13.0, "grad_norm": 1.8359375, "learning_rate": 0.000301983934002798, "loss": 0.2206, "step": 313910 }, { "epoch": 13.0, "grad_norm": 0.7109375, "learning_rate": 0.0003019733258080309, "loss": 0.191, "step": 313920 }, { "epoch": 13.0, "grad_norm": 0.53125, "learning_rate": 0.0003019627175154558, "loss": 0.227, "step": 313930 }, { "epoch": 13.0, "grad_norm": 0.9921875, "learning_rate": 0.00030195210912509234, "loss": 0.1937, "step": 313940 }, { "epoch": 13.0, "grad_norm": 1.6171875, "learning_rate": 0.0003019415006369607, "loss": 0.1694, "step": 313950 }, { "epoch": 13.0, "grad_norm": 0.671875, "learning_rate": 0.00030193089205108074, "loss": 0.1447, "step": 313960 }, { "epoch": 13.0, "grad_norm": 1.2265625, "learning_rate": 0.00030192028336747245, "loss": 0.192, "step": 313970 }, { "epoch": 13.01, "grad_norm": 0.64453125, "learning_rate": 0.0003019096745861558, "loss": 0.1844, "step": 313980 }, { "epoch": 13.01, "grad_norm": 0.486328125, "learning_rate": 0.00030189906570715077, "loss": 0.1852, "step": 313990 }, { "epoch": 13.01, "grad_norm": 0.80859375, "learning_rate": 0.0003018884567304772, "loss": 0.2092, "step": 314000 }, { "epoch": 13.01, "grad_norm": 1.4765625, "learning_rate": 0.00030187784765615527, "loss": 0.1528, "step": 314010 }, { "epoch": 13.01, "grad_norm": 0.7265625, "learning_rate": 0.00030186723848420476, "loss": 0.1987, "step": 314020 }, { "epoch": 13.01, "grad_norm": 0.71484375, "learning_rate": 0.0003018566292146458, "loss": 0.2079, "step": 314030 }, { "epoch": 13.01, "grad_norm": 1.5390625, "learning_rate": 0.00030184601984749814, "loss": 0.1711, "step": 314040 }, { "epoch": 13.01, "grad_norm": 1.3203125, "learning_rate": 0.00030183541038278193, "loss": 0.1582, "step": 314050 }, { "epoch": 13.01, "grad_norm": 0.8203125, "learning_rate": 0.0003018248008205172, "loss": 0.239, "step": 314060 }, { "epoch": 13.01, "grad_norm": 0.921875, "learning_rate": 0.00030181419116072356, "loss": 0.182, "step": 314070 }, { "epoch": 13.01, "grad_norm": 1.09375, "learning_rate": 0.00030180358140342135, "loss": 0.2219, "step": 314080 }, { "epoch": 13.01, "grad_norm": 0.53515625, "learning_rate": 0.0003017929715486304, "loss": 0.1796, "step": 314090 }, { "epoch": 13.01, "grad_norm": 0.41796875, "learning_rate": 0.0003017823615963706, "loss": 0.2195, "step": 314100 }, { "epoch": 13.01, "grad_norm": 1.46875, "learning_rate": 0.00030177175154666203, "loss": 0.1719, "step": 314110 }, { "epoch": 13.01, "grad_norm": 1.3046875, "learning_rate": 0.0003017611413995246, "loss": 0.1746, "step": 314120 }, { "epoch": 13.01, "grad_norm": 0.51171875, "learning_rate": 0.0003017505311549782, "loss": 0.2076, "step": 314130 }, { "epoch": 13.01, "grad_norm": 2.15625, "learning_rate": 0.0003017399208130431, "loss": 0.2231, "step": 314140 }, { "epoch": 13.01, "grad_norm": 0.8515625, "learning_rate": 0.00030172931037373885, "loss": 0.1566, "step": 314150 }, { "epoch": 13.01, "grad_norm": 0.71484375, "learning_rate": 0.00030171869983708577, "loss": 0.1815, "step": 314160 }, { "epoch": 13.01, "grad_norm": 1.1953125, "learning_rate": 0.00030170808920310365, "loss": 0.211, "step": 314170 }, { "epoch": 13.01, "grad_norm": 0.85546875, "learning_rate": 0.00030169747847181243, "loss": 0.1798, "step": 314180 }, { "epoch": 13.01, "grad_norm": 0.5703125, "learning_rate": 0.00030168686764323223, "loss": 0.2239, "step": 314190 }, { "epoch": 13.01, "grad_norm": 0.578125, "learning_rate": 0.0003016762567173829, "loss": 0.1593, "step": 314200 }, { "epoch": 13.01, "grad_norm": 0.55859375, "learning_rate": 0.00030166564569428434, "loss": 0.2338, "step": 314210 }, { "epoch": 13.01, "grad_norm": 0.0, "learning_rate": 0.0003016550345739567, "loss": 0.154, "step": 314220 }, { "epoch": 13.02, "grad_norm": 2.71875, "learning_rate": 0.0003016444233564198, "loss": 0.2066, "step": 314230 }, { "epoch": 13.02, "grad_norm": 0.98046875, "learning_rate": 0.00030163381204169375, "loss": 0.2236, "step": 314240 }, { "epoch": 13.02, "grad_norm": 0.58203125, "learning_rate": 0.00030162320062979847, "loss": 0.1943, "step": 314250 }, { "epoch": 13.02, "grad_norm": 0.71875, "learning_rate": 0.0003016125891207538, "loss": 0.1841, "step": 314260 }, { "epoch": 13.02, "grad_norm": 0.41796875, "learning_rate": 0.00030160197751457983, "loss": 0.1702, "step": 314270 }, { "epoch": 13.02, "grad_norm": 0.75, "learning_rate": 0.00030159136581129654, "loss": 0.2195, "step": 314280 }, { "epoch": 13.02, "grad_norm": 1.0703125, "learning_rate": 0.00030158075401092387, "loss": 0.1488, "step": 314290 }, { "epoch": 13.02, "grad_norm": 0.92578125, "learning_rate": 0.0003015701421134818, "loss": 0.1944, "step": 314300 }, { "epoch": 13.02, "grad_norm": 2.109375, "learning_rate": 0.0003015595301189902, "loss": 0.2359, "step": 314310 }, { "epoch": 13.02, "grad_norm": 1.359375, "learning_rate": 0.0003015489180274693, "loss": 0.2051, "step": 314320 }, { "epoch": 13.02, "grad_norm": 0.796875, "learning_rate": 0.00030153830583893877, "loss": 0.1898, "step": 314330 }, { "epoch": 13.02, "grad_norm": 1.0859375, "learning_rate": 0.00030152769355341874, "loss": 0.236, "step": 314340 }, { "epoch": 13.02, "grad_norm": 0.341796875, "learning_rate": 0.00030151708117092925, "loss": 0.1771, "step": 314350 }, { "epoch": 13.02, "grad_norm": 0.7109375, "learning_rate": 0.0003015064686914901, "loss": 0.2311, "step": 314360 }, { "epoch": 13.02, "grad_norm": 1.671875, "learning_rate": 0.0003014958561151213, "loss": 0.1979, "step": 314370 }, { "epoch": 13.02, "grad_norm": 0.9609375, "learning_rate": 0.0003014852434418429, "loss": 0.1944, "step": 314380 }, { "epoch": 13.02, "grad_norm": 0.52734375, "learning_rate": 0.0003014746306716748, "loss": 0.1781, "step": 314390 }, { "epoch": 13.02, "grad_norm": 0.953125, "learning_rate": 0.00030146401780463704, "loss": 0.1576, "step": 314400 }, { "epoch": 13.02, "grad_norm": 0.66015625, "learning_rate": 0.00030145340484074955, "loss": 0.2311, "step": 314410 }, { "epoch": 13.02, "grad_norm": 0.78125, "learning_rate": 0.0003014427917800323, "loss": 0.1533, "step": 314420 }, { "epoch": 13.02, "grad_norm": 0.93359375, "learning_rate": 0.0003014321786225053, "loss": 0.2126, "step": 314430 }, { "epoch": 13.02, "grad_norm": 0.90234375, "learning_rate": 0.0003014215653681885, "loss": 0.184, "step": 314440 }, { "epoch": 13.02, "grad_norm": 1.0859375, "learning_rate": 0.00030141095201710176, "loss": 0.1928, "step": 314450 }, { "epoch": 13.02, "grad_norm": 1.03125, "learning_rate": 0.00030140033856926524, "loss": 0.1646, "step": 314460 }, { "epoch": 13.03, "grad_norm": 1.1171875, "learning_rate": 0.00030138972502469886, "loss": 0.1698, "step": 314470 }, { "epoch": 13.03, "grad_norm": 0.8125, "learning_rate": 0.00030137911138342256, "loss": 0.2125, "step": 314480 }, { "epoch": 13.03, "grad_norm": 1.546875, "learning_rate": 0.00030136849764545625, "loss": 0.1638, "step": 314490 }, { "epoch": 13.03, "grad_norm": 0.77734375, "learning_rate": 0.0003013578838108201, "loss": 0.1738, "step": 314500 }, { "epoch": 13.03, "grad_norm": 0.77734375, "learning_rate": 0.00030134726987953385, "loss": 0.2542, "step": 314510 }, { "epoch": 13.03, "grad_norm": 0.62890625, "learning_rate": 0.0003013366558516176, "loss": 0.1622, "step": 314520 }, { "epoch": 13.03, "grad_norm": 0.609375, "learning_rate": 0.0003013260417270913, "loss": 0.2265, "step": 314530 }, { "epoch": 13.03, "grad_norm": 0.3125, "learning_rate": 0.00030131542750597504, "loss": 0.1535, "step": 314540 }, { "epoch": 13.03, "grad_norm": 2.6875, "learning_rate": 0.0003013048131882886, "loss": 0.1834, "step": 314550 }, { "epoch": 13.03, "grad_norm": 0.6328125, "learning_rate": 0.00030129419877405203, "loss": 0.2562, "step": 314560 }, { "epoch": 13.03, "grad_norm": 4.4375, "learning_rate": 0.00030128358426328533, "loss": 0.2048, "step": 314570 }, { "epoch": 13.03, "grad_norm": 0.67578125, "learning_rate": 0.0003012729696560084, "loss": 0.194, "step": 314580 }, { "epoch": 13.03, "grad_norm": 0.5234375, "learning_rate": 0.00030126235495224143, "loss": 0.2021, "step": 314590 }, { "epoch": 13.03, "grad_norm": 0.55859375, "learning_rate": 0.00030125174015200413, "loss": 0.207, "step": 314600 }, { "epoch": 13.03, "grad_norm": 0.734375, "learning_rate": 0.0003012411252553166, "loss": 0.1746, "step": 314610 }, { "epoch": 13.03, "grad_norm": 0.392578125, "learning_rate": 0.00030123051026219886, "loss": 0.2084, "step": 314620 }, { "epoch": 13.03, "grad_norm": 1.1484375, "learning_rate": 0.0003012198951726707, "loss": 0.1905, "step": 314630 }, { "epoch": 13.03, "grad_norm": 0.69140625, "learning_rate": 0.0003012092799867524, "loss": 0.1855, "step": 314640 }, { "epoch": 13.03, "grad_norm": 0.58203125, "learning_rate": 0.00030119866470446364, "loss": 0.1797, "step": 314650 }, { "epoch": 13.03, "grad_norm": 0.546875, "learning_rate": 0.0003011880493258246, "loss": 0.225, "step": 314660 }, { "epoch": 13.03, "grad_norm": 0.78515625, "learning_rate": 0.00030117743385085516, "loss": 0.2261, "step": 314670 }, { "epoch": 13.03, "grad_norm": 0.2578125, "learning_rate": 0.0003011668182795753, "loss": 0.1883, "step": 314680 }, { "epoch": 13.03, "grad_norm": 0.8515625, "learning_rate": 0.00030115620261200504, "loss": 0.214, "step": 314690 }, { "epoch": 13.03, "grad_norm": 1.515625, "learning_rate": 0.0003011455868481643, "loss": 0.191, "step": 314700 }, { "epoch": 13.04, "grad_norm": 0.6328125, "learning_rate": 0.0003011349709880731, "loss": 0.1568, "step": 314710 }, { "epoch": 13.04, "grad_norm": 0.58203125, "learning_rate": 0.00030112435503175145, "loss": 0.1724, "step": 314720 }, { "epoch": 13.04, "grad_norm": 0.77734375, "learning_rate": 0.0003011137389792192, "loss": 0.1924, "step": 314730 }, { "epoch": 13.04, "grad_norm": 0.609375, "learning_rate": 0.00030110312283049647, "loss": 0.2146, "step": 314740 }, { "epoch": 13.04, "grad_norm": 0.8515625, "learning_rate": 0.00030109250658560315, "loss": 0.1768, "step": 314750 }, { "epoch": 13.04, "grad_norm": 0.66015625, "learning_rate": 0.0003010818902445593, "loss": 0.2189, "step": 314760 }, { "epoch": 13.04, "grad_norm": 0.451171875, "learning_rate": 0.0003010712738073848, "loss": 0.1282, "step": 314770 }, { "epoch": 13.04, "grad_norm": 1.078125, "learning_rate": 0.0003010606572740997, "loss": 0.2319, "step": 314780 }, { "epoch": 13.04, "grad_norm": 0.455078125, "learning_rate": 0.0003010500406447239, "loss": 0.1538, "step": 314790 }, { "epoch": 13.04, "grad_norm": 0.70703125, "learning_rate": 0.0003010394239192775, "loss": 0.1856, "step": 314800 }, { "epoch": 13.04, "grad_norm": 0.546875, "learning_rate": 0.00030102880709778043, "loss": 0.1612, "step": 314810 }, { "epoch": 13.04, "grad_norm": 0.484375, "learning_rate": 0.00030101819018025263, "loss": 0.1926, "step": 314820 }, { "epoch": 13.04, "grad_norm": 0.828125, "learning_rate": 0.00030100757316671407, "loss": 0.1624, "step": 314830 }, { "epoch": 13.04, "grad_norm": 1.171875, "learning_rate": 0.00030099695605718485, "loss": 0.2178, "step": 314840 }, { "epoch": 13.04, "grad_norm": 1.078125, "learning_rate": 0.0003009863388516848, "loss": 0.2448, "step": 314850 }, { "epoch": 13.04, "grad_norm": 0.4453125, "learning_rate": 0.00030097572155023393, "loss": 0.1865, "step": 314860 }, { "epoch": 13.04, "grad_norm": 0.53515625, "learning_rate": 0.00030096510415285235, "loss": 0.1629, "step": 314870 }, { "epoch": 13.04, "grad_norm": 1.109375, "learning_rate": 0.00030095448665955985, "loss": 0.2338, "step": 314880 }, { "epoch": 13.04, "grad_norm": 0.3984375, "learning_rate": 0.0003009438690703766, "loss": 0.1669, "step": 314890 }, { "epoch": 13.04, "grad_norm": 1.5234375, "learning_rate": 0.00030093325138532247, "loss": 0.1855, "step": 314900 }, { "epoch": 13.04, "grad_norm": 0.8671875, "learning_rate": 0.00030092263360441743, "loss": 0.2553, "step": 314910 }, { "epoch": 13.04, "grad_norm": 1.0234375, "learning_rate": 0.00030091201572768155, "loss": 0.2238, "step": 314920 }, { "epoch": 13.04, "grad_norm": 0.0, "learning_rate": 0.0003009013977551347, "loss": 0.1854, "step": 314930 }, { "epoch": 13.04, "grad_norm": 0.5234375, "learning_rate": 0.0003008907796867969, "loss": 0.2061, "step": 314940 }, { "epoch": 13.05, "grad_norm": 0.375, "learning_rate": 0.00030088016152268813, "loss": 0.2395, "step": 314950 }, { "epoch": 13.05, "grad_norm": 0.98828125, "learning_rate": 0.0003008695432628285, "loss": 0.1951, "step": 314960 }, { "epoch": 13.05, "grad_norm": 1.3359375, "learning_rate": 0.00030085892490723786, "loss": 0.1874, "step": 314970 }, { "epoch": 13.05, "grad_norm": 0.9453125, "learning_rate": 0.00030084830645593614, "loss": 0.2161, "step": 314980 }, { "epoch": 13.05, "grad_norm": 1.3046875, "learning_rate": 0.00030083768790894344, "loss": 0.1974, "step": 314990 }, { "epoch": 13.05, "grad_norm": 0.6640625, "learning_rate": 0.0003008270692662797, "loss": 0.1699, "step": 315000 }, { "epoch": 13.05, "grad_norm": 2.171875, "learning_rate": 0.0003008164505279649, "loss": 0.204, "step": 315010 }, { "epoch": 13.05, "grad_norm": 0.9140625, "learning_rate": 0.00030080583169401906, "loss": 0.2125, "step": 315020 }, { "epoch": 13.05, "grad_norm": 1.2421875, "learning_rate": 0.0003007952127644621, "loss": 0.1677, "step": 315030 }, { "epoch": 13.05, "grad_norm": 0.73828125, "learning_rate": 0.000300784593739314, "loss": 0.2057, "step": 315040 }, { "epoch": 13.05, "grad_norm": 0.59765625, "learning_rate": 0.00030077397461859487, "loss": 0.1821, "step": 315050 }, { "epoch": 13.05, "grad_norm": 0.56640625, "learning_rate": 0.00030076335540232455, "loss": 0.2085, "step": 315060 }, { "epoch": 13.05, "grad_norm": 1.171875, "learning_rate": 0.000300752736090523, "loss": 0.2151, "step": 315070 }, { "epoch": 13.05, "grad_norm": 0.89453125, "learning_rate": 0.0003007421166832104, "loss": 0.2373, "step": 315080 }, { "epoch": 13.05, "grad_norm": 1.078125, "learning_rate": 0.0003007314971804065, "loss": 0.192, "step": 315090 }, { "epoch": 13.05, "grad_norm": 0.87109375, "learning_rate": 0.0003007208775821315, "loss": 0.174, "step": 315100 }, { "epoch": 13.05, "grad_norm": 0.62109375, "learning_rate": 0.0003007102578884053, "loss": 0.171, "step": 315110 }, { "epoch": 13.05, "grad_norm": 1.390625, "learning_rate": 0.0003006996380992478, "loss": 0.1559, "step": 315120 }, { "epoch": 13.05, "grad_norm": 0.80078125, "learning_rate": 0.0003006890182146791, "loss": 0.2027, "step": 315130 }, { "epoch": 13.05, "grad_norm": 0.66796875, "learning_rate": 0.0003006783982347191, "loss": 0.1263, "step": 315140 }, { "epoch": 13.05, "grad_norm": 0.890625, "learning_rate": 0.00030066777815938785, "loss": 0.2271, "step": 315150 }, { "epoch": 13.05, "grad_norm": 1.1015625, "learning_rate": 0.00030065715798870536, "loss": 0.172, "step": 315160 }, { "epoch": 13.05, "grad_norm": 0.486328125, "learning_rate": 0.00030064653772269147, "loss": 0.173, "step": 315170 }, { "epoch": 13.05, "grad_norm": 2.140625, "learning_rate": 0.00030063591736136634, "loss": 0.1851, "step": 315180 }, { "epoch": 13.06, "grad_norm": 1.59375, "learning_rate": 0.00030062529690474987, "loss": 0.1807, "step": 315190 }, { "epoch": 13.06, "grad_norm": 0.96484375, "learning_rate": 0.00030061467635286206, "loss": 0.208, "step": 315200 }, { "epoch": 13.06, "grad_norm": 0.640625, "learning_rate": 0.0003006040557057229, "loss": 0.1628, "step": 315210 }, { "epoch": 13.06, "grad_norm": 0.90625, "learning_rate": 0.00030059343496335227, "loss": 0.1325, "step": 315220 }, { "epoch": 13.06, "grad_norm": 0.9609375, "learning_rate": 0.0003005828141257704, "loss": 0.2347, "step": 315230 }, { "epoch": 13.06, "grad_norm": 0.62890625, "learning_rate": 0.00030057219319299717, "loss": 0.1874, "step": 315240 }, { "epoch": 13.06, "grad_norm": 1.15625, "learning_rate": 0.00030056157216505234, "loss": 0.2152, "step": 315250 }, { "epoch": 13.06, "grad_norm": 0.8828125, "learning_rate": 0.0003005509510419563, "loss": 0.1812, "step": 315260 }, { "epoch": 13.06, "grad_norm": 0.373046875, "learning_rate": 0.0003005403298237287, "loss": 0.2644, "step": 315270 }, { "epoch": 13.06, "grad_norm": 1.1015625, "learning_rate": 0.00030052970851038964, "loss": 0.1823, "step": 315280 }, { "epoch": 13.06, "grad_norm": 0.455078125, "learning_rate": 0.00030051908710195923, "loss": 0.1813, "step": 315290 }, { "epoch": 13.06, "grad_norm": 1.3125, "learning_rate": 0.00030050846559845725, "loss": 0.1474, "step": 315300 }, { "epoch": 13.06, "grad_norm": 1.421875, "learning_rate": 0.00030049784399990387, "loss": 0.1799, "step": 315310 }, { "epoch": 13.06, "grad_norm": 0.8046875, "learning_rate": 0.00030048722230631897, "loss": 0.2109, "step": 315320 }, { "epoch": 13.06, "grad_norm": 0.984375, "learning_rate": 0.0003004766005177225, "loss": 0.1557, "step": 315330 }, { "epoch": 13.06, "grad_norm": 0.84765625, "learning_rate": 0.00030046597863413464, "loss": 0.2143, "step": 315340 }, { "epoch": 13.06, "grad_norm": 1.0234375, "learning_rate": 0.0003004553566555752, "loss": 0.1633, "step": 315350 }, { "epoch": 13.06, "grad_norm": 0.27734375, "learning_rate": 0.0003004447345820642, "loss": 0.1733, "step": 315360 }, { "epoch": 13.06, "grad_norm": 0.49609375, "learning_rate": 0.00030043411241362174, "loss": 0.2255, "step": 315370 }, { "epoch": 13.06, "grad_norm": 0.5390625, "learning_rate": 0.0003004234901502676, "loss": 0.1564, "step": 315380 }, { "epoch": 13.06, "grad_norm": 2.203125, "learning_rate": 0.00030041286779202206, "loss": 0.1855, "step": 315390 }, { "epoch": 13.06, "grad_norm": 1.09375, "learning_rate": 0.0003004022453389048, "loss": 0.1424, "step": 315400 }, { "epoch": 13.06, "grad_norm": 0.6953125, "learning_rate": 0.000300391622790936, "loss": 0.172, "step": 315410 }, { "epoch": 13.06, "grad_norm": 1.1875, "learning_rate": 0.0003003810001481357, "loss": 0.1917, "step": 315420 }, { "epoch": 13.07, "grad_norm": 0.53125, "learning_rate": 0.0003003703774105237, "loss": 0.2006, "step": 315430 }, { "epoch": 13.07, "grad_norm": 1.1171875, "learning_rate": 0.0003003597545781201, "loss": 0.1974, "step": 315440 }, { "epoch": 13.07, "grad_norm": 0.6796875, "learning_rate": 0.0003003491316509449, "loss": 0.2181, "step": 315450 }, { "epoch": 13.07, "grad_norm": 1.3828125, "learning_rate": 0.000300338508629018, "loss": 0.1991, "step": 315460 }, { "epoch": 13.07, "grad_norm": 0.765625, "learning_rate": 0.0003003278855123596, "loss": 0.2314, "step": 315470 }, { "epoch": 13.07, "grad_norm": 0.5078125, "learning_rate": 0.00030031726230098945, "loss": 0.2074, "step": 315480 }, { "epoch": 13.07, "grad_norm": 1.1953125, "learning_rate": 0.0003003066389949277, "loss": 0.1899, "step": 315490 }, { "epoch": 13.07, "grad_norm": 0.65625, "learning_rate": 0.00030029601559419426, "loss": 0.1703, "step": 315500 }, { "epoch": 13.07, "grad_norm": 0.5234375, "learning_rate": 0.00030028539209880917, "loss": 0.1645, "step": 315510 }, { "epoch": 13.07, "grad_norm": 1.1171875, "learning_rate": 0.00030027476850879236, "loss": 0.1773, "step": 315520 }, { "epoch": 13.07, "grad_norm": 1.0390625, "learning_rate": 0.0003002641448241639, "loss": 0.199, "step": 315530 }, { "epoch": 13.07, "grad_norm": 0.609375, "learning_rate": 0.00030025352104494374, "loss": 0.1648, "step": 315540 }, { "epoch": 13.07, "grad_norm": 1.1875, "learning_rate": 0.00030024289717115187, "loss": 0.1272, "step": 315550 }, { "epoch": 13.07, "grad_norm": 1.1796875, "learning_rate": 0.00030023227320280835, "loss": 0.2102, "step": 315560 }, { "epoch": 13.07, "grad_norm": 0.98828125, "learning_rate": 0.00030022164913993303, "loss": 0.1957, "step": 315570 }, { "epoch": 13.07, "grad_norm": 0.92578125, "learning_rate": 0.000300211024982546, "loss": 0.2106, "step": 315580 }, { "epoch": 13.07, "grad_norm": 0.55078125, "learning_rate": 0.0003002004007306673, "loss": 0.127, "step": 315590 }, { "epoch": 13.07, "grad_norm": 0.494140625, "learning_rate": 0.0003001897763843169, "loss": 0.171, "step": 315600 }, { "epoch": 13.07, "grad_norm": 0.0, "learning_rate": 0.0003001791519435147, "loss": 0.1798, "step": 315610 }, { "epoch": 13.07, "grad_norm": 1.078125, "learning_rate": 0.0003001685274082807, "loss": 0.1998, "step": 315620 }, { "epoch": 13.07, "grad_norm": 1.875, "learning_rate": 0.000300157902778635, "loss": 0.1628, "step": 315630 }, { "epoch": 13.07, "grad_norm": 0.65234375, "learning_rate": 0.0003001472780545976, "loss": 0.2251, "step": 315640 }, { "epoch": 13.07, "grad_norm": 1.3671875, "learning_rate": 0.00030013665323618834, "loss": 0.2359, "step": 315650 }, { "epoch": 13.07, "grad_norm": 0.62109375, "learning_rate": 0.0003001260283234274, "loss": 0.2103, "step": 315660 }, { "epoch": 13.08, "grad_norm": 1.03125, "learning_rate": 0.00030011540331633464, "loss": 0.1971, "step": 315670 }, { "epoch": 13.08, "grad_norm": 0.63671875, "learning_rate": 0.0003001047782149301, "loss": 0.2021, "step": 315680 }, { "epoch": 13.08, "grad_norm": 0.28125, "learning_rate": 0.00030009415301923383, "loss": 0.2079, "step": 315690 }, { "epoch": 13.08, "grad_norm": 1.2890625, "learning_rate": 0.00030008352772926565, "loss": 0.212, "step": 315700 }, { "epoch": 13.08, "grad_norm": 0.412109375, "learning_rate": 0.0003000729023450458, "loss": 0.2177, "step": 315710 }, { "epoch": 13.08, "grad_norm": 1.25, "learning_rate": 0.0003000622768665941, "loss": 0.1766, "step": 315720 }, { "epoch": 13.08, "grad_norm": 0.66015625, "learning_rate": 0.00030005165129393064, "loss": 0.171, "step": 315730 }, { "epoch": 13.08, "grad_norm": 1.0859375, "learning_rate": 0.00030004102562707535, "loss": 0.2241, "step": 315740 }, { "epoch": 13.08, "grad_norm": 0.58203125, "learning_rate": 0.00030003039986604827, "loss": 0.1429, "step": 315750 }, { "epoch": 13.08, "grad_norm": 0.7734375, "learning_rate": 0.0003000197740108694, "loss": 0.2302, "step": 315760 }, { "epoch": 13.08, "grad_norm": 0.8359375, "learning_rate": 0.00030000914806155873, "loss": 0.2292, "step": 315770 }, { "epoch": 13.08, "grad_norm": 2.40625, "learning_rate": 0.0002999985220181361, "loss": 0.1446, "step": 315780 }, { "epoch": 13.08, "grad_norm": 0.322265625, "learning_rate": 0.0002999878958806218, "loss": 0.2371, "step": 315790 }, { "epoch": 13.08, "grad_norm": 0.5546875, "learning_rate": 0.0002999772696490356, "loss": 0.2085, "step": 315800 }, { "epoch": 13.08, "grad_norm": 0.91796875, "learning_rate": 0.0002999666433233976, "loss": 0.2282, "step": 315810 }, { "epoch": 13.08, "grad_norm": 0.59765625, "learning_rate": 0.0002999560169037278, "loss": 0.16, "step": 315820 }, { "epoch": 13.08, "grad_norm": 1.9296875, "learning_rate": 0.00029994539039004617, "loss": 0.1687, "step": 315830 }, { "epoch": 13.08, "grad_norm": 0.6640625, "learning_rate": 0.00029993476378237265, "loss": 0.1896, "step": 315840 }, { "epoch": 13.08, "grad_norm": 0.32421875, "learning_rate": 0.0002999241370807274, "loss": 0.1789, "step": 315850 }, { "epoch": 13.08, "grad_norm": 0.400390625, "learning_rate": 0.00029991351028513014, "loss": 0.2136, "step": 315860 }, { "epoch": 13.08, "grad_norm": 1.0625, "learning_rate": 0.00029990288339560125, "loss": 0.1572, "step": 315870 }, { "epoch": 13.08, "grad_norm": 1.046875, "learning_rate": 0.0002998922564121604, "loss": 0.1801, "step": 315880 }, { "epoch": 13.08, "grad_norm": 0.8828125, "learning_rate": 0.0002998816293348277, "loss": 0.2267, "step": 315890 }, { "epoch": 13.08, "grad_norm": 1.5, "learning_rate": 0.0002998710021636233, "loss": 0.1734, "step": 315900 }, { "epoch": 13.08, "grad_norm": 1.421875, "learning_rate": 0.0002998603748985669, "loss": 0.1961, "step": 315910 }, { "epoch": 13.09, "grad_norm": 1.46875, "learning_rate": 0.0002998497475396787, "loss": 0.1787, "step": 315920 }, { "epoch": 13.09, "grad_norm": 0.93359375, "learning_rate": 0.00029983912008697865, "loss": 0.1756, "step": 315930 }, { "epoch": 13.09, "grad_norm": 0.921875, "learning_rate": 0.0002998284925404868, "loss": 0.2492, "step": 315940 }, { "epoch": 13.09, "grad_norm": 0.4140625, "learning_rate": 0.00029981786490022313, "loss": 0.1823, "step": 315950 }, { "epoch": 13.09, "grad_norm": 0.77734375, "learning_rate": 0.00029980723716620754, "loss": 0.1791, "step": 315960 }, { "epoch": 13.09, "grad_norm": 0.625, "learning_rate": 0.00029979660933846013, "loss": 0.1798, "step": 315970 }, { "epoch": 13.09, "grad_norm": 0.54296875, "learning_rate": 0.0002997859814170009, "loss": 0.2035, "step": 315980 }, { "epoch": 13.09, "grad_norm": 1.0703125, "learning_rate": 0.0002997753534018498, "loss": 0.2369, "step": 315990 }, { "epoch": 13.09, "grad_norm": 0.412109375, "learning_rate": 0.00029976472529302697, "loss": 0.1985, "step": 316000 }, { "epoch": 13.09, "grad_norm": 1.3828125, "learning_rate": 0.0002997540970905522, "loss": 0.1979, "step": 316010 }, { "epoch": 13.09, "grad_norm": 0.5390625, "learning_rate": 0.0002997434687944456, "loss": 0.2382, "step": 316020 }, { "epoch": 13.09, "grad_norm": 0.9140625, "learning_rate": 0.0002997328404047272, "loss": 0.1701, "step": 316030 }, { "epoch": 13.09, "grad_norm": 0.71875, "learning_rate": 0.0002997222119214169, "loss": 0.2186, "step": 316040 }, { "epoch": 13.09, "grad_norm": 1.3359375, "learning_rate": 0.0002997115833445348, "loss": 0.1494, "step": 316050 }, { "epoch": 13.09, "grad_norm": 0.9765625, "learning_rate": 0.00029970095467410086, "loss": 0.188, "step": 316060 }, { "epoch": 13.09, "grad_norm": 0.263671875, "learning_rate": 0.0002996903259101351, "loss": 0.2218, "step": 316070 }, { "epoch": 13.09, "grad_norm": 0.86328125, "learning_rate": 0.0002996796970526576, "loss": 0.1911, "step": 316080 }, { "epoch": 13.09, "grad_norm": 1.9921875, "learning_rate": 0.00029966906810168815, "loss": 0.1917, "step": 316090 }, { "epoch": 13.09, "grad_norm": 0.73828125, "learning_rate": 0.0002996584390572469, "loss": 0.2278, "step": 316100 }, { "epoch": 13.09, "grad_norm": 0.478515625, "learning_rate": 0.0002996478099193539, "loss": 0.1911, "step": 316110 }, { "epoch": 13.09, "grad_norm": 1.3203125, "learning_rate": 0.00029963718068802905, "loss": 0.2162, "step": 316120 }, { "epoch": 13.09, "grad_norm": 1.21875, "learning_rate": 0.0002996265513632923, "loss": 0.1941, "step": 316130 }, { "epoch": 13.09, "grad_norm": 0.91796875, "learning_rate": 0.00029961592194516386, "loss": 0.1869, "step": 316140 }, { "epoch": 13.09, "grad_norm": 1.21875, "learning_rate": 0.00029960529243366354, "loss": 0.2003, "step": 316150 }, { "epoch": 13.1, "grad_norm": 0.7109375, "learning_rate": 0.0002995946628288114, "loss": 0.1927, "step": 316160 }, { "epoch": 13.1, "grad_norm": 1.0390625, "learning_rate": 0.00029958403313062756, "loss": 0.2301, "step": 316170 }, { "epoch": 13.1, "grad_norm": 1.046875, "learning_rate": 0.00029957340333913187, "loss": 0.2057, "step": 316180 }, { "epoch": 13.1, "grad_norm": 0.51953125, "learning_rate": 0.00029956277345434437, "loss": 0.1867, "step": 316190 }, { "epoch": 13.1, "grad_norm": 0.5859375, "learning_rate": 0.00029955214347628513, "loss": 0.1959, "step": 316200 }, { "epoch": 13.1, "grad_norm": 0.302734375, "learning_rate": 0.00029954151340497404, "loss": 0.1848, "step": 316210 }, { "epoch": 13.1, "grad_norm": 0.6875, "learning_rate": 0.00029953088324043115, "loss": 0.1486, "step": 316220 }, { "epoch": 13.1, "grad_norm": 1.0625, "learning_rate": 0.00029952025298267653, "loss": 0.1981, "step": 316230 }, { "epoch": 13.1, "grad_norm": 0.8359375, "learning_rate": 0.0002995096226317302, "loss": 0.2034, "step": 316240 }, { "epoch": 13.1, "grad_norm": 1.3046875, "learning_rate": 0.00029949899218761206, "loss": 0.2206, "step": 316250 }, { "epoch": 13.1, "grad_norm": 0.890625, "learning_rate": 0.00029948836165034215, "loss": 0.181, "step": 316260 }, { "epoch": 13.1, "grad_norm": 0.96875, "learning_rate": 0.00029947773101994047, "loss": 0.235, "step": 316270 }, { "epoch": 13.1, "grad_norm": 0.76953125, "learning_rate": 0.00029946710029642707, "loss": 0.2315, "step": 316280 }, { "epoch": 13.1, "grad_norm": 0.9375, "learning_rate": 0.00029945646947982196, "loss": 0.1987, "step": 316290 }, { "epoch": 13.1, "grad_norm": 0.66015625, "learning_rate": 0.0002994458385701451, "loss": 0.1723, "step": 316300 }, { "epoch": 13.1, "grad_norm": 1.5859375, "learning_rate": 0.0002994352075674165, "loss": 0.1915, "step": 316310 }, { "epoch": 13.1, "grad_norm": 0.271484375, "learning_rate": 0.0002994245764716562, "loss": 0.1722, "step": 316320 }, { "epoch": 13.1, "grad_norm": 0.88671875, "learning_rate": 0.00029941394528288414, "loss": 0.282, "step": 316330 }, { "epoch": 13.1, "grad_norm": 0.734375, "learning_rate": 0.00029940331400112036, "loss": 0.2111, "step": 316340 }, { "epoch": 13.1, "grad_norm": 1.1015625, "learning_rate": 0.00029939268262638494, "loss": 0.2115, "step": 316350 }, { "epoch": 13.1, "grad_norm": 0.90234375, "learning_rate": 0.00029938205115869783, "loss": 0.154, "step": 316360 }, { "epoch": 13.1, "grad_norm": 1.296875, "learning_rate": 0.00029937141959807893, "loss": 0.2431, "step": 316370 }, { "epoch": 13.1, "grad_norm": 0.6640625, "learning_rate": 0.0002993607879445484, "loss": 0.1837, "step": 316380 }, { "epoch": 13.1, "grad_norm": 1.5546875, "learning_rate": 0.0002993501561981262, "loss": 0.2018, "step": 316390 }, { "epoch": 13.11, "grad_norm": 0.83984375, "learning_rate": 0.00029933952435883237, "loss": 0.186, "step": 316400 }, { "epoch": 13.11, "grad_norm": 1.1875, "learning_rate": 0.00029932889242668693, "loss": 0.161, "step": 316410 }, { "epoch": 13.11, "grad_norm": 0.92578125, "learning_rate": 0.0002993182604017097, "loss": 0.1755, "step": 316420 }, { "epoch": 13.11, "grad_norm": 0.96875, "learning_rate": 0.00029930762828392094, "loss": 0.1835, "step": 316430 }, { "epoch": 13.11, "grad_norm": 2.75, "learning_rate": 0.00029929699607334056, "loss": 0.2194, "step": 316440 }, { "epoch": 13.11, "grad_norm": 0.6015625, "learning_rate": 0.0002992863637699885, "loss": 0.1829, "step": 316450 }, { "epoch": 13.11, "grad_norm": 3.625, "learning_rate": 0.0002992757313738848, "loss": 0.1504, "step": 316460 }, { "epoch": 13.11, "grad_norm": 0.546875, "learning_rate": 0.0002992650988850495, "loss": 0.1549, "step": 316470 }, { "epoch": 13.11, "grad_norm": 0.8671875, "learning_rate": 0.00029925446630350264, "loss": 0.223, "step": 316480 }, { "epoch": 13.11, "grad_norm": 2.96875, "learning_rate": 0.00029924383362926423, "loss": 0.1861, "step": 316490 }, { "epoch": 13.11, "grad_norm": 1.6796875, "learning_rate": 0.0002992332008623542, "loss": 0.1198, "step": 316500 }, { "epoch": 13.11, "grad_norm": 1.59375, "learning_rate": 0.00029922256800279256, "loss": 0.1707, "step": 316510 }, { "epoch": 13.11, "grad_norm": 1.1875, "learning_rate": 0.00029921193505059947, "loss": 0.2185, "step": 316520 }, { "epoch": 13.11, "grad_norm": 0.9921875, "learning_rate": 0.00029920130200579475, "loss": 0.2146, "step": 316530 }, { "epoch": 13.11, "grad_norm": 2.75, "learning_rate": 0.0002991906688683985, "loss": 0.1828, "step": 316540 }, { "epoch": 13.11, "grad_norm": 0.78515625, "learning_rate": 0.0002991800356384307, "loss": 0.2651, "step": 316550 }, { "epoch": 13.11, "grad_norm": 0.765625, "learning_rate": 0.00029916940231591144, "loss": 0.168, "step": 316560 }, { "epoch": 13.11, "grad_norm": 2.515625, "learning_rate": 0.0002991587689008607, "loss": 0.1973, "step": 316570 }, { "epoch": 13.11, "grad_norm": 1.8046875, "learning_rate": 0.00029914813539329836, "loss": 0.2226, "step": 316580 }, { "epoch": 13.11, "grad_norm": 0.6640625, "learning_rate": 0.0002991375017932446, "loss": 0.1891, "step": 316590 }, { "epoch": 13.11, "grad_norm": 0.98828125, "learning_rate": 0.00029912686810071944, "loss": 0.2377, "step": 316600 }, { "epoch": 13.11, "grad_norm": 0.9609375, "learning_rate": 0.00029911623431574263, "loss": 0.2167, "step": 316610 }, { "epoch": 13.11, "grad_norm": 0.578125, "learning_rate": 0.00029910560043833455, "loss": 0.1609, "step": 316620 }, { "epoch": 13.11, "grad_norm": 0.45703125, "learning_rate": 0.000299094966468515, "loss": 0.2033, "step": 316630 }, { "epoch": 13.12, "grad_norm": 1.046875, "learning_rate": 0.00029908433240630395, "loss": 0.2391, "step": 316640 }, { "epoch": 13.12, "grad_norm": 0.423828125, "learning_rate": 0.0002990736982517216, "loss": 0.1245, "step": 316650 }, { "epoch": 13.12, "grad_norm": 0.640625, "learning_rate": 0.0002990630640047877, "loss": 0.18, "step": 316660 }, { "epoch": 13.12, "grad_norm": 1.3203125, "learning_rate": 0.00029905242966552246, "loss": 0.151, "step": 316670 }, { "epoch": 13.12, "grad_norm": 2.171875, "learning_rate": 0.00029904179523394597, "loss": 0.2098, "step": 316680 }, { "epoch": 13.12, "grad_norm": 1.09375, "learning_rate": 0.00029903116071007795, "loss": 0.1457, "step": 316690 }, { "epoch": 13.12, "grad_norm": 0.546875, "learning_rate": 0.0002990205260939387, "loss": 0.1566, "step": 316700 }, { "epoch": 13.12, "grad_norm": 1.5, "learning_rate": 0.0002990098913855481, "loss": 0.1984, "step": 316710 }, { "epoch": 13.12, "grad_norm": 0.51953125, "learning_rate": 0.0002989992565849261, "loss": 0.2025, "step": 316720 }, { "epoch": 13.12, "grad_norm": 0.4765625, "learning_rate": 0.00029898862169209286, "loss": 0.1832, "step": 316730 }, { "epoch": 13.12, "grad_norm": 0.81640625, "learning_rate": 0.00029897798670706823, "loss": 0.2304, "step": 316740 }, { "epoch": 13.12, "grad_norm": 1.6640625, "learning_rate": 0.00029896735162987244, "loss": 0.2158, "step": 316750 }, { "epoch": 13.12, "grad_norm": 0.435546875, "learning_rate": 0.00029895671646052536, "loss": 0.2055, "step": 316760 }, { "epoch": 13.12, "grad_norm": 0.46875, "learning_rate": 0.0002989460811990469, "loss": 0.1983, "step": 316770 }, { "epoch": 13.12, "grad_norm": 0.76171875, "learning_rate": 0.00029893544584545735, "loss": 0.143, "step": 316780 }, { "epoch": 13.12, "grad_norm": 0.8828125, "learning_rate": 0.00029892481039977653, "loss": 0.1293, "step": 316790 }, { "epoch": 13.12, "grad_norm": 0.69921875, "learning_rate": 0.0002989141748620245, "loss": 0.1638, "step": 316800 }, { "epoch": 13.12, "grad_norm": 0.8125, "learning_rate": 0.0002989035392322213, "loss": 0.2351, "step": 316810 }, { "epoch": 13.12, "grad_norm": 0.84375, "learning_rate": 0.00029889290351038677, "loss": 0.1744, "step": 316820 }, { "epoch": 13.12, "grad_norm": 0.890625, "learning_rate": 0.00029888226769654125, "loss": 0.2471, "step": 316830 }, { "epoch": 13.12, "grad_norm": 0.322265625, "learning_rate": 0.0002988716317907046, "loss": 0.2029, "step": 316840 }, { "epoch": 13.12, "grad_norm": 0.77734375, "learning_rate": 0.0002988609957928967, "loss": 0.1932, "step": 316850 }, { "epoch": 13.12, "grad_norm": 0.703125, "learning_rate": 0.00029885035970313775, "loss": 0.1877, "step": 316860 }, { "epoch": 13.12, "grad_norm": 0.90234375, "learning_rate": 0.00029883972352144763, "loss": 0.1917, "step": 316870 }, { "epoch": 13.13, "grad_norm": 1.109375, "learning_rate": 0.00029882908724784645, "loss": 0.1867, "step": 316880 }, { "epoch": 13.13, "grad_norm": 0.94921875, "learning_rate": 0.00029881845088235425, "loss": 0.2227, "step": 316890 }, { "epoch": 13.13, "grad_norm": 0.5859375, "learning_rate": 0.00029880781442499093, "loss": 0.1868, "step": 316900 }, { "epoch": 13.13, "grad_norm": 0.6875, "learning_rate": 0.00029879717787577667, "loss": 0.1518, "step": 316910 }, { "epoch": 13.13, "grad_norm": 0.7734375, "learning_rate": 0.00029878654123473134, "loss": 0.1735, "step": 316920 }, { "epoch": 13.13, "grad_norm": 1.3515625, "learning_rate": 0.0002987759045018749, "loss": 0.201, "step": 316930 }, { "epoch": 13.13, "grad_norm": 0.3984375, "learning_rate": 0.00029876526767722765, "loss": 0.1607, "step": 316940 }, { "epoch": 13.13, "grad_norm": 0.51171875, "learning_rate": 0.00029875463076080934, "loss": 0.2037, "step": 316950 }, { "epoch": 13.13, "grad_norm": 0.6796875, "learning_rate": 0.00029874399375264004, "loss": 0.1972, "step": 316960 }, { "epoch": 13.13, "grad_norm": 0.55078125, "learning_rate": 0.0002987333566527399, "loss": 0.1806, "step": 316970 }, { "epoch": 13.13, "grad_norm": 1.21875, "learning_rate": 0.00029872271946112877, "loss": 0.1682, "step": 316980 }, { "epoch": 13.13, "grad_norm": 1.78125, "learning_rate": 0.00029871208217782685, "loss": 0.1469, "step": 316990 }, { "epoch": 13.13, "grad_norm": 0.52734375, "learning_rate": 0.000298701444802854, "loss": 0.2112, "step": 317000 }, { "epoch": 13.13, "grad_norm": 0.578125, "learning_rate": 0.0002986908073362302, "loss": 0.1916, "step": 317010 }, { "epoch": 13.13, "grad_norm": 0.81640625, "learning_rate": 0.0002986801697779757, "loss": 0.1638, "step": 317020 }, { "epoch": 13.13, "grad_norm": 0.99609375, "learning_rate": 0.00029866953212811034, "loss": 0.1922, "step": 317030 }, { "epoch": 13.13, "grad_norm": 0.66015625, "learning_rate": 0.0002986588943866541, "loss": 0.2221, "step": 317040 }, { "epoch": 13.13, "grad_norm": 0.54296875, "learning_rate": 0.00029864825655362713, "loss": 0.2048, "step": 317050 }, { "epoch": 13.13, "grad_norm": 0.9296875, "learning_rate": 0.0002986376186290494, "loss": 0.1618, "step": 317060 }, { "epoch": 13.13, "grad_norm": 0.0, "learning_rate": 0.0002986269806129409, "loss": 0.2191, "step": 317070 }, { "epoch": 13.13, "grad_norm": 0.4921875, "learning_rate": 0.0002986163425053217, "loss": 0.214, "step": 317080 }, { "epoch": 13.13, "grad_norm": 1.2890625, "learning_rate": 0.0002986057043062117, "loss": 0.2257, "step": 317090 }, { "epoch": 13.13, "grad_norm": 1.2265625, "learning_rate": 0.00029859506601563114, "loss": 0.1568, "step": 317100 }, { "epoch": 13.13, "grad_norm": 1.1953125, "learning_rate": 0.0002985844276335999, "loss": 0.2023, "step": 317110 }, { "epoch": 13.14, "grad_norm": 0.76171875, "learning_rate": 0.00029857378916013796, "loss": 0.2136, "step": 317120 }, { "epoch": 13.14, "grad_norm": 1.3671875, "learning_rate": 0.00029856315059526543, "loss": 0.1854, "step": 317130 }, { "epoch": 13.14, "grad_norm": 0.6484375, "learning_rate": 0.00029855251193900225, "loss": 0.2022, "step": 317140 }, { "epoch": 13.14, "grad_norm": 0.220703125, "learning_rate": 0.0002985418731913685, "loss": 0.2217, "step": 317150 }, { "epoch": 13.14, "grad_norm": 1.1484375, "learning_rate": 0.0002985312343523842, "loss": 0.2126, "step": 317160 }, { "epoch": 13.14, "grad_norm": 1.1953125, "learning_rate": 0.0002985205954220693, "loss": 0.191, "step": 317170 }, { "epoch": 13.14, "grad_norm": 1.1015625, "learning_rate": 0.00029850995640044405, "loss": 0.1898, "step": 317180 }, { "epoch": 13.14, "grad_norm": 0.9140625, "learning_rate": 0.00029849931728752815, "loss": 0.2336, "step": 317190 }, { "epoch": 13.14, "grad_norm": 0.72265625, "learning_rate": 0.0002984886780833418, "loss": 0.2441, "step": 317200 }, { "epoch": 13.14, "grad_norm": 0.625, "learning_rate": 0.00029847803878790504, "loss": 0.2226, "step": 317210 }, { "epoch": 13.14, "grad_norm": 0.435546875, "learning_rate": 0.00029846739940123774, "loss": 0.195, "step": 317220 }, { "epoch": 13.14, "grad_norm": 0.77734375, "learning_rate": 0.0002984567599233602, "loss": 0.182, "step": 317230 }, { "epoch": 13.14, "grad_norm": 0.8046875, "learning_rate": 0.00029844612035429213, "loss": 0.2109, "step": 317240 }, { "epoch": 13.14, "grad_norm": 0.86328125, "learning_rate": 0.0002984354806940537, "loss": 0.2376, "step": 317250 }, { "epoch": 13.14, "grad_norm": 1.3671875, "learning_rate": 0.00029842484094266505, "loss": 0.2069, "step": 317260 }, { "epoch": 13.14, "grad_norm": 0.57421875, "learning_rate": 0.00029841420110014597, "loss": 0.1714, "step": 317270 }, { "epoch": 13.14, "grad_norm": 0.87109375, "learning_rate": 0.00029840356116651656, "loss": 0.1971, "step": 317280 }, { "epoch": 13.14, "grad_norm": 1.0390625, "learning_rate": 0.0002983929211417969, "loss": 0.24, "step": 317290 }, { "epoch": 13.14, "grad_norm": 0.703125, "learning_rate": 0.000298382281026007, "loss": 0.2128, "step": 317300 }, { "epoch": 13.14, "grad_norm": 0.71484375, "learning_rate": 0.00029837164081916696, "loss": 0.1846, "step": 317310 }, { "epoch": 13.14, "grad_norm": 0.875, "learning_rate": 0.00029836100052129663, "loss": 0.1986, "step": 317320 }, { "epoch": 13.14, "grad_norm": 0.6640625, "learning_rate": 0.00029835036013241615, "loss": 0.1735, "step": 317330 }, { "epoch": 13.14, "grad_norm": 0.92578125, "learning_rate": 0.00029833971965254546, "loss": 0.2117, "step": 317340 }, { "epoch": 13.14, "grad_norm": 0.55859375, "learning_rate": 0.0002983290790817047, "loss": 0.2275, "step": 317350 }, { "epoch": 13.15, "grad_norm": 1.75, "learning_rate": 0.0002983184384199138, "loss": 0.1753, "step": 317360 }, { "epoch": 13.15, "grad_norm": 1.84375, "learning_rate": 0.0002983077976671928, "loss": 0.2208, "step": 317370 }, { "epoch": 13.15, "grad_norm": 1.1328125, "learning_rate": 0.0002982971568235617, "loss": 0.1969, "step": 317380 }, { "epoch": 13.15, "grad_norm": 1.2578125, "learning_rate": 0.0002982865158890407, "loss": 0.2121, "step": 317390 }, { "epoch": 13.15, "grad_norm": 0.8046875, "learning_rate": 0.0002982758748636496, "loss": 0.189, "step": 317400 }, { "epoch": 13.15, "grad_norm": 0.55078125, "learning_rate": 0.00029826523374740856, "loss": 0.1826, "step": 317410 }, { "epoch": 13.15, "grad_norm": 0.57421875, "learning_rate": 0.00029825459254033756, "loss": 0.2055, "step": 317420 }, { "epoch": 13.15, "grad_norm": 0.7578125, "learning_rate": 0.0002982439512424566, "loss": 0.1945, "step": 317430 }, { "epoch": 13.15, "grad_norm": 0.85546875, "learning_rate": 0.0002982333098537857, "loss": 0.1693, "step": 317440 }, { "epoch": 13.15, "grad_norm": 0.7265625, "learning_rate": 0.00029822266837434497, "loss": 0.1962, "step": 317450 }, { "epoch": 13.15, "grad_norm": 0.50390625, "learning_rate": 0.0002982120268041544, "loss": 0.1907, "step": 317460 }, { "epoch": 13.15, "grad_norm": 0.5546875, "learning_rate": 0.000298201385143234, "loss": 0.1531, "step": 317470 }, { "epoch": 13.15, "grad_norm": 0.44921875, "learning_rate": 0.00029819074339160374, "loss": 0.1735, "step": 317480 }, { "epoch": 13.15, "grad_norm": 1.4453125, "learning_rate": 0.0002981801015492838, "loss": 0.2092, "step": 317490 }, { "epoch": 13.15, "grad_norm": 2.109375, "learning_rate": 0.000298169459616294, "loss": 0.206, "step": 317500 }, { "epoch": 13.15, "grad_norm": 1.1328125, "learning_rate": 0.0002981588175926546, "loss": 0.188, "step": 317510 }, { "epoch": 13.15, "grad_norm": 0.9765625, "learning_rate": 0.00029814817547838533, "loss": 0.1632, "step": 317520 }, { "epoch": 13.15, "grad_norm": 0.765625, "learning_rate": 0.0002981375332735066, "loss": 0.2444, "step": 317530 }, { "epoch": 13.15, "grad_norm": 0.96484375, "learning_rate": 0.0002981268909780381, "loss": 0.138, "step": 317540 }, { "epoch": 13.15, "grad_norm": 0.53125, "learning_rate": 0.00029811624859200003, "loss": 0.1961, "step": 317550 }, { "epoch": 13.15, "grad_norm": 0.40625, "learning_rate": 0.00029810560611541237, "loss": 0.1396, "step": 317560 }, { "epoch": 13.15, "grad_norm": 0.4765625, "learning_rate": 0.00029809496354829513, "loss": 0.1779, "step": 317570 }, { "epoch": 13.15, "grad_norm": 0.671875, "learning_rate": 0.00029808432089066837, "loss": 0.2027, "step": 317580 }, { "epoch": 13.15, "grad_norm": 0.466796875, "learning_rate": 0.00029807367814255214, "loss": 0.1892, "step": 317590 }, { "epoch": 13.15, "grad_norm": 0.515625, "learning_rate": 0.00029806303530396645, "loss": 0.2314, "step": 317600 }, { "epoch": 13.16, "grad_norm": 1.203125, "learning_rate": 0.00029805239237493125, "loss": 0.1923, "step": 317610 }, { "epoch": 13.16, "grad_norm": 0.89453125, "learning_rate": 0.0002980417493554667, "loss": 0.2031, "step": 317620 }, { "epoch": 13.16, "grad_norm": 1.1640625, "learning_rate": 0.00029803110624559276, "loss": 0.1466, "step": 317630 }, { "epoch": 13.16, "grad_norm": 0.494140625, "learning_rate": 0.0002980204630453294, "loss": 0.1783, "step": 317640 }, { "epoch": 13.16, "grad_norm": 0.8515625, "learning_rate": 0.00029800981975469675, "loss": 0.1691, "step": 317650 }, { "epoch": 13.16, "grad_norm": 0.203125, "learning_rate": 0.00029799917637371486, "loss": 0.2086, "step": 317660 }, { "epoch": 13.16, "grad_norm": 1.2109375, "learning_rate": 0.0002979885329024037, "loss": 0.1692, "step": 317670 }, { "epoch": 13.16, "grad_norm": 0.65234375, "learning_rate": 0.00029797788934078325, "loss": 0.2008, "step": 317680 }, { "epoch": 13.16, "grad_norm": 1.046875, "learning_rate": 0.00029796724568887354, "loss": 0.1847, "step": 317690 }, { "epoch": 13.16, "grad_norm": 0.50390625, "learning_rate": 0.0002979566019466948, "loss": 0.1781, "step": 317700 }, { "epoch": 13.16, "grad_norm": 0.6171875, "learning_rate": 0.0002979459581142668, "loss": 0.207, "step": 317710 }, { "epoch": 13.16, "grad_norm": 0.5390625, "learning_rate": 0.0002979353141916097, "loss": 0.2134, "step": 317720 }, { "epoch": 13.16, "grad_norm": 1.1171875, "learning_rate": 0.0002979246701787436, "loss": 0.2249, "step": 317730 }, { "epoch": 13.16, "grad_norm": 0.95703125, "learning_rate": 0.00029791402607568836, "loss": 0.2328, "step": 317740 }, { "epoch": 13.16, "grad_norm": 1.265625, "learning_rate": 0.00029790338188246415, "loss": 0.2111, "step": 317750 }, { "epoch": 13.16, "grad_norm": 0.765625, "learning_rate": 0.0002978927375990909, "loss": 0.2072, "step": 317760 }, { "epoch": 13.16, "grad_norm": 0.51953125, "learning_rate": 0.00029788209322558867, "loss": 0.23, "step": 317770 }, { "epoch": 13.16, "grad_norm": 0.66796875, "learning_rate": 0.0002978714487619775, "loss": 0.1644, "step": 317780 }, { "epoch": 13.16, "grad_norm": 0.8671875, "learning_rate": 0.00029786080420827755, "loss": 0.2127, "step": 317790 }, { "epoch": 13.16, "grad_norm": 0.435546875, "learning_rate": 0.0002978501595645087, "loss": 0.1821, "step": 317800 }, { "epoch": 13.16, "grad_norm": 1.2265625, "learning_rate": 0.00029783951483069093, "loss": 0.187, "step": 317810 }, { "epoch": 13.16, "grad_norm": 0.796875, "learning_rate": 0.00029782887000684443, "loss": 0.196, "step": 317820 }, { "epoch": 13.16, "grad_norm": 0.66796875, "learning_rate": 0.0002978182250929891, "loss": 0.1489, "step": 317830 }, { "epoch": 13.16, "grad_norm": 0.443359375, "learning_rate": 0.00029780758008914515, "loss": 0.1737, "step": 317840 }, { "epoch": 13.17, "grad_norm": 0.99609375, "learning_rate": 0.00029779693499533237, "loss": 0.1965, "step": 317850 }, { "epoch": 13.17, "grad_norm": 0.87109375, "learning_rate": 0.00029778628981157103, "loss": 0.1509, "step": 317860 }, { "epoch": 13.17, "grad_norm": 1.21875, "learning_rate": 0.000297775644537881, "loss": 0.2007, "step": 317870 }, { "epoch": 13.17, "grad_norm": 0.859375, "learning_rate": 0.0002977649991742823, "loss": 0.2334, "step": 317880 }, { "epoch": 13.17, "grad_norm": 0.69921875, "learning_rate": 0.0002977543537207951, "loss": 0.1652, "step": 317890 }, { "epoch": 13.17, "grad_norm": 0.58203125, "learning_rate": 0.0002977437081774394, "loss": 0.1799, "step": 317900 }, { "epoch": 13.17, "grad_norm": 1.3984375, "learning_rate": 0.00029773306254423515, "loss": 0.26, "step": 317910 }, { "epoch": 13.17, "grad_norm": 0.54296875, "learning_rate": 0.0002977224168212024, "loss": 0.1514, "step": 317920 }, { "epoch": 13.17, "grad_norm": 1.4140625, "learning_rate": 0.0002977117710083613, "loss": 0.1898, "step": 317930 }, { "epoch": 13.17, "grad_norm": 0.8515625, "learning_rate": 0.0002977011251057317, "loss": 0.2128, "step": 317940 }, { "epoch": 13.17, "grad_norm": 0.52734375, "learning_rate": 0.0002976904791133338, "loss": 0.2425, "step": 317950 }, { "epoch": 13.17, "grad_norm": 0.796875, "learning_rate": 0.0002976798330311876, "loss": 0.2189, "step": 317960 }, { "epoch": 13.17, "grad_norm": 1.125, "learning_rate": 0.00029766918685931304, "loss": 0.1898, "step": 317970 }, { "epoch": 13.17, "grad_norm": 1.40625, "learning_rate": 0.0002976585405977302, "loss": 0.216, "step": 317980 }, { "epoch": 13.17, "grad_norm": 0.5703125, "learning_rate": 0.0002976478942464592, "loss": 0.2019, "step": 317990 }, { "epoch": 13.17, "grad_norm": 0.875, "learning_rate": 0.00029763724780551993, "loss": 0.161, "step": 318000 }, { "epoch": 13.17, "grad_norm": 1.28125, "learning_rate": 0.0002976266012749326, "loss": 0.1847, "step": 318010 }, { "epoch": 13.17, "grad_norm": 0.53125, "learning_rate": 0.0002976159546547171, "loss": 0.2203, "step": 318020 }, { "epoch": 13.17, "grad_norm": 1.203125, "learning_rate": 0.0002976053079448935, "loss": 0.1909, "step": 318030 }, { "epoch": 13.17, "grad_norm": 1.2109375, "learning_rate": 0.0002975946611454819, "loss": 0.1633, "step": 318040 }, { "epoch": 13.17, "grad_norm": 1.953125, "learning_rate": 0.00029758401425650227, "loss": 0.2182, "step": 318050 }, { "epoch": 13.17, "grad_norm": 0.91015625, "learning_rate": 0.0002975733672779746, "loss": 0.2348, "step": 318060 }, { "epoch": 13.17, "grad_norm": 1.109375, "learning_rate": 0.000297562720209919, "loss": 0.1745, "step": 318070 }, { "epoch": 13.17, "grad_norm": 0.78515625, "learning_rate": 0.00029755207305235553, "loss": 0.1863, "step": 318080 }, { "epoch": 13.18, "grad_norm": 1.28125, "learning_rate": 0.0002975414258053043, "loss": 0.1484, "step": 318090 }, { "epoch": 13.18, "grad_norm": 1.0859375, "learning_rate": 0.0002975307784687851, "loss": 0.2022, "step": 318100 }, { "epoch": 13.18, "grad_norm": 1.0625, "learning_rate": 0.00029752013104281816, "loss": 0.1471, "step": 318110 }, { "epoch": 13.18, "grad_norm": 0.9140625, "learning_rate": 0.00029750948352742345, "loss": 0.2184, "step": 318120 }, { "epoch": 13.18, "grad_norm": 0.87890625, "learning_rate": 0.000297498835922621, "loss": 0.2163, "step": 318130 }, { "epoch": 13.18, "grad_norm": 0.79296875, "learning_rate": 0.00029748818822843094, "loss": 0.2198, "step": 318140 }, { "epoch": 13.18, "grad_norm": 0.53125, "learning_rate": 0.0002974775404448732, "loss": 0.2132, "step": 318150 }, { "epoch": 13.18, "grad_norm": 0.5546875, "learning_rate": 0.0002974668925719678, "loss": 0.2318, "step": 318160 }, { "epoch": 13.18, "grad_norm": 1.09375, "learning_rate": 0.0002974562446097349, "loss": 0.2227, "step": 318170 }, { "epoch": 13.18, "grad_norm": 1.4375, "learning_rate": 0.00029744559655819447, "loss": 0.1851, "step": 318180 }, { "epoch": 13.18, "grad_norm": 0.154296875, "learning_rate": 0.0002974349484173665, "loss": 0.1569, "step": 318190 }, { "epoch": 13.18, "grad_norm": 0.0, "learning_rate": 0.00029742430018727116, "loss": 0.1803, "step": 318200 }, { "epoch": 13.18, "grad_norm": 0.82421875, "learning_rate": 0.00029741365186792835, "loss": 0.1972, "step": 318210 }, { "epoch": 13.18, "grad_norm": 1.0625, "learning_rate": 0.00029740300345935825, "loss": 0.1947, "step": 318220 }, { "epoch": 13.18, "grad_norm": 0.76171875, "learning_rate": 0.0002973923549615807, "loss": 0.2016, "step": 318230 }, { "epoch": 13.18, "grad_norm": 0.8984375, "learning_rate": 0.00029738170637461585, "loss": 0.1808, "step": 318240 }, { "epoch": 13.18, "grad_norm": 0.609375, "learning_rate": 0.0002973710576984839, "loss": 0.1981, "step": 318250 }, { "epoch": 13.18, "grad_norm": 0.8671875, "learning_rate": 0.0002973604089332046, "loss": 0.1711, "step": 318260 }, { "epoch": 13.18, "grad_norm": 0.49609375, "learning_rate": 0.00029734976007879816, "loss": 0.1481, "step": 318270 }, { "epoch": 13.18, "grad_norm": 1.203125, "learning_rate": 0.0002973391111352846, "loss": 0.2179, "step": 318280 }, { "epoch": 13.18, "grad_norm": 0.609375, "learning_rate": 0.0002973284621026839, "loss": 0.1902, "step": 318290 }, { "epoch": 13.18, "grad_norm": 0.53515625, "learning_rate": 0.0002973178129810162, "loss": 0.2335, "step": 318300 }, { "epoch": 13.18, "grad_norm": 0.28515625, "learning_rate": 0.00029730716377030143, "loss": 0.1651, "step": 318310 }, { "epoch": 13.18, "grad_norm": 0.63671875, "learning_rate": 0.0002972965144705597, "loss": 0.2158, "step": 318320 }, { "epoch": 13.19, "grad_norm": 0.77734375, "learning_rate": 0.0002972858650818111, "loss": 0.211, "step": 318330 }, { "epoch": 13.19, "grad_norm": 1.296875, "learning_rate": 0.0002972752156040755, "loss": 0.2154, "step": 318340 }, { "epoch": 13.19, "grad_norm": 0.3359375, "learning_rate": 0.00029726456603737316, "loss": 0.2255, "step": 318350 }, { "epoch": 13.19, "grad_norm": 0.8828125, "learning_rate": 0.000297253916381724, "loss": 0.2137, "step": 318360 }, { "epoch": 13.19, "grad_norm": 1.046875, "learning_rate": 0.00029724326663714795, "loss": 0.2228, "step": 318370 }, { "epoch": 13.19, "grad_norm": 1.0234375, "learning_rate": 0.0002972326168036653, "loss": 0.1793, "step": 318380 }, { "epoch": 13.19, "grad_norm": 0.6015625, "learning_rate": 0.0002972219668812959, "loss": 0.2013, "step": 318390 }, { "epoch": 13.19, "grad_norm": 0.8203125, "learning_rate": 0.00029721131687005995, "loss": 0.2126, "step": 318400 }, { "epoch": 13.19, "grad_norm": 1.4765625, "learning_rate": 0.0002972006667699773, "loss": 0.1938, "step": 318410 }, { "epoch": 13.19, "grad_norm": 0.67578125, "learning_rate": 0.0002971900165810681, "loss": 0.1961, "step": 318420 }, { "epoch": 13.19, "grad_norm": 0.55078125, "learning_rate": 0.00029717936630335246, "loss": 0.1701, "step": 318430 }, { "epoch": 13.19, "grad_norm": 0.75390625, "learning_rate": 0.00029716871593685033, "loss": 0.1501, "step": 318440 }, { "epoch": 13.19, "grad_norm": 0.8828125, "learning_rate": 0.00029715806548158164, "loss": 0.2182, "step": 318450 }, { "epoch": 13.19, "grad_norm": 0.16796875, "learning_rate": 0.0002971474149375668, "loss": 0.2045, "step": 318460 }, { "epoch": 13.19, "grad_norm": 1.1796875, "learning_rate": 0.0002971367643048254, "loss": 0.2248, "step": 318470 }, { "epoch": 13.19, "grad_norm": 0.37109375, "learning_rate": 0.00029712611358337777, "loss": 0.1525, "step": 318480 }, { "epoch": 13.19, "grad_norm": 0.390625, "learning_rate": 0.0002971154627732439, "loss": 0.2185, "step": 318490 }, { "epoch": 13.19, "grad_norm": 0.58203125, "learning_rate": 0.0002971048118744438, "loss": 0.2021, "step": 318500 }, { "epoch": 13.19, "grad_norm": 1.25, "learning_rate": 0.0002970941608869976, "loss": 0.2092, "step": 318510 }, { "epoch": 13.19, "grad_norm": 1.1484375, "learning_rate": 0.00029708350981092526, "loss": 0.1796, "step": 318520 }, { "epoch": 13.19, "grad_norm": 0.98828125, "learning_rate": 0.00029707285864624667, "loss": 0.1892, "step": 318530 }, { "epoch": 13.19, "grad_norm": 1.15625, "learning_rate": 0.00029706220739298225, "loss": 0.2324, "step": 318540 }, { "epoch": 13.19, "grad_norm": 1.09375, "learning_rate": 0.0002970515560511517, "loss": 0.2338, "step": 318550 }, { "epoch": 13.19, "grad_norm": 0.84765625, "learning_rate": 0.00029704090462077525, "loss": 0.1502, "step": 318560 }, { "epoch": 13.2, "grad_norm": 0.8828125, "learning_rate": 0.000297030253101873, "loss": 0.1856, "step": 318570 }, { "epoch": 13.2, "grad_norm": 0.54296875, "learning_rate": 0.00029701960149446473, "loss": 0.1802, "step": 318580 }, { "epoch": 13.2, "grad_norm": 0.82421875, "learning_rate": 0.0002970089497985707, "loss": 0.2178, "step": 318590 }, { "epoch": 13.2, "grad_norm": 0.36328125, "learning_rate": 0.00029699829801421094, "loss": 0.1726, "step": 318600 }, { "epoch": 13.2, "grad_norm": 0.30859375, "learning_rate": 0.0002969876461414054, "loss": 0.1944, "step": 318610 }, { "epoch": 13.2, "grad_norm": 1.3125, "learning_rate": 0.0002969769941801743, "loss": 0.244, "step": 318620 }, { "epoch": 13.2, "grad_norm": 0.89453125, "learning_rate": 0.00029696634213053746, "loss": 0.1793, "step": 318630 }, { "epoch": 13.2, "grad_norm": 0.765625, "learning_rate": 0.00029695568999251506, "loss": 0.1141, "step": 318640 }, { "epoch": 13.2, "grad_norm": 1.0859375, "learning_rate": 0.00029694503776612715, "loss": 0.1999, "step": 318650 }, { "epoch": 13.2, "grad_norm": 0.78515625, "learning_rate": 0.0002969343854513937, "loss": 0.184, "step": 318660 }, { "epoch": 13.2, "grad_norm": 0.84765625, "learning_rate": 0.00029692373304833486, "loss": 0.1681, "step": 318670 }, { "epoch": 13.2, "grad_norm": 0.87109375, "learning_rate": 0.0002969130805569706, "loss": 0.2004, "step": 318680 }, { "epoch": 13.2, "grad_norm": 0.76171875, "learning_rate": 0.00029690242797732094, "loss": 0.1951, "step": 318690 }, { "epoch": 13.2, "grad_norm": 1.609375, "learning_rate": 0.0002968917753094061, "loss": 0.1639, "step": 318700 }, { "epoch": 13.2, "grad_norm": 0.78515625, "learning_rate": 0.0002968811225532459, "loss": 0.1718, "step": 318710 }, { "epoch": 13.2, "grad_norm": 1.34375, "learning_rate": 0.0002968704697088605, "loss": 0.1643, "step": 318720 }, { "epoch": 13.2, "grad_norm": 0.396484375, "learning_rate": 0.00029685981677626995, "loss": 0.1697, "step": 318730 }, { "epoch": 13.2, "grad_norm": 0.6484375, "learning_rate": 0.00029684916375549426, "loss": 0.2243, "step": 318740 }, { "epoch": 13.2, "grad_norm": 0.98046875, "learning_rate": 0.00029683851064655353, "loss": 0.1745, "step": 318750 }, { "epoch": 13.2, "grad_norm": 0.74609375, "learning_rate": 0.00029682785744946776, "loss": 0.2411, "step": 318760 }, { "epoch": 13.2, "grad_norm": 1.109375, "learning_rate": 0.00029681720416425706, "loss": 0.2046, "step": 318770 }, { "epoch": 13.2, "grad_norm": 0.73828125, "learning_rate": 0.00029680655079094144, "loss": 0.2096, "step": 318780 }, { "epoch": 13.2, "grad_norm": 0.91796875, "learning_rate": 0.00029679589732954095, "loss": 0.1979, "step": 318790 }, { "epoch": 13.2, "grad_norm": 0.6015625, "learning_rate": 0.00029678524378007554, "loss": 0.2058, "step": 318800 }, { "epoch": 13.21, "grad_norm": 0.8828125, "learning_rate": 0.0002967745901425655, "loss": 0.1746, "step": 318810 }, { "epoch": 13.21, "grad_norm": 0.59765625, "learning_rate": 0.0002967639364170306, "loss": 0.2481, "step": 318820 }, { "epoch": 13.21, "grad_norm": 0.765625, "learning_rate": 0.0002967532826034911, "loss": 0.1628, "step": 318830 }, { "epoch": 13.21, "grad_norm": 0.90625, "learning_rate": 0.00029674262870196696, "loss": 0.1978, "step": 318840 }, { "epoch": 13.21, "grad_norm": 1.2265625, "learning_rate": 0.0002967319747124782, "loss": 0.1695, "step": 318850 }, { "epoch": 13.21, "grad_norm": 1.2265625, "learning_rate": 0.000296721320635045, "loss": 0.1804, "step": 318860 }, { "epoch": 13.21, "grad_norm": 0.546875, "learning_rate": 0.0002967106664696872, "loss": 0.2202, "step": 318870 }, { "epoch": 13.21, "grad_norm": 0.498046875, "learning_rate": 0.00029670001221642503, "loss": 0.2223, "step": 318880 }, { "epoch": 13.21, "grad_norm": 1.09375, "learning_rate": 0.00029668935787527845, "loss": 0.2211, "step": 318890 }, { "epoch": 13.21, "grad_norm": 0.5078125, "learning_rate": 0.0002966787034462676, "loss": 0.2253, "step": 318900 }, { "epoch": 13.21, "grad_norm": 0.60546875, "learning_rate": 0.0002966680489294125, "loss": 0.2175, "step": 318910 }, { "epoch": 13.21, "grad_norm": 0.92578125, "learning_rate": 0.00029665739432473305, "loss": 0.2053, "step": 318920 }, { "epoch": 13.21, "grad_norm": 0.83203125, "learning_rate": 0.00029664673963224945, "loss": 0.1413, "step": 318930 }, { "epoch": 13.21, "grad_norm": 1.4921875, "learning_rate": 0.0002966360848519818, "loss": 0.2196, "step": 318940 }, { "epoch": 13.21, "grad_norm": 0.9140625, "learning_rate": 0.00029662542998395004, "loss": 0.2142, "step": 318950 }, { "epoch": 13.21, "grad_norm": 2.015625, "learning_rate": 0.00029661477502817424, "loss": 0.1935, "step": 318960 }, { "epoch": 13.21, "grad_norm": 0.7109375, "learning_rate": 0.00029660411998467444, "loss": 0.2102, "step": 318970 }, { "epoch": 13.21, "grad_norm": 0.7265625, "learning_rate": 0.0002965934648534707, "loss": 0.1674, "step": 318980 }, { "epoch": 13.21, "grad_norm": 0.7109375, "learning_rate": 0.0002965828096345832, "loss": 0.2038, "step": 318990 }, { "epoch": 13.21, "grad_norm": 0.85546875, "learning_rate": 0.00029657215432803185, "loss": 0.1836, "step": 319000 }, { "epoch": 13.21, "grad_norm": 1.40625, "learning_rate": 0.00029656149893383666, "loss": 0.191, "step": 319010 }, { "epoch": 13.21, "grad_norm": 1.3671875, "learning_rate": 0.00029655084345201784, "loss": 0.2213, "step": 319020 }, { "epoch": 13.21, "grad_norm": 0.59765625, "learning_rate": 0.00029654018788259536, "loss": 0.1817, "step": 319030 }, { "epoch": 13.21, "grad_norm": 0.494140625, "learning_rate": 0.0002965295322255892, "loss": 0.1842, "step": 319040 }, { "epoch": 13.22, "grad_norm": 1.4140625, "learning_rate": 0.00029651887648101955, "loss": 0.1568, "step": 319050 }, { "epoch": 13.22, "grad_norm": 0.91796875, "learning_rate": 0.0002965082206489064, "loss": 0.1692, "step": 319060 }, { "epoch": 13.22, "grad_norm": 0.44140625, "learning_rate": 0.00029649756472926975, "loss": 0.2393, "step": 319070 }, { "epoch": 13.22, "grad_norm": 1.09375, "learning_rate": 0.00029648690872212976, "loss": 0.1855, "step": 319080 }, { "epoch": 13.22, "grad_norm": 1.59375, "learning_rate": 0.0002964762526275063, "loss": 0.1794, "step": 319090 }, { "epoch": 13.22, "grad_norm": 0.5859375, "learning_rate": 0.0002964655964454197, "loss": 0.1461, "step": 319100 }, { "epoch": 13.22, "grad_norm": 0.78125, "learning_rate": 0.00029645494017588986, "loss": 0.2277, "step": 319110 }, { "epoch": 13.22, "grad_norm": 0.5546875, "learning_rate": 0.0002964442838189368, "loss": 0.2027, "step": 319120 }, { "epoch": 13.22, "grad_norm": 1.1484375, "learning_rate": 0.0002964336273745806, "loss": 0.1573, "step": 319130 }, { "epoch": 13.22, "grad_norm": 0.828125, "learning_rate": 0.0002964229708428413, "loss": 0.2244, "step": 319140 }, { "epoch": 13.22, "grad_norm": 0.5234375, "learning_rate": 0.00029641231422373906, "loss": 0.1825, "step": 319150 }, { "epoch": 13.22, "grad_norm": 0.62109375, "learning_rate": 0.00029640165751729377, "loss": 0.189, "step": 319160 }, { "epoch": 13.22, "grad_norm": 1.3125, "learning_rate": 0.0002963910007235256, "loss": 0.2031, "step": 319170 }, { "epoch": 13.22, "grad_norm": 0.380859375, "learning_rate": 0.0002963803438424546, "loss": 0.1659, "step": 319180 }, { "epoch": 13.22, "grad_norm": 0.62109375, "learning_rate": 0.00029636968687410083, "loss": 0.2077, "step": 319190 }, { "epoch": 13.22, "grad_norm": 1.328125, "learning_rate": 0.00029635902981848427, "loss": 0.14, "step": 319200 }, { "epoch": 13.22, "grad_norm": 0.333984375, "learning_rate": 0.00029634837267562505, "loss": 0.1581, "step": 319210 }, { "epoch": 13.22, "grad_norm": 1.4921875, "learning_rate": 0.0002963377154455431, "loss": 0.198, "step": 319220 }, { "epoch": 13.22, "grad_norm": 0.6875, "learning_rate": 0.00029632705812825865, "loss": 0.2303, "step": 319230 }, { "epoch": 13.22, "grad_norm": 1.03125, "learning_rate": 0.00029631640072379174, "loss": 0.171, "step": 319240 }, { "epoch": 13.22, "grad_norm": 1.4375, "learning_rate": 0.0002963057432321623, "loss": 0.183, "step": 319250 }, { "epoch": 13.22, "grad_norm": 1.1015625, "learning_rate": 0.0002962950856533905, "loss": 0.207, "step": 319260 }, { "epoch": 13.22, "grad_norm": 0.5859375, "learning_rate": 0.0002962844279874963, "loss": 0.2236, "step": 319270 }, { "epoch": 13.22, "grad_norm": 0.9296875, "learning_rate": 0.0002962737702344998, "loss": 0.168, "step": 319280 }, { "epoch": 13.22, "grad_norm": 1.96875, "learning_rate": 0.00029626311239442103, "loss": 0.2354, "step": 319290 }, { "epoch": 13.23, "grad_norm": 1.6796875, "learning_rate": 0.00029625245446728016, "loss": 0.1467, "step": 319300 }, { "epoch": 13.23, "grad_norm": 1.078125, "learning_rate": 0.00029624179645309714, "loss": 0.1504, "step": 319310 }, { "epoch": 13.23, "grad_norm": 0.423828125, "learning_rate": 0.00029623113835189205, "loss": 0.1664, "step": 319320 }, { "epoch": 13.23, "grad_norm": 1.4453125, "learning_rate": 0.0002962204801636849, "loss": 0.2493, "step": 319330 }, { "epoch": 13.23, "grad_norm": 1.2578125, "learning_rate": 0.00029620982188849587, "loss": 0.2061, "step": 319340 }, { "epoch": 13.23, "grad_norm": 0.419921875, "learning_rate": 0.00029619916352634495, "loss": 0.1259, "step": 319350 }, { "epoch": 13.23, "grad_norm": 1.140625, "learning_rate": 0.0002961885050772521, "loss": 0.1831, "step": 319360 }, { "epoch": 13.23, "grad_norm": 0.81640625, "learning_rate": 0.00029617784654123756, "loss": 0.1954, "step": 319370 }, { "epoch": 13.23, "grad_norm": 0.44921875, "learning_rate": 0.00029616718791832127, "loss": 0.1659, "step": 319380 }, { "epoch": 13.23, "grad_norm": 0.421875, "learning_rate": 0.00029615652920852325, "loss": 0.1518, "step": 319390 }, { "epoch": 13.23, "grad_norm": 0.58984375, "learning_rate": 0.0002961458704118637, "loss": 0.1858, "step": 319400 }, { "epoch": 13.23, "grad_norm": 0.0, "learning_rate": 0.00029613521152836264, "loss": 0.1845, "step": 319410 }, { "epoch": 13.23, "grad_norm": 0.8515625, "learning_rate": 0.00029612455255804005, "loss": 0.1854, "step": 319420 }, { "epoch": 13.23, "grad_norm": 0.859375, "learning_rate": 0.00029611389350091613, "loss": 0.173, "step": 319430 }, { "epoch": 13.23, "grad_norm": 0.7265625, "learning_rate": 0.00029610323435701067, "loss": 0.223, "step": 319440 }, { "epoch": 13.23, "grad_norm": 0.63671875, "learning_rate": 0.000296092575126344, "loss": 0.146, "step": 319450 }, { "epoch": 13.23, "grad_norm": 1.21875, "learning_rate": 0.0002960819158089361, "loss": 0.1573, "step": 319460 }, { "epoch": 13.23, "grad_norm": 0.5, "learning_rate": 0.0002960712564048069, "loss": 0.1838, "step": 319470 }, { "epoch": 13.23, "grad_norm": 0.53515625, "learning_rate": 0.0002960605969139767, "loss": 0.188, "step": 319480 }, { "epoch": 13.23, "grad_norm": 0.81640625, "learning_rate": 0.00029604993733646533, "loss": 0.2234, "step": 319490 }, { "epoch": 13.23, "grad_norm": 1.09375, "learning_rate": 0.000296039277672293, "loss": 0.1746, "step": 319500 }, { "epoch": 13.23, "grad_norm": 1.8203125, "learning_rate": 0.0002960286179214797, "loss": 0.2059, "step": 319510 }, { "epoch": 13.23, "grad_norm": 0.1962890625, "learning_rate": 0.0002960179580840455, "loss": 0.1792, "step": 319520 }, { "epoch": 13.23, "grad_norm": 0.6640625, "learning_rate": 0.00029600729816001056, "loss": 0.2205, "step": 319530 }, { "epoch": 13.24, "grad_norm": 0.0, "learning_rate": 0.00029599663814939474, "loss": 0.1576, "step": 319540 }, { "epoch": 13.24, "grad_norm": 0.921875, "learning_rate": 0.0002959859780522183, "loss": 0.1585, "step": 319550 }, { "epoch": 13.24, "grad_norm": 0.87109375, "learning_rate": 0.00029597531786850124, "loss": 0.1983, "step": 319560 }, { "epoch": 13.24, "grad_norm": 0.482421875, "learning_rate": 0.0002959646575982635, "loss": 0.1883, "step": 319570 }, { "epoch": 13.24, "grad_norm": 1.2421875, "learning_rate": 0.00029595399724152526, "loss": 0.1615, "step": 319580 }, { "epoch": 13.24, "grad_norm": 0.89453125, "learning_rate": 0.00029594333679830665, "loss": 0.1109, "step": 319590 }, { "epoch": 13.24, "grad_norm": 0.8125, "learning_rate": 0.00029593267626862747, "loss": 0.1972, "step": 319600 }, { "epoch": 13.24, "grad_norm": 0.455078125, "learning_rate": 0.0002959220156525081, "loss": 0.2223, "step": 319610 }, { "epoch": 13.24, "grad_norm": 1.15625, "learning_rate": 0.0002959113549499684, "loss": 0.1847, "step": 319620 }, { "epoch": 13.24, "grad_norm": 0.55859375, "learning_rate": 0.0002959006941610285, "loss": 0.2027, "step": 319630 }, { "epoch": 13.24, "grad_norm": 1.1484375, "learning_rate": 0.00029589003328570843, "loss": 0.193, "step": 319640 }, { "epoch": 13.24, "grad_norm": 0.66796875, "learning_rate": 0.0002958793723240283, "loss": 0.2008, "step": 319650 }, { "epoch": 13.24, "grad_norm": 0.76953125, "learning_rate": 0.0002958687112760081, "loss": 0.226, "step": 319660 }, { "epoch": 13.24, "grad_norm": 0.56640625, "learning_rate": 0.000295858050141668, "loss": 0.1976, "step": 319670 }, { "epoch": 13.24, "grad_norm": 0.57421875, "learning_rate": 0.000295847388921028, "loss": 0.1348, "step": 319680 }, { "epoch": 13.24, "grad_norm": 0.671875, "learning_rate": 0.00029583672761410813, "loss": 0.1995, "step": 319690 }, { "epoch": 13.24, "grad_norm": 0.060546875, "learning_rate": 0.0002958260662209285, "loss": 0.1315, "step": 319700 }, { "epoch": 13.24, "grad_norm": 0.490234375, "learning_rate": 0.00029581540474150913, "loss": 0.1759, "step": 319710 }, { "epoch": 13.24, "grad_norm": 0.7734375, "learning_rate": 0.00029580474317587017, "loss": 0.2241, "step": 319720 }, { "epoch": 13.24, "grad_norm": 0.1630859375, "learning_rate": 0.0002957940815240316, "loss": 0.1568, "step": 319730 }, { "epoch": 13.24, "grad_norm": 0.8515625, "learning_rate": 0.00029578341978601355, "loss": 0.1842, "step": 319740 }, { "epoch": 13.24, "grad_norm": 0.84765625, "learning_rate": 0.000295772757961836, "loss": 0.1917, "step": 319750 }, { "epoch": 13.24, "grad_norm": 0.6328125, "learning_rate": 0.00029576209605151904, "loss": 0.1549, "step": 319760 }, { "epoch": 13.24, "grad_norm": 1.2890625, "learning_rate": 0.00029575143405508286, "loss": 0.2758, "step": 319770 }, { "epoch": 13.25, "grad_norm": 0.5703125, "learning_rate": 0.00029574077197254737, "loss": 0.1959, "step": 319780 }, { "epoch": 13.25, "grad_norm": 2.234375, "learning_rate": 0.00029573010980393267, "loss": 0.1969, "step": 319790 }, { "epoch": 13.25, "grad_norm": 0.5625, "learning_rate": 0.0002957194475492589, "loss": 0.1329, "step": 319800 }, { "epoch": 13.25, "grad_norm": 0.87109375, "learning_rate": 0.00029570878520854597, "loss": 0.2427, "step": 319810 }, { "epoch": 13.25, "grad_norm": 0.84765625, "learning_rate": 0.00029569812278181407, "loss": 0.2043, "step": 319820 }, { "epoch": 13.25, "grad_norm": 0.0, "learning_rate": 0.00029568746026908325, "loss": 0.1874, "step": 319830 }, { "epoch": 13.25, "grad_norm": 0.765625, "learning_rate": 0.0002956767976703736, "loss": 0.1761, "step": 319840 }, { "epoch": 13.25, "grad_norm": 1.3671875, "learning_rate": 0.00029566613498570516, "loss": 0.234, "step": 319850 }, { "epoch": 13.25, "grad_norm": 0.7109375, "learning_rate": 0.00029565547221509794, "loss": 0.1769, "step": 319860 }, { "epoch": 13.25, "grad_norm": 1.40625, "learning_rate": 0.00029564480935857204, "loss": 0.2056, "step": 319870 }, { "epoch": 13.25, "grad_norm": 0.97265625, "learning_rate": 0.00029563414641614757, "loss": 0.1897, "step": 319880 }, { "epoch": 13.25, "grad_norm": 0.40234375, "learning_rate": 0.0002956234833878445, "loss": 0.1786, "step": 319890 }, { "epoch": 13.25, "grad_norm": 0.70703125, "learning_rate": 0.000295612820273683, "loss": 0.234, "step": 319900 }, { "epoch": 13.25, "grad_norm": 0.46875, "learning_rate": 0.0002956021570736831, "loss": 0.1645, "step": 319910 }, { "epoch": 13.25, "grad_norm": 1.28125, "learning_rate": 0.00029559149378786487, "loss": 0.2482, "step": 319920 }, { "epoch": 13.25, "grad_norm": 0.6484375, "learning_rate": 0.00029558083041624845, "loss": 0.2215, "step": 319930 }, { "epoch": 13.25, "grad_norm": 0.64453125, "learning_rate": 0.00029557016695885374, "loss": 0.202, "step": 319940 }, { "epoch": 13.25, "grad_norm": 1.09375, "learning_rate": 0.00029555950341570087, "loss": 0.1996, "step": 319950 }, { "epoch": 13.25, "grad_norm": 1.125, "learning_rate": 0.00029554883978681, "loss": 0.1793, "step": 319960 }, { "epoch": 13.25, "grad_norm": 0.0, "learning_rate": 0.00029553817607220106, "loss": 0.1766, "step": 319970 }, { "epoch": 13.25, "grad_norm": 0.6796875, "learning_rate": 0.0002955275122718943, "loss": 0.2029, "step": 319980 }, { "epoch": 13.25, "grad_norm": 1.0, "learning_rate": 0.0002955168483859096, "loss": 0.1829, "step": 319990 }, { "epoch": 13.25, "grad_norm": 1.046875, "learning_rate": 0.00029550618441426713, "loss": 0.2073, "step": 320000 }, { "epoch": 13.25, "grad_norm": 1.09375, "learning_rate": 0.00029549552035698694, "loss": 0.1759, "step": 320010 }, { "epoch": 13.26, "grad_norm": 0.65234375, "learning_rate": 0.0002954848562140891, "loss": 0.2168, "step": 320020 }, { "epoch": 13.26, "grad_norm": 0.9609375, "learning_rate": 0.00029547419198559365, "loss": 0.1665, "step": 320030 }, { "epoch": 13.26, "grad_norm": 1.921875, "learning_rate": 0.0002954635276715207, "loss": 0.2167, "step": 320040 }, { "epoch": 13.26, "grad_norm": 0.92578125, "learning_rate": 0.00029545286327189025, "loss": 0.2334, "step": 320050 }, { "epoch": 13.26, "grad_norm": 0.58203125, "learning_rate": 0.0002954421987867225, "loss": 0.2019, "step": 320060 }, { "epoch": 13.26, "grad_norm": 1.8203125, "learning_rate": 0.00029543153421603737, "loss": 0.1731, "step": 320070 }, { "epoch": 13.26, "grad_norm": 0.58203125, "learning_rate": 0.000295420869559855, "loss": 0.1734, "step": 320080 }, { "epoch": 13.26, "grad_norm": 1.0078125, "learning_rate": 0.0002954102048181955, "loss": 0.1939, "step": 320090 }, { "epoch": 13.26, "grad_norm": 1.5859375, "learning_rate": 0.00029539953999107893, "loss": 0.2015, "step": 320100 }, { "epoch": 13.26, "grad_norm": 0.46875, "learning_rate": 0.00029538887507852527, "loss": 0.175, "step": 320110 }, { "epoch": 13.26, "grad_norm": 0.83984375, "learning_rate": 0.0002953782100805547, "loss": 0.1772, "step": 320120 }, { "epoch": 13.26, "grad_norm": 0.51171875, "learning_rate": 0.0002953675449971871, "loss": 0.1932, "step": 320130 }, { "epoch": 13.26, "grad_norm": 1.5625, "learning_rate": 0.00029535687982844283, "loss": 0.2073, "step": 320140 }, { "epoch": 13.26, "grad_norm": 0.91796875, "learning_rate": 0.00029534621457434173, "loss": 0.1872, "step": 320150 }, { "epoch": 13.26, "grad_norm": 1.203125, "learning_rate": 0.00029533554923490395, "loss": 0.1635, "step": 320160 }, { "epoch": 13.26, "grad_norm": 1.078125, "learning_rate": 0.00029532488381014965, "loss": 0.1751, "step": 320170 }, { "epoch": 13.26, "grad_norm": 0.7578125, "learning_rate": 0.0002953142183000987, "loss": 0.1883, "step": 320180 }, { "epoch": 13.26, "grad_norm": 1.765625, "learning_rate": 0.0002953035527047714, "loss": 0.1477, "step": 320190 }, { "epoch": 13.26, "grad_norm": 0.60546875, "learning_rate": 0.0002952928870241876, "loss": 0.1689, "step": 320200 }, { "epoch": 13.26, "grad_norm": 0.11962890625, "learning_rate": 0.00029528222125836754, "loss": 0.1764, "step": 320210 }, { "epoch": 13.26, "grad_norm": 0.8515625, "learning_rate": 0.00029527155540733124, "loss": 0.205, "step": 320220 }, { "epoch": 13.26, "grad_norm": 1.1328125, "learning_rate": 0.00029526088947109876, "loss": 0.1701, "step": 320230 }, { "epoch": 13.26, "grad_norm": 0.66015625, "learning_rate": 0.0002952502234496901, "loss": 0.2078, "step": 320240 }, { "epoch": 13.26, "grad_norm": 1.140625, "learning_rate": 0.0002952395573431255, "loss": 0.1986, "step": 320250 }, { "epoch": 13.27, "grad_norm": 0.4609375, "learning_rate": 0.0002952288911514248, "loss": 0.1665, "step": 320260 }, { "epoch": 13.27, "grad_norm": 1.1171875, "learning_rate": 0.0002952182248746084, "loss": 0.2173, "step": 320270 }, { "epoch": 13.27, "grad_norm": 1.7734375, "learning_rate": 0.0002952075585126961, "loss": 0.167, "step": 320280 }, { "epoch": 13.27, "grad_norm": 0.427734375, "learning_rate": 0.000295196892065708, "loss": 0.1221, "step": 320290 }, { "epoch": 13.27, "grad_norm": 1.125, "learning_rate": 0.0002951862255336643, "loss": 0.16, "step": 320300 }, { "epoch": 13.27, "grad_norm": 0.8515625, "learning_rate": 0.000295175558916585, "loss": 0.2178, "step": 320310 }, { "epoch": 13.27, "grad_norm": 1.2421875, "learning_rate": 0.0002951648922144901, "loss": 0.1608, "step": 320320 }, { "epoch": 13.27, "grad_norm": 1.0234375, "learning_rate": 0.0002951542254273999, "loss": 0.2307, "step": 320330 }, { "epoch": 13.27, "grad_norm": 1.125, "learning_rate": 0.00029514355855533414, "loss": 0.2013, "step": 320340 }, { "epoch": 13.27, "grad_norm": 0.62109375, "learning_rate": 0.00029513289159831317, "loss": 0.1841, "step": 320350 }, { "epoch": 13.27, "grad_norm": 0.5234375, "learning_rate": 0.000295122224556357, "loss": 0.2005, "step": 320360 }, { "epoch": 13.27, "grad_norm": 0.51171875, "learning_rate": 0.00029511155742948563, "loss": 0.2227, "step": 320370 }, { "epoch": 13.27, "grad_norm": 1.9921875, "learning_rate": 0.0002951008902177192, "loss": 0.1995, "step": 320380 }, { "epoch": 13.27, "grad_norm": 1.7578125, "learning_rate": 0.0002950902229210778, "loss": 0.1858, "step": 320390 }, { "epoch": 13.27, "grad_norm": 1.0078125, "learning_rate": 0.0002950795555395814, "loss": 0.1982, "step": 320400 }, { "epoch": 13.27, "grad_norm": 1.4296875, "learning_rate": 0.00029506888807325014, "loss": 0.1983, "step": 320410 }, { "epoch": 13.27, "grad_norm": 1.4765625, "learning_rate": 0.0002950582205221041, "loss": 0.1966, "step": 320420 }, { "epoch": 13.27, "grad_norm": 0.357421875, "learning_rate": 0.0002950475528861635, "loss": 0.1949, "step": 320430 }, { "epoch": 13.27, "grad_norm": 1.0625, "learning_rate": 0.00029503688516544814, "loss": 0.1771, "step": 320440 }, { "epoch": 13.27, "grad_norm": 0.890625, "learning_rate": 0.00029502621735997826, "loss": 0.1777, "step": 320450 }, { "epoch": 13.27, "grad_norm": 0.3984375, "learning_rate": 0.00029501554946977386, "loss": 0.1502, "step": 320460 }, { "epoch": 13.27, "grad_norm": 0.6171875, "learning_rate": 0.0002950048814948551, "loss": 0.1508, "step": 320470 }, { "epoch": 13.27, "grad_norm": 0.66796875, "learning_rate": 0.000294994213435242, "loss": 0.201, "step": 320480 }, { "epoch": 13.27, "grad_norm": 0.515625, "learning_rate": 0.0002949835452909546, "loss": 0.1931, "step": 320490 }, { "epoch": 13.28, "grad_norm": 0.7109375, "learning_rate": 0.0002949728770620131, "loss": 0.1754, "step": 320500 }, { "epoch": 13.28, "grad_norm": 0.91015625, "learning_rate": 0.0002949622087484375, "loss": 0.2308, "step": 320510 }, { "epoch": 13.28, "grad_norm": 0.470703125, "learning_rate": 0.00029495154035024784, "loss": 0.1547, "step": 320520 }, { "epoch": 13.28, "grad_norm": 1.5703125, "learning_rate": 0.00029494087186746423, "loss": 0.1979, "step": 320530 }, { "epoch": 13.28, "grad_norm": 0.93359375, "learning_rate": 0.0002949302033001068, "loss": 0.1697, "step": 320540 }, { "epoch": 13.28, "grad_norm": 3.28125, "learning_rate": 0.00029491953464819555, "loss": 0.2097, "step": 320550 }, { "epoch": 13.28, "grad_norm": 1.1640625, "learning_rate": 0.0002949088659117506, "loss": 0.1601, "step": 320560 }, { "epoch": 13.28, "grad_norm": 1.2578125, "learning_rate": 0.00029489819709079196, "loss": 0.1984, "step": 320570 }, { "epoch": 13.28, "grad_norm": 1.2734375, "learning_rate": 0.00029488752818533984, "loss": 0.1998, "step": 320580 }, { "epoch": 13.28, "grad_norm": 0.4453125, "learning_rate": 0.00029487685919541416, "loss": 0.1502, "step": 320590 }, { "epoch": 13.28, "grad_norm": 0.54296875, "learning_rate": 0.00029486619012103515, "loss": 0.1831, "step": 320600 }, { "epoch": 13.28, "grad_norm": 1.171875, "learning_rate": 0.0002948555209622228, "loss": 0.2143, "step": 320610 }, { "epoch": 13.28, "grad_norm": 1.953125, "learning_rate": 0.00029484485171899714, "loss": 0.1635, "step": 320620 }, { "epoch": 13.28, "grad_norm": 0.4453125, "learning_rate": 0.0002948341823913784, "loss": 0.2243, "step": 320630 }, { "epoch": 13.28, "grad_norm": 0.96484375, "learning_rate": 0.00029482351297938655, "loss": 0.1805, "step": 320640 }, { "epoch": 13.28, "grad_norm": 0.5390625, "learning_rate": 0.00029481284348304164, "loss": 0.1541, "step": 320650 }, { "epoch": 13.28, "grad_norm": 1.046875, "learning_rate": 0.0002948021739023638, "loss": 0.1826, "step": 320660 }, { "epoch": 13.28, "grad_norm": 0.39453125, "learning_rate": 0.00029479150423737324, "loss": 0.1971, "step": 320670 }, { "epoch": 13.28, "grad_norm": 0.41015625, "learning_rate": 0.0002947808344880897, "loss": 0.215, "step": 320680 }, { "epoch": 13.28, "grad_norm": 0.59375, "learning_rate": 0.0002947701646545336, "loss": 0.1875, "step": 320690 }, { "epoch": 13.28, "grad_norm": 0.6953125, "learning_rate": 0.0002947594947367248, "loss": 0.1652, "step": 320700 }, { "epoch": 13.28, "grad_norm": 0.0, "learning_rate": 0.0002947488247346836, "loss": 0.2134, "step": 320710 }, { "epoch": 13.28, "grad_norm": 0.859375, "learning_rate": 0.00029473815464842987, "loss": 0.1979, "step": 320720 }, { "epoch": 13.28, "grad_norm": 0.83203125, "learning_rate": 0.0002947274844779837, "loss": 0.1965, "step": 320730 }, { "epoch": 13.29, "grad_norm": 0.5390625, "learning_rate": 0.00029471681422336526, "loss": 0.2085, "step": 320740 }, { "epoch": 13.29, "grad_norm": 0.82421875, "learning_rate": 0.00029470614388459466, "loss": 0.1741, "step": 320750 }, { "epoch": 13.29, "grad_norm": 0.90234375, "learning_rate": 0.00029469547346169193, "loss": 0.1966, "step": 320760 }, { "epoch": 13.29, "grad_norm": 1.0078125, "learning_rate": 0.0002946848029546771, "loss": 0.2679, "step": 320770 }, { "epoch": 13.29, "grad_norm": 0.46484375, "learning_rate": 0.00029467413236357033, "loss": 0.1849, "step": 320780 }, { "epoch": 13.29, "grad_norm": 1.0, "learning_rate": 0.00029466346168839164, "loss": 0.2098, "step": 320790 }, { "epoch": 13.29, "grad_norm": 0.5859375, "learning_rate": 0.00029465279092916115, "loss": 0.2144, "step": 320800 }, { "epoch": 13.29, "grad_norm": 0.7578125, "learning_rate": 0.0002946421200858989, "loss": 0.196, "step": 320810 }, { "epoch": 13.29, "grad_norm": 1.8359375, "learning_rate": 0.00029463144915862507, "loss": 0.1952, "step": 320820 }, { "epoch": 13.29, "grad_norm": 1.1953125, "learning_rate": 0.0002946207781473596, "loss": 0.2189, "step": 320830 }, { "epoch": 13.29, "grad_norm": 1.5625, "learning_rate": 0.0002946101070521227, "loss": 0.2217, "step": 320840 }, { "epoch": 13.29, "grad_norm": 0.734375, "learning_rate": 0.00029459943587293436, "loss": 0.1564, "step": 320850 }, { "epoch": 13.29, "grad_norm": 0.984375, "learning_rate": 0.00029458876460981473, "loss": 0.1913, "step": 320860 }, { "epoch": 13.29, "grad_norm": 0.82421875, "learning_rate": 0.0002945780932627838, "loss": 0.2351, "step": 320870 }, { "epoch": 13.29, "grad_norm": 1.4921875, "learning_rate": 0.0002945674218318618, "loss": 0.2315, "step": 320880 }, { "epoch": 13.29, "grad_norm": 0.9375, "learning_rate": 0.00029455675031706864, "loss": 0.1937, "step": 320890 }, { "epoch": 13.29, "grad_norm": 0.359375, "learning_rate": 0.00029454607871842454, "loss": 0.1995, "step": 320900 }, { "epoch": 13.29, "grad_norm": 1.1875, "learning_rate": 0.00029453540703594945, "loss": 0.1634, "step": 320910 }, { "epoch": 13.29, "grad_norm": 1.109375, "learning_rate": 0.0002945247352696637, "loss": 0.1702, "step": 320920 }, { "epoch": 13.29, "grad_norm": 0.93359375, "learning_rate": 0.000294514063419587, "loss": 0.1214, "step": 320930 }, { "epoch": 13.29, "grad_norm": 3.265625, "learning_rate": 0.0002945033914857398, "loss": 0.1631, "step": 320940 }, { "epoch": 13.29, "grad_norm": 0.296875, "learning_rate": 0.000294492719468142, "loss": 0.1938, "step": 320950 }, { "epoch": 13.29, "grad_norm": 0.6171875, "learning_rate": 0.0002944820473668136, "loss": 0.2198, "step": 320960 }, { "epoch": 13.29, "grad_norm": 0.65625, "learning_rate": 0.0002944713751817749, "loss": 0.2179, "step": 320970 }, { "epoch": 13.29, "grad_norm": 0.7421875, "learning_rate": 0.0002944607029130458, "loss": 0.1341, "step": 320980 }, { "epoch": 13.3, "grad_norm": 1.296875, "learning_rate": 0.0002944500305606465, "loss": 0.2132, "step": 320990 }, { "epoch": 13.3, "grad_norm": 1.8125, "learning_rate": 0.0002944393581245971, "loss": 0.1687, "step": 321000 }, { "epoch": 13.3, "grad_norm": 1.0234375, "learning_rate": 0.0002944286856049175, "loss": 0.2023, "step": 321010 }, { "epoch": 13.3, "grad_norm": 0.53125, "learning_rate": 0.000294418013001628, "loss": 0.1596, "step": 321020 }, { "epoch": 13.3, "grad_norm": 1.25, "learning_rate": 0.0002944073403147486, "loss": 0.1533, "step": 321030 }, { "epoch": 13.3, "grad_norm": 0.69140625, "learning_rate": 0.0002943966675442993, "loss": 0.2509, "step": 321040 }, { "epoch": 13.3, "grad_norm": 0.3046875, "learning_rate": 0.0002943859946903004, "loss": 0.2031, "step": 321050 }, { "epoch": 13.3, "grad_norm": 1.1015625, "learning_rate": 0.0002943753217527717, "loss": 0.2345, "step": 321060 }, { "epoch": 13.3, "grad_norm": 0.93359375, "learning_rate": 0.0002943646487317335, "loss": 0.1841, "step": 321070 }, { "epoch": 13.3, "grad_norm": 0.9140625, "learning_rate": 0.0002943539756272059, "loss": 0.1601, "step": 321080 }, { "epoch": 13.3, "grad_norm": 1.8984375, "learning_rate": 0.0002943433024392088, "loss": 0.2071, "step": 321090 }, { "epoch": 13.3, "grad_norm": 0.6953125, "learning_rate": 0.00029433262916776245, "loss": 0.118, "step": 321100 }, { "epoch": 13.3, "grad_norm": 0.80078125, "learning_rate": 0.0002943219558128869, "loss": 0.162, "step": 321110 }, { "epoch": 13.3, "grad_norm": 1.0390625, "learning_rate": 0.00029431128237460206, "loss": 0.1573, "step": 321120 }, { "epoch": 13.3, "grad_norm": 0.50390625, "learning_rate": 0.00029430060885292836, "loss": 0.1813, "step": 321130 }, { "epoch": 13.3, "grad_norm": 0.84765625, "learning_rate": 0.00029428993524788563, "loss": 0.1723, "step": 321140 }, { "epoch": 13.3, "grad_norm": 0.63671875, "learning_rate": 0.00029427926155949405, "loss": 0.1372, "step": 321150 }, { "epoch": 13.3, "grad_norm": 0.86328125, "learning_rate": 0.0002942685877877737, "loss": 0.2303, "step": 321160 }, { "epoch": 13.3, "grad_norm": 1.3203125, "learning_rate": 0.0002942579139327445, "loss": 0.2424, "step": 321170 }, { "epoch": 13.3, "grad_norm": 0.62109375, "learning_rate": 0.0002942472399944268, "loss": 0.1514, "step": 321180 }, { "epoch": 13.3, "grad_norm": 1.0078125, "learning_rate": 0.0002942365659728406, "loss": 0.2127, "step": 321190 }, { "epoch": 13.3, "grad_norm": 0.5703125, "learning_rate": 0.00029422589186800584, "loss": 0.1572, "step": 321200 }, { "epoch": 13.3, "grad_norm": 0.84765625, "learning_rate": 0.0002942152176799429, "loss": 0.166, "step": 321210 }, { "epoch": 13.3, "grad_norm": 0.333984375, "learning_rate": 0.0002942045434086715, "loss": 0.146, "step": 321220 }, { "epoch": 13.31, "grad_norm": 0.66015625, "learning_rate": 0.0002941938690542121, "loss": 0.1462, "step": 321230 }, { "epoch": 13.31, "grad_norm": 0.71875, "learning_rate": 0.00029418319461658457, "loss": 0.1594, "step": 321240 }, { "epoch": 13.31, "grad_norm": 0.8359375, "learning_rate": 0.0002941725200958089, "loss": 0.1825, "step": 321250 }, { "epoch": 13.31, "grad_norm": 0.90625, "learning_rate": 0.0002941618454919055, "loss": 0.1951, "step": 321260 }, { "epoch": 13.31, "grad_norm": 0.85546875, "learning_rate": 0.00029415117080489425, "loss": 0.2325, "step": 321270 }, { "epoch": 13.31, "grad_norm": 0.61328125, "learning_rate": 0.0002941404960347951, "loss": 0.2283, "step": 321280 }, { "epoch": 13.31, "grad_norm": 0.80078125, "learning_rate": 0.00029412982118162846, "loss": 0.1871, "step": 321290 }, { "epoch": 13.31, "grad_norm": 0.953125, "learning_rate": 0.00029411914624541424, "loss": 0.2116, "step": 321300 }, { "epoch": 13.31, "grad_norm": 0.7734375, "learning_rate": 0.0002941084712261725, "loss": 0.1933, "step": 321310 }, { "epoch": 13.31, "grad_norm": 1.03125, "learning_rate": 0.00029409779612392346, "loss": 0.1654, "step": 321320 }, { "epoch": 13.31, "grad_norm": 2.078125, "learning_rate": 0.000294087120938687, "loss": 0.2027, "step": 321330 }, { "epoch": 13.31, "grad_norm": 1.4296875, "learning_rate": 0.0002940764456704835, "loss": 0.1428, "step": 321340 }, { "epoch": 13.31, "grad_norm": 1.015625, "learning_rate": 0.0002940657703193328, "loss": 0.2128, "step": 321350 }, { "epoch": 13.31, "grad_norm": 1.1796875, "learning_rate": 0.0002940550948852551, "loss": 0.2419, "step": 321360 }, { "epoch": 13.31, "grad_norm": 0.68359375, "learning_rate": 0.0002940444193682704, "loss": 0.2257, "step": 321370 }, { "epoch": 13.31, "grad_norm": 0.484375, "learning_rate": 0.00029403374376839894, "loss": 0.162, "step": 321380 }, { "epoch": 13.31, "grad_norm": 0.6640625, "learning_rate": 0.0002940230680856607, "loss": 0.1662, "step": 321390 }, { "epoch": 13.31, "grad_norm": 0.5546875, "learning_rate": 0.0002940123923200758, "loss": 0.1941, "step": 321400 }, { "epoch": 13.31, "grad_norm": 0.458984375, "learning_rate": 0.0002940017164716643, "loss": 0.2027, "step": 321410 }, { "epoch": 13.31, "grad_norm": 0.52734375, "learning_rate": 0.0002939910405404464, "loss": 0.2199, "step": 321420 }, { "epoch": 13.31, "grad_norm": 0.93359375, "learning_rate": 0.0002939803645264421, "loss": 0.1748, "step": 321430 }, { "epoch": 13.31, "grad_norm": 1.3828125, "learning_rate": 0.00029396968842967144, "loss": 0.2032, "step": 321440 }, { "epoch": 13.31, "grad_norm": 0.50390625, "learning_rate": 0.00029395901225015466, "loss": 0.2287, "step": 321450 }, { "epoch": 13.31, "grad_norm": 0.6015625, "learning_rate": 0.00029394833598791163, "loss": 0.2009, "step": 321460 }, { "epoch": 13.32, "grad_norm": 1.1171875, "learning_rate": 0.00029393765964296273, "loss": 0.2002, "step": 321470 }, { "epoch": 13.32, "grad_norm": 0.77734375, "learning_rate": 0.00029392698321532786, "loss": 0.1458, "step": 321480 }, { "epoch": 13.32, "grad_norm": 0.765625, "learning_rate": 0.000293916306705027, "loss": 0.1852, "step": 321490 }, { "epoch": 13.32, "grad_norm": 0.498046875, "learning_rate": 0.0002939056301120805, "loss": 0.1869, "step": 321500 }, { "epoch": 13.32, "grad_norm": 0.494140625, "learning_rate": 0.0002938949534365084, "loss": 0.1375, "step": 321510 }, { "epoch": 13.32, "grad_norm": 0.9375, "learning_rate": 0.00029388427667833075, "loss": 0.2319, "step": 321520 }, { "epoch": 13.32, "grad_norm": 0.62890625, "learning_rate": 0.0002938735998375676, "loss": 0.1904, "step": 321530 }, { "epoch": 13.32, "grad_norm": 0.6171875, "learning_rate": 0.000293862922914239, "loss": 0.2285, "step": 321540 }, { "epoch": 13.32, "grad_norm": 0.94140625, "learning_rate": 0.00029385224590836517, "loss": 0.1859, "step": 321550 }, { "epoch": 13.32, "grad_norm": 1.234375, "learning_rate": 0.00029384156881996615, "loss": 0.1936, "step": 321560 }, { "epoch": 13.32, "grad_norm": 0.92578125, "learning_rate": 0.000293830891649062, "loss": 0.1914, "step": 321570 }, { "epoch": 13.32, "grad_norm": 1.1796875, "learning_rate": 0.0002938202143956729, "loss": 0.1864, "step": 321580 }, { "epoch": 13.32, "grad_norm": 1.546875, "learning_rate": 0.00029380953705981884, "loss": 0.1808, "step": 321590 }, { "epoch": 13.32, "grad_norm": 0.96484375, "learning_rate": 0.00029379885964152, "loss": 0.2014, "step": 321600 }, { "epoch": 13.32, "grad_norm": 0.640625, "learning_rate": 0.00029378818214079644, "loss": 0.1582, "step": 321610 }, { "epoch": 13.32, "grad_norm": 0.8984375, "learning_rate": 0.0002937775045576682, "loss": 0.1796, "step": 321620 }, { "epoch": 13.32, "grad_norm": 0.53515625, "learning_rate": 0.0002937668268921555, "loss": 0.176, "step": 321630 }, { "epoch": 13.32, "grad_norm": 1.765625, "learning_rate": 0.00029375614914427836, "loss": 0.2038, "step": 321640 }, { "epoch": 13.32, "grad_norm": 0.462890625, "learning_rate": 0.0002937454713140567, "loss": 0.175, "step": 321650 }, { "epoch": 13.32, "grad_norm": 0.96484375, "learning_rate": 0.00029373479340151097, "loss": 0.1781, "step": 321660 }, { "epoch": 13.32, "grad_norm": 1.0234375, "learning_rate": 0.000293724115406661, "loss": 0.1769, "step": 321670 }, { "epoch": 13.32, "grad_norm": 0.63671875, "learning_rate": 0.000293713437329527, "loss": 0.2058, "step": 321680 }, { "epoch": 13.32, "grad_norm": 0.396484375, "learning_rate": 0.00029370275917012907, "loss": 0.1831, "step": 321690 }, { "epoch": 13.32, "grad_norm": 0.6171875, "learning_rate": 0.0002936920809284872, "loss": 0.1945, "step": 321700 }, { "epoch": 13.33, "grad_norm": 0.5390625, "learning_rate": 0.0002936814026046216, "loss": 0.1635, "step": 321710 }, { "epoch": 13.33, "grad_norm": 0.7265625, "learning_rate": 0.00029367072419855225, "loss": 0.2106, "step": 321720 }, { "epoch": 13.33, "grad_norm": 1.078125, "learning_rate": 0.0002936600457102993, "loss": 0.2364, "step": 321730 }, { "epoch": 13.33, "grad_norm": 3.796875, "learning_rate": 0.000293649367139883, "loss": 0.2149, "step": 321740 }, { "epoch": 13.33, "grad_norm": 1.1015625, "learning_rate": 0.0002936386884873232, "loss": 0.1762, "step": 321750 }, { "epoch": 13.33, "grad_norm": 0.474609375, "learning_rate": 0.0002936280097526401, "loss": 0.1645, "step": 321760 }, { "epoch": 13.33, "grad_norm": 0.427734375, "learning_rate": 0.00029361733093585384, "loss": 0.2157, "step": 321770 }, { "epoch": 13.33, "grad_norm": 0.9921875, "learning_rate": 0.00029360665203698443, "loss": 0.2056, "step": 321780 }, { "epoch": 13.33, "grad_norm": 0.95703125, "learning_rate": 0.00029359597305605203, "loss": 0.2188, "step": 321790 }, { "epoch": 13.33, "grad_norm": 0.70703125, "learning_rate": 0.00029358529399307664, "loss": 0.2163, "step": 321800 }, { "epoch": 13.33, "grad_norm": 0.7734375, "learning_rate": 0.0002935746148480785, "loss": 0.1585, "step": 321810 }, { "epoch": 13.33, "grad_norm": 0.67578125, "learning_rate": 0.00029356393562107773, "loss": 0.2072, "step": 321820 }, { "epoch": 13.33, "grad_norm": 0.63671875, "learning_rate": 0.0002935532563120942, "loss": 0.2451, "step": 321830 }, { "epoch": 13.33, "grad_norm": 0.337890625, "learning_rate": 0.0002935425769211482, "loss": 0.2178, "step": 321840 }, { "epoch": 13.33, "grad_norm": 0.8046875, "learning_rate": 0.00029353189744825984, "loss": 0.2017, "step": 321850 }, { "epoch": 13.33, "grad_norm": 1.03125, "learning_rate": 0.00029352121789344904, "loss": 0.1675, "step": 321860 }, { "epoch": 13.33, "grad_norm": 2.9375, "learning_rate": 0.0002935105382567361, "loss": 0.2047, "step": 321870 }, { "epoch": 13.33, "grad_norm": 1.1953125, "learning_rate": 0.00029349985853814093, "loss": 0.1516, "step": 321880 }, { "epoch": 13.33, "grad_norm": 0.828125, "learning_rate": 0.00029348917873768375, "loss": 0.2494, "step": 321890 }, { "epoch": 13.33, "grad_norm": 0.65625, "learning_rate": 0.0002934784988553847, "loss": 0.1607, "step": 321900 }, { "epoch": 13.33, "grad_norm": 0.90234375, "learning_rate": 0.0002934678188912637, "loss": 0.2774, "step": 321910 }, { "epoch": 13.33, "grad_norm": 0.65625, "learning_rate": 0.0002934571388453411, "loss": 0.1943, "step": 321920 }, { "epoch": 13.33, "grad_norm": 3.265625, "learning_rate": 0.0002934464587176367, "loss": 0.1781, "step": 321930 }, { "epoch": 13.33, "grad_norm": 1.171875, "learning_rate": 0.0002934357785081709, "loss": 0.1621, "step": 321940 }, { "epoch": 13.34, "grad_norm": 0.5390625, "learning_rate": 0.0002934250982169636, "loss": 0.1946, "step": 321950 }, { "epoch": 13.34, "grad_norm": 0.6796875, "learning_rate": 0.0002934144178440349, "loss": 0.154, "step": 321960 }, { "epoch": 13.34, "grad_norm": 0.55078125, "learning_rate": 0.000293403737389405, "loss": 0.1932, "step": 321970 }, { "epoch": 13.34, "grad_norm": 0.8671875, "learning_rate": 0.000293393056853094, "loss": 0.2651, "step": 321980 }, { "epoch": 13.34, "grad_norm": 1.5625, "learning_rate": 0.00029338237623512185, "loss": 0.2233, "step": 321990 }, { "epoch": 13.34, "grad_norm": 0.447265625, "learning_rate": 0.00029337169553550886, "loss": 0.1768, "step": 322000 }, { "epoch": 13.34, "grad_norm": 1.0546875, "learning_rate": 0.0002933610147542749, "loss": 0.1781, "step": 322010 }, { "epoch": 13.34, "grad_norm": 0.63671875, "learning_rate": 0.0002933503338914403, "loss": 0.1959, "step": 322020 }, { "epoch": 13.34, "grad_norm": 0.65234375, "learning_rate": 0.00029333965294702505, "loss": 0.1975, "step": 322030 }, { "epoch": 13.34, "grad_norm": 1.3515625, "learning_rate": 0.00029332897192104916, "loss": 0.1822, "step": 322040 }, { "epoch": 13.34, "grad_norm": 1.3359375, "learning_rate": 0.0002933182908135329, "loss": 0.1934, "step": 322050 }, { "epoch": 13.34, "grad_norm": 1.078125, "learning_rate": 0.00029330760962449626, "loss": 0.1814, "step": 322060 }, { "epoch": 13.34, "grad_norm": 0.63671875, "learning_rate": 0.0002932969283539594, "loss": 0.1544, "step": 322070 }, { "epoch": 13.34, "grad_norm": 1.90625, "learning_rate": 0.00029328624700194247, "loss": 0.1704, "step": 322080 }, { "epoch": 13.34, "grad_norm": 1.4921875, "learning_rate": 0.00029327556556846534, "loss": 0.1873, "step": 322090 }, { "epoch": 13.34, "grad_norm": 1.5390625, "learning_rate": 0.00029326488405354836, "loss": 0.2295, "step": 322100 }, { "epoch": 13.34, "grad_norm": 0.61328125, "learning_rate": 0.0002932542024572116, "loss": 0.2466, "step": 322110 }, { "epoch": 13.34, "grad_norm": 1.1015625, "learning_rate": 0.000293243520779475, "loss": 0.1662, "step": 322120 }, { "epoch": 13.34, "grad_norm": 0.58203125, "learning_rate": 0.0002932328390203588, "loss": 0.1608, "step": 322130 }, { "epoch": 13.34, "grad_norm": 0.77734375, "learning_rate": 0.000293222157179883, "loss": 0.1712, "step": 322140 }, { "epoch": 13.34, "grad_norm": 1.25, "learning_rate": 0.0002932114752580679, "loss": 0.1517, "step": 322150 }, { "epoch": 13.34, "grad_norm": 0.70703125, "learning_rate": 0.0002932007932549334, "loss": 0.1854, "step": 322160 }, { "epoch": 13.34, "grad_norm": 0.8125, "learning_rate": 0.00029319011117049965, "loss": 0.1776, "step": 322170 }, { "epoch": 13.34, "grad_norm": 0.7734375, "learning_rate": 0.0002931794290047868, "loss": 0.1948, "step": 322180 }, { "epoch": 13.35, "grad_norm": 0.5546875, "learning_rate": 0.0002931687467578149, "loss": 0.1711, "step": 322190 }, { "epoch": 13.35, "grad_norm": 0.6640625, "learning_rate": 0.00029315806442960414, "loss": 0.1725, "step": 322200 }, { "epoch": 13.35, "grad_norm": 0.8359375, "learning_rate": 0.0002931473820201745, "loss": 0.2159, "step": 322210 }, { "epoch": 13.35, "grad_norm": 0.90234375, "learning_rate": 0.00029313669952954615, "loss": 0.1555, "step": 322220 }, { "epoch": 13.35, "grad_norm": 1.1015625, "learning_rate": 0.00029312601695773925, "loss": 0.2006, "step": 322230 }, { "epoch": 13.35, "grad_norm": 0.59765625, "learning_rate": 0.0002931153343047738, "loss": 0.1722, "step": 322240 }, { "epoch": 13.35, "grad_norm": 1.1015625, "learning_rate": 0.00029310465157066995, "loss": 0.1661, "step": 322250 }, { "epoch": 13.35, "grad_norm": 1.3828125, "learning_rate": 0.0002930939687554478, "loss": 0.2206, "step": 322260 }, { "epoch": 13.35, "grad_norm": 1.078125, "learning_rate": 0.0002930832858591274, "loss": 0.2016, "step": 322270 }, { "epoch": 13.35, "grad_norm": 1.1484375, "learning_rate": 0.00029307260288172904, "loss": 0.2007, "step": 322280 }, { "epoch": 13.35, "grad_norm": 0.63671875, "learning_rate": 0.00029306191982327257, "loss": 0.1857, "step": 322290 }, { "epoch": 13.35, "grad_norm": 0.68359375, "learning_rate": 0.0002930512366837783, "loss": 0.1881, "step": 322300 }, { "epoch": 13.35, "grad_norm": 0.79296875, "learning_rate": 0.00029304055346326627, "loss": 0.1759, "step": 322310 }, { "epoch": 13.35, "grad_norm": 0.2265625, "learning_rate": 0.0002930298701617565, "loss": 0.1974, "step": 322320 }, { "epoch": 13.35, "grad_norm": 1.0234375, "learning_rate": 0.00029301918677926913, "loss": 0.207, "step": 322330 }, { "epoch": 13.35, "grad_norm": 0.6640625, "learning_rate": 0.00029300850331582433, "loss": 0.1676, "step": 322340 }, { "epoch": 13.35, "grad_norm": 1.4609375, "learning_rate": 0.00029299781977144216, "loss": 0.198, "step": 322350 }, { "epoch": 13.35, "grad_norm": 0.3125, "learning_rate": 0.0002929871361461428, "loss": 0.1872, "step": 322360 }, { "epoch": 13.35, "grad_norm": 0.443359375, "learning_rate": 0.00029297645243994626, "loss": 0.1805, "step": 322370 }, { "epoch": 13.35, "grad_norm": 1.6875, "learning_rate": 0.00029296576865287265, "loss": 0.2223, "step": 322380 }, { "epoch": 13.35, "grad_norm": 0.58984375, "learning_rate": 0.00029295508478494216, "loss": 0.2115, "step": 322390 }, { "epoch": 13.35, "grad_norm": 0.40234375, "learning_rate": 0.0002929444008361748, "loss": 0.1433, "step": 322400 }, { "epoch": 13.35, "grad_norm": 0.76171875, "learning_rate": 0.0002929337168065907, "loss": 0.1792, "step": 322410 }, { "epoch": 13.35, "grad_norm": 0.7578125, "learning_rate": 0.00029292303269621, "loss": 0.1865, "step": 322420 }, { "epoch": 13.36, "grad_norm": 0.59375, "learning_rate": 0.00029291234850505277, "loss": 0.1705, "step": 322430 }, { "epoch": 13.36, "grad_norm": 0.640625, "learning_rate": 0.0002929016642331392, "loss": 0.1807, "step": 322440 }, { "epoch": 13.36, "grad_norm": 0.890625, "learning_rate": 0.0002928909798804892, "loss": 0.1706, "step": 322450 }, { "epoch": 13.36, "grad_norm": 1.96875, "learning_rate": 0.0002928802954471231, "loss": 0.1941, "step": 322460 }, { "epoch": 13.36, "grad_norm": 0.76953125, "learning_rate": 0.000292869610933061, "loss": 0.1995, "step": 322470 }, { "epoch": 13.36, "grad_norm": 3.375, "learning_rate": 0.00029285892633832273, "loss": 0.1929, "step": 322480 }, { "epoch": 13.36, "grad_norm": 0.8671875, "learning_rate": 0.0002928482416629287, "loss": 0.1796, "step": 322490 }, { "epoch": 13.36, "grad_norm": 0.63671875, "learning_rate": 0.0002928375569068989, "loss": 0.1964, "step": 322500 }, { "epoch": 13.36, "grad_norm": 0.76953125, "learning_rate": 0.00029282687207025346, "loss": 0.1221, "step": 322510 }, { "epoch": 13.36, "grad_norm": 0.466796875, "learning_rate": 0.00029281618715301243, "loss": 0.1423, "step": 322520 }, { "epoch": 13.36, "grad_norm": 0.5234375, "learning_rate": 0.000292805502155196, "loss": 0.1825, "step": 322530 }, { "epoch": 13.36, "grad_norm": 1.1015625, "learning_rate": 0.0002927948170768242, "loss": 0.2285, "step": 322540 }, { "epoch": 13.36, "grad_norm": 1.1015625, "learning_rate": 0.0002927841319179172, "loss": 0.2175, "step": 322550 }, { "epoch": 13.36, "grad_norm": 0.6640625, "learning_rate": 0.00029277344667849505, "loss": 0.1533, "step": 322560 }, { "epoch": 13.36, "grad_norm": 0.375, "learning_rate": 0.0002927627613585779, "loss": 0.1638, "step": 322570 }, { "epoch": 13.36, "grad_norm": 0.515625, "learning_rate": 0.0002927520759581859, "loss": 0.128, "step": 322580 }, { "epoch": 13.36, "grad_norm": 1.1171875, "learning_rate": 0.00029274139047733906, "loss": 0.1734, "step": 322590 }, { "epoch": 13.36, "grad_norm": 1.515625, "learning_rate": 0.0002927307049160576, "loss": 0.2236, "step": 322600 }, { "epoch": 13.36, "grad_norm": 1.046875, "learning_rate": 0.00029272001927436146, "loss": 0.2437, "step": 322610 }, { "epoch": 13.36, "grad_norm": 0.8828125, "learning_rate": 0.000292709333552271, "loss": 0.1879, "step": 322620 }, { "epoch": 13.36, "grad_norm": 0.5859375, "learning_rate": 0.0002926986477498061, "loss": 0.1675, "step": 322630 }, { "epoch": 13.36, "grad_norm": 1.3984375, "learning_rate": 0.00029268796186698686, "loss": 0.174, "step": 322640 }, { "epoch": 13.36, "grad_norm": 0.53515625, "learning_rate": 0.00029267727590383366, "loss": 0.2265, "step": 322650 }, { "epoch": 13.36, "grad_norm": 0.83984375, "learning_rate": 0.00029266658986036635, "loss": 0.2433, "step": 322660 }, { "epoch": 13.36, "grad_norm": 0.71484375, "learning_rate": 0.0002926559037366051, "loss": 0.1828, "step": 322670 }, { "epoch": 13.37, "grad_norm": 0.5546875, "learning_rate": 0.00029264521753257015, "loss": 0.1941, "step": 322680 }, { "epoch": 13.37, "grad_norm": 0.671875, "learning_rate": 0.00029263453124828137, "loss": 0.1936, "step": 322690 }, { "epoch": 13.37, "grad_norm": 0.8359375, "learning_rate": 0.00029262384488375914, "loss": 0.208, "step": 322700 }, { "epoch": 13.37, "grad_norm": 0.5625, "learning_rate": 0.0002926131584390234, "loss": 0.1961, "step": 322710 }, { "epoch": 13.37, "grad_norm": 0.90234375, "learning_rate": 0.0002926024719140942, "loss": 0.1856, "step": 322720 }, { "epoch": 13.37, "grad_norm": 0.82421875, "learning_rate": 0.0002925917853089919, "loss": 0.224, "step": 322730 }, { "epoch": 13.37, "grad_norm": 0.9921875, "learning_rate": 0.00029258109862373633, "loss": 0.2466, "step": 322740 }, { "epoch": 13.37, "grad_norm": 0.6171875, "learning_rate": 0.00029257041185834775, "loss": 0.179, "step": 322750 }, { "epoch": 13.37, "grad_norm": 0.21484375, "learning_rate": 0.0002925597250128463, "loss": 0.1808, "step": 322760 }, { "epoch": 13.37, "grad_norm": 0.95703125, "learning_rate": 0.000292549038087252, "loss": 0.1533, "step": 322770 }, { "epoch": 13.37, "grad_norm": 0.81640625, "learning_rate": 0.0002925383510815851, "loss": 0.1332, "step": 322780 }, { "epoch": 13.37, "grad_norm": 0.80078125, "learning_rate": 0.0002925276639958656, "loss": 0.2189, "step": 322790 }, { "epoch": 13.37, "grad_norm": 0.9140625, "learning_rate": 0.0002925169768301135, "loss": 0.2113, "step": 322800 }, { "epoch": 13.37, "grad_norm": 0.99609375, "learning_rate": 0.00029250628958434917, "loss": 0.175, "step": 322810 }, { "epoch": 13.37, "grad_norm": 0.419921875, "learning_rate": 0.0002924956022585925, "loss": 0.222, "step": 322820 }, { "epoch": 13.37, "grad_norm": 0.8828125, "learning_rate": 0.00029248491485286374, "loss": 0.1809, "step": 322830 }, { "epoch": 13.37, "grad_norm": 0.72265625, "learning_rate": 0.000292474227367183, "loss": 0.1476, "step": 322840 }, { "epoch": 13.37, "grad_norm": 0.64453125, "learning_rate": 0.00029246353980157027, "loss": 0.1734, "step": 322850 }, { "epoch": 13.37, "grad_norm": 1.0078125, "learning_rate": 0.00029245285215604577, "loss": 0.1975, "step": 322860 }, { "epoch": 13.37, "grad_norm": 0.453125, "learning_rate": 0.00029244216443062966, "loss": 0.222, "step": 322870 }, { "epoch": 13.37, "grad_norm": 1.6796875, "learning_rate": 0.00029243147662534185, "loss": 0.1968, "step": 322880 }, { "epoch": 13.37, "grad_norm": 0.515625, "learning_rate": 0.00029242078874020265, "loss": 0.1983, "step": 322890 }, { "epoch": 13.37, "grad_norm": 1.125, "learning_rate": 0.0002924101007752321, "loss": 0.1534, "step": 322900 }, { "epoch": 13.37, "grad_norm": 1.3984375, "learning_rate": 0.00029239941273045034, "loss": 0.2515, "step": 322910 }, { "epoch": 13.38, "grad_norm": 1.1640625, "learning_rate": 0.0002923887246058775, "loss": 0.1926, "step": 322920 }, { "epoch": 13.38, "grad_norm": 0.58984375, "learning_rate": 0.0002923780364015335, "loss": 0.2238, "step": 322930 }, { "epoch": 13.38, "grad_norm": 0.8359375, "learning_rate": 0.0002923673481174388, "loss": 0.1689, "step": 322940 }, { "epoch": 13.38, "grad_norm": 1.75, "learning_rate": 0.0002923566597536133, "loss": 0.1986, "step": 322950 }, { "epoch": 13.38, "grad_norm": 0.439453125, "learning_rate": 0.000292345971310077, "loss": 0.1871, "step": 322960 }, { "epoch": 13.38, "grad_norm": 0.71875, "learning_rate": 0.00029233528278685026, "loss": 0.193, "step": 322970 }, { "epoch": 13.38, "grad_norm": 0.9140625, "learning_rate": 0.00029232459418395307, "loss": 0.2248, "step": 322980 }, { "epoch": 13.38, "grad_norm": 2.0, "learning_rate": 0.00029231390550140556, "loss": 0.2654, "step": 322990 }, { "epoch": 13.38, "grad_norm": 0.875, "learning_rate": 0.00029230321673922796, "loss": 0.2158, "step": 323000 }, { "epoch": 13.38, "grad_norm": 0.7578125, "learning_rate": 0.00029229252789744005, "loss": 0.1407, "step": 323010 }, { "epoch": 13.38, "grad_norm": 0.8671875, "learning_rate": 0.00029228183897606237, "loss": 0.1934, "step": 323020 }, { "epoch": 13.38, "grad_norm": 1.359375, "learning_rate": 0.0002922711499751147, "loss": 0.149, "step": 323030 }, { "epoch": 13.38, "grad_norm": 0.80859375, "learning_rate": 0.0002922604608946173, "loss": 0.1469, "step": 323040 }, { "epoch": 13.38, "grad_norm": 0.8359375, "learning_rate": 0.0002922497717345904, "loss": 0.1702, "step": 323050 }, { "epoch": 13.38, "grad_norm": 1.0078125, "learning_rate": 0.00029223908249505394, "loss": 0.1803, "step": 323060 }, { "epoch": 13.38, "grad_norm": 0.9296875, "learning_rate": 0.000292228393176028, "loss": 0.2022, "step": 323070 }, { "epoch": 13.38, "grad_norm": 0.75, "learning_rate": 0.00029221770377753294, "loss": 0.1797, "step": 323080 }, { "epoch": 13.38, "grad_norm": 1.015625, "learning_rate": 0.00029220701429958853, "loss": 0.2093, "step": 323090 }, { "epoch": 13.38, "grad_norm": 0.53125, "learning_rate": 0.00029219632474221526, "loss": 0.2109, "step": 323100 }, { "epoch": 13.38, "grad_norm": 1.0859375, "learning_rate": 0.00029218563510543296, "loss": 0.1908, "step": 323110 }, { "epoch": 13.38, "grad_norm": 1.203125, "learning_rate": 0.0002921749453892618, "loss": 0.222, "step": 323120 }, { "epoch": 13.38, "grad_norm": 0.8515625, "learning_rate": 0.00029216425559372205, "loss": 0.1852, "step": 323130 }, { "epoch": 13.38, "grad_norm": 0.7890625, "learning_rate": 0.0002921535657188337, "loss": 0.1953, "step": 323140 }, { "epoch": 13.38, "grad_norm": 0.5234375, "learning_rate": 0.0002921428757646169, "loss": 0.2271, "step": 323150 }, { "epoch": 13.39, "grad_norm": 1.0859375, "learning_rate": 0.00029213218573109176, "loss": 0.179, "step": 323160 }, { "epoch": 13.39, "grad_norm": 0.98828125, "learning_rate": 0.0002921214956182783, "loss": 0.1756, "step": 323170 }, { "epoch": 13.39, "grad_norm": 0.52734375, "learning_rate": 0.0002921108054261968, "loss": 0.198, "step": 323180 }, { "epoch": 13.39, "grad_norm": 0.7421875, "learning_rate": 0.00029210011515486736, "loss": 0.1484, "step": 323190 }, { "epoch": 13.39, "grad_norm": 0.578125, "learning_rate": 0.00029208942480431, "loss": 0.2082, "step": 323200 }, { "epoch": 13.39, "grad_norm": 0.625, "learning_rate": 0.000292078734374545, "loss": 0.2011, "step": 323210 }, { "epoch": 13.39, "grad_norm": 0.765625, "learning_rate": 0.00029206804386559224, "loss": 0.1892, "step": 323220 }, { "epoch": 13.39, "grad_norm": 0.5234375, "learning_rate": 0.00029205735327747197, "loss": 0.182, "step": 323230 }, { "epoch": 13.39, "grad_norm": 0.8671875, "learning_rate": 0.0002920466626102043, "loss": 0.1757, "step": 323240 }, { "epoch": 13.39, "grad_norm": 0.427734375, "learning_rate": 0.0002920359718638094, "loss": 0.188, "step": 323250 }, { "epoch": 13.39, "grad_norm": 1.578125, "learning_rate": 0.0002920252810383073, "loss": 0.2021, "step": 323260 }, { "epoch": 13.39, "grad_norm": 0.33984375, "learning_rate": 0.00029201459013371815, "loss": 0.1707, "step": 323270 }, { "epoch": 13.39, "grad_norm": 1.3359375, "learning_rate": 0.00029200389915006207, "loss": 0.1886, "step": 323280 }, { "epoch": 13.39, "grad_norm": 0.921875, "learning_rate": 0.0002919932080873593, "loss": 0.2396, "step": 323290 }, { "epoch": 13.39, "grad_norm": 1.0390625, "learning_rate": 0.0002919825169456298, "loss": 0.1989, "step": 323300 }, { "epoch": 13.39, "grad_norm": 0.9609375, "learning_rate": 0.00029197182572489365, "loss": 0.1496, "step": 323310 }, { "epoch": 13.39, "grad_norm": 1.3515625, "learning_rate": 0.00029196113442517106, "loss": 0.2309, "step": 323320 }, { "epoch": 13.39, "grad_norm": 0.8671875, "learning_rate": 0.00029195044304648223, "loss": 0.1659, "step": 323330 }, { "epoch": 13.39, "grad_norm": 0.5, "learning_rate": 0.0002919397515888472, "loss": 0.123, "step": 323340 }, { "epoch": 13.39, "grad_norm": 0.77734375, "learning_rate": 0.000291929060052286, "loss": 0.2013, "step": 323350 }, { "epoch": 13.39, "grad_norm": 1.1171875, "learning_rate": 0.00029191836843681893, "loss": 0.1807, "step": 323360 }, { "epoch": 13.39, "grad_norm": 0.0, "learning_rate": 0.000291907676742466, "loss": 0.2059, "step": 323370 }, { "epoch": 13.39, "grad_norm": 0.71875, "learning_rate": 0.0002918969849692473, "loss": 0.2091, "step": 323380 }, { "epoch": 13.39, "grad_norm": 0.91015625, "learning_rate": 0.000291886293117183, "loss": 0.186, "step": 323390 }, { "epoch": 13.4, "grad_norm": 0.7734375, "learning_rate": 0.00029187560118629327, "loss": 0.1562, "step": 323400 }, { "epoch": 13.4, "grad_norm": 1.46875, "learning_rate": 0.0002918649091765981, "loss": 0.2191, "step": 323410 }, { "epoch": 13.4, "grad_norm": 1.0234375, "learning_rate": 0.0002918542170881178, "loss": 0.1815, "step": 323420 }, { "epoch": 13.4, "grad_norm": 0.93359375, "learning_rate": 0.0002918435249208723, "loss": 0.2476, "step": 323430 }, { "epoch": 13.4, "grad_norm": 0.6953125, "learning_rate": 0.00029183283267488185, "loss": 0.129, "step": 323440 }, { "epoch": 13.4, "grad_norm": 0.91796875, "learning_rate": 0.0002918221403501665, "loss": 0.1416, "step": 323450 }, { "epoch": 13.4, "grad_norm": 0.373046875, "learning_rate": 0.00029181144794674643, "loss": 0.1838, "step": 323460 }, { "epoch": 13.4, "grad_norm": 1.890625, "learning_rate": 0.0002918007554646417, "loss": 0.1681, "step": 323470 }, { "epoch": 13.4, "grad_norm": 0.462890625, "learning_rate": 0.00029179006290387243, "loss": 0.223, "step": 323480 }, { "epoch": 13.4, "grad_norm": 0.58984375, "learning_rate": 0.00029177937026445884, "loss": 0.1906, "step": 323490 }, { "epoch": 13.4, "grad_norm": 0.63671875, "learning_rate": 0.000291768677546421, "loss": 0.2036, "step": 323500 }, { "epoch": 13.4, "grad_norm": 1.8359375, "learning_rate": 0.00029175798474977897, "loss": 0.2067, "step": 323510 }, { "epoch": 13.4, "grad_norm": 0.044189453125, "learning_rate": 0.0002917472918745529, "loss": 0.131, "step": 323520 }, { "epoch": 13.4, "grad_norm": 0.77734375, "learning_rate": 0.000291736598920763, "loss": 0.1681, "step": 323530 }, { "epoch": 13.4, "grad_norm": 1.3359375, "learning_rate": 0.00029172590588842927, "loss": 0.1998, "step": 323540 }, { "epoch": 13.4, "grad_norm": 0.859375, "learning_rate": 0.00029171521277757194, "loss": 0.2067, "step": 323550 }, { "epoch": 13.4, "grad_norm": 0.81640625, "learning_rate": 0.00029170451958821105, "loss": 0.2143, "step": 323560 }, { "epoch": 13.4, "grad_norm": 0.6875, "learning_rate": 0.0002916938263203668, "loss": 0.1839, "step": 323570 }, { "epoch": 13.4, "grad_norm": 0.54296875, "learning_rate": 0.00029168313297405916, "loss": 0.1625, "step": 323580 }, { "epoch": 13.4, "grad_norm": 1.1328125, "learning_rate": 0.0002916724395493085, "loss": 0.2253, "step": 323590 }, { "epoch": 13.4, "grad_norm": 1.5859375, "learning_rate": 0.0002916617460461348, "loss": 0.2656, "step": 323600 }, { "epoch": 13.4, "grad_norm": 0.45703125, "learning_rate": 0.00029165105246455806, "loss": 0.1551, "step": 323610 }, { "epoch": 13.4, "grad_norm": 0.60546875, "learning_rate": 0.00029164035880459874, "loss": 0.2107, "step": 323620 }, { "epoch": 13.4, "grad_norm": 0.54296875, "learning_rate": 0.0002916296650662766, "loss": 0.2158, "step": 323630 }, { "epoch": 13.41, "grad_norm": 1.0625, "learning_rate": 0.00029161897124961193, "loss": 0.1851, "step": 323640 }, { "epoch": 13.41, "grad_norm": 0.98828125, "learning_rate": 0.0002916082773546249, "loss": 0.205, "step": 323650 }, { "epoch": 13.41, "grad_norm": 1.515625, "learning_rate": 0.0002915975833813356, "loss": 0.1332, "step": 323660 }, { "epoch": 13.41, "grad_norm": 0.734375, "learning_rate": 0.0002915868893297641, "loss": 0.2075, "step": 323670 }, { "epoch": 13.41, "grad_norm": 0.7734375, "learning_rate": 0.00029157619519993064, "loss": 0.2037, "step": 323680 }, { "epoch": 13.41, "grad_norm": 1.1875, "learning_rate": 0.0002915655009918552, "loss": 0.1813, "step": 323690 }, { "epoch": 13.41, "grad_norm": 0.75390625, "learning_rate": 0.000291554806705558, "loss": 0.1974, "step": 323700 }, { "epoch": 13.41, "grad_norm": 1.578125, "learning_rate": 0.0002915441123410591, "loss": 0.2338, "step": 323710 }, { "epoch": 13.41, "grad_norm": 1.1015625, "learning_rate": 0.00029153341789837873, "loss": 0.169, "step": 323720 }, { "epoch": 13.41, "grad_norm": 0.76171875, "learning_rate": 0.00029152272337753686, "loss": 0.19, "step": 323730 }, { "epoch": 13.41, "grad_norm": 1.28125, "learning_rate": 0.00029151202877855377, "loss": 0.1841, "step": 323740 }, { "epoch": 13.41, "grad_norm": 1.0546875, "learning_rate": 0.0002915013341014496, "loss": 0.1835, "step": 323750 }, { "epoch": 13.41, "grad_norm": 1.6875, "learning_rate": 0.00029149063934624434, "loss": 0.246, "step": 323760 }, { "epoch": 13.41, "grad_norm": 1.15625, "learning_rate": 0.0002914799445129581, "loss": 0.1709, "step": 323770 }, { "epoch": 13.41, "grad_norm": 0.439453125, "learning_rate": 0.0002914692496016112, "loss": 0.1727, "step": 323780 }, { "epoch": 13.41, "grad_norm": 1.3515625, "learning_rate": 0.0002914585546122235, "loss": 0.1607, "step": 323790 }, { "epoch": 13.41, "grad_norm": 0.70703125, "learning_rate": 0.00029144785954481544, "loss": 0.2078, "step": 323800 }, { "epoch": 13.41, "grad_norm": 1.3203125, "learning_rate": 0.0002914371643994069, "loss": 0.1951, "step": 323810 }, { "epoch": 13.41, "grad_norm": 0.275390625, "learning_rate": 0.00029142646917601814, "loss": 0.1435, "step": 323820 }, { "epoch": 13.41, "grad_norm": 1.203125, "learning_rate": 0.0002914157738746692, "loss": 0.18, "step": 323830 }, { "epoch": 13.41, "grad_norm": 0.640625, "learning_rate": 0.0002914050784953803, "loss": 0.1608, "step": 323840 }, { "epoch": 13.41, "grad_norm": 1.1484375, "learning_rate": 0.0002913943830381714, "loss": 0.1796, "step": 323850 }, { "epoch": 13.41, "grad_norm": 0.275390625, "learning_rate": 0.0002913836875030629, "loss": 0.152, "step": 323860 }, { "epoch": 13.41, "grad_norm": 1.1171875, "learning_rate": 0.0002913729918900746, "loss": 0.2188, "step": 323870 }, { "epoch": 13.42, "grad_norm": 1.0546875, "learning_rate": 0.0002913622961992269, "loss": 0.1906, "step": 323880 }, { "epoch": 13.42, "grad_norm": 0.59765625, "learning_rate": 0.0002913516004305398, "loss": 0.1425, "step": 323890 }, { "epoch": 13.42, "grad_norm": 1.4765625, "learning_rate": 0.00029134090458403345, "loss": 0.1441, "step": 323900 }, { "epoch": 13.42, "grad_norm": 0.6796875, "learning_rate": 0.000291330208659728, "loss": 0.1989, "step": 323910 }, { "epoch": 13.42, "grad_norm": 1.0703125, "learning_rate": 0.00029131951265764356, "loss": 0.159, "step": 323920 }, { "epoch": 13.42, "grad_norm": 0.41796875, "learning_rate": 0.0002913088165778002, "loss": 0.1931, "step": 323930 }, { "epoch": 13.42, "grad_norm": 0.98046875, "learning_rate": 0.0002912981204202182, "loss": 0.1701, "step": 323940 }, { "epoch": 13.42, "grad_norm": 1.3046875, "learning_rate": 0.0002912874241849175, "loss": 0.1831, "step": 323950 }, { "epoch": 13.42, "grad_norm": 1.28125, "learning_rate": 0.0002912767278719184, "loss": 0.2337, "step": 323960 }, { "epoch": 13.42, "grad_norm": 0.8515625, "learning_rate": 0.000291266031481241, "loss": 0.1919, "step": 323970 }, { "epoch": 13.42, "grad_norm": 1.1015625, "learning_rate": 0.00029125533501290525, "loss": 0.2332, "step": 323980 }, { "epoch": 13.42, "grad_norm": 1.15625, "learning_rate": 0.0002912446384669316, "loss": 0.2148, "step": 323990 }, { "epoch": 13.42, "grad_norm": 1.4140625, "learning_rate": 0.0002912339418433397, "loss": 0.1278, "step": 324000 }, { "epoch": 13.42, "grad_norm": 0.8203125, "learning_rate": 0.00029122324514215025, "loss": 0.1998, "step": 324010 }, { "epoch": 13.42, "grad_norm": 0.89453125, "learning_rate": 0.00029121254836338297, "loss": 0.1904, "step": 324020 }, { "epoch": 13.42, "grad_norm": 0.2119140625, "learning_rate": 0.00029120185150705816, "loss": 0.2075, "step": 324030 }, { "epoch": 13.42, "grad_norm": 1.25, "learning_rate": 0.00029119115457319593, "loss": 0.1959, "step": 324040 }, { "epoch": 13.42, "grad_norm": 0.546875, "learning_rate": 0.0002911804575618164, "loss": 0.1638, "step": 324050 }, { "epoch": 13.42, "grad_norm": 0.81640625, "learning_rate": 0.0002911697604729397, "loss": 0.1497, "step": 324060 }, { "epoch": 13.42, "grad_norm": 0.73046875, "learning_rate": 0.0002911590633065859, "loss": 0.1863, "step": 324070 }, { "epoch": 13.42, "grad_norm": 1.4375, "learning_rate": 0.0002911483660627752, "loss": 0.1897, "step": 324080 }, { "epoch": 13.42, "grad_norm": 0.94921875, "learning_rate": 0.0002911376687415278, "loss": 0.1624, "step": 324090 }, { "epoch": 13.42, "grad_norm": 1.546875, "learning_rate": 0.0002911269713428636, "loss": 0.1968, "step": 324100 }, { "epoch": 13.42, "grad_norm": 1.65625, "learning_rate": 0.000291116273866803, "loss": 0.2088, "step": 324110 }, { "epoch": 13.43, "grad_norm": 1.4140625, "learning_rate": 0.00029110557631336606, "loss": 0.1961, "step": 324120 }, { "epoch": 13.43, "grad_norm": 0.69921875, "learning_rate": 0.00029109487868257276, "loss": 0.2472, "step": 324130 }, { "epoch": 13.43, "grad_norm": 0.6171875, "learning_rate": 0.0002910841809744433, "loss": 0.237, "step": 324140 }, { "epoch": 13.43, "grad_norm": 1.2109375, "learning_rate": 0.000291073483188998, "loss": 0.2046, "step": 324150 }, { "epoch": 13.43, "grad_norm": 1.1015625, "learning_rate": 0.0002910627853262567, "loss": 0.2001, "step": 324160 }, { "epoch": 13.43, "grad_norm": 0.6171875, "learning_rate": 0.00029105208738623977, "loss": 0.2083, "step": 324170 }, { "epoch": 13.43, "grad_norm": 0.359375, "learning_rate": 0.00029104138936896716, "loss": 0.2029, "step": 324180 }, { "epoch": 13.43, "grad_norm": 1.109375, "learning_rate": 0.0002910306912744591, "loss": 0.2051, "step": 324190 }, { "epoch": 13.43, "grad_norm": 0.57421875, "learning_rate": 0.00029101999310273584, "loss": 0.2095, "step": 324200 }, { "epoch": 13.43, "grad_norm": 0.478515625, "learning_rate": 0.00029100929485381726, "loss": 0.2053, "step": 324210 }, { "epoch": 13.43, "grad_norm": 1.0703125, "learning_rate": 0.00029099859652772364, "loss": 0.1576, "step": 324220 }, { "epoch": 13.43, "grad_norm": 0.51953125, "learning_rate": 0.0002909878981244751, "loss": 0.1525, "step": 324230 }, { "epoch": 13.43, "grad_norm": 2.0625, "learning_rate": 0.0002909771996440917, "loss": 0.167, "step": 324240 }, { "epoch": 13.43, "grad_norm": 1.109375, "learning_rate": 0.00029096650108659375, "loss": 0.2088, "step": 324250 }, { "epoch": 13.43, "grad_norm": 0.71484375, "learning_rate": 0.00029095580245200114, "loss": 0.2406, "step": 324260 }, { "epoch": 13.43, "grad_norm": 1.2578125, "learning_rate": 0.0002909451037403342, "loss": 0.2354, "step": 324270 }, { "epoch": 13.43, "grad_norm": 0.83203125, "learning_rate": 0.00029093440495161306, "loss": 0.1938, "step": 324280 }, { "epoch": 13.43, "grad_norm": 0.4375, "learning_rate": 0.00029092370608585765, "loss": 0.1656, "step": 324290 }, { "epoch": 13.43, "grad_norm": 0.734375, "learning_rate": 0.0002909130071430883, "loss": 0.1669, "step": 324300 }, { "epoch": 13.43, "grad_norm": 0.9140625, "learning_rate": 0.0002909023081233252, "loss": 0.195, "step": 324310 }, { "epoch": 13.43, "grad_norm": 1.2109375, "learning_rate": 0.00029089160902658816, "loss": 0.2098, "step": 324320 }, { "epoch": 13.43, "grad_norm": 0.44921875, "learning_rate": 0.0002908809098528977, "loss": 0.1856, "step": 324330 }, { "epoch": 13.43, "grad_norm": 0.5390625, "learning_rate": 0.0002908702106022738, "loss": 0.1783, "step": 324340 }, { "epoch": 13.43, "grad_norm": 0.73828125, "learning_rate": 0.00029085951127473644, "loss": 0.1916, "step": 324350 }, { "epoch": 13.43, "grad_norm": 1.4765625, "learning_rate": 0.000290848811870306, "loss": 0.2316, "step": 324360 }, { "epoch": 13.44, "grad_norm": 1.578125, "learning_rate": 0.0002908381123890024, "loss": 0.1976, "step": 324370 }, { "epoch": 13.44, "grad_norm": 0.63671875, "learning_rate": 0.000290827412830846, "loss": 0.2169, "step": 324380 }, { "epoch": 13.44, "grad_norm": 0.87890625, "learning_rate": 0.0002908167131958568, "loss": 0.1614, "step": 324390 }, { "epoch": 13.44, "grad_norm": 1.0546875, "learning_rate": 0.0002908060134840548, "loss": 0.2345, "step": 324400 }, { "epoch": 13.44, "grad_norm": 0.51171875, "learning_rate": 0.00029079531369546055, "loss": 0.2101, "step": 324410 }, { "epoch": 13.44, "grad_norm": 0.671875, "learning_rate": 0.0002907846138300937, "loss": 0.1627, "step": 324420 }, { "epoch": 13.44, "grad_norm": 0.6015625, "learning_rate": 0.0002907739138879747, "loss": 0.1499, "step": 324430 }, { "epoch": 13.44, "grad_norm": 0.388671875, "learning_rate": 0.0002907632138691236, "loss": 0.1853, "step": 324440 }, { "epoch": 13.44, "grad_norm": 0.85546875, "learning_rate": 0.00029075251377356056, "loss": 0.2348, "step": 324450 }, { "epoch": 13.44, "grad_norm": 1.5546875, "learning_rate": 0.00029074181360130564, "loss": 0.2084, "step": 324460 }, { "epoch": 13.44, "grad_norm": 0.3203125, "learning_rate": 0.00029073111335237904, "loss": 0.163, "step": 324470 }, { "epoch": 13.44, "grad_norm": 0.7421875, "learning_rate": 0.00029072041302680087, "loss": 0.1904, "step": 324480 }, { "epoch": 13.44, "grad_norm": 1.8046875, "learning_rate": 0.00029070971262459133, "loss": 0.2127, "step": 324490 }, { "epoch": 13.44, "grad_norm": 0.640625, "learning_rate": 0.00029069901214577045, "loss": 0.2224, "step": 324500 }, { "epoch": 13.44, "grad_norm": 0.75390625, "learning_rate": 0.0002906883115903585, "loss": 0.1155, "step": 324510 }, { "epoch": 13.44, "grad_norm": 1.2578125, "learning_rate": 0.00029067761095837545, "loss": 0.1825, "step": 324520 }, { "epoch": 13.44, "grad_norm": 0.51171875, "learning_rate": 0.00029066691024984156, "loss": 0.1862, "step": 324530 }, { "epoch": 13.44, "grad_norm": 0.412109375, "learning_rate": 0.00029065620946477693, "loss": 0.192, "step": 324540 }, { "epoch": 13.44, "grad_norm": 0.6640625, "learning_rate": 0.00029064550860320173, "loss": 0.1903, "step": 324550 }, { "epoch": 13.44, "grad_norm": 0.5546875, "learning_rate": 0.00029063480766513606, "loss": 0.1785, "step": 324560 }, { "epoch": 13.44, "grad_norm": 0.498046875, "learning_rate": 0.00029062410665060015, "loss": 0.1743, "step": 324570 }, { "epoch": 13.44, "grad_norm": 0.6953125, "learning_rate": 0.0002906134055596139, "loss": 0.1551, "step": 324580 }, { "epoch": 13.44, "grad_norm": 0.42578125, "learning_rate": 0.0002906027043921977, "loss": 0.1791, "step": 324590 }, { "epoch": 13.44, "grad_norm": 1.1640625, "learning_rate": 0.00029059200314837164, "loss": 0.2047, "step": 324600 }, { "epoch": 13.45, "grad_norm": 0.74609375, "learning_rate": 0.00029058130182815566, "loss": 0.1969, "step": 324610 }, { "epoch": 13.45, "grad_norm": 0.201171875, "learning_rate": 0.00029057060043157016, "loss": 0.1661, "step": 324620 }, { "epoch": 13.45, "grad_norm": 0.66796875, "learning_rate": 0.00029055989895863515, "loss": 0.2293, "step": 324630 }, { "epoch": 13.45, "grad_norm": 0.7890625, "learning_rate": 0.0002905491974093708, "loss": 0.1782, "step": 324640 }, { "epoch": 13.45, "grad_norm": 1.2734375, "learning_rate": 0.00029053849578379733, "loss": 0.1539, "step": 324650 }, { "epoch": 13.45, "grad_norm": 0.796875, "learning_rate": 0.00029052779408193465, "loss": 0.1805, "step": 324660 }, { "epoch": 13.45, "grad_norm": 0.9140625, "learning_rate": 0.00029051709230380307, "loss": 0.1355, "step": 324670 }, { "epoch": 13.45, "grad_norm": 0.7421875, "learning_rate": 0.00029050639044942274, "loss": 0.2205, "step": 324680 }, { "epoch": 13.45, "grad_norm": 0.98046875, "learning_rate": 0.0002904956885188137, "loss": 0.2177, "step": 324690 }, { "epoch": 13.45, "grad_norm": 1.0859375, "learning_rate": 0.00029048498651199615, "loss": 0.2219, "step": 324700 }, { "epoch": 13.45, "grad_norm": 1.8515625, "learning_rate": 0.00029047428442899027, "loss": 0.1739, "step": 324710 }, { "epoch": 13.45, "grad_norm": 1.078125, "learning_rate": 0.00029046358226981616, "loss": 0.1554, "step": 324720 }, { "epoch": 13.45, "grad_norm": 0.7421875, "learning_rate": 0.00029045288003449397, "loss": 0.1974, "step": 324730 }, { "epoch": 13.45, "grad_norm": 0.640625, "learning_rate": 0.00029044217772304377, "loss": 0.1978, "step": 324740 }, { "epoch": 13.45, "grad_norm": 0.76171875, "learning_rate": 0.00029043147533548583, "loss": 0.2207, "step": 324750 }, { "epoch": 13.45, "grad_norm": 0.5859375, "learning_rate": 0.0002904207728718401, "loss": 0.2269, "step": 324760 }, { "epoch": 13.45, "grad_norm": 1.1875, "learning_rate": 0.0002904100703321269, "loss": 0.2119, "step": 324770 }, { "epoch": 13.45, "grad_norm": 1.2109375, "learning_rate": 0.00029039936771636636, "loss": 0.1615, "step": 324780 }, { "epoch": 13.45, "grad_norm": 0.765625, "learning_rate": 0.0002903886650245786, "loss": 0.1879, "step": 324790 }, { "epoch": 13.45, "grad_norm": 0.39453125, "learning_rate": 0.00029037796225678367, "loss": 0.1514, "step": 324800 }, { "epoch": 13.45, "grad_norm": 0.87109375, "learning_rate": 0.0002903672594130018, "loss": 0.1667, "step": 324810 }, { "epoch": 13.45, "grad_norm": 1.2109375, "learning_rate": 0.00029035655649325303, "loss": 0.241, "step": 324820 }, { "epoch": 13.45, "grad_norm": 1.0625, "learning_rate": 0.00029034585349755764, "loss": 0.2157, "step": 324830 }, { "epoch": 13.45, "grad_norm": 0.5, "learning_rate": 0.0002903351504259357, "loss": 0.2096, "step": 324840 }, { "epoch": 13.46, "grad_norm": 0.5703125, "learning_rate": 0.00029032444727840735, "loss": 0.2549, "step": 324850 }, { "epoch": 13.46, "grad_norm": 1.015625, "learning_rate": 0.00029031374405499285, "loss": 0.2113, "step": 324860 }, { "epoch": 13.46, "grad_norm": 0.81640625, "learning_rate": 0.0002903030407557121, "loss": 0.1873, "step": 324870 }, { "epoch": 13.46, "grad_norm": 1.140625, "learning_rate": 0.00029029233738058543, "loss": 0.1666, "step": 324880 }, { "epoch": 13.46, "grad_norm": 1.375, "learning_rate": 0.000290281633929633, "loss": 0.206, "step": 324890 }, { "epoch": 13.46, "grad_norm": 0.62890625, "learning_rate": 0.0002902709304028748, "loss": 0.1884, "step": 324900 }, { "epoch": 13.46, "grad_norm": 0.6328125, "learning_rate": 0.00029026022680033104, "loss": 0.218, "step": 324910 }, { "epoch": 13.46, "grad_norm": 0.486328125, "learning_rate": 0.0002902495231220219, "loss": 0.2207, "step": 324920 }, { "epoch": 13.46, "grad_norm": 0.51953125, "learning_rate": 0.0002902388193679675, "loss": 0.1772, "step": 324930 }, { "epoch": 13.46, "grad_norm": 0.75, "learning_rate": 0.0002902281155381881, "loss": 0.2148, "step": 324940 }, { "epoch": 13.46, "grad_norm": 0.251953125, "learning_rate": 0.0002902174116327036, "loss": 0.2003, "step": 324950 }, { "epoch": 13.46, "grad_norm": 0.478515625, "learning_rate": 0.00029020670765153426, "loss": 0.2189, "step": 324960 }, { "epoch": 13.46, "grad_norm": 1.53125, "learning_rate": 0.00029019600359470033, "loss": 0.2046, "step": 324970 }, { "epoch": 13.46, "grad_norm": 0.8125, "learning_rate": 0.0002901852994622219, "loss": 0.2252, "step": 324980 }, { "epoch": 13.46, "grad_norm": 0.37890625, "learning_rate": 0.0002901745952541189, "loss": 0.1714, "step": 324990 }, { "epoch": 13.46, "grad_norm": 0.79296875, "learning_rate": 0.0002901638909704118, "loss": 0.1655, "step": 325000 }, { "epoch": 13.46, "grad_norm": 0.62109375, "learning_rate": 0.00029015318661112057, "loss": 0.1771, "step": 325010 }, { "epoch": 13.46, "grad_norm": 0.59375, "learning_rate": 0.0002901424821762654, "loss": 0.2299, "step": 325020 }, { "epoch": 13.46, "grad_norm": 0.828125, "learning_rate": 0.0002901317776658663, "loss": 0.1832, "step": 325030 }, { "epoch": 13.46, "grad_norm": 0.69140625, "learning_rate": 0.00029012107307994364, "loss": 0.1699, "step": 325040 }, { "epoch": 13.46, "grad_norm": 0.5, "learning_rate": 0.00029011036841851737, "loss": 0.1714, "step": 325050 }, { "epoch": 13.46, "grad_norm": 0.90625, "learning_rate": 0.00029009966368160776, "loss": 0.1801, "step": 325060 }, { "epoch": 13.46, "grad_norm": 1.3671875, "learning_rate": 0.0002900889588692349, "loss": 0.1644, "step": 325070 }, { "epoch": 13.46, "grad_norm": 0.546875, "learning_rate": 0.00029007825398141894, "loss": 0.2074, "step": 325080 }, { "epoch": 13.47, "grad_norm": 0.76953125, "learning_rate": 0.00029006754901818007, "loss": 0.2366, "step": 325090 }, { "epoch": 13.47, "grad_norm": 1.2109375, "learning_rate": 0.00029005684397953834, "loss": 0.1654, "step": 325100 }, { "epoch": 13.47, "grad_norm": 1.0859375, "learning_rate": 0.00029004613886551405, "loss": 0.2228, "step": 325110 }, { "epoch": 13.47, "grad_norm": 0.6796875, "learning_rate": 0.00029003543367612725, "loss": 0.1772, "step": 325120 }, { "epoch": 13.47, "grad_norm": 1.2578125, "learning_rate": 0.000290024728411398, "loss": 0.235, "step": 325130 }, { "epoch": 13.47, "grad_norm": 0.60546875, "learning_rate": 0.0002900140230713466, "loss": 0.2116, "step": 325140 }, { "epoch": 13.47, "grad_norm": 0.796875, "learning_rate": 0.0002900033176559931, "loss": 0.1835, "step": 325150 }, { "epoch": 13.47, "grad_norm": 1.25, "learning_rate": 0.00028999261216535767, "loss": 0.2324, "step": 325160 }, { "epoch": 13.47, "grad_norm": 0.37109375, "learning_rate": 0.00028998190659946046, "loss": 0.2243, "step": 325170 }, { "epoch": 13.47, "grad_norm": 0.353515625, "learning_rate": 0.0002899712009583216, "loss": 0.1953, "step": 325180 }, { "epoch": 13.47, "grad_norm": 1.1328125, "learning_rate": 0.00028996049524196134, "loss": 0.1958, "step": 325190 }, { "epoch": 13.47, "grad_norm": 1.390625, "learning_rate": 0.0002899497894503997, "loss": 0.203, "step": 325200 }, { "epoch": 13.47, "grad_norm": 1.0625, "learning_rate": 0.0002899390835836568, "loss": 0.2327, "step": 325210 }, { "epoch": 13.47, "grad_norm": 1.0625, "learning_rate": 0.00028992837764175294, "loss": 0.1796, "step": 325220 }, { "epoch": 13.47, "grad_norm": 0.625, "learning_rate": 0.00028991767162470816, "loss": 0.1724, "step": 325230 }, { "epoch": 13.47, "grad_norm": 0.3359375, "learning_rate": 0.00028990696553254263, "loss": 0.1677, "step": 325240 }, { "epoch": 13.47, "grad_norm": 0.97265625, "learning_rate": 0.00028989625936527653, "loss": 0.2168, "step": 325250 }, { "epoch": 13.47, "grad_norm": 0.302734375, "learning_rate": 0.0002898855531229299, "loss": 0.1857, "step": 325260 }, { "epoch": 13.47, "grad_norm": 1.09375, "learning_rate": 0.0002898748468055231, "loss": 0.2292, "step": 325270 }, { "epoch": 13.47, "grad_norm": 1.0703125, "learning_rate": 0.000289864140413076, "loss": 0.2544, "step": 325280 }, { "epoch": 13.47, "grad_norm": 0.875, "learning_rate": 0.00028985343394560896, "loss": 0.1986, "step": 325290 }, { "epoch": 13.47, "grad_norm": 0.69921875, "learning_rate": 0.0002898427274031421, "loss": 0.1763, "step": 325300 }, { "epoch": 13.47, "grad_norm": 0.63671875, "learning_rate": 0.00028983202078569544, "loss": 0.1784, "step": 325310 }, { "epoch": 13.47, "grad_norm": 0.890625, "learning_rate": 0.00028982131409328925, "loss": 0.2534, "step": 325320 }, { "epoch": 13.48, "grad_norm": 0.40234375, "learning_rate": 0.0002898106073259437, "loss": 0.1623, "step": 325330 }, { "epoch": 13.48, "grad_norm": 0.62890625, "learning_rate": 0.0002897999004836788, "loss": 0.2089, "step": 325340 }, { "epoch": 13.48, "grad_norm": 0.80859375, "learning_rate": 0.0002897891935665149, "loss": 0.1603, "step": 325350 }, { "epoch": 13.48, "grad_norm": 0.64453125, "learning_rate": 0.000289778486574472, "loss": 0.1866, "step": 325360 }, { "epoch": 13.48, "grad_norm": 1.8203125, "learning_rate": 0.0002897677795075702, "loss": 0.1909, "step": 325370 }, { "epoch": 13.48, "grad_norm": 0.328125, "learning_rate": 0.0002897570723658298, "loss": 0.1556, "step": 325380 }, { "epoch": 13.48, "grad_norm": 0.70703125, "learning_rate": 0.0002897463651492708, "loss": 0.1695, "step": 325390 }, { "epoch": 13.48, "grad_norm": 1.0859375, "learning_rate": 0.00028973565785791355, "loss": 0.1732, "step": 325400 }, { "epoch": 13.48, "grad_norm": 0.65625, "learning_rate": 0.00028972495049177805, "loss": 0.2246, "step": 325410 }, { "epoch": 13.48, "grad_norm": 0.72265625, "learning_rate": 0.00028971424305088443, "loss": 0.188, "step": 325420 }, { "epoch": 13.48, "grad_norm": 1.40625, "learning_rate": 0.000289703535535253, "loss": 0.1582, "step": 325430 }, { "epoch": 13.48, "grad_norm": 0.291015625, "learning_rate": 0.00028969282794490375, "loss": 0.1994, "step": 325440 }, { "epoch": 13.48, "grad_norm": 0.58203125, "learning_rate": 0.00028968212027985686, "loss": 0.2164, "step": 325450 }, { "epoch": 13.48, "grad_norm": 0.490234375, "learning_rate": 0.0002896714125401325, "loss": 0.1709, "step": 325460 }, { "epoch": 13.48, "grad_norm": 0.68359375, "learning_rate": 0.0002896607047257508, "loss": 0.2195, "step": 325470 }, { "epoch": 13.48, "grad_norm": 0.29296875, "learning_rate": 0.00028964999683673203, "loss": 0.1454, "step": 325480 }, { "epoch": 13.48, "grad_norm": 0.423828125, "learning_rate": 0.0002896392888730962, "loss": 0.2018, "step": 325490 }, { "epoch": 13.48, "grad_norm": 0.80078125, "learning_rate": 0.0002896285808348635, "loss": 0.1827, "step": 325500 }, { "epoch": 13.48, "grad_norm": 0.9296875, "learning_rate": 0.00028961787272205406, "loss": 0.1308, "step": 325510 }, { "epoch": 13.48, "grad_norm": 0.52734375, "learning_rate": 0.0002896071645346881, "loss": 0.1918, "step": 325520 }, { "epoch": 13.48, "grad_norm": 1.671875, "learning_rate": 0.00028959645627278576, "loss": 0.2114, "step": 325530 }, { "epoch": 13.48, "grad_norm": 0.5546875, "learning_rate": 0.00028958574793636716, "loss": 0.1221, "step": 325540 }, { "epoch": 13.48, "grad_norm": 0.40234375, "learning_rate": 0.0002895750395254524, "loss": 0.1713, "step": 325550 }, { "epoch": 13.48, "grad_norm": 0.73828125, "learning_rate": 0.00028956433104006176, "loss": 0.1695, "step": 325560 }, { "epoch": 13.49, "grad_norm": 1.7734375, "learning_rate": 0.0002895536224802153, "loss": 0.1957, "step": 325570 }, { "epoch": 13.49, "grad_norm": 0.921875, "learning_rate": 0.0002895429138459331, "loss": 0.2041, "step": 325580 }, { "epoch": 13.49, "grad_norm": 0.396484375, "learning_rate": 0.00028953220513723555, "loss": 0.173, "step": 325590 }, { "epoch": 13.49, "grad_norm": 1.578125, "learning_rate": 0.00028952149635414257, "loss": 0.1663, "step": 325600 }, { "epoch": 13.49, "grad_norm": 0.8984375, "learning_rate": 0.0002895107874966744, "loss": 0.1616, "step": 325610 }, { "epoch": 13.49, "grad_norm": 0.5859375, "learning_rate": 0.0002895000785648513, "loss": 0.2449, "step": 325620 }, { "epoch": 13.49, "grad_norm": 0.60546875, "learning_rate": 0.00028948936955869314, "loss": 0.2067, "step": 325630 }, { "epoch": 13.49, "grad_norm": 0.5859375, "learning_rate": 0.0002894786604782204, "loss": 0.2112, "step": 325640 }, { "epoch": 13.49, "grad_norm": 1.078125, "learning_rate": 0.000289467951323453, "loss": 0.2261, "step": 325650 }, { "epoch": 13.49, "grad_norm": 1.2265625, "learning_rate": 0.0002894572420944112, "loss": 0.1767, "step": 325660 }, { "epoch": 13.49, "grad_norm": 0.9296875, "learning_rate": 0.0002894465327911152, "loss": 0.1971, "step": 325670 }, { "epoch": 13.49, "grad_norm": 1.9375, "learning_rate": 0.000289435823413585, "loss": 0.1988, "step": 325680 }, { "epoch": 13.49, "grad_norm": 1.03125, "learning_rate": 0.0002894251139618409, "loss": 0.2109, "step": 325690 }, { "epoch": 13.49, "grad_norm": 0.87890625, "learning_rate": 0.000289414404435903, "loss": 0.2029, "step": 325700 }, { "epoch": 13.49, "grad_norm": 2.1875, "learning_rate": 0.0002894036948357913, "loss": 0.1813, "step": 325710 }, { "epoch": 13.49, "grad_norm": 0.3671875, "learning_rate": 0.0002893929851615263, "loss": 0.17, "step": 325720 }, { "epoch": 13.49, "grad_norm": 0.0, "learning_rate": 0.0002893822754131278, "loss": 0.2312, "step": 325730 }, { "epoch": 13.49, "grad_norm": 0.439453125, "learning_rate": 0.0002893715655906162, "loss": 0.2347, "step": 325740 }, { "epoch": 13.49, "grad_norm": 0.515625, "learning_rate": 0.00028936085569401153, "loss": 0.2227, "step": 325750 }, { "epoch": 13.49, "grad_norm": 0.9140625, "learning_rate": 0.00028935014572333397, "loss": 0.2245, "step": 325760 }, { "epoch": 13.49, "grad_norm": 0.9765625, "learning_rate": 0.00028933943567860377, "loss": 0.2114, "step": 325770 }, { "epoch": 13.49, "grad_norm": 1.5234375, "learning_rate": 0.000289328725559841, "loss": 0.1961, "step": 325780 }, { "epoch": 13.49, "grad_norm": 0.94921875, "learning_rate": 0.00028931801536706564, "loss": 0.2206, "step": 325790 }, { "epoch": 13.49, "grad_norm": 0.8125, "learning_rate": 0.0002893073051002982, "loss": 0.2067, "step": 325800 }, { "epoch": 13.5, "grad_norm": 1.90625, "learning_rate": 0.00028929659475955853, "loss": 0.1967, "step": 325810 }, { "epoch": 13.5, "grad_norm": 0.93359375, "learning_rate": 0.00028928588434486694, "loss": 0.176, "step": 325820 }, { "epoch": 13.5, "grad_norm": 0.5, "learning_rate": 0.0002892751738562437, "loss": 0.2352, "step": 325830 }, { "epoch": 13.5, "grad_norm": 0.765625, "learning_rate": 0.0002892644632937086, "loss": 0.2101, "step": 325840 }, { "epoch": 13.5, "grad_norm": 0.6796875, "learning_rate": 0.0002892537526572822, "loss": 0.1426, "step": 325850 }, { "epoch": 13.5, "grad_norm": 0.9375, "learning_rate": 0.00028924304194698436, "loss": 0.2082, "step": 325860 }, { "epoch": 13.5, "grad_norm": 1.109375, "learning_rate": 0.00028923233116283543, "loss": 0.1927, "step": 325870 }, { "epoch": 13.5, "grad_norm": 0.84375, "learning_rate": 0.0002892216203048555, "loss": 0.1652, "step": 325880 }, { "epoch": 13.5, "grad_norm": 1.2890625, "learning_rate": 0.0002892109093730647, "loss": 0.1843, "step": 325890 }, { "epoch": 13.5, "grad_norm": 0.80859375, "learning_rate": 0.0002892001983674831, "loss": 0.2505, "step": 325900 }, { "epoch": 13.5, "grad_norm": 1.0859375, "learning_rate": 0.0002891894872881311, "loss": 0.1682, "step": 325910 }, { "epoch": 13.5, "grad_norm": 2.21875, "learning_rate": 0.00028917877613502866, "loss": 0.21, "step": 325920 }, { "epoch": 13.5, "grad_norm": 0.55859375, "learning_rate": 0.00028916806490819597, "loss": 0.1871, "step": 325930 }, { "epoch": 13.5, "grad_norm": 0.4609375, "learning_rate": 0.0002891573536076532, "loss": 0.1536, "step": 325940 }, { "epoch": 13.5, "grad_norm": 2.5, "learning_rate": 0.0002891466422334206, "loss": 0.2092, "step": 325950 }, { "epoch": 13.5, "grad_norm": 1.3203125, "learning_rate": 0.0002891359307855182, "loss": 0.155, "step": 325960 }, { "epoch": 13.5, "grad_norm": 1.2890625, "learning_rate": 0.00028912521926396624, "loss": 0.2347, "step": 325970 }, { "epoch": 13.5, "grad_norm": 0.83203125, "learning_rate": 0.00028911450766878476, "loss": 0.1969, "step": 325980 }, { "epoch": 13.5, "grad_norm": 0.6015625, "learning_rate": 0.0002891037959999941, "loss": 0.2557, "step": 325990 }, { "epoch": 13.5, "grad_norm": 0.6328125, "learning_rate": 0.0002890930842576142, "loss": 0.2243, "step": 326000 }, { "epoch": 13.5, "grad_norm": 0.8125, "learning_rate": 0.0002890823724416655, "loss": 0.2134, "step": 326010 }, { "epoch": 13.5, "grad_norm": 0.53125, "learning_rate": 0.0002890716605521678, "loss": 0.2062, "step": 326020 }, { "epoch": 13.5, "grad_norm": 1.171875, "learning_rate": 0.00028906094858914154, "loss": 0.1731, "step": 326030 }, { "epoch": 13.5, "grad_norm": 0.9375, "learning_rate": 0.0002890502365526069, "loss": 0.182, "step": 326040 }, { "epoch": 13.5, "grad_norm": 0.55078125, "learning_rate": 0.0002890395244425838, "loss": 0.1662, "step": 326050 }, { "epoch": 13.51, "grad_norm": 0.76171875, "learning_rate": 0.00028902881225909254, "loss": 0.1938, "step": 326060 }, { "epoch": 13.51, "grad_norm": 1.1015625, "learning_rate": 0.00028901810000215336, "loss": 0.1461, "step": 326070 }, { "epoch": 13.51, "grad_norm": 0.0, "learning_rate": 0.0002890073876717862, "loss": 0.2341, "step": 326080 }, { "epoch": 13.51, "grad_norm": 0.84765625, "learning_rate": 0.0002889966752680115, "loss": 0.1888, "step": 326090 }, { "epoch": 13.51, "grad_norm": 1.6328125, "learning_rate": 0.0002889859627908492, "loss": 0.2018, "step": 326100 }, { "epoch": 13.51, "grad_norm": 2.15625, "learning_rate": 0.0002889752502403195, "loss": 0.1915, "step": 326110 }, { "epoch": 13.51, "grad_norm": 1.0078125, "learning_rate": 0.0002889645376164426, "loss": 0.1896, "step": 326120 }, { "epoch": 13.51, "grad_norm": 0.88671875, "learning_rate": 0.0002889538249192386, "loss": 0.1981, "step": 326130 }, { "epoch": 13.51, "grad_norm": 0.6171875, "learning_rate": 0.00028894311214872775, "loss": 0.2015, "step": 326140 }, { "epoch": 13.51, "grad_norm": 1.0625, "learning_rate": 0.0002889323993049302, "loss": 0.2191, "step": 326150 }, { "epoch": 13.51, "grad_norm": 0.75, "learning_rate": 0.000288921686387866, "loss": 0.1763, "step": 326160 }, { "epoch": 13.51, "grad_norm": 1.171875, "learning_rate": 0.0002889109733975555, "loss": 0.1651, "step": 326170 }, { "epoch": 13.51, "grad_norm": 0.8671875, "learning_rate": 0.00028890026033401875, "loss": 0.2056, "step": 326180 }, { "epoch": 13.51, "grad_norm": 2.78125, "learning_rate": 0.0002888895471972758, "loss": 0.1597, "step": 326190 }, { "epoch": 13.51, "grad_norm": 1.390625, "learning_rate": 0.00028887883398734707, "loss": 0.1878, "step": 326200 }, { "epoch": 13.51, "grad_norm": 1.328125, "learning_rate": 0.00028886812070425243, "loss": 0.1615, "step": 326210 }, { "epoch": 13.51, "grad_norm": 0.6171875, "learning_rate": 0.00028885740734801226, "loss": 0.2041, "step": 326220 }, { "epoch": 13.51, "grad_norm": 0.94140625, "learning_rate": 0.0002888466939186466, "loss": 0.1564, "step": 326230 }, { "epoch": 13.51, "grad_norm": 0.0, "learning_rate": 0.00028883598041617563, "loss": 0.2204, "step": 326240 }, { "epoch": 13.51, "grad_norm": 1.0234375, "learning_rate": 0.00028882526684061963, "loss": 0.2649, "step": 326250 }, { "epoch": 13.51, "grad_norm": 0.77734375, "learning_rate": 0.00028881455319199864, "loss": 0.1852, "step": 326260 }, { "epoch": 13.51, "grad_norm": 1.296875, "learning_rate": 0.0002888038394703328, "loss": 0.146, "step": 326270 }, { "epoch": 13.51, "grad_norm": 2.046875, "learning_rate": 0.0002887931256756424, "loss": 0.1792, "step": 326280 }, { "epoch": 13.51, "grad_norm": 1.1796875, "learning_rate": 0.0002887824118079475, "loss": 0.1763, "step": 326290 }, { "epoch": 13.52, "grad_norm": 1.0859375, "learning_rate": 0.00028877169786726826, "loss": 0.1787, "step": 326300 }, { "epoch": 13.52, "grad_norm": 1.2578125, "learning_rate": 0.00028876098385362486, "loss": 0.2119, "step": 326310 }, { "epoch": 13.52, "grad_norm": 0.29296875, "learning_rate": 0.0002887502697670375, "loss": 0.1859, "step": 326320 }, { "epoch": 13.52, "grad_norm": 0.96875, "learning_rate": 0.0002887395556075264, "loss": 0.1839, "step": 326330 }, { "epoch": 13.52, "grad_norm": 1.21875, "learning_rate": 0.00028872884137511154, "loss": 0.225, "step": 326340 }, { "epoch": 13.52, "grad_norm": 0.392578125, "learning_rate": 0.0002887181270698132, "loss": 0.2679, "step": 326350 }, { "epoch": 13.52, "grad_norm": 0.515625, "learning_rate": 0.0002887074126916515, "loss": 0.229, "step": 326360 }, { "epoch": 13.52, "grad_norm": 0.7578125, "learning_rate": 0.00028869669824064663, "loss": 0.2105, "step": 326370 }, { "epoch": 13.52, "grad_norm": 0.8359375, "learning_rate": 0.0002886859837168189, "loss": 0.1896, "step": 326380 }, { "epoch": 13.52, "grad_norm": 0.33984375, "learning_rate": 0.00028867526912018817, "loss": 0.2082, "step": 326390 }, { "epoch": 13.52, "grad_norm": 0.7578125, "learning_rate": 0.0002886645544507748, "loss": 0.2035, "step": 326400 }, { "epoch": 13.52, "grad_norm": 0.71484375, "learning_rate": 0.0002886538397085989, "loss": 0.1928, "step": 326410 }, { "epoch": 13.52, "grad_norm": 0.8984375, "learning_rate": 0.0002886431248936807, "loss": 0.1772, "step": 326420 }, { "epoch": 13.52, "grad_norm": 0.8984375, "learning_rate": 0.0002886324100060402, "loss": 0.1638, "step": 326430 }, { "epoch": 13.52, "grad_norm": 0.66015625, "learning_rate": 0.0002886216950456978, "loss": 0.1748, "step": 326440 }, { "epoch": 13.52, "grad_norm": 0.953125, "learning_rate": 0.00028861098001267346, "loss": 0.2213, "step": 326450 }, { "epoch": 13.52, "grad_norm": 0.78515625, "learning_rate": 0.0002886002649069875, "loss": 0.1642, "step": 326460 }, { "epoch": 13.52, "grad_norm": 0.3203125, "learning_rate": 0.00028858954972865996, "loss": 0.1668, "step": 326470 }, { "epoch": 13.52, "grad_norm": 0.421875, "learning_rate": 0.00028857883447771105, "loss": 0.1902, "step": 326480 }, { "epoch": 13.52, "grad_norm": 0.353515625, "learning_rate": 0.00028856811915416094, "loss": 0.1914, "step": 326490 }, { "epoch": 13.52, "grad_norm": 0.7265625, "learning_rate": 0.0002885574037580299, "loss": 0.175, "step": 326500 }, { "epoch": 13.52, "grad_norm": 0.75390625, "learning_rate": 0.00028854668828933784, "loss": 0.1585, "step": 326510 }, { "epoch": 13.52, "grad_norm": 0.72265625, "learning_rate": 0.00028853597274810513, "loss": 0.2161, "step": 326520 }, { "epoch": 13.52, "grad_norm": 0.78515625, "learning_rate": 0.0002885252571343518, "loss": 0.1455, "step": 326530 }, { "epoch": 13.53, "grad_norm": 1.640625, "learning_rate": 0.00028851454144809824, "loss": 0.1656, "step": 326540 }, { "epoch": 13.53, "grad_norm": 0.81640625, "learning_rate": 0.0002885038256893644, "loss": 0.1238, "step": 326550 }, { "epoch": 13.53, "grad_norm": 0.3359375, "learning_rate": 0.0002884931098581706, "loss": 0.1473, "step": 326560 }, { "epoch": 13.53, "grad_norm": 2.296875, "learning_rate": 0.0002884823939545368, "loss": 0.1834, "step": 326570 }, { "epoch": 13.53, "grad_norm": 0.37890625, "learning_rate": 0.0002884716779784834, "loss": 0.1954, "step": 326580 }, { "epoch": 13.53, "grad_norm": 0.3359375, "learning_rate": 0.00028846096193003035, "loss": 0.1955, "step": 326590 }, { "epoch": 13.53, "grad_norm": 1.40625, "learning_rate": 0.00028845024580919797, "loss": 0.2328, "step": 326600 }, { "epoch": 13.53, "grad_norm": 1.0625, "learning_rate": 0.0002884395296160064, "loss": 0.2013, "step": 326610 }, { "epoch": 13.53, "grad_norm": 1.8828125, "learning_rate": 0.0002884288133504757, "loss": 0.1829, "step": 326620 }, { "epoch": 13.53, "grad_norm": 1.3828125, "learning_rate": 0.0002884180970126262, "loss": 0.1895, "step": 326630 }, { "epoch": 13.53, "grad_norm": 1.03125, "learning_rate": 0.000288407380602478, "loss": 0.1607, "step": 326640 }, { "epoch": 13.53, "grad_norm": 0.9921875, "learning_rate": 0.0002883966641200513, "loss": 0.1709, "step": 326650 }, { "epoch": 13.53, "grad_norm": 0.84375, "learning_rate": 0.0002883859475653662, "loss": 0.173, "step": 326660 }, { "epoch": 13.53, "grad_norm": 0.388671875, "learning_rate": 0.0002883752309384428, "loss": 0.2218, "step": 326670 }, { "epoch": 13.53, "grad_norm": 0.9921875, "learning_rate": 0.0002883645142393014, "loss": 0.2182, "step": 326680 }, { "epoch": 13.53, "grad_norm": 1.34375, "learning_rate": 0.00028835379746796214, "loss": 0.1534, "step": 326690 }, { "epoch": 13.53, "grad_norm": 1.0390625, "learning_rate": 0.00028834308062444515, "loss": 0.2338, "step": 326700 }, { "epoch": 13.53, "grad_norm": 1.9375, "learning_rate": 0.0002883323637087707, "loss": 0.185, "step": 326710 }, { "epoch": 13.53, "grad_norm": 1.4765625, "learning_rate": 0.0002883216467209588, "loss": 0.1763, "step": 326720 }, { "epoch": 13.53, "grad_norm": 0.65234375, "learning_rate": 0.00028831092966102975, "loss": 0.1979, "step": 326730 }, { "epoch": 13.53, "grad_norm": 0.61328125, "learning_rate": 0.0002883002125290037, "loss": 0.1638, "step": 326740 }, { "epoch": 13.53, "grad_norm": 0.515625, "learning_rate": 0.00028828949532490073, "loss": 0.1487, "step": 326750 }, { "epoch": 13.53, "grad_norm": 0.65625, "learning_rate": 0.00028827877804874104, "loss": 0.1905, "step": 326760 }, { "epoch": 13.53, "grad_norm": 0.4140625, "learning_rate": 0.00028826806070054485, "loss": 0.2381, "step": 326770 }, { "epoch": 13.54, "grad_norm": 0.734375, "learning_rate": 0.00028825734328033226, "loss": 0.1991, "step": 326780 }, { "epoch": 13.54, "grad_norm": 0.9140625, "learning_rate": 0.0002882466257881236, "loss": 0.2062, "step": 326790 }, { "epoch": 13.54, "grad_norm": 0.88671875, "learning_rate": 0.00028823590822393884, "loss": 0.1969, "step": 326800 }, { "epoch": 13.54, "grad_norm": 1.1015625, "learning_rate": 0.00028822519058779823, "loss": 0.2202, "step": 326810 }, { "epoch": 13.54, "grad_norm": 0.390625, "learning_rate": 0.00028821447287972194, "loss": 0.1559, "step": 326820 }, { "epoch": 13.54, "grad_norm": 0.5625, "learning_rate": 0.00028820375509973014, "loss": 0.225, "step": 326830 }, { "epoch": 13.54, "grad_norm": 1.3359375, "learning_rate": 0.000288193037247843, "loss": 0.2114, "step": 326840 }, { "epoch": 13.54, "grad_norm": 1.0, "learning_rate": 0.0002881823193240807, "loss": 0.1953, "step": 326850 }, { "epoch": 13.54, "grad_norm": 0.90234375, "learning_rate": 0.00028817160132846336, "loss": 0.1685, "step": 326860 }, { "epoch": 13.54, "grad_norm": 0.8984375, "learning_rate": 0.0002881608832610113, "loss": 0.1788, "step": 326870 }, { "epoch": 13.54, "grad_norm": 0.99609375, "learning_rate": 0.00028815016512174444, "loss": 0.2071, "step": 326880 }, { "epoch": 13.54, "grad_norm": 1.1328125, "learning_rate": 0.0002881394469106832, "loss": 0.2177, "step": 326890 }, { "epoch": 13.54, "grad_norm": 1.15625, "learning_rate": 0.0002881287286278476, "loss": 0.1639, "step": 326900 }, { "epoch": 13.54, "grad_norm": 0.421875, "learning_rate": 0.00028811801027325774, "loss": 0.2213, "step": 326910 }, { "epoch": 13.54, "grad_norm": 1.390625, "learning_rate": 0.00028810729184693406, "loss": 0.1933, "step": 326920 }, { "epoch": 13.54, "grad_norm": 1.0, "learning_rate": 0.0002880965733488965, "loss": 0.1818, "step": 326930 }, { "epoch": 13.54, "grad_norm": 0.734375, "learning_rate": 0.00028808585477916525, "loss": 0.1965, "step": 326940 }, { "epoch": 13.54, "grad_norm": 1.3125, "learning_rate": 0.00028807513613776073, "loss": 0.1684, "step": 326950 }, { "epoch": 13.54, "grad_norm": 1.1015625, "learning_rate": 0.00028806441742470275, "loss": 0.1879, "step": 326960 }, { "epoch": 13.54, "grad_norm": 1.28125, "learning_rate": 0.00028805369864001165, "loss": 0.1792, "step": 326970 }, { "epoch": 13.54, "grad_norm": 0.52734375, "learning_rate": 0.00028804297978370764, "loss": 0.1856, "step": 326980 }, { "epoch": 13.54, "grad_norm": 0.62890625, "learning_rate": 0.0002880322608558108, "loss": 0.1873, "step": 326990 }, { "epoch": 13.54, "grad_norm": 0.55859375, "learning_rate": 0.00028802154185634144, "loss": 0.175, "step": 327000 }, { "epoch": 13.54, "grad_norm": 1.3828125, "learning_rate": 0.0002880108227853196, "loss": 0.2037, "step": 327010 }, { "epoch": 13.55, "grad_norm": 1.6953125, "learning_rate": 0.00028800010364276544, "loss": 0.1651, "step": 327020 }, { "epoch": 13.55, "grad_norm": 0.609375, "learning_rate": 0.0002879893844286993, "loss": 0.2173, "step": 327030 }, { "epoch": 13.55, "grad_norm": 0.73046875, "learning_rate": 0.00028797866514314107, "loss": 0.171, "step": 327040 }, { "epoch": 13.55, "grad_norm": 1.2890625, "learning_rate": 0.0002879679457861113, "loss": 0.2057, "step": 327050 }, { "epoch": 13.55, "grad_norm": 1.015625, "learning_rate": 0.0002879572263576299, "loss": 0.1811, "step": 327060 }, { "epoch": 13.55, "grad_norm": 1.1328125, "learning_rate": 0.000287946506857717, "loss": 0.1866, "step": 327070 }, { "epoch": 13.55, "grad_norm": 0.69921875, "learning_rate": 0.000287935787286393, "loss": 0.1945, "step": 327080 }, { "epoch": 13.55, "grad_norm": 0.84375, "learning_rate": 0.00028792506764367784, "loss": 0.1811, "step": 327090 }, { "epoch": 13.55, "grad_norm": 0.5546875, "learning_rate": 0.00028791434792959183, "loss": 0.205, "step": 327100 }, { "epoch": 13.55, "grad_norm": 0.7734375, "learning_rate": 0.0002879036281441552, "loss": 0.2042, "step": 327110 }, { "epoch": 13.55, "grad_norm": 0.8828125, "learning_rate": 0.00028789290828738783, "loss": 0.1988, "step": 327120 }, { "epoch": 13.55, "grad_norm": 0.76171875, "learning_rate": 0.00028788218835931024, "loss": 0.1651, "step": 327130 }, { "epoch": 13.55, "grad_norm": 0.8359375, "learning_rate": 0.0002878714683599425, "loss": 0.2224, "step": 327140 }, { "epoch": 13.55, "grad_norm": 0.419921875, "learning_rate": 0.0002878607482893046, "loss": 0.1895, "step": 327150 }, { "epoch": 13.55, "grad_norm": 0.73828125, "learning_rate": 0.000287850028147417, "loss": 0.2041, "step": 327160 }, { "epoch": 13.55, "grad_norm": 1.25, "learning_rate": 0.00028783930793429966, "loss": 0.244, "step": 327170 }, { "epoch": 13.55, "grad_norm": 0.76171875, "learning_rate": 0.0002878285876499729, "loss": 0.2052, "step": 327180 }, { "epoch": 13.55, "grad_norm": 0.95703125, "learning_rate": 0.0002878178672944568, "loss": 0.2002, "step": 327190 }, { "epoch": 13.55, "grad_norm": 0.49609375, "learning_rate": 0.0002878071468677714, "loss": 0.1672, "step": 327200 }, { "epoch": 13.55, "grad_norm": 0.75, "learning_rate": 0.00028779642636993723, "loss": 0.1774, "step": 327210 }, { "epoch": 13.55, "grad_norm": 0.59375, "learning_rate": 0.0002877857058009743, "loss": 0.1632, "step": 327220 }, { "epoch": 13.55, "grad_norm": 0.6484375, "learning_rate": 0.00028777498516090254, "loss": 0.2203, "step": 327230 }, { "epoch": 13.55, "grad_norm": 0.890625, "learning_rate": 0.0002877642644497425, "loss": 0.1686, "step": 327240 }, { "epoch": 13.55, "grad_norm": 0.78125, "learning_rate": 0.0002877535436675142, "loss": 0.1685, "step": 327250 }, { "epoch": 13.56, "grad_norm": 1.1640625, "learning_rate": 0.0002877428228142377, "loss": 0.2106, "step": 327260 }, { "epoch": 13.56, "grad_norm": 1.25, "learning_rate": 0.00028773210188993336, "loss": 0.2183, "step": 327270 }, { "epoch": 13.56, "grad_norm": 1.0859375, "learning_rate": 0.0002877213808946212, "loss": 0.2159, "step": 327280 }, { "epoch": 13.56, "grad_norm": 0.703125, "learning_rate": 0.0002877106598283216, "loss": 0.2038, "step": 327290 }, { "epoch": 13.56, "grad_norm": 1.0078125, "learning_rate": 0.00028769993869105456, "loss": 0.1571, "step": 327300 }, { "epoch": 13.56, "grad_norm": 0.55078125, "learning_rate": 0.0002876892174828402, "loss": 0.2003, "step": 327310 }, { "epoch": 13.56, "grad_norm": 0.6640625, "learning_rate": 0.000287678496203699, "loss": 0.2014, "step": 327320 }, { "epoch": 13.56, "grad_norm": 2.515625, "learning_rate": 0.0002876677748536508, "loss": 0.1835, "step": 327330 }, { "epoch": 13.56, "grad_norm": 0.67578125, "learning_rate": 0.0002876570534327159, "loss": 0.2075, "step": 327340 }, { "epoch": 13.56, "grad_norm": 0.5078125, "learning_rate": 0.0002876463319409146, "loss": 0.1862, "step": 327350 }, { "epoch": 13.56, "grad_norm": 1.4375, "learning_rate": 0.0002876356103782668, "loss": 0.2112, "step": 327360 }, { "epoch": 13.56, "grad_norm": 1.9140625, "learning_rate": 0.00028762488874479303, "loss": 0.1848, "step": 327370 }, { "epoch": 13.56, "grad_norm": 0.69921875, "learning_rate": 0.0002876141670405132, "loss": 0.2104, "step": 327380 }, { "epoch": 13.56, "grad_norm": 0.66796875, "learning_rate": 0.0002876034452654475, "loss": 0.1997, "step": 327390 }, { "epoch": 13.56, "grad_norm": 1.3203125, "learning_rate": 0.00028759272341961635, "loss": 0.2122, "step": 327400 }, { "epoch": 13.56, "grad_norm": 0.447265625, "learning_rate": 0.0002875820015030396, "loss": 0.1857, "step": 327410 }, { "epoch": 13.56, "grad_norm": 0.86328125, "learning_rate": 0.0002875712795157376, "loss": 0.1964, "step": 327420 }, { "epoch": 13.56, "grad_norm": 0.66796875, "learning_rate": 0.00028756055745773066, "loss": 0.1657, "step": 327430 }, { "epoch": 13.56, "grad_norm": 1.6015625, "learning_rate": 0.0002875498353290386, "loss": 0.1836, "step": 327440 }, { "epoch": 13.56, "grad_norm": 0.2177734375, "learning_rate": 0.00028753911312968196, "loss": 0.1665, "step": 327450 }, { "epoch": 13.56, "grad_norm": 0.625, "learning_rate": 0.00028752839085968067, "loss": 0.2006, "step": 327460 }, { "epoch": 13.56, "grad_norm": 1.2421875, "learning_rate": 0.00028751766851905505, "loss": 0.1954, "step": 327470 }, { "epoch": 13.56, "grad_norm": 0.54296875, "learning_rate": 0.00028750694610782526, "loss": 0.1894, "step": 327480 }, { "epoch": 13.56, "grad_norm": 0.54296875, "learning_rate": 0.00028749622362601136, "loss": 0.1949, "step": 327490 }, { "epoch": 13.57, "grad_norm": 0.64453125, "learning_rate": 0.0002874855010736336, "loss": 0.203, "step": 327500 }, { "epoch": 13.57, "grad_norm": 1.1015625, "learning_rate": 0.0002874747784507123, "loss": 0.1819, "step": 327510 }, { "epoch": 13.57, "grad_norm": 1.078125, "learning_rate": 0.00028746405575726737, "loss": 0.2202, "step": 327520 }, { "epoch": 13.57, "grad_norm": 0.48828125, "learning_rate": 0.0002874533329933193, "loss": 0.1982, "step": 327530 }, { "epoch": 13.57, "grad_norm": 0.78515625, "learning_rate": 0.00028744261015888794, "loss": 0.1741, "step": 327540 }, { "epoch": 13.57, "grad_norm": 0.50390625, "learning_rate": 0.00028743188725399374, "loss": 0.1494, "step": 327550 }, { "epoch": 13.57, "grad_norm": 1.3203125, "learning_rate": 0.0002874211642786567, "loss": 0.2109, "step": 327560 }, { "epoch": 13.57, "grad_norm": 0.96484375, "learning_rate": 0.0002874104412328971, "loss": 0.2066, "step": 327570 }, { "epoch": 13.57, "grad_norm": 1.1796875, "learning_rate": 0.0002873997181167351, "loss": 0.208, "step": 327580 }, { "epoch": 13.57, "grad_norm": 0.9296875, "learning_rate": 0.0002873889949301908, "loss": 0.1733, "step": 327590 }, { "epoch": 13.57, "grad_norm": 0.458984375, "learning_rate": 0.0002873782716732846, "loss": 0.1977, "step": 327600 }, { "epoch": 13.57, "grad_norm": 0.890625, "learning_rate": 0.0002873675483460364, "loss": 0.2412, "step": 327610 }, { "epoch": 13.57, "grad_norm": 1.5, "learning_rate": 0.00028735682494846653, "loss": 0.1688, "step": 327620 }, { "epoch": 13.57, "grad_norm": 0.439453125, "learning_rate": 0.0002873461014805952, "loss": 0.147, "step": 327630 }, { "epoch": 13.57, "grad_norm": 0.1640625, "learning_rate": 0.00028733537794244253, "loss": 0.1973, "step": 327640 }, { "epoch": 13.57, "grad_norm": 0.478515625, "learning_rate": 0.0002873246543340287, "loss": 0.2372, "step": 327650 }, { "epoch": 13.57, "grad_norm": 0.88671875, "learning_rate": 0.00028731393065537387, "loss": 0.1928, "step": 327660 }, { "epoch": 13.57, "grad_norm": 0.62109375, "learning_rate": 0.00028730320690649827, "loss": 0.1712, "step": 327670 }, { "epoch": 13.57, "grad_norm": 0.765625, "learning_rate": 0.00028729248308742203, "loss": 0.2045, "step": 327680 }, { "epoch": 13.57, "grad_norm": 0.5078125, "learning_rate": 0.00028728175919816545, "loss": 0.175, "step": 327690 }, { "epoch": 13.57, "grad_norm": 1.1484375, "learning_rate": 0.0002872710352387485, "loss": 0.1961, "step": 327700 }, { "epoch": 13.57, "grad_norm": 0.8046875, "learning_rate": 0.0002872603112091916, "loss": 0.2175, "step": 327710 }, { "epoch": 13.57, "grad_norm": 0.765625, "learning_rate": 0.0002872495871095148, "loss": 0.2105, "step": 327720 }, { "epoch": 13.57, "grad_norm": 0.318359375, "learning_rate": 0.00028723886293973827, "loss": 0.1534, "step": 327730 }, { "epoch": 13.57, "grad_norm": 0.4609375, "learning_rate": 0.00028722813869988225, "loss": 0.1928, "step": 327740 }, { "epoch": 13.58, "grad_norm": 1.265625, "learning_rate": 0.0002872174143899668, "loss": 0.1853, "step": 327750 }, { "epoch": 13.58, "grad_norm": 0.65625, "learning_rate": 0.00028720669001001226, "loss": 0.1648, "step": 327760 }, { "epoch": 13.58, "grad_norm": 0.478515625, "learning_rate": 0.0002871959655600388, "loss": 0.1666, "step": 327770 }, { "epoch": 13.58, "grad_norm": 0.515625, "learning_rate": 0.0002871852410400665, "loss": 0.1965, "step": 327780 }, { "epoch": 13.58, "grad_norm": 0.52734375, "learning_rate": 0.0002871745164501156, "loss": 0.128, "step": 327790 }, { "epoch": 13.58, "grad_norm": 0.66796875, "learning_rate": 0.00028716379179020626, "loss": 0.2282, "step": 327800 }, { "epoch": 13.58, "grad_norm": 0.890625, "learning_rate": 0.0002871530670603587, "loss": 0.2082, "step": 327810 }, { "epoch": 13.58, "grad_norm": 0.8203125, "learning_rate": 0.0002871423422605931, "loss": 0.1955, "step": 327820 }, { "epoch": 13.58, "grad_norm": 0.384765625, "learning_rate": 0.0002871316173909296, "loss": 0.2184, "step": 327830 }, { "epoch": 13.58, "grad_norm": 0.83203125, "learning_rate": 0.0002871208924513883, "loss": 0.2123, "step": 327840 }, { "epoch": 13.58, "grad_norm": 1.734375, "learning_rate": 0.0002871101674419897, "loss": 0.2099, "step": 327850 }, { "epoch": 13.58, "grad_norm": 0.5234375, "learning_rate": 0.0002870994423627536, "loss": 0.2284, "step": 327860 }, { "epoch": 13.58, "grad_norm": 0.95703125, "learning_rate": 0.00028708871721370044, "loss": 0.2145, "step": 327870 }, { "epoch": 13.58, "grad_norm": 0.82421875, "learning_rate": 0.00028707799199485025, "loss": 0.2066, "step": 327880 }, { "epoch": 13.58, "grad_norm": 0.7421875, "learning_rate": 0.0002870672667062234, "loss": 0.162, "step": 327890 }, { "epoch": 13.58, "grad_norm": 0.37890625, "learning_rate": 0.00028705654134783987, "loss": 0.1612, "step": 327900 }, { "epoch": 13.58, "grad_norm": 0.98046875, "learning_rate": 0.0002870458159197199, "loss": 0.1269, "step": 327910 }, { "epoch": 13.58, "grad_norm": 0.95703125, "learning_rate": 0.00028703509042188373, "loss": 0.1402, "step": 327920 }, { "epoch": 13.58, "grad_norm": 1.2578125, "learning_rate": 0.0002870243648543516, "loss": 0.2234, "step": 327930 }, { "epoch": 13.58, "grad_norm": 0.98046875, "learning_rate": 0.00028701363921714354, "loss": 0.1991, "step": 327940 }, { "epoch": 13.58, "grad_norm": 1.2109375, "learning_rate": 0.0002870029135102798, "loss": 0.1673, "step": 327950 }, { "epoch": 13.58, "grad_norm": 0.99609375, "learning_rate": 0.0002869921877337806, "loss": 0.1563, "step": 327960 }, { "epoch": 13.58, "grad_norm": 0.7109375, "learning_rate": 0.0002869814618876661, "loss": 0.1874, "step": 327970 }, { "epoch": 13.58, "grad_norm": 0.9140625, "learning_rate": 0.0002869707359719565, "loss": 0.2022, "step": 327980 }, { "epoch": 13.59, "grad_norm": 1.234375, "learning_rate": 0.0002869600099866719, "loss": 0.1817, "step": 327990 }, { "epoch": 13.59, "grad_norm": 0.85546875, "learning_rate": 0.00028694928393183263, "loss": 0.1817, "step": 328000 }, { "epoch": 13.59, "grad_norm": 1.2890625, "learning_rate": 0.0002869385578074587, "loss": 0.1564, "step": 328010 }, { "epoch": 13.59, "grad_norm": 0.474609375, "learning_rate": 0.0002869278316135705, "loss": 0.2231, "step": 328020 }, { "epoch": 13.59, "grad_norm": 0.55078125, "learning_rate": 0.0002869171053501881, "loss": 0.1567, "step": 328030 }, { "epoch": 13.59, "grad_norm": 0.6484375, "learning_rate": 0.00028690637901733165, "loss": 0.2258, "step": 328040 }, { "epoch": 13.59, "grad_norm": 1.0390625, "learning_rate": 0.0002868956526150214, "loss": 0.1774, "step": 328050 }, { "epoch": 13.59, "grad_norm": 0.9765625, "learning_rate": 0.00028688492614327747, "loss": 0.1951, "step": 328060 }, { "epoch": 13.59, "grad_norm": 2.265625, "learning_rate": 0.00028687419960212016, "loss": 0.2161, "step": 328070 }, { "epoch": 13.59, "grad_norm": 0.73046875, "learning_rate": 0.0002868634729915695, "loss": 0.1553, "step": 328080 }, { "epoch": 13.59, "grad_norm": 0.443359375, "learning_rate": 0.0002868527463116458, "loss": 0.1751, "step": 328090 }, { "epoch": 13.59, "grad_norm": 0.9921875, "learning_rate": 0.0002868420195623693, "loss": 0.1635, "step": 328100 }, { "epoch": 13.59, "grad_norm": 1.875, "learning_rate": 0.00028683129274376004, "loss": 0.2233, "step": 328110 }, { "epoch": 13.59, "grad_norm": 0.828125, "learning_rate": 0.00028682056585583823, "loss": 0.1801, "step": 328120 }, { "epoch": 13.59, "grad_norm": 1.3125, "learning_rate": 0.00028680983889862414, "loss": 0.1752, "step": 328130 }, { "epoch": 13.59, "grad_norm": 1.3046875, "learning_rate": 0.0002867991118721378, "loss": 0.1789, "step": 328140 }, { "epoch": 13.59, "grad_norm": 0.9609375, "learning_rate": 0.0002867883847763996, "loss": 0.2224, "step": 328150 }, { "epoch": 13.59, "grad_norm": 0.81640625, "learning_rate": 0.00028677765761142966, "loss": 0.1954, "step": 328160 }, { "epoch": 13.59, "grad_norm": 1.3125, "learning_rate": 0.00028676693037724805, "loss": 0.1768, "step": 328170 }, { "epoch": 13.59, "grad_norm": 0.859375, "learning_rate": 0.0002867562030738752, "loss": 0.2432, "step": 328180 }, { "epoch": 13.59, "grad_norm": 0.65625, "learning_rate": 0.00028674547570133095, "loss": 0.1942, "step": 328190 }, { "epoch": 13.59, "grad_norm": 1.0078125, "learning_rate": 0.0002867347482596358, "loss": 0.192, "step": 328200 }, { "epoch": 13.59, "grad_norm": 0.326171875, "learning_rate": 0.0002867240207488098, "loss": 0.1385, "step": 328210 }, { "epoch": 13.59, "grad_norm": 0.80078125, "learning_rate": 0.00028671329316887315, "loss": 0.1826, "step": 328220 }, { "epoch": 13.6, "grad_norm": 0.91015625, "learning_rate": 0.0002867025655198461, "loss": 0.2129, "step": 328230 }, { "epoch": 13.6, "grad_norm": 1.0625, "learning_rate": 0.00028669183780174865, "loss": 0.1554, "step": 328240 }, { "epoch": 13.6, "grad_norm": 1.3125, "learning_rate": 0.0002866811100146012, "loss": 0.1662, "step": 328250 }, { "epoch": 13.6, "grad_norm": 0.0, "learning_rate": 0.00028667038215842393, "loss": 0.2016, "step": 328260 }, { "epoch": 13.6, "grad_norm": 0.98046875, "learning_rate": 0.0002866596542332369, "loss": 0.1831, "step": 328270 }, { "epoch": 13.6, "grad_norm": 0.68359375, "learning_rate": 0.00028664892623906036, "loss": 0.2645, "step": 328280 }, { "epoch": 13.6, "grad_norm": 0.6328125, "learning_rate": 0.0002866381981759145, "loss": 0.1484, "step": 328290 }, { "epoch": 13.6, "grad_norm": 0.4609375, "learning_rate": 0.0002866274700438195, "loss": 0.1811, "step": 328300 }, { "epoch": 13.6, "grad_norm": 1.0703125, "learning_rate": 0.00028661674184279564, "loss": 0.1499, "step": 328310 }, { "epoch": 13.6, "grad_norm": 1.6171875, "learning_rate": 0.0002866060135728629, "loss": 0.217, "step": 328320 }, { "epoch": 13.6, "grad_norm": 1.6953125, "learning_rate": 0.0002865952852340417, "loss": 0.1809, "step": 328330 }, { "epoch": 13.6, "grad_norm": 0.54296875, "learning_rate": 0.0002865845568263521, "loss": 0.1993, "step": 328340 }, { "epoch": 13.6, "grad_norm": 0.90625, "learning_rate": 0.00028657382834981423, "loss": 0.2235, "step": 328350 }, { "epoch": 13.6, "grad_norm": 1.5625, "learning_rate": 0.00028656309980444844, "loss": 0.2004, "step": 328360 }, { "epoch": 13.6, "grad_norm": 0.36328125, "learning_rate": 0.0002865523711902748, "loss": 0.2071, "step": 328370 }, { "epoch": 13.6, "grad_norm": 0.734375, "learning_rate": 0.0002865416425073136, "loss": 0.193, "step": 328380 }, { "epoch": 13.6, "grad_norm": 0.54296875, "learning_rate": 0.00028653091375558493, "loss": 0.1984, "step": 328390 }, { "epoch": 13.6, "grad_norm": 1.0234375, "learning_rate": 0.00028652018493510907, "loss": 0.1779, "step": 328400 }, { "epoch": 13.6, "grad_norm": 1.5546875, "learning_rate": 0.0002865094560459061, "loss": 0.1794, "step": 328410 }, { "epoch": 13.6, "grad_norm": 0.61328125, "learning_rate": 0.00028649872708799636, "loss": 0.2203, "step": 328420 }, { "epoch": 13.6, "grad_norm": 0.62109375, "learning_rate": 0.00028648799806139984, "loss": 0.1285, "step": 328430 }, { "epoch": 13.6, "grad_norm": 1.4921875, "learning_rate": 0.00028647726896613695, "loss": 0.1923, "step": 328440 }, { "epoch": 13.6, "grad_norm": 1.421875, "learning_rate": 0.00028646653980222774, "loss": 0.1383, "step": 328450 }, { "epoch": 13.6, "grad_norm": 0.3046875, "learning_rate": 0.00028645581056969244, "loss": 0.1896, "step": 328460 }, { "epoch": 13.61, "grad_norm": 1.7734375, "learning_rate": 0.00028644508126855127, "loss": 0.1801, "step": 328470 }, { "epoch": 13.61, "grad_norm": 0.439453125, "learning_rate": 0.0002864343518988244, "loss": 0.1905, "step": 328480 }, { "epoch": 13.61, "grad_norm": 1.2421875, "learning_rate": 0.000286423622460532, "loss": 0.1482, "step": 328490 }, { "epoch": 13.61, "grad_norm": 1.453125, "learning_rate": 0.00028641289295369427, "loss": 0.2376, "step": 328500 }, { "epoch": 13.61, "grad_norm": 1.421875, "learning_rate": 0.00028640216337833137, "loss": 0.2317, "step": 328510 }, { "epoch": 13.61, "grad_norm": 0.07958984375, "learning_rate": 0.0002863914337344636, "loss": 0.1838, "step": 328520 }, { "epoch": 13.61, "grad_norm": 0.384765625, "learning_rate": 0.00028638070402211104, "loss": 0.1849, "step": 328530 }, { "epoch": 13.61, "grad_norm": 0.55859375, "learning_rate": 0.0002863699742412939, "loss": 0.2058, "step": 328540 }, { "epoch": 13.61, "grad_norm": 1.1171875, "learning_rate": 0.00028635924439203246, "loss": 0.2251, "step": 328550 }, { "epoch": 13.61, "grad_norm": 0.41015625, "learning_rate": 0.00028634851447434677, "loss": 0.1814, "step": 328560 }, { "epoch": 13.61, "grad_norm": 0.625, "learning_rate": 0.00028633778448825713, "loss": 0.1886, "step": 328570 }, { "epoch": 13.61, "grad_norm": 1.203125, "learning_rate": 0.00028632705443378377, "loss": 0.1449, "step": 328580 }, { "epoch": 13.61, "grad_norm": 0.796875, "learning_rate": 0.00028631632431094675, "loss": 0.228, "step": 328590 }, { "epoch": 13.61, "grad_norm": 0.69921875, "learning_rate": 0.00028630559411976634, "loss": 0.2235, "step": 328600 }, { "epoch": 13.61, "grad_norm": 0.7265625, "learning_rate": 0.0002862948638602627, "loss": 0.1582, "step": 328610 }, { "epoch": 13.61, "grad_norm": 0.6328125, "learning_rate": 0.0002862841335324561, "loss": 0.1668, "step": 328620 }, { "epoch": 13.61, "grad_norm": 0.62109375, "learning_rate": 0.00028627340313636666, "loss": 0.211, "step": 328630 }, { "epoch": 13.61, "grad_norm": 0.412109375, "learning_rate": 0.00028626267267201457, "loss": 0.1837, "step": 328640 }, { "epoch": 13.61, "grad_norm": 0.7109375, "learning_rate": 0.0002862519421394201, "loss": 0.2071, "step": 328650 }, { "epoch": 13.61, "grad_norm": 0.490234375, "learning_rate": 0.0002862412115386033, "loss": 0.1761, "step": 328660 }, { "epoch": 13.61, "grad_norm": 0.46484375, "learning_rate": 0.0002862304808695845, "loss": 0.1884, "step": 328670 }, { "epoch": 13.61, "grad_norm": 0.2314453125, "learning_rate": 0.0002862197501323839, "loss": 0.2172, "step": 328680 }, { "epoch": 13.61, "grad_norm": 0.48828125, "learning_rate": 0.00028620901932702153, "loss": 0.1778, "step": 328690 }, { "epoch": 13.61, "grad_norm": 1.171875, "learning_rate": 0.00028619828845351773, "loss": 0.2076, "step": 328700 }, { "epoch": 13.62, "grad_norm": 0.478515625, "learning_rate": 0.00028618755751189275, "loss": 0.2131, "step": 328710 }, { "epoch": 13.62, "grad_norm": 0.59765625, "learning_rate": 0.00028617682650216664, "loss": 0.1866, "step": 328720 }, { "epoch": 13.62, "grad_norm": 1.7265625, "learning_rate": 0.00028616609542435963, "loss": 0.1891, "step": 328730 }, { "epoch": 13.62, "grad_norm": 0.83984375, "learning_rate": 0.000286155364278492, "loss": 0.182, "step": 328740 }, { "epoch": 13.62, "grad_norm": 0.6484375, "learning_rate": 0.0002861446330645837, "loss": 0.1959, "step": 328750 }, { "epoch": 13.62, "grad_norm": 0.416015625, "learning_rate": 0.0002861339017826553, "loss": 0.1985, "step": 328760 }, { "epoch": 13.62, "grad_norm": 0.93359375, "learning_rate": 0.00028612317043272674, "loss": 0.2007, "step": 328770 }, { "epoch": 13.62, "grad_norm": 0.7734375, "learning_rate": 0.0002861124390148182, "loss": 0.1758, "step": 328780 }, { "epoch": 13.62, "grad_norm": 0.6015625, "learning_rate": 0.0002861017075289501, "loss": 0.2436, "step": 328790 }, { "epoch": 13.62, "grad_norm": 0.451171875, "learning_rate": 0.00028609097597514236, "loss": 0.1645, "step": 328800 }, { "epoch": 13.62, "grad_norm": 0.3828125, "learning_rate": 0.00028608024435341535, "loss": 0.2245, "step": 328810 }, { "epoch": 13.62, "grad_norm": 1.0625, "learning_rate": 0.0002860695126637892, "loss": 0.1747, "step": 328820 }, { "epoch": 13.62, "grad_norm": 1.4140625, "learning_rate": 0.0002860587809062841, "loss": 0.1644, "step": 328830 }, { "epoch": 13.62, "grad_norm": 0.79296875, "learning_rate": 0.00028604804908092033, "loss": 0.2147, "step": 328840 }, { "epoch": 13.62, "grad_norm": 0.30078125, "learning_rate": 0.000286037317187718, "loss": 0.1917, "step": 328850 }, { "epoch": 13.62, "grad_norm": 0.7421875, "learning_rate": 0.0002860265852266973, "loss": 0.1887, "step": 328860 }, { "epoch": 13.62, "grad_norm": 0.8515625, "learning_rate": 0.0002860158531978785, "loss": 0.1887, "step": 328870 }, { "epoch": 13.62, "grad_norm": 4.125, "learning_rate": 0.00028600512110128166, "loss": 0.2058, "step": 328880 }, { "epoch": 13.62, "grad_norm": 3.453125, "learning_rate": 0.0002859943889369271, "loss": 0.2041, "step": 328890 }, { "epoch": 13.62, "grad_norm": 0.75, "learning_rate": 0.00028598365670483506, "loss": 0.2016, "step": 328900 }, { "epoch": 13.62, "grad_norm": 1.0078125, "learning_rate": 0.0002859729244050256, "loss": 0.1901, "step": 328910 }, { "epoch": 13.62, "grad_norm": 1.0390625, "learning_rate": 0.0002859621920375191, "loss": 0.2111, "step": 328920 }, { "epoch": 13.62, "grad_norm": 0.58203125, "learning_rate": 0.0002859514596023355, "loss": 0.2238, "step": 328930 }, { "epoch": 13.62, "grad_norm": 0.484375, "learning_rate": 0.0002859407270994952, "loss": 0.1819, "step": 328940 }, { "epoch": 13.63, "grad_norm": 1.2890625, "learning_rate": 0.00028592999452901836, "loss": 0.2149, "step": 328950 }, { "epoch": 13.63, "grad_norm": 0.77734375, "learning_rate": 0.00028591926189092505, "loss": 0.1958, "step": 328960 }, { "epoch": 13.63, "grad_norm": 0.796875, "learning_rate": 0.00028590852918523564, "loss": 0.2145, "step": 328970 }, { "epoch": 13.63, "grad_norm": 1.453125, "learning_rate": 0.00028589779641197024, "loss": 0.177, "step": 328980 }, { "epoch": 13.63, "grad_norm": 0.41796875, "learning_rate": 0.00028588706357114907, "loss": 0.1469, "step": 328990 }, { "epoch": 13.63, "grad_norm": 0.734375, "learning_rate": 0.00028587633066279235, "loss": 0.1916, "step": 329000 }, { "epoch": 13.63, "grad_norm": 1.0234375, "learning_rate": 0.0002858655976869202, "loss": 0.1739, "step": 329010 }, { "epoch": 13.63, "grad_norm": 0.287109375, "learning_rate": 0.0002858548646435529, "loss": 0.1639, "step": 329020 }, { "epoch": 13.63, "grad_norm": 0.6875, "learning_rate": 0.0002858441315327106, "loss": 0.2185, "step": 329030 }, { "epoch": 13.63, "grad_norm": 0.63671875, "learning_rate": 0.00028583339835441346, "loss": 0.2007, "step": 329040 }, { "epoch": 13.63, "grad_norm": 0.267578125, "learning_rate": 0.0002858226651086818, "loss": 0.1567, "step": 329050 }, { "epoch": 13.63, "grad_norm": 0.921875, "learning_rate": 0.00028581193179553577, "loss": 0.2107, "step": 329060 }, { "epoch": 13.63, "grad_norm": 0.625, "learning_rate": 0.0002858011984149955, "loss": 0.1913, "step": 329070 }, { "epoch": 13.63, "grad_norm": 0.52734375, "learning_rate": 0.00028579046496708134, "loss": 0.1745, "step": 329080 }, { "epoch": 13.63, "grad_norm": 1.0078125, "learning_rate": 0.0002857797314518133, "loss": 0.2337, "step": 329090 }, { "epoch": 13.63, "grad_norm": 0.58203125, "learning_rate": 0.0002857689978692117, "loss": 0.2061, "step": 329100 }, { "epoch": 13.63, "grad_norm": 0.60546875, "learning_rate": 0.00028575826421929667, "loss": 0.1797, "step": 329110 }, { "epoch": 13.63, "grad_norm": 0.76953125, "learning_rate": 0.0002857475305020885, "loss": 0.1937, "step": 329120 }, { "epoch": 13.63, "grad_norm": 0.8515625, "learning_rate": 0.00028573679671760734, "loss": 0.1825, "step": 329130 }, { "epoch": 13.63, "grad_norm": 0.6796875, "learning_rate": 0.00028572606286587335, "loss": 0.194, "step": 329140 }, { "epoch": 13.63, "grad_norm": 1.0234375, "learning_rate": 0.00028571532894690674, "loss": 0.2111, "step": 329150 }, { "epoch": 13.63, "grad_norm": 1.171875, "learning_rate": 0.00028570459496072783, "loss": 0.2346, "step": 329160 }, { "epoch": 13.63, "grad_norm": 0.69921875, "learning_rate": 0.0002856938609073566, "loss": 0.1626, "step": 329170 }, { "epoch": 13.63, "grad_norm": 0.76953125, "learning_rate": 0.0002856831267868135, "loss": 0.1642, "step": 329180 }, { "epoch": 13.64, "grad_norm": 0.625, "learning_rate": 0.0002856723925991185, "loss": 0.1894, "step": 329190 }, { "epoch": 13.64, "grad_norm": 0.54296875, "learning_rate": 0.000285661658344292, "loss": 0.191, "step": 329200 }, { "epoch": 13.64, "grad_norm": 0.703125, "learning_rate": 0.0002856509240223541, "loss": 0.2458, "step": 329210 }, { "epoch": 13.64, "grad_norm": 0.65234375, "learning_rate": 0.000285640189633325, "loss": 0.1733, "step": 329220 }, { "epoch": 13.64, "grad_norm": 0.59765625, "learning_rate": 0.0002856294551772249, "loss": 0.1763, "step": 329230 }, { "epoch": 13.64, "grad_norm": 0.6484375, "learning_rate": 0.00028561872065407403, "loss": 0.2301, "step": 329240 }, { "epoch": 13.64, "grad_norm": 1.640625, "learning_rate": 0.00028560798606389254, "loss": 0.2252, "step": 329250 }, { "epoch": 13.64, "grad_norm": 0.55859375, "learning_rate": 0.0002855972514067007, "loss": 0.1878, "step": 329260 }, { "epoch": 13.64, "grad_norm": 0.5625, "learning_rate": 0.0002855865166825186, "loss": 0.1224, "step": 329270 }, { "epoch": 13.64, "grad_norm": 1.328125, "learning_rate": 0.0002855757818913666, "loss": 0.2342, "step": 329280 }, { "epoch": 13.64, "grad_norm": 1.5078125, "learning_rate": 0.00028556504703326486, "loss": 0.2494, "step": 329290 }, { "epoch": 13.64, "grad_norm": 0.8046875, "learning_rate": 0.00028555431210823345, "loss": 0.1694, "step": 329300 }, { "epoch": 13.64, "grad_norm": 0.765625, "learning_rate": 0.0002855435771162927, "loss": 0.1572, "step": 329310 }, { "epoch": 13.64, "grad_norm": 1.046875, "learning_rate": 0.0002855328420574628, "loss": 0.2063, "step": 329320 }, { "epoch": 13.64, "grad_norm": 1.1953125, "learning_rate": 0.00028552210693176387, "loss": 0.1988, "step": 329330 }, { "epoch": 13.64, "grad_norm": 0.5, "learning_rate": 0.0002855113717392162, "loss": 0.1588, "step": 329340 }, { "epoch": 13.64, "grad_norm": 1.1640625, "learning_rate": 0.00028550063647983997, "loss": 0.1997, "step": 329350 }, { "epoch": 13.64, "grad_norm": 1.484375, "learning_rate": 0.0002854899011536554, "loss": 0.238, "step": 329360 }, { "epoch": 13.64, "grad_norm": 1.125, "learning_rate": 0.0002854791657606827, "loss": 0.1556, "step": 329370 }, { "epoch": 13.64, "grad_norm": 0.90234375, "learning_rate": 0.00028546843030094194, "loss": 0.2083, "step": 329380 }, { "epoch": 13.64, "grad_norm": 1.3359375, "learning_rate": 0.00028545769477445344, "loss": 0.1665, "step": 329390 }, { "epoch": 13.64, "grad_norm": 0.27734375, "learning_rate": 0.00028544695918123744, "loss": 0.2103, "step": 329400 }, { "epoch": 13.64, "grad_norm": 0.296875, "learning_rate": 0.00028543622352131406, "loss": 0.191, "step": 329410 }, { "epoch": 13.64, "grad_norm": 1.171875, "learning_rate": 0.0002854254877947035, "loss": 0.2141, "step": 329420 }, { "epoch": 13.64, "grad_norm": 1.46875, "learning_rate": 0.00028541475200142603, "loss": 0.2316, "step": 329430 }, { "epoch": 13.65, "grad_norm": 0.50390625, "learning_rate": 0.0002854040161415019, "loss": 0.1857, "step": 329440 }, { "epoch": 13.65, "grad_norm": 0.291015625, "learning_rate": 0.00028539328021495107, "loss": 0.2035, "step": 329450 }, { "epoch": 13.65, "grad_norm": 0.3984375, "learning_rate": 0.000285382544221794, "loss": 0.204, "step": 329460 }, { "epoch": 13.65, "grad_norm": 0.51953125, "learning_rate": 0.0002853718081620509, "loss": 0.184, "step": 329470 }, { "epoch": 13.65, "grad_norm": 0.8125, "learning_rate": 0.0002853610720357417, "loss": 0.2131, "step": 329480 }, { "epoch": 13.65, "grad_norm": 0.5703125, "learning_rate": 0.00028535033584288694, "loss": 0.1716, "step": 329490 }, { "epoch": 13.65, "grad_norm": 0.244140625, "learning_rate": 0.00028533959958350656, "loss": 0.2005, "step": 329500 }, { "epoch": 13.65, "grad_norm": 1.296875, "learning_rate": 0.0002853288632576209, "loss": 0.1945, "step": 329510 }, { "epoch": 13.65, "grad_norm": 0.80078125, "learning_rate": 0.0002853181268652501, "loss": 0.1672, "step": 329520 }, { "epoch": 13.65, "grad_norm": 1.6328125, "learning_rate": 0.0002853073904064144, "loss": 0.1985, "step": 329530 }, { "epoch": 13.65, "grad_norm": 0.6328125, "learning_rate": 0.0002852966538811341, "loss": 0.2028, "step": 329540 }, { "epoch": 13.65, "grad_norm": 1.0, "learning_rate": 0.00028528591728942923, "loss": 0.1855, "step": 329550 }, { "epoch": 13.65, "grad_norm": 0.004241943359375, "learning_rate": 0.00028527518063132005, "loss": 0.1863, "step": 329560 }, { "epoch": 13.65, "grad_norm": 2.078125, "learning_rate": 0.00028526444390682696, "loss": 0.1681, "step": 329570 }, { "epoch": 13.65, "grad_norm": 1.5546875, "learning_rate": 0.0002852537071159698, "loss": 0.2391, "step": 329580 }, { "epoch": 13.65, "grad_norm": 0.51171875, "learning_rate": 0.000285242970258769, "loss": 0.1697, "step": 329590 }, { "epoch": 13.65, "grad_norm": 1.3828125, "learning_rate": 0.0002852322333352448, "loss": 0.1928, "step": 329600 }, { "epoch": 13.65, "grad_norm": 1.453125, "learning_rate": 0.0002852214963454173, "loss": 0.1994, "step": 329610 }, { "epoch": 13.65, "grad_norm": 0.275390625, "learning_rate": 0.00028521075928930676, "loss": 0.2143, "step": 329620 }, { "epoch": 13.65, "grad_norm": 0.79296875, "learning_rate": 0.0002852000221669334, "loss": 0.1903, "step": 329630 }, { "epoch": 13.65, "grad_norm": 1.0234375, "learning_rate": 0.0002851892849783173, "loss": 0.2363, "step": 329640 }, { "epoch": 13.65, "grad_norm": 0.75390625, "learning_rate": 0.0002851785477234789, "loss": 0.1819, "step": 329650 }, { "epoch": 13.65, "grad_norm": 0.953125, "learning_rate": 0.00028516781040243815, "loss": 0.1658, "step": 329660 }, { "epoch": 13.65, "grad_norm": 0.7421875, "learning_rate": 0.0002851570730152155, "loss": 0.2139, "step": 329670 }, { "epoch": 13.66, "grad_norm": 0.5390625, "learning_rate": 0.00028514633556183094, "loss": 0.1818, "step": 329680 }, { "epoch": 13.66, "grad_norm": 0.47265625, "learning_rate": 0.0002851355980423048, "loss": 0.1779, "step": 329690 }, { "epoch": 13.66, "grad_norm": 0.84765625, "learning_rate": 0.0002851248604566573, "loss": 0.1733, "step": 329700 }, { "epoch": 13.66, "grad_norm": 0.92578125, "learning_rate": 0.00028511412280490854, "loss": 0.1554, "step": 329710 }, { "epoch": 13.66, "grad_norm": 1.03125, "learning_rate": 0.00028510338508707885, "loss": 0.214, "step": 329720 }, { "epoch": 13.66, "grad_norm": 0.95703125, "learning_rate": 0.0002850926473031884, "loss": 0.1771, "step": 329730 }, { "epoch": 13.66, "grad_norm": 0.73046875, "learning_rate": 0.00028508190945325724, "loss": 0.2097, "step": 329740 }, { "epoch": 13.66, "grad_norm": 0.87890625, "learning_rate": 0.00028507117153730587, "loss": 0.1615, "step": 329750 }, { "epoch": 13.66, "grad_norm": 0.91015625, "learning_rate": 0.0002850604335553543, "loss": 0.2057, "step": 329760 }, { "epoch": 13.66, "grad_norm": 0.51171875, "learning_rate": 0.0002850496955074227, "loss": 0.1904, "step": 329770 }, { "epoch": 13.66, "grad_norm": 0.3984375, "learning_rate": 0.00028503895739353147, "loss": 0.2018, "step": 329780 }, { "epoch": 13.66, "grad_norm": 1.625, "learning_rate": 0.00028502821921370063, "loss": 0.1732, "step": 329790 }, { "epoch": 13.66, "grad_norm": 1.046875, "learning_rate": 0.00028501748096795046, "loss": 0.2683, "step": 329800 }, { "epoch": 13.66, "grad_norm": 0.890625, "learning_rate": 0.00028500674265630123, "loss": 0.1993, "step": 329810 }, { "epoch": 13.66, "grad_norm": 0.8671875, "learning_rate": 0.00028499600427877306, "loss": 0.2277, "step": 329820 }, { "epoch": 13.66, "grad_norm": 0.7578125, "learning_rate": 0.0002849852658353862, "loss": 0.2039, "step": 329830 }, { "epoch": 13.66, "grad_norm": 1.0078125, "learning_rate": 0.0002849745273261608, "loss": 0.2395, "step": 329840 }, { "epoch": 13.66, "grad_norm": 1.1015625, "learning_rate": 0.00028496378875111716, "loss": 0.2261, "step": 329850 }, { "epoch": 13.66, "grad_norm": 1.1640625, "learning_rate": 0.0002849530501102754, "loss": 0.2166, "step": 329860 }, { "epoch": 13.66, "grad_norm": 1.84375, "learning_rate": 0.0002849423114036558, "loss": 0.2087, "step": 329870 }, { "epoch": 13.66, "grad_norm": 0.9453125, "learning_rate": 0.0002849315726312786, "loss": 0.2053, "step": 329880 }, { "epoch": 13.66, "grad_norm": 0.56640625, "learning_rate": 0.000284920833793164, "loss": 0.1483, "step": 329890 }, { "epoch": 13.66, "grad_norm": 0.9375, "learning_rate": 0.00028491009488933194, "loss": 0.1665, "step": 329900 }, { "epoch": 13.66, "grad_norm": 0.6953125, "learning_rate": 0.00028489935591980304, "loss": 0.1827, "step": 329910 }, { "epoch": 13.67, "grad_norm": 0.6171875, "learning_rate": 0.0002848886168845973, "loss": 0.1832, "step": 329920 }, { "epoch": 13.67, "grad_norm": 0.2451171875, "learning_rate": 0.00028487787778373486, "loss": 0.1897, "step": 329930 }, { "epoch": 13.67, "grad_norm": 0.578125, "learning_rate": 0.00028486713861723613, "loss": 0.1465, "step": 329940 }, { "epoch": 13.67, "grad_norm": 0.470703125, "learning_rate": 0.0002848563993851211, "loss": 0.2001, "step": 329950 }, { "epoch": 13.67, "grad_norm": 1.171875, "learning_rate": 0.0002848456600874101, "loss": 0.2014, "step": 329960 }, { "epoch": 13.67, "grad_norm": 0.609375, "learning_rate": 0.0002848349207241235, "loss": 0.1278, "step": 329970 }, { "epoch": 13.67, "grad_norm": 0.53125, "learning_rate": 0.0002848241812952811, "loss": 0.1649, "step": 329980 }, { "epoch": 13.67, "grad_norm": 0.3671875, "learning_rate": 0.0002848134418009035, "loss": 0.1744, "step": 329990 }, { "epoch": 13.67, "grad_norm": 1.0390625, "learning_rate": 0.0002848027022410108, "loss": 0.2156, "step": 330000 }, { "epoch": 13.67, "grad_norm": 0.53515625, "learning_rate": 0.00028479196261562307, "loss": 0.1932, "step": 330010 }, { "epoch": 13.67, "grad_norm": 1.1796875, "learning_rate": 0.00028478122292476064, "loss": 0.1796, "step": 330020 }, { "epoch": 13.67, "grad_norm": 0.60546875, "learning_rate": 0.0002847704831684437, "loss": 0.1978, "step": 330030 }, { "epoch": 13.67, "grad_norm": 0.8125, "learning_rate": 0.0002847597433466925, "loss": 0.1801, "step": 330040 }, { "epoch": 13.67, "grad_norm": 0.5546875, "learning_rate": 0.00028474900345952725, "loss": 0.1889, "step": 330050 }, { "epoch": 13.67, "grad_norm": 0.404296875, "learning_rate": 0.000284738263506968, "loss": 0.1896, "step": 330060 }, { "epoch": 13.67, "grad_norm": 1.046875, "learning_rate": 0.00028472752348903524, "loss": 0.1967, "step": 330070 }, { "epoch": 13.67, "grad_norm": 0.65234375, "learning_rate": 0.000284716783405749, "loss": 0.1678, "step": 330080 }, { "epoch": 13.67, "grad_norm": 0.984375, "learning_rate": 0.00028470604325712945, "loss": 0.164, "step": 330090 }, { "epoch": 13.67, "grad_norm": 1.3828125, "learning_rate": 0.0002846953030431969, "loss": 0.1901, "step": 330100 }, { "epoch": 13.67, "grad_norm": 0.396484375, "learning_rate": 0.00028468456276397155, "loss": 0.163, "step": 330110 }, { "epoch": 13.67, "grad_norm": 0.71875, "learning_rate": 0.0002846738224194736, "loss": 0.2295, "step": 330120 }, { "epoch": 13.67, "grad_norm": 1.5625, "learning_rate": 0.0002846630820097232, "loss": 0.1647, "step": 330130 }, { "epoch": 13.67, "grad_norm": 0.6875, "learning_rate": 0.00028465234153474074, "loss": 0.1852, "step": 330140 }, { "epoch": 13.67, "grad_norm": 0.796875, "learning_rate": 0.00028464160099454626, "loss": 0.2189, "step": 330150 }, { "epoch": 13.68, "grad_norm": 0.474609375, "learning_rate": 0.00028463086038916005, "loss": 0.2, "step": 330160 }, { "epoch": 13.68, "grad_norm": 1.15625, "learning_rate": 0.00028462011971860224, "loss": 0.1991, "step": 330170 }, { "epoch": 13.68, "grad_norm": 1.046875, "learning_rate": 0.0002846093789828932, "loss": 0.1675, "step": 330180 }, { "epoch": 13.68, "grad_norm": 0.2431640625, "learning_rate": 0.0002845986381820529, "loss": 0.2105, "step": 330190 }, { "epoch": 13.68, "grad_norm": 0.5234375, "learning_rate": 0.0002845878973161018, "loss": 0.1878, "step": 330200 }, { "epoch": 13.68, "grad_norm": 0.58203125, "learning_rate": 0.00028457715638506, "loss": 0.2255, "step": 330210 }, { "epoch": 13.68, "grad_norm": 0.474609375, "learning_rate": 0.00028456641538894767, "loss": 0.1641, "step": 330220 }, { "epoch": 13.68, "grad_norm": 0.369140625, "learning_rate": 0.00028455567432778515, "loss": 0.2058, "step": 330230 }, { "epoch": 13.68, "grad_norm": 0.76953125, "learning_rate": 0.00028454493320159257, "loss": 0.2005, "step": 330240 }, { "epoch": 13.68, "grad_norm": 0.9609375, "learning_rate": 0.00028453419201039014, "loss": 0.2073, "step": 330250 }, { "epoch": 13.68, "grad_norm": 0.74609375, "learning_rate": 0.00028452345075419814, "loss": 0.1771, "step": 330260 }, { "epoch": 13.68, "grad_norm": 0.69921875, "learning_rate": 0.0002845127094330366, "loss": 0.1849, "step": 330270 }, { "epoch": 13.68, "grad_norm": 0.26953125, "learning_rate": 0.00028450196804692603, "loss": 0.2216, "step": 330280 }, { "epoch": 13.68, "grad_norm": 0.490234375, "learning_rate": 0.00028449122659588637, "loss": 0.1444, "step": 330290 }, { "epoch": 13.68, "grad_norm": 0.890625, "learning_rate": 0.000284480485079938, "loss": 0.1729, "step": 330300 }, { "epoch": 13.68, "grad_norm": 0.671875, "learning_rate": 0.0002844697434991011, "loss": 0.199, "step": 330310 }, { "epoch": 13.68, "grad_norm": 0.74609375, "learning_rate": 0.00028445900185339584, "loss": 0.1591, "step": 330320 }, { "epoch": 13.68, "grad_norm": 0.50390625, "learning_rate": 0.0002844482601428424, "loss": 0.1735, "step": 330330 }, { "epoch": 13.68, "grad_norm": 0.431640625, "learning_rate": 0.00028443751836746117, "loss": 0.222, "step": 330340 }, { "epoch": 13.68, "grad_norm": 0.734375, "learning_rate": 0.0002844267765272721, "loss": 0.2005, "step": 330350 }, { "epoch": 13.68, "grad_norm": 0.66015625, "learning_rate": 0.0002844160346222957, "loss": 0.1636, "step": 330360 }, { "epoch": 13.68, "grad_norm": 0.69140625, "learning_rate": 0.000284405292652552, "loss": 0.1997, "step": 330370 }, { "epoch": 13.68, "grad_norm": 1.359375, "learning_rate": 0.00028439455061806117, "loss": 0.2423, "step": 330380 }, { "epoch": 13.68, "grad_norm": 0.66796875, "learning_rate": 0.00028438380851884365, "loss": 0.2143, "step": 330390 }, { "epoch": 13.69, "grad_norm": 0.78515625, "learning_rate": 0.0002843730663549194, "loss": 0.1616, "step": 330400 }, { "epoch": 13.69, "grad_norm": 1.6328125, "learning_rate": 0.0002843623241263088, "loss": 0.176, "step": 330410 }, { "epoch": 13.69, "grad_norm": 0.921875, "learning_rate": 0.000284351581833032, "loss": 0.1669, "step": 330420 }, { "epoch": 13.69, "grad_norm": 1.078125, "learning_rate": 0.0002843408394751092, "loss": 0.1572, "step": 330430 }, { "epoch": 13.69, "grad_norm": 0.92578125, "learning_rate": 0.00028433009705256077, "loss": 0.1968, "step": 330440 }, { "epoch": 13.69, "grad_norm": 0.42578125, "learning_rate": 0.0002843193545654067, "loss": 0.1658, "step": 330450 }, { "epoch": 13.69, "grad_norm": 0.59375, "learning_rate": 0.00028430861201366733, "loss": 0.1553, "step": 330460 }, { "epoch": 13.69, "grad_norm": 1.28125, "learning_rate": 0.0002842978693973629, "loss": 0.1719, "step": 330470 }, { "epoch": 13.69, "grad_norm": 0.625, "learning_rate": 0.0002842871267165135, "loss": 0.1916, "step": 330480 }, { "epoch": 13.69, "grad_norm": 0.69921875, "learning_rate": 0.00028427638397113947, "loss": 0.1803, "step": 330490 }, { "epoch": 13.69, "grad_norm": 0.859375, "learning_rate": 0.00028426564116126104, "loss": 0.2085, "step": 330500 }, { "epoch": 13.69, "grad_norm": 0.0, "learning_rate": 0.00028425489828689827, "loss": 0.113, "step": 330510 }, { "epoch": 13.69, "grad_norm": 0.5703125, "learning_rate": 0.00028424415534807155, "loss": 0.1706, "step": 330520 }, { "epoch": 13.69, "grad_norm": 2.203125, "learning_rate": 0.00028423341234480105, "loss": 0.1584, "step": 330530 }, { "epoch": 13.69, "grad_norm": 0.39453125, "learning_rate": 0.00028422266927710687, "loss": 0.1912, "step": 330540 }, { "epoch": 13.69, "grad_norm": 0.78515625, "learning_rate": 0.00028421192614500946, "loss": 0.1671, "step": 330550 }, { "epoch": 13.69, "grad_norm": 1.0, "learning_rate": 0.0002842011829485288, "loss": 0.1943, "step": 330560 }, { "epoch": 13.69, "grad_norm": 0.66796875, "learning_rate": 0.0002841904396876852, "loss": 0.1607, "step": 330570 }, { "epoch": 13.69, "grad_norm": 0.765625, "learning_rate": 0.00028417969636249893, "loss": 0.1688, "step": 330580 }, { "epoch": 13.69, "grad_norm": 0.7890625, "learning_rate": 0.00028416895297299013, "loss": 0.148, "step": 330590 }, { "epoch": 13.69, "grad_norm": 0.0, "learning_rate": 0.0002841582095191791, "loss": 0.1996, "step": 330600 }, { "epoch": 13.69, "grad_norm": 0.71484375, "learning_rate": 0.000284147466001086, "loss": 0.2072, "step": 330610 }, { "epoch": 13.69, "grad_norm": 0.72265625, "learning_rate": 0.000284136722418731, "loss": 0.1954, "step": 330620 }, { "epoch": 13.69, "grad_norm": 0.359375, "learning_rate": 0.00028412597877213434, "loss": 0.182, "step": 330630 }, { "epoch": 13.7, "grad_norm": 0.875, "learning_rate": 0.00028411523506131637, "loss": 0.1912, "step": 330640 }, { "epoch": 13.7, "grad_norm": 0.62109375, "learning_rate": 0.0002841044912862972, "loss": 0.2243, "step": 330650 }, { "epoch": 13.7, "grad_norm": 2.28125, "learning_rate": 0.000284093747447097, "loss": 0.1918, "step": 330660 }, { "epoch": 13.7, "grad_norm": 0.71484375, "learning_rate": 0.00028408300354373613, "loss": 0.1957, "step": 330670 }, { "epoch": 13.7, "grad_norm": 0.82421875, "learning_rate": 0.0002840722595762347, "loss": 0.1688, "step": 330680 }, { "epoch": 13.7, "grad_norm": 0.55859375, "learning_rate": 0.00028406151554461297, "loss": 0.2154, "step": 330690 }, { "epoch": 13.7, "grad_norm": 0.46484375, "learning_rate": 0.00028405077144889114, "loss": 0.1603, "step": 330700 }, { "epoch": 13.7, "grad_norm": 0.48828125, "learning_rate": 0.00028404002728908933, "loss": 0.2395, "step": 330710 }, { "epoch": 13.7, "grad_norm": 1.234375, "learning_rate": 0.000284029283065228, "loss": 0.2626, "step": 330720 }, { "epoch": 13.7, "grad_norm": 0.72265625, "learning_rate": 0.0002840185387773272, "loss": 0.185, "step": 330730 }, { "epoch": 13.7, "grad_norm": 1.171875, "learning_rate": 0.00028400779442540715, "loss": 0.1995, "step": 330740 }, { "epoch": 13.7, "grad_norm": 1.1171875, "learning_rate": 0.0002839970500094881, "loss": 0.2235, "step": 330750 }, { "epoch": 13.7, "grad_norm": 0.8359375, "learning_rate": 0.00028398630552959036, "loss": 0.1999, "step": 330760 }, { "epoch": 13.7, "grad_norm": 1.109375, "learning_rate": 0.00028397556098573394, "loss": 0.1993, "step": 330770 }, { "epoch": 13.7, "grad_norm": 1.1640625, "learning_rate": 0.00028396481637793926, "loss": 0.174, "step": 330780 }, { "epoch": 13.7, "grad_norm": 1.0234375, "learning_rate": 0.00028395407170622646, "loss": 0.2033, "step": 330790 }, { "epoch": 13.7, "grad_norm": 1.515625, "learning_rate": 0.00028394332697061577, "loss": 0.2817, "step": 330800 }, { "epoch": 13.7, "grad_norm": 0.484375, "learning_rate": 0.00028393258217112736, "loss": 0.1633, "step": 330810 }, { "epoch": 13.7, "grad_norm": 0.609375, "learning_rate": 0.00028392183730778156, "loss": 0.2008, "step": 330820 }, { "epoch": 13.7, "grad_norm": 0.8984375, "learning_rate": 0.0002839110923805984, "loss": 0.1948, "step": 330830 }, { "epoch": 13.7, "grad_norm": 1.0234375, "learning_rate": 0.00028390034738959833, "loss": 0.1861, "step": 330840 }, { "epoch": 13.7, "grad_norm": 0.83984375, "learning_rate": 0.0002838896023348015, "loss": 0.1564, "step": 330850 }, { "epoch": 13.7, "grad_norm": 0.95703125, "learning_rate": 0.000283878857216228, "loss": 0.1562, "step": 330860 }, { "epoch": 13.7, "grad_norm": 0.7734375, "learning_rate": 0.0002838681120338982, "loss": 0.1748, "step": 330870 }, { "epoch": 13.71, "grad_norm": 1.546875, "learning_rate": 0.00028385736678783225, "loss": 0.1667, "step": 330880 }, { "epoch": 13.71, "grad_norm": 0.2451171875, "learning_rate": 0.0002838466214780505, "loss": 0.1666, "step": 330890 }, { "epoch": 13.71, "grad_norm": 1.9296875, "learning_rate": 0.00028383587610457297, "loss": 0.2086, "step": 330900 }, { "epoch": 13.71, "grad_norm": 1.0234375, "learning_rate": 0.00028382513066741995, "loss": 0.2186, "step": 330910 }, { "epoch": 13.71, "grad_norm": 0.4140625, "learning_rate": 0.0002838143851666117, "loss": 0.2233, "step": 330920 }, { "epoch": 13.71, "grad_norm": 1.046875, "learning_rate": 0.0002838036396021685, "loss": 0.21, "step": 330930 }, { "epoch": 13.71, "grad_norm": 2.515625, "learning_rate": 0.0002837928939741104, "loss": 0.1768, "step": 330940 }, { "epoch": 13.71, "grad_norm": 1.2109375, "learning_rate": 0.00028378214828245777, "loss": 0.1613, "step": 330950 }, { "epoch": 13.71, "grad_norm": 0.71484375, "learning_rate": 0.0002837714025272307, "loss": 0.1786, "step": 330960 }, { "epoch": 13.71, "grad_norm": 0.68359375, "learning_rate": 0.0002837606567084496, "loss": 0.2454, "step": 330970 }, { "epoch": 13.71, "grad_norm": 0.5703125, "learning_rate": 0.0002837499108261346, "loss": 0.187, "step": 330980 }, { "epoch": 13.71, "grad_norm": 0.291015625, "learning_rate": 0.0002837391648803059, "loss": 0.2351, "step": 330990 }, { "epoch": 13.71, "grad_norm": 0.91796875, "learning_rate": 0.00028372841887098365, "loss": 0.1759, "step": 331000 }, { "epoch": 13.71, "grad_norm": 0.40625, "learning_rate": 0.00028371767279818826, "loss": 0.2079, "step": 331010 }, { "epoch": 13.71, "grad_norm": 0.671875, "learning_rate": 0.0002837069266619399, "loss": 0.2322, "step": 331020 }, { "epoch": 13.71, "grad_norm": 1.078125, "learning_rate": 0.0002836961804622586, "loss": 0.174, "step": 331030 }, { "epoch": 13.71, "grad_norm": 0.82421875, "learning_rate": 0.0002836854341991648, "loss": 0.1484, "step": 331040 }, { "epoch": 13.71, "grad_norm": 0.609375, "learning_rate": 0.00028367468787267856, "loss": 0.1418, "step": 331050 }, { "epoch": 13.71, "grad_norm": 1.4921875, "learning_rate": 0.00028366394148282033, "loss": 0.2147, "step": 331060 }, { "epoch": 13.71, "grad_norm": 0.8671875, "learning_rate": 0.0002836531950296101, "loss": 0.2298, "step": 331070 }, { "epoch": 13.71, "grad_norm": 1.1171875, "learning_rate": 0.00028364244851306823, "loss": 0.195, "step": 331080 }, { "epoch": 13.71, "grad_norm": 0.60546875, "learning_rate": 0.00028363170193321496, "loss": 0.1955, "step": 331090 }, { "epoch": 13.71, "grad_norm": 0.94140625, "learning_rate": 0.00028362095529007035, "loss": 0.1986, "step": 331100 }, { "epoch": 13.71, "grad_norm": 0.6171875, "learning_rate": 0.0002836102085836548, "loss": 0.2022, "step": 331110 }, { "epoch": 13.71, "grad_norm": 0.408203125, "learning_rate": 0.0002835994618139884, "loss": 0.2095, "step": 331120 }, { "epoch": 13.72, "grad_norm": 1.25, "learning_rate": 0.0002835887149810915, "loss": 0.2331, "step": 331130 }, { "epoch": 13.72, "grad_norm": 1.0, "learning_rate": 0.0002835779680849844, "loss": 0.2185, "step": 331140 }, { "epoch": 13.72, "grad_norm": 0.58984375, "learning_rate": 0.000283567221125687, "loss": 0.1677, "step": 331150 }, { "epoch": 13.72, "grad_norm": 0.828125, "learning_rate": 0.0002835564741032197, "loss": 0.1804, "step": 331160 }, { "epoch": 13.72, "grad_norm": 0.453125, "learning_rate": 0.00028354572701760287, "loss": 0.1451, "step": 331170 }, { "epoch": 13.72, "grad_norm": 0.65234375, "learning_rate": 0.00028353497986885646, "loss": 0.1853, "step": 331180 }, { "epoch": 13.72, "grad_norm": 0.83984375, "learning_rate": 0.000283524232657001, "loss": 0.1568, "step": 331190 }, { "epoch": 13.72, "grad_norm": 0.91015625, "learning_rate": 0.0002835134853820565, "loss": 0.2124, "step": 331200 }, { "epoch": 13.72, "grad_norm": 0.384765625, "learning_rate": 0.00028350273804404313, "loss": 0.1956, "step": 331210 }, { "epoch": 13.72, "grad_norm": 0.95703125, "learning_rate": 0.00028349199064298136, "loss": 0.2237, "step": 331220 }, { "epoch": 13.72, "grad_norm": 0.77734375, "learning_rate": 0.00028348124317889123, "loss": 0.1797, "step": 331230 }, { "epoch": 13.72, "grad_norm": 1.0078125, "learning_rate": 0.000283470495651793, "loss": 0.2131, "step": 331240 }, { "epoch": 13.72, "grad_norm": 1.2578125, "learning_rate": 0.00028345974806170694, "loss": 0.2136, "step": 331250 }, { "epoch": 13.72, "grad_norm": 0.69140625, "learning_rate": 0.00028344900040865316, "loss": 0.1796, "step": 331260 }, { "epoch": 13.72, "grad_norm": 1.3359375, "learning_rate": 0.00028343825269265213, "loss": 0.1438, "step": 331270 }, { "epoch": 13.72, "grad_norm": 0.98046875, "learning_rate": 0.00028342750491372384, "loss": 0.1921, "step": 331280 }, { "epoch": 13.72, "grad_norm": 0.58203125, "learning_rate": 0.0002834167570718885, "loss": 0.215, "step": 331290 }, { "epoch": 13.72, "grad_norm": 1.078125, "learning_rate": 0.00028340600916716664, "loss": 0.1869, "step": 331300 }, { "epoch": 13.72, "grad_norm": 0.9296875, "learning_rate": 0.0002833952611995781, "loss": 0.2291, "step": 331310 }, { "epoch": 13.72, "grad_norm": 0.9921875, "learning_rate": 0.0002833845131691434, "loss": 0.2065, "step": 331320 }, { "epoch": 13.72, "grad_norm": 0.44140625, "learning_rate": 0.0002833737650758826, "loss": 0.2122, "step": 331330 }, { "epoch": 13.72, "grad_norm": 0.828125, "learning_rate": 0.00028336301691981593, "loss": 0.1388, "step": 331340 }, { "epoch": 13.72, "grad_norm": 0.25, "learning_rate": 0.0002833522687009637, "loss": 0.2216, "step": 331350 }, { "epoch": 13.72, "grad_norm": 0.51171875, "learning_rate": 0.0002833415204193461, "loss": 0.1727, "step": 331360 }, { "epoch": 13.73, "grad_norm": 0.59375, "learning_rate": 0.00028333077207498336, "loss": 0.2252, "step": 331370 }, { "epoch": 13.73, "grad_norm": 1.1171875, "learning_rate": 0.00028332002366789576, "loss": 0.1931, "step": 331380 }, { "epoch": 13.73, "grad_norm": 0.34375, "learning_rate": 0.0002833092751981034, "loss": 0.1631, "step": 331390 }, { "epoch": 13.73, "grad_norm": 1.078125, "learning_rate": 0.0002832985266656266, "loss": 0.2055, "step": 331400 }, { "epoch": 13.73, "grad_norm": 0.9921875, "learning_rate": 0.00028328777807048566, "loss": 0.1418, "step": 331410 }, { "epoch": 13.73, "grad_norm": 0.5234375, "learning_rate": 0.00028327702941270054, "loss": 0.1553, "step": 331420 }, { "epoch": 13.73, "grad_norm": 0.337890625, "learning_rate": 0.00028326628069229185, "loss": 0.182, "step": 331430 }, { "epoch": 13.73, "grad_norm": 1.1484375, "learning_rate": 0.0002832555319092794, "loss": 0.2056, "step": 331440 }, { "epoch": 13.73, "grad_norm": 0.921875, "learning_rate": 0.00028324478306368376, "loss": 0.1812, "step": 331450 }, { "epoch": 13.73, "grad_norm": 0.671875, "learning_rate": 0.0002832340341555251, "loss": 0.1973, "step": 331460 }, { "epoch": 13.73, "grad_norm": 0.48046875, "learning_rate": 0.0002832232851848234, "loss": 0.2286, "step": 331470 }, { "epoch": 13.73, "grad_norm": 1.9375, "learning_rate": 0.00028321253615159916, "loss": 0.2068, "step": 331480 }, { "epoch": 13.73, "grad_norm": 0.6171875, "learning_rate": 0.0002832017870558726, "loss": 0.2019, "step": 331490 }, { "epoch": 13.73, "grad_norm": 0.765625, "learning_rate": 0.00028319103789766365, "loss": 0.2079, "step": 331500 }, { "epoch": 13.73, "grad_norm": 0.9609375, "learning_rate": 0.0002831802886769929, "loss": 0.1575, "step": 331510 }, { "epoch": 13.73, "grad_norm": 1.3046875, "learning_rate": 0.00028316953939388036, "loss": 0.2023, "step": 331520 }, { "epoch": 13.73, "grad_norm": 0.5, "learning_rate": 0.0002831587900483464, "loss": 0.2036, "step": 331530 }, { "epoch": 13.73, "grad_norm": 1.28125, "learning_rate": 0.00028314804064041117, "loss": 0.155, "step": 331540 }, { "epoch": 13.73, "grad_norm": 1.203125, "learning_rate": 0.0002831372911700948, "loss": 0.2238, "step": 331550 }, { "epoch": 13.73, "grad_norm": 0.6640625, "learning_rate": 0.0002831265416374177, "loss": 0.1876, "step": 331560 }, { "epoch": 13.73, "grad_norm": 0.5859375, "learning_rate": 0.0002831157920424001, "loss": 0.2486, "step": 331570 }, { "epoch": 13.73, "grad_norm": 1.09375, "learning_rate": 0.000283105042385062, "loss": 0.1616, "step": 331580 }, { "epoch": 13.73, "grad_norm": 1.0078125, "learning_rate": 0.0002830942926654239, "loss": 0.19, "step": 331590 }, { "epoch": 13.73, "grad_norm": 0.2275390625, "learning_rate": 0.0002830835428835059, "loss": 0.1916, "step": 331600 }, { "epoch": 13.74, "grad_norm": 0.54296875, "learning_rate": 0.0002830727930393282, "loss": 0.1746, "step": 331610 }, { "epoch": 13.74, "grad_norm": 0.46484375, "learning_rate": 0.0002830620431329112, "loss": 0.1766, "step": 331620 }, { "epoch": 13.74, "grad_norm": 0.74609375, "learning_rate": 0.0002830512931642748, "loss": 0.1617, "step": 331630 }, { "epoch": 13.74, "grad_norm": 0.53515625, "learning_rate": 0.00028304054313343956, "loss": 0.2093, "step": 331640 }, { "epoch": 13.74, "grad_norm": 0.466796875, "learning_rate": 0.00028302979304042566, "loss": 0.1811, "step": 331650 }, { "epoch": 13.74, "grad_norm": 0.82421875, "learning_rate": 0.0002830190428852531, "loss": 0.1977, "step": 331660 }, { "epoch": 13.74, "grad_norm": 0.65234375, "learning_rate": 0.00028300829266794236, "loss": 0.187, "step": 331670 }, { "epoch": 13.74, "grad_norm": 0.6640625, "learning_rate": 0.00028299754238851355, "loss": 0.2061, "step": 331680 }, { "epoch": 13.74, "grad_norm": 1.6953125, "learning_rate": 0.0002829867920469869, "loss": 0.2025, "step": 331690 }, { "epoch": 13.74, "grad_norm": 1.359375, "learning_rate": 0.00028297604164338275, "loss": 0.1758, "step": 331700 }, { "epoch": 13.74, "grad_norm": 0.71484375, "learning_rate": 0.00028296529117772113, "loss": 0.1655, "step": 331710 }, { "epoch": 13.74, "grad_norm": 0.78515625, "learning_rate": 0.0002829545406500225, "loss": 0.1742, "step": 331720 }, { "epoch": 13.74, "grad_norm": 0.9375, "learning_rate": 0.00028294379006030696, "loss": 0.194, "step": 331730 }, { "epoch": 13.74, "grad_norm": 0.74609375, "learning_rate": 0.00028293303940859474, "loss": 0.2162, "step": 331740 }, { "epoch": 13.74, "grad_norm": 0.5703125, "learning_rate": 0.00028292228869490616, "loss": 0.1913, "step": 331750 }, { "epoch": 13.74, "grad_norm": 1.0234375, "learning_rate": 0.00028291153791926133, "loss": 0.1991, "step": 331760 }, { "epoch": 13.74, "grad_norm": 0.52734375, "learning_rate": 0.0002829007870816805, "loss": 0.2326, "step": 331770 }, { "epoch": 13.74, "grad_norm": 0.72265625, "learning_rate": 0.00028289003618218404, "loss": 0.2122, "step": 331780 }, { "epoch": 13.74, "grad_norm": 0.65234375, "learning_rate": 0.00028287928522079196, "loss": 0.1732, "step": 331790 }, { "epoch": 13.74, "grad_norm": 0.4921875, "learning_rate": 0.00028286853419752473, "loss": 0.2087, "step": 331800 }, { "epoch": 13.74, "grad_norm": 1.171875, "learning_rate": 0.0002828577831124024, "loss": 0.1722, "step": 331810 }, { "epoch": 13.74, "grad_norm": 0.4140625, "learning_rate": 0.0002828470319654453, "loss": 0.1715, "step": 331820 }, { "epoch": 13.74, "grad_norm": 0.447265625, "learning_rate": 0.0002828362807566737, "loss": 0.1577, "step": 331830 }, { "epoch": 13.74, "grad_norm": 0.88671875, "learning_rate": 0.00028282552948610763, "loss": 0.2305, "step": 331840 }, { "epoch": 13.75, "grad_norm": 0.91015625, "learning_rate": 0.00028281477815376754, "loss": 0.1873, "step": 331850 }, { "epoch": 13.75, "grad_norm": 1.03125, "learning_rate": 0.0002828040267596736, "loss": 0.1827, "step": 331860 }, { "epoch": 13.75, "grad_norm": 1.5546875, "learning_rate": 0.0002827932753038459, "loss": 0.1521, "step": 331870 }, { "epoch": 13.75, "grad_norm": 0.80078125, "learning_rate": 0.00028278252378630494, "loss": 0.1417, "step": 331880 }, { "epoch": 13.75, "grad_norm": 0.59375, "learning_rate": 0.0002827717722070707, "loss": 0.1638, "step": 331890 }, { "epoch": 13.75, "grad_norm": 0.50390625, "learning_rate": 0.0002827610205661636, "loss": 0.2218, "step": 331900 }, { "epoch": 13.75, "grad_norm": 0.6640625, "learning_rate": 0.0002827502688636038, "loss": 0.1884, "step": 331910 }, { "epoch": 13.75, "grad_norm": 0.8203125, "learning_rate": 0.0002827395170994115, "loss": 0.1474, "step": 331920 }, { "epoch": 13.75, "grad_norm": 1.578125, "learning_rate": 0.00028272876527360696, "loss": 0.1561, "step": 331930 }, { "epoch": 13.75, "grad_norm": 0.328125, "learning_rate": 0.00028271801338621043, "loss": 0.2062, "step": 331940 }, { "epoch": 13.75, "grad_norm": 0.96875, "learning_rate": 0.0002827072614372421, "loss": 0.1788, "step": 331950 }, { "epoch": 13.75, "grad_norm": 1.3359375, "learning_rate": 0.0002826965094267223, "loss": 0.1774, "step": 331960 }, { "epoch": 13.75, "grad_norm": 0.5390625, "learning_rate": 0.0002826857573546712, "loss": 0.2596, "step": 331970 }, { "epoch": 13.75, "grad_norm": 1.5, "learning_rate": 0.000282675005221109, "loss": 0.2117, "step": 331980 }, { "epoch": 13.75, "grad_norm": 1.3203125, "learning_rate": 0.000282664253026056, "loss": 0.1939, "step": 331990 }, { "epoch": 13.75, "grad_norm": 0.76953125, "learning_rate": 0.00028265350076953235, "loss": 0.1945, "step": 332000 }, { "epoch": 13.75, "grad_norm": 0.498046875, "learning_rate": 0.0002826427484515584, "loss": 0.1425, "step": 332010 }, { "epoch": 13.75, "grad_norm": 1.15625, "learning_rate": 0.0002826319960721542, "loss": 0.148, "step": 332020 }, { "epoch": 13.75, "grad_norm": 1.046875, "learning_rate": 0.00028262124363134024, "loss": 0.1825, "step": 332030 }, { "epoch": 13.75, "grad_norm": 0.74609375, "learning_rate": 0.00028261049112913663, "loss": 0.1763, "step": 332040 }, { "epoch": 13.75, "grad_norm": 1.140625, "learning_rate": 0.0002825997385655635, "loss": 0.1661, "step": 332050 }, { "epoch": 13.75, "grad_norm": 1.2421875, "learning_rate": 0.0002825889859406412, "loss": 0.1924, "step": 332060 }, { "epoch": 13.75, "grad_norm": 0.69140625, "learning_rate": 0.00028257823325439005, "loss": 0.1716, "step": 332070 }, { "epoch": 13.75, "grad_norm": 0.58203125, "learning_rate": 0.0002825674805068301, "loss": 0.1507, "step": 332080 }, { "epoch": 13.76, "grad_norm": 0.71484375, "learning_rate": 0.0002825567276979816, "loss": 0.2051, "step": 332090 }, { "epoch": 13.76, "grad_norm": 0.59375, "learning_rate": 0.0002825459748278649, "loss": 0.1982, "step": 332100 }, { "epoch": 13.76, "grad_norm": 0.73828125, "learning_rate": 0.00028253522189650024, "loss": 0.191, "step": 332110 }, { "epoch": 13.76, "grad_norm": 0.83203125, "learning_rate": 0.0002825244689039078, "loss": 0.2004, "step": 332120 }, { "epoch": 13.76, "grad_norm": 1.625, "learning_rate": 0.00028251371585010784, "loss": 0.2021, "step": 332130 }, { "epoch": 13.76, "grad_norm": 1.1640625, "learning_rate": 0.0002825029627351205, "loss": 0.2079, "step": 332140 }, { "epoch": 13.76, "grad_norm": 0.63671875, "learning_rate": 0.0002824922095589662, "loss": 0.1874, "step": 332150 }, { "epoch": 13.76, "grad_norm": 0.7578125, "learning_rate": 0.000282481456321665, "loss": 0.2252, "step": 332160 }, { "epoch": 13.76, "grad_norm": 0.6640625, "learning_rate": 0.00028247070302323717, "loss": 0.1756, "step": 332170 }, { "epoch": 13.76, "grad_norm": 2.078125, "learning_rate": 0.000282459949663703, "loss": 0.1665, "step": 332180 }, { "epoch": 13.76, "grad_norm": 0.8984375, "learning_rate": 0.00028244919624308276, "loss": 0.1993, "step": 332190 }, { "epoch": 13.76, "grad_norm": 1.1875, "learning_rate": 0.00028243844276139665, "loss": 0.1738, "step": 332200 }, { "epoch": 13.76, "grad_norm": 0.7734375, "learning_rate": 0.00028242768921866486, "loss": 0.2177, "step": 332210 }, { "epoch": 13.76, "grad_norm": 0.384765625, "learning_rate": 0.00028241693561490767, "loss": 0.1661, "step": 332220 }, { "epoch": 13.76, "grad_norm": 0.6328125, "learning_rate": 0.0002824061819501453, "loss": 0.2195, "step": 332230 }, { "epoch": 13.76, "grad_norm": 0.96875, "learning_rate": 0.000282395428224398, "loss": 0.1592, "step": 332240 }, { "epoch": 13.76, "grad_norm": 0.333984375, "learning_rate": 0.000282384674437686, "loss": 0.1755, "step": 332250 }, { "epoch": 13.76, "grad_norm": 1.21875, "learning_rate": 0.0002823739205900295, "loss": 0.1851, "step": 332260 }, { "epoch": 13.76, "grad_norm": 0.4921875, "learning_rate": 0.0002823631666814488, "loss": 0.181, "step": 332270 }, { "epoch": 13.76, "grad_norm": 0.51171875, "learning_rate": 0.00028235241271196414, "loss": 0.1616, "step": 332280 }, { "epoch": 13.76, "grad_norm": 0.67578125, "learning_rate": 0.00028234165868159573, "loss": 0.2407, "step": 332290 }, { "epoch": 13.76, "grad_norm": 0.0615234375, "learning_rate": 0.00028233090459036384, "loss": 0.228, "step": 332300 }, { "epoch": 13.76, "grad_norm": 0.828125, "learning_rate": 0.0002823201504382886, "loss": 0.1684, "step": 332310 }, { "epoch": 13.76, "grad_norm": 0.65625, "learning_rate": 0.0002823093962253904, "loss": 0.2608, "step": 332320 }, { "epoch": 13.77, "grad_norm": 1.1640625, "learning_rate": 0.00028229864195168934, "loss": 0.1907, "step": 332330 }, { "epoch": 13.77, "grad_norm": 1.140625, "learning_rate": 0.00028228788761720575, "loss": 0.209, "step": 332340 }, { "epoch": 13.77, "grad_norm": 0.96875, "learning_rate": 0.0002822771332219599, "loss": 0.1772, "step": 332350 }, { "epoch": 13.77, "grad_norm": 0.1494140625, "learning_rate": 0.00028226637876597186, "loss": 0.1457, "step": 332360 }, { "epoch": 13.77, "grad_norm": 0.5703125, "learning_rate": 0.0002822556242492621, "loss": 0.1449, "step": 332370 }, { "epoch": 13.77, "grad_norm": 3.703125, "learning_rate": 0.00028224486967185066, "loss": 0.218, "step": 332380 }, { "epoch": 13.77, "grad_norm": 1.1328125, "learning_rate": 0.0002822341150337578, "loss": 0.2152, "step": 332390 }, { "epoch": 13.77, "grad_norm": 1.2421875, "learning_rate": 0.00028222336033500403, "loss": 0.2494, "step": 332400 }, { "epoch": 13.77, "grad_norm": 1.390625, "learning_rate": 0.0002822126055756092, "loss": 0.1839, "step": 332410 }, { "epoch": 13.77, "grad_norm": 1.53125, "learning_rate": 0.00028220185075559373, "loss": 0.2214, "step": 332420 }, { "epoch": 13.77, "grad_norm": 0.66015625, "learning_rate": 0.0002821910958749779, "loss": 0.16, "step": 332430 }, { "epoch": 13.77, "grad_norm": 0.2080078125, "learning_rate": 0.00028218034093378184, "loss": 0.1817, "step": 332440 }, { "epoch": 13.77, "grad_norm": 0.65625, "learning_rate": 0.00028216958593202596, "loss": 0.1953, "step": 332450 }, { "epoch": 13.77, "grad_norm": 1.046875, "learning_rate": 0.0002821588308697303, "loss": 0.1502, "step": 332460 }, { "epoch": 13.77, "grad_norm": 0.98046875, "learning_rate": 0.0002821480757469153, "loss": 0.2367, "step": 332470 }, { "epoch": 13.77, "grad_norm": 0.5625, "learning_rate": 0.000282137320563601, "loss": 0.1937, "step": 332480 }, { "epoch": 13.77, "grad_norm": 0.71875, "learning_rate": 0.00028212656531980773, "loss": 0.1693, "step": 332490 }, { "epoch": 13.77, "grad_norm": 0.72265625, "learning_rate": 0.0002821158100155558, "loss": 0.1907, "step": 332500 }, { "epoch": 13.77, "grad_norm": 0.625, "learning_rate": 0.0002821050546508653, "loss": 0.1763, "step": 332510 }, { "epoch": 13.77, "grad_norm": 0.8515625, "learning_rate": 0.00028209429922575657, "loss": 0.1456, "step": 332520 }, { "epoch": 13.77, "grad_norm": 0.68359375, "learning_rate": 0.0002820835437402499, "loss": 0.1192, "step": 332530 }, { "epoch": 13.77, "grad_norm": 0.6953125, "learning_rate": 0.0002820727881943654, "loss": 0.228, "step": 332540 }, { "epoch": 13.77, "grad_norm": 0.640625, "learning_rate": 0.00028206203258812347, "loss": 0.145, "step": 332550 }, { "epoch": 13.77, "grad_norm": 1.6640625, "learning_rate": 0.00028205127692154414, "loss": 0.1988, "step": 332560 }, { "epoch": 13.78, "grad_norm": 1.7578125, "learning_rate": 0.00028204052119464783, "loss": 0.1458, "step": 332570 }, { "epoch": 13.78, "grad_norm": 1.203125, "learning_rate": 0.00028202976540745475, "loss": 0.1811, "step": 332580 }, { "epoch": 13.78, "grad_norm": 0.72265625, "learning_rate": 0.00028201900955998507, "loss": 0.2179, "step": 332590 }, { "epoch": 13.78, "grad_norm": 0.671875, "learning_rate": 0.00028200825365225907, "loss": 0.1374, "step": 332600 }, { "epoch": 13.78, "grad_norm": 1.046875, "learning_rate": 0.000281997497684297, "loss": 0.2122, "step": 332610 }, { "epoch": 13.78, "grad_norm": 1.4765625, "learning_rate": 0.0002819867416561191, "loss": 0.2525, "step": 332620 }, { "epoch": 13.78, "grad_norm": 1.078125, "learning_rate": 0.0002819759855677456, "loss": 0.211, "step": 332630 }, { "epoch": 13.78, "grad_norm": 0.33203125, "learning_rate": 0.0002819652294191967, "loss": 0.1915, "step": 332640 }, { "epoch": 13.78, "grad_norm": 0.8359375, "learning_rate": 0.0002819544732104927, "loss": 0.2312, "step": 332650 }, { "epoch": 13.78, "grad_norm": 1.375, "learning_rate": 0.00028194371694165393, "loss": 0.1984, "step": 332660 }, { "epoch": 13.78, "grad_norm": 1.34375, "learning_rate": 0.0002819329606127005, "loss": 0.1605, "step": 332670 }, { "epoch": 13.78, "grad_norm": 0.86328125, "learning_rate": 0.0002819222042236526, "loss": 0.2171, "step": 332680 }, { "epoch": 13.78, "grad_norm": 0.86328125, "learning_rate": 0.0002819114477745307, "loss": 0.1839, "step": 332690 }, { "epoch": 13.78, "grad_norm": 0.72265625, "learning_rate": 0.0002819006912653547, "loss": 0.2132, "step": 332700 }, { "epoch": 13.78, "grad_norm": 1.109375, "learning_rate": 0.00028188993469614526, "loss": 0.1632, "step": 332710 }, { "epoch": 13.78, "grad_norm": 0.921875, "learning_rate": 0.0002818791780669223, "loss": 0.1965, "step": 332720 }, { "epoch": 13.78, "grad_norm": 1.453125, "learning_rate": 0.0002818684213777061, "loss": 0.1866, "step": 332730 }, { "epoch": 13.78, "grad_norm": 1.0859375, "learning_rate": 0.00028185766462851715, "loss": 0.16, "step": 332740 }, { "epoch": 13.78, "grad_norm": 0.94140625, "learning_rate": 0.0002818469078193754, "loss": 0.2092, "step": 332750 }, { "epoch": 13.78, "grad_norm": 0.86328125, "learning_rate": 0.00028183615095030124, "loss": 0.143, "step": 332760 }, { "epoch": 13.78, "grad_norm": 0.6171875, "learning_rate": 0.0002818253940213149, "loss": 0.2017, "step": 332770 }, { "epoch": 13.78, "grad_norm": 1.765625, "learning_rate": 0.0002818146370324365, "loss": 0.2156, "step": 332780 }, { "epoch": 13.78, "grad_norm": 0.75390625, "learning_rate": 0.0002818038799836865, "loss": 0.186, "step": 332790 }, { "epoch": 13.78, "grad_norm": 0.796875, "learning_rate": 0.00028179312287508496, "loss": 0.2169, "step": 332800 }, { "epoch": 13.78, "grad_norm": 0.9375, "learning_rate": 0.0002817823657066522, "loss": 0.1658, "step": 332810 }, { "epoch": 13.79, "grad_norm": 0.5234375, "learning_rate": 0.00028177160847840855, "loss": 0.212, "step": 332820 }, { "epoch": 13.79, "grad_norm": 0.6640625, "learning_rate": 0.0002817608511903741, "loss": 0.1941, "step": 332830 }, { "epoch": 13.79, "grad_norm": 0.44921875, "learning_rate": 0.0002817500938425692, "loss": 0.2344, "step": 332840 }, { "epoch": 13.79, "grad_norm": 0.98828125, "learning_rate": 0.00028173933643501405, "loss": 0.1628, "step": 332850 }, { "epoch": 13.79, "grad_norm": 0.7578125, "learning_rate": 0.00028172857896772876, "loss": 0.1721, "step": 332860 }, { "epoch": 13.79, "grad_norm": 0.81640625, "learning_rate": 0.0002817178214407338, "loss": 0.1583, "step": 332870 }, { "epoch": 13.79, "grad_norm": 0.50390625, "learning_rate": 0.0002817070638540494, "loss": 0.2057, "step": 332880 }, { "epoch": 13.79, "grad_norm": 1.265625, "learning_rate": 0.0002816963062076956, "loss": 0.2105, "step": 332890 }, { "epoch": 13.79, "grad_norm": 0.765625, "learning_rate": 0.0002816855485016929, "loss": 0.1968, "step": 332900 }, { "epoch": 13.79, "grad_norm": 1.109375, "learning_rate": 0.00028167479073606136, "loss": 0.2022, "step": 332910 }, { "epoch": 13.79, "grad_norm": 0.9453125, "learning_rate": 0.00028166403291082125, "loss": 0.1366, "step": 332920 }, { "epoch": 13.79, "grad_norm": 1.3203125, "learning_rate": 0.0002816532750259929, "loss": 0.1936, "step": 332930 }, { "epoch": 13.79, "grad_norm": 0.333984375, "learning_rate": 0.0002816425170815965, "loss": 0.1931, "step": 332940 }, { "epoch": 13.79, "grad_norm": 1.078125, "learning_rate": 0.0002816317590776523, "loss": 0.1738, "step": 332950 }, { "epoch": 13.79, "grad_norm": 3.015625, "learning_rate": 0.0002816210010141805, "loss": 0.1714, "step": 332960 }, { "epoch": 13.79, "grad_norm": 0.5546875, "learning_rate": 0.0002816102428912014, "loss": 0.169, "step": 332970 }, { "epoch": 13.79, "grad_norm": 0.58203125, "learning_rate": 0.00028159948470873533, "loss": 0.2097, "step": 332980 }, { "epoch": 13.79, "grad_norm": 0.70703125, "learning_rate": 0.00028158872646680234, "loss": 0.2023, "step": 332990 }, { "epoch": 13.79, "grad_norm": 1.65625, "learning_rate": 0.0002815779681654228, "loss": 0.1554, "step": 333000 }, { "epoch": 13.79, "grad_norm": 1.9921875, "learning_rate": 0.000281567209804617, "loss": 0.2251, "step": 333010 }, { "epoch": 13.79, "grad_norm": 2.078125, "learning_rate": 0.000281556451384405, "loss": 0.2438, "step": 333020 }, { "epoch": 13.79, "grad_norm": 0.41015625, "learning_rate": 0.00028154569290480733, "loss": 0.2303, "step": 333030 }, { "epoch": 13.79, "grad_norm": 0.45703125, "learning_rate": 0.0002815349343658439, "loss": 0.1908, "step": 333040 }, { "epoch": 13.79, "grad_norm": 0.8359375, "learning_rate": 0.0002815241757675352, "loss": 0.2085, "step": 333050 }, { "epoch": 13.8, "grad_norm": 0.5546875, "learning_rate": 0.00028151341710990147, "loss": 0.1751, "step": 333060 }, { "epoch": 13.8, "grad_norm": 0.62109375, "learning_rate": 0.00028150265839296275, "loss": 0.2034, "step": 333070 }, { "epoch": 13.8, "grad_norm": 1.1796875, "learning_rate": 0.00028149189961673953, "loss": 0.1598, "step": 333080 }, { "epoch": 13.8, "grad_norm": 1.28125, "learning_rate": 0.00028148114078125204, "loss": 0.1744, "step": 333090 }, { "epoch": 13.8, "grad_norm": 0.875, "learning_rate": 0.00028147038188652026, "loss": 0.1958, "step": 333100 }, { "epoch": 13.8, "grad_norm": 0.373046875, "learning_rate": 0.0002814596229325647, "loss": 0.1687, "step": 333110 }, { "epoch": 13.8, "grad_norm": 0.47265625, "learning_rate": 0.00028144886391940553, "loss": 0.2256, "step": 333120 }, { "epoch": 13.8, "grad_norm": 0.45703125, "learning_rate": 0.000281438104847063, "loss": 0.1391, "step": 333130 }, { "epoch": 13.8, "grad_norm": 0.828125, "learning_rate": 0.0002814273457155574, "loss": 0.1786, "step": 333140 }, { "epoch": 13.8, "grad_norm": 1.0078125, "learning_rate": 0.00028141658652490876, "loss": 0.2083, "step": 333150 }, { "epoch": 13.8, "grad_norm": 1.21875, "learning_rate": 0.00028140582727513764, "loss": 0.237, "step": 333160 }, { "epoch": 13.8, "grad_norm": 0.88671875, "learning_rate": 0.00028139506796626417, "loss": 0.1913, "step": 333170 }, { "epoch": 13.8, "grad_norm": 1.2890625, "learning_rate": 0.00028138430859830845, "loss": 0.1909, "step": 333180 }, { "epoch": 13.8, "grad_norm": 0.91015625, "learning_rate": 0.000281373549171291, "loss": 0.17, "step": 333190 }, { "epoch": 13.8, "grad_norm": 0.69140625, "learning_rate": 0.00028136278968523176, "loss": 0.14, "step": 333200 }, { "epoch": 13.8, "grad_norm": 1.171875, "learning_rate": 0.0002813520301401512, "loss": 0.1646, "step": 333210 }, { "epoch": 13.8, "grad_norm": 0.73046875, "learning_rate": 0.00028134127053606957, "loss": 0.2261, "step": 333220 }, { "epoch": 13.8, "grad_norm": 0.275390625, "learning_rate": 0.0002813305108730069, "loss": 0.1681, "step": 333230 }, { "epoch": 13.8, "grad_norm": 0.75390625, "learning_rate": 0.0002813197511509838, "loss": 0.1649, "step": 333240 }, { "epoch": 13.8, "grad_norm": 1.0390625, "learning_rate": 0.00028130899137002017, "loss": 0.2141, "step": 333250 }, { "epoch": 13.8, "grad_norm": 1.0703125, "learning_rate": 0.0002812982315301364, "loss": 0.1451, "step": 333260 }, { "epoch": 13.8, "grad_norm": 0.7578125, "learning_rate": 0.0002812874716313528, "loss": 0.1563, "step": 333270 }, { "epoch": 13.8, "grad_norm": 0.388671875, "learning_rate": 0.0002812767116736895, "loss": 0.1776, "step": 333280 }, { "epoch": 13.8, "grad_norm": 0.8671875, "learning_rate": 0.00028126595165716686, "loss": 0.1722, "step": 333290 }, { "epoch": 13.81, "grad_norm": 0.205078125, "learning_rate": 0.00028125519158180506, "loss": 0.1459, "step": 333300 }, { "epoch": 13.81, "grad_norm": 0.291015625, "learning_rate": 0.0002812444314476243, "loss": 0.1963, "step": 333310 }, { "epoch": 13.81, "grad_norm": 0.64453125, "learning_rate": 0.000281233671254645, "loss": 0.2165, "step": 333320 }, { "epoch": 13.81, "grad_norm": 0.57421875, "learning_rate": 0.0002812229110028872, "loss": 0.187, "step": 333330 }, { "epoch": 13.81, "grad_norm": 1.265625, "learning_rate": 0.0002812121506923713, "loss": 0.197, "step": 333340 }, { "epoch": 13.81, "grad_norm": 0.66015625, "learning_rate": 0.0002812013903231176, "loss": 0.1735, "step": 333350 }, { "epoch": 13.81, "grad_norm": 1.21875, "learning_rate": 0.00028119062989514614, "loss": 0.1806, "step": 333360 }, { "epoch": 13.81, "grad_norm": 0.50390625, "learning_rate": 0.0002811798694084773, "loss": 0.1589, "step": 333370 }, { "epoch": 13.81, "grad_norm": 0.95703125, "learning_rate": 0.00028116910886313135, "loss": 0.2121, "step": 333380 }, { "epoch": 13.81, "grad_norm": 0.546875, "learning_rate": 0.0002811583482591284, "loss": 0.1936, "step": 333390 }, { "epoch": 13.81, "grad_norm": 1.0390625, "learning_rate": 0.0002811475875964889, "loss": 0.2135, "step": 333400 }, { "epoch": 13.81, "grad_norm": 0.85546875, "learning_rate": 0.00028113682687523297, "loss": 0.215, "step": 333410 }, { "epoch": 13.81, "grad_norm": 0.9140625, "learning_rate": 0.0002811260660953809, "loss": 0.1738, "step": 333420 }, { "epoch": 13.81, "grad_norm": 1.0390625, "learning_rate": 0.00028111530525695293, "loss": 0.2365, "step": 333430 }, { "epoch": 13.81, "grad_norm": 1.1171875, "learning_rate": 0.00028110454435996933, "loss": 0.1682, "step": 333440 }, { "epoch": 13.81, "grad_norm": 0.9765625, "learning_rate": 0.0002810937834044503, "loss": 0.2604, "step": 333450 }, { "epoch": 13.81, "grad_norm": 0.41015625, "learning_rate": 0.0002810830223904162, "loss": 0.1339, "step": 333460 }, { "epoch": 13.81, "grad_norm": 1.1796875, "learning_rate": 0.0002810722613178871, "loss": 0.2269, "step": 333470 }, { "epoch": 13.81, "grad_norm": 0.8203125, "learning_rate": 0.0002810615001868835, "loss": 0.1378, "step": 333480 }, { "epoch": 13.81, "grad_norm": 1.09375, "learning_rate": 0.0002810507389974254, "loss": 0.2039, "step": 333490 }, { "epoch": 13.81, "grad_norm": 0.435546875, "learning_rate": 0.00028103997774953316, "loss": 0.1769, "step": 333500 }, { "epoch": 13.81, "grad_norm": 0.52734375, "learning_rate": 0.00028102921644322713, "loss": 0.1809, "step": 333510 }, { "epoch": 13.81, "grad_norm": 0.83984375, "learning_rate": 0.0002810184550785274, "loss": 0.2415, "step": 333520 }, { "epoch": 13.81, "grad_norm": 0.6796875, "learning_rate": 0.00028100769365545426, "loss": 0.1919, "step": 333530 }, { "epoch": 13.82, "grad_norm": 1.140625, "learning_rate": 0.00028099693217402806, "loss": 0.1864, "step": 333540 }, { "epoch": 13.82, "grad_norm": 0.5703125, "learning_rate": 0.0002809861706342689, "loss": 0.1997, "step": 333550 }, { "epoch": 13.82, "grad_norm": 0.7734375, "learning_rate": 0.0002809754090361972, "loss": 0.2487, "step": 333560 }, { "epoch": 13.82, "grad_norm": 0.8359375, "learning_rate": 0.0002809646473798331, "loss": 0.1902, "step": 333570 }, { "epoch": 13.82, "grad_norm": 0.73046875, "learning_rate": 0.0002809538856651968, "loss": 0.17, "step": 333580 }, { "epoch": 13.82, "grad_norm": 0.9375, "learning_rate": 0.00028094312389230876, "loss": 0.1885, "step": 333590 }, { "epoch": 13.82, "grad_norm": 0.36328125, "learning_rate": 0.000280932362061189, "loss": 0.1888, "step": 333600 }, { "epoch": 13.82, "grad_norm": 2.5, "learning_rate": 0.0002809216001718579, "loss": 0.1574, "step": 333610 }, { "epoch": 13.82, "grad_norm": 0.6640625, "learning_rate": 0.00028091083822433564, "loss": 0.2086, "step": 333620 }, { "epoch": 13.82, "grad_norm": 0.408203125, "learning_rate": 0.0002809000762186426, "loss": 0.1582, "step": 333630 }, { "epoch": 13.82, "grad_norm": 0.6875, "learning_rate": 0.00028088931415479895, "loss": 0.189, "step": 333640 }, { "epoch": 13.82, "grad_norm": 0.69140625, "learning_rate": 0.0002808785520328249, "loss": 0.1602, "step": 333650 }, { "epoch": 13.82, "grad_norm": 0.8515625, "learning_rate": 0.0002808677898527408, "loss": 0.1664, "step": 333660 }, { "epoch": 13.82, "grad_norm": 1.125, "learning_rate": 0.00028085702761456685, "loss": 0.2136, "step": 333670 }, { "epoch": 13.82, "grad_norm": 1.9609375, "learning_rate": 0.0002808462653183233, "loss": 0.2203, "step": 333680 }, { "epoch": 13.82, "grad_norm": 0.5859375, "learning_rate": 0.0002808355029640304, "loss": 0.2344, "step": 333690 }, { "epoch": 13.82, "grad_norm": 1.09375, "learning_rate": 0.0002808247405517084, "loss": 0.2269, "step": 333700 }, { "epoch": 13.82, "grad_norm": 1.0546875, "learning_rate": 0.00028081397808137754, "loss": 0.1923, "step": 333710 }, { "epoch": 13.82, "grad_norm": 2.53125, "learning_rate": 0.00028080321555305814, "loss": 0.1705, "step": 333720 }, { "epoch": 13.82, "grad_norm": 1.890625, "learning_rate": 0.0002807924529667704, "loss": 0.1614, "step": 333730 }, { "epoch": 13.82, "grad_norm": 0.69921875, "learning_rate": 0.00028078169032253456, "loss": 0.1224, "step": 333740 }, { "epoch": 13.82, "grad_norm": 0.7734375, "learning_rate": 0.000280770927620371, "loss": 0.2209, "step": 333750 }, { "epoch": 13.82, "grad_norm": 1.5, "learning_rate": 0.0002807601648602998, "loss": 0.2059, "step": 333760 }, { "epoch": 13.82, "grad_norm": 1.265625, "learning_rate": 0.0002807494020423413, "loss": 0.2238, "step": 333770 }, { "epoch": 13.83, "grad_norm": 0.388671875, "learning_rate": 0.00028073863916651577, "loss": 0.2046, "step": 333780 }, { "epoch": 13.83, "grad_norm": 1.5, "learning_rate": 0.00028072787623284343, "loss": 0.1838, "step": 333790 }, { "epoch": 13.83, "grad_norm": 0.81640625, "learning_rate": 0.00028071711324134457, "loss": 0.1615, "step": 333800 }, { "epoch": 13.83, "grad_norm": 1.109375, "learning_rate": 0.0002807063501920394, "loss": 0.1916, "step": 333810 }, { "epoch": 13.83, "grad_norm": 0.93359375, "learning_rate": 0.0002806955870849481, "loss": 0.1797, "step": 333820 }, { "epoch": 13.83, "grad_norm": 1.0390625, "learning_rate": 0.00028068482392009113, "loss": 0.1532, "step": 333830 }, { "epoch": 13.83, "grad_norm": 0.515625, "learning_rate": 0.00028067406069748866, "loss": 0.1906, "step": 333840 }, { "epoch": 13.83, "grad_norm": 0.50390625, "learning_rate": 0.00028066329741716084, "loss": 0.2231, "step": 333850 }, { "epoch": 13.83, "grad_norm": 0.890625, "learning_rate": 0.00028065253407912803, "loss": 0.2044, "step": 333860 }, { "epoch": 13.83, "grad_norm": 1.1484375, "learning_rate": 0.00028064177068341047, "loss": 0.2301, "step": 333870 }, { "epoch": 13.83, "grad_norm": 0.357421875, "learning_rate": 0.00028063100723002836, "loss": 0.1822, "step": 333880 }, { "epoch": 13.83, "grad_norm": 0.90234375, "learning_rate": 0.00028062024371900215, "loss": 0.2216, "step": 333890 }, { "epoch": 13.83, "grad_norm": 0.90625, "learning_rate": 0.0002806094801503518, "loss": 0.1656, "step": 333900 }, { "epoch": 13.83, "grad_norm": 1.03125, "learning_rate": 0.0002805987165240977, "loss": 0.2084, "step": 333910 }, { "epoch": 13.83, "grad_norm": 0.470703125, "learning_rate": 0.0002805879528402603, "loss": 0.1979, "step": 333920 }, { "epoch": 13.83, "grad_norm": 1.1875, "learning_rate": 0.0002805771890988595, "loss": 0.2293, "step": 333930 }, { "epoch": 13.83, "grad_norm": 0.73046875, "learning_rate": 0.0002805664252999158, "loss": 0.1674, "step": 333940 }, { "epoch": 13.83, "grad_norm": 1.2734375, "learning_rate": 0.00028055566144344937, "loss": 0.2189, "step": 333950 }, { "epoch": 13.83, "grad_norm": 0.90625, "learning_rate": 0.00028054489752948045, "loss": 0.219, "step": 333960 }, { "epoch": 13.83, "grad_norm": 0.74609375, "learning_rate": 0.0002805341335580294, "loss": 0.2639, "step": 333970 }, { "epoch": 13.83, "grad_norm": 0.65234375, "learning_rate": 0.00028052336952911637, "loss": 0.1915, "step": 333980 }, { "epoch": 13.83, "grad_norm": 0.625, "learning_rate": 0.0002805126054427617, "loss": 0.2027, "step": 333990 }, { "epoch": 13.83, "grad_norm": 0.0, "learning_rate": 0.0002805018412989856, "loss": 0.2027, "step": 334000 }, { "epoch": 13.83, "grad_norm": 1.03125, "learning_rate": 0.0002804910770978082, "loss": 0.1766, "step": 334010 }, { "epoch": 13.84, "grad_norm": 1.2421875, "learning_rate": 0.00028048031283925, "loss": 0.2301, "step": 334020 }, { "epoch": 13.84, "grad_norm": 0.314453125, "learning_rate": 0.0002804695485233312, "loss": 0.2018, "step": 334030 }, { "epoch": 13.84, "grad_norm": 1.34375, "learning_rate": 0.0002804587841500719, "loss": 0.2038, "step": 334040 }, { "epoch": 13.84, "grad_norm": 0.66796875, "learning_rate": 0.00028044801971949246, "loss": 0.1369, "step": 334050 }, { "epoch": 13.84, "grad_norm": 0.94140625, "learning_rate": 0.00028043725523161316, "loss": 0.1427, "step": 334060 }, { "epoch": 13.84, "grad_norm": 0.61328125, "learning_rate": 0.0002804264906864542, "loss": 0.1976, "step": 334070 }, { "epoch": 13.84, "grad_norm": 1.0390625, "learning_rate": 0.000280415726084036, "loss": 0.1741, "step": 334080 }, { "epoch": 13.84, "grad_norm": 1.0859375, "learning_rate": 0.00028040496142437847, "loss": 0.1824, "step": 334090 }, { "epoch": 13.84, "grad_norm": 1.0390625, "learning_rate": 0.0002803941967075023, "loss": 0.1983, "step": 334100 }, { "epoch": 13.84, "grad_norm": 0.88671875, "learning_rate": 0.0002803834319334274, "loss": 0.1886, "step": 334110 }, { "epoch": 13.84, "grad_norm": 1.21875, "learning_rate": 0.0002803726671021742, "loss": 0.1308, "step": 334120 }, { "epoch": 13.84, "grad_norm": 1.1171875, "learning_rate": 0.000280361902213763, "loss": 0.1999, "step": 334130 }, { "epoch": 13.84, "grad_norm": 0.6484375, "learning_rate": 0.0002803511372682138, "loss": 0.1699, "step": 334140 }, { "epoch": 13.84, "grad_norm": 0.984375, "learning_rate": 0.0002803403722655471, "loss": 0.1941, "step": 334150 }, { "epoch": 13.84, "grad_norm": 0.55859375, "learning_rate": 0.0002803296072057832, "loss": 0.2347, "step": 334160 }, { "epoch": 13.84, "grad_norm": 1.0546875, "learning_rate": 0.00028031884208894214, "loss": 0.1779, "step": 334170 }, { "epoch": 13.84, "grad_norm": 0.92578125, "learning_rate": 0.0002803080769150444, "loss": 0.1857, "step": 334180 }, { "epoch": 13.84, "grad_norm": 1.046875, "learning_rate": 0.00028029731168411004, "loss": 0.2249, "step": 334190 }, { "epoch": 13.84, "grad_norm": 0.44921875, "learning_rate": 0.0002802865463961594, "loss": 0.1334, "step": 334200 }, { "epoch": 13.84, "grad_norm": 0.6796875, "learning_rate": 0.0002802757810512129, "loss": 0.199, "step": 334210 }, { "epoch": 13.84, "grad_norm": 1.390625, "learning_rate": 0.00028026501564929043, "loss": 0.1974, "step": 334220 }, { "epoch": 13.84, "grad_norm": 0.361328125, "learning_rate": 0.0002802542501904126, "loss": 0.2199, "step": 334230 }, { "epoch": 13.84, "grad_norm": 0.42578125, "learning_rate": 0.0002802434846745996, "loss": 0.1932, "step": 334240 }, { "epoch": 13.84, "grad_norm": 0.61328125, "learning_rate": 0.00028023271910187147, "loss": 0.2093, "step": 334250 }, { "epoch": 13.85, "grad_norm": 0.69140625, "learning_rate": 0.0002802219534722488, "loss": 0.1833, "step": 334260 }, { "epoch": 13.85, "grad_norm": 0.474609375, "learning_rate": 0.0002802111877857515, "loss": 0.1564, "step": 334270 }, { "epoch": 13.85, "grad_norm": 0.6640625, "learning_rate": 0.0002802004220424001, "loss": 0.1622, "step": 334280 }, { "epoch": 13.85, "grad_norm": 0.337890625, "learning_rate": 0.00028018965624221484, "loss": 0.2478, "step": 334290 }, { "epoch": 13.85, "grad_norm": 0.51953125, "learning_rate": 0.0002801788903852157, "loss": 0.1221, "step": 334300 }, { "epoch": 13.85, "grad_norm": 0.72265625, "learning_rate": 0.0002801681244714233, "loss": 0.2178, "step": 334310 }, { "epoch": 13.85, "grad_norm": 0.515625, "learning_rate": 0.00028015735850085775, "loss": 0.2122, "step": 334320 }, { "epoch": 13.85, "grad_norm": 0.72265625, "learning_rate": 0.00028014659247353925, "loss": 0.2189, "step": 334330 }, { "epoch": 13.85, "grad_norm": 0.5078125, "learning_rate": 0.0002801358263894882, "loss": 0.2247, "step": 334340 }, { "epoch": 13.85, "grad_norm": 1.578125, "learning_rate": 0.0002801250602487247, "loss": 0.2162, "step": 334350 }, { "epoch": 13.85, "grad_norm": 0.94140625, "learning_rate": 0.0002801142940512691, "loss": 0.2008, "step": 334360 }, { "epoch": 13.85, "grad_norm": 0.609375, "learning_rate": 0.00028010352779714174, "loss": 0.2223, "step": 334370 }, { "epoch": 13.85, "grad_norm": 0.7109375, "learning_rate": 0.00028009276148636264, "loss": 0.1997, "step": 334380 }, { "epoch": 13.85, "grad_norm": 0.80078125, "learning_rate": 0.0002800819951189523, "loss": 0.1692, "step": 334390 }, { "epoch": 13.85, "grad_norm": 0.71875, "learning_rate": 0.0002800712286949309, "loss": 0.1939, "step": 334400 }, { "epoch": 13.85, "grad_norm": 1.3125, "learning_rate": 0.00028006046221431863, "loss": 0.1987, "step": 334410 }, { "epoch": 13.85, "grad_norm": 0.86328125, "learning_rate": 0.00028004969567713595, "loss": 0.219, "step": 334420 }, { "epoch": 13.85, "grad_norm": 0.66015625, "learning_rate": 0.0002800389290834029, "loss": 0.1427, "step": 334430 }, { "epoch": 13.85, "grad_norm": 1.171875, "learning_rate": 0.0002800281624331398, "loss": 0.2, "step": 334440 }, { "epoch": 13.85, "grad_norm": 1.515625, "learning_rate": 0.000280017395726367, "loss": 0.2268, "step": 334450 }, { "epoch": 13.85, "grad_norm": 0.8671875, "learning_rate": 0.00028000662896310465, "loss": 0.141, "step": 334460 }, { "epoch": 13.85, "grad_norm": 0.6484375, "learning_rate": 0.0002799958621433731, "loss": 0.1832, "step": 334470 }, { "epoch": 13.85, "grad_norm": 1.5390625, "learning_rate": 0.0002799850952671926, "loss": 0.1784, "step": 334480 }, { "epoch": 13.85, "grad_norm": 1.375, "learning_rate": 0.0002799743283345833, "loss": 0.2396, "step": 334490 }, { "epoch": 13.85, "grad_norm": 0.7734375, "learning_rate": 0.00027996356134556567, "loss": 0.207, "step": 334500 }, { "epoch": 13.86, "grad_norm": 1.34375, "learning_rate": 0.00027995279430015975, "loss": 0.1409, "step": 334510 }, { "epoch": 13.86, "grad_norm": 0.5625, "learning_rate": 0.00027994202719838594, "loss": 0.2278, "step": 334520 }, { "epoch": 13.86, "grad_norm": 0.408203125, "learning_rate": 0.0002799312600402645, "loss": 0.2461, "step": 334530 }, { "epoch": 13.86, "grad_norm": 1.140625, "learning_rate": 0.00027992049282581555, "loss": 0.2147, "step": 334540 }, { "epoch": 13.86, "grad_norm": 1.3828125, "learning_rate": 0.0002799097255550596, "loss": 0.2047, "step": 334550 }, { "epoch": 13.86, "grad_norm": 0.2236328125, "learning_rate": 0.00027989895822801673, "loss": 0.2319, "step": 334560 }, { "epoch": 13.86, "grad_norm": 0.43359375, "learning_rate": 0.0002798881908447072, "loss": 0.1704, "step": 334570 }, { "epoch": 13.86, "grad_norm": 2.59375, "learning_rate": 0.0002798774234051514, "loss": 0.2479, "step": 334580 }, { "epoch": 13.86, "grad_norm": 0.8671875, "learning_rate": 0.00027986665590936955, "loss": 0.1688, "step": 334590 }, { "epoch": 13.86, "grad_norm": 1.265625, "learning_rate": 0.0002798558883573818, "loss": 0.1811, "step": 334600 }, { "epoch": 13.86, "grad_norm": 0.9375, "learning_rate": 0.0002798451207492085, "loss": 0.2567, "step": 334610 }, { "epoch": 13.86, "grad_norm": 0.9921875, "learning_rate": 0.0002798343530848699, "loss": 0.1717, "step": 334620 }, { "epoch": 13.86, "grad_norm": 0.875, "learning_rate": 0.0002798235853643863, "loss": 0.1782, "step": 334630 }, { "epoch": 13.86, "grad_norm": 0.82421875, "learning_rate": 0.00027981281758777786, "loss": 0.1695, "step": 334640 }, { "epoch": 13.86, "grad_norm": 1.2265625, "learning_rate": 0.000279802049755065, "loss": 0.1715, "step": 334650 }, { "epoch": 13.86, "grad_norm": 0.2080078125, "learning_rate": 0.0002797912818662679, "loss": 0.1856, "step": 334660 }, { "epoch": 13.86, "grad_norm": 0.6640625, "learning_rate": 0.0002797805139214068, "loss": 0.2055, "step": 334670 }, { "epoch": 13.86, "grad_norm": 1.015625, "learning_rate": 0.00027976974592050193, "loss": 0.2358, "step": 334680 }, { "epoch": 13.86, "grad_norm": 0.8203125, "learning_rate": 0.0002797589778635737, "loss": 0.1696, "step": 334690 }, { "epoch": 13.86, "grad_norm": 1.5078125, "learning_rate": 0.00027974820975064217, "loss": 0.16, "step": 334700 }, { "epoch": 13.86, "grad_norm": 0.43359375, "learning_rate": 0.00027973744158172784, "loss": 0.213, "step": 334710 }, { "epoch": 13.86, "grad_norm": 0.66015625, "learning_rate": 0.00027972667335685085, "loss": 0.2309, "step": 334720 }, { "epoch": 13.86, "grad_norm": 1.125, "learning_rate": 0.00027971590507603147, "loss": 0.1953, "step": 334730 }, { "epoch": 13.86, "grad_norm": 0.73046875, "learning_rate": 0.0002797051367392899, "loss": 0.2455, "step": 334740 }, { "epoch": 13.87, "grad_norm": 0.67578125, "learning_rate": 0.0002796943683466465, "loss": 0.1844, "step": 334750 }, { "epoch": 13.87, "grad_norm": 0.85546875, "learning_rate": 0.00027968359989812155, "loss": 0.2074, "step": 334760 }, { "epoch": 13.87, "grad_norm": 0.75390625, "learning_rate": 0.00027967283139373523, "loss": 0.1668, "step": 334770 }, { "epoch": 13.87, "grad_norm": 1.5859375, "learning_rate": 0.00027966206283350784, "loss": 0.1588, "step": 334780 }, { "epoch": 13.87, "grad_norm": 0.55078125, "learning_rate": 0.0002796512942174597, "loss": 0.1884, "step": 334790 }, { "epoch": 13.87, "grad_norm": 1.1328125, "learning_rate": 0.000279640525545611, "loss": 0.2084, "step": 334800 }, { "epoch": 13.87, "grad_norm": 2.140625, "learning_rate": 0.00027962975681798197, "loss": 0.2014, "step": 334810 }, { "epoch": 13.87, "grad_norm": 0.9453125, "learning_rate": 0.00027961898803459303, "loss": 0.194, "step": 334820 }, { "epoch": 13.87, "grad_norm": 0.84765625, "learning_rate": 0.00027960821919546433, "loss": 0.2021, "step": 334830 }, { "epoch": 13.87, "grad_norm": 0.9375, "learning_rate": 0.0002795974503006161, "loss": 0.1659, "step": 334840 }, { "epoch": 13.87, "grad_norm": 0.83203125, "learning_rate": 0.0002795866813500687, "loss": 0.2603, "step": 334850 }, { "epoch": 13.87, "grad_norm": 1.3671875, "learning_rate": 0.00027957591234384235, "loss": 0.1721, "step": 334860 }, { "epoch": 13.87, "grad_norm": 0.65625, "learning_rate": 0.0002795651432819574, "loss": 0.2228, "step": 334870 }, { "epoch": 13.87, "grad_norm": 1.3125, "learning_rate": 0.000279554374164434, "loss": 0.1724, "step": 334880 }, { "epoch": 13.87, "grad_norm": 1.375, "learning_rate": 0.0002795436049912924, "loss": 0.1398, "step": 334890 }, { "epoch": 13.87, "grad_norm": 1.890625, "learning_rate": 0.000279532835762553, "loss": 0.2173, "step": 334900 }, { "epoch": 13.87, "grad_norm": 0.515625, "learning_rate": 0.00027952206647823593, "loss": 0.1784, "step": 334910 }, { "epoch": 13.87, "grad_norm": 0.93359375, "learning_rate": 0.0002795112971383615, "loss": 0.2326, "step": 334920 }, { "epoch": 13.87, "grad_norm": 0.81640625, "learning_rate": 0.0002795005277429501, "loss": 0.1498, "step": 334930 }, { "epoch": 13.87, "grad_norm": 0.890625, "learning_rate": 0.00027948975829202185, "loss": 0.2273, "step": 334940 }, { "epoch": 13.87, "grad_norm": 0.67578125, "learning_rate": 0.00027947898878559707, "loss": 0.1944, "step": 334950 }, { "epoch": 13.87, "grad_norm": 1.0390625, "learning_rate": 0.00027946821922369595, "loss": 0.1615, "step": 334960 }, { "epoch": 13.87, "grad_norm": 0.94140625, "learning_rate": 0.0002794574496063389, "loss": 0.1454, "step": 334970 }, { "epoch": 13.87, "grad_norm": 0.1728515625, "learning_rate": 0.0002794466799335461, "loss": 0.2127, "step": 334980 }, { "epoch": 13.88, "grad_norm": 0.07373046875, "learning_rate": 0.0002794359102053378, "loss": 0.184, "step": 334990 }, { "epoch": 13.88, "grad_norm": 1.171875, "learning_rate": 0.0002794251404217343, "loss": 0.2175, "step": 335000 }, { "epoch": 13.88, "grad_norm": 0.46484375, "learning_rate": 0.0002794143705827558, "loss": 0.1478, "step": 335010 }, { "epoch": 13.88, "grad_norm": 0.87890625, "learning_rate": 0.00027940360068842273, "loss": 0.1652, "step": 335020 }, { "epoch": 13.88, "grad_norm": 0.46484375, "learning_rate": 0.0002793928307387553, "loss": 0.1957, "step": 335030 }, { "epoch": 13.88, "grad_norm": 0.53515625, "learning_rate": 0.0002793820607337736, "loss": 0.1783, "step": 335040 }, { "epoch": 13.88, "grad_norm": 1.359375, "learning_rate": 0.00027937129067349807, "loss": 0.1879, "step": 335050 }, { "epoch": 13.88, "grad_norm": 0.35546875, "learning_rate": 0.000279360520557949, "loss": 0.173, "step": 335060 }, { "epoch": 13.88, "grad_norm": 0.412109375, "learning_rate": 0.00027934975038714654, "loss": 0.2028, "step": 335070 }, { "epoch": 13.88, "grad_norm": 0.86328125, "learning_rate": 0.0002793389801611111, "loss": 0.168, "step": 335080 }, { "epoch": 13.88, "grad_norm": 1.65625, "learning_rate": 0.0002793282098798628, "loss": 0.1739, "step": 335090 }, { "epoch": 13.88, "grad_norm": 0.69140625, "learning_rate": 0.00027931743954342196, "loss": 0.1597, "step": 335100 }, { "epoch": 13.88, "grad_norm": 1.5625, "learning_rate": 0.000279306669151809, "loss": 0.2034, "step": 335110 }, { "epoch": 13.88, "grad_norm": 0.69921875, "learning_rate": 0.0002792958987050439, "loss": 0.239, "step": 335120 }, { "epoch": 13.88, "grad_norm": 0.5546875, "learning_rate": 0.00027928512820314715, "loss": 0.2077, "step": 335130 }, { "epoch": 13.88, "grad_norm": 1.2578125, "learning_rate": 0.000279274357646139, "loss": 0.1501, "step": 335140 }, { "epoch": 13.88, "grad_norm": 0.8203125, "learning_rate": 0.0002792635870340395, "loss": 0.1604, "step": 335150 }, { "epoch": 13.88, "grad_norm": 1.3828125, "learning_rate": 0.0002792528163668693, "loss": 0.201, "step": 335160 }, { "epoch": 13.88, "grad_norm": 0.55078125, "learning_rate": 0.0002792420456446483, "loss": 0.1915, "step": 335170 }, { "epoch": 13.88, "grad_norm": 0.83984375, "learning_rate": 0.000279231274867397, "loss": 0.2356, "step": 335180 }, { "epoch": 13.88, "grad_norm": 0.9140625, "learning_rate": 0.00027922050403513557, "loss": 0.1683, "step": 335190 }, { "epoch": 13.88, "grad_norm": 0.58984375, "learning_rate": 0.00027920973314788435, "loss": 0.1903, "step": 335200 }, { "epoch": 13.88, "grad_norm": 0.83984375, "learning_rate": 0.0002791989622056635, "loss": 0.1133, "step": 335210 }, { "epoch": 13.88, "grad_norm": 0.875, "learning_rate": 0.00027918819120849336, "loss": 0.1947, "step": 335220 }, { "epoch": 13.89, "grad_norm": 8.344650268554688e-05, "learning_rate": 0.00027917742015639424, "loss": 0.1846, "step": 335230 }, { "epoch": 13.89, "grad_norm": 1.140625, "learning_rate": 0.00027916664904938636, "loss": 0.1866, "step": 335240 }, { "epoch": 13.89, "grad_norm": 0.89453125, "learning_rate": 0.00027915587788749003, "loss": 0.1888, "step": 335250 }, { "epoch": 13.89, "grad_norm": 0.466796875, "learning_rate": 0.0002791451066707254, "loss": 0.1856, "step": 335260 }, { "epoch": 13.89, "grad_norm": 0.609375, "learning_rate": 0.0002791343353991129, "loss": 0.1732, "step": 335270 }, { "epoch": 13.89, "grad_norm": 1.1328125, "learning_rate": 0.0002791235640726727, "loss": 0.1922, "step": 335280 }, { "epoch": 13.89, "grad_norm": 0.474609375, "learning_rate": 0.00027911279269142507, "loss": 0.1768, "step": 335290 }, { "epoch": 13.89, "grad_norm": 1.2890625, "learning_rate": 0.00027910202125539034, "loss": 0.1937, "step": 335300 }, { "epoch": 13.89, "grad_norm": 1.265625, "learning_rate": 0.00027909124976458876, "loss": 0.2186, "step": 335310 }, { "epoch": 13.89, "grad_norm": 0.890625, "learning_rate": 0.0002790804782190406, "loss": 0.2211, "step": 335320 }, { "epoch": 13.89, "grad_norm": 0.56640625, "learning_rate": 0.00027906970661876606, "loss": 0.2189, "step": 335330 }, { "epoch": 13.89, "grad_norm": 1.015625, "learning_rate": 0.00027905893496378553, "loss": 0.1687, "step": 335340 }, { "epoch": 13.89, "grad_norm": 0.58203125, "learning_rate": 0.0002790481632541192, "loss": 0.1309, "step": 335350 }, { "epoch": 13.89, "grad_norm": 0.9609375, "learning_rate": 0.0002790373914897874, "loss": 0.204, "step": 335360 }, { "epoch": 13.89, "grad_norm": 0.373046875, "learning_rate": 0.00027902661967081025, "loss": 0.1627, "step": 335370 }, { "epoch": 13.89, "grad_norm": 1.2890625, "learning_rate": 0.0002790158477972082, "loss": 0.1815, "step": 335380 }, { "epoch": 13.89, "grad_norm": 1.015625, "learning_rate": 0.00027900507586900154, "loss": 0.1792, "step": 335390 }, { "epoch": 13.89, "grad_norm": 0.8828125, "learning_rate": 0.0002789943038862103, "loss": 0.1877, "step": 335400 }, { "epoch": 13.89, "grad_norm": 1.1015625, "learning_rate": 0.0002789835318488551, "loss": 0.206, "step": 335410 }, { "epoch": 13.89, "grad_norm": 0.91796875, "learning_rate": 0.0002789727597569559, "loss": 0.2027, "step": 335420 }, { "epoch": 13.89, "grad_norm": 0.72265625, "learning_rate": 0.0002789619876105331, "loss": 0.1569, "step": 335430 }, { "epoch": 13.89, "grad_norm": 1.1015625, "learning_rate": 0.00027895121540960703, "loss": 0.225, "step": 335440 }, { "epoch": 13.89, "grad_norm": 0.91015625, "learning_rate": 0.0002789404431541979, "loss": 0.2077, "step": 335450 }, { "epoch": 13.89, "grad_norm": 1.703125, "learning_rate": 0.00027892967084432586, "loss": 0.2337, "step": 335460 }, { "epoch": 13.9, "grad_norm": 1.0, "learning_rate": 0.0002789188984800114, "loss": 0.2185, "step": 335470 }, { "epoch": 13.9, "grad_norm": 0.515625, "learning_rate": 0.0002789081260612746, "loss": 0.1966, "step": 335480 }, { "epoch": 13.9, "grad_norm": 1.4453125, "learning_rate": 0.000278897353588136, "loss": 0.2225, "step": 335490 }, { "epoch": 13.9, "grad_norm": 1.2265625, "learning_rate": 0.0002788865810606156, "loss": 0.1834, "step": 335500 }, { "epoch": 13.9, "grad_norm": 0.7578125, "learning_rate": 0.00027887580847873376, "loss": 0.2095, "step": 335510 }, { "epoch": 13.9, "grad_norm": 0.82421875, "learning_rate": 0.0002788650358425108, "loss": 0.1423, "step": 335520 }, { "epoch": 13.9, "grad_norm": 0.50390625, "learning_rate": 0.00027885426315196694, "loss": 0.2145, "step": 335530 }, { "epoch": 13.9, "grad_norm": 0.65625, "learning_rate": 0.0002788434904071225, "loss": 0.2068, "step": 335540 }, { "epoch": 13.9, "grad_norm": 1.1875, "learning_rate": 0.00027883271760799767, "loss": 0.2389, "step": 335550 }, { "epoch": 13.9, "grad_norm": 0.5, "learning_rate": 0.0002788219447546128, "loss": 0.2167, "step": 335560 }, { "epoch": 13.9, "grad_norm": 0.66796875, "learning_rate": 0.00027881117184698826, "loss": 0.185, "step": 335570 }, { "epoch": 13.9, "grad_norm": 1.234375, "learning_rate": 0.00027880039888514405, "loss": 0.1787, "step": 335580 }, { "epoch": 13.9, "grad_norm": 0.26953125, "learning_rate": 0.0002787896258691006, "loss": 0.1852, "step": 335590 }, { "epoch": 13.9, "grad_norm": 0.48828125, "learning_rate": 0.0002787788527988783, "loss": 0.1944, "step": 335600 }, { "epoch": 13.9, "grad_norm": 0.419921875, "learning_rate": 0.0002787680796744972, "loss": 0.2103, "step": 335610 }, { "epoch": 13.9, "grad_norm": 0.75390625, "learning_rate": 0.00027875730649597777, "loss": 0.2357, "step": 335620 }, { "epoch": 13.9, "grad_norm": 1.0546875, "learning_rate": 0.0002787465332633401, "loss": 0.1892, "step": 335630 }, { "epoch": 13.9, "grad_norm": 0.9453125, "learning_rate": 0.0002787357599766046, "loss": 0.185, "step": 335640 }, { "epoch": 13.9, "grad_norm": 0.61328125, "learning_rate": 0.00027872498663579156, "loss": 0.1926, "step": 335650 }, { "epoch": 13.9, "grad_norm": 0.90234375, "learning_rate": 0.0002787142132409212, "loss": 0.2685, "step": 335660 }, { "epoch": 13.9, "grad_norm": 0.94140625, "learning_rate": 0.00027870343979201367, "loss": 0.1931, "step": 335670 }, { "epoch": 13.9, "grad_norm": 0.283203125, "learning_rate": 0.0002786926662890895, "loss": 0.1571, "step": 335680 }, { "epoch": 13.9, "grad_norm": 0.85546875, "learning_rate": 0.00027868189273216865, "loss": 0.1609, "step": 335690 }, { "epoch": 13.9, "grad_norm": 0.8671875, "learning_rate": 0.00027867111912127174, "loss": 0.1857, "step": 335700 }, { "epoch": 13.91, "grad_norm": 0.8125, "learning_rate": 0.0002786603454564188, "loss": 0.1802, "step": 335710 }, { "epoch": 13.91, "grad_norm": 1.0625, "learning_rate": 0.00027864957173763023, "loss": 0.2024, "step": 335720 }, { "epoch": 13.91, "grad_norm": 1.046875, "learning_rate": 0.0002786387979649263, "loss": 0.1607, "step": 335730 }, { "epoch": 13.91, "grad_norm": 1.7265625, "learning_rate": 0.0002786280241383271, "loss": 0.2019, "step": 335740 }, { "epoch": 13.91, "grad_norm": 1.40625, "learning_rate": 0.00027861725025785317, "loss": 0.2024, "step": 335750 }, { "epoch": 13.91, "grad_norm": 0.8046875, "learning_rate": 0.0002786064763235247, "loss": 0.1725, "step": 335760 }, { "epoch": 13.91, "grad_norm": 1.1796875, "learning_rate": 0.0002785957023353618, "loss": 0.196, "step": 335770 }, { "epoch": 13.91, "grad_norm": 0.74609375, "learning_rate": 0.000278584928293385, "loss": 0.183, "step": 335780 }, { "epoch": 13.91, "grad_norm": 1.046875, "learning_rate": 0.0002785741541976144, "loss": 0.1706, "step": 335790 }, { "epoch": 13.91, "grad_norm": 0.78515625, "learning_rate": 0.0002785633800480703, "loss": 0.1893, "step": 335800 }, { "epoch": 13.91, "grad_norm": 0.7890625, "learning_rate": 0.00027855260584477306, "loss": 0.2629, "step": 335810 }, { "epoch": 13.91, "grad_norm": 0.40625, "learning_rate": 0.0002785418315877428, "loss": 0.2056, "step": 335820 }, { "epoch": 13.91, "grad_norm": 1.3671875, "learning_rate": 0.00027853105727700003, "loss": 0.1968, "step": 335830 }, { "epoch": 13.91, "grad_norm": 1.265625, "learning_rate": 0.00027852028291256483, "loss": 0.1845, "step": 335840 }, { "epoch": 13.91, "grad_norm": 1.9140625, "learning_rate": 0.00027850950849445745, "loss": 0.1504, "step": 335850 }, { "epoch": 13.91, "grad_norm": 0.703125, "learning_rate": 0.0002784987340226984, "loss": 0.1898, "step": 335860 }, { "epoch": 13.91, "grad_norm": 0.78515625, "learning_rate": 0.00027848795949730773, "loss": 0.2588, "step": 335870 }, { "epoch": 13.91, "grad_norm": 0.4453125, "learning_rate": 0.0002784771849183058, "loss": 0.1692, "step": 335880 }, { "epoch": 13.91, "grad_norm": 0.478515625, "learning_rate": 0.0002784664102857129, "loss": 0.2029, "step": 335890 }, { "epoch": 13.91, "grad_norm": 0.5859375, "learning_rate": 0.0002784556355995492, "loss": 0.2044, "step": 335900 }, { "epoch": 13.91, "grad_norm": 0.765625, "learning_rate": 0.0002784448608598352, "loss": 0.2016, "step": 335910 }, { "epoch": 13.91, "grad_norm": 0.52734375, "learning_rate": 0.00027843408606659107, "loss": 0.1528, "step": 335920 }, { "epoch": 13.91, "grad_norm": 0.90234375, "learning_rate": 0.0002784233112198369, "loss": 0.1537, "step": 335930 }, { "epoch": 13.91, "grad_norm": 0.984375, "learning_rate": 0.00027841253631959325, "loss": 0.1634, "step": 335940 }, { "epoch": 13.92, "grad_norm": 1.015625, "learning_rate": 0.00027840176136588023, "loss": 0.2507, "step": 335950 }, { "epoch": 13.92, "grad_norm": 0.484375, "learning_rate": 0.0002783909863587181, "loss": 0.1694, "step": 335960 }, { "epoch": 13.92, "grad_norm": 0.609375, "learning_rate": 0.0002783802112981273, "loss": 0.1871, "step": 335970 }, { "epoch": 13.92, "grad_norm": 1.2734375, "learning_rate": 0.000278369436184128, "loss": 0.1664, "step": 335980 }, { "epoch": 13.92, "grad_norm": 0.84375, "learning_rate": 0.00027835866101674046, "loss": 0.1941, "step": 335990 }, { "epoch": 13.92, "grad_norm": 0.59765625, "learning_rate": 0.00027834788579598503, "loss": 0.209, "step": 336000 }, { "epoch": 13.92, "grad_norm": 0.671875, "learning_rate": 0.0002783371105218818, "loss": 0.2089, "step": 336010 }, { "epoch": 13.92, "grad_norm": 1.40625, "learning_rate": 0.0002783263351944513, "loss": 0.2127, "step": 336020 }, { "epoch": 13.92, "grad_norm": 0.41796875, "learning_rate": 0.0002783155598137137, "loss": 0.1802, "step": 336030 }, { "epoch": 13.92, "grad_norm": 0.70703125, "learning_rate": 0.00027830478437968924, "loss": 0.2116, "step": 336040 }, { "epoch": 13.92, "grad_norm": 0.859375, "learning_rate": 0.0002782940088923983, "loss": 0.2125, "step": 336050 }, { "epoch": 13.92, "grad_norm": 0.90234375, "learning_rate": 0.00027828323335186094, "loss": 0.1965, "step": 336060 }, { "epoch": 13.92, "grad_norm": 1.796875, "learning_rate": 0.00027827245775809774, "loss": 0.2051, "step": 336070 }, { "epoch": 13.92, "grad_norm": 0.87109375, "learning_rate": 0.00027826168211112884, "loss": 0.2278, "step": 336080 }, { "epoch": 13.92, "grad_norm": 0.78125, "learning_rate": 0.00027825090641097434, "loss": 0.181, "step": 336090 }, { "epoch": 13.92, "grad_norm": 1.0703125, "learning_rate": 0.00027824013065765485, "loss": 0.1891, "step": 336100 }, { "epoch": 13.92, "grad_norm": 0.84375, "learning_rate": 0.0002782293548511904, "loss": 0.1859, "step": 336110 }, { "epoch": 13.92, "grad_norm": 1.1484375, "learning_rate": 0.00027821857899160135, "loss": 0.2149, "step": 336120 }, { "epoch": 13.92, "grad_norm": 0.84375, "learning_rate": 0.00027820780307890805, "loss": 0.2021, "step": 336130 }, { "epoch": 13.92, "grad_norm": 0.7265625, "learning_rate": 0.0002781970271131305, "loss": 0.2053, "step": 336140 }, { "epoch": 13.92, "grad_norm": 0.6953125, "learning_rate": 0.00027818625109428944, "loss": 0.21, "step": 336150 }, { "epoch": 13.92, "grad_norm": 0.455078125, "learning_rate": 0.0002781754750224048, "loss": 0.1808, "step": 336160 }, { "epoch": 13.92, "grad_norm": 0.1796875, "learning_rate": 0.00027816469889749697, "loss": 0.1883, "step": 336170 }, { "epoch": 13.92, "grad_norm": 0.345703125, "learning_rate": 0.0002781539227195863, "loss": 0.1894, "step": 336180 }, { "epoch": 13.92, "grad_norm": 1.109375, "learning_rate": 0.0002781431464886929, "loss": 0.2191, "step": 336190 }, { "epoch": 13.93, "grad_norm": 1.359375, "learning_rate": 0.0002781323702048371, "loss": 0.2267, "step": 336200 }, { "epoch": 13.93, "grad_norm": 0.81640625, "learning_rate": 0.0002781215938680393, "loss": 0.2392, "step": 336210 }, { "epoch": 13.93, "grad_norm": 1.0234375, "learning_rate": 0.00027811081747831955, "loss": 0.2099, "step": 336220 }, { "epoch": 13.93, "grad_norm": 0.45703125, "learning_rate": 0.0002781000410356985, "loss": 0.1822, "step": 336230 }, { "epoch": 13.93, "grad_norm": 1.8984375, "learning_rate": 0.00027808926454019604, "loss": 0.1976, "step": 336240 }, { "epoch": 13.93, "grad_norm": 0.78125, "learning_rate": 0.00027807848799183267, "loss": 0.2233, "step": 336250 }, { "epoch": 13.93, "grad_norm": 1.046875, "learning_rate": 0.00027806771139062865, "loss": 0.1781, "step": 336260 }, { "epoch": 13.93, "grad_norm": 1.734375, "learning_rate": 0.0002780569347366042, "loss": 0.1761, "step": 336270 }, { "epoch": 13.93, "grad_norm": 0.54296875, "learning_rate": 0.00027804615802977964, "loss": 0.2115, "step": 336280 }, { "epoch": 13.93, "grad_norm": 0.8671875, "learning_rate": 0.0002780353812701752, "loss": 0.2051, "step": 336290 }, { "epoch": 13.93, "grad_norm": 0.3515625, "learning_rate": 0.0002780246044578112, "loss": 0.1654, "step": 336300 }, { "epoch": 13.93, "grad_norm": 0.6171875, "learning_rate": 0.000278013827592708, "loss": 0.2394, "step": 336310 }, { "epoch": 13.93, "grad_norm": 1.2265625, "learning_rate": 0.0002780030506748857, "loss": 0.1806, "step": 336320 }, { "epoch": 13.93, "grad_norm": 0.890625, "learning_rate": 0.0002779922737043647, "loss": 0.19, "step": 336330 }, { "epoch": 13.93, "grad_norm": 1.3359375, "learning_rate": 0.00027798149668116533, "loss": 0.1982, "step": 336340 }, { "epoch": 13.93, "grad_norm": 1.671875, "learning_rate": 0.0002779707196053078, "loss": 0.1905, "step": 336350 }, { "epoch": 13.93, "grad_norm": 0.765625, "learning_rate": 0.00027795994247681235, "loss": 0.2071, "step": 336360 }, { "epoch": 13.93, "grad_norm": 0.6953125, "learning_rate": 0.0002779491652956993, "loss": 0.1641, "step": 336370 }, { "epoch": 13.93, "grad_norm": 3.25, "learning_rate": 0.00027793838806198894, "loss": 0.2168, "step": 336380 }, { "epoch": 13.93, "grad_norm": 0.8515625, "learning_rate": 0.00027792761077570163, "loss": 0.1635, "step": 336390 }, { "epoch": 13.93, "grad_norm": 0.3515625, "learning_rate": 0.00027791683343685746, "loss": 0.1869, "step": 336400 }, { "epoch": 13.93, "grad_norm": 1.3203125, "learning_rate": 0.00027790605604547687, "loss": 0.2101, "step": 336410 }, { "epoch": 13.93, "grad_norm": 0.427734375, "learning_rate": 0.00027789527860158013, "loss": 0.1926, "step": 336420 }, { "epoch": 13.93, "grad_norm": 0.84375, "learning_rate": 0.0002778845011051875, "loss": 0.2079, "step": 336430 }, { "epoch": 13.94, "grad_norm": 0.67578125, "learning_rate": 0.0002778737235563192, "loss": 0.1997, "step": 336440 }, { "epoch": 13.94, "grad_norm": 0.44921875, "learning_rate": 0.00027786294595499557, "loss": 0.1653, "step": 336450 }, { "epoch": 13.94, "grad_norm": 1.3125, "learning_rate": 0.0002778521683012368, "loss": 0.1826, "step": 336460 }, { "epoch": 13.94, "grad_norm": 0.73046875, "learning_rate": 0.0002778413905950634, "loss": 0.1965, "step": 336470 }, { "epoch": 13.94, "grad_norm": 0.85546875, "learning_rate": 0.0002778306128364955, "loss": 0.2137, "step": 336480 }, { "epoch": 13.94, "grad_norm": 1.421875, "learning_rate": 0.0002778198350255533, "loss": 0.1966, "step": 336490 }, { "epoch": 13.94, "grad_norm": 0.734375, "learning_rate": 0.00027780905716225725, "loss": 0.2126, "step": 336500 }, { "epoch": 13.94, "grad_norm": 0.41796875, "learning_rate": 0.00027779827924662756, "loss": 0.1542, "step": 336510 }, { "epoch": 13.94, "grad_norm": 0.25390625, "learning_rate": 0.00027778750127868446, "loss": 0.1396, "step": 336520 }, { "epoch": 13.94, "grad_norm": 0.625, "learning_rate": 0.0002777767232584483, "loss": 0.2213, "step": 336530 }, { "epoch": 13.94, "grad_norm": 0.86328125, "learning_rate": 0.00027776594518593936, "loss": 0.1936, "step": 336540 }, { "epoch": 13.94, "grad_norm": 0.486328125, "learning_rate": 0.0002777551670611779, "loss": 0.166, "step": 336550 }, { "epoch": 13.94, "grad_norm": 1.2265625, "learning_rate": 0.0002777443888841842, "loss": 0.2085, "step": 336560 }, { "epoch": 13.94, "grad_norm": 0.9375, "learning_rate": 0.0002777336106549785, "loss": 0.2038, "step": 336570 }, { "epoch": 13.94, "grad_norm": 1.0546875, "learning_rate": 0.00027772283237358124, "loss": 0.1982, "step": 336580 }, { "epoch": 13.94, "grad_norm": 0.921875, "learning_rate": 0.00027771205404001264, "loss": 0.2169, "step": 336590 }, { "epoch": 13.94, "grad_norm": 1.09375, "learning_rate": 0.0002777012756542929, "loss": 0.2075, "step": 336600 }, { "epoch": 13.94, "grad_norm": 0.78125, "learning_rate": 0.0002776904972164423, "loss": 0.1753, "step": 336610 }, { "epoch": 13.94, "grad_norm": 1.640625, "learning_rate": 0.0002776797187264812, "loss": 0.1823, "step": 336620 }, { "epoch": 13.94, "grad_norm": 0.82421875, "learning_rate": 0.0002776689401844299, "loss": 0.2324, "step": 336630 }, { "epoch": 13.94, "grad_norm": 0.5546875, "learning_rate": 0.00027765816159030853, "loss": 0.193, "step": 336640 }, { "epoch": 13.94, "grad_norm": 0.87109375, "learning_rate": 0.0002776473829441376, "loss": 0.2156, "step": 336650 }, { "epoch": 13.94, "grad_norm": 1.1640625, "learning_rate": 0.00027763660424593724, "loss": 0.1877, "step": 336660 }, { "epoch": 13.94, "grad_norm": 0.92578125, "learning_rate": 0.0002776258254957278, "loss": 0.1856, "step": 336670 }, { "epoch": 13.95, "grad_norm": 0.7890625, "learning_rate": 0.0002776150466935295, "loss": 0.1511, "step": 336680 }, { "epoch": 13.95, "grad_norm": 0.41796875, "learning_rate": 0.00027760426783936265, "loss": 0.2024, "step": 336690 }, { "epoch": 13.95, "grad_norm": 0.5625, "learning_rate": 0.0002775934889332476, "loss": 0.2284, "step": 336700 }, { "epoch": 13.95, "grad_norm": 1.1015625, "learning_rate": 0.0002775827099752045, "loss": 0.2096, "step": 336710 }, { "epoch": 13.95, "grad_norm": 0.4375, "learning_rate": 0.00027757193096525384, "loss": 0.1889, "step": 336720 }, { "epoch": 13.95, "grad_norm": 0.84765625, "learning_rate": 0.00027756115190341574, "loss": 0.2125, "step": 336730 }, { "epoch": 13.95, "grad_norm": 0.53125, "learning_rate": 0.0002775503727897105, "loss": 0.1884, "step": 336740 }, { "epoch": 13.95, "grad_norm": 1.5078125, "learning_rate": 0.0002775395936241585, "loss": 0.1681, "step": 336750 }, { "epoch": 13.95, "grad_norm": 1.890625, "learning_rate": 0.0002775288144067799, "loss": 0.218, "step": 336760 }, { "epoch": 13.95, "grad_norm": 1.078125, "learning_rate": 0.00027751803513759507, "loss": 0.1741, "step": 336770 }, { "epoch": 13.95, "grad_norm": 0.98828125, "learning_rate": 0.00027750725581662415, "loss": 0.1577, "step": 336780 }, { "epoch": 13.95, "grad_norm": 0.205078125, "learning_rate": 0.0002774964764438877, "loss": 0.1672, "step": 336790 }, { "epoch": 13.95, "grad_norm": 1.3671875, "learning_rate": 0.00027748569701940585, "loss": 0.1409, "step": 336800 }, { "epoch": 13.95, "grad_norm": 0.52734375, "learning_rate": 0.0002774749175431988, "loss": 0.1988, "step": 336810 }, { "epoch": 13.95, "grad_norm": 1.6328125, "learning_rate": 0.0002774641380152869, "loss": 0.2121, "step": 336820 }, { "epoch": 13.95, "grad_norm": 0.80078125, "learning_rate": 0.0002774533584356905, "loss": 0.1829, "step": 336830 }, { "epoch": 13.95, "grad_norm": 0.80859375, "learning_rate": 0.0002774425788044299, "loss": 0.235, "step": 336840 }, { "epoch": 13.95, "grad_norm": 0.640625, "learning_rate": 0.0002774317991215253, "loss": 0.1972, "step": 336850 }, { "epoch": 13.95, "grad_norm": 0.671875, "learning_rate": 0.00027742101938699697, "loss": 0.2285, "step": 336860 }, { "epoch": 13.95, "grad_norm": 0.4375, "learning_rate": 0.0002774102396008653, "loss": 0.1932, "step": 336870 }, { "epoch": 13.95, "grad_norm": 0.78125, "learning_rate": 0.00027739945976315057, "loss": 0.1927, "step": 336880 }, { "epoch": 13.95, "grad_norm": 0.8828125, "learning_rate": 0.0002773886798738729, "loss": 0.2432, "step": 336890 }, { "epoch": 13.95, "grad_norm": 1.0078125, "learning_rate": 0.00027737789993305276, "loss": 0.1834, "step": 336900 }, { "epoch": 13.95, "grad_norm": 0.423828125, "learning_rate": 0.0002773671199407103, "loss": 0.1678, "step": 336910 }, { "epoch": 13.96, "grad_norm": 0.2294921875, "learning_rate": 0.0002773563398968659, "loss": 0.2207, "step": 336920 }, { "epoch": 13.96, "grad_norm": 0.8515625, "learning_rate": 0.00027734555980153984, "loss": 0.2198, "step": 336930 }, { "epoch": 13.96, "grad_norm": 0.77734375, "learning_rate": 0.0002773347796547524, "loss": 0.1961, "step": 336940 }, { "epoch": 13.96, "grad_norm": 1.6875, "learning_rate": 0.0002773239994565239, "loss": 0.1961, "step": 336950 }, { "epoch": 13.96, "grad_norm": 0.68359375, "learning_rate": 0.0002773132192068745, "loss": 0.1859, "step": 336960 }, { "epoch": 13.96, "grad_norm": 0.419921875, "learning_rate": 0.0002773024389058246, "loss": 0.1797, "step": 336970 }, { "epoch": 13.96, "grad_norm": 0.65234375, "learning_rate": 0.0002772916585533944, "loss": 0.1793, "step": 336980 }, { "epoch": 13.96, "grad_norm": 1.1328125, "learning_rate": 0.00027728087814960433, "loss": 0.2199, "step": 336990 }, { "epoch": 13.96, "grad_norm": 1.09375, "learning_rate": 0.0002772700976944746, "loss": 0.2057, "step": 337000 }, { "epoch": 13.96, "grad_norm": 1.8203125, "learning_rate": 0.0002772593171880255, "loss": 0.1605, "step": 337010 }, { "epoch": 13.96, "grad_norm": 0.9296875, "learning_rate": 0.0002772485366302773, "loss": 0.2282, "step": 337020 }, { "epoch": 13.96, "grad_norm": 0.000537872314453125, "learning_rate": 0.0002772377560212502, "loss": 0.2092, "step": 337030 }, { "epoch": 13.96, "grad_norm": 1.4921875, "learning_rate": 0.0002772269753609647, "loss": 0.1846, "step": 337040 }, { "epoch": 13.96, "grad_norm": 0.8515625, "learning_rate": 0.0002772161946494409, "loss": 0.157, "step": 337050 }, { "epoch": 13.96, "grad_norm": 0.5, "learning_rate": 0.00027720541388669915, "loss": 0.1444, "step": 337060 }, { "epoch": 13.96, "grad_norm": 1.09375, "learning_rate": 0.0002771946330727598, "loss": 0.2027, "step": 337070 }, { "epoch": 13.96, "grad_norm": 0.7421875, "learning_rate": 0.00027718385220764307, "loss": 0.2208, "step": 337080 }, { "epoch": 13.96, "grad_norm": 0.49609375, "learning_rate": 0.0002771730712913693, "loss": 0.2015, "step": 337090 }, { "epoch": 13.96, "grad_norm": 0.84765625, "learning_rate": 0.0002771622903239587, "loss": 0.2059, "step": 337100 }, { "epoch": 13.96, "grad_norm": 0.609375, "learning_rate": 0.0002771515093054316, "loss": 0.2361, "step": 337110 }, { "epoch": 13.96, "grad_norm": 1.0, "learning_rate": 0.00027714072823580835, "loss": 0.1924, "step": 337120 }, { "epoch": 13.96, "grad_norm": 0.466796875, "learning_rate": 0.0002771299471151091, "loss": 0.2434, "step": 337130 }, { "epoch": 13.96, "grad_norm": 0.9375, "learning_rate": 0.0002771191659433543, "loss": 0.1566, "step": 337140 }, { "epoch": 13.96, "grad_norm": 0.6015625, "learning_rate": 0.00027710838472056403, "loss": 0.1704, "step": 337150 }, { "epoch": 13.97, "grad_norm": 0.609375, "learning_rate": 0.00027709760344675884, "loss": 0.1567, "step": 337160 }, { "epoch": 13.97, "grad_norm": 0.51953125, "learning_rate": 0.00027708682212195884, "loss": 0.1934, "step": 337170 }, { "epoch": 13.97, "grad_norm": 2.125, "learning_rate": 0.0002770760407461844, "loss": 0.1769, "step": 337180 }, { "epoch": 13.97, "grad_norm": 0.90625, "learning_rate": 0.0002770652593194557, "loss": 0.207, "step": 337190 }, { "epoch": 13.97, "grad_norm": 0.921875, "learning_rate": 0.00027705447784179315, "loss": 0.1898, "step": 337200 }, { "epoch": 13.97, "grad_norm": 0.75, "learning_rate": 0.000277043696313217, "loss": 0.2088, "step": 337210 }, { "epoch": 13.97, "grad_norm": 0.55859375, "learning_rate": 0.00027703291473374754, "loss": 0.2057, "step": 337220 }, { "epoch": 13.97, "grad_norm": 0.7421875, "learning_rate": 0.00027702213310340496, "loss": 0.1761, "step": 337230 }, { "epoch": 13.97, "grad_norm": 1.296875, "learning_rate": 0.0002770113514222097, "loss": 0.1747, "step": 337240 }, { "epoch": 13.97, "grad_norm": 1.0390625, "learning_rate": 0.00027700056969018214, "loss": 0.176, "step": 337250 }, { "epoch": 13.97, "grad_norm": 0.8046875, "learning_rate": 0.00027698978790734224, "loss": 0.1754, "step": 337260 }, { "epoch": 13.97, "grad_norm": 1.1015625, "learning_rate": 0.0002769790060737105, "loss": 0.2251, "step": 337270 }, { "epoch": 13.97, "grad_norm": 0.251953125, "learning_rate": 0.0002769682241893073, "loss": 0.1784, "step": 337280 }, { "epoch": 13.97, "grad_norm": 0.62890625, "learning_rate": 0.00027695744225415266, "loss": 0.2286, "step": 337290 }, { "epoch": 13.97, "grad_norm": 1.8515625, "learning_rate": 0.0002769466602682671, "loss": 0.1862, "step": 337300 }, { "epoch": 13.97, "grad_norm": 1.3515625, "learning_rate": 0.00027693587823167084, "loss": 0.1476, "step": 337310 }, { "epoch": 13.97, "grad_norm": 2.234375, "learning_rate": 0.0002769250961443841, "loss": 0.1453, "step": 337320 }, { "epoch": 13.97, "grad_norm": 0.9453125, "learning_rate": 0.00027691431400642737, "loss": 0.1973, "step": 337330 }, { "epoch": 13.97, "grad_norm": 1.109375, "learning_rate": 0.00027690353181782063, "loss": 0.2253, "step": 337340 }, { "epoch": 13.97, "grad_norm": 0.765625, "learning_rate": 0.0002768927495785845, "loss": 0.1638, "step": 337350 }, { "epoch": 13.97, "grad_norm": 1.5234375, "learning_rate": 0.0002768819672887391, "loss": 0.1762, "step": 337360 }, { "epoch": 13.97, "grad_norm": 1.0625, "learning_rate": 0.00027687118494830465, "loss": 0.1506, "step": 337370 }, { "epoch": 13.97, "grad_norm": 0.7109375, "learning_rate": 0.00027686040255730165, "loss": 0.1807, "step": 337380 }, { "epoch": 13.97, "grad_norm": 0.68359375, "learning_rate": 0.0002768496201157502, "loss": 0.2119, "step": 337390 }, { "epoch": 13.98, "grad_norm": 0.421875, "learning_rate": 0.0002768388376236707, "loss": 0.122, "step": 337400 }, { "epoch": 13.98, "grad_norm": 0.96484375, "learning_rate": 0.00027682805508108337, "loss": 0.1792, "step": 337410 }, { "epoch": 13.98, "grad_norm": 0.69921875, "learning_rate": 0.00027681727248800845, "loss": 0.2024, "step": 337420 }, { "epoch": 13.98, "grad_norm": 0.91796875, "learning_rate": 0.00027680648984446647, "loss": 0.1648, "step": 337430 }, { "epoch": 13.98, "grad_norm": 1.7421875, "learning_rate": 0.0002767957071504776, "loss": 0.1908, "step": 337440 }, { "epoch": 13.98, "grad_norm": 0.69140625, "learning_rate": 0.000276784924406062, "loss": 0.201, "step": 337450 }, { "epoch": 13.98, "grad_norm": 0.890625, "learning_rate": 0.00027677414161124007, "loss": 0.1553, "step": 337460 }, { "epoch": 13.98, "grad_norm": 1.0, "learning_rate": 0.0002767633587660321, "loss": 0.2418, "step": 337470 }, { "epoch": 13.98, "grad_norm": 1.0390625, "learning_rate": 0.00027675257587045837, "loss": 0.2054, "step": 337480 }, { "epoch": 13.98, "grad_norm": 0.515625, "learning_rate": 0.0002767417929245393, "loss": 0.1771, "step": 337490 }, { "epoch": 13.98, "grad_norm": 0.45703125, "learning_rate": 0.00027673100992829484, "loss": 0.1768, "step": 337500 }, { "epoch": 13.98, "grad_norm": 0.54296875, "learning_rate": 0.0002767202268817457, "loss": 0.1955, "step": 337510 }, { "epoch": 13.98, "grad_norm": 0.734375, "learning_rate": 0.00027670944378491196, "loss": 0.205, "step": 337520 }, { "epoch": 13.98, "grad_norm": 1.1484375, "learning_rate": 0.00027669866063781385, "loss": 0.2319, "step": 337530 }, { "epoch": 13.98, "grad_norm": 0.80078125, "learning_rate": 0.0002766878774404718, "loss": 0.1556, "step": 337540 }, { "epoch": 13.98, "grad_norm": 1.25, "learning_rate": 0.000276677094192906, "loss": 0.2033, "step": 337550 }, { "epoch": 13.98, "grad_norm": 0.93359375, "learning_rate": 0.00027666631089513685, "loss": 0.2144, "step": 337560 }, { "epoch": 13.98, "grad_norm": 0.6640625, "learning_rate": 0.00027665552754718454, "loss": 0.1524, "step": 337570 }, { "epoch": 13.98, "grad_norm": 1.0078125, "learning_rate": 0.0002766447441490694, "loss": 0.1966, "step": 337580 }, { "epoch": 13.98, "grad_norm": 0.85546875, "learning_rate": 0.00027663396070081174, "loss": 0.2177, "step": 337590 }, { "epoch": 13.98, "grad_norm": 1.8515625, "learning_rate": 0.0002766231772024319, "loss": 0.1648, "step": 337600 }, { "epoch": 13.98, "grad_norm": 0.921875, "learning_rate": 0.00027661239365394996, "loss": 0.1634, "step": 337610 }, { "epoch": 13.98, "grad_norm": 0.466796875, "learning_rate": 0.00027660161005538654, "loss": 0.2039, "step": 337620 }, { "epoch": 13.98, "grad_norm": 0.63671875, "learning_rate": 0.00027659082640676166, "loss": 0.1734, "step": 337630 }, { "epoch": 13.99, "grad_norm": 0.66015625, "learning_rate": 0.0002765800427080957, "loss": 0.1634, "step": 337640 }, { "epoch": 13.99, "grad_norm": 0.427734375, "learning_rate": 0.0002765692589594091, "loss": 0.1882, "step": 337650 }, { "epoch": 13.99, "grad_norm": 1.3359375, "learning_rate": 0.00027655847516072185, "loss": 0.2371, "step": 337660 }, { "epoch": 13.99, "grad_norm": 0.2890625, "learning_rate": 0.0002765476913120546, "loss": 0.1947, "step": 337670 }, { "epoch": 13.99, "grad_norm": 1.03125, "learning_rate": 0.00027653690741342735, "loss": 0.2107, "step": 337680 }, { "epoch": 13.99, "grad_norm": 0.5703125, "learning_rate": 0.0002765261234648605, "loss": 0.2295, "step": 337690 }, { "epoch": 13.99, "grad_norm": 0.390625, "learning_rate": 0.0002765153394663744, "loss": 0.1924, "step": 337700 }, { "epoch": 13.99, "grad_norm": 0.734375, "learning_rate": 0.0002765045554179893, "loss": 0.1955, "step": 337710 }, { "epoch": 13.99, "grad_norm": 1.921875, "learning_rate": 0.0002764937713197254, "loss": 0.1677, "step": 337720 }, { "epoch": 13.99, "grad_norm": 1.1484375, "learning_rate": 0.0002764829871716032, "loss": 0.2191, "step": 337730 }, { "epoch": 13.99, "grad_norm": 0.45703125, "learning_rate": 0.00027647220297364277, "loss": 0.1235, "step": 337740 }, { "epoch": 13.99, "grad_norm": 0.400390625, "learning_rate": 0.0002764614187258646, "loss": 0.1831, "step": 337750 }, { "epoch": 13.99, "grad_norm": 0.703125, "learning_rate": 0.00027645063442828885, "loss": 0.2099, "step": 337760 }, { "epoch": 13.99, "grad_norm": 1.0, "learning_rate": 0.0002764398500809359, "loss": 0.2274, "step": 337770 }, { "epoch": 13.99, "grad_norm": 0.68359375, "learning_rate": 0.00027642906568382596, "loss": 0.1228, "step": 337780 }, { "epoch": 13.99, "grad_norm": 3.375, "learning_rate": 0.0002764182812369794, "loss": 0.2173, "step": 337790 }, { "epoch": 13.99, "grad_norm": 0.703125, "learning_rate": 0.0002764074967404165, "loss": 0.2297, "step": 337800 }, { "epoch": 13.99, "grad_norm": 0.60546875, "learning_rate": 0.00027639671219415753, "loss": 0.1908, "step": 337810 }, { "epoch": 13.99, "grad_norm": 0.59765625, "learning_rate": 0.0002763859275982227, "loss": 0.1713, "step": 337820 }, { "epoch": 13.99, "grad_norm": 0.43359375, "learning_rate": 0.0002763751429526326, "loss": 0.1864, "step": 337830 }, { "epoch": 13.99, "grad_norm": 0.443359375, "learning_rate": 0.00027636435825740716, "loss": 0.1953, "step": 337840 }, { "epoch": 13.99, "grad_norm": 0.255859375, "learning_rate": 0.00027635357351256693, "loss": 0.2159, "step": 337850 }, { "epoch": 13.99, "grad_norm": 0.8828125, "learning_rate": 0.00027634278871813215, "loss": 0.2163, "step": 337860 }, { "epoch": 13.99, "grad_norm": 0.64453125, "learning_rate": 0.00027633200387412304, "loss": 0.1726, "step": 337870 }, { "epoch": 13.99, "grad_norm": 1.0390625, "learning_rate": 0.0002763212189805599, "loss": 0.2106, "step": 337880 }, { "epoch": 14.0, "grad_norm": 1.0703125, "learning_rate": 0.0002763104340374631, "loss": 0.2384, "step": 337890 }, { "epoch": 14.0, "grad_norm": 1.09375, "learning_rate": 0.00027629964904485294, "loss": 0.148, "step": 337900 }, { "epoch": 14.0, "grad_norm": 0.81640625, "learning_rate": 0.0002762888640027497, "loss": 0.1822, "step": 337910 }, { "epoch": 14.0, "grad_norm": 0.703125, "learning_rate": 0.0002762780789111736, "loss": 0.174, "step": 337920 }, { "epoch": 14.0, "grad_norm": 1.3125, "learning_rate": 0.000276267293770145, "loss": 0.1571, "step": 337930 }, { "epoch": 14.0, "grad_norm": 0.6640625, "learning_rate": 0.00027625650857968423, "loss": 0.2093, "step": 337940 }, { "epoch": 14.0, "grad_norm": 1.8671875, "learning_rate": 0.00027624572333981144, "loss": 0.2018, "step": 337950 }, { "epoch": 14.0, "grad_norm": 0.5078125, "learning_rate": 0.0002762349380505471, "loss": 0.2032, "step": 337960 }, { "epoch": 14.0, "grad_norm": 1.5078125, "learning_rate": 0.0002762241527119114, "loss": 0.1962, "step": 337970 }, { "epoch": 14.0, "grad_norm": 0.83984375, "learning_rate": 0.0002762133673239248, "loss": 0.2022, "step": 337980 }, { "epoch": 14.0, "grad_norm": 0.7265625, "learning_rate": 0.0002762025818866074, "loss": 0.1558, "step": 337990 }, { "epoch": 14.0, "grad_norm": 0.361328125, "learning_rate": 0.00027619179639997956, "loss": 0.2158, "step": 338000 }, { "epoch": 14.0, "grad_norm": 1.203125, "learning_rate": 0.00027618101086406157, "loss": 0.2472, "step": 338010 }, { "epoch": 14.0, "grad_norm": 0.99609375, "learning_rate": 0.00027617022527887374, "loss": 0.1734, "step": 338020 }, { "epoch": 14.0, "grad_norm": 0.734375, "learning_rate": 0.00027615943964443647, "loss": 0.2107, "step": 338030 }, { "epoch": 14.0, "grad_norm": 1.5703125, "learning_rate": 0.0002761486539607698, "loss": 0.2201, "step": 338040 }, { "epoch": 14.0, "grad_norm": 0.828125, "learning_rate": 0.00027613786822789434, "loss": 0.199, "step": 338050 }, { "epoch": 14.0, "grad_norm": 0.71484375, "learning_rate": 0.00027612708244583015, "loss": 0.18, "step": 338060 }, { "epoch": 14.0, "grad_norm": 0.48046875, "learning_rate": 0.00027611629661459765, "loss": 0.1764, "step": 338070 }, { "epoch": 14.0, "grad_norm": 0.78125, "learning_rate": 0.0002761055107342171, "loss": 0.1742, "step": 338080 }, { "epoch": 14.0, "grad_norm": 1.359375, "learning_rate": 0.00027609472480470877, "loss": 0.1887, "step": 338090 }, { "epoch": 14.0, "grad_norm": 1.796875, "learning_rate": 0.000276083938826093, "loss": 0.1823, "step": 338100 }, { "epoch": 14.0, "grad_norm": 0.55078125, "learning_rate": 0.0002760731527983901, "loss": 0.138, "step": 338110 }, { "epoch": 14.0, "grad_norm": 0.66015625, "learning_rate": 0.0002760623667216203, "loss": 0.245, "step": 338120 }, { "epoch": 14.01, "grad_norm": 0.8671875, "learning_rate": 0.0002760515805958039, "loss": 0.2204, "step": 338130 }, { "epoch": 14.01, "grad_norm": 1.2421875, "learning_rate": 0.0002760407944209613, "loss": 0.2241, "step": 338140 }, { "epoch": 14.01, "grad_norm": 0.984375, "learning_rate": 0.0002760300081971128, "loss": 0.1764, "step": 338150 }, { "epoch": 14.01, "grad_norm": 1.2578125, "learning_rate": 0.00027601922192427845, "loss": 0.2027, "step": 338160 }, { "epoch": 14.01, "grad_norm": 0.63671875, "learning_rate": 0.0002760084356024789, "loss": 0.2205, "step": 338170 }, { "epoch": 14.01, "grad_norm": 0.8671875, "learning_rate": 0.00027599764923173423, "loss": 0.1193, "step": 338180 }, { "epoch": 14.01, "grad_norm": 1.21875, "learning_rate": 0.00027598686281206483, "loss": 0.2127, "step": 338190 }, { "epoch": 14.01, "grad_norm": 1.03125, "learning_rate": 0.00027597607634349087, "loss": 0.204, "step": 338200 }, { "epoch": 14.01, "grad_norm": 0.3046875, "learning_rate": 0.0002759652898260328, "loss": 0.1697, "step": 338210 }, { "epoch": 14.01, "grad_norm": 1.421875, "learning_rate": 0.00027595450325971084, "loss": 0.209, "step": 338220 }, { "epoch": 14.01, "grad_norm": 1.3515625, "learning_rate": 0.0002759437166445453, "loss": 0.188, "step": 338230 }, { "epoch": 14.01, "grad_norm": 1.0390625, "learning_rate": 0.00027593292998055654, "loss": 0.1951, "step": 338240 }, { "epoch": 14.01, "grad_norm": 1.078125, "learning_rate": 0.0002759221432677648, "loss": 0.155, "step": 338250 }, { "epoch": 14.01, "grad_norm": 0.84375, "learning_rate": 0.00027591135650619034, "loss": 0.1623, "step": 338260 }, { "epoch": 14.01, "grad_norm": 1.796875, "learning_rate": 0.0002759005696958535, "loss": 0.1983, "step": 338270 }, { "epoch": 14.01, "grad_norm": 0.625, "learning_rate": 0.00027588978283677465, "loss": 0.1695, "step": 338280 }, { "epoch": 14.01, "grad_norm": 0.71875, "learning_rate": 0.00027587899592897396, "loss": 0.2101, "step": 338290 }, { "epoch": 14.01, "grad_norm": 0.671875, "learning_rate": 0.0002758682089724718, "loss": 0.2016, "step": 338300 }, { "epoch": 14.01, "grad_norm": 0.84375, "learning_rate": 0.0002758574219672885, "loss": 0.1905, "step": 338310 }, { "epoch": 14.01, "grad_norm": 0.546875, "learning_rate": 0.0002758466349134443, "loss": 0.1796, "step": 338320 }, { "epoch": 14.01, "grad_norm": 0.54296875, "learning_rate": 0.00027583584781095956, "loss": 0.2013, "step": 338330 }, { "epoch": 14.01, "grad_norm": 0.69140625, "learning_rate": 0.00027582506065985447, "loss": 0.1621, "step": 338340 }, { "epoch": 14.01, "grad_norm": 0.68359375, "learning_rate": 0.0002758142734601495, "loss": 0.2072, "step": 338350 }, { "epoch": 14.01, "grad_norm": 1.0234375, "learning_rate": 0.0002758034862118648, "loss": 0.1937, "step": 338360 }, { "epoch": 14.02, "grad_norm": 1.8984375, "learning_rate": 0.00027579269891502066, "loss": 0.2174, "step": 338370 }, { "epoch": 14.02, "grad_norm": 0.9765625, "learning_rate": 0.00027578191156963754, "loss": 0.1403, "step": 338380 }, { "epoch": 14.02, "grad_norm": 0.84765625, "learning_rate": 0.00027577112417573557, "loss": 0.2443, "step": 338390 }, { "epoch": 14.02, "grad_norm": 0.7890625, "learning_rate": 0.00027576033673333523, "loss": 0.2222, "step": 338400 }, { "epoch": 14.02, "grad_norm": 0.921875, "learning_rate": 0.00027574954924245663, "loss": 0.1507, "step": 338410 }, { "epoch": 14.02, "grad_norm": 0.65625, "learning_rate": 0.0002757387617031202, "loss": 0.1514, "step": 338420 }, { "epoch": 14.02, "grad_norm": 1.5234375, "learning_rate": 0.0002757279741153463, "loss": 0.1811, "step": 338430 }, { "epoch": 14.02, "grad_norm": 0.69140625, "learning_rate": 0.00027571718647915496, "loss": 0.1888, "step": 338440 }, { "epoch": 14.02, "grad_norm": 1.1875, "learning_rate": 0.0002757063987945667, "loss": 0.1929, "step": 338450 }, { "epoch": 14.02, "grad_norm": 0.2255859375, "learning_rate": 0.00027569561106160177, "loss": 0.198, "step": 338460 }, { "epoch": 14.02, "grad_norm": 1.1015625, "learning_rate": 0.00027568482328028054, "loss": 0.2233, "step": 338470 }, { "epoch": 14.02, "grad_norm": 0.37109375, "learning_rate": 0.00027567403545062323, "loss": 0.2077, "step": 338480 }, { "epoch": 14.02, "grad_norm": 1.4453125, "learning_rate": 0.0002756632475726501, "loss": 0.1717, "step": 338490 }, { "epoch": 14.02, "grad_norm": 0.87890625, "learning_rate": 0.00027565245964638154, "loss": 0.2371, "step": 338500 }, { "epoch": 14.02, "grad_norm": 0.345703125, "learning_rate": 0.0002756416716718379, "loss": 0.1757, "step": 338510 }, { "epoch": 14.02, "grad_norm": 0.51953125, "learning_rate": 0.0002756308836490392, "loss": 0.1779, "step": 338520 }, { "epoch": 14.02, "grad_norm": 0.6171875, "learning_rate": 0.00027562009557800614, "loss": 0.1654, "step": 338530 }, { "epoch": 14.02, "grad_norm": 0.64453125, "learning_rate": 0.0002756093074587587, "loss": 0.1851, "step": 338540 }, { "epoch": 14.02, "grad_norm": 0.52734375, "learning_rate": 0.0002755985192913174, "loss": 0.184, "step": 338550 }, { "epoch": 14.02, "grad_norm": 1.6328125, "learning_rate": 0.00027558773107570246, "loss": 0.1991, "step": 338560 }, { "epoch": 14.02, "grad_norm": 0.2734375, "learning_rate": 0.0002755769428119341, "loss": 0.1681, "step": 338570 }, { "epoch": 14.02, "grad_norm": 1.171875, "learning_rate": 0.00027556615450003276, "loss": 0.1786, "step": 338580 }, { "epoch": 14.02, "grad_norm": 1.015625, "learning_rate": 0.0002755553661400186, "loss": 0.1815, "step": 338590 }, { "epoch": 14.02, "grad_norm": 0.0, "learning_rate": 0.000275544577731912, "loss": 0.1966, "step": 338600 }, { "epoch": 14.03, "grad_norm": 0.6171875, "learning_rate": 0.0002755337892757334, "loss": 0.1489, "step": 338610 }, { "epoch": 14.03, "grad_norm": 0.337890625, "learning_rate": 0.00027552300077150283, "loss": 0.1626, "step": 338620 }, { "epoch": 14.03, "grad_norm": 0.86328125, "learning_rate": 0.00027551221221924077, "loss": 0.1802, "step": 338630 }, { "epoch": 14.03, "grad_norm": 1.1953125, "learning_rate": 0.00027550142361896753, "loss": 0.2369, "step": 338640 }, { "epoch": 14.03, "grad_norm": 0.734375, "learning_rate": 0.0002754906349707033, "loss": 0.2182, "step": 338650 }, { "epoch": 14.03, "grad_norm": 0.5234375, "learning_rate": 0.0002754798462744685, "loss": 0.1963, "step": 338660 }, { "epoch": 14.03, "grad_norm": 0.546875, "learning_rate": 0.0002754690575302833, "loss": 0.2334, "step": 338670 }, { "epoch": 14.03, "grad_norm": 1.0625, "learning_rate": 0.00027545826873816813, "loss": 0.2179, "step": 338680 }, { "epoch": 14.03, "grad_norm": 0.5703125, "learning_rate": 0.00027544747989814326, "loss": 0.1593, "step": 338690 }, { "epoch": 14.03, "grad_norm": 2.609375, "learning_rate": 0.000275436691010229, "loss": 0.1993, "step": 338700 }, { "epoch": 14.03, "grad_norm": 0.69140625, "learning_rate": 0.00027542590207444556, "loss": 0.1899, "step": 338710 }, { "epoch": 14.03, "grad_norm": 0.64453125, "learning_rate": 0.0002754151130908134, "loss": 0.2237, "step": 338720 }, { "epoch": 14.03, "grad_norm": 0.57421875, "learning_rate": 0.0002754043240593527, "loss": 0.1852, "step": 338730 }, { "epoch": 14.03, "grad_norm": 1.75, "learning_rate": 0.0002753935349800838, "loss": 0.1676, "step": 338740 }, { "epoch": 14.03, "grad_norm": 0.96484375, "learning_rate": 0.00027538274585302703, "loss": 0.1931, "step": 338750 }, { "epoch": 14.03, "grad_norm": 0.57421875, "learning_rate": 0.00027537195667820264, "loss": 0.182, "step": 338760 }, { "epoch": 14.03, "grad_norm": 0.396484375, "learning_rate": 0.0002753611674556311, "loss": 0.1374, "step": 338770 }, { "epoch": 14.03, "grad_norm": 0.56640625, "learning_rate": 0.0002753503781853324, "loss": 0.1565, "step": 338780 }, { "epoch": 14.03, "grad_norm": 0.87890625, "learning_rate": 0.00027533958886732707, "loss": 0.166, "step": 338790 }, { "epoch": 14.03, "grad_norm": 1.0546875, "learning_rate": 0.0002753287995016355, "loss": 0.2016, "step": 338800 }, { "epoch": 14.03, "grad_norm": 0.91796875, "learning_rate": 0.0002753180100882777, "loss": 0.2132, "step": 338810 }, { "epoch": 14.03, "grad_norm": 1.5703125, "learning_rate": 0.0002753072206272742, "loss": 0.1946, "step": 338820 }, { "epoch": 14.03, "grad_norm": 0.478515625, "learning_rate": 0.0002752964311186452, "loss": 0.1803, "step": 338830 }, { "epoch": 14.03, "grad_norm": 0.99609375, "learning_rate": 0.0002752856415624111, "loss": 0.1916, "step": 338840 }, { "epoch": 14.04, "grad_norm": 0.69140625, "learning_rate": 0.0002752748519585922, "loss": 0.1977, "step": 338850 }, { "epoch": 14.04, "grad_norm": 0.53515625, "learning_rate": 0.0002752640623072087, "loss": 0.1871, "step": 338860 }, { "epoch": 14.04, "grad_norm": 0.90234375, "learning_rate": 0.00027525327260828094, "loss": 0.1424, "step": 338870 }, { "epoch": 14.04, "grad_norm": 0.796875, "learning_rate": 0.00027524248286182933, "loss": 0.1544, "step": 338880 }, { "epoch": 14.04, "grad_norm": 1.1015625, "learning_rate": 0.000275231693067874, "loss": 0.1856, "step": 338890 }, { "epoch": 14.04, "grad_norm": 3.84375, "learning_rate": 0.0002752209032264354, "loss": 0.1698, "step": 338900 }, { "epoch": 14.04, "grad_norm": 1.265625, "learning_rate": 0.00027521011333753376, "loss": 0.1995, "step": 338910 }, { "epoch": 14.04, "grad_norm": 0.546875, "learning_rate": 0.0002751993234011894, "loss": 0.1595, "step": 338920 }, { "epoch": 14.04, "grad_norm": 0.8046875, "learning_rate": 0.00027518853341742275, "loss": 0.175, "step": 338930 }, { "epoch": 14.04, "grad_norm": 1.265625, "learning_rate": 0.00027517774338625385, "loss": 0.1677, "step": 338940 }, { "epoch": 14.04, "grad_norm": 0.74609375, "learning_rate": 0.0002751669533077032, "loss": 0.1853, "step": 338950 }, { "epoch": 14.04, "grad_norm": 1.609375, "learning_rate": 0.0002751561631817911, "loss": 0.1895, "step": 338960 }, { "epoch": 14.04, "grad_norm": 1.0, "learning_rate": 0.00027514537300853776, "loss": 0.2078, "step": 338970 }, { "epoch": 14.04, "grad_norm": 0.578125, "learning_rate": 0.0002751345827879636, "loss": 0.1972, "step": 338980 }, { "epoch": 14.04, "grad_norm": 1.0234375, "learning_rate": 0.00027512379252008884, "loss": 0.1751, "step": 338990 }, { "epoch": 14.04, "grad_norm": 0.5625, "learning_rate": 0.0002751130022049338, "loss": 0.1871, "step": 339000 }, { "epoch": 14.04, "grad_norm": 0.80859375, "learning_rate": 0.0002751022118425189, "loss": 0.2084, "step": 339010 }, { "epoch": 14.04, "grad_norm": 1.0859375, "learning_rate": 0.0002750914214328642, "loss": 0.1916, "step": 339020 }, { "epoch": 14.04, "grad_norm": 0.322265625, "learning_rate": 0.00027508063097599024, "loss": 0.1748, "step": 339030 }, { "epoch": 14.04, "grad_norm": 0.87109375, "learning_rate": 0.00027506984047191723, "loss": 0.176, "step": 339040 }, { "epoch": 14.04, "grad_norm": 0.921875, "learning_rate": 0.00027505904992066544, "loss": 0.2315, "step": 339050 }, { "epoch": 14.04, "grad_norm": 0.69140625, "learning_rate": 0.0002750482593222553, "loss": 0.233, "step": 339060 }, { "epoch": 14.04, "grad_norm": 2.328125, "learning_rate": 0.000275037468676707, "loss": 0.2092, "step": 339070 }, { "epoch": 14.04, "grad_norm": 0.8828125, "learning_rate": 0.0002750266779840409, "loss": 0.2204, "step": 339080 }, { "epoch": 14.05, "grad_norm": 1.03125, "learning_rate": 0.0002750158872442773, "loss": 0.1627, "step": 339090 }, { "epoch": 14.05, "grad_norm": 0.640625, "learning_rate": 0.0002750050964574365, "loss": 0.1899, "step": 339100 }, { "epoch": 14.05, "grad_norm": 1.015625, "learning_rate": 0.00027499430562353877, "loss": 0.1952, "step": 339110 }, { "epoch": 14.05, "grad_norm": 1.875, "learning_rate": 0.0002749835147426045, "loss": 0.2058, "step": 339120 }, { "epoch": 14.05, "grad_norm": 0.6484375, "learning_rate": 0.00027497272381465395, "loss": 0.2165, "step": 339130 }, { "epoch": 14.05, "grad_norm": 0.5859375, "learning_rate": 0.00027496193283970744, "loss": 0.1873, "step": 339140 }, { "epoch": 14.05, "grad_norm": 0.9765625, "learning_rate": 0.0002749511418177852, "loss": 0.1772, "step": 339150 }, { "epoch": 14.05, "grad_norm": 2.234375, "learning_rate": 0.00027494035074890767, "loss": 0.1901, "step": 339160 }, { "epoch": 14.05, "grad_norm": 1.3359375, "learning_rate": 0.0002749295596330951, "loss": 0.2357, "step": 339170 }, { "epoch": 14.05, "grad_norm": 0.486328125, "learning_rate": 0.00027491876847036777, "loss": 0.1777, "step": 339180 }, { "epoch": 14.05, "grad_norm": 0.63671875, "learning_rate": 0.00027490797726074603, "loss": 0.1829, "step": 339190 }, { "epoch": 14.05, "grad_norm": 0.63671875, "learning_rate": 0.0002748971860042501, "loss": 0.1927, "step": 339200 }, { "epoch": 14.05, "grad_norm": 0.71875, "learning_rate": 0.0002748863947009004, "loss": 0.1438, "step": 339210 }, { "epoch": 14.05, "grad_norm": 1.078125, "learning_rate": 0.0002748756033507172, "loss": 0.1857, "step": 339220 }, { "epoch": 14.05, "grad_norm": 1.828125, "learning_rate": 0.00027486481195372083, "loss": 0.2111, "step": 339230 }, { "epoch": 14.05, "grad_norm": 1.3515625, "learning_rate": 0.0002748540205099315, "loss": 0.2076, "step": 339240 }, { "epoch": 14.05, "grad_norm": 1.140625, "learning_rate": 0.00027484322901936964, "loss": 0.2364, "step": 339250 }, { "epoch": 14.05, "grad_norm": 1.1171875, "learning_rate": 0.00027483243748205544, "loss": 0.1747, "step": 339260 }, { "epoch": 14.05, "grad_norm": 0.84375, "learning_rate": 0.00027482164589800936, "loss": 0.2133, "step": 339270 }, { "epoch": 14.05, "grad_norm": 0.76171875, "learning_rate": 0.00027481085426725156, "loss": 0.1782, "step": 339280 }, { "epoch": 14.05, "grad_norm": 0.60546875, "learning_rate": 0.0002748000625898024, "loss": 0.1806, "step": 339290 }, { "epoch": 14.05, "grad_norm": 1.375, "learning_rate": 0.00027478927086568226, "loss": 0.1576, "step": 339300 }, { "epoch": 14.05, "grad_norm": 0.65234375, "learning_rate": 0.00027477847909491136, "loss": 0.1988, "step": 339310 }, { "epoch": 14.05, "grad_norm": 1.015625, "learning_rate": 0.00027476768727751006, "loss": 0.1431, "step": 339320 }, { "epoch": 14.06, "grad_norm": 0.380859375, "learning_rate": 0.0002747568954134986, "loss": 0.1535, "step": 339330 }, { "epoch": 14.06, "grad_norm": 0.953125, "learning_rate": 0.00027474610350289736, "loss": 0.2048, "step": 339340 }, { "epoch": 14.06, "grad_norm": 0.78125, "learning_rate": 0.0002747353115457267, "loss": 0.1932, "step": 339350 }, { "epoch": 14.06, "grad_norm": 1.2265625, "learning_rate": 0.0002747245195420067, "loss": 0.1782, "step": 339360 }, { "epoch": 14.06, "grad_norm": 1.046875, "learning_rate": 0.0002747137274917579, "loss": 0.2191, "step": 339370 }, { "epoch": 14.06, "grad_norm": 0.8671875, "learning_rate": 0.00027470293539500065, "loss": 0.2054, "step": 339380 }, { "epoch": 14.06, "grad_norm": 1.84375, "learning_rate": 0.000274692143251755, "loss": 0.1952, "step": 339390 }, { "epoch": 14.06, "grad_norm": 0.71484375, "learning_rate": 0.0002746813510620414, "loss": 0.184, "step": 339400 }, { "epoch": 14.06, "grad_norm": 0.76171875, "learning_rate": 0.00027467055882588023, "loss": 0.1612, "step": 339410 }, { "epoch": 14.06, "grad_norm": 1.1953125, "learning_rate": 0.0002746597665432917, "loss": 0.2584, "step": 339420 }, { "epoch": 14.06, "grad_norm": 1.0078125, "learning_rate": 0.0002746489742142962, "loss": 0.2021, "step": 339430 }, { "epoch": 14.06, "grad_norm": 1.171875, "learning_rate": 0.00027463818183891394, "loss": 0.2335, "step": 339440 }, { "epoch": 14.06, "grad_norm": 1.0078125, "learning_rate": 0.00027462738941716523, "loss": 0.2046, "step": 339450 }, { "epoch": 14.06, "grad_norm": 0.7578125, "learning_rate": 0.00027461659694907057, "loss": 0.2219, "step": 339460 }, { "epoch": 14.06, "grad_norm": 0.546875, "learning_rate": 0.00027460580443465, "loss": 0.1795, "step": 339470 }, { "epoch": 14.06, "grad_norm": 1.09375, "learning_rate": 0.000274595011873924, "loss": 0.2075, "step": 339480 }, { "epoch": 14.06, "grad_norm": 1.703125, "learning_rate": 0.0002745842192669129, "loss": 0.19, "step": 339490 }, { "epoch": 14.06, "grad_norm": 0.7578125, "learning_rate": 0.0002745734266136369, "loss": 0.2096, "step": 339500 }, { "epoch": 14.06, "grad_norm": 0.6328125, "learning_rate": 0.0002745626339141164, "loss": 0.2325, "step": 339510 }, { "epoch": 14.06, "grad_norm": 0.94140625, "learning_rate": 0.00027455184116837166, "loss": 0.1835, "step": 339520 }, { "epoch": 14.06, "grad_norm": 0.4375, "learning_rate": 0.000274541048376423, "loss": 0.2201, "step": 339530 }, { "epoch": 14.06, "grad_norm": 1.171875, "learning_rate": 0.0002745302555382907, "loss": 0.1545, "step": 339540 }, { "epoch": 14.06, "grad_norm": 0.8515625, "learning_rate": 0.00027451946265399517, "loss": 0.2112, "step": 339550 }, { "epoch": 14.06, "grad_norm": 0.97265625, "learning_rate": 0.00027450866972355664, "loss": 0.1743, "step": 339560 }, { "epoch": 14.06, "grad_norm": 1.578125, "learning_rate": 0.00027449787674699544, "loss": 0.1986, "step": 339570 }, { "epoch": 14.07, "grad_norm": 0.71875, "learning_rate": 0.0002744870837243319, "loss": 0.1912, "step": 339580 }, { "epoch": 14.07, "grad_norm": 0.7890625, "learning_rate": 0.0002744762906555863, "loss": 0.1998, "step": 339590 }, { "epoch": 14.07, "grad_norm": 1.6015625, "learning_rate": 0.00027446549754077897, "loss": 0.1798, "step": 339600 }, { "epoch": 14.07, "grad_norm": 0.6015625, "learning_rate": 0.0002744547043799302, "loss": 0.2096, "step": 339610 }, { "epoch": 14.07, "grad_norm": 0.80859375, "learning_rate": 0.00027444391117306026, "loss": 0.2189, "step": 339620 }, { "epoch": 14.07, "grad_norm": 0.98046875, "learning_rate": 0.00027443311792018965, "loss": 0.1925, "step": 339630 }, { "epoch": 14.07, "grad_norm": 0.53125, "learning_rate": 0.00027442232462133846, "loss": 0.1626, "step": 339640 }, { "epoch": 14.07, "grad_norm": 0.70703125, "learning_rate": 0.0002744115312765271, "loss": 0.2061, "step": 339650 }, { "epoch": 14.07, "grad_norm": 0.7109375, "learning_rate": 0.0002744007378857759, "loss": 0.1296, "step": 339660 }, { "epoch": 14.07, "grad_norm": 0.84375, "learning_rate": 0.0002743899444491051, "loss": 0.1912, "step": 339670 }, { "epoch": 14.07, "grad_norm": 0.484375, "learning_rate": 0.00027437915096653517, "loss": 0.2125, "step": 339680 }, { "epoch": 14.07, "grad_norm": 0.421875, "learning_rate": 0.00027436835743808617, "loss": 0.1693, "step": 339690 }, { "epoch": 14.07, "grad_norm": 0.357421875, "learning_rate": 0.0002743575638637786, "loss": 0.2455, "step": 339700 }, { "epoch": 14.07, "grad_norm": 0.921875, "learning_rate": 0.0002743467702436328, "loss": 0.2118, "step": 339710 }, { "epoch": 14.07, "grad_norm": 0.60546875, "learning_rate": 0.00027433597657766895, "loss": 0.1655, "step": 339720 }, { "epoch": 14.07, "grad_norm": 0.9296875, "learning_rate": 0.00027432518286590743, "loss": 0.2724, "step": 339730 }, { "epoch": 14.07, "grad_norm": 0.5390625, "learning_rate": 0.00027431438910836847, "loss": 0.183, "step": 339740 }, { "epoch": 14.07, "grad_norm": 1.984375, "learning_rate": 0.00027430359530507254, "loss": 0.2383, "step": 339750 }, { "epoch": 14.07, "grad_norm": 1.125, "learning_rate": 0.00027429280145603986, "loss": 0.1621, "step": 339760 }, { "epoch": 14.07, "grad_norm": 0.90234375, "learning_rate": 0.0002742820075612907, "loss": 0.177, "step": 339770 }, { "epoch": 14.07, "grad_norm": 0.94140625, "learning_rate": 0.00027427121362084544, "loss": 0.1875, "step": 339780 }, { "epoch": 14.07, "grad_norm": 0.9296875, "learning_rate": 0.0002742604196347245, "loss": 0.1938, "step": 339790 }, { "epoch": 14.07, "grad_norm": 2.78125, "learning_rate": 0.00027424962560294793, "loss": 0.2381, "step": 339800 }, { "epoch": 14.07, "grad_norm": 0.67578125, "learning_rate": 0.0002742388315255362, "loss": 0.2505, "step": 339810 }, { "epoch": 14.08, "grad_norm": 0.6640625, "learning_rate": 0.00027422803740250964, "loss": 0.1985, "step": 339820 }, { "epoch": 14.08, "grad_norm": 0.6875, "learning_rate": 0.0002742172432338885, "loss": 0.175, "step": 339830 }, { "epoch": 14.08, "grad_norm": 0.578125, "learning_rate": 0.00027420644901969316, "loss": 0.1814, "step": 339840 }, { "epoch": 14.08, "grad_norm": 1.0703125, "learning_rate": 0.0002741956547599438, "loss": 0.2088, "step": 339850 }, { "epoch": 14.08, "grad_norm": 1.5390625, "learning_rate": 0.00027418486045466094, "loss": 0.1748, "step": 339860 }, { "epoch": 14.08, "grad_norm": 0.71875, "learning_rate": 0.0002741740661038648, "loss": 0.1882, "step": 339870 }, { "epoch": 14.08, "grad_norm": 0.68359375, "learning_rate": 0.00027416327170757567, "loss": 0.1891, "step": 339880 }, { "epoch": 14.08, "grad_norm": 1.0390625, "learning_rate": 0.0002741524772658138, "loss": 0.2301, "step": 339890 }, { "epoch": 14.08, "grad_norm": 0.95703125, "learning_rate": 0.0002741416827785996, "loss": 0.23, "step": 339900 }, { "epoch": 14.08, "grad_norm": 0.79296875, "learning_rate": 0.00027413088824595333, "loss": 0.219, "step": 339910 }, { "epoch": 14.08, "grad_norm": 0.76171875, "learning_rate": 0.0002741200936678954, "loss": 0.1839, "step": 339920 }, { "epoch": 14.08, "grad_norm": 0.49609375, "learning_rate": 0.000274109299044446, "loss": 0.1778, "step": 339930 }, { "epoch": 14.08, "grad_norm": 1.2734375, "learning_rate": 0.00027409850437562554, "loss": 0.1652, "step": 339940 }, { "epoch": 14.08, "grad_norm": 1.453125, "learning_rate": 0.00027408770966145434, "loss": 0.1853, "step": 339950 }, { "epoch": 14.08, "grad_norm": 1.1640625, "learning_rate": 0.0002740769149019526, "loss": 0.1352, "step": 339960 }, { "epoch": 14.08, "grad_norm": 0.828125, "learning_rate": 0.0002740661200971408, "loss": 0.2107, "step": 339970 }, { "epoch": 14.08, "grad_norm": 0.4921875, "learning_rate": 0.00027405532524703904, "loss": 0.1852, "step": 339980 }, { "epoch": 14.08, "grad_norm": 0.271484375, "learning_rate": 0.0002740445303516678, "loss": 0.2135, "step": 339990 }, { "epoch": 14.08, "grad_norm": 0.6015625, "learning_rate": 0.00027403373541104735, "loss": 0.1892, "step": 340000 }, { "epoch": 14.08, "grad_norm": 0.80078125, "learning_rate": 0.000274022940425198, "loss": 0.2013, "step": 340010 }, { "epoch": 14.08, "grad_norm": 0.875, "learning_rate": 0.0002740121453941401, "loss": 0.1996, "step": 340020 }, { "epoch": 14.08, "grad_norm": 0.76953125, "learning_rate": 0.00027400135031789395, "loss": 0.1896, "step": 340030 }, { "epoch": 14.08, "grad_norm": 0.66796875, "learning_rate": 0.00027399055519647977, "loss": 0.2277, "step": 340040 }, { "epoch": 14.08, "grad_norm": 0.5625, "learning_rate": 0.00027397976002991806, "loss": 0.1151, "step": 340050 }, { "epoch": 14.09, "grad_norm": 1.5390625, "learning_rate": 0.0002739689648182289, "loss": 0.1918, "step": 340060 }, { "epoch": 14.09, "grad_norm": 0.9453125, "learning_rate": 0.0002739581695614328, "loss": 0.2349, "step": 340070 }, { "epoch": 14.09, "grad_norm": 0.85546875, "learning_rate": 0.0002739473742595501, "loss": 0.17, "step": 340080 }, { "epoch": 14.09, "grad_norm": 0.78515625, "learning_rate": 0.00027393657891260087, "loss": 0.1666, "step": 340090 }, { "epoch": 14.09, "grad_norm": 1.265625, "learning_rate": 0.0002739257835206057, "loss": 0.183, "step": 340100 }, { "epoch": 14.09, "grad_norm": 0.8359375, "learning_rate": 0.00027391498808358483, "loss": 0.196, "step": 340110 }, { "epoch": 14.09, "grad_norm": 1.265625, "learning_rate": 0.00027390419260155834, "loss": 0.2125, "step": 340120 }, { "epoch": 14.09, "grad_norm": 0.77734375, "learning_rate": 0.0002738933970745469, "loss": 0.1985, "step": 340130 }, { "epoch": 14.09, "grad_norm": 0.54296875, "learning_rate": 0.00027388260150257066, "loss": 0.195, "step": 340140 }, { "epoch": 14.09, "grad_norm": 1.046875, "learning_rate": 0.0002738718058856499, "loss": 0.1828, "step": 340150 }, { "epoch": 14.09, "grad_norm": 0.6484375, "learning_rate": 0.00027386101022380506, "loss": 0.2106, "step": 340160 }, { "epoch": 14.09, "grad_norm": 0.5859375, "learning_rate": 0.0002738502145170562, "loss": 0.1967, "step": 340170 }, { "epoch": 14.09, "grad_norm": 0.490234375, "learning_rate": 0.0002738394187654239, "loss": 0.2011, "step": 340180 }, { "epoch": 14.09, "grad_norm": 0.41015625, "learning_rate": 0.0002738286229689285, "loss": 0.1941, "step": 340190 }, { "epoch": 14.09, "grad_norm": 0.55859375, "learning_rate": 0.0002738178271275901, "loss": 0.214, "step": 340200 }, { "epoch": 14.09, "grad_norm": 1.1015625, "learning_rate": 0.00027380703124142914, "loss": 0.1816, "step": 340210 }, { "epoch": 14.09, "grad_norm": 1.09375, "learning_rate": 0.00027379623531046586, "loss": 0.2087, "step": 340220 }, { "epoch": 14.09, "grad_norm": 0.0, "learning_rate": 0.00027378543933472064, "loss": 0.2105, "step": 340230 }, { "epoch": 14.09, "grad_norm": 0.62890625, "learning_rate": 0.0002737746433142139, "loss": 0.2465, "step": 340240 }, { "epoch": 14.09, "grad_norm": 0.73828125, "learning_rate": 0.00027376384724896576, "loss": 0.1611, "step": 340250 }, { "epoch": 14.09, "grad_norm": 0.9765625, "learning_rate": 0.0002737530511389967, "loss": 0.2, "step": 340260 }, { "epoch": 14.09, "grad_norm": 1.546875, "learning_rate": 0.00027374225498432694, "loss": 0.2045, "step": 340270 }, { "epoch": 14.09, "grad_norm": 0.40625, "learning_rate": 0.00027373145878497674, "loss": 0.1374, "step": 340280 }, { "epoch": 14.09, "grad_norm": 0.44921875, "learning_rate": 0.00027372066254096656, "loss": 0.1799, "step": 340290 }, { "epoch": 14.1, "grad_norm": 0.404296875, "learning_rate": 0.00027370986625231664, "loss": 0.2508, "step": 340300 }, { "epoch": 14.1, "grad_norm": 1.6484375, "learning_rate": 0.0002736990699190473, "loss": 0.1679, "step": 340310 }, { "epoch": 14.1, "grad_norm": 1.46875, "learning_rate": 0.0002736882735411789, "loss": 0.1764, "step": 340320 }, { "epoch": 14.1, "grad_norm": 0.0, "learning_rate": 0.0002736774771187317, "loss": 0.1501, "step": 340330 }, { "epoch": 14.1, "grad_norm": 0.8203125, "learning_rate": 0.0002736666806517261, "loss": 0.1957, "step": 340340 }, { "epoch": 14.1, "grad_norm": 1.3359375, "learning_rate": 0.0002736558841401824, "loss": 0.1803, "step": 340350 }, { "epoch": 14.1, "grad_norm": 1.0859375, "learning_rate": 0.0002736450875841207, "loss": 0.1544, "step": 340360 }, { "epoch": 14.1, "grad_norm": 0.51171875, "learning_rate": 0.0002736342909835616, "loss": 0.2114, "step": 340370 }, { "epoch": 14.1, "grad_norm": 0.67578125, "learning_rate": 0.0002736234943385253, "loss": 0.1281, "step": 340380 }, { "epoch": 14.1, "grad_norm": 1.0625, "learning_rate": 0.0002736126976490322, "loss": 0.2141, "step": 340390 }, { "epoch": 14.1, "grad_norm": 0.84765625, "learning_rate": 0.0002736019009151025, "loss": 0.1897, "step": 340400 }, { "epoch": 14.1, "grad_norm": 1.484375, "learning_rate": 0.0002735911041367565, "loss": 0.2035, "step": 340410 }, { "epoch": 14.1, "grad_norm": 0.55078125, "learning_rate": 0.0002735803073140147, "loss": 0.252, "step": 340420 }, { "epoch": 14.1, "grad_norm": 0.8828125, "learning_rate": 0.00027356951044689735, "loss": 0.1719, "step": 340430 }, { "epoch": 14.1, "grad_norm": 1.6171875, "learning_rate": 0.0002735587135354246, "loss": 0.1928, "step": 340440 }, { "epoch": 14.1, "grad_norm": 0.48828125, "learning_rate": 0.00027354791657961696, "loss": 0.1948, "step": 340450 }, { "epoch": 14.1, "grad_norm": 1.125, "learning_rate": 0.00027353711957949466, "loss": 0.1881, "step": 340460 }, { "epoch": 14.1, "grad_norm": 1.03125, "learning_rate": 0.0002735263225350781, "loss": 0.1801, "step": 340470 }, { "epoch": 14.1, "grad_norm": 0.66015625, "learning_rate": 0.0002735155254463875, "loss": 0.2, "step": 340480 }, { "epoch": 14.1, "grad_norm": 0.625, "learning_rate": 0.0002735047283134431, "loss": 0.1409, "step": 340490 }, { "epoch": 14.1, "grad_norm": 0.8046875, "learning_rate": 0.00027349393113626556, "loss": 0.1801, "step": 340500 }, { "epoch": 14.1, "grad_norm": 0.98828125, "learning_rate": 0.00027348313391487483, "loss": 0.2364, "step": 340510 }, { "epoch": 14.1, "grad_norm": 1.2265625, "learning_rate": 0.0002734723366492914, "loss": 0.1413, "step": 340520 }, { "epoch": 14.1, "grad_norm": 0.50390625, "learning_rate": 0.00027346153933953567, "loss": 0.1827, "step": 340530 }, { "epoch": 14.11, "grad_norm": 1.0625, "learning_rate": 0.00027345074198562773, "loss": 0.2007, "step": 340540 }, { "epoch": 14.11, "grad_norm": 1.140625, "learning_rate": 0.0002734399445875881, "loss": 0.1565, "step": 340550 }, { "epoch": 14.11, "grad_norm": 0.353515625, "learning_rate": 0.00027342914714543706, "loss": 0.2006, "step": 340560 }, { "epoch": 14.11, "grad_norm": 0.32421875, "learning_rate": 0.00027341834965919476, "loss": 0.1484, "step": 340570 }, { "epoch": 14.11, "grad_norm": 0.875, "learning_rate": 0.00027340755212888176, "loss": 0.1217, "step": 340580 }, { "epoch": 14.11, "grad_norm": 0.85546875, "learning_rate": 0.00027339675455451823, "loss": 0.2358, "step": 340590 }, { "epoch": 14.11, "grad_norm": 0.73828125, "learning_rate": 0.00027338595693612454, "loss": 0.1029, "step": 340600 }, { "epoch": 14.11, "grad_norm": 0.6640625, "learning_rate": 0.000273375159273721, "loss": 0.1633, "step": 340610 }, { "epoch": 14.11, "grad_norm": 1.984375, "learning_rate": 0.00027336436156732795, "loss": 0.2201, "step": 340620 }, { "epoch": 14.11, "grad_norm": 0.93359375, "learning_rate": 0.0002733535638169657, "loss": 0.1519, "step": 340630 }, { "epoch": 14.11, "grad_norm": 0.58203125, "learning_rate": 0.00027334276602265464, "loss": 0.1962, "step": 340640 }, { "epoch": 14.11, "grad_norm": 0.66796875, "learning_rate": 0.0002733319681844148, "loss": 0.1408, "step": 340650 }, { "epoch": 14.11, "grad_norm": 1.2578125, "learning_rate": 0.0002733211703022669, "loss": 0.2069, "step": 340660 }, { "epoch": 14.11, "grad_norm": 0.7421875, "learning_rate": 0.000273310372376231, "loss": 0.2017, "step": 340670 }, { "epoch": 14.11, "grad_norm": 0.546875, "learning_rate": 0.00027329957440632753, "loss": 0.2108, "step": 340680 }, { "epoch": 14.11, "grad_norm": 3.5, "learning_rate": 0.0002732887763925768, "loss": 0.1883, "step": 340690 }, { "epoch": 14.11, "grad_norm": 1.1875, "learning_rate": 0.00027327797833499905, "loss": 0.2433, "step": 340700 }, { "epoch": 14.11, "grad_norm": 0.84765625, "learning_rate": 0.0002732671802336147, "loss": 0.2218, "step": 340710 }, { "epoch": 14.11, "grad_norm": 0.93359375, "learning_rate": 0.000273256382088444, "loss": 0.1643, "step": 340720 }, { "epoch": 14.11, "grad_norm": 0.82421875, "learning_rate": 0.0002732455838995073, "loss": 0.189, "step": 340730 }, { "epoch": 14.11, "grad_norm": 0.59375, "learning_rate": 0.00027323478566682497, "loss": 0.1938, "step": 340740 }, { "epoch": 14.11, "grad_norm": 0.640625, "learning_rate": 0.0002732239873904172, "loss": 0.204, "step": 340750 }, { "epoch": 14.11, "grad_norm": 0.91796875, "learning_rate": 0.00027321318907030446, "loss": 0.1891, "step": 340760 }, { "epoch": 14.11, "grad_norm": 0.55078125, "learning_rate": 0.00027320239070650703, "loss": 0.1761, "step": 340770 }, { "epoch": 14.12, "grad_norm": 1.625, "learning_rate": 0.00027319159229904514, "loss": 0.201, "step": 340780 }, { "epoch": 14.12, "grad_norm": 0.7890625, "learning_rate": 0.0002731807938479392, "loss": 0.2016, "step": 340790 }, { "epoch": 14.12, "grad_norm": 1.484375, "learning_rate": 0.00027316999535320944, "loss": 0.2201, "step": 340800 }, { "epoch": 14.12, "grad_norm": 0.2119140625, "learning_rate": 0.00027315919681487633, "loss": 0.223, "step": 340810 }, { "epoch": 14.12, "grad_norm": 0.9765625, "learning_rate": 0.0002731483982329602, "loss": 0.1403, "step": 340820 }, { "epoch": 14.12, "grad_norm": 0.5234375, "learning_rate": 0.00027313759960748114, "loss": 0.1619, "step": 340830 }, { "epoch": 14.12, "grad_norm": 1.34375, "learning_rate": 0.00027312680093845965, "loss": 0.1728, "step": 340840 }, { "epoch": 14.12, "grad_norm": 0.828125, "learning_rate": 0.000273116002225916, "loss": 0.2047, "step": 340850 }, { "epoch": 14.12, "grad_norm": 0.71875, "learning_rate": 0.0002731052034698706, "loss": 0.2425, "step": 340860 }, { "epoch": 14.12, "grad_norm": 1.7734375, "learning_rate": 0.0002730944046703437, "loss": 0.2013, "step": 340870 }, { "epoch": 14.12, "grad_norm": 0.353515625, "learning_rate": 0.00027308360582735554, "loss": 0.2381, "step": 340880 }, { "epoch": 14.12, "grad_norm": 1.0546875, "learning_rate": 0.0002730728069409266, "loss": 0.1767, "step": 340890 }, { "epoch": 14.12, "grad_norm": 0.73828125, "learning_rate": 0.0002730620080110771, "loss": 0.1861, "step": 340900 }, { "epoch": 14.12, "grad_norm": 0.640625, "learning_rate": 0.00027305120903782737, "loss": 0.1728, "step": 340910 }, { "epoch": 14.12, "grad_norm": 0.23046875, "learning_rate": 0.00027304041002119777, "loss": 0.1688, "step": 340920 }, { "epoch": 14.12, "grad_norm": 1.59375, "learning_rate": 0.0002730296109612086, "loss": 0.192, "step": 340930 }, { "epoch": 14.12, "grad_norm": 0.7265625, "learning_rate": 0.0002730188118578802, "loss": 0.2117, "step": 340940 }, { "epoch": 14.12, "grad_norm": 0.55078125, "learning_rate": 0.00027300801271123293, "loss": 0.1422, "step": 340950 }, { "epoch": 14.12, "grad_norm": 0.9140625, "learning_rate": 0.00027299721352128705, "loss": 0.2236, "step": 340960 }, { "epoch": 14.12, "grad_norm": 0.4453125, "learning_rate": 0.0002729864142880628, "loss": 0.2084, "step": 340970 }, { "epoch": 14.12, "grad_norm": 0.91015625, "learning_rate": 0.00027297561501158073, "loss": 0.1663, "step": 340980 }, { "epoch": 14.12, "grad_norm": 1.8046875, "learning_rate": 0.00027296481569186095, "loss": 0.1822, "step": 340990 }, { "epoch": 14.12, "grad_norm": 1.0546875, "learning_rate": 0.00027295401632892384, "loss": 0.1783, "step": 341000 }, { "epoch": 14.12, "grad_norm": 2.15625, "learning_rate": 0.00027294321692278984, "loss": 0.2289, "step": 341010 }, { "epoch": 14.13, "grad_norm": 1.171875, "learning_rate": 0.00027293241747347916, "loss": 0.2091, "step": 341020 }, { "epoch": 14.13, "grad_norm": 1.15625, "learning_rate": 0.0002729216179810121, "loss": 0.1394, "step": 341030 }, { "epoch": 14.13, "grad_norm": 1.109375, "learning_rate": 0.0002729108184454091, "loss": 0.1853, "step": 341040 }, { "epoch": 14.13, "grad_norm": 1.3671875, "learning_rate": 0.00027290001886669035, "loss": 0.2034, "step": 341050 }, { "epoch": 14.13, "grad_norm": 0.5234375, "learning_rate": 0.0002728892192448763, "loss": 0.1797, "step": 341060 }, { "epoch": 14.13, "grad_norm": 1.0625, "learning_rate": 0.0002728784195799872, "loss": 0.2285, "step": 341070 }, { "epoch": 14.13, "grad_norm": 1.2265625, "learning_rate": 0.0002728676198720433, "loss": 0.1679, "step": 341080 }, { "epoch": 14.13, "grad_norm": 0.80078125, "learning_rate": 0.00027285682012106506, "loss": 0.1817, "step": 341090 }, { "epoch": 14.13, "grad_norm": 0.77734375, "learning_rate": 0.00027284602032707274, "loss": 0.1609, "step": 341100 }, { "epoch": 14.13, "grad_norm": 0.87109375, "learning_rate": 0.0002728352204900867, "loss": 0.2239, "step": 341110 }, { "epoch": 14.13, "grad_norm": 2.25, "learning_rate": 0.00027282442061012726, "loss": 0.1733, "step": 341120 }, { "epoch": 14.13, "grad_norm": 1.40625, "learning_rate": 0.00027281362068721474, "loss": 0.1463, "step": 341130 }, { "epoch": 14.13, "grad_norm": 0.69140625, "learning_rate": 0.0002728028207213694, "loss": 0.2024, "step": 341140 }, { "epoch": 14.13, "grad_norm": 0.80859375, "learning_rate": 0.0002727920207126116, "loss": 0.2116, "step": 341150 }, { "epoch": 14.13, "grad_norm": 0.4375, "learning_rate": 0.0002727812206609617, "loss": 0.1948, "step": 341160 }, { "epoch": 14.13, "grad_norm": 0.6171875, "learning_rate": 0.00027277042056644005, "loss": 0.2098, "step": 341170 }, { "epoch": 14.13, "grad_norm": 0.890625, "learning_rate": 0.0002727596204290668, "loss": 0.1531, "step": 341180 }, { "epoch": 14.13, "grad_norm": 0.5234375, "learning_rate": 0.00027274882024886255, "loss": 0.1424, "step": 341190 }, { "epoch": 14.13, "grad_norm": 1.4765625, "learning_rate": 0.0002727380200258474, "loss": 0.1852, "step": 341200 }, { "epoch": 14.13, "grad_norm": 0.439453125, "learning_rate": 0.00027272721976004177, "loss": 0.2167, "step": 341210 }, { "epoch": 14.13, "grad_norm": 0.357421875, "learning_rate": 0.0002727164194514659, "loss": 0.1982, "step": 341220 }, { "epoch": 14.13, "grad_norm": 0.72265625, "learning_rate": 0.0002727056191001403, "loss": 0.1704, "step": 341230 }, { "epoch": 14.13, "grad_norm": 1.484375, "learning_rate": 0.00027269481870608516, "loss": 0.1793, "step": 341240 }, { "epoch": 14.13, "grad_norm": 1.2265625, "learning_rate": 0.0002726840182693207, "loss": 0.1723, "step": 341250 }, { "epoch": 14.13, "grad_norm": 0.98046875, "learning_rate": 0.0002726732177898675, "loss": 0.1987, "step": 341260 }, { "epoch": 14.14, "grad_norm": 0.5625, "learning_rate": 0.00027266241726774565, "loss": 0.1755, "step": 341270 }, { "epoch": 14.14, "grad_norm": 1.078125, "learning_rate": 0.0002726516167029757, "loss": 0.2191, "step": 341280 }, { "epoch": 14.14, "grad_norm": 1.3359375, "learning_rate": 0.00027264081609557777, "loss": 0.1653, "step": 341290 }, { "epoch": 14.14, "grad_norm": 0.75390625, "learning_rate": 0.0002726300154455723, "loss": 0.1924, "step": 341300 }, { "epoch": 14.14, "grad_norm": 0.380859375, "learning_rate": 0.00027261921475297955, "loss": 0.169, "step": 341310 }, { "epoch": 14.14, "grad_norm": 1.609375, "learning_rate": 0.0002726084140178199, "loss": 0.2195, "step": 341320 }, { "epoch": 14.14, "grad_norm": 0.75390625, "learning_rate": 0.00027259761324011366, "loss": 0.206, "step": 341330 }, { "epoch": 14.14, "grad_norm": 1.234375, "learning_rate": 0.00027258681241988116, "loss": 0.1725, "step": 341340 }, { "epoch": 14.14, "grad_norm": 0.97265625, "learning_rate": 0.0002725760115571426, "loss": 0.203, "step": 341350 }, { "epoch": 14.14, "grad_norm": 0.97265625, "learning_rate": 0.00027256521065191865, "loss": 0.2319, "step": 341360 }, { "epoch": 14.14, "grad_norm": 1.8671875, "learning_rate": 0.00027255440970422925, "loss": 0.1798, "step": 341370 }, { "epoch": 14.14, "grad_norm": 0.86328125, "learning_rate": 0.0002725436087140949, "loss": 0.1919, "step": 341380 }, { "epoch": 14.14, "grad_norm": 1.9453125, "learning_rate": 0.000272532807681536, "loss": 0.1943, "step": 341390 }, { "epoch": 14.14, "grad_norm": 0.8515625, "learning_rate": 0.00027252200660657274, "loss": 0.1942, "step": 341400 }, { "epoch": 14.14, "grad_norm": 1.90625, "learning_rate": 0.00027251120548922543, "loss": 0.1726, "step": 341410 }, { "epoch": 14.14, "grad_norm": 1.2265625, "learning_rate": 0.0002725004043295145, "loss": 0.1982, "step": 341420 }, { "epoch": 14.14, "grad_norm": 0.466796875, "learning_rate": 0.00027248960312746025, "loss": 0.1972, "step": 341430 }, { "epoch": 14.14, "grad_norm": 0.69921875, "learning_rate": 0.00027247880188308304, "loss": 0.1744, "step": 341440 }, { "epoch": 14.14, "grad_norm": 0.56640625, "learning_rate": 0.0002724680005964031, "loss": 0.179, "step": 341450 }, { "epoch": 14.14, "grad_norm": 0.99609375, "learning_rate": 0.00027245719926744086, "loss": 0.2022, "step": 341460 }, { "epoch": 14.14, "grad_norm": 1.125, "learning_rate": 0.00027244639789621654, "loss": 0.1469, "step": 341470 }, { "epoch": 14.14, "grad_norm": 0.75, "learning_rate": 0.0002724355964827505, "loss": 0.1331, "step": 341480 }, { "epoch": 14.14, "grad_norm": 0.76171875, "learning_rate": 0.0002724247950270632, "loss": 0.1727, "step": 341490 }, { "epoch": 14.14, "grad_norm": 1.0390625, "learning_rate": 0.0002724139935291747, "loss": 0.1496, "step": 341500 }, { "epoch": 14.15, "grad_norm": 0.41015625, "learning_rate": 0.0002724031919891056, "loss": 0.1841, "step": 341510 }, { "epoch": 14.15, "grad_norm": 1.5078125, "learning_rate": 0.00027239239040687614, "loss": 0.2174, "step": 341520 }, { "epoch": 14.15, "grad_norm": 0.72265625, "learning_rate": 0.00027238158878250656, "loss": 0.1977, "step": 341530 }, { "epoch": 14.15, "grad_norm": 2.734375, "learning_rate": 0.00027237078711601724, "loss": 0.1835, "step": 341540 }, { "epoch": 14.15, "grad_norm": 0.478515625, "learning_rate": 0.0002723599854074286, "loss": 0.2673, "step": 341550 }, { "epoch": 14.15, "grad_norm": 3.4375, "learning_rate": 0.0002723491836567607, "loss": 0.2491, "step": 341560 }, { "epoch": 14.15, "grad_norm": 0.55078125, "learning_rate": 0.0002723383818640342, "loss": 0.1744, "step": 341570 }, { "epoch": 14.15, "grad_norm": 0.8515625, "learning_rate": 0.00027232758002926924, "loss": 0.1743, "step": 341580 }, { "epoch": 14.15, "grad_norm": 1.0703125, "learning_rate": 0.00027231677815248617, "loss": 0.1614, "step": 341590 }, { "epoch": 14.15, "grad_norm": 0.72265625, "learning_rate": 0.0002723059762337054, "loss": 0.1828, "step": 341600 }, { "epoch": 14.15, "grad_norm": 0.8515625, "learning_rate": 0.000272295174272947, "loss": 0.1652, "step": 341610 }, { "epoch": 14.15, "grad_norm": 0.82421875, "learning_rate": 0.00027228437227023176, "loss": 0.2075, "step": 341620 }, { "epoch": 14.15, "grad_norm": 0.439453125, "learning_rate": 0.0002722735702255796, "loss": 0.1591, "step": 341630 }, { "epoch": 14.15, "grad_norm": 0.62890625, "learning_rate": 0.0002722627681390109, "loss": 0.1682, "step": 341640 }, { "epoch": 14.15, "grad_norm": 0.443359375, "learning_rate": 0.00027225196601054623, "loss": 0.2027, "step": 341650 }, { "epoch": 14.15, "grad_norm": 2.28125, "learning_rate": 0.0002722411638402057, "loss": 0.1793, "step": 341660 }, { "epoch": 14.15, "grad_norm": 0.77734375, "learning_rate": 0.0002722303616280097, "loss": 0.1994, "step": 341670 }, { "epoch": 14.15, "grad_norm": 0.796875, "learning_rate": 0.0002722195593739786, "loss": 0.174, "step": 341680 }, { "epoch": 14.15, "grad_norm": 0.69921875, "learning_rate": 0.00027220875707813257, "loss": 0.1842, "step": 341690 }, { "epoch": 14.15, "grad_norm": 0.96484375, "learning_rate": 0.0002721979547404922, "loss": 0.1675, "step": 341700 }, { "epoch": 14.15, "grad_norm": 0.640625, "learning_rate": 0.0002721871523610776, "loss": 0.1854, "step": 341710 }, { "epoch": 14.15, "grad_norm": 0.91796875, "learning_rate": 0.00027217634993990914, "loss": 0.1992, "step": 341720 }, { "epoch": 14.15, "grad_norm": 0.55859375, "learning_rate": 0.0002721655474770073, "loss": 0.164, "step": 341730 }, { "epoch": 14.15, "grad_norm": 0.51171875, "learning_rate": 0.0002721547449723922, "loss": 0.1953, "step": 341740 }, { "epoch": 14.16, "grad_norm": 0.56640625, "learning_rate": 0.00027214394242608425, "loss": 0.1794, "step": 341750 }, { "epoch": 14.16, "grad_norm": 0.470703125, "learning_rate": 0.0002721331398381039, "loss": 0.2124, "step": 341760 }, { "epoch": 14.16, "grad_norm": 1.2421875, "learning_rate": 0.00027212233720847124, "loss": 0.1716, "step": 341770 }, { "epoch": 14.16, "grad_norm": 0.9140625, "learning_rate": 0.0002721115345372068, "loss": 0.2069, "step": 341780 }, { "epoch": 14.16, "grad_norm": 1.265625, "learning_rate": 0.00027210073182433084, "loss": 0.176, "step": 341790 }, { "epoch": 14.16, "grad_norm": 0.87890625, "learning_rate": 0.0002720899290698637, "loss": 0.2429, "step": 341800 }, { "epoch": 14.16, "grad_norm": 1.484375, "learning_rate": 0.0002720791262738257, "loss": 0.1986, "step": 341810 }, { "epoch": 14.16, "grad_norm": 0.5390625, "learning_rate": 0.0002720683234362371, "loss": 0.2215, "step": 341820 }, { "epoch": 14.16, "grad_norm": 0.7421875, "learning_rate": 0.0002720575205571184, "loss": 0.1858, "step": 341830 }, { "epoch": 14.16, "grad_norm": 0.80859375, "learning_rate": 0.00027204671763648975, "loss": 0.1641, "step": 341840 }, { "epoch": 14.16, "grad_norm": 1.4609375, "learning_rate": 0.00027203591467437157, "loss": 0.1943, "step": 341850 }, { "epoch": 14.16, "grad_norm": 0.89453125, "learning_rate": 0.0002720251116707842, "loss": 0.1515, "step": 341860 }, { "epoch": 14.16, "grad_norm": 1.015625, "learning_rate": 0.000272014308625748, "loss": 0.2315, "step": 341870 }, { "epoch": 14.16, "grad_norm": 0.96875, "learning_rate": 0.0002720035055392831, "loss": 0.1702, "step": 341880 }, { "epoch": 14.16, "grad_norm": 0.90234375, "learning_rate": 0.0002719927024114101, "loss": 0.2224, "step": 341890 }, { "epoch": 14.16, "grad_norm": 0.0, "learning_rate": 0.0002719818992421492, "loss": 0.2331, "step": 341900 }, { "epoch": 14.16, "grad_norm": 0.99609375, "learning_rate": 0.0002719710960315207, "loss": 0.1507, "step": 341910 }, { "epoch": 14.16, "grad_norm": 0.224609375, "learning_rate": 0.000271960292779545, "loss": 0.1691, "step": 341920 }, { "epoch": 14.16, "grad_norm": 0.4140625, "learning_rate": 0.00027194948948624235, "loss": 0.2017, "step": 341930 }, { "epoch": 14.16, "grad_norm": 0.5859375, "learning_rate": 0.0002719386861516332, "loss": 0.2062, "step": 341940 }, { "epoch": 14.16, "grad_norm": 0.734375, "learning_rate": 0.0002719278827757378, "loss": 0.1814, "step": 341950 }, { "epoch": 14.16, "grad_norm": 0.58984375, "learning_rate": 0.0002719170793585764, "loss": 0.1372, "step": 341960 }, { "epoch": 14.16, "grad_norm": 0.90234375, "learning_rate": 0.00027190627590016955, "loss": 0.2301, "step": 341970 }, { "epoch": 14.16, "grad_norm": 0.65625, "learning_rate": 0.00027189547240053733, "loss": 0.1439, "step": 341980 }, { "epoch": 14.17, "grad_norm": 0.76171875, "learning_rate": 0.00027188466885970033, "loss": 0.1561, "step": 341990 }, { "epoch": 14.17, "grad_norm": 0.365234375, "learning_rate": 0.0002718738652776787, "loss": 0.215, "step": 342000 }, { "epoch": 14.17, "grad_norm": 0.62890625, "learning_rate": 0.0002718630616544927, "loss": 0.1787, "step": 342010 }, { "epoch": 14.17, "grad_norm": 0.91015625, "learning_rate": 0.0002718522579901629, "loss": 0.1451, "step": 342020 }, { "epoch": 14.17, "grad_norm": 1.734375, "learning_rate": 0.0002718414542847095, "loss": 0.1874, "step": 342030 }, { "epoch": 14.17, "grad_norm": 0.5859375, "learning_rate": 0.00027183065053815284, "loss": 0.2098, "step": 342040 }, { "epoch": 14.17, "grad_norm": 0.9296875, "learning_rate": 0.0002718198467505133, "loss": 0.1778, "step": 342050 }, { "epoch": 14.17, "grad_norm": 2.015625, "learning_rate": 0.0002718090429218111, "loss": 0.2545, "step": 342060 }, { "epoch": 14.17, "grad_norm": 0.44140625, "learning_rate": 0.0002717982390520666, "loss": 0.2474, "step": 342070 }, { "epoch": 14.17, "grad_norm": 0.875, "learning_rate": 0.0002717874351413003, "loss": 0.2038, "step": 342080 }, { "epoch": 14.17, "grad_norm": 0.953125, "learning_rate": 0.0002717766311895322, "loss": 0.1908, "step": 342090 }, { "epoch": 14.17, "grad_norm": 1.03125, "learning_rate": 0.000271765827196783, "loss": 0.1855, "step": 342100 }, { "epoch": 14.17, "grad_norm": 1.875, "learning_rate": 0.0002717550231630728, "loss": 0.1922, "step": 342110 }, { "epoch": 14.17, "grad_norm": 2.03125, "learning_rate": 0.000271744219088422, "loss": 0.1707, "step": 342120 }, { "epoch": 14.17, "grad_norm": 0.9609375, "learning_rate": 0.00027173341497285097, "loss": 0.1775, "step": 342130 }, { "epoch": 14.17, "grad_norm": 0.291015625, "learning_rate": 0.00027172261081637996, "loss": 0.165, "step": 342140 }, { "epoch": 14.17, "grad_norm": 0.609375, "learning_rate": 0.00027171180661902933, "loss": 0.1979, "step": 342150 }, { "epoch": 14.17, "grad_norm": 2.90625, "learning_rate": 0.00027170100238081945, "loss": 0.2218, "step": 342160 }, { "epoch": 14.17, "grad_norm": 1.28125, "learning_rate": 0.00027169019810177055, "loss": 0.196, "step": 342170 }, { "epoch": 14.17, "grad_norm": 1.0546875, "learning_rate": 0.0002716793937819032, "loss": 0.18, "step": 342180 }, { "epoch": 14.17, "grad_norm": 0.6484375, "learning_rate": 0.0002716685894212374, "loss": 0.1674, "step": 342190 }, { "epoch": 14.17, "grad_norm": 0.8125, "learning_rate": 0.00027165778501979376, "loss": 0.1631, "step": 342200 }, { "epoch": 14.17, "grad_norm": 1.15625, "learning_rate": 0.0002716469805775925, "loss": 0.222, "step": 342210 }, { "epoch": 14.17, "grad_norm": 0.83203125, "learning_rate": 0.0002716361760946539, "loss": 0.2117, "step": 342220 }, { "epoch": 14.18, "grad_norm": 1.109375, "learning_rate": 0.0002716253715709984, "loss": 0.1936, "step": 342230 }, { "epoch": 14.18, "grad_norm": 0.99609375, "learning_rate": 0.00027161456700664627, "loss": 0.1831, "step": 342240 }, { "epoch": 14.18, "grad_norm": 0.66015625, "learning_rate": 0.0002716037624016179, "loss": 0.1501, "step": 342250 }, { "epoch": 14.18, "grad_norm": 1.03125, "learning_rate": 0.00027159295775593356, "loss": 0.13, "step": 342260 }, { "epoch": 14.18, "grad_norm": 0.51171875, "learning_rate": 0.0002715821530696136, "loss": 0.1517, "step": 342270 }, { "epoch": 14.18, "grad_norm": 1.390625, "learning_rate": 0.00027157134834267834, "loss": 0.155, "step": 342280 }, { "epoch": 14.18, "grad_norm": 0.6328125, "learning_rate": 0.0002715605435751481, "loss": 0.1862, "step": 342290 }, { "epoch": 14.18, "grad_norm": 0.89453125, "learning_rate": 0.00027154973876704336, "loss": 0.2031, "step": 342300 }, { "epoch": 14.18, "grad_norm": 1.109375, "learning_rate": 0.00027153893391838424, "loss": 0.1917, "step": 342310 }, { "epoch": 14.18, "grad_norm": 1.1875, "learning_rate": 0.00027152812902919124, "loss": 0.2032, "step": 342320 }, { "epoch": 14.18, "grad_norm": 0.4921875, "learning_rate": 0.00027151732409948454, "loss": 0.187, "step": 342330 }, { "epoch": 14.18, "grad_norm": 0.7421875, "learning_rate": 0.00027150651912928467, "loss": 0.1543, "step": 342340 }, { "epoch": 14.18, "grad_norm": 0.6171875, "learning_rate": 0.00027149571411861175, "loss": 0.1616, "step": 342350 }, { "epoch": 14.18, "grad_norm": 0.78515625, "learning_rate": 0.0002714849090674862, "loss": 0.1602, "step": 342360 }, { "epoch": 14.18, "grad_norm": 0.6796875, "learning_rate": 0.00027147410397592846, "loss": 0.2157, "step": 342370 }, { "epoch": 14.18, "grad_norm": 0.5234375, "learning_rate": 0.00027146329884395875, "loss": 0.1789, "step": 342380 }, { "epoch": 14.18, "grad_norm": 0.63671875, "learning_rate": 0.00027145249367159746, "loss": 0.1606, "step": 342390 }, { "epoch": 14.18, "grad_norm": 0.279296875, "learning_rate": 0.0002714416884588649, "loss": 0.1699, "step": 342400 }, { "epoch": 14.18, "grad_norm": 0.322265625, "learning_rate": 0.0002714308832057813, "loss": 0.1349, "step": 342410 }, { "epoch": 14.18, "grad_norm": 1.875, "learning_rate": 0.0002714200779123672, "loss": 0.2051, "step": 342420 }, { "epoch": 14.18, "grad_norm": 0.6875, "learning_rate": 0.0002714092725786427, "loss": 0.1572, "step": 342430 }, { "epoch": 14.18, "grad_norm": 1.375, "learning_rate": 0.00027139846720462834, "loss": 0.1524, "step": 342440 }, { "epoch": 14.18, "grad_norm": 0.9765625, "learning_rate": 0.00027138766179034436, "loss": 0.1996, "step": 342450 }, { "epoch": 14.18, "grad_norm": 1.5703125, "learning_rate": 0.00027137685633581117, "loss": 0.209, "step": 342460 }, { "epoch": 14.19, "grad_norm": 0.671875, "learning_rate": 0.00027136605084104897, "loss": 0.1949, "step": 342470 }, { "epoch": 14.19, "grad_norm": 0.43359375, "learning_rate": 0.00027135524530607816, "loss": 0.1849, "step": 342480 }, { "epoch": 14.19, "grad_norm": 0.58203125, "learning_rate": 0.0002713444397309192, "loss": 0.1854, "step": 342490 }, { "epoch": 14.19, "grad_norm": 2.15625, "learning_rate": 0.0002713336341155922, "loss": 0.2258, "step": 342500 }, { "epoch": 14.19, "grad_norm": 0.189453125, "learning_rate": 0.0002713228284601176, "loss": 0.1963, "step": 342510 }, { "epoch": 14.19, "grad_norm": 0.68359375, "learning_rate": 0.0002713120227645158, "loss": 0.1784, "step": 342520 }, { "epoch": 14.19, "grad_norm": 0.9765625, "learning_rate": 0.00027130121702880703, "loss": 0.2704, "step": 342530 }, { "epoch": 14.19, "grad_norm": 0.478515625, "learning_rate": 0.0002712904112530117, "loss": 0.1523, "step": 342540 }, { "epoch": 14.19, "grad_norm": 0.51171875, "learning_rate": 0.00027127960543715015, "loss": 0.1638, "step": 342550 }, { "epoch": 14.19, "grad_norm": 0.984375, "learning_rate": 0.0002712687995812426, "loss": 0.1751, "step": 342560 }, { "epoch": 14.19, "grad_norm": 1.015625, "learning_rate": 0.0002712579936853095, "loss": 0.1889, "step": 342570 }, { "epoch": 14.19, "grad_norm": 0.435546875, "learning_rate": 0.00027124718774937113, "loss": 0.1404, "step": 342580 }, { "epoch": 14.19, "grad_norm": 0.8828125, "learning_rate": 0.0002712363817734479, "loss": 0.1722, "step": 342590 }, { "epoch": 14.19, "grad_norm": 1.5546875, "learning_rate": 0.00027122557575756006, "loss": 0.2064, "step": 342600 }, { "epoch": 14.19, "grad_norm": 1.9375, "learning_rate": 0.00027121476970172795, "loss": 0.2102, "step": 342610 }, { "epoch": 14.19, "grad_norm": 0.349609375, "learning_rate": 0.00027120396360597206, "loss": 0.2122, "step": 342620 }, { "epoch": 14.19, "grad_norm": 0.94921875, "learning_rate": 0.0002711931574703125, "loss": 0.2043, "step": 342630 }, { "epoch": 14.19, "grad_norm": 1.2890625, "learning_rate": 0.00027118235129476966, "loss": 0.2124, "step": 342640 }, { "epoch": 14.19, "grad_norm": 0.205078125, "learning_rate": 0.000271171545079364, "loss": 0.1821, "step": 342650 }, { "epoch": 14.19, "grad_norm": 1.3203125, "learning_rate": 0.00027116073882411566, "loss": 0.2003, "step": 342660 }, { "epoch": 14.19, "grad_norm": 1.8359375, "learning_rate": 0.0002711499325290453, "loss": 0.1977, "step": 342670 }, { "epoch": 14.19, "grad_norm": 0.99609375, "learning_rate": 0.0002711391261941729, "loss": 0.1478, "step": 342680 }, { "epoch": 14.19, "grad_norm": 0.578125, "learning_rate": 0.00027112831981951894, "loss": 0.2508, "step": 342690 }, { "epoch": 14.19, "grad_norm": 1.3125, "learning_rate": 0.0002711175134051039, "loss": 0.1597, "step": 342700 }, { "epoch": 14.2, "grad_norm": 0.5625, "learning_rate": 0.00027110670695094783, "loss": 0.2148, "step": 342710 }, { "epoch": 14.2, "grad_norm": 0.70703125, "learning_rate": 0.00027109590045707127, "loss": 0.1533, "step": 342720 }, { "epoch": 14.2, "grad_norm": 0.8515625, "learning_rate": 0.0002710850939234945, "loss": 0.2421, "step": 342730 }, { "epoch": 14.2, "grad_norm": 0.5625, "learning_rate": 0.00027107428735023784, "loss": 0.1763, "step": 342740 }, { "epoch": 14.2, "grad_norm": 1.5234375, "learning_rate": 0.00027106348073732174, "loss": 0.2087, "step": 342750 }, { "epoch": 14.2, "grad_norm": 0.50390625, "learning_rate": 0.00027105267408476634, "loss": 0.2198, "step": 342760 }, { "epoch": 14.2, "grad_norm": 1.9140625, "learning_rate": 0.0002710418673925922, "loss": 0.2045, "step": 342770 }, { "epoch": 14.2, "grad_norm": 0.46484375, "learning_rate": 0.00027103106066081944, "loss": 0.1384, "step": 342780 }, { "epoch": 14.2, "grad_norm": 0.8984375, "learning_rate": 0.00027102025388946844, "loss": 0.2002, "step": 342790 }, { "epoch": 14.2, "grad_norm": 1.0625, "learning_rate": 0.0002710094470785597, "loss": 0.1957, "step": 342800 }, { "epoch": 14.2, "grad_norm": 1.3828125, "learning_rate": 0.0002709986402281134, "loss": 0.1816, "step": 342810 }, { "epoch": 14.2, "grad_norm": 1.5546875, "learning_rate": 0.00027098783333815, "loss": 0.1774, "step": 342820 }, { "epoch": 14.2, "grad_norm": 0.79296875, "learning_rate": 0.00027097702640868973, "loss": 0.1982, "step": 342830 }, { "epoch": 14.2, "grad_norm": 0.64453125, "learning_rate": 0.0002709662194397529, "loss": 0.2015, "step": 342840 }, { "epoch": 14.2, "grad_norm": 0.302734375, "learning_rate": 0.00027095541243136, "loss": 0.1965, "step": 342850 }, { "epoch": 14.2, "grad_norm": 0.2431640625, "learning_rate": 0.00027094460538353124, "loss": 0.175, "step": 342860 }, { "epoch": 14.2, "grad_norm": 0.7421875, "learning_rate": 0.0002709337982962869, "loss": 0.1657, "step": 342870 }, { "epoch": 14.2, "grad_norm": 0.341796875, "learning_rate": 0.00027092299116964754, "loss": 0.2183, "step": 342880 }, { "epoch": 14.2, "grad_norm": 1.0078125, "learning_rate": 0.0002709121840036333, "loss": 0.186, "step": 342890 }, { "epoch": 14.2, "grad_norm": 0.83984375, "learning_rate": 0.0002709013767982646, "loss": 0.1519, "step": 342900 }, { "epoch": 14.2, "grad_norm": 0.828125, "learning_rate": 0.0002708905695535618, "loss": 0.2182, "step": 342910 }, { "epoch": 14.2, "grad_norm": 0.890625, "learning_rate": 0.00027087976226954514, "loss": 0.1366, "step": 342920 }, { "epoch": 14.2, "grad_norm": 0.78125, "learning_rate": 0.0002708689549462351, "loss": 0.1968, "step": 342930 }, { "epoch": 14.2, "grad_norm": 0.75, "learning_rate": 0.00027085814758365186, "loss": 0.1741, "step": 342940 }, { "epoch": 14.2, "grad_norm": 1.25, "learning_rate": 0.0002708473401818159, "loss": 0.2028, "step": 342950 }, { "epoch": 14.21, "grad_norm": 0.55859375, "learning_rate": 0.0002708365327407475, "loss": 0.2053, "step": 342960 }, { "epoch": 14.21, "grad_norm": 0.63671875, "learning_rate": 0.00027082572526046695, "loss": 0.2157, "step": 342970 }, { "epoch": 14.21, "grad_norm": 1.0859375, "learning_rate": 0.00027081491774099466, "loss": 0.1865, "step": 342980 }, { "epoch": 14.21, "grad_norm": 0.80859375, "learning_rate": 0.00027080411018235096, "loss": 0.1805, "step": 342990 }, { "epoch": 14.21, "grad_norm": 0.89453125, "learning_rate": 0.00027079330258455607, "loss": 0.2106, "step": 343000 }, { "epoch": 14.21, "grad_norm": 1.1640625, "learning_rate": 0.0002707824949476306, "loss": 0.196, "step": 343010 }, { "epoch": 14.21, "grad_norm": 0.640625, "learning_rate": 0.0002707716872715946, "loss": 0.2095, "step": 343020 }, { "epoch": 14.21, "grad_norm": 0.7109375, "learning_rate": 0.00027076087955646855, "loss": 0.1891, "step": 343030 }, { "epoch": 14.21, "grad_norm": 1.265625, "learning_rate": 0.0002707500718022728, "loss": 0.1942, "step": 343040 }, { "epoch": 14.21, "grad_norm": 0.66796875, "learning_rate": 0.0002707392640090277, "loss": 0.1818, "step": 343050 }, { "epoch": 14.21, "grad_norm": 0.6796875, "learning_rate": 0.0002707284561767534, "loss": 0.228, "step": 343060 }, { "epoch": 14.21, "grad_norm": 1.1484375, "learning_rate": 0.0002707176483054705, "loss": 0.1884, "step": 343070 }, { "epoch": 14.21, "grad_norm": 0.98828125, "learning_rate": 0.00027070684039519913, "loss": 0.1899, "step": 343080 }, { "epoch": 14.21, "grad_norm": 1.015625, "learning_rate": 0.00027069603244595983, "loss": 0.2237, "step": 343090 }, { "epoch": 14.21, "grad_norm": 0.95703125, "learning_rate": 0.00027068522445777274, "loss": 0.2029, "step": 343100 }, { "epoch": 14.21, "grad_norm": 1.3046875, "learning_rate": 0.00027067441643065834, "loss": 0.1553, "step": 343110 }, { "epoch": 14.21, "grad_norm": 1.5703125, "learning_rate": 0.0002706636083646369, "loss": 0.2493, "step": 343120 }, { "epoch": 14.21, "grad_norm": 0.33984375, "learning_rate": 0.0002706528002597288, "loss": 0.2032, "step": 343130 }, { "epoch": 14.21, "grad_norm": 1.171875, "learning_rate": 0.0002706419921159543, "loss": 0.223, "step": 343140 }, { "epoch": 14.21, "grad_norm": 0.8046875, "learning_rate": 0.0002706311839333339, "loss": 0.1784, "step": 343150 }, { "epoch": 14.21, "grad_norm": 0.93359375, "learning_rate": 0.00027062037571188777, "loss": 0.1943, "step": 343160 }, { "epoch": 14.21, "grad_norm": 0.6171875, "learning_rate": 0.00027060956745163635, "loss": 0.2133, "step": 343170 }, { "epoch": 14.21, "grad_norm": 0.87890625, "learning_rate": 0.0002705987591526, "loss": 0.1993, "step": 343180 }, { "epoch": 14.21, "grad_norm": 0.3984375, "learning_rate": 0.0002705879508147989, "loss": 0.2101, "step": 343190 }, { "epoch": 14.22, "grad_norm": 0.482421875, "learning_rate": 0.00027057714243825356, "loss": 0.138, "step": 343200 }, { "epoch": 14.22, "grad_norm": 0.69921875, "learning_rate": 0.00027056633402298426, "loss": 0.1758, "step": 343210 }, { "epoch": 14.22, "grad_norm": 0.0004367828369140625, "learning_rate": 0.0002705555255690113, "loss": 0.1656, "step": 343220 }, { "epoch": 14.22, "grad_norm": 0.88671875, "learning_rate": 0.00027054471707635517, "loss": 0.1949, "step": 343230 }, { "epoch": 14.22, "grad_norm": 0.640625, "learning_rate": 0.0002705339085450359, "loss": 0.1734, "step": 343240 }, { "epoch": 14.22, "grad_norm": 0.6953125, "learning_rate": 0.00027052309997507417, "loss": 0.1816, "step": 343250 }, { "epoch": 14.22, "grad_norm": 0.640625, "learning_rate": 0.00027051229136649017, "loss": 0.1544, "step": 343260 }, { "epoch": 14.22, "grad_norm": 0.9453125, "learning_rate": 0.00027050148271930423, "loss": 0.224, "step": 343270 }, { "epoch": 14.22, "grad_norm": 1.53125, "learning_rate": 0.0002704906740335368, "loss": 0.1553, "step": 343280 }, { "epoch": 14.22, "grad_norm": 0.96484375, "learning_rate": 0.00027047986530920803, "loss": 0.1769, "step": 343290 }, { "epoch": 14.22, "grad_norm": 1.3828125, "learning_rate": 0.00027046905654633843, "loss": 0.2356, "step": 343300 }, { "epoch": 14.22, "grad_norm": 0.28125, "learning_rate": 0.00027045824774494826, "loss": 0.1714, "step": 343310 }, { "epoch": 14.22, "grad_norm": 0.6328125, "learning_rate": 0.0002704474389050578, "loss": 0.2367, "step": 343320 }, { "epoch": 14.22, "grad_norm": 0.76953125, "learning_rate": 0.00027043663002668754, "loss": 0.2052, "step": 343330 }, { "epoch": 14.22, "grad_norm": 0.72265625, "learning_rate": 0.00027042582110985776, "loss": 0.1719, "step": 343340 }, { "epoch": 14.22, "grad_norm": 0.37890625, "learning_rate": 0.00027041501215458873, "loss": 0.1562, "step": 343350 }, { "epoch": 14.22, "grad_norm": 0.470703125, "learning_rate": 0.000270404203160901, "loss": 0.192, "step": 343360 }, { "epoch": 14.22, "grad_norm": 0.9609375, "learning_rate": 0.0002703933941288146, "loss": 0.1998, "step": 343370 }, { "epoch": 14.22, "grad_norm": 0.69921875, "learning_rate": 0.00027038258505835013, "loss": 0.2152, "step": 343380 }, { "epoch": 14.22, "grad_norm": 0.80078125, "learning_rate": 0.00027037177594952777, "loss": 0.1974, "step": 343390 }, { "epoch": 14.22, "grad_norm": 0.37109375, "learning_rate": 0.00027036096680236796, "loss": 0.2033, "step": 343400 }, { "epoch": 14.22, "grad_norm": 0.6328125, "learning_rate": 0.00027035015761689104, "loss": 0.1692, "step": 343410 }, { "epoch": 14.22, "grad_norm": 0.423828125, "learning_rate": 0.0002703393483931172, "loss": 0.1955, "step": 343420 }, { "epoch": 14.22, "grad_norm": 0.68359375, "learning_rate": 0.000270328539131067, "loss": 0.2026, "step": 343430 }, { "epoch": 14.23, "grad_norm": 1.015625, "learning_rate": 0.0002703177298307608, "loss": 0.2011, "step": 343440 }, { "epoch": 14.23, "grad_norm": 0.90625, "learning_rate": 0.00027030692049221863, "loss": 0.1631, "step": 343450 }, { "epoch": 14.23, "grad_norm": 0.68359375, "learning_rate": 0.000270296111115461, "loss": 0.1841, "step": 343460 }, { "epoch": 14.23, "grad_norm": 0.6953125, "learning_rate": 0.0002702853017005085, "loss": 0.177, "step": 343470 }, { "epoch": 14.23, "grad_norm": 0.92578125, "learning_rate": 0.000270274492247381, "loss": 0.1824, "step": 343480 }, { "epoch": 14.23, "grad_norm": 1.5390625, "learning_rate": 0.0002702636827560993, "loss": 0.1505, "step": 343490 }, { "epoch": 14.23, "grad_norm": 0.6328125, "learning_rate": 0.00027025287322668347, "loss": 0.1906, "step": 343500 }, { "epoch": 14.23, "grad_norm": 0.89453125, "learning_rate": 0.00027024206365915387, "loss": 0.1723, "step": 343510 }, { "epoch": 14.23, "grad_norm": 0.494140625, "learning_rate": 0.000270231254053531, "loss": 0.1688, "step": 343520 }, { "epoch": 14.23, "grad_norm": 0.310546875, "learning_rate": 0.00027022044440983494, "loss": 0.1941, "step": 343530 }, { "epoch": 14.23, "grad_norm": 0.8828125, "learning_rate": 0.0002702096347280864, "loss": 0.178, "step": 343540 }, { "epoch": 14.23, "grad_norm": 0.0, "learning_rate": 0.00027019882500830536, "loss": 0.1958, "step": 343550 }, { "epoch": 14.23, "grad_norm": 0.82421875, "learning_rate": 0.00027018801525051226, "loss": 0.1943, "step": 343560 }, { "epoch": 14.23, "grad_norm": 2.4375, "learning_rate": 0.00027017720545472764, "loss": 0.1671, "step": 343570 }, { "epoch": 14.23, "grad_norm": 1.515625, "learning_rate": 0.0002701663956209716, "loss": 0.2152, "step": 343580 }, { "epoch": 14.23, "grad_norm": 1.65625, "learning_rate": 0.00027015558574926463, "loss": 0.1681, "step": 343590 }, { "epoch": 14.23, "grad_norm": 1.1015625, "learning_rate": 0.000270144775839627, "loss": 0.2198, "step": 343600 }, { "epoch": 14.23, "grad_norm": 0.439453125, "learning_rate": 0.0002701339658920791, "loss": 0.164, "step": 343610 }, { "epoch": 14.23, "grad_norm": 0.51953125, "learning_rate": 0.00027012315590664126, "loss": 0.2582, "step": 343620 }, { "epoch": 14.23, "grad_norm": 2.28125, "learning_rate": 0.0002701123458833338, "loss": 0.1954, "step": 343630 }, { "epoch": 14.23, "grad_norm": 0.8828125, "learning_rate": 0.000270101535822177, "loss": 0.1729, "step": 343640 }, { "epoch": 14.23, "grad_norm": 1.8984375, "learning_rate": 0.00027009072572319135, "loss": 0.22, "step": 343650 }, { "epoch": 14.23, "grad_norm": 0.84375, "learning_rate": 0.00027007991558639717, "loss": 0.2002, "step": 343660 }, { "epoch": 14.23, "grad_norm": 0.7578125, "learning_rate": 0.0002700691054118147, "loss": 0.1684, "step": 343670 }, { "epoch": 14.24, "grad_norm": 2.359375, "learning_rate": 0.0002700582951994644, "loss": 0.1368, "step": 343680 }, { "epoch": 14.24, "grad_norm": 0.9765625, "learning_rate": 0.00027004748494936646, "loss": 0.225, "step": 343690 }, { "epoch": 14.24, "grad_norm": 2.15625, "learning_rate": 0.00027003667466154134, "loss": 0.2085, "step": 343700 }, { "epoch": 14.24, "grad_norm": 0.77734375, "learning_rate": 0.0002700258643360094, "loss": 0.2265, "step": 343710 }, { "epoch": 14.24, "grad_norm": 1.1484375, "learning_rate": 0.00027001505397279094, "loss": 0.2484, "step": 343720 }, { "epoch": 14.24, "grad_norm": 2.015625, "learning_rate": 0.00027000424357190636, "loss": 0.1893, "step": 343730 }, { "epoch": 14.24, "grad_norm": 0.8203125, "learning_rate": 0.00026999343313337585, "loss": 0.1753, "step": 343740 }, { "epoch": 14.24, "grad_norm": 0.70703125, "learning_rate": 0.00026998262265721983, "loss": 0.1819, "step": 343750 }, { "epoch": 14.24, "grad_norm": 1.421875, "learning_rate": 0.0002699718121434588, "loss": 0.1479, "step": 343760 }, { "epoch": 14.24, "grad_norm": 1.1875, "learning_rate": 0.0002699610015921129, "loss": 0.221, "step": 343770 }, { "epoch": 14.24, "grad_norm": 0.59765625, "learning_rate": 0.0002699501910032026, "loss": 0.1973, "step": 343780 }, { "epoch": 14.24, "grad_norm": 1.296875, "learning_rate": 0.00026993938037674813, "loss": 0.2125, "step": 343790 }, { "epoch": 14.24, "grad_norm": 0.8828125, "learning_rate": 0.00026992856971276997, "loss": 0.1711, "step": 343800 }, { "epoch": 14.24, "grad_norm": 0.44140625, "learning_rate": 0.0002699177590112884, "loss": 0.2289, "step": 343810 }, { "epoch": 14.24, "grad_norm": 1.296875, "learning_rate": 0.0002699069482723237, "loss": 0.1856, "step": 343820 }, { "epoch": 14.24, "grad_norm": 0.5390625, "learning_rate": 0.0002698961374958963, "loss": 0.1865, "step": 343830 }, { "epoch": 14.24, "grad_norm": 0.515625, "learning_rate": 0.00026988532668202646, "loss": 0.212, "step": 343840 }, { "epoch": 14.24, "grad_norm": 0.0002841949462890625, "learning_rate": 0.00026987451583073465, "loss": 0.1631, "step": 343850 }, { "epoch": 14.24, "grad_norm": 0.890625, "learning_rate": 0.00026986370494204117, "loss": 0.1507, "step": 343860 }, { "epoch": 14.24, "grad_norm": 0.64453125, "learning_rate": 0.00026985289401596624, "loss": 0.2025, "step": 343870 }, { "epoch": 14.24, "grad_norm": 1.9921875, "learning_rate": 0.0002698420830525304, "loss": 0.1652, "step": 343880 }, { "epoch": 14.24, "grad_norm": 0.37109375, "learning_rate": 0.00026983127205175384, "loss": 0.1967, "step": 343890 }, { "epoch": 14.24, "grad_norm": 1.0390625, "learning_rate": 0.00026982046101365703, "loss": 0.2124, "step": 343900 }, { "epoch": 14.24, "grad_norm": 0.98828125, "learning_rate": 0.0002698096499382602, "loss": 0.1882, "step": 343910 }, { "epoch": 14.25, "grad_norm": 1.234375, "learning_rate": 0.0002697988388255838, "loss": 0.1575, "step": 343920 }, { "epoch": 14.25, "grad_norm": 1.875, "learning_rate": 0.00026978802767564804, "loss": 0.2208, "step": 343930 }, { "epoch": 14.25, "grad_norm": 0.984375, "learning_rate": 0.00026977721648847343, "loss": 0.146, "step": 343940 }, { "epoch": 14.25, "grad_norm": 0.47265625, "learning_rate": 0.0002697664052640802, "loss": 0.2087, "step": 343950 }, { "epoch": 14.25, "grad_norm": 0.82421875, "learning_rate": 0.00026975559400248874, "loss": 0.2085, "step": 343960 }, { "epoch": 14.25, "grad_norm": 0.90234375, "learning_rate": 0.00026974478270371937, "loss": 0.1811, "step": 343970 }, { "epoch": 14.25, "grad_norm": 1.234375, "learning_rate": 0.0002697339713677925, "loss": 0.1782, "step": 343980 }, { "epoch": 14.25, "grad_norm": 0.8984375, "learning_rate": 0.00026972315999472833, "loss": 0.2039, "step": 343990 }, { "epoch": 14.25, "grad_norm": 0.59375, "learning_rate": 0.0002697123485845474, "loss": 0.1803, "step": 344000 }, { "epoch": 14.25, "grad_norm": 0.95703125, "learning_rate": 0.0002697015371372699, "loss": 0.1694, "step": 344010 }, { "epoch": 14.25, "grad_norm": 0.90234375, "learning_rate": 0.0002696907256529163, "loss": 0.1824, "step": 344020 }, { "epoch": 14.25, "grad_norm": 0.8515625, "learning_rate": 0.0002696799141315068, "loss": 0.1885, "step": 344030 }, { "epoch": 14.25, "grad_norm": 0.859375, "learning_rate": 0.00026966910257306186, "loss": 0.2566, "step": 344040 }, { "epoch": 14.25, "grad_norm": 0.828125, "learning_rate": 0.0002696582909776018, "loss": 0.1791, "step": 344050 }, { "epoch": 14.25, "grad_norm": 3.046875, "learning_rate": 0.000269647479345147, "loss": 0.1713, "step": 344060 }, { "epoch": 14.25, "grad_norm": 0.99609375, "learning_rate": 0.0002696366676757177, "loss": 0.205, "step": 344070 }, { "epoch": 14.25, "grad_norm": 0.78125, "learning_rate": 0.0002696258559693343, "loss": 0.2095, "step": 344080 }, { "epoch": 14.25, "grad_norm": 0.484375, "learning_rate": 0.0002696150442260172, "loss": 0.1805, "step": 344090 }, { "epoch": 14.25, "grad_norm": 1.671875, "learning_rate": 0.0002696042324457867, "loss": 0.2184, "step": 344100 }, { "epoch": 14.25, "grad_norm": 0.52734375, "learning_rate": 0.00026959342062866313, "loss": 0.2107, "step": 344110 }, { "epoch": 14.25, "grad_norm": 0.83984375, "learning_rate": 0.0002695826087746669, "loss": 0.1338, "step": 344120 }, { "epoch": 14.25, "grad_norm": 0.73828125, "learning_rate": 0.0002695717968838183, "loss": 0.199, "step": 344130 }, { "epoch": 14.25, "grad_norm": 0.490234375, "learning_rate": 0.0002695609849561377, "loss": 0.2029, "step": 344140 }, { "epoch": 14.25, "grad_norm": 0.69921875, "learning_rate": 0.0002695501729916454, "loss": 0.1871, "step": 344150 }, { "epoch": 14.26, "grad_norm": 0.3828125, "learning_rate": 0.0002695393609903618, "loss": 0.1858, "step": 344160 }, { "epoch": 14.26, "grad_norm": 0.84375, "learning_rate": 0.0002695285489523073, "loss": 0.1769, "step": 344170 }, { "epoch": 14.26, "grad_norm": 0.18359375, "learning_rate": 0.0002695177368775021, "loss": 0.2156, "step": 344180 }, { "epoch": 14.26, "grad_norm": 1.5390625, "learning_rate": 0.0002695069247659667, "loss": 0.2173, "step": 344190 }, { "epoch": 14.26, "grad_norm": 0.73828125, "learning_rate": 0.0002694961126177213, "loss": 0.1885, "step": 344200 }, { "epoch": 14.26, "grad_norm": 0.9453125, "learning_rate": 0.0002694853004327863, "loss": 0.2053, "step": 344210 }, { "epoch": 14.26, "grad_norm": 0.50390625, "learning_rate": 0.00026947448821118216, "loss": 0.1992, "step": 344220 }, { "epoch": 14.26, "grad_norm": 1.0078125, "learning_rate": 0.00026946367595292905, "loss": 0.1747, "step": 344230 }, { "epoch": 14.26, "grad_norm": 0.435546875, "learning_rate": 0.0002694528636580474, "loss": 0.1564, "step": 344240 }, { "epoch": 14.26, "grad_norm": 0.7109375, "learning_rate": 0.00026944205132655764, "loss": 0.2018, "step": 344250 }, { "epoch": 14.26, "grad_norm": 0.296875, "learning_rate": 0.00026943123895848, "loss": 0.1783, "step": 344260 }, { "epoch": 14.26, "grad_norm": 1.1171875, "learning_rate": 0.0002694204265538349, "loss": 0.165, "step": 344270 }, { "epoch": 14.26, "grad_norm": 0.98828125, "learning_rate": 0.0002694096141126426, "loss": 0.2029, "step": 344280 }, { "epoch": 14.26, "grad_norm": 1.390625, "learning_rate": 0.00026939880163492355, "loss": 0.1816, "step": 344290 }, { "epoch": 14.26, "grad_norm": 0.77734375, "learning_rate": 0.000269387989120698, "loss": 0.2089, "step": 344300 }, { "epoch": 14.26, "grad_norm": 0.53515625, "learning_rate": 0.00026937717656998634, "loss": 0.1344, "step": 344310 }, { "epoch": 14.26, "grad_norm": 0.0, "learning_rate": 0.000269366363982809, "loss": 0.1505, "step": 344320 }, { "epoch": 14.26, "grad_norm": 0.66796875, "learning_rate": 0.0002693555513591862, "loss": 0.1845, "step": 344330 }, { "epoch": 14.26, "grad_norm": 0.9765625, "learning_rate": 0.00026934473869913835, "loss": 0.1823, "step": 344340 }, { "epoch": 14.26, "grad_norm": 2.34375, "learning_rate": 0.00026933392600268584, "loss": 0.2475, "step": 344350 }, { "epoch": 14.26, "grad_norm": 0.7109375, "learning_rate": 0.00026932311326984893, "loss": 0.2632, "step": 344360 }, { "epoch": 14.26, "grad_norm": 0.625, "learning_rate": 0.000269312300500648, "loss": 0.2212, "step": 344370 }, { "epoch": 14.26, "grad_norm": 1.59375, "learning_rate": 0.00026930148769510343, "loss": 0.1874, "step": 344380 }, { "epoch": 14.26, "grad_norm": 2.40625, "learning_rate": 0.00026929067485323546, "loss": 0.2023, "step": 344390 }, { "epoch": 14.27, "grad_norm": 1.2109375, "learning_rate": 0.0002692798619750646, "loss": 0.1691, "step": 344400 }, { "epoch": 14.27, "grad_norm": 0.80859375, "learning_rate": 0.00026926904906061114, "loss": 0.1666, "step": 344410 }, { "epoch": 14.27, "grad_norm": 0.65625, "learning_rate": 0.00026925823610989534, "loss": 0.2346, "step": 344420 }, { "epoch": 14.27, "grad_norm": 1.4375, "learning_rate": 0.0002692474231229377, "loss": 0.2312, "step": 344430 }, { "epoch": 14.27, "grad_norm": 0.408203125, "learning_rate": 0.00026923661009975843, "loss": 0.2302, "step": 344440 }, { "epoch": 14.27, "grad_norm": 0.5390625, "learning_rate": 0.000269225797040378, "loss": 0.193, "step": 344450 }, { "epoch": 14.27, "grad_norm": 0.87109375, "learning_rate": 0.0002692149839448166, "loss": 0.1462, "step": 344460 }, { "epoch": 14.27, "grad_norm": 0.703125, "learning_rate": 0.0002692041708130947, "loss": 0.1675, "step": 344470 }, { "epoch": 14.27, "grad_norm": 0.443359375, "learning_rate": 0.00026919335764523275, "loss": 0.1512, "step": 344480 }, { "epoch": 14.27, "grad_norm": 0.55078125, "learning_rate": 0.0002691825444412508, "loss": 0.1535, "step": 344490 }, { "epoch": 14.27, "grad_norm": 1.125, "learning_rate": 0.00026917173120116945, "loss": 0.1906, "step": 344500 }, { "epoch": 14.27, "grad_norm": 0.859375, "learning_rate": 0.000269160917925009, "loss": 0.1846, "step": 344510 }, { "epoch": 14.27, "grad_norm": 0.80859375, "learning_rate": 0.00026915010461278963, "loss": 0.1814, "step": 344520 }, { "epoch": 14.27, "grad_norm": 0.7890625, "learning_rate": 0.000269139291264532, "loss": 0.1619, "step": 344530 }, { "epoch": 14.27, "grad_norm": 0.734375, "learning_rate": 0.0002691284778802563, "loss": 0.1982, "step": 344540 }, { "epoch": 14.27, "grad_norm": 0.70703125, "learning_rate": 0.0002691176644599828, "loss": 0.1947, "step": 344550 }, { "epoch": 14.27, "grad_norm": 1.0, "learning_rate": 0.0002691068510037319, "loss": 0.1551, "step": 344560 }, { "epoch": 14.27, "grad_norm": 0.91796875, "learning_rate": 0.000269096037511524, "loss": 0.224, "step": 344570 }, { "epoch": 14.27, "grad_norm": 0.73046875, "learning_rate": 0.00026908522398337944, "loss": 0.1668, "step": 344580 }, { "epoch": 14.27, "grad_norm": 1.0625, "learning_rate": 0.0002690744104193185, "loss": 0.2027, "step": 344590 }, { "epoch": 14.27, "grad_norm": 0.703125, "learning_rate": 0.00026906359681936156, "loss": 0.1992, "step": 344600 }, { "epoch": 14.27, "grad_norm": 0.7421875, "learning_rate": 0.00026905278318352903, "loss": 0.2117, "step": 344610 }, { "epoch": 14.27, "grad_norm": 0.50390625, "learning_rate": 0.0002690419695118413, "loss": 0.2048, "step": 344620 }, { "epoch": 14.27, "grad_norm": 1.15625, "learning_rate": 0.0002690311558043185, "loss": 0.1926, "step": 344630 }, { "epoch": 14.27, "grad_norm": 1.65625, "learning_rate": 0.00026902034206098123, "loss": 0.1836, "step": 344640 }, { "epoch": 14.28, "grad_norm": 0.88671875, "learning_rate": 0.00026900952828184965, "loss": 0.2104, "step": 344650 }, { "epoch": 14.28, "grad_norm": 1.625, "learning_rate": 0.0002689987144669442, "loss": 0.1949, "step": 344660 }, { "epoch": 14.28, "grad_norm": 1.0859375, "learning_rate": 0.0002689879006162853, "loss": 0.1854, "step": 344670 }, { "epoch": 14.28, "grad_norm": 0.3671875, "learning_rate": 0.0002689770867298931, "loss": 0.1892, "step": 344680 }, { "epoch": 14.28, "grad_norm": 0.7578125, "learning_rate": 0.00026896627280778817, "loss": 0.2096, "step": 344690 }, { "epoch": 14.28, "grad_norm": 1.1328125, "learning_rate": 0.0002689554588499908, "loss": 0.2545, "step": 344700 }, { "epoch": 14.28, "grad_norm": 2.1875, "learning_rate": 0.0002689446448565212, "loss": 0.201, "step": 344710 }, { "epoch": 14.28, "grad_norm": 0.73046875, "learning_rate": 0.0002689338308273999, "loss": 0.1894, "step": 344720 }, { "epoch": 14.28, "grad_norm": 0.546875, "learning_rate": 0.00026892301676264706, "loss": 0.2247, "step": 344730 }, { "epoch": 14.28, "grad_norm": 0.484375, "learning_rate": 0.0002689122026622832, "loss": 0.2013, "step": 344740 }, { "epoch": 14.28, "grad_norm": 0.6484375, "learning_rate": 0.00026890138852632867, "loss": 0.231, "step": 344750 }, { "epoch": 14.28, "grad_norm": 0.74609375, "learning_rate": 0.00026889057435480366, "loss": 0.2257, "step": 344760 }, { "epoch": 14.28, "grad_norm": 0.482421875, "learning_rate": 0.00026887976014772875, "loss": 0.2425, "step": 344770 }, { "epoch": 14.28, "grad_norm": 0.9140625, "learning_rate": 0.00026886894590512415, "loss": 0.1753, "step": 344780 }, { "epoch": 14.28, "grad_norm": 1.3125, "learning_rate": 0.00026885813162701015, "loss": 0.1693, "step": 344790 }, { "epoch": 14.28, "grad_norm": 1.5234375, "learning_rate": 0.0002688473173134073, "loss": 0.2082, "step": 344800 }, { "epoch": 14.28, "grad_norm": 0.63671875, "learning_rate": 0.00026883650296433576, "loss": 0.2178, "step": 344810 }, { "epoch": 14.28, "grad_norm": 1.1015625, "learning_rate": 0.00026882568857981593, "loss": 0.2269, "step": 344820 }, { "epoch": 14.28, "grad_norm": 0.1943359375, "learning_rate": 0.0002688148741598683, "loss": 0.1945, "step": 344830 }, { "epoch": 14.28, "grad_norm": 0.375, "learning_rate": 0.00026880405970451296, "loss": 0.146, "step": 344840 }, { "epoch": 14.28, "grad_norm": 0.41015625, "learning_rate": 0.00026879324521377054, "loss": 0.1869, "step": 344850 }, { "epoch": 14.28, "grad_norm": 0.3671875, "learning_rate": 0.00026878243068766117, "loss": 0.1823, "step": 344860 }, { "epoch": 14.28, "grad_norm": 0.2890625, "learning_rate": 0.00026877161612620534, "loss": 0.1659, "step": 344870 }, { "epoch": 14.28, "grad_norm": 1.0625, "learning_rate": 0.00026876080152942334, "loss": 0.1399, "step": 344880 }, { "epoch": 14.29, "grad_norm": 0.6796875, "learning_rate": 0.00026874998689733554, "loss": 0.1918, "step": 344890 }, { "epoch": 14.29, "grad_norm": 0.984375, "learning_rate": 0.0002687391722299623, "loss": 0.2162, "step": 344900 }, { "epoch": 14.29, "grad_norm": 0.95703125, "learning_rate": 0.000268728357527324, "loss": 0.1599, "step": 344910 }, { "epoch": 14.29, "grad_norm": 0.9609375, "learning_rate": 0.0002687175427894408, "loss": 0.1698, "step": 344920 }, { "epoch": 14.29, "grad_norm": 1.1171875, "learning_rate": 0.0002687067280163334, "loss": 0.2247, "step": 344930 }, { "epoch": 14.29, "grad_norm": 1.1796875, "learning_rate": 0.00026869591320802184, "loss": 0.157, "step": 344940 }, { "epoch": 14.29, "grad_norm": 0.8671875, "learning_rate": 0.0002686850983645266, "loss": 0.194, "step": 344950 }, { "epoch": 14.29, "grad_norm": 1.484375, "learning_rate": 0.0002686742834858681, "loss": 0.1967, "step": 344960 }, { "epoch": 14.29, "grad_norm": 1.109375, "learning_rate": 0.00026866346857206655, "loss": 0.2034, "step": 344970 }, { "epoch": 14.29, "grad_norm": 0.71875, "learning_rate": 0.00026865265362314235, "loss": 0.1876, "step": 344980 }, { "epoch": 14.29, "grad_norm": 1.328125, "learning_rate": 0.0002686418386391159, "loss": 0.1797, "step": 344990 }, { "epoch": 14.29, "grad_norm": 0.96875, "learning_rate": 0.00026863102362000745, "loss": 0.2006, "step": 345000 }, { "epoch": 14.29, "grad_norm": 1.234375, "learning_rate": 0.0002686202085658376, "loss": 0.1716, "step": 345010 }, { "epoch": 14.29, "grad_norm": 0.40234375, "learning_rate": 0.0002686093934766264, "loss": 0.1592, "step": 345020 }, { "epoch": 14.29, "grad_norm": 0.8671875, "learning_rate": 0.00026859857835239434, "loss": 0.1923, "step": 345030 }, { "epoch": 14.29, "grad_norm": 1.7890625, "learning_rate": 0.0002685877631931618, "loss": 0.1761, "step": 345040 }, { "epoch": 14.29, "grad_norm": 0.75, "learning_rate": 0.00026857694799894906, "loss": 0.2104, "step": 345050 }, { "epoch": 14.29, "grad_norm": 0.6953125, "learning_rate": 0.0002685661327697765, "loss": 0.1465, "step": 345060 }, { "epoch": 14.29, "grad_norm": 0.91796875, "learning_rate": 0.00026855531750566447, "loss": 0.1541, "step": 345070 }, { "epoch": 14.29, "grad_norm": 1.421875, "learning_rate": 0.0002685445022066334, "loss": 0.1998, "step": 345080 }, { "epoch": 14.29, "grad_norm": 0.78125, "learning_rate": 0.00026853368687270357, "loss": 0.1977, "step": 345090 }, { "epoch": 14.29, "grad_norm": 0.6953125, "learning_rate": 0.0002685228715038953, "loss": 0.2069, "step": 345100 }, { "epoch": 14.29, "grad_norm": 0.91015625, "learning_rate": 0.000268512056100229, "loss": 0.1746, "step": 345110 }, { "epoch": 14.29, "grad_norm": 0.59375, "learning_rate": 0.0002685012406617251, "loss": 0.1975, "step": 345120 }, { "epoch": 14.3, "grad_norm": 1.4921875, "learning_rate": 0.00026849042518840377, "loss": 0.2094, "step": 345130 }, { "epoch": 14.3, "grad_norm": 1.2734375, "learning_rate": 0.00026847960968028544, "loss": 0.2266, "step": 345140 }, { "epoch": 14.3, "grad_norm": 0.76953125, "learning_rate": 0.0002684687941373905, "loss": 0.1868, "step": 345150 }, { "epoch": 14.3, "grad_norm": 1.140625, "learning_rate": 0.0002684579785597393, "loss": 0.1921, "step": 345160 }, { "epoch": 14.3, "grad_norm": 1.3984375, "learning_rate": 0.0002684471629473522, "loss": 0.2433, "step": 345170 }, { "epoch": 14.3, "grad_norm": 0.53125, "learning_rate": 0.00026843634730024953, "loss": 0.1993, "step": 345180 }, { "epoch": 14.3, "grad_norm": 0.6640625, "learning_rate": 0.00026842553161845155, "loss": 0.2312, "step": 345190 }, { "epoch": 14.3, "grad_norm": 1.1015625, "learning_rate": 0.00026841471590197883, "loss": 0.2114, "step": 345200 }, { "epoch": 14.3, "grad_norm": 0.58203125, "learning_rate": 0.00026840390015085155, "loss": 0.1717, "step": 345210 }, { "epoch": 14.3, "grad_norm": 1.046875, "learning_rate": 0.0002683930843650901, "loss": 0.1439, "step": 345220 }, { "epoch": 14.3, "grad_norm": 0.55859375, "learning_rate": 0.00026838226854471485, "loss": 0.1704, "step": 345230 }, { "epoch": 14.3, "grad_norm": 0.59765625, "learning_rate": 0.00026837145268974614, "loss": 0.17, "step": 345240 }, { "epoch": 14.3, "grad_norm": 0.55078125, "learning_rate": 0.00026836063680020446, "loss": 0.2093, "step": 345250 }, { "epoch": 14.3, "grad_norm": 1.390625, "learning_rate": 0.00026834982087610995, "loss": 0.2209, "step": 345260 }, { "epoch": 14.3, "grad_norm": 0.625, "learning_rate": 0.00026833900491748307, "loss": 0.1475, "step": 345270 }, { "epoch": 14.3, "grad_norm": 0.376953125, "learning_rate": 0.00026832818892434413, "loss": 0.1956, "step": 345280 }, { "epoch": 14.3, "grad_norm": 1.40625, "learning_rate": 0.0002683173728967136, "loss": 0.2227, "step": 345290 }, { "epoch": 14.3, "grad_norm": 0.69921875, "learning_rate": 0.0002683065568346117, "loss": 0.2221, "step": 345300 }, { "epoch": 14.3, "grad_norm": 0.9453125, "learning_rate": 0.00026829574073805883, "loss": 0.2266, "step": 345310 }, { "epoch": 14.3, "grad_norm": 1.046875, "learning_rate": 0.00026828492460707535, "loss": 0.1768, "step": 345320 }, { "epoch": 14.3, "grad_norm": 0.478515625, "learning_rate": 0.0002682741084416817, "loss": 0.1777, "step": 345330 }, { "epoch": 14.3, "grad_norm": 0.373046875, "learning_rate": 0.00026826329224189803, "loss": 0.2562, "step": 345340 }, { "epoch": 14.3, "grad_norm": 0.57421875, "learning_rate": 0.00026825247600774485, "loss": 0.1865, "step": 345350 }, { "epoch": 14.3, "grad_norm": 1.171875, "learning_rate": 0.00026824165973924247, "loss": 0.1589, "step": 345360 }, { "epoch": 14.31, "grad_norm": 0.76953125, "learning_rate": 0.0002682308434364113, "loss": 0.1942, "step": 345370 }, { "epoch": 14.31, "grad_norm": 0.8828125, "learning_rate": 0.00026822002709927167, "loss": 0.1933, "step": 345380 }, { "epoch": 14.31, "grad_norm": 0.5078125, "learning_rate": 0.00026820921072784386, "loss": 0.151, "step": 345390 }, { "epoch": 14.31, "grad_norm": 1.59375, "learning_rate": 0.0002681983943221483, "loss": 0.1798, "step": 345400 }, { "epoch": 14.31, "grad_norm": 1.09375, "learning_rate": 0.00026818757788220533, "loss": 0.2091, "step": 345410 }, { "epoch": 14.31, "grad_norm": 0.87109375, "learning_rate": 0.0002681767614080353, "loss": 0.1777, "step": 345420 }, { "epoch": 14.31, "grad_norm": 0.69140625, "learning_rate": 0.0002681659448996586, "loss": 0.1778, "step": 345430 }, { "epoch": 14.31, "grad_norm": 0.78515625, "learning_rate": 0.00026815512835709547, "loss": 0.239, "step": 345440 }, { "epoch": 14.31, "grad_norm": 0.3671875, "learning_rate": 0.0002681443117803664, "loss": 0.1956, "step": 345450 }, { "epoch": 14.31, "grad_norm": 1.9140625, "learning_rate": 0.00026813349516949177, "loss": 0.1729, "step": 345460 }, { "epoch": 14.31, "grad_norm": 1.0234375, "learning_rate": 0.0002681226785244917, "loss": 0.1976, "step": 345470 }, { "epoch": 14.31, "grad_norm": 0.7421875, "learning_rate": 0.00026811186184538685, "loss": 0.1903, "step": 345480 }, { "epoch": 14.31, "grad_norm": 0.369140625, "learning_rate": 0.0002681010451321974, "loss": 0.187, "step": 345490 }, { "epoch": 14.31, "grad_norm": 0.50390625, "learning_rate": 0.00026809022838494373, "loss": 0.1781, "step": 345500 }, { "epoch": 14.31, "grad_norm": 0.87109375, "learning_rate": 0.0002680794116036462, "loss": 0.1761, "step": 345510 }, { "epoch": 14.31, "grad_norm": 1.0078125, "learning_rate": 0.00026806859478832514, "loss": 0.1699, "step": 345520 }, { "epoch": 14.31, "grad_norm": 0.365234375, "learning_rate": 0.00026805777793900085, "loss": 0.1985, "step": 345530 }, { "epoch": 14.31, "grad_norm": 0.71484375, "learning_rate": 0.000268046961055694, "loss": 0.2264, "step": 345540 }, { "epoch": 14.31, "grad_norm": 1.1640625, "learning_rate": 0.00026803614413842454, "loss": 0.214, "step": 345550 }, { "epoch": 14.31, "grad_norm": 1.3828125, "learning_rate": 0.00026802532718721306, "loss": 0.2317, "step": 345560 }, { "epoch": 14.31, "grad_norm": 0.93359375, "learning_rate": 0.0002680145102020798, "loss": 0.2395, "step": 345570 }, { "epoch": 14.31, "grad_norm": 0.5234375, "learning_rate": 0.0002680036931830453, "loss": 0.1843, "step": 345580 }, { "epoch": 14.31, "grad_norm": 0.7109375, "learning_rate": 0.00026799287613012975, "loss": 0.1987, "step": 345590 }, { "epoch": 14.31, "grad_norm": 1.328125, "learning_rate": 0.0002679820590433535, "loss": 0.1832, "step": 345600 }, { "epoch": 14.32, "grad_norm": 0.828125, "learning_rate": 0.000267971241922737, "loss": 0.1738, "step": 345610 }, { "epoch": 14.32, "grad_norm": 1.8515625, "learning_rate": 0.00026796042476830053, "loss": 0.1899, "step": 345620 }, { "epoch": 14.32, "grad_norm": 0.5703125, "learning_rate": 0.0002679496075800645, "loss": 0.2453, "step": 345630 }, { "epoch": 14.32, "grad_norm": 0.6015625, "learning_rate": 0.00026793879035804925, "loss": 0.1792, "step": 345640 }, { "epoch": 14.32, "grad_norm": 0.59765625, "learning_rate": 0.00026792797310227514, "loss": 0.1817, "step": 345650 }, { "epoch": 14.32, "grad_norm": 0.7734375, "learning_rate": 0.00026791715581276254, "loss": 0.1791, "step": 345660 }, { "epoch": 14.32, "grad_norm": 0.5625, "learning_rate": 0.00026790633848953175, "loss": 0.17, "step": 345670 }, { "epoch": 14.32, "grad_norm": 0.482421875, "learning_rate": 0.0002678955211326032, "loss": 0.1991, "step": 345680 }, { "epoch": 14.32, "grad_norm": 0.60546875, "learning_rate": 0.00026788470374199716, "loss": 0.1954, "step": 345690 }, { "epoch": 14.32, "grad_norm": 0.71484375, "learning_rate": 0.000267873886317734, "loss": 0.1877, "step": 345700 }, { "epoch": 14.32, "grad_norm": 0.455078125, "learning_rate": 0.00026786306885983425, "loss": 0.219, "step": 345710 }, { "epoch": 14.32, "grad_norm": 0.494140625, "learning_rate": 0.00026785225136831804, "loss": 0.1748, "step": 345720 }, { "epoch": 14.32, "grad_norm": 0.416015625, "learning_rate": 0.0002678414338432059, "loss": 0.1488, "step": 345730 }, { "epoch": 14.32, "grad_norm": 0.6875, "learning_rate": 0.00026783061628451804, "loss": 0.1748, "step": 345740 }, { "epoch": 14.32, "grad_norm": 0.314453125, "learning_rate": 0.0002678197986922749, "loss": 0.2275, "step": 345750 }, { "epoch": 14.32, "grad_norm": 0.77734375, "learning_rate": 0.00026780898106649683, "loss": 0.1445, "step": 345760 }, { "epoch": 14.32, "grad_norm": 0.69140625, "learning_rate": 0.00026779816340720414, "loss": 0.1861, "step": 345770 }, { "epoch": 14.32, "grad_norm": 0.50390625, "learning_rate": 0.0002677873457144173, "loss": 0.2571, "step": 345780 }, { "epoch": 14.32, "grad_norm": 1.140625, "learning_rate": 0.00026777652798815657, "loss": 0.2324, "step": 345790 }, { "epoch": 14.32, "grad_norm": 0.53125, "learning_rate": 0.00026776571022844224, "loss": 0.1917, "step": 345800 }, { "epoch": 14.32, "grad_norm": 0.88671875, "learning_rate": 0.00026775489243529486, "loss": 0.2144, "step": 345810 }, { "epoch": 14.32, "grad_norm": 0.6640625, "learning_rate": 0.0002677440746087347, "loss": 0.1702, "step": 345820 }, { "epoch": 14.32, "grad_norm": 1.0, "learning_rate": 0.000267733256748782, "loss": 0.1743, "step": 345830 }, { "epoch": 14.32, "grad_norm": 1.4921875, "learning_rate": 0.00026772243885545737, "loss": 0.1953, "step": 345840 }, { "epoch": 14.33, "grad_norm": 1.015625, "learning_rate": 0.000267711620928781, "loss": 0.1821, "step": 345850 }, { "epoch": 14.33, "grad_norm": 3.40625, "learning_rate": 0.0002677008029687732, "loss": 0.1843, "step": 345860 }, { "epoch": 14.33, "grad_norm": 1.609375, "learning_rate": 0.0002676899849754545, "loss": 0.2041, "step": 345870 }, { "epoch": 14.33, "grad_norm": 0.70703125, "learning_rate": 0.00026767916694884504, "loss": 0.17, "step": 345880 }, { "epoch": 14.33, "grad_norm": 0.59375, "learning_rate": 0.00026766834888896534, "loss": 0.1779, "step": 345890 }, { "epoch": 14.33, "grad_norm": 1.171875, "learning_rate": 0.00026765753079583575, "loss": 0.2307, "step": 345900 }, { "epoch": 14.33, "grad_norm": 0.64453125, "learning_rate": 0.0002676467126694765, "loss": 0.1604, "step": 345910 }, { "epoch": 14.33, "grad_norm": 0.60546875, "learning_rate": 0.00026763589450990816, "loss": 0.1851, "step": 345920 }, { "epoch": 14.33, "grad_norm": 1.28125, "learning_rate": 0.00026762507631715096, "loss": 0.2041, "step": 345930 }, { "epoch": 14.33, "grad_norm": 0.86328125, "learning_rate": 0.0002676142580912252, "loss": 0.2044, "step": 345940 }, { "epoch": 14.33, "grad_norm": 0.87890625, "learning_rate": 0.0002676034398321514, "loss": 0.1908, "step": 345950 }, { "epoch": 14.33, "grad_norm": 0.67578125, "learning_rate": 0.00026759262153994975, "loss": 0.2145, "step": 345960 }, { "epoch": 14.33, "grad_norm": 0.46875, "learning_rate": 0.00026758180321464066, "loss": 0.1806, "step": 345970 }, { "epoch": 14.33, "grad_norm": 1.265625, "learning_rate": 0.0002675709848562446, "loss": 0.215, "step": 345980 }, { "epoch": 14.33, "grad_norm": 1.078125, "learning_rate": 0.0002675601664647817, "loss": 0.2214, "step": 345990 }, { "epoch": 14.33, "grad_norm": 0.55859375, "learning_rate": 0.00026754934804027266, "loss": 0.1904, "step": 346000 }, { "epoch": 14.33, "grad_norm": 1.765625, "learning_rate": 0.0002675385295827375, "loss": 0.2351, "step": 346010 }, { "epoch": 14.33, "grad_norm": 0.87109375, "learning_rate": 0.0002675277110921968, "loss": 0.1724, "step": 346020 }, { "epoch": 14.33, "grad_norm": 0.5703125, "learning_rate": 0.00026751689256867086, "loss": 0.1703, "step": 346030 }, { "epoch": 14.33, "grad_norm": 0.8359375, "learning_rate": 0.00026750607401217983, "loss": 0.1676, "step": 346040 }, { "epoch": 14.33, "grad_norm": 1.09375, "learning_rate": 0.0002674952554227445, "loss": 0.1937, "step": 346050 }, { "epoch": 14.33, "grad_norm": 0.6484375, "learning_rate": 0.00026748443680038494, "loss": 0.193, "step": 346060 }, { "epoch": 14.33, "grad_norm": 0.7421875, "learning_rate": 0.0002674736181451214, "loss": 0.1921, "step": 346070 }, { "epoch": 14.33, "grad_norm": 0.294921875, "learning_rate": 0.0002674627994569746, "loss": 0.2152, "step": 346080 }, { "epoch": 14.34, "grad_norm": 1.078125, "learning_rate": 0.00026745198073596454, "loss": 0.1421, "step": 346090 }, { "epoch": 14.34, "grad_norm": 0.96875, "learning_rate": 0.0002674411619821118, "loss": 0.1814, "step": 346100 }, { "epoch": 14.34, "grad_norm": 1.3203125, "learning_rate": 0.0002674303431954367, "loss": 0.178, "step": 346110 }, { "epoch": 14.34, "grad_norm": 0.75390625, "learning_rate": 0.00026741952437595945, "loss": 0.2156, "step": 346120 }, { "epoch": 14.34, "grad_norm": 1.484375, "learning_rate": 0.0002674087055237007, "loss": 0.1374, "step": 346130 }, { "epoch": 14.34, "grad_norm": 0.431640625, "learning_rate": 0.00026739788663868066, "loss": 0.1794, "step": 346140 }, { "epoch": 14.34, "grad_norm": 0.58203125, "learning_rate": 0.0002673870677209195, "loss": 0.1813, "step": 346150 }, { "epoch": 14.34, "grad_norm": 0.546875, "learning_rate": 0.00026737624877043795, "loss": 0.1692, "step": 346160 }, { "epoch": 14.34, "grad_norm": 0.80078125, "learning_rate": 0.000267365429787256, "loss": 0.2095, "step": 346170 }, { "epoch": 14.34, "grad_norm": 0.33984375, "learning_rate": 0.00026735461077139426, "loss": 0.1944, "step": 346180 }, { "epoch": 14.34, "grad_norm": 0.734375, "learning_rate": 0.0002673437917228731, "loss": 0.1847, "step": 346190 }, { "epoch": 14.34, "grad_norm": 0.76171875, "learning_rate": 0.0002673329726417126, "loss": 0.1941, "step": 346200 }, { "epoch": 14.34, "grad_norm": 1.328125, "learning_rate": 0.00026732215352793354, "loss": 0.1223, "step": 346210 }, { "epoch": 14.34, "grad_norm": 0.318359375, "learning_rate": 0.000267311334381556, "loss": 0.1813, "step": 346220 }, { "epoch": 14.34, "grad_norm": 0.8203125, "learning_rate": 0.00026730051520260025, "loss": 0.1982, "step": 346230 }, { "epoch": 14.34, "grad_norm": 1.046875, "learning_rate": 0.00026728969599108696, "loss": 0.1526, "step": 346240 }, { "epoch": 14.34, "grad_norm": 0.62890625, "learning_rate": 0.00026727887674703627, "loss": 0.1468, "step": 346250 }, { "epoch": 14.34, "grad_norm": 1.234375, "learning_rate": 0.00026726805747046857, "loss": 0.1991, "step": 346260 }, { "epoch": 14.34, "grad_norm": 0.306640625, "learning_rate": 0.00026725723816140435, "loss": 0.2176, "step": 346270 }, { "epoch": 14.34, "grad_norm": 0.6015625, "learning_rate": 0.00026724641881986373, "loss": 0.1903, "step": 346280 }, { "epoch": 14.34, "grad_norm": 0.890625, "learning_rate": 0.00026723559944586735, "loss": 0.2284, "step": 346290 }, { "epoch": 14.34, "grad_norm": 0.54296875, "learning_rate": 0.0002672247800394354, "loss": 0.2125, "step": 346300 }, { "epoch": 14.34, "grad_norm": 1.453125, "learning_rate": 0.00026721396060058815, "loss": 0.2203, "step": 346310 }, { "epoch": 14.34, "grad_norm": 1.1328125, "learning_rate": 0.00026720314112934626, "loss": 0.1678, "step": 346320 }, { "epoch": 14.34, "grad_norm": 1.4609375, "learning_rate": 0.0002671923216257298, "loss": 0.2074, "step": 346330 }, { "epoch": 14.35, "grad_norm": 0.703125, "learning_rate": 0.00026718150208975927, "loss": 0.1825, "step": 346340 }, { "epoch": 14.35, "grad_norm": 0.796875, "learning_rate": 0.00026717068252145505, "loss": 0.1771, "step": 346350 }, { "epoch": 14.35, "grad_norm": 0.51171875, "learning_rate": 0.0002671598629208374, "loss": 0.1992, "step": 346360 }, { "epoch": 14.35, "grad_norm": 0.7109375, "learning_rate": 0.0002671490432879268, "loss": 0.1976, "step": 346370 }, { "epoch": 14.35, "grad_norm": 0.72265625, "learning_rate": 0.00026713822362274347, "loss": 0.206, "step": 346380 }, { "epoch": 14.35, "grad_norm": 0.419921875, "learning_rate": 0.00026712740392530795, "loss": 0.2122, "step": 346390 }, { "epoch": 14.35, "grad_norm": 0.0, "learning_rate": 0.00026711658419564045, "loss": 0.2292, "step": 346400 }, { "epoch": 14.35, "grad_norm": 1.484375, "learning_rate": 0.00026710576443376137, "loss": 0.1674, "step": 346410 }, { "epoch": 14.35, "grad_norm": 0.55078125, "learning_rate": 0.0002670949446396911, "loss": 0.1626, "step": 346420 }, { "epoch": 14.35, "grad_norm": 0.4453125, "learning_rate": 0.0002670841248134501, "loss": 0.2182, "step": 346430 }, { "epoch": 14.35, "grad_norm": 2.671875, "learning_rate": 0.00026707330495505843, "loss": 0.1964, "step": 346440 }, { "epoch": 14.35, "grad_norm": 1.34375, "learning_rate": 0.00026706248506453686, "loss": 0.2151, "step": 346450 }, { "epoch": 14.35, "grad_norm": 1.03125, "learning_rate": 0.0002670516651419054, "loss": 0.1583, "step": 346460 }, { "epoch": 14.35, "grad_norm": 0.3125, "learning_rate": 0.0002670408451871845, "loss": 0.1829, "step": 346470 }, { "epoch": 14.35, "grad_norm": 0.90625, "learning_rate": 0.0002670300252003947, "loss": 0.1639, "step": 346480 }, { "epoch": 14.35, "grad_norm": 0.859375, "learning_rate": 0.00026701920518155617, "loss": 0.1734, "step": 346490 }, { "epoch": 14.35, "grad_norm": 0.3125, "learning_rate": 0.00026700838513068933, "loss": 0.1705, "step": 346500 }, { "epoch": 14.35, "grad_norm": 1.859375, "learning_rate": 0.0002669975650478146, "loss": 0.1554, "step": 346510 }, { "epoch": 14.35, "grad_norm": 0.75, "learning_rate": 0.00026698674493295216, "loss": 0.1967, "step": 346520 }, { "epoch": 14.35, "grad_norm": 0.58203125, "learning_rate": 0.0002669759247861226, "loss": 0.1833, "step": 346530 }, { "epoch": 14.35, "grad_norm": 0.71484375, "learning_rate": 0.00026696510460734614, "loss": 0.2116, "step": 346540 }, { "epoch": 14.35, "grad_norm": 0.69140625, "learning_rate": 0.0002669542843966432, "loss": 0.2556, "step": 346550 }, { "epoch": 14.35, "grad_norm": 1.3203125, "learning_rate": 0.00026694346415403417, "loss": 0.2352, "step": 346560 }, { "epoch": 14.35, "grad_norm": 1.046875, "learning_rate": 0.0002669326438795393, "loss": 0.189, "step": 346570 }, { "epoch": 14.36, "grad_norm": 0.88671875, "learning_rate": 0.00026692182357317904, "loss": 0.1796, "step": 346580 }, { "epoch": 14.36, "grad_norm": 0.78125, "learning_rate": 0.00026691100323497375, "loss": 0.1821, "step": 346590 }, { "epoch": 14.36, "grad_norm": 0.0, "learning_rate": 0.00026690018286494376, "loss": 0.2196, "step": 346600 }, { "epoch": 14.36, "grad_norm": 0.494140625, "learning_rate": 0.0002668893624631096, "loss": 0.1831, "step": 346610 }, { "epoch": 14.36, "grad_norm": 0.69921875, "learning_rate": 0.0002668785420294913, "loss": 0.1758, "step": 346620 }, { "epoch": 14.36, "grad_norm": 0.486328125, "learning_rate": 0.0002668677215641094, "loss": 0.1248, "step": 346630 }, { "epoch": 14.36, "grad_norm": 1.34375, "learning_rate": 0.0002668569010669844, "loss": 0.2072, "step": 346640 }, { "epoch": 14.36, "grad_norm": 2.28125, "learning_rate": 0.0002668460805381365, "loss": 0.2099, "step": 346650 }, { "epoch": 14.36, "grad_norm": 0.77734375, "learning_rate": 0.000266835259977586, "loss": 0.1854, "step": 346660 }, { "epoch": 14.36, "grad_norm": 0.7578125, "learning_rate": 0.0002668244393853534, "loss": 0.1711, "step": 346670 }, { "epoch": 14.36, "grad_norm": 0.4375, "learning_rate": 0.00026681361876145903, "loss": 0.2074, "step": 346680 }, { "epoch": 14.36, "grad_norm": 1.5390625, "learning_rate": 0.0002668027981059233, "loss": 0.1596, "step": 346690 }, { "epoch": 14.36, "grad_norm": 1.1640625, "learning_rate": 0.0002667919774187665, "loss": 0.163, "step": 346700 }, { "epoch": 14.36, "grad_norm": 0.6953125, "learning_rate": 0.000266781156700009, "loss": 0.1627, "step": 346710 }, { "epoch": 14.36, "grad_norm": 1.203125, "learning_rate": 0.0002667703359496711, "loss": 0.2129, "step": 346720 }, { "epoch": 14.36, "grad_norm": 0.6640625, "learning_rate": 0.00026675951516777333, "loss": 0.1771, "step": 346730 }, { "epoch": 14.36, "grad_norm": 1.4921875, "learning_rate": 0.0002667486943543359, "loss": 0.1262, "step": 346740 }, { "epoch": 14.36, "grad_norm": 4.375, "learning_rate": 0.00026673787350937927, "loss": 0.2049, "step": 346750 }, { "epoch": 14.36, "grad_norm": 0.3359375, "learning_rate": 0.00026672705263292376, "loss": 0.2323, "step": 346760 }, { "epoch": 14.36, "grad_norm": 0.59765625, "learning_rate": 0.0002667162317249898, "loss": 0.2004, "step": 346770 }, { "epoch": 14.36, "grad_norm": 1.265625, "learning_rate": 0.00026670541078559765, "loss": 0.1955, "step": 346780 }, { "epoch": 14.36, "grad_norm": 1.4140625, "learning_rate": 0.0002666945898147677, "loss": 0.2349, "step": 346790 }, { "epoch": 14.36, "grad_norm": 0.69140625, "learning_rate": 0.0002666837688125203, "loss": 0.1731, "step": 346800 }, { "epoch": 14.36, "grad_norm": 0.69140625, "learning_rate": 0.00026667294777887597, "loss": 0.2077, "step": 346810 }, { "epoch": 14.37, "grad_norm": 1.4296875, "learning_rate": 0.0002666621267138549, "loss": 0.2033, "step": 346820 }, { "epoch": 14.37, "grad_norm": 0.9609375, "learning_rate": 0.00026665130561747747, "loss": 0.1877, "step": 346830 }, { "epoch": 14.37, "grad_norm": 1.1328125, "learning_rate": 0.00026664048448976407, "loss": 0.2028, "step": 346840 }, { "epoch": 14.37, "grad_norm": 2.765625, "learning_rate": 0.0002666296633307352, "loss": 0.2105, "step": 346850 }, { "epoch": 14.37, "grad_norm": 0.671875, "learning_rate": 0.00026661884214041094, "loss": 0.1946, "step": 346860 }, { "epoch": 14.37, "grad_norm": 0.90625, "learning_rate": 0.00026660802091881187, "loss": 0.1868, "step": 346870 }, { "epoch": 14.37, "grad_norm": 0.60546875, "learning_rate": 0.0002665971996659583, "loss": 0.1075, "step": 346880 }, { "epoch": 14.37, "grad_norm": 0.87890625, "learning_rate": 0.0002665863783818707, "loss": 0.1867, "step": 346890 }, { "epoch": 14.37, "grad_norm": 0.54296875, "learning_rate": 0.00026657555706656916, "loss": 0.1577, "step": 346900 }, { "epoch": 14.37, "grad_norm": 1.15625, "learning_rate": 0.0002665647357200743, "loss": 0.1719, "step": 346910 }, { "epoch": 14.37, "grad_norm": 0.8203125, "learning_rate": 0.0002665539143424063, "loss": 0.247, "step": 346920 }, { "epoch": 14.37, "grad_norm": 1.625, "learning_rate": 0.00026654309293358574, "loss": 0.2505, "step": 346930 }, { "epoch": 14.37, "grad_norm": 0.67578125, "learning_rate": 0.00026653227149363285, "loss": 0.1433, "step": 346940 }, { "epoch": 14.37, "grad_norm": 1.46875, "learning_rate": 0.000266521450022568, "loss": 0.1967, "step": 346950 }, { "epoch": 14.37, "grad_norm": 0.435546875, "learning_rate": 0.0002665106285204115, "loss": 0.191, "step": 346960 }, { "epoch": 14.37, "grad_norm": 1.1484375, "learning_rate": 0.0002664998069871839, "loss": 0.2152, "step": 346970 }, { "epoch": 14.37, "grad_norm": 0.50390625, "learning_rate": 0.00026648898542290533, "loss": 0.2103, "step": 346980 }, { "epoch": 14.37, "grad_norm": 2.484375, "learning_rate": 0.0002664781638275963, "loss": 0.1854, "step": 346990 }, { "epoch": 14.37, "grad_norm": 1.6796875, "learning_rate": 0.0002664673422012772, "loss": 0.1971, "step": 347000 }, { "epoch": 14.37, "grad_norm": 0.67578125, "learning_rate": 0.00026645652054396827, "loss": 0.1433, "step": 347010 }, { "epoch": 14.37, "grad_norm": 1.40625, "learning_rate": 0.00026644569885569, "loss": 0.1998, "step": 347020 }, { "epoch": 14.37, "grad_norm": 1.0, "learning_rate": 0.00026643487713646266, "loss": 0.1807, "step": 347030 }, { "epoch": 14.37, "grad_norm": 0.86328125, "learning_rate": 0.0002664240553863067, "loss": 0.1621, "step": 347040 }, { "epoch": 14.37, "grad_norm": 2.203125, "learning_rate": 0.0002664132336052424, "loss": 0.2428, "step": 347050 }, { "epoch": 14.38, "grad_norm": 0.73046875, "learning_rate": 0.00026640241179329017, "loss": 0.2113, "step": 347060 }, { "epoch": 14.38, "grad_norm": 0.7578125, "learning_rate": 0.0002663915899504704, "loss": 0.1844, "step": 347070 }, { "epoch": 14.38, "grad_norm": 0.404296875, "learning_rate": 0.00026638076807680337, "loss": 0.1967, "step": 347080 }, { "epoch": 14.38, "grad_norm": 1.0390625, "learning_rate": 0.0002663699461723095, "loss": 0.2113, "step": 347090 }, { "epoch": 14.38, "grad_norm": 1.0390625, "learning_rate": 0.00026635912423700933, "loss": 0.1773, "step": 347100 }, { "epoch": 14.38, "grad_norm": 0.466796875, "learning_rate": 0.0002663483022709229, "loss": 0.1876, "step": 347110 }, { "epoch": 14.38, "grad_norm": 0.96875, "learning_rate": 0.00026633748027407074, "loss": 0.1665, "step": 347120 }, { "epoch": 14.38, "grad_norm": 0.63671875, "learning_rate": 0.0002663266582464732, "loss": 0.1657, "step": 347130 }, { "epoch": 14.38, "grad_norm": 0.75, "learning_rate": 0.00026631583618815066, "loss": 0.2, "step": 347140 }, { "epoch": 14.38, "grad_norm": 0.62890625, "learning_rate": 0.00026630501409912355, "loss": 0.1765, "step": 347150 }, { "epoch": 14.38, "grad_norm": 0.953125, "learning_rate": 0.00026629419197941206, "loss": 0.1534, "step": 347160 }, { "epoch": 14.38, "grad_norm": 0.341796875, "learning_rate": 0.0002662833698290367, "loss": 0.2201, "step": 347170 }, { "epoch": 14.38, "grad_norm": 0.828125, "learning_rate": 0.0002662725476480179, "loss": 0.1795, "step": 347180 }, { "epoch": 14.38, "grad_norm": 0.87890625, "learning_rate": 0.0002662617254363758, "loss": 0.195, "step": 347190 }, { "epoch": 14.38, "grad_norm": 0.337890625, "learning_rate": 0.0002662509031941309, "loss": 0.1868, "step": 347200 }, { "epoch": 14.38, "grad_norm": 1.1171875, "learning_rate": 0.00026624008092130357, "loss": 0.2127, "step": 347210 }, { "epoch": 14.38, "grad_norm": 0.62890625, "learning_rate": 0.00026622925861791413, "loss": 0.189, "step": 347220 }, { "epoch": 14.38, "grad_norm": 0.353515625, "learning_rate": 0.0002662184362839831, "loss": 0.1826, "step": 347230 }, { "epoch": 14.38, "grad_norm": 1.1796875, "learning_rate": 0.0002662076139195306, "loss": 0.1678, "step": 347240 }, { "epoch": 14.38, "grad_norm": 1.515625, "learning_rate": 0.0002661967915245772, "loss": 0.1621, "step": 347250 }, { "epoch": 14.38, "grad_norm": 1.3671875, "learning_rate": 0.0002661859690991431, "loss": 0.2102, "step": 347260 }, { "epoch": 14.38, "grad_norm": 0.69921875, "learning_rate": 0.0002661751466432488, "loss": 0.1385, "step": 347270 }, { "epoch": 14.38, "grad_norm": 0.67578125, "learning_rate": 0.00026616432415691463, "loss": 0.2958, "step": 347280 }, { "epoch": 14.38, "grad_norm": 0.91796875, "learning_rate": 0.00026615350164016087, "loss": 0.167, "step": 347290 }, { "epoch": 14.39, "grad_norm": 0.4609375, "learning_rate": 0.00026614267909300804, "loss": 0.1717, "step": 347300 }, { "epoch": 14.39, "grad_norm": 1.625, "learning_rate": 0.0002661318565154765, "loss": 0.1841, "step": 347310 }, { "epoch": 14.39, "grad_norm": 1.0, "learning_rate": 0.00026612103390758645, "loss": 0.1468, "step": 347320 }, { "epoch": 14.39, "grad_norm": 1.15625, "learning_rate": 0.00026611021126935836, "loss": 0.1827, "step": 347330 }, { "epoch": 14.39, "grad_norm": 0.373046875, "learning_rate": 0.00026609938860081266, "loss": 0.1958, "step": 347340 }, { "epoch": 14.39, "grad_norm": 0.9375, "learning_rate": 0.0002660885659019695, "loss": 0.1593, "step": 347350 }, { "epoch": 14.39, "grad_norm": 2.40625, "learning_rate": 0.0002660777431728496, "loss": 0.2066, "step": 347360 }, { "epoch": 14.39, "grad_norm": 0.73828125, "learning_rate": 0.00026606692041347295, "loss": 0.2028, "step": 347370 }, { "epoch": 14.39, "grad_norm": 0.384765625, "learning_rate": 0.0002660560976238602, "loss": 0.2067, "step": 347380 }, { "epoch": 14.39, "grad_norm": 0.98046875, "learning_rate": 0.00026604527480403155, "loss": 0.2005, "step": 347390 }, { "epoch": 14.39, "grad_norm": 0.796875, "learning_rate": 0.00026603445195400743, "loss": 0.1665, "step": 347400 }, { "epoch": 14.39, "grad_norm": 0.921875, "learning_rate": 0.00026602362907380823, "loss": 0.177, "step": 347410 }, { "epoch": 14.39, "grad_norm": 0.8671875, "learning_rate": 0.0002660128061634543, "loss": 0.2037, "step": 347420 }, { "epoch": 14.39, "grad_norm": 0.59375, "learning_rate": 0.0002660019832229659, "loss": 0.195, "step": 347430 }, { "epoch": 14.39, "grad_norm": 0.82421875, "learning_rate": 0.00026599116025236366, "loss": 0.1876, "step": 347440 }, { "epoch": 14.39, "grad_norm": 0.8046875, "learning_rate": 0.00026598033725166764, "loss": 0.214, "step": 347450 }, { "epoch": 14.39, "grad_norm": 1.4140625, "learning_rate": 0.00026596951422089835, "loss": 0.1919, "step": 347460 }, { "epoch": 14.39, "grad_norm": 0.5703125, "learning_rate": 0.00026595869116007633, "loss": 0.2301, "step": 347470 }, { "epoch": 14.39, "grad_norm": 0.73046875, "learning_rate": 0.0002659478680692216, "loss": 0.1885, "step": 347480 }, { "epoch": 14.39, "grad_norm": 1.1875, "learning_rate": 0.0002659370449483548, "loss": 0.2101, "step": 347490 }, { "epoch": 14.39, "grad_norm": 2.46875, "learning_rate": 0.0002659262217974962, "loss": 0.241, "step": 347500 }, { "epoch": 14.39, "grad_norm": 0.80078125, "learning_rate": 0.000265915398616666, "loss": 0.2078, "step": 347510 }, { "epoch": 14.39, "grad_norm": 0.83203125, "learning_rate": 0.00026590457540588496, "loss": 0.1427, "step": 347520 }, { "epoch": 14.39, "grad_norm": 1.375, "learning_rate": 0.00026589375216517315, "loss": 0.2067, "step": 347530 }, { "epoch": 14.4, "grad_norm": 0.78515625, "learning_rate": 0.00026588292889455096, "loss": 0.1796, "step": 347540 }, { "epoch": 14.4, "grad_norm": 0.384765625, "learning_rate": 0.0002658721055940389, "loss": 0.1477, "step": 347550 }, { "epoch": 14.4, "grad_norm": 0.55078125, "learning_rate": 0.00026586128226365727, "loss": 0.1192, "step": 347560 }, { "epoch": 14.4, "grad_norm": 1.171875, "learning_rate": 0.0002658504589034263, "loss": 0.1809, "step": 347570 }, { "epoch": 14.4, "grad_norm": 1.0703125, "learning_rate": 0.0002658396355133666, "loss": 0.2165, "step": 347580 }, { "epoch": 14.4, "grad_norm": 1.015625, "learning_rate": 0.0002658288120934983, "loss": 0.1943, "step": 347590 }, { "epoch": 14.4, "grad_norm": 0.62890625, "learning_rate": 0.00026581798864384193, "loss": 0.2109, "step": 347600 }, { "epoch": 14.4, "grad_norm": 0.78515625, "learning_rate": 0.00026580716516441785, "loss": 0.1742, "step": 347610 }, { "epoch": 14.4, "grad_norm": 1.40625, "learning_rate": 0.00026579634165524636, "loss": 0.199, "step": 347620 }, { "epoch": 14.4, "grad_norm": 0.8046875, "learning_rate": 0.0002657855181163479, "loss": 0.1528, "step": 347630 }, { "epoch": 14.4, "grad_norm": 0.68359375, "learning_rate": 0.0002657746945477427, "loss": 0.2235, "step": 347640 }, { "epoch": 14.4, "grad_norm": 0.7734375, "learning_rate": 0.0002657638709494513, "loss": 0.1406, "step": 347650 }, { "epoch": 14.4, "grad_norm": 1.375, "learning_rate": 0.0002657530473214941, "loss": 0.1885, "step": 347660 }, { "epoch": 14.4, "grad_norm": 1.2421875, "learning_rate": 0.0002657422236638912, "loss": 0.1708, "step": 347670 }, { "epoch": 14.4, "grad_norm": 1.140625, "learning_rate": 0.0002657313999766632, "loss": 0.1956, "step": 347680 }, { "epoch": 14.4, "grad_norm": 1.640625, "learning_rate": 0.0002657205762598304, "loss": 0.1376, "step": 347690 }, { "epoch": 14.4, "grad_norm": 0.4140625, "learning_rate": 0.0002657097525134131, "loss": 0.173, "step": 347700 }, { "epoch": 14.4, "grad_norm": 0.53125, "learning_rate": 0.0002656989287374319, "loss": 0.1835, "step": 347710 }, { "epoch": 14.4, "grad_norm": 0.83203125, "learning_rate": 0.0002656881049319069, "loss": 0.1834, "step": 347720 }, { "epoch": 14.4, "grad_norm": 0.69921875, "learning_rate": 0.0002656772810968586, "loss": 0.1962, "step": 347730 }, { "epoch": 14.4, "grad_norm": 0.6875, "learning_rate": 0.00026566645723230737, "loss": 0.1868, "step": 347740 }, { "epoch": 14.4, "grad_norm": 1.28125, "learning_rate": 0.0002656556333382735, "loss": 0.19, "step": 347750 }, { "epoch": 14.4, "grad_norm": 0.63671875, "learning_rate": 0.0002656448094147775, "loss": 0.2101, "step": 347760 }, { "epoch": 14.4, "grad_norm": 0.890625, "learning_rate": 0.0002656339854618396, "loss": 0.1907, "step": 347770 }, { "epoch": 14.41, "grad_norm": 0.5546875, "learning_rate": 0.0002656231614794802, "loss": 0.1424, "step": 347780 }, { "epoch": 14.41, "grad_norm": 1.0703125, "learning_rate": 0.00026561233746771987, "loss": 0.2129, "step": 347790 }, { "epoch": 14.41, "grad_norm": 0.5703125, "learning_rate": 0.00026560151342657856, "loss": 0.1893, "step": 347800 }, { "epoch": 14.41, "grad_norm": 0.474609375, "learning_rate": 0.00026559068935607706, "loss": 0.1856, "step": 347810 }, { "epoch": 14.41, "grad_norm": 0.375, "learning_rate": 0.0002655798652562356, "loss": 0.1718, "step": 347820 }, { "epoch": 14.41, "grad_norm": 1.6796875, "learning_rate": 0.0002655690411270743, "loss": 0.1741, "step": 347830 }, { "epoch": 14.41, "grad_norm": 0.40625, "learning_rate": 0.0002655582169686139, "loss": 0.1981, "step": 347840 }, { "epoch": 14.41, "grad_norm": 0.734375, "learning_rate": 0.00026554739278087465, "loss": 0.1967, "step": 347850 }, { "epoch": 14.41, "grad_norm": 1.96875, "learning_rate": 0.00026553656856387683, "loss": 0.1286, "step": 347860 }, { "epoch": 14.41, "grad_norm": 0.87109375, "learning_rate": 0.00026552574431764085, "loss": 0.2551, "step": 347870 }, { "epoch": 14.41, "grad_norm": 0.6640625, "learning_rate": 0.0002655149200421871, "loss": 0.1637, "step": 347880 }, { "epoch": 14.41, "grad_norm": 1.125, "learning_rate": 0.00026550409573753597, "loss": 0.1956, "step": 347890 }, { "epoch": 14.41, "grad_norm": 1.859375, "learning_rate": 0.0002654932714037078, "loss": 0.2043, "step": 347900 }, { "epoch": 14.41, "grad_norm": 0.74609375, "learning_rate": 0.00026548244704072295, "loss": 0.1714, "step": 347910 }, { "epoch": 14.41, "grad_norm": 0.734375, "learning_rate": 0.0002654716226486018, "loss": 0.1622, "step": 347920 }, { "epoch": 14.41, "grad_norm": 0.330078125, "learning_rate": 0.00026546079822736476, "loss": 0.1111, "step": 347930 }, { "epoch": 14.41, "grad_norm": 1.15625, "learning_rate": 0.0002654499737770321, "loss": 0.2117, "step": 347940 }, { "epoch": 14.41, "grad_norm": 1.109375, "learning_rate": 0.0002654391492976244, "loss": 0.1836, "step": 347950 }, { "epoch": 14.41, "grad_norm": 0.64453125, "learning_rate": 0.0002654283247891617, "loss": 0.1897, "step": 347960 }, { "epoch": 14.41, "grad_norm": 1.125, "learning_rate": 0.00026541750025166474, "loss": 0.19, "step": 347970 }, { "epoch": 14.41, "grad_norm": 0.61328125, "learning_rate": 0.0002654066756851536, "loss": 0.174, "step": 347980 }, { "epoch": 14.41, "grad_norm": 1.7890625, "learning_rate": 0.00026539585108964876, "loss": 0.1773, "step": 347990 }, { "epoch": 14.41, "grad_norm": 2.15625, "learning_rate": 0.00026538502646517064, "loss": 0.1899, "step": 348000 }, { "epoch": 14.41, "grad_norm": 0.5546875, "learning_rate": 0.00026537420181173954, "loss": 0.2232, "step": 348010 }, { "epoch": 14.41, "grad_norm": 1.4140625, "learning_rate": 0.00026536337712937586, "loss": 0.215, "step": 348020 }, { "epoch": 14.42, "grad_norm": 0.640625, "learning_rate": 0.0002653525524181, "loss": 0.2244, "step": 348030 }, { "epoch": 14.42, "grad_norm": 0.9921875, "learning_rate": 0.0002653417276779321, "loss": 0.211, "step": 348040 }, { "epoch": 14.42, "grad_norm": 1.328125, "learning_rate": 0.0002653309029088929, "loss": 0.1767, "step": 348050 }, { "epoch": 14.42, "grad_norm": 0.51171875, "learning_rate": 0.0002653200781110026, "loss": 0.1587, "step": 348060 }, { "epoch": 14.42, "grad_norm": 0.275390625, "learning_rate": 0.00026530925328428155, "loss": 0.167, "step": 348070 }, { "epoch": 14.42, "grad_norm": 0.625, "learning_rate": 0.0002652984284287502, "loss": 0.2121, "step": 348080 }, { "epoch": 14.42, "grad_norm": 1.4765625, "learning_rate": 0.0002652876035444287, "loss": 0.2264, "step": 348090 }, { "epoch": 14.42, "grad_norm": 1.03125, "learning_rate": 0.0002652767786313377, "loss": 0.2065, "step": 348100 }, { "epoch": 14.42, "grad_norm": 0.69140625, "learning_rate": 0.0002652659536894974, "loss": 0.1857, "step": 348110 }, { "epoch": 14.42, "grad_norm": 1.3203125, "learning_rate": 0.0002652551287189282, "loss": 0.2264, "step": 348120 }, { "epoch": 14.42, "grad_norm": 2.375, "learning_rate": 0.0002652443037196506, "loss": 0.2537, "step": 348130 }, { "epoch": 14.42, "grad_norm": 0.76171875, "learning_rate": 0.0002652334786916848, "loss": 0.2182, "step": 348140 }, { "epoch": 14.42, "grad_norm": 0.6796875, "learning_rate": 0.0002652226536350512, "loss": 0.1908, "step": 348150 }, { "epoch": 14.42, "grad_norm": 0.376953125, "learning_rate": 0.0002652118285497703, "loss": 0.1712, "step": 348160 }, { "epoch": 14.42, "grad_norm": 0.58984375, "learning_rate": 0.00026520100343586234, "loss": 0.1895, "step": 348170 }, { "epoch": 14.42, "grad_norm": 1.2109375, "learning_rate": 0.00026519017829334775, "loss": 0.1818, "step": 348180 }, { "epoch": 14.42, "grad_norm": 0.6328125, "learning_rate": 0.0002651793531222468, "loss": 0.2069, "step": 348190 }, { "epoch": 14.42, "grad_norm": 2.453125, "learning_rate": 0.00026516852792258, "loss": 0.1831, "step": 348200 }, { "epoch": 14.42, "grad_norm": 1.375, "learning_rate": 0.0002651577026943677, "loss": 0.2159, "step": 348210 }, { "epoch": 14.42, "grad_norm": 0.8046875, "learning_rate": 0.0002651468774376302, "loss": 0.231, "step": 348220 }, { "epoch": 14.42, "grad_norm": 1.2578125, "learning_rate": 0.00026513605215238795, "loss": 0.2174, "step": 348230 }, { "epoch": 14.42, "grad_norm": 0.8125, "learning_rate": 0.0002651252268386613, "loss": 0.1935, "step": 348240 }, { "epoch": 14.42, "grad_norm": 0.6640625, "learning_rate": 0.00026511440149647056, "loss": 0.2314, "step": 348250 }, { "epoch": 14.42, "grad_norm": 1.21875, "learning_rate": 0.00026510357612583615, "loss": 0.2115, "step": 348260 }, { "epoch": 14.43, "grad_norm": 0.2001953125, "learning_rate": 0.0002650927507267784, "loss": 0.2046, "step": 348270 }, { "epoch": 14.43, "grad_norm": 0.99609375, "learning_rate": 0.0002650819252993178, "loss": 0.1992, "step": 348280 }, { "epoch": 14.43, "grad_norm": 1.234375, "learning_rate": 0.00026507109984347466, "loss": 0.2313, "step": 348290 }, { "epoch": 14.43, "grad_norm": 0.5859375, "learning_rate": 0.00026506027435926926, "loss": 0.1594, "step": 348300 }, { "epoch": 14.43, "grad_norm": 1.3984375, "learning_rate": 0.0002650494488467221, "loss": 0.2041, "step": 348310 }, { "epoch": 14.43, "grad_norm": 0.73828125, "learning_rate": 0.0002650386233058535, "loss": 0.1444, "step": 348320 }, { "epoch": 14.43, "grad_norm": 0.90625, "learning_rate": 0.0002650277977366838, "loss": 0.2129, "step": 348330 }, { "epoch": 14.43, "grad_norm": 0.68359375, "learning_rate": 0.00026501697213923345, "loss": 0.1728, "step": 348340 }, { "epoch": 14.43, "grad_norm": 1.4453125, "learning_rate": 0.00026500614651352275, "loss": 0.2081, "step": 348350 }, { "epoch": 14.43, "grad_norm": 0.86328125, "learning_rate": 0.0002649953208595721, "loss": 0.183, "step": 348360 }, { "epoch": 14.43, "grad_norm": 0.7421875, "learning_rate": 0.0002649844951774019, "loss": 0.1818, "step": 348370 }, { "epoch": 14.43, "grad_norm": 0.88671875, "learning_rate": 0.0002649736694670325, "loss": 0.2164, "step": 348380 }, { "epoch": 14.43, "grad_norm": 0.287109375, "learning_rate": 0.0002649628437284842, "loss": 0.2074, "step": 348390 }, { "epoch": 14.43, "grad_norm": 0.60546875, "learning_rate": 0.0002649520179617775, "loss": 0.172, "step": 348400 }, { "epoch": 14.43, "grad_norm": 1.0859375, "learning_rate": 0.0002649411921669328, "loss": 0.2123, "step": 348410 }, { "epoch": 14.43, "grad_norm": 2.171875, "learning_rate": 0.00026493036634397027, "loss": 0.195, "step": 348420 }, { "epoch": 14.43, "grad_norm": 0.5390625, "learning_rate": 0.0002649195404929104, "loss": 0.1751, "step": 348430 }, { "epoch": 14.43, "grad_norm": 1.2109375, "learning_rate": 0.00026490871461377365, "loss": 0.1405, "step": 348440 }, { "epoch": 14.43, "grad_norm": 0.36328125, "learning_rate": 0.00026489788870658027, "loss": 0.2005, "step": 348450 }, { "epoch": 14.43, "grad_norm": 0.71875, "learning_rate": 0.0002648870627713507, "loss": 0.183, "step": 348460 }, { "epoch": 14.43, "grad_norm": 0.77734375, "learning_rate": 0.00026487623680810523, "loss": 0.1797, "step": 348470 }, { "epoch": 14.43, "grad_norm": 0.67578125, "learning_rate": 0.00026486541081686434, "loss": 0.1492, "step": 348480 }, { "epoch": 14.43, "grad_norm": 0.388671875, "learning_rate": 0.00026485458479764837, "loss": 0.2184, "step": 348490 }, { "epoch": 14.43, "grad_norm": 0.8203125, "learning_rate": 0.00026484375875047755, "loss": 0.1225, "step": 348500 }, { "epoch": 14.44, "grad_norm": 0.7890625, "learning_rate": 0.00026483293267537243, "loss": 0.1619, "step": 348510 }, { "epoch": 14.44, "grad_norm": 1.09375, "learning_rate": 0.0002648221065723534, "loss": 0.2153, "step": 348520 }, { "epoch": 14.44, "grad_norm": 1.0390625, "learning_rate": 0.0002648112804414407, "loss": 0.1488, "step": 348530 }, { "epoch": 14.44, "grad_norm": 0.41796875, "learning_rate": 0.0002648004542826549, "loss": 0.1568, "step": 348540 }, { "epoch": 14.44, "grad_norm": 0.984375, "learning_rate": 0.0002647896280960161, "loss": 0.1871, "step": 348550 }, { "epoch": 14.44, "grad_norm": 0.70703125, "learning_rate": 0.00026477880188154485, "loss": 0.2054, "step": 348560 }, { "epoch": 14.44, "grad_norm": 0.97265625, "learning_rate": 0.00026476797563926155, "loss": 0.2002, "step": 348570 }, { "epoch": 14.44, "grad_norm": 1.15625, "learning_rate": 0.0002647571493691864, "loss": 0.2775, "step": 348580 }, { "epoch": 14.44, "grad_norm": 0.96484375, "learning_rate": 0.0002647463230713399, "loss": 0.1942, "step": 348590 }, { "epoch": 14.44, "grad_norm": 0.72265625, "learning_rate": 0.0002647354967457425, "loss": 0.1856, "step": 348600 }, { "epoch": 14.44, "grad_norm": 1.3125, "learning_rate": 0.0002647246703924144, "loss": 0.2003, "step": 348610 }, { "epoch": 14.44, "grad_norm": 1.484375, "learning_rate": 0.0002647138440113762, "loss": 0.1854, "step": 348620 }, { "epoch": 14.44, "grad_norm": 0.83203125, "learning_rate": 0.00026470301760264805, "loss": 0.1703, "step": 348630 }, { "epoch": 14.44, "grad_norm": 0.296875, "learning_rate": 0.0002646921911662504, "loss": 0.1686, "step": 348640 }, { "epoch": 14.44, "grad_norm": 0.69921875, "learning_rate": 0.00026468136470220367, "loss": 0.2053, "step": 348650 }, { "epoch": 14.44, "grad_norm": 0.765625, "learning_rate": 0.0002646705382105281, "loss": 0.1475, "step": 348660 }, { "epoch": 14.44, "grad_norm": 0.4765625, "learning_rate": 0.00026465971169124423, "loss": 0.1466, "step": 348670 }, { "epoch": 14.44, "grad_norm": 1.3359375, "learning_rate": 0.0002646488851443724, "loss": 0.2386, "step": 348680 }, { "epoch": 14.44, "grad_norm": 0.92578125, "learning_rate": 0.0002646380585699329, "loss": 0.1717, "step": 348690 }, { "epoch": 14.44, "grad_norm": 0.640625, "learning_rate": 0.0002646272319679462, "loss": 0.1852, "step": 348700 }, { "epoch": 14.44, "grad_norm": 0.91796875, "learning_rate": 0.00026461640533843254, "loss": 0.1942, "step": 348710 }, { "epoch": 14.44, "grad_norm": 1.2890625, "learning_rate": 0.0002646055786814124, "loss": 0.2113, "step": 348720 }, { "epoch": 14.44, "grad_norm": 0.90234375, "learning_rate": 0.0002645947519969063, "loss": 0.2019, "step": 348730 }, { "epoch": 14.44, "grad_norm": 0.87890625, "learning_rate": 0.00026458392528493427, "loss": 0.1914, "step": 348740 }, { "epoch": 14.45, "grad_norm": 1.25, "learning_rate": 0.0002645730985455169, "loss": 0.1565, "step": 348750 }, { "epoch": 14.45, "grad_norm": 0.92578125, "learning_rate": 0.0002645622717786746, "loss": 0.2028, "step": 348760 }, { "epoch": 14.45, "grad_norm": 0.81640625, "learning_rate": 0.00026455144498442764, "loss": 0.1773, "step": 348770 }, { "epoch": 14.45, "grad_norm": 0.9296875, "learning_rate": 0.00026454061816279646, "loss": 0.2017, "step": 348780 }, { "epoch": 14.45, "grad_norm": 1.5390625, "learning_rate": 0.0002645297913138013, "loss": 0.181, "step": 348790 }, { "epoch": 14.45, "grad_norm": 0.82421875, "learning_rate": 0.00026451896443746277, "loss": 0.1376, "step": 348800 }, { "epoch": 14.45, "grad_norm": 2.6875, "learning_rate": 0.0002645081375338011, "loss": 0.2027, "step": 348810 }, { "epoch": 14.45, "grad_norm": 0.86328125, "learning_rate": 0.00026449731060283657, "loss": 0.1495, "step": 348820 }, { "epoch": 14.45, "grad_norm": 1.546875, "learning_rate": 0.0002644864836445898, "loss": 0.1901, "step": 348830 }, { "epoch": 14.45, "grad_norm": 0.45703125, "learning_rate": 0.00026447565665908097, "loss": 0.1735, "step": 348840 }, { "epoch": 14.45, "grad_norm": 0.640625, "learning_rate": 0.0002644648296463305, "loss": 0.141, "step": 348850 }, { "epoch": 14.45, "grad_norm": 1.2109375, "learning_rate": 0.0002644540026063588, "loss": 0.2032, "step": 348860 }, { "epoch": 14.45, "grad_norm": 2.5625, "learning_rate": 0.0002644431755391862, "loss": 0.1853, "step": 348870 }, { "epoch": 14.45, "grad_norm": 1.0625, "learning_rate": 0.0002644323484448332, "loss": 0.1965, "step": 348880 }, { "epoch": 14.45, "grad_norm": 0.99609375, "learning_rate": 0.00026442152132332005, "loss": 0.2159, "step": 348890 }, { "epoch": 14.45, "grad_norm": 0.62109375, "learning_rate": 0.00026441069417466705, "loss": 0.2076, "step": 348900 }, { "epoch": 14.45, "grad_norm": 1.078125, "learning_rate": 0.0002643998669988948, "loss": 0.1838, "step": 348910 }, { "epoch": 14.45, "grad_norm": 0.84375, "learning_rate": 0.0002643890397960235, "loss": 0.1595, "step": 348920 }, { "epoch": 14.45, "grad_norm": 2.03125, "learning_rate": 0.0002643782125660736, "loss": 0.172, "step": 348930 }, { "epoch": 14.45, "grad_norm": 0.80859375, "learning_rate": 0.00026436738530906547, "loss": 0.1977, "step": 348940 }, { "epoch": 14.45, "grad_norm": 1.5625, "learning_rate": 0.00026435655802501936, "loss": 0.1628, "step": 348950 }, { "epoch": 14.45, "grad_norm": 0.7578125, "learning_rate": 0.0002643457307139559, "loss": 0.1611, "step": 348960 }, { "epoch": 14.45, "grad_norm": 0.0, "learning_rate": 0.00026433490337589527, "loss": 0.2187, "step": 348970 }, { "epoch": 14.45, "grad_norm": 0.80859375, "learning_rate": 0.00026432407601085784, "loss": 0.1525, "step": 348980 }, { "epoch": 14.46, "grad_norm": 0.443359375, "learning_rate": 0.0002643132486188641, "loss": 0.21, "step": 348990 }, { "epoch": 14.46, "grad_norm": 0.51953125, "learning_rate": 0.0002643024211999344, "loss": 0.1852, "step": 349000 }, { "epoch": 14.46, "grad_norm": 0.796875, "learning_rate": 0.0002642915937540891, "loss": 0.1514, "step": 349010 }, { "epoch": 14.46, "grad_norm": 0.56640625, "learning_rate": 0.0002642807662813486, "loss": 0.2083, "step": 349020 }, { "epoch": 14.46, "grad_norm": 0.73828125, "learning_rate": 0.00026426993878173304, "loss": 0.2092, "step": 349030 }, { "epoch": 14.46, "grad_norm": 0.30078125, "learning_rate": 0.0002642591112552631, "loss": 0.1734, "step": 349040 }, { "epoch": 14.46, "grad_norm": 0.96484375, "learning_rate": 0.0002642482837019592, "loss": 0.2126, "step": 349050 }, { "epoch": 14.46, "grad_norm": 0.3046875, "learning_rate": 0.00026423745612184137, "loss": 0.1939, "step": 349060 }, { "epoch": 14.46, "grad_norm": 1.3671875, "learning_rate": 0.0002642266285149303, "loss": 0.2342, "step": 349070 }, { "epoch": 14.46, "grad_norm": 0.416015625, "learning_rate": 0.0002642158008812462, "loss": 0.1847, "step": 349080 }, { "epoch": 14.46, "grad_norm": 0.71875, "learning_rate": 0.0002642049732208095, "loss": 0.186, "step": 349090 }, { "epoch": 14.46, "grad_norm": 0.578125, "learning_rate": 0.0002641941455336406, "loss": 0.2548, "step": 349100 }, { "epoch": 14.46, "grad_norm": 1.2265625, "learning_rate": 0.0002641833178197598, "loss": 0.2293, "step": 349110 }, { "epoch": 14.46, "grad_norm": 1.109375, "learning_rate": 0.0002641724900791876, "loss": 0.1672, "step": 349120 }, { "epoch": 14.46, "grad_norm": 0.263671875, "learning_rate": 0.00026416166231194423, "loss": 0.1524, "step": 349130 }, { "epoch": 14.46, "grad_norm": 0.6640625, "learning_rate": 0.00026415083451805015, "loss": 0.1658, "step": 349140 }, { "epoch": 14.46, "grad_norm": 0.71875, "learning_rate": 0.0002641400066975258, "loss": 0.2128, "step": 349150 }, { "epoch": 14.46, "grad_norm": 0.6640625, "learning_rate": 0.00026412917885039147, "loss": 0.2299, "step": 349160 }, { "epoch": 14.46, "grad_norm": 0.98828125, "learning_rate": 0.00026411835097666747, "loss": 0.2265, "step": 349170 }, { "epoch": 14.46, "grad_norm": 0.455078125, "learning_rate": 0.0002641075230763744, "loss": 0.1684, "step": 349180 }, { "epoch": 14.46, "grad_norm": 1.828125, "learning_rate": 0.0002640966951495323, "loss": 0.1751, "step": 349190 }, { "epoch": 14.46, "grad_norm": 1.21875, "learning_rate": 0.0002640858671961619, "loss": 0.2174, "step": 349200 }, { "epoch": 14.46, "grad_norm": 0.51953125, "learning_rate": 0.00026407503921628333, "loss": 0.1943, "step": 349210 }, { "epoch": 14.46, "grad_norm": 0.92578125, "learning_rate": 0.00026406421120991707, "loss": 0.2047, "step": 349220 }, { "epoch": 14.47, "grad_norm": 0.294921875, "learning_rate": 0.00026405338317708353, "loss": 0.2063, "step": 349230 }, { "epoch": 14.47, "grad_norm": 0.9140625, "learning_rate": 0.000264042555117803, "loss": 0.2232, "step": 349240 }, { "epoch": 14.47, "grad_norm": 0.9921875, "learning_rate": 0.00026403172703209595, "loss": 0.1579, "step": 349250 }, { "epoch": 14.47, "grad_norm": 1.625, "learning_rate": 0.00026402089891998273, "loss": 0.1998, "step": 349260 }, { "epoch": 14.47, "grad_norm": 0.90234375, "learning_rate": 0.00026401007078148355, "loss": 0.1725, "step": 349270 }, { "epoch": 14.47, "grad_norm": 1.328125, "learning_rate": 0.000263999242616619, "loss": 0.1785, "step": 349280 }, { "epoch": 14.47, "grad_norm": 0.2109375, "learning_rate": 0.0002639884144254094, "loss": 0.1859, "step": 349290 }, { "epoch": 14.47, "grad_norm": 2.40625, "learning_rate": 0.0002639775862078751, "loss": 0.1574, "step": 349300 }, { "epoch": 14.47, "grad_norm": 1.2734375, "learning_rate": 0.00026396675796403656, "loss": 0.1903, "step": 349310 }, { "epoch": 14.47, "grad_norm": 0.82421875, "learning_rate": 0.00026395592969391405, "loss": 0.2012, "step": 349320 }, { "epoch": 14.47, "grad_norm": 0.65625, "learning_rate": 0.00026394510139752793, "loss": 0.1601, "step": 349330 }, { "epoch": 14.47, "grad_norm": 0.55078125, "learning_rate": 0.0002639342730748987, "loss": 0.1885, "step": 349340 }, { "epoch": 14.47, "grad_norm": 0.6875, "learning_rate": 0.00026392344472604657, "loss": 0.192, "step": 349350 }, { "epoch": 14.47, "grad_norm": 0.8671875, "learning_rate": 0.00026391261635099215, "loss": 0.1831, "step": 349360 }, { "epoch": 14.47, "grad_norm": 0.703125, "learning_rate": 0.0002639017879497556, "loss": 0.2061, "step": 349370 }, { "epoch": 14.47, "grad_norm": 0.71484375, "learning_rate": 0.0002638909595223575, "loss": 0.224, "step": 349380 }, { "epoch": 14.47, "grad_norm": 0.515625, "learning_rate": 0.00026388013106881804, "loss": 0.1682, "step": 349390 }, { "epoch": 14.47, "grad_norm": 0.8828125, "learning_rate": 0.00026386930258915763, "loss": 0.1847, "step": 349400 }, { "epoch": 14.47, "grad_norm": 0.96484375, "learning_rate": 0.00026385847408339673, "loss": 0.1901, "step": 349410 }, { "epoch": 14.47, "grad_norm": 0.70703125, "learning_rate": 0.0002638476455515557, "loss": 0.1939, "step": 349420 }, { "epoch": 14.47, "grad_norm": 0.90234375, "learning_rate": 0.00026383681699365484, "loss": 0.1987, "step": 349430 }, { "epoch": 14.47, "grad_norm": 2.078125, "learning_rate": 0.0002638259884097147, "loss": 0.2022, "step": 349440 }, { "epoch": 14.47, "grad_norm": 0.546875, "learning_rate": 0.0002638151597997554, "loss": 0.2235, "step": 349450 }, { "epoch": 14.47, "grad_norm": 0.43359375, "learning_rate": 0.0002638043311637975, "loss": 0.2053, "step": 349460 }, { "epoch": 14.48, "grad_norm": 0.55078125, "learning_rate": 0.0002637935025018614, "loss": 0.1974, "step": 349470 }, { "epoch": 14.48, "grad_norm": 0.66796875, "learning_rate": 0.0002637826738139674, "loss": 0.1811, "step": 349480 }, { "epoch": 14.48, "grad_norm": 1.140625, "learning_rate": 0.00026377184510013586, "loss": 0.1954, "step": 349490 }, { "epoch": 14.48, "grad_norm": 0.76171875, "learning_rate": 0.00026376101636038716, "loss": 0.1916, "step": 349500 }, { "epoch": 14.48, "grad_norm": 1.1171875, "learning_rate": 0.0002637501875947418, "loss": 0.1849, "step": 349510 }, { "epoch": 14.48, "grad_norm": 1.359375, "learning_rate": 0.0002637393588032201, "loss": 0.1797, "step": 349520 }, { "epoch": 14.48, "grad_norm": 1.0078125, "learning_rate": 0.0002637285299858423, "loss": 0.2045, "step": 349530 }, { "epoch": 14.48, "grad_norm": 1.5078125, "learning_rate": 0.00026371770114262894, "loss": 0.1942, "step": 349540 }, { "epoch": 14.48, "grad_norm": 1.46875, "learning_rate": 0.0002637068722736004, "loss": 0.2097, "step": 349550 }, { "epoch": 14.48, "grad_norm": 0.58984375, "learning_rate": 0.000263696043378777, "loss": 0.1803, "step": 349560 }, { "epoch": 14.48, "grad_norm": 1.0078125, "learning_rate": 0.000263685214458179, "loss": 0.1455, "step": 349570 }, { "epoch": 14.48, "grad_norm": 0.85546875, "learning_rate": 0.0002636743855118269, "loss": 0.1727, "step": 349580 }, { "epoch": 14.48, "grad_norm": 0.462890625, "learning_rate": 0.00026366355653974124, "loss": 0.1272, "step": 349590 }, { "epoch": 14.48, "grad_norm": 0.76171875, "learning_rate": 0.0002636527275419422, "loss": 0.2202, "step": 349600 }, { "epoch": 14.48, "grad_norm": 1.0546875, "learning_rate": 0.00026364189851845013, "loss": 0.1631, "step": 349610 }, { "epoch": 14.48, "grad_norm": 0.66796875, "learning_rate": 0.0002636310694692855, "loss": 0.1742, "step": 349620 }, { "epoch": 14.48, "grad_norm": 0.578125, "learning_rate": 0.0002636202403944687, "loss": 0.2051, "step": 349630 }, { "epoch": 14.48, "grad_norm": 1.2265625, "learning_rate": 0.00026360941129402004, "loss": 0.1609, "step": 349640 }, { "epoch": 14.48, "grad_norm": 0.46875, "learning_rate": 0.00026359858216796003, "loss": 0.1856, "step": 349650 }, { "epoch": 14.48, "grad_norm": 0.4921875, "learning_rate": 0.00026358775301630884, "loss": 0.1752, "step": 349660 }, { "epoch": 14.48, "grad_norm": 0.62109375, "learning_rate": 0.000263576923839087, "loss": 0.166, "step": 349670 }, { "epoch": 14.48, "grad_norm": 1.3984375, "learning_rate": 0.0002635660946363149, "loss": 0.1696, "step": 349680 }, { "epoch": 14.48, "grad_norm": 1.625, "learning_rate": 0.0002635552654080129, "loss": 0.1958, "step": 349690 }, { "epoch": 14.48, "grad_norm": 0.890625, "learning_rate": 0.00026354443615420124, "loss": 0.1904, "step": 349700 }, { "epoch": 14.48, "grad_norm": 3.8623809814453125e-05, "learning_rate": 0.00026353360687490047, "loss": 0.203, "step": 349710 }, { "epoch": 14.49, "grad_norm": 0.57421875, "learning_rate": 0.0002635227775701309, "loss": 0.1909, "step": 349720 }, { "epoch": 14.49, "grad_norm": 0.68359375, "learning_rate": 0.000263511948239913, "loss": 0.1942, "step": 349730 }, { "epoch": 14.49, "grad_norm": 0.671875, "learning_rate": 0.000263501118884267, "loss": 0.1793, "step": 349740 }, { "epoch": 14.49, "grad_norm": 0.8359375, "learning_rate": 0.00026349028950321335, "loss": 0.186, "step": 349750 }, { "epoch": 14.49, "grad_norm": 0.55078125, "learning_rate": 0.0002634794600967725, "loss": 0.2008, "step": 349760 }, { "epoch": 14.49, "grad_norm": 1.59375, "learning_rate": 0.0002634686306649647, "loss": 0.2136, "step": 349770 }, { "epoch": 14.49, "grad_norm": 1.0234375, "learning_rate": 0.00026345780120781036, "loss": 0.213, "step": 349780 }, { "epoch": 14.49, "grad_norm": 0.77734375, "learning_rate": 0.00026344697172532995, "loss": 0.1853, "step": 349790 }, { "epoch": 14.49, "grad_norm": 1.3515625, "learning_rate": 0.00026343614221754376, "loss": 0.1978, "step": 349800 }, { "epoch": 14.49, "grad_norm": 0.921875, "learning_rate": 0.00026342531268447225, "loss": 0.1707, "step": 349810 }, { "epoch": 14.49, "grad_norm": 0.4609375, "learning_rate": 0.0002634144831261357, "loss": 0.1369, "step": 349820 }, { "epoch": 14.49, "grad_norm": 0.7421875, "learning_rate": 0.00026340365354255455, "loss": 0.184, "step": 349830 }, { "epoch": 14.49, "grad_norm": 0.6640625, "learning_rate": 0.0002633928239337491, "loss": 0.2289, "step": 349840 }, { "epoch": 14.49, "grad_norm": 0.5859375, "learning_rate": 0.00026338199429973996, "loss": 0.203, "step": 349850 }, { "epoch": 14.49, "grad_norm": 0.5625, "learning_rate": 0.00026337116464054727, "loss": 0.1964, "step": 349860 }, { "epoch": 14.49, "grad_norm": 0.71875, "learning_rate": 0.0002633603349561915, "loss": 0.1441, "step": 349870 }, { "epoch": 14.49, "grad_norm": 1.5078125, "learning_rate": 0.00026334950524669297, "loss": 0.1535, "step": 349880 }, { "epoch": 14.49, "grad_norm": 1.109375, "learning_rate": 0.00026333867551207223, "loss": 0.1784, "step": 349890 }, { "epoch": 14.49, "grad_norm": 1.859375, "learning_rate": 0.0002633278457523495, "loss": 0.1872, "step": 349900 }, { "epoch": 14.49, "grad_norm": 1.5625, "learning_rate": 0.00026331701596754516, "loss": 0.1892, "step": 349910 }, { "epoch": 14.49, "grad_norm": 1.453125, "learning_rate": 0.0002633061861576796, "loss": 0.1878, "step": 349920 }, { "epoch": 14.49, "grad_norm": 1.6953125, "learning_rate": 0.00026329535632277335, "loss": 0.1628, "step": 349930 }, { "epoch": 14.49, "grad_norm": 1.4609375, "learning_rate": 0.0002632845264628466, "loss": 0.194, "step": 349940 }, { "epoch": 14.49, "grad_norm": 1.453125, "learning_rate": 0.00026327369657791976, "loss": 0.2388, "step": 349950 }, { "epoch": 14.5, "grad_norm": 2.203125, "learning_rate": 0.00026326286666801334, "loss": 0.2076, "step": 349960 }, { "epoch": 14.5, "grad_norm": 0.462890625, "learning_rate": 0.00026325203673314756, "loss": 0.1639, "step": 349970 }, { "epoch": 14.5, "grad_norm": 0.48046875, "learning_rate": 0.000263241206773343, "loss": 0.1599, "step": 349980 }, { "epoch": 14.5, "grad_norm": 1.046875, "learning_rate": 0.0002632303767886198, "loss": 0.2077, "step": 349990 }, { "epoch": 14.5, "grad_norm": 2.28125, "learning_rate": 0.0002632195467789985, "loss": 0.1669, "step": 350000 }, { "epoch": 14.5, "grad_norm": 0.6171875, "learning_rate": 0.00026320871674449954, "loss": 0.2168, "step": 350010 }, { "epoch": 14.5, "grad_norm": 1.1015625, "learning_rate": 0.0002631978866851431, "loss": 0.1588, "step": 350020 }, { "epoch": 14.5, "grad_norm": 0.5, "learning_rate": 0.00026318705660094963, "loss": 0.1595, "step": 350030 }, { "epoch": 14.5, "grad_norm": 1.3984375, "learning_rate": 0.00026317622649193957, "loss": 0.1965, "step": 350040 }, { "epoch": 14.5, "grad_norm": 0.96484375, "learning_rate": 0.0002631653963581333, "loss": 0.1739, "step": 350050 }, { "epoch": 14.5, "grad_norm": 1.3125, "learning_rate": 0.0002631545661995512, "loss": 0.2051, "step": 350060 }, { "epoch": 14.5, "grad_norm": 0.466796875, "learning_rate": 0.00026314373601621355, "loss": 0.1829, "step": 350070 }, { "epoch": 14.5, "grad_norm": 0.87109375, "learning_rate": 0.00026313290580814086, "loss": 0.1813, "step": 350080 }, { "epoch": 14.5, "grad_norm": 0.703125, "learning_rate": 0.0002631220755753535, "loss": 0.1604, "step": 350090 }, { "epoch": 14.5, "grad_norm": 0.5546875, "learning_rate": 0.0002631112453178717, "loss": 0.1916, "step": 350100 }, { "epoch": 14.5, "grad_norm": 0.451171875, "learning_rate": 0.000263100415035716, "loss": 0.1655, "step": 350110 }, { "epoch": 14.5, "grad_norm": 0.314453125, "learning_rate": 0.0002630895847289068, "loss": 0.2057, "step": 350120 }, { "epoch": 14.5, "grad_norm": 1.2265625, "learning_rate": 0.0002630787543974643, "loss": 0.2154, "step": 350130 }, { "epoch": 14.5, "grad_norm": 1.1015625, "learning_rate": 0.0002630679240414091, "loss": 0.2055, "step": 350140 }, { "epoch": 14.5, "grad_norm": 1.2890625, "learning_rate": 0.00026305709366076146, "loss": 0.1906, "step": 350150 }, { "epoch": 14.5, "grad_norm": 0.5, "learning_rate": 0.0002630462632555418, "loss": 0.2481, "step": 350160 }, { "epoch": 14.5, "grad_norm": 1.140625, "learning_rate": 0.0002630354328257704, "loss": 0.2192, "step": 350170 }, { "epoch": 14.5, "grad_norm": 0.8203125, "learning_rate": 0.0002630246023714677, "loss": 0.155, "step": 350180 }, { "epoch": 14.5, "grad_norm": 0.76171875, "learning_rate": 0.0002630137718926542, "loss": 0.1529, "step": 350190 }, { "epoch": 14.51, "grad_norm": 1.2109375, "learning_rate": 0.0002630029413893502, "loss": 0.2369, "step": 350200 }, { "epoch": 14.51, "grad_norm": 1.4140625, "learning_rate": 0.000262992110861576, "loss": 0.164, "step": 350210 }, { "epoch": 14.51, "grad_norm": 0.9140625, "learning_rate": 0.0002629812803093521, "loss": 0.1876, "step": 350220 }, { "epoch": 14.51, "grad_norm": 0.84765625, "learning_rate": 0.0002629704497326988, "loss": 0.2141, "step": 350230 }, { "epoch": 14.51, "grad_norm": 0.67578125, "learning_rate": 0.0002629596191316365, "loss": 0.1563, "step": 350240 }, { "epoch": 14.51, "grad_norm": 1.2890625, "learning_rate": 0.00026294878850618563, "loss": 0.1722, "step": 350250 }, { "epoch": 14.51, "grad_norm": 1.1328125, "learning_rate": 0.00026293795785636647, "loss": 0.1869, "step": 350260 }, { "epoch": 14.51, "grad_norm": 2.15625, "learning_rate": 0.0002629271271821996, "loss": 0.19, "step": 350270 }, { "epoch": 14.51, "grad_norm": 0.7734375, "learning_rate": 0.00026291629648370513, "loss": 0.157, "step": 350280 }, { "epoch": 14.51, "grad_norm": 0.8984375, "learning_rate": 0.0002629054657609036, "loss": 0.2077, "step": 350290 }, { "epoch": 14.51, "grad_norm": 0.609375, "learning_rate": 0.0002628946350138155, "loss": 0.1887, "step": 350300 }, { "epoch": 14.51, "grad_norm": 0.037353515625, "learning_rate": 0.00026288380424246104, "loss": 0.1664, "step": 350310 }, { "epoch": 14.51, "grad_norm": 1.203125, "learning_rate": 0.0002628729734468606, "loss": 0.1892, "step": 350320 }, { "epoch": 14.51, "grad_norm": 1.203125, "learning_rate": 0.0002628621426270347, "loss": 0.2053, "step": 350330 }, { "epoch": 14.51, "grad_norm": 1.1796875, "learning_rate": 0.0002628513117830035, "loss": 0.1564, "step": 350340 }, { "epoch": 14.51, "grad_norm": 2.515625, "learning_rate": 0.0002628404809147876, "loss": 0.158, "step": 350350 }, { "epoch": 14.51, "grad_norm": 0.70703125, "learning_rate": 0.00026282965002240733, "loss": 0.175, "step": 350360 }, { "epoch": 14.51, "grad_norm": 0.74609375, "learning_rate": 0.000262818819105883, "loss": 0.2548, "step": 350370 }, { "epoch": 14.51, "grad_norm": 0.482421875, "learning_rate": 0.0002628079881652351, "loss": 0.153, "step": 350380 }, { "epoch": 14.51, "grad_norm": 1.0625, "learning_rate": 0.0002627971572004838, "loss": 0.1996, "step": 350390 }, { "epoch": 14.51, "grad_norm": 0.75390625, "learning_rate": 0.0002627863262116498, "loss": 0.2414, "step": 350400 }, { "epoch": 14.51, "grad_norm": 0.365234375, "learning_rate": 0.00026277549519875334, "loss": 0.1745, "step": 350410 }, { "epoch": 14.51, "grad_norm": 0.490234375, "learning_rate": 0.00026276466416181454, "loss": 0.1645, "step": 350420 }, { "epoch": 14.51, "grad_norm": 2.59375, "learning_rate": 0.00026275383310085426, "loss": 0.211, "step": 350430 }, { "epoch": 14.52, "grad_norm": 0.71484375, "learning_rate": 0.0002627430020158925, "loss": 0.1391, "step": 350440 }, { "epoch": 14.52, "grad_norm": 0.734375, "learning_rate": 0.00026273217090694984, "loss": 0.1981, "step": 350450 }, { "epoch": 14.52, "grad_norm": 0.486328125, "learning_rate": 0.0002627213397740467, "loss": 0.197, "step": 350460 }, { "epoch": 14.52, "grad_norm": 0.60546875, "learning_rate": 0.00026271050861720313, "loss": 0.1819, "step": 350470 }, { "epoch": 14.52, "grad_norm": 0.8125, "learning_rate": 0.00026269967743644, "loss": 0.2049, "step": 350480 }, { "epoch": 14.52, "grad_norm": 0.6484375, "learning_rate": 0.0002626888462317774, "loss": 0.1777, "step": 350490 }, { "epoch": 14.52, "grad_norm": 0.7109375, "learning_rate": 0.0002626780150032356, "loss": 0.2484, "step": 350500 }, { "epoch": 14.52, "grad_norm": 0.65625, "learning_rate": 0.0002626671837508353, "loss": 0.1748, "step": 350510 }, { "epoch": 14.52, "grad_norm": 1.015625, "learning_rate": 0.00026265635247459666, "loss": 0.1933, "step": 350520 }, { "epoch": 14.52, "grad_norm": 1.2578125, "learning_rate": 0.00026264552117454015, "loss": 0.2028, "step": 350530 }, { "epoch": 14.52, "grad_norm": 0.59765625, "learning_rate": 0.0002626346898506862, "loss": 0.2033, "step": 350540 }, { "epoch": 14.52, "grad_norm": 1.09375, "learning_rate": 0.000262623858503055, "loss": 0.2159, "step": 350550 }, { "epoch": 14.52, "grad_norm": 0.86328125, "learning_rate": 0.00026261302713166717, "loss": 0.1807, "step": 350560 }, { "epoch": 14.52, "grad_norm": 1.1015625, "learning_rate": 0.000262602195736543, "loss": 0.212, "step": 350570 }, { "epoch": 14.52, "grad_norm": 0.93359375, "learning_rate": 0.00026259136431770275, "loss": 0.1508, "step": 350580 }, { "epoch": 14.52, "grad_norm": 1.109375, "learning_rate": 0.000262580532875167, "loss": 0.1874, "step": 350590 }, { "epoch": 14.52, "grad_norm": 0.76171875, "learning_rate": 0.00026256970140895597, "loss": 0.2254, "step": 350600 }, { "epoch": 14.52, "grad_norm": 1.8359375, "learning_rate": 0.0002625588699190902, "loss": 0.1765, "step": 350610 }, { "epoch": 14.52, "grad_norm": 1.265625, "learning_rate": 0.00026254803840559, "loss": 0.2463, "step": 350620 }, { "epoch": 14.52, "grad_norm": 0.578125, "learning_rate": 0.0002625372068684756, "loss": 0.1895, "step": 350630 }, { "epoch": 14.52, "grad_norm": 0.486328125, "learning_rate": 0.0002625263753077677, "loss": 0.2269, "step": 350640 }, { "epoch": 14.52, "grad_norm": 1.7421875, "learning_rate": 0.0002625155437234865, "loss": 0.1606, "step": 350650 }, { "epoch": 14.52, "grad_norm": 0.5, "learning_rate": 0.00026250471211565225, "loss": 0.184, "step": 350660 }, { "epoch": 14.52, "grad_norm": 0.9140625, "learning_rate": 0.0002624938804842856, "loss": 0.193, "step": 350670 }, { "epoch": 14.53, "grad_norm": 1.2109375, "learning_rate": 0.0002624830488294068, "loss": 0.1969, "step": 350680 }, { "epoch": 14.53, "grad_norm": 1.2890625, "learning_rate": 0.0002624722171510363, "loss": 0.2328, "step": 350690 }, { "epoch": 14.53, "grad_norm": 2.109375, "learning_rate": 0.00026246138544919434, "loss": 0.1912, "step": 350700 }, { "epoch": 14.53, "grad_norm": 0.7890625, "learning_rate": 0.0002624505537239014, "loss": 0.155, "step": 350710 }, { "epoch": 14.53, "grad_norm": 0.373046875, "learning_rate": 0.00026243972197517794, "loss": 0.1871, "step": 350720 }, { "epoch": 14.53, "grad_norm": 1.3046875, "learning_rate": 0.00026242889020304424, "loss": 0.2507, "step": 350730 }, { "epoch": 14.53, "grad_norm": 1.2578125, "learning_rate": 0.0002624180584075206, "loss": 0.2711, "step": 350740 }, { "epoch": 14.53, "grad_norm": 0.97265625, "learning_rate": 0.0002624072265886277, "loss": 0.2151, "step": 350750 }, { "epoch": 14.53, "grad_norm": 0.306640625, "learning_rate": 0.00026239639474638567, "loss": 0.1546, "step": 350760 }, { "epoch": 14.53, "grad_norm": 0.86328125, "learning_rate": 0.0002623855628808149, "loss": 0.1961, "step": 350770 }, { "epoch": 14.53, "grad_norm": 0.58203125, "learning_rate": 0.00026237473099193593, "loss": 0.2112, "step": 350780 }, { "epoch": 14.53, "grad_norm": 1.234375, "learning_rate": 0.0002623638990797689, "loss": 0.2003, "step": 350790 }, { "epoch": 14.53, "grad_norm": 0.490234375, "learning_rate": 0.0002623530671443345, "loss": 0.1541, "step": 350800 }, { "epoch": 14.53, "grad_norm": 0.24609375, "learning_rate": 0.0002623422351856529, "loss": 0.2, "step": 350810 }, { "epoch": 14.53, "grad_norm": 0.42578125, "learning_rate": 0.0002623314032037446, "loss": 0.1638, "step": 350820 }, { "epoch": 14.53, "grad_norm": 0.408203125, "learning_rate": 0.0002623205711986299, "loss": 0.1729, "step": 350830 }, { "epoch": 14.53, "grad_norm": 0.953125, "learning_rate": 0.0002623097391703292, "loss": 0.2204, "step": 350840 }, { "epoch": 14.53, "grad_norm": 0.359375, "learning_rate": 0.0002622989071188629, "loss": 0.2172, "step": 350850 }, { "epoch": 14.53, "grad_norm": 1.9453125, "learning_rate": 0.0002622880750442515, "loss": 0.195, "step": 350860 }, { "epoch": 14.53, "grad_norm": 0.78515625, "learning_rate": 0.00026227724294651506, "loss": 0.2058, "step": 350870 }, { "epoch": 14.53, "grad_norm": 0.890625, "learning_rate": 0.0002622664108256743, "loss": 0.1372, "step": 350880 }, { "epoch": 14.53, "grad_norm": 2.515625, "learning_rate": 0.0002622555786817494, "loss": 0.1824, "step": 350890 }, { "epoch": 14.53, "grad_norm": 1.109375, "learning_rate": 0.0002622447465147609, "loss": 0.2287, "step": 350900 }, { "epoch": 14.53, "grad_norm": 0.83203125, "learning_rate": 0.00026223391432472916, "loss": 0.2216, "step": 350910 }, { "epoch": 14.54, "grad_norm": 3.984375, "learning_rate": 0.0002622230821116744, "loss": 0.2486, "step": 350920 }, { "epoch": 14.54, "grad_norm": 0.6640625, "learning_rate": 0.00026221224987561716, "loss": 0.2563, "step": 350930 }, { "epoch": 14.54, "grad_norm": 0.828125, "learning_rate": 0.0002622014176165778, "loss": 0.2082, "step": 350940 }, { "epoch": 14.54, "grad_norm": 0.828125, "learning_rate": 0.00026219058533457663, "loss": 0.216, "step": 350950 }, { "epoch": 14.54, "grad_norm": 0.75390625, "learning_rate": 0.0002621797530296342, "loss": 0.2022, "step": 350960 }, { "epoch": 14.54, "grad_norm": 1.296875, "learning_rate": 0.00026216892070177075, "loss": 0.1767, "step": 350970 }, { "epoch": 14.54, "grad_norm": 0.953125, "learning_rate": 0.00026215808835100665, "loss": 0.2117, "step": 350980 }, { "epoch": 14.54, "grad_norm": 1.0859375, "learning_rate": 0.00026214725597736236, "loss": 0.1792, "step": 350990 }, { "epoch": 14.54, "grad_norm": 0.9921875, "learning_rate": 0.0002621364235808583, "loss": 0.2286, "step": 351000 }, { "epoch": 14.54, "grad_norm": 0.310546875, "learning_rate": 0.00026212559116151475, "loss": 0.176, "step": 351010 }, { "epoch": 14.54, "grad_norm": 0.8515625, "learning_rate": 0.00026211475871935213, "loss": 0.1902, "step": 351020 }, { "epoch": 14.54, "grad_norm": 0.57421875, "learning_rate": 0.0002621039262543909, "loss": 0.137, "step": 351030 }, { "epoch": 14.54, "grad_norm": 0.484375, "learning_rate": 0.0002620930937666514, "loss": 0.2008, "step": 351040 }, { "epoch": 14.54, "grad_norm": 1.2578125, "learning_rate": 0.000262082261256154, "loss": 0.1792, "step": 351050 }, { "epoch": 14.54, "grad_norm": 0.86328125, "learning_rate": 0.000262071428722919, "loss": 0.1911, "step": 351060 }, { "epoch": 14.54, "grad_norm": 0.79296875, "learning_rate": 0.00026206059616696697, "loss": 0.1726, "step": 351070 }, { "epoch": 14.54, "grad_norm": 1.0234375, "learning_rate": 0.00026204976358831816, "loss": 0.2498, "step": 351080 }, { "epoch": 14.54, "grad_norm": 0.90234375, "learning_rate": 0.000262038930986993, "loss": 0.1769, "step": 351090 }, { "epoch": 14.54, "grad_norm": 0.6875, "learning_rate": 0.00026202809836301183, "loss": 0.2011, "step": 351100 }, { "epoch": 14.54, "grad_norm": 0.99609375, "learning_rate": 0.0002620172657163951, "loss": 0.2204, "step": 351110 }, { "epoch": 14.54, "grad_norm": 0.85546875, "learning_rate": 0.00026200643304716324, "loss": 0.2496, "step": 351120 }, { "epoch": 14.54, "grad_norm": 0.74609375, "learning_rate": 0.00026199560035533646, "loss": 0.1892, "step": 351130 }, { "epoch": 14.54, "grad_norm": 1.109375, "learning_rate": 0.0002619847676409354, "loss": 0.1813, "step": 351140 }, { "epoch": 14.54, "grad_norm": 0.95703125, "learning_rate": 0.0002619739349039801, "loss": 0.1959, "step": 351150 }, { "epoch": 14.55, "grad_norm": 0.9921875, "learning_rate": 0.00026196310214449136, "loss": 0.1923, "step": 351160 }, { "epoch": 14.55, "grad_norm": 0.8203125, "learning_rate": 0.0002619522693624892, "loss": 0.2012, "step": 351170 }, { "epoch": 14.55, "grad_norm": 1.125, "learning_rate": 0.00026194143655799425, "loss": 0.1714, "step": 351180 }, { "epoch": 14.55, "grad_norm": 0.80078125, "learning_rate": 0.00026193060373102677, "loss": 0.1535, "step": 351190 }, { "epoch": 14.55, "grad_norm": 1.75, "learning_rate": 0.00026191977088160724, "loss": 0.1925, "step": 351200 }, { "epoch": 14.55, "grad_norm": 0.40625, "learning_rate": 0.0002619089380097559, "loss": 0.1874, "step": 351210 }, { "epoch": 14.55, "grad_norm": 0.85546875, "learning_rate": 0.0002618981051154933, "loss": 0.2203, "step": 351220 }, { "epoch": 14.55, "grad_norm": 1.203125, "learning_rate": 0.0002618872721988397, "loss": 0.18, "step": 351230 }, { "epoch": 14.55, "grad_norm": 0.423828125, "learning_rate": 0.0002618764392598155, "loss": 0.2014, "step": 351240 }, { "epoch": 14.55, "grad_norm": 1.3984375, "learning_rate": 0.0002618656062984412, "loss": 0.1959, "step": 351250 }, { "epoch": 14.55, "grad_norm": 0.99609375, "learning_rate": 0.00026185477331473707, "loss": 0.2079, "step": 351260 }, { "epoch": 14.55, "grad_norm": 0.9609375, "learning_rate": 0.00026184394030872356, "loss": 0.1529, "step": 351270 }, { "epoch": 14.55, "grad_norm": 0.796875, "learning_rate": 0.000261833107280421, "loss": 0.1458, "step": 351280 }, { "epoch": 14.55, "grad_norm": 0.890625, "learning_rate": 0.00026182227422984986, "loss": 0.198, "step": 351290 }, { "epoch": 14.55, "grad_norm": 0.455078125, "learning_rate": 0.0002618114411570304, "loss": 0.1835, "step": 351300 }, { "epoch": 14.55, "grad_norm": 0.98828125, "learning_rate": 0.0002618006080619831, "loss": 0.1794, "step": 351310 }, { "epoch": 14.55, "grad_norm": 0.44921875, "learning_rate": 0.00026178977494472846, "loss": 0.1554, "step": 351320 }, { "epoch": 14.55, "grad_norm": 0.65234375, "learning_rate": 0.0002617789418052866, "loss": 0.1923, "step": 351330 }, { "epoch": 14.55, "grad_norm": 0.54296875, "learning_rate": 0.0002617681086436781, "loss": 0.2007, "step": 351340 }, { "epoch": 14.55, "grad_norm": 0.46875, "learning_rate": 0.00026175727545992326, "loss": 0.1678, "step": 351350 }, { "epoch": 14.55, "grad_norm": 0.82421875, "learning_rate": 0.0002617464422540425, "loss": 0.1934, "step": 351360 }, { "epoch": 14.55, "grad_norm": 0.43359375, "learning_rate": 0.00026173560902605623, "loss": 0.1332, "step": 351370 }, { "epoch": 14.55, "grad_norm": 1.0234375, "learning_rate": 0.0002617247757759848, "loss": 0.2237, "step": 351380 }, { "epoch": 14.55, "grad_norm": 1.0625, "learning_rate": 0.00026171394250384863, "loss": 0.1667, "step": 351390 }, { "epoch": 14.55, "grad_norm": 0.236328125, "learning_rate": 0.00026170310920966807, "loss": 0.2266, "step": 351400 }, { "epoch": 14.56, "grad_norm": 0.5859375, "learning_rate": 0.0002616922758934635, "loss": 0.1551, "step": 351410 }, { "epoch": 14.56, "grad_norm": 0.5390625, "learning_rate": 0.00026168144255525534, "loss": 0.1505, "step": 351420 }, { "epoch": 14.56, "grad_norm": 0.53125, "learning_rate": 0.00026167060919506395, "loss": 0.2006, "step": 351430 }, { "epoch": 14.56, "grad_norm": 1.7265625, "learning_rate": 0.0002616597758129097, "loss": 0.2369, "step": 351440 }, { "epoch": 14.56, "grad_norm": 1.0, "learning_rate": 0.00026164894240881316, "loss": 0.1865, "step": 351450 }, { "epoch": 14.56, "grad_norm": 0.9921875, "learning_rate": 0.00026163810898279443, "loss": 0.2093, "step": 351460 }, { "epoch": 14.56, "grad_norm": 0.98828125, "learning_rate": 0.0002616272755348741, "loss": 0.1998, "step": 351470 }, { "epoch": 14.56, "grad_norm": 0.4375, "learning_rate": 0.00026161644206507253, "loss": 0.2204, "step": 351480 }, { "epoch": 14.56, "grad_norm": 0.5234375, "learning_rate": 0.0002616056085734099, "loss": 0.1699, "step": 351490 }, { "epoch": 14.56, "grad_norm": 0.83203125, "learning_rate": 0.0002615947750599069, "loss": 0.2532, "step": 351500 }, { "epoch": 14.56, "grad_norm": 0.9921875, "learning_rate": 0.00026158394152458374, "loss": 0.1721, "step": 351510 }, { "epoch": 14.56, "grad_norm": 0.84765625, "learning_rate": 0.0002615731079674609, "loss": 0.1678, "step": 351520 }, { "epoch": 14.56, "grad_norm": 0.78125, "learning_rate": 0.0002615622743885587, "loss": 0.1308, "step": 351530 }, { "epoch": 14.56, "grad_norm": 0.640625, "learning_rate": 0.00026155144078789756, "loss": 0.211, "step": 351540 }, { "epoch": 14.56, "grad_norm": 2.234375, "learning_rate": 0.00026154060716549783, "loss": 0.1715, "step": 351550 }, { "epoch": 14.56, "grad_norm": 0.345703125, "learning_rate": 0.0002615297735213799, "loss": 0.1992, "step": 351560 }, { "epoch": 14.56, "grad_norm": 1.375, "learning_rate": 0.0002615189398555642, "loss": 0.1709, "step": 351570 }, { "epoch": 14.56, "grad_norm": 0.921875, "learning_rate": 0.00026150810616807116, "loss": 0.2072, "step": 351580 }, { "epoch": 14.56, "grad_norm": 1.1328125, "learning_rate": 0.00026149727245892106, "loss": 0.2457, "step": 351590 }, { "epoch": 14.56, "grad_norm": 0.859375, "learning_rate": 0.00026148643872813434, "loss": 0.2048, "step": 351600 }, { "epoch": 14.56, "grad_norm": 1.5859375, "learning_rate": 0.0002614756049757314, "loss": 0.1833, "step": 351610 }, { "epoch": 14.56, "grad_norm": 0.8828125, "learning_rate": 0.0002614647712017326, "loss": 0.2337, "step": 351620 }, { "epoch": 14.56, "grad_norm": 1.09375, "learning_rate": 0.0002614539374061583, "loss": 0.1514, "step": 351630 }, { "epoch": 14.56, "grad_norm": 0.86328125, "learning_rate": 0.00026144310358902894, "loss": 0.1498, "step": 351640 }, { "epoch": 14.57, "grad_norm": 1.0078125, "learning_rate": 0.0002614322697503649, "loss": 0.1978, "step": 351650 }, { "epoch": 14.57, "grad_norm": 0.5078125, "learning_rate": 0.00026142143589018665, "loss": 0.1938, "step": 351660 }, { "epoch": 14.57, "grad_norm": 0.72265625, "learning_rate": 0.0002614106020085144, "loss": 0.1877, "step": 351670 }, { "epoch": 14.57, "grad_norm": 0.96875, "learning_rate": 0.00026139976810536864, "loss": 0.1764, "step": 351680 }, { "epoch": 14.57, "grad_norm": 2.28125, "learning_rate": 0.0002613889341807698, "loss": 0.187, "step": 351690 }, { "epoch": 14.57, "grad_norm": 1.1015625, "learning_rate": 0.0002613781002347381, "loss": 0.1812, "step": 351700 }, { "epoch": 14.57, "grad_norm": 0.85546875, "learning_rate": 0.0002613672662672941, "loss": 0.127, "step": 351710 }, { "epoch": 14.57, "grad_norm": 2.078125, "learning_rate": 0.00026135643227845817, "loss": 0.2055, "step": 351720 }, { "epoch": 14.57, "grad_norm": 1.046875, "learning_rate": 0.0002613455982682506, "loss": 0.2126, "step": 351730 }, { "epoch": 14.57, "grad_norm": 1.2578125, "learning_rate": 0.00026133476423669194, "loss": 0.2134, "step": 351740 }, { "epoch": 14.57, "grad_norm": 1.09375, "learning_rate": 0.0002613239301838024, "loss": 0.1652, "step": 351750 }, { "epoch": 14.57, "grad_norm": 1.0234375, "learning_rate": 0.0002613130961096024, "loss": 0.1911, "step": 351760 }, { "epoch": 14.57, "grad_norm": 1.046875, "learning_rate": 0.0002613022620141125, "loss": 0.2103, "step": 351770 }, { "epoch": 14.57, "grad_norm": 0.65234375, "learning_rate": 0.00026129142789735283, "loss": 0.1492, "step": 351780 }, { "epoch": 14.57, "grad_norm": 0.9296875, "learning_rate": 0.00026128059375934407, "loss": 0.2178, "step": 351790 }, { "epoch": 14.57, "grad_norm": 0.7734375, "learning_rate": 0.0002612697596001063, "loss": 0.1907, "step": 351800 }, { "epoch": 14.57, "grad_norm": 1.3203125, "learning_rate": 0.0002612589254196601, "loss": 0.1804, "step": 351810 }, { "epoch": 14.57, "grad_norm": 0.37109375, "learning_rate": 0.0002612480912180259, "loss": 0.2279, "step": 351820 }, { "epoch": 14.57, "grad_norm": 0.25, "learning_rate": 0.0002612372569952239, "loss": 0.1383, "step": 351830 }, { "epoch": 14.57, "grad_norm": 1.0390625, "learning_rate": 0.0002612264227512746, "loss": 0.2047, "step": 351840 }, { "epoch": 14.57, "grad_norm": 0.90234375, "learning_rate": 0.00026121558848619855, "loss": 0.2064, "step": 351850 }, { "epoch": 14.57, "grad_norm": 0.6796875, "learning_rate": 0.0002612047542000157, "loss": 0.1932, "step": 351860 }, { "epoch": 14.57, "grad_norm": 1.046875, "learning_rate": 0.00026119391989274694, "loss": 0.1978, "step": 351870 }, { "epoch": 14.57, "grad_norm": 1.9375, "learning_rate": 0.00026118308556441234, "loss": 0.244, "step": 351880 }, { "epoch": 14.58, "grad_norm": 0.73828125, "learning_rate": 0.0002611722512150324, "loss": 0.1764, "step": 351890 }, { "epoch": 14.58, "grad_norm": 0.91796875, "learning_rate": 0.00026116141684462754, "loss": 0.1671, "step": 351900 }, { "epoch": 14.58, "grad_norm": 0.32421875, "learning_rate": 0.000261150582453218, "loss": 0.1724, "step": 351910 }, { "epoch": 14.58, "grad_norm": 0.640625, "learning_rate": 0.0002611397480408243, "loss": 0.2102, "step": 351920 }, { "epoch": 14.58, "grad_norm": 0.92578125, "learning_rate": 0.0002611289136074668, "loss": 0.2135, "step": 351930 }, { "epoch": 14.58, "grad_norm": 1.8828125, "learning_rate": 0.00026111807915316586, "loss": 0.229, "step": 351940 }, { "epoch": 14.58, "grad_norm": 0.5703125, "learning_rate": 0.000261107244677942, "loss": 0.1733, "step": 351950 }, { "epoch": 14.58, "grad_norm": 0.494140625, "learning_rate": 0.00026109641018181543, "loss": 0.2092, "step": 351960 }, { "epoch": 14.58, "grad_norm": 0.7265625, "learning_rate": 0.00026108557566480655, "loss": 0.2602, "step": 351970 }, { "epoch": 14.58, "grad_norm": 1.6171875, "learning_rate": 0.00026107474112693595, "loss": 0.2491, "step": 351980 }, { "epoch": 14.58, "grad_norm": 1.078125, "learning_rate": 0.00026106390656822383, "loss": 0.194, "step": 351990 }, { "epoch": 14.58, "grad_norm": 0.94921875, "learning_rate": 0.00026105307198869053, "loss": 0.1728, "step": 352000 }, { "epoch": 14.58, "grad_norm": 0.76953125, "learning_rate": 0.0002610422373883567, "loss": 0.2023, "step": 352010 }, { "epoch": 14.58, "grad_norm": 0.78515625, "learning_rate": 0.00026103140276724247, "loss": 0.1152, "step": 352020 }, { "epoch": 14.58, "grad_norm": 1.234375, "learning_rate": 0.0002610205681253684, "loss": 0.1684, "step": 352030 }, { "epoch": 14.58, "grad_norm": 1.21875, "learning_rate": 0.00026100973346275476, "loss": 0.1975, "step": 352040 }, { "epoch": 14.58, "grad_norm": 0.51953125, "learning_rate": 0.00026099889877942195, "loss": 0.1826, "step": 352050 }, { "epoch": 14.58, "grad_norm": 0.9375, "learning_rate": 0.00026098806407539053, "loss": 0.1597, "step": 352060 }, { "epoch": 14.58, "grad_norm": 0.259765625, "learning_rate": 0.0002609772293506806, "loss": 0.2169, "step": 352070 }, { "epoch": 14.58, "grad_norm": 0.78515625, "learning_rate": 0.00026096639460531284, "loss": 0.1583, "step": 352080 }, { "epoch": 14.58, "grad_norm": 0.8125, "learning_rate": 0.0002609555598393075, "loss": 0.1719, "step": 352090 }, { "epoch": 14.58, "grad_norm": 0.490234375, "learning_rate": 0.0002609447250526849, "loss": 0.1744, "step": 352100 }, { "epoch": 14.58, "grad_norm": 0.7890625, "learning_rate": 0.0002609338902454656, "loss": 0.1577, "step": 352110 }, { "epoch": 14.58, "grad_norm": 0.26953125, "learning_rate": 0.0002609230554176698, "loss": 0.1998, "step": 352120 }, { "epoch": 14.59, "grad_norm": 0.921875, "learning_rate": 0.00026091222056931804, "loss": 0.1677, "step": 352130 }, { "epoch": 14.59, "grad_norm": 0.71875, "learning_rate": 0.0002609013857004307, "loss": 0.1528, "step": 352140 }, { "epoch": 14.59, "grad_norm": 1.4296875, "learning_rate": 0.00026089055081102803, "loss": 0.1665, "step": 352150 }, { "epoch": 14.59, "grad_norm": 0.77734375, "learning_rate": 0.00026087971590113063, "loss": 0.1973, "step": 352160 }, { "epoch": 14.59, "grad_norm": 1.0546875, "learning_rate": 0.0002608688809707588, "loss": 0.2036, "step": 352170 }, { "epoch": 14.59, "grad_norm": 0.77734375, "learning_rate": 0.00026085804601993275, "loss": 0.1684, "step": 352180 }, { "epoch": 14.59, "grad_norm": 0.458984375, "learning_rate": 0.00026084721104867314, "loss": 0.1965, "step": 352190 }, { "epoch": 14.59, "grad_norm": 0.625, "learning_rate": 0.00026083637605700023, "loss": 0.1592, "step": 352200 }, { "epoch": 14.59, "grad_norm": 0.65625, "learning_rate": 0.0002608255410449344, "loss": 0.1678, "step": 352210 }, { "epoch": 14.59, "grad_norm": 0.6328125, "learning_rate": 0.0002608147060124962, "loss": 0.2023, "step": 352220 }, { "epoch": 14.59, "grad_norm": 0.97265625, "learning_rate": 0.0002608038709597057, "loss": 0.1826, "step": 352230 }, { "epoch": 14.59, "grad_norm": 1.4453125, "learning_rate": 0.0002607930358865836, "loss": 0.1924, "step": 352240 }, { "epoch": 14.59, "grad_norm": 0.7734375, "learning_rate": 0.0002607822007931501, "loss": 0.2187, "step": 352250 }, { "epoch": 14.59, "grad_norm": 0.6796875, "learning_rate": 0.00026077136567942573, "loss": 0.1844, "step": 352260 }, { "epoch": 14.59, "grad_norm": 0.8203125, "learning_rate": 0.0002607605305454308, "loss": 0.2212, "step": 352270 }, { "epoch": 14.59, "grad_norm": 0.96875, "learning_rate": 0.0002607496953911857, "loss": 0.2115, "step": 352280 }, { "epoch": 14.59, "grad_norm": 0.6171875, "learning_rate": 0.00026073886021671085, "loss": 0.2173, "step": 352290 }, { "epoch": 14.59, "grad_norm": 1.0625, "learning_rate": 0.00026072802502202666, "loss": 0.1769, "step": 352300 }, { "epoch": 14.59, "grad_norm": 1.6640625, "learning_rate": 0.0002607171898071534, "loss": 0.1835, "step": 352310 }, { "epoch": 14.59, "grad_norm": 0.447265625, "learning_rate": 0.00026070635457211156, "loss": 0.2092, "step": 352320 }, { "epoch": 14.59, "grad_norm": 1.375, "learning_rate": 0.0002606955193169215, "loss": 0.1903, "step": 352330 }, { "epoch": 14.59, "grad_norm": 0.51953125, "learning_rate": 0.0002606846840416037, "loss": 0.1646, "step": 352340 }, { "epoch": 14.59, "grad_norm": 1.3125, "learning_rate": 0.0002606738487461785, "loss": 0.1937, "step": 352350 }, { "epoch": 14.59, "grad_norm": 0.498046875, "learning_rate": 0.00026066301343066626, "loss": 0.1979, "step": 352360 }, { "epoch": 14.6, "grad_norm": 1.71875, "learning_rate": 0.0002606521780950873, "loss": 0.2371, "step": 352370 }, { "epoch": 14.6, "grad_norm": 0.3359375, "learning_rate": 0.0002606413427394622, "loss": 0.1923, "step": 352380 }, { "epoch": 14.6, "grad_norm": 0.71484375, "learning_rate": 0.00026063050736381104, "loss": 0.1569, "step": 352390 }, { "epoch": 14.6, "grad_norm": 0.921875, "learning_rate": 0.0002606196719681546, "loss": 0.182, "step": 352400 }, { "epoch": 14.6, "grad_norm": 0.578125, "learning_rate": 0.000260608836552513, "loss": 0.1589, "step": 352410 }, { "epoch": 14.6, "grad_norm": 0.9375, "learning_rate": 0.0002605980011169067, "loss": 0.2016, "step": 352420 }, { "epoch": 14.6, "grad_norm": 1.203125, "learning_rate": 0.0002605871656613562, "loss": 0.2259, "step": 352430 }, { "epoch": 14.6, "grad_norm": 0.29296875, "learning_rate": 0.0002605763301858818, "loss": 0.1744, "step": 352440 }, { "epoch": 14.6, "grad_norm": 1.078125, "learning_rate": 0.00026056549469050377, "loss": 0.186, "step": 352450 }, { "epoch": 14.6, "grad_norm": 0.98828125, "learning_rate": 0.0002605546591752427, "loss": 0.1962, "step": 352460 }, { "epoch": 14.6, "grad_norm": 0.79296875, "learning_rate": 0.00026054382364011885, "loss": 0.1453, "step": 352470 }, { "epoch": 14.6, "grad_norm": 0.142578125, "learning_rate": 0.00026053298808515277, "loss": 0.186, "step": 352480 }, { "epoch": 14.6, "grad_norm": 0.69921875, "learning_rate": 0.00026052215251036464, "loss": 0.1995, "step": 352490 }, { "epoch": 14.6, "grad_norm": 0.94140625, "learning_rate": 0.000260511316915775, "loss": 0.1515, "step": 352500 }, { "epoch": 14.6, "grad_norm": 1.1328125, "learning_rate": 0.0002605004813014042, "loss": 0.1853, "step": 352510 }, { "epoch": 14.6, "grad_norm": 0.9765625, "learning_rate": 0.0002604896456672726, "loss": 0.1485, "step": 352520 }, { "epoch": 14.6, "grad_norm": 0.83984375, "learning_rate": 0.0002604788100134006, "loss": 0.1974, "step": 352530 }, { "epoch": 14.6, "grad_norm": 1.0703125, "learning_rate": 0.00026046797433980866, "loss": 0.2045, "step": 352540 }, { "epoch": 14.6, "grad_norm": 0.5625, "learning_rate": 0.0002604571386465171, "loss": 0.1939, "step": 352550 }, { "epoch": 14.6, "grad_norm": 0.53125, "learning_rate": 0.00026044630293354636, "loss": 0.1223, "step": 352560 }, { "epoch": 14.6, "grad_norm": 1.7421875, "learning_rate": 0.0002604354672009168, "loss": 0.1839, "step": 352570 }, { "epoch": 14.6, "grad_norm": 0.7578125, "learning_rate": 0.0002604246314486487, "loss": 0.2306, "step": 352580 }, { "epoch": 14.6, "grad_norm": 0.08544921875, "learning_rate": 0.00026041379567676284, "loss": 0.1431, "step": 352590 }, { "epoch": 14.6, "grad_norm": 0.78125, "learning_rate": 0.00026040295988527914, "loss": 0.238, "step": 352600 }, { "epoch": 14.61, "grad_norm": 0.59375, "learning_rate": 0.0002603921240742182, "loss": 0.1559, "step": 352610 }, { "epoch": 14.61, "grad_norm": 0.68359375, "learning_rate": 0.0002603812882436004, "loss": 0.1869, "step": 352620 }, { "epoch": 14.61, "grad_norm": 0.84765625, "learning_rate": 0.00026037045239344617, "loss": 0.1925, "step": 352630 }, { "epoch": 14.61, "grad_norm": 2.15625, "learning_rate": 0.0002603596165237759, "loss": 0.211, "step": 352640 }, { "epoch": 14.61, "grad_norm": 0.486328125, "learning_rate": 0.00026034878063460994, "loss": 0.1924, "step": 352650 }, { "epoch": 14.61, "grad_norm": 0.447265625, "learning_rate": 0.0002603379447259686, "loss": 0.1503, "step": 352660 }, { "epoch": 14.61, "grad_norm": 0.30859375, "learning_rate": 0.0002603271087978725, "loss": 0.1858, "step": 352670 }, { "epoch": 14.61, "grad_norm": 0.6328125, "learning_rate": 0.00026031627285034184, "loss": 0.2145, "step": 352680 }, { "epoch": 14.61, "grad_norm": 0.64453125, "learning_rate": 0.00026030543688339704, "loss": 0.1517, "step": 352690 }, { "epoch": 14.61, "grad_norm": 0.482421875, "learning_rate": 0.0002602946008970585, "loss": 0.1907, "step": 352700 }, { "epoch": 14.61, "grad_norm": 1.484375, "learning_rate": 0.0002602837648913467, "loss": 0.2118, "step": 352710 }, { "epoch": 14.61, "grad_norm": 0.71875, "learning_rate": 0.00026027292886628194, "loss": 0.1909, "step": 352720 }, { "epoch": 14.61, "grad_norm": 2.03125, "learning_rate": 0.00026026209282188466, "loss": 0.2196, "step": 352730 }, { "epoch": 14.61, "grad_norm": 0.671875, "learning_rate": 0.0002602512567581752, "loss": 0.1924, "step": 352740 }, { "epoch": 14.61, "grad_norm": 0.90234375, "learning_rate": 0.00026024042067517396, "loss": 0.1708, "step": 352750 }, { "epoch": 14.61, "grad_norm": 1.015625, "learning_rate": 0.00026022958457290143, "loss": 0.2376, "step": 352760 }, { "epoch": 14.61, "grad_norm": 0.66015625, "learning_rate": 0.0002602187484513779, "loss": 0.2245, "step": 352770 }, { "epoch": 14.61, "grad_norm": 2.078125, "learning_rate": 0.00026020791231062373, "loss": 0.2392, "step": 352780 }, { "epoch": 14.61, "grad_norm": 0.91015625, "learning_rate": 0.0002601970761506594, "loss": 0.1826, "step": 352790 }, { "epoch": 14.61, "grad_norm": 0.8359375, "learning_rate": 0.00026018623997150526, "loss": 0.2184, "step": 352800 }, { "epoch": 14.61, "grad_norm": 0.8125, "learning_rate": 0.0002601754037731818, "loss": 0.2422, "step": 352810 }, { "epoch": 14.61, "grad_norm": 0.76953125, "learning_rate": 0.0002601645675557092, "loss": 0.1932, "step": 352820 }, { "epoch": 14.61, "grad_norm": 0.88671875, "learning_rate": 0.0002601537313191081, "loss": 0.1347, "step": 352830 }, { "epoch": 14.61, "grad_norm": 0.57421875, "learning_rate": 0.0002601428950633987, "loss": 0.1918, "step": 352840 }, { "epoch": 14.62, "grad_norm": 0.66796875, "learning_rate": 0.00026013205878860153, "loss": 0.2005, "step": 352850 }, { "epoch": 14.62, "grad_norm": 2.125, "learning_rate": 0.00026012122249473687, "loss": 0.1692, "step": 352860 }, { "epoch": 14.62, "grad_norm": 0.30078125, "learning_rate": 0.00026011038618182515, "loss": 0.15, "step": 352870 }, { "epoch": 14.62, "grad_norm": 0.283203125, "learning_rate": 0.0002600995498498868, "loss": 0.2104, "step": 352880 }, { "epoch": 14.62, "grad_norm": 0.322265625, "learning_rate": 0.0002600887134989422, "loss": 0.1967, "step": 352890 }, { "epoch": 14.62, "grad_norm": 1.890625, "learning_rate": 0.0002600778771290117, "loss": 0.221, "step": 352900 }, { "epoch": 14.62, "grad_norm": 0.7109375, "learning_rate": 0.0002600670407401157, "loss": 0.1915, "step": 352910 }, { "epoch": 14.62, "grad_norm": 0.51953125, "learning_rate": 0.0002600562043322747, "loss": 0.1744, "step": 352920 }, { "epoch": 14.62, "grad_norm": 1.6171875, "learning_rate": 0.000260045367905509, "loss": 0.1805, "step": 352930 }, { "epoch": 14.62, "grad_norm": 1.1171875, "learning_rate": 0.0002600345314598389, "loss": 0.1774, "step": 352940 }, { "epoch": 14.62, "grad_norm": 0.921875, "learning_rate": 0.000260023694995285, "loss": 0.1747, "step": 352950 }, { "epoch": 14.62, "grad_norm": 1.234375, "learning_rate": 0.0002600128585118675, "loss": 0.1473, "step": 352960 }, { "epoch": 14.62, "grad_norm": 1.5, "learning_rate": 0.000260002022009607, "loss": 0.196, "step": 352970 }, { "epoch": 14.62, "grad_norm": 0.67578125, "learning_rate": 0.0002599911854885237, "loss": 0.1422, "step": 352980 }, { "epoch": 14.62, "grad_norm": 1.1328125, "learning_rate": 0.00025998034894863804, "loss": 0.2203, "step": 352990 }, { "epoch": 14.62, "grad_norm": 1.34375, "learning_rate": 0.00025996951238997054, "loss": 0.1507, "step": 353000 }, { "epoch": 14.62, "grad_norm": 0.98828125, "learning_rate": 0.00025995867581254137, "loss": 0.1628, "step": 353010 }, { "epoch": 14.62, "grad_norm": 0.88671875, "learning_rate": 0.0002599478392163711, "loss": 0.2017, "step": 353020 }, { "epoch": 14.62, "grad_norm": 0.66796875, "learning_rate": 0.00025993700260148014, "loss": 0.1945, "step": 353030 }, { "epoch": 14.62, "grad_norm": 0.7109375, "learning_rate": 0.0002599261659678887, "loss": 0.1633, "step": 353040 }, { "epoch": 14.62, "grad_norm": 0.890625, "learning_rate": 0.0002599153293156174, "loss": 0.2256, "step": 353050 }, { "epoch": 14.62, "grad_norm": 0.78125, "learning_rate": 0.0002599044926446864, "loss": 0.1904, "step": 353060 }, { "epoch": 14.62, "grad_norm": 0.90234375, "learning_rate": 0.00025989365595511626, "loss": 0.2221, "step": 353070 }, { "epoch": 14.62, "grad_norm": 0.455078125, "learning_rate": 0.0002598828192469274, "loss": 0.1475, "step": 353080 }, { "epoch": 14.62, "grad_norm": 0.62890625, "learning_rate": 0.00025987198252014007, "loss": 0.2341, "step": 353090 }, { "epoch": 14.63, "grad_norm": 0.53125, "learning_rate": 0.00025986114577477475, "loss": 0.1884, "step": 353100 }, { "epoch": 14.63, "grad_norm": 0.375, "learning_rate": 0.00025985030901085185, "loss": 0.1825, "step": 353110 }, { "epoch": 14.63, "grad_norm": 0.7578125, "learning_rate": 0.00025983947222839163, "loss": 0.1543, "step": 353120 }, { "epoch": 14.63, "grad_norm": 0.25, "learning_rate": 0.00025982863542741477, "loss": 0.1981, "step": 353130 }, { "epoch": 14.63, "grad_norm": 0.80859375, "learning_rate": 0.0002598177986079413, "loss": 0.2066, "step": 353140 }, { "epoch": 14.63, "grad_norm": 1.0546875, "learning_rate": 0.00025980696176999184, "loss": 0.1929, "step": 353150 }, { "epoch": 14.63, "grad_norm": 0.75390625, "learning_rate": 0.0002597961249135868, "loss": 0.1649, "step": 353160 }, { "epoch": 14.63, "grad_norm": 1.5078125, "learning_rate": 0.0002597852880387464, "loss": 0.1802, "step": 353170 }, { "epoch": 14.63, "grad_norm": 0.609375, "learning_rate": 0.00025977445114549124, "loss": 0.1986, "step": 353180 }, { "epoch": 14.63, "grad_norm": 0.796875, "learning_rate": 0.0002597636142338416, "loss": 0.207, "step": 353190 }, { "epoch": 14.63, "grad_norm": 1.28125, "learning_rate": 0.0002597527773038179, "loss": 0.1912, "step": 353200 }, { "epoch": 14.63, "grad_norm": 0.953125, "learning_rate": 0.0002597419403554405, "loss": 0.1842, "step": 353210 }, { "epoch": 14.63, "grad_norm": 1.140625, "learning_rate": 0.0002597311033887298, "loss": 0.1647, "step": 353220 }, { "epoch": 14.63, "grad_norm": 0.90234375, "learning_rate": 0.0002597202664037063, "loss": 0.1271, "step": 353230 }, { "epoch": 14.63, "grad_norm": 1.6171875, "learning_rate": 0.00025970942940039025, "loss": 0.1473, "step": 353240 }, { "epoch": 14.63, "grad_norm": 0.71484375, "learning_rate": 0.00025969859237880204, "loss": 0.1554, "step": 353250 }, { "epoch": 14.63, "grad_norm": 0.60546875, "learning_rate": 0.00025968775533896226, "loss": 0.2205, "step": 353260 }, { "epoch": 14.63, "grad_norm": 0.92578125, "learning_rate": 0.0002596769182808911, "loss": 0.1546, "step": 353270 }, { "epoch": 14.63, "grad_norm": 0.326171875, "learning_rate": 0.000259666081204609, "loss": 0.2154, "step": 353280 }, { "epoch": 14.63, "grad_norm": 0.7890625, "learning_rate": 0.0002596552441101365, "loss": 0.2427, "step": 353290 }, { "epoch": 14.63, "grad_norm": 0.6484375, "learning_rate": 0.0002596444069974937, "loss": 0.2023, "step": 353300 }, { "epoch": 14.63, "grad_norm": 0.5625, "learning_rate": 0.0002596335698667013, "loss": 0.2119, "step": 353310 }, { "epoch": 14.63, "grad_norm": 0.65625, "learning_rate": 0.0002596227327177795, "loss": 0.1866, "step": 353320 }, { "epoch": 14.63, "grad_norm": 0.8125, "learning_rate": 0.0002596118955507487, "loss": 0.2093, "step": 353330 }, { "epoch": 14.64, "grad_norm": 0.6171875, "learning_rate": 0.0002596010583656295, "loss": 0.2068, "step": 353340 }, { "epoch": 14.64, "grad_norm": 0.640625, "learning_rate": 0.000259590221162442, "loss": 0.2094, "step": 353350 }, { "epoch": 14.64, "grad_norm": 1.3671875, "learning_rate": 0.0002595793839412068, "loss": 0.2048, "step": 353360 }, { "epoch": 14.64, "grad_norm": 2.296875, "learning_rate": 0.0002595685467019443, "loss": 0.2088, "step": 353370 }, { "epoch": 14.64, "grad_norm": 0.359375, "learning_rate": 0.00025955770944467466, "loss": 0.1798, "step": 353380 }, { "epoch": 14.64, "grad_norm": 0.85546875, "learning_rate": 0.0002595468721694186, "loss": 0.1997, "step": 353390 }, { "epoch": 14.64, "grad_norm": 0.8203125, "learning_rate": 0.0002595360348761963, "loss": 0.228, "step": 353400 }, { "epoch": 14.64, "grad_norm": 1.1484375, "learning_rate": 0.00025952519756502817, "loss": 0.1951, "step": 353410 }, { "epoch": 14.64, "grad_norm": 1.359375, "learning_rate": 0.0002595143602359348, "loss": 0.1812, "step": 353420 }, { "epoch": 14.64, "grad_norm": 1.5546875, "learning_rate": 0.00025950352288893626, "loss": 0.1865, "step": 353430 }, { "epoch": 14.64, "grad_norm": 1.359375, "learning_rate": 0.0002594926855240532, "loss": 0.1939, "step": 353440 }, { "epoch": 14.64, "grad_norm": 0.7734375, "learning_rate": 0.00025948184814130593, "loss": 0.2105, "step": 353450 }, { "epoch": 14.64, "grad_norm": 0.29296875, "learning_rate": 0.0002594710107407148, "loss": 0.2079, "step": 353460 }, { "epoch": 14.64, "grad_norm": 1.03125, "learning_rate": 0.00025946017332230024, "loss": 0.1846, "step": 353470 }, { "epoch": 14.64, "grad_norm": 3.0, "learning_rate": 0.0002594493358860827, "loss": 0.1958, "step": 353480 }, { "epoch": 14.64, "grad_norm": 1.4453125, "learning_rate": 0.0002594384984320825, "loss": 0.1697, "step": 353490 }, { "epoch": 14.64, "grad_norm": 0.9140625, "learning_rate": 0.0002594276609603202, "loss": 0.1711, "step": 353500 }, { "epoch": 14.64, "grad_norm": 1.3515625, "learning_rate": 0.0002594168234708159, "loss": 0.19, "step": 353510 }, { "epoch": 14.64, "grad_norm": 1.25, "learning_rate": 0.00025940598596359016, "loss": 0.2038, "step": 353520 }, { "epoch": 14.64, "grad_norm": 0.76953125, "learning_rate": 0.0002593951484386635, "loss": 0.2236, "step": 353530 }, { "epoch": 14.64, "grad_norm": 1.4453125, "learning_rate": 0.000259384310896056, "loss": 0.1884, "step": 353540 }, { "epoch": 14.64, "grad_norm": 0.62109375, "learning_rate": 0.00025937347333578834, "loss": 0.1624, "step": 353550 }, { "epoch": 14.64, "grad_norm": 0.80078125, "learning_rate": 0.0002593626357578808, "loss": 0.1742, "step": 353560 }, { "epoch": 14.64, "grad_norm": 0.427734375, "learning_rate": 0.0002593517981623538, "loss": 0.1833, "step": 353570 }, { "epoch": 14.65, "grad_norm": 0.6328125, "learning_rate": 0.00025934096054922777, "loss": 0.2079, "step": 353580 }, { "epoch": 14.65, "grad_norm": 0.328125, "learning_rate": 0.000259330122918523, "loss": 0.2386, "step": 353590 }, { "epoch": 14.65, "grad_norm": 0.58984375, "learning_rate": 0.00025931928527026, "loss": 0.19, "step": 353600 }, { "epoch": 14.65, "grad_norm": 1.109375, "learning_rate": 0.00025930844760445917, "loss": 0.2575, "step": 353610 }, { "epoch": 14.65, "grad_norm": 1.421875, "learning_rate": 0.00025929760992114063, "loss": 0.1748, "step": 353620 }, { "epoch": 14.65, "grad_norm": 0.7421875, "learning_rate": 0.00025928677222032513, "loss": 0.1136, "step": 353630 }, { "epoch": 14.65, "grad_norm": 1.1328125, "learning_rate": 0.00025927593450203294, "loss": 0.1691, "step": 353640 }, { "epoch": 14.65, "grad_norm": 0.64453125, "learning_rate": 0.0002592650967662844, "loss": 0.181, "step": 353650 }, { "epoch": 14.65, "grad_norm": 0.76953125, "learning_rate": 0.0002592542590131, "loss": 0.1217, "step": 353660 }, { "epoch": 14.65, "grad_norm": 1.75, "learning_rate": 0.00025924342124250007, "loss": 0.18, "step": 353670 }, { "epoch": 14.65, "grad_norm": 1.9296875, "learning_rate": 0.00025923258345450495, "loss": 0.1764, "step": 353680 }, { "epoch": 14.65, "grad_norm": 0.51953125, "learning_rate": 0.00025922174564913523, "loss": 0.2126, "step": 353690 }, { "epoch": 14.65, "grad_norm": 0.83984375, "learning_rate": 0.00025921090782641106, "loss": 0.2198, "step": 353700 }, { "epoch": 14.65, "grad_norm": 1.2734375, "learning_rate": 0.00025920006998635304, "loss": 0.2386, "step": 353710 }, { "epoch": 14.65, "grad_norm": 0.84765625, "learning_rate": 0.00025918923212898146, "loss": 0.1665, "step": 353720 }, { "epoch": 14.65, "grad_norm": 1.296875, "learning_rate": 0.0002591783942543167, "loss": 0.2432, "step": 353730 }, { "epoch": 14.65, "grad_norm": 0.4140625, "learning_rate": 0.0002591675563623792, "loss": 0.1744, "step": 353740 }, { "epoch": 14.65, "grad_norm": 0.7109375, "learning_rate": 0.0002591567184531894, "loss": 0.2351, "step": 353750 }, { "epoch": 14.65, "grad_norm": 0.44140625, "learning_rate": 0.00025914588052676756, "loss": 0.1838, "step": 353760 }, { "epoch": 14.65, "grad_norm": 1.25, "learning_rate": 0.0002591350425831342, "loss": 0.1805, "step": 353770 }, { "epoch": 14.65, "grad_norm": 0.6484375, "learning_rate": 0.00025912420462230966, "loss": 0.1959, "step": 353780 }, { "epoch": 14.65, "grad_norm": 0.8125, "learning_rate": 0.00025911336664431446, "loss": 0.2443, "step": 353790 }, { "epoch": 14.65, "grad_norm": 0.25, "learning_rate": 0.00025910252864916875, "loss": 0.1921, "step": 353800 }, { "epoch": 14.65, "grad_norm": 0.765625, "learning_rate": 0.00025909169063689314, "loss": 0.1922, "step": 353810 }, { "epoch": 14.66, "grad_norm": 0.8359375, "learning_rate": 0.00025908085260750794, "loss": 0.2135, "step": 353820 }, { "epoch": 14.66, "grad_norm": 0.77734375, "learning_rate": 0.00025907001456103356, "loss": 0.2282, "step": 353830 }, { "epoch": 14.66, "grad_norm": 0.87890625, "learning_rate": 0.00025905917649749036, "loss": 0.1829, "step": 353840 }, { "epoch": 14.66, "grad_norm": 1.609375, "learning_rate": 0.0002590483384168988, "loss": 0.1711, "step": 353850 }, { "epoch": 14.66, "grad_norm": 0.8515625, "learning_rate": 0.0002590375003192792, "loss": 0.1719, "step": 353860 }, { "epoch": 14.66, "grad_norm": 1.359375, "learning_rate": 0.00025902666220465204, "loss": 0.2097, "step": 353870 }, { "epoch": 14.66, "grad_norm": 0.609375, "learning_rate": 0.0002590158240730377, "loss": 0.224, "step": 353880 }, { "epoch": 14.66, "grad_norm": 0.71875, "learning_rate": 0.00025900498592445646, "loss": 0.1774, "step": 353890 }, { "epoch": 14.66, "grad_norm": 0.78515625, "learning_rate": 0.0002589941477589289, "loss": 0.1981, "step": 353900 }, { "epoch": 14.66, "grad_norm": 1.4296875, "learning_rate": 0.00025898330957647525, "loss": 0.1868, "step": 353910 }, { "epoch": 14.66, "grad_norm": 1.1796875, "learning_rate": 0.0002589724713771161, "loss": 0.2038, "step": 353920 }, { "epoch": 14.66, "grad_norm": 0.84765625, "learning_rate": 0.00025896163316087165, "loss": 0.1623, "step": 353930 }, { "epoch": 14.66, "grad_norm": 1.9296875, "learning_rate": 0.0002589507949277624, "loss": 0.1867, "step": 353940 }, { "epoch": 14.66, "grad_norm": 0.66796875, "learning_rate": 0.00025893995667780866, "loss": 0.1808, "step": 353950 }, { "epoch": 14.66, "grad_norm": 0.84765625, "learning_rate": 0.00025892911841103095, "loss": 0.1873, "step": 353960 }, { "epoch": 14.66, "grad_norm": 0.7265625, "learning_rate": 0.0002589182801274496, "loss": 0.1835, "step": 353970 }, { "epoch": 14.66, "grad_norm": 0.63671875, "learning_rate": 0.0002589074418270849, "loss": 0.1903, "step": 353980 }, { "epoch": 14.66, "grad_norm": 0.7109375, "learning_rate": 0.0002588966035099575, "loss": 0.2415, "step": 353990 }, { "epoch": 14.66, "grad_norm": 0.49609375, "learning_rate": 0.00025888576517608757, "loss": 0.195, "step": 354000 }, { "epoch": 14.66, "grad_norm": 0.9921875, "learning_rate": 0.0002588749268254956, "loss": 0.2198, "step": 354010 }, { "epoch": 14.66, "grad_norm": 1.171875, "learning_rate": 0.000258864088458202, "loss": 0.1932, "step": 354020 }, { "epoch": 14.66, "grad_norm": 0.9375, "learning_rate": 0.00025885325007422714, "loss": 0.223, "step": 354030 }, { "epoch": 14.66, "grad_norm": 1.1484375, "learning_rate": 0.00025884241167359144, "loss": 0.2108, "step": 354040 }, { "epoch": 14.66, "grad_norm": 0.91015625, "learning_rate": 0.00025883157325631525, "loss": 0.1754, "step": 354050 }, { "epoch": 14.67, "grad_norm": 0.458984375, "learning_rate": 0.00025882073482241894, "loss": 0.1838, "step": 354060 }, { "epoch": 14.67, "grad_norm": 1.171875, "learning_rate": 0.000258809896371923, "loss": 0.2107, "step": 354070 }, { "epoch": 14.67, "grad_norm": 0.59765625, "learning_rate": 0.00025879905790484784, "loss": 0.199, "step": 354080 }, { "epoch": 14.67, "grad_norm": 0.66796875, "learning_rate": 0.00025878821942121376, "loss": 0.184, "step": 354090 }, { "epoch": 14.67, "grad_norm": 0.8515625, "learning_rate": 0.00025877738092104126, "loss": 0.243, "step": 354100 }, { "epoch": 14.67, "grad_norm": 1.046875, "learning_rate": 0.0002587665424043506, "loss": 0.1879, "step": 354110 }, { "epoch": 14.67, "grad_norm": 0.470703125, "learning_rate": 0.0002587557038711623, "loss": 0.1866, "step": 354120 }, { "epoch": 14.67, "grad_norm": 0.75390625, "learning_rate": 0.0002587448653214967, "loss": 0.1756, "step": 354130 }, { "epoch": 14.67, "grad_norm": 0.796875, "learning_rate": 0.00025873402675537417, "loss": 0.1866, "step": 354140 }, { "epoch": 14.67, "grad_norm": 1.015625, "learning_rate": 0.00025872318817281515, "loss": 0.1977, "step": 354150 }, { "epoch": 14.67, "grad_norm": 1.5, "learning_rate": 0.00025871234957384006, "loss": 0.181, "step": 354160 }, { "epoch": 14.67, "grad_norm": 0.482421875, "learning_rate": 0.0002587015109584693, "loss": 0.2148, "step": 354170 }, { "epoch": 14.67, "grad_norm": 0.8828125, "learning_rate": 0.00025869067232672316, "loss": 0.2445, "step": 354180 }, { "epoch": 14.67, "grad_norm": 0.62109375, "learning_rate": 0.0002586798336786221, "loss": 0.1668, "step": 354190 }, { "epoch": 14.67, "grad_norm": 1.0703125, "learning_rate": 0.0002586689950141866, "loss": 0.2198, "step": 354200 }, { "epoch": 14.67, "grad_norm": 1.3515625, "learning_rate": 0.00025865815633343696, "loss": 0.1662, "step": 354210 }, { "epoch": 14.67, "grad_norm": 0.48046875, "learning_rate": 0.0002586473176363937, "loss": 0.2519, "step": 354220 }, { "epoch": 14.67, "grad_norm": 2.203125, "learning_rate": 0.00025863647892307695, "loss": 0.1457, "step": 354230 }, { "epoch": 14.67, "grad_norm": 0.74609375, "learning_rate": 0.00025862564019350743, "loss": 0.2073, "step": 354240 }, { "epoch": 14.67, "grad_norm": 0.7265625, "learning_rate": 0.0002586148014477053, "loss": 0.1504, "step": 354250 }, { "epoch": 14.67, "grad_norm": 0.95703125, "learning_rate": 0.00025860396268569103, "loss": 0.2127, "step": 354260 }, { "epoch": 14.67, "grad_norm": 0.953125, "learning_rate": 0.00025859312390748507, "loss": 0.1889, "step": 354270 }, { "epoch": 14.67, "grad_norm": 0.578125, "learning_rate": 0.00025858228511310775, "loss": 0.2231, "step": 354280 }, { "epoch": 14.67, "grad_norm": 1.0078125, "learning_rate": 0.00025857144630257956, "loss": 0.202, "step": 354290 }, { "epoch": 14.68, "grad_norm": 1.234375, "learning_rate": 0.00025856060747592077, "loss": 0.167, "step": 354300 }, { "epoch": 14.68, "grad_norm": 1.2734375, "learning_rate": 0.00025854976863315187, "loss": 0.2032, "step": 354310 }, { "epoch": 14.68, "grad_norm": 0.8046875, "learning_rate": 0.0002585389297742932, "loss": 0.2021, "step": 354320 }, { "epoch": 14.68, "grad_norm": 0.6875, "learning_rate": 0.00025852809089936524, "loss": 0.1827, "step": 354330 }, { "epoch": 14.68, "grad_norm": 0.5859375, "learning_rate": 0.00025851725200838827, "loss": 0.212, "step": 354340 }, { "epoch": 14.68, "grad_norm": 0.7265625, "learning_rate": 0.0002585064131013828, "loss": 0.1755, "step": 354350 }, { "epoch": 14.68, "grad_norm": 0.4921875, "learning_rate": 0.0002584955741783692, "loss": 0.2142, "step": 354360 }, { "epoch": 14.68, "grad_norm": 0.56640625, "learning_rate": 0.00025848473523936783, "loss": 0.1579, "step": 354370 }, { "epoch": 14.68, "grad_norm": 0.330078125, "learning_rate": 0.000258473896284399, "loss": 0.1769, "step": 354380 }, { "epoch": 14.68, "grad_norm": 0.7734375, "learning_rate": 0.00025846305731348334, "loss": 0.2063, "step": 354390 }, { "epoch": 14.68, "grad_norm": 1.625, "learning_rate": 0.00025845221832664105, "loss": 0.1719, "step": 354400 }, { "epoch": 14.68, "grad_norm": 0.59375, "learning_rate": 0.00025844137932389265, "loss": 0.2001, "step": 354410 }, { "epoch": 14.68, "grad_norm": 0.83203125, "learning_rate": 0.00025843054030525846, "loss": 0.1984, "step": 354420 }, { "epoch": 14.68, "grad_norm": 1.0625, "learning_rate": 0.00025841970127075893, "loss": 0.1401, "step": 354430 }, { "epoch": 14.68, "grad_norm": 1.3125, "learning_rate": 0.00025840886222041443, "loss": 0.2055, "step": 354440 }, { "epoch": 14.68, "grad_norm": 0.70703125, "learning_rate": 0.0002583980231542453, "loss": 0.1836, "step": 354450 }, { "epoch": 14.68, "grad_norm": 0.75, "learning_rate": 0.000258387184072272, "loss": 0.1846, "step": 354460 }, { "epoch": 14.68, "grad_norm": 1.71875, "learning_rate": 0.000258376344974515, "loss": 0.2234, "step": 354470 }, { "epoch": 14.68, "grad_norm": 0.91796875, "learning_rate": 0.00025836550586099457, "loss": 0.2019, "step": 354480 }, { "epoch": 14.68, "grad_norm": 1.1328125, "learning_rate": 0.0002583546667317312, "loss": 0.1602, "step": 354490 }, { "epoch": 14.68, "grad_norm": 1.421875, "learning_rate": 0.0002583438275867452, "loss": 0.1759, "step": 354500 }, { "epoch": 14.68, "grad_norm": 0.78125, "learning_rate": 0.000258332988426057, "loss": 0.2226, "step": 354510 }, { "epoch": 14.68, "grad_norm": 1.0625, "learning_rate": 0.0002583221492496871, "loss": 0.1574, "step": 354520 }, { "epoch": 14.68, "grad_norm": 1.546875, "learning_rate": 0.0002583113100576557, "loss": 0.1507, "step": 354530 }, { "epoch": 14.69, "grad_norm": 0.7890625, "learning_rate": 0.0002583004708499835, "loss": 0.1325, "step": 354540 }, { "epoch": 14.69, "grad_norm": 1.6640625, "learning_rate": 0.00025828963162669055, "loss": 0.2431, "step": 354550 }, { "epoch": 14.69, "grad_norm": 1.0546875, "learning_rate": 0.00025827879238779744, "loss": 0.2179, "step": 354560 }, { "epoch": 14.69, "grad_norm": 1.125, "learning_rate": 0.00025826795313332456, "loss": 0.1586, "step": 354570 }, { "epoch": 14.69, "grad_norm": 1.296875, "learning_rate": 0.00025825711386329224, "loss": 0.1886, "step": 354580 }, { "epoch": 14.69, "grad_norm": 1.25, "learning_rate": 0.000258246274577721, "loss": 0.1987, "step": 354590 }, { "epoch": 14.69, "grad_norm": 0.73046875, "learning_rate": 0.00025823543527663114, "loss": 0.2128, "step": 354600 }, { "epoch": 14.69, "grad_norm": 0.89453125, "learning_rate": 0.000258224595960043, "loss": 0.2085, "step": 354610 }, { "epoch": 14.69, "grad_norm": 0.22265625, "learning_rate": 0.00025821375662797715, "loss": 0.1625, "step": 354620 }, { "epoch": 14.69, "grad_norm": 0.796875, "learning_rate": 0.00025820291728045387, "loss": 0.2284, "step": 354630 }, { "epoch": 14.69, "grad_norm": 0.77734375, "learning_rate": 0.00025819207791749355, "loss": 0.1763, "step": 354640 }, { "epoch": 14.69, "grad_norm": 0.78515625, "learning_rate": 0.0002581812385391167, "loss": 0.1744, "step": 354650 }, { "epoch": 14.69, "grad_norm": 2.1875, "learning_rate": 0.00025817039914534357, "loss": 0.1888, "step": 354660 }, { "epoch": 14.69, "grad_norm": 0.875, "learning_rate": 0.00025815955973619467, "loss": 0.1755, "step": 354670 }, { "epoch": 14.69, "grad_norm": 0.62890625, "learning_rate": 0.0002581487203116903, "loss": 0.2088, "step": 354680 }, { "epoch": 14.69, "grad_norm": 1.6484375, "learning_rate": 0.00025813788087185096, "loss": 0.1792, "step": 354690 }, { "epoch": 14.69, "grad_norm": 0.8984375, "learning_rate": 0.0002581270414166971, "loss": 0.1969, "step": 354700 }, { "epoch": 14.69, "grad_norm": 0.56640625, "learning_rate": 0.00025811620194624886, "loss": 0.1363, "step": 354710 }, { "epoch": 14.69, "grad_norm": 1.21875, "learning_rate": 0.0002581053624605269, "loss": 0.2043, "step": 354720 }, { "epoch": 14.69, "grad_norm": 2.046875, "learning_rate": 0.00025809452295955153, "loss": 0.2109, "step": 354730 }, { "epoch": 14.69, "grad_norm": 0.59375, "learning_rate": 0.00025808368344334303, "loss": 0.207, "step": 354740 }, { "epoch": 14.69, "grad_norm": 0.578125, "learning_rate": 0.00025807284391192204, "loss": 0.2406, "step": 354750 }, { "epoch": 14.69, "grad_norm": 1.609375, "learning_rate": 0.0002580620043653089, "loss": 0.2062, "step": 354760 }, { "epoch": 14.69, "grad_norm": 0.890625, "learning_rate": 0.0002580511648035237, "loss": 0.1855, "step": 354770 }, { "epoch": 14.69, "grad_norm": 0.51953125, "learning_rate": 0.00025804032522658727, "loss": 0.208, "step": 354780 }, { "epoch": 14.7, "grad_norm": 0.93359375, "learning_rate": 0.00025802948563451975, "loss": 0.1782, "step": 354790 }, { "epoch": 14.7, "grad_norm": 0.58203125, "learning_rate": 0.00025801864602734153, "loss": 0.2383, "step": 354800 }, { "epoch": 14.7, "grad_norm": 1.3828125, "learning_rate": 0.00025800780640507324, "loss": 0.262, "step": 354810 }, { "epoch": 14.7, "grad_norm": 0.90625, "learning_rate": 0.0002579969667677349, "loss": 0.1941, "step": 354820 }, { "epoch": 14.7, "grad_norm": 0.3203125, "learning_rate": 0.00025798612711534736, "loss": 0.2401, "step": 354830 }, { "epoch": 14.7, "grad_norm": 0.56640625, "learning_rate": 0.00025797528744793077, "loss": 0.1773, "step": 354840 }, { "epoch": 14.7, "grad_norm": 0.416015625, "learning_rate": 0.0002579644477655054, "loss": 0.1911, "step": 354850 }, { "epoch": 14.7, "grad_norm": 0.3046875, "learning_rate": 0.0002579536080680919, "loss": 0.238, "step": 354860 }, { "epoch": 14.7, "grad_norm": 1.046875, "learning_rate": 0.0002579427683557106, "loss": 0.2087, "step": 354870 }, { "epoch": 14.7, "grad_norm": 0.6875, "learning_rate": 0.0002579319286283818, "loss": 0.2146, "step": 354880 }, { "epoch": 14.7, "grad_norm": 0.419921875, "learning_rate": 0.00025792108888612604, "loss": 0.1896, "step": 354890 }, { "epoch": 14.7, "grad_norm": 0.6328125, "learning_rate": 0.0002579102491289635, "loss": 0.1895, "step": 354900 }, { "epoch": 14.7, "grad_norm": 0.91796875, "learning_rate": 0.00025789940935691487, "loss": 0.1872, "step": 354910 }, { "epoch": 14.7, "grad_norm": 1.2578125, "learning_rate": 0.0002578885695700004, "loss": 0.1999, "step": 354920 }, { "epoch": 14.7, "grad_norm": 0.5234375, "learning_rate": 0.00025787772976824036, "loss": 0.1812, "step": 354930 }, { "epoch": 14.7, "grad_norm": 1.046875, "learning_rate": 0.00025786688995165543, "loss": 0.2509, "step": 354940 }, { "epoch": 14.7, "grad_norm": 0.8359375, "learning_rate": 0.00025785605012026573, "loss": 0.1884, "step": 354950 }, { "epoch": 14.7, "grad_norm": 2.03125, "learning_rate": 0.0002578452102740919, "loss": 0.1966, "step": 354960 }, { "epoch": 14.7, "grad_norm": 0.4375, "learning_rate": 0.0002578343704131542, "loss": 0.189, "step": 354970 }, { "epoch": 14.7, "grad_norm": 0.00010251998901367188, "learning_rate": 0.000257823530537473, "loss": 0.1689, "step": 354980 }, { "epoch": 14.7, "grad_norm": 0.953125, "learning_rate": 0.00025781269064706884, "loss": 0.1951, "step": 354990 }, { "epoch": 14.7, "grad_norm": 0.5703125, "learning_rate": 0.0002578018507419621, "loss": 0.1883, "step": 355000 }, { "epoch": 14.7, "grad_norm": 0.98828125, "learning_rate": 0.00025779101082217296, "loss": 0.1853, "step": 355010 }, { "epoch": 14.7, "grad_norm": 0.88671875, "learning_rate": 0.0002577801708877221, "loss": 0.1823, "step": 355020 }, { "epoch": 14.71, "grad_norm": 0.7890625, "learning_rate": 0.0002577693309386298, "loss": 0.2109, "step": 355030 }, { "epoch": 14.71, "grad_norm": 1.0390625, "learning_rate": 0.0002577584909749164, "loss": 0.2161, "step": 355040 }, { "epoch": 14.71, "grad_norm": 0.90625, "learning_rate": 0.00025774765099660234, "loss": 0.1574, "step": 355050 }, { "epoch": 14.71, "grad_norm": 1.2421875, "learning_rate": 0.00025773681100370804, "loss": 0.2094, "step": 355060 }, { "epoch": 14.71, "grad_norm": 0.5234375, "learning_rate": 0.00025772597099625393, "loss": 0.119, "step": 355070 }, { "epoch": 14.71, "grad_norm": 0.9453125, "learning_rate": 0.0002577151309742604, "loss": 0.1668, "step": 355080 }, { "epoch": 14.71, "grad_norm": 1.4609375, "learning_rate": 0.0002577042909377478, "loss": 0.1291, "step": 355090 }, { "epoch": 14.71, "grad_norm": 1.1875, "learning_rate": 0.00025769345088673656, "loss": 0.1906, "step": 355100 }, { "epoch": 14.71, "grad_norm": 0.5859375, "learning_rate": 0.00025768261082124706, "loss": 0.2169, "step": 355110 }, { "epoch": 14.71, "grad_norm": 0.92578125, "learning_rate": 0.00025767177074129973, "loss": 0.1624, "step": 355120 }, { "epoch": 14.71, "grad_norm": 0.44921875, "learning_rate": 0.000257660930646915, "loss": 0.2091, "step": 355130 }, { "epoch": 14.71, "grad_norm": 0.5078125, "learning_rate": 0.0002576500905381131, "loss": 0.1501, "step": 355140 }, { "epoch": 14.71, "grad_norm": 0.59375, "learning_rate": 0.0002576392504149147, "loss": 0.1573, "step": 355150 }, { "epoch": 14.71, "grad_norm": 0.53515625, "learning_rate": 0.00025762841027733997, "loss": 0.1715, "step": 355160 }, { "epoch": 14.71, "grad_norm": 0.490234375, "learning_rate": 0.00025761757012540936, "loss": 0.2834, "step": 355170 }, { "epoch": 14.71, "grad_norm": 0.5234375, "learning_rate": 0.00025760672995914344, "loss": 0.135, "step": 355180 }, { "epoch": 14.71, "grad_norm": 0.33203125, "learning_rate": 0.0002575958897785624, "loss": 0.1344, "step": 355190 }, { "epoch": 14.71, "grad_norm": 0.85546875, "learning_rate": 0.00025758504958368667, "loss": 0.2055, "step": 355200 }, { "epoch": 14.71, "grad_norm": 1.1875, "learning_rate": 0.0002575742093745368, "loss": 0.1911, "step": 355210 }, { "epoch": 14.71, "grad_norm": 0.76171875, "learning_rate": 0.00025756336915113293, "loss": 0.2006, "step": 355220 }, { "epoch": 14.71, "grad_norm": 1.2734375, "learning_rate": 0.00025755252891349577, "loss": 0.1389, "step": 355230 }, { "epoch": 14.71, "grad_norm": 0.51171875, "learning_rate": 0.0002575416886616455, "loss": 0.1678, "step": 355240 }, { "epoch": 14.71, "grad_norm": 1.453125, "learning_rate": 0.0002575308483956026, "loss": 0.2103, "step": 355250 }, { "epoch": 14.71, "grad_norm": 0.84375, "learning_rate": 0.0002575200081153875, "loss": 0.1872, "step": 355260 }, { "epoch": 14.72, "grad_norm": 0.5390625, "learning_rate": 0.0002575091678210205, "loss": 0.1545, "step": 355270 }, { "epoch": 14.72, "grad_norm": 0.7421875, "learning_rate": 0.000257498327512522, "loss": 0.1667, "step": 355280 }, { "epoch": 14.72, "grad_norm": 0.96484375, "learning_rate": 0.00025748748718991253, "loss": 0.2064, "step": 355290 }, { "epoch": 14.72, "grad_norm": 1.2734375, "learning_rate": 0.0002574766468532124, "loss": 0.2391, "step": 355300 }, { "epoch": 14.72, "grad_norm": 0.63671875, "learning_rate": 0.00025746580650244205, "loss": 0.2058, "step": 355310 }, { "epoch": 14.72, "grad_norm": 0.008544921875, "learning_rate": 0.0002574549661376219, "loss": 0.1798, "step": 355320 }, { "epoch": 14.72, "grad_norm": 0.55078125, "learning_rate": 0.00025744412575877217, "loss": 0.2045, "step": 355330 }, { "epoch": 14.72, "grad_norm": 1.0, "learning_rate": 0.0002574332853659135, "loss": 0.183, "step": 355340 }, { "epoch": 14.72, "grad_norm": 0.4609375, "learning_rate": 0.00025742244495906627, "loss": 0.1722, "step": 355350 }, { "epoch": 14.72, "grad_norm": 1.203125, "learning_rate": 0.00025741160453825066, "loss": 0.1747, "step": 355360 }, { "epoch": 14.72, "grad_norm": 2.0625, "learning_rate": 0.0002574007641034872, "loss": 0.1972, "step": 355370 }, { "epoch": 14.72, "grad_norm": 0.275390625, "learning_rate": 0.0002573899236547964, "loss": 0.1575, "step": 355380 }, { "epoch": 14.72, "grad_norm": 0.95703125, "learning_rate": 0.00025737908319219854, "loss": 0.1947, "step": 355390 }, { "epoch": 14.72, "grad_norm": 0.7421875, "learning_rate": 0.000257368242715714, "loss": 0.1635, "step": 355400 }, { "epoch": 14.72, "grad_norm": 0.578125, "learning_rate": 0.00025735740222536326, "loss": 0.1688, "step": 355410 }, { "epoch": 14.72, "grad_norm": 0.96484375, "learning_rate": 0.00025734656172116664, "loss": 0.2422, "step": 355420 }, { "epoch": 14.72, "grad_norm": 1.2109375, "learning_rate": 0.00025733572120314465, "loss": 0.1944, "step": 355430 }, { "epoch": 14.72, "grad_norm": 1.046875, "learning_rate": 0.0002573248806713175, "loss": 0.1903, "step": 355440 }, { "epoch": 14.72, "grad_norm": 0.384765625, "learning_rate": 0.00025731404012570584, "loss": 0.2394, "step": 355450 }, { "epoch": 14.72, "grad_norm": 1.140625, "learning_rate": 0.0002573031995663299, "loss": 0.1732, "step": 355460 }, { "epoch": 14.72, "grad_norm": 0.96875, "learning_rate": 0.00025729235899321016, "loss": 0.2245, "step": 355470 }, { "epoch": 14.72, "grad_norm": 1.0625, "learning_rate": 0.000257281518406367, "loss": 0.1993, "step": 355480 }, { "epoch": 14.72, "grad_norm": 0.40234375, "learning_rate": 0.00025727067780582073, "loss": 0.2031, "step": 355490 }, { "epoch": 14.72, "grad_norm": 1.4296875, "learning_rate": 0.00025725983719159185, "loss": 0.2231, "step": 355500 }, { "epoch": 14.73, "grad_norm": 0.91015625, "learning_rate": 0.0002572489965637008, "loss": 0.2069, "step": 355510 }, { "epoch": 14.73, "grad_norm": 1.5234375, "learning_rate": 0.0002572381559221679, "loss": 0.1984, "step": 355520 }, { "epoch": 14.73, "grad_norm": 0.9140625, "learning_rate": 0.0002572273152670135, "loss": 0.1547, "step": 355530 }, { "epoch": 14.73, "grad_norm": 0.22265625, "learning_rate": 0.00025721647459825815, "loss": 0.2381, "step": 355540 }, { "epoch": 14.73, "grad_norm": 0.65625, "learning_rate": 0.0002572056339159222, "loss": 0.1993, "step": 355550 }, { "epoch": 14.73, "grad_norm": 1.265625, "learning_rate": 0.00025719479322002594, "loss": 0.2338, "step": 355560 }, { "epoch": 14.73, "grad_norm": 0.94140625, "learning_rate": 0.0002571839525105899, "loss": 0.2272, "step": 355570 }, { "epoch": 14.73, "grad_norm": 0.5625, "learning_rate": 0.0002571731117876344, "loss": 0.1927, "step": 355580 }, { "epoch": 14.73, "grad_norm": 1.15625, "learning_rate": 0.00025716227105117994, "loss": 0.2033, "step": 355590 }, { "epoch": 14.73, "grad_norm": 0.60546875, "learning_rate": 0.00025715143030124684, "loss": 0.1668, "step": 355600 }, { "epoch": 14.73, "grad_norm": 0.52734375, "learning_rate": 0.0002571405895378555, "loss": 0.1703, "step": 355610 }, { "epoch": 14.73, "grad_norm": 0.7890625, "learning_rate": 0.0002571297487610263, "loss": 0.2109, "step": 355620 }, { "epoch": 14.73, "grad_norm": 0.85546875, "learning_rate": 0.0002571189079707798, "loss": 0.1758, "step": 355630 }, { "epoch": 14.73, "grad_norm": 1.015625, "learning_rate": 0.0002571080671671362, "loss": 0.1969, "step": 355640 }, { "epoch": 14.73, "grad_norm": 2.171875, "learning_rate": 0.00025709722635011603, "loss": 0.1977, "step": 355650 }, { "epoch": 14.73, "grad_norm": 0.6484375, "learning_rate": 0.00025708638551973957, "loss": 0.2539, "step": 355660 }, { "epoch": 14.73, "grad_norm": 0.369140625, "learning_rate": 0.00025707554467602737, "loss": 0.1757, "step": 355670 }, { "epoch": 14.73, "grad_norm": 0.5703125, "learning_rate": 0.0002570647038189997, "loss": 0.1712, "step": 355680 }, { "epoch": 14.73, "grad_norm": 0.318359375, "learning_rate": 0.00025705386294867707, "loss": 0.182, "step": 355690 }, { "epoch": 14.73, "grad_norm": 1.5625, "learning_rate": 0.0002570430220650798, "loss": 0.1656, "step": 355700 }, { "epoch": 14.73, "grad_norm": 1.28125, "learning_rate": 0.00025703218116822834, "loss": 0.1806, "step": 355710 }, { "epoch": 14.73, "grad_norm": 0.60546875, "learning_rate": 0.00025702134025814313, "loss": 0.2238, "step": 355720 }, { "epoch": 14.73, "grad_norm": 0.89453125, "learning_rate": 0.0002570104993348444, "loss": 0.1695, "step": 355730 }, { "epoch": 14.73, "grad_norm": 0.5625, "learning_rate": 0.00025699965839835276, "loss": 0.2032, "step": 355740 }, { "epoch": 14.74, "grad_norm": 0.390625, "learning_rate": 0.00025698881744868853, "loss": 0.1664, "step": 355750 }, { "epoch": 14.74, "grad_norm": 1.3515625, "learning_rate": 0.000256977976485872, "loss": 0.1471, "step": 355760 }, { "epoch": 14.74, "grad_norm": 0.67578125, "learning_rate": 0.00025696713550992374, "loss": 0.1971, "step": 355770 }, { "epoch": 14.74, "grad_norm": 1.875, "learning_rate": 0.00025695629452086405, "loss": 0.1904, "step": 355780 }, { "epoch": 14.74, "grad_norm": 0.486328125, "learning_rate": 0.0002569454535187134, "loss": 0.225, "step": 355790 }, { "epoch": 14.74, "grad_norm": 0.765625, "learning_rate": 0.00025693461250349216, "loss": 0.2718, "step": 355800 }, { "epoch": 14.74, "grad_norm": 1.03125, "learning_rate": 0.0002569237714752207, "loss": 0.2055, "step": 355810 }, { "epoch": 14.74, "grad_norm": 0.96484375, "learning_rate": 0.00025691293043391945, "loss": 0.1739, "step": 355820 }, { "epoch": 14.74, "grad_norm": 2.0, "learning_rate": 0.0002569020893796088, "loss": 0.2021, "step": 355830 }, { "epoch": 14.74, "grad_norm": 2.0, "learning_rate": 0.0002568912483123092, "loss": 0.209, "step": 355840 }, { "epoch": 14.74, "grad_norm": 0.53515625, "learning_rate": 0.000256880407232041, "loss": 0.1658, "step": 355850 }, { "epoch": 14.74, "grad_norm": 1.4453125, "learning_rate": 0.00025686956613882463, "loss": 0.15, "step": 355860 }, { "epoch": 14.74, "grad_norm": 1.203125, "learning_rate": 0.0002568587250326805, "loss": 0.2236, "step": 355870 }, { "epoch": 14.74, "grad_norm": 1.703125, "learning_rate": 0.00025684788391362897, "loss": 0.166, "step": 355880 }, { "epoch": 14.74, "grad_norm": 0.859375, "learning_rate": 0.0002568370427816904, "loss": 0.1796, "step": 355890 }, { "epoch": 14.74, "grad_norm": 0.7109375, "learning_rate": 0.0002568262016368853, "loss": 0.146, "step": 355900 }, { "epoch": 14.74, "grad_norm": 0.68359375, "learning_rate": 0.00025681536047923406, "loss": 0.1677, "step": 355910 }, { "epoch": 14.74, "grad_norm": 0.55859375, "learning_rate": 0.000256804519308757, "loss": 0.1777, "step": 355920 }, { "epoch": 14.74, "grad_norm": 0.546875, "learning_rate": 0.0002567936781254746, "loss": 0.169, "step": 355930 }, { "epoch": 14.74, "grad_norm": 0.6484375, "learning_rate": 0.0002567828369294072, "loss": 0.2335, "step": 355940 }, { "epoch": 14.74, "grad_norm": 0.8359375, "learning_rate": 0.00025677199572057526, "loss": 0.1496, "step": 355950 }, { "epoch": 14.74, "grad_norm": 0.60546875, "learning_rate": 0.0002567611544989992, "loss": 0.2058, "step": 355960 }, { "epoch": 14.74, "grad_norm": 1.53125, "learning_rate": 0.00025675031326469926, "loss": 0.2247, "step": 355970 }, { "epoch": 14.74, "grad_norm": 0.79296875, "learning_rate": 0.00025673947201769606, "loss": 0.1725, "step": 355980 }, { "epoch": 14.75, "grad_norm": 0.427734375, "learning_rate": 0.0002567286307580098, "loss": 0.1899, "step": 355990 }, { "epoch": 14.75, "grad_norm": 0.48828125, "learning_rate": 0.00025671778948566104, "loss": 0.1709, "step": 356000 }, { "epoch": 14.75, "grad_norm": 2.703125, "learning_rate": 0.0002567069482006702, "loss": 0.2108, "step": 356010 }, { "epoch": 14.75, "grad_norm": 0.94140625, "learning_rate": 0.00025669610690305753, "loss": 0.1832, "step": 356020 }, { "epoch": 14.75, "grad_norm": 2.3125, "learning_rate": 0.0002566852655928435, "loss": 0.1904, "step": 356030 }, { "epoch": 14.75, "grad_norm": 0.3671875, "learning_rate": 0.00025667442427004854, "loss": 0.1735, "step": 356040 }, { "epoch": 14.75, "grad_norm": 0.63671875, "learning_rate": 0.000256663582934693, "loss": 0.1965, "step": 356050 }, { "epoch": 14.75, "grad_norm": 1.2578125, "learning_rate": 0.0002566527415867974, "loss": 0.1779, "step": 356060 }, { "epoch": 14.75, "grad_norm": 1.3359375, "learning_rate": 0.000256641900226382, "loss": 0.2187, "step": 356070 }, { "epoch": 14.75, "grad_norm": 0.69140625, "learning_rate": 0.0002566310588534673, "loss": 0.2028, "step": 356080 }, { "epoch": 14.75, "grad_norm": 0.244140625, "learning_rate": 0.0002566202174680736, "loss": 0.164, "step": 356090 }, { "epoch": 14.75, "grad_norm": 0.5703125, "learning_rate": 0.0002566093760702214, "loss": 0.1655, "step": 356100 }, { "epoch": 14.75, "grad_norm": 0.53125, "learning_rate": 0.00025659853465993105, "loss": 0.2397, "step": 356110 }, { "epoch": 14.75, "grad_norm": 1.546875, "learning_rate": 0.000256587693237223, "loss": 0.2136, "step": 356120 }, { "epoch": 14.75, "grad_norm": 1.3046875, "learning_rate": 0.0002565768518021176, "loss": 0.1973, "step": 356130 }, { "epoch": 14.75, "grad_norm": 1.203125, "learning_rate": 0.00025656601035463534, "loss": 0.1522, "step": 356140 }, { "epoch": 14.75, "grad_norm": 0.734375, "learning_rate": 0.0002565551688947965, "loss": 0.1961, "step": 356150 }, { "epoch": 14.75, "grad_norm": 0.65234375, "learning_rate": 0.0002565443274226215, "loss": 0.2191, "step": 356160 }, { "epoch": 14.75, "grad_norm": 2.453125, "learning_rate": 0.00025653348593813086, "loss": 0.1947, "step": 356170 }, { "epoch": 14.75, "grad_norm": 0.7890625, "learning_rate": 0.0002565226444413449, "loss": 0.1953, "step": 356180 }, { "epoch": 14.75, "grad_norm": 0.64453125, "learning_rate": 0.000256511802932284, "loss": 0.1899, "step": 356190 }, { "epoch": 14.75, "grad_norm": 0.484375, "learning_rate": 0.00025650096141096863, "loss": 0.1669, "step": 356200 }, { "epoch": 14.75, "grad_norm": 0.91796875, "learning_rate": 0.000256490119877419, "loss": 0.1935, "step": 356210 }, { "epoch": 14.75, "grad_norm": 0.314453125, "learning_rate": 0.00025647927833165584, "loss": 0.1898, "step": 356220 }, { "epoch": 14.76, "grad_norm": 0.486328125, "learning_rate": 0.0002564684367736993, "loss": 0.2043, "step": 356230 }, { "epoch": 14.76, "grad_norm": 0.89453125, "learning_rate": 0.0002564575952035699, "loss": 0.2151, "step": 356240 }, { "epoch": 14.76, "grad_norm": 1.59375, "learning_rate": 0.000256446753621288, "loss": 0.1763, "step": 356250 }, { "epoch": 14.76, "grad_norm": 2.484375, "learning_rate": 0.00025643591202687395, "loss": 0.1819, "step": 356260 }, { "epoch": 14.76, "grad_norm": 1.2578125, "learning_rate": 0.0002564250704203482, "loss": 0.2079, "step": 356270 }, { "epoch": 14.76, "grad_norm": 0.59765625, "learning_rate": 0.0002564142288017313, "loss": 0.2174, "step": 356280 }, { "epoch": 14.76, "grad_norm": 0.85546875, "learning_rate": 0.0002564033871710433, "loss": 0.1827, "step": 356290 }, { "epoch": 14.76, "grad_norm": 2.140625, "learning_rate": 0.000256392545528305, "loss": 0.1949, "step": 356300 }, { "epoch": 14.76, "grad_norm": 0.90234375, "learning_rate": 0.0002563817038735365, "loss": 0.2229, "step": 356310 }, { "epoch": 14.76, "grad_norm": 0.62890625, "learning_rate": 0.0002563708622067584, "loss": 0.2014, "step": 356320 }, { "epoch": 14.76, "grad_norm": 1.0859375, "learning_rate": 0.00025636002052799106, "loss": 0.2272, "step": 356330 }, { "epoch": 14.76, "grad_norm": 0.76171875, "learning_rate": 0.00025634917883725475, "loss": 0.1939, "step": 356340 }, { "epoch": 14.76, "grad_norm": 0.56640625, "learning_rate": 0.00025633833713457004, "loss": 0.2386, "step": 356350 }, { "epoch": 14.76, "grad_norm": 0.75390625, "learning_rate": 0.0002563274954199573, "loss": 0.2035, "step": 356360 }, { "epoch": 14.76, "grad_norm": 1.5078125, "learning_rate": 0.00025631665369343676, "loss": 0.1817, "step": 356370 }, { "epoch": 14.76, "grad_norm": 1.0703125, "learning_rate": 0.00025630581195502906, "loss": 0.1957, "step": 356380 }, { "epoch": 14.76, "grad_norm": 0.404296875, "learning_rate": 0.00025629497020475444, "loss": 0.186, "step": 356390 }, { "epoch": 14.76, "grad_norm": 3.453125, "learning_rate": 0.00025628412844263344, "loss": 0.1756, "step": 356400 }, { "epoch": 14.76, "grad_norm": 2.109375, "learning_rate": 0.00025627328666868635, "loss": 0.2081, "step": 356410 }, { "epoch": 14.76, "grad_norm": 0.98046875, "learning_rate": 0.0002562624448829336, "loss": 0.1434, "step": 356420 }, { "epoch": 14.76, "grad_norm": 1.0546875, "learning_rate": 0.0002562516030853956, "loss": 0.1596, "step": 356430 }, { "epoch": 14.76, "grad_norm": 0.71484375, "learning_rate": 0.00025624076127609286, "loss": 0.2222, "step": 356440 }, { "epoch": 14.76, "grad_norm": 1.0859375, "learning_rate": 0.00025622991945504554, "loss": 0.221, "step": 356450 }, { "epoch": 14.76, "grad_norm": 0.376953125, "learning_rate": 0.00025621907762227424, "loss": 0.1673, "step": 356460 }, { "epoch": 14.76, "grad_norm": 0.7109375, "learning_rate": 0.0002562082357777993, "loss": 0.2016, "step": 356470 }, { "epoch": 14.77, "grad_norm": 0.46875, "learning_rate": 0.00025619739392164113, "loss": 0.1618, "step": 356480 }, { "epoch": 14.77, "grad_norm": 0.69921875, "learning_rate": 0.0002561865520538202, "loss": 0.186, "step": 356490 }, { "epoch": 14.77, "grad_norm": 0.24609375, "learning_rate": 0.00025617571017435675, "loss": 0.2468, "step": 356500 }, { "epoch": 14.77, "grad_norm": 0.83203125, "learning_rate": 0.00025616486828327137, "loss": 0.1639, "step": 356510 }, { "epoch": 14.77, "grad_norm": 1.2265625, "learning_rate": 0.0002561540263805843, "loss": 0.1726, "step": 356520 }, { "epoch": 14.77, "grad_norm": 3.34375, "learning_rate": 0.000256143184466316, "loss": 0.1908, "step": 356530 }, { "epoch": 14.77, "grad_norm": 1.3125, "learning_rate": 0.00025613234254048703, "loss": 0.2306, "step": 356540 }, { "epoch": 14.77, "grad_norm": 0.98828125, "learning_rate": 0.0002561215006031175, "loss": 0.2137, "step": 356550 }, { "epoch": 14.77, "grad_norm": 0.53515625, "learning_rate": 0.000256110658654228, "loss": 0.2349, "step": 356560 }, { "epoch": 14.77, "grad_norm": 0.6796875, "learning_rate": 0.000256099816693839, "loss": 0.1516, "step": 356570 }, { "epoch": 14.77, "grad_norm": 1.234375, "learning_rate": 0.00025608897472197064, "loss": 0.2106, "step": 356580 }, { "epoch": 14.77, "grad_norm": 1.1015625, "learning_rate": 0.0002560781327386436, "loss": 0.2073, "step": 356590 }, { "epoch": 14.77, "grad_norm": 0.94140625, "learning_rate": 0.00025606729074387815, "loss": 0.1946, "step": 356600 }, { "epoch": 14.77, "grad_norm": 1.015625, "learning_rate": 0.0002560564487376947, "loss": 0.2431, "step": 356610 }, { "epoch": 14.77, "grad_norm": 0.49609375, "learning_rate": 0.00025604560672011376, "loss": 0.1599, "step": 356620 }, { "epoch": 14.77, "grad_norm": 1.65625, "learning_rate": 0.0002560347646911555, "loss": 0.1811, "step": 356630 }, { "epoch": 14.77, "grad_norm": 0.515625, "learning_rate": 0.0002560239226508405, "loss": 0.1752, "step": 356640 }, { "epoch": 14.77, "grad_norm": 1.6171875, "learning_rate": 0.0002560130805991892, "loss": 0.2709, "step": 356650 }, { "epoch": 14.77, "grad_norm": 1.0234375, "learning_rate": 0.00025600223853622183, "loss": 0.169, "step": 356660 }, { "epoch": 14.77, "grad_norm": 0.6328125, "learning_rate": 0.000255991396461959, "loss": 0.1607, "step": 356670 }, { "epoch": 14.77, "grad_norm": 1.03125, "learning_rate": 0.0002559805543764209, "loss": 0.2227, "step": 356680 }, { "epoch": 14.77, "grad_norm": 0.78125, "learning_rate": 0.0002559697122796281, "loss": 0.1868, "step": 356690 }, { "epoch": 14.77, "grad_norm": 0.84375, "learning_rate": 0.00025595887017160104, "loss": 0.1661, "step": 356700 }, { "epoch": 14.77, "grad_norm": 0.6953125, "learning_rate": 0.0002559480280523599, "loss": 0.2021, "step": 356710 }, { "epoch": 14.78, "grad_norm": 0.91015625, "learning_rate": 0.00025593718592192523, "loss": 0.1325, "step": 356720 }, { "epoch": 14.78, "grad_norm": 0.421875, "learning_rate": 0.0002559263437803175, "loss": 0.2048, "step": 356730 }, { "epoch": 14.78, "grad_norm": 0.91796875, "learning_rate": 0.0002559155016275569, "loss": 0.1959, "step": 356740 }, { "epoch": 14.78, "grad_norm": 2.625, "learning_rate": 0.000255904659463664, "loss": 0.2094, "step": 356750 }, { "epoch": 14.78, "grad_norm": 0.7421875, "learning_rate": 0.00025589381728865926, "loss": 0.1875, "step": 356760 }, { "epoch": 14.78, "grad_norm": 1.5078125, "learning_rate": 0.00025588297510256285, "loss": 0.2144, "step": 356770 }, { "epoch": 14.78, "grad_norm": 0.82421875, "learning_rate": 0.00025587213290539544, "loss": 0.2043, "step": 356780 }, { "epoch": 14.78, "grad_norm": 0.72265625, "learning_rate": 0.0002558612906971773, "loss": 0.1949, "step": 356790 }, { "epoch": 14.78, "grad_norm": 0.52734375, "learning_rate": 0.00025585044847792874, "loss": 0.1878, "step": 356800 }, { "epoch": 14.78, "grad_norm": 2.9375, "learning_rate": 0.0002558396062476704, "loss": 0.1834, "step": 356810 }, { "epoch": 14.78, "grad_norm": 0.66796875, "learning_rate": 0.00025582876400642246, "loss": 0.2032, "step": 356820 }, { "epoch": 14.78, "grad_norm": 0.345703125, "learning_rate": 0.0002558179217542055, "loss": 0.2141, "step": 356830 }, { "epoch": 14.78, "grad_norm": 1.2109375, "learning_rate": 0.00025580707949103977, "loss": 0.1958, "step": 356840 }, { "epoch": 14.78, "grad_norm": 0.96875, "learning_rate": 0.0002557962372169457, "loss": 0.2023, "step": 356850 }, { "epoch": 14.78, "grad_norm": 0.90234375, "learning_rate": 0.0002557853949319439, "loss": 0.2005, "step": 356860 }, { "epoch": 14.78, "grad_norm": 1.3515625, "learning_rate": 0.0002557745526360545, "loss": 0.1502, "step": 356870 }, { "epoch": 14.78, "grad_norm": 0.87890625, "learning_rate": 0.000255763710329298, "loss": 0.2049, "step": 356880 }, { "epoch": 14.78, "grad_norm": 0.9453125, "learning_rate": 0.00025575286801169484, "loss": 0.1726, "step": 356890 }, { "epoch": 14.78, "grad_norm": 0.6015625, "learning_rate": 0.0002557420256832654, "loss": 0.1616, "step": 356900 }, { "epoch": 14.78, "grad_norm": 0.46875, "learning_rate": 0.0002557311833440301, "loss": 0.2082, "step": 356910 }, { "epoch": 14.78, "grad_norm": 1.28125, "learning_rate": 0.0002557203409940093, "loss": 0.1667, "step": 356920 }, { "epoch": 14.78, "grad_norm": 3.03125, "learning_rate": 0.0002557094986332235, "loss": 0.2062, "step": 356930 }, { "epoch": 14.78, "grad_norm": 0.5859375, "learning_rate": 0.00025569865626169305, "loss": 0.142, "step": 356940 }, { "epoch": 14.78, "grad_norm": 0.6875, "learning_rate": 0.00025568781387943823, "loss": 0.2169, "step": 356950 }, { "epoch": 14.79, "grad_norm": 1.015625, "learning_rate": 0.0002556769714864796, "loss": 0.1385, "step": 356960 }, { "epoch": 14.79, "grad_norm": 0.439453125, "learning_rate": 0.0002556661290828376, "loss": 0.2275, "step": 356970 }, { "epoch": 14.79, "grad_norm": 0.78125, "learning_rate": 0.0002556552866685325, "loss": 0.1696, "step": 356980 }, { "epoch": 14.79, "grad_norm": 0.875, "learning_rate": 0.0002556444442435848, "loss": 0.2199, "step": 356990 }, { "epoch": 14.79, "grad_norm": 0.44140625, "learning_rate": 0.00025563360180801484, "loss": 0.1774, "step": 357000 }, { "epoch": 14.79, "grad_norm": 0.44140625, "learning_rate": 0.000255622759361843, "loss": 0.2024, "step": 357010 }, { "epoch": 14.79, "grad_norm": 1.2734375, "learning_rate": 0.00025561191690508977, "loss": 0.186, "step": 357020 }, { "epoch": 14.79, "grad_norm": 1.375, "learning_rate": 0.00025560107443777555, "loss": 0.205, "step": 357030 }, { "epoch": 14.79, "grad_norm": 2.1875, "learning_rate": 0.00025559023195992066, "loss": 0.1886, "step": 357040 }, { "epoch": 14.79, "grad_norm": 0.48828125, "learning_rate": 0.0002555793894715456, "loss": 0.1538, "step": 357050 }, { "epoch": 14.79, "grad_norm": 0.6796875, "learning_rate": 0.0002555685469726707, "loss": 0.1753, "step": 357060 }, { "epoch": 14.79, "grad_norm": 0.703125, "learning_rate": 0.00025555770446331646, "loss": 0.1911, "step": 357070 }, { "epoch": 14.79, "grad_norm": 0.94140625, "learning_rate": 0.0002555468619435031, "loss": 0.178, "step": 357080 }, { "epoch": 14.79, "grad_norm": 0.6015625, "learning_rate": 0.0002555360194132513, "loss": 0.189, "step": 357090 }, { "epoch": 14.79, "grad_norm": 1.5390625, "learning_rate": 0.00025552517687258113, "loss": 0.209, "step": 357100 }, { "epoch": 14.79, "grad_norm": 0.83203125, "learning_rate": 0.0002555143343215133, "loss": 0.1585, "step": 357110 }, { "epoch": 14.79, "grad_norm": 0.6875, "learning_rate": 0.0002555034917600681, "loss": 0.1458, "step": 357120 }, { "epoch": 14.79, "grad_norm": 5.6875, "learning_rate": 0.0002554926491882659, "loss": 0.1951, "step": 357130 }, { "epoch": 14.79, "grad_norm": 0.65625, "learning_rate": 0.00025548180660612705, "loss": 0.1683, "step": 357140 }, { "epoch": 14.79, "grad_norm": 0.70703125, "learning_rate": 0.00025547096401367216, "loss": 0.1863, "step": 357150 }, { "epoch": 14.79, "grad_norm": 0.68359375, "learning_rate": 0.0002554601214109214, "loss": 0.1631, "step": 357160 }, { "epoch": 14.79, "grad_norm": 0.83984375, "learning_rate": 0.0002554492787978953, "loss": 0.1641, "step": 357170 }, { "epoch": 14.79, "grad_norm": 1.2109375, "learning_rate": 0.00025543843617461425, "loss": 0.2125, "step": 357180 }, { "epoch": 14.79, "grad_norm": 1.09375, "learning_rate": 0.0002554275935410987, "loss": 0.2507, "step": 357190 }, { "epoch": 14.8, "grad_norm": 2.203125, "learning_rate": 0.000255416750897369, "loss": 0.2005, "step": 357200 }, { "epoch": 14.8, "grad_norm": 0.5625, "learning_rate": 0.0002554059082434455, "loss": 0.1921, "step": 357210 }, { "epoch": 14.8, "grad_norm": 0.84375, "learning_rate": 0.00025539506557934865, "loss": 0.1873, "step": 357220 }, { "epoch": 14.8, "grad_norm": 0.78515625, "learning_rate": 0.00025538422290509894, "loss": 0.1965, "step": 357230 }, { "epoch": 14.8, "grad_norm": 1.40625, "learning_rate": 0.0002553733802207167, "loss": 0.2496, "step": 357240 }, { "epoch": 14.8, "grad_norm": 0.95703125, "learning_rate": 0.0002553625375262223, "loss": 0.2048, "step": 357250 }, { "epoch": 14.8, "grad_norm": 0.87109375, "learning_rate": 0.0002553516948216362, "loss": 0.1757, "step": 357260 }, { "epoch": 14.8, "grad_norm": 0.7734375, "learning_rate": 0.00025534085210697884, "loss": 0.1963, "step": 357270 }, { "epoch": 14.8, "grad_norm": 0.279296875, "learning_rate": 0.0002553300093822705, "loss": 0.1736, "step": 357280 }, { "epoch": 14.8, "grad_norm": 0.7734375, "learning_rate": 0.0002553191666475317, "loss": 0.1986, "step": 357290 }, { "epoch": 14.8, "grad_norm": 0.375, "learning_rate": 0.00025530832390278273, "loss": 0.1745, "step": 357300 }, { "epoch": 14.8, "grad_norm": 0.390625, "learning_rate": 0.00025529748114804413, "loss": 0.1966, "step": 357310 }, { "epoch": 14.8, "grad_norm": 1.0625, "learning_rate": 0.0002552866383833363, "loss": 0.2016, "step": 357320 }, { "epoch": 14.8, "grad_norm": 1.0, "learning_rate": 0.0002552757956086795, "loss": 0.1569, "step": 357330 }, { "epoch": 14.8, "grad_norm": 1.5703125, "learning_rate": 0.0002552649528240942, "loss": 0.1934, "step": 357340 }, { "epoch": 14.8, "grad_norm": 0.57421875, "learning_rate": 0.00025525411002960094, "loss": 0.1835, "step": 357350 }, { "epoch": 14.8, "grad_norm": 0.33984375, "learning_rate": 0.00025524326722521987, "loss": 0.1364, "step": 357360 }, { "epoch": 14.8, "grad_norm": 0.703125, "learning_rate": 0.00025523242441097163, "loss": 0.1822, "step": 357370 }, { "epoch": 14.8, "grad_norm": 0.7109375, "learning_rate": 0.00025522158158687645, "loss": 0.1839, "step": 357380 }, { "epoch": 14.8, "grad_norm": 1.625, "learning_rate": 0.00025521073875295493, "loss": 0.1591, "step": 357390 }, { "epoch": 14.8, "grad_norm": 0.57421875, "learning_rate": 0.00025519989590922735, "loss": 0.1624, "step": 357400 }, { "epoch": 14.8, "grad_norm": 0.59375, "learning_rate": 0.000255189053055714, "loss": 0.1298, "step": 357410 }, { "epoch": 14.8, "grad_norm": 0.7890625, "learning_rate": 0.0002551782101924355, "loss": 0.1704, "step": 357420 }, { "epoch": 14.8, "grad_norm": 1.0, "learning_rate": 0.00025516736731941224, "loss": 0.1375, "step": 357430 }, { "epoch": 14.81, "grad_norm": 1.6015625, "learning_rate": 0.00025515652443666443, "loss": 0.1806, "step": 357440 }, { "epoch": 14.81, "grad_norm": 0.640625, "learning_rate": 0.0002551456815442127, "loss": 0.1992, "step": 357450 }, { "epoch": 14.81, "grad_norm": 0.796875, "learning_rate": 0.00025513483864207724, "loss": 0.178, "step": 357460 }, { "epoch": 14.81, "grad_norm": 0.34765625, "learning_rate": 0.00025512399573027867, "loss": 0.1822, "step": 357470 }, { "epoch": 14.81, "grad_norm": 1.21875, "learning_rate": 0.0002551131528088373, "loss": 0.2055, "step": 357480 }, { "epoch": 14.81, "grad_norm": 0.921875, "learning_rate": 0.00025510230987777345, "loss": 0.1824, "step": 357490 }, { "epoch": 14.81, "grad_norm": 0.68359375, "learning_rate": 0.0002550914669371076, "loss": 0.1778, "step": 357500 }, { "epoch": 14.81, "grad_norm": 0.87890625, "learning_rate": 0.00025508062398686026, "loss": 0.1857, "step": 357510 }, { "epoch": 14.81, "grad_norm": 0.3359375, "learning_rate": 0.0002550697810270516, "loss": 0.1745, "step": 357520 }, { "epoch": 14.81, "grad_norm": 0.93359375, "learning_rate": 0.0002550589380577023, "loss": 0.1752, "step": 357530 }, { "epoch": 14.81, "grad_norm": 0.94140625, "learning_rate": 0.0002550480950788325, "loss": 0.1708, "step": 357540 }, { "epoch": 14.81, "grad_norm": 0.7734375, "learning_rate": 0.0002550372520904628, "loss": 0.1823, "step": 357550 }, { "epoch": 14.81, "grad_norm": 1.7109375, "learning_rate": 0.00025502640909261357, "loss": 0.2205, "step": 357560 }, { "epoch": 14.81, "grad_norm": 2.09375, "learning_rate": 0.0002550155660853051, "loss": 0.2176, "step": 357570 }, { "epoch": 14.81, "grad_norm": 0.73828125, "learning_rate": 0.00025500472306855786, "loss": 0.1872, "step": 357580 }, { "epoch": 14.81, "grad_norm": 0.609375, "learning_rate": 0.0002549938800423924, "loss": 0.209, "step": 357590 }, { "epoch": 14.81, "grad_norm": 0.4765625, "learning_rate": 0.00025498303700682883, "loss": 0.1327, "step": 357600 }, { "epoch": 14.81, "grad_norm": 1.34375, "learning_rate": 0.00025497219396188785, "loss": 0.2371, "step": 357610 }, { "epoch": 14.81, "grad_norm": 0.0, "learning_rate": 0.0002549613509075897, "loss": 0.1968, "step": 357620 }, { "epoch": 14.81, "grad_norm": 0.515625, "learning_rate": 0.0002549505078439548, "loss": 0.1804, "step": 357630 }, { "epoch": 14.81, "grad_norm": 2.421875, "learning_rate": 0.0002549396647710037, "loss": 0.1502, "step": 357640 }, { "epoch": 14.81, "grad_norm": 0.58203125, "learning_rate": 0.00025492882168875645, "loss": 0.201, "step": 357650 }, { "epoch": 14.81, "grad_norm": 0.85546875, "learning_rate": 0.00025491797859723386, "loss": 0.1757, "step": 357660 }, { "epoch": 14.81, "grad_norm": 0.71484375, "learning_rate": 0.0002549071354964561, "loss": 0.1844, "step": 357670 }, { "epoch": 14.82, "grad_norm": 1.3828125, "learning_rate": 0.0002548962923864437, "loss": 0.2078, "step": 357680 }, { "epoch": 14.82, "grad_norm": 0.57421875, "learning_rate": 0.000254885449267217, "loss": 0.1397, "step": 357690 }, { "epoch": 14.82, "grad_norm": 1.6015625, "learning_rate": 0.00025487460613879637, "loss": 0.161, "step": 357700 }, { "epoch": 14.82, "grad_norm": 1.3359375, "learning_rate": 0.0002548637630012023, "loss": 0.2075, "step": 357710 }, { "epoch": 14.82, "grad_norm": 1.34375, "learning_rate": 0.00025485291985445516, "loss": 0.1777, "step": 357720 }, { "epoch": 14.82, "grad_norm": 0.486328125, "learning_rate": 0.00025484207669857525, "loss": 0.2209, "step": 357730 }, { "epoch": 14.82, "grad_norm": 1.2109375, "learning_rate": 0.00025483123353358316, "loss": 0.138, "step": 357740 }, { "epoch": 14.82, "grad_norm": 1.59375, "learning_rate": 0.0002548203903594992, "loss": 0.1962, "step": 357750 }, { "epoch": 14.82, "grad_norm": 0.49609375, "learning_rate": 0.0002548095471763437, "loss": 0.204, "step": 357760 }, { "epoch": 14.82, "grad_norm": 0.75390625, "learning_rate": 0.00025479870398413735, "loss": 0.1997, "step": 357770 }, { "epoch": 14.82, "grad_norm": 0.765625, "learning_rate": 0.00025478786078290017, "loss": 0.1788, "step": 357780 }, { "epoch": 14.82, "grad_norm": 0.5859375, "learning_rate": 0.00025477701757265284, "loss": 0.208, "step": 357790 }, { "epoch": 14.82, "grad_norm": 0.94140625, "learning_rate": 0.0002547661743534157, "loss": 0.1991, "step": 357800 }, { "epoch": 14.82, "grad_norm": 0.91015625, "learning_rate": 0.000254755331125209, "loss": 0.2407, "step": 357810 }, { "epoch": 14.82, "grad_norm": 0.6015625, "learning_rate": 0.0002547444878880534, "loss": 0.1747, "step": 357820 }, { "epoch": 14.82, "grad_norm": 1.2109375, "learning_rate": 0.0002547336446419692, "loss": 0.1873, "step": 357830 }, { "epoch": 14.82, "grad_norm": 0.326171875, "learning_rate": 0.0002547228013869767, "loss": 0.1824, "step": 357840 }, { "epoch": 14.82, "grad_norm": 0.8125, "learning_rate": 0.0002547119581230965, "loss": 0.2136, "step": 357850 }, { "epoch": 14.82, "grad_norm": 1.03125, "learning_rate": 0.0002547011148503489, "loss": 0.1938, "step": 357860 }, { "epoch": 14.82, "grad_norm": 0.89453125, "learning_rate": 0.0002546902715687542, "loss": 0.2412, "step": 357870 }, { "epoch": 14.82, "grad_norm": 0.66796875, "learning_rate": 0.000254679428278333, "loss": 0.1239, "step": 357880 }, { "epoch": 14.82, "grad_norm": 0.6640625, "learning_rate": 0.00025466858497910557, "loss": 0.1963, "step": 357890 }, { "epoch": 14.82, "grad_norm": 0.81640625, "learning_rate": 0.0002546577416710924, "loss": 0.2009, "step": 357900 }, { "epoch": 14.82, "grad_norm": 1.1796875, "learning_rate": 0.00025464689835431387, "loss": 0.2095, "step": 357910 }, { "epoch": 14.83, "grad_norm": 0.375, "learning_rate": 0.0002546360550287903, "loss": 0.1574, "step": 357920 }, { "epoch": 14.83, "grad_norm": 0.6796875, "learning_rate": 0.00025462521169454234, "loss": 0.2021, "step": 357930 }, { "epoch": 14.83, "grad_norm": 1.7890625, "learning_rate": 0.00025461436835159013, "loss": 0.1816, "step": 357940 }, { "epoch": 14.83, "grad_norm": 0.48046875, "learning_rate": 0.00025460352499995416, "loss": 0.1524, "step": 357950 }, { "epoch": 14.83, "grad_norm": 2.203125, "learning_rate": 0.0002545926816396549, "loss": 0.2689, "step": 357960 }, { "epoch": 14.83, "grad_norm": 0.423828125, "learning_rate": 0.00025458183827071264, "loss": 0.1531, "step": 357970 }, { "epoch": 14.83, "grad_norm": 0.71484375, "learning_rate": 0.0002545709948931479, "loss": 0.1802, "step": 357980 }, { "epoch": 14.83, "grad_norm": 0.76953125, "learning_rate": 0.0002545601515069811, "loss": 0.2532, "step": 357990 }, { "epoch": 14.83, "grad_norm": 1.1015625, "learning_rate": 0.00025454930811223247, "loss": 0.1528, "step": 358000 }, { "epoch": 14.83, "grad_norm": 0.5546875, "learning_rate": 0.00025453846470892266, "loss": 0.1681, "step": 358010 }, { "epoch": 14.83, "grad_norm": 0.458984375, "learning_rate": 0.00025452762129707184, "loss": 0.1394, "step": 358020 }, { "epoch": 14.83, "grad_norm": 0.625, "learning_rate": 0.0002545167778767006, "loss": 0.2256, "step": 358030 }, { "epoch": 14.83, "grad_norm": 0.5234375, "learning_rate": 0.00025450593444782926, "loss": 0.1899, "step": 358040 }, { "epoch": 14.83, "grad_norm": 0.6484375, "learning_rate": 0.0002544950910104781, "loss": 0.1637, "step": 358050 }, { "epoch": 14.83, "grad_norm": 0.54296875, "learning_rate": 0.00025448424756466785, "loss": 0.1764, "step": 358060 }, { "epoch": 14.83, "grad_norm": 0.00066375732421875, "learning_rate": 0.00025447340411041865, "loss": 0.1723, "step": 358070 }, { "epoch": 14.83, "grad_norm": 1.0078125, "learning_rate": 0.000254462560647751, "loss": 0.2168, "step": 358080 }, { "epoch": 14.83, "grad_norm": 0.58203125, "learning_rate": 0.00025445171717668533, "loss": 0.1361, "step": 358090 }, { "epoch": 14.83, "grad_norm": 0.484375, "learning_rate": 0.0002544408736972419, "loss": 0.1922, "step": 358100 }, { "epoch": 14.83, "grad_norm": 0.283203125, "learning_rate": 0.00025443003020944133, "loss": 0.1789, "step": 358110 }, { "epoch": 14.83, "grad_norm": 0.83203125, "learning_rate": 0.00025441918671330387, "loss": 0.1936, "step": 358120 }, { "epoch": 14.83, "grad_norm": 0.376953125, "learning_rate": 0.00025440834320885, "loss": 0.1639, "step": 358130 }, { "epoch": 14.83, "grad_norm": 0.58984375, "learning_rate": 0.00025439749969610016, "loss": 0.1589, "step": 358140 }, { "epoch": 14.83, "grad_norm": 1.2734375, "learning_rate": 0.0002543866561750746, "loss": 0.2012, "step": 358150 }, { "epoch": 14.83, "grad_norm": 1.1015625, "learning_rate": 0.00025437581264579385, "loss": 0.2073, "step": 358160 }, { "epoch": 14.84, "grad_norm": 1.4609375, "learning_rate": 0.0002543649691082783, "loss": 0.1541, "step": 358170 }, { "epoch": 14.84, "grad_norm": 0.73046875, "learning_rate": 0.0002543541255625484, "loss": 0.1305, "step": 358180 }, { "epoch": 14.84, "grad_norm": 0.37890625, "learning_rate": 0.00025434328200862443, "loss": 0.1559, "step": 358190 }, { "epoch": 14.84, "grad_norm": 0.765625, "learning_rate": 0.00025433243844652693, "loss": 0.1656, "step": 358200 }, { "epoch": 14.84, "grad_norm": 0.5078125, "learning_rate": 0.00025432159487627623, "loss": 0.1874, "step": 358210 }, { "epoch": 14.84, "grad_norm": 0.6328125, "learning_rate": 0.00025431075129789276, "loss": 0.1694, "step": 358220 }, { "epoch": 14.84, "grad_norm": 1.046875, "learning_rate": 0.00025429990771139686, "loss": 0.2156, "step": 358230 }, { "epoch": 14.84, "grad_norm": 0.640625, "learning_rate": 0.0002542890641168091, "loss": 0.2276, "step": 358240 }, { "epoch": 14.84, "grad_norm": 1.1640625, "learning_rate": 0.0002542782205141497, "loss": 0.1726, "step": 358250 }, { "epoch": 14.84, "grad_norm": 1.6640625, "learning_rate": 0.00025426737690343915, "loss": 0.1673, "step": 358260 }, { "epoch": 14.84, "grad_norm": 0.72265625, "learning_rate": 0.00025425653328469794, "loss": 0.1183, "step": 358270 }, { "epoch": 14.84, "grad_norm": 0.69140625, "learning_rate": 0.00025424568965794637, "loss": 0.1279, "step": 358280 }, { "epoch": 14.84, "grad_norm": 0.43359375, "learning_rate": 0.00025423484602320477, "loss": 0.1633, "step": 358290 }, { "epoch": 14.84, "grad_norm": 0.91015625, "learning_rate": 0.0002542240023804938, "loss": 0.1553, "step": 358300 }, { "epoch": 14.84, "grad_norm": 0.625, "learning_rate": 0.0002542131587298336, "loss": 0.1792, "step": 358310 }, { "epoch": 14.84, "grad_norm": 1.046875, "learning_rate": 0.00025420231507124473, "loss": 0.2248, "step": 358320 }, { "epoch": 14.84, "grad_norm": 1.015625, "learning_rate": 0.00025419147140474756, "loss": 0.1716, "step": 358330 }, { "epoch": 14.84, "grad_norm": 0.5078125, "learning_rate": 0.0002541806277303625, "loss": 0.1778, "step": 358340 }, { "epoch": 14.84, "grad_norm": 0.95703125, "learning_rate": 0.00025416978404810996, "loss": 0.198, "step": 358350 }, { "epoch": 14.84, "grad_norm": 0.6640625, "learning_rate": 0.0002541589403580103, "loss": 0.1594, "step": 358360 }, { "epoch": 14.84, "grad_norm": 1.0390625, "learning_rate": 0.00025414809666008396, "loss": 0.1609, "step": 358370 }, { "epoch": 14.84, "grad_norm": 0.8046875, "learning_rate": 0.0002541372529543514, "loss": 0.1671, "step": 358380 }, { "epoch": 14.84, "grad_norm": 0.83984375, "learning_rate": 0.000254126409240833, "loss": 0.2066, "step": 358390 }, { "epoch": 14.84, "grad_norm": 0.71484375, "learning_rate": 0.000254115565519549, "loss": 0.2047, "step": 358400 }, { "epoch": 14.85, "grad_norm": 1.25, "learning_rate": 0.00025410472179052004, "loss": 0.2159, "step": 358410 }, { "epoch": 14.85, "grad_norm": 1.25, "learning_rate": 0.00025409387805376646, "loss": 0.1826, "step": 358420 }, { "epoch": 14.85, "grad_norm": 0.90625, "learning_rate": 0.00025408303430930865, "loss": 0.1484, "step": 358430 }, { "epoch": 14.85, "grad_norm": 0.46875, "learning_rate": 0.000254072190557167, "loss": 0.2288, "step": 358440 }, { "epoch": 14.85, "grad_norm": 0.86328125, "learning_rate": 0.00025406134679736186, "loss": 0.1836, "step": 358450 }, { "epoch": 14.85, "grad_norm": 2.1875, "learning_rate": 0.0002540505030299138, "loss": 0.202, "step": 358460 }, { "epoch": 14.85, "grad_norm": 0.88671875, "learning_rate": 0.00025403965925484306, "loss": 0.221, "step": 358470 }, { "epoch": 14.85, "grad_norm": 0.345703125, "learning_rate": 0.0002540288154721701, "loss": 0.2146, "step": 358480 }, { "epoch": 14.85, "grad_norm": 0.98828125, "learning_rate": 0.00025401797168191537, "loss": 0.1904, "step": 358490 }, { "epoch": 14.85, "grad_norm": 0.6796875, "learning_rate": 0.00025400712788409925, "loss": 0.2514, "step": 358500 }, { "epoch": 14.85, "grad_norm": 1.0234375, "learning_rate": 0.0002539962840787422, "loss": 0.1997, "step": 358510 }, { "epoch": 14.85, "grad_norm": 0.248046875, "learning_rate": 0.00025398544026586454, "loss": 0.1807, "step": 358520 }, { "epoch": 14.85, "grad_norm": 0.640625, "learning_rate": 0.0002539745964454867, "loss": 0.2131, "step": 358530 }, { "epoch": 14.85, "grad_norm": 0.609375, "learning_rate": 0.0002539637526176291, "loss": 0.1603, "step": 358540 }, { "epoch": 14.85, "grad_norm": 0.59765625, "learning_rate": 0.0002539529087823122, "loss": 0.1724, "step": 358550 }, { "epoch": 14.85, "grad_norm": 0.64453125, "learning_rate": 0.0002539420649395563, "loss": 0.1844, "step": 358560 }, { "epoch": 14.85, "grad_norm": 1.9921875, "learning_rate": 0.0002539312210893818, "loss": 0.1754, "step": 358570 }, { "epoch": 14.85, "grad_norm": 0.765625, "learning_rate": 0.00025392037723180925, "loss": 0.1999, "step": 358580 }, { "epoch": 14.85, "grad_norm": 0.62890625, "learning_rate": 0.000253909533366859, "loss": 0.224, "step": 358590 }, { "epoch": 14.85, "grad_norm": 0.94921875, "learning_rate": 0.00025389868949455137, "loss": 0.1793, "step": 358600 }, { "epoch": 14.85, "grad_norm": 0.69921875, "learning_rate": 0.0002538878456149069, "loss": 0.2121, "step": 358610 }, { "epoch": 14.85, "grad_norm": 1.7421875, "learning_rate": 0.0002538770017279458, "loss": 0.1516, "step": 358620 }, { "epoch": 14.85, "grad_norm": 1.46875, "learning_rate": 0.0002538661578336887, "loss": 0.1908, "step": 358630 }, { "epoch": 14.85, "grad_norm": 0.7421875, "learning_rate": 0.00025385531393215587, "loss": 0.2066, "step": 358640 }, { "epoch": 14.86, "grad_norm": 0.546875, "learning_rate": 0.00025384447002336775, "loss": 0.1962, "step": 358650 }, { "epoch": 14.86, "grad_norm": 0.4375, "learning_rate": 0.0002538336261073447, "loss": 0.2117, "step": 358660 }, { "epoch": 14.86, "grad_norm": 0.6328125, "learning_rate": 0.00025382278218410737, "loss": 0.1989, "step": 358670 }, { "epoch": 14.86, "grad_norm": 0.78515625, "learning_rate": 0.0002538119382536758, "loss": 0.2051, "step": 358680 }, { "epoch": 14.86, "grad_norm": 0.87890625, "learning_rate": 0.00025380109431607064, "loss": 0.181, "step": 358690 }, { "epoch": 14.86, "grad_norm": 0.58984375, "learning_rate": 0.0002537902503713122, "loss": 0.2245, "step": 358700 }, { "epoch": 14.86, "grad_norm": 0.69140625, "learning_rate": 0.000253779406419421, "loss": 0.2495, "step": 358710 }, { "epoch": 14.86, "grad_norm": 0.63671875, "learning_rate": 0.00025376856246041724, "loss": 0.209, "step": 358720 }, { "epoch": 14.86, "grad_norm": 0.400390625, "learning_rate": 0.0002537577184943215, "loss": 0.1868, "step": 358730 }, { "epoch": 14.86, "grad_norm": 0.486328125, "learning_rate": 0.0002537468745211541, "loss": 0.2076, "step": 358740 }, { "epoch": 14.86, "grad_norm": 0.7734375, "learning_rate": 0.00025373603054093557, "loss": 0.2117, "step": 358750 }, { "epoch": 14.86, "grad_norm": 3.1875, "learning_rate": 0.00025372518655368624, "loss": 0.2006, "step": 358760 }, { "epoch": 14.86, "grad_norm": 1.0234375, "learning_rate": 0.0002537143425594265, "loss": 0.202, "step": 358770 }, { "epoch": 14.86, "grad_norm": 2.609375, "learning_rate": 0.0002537034985581767, "loss": 0.1715, "step": 358780 }, { "epoch": 14.86, "grad_norm": 1.0, "learning_rate": 0.00025369265454995744, "loss": 0.175, "step": 358790 }, { "epoch": 14.86, "grad_norm": 0.8984375, "learning_rate": 0.0002536818105347889, "loss": 0.1898, "step": 358800 }, { "epoch": 14.86, "grad_norm": 0.439453125, "learning_rate": 0.00025367096651269155, "loss": 0.1529, "step": 358810 }, { "epoch": 14.86, "grad_norm": 0.81640625, "learning_rate": 0.0002536601224836859, "loss": 0.1974, "step": 358820 }, { "epoch": 14.86, "grad_norm": 0.78125, "learning_rate": 0.00025364927844779227, "loss": 0.2176, "step": 358830 }, { "epoch": 14.86, "grad_norm": 1.125, "learning_rate": 0.0002536384344050312, "loss": 0.2036, "step": 358840 }, { "epoch": 14.86, "grad_norm": 0.69140625, "learning_rate": 0.0002536275903554229, "loss": 0.1483, "step": 358850 }, { "epoch": 14.86, "grad_norm": 0.36328125, "learning_rate": 0.00025361674629898785, "loss": 0.2166, "step": 358860 }, { "epoch": 14.86, "grad_norm": 1.4921875, "learning_rate": 0.0002536059022357465, "loss": 0.1811, "step": 358870 }, { "epoch": 14.86, "grad_norm": 0.62109375, "learning_rate": 0.0002535950581657192, "loss": 0.1628, "step": 358880 }, { "epoch": 14.87, "grad_norm": 0.671875, "learning_rate": 0.00025358421408892644, "loss": 0.2123, "step": 358890 }, { "epoch": 14.87, "grad_norm": 1.625, "learning_rate": 0.00025357337000538857, "loss": 0.1746, "step": 358900 }, { "epoch": 14.87, "grad_norm": 0.275390625, "learning_rate": 0.00025356252591512594, "loss": 0.1505, "step": 358910 }, { "epoch": 14.87, "grad_norm": 1.453125, "learning_rate": 0.00025355168181815914, "loss": 0.176, "step": 358920 }, { "epoch": 14.87, "grad_norm": 1.328125, "learning_rate": 0.00025354083771450835, "loss": 0.1695, "step": 358930 }, { "epoch": 14.87, "grad_norm": 0.890625, "learning_rate": 0.0002535299936041941, "loss": 0.1771, "step": 358940 }, { "epoch": 14.87, "grad_norm": 0.73828125, "learning_rate": 0.0002535191494872369, "loss": 0.206, "step": 358950 }, { "epoch": 14.87, "grad_norm": 0.345703125, "learning_rate": 0.00025350830536365683, "loss": 0.1936, "step": 358960 }, { "epoch": 14.87, "grad_norm": 0.875, "learning_rate": 0.0002534974612334747, "loss": 0.1785, "step": 358970 }, { "epoch": 14.87, "grad_norm": 0.95703125, "learning_rate": 0.0002534866170967106, "loss": 0.1574, "step": 358980 }, { "epoch": 14.87, "grad_norm": 0.91015625, "learning_rate": 0.0002534757729533851, "loss": 0.2264, "step": 358990 }, { "epoch": 14.87, "grad_norm": 1.765625, "learning_rate": 0.00025346492880351865, "loss": 0.1429, "step": 359000 }, { "epoch": 14.87, "grad_norm": 2.453125, "learning_rate": 0.0002534540846471315, "loss": 0.177, "step": 359010 }, { "epoch": 14.87, "grad_norm": 2.515625, "learning_rate": 0.0002534432404842441, "loss": 0.1626, "step": 359020 }, { "epoch": 14.87, "grad_norm": 0.8046875, "learning_rate": 0.00025343239631487696, "loss": 0.1751, "step": 359030 }, { "epoch": 14.87, "grad_norm": 1.015625, "learning_rate": 0.0002534215521390504, "loss": 0.1586, "step": 359040 }, { "epoch": 14.87, "grad_norm": 1.0, "learning_rate": 0.00025341070795678486, "loss": 0.1668, "step": 359050 }, { "epoch": 14.87, "grad_norm": 0.625, "learning_rate": 0.0002533998637681007, "loss": 0.2804, "step": 359060 }, { "epoch": 14.87, "grad_norm": 0.54296875, "learning_rate": 0.00025338901957301836, "loss": 0.1531, "step": 359070 }, { "epoch": 14.87, "grad_norm": 0.53125, "learning_rate": 0.0002533781753715583, "loss": 0.1799, "step": 359080 }, { "epoch": 14.87, "grad_norm": 1.34375, "learning_rate": 0.00025336733116374085, "loss": 0.2043, "step": 359090 }, { "epoch": 14.87, "grad_norm": 0.7578125, "learning_rate": 0.00025335648694958644, "loss": 0.1864, "step": 359100 }, { "epoch": 14.87, "grad_norm": 0.65625, "learning_rate": 0.00025334564272911557, "loss": 0.1963, "step": 359110 }, { "epoch": 14.87, "grad_norm": 0.48046875, "learning_rate": 0.0002533347985023484, "loss": 0.1924, "step": 359120 }, { "epoch": 14.88, "grad_norm": 1.234375, "learning_rate": 0.0002533239542693057, "loss": 0.1944, "step": 359130 }, { "epoch": 14.88, "grad_norm": 0.94921875, "learning_rate": 0.00025331311003000753, "loss": 0.1704, "step": 359140 }, { "epoch": 14.88, "grad_norm": 1.015625, "learning_rate": 0.00025330226578447446, "loss": 0.1746, "step": 359150 }, { "epoch": 14.88, "grad_norm": 1.2421875, "learning_rate": 0.00025329142153272696, "loss": 0.2239, "step": 359160 }, { "epoch": 14.88, "grad_norm": 0.53125, "learning_rate": 0.0002532805772747852, "loss": 0.2413, "step": 359170 }, { "epoch": 14.88, "grad_norm": 0.5546875, "learning_rate": 0.00025326973301066993, "loss": 0.1697, "step": 359180 }, { "epoch": 14.88, "grad_norm": 0.69921875, "learning_rate": 0.00025325888874040133, "loss": 0.1769, "step": 359190 }, { "epoch": 14.88, "grad_norm": 1.2265625, "learning_rate": 0.00025324804446399973, "loss": 0.1755, "step": 359200 }, { "epoch": 14.88, "grad_norm": 0.67578125, "learning_rate": 0.0002532372001814858, "loss": 0.1935, "step": 359210 }, { "epoch": 14.88, "grad_norm": 0.62890625, "learning_rate": 0.00025322635589287973, "loss": 0.1859, "step": 359220 }, { "epoch": 14.88, "grad_norm": 0.71484375, "learning_rate": 0.00025321551159820206, "loss": 0.1731, "step": 359230 }, { "epoch": 14.88, "grad_norm": 1.0546875, "learning_rate": 0.0002532046672974732, "loss": 0.192, "step": 359240 }, { "epoch": 14.88, "grad_norm": 0.57421875, "learning_rate": 0.0002531938229907133, "loss": 0.1945, "step": 359250 }, { "epoch": 14.88, "grad_norm": 0.8046875, "learning_rate": 0.0002531829786779432, "loss": 0.2061, "step": 359260 }, { "epoch": 14.88, "grad_norm": 1.7265625, "learning_rate": 0.000253172134359183, "loss": 0.2329, "step": 359270 }, { "epoch": 14.88, "grad_norm": 1.34375, "learning_rate": 0.00025316129003445306, "loss": 0.1909, "step": 359280 }, { "epoch": 14.88, "grad_norm": 0.9453125, "learning_rate": 0.00025315044570377407, "loss": 0.1992, "step": 359290 }, { "epoch": 14.88, "grad_norm": 0.625, "learning_rate": 0.0002531396013671662, "loss": 0.2165, "step": 359300 }, { "epoch": 14.88, "grad_norm": 0.640625, "learning_rate": 0.0002531287570246499, "loss": 0.1835, "step": 359310 }, { "epoch": 14.88, "grad_norm": 0.8125, "learning_rate": 0.00025311791267624575, "loss": 0.1744, "step": 359320 }, { "epoch": 14.88, "grad_norm": 0.7890625, "learning_rate": 0.00025310706832197387, "loss": 0.2325, "step": 359330 }, { "epoch": 14.88, "grad_norm": 0.59765625, "learning_rate": 0.00025309622396185494, "loss": 0.1806, "step": 359340 }, { "epoch": 14.88, "grad_norm": 0.671875, "learning_rate": 0.0002530853795959093, "loss": 0.163, "step": 359350 }, { "epoch": 14.88, "grad_norm": 0.87890625, "learning_rate": 0.00025307453522415715, "loss": 0.1625, "step": 359360 }, { "epoch": 14.89, "grad_norm": 0.625, "learning_rate": 0.00025306369084661915, "loss": 0.1433, "step": 359370 }, { "epoch": 14.89, "grad_norm": 0.85546875, "learning_rate": 0.0002530528464633156, "loss": 0.1696, "step": 359380 }, { "epoch": 14.89, "grad_norm": 0.21484375, "learning_rate": 0.0002530420020742669, "loss": 0.1851, "step": 359390 }, { "epoch": 14.89, "grad_norm": 0.90625, "learning_rate": 0.00025303115767949356, "loss": 0.2032, "step": 359400 }, { "epoch": 14.89, "grad_norm": 0.828125, "learning_rate": 0.00025302031327901575, "loss": 0.1747, "step": 359410 }, { "epoch": 14.89, "grad_norm": 0.466796875, "learning_rate": 0.0002530094688728542, "loss": 0.1978, "step": 359420 }, { "epoch": 14.89, "grad_norm": 1.28125, "learning_rate": 0.00025299862446102916, "loss": 0.1921, "step": 359430 }, { "epoch": 14.89, "grad_norm": 1.015625, "learning_rate": 0.00025298778004356087, "loss": 0.2023, "step": 359440 }, { "epoch": 14.89, "grad_norm": 2.109375, "learning_rate": 0.00025297693562047, "loss": 0.1664, "step": 359450 }, { "epoch": 14.89, "grad_norm": 3.125, "learning_rate": 0.00025296609119177687, "loss": 0.1948, "step": 359460 }, { "epoch": 14.89, "grad_norm": 2.4375, "learning_rate": 0.0002529552467575019, "loss": 0.2153, "step": 359470 }, { "epoch": 14.89, "grad_norm": 0.94921875, "learning_rate": 0.0002529444023176654, "loss": 0.2, "step": 359480 }, { "epoch": 14.89, "grad_norm": 1.234375, "learning_rate": 0.00025293355787228786, "loss": 0.1725, "step": 359490 }, { "epoch": 14.89, "grad_norm": 1.171875, "learning_rate": 0.0002529227134213897, "loss": 0.2134, "step": 359500 }, { "epoch": 14.89, "grad_norm": 0.6875, "learning_rate": 0.00025291186896499136, "loss": 0.1979, "step": 359510 }, { "epoch": 14.89, "grad_norm": 1.4453125, "learning_rate": 0.0002529010245031131, "loss": 0.201, "step": 359520 }, { "epoch": 14.89, "grad_norm": 1.09375, "learning_rate": 0.0002528901800357756, "loss": 0.2236, "step": 359530 }, { "epoch": 14.89, "grad_norm": 0.95703125, "learning_rate": 0.0002528793355629989, "loss": 0.199, "step": 359540 }, { "epoch": 14.89, "grad_norm": 0.0, "learning_rate": 0.0002528684910848037, "loss": 0.1422, "step": 359550 }, { "epoch": 14.89, "grad_norm": 0.91796875, "learning_rate": 0.0002528576466012103, "loss": 0.2095, "step": 359560 }, { "epoch": 14.89, "grad_norm": 0.7109375, "learning_rate": 0.000252846802112239, "loss": 0.2108, "step": 359570 }, { "epoch": 14.89, "grad_norm": 0.76171875, "learning_rate": 0.0002528359576179105, "loss": 0.213, "step": 359580 }, { "epoch": 14.89, "grad_norm": 0.52734375, "learning_rate": 0.00025282511311824495, "loss": 0.2246, "step": 359590 }, { "epoch": 14.89, "grad_norm": 1.453125, "learning_rate": 0.00025281426861326284, "loss": 0.1901, "step": 359600 }, { "epoch": 14.9, "grad_norm": 0.63671875, "learning_rate": 0.0002528034241029846, "loss": 0.1519, "step": 359610 }, { "epoch": 14.9, "grad_norm": 2.5, "learning_rate": 0.0002527925795874306, "loss": 0.2397, "step": 359620 }, { "epoch": 14.9, "grad_norm": 0.423828125, "learning_rate": 0.0002527817350666213, "loss": 0.1527, "step": 359630 }, { "epoch": 14.9, "grad_norm": 1.296875, "learning_rate": 0.0002527708905405771, "loss": 0.1789, "step": 359640 }, { "epoch": 14.9, "grad_norm": 0.70703125, "learning_rate": 0.0002527600460093183, "loss": 0.1719, "step": 359650 }, { "epoch": 14.9, "grad_norm": 0.69140625, "learning_rate": 0.00025274920147286544, "loss": 0.2247, "step": 359660 }, { "epoch": 14.9, "grad_norm": 0.625, "learning_rate": 0.00025273835693123887, "loss": 0.2209, "step": 359670 }, { "epoch": 14.9, "grad_norm": 0.9296875, "learning_rate": 0.000252727512384459, "loss": 0.2147, "step": 359680 }, { "epoch": 14.9, "grad_norm": 2.359375, "learning_rate": 0.00025271666783254626, "loss": 0.2017, "step": 359690 }, { "epoch": 14.9, "grad_norm": 0.9921875, "learning_rate": 0.00025270582327552104, "loss": 0.1505, "step": 359700 }, { "epoch": 14.9, "grad_norm": 1.2421875, "learning_rate": 0.00025269497871340375, "loss": 0.2072, "step": 359710 }, { "epoch": 14.9, "grad_norm": 0.9921875, "learning_rate": 0.0002526841341462148, "loss": 0.1733, "step": 359720 }, { "epoch": 14.9, "grad_norm": 1.296875, "learning_rate": 0.0002526732895739746, "loss": 0.1803, "step": 359730 }, { "epoch": 14.9, "grad_norm": 0.78125, "learning_rate": 0.00025266244499670356, "loss": 0.2306, "step": 359740 }, { "epoch": 14.9, "grad_norm": 0.67578125, "learning_rate": 0.00025265160041442207, "loss": 0.2089, "step": 359750 }, { "epoch": 14.9, "grad_norm": 0.53125, "learning_rate": 0.0002526407558271506, "loss": 0.1768, "step": 359760 }, { "epoch": 14.9, "grad_norm": 1.0078125, "learning_rate": 0.00025262991123490945, "loss": 0.2032, "step": 359770 }, { "epoch": 14.9, "grad_norm": 0.8828125, "learning_rate": 0.00025261906663771915, "loss": 0.2276, "step": 359780 }, { "epoch": 14.9, "grad_norm": 0.84765625, "learning_rate": 0.0002526082220356, "loss": 0.2029, "step": 359790 }, { "epoch": 14.9, "grad_norm": 1.234375, "learning_rate": 0.00025259737742857247, "loss": 0.1949, "step": 359800 }, { "epoch": 14.9, "grad_norm": 1.234375, "learning_rate": 0.00025258653281665696, "loss": 0.1974, "step": 359810 }, { "epoch": 14.9, "grad_norm": 1.8984375, "learning_rate": 0.00025257568819987393, "loss": 0.1751, "step": 359820 }, { "epoch": 14.9, "grad_norm": 0.87890625, "learning_rate": 0.00025256484357824365, "loss": 0.2002, "step": 359830 }, { "epoch": 14.9, "grad_norm": 0.71484375, "learning_rate": 0.00025255399895178667, "loss": 0.2068, "step": 359840 }, { "epoch": 14.9, "grad_norm": 0.59375, "learning_rate": 0.00025254315432052325, "loss": 0.2071, "step": 359850 }, { "epoch": 14.91, "grad_norm": 0.474609375, "learning_rate": 0.000252532309684474, "loss": 0.1662, "step": 359860 }, { "epoch": 14.91, "grad_norm": 0.90234375, "learning_rate": 0.00025252146504365914, "loss": 0.146, "step": 359870 }, { "epoch": 14.91, "grad_norm": 0.7890625, "learning_rate": 0.00025251062039809917, "loss": 0.238, "step": 359880 }, { "epoch": 14.91, "grad_norm": 0.81640625, "learning_rate": 0.0002524997757478145, "loss": 0.1775, "step": 359890 }, { "epoch": 14.91, "grad_norm": 0.486328125, "learning_rate": 0.00025248893109282555, "loss": 0.2188, "step": 359900 }, { "epoch": 14.91, "grad_norm": 1.09375, "learning_rate": 0.00025247808643315267, "loss": 0.1719, "step": 359910 }, { "epoch": 14.91, "grad_norm": 1.3203125, "learning_rate": 0.0002524672417688163, "loss": 0.1732, "step": 359920 }, { "epoch": 14.91, "grad_norm": 1.03125, "learning_rate": 0.0002524563970998368, "loss": 0.1857, "step": 359930 }, { "epoch": 14.91, "grad_norm": 1.4140625, "learning_rate": 0.00025244555242623476, "loss": 0.2159, "step": 359940 }, { "epoch": 14.91, "grad_norm": 0.57421875, "learning_rate": 0.0002524347077480303, "loss": 0.2092, "step": 359950 }, { "epoch": 14.91, "grad_norm": 1.1953125, "learning_rate": 0.00025242386306524405, "loss": 0.1888, "step": 359960 }, { "epoch": 14.91, "grad_norm": 0.74609375, "learning_rate": 0.00025241301837789637, "loss": 0.206, "step": 359970 }, { "epoch": 14.91, "grad_norm": 1.921875, "learning_rate": 0.0002524021736860077, "loss": 0.2136, "step": 359980 }, { "epoch": 14.91, "grad_norm": 0.95703125, "learning_rate": 0.0002523913289895983, "loss": 0.1948, "step": 359990 }, { "epoch": 14.91, "grad_norm": 0.486328125, "learning_rate": 0.00025238048428868865, "loss": 0.1798, "step": 360000 } ], "logging_steps": 10, "max_steps": 724290, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 20000, "total_flos": 2.2684460939497964e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }