{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 114.50381679389314, "eval_steps": 500, "global_step": 30000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03816793893129771, "grad_norm": 11.100768089294434, "learning_rate": 6.000000000000001e-08, "loss": 1.2659, "step": 10 }, { "epoch": 0.07633587786259542, "grad_norm": 4.894315242767334, "learning_rate": 1.2666666666666666e-07, "loss": 1.1033, "step": 20 }, { "epoch": 0.11450381679389313, "grad_norm": 8.250714302062988, "learning_rate": 1.9333333333333337e-07, "loss": 1.0279, "step": 30 }, { "epoch": 0.15267175572519084, "grad_norm": 4.837605953216553, "learning_rate": 2.6e-07, "loss": 0.9319, "step": 40 }, { "epoch": 0.19083969465648856, "grad_norm": 6.9533257484436035, "learning_rate": 3.266666666666667e-07, "loss": 1.3479, "step": 50 }, { "epoch": 0.22900763358778625, "grad_norm": 7.103541851043701, "learning_rate": 3.9333333333333336e-07, "loss": 1.0561, "step": 60 }, { "epoch": 0.26717557251908397, "grad_norm": 5.55342435836792, "learning_rate": 4.6000000000000004e-07, "loss": 1.0501, "step": 70 }, { "epoch": 0.3053435114503817, "grad_norm": 6.5388264656066895, "learning_rate": 5.266666666666667e-07, "loss": 1.2378, "step": 80 }, { "epoch": 0.3435114503816794, "grad_norm": 7.388347148895264, "learning_rate": 5.933333333333334e-07, "loss": 0.8909, "step": 90 }, { "epoch": 0.3816793893129771, "grad_norm": 3.4540562629699707, "learning_rate": 6.6e-07, "loss": 1.1626, "step": 100 }, { "epoch": 0.4198473282442748, "grad_norm": 5.60060977935791, "learning_rate": 7.266666666666668e-07, "loss": 1.0488, "step": 110 }, { "epoch": 0.4580152671755725, "grad_norm": 4.210075378417969, "learning_rate": 7.933333333333335e-07, "loss": 1.0841, "step": 120 }, { "epoch": 0.4961832061068702, "grad_norm": 10.231063842773438, "learning_rate": 8.6e-07, "loss": 1.0938, "step": 130 }, { "epoch": 0.5343511450381679, "grad_norm": 5.16438102722168, "learning_rate": 9.266666666666667e-07, "loss": 0.875, "step": 140 }, { "epoch": 0.5725190839694656, "grad_norm": 1.9672963619232178, "learning_rate": 9.933333333333333e-07, "loss": 0.7903, "step": 150 }, { "epoch": 0.6106870229007634, "grad_norm": 2.4632840156555176, "learning_rate": 1.06e-06, "loss": 0.7718, "step": 160 }, { "epoch": 0.648854961832061, "grad_norm": 2.09865140914917, "learning_rate": 1.1266666666666667e-06, "loss": 0.8049, "step": 170 }, { "epoch": 0.6870229007633588, "grad_norm": 2.6679656505584717, "learning_rate": 1.1933333333333335e-06, "loss": 0.6023, "step": 180 }, { "epoch": 0.7251908396946565, "grad_norm": 4.250058650970459, "learning_rate": 1.26e-06, "loss": 0.6673, "step": 190 }, { "epoch": 0.7633587786259542, "grad_norm": 2.9157729148864746, "learning_rate": 1.3266666666666667e-06, "loss": 0.6071, "step": 200 }, { "epoch": 0.8015267175572519, "grad_norm": 2.76187801361084, "learning_rate": 1.3933333333333335e-06, "loss": 0.6026, "step": 210 }, { "epoch": 0.8396946564885496, "grad_norm": 1.7407684326171875, "learning_rate": 1.46e-06, "loss": 0.5992, "step": 220 }, { "epoch": 0.8778625954198473, "grad_norm": 1.8290092945098877, "learning_rate": 1.526666666666667e-06, "loss": 0.4348, "step": 230 }, { "epoch": 0.916030534351145, "grad_norm": 1.939551591873169, "learning_rate": 1.5933333333333335e-06, "loss": 0.5998, "step": 240 }, { "epoch": 0.9541984732824428, "grad_norm": 1.6572309732437134, "learning_rate": 1.6600000000000002e-06, "loss": 0.3822, "step": 250 }, { "epoch": 0.9923664122137404, "grad_norm": 2.4682672023773193, "learning_rate": 1.7266666666666667e-06, "loss": 0.4126, "step": 260 }, { "epoch": 1.0305343511450382, "grad_norm": 2.106403112411499, "learning_rate": 1.7933333333333337e-06, "loss": 0.3898, "step": 270 }, { "epoch": 1.0687022900763359, "grad_norm": 1.331116795539856, "learning_rate": 1.8600000000000002e-06, "loss": 0.3547, "step": 280 }, { "epoch": 1.1068702290076335, "grad_norm": 1.4754210710525513, "learning_rate": 1.926666666666667e-06, "loss": 0.3567, "step": 290 }, { "epoch": 1.1450381679389312, "grad_norm": 1.8555392026901245, "learning_rate": 1.9933333333333334e-06, "loss": 0.3293, "step": 300 }, { "epoch": 1.183206106870229, "grad_norm": 1.6697174310684204, "learning_rate": 2.06e-06, "loss": 0.3181, "step": 310 }, { "epoch": 1.2213740458015268, "grad_norm": 1.0834065675735474, "learning_rate": 2.126666666666667e-06, "loss": 0.2892, "step": 320 }, { "epoch": 1.2595419847328244, "grad_norm": 1.046123743057251, "learning_rate": 2.1933333333333332e-06, "loss": 0.3114, "step": 330 }, { "epoch": 1.297709923664122, "grad_norm": 3.9311656951904297, "learning_rate": 2.2600000000000004e-06, "loss": 0.2735, "step": 340 }, { "epoch": 1.33587786259542, "grad_norm": 2.4286820888519287, "learning_rate": 2.3266666666666667e-06, "loss": 0.2951, "step": 350 }, { "epoch": 1.3740458015267176, "grad_norm": 1.0940420627593994, "learning_rate": 2.3933333333333334e-06, "loss": 0.2591, "step": 360 }, { "epoch": 1.4122137404580153, "grad_norm": 1.823198914527893, "learning_rate": 2.46e-06, "loss": 0.2793, "step": 370 }, { "epoch": 1.450381679389313, "grad_norm": 2.960927724838257, "learning_rate": 2.526666666666667e-06, "loss": 0.2855, "step": 380 }, { "epoch": 1.4885496183206106, "grad_norm": 1.0646835565567017, "learning_rate": 2.5933333333333336e-06, "loss": 0.2421, "step": 390 }, { "epoch": 1.5267175572519083, "grad_norm": 1.0445008277893066, "learning_rate": 2.6600000000000004e-06, "loss": 0.2547, "step": 400 }, { "epoch": 1.5648854961832062, "grad_norm": 1.456775188446045, "learning_rate": 2.726666666666667e-06, "loss": 0.2318, "step": 410 }, { "epoch": 1.6030534351145038, "grad_norm": 1.3221451044082642, "learning_rate": 2.7933333333333334e-06, "loss": 0.2203, "step": 420 }, { "epoch": 1.6412213740458015, "grad_norm": 0.8609516024589539, "learning_rate": 2.86e-06, "loss": 0.2129, "step": 430 }, { "epoch": 1.6793893129770994, "grad_norm": 1.8033744096755981, "learning_rate": 2.9266666666666673e-06, "loss": 0.2442, "step": 440 }, { "epoch": 1.717557251908397, "grad_norm": 1.3024567365646362, "learning_rate": 2.9933333333333336e-06, "loss": 0.2504, "step": 450 }, { "epoch": 1.7557251908396947, "grad_norm": 1.1774224042892456, "learning_rate": 3.0600000000000003e-06, "loss": 0.2251, "step": 460 }, { "epoch": 1.7938931297709924, "grad_norm": 0.9202947020530701, "learning_rate": 3.1266666666666667e-06, "loss": 0.1936, "step": 470 }, { "epoch": 1.83206106870229, "grad_norm": 1.2168657779693604, "learning_rate": 3.193333333333334e-06, "loss": 0.2638, "step": 480 }, { "epoch": 1.8702290076335877, "grad_norm": 1.1469149589538574, "learning_rate": 3.2600000000000006e-06, "loss": 0.1834, "step": 490 }, { "epoch": 1.9083969465648853, "grad_norm": 1.923591136932373, "learning_rate": 3.326666666666667e-06, "loss": 0.1972, "step": 500 }, { "epoch": 1.9465648854961832, "grad_norm": 0.7359837889671326, "learning_rate": 3.3933333333333336e-06, "loss": 0.2008, "step": 510 }, { "epoch": 1.984732824427481, "grad_norm": 1.34367036819458, "learning_rate": 3.46e-06, "loss": 0.1836, "step": 520 }, { "epoch": 2.0229007633587788, "grad_norm": 0.9186052680015564, "learning_rate": 3.526666666666667e-06, "loss": 0.19, "step": 530 }, { "epoch": 2.0610687022900764, "grad_norm": 1.0746238231658936, "learning_rate": 3.593333333333334e-06, "loss": 0.1821, "step": 540 }, { "epoch": 2.099236641221374, "grad_norm": 0.6840347051620483, "learning_rate": 3.66e-06, "loss": 0.1709, "step": 550 }, { "epoch": 2.1374045801526718, "grad_norm": 1.209904670715332, "learning_rate": 3.726666666666667e-06, "loss": 0.1677, "step": 560 }, { "epoch": 2.1755725190839694, "grad_norm": 1.1672343015670776, "learning_rate": 3.793333333333334e-06, "loss": 0.1811, "step": 570 }, { "epoch": 2.213740458015267, "grad_norm": 1.155468225479126, "learning_rate": 3.86e-06, "loss": 0.2217, "step": 580 }, { "epoch": 2.2519083969465647, "grad_norm": 1.3697214126586914, "learning_rate": 3.926666666666667e-06, "loss": 0.2069, "step": 590 }, { "epoch": 2.2900763358778624, "grad_norm": 1.2069604396820068, "learning_rate": 3.993333333333334e-06, "loss": 0.1824, "step": 600 }, { "epoch": 2.32824427480916, "grad_norm": 1.0572060346603394, "learning_rate": 4.060000000000001e-06, "loss": 0.2115, "step": 610 }, { "epoch": 2.366412213740458, "grad_norm": 3.517552137374878, "learning_rate": 4.126666666666667e-06, "loss": 0.1802, "step": 620 }, { "epoch": 2.404580152671756, "grad_norm": 0.6924391388893127, "learning_rate": 4.1933333333333336e-06, "loss": 0.1771, "step": 630 }, { "epoch": 2.4427480916030535, "grad_norm": 1.402863621711731, "learning_rate": 4.26e-06, "loss": 0.1774, "step": 640 }, { "epoch": 2.480916030534351, "grad_norm": 3.4374442100524902, "learning_rate": 4.326666666666667e-06, "loss": 0.1678, "step": 650 }, { "epoch": 2.519083969465649, "grad_norm": 1.5495679378509521, "learning_rate": 4.393333333333334e-06, "loss": 0.1867, "step": 660 }, { "epoch": 2.5572519083969465, "grad_norm": 0.8096297383308411, "learning_rate": 4.4600000000000005e-06, "loss": 0.1647, "step": 670 }, { "epoch": 2.595419847328244, "grad_norm": 1.6879674196243286, "learning_rate": 4.526666666666667e-06, "loss": 0.1838, "step": 680 }, { "epoch": 2.633587786259542, "grad_norm": 1.4142144918441772, "learning_rate": 4.593333333333333e-06, "loss": 0.15, "step": 690 }, { "epoch": 2.67175572519084, "grad_norm": 1.1931599378585815, "learning_rate": 4.66e-06, "loss": 0.1637, "step": 700 }, { "epoch": 2.7099236641221376, "grad_norm": 0.7196221947669983, "learning_rate": 4.7266666666666674e-06, "loss": 0.1746, "step": 710 }, { "epoch": 2.7480916030534353, "grad_norm": 1.426375150680542, "learning_rate": 4.793333333333334e-06, "loss": 0.1618, "step": 720 }, { "epoch": 2.786259541984733, "grad_norm": 1.2002971172332764, "learning_rate": 4.86e-06, "loss": 0.1647, "step": 730 }, { "epoch": 2.8244274809160306, "grad_norm": 1.275342345237732, "learning_rate": 4.926666666666667e-06, "loss": 0.1531, "step": 740 }, { "epoch": 2.8625954198473282, "grad_norm": 0.6153695583343506, "learning_rate": 4.9933333333333335e-06, "loss": 0.1747, "step": 750 }, { "epoch": 2.900763358778626, "grad_norm": 0.6474863290786743, "learning_rate": 5.060000000000001e-06, "loss": 0.1724, "step": 760 }, { "epoch": 2.9389312977099236, "grad_norm": 0.8682901263237, "learning_rate": 5.126666666666668e-06, "loss": 0.1724, "step": 770 }, { "epoch": 2.9770992366412212, "grad_norm": 0.9209064841270447, "learning_rate": 5.193333333333333e-06, "loss": 0.1791, "step": 780 }, { "epoch": 3.015267175572519, "grad_norm": 1.5052716732025146, "learning_rate": 5.2600000000000005e-06, "loss": 0.1536, "step": 790 }, { "epoch": 3.053435114503817, "grad_norm": 0.6915569305419922, "learning_rate": 5.326666666666667e-06, "loss": 0.1644, "step": 800 }, { "epoch": 3.0916030534351147, "grad_norm": 1.194627046585083, "learning_rate": 5.393333333333334e-06, "loss": 0.1673, "step": 810 }, { "epoch": 3.1297709923664123, "grad_norm": 1.9529887437820435, "learning_rate": 5.460000000000001e-06, "loss": 0.1541, "step": 820 }, { "epoch": 3.16793893129771, "grad_norm": 0.9561631679534912, "learning_rate": 5.5266666666666666e-06, "loss": 0.154, "step": 830 }, { "epoch": 3.2061068702290076, "grad_norm": 0.7585867643356323, "learning_rate": 5.593333333333334e-06, "loss": 0.1443, "step": 840 }, { "epoch": 3.2442748091603053, "grad_norm": 0.9479978084564209, "learning_rate": 5.66e-06, "loss": 0.1705, "step": 850 }, { "epoch": 3.282442748091603, "grad_norm": 1.1037328243255615, "learning_rate": 5.726666666666667e-06, "loss": 0.1538, "step": 860 }, { "epoch": 3.3206106870229006, "grad_norm": 1.2902960777282715, "learning_rate": 5.793333333333334e-06, "loss": 0.1739, "step": 870 }, { "epoch": 3.3587786259541983, "grad_norm": 0.8667603135108948, "learning_rate": 5.86e-06, "loss": 0.1451, "step": 880 }, { "epoch": 3.3969465648854964, "grad_norm": 0.6339547038078308, "learning_rate": 5.926666666666667e-06, "loss": 0.1514, "step": 890 }, { "epoch": 3.435114503816794, "grad_norm": 0.6177077889442444, "learning_rate": 5.993333333333334e-06, "loss": 0.1513, "step": 900 }, { "epoch": 3.4732824427480917, "grad_norm": 1.273289680480957, "learning_rate": 6.0600000000000004e-06, "loss": 0.1457, "step": 910 }, { "epoch": 3.5114503816793894, "grad_norm": 0.7146188020706177, "learning_rate": 6.126666666666668e-06, "loss": 0.174, "step": 920 }, { "epoch": 3.549618320610687, "grad_norm": 0.6311309337615967, "learning_rate": 6.193333333333333e-06, "loss": 0.1493, "step": 930 }, { "epoch": 3.5877862595419847, "grad_norm": 0.916165292263031, "learning_rate": 6.26e-06, "loss": 0.1552, "step": 940 }, { "epoch": 3.6259541984732824, "grad_norm": 1.2083804607391357, "learning_rate": 6.326666666666667e-06, "loss": 0.1533, "step": 950 }, { "epoch": 3.66412213740458, "grad_norm": 0.7804637551307678, "learning_rate": 6.393333333333334e-06, "loss": 0.1527, "step": 960 }, { "epoch": 3.7022900763358777, "grad_norm": 2.1257057189941406, "learning_rate": 6.460000000000001e-06, "loss": 0.1838, "step": 970 }, { "epoch": 3.7404580152671754, "grad_norm": 0.7072081565856934, "learning_rate": 6.526666666666666e-06, "loss": 0.1574, "step": 980 }, { "epoch": 3.778625954198473, "grad_norm": 1.387868046760559, "learning_rate": 6.5933333333333335e-06, "loss": 0.1409, "step": 990 }, { "epoch": 3.816793893129771, "grad_norm": 2.3242297172546387, "learning_rate": 6.660000000000001e-06, "loss": 0.1384, "step": 1000 }, { "epoch": 3.854961832061069, "grad_norm": 0.578836977481842, "learning_rate": 6.726666666666667e-06, "loss": 0.151, "step": 1010 }, { "epoch": 3.8931297709923665, "grad_norm": 0.8613945841789246, "learning_rate": 6.793333333333334e-06, "loss": 0.1561, "step": 1020 }, { "epoch": 3.931297709923664, "grad_norm": 0.8455677628517151, "learning_rate": 6.860000000000001e-06, "loss": 0.1561, "step": 1030 }, { "epoch": 3.969465648854962, "grad_norm": 2.132455825805664, "learning_rate": 6.926666666666667e-06, "loss": 0.1589, "step": 1040 }, { "epoch": 4.00763358778626, "grad_norm": 2.1734399795532227, "learning_rate": 6.993333333333334e-06, "loss": 0.1643, "step": 1050 }, { "epoch": 4.0458015267175576, "grad_norm": 0.7833858132362366, "learning_rate": 7.06e-06, "loss": 0.1425, "step": 1060 }, { "epoch": 4.083969465648855, "grad_norm": 1.160144567489624, "learning_rate": 7.126666666666667e-06, "loss": 0.1446, "step": 1070 }, { "epoch": 4.122137404580153, "grad_norm": 0.5361282825469971, "learning_rate": 7.1933333333333345e-06, "loss": 0.13, "step": 1080 }, { "epoch": 4.1603053435114505, "grad_norm": 1.3808670043945312, "learning_rate": 7.260000000000001e-06, "loss": 0.1692, "step": 1090 }, { "epoch": 4.198473282442748, "grad_norm": 1.0317463874816895, "learning_rate": 7.326666666666667e-06, "loss": 0.1346, "step": 1100 }, { "epoch": 4.236641221374046, "grad_norm": 0.7219449877738953, "learning_rate": 7.393333333333333e-06, "loss": 0.1508, "step": 1110 }, { "epoch": 4.2748091603053435, "grad_norm": 4.125285625457764, "learning_rate": 7.4600000000000006e-06, "loss": 0.1658, "step": 1120 }, { "epoch": 4.312977099236641, "grad_norm": 1.0689539909362793, "learning_rate": 7.526666666666668e-06, "loss": 0.1459, "step": 1130 }, { "epoch": 4.351145038167939, "grad_norm": 0.8281168937683105, "learning_rate": 7.593333333333334e-06, "loss": 0.1327, "step": 1140 }, { "epoch": 4.3893129770992365, "grad_norm": 0.4322452247142792, "learning_rate": 7.660000000000001e-06, "loss": 0.1469, "step": 1150 }, { "epoch": 4.427480916030534, "grad_norm": 0.7191686630249023, "learning_rate": 7.726666666666667e-06, "loss": 0.1352, "step": 1160 }, { "epoch": 4.465648854961832, "grad_norm": 0.676465630531311, "learning_rate": 7.793333333333334e-06, "loss": 0.1336, "step": 1170 }, { "epoch": 4.5038167938931295, "grad_norm": 0.7372109293937683, "learning_rate": 7.860000000000001e-06, "loss": 0.1483, "step": 1180 }, { "epoch": 4.541984732824427, "grad_norm": 1.089241862297058, "learning_rate": 7.926666666666666e-06, "loss": 0.142, "step": 1190 }, { "epoch": 4.580152671755725, "grad_norm": 0.5060117840766907, "learning_rate": 7.993333333333334e-06, "loss": 0.1542, "step": 1200 }, { "epoch": 4.618320610687023, "grad_norm": 0.5482001900672913, "learning_rate": 8.06e-06, "loss": 0.1326, "step": 1210 }, { "epoch": 4.65648854961832, "grad_norm": 0.677850604057312, "learning_rate": 8.126666666666668e-06, "loss": 0.1377, "step": 1220 }, { "epoch": 4.694656488549619, "grad_norm": 0.4879799783229828, "learning_rate": 8.193333333333335e-06, "loss": 0.1323, "step": 1230 }, { "epoch": 4.732824427480916, "grad_norm": 0.8837642073631287, "learning_rate": 8.26e-06, "loss": 0.134, "step": 1240 }, { "epoch": 4.770992366412214, "grad_norm": 0.6340999007225037, "learning_rate": 8.326666666666668e-06, "loss": 0.1267, "step": 1250 }, { "epoch": 4.809160305343512, "grad_norm": 1.3912076950073242, "learning_rate": 8.393333333333335e-06, "loss": 0.1483, "step": 1260 }, { "epoch": 4.847328244274809, "grad_norm": 0.44081875681877136, "learning_rate": 8.46e-06, "loss": 0.1514, "step": 1270 }, { "epoch": 4.885496183206107, "grad_norm": 1.8900103569030762, "learning_rate": 8.526666666666667e-06, "loss": 0.1492, "step": 1280 }, { "epoch": 4.923664122137405, "grad_norm": 1.0593299865722656, "learning_rate": 8.593333333333333e-06, "loss": 0.1339, "step": 1290 }, { "epoch": 4.961832061068702, "grad_norm": 0.8677412271499634, "learning_rate": 8.66e-06, "loss": 0.1383, "step": 1300 }, { "epoch": 5.0, "grad_norm": 0.49994489550590515, "learning_rate": 8.726666666666667e-06, "loss": 0.1562, "step": 1310 }, { "epoch": 5.038167938931298, "grad_norm": 0.9444197416305542, "learning_rate": 8.793333333333334e-06, "loss": 0.1584, "step": 1320 }, { "epoch": 5.076335877862595, "grad_norm": 0.6566011309623718, "learning_rate": 8.860000000000002e-06, "loss": 0.1259, "step": 1330 }, { "epoch": 5.114503816793893, "grad_norm": 0.574004590511322, "learning_rate": 8.926666666666669e-06, "loss": 0.1242, "step": 1340 }, { "epoch": 5.152671755725191, "grad_norm": 0.4502525329589844, "learning_rate": 8.993333333333334e-06, "loss": 0.126, "step": 1350 }, { "epoch": 5.190839694656488, "grad_norm": 0.6264625787734985, "learning_rate": 9.060000000000001e-06, "loss": 0.1458, "step": 1360 }, { "epoch": 5.229007633587786, "grad_norm": 0.6364710927009583, "learning_rate": 9.126666666666667e-06, "loss": 0.1417, "step": 1370 }, { "epoch": 5.267175572519084, "grad_norm": 0.8996373414993286, "learning_rate": 9.193333333333334e-06, "loss": 0.151, "step": 1380 }, { "epoch": 5.305343511450381, "grad_norm": 1.3520936965942383, "learning_rate": 9.260000000000001e-06, "loss": 0.1447, "step": 1390 }, { "epoch": 5.34351145038168, "grad_norm": 0.7287914752960205, "learning_rate": 9.326666666666667e-06, "loss": 0.1417, "step": 1400 }, { "epoch": 5.3816793893129775, "grad_norm": 1.365167498588562, "learning_rate": 9.393333333333334e-06, "loss": 0.137, "step": 1410 }, { "epoch": 5.419847328244275, "grad_norm": 0.889162540435791, "learning_rate": 9.460000000000001e-06, "loss": 0.1686, "step": 1420 }, { "epoch": 5.458015267175573, "grad_norm": 1.228797197341919, "learning_rate": 9.526666666666668e-06, "loss": 0.1359, "step": 1430 }, { "epoch": 5.4961832061068705, "grad_norm": 0.6598213911056519, "learning_rate": 9.593333333333335e-06, "loss": 0.135, "step": 1440 }, { "epoch": 5.534351145038168, "grad_norm": 0.6431540846824646, "learning_rate": 9.66e-06, "loss": 0.1381, "step": 1450 }, { "epoch": 5.572519083969466, "grad_norm": 0.731825053691864, "learning_rate": 9.726666666666668e-06, "loss": 0.1493, "step": 1460 }, { "epoch": 5.6106870229007635, "grad_norm": 0.5904266238212585, "learning_rate": 9.793333333333333e-06, "loss": 0.1279, "step": 1470 }, { "epoch": 5.648854961832061, "grad_norm": 1.9022412300109863, "learning_rate": 9.86e-06, "loss": 0.1275, "step": 1480 }, { "epoch": 5.687022900763359, "grad_norm": 0.5651864409446716, "learning_rate": 9.926666666666668e-06, "loss": 0.1326, "step": 1490 }, { "epoch": 5.7251908396946565, "grad_norm": 0.8466259241104126, "learning_rate": 9.993333333333333e-06, "loss": 0.141, "step": 1500 }, { "epoch": 5.763358778625954, "grad_norm": 0.9748515486717224, "learning_rate": 9.999997539434007e-06, "loss": 0.1293, "step": 1510 }, { "epoch": 5.801526717557252, "grad_norm": 0.6662544012069702, "learning_rate": 9.999989033776898e-06, "loss": 0.1321, "step": 1520 }, { "epoch": 5.8396946564885495, "grad_norm": 0.4294588267803192, "learning_rate": 9.999974452661642e-06, "loss": 0.1387, "step": 1530 }, { "epoch": 5.877862595419847, "grad_norm": 1.3427824974060059, "learning_rate": 9.99995379610596e-06, "loss": 0.144, "step": 1540 }, { "epoch": 5.916030534351145, "grad_norm": 0.6164063215255737, "learning_rate": 9.99992706413495e-06, "loss": 0.1639, "step": 1550 }, { "epoch": 5.9541984732824424, "grad_norm": 0.511205792427063, "learning_rate": 9.999894256781095e-06, "loss": 0.1401, "step": 1560 }, { "epoch": 5.99236641221374, "grad_norm": 0.5297313928604126, "learning_rate": 9.99985537408426e-06, "loss": 0.1284, "step": 1570 }, { "epoch": 6.030534351145038, "grad_norm": 0.4822237491607666, "learning_rate": 9.999810416091689e-06, "loss": 0.1251, "step": 1580 }, { "epoch": 6.068702290076336, "grad_norm": 0.49136868119239807, "learning_rate": 9.99975938285801e-06, "loss": 0.135, "step": 1590 }, { "epoch": 6.106870229007634, "grad_norm": 0.5754089951515198, "learning_rate": 9.999702274445235e-06, "loss": 0.1154, "step": 1600 }, { "epoch": 6.145038167938932, "grad_norm": 0.5563395619392395, "learning_rate": 9.999639090922758e-06, "loss": 0.1229, "step": 1610 }, { "epoch": 6.183206106870229, "grad_norm": 0.6620687246322632, "learning_rate": 9.999569832367346e-06, "loss": 0.1239, "step": 1620 }, { "epoch": 6.221374045801527, "grad_norm": 0.7389596700668335, "learning_rate": 9.999494498863163e-06, "loss": 0.1308, "step": 1630 }, { "epoch": 6.259541984732825, "grad_norm": 1.2152165174484253, "learning_rate": 9.99941309050174e-06, "loss": 0.1382, "step": 1640 }, { "epoch": 6.297709923664122, "grad_norm": 0.7054020762443542, "learning_rate": 9.999325607382e-06, "loss": 0.1657, "step": 1650 }, { "epoch": 6.33587786259542, "grad_norm": 0.48537522554397583, "learning_rate": 9.999232049610239e-06, "loss": 0.1295, "step": 1660 }, { "epoch": 6.374045801526718, "grad_norm": 0.9919182062149048, "learning_rate": 9.999132417300141e-06, "loss": 0.1454, "step": 1670 }, { "epoch": 6.412213740458015, "grad_norm": 0.5933136343955994, "learning_rate": 9.99902671057277e-06, "loss": 0.1393, "step": 1680 }, { "epoch": 6.450381679389313, "grad_norm": 1.4632704257965088, "learning_rate": 9.998914929556569e-06, "loss": 0.1269, "step": 1690 }, { "epoch": 6.488549618320611, "grad_norm": 0.8186988234519958, "learning_rate": 9.99879707438736e-06, "loss": 0.1328, "step": 1700 }, { "epoch": 6.526717557251908, "grad_norm": 0.7318149209022522, "learning_rate": 9.998673145208351e-06, "loss": 0.1278, "step": 1710 }, { "epoch": 6.564885496183206, "grad_norm": 0.6014979481697083, "learning_rate": 9.998543142170127e-06, "loss": 0.1331, "step": 1720 }, { "epoch": 6.603053435114504, "grad_norm": 0.5737615823745728, "learning_rate": 9.99840706543065e-06, "loss": 0.1369, "step": 1730 }, { "epoch": 6.641221374045801, "grad_norm": 1.0225228071212769, "learning_rate": 9.998264915155274e-06, "loss": 0.1429, "step": 1740 }, { "epoch": 6.679389312977099, "grad_norm": 0.7193993330001831, "learning_rate": 9.998116691516718e-06, "loss": 0.1235, "step": 1750 }, { "epoch": 6.717557251908397, "grad_norm": 2.232433795928955, "learning_rate": 9.997962394695091e-06, "loss": 0.1266, "step": 1760 }, { "epoch": 6.755725190839694, "grad_norm": 0.7240338325500488, "learning_rate": 9.997802024877876e-06, "loss": 0.1358, "step": 1770 }, { "epoch": 6.793893129770993, "grad_norm": 0.48704954981803894, "learning_rate": 9.997635582259941e-06, "loss": 0.1292, "step": 1780 }, { "epoch": 6.8320610687022905, "grad_norm": 0.4361136257648468, "learning_rate": 9.997463067043526e-06, "loss": 0.1282, "step": 1790 }, { "epoch": 6.870229007633588, "grad_norm": 0.8777408003807068, "learning_rate": 9.997284479438254e-06, "loss": 0.1289, "step": 1800 }, { "epoch": 6.908396946564886, "grad_norm": 0.586004912853241, "learning_rate": 9.997099819661127e-06, "loss": 0.1308, "step": 1810 }, { "epoch": 6.9465648854961835, "grad_norm": 0.34389933943748474, "learning_rate": 9.996909087936523e-06, "loss": 0.1323, "step": 1820 }, { "epoch": 6.984732824427481, "grad_norm": 0.6134178638458252, "learning_rate": 9.996712284496202e-06, "loss": 0.1215, "step": 1830 }, { "epoch": 7.022900763358779, "grad_norm": 0.5625924468040466, "learning_rate": 9.996509409579293e-06, "loss": 0.1316, "step": 1840 }, { "epoch": 7.061068702290076, "grad_norm": 0.6711544394493103, "learning_rate": 9.996300463432313e-06, "loss": 0.1188, "step": 1850 }, { "epoch": 7.099236641221374, "grad_norm": 0.46400946378707886, "learning_rate": 9.996085446309149e-06, "loss": 0.1356, "step": 1860 }, { "epoch": 7.137404580152672, "grad_norm": 0.6120234727859497, "learning_rate": 9.995864358471067e-06, "loss": 0.1551, "step": 1870 }, { "epoch": 7.175572519083969, "grad_norm": 0.8439425230026245, "learning_rate": 9.99563720018671e-06, "loss": 0.1382, "step": 1880 }, { "epoch": 7.213740458015267, "grad_norm": 0.7507050037384033, "learning_rate": 9.995403971732098e-06, "loss": 0.1292, "step": 1890 }, { "epoch": 7.251908396946565, "grad_norm": 1.3689290285110474, "learning_rate": 9.995164673390624e-06, "loss": 0.1244, "step": 1900 }, { "epoch": 7.290076335877862, "grad_norm": 0.8372470140457153, "learning_rate": 9.994919305453059e-06, "loss": 0.1318, "step": 1910 }, { "epoch": 7.32824427480916, "grad_norm": 0.5124345421791077, "learning_rate": 9.994667868217549e-06, "loss": 0.1198, "step": 1920 }, { "epoch": 7.366412213740458, "grad_norm": 0.48633715510368347, "learning_rate": 9.99441036198961e-06, "loss": 0.1364, "step": 1930 }, { "epoch": 7.404580152671755, "grad_norm": 0.800594687461853, "learning_rate": 9.994146787082141e-06, "loss": 0.1246, "step": 1940 }, { "epoch": 7.442748091603053, "grad_norm": 0.5229592323303223, "learning_rate": 9.993877143815406e-06, "loss": 0.1236, "step": 1950 }, { "epoch": 7.480916030534351, "grad_norm": 0.49684378504753113, "learning_rate": 9.993601432517052e-06, "loss": 0.1209, "step": 1960 }, { "epoch": 7.519083969465649, "grad_norm": 0.4344727396965027, "learning_rate": 9.99331965352209e-06, "loss": 0.119, "step": 1970 }, { "epoch": 7.557251908396947, "grad_norm": 1.0743569135665894, "learning_rate": 9.993031807172912e-06, "loss": 0.1304, "step": 1980 }, { "epoch": 7.595419847328245, "grad_norm": 0.44631901383399963, "learning_rate": 9.992737893819274e-06, "loss": 0.1309, "step": 1990 }, { "epoch": 7.633587786259542, "grad_norm": 0.8176268935203552, "learning_rate": 9.992437913818311e-06, "loss": 0.1245, "step": 2000 }, { "epoch": 7.67175572519084, "grad_norm": 0.6087300181388855, "learning_rate": 9.992131867534526e-06, "loss": 0.1262, "step": 2010 }, { "epoch": 7.709923664122138, "grad_norm": 0.5834507346153259, "learning_rate": 9.991819755339796e-06, "loss": 0.1215, "step": 2020 }, { "epoch": 7.748091603053435, "grad_norm": 0.4410012364387512, "learning_rate": 9.991501577613365e-06, "loss": 0.1271, "step": 2030 }, { "epoch": 7.786259541984733, "grad_norm": 0.8793311715126038, "learning_rate": 9.991177334741851e-06, "loss": 0.1199, "step": 2040 }, { "epoch": 7.824427480916031, "grad_norm": 0.4199819564819336, "learning_rate": 9.990847027119235e-06, "loss": 0.1229, "step": 2050 }, { "epoch": 7.862595419847328, "grad_norm": 0.5443435311317444, "learning_rate": 9.990510655146877e-06, "loss": 0.1155, "step": 2060 }, { "epoch": 7.900763358778626, "grad_norm": 1.0193560123443604, "learning_rate": 9.990168219233496e-06, "loss": 0.1295, "step": 2070 }, { "epoch": 7.938931297709924, "grad_norm": 0.5311276912689209, "learning_rate": 9.989819719795189e-06, "loss": 0.1274, "step": 2080 }, { "epoch": 7.977099236641221, "grad_norm": 0.5494500994682312, "learning_rate": 9.989465157255413e-06, "loss": 0.1356, "step": 2090 }, { "epoch": 8.01526717557252, "grad_norm": 0.4246234595775604, "learning_rate": 9.989104532044994e-06, "loss": 0.1213, "step": 2100 }, { "epoch": 8.053435114503817, "grad_norm": 0.5394090414047241, "learning_rate": 9.988737844602128e-06, "loss": 0.1284, "step": 2110 }, { "epoch": 8.091603053435115, "grad_norm": 0.6479999423027039, "learning_rate": 9.988365095372373e-06, "loss": 0.1278, "step": 2120 }, { "epoch": 8.129770992366412, "grad_norm": 0.7949787378311157, "learning_rate": 9.987986284808654e-06, "loss": 0.1285, "step": 2130 }, { "epoch": 8.16793893129771, "grad_norm": 0.37061432003974915, "learning_rate": 9.987601413371264e-06, "loss": 0.1218, "step": 2140 }, { "epoch": 8.206106870229007, "grad_norm": 0.6125175952911377, "learning_rate": 9.987210481527857e-06, "loss": 0.1284, "step": 2150 }, { "epoch": 8.244274809160306, "grad_norm": 1.3589234352111816, "learning_rate": 9.98681348975345e-06, "loss": 0.1281, "step": 2160 }, { "epoch": 8.282442748091603, "grad_norm": 0.6536274552345276, "learning_rate": 9.986410438530428e-06, "loss": 0.1237, "step": 2170 }, { "epoch": 8.320610687022901, "grad_norm": 0.5439227223396301, "learning_rate": 9.986001328348534e-06, "loss": 0.1292, "step": 2180 }, { "epoch": 8.358778625954198, "grad_norm": 0.6106489896774292, "learning_rate": 9.985586159704879e-06, "loss": 0.1237, "step": 2190 }, { "epoch": 8.396946564885496, "grad_norm": 0.422396719455719, "learning_rate": 9.98516493310393e-06, "loss": 0.1247, "step": 2200 }, { "epoch": 8.435114503816793, "grad_norm": 0.4791705012321472, "learning_rate": 9.984737649057514e-06, "loss": 0.1291, "step": 2210 }, { "epoch": 8.473282442748092, "grad_norm": 1.0834544897079468, "learning_rate": 9.984304308084827e-06, "loss": 0.1383, "step": 2220 }, { "epoch": 8.511450381679388, "grad_norm": 0.5249638557434082, "learning_rate": 9.983864910712416e-06, "loss": 0.1249, "step": 2230 }, { "epoch": 8.549618320610687, "grad_norm": 0.6899205446243286, "learning_rate": 9.98341945747419e-06, "loss": 0.1242, "step": 2240 }, { "epoch": 8.587786259541986, "grad_norm": 0.6143678426742554, "learning_rate": 9.98296794891142e-06, "loss": 0.1394, "step": 2250 }, { "epoch": 8.625954198473282, "grad_norm": 0.4065885543823242, "learning_rate": 9.982510385572725e-06, "loss": 0.1343, "step": 2260 }, { "epoch": 8.664122137404581, "grad_norm": 1.2763944864273071, "learning_rate": 9.982046768014093e-06, "loss": 0.1448, "step": 2270 }, { "epoch": 8.702290076335878, "grad_norm": 0.924741268157959, "learning_rate": 9.981577096798864e-06, "loss": 0.1442, "step": 2280 }, { "epoch": 8.740458015267176, "grad_norm": 0.8587446212768555, "learning_rate": 9.981101372497729e-06, "loss": 0.13, "step": 2290 }, { "epoch": 8.778625954198473, "grad_norm": 0.8710682392120361, "learning_rate": 9.980619595688737e-06, "loss": 0.1263, "step": 2300 }, { "epoch": 8.816793893129772, "grad_norm": 0.7033365964889526, "learning_rate": 9.980131766957296e-06, "loss": 0.1207, "step": 2310 }, { "epoch": 8.854961832061068, "grad_norm": 0.4609795808792114, "learning_rate": 9.979637886896162e-06, "loss": 0.1244, "step": 2320 }, { "epoch": 8.893129770992367, "grad_norm": 0.7439802289009094, "learning_rate": 9.979137956105446e-06, "loss": 0.1202, "step": 2330 }, { "epoch": 8.931297709923664, "grad_norm": 1.673240065574646, "learning_rate": 9.978631975192613e-06, "loss": 0.1258, "step": 2340 }, { "epoch": 8.969465648854962, "grad_norm": 1.4078855514526367, "learning_rate": 9.978119944772476e-06, "loss": 0.1305, "step": 2350 }, { "epoch": 9.007633587786259, "grad_norm": 0.703015923500061, "learning_rate": 9.977601865467197e-06, "loss": 0.1186, "step": 2360 }, { "epoch": 9.045801526717558, "grad_norm": 0.973673939704895, "learning_rate": 9.977077737906296e-06, "loss": 0.129, "step": 2370 }, { "epoch": 9.083969465648854, "grad_norm": 0.6487618684768677, "learning_rate": 9.976547562726637e-06, "loss": 0.1108, "step": 2380 }, { "epoch": 9.122137404580153, "grad_norm": 0.4547242820262909, "learning_rate": 9.97601134057243e-06, "loss": 0.1211, "step": 2390 }, { "epoch": 9.16030534351145, "grad_norm": 0.7747370600700378, "learning_rate": 9.975469072095236e-06, "loss": 0.122, "step": 2400 }, { "epoch": 9.198473282442748, "grad_norm": 0.6227743029594421, "learning_rate": 9.974920757953965e-06, "loss": 0.1337, "step": 2410 }, { "epoch": 9.236641221374045, "grad_norm": 1.6230658292770386, "learning_rate": 9.97436639881487e-06, "loss": 0.1333, "step": 2420 }, { "epoch": 9.274809160305344, "grad_norm": 0.4734322130680084, "learning_rate": 9.973805995351545e-06, "loss": 0.1312, "step": 2430 }, { "epoch": 9.312977099236642, "grad_norm": 1.1201488971710205, "learning_rate": 9.973239548244938e-06, "loss": 0.1332, "step": 2440 }, { "epoch": 9.351145038167939, "grad_norm": 0.6548280119895935, "learning_rate": 9.972667058183333e-06, "loss": 0.1282, "step": 2450 }, { "epoch": 9.389312977099237, "grad_norm": 0.9208749532699585, "learning_rate": 9.972088525862363e-06, "loss": 0.1397, "step": 2460 }, { "epoch": 9.427480916030534, "grad_norm": 0.8364688158035278, "learning_rate": 9.971503951984996e-06, "loss": 0.1346, "step": 2470 }, { "epoch": 9.465648854961833, "grad_norm": 0.5072993636131287, "learning_rate": 9.970913337261544e-06, "loss": 0.1216, "step": 2480 }, { "epoch": 9.50381679389313, "grad_norm": 0.5047227144241333, "learning_rate": 9.97031668240966e-06, "loss": 0.1158, "step": 2490 }, { "epoch": 9.541984732824428, "grad_norm": 0.6214285492897034, "learning_rate": 9.96971398815434e-06, "loss": 0.1275, "step": 2500 }, { "epoch": 9.580152671755725, "grad_norm": 0.2843784987926483, "learning_rate": 9.969105255227906e-06, "loss": 0.1202, "step": 2510 }, { "epoch": 9.618320610687023, "grad_norm": 0.9346826672554016, "learning_rate": 9.968490484370035e-06, "loss": 0.1257, "step": 2520 }, { "epoch": 9.65648854961832, "grad_norm": 0.5003984570503235, "learning_rate": 9.967869676327726e-06, "loss": 0.1189, "step": 2530 }, { "epoch": 9.694656488549619, "grad_norm": 0.535346508026123, "learning_rate": 9.967242831855321e-06, "loss": 0.1166, "step": 2540 }, { "epoch": 9.732824427480915, "grad_norm": 2.279946804046631, "learning_rate": 9.966609951714495e-06, "loss": 0.1317, "step": 2550 }, { "epoch": 9.770992366412214, "grad_norm": 0.4801623523235321, "learning_rate": 9.965971036674256e-06, "loss": 0.1233, "step": 2560 }, { "epoch": 9.80916030534351, "grad_norm": 0.861571192741394, "learning_rate": 9.965326087510947e-06, "loss": 0.1238, "step": 2570 }, { "epoch": 9.84732824427481, "grad_norm": 0.4469713568687439, "learning_rate": 9.964675105008243e-06, "loss": 0.1162, "step": 2580 }, { "epoch": 9.885496183206106, "grad_norm": 0.7882781028747559, "learning_rate": 9.964018089957148e-06, "loss": 0.1225, "step": 2590 }, { "epoch": 9.923664122137405, "grad_norm": 0.38498684763908386, "learning_rate": 9.963355043155997e-06, "loss": 0.1202, "step": 2600 }, { "epoch": 9.961832061068701, "grad_norm": 0.5153051614761353, "learning_rate": 9.962685965410455e-06, "loss": 0.1202, "step": 2610 }, { "epoch": 10.0, "grad_norm": 0.4856826663017273, "learning_rate": 9.962010857533514e-06, "loss": 0.1152, "step": 2620 }, { "epoch": 10.038167938931299, "grad_norm": 0.44617682695388794, "learning_rate": 9.961329720345494e-06, "loss": 0.1225, "step": 2630 }, { "epoch": 10.076335877862595, "grad_norm": 0.5542685389518738, "learning_rate": 9.96064255467404e-06, "loss": 0.1271, "step": 2640 }, { "epoch": 10.114503816793894, "grad_norm": 0.46713921427726746, "learning_rate": 9.959949361354127e-06, "loss": 0.1034, "step": 2650 }, { "epoch": 10.15267175572519, "grad_norm": 0.3956896662712097, "learning_rate": 9.959250141228046e-06, "loss": 0.1086, "step": 2660 }, { "epoch": 10.19083969465649, "grad_norm": 0.34693989157676697, "learning_rate": 9.958544895145415e-06, "loss": 0.1185, "step": 2670 }, { "epoch": 10.229007633587786, "grad_norm": 0.5464341044425964, "learning_rate": 9.957833623963178e-06, "loss": 0.1213, "step": 2680 }, { "epoch": 10.267175572519085, "grad_norm": 1.7212783098220825, "learning_rate": 9.957116328545593e-06, "loss": 0.1231, "step": 2690 }, { "epoch": 10.305343511450381, "grad_norm": 0.5747115015983582, "learning_rate": 9.956393009764244e-06, "loss": 0.1436, "step": 2700 }, { "epoch": 10.34351145038168, "grad_norm": 0.7528557181358337, "learning_rate": 9.955663668498032e-06, "loss": 0.1238, "step": 2710 }, { "epoch": 10.381679389312977, "grad_norm": 0.7713605761528015, "learning_rate": 9.954928305633174e-06, "loss": 0.1309, "step": 2720 }, { "epoch": 10.419847328244275, "grad_norm": 0.42558568716049194, "learning_rate": 9.954186922063204e-06, "loss": 0.1258, "step": 2730 }, { "epoch": 10.458015267175572, "grad_norm": 0.44242027401924133, "learning_rate": 9.953439518688974e-06, "loss": 0.1097, "step": 2740 }, { "epoch": 10.49618320610687, "grad_norm": 0.4029052257537842, "learning_rate": 9.952686096418652e-06, "loss": 0.1273, "step": 2750 }, { "epoch": 10.534351145038167, "grad_norm": 0.38173234462738037, "learning_rate": 9.951926656167715e-06, "loss": 0.1163, "step": 2760 }, { "epoch": 10.572519083969466, "grad_norm": 0.6817983984947205, "learning_rate": 9.951161198858952e-06, "loss": 0.1242, "step": 2770 }, { "epoch": 10.610687022900763, "grad_norm": 0.47834619879722595, "learning_rate": 9.95038972542247e-06, "loss": 0.1173, "step": 2780 }, { "epoch": 10.648854961832061, "grad_norm": 1.0968841314315796, "learning_rate": 9.949612236795682e-06, "loss": 0.1172, "step": 2790 }, { "epoch": 10.68702290076336, "grad_norm": 0.4130131006240845, "learning_rate": 9.948828733923305e-06, "loss": 0.1202, "step": 2800 }, { "epoch": 10.725190839694656, "grad_norm": 0.5207512974739075, "learning_rate": 9.948039217757375e-06, "loss": 0.1318, "step": 2810 }, { "epoch": 10.763358778625955, "grad_norm": 0.5142215490341187, "learning_rate": 9.947243689257226e-06, "loss": 0.1175, "step": 2820 }, { "epoch": 10.801526717557252, "grad_norm": 0.46614205837249756, "learning_rate": 9.946442149389498e-06, "loss": 0.1248, "step": 2830 }, { "epoch": 10.83969465648855, "grad_norm": 0.6606496572494507, "learning_rate": 9.94563459912814e-06, "loss": 0.132, "step": 2840 }, { "epoch": 10.877862595419847, "grad_norm": 0.7258016467094421, "learning_rate": 9.944821039454403e-06, "loss": 0.1153, "step": 2850 }, { "epoch": 10.916030534351146, "grad_norm": 0.5228508114814758, "learning_rate": 9.944001471356835e-06, "loss": 0.134, "step": 2860 }, { "epoch": 10.954198473282442, "grad_norm": 0.7532591223716736, "learning_rate": 9.94317589583129e-06, "loss": 0.1203, "step": 2870 }, { "epoch": 10.992366412213741, "grad_norm": 0.8692820072174072, "learning_rate": 9.942344313880922e-06, "loss": 0.1177, "step": 2880 }, { "epoch": 11.030534351145038, "grad_norm": 0.40786102414131165, "learning_rate": 9.941506726516179e-06, "loss": 0.1336, "step": 2890 }, { "epoch": 11.068702290076336, "grad_norm": 0.4568503201007843, "learning_rate": 9.94066313475481e-06, "loss": 0.1187, "step": 2900 }, { "epoch": 11.106870229007633, "grad_norm": 0.4302501678466797, "learning_rate": 9.939813539621858e-06, "loss": 0.1209, "step": 2910 }, { "epoch": 11.145038167938932, "grad_norm": 0.32359543442726135, "learning_rate": 9.93895794214966e-06, "loss": 0.1077, "step": 2920 }, { "epoch": 11.183206106870228, "grad_norm": 0.5436263680458069, "learning_rate": 9.938096343377853e-06, "loss": 0.1312, "step": 2930 }, { "epoch": 11.221374045801527, "grad_norm": 0.3711869418621063, "learning_rate": 9.937228744353354e-06, "loss": 0.1173, "step": 2940 }, { "epoch": 11.259541984732824, "grad_norm": 0.35187089443206787, "learning_rate": 9.93635514613038e-06, "loss": 0.1153, "step": 2950 }, { "epoch": 11.297709923664122, "grad_norm": 0.5851736664772034, "learning_rate": 9.935475549770436e-06, "loss": 0.1169, "step": 2960 }, { "epoch": 11.335877862595419, "grad_norm": 0.3298185467720032, "learning_rate": 9.934589956342315e-06, "loss": 0.1163, "step": 2970 }, { "epoch": 11.374045801526718, "grad_norm": 0.6017898917198181, "learning_rate": 9.933698366922093e-06, "loss": 0.1216, "step": 2980 }, { "epoch": 11.412213740458014, "grad_norm": 0.6507192254066467, "learning_rate": 9.932800782593141e-06, "loss": 0.1226, "step": 2990 }, { "epoch": 11.450381679389313, "grad_norm": 0.3257594406604767, "learning_rate": 9.931897204446104e-06, "loss": 0.1308, "step": 3000 }, { "epoch": 11.488549618320612, "grad_norm": 4.35601806640625, "learning_rate": 9.930987633578916e-06, "loss": 0.1232, "step": 3010 }, { "epoch": 11.526717557251908, "grad_norm": 0.3781754970550537, "learning_rate": 9.930072071096791e-06, "loss": 0.1277, "step": 3020 }, { "epoch": 11.564885496183207, "grad_norm": 0.8004752397537231, "learning_rate": 9.929150518112225e-06, "loss": 0.1216, "step": 3030 }, { "epoch": 11.603053435114504, "grad_norm": 0.6701073050498962, "learning_rate": 9.928222975744992e-06, "loss": 0.1208, "step": 3040 }, { "epoch": 11.641221374045802, "grad_norm": 0.39749905467033386, "learning_rate": 9.92728944512214e-06, "loss": 0.1079, "step": 3050 }, { "epoch": 11.679389312977099, "grad_norm": 0.6213489174842834, "learning_rate": 9.926349927378e-06, "loss": 0.1297, "step": 3060 }, { "epoch": 11.717557251908397, "grad_norm": 0.4827481508255005, "learning_rate": 9.925404423654175e-06, "loss": 0.1244, "step": 3070 }, { "epoch": 11.755725190839694, "grad_norm": 0.7463303804397583, "learning_rate": 9.924452935099537e-06, "loss": 0.1128, "step": 3080 }, { "epoch": 11.793893129770993, "grad_norm": 1.3172951936721802, "learning_rate": 9.923495462870241e-06, "loss": 0.1119, "step": 3090 }, { "epoch": 11.83206106870229, "grad_norm": 0.8011961579322815, "learning_rate": 9.9225320081297e-06, "loss": 0.1115, "step": 3100 }, { "epoch": 11.870229007633588, "grad_norm": 1.2579299211502075, "learning_rate": 9.921562572048606e-06, "loss": 0.1416, "step": 3110 }, { "epoch": 11.908396946564885, "grad_norm": 0.6220617294311523, "learning_rate": 9.920587155804913e-06, "loss": 0.121, "step": 3120 }, { "epoch": 11.946564885496183, "grad_norm": 0.4087788760662079, "learning_rate": 9.919605760583846e-06, "loss": 0.123, "step": 3130 }, { "epoch": 11.98473282442748, "grad_norm": 0.3896748125553131, "learning_rate": 9.91861838757789e-06, "loss": 0.1182, "step": 3140 }, { "epoch": 12.022900763358779, "grad_norm": 0.467168927192688, "learning_rate": 9.917625037986798e-06, "loss": 0.1145, "step": 3150 }, { "epoch": 12.061068702290076, "grad_norm": 0.6059496998786926, "learning_rate": 9.916625713017582e-06, "loss": 0.1218, "step": 3160 }, { "epoch": 12.099236641221374, "grad_norm": 0.579326868057251, "learning_rate": 9.91562041388452e-06, "loss": 0.114, "step": 3170 }, { "epoch": 12.137404580152673, "grad_norm": 1.0949403047561646, "learning_rate": 9.91460914180914e-06, "loss": 0.1306, "step": 3180 }, { "epoch": 12.17557251908397, "grad_norm": 0.4158419370651245, "learning_rate": 9.913591898020234e-06, "loss": 0.1169, "step": 3190 }, { "epoch": 12.213740458015268, "grad_norm": 1.5080878734588623, "learning_rate": 9.912568683753853e-06, "loss": 0.1215, "step": 3200 }, { "epoch": 12.251908396946565, "grad_norm": 0.5173312425613403, "learning_rate": 9.911539500253295e-06, "loss": 0.1037, "step": 3210 }, { "epoch": 12.290076335877863, "grad_norm": 0.33992186188697815, "learning_rate": 9.910504348769118e-06, "loss": 0.1145, "step": 3220 }, { "epoch": 12.32824427480916, "grad_norm": 0.881161093711853, "learning_rate": 9.909463230559127e-06, "loss": 0.1205, "step": 3230 }, { "epoch": 12.366412213740459, "grad_norm": 0.376974493265152, "learning_rate": 9.908416146888376e-06, "loss": 0.1234, "step": 3240 }, { "epoch": 12.404580152671755, "grad_norm": 0.4849688410758972, "learning_rate": 9.907363099029175e-06, "loss": 0.1221, "step": 3250 }, { "epoch": 12.442748091603054, "grad_norm": 0.6830031871795654, "learning_rate": 9.906304088261073e-06, "loss": 0.1522, "step": 3260 }, { "epoch": 12.48091603053435, "grad_norm": 0.450184166431427, "learning_rate": 9.905239115870873e-06, "loss": 0.1109, "step": 3270 }, { "epoch": 12.51908396946565, "grad_norm": 0.32189562916755676, "learning_rate": 9.904168183152611e-06, "loss": 0.1344, "step": 3280 }, { "epoch": 12.557251908396946, "grad_norm": 0.6055088043212891, "learning_rate": 9.903091291407574e-06, "loss": 0.1377, "step": 3290 }, { "epoch": 12.595419847328245, "grad_norm": 0.833159863948822, "learning_rate": 9.902008441944287e-06, "loss": 0.1304, "step": 3300 }, { "epoch": 12.633587786259541, "grad_norm": 0.6403981447219849, "learning_rate": 9.900919636078511e-06, "loss": 0.111, "step": 3310 }, { "epoch": 12.67175572519084, "grad_norm": 1.0145337581634521, "learning_rate": 9.899824875133255e-06, "loss": 0.123, "step": 3320 }, { "epoch": 12.709923664122137, "grad_norm": 0.8037505149841309, "learning_rate": 9.89872416043875e-06, "loss": 0.1188, "step": 3330 }, { "epoch": 12.748091603053435, "grad_norm": 0.5243023633956909, "learning_rate": 9.89761749333247e-06, "loss": 0.1184, "step": 3340 }, { "epoch": 12.786259541984732, "grad_norm": 0.3315357565879822, "learning_rate": 9.896504875159122e-06, "loss": 0.1168, "step": 3350 }, { "epoch": 12.82442748091603, "grad_norm": 0.7898366451263428, "learning_rate": 9.89538630727064e-06, "loss": 0.1138, "step": 3360 }, { "epoch": 12.862595419847327, "grad_norm": 0.3733002841472626, "learning_rate": 9.89426179102619e-06, "loss": 0.1168, "step": 3370 }, { "epoch": 12.900763358778626, "grad_norm": 0.5741856098175049, "learning_rate": 9.893131327792166e-06, "loss": 0.1189, "step": 3380 }, { "epoch": 12.938931297709924, "grad_norm": 0.5322036743164062, "learning_rate": 9.891994918942183e-06, "loss": 0.1182, "step": 3390 }, { "epoch": 12.977099236641221, "grad_norm": 0.3387764096260071, "learning_rate": 9.890852565857092e-06, "loss": 0.1201, "step": 3400 }, { "epoch": 13.01526717557252, "grad_norm": 0.6571619510650635, "learning_rate": 9.889704269924955e-06, "loss": 0.1217, "step": 3410 }, { "epoch": 13.053435114503817, "grad_norm": 0.5754358172416687, "learning_rate": 9.88855003254106e-06, "loss": 0.1446, "step": 3420 }, { "epoch": 13.091603053435115, "grad_norm": 0.5554888844490051, "learning_rate": 9.887389855107917e-06, "loss": 0.1107, "step": 3430 }, { "epoch": 13.129770992366412, "grad_norm": 0.5900585055351257, "learning_rate": 9.886223739035249e-06, "loss": 0.1211, "step": 3440 }, { "epoch": 13.16793893129771, "grad_norm": 0.42058074474334717, "learning_rate": 9.885051685739997e-06, "loss": 0.1208, "step": 3450 }, { "epoch": 13.206106870229007, "grad_norm": 0.959195077419281, "learning_rate": 9.883873696646316e-06, "loss": 0.1127, "step": 3460 }, { "epoch": 13.244274809160306, "grad_norm": 0.45505401492118835, "learning_rate": 9.882689773185575e-06, "loss": 0.1251, "step": 3470 }, { "epoch": 13.282442748091603, "grad_norm": 0.7457808256149292, "learning_rate": 9.881499916796354e-06, "loss": 0.1189, "step": 3480 }, { "epoch": 13.320610687022901, "grad_norm": 0.8850777745246887, "learning_rate": 9.880304128924435e-06, "loss": 0.1307, "step": 3490 }, { "epoch": 13.358778625954198, "grad_norm": 0.7178351879119873, "learning_rate": 9.879102411022818e-06, "loss": 0.1172, "step": 3500 }, { "epoch": 13.396946564885496, "grad_norm": 0.8748807311058044, "learning_rate": 9.877894764551704e-06, "loss": 0.1167, "step": 3510 }, { "epoch": 13.435114503816793, "grad_norm": 1.205231785774231, "learning_rate": 9.876681190978494e-06, "loss": 0.1294, "step": 3520 }, { "epoch": 13.473282442748092, "grad_norm": 0.4213230013847351, "learning_rate": 9.875461691777797e-06, "loss": 0.1131, "step": 3530 }, { "epoch": 13.511450381679388, "grad_norm": 0.5431898832321167, "learning_rate": 9.874236268431417e-06, "loss": 0.1157, "step": 3540 }, { "epoch": 13.549618320610687, "grad_norm": 0.8572608828544617, "learning_rate": 9.873004922428362e-06, "loss": 0.1259, "step": 3550 }, { "epoch": 13.587786259541986, "grad_norm": 0.44893670082092285, "learning_rate": 9.87176765526483e-06, "loss": 0.129, "step": 3560 }, { "epoch": 13.625954198473282, "grad_norm": 0.5084060430526733, "learning_rate": 9.87052446844422e-06, "loss": 0.1134, "step": 3570 }, { "epoch": 13.664122137404581, "grad_norm": 1.1047592163085938, "learning_rate": 9.869275363477122e-06, "loss": 0.1176, "step": 3580 }, { "epoch": 13.702290076335878, "grad_norm": 0.836169421672821, "learning_rate": 9.868020341881313e-06, "loss": 0.1186, "step": 3590 }, { "epoch": 13.740458015267176, "grad_norm": 0.7019125819206238, "learning_rate": 9.866759405181766e-06, "loss": 0.1199, "step": 3600 }, { "epoch": 13.778625954198473, "grad_norm": 0.5258054733276367, "learning_rate": 9.865492554910634e-06, "loss": 0.1227, "step": 3610 }, { "epoch": 13.816793893129772, "grad_norm": 0.670691967010498, "learning_rate": 9.864219792607262e-06, "loss": 0.115, "step": 3620 }, { "epoch": 13.854961832061068, "grad_norm": 0.5219152569770813, "learning_rate": 9.862941119818176e-06, "loss": 0.1249, "step": 3630 }, { "epoch": 13.893129770992367, "grad_norm": 0.9201920032501221, "learning_rate": 9.861656538097086e-06, "loss": 0.1211, "step": 3640 }, { "epoch": 13.931297709923664, "grad_norm": 0.4263167679309845, "learning_rate": 9.860366049004878e-06, "loss": 0.1115, "step": 3650 }, { "epoch": 13.969465648854962, "grad_norm": 0.8185678124427795, "learning_rate": 9.859069654109615e-06, "loss": 0.133, "step": 3660 }, { "epoch": 14.007633587786259, "grad_norm": 0.4073340594768524, "learning_rate": 9.857767354986545e-06, "loss": 0.1306, "step": 3670 }, { "epoch": 14.045801526717558, "grad_norm": 0.6191373467445374, "learning_rate": 9.856459153218078e-06, "loss": 0.114, "step": 3680 }, { "epoch": 14.083969465648854, "grad_norm": 0.9513982534408569, "learning_rate": 9.855145050393808e-06, "loss": 0.1276, "step": 3690 }, { "epoch": 14.122137404580153, "grad_norm": 0.3555004894733429, "learning_rate": 9.853825048110491e-06, "loss": 0.1184, "step": 3700 }, { "epoch": 14.16030534351145, "grad_norm": 0.4830268919467926, "learning_rate": 9.852499147972055e-06, "loss": 0.1063, "step": 3710 }, { "epoch": 14.198473282442748, "grad_norm": 0.5373522043228149, "learning_rate": 9.851167351589593e-06, "loss": 0.1422, "step": 3720 }, { "epoch": 14.236641221374045, "grad_norm": 0.36047184467315674, "learning_rate": 9.849829660581364e-06, "loss": 0.1151, "step": 3730 }, { "epoch": 14.274809160305344, "grad_norm": 0.3627484142780304, "learning_rate": 9.848486076572787e-06, "loss": 0.1273, "step": 3740 }, { "epoch": 14.312977099236642, "grad_norm": 0.37799179553985596, "learning_rate": 9.847136601196445e-06, "loss": 0.1098, "step": 3750 }, { "epoch": 14.351145038167939, "grad_norm": 0.7704446315765381, "learning_rate": 9.845781236092078e-06, "loss": 0.1134, "step": 3760 }, { "epoch": 14.389312977099237, "grad_norm": 0.36698824167251587, "learning_rate": 9.844419982906584e-06, "loss": 0.1395, "step": 3770 }, { "epoch": 14.427480916030534, "grad_norm": 0.6548821926116943, "learning_rate": 9.84305284329401e-06, "loss": 0.1282, "step": 3780 }, { "epoch": 14.465648854961833, "grad_norm": 0.3647122085094452, "learning_rate": 9.84167981891556e-06, "loss": 0.1082, "step": 3790 }, { "epoch": 14.50381679389313, "grad_norm": 0.334097683429718, "learning_rate": 9.84030091143959e-06, "loss": 0.1065, "step": 3800 }, { "epoch": 14.541984732824428, "grad_norm": 0.37364843487739563, "learning_rate": 9.838916122541604e-06, "loss": 0.1208, "step": 3810 }, { "epoch": 14.580152671755725, "grad_norm": 0.40124377608299255, "learning_rate": 9.837525453904247e-06, "loss": 0.1315, "step": 3820 }, { "epoch": 14.618320610687023, "grad_norm": 0.8429109454154968, "learning_rate": 9.836128907217315e-06, "loss": 0.1095, "step": 3830 }, { "epoch": 14.65648854961832, "grad_norm": 0.441574364900589, "learning_rate": 9.834726484177743e-06, "loss": 0.1197, "step": 3840 }, { "epoch": 14.694656488549619, "grad_norm": 0.8704144358634949, "learning_rate": 9.833318186489608e-06, "loss": 0.112, "step": 3850 }, { "epoch": 14.732824427480915, "grad_norm": 0.5962323546409607, "learning_rate": 9.831904015864127e-06, "loss": 0.1105, "step": 3860 }, { "epoch": 14.770992366412214, "grad_norm": 0.38795116543769836, "learning_rate": 9.830483974019644e-06, "loss": 0.1107, "step": 3870 }, { "epoch": 14.80916030534351, "grad_norm": 0.847028911113739, "learning_rate": 9.829058062681652e-06, "loss": 0.1209, "step": 3880 }, { "epoch": 14.84732824427481, "grad_norm": 0.3294160068035126, "learning_rate": 9.82762628358276e-06, "loss": 0.1082, "step": 3890 }, { "epoch": 14.885496183206106, "grad_norm": 1.4115614891052246, "learning_rate": 9.826188638462719e-06, "loss": 0.1318, "step": 3900 }, { "epoch": 14.923664122137405, "grad_norm": 0.47591233253479004, "learning_rate": 9.824745129068403e-06, "loss": 0.1409, "step": 3910 }, { "epoch": 14.961832061068701, "grad_norm": 0.369427353143692, "learning_rate": 9.82329575715381e-06, "loss": 0.1184, "step": 3920 }, { "epoch": 15.0, "grad_norm": 0.4399472177028656, "learning_rate": 9.821840524480066e-06, "loss": 0.1095, "step": 3930 }, { "epoch": 15.038167938931299, "grad_norm": 0.6284782886505127, "learning_rate": 9.820379432815414e-06, "loss": 0.1161, "step": 3940 }, { "epoch": 15.076335877862595, "grad_norm": 0.4220380485057831, "learning_rate": 9.81891248393522e-06, "loss": 0.1151, "step": 3950 }, { "epoch": 15.114503816793894, "grad_norm": 0.5931194424629211, "learning_rate": 9.817439679621964e-06, "loss": 0.1244, "step": 3960 }, { "epoch": 15.15267175572519, "grad_norm": 0.31137576699256897, "learning_rate": 9.815961021665243e-06, "loss": 0.1194, "step": 3970 }, { "epoch": 15.19083969465649, "grad_norm": 0.506864070892334, "learning_rate": 9.814476511861764e-06, "loss": 0.1112, "step": 3980 }, { "epoch": 15.229007633587786, "grad_norm": 0.7712796330451965, "learning_rate": 9.812986152015349e-06, "loss": 0.1197, "step": 3990 }, { "epoch": 15.267175572519085, "grad_norm": 0.49577248096466064, "learning_rate": 9.811489943936922e-06, "loss": 0.1148, "step": 4000 }, { "epoch": 15.305343511450381, "grad_norm": 0.404431015253067, "learning_rate": 9.809987889444523e-06, "loss": 0.1186, "step": 4010 }, { "epoch": 15.34351145038168, "grad_norm": 0.44168826937675476, "learning_rate": 9.808479990363282e-06, "loss": 0.1277, "step": 4020 }, { "epoch": 15.381679389312977, "grad_norm": 0.2900712490081787, "learning_rate": 9.806966248525447e-06, "loss": 0.1116, "step": 4030 }, { "epoch": 15.419847328244275, "grad_norm": 0.474483460187912, "learning_rate": 9.805446665770348e-06, "loss": 0.1249, "step": 4040 }, { "epoch": 15.458015267175572, "grad_norm": 0.4446212947368622, "learning_rate": 9.80392124394443e-06, "loss": 0.1082, "step": 4050 }, { "epoch": 15.49618320610687, "grad_norm": 0.3462320566177368, "learning_rate": 9.802389984901218e-06, "loss": 0.1263, "step": 4060 }, { "epoch": 15.534351145038167, "grad_norm": 0.481645792722702, "learning_rate": 9.80085289050134e-06, "loss": 0.1164, "step": 4070 }, { "epoch": 15.572519083969466, "grad_norm": 0.5727097988128662, "learning_rate": 9.799309962612508e-06, "loss": 0.1354, "step": 4080 }, { "epoch": 15.610687022900763, "grad_norm": 0.9570332765579224, "learning_rate": 9.797761203109527e-06, "loss": 0.1192, "step": 4090 }, { "epoch": 15.648854961832061, "grad_norm": 0.6497268080711365, "learning_rate": 9.796206613874283e-06, "loss": 0.1097, "step": 4100 }, { "epoch": 15.68702290076336, "grad_norm": 0.4102814197540283, "learning_rate": 9.794646196795754e-06, "loss": 0.1041, "step": 4110 }, { "epoch": 15.725190839694656, "grad_norm": 0.41581079363822937, "learning_rate": 9.793079953769988e-06, "loss": 0.1218, "step": 4120 }, { "epoch": 15.763358778625955, "grad_norm": 0.5571273565292358, "learning_rate": 9.79150788670012e-06, "loss": 0.1083, "step": 4130 }, { "epoch": 15.801526717557252, "grad_norm": 0.4825478792190552, "learning_rate": 9.789929997496362e-06, "loss": 0.1112, "step": 4140 }, { "epoch": 15.83969465648855, "grad_norm": 0.35019156336784363, "learning_rate": 9.788346288075993e-06, "loss": 0.1187, "step": 4150 }, { "epoch": 15.877862595419847, "grad_norm": 0.4018893539905548, "learning_rate": 9.786756760363374e-06, "loss": 0.1151, "step": 4160 }, { "epoch": 15.916030534351146, "grad_norm": 0.6337506175041199, "learning_rate": 9.78516141628993e-06, "loss": 0.1114, "step": 4170 }, { "epoch": 15.954198473282442, "grad_norm": 0.3153977394104004, "learning_rate": 9.783560257794153e-06, "loss": 0.1217, "step": 4180 }, { "epoch": 15.992366412213741, "grad_norm": 0.8760756254196167, "learning_rate": 9.781953286821604e-06, "loss": 0.1148, "step": 4190 }, { "epoch": 16.03053435114504, "grad_norm": 0.572750985622406, "learning_rate": 9.7803405053249e-06, "loss": 0.1161, "step": 4200 }, { "epoch": 16.068702290076335, "grad_norm": 0.6838567852973938, "learning_rate": 9.778721915263729e-06, "loss": 0.1203, "step": 4210 }, { "epoch": 16.106870229007633, "grad_norm": 0.4191221594810486, "learning_rate": 9.777097518604824e-06, "loss": 0.1159, "step": 4220 }, { "epoch": 16.14503816793893, "grad_norm": 0.8830999135971069, "learning_rate": 9.775467317321986e-06, "loss": 0.1169, "step": 4230 }, { "epoch": 16.18320610687023, "grad_norm": 0.4778836965560913, "learning_rate": 9.773831313396056e-06, "loss": 0.1149, "step": 4240 }, { "epoch": 16.221374045801525, "grad_norm": 0.3479765057563782, "learning_rate": 9.77218950881494e-06, "loss": 0.1195, "step": 4250 }, { "epoch": 16.259541984732824, "grad_norm": 0.39662837982177734, "learning_rate": 9.770541905573583e-06, "loss": 0.1275, "step": 4260 }, { "epoch": 16.297709923664122, "grad_norm": 0.6865242719650269, "learning_rate": 9.768888505673976e-06, "loss": 0.1238, "step": 4270 }, { "epoch": 16.33587786259542, "grad_norm": 0.3782191574573517, "learning_rate": 9.767229311125162e-06, "loss": 0.1133, "step": 4280 }, { "epoch": 16.374045801526716, "grad_norm": 0.588668942451477, "learning_rate": 9.76556432394321e-06, "loss": 0.1362, "step": 4290 }, { "epoch": 16.412213740458014, "grad_norm": 1.0646703243255615, "learning_rate": 9.763893546151244e-06, "loss": 0.1185, "step": 4300 }, { "epoch": 16.450381679389313, "grad_norm": 0.4589475989341736, "learning_rate": 9.762216979779412e-06, "loss": 0.1173, "step": 4310 }, { "epoch": 16.48854961832061, "grad_norm": 1.4795030355453491, "learning_rate": 9.760534626864902e-06, "loss": 0.125, "step": 4320 }, { "epoch": 16.52671755725191, "grad_norm": 0.6526045799255371, "learning_rate": 9.758846489451932e-06, "loss": 0.126, "step": 4330 }, { "epoch": 16.564885496183205, "grad_norm": 0.35587215423583984, "learning_rate": 9.757152569591748e-06, "loss": 0.1027, "step": 4340 }, { "epoch": 16.603053435114504, "grad_norm": 0.8031662106513977, "learning_rate": 9.755452869342621e-06, "loss": 0.1163, "step": 4350 }, { "epoch": 16.641221374045802, "grad_norm": 0.41244399547576904, "learning_rate": 9.753747390769848e-06, "loss": 0.1222, "step": 4360 }, { "epoch": 16.6793893129771, "grad_norm": 0.4697263836860657, "learning_rate": 9.752036135945743e-06, "loss": 0.1078, "step": 4370 }, { "epoch": 16.717557251908396, "grad_norm": 0.6083208322525024, "learning_rate": 9.75031910694965e-06, "loss": 0.1151, "step": 4380 }, { "epoch": 16.755725190839694, "grad_norm": 0.4035137891769409, "learning_rate": 9.748596305867913e-06, "loss": 0.1085, "step": 4390 }, { "epoch": 16.793893129770993, "grad_norm": 0.7176216244697571, "learning_rate": 9.746867734793904e-06, "loss": 0.1116, "step": 4400 }, { "epoch": 16.83206106870229, "grad_norm": 0.4055471122264862, "learning_rate": 9.745133395827994e-06, "loss": 0.1248, "step": 4410 }, { "epoch": 16.870229007633586, "grad_norm": 0.6174433827400208, "learning_rate": 9.743393291077571e-06, "loss": 0.1184, "step": 4420 }, { "epoch": 16.908396946564885, "grad_norm": 0.5224699378013611, "learning_rate": 9.741647422657029e-06, "loss": 0.1114, "step": 4430 }, { "epoch": 16.946564885496183, "grad_norm": 0.4283505976200104, "learning_rate": 9.739895792687758e-06, "loss": 0.1203, "step": 4440 }, { "epoch": 16.984732824427482, "grad_norm": 1.0380663871765137, "learning_rate": 9.738138403298158e-06, "loss": 0.1119, "step": 4450 }, { "epoch": 17.022900763358777, "grad_norm": 0.5753864645957947, "learning_rate": 9.73637525662362e-06, "loss": 0.1073, "step": 4460 }, { "epoch": 17.061068702290076, "grad_norm": 0.8454107046127319, "learning_rate": 9.734606354806533e-06, "loss": 0.1148, "step": 4470 }, { "epoch": 17.099236641221374, "grad_norm": 0.45036813616752625, "learning_rate": 9.732831699996282e-06, "loss": 0.1091, "step": 4480 }, { "epoch": 17.137404580152673, "grad_norm": 0.3069184720516205, "learning_rate": 9.731051294349238e-06, "loss": 0.1298, "step": 4490 }, { "epoch": 17.17557251908397, "grad_norm": 0.5899654626846313, "learning_rate": 9.729265140028762e-06, "loss": 0.1212, "step": 4500 }, { "epoch": 17.213740458015266, "grad_norm": 0.5663384199142456, "learning_rate": 9.727473239205202e-06, "loss": 0.1163, "step": 4510 }, { "epoch": 17.251908396946565, "grad_norm": 0.47053390741348267, "learning_rate": 9.725675594055884e-06, "loss": 0.1104, "step": 4520 }, { "epoch": 17.290076335877863, "grad_norm": 0.5472087264060974, "learning_rate": 9.723872206765115e-06, "loss": 0.1186, "step": 4530 }, { "epoch": 17.328244274809162, "grad_norm": 0.43163037300109863, "learning_rate": 9.722063079524185e-06, "loss": 0.118, "step": 4540 }, { "epoch": 17.366412213740457, "grad_norm": 0.3856120705604553, "learning_rate": 9.720248214531352e-06, "loss": 0.1052, "step": 4550 }, { "epoch": 17.404580152671755, "grad_norm": 0.47452834248542786, "learning_rate": 9.718427613991848e-06, "loss": 0.1026, "step": 4560 }, { "epoch": 17.442748091603054, "grad_norm": 0.5274608731269836, "learning_rate": 9.716601280117874e-06, "loss": 0.118, "step": 4570 }, { "epoch": 17.480916030534353, "grad_norm": 1.0746313333511353, "learning_rate": 9.714769215128597e-06, "loss": 0.1103, "step": 4580 }, { "epoch": 17.519083969465647, "grad_norm": 0.3799738585948944, "learning_rate": 9.712931421250152e-06, "loss": 0.1026, "step": 4590 }, { "epoch": 17.557251908396946, "grad_norm": 0.36720654368400574, "learning_rate": 9.711087900715627e-06, "loss": 0.1121, "step": 4600 }, { "epoch": 17.595419847328245, "grad_norm": 0.4583773612976074, "learning_rate": 9.709238655765079e-06, "loss": 0.1129, "step": 4610 }, { "epoch": 17.633587786259543, "grad_norm": 0.3917942941188812, "learning_rate": 9.70738368864551e-06, "loss": 0.1113, "step": 4620 }, { "epoch": 17.671755725190838, "grad_norm": 0.4856705367565155, "learning_rate": 9.705523001610884e-06, "loss": 0.1059, "step": 4630 }, { "epoch": 17.709923664122137, "grad_norm": 0.38836991786956787, "learning_rate": 9.703656596922107e-06, "loss": 0.1062, "step": 4640 }, { "epoch": 17.748091603053435, "grad_norm": 0.77637779712677, "learning_rate": 9.70178447684704e-06, "loss": 0.1125, "step": 4650 }, { "epoch": 17.786259541984734, "grad_norm": 1.447717547416687, "learning_rate": 9.699906643660484e-06, "loss": 0.1234, "step": 4660 }, { "epoch": 17.82442748091603, "grad_norm": 0.4381782114505768, "learning_rate": 9.698023099644186e-06, "loss": 0.1179, "step": 4670 }, { "epoch": 17.862595419847327, "grad_norm": 0.7676070928573608, "learning_rate": 9.696133847086824e-06, "loss": 0.1047, "step": 4680 }, { "epoch": 17.900763358778626, "grad_norm": 0.36729001998901367, "learning_rate": 9.694238888284023e-06, "loss": 0.1246, "step": 4690 }, { "epoch": 17.938931297709924, "grad_norm": 0.7202317714691162, "learning_rate": 9.692338225538334e-06, "loss": 0.1234, "step": 4700 }, { "epoch": 17.977099236641223, "grad_norm": 0.8629083037376404, "learning_rate": 9.690431861159242e-06, "loss": 0.1153, "step": 4710 }, { "epoch": 18.015267175572518, "grad_norm": 0.7108657956123352, "learning_rate": 9.68851979746316e-06, "loss": 0.1157, "step": 4720 }, { "epoch": 18.053435114503817, "grad_norm": 0.3120957612991333, "learning_rate": 9.686602036773427e-06, "loss": 0.1134, "step": 4730 }, { "epoch": 18.091603053435115, "grad_norm": 0.6133302450180054, "learning_rate": 9.684678581420302e-06, "loss": 0.1085, "step": 4740 }, { "epoch": 18.129770992366414, "grad_norm": 0.8384220004081726, "learning_rate": 9.682749433740963e-06, "loss": 0.1116, "step": 4750 }, { "epoch": 18.16793893129771, "grad_norm": 0.4204612672328949, "learning_rate": 9.680814596079508e-06, "loss": 0.1129, "step": 4760 }, { "epoch": 18.206106870229007, "grad_norm": 0.8686299324035645, "learning_rate": 9.678874070786945e-06, "loss": 0.1226, "step": 4770 }, { "epoch": 18.244274809160306, "grad_norm": 0.450674444437027, "learning_rate": 9.676927860221199e-06, "loss": 0.1189, "step": 4780 }, { "epoch": 18.282442748091604, "grad_norm": 1.338226079940796, "learning_rate": 9.674975966747098e-06, "loss": 0.1275, "step": 4790 }, { "epoch": 18.3206106870229, "grad_norm": 0.4029911160469055, "learning_rate": 9.673018392736373e-06, "loss": 0.1108, "step": 4800 }, { "epoch": 18.358778625954198, "grad_norm": 0.6675863862037659, "learning_rate": 9.671055140567667e-06, "loss": 0.1127, "step": 4810 }, { "epoch": 18.396946564885496, "grad_norm": 0.44830095767974854, "learning_rate": 9.669086212626512e-06, "loss": 0.1193, "step": 4820 }, { "epoch": 18.435114503816795, "grad_norm": 0.5231329202651978, "learning_rate": 9.667111611305343e-06, "loss": 0.1326, "step": 4830 }, { "epoch": 18.47328244274809, "grad_norm": 0.447273850440979, "learning_rate": 9.665131339003487e-06, "loss": 0.1144, "step": 4840 }, { "epoch": 18.51145038167939, "grad_norm": 1.005131721496582, "learning_rate": 9.663145398127158e-06, "loss": 0.1227, "step": 4850 }, { "epoch": 18.549618320610687, "grad_norm": 0.6508563160896301, "learning_rate": 9.661153791089468e-06, "loss": 0.117, "step": 4860 }, { "epoch": 18.587786259541986, "grad_norm": 0.9407038688659668, "learning_rate": 9.659156520310402e-06, "loss": 0.1238, "step": 4870 }, { "epoch": 18.625954198473284, "grad_norm": 0.356827050447464, "learning_rate": 9.657153588216834e-06, "loss": 0.1107, "step": 4880 }, { "epoch": 18.66412213740458, "grad_norm": 0.4456828534603119, "learning_rate": 9.655144997242516e-06, "loss": 0.1157, "step": 4890 }, { "epoch": 18.702290076335878, "grad_norm": 0.6558853387832642, "learning_rate": 9.653130749828074e-06, "loss": 0.1165, "step": 4900 }, { "epoch": 18.740458015267176, "grad_norm": 0.5114887952804565, "learning_rate": 9.651110848421011e-06, "loss": 0.1238, "step": 4910 }, { "epoch": 18.778625954198475, "grad_norm": 0.7068324089050293, "learning_rate": 9.649085295475695e-06, "loss": 0.1141, "step": 4920 }, { "epoch": 18.81679389312977, "grad_norm": 0.47622790932655334, "learning_rate": 9.647054093453366e-06, "loss": 0.1117, "step": 4930 }, { "epoch": 18.85496183206107, "grad_norm": 0.320960134267807, "learning_rate": 9.645017244822124e-06, "loss": 0.1023, "step": 4940 }, { "epoch": 18.893129770992367, "grad_norm": 0.3788800835609436, "learning_rate": 9.642974752056931e-06, "loss": 0.119, "step": 4950 }, { "epoch": 18.931297709923665, "grad_norm": 1.6596555709838867, "learning_rate": 9.640926617639614e-06, "loss": 0.1351, "step": 4960 }, { "epoch": 18.96946564885496, "grad_norm": 0.5164493918418884, "learning_rate": 9.638872844058844e-06, "loss": 0.1051, "step": 4970 }, { "epoch": 19.00763358778626, "grad_norm": 0.41121843457221985, "learning_rate": 9.636813433810151e-06, "loss": 0.1129, "step": 4980 }, { "epoch": 19.045801526717558, "grad_norm": 0.3111073672771454, "learning_rate": 9.634748389395914e-06, "loss": 0.1084, "step": 4990 }, { "epoch": 19.083969465648856, "grad_norm": 0.2994742691516876, "learning_rate": 9.632677713325353e-06, "loss": 0.1048, "step": 5000 }, { "epoch": 19.12213740458015, "grad_norm": 0.32396626472473145, "learning_rate": 9.63060140811454e-06, "loss": 0.1049, "step": 5010 }, { "epoch": 19.16030534351145, "grad_norm": 0.42831170558929443, "learning_rate": 9.628519476286379e-06, "loss": 0.1001, "step": 5020 }, { "epoch": 19.198473282442748, "grad_norm": 0.46095895767211914, "learning_rate": 9.626431920370613e-06, "loss": 0.1203, "step": 5030 }, { "epoch": 19.236641221374047, "grad_norm": 0.40226057171821594, "learning_rate": 9.624338742903819e-06, "loss": 0.1088, "step": 5040 }, { "epoch": 19.274809160305345, "grad_norm": 0.5237085819244385, "learning_rate": 9.622239946429407e-06, "loss": 0.1111, "step": 5050 }, { "epoch": 19.31297709923664, "grad_norm": 0.4355655014514923, "learning_rate": 9.62013553349761e-06, "loss": 0.1147, "step": 5060 }, { "epoch": 19.35114503816794, "grad_norm": 0.7808547616004944, "learning_rate": 9.61802550666549e-06, "loss": 0.1117, "step": 5070 }, { "epoch": 19.389312977099237, "grad_norm": 0.3424168527126312, "learning_rate": 9.615909868496928e-06, "loss": 0.1131, "step": 5080 }, { "epoch": 19.427480916030536, "grad_norm": 0.38718533515930176, "learning_rate": 9.613788621562622e-06, "loss": 0.113, "step": 5090 }, { "epoch": 19.46564885496183, "grad_norm": 0.3584721088409424, "learning_rate": 9.611661768440092e-06, "loss": 0.1029, "step": 5100 }, { "epoch": 19.50381679389313, "grad_norm": 0.5123441815376282, "learning_rate": 9.609529311713662e-06, "loss": 0.1108, "step": 5110 }, { "epoch": 19.541984732824428, "grad_norm": 0.37226274609565735, "learning_rate": 9.607391253974467e-06, "loss": 0.1103, "step": 5120 }, { "epoch": 19.580152671755727, "grad_norm": 0.542453408241272, "learning_rate": 9.605247597820448e-06, "loss": 0.1119, "step": 5130 }, { "epoch": 19.61832061068702, "grad_norm": 0.3321150839328766, "learning_rate": 9.603098345856354e-06, "loss": 0.1103, "step": 5140 }, { "epoch": 19.65648854961832, "grad_norm": 0.4373333752155304, "learning_rate": 9.600943500693724e-06, "loss": 0.1147, "step": 5150 }, { "epoch": 19.69465648854962, "grad_norm": 0.393868625164032, "learning_rate": 9.598783064950902e-06, "loss": 0.1079, "step": 5160 }, { "epoch": 19.732824427480917, "grad_norm": 0.5854748487472534, "learning_rate": 9.596617041253017e-06, "loss": 0.116, "step": 5170 }, { "epoch": 19.770992366412212, "grad_norm": 0.33527272939682007, "learning_rate": 9.594445432231996e-06, "loss": 0.1071, "step": 5180 }, { "epoch": 19.80916030534351, "grad_norm": 0.38696524500846863, "learning_rate": 9.592268240526546e-06, "loss": 0.1237, "step": 5190 }, { "epoch": 19.84732824427481, "grad_norm": 0.5272545218467712, "learning_rate": 9.590085468782162e-06, "loss": 0.122, "step": 5200 }, { "epoch": 19.885496183206108, "grad_norm": 0.3966885209083557, "learning_rate": 9.587897119651115e-06, "loss": 0.1165, "step": 5210 }, { "epoch": 19.923664122137403, "grad_norm": 0.6632593870162964, "learning_rate": 9.585703195792459e-06, "loss": 0.1279, "step": 5220 }, { "epoch": 19.9618320610687, "grad_norm": 0.41774246096611023, "learning_rate": 9.583503699872017e-06, "loss": 0.1043, "step": 5230 }, { "epoch": 20.0, "grad_norm": 0.38030463457107544, "learning_rate": 9.581298634562382e-06, "loss": 0.1124, "step": 5240 }, { "epoch": 20.0381679389313, "grad_norm": 0.314238965511322, "learning_rate": 9.579088002542918e-06, "loss": 0.1062, "step": 5250 }, { "epoch": 20.076335877862597, "grad_norm": 0.3432968556880951, "learning_rate": 9.57687180649975e-06, "loss": 0.1182, "step": 5260 }, { "epoch": 20.114503816793892, "grad_norm": 0.7172240614891052, "learning_rate": 9.57465004912577e-06, "loss": 0.1114, "step": 5270 }, { "epoch": 20.15267175572519, "grad_norm": 0.3679429590702057, "learning_rate": 9.572422733120614e-06, "loss": 0.1146, "step": 5280 }, { "epoch": 20.19083969465649, "grad_norm": 0.8204743266105652, "learning_rate": 9.57018986119069e-06, "loss": 0.1215, "step": 5290 }, { "epoch": 20.229007633587788, "grad_norm": 0.5028970241546631, "learning_rate": 9.56795143604914e-06, "loss": 0.1157, "step": 5300 }, { "epoch": 20.267175572519083, "grad_norm": 0.5000373125076294, "learning_rate": 9.56570746041587e-06, "loss": 0.116, "step": 5310 }, { "epoch": 20.30534351145038, "grad_norm": 0.36823147535324097, "learning_rate": 9.563457937017514e-06, "loss": 0.1211, "step": 5320 }, { "epoch": 20.34351145038168, "grad_norm": 0.39545708894729614, "learning_rate": 9.56120286858746e-06, "loss": 0.1164, "step": 5330 }, { "epoch": 20.38167938931298, "grad_norm": 1.3910619020462036, "learning_rate": 9.558942257865829e-06, "loss": 0.124, "step": 5340 }, { "epoch": 20.419847328244273, "grad_norm": 0.7138753533363342, "learning_rate": 9.556676107599472e-06, "loss": 0.1313, "step": 5350 }, { "epoch": 20.458015267175572, "grad_norm": 0.36400383710861206, "learning_rate": 9.554404420541979e-06, "loss": 0.1193, "step": 5360 }, { "epoch": 20.49618320610687, "grad_norm": 0.42554593086242676, "learning_rate": 9.552127199453662e-06, "loss": 0.1122, "step": 5370 }, { "epoch": 20.53435114503817, "grad_norm": 0.3719650208950043, "learning_rate": 9.549844447101559e-06, "loss": 0.1112, "step": 5380 }, { "epoch": 20.572519083969464, "grad_norm": 0.37084949016571045, "learning_rate": 9.547556166259433e-06, "loss": 0.1121, "step": 5390 }, { "epoch": 20.610687022900763, "grad_norm": 0.4586028456687927, "learning_rate": 9.545262359707756e-06, "loss": 0.1044, "step": 5400 }, { "epoch": 20.64885496183206, "grad_norm": 0.5926239490509033, "learning_rate": 9.542963030233725e-06, "loss": 0.1071, "step": 5410 }, { "epoch": 20.68702290076336, "grad_norm": 0.4982014000415802, "learning_rate": 9.540658180631236e-06, "loss": 0.1117, "step": 5420 }, { "epoch": 20.725190839694655, "grad_norm": 0.6193251013755798, "learning_rate": 9.538347813700903e-06, "loss": 0.1083, "step": 5430 }, { "epoch": 20.763358778625953, "grad_norm": 0.3325391411781311, "learning_rate": 9.536031932250037e-06, "loss": 0.135, "step": 5440 }, { "epoch": 20.801526717557252, "grad_norm": 0.2922006845474243, "learning_rate": 9.533710539092653e-06, "loss": 0.1128, "step": 5450 }, { "epoch": 20.83969465648855, "grad_norm": 0.6511474251747131, "learning_rate": 9.531383637049465e-06, "loss": 0.119, "step": 5460 }, { "epoch": 20.87786259541985, "grad_norm": 0.7577188014984131, "learning_rate": 9.529051228947875e-06, "loss": 0.1144, "step": 5470 }, { "epoch": 20.916030534351144, "grad_norm": 0.8727062940597534, "learning_rate": 9.52671331762198e-06, "loss": 0.1243, "step": 5480 }, { "epoch": 20.954198473282442, "grad_norm": 0.4881748557090759, "learning_rate": 9.524369905912566e-06, "loss": 0.1183, "step": 5490 }, { "epoch": 20.99236641221374, "grad_norm": 1.3852527141571045, "learning_rate": 9.522020996667092e-06, "loss": 0.109, "step": 5500 }, { "epoch": 21.03053435114504, "grad_norm": 0.3250916302204132, "learning_rate": 9.51966659273971e-06, "loss": 0.1099, "step": 5510 }, { "epoch": 21.068702290076335, "grad_norm": 0.3848394453525543, "learning_rate": 9.517306696991241e-06, "loss": 0.1153, "step": 5520 }, { "epoch": 21.106870229007633, "grad_norm": 0.2975100576877594, "learning_rate": 9.51494131228918e-06, "loss": 0.1031, "step": 5530 }, { "epoch": 21.14503816793893, "grad_norm": 0.5803893804550171, "learning_rate": 9.512570441507696e-06, "loss": 0.1136, "step": 5540 }, { "epoch": 21.18320610687023, "grad_norm": 0.5576812028884888, "learning_rate": 9.510194087527615e-06, "loss": 0.1202, "step": 5550 }, { "epoch": 21.221374045801525, "grad_norm": 0.5307789444923401, "learning_rate": 9.507812253236436e-06, "loss": 0.1223, "step": 5560 }, { "epoch": 21.259541984732824, "grad_norm": 0.6918527483940125, "learning_rate": 9.50542494152831e-06, "loss": 0.1177, "step": 5570 }, { "epoch": 21.297709923664122, "grad_norm": 0.489572674036026, "learning_rate": 9.503032155304046e-06, "loss": 0.1104, "step": 5580 }, { "epoch": 21.33587786259542, "grad_norm": 0.32068297266960144, "learning_rate": 9.500633897471105e-06, "loss": 0.109, "step": 5590 }, { "epoch": 21.374045801526716, "grad_norm": 0.5839059948921204, "learning_rate": 9.498230170943597e-06, "loss": 0.1197, "step": 5600 }, { "epoch": 21.412213740458014, "grad_norm": 0.8069677352905273, "learning_rate": 9.495820978642275e-06, "loss": 0.1146, "step": 5610 }, { "epoch": 21.450381679389313, "grad_norm": 0.5091967582702637, "learning_rate": 9.493406323494537e-06, "loss": 0.1342, "step": 5620 }, { "epoch": 21.48854961832061, "grad_norm": 0.44150835275650024, "learning_rate": 9.490986208434415e-06, "loss": 0.1161, "step": 5630 }, { "epoch": 21.52671755725191, "grad_norm": 0.9089648127555847, "learning_rate": 9.488560636402577e-06, "loss": 0.1134, "step": 5640 }, { "epoch": 21.564885496183205, "grad_norm": 0.6889301538467407, "learning_rate": 9.486129610346322e-06, "loss": 0.1129, "step": 5650 }, { "epoch": 21.603053435114504, "grad_norm": 0.3800375461578369, "learning_rate": 9.483693133219576e-06, "loss": 0.1042, "step": 5660 }, { "epoch": 21.641221374045802, "grad_norm": 0.4531780481338501, "learning_rate": 9.481251207982888e-06, "loss": 0.1137, "step": 5670 }, { "epoch": 21.6793893129771, "grad_norm": 0.4850050210952759, "learning_rate": 9.47880383760343e-06, "loss": 0.115, "step": 5680 }, { "epoch": 21.717557251908396, "grad_norm": 0.41983506083488464, "learning_rate": 9.476351025054984e-06, "loss": 0.1146, "step": 5690 }, { "epoch": 21.755725190839694, "grad_norm": 0.47073015570640564, "learning_rate": 9.473892773317952e-06, "loss": 0.1142, "step": 5700 }, { "epoch": 21.793893129770993, "grad_norm": 0.7770823836326599, "learning_rate": 9.471429085379339e-06, "loss": 0.1043, "step": 5710 }, { "epoch": 21.83206106870229, "grad_norm": 0.7332136631011963, "learning_rate": 9.468959964232757e-06, "loss": 0.1048, "step": 5720 }, { "epoch": 21.870229007633586, "grad_norm": 0.4616805911064148, "learning_rate": 9.466485412878425e-06, "loss": 0.1118, "step": 5730 }, { "epoch": 21.908396946564885, "grad_norm": 0.3106837868690491, "learning_rate": 9.464005434323154e-06, "loss": 0.1116, "step": 5740 }, { "epoch": 21.946564885496183, "grad_norm": 0.606867790222168, "learning_rate": 9.461520031580352e-06, "loss": 0.1116, "step": 5750 }, { "epoch": 21.984732824427482, "grad_norm": 0.3522845506668091, "learning_rate": 9.459029207670018e-06, "loss": 0.1007, "step": 5760 }, { "epoch": 22.022900763358777, "grad_norm": 0.5821218490600586, "learning_rate": 9.456532965618738e-06, "loss": 0.1169, "step": 5770 }, { "epoch": 22.061068702290076, "grad_norm": 0.6198912262916565, "learning_rate": 9.454031308459681e-06, "loss": 0.1144, "step": 5780 }, { "epoch": 22.099236641221374, "grad_norm": 0.396658331155777, "learning_rate": 9.451524239232596e-06, "loss": 0.1117, "step": 5790 }, { "epoch": 22.137404580152673, "grad_norm": 0.4272678792476654, "learning_rate": 9.44901176098381e-06, "loss": 0.13, "step": 5800 }, { "epoch": 22.17557251908397, "grad_norm": 0.5688971877098083, "learning_rate": 9.446493876766219e-06, "loss": 0.1147, "step": 5810 }, { "epoch": 22.213740458015266, "grad_norm": 0.8139527440071106, "learning_rate": 9.44397058963929e-06, "loss": 0.1215, "step": 5820 }, { "epoch": 22.251908396946565, "grad_norm": 0.9012984037399292, "learning_rate": 9.441441902669057e-06, "loss": 0.1144, "step": 5830 }, { "epoch": 22.290076335877863, "grad_norm": 0.38339322805404663, "learning_rate": 9.43890781892811e-06, "loss": 0.1202, "step": 5840 }, { "epoch": 22.328244274809162, "grad_norm": 0.34039992094039917, "learning_rate": 9.436368341495603e-06, "loss": 0.118, "step": 5850 }, { "epoch": 22.366412213740457, "grad_norm": 1.0768190622329712, "learning_rate": 9.43382347345724e-06, "loss": 0.1057, "step": 5860 }, { "epoch": 22.404580152671755, "grad_norm": 0.682651937007904, "learning_rate": 9.431273217905272e-06, "loss": 0.1111, "step": 5870 }, { "epoch": 22.442748091603054, "grad_norm": 0.7133651375770569, "learning_rate": 9.428717577938505e-06, "loss": 0.105, "step": 5880 }, { "epoch": 22.480916030534353, "grad_norm": 0.4730619192123413, "learning_rate": 9.426156556662276e-06, "loss": 0.1056, "step": 5890 }, { "epoch": 22.519083969465647, "grad_norm": 0.37307435274124146, "learning_rate": 9.423590157188475e-06, "loss": 0.1122, "step": 5900 }, { "epoch": 22.557251908396946, "grad_norm": 0.3784120976924896, "learning_rate": 9.421018382635514e-06, "loss": 0.1327, "step": 5910 }, { "epoch": 22.595419847328245, "grad_norm": 0.6031510233879089, "learning_rate": 9.418441236128344e-06, "loss": 0.1074, "step": 5920 }, { "epoch": 22.633587786259543, "grad_norm": 0.6121441721916199, "learning_rate": 9.41585872079844e-06, "loss": 0.1079, "step": 5930 }, { "epoch": 22.671755725190838, "grad_norm": 0.5662669539451599, "learning_rate": 9.413270839783802e-06, "loss": 0.1096, "step": 5940 }, { "epoch": 22.709923664122137, "grad_norm": 0.4588741064071655, "learning_rate": 9.41067759622895e-06, "loss": 0.1125, "step": 5950 }, { "epoch": 22.748091603053435, "grad_norm": 0.5805822014808655, "learning_rate": 9.408078993284917e-06, "loss": 0.1143, "step": 5960 }, { "epoch": 22.786259541984734, "grad_norm": 0.5425497889518738, "learning_rate": 9.405475034109254e-06, "loss": 0.1006, "step": 5970 }, { "epoch": 22.82442748091603, "grad_norm": 0.43856534361839294, "learning_rate": 9.402865721866017e-06, "loss": 0.1156, "step": 5980 }, { "epoch": 22.862595419847327, "grad_norm": 0.6085226535797119, "learning_rate": 9.400251059725762e-06, "loss": 0.1173, "step": 5990 }, { "epoch": 22.900763358778626, "grad_norm": 0.41723042726516724, "learning_rate": 9.397631050865554e-06, "loss": 0.115, "step": 6000 }, { "epoch": 22.938931297709924, "grad_norm": 0.47364330291748047, "learning_rate": 9.395005698468948e-06, "loss": 0.1096, "step": 6010 }, { "epoch": 22.977099236641223, "grad_norm": 0.5531240701675415, "learning_rate": 9.392375005726e-06, "loss": 0.1115, "step": 6020 }, { "epoch": 23.015267175572518, "grad_norm": 1.9053860902786255, "learning_rate": 9.389738975833243e-06, "loss": 0.1107, "step": 6030 }, { "epoch": 23.053435114503817, "grad_norm": 0.4159781336784363, "learning_rate": 9.387097611993707e-06, "loss": 0.1061, "step": 6040 }, { "epoch": 23.091603053435115, "grad_norm": 0.37060031294822693, "learning_rate": 9.384450917416894e-06, "loss": 0.1087, "step": 6050 }, { "epoch": 23.129770992366414, "grad_norm": 0.3280550241470337, "learning_rate": 9.381798895318792e-06, "loss": 0.1082, "step": 6060 }, { "epoch": 23.16793893129771, "grad_norm": 0.6817305088043213, "learning_rate": 9.379141548921855e-06, "loss": 0.1084, "step": 6070 }, { "epoch": 23.206106870229007, "grad_norm": 1.089829683303833, "learning_rate": 9.376478881455008e-06, "loss": 0.1191, "step": 6080 }, { "epoch": 23.244274809160306, "grad_norm": 0.514206051826477, "learning_rate": 9.373810896153647e-06, "loss": 0.108, "step": 6090 }, { "epoch": 23.282442748091604, "grad_norm": 0.3831291198730469, "learning_rate": 9.371137596259623e-06, "loss": 0.1141, "step": 6100 }, { "epoch": 23.3206106870229, "grad_norm": 0.4334690570831299, "learning_rate": 9.368458985021249e-06, "loss": 0.11, "step": 6110 }, { "epoch": 23.358778625954198, "grad_norm": 0.43179851770401, "learning_rate": 9.365775065693288e-06, "loss": 0.1128, "step": 6120 }, { "epoch": 23.396946564885496, "grad_norm": 0.43169301748275757, "learning_rate": 9.363085841536958e-06, "loss": 0.1113, "step": 6130 }, { "epoch": 23.435114503816795, "grad_norm": 0.6322646737098694, "learning_rate": 9.360391315819917e-06, "loss": 0.1113, "step": 6140 }, { "epoch": 23.47328244274809, "grad_norm": 0.37557345628738403, "learning_rate": 9.35769149181627e-06, "loss": 0.1107, "step": 6150 }, { "epoch": 23.51145038167939, "grad_norm": 0.48005855083465576, "learning_rate": 9.354986372806557e-06, "loss": 0.1067, "step": 6160 }, { "epoch": 23.549618320610687, "grad_norm": 0.5975325107574463, "learning_rate": 9.352275962077752e-06, "loss": 0.1199, "step": 6170 }, { "epoch": 23.587786259541986, "grad_norm": 0.49410223960876465, "learning_rate": 9.349560262923262e-06, "loss": 0.1296, "step": 6180 }, { "epoch": 23.625954198473284, "grad_norm": 0.3413020670413971, "learning_rate": 9.346839278642915e-06, "loss": 0.1102, "step": 6190 }, { "epoch": 23.66412213740458, "grad_norm": 0.43629974126815796, "learning_rate": 9.344113012542964e-06, "loss": 0.1163, "step": 6200 }, { "epoch": 23.702290076335878, "grad_norm": 0.4831625521183014, "learning_rate": 9.341381467936079e-06, "loss": 0.1056, "step": 6210 }, { "epoch": 23.740458015267176, "grad_norm": 0.4807736575603485, "learning_rate": 9.338644648141347e-06, "loss": 0.1002, "step": 6220 }, { "epoch": 23.778625954198475, "grad_norm": 0.6690664887428284, "learning_rate": 9.335902556484258e-06, "loss": 0.112, "step": 6230 }, { "epoch": 23.81679389312977, "grad_norm": 0.7592681646347046, "learning_rate": 9.333155196296712e-06, "loss": 0.1051, "step": 6240 }, { "epoch": 23.85496183206107, "grad_norm": 0.3155533969402313, "learning_rate": 9.330402570917017e-06, "loss": 0.1104, "step": 6250 }, { "epoch": 23.893129770992367, "grad_norm": 0.43112990260124207, "learning_rate": 9.327644683689866e-06, "loss": 0.1098, "step": 6260 }, { "epoch": 23.931297709923665, "grad_norm": 0.28097161650657654, "learning_rate": 9.324881537966355e-06, "loss": 0.1086, "step": 6270 }, { "epoch": 23.96946564885496, "grad_norm": 0.48976218700408936, "learning_rate": 9.322113137103964e-06, "loss": 0.1106, "step": 6280 }, { "epoch": 24.00763358778626, "grad_norm": 0.5290048718452454, "learning_rate": 9.319339484466565e-06, "loss": 0.1065, "step": 6290 }, { "epoch": 24.045801526717558, "grad_norm": 0.48621800541877747, "learning_rate": 9.316560583424404e-06, "loss": 0.106, "step": 6300 }, { "epoch": 24.083969465648856, "grad_norm": 0.3502595126628876, "learning_rate": 9.313776437354109e-06, "loss": 0.105, "step": 6310 }, { "epoch": 24.12213740458015, "grad_norm": 0.3239123225212097, "learning_rate": 9.310987049638681e-06, "loss": 0.1121, "step": 6320 }, { "epoch": 24.16030534351145, "grad_norm": 0.38720473647117615, "learning_rate": 9.308192423667486e-06, "loss": 0.1035, "step": 6330 }, { "epoch": 24.198473282442748, "grad_norm": 0.382134348154068, "learning_rate": 9.305392562836262e-06, "loss": 0.1073, "step": 6340 }, { "epoch": 24.236641221374047, "grad_norm": 0.37410208582878113, "learning_rate": 9.302587470547101e-06, "loss": 0.1247, "step": 6350 }, { "epoch": 24.274809160305345, "grad_norm": 0.3606686592102051, "learning_rate": 9.299777150208456e-06, "loss": 0.1206, "step": 6360 }, { "epoch": 24.31297709923664, "grad_norm": 0.5555996894836426, "learning_rate": 9.296961605235133e-06, "loss": 0.1219, "step": 6370 }, { "epoch": 24.35114503816794, "grad_norm": 0.40895533561706543, "learning_rate": 9.29414083904828e-06, "loss": 0.1151, "step": 6380 }, { "epoch": 24.389312977099237, "grad_norm": 0.3560793101787567, "learning_rate": 9.291314855075397e-06, "loss": 0.1171, "step": 6390 }, { "epoch": 24.427480916030536, "grad_norm": 0.583595871925354, "learning_rate": 9.288483656750322e-06, "loss": 0.1203, "step": 6400 }, { "epoch": 24.46564885496183, "grad_norm": 0.3709441125392914, "learning_rate": 9.285647247513225e-06, "loss": 0.1087, "step": 6410 }, { "epoch": 24.50381679389313, "grad_norm": 0.5347302556037903, "learning_rate": 9.282805630810614e-06, "loss": 0.1089, "step": 6420 }, { "epoch": 24.541984732824428, "grad_norm": 0.9013820290565491, "learning_rate": 9.279958810095317e-06, "loss": 0.1124, "step": 6430 }, { "epoch": 24.580152671755727, "grad_norm": 0.32639890909194946, "learning_rate": 9.277106788826494e-06, "loss": 0.1224, "step": 6440 }, { "epoch": 24.61832061068702, "grad_norm": 0.462785542011261, "learning_rate": 9.274249570469618e-06, "loss": 0.1157, "step": 6450 }, { "epoch": 24.65648854961832, "grad_norm": 0.6265701651573181, "learning_rate": 9.271387158496477e-06, "loss": 0.1041, "step": 6460 }, { "epoch": 24.69465648854962, "grad_norm": 0.4119669198989868, "learning_rate": 9.268519556385173e-06, "loss": 0.1091, "step": 6470 }, { "epoch": 24.732824427480917, "grad_norm": 0.2758201062679291, "learning_rate": 9.265646767620113e-06, "loss": 0.1023, "step": 6480 }, { "epoch": 24.770992366412212, "grad_norm": 0.4721677601337433, "learning_rate": 9.262768795692006e-06, "loss": 0.1004, "step": 6490 }, { "epoch": 24.80916030534351, "grad_norm": 0.4377496838569641, "learning_rate": 9.259885644097861e-06, "loss": 0.1251, "step": 6500 }, { "epoch": 24.84732824427481, "grad_norm": 0.3082864284515381, "learning_rate": 9.256997316340976e-06, "loss": 0.1116, "step": 6510 }, { "epoch": 24.885496183206108, "grad_norm": 0.36191001534461975, "learning_rate": 9.254103815930944e-06, "loss": 0.1164, "step": 6520 }, { "epoch": 24.923664122137403, "grad_norm": 0.34867507219314575, "learning_rate": 9.251205146383637e-06, "loss": 0.1155, "step": 6530 }, { "epoch": 24.9618320610687, "grad_norm": 0.665786862373352, "learning_rate": 9.248301311221216e-06, "loss": 0.1075, "step": 6540 }, { "epoch": 25.0, "grad_norm": 0.5270240306854248, "learning_rate": 9.245392313972116e-06, "loss": 0.112, "step": 6550 }, { "epoch": 25.0381679389313, "grad_norm": 0.5124590396881104, "learning_rate": 9.242478158171038e-06, "loss": 0.1091, "step": 6560 }, { "epoch": 25.076335877862597, "grad_norm": 0.6183871626853943, "learning_rate": 9.239558847358959e-06, "loss": 0.1148, "step": 6570 }, { "epoch": 25.114503816793892, "grad_norm": 0.4855692982673645, "learning_rate": 9.236634385083115e-06, "loss": 0.1086, "step": 6580 }, { "epoch": 25.15267175572519, "grad_norm": 0.37100329995155334, "learning_rate": 9.233704774897006e-06, "loss": 0.1123, "step": 6590 }, { "epoch": 25.19083969465649, "grad_norm": 0.5242245197296143, "learning_rate": 9.230770020360383e-06, "loss": 0.1155, "step": 6600 }, { "epoch": 25.229007633587788, "grad_norm": 0.36837512254714966, "learning_rate": 9.227830125039248e-06, "loss": 0.1312, "step": 6610 }, { "epoch": 25.267175572519083, "grad_norm": 0.4537639915943146, "learning_rate": 9.224885092505853e-06, "loss": 0.1111, "step": 6620 }, { "epoch": 25.30534351145038, "grad_norm": 0.42237746715545654, "learning_rate": 9.22193492633869e-06, "loss": 0.1185, "step": 6630 }, { "epoch": 25.34351145038168, "grad_norm": 0.6770774722099304, "learning_rate": 9.21897963012249e-06, "loss": 0.1094, "step": 6640 }, { "epoch": 25.38167938931298, "grad_norm": 0.5275667905807495, "learning_rate": 9.216019207448216e-06, "loss": 0.1112, "step": 6650 }, { "epoch": 25.419847328244273, "grad_norm": 0.5454118251800537, "learning_rate": 9.213053661913061e-06, "loss": 0.108, "step": 6660 }, { "epoch": 25.458015267175572, "grad_norm": 0.4298250079154968, "learning_rate": 9.210082997120439e-06, "loss": 0.108, "step": 6670 }, { "epoch": 25.49618320610687, "grad_norm": 0.5686354637145996, "learning_rate": 9.207107216679994e-06, "loss": 0.121, "step": 6680 }, { "epoch": 25.53435114503817, "grad_norm": 0.7463754415512085, "learning_rate": 9.204126324207575e-06, "loss": 0.1155, "step": 6690 }, { "epoch": 25.572519083969464, "grad_norm": 0.9898240566253662, "learning_rate": 9.201140323325248e-06, "loss": 0.1226, "step": 6700 }, { "epoch": 25.610687022900763, "grad_norm": 0.40233856439590454, "learning_rate": 9.198149217661287e-06, "loss": 0.1073, "step": 6710 }, { "epoch": 25.64885496183206, "grad_norm": 0.44068315625190735, "learning_rate": 9.195153010850166e-06, "loss": 0.1161, "step": 6720 }, { "epoch": 25.68702290076336, "grad_norm": 0.5321930050849915, "learning_rate": 9.192151706532562e-06, "loss": 0.1041, "step": 6730 }, { "epoch": 25.725190839694655, "grad_norm": 0.34386226534843445, "learning_rate": 9.189145308355339e-06, "loss": 0.111, "step": 6740 }, { "epoch": 25.763358778625953, "grad_norm": 0.9037399291992188, "learning_rate": 9.186133819971556e-06, "loss": 0.1146, "step": 6750 }, { "epoch": 25.801526717557252, "grad_norm": 0.2989799976348877, "learning_rate": 9.183117245040455e-06, "loss": 0.1291, "step": 6760 }, { "epoch": 25.83969465648855, "grad_norm": 0.5754449367523193, "learning_rate": 9.18009558722746e-06, "loss": 0.1061, "step": 6770 }, { "epoch": 25.87786259541985, "grad_norm": 0.3005794882774353, "learning_rate": 9.177068850204167e-06, "loss": 0.1062, "step": 6780 }, { "epoch": 25.916030534351144, "grad_norm": 0.466094046831131, "learning_rate": 9.174037037648351e-06, "loss": 0.119, "step": 6790 }, { "epoch": 25.954198473282442, "grad_norm": 0.44860929250717163, "learning_rate": 9.171000153243948e-06, "loss": 0.1121, "step": 6800 }, { "epoch": 25.99236641221374, "grad_norm": 0.6850860714912415, "learning_rate": 9.167958200681058e-06, "loss": 0.1133, "step": 6810 }, { "epoch": 26.03053435114504, "grad_norm": 0.5172090530395508, "learning_rate": 9.164911183655943e-06, "loss": 0.1096, "step": 6820 }, { "epoch": 26.068702290076335, "grad_norm": 0.28496426343917847, "learning_rate": 9.161859105871013e-06, "loss": 0.1001, "step": 6830 }, { "epoch": 26.106870229007633, "grad_norm": 0.4888004958629608, "learning_rate": 9.158801971034832e-06, "loss": 0.0996, "step": 6840 }, { "epoch": 26.14503816793893, "grad_norm": 0.4661968946456909, "learning_rate": 9.155739782862107e-06, "loss": 0.1062, "step": 6850 }, { "epoch": 26.18320610687023, "grad_norm": 0.6031951308250427, "learning_rate": 9.152672545073687e-06, "loss": 0.1138, "step": 6860 }, { "epoch": 26.221374045801525, "grad_norm": 0.35957977175712585, "learning_rate": 9.149600261396552e-06, "loss": 0.1061, "step": 6870 }, { "epoch": 26.259541984732824, "grad_norm": 0.3762602210044861, "learning_rate": 9.146522935563816e-06, "loss": 0.1143, "step": 6880 }, { "epoch": 26.297709923664122, "grad_norm": 0.763342559337616, "learning_rate": 9.143440571314723e-06, "loss": 0.1143, "step": 6890 }, { "epoch": 26.33587786259542, "grad_norm": 0.331662654876709, "learning_rate": 9.140353172394637e-06, "loss": 0.1009, "step": 6900 }, { "epoch": 26.374045801526716, "grad_norm": 0.6234320402145386, "learning_rate": 9.137260742555033e-06, "loss": 0.1046, "step": 6910 }, { "epoch": 26.412213740458014, "grad_norm": 0.2962917685508728, "learning_rate": 9.134163285553511e-06, "loss": 0.1058, "step": 6920 }, { "epoch": 26.450381679389313, "grad_norm": 0.43455183506011963, "learning_rate": 9.13106080515377e-06, "loss": 0.116, "step": 6930 }, { "epoch": 26.48854961832061, "grad_norm": 0.4187309145927429, "learning_rate": 9.127953305125618e-06, "loss": 0.1063, "step": 6940 }, { "epoch": 26.52671755725191, "grad_norm": 0.5334097146987915, "learning_rate": 9.124840789244958e-06, "loss": 0.1191, "step": 6950 }, { "epoch": 26.564885496183205, "grad_norm": 0.5690393447875977, "learning_rate": 9.121723261293793e-06, "loss": 0.1119, "step": 6960 }, { "epoch": 26.603053435114504, "grad_norm": 0.41400864720344543, "learning_rate": 9.118600725060214e-06, "loss": 0.1023, "step": 6970 }, { "epoch": 26.641221374045802, "grad_norm": 0.4753328561782837, "learning_rate": 9.115473184338393e-06, "loss": 0.1104, "step": 6980 }, { "epoch": 26.6793893129771, "grad_norm": 0.29748255014419556, "learning_rate": 9.11234064292859e-06, "loss": 0.1003, "step": 6990 }, { "epoch": 26.717557251908396, "grad_norm": 0.34052833914756775, "learning_rate": 9.109203104637138e-06, "loss": 0.1167, "step": 7000 }, { "epoch": 26.755725190839694, "grad_norm": 0.39550068974494934, "learning_rate": 9.10606057327644e-06, "loss": 0.1128, "step": 7010 }, { "epoch": 26.793893129770993, "grad_norm": 0.3241926431655884, "learning_rate": 9.102913052664971e-06, "loss": 0.1078, "step": 7020 }, { "epoch": 26.83206106870229, "grad_norm": 0.3458397686481476, "learning_rate": 9.099760546627262e-06, "loss": 0.1104, "step": 7030 }, { "epoch": 26.870229007633586, "grad_norm": 0.6425669193267822, "learning_rate": 9.096603058993907e-06, "loss": 0.1119, "step": 7040 }, { "epoch": 26.908396946564885, "grad_norm": 0.39472290873527527, "learning_rate": 9.093440593601553e-06, "loss": 0.1109, "step": 7050 }, { "epoch": 26.946564885496183, "grad_norm": 0.3740868866443634, "learning_rate": 9.090273154292889e-06, "loss": 0.1181, "step": 7060 }, { "epoch": 26.984732824427482, "grad_norm": 0.38200682401657104, "learning_rate": 9.087100744916656e-06, "loss": 0.1065, "step": 7070 }, { "epoch": 27.022900763358777, "grad_norm": 0.4096829295158386, "learning_rate": 9.08392336932763e-06, "loss": 0.1099, "step": 7080 }, { "epoch": 27.061068702290076, "grad_norm": 0.6823604702949524, "learning_rate": 9.08074103138662e-06, "loss": 0.1167, "step": 7090 }, { "epoch": 27.099236641221374, "grad_norm": 0.5257452726364136, "learning_rate": 9.077553734960469e-06, "loss": 0.1047, "step": 7100 }, { "epoch": 27.137404580152673, "grad_norm": 0.4600130617618561, "learning_rate": 9.074361483922041e-06, "loss": 0.1006, "step": 7110 }, { "epoch": 27.17557251908397, "grad_norm": 0.6489649415016174, "learning_rate": 9.071164282150224e-06, "loss": 0.1035, "step": 7120 }, { "epoch": 27.213740458015266, "grad_norm": 1.3038793802261353, "learning_rate": 9.067962133529919e-06, "loss": 0.1039, "step": 7130 }, { "epoch": 27.251908396946565, "grad_norm": 0.542229950428009, "learning_rate": 9.064755041952036e-06, "loss": 0.1162, "step": 7140 }, { "epoch": 27.290076335877863, "grad_norm": 0.3620493412017822, "learning_rate": 9.061543011313498e-06, "loss": 0.1153, "step": 7150 }, { "epoch": 27.328244274809162, "grad_norm": 0.44058114290237427, "learning_rate": 9.05832604551722e-06, "loss": 0.1148, "step": 7160 }, { "epoch": 27.366412213740457, "grad_norm": 0.53098064661026, "learning_rate": 9.055104148472123e-06, "loss": 0.1158, "step": 7170 }, { "epoch": 27.404580152671755, "grad_norm": 0.7122390270233154, "learning_rate": 9.051877324093114e-06, "loss": 0.1186, "step": 7180 }, { "epoch": 27.442748091603054, "grad_norm": 0.6435628533363342, "learning_rate": 9.04864557630109e-06, "loss": 0.1176, "step": 7190 }, { "epoch": 27.480916030534353, "grad_norm": 0.2909921705722809, "learning_rate": 9.045408909022928e-06, "loss": 0.1115, "step": 7200 }, { "epoch": 27.519083969465647, "grad_norm": 0.39591115713119507, "learning_rate": 9.042167326191484e-06, "loss": 0.1077, "step": 7210 }, { "epoch": 27.557251908396946, "grad_norm": 0.7656340003013611, "learning_rate": 9.038920831745587e-06, "loss": 0.1072, "step": 7220 }, { "epoch": 27.595419847328245, "grad_norm": 0.5420284271240234, "learning_rate": 9.035669429630036e-06, "loss": 0.1113, "step": 7230 }, { "epoch": 27.633587786259543, "grad_norm": 0.3711391091346741, "learning_rate": 9.032413123795589e-06, "loss": 0.1094, "step": 7240 }, { "epoch": 27.671755725190838, "grad_norm": 0.6970059871673584, "learning_rate": 9.029151918198962e-06, "loss": 0.1083, "step": 7250 }, { "epoch": 27.709923664122137, "grad_norm": 0.5760360956192017, "learning_rate": 9.025885816802833e-06, "loss": 0.1195, "step": 7260 }, { "epoch": 27.748091603053435, "grad_norm": 0.3566513955593109, "learning_rate": 9.022614823575819e-06, "loss": 0.1091, "step": 7270 }, { "epoch": 27.786259541984734, "grad_norm": 0.3161095380783081, "learning_rate": 9.019338942492485e-06, "loss": 0.1095, "step": 7280 }, { "epoch": 27.82442748091603, "grad_norm": 0.3499198853969574, "learning_rate": 9.01605817753334e-06, "loss": 0.1113, "step": 7290 }, { "epoch": 27.862595419847327, "grad_norm": 0.3654879927635193, "learning_rate": 9.012772532684819e-06, "loss": 0.111, "step": 7300 }, { "epoch": 27.900763358778626, "grad_norm": 0.5612740516662598, "learning_rate": 9.00948201193929e-06, "loss": 0.107, "step": 7310 }, { "epoch": 27.938931297709924, "grad_norm": 0.6282134652137756, "learning_rate": 9.006186619295048e-06, "loss": 0.117, "step": 7320 }, { "epoch": 27.977099236641223, "grad_norm": 0.9098697900772095, "learning_rate": 9.002886358756304e-06, "loss": 0.115, "step": 7330 }, { "epoch": 28.015267175572518, "grad_norm": 0.5928300619125366, "learning_rate": 8.999581234333189e-06, "loss": 0.1075, "step": 7340 }, { "epoch": 28.053435114503817, "grad_norm": 0.39797747135162354, "learning_rate": 8.996271250041735e-06, "loss": 0.1185, "step": 7350 }, { "epoch": 28.091603053435115, "grad_norm": 0.5730596780776978, "learning_rate": 8.99295640990389e-06, "loss": 0.1113, "step": 7360 }, { "epoch": 28.129770992366414, "grad_norm": 0.40439486503601074, "learning_rate": 8.989636717947496e-06, "loss": 0.1126, "step": 7370 }, { "epoch": 28.16793893129771, "grad_norm": 0.978628933429718, "learning_rate": 8.986312178206291e-06, "loss": 0.1018, "step": 7380 }, { "epoch": 28.206106870229007, "grad_norm": 0.45754435658454895, "learning_rate": 8.982982794719904e-06, "loss": 0.1116, "step": 7390 }, { "epoch": 28.244274809160306, "grad_norm": 0.3176497519016266, "learning_rate": 8.979648571533852e-06, "loss": 0.1055, "step": 7400 }, { "epoch": 28.282442748091604, "grad_norm": 0.3644157946109772, "learning_rate": 8.97630951269953e-06, "loss": 0.1086, "step": 7410 }, { "epoch": 28.3206106870229, "grad_norm": 0.4856671988964081, "learning_rate": 8.972965622274206e-06, "loss": 0.1148, "step": 7420 }, { "epoch": 28.358778625954198, "grad_norm": 0.3878462314605713, "learning_rate": 8.969616904321026e-06, "loss": 0.1041, "step": 7430 }, { "epoch": 28.396946564885496, "grad_norm": 0.4682347774505615, "learning_rate": 8.966263362908998e-06, "loss": 0.1167, "step": 7440 }, { "epoch": 28.435114503816795, "grad_norm": 0.41615232825279236, "learning_rate": 8.962905002112989e-06, "loss": 0.1112, "step": 7450 }, { "epoch": 28.47328244274809, "grad_norm": 0.39141789078712463, "learning_rate": 8.959541826013725e-06, "loss": 0.1099, "step": 7460 }, { "epoch": 28.51145038167939, "grad_norm": 0.4517311453819275, "learning_rate": 8.95617383869778e-06, "loss": 0.1093, "step": 7470 }, { "epoch": 28.549618320610687, "grad_norm": 0.3166011571884155, "learning_rate": 8.952801044257581e-06, "loss": 0.1008, "step": 7480 }, { "epoch": 28.587786259541986, "grad_norm": 0.5795003771781921, "learning_rate": 8.949423446791388e-06, "loss": 0.1185, "step": 7490 }, { "epoch": 28.625954198473284, "grad_norm": 0.7326614856719971, "learning_rate": 8.9460410504033e-06, "loss": 0.1177, "step": 7500 }, { "epoch": 28.66412213740458, "grad_norm": 0.4169262647628784, "learning_rate": 8.942653859203248e-06, "loss": 0.1113, "step": 7510 }, { "epoch": 28.702290076335878, "grad_norm": 0.43298423290252686, "learning_rate": 8.93926187730699e-06, "loss": 0.1072, "step": 7520 }, { "epoch": 28.740458015267176, "grad_norm": 0.3784909248352051, "learning_rate": 8.935865108836103e-06, "loss": 0.1097, "step": 7530 }, { "epoch": 28.778625954198475, "grad_norm": 0.7653591632843018, "learning_rate": 8.932463557917982e-06, "loss": 0.1127, "step": 7540 }, { "epoch": 28.81679389312977, "grad_norm": 0.47900792956352234, "learning_rate": 8.929057228685829e-06, "loss": 0.1121, "step": 7550 }, { "epoch": 28.85496183206107, "grad_norm": 0.6226459741592407, "learning_rate": 8.925646125278657e-06, "loss": 0.1082, "step": 7560 }, { "epoch": 28.893129770992367, "grad_norm": 0.4821157455444336, "learning_rate": 8.92223025184128e-06, "loss": 0.1081, "step": 7570 }, { "epoch": 28.931297709923665, "grad_norm": 0.3318312466144562, "learning_rate": 8.918809612524305e-06, "loss": 0.113, "step": 7580 }, { "epoch": 28.96946564885496, "grad_norm": 0.4457385241985321, "learning_rate": 8.91538421148413e-06, "loss": 0.1162, "step": 7590 }, { "epoch": 29.00763358778626, "grad_norm": 0.40070948004722595, "learning_rate": 8.911954052882941e-06, "loss": 0.1127, "step": 7600 }, { "epoch": 29.045801526717558, "grad_norm": 0.4417935609817505, "learning_rate": 8.908519140888704e-06, "loss": 0.1035, "step": 7610 }, { "epoch": 29.083969465648856, "grad_norm": 0.3581926226615906, "learning_rate": 8.905079479675164e-06, "loss": 0.1033, "step": 7620 }, { "epoch": 29.12213740458015, "grad_norm": 0.4203307330608368, "learning_rate": 8.901635073421831e-06, "loss": 0.1021, "step": 7630 }, { "epoch": 29.16030534351145, "grad_norm": 0.4549827575683594, "learning_rate": 8.898185926313982e-06, "loss": 0.1093, "step": 7640 }, { "epoch": 29.198473282442748, "grad_norm": 0.3619544208049774, "learning_rate": 8.894732042542659e-06, "loss": 0.1126, "step": 7650 }, { "epoch": 29.236641221374047, "grad_norm": 0.32612374424934387, "learning_rate": 8.891273426304656e-06, "loss": 0.1082, "step": 7660 }, { "epoch": 29.274809160305345, "grad_norm": 0.44945961236953735, "learning_rate": 8.887810081802514e-06, "loss": 0.11, "step": 7670 }, { "epoch": 29.31297709923664, "grad_norm": 0.5542881488800049, "learning_rate": 8.88434201324453e-06, "loss": 0.1066, "step": 7680 }, { "epoch": 29.35114503816794, "grad_norm": 0.7978936433792114, "learning_rate": 8.880869224844727e-06, "loss": 0.1143, "step": 7690 }, { "epoch": 29.389312977099237, "grad_norm": 0.6490185260772705, "learning_rate": 8.877391720822874e-06, "loss": 0.1155, "step": 7700 }, { "epoch": 29.427480916030536, "grad_norm": 0.5318716168403625, "learning_rate": 8.873909505404467e-06, "loss": 0.1078, "step": 7710 }, { "epoch": 29.46564885496183, "grad_norm": 0.40619373321533203, "learning_rate": 8.870422582820726e-06, "loss": 0.1147, "step": 7720 }, { "epoch": 29.50381679389313, "grad_norm": 0.3707321286201477, "learning_rate": 8.866930957308589e-06, "loss": 0.1077, "step": 7730 }, { "epoch": 29.541984732824428, "grad_norm": 0.5502585172653198, "learning_rate": 8.863434633110711e-06, "loss": 0.1046, "step": 7740 }, { "epoch": 29.580152671755727, "grad_norm": 0.34364914894104004, "learning_rate": 8.859933614475454e-06, "loss": 0.1071, "step": 7750 }, { "epoch": 29.61832061068702, "grad_norm": 0.302059531211853, "learning_rate": 8.85642790565689e-06, "loss": 0.1041, "step": 7760 }, { "epoch": 29.65648854961832, "grad_norm": 0.5553315281867981, "learning_rate": 8.852917510914783e-06, "loss": 0.111, "step": 7770 }, { "epoch": 29.69465648854962, "grad_norm": 0.3059275448322296, "learning_rate": 8.84940243451459e-06, "loss": 0.1057, "step": 7780 }, { "epoch": 29.732824427480917, "grad_norm": 0.3817649483680725, "learning_rate": 8.84588268072747e-06, "loss": 0.1044, "step": 7790 }, { "epoch": 29.770992366412212, "grad_norm": 0.3629932403564453, "learning_rate": 8.842358253830245e-06, "loss": 0.1092, "step": 7800 }, { "epoch": 29.80916030534351, "grad_norm": 0.39645668864250183, "learning_rate": 8.838829158105434e-06, "loss": 0.1092, "step": 7810 }, { "epoch": 29.84732824427481, "grad_norm": 0.3265850245952606, "learning_rate": 8.835295397841217e-06, "loss": 0.1039, "step": 7820 }, { "epoch": 29.885496183206108, "grad_norm": 0.9192505478858948, "learning_rate": 8.831756977331447e-06, "loss": 0.1192, "step": 7830 }, { "epoch": 29.923664122137403, "grad_norm": 1.0596504211425781, "learning_rate": 8.828213900875639e-06, "loss": 0.1154, "step": 7840 }, { "epoch": 29.9618320610687, "grad_norm": 0.6525912880897522, "learning_rate": 8.824666172778964e-06, "loss": 0.113, "step": 7850 }, { "epoch": 30.0, "grad_norm": 0.4801918864250183, "learning_rate": 8.821113797352246e-06, "loss": 0.106, "step": 7860 }, { "epoch": 30.0381679389313, "grad_norm": 0.8165585398674011, "learning_rate": 8.817556778911957e-06, "loss": 0.1145, "step": 7870 }, { "epoch": 30.076335877862597, "grad_norm": 0.31382817029953003, "learning_rate": 8.81399512178021e-06, "loss": 0.1096, "step": 7880 }, { "epoch": 30.114503816793892, "grad_norm": 0.38235539197921753, "learning_rate": 8.810428830284752e-06, "loss": 0.1064, "step": 7890 }, { "epoch": 30.15267175572519, "grad_norm": 0.9553492069244385, "learning_rate": 8.806857908758968e-06, "loss": 0.11, "step": 7900 }, { "epoch": 30.19083969465649, "grad_norm": 0.33214470744132996, "learning_rate": 8.80328236154186e-06, "loss": 0.1346, "step": 7910 }, { "epoch": 30.229007633587788, "grad_norm": 0.5911892056465149, "learning_rate": 8.799702192978056e-06, "loss": 0.1101, "step": 7920 }, { "epoch": 30.267175572519083, "grad_norm": 0.6221767067909241, "learning_rate": 8.7961174074178e-06, "loss": 0.1067, "step": 7930 }, { "epoch": 30.30534351145038, "grad_norm": 0.9988340735435486, "learning_rate": 8.792528009216942e-06, "loss": 0.1043, "step": 7940 }, { "epoch": 30.34351145038168, "grad_norm": 0.3819482624530792, "learning_rate": 8.788934002736944e-06, "loss": 0.1047, "step": 7950 }, { "epoch": 30.38167938931298, "grad_norm": 0.36361387372016907, "learning_rate": 8.785335392344858e-06, "loss": 0.1092, "step": 7960 }, { "epoch": 30.419847328244273, "grad_norm": 0.4566929340362549, "learning_rate": 8.781732182413336e-06, "loss": 0.1103, "step": 7970 }, { "epoch": 30.458015267175572, "grad_norm": 0.6546979546546936, "learning_rate": 8.778124377320619e-06, "loss": 0.1107, "step": 7980 }, { "epoch": 30.49618320610687, "grad_norm": 0.38045239448547363, "learning_rate": 8.774511981450529e-06, "loss": 0.1074, "step": 7990 }, { "epoch": 30.53435114503817, "grad_norm": 0.6376705169677734, "learning_rate": 8.770894999192468e-06, "loss": 0.1162, "step": 8000 }, { "epoch": 30.572519083969464, "grad_norm": 0.6514620184898376, "learning_rate": 8.767273434941413e-06, "loss": 0.1094, "step": 8010 }, { "epoch": 30.610687022900763, "grad_norm": 0.49210497736930847, "learning_rate": 8.763647293097902e-06, "loss": 0.1143, "step": 8020 }, { "epoch": 30.64885496183206, "grad_norm": 0.9991191029548645, "learning_rate": 8.76001657806804e-06, "loss": 0.108, "step": 8030 }, { "epoch": 30.68702290076336, "grad_norm": 0.3525305688381195, "learning_rate": 8.75638129426349e-06, "loss": 0.1043, "step": 8040 }, { "epoch": 30.725190839694655, "grad_norm": 0.3630237281322479, "learning_rate": 8.752741446101464e-06, "loss": 0.1033, "step": 8050 }, { "epoch": 30.763358778625953, "grad_norm": 0.544648289680481, "learning_rate": 8.749097038004722e-06, "loss": 0.1059, "step": 8060 }, { "epoch": 30.801526717557252, "grad_norm": 0.31798774003982544, "learning_rate": 8.745448074401562e-06, "loss": 0.1073, "step": 8070 }, { "epoch": 30.83969465648855, "grad_norm": 0.35739681124687195, "learning_rate": 8.741794559725818e-06, "loss": 0.1084, "step": 8080 }, { "epoch": 30.87786259541985, "grad_norm": 0.38239383697509766, "learning_rate": 8.738136498416857e-06, "loss": 0.11, "step": 8090 }, { "epoch": 30.916030534351144, "grad_norm": 1.0929396152496338, "learning_rate": 8.734473894919564e-06, "loss": 0.1153, "step": 8100 }, { "epoch": 30.954198473282442, "grad_norm": 0.4055207669734955, "learning_rate": 8.730806753684354e-06, "loss": 0.1021, "step": 8110 }, { "epoch": 30.99236641221374, "grad_norm": 0.4326501190662384, "learning_rate": 8.727135079167144e-06, "loss": 0.1144, "step": 8120 }, { "epoch": 31.03053435114504, "grad_norm": 0.39526522159576416, "learning_rate": 8.723458875829368e-06, "loss": 0.1018, "step": 8130 }, { "epoch": 31.068702290076335, "grad_norm": 0.35406747460365295, "learning_rate": 8.719778148137959e-06, "loss": 0.1056, "step": 8140 }, { "epoch": 31.106870229007633, "grad_norm": 0.380480021238327, "learning_rate": 8.716092900565347e-06, "loss": 0.1079, "step": 8150 }, { "epoch": 31.14503816793893, "grad_norm": 0.3873949944972992, "learning_rate": 8.712403137589455e-06, "loss": 0.1065, "step": 8160 }, { "epoch": 31.18320610687023, "grad_norm": 0.3808857202529907, "learning_rate": 8.708708863693696e-06, "loss": 0.1104, "step": 8170 }, { "epoch": 31.221374045801525, "grad_norm": 0.4861178994178772, "learning_rate": 8.705010083366961e-06, "loss": 0.1064, "step": 8180 }, { "epoch": 31.259541984732824, "grad_norm": 0.8248606324195862, "learning_rate": 8.701306801103611e-06, "loss": 0.1039, "step": 8190 }, { "epoch": 31.297709923664122, "grad_norm": 0.5083164572715759, "learning_rate": 8.69759902140349e-06, "loss": 0.1093, "step": 8200 }, { "epoch": 31.33587786259542, "grad_norm": 0.4453307092189789, "learning_rate": 8.693886748771896e-06, "loss": 0.1081, "step": 8210 }, { "epoch": 31.374045801526716, "grad_norm": 0.4088967442512512, "learning_rate": 8.690169987719593e-06, "loss": 0.1193, "step": 8220 }, { "epoch": 31.412213740458014, "grad_norm": 0.42236557602882385, "learning_rate": 8.686448742762792e-06, "loss": 0.1143, "step": 8230 }, { "epoch": 31.450381679389313, "grad_norm": 0.9636140465736389, "learning_rate": 8.68272301842316e-06, "loss": 0.1119, "step": 8240 }, { "epoch": 31.48854961832061, "grad_norm": 1.1265870332717896, "learning_rate": 8.678992819227804e-06, "loss": 0.1045, "step": 8250 }, { "epoch": 31.52671755725191, "grad_norm": 0.7294782400131226, "learning_rate": 8.675258149709265e-06, "loss": 0.1139, "step": 8260 }, { "epoch": 31.564885496183205, "grad_norm": 0.9136938452720642, "learning_rate": 8.67151901440552e-06, "loss": 0.1111, "step": 8270 }, { "epoch": 31.603053435114504, "grad_norm": 0.4900221824645996, "learning_rate": 8.667775417859971e-06, "loss": 0.1077, "step": 8280 }, { "epoch": 31.641221374045802, "grad_norm": 0.2998524010181427, "learning_rate": 8.664027364621442e-06, "loss": 0.1085, "step": 8290 }, { "epoch": 31.6793893129771, "grad_norm": 0.5325132012367249, "learning_rate": 8.660274859244167e-06, "loss": 0.1093, "step": 8300 }, { "epoch": 31.717557251908396, "grad_norm": 0.6447851657867432, "learning_rate": 8.656517906287798e-06, "loss": 0.111, "step": 8310 }, { "epoch": 31.755725190839694, "grad_norm": 0.4099143147468567, "learning_rate": 8.652756510317387e-06, "loss": 0.1085, "step": 8320 }, { "epoch": 31.793893129770993, "grad_norm": 0.30737578868865967, "learning_rate": 8.648990675903382e-06, "loss": 0.1077, "step": 8330 }, { "epoch": 31.83206106870229, "grad_norm": 0.43916499614715576, "learning_rate": 8.645220407621629e-06, "loss": 0.1042, "step": 8340 }, { "epoch": 31.870229007633586, "grad_norm": 0.3269640803337097, "learning_rate": 8.64144571005336e-06, "loss": 0.1038, "step": 8350 }, { "epoch": 31.908396946564885, "grad_norm": 0.38933065533638, "learning_rate": 8.637666587785185e-06, "loss": 0.101, "step": 8360 }, { "epoch": 31.946564885496183, "grad_norm": 0.42047321796417236, "learning_rate": 8.633883045409096e-06, "loss": 0.1107, "step": 8370 }, { "epoch": 31.984732824427482, "grad_norm": 0.36545827984809875, "learning_rate": 8.630095087522458e-06, "loss": 0.1053, "step": 8380 }, { "epoch": 32.02290076335878, "grad_norm": 0.5923290848731995, "learning_rate": 8.62630271872799e-06, "loss": 0.1096, "step": 8390 }, { "epoch": 32.06106870229008, "grad_norm": 0.4061322510242462, "learning_rate": 8.622505943633781e-06, "loss": 0.1131, "step": 8400 }, { "epoch": 32.099236641221374, "grad_norm": 0.3834056854248047, "learning_rate": 8.618704766853271e-06, "loss": 0.1003, "step": 8410 }, { "epoch": 32.13740458015267, "grad_norm": 0.29086577892303467, "learning_rate": 8.614899193005248e-06, "loss": 0.1085, "step": 8420 }, { "epoch": 32.17557251908397, "grad_norm": 0.48934558033943176, "learning_rate": 8.611089226713843e-06, "loss": 0.1089, "step": 8430 }, { "epoch": 32.213740458015266, "grad_norm": 0.5814259648323059, "learning_rate": 8.607274872608521e-06, "loss": 0.1229, "step": 8440 }, { "epoch": 32.25190839694657, "grad_norm": 0.36074042320251465, "learning_rate": 8.603456135324089e-06, "loss": 0.1099, "step": 8450 }, { "epoch": 32.29007633587786, "grad_norm": 0.40568429231643677, "learning_rate": 8.599633019500665e-06, "loss": 0.1234, "step": 8460 }, { "epoch": 32.32824427480916, "grad_norm": 0.4491671621799469, "learning_rate": 8.595805529783703e-06, "loss": 0.1156, "step": 8470 }, { "epoch": 32.36641221374046, "grad_norm": 0.4605843126773834, "learning_rate": 8.59197367082396e-06, "loss": 0.1075, "step": 8480 }, { "epoch": 32.404580152671755, "grad_norm": 0.5685547590255737, "learning_rate": 8.588137447277502e-06, "loss": 0.1205, "step": 8490 }, { "epoch": 32.44274809160305, "grad_norm": 0.32008451223373413, "learning_rate": 8.584296863805708e-06, "loss": 0.1065, "step": 8500 }, { "epoch": 32.48091603053435, "grad_norm": 0.3442102074623108, "learning_rate": 8.580451925075249e-06, "loss": 0.1123, "step": 8510 }, { "epoch": 32.51908396946565, "grad_norm": 0.5090252161026001, "learning_rate": 8.576602635758086e-06, "loss": 0.1108, "step": 8520 }, { "epoch": 32.55725190839695, "grad_norm": 0.7189807891845703, "learning_rate": 8.572749000531468e-06, "loss": 0.1109, "step": 8530 }, { "epoch": 32.595419847328245, "grad_norm": 0.6325716972351074, "learning_rate": 8.568891024077925e-06, "loss": 0.1113, "step": 8540 }, { "epoch": 32.63358778625954, "grad_norm": 0.28530317544937134, "learning_rate": 8.565028711085266e-06, "loss": 0.1014, "step": 8550 }, { "epoch": 32.67175572519084, "grad_norm": 0.5091580152511597, "learning_rate": 8.561162066246562e-06, "loss": 0.1085, "step": 8560 }, { "epoch": 32.70992366412214, "grad_norm": 0.39473584294319153, "learning_rate": 8.557291094260151e-06, "loss": 0.1041, "step": 8570 }, { "epoch": 32.74809160305343, "grad_norm": 0.5815795660018921, "learning_rate": 8.55341579982963e-06, "loss": 0.1011, "step": 8580 }, { "epoch": 32.786259541984734, "grad_norm": 0.46441009640693665, "learning_rate": 8.549536187663848e-06, "loss": 0.1027, "step": 8590 }, { "epoch": 32.82442748091603, "grad_norm": 0.4190653860569, "learning_rate": 8.545652262476898e-06, "loss": 0.1075, "step": 8600 }, { "epoch": 32.86259541984733, "grad_norm": 0.3595786392688751, "learning_rate": 8.541764028988115e-06, "loss": 0.097, "step": 8610 }, { "epoch": 32.900763358778626, "grad_norm": 0.6225172281265259, "learning_rate": 8.537871491922072e-06, "loss": 0.1145, "step": 8620 }, { "epoch": 32.93893129770992, "grad_norm": 1.1140257120132446, "learning_rate": 8.533974656008566e-06, "loss": 0.129, "step": 8630 }, { "epoch": 32.97709923664122, "grad_norm": 1.0088690519332886, "learning_rate": 8.530073525982621e-06, "loss": 0.1171, "step": 8640 }, { "epoch": 33.01526717557252, "grad_norm": 0.2912192940711975, "learning_rate": 8.526168106584476e-06, "loss": 0.1004, "step": 8650 }, { "epoch": 33.05343511450382, "grad_norm": 0.337960422039032, "learning_rate": 8.522258402559587e-06, "loss": 0.1179, "step": 8660 }, { "epoch": 33.091603053435115, "grad_norm": 0.30016374588012695, "learning_rate": 8.518344418658612e-06, "loss": 0.0991, "step": 8670 }, { "epoch": 33.12977099236641, "grad_norm": 0.36123228073120117, "learning_rate": 8.51442615963741e-06, "loss": 0.1, "step": 8680 }, { "epoch": 33.16793893129771, "grad_norm": 0.3525253236293793, "learning_rate": 8.510503630257034e-06, "loss": 0.0969, "step": 8690 }, { "epoch": 33.20610687022901, "grad_norm": 0.3419656753540039, "learning_rate": 8.506576835283731e-06, "loss": 0.1014, "step": 8700 }, { "epoch": 33.2442748091603, "grad_norm": 0.3661300837993622, "learning_rate": 8.502645779488923e-06, "loss": 0.1014, "step": 8710 }, { "epoch": 33.282442748091604, "grad_norm": 0.6394238471984863, "learning_rate": 8.498710467649214e-06, "loss": 0.1046, "step": 8720 }, { "epoch": 33.3206106870229, "grad_norm": 0.3386576473712921, "learning_rate": 8.494770904546381e-06, "loss": 0.1125, "step": 8730 }, { "epoch": 33.3587786259542, "grad_norm": 0.5160825848579407, "learning_rate": 8.490827094967364e-06, "loss": 0.1072, "step": 8740 }, { "epoch": 33.396946564885496, "grad_norm": 0.37867069244384766, "learning_rate": 8.486879043704263e-06, "loss": 0.1043, "step": 8750 }, { "epoch": 33.43511450381679, "grad_norm": 0.30260443687438965, "learning_rate": 8.482926755554333e-06, "loss": 0.1074, "step": 8760 }, { "epoch": 33.47328244274809, "grad_norm": 0.589114248752594, "learning_rate": 8.478970235319975e-06, "loss": 0.1072, "step": 8770 }, { "epoch": 33.51145038167939, "grad_norm": 0.3591925799846649, "learning_rate": 8.475009487808738e-06, "loss": 0.1009, "step": 8780 }, { "epoch": 33.54961832061069, "grad_norm": 0.6600471138954163, "learning_rate": 8.471044517833299e-06, "loss": 0.122, "step": 8790 }, { "epoch": 33.587786259541986, "grad_norm": 0.36351725459098816, "learning_rate": 8.467075330211474e-06, "loss": 0.1099, "step": 8800 }, { "epoch": 33.62595419847328, "grad_norm": 0.31405356526374817, "learning_rate": 8.463101929766197e-06, "loss": 0.1024, "step": 8810 }, { "epoch": 33.66412213740458, "grad_norm": 0.34903696179389954, "learning_rate": 8.459124321325529e-06, "loss": 0.1087, "step": 8820 }, { "epoch": 33.70229007633588, "grad_norm": 0.43546533584594727, "learning_rate": 8.455142509722635e-06, "loss": 0.1072, "step": 8830 }, { "epoch": 33.74045801526717, "grad_norm": 0.43064677715301514, "learning_rate": 8.451156499795791e-06, "loss": 0.1159, "step": 8840 }, { "epoch": 33.778625954198475, "grad_norm": 0.3457275331020355, "learning_rate": 8.44716629638838e-06, "loss": 0.1137, "step": 8850 }, { "epoch": 33.81679389312977, "grad_norm": 0.4882639944553375, "learning_rate": 8.443171904348873e-06, "loss": 0.1035, "step": 8860 }, { "epoch": 33.85496183206107, "grad_norm": 0.3734376132488251, "learning_rate": 8.439173328530829e-06, "loss": 0.1045, "step": 8870 }, { "epoch": 33.89312977099237, "grad_norm": 0.3466688096523285, "learning_rate": 8.435170573792902e-06, "loss": 0.1041, "step": 8880 }, { "epoch": 33.93129770992366, "grad_norm": 0.27618467807769775, "learning_rate": 8.431163644998808e-06, "loss": 0.1138, "step": 8890 }, { "epoch": 33.969465648854964, "grad_norm": 0.3809553384780884, "learning_rate": 8.42715254701735e-06, "loss": 0.1111, "step": 8900 }, { "epoch": 34.00763358778626, "grad_norm": 0.3887907564640045, "learning_rate": 8.423137284722389e-06, "loss": 0.1069, "step": 8910 }, { "epoch": 34.045801526717554, "grad_norm": 0.3740697205066681, "learning_rate": 8.419117862992846e-06, "loss": 0.1063, "step": 8920 }, { "epoch": 34.083969465648856, "grad_norm": 0.38636720180511475, "learning_rate": 8.415094286712694e-06, "loss": 0.1108, "step": 8930 }, { "epoch": 34.12213740458015, "grad_norm": 0.2964230477809906, "learning_rate": 8.411066560770965e-06, "loss": 0.1062, "step": 8940 }, { "epoch": 34.16030534351145, "grad_norm": 0.4712314009666443, "learning_rate": 8.407034690061722e-06, "loss": 0.0999, "step": 8950 }, { "epoch": 34.19847328244275, "grad_norm": 0.7659854292869568, "learning_rate": 8.402998679484067e-06, "loss": 0.1045, "step": 8960 }, { "epoch": 34.23664122137404, "grad_norm": 0.4793913960456848, "learning_rate": 8.398958533942135e-06, "loss": 0.1036, "step": 8970 }, { "epoch": 34.274809160305345, "grad_norm": 0.37287986278533936, "learning_rate": 8.394914258345084e-06, "loss": 0.1144, "step": 8980 }, { "epoch": 34.31297709923664, "grad_norm": 0.541438639163971, "learning_rate": 8.390865857607089e-06, "loss": 0.1055, "step": 8990 }, { "epoch": 34.35114503816794, "grad_norm": 0.46863681077957153, "learning_rate": 8.386813336647339e-06, "loss": 0.1055, "step": 9000 }, { "epoch": 34.38931297709924, "grad_norm": 0.7329716086387634, "learning_rate": 8.38275670039003e-06, "loss": 0.1179, "step": 9010 }, { "epoch": 34.42748091603053, "grad_norm": 0.4788097143173218, "learning_rate": 8.378695953764357e-06, "loss": 0.1261, "step": 9020 }, { "epoch": 34.465648854961835, "grad_norm": 0.4310746192932129, "learning_rate": 8.374631101704509e-06, "loss": 0.1175, "step": 9030 }, { "epoch": 34.50381679389313, "grad_norm": 0.4793173372745514, "learning_rate": 8.370562149149666e-06, "loss": 0.1089, "step": 9040 }, { "epoch": 34.541984732824424, "grad_norm": 0.5185418725013733, "learning_rate": 8.366489101043989e-06, "loss": 0.1043, "step": 9050 }, { "epoch": 34.58015267175573, "grad_norm": 0.37807098031044006, "learning_rate": 8.362411962336613e-06, "loss": 0.1102, "step": 9060 }, { "epoch": 34.61832061068702, "grad_norm": 0.5667879581451416, "learning_rate": 8.358330737981651e-06, "loss": 0.1137, "step": 9070 }, { "epoch": 34.656488549618324, "grad_norm": 1.1132043600082397, "learning_rate": 8.35424543293817e-06, "loss": 0.1117, "step": 9080 }, { "epoch": 34.69465648854962, "grad_norm": 0.3736783564090729, "learning_rate": 8.350156052170206e-06, "loss": 0.1099, "step": 9090 }, { "epoch": 34.732824427480914, "grad_norm": 0.3822537362575531, "learning_rate": 8.346062600646739e-06, "loss": 0.0993, "step": 9100 }, { "epoch": 34.770992366412216, "grad_norm": 0.32378950715065, "learning_rate": 8.341965083341696e-06, "loss": 0.1012, "step": 9110 }, { "epoch": 34.80916030534351, "grad_norm": 0.3589276969432831, "learning_rate": 8.337863505233954e-06, "loss": 0.1064, "step": 9120 }, { "epoch": 34.847328244274806, "grad_norm": 0.3442489802837372, "learning_rate": 8.333757871307311e-06, "loss": 0.1066, "step": 9130 }, { "epoch": 34.88549618320611, "grad_norm": 0.43421897292137146, "learning_rate": 8.329648186550501e-06, "loss": 0.1107, "step": 9140 }, { "epoch": 34.9236641221374, "grad_norm": 0.33839526772499084, "learning_rate": 8.32553445595718e-06, "loss": 0.1105, "step": 9150 }, { "epoch": 34.961832061068705, "grad_norm": 0.3755687475204468, "learning_rate": 8.321416684525917e-06, "loss": 0.1102, "step": 9160 }, { "epoch": 35.0, "grad_norm": 0.3840588629245758, "learning_rate": 8.317294877260193e-06, "loss": 0.1087, "step": 9170 }, { "epoch": 35.038167938931295, "grad_norm": 0.5461086630821228, "learning_rate": 8.313169039168395e-06, "loss": 0.1093, "step": 9180 }, { "epoch": 35.0763358778626, "grad_norm": 0.42317765951156616, "learning_rate": 8.3090391752638e-06, "loss": 0.1133, "step": 9190 }, { "epoch": 35.11450381679389, "grad_norm": 0.5004700422286987, "learning_rate": 8.304905290564586e-06, "loss": 0.0985, "step": 9200 }, { "epoch": 35.152671755725194, "grad_norm": 0.42691975831985474, "learning_rate": 8.300767390093814e-06, "loss": 0.1029, "step": 9210 }, { "epoch": 35.19083969465649, "grad_norm": 0.6003366708755493, "learning_rate": 8.296625478879417e-06, "loss": 0.1216, "step": 9220 }, { "epoch": 35.229007633587784, "grad_norm": 0.31051206588745117, "learning_rate": 8.292479561954214e-06, "loss": 0.1022, "step": 9230 }, { "epoch": 35.267175572519086, "grad_norm": 0.47282811999320984, "learning_rate": 8.288329644355884e-06, "loss": 0.1074, "step": 9240 }, { "epoch": 35.30534351145038, "grad_norm": 0.7961530089378357, "learning_rate": 8.284175731126964e-06, "loss": 0.1041, "step": 9250 }, { "epoch": 35.343511450381676, "grad_norm": 0.536555826663971, "learning_rate": 8.280017827314854e-06, "loss": 0.1102, "step": 9260 }, { "epoch": 35.38167938931298, "grad_norm": 0.6327351331710815, "learning_rate": 8.275855937971799e-06, "loss": 0.1135, "step": 9270 }, { "epoch": 35.41984732824427, "grad_norm": 0.39085444808006287, "learning_rate": 8.271690068154887e-06, "loss": 0.1086, "step": 9280 }, { "epoch": 35.458015267175576, "grad_norm": 0.3909195065498352, "learning_rate": 8.26752022292604e-06, "loss": 0.1047, "step": 9290 }, { "epoch": 35.49618320610687, "grad_norm": 0.44586417078971863, "learning_rate": 8.263346407352017e-06, "loss": 0.1016, "step": 9300 }, { "epoch": 35.534351145038165, "grad_norm": 0.9170414805412292, "learning_rate": 8.259168626504395e-06, "loss": 0.1119, "step": 9310 }, { "epoch": 35.57251908396947, "grad_norm": 0.5176953673362732, "learning_rate": 8.25498688545957e-06, "loss": 0.109, "step": 9320 }, { "epoch": 35.61068702290076, "grad_norm": 0.3105170428752899, "learning_rate": 8.250801189298759e-06, "loss": 0.1089, "step": 9330 }, { "epoch": 35.64885496183206, "grad_norm": 0.9050326943397522, "learning_rate": 8.246611543107968e-06, "loss": 0.1105, "step": 9340 }, { "epoch": 35.68702290076336, "grad_norm": 0.36389780044555664, "learning_rate": 8.24241795197802e-06, "loss": 0.1075, "step": 9350 }, { "epoch": 35.725190839694655, "grad_norm": 0.2961239814758301, "learning_rate": 8.238220421004518e-06, "loss": 0.1073, "step": 9360 }, { "epoch": 35.76335877862596, "grad_norm": 0.4220345914363861, "learning_rate": 8.23401895528786e-06, "loss": 0.1004, "step": 9370 }, { "epoch": 35.80152671755725, "grad_norm": 0.3124537765979767, "learning_rate": 8.229813559933225e-06, "loss": 0.1113, "step": 9380 }, { "epoch": 35.83969465648855, "grad_norm": 0.4424472153186798, "learning_rate": 8.22560424005056e-06, "loss": 0.1074, "step": 9390 }, { "epoch": 35.87786259541985, "grad_norm": 0.46610021591186523, "learning_rate": 8.22139100075459e-06, "loss": 0.1089, "step": 9400 }, { "epoch": 35.916030534351144, "grad_norm": 0.5336711406707764, "learning_rate": 8.217173847164799e-06, "loss": 0.1031, "step": 9410 }, { "epoch": 35.954198473282446, "grad_norm": 0.4767981171607971, "learning_rate": 8.212952784405423e-06, "loss": 0.1033, "step": 9420 }, { "epoch": 35.99236641221374, "grad_norm": 0.3861401677131653, "learning_rate": 8.208727817605453e-06, "loss": 0.1031, "step": 9430 }, { "epoch": 36.030534351145036, "grad_norm": 0.5217192769050598, "learning_rate": 8.204498951898618e-06, "loss": 0.1086, "step": 9440 }, { "epoch": 36.06870229007634, "grad_norm": 0.41782206296920776, "learning_rate": 8.200266192423396e-06, "loss": 0.1091, "step": 9450 }, { "epoch": 36.10687022900763, "grad_norm": 0.29525044560432434, "learning_rate": 8.196029544322983e-06, "loss": 0.1116, "step": 9460 }, { "epoch": 36.14503816793893, "grad_norm": 0.4232509732246399, "learning_rate": 8.191789012745306e-06, "loss": 0.1101, "step": 9470 }, { "epoch": 36.18320610687023, "grad_norm": 0.9247789978981018, "learning_rate": 8.187544602843014e-06, "loss": 0.0989, "step": 9480 }, { "epoch": 36.221374045801525, "grad_norm": 0.3943084180355072, "learning_rate": 8.183296319773466e-06, "loss": 0.1031, "step": 9490 }, { "epoch": 36.25954198473283, "grad_norm": 0.4537927806377411, "learning_rate": 8.179044168698722e-06, "loss": 0.1021, "step": 9500 }, { "epoch": 36.29770992366412, "grad_norm": 0.5286827683448792, "learning_rate": 8.174788154785548e-06, "loss": 0.1085, "step": 9510 }, { "epoch": 36.33587786259542, "grad_norm": 0.40371379256248474, "learning_rate": 8.170528283205404e-06, "loss": 0.1035, "step": 9520 }, { "epoch": 36.37404580152672, "grad_norm": 0.4854196608066559, "learning_rate": 8.166264559134434e-06, "loss": 0.1115, "step": 9530 }, { "epoch": 36.412213740458014, "grad_norm": 0.673309862613678, "learning_rate": 8.161996987753466e-06, "loss": 0.1044, "step": 9540 }, { "epoch": 36.45038167938931, "grad_norm": 0.37787386775016785, "learning_rate": 8.157725574248e-06, "loss": 0.1136, "step": 9550 }, { "epoch": 36.48854961832061, "grad_norm": 1.057741641998291, "learning_rate": 8.15345032380821e-06, "loss": 0.1129, "step": 9560 }, { "epoch": 36.52671755725191, "grad_norm": 0.5287107229232788, "learning_rate": 8.149171241628924e-06, "loss": 0.0985, "step": 9570 }, { "epoch": 36.56488549618321, "grad_norm": 0.4657130539417267, "learning_rate": 8.144888332909631e-06, "loss": 0.0994, "step": 9580 }, { "epoch": 36.603053435114504, "grad_norm": 0.2887883484363556, "learning_rate": 8.140601602854471e-06, "loss": 0.1009, "step": 9590 }, { "epoch": 36.6412213740458, "grad_norm": 0.5169790983200073, "learning_rate": 8.136311056672224e-06, "loss": 0.1026, "step": 9600 }, { "epoch": 36.6793893129771, "grad_norm": 0.42216652631759644, "learning_rate": 8.132016699576308e-06, "loss": 0.1009, "step": 9610 }, { "epoch": 36.717557251908396, "grad_norm": 0.39106106758117676, "learning_rate": 8.127718536784771e-06, "loss": 0.1056, "step": 9620 }, { "epoch": 36.7557251908397, "grad_norm": 0.5721985697746277, "learning_rate": 8.123416573520289e-06, "loss": 0.1017, "step": 9630 }, { "epoch": 36.79389312977099, "grad_norm": 0.5179575085639954, "learning_rate": 8.119110815010152e-06, "loss": 0.1127, "step": 9640 }, { "epoch": 36.83206106870229, "grad_norm": 0.5956861972808838, "learning_rate": 8.11480126648626e-06, "loss": 0.1059, "step": 9650 }, { "epoch": 36.87022900763359, "grad_norm": 0.37840545177459717, "learning_rate": 8.110487933185123e-06, "loss": 0.1052, "step": 9660 }, { "epoch": 36.908396946564885, "grad_norm": 0.42260074615478516, "learning_rate": 8.106170820347849e-06, "loss": 0.1003, "step": 9670 }, { "epoch": 36.94656488549618, "grad_norm": 0.7760491371154785, "learning_rate": 8.101849933220134e-06, "loss": 0.1081, "step": 9680 }, { "epoch": 36.98473282442748, "grad_norm": 0.3848549723625183, "learning_rate": 8.097525277052265e-06, "loss": 0.1011, "step": 9690 }, { "epoch": 37.02290076335878, "grad_norm": 0.72954261302948, "learning_rate": 8.093196857099105e-06, "loss": 0.1005, "step": 9700 }, { "epoch": 37.06106870229008, "grad_norm": 0.515247642993927, "learning_rate": 8.088864678620096e-06, "loss": 0.1046, "step": 9710 }, { "epoch": 37.099236641221374, "grad_norm": 0.6722749471664429, "learning_rate": 8.084528746879243e-06, "loss": 0.095, "step": 9720 }, { "epoch": 37.13740458015267, "grad_norm": 0.3528861999511719, "learning_rate": 8.080189067145107e-06, "loss": 0.1046, "step": 9730 }, { "epoch": 37.17557251908397, "grad_norm": 0.42519357800483704, "learning_rate": 8.075845644690814e-06, "loss": 0.0938, "step": 9740 }, { "epoch": 37.213740458015266, "grad_norm": 0.5400734543800354, "learning_rate": 8.07149848479403e-06, "loss": 0.103, "step": 9750 }, { "epoch": 37.25190839694657, "grad_norm": 0.6142025589942932, "learning_rate": 8.067147592736963e-06, "loss": 0.1027, "step": 9760 }, { "epoch": 37.29007633587786, "grad_norm": 0.4489392936229706, "learning_rate": 8.062792973806358e-06, "loss": 0.1222, "step": 9770 }, { "epoch": 37.32824427480916, "grad_norm": 0.576805830001831, "learning_rate": 8.058434633293485e-06, "loss": 0.1025, "step": 9780 }, { "epoch": 37.36641221374046, "grad_norm": 0.38401392102241516, "learning_rate": 8.054072576494142e-06, "loss": 0.1, "step": 9790 }, { "epoch": 37.404580152671755, "grad_norm": 0.4029317796230316, "learning_rate": 8.04970680870864e-06, "loss": 0.1047, "step": 9800 }, { "epoch": 37.44274809160305, "grad_norm": 0.7393083572387695, "learning_rate": 8.045337335241793e-06, "loss": 0.0922, "step": 9810 }, { "epoch": 37.48091603053435, "grad_norm": 0.4550725519657135, "learning_rate": 8.040964161402932e-06, "loss": 0.0988, "step": 9820 }, { "epoch": 37.51908396946565, "grad_norm": 0.5443292260169983, "learning_rate": 8.036587292505869e-06, "loss": 0.0993, "step": 9830 }, { "epoch": 37.55725190839695, "grad_norm": 0.6734360456466675, "learning_rate": 8.032206733868912e-06, "loss": 0.0907, "step": 9840 }, { "epoch": 37.595419847328245, "grad_norm": 0.4426909387111664, "learning_rate": 8.027822490814859e-06, "loss": 0.0963, "step": 9850 }, { "epoch": 37.63358778625954, "grad_norm": 0.7196950316429138, "learning_rate": 8.023434568670971e-06, "loss": 0.0974, "step": 9860 }, { "epoch": 37.67175572519084, "grad_norm": 0.8929295539855957, "learning_rate": 8.019042972768992e-06, "loss": 0.0923, "step": 9870 }, { "epoch": 37.70992366412214, "grad_norm": 0.4260941445827484, "learning_rate": 8.014647708445124e-06, "loss": 0.0862, "step": 9880 }, { "epoch": 37.74809160305343, "grad_norm": 0.581256628036499, "learning_rate": 8.010248781040027e-06, "loss": 0.0962, "step": 9890 }, { "epoch": 37.786259541984734, "grad_norm": 0.5044671893119812, "learning_rate": 8.005846195898815e-06, "loss": 0.0849, "step": 9900 }, { "epoch": 37.82442748091603, "grad_norm": 0.5715168714523315, "learning_rate": 8.00143995837104e-06, "loss": 0.0864, "step": 9910 }, { "epoch": 37.86259541984733, "grad_norm": 0.4902816116809845, "learning_rate": 7.997030073810699e-06, "loss": 0.09, "step": 9920 }, { "epoch": 37.900763358778626, "grad_norm": 0.8679199814796448, "learning_rate": 7.992616547576218e-06, "loss": 0.1111, "step": 9930 }, { "epoch": 37.93893129770992, "grad_norm": 0.5654365420341492, "learning_rate": 7.988199385030446e-06, "loss": 0.0981, "step": 9940 }, { "epoch": 37.97709923664122, "grad_norm": 1.2245254516601562, "learning_rate": 7.98377859154065e-06, "loss": 0.0936, "step": 9950 }, { "epoch": 38.01526717557252, "grad_norm": 0.9778199195861816, "learning_rate": 7.979354172478516e-06, "loss": 0.0862, "step": 9960 }, { "epoch": 38.05343511450382, "grad_norm": 0.4366597831249237, "learning_rate": 7.974926133220127e-06, "loss": 0.0897, "step": 9970 }, { "epoch": 38.091603053435115, "grad_norm": 0.5576930642127991, "learning_rate": 7.970494479145968e-06, "loss": 0.1004, "step": 9980 }, { "epoch": 38.12977099236641, "grad_norm": 0.5354223251342773, "learning_rate": 7.966059215640918e-06, "loss": 0.0855, "step": 9990 }, { "epoch": 38.16793893129771, "grad_norm": 0.4817463457584381, "learning_rate": 7.96162034809424e-06, "loss": 0.103, "step": 10000 }, { "epoch": 38.20610687022901, "grad_norm": 1.0092859268188477, "learning_rate": 7.957177881899579e-06, "loss": 0.0936, "step": 10010 }, { "epoch": 38.2442748091603, "grad_norm": 0.3715302050113678, "learning_rate": 7.952731822454944e-06, "loss": 0.0907, "step": 10020 }, { "epoch": 38.282442748091604, "grad_norm": 0.7082487940788269, "learning_rate": 7.948282175162723e-06, "loss": 0.0889, "step": 10030 }, { "epoch": 38.3206106870229, "grad_norm": 0.5549430251121521, "learning_rate": 7.943828945429653e-06, "loss": 0.1023, "step": 10040 }, { "epoch": 38.3587786259542, "grad_norm": 0.41316139698028564, "learning_rate": 7.939372138666828e-06, "loss": 0.0843, "step": 10050 }, { "epoch": 38.396946564885496, "grad_norm": 0.4269944429397583, "learning_rate": 7.934911760289692e-06, "loss": 0.0876, "step": 10060 }, { "epoch": 38.43511450381679, "grad_norm": 0.35790082812309265, "learning_rate": 7.930447815718022e-06, "loss": 0.0835, "step": 10070 }, { "epoch": 38.47328244274809, "grad_norm": 0.6934918165206909, "learning_rate": 7.925980310375933e-06, "loss": 0.0874, "step": 10080 }, { "epoch": 38.51145038167939, "grad_norm": 0.36909425258636475, "learning_rate": 7.921509249691865e-06, "loss": 0.0937, "step": 10090 }, { "epoch": 38.54961832061069, "grad_norm": 0.8461699485778809, "learning_rate": 7.917034639098578e-06, "loss": 0.0862, "step": 10100 }, { "epoch": 38.587786259541986, "grad_norm": 0.45856791734695435, "learning_rate": 7.912556484033146e-06, "loss": 0.0915, "step": 10110 }, { "epoch": 38.62595419847328, "grad_norm": 0.49433228373527527, "learning_rate": 7.908074789936952e-06, "loss": 0.0774, "step": 10120 }, { "epoch": 38.66412213740458, "grad_norm": 0.3476361334323883, "learning_rate": 7.903589562255673e-06, "loss": 0.0933, "step": 10130 }, { "epoch": 38.70229007633588, "grad_norm": 0.3297344744205475, "learning_rate": 7.899100806439287e-06, "loss": 0.0846, "step": 10140 }, { "epoch": 38.74045801526717, "grad_norm": 0.36996668577194214, "learning_rate": 7.894608527942049e-06, "loss": 0.0948, "step": 10150 }, { "epoch": 38.778625954198475, "grad_norm": 0.7153366804122925, "learning_rate": 7.89011273222251e-06, "loss": 0.0868, "step": 10160 }, { "epoch": 38.81679389312977, "grad_norm": 0.571403443813324, "learning_rate": 7.88561342474348e-06, "loss": 0.0801, "step": 10170 }, { "epoch": 38.85496183206107, "grad_norm": 0.5913143157958984, "learning_rate": 7.881110610972045e-06, "loss": 0.085, "step": 10180 }, { "epoch": 38.89312977099237, "grad_norm": 0.36453530192375183, "learning_rate": 7.876604296379545e-06, "loss": 0.0777, "step": 10190 }, { "epoch": 38.93129770992366, "grad_norm": 0.5440592169761658, "learning_rate": 7.87209448644158e-06, "loss": 0.0882, "step": 10200 }, { "epoch": 38.969465648854964, "grad_norm": 0.5457788705825806, "learning_rate": 7.867581186637991e-06, "loss": 0.0779, "step": 10210 }, { "epoch": 39.00763358778626, "grad_norm": 0.6085828542709351, "learning_rate": 7.863064402452867e-06, "loss": 0.088, "step": 10220 }, { "epoch": 39.045801526717554, "grad_norm": 1.3797415494918823, "learning_rate": 7.858544139374524e-06, "loss": 0.0929, "step": 10230 }, { "epoch": 39.083969465648856, "grad_norm": 0.5593680739402771, "learning_rate": 7.854020402895508e-06, "loss": 0.0891, "step": 10240 }, { "epoch": 39.12213740458015, "grad_norm": 0.7378935813903809, "learning_rate": 7.849493198512587e-06, "loss": 0.085, "step": 10250 }, { "epoch": 39.16030534351145, "grad_norm": 0.5513617992401123, "learning_rate": 7.844962531726742e-06, "loss": 0.0924, "step": 10260 }, { "epoch": 39.19847328244275, "grad_norm": 0.47100234031677246, "learning_rate": 7.840428408043156e-06, "loss": 0.087, "step": 10270 }, { "epoch": 39.23664122137404, "grad_norm": 0.7264495491981506, "learning_rate": 7.835890832971218e-06, "loss": 0.0797, "step": 10280 }, { "epoch": 39.274809160305345, "grad_norm": 0.6747552752494812, "learning_rate": 7.831349812024513e-06, "loss": 0.0816, "step": 10290 }, { "epoch": 39.31297709923664, "grad_norm": 0.568621814250946, "learning_rate": 7.826805350720807e-06, "loss": 0.0829, "step": 10300 }, { "epoch": 39.35114503816794, "grad_norm": 0.8591163158416748, "learning_rate": 7.82225745458205e-06, "loss": 0.0945, "step": 10310 }, { "epoch": 39.38931297709924, "grad_norm": 0.6126816272735596, "learning_rate": 7.817706129134363e-06, "loss": 0.09, "step": 10320 }, { "epoch": 39.42748091603053, "grad_norm": 0.5068755745887756, "learning_rate": 7.813151379908037e-06, "loss": 0.085, "step": 10330 }, { "epoch": 39.465648854961835, "grad_norm": 0.5643684267997742, "learning_rate": 7.808593212437523e-06, "loss": 0.0795, "step": 10340 }, { "epoch": 39.50381679389313, "grad_norm": 0.2955627143383026, "learning_rate": 7.804031632261421e-06, "loss": 0.0744, "step": 10350 }, { "epoch": 39.541984732824424, "grad_norm": 0.4418695569038391, "learning_rate": 7.799466644922484e-06, "loss": 0.0872, "step": 10360 }, { "epoch": 39.58015267175573, "grad_norm": 0.5301413536071777, "learning_rate": 7.794898255967602e-06, "loss": 0.0899, "step": 10370 }, { "epoch": 39.61832061068702, "grad_norm": 0.5224511623382568, "learning_rate": 7.790326470947796e-06, "loss": 0.0891, "step": 10380 }, { "epoch": 39.656488549618324, "grad_norm": 0.447360634803772, "learning_rate": 7.785751295418218e-06, "loss": 0.0831, "step": 10390 }, { "epoch": 39.69465648854962, "grad_norm": 0.6668679118156433, "learning_rate": 7.781172734938136e-06, "loss": 0.0857, "step": 10400 }, { "epoch": 39.732824427480914, "grad_norm": 0.5265706777572632, "learning_rate": 7.776590795070932e-06, "loss": 0.0826, "step": 10410 }, { "epoch": 39.770992366412216, "grad_norm": 0.44735923409461975, "learning_rate": 7.772005481384099e-06, "loss": 0.0889, "step": 10420 }, { "epoch": 39.80916030534351, "grad_norm": 0.4243389070034027, "learning_rate": 7.767416799449223e-06, "loss": 0.0784, "step": 10430 }, { "epoch": 39.847328244274806, "grad_norm": 0.5234137177467346, "learning_rate": 7.762824754841985e-06, "loss": 0.0805, "step": 10440 }, { "epoch": 39.88549618320611, "grad_norm": 0.6243206858634949, "learning_rate": 7.758229353142153e-06, "loss": 0.08, "step": 10450 }, { "epoch": 39.9236641221374, "grad_norm": 0.368437796831131, "learning_rate": 7.753630599933572e-06, "loss": 0.0857, "step": 10460 }, { "epoch": 39.961832061068705, "grad_norm": 0.6152259707450867, "learning_rate": 7.74902850080416e-06, "loss": 0.0829, "step": 10470 }, { "epoch": 40.0, "grad_norm": 0.43827810883522034, "learning_rate": 7.744423061345907e-06, "loss": 0.0865, "step": 10480 }, { "epoch": 40.038167938931295, "grad_norm": 0.39631178975105286, "learning_rate": 7.73981428715485e-06, "loss": 0.0917, "step": 10490 }, { "epoch": 40.0763358778626, "grad_norm": 0.45038852095603943, "learning_rate": 7.735202183831085e-06, "loss": 0.0864, "step": 10500 }, { "epoch": 40.11450381679389, "grad_norm": 0.451773077249527, "learning_rate": 7.730586756978758e-06, "loss": 0.0758, "step": 10510 }, { "epoch": 40.152671755725194, "grad_norm": 0.48386844992637634, "learning_rate": 7.72596801220604e-06, "loss": 0.0782, "step": 10520 }, { "epoch": 40.19083969465649, "grad_norm": 0.8366134166717529, "learning_rate": 7.721345955125147e-06, "loss": 0.0861, "step": 10530 }, { "epoch": 40.229007633587784, "grad_norm": 0.4540402889251709, "learning_rate": 7.716720591352311e-06, "loss": 0.0814, "step": 10540 }, { "epoch": 40.267175572519086, "grad_norm": 0.7006372213363647, "learning_rate": 7.712091926507788e-06, "loss": 0.0939, "step": 10550 }, { "epoch": 40.30534351145038, "grad_norm": 0.4313132166862488, "learning_rate": 7.70745996621584e-06, "loss": 0.0869, "step": 10560 }, { "epoch": 40.343511450381676, "grad_norm": 0.33567196130752563, "learning_rate": 7.702824716104736e-06, "loss": 0.0924, "step": 10570 }, { "epoch": 40.38167938931298, "grad_norm": 0.5061486959457397, "learning_rate": 7.698186181806744e-06, "loss": 0.0839, "step": 10580 }, { "epoch": 40.41984732824427, "grad_norm": 0.4450092017650604, "learning_rate": 7.693544368958116e-06, "loss": 0.0804, "step": 10590 }, { "epoch": 40.458015267175576, "grad_norm": 0.8481007814407349, "learning_rate": 7.688899283199097e-06, "loss": 0.0778, "step": 10600 }, { "epoch": 40.49618320610687, "grad_norm": 0.34229525923728943, "learning_rate": 7.684250930173902e-06, "loss": 0.0805, "step": 10610 }, { "epoch": 40.534351145038165, "grad_norm": 0.39725250005722046, "learning_rate": 7.679599315530717e-06, "loss": 0.0836, "step": 10620 }, { "epoch": 40.57251908396947, "grad_norm": 0.4148560166358948, "learning_rate": 7.674944444921696e-06, "loss": 0.0881, "step": 10630 }, { "epoch": 40.61068702290076, "grad_norm": 0.3573581874370575, "learning_rate": 7.670286324002943e-06, "loss": 0.0754, "step": 10640 }, { "epoch": 40.64885496183206, "grad_norm": 0.4562970995903015, "learning_rate": 7.665624958434514e-06, "loss": 0.0943, "step": 10650 }, { "epoch": 40.68702290076336, "grad_norm": 0.7594497799873352, "learning_rate": 7.66096035388041e-06, "loss": 0.0772, "step": 10660 }, { "epoch": 40.725190839694655, "grad_norm": 0.5188819169998169, "learning_rate": 7.656292516008563e-06, "loss": 0.079, "step": 10670 }, { "epoch": 40.76335877862596, "grad_norm": 0.48357266187667847, "learning_rate": 7.651621450490836e-06, "loss": 0.0822, "step": 10680 }, { "epoch": 40.80152671755725, "grad_norm": 0.46848195791244507, "learning_rate": 7.646947163003017e-06, "loss": 0.0769, "step": 10690 }, { "epoch": 40.83969465648855, "grad_norm": 0.448741614818573, "learning_rate": 7.642269659224804e-06, "loss": 0.0817, "step": 10700 }, { "epoch": 40.87786259541985, "grad_norm": 0.2872468829154968, "learning_rate": 7.637588944839803e-06, "loss": 0.0789, "step": 10710 }, { "epoch": 40.916030534351144, "grad_norm": 0.641671359539032, "learning_rate": 7.632905025535529e-06, "loss": 0.0874, "step": 10720 }, { "epoch": 40.954198473282446, "grad_norm": 0.39625340700149536, "learning_rate": 7.628217907003379e-06, "loss": 0.0761, "step": 10730 }, { "epoch": 40.99236641221374, "grad_norm": 0.47728121280670166, "learning_rate": 7.623527594938649e-06, "loss": 0.0771, "step": 10740 }, { "epoch": 41.030534351145036, "grad_norm": 0.42139771580696106, "learning_rate": 7.618834095040508e-06, "loss": 0.0797, "step": 10750 }, { "epoch": 41.06870229007634, "grad_norm": 0.5375596284866333, "learning_rate": 7.614137413012001e-06, "loss": 0.0822, "step": 10760 }, { "epoch": 41.10687022900763, "grad_norm": 0.26976653933525085, "learning_rate": 7.609437554560042e-06, "loss": 0.0758, "step": 10770 }, { "epoch": 41.14503816793893, "grad_norm": 0.293875515460968, "learning_rate": 7.604734525395398e-06, "loss": 0.0801, "step": 10780 }, { "epoch": 41.18320610687023, "grad_norm": 1.289528727531433, "learning_rate": 7.600028331232698e-06, "loss": 0.0854, "step": 10790 }, { "epoch": 41.221374045801525, "grad_norm": 0.6084293723106384, "learning_rate": 7.595318977790408e-06, "loss": 0.0744, "step": 10800 }, { "epoch": 41.25954198473283, "grad_norm": 0.660408079624176, "learning_rate": 7.5906064707908355e-06, "loss": 0.0777, "step": 10810 }, { "epoch": 41.29770992366412, "grad_norm": 3.1526737213134766, "learning_rate": 7.585890815960125e-06, "loss": 0.0843, "step": 10820 }, { "epoch": 41.33587786259542, "grad_norm": 0.5699805617332458, "learning_rate": 7.581172019028238e-06, "loss": 0.0812, "step": 10830 }, { "epoch": 41.37404580152672, "grad_norm": 0.9830774068832397, "learning_rate": 7.576450085728959e-06, "loss": 0.0824, "step": 10840 }, { "epoch": 41.412213740458014, "grad_norm": 0.4558395445346832, "learning_rate": 7.571725021799885e-06, "loss": 0.075, "step": 10850 }, { "epoch": 41.45038167938931, "grad_norm": 0.4857986569404602, "learning_rate": 7.566996832982409e-06, "loss": 0.0835, "step": 10860 }, { "epoch": 41.48854961832061, "grad_norm": 0.8208315968513489, "learning_rate": 7.56226552502173e-06, "loss": 0.0939, "step": 10870 }, { "epoch": 41.52671755725191, "grad_norm": 0.6192477941513062, "learning_rate": 7.557531103666833e-06, "loss": 0.0813, "step": 10880 }, { "epoch": 41.56488549618321, "grad_norm": 0.3482249081134796, "learning_rate": 7.552793574670485e-06, "loss": 0.0743, "step": 10890 }, { "epoch": 41.603053435114504, "grad_norm": 0.35872194170951843, "learning_rate": 7.5480529437892304e-06, "loss": 0.0756, "step": 10900 }, { "epoch": 41.6412213740458, "grad_norm": 0.38801950216293335, "learning_rate": 7.543309216783384e-06, "loss": 0.0712, "step": 10910 }, { "epoch": 41.6793893129771, "grad_norm": 0.5348705053329468, "learning_rate": 7.538562399417021e-06, "loss": 0.077, "step": 10920 }, { "epoch": 41.717557251908396, "grad_norm": 0.5075365304946899, "learning_rate": 7.533812497457972e-06, "loss": 0.0791, "step": 10930 }, { "epoch": 41.7557251908397, "grad_norm": 0.32560238242149353, "learning_rate": 7.529059516677815e-06, "loss": 0.0706, "step": 10940 }, { "epoch": 41.79389312977099, "grad_norm": 0.5091714859008789, "learning_rate": 7.524303462851872e-06, "loss": 0.0773, "step": 10950 }, { "epoch": 41.83206106870229, "grad_norm": 0.5478523373603821, "learning_rate": 7.519544341759193e-06, "loss": 0.0739, "step": 10960 }, { "epoch": 41.87022900763359, "grad_norm": 0.891545832157135, "learning_rate": 7.514782159182562e-06, "loss": 0.0806, "step": 10970 }, { "epoch": 41.908396946564885, "grad_norm": 0.6090933680534363, "learning_rate": 7.510016920908481e-06, "loss": 0.0795, "step": 10980 }, { "epoch": 41.94656488549618, "grad_norm": 0.9301139116287231, "learning_rate": 7.505248632727159e-06, "loss": 0.0815, "step": 10990 }, { "epoch": 41.98473282442748, "grad_norm": 0.6181934475898743, "learning_rate": 7.50047730043252e-06, "loss": 0.0806, "step": 11000 }, { "epoch": 42.02290076335878, "grad_norm": 0.5225881934165955, "learning_rate": 7.495702929822183e-06, "loss": 0.0746, "step": 11010 }, { "epoch": 42.06106870229008, "grad_norm": 0.7685074806213379, "learning_rate": 7.490925526697455e-06, "loss": 0.0796, "step": 11020 }, { "epoch": 42.099236641221374, "grad_norm": 0.475824773311615, "learning_rate": 7.486145096863334e-06, "loss": 0.0735, "step": 11030 }, { "epoch": 42.13740458015267, "grad_norm": 0.3295309245586395, "learning_rate": 7.481361646128491e-06, "loss": 0.0705, "step": 11040 }, { "epoch": 42.17557251908397, "grad_norm": 0.2535337805747986, "learning_rate": 7.476575180305271e-06, "loss": 0.0808, "step": 11050 }, { "epoch": 42.213740458015266, "grad_norm": 0.4357345402240753, "learning_rate": 7.471785705209682e-06, "loss": 0.075, "step": 11060 }, { "epoch": 42.25190839694657, "grad_norm": 0.9749108552932739, "learning_rate": 7.4669932266613875e-06, "loss": 0.0795, "step": 11070 }, { "epoch": 42.29007633587786, "grad_norm": 0.428632527589798, "learning_rate": 7.4621977504837e-06, "loss": 0.0723, "step": 11080 }, { "epoch": 42.32824427480916, "grad_norm": 0.26020991802215576, "learning_rate": 7.457399282503574e-06, "loss": 0.0853, "step": 11090 }, { "epoch": 42.36641221374046, "grad_norm": 0.3331349492073059, "learning_rate": 7.4525978285516046e-06, "loss": 0.0757, "step": 11100 }, { "epoch": 42.404580152671755, "grad_norm": 0.8639835119247437, "learning_rate": 7.447793394462006e-06, "loss": 0.078, "step": 11110 }, { "epoch": 42.44274809160305, "grad_norm": 2.107271909713745, "learning_rate": 7.442985986072624e-06, "loss": 0.0769, "step": 11120 }, { "epoch": 42.48091603053435, "grad_norm": 0.25591397285461426, "learning_rate": 7.438175609224908e-06, "loss": 0.0764, "step": 11130 }, { "epoch": 42.51908396946565, "grad_norm": 0.346733957529068, "learning_rate": 7.433362269763924e-06, "loss": 0.0811, "step": 11140 }, { "epoch": 42.55725190839695, "grad_norm": 0.29196739196777344, "learning_rate": 7.428545973538329e-06, "loss": 0.0797, "step": 11150 }, { "epoch": 42.595419847328245, "grad_norm": 0.6709998846054077, "learning_rate": 7.423726726400381e-06, "loss": 0.084, "step": 11160 }, { "epoch": 42.63358778625954, "grad_norm": 0.46684983372688293, "learning_rate": 7.418904534205917e-06, "loss": 0.0788, "step": 11170 }, { "epoch": 42.67175572519084, "grad_norm": 0.39609742164611816, "learning_rate": 7.414079402814356e-06, "loss": 0.0734, "step": 11180 }, { "epoch": 42.70992366412214, "grad_norm": 0.38470694422721863, "learning_rate": 7.4092513380886876e-06, "loss": 0.0797, "step": 11190 }, { "epoch": 42.74809160305343, "grad_norm": 0.26760122179985046, "learning_rate": 7.4044203458954665e-06, "loss": 0.084, "step": 11200 }, { "epoch": 42.786259541984734, "grad_norm": 0.8822160363197327, "learning_rate": 7.3995864321048036e-06, "loss": 0.0837, "step": 11210 }, { "epoch": 42.82442748091603, "grad_norm": 0.42078760266304016, "learning_rate": 7.394749602590359e-06, "loss": 0.0775, "step": 11220 }, { "epoch": 42.86259541984733, "grad_norm": 0.2612259089946747, "learning_rate": 7.389909863229337e-06, "loss": 0.0713, "step": 11230 }, { "epoch": 42.900763358778626, "grad_norm": 0.6772575974464417, "learning_rate": 7.385067219902478e-06, "loss": 0.0813, "step": 11240 }, { "epoch": 42.93893129770992, "grad_norm": 0.37130260467529297, "learning_rate": 7.380221678494048e-06, "loss": 0.081, "step": 11250 }, { "epoch": 42.97709923664122, "grad_norm": 0.30204957723617554, "learning_rate": 7.375373244891839e-06, "loss": 0.0723, "step": 11260 }, { "epoch": 43.01526717557252, "grad_norm": 0.515661358833313, "learning_rate": 7.370521924987155e-06, "loss": 0.078, "step": 11270 }, { "epoch": 43.05343511450382, "grad_norm": 0.40739643573760986, "learning_rate": 7.3656677246748064e-06, "loss": 0.0858, "step": 11280 }, { "epoch": 43.091603053435115, "grad_norm": 0.37059029936790466, "learning_rate": 7.360810649853105e-06, "loss": 0.073, "step": 11290 }, { "epoch": 43.12977099236641, "grad_norm": 0.36438652873039246, "learning_rate": 7.355950706423854e-06, "loss": 0.1083, "step": 11300 }, { "epoch": 43.16793893129771, "grad_norm": 0.4451878070831299, "learning_rate": 7.351087900292342e-06, "loss": 0.0794, "step": 11310 }, { "epoch": 43.20610687022901, "grad_norm": 0.5628499984741211, "learning_rate": 7.346222237367339e-06, "loss": 0.0822, "step": 11320 }, { "epoch": 43.2442748091603, "grad_norm": 0.374586820602417, "learning_rate": 7.341353723561082e-06, "loss": 0.0789, "step": 11330 }, { "epoch": 43.282442748091604, "grad_norm": 0.8239962458610535, "learning_rate": 7.336482364789277e-06, "loss": 0.0809, "step": 11340 }, { "epoch": 43.3206106870229, "grad_norm": 0.36449357867240906, "learning_rate": 7.331608166971082e-06, "loss": 0.0776, "step": 11350 }, { "epoch": 43.3587786259542, "grad_norm": 0.5167990326881409, "learning_rate": 7.326731136029108e-06, "loss": 0.089, "step": 11360 }, { "epoch": 43.396946564885496, "grad_norm": 0.6136403679847717, "learning_rate": 7.321851277889408e-06, "loss": 0.0766, "step": 11370 }, { "epoch": 43.43511450381679, "grad_norm": 0.35888543725013733, "learning_rate": 7.31696859848147e-06, "loss": 0.0896, "step": 11380 }, { "epoch": 43.47328244274809, "grad_norm": 0.4521687626838684, "learning_rate": 7.312083103738207e-06, "loss": 0.0749, "step": 11390 }, { "epoch": 43.51145038167939, "grad_norm": 0.4348390996456146, "learning_rate": 7.307194799595958e-06, "loss": 0.0852, "step": 11400 }, { "epoch": 43.54961832061069, "grad_norm": 0.7922037839889526, "learning_rate": 7.302303691994474e-06, "loss": 0.0793, "step": 11410 }, { "epoch": 43.587786259541986, "grad_norm": 0.3498508334159851, "learning_rate": 7.29740978687691e-06, "loss": 0.0883, "step": 11420 }, { "epoch": 43.62595419847328, "grad_norm": 0.3470660448074341, "learning_rate": 7.292513090189824e-06, "loss": 0.0693, "step": 11430 }, { "epoch": 43.66412213740458, "grad_norm": 0.2837408483028412, "learning_rate": 7.287613607883164e-06, "loss": 0.0732, "step": 11440 }, { "epoch": 43.70229007633588, "grad_norm": 0.5988171696662903, "learning_rate": 7.282711345910263e-06, "loss": 0.0734, "step": 11450 }, { "epoch": 43.74045801526717, "grad_norm": 0.5002927780151367, "learning_rate": 7.277806310227831e-06, "loss": 0.0744, "step": 11460 }, { "epoch": 43.778625954198475, "grad_norm": 0.4815424382686615, "learning_rate": 7.272898506795948e-06, "loss": 0.0866, "step": 11470 }, { "epoch": 43.81679389312977, "grad_norm": 0.4680640697479248, "learning_rate": 7.267987941578058e-06, "loss": 0.0883, "step": 11480 }, { "epoch": 43.85496183206107, "grad_norm": 0.3622831404209137, "learning_rate": 7.263074620540963e-06, "loss": 0.0812, "step": 11490 }, { "epoch": 43.89312977099237, "grad_norm": 0.289620965719223, "learning_rate": 7.25815854965481e-06, "loss": 0.0788, "step": 11500 }, { "epoch": 43.93129770992366, "grad_norm": 0.4161958396434784, "learning_rate": 7.253239734893089e-06, "loss": 0.0877, "step": 11510 }, { "epoch": 43.969465648854964, "grad_norm": 0.27894461154937744, "learning_rate": 7.248318182232623e-06, "loss": 0.0706, "step": 11520 }, { "epoch": 44.00763358778626, "grad_norm": 0.4374925494194031, "learning_rate": 7.243393897653565e-06, "loss": 0.0757, "step": 11530 }, { "epoch": 44.045801526717554, "grad_norm": 0.4744294285774231, "learning_rate": 7.23846688713938e-06, "loss": 0.0805, "step": 11540 }, { "epoch": 44.083969465648856, "grad_norm": 0.7247238755226135, "learning_rate": 7.233537156676854e-06, "loss": 0.0801, "step": 11550 }, { "epoch": 44.12213740458015, "grad_norm": 0.5223896503448486, "learning_rate": 7.228604712256076e-06, "loss": 0.0763, "step": 11560 }, { "epoch": 44.16030534351145, "grad_norm": 0.32578030228614807, "learning_rate": 7.2236695598704265e-06, "loss": 0.0667, "step": 11570 }, { "epoch": 44.19847328244275, "grad_norm": 0.809870183467865, "learning_rate": 7.218731705516585e-06, "loss": 0.072, "step": 11580 }, { "epoch": 44.23664122137404, "grad_norm": 0.722334623336792, "learning_rate": 7.21379115519451e-06, "loss": 0.093, "step": 11590 }, { "epoch": 44.274809160305345, "grad_norm": 0.8618139028549194, "learning_rate": 7.208847914907431e-06, "loss": 0.0869, "step": 11600 }, { "epoch": 44.31297709923664, "grad_norm": 0.40315625071525574, "learning_rate": 7.203901990661857e-06, "loss": 0.0805, "step": 11610 }, { "epoch": 44.35114503816794, "grad_norm": 0.5582493543624878, "learning_rate": 7.1989533884675486e-06, "loss": 0.0782, "step": 11620 }, { "epoch": 44.38931297709924, "grad_norm": 0.5148290395736694, "learning_rate": 7.1940021143375264e-06, "loss": 0.0817, "step": 11630 }, { "epoch": 44.42748091603053, "grad_norm": 0.685417652130127, "learning_rate": 7.189048174288054e-06, "loss": 0.0824, "step": 11640 }, { "epoch": 44.465648854961835, "grad_norm": 0.4359172284603119, "learning_rate": 7.184091574338637e-06, "loss": 0.0813, "step": 11650 }, { "epoch": 44.50381679389313, "grad_norm": 0.37148523330688477, "learning_rate": 7.179132320512009e-06, "loss": 0.0835, "step": 11660 }, { "epoch": 44.541984732824424, "grad_norm": 0.714996337890625, "learning_rate": 7.174170418834134e-06, "loss": 0.0835, "step": 11670 }, { "epoch": 44.58015267175573, "grad_norm": 0.30582910776138306, "learning_rate": 7.1692058753341885e-06, "loss": 0.071, "step": 11680 }, { "epoch": 44.61832061068702, "grad_norm": 0.4738406836986542, "learning_rate": 7.164238696044562e-06, "loss": 0.0722, "step": 11690 }, { "epoch": 44.656488549618324, "grad_norm": 0.39357179403305054, "learning_rate": 7.159268887000845e-06, "loss": 0.075, "step": 11700 }, { "epoch": 44.69465648854962, "grad_norm": 0.4723822772502899, "learning_rate": 7.1542964542418265e-06, "loss": 0.0876, "step": 11710 }, { "epoch": 44.732824427480914, "grad_norm": 0.43717026710510254, "learning_rate": 7.149321403809478e-06, "loss": 0.0779, "step": 11720 }, { "epoch": 44.770992366412216, "grad_norm": 0.40371444821357727, "learning_rate": 7.1443437417489555e-06, "loss": 0.073, "step": 11730 }, { "epoch": 44.80916030534351, "grad_norm": 0.5036951899528503, "learning_rate": 7.13936347410859e-06, "loss": 0.0821, "step": 11740 }, { "epoch": 44.847328244274806, "grad_norm": 0.4972344934940338, "learning_rate": 7.1343806069398745e-06, "loss": 0.0886, "step": 11750 }, { "epoch": 44.88549618320611, "grad_norm": 0.4003848135471344, "learning_rate": 7.12939514629746e-06, "loss": 0.0782, "step": 11760 }, { "epoch": 44.9236641221374, "grad_norm": 0.36399978399276733, "learning_rate": 7.1244070982391556e-06, "loss": 0.0918, "step": 11770 }, { "epoch": 44.961832061068705, "grad_norm": 0.2910782992839813, "learning_rate": 7.119416468825908e-06, "loss": 0.0756, "step": 11780 }, { "epoch": 45.0, "grad_norm": 0.429004043340683, "learning_rate": 7.114423264121804e-06, "loss": 0.0846, "step": 11790 }, { "epoch": 45.038167938931295, "grad_norm": 0.3626013994216919, "learning_rate": 7.1094274901940566e-06, "loss": 0.1058, "step": 11800 }, { "epoch": 45.0763358778626, "grad_norm": 0.631897509098053, "learning_rate": 7.104429153113001e-06, "loss": 0.0802, "step": 11810 }, { "epoch": 45.11450381679389, "grad_norm": 0.4777592122554779, "learning_rate": 7.099428258952092e-06, "loss": 0.0744, "step": 11820 }, { "epoch": 45.152671755725194, "grad_norm": 0.4223073422908783, "learning_rate": 7.094424813787883e-06, "loss": 0.0749, "step": 11830 }, { "epoch": 45.19083969465649, "grad_norm": 0.3444991707801819, "learning_rate": 7.089418823700035e-06, "loss": 0.0805, "step": 11840 }, { "epoch": 45.229007633587784, "grad_norm": 0.41768884658813477, "learning_rate": 7.084410294771298e-06, "loss": 0.0725, "step": 11850 }, { "epoch": 45.267175572519086, "grad_norm": 0.25872498750686646, "learning_rate": 7.079399233087504e-06, "loss": 0.076, "step": 11860 }, { "epoch": 45.30534351145038, "grad_norm": 0.35184618830680847, "learning_rate": 7.074385644737568e-06, "loss": 0.0768, "step": 11870 }, { "epoch": 45.343511450381676, "grad_norm": 0.6684393286705017, "learning_rate": 7.069369535813473e-06, "loss": 0.0831, "step": 11880 }, { "epoch": 45.38167938931298, "grad_norm": 0.6411822438240051, "learning_rate": 7.064350912410261e-06, "loss": 0.0759, "step": 11890 }, { "epoch": 45.41984732824427, "grad_norm": 0.363640695810318, "learning_rate": 7.059329780626036e-06, "loss": 0.0761, "step": 11900 }, { "epoch": 45.458015267175576, "grad_norm": 0.5293934345245361, "learning_rate": 7.054306146561944e-06, "loss": 0.0751, "step": 11910 }, { "epoch": 45.49618320610687, "grad_norm": 0.2698386609554291, "learning_rate": 7.049280016322177e-06, "loss": 0.0762, "step": 11920 }, { "epoch": 45.534351145038165, "grad_norm": 0.39354628324508667, "learning_rate": 7.044251396013957e-06, "loss": 0.0678, "step": 11930 }, { "epoch": 45.57251908396947, "grad_norm": 0.8730633854866028, "learning_rate": 7.039220291747528e-06, "loss": 0.0888, "step": 11940 }, { "epoch": 45.61068702290076, "grad_norm": 0.4490543603897095, "learning_rate": 7.034186709636159e-06, "loss": 0.0761, "step": 11950 }, { "epoch": 45.64885496183206, "grad_norm": 0.39466390013694763, "learning_rate": 7.029150655796129e-06, "loss": 0.0722, "step": 11960 }, { "epoch": 45.68702290076336, "grad_norm": 0.3895810842514038, "learning_rate": 7.024112136346713e-06, "loss": 0.0774, "step": 11970 }, { "epoch": 45.725190839694655, "grad_norm": 0.816741406917572, "learning_rate": 7.019071157410191e-06, "loss": 0.0879, "step": 11980 }, { "epoch": 45.76335877862596, "grad_norm": 0.5164619088172913, "learning_rate": 7.014027725111826e-06, "loss": 0.0811, "step": 11990 }, { "epoch": 45.80152671755725, "grad_norm": 0.5740540623664856, "learning_rate": 7.0089818455798655e-06, "loss": 0.1007, "step": 12000 }, { "epoch": 45.83969465648855, "grad_norm": 0.5206035375595093, "learning_rate": 7.0039335249455285e-06, "loss": 0.0829, "step": 12010 }, { "epoch": 45.87786259541985, "grad_norm": 0.7271233797073364, "learning_rate": 6.998882769342998e-06, "loss": 0.0747, "step": 12020 }, { "epoch": 45.916030534351144, "grad_norm": 0.59157794713974, "learning_rate": 6.993829584909423e-06, "loss": 0.0856, "step": 12030 }, { "epoch": 45.954198473282446, "grad_norm": 0.6484169363975525, "learning_rate": 6.988773977784895e-06, "loss": 0.0833, "step": 12040 }, { "epoch": 45.99236641221374, "grad_norm": 0.494017094373703, "learning_rate": 6.9837159541124544e-06, "loss": 0.085, "step": 12050 }, { "epoch": 46.030534351145036, "grad_norm": 0.5254559516906738, "learning_rate": 6.978655520038079e-06, "loss": 0.079, "step": 12060 }, { "epoch": 46.06870229007634, "grad_norm": 0.5512828230857849, "learning_rate": 6.9735926817106704e-06, "loss": 0.0768, "step": 12070 }, { "epoch": 46.10687022900763, "grad_norm": 0.48154160380363464, "learning_rate": 6.968527445282056e-06, "loss": 0.0768, "step": 12080 }, { "epoch": 46.14503816793893, "grad_norm": 0.4283272624015808, "learning_rate": 6.963459816906977e-06, "loss": 0.0804, "step": 12090 }, { "epoch": 46.18320610687023, "grad_norm": 1.275875449180603, "learning_rate": 6.958389802743078e-06, "loss": 0.0798, "step": 12100 }, { "epoch": 46.221374045801525, "grad_norm": 0.565065324306488, "learning_rate": 6.953317408950903e-06, "loss": 0.0735, "step": 12110 }, { "epoch": 46.25954198473283, "grad_norm": 0.7162991762161255, "learning_rate": 6.948242641693891e-06, "loss": 0.0741, "step": 12120 }, { "epoch": 46.29770992366412, "grad_norm": 0.41520068049430847, "learning_rate": 6.9431655071383605e-06, "loss": 0.0735, "step": 12130 }, { "epoch": 46.33587786259542, "grad_norm": 0.3980543613433838, "learning_rate": 6.938086011453513e-06, "loss": 0.0822, "step": 12140 }, { "epoch": 46.37404580152672, "grad_norm": 0.28883951902389526, "learning_rate": 6.93300416081141e-06, "loss": 0.0836, "step": 12150 }, { "epoch": 46.412213740458014, "grad_norm": 0.5937278270721436, "learning_rate": 6.927919961386984e-06, "loss": 0.0764, "step": 12160 }, { "epoch": 46.45038167938931, "grad_norm": 0.40771183371543884, "learning_rate": 6.922833419358012e-06, "loss": 0.0831, "step": 12170 }, { "epoch": 46.48854961832061, "grad_norm": 0.33484435081481934, "learning_rate": 6.917744540905125e-06, "loss": 0.0801, "step": 12180 }, { "epoch": 46.52671755725191, "grad_norm": 0.5971487164497375, "learning_rate": 6.9126533322117875e-06, "loss": 0.0843, "step": 12190 }, { "epoch": 46.56488549618321, "grad_norm": 0.612615704536438, "learning_rate": 6.9075597994643e-06, "loss": 0.0836, "step": 12200 }, { "epoch": 46.603053435114504, "grad_norm": 0.2975360155105591, "learning_rate": 6.902463948851786e-06, "loss": 0.0734, "step": 12210 }, { "epoch": 46.6412213740458, "grad_norm": 0.4995099604129791, "learning_rate": 6.897365786566184e-06, "loss": 0.0795, "step": 12220 }, { "epoch": 46.6793893129771, "grad_norm": 0.3913675844669342, "learning_rate": 6.892265318802242e-06, "loss": 0.0782, "step": 12230 }, { "epoch": 46.717557251908396, "grad_norm": 0.4445989429950714, "learning_rate": 6.887162551757507e-06, "loss": 0.0844, "step": 12240 }, { "epoch": 46.7557251908397, "grad_norm": 0.3842538297176361, "learning_rate": 6.882057491632326e-06, "loss": 0.0721, "step": 12250 }, { "epoch": 46.79389312977099, "grad_norm": 0.3080384135246277, "learning_rate": 6.876950144629824e-06, "loss": 0.0702, "step": 12260 }, { "epoch": 46.83206106870229, "grad_norm": 0.5869445204734802, "learning_rate": 6.8718405169559114e-06, "loss": 0.0687, "step": 12270 }, { "epoch": 46.87022900763359, "grad_norm": 0.4181062579154968, "learning_rate": 6.866728614819268e-06, "loss": 0.0786, "step": 12280 }, { "epoch": 46.908396946564885, "grad_norm": 0.749423086643219, "learning_rate": 6.861614444431337e-06, "loss": 0.0862, "step": 12290 }, { "epoch": 46.94656488549618, "grad_norm": 0.5488717555999756, "learning_rate": 6.856498012006318e-06, "loss": 0.0881, "step": 12300 }, { "epoch": 46.98473282442748, "grad_norm": 0.8571105003356934, "learning_rate": 6.851379323761157e-06, "loss": 0.0773, "step": 12310 }, { "epoch": 47.02290076335878, "grad_norm": 0.5261535048484802, "learning_rate": 6.846258385915545e-06, "loss": 0.0912, "step": 12320 }, { "epoch": 47.06106870229008, "grad_norm": 0.895200252532959, "learning_rate": 6.841135204691902e-06, "loss": 0.0858, "step": 12330 }, { "epoch": 47.099236641221374, "grad_norm": 0.49354881048202515, "learning_rate": 6.8360097863153775e-06, "loss": 0.0762, "step": 12340 }, { "epoch": 47.13740458015267, "grad_norm": 0.42916223406791687, "learning_rate": 6.830882137013839e-06, "loss": 0.0749, "step": 12350 }, { "epoch": 47.17557251908397, "grad_norm": 0.6174752116203308, "learning_rate": 6.825752263017863e-06, "loss": 0.0797, "step": 12360 }, { "epoch": 47.213740458015266, "grad_norm": 0.4598800241947174, "learning_rate": 6.820620170560731e-06, "loss": 0.0828, "step": 12370 }, { "epoch": 47.25190839694657, "grad_norm": 0.3219451904296875, "learning_rate": 6.815485865878418e-06, "loss": 0.0845, "step": 12380 }, { "epoch": 47.29007633587786, "grad_norm": 0.4897594451904297, "learning_rate": 6.8103493552095875e-06, "loss": 0.0874, "step": 12390 }, { "epoch": 47.32824427480916, "grad_norm": 0.35891905426979065, "learning_rate": 6.805210644795588e-06, "loss": 0.0715, "step": 12400 }, { "epoch": 47.36641221374046, "grad_norm": 0.6998705267906189, "learning_rate": 6.8000697408804326e-06, "loss": 0.0799, "step": 12410 }, { "epoch": 47.404580152671755, "grad_norm": 0.7352116703987122, "learning_rate": 6.794926649710807e-06, "loss": 0.0873, "step": 12420 }, { "epoch": 47.44274809160305, "grad_norm": 0.34309205412864685, "learning_rate": 6.7897813775360536e-06, "loss": 0.0753, "step": 12430 }, { "epoch": 47.48091603053435, "grad_norm": 0.6588733792304993, "learning_rate": 6.784633930608158e-06, "loss": 0.0777, "step": 12440 }, { "epoch": 47.51908396946565, "grad_norm": 0.43710416555404663, "learning_rate": 6.779484315181759e-06, "loss": 0.0841, "step": 12450 }, { "epoch": 47.55725190839695, "grad_norm": 0.4144386649131775, "learning_rate": 6.774332537514122e-06, "loss": 0.0727, "step": 12460 }, { "epoch": 47.595419847328245, "grad_norm": 0.3395448625087738, "learning_rate": 6.769178603865143e-06, "loss": 0.08, "step": 12470 }, { "epoch": 47.63358778625954, "grad_norm": 0.38854730129241943, "learning_rate": 6.764022520497337e-06, "loss": 0.0768, "step": 12480 }, { "epoch": 47.67175572519084, "grad_norm": 0.43010884523391724, "learning_rate": 6.758864293675833e-06, "loss": 0.0757, "step": 12490 }, { "epoch": 47.70992366412214, "grad_norm": 0.31416982412338257, "learning_rate": 6.753703929668363e-06, "loss": 0.0828, "step": 12500 }, { "epoch": 47.74809160305343, "grad_norm": 0.3258754312992096, "learning_rate": 6.7485414347452535e-06, "loss": 0.0707, "step": 12510 }, { "epoch": 47.786259541984734, "grad_norm": 0.4140434265136719, "learning_rate": 6.743376815179424e-06, "loss": 0.0717, "step": 12520 }, { "epoch": 47.82442748091603, "grad_norm": 0.3196008801460266, "learning_rate": 6.738210077246377e-06, "loss": 0.0771, "step": 12530 }, { "epoch": 47.86259541984733, "grad_norm": 0.36054739356040955, "learning_rate": 6.733041227224182e-06, "loss": 0.0739, "step": 12540 }, { "epoch": 47.900763358778626, "grad_norm": 0.32305601239204407, "learning_rate": 6.72787027139348e-06, "loss": 0.0793, "step": 12550 }, { "epoch": 47.93893129770992, "grad_norm": 0.41172558069229126, "learning_rate": 6.72269721603747e-06, "loss": 0.0746, "step": 12560 }, { "epoch": 47.97709923664122, "grad_norm": 0.5158224701881409, "learning_rate": 6.717522067441904e-06, "loss": 0.0709, "step": 12570 }, { "epoch": 48.01526717557252, "grad_norm": 0.33051443099975586, "learning_rate": 6.712344831895075e-06, "loss": 0.0799, "step": 12580 }, { "epoch": 48.05343511450382, "grad_norm": 0.2499360889196396, "learning_rate": 6.707165515687811e-06, "loss": 0.0933, "step": 12590 }, { "epoch": 48.091603053435115, "grad_norm": 0.3040105700492859, "learning_rate": 6.70198412511347e-06, "loss": 0.0684, "step": 12600 }, { "epoch": 48.12977099236641, "grad_norm": 0.5028464794158936, "learning_rate": 6.696800666467931e-06, "loss": 0.0763, "step": 12610 }, { "epoch": 48.16793893129771, "grad_norm": 0.6218233704566956, "learning_rate": 6.691615146049584e-06, "loss": 0.0713, "step": 12620 }, { "epoch": 48.20610687022901, "grad_norm": 1.142249345779419, "learning_rate": 6.686427570159324e-06, "loss": 0.0881, "step": 12630 }, { "epoch": 48.2442748091603, "grad_norm": 0.40421727299690247, "learning_rate": 6.681237945100549e-06, "loss": 0.0699, "step": 12640 }, { "epoch": 48.282442748091604, "grad_norm": 0.4574754238128662, "learning_rate": 6.676046277179139e-06, "loss": 0.0794, "step": 12650 }, { "epoch": 48.3206106870229, "grad_norm": 0.33654555678367615, "learning_rate": 6.670852572703462e-06, "loss": 0.0722, "step": 12660 }, { "epoch": 48.3587786259542, "grad_norm": 0.2677624523639679, "learning_rate": 6.665656837984359e-06, "loss": 0.0855, "step": 12670 }, { "epoch": 48.396946564885496, "grad_norm": 0.7722212672233582, "learning_rate": 6.660459079335136e-06, "loss": 0.0804, "step": 12680 }, { "epoch": 48.43511450381679, "grad_norm": 0.32007500529289246, "learning_rate": 6.655259303071558e-06, "loss": 0.0786, "step": 12690 }, { "epoch": 48.47328244274809, "grad_norm": 0.28395479917526245, "learning_rate": 6.650057515511849e-06, "loss": 0.0741, "step": 12700 }, { "epoch": 48.51145038167939, "grad_norm": 0.5436563491821289, "learning_rate": 6.644853722976667e-06, "loss": 0.0879, "step": 12710 }, { "epoch": 48.54961832061069, "grad_norm": 0.5980028510093689, "learning_rate": 6.639647931789114e-06, "loss": 0.0855, "step": 12720 }, { "epoch": 48.587786259541986, "grad_norm": 0.345226913690567, "learning_rate": 6.634440148274712e-06, "loss": 0.0693, "step": 12730 }, { "epoch": 48.62595419847328, "grad_norm": 0.559097409248352, "learning_rate": 6.6292303787614156e-06, "loss": 0.0704, "step": 12740 }, { "epoch": 48.66412213740458, "grad_norm": 0.2557688355445862, "learning_rate": 6.624018629579582e-06, "loss": 0.0746, "step": 12750 }, { "epoch": 48.70229007633588, "grad_norm": 0.38881349563598633, "learning_rate": 6.618804907061977e-06, "loss": 0.0727, "step": 12760 }, { "epoch": 48.74045801526717, "grad_norm": 0.34416061639785767, "learning_rate": 6.613589217543765e-06, "loss": 0.0781, "step": 12770 }, { "epoch": 48.778625954198475, "grad_norm": 0.6163748502731323, "learning_rate": 6.608371567362505e-06, "loss": 0.0785, "step": 12780 }, { "epoch": 48.81679389312977, "grad_norm": 0.2579529583454132, "learning_rate": 6.60315196285813e-06, "loss": 0.064, "step": 12790 }, { "epoch": 48.85496183206107, "grad_norm": 0.32075974345207214, "learning_rate": 6.5979304103729545e-06, "loss": 0.0831, "step": 12800 }, { "epoch": 48.89312977099237, "grad_norm": 0.28072690963745117, "learning_rate": 6.592706916251653e-06, "loss": 0.068, "step": 12810 }, { "epoch": 48.93129770992366, "grad_norm": 0.3512701690196991, "learning_rate": 6.587481486841267e-06, "loss": 0.0832, "step": 12820 }, { "epoch": 48.969465648854964, "grad_norm": 0.3708653151988983, "learning_rate": 6.582254128491184e-06, "loss": 0.08, "step": 12830 }, { "epoch": 49.00763358778626, "grad_norm": 0.5999414324760437, "learning_rate": 6.577024847553139e-06, "loss": 0.0707, "step": 12840 }, { "epoch": 49.045801526717554, "grad_norm": 0.5251232385635376, "learning_rate": 6.5717936503812e-06, "loss": 0.078, "step": 12850 }, { "epoch": 49.083969465648856, "grad_norm": 0.30655843019485474, "learning_rate": 6.5665605433317655e-06, "loss": 0.0761, "step": 12860 }, { "epoch": 49.12213740458015, "grad_norm": 0.3946235775947571, "learning_rate": 6.561325532763554e-06, "loss": 0.0757, "step": 12870 }, { "epoch": 49.16030534351145, "grad_norm": 0.27524781227111816, "learning_rate": 6.556088625037598e-06, "loss": 0.071, "step": 12880 }, { "epoch": 49.19847328244275, "grad_norm": 0.27096670866012573, "learning_rate": 6.550849826517231e-06, "loss": 0.0781, "step": 12890 }, { "epoch": 49.23664122137404, "grad_norm": 0.4801909029483795, "learning_rate": 6.54560914356809e-06, "loss": 0.0745, "step": 12900 }, { "epoch": 49.274809160305345, "grad_norm": 0.4032978117465973, "learning_rate": 6.5403665825580975e-06, "loss": 0.0764, "step": 12910 }, { "epoch": 49.31297709923664, "grad_norm": 0.46122097969055176, "learning_rate": 6.53512214985746e-06, "loss": 0.0733, "step": 12920 }, { "epoch": 49.35114503816794, "grad_norm": 0.6307662129402161, "learning_rate": 6.529875851838659e-06, "loss": 0.0768, "step": 12930 }, { "epoch": 49.38931297709924, "grad_norm": 0.28868502378463745, "learning_rate": 6.5246276948764394e-06, "loss": 0.0859, "step": 12940 }, { "epoch": 49.42748091603053, "grad_norm": 0.463450163602829, "learning_rate": 6.519377685347808e-06, "loss": 0.0768, "step": 12950 }, { "epoch": 49.465648854961835, "grad_norm": 0.4888218641281128, "learning_rate": 6.514125829632021e-06, "loss": 0.0741, "step": 12960 }, { "epoch": 49.50381679389313, "grad_norm": 0.615373432636261, "learning_rate": 6.508872134110578e-06, "loss": 0.0736, "step": 12970 }, { "epoch": 49.541984732824424, "grad_norm": 0.2985082268714905, "learning_rate": 6.5036166051672135e-06, "loss": 0.0885, "step": 12980 }, { "epoch": 49.58015267175573, "grad_norm": 0.461047887802124, "learning_rate": 6.498359249187893e-06, "loss": 0.0756, "step": 12990 }, { "epoch": 49.61832061068702, "grad_norm": 0.46043241024017334, "learning_rate": 6.4931000725607985e-06, "loss": 0.0753, "step": 13000 }, { "epoch": 49.656488549618324, "grad_norm": 0.4072940945625305, "learning_rate": 6.487839081676327e-06, "loss": 0.0678, "step": 13010 }, { "epoch": 49.69465648854962, "grad_norm": 0.6342893242835999, "learning_rate": 6.482576282927076e-06, "loss": 0.0719, "step": 13020 }, { "epoch": 49.732824427480914, "grad_norm": 0.4662512242794037, "learning_rate": 6.477311682707844e-06, "loss": 0.0745, "step": 13030 }, { "epoch": 49.770992366412216, "grad_norm": 0.2835651934146881, "learning_rate": 6.472045287415616e-06, "loss": 0.0689, "step": 13040 }, { "epoch": 49.80916030534351, "grad_norm": 0.3895277678966522, "learning_rate": 6.466777103449559e-06, "loss": 0.0747, "step": 13050 }, { "epoch": 49.847328244274806, "grad_norm": 0.32042866945266724, "learning_rate": 6.461507137211012e-06, "loss": 0.0688, "step": 13060 }, { "epoch": 49.88549618320611, "grad_norm": 0.5186818838119507, "learning_rate": 6.456235395103483e-06, "loss": 0.0784, "step": 13070 }, { "epoch": 49.9236641221374, "grad_norm": 0.28963205218315125, "learning_rate": 6.450961883532635e-06, "loss": 0.0769, "step": 13080 }, { "epoch": 49.961832061068705, "grad_norm": 0.374496728181839, "learning_rate": 6.445686608906283e-06, "loss": 0.0674, "step": 13090 }, { "epoch": 50.0, "grad_norm": 0.35718169808387756, "learning_rate": 6.44040957763438e-06, "loss": 0.0773, "step": 13100 }, { "epoch": 50.038167938931295, "grad_norm": 0.5534579753875732, "learning_rate": 6.435130796129019e-06, "loss": 0.0677, "step": 13110 }, { "epoch": 50.0763358778626, "grad_norm": 0.5444230437278748, "learning_rate": 6.4298502708044165e-06, "loss": 0.08, "step": 13120 }, { "epoch": 50.11450381679389, "grad_norm": 0.5170355439186096, "learning_rate": 6.424568008076909e-06, "loss": 0.07, "step": 13130 }, { "epoch": 50.152671755725194, "grad_norm": 0.514159083366394, "learning_rate": 6.419284014364944e-06, "loss": 0.0907, "step": 13140 }, { "epoch": 50.19083969465649, "grad_norm": 0.31660693883895874, "learning_rate": 6.413998296089071e-06, "loss": 0.0744, "step": 13150 }, { "epoch": 50.229007633587784, "grad_norm": 0.2514859139919281, "learning_rate": 6.408710859671938e-06, "loss": 0.0733, "step": 13160 }, { "epoch": 50.267175572519086, "grad_norm": 0.6438994407653809, "learning_rate": 6.403421711538278e-06, "loss": 0.0816, "step": 13170 }, { "epoch": 50.30534351145038, "grad_norm": 0.5075148940086365, "learning_rate": 6.398130858114904e-06, "loss": 0.0808, "step": 13180 }, { "epoch": 50.343511450381676, "grad_norm": 1.2176218032836914, "learning_rate": 6.392838305830702e-06, "loss": 0.0821, "step": 13190 }, { "epoch": 50.38167938931298, "grad_norm": 0.32374969124794006, "learning_rate": 6.387544061116622e-06, "loss": 0.0789, "step": 13200 }, { "epoch": 50.41984732824427, "grad_norm": 0.539487898349762, "learning_rate": 6.382248130405671e-06, "loss": 0.0717, "step": 13210 }, { "epoch": 50.458015267175576, "grad_norm": 0.29437029361724854, "learning_rate": 6.376950520132906e-06, "loss": 0.0715, "step": 13220 }, { "epoch": 50.49618320610687, "grad_norm": 0.2797829806804657, "learning_rate": 6.371651236735418e-06, "loss": 0.0691, "step": 13230 }, { "epoch": 50.534351145038165, "grad_norm": 1.2593516111373901, "learning_rate": 6.366350286652341e-06, "loss": 0.0775, "step": 13240 }, { "epoch": 50.57251908396947, "grad_norm": 0.5753695964813232, "learning_rate": 6.361047676324827e-06, "loss": 0.0787, "step": 13250 }, { "epoch": 50.61068702290076, "grad_norm": 0.6826583743095398, "learning_rate": 6.355743412196047e-06, "loss": 0.071, "step": 13260 }, { "epoch": 50.64885496183206, "grad_norm": 0.655083417892456, "learning_rate": 6.350437500711184e-06, "loss": 0.07, "step": 13270 }, { "epoch": 50.68702290076336, "grad_norm": 0.3876498341560364, "learning_rate": 6.345129948317419e-06, "loss": 0.0641, "step": 13280 }, { "epoch": 50.725190839694655, "grad_norm": 0.38112854957580566, "learning_rate": 6.339820761463929e-06, "loss": 0.0754, "step": 13290 }, { "epoch": 50.76335877862596, "grad_norm": 0.402295857667923, "learning_rate": 6.334509946601879e-06, "loss": 0.0878, "step": 13300 }, { "epoch": 50.80152671755725, "grad_norm": 0.3208377957344055, "learning_rate": 6.329197510184406e-06, "loss": 0.0746, "step": 13310 }, { "epoch": 50.83969465648855, "grad_norm": 0.3299608528614044, "learning_rate": 6.323883458666624e-06, "loss": 0.0748, "step": 13320 }, { "epoch": 50.87786259541985, "grad_norm": 0.3016846477985382, "learning_rate": 6.318567798505605e-06, "loss": 0.0708, "step": 13330 }, { "epoch": 50.916030534351144, "grad_norm": 0.6639785766601562, "learning_rate": 6.313250536160378e-06, "loss": 0.0789, "step": 13340 }, { "epoch": 50.954198473282446, "grad_norm": 0.3203372061252594, "learning_rate": 6.307931678091918e-06, "loss": 0.0794, "step": 13350 }, { "epoch": 50.99236641221374, "grad_norm": 0.44946426153182983, "learning_rate": 6.3026112307631385e-06, "loss": 0.0792, "step": 13360 }, { "epoch": 51.030534351145036, "grad_norm": 0.32271233201026917, "learning_rate": 6.297289200638888e-06, "loss": 0.0767, "step": 13370 }, { "epoch": 51.06870229007634, "grad_norm": 0.32984182238578796, "learning_rate": 6.29196559418593e-06, "loss": 0.077, "step": 13380 }, { "epoch": 51.10687022900763, "grad_norm": 0.27354347705841064, "learning_rate": 6.286640417872951e-06, "loss": 0.0725, "step": 13390 }, { "epoch": 51.14503816793893, "grad_norm": 0.4007064700126648, "learning_rate": 6.281313678170543e-06, "loss": 0.0697, "step": 13400 }, { "epoch": 51.18320610687023, "grad_norm": 0.2881835401058197, "learning_rate": 6.275985381551195e-06, "loss": 0.0816, "step": 13410 }, { "epoch": 51.221374045801525, "grad_norm": 0.7041976451873779, "learning_rate": 6.270655534489292e-06, "loss": 0.0757, "step": 13420 }, { "epoch": 51.25954198473283, "grad_norm": 0.28111717104911804, "learning_rate": 6.265324143461098e-06, "loss": 0.0808, "step": 13430 }, { "epoch": 51.29770992366412, "grad_norm": 0.38881856203079224, "learning_rate": 6.259991214944758e-06, "loss": 0.0752, "step": 13440 }, { "epoch": 51.33587786259542, "grad_norm": 0.25334322452545166, "learning_rate": 6.254656755420283e-06, "loss": 0.0712, "step": 13450 }, { "epoch": 51.37404580152672, "grad_norm": 0.37281477451324463, "learning_rate": 6.249320771369545e-06, "loss": 0.0813, "step": 13460 }, { "epoch": 51.412213740458014, "grad_norm": 1.6410506963729858, "learning_rate": 6.243983269276263e-06, "loss": 0.0907, "step": 13470 }, { "epoch": 51.45038167938931, "grad_norm": 0.2578519582748413, "learning_rate": 6.238644255626013e-06, "loss": 0.0719, "step": 13480 }, { "epoch": 51.48854961832061, "grad_norm": 0.912161648273468, "learning_rate": 6.233303736906193e-06, "loss": 0.0714, "step": 13490 }, { "epoch": 51.52671755725191, "grad_norm": 0.5383570790290833, "learning_rate": 6.2279617196060394e-06, "loss": 0.0743, "step": 13500 }, { "epoch": 51.56488549618321, "grad_norm": 0.6001830697059631, "learning_rate": 6.2226182102166085e-06, "loss": 0.0739, "step": 13510 }, { "epoch": 51.603053435114504, "grad_norm": 0.40439608693122864, "learning_rate": 6.217273215230767e-06, "loss": 0.0749, "step": 13520 }, { "epoch": 51.6412213740458, "grad_norm": 0.298961877822876, "learning_rate": 6.2119267411431885e-06, "loss": 0.0668, "step": 13530 }, { "epoch": 51.6793893129771, "grad_norm": 0.4611186385154724, "learning_rate": 6.206578794450339e-06, "loss": 0.0789, "step": 13540 }, { "epoch": 51.717557251908396, "grad_norm": 0.2702903747558594, "learning_rate": 6.2012293816504855e-06, "loss": 0.077, "step": 13550 }, { "epoch": 51.7557251908397, "grad_norm": 0.38614171743392944, "learning_rate": 6.195878509243661e-06, "loss": 0.0801, "step": 13560 }, { "epoch": 51.79389312977099, "grad_norm": 0.3832460343837738, "learning_rate": 6.190526183731686e-06, "loss": 0.0675, "step": 13570 }, { "epoch": 51.83206106870229, "grad_norm": 0.7933033108711243, "learning_rate": 6.185172411618138e-06, "loss": 0.0703, "step": 13580 }, { "epoch": 51.87022900763359, "grad_norm": 0.3744434714317322, "learning_rate": 6.179817199408355e-06, "loss": 0.0737, "step": 13590 }, { "epoch": 51.908396946564885, "grad_norm": 0.49073082208633423, "learning_rate": 6.174460553609426e-06, "loss": 0.0843, "step": 13600 }, { "epoch": 51.94656488549618, "grad_norm": 0.438060462474823, "learning_rate": 6.16910248073018e-06, "loss": 0.0729, "step": 13610 }, { "epoch": 51.98473282442748, "grad_norm": 0.3065158724784851, "learning_rate": 6.16374298728118e-06, "loss": 0.075, "step": 13620 }, { "epoch": 52.02290076335878, "grad_norm": 0.3208974301815033, "learning_rate": 6.158382079774716e-06, "loss": 0.0757, "step": 13630 }, { "epoch": 52.06106870229008, "grad_norm": 0.36276963353157043, "learning_rate": 6.153019764724799e-06, "loss": 0.079, "step": 13640 }, { "epoch": 52.099236641221374, "grad_norm": 0.36144378781318665, "learning_rate": 6.147656048647144e-06, "loss": 0.0665, "step": 13650 }, { "epoch": 52.13740458015267, "grad_norm": 0.42839837074279785, "learning_rate": 6.1422909380591724e-06, "loss": 0.0732, "step": 13660 }, { "epoch": 52.17557251908397, "grad_norm": 0.589056134223938, "learning_rate": 6.136924439480001e-06, "loss": 0.0813, "step": 13670 }, { "epoch": 52.213740458015266, "grad_norm": 0.7054497599601746, "learning_rate": 6.13155655943043e-06, "loss": 0.0792, "step": 13680 }, { "epoch": 52.25190839694657, "grad_norm": 0.5286399126052856, "learning_rate": 6.126187304432941e-06, "loss": 0.0732, "step": 13690 }, { "epoch": 52.29007633587786, "grad_norm": 0.4254150986671448, "learning_rate": 6.1208166810116846e-06, "loss": 0.0774, "step": 13700 }, { "epoch": 52.32824427480916, "grad_norm": 0.6747491955757141, "learning_rate": 6.115444695692474e-06, "loss": 0.0777, "step": 13710 }, { "epoch": 52.36641221374046, "grad_norm": 0.3658086657524109, "learning_rate": 6.110071355002779e-06, "loss": 0.0655, "step": 13720 }, { "epoch": 52.404580152671755, "grad_norm": 0.4212816655635834, "learning_rate": 6.104696665471715e-06, "loss": 0.0842, "step": 13730 }, { "epoch": 52.44274809160305, "grad_norm": 0.6053048372268677, "learning_rate": 6.099320633630036e-06, "loss": 0.0804, "step": 13740 }, { "epoch": 52.48091603053435, "grad_norm": 0.3349289894104004, "learning_rate": 6.093943266010128e-06, "loss": 0.0729, "step": 13750 }, { "epoch": 52.51908396946565, "grad_norm": 0.3947334289550781, "learning_rate": 6.088564569146001e-06, "loss": 0.088, "step": 13760 }, { "epoch": 52.55725190839695, "grad_norm": 0.4069903790950775, "learning_rate": 6.083184549573275e-06, "loss": 0.0761, "step": 13770 }, { "epoch": 52.595419847328245, "grad_norm": 0.526199221611023, "learning_rate": 6.0778032138291845e-06, "loss": 0.0743, "step": 13780 }, { "epoch": 52.63358778625954, "grad_norm": 0.37848567962646484, "learning_rate": 6.072420568452559e-06, "loss": 0.0722, "step": 13790 }, { "epoch": 52.67175572519084, "grad_norm": 0.479997843503952, "learning_rate": 6.067036619983822e-06, "loss": 0.0813, "step": 13800 }, { "epoch": 52.70992366412214, "grad_norm": 0.37879565358161926, "learning_rate": 6.061651374964974e-06, "loss": 0.0713, "step": 13810 }, { "epoch": 52.74809160305343, "grad_norm": 0.2613222897052765, "learning_rate": 6.056264839939601e-06, "loss": 0.0733, "step": 13820 }, { "epoch": 52.786259541984734, "grad_norm": 0.4881426990032196, "learning_rate": 6.050877021452845e-06, "loss": 0.0693, "step": 13830 }, { "epoch": 52.82442748091603, "grad_norm": 0.8297280073165894, "learning_rate": 6.0454879260514196e-06, "loss": 0.0751, "step": 13840 }, { "epoch": 52.86259541984733, "grad_norm": 0.8701691627502441, "learning_rate": 6.0400975602835795e-06, "loss": 0.0745, "step": 13850 }, { "epoch": 52.900763358778626, "grad_norm": 0.40549200773239136, "learning_rate": 6.034705930699129e-06, "loss": 0.077, "step": 13860 }, { "epoch": 52.93893129770992, "grad_norm": 0.5269019603729248, "learning_rate": 6.029313043849407e-06, "loss": 0.0732, "step": 13870 }, { "epoch": 52.97709923664122, "grad_norm": 0.6514595746994019, "learning_rate": 6.0239189062872795e-06, "loss": 0.0683, "step": 13880 }, { "epoch": 53.01526717557252, "grad_norm": 0.270759254693985, "learning_rate": 6.01852352456713e-06, "loss": 0.0774, "step": 13890 }, { "epoch": 53.05343511450382, "grad_norm": 0.37720581889152527, "learning_rate": 6.013126905244858e-06, "loss": 0.0703, "step": 13900 }, { "epoch": 53.091603053435115, "grad_norm": 0.45019468665122986, "learning_rate": 6.007729054877865e-06, "loss": 0.0709, "step": 13910 }, { "epoch": 53.12977099236641, "grad_norm": 0.4115181565284729, "learning_rate": 6.002329980025047e-06, "loss": 0.0723, "step": 13920 }, { "epoch": 53.16793893129771, "grad_norm": 0.5874790549278259, "learning_rate": 5.99692968724679e-06, "loss": 0.0708, "step": 13930 }, { "epoch": 53.20610687022901, "grad_norm": 0.31998172402381897, "learning_rate": 5.991528183104959e-06, "loss": 0.0739, "step": 13940 }, { "epoch": 53.2442748091603, "grad_norm": 0.2828161120414734, "learning_rate": 5.98612547416289e-06, "loss": 0.0739, "step": 13950 }, { "epoch": 53.282442748091604, "grad_norm": 0.2661030888557434, "learning_rate": 5.9807215669853855e-06, "loss": 0.1, "step": 13960 }, { "epoch": 53.3206106870229, "grad_norm": 1.1698861122131348, "learning_rate": 5.9753164681387e-06, "loss": 0.0806, "step": 13970 }, { "epoch": 53.3587786259542, "grad_norm": 0.413404256105423, "learning_rate": 5.969910184190539e-06, "loss": 0.0695, "step": 13980 }, { "epoch": 53.396946564885496, "grad_norm": 0.5563871264457703, "learning_rate": 5.9645027217100485e-06, "loss": 0.0831, "step": 13990 }, { "epoch": 53.43511450381679, "grad_norm": 0.5594649314880371, "learning_rate": 5.9590940872678035e-06, "loss": 0.0806, "step": 14000 }, { "epoch": 53.47328244274809, "grad_norm": 0.2644912898540497, "learning_rate": 5.953684287435807e-06, "loss": 0.0744, "step": 14010 }, { "epoch": 53.51145038167939, "grad_norm": 0.31306540966033936, "learning_rate": 5.948273328787474e-06, "loss": 0.0806, "step": 14020 }, { "epoch": 53.54961832061069, "grad_norm": 0.298922598361969, "learning_rate": 5.942861217897631e-06, "loss": 0.0722, "step": 14030 }, { "epoch": 53.587786259541986, "grad_norm": 0.38911426067352295, "learning_rate": 5.937447961342501e-06, "loss": 0.0811, "step": 14040 }, { "epoch": 53.62595419847328, "grad_norm": 0.44509273767471313, "learning_rate": 5.932033565699704e-06, "loss": 0.0824, "step": 14050 }, { "epoch": 53.66412213740458, "grad_norm": 0.6681466102600098, "learning_rate": 5.926618037548237e-06, "loss": 0.0815, "step": 14060 }, { "epoch": 53.70229007633588, "grad_norm": 0.5035837888717651, "learning_rate": 5.921201383468483e-06, "loss": 0.0693, "step": 14070 }, { "epoch": 53.74045801526717, "grad_norm": 0.4998972713947296, "learning_rate": 5.915783610042183e-06, "loss": 0.0758, "step": 14080 }, { "epoch": 53.778625954198475, "grad_norm": 0.36603638529777527, "learning_rate": 5.910364723852444e-06, "loss": 0.0701, "step": 14090 }, { "epoch": 53.81679389312977, "grad_norm": 0.4405144453048706, "learning_rate": 5.904944731483724e-06, "loss": 0.073, "step": 14100 }, { "epoch": 53.85496183206107, "grad_norm": 0.4353579878807068, "learning_rate": 5.899523639521825e-06, "loss": 0.0771, "step": 14110 }, { "epoch": 53.89312977099237, "grad_norm": 0.2582431435585022, "learning_rate": 5.894101454553883e-06, "loss": 0.0665, "step": 14120 }, { "epoch": 53.93129770992366, "grad_norm": 0.2779502868652344, "learning_rate": 5.888678183168368e-06, "loss": 0.0805, "step": 14130 }, { "epoch": 53.969465648854964, "grad_norm": 0.35140860080718994, "learning_rate": 5.883253831955061e-06, "loss": 0.0758, "step": 14140 }, { "epoch": 54.00763358778626, "grad_norm": 0.7238326668739319, "learning_rate": 5.877828407505063e-06, "loss": 0.0818, "step": 14150 }, { "epoch": 54.045801526717554, "grad_norm": 0.43056339025497437, "learning_rate": 5.872401916410777e-06, "loss": 0.0942, "step": 14160 }, { "epoch": 54.083969465648856, "grad_norm": 0.5613008141517639, "learning_rate": 5.866974365265901e-06, "loss": 0.0642, "step": 14170 }, { "epoch": 54.12213740458015, "grad_norm": 0.3026123046875, "learning_rate": 5.86154576066542e-06, "loss": 0.0776, "step": 14180 }, { "epoch": 54.16030534351145, "grad_norm": 0.3152710199356079, "learning_rate": 5.856116109205602e-06, "loss": 0.0674, "step": 14190 }, { "epoch": 54.19847328244275, "grad_norm": 0.8080897331237793, "learning_rate": 5.850685417483983e-06, "loss": 0.0859, "step": 14200 }, { "epoch": 54.23664122137404, "grad_norm": 0.3381716310977936, "learning_rate": 5.845253692099369e-06, "loss": 0.0822, "step": 14210 }, { "epoch": 54.274809160305345, "grad_norm": 0.25009527802467346, "learning_rate": 5.839820939651817e-06, "loss": 0.0705, "step": 14220 }, { "epoch": 54.31297709923664, "grad_norm": 0.24691715836524963, "learning_rate": 5.8343871667426326e-06, "loss": 0.0736, "step": 14230 }, { "epoch": 54.35114503816794, "grad_norm": 0.32508522272109985, "learning_rate": 5.828952379974364e-06, "loss": 0.0692, "step": 14240 }, { "epoch": 54.38931297709924, "grad_norm": 0.3096218407154083, "learning_rate": 5.823516585950787e-06, "loss": 0.0691, "step": 14250 }, { "epoch": 54.42748091603053, "grad_norm": 0.3918653726577759, "learning_rate": 5.818079791276907e-06, "loss": 0.0761, "step": 14260 }, { "epoch": 54.465648854961835, "grad_norm": 0.4393802881240845, "learning_rate": 5.8126420025589415e-06, "loss": 0.0816, "step": 14270 }, { "epoch": 54.50381679389313, "grad_norm": 0.3444501459598541, "learning_rate": 5.807203226404313e-06, "loss": 0.0983, "step": 14280 }, { "epoch": 54.541984732824424, "grad_norm": 0.6188817024230957, "learning_rate": 5.801763469421652e-06, "loss": 0.0816, "step": 14290 }, { "epoch": 54.58015267175573, "grad_norm": 0.39169570803642273, "learning_rate": 5.796322738220774e-06, "loss": 0.0833, "step": 14300 }, { "epoch": 54.61832061068702, "grad_norm": 0.5085362792015076, "learning_rate": 5.79088103941268e-06, "loss": 0.0685, "step": 14310 }, { "epoch": 54.656488549618324, "grad_norm": 1.19497549533844, "learning_rate": 5.78543837960955e-06, "loss": 0.086, "step": 14320 }, { "epoch": 54.69465648854962, "grad_norm": 0.8479430079460144, "learning_rate": 5.7799947654247244e-06, "loss": 0.0815, "step": 14330 }, { "epoch": 54.732824427480914, "grad_norm": 0.4414825439453125, "learning_rate": 5.774550203472712e-06, "loss": 0.076, "step": 14340 }, { "epoch": 54.770992366412216, "grad_norm": 0.3391081690788269, "learning_rate": 5.769104700369165e-06, "loss": 0.0733, "step": 14350 }, { "epoch": 54.80916030534351, "grad_norm": 0.6143276691436768, "learning_rate": 5.763658262730886e-06, "loss": 0.0741, "step": 14360 }, { "epoch": 54.847328244274806, "grad_norm": 0.42588314414024353, "learning_rate": 5.7582108971758095e-06, "loss": 0.0688, "step": 14370 }, { "epoch": 54.88549618320611, "grad_norm": 0.7601977586746216, "learning_rate": 5.752762610323e-06, "loss": 0.0862, "step": 14380 }, { "epoch": 54.9236641221374, "grad_norm": 0.30980366468429565, "learning_rate": 5.747313408792636e-06, "loss": 0.072, "step": 14390 }, { "epoch": 54.961832061068705, "grad_norm": 0.8562023639678955, "learning_rate": 5.7418632992060145e-06, "loss": 0.0743, "step": 14400 }, { "epoch": 55.0, "grad_norm": 0.4307748079299927, "learning_rate": 5.73641228818553e-06, "loss": 0.0679, "step": 14410 }, { "epoch": 55.038167938931295, "grad_norm": 0.2995448708534241, "learning_rate": 5.730960382354677e-06, "loss": 0.0749, "step": 14420 }, { "epoch": 55.0763358778626, "grad_norm": 0.5709684491157532, "learning_rate": 5.725507588338035e-06, "loss": 0.0749, "step": 14430 }, { "epoch": 55.11450381679389, "grad_norm": 0.5325103402137756, "learning_rate": 5.720053912761261e-06, "loss": 0.0696, "step": 14440 }, { "epoch": 55.152671755725194, "grad_norm": 0.40759608149528503, "learning_rate": 5.714599362251088e-06, "loss": 0.0725, "step": 14450 }, { "epoch": 55.19083969465649, "grad_norm": 0.3020817041397095, "learning_rate": 5.709143943435307e-06, "loss": 0.0776, "step": 14460 }, { "epoch": 55.229007633587784, "grad_norm": 0.4237860441207886, "learning_rate": 5.703687662942765e-06, "loss": 0.0763, "step": 14470 }, { "epoch": 55.267175572519086, "grad_norm": 0.8698391914367676, "learning_rate": 5.6982305274033616e-06, "loss": 0.0735, "step": 14480 }, { "epoch": 55.30534351145038, "grad_norm": 0.5781353116035461, "learning_rate": 5.692772543448027e-06, "loss": 0.0779, "step": 14490 }, { "epoch": 55.343511450381676, "grad_norm": 0.27626991271972656, "learning_rate": 5.687313717708728e-06, "loss": 0.0703, "step": 14500 }, { "epoch": 55.38167938931298, "grad_norm": 0.28378692269325256, "learning_rate": 5.681854056818453e-06, "loss": 0.0795, "step": 14510 }, { "epoch": 55.41984732824427, "grad_norm": 0.5798959732055664, "learning_rate": 5.6763935674112045e-06, "loss": 0.0706, "step": 14520 }, { "epoch": 55.458015267175576, "grad_norm": 0.288756400346756, "learning_rate": 5.670932256121992e-06, "loss": 0.0691, "step": 14530 }, { "epoch": 55.49618320610687, "grad_norm": 0.25545430183410645, "learning_rate": 5.665470129586822e-06, "loss": 0.074, "step": 14540 }, { "epoch": 55.534351145038165, "grad_norm": 0.3490813374519348, "learning_rate": 5.660007194442697e-06, "loss": 0.078, "step": 14550 }, { "epoch": 55.57251908396947, "grad_norm": 0.31008848547935486, "learning_rate": 5.6545434573275945e-06, "loss": 0.0772, "step": 14560 }, { "epoch": 55.61068702290076, "grad_norm": 0.3187563717365265, "learning_rate": 5.649078924880472e-06, "loss": 0.0724, "step": 14570 }, { "epoch": 55.64885496183206, "grad_norm": 0.4331674575805664, "learning_rate": 5.643613603741253e-06, "loss": 0.0677, "step": 14580 }, { "epoch": 55.68702290076336, "grad_norm": 0.354324609041214, "learning_rate": 5.6381475005508156e-06, "loss": 0.0759, "step": 14590 }, { "epoch": 55.725190839694655, "grad_norm": 0.4229590594768524, "learning_rate": 5.632680621950992e-06, "loss": 0.0783, "step": 14600 }, { "epoch": 55.76335877862596, "grad_norm": 0.5442618727684021, "learning_rate": 5.627212974584555e-06, "loss": 0.0709, "step": 14610 }, { "epoch": 55.80152671755725, "grad_norm": 0.31692686676979065, "learning_rate": 5.62174456509521e-06, "loss": 0.0718, "step": 14620 }, { "epoch": 55.83969465648855, "grad_norm": 0.3284524977207184, "learning_rate": 5.616275400127594e-06, "loss": 0.0688, "step": 14630 }, { "epoch": 55.87786259541985, "grad_norm": 0.3089140057563782, "learning_rate": 5.610805486327254e-06, "loss": 0.0686, "step": 14640 }, { "epoch": 55.916030534351144, "grad_norm": 0.4001990556716919, "learning_rate": 5.6053348303406545e-06, "loss": 0.0766, "step": 14650 }, { "epoch": 55.954198473282446, "grad_norm": 0.5436351299285889, "learning_rate": 5.599863438815157e-06, "loss": 0.0682, "step": 14660 }, { "epoch": 55.99236641221374, "grad_norm": 0.47523510456085205, "learning_rate": 5.594391318399017e-06, "loss": 0.0724, "step": 14670 }, { "epoch": 56.030534351145036, "grad_norm": 0.2652002274990082, "learning_rate": 5.588918475741378e-06, "loss": 0.0714, "step": 14680 }, { "epoch": 56.06870229007634, "grad_norm": 0.2382209599018097, "learning_rate": 5.583444917492259e-06, "loss": 0.0711, "step": 14690 }, { "epoch": 56.10687022900763, "grad_norm": 0.4424055516719818, "learning_rate": 5.57797065030255e-06, "loss": 0.0698, "step": 14700 }, { "epoch": 56.14503816793893, "grad_norm": 0.25786203145980835, "learning_rate": 5.572495680824001e-06, "loss": 0.0745, "step": 14710 }, { "epoch": 56.18320610687023, "grad_norm": 0.7077968716621399, "learning_rate": 5.5670200157092195e-06, "loss": 0.0747, "step": 14720 }, { "epoch": 56.221374045801525, "grad_norm": 0.32011500000953674, "learning_rate": 5.561543661611649e-06, "loss": 0.0728, "step": 14730 }, { "epoch": 56.25954198473283, "grad_norm": 0.6672192811965942, "learning_rate": 5.556066625185584e-06, "loss": 0.0744, "step": 14740 }, { "epoch": 56.29770992366412, "grad_norm": 1.5504484176635742, "learning_rate": 5.550588913086131e-06, "loss": 0.0781, "step": 14750 }, { "epoch": 56.33587786259542, "grad_norm": 0.32770517468452454, "learning_rate": 5.545110531969234e-06, "loss": 0.0737, "step": 14760 }, { "epoch": 56.37404580152672, "grad_norm": 0.42965736985206604, "learning_rate": 5.539631488491641e-06, "loss": 0.0736, "step": 14770 }, { "epoch": 56.412213740458014, "grad_norm": 0.277537077665329, "learning_rate": 5.534151789310904e-06, "loss": 0.0774, "step": 14780 }, { "epoch": 56.45038167938931, "grad_norm": 0.5046352744102478, "learning_rate": 5.528671441085376e-06, "loss": 0.0713, "step": 14790 }, { "epoch": 56.48854961832061, "grad_norm": 1.0368688106536865, "learning_rate": 5.523190450474198e-06, "loss": 0.0766, "step": 14800 }, { "epoch": 56.52671755725191, "grad_norm": 0.36909282207489014, "learning_rate": 5.517708824137287e-06, "loss": 0.0755, "step": 14810 }, { "epoch": 56.56488549618321, "grad_norm": 0.5959531664848328, "learning_rate": 5.512226568735338e-06, "loss": 0.0817, "step": 14820 }, { "epoch": 56.603053435114504, "grad_norm": 0.2804238796234131, "learning_rate": 5.506743690929809e-06, "loss": 0.0779, "step": 14830 }, { "epoch": 56.6412213740458, "grad_norm": 0.6354565620422363, "learning_rate": 5.501260197382913e-06, "loss": 0.0833, "step": 14840 }, { "epoch": 56.6793893129771, "grad_norm": 0.6478805541992188, "learning_rate": 5.49577609475761e-06, "loss": 0.0808, "step": 14850 }, { "epoch": 56.717557251908396, "grad_norm": 0.39894139766693115, "learning_rate": 5.4902913897176035e-06, "loss": 0.0729, "step": 14860 }, { "epoch": 56.7557251908397, "grad_norm": 0.346055805683136, "learning_rate": 5.484806088927329e-06, "loss": 0.0772, "step": 14870 }, { "epoch": 56.79389312977099, "grad_norm": 0.3596509099006653, "learning_rate": 5.479320199051942e-06, "loss": 0.0701, "step": 14880 }, { "epoch": 56.83206106870229, "grad_norm": 0.46233484148979187, "learning_rate": 5.473833726757314e-06, "loss": 0.0734, "step": 14890 }, { "epoch": 56.87022900763359, "grad_norm": 0.7055529952049255, "learning_rate": 5.46834667871003e-06, "loss": 0.0718, "step": 14900 }, { "epoch": 56.908396946564885, "grad_norm": 0.9955065846443176, "learning_rate": 5.462859061577369e-06, "loss": 0.0794, "step": 14910 }, { "epoch": 56.94656488549618, "grad_norm": 0.34203898906707764, "learning_rate": 5.457370882027303e-06, "loss": 0.0739, "step": 14920 }, { "epoch": 56.98473282442748, "grad_norm": 0.21919408440589905, "learning_rate": 5.451882146728489e-06, "loss": 0.077, "step": 14930 }, { "epoch": 57.02290076335878, "grad_norm": 0.4146571457386017, "learning_rate": 5.446392862350255e-06, "loss": 0.0791, "step": 14940 }, { "epoch": 57.06106870229008, "grad_norm": 0.4011072516441345, "learning_rate": 5.4409030355626035e-06, "loss": 0.0835, "step": 14950 }, { "epoch": 57.099236641221374, "grad_norm": 0.2836225628852844, "learning_rate": 5.435412673036188e-06, "loss": 0.0835, "step": 14960 }, { "epoch": 57.13740458015267, "grad_norm": 0.41876521706581116, "learning_rate": 5.429921781442318e-06, "loss": 0.0754, "step": 14970 }, { "epoch": 57.17557251908397, "grad_norm": 0.24527506530284882, "learning_rate": 5.424430367452946e-06, "loss": 0.0717, "step": 14980 }, { "epoch": 57.213740458015266, "grad_norm": 0.5479999780654907, "learning_rate": 5.418938437740655e-06, "loss": 0.073, "step": 14990 }, { "epoch": 57.25190839694657, "grad_norm": 1.0041180849075317, "learning_rate": 5.413445998978658e-06, "loss": 0.0765, "step": 15000 }, { "epoch": 57.29007633587786, "grad_norm": 0.34665971994400024, "learning_rate": 5.4079530578407895e-06, "loss": 0.0738, "step": 15010 }, { "epoch": 57.32824427480916, "grad_norm": 0.3036256730556488, "learning_rate": 5.402459621001486e-06, "loss": 0.0675, "step": 15020 }, { "epoch": 57.36641221374046, "grad_norm": 0.36312854290008545, "learning_rate": 5.396965695135794e-06, "loss": 0.08, "step": 15030 }, { "epoch": 57.404580152671755, "grad_norm": 0.2859637439250946, "learning_rate": 5.391471286919351e-06, "loss": 0.0661, "step": 15040 }, { "epoch": 57.44274809160305, "grad_norm": 0.4589112401008606, "learning_rate": 5.385976403028381e-06, "loss": 0.0697, "step": 15050 }, { "epoch": 57.48091603053435, "grad_norm": 0.3610994517803192, "learning_rate": 5.380481050139688e-06, "loss": 0.0742, "step": 15060 }, { "epoch": 57.51908396946565, "grad_norm": 0.37091097235679626, "learning_rate": 5.37498523493064e-06, "loss": 0.0716, "step": 15070 }, { "epoch": 57.55725190839695, "grad_norm": 0.30698439478874207, "learning_rate": 5.369488964079172e-06, "loss": 0.0838, "step": 15080 }, { "epoch": 57.595419847328245, "grad_norm": 0.31963449716567993, "learning_rate": 5.363992244263774e-06, "loss": 0.0736, "step": 15090 }, { "epoch": 57.63358778625954, "grad_norm": 0.3956395089626312, "learning_rate": 5.358495082163476e-06, "loss": 0.0678, "step": 15100 }, { "epoch": 57.67175572519084, "grad_norm": 0.4276552200317383, "learning_rate": 5.35299748445785e-06, "loss": 0.0768, "step": 15110 }, { "epoch": 57.70992366412214, "grad_norm": 0.3108503818511963, "learning_rate": 5.347499457826995e-06, "loss": 0.0803, "step": 15120 }, { "epoch": 57.74809160305343, "grad_norm": 0.35856226086616516, "learning_rate": 5.342001008951531e-06, "loss": 0.0794, "step": 15130 }, { "epoch": 57.786259541984734, "grad_norm": 0.4197726547718048, "learning_rate": 5.336502144512592e-06, "loss": 0.0731, "step": 15140 }, { "epoch": 57.82442748091603, "grad_norm": 0.3682864308357239, "learning_rate": 5.331002871191817e-06, "loss": 0.0723, "step": 15150 }, { "epoch": 57.86259541984733, "grad_norm": 0.7807731628417969, "learning_rate": 5.325503195671345e-06, "loss": 0.0747, "step": 15160 }, { "epoch": 57.900763358778626, "grad_norm": 0.3009580373764038, "learning_rate": 5.320003124633795e-06, "loss": 0.0732, "step": 15170 }, { "epoch": 57.93893129770992, "grad_norm": 0.5685849189758301, "learning_rate": 5.314502664762275e-06, "loss": 0.0753, "step": 15180 }, { "epoch": 57.97709923664122, "grad_norm": 0.32221153378486633, "learning_rate": 5.3090018227403605e-06, "loss": 0.0841, "step": 15190 }, { "epoch": 58.01526717557252, "grad_norm": 0.4299134314060211, "learning_rate": 5.303500605252095e-06, "loss": 0.076, "step": 15200 }, { "epoch": 58.05343511450382, "grad_norm": 0.604964017868042, "learning_rate": 5.297999018981977e-06, "loss": 0.079, "step": 15210 }, { "epoch": 58.091603053435115, "grad_norm": 0.3654126226902008, "learning_rate": 5.2924970706149505e-06, "loss": 0.071, "step": 15220 }, { "epoch": 58.12977099236641, "grad_norm": 0.4091419279575348, "learning_rate": 5.286994766836402e-06, "loss": 0.0785, "step": 15230 }, { "epoch": 58.16793893129771, "grad_norm": 0.45684048533439636, "learning_rate": 5.2814921143321506e-06, "loss": 0.0678, "step": 15240 }, { "epoch": 58.20610687022901, "grad_norm": 0.32718759775161743, "learning_rate": 5.275989119788436e-06, "loss": 0.0739, "step": 15250 }, { "epoch": 58.2442748091603, "grad_norm": 0.36803168058395386, "learning_rate": 5.2704857898919195e-06, "loss": 0.0699, "step": 15260 }, { "epoch": 58.282442748091604, "grad_norm": 0.38092291355133057, "learning_rate": 5.264982131329661e-06, "loss": 0.0712, "step": 15270 }, { "epoch": 58.3206106870229, "grad_norm": 0.3395163118839264, "learning_rate": 5.259478150789128e-06, "loss": 0.0717, "step": 15280 }, { "epoch": 58.3587786259542, "grad_norm": 0.27850109338760376, "learning_rate": 5.253973854958173e-06, "loss": 0.0668, "step": 15290 }, { "epoch": 58.396946564885496, "grad_norm": 0.28005483746528625, "learning_rate": 5.2484692505250375e-06, "loss": 0.0762, "step": 15300 }, { "epoch": 58.43511450381679, "grad_norm": 0.3931730091571808, "learning_rate": 5.2429643441783325e-06, "loss": 0.0768, "step": 15310 }, { "epoch": 58.47328244274809, "grad_norm": 0.33013564348220825, "learning_rate": 5.237459142607041e-06, "loss": 0.0764, "step": 15320 }, { "epoch": 58.51145038167939, "grad_norm": 0.3280738592147827, "learning_rate": 5.2319536525004974e-06, "loss": 0.0749, "step": 15330 }, { "epoch": 58.54961832061069, "grad_norm": 0.3545534610748291, "learning_rate": 5.226447880548398e-06, "loss": 0.0801, "step": 15340 }, { "epoch": 58.587786259541986, "grad_norm": 0.4157971739768982, "learning_rate": 5.220941833440768e-06, "loss": 0.0791, "step": 15350 }, { "epoch": 58.62595419847328, "grad_norm": 0.4566729962825775, "learning_rate": 5.215435517867978e-06, "loss": 0.0721, "step": 15360 }, { "epoch": 58.66412213740458, "grad_norm": 0.5043272972106934, "learning_rate": 5.209928940520719e-06, "loss": 0.0723, "step": 15370 }, { "epoch": 58.70229007633588, "grad_norm": 0.5831508636474609, "learning_rate": 5.204422108090004e-06, "loss": 0.0728, "step": 15380 }, { "epoch": 58.74045801526717, "grad_norm": 0.30468279123306274, "learning_rate": 5.19891502726715e-06, "loss": 0.0735, "step": 15390 }, { "epoch": 58.778625954198475, "grad_norm": 0.6569890975952148, "learning_rate": 5.193407704743782e-06, "loss": 0.0751, "step": 15400 }, { "epoch": 58.81679389312977, "grad_norm": 0.4217868745326996, "learning_rate": 5.1879001472118155e-06, "loss": 0.0742, "step": 15410 }, { "epoch": 58.85496183206107, "grad_norm": 0.3305950164794922, "learning_rate": 5.182392361363453e-06, "loss": 0.08, "step": 15420 }, { "epoch": 58.89312977099237, "grad_norm": 0.2556851804256439, "learning_rate": 5.176884353891172e-06, "loss": 0.0835, "step": 15430 }, { "epoch": 58.93129770992366, "grad_norm": 0.5155206322669983, "learning_rate": 5.171376131487722e-06, "loss": 0.0758, "step": 15440 }, { "epoch": 58.969465648854964, "grad_norm": 0.3321559429168701, "learning_rate": 5.165867700846113e-06, "loss": 0.071, "step": 15450 }, { "epoch": 59.00763358778626, "grad_norm": 0.49137699604034424, "learning_rate": 5.1603590686596065e-06, "loss": 0.08, "step": 15460 }, { "epoch": 59.045801526717554, "grad_norm": 0.4781742990016937, "learning_rate": 5.154850241621712e-06, "loss": 0.0768, "step": 15470 }, { "epoch": 59.083969465648856, "grad_norm": 0.5240231156349182, "learning_rate": 5.149341226426172e-06, "loss": 0.0742, "step": 15480 }, { "epoch": 59.12213740458015, "grad_norm": 0.5776994824409485, "learning_rate": 5.143832029766959e-06, "loss": 0.0685, "step": 15490 }, { "epoch": 59.16030534351145, "grad_norm": 0.23645877838134766, "learning_rate": 5.138322658338269e-06, "loss": 0.0668, "step": 15500 }, { "epoch": 59.19847328244275, "grad_norm": 0.3662410378456116, "learning_rate": 5.132813118834504e-06, "loss": 0.0765, "step": 15510 }, { "epoch": 59.23664122137404, "grad_norm": 0.8370821475982666, "learning_rate": 5.127303417950278e-06, "loss": 0.0758, "step": 15520 }, { "epoch": 59.274809160305345, "grad_norm": 0.29751116037368774, "learning_rate": 5.121793562380395e-06, "loss": 0.0799, "step": 15530 }, { "epoch": 59.31297709923664, "grad_norm": 0.3275250792503357, "learning_rate": 5.116283558819848e-06, "loss": 0.0758, "step": 15540 }, { "epoch": 59.35114503816794, "grad_norm": 0.36758387088775635, "learning_rate": 5.110773413963813e-06, "loss": 0.071, "step": 15550 }, { "epoch": 59.38931297709924, "grad_norm": 0.4558661878108978, "learning_rate": 5.1052631345076365e-06, "loss": 0.0754, "step": 15560 }, { "epoch": 59.42748091603053, "grad_norm": 0.7922174334526062, "learning_rate": 5.099752727146824e-06, "loss": 0.0802, "step": 15570 }, { "epoch": 59.465648854961835, "grad_norm": 0.5909420847892761, "learning_rate": 5.0942421985770415e-06, "loss": 0.0734, "step": 15580 }, { "epoch": 59.50381679389313, "grad_norm": 0.3770803213119507, "learning_rate": 5.088731555494102e-06, "loss": 0.0726, "step": 15590 }, { "epoch": 59.541984732824424, "grad_norm": 0.4959609806537628, "learning_rate": 5.083220804593956e-06, "loss": 0.0751, "step": 15600 }, { "epoch": 59.58015267175573, "grad_norm": 0.8414520621299744, "learning_rate": 5.077709952572685e-06, "loss": 0.0779, "step": 15610 }, { "epoch": 59.61832061068702, "grad_norm": 0.43522128462791443, "learning_rate": 5.072199006126494e-06, "loss": 0.072, "step": 15620 }, { "epoch": 59.656488549618324, "grad_norm": 0.3183342218399048, "learning_rate": 5.066687971951702e-06, "loss": 0.0743, "step": 15630 }, { "epoch": 59.69465648854962, "grad_norm": 0.25728127360343933, "learning_rate": 5.0611768567447375e-06, "loss": 0.071, "step": 15640 }, { "epoch": 59.732824427480914, "grad_norm": 0.24269990622997284, "learning_rate": 5.055665667202121e-06, "loss": 0.0682, "step": 15650 }, { "epoch": 59.770992366412216, "grad_norm": 0.3147321343421936, "learning_rate": 5.050154410020473e-06, "loss": 0.072, "step": 15660 }, { "epoch": 59.80916030534351, "grad_norm": 0.259830504655838, "learning_rate": 5.044643091896485e-06, "loss": 0.0728, "step": 15670 }, { "epoch": 59.847328244274806, "grad_norm": 0.31697461009025574, "learning_rate": 5.039131719526932e-06, "loss": 0.0709, "step": 15680 }, { "epoch": 59.88549618320611, "grad_norm": 0.6029514670372009, "learning_rate": 5.03362029960865e-06, "loss": 0.0728, "step": 15690 }, { "epoch": 59.9236641221374, "grad_norm": 0.33722543716430664, "learning_rate": 5.028108838838533e-06, "loss": 0.0742, "step": 15700 }, { "epoch": 59.961832061068705, "grad_norm": 0.6031141877174377, "learning_rate": 5.022597343913528e-06, "loss": 0.073, "step": 15710 }, { "epoch": 60.0, "grad_norm": 0.7514012455940247, "learning_rate": 5.017085821530617e-06, "loss": 0.0768, "step": 15720 }, { "epoch": 60.038167938931295, "grad_norm": 0.3948346972465515, "learning_rate": 5.011574278386823e-06, "loss": 0.0766, "step": 15730 }, { "epoch": 60.0763358778626, "grad_norm": 0.3414020538330078, "learning_rate": 5.006062721179189e-06, "loss": 0.0766, "step": 15740 }, { "epoch": 60.11450381679389, "grad_norm": 0.2455693781375885, "learning_rate": 5.000551156604777e-06, "loss": 0.0718, "step": 15750 }, { "epoch": 60.152671755725194, "grad_norm": 0.7095368504524231, "learning_rate": 4.99503959136066e-06, "loss": 0.07, "step": 15760 }, { "epoch": 60.19083969465649, "grad_norm": 0.37839195132255554, "learning_rate": 4.9895280321439036e-06, "loss": 0.0733, "step": 15770 }, { "epoch": 60.229007633587784, "grad_norm": 0.3145567774772644, "learning_rate": 4.984016485651578e-06, "loss": 0.0784, "step": 15780 }, { "epoch": 60.267175572519086, "grad_norm": 0.24902135133743286, "learning_rate": 4.978504958580728e-06, "loss": 0.0767, "step": 15790 }, { "epoch": 60.30534351145038, "grad_norm": 0.5785359144210815, "learning_rate": 4.9729934576283815e-06, "loss": 0.0916, "step": 15800 }, { "epoch": 60.343511450381676, "grad_norm": 0.6354800462722778, "learning_rate": 4.967481989491531e-06, "loss": 0.0777, "step": 15810 }, { "epoch": 60.38167938931298, "grad_norm": 0.6582098603248596, "learning_rate": 4.961970560867126e-06, "loss": 0.0743, "step": 15820 }, { "epoch": 60.41984732824427, "grad_norm": 0.5716459155082703, "learning_rate": 4.956459178452079e-06, "loss": 0.0869, "step": 15830 }, { "epoch": 60.458015267175576, "grad_norm": 0.3027888536453247, "learning_rate": 4.950947848943235e-06, "loss": 0.0897, "step": 15840 }, { "epoch": 60.49618320610687, "grad_norm": 0.4717438519001007, "learning_rate": 4.94543657903738e-06, "loss": 0.0734, "step": 15850 }, { "epoch": 60.534351145038165, "grad_norm": 0.3485773503780365, "learning_rate": 4.939925375431226e-06, "loss": 0.0764, "step": 15860 }, { "epoch": 60.57251908396947, "grad_norm": 0.46527671813964844, "learning_rate": 4.934414244821405e-06, "loss": 0.0734, "step": 15870 }, { "epoch": 60.61068702290076, "grad_norm": 0.2664990723133087, "learning_rate": 4.928903193904461e-06, "loss": 0.0738, "step": 15880 }, { "epoch": 60.64885496183206, "grad_norm": 0.23172251880168915, "learning_rate": 4.923392229376841e-06, "loss": 0.0695, "step": 15890 }, { "epoch": 60.68702290076336, "grad_norm": 0.6194725632667542, "learning_rate": 4.917881357934886e-06, "loss": 0.0733, "step": 15900 }, { "epoch": 60.725190839694655, "grad_norm": 0.5128942131996155, "learning_rate": 4.912370586274825e-06, "loss": 0.0675, "step": 15910 }, { "epoch": 60.76335877862596, "grad_norm": 0.3195113241672516, "learning_rate": 4.906859921092763e-06, "loss": 0.0694, "step": 15920 }, { "epoch": 60.80152671755725, "grad_norm": 0.4648715555667877, "learning_rate": 4.901349369084681e-06, "loss": 0.0752, "step": 15930 }, { "epoch": 60.83969465648855, "grad_norm": 0.37270602583885193, "learning_rate": 4.895838936946416e-06, "loss": 0.0813, "step": 15940 }, { "epoch": 60.87786259541985, "grad_norm": 0.3094925284385681, "learning_rate": 4.890328631373666e-06, "loss": 0.0787, "step": 15950 }, { "epoch": 60.916030534351144, "grad_norm": 0.8085688352584839, "learning_rate": 4.88481845906197e-06, "loss": 0.077, "step": 15960 }, { "epoch": 60.954198473282446, "grad_norm": 0.45573312044143677, "learning_rate": 4.879308426706707e-06, "loss": 0.0752, "step": 15970 }, { "epoch": 60.99236641221374, "grad_norm": 0.29330453276634216, "learning_rate": 4.873798541003084e-06, "loss": 0.0805, "step": 15980 }, { "epoch": 61.030534351145036, "grad_norm": 0.26652470231056213, "learning_rate": 4.868288808646136e-06, "loss": 0.074, "step": 15990 }, { "epoch": 61.06870229007634, "grad_norm": 0.2818049490451813, "learning_rate": 4.862779236330705e-06, "loss": 0.0706, "step": 16000 }, { "epoch": 61.10687022900763, "grad_norm": 0.32146692276000977, "learning_rate": 4.8572698307514395e-06, "loss": 0.0704, "step": 16010 }, { "epoch": 61.14503816793893, "grad_norm": 0.3116312325000763, "learning_rate": 4.8517605986027904e-06, "loss": 0.0671, "step": 16020 }, { "epoch": 61.18320610687023, "grad_norm": 0.3970802128314972, "learning_rate": 4.846251546578989e-06, "loss": 0.0745, "step": 16030 }, { "epoch": 61.221374045801525, "grad_norm": 0.35653743147850037, "learning_rate": 4.8407426813740584e-06, "loss": 0.0737, "step": 16040 }, { "epoch": 61.25954198473283, "grad_norm": 0.3067050576210022, "learning_rate": 4.835234009681787e-06, "loss": 0.0702, "step": 16050 }, { "epoch": 61.29770992366412, "grad_norm": 0.5067670941352844, "learning_rate": 4.82972553819573e-06, "loss": 0.0713, "step": 16060 }, { "epoch": 61.33587786259542, "grad_norm": 0.31573083996772766, "learning_rate": 4.824217273609199e-06, "loss": 0.071, "step": 16070 }, { "epoch": 61.37404580152672, "grad_norm": 0.29241570830345154, "learning_rate": 4.818709222615255e-06, "loss": 0.0712, "step": 16080 }, { "epoch": 61.412213740458014, "grad_norm": 0.29491475224494934, "learning_rate": 4.813201391906702e-06, "loss": 0.0783, "step": 16090 }, { "epoch": 61.45038167938931, "grad_norm": 0.5220329761505127, "learning_rate": 4.807693788176071e-06, "loss": 0.072, "step": 16100 }, { "epoch": 61.48854961832061, "grad_norm": 0.27246901392936707, "learning_rate": 4.802186418115622e-06, "loss": 0.0756, "step": 16110 }, { "epoch": 61.52671755725191, "grad_norm": 0.7175832390785217, "learning_rate": 4.796679288417326e-06, "loss": 0.0762, "step": 16120 }, { "epoch": 61.56488549618321, "grad_norm": 0.4071796238422394, "learning_rate": 4.791172405772866e-06, "loss": 0.0787, "step": 16130 }, { "epoch": 61.603053435114504, "grad_norm": 0.2860810458660126, "learning_rate": 4.785665776873626e-06, "loss": 0.0705, "step": 16140 }, { "epoch": 61.6412213740458, "grad_norm": 0.34713274240493774, "learning_rate": 4.780159408410677e-06, "loss": 0.0759, "step": 16150 }, { "epoch": 61.6793893129771, "grad_norm": 0.2859640419483185, "learning_rate": 4.774653307074775e-06, "loss": 0.0855, "step": 16160 }, { "epoch": 61.717557251908396, "grad_norm": 0.30147480964660645, "learning_rate": 4.7691474795563556e-06, "loss": 0.0885, "step": 16170 }, { "epoch": 61.7557251908397, "grad_norm": 0.516560435295105, "learning_rate": 4.763641932545516e-06, "loss": 0.0729, "step": 16180 }, { "epoch": 61.79389312977099, "grad_norm": 0.29214778542518616, "learning_rate": 4.758136672732013e-06, "loss": 0.0762, "step": 16190 }, { "epoch": 61.83206106870229, "grad_norm": 0.3169700503349304, "learning_rate": 4.752631706805261e-06, "loss": 0.069, "step": 16200 }, { "epoch": 61.87022900763359, "grad_norm": 0.4579392969608307, "learning_rate": 4.747127041454311e-06, "loss": 0.0845, "step": 16210 }, { "epoch": 61.908396946564885, "grad_norm": 0.4441590905189514, "learning_rate": 4.741622683367849e-06, "loss": 0.0681, "step": 16220 }, { "epoch": 61.94656488549618, "grad_norm": 0.3080080449581146, "learning_rate": 4.736118639234191e-06, "loss": 0.0759, "step": 16230 }, { "epoch": 61.98473282442748, "grad_norm": 0.23667652904987335, "learning_rate": 4.7306149157412666e-06, "loss": 0.0729, "step": 16240 }, { "epoch": 62.02290076335878, "grad_norm": 0.3574734926223755, "learning_rate": 4.7251115195766234e-06, "loss": 0.0759, "step": 16250 }, { "epoch": 62.06106870229008, "grad_norm": 1.1783217191696167, "learning_rate": 4.719608457427404e-06, "loss": 0.0943, "step": 16260 }, { "epoch": 62.099236641221374, "grad_norm": 0.46935534477233887, "learning_rate": 4.714105735980348e-06, "loss": 0.0772, "step": 16270 }, { "epoch": 62.13740458015267, "grad_norm": 0.32659921050071716, "learning_rate": 4.708603361921779e-06, "loss": 0.0761, "step": 16280 }, { "epoch": 62.17557251908397, "grad_norm": 0.37154969573020935, "learning_rate": 4.703101341937604e-06, "loss": 0.0764, "step": 16290 }, { "epoch": 62.213740458015266, "grad_norm": 0.5954548716545105, "learning_rate": 4.697599682713292e-06, "loss": 0.0725, "step": 16300 }, { "epoch": 62.25190839694657, "grad_norm": 0.43994778394699097, "learning_rate": 4.692098390933883e-06, "loss": 0.0798, "step": 16310 }, { "epoch": 62.29007633587786, "grad_norm": 0.42407238483428955, "learning_rate": 4.686597473283962e-06, "loss": 0.0674, "step": 16320 }, { "epoch": 62.32824427480916, "grad_norm": 0.31318119168281555, "learning_rate": 4.681096936447662e-06, "loss": 0.0801, "step": 16330 }, { "epoch": 62.36641221374046, "grad_norm": 0.4887228310108185, "learning_rate": 4.675596787108652e-06, "loss": 0.0717, "step": 16340 }, { "epoch": 62.404580152671755, "grad_norm": 0.2779092788696289, "learning_rate": 4.670097031950138e-06, "loss": 0.0712, "step": 16350 }, { "epoch": 62.44274809160305, "grad_norm": 0.839206874370575, "learning_rate": 4.664597677654839e-06, "loss": 0.0699, "step": 16360 }, { "epoch": 62.48091603053435, "grad_norm": 0.25964847207069397, "learning_rate": 4.6590987309049855e-06, "loss": 0.0636, "step": 16370 }, { "epoch": 62.51908396946565, "grad_norm": 0.34289413690567017, "learning_rate": 4.65360019838232e-06, "loss": 0.0779, "step": 16380 }, { "epoch": 62.55725190839695, "grad_norm": 0.33554473519325256, "learning_rate": 4.648102086768077e-06, "loss": 0.0791, "step": 16390 }, { "epoch": 62.595419847328245, "grad_norm": 0.4923129677772522, "learning_rate": 4.642604402742979e-06, "loss": 0.073, "step": 16400 }, { "epoch": 62.63358778625954, "grad_norm": 0.44560110569000244, "learning_rate": 4.6371071529872336e-06, "loss": 0.0735, "step": 16410 }, { "epoch": 62.67175572519084, "grad_norm": 0.47609812021255493, "learning_rate": 4.6316103441805155e-06, "loss": 0.071, "step": 16420 }, { "epoch": 62.70992366412214, "grad_norm": 0.6052244901657104, "learning_rate": 4.626113983001965e-06, "loss": 0.0764, "step": 16430 }, { "epoch": 62.74809160305343, "grad_norm": 0.3735920786857605, "learning_rate": 4.620618076130182e-06, "loss": 0.0671, "step": 16440 }, { "epoch": 62.786259541984734, "grad_norm": 0.279205858707428, "learning_rate": 4.615122630243207e-06, "loss": 0.0767, "step": 16450 }, { "epoch": 62.82442748091603, "grad_norm": 0.7630006670951843, "learning_rate": 4.60962765201853e-06, "loss": 0.0739, "step": 16460 }, { "epoch": 62.86259541984733, "grad_norm": 0.22555769979953766, "learning_rate": 4.604133148133066e-06, "loss": 0.071, "step": 16470 }, { "epoch": 62.900763358778626, "grad_norm": 0.42524755001068115, "learning_rate": 4.598639125263155e-06, "loss": 0.0794, "step": 16480 }, { "epoch": 62.93893129770992, "grad_norm": 0.38487425446510315, "learning_rate": 4.593145590084553e-06, "loss": 0.0764, "step": 16490 }, { "epoch": 62.97709923664122, "grad_norm": 0.3085957467556, "learning_rate": 4.58765254927242e-06, "loss": 0.0691, "step": 16500 }, { "epoch": 63.01526717557252, "grad_norm": 0.4019497334957123, "learning_rate": 4.582160009501323e-06, "loss": 0.0692, "step": 16510 }, { "epoch": 63.05343511450382, "grad_norm": 0.2319970428943634, "learning_rate": 4.576667977445214e-06, "loss": 0.0677, "step": 16520 }, { "epoch": 63.091603053435115, "grad_norm": 0.4725869596004486, "learning_rate": 4.571176459777431e-06, "loss": 0.0739, "step": 16530 }, { "epoch": 63.12977099236641, "grad_norm": 0.3336828351020813, "learning_rate": 4.565685463170685e-06, "loss": 0.0676, "step": 16540 }, { "epoch": 63.16793893129771, "grad_norm": 0.6508419513702393, "learning_rate": 4.560194994297054e-06, "loss": 0.0765, "step": 16550 }, { "epoch": 63.20610687022901, "grad_norm": 1.2349802255630493, "learning_rate": 4.554705059827974e-06, "loss": 0.0974, "step": 16560 }, { "epoch": 63.2442748091603, "grad_norm": 0.23870617151260376, "learning_rate": 4.549215666434237e-06, "loss": 0.0632, "step": 16570 }, { "epoch": 63.282442748091604, "grad_norm": 0.45109471678733826, "learning_rate": 4.54372682078597e-06, "loss": 0.0704, "step": 16580 }, { "epoch": 63.3206106870229, "grad_norm": 0.9649439454078674, "learning_rate": 4.538238529552641e-06, "loss": 0.0808, "step": 16590 }, { "epoch": 63.3587786259542, "grad_norm": 0.35490062832832336, "learning_rate": 4.532750799403039e-06, "loss": 0.0762, "step": 16600 }, { "epoch": 63.396946564885496, "grad_norm": 0.453528493642807, "learning_rate": 4.527263637005274e-06, "loss": 0.0764, "step": 16610 }, { "epoch": 63.43511450381679, "grad_norm": 0.4732442796230316, "learning_rate": 4.521777049026767e-06, "loss": 0.0772, "step": 16620 }, { "epoch": 63.47328244274809, "grad_norm": 0.4358898401260376, "learning_rate": 4.516291042134238e-06, "loss": 0.0697, "step": 16630 }, { "epoch": 63.51145038167939, "grad_norm": 0.3423472046852112, "learning_rate": 4.5108056229937055e-06, "loss": 0.0741, "step": 16640 }, { "epoch": 63.54961832061069, "grad_norm": 0.5359134078025818, "learning_rate": 4.505320798270467e-06, "loss": 0.072, "step": 16650 }, { "epoch": 63.587786259541986, "grad_norm": 0.2593790590763092, "learning_rate": 4.4998365746291045e-06, "loss": 0.0761, "step": 16660 }, { "epoch": 63.62595419847328, "grad_norm": 0.24456137418746948, "learning_rate": 4.494352958733466e-06, "loss": 0.0652, "step": 16670 }, { "epoch": 63.66412213740458, "grad_norm": 1.0902378559112549, "learning_rate": 4.4888699572466624e-06, "loss": 0.0741, "step": 16680 }, { "epoch": 63.70229007633588, "grad_norm": 0.3199808895587921, "learning_rate": 4.483387576831058e-06, "loss": 0.0774, "step": 16690 }, { "epoch": 63.74045801526717, "grad_norm": 1.09122896194458, "learning_rate": 4.47790582414826e-06, "loss": 0.0737, "step": 16700 }, { "epoch": 63.778625954198475, "grad_norm": 0.3936939537525177, "learning_rate": 4.472424705859115e-06, "loss": 0.0705, "step": 16710 }, { "epoch": 63.81679389312977, "grad_norm": 0.3976505398750305, "learning_rate": 4.466944228623701e-06, "loss": 0.073, "step": 16720 }, { "epoch": 63.85496183206107, "grad_norm": 0.2906537652015686, "learning_rate": 4.4614643991013125e-06, "loss": 0.0748, "step": 16730 }, { "epoch": 63.89312977099237, "grad_norm": 0.31357595324516296, "learning_rate": 4.45598522395046e-06, "loss": 0.0729, "step": 16740 }, { "epoch": 63.93129770992366, "grad_norm": 0.30560722947120667, "learning_rate": 4.450506709828858e-06, "loss": 0.0751, "step": 16750 }, { "epoch": 63.969465648854964, "grad_norm": 0.3903256356716156, "learning_rate": 4.445028863393417e-06, "loss": 0.0737, "step": 16760 }, { "epoch": 64.00763358778626, "grad_norm": 0.5252776741981506, "learning_rate": 4.439551691300236e-06, "loss": 0.0748, "step": 16770 }, { "epoch": 64.04580152671755, "grad_norm": 0.389498233795166, "learning_rate": 4.4340752002045985e-06, "loss": 0.0749, "step": 16780 }, { "epoch": 64.08396946564885, "grad_norm": 0.3666820228099823, "learning_rate": 4.428599396760957e-06, "loss": 0.0691, "step": 16790 }, { "epoch": 64.12213740458016, "grad_norm": 0.30693432688713074, "learning_rate": 4.4231242876229256e-06, "loss": 0.0736, "step": 16800 }, { "epoch": 64.16030534351145, "grad_norm": 0.44508853554725647, "learning_rate": 4.417649879443282e-06, "loss": 0.0683, "step": 16810 }, { "epoch": 64.19847328244275, "grad_norm": 0.3270077109336853, "learning_rate": 4.4121761788739445e-06, "loss": 0.0672, "step": 16820 }, { "epoch": 64.23664122137404, "grad_norm": 0.2393168956041336, "learning_rate": 4.406703192565981e-06, "loss": 0.0708, "step": 16830 }, { "epoch": 64.27480916030534, "grad_norm": 0.4205631613731384, "learning_rate": 4.401230927169582e-06, "loss": 0.0735, "step": 16840 }, { "epoch": 64.31297709923665, "grad_norm": 0.7220420241355896, "learning_rate": 4.395759389334067e-06, "loss": 0.0704, "step": 16850 }, { "epoch": 64.35114503816794, "grad_norm": 0.4618149697780609, "learning_rate": 4.3902885857078685e-06, "loss": 0.0716, "step": 16860 }, { "epoch": 64.38931297709924, "grad_norm": 0.304708868265152, "learning_rate": 4.384818522938531e-06, "loss": 0.074, "step": 16870 }, { "epoch": 64.42748091603053, "grad_norm": 0.6205267310142517, "learning_rate": 4.379349207672696e-06, "loss": 0.0716, "step": 16880 }, { "epoch": 64.46564885496183, "grad_norm": 0.4849410057067871, "learning_rate": 4.3738806465560975e-06, "loss": 0.0744, "step": 16890 }, { "epoch": 64.50381679389314, "grad_norm": 0.2881622612476349, "learning_rate": 4.368412846233554e-06, "loss": 0.0694, "step": 16900 }, { "epoch": 64.54198473282443, "grad_norm": 0.5529125928878784, "learning_rate": 4.362945813348956e-06, "loss": 0.0724, "step": 16910 }, { "epoch": 64.58015267175573, "grad_norm": 0.41142237186431885, "learning_rate": 4.357479554545263e-06, "loss": 0.0715, "step": 16920 }, { "epoch": 64.61832061068702, "grad_norm": 0.6459675431251526, "learning_rate": 4.352014076464499e-06, "loss": 0.0723, "step": 16930 }, { "epoch": 64.65648854961832, "grad_norm": 0.5050190091133118, "learning_rate": 4.346549385747733e-06, "loss": 0.0754, "step": 16940 }, { "epoch": 64.69465648854961, "grad_norm": 0.3293865919113159, "learning_rate": 4.34108548903508e-06, "loss": 0.0696, "step": 16950 }, { "epoch": 64.73282442748092, "grad_norm": 0.43672609329223633, "learning_rate": 4.335622392965689e-06, "loss": 0.0745, "step": 16960 }, { "epoch": 64.77099236641222, "grad_norm": 0.43421950936317444, "learning_rate": 4.330160104177738e-06, "loss": 0.0691, "step": 16970 }, { "epoch": 64.80916030534351, "grad_norm": 1.2267439365386963, "learning_rate": 4.324698629308419e-06, "loss": 0.0748, "step": 16980 }, { "epoch": 64.8473282442748, "grad_norm": 0.27527856826782227, "learning_rate": 4.3192379749939466e-06, "loss": 0.0736, "step": 16990 }, { "epoch": 64.8854961832061, "grad_norm": 0.84715735912323, "learning_rate": 4.313778147869524e-06, "loss": 0.0794, "step": 17000 }, { "epoch": 64.92366412213741, "grad_norm": 0.49666497111320496, "learning_rate": 4.308319154569358e-06, "loss": 0.0752, "step": 17010 }, { "epoch": 64.9618320610687, "grad_norm": 0.6374955773353577, "learning_rate": 4.302861001726642e-06, "loss": 0.0796, "step": 17020 }, { "epoch": 65.0, "grad_norm": 0.31495794653892517, "learning_rate": 4.297403695973542e-06, "loss": 0.0733, "step": 17030 }, { "epoch": 65.0381679389313, "grad_norm": 0.4066680073738098, "learning_rate": 4.291947243941203e-06, "loss": 0.0755, "step": 17040 }, { "epoch": 65.07633587786259, "grad_norm": 0.33870530128479004, "learning_rate": 4.286491652259729e-06, "loss": 0.0676, "step": 17050 }, { "epoch": 65.1145038167939, "grad_norm": 0.2708606421947479, "learning_rate": 4.281036927558178e-06, "loss": 0.0708, "step": 17060 }, { "epoch": 65.1526717557252, "grad_norm": 0.2700878083705902, "learning_rate": 4.275583076464552e-06, "loss": 0.071, "step": 17070 }, { "epoch": 65.19083969465649, "grad_norm": 0.3311876952648163, "learning_rate": 4.270130105605794e-06, "loss": 0.0732, "step": 17080 }, { "epoch": 65.22900763358778, "grad_norm": 0.2695978581905365, "learning_rate": 4.264678021607782e-06, "loss": 0.0736, "step": 17090 }, { "epoch": 65.26717557251908, "grad_norm": 0.3300071954727173, "learning_rate": 4.259226831095311e-06, "loss": 0.0724, "step": 17100 }, { "epoch": 65.30534351145039, "grad_norm": 0.2677368223667145, "learning_rate": 4.25377654069209e-06, "loss": 0.0703, "step": 17110 }, { "epoch": 65.34351145038168, "grad_norm": 0.29270052909851074, "learning_rate": 4.248327157020737e-06, "loss": 0.0724, "step": 17120 }, { "epoch": 65.38167938931298, "grad_norm": 0.6969024538993835, "learning_rate": 4.242878686702763e-06, "loss": 0.0909, "step": 17130 }, { "epoch": 65.41984732824427, "grad_norm": 0.3158918023109436, "learning_rate": 4.237431136358579e-06, "loss": 0.0732, "step": 17140 }, { "epoch": 65.45801526717557, "grad_norm": 0.4324190318584442, "learning_rate": 4.231984512607471e-06, "loss": 0.0729, "step": 17150 }, { "epoch": 65.49618320610686, "grad_norm": 0.522308886051178, "learning_rate": 4.226538822067598e-06, "loss": 0.0781, "step": 17160 }, { "epoch": 65.53435114503817, "grad_norm": 0.7167192697525024, "learning_rate": 4.22109407135599e-06, "loss": 0.0677, "step": 17170 }, { "epoch": 65.57251908396947, "grad_norm": 0.4006265103816986, "learning_rate": 4.2156502670885304e-06, "loss": 0.0681, "step": 17180 }, { "epoch": 65.61068702290076, "grad_norm": 0.246806800365448, "learning_rate": 4.210207415879953e-06, "loss": 0.0702, "step": 17190 }, { "epoch": 65.64885496183206, "grad_norm": 0.3185785710811615, "learning_rate": 4.204765524343841e-06, "loss": 0.0648, "step": 17200 }, { "epoch": 65.68702290076335, "grad_norm": 0.2700112760066986, "learning_rate": 4.199324599092603e-06, "loss": 0.0742, "step": 17210 }, { "epoch": 65.72519083969466, "grad_norm": 0.7986395955085754, "learning_rate": 4.1938846467374745e-06, "loss": 0.0727, "step": 17220 }, { "epoch": 65.76335877862596, "grad_norm": 0.34733664989471436, "learning_rate": 4.1884456738885125e-06, "loss": 0.0647, "step": 17230 }, { "epoch": 65.80152671755725, "grad_norm": 0.35277488827705383, "learning_rate": 4.18300768715458e-06, "loss": 0.0784, "step": 17240 }, { "epoch": 65.83969465648855, "grad_norm": 0.31571856141090393, "learning_rate": 4.177570693143347e-06, "loss": 0.0736, "step": 17250 }, { "epoch": 65.87786259541984, "grad_norm": 0.47372332215309143, "learning_rate": 4.172134698461271e-06, "loss": 0.0749, "step": 17260 }, { "epoch": 65.91603053435115, "grad_norm": 0.3981868624687195, "learning_rate": 4.166699709713599e-06, "loss": 0.0677, "step": 17270 }, { "epoch": 65.95419847328245, "grad_norm": 0.4929441511631012, "learning_rate": 4.161265733504355e-06, "loss": 0.0733, "step": 17280 }, { "epoch": 65.99236641221374, "grad_norm": 0.24368400871753693, "learning_rate": 4.155832776436331e-06, "loss": 0.0687, "step": 17290 }, { "epoch": 66.03053435114504, "grad_norm": 0.3198246359825134, "learning_rate": 4.150400845111085e-06, "loss": 0.0813, "step": 17300 }, { "epoch": 66.06870229007633, "grad_norm": 0.288518488407135, "learning_rate": 4.144969946128923e-06, "loss": 0.072, "step": 17310 }, { "epoch": 66.10687022900764, "grad_norm": 0.2651676833629608, "learning_rate": 4.139540086088901e-06, "loss": 0.0696, "step": 17320 }, { "epoch": 66.14503816793894, "grad_norm": 0.21474707126617432, "learning_rate": 4.1341112715888106e-06, "loss": 0.0711, "step": 17330 }, { "epoch": 66.18320610687023, "grad_norm": 0.3536648452281952, "learning_rate": 4.128683509225172e-06, "loss": 0.0783, "step": 17340 }, { "epoch": 66.22137404580153, "grad_norm": 0.3147779703140259, "learning_rate": 4.123256805593231e-06, "loss": 0.0691, "step": 17350 }, { "epoch": 66.25954198473282, "grad_norm": 0.45088210701942444, "learning_rate": 4.117831167286943e-06, "loss": 0.0716, "step": 17360 }, { "epoch": 66.29770992366412, "grad_norm": 0.4173077940940857, "learning_rate": 4.112406600898968e-06, "loss": 0.0756, "step": 17370 }, { "epoch": 66.33587786259542, "grad_norm": 0.2509874403476715, "learning_rate": 4.106983113020669e-06, "loss": 0.0699, "step": 17380 }, { "epoch": 66.37404580152672, "grad_norm": 0.5545008778572083, "learning_rate": 4.101560710242094e-06, "loss": 0.0757, "step": 17390 }, { "epoch": 66.41221374045801, "grad_norm": 0.3343713879585266, "learning_rate": 4.096139399151971e-06, "loss": 0.0713, "step": 17400 }, { "epoch": 66.45038167938931, "grad_norm": 0.40432122349739075, "learning_rate": 4.090719186337709e-06, "loss": 0.0685, "step": 17410 }, { "epoch": 66.4885496183206, "grad_norm": 0.8056888580322266, "learning_rate": 4.085300078385375e-06, "loss": 0.0753, "step": 17420 }, { "epoch": 66.52671755725191, "grad_norm": 0.43857723474502563, "learning_rate": 4.079882081879696e-06, "loss": 0.0714, "step": 17430 }, { "epoch": 66.56488549618321, "grad_norm": 0.8525163531303406, "learning_rate": 4.074465203404048e-06, "loss": 0.0833, "step": 17440 }, { "epoch": 66.6030534351145, "grad_norm": 0.7485587000846863, "learning_rate": 4.06904944954045e-06, "loss": 0.07, "step": 17450 }, { "epoch": 66.6412213740458, "grad_norm": 0.22170186042785645, "learning_rate": 4.063634826869553e-06, "loss": 0.0746, "step": 17460 }, { "epoch": 66.6793893129771, "grad_norm": 0.40687039494514465, "learning_rate": 4.058221341970638e-06, "loss": 0.0709, "step": 17470 }, { "epoch": 66.7175572519084, "grad_norm": 0.3101550340652466, "learning_rate": 4.052809001421595e-06, "loss": 0.0653, "step": 17480 }, { "epoch": 66.7557251908397, "grad_norm": 0.4546465277671814, "learning_rate": 4.047397811798929e-06, "loss": 0.0698, "step": 17490 }, { "epoch": 66.79389312977099, "grad_norm": 0.35028156638145447, "learning_rate": 4.041987779677745e-06, "loss": 0.0774, "step": 17500 }, { "epoch": 66.83206106870229, "grad_norm": 0.47379425168037415, "learning_rate": 4.036578911631746e-06, "loss": 0.0726, "step": 17510 }, { "epoch": 66.87022900763358, "grad_norm": 0.6644511222839355, "learning_rate": 4.0311712142332115e-06, "loss": 0.069, "step": 17520 }, { "epoch": 66.90839694656489, "grad_norm": 0.6375014185905457, "learning_rate": 4.025764694053008e-06, "loss": 0.0676, "step": 17530 }, { "epoch": 66.94656488549619, "grad_norm": 0.39866891503334045, "learning_rate": 4.020359357660566e-06, "loss": 0.0773, "step": 17540 }, { "epoch": 66.98473282442748, "grad_norm": 0.31311362981796265, "learning_rate": 4.014955211623875e-06, "loss": 0.0661, "step": 17550 }, { "epoch": 67.02290076335878, "grad_norm": 0.42248404026031494, "learning_rate": 4.00955226250949e-06, "loss": 0.0787, "step": 17560 }, { "epoch": 67.06106870229007, "grad_norm": 0.7026179432868958, "learning_rate": 4.0041505168824976e-06, "loss": 0.0755, "step": 17570 }, { "epoch": 67.09923664122137, "grad_norm": 0.32166993618011475, "learning_rate": 3.99874998130653e-06, "loss": 0.0659, "step": 17580 }, { "epoch": 67.13740458015268, "grad_norm": 0.2971125543117523, "learning_rate": 3.993350662343746e-06, "loss": 0.0675, "step": 17590 }, { "epoch": 67.17557251908397, "grad_norm": 0.3091699182987213, "learning_rate": 3.987952566554828e-06, "loss": 0.0719, "step": 17600 }, { "epoch": 67.21374045801527, "grad_norm": 0.24681086838245392, "learning_rate": 3.982555700498971e-06, "loss": 0.0732, "step": 17610 }, { "epoch": 67.25190839694656, "grad_norm": 0.4795871078968048, "learning_rate": 3.977160070733878e-06, "loss": 0.0691, "step": 17620 }, { "epoch": 67.29007633587786, "grad_norm": 0.6124231815338135, "learning_rate": 3.971765683815746e-06, "loss": 0.0702, "step": 17630 }, { "epoch": 67.32824427480917, "grad_norm": 0.659125030040741, "learning_rate": 3.966372546299264e-06, "loss": 0.0744, "step": 17640 }, { "epoch": 67.36641221374046, "grad_norm": 0.6344574689865112, "learning_rate": 3.960980664737604e-06, "loss": 0.0723, "step": 17650 }, { "epoch": 67.40458015267176, "grad_norm": 0.3194618225097656, "learning_rate": 3.955590045682408e-06, "loss": 0.0714, "step": 17660 }, { "epoch": 67.44274809160305, "grad_norm": 0.44845250248908997, "learning_rate": 3.950200695683788e-06, "loss": 0.0723, "step": 17670 }, { "epoch": 67.48091603053435, "grad_norm": 1.0037466287612915, "learning_rate": 3.944812621290314e-06, "loss": 0.0691, "step": 17680 }, { "epoch": 67.51908396946565, "grad_norm": 0.2766275107860565, "learning_rate": 3.939425829049002e-06, "loss": 0.0717, "step": 17690 }, { "epoch": 67.55725190839695, "grad_norm": 0.654982328414917, "learning_rate": 3.934040325505313e-06, "loss": 0.0672, "step": 17700 }, { "epoch": 67.59541984732824, "grad_norm": 0.36373141407966614, "learning_rate": 3.928656117203141e-06, "loss": 0.0713, "step": 17710 }, { "epoch": 67.63358778625954, "grad_norm": 0.34860584139823914, "learning_rate": 3.923273210684809e-06, "loss": 0.0757, "step": 17720 }, { "epoch": 67.67175572519083, "grad_norm": 0.5683857202529907, "learning_rate": 3.917891612491055e-06, "loss": 0.0675, "step": 17730 }, { "epoch": 67.70992366412214, "grad_norm": 0.5951647162437439, "learning_rate": 3.912511329161027e-06, "loss": 0.0702, "step": 17740 }, { "epoch": 67.74809160305344, "grad_norm": 0.27941733598709106, "learning_rate": 3.907132367232279e-06, "loss": 0.0696, "step": 17750 }, { "epoch": 67.78625954198473, "grad_norm": 0.27847856283187866, "learning_rate": 3.901754733240753e-06, "loss": 0.0727, "step": 17760 }, { "epoch": 67.82442748091603, "grad_norm": 0.5478169322013855, "learning_rate": 3.896378433720786e-06, "loss": 0.0719, "step": 17770 }, { "epoch": 67.86259541984732, "grad_norm": 0.5233021378517151, "learning_rate": 3.891003475205086e-06, "loss": 0.065, "step": 17780 }, { "epoch": 67.90076335877862, "grad_norm": 0.2591749131679535, "learning_rate": 3.885629864224736e-06, "loss": 0.0731, "step": 17790 }, { "epoch": 67.93893129770993, "grad_norm": 0.39864397048950195, "learning_rate": 3.880257607309178e-06, "loss": 0.0697, "step": 17800 }, { "epoch": 67.97709923664122, "grad_norm": 0.46604353189468384, "learning_rate": 3.874886710986213e-06, "loss": 0.0708, "step": 17810 }, { "epoch": 68.01526717557252, "grad_norm": 0.35567206144332886, "learning_rate": 3.869517181781983e-06, "loss": 0.0679, "step": 17820 }, { "epoch": 68.05343511450381, "grad_norm": 0.8951524496078491, "learning_rate": 3.864149026220977e-06, "loss": 0.076, "step": 17830 }, { "epoch": 68.09160305343511, "grad_norm": 0.43323707580566406, "learning_rate": 3.858782250826009e-06, "loss": 0.0781, "step": 17840 }, { "epoch": 68.12977099236642, "grad_norm": 0.3977680206298828, "learning_rate": 3.853416862118214e-06, "loss": 0.0766, "step": 17850 }, { "epoch": 68.16793893129771, "grad_norm": 0.7726107239723206, "learning_rate": 3.8480528666170495e-06, "loss": 0.075, "step": 17860 }, { "epoch": 68.20610687022901, "grad_norm": 0.40981167554855347, "learning_rate": 3.8426902708402695e-06, "loss": 0.0697, "step": 17870 }, { "epoch": 68.2442748091603, "grad_norm": 0.23430369794368744, "learning_rate": 3.8373290813039404e-06, "loss": 0.0744, "step": 17880 }, { "epoch": 68.2824427480916, "grad_norm": 0.33602553606033325, "learning_rate": 3.83196930452241e-06, "loss": 0.0729, "step": 17890 }, { "epoch": 68.3206106870229, "grad_norm": 0.49401628971099854, "learning_rate": 3.826610947008313e-06, "loss": 0.0703, "step": 17900 }, { "epoch": 68.3587786259542, "grad_norm": 0.4614551067352295, "learning_rate": 3.821254015272558e-06, "loss": 0.0699, "step": 17910 }, { "epoch": 68.3969465648855, "grad_norm": 0.39021456241607666, "learning_rate": 3.8158985158243214e-06, "loss": 0.0722, "step": 17920 }, { "epoch": 68.43511450381679, "grad_norm": 0.2698560059070587, "learning_rate": 3.810544455171044e-06, "loss": 0.0725, "step": 17930 }, { "epoch": 68.47328244274809, "grad_norm": 0.339725136756897, "learning_rate": 3.805191839818412e-06, "loss": 0.0667, "step": 17940 }, { "epoch": 68.5114503816794, "grad_norm": 0.36522993445396423, "learning_rate": 3.7998406762703566e-06, "loss": 0.0783, "step": 17950 }, { "epoch": 68.54961832061069, "grad_norm": 0.4435858726501465, "learning_rate": 3.794490971029048e-06, "loss": 0.0769, "step": 17960 }, { "epoch": 68.58778625954199, "grad_norm": 0.3447798490524292, "learning_rate": 3.7891427305948815e-06, "loss": 0.0898, "step": 17970 }, { "epoch": 68.62595419847328, "grad_norm": 0.2673998177051544, "learning_rate": 3.7837959614664714e-06, "loss": 0.0806, "step": 17980 }, { "epoch": 68.66412213740458, "grad_norm": 0.3040778636932373, "learning_rate": 3.778450670140651e-06, "loss": 0.0745, "step": 17990 }, { "epoch": 68.70229007633588, "grad_norm": 0.3017938733100891, "learning_rate": 3.773106863112451e-06, "loss": 0.0692, "step": 18000 }, { "epoch": 68.74045801526718, "grad_norm": 0.28814712166786194, "learning_rate": 3.7677645468751e-06, "loss": 0.0656, "step": 18010 }, { "epoch": 68.77862595419847, "grad_norm": 0.5016555190086365, "learning_rate": 3.7624237279200175e-06, "loss": 0.0721, "step": 18020 }, { "epoch": 68.81679389312977, "grad_norm": 0.23837906122207642, "learning_rate": 3.7570844127367994e-06, "loss": 0.0642, "step": 18030 }, { "epoch": 68.85496183206106, "grad_norm": 0.5186642408370972, "learning_rate": 3.7517466078132213e-06, "loss": 0.0689, "step": 18040 }, { "epoch": 68.89312977099236, "grad_norm": 0.4285561442375183, "learning_rate": 3.7464103196352176e-06, "loss": 0.0816, "step": 18050 }, { "epoch": 68.93129770992367, "grad_norm": 0.3345412015914917, "learning_rate": 3.7410755546868803e-06, "loss": 0.0794, "step": 18060 }, { "epoch": 68.96946564885496, "grad_norm": 0.3829120695590973, "learning_rate": 3.7357423194504538e-06, "loss": 0.0698, "step": 18070 }, { "epoch": 69.00763358778626, "grad_norm": 0.327454537153244, "learning_rate": 3.7304106204063186e-06, "loss": 0.0763, "step": 18080 }, { "epoch": 69.04580152671755, "grad_norm": 0.4839470088481903, "learning_rate": 3.725080464032996e-06, "loss": 0.0722, "step": 18090 }, { "epoch": 69.08396946564885, "grad_norm": 0.2581571340560913, "learning_rate": 3.7197518568071256e-06, "loss": 0.0678, "step": 18100 }, { "epoch": 69.12213740458016, "grad_norm": 0.41950273513793945, "learning_rate": 3.7144248052034696e-06, "loss": 0.0769, "step": 18110 }, { "epoch": 69.16030534351145, "grad_norm": 0.35857993364334106, "learning_rate": 3.7090993156948973e-06, "loss": 0.069, "step": 18120 }, { "epoch": 69.19847328244275, "grad_norm": 0.5615084171295166, "learning_rate": 3.7037753947523786e-06, "loss": 0.0648, "step": 18130 }, { "epoch": 69.23664122137404, "grad_norm": 0.32720375061035156, "learning_rate": 3.6984530488449833e-06, "loss": 0.0669, "step": 18140 }, { "epoch": 69.27480916030534, "grad_norm": 0.38296082615852356, "learning_rate": 3.693132284439861e-06, "loss": 0.0698, "step": 18150 }, { "epoch": 69.31297709923665, "grad_norm": 0.39868733286857605, "learning_rate": 3.687813108002242e-06, "loss": 0.0693, "step": 18160 }, { "epoch": 69.35114503816794, "grad_norm": 0.2485743910074234, "learning_rate": 3.6824955259954285e-06, "loss": 0.0765, "step": 18170 }, { "epoch": 69.38931297709924, "grad_norm": 0.3294006884098053, "learning_rate": 3.6771795448807847e-06, "loss": 0.0684, "step": 18180 }, { "epoch": 69.42748091603053, "grad_norm": 0.584002673625946, "learning_rate": 3.6718651711177244e-06, "loss": 0.0775, "step": 18190 }, { "epoch": 69.46564885496183, "grad_norm": 0.3454589545726776, "learning_rate": 3.6665524111637184e-06, "loss": 0.0703, "step": 18200 }, { "epoch": 69.50381679389314, "grad_norm": 0.4234253466129303, "learning_rate": 3.6612412714742695e-06, "loss": 0.07, "step": 18210 }, { "epoch": 69.54198473282443, "grad_norm": 0.8012011051177979, "learning_rate": 3.655931758502912e-06, "loss": 0.0753, "step": 18220 }, { "epoch": 69.58015267175573, "grad_norm": 0.25509560108184814, "learning_rate": 3.6506238787012038e-06, "loss": 0.0704, "step": 18230 }, { "epoch": 69.61832061068702, "grad_norm": 0.4144178628921509, "learning_rate": 3.645317638518721e-06, "loss": 0.0743, "step": 18240 }, { "epoch": 69.65648854961832, "grad_norm": 0.2962321937084198, "learning_rate": 3.6400130444030456e-06, "loss": 0.0815, "step": 18250 }, { "epoch": 69.69465648854961, "grad_norm": 0.6895132660865784, "learning_rate": 3.634710102799761e-06, "loss": 0.0696, "step": 18260 }, { "epoch": 69.73282442748092, "grad_norm": 0.2801784873008728, "learning_rate": 3.62940882015244e-06, "loss": 0.0733, "step": 18270 }, { "epoch": 69.77099236641222, "grad_norm": 0.24803809821605682, "learning_rate": 3.6241092029026405e-06, "loss": 0.0778, "step": 18280 }, { "epoch": 69.80916030534351, "grad_norm": 0.7623275518417358, "learning_rate": 3.6188112574898955e-06, "loss": 0.0768, "step": 18290 }, { "epoch": 69.8473282442748, "grad_norm": 0.2937699556350708, "learning_rate": 3.613514990351712e-06, "loss": 0.0761, "step": 18300 }, { "epoch": 69.8854961832061, "grad_norm": 0.3946186900138855, "learning_rate": 3.608220407923552e-06, "loss": 0.0707, "step": 18310 }, { "epoch": 69.92366412213741, "grad_norm": 0.28563305735588074, "learning_rate": 3.602927516638833e-06, "loss": 0.0698, "step": 18320 }, { "epoch": 69.9618320610687, "grad_norm": 0.25289323925971985, "learning_rate": 3.597636322928917e-06, "loss": 0.0726, "step": 18330 }, { "epoch": 70.0, "grad_norm": 0.21828551590442657, "learning_rate": 3.5923468332231003e-06, "loss": 0.0706, "step": 18340 }, { "epoch": 70.0381679389313, "grad_norm": 0.2835104763507843, "learning_rate": 3.5870590539486163e-06, "loss": 0.0672, "step": 18350 }, { "epoch": 70.07633587786259, "grad_norm": 0.2114870399236679, "learning_rate": 3.5817729915306138e-06, "loss": 0.0672, "step": 18360 }, { "epoch": 70.1145038167939, "grad_norm": 0.33038604259490967, "learning_rate": 3.5764886523921567e-06, "loss": 0.062, "step": 18370 }, { "epoch": 70.1526717557252, "grad_norm": 0.25041481852531433, "learning_rate": 3.571206042954214e-06, "loss": 0.0711, "step": 18380 }, { "epoch": 70.19083969465649, "grad_norm": 0.30329805612564087, "learning_rate": 3.565925169635657e-06, "loss": 0.0814, "step": 18390 }, { "epoch": 70.22900763358778, "grad_norm": 0.6908921599388123, "learning_rate": 3.5606460388532406e-06, "loss": 0.0786, "step": 18400 }, { "epoch": 70.26717557251908, "grad_norm": 0.8706730008125305, "learning_rate": 3.5553686570216116e-06, "loss": 0.0758, "step": 18410 }, { "epoch": 70.30534351145039, "grad_norm": 0.6713736057281494, "learning_rate": 3.5500930305532845e-06, "loss": 0.0827, "step": 18420 }, { "epoch": 70.34351145038168, "grad_norm": 0.4993293583393097, "learning_rate": 3.5448191658586423e-06, "loss": 0.0777, "step": 18430 }, { "epoch": 70.38167938931298, "grad_norm": 0.7732614874839783, "learning_rate": 3.5395470693459267e-06, "loss": 0.0762, "step": 18440 }, { "epoch": 70.41984732824427, "grad_norm": 0.37474754452705383, "learning_rate": 3.5342767474212344e-06, "loss": 0.0741, "step": 18450 }, { "epoch": 70.45801526717557, "grad_norm": 0.6375201344490051, "learning_rate": 3.5290082064885025e-06, "loss": 0.0704, "step": 18460 }, { "epoch": 70.49618320610686, "grad_norm": 0.3896820545196533, "learning_rate": 3.5237414529495056e-06, "loss": 0.0739, "step": 18470 }, { "epoch": 70.53435114503817, "grad_norm": 0.26550376415252686, "learning_rate": 3.5184764932038457e-06, "loss": 0.0941, "step": 18480 }, { "epoch": 70.57251908396947, "grad_norm": 0.41228294372558594, "learning_rate": 3.513213333648945e-06, "loss": 0.0693, "step": 18490 }, { "epoch": 70.61068702290076, "grad_norm": 0.3219086825847626, "learning_rate": 3.507951980680037e-06, "loss": 0.0706, "step": 18500 }, { "epoch": 70.64885496183206, "grad_norm": 0.2977215051651001, "learning_rate": 3.502692440690165e-06, "loss": 0.0728, "step": 18510 }, { "epoch": 70.68702290076335, "grad_norm": 0.45944830775260925, "learning_rate": 3.497434720070165e-06, "loss": 0.0658, "step": 18520 }, { "epoch": 70.72519083969466, "grad_norm": 0.45942363142967224, "learning_rate": 3.492178825208662e-06, "loss": 0.0708, "step": 18530 }, { "epoch": 70.76335877862596, "grad_norm": 0.401991069316864, "learning_rate": 3.486924762492065e-06, "loss": 0.0772, "step": 18540 }, { "epoch": 70.80152671755725, "grad_norm": 0.390428751707077, "learning_rate": 3.4816725383045534e-06, "loss": 0.0712, "step": 18550 }, { "epoch": 70.83969465648855, "grad_norm": 0.36165851354599, "learning_rate": 3.476422159028079e-06, "loss": 0.0714, "step": 18560 }, { "epoch": 70.87786259541984, "grad_norm": 0.3357897996902466, "learning_rate": 3.471173631042345e-06, "loss": 0.0746, "step": 18570 }, { "epoch": 70.91603053435115, "grad_norm": 0.9048196077346802, "learning_rate": 3.465926960724808e-06, "loss": 0.0731, "step": 18580 }, { "epoch": 70.95419847328245, "grad_norm": 0.34917864203453064, "learning_rate": 3.4606821544506664e-06, "loss": 0.0722, "step": 18590 }, { "epoch": 70.99236641221374, "grad_norm": 0.2705357074737549, "learning_rate": 3.4554392185928563e-06, "loss": 0.0867, "step": 18600 }, { "epoch": 71.03053435114504, "grad_norm": 0.27343451976776123, "learning_rate": 3.450198159522037e-06, "loss": 0.0635, "step": 18610 }, { "epoch": 71.06870229007633, "grad_norm": 0.48020657896995544, "learning_rate": 3.444958983606592e-06, "loss": 0.0731, "step": 18620 }, { "epoch": 71.10687022900764, "grad_norm": 0.25576311349868774, "learning_rate": 3.4397216972126126e-06, "loss": 0.0738, "step": 18630 }, { "epoch": 71.14503816793894, "grad_norm": 0.3990323543548584, "learning_rate": 3.434486306703896e-06, "loss": 0.0861, "step": 18640 }, { "epoch": 71.18320610687023, "grad_norm": 0.25410884618759155, "learning_rate": 3.429252818441935e-06, "loss": 0.0759, "step": 18650 }, { "epoch": 71.22137404580153, "grad_norm": 0.2940288782119751, "learning_rate": 3.4240212387859097e-06, "loss": 0.0745, "step": 18660 }, { "epoch": 71.25954198473282, "grad_norm": 0.5581989288330078, "learning_rate": 3.4187915740926856e-06, "loss": 0.0738, "step": 18670 }, { "epoch": 71.29770992366412, "grad_norm": 0.3819045424461365, "learning_rate": 3.4135638307167962e-06, "loss": 0.0729, "step": 18680 }, { "epoch": 71.33587786259542, "grad_norm": 0.34871965646743774, "learning_rate": 3.408338015010445e-06, "loss": 0.0729, "step": 18690 }, { "epoch": 71.37404580152672, "grad_norm": 0.3207714557647705, "learning_rate": 3.4031141333234895e-06, "loss": 0.0706, "step": 18700 }, { "epoch": 71.41221374045801, "grad_norm": 0.27491647005081177, "learning_rate": 3.397892192003437e-06, "loss": 0.0704, "step": 18710 }, { "epoch": 71.45038167938931, "grad_norm": 0.3418438732624054, "learning_rate": 3.392672197395441e-06, "loss": 0.0685, "step": 18720 }, { "epoch": 71.4885496183206, "grad_norm": 0.8138360381126404, "learning_rate": 3.3874541558422874e-06, "loss": 0.0704, "step": 18730 }, { "epoch": 71.52671755725191, "grad_norm": 0.2727760672569275, "learning_rate": 3.3822380736843865e-06, "loss": 0.087, "step": 18740 }, { "epoch": 71.56488549618321, "grad_norm": 0.5229569673538208, "learning_rate": 3.3770239572597715e-06, "loss": 0.0794, "step": 18750 }, { "epoch": 71.6030534351145, "grad_norm": 0.3278786540031433, "learning_rate": 3.3718118129040833e-06, "loss": 0.0712, "step": 18760 }, { "epoch": 71.6412213740458, "grad_norm": 0.2960107624530792, "learning_rate": 3.3666016469505725e-06, "loss": 0.0671, "step": 18770 }, { "epoch": 71.6793893129771, "grad_norm": 0.5897856950759888, "learning_rate": 3.3613934657300793e-06, "loss": 0.0704, "step": 18780 }, { "epoch": 71.7175572519084, "grad_norm": 0.30159324407577515, "learning_rate": 3.356187275571037e-06, "loss": 0.0683, "step": 18790 }, { "epoch": 71.7557251908397, "grad_norm": 0.32948631048202515, "learning_rate": 3.350983082799456e-06, "loss": 0.0779, "step": 18800 }, { "epoch": 71.79389312977099, "grad_norm": 0.3158397078514099, "learning_rate": 3.34578089373892e-06, "loss": 0.0723, "step": 18810 }, { "epoch": 71.83206106870229, "grad_norm": 0.32471826672554016, "learning_rate": 3.3405807147105814e-06, "loss": 0.0667, "step": 18820 }, { "epoch": 71.87022900763358, "grad_norm": 0.5005679130554199, "learning_rate": 3.3353825520331466e-06, "loss": 0.0684, "step": 18830 }, { "epoch": 71.90839694656489, "grad_norm": 0.9290713667869568, "learning_rate": 3.330186412022876e-06, "loss": 0.0772, "step": 18840 }, { "epoch": 71.94656488549619, "grad_norm": 0.9230539202690125, "learning_rate": 3.324992300993568e-06, "loss": 0.0702, "step": 18850 }, { "epoch": 71.98473282442748, "grad_norm": 0.2002493143081665, "learning_rate": 3.3198002252565564e-06, "loss": 0.0724, "step": 18860 }, { "epoch": 72.02290076335878, "grad_norm": 0.7362732887268066, "learning_rate": 3.3146101911207024e-06, "loss": 0.0693, "step": 18870 }, { "epoch": 72.06106870229007, "grad_norm": 0.6342445611953735, "learning_rate": 3.3094222048923895e-06, "loss": 0.0692, "step": 18880 }, { "epoch": 72.09923664122137, "grad_norm": 0.997917652130127, "learning_rate": 3.3042362728755084e-06, "loss": 0.0727, "step": 18890 }, { "epoch": 72.13740458015268, "grad_norm": 0.3136437237262726, "learning_rate": 3.2990524013714565e-06, "loss": 0.0626, "step": 18900 }, { "epoch": 72.17557251908397, "grad_norm": 0.5527306795120239, "learning_rate": 3.293870596679125e-06, "loss": 0.067, "step": 18910 }, { "epoch": 72.21374045801527, "grad_norm": 0.3831974267959595, "learning_rate": 3.288690865094895e-06, "loss": 0.0655, "step": 18920 }, { "epoch": 72.25190839694656, "grad_norm": 0.297125905752182, "learning_rate": 3.283513212912632e-06, "loss": 0.0747, "step": 18930 }, { "epoch": 72.29007633587786, "grad_norm": 0.30272427201271057, "learning_rate": 3.278337646423669e-06, "loss": 0.0821, "step": 18940 }, { "epoch": 72.32824427480917, "grad_norm": 0.4911503493785858, "learning_rate": 3.273164171916806e-06, "loss": 0.0715, "step": 18950 }, { "epoch": 72.36641221374046, "grad_norm": 0.8124505281448364, "learning_rate": 3.267992795678306e-06, "loss": 0.0676, "step": 18960 }, { "epoch": 72.40458015267176, "grad_norm": 0.2938520610332489, "learning_rate": 3.262823523991875e-06, "loss": 0.0754, "step": 18970 }, { "epoch": 72.44274809160305, "grad_norm": 0.26553019881248474, "learning_rate": 3.2576563631386694e-06, "loss": 0.0733, "step": 18980 }, { "epoch": 72.48091603053435, "grad_norm": 0.3607490062713623, "learning_rate": 3.2524913193972747e-06, "loss": 0.074, "step": 18990 }, { "epoch": 72.51908396946565, "grad_norm": 0.5048254132270813, "learning_rate": 3.247328399043706e-06, "loss": 0.0707, "step": 19000 }, { "epoch": 72.55725190839695, "grad_norm": 0.3066476583480835, "learning_rate": 3.2421676083513987e-06, "loss": 0.0728, "step": 19010 }, { "epoch": 72.59541984732824, "grad_norm": 0.2978527843952179, "learning_rate": 3.2370089535911988e-06, "loss": 0.0721, "step": 19020 }, { "epoch": 72.63358778625954, "grad_norm": 0.2315007746219635, "learning_rate": 3.2318524410313602e-06, "loss": 0.0722, "step": 19030 }, { "epoch": 72.67175572519083, "grad_norm": 0.5974384546279907, "learning_rate": 3.22669807693753e-06, "loss": 0.0716, "step": 19040 }, { "epoch": 72.70992366412214, "grad_norm": 0.2158258557319641, "learning_rate": 3.2215458675727497e-06, "loss": 0.0687, "step": 19050 }, { "epoch": 72.74809160305344, "grad_norm": 0.23273561894893646, "learning_rate": 3.2163958191974375e-06, "loss": 0.0676, "step": 19060 }, { "epoch": 72.78625954198473, "grad_norm": 0.9219455122947693, "learning_rate": 3.211247938069387e-06, "loss": 0.0708, "step": 19070 }, { "epoch": 72.82442748091603, "grad_norm": 0.949112057685852, "learning_rate": 3.2061022304437596e-06, "loss": 0.0779, "step": 19080 }, { "epoch": 72.86259541984732, "grad_norm": 0.3178614675998688, "learning_rate": 3.2009587025730765e-06, "loss": 0.0762, "step": 19090 }, { "epoch": 72.90076335877862, "grad_norm": 0.3184674084186554, "learning_rate": 3.1958173607072075e-06, "loss": 0.071, "step": 19100 }, { "epoch": 72.93893129770993, "grad_norm": 0.6181210875511169, "learning_rate": 3.1906782110933698e-06, "loss": 0.0842, "step": 19110 }, { "epoch": 72.97709923664122, "grad_norm": 0.4641569256782532, "learning_rate": 3.1855412599761137e-06, "loss": 0.0761, "step": 19120 }, { "epoch": 73.01526717557252, "grad_norm": 0.8154666423797607, "learning_rate": 3.1804065135973165e-06, "loss": 0.0765, "step": 19130 }, { "epoch": 73.05343511450381, "grad_norm": 0.4169461131095886, "learning_rate": 3.175273978196184e-06, "loss": 0.0796, "step": 19140 }, { "epoch": 73.09160305343511, "grad_norm": 0.3701060712337494, "learning_rate": 3.1701436600092283e-06, "loss": 0.0755, "step": 19150 }, { "epoch": 73.12977099236642, "grad_norm": 0.49110186100006104, "learning_rate": 3.16501556527027e-06, "loss": 0.0661, "step": 19160 }, { "epoch": 73.16793893129771, "grad_norm": 0.2956679165363312, "learning_rate": 3.1598897002104266e-06, "loss": 0.0703, "step": 19170 }, { "epoch": 73.20610687022901, "grad_norm": 0.35779961943626404, "learning_rate": 3.1547660710581087e-06, "loss": 0.074, "step": 19180 }, { "epoch": 73.2442748091603, "grad_norm": 0.3238672614097595, "learning_rate": 3.149644684039008e-06, "loss": 0.0714, "step": 19190 }, { "epoch": 73.2824427480916, "grad_norm": 0.3601135015487671, "learning_rate": 3.144525545376095e-06, "loss": 0.0733, "step": 19200 }, { "epoch": 73.3206106870229, "grad_norm": 0.3360394537448883, "learning_rate": 3.1394086612896035e-06, "loss": 0.0708, "step": 19210 }, { "epoch": 73.3587786259542, "grad_norm": 0.6372358202934265, "learning_rate": 3.1342940379970315e-06, "loss": 0.0712, "step": 19220 }, { "epoch": 73.3969465648855, "grad_norm": 0.44446098804473877, "learning_rate": 3.129181681713127e-06, "loss": 0.0729, "step": 19230 }, { "epoch": 73.43511450381679, "grad_norm": 0.2544264793395996, "learning_rate": 3.1240715986498856e-06, "loss": 0.0775, "step": 19240 }, { "epoch": 73.47328244274809, "grad_norm": 0.30455854535102844, "learning_rate": 3.1189637950165398e-06, "loss": 0.0663, "step": 19250 }, { "epoch": 73.5114503816794, "grad_norm": 0.28932657837867737, "learning_rate": 3.1138582770195547e-06, "loss": 0.0689, "step": 19260 }, { "epoch": 73.54961832061069, "grad_norm": 0.40639564394950867, "learning_rate": 3.1087550508626145e-06, "loss": 0.0699, "step": 19270 }, { "epoch": 73.58778625954199, "grad_norm": 0.3635731041431427, "learning_rate": 3.1036541227466204e-06, "loss": 0.0744, "step": 19280 }, { "epoch": 73.62595419847328, "grad_norm": 0.25109219551086426, "learning_rate": 3.098555498869679e-06, "loss": 0.0765, "step": 19290 }, { "epoch": 73.66412213740458, "grad_norm": 0.3934338390827179, "learning_rate": 3.093459185427102e-06, "loss": 0.0739, "step": 19300 }, { "epoch": 73.70229007633588, "grad_norm": 0.2998042106628418, "learning_rate": 3.088365188611391e-06, "loss": 0.0724, "step": 19310 }, { "epoch": 73.74045801526718, "grad_norm": 0.36657044291496277, "learning_rate": 3.0832735146122295e-06, "loss": 0.0676, "step": 19320 }, { "epoch": 73.77862595419847, "grad_norm": 0.6263729929924011, "learning_rate": 3.078184169616485e-06, "loss": 0.0705, "step": 19330 }, { "epoch": 73.81679389312977, "grad_norm": 0.6651374101638794, "learning_rate": 3.073097159808187e-06, "loss": 0.0761, "step": 19340 }, { "epoch": 73.85496183206106, "grad_norm": 0.33448323607444763, "learning_rate": 3.068012491368537e-06, "loss": 0.0698, "step": 19350 }, { "epoch": 73.89312977099236, "grad_norm": 0.2528843879699707, "learning_rate": 3.0629301704758846e-06, "loss": 0.0692, "step": 19360 }, { "epoch": 73.93129770992367, "grad_norm": 0.36234578490257263, "learning_rate": 3.0578502033057288e-06, "loss": 0.0729, "step": 19370 }, { "epoch": 73.96946564885496, "grad_norm": 0.2573143243789673, "learning_rate": 3.0527725960307083e-06, "loss": 0.0715, "step": 19380 }, { "epoch": 74.00763358778626, "grad_norm": 0.3500208556652069, "learning_rate": 3.0476973548205945e-06, "loss": 0.0688, "step": 19390 }, { "epoch": 74.04580152671755, "grad_norm": 0.3336540460586548, "learning_rate": 3.042624485842285e-06, "loss": 0.0645, "step": 19400 }, { "epoch": 74.08396946564885, "grad_norm": 0.2523120641708374, "learning_rate": 3.0375539952597943e-06, "loss": 0.0718, "step": 19410 }, { "epoch": 74.12213740458016, "grad_norm": 0.38191351294517517, "learning_rate": 3.0324858892342467e-06, "loss": 0.0698, "step": 19420 }, { "epoch": 74.16030534351145, "grad_norm": 0.2716592252254486, "learning_rate": 3.027420173923867e-06, "loss": 0.0686, "step": 19430 }, { "epoch": 74.19847328244275, "grad_norm": 0.4997287094593048, "learning_rate": 3.022356855483979e-06, "loss": 0.0691, "step": 19440 }, { "epoch": 74.23664122137404, "grad_norm": 0.2641426622867584, "learning_rate": 3.017295940066989e-06, "loss": 0.0725, "step": 19450 }, { "epoch": 74.27480916030534, "grad_norm": 0.23110733926296234, "learning_rate": 3.0122374338223905e-06, "loss": 0.0732, "step": 19460 }, { "epoch": 74.31297709923665, "grad_norm": 0.31251829862594604, "learning_rate": 3.007181342896743e-06, "loss": 0.068, "step": 19470 }, { "epoch": 74.35114503816794, "grad_norm": 0.39099785685539246, "learning_rate": 3.0021276734336744e-06, "loss": 0.0712, "step": 19480 }, { "epoch": 74.38931297709924, "grad_norm": 0.26635631918907166, "learning_rate": 2.997076431573871e-06, "loss": 0.0722, "step": 19490 }, { "epoch": 74.42748091603053, "grad_norm": 0.3487379848957062, "learning_rate": 2.9920276234550636e-06, "loss": 0.0711, "step": 19500 }, { "epoch": 74.46564885496183, "grad_norm": 0.33060070872306824, "learning_rate": 2.9869812552120355e-06, "loss": 0.0738, "step": 19510 }, { "epoch": 74.50381679389314, "grad_norm": 0.3587673604488373, "learning_rate": 2.9819373329765977e-06, "loss": 0.0679, "step": 19520 }, { "epoch": 74.54198473282443, "grad_norm": 0.33571016788482666, "learning_rate": 2.97689586287759e-06, "loss": 0.0735, "step": 19530 }, { "epoch": 74.58015267175573, "grad_norm": 0.5554803609848022, "learning_rate": 2.9718568510408763e-06, "loss": 0.0672, "step": 19540 }, { "epoch": 74.61832061068702, "grad_norm": 0.35926058888435364, "learning_rate": 2.966820303589327e-06, "loss": 0.0747, "step": 19550 }, { "epoch": 74.65648854961832, "grad_norm": 0.2982501685619354, "learning_rate": 2.961786226642829e-06, "loss": 0.0716, "step": 19560 }, { "epoch": 74.69465648854961, "grad_norm": 0.35031527280807495, "learning_rate": 2.9567546263182554e-06, "loss": 0.0742, "step": 19570 }, { "epoch": 74.73282442748092, "grad_norm": 0.6128705143928528, "learning_rate": 2.951725508729476e-06, "loss": 0.0731, "step": 19580 }, { "epoch": 74.77099236641222, "grad_norm": 0.2246423214673996, "learning_rate": 2.9466988799873443e-06, "loss": 0.0675, "step": 19590 }, { "epoch": 74.80916030534351, "grad_norm": 0.2989789843559265, "learning_rate": 2.9416747461996853e-06, "loss": 0.0762, "step": 19600 }, { "epoch": 74.8473282442748, "grad_norm": 0.34754475951194763, "learning_rate": 2.9366531134712974e-06, "loss": 0.0699, "step": 19610 }, { "epoch": 74.8854961832061, "grad_norm": 0.37204280495643616, "learning_rate": 2.931633987903937e-06, "loss": 0.0711, "step": 19620 }, { "epoch": 74.92366412213741, "grad_norm": 0.5289718508720398, "learning_rate": 2.926617375596317e-06, "loss": 0.0818, "step": 19630 }, { "epoch": 74.9618320610687, "grad_norm": 0.3462826907634735, "learning_rate": 2.9216032826440927e-06, "loss": 0.0702, "step": 19640 }, { "epoch": 75.0, "grad_norm": 0.29999420046806335, "learning_rate": 2.9165917151398594e-06, "loss": 0.072, "step": 19650 }, { "epoch": 75.0381679389313, "grad_norm": 0.28326013684272766, "learning_rate": 2.9115826791731426e-06, "loss": 0.0695, "step": 19660 }, { "epoch": 75.07633587786259, "grad_norm": 0.7487019300460815, "learning_rate": 2.9065761808303983e-06, "loss": 0.0744, "step": 19670 }, { "epoch": 75.1145038167939, "grad_norm": 0.24250933527946472, "learning_rate": 2.9015722261949918e-06, "loss": 0.0716, "step": 19680 }, { "epoch": 75.1526717557252, "grad_norm": 0.272839218378067, "learning_rate": 2.8965708213471987e-06, "loss": 0.0738, "step": 19690 }, { "epoch": 75.19083969465649, "grad_norm": 0.26355302333831787, "learning_rate": 2.891571972364198e-06, "loss": 0.0717, "step": 19700 }, { "epoch": 75.22900763358778, "grad_norm": 0.28697460889816284, "learning_rate": 2.8865756853200605e-06, "loss": 0.0708, "step": 19710 }, { "epoch": 75.26717557251908, "grad_norm": 0.5366164445877075, "learning_rate": 2.8815819662857505e-06, "loss": 0.0696, "step": 19720 }, { "epoch": 75.30534351145039, "grad_norm": 0.5985289812088013, "learning_rate": 2.876590821329105e-06, "loss": 0.0767, "step": 19730 }, { "epoch": 75.34351145038168, "grad_norm": 1.137639045715332, "learning_rate": 2.8716022565148362e-06, "loss": 0.0733, "step": 19740 }, { "epoch": 75.38167938931298, "grad_norm": 0.27952316403388977, "learning_rate": 2.8666162779045205e-06, "loss": 0.0733, "step": 19750 }, { "epoch": 75.41984732824427, "grad_norm": 1.1592620611190796, "learning_rate": 2.8616328915565907e-06, "loss": 0.0744, "step": 19760 }, { "epoch": 75.45801526717557, "grad_norm": 0.5299224257469177, "learning_rate": 2.856652103526334e-06, "loss": 0.0706, "step": 19770 }, { "epoch": 75.49618320610686, "grad_norm": 0.2791510224342346, "learning_rate": 2.8516739198658753e-06, "loss": 0.0666, "step": 19780 }, { "epoch": 75.53435114503817, "grad_norm": 0.32924363017082214, "learning_rate": 2.8466983466241772e-06, "loss": 0.0777, "step": 19790 }, { "epoch": 75.57251908396947, "grad_norm": 0.4635617434978485, "learning_rate": 2.841725389847032e-06, "loss": 0.0685, "step": 19800 }, { "epoch": 75.61068702290076, "grad_norm": 0.42956969141960144, "learning_rate": 2.8367550555770507e-06, "loss": 0.0747, "step": 19810 }, { "epoch": 75.64885496183206, "grad_norm": 0.3257053792476654, "learning_rate": 2.8317873498536554e-06, "loss": 0.0705, "step": 19820 }, { "epoch": 75.68702290076335, "grad_norm": 0.3546193540096283, "learning_rate": 2.8268222787130805e-06, "loss": 0.0747, "step": 19830 }, { "epoch": 75.72519083969466, "grad_norm": 0.23434428870677948, "learning_rate": 2.8218598481883552e-06, "loss": 0.0728, "step": 19840 }, { "epoch": 75.76335877862596, "grad_norm": 0.3509843647480011, "learning_rate": 2.816900064309299e-06, "loss": 0.0685, "step": 19850 }, { "epoch": 75.80152671755725, "grad_norm": 0.350881963968277, "learning_rate": 2.811942933102517e-06, "loss": 0.0668, "step": 19860 }, { "epoch": 75.83969465648855, "grad_norm": 0.2302703559398651, "learning_rate": 2.8069884605913912e-06, "loss": 0.0679, "step": 19870 }, { "epoch": 75.87786259541984, "grad_norm": 0.39022761583328247, "learning_rate": 2.802036652796074e-06, "loss": 0.0718, "step": 19880 }, { "epoch": 75.91603053435115, "grad_norm": 0.536963939666748, "learning_rate": 2.797087515733478e-06, "loss": 0.0688, "step": 19890 }, { "epoch": 75.95419847328245, "grad_norm": 0.33239343762397766, "learning_rate": 2.7921410554172724e-06, "loss": 0.0714, "step": 19900 }, { "epoch": 75.99236641221374, "grad_norm": 0.2767488658428192, "learning_rate": 2.787197277857871e-06, "loss": 0.0702, "step": 19910 }, { "epoch": 76.03053435114504, "grad_norm": 0.30666476488113403, "learning_rate": 2.7822561890624287e-06, "loss": 0.0674, "step": 19920 }, { "epoch": 76.06870229007633, "grad_norm": 0.2637414038181305, "learning_rate": 2.777317795034839e-06, "loss": 0.0684, "step": 19930 }, { "epoch": 76.10687022900764, "grad_norm": 0.23923492431640625, "learning_rate": 2.772382101775711e-06, "loss": 0.0719, "step": 19940 }, { "epoch": 76.14503816793894, "grad_norm": 0.8826484084129333, "learning_rate": 2.7674491152823825e-06, "loss": 0.0727, "step": 19950 }, { "epoch": 76.18320610687023, "grad_norm": 0.6208845973014832, "learning_rate": 2.7625188415488946e-06, "loss": 0.0715, "step": 19960 }, { "epoch": 76.22137404580153, "grad_norm": 0.3055213987827301, "learning_rate": 2.7575912865659925e-06, "loss": 0.0719, "step": 19970 }, { "epoch": 76.25954198473282, "grad_norm": 0.3592512011528015, "learning_rate": 2.752666456321125e-06, "loss": 0.0659, "step": 19980 }, { "epoch": 76.29770992366412, "grad_norm": 0.38851675391197205, "learning_rate": 2.7477443567984225e-06, "loss": 0.0701, "step": 19990 }, { "epoch": 76.33587786259542, "grad_norm": 0.6826211810112, "learning_rate": 2.7428249939787e-06, "loss": 0.0745, "step": 20000 }, { "epoch": 76.37404580152672, "grad_norm": 0.26521873474121094, "learning_rate": 2.7379083738394485e-06, "loss": 0.0772, "step": 20010 }, { "epoch": 76.41221374045801, "grad_norm": 0.5465177893638611, "learning_rate": 2.732994502354823e-06, "loss": 0.0699, "step": 20020 }, { "epoch": 76.45038167938931, "grad_norm": 0.22223325073719025, "learning_rate": 2.72808338549564e-06, "loss": 0.0703, "step": 20030 }, { "epoch": 76.4885496183206, "grad_norm": 0.3529285788536072, "learning_rate": 2.723175029229374e-06, "loss": 0.0709, "step": 20040 }, { "epoch": 76.52671755725191, "grad_norm": 0.686523973941803, "learning_rate": 2.718269439520138e-06, "loss": 0.0706, "step": 20050 }, { "epoch": 76.56488549618321, "grad_norm": 0.31421926617622375, "learning_rate": 2.713366622328686e-06, "loss": 0.0698, "step": 20060 }, { "epoch": 76.6030534351145, "grad_norm": 0.5857685804367065, "learning_rate": 2.7084665836124006e-06, "loss": 0.0771, "step": 20070 }, { "epoch": 76.6412213740458, "grad_norm": 0.3716685473918915, "learning_rate": 2.703569329325296e-06, "loss": 0.0697, "step": 20080 }, { "epoch": 76.6793893129771, "grad_norm": 0.3061921298503876, "learning_rate": 2.698674865417994e-06, "loss": 0.0723, "step": 20090 }, { "epoch": 76.7175572519084, "grad_norm": 0.23903951048851013, "learning_rate": 2.693783197837733e-06, "loss": 0.0714, "step": 20100 }, { "epoch": 76.7557251908397, "grad_norm": 0.22559994459152222, "learning_rate": 2.6888943325283482e-06, "loss": 0.0774, "step": 20110 }, { "epoch": 76.79389312977099, "grad_norm": 0.48428580164909363, "learning_rate": 2.6840082754302734e-06, "loss": 0.0686, "step": 20120 }, { "epoch": 76.83206106870229, "grad_norm": 0.3806080222129822, "learning_rate": 2.6791250324805252e-06, "loss": 0.0689, "step": 20130 }, { "epoch": 76.87022900763358, "grad_norm": 0.49289458990097046, "learning_rate": 2.6742446096127086e-06, "loss": 0.068, "step": 20140 }, { "epoch": 76.90839694656489, "grad_norm": 0.5932338833808899, "learning_rate": 2.669367012756996e-06, "loss": 0.068, "step": 20150 }, { "epoch": 76.94656488549619, "grad_norm": 0.2798921465873718, "learning_rate": 2.664492247840127e-06, "loss": 0.0731, "step": 20160 }, { "epoch": 76.98473282442748, "grad_norm": 0.25367864966392517, "learning_rate": 2.6596203207854006e-06, "loss": 0.0678, "step": 20170 }, { "epoch": 77.02290076335878, "grad_norm": 0.28277409076690674, "learning_rate": 2.654751237512666e-06, "loss": 0.0713, "step": 20180 }, { "epoch": 77.06106870229007, "grad_norm": 0.3937804102897644, "learning_rate": 2.649885003938323e-06, "loss": 0.0698, "step": 20190 }, { "epoch": 77.09923664122137, "grad_norm": 0.35799962282180786, "learning_rate": 2.6450216259753005e-06, "loss": 0.0761, "step": 20200 }, { "epoch": 77.13740458015268, "grad_norm": 0.26287925243377686, "learning_rate": 2.6401611095330632e-06, "loss": 0.0654, "step": 20210 }, { "epoch": 77.17557251908397, "grad_norm": 0.46736228466033936, "learning_rate": 2.6353034605175937e-06, "loss": 0.0715, "step": 20220 }, { "epoch": 77.21374045801527, "grad_norm": 0.46314504742622375, "learning_rate": 2.6304486848313982e-06, "loss": 0.0633, "step": 20230 }, { "epoch": 77.25190839694656, "grad_norm": 0.5143646597862244, "learning_rate": 2.6255967883734823e-06, "loss": 0.0706, "step": 20240 }, { "epoch": 77.29007633587786, "grad_norm": 0.2608904242515564, "learning_rate": 2.620747777039363e-06, "loss": 0.068, "step": 20250 }, { "epoch": 77.32824427480917, "grad_norm": 0.8567964434623718, "learning_rate": 2.6159016567210426e-06, "loss": 0.086, "step": 20260 }, { "epoch": 77.36641221374046, "grad_norm": 0.3883376717567444, "learning_rate": 2.6110584333070153e-06, "loss": 0.075, "step": 20270 }, { "epoch": 77.40458015267176, "grad_norm": 0.27981120347976685, "learning_rate": 2.606218112682254e-06, "loss": 0.0666, "step": 20280 }, { "epoch": 77.44274809160305, "grad_norm": 0.2423625886440277, "learning_rate": 2.601380700728203e-06, "loss": 0.0745, "step": 20290 }, { "epoch": 77.48091603053435, "grad_norm": 0.23799222707748413, "learning_rate": 2.596546203322777e-06, "loss": 0.0709, "step": 20300 }, { "epoch": 77.51908396946565, "grad_norm": 0.2332644760608673, "learning_rate": 2.591714626340346e-06, "loss": 0.0746, "step": 20310 }, { "epoch": 77.55725190839695, "grad_norm": 0.29361745715141296, "learning_rate": 2.5868859756517294e-06, "loss": 0.0765, "step": 20320 }, { "epoch": 77.59541984732824, "grad_norm": 0.28440865874290466, "learning_rate": 2.582060257124195e-06, "loss": 0.068, "step": 20330 }, { "epoch": 77.63358778625954, "grad_norm": 0.3904019296169281, "learning_rate": 2.577237476621442e-06, "loss": 0.072, "step": 20340 }, { "epoch": 77.67175572519083, "grad_norm": 0.833919882774353, "learning_rate": 2.5724176400036094e-06, "loss": 0.0713, "step": 20350 }, { "epoch": 77.70992366412214, "grad_norm": 0.2217206060886383, "learning_rate": 2.5676007531272475e-06, "loss": 0.0812, "step": 20360 }, { "epoch": 77.74809160305344, "grad_norm": 0.307720810174942, "learning_rate": 2.562786821845333e-06, "loss": 0.072, "step": 20370 }, { "epoch": 77.78625954198473, "grad_norm": 0.28931522369384766, "learning_rate": 2.5579758520072446e-06, "loss": 0.0757, "step": 20380 }, { "epoch": 77.82442748091603, "grad_norm": 0.39654994010925293, "learning_rate": 2.5531678494587612e-06, "loss": 0.0693, "step": 20390 }, { "epoch": 77.86259541984732, "grad_norm": 0.55362868309021, "learning_rate": 2.5483628200420648e-06, "loss": 0.07, "step": 20400 }, { "epoch": 77.90076335877862, "grad_norm": 0.5507999658584595, "learning_rate": 2.5435607695957153e-06, "loss": 0.0727, "step": 20410 }, { "epoch": 77.93893129770993, "grad_norm": 0.36570632457733154, "learning_rate": 2.5387617039546585e-06, "loss": 0.0741, "step": 20420 }, { "epoch": 77.97709923664122, "grad_norm": 0.589455246925354, "learning_rate": 2.5339656289502105e-06, "loss": 0.0753, "step": 20430 }, { "epoch": 78.01526717557252, "grad_norm": 0.40846219658851624, "learning_rate": 2.5291725504100563e-06, "loss": 0.0695, "step": 20440 }, { "epoch": 78.05343511450381, "grad_norm": 0.28900301456451416, "learning_rate": 2.524382474158234e-06, "loss": 0.0691, "step": 20450 }, { "epoch": 78.09160305343511, "grad_norm": 0.33728376030921936, "learning_rate": 2.5195954060151433e-06, "loss": 0.0755, "step": 20460 }, { "epoch": 78.12977099236642, "grad_norm": 0.25033625960350037, "learning_rate": 2.5148113517975216e-06, "loss": 0.0724, "step": 20470 }, { "epoch": 78.16793893129771, "grad_norm": 0.8060714602470398, "learning_rate": 2.510030317318445e-06, "loss": 0.0751, "step": 20480 }, { "epoch": 78.20610687022901, "grad_norm": 0.5355228781700134, "learning_rate": 2.50525230838732e-06, "loss": 0.0735, "step": 20490 }, { "epoch": 78.2442748091603, "grad_norm": 0.28783127665519714, "learning_rate": 2.5004773308098814e-06, "loss": 0.0664, "step": 20500 }, { "epoch": 78.2824427480916, "grad_norm": 0.32949912548065186, "learning_rate": 2.4957053903881736e-06, "loss": 0.0654, "step": 20510 }, { "epoch": 78.3206106870229, "grad_norm": 0.40475648641586304, "learning_rate": 2.4909364929205575e-06, "loss": 0.0726, "step": 20520 }, { "epoch": 78.3587786259542, "grad_norm": 0.34688708186149597, "learning_rate": 2.4861706442016923e-06, "loss": 0.0671, "step": 20530 }, { "epoch": 78.3969465648855, "grad_norm": 0.38922348618507385, "learning_rate": 2.481407850022533e-06, "loss": 0.076, "step": 20540 }, { "epoch": 78.43511450381679, "grad_norm": 0.25327450037002563, "learning_rate": 2.4766481161703216e-06, "loss": 0.0729, "step": 20550 }, { "epoch": 78.47328244274809, "grad_norm": 0.36593905091285706, "learning_rate": 2.4718914484285876e-06, "loss": 0.0795, "step": 20560 }, { "epoch": 78.5114503816794, "grad_norm": 0.34967660903930664, "learning_rate": 2.467137852577129e-06, "loss": 0.0787, "step": 20570 }, { "epoch": 78.54961832061069, "grad_norm": 0.272087424993515, "learning_rate": 2.4623873343920123e-06, "loss": 0.0722, "step": 20580 }, { "epoch": 78.58778625954199, "grad_norm": 0.2819080352783203, "learning_rate": 2.4576398996455657e-06, "loss": 0.0631, "step": 20590 }, { "epoch": 78.62595419847328, "grad_norm": 0.8356397151947021, "learning_rate": 2.4528955541063683e-06, "loss": 0.0724, "step": 20600 }, { "epoch": 78.66412213740458, "grad_norm": 0.8203865885734558, "learning_rate": 2.448154303539251e-06, "loss": 0.0673, "step": 20610 }, { "epoch": 78.70229007633588, "grad_norm": 1.2636505365371704, "learning_rate": 2.4434161537052776e-06, "loss": 0.0779, "step": 20620 }, { "epoch": 78.74045801526718, "grad_norm": 0.42922544479370117, "learning_rate": 2.4386811103617474e-06, "loss": 0.0714, "step": 20630 }, { "epoch": 78.77862595419847, "grad_norm": 0.4266103208065033, "learning_rate": 2.4339491792621833e-06, "loss": 0.0698, "step": 20640 }, { "epoch": 78.81679389312977, "grad_norm": 0.2615494132041931, "learning_rate": 2.4292203661563313e-06, "loss": 0.0747, "step": 20650 }, { "epoch": 78.85496183206106, "grad_norm": 0.2261299043893814, "learning_rate": 2.424494676790141e-06, "loss": 0.0723, "step": 20660 }, { "epoch": 78.89312977099236, "grad_norm": 0.2378481775522232, "learning_rate": 2.419772116905775e-06, "loss": 0.0674, "step": 20670 }, { "epoch": 78.93129770992367, "grad_norm": 0.27929824590682983, "learning_rate": 2.4150526922415855e-06, "loss": 0.0772, "step": 20680 }, { "epoch": 78.96946564885496, "grad_norm": 0.3526342511177063, "learning_rate": 2.41033640853212e-06, "loss": 0.0782, "step": 20690 }, { "epoch": 79.00763358778626, "grad_norm": 0.3187890350818634, "learning_rate": 2.405623271508108e-06, "loss": 0.0697, "step": 20700 }, { "epoch": 79.04580152671755, "grad_norm": 0.38696810603141785, "learning_rate": 2.4009132868964525e-06, "loss": 0.0689, "step": 20710 }, { "epoch": 79.08396946564885, "grad_norm": 0.2635570466518402, "learning_rate": 2.3962064604202327e-06, "loss": 0.0675, "step": 20720 }, { "epoch": 79.12213740458016, "grad_norm": 0.36084693670272827, "learning_rate": 2.391502797798686e-06, "loss": 0.0697, "step": 20730 }, { "epoch": 79.16030534351145, "grad_norm": 0.38881757855415344, "learning_rate": 2.386802304747205e-06, "loss": 0.0666, "step": 20740 }, { "epoch": 79.19847328244275, "grad_norm": 0.4907824397087097, "learning_rate": 2.382104986977332e-06, "loss": 0.0668, "step": 20750 }, { "epoch": 79.23664122137404, "grad_norm": 0.2782943844795227, "learning_rate": 2.3774108501967493e-06, "loss": 0.0716, "step": 20760 }, { "epoch": 79.27480916030534, "grad_norm": 0.5779755711555481, "learning_rate": 2.37271990010928e-06, "loss": 0.073, "step": 20770 }, { "epoch": 79.31297709923665, "grad_norm": 0.3922247886657715, "learning_rate": 2.3680321424148678e-06, "loss": 0.07, "step": 20780 }, { "epoch": 79.35114503816794, "grad_norm": 0.3388228416442871, "learning_rate": 2.363347582809579e-06, "loss": 0.0672, "step": 20790 }, { "epoch": 79.38931297709924, "grad_norm": 0.20889022946357727, "learning_rate": 2.358666226985599e-06, "loss": 0.0699, "step": 20800 }, { "epoch": 79.42748091603053, "grad_norm": 0.5814685225486755, "learning_rate": 2.3539880806312134e-06, "loss": 0.0714, "step": 20810 }, { "epoch": 79.46564885496183, "grad_norm": 0.2144891619682312, "learning_rate": 2.349313149430814e-06, "loss": 0.0667, "step": 20820 }, { "epoch": 79.50381679389314, "grad_norm": 0.6816794276237488, "learning_rate": 2.3446414390648815e-06, "loss": 0.0745, "step": 20830 }, { "epoch": 79.54198473282443, "grad_norm": 0.32124826312065125, "learning_rate": 2.3399729552099844e-06, "loss": 0.0716, "step": 20840 }, { "epoch": 79.58015267175573, "grad_norm": 0.22228087484836578, "learning_rate": 2.335307703538771e-06, "loss": 0.0693, "step": 20850 }, { "epoch": 79.61832061068702, "grad_norm": 0.29664310812950134, "learning_rate": 2.330645689719962e-06, "loss": 0.0702, "step": 20860 }, { "epoch": 79.65648854961832, "grad_norm": 0.35011011362075806, "learning_rate": 2.3259869194183415e-06, "loss": 0.0662, "step": 20870 }, { "epoch": 79.69465648854961, "grad_norm": 0.5317111015319824, "learning_rate": 2.321331398294759e-06, "loss": 0.0675, "step": 20880 }, { "epoch": 79.73282442748092, "grad_norm": 0.7479346990585327, "learning_rate": 2.3166791320061095e-06, "loss": 0.0699, "step": 20890 }, { "epoch": 79.77099236641222, "grad_norm": 0.25332704186439514, "learning_rate": 2.312030126205335e-06, "loss": 0.0709, "step": 20900 }, { "epoch": 79.80916030534351, "grad_norm": 0.5418967604637146, "learning_rate": 2.3073843865414163e-06, "loss": 0.0714, "step": 20910 }, { "epoch": 79.8473282442748, "grad_norm": 0.5316504240036011, "learning_rate": 2.302741918659363e-06, "loss": 0.0735, "step": 20920 }, { "epoch": 79.8854961832061, "grad_norm": 0.31350257992744446, "learning_rate": 2.2981027282002155e-06, "loss": 0.0714, "step": 20930 }, { "epoch": 79.92366412213741, "grad_norm": 0.47436007857322693, "learning_rate": 2.2934668208010235e-06, "loss": 0.071, "step": 20940 }, { "epoch": 79.9618320610687, "grad_norm": 0.28028592467308044, "learning_rate": 2.2888342020948556e-06, "loss": 0.0701, "step": 20950 }, { "epoch": 80.0, "grad_norm": 0.25115305185317993, "learning_rate": 2.2842048777107783e-06, "loss": 0.0673, "step": 20960 }, { "epoch": 80.0381679389313, "grad_norm": 0.22422641515731812, "learning_rate": 2.2795788532738555e-06, "loss": 0.0737, "step": 20970 }, { "epoch": 80.07633587786259, "grad_norm": 0.31794339418411255, "learning_rate": 2.274956134405147e-06, "loss": 0.0674, "step": 20980 }, { "epoch": 80.1145038167939, "grad_norm": 0.38259294629096985, "learning_rate": 2.2703367267216896e-06, "loss": 0.0634, "step": 20990 }, { "epoch": 80.1526717557252, "grad_norm": 0.22496770322322845, "learning_rate": 2.2657206358365e-06, "loss": 0.0683, "step": 21000 }, { "epoch": 80.19083969465649, "grad_norm": 0.24763353168964386, "learning_rate": 2.261107867358563e-06, "loss": 0.0732, "step": 21010 }, { "epoch": 80.22900763358778, "grad_norm": 0.31791627407073975, "learning_rate": 2.2564984268928264e-06, "loss": 0.0807, "step": 21020 }, { "epoch": 80.26717557251908, "grad_norm": 0.2604450583457947, "learning_rate": 2.251892320040198e-06, "loss": 0.069, "step": 21030 }, { "epoch": 80.30534351145039, "grad_norm": 0.21588589251041412, "learning_rate": 2.2472895523975315e-06, "loss": 0.0731, "step": 21040 }, { "epoch": 80.34351145038168, "grad_norm": 0.8700875043869019, "learning_rate": 2.2426901295576215e-06, "loss": 0.0801, "step": 21050 }, { "epoch": 80.38167938931298, "grad_norm": 0.2903733551502228, "learning_rate": 2.2380940571092013e-06, "loss": 0.0656, "step": 21060 }, { "epoch": 80.41984732824427, "grad_norm": 0.41912519931793213, "learning_rate": 2.2335013406369303e-06, "loss": 0.0693, "step": 21070 }, { "epoch": 80.45801526717557, "grad_norm": 0.5005356669425964, "learning_rate": 2.228911985721397e-06, "loss": 0.0676, "step": 21080 }, { "epoch": 80.49618320610686, "grad_norm": 0.39985841512680054, "learning_rate": 2.224325997939095e-06, "loss": 0.0703, "step": 21090 }, { "epoch": 80.53435114503817, "grad_norm": 0.3365218937397003, "learning_rate": 2.2197433828624372e-06, "loss": 0.0669, "step": 21100 }, { "epoch": 80.57251908396947, "grad_norm": 0.2456941455602646, "learning_rate": 2.2151641460597295e-06, "loss": 0.0694, "step": 21110 }, { "epoch": 80.61068702290076, "grad_norm": 0.3186664879322052, "learning_rate": 2.210588293095177e-06, "loss": 0.0626, "step": 21120 }, { "epoch": 80.64885496183206, "grad_norm": 0.3170889616012573, "learning_rate": 2.2060158295288716e-06, "loss": 0.0628, "step": 21130 }, { "epoch": 80.68702290076335, "grad_norm": 0.49212417006492615, "learning_rate": 2.2014467609167904e-06, "loss": 0.07, "step": 21140 }, { "epoch": 80.72519083969466, "grad_norm": 0.35683688521385193, "learning_rate": 2.196881092810781e-06, "loss": 0.0707, "step": 21150 }, { "epoch": 80.76335877862596, "grad_norm": 0.20278224349021912, "learning_rate": 2.192318830758561e-06, "loss": 0.0693, "step": 21160 }, { "epoch": 80.80152671755725, "grad_norm": 0.27485206723213196, "learning_rate": 2.187759980303708e-06, "loss": 0.0648, "step": 21170 }, { "epoch": 80.83969465648855, "grad_norm": 0.3982328474521637, "learning_rate": 2.1832045469856544e-06, "loss": 0.0679, "step": 21180 }, { "epoch": 80.87786259541984, "grad_norm": 0.7986206412315369, "learning_rate": 2.178652536339684e-06, "loss": 0.073, "step": 21190 }, { "epoch": 80.91603053435115, "grad_norm": 0.45346391201019287, "learning_rate": 2.1741039538969184e-06, "loss": 0.0801, "step": 21200 }, { "epoch": 80.95419847328245, "grad_norm": 0.33075547218322754, "learning_rate": 2.169558805184313e-06, "loss": 0.0913, "step": 21210 }, { "epoch": 80.99236641221374, "grad_norm": 0.20161128044128418, "learning_rate": 2.165017095724651e-06, "loss": 0.0724, "step": 21220 }, { "epoch": 81.03053435114504, "grad_norm": 0.4609990417957306, "learning_rate": 2.1604788310365404e-06, "loss": 0.0798, "step": 21230 }, { "epoch": 81.06870229007633, "grad_norm": 0.9437888860702515, "learning_rate": 2.155944016634398e-06, "loss": 0.0825, "step": 21240 }, { "epoch": 81.10687022900764, "grad_norm": 0.3145671486854553, "learning_rate": 2.1514126580284538e-06, "loss": 0.0653, "step": 21250 }, { "epoch": 81.14503816793894, "grad_norm": 0.25964006781578064, "learning_rate": 2.1468847607247344e-06, "loss": 0.0665, "step": 21260 }, { "epoch": 81.18320610687023, "grad_norm": 0.41988322138786316, "learning_rate": 2.1423603302250625e-06, "loss": 0.0732, "step": 21270 }, { "epoch": 81.22137404580153, "grad_norm": 0.2630063593387604, "learning_rate": 2.137839372027047e-06, "loss": 0.073, "step": 21280 }, { "epoch": 81.25954198473282, "grad_norm": 0.42665886878967285, "learning_rate": 2.133321891624076e-06, "loss": 0.0777, "step": 21290 }, { "epoch": 81.29770992366412, "grad_norm": 0.6088788509368896, "learning_rate": 2.1288078945053194e-06, "loss": 0.0715, "step": 21300 }, { "epoch": 81.33587786259542, "grad_norm": 0.7648810744285583, "learning_rate": 2.1242973861557064e-06, "loss": 0.0714, "step": 21310 }, { "epoch": 81.37404580152672, "grad_norm": 0.4655439555644989, "learning_rate": 2.1197903720559303e-06, "loss": 0.0734, "step": 21320 }, { "epoch": 81.41221374045801, "grad_norm": 0.8302370309829712, "learning_rate": 2.1152868576824383e-06, "loss": 0.0769, "step": 21330 }, { "epoch": 81.45038167938931, "grad_norm": 0.7731935977935791, "learning_rate": 2.110786848507423e-06, "loss": 0.0725, "step": 21340 }, { "epoch": 81.4885496183206, "grad_norm": 0.8815107345581055, "learning_rate": 2.1062903499988235e-06, "loss": 0.0682, "step": 21350 }, { "epoch": 81.52671755725191, "grad_norm": 0.4499039649963379, "learning_rate": 2.101797367620308e-06, "loss": 0.0738, "step": 21360 }, { "epoch": 81.56488549618321, "grad_norm": 0.3854046165943146, "learning_rate": 2.0973079068312713e-06, "loss": 0.072, "step": 21370 }, { "epoch": 81.6030534351145, "grad_norm": 0.33728718757629395, "learning_rate": 2.0928219730868358e-06, "loss": 0.0666, "step": 21380 }, { "epoch": 81.6412213740458, "grad_norm": 0.24927888810634613, "learning_rate": 2.0883395718378304e-06, "loss": 0.0624, "step": 21390 }, { "epoch": 81.6793893129771, "grad_norm": 0.45682278275489807, "learning_rate": 2.083860708530798e-06, "loss": 0.0992, "step": 21400 }, { "epoch": 81.7175572519084, "grad_norm": 0.29621076583862305, "learning_rate": 2.0793853886079794e-06, "loss": 0.0766, "step": 21410 }, { "epoch": 81.7557251908397, "grad_norm": 0.34035375714302063, "learning_rate": 2.074913617507309e-06, "loss": 0.0694, "step": 21420 }, { "epoch": 81.79389312977099, "grad_norm": 0.23349541425704956, "learning_rate": 2.0704454006624116e-06, "loss": 0.0709, "step": 21430 }, { "epoch": 81.83206106870229, "grad_norm": 0.2720065414905548, "learning_rate": 2.0659807435025907e-06, "loss": 0.0791, "step": 21440 }, { "epoch": 81.87022900763358, "grad_norm": 0.24106143414974213, "learning_rate": 2.061519651452825e-06, "loss": 0.0716, "step": 21450 }, { "epoch": 81.90839694656489, "grad_norm": 0.5342702865600586, "learning_rate": 2.0570621299337656e-06, "loss": 0.073, "step": 21460 }, { "epoch": 81.94656488549619, "grad_norm": 0.3628579080104828, "learning_rate": 2.0526081843617183e-06, "loss": 0.0784, "step": 21470 }, { "epoch": 81.98473282442748, "grad_norm": 0.35211580991744995, "learning_rate": 2.0481578201486484e-06, "loss": 0.0673, "step": 21480 }, { "epoch": 82.02290076335878, "grad_norm": 0.44048014283180237, "learning_rate": 2.043711042702168e-06, "loss": 0.0757, "step": 21490 }, { "epoch": 82.06106870229007, "grad_norm": 0.5018337965011597, "learning_rate": 2.039267857425528e-06, "loss": 0.0699, "step": 21500 }, { "epoch": 82.09923664122137, "grad_norm": 0.4347798824310303, "learning_rate": 2.034828269717622e-06, "loss": 0.0703, "step": 21510 }, { "epoch": 82.13740458015268, "grad_norm": 0.2908450663089752, "learning_rate": 2.030392284972964e-06, "loss": 0.071, "step": 21520 }, { "epoch": 82.17557251908397, "grad_norm": 0.39640676975250244, "learning_rate": 2.0259599085816973e-06, "loss": 0.0735, "step": 21530 }, { "epoch": 82.21374045801527, "grad_norm": 0.8347475528717041, "learning_rate": 2.0215311459295757e-06, "loss": 0.0798, "step": 21540 }, { "epoch": 82.25190839694656, "grad_norm": 0.3542044758796692, "learning_rate": 2.0171060023979603e-06, "loss": 0.0669, "step": 21550 }, { "epoch": 82.29007633587786, "grad_norm": 0.5587815046310425, "learning_rate": 2.012684483363823e-06, "loss": 0.0728, "step": 21560 }, { "epoch": 82.32824427480917, "grad_norm": 0.2797441780567169, "learning_rate": 2.0082665941997236e-06, "loss": 0.0699, "step": 21570 }, { "epoch": 82.36641221374046, "grad_norm": 0.5870032906532288, "learning_rate": 2.0038523402738147e-06, "loss": 0.0684, "step": 21580 }, { "epoch": 82.40458015267176, "grad_norm": 0.608632504940033, "learning_rate": 1.99944172694983e-06, "loss": 0.0767, "step": 21590 }, { "epoch": 82.44274809160305, "grad_norm": 0.28920474648475647, "learning_rate": 1.99503475958708e-06, "loss": 0.0731, "step": 21600 }, { "epoch": 82.48091603053435, "grad_norm": 0.22390879690647125, "learning_rate": 1.9906314435404484e-06, "loss": 0.0676, "step": 21610 }, { "epoch": 82.51908396946565, "grad_norm": 0.3375746011734009, "learning_rate": 1.986231784160378e-06, "loss": 0.0724, "step": 21620 }, { "epoch": 82.55725190839695, "grad_norm": 0.37056073546409607, "learning_rate": 1.9818357867928697e-06, "loss": 0.0741, "step": 21630 }, { "epoch": 82.59541984732824, "grad_norm": 0.25039762258529663, "learning_rate": 1.9774434567794744e-06, "loss": 0.0709, "step": 21640 }, { "epoch": 82.63358778625954, "grad_norm": 0.33184704184532166, "learning_rate": 1.973054799457286e-06, "loss": 0.0685, "step": 21650 }, { "epoch": 82.67175572519083, "grad_norm": 0.4607788920402527, "learning_rate": 1.9686698201589395e-06, "loss": 0.0739, "step": 21660 }, { "epoch": 82.70992366412214, "grad_norm": 0.7115771770477295, "learning_rate": 1.9642885242125962e-06, "loss": 0.0819, "step": 21670 }, { "epoch": 82.74809160305344, "grad_norm": 0.3059196472167969, "learning_rate": 1.9599109169419467e-06, "loss": 0.065, "step": 21680 }, { "epoch": 82.78625954198473, "grad_norm": 0.2690870761871338, "learning_rate": 1.9555370036661946e-06, "loss": 0.069, "step": 21690 }, { "epoch": 82.82442748091603, "grad_norm": 0.8284600973129272, "learning_rate": 1.9511667897000577e-06, "loss": 0.0672, "step": 21700 }, { "epoch": 82.86259541984732, "grad_norm": 0.5146779417991638, "learning_rate": 1.946800280353755e-06, "loss": 0.063, "step": 21710 }, { "epoch": 82.90076335877862, "grad_norm": 0.27559417486190796, "learning_rate": 1.9424374809330117e-06, "loss": 0.0717, "step": 21720 }, { "epoch": 82.93893129770993, "grad_norm": 0.3968946933746338, "learning_rate": 1.938078396739038e-06, "loss": 0.0662, "step": 21730 }, { "epoch": 82.97709923664122, "grad_norm": 0.3083120286464691, "learning_rate": 1.9337230330685332e-06, "loss": 0.0686, "step": 21740 }, { "epoch": 83.01526717557252, "grad_norm": 0.46116316318511963, "learning_rate": 1.929371395213674e-06, "loss": 0.0809, "step": 21750 }, { "epoch": 83.05343511450381, "grad_norm": 0.3158531188964844, "learning_rate": 1.9250234884621093e-06, "loss": 0.0725, "step": 21760 }, { "epoch": 83.09160305343511, "grad_norm": 0.5462889671325684, "learning_rate": 1.9206793180969593e-06, "loss": 0.0671, "step": 21770 }, { "epoch": 83.12977099236642, "grad_norm": 0.4078521430492401, "learning_rate": 1.916338889396798e-06, "loss": 0.0725, "step": 21780 }, { "epoch": 83.16793893129771, "grad_norm": 0.46893805265426636, "learning_rate": 1.9120022076356577e-06, "loss": 0.0741, "step": 21790 }, { "epoch": 83.20610687022901, "grad_norm": 0.8775691986083984, "learning_rate": 1.9076692780830115e-06, "loss": 0.0706, "step": 21800 }, { "epoch": 83.2442748091603, "grad_norm": 0.24274039268493652, "learning_rate": 1.903340106003782e-06, "loss": 0.0711, "step": 21810 }, { "epoch": 83.2824427480916, "grad_norm": 0.5412287712097168, "learning_rate": 1.8990146966583183e-06, "loss": 0.0716, "step": 21820 }, { "epoch": 83.3206106870229, "grad_norm": 0.26575616002082825, "learning_rate": 1.8946930553024034e-06, "loss": 0.0693, "step": 21830 }, { "epoch": 83.3587786259542, "grad_norm": 0.39014455676078796, "learning_rate": 1.8903751871872377e-06, "loss": 0.0669, "step": 21840 }, { "epoch": 83.3969465648855, "grad_norm": 0.2963654398918152, "learning_rate": 1.8860610975594384e-06, "loss": 0.0783, "step": 21850 }, { "epoch": 83.43511450381679, "grad_norm": 0.2706157863140106, "learning_rate": 1.8817507916610307e-06, "loss": 0.0688, "step": 21860 }, { "epoch": 83.47328244274809, "grad_norm": 0.369393914937973, "learning_rate": 1.8774442747294407e-06, "loss": 0.0666, "step": 21870 }, { "epoch": 83.5114503816794, "grad_norm": 0.44239068031311035, "learning_rate": 1.8731415519974967e-06, "loss": 0.0754, "step": 21880 }, { "epoch": 83.54961832061069, "grad_norm": 0.29093948006629944, "learning_rate": 1.8688426286934102e-06, "loss": 0.0718, "step": 21890 }, { "epoch": 83.58778625954199, "grad_norm": 0.45363718271255493, "learning_rate": 1.864547510040779e-06, "loss": 0.0676, "step": 21900 }, { "epoch": 83.62595419847328, "grad_norm": 0.427654892206192, "learning_rate": 1.8602562012585768e-06, "loss": 0.0667, "step": 21910 }, { "epoch": 83.66412213740458, "grad_norm": 0.2690734267234802, "learning_rate": 1.8559687075611466e-06, "loss": 0.0648, "step": 21920 }, { "epoch": 83.70229007633588, "grad_norm": 0.26419100165367126, "learning_rate": 1.8516850341582015e-06, "loss": 0.0647, "step": 21930 }, { "epoch": 83.74045801526718, "grad_norm": 0.4640803337097168, "learning_rate": 1.847405186254807e-06, "loss": 0.0662, "step": 21940 }, { "epoch": 83.77862595419847, "grad_norm": 0.3314109742641449, "learning_rate": 1.8431291690513791e-06, "loss": 0.0684, "step": 21950 }, { "epoch": 83.81679389312977, "grad_norm": 1.6556986570358276, "learning_rate": 1.8388569877436863e-06, "loss": 0.0736, "step": 21960 }, { "epoch": 83.85496183206106, "grad_norm": 0.2407653033733368, "learning_rate": 1.834588647522828e-06, "loss": 0.0648, "step": 21970 }, { "epoch": 83.89312977099236, "grad_norm": 0.2633666694164276, "learning_rate": 1.8303241535752437e-06, "loss": 0.0793, "step": 21980 }, { "epoch": 83.93129770992367, "grad_norm": 0.3340645730495453, "learning_rate": 1.8260635110826936e-06, "loss": 0.0684, "step": 21990 }, { "epoch": 83.96946564885496, "grad_norm": 0.5088317394256592, "learning_rate": 1.82180672522226e-06, "loss": 0.0668, "step": 22000 }, { "epoch": 84.00763358778626, "grad_norm": 0.4002598822116852, "learning_rate": 1.817553801166339e-06, "loss": 0.0733, "step": 22010 }, { "epoch": 84.04580152671755, "grad_norm": 0.34704163670539856, "learning_rate": 1.8133047440826335e-06, "loss": 0.0655, "step": 22020 }, { "epoch": 84.08396946564885, "grad_norm": 0.27369430661201477, "learning_rate": 1.8090595591341509e-06, "loss": 0.0687, "step": 22030 }, { "epoch": 84.12213740458016, "grad_norm": 0.6098417043685913, "learning_rate": 1.8048182514791901e-06, "loss": 0.0667, "step": 22040 }, { "epoch": 84.16030534351145, "grad_norm": 0.3603808581829071, "learning_rate": 1.8005808262713399e-06, "loss": 0.0636, "step": 22050 }, { "epoch": 84.19847328244275, "grad_norm": 0.5261178016662598, "learning_rate": 1.7963472886594713e-06, "loss": 0.0674, "step": 22060 }, { "epoch": 84.23664122137404, "grad_norm": 0.27495747804641724, "learning_rate": 1.7921176437877302e-06, "loss": 0.0663, "step": 22070 }, { "epoch": 84.27480916030534, "grad_norm": 0.5118312835693359, "learning_rate": 1.7878918967955366e-06, "loss": 0.0722, "step": 22080 }, { "epoch": 84.31297709923665, "grad_norm": 0.2527947723865509, "learning_rate": 1.7836700528175693e-06, "loss": 0.0681, "step": 22090 }, { "epoch": 84.35114503816794, "grad_norm": 0.25550681352615356, "learning_rate": 1.7794521169837693e-06, "loss": 0.0694, "step": 22100 }, { "epoch": 84.38931297709924, "grad_norm": 0.6813188195228577, "learning_rate": 1.7752380944193248e-06, "loss": 0.0719, "step": 22110 }, { "epoch": 84.42748091603053, "grad_norm": 0.6162485480308533, "learning_rate": 1.771027990244671e-06, "loss": 0.0768, "step": 22120 }, { "epoch": 84.46564885496183, "grad_norm": 0.2738633453845978, "learning_rate": 1.7668218095754797e-06, "loss": 0.0703, "step": 22130 }, { "epoch": 84.50381679389314, "grad_norm": 0.9221097230911255, "learning_rate": 1.7626195575226595e-06, "loss": 0.0679, "step": 22140 }, { "epoch": 84.54198473282443, "grad_norm": 0.2497481107711792, "learning_rate": 1.7584212391923428e-06, "loss": 0.0721, "step": 22150 }, { "epoch": 84.58015267175573, "grad_norm": 0.36537232995033264, "learning_rate": 1.7542268596858813e-06, "loss": 0.0772, "step": 22160 }, { "epoch": 84.61832061068702, "grad_norm": 0.2652266025543213, "learning_rate": 1.7500364240998412e-06, "loss": 0.0693, "step": 22170 }, { "epoch": 84.65648854961832, "grad_norm": 0.3559548258781433, "learning_rate": 1.7458499375259957e-06, "loss": 0.0703, "step": 22180 }, { "epoch": 84.69465648854961, "grad_norm": 0.40493687987327576, "learning_rate": 1.7416674050513243e-06, "loss": 0.0613, "step": 22190 }, { "epoch": 84.73282442748092, "grad_norm": 0.38015782833099365, "learning_rate": 1.7374888317579968e-06, "loss": 0.0736, "step": 22200 }, { "epoch": 84.77099236641222, "grad_norm": 0.19811968505382538, "learning_rate": 1.7333142227233728e-06, "loss": 0.0659, "step": 22210 }, { "epoch": 84.80916030534351, "grad_norm": 0.5039312839508057, "learning_rate": 1.7291435830199954e-06, "loss": 0.0726, "step": 22220 }, { "epoch": 84.8473282442748, "grad_norm": 0.36674124002456665, "learning_rate": 1.7249769177155879e-06, "loss": 0.0657, "step": 22230 }, { "epoch": 84.8854961832061, "grad_norm": 0.7190355062484741, "learning_rate": 1.720814231873038e-06, "loss": 0.0783, "step": 22240 }, { "epoch": 84.92366412213741, "grad_norm": 0.3150536119937897, "learning_rate": 1.716655530550405e-06, "loss": 0.0755, "step": 22250 }, { "epoch": 84.9618320610687, "grad_norm": 0.29341939091682434, "learning_rate": 1.7125008188009018e-06, "loss": 0.0667, "step": 22260 }, { "epoch": 85.0, "grad_norm": 0.3069048225879669, "learning_rate": 1.7083501016728944e-06, "loss": 0.0674, "step": 22270 }, { "epoch": 85.0381679389313, "grad_norm": 0.3248658776283264, "learning_rate": 1.704203384209896e-06, "loss": 0.0649, "step": 22280 }, { "epoch": 85.07633587786259, "grad_norm": 0.2633165121078491, "learning_rate": 1.700060671450557e-06, "loss": 0.0705, "step": 22290 }, { "epoch": 85.1145038167939, "grad_norm": 0.2645620107650757, "learning_rate": 1.6959219684286681e-06, "loss": 0.062, "step": 22300 }, { "epoch": 85.1526717557252, "grad_norm": 0.25467944145202637, "learning_rate": 1.6917872801731417e-06, "loss": 0.0726, "step": 22310 }, { "epoch": 85.19083969465649, "grad_norm": 0.4649989902973175, "learning_rate": 1.6876566117080139e-06, "loss": 0.0737, "step": 22320 }, { "epoch": 85.22900763358778, "grad_norm": 0.4027472138404846, "learning_rate": 1.683529968052437e-06, "loss": 0.069, "step": 22330 }, { "epoch": 85.26717557251908, "grad_norm": 0.35890597105026245, "learning_rate": 1.679407354220669e-06, "loss": 0.0709, "step": 22340 }, { "epoch": 85.30534351145039, "grad_norm": 0.24897009134292603, "learning_rate": 1.6752887752220792e-06, "loss": 0.0682, "step": 22350 }, { "epoch": 85.34351145038168, "grad_norm": 0.31344956159591675, "learning_rate": 1.6711742360611277e-06, "loss": 0.0736, "step": 22360 }, { "epoch": 85.38167938931298, "grad_norm": 0.633032500743866, "learning_rate": 1.6670637417373652e-06, "loss": 0.0769, "step": 22370 }, { "epoch": 85.41984732824427, "grad_norm": 0.41895341873168945, "learning_rate": 1.6629572972454333e-06, "loss": 0.0666, "step": 22380 }, { "epoch": 85.45801526717557, "grad_norm": 0.25024113059043884, "learning_rate": 1.6588549075750466e-06, "loss": 0.075, "step": 22390 }, { "epoch": 85.49618320610686, "grad_norm": 0.3272351622581482, "learning_rate": 1.6547565777109975e-06, "loss": 0.0718, "step": 22400 }, { "epoch": 85.53435114503817, "grad_norm": 0.6027217507362366, "learning_rate": 1.6506623126331427e-06, "loss": 0.0696, "step": 22410 }, { "epoch": 85.57251908396947, "grad_norm": 0.4890408515930176, "learning_rate": 1.6465721173164e-06, "loss": 0.0708, "step": 22420 }, { "epoch": 85.61068702290076, "grad_norm": 0.39123833179473877, "learning_rate": 1.6424859967307427e-06, "loss": 0.0687, "step": 22430 }, { "epoch": 85.64885496183206, "grad_norm": 0.26333490014076233, "learning_rate": 1.6384039558411902e-06, "loss": 0.0725, "step": 22440 }, { "epoch": 85.68702290076335, "grad_norm": 0.32792478799819946, "learning_rate": 1.6343259996078109e-06, "loss": 0.0816, "step": 22450 }, { "epoch": 85.72519083969466, "grad_norm": 0.32284873723983765, "learning_rate": 1.6302521329857046e-06, "loss": 0.0686, "step": 22460 }, { "epoch": 85.76335877862596, "grad_norm": 0.35827475786209106, "learning_rate": 1.6261823609250027e-06, "loss": 0.0702, "step": 22470 }, { "epoch": 85.80152671755725, "grad_norm": 0.28327593207359314, "learning_rate": 1.6221166883708629e-06, "loss": 0.0657, "step": 22480 }, { "epoch": 85.83969465648855, "grad_norm": 0.2737085223197937, "learning_rate": 1.6180551202634603e-06, "loss": 0.0697, "step": 22490 }, { "epoch": 85.87786259541984, "grad_norm": 0.29437166452407837, "learning_rate": 1.613997661537981e-06, "loss": 0.0692, "step": 22500 }, { "epoch": 85.91603053435115, "grad_norm": 0.6566985249519348, "learning_rate": 1.6099443171246243e-06, "loss": 0.0664, "step": 22510 }, { "epoch": 85.95419847328245, "grad_norm": 0.3802522122859955, "learning_rate": 1.6058950919485823e-06, "loss": 0.0635, "step": 22520 }, { "epoch": 85.99236641221374, "grad_norm": 0.44286301732063293, "learning_rate": 1.6018499909300478e-06, "loss": 0.0697, "step": 22530 }, { "epoch": 86.03053435114504, "grad_norm": 0.2453600913286209, "learning_rate": 1.5978090189841988e-06, "loss": 0.0671, "step": 22540 }, { "epoch": 86.06870229007633, "grad_norm": 0.2875606417655945, "learning_rate": 1.5937721810211958e-06, "loss": 0.0676, "step": 22550 }, { "epoch": 86.10687022900764, "grad_norm": 0.7595479488372803, "learning_rate": 1.5897394819461815e-06, "loss": 0.078, "step": 22560 }, { "epoch": 86.14503816793894, "grad_norm": 0.2808679938316345, "learning_rate": 1.5857109266592624e-06, "loss": 0.0699, "step": 22570 }, { "epoch": 86.18320610687023, "grad_norm": 0.27129414677619934, "learning_rate": 1.5816865200555142e-06, "loss": 0.0662, "step": 22580 }, { "epoch": 86.22137404580153, "grad_norm": 0.27305206656455994, "learning_rate": 1.5776662670249704e-06, "loss": 0.0666, "step": 22590 }, { "epoch": 86.25954198473282, "grad_norm": 0.4132048487663269, "learning_rate": 1.573650172452615e-06, "loss": 0.0711, "step": 22600 }, { "epoch": 86.29770992366412, "grad_norm": 0.39085114002227783, "learning_rate": 1.5696382412183853e-06, "loss": 0.0721, "step": 22610 }, { "epoch": 86.33587786259542, "grad_norm": 0.570540726184845, "learning_rate": 1.5656304781971549e-06, "loss": 0.0726, "step": 22620 }, { "epoch": 86.37404580152672, "grad_norm": 0.452827513217926, "learning_rate": 1.5616268882587331e-06, "loss": 0.0698, "step": 22630 }, { "epoch": 86.41221374045801, "grad_norm": 0.2821395993232727, "learning_rate": 1.5576274762678594e-06, "loss": 0.0672, "step": 22640 }, { "epoch": 86.45038167938931, "grad_norm": 0.30585944652557373, "learning_rate": 1.5536322470841953e-06, "loss": 0.0714, "step": 22650 }, { "epoch": 86.4885496183206, "grad_norm": 1.8794187307357788, "learning_rate": 1.5496412055623238e-06, "loss": 0.0727, "step": 22660 }, { "epoch": 86.52671755725191, "grad_norm": 0.3682367205619812, "learning_rate": 1.5456543565517334e-06, "loss": 0.0684, "step": 22670 }, { "epoch": 86.56488549618321, "grad_norm": 0.6367046236991882, "learning_rate": 1.541671704896825e-06, "loss": 0.0734, "step": 22680 }, { "epoch": 86.6030534351145, "grad_norm": 0.2984490692615509, "learning_rate": 1.537693255436894e-06, "loss": 0.0801, "step": 22690 }, { "epoch": 86.6412213740458, "grad_norm": 0.26280051469802856, "learning_rate": 1.533719013006132e-06, "loss": 0.0734, "step": 22700 }, { "epoch": 86.6793893129771, "grad_norm": 0.6878067255020142, "learning_rate": 1.5297489824336165e-06, "loss": 0.0751, "step": 22710 }, { "epoch": 86.7175572519084, "grad_norm": 0.42184221744537354, "learning_rate": 1.5257831685433121e-06, "loss": 0.0679, "step": 22720 }, { "epoch": 86.7557251908397, "grad_norm": 0.33254826068878174, "learning_rate": 1.521821576154055e-06, "loss": 0.0673, "step": 22730 }, { "epoch": 86.79389312977099, "grad_norm": 0.20484282076358795, "learning_rate": 1.5178642100795543e-06, "loss": 0.0659, "step": 22740 }, { "epoch": 86.83206106870229, "grad_norm": 0.32377350330352783, "learning_rate": 1.5139110751283819e-06, "loss": 0.0704, "step": 22750 }, { "epoch": 86.87022900763358, "grad_norm": 0.3272833228111267, "learning_rate": 1.5099621761039684e-06, "loss": 0.0702, "step": 22760 }, { "epoch": 86.90839694656489, "grad_norm": 0.5103157162666321, "learning_rate": 1.5060175178046017e-06, "loss": 0.0779, "step": 22770 }, { "epoch": 86.94656488549619, "grad_norm": 0.2767445743083954, "learning_rate": 1.5020771050234118e-06, "loss": 0.065, "step": 22780 }, { "epoch": 86.98473282442748, "grad_norm": 0.6275362968444824, "learning_rate": 1.4981409425483716e-06, "loss": 0.065, "step": 22790 }, { "epoch": 87.02290076335878, "grad_norm": 0.2717098593711853, "learning_rate": 1.4942090351622884e-06, "loss": 0.0772, "step": 22800 }, { "epoch": 87.06106870229007, "grad_norm": 0.5612622499465942, "learning_rate": 1.490281387642804e-06, "loss": 0.0681, "step": 22810 }, { "epoch": 87.09923664122137, "grad_norm": 0.27303266525268555, "learning_rate": 1.4863580047623767e-06, "loss": 0.068, "step": 22820 }, { "epoch": 87.13740458015268, "grad_norm": 0.3088218867778778, "learning_rate": 1.4824388912882897e-06, "loss": 0.076, "step": 22830 }, { "epoch": 87.17557251908397, "grad_norm": 0.22109341621398926, "learning_rate": 1.4785240519826345e-06, "loss": 0.065, "step": 22840 }, { "epoch": 87.21374045801527, "grad_norm": 0.3760296404361725, "learning_rate": 1.4746134916023096e-06, "loss": 0.0653, "step": 22850 }, { "epoch": 87.25190839694656, "grad_norm": 0.4311428368091583, "learning_rate": 1.4707072148990142e-06, "loss": 0.0673, "step": 22860 }, { "epoch": 87.29007633587786, "grad_norm": 0.9990677237510681, "learning_rate": 1.4668052266192423e-06, "loss": 0.0692, "step": 22870 }, { "epoch": 87.32824427480917, "grad_norm": 0.2663988769054413, "learning_rate": 1.4629075315042795e-06, "loss": 0.0654, "step": 22880 }, { "epoch": 87.36641221374046, "grad_norm": 0.2906090021133423, "learning_rate": 1.4590141342901926e-06, "loss": 0.0667, "step": 22890 }, { "epoch": 87.40458015267176, "grad_norm": 0.5465324521064758, "learning_rate": 1.4551250397078253e-06, "loss": 0.0716, "step": 22900 }, { "epoch": 87.44274809160305, "grad_norm": 0.29929447174072266, "learning_rate": 1.4512402524827945e-06, "loss": 0.0755, "step": 22910 }, { "epoch": 87.48091603053435, "grad_norm": 0.3241725564002991, "learning_rate": 1.447359777335482e-06, "loss": 0.0635, "step": 22920 }, { "epoch": 87.51908396946565, "grad_norm": 0.28157156705856323, "learning_rate": 1.4434836189810337e-06, "loss": 0.0651, "step": 22930 }, { "epoch": 87.55725190839695, "grad_norm": 0.32432863116264343, "learning_rate": 1.4396117821293454e-06, "loss": 0.0682, "step": 22940 }, { "epoch": 87.59541984732824, "grad_norm": 0.39860352873802185, "learning_rate": 1.4357442714850634e-06, "loss": 0.0703, "step": 22950 }, { "epoch": 87.63358778625954, "grad_norm": 0.4769950211048126, "learning_rate": 1.43188109174758e-06, "loss": 0.0657, "step": 22960 }, { "epoch": 87.67175572519083, "grad_norm": 0.42243120074272156, "learning_rate": 1.4280222476110206e-06, "loss": 0.0674, "step": 22970 }, { "epoch": 87.70992366412214, "grad_norm": 0.6192082166671753, "learning_rate": 1.424167743764247e-06, "loss": 0.0724, "step": 22980 }, { "epoch": 87.74809160305344, "grad_norm": 0.6476142406463623, "learning_rate": 1.420317584890844e-06, "loss": 0.0671, "step": 22990 }, { "epoch": 87.78625954198473, "grad_norm": 0.6006608605384827, "learning_rate": 1.4164717756691176e-06, "loss": 0.0727, "step": 23000 }, { "epoch": 87.82442748091603, "grad_norm": 0.5018774271011353, "learning_rate": 1.4126303207720882e-06, "loss": 0.0644, "step": 23010 }, { "epoch": 87.86259541984732, "grad_norm": 0.19781306385993958, "learning_rate": 1.408793224867484e-06, "loss": 0.0683, "step": 23020 }, { "epoch": 87.90076335877862, "grad_norm": 0.3770202100276947, "learning_rate": 1.4049604926177423e-06, "loss": 0.0748, "step": 23030 }, { "epoch": 87.93893129770993, "grad_norm": 0.23254378139972687, "learning_rate": 1.4011321286799918e-06, "loss": 0.071, "step": 23040 }, { "epoch": 87.97709923664122, "grad_norm": 0.2871849238872528, "learning_rate": 1.3973081377060565e-06, "loss": 0.0685, "step": 23050 }, { "epoch": 88.01526717557252, "grad_norm": 0.34911373257637024, "learning_rate": 1.393488524342445e-06, "loss": 0.0669, "step": 23060 }, { "epoch": 88.05343511450381, "grad_norm": 0.23986873030662537, "learning_rate": 1.3896732932303485e-06, "loss": 0.0698, "step": 23070 }, { "epoch": 88.09160305343511, "grad_norm": 0.26337021589279175, "learning_rate": 1.3858624490056304e-06, "loss": 0.067, "step": 23080 }, { "epoch": 88.12977099236642, "grad_norm": 0.4079955220222473, "learning_rate": 1.38205599629883e-06, "loss": 0.0705, "step": 23090 }, { "epoch": 88.16793893129771, "grad_norm": 0.5138763785362244, "learning_rate": 1.378253939735142e-06, "loss": 0.0691, "step": 23100 }, { "epoch": 88.20610687022901, "grad_norm": 0.4184453785419464, "learning_rate": 1.3744562839344267e-06, "loss": 0.0688, "step": 23110 }, { "epoch": 88.2442748091603, "grad_norm": 0.24194218218326569, "learning_rate": 1.3706630335111932e-06, "loss": 0.0689, "step": 23120 }, { "epoch": 88.2824427480916, "grad_norm": 0.48007258772850037, "learning_rate": 1.3668741930745966e-06, "loss": 0.0766, "step": 23130 }, { "epoch": 88.3206106870229, "grad_norm": 0.2578524351119995, "learning_rate": 1.3630897672284382e-06, "loss": 0.0634, "step": 23140 }, { "epoch": 88.3587786259542, "grad_norm": 0.40297532081604004, "learning_rate": 1.3593097605711508e-06, "loss": 0.0656, "step": 23150 }, { "epoch": 88.3969465648855, "grad_norm": 0.31493544578552246, "learning_rate": 1.3555341776957992e-06, "loss": 0.0729, "step": 23160 }, { "epoch": 88.43511450381679, "grad_norm": 0.2946300208568573, "learning_rate": 1.3517630231900724e-06, "loss": 0.0722, "step": 23170 }, { "epoch": 88.47328244274809, "grad_norm": 0.23656029999256134, "learning_rate": 1.3479963016362768e-06, "loss": 0.0721, "step": 23180 }, { "epoch": 88.5114503816794, "grad_norm": 0.3117351233959198, "learning_rate": 1.3442340176113378e-06, "loss": 0.0699, "step": 23190 }, { "epoch": 88.54961832061069, "grad_norm": 0.4488414227962494, "learning_rate": 1.340476175686784e-06, "loss": 0.0683, "step": 23200 }, { "epoch": 88.58778625954199, "grad_norm": 0.3813345730304718, "learning_rate": 1.336722780428747e-06, "loss": 0.0699, "step": 23210 }, { "epoch": 88.62595419847328, "grad_norm": 0.2527889311313629, "learning_rate": 1.3329738363979561e-06, "loss": 0.0717, "step": 23220 }, { "epoch": 88.66412213740458, "grad_norm": 0.5992899537086487, "learning_rate": 1.329229348149731e-06, "loss": 0.07, "step": 23230 }, { "epoch": 88.70229007633588, "grad_norm": 1.4383306503295898, "learning_rate": 1.3254893202339798e-06, "loss": 0.0725, "step": 23240 }, { "epoch": 88.74045801526718, "grad_norm": 0.48335134983062744, "learning_rate": 1.3217537571951872e-06, "loss": 0.0736, "step": 23250 }, { "epoch": 88.77862595419847, "grad_norm": 0.23111307621002197, "learning_rate": 1.3180226635724169e-06, "loss": 0.0851, "step": 23260 }, { "epoch": 88.81679389312977, "grad_norm": 2.051677942276001, "learning_rate": 1.314296043899298e-06, "loss": 0.0734, "step": 23270 }, { "epoch": 88.85496183206106, "grad_norm": 0.24527736008167267, "learning_rate": 1.3105739027040248e-06, "loss": 0.0708, "step": 23280 }, { "epoch": 88.89312977099236, "grad_norm": 0.3417549729347229, "learning_rate": 1.306856244509349e-06, "loss": 0.0741, "step": 23290 }, { "epoch": 88.93129770992367, "grad_norm": 0.42347151041030884, "learning_rate": 1.3031430738325778e-06, "loss": 0.0738, "step": 23300 }, { "epoch": 88.96946564885496, "grad_norm": 0.3796163499355316, "learning_rate": 1.299434395185563e-06, "loss": 0.0675, "step": 23310 }, { "epoch": 89.00763358778626, "grad_norm": 0.5870466828346252, "learning_rate": 1.295730213074699e-06, "loss": 0.0689, "step": 23320 }, { "epoch": 89.04580152671755, "grad_norm": 0.3642314076423645, "learning_rate": 1.2920305320009153e-06, "loss": 0.064, "step": 23330 }, { "epoch": 89.08396946564885, "grad_norm": 0.23387549817562103, "learning_rate": 1.2883353564596729e-06, "loss": 0.0675, "step": 23340 }, { "epoch": 89.12213740458016, "grad_norm": 0.29250991344451904, "learning_rate": 1.2846446909409605e-06, "loss": 0.0627, "step": 23350 }, { "epoch": 89.16030534351145, "grad_norm": 0.3102540075778961, "learning_rate": 1.280958539929284e-06, "loss": 0.0674, "step": 23360 }, { "epoch": 89.19847328244275, "grad_norm": 0.2419544756412506, "learning_rate": 1.2772769079036639e-06, "loss": 0.0644, "step": 23370 }, { "epoch": 89.23664122137404, "grad_norm": 0.26334354281425476, "learning_rate": 1.2735997993376299e-06, "loss": 0.0717, "step": 23380 }, { "epoch": 89.27480916030534, "grad_norm": 0.30459722876548767, "learning_rate": 1.2699272186992168e-06, "loss": 0.0671, "step": 23390 }, { "epoch": 89.31297709923665, "grad_norm": 0.49876540899276733, "learning_rate": 1.2662591704509548e-06, "loss": 0.068, "step": 23400 }, { "epoch": 89.35114503816794, "grad_norm": 0.8938238620758057, "learning_rate": 1.2625956590498712e-06, "loss": 0.0736, "step": 23410 }, { "epoch": 89.38931297709924, "grad_norm": 0.3588089942932129, "learning_rate": 1.2589366889474758e-06, "loss": 0.0731, "step": 23420 }, { "epoch": 89.42748091603053, "grad_norm": 0.24369336664676666, "learning_rate": 1.2552822645897623e-06, "loss": 0.0688, "step": 23430 }, { "epoch": 89.46564885496183, "grad_norm": 0.9177067279815674, "learning_rate": 1.2516323904172001e-06, "loss": 0.0654, "step": 23440 }, { "epoch": 89.50381679389314, "grad_norm": 0.2567913830280304, "learning_rate": 1.2479870708647324e-06, "loss": 0.0669, "step": 23450 }, { "epoch": 89.54198473282443, "grad_norm": 1.152028203010559, "learning_rate": 1.2443463103617658e-06, "loss": 0.066, "step": 23460 }, { "epoch": 89.58015267175573, "grad_norm": 0.28860822319984436, "learning_rate": 1.240710113332167e-06, "loss": 0.068, "step": 23470 }, { "epoch": 89.61832061068702, "grad_norm": 0.17750433087348938, "learning_rate": 1.2370784841942584e-06, "loss": 0.0676, "step": 23480 }, { "epoch": 89.65648854961832, "grad_norm": 0.28479427099227905, "learning_rate": 1.2334514273608117e-06, "loss": 0.0668, "step": 23490 }, { "epoch": 89.69465648854961, "grad_norm": 0.2883140444755554, "learning_rate": 1.2298289472390417e-06, "loss": 0.0837, "step": 23500 }, { "epoch": 89.73282442748092, "grad_norm": 0.22831976413726807, "learning_rate": 1.226211048230606e-06, "loss": 0.073, "step": 23510 }, { "epoch": 89.77099236641222, "grad_norm": 0.20338012278079987, "learning_rate": 1.222597734731592e-06, "loss": 0.0617, "step": 23520 }, { "epoch": 89.80916030534351, "grad_norm": 0.4108172655105591, "learning_rate": 1.2189890111325149e-06, "loss": 0.0682, "step": 23530 }, { "epoch": 89.8473282442748, "grad_norm": 0.3931494951248169, "learning_rate": 1.2153848818183161e-06, "loss": 0.0656, "step": 23540 }, { "epoch": 89.8854961832061, "grad_norm": 0.5028300285339355, "learning_rate": 1.2117853511683509e-06, "loss": 0.0717, "step": 23550 }, { "epoch": 89.92366412213741, "grad_norm": 0.5321047902107239, "learning_rate": 1.2081904235563908e-06, "loss": 0.0713, "step": 23560 }, { "epoch": 89.9618320610687, "grad_norm": 0.40350303053855896, "learning_rate": 1.20460010335061e-06, "loss": 0.0666, "step": 23570 }, { "epoch": 90.0, "grad_norm": 0.23981277644634247, "learning_rate": 1.2010143949135866e-06, "loss": 0.0726, "step": 23580 }, { "epoch": 90.0381679389313, "grad_norm": 0.3623555600643158, "learning_rate": 1.1974333026022939e-06, "loss": 0.0692, "step": 23590 }, { "epoch": 90.07633587786259, "grad_norm": 0.23631367087364197, "learning_rate": 1.1938568307680965e-06, "loss": 0.0698, "step": 23600 }, { "epoch": 90.1145038167939, "grad_norm": 0.4967484176158905, "learning_rate": 1.1902849837567466e-06, "loss": 0.0695, "step": 23610 }, { "epoch": 90.1526717557252, "grad_norm": 0.24320781230926514, "learning_rate": 1.186717765908374e-06, "loss": 0.0705, "step": 23620 }, { "epoch": 90.19083969465649, "grad_norm": 0.7703472375869751, "learning_rate": 1.1831551815574848e-06, "loss": 0.0642, "step": 23630 }, { "epoch": 90.22900763358778, "grad_norm": 0.24756668508052826, "learning_rate": 1.1795972350329554e-06, "loss": 0.0712, "step": 23640 }, { "epoch": 90.26717557251908, "grad_norm": 0.45252394676208496, "learning_rate": 1.176043930658024e-06, "loss": 0.0674, "step": 23650 }, { "epoch": 90.30534351145039, "grad_norm": 0.47805851697921753, "learning_rate": 1.1724952727502942e-06, "loss": 0.0754, "step": 23660 }, { "epoch": 90.34351145038168, "grad_norm": 0.6508886814117432, "learning_rate": 1.1689512656217178e-06, "loss": 0.0732, "step": 23670 }, { "epoch": 90.38167938931298, "grad_norm": 0.37030789256095886, "learning_rate": 1.1654119135785964e-06, "loss": 0.072, "step": 23680 }, { "epoch": 90.41984732824427, "grad_norm": 0.43612009286880493, "learning_rate": 1.1618772209215795e-06, "loss": 0.0714, "step": 23690 }, { "epoch": 90.45801526717557, "grad_norm": 0.3412275016307831, "learning_rate": 1.1583471919456506e-06, "loss": 0.0713, "step": 23700 }, { "epoch": 90.49618320610686, "grad_norm": 0.24611587822437286, "learning_rate": 1.1548218309401267e-06, "loss": 0.075, "step": 23710 }, { "epoch": 90.53435114503817, "grad_norm": 0.4314908981323242, "learning_rate": 1.1513011421886554e-06, "loss": 0.0646, "step": 23720 }, { "epoch": 90.57251908396947, "grad_norm": 0.38820695877075195, "learning_rate": 1.1477851299692056e-06, "loss": 0.0712, "step": 23730 }, { "epoch": 90.61068702290076, "grad_norm": 0.28595876693725586, "learning_rate": 1.1442737985540631e-06, "loss": 0.0646, "step": 23740 }, { "epoch": 90.64885496183206, "grad_norm": 0.6347960233688354, "learning_rate": 1.1407671522098262e-06, "loss": 0.0855, "step": 23750 }, { "epoch": 90.68702290076335, "grad_norm": 0.19028426706790924, "learning_rate": 1.1372651951974001e-06, "loss": 0.0702, "step": 23760 }, { "epoch": 90.72519083969466, "grad_norm": 0.49181249737739563, "learning_rate": 1.1337679317719952e-06, "loss": 0.0758, "step": 23770 }, { "epoch": 90.76335877862596, "grad_norm": 0.3415592312812805, "learning_rate": 1.130275366183115e-06, "loss": 0.0721, "step": 23780 }, { "epoch": 90.80152671755725, "grad_norm": 0.3656644821166992, "learning_rate": 1.1267875026745562e-06, "loss": 0.0716, "step": 23790 }, { "epoch": 90.83969465648855, "grad_norm": 0.2646436393260956, "learning_rate": 1.1233043454844017e-06, "loss": 0.067, "step": 23800 }, { "epoch": 90.87786259541984, "grad_norm": 0.7572306394577026, "learning_rate": 1.1198258988450145e-06, "loss": 0.0698, "step": 23810 }, { "epoch": 90.91603053435115, "grad_norm": 0.42010462284088135, "learning_rate": 1.116352166983037e-06, "loss": 0.0675, "step": 23820 }, { "epoch": 90.95419847328245, "grad_norm": 0.26916852593421936, "learning_rate": 1.112883154119382e-06, "loss": 0.0651, "step": 23830 }, { "epoch": 90.99236641221374, "grad_norm": 0.30210235714912415, "learning_rate": 1.1094188644692255e-06, "loss": 0.0734, "step": 23840 }, { "epoch": 91.03053435114504, "grad_norm": 0.41560983657836914, "learning_rate": 1.1059593022420067e-06, "loss": 0.063, "step": 23850 }, { "epoch": 91.06870229007633, "grad_norm": 0.37110573053359985, "learning_rate": 1.1025044716414185e-06, "loss": 0.0748, "step": 23860 }, { "epoch": 91.10687022900764, "grad_norm": 0.5967647433280945, "learning_rate": 1.0990543768654084e-06, "loss": 0.0675, "step": 23870 }, { "epoch": 91.14503816793894, "grad_norm": 0.2824907600879669, "learning_rate": 1.0956090221061655e-06, "loss": 0.0612, "step": 23880 }, { "epoch": 91.18320610687023, "grad_norm": 0.536435604095459, "learning_rate": 1.0921684115501208e-06, "loss": 0.06, "step": 23890 }, { "epoch": 91.22137404580153, "grad_norm": 0.25722911953926086, "learning_rate": 1.0887325493779405e-06, "loss": 0.0671, "step": 23900 }, { "epoch": 91.25954198473282, "grad_norm": 0.28526943922042847, "learning_rate": 1.0853014397645205e-06, "loss": 0.0684, "step": 23910 }, { "epoch": 91.29770992366412, "grad_norm": 0.28562769293785095, "learning_rate": 1.0818750868789828e-06, "loss": 0.0637, "step": 23920 }, { "epoch": 91.33587786259542, "grad_norm": 0.3431946039199829, "learning_rate": 1.0784534948846704e-06, "loss": 0.0651, "step": 23930 }, { "epoch": 91.37404580152672, "grad_norm": 1.2695426940917969, "learning_rate": 1.0750366679391393e-06, "loss": 0.0718, "step": 23940 }, { "epoch": 91.41221374045801, "grad_norm": 0.27434664964675903, "learning_rate": 1.0716246101941558e-06, "loss": 0.0692, "step": 23950 }, { "epoch": 91.45038167938931, "grad_norm": 0.30581653118133545, "learning_rate": 1.0682173257956935e-06, "loss": 0.0685, "step": 23960 }, { "epoch": 91.4885496183206, "grad_norm": 0.2575470209121704, "learning_rate": 1.064814818883922e-06, "loss": 0.0675, "step": 23970 }, { "epoch": 91.52671755725191, "grad_norm": 0.31820449233055115, "learning_rate": 1.06141709359321e-06, "loss": 0.0752, "step": 23980 }, { "epoch": 91.56488549618321, "grad_norm": 0.5742216110229492, "learning_rate": 1.0580241540521142e-06, "loss": 0.0847, "step": 23990 }, { "epoch": 91.6030534351145, "grad_norm": 0.545124351978302, "learning_rate": 1.0546360043833747e-06, "loss": 0.0692, "step": 24000 }, { "epoch": 91.6412213740458, "grad_norm": 0.27798354625701904, "learning_rate": 1.0512526487039138e-06, "loss": 0.071, "step": 24010 }, { "epoch": 91.6793893129771, "grad_norm": 0.6156341433525085, "learning_rate": 1.0478740911248259e-06, "loss": 0.0694, "step": 24020 }, { "epoch": 91.7175572519084, "grad_norm": 0.318088173866272, "learning_rate": 1.0445003357513794e-06, "loss": 0.0695, "step": 24030 }, { "epoch": 91.7557251908397, "grad_norm": 0.28530198335647583, "learning_rate": 1.0411313866830042e-06, "loss": 0.0656, "step": 24040 }, { "epoch": 91.79389312977099, "grad_norm": 0.717268168926239, "learning_rate": 1.0377672480132917e-06, "loss": 0.0689, "step": 24050 }, { "epoch": 91.83206106870229, "grad_norm": 0.2844495475292206, "learning_rate": 1.0344079238299865e-06, "loss": 0.0672, "step": 24060 }, { "epoch": 91.87022900763358, "grad_norm": 0.8445849418640137, "learning_rate": 1.0310534182149835e-06, "loss": 0.074, "step": 24070 }, { "epoch": 91.90839694656489, "grad_norm": 0.6028487682342529, "learning_rate": 1.0277037352443258e-06, "loss": 0.0714, "step": 24080 }, { "epoch": 91.94656488549619, "grad_norm": 0.29483121633529663, "learning_rate": 1.0243588789881931e-06, "loss": 0.0693, "step": 24090 }, { "epoch": 91.98473282442748, "grad_norm": 0.2845158576965332, "learning_rate": 1.0210188535108995e-06, "loss": 0.0692, "step": 24100 }, { "epoch": 92.02290076335878, "grad_norm": 0.28095564246177673, "learning_rate": 1.0176836628708937e-06, "loss": 0.0705, "step": 24110 }, { "epoch": 92.06106870229007, "grad_norm": 0.6553061008453369, "learning_rate": 1.0143533111207455e-06, "loss": 0.0724, "step": 24120 }, { "epoch": 92.09923664122137, "grad_norm": 0.22242268919944763, "learning_rate": 1.0110278023071445e-06, "loss": 0.0688, "step": 24130 }, { "epoch": 92.13740458015268, "grad_norm": 0.4113728106021881, "learning_rate": 1.007707140470901e-06, "loss": 0.0726, "step": 24140 }, { "epoch": 92.17557251908397, "grad_norm": 0.22345533967018127, "learning_rate": 1.00439132964693e-06, "loss": 0.0668, "step": 24150 }, { "epoch": 92.21374045801527, "grad_norm": 0.7509258389472961, "learning_rate": 1.001080373864255e-06, "loss": 0.0783, "step": 24160 }, { "epoch": 92.25190839694656, "grad_norm": 0.3895869255065918, "learning_rate": 9.977742771459992e-07, "loss": 0.0698, "step": 24170 }, { "epoch": 92.29007633587786, "grad_norm": 0.360806941986084, "learning_rate": 9.944730435093803e-07, "loss": 0.0658, "step": 24180 }, { "epoch": 92.32824427480917, "grad_norm": 0.6112274527549744, "learning_rate": 9.911766769657116e-07, "loss": 0.0724, "step": 24190 }, { "epoch": 92.36641221374046, "grad_norm": 0.5387519598007202, "learning_rate": 9.878851815203883e-07, "loss": 0.0705, "step": 24200 }, { "epoch": 92.40458015267176, "grad_norm": 0.9864296913146973, "learning_rate": 9.845985611728864e-07, "loss": 0.0681, "step": 24210 }, { "epoch": 92.44274809160305, "grad_norm": 0.337456613779068, "learning_rate": 9.813168199167604e-07, "loss": 0.0694, "step": 24220 }, { "epoch": 92.48091603053435, "grad_norm": 0.3321627974510193, "learning_rate": 9.78039961739634e-07, "loss": 0.0657, "step": 24230 }, { "epoch": 92.51908396946565, "grad_norm": 0.5132209062576294, "learning_rate": 9.747679906232016e-07, "loss": 0.0679, "step": 24240 }, { "epoch": 92.55725190839695, "grad_norm": 0.503035843372345, "learning_rate": 9.71500910543214e-07, "loss": 0.0673, "step": 24250 }, { "epoch": 92.59541984732824, "grad_norm": 0.21332845091819763, "learning_rate": 9.682387254694835e-07, "loss": 0.073, "step": 24260 }, { "epoch": 92.63358778625954, "grad_norm": 0.32724636793136597, "learning_rate": 9.649814393658725e-07, "loss": 0.0662, "step": 24270 }, { "epoch": 92.67175572519083, "grad_norm": 0.607822597026825, "learning_rate": 9.61729056190288e-07, "loss": 0.0681, "step": 24280 }, { "epoch": 92.70992366412214, "grad_norm": 0.3636825978755951, "learning_rate": 9.584815798946862e-07, "loss": 0.072, "step": 24290 }, { "epoch": 92.74809160305344, "grad_norm": 0.22937163710594177, "learning_rate": 9.552390144250552e-07, "loss": 0.0665, "step": 24300 }, { "epoch": 92.78625954198473, "grad_norm": 0.2677508294582367, "learning_rate": 9.520013637214176e-07, "loss": 0.0675, "step": 24310 }, { "epoch": 92.82442748091603, "grad_norm": 0.2615829408168793, "learning_rate": 9.487686317178241e-07, "loss": 0.0653, "step": 24320 }, { "epoch": 92.86259541984732, "grad_norm": 0.2584886848926544, "learning_rate": 9.455408223423496e-07, "loss": 0.0643, "step": 24330 }, { "epoch": 92.90076335877862, "grad_norm": 0.24199534952640533, "learning_rate": 9.423179395170845e-07, "loss": 0.0702, "step": 24340 }, { "epoch": 92.93893129770993, "grad_norm": 0.3466721475124359, "learning_rate": 9.390999871581391e-07, "loss": 0.0767, "step": 24350 }, { "epoch": 92.97709923664122, "grad_norm": 0.5451226830482483, "learning_rate": 9.358869691756273e-07, "loss": 0.0705, "step": 24360 }, { "epoch": 93.01526717557252, "grad_norm": 0.5285539627075195, "learning_rate": 9.326788894736688e-07, "loss": 0.0692, "step": 24370 }, { "epoch": 93.05343511450381, "grad_norm": 0.5585796236991882, "learning_rate": 9.294757519503811e-07, "loss": 0.0677, "step": 24380 }, { "epoch": 93.09160305343511, "grad_norm": 0.31735959649086, "learning_rate": 9.262775604978819e-07, "loss": 0.073, "step": 24390 }, { "epoch": 93.12977099236642, "grad_norm": 0.2296765148639679, "learning_rate": 9.230843190022726e-07, "loss": 0.0698, "step": 24400 }, { "epoch": 93.16793893129771, "grad_norm": 0.5008897185325623, "learning_rate": 9.198960313436444e-07, "loss": 0.0874, "step": 24410 }, { "epoch": 93.20610687022901, "grad_norm": 0.4472108483314514, "learning_rate": 9.16712701396067e-07, "loss": 0.0697, "step": 24420 }, { "epoch": 93.2442748091603, "grad_norm": 0.2684642970561981, "learning_rate": 9.135343330275864e-07, "loss": 0.0654, "step": 24430 }, { "epoch": 93.2824427480916, "grad_norm": 0.4187399744987488, "learning_rate": 9.103609301002181e-07, "loss": 0.0692, "step": 24440 }, { "epoch": 93.3206106870229, "grad_norm": 0.6167700886726379, "learning_rate": 9.071924964699491e-07, "loss": 0.0724, "step": 24450 }, { "epoch": 93.3587786259542, "grad_norm": 0.34823620319366455, "learning_rate": 9.040290359867232e-07, "loss": 0.0674, "step": 24460 }, { "epoch": 93.3969465648855, "grad_norm": 0.27436694502830505, "learning_rate": 9.008705524944439e-07, "loss": 0.0692, "step": 24470 }, { "epoch": 93.43511450381679, "grad_norm": 0.2943238914012909, "learning_rate": 8.977170498309651e-07, "loss": 0.0635, "step": 24480 }, { "epoch": 93.47328244274809, "grad_norm": 0.6278515458106995, "learning_rate": 8.945685318280917e-07, "loss": 0.079, "step": 24490 }, { "epoch": 93.5114503816794, "grad_norm": 0.23879125714302063, "learning_rate": 8.914250023115672e-07, "loss": 0.0696, "step": 24500 }, { "epoch": 93.54961832061069, "grad_norm": 0.35707348585128784, "learning_rate": 8.882864651010798e-07, "loss": 0.0733, "step": 24510 }, { "epoch": 93.58778625954199, "grad_norm": 0.3247864246368408, "learning_rate": 8.851529240102464e-07, "loss": 0.0663, "step": 24520 }, { "epoch": 93.62595419847328, "grad_norm": 0.4654683768749237, "learning_rate": 8.820243828466135e-07, "loss": 0.072, "step": 24530 }, { "epoch": 93.66412213740458, "grad_norm": 0.504030704498291, "learning_rate": 8.789008454116566e-07, "loss": 0.0712, "step": 24540 }, { "epoch": 93.70229007633588, "grad_norm": 0.6368444561958313, "learning_rate": 8.757823155007655e-07, "loss": 0.0685, "step": 24550 }, { "epoch": 93.74045801526718, "grad_norm": 0.28386011719703674, "learning_rate": 8.72668796903251e-07, "loss": 0.0714, "step": 24560 }, { "epoch": 93.77862595419847, "grad_norm": 0.3366830348968506, "learning_rate": 8.6956029340233e-07, "loss": 0.0704, "step": 24570 }, { "epoch": 93.81679389312977, "grad_norm": 0.391422837972641, "learning_rate": 8.664568087751274e-07, "loss": 0.069, "step": 24580 }, { "epoch": 93.85496183206106, "grad_norm": 0.41178223490715027, "learning_rate": 8.633583467926698e-07, "loss": 0.0687, "step": 24590 }, { "epoch": 93.89312977099236, "grad_norm": 0.23154325783252716, "learning_rate": 8.602649112198796e-07, "loss": 0.0677, "step": 24600 }, { "epoch": 93.93129770992367, "grad_norm": 0.3986658751964569, "learning_rate": 8.571765058155745e-07, "loss": 0.0698, "step": 24610 }, { "epoch": 93.96946564885496, "grad_norm": 0.3332964777946472, "learning_rate": 8.540931343324582e-07, "loss": 0.0714, "step": 24620 }, { "epoch": 94.00763358778626, "grad_norm": 0.2473592758178711, "learning_rate": 8.510148005171171e-07, "loss": 0.0684, "step": 24630 }, { "epoch": 94.04580152671755, "grad_norm": 0.5109586715698242, "learning_rate": 8.479415081100167e-07, "loss": 0.0721, "step": 24640 }, { "epoch": 94.08396946564885, "grad_norm": 0.2871462404727936, "learning_rate": 8.448732608454968e-07, "loss": 0.065, "step": 24650 }, { "epoch": 94.12213740458016, "grad_norm": 0.27044838666915894, "learning_rate": 8.418100624517688e-07, "loss": 0.0678, "step": 24660 }, { "epoch": 94.16030534351145, "grad_norm": 0.2179548740386963, "learning_rate": 8.387519166509062e-07, "loss": 0.0654, "step": 24670 }, { "epoch": 94.19847328244275, "grad_norm": 0.2881402373313904, "learning_rate": 8.356988271588445e-07, "loss": 0.0738, "step": 24680 }, { "epoch": 94.23664122137404, "grad_norm": 0.23480482399463654, "learning_rate": 8.326507976853765e-07, "loss": 0.067, "step": 24690 }, { "epoch": 94.27480916030534, "grad_norm": 0.5004714131355286, "learning_rate": 8.296078319341444e-07, "loss": 0.0707, "step": 24700 }, { "epoch": 94.31297709923665, "grad_norm": 0.41305282711982727, "learning_rate": 8.265699336026384e-07, "loss": 0.0795, "step": 24710 }, { "epoch": 94.35114503816794, "grad_norm": 0.7068077325820923, "learning_rate": 8.235371063821923e-07, "loss": 0.0639, "step": 24720 }, { "epoch": 94.38931297709924, "grad_norm": 0.3450511693954468, "learning_rate": 8.205093539579768e-07, "loss": 0.0691, "step": 24730 }, { "epoch": 94.42748091603053, "grad_norm": 0.3504796624183655, "learning_rate": 8.174866800089964e-07, "loss": 0.0697, "step": 24740 }, { "epoch": 94.46564885496183, "grad_norm": 0.9486528635025024, "learning_rate": 8.144690882080853e-07, "loss": 0.0706, "step": 24750 }, { "epoch": 94.50381679389314, "grad_norm": 0.4473707973957062, "learning_rate": 8.114565822219006e-07, "loss": 0.0683, "step": 24760 }, { "epoch": 94.54198473282443, "grad_norm": 0.2520901560783386, "learning_rate": 8.084491657109233e-07, "loss": 0.0697, "step": 24770 }, { "epoch": 94.58015267175573, "grad_norm": 0.31963804364204407, "learning_rate": 8.054468423294476e-07, "loss": 0.072, "step": 24780 }, { "epoch": 94.61832061068702, "grad_norm": 0.6297641396522522, "learning_rate": 8.024496157255784e-07, "loss": 0.0716, "step": 24790 }, { "epoch": 94.65648854961832, "grad_norm": 0.4397490322589874, "learning_rate": 7.994574895412294e-07, "loss": 0.0687, "step": 24800 }, { "epoch": 94.69465648854961, "grad_norm": 0.3805553615093231, "learning_rate": 7.964704674121149e-07, "loss": 0.0654, "step": 24810 }, { "epoch": 94.73282442748092, "grad_norm": 0.3672832250595093, "learning_rate": 7.934885529677505e-07, "loss": 0.068, "step": 24820 }, { "epoch": 94.77099236641222, "grad_norm": 0.2700408399105072, "learning_rate": 7.905117498314413e-07, "loss": 0.0652, "step": 24830 }, { "epoch": 94.80916030534351, "grad_norm": 0.20384560525417328, "learning_rate": 7.875400616202861e-07, "loss": 0.074, "step": 24840 }, { "epoch": 94.8473282442748, "grad_norm": 0.27636826038360596, "learning_rate": 7.845734919451647e-07, "loss": 0.0737, "step": 24850 }, { "epoch": 94.8854961832061, "grad_norm": 0.9613207578659058, "learning_rate": 7.816120444107384e-07, "loss": 0.0823, "step": 24860 }, { "epoch": 94.92366412213741, "grad_norm": 0.3635960817337036, "learning_rate": 7.786557226154473e-07, "loss": 0.0747, "step": 24870 }, { "epoch": 94.9618320610687, "grad_norm": 0.276128888130188, "learning_rate": 7.757045301514998e-07, "loss": 0.0715, "step": 24880 }, { "epoch": 95.0, "grad_norm": 0.4147992432117462, "learning_rate": 7.727584706048735e-07, "loss": 0.0695, "step": 24890 }, { "epoch": 95.0381679389313, "grad_norm": 0.3812028467655182, "learning_rate": 7.698175475553076e-07, "loss": 0.0689, "step": 24900 }, { "epoch": 95.07633587786259, "grad_norm": 0.3648328483104706, "learning_rate": 7.668817645763021e-07, "loss": 0.073, "step": 24910 }, { "epoch": 95.1145038167939, "grad_norm": 0.4475592076778412, "learning_rate": 7.63951125235109e-07, "loss": 0.0769, "step": 24920 }, { "epoch": 95.1526717557252, "grad_norm": 0.3380667269229889, "learning_rate": 7.610256330927323e-07, "loss": 0.0676, "step": 24930 }, { "epoch": 95.19083969465649, "grad_norm": 0.8378775119781494, "learning_rate": 7.581052917039211e-07, "loss": 0.0733, "step": 24940 }, { "epoch": 95.22900763358778, "grad_norm": 0.21355819702148438, "learning_rate": 7.551901046171645e-07, "loss": 0.0696, "step": 24950 }, { "epoch": 95.26717557251908, "grad_norm": 0.41850680112838745, "learning_rate": 7.522800753746895e-07, "loss": 0.0662, "step": 24960 }, { "epoch": 95.30534351145039, "grad_norm": 0.3715193271636963, "learning_rate": 7.493752075124577e-07, "loss": 0.0705, "step": 24970 }, { "epoch": 95.34351145038168, "grad_norm": 0.6367314457893372, "learning_rate": 7.464755045601557e-07, "loss": 0.0645, "step": 24980 }, { "epoch": 95.38167938931298, "grad_norm": 0.2814936935901642, "learning_rate": 7.435809700411972e-07, "loss": 0.0774, "step": 24990 }, { "epoch": 95.41984732824427, "grad_norm": 0.22556699812412262, "learning_rate": 7.40691607472715e-07, "loss": 0.067, "step": 25000 }, { "epoch": 95.45801526717557, "grad_norm": 0.8220465183258057, "learning_rate": 7.378074203655561e-07, "loss": 0.0731, "step": 25010 }, { "epoch": 95.49618320610686, "grad_norm": 0.2574046552181244, "learning_rate": 7.349284122242783e-07, "loss": 0.0736, "step": 25020 }, { "epoch": 95.53435114503817, "grad_norm": 0.5456118583679199, "learning_rate": 7.320545865471513e-07, "loss": 0.0725, "step": 25030 }, { "epoch": 95.57251908396947, "grad_norm": 0.6138007044792175, "learning_rate": 7.291859468261426e-07, "loss": 0.0662, "step": 25040 }, { "epoch": 95.61068702290076, "grad_norm": 0.7047519683837891, "learning_rate": 7.263224965469195e-07, "loss": 0.0729, "step": 25050 }, { "epoch": 95.64885496183206, "grad_norm": 0.20071052014827728, "learning_rate": 7.234642391888446e-07, "loss": 0.0697, "step": 25060 }, { "epoch": 95.68702290076335, "grad_norm": 0.43756791949272156, "learning_rate": 7.206111782249698e-07, "loss": 0.0681, "step": 25070 }, { "epoch": 95.72519083969466, "grad_norm": 0.4008914530277252, "learning_rate": 7.177633171220339e-07, "loss": 0.0742, "step": 25080 }, { "epoch": 95.76335877862596, "grad_norm": 0.9968006014823914, "learning_rate": 7.149206593404562e-07, "loss": 0.0692, "step": 25090 }, { "epoch": 95.80152671755725, "grad_norm": 0.2601996958255768, "learning_rate": 7.120832083343337e-07, "loss": 0.0672, "step": 25100 }, { "epoch": 95.83969465648855, "grad_norm": 0.2922241687774658, "learning_rate": 7.092509675514369e-07, "loss": 0.0655, "step": 25110 }, { "epoch": 95.87786259541984, "grad_norm": 0.40154197812080383, "learning_rate": 7.064239404332063e-07, "loss": 0.0678, "step": 25120 }, { "epoch": 95.91603053435115, "grad_norm": 0.8301547169685364, "learning_rate": 7.03602130414745e-07, "loss": 0.0677, "step": 25130 }, { "epoch": 95.95419847328245, "grad_norm": 0.28282907605171204, "learning_rate": 7.007855409248199e-07, "loss": 0.066, "step": 25140 }, { "epoch": 95.99236641221374, "grad_norm": 0.278046578168869, "learning_rate": 6.979741753858521e-07, "loss": 0.072, "step": 25150 }, { "epoch": 96.03053435114504, "grad_norm": 0.22601468861103058, "learning_rate": 6.951680372139158e-07, "loss": 0.0701, "step": 25160 }, { "epoch": 96.06870229007633, "grad_norm": 0.3387024700641632, "learning_rate": 6.923671298187335e-07, "loss": 0.0757, "step": 25170 }, { "epoch": 96.10687022900764, "grad_norm": 0.26798492670059204, "learning_rate": 6.895714566036705e-07, "loss": 0.0624, "step": 25180 }, { "epoch": 96.14503816793894, "grad_norm": 0.2048361450433731, "learning_rate": 6.86781020965736e-07, "loss": 0.0675, "step": 25190 }, { "epoch": 96.18320610687023, "grad_norm": 0.4887497127056122, "learning_rate": 6.839958262955709e-07, "loss": 0.0733, "step": 25200 }, { "epoch": 96.22137404580153, "grad_norm": 0.37875017523765564, "learning_rate": 6.812158759774489e-07, "loss": 0.065, "step": 25210 }, { "epoch": 96.25954198473282, "grad_norm": 0.3595539629459381, "learning_rate": 6.784411733892732e-07, "loss": 0.0682, "step": 25220 }, { "epoch": 96.29770992366412, "grad_norm": 0.44329530000686646, "learning_rate": 6.756717219025666e-07, "loss": 0.0657, "step": 25230 }, { "epoch": 96.33587786259542, "grad_norm": 0.3791472613811493, "learning_rate": 6.729075248824762e-07, "loss": 0.0641, "step": 25240 }, { "epoch": 96.37404580152672, "grad_norm": 0.5714471936225891, "learning_rate": 6.701485856877615e-07, "loss": 0.0714, "step": 25250 }, { "epoch": 96.41221374045801, "grad_norm": 0.5484432578086853, "learning_rate": 6.673949076707925e-07, "loss": 0.0691, "step": 25260 }, { "epoch": 96.45038167938931, "grad_norm": 0.32033249735832214, "learning_rate": 6.646464941775499e-07, "loss": 0.0655, "step": 25270 }, { "epoch": 96.4885496183206, "grad_norm": 0.4129583537578583, "learning_rate": 6.619033485476129e-07, "loss": 0.0723, "step": 25280 }, { "epoch": 96.52671755725191, "grad_norm": 0.216600239276886, "learning_rate": 6.591654741141639e-07, "loss": 0.0672, "step": 25290 }, { "epoch": 96.56488549618321, "grad_norm": 0.29276999831199646, "learning_rate": 6.564328742039782e-07, "loss": 0.0637, "step": 25300 }, { "epoch": 96.6030534351145, "grad_norm": 0.20918267965316772, "learning_rate": 6.537055521374219e-07, "loss": 0.0667, "step": 25310 }, { "epoch": 96.6412213740458, "grad_norm": 0.5599342584609985, "learning_rate": 6.509835112284485e-07, "loss": 0.0699, "step": 25320 }, { "epoch": 96.6793893129771, "grad_norm": 0.253470242023468, "learning_rate": 6.482667547845944e-07, "loss": 0.069, "step": 25330 }, { "epoch": 96.7175572519084, "grad_norm": 0.2381214201450348, "learning_rate": 6.455552861069736e-07, "loss": 0.0646, "step": 25340 }, { "epoch": 96.7557251908397, "grad_norm": 0.3462165296077728, "learning_rate": 6.428491084902788e-07, "loss": 0.0656, "step": 25350 }, { "epoch": 96.79389312977099, "grad_norm": 0.2496246099472046, "learning_rate": 6.401482252227697e-07, "loss": 0.0689, "step": 25360 }, { "epoch": 96.83206106870229, "grad_norm": 0.4224330484867096, "learning_rate": 6.37452639586274e-07, "loss": 0.0723, "step": 25370 }, { "epoch": 96.87022900763358, "grad_norm": 0.22836147248744965, "learning_rate": 6.347623548561827e-07, "loss": 0.0675, "step": 25380 }, { "epoch": 96.90839694656489, "grad_norm": 0.33980873227119446, "learning_rate": 6.320773743014441e-07, "loss": 0.0696, "step": 25390 }, { "epoch": 96.94656488549619, "grad_norm": 0.5132980942726135, "learning_rate": 6.293977011845648e-07, "loss": 0.0704, "step": 25400 }, { "epoch": 96.98473282442748, "grad_norm": 0.2276553362607956, "learning_rate": 6.267233387615984e-07, "loss": 0.068, "step": 25410 }, { "epoch": 97.02290076335878, "grad_norm": 0.3203546106815338, "learning_rate": 6.2405429028215e-07, "loss": 0.077, "step": 25420 }, { "epoch": 97.06106870229007, "grad_norm": 0.5133867859840393, "learning_rate": 6.213905589893632e-07, "loss": 0.0696, "step": 25430 }, { "epoch": 97.09923664122137, "grad_norm": 0.810599684715271, "learning_rate": 6.187321481199221e-07, "loss": 0.07, "step": 25440 }, { "epoch": 97.13740458015268, "grad_norm": 0.36244019865989685, "learning_rate": 6.16079060904049e-07, "loss": 0.0705, "step": 25450 }, { "epoch": 97.17557251908397, "grad_norm": 0.383052259683609, "learning_rate": 6.134313005654929e-07, "loss": 0.0666, "step": 25460 }, { "epoch": 97.21374045801527, "grad_norm": 0.5795229077339172, "learning_rate": 6.107888703215337e-07, "loss": 0.0699, "step": 25470 }, { "epoch": 97.25190839694656, "grad_norm": 0.37245771288871765, "learning_rate": 6.081517733829723e-07, "loss": 0.0668, "step": 25480 }, { "epoch": 97.29007633587786, "grad_norm": 0.47016429901123047, "learning_rate": 6.055200129541294e-07, "loss": 0.0689, "step": 25490 }, { "epoch": 97.32824427480917, "grad_norm": 0.21799707412719727, "learning_rate": 6.028935922328444e-07, "loss": 0.0626, "step": 25500 }, { "epoch": 97.36641221374046, "grad_norm": 0.2931235432624817, "learning_rate": 6.002725144104649e-07, "loss": 0.0733, "step": 25510 }, { "epoch": 97.40458015267176, "grad_norm": 0.5701701641082764, "learning_rate": 5.976567826718476e-07, "loss": 0.0735, "step": 25520 }, { "epoch": 97.44274809160305, "grad_norm": 0.5489487648010254, "learning_rate": 5.950464001953532e-07, "loss": 0.0786, "step": 25530 }, { "epoch": 97.48091603053435, "grad_norm": 0.4438866376876831, "learning_rate": 5.924413701528415e-07, "loss": 0.0679, "step": 25540 }, { "epoch": 97.51908396946565, "grad_norm": 0.6267343163490295, "learning_rate": 5.898416957096704e-07, "loss": 0.0724, "step": 25550 }, { "epoch": 97.55725190839695, "grad_norm": 0.3363583981990814, "learning_rate": 5.872473800246914e-07, "loss": 0.0681, "step": 25560 }, { "epoch": 97.59541984732824, "grad_norm": 0.5211305022239685, "learning_rate": 5.846584262502403e-07, "loss": 0.0794, "step": 25570 }, { "epoch": 97.63358778625954, "grad_norm": 0.32496604323387146, "learning_rate": 5.820748375321411e-07, "loss": 0.0661, "step": 25580 }, { "epoch": 97.67175572519083, "grad_norm": 0.29008033871650696, "learning_rate": 5.794966170096977e-07, "loss": 0.0665, "step": 25590 }, { "epoch": 97.70992366412214, "grad_norm": 0.4152953624725342, "learning_rate": 5.769237678156897e-07, "loss": 0.0687, "step": 25600 }, { "epoch": 97.74809160305344, "grad_norm": 0.33877304196357727, "learning_rate": 5.743562930763735e-07, "loss": 0.067, "step": 25610 }, { "epoch": 97.78625954198473, "grad_norm": 0.2797228693962097, "learning_rate": 5.717941959114726e-07, "loss": 0.076, "step": 25620 }, { "epoch": 97.82442748091603, "grad_norm": 0.3312159776687622, "learning_rate": 5.69237479434176e-07, "loss": 0.0684, "step": 25630 }, { "epoch": 97.86259541984732, "grad_norm": 0.36744144558906555, "learning_rate": 5.666861467511353e-07, "loss": 0.0686, "step": 25640 }, { "epoch": 97.90076335877862, "grad_norm": 0.40221524238586426, "learning_rate": 5.641402009624591e-07, "loss": 0.0697, "step": 25650 }, { "epoch": 97.93893129770993, "grad_norm": 0.8071361184120178, "learning_rate": 5.615996451617145e-07, "loss": 0.0737, "step": 25660 }, { "epoch": 97.97709923664122, "grad_norm": 0.2889866232872009, "learning_rate": 5.590644824359148e-07, "loss": 0.0682, "step": 25670 }, { "epoch": 98.01526717557252, "grad_norm": 0.27220702171325684, "learning_rate": 5.56534715865521e-07, "loss": 0.066, "step": 25680 }, { "epoch": 98.05343511450381, "grad_norm": 0.2493731528520584, "learning_rate": 5.540103485244397e-07, "loss": 0.062, "step": 25690 }, { "epoch": 98.09160305343511, "grad_norm": 0.33816832304000854, "learning_rate": 5.514913834800134e-07, "loss": 0.0676, "step": 25700 }, { "epoch": 98.12977099236642, "grad_norm": 0.7667673230171204, "learning_rate": 5.489778237930238e-07, "loss": 0.0755, "step": 25710 }, { "epoch": 98.16793893129771, "grad_norm": 0.35118091106414795, "learning_rate": 5.464696725176827e-07, "loss": 0.0683, "step": 25720 }, { "epoch": 98.20610687022901, "grad_norm": 0.28815001249313354, "learning_rate": 5.439669327016295e-07, "loss": 0.0702, "step": 25730 }, { "epoch": 98.2442748091603, "grad_norm": 0.5837283134460449, "learning_rate": 5.414696073859299e-07, "loss": 0.0691, "step": 25740 }, { "epoch": 98.2824427480916, "grad_norm": 0.37461745738983154, "learning_rate": 5.389776996050695e-07, "loss": 0.0708, "step": 25750 }, { "epoch": 98.3206106870229, "grad_norm": 0.23216131329536438, "learning_rate": 5.364912123869493e-07, "loss": 0.0671, "step": 25760 }, { "epoch": 98.3587786259542, "grad_norm": 0.3143027424812317, "learning_rate": 5.340101487528887e-07, "loss": 0.0693, "step": 25770 }, { "epoch": 98.3969465648855, "grad_norm": 0.35901618003845215, "learning_rate": 5.315345117176129e-07, "loss": 0.0713, "step": 25780 }, { "epoch": 98.43511450381679, "grad_norm": 0.22512131929397583, "learning_rate": 5.290643042892541e-07, "loss": 0.0714, "step": 25790 }, { "epoch": 98.47328244274809, "grad_norm": 0.2000698745250702, "learning_rate": 5.265995294693472e-07, "loss": 0.0681, "step": 25800 }, { "epoch": 98.5114503816794, "grad_norm": 1.0218034982681274, "learning_rate": 5.241401902528253e-07, "loss": 0.0682, "step": 25810 }, { "epoch": 98.54961832061069, "grad_norm": 0.3218908905982971, "learning_rate": 5.21686289628019e-07, "loss": 0.066, "step": 25820 }, { "epoch": 98.58778625954199, "grad_norm": 0.4360622465610504, "learning_rate": 5.192378305766472e-07, "loss": 0.0684, "step": 25830 }, { "epoch": 98.62595419847328, "grad_norm": 0.2918654680252075, "learning_rate": 5.167948160738206e-07, "loss": 0.0634, "step": 25840 }, { "epoch": 98.66412213740458, "grad_norm": 0.5152965188026428, "learning_rate": 5.143572490880311e-07, "loss": 0.0678, "step": 25850 }, { "epoch": 98.70229007633588, "grad_norm": 1.0082851648330688, "learning_rate": 5.119251325811513e-07, "loss": 0.066, "step": 25860 }, { "epoch": 98.74045801526718, "grad_norm": 0.28817683458328247, "learning_rate": 5.094984695084348e-07, "loss": 0.0629, "step": 25870 }, { "epoch": 98.77862595419847, "grad_norm": 0.5672718286514282, "learning_rate": 5.070772628185039e-07, "loss": 0.0688, "step": 25880 }, { "epoch": 98.81679389312977, "grad_norm": 0.7285252213478088, "learning_rate": 5.046615154533535e-07, "loss": 0.0681, "step": 25890 }, { "epoch": 98.85496183206106, "grad_norm": 0.4292791783809662, "learning_rate": 5.022512303483451e-07, "loss": 0.0658, "step": 25900 }, { "epoch": 98.89312977099236, "grad_norm": 0.20751267671585083, "learning_rate": 4.998464104322015e-07, "loss": 0.0703, "step": 25910 }, { "epoch": 98.93129770992367, "grad_norm": 0.2615070939064026, "learning_rate": 4.974470586270047e-07, "loss": 0.0651, "step": 25920 }, { "epoch": 98.96946564885496, "grad_norm": 0.2248445302248001, "learning_rate": 4.950531778481965e-07, "loss": 0.0672, "step": 25930 }, { "epoch": 99.00763358778626, "grad_norm": 0.4531436264514923, "learning_rate": 4.926647710045652e-07, "loss": 0.0625, "step": 25940 }, { "epoch": 99.04580152671755, "grad_norm": 0.4915529787540436, "learning_rate": 4.902818409982513e-07, "loss": 0.066, "step": 25950 }, { "epoch": 99.08396946564885, "grad_norm": 0.28831353783607483, "learning_rate": 4.879043907247383e-07, "loss": 0.0672, "step": 25960 }, { "epoch": 99.12213740458016, "grad_norm": 0.2686160206794739, "learning_rate": 4.855324230728542e-07, "loss": 0.0696, "step": 25970 }, { "epoch": 99.16030534351145, "grad_norm": 0.3270827829837799, "learning_rate": 4.831659409247619e-07, "loss": 0.0645, "step": 25980 }, { "epoch": 99.19847328244275, "grad_norm": 0.25841906666755676, "learning_rate": 4.808049471559617e-07, "loss": 0.0696, "step": 25990 }, { "epoch": 99.23664122137404, "grad_norm": 0.4139740467071533, "learning_rate": 4.784494446352833e-07, "loss": 0.0721, "step": 26000 }, { "epoch": 99.27480916030534, "grad_norm": 0.6838223338127136, "learning_rate": 4.7609943622488333e-07, "loss": 0.0711, "step": 26010 }, { "epoch": 99.31297709923665, "grad_norm": 0.5432331562042236, "learning_rate": 4.737549247802442e-07, "loss": 0.0803, "step": 26020 }, { "epoch": 99.35114503816794, "grad_norm": 0.7913231253623962, "learning_rate": 4.714159131501689e-07, "loss": 0.0696, "step": 26030 }, { "epoch": 99.38931297709924, "grad_norm": 0.5764281749725342, "learning_rate": 4.690824041767766e-07, "loss": 0.0753, "step": 26040 }, { "epoch": 99.42748091603053, "grad_norm": 0.793904185295105, "learning_rate": 4.66754400695501e-07, "loss": 0.0798, "step": 26050 }, { "epoch": 99.46564885496183, "grad_norm": 0.38731345534324646, "learning_rate": 4.6443190553508597e-07, "loss": 0.0695, "step": 26060 }, { "epoch": 99.50381679389314, "grad_norm": 0.3625410199165344, "learning_rate": 4.6211492151758076e-07, "loss": 0.0681, "step": 26070 }, { "epoch": 99.54198473282443, "grad_norm": 0.47990864515304565, "learning_rate": 4.598034514583416e-07, "loss": 0.0696, "step": 26080 }, { "epoch": 99.58015267175573, "grad_norm": 0.2346879243850708, "learning_rate": 4.574974981660213e-07, "loss": 0.0675, "step": 26090 }, { "epoch": 99.61832061068702, "grad_norm": 0.29999473690986633, "learning_rate": 4.5519706444257073e-07, "loss": 0.0679, "step": 26100 }, { "epoch": 99.65648854961832, "grad_norm": 0.516541063785553, "learning_rate": 4.529021530832328e-07, "loss": 0.0648, "step": 26110 }, { "epoch": 99.69465648854961, "grad_norm": 0.32835254073143005, "learning_rate": 4.506127668765431e-07, "loss": 0.0666, "step": 26120 }, { "epoch": 99.73282442748092, "grad_norm": 0.3316701054573059, "learning_rate": 4.483289086043197e-07, "loss": 0.0686, "step": 26130 }, { "epoch": 99.77099236641222, "grad_norm": 0.27133581042289734, "learning_rate": 4.460505810416682e-07, "loss": 0.0713, "step": 26140 }, { "epoch": 99.80916030534351, "grad_norm": 0.6455039978027344, "learning_rate": 4.437777869569698e-07, "loss": 0.0716, "step": 26150 }, { "epoch": 99.8473282442748, "grad_norm": 0.3414127826690674, "learning_rate": 4.415105291118843e-07, "loss": 0.0654, "step": 26160 }, { "epoch": 99.8854961832061, "grad_norm": 0.28300490975379944, "learning_rate": 4.392488102613435e-07, "loss": 0.0684, "step": 26170 }, { "epoch": 99.92366412213741, "grad_norm": 0.3191199004650116, "learning_rate": 4.3699263315354735e-07, "loss": 0.0706, "step": 26180 }, { "epoch": 99.9618320610687, "grad_norm": 0.25766992568969727, "learning_rate": 4.3474200052996685e-07, "loss": 0.0659, "step": 26190 }, { "epoch": 100.0, "grad_norm": 0.3972707986831665, "learning_rate": 4.324969151253317e-07, "loss": 0.0705, "step": 26200 }, { "epoch": 100.0381679389313, "grad_norm": 0.21596112847328186, "learning_rate": 4.302573796676313e-07, "loss": 0.0675, "step": 26210 }, { "epoch": 100.07633587786259, "grad_norm": 0.3397051692008972, "learning_rate": 4.280233968781139e-07, "loss": 0.0658, "step": 26220 }, { "epoch": 100.1145038167939, "grad_norm": 0.32072919607162476, "learning_rate": 4.257949694712771e-07, "loss": 0.0702, "step": 26230 }, { "epoch": 100.1526717557252, "grad_norm": 0.2912977337837219, "learning_rate": 4.235721001548726e-07, "loss": 0.0697, "step": 26240 }, { "epoch": 100.19083969465649, "grad_norm": 0.3459882140159607, "learning_rate": 4.21354791629896e-07, "loss": 0.069, "step": 26250 }, { "epoch": 100.22900763358778, "grad_norm": 0.19958975911140442, "learning_rate": 4.1914304659058437e-07, "loss": 0.0626, "step": 26260 }, { "epoch": 100.26717557251908, "grad_norm": 0.35708945989608765, "learning_rate": 4.1693686772441846e-07, "loss": 0.0687, "step": 26270 }, { "epoch": 100.30534351145039, "grad_norm": 0.7189772129058838, "learning_rate": 4.1473625771211224e-07, "loss": 0.0748, "step": 26280 }, { "epoch": 100.34351145038168, "grad_norm": 0.21925044059753418, "learning_rate": 4.125412192276157e-07, "loss": 0.0681, "step": 26290 }, { "epoch": 100.38167938931298, "grad_norm": 0.3142671585083008, "learning_rate": 4.1035175493810696e-07, "loss": 0.0725, "step": 26300 }, { "epoch": 100.41984732824427, "grad_norm": 0.5476031303405762, "learning_rate": 4.081678675039913e-07, "loss": 0.0667, "step": 26310 }, { "epoch": 100.45801526717557, "grad_norm": 0.2075609415769577, "learning_rate": 4.059895595788987e-07, "loss": 0.0669, "step": 26320 }, { "epoch": 100.49618320610686, "grad_norm": 0.3402668237686157, "learning_rate": 4.038168338096776e-07, "loss": 0.0715, "step": 26330 }, { "epoch": 100.53435114503817, "grad_norm": 0.2339000403881073, "learning_rate": 4.016496928363944e-07, "loss": 0.0724, "step": 26340 }, { "epoch": 100.57251908396947, "grad_norm": 0.435660183429718, "learning_rate": 3.994881392923317e-07, "loss": 0.072, "step": 26350 }, { "epoch": 100.61068702290076, "grad_norm": 0.24794331192970276, "learning_rate": 3.973321758039794e-07, "loss": 0.0633, "step": 26360 }, { "epoch": 100.64885496183206, "grad_norm": 0.2800830602645874, "learning_rate": 3.9518180499103665e-07, "loss": 0.0644, "step": 26370 }, { "epoch": 100.68702290076335, "grad_norm": 0.25040721893310547, "learning_rate": 3.930370294664071e-07, "loss": 0.0698, "step": 26380 }, { "epoch": 100.72519083969466, "grad_norm": 0.2757255733013153, "learning_rate": 3.9089785183619386e-07, "loss": 0.0644, "step": 26390 }, { "epoch": 100.76335877862596, "grad_norm": 0.6161481738090515, "learning_rate": 3.8876427469970167e-07, "loss": 0.0693, "step": 26400 }, { "epoch": 100.80152671755725, "grad_norm": 0.24093779921531677, "learning_rate": 3.866363006494256e-07, "loss": 0.0653, "step": 26410 }, { "epoch": 100.83969465648855, "grad_norm": 0.27081120014190674, "learning_rate": 3.845139322710573e-07, "loss": 0.0662, "step": 26420 }, { "epoch": 100.87786259541984, "grad_norm": 0.2640981376171112, "learning_rate": 3.823971721434727e-07, "loss": 0.0657, "step": 26430 }, { "epoch": 100.91603053435115, "grad_norm": 1.1142594814300537, "learning_rate": 3.8028602283873504e-07, "loss": 0.0662, "step": 26440 }, { "epoch": 100.95419847328245, "grad_norm": 0.3841104209423065, "learning_rate": 3.781804869220912e-07, "loss": 0.07, "step": 26450 }, { "epoch": 100.99236641221374, "grad_norm": 0.23676466941833496, "learning_rate": 3.760805669519646e-07, "loss": 0.0676, "step": 26460 }, { "epoch": 101.03053435114504, "grad_norm": 0.7228853702545166, "learning_rate": 3.7398626547995585e-07, "loss": 0.0881, "step": 26470 }, { "epoch": 101.06870229007633, "grad_norm": 0.665624737739563, "learning_rate": 3.7189758505083994e-07, "loss": 0.0639, "step": 26480 }, { "epoch": 101.10687022900764, "grad_norm": 0.30101853609085083, "learning_rate": 3.6981452820255836e-07, "loss": 0.0621, "step": 26490 }, { "epoch": 101.14503816793894, "grad_norm": 0.36952438950538635, "learning_rate": 3.6773709746622365e-07, "loss": 0.0672, "step": 26500 }, { "epoch": 101.18320610687023, "grad_norm": 0.528901219367981, "learning_rate": 3.6566529536611e-07, "loss": 0.07, "step": 26510 }, { "epoch": 101.22137404580153, "grad_norm": 0.3763343393802643, "learning_rate": 3.635991244196513e-07, "loss": 0.0788, "step": 26520 }, { "epoch": 101.25954198473282, "grad_norm": 0.33685335516929626, "learning_rate": 3.615385871374405e-07, "loss": 0.0696, "step": 26530 }, { "epoch": 101.29770992366412, "grad_norm": 0.2437128871679306, "learning_rate": 3.594836860232237e-07, "loss": 0.0682, "step": 26540 }, { "epoch": 101.33587786259542, "grad_norm": 0.2727994918823242, "learning_rate": 3.574344235739019e-07, "loss": 0.0698, "step": 26550 }, { "epoch": 101.37404580152672, "grad_norm": 0.6007474064826965, "learning_rate": 3.553908022795194e-07, "loss": 0.0701, "step": 26560 }, { "epoch": 101.41221374045801, "grad_norm": 0.23269033432006836, "learning_rate": 3.5335282462327093e-07, "loss": 0.0656, "step": 26570 }, { "epoch": 101.45038167938931, "grad_norm": 0.3645854592323303, "learning_rate": 3.513204930814912e-07, "loss": 0.068, "step": 26580 }, { "epoch": 101.4885496183206, "grad_norm": 0.19710974395275116, "learning_rate": 3.4929381012365424e-07, "loss": 0.0682, "step": 26590 }, { "epoch": 101.52671755725191, "grad_norm": 0.5920630097389221, "learning_rate": 3.472727782123697e-07, "loss": 0.0692, "step": 26600 }, { "epoch": 101.56488549618321, "grad_norm": 0.7211366891860962, "learning_rate": 3.452573998033842e-07, "loss": 0.0689, "step": 26610 }, { "epoch": 101.6030534351145, "grad_norm": 0.2851048409938812, "learning_rate": 3.432476773455712e-07, "loss": 0.0753, "step": 26620 }, { "epoch": 101.6412213740458, "grad_norm": 0.38267195224761963, "learning_rate": 3.412436132809338e-07, "loss": 0.068, "step": 26630 }, { "epoch": 101.6793893129771, "grad_norm": 0.5825901031494141, "learning_rate": 3.392452100445975e-07, "loss": 0.0682, "step": 26640 }, { "epoch": 101.7175572519084, "grad_norm": 0.37534067034721375, "learning_rate": 3.3725247006481167e-07, "loss": 0.0746, "step": 26650 }, { "epoch": 101.7557251908397, "grad_norm": 0.3646638095378876, "learning_rate": 3.3526539576294315e-07, "loss": 0.0671, "step": 26660 }, { "epoch": 101.79389312977099, "grad_norm": 0.24642281234264374, "learning_rate": 3.332839895534745e-07, "loss": 0.0706, "step": 26670 }, { "epoch": 101.83206106870229, "grad_norm": 0.45950332283973694, "learning_rate": 3.3130825384400156e-07, "loss": 0.0662, "step": 26680 }, { "epoch": 101.87022900763358, "grad_norm": 0.3571898639202118, "learning_rate": 3.293381910352278e-07, "loss": 0.0646, "step": 26690 }, { "epoch": 101.90839694656489, "grad_norm": 0.2520214021205902, "learning_rate": 3.273738035209678e-07, "loss": 0.0577, "step": 26700 }, { "epoch": 101.94656488549619, "grad_norm": 0.28303033113479614, "learning_rate": 3.254150936881356e-07, "loss": 0.0689, "step": 26710 }, { "epoch": 101.98473282442748, "grad_norm": 0.3684123754501343, "learning_rate": 3.234620639167496e-07, "loss": 0.0655, "step": 26720 }, { "epoch": 102.02290076335878, "grad_norm": 0.5102269053459167, "learning_rate": 3.2151471657992485e-07, "loss": 0.0746, "step": 26730 }, { "epoch": 102.06106870229007, "grad_norm": 0.24563086032867432, "learning_rate": 3.1957305404387187e-07, "loss": 0.0726, "step": 26740 }, { "epoch": 102.09923664122137, "grad_norm": 0.37572887539863586, "learning_rate": 3.176370786678934e-07, "loss": 0.0636, "step": 26750 }, { "epoch": 102.13740458015268, "grad_norm": 0.2450253665447235, "learning_rate": 3.1570679280438163e-07, "loss": 0.0726, "step": 26760 }, { "epoch": 102.17557251908397, "grad_norm": 0.48279571533203125, "learning_rate": 3.1378219879881764e-07, "loss": 0.0705, "step": 26770 }, { "epoch": 102.21374045801527, "grad_norm": 0.39897432923316956, "learning_rate": 3.1186329898976296e-07, "loss": 0.0731, "step": 26780 }, { "epoch": 102.25190839694656, "grad_norm": 0.23803192377090454, "learning_rate": 3.0995009570886305e-07, "loss": 0.0779, "step": 26790 }, { "epoch": 102.29007633587786, "grad_norm": 0.6093907356262207, "learning_rate": 3.0804259128083936e-07, "loss": 0.0667, "step": 26800 }, { "epoch": 102.32824427480917, "grad_norm": 0.275802880525589, "learning_rate": 3.0614078802348903e-07, "loss": 0.0642, "step": 26810 }, { "epoch": 102.36641221374046, "grad_norm": 0.2756710648536682, "learning_rate": 3.042446882476846e-07, "loss": 0.0668, "step": 26820 }, { "epoch": 102.40458015267176, "grad_norm": 0.2288149744272232, "learning_rate": 3.023542942573643e-07, "loss": 0.0716, "step": 26830 }, { "epoch": 102.44274809160305, "grad_norm": 0.2968932092189789, "learning_rate": 3.004696083495351e-07, "loss": 0.0671, "step": 26840 }, { "epoch": 102.48091603053435, "grad_norm": 0.27723342180252075, "learning_rate": 2.98590632814269e-07, "loss": 0.0673, "step": 26850 }, { "epoch": 102.51908396946565, "grad_norm": 0.36572501063346863, "learning_rate": 2.96717369934697e-07, "loss": 0.0679, "step": 26860 }, { "epoch": 102.55725190839695, "grad_norm": 0.19056031107902527, "learning_rate": 2.948498219870122e-07, "loss": 0.067, "step": 26870 }, { "epoch": 102.59541984732824, "grad_norm": 0.220974862575531, "learning_rate": 2.929879912404604e-07, "loss": 0.067, "step": 26880 }, { "epoch": 102.63358778625954, "grad_norm": 0.2869727909564972, "learning_rate": 2.911318799573415e-07, "loss": 0.0699, "step": 26890 }, { "epoch": 102.67175572519083, "grad_norm": 1.0001291036605835, "learning_rate": 2.8928149039300525e-07, "loss": 0.0713, "step": 26900 }, { "epoch": 102.70992366412214, "grad_norm": 0.3044949173927307, "learning_rate": 2.8743682479584977e-07, "loss": 0.0816, "step": 26910 }, { "epoch": 102.74809160305344, "grad_norm": 0.2789320945739746, "learning_rate": 2.8559788540731826e-07, "loss": 0.0637, "step": 26920 }, { "epoch": 102.78625954198473, "grad_norm": 0.7257505655288696, "learning_rate": 2.837646744618949e-07, "loss": 0.0823, "step": 26930 }, { "epoch": 102.82442748091603, "grad_norm": 0.3718293309211731, "learning_rate": 2.8193719418710405e-07, "loss": 0.0703, "step": 26940 }, { "epoch": 102.86259541984732, "grad_norm": 0.32060879468917847, "learning_rate": 2.8011544680350667e-07, "loss": 0.0709, "step": 26950 }, { "epoch": 102.90076335877862, "grad_norm": 0.6443219780921936, "learning_rate": 2.7829943452469753e-07, "loss": 0.0755, "step": 26960 }, { "epoch": 102.93893129770993, "grad_norm": 0.5253495573997498, "learning_rate": 2.764891595573022e-07, "loss": 0.0703, "step": 26970 }, { "epoch": 102.97709923664122, "grad_norm": 0.3999308943748474, "learning_rate": 2.746846241009765e-07, "loss": 0.0673, "step": 26980 }, { "epoch": 103.01526717557252, "grad_norm": 0.2527059018611908, "learning_rate": 2.7288583034839944e-07, "loss": 0.0687, "step": 26990 }, { "epoch": 103.05343511450381, "grad_norm": 0.296678751707077, "learning_rate": 2.7109278048527756e-07, "loss": 0.0654, "step": 27000 }, { "epoch": 103.09160305343511, "grad_norm": 0.7945778965950012, "learning_rate": 2.6930547669033415e-07, "loss": 0.0737, "step": 27010 }, { "epoch": 103.12977099236642, "grad_norm": 0.32029277086257935, "learning_rate": 2.675239211353109e-07, "loss": 0.0693, "step": 27020 }, { "epoch": 103.16793893129771, "grad_norm": 0.2556557357311249, "learning_rate": 2.6574811598496787e-07, "loss": 0.0755, "step": 27030 }, { "epoch": 103.20610687022901, "grad_norm": 0.23679769039154053, "learning_rate": 2.6397806339707457e-07, "loss": 0.0756, "step": 27040 }, { "epoch": 103.2442748091603, "grad_norm": 0.4383762776851654, "learning_rate": 2.622137655224122e-07, "loss": 0.0758, "step": 27050 }, { "epoch": 103.2824427480916, "grad_norm": 0.5516987442970276, "learning_rate": 2.6045522450476814e-07, "loss": 0.0728, "step": 27060 }, { "epoch": 103.3206106870229, "grad_norm": 0.4582926630973816, "learning_rate": 2.587024424809359e-07, "loss": 0.0779, "step": 27070 }, { "epoch": 103.3587786259542, "grad_norm": 0.5267468094825745, "learning_rate": 2.5695542158071187e-07, "loss": 0.0677, "step": 27080 }, { "epoch": 103.3969465648855, "grad_norm": 0.3101442754268646, "learning_rate": 2.5521416392689066e-07, "loss": 0.0666, "step": 27090 }, { "epoch": 103.43511450381679, "grad_norm": 0.22319281101226807, "learning_rate": 2.5347867163526387e-07, "loss": 0.0672, "step": 27100 }, { "epoch": 103.47328244274809, "grad_norm": 0.2605898678302765, "learning_rate": 2.517489468146189e-07, "loss": 0.0673, "step": 27110 }, { "epoch": 103.5114503816794, "grad_norm": 0.2470434010028839, "learning_rate": 2.500249915667341e-07, "loss": 0.0683, "step": 27120 }, { "epoch": 103.54961832061069, "grad_norm": 0.18632711470127106, "learning_rate": 2.4830680798637817e-07, "loss": 0.0679, "step": 27130 }, { "epoch": 103.58778625954199, "grad_norm": 0.2968946695327759, "learning_rate": 2.4659439816130557e-07, "loss": 0.0667, "step": 27140 }, { "epoch": 103.62595419847328, "grad_norm": 0.3109892010688782, "learning_rate": 2.448877641722569e-07, "loss": 0.0656, "step": 27150 }, { "epoch": 103.66412213740458, "grad_norm": 0.6580647826194763, "learning_rate": 2.431869080929522e-07, "loss": 0.0691, "step": 27160 }, { "epoch": 103.70229007633588, "grad_norm": 0.44333863258361816, "learning_rate": 2.414918319900922e-07, "loss": 0.0796, "step": 27170 }, { "epoch": 103.74045801526718, "grad_norm": 0.201785147190094, "learning_rate": 2.398025379233543e-07, "loss": 0.0642, "step": 27180 }, { "epoch": 103.77862595419847, "grad_norm": 0.3678027391433716, "learning_rate": 2.381190279453899e-07, "loss": 0.0824, "step": 27190 }, { "epoch": 103.81679389312977, "grad_norm": 0.3141831159591675, "learning_rate": 2.364413041018232e-07, "loss": 0.0694, "step": 27200 }, { "epoch": 103.85496183206106, "grad_norm": 0.35555902123451233, "learning_rate": 2.3476936843124633e-07, "loss": 0.0665, "step": 27210 }, { "epoch": 103.89312977099236, "grad_norm": 0.2217165231704712, "learning_rate": 2.3310322296521859e-07, "loss": 0.0652, "step": 27220 }, { "epoch": 103.93129770992367, "grad_norm": 0.36410123109817505, "learning_rate": 2.314428697282628e-07, "loss": 0.0719, "step": 27230 }, { "epoch": 103.96946564885496, "grad_norm": 0.7411630749702454, "learning_rate": 2.2978831073786735e-07, "loss": 0.0803, "step": 27240 }, { "epoch": 104.00763358778626, "grad_norm": 0.2194436937570572, "learning_rate": 2.2813954800447512e-07, "loss": 0.0631, "step": 27250 }, { "epoch": 104.04580152671755, "grad_norm": 0.3712310194969177, "learning_rate": 2.2649658353148974e-07, "loss": 0.0655, "step": 27260 }, { "epoch": 104.08396946564885, "grad_norm": 0.2021886706352234, "learning_rate": 2.2485941931526646e-07, "loss": 0.0663, "step": 27270 }, { "epoch": 104.12213740458016, "grad_norm": 0.5154406428337097, "learning_rate": 2.232280573451151e-07, "loss": 0.0712, "step": 27280 }, { "epoch": 104.16030534351145, "grad_norm": 0.28259921073913574, "learning_rate": 2.216024996032945e-07, "loss": 0.0654, "step": 27290 }, { "epoch": 104.19847328244275, "grad_norm": 0.23785048723220825, "learning_rate": 2.1998274806501074e-07, "loss": 0.0725, "step": 27300 }, { "epoch": 104.23664122137404, "grad_norm": 0.3378708064556122, "learning_rate": 2.1836880469841391e-07, "loss": 0.071, "step": 27310 }, { "epoch": 104.27480916030534, "grad_norm": 0.4880586266517639, "learning_rate": 2.1676067146459812e-07, "loss": 0.072, "step": 27320 }, { "epoch": 104.31297709923665, "grad_norm": 0.4210817217826843, "learning_rate": 2.151583503175958e-07, "loss": 0.0669, "step": 27330 }, { "epoch": 104.35114503816794, "grad_norm": 0.5876713991165161, "learning_rate": 2.1356184320437955e-07, "loss": 0.0688, "step": 27340 }, { "epoch": 104.38931297709924, "grad_norm": 0.3950154483318329, "learning_rate": 2.1197115206485542e-07, "loss": 0.0646, "step": 27350 }, { "epoch": 104.42748091603053, "grad_norm": 0.5265299081802368, "learning_rate": 2.103862788318628e-07, "loss": 0.0676, "step": 27360 }, { "epoch": 104.46564885496183, "grad_norm": 0.4005798399448395, "learning_rate": 2.0880722543117293e-07, "loss": 0.0629, "step": 27370 }, { "epoch": 104.50381679389314, "grad_norm": 0.8654969930648804, "learning_rate": 2.0723399378148434e-07, "loss": 0.0687, "step": 27380 }, { "epoch": 104.54198473282443, "grad_norm": 0.2732725739479065, "learning_rate": 2.0566658579442067e-07, "loss": 0.0669, "step": 27390 }, { "epoch": 104.58015267175573, "grad_norm": 0.5799111723899841, "learning_rate": 2.0410500337453176e-07, "loss": 0.0705, "step": 27400 }, { "epoch": 104.61832061068702, "grad_norm": 0.28128835558891296, "learning_rate": 2.0254924841928647e-07, "loss": 0.0654, "step": 27410 }, { "epoch": 104.65648854961832, "grad_norm": 0.41618332266807556, "learning_rate": 2.0099932281907542e-07, "loss": 0.0699, "step": 27420 }, { "epoch": 104.69465648854961, "grad_norm": 0.5150026679039001, "learning_rate": 1.9945522845720323e-07, "loss": 0.0704, "step": 27430 }, { "epoch": 104.73282442748092, "grad_norm": 0.2852458953857422, "learning_rate": 1.9791696720988963e-07, "loss": 0.0631, "step": 27440 }, { "epoch": 104.77099236641222, "grad_norm": 0.2500865161418915, "learning_rate": 1.9638454094626836e-07, "loss": 0.0695, "step": 27450 }, { "epoch": 104.80916030534351, "grad_norm": 0.3002980947494507, "learning_rate": 1.9485795152838105e-07, "loss": 0.0703, "step": 27460 }, { "epoch": 104.8473282442748, "grad_norm": 0.22461900115013123, "learning_rate": 1.933372008111778e-07, "loss": 0.0646, "step": 27470 }, { "epoch": 104.8854961832061, "grad_norm": 0.2733502686023712, "learning_rate": 1.9182229064251433e-07, "loss": 0.0649, "step": 27480 }, { "epoch": 104.92366412213741, "grad_norm": 0.35162800550460815, "learning_rate": 1.9031322286314878e-07, "loss": 0.0745, "step": 27490 }, { "epoch": 104.9618320610687, "grad_norm": 0.22003084421157837, "learning_rate": 1.8880999930674216e-07, "loss": 0.0635, "step": 27500 }, { "epoch": 105.0, "grad_norm": 0.39051511883735657, "learning_rate": 1.8731262179985166e-07, "loss": 0.0752, "step": 27510 }, { "epoch": 105.0381679389313, "grad_norm": 0.4942927658557892, "learning_rate": 1.8582109216193245e-07, "loss": 0.0925, "step": 27520 }, { "epoch": 105.07633587786259, "grad_norm": 0.26188600063323975, "learning_rate": 1.8433541220533368e-07, "loss": 0.0646, "step": 27530 }, { "epoch": 105.1145038167939, "grad_norm": 0.28856149315834045, "learning_rate": 1.8285558373529578e-07, "loss": 0.0676, "step": 27540 }, { "epoch": 105.1526717557252, "grad_norm": 0.24751274287700653, "learning_rate": 1.8138160854995145e-07, "loss": 0.0711, "step": 27550 }, { "epoch": 105.19083969465649, "grad_norm": 0.3480677902698517, "learning_rate": 1.7991348844031864e-07, "loss": 0.0659, "step": 27560 }, { "epoch": 105.22900763358778, "grad_norm": 0.42214107513427734, "learning_rate": 1.78451225190302e-07, "loss": 0.0706, "step": 27570 }, { "epoch": 105.26717557251908, "grad_norm": 0.47172361612319946, "learning_rate": 1.7699482057668916e-07, "loss": 0.0669, "step": 27580 }, { "epoch": 105.30534351145039, "grad_norm": 0.6258100867271423, "learning_rate": 1.7554427636914894e-07, "loss": 0.0689, "step": 27590 }, { "epoch": 105.34351145038168, "grad_norm": 0.21334940195083618, "learning_rate": 1.7409959433022873e-07, "loss": 0.0653, "step": 27600 }, { "epoch": 105.38167938931298, "grad_norm": 0.39631593227386475, "learning_rate": 1.726607762153548e-07, "loss": 0.0843, "step": 27610 }, { "epoch": 105.41984732824427, "grad_norm": 0.49448487162590027, "learning_rate": 1.7122782377282598e-07, "loss": 0.0667, "step": 27620 }, { "epoch": 105.45801526717557, "grad_norm": 0.26243850588798523, "learning_rate": 1.6980073874381497e-07, "loss": 0.0646, "step": 27630 }, { "epoch": 105.49618320610686, "grad_norm": 0.3036261200904846, "learning_rate": 1.6837952286236415e-07, "loss": 0.0757, "step": 27640 }, { "epoch": 105.53435114503817, "grad_norm": 0.37669137120246887, "learning_rate": 1.6696417785538487e-07, "loss": 0.0729, "step": 27650 }, { "epoch": 105.57251908396947, "grad_norm": 0.2965763807296753, "learning_rate": 1.6555470544265539e-07, "loss": 0.0724, "step": 27660 }, { "epoch": 105.61068702290076, "grad_norm": 0.4320843815803528, "learning_rate": 1.6415110733681737e-07, "loss": 0.0712, "step": 27670 }, { "epoch": 105.64885496183206, "grad_norm": 0.2053402066230774, "learning_rate": 1.6275338524337437e-07, "loss": 0.0683, "step": 27680 }, { "epoch": 105.68702290076335, "grad_norm": 0.27618706226348877, "learning_rate": 1.6136154086069054e-07, "loss": 0.064, "step": 27690 }, { "epoch": 105.72519083969466, "grad_norm": 0.5700408220291138, "learning_rate": 1.599755758799887e-07, "loss": 0.0694, "step": 27700 }, { "epoch": 105.76335877862596, "grad_norm": 0.31304672360420227, "learning_rate": 1.5859549198534607e-07, "loss": 0.0695, "step": 27710 }, { "epoch": 105.80152671755725, "grad_norm": 0.23126287758350372, "learning_rate": 1.572212908536963e-07, "loss": 0.0689, "step": 27720 }, { "epoch": 105.83969465648855, "grad_norm": 0.3003278374671936, "learning_rate": 1.5585297415482203e-07, "loss": 0.0724, "step": 27730 }, { "epoch": 105.87786259541984, "grad_norm": 0.4534234404563904, "learning_rate": 1.5449054355135718e-07, "loss": 0.0669, "step": 27740 }, { "epoch": 105.91603053435115, "grad_norm": 0.5015247464179993, "learning_rate": 1.5313400069878416e-07, "loss": 0.0691, "step": 27750 }, { "epoch": 105.95419847328245, "grad_norm": 0.2227436900138855, "learning_rate": 1.5178334724542887e-07, "loss": 0.0653, "step": 27760 }, { "epoch": 105.99236641221374, "grad_norm": 0.9543538093566895, "learning_rate": 1.504385848324641e-07, "loss": 0.0665, "step": 27770 }, { "epoch": 106.03053435114504, "grad_norm": 0.7465227246284485, "learning_rate": 1.4909971509390332e-07, "loss": 0.072, "step": 27780 }, { "epoch": 106.06870229007633, "grad_norm": 0.23705200850963593, "learning_rate": 1.4776673965659793e-07, "loss": 0.0608, "step": 27790 }, { "epoch": 106.10687022900764, "grad_norm": 0.29993176460266113, "learning_rate": 1.4643966014023958e-07, "loss": 0.0686, "step": 27800 }, { "epoch": 106.14503816793894, "grad_norm": 0.24530579149723053, "learning_rate": 1.4511847815735503e-07, "loss": 0.0666, "step": 27810 }, { "epoch": 106.18320610687023, "grad_norm": 0.24449963867664337, "learning_rate": 1.438031953133051e-07, "loss": 0.0694, "step": 27820 }, { "epoch": 106.22137404580153, "grad_norm": 0.3024006485939026, "learning_rate": 1.4249381320628197e-07, "loss": 0.0774, "step": 27830 }, { "epoch": 106.25954198473282, "grad_norm": 0.3465965986251831, "learning_rate": 1.4119033342730904e-07, "loss": 0.0644, "step": 27840 }, { "epoch": 106.29770992366412, "grad_norm": 0.2610500156879425, "learning_rate": 1.3989275756023658e-07, "loss": 0.0673, "step": 27850 }, { "epoch": 106.33587786259542, "grad_norm": 0.34050482511520386, "learning_rate": 1.386010871817417e-07, "loss": 0.0695, "step": 27860 }, { "epoch": 106.37404580152672, "grad_norm": 0.3278212249279022, "learning_rate": 1.3731532386132617e-07, "loss": 0.0736, "step": 27870 }, { "epoch": 106.41221374045801, "grad_norm": 0.27031412720680237, "learning_rate": 1.360354691613136e-07, "loss": 0.0695, "step": 27880 }, { "epoch": 106.45038167938931, "grad_norm": 0.2940131723880768, "learning_rate": 1.3476152463684778e-07, "loss": 0.0666, "step": 27890 }, { "epoch": 106.4885496183206, "grad_norm": 0.25805696845054626, "learning_rate": 1.3349349183589155e-07, "loss": 0.0729, "step": 27900 }, { "epoch": 106.52671755725191, "grad_norm": 1.4068719148635864, "learning_rate": 1.3223137229922356e-07, "loss": 0.0731, "step": 27910 }, { "epoch": 106.56488549618321, "grad_norm": 0.6141703724861145, "learning_rate": 1.3097516756043982e-07, "loss": 0.0695, "step": 27920 }, { "epoch": 106.6030534351145, "grad_norm": 0.2119428515434265, "learning_rate": 1.29724879145946e-07, "loss": 0.0727, "step": 27930 }, { "epoch": 106.6412213740458, "grad_norm": 0.24076199531555176, "learning_rate": 1.284805085749613e-07, "loss": 0.0788, "step": 27940 }, { "epoch": 106.6793893129771, "grad_norm": 0.4202733337879181, "learning_rate": 1.2724205735951288e-07, "loss": 0.0714, "step": 27950 }, { "epoch": 106.7175572519084, "grad_norm": 0.2482680380344391, "learning_rate": 1.2600952700443592e-07, "loss": 0.0708, "step": 27960 }, { "epoch": 106.7557251908397, "grad_norm": 0.65274977684021, "learning_rate": 1.247829190073707e-07, "loss": 0.0727, "step": 27970 }, { "epoch": 106.79389312977099, "grad_norm": 0.23994283378124237, "learning_rate": 1.2356223485876174e-07, "loss": 0.072, "step": 27980 }, { "epoch": 106.83206106870229, "grad_norm": 0.2923242747783661, "learning_rate": 1.2234747604185527e-07, "loss": 0.0726, "step": 27990 }, { "epoch": 106.87022900763358, "grad_norm": 0.2507231533527374, "learning_rate": 1.2113864403269837e-07, "loss": 0.0723, "step": 28000 }, { "epoch": 106.90839694656489, "grad_norm": 0.6731197237968445, "learning_rate": 1.1993574030013554e-07, "loss": 0.0674, "step": 28010 }, { "epoch": 106.94656488549619, "grad_norm": 0.17780010402202606, "learning_rate": 1.1873876630580816e-07, "loss": 0.0758, "step": 28020 }, { "epoch": 106.98473282442748, "grad_norm": 0.7125159502029419, "learning_rate": 1.1754772350415278e-07, "loss": 0.0748, "step": 28030 }, { "epoch": 107.02290076335878, "grad_norm": 0.355917364358902, "learning_rate": 1.163626133423984e-07, "loss": 0.0743, "step": 28040 }, { "epoch": 107.06106870229007, "grad_norm": 0.4013403058052063, "learning_rate": 1.151834372605659e-07, "loss": 0.0645, "step": 28050 }, { "epoch": 107.09923664122137, "grad_norm": 0.6267454624176025, "learning_rate": 1.1401019669146474e-07, "loss": 0.0681, "step": 28060 }, { "epoch": 107.13740458015268, "grad_norm": 0.3011610209941864, "learning_rate": 1.128428930606934e-07, "loss": 0.0671, "step": 28070 }, { "epoch": 107.17557251908397, "grad_norm": 0.5167744159698486, "learning_rate": 1.1168152778663621e-07, "loss": 0.0678, "step": 28080 }, { "epoch": 107.21374045801527, "grad_norm": 0.2572309970855713, "learning_rate": 1.10526102280461e-07, "loss": 0.0666, "step": 28090 }, { "epoch": 107.25190839694656, "grad_norm": 0.2860301434993744, "learning_rate": 1.0937661794611865e-07, "loss": 0.0628, "step": 28100 }, { "epoch": 107.29007633587786, "grad_norm": 0.7779892086982727, "learning_rate": 1.0823307618034129e-07, "loss": 0.0683, "step": 28110 }, { "epoch": 107.32824427480917, "grad_norm": 0.26767176389694214, "learning_rate": 1.0709547837263967e-07, "loss": 0.062, "step": 28120 }, { "epoch": 107.36641221374046, "grad_norm": 0.38694190979003906, "learning_rate": 1.0596382590530252e-07, "loss": 0.0651, "step": 28130 }, { "epoch": 107.40458015267176, "grad_norm": 0.9800488948822021, "learning_rate": 1.0483812015339435e-07, "loss": 0.0708, "step": 28140 }, { "epoch": 107.44274809160305, "grad_norm": 0.5889298319816589, "learning_rate": 1.0371836248475431e-07, "loss": 0.0698, "step": 28150 }, { "epoch": 107.48091603053435, "grad_norm": 0.2541733384132385, "learning_rate": 1.0260455425999239e-07, "loss": 0.0704, "step": 28160 }, { "epoch": 107.51908396946565, "grad_norm": 0.26119011640548706, "learning_rate": 1.0149669683249209e-07, "loss": 0.0698, "step": 28170 }, { "epoch": 107.55725190839695, "grad_norm": 0.42216214537620544, "learning_rate": 1.0039479154840271e-07, "loss": 0.0663, "step": 28180 }, { "epoch": 107.59541984732824, "grad_norm": 0.7032060623168945, "learning_rate": 9.92988397466449e-08, "loss": 0.0772, "step": 28190 }, { "epoch": 107.63358778625954, "grad_norm": 0.26121023297309875, "learning_rate": 9.820884275890286e-08, "loss": 0.0682, "step": 28200 }, { "epoch": 107.67175572519083, "grad_norm": 0.7753889560699463, "learning_rate": 9.712480190962548e-08, "loss": 0.0703, "step": 28210 }, { "epoch": 107.70992366412214, "grad_norm": 0.48209503293037415, "learning_rate": 9.604671851602466e-08, "loss": 0.0697, "step": 28220 }, { "epoch": 107.74809160305344, "grad_norm": 0.3045661449432373, "learning_rate": 9.497459388807306e-08, "loss": 0.0659, "step": 28230 }, { "epoch": 107.78625954198473, "grad_norm": 0.4404143691062927, "learning_rate": 9.390842932850364e-08, "loss": 0.0724, "step": 28240 }, { "epoch": 107.82442748091603, "grad_norm": 0.34857988357543945, "learning_rate": 9.284822613280731e-08, "loss": 0.0706, "step": 28250 }, { "epoch": 107.86259541984732, "grad_norm": 0.654318630695343, "learning_rate": 9.179398558923024e-08, "loss": 0.0696, "step": 28260 }, { "epoch": 107.90076335877862, "grad_norm": 0.6860091686248779, "learning_rate": 9.074570897877388e-08, "loss": 0.0774, "step": 28270 }, { "epoch": 107.93893129770993, "grad_norm": 0.36173543334007263, "learning_rate": 8.970339757519375e-08, "loss": 0.0646, "step": 28280 }, { "epoch": 107.97709923664122, "grad_norm": 0.37401559948921204, "learning_rate": 8.86670526449962e-08, "loss": 0.0652, "step": 28290 }, { "epoch": 108.01526717557252, "grad_norm": 0.4267309904098511, "learning_rate": 8.763667544743837e-08, "loss": 0.0682, "step": 28300 }, { "epoch": 108.05343511450381, "grad_norm": 0.9610293507575989, "learning_rate": 8.661226723452542e-08, "loss": 0.0796, "step": 28310 }, { "epoch": 108.09160305343511, "grad_norm": 0.8470723032951355, "learning_rate": 8.559382925101e-08, "loss": 0.0682, "step": 28320 }, { "epoch": 108.12977099236642, "grad_norm": 0.5021095871925354, "learning_rate": 8.458136273438943e-08, "loss": 0.0686, "step": 28330 }, { "epoch": 108.16793893129771, "grad_norm": 0.4912349581718445, "learning_rate": 8.357486891490796e-08, "loss": 0.064, "step": 28340 }, { "epoch": 108.20610687022901, "grad_norm": 0.2633496820926666, "learning_rate": 8.257434901554895e-08, "loss": 0.0684, "step": 28350 }, { "epoch": 108.2442748091603, "grad_norm": 0.25884994864463806, "learning_rate": 8.157980425203938e-08, "loss": 0.0704, "step": 28360 }, { "epoch": 108.2824427480916, "grad_norm": 0.5608441233634949, "learning_rate": 8.059123583284368e-08, "loss": 0.0731, "step": 28370 }, { "epoch": 108.3206106870229, "grad_norm": 0.32129549980163574, "learning_rate": 7.960864495916653e-08, "loss": 0.0691, "step": 28380 }, { "epoch": 108.3587786259542, "grad_norm": 0.367439866065979, "learning_rate": 7.863203282494846e-08, "loss": 0.0764, "step": 28390 }, { "epoch": 108.3969465648855, "grad_norm": 0.5697895884513855, "learning_rate": 7.766140061686522e-08, "loss": 0.0698, "step": 28400 }, { "epoch": 108.43511450381679, "grad_norm": 0.5481321215629578, "learning_rate": 7.669674951432615e-08, "loss": 0.067, "step": 28410 }, { "epoch": 108.47328244274809, "grad_norm": 0.2596859931945801, "learning_rate": 7.573808068947363e-08, "loss": 0.0645, "step": 28420 }, { "epoch": 108.5114503816794, "grad_norm": 0.3332163989543915, "learning_rate": 7.478539530718087e-08, "loss": 0.0775, "step": 28430 }, { "epoch": 108.54961832061069, "grad_norm": 0.3313477635383606, "learning_rate": 7.383869452504966e-08, "loss": 0.0666, "step": 28440 }, { "epoch": 108.58778625954199, "grad_norm": 0.28801289200782776, "learning_rate": 7.289797949341204e-08, "loss": 0.0711, "step": 28450 }, { "epoch": 108.62595419847328, "grad_norm": 0.4016149640083313, "learning_rate": 7.196325135532422e-08, "loss": 0.0649, "step": 28460 }, { "epoch": 108.66412213740458, "grad_norm": 0.27881985902786255, "learning_rate": 7.10345112465699e-08, "loss": 0.0619, "step": 28470 }, { "epoch": 108.70229007633588, "grad_norm": 0.5351571440696716, "learning_rate": 7.011176029565525e-08, "loss": 0.0687, "step": 28480 }, { "epoch": 108.74045801526718, "grad_norm": 0.34915560483932495, "learning_rate": 6.919499962381004e-08, "loss": 0.0717, "step": 28490 }, { "epoch": 108.77862595419847, "grad_norm": 0.3323579430580139, "learning_rate": 6.828423034498488e-08, "loss": 0.0642, "step": 28500 }, { "epoch": 108.81679389312977, "grad_norm": 0.23722094297409058, "learning_rate": 6.737945356585008e-08, "loss": 0.067, "step": 28510 }, { "epoch": 108.85496183206106, "grad_norm": 0.4655064344406128, "learning_rate": 6.648067038579508e-08, "loss": 0.0644, "step": 28520 }, { "epoch": 108.89312977099236, "grad_norm": 0.27879801392555237, "learning_rate": 6.558788189692578e-08, "loss": 0.0705, "step": 28530 }, { "epoch": 108.93129770992367, "grad_norm": 0.3153517246246338, "learning_rate": 6.470108918406493e-08, "loss": 0.0733, "step": 28540 }, { "epoch": 108.96946564885496, "grad_norm": 0.42960867285728455, "learning_rate": 6.382029332474893e-08, "loss": 0.0667, "step": 28550 }, { "epoch": 109.00763358778626, "grad_norm": 0.28105250000953674, "learning_rate": 6.294549538922778e-08, "loss": 0.0622, "step": 28560 }, { "epoch": 109.04580152671755, "grad_norm": 0.597682535648346, "learning_rate": 6.207669644046344e-08, "loss": 0.0681, "step": 28570 }, { "epoch": 109.08396946564885, "grad_norm": 0.31500354409217834, "learning_rate": 6.121389753412866e-08, "loss": 0.0698, "step": 28580 }, { "epoch": 109.12213740458016, "grad_norm": 0.2600068151950836, "learning_rate": 6.035709971860592e-08, "loss": 0.0747, "step": 28590 }, { "epoch": 109.16030534351145, "grad_norm": 0.2531625032424927, "learning_rate": 5.95063040349847e-08, "loss": 0.0688, "step": 28600 }, { "epoch": 109.19847328244275, "grad_norm": 0.42508846521377563, "learning_rate": 5.86615115170619e-08, "loss": 0.0695, "step": 28610 }, { "epoch": 109.23664122137404, "grad_norm": 0.38278618454933167, "learning_rate": 5.782272319134086e-08, "loss": 0.0705, "step": 28620 }, { "epoch": 109.27480916030534, "grad_norm": 0.2914545238018036, "learning_rate": 5.698994007702796e-08, "loss": 0.0983, "step": 28630 }, { "epoch": 109.31297709923665, "grad_norm": 0.2625536620616913, "learning_rate": 5.616316318603321e-08, "loss": 0.0712, "step": 28640 }, { "epoch": 109.35114503816794, "grad_norm": 0.21834562718868256, "learning_rate": 5.5342393522968e-08, "loss": 0.0681, "step": 28650 }, { "epoch": 109.38931297709924, "grad_norm": 0.305205374956131, "learning_rate": 5.452763208514622e-08, "loss": 0.0677, "step": 28660 }, { "epoch": 109.42748091603053, "grad_norm": 0.4056416153907776, "learning_rate": 5.371887986257873e-08, "loss": 0.0676, "step": 28670 }, { "epoch": 109.46564885496183, "grad_norm": 0.2562243640422821, "learning_rate": 5.291613783797611e-08, "loss": 0.067, "step": 28680 }, { "epoch": 109.50381679389314, "grad_norm": 0.4858242869377136, "learning_rate": 5.2119406986745336e-08, "loss": 0.0686, "step": 28690 }, { "epoch": 109.54198473282443, "grad_norm": 0.8462430238723755, "learning_rate": 5.132868827698978e-08, "loss": 0.0723, "step": 28700 }, { "epoch": 109.58015267175573, "grad_norm": 0.46360403299331665, "learning_rate": 5.0543982669507554e-08, "loss": 0.0666, "step": 28710 }, { "epoch": 109.61832061068702, "grad_norm": 0.2263144552707672, "learning_rate": 4.976529111778872e-08, "loss": 0.0672, "step": 28720 }, { "epoch": 109.65648854961832, "grad_norm": 0.5241805911064148, "learning_rate": 4.8992614568018624e-08, "loss": 0.0671, "step": 28730 }, { "epoch": 109.69465648854961, "grad_norm": 0.3242310881614685, "learning_rate": 4.822595395907126e-08, "loss": 0.0679, "step": 28740 }, { "epoch": 109.73282442748092, "grad_norm": 0.300382137298584, "learning_rate": 4.7465310222510885e-08, "loss": 0.0718, "step": 28750 }, { "epoch": 109.77099236641222, "grad_norm": 0.25149857997894287, "learning_rate": 4.671068428259318e-08, "loss": 0.0707, "step": 28760 }, { "epoch": 109.80916030534351, "grad_norm": 0.49300718307495117, "learning_rate": 4.596207705625799e-08, "loss": 0.0614, "step": 28770 }, { "epoch": 109.8473282442748, "grad_norm": 0.2667870819568634, "learning_rate": 4.521948945313492e-08, "loss": 0.0733, "step": 28780 }, { "epoch": 109.8854961832061, "grad_norm": 0.32633841037750244, "learning_rate": 4.4482922375537196e-08, "loss": 0.073, "step": 28790 }, { "epoch": 109.92366412213741, "grad_norm": 0.23223261535167694, "learning_rate": 4.375237671846333e-08, "loss": 0.0684, "step": 28800 }, { "epoch": 109.9618320610687, "grad_norm": 0.2435031235218048, "learning_rate": 4.302785336959547e-08, "loss": 0.0648, "step": 28810 }, { "epoch": 110.0, "grad_norm": 0.273689866065979, "learning_rate": 4.230935320929774e-08, "loss": 0.0732, "step": 28820 }, { "epoch": 110.0381679389313, "grad_norm": 0.290106862783432, "learning_rate": 4.159687711061566e-08, "loss": 0.0728, "step": 28830 }, { "epoch": 110.07633587786259, "grad_norm": 0.3028109669685364, "learning_rate": 4.089042593927506e-08, "loss": 0.0642, "step": 28840 }, { "epoch": 110.1145038167939, "grad_norm": 0.2728022634983063, "learning_rate": 4.019000055367983e-08, "loss": 0.0614, "step": 28850 }, { "epoch": 110.1526717557252, "grad_norm": 0.3827283978462219, "learning_rate": 3.949560180491363e-08, "loss": 0.0665, "step": 28860 }, { "epoch": 110.19083969465649, "grad_norm": 0.29793211817741394, "learning_rate": 3.880723053673652e-08, "loss": 0.0652, "step": 28870 }, { "epoch": 110.22900763358778, "grad_norm": 0.3043680191040039, "learning_rate": 3.812488758558386e-08, "loss": 0.065, "step": 28880 }, { "epoch": 110.26717557251908, "grad_norm": 0.47717395424842834, "learning_rate": 3.744857378056743e-08, "loss": 0.0696, "step": 28890 }, { "epoch": 110.30534351145039, "grad_norm": 0.47816434502601624, "learning_rate": 3.677828994347155e-08, "loss": 0.0655, "step": 28900 }, { "epoch": 110.34351145038168, "grad_norm": 0.4901565611362457, "learning_rate": 3.61140368887547e-08, "loss": 0.074, "step": 28910 }, { "epoch": 110.38167938931298, "grad_norm": 0.22826752066612244, "learning_rate": 3.5455815423546815e-08, "loss": 0.0712, "step": 28920 }, { "epoch": 110.41984732824427, "grad_norm": 0.44958510994911194, "learning_rate": 3.480362634764922e-08, "loss": 0.0687, "step": 28930 }, { "epoch": 110.45801526717557, "grad_norm": 0.21928606927394867, "learning_rate": 3.4157470453533015e-08, "loss": 0.0683, "step": 28940 }, { "epoch": 110.49618320610686, "grad_norm": 0.28822770714759827, "learning_rate": 3.3517348526339034e-08, "loss": 0.0678, "step": 28950 }, { "epoch": 110.53435114503817, "grad_norm": 0.44351062178611755, "learning_rate": 3.288326134387454e-08, "loss": 0.0647, "step": 28960 }, { "epoch": 110.57251908396947, "grad_norm": 0.7302173972129822, "learning_rate": 3.225520967661655e-08, "loss": 0.0683, "step": 28970 }, { "epoch": 110.61068702290076, "grad_norm": 0.4752961993217468, "learning_rate": 3.163319428770628e-08, "loss": 0.0713, "step": 28980 }, { "epoch": 110.64885496183206, "grad_norm": 0.35356375575065613, "learning_rate": 3.1017215932951375e-08, "loss": 0.0694, "step": 28990 }, { "epoch": 110.68702290076335, "grad_norm": 0.2346401959657669, "learning_rate": 3.040727536082366e-08, "loss": 0.0752, "step": 29000 }, { "epoch": 110.72519083969466, "grad_norm": 0.3200841546058655, "learning_rate": 2.980337331245864e-08, "loss": 0.0686, "step": 29010 }, { "epoch": 110.76335877862596, "grad_norm": 0.26796045899391174, "learning_rate": 2.9205510521653214e-08, "loss": 0.0625, "step": 29020 }, { "epoch": 110.80152671755725, "grad_norm": 0.297757089138031, "learning_rate": 2.8613687714868497e-08, "loss": 0.0635, "step": 29030 }, { "epoch": 110.83969465648855, "grad_norm": 0.256091833114624, "learning_rate": 2.8027905611223704e-08, "loss": 0.0645, "step": 29040 }, { "epoch": 110.87786259541984, "grad_norm": 0.27508917450904846, "learning_rate": 2.7448164922500577e-08, "loss": 0.0867, "step": 29050 }, { "epoch": 110.91603053435115, "grad_norm": 0.4821559488773346, "learning_rate": 2.687446635313784e-08, "loss": 0.0717, "step": 29060 }, { "epoch": 110.95419847328245, "grad_norm": 0.26554882526397705, "learning_rate": 2.6306810600233435e-08, "loss": 0.0655, "step": 29070 }, { "epoch": 110.99236641221374, "grad_norm": 0.19737420976161957, "learning_rate": 2.5745198353542834e-08, "loss": 0.0663, "step": 29080 }, { "epoch": 111.03053435114504, "grad_norm": 0.258440762758255, "learning_rate": 2.518963029547794e-08, "loss": 0.0601, "step": 29090 }, { "epoch": 111.06870229007633, "grad_norm": 0.25001877546310425, "learning_rate": 2.464010710110598e-08, "loss": 0.0745, "step": 29100 }, { "epoch": 111.10687022900764, "grad_norm": 0.2975136935710907, "learning_rate": 2.4096629438150055e-08, "loss": 0.0669, "step": 29110 }, { "epoch": 111.14503816793894, "grad_norm": 0.2732112407684326, "learning_rate": 2.3559197966985802e-08, "loss": 0.0692, "step": 29120 }, { "epoch": 111.18320610687023, "grad_norm": 0.3679260313510895, "learning_rate": 2.302781334064419e-08, "loss": 0.063, "step": 29130 }, { "epoch": 111.22137404580153, "grad_norm": 0.2609327435493469, "learning_rate": 2.2502476204807056e-08, "loss": 0.0694, "step": 29140 }, { "epoch": 111.25954198473282, "grad_norm": 0.436776727437973, "learning_rate": 2.1983187197808788e-08, "loss": 0.0758, "step": 29150 }, { "epoch": 111.29770992366412, "grad_norm": 0.6425755023956299, "learning_rate": 2.1469946950634645e-08, "loss": 0.0716, "step": 29160 }, { "epoch": 111.33587786259542, "grad_norm": 0.22093476355075836, "learning_rate": 2.0962756086919112e-08, "loss": 0.0765, "step": 29170 }, { "epoch": 111.37404580152672, "grad_norm": 0.5801058411598206, "learning_rate": 2.046161522294754e-08, "loss": 0.0782, "step": 29180 }, { "epoch": 111.41221374045801, "grad_norm": 0.5053842663764954, "learning_rate": 1.9966524967653943e-08, "loss": 0.0657, "step": 29190 }, { "epoch": 111.45038167938931, "grad_norm": 0.31466546654701233, "learning_rate": 1.9477485922618222e-08, "loss": 0.0752, "step": 29200 }, { "epoch": 111.4885496183206, "grad_norm": 0.3375924825668335, "learning_rate": 1.899449868207004e-08, "loss": 0.0683, "step": 29210 }, { "epoch": 111.52671755725191, "grad_norm": 0.49920454621315, "learning_rate": 1.8517563832884387e-08, "loss": 0.0734, "step": 29220 }, { "epoch": 111.56488549618321, "grad_norm": 0.6840377449989319, "learning_rate": 1.8046681954581035e-08, "loss": 0.0684, "step": 29230 }, { "epoch": 111.6030534351145, "grad_norm": 0.37621980905532837, "learning_rate": 1.7581853619327295e-08, "loss": 0.0656, "step": 29240 }, { "epoch": 111.6412213740458, "grad_norm": 0.23118160665035248, "learning_rate": 1.7123079391932474e-08, "loss": 0.0704, "step": 29250 }, { "epoch": 111.6793893129771, "grad_norm": 0.3853679299354553, "learning_rate": 1.6670359829850657e-08, "loss": 0.07, "step": 29260 }, { "epoch": 111.7175572519084, "grad_norm": 0.20712091028690338, "learning_rate": 1.6223695483179035e-08, "loss": 0.0667, "step": 29270 }, { "epoch": 111.7557251908397, "grad_norm": 0.45825672149658203, "learning_rate": 1.5783086894656796e-08, "loss": 0.0658, "step": 29280 }, { "epoch": 111.79389312977099, "grad_norm": 0.25062382221221924, "learning_rate": 1.5348534599665122e-08, "loss": 0.0656, "step": 29290 }, { "epoch": 111.83206106870229, "grad_norm": 0.7445496320724487, "learning_rate": 1.4920039126225528e-08, "loss": 0.0756, "step": 29300 }, { "epoch": 111.87022900763358, "grad_norm": 0.4825686812400818, "learning_rate": 1.449760099500097e-08, "loss": 0.0684, "step": 29310 }, { "epoch": 111.90839694656489, "grad_norm": 0.2946581244468689, "learning_rate": 1.4081220719293076e-08, "loss": 0.0647, "step": 29320 }, { "epoch": 111.94656488549619, "grad_norm": 0.2547703981399536, "learning_rate": 1.3670898805043798e-08, "loss": 0.0762, "step": 29330 }, { "epoch": 111.98473282442748, "grad_norm": 0.44975385069847107, "learning_rate": 1.326663575083209e-08, "loss": 0.0698, "step": 29340 }, { "epoch": 112.02290076335878, "grad_norm": 0.2827472686767578, "learning_rate": 1.286843204787669e-08, "loss": 0.0679, "step": 29350 }, { "epoch": 112.06106870229007, "grad_norm": 0.4250585734844208, "learning_rate": 1.2476288180032214e-08, "loss": 0.0638, "step": 29360 }, { "epoch": 112.09923664122137, "grad_norm": 0.3234623670578003, "learning_rate": 1.2090204623790292e-08, "loss": 0.0611, "step": 29370 }, { "epoch": 112.13740458015268, "grad_norm": 0.20758773386478424, "learning_rate": 1.1710181848278435e-08, "loss": 0.0717, "step": 29380 }, { "epoch": 112.17557251908397, "grad_norm": 0.476921945810318, "learning_rate": 1.133622031526116e-08, "loss": 0.0667, "step": 29390 }, { "epoch": 112.21374045801527, "grad_norm": 0.809885561466217, "learning_rate": 1.0968320479136097e-08, "loss": 0.0722, "step": 29400 }, { "epoch": 112.25190839694656, "grad_norm": 0.218302920460701, "learning_rate": 1.0606482786936768e-08, "loss": 0.0659, "step": 29410 }, { "epoch": 112.29007633587786, "grad_norm": 0.9261871576309204, "learning_rate": 1.0250707678329808e-08, "loss": 0.0777, "step": 29420 }, { "epoch": 112.32824427480917, "grad_norm": 0.3251490890979767, "learning_rate": 9.900995585615525e-09, "loss": 0.0663, "step": 29430 }, { "epoch": 112.36641221374046, "grad_norm": 0.4006260931491852, "learning_rate": 9.55734693372734e-09, "loss": 0.067, "step": 29440 }, { "epoch": 112.40458015267176, "grad_norm": 0.3395634889602661, "learning_rate": 9.219762140231237e-09, "loss": 0.0695, "step": 29450 }, { "epoch": 112.44274809160305, "grad_norm": 0.5408310294151306, "learning_rate": 8.888241615322979e-09, "loss": 0.0658, "step": 29460 }, { "epoch": 112.48091603053435, "grad_norm": 0.22297032177448273, "learning_rate": 8.562785761833114e-09, "loss": 0.0761, "step": 29470 }, { "epoch": 112.51908396946565, "grad_norm": 0.1803329437971115, "learning_rate": 8.243394975219753e-09, "loss": 0.0647, "step": 29480 }, { "epoch": 112.55725190839695, "grad_norm": 0.4298841059207916, "learning_rate": 7.930069643573568e-09, "loss": 0.07, "step": 29490 }, { "epoch": 112.59541984732824, "grad_norm": 0.27020174264907837, "learning_rate": 7.622810147614457e-09, "loss": 0.0712, "step": 29500 }, { "epoch": 112.63358778625954, "grad_norm": 0.2004048377275467, "learning_rate": 7.321616860690995e-09, "loss": 0.0665, "step": 29510 }, { "epoch": 112.67175572519083, "grad_norm": 0.25727108120918274, "learning_rate": 7.026490148782095e-09, "loss": 0.0726, "step": 29520 }, { "epoch": 112.70992366412214, "grad_norm": 0.3128455877304077, "learning_rate": 6.737430370494236e-09, "loss": 0.0684, "step": 29530 }, { "epoch": 112.74809160305344, "grad_norm": 0.27916234731674194, "learning_rate": 6.4544378770625695e-09, "loss": 0.0669, "step": 29540 }, { "epoch": 112.78625954198473, "grad_norm": 0.26003262400627136, "learning_rate": 6.177513012349257e-09, "loss": 0.0651, "step": 29550 }, { "epoch": 112.82442748091603, "grad_norm": 0.6421084403991699, "learning_rate": 5.906656112844578e-09, "loss": 0.0683, "step": 29560 }, { "epoch": 112.86259541984732, "grad_norm": 0.2419942170381546, "learning_rate": 5.641867507664156e-09, "loss": 0.066, "step": 29570 }, { "epoch": 112.90076335877862, "grad_norm": 0.4693322479724884, "learning_rate": 5.383147518552845e-09, "loss": 0.0672, "step": 29580 }, { "epoch": 112.93893129770993, "grad_norm": 0.29389244318008423, "learning_rate": 5.13049645987862e-09, "loss": 0.0692, "step": 29590 }, { "epoch": 112.97709923664122, "grad_norm": 0.5387037396430969, "learning_rate": 4.883914638636467e-09, "loss": 0.074, "step": 29600 }, { "epoch": 113.01526717557252, "grad_norm": 0.2849062383174896, "learning_rate": 4.6434023544467135e-09, "loss": 0.0698, "step": 29610 }, { "epoch": 113.05343511450381, "grad_norm": 0.2868102192878723, "learning_rate": 4.408959899554477e-09, "loss": 0.0694, "step": 29620 }, { "epoch": 113.09160305343511, "grad_norm": 0.21665853261947632, "learning_rate": 4.180587558829663e-09, "loss": 0.0681, "step": 29630 }, { "epoch": 113.12977099236642, "grad_norm": 0.2538401484489441, "learning_rate": 3.9582856097658554e-09, "loss": 0.0667, "step": 29640 }, { "epoch": 113.16793893129771, "grad_norm": 0.4532374143600464, "learning_rate": 3.74205432248087e-09, "loss": 0.0852, "step": 29650 }, { "epoch": 113.20610687022901, "grad_norm": 0.3058018386363983, "learning_rate": 3.531893959716204e-09, "loss": 0.0707, "step": 29660 }, { "epoch": 113.2442748091603, "grad_norm": 0.21909840404987335, "learning_rate": 3.327804776837029e-09, "loss": 0.0932, "step": 29670 }, { "epoch": 113.2824427480916, "grad_norm": 0.49850842356681824, "learning_rate": 3.1297870218299776e-09, "loss": 0.0721, "step": 29680 }, { "epoch": 113.3206106870229, "grad_norm": 0.4520060420036316, "learning_rate": 2.9378409353059133e-09, "loss": 0.0695, "step": 29690 }, { "epoch": 113.3587786259542, "grad_norm": 0.24441687762737274, "learning_rate": 2.7519667504971593e-09, "loss": 0.078, "step": 29700 }, { "epoch": 113.3969465648855, "grad_norm": 0.4024795889854431, "learning_rate": 2.572164693258605e-09, "loss": 0.0649, "step": 29710 }, { "epoch": 113.43511450381679, "grad_norm": 0.2969071567058563, "learning_rate": 2.3984349820665997e-09, "loss": 0.0697, "step": 29720 }, { "epoch": 113.47328244274809, "grad_norm": 0.24398191273212433, "learning_rate": 2.2307778280189484e-09, "loss": 0.0691, "step": 29730 }, { "epoch": 113.5114503816794, "grad_norm": 0.5313243269920349, "learning_rate": 2.06919343483547e-09, "loss": 0.0686, "step": 29740 }, { "epoch": 113.54961832061069, "grad_norm": 0.33933746814727783, "learning_rate": 1.9136819988557765e-09, "loss": 0.0683, "step": 29750 }, { "epoch": 113.58778625954199, "grad_norm": 0.39412805438041687, "learning_rate": 1.764243709041491e-09, "loss": 0.0755, "step": 29760 }, { "epoch": 113.62595419847328, "grad_norm": 0.3232634961605072, "learning_rate": 1.6208787469734755e-09, "loss": 0.0709, "step": 29770 }, { "epoch": 113.66412213740458, "grad_norm": 0.34813275933265686, "learning_rate": 1.4835872868546041e-09, "loss": 0.0706, "step": 29780 }, { "epoch": 113.70229007633588, "grad_norm": 0.5944547057151794, "learning_rate": 1.3523694955064338e-09, "loss": 0.0737, "step": 29790 }, { "epoch": 113.74045801526718, "grad_norm": 0.5624353289604187, "learning_rate": 1.2272255323708682e-09, "loss": 0.0642, "step": 29800 }, { "epoch": 113.77862595419847, "grad_norm": 0.7441560626029968, "learning_rate": 1.1081555495096042e-09, "loss": 0.0699, "step": 29810 }, { "epoch": 113.81679389312977, "grad_norm": 0.8023766875267029, "learning_rate": 9.951596916041307e-10, "loss": 0.0705, "step": 29820 }, { "epoch": 113.85496183206106, "grad_norm": 0.3070337176322937, "learning_rate": 8.88238095955174e-10, "loss": 0.0678, "step": 29830 }, { "epoch": 113.89312977099236, "grad_norm": 0.3391276001930237, "learning_rate": 7.873908924821427e-10, "loss": 0.0659, "step": 29840 }, { "epoch": 113.93129770992367, "grad_norm": 0.2922404110431671, "learning_rate": 6.926182037242379e-10, "loss": 0.0701, "step": 29850 }, { "epoch": 113.96946564885496, "grad_norm": 0.34531670808792114, "learning_rate": 6.039201448387877e-10, "loss": 0.0731, "step": 29860 }, { "epoch": 114.00763358778626, "grad_norm": 0.4919748902320862, "learning_rate": 5.212968236029126e-10, "loss": 0.0744, "step": 29870 }, { "epoch": 114.04580152671755, "grad_norm": 0.35671672224998474, "learning_rate": 4.447483404118602e-10, "loss": 0.07, "step": 29880 }, { "epoch": 114.08396946564885, "grad_norm": 0.25368714332580566, "learning_rate": 3.7427478827845033e-10, "loss": 0.0624, "step": 29890 }, { "epoch": 114.12213740458016, "grad_norm": 1.127943992614746, "learning_rate": 3.0987625283473986e-10, "loss": 0.09, "step": 29900 }, { "epoch": 114.16030534351145, "grad_norm": 0.26309409737586975, "learning_rate": 2.5155281233202324e-10, "loss": 0.066, "step": 29910 }, { "epoch": 114.19847328244275, "grad_norm": 0.26694461703300476, "learning_rate": 1.9930453763750135e-10, "loss": 0.0771, "step": 29920 }, { "epoch": 114.23664122137404, "grad_norm": 0.3160324692726135, "learning_rate": 1.531314922387228e-10, "loss": 0.0669, "step": 29930 }, { "epoch": 114.27480916030534, "grad_norm": 0.24074247479438782, "learning_rate": 1.1303373224025305e-10, "loss": 0.0718, "step": 29940 }, { "epoch": 114.31297709923665, "grad_norm": 0.47037428617477417, "learning_rate": 7.901130636367437e-11, "loss": 0.0722, "step": 29950 }, { "epoch": 114.35114503816794, "grad_norm": 0.5677265524864197, "learning_rate": 5.1064255950361574e-11, "loss": 0.0716, "step": 29960 }, { "epoch": 114.38931297709924, "grad_norm": 0.3310537338256836, "learning_rate": 2.9192614958706286e-11, "loss": 0.0715, "step": 29970 }, { "epoch": 114.42748091603053, "grad_norm": 0.5864256024360657, "learning_rate": 1.3396409964117062e-11, "loss": 0.072, "step": 29980 }, { "epoch": 114.46564885496183, "grad_norm": 0.6437695026397705, "learning_rate": 3.6756601606846574e-12, "loss": 0.0669, "step": 29990 }, { "epoch": 114.50381679389314, "grad_norm": 0.734368085861206, "learning_rate": 3.037736062694308e-14, "loss": 0.0751, "step": 30000 }, { "epoch": 114.50381679389314, "step": 30000, "total_flos": 0.0, "train_loss": 0.0959479827662309, "train_runtime": 13839.331, "train_samples_per_second": 17.342, "train_steps_per_second": 2.168 } ], "logging_steps": 10, "max_steps": 30000, "num_input_tokens_seen": 0, "num_train_epochs": 115, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }